Compare commits

..

155 Commits

Author SHA1 Message Date
german77
fe2e710003 externals: Update sdl2 to 2.0.16 2021-08-10 19:16:30 -05:00
Fernando S
6a082df427 Merge pull request #6820 from yzct12345/split-cache
texture_cache: Split out template definitions
2021-08-10 12:23:05 +02:00
Ameer J
9fbe188c01 Merge pull request #6837 from german77/no-pause-screenshot
main: Avoid stopping emulation when taking a screenshot
2021-08-09 23:49:48 -04:00
bunnei
7df790f1ae Merge pull request #6823 from yzct12345/memory-cleanup
memory: Clean up code
2021-08-09 17:09:56 -07:00
bunnei
3e3bd425c1 Merge pull request #6839 from ameerj/frame-cap-positon
configure_general: Swap positions of speed limit and frame limit options
2021-08-09 12:32:07 -07:00
Mai M
2da91ec75b Merge pull request #6844 from ameerj/vp9-empty-frame
vp9: Ensure the first frame is complete
2021-08-08 19:02:39 -04:00
bunnei
b9eee1c539 Merge pull request #6843 from FernandoS27/lives-in-a-pineapple-under-the-sea-2
yuzu-cmd/CMakeLists: Correct attribution for this function.
2021-08-08 11:31:47 -07:00
Fernando Sahmkow
23ca1eb82e yuzu-cmd/CMakeLists: Correct attribution for this function. 2021-08-08 20:24:53 +02:00
ameerj
fa22695705 vp9: Ensure the first frame is complete
Silences a runtime error due to the first frame missing the frame data, and being set to hidden despite being a key-frame.
2021-08-08 13:49:00 -04:00
yzct12345
c4eafcc861 texture_cache: Address ameerj's review 2021-08-08 11:02:51 +00:00
Fernando S
859deda3bb Merge pull request #6834 from K0bin/buffer-image-granularity
Respect Vulkan bufferImageGranularity
2021-08-08 11:57:40 +02:00
bunnei
b023413c98 Merge pull request #6698 from german77/SDL_QoL
input_common: Improve SDL joystick and hide toggle option
2021-08-08 02:44:42 -07:00
bunnei
00358e2098 Merge pull request #6817 from gidoly/patch-1
Add description to fast gpu time option
2021-08-08 01:11:47 -07:00
ameerj
8e0cc3e59a configure_general: Swap positions of speed limit and frame limit options 2021-08-08 01:00:40 -04:00
german77
48b6d41f1b input_common: Improve SDL joystick and hide toggle option 2021-08-07 23:11:23 -05:00
bunnei
63325cafbe Merge pull request #6827 from Morph1984/uuid-hash
common: uuid: Add hash function for UUID
2021-08-07 17:18:46 -07:00
german77
acce512ae8 main: Avoid stopping emulation when taking a screenshot 2021-08-07 15:45:29 -05:00
bunnei
bd0e1d3a25 Merge pull request #6830 from ameerj/nvdec-unimpld-codec
nvdec: Better logging for unimplemented codecs
2021-08-07 12:37:39 -07:00
Robin Kertels
bb29dcb7f2 vulkan_memory_allocator: Respect bufferImageGranularity 2021-08-07 15:28:05 +02:00
bunnei
456adb95ff Merge pull request #6795 from sankasan/cmd-remove-cursor-fullscreen
yuzu-cmd: hide mouse cursor when started fullscreen
2021-08-07 02:00:29 -07:00
bunnei
bd1a764827 Merge pull request #6815 from german77/ui_improvements
settings_ui: Add emulated joystick position dot to controller preview
2021-08-06 23:54:23 -07:00
ameerj
928b64d2ce nvdec: Better logging for unimplemented codecs 2021-08-07 01:08:33 -04:00
bunnei
268b5764c7 Merge pull request #6791 from ameerj/astc-opt
astc_decoder: Various performance and memory optimizations
2021-08-06 21:45:24 -07:00
yzct12345
5f97f74a9a memory: Address lioncash's review 2021-08-07 03:03:21 +00:00
yzct12345
70cc4c0f46 memory: Dedup Read and Write and fix logging bugs 2021-08-07 01:32:06 +00:00
yzct12345
e80323b8b0 texture_cache: Address ameerj's review 2021-08-07 01:27:47 +00:00
bunnei
f183668a87 Merge pull request #6799 from ameerj/vp9-fixes
nvdec: Fix VP9 reference frame refreshes
2021-08-06 17:46:46 -07:00
ameerj
156ea746a3 nvhost_nvdec_common: Remove BufferMap
This was mainly used to keep track of mapped buffers for later unmapping.  Since unmap is no longer implemented, this no longer seves a valuable purpose.
2021-08-06 20:11:12 -04:00
ameerj
e3688f0627 vp9: Cleanup unused variables
With reference frames refreshes fix, we no longer need to buffer two frames in advance.
We can also remove other unused or otherwise unneeded variables.
2021-08-06 20:08:11 -04:00
ameerj
a3f80a97a3 vp9: Fix reference frame refreshes
This resolves the artifacting when decoding VP9 streams.
2021-08-06 20:08:08 -04:00
ameerj
cc8ac112fc nvhost_nvdec_common: Stub UnmapBuffer Ioctl
Skip unmapping nvdec buffers to avoid breaking the continuity of the VP9 reference frame addresses, and the risk of invalidating data before the async GPU thread is done with it.
2021-08-06 20:06:30 -04:00
bunnei
42d8e08f78 Merge pull request #6822 from yzct12345/clion-assert
assert: Avoid empty macros
2021-08-05 22:29:45 -07:00
Morph
d20c5ac720 common: uuid: Add hash function for UUID
Used when UUID is a key in an unordered_map. The hash is produced by XORing the high and low 64-bits of the UUID together.
2021-08-06 00:41:55 -04:00
yzct12345
e611f522c2 memory: Clean up CopyBlock too 2021-08-05 21:09:08 +00:00
gidoly
8ba551e1cd Update configure_graphics_advanced.ui
add description too fast gpu time
2021-08-06 06:08:12 +09:00
yzct12345
02e98f6c93 texture_cache: Don't change copyright year 2021-08-05 20:52:12 +00:00
yzct12345
5566f3dbc0 texture_cache: Address ameerj's review 2021-08-05 20:46:24 +00:00
yzct12345
4edfa6ad8f memory: Address lioncash's review 2021-08-05 20:29:43 +00:00
bunnei
e1a92db519 Merge pull request #6813 from Morph1984/hex-string-to-uuid
common: uuid: Add hex string to UUID constructor
2021-08-05 13:29:11 -07:00
yzct12345
6df9611059 memory: Clean up code 2021-08-05 20:11:14 +00:00
yzct12345
7e846be376 assert: Verify formatting 2021-08-05 17:46:22 +00:00
yzct12345
346149dcf9 assert: Avoid empty macros 2021-08-05 17:21:56 +00:00
yzct12345
f9563c8f24 texture_cache: Split templates out 2021-08-05 13:52:30 +00:00
Mai M
a1cb453470 Merge pull request #6819 from Morph1984/i-am-dumb
applet_swkbd: Include the null terminator in the buffer size calculation
2021-08-04 23:32:01 -04:00
Mai M
9a7d2e3659 Merge pull request #6818 from Morph1984/hex-util-bug
hex_util: Fix incorrect array size in AsArray
2021-08-04 23:31:04 -04:00
Morph
f10dc35dd0 applet_swkbd: Include the null terminator in the buffer size calculation
Some games may interpret the read string as a null-terminated string instead of just reading the string up to buffer_size.
2021-08-04 22:32:09 -04:00
Morph
7b39215c8a hex_util: Fix incorrect array size in AsArray
Although this isn't used, this is a potential bug as HexStringToArray will perform an out-of-bounds read.
2021-08-04 22:16:29 -04:00
Morph
edb9c72e26 Merge pull request #6816 from lat9nq/fix-mult-contrl
config: Read connected setting for controllers
2021-08-04 22:05:53 -04:00
lat9nq
be16d92060 config: Read connected setting for controllers
Currently yuzu will read the mapping but does not connect a controller
despite adding subsequent configurations for it. Read the `connected`
setting for now as a boolean like the Qt frontend.
2021-08-04 17:09:35 -04:00
german77
d5bf597436 settings_ui: Use better colors for the light theme 2021-08-04 11:47:06 -05:00
german77
1fb158ce90 settings_ui: Add emulated joystick position dot to controller preview 2021-08-04 11:46:54 -05:00
Morph
705f111058 common: uuid: Add hex string to UUID constructor
This allows for easily converting a hex string into a Common::UUID, which is backed by a 128 bit unsigned integer.
2021-08-04 10:45:41 -04:00
yzct12345
2868d4ba84 nvdec: Implement VA-API hardware video acceleration (#6713)
* nvdec: VA-API

* Verify formatting

* Forgot a semicolon for Windows

* Clarify comment about AV_PIX_FMT_NV12

* Fix assert log spam from missing negation

* vic: Remove forgotten debug code

* Address lioncash's review

* Mention VA-API is Intel/AMD

* Address v1993's review

* Hopefully fix CMakeLists style this time

* vic: Improve cache locality

* vic: Fix off-by-one error

* codec: Async

* codec: Forgot the GetValue()

* nvdec: Address ameerj's review

* codec: Fallback to CPU without VA-API support

* cmake: Address lat9nq's review

* cmake: Make VA-API optional

* vaapi: Multiple GPU

* Apply suggestions from code review

Co-authored-by: Ameer J <52414509+ameerj@users.noreply.github.com>

* nvdec: Address ameerj's review

* codec: Use anonymous instead of static

* nvdec: Remove enum and fix memory leak

* nvdec: Address ameerj's review

* codec: Remove preparation for threading

Co-authored-by: Ameer J <52414509+ameerj@users.noreply.github.com>
2021-08-03 23:43:11 -04:00
Morph
d16a337d98 Merge pull request #6805 from lat9nq/fix-user-profiles
config: Only read/write current_user on global config
2021-08-02 23:58:41 -04:00
lat9nq
6ab0c6a808 config: Only read/write current_user on global config 2021-08-02 18:29:24 -04:00
Morph
ffe553edbf Merge pull request #6801 from spholz/spholz-patch-1
network: fix ternary operator in Socket::SendTo
2021-08-02 14:56:16 -04:00
spholz
e71f78d04c network: fix ternary operator in Socket::SendTo 2021-08-02 20:12:12 +02:00
yzct12345
f56d0db5bd decoders: Optimize swizzle copy performance (#6790)
This makes UnswizzleTexture up to two times faster. It is the main bottleneck in NVDEC video decoding.
2021-08-02 11:18:58 -04:00
san
3e26141483 yuzu-cmd: hide cursor when in fullscreen
Exposed the SDL_ShowCursor function to EmuWindow baseclass. When creating the window (GL or VK) in fullscreen it now automatically hides the cursor.
2021-08-01 21:46:13 +02:00
Malte Jürgens
381aacdbb1 game_list: Make game list folder icons smaller (#6762)
Makes the default game list folder icons 48x48 by default instead of 64x64, and allows for selecting small (24x24) and large (72x72) icon sizes.
2021-08-01 12:59:36 -04:00
Morph
d20bcb7faf Merge pull request #6793 from Morph1984/lang-fix
service: set: Correct copy amount in GetAvailableLanguageCodes
2021-08-01 12:47:47 -04:00
Morph
3b4d427993 service: set: Correct copy amount in GetAvailableLanguageCodes 2021-08-01 11:59:52 -04:00
Fernando S
30f0b7cf31 Merge pull request #6720 from ameerj/vk-screenshot
renderer_vulkan: Implement screenshots
2021-08-01 13:31:33 +02:00
Ameer J
db32c3762b Merge pull request #6765 from ReinUsesLisp/y-negate-vk
vk_rasterizer: Flip viewport on Y_NEGATE
2021-08-01 01:47:37 -04:00
Ameer J
a086ee6a00 Merge pull request #6565 from lat9nq/bundle-ffmpeg
cmake, ci: Build bundled FFmpeg with yuzu
2021-08-01 01:34:10 -04:00
ameerj
c439fc9be9 astc_decoder: Reduce workgroup size
This reduces the amount of over dispatching when there are odd dimensions (i.e. ASTC 8x5), which rarely evenly divide into 32x32.
2021-08-01 01:22:27 -04:00
ameerj
5ab8053511 astc_decoder: Compute offset swizzles in-shader
Alleviates the dependency on the swizzle table and a uniform which is constant for all ASTC texture sizes.
2021-08-01 01:22:26 -04:00
ameerj
b2862e4772 astc_decoder: Make use of uvec4 for payload data 2021-07-31 22:28:04 -04:00
ameerj
a75d70fa90 astc_decoder: Simplify Select2DPartition 2021-07-31 21:36:26 -04:00
ameerj
5665d05547 astc_decoder: Optimize the use EncodingData
This buffer was a list of EncodingData structures sorted by their bit length, with some duplication from the cpu decoder implementation.
We can take advantage of its sorted property to optimize its usage in the shader.

Thanks to wwylele for the optimization idea.
2021-07-31 21:36:26 -04:00
ameerj
15c0c213b1 astc.h: Move data to cpp implementation
Moves leftover values that are no longer used by the gpu decoder back to the cpp implementation.
2021-07-31 21:26:42 -04:00
Mai M
7cf0958b06 Merge pull request #6788 from Morph1984/hle_api_12.1.0
hle: api_version: Update HOS version to 12.1.0
2021-07-31 20:12:35 -04:00
Morph
9143fe5d3a hle: api_version: Update HOS version to 12.1.0
Keeps us up to date with reporting the system version.
2021-07-31 14:39:49 -04:00
bunnei
47f13a9df4 Merge pull request #6752 from Morph1984/pt-br
service: ns, set: Add PT_BR (Brazilian Portuguese)
2021-07-30 14:42:11 -07:00
bunnei
2c7fdee7a7 Merge pull request #6775 from lat9nq/cmd-remove-global-core
emu_window: Remove global system instance
2021-07-30 13:28:24 -07:00
bunnei
7530594602 Merge pull request #6759 from ReinUsesLisp/pipeline-statistics
renderer_vulkan: Add setting to log pipeline statistics
2021-07-30 11:18:52 -07:00
bunnei
0334b9b776 Merge pull request #6770 from Morph1984/swkbd_buffer_size
applet_swkbd: Correct string buffer size calculation
2021-07-30 09:26:35 -07:00
lat9nq
335de3fdf5 emu_window: Remove global system instance
It was just the one in emu_window_sdl2, but since _gl and _vk inherit
from it, they all needed adjustments.

Leaves just the one auto system& in main().
2021-07-30 10:43:58 -04:00
Morph
ba3d230421 applet_swkbd: Correct string buffer size calculation
The buffer size here does not include the initial 8 bytes.
2021-07-30 02:19:04 -04:00
Morph
275db94bb8 configure_system: Add Brazilian Portuguese to the list of languages 2021-07-30 02:15:53 -04:00
Morph
6ca8ed9e58 service: set: Correct 4.0.0 max_entries to 0x40 (64) instead of 17 2021-07-30 02:15:53 -04:00
Morph
21ff0a3d6e service: ns, set: Add PT_BR (Brazilian Portuguese) 2021-07-30 02:15:53 -04:00
Morph
db07ca6c7f Merge pull request #6767 from ReinUsesLisp/fold-float-pack
shader: Fold UnpackFloat2x16 and PackFloat2x16
2021-07-30 02:07:52 -04:00
bunnei
a98f14e9b0 Merge pull request #6722 from ReinUsesLisp/xmad-opts
shader: Fold integer FMA from Nvidia's pattern
2021-07-29 18:45:37 -07:00
ReinUsesLisp
8c9febe8f7 shader: Fold UnpackFloat2x16 and PackFloat2x16
Simplifies the code a bit when possible. These instructions should be
no-ops codegen wise.
2021-07-29 21:22:52 -03:00
Ameer J
23b3333f72 Merge pull request #6751 from Morph1984/languagecode
service: ns: Map ZH_TW and ZH_CN to Traditional/Simplified Chinese
2021-07-29 14:41:03 -04:00
bunnei
5acf020389 Merge pull request #6742 from Morph1984/uuid
common: uuid: Return a lower-case hex string in Format
2021-07-29 09:37:15 -07:00
ReinUsesLisp
b185567a03 vk_rasterizer: Flip viewport on Y_NEGATE
Matches OpenGL's behavior. I don't believe this register flips geometry,
but we have to try to match behavior on both backends.
2021-07-29 02:17:53 -03:00
ameerj
7ac99bb127 renderers: Add explicit invert_y bool to screenshot callback
OpenGL and Vulkan images render in different coordinate systems. This allows us to specify the coordinate system of the screenshot within each renderer
2021-07-28 21:46:08 -04:00
ameerj
75e7f54fb0 renderer_vulkan: Implement screenshots 2021-07-28 21:45:55 -04:00
ameerj
548bac8989 vk_blit_screen: Add public CreateFramebuffer method 2021-07-28 21:43:02 -04:00
ameerj
1e6c5d323d vk_blit_screen: Make Draw method more generic
Allows specifying the framebuffer and render area dimensions, rather than being hard coded for the render window.
2021-07-28 21:37:30 -04:00
bunnei
124e3b4819 Merge pull request #6760 from ReinUsesLisp/fp16-collect
shader: Mark ConvertF16F32 and ConvertF32F16 as fp16 instructions
2021-07-28 14:55:06 -07:00
bunnei
f771d92e44 Merge pull request #6758 from jbeich/fastmem
host_memory: enable fastmem on FreeBSD
2021-07-28 13:01:54 -07:00
bunnei
d05e6003f0 Merge pull request #6700 from lat9nq/fullscreen-enum
general: Implement FullscreenMode enumeration
2021-07-28 11:36:42 -07:00
Morph
5593a3716e Merge pull request #6671 from jls47/master
applets/web: Addressing QT Navigation issues in Linux
2021-07-27 22:46:27 -04:00
Ameer J
d923ec5805 Merge pull request #6753 from jbeich/libusb
cmake: unbreak libusb detection on FreeBSD
2021-07-27 21:26:17 -04:00
ReinUsesLisp
1bb46b7d64 shader: Mark ConvertF16F32 and ConvertF32F16 as fp16 instructions
Fixes instances where fp16 types are not declared on SPIR-V but they are
used. This shouldn't happen on master, as it's been uncovered by an
additional optimization pass.
2021-07-27 21:33:05 -03:00
ReinUsesLisp
3b006f4fe2 renderer_vulkan: Add setting to log pipeline statistics
Use VK_KHR_pipeline_executable_properties when enabled and available to
log statistics about the pipeline cache in a game.

For example, this is on Turing GPUs when generating a pipeline cache
from Super Smash Bros. Ultimate:

Average pipeline statistics
==========================================
Code size:       6433.167
Register count:    32.939

More advanced results could be presented, at the moment it's just an
average of all 3D and compute pipelines.
2021-07-27 21:29:24 -03:00
bunnei
92887a65f0 Merge pull request #6749 from lioncash/rtarget
render_target: Add missing initializer for size extent
2021-07-27 17:28:53 -07:00
bunnei
6053f2e1fe Merge pull request #6730 from Morph1984/buf_to_stdstring
common: fs: fs_util: Add BufferToUTF8String
2021-07-27 15:50:54 -07:00
Jan Beich
353be2306c host_memory: Add workaround for FreeBSD 12
src/common/host_memory.cpp:360:14: error: use of undeclared identifier
      'memfd_create'
        fd = memfd_create("HostMemory", 0);
             ^
2021-07-27 20:15:23 +00:00
Jan Beich
c4cd82fa7c host_memory: Enable Linux implementation on FreeBSD
HW.Memory <Critical> common/host_memory.cpp:HostMemory:492: Fastmem unavailable, falling back to VirtualBuffer for memory allocation
2021-07-27 20:09:43 +00:00
Rodrigo Locatti
ab206d6378 Merge pull request #6748 from lioncash/engine-init
video_core/engine: Consistently initialize rasterizer pointers
2021-07-27 16:17:20 -03:00
Rodrigo Locatti
5da97c57cd Merge pull request #6744 from lioncash/exc
exception: Make constructors explicit
2021-07-27 16:11:17 -03:00
bunnei
2717e79c74 Merge pull request #6745 from lioncash/copies
video_core: Remove some unused variables
2021-07-27 11:38:32 -07:00
bunnei
e2c42ec5e2 Merge pull request #6747 from lioncash/wrapper
vulkan_wrapper: Fix SetObjectName() always indicating objects as images
2021-07-27 10:18:45 -07:00
jls47
ef29ed75b0 qt_web_browser: Fix lambda capture for HIDButton 2021-07-27 11:31:12 -04:00
jls47
3109d1c3db qt_web_browser: Focus on the first link element
Focusing on the first link element fixes element navigation upon loading the web applet in games such as Super Mario Odyssey
2021-07-27 11:31:11 -04:00
Jan Beich
a24224e274 cmake: don't use pkg-config directly with non-reference libusb
CMake Error at externals/libusb/CMakeLists.txt:120 (add_library):
  Cannot find source file:

    libusb/libusb/core.c

  Tried extensions .c .C .c++ .cc .cpp .cxx .cu .mpp .m .M .mm .h .hh .h++
  .hm .hpp .hxx .in .txx .f .F .for .f77 .f90 .f95 .f03 .ispc

CMake Error at externals/libusb/CMakeLists.txt:120 (add_library):
  No SOURCES given to target: usb

ld: error: undefined symbol: libusb_interrupt_transfer
>>> referenced by gc_adapter.cpp
>>>               gc_adapter.cpp.o:(GCAdapter::Adapter::SendVibrations()) in archive src/input_common/libinput_common.a
>>> referenced by gc_adapter.cpp
>>>               gc_adapter.cpp.o:(GCAdapter::Adapter::GetGCEndpoint(libusb_device*)) in archive src/input_common/libinput_common.a
>>> referenced by gc_adapter.cpp
>>>               gc_adapter.cpp.o:(GCAdapter::Adapter::AdapterInputThread()) in archive src/input_common/libinput_common.a

ld: error: undefined symbol: libusb_error_name
>>> referenced by gc_adapter.cpp
>>>               gc_adapter.cpp.o:(GCAdapter::Adapter::SendVibrations()) in archive src/input_common/libinput_common.a

ld: error: undefined symbol: libusb_control_transfer
>>> referenced by gc_adapter.cpp
>>>               gc_adapter.cpp.o:(GCAdapter::Adapter::CheckDeviceAccess()) in archive src/input_common/libinput_common.a

ld: error: undefined symbol: libusb_kernel_driver_active
>>> referenced by gc_adapter.cpp
>>>               gc_adapter.cpp.o:(GCAdapter::Adapter::CheckDeviceAccess()) in archive src/input_common/libinput_common.a

ld: error: undefined symbol: libusb_close
>>> referenced by gc_adapter.cpp
>>>               gc_adapter.cpp.o:(GCAdapter::Adapter::CheckDeviceAccess()) in archive src/input_common/libinput_common.a
>>> referenced by gc_adapter.cpp
>>>               gc_adapter.cpp.o:(GCAdapter::Adapter::ClearLibusbHandle()) in archive src/input_common/libinput_common.a
>>> referenced by gc_adapter.cpp
>>>               gc_adapter.cpp.o:(GCAdapter::Adapter::Reset()) in archive src/input_common/libinput_common.a
>>> referenced by gc_adapter.cpp
>>>               gc_adapter.cpp.o:(GCAdapter::Adapter::Setup()) in archive src/input_common/libinput_common.a
>>> referenced by gc_adapter.cpp
>>>               gc_adapter.cpp.o:(GCAdapter::Adapter::AdapterScanThread()) in archive src/input_common/libinput_common.a

ld: error: undefined symbol: libusb_detach_kernel_driver
>>> referenced by gc_adapter.cpp
>>>               gc_adapter.cpp.o:(GCAdapter::Adapter::CheckDeviceAccess()) in archive src/input_common/libinput_common.a

ld: error: undefined symbol: libusb_claim_interface
>>> referenced by gc_adapter.cpp
>>>               gc_adapter.cpp.o:(GCAdapter::Adapter::CheckDeviceAccess()) in archive src/input_common/libinput_common.a

ld: error: undefined symbol: libusb_get_config_descriptor
>>> referenced by gc_adapter.cpp
>>>               gc_adapter.cpp.o:(GCAdapter::Adapter::GetGCEndpoint(libusb_device*)) in archive src/input_common/libinput_common.a

ld: error: undefined symbol: libusb_release_interface
>>> referenced by gc_adapter.cpp
>>>               gc_adapter.cpp.o:(GCAdapter::Adapter::ClearLibusbHandle()) in archive src/input_common/libinput_common.a
>>> referenced by gc_adapter.cpp
>>>               gc_adapter.cpp.o:(GCAdapter::Adapter::Reset()) in archive src/input_common/libinput_common.a
>>> referenced by gc_adapter.cpp
>>>               gc_adapter.cpp.o:(GCAdapter::Adapter::Setup()) in archive src/input_common/libinput_common.a
>>> referenced by gc_adapter.cpp
>>>               gc_adapter.cpp.o:(GCAdapter::Adapter::AdapterScanThread()) in archive src/input_common/libinput_common.a

ld: error: undefined symbol: libusb_init
>>> referenced by gc_adapter.cpp
>>>               gc_adapter.cpp.o:(GCAdapter::Adapter::Adapter()) in archive src/input_common/libinput_common.a

ld: error: undefined symbol: libusb_open_device_with_vid_pid
>>> referenced by gc_adapter.cpp
>>>               gc_adapter.cpp.o:(GCAdapter::Adapter::Setup()) in archive src/input_common/libinput_common.a

ld: error: undefined symbol: libusb_get_device
>>> referenced by gc_adapter.cpp
>>>               gc_adapter.cpp.o:(GCAdapter::Adapter::Setup()) in archive src/input_common/libinput_common.a

ld: error: undefined symbol: libusb_exit
>>> referenced by gc_adapter.cpp
>>>               gc_adapter.cpp.o:(GCAdapter::Adapter::Reset()) in archive src/input_common/libinput_common.a
2021-07-27 15:01:56 +00:00
Morph
a6359fe9ae service: ns: Remove unused ns_language header 2021-07-27 08:59:26 -04:00
Morph
d7cd316a6c service: ns: Map ZH_TW and ZH_CN to Traditional/Simplified Chinese 2021-07-27 08:54:41 -04:00
Lioncash
00e100de08 render_target: Add missing initializer for size extent
Everything else has a default constructor that does the straightforward
thing of initializing most members to a default value, except for the
size.

We explicitly initialize the size (and others, for consistency), to
prevent potential uninitialized reads from occurring. Particularly given
the largeish surface area that this struct is used in.
2021-07-27 07:41:21 -04:00
Lioncash
f8964dd89a video_core/engine: Consistently initialize rasterizer pointers
Ensures all of the engines have consistent and deterministic
initialization of the rasterizer pointers.
2021-07-27 07:30:57 -04:00
Lioncash
8c82c594f0 vulkan_wrapper: Fix SetObjectName() always indicating objects as images
We should be using the passed in object type instead.
2021-07-27 07:19:15 -04:00
Lioncash
ec56a17acd buffer_cache: Remove unused small_vector in CommitAsyncFlushesHigh()
Given this is non-trivial, the constructor is required to execute, so
this removes a bit of redundant codegen.
2021-07-27 06:24:44 -04:00
Lioncash
075a744e38 gl_shader_cache: Remove unused variable 2021-07-27 06:23:49 -04:00
Lioncash
296728ec46 vk_compute_pass: Remove unused captures
Resolves two compiler warnings.
2021-07-27 06:17:52 -04:00
Lioncash
c27ddb44de exception: Make constructors explicit
Ensures that exception construction is always explicit.
2021-07-27 04:15:14 -04:00
Lioncash
e490ddf327 exception: Make what() member function nodiscard 2021-07-27 04:14:32 -04:00
Lioncash
90f3678ada exception: Narrow down specific header
We can use the <exception> header instead of pulling in all of the
exception-style classes.
2021-07-27 04:09:18 -04:00
Morph
f5f04cce01 common: fs: fs_util: Add BufferToUTF8String
Allows for direct conversion to std::string without having to convert std::u8string to std::string
2021-07-27 00:02:22 -04:00
Morph
b9b48aee7d common: uuid: Return a lower-case hex string in Format 2021-07-26 23:54:59 -04:00
bunnei
d6c799494c Merge pull request #6696 from ameerj/speed-limit-rename
general: Rename "Frame Limit" references to "Speed Limit"
2021-07-26 18:51:00 -07:00
Rodrigo Locatti
7511f65e31 Merge pull request #6741 from ReinUsesLisp/stream-remove
vk_stream_buffer: Remove unused stream buffer
2021-07-26 20:35:01 -03:00
Rodrigo Locatti
1a94b3f452 Merge pull request #6740 from K0bin/hvv-fallback
Handle allocation failure in Staging buffer
2021-07-26 20:34:44 -03:00
Robin Kertels
75050c788c vk_staging_buffer_pool: Fall back to host memory when allocation fails 2021-07-26 23:37:18 +02:00
Rodrigo Locatti
c6991fa900 Merge pull request #6728 from ReinUsesLisp/null-buffer-usage
vk_buffer_cache: Add transform feedback usage to null buffer
2021-07-26 18:30:45 -03:00
Rodrigo Locatti
6c432d5349 Merge pull request #6729 from ReinUsesLisp/quad-indexed-barrier
vk_compute_pass: Fix pipeline barrier for indexed quads
2021-07-26 18:30:05 -03:00
ReinUsesLisp
5f9a4817a5 vk_stream_buffer: Remove unused stream buffer
Remove unused file.
2021-07-26 18:19:53 -03:00
Rodrigo Locatti
c0f99558fb Merge pull request #6724 from lioncash/nodisc-shader
shader_recompiler: Remove unnecessary [[nodiscard]] instances
2021-07-26 16:35:21 -03:00
Rodrigo Locatti
de0b89792c Merge pull request #6726 from lioncash/hguard
emit_spirv_instructions: Add missing header guard
2021-07-26 16:35:11 -03:00
Rodrigo Locatti
3d97f1e6cf Merge pull request #6727 from lioncash/topology
emit_glasm: Fix LINESS_ADJACENCY typo in InputPrimitive()
2021-07-26 16:35:03 -03:00
bunnei
7490117fa4 Merge pull request #6736 from CaptV0rt3x/patch-1
Config-graphics: reword GLASM option
2021-07-26 11:27:46 -07:00
Vamsi Krishna
c05bbf375d configure_graphics: reword GLASM option
Change wording to explain that GLASM is actually short for Assembly Shaders
2021-07-26 20:49:31 +05:30
Rodrigo Locatti
b2b3fcdccd Merge pull request #6723 from lioncash/shader
object_pool: Add missing return in Chunk move assignment operator
2021-07-26 06:01:21 -03:00
Rodrigo Locatti
4afc2de129 Merge pull request #6725 from lioncash/control-token
control_flow: Fix duplicate switch case in OpcodeToken
2021-07-26 06:00:23 -03:00
ReinUsesLisp
771dcb2a56 vk_compute_pass: Fix pipeline barrier for indexed quads
Use an index buffer barrier instead of a vertex input read barrier.
2021-07-26 05:51:09 -03:00
ReinUsesLisp
27ed6e7c2b vk_buffer_cache: Add transform feedback usage to null buffer
Fixes bad API usages on Vulkan.
2021-07-26 05:49:37 -03:00
Lioncash
3e7813e49d emit_glasm: Fix LINESS_ADJACENCY typo in InputPrimitive()
This should be LINES_ADJACENCY
2021-07-26 04:44:56 -04:00
Lioncash
06ca911621 shader_recompiler: Remove unnecessary [[nodiscard]] instances
[[nodiscard]] doesn't do anything on functions with a void return type
and causes superfluous warnings.
2021-07-26 04:23:59 -04:00
Lioncash
0b67df1f7c control_flow: Fix duplicate switch case in OpcodeToken
This previously duplicated the case of the PBK case above it.
2021-07-26 04:16:34 -04:00
Lioncash
89ad9df0e9 object_pool: Add missing return in Chunk move assignment operator
Prevents undefined behavior from occurring.
2021-07-26 04:01:05 -04:00
ReinUsesLisp
66a0cedba3 shader: Fold integer FMA from Nvidia's pattern
Fold shaders doing "a * b + c" on integers from the pattern generated by
Nvidia's GL compiler.

On a somewhat complex compute shader it reduces the code size by 16
instructions from 2 matches on Turing GPUs.

On Intel as extracted from KHR_pipeline_executable_properties:
Before the optimization:
```
Instruction Count: 2057
Basic Block Count: 45
Scratch Memory Size: 14752
Spill Count: 232
Fill Count: 261
SEND Count: 610
Cycle Count: 11325
```

After the optimization:
```
Instruction Count: 2046
Basic Block Count: 44
Scratch Memory Size: 13728
Spill Count: 219
Fill Count: 268
SEND Count: 604
Cycle Count: 11367
```
2021-07-26 04:58:02 -03:00
ReinUsesLisp
09fb41dc63 shader: Use TryInstRecursive on XMAD multiply folding
Simplify a bit the logic.
2021-07-26 04:15:27 -03:00
ReinUsesLisp
f6f0383b49 shader: Add TryInstRecursive utility to values 2021-07-26 01:31:05 -03:00
bunnei
c09557acd8 Merge pull request #6697 from ameerj/fps-cap
config, nvflinger: Add FPS cap setting
2021-07-25 16:23:44 -07:00
lat9nq
09d6cc9943 Merge branch 'master' into fullscreen-enum 2021-07-25 15:31:33 -04:00
ameerj
c80ae87b4e renderer_base: Removed redundant settings
use_framelimiter was not being used internally by the renderers.
set_background_color was always set to true as there is no toggle for the renderer background color, instead users directly choose the color of their choice.
2021-07-23 22:10:01 -04:00
ameerj
9dfbc9bdce general: Rename "Frame Limit" references to "Speed Limit"
This setting is best referred to as a speed limit, as it involves the limits of all timing based aspects of the emulator, not only framerate.
This allows us to differentiate it from the fps unlocker setting.
2021-07-23 22:10:01 -04:00
ameerj
2c6e274b39 config, nvflinger: Add FPS cap setting
Allows finer tuning of the FPS limit.
2021-07-23 22:04:36 -04:00
lat9nq
d8b00fd863 configuration: Use combobox apply template where possible
We don't need to manually apply this setting now that a template can do
this for us.
2021-07-23 10:18:07 -04:00
lat9nq
b11c81cc13 general: Implement FullscreenMode enumeration
Prevents us from using an unclear 0 or 1 to describe the fullscreen
mode.
2021-07-23 10:14:37 -04:00
lat9nq
ef70054367 cmake: Specify the compiler on autotools externals
Enables CCache on externals if available.
2021-07-06 12:54:24 -04:00
lat9nq
fbb26e6173 cmake, ci: Build bundled FFmpeg with yuzu
Drops usage of CMAKE_DEPENDENT_OPTION to allow using
YUZU_USE_BUNDLED_FFMPEG as an option on any platform. CI then now builds
FFmpeg always, netting about 10 MB less used on the AppImage.

Also somewhat fixes YUZU_USE_BUNDLED_QT so that it can be used even if
CMake doesn't clean up its state after running the first find_package.
2021-07-06 12:28:22 -04:00
141 changed files with 2574 additions and 2385 deletions

View File

@@ -18,7 +18,8 @@ cmake .. \
-DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON \
-DENABLE_QT_TRANSLATION=ON \
-DUSE_DISCORD_PRESENCE=ON \
-DYUZU_ENABLE_COMPATIBILITY_REPORTING=${ENABLE_COMPATIBILITY_REPORTING:-"OFF"}
-DYUZU_ENABLE_COMPATIBILITY_REPORTING=${ENABLE_COMPATIBILITY_REPORTING:-"OFF"} \
-DYUZU_USE_BUNDLED_FFMPEG=ON
make -j$(nproc)

View File

@@ -25,7 +25,7 @@ option(YUZU_USE_BUNDLED_BOOST "Download bundled Boost" OFF)
option(YUZU_USE_BUNDLED_LIBUSB "Compile bundled libusb" OFF)
CMAKE_DEPENDENT_OPTION(YUZU_USE_BUNDLED_FFMPEG "Download/Build bundled FFmpeg" ON "WIN32" OFF)
option(YUZU_USE_BUNDLED_FFMPEG "Download/Build bundled FFmpeg" "${WIN32}")
option(YUZU_USE_QT_WEB_ENGINE "Use QtWebEngine for web applet implementation" OFF)
@@ -376,7 +376,7 @@ if (ENABLE_SDL2)
if (YUZU_USE_BUNDLED_SDL2)
# Detect toolchain and platform
if ((MSVC_VERSION GREATER_EQUAL 1910 AND MSVC_VERSION LESS 1930) AND ARCHITECTURE_x86_64)
set(SDL2_VER "SDL2-2.0.15-prerelease")
set(SDL2_VER "SDL2-2.0.16")
else()
message(FATAL_ERROR "No bundled SDL2 binaries for your toolchain. Disable YUZU_USE_BUNDLED_SDL2 and provide your own.")
endif()
@@ -396,7 +396,7 @@ if (ENABLE_SDL2)
elseif (YUZU_USE_EXTERNAL_SDL2)
message(STATUS "Using SDL2 from externals.")
else()
find_package(SDL2 2.0.15 REQUIRED)
find_package(SDL2 2.0.16 REQUIRED)
# Some installations don't set SDL2_LIBRARIES
if("${SDL2_LIBRARIES}" STREQUAL "")
@@ -496,7 +496,7 @@ endif()
# Ensure libusb is properly configured (based on dolphin libusb include)
if(NOT APPLE AND NOT YUZU_USE_BUNDLED_LIBUSB)
include(FindPkgConfig)
if (PKG_CONFIG_FOUND)
if (PKG_CONFIG_FOUND AND NOT CMAKE_SYSTEM_NAME MATCHES "DragonFly|FreeBSD")
pkg_check_modules(LIBUSB QUIET libusb-1.0>=1.0.24)
else()
find_package(LibUSB)
@@ -583,8 +583,32 @@ if (YUZU_USE_BUNDLED_FFMPEG)
"${FFmpeg_PREFIX};${FFmpeg_BUILD_DIR}"
CACHE PATH "Path to FFmpeg headers" FORCE)
if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
Include(FindPkgConfig REQUIRED)
pkg_check_modules(LIBVA libva)
endif()
if(LIBVA_FOUND)
pkg_check_modules(LIBDRM libdrm REQUIRED)
find_package(X11 REQUIRED)
pkg_check_modules(LIBVA-DRM libva-drm REQUIRED)
pkg_check_modules(LIBVA-X11 libva-x11 REQUIRED)
set(FFmpeg_LIBVA_LIBRARIES
${LIBDRM_LIBRARIES}
${X11_LIBRARIES}
${LIBVA-DRM_LIBRARIES}
${LIBVA-X11_LIBRARIES}
${LIBVA_LIBRARIES})
set(FFmpeg_HWACCEL_FLAGS
--enable-hwaccel=h264_vaapi
--enable-hwaccel=vp9_vaapi
--enable-libdrm)
message(STATUS "VA-API found")
else()
set(FFmpeg_HWACCEL_FLAGS --disable-vaapi)
endif()
# `configure` parameters builds only exactly what yuzu needs from FFmpeg
# `--disable-{vaapi,vdpau}` is needed to avoid linking issues
# `--disable-vdpau` is needed to avoid linking issues
add_custom_command(
OUTPUT
${FFmpeg_MAKEFILE}
@@ -600,13 +624,16 @@ if (YUZU_USE_BUNDLED_FFMPEG)
--disable-network
--disable-postproc
--disable-swresample
--disable-vaapi
--disable-vdpau
--enable-decoder=h264
--enable-decoder=vp9
--cc="${CMAKE_C_COMPILER}"
--cxx="${CMAKE_CXX_COMPILER}"
${FFmpeg_HWACCEL_FLAGS}
WORKING_DIRECTORY
${FFmpeg_BUILD_DIR}
)
unset(FFmpeg_HWACCEL_FLAGS)
# Workaround for Ubuntu 18.04's older version of make not being able to call make as a child
# with context of the jobserver. Also helps ninja users.
@@ -616,9 +643,10 @@ if (YUZU_USE_BUNDLED_FFMPEG)
OUTPUT_VARIABLE
SYSTEM_THREADS)
set(FFmpeg_BUILD_LIBRARIES ${FFmpeg_LIBRARIES})
add_custom_command(
OUTPUT
${FFmpeg_LIBRARIES}
${FFmpeg_BUILD_LIBRARIES}
COMMAND
make -j${SYSTEM_THREADS}
WORKING_DIRECTORY
@@ -628,7 +656,12 @@ if (YUZU_USE_BUNDLED_FFMPEG)
# ALL makes this custom target build every time
# but it won't actually build if the DEPENDS parameter is up to date
add_custom_target(ffmpeg-configure ALL DEPENDS ${FFmpeg_MAKEFILE})
add_custom_target(ffmpeg-build ALL DEPENDS ${FFmpeg_LIBRARIES} ffmpeg-configure)
add_custom_target(ffmpeg-build ALL DEPENDS ${FFmpeg_BUILD_LIBRARIES} ffmpeg-configure)
link_libraries(${FFmpeg_LIBVA_LIBRARIES})
set(FFmpeg_LIBRARIES ${FFmpeg_LIBVA_LIBRARIES} ${FFmpeg_BUILD_LIBRARIES}
CACHE PATH "Paths to FFmpeg libraries" FORCE)
unset(FFmpeg_BUILD_LIBRARIES)
unset(FFmpeg_LIBVA_LIBRARIES)
if (FFmpeg_FOUND)
message(STATUS "Found FFmpeg version ${FFmpeg_VERSION}")

View File

@@ -51,11 +51,11 @@ QPushButton#GPUStatusBarButton:hover {
}
QPushButton#GPUStatusBarButton:checked {
color: #ff8040;
color: #b06020;
}
QPushButton#GPUStatusBarButton:!checked {
color: #40dd40;
color: #109010;
}
QPushButton#buttonRefreshDevices {

2
externals/SDL vendored

View File

@@ -67,6 +67,8 @@ if (MINGW OR (${CMAKE_SYSTEM_NAME} MATCHES "Linux") OR APPLE)
"${LIBUSB_MAKEFILE}"
COMMAND
env
CC="${CMAKE_C_COMPILER}"
CXX="${CMAKE_CXX_COMPILER}"
CFLAGS="${LIBUSB_CFLAGS}"
sh "${LIBUSB_CONFIGURE}"
${LIBUSB_CONFIGURE_ARGS}

View File

@@ -52,8 +52,12 @@ assert_noinline_call(const Fn& fn) {
#define DEBUG_ASSERT(_a_) ASSERT(_a_)
#define DEBUG_ASSERT_MSG(_a_, ...) ASSERT_MSG(_a_, __VA_ARGS__)
#else // not debug
#define DEBUG_ASSERT(_a_)
#define DEBUG_ASSERT_MSG(_a_, _desc_, ...)
#define DEBUG_ASSERT(_a_) \
do { \
} while (0)
#define DEBUG_ASSERT_MSG(_a_, _desc_, ...) \
do { \
} while (0)
#endif
#define UNIMPLEMENTED() ASSERT_MSG(false, "Unimplemented code!")

View File

@@ -20,6 +20,10 @@ std::string ToUTF8String(std::u8string_view u8_string) {
return std::string{u8_string.begin(), u8_string.end()};
}
std::string BufferToUTF8String(std::span<const u8> buffer) {
return std::string{buffer.begin(), std::ranges::find(buffer, u8{0})};
}
std::string PathToUTF8String(const std::filesystem::path& path) {
return ToUTF8String(path.u8string());
}

View File

@@ -46,6 +46,17 @@ concept IsChar = std::same_as<T, char>;
*/
[[nodiscard]] std::string ToUTF8String(std::u8string_view u8_string);
/**
* Converts a buffer of bytes to a UTF8-encoded std::string.
* This converts from the start of the buffer until the first encountered null-terminator.
* If no null-terminator is found, this converts the entire buffer instead.
*
* @param buffer Buffer of bytes
*
* @returns UTF-8 encoded std::string.
*/
[[nodiscard]] std::string BufferToUTF8String(std::span<const u8> buffer);
/**
* Converts a filesystem path to a UTF-8 encoded std::string.
*

View File

@@ -61,7 +61,7 @@ template <typename ContiguousContainer>
return out;
}
[[nodiscard]] constexpr std::array<u8, 16> AsArray(const char (&data)[17]) {
[[nodiscard]] constexpr std::array<u8, 16> AsArray(const char (&data)[33]) {
return HexStringToArray<16>(data);
}

View File

@@ -6,7 +6,7 @@
#include <windows.h>
#include "common/dynamic_library.h"
#elif defined(__linux__) // ^^^ Windows ^^^ vvv Linux vvv
#elif defined(__linux__) || defined(__FreeBSD__) // ^^^ Windows ^^^ vvv Linux vvv
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
@@ -343,7 +343,7 @@ private:
std::unordered_map<size_t, size_t> placeholder_host_pointers; ///< Placeholder backing offset
};
#elif defined(__linux__) // ^^^ Windows ^^^ vvv Linux vvv
#elif defined(__linux__) || defined(__FreeBSD__) // ^^^ Windows ^^^ vvv Linux vvv
class HostMemory::Impl {
public:
@@ -357,7 +357,12 @@ public:
});
// Backing memory initialization
#if defined(__FreeBSD__) && __FreeBSD__ < 13
// XXX Drop after FreeBSD 12.* reaches EOL on 2024-06-30
fd = shm_open(SHM_ANON, O_RDWR, 0600);
#else
fd = memfd_create("HostMemory", 0);
#endif
if (fd == -1) {
LOG_CRITICAL(HW_Memory, "memfd_create failed: {}", strerror(errno));
throw std::bad_alloc{};

View File

@@ -48,8 +48,8 @@ void LogSettings() {
log_setting("Core_UseMultiCore", values.use_multi_core.GetValue());
log_setting("CPU_Accuracy", values.cpu_accuracy.GetValue());
log_setting("Renderer_UseResolutionFactor", values.resolution_factor.GetValue());
log_setting("Renderer_UseFrameLimit", values.use_frame_limit.GetValue());
log_setting("Renderer_FrameLimit", values.frame_limit.GetValue());
log_setting("Renderer_UseSpeedLimit", values.use_speed_limit.GetValue());
log_setting("Renderer_SpeedLimit", values.speed_limit.GetValue());
log_setting("Renderer_UseDiskShaderCache", values.use_disk_shader_cache.GetValue());
log_setting("Renderer_GPUAccuracyLevel", values.gpu_accuracy.GetValue());
log_setting("Renderer_UseAsynchronousGpuEmulation",
@@ -132,8 +132,8 @@ void RestoreGlobalState(bool is_powered_on) {
values.vulkan_device.SetGlobal(true);
values.aspect_ratio.SetGlobal(true);
values.max_anisotropy.SetGlobal(true);
values.use_frame_limit.SetGlobal(true);
values.frame_limit.SetGlobal(true);
values.use_speed_limit.SetGlobal(true);
values.speed_limit.SetGlobal(true);
values.use_disk_shader_cache.SetGlobal(true);
values.gpu_accuracy.SetGlobal(true);
values.use_asynchronous_gpu_emulation.SetGlobal(true);

View File

@@ -42,6 +42,11 @@ enum class CPUAccuracy : u32 {
Unsafe = 2,
};
enum class FullscreenMode : u32 {
Borderless = 0,
Exclusive = 1,
};
/** The BasicSetting class is a simple resource manager. It defines a label and default value
* alongside the actual value of the setting for simpler and less-error prone use with frontend
* configurations. Setting a default value and label is required, though subclasses may deviate from
@@ -314,6 +319,7 @@ struct Values {
// Renderer
Setting<RendererBackend> renderer_backend{RendererBackend::OpenGL, "backend"};
BasicSetting<bool> renderer_debug{false, "debug"};
BasicSetting<bool> renderer_shader_feedback{false, "shader_feedback"};
BasicSetting<bool> enable_nsight_aftermath{false, "nsight_aftermath"};
BasicSetting<bool> disable_shader_loop_safety_checks{false,
"disable_shader_loop_safety_checks"};
@@ -322,23 +328,24 @@ struct Values {
Setting<u16> resolution_factor{1, "resolution_factor"};
// *nix platforms may have issues with the borderless windowed fullscreen mode.
// Default to exclusive fullscreen on these platforms for now.
Setting<int> fullscreen_mode{
Setting<FullscreenMode> fullscreen_mode{
#ifdef _WIN32
0,
FullscreenMode::Borderless,
#else
1,
FullscreenMode::Exclusive,
#endif
"fullscreen_mode"};
Setting<int> aspect_ratio{0, "aspect_ratio"};
Setting<int> max_anisotropy{0, "max_anisotropy"};
Setting<bool> use_frame_limit{true, "use_frame_limit"};
Setting<u16> frame_limit{100, "frame_limit"};
Setting<bool> use_speed_limit{true, "use_speed_limit"};
Setting<u16> speed_limit{100, "speed_limit"};
Setting<bool> use_disk_shader_cache{true, "use_disk_shader_cache"};
Setting<GPUAccuracy> gpu_accuracy{GPUAccuracy::High, "gpu_accuracy"};
Setting<bool> use_asynchronous_gpu_emulation{true, "use_asynchronous_gpu_emulation"};
Setting<bool> use_nvdec_emulation{true, "use_nvdec_emulation"};
Setting<bool> accelerate_astc{true, "accelerate_astc"};
Setting<bool> use_vsync{true, "use_vsync"};
BasicSetting<u16> fps_cap{1000, "fps_cap"};
BasicSetting<bool> disable_fps_limit{false, "disable_fps_limit"};
Setting<ShaderBackend> shader_backend{ShaderBackend::GLASM, "shader_backend"};
Setting<bool> use_asynchronous_shaders{false, "use_asynchronous_shaders"};

View File

@@ -6,10 +6,64 @@
#include <fmt/format.h>
#include "common/assert.h"
#include "common/uuid.h"
namespace Common {
namespace {
bool IsHexDigit(char c) {
return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
}
u8 HexCharToByte(char c) {
if (c >= '0' && c <= '9') {
return static_cast<u8>(c - '0');
}
if (c >= 'a' && c <= 'f') {
return static_cast<u8>(c - 'a' + 10);
}
if (c >= 'A' && c <= 'F') {
return static_cast<u8>(c - 'A' + 10);
}
ASSERT_MSG(false, "{} is not a hexadecimal digit!", c);
return u8{0};
}
} // Anonymous namespace
u128 HexStringToU128(std::string_view hex_string) {
const size_t length = hex_string.length();
// Detect "0x" prefix.
const bool has_0x_prefix = length > 2 && hex_string[0] == '0' && hex_string[1] == 'x';
const size_t offset = has_0x_prefix ? 2 : 0;
// Check length.
if (length > 32 + offset) {
ASSERT_MSG(false, "hex_string has more than 32 hexadecimal characters!");
return INVALID_UUID;
}
u64 lo = 0;
u64 hi = 0;
for (size_t i = 0; i < length - offset; ++i) {
const char c = hex_string[length - 1 - i];
if (!IsHexDigit(c)) {
ASSERT_MSG(false, "{} is not a hexadecimal digit!", c);
return INVALID_UUID;
}
if (i < 16) {
lo |= u64{HexCharToByte(c)} << (i * 4);
}
if (i >= 16) {
hi |= u64{HexCharToByte(c)} << ((i - 16) * 4);
}
}
return u128{lo, hi};
}
UUID UUID::Generate() {
std::random_device device;
std::mt19937 gen(device());
@@ -18,7 +72,7 @@ UUID UUID::Generate() {
}
std::string UUID::Format() const {
return fmt::format("0x{:016X}{:016X}", uuid[1], uuid[0]);
return fmt::format("{:016x}{:016x}", uuid[1], uuid[0]);
}
std::string UUID::FormatSwitch() const {

View File

@@ -5,6 +5,7 @@
#pragma once
#include <string>
#include <string_view>
#include "common/common_types.h"
@@ -12,12 +13,30 @@ namespace Common {
constexpr u128 INVALID_UUID{{0, 0}};
/**
* Converts a hex string to a 128-bit unsigned integer.
*
* The hex string can be formatted in lowercase or uppercase, with or without the "0x" prefix.
*
* This function will assert and return INVALID_UUID under the following conditions:
* - If the hex string is more than 32 characters long
* - If the hex string contains non-hexadecimal characters
*
* @param hex_string Hexadecimal string
*
* @returns A 128-bit unsigned integer if successfully converted, INVALID_UUID otherwise.
*/
[[nodiscard]] u128 HexStringToU128(std::string_view hex_string);
struct UUID {
// UUIDs which are 0 are considered invalid!
u128 uuid;
UUID() = default;
constexpr explicit UUID(const u128& id) : uuid{id} {}
constexpr explicit UUID(const u64 lo, const u64 hi) : uuid{{lo, hi}} {}
explicit UUID(std::string_view hex_string) {
uuid = HexStringToU128(hex_string);
}
[[nodiscard]] constexpr explicit operator bool() const {
return uuid != INVALID_UUID;
@@ -50,3 +69,14 @@ struct UUID {
static_assert(sizeof(UUID) == 16, "UUID is an invalid size!");
} // namespace Common
namespace std {
template <>
struct hash<Common::UUID> {
size_t operator()(const Common::UUID& uuid) const noexcept {
return uuid.uuid[1] ^ uuid.uuid[0];
}
};
} // namespace std

View File

@@ -411,7 +411,7 @@ struct System::Impl {
std::string status_details = "";
std::unique_ptr<Core::PerfStats> perf_stats;
Core::FrameLimiter frame_limiter;
Core::SpeedLimiter speed_limiter;
bool is_multicore{};
bool is_async_gpu{};
@@ -606,12 +606,12 @@ const Core::PerfStats& System::GetPerfStats() const {
return *impl->perf_stats;
}
Core::FrameLimiter& System::FrameLimiter() {
return impl->frame_limiter;
Core::SpeedLimiter& System::SpeedLimiter() {
return impl->speed_limiter;
}
const Core::FrameLimiter& System::FrameLimiter() const {
return impl->frame_limiter;
const Core::SpeedLimiter& System::SpeedLimiter() const {
return impl->speed_limiter;
}
Loader::ResultStatus System::GetGameName(std::string& out) const {

View File

@@ -94,7 +94,7 @@ class ARM_Interface;
class CpuManager;
class DeviceMemory;
class ExclusiveMonitor;
class FrameLimiter;
class SpeedLimiter;
class PerfStats;
class Reporter;
class TelemetrySession;
@@ -292,11 +292,11 @@ public:
/// Provides a constant reference to the internal PerfStats instance.
[[nodiscard]] const Core::PerfStats& GetPerfStats() const;
/// Provides a reference to the frame limiter;
[[nodiscard]] Core::FrameLimiter& FrameLimiter();
/// Provides a reference to the speed limiter;
[[nodiscard]] Core::SpeedLimiter& SpeedLimiter();
/// Provides a constant referent to the frame limiter
[[nodiscard]] const Core::FrameLimiter& FrameLimiter() const;
/// Provides a constant reference to the speed limiter
[[nodiscard]] const Core::SpeedLimiter& SpeedLimiter() const;
/// Gets the name of the current game
[[nodiscard]] Loader::ResultStatus GetGameName(std::string& out) const;

View File

@@ -12,9 +12,9 @@ namespace HLE::ApiVersion {
// Horizon OS version constants.
constexpr u8 HOS_VERSION_MAJOR = 11;
constexpr u8 HOS_VERSION_MINOR = 0;
constexpr u8 HOS_VERSION_MICRO = 1;
constexpr u8 HOS_VERSION_MAJOR = 12;
constexpr u8 HOS_VERSION_MINOR = 1;
constexpr u8 HOS_VERSION_MICRO = 0;
// NintendoSDK version constants.
@@ -22,15 +22,15 @@ constexpr u8 SDK_REVISION_MAJOR = 1;
constexpr u8 SDK_REVISION_MINOR = 0;
constexpr char PLATFORM_STRING[] = "NX";
constexpr char VERSION_HASH[] = "69103fcb2004dace877094c2f8c29e6113be5dbf";
constexpr char DISPLAY_VERSION[] = "11.0.1";
constexpr char DISPLAY_TITLE[] = "NintendoSDK Firmware for NX 11.0.1-1.0";
constexpr char VERSION_HASH[] = "76b10c2dab7d3aa73fc162f8dff1655e6a21caf4";
constexpr char DISPLAY_VERSION[] = "12.1.0";
constexpr char DISPLAY_TITLE[] = "NintendoSDK Firmware for NX 12.1.0-1.0";
// Atmosphere version constants.
constexpr u8 ATMOSPHERE_RELEASE_VERSION_MAJOR = 0;
constexpr u8 ATMOSPHERE_RELEASE_VERSION_MINOR = 19;
constexpr u8 ATMOSPHERE_RELEASE_VERSION_MICRO = 4;
constexpr u8 ATMOSPHERE_RELEASE_VERSION_MICRO = 5;
constexpr u32 GetTargetFirmware() {
return u32{HOS_VERSION_MAJOR} << 24 | u32{HOS_VERSION_MINOR} << 16 |

View File

@@ -292,7 +292,7 @@ public:
protected:
void Get(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_ACC, "called user_id={}", user_id.Format());
LOG_DEBUG(Service_ACC, "called user_id=0x{}", user_id.Format());
ProfileBase profile_base{};
ProfileData data{};
if (profile_manager.GetProfileBaseAndData(user_id, profile_base, data)) {
@@ -301,7 +301,7 @@ protected:
rb.Push(ResultSuccess);
rb.PushRaw(profile_base);
} else {
LOG_ERROR(Service_ACC, "Failed to get profile base and data for user={}",
LOG_ERROR(Service_ACC, "Failed to get profile base and data for user=0x{}",
user_id.Format());
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(ResultUnknown); // TODO(ogniK): Get actual error code
@@ -309,14 +309,14 @@ protected:
}
void GetBase(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_ACC, "called user_id={}", user_id.Format());
LOG_DEBUG(Service_ACC, "called user_id=0x{}", user_id.Format());
ProfileBase profile_base{};
if (profile_manager.GetProfileBase(user_id, profile_base)) {
IPC::ResponseBuilder rb{ctx, 16};
rb.Push(ResultSuccess);
rb.PushRaw(profile_base);
} else {
LOG_ERROR(Service_ACC, "Failed to get profile base for user={}", user_id.Format());
LOG_ERROR(Service_ACC, "Failed to get profile base for user=0x{}", user_id.Format());
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(ResultUnknown); // TODO(ogniK): Get actual error code
}
@@ -372,7 +372,7 @@ protected:
const auto user_data = ctx.ReadBuffer();
LOG_DEBUG(Service_ACC, "called, username='{}', timestamp={:016X}, uuid={}",
LOG_DEBUG(Service_ACC, "called, username='{}', timestamp={:016X}, uuid=0x{}",
Common::StringFromFixedZeroTerminatedBuffer(
reinterpret_cast<const char*>(base.username.data()), base.username.size()),
base.timestamp, base.user_uuid.Format());
@@ -405,7 +405,7 @@ protected:
const auto user_data = ctx.ReadBuffer();
const auto image_data = ctx.ReadBuffer(1);
LOG_DEBUG(Service_ACC, "called, username='{}', timestamp={:016X}, uuid={}",
LOG_DEBUG(Service_ACC, "called, username='{}', timestamp={:016X}, uuid=0x{}",
Common::StringFromFixedZeroTerminatedBuffer(
reinterpret_cast<const char*>(base.username.data()), base.username.size()),
base.timestamp, base.user_uuid.Format());
@@ -662,7 +662,7 @@ void Module::Interface::GetUserCount(Kernel::HLERequestContext& ctx) {
void Module::Interface::GetUserExistence(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
Common::UUID user_id = rp.PopRaw<Common::UUID>();
LOG_DEBUG(Service_ACC, "called user_id={}", user_id.Format());
LOG_DEBUG(Service_ACC, "called user_id=0x{}", user_id.Format());
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(ResultSuccess);
@@ -693,7 +693,7 @@ void Module::Interface::GetLastOpenedUser(Kernel::HLERequestContext& ctx) {
void Module::Interface::GetProfile(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
Common::UUID user_id = rp.PopRaw<Common::UUID>();
LOG_DEBUG(Service_ACC, "called user_id={}", user_id.Format());
LOG_DEBUG(Service_ACC, "called user_id=0x{}", user_id.Format());
IPC::ResponseBuilder rb{ctx, 2, 0, 1};
rb.Push(ResultSuccess);
@@ -802,7 +802,7 @@ void Module::Interface::GetProfileEditor(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
Common::UUID user_id = rp.PopRaw<Common::UUID>();
LOG_DEBUG(Service_ACC, "called, user_id={}", user_id.Format());
LOG_DEBUG(Service_ACC, "called, user_id=0x{}", user_id.Format());
IPC::ResponseBuilder rb{ctx, 2, 0, 1};
rb.Push(ResultSuccess);
@@ -844,7 +844,7 @@ void Module::Interface::StoreSaveDataThumbnailApplication(Kernel::HLERequestCont
IPC::RequestParser rp{ctx};
const auto uuid = rp.PopRaw<Common::UUID>();
LOG_WARNING(Service_ACC, "(STUBBED) called, uuid={}", uuid.Format());
LOG_WARNING(Service_ACC, "(STUBBED) called, uuid=0x{}", uuid.Format());
// TODO(ogniK): Check if application ID is zero on acc initialize. As we don't have a reliable
// way of confirming things like the TID, we're going to assume a non zero value for the time
@@ -858,7 +858,7 @@ void Module::Interface::StoreSaveDataThumbnailSystem(Kernel::HLERequestContext&
const auto uuid = rp.PopRaw<Common::UUID>();
const auto tid = rp.Pop<u64_le>();
LOG_WARNING(Service_ACC, "(STUBBED) called, uuid={}, tid={:016X}", uuid.Format(), tid);
LOG_WARNING(Service_ACC, "(STUBBED) called, uuid=0x{}, tid={:016X}", uuid.Format(), tid);
StoreSaveDataThumbnail(ctx, uuid, tid);
}

View File

@@ -377,7 +377,8 @@ void SoftwareKeyboard::SubmitForTextCheck(std::u16string submitted_text) {
if (swkbd_config_common.use_utf8) {
std::string utf8_submitted_text = Common::UTF16ToUTF8(current_text);
const u64 buffer_size = sizeof(u64) + utf8_submitted_text.size();
// Include the null terminator in the buffer size.
const u64 buffer_size = utf8_submitted_text.size() + 1;
LOG_DEBUG(Service_AM, "\nBuffer Size: {}\nUTF-8 Submitted Text: {}", buffer_size,
utf8_submitted_text);
@@ -386,7 +387,8 @@ void SoftwareKeyboard::SubmitForTextCheck(std::u16string submitted_text) {
std::memcpy(out_data.data() + sizeof(u64), utf8_submitted_text.data(),
utf8_submitted_text.size());
} else {
const u64 buffer_size = sizeof(u64) + current_text.size() * sizeof(char16_t);
// Include the null terminator in the buffer size.
const u64 buffer_size = (current_text.size() + 1) * sizeof(char16_t);
LOG_DEBUG(Service_AM, "\nBuffer Size: {}\nUTF-16 Submitted Text: {}", buffer_size,
Common::UTF16ToUTF8(current_text));

View File

@@ -158,7 +158,7 @@ private:
const auto local_play = rp.Pop<bool>();
const auto uuid = rp.PopRaw<Common::UUID>();
LOG_WARNING(Service_Friend, "(STUBBED) called local_play={} uuid={}", local_play,
LOG_WARNING(Service_Friend, "(STUBBED) called, local_play={}, uuid=0x{}", local_play,
uuid.Format());
IPC::ResponseBuilder rb{ctx, 2};
@@ -171,7 +171,7 @@ private:
const auto uuid = rp.PopRaw<Common::UUID>();
[[maybe_unused]] const auto filter = rp.PopRaw<SizedFriendFilter>();
const auto pid = rp.Pop<u64>();
LOG_WARNING(Service_Friend, "(STUBBED) called, offset={}, uuid={}, pid={}", friend_offset,
LOG_WARNING(Service_Friend, "(STUBBED) called, offset={}, uuid=0x{}, pid={}", friend_offset,
uuid.Format(), pid);
IPC::ResponseBuilder rb{ctx, 3};
@@ -289,7 +289,7 @@ void Module::Interface::CreateNotificationService(Kernel::HLERequestContext& ctx
IPC::RequestParser rp{ctx};
auto uuid = rp.PopRaw<Common::UUID>();
LOG_DEBUG(Service_Friend, "called, uuid={}", uuid.Format());
LOG_DEBUG(Service_Friend, "called, uuid=0x{}", uuid.Format());
IPC::ResponseBuilder rb{ctx, 2, 0, 1};
rb.Push(ResultSuccess);

View File

@@ -339,13 +339,16 @@ std::optional<ApplicationLanguage> ConvertToApplicationLanguage(
case Set::LanguageCode::FR_CA:
return ApplicationLanguage::CanadianFrench;
case Set::LanguageCode::PT:
case Set::LanguageCode::PT_BR:
return ApplicationLanguage::Portuguese;
case Set::LanguageCode::RU:
return ApplicationLanguage::Russian;
case Set::LanguageCode::KO:
return ApplicationLanguage::Korean;
case Set::LanguageCode::ZH_TW:
case Set::LanguageCode::ZH_HANT:
return ApplicationLanguage::TraditionalChinese;
case Set::LanguageCode::ZH_CN:
case Set::LanguageCode::ZH_HANS:
return ApplicationLanguage::SimplifiedChinese;
default:

View File

@@ -1,42 +0,0 @@
// Copyright 2019 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <optional>
#include <string>
#include "common/common_types.h"
#include "core/hle/service/set/set.h"
namespace Service::NS {
/// This is nn::ns::detail::ApplicationLanguage
enum class ApplicationLanguage : u8 {
AmericanEnglish = 0,
BritishEnglish,
Japanese,
French,
German,
LatinAmericanSpanish,
Spanish,
Italian,
Dutch,
CanadianFrench,
Portuguese,
Russian,
Korean,
TraditionalChinese,
SimplifiedChinese,
Count
};
using ApplicationLanguagePriorityList =
const std::array<ApplicationLanguage, static_cast<std::size_t>(ApplicationLanguage::Count)>;
constexpr u32 GetSupportedLanguageFlag(const ApplicationLanguage lang) {
return 1U << static_cast<u32>(lang);
}
const ApplicationLanguagePriorityList* GetApplicationLanguagePriorityList(ApplicationLanguage lang);
std::optional<ApplicationLanguage> ConvertToApplicationLanguage(
Service::Set::LanguageCode language_code);
std::optional<Service::Set::LanguageCode> ConvertToLanguageCode(ApplicationLanguage lang);
} // namespace Service::NS

View File

@@ -54,7 +54,7 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u3
system.GetPerfStats().EndSystemFrame();
system.GPU().SwapBuffers(&framebuffer);
system.FrameLimiter().DoFrameLimiting(system.CoreTiming().GetGlobalTimeUs());
system.SpeedLimiter().DoSpeedLimiting(system.CoreTiming().GetGlobalTimeUs());
system.GetPerfStats().BeginSystemFrame();
}

View File

@@ -166,8 +166,6 @@ NvResult nvhost_nvdec_common::MapBuffer(const std::vector<u8>& input, std::vecto
LOG_ERROR(Service_NVDRV, "failed to map size={}", object->size);
} else {
cmd_buffer.map_address = object->dma_map_addr;
AddBufferMap(object->dma_map_addr, object->size, object->addr,
object->status == nvmap::Object::Status::Allocated);
}
}
std::memcpy(output.data(), &params, sizeof(IoctlMapBuffer));
@@ -178,30 +176,11 @@ NvResult nvhost_nvdec_common::MapBuffer(const std::vector<u8>& input, std::vecto
}
NvResult nvhost_nvdec_common::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlMapBuffer params{};
std::memcpy(&params, input.data(), sizeof(IoctlMapBuffer));
std::vector<MapBufferEntry> cmd_buffer_handles(params.num_entries);
SliceVectors(input, cmd_buffer_handles, params.num_entries, sizeof(IoctlMapBuffer));
auto& gpu = system.GPU();
for (auto& cmd_buffer : cmd_buffer_handles) {
const auto object{nvmap_dev->GetObject(cmd_buffer.map_handle)};
if (!object) {
LOG_ERROR(Service_NVDRV, "invalid cmd_buffer nvmap_handle={:X}", cmd_buffer.map_handle);
std::memcpy(output.data(), &params, output.size());
return NvResult::InvalidState;
}
if (const auto size{RemoveBufferMap(object->dma_map_addr)}; size) {
gpu.MemoryManager().Unmap(object->dma_map_addr, *size);
} else {
// This occurs quite frequently, however does not seem to impact functionality
LOG_DEBUG(Service_NVDRV, "invalid offset=0x{:X} dma=0x{:X}", object->addr,
object->dma_map_addr);
}
object->dma_map_addr = 0;
}
// This is intntionally stubbed.
// Skip unmapping buffers here, as to not break the continuity of the VP9 reference frame
// addresses, and risk invalidating data before the async GPU thread is done with it
std::memset(output.data(), 0, output.size());
LOG_DEBUG(Service_NVDRV, "(STUBBED) called");
return NvResult::Success;
}
@@ -212,33 +191,4 @@ NvResult nvhost_nvdec_common::SetSubmitTimeout(const std::vector<u8>& input,
return NvResult::Success;
}
std::optional<nvhost_nvdec_common::BufferMap> nvhost_nvdec_common::FindBufferMap(
GPUVAddr gpu_addr) const {
const auto it = std::find_if(
buffer_mappings.begin(), buffer_mappings.upper_bound(gpu_addr), [&](const auto& entry) {
return (gpu_addr >= entry.second.StartAddr() && gpu_addr < entry.second.EndAddr());
});
ASSERT(it != buffer_mappings.end());
return it->second;
}
void nvhost_nvdec_common::AddBufferMap(GPUVAddr gpu_addr, std::size_t size, VAddr cpu_addr,
bool is_allocated) {
buffer_mappings.insert_or_assign(gpu_addr, BufferMap{gpu_addr, size, cpu_addr, is_allocated});
}
std::optional<std::size_t> nvhost_nvdec_common::RemoveBufferMap(GPUVAddr gpu_addr) {
const auto iter{buffer_mappings.find(gpu_addr)};
if (iter == buffer_mappings.end()) {
return std::nullopt;
}
std::size_t size = 0;
if (iter->second.IsAllocated()) {
size = iter->second.Size();
}
buffer_mappings.erase(iter);
return size;
}
} // namespace Service::Nvidia::Devices

View File

@@ -23,45 +23,6 @@ public:
~nvhost_nvdec_common() override;
protected:
class BufferMap final {
public:
constexpr BufferMap() = default;
constexpr BufferMap(GPUVAddr start_addr_, std::size_t size_)
: start_addr{start_addr_}, end_addr{start_addr_ + size_} {}
constexpr BufferMap(GPUVAddr start_addr_, std::size_t size_, VAddr cpu_addr_,
bool is_allocated_)
: start_addr{start_addr_}, end_addr{start_addr_ + size_}, cpu_addr{cpu_addr_},
is_allocated{is_allocated_} {}
constexpr VAddr StartAddr() const {
return start_addr;
}
constexpr VAddr EndAddr() const {
return end_addr;
}
constexpr std::size_t Size() const {
return end_addr - start_addr;
}
constexpr VAddr CpuAddr() const {
return cpu_addr;
}
constexpr bool IsAllocated() const {
return is_allocated;
}
private:
GPUVAddr start_addr{};
GPUVAddr end_addr{};
VAddr cpu_addr{};
bool is_allocated{};
};
struct IoctlSetNvmapFD {
s32_le nvmap_fd{};
};
@@ -154,17 +115,11 @@ protected:
NvResult UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& output);
NvResult SetSubmitTimeout(const std::vector<u8>& input, std::vector<u8>& output);
std::optional<BufferMap> FindBufferMap(GPUVAddr gpu_addr) const;
void AddBufferMap(GPUVAddr gpu_addr, std::size_t size, VAddr cpu_addr, bool is_allocated);
std::optional<std::size_t> RemoveBufferMap(GPUVAddr gpu_addr);
s32_le nvmap_fd{};
u32_le submit_timeout{};
std::shared_ptr<nvmap> nvmap_dev;
SyncpointManager& syncpoint_manager;
std::array<u32, MaxSyncPoints> device_syncpoints{};
// This is expected to be ordered, therefore we must use a map, not unordered_map
std::map<GPUVAddr, BufferMap> buffer_mappings;
};
}; // namespace Devices
} // namespace Service::Nvidia

View File

@@ -307,11 +307,12 @@ void NVFlinger::Compose() {
}
s64 NVFlinger::GetNextTicks() const {
if (Settings::values.disable_fps_limit.GetValue()) {
return 0;
}
constexpr s64 max_hertz = 120LL;
return (1000000000 * (1LL << swap_interval)) / max_hertz;
static constexpr s64 max_hertz = 120LL;
const auto& settings = Settings::values;
const bool unlocked_fps = settings.disable_fps_limit.GetValue();
const s64 fps_cap = unlocked_fps ? static_cast<s64>(settings.fps_cap.GetValue()) : 1;
return (1000000000 * (1LL << swap_interval)) / (max_hertz * fps_cap);
}
} // namespace Service::NVFlinger

View File

@@ -12,7 +12,7 @@
namespace Service::Set {
namespace {
constexpr std::array<LanguageCode, 17> available_language_codes = {{
constexpr std::array<LanguageCode, 18> available_language_codes = {{
LanguageCode::JA,
LanguageCode::EN_US,
LanguageCode::FR,
@@ -30,6 +30,7 @@ constexpr std::array<LanguageCode, 17> available_language_codes = {{
LanguageCode::ES_419,
LanguageCode::ZH_HANS,
LanguageCode::ZH_HANT,
LanguageCode::PT_BR,
}};
enum class KeyboardLayout : u64 {
@@ -50,7 +51,7 @@ enum class KeyboardLayout : u64 {
ChineseTraditional = 14,
};
constexpr std::array<std::pair<LanguageCode, KeyboardLayout>, 17> language_to_layout{{
constexpr std::array<std::pair<LanguageCode, KeyboardLayout>, 18> language_to_layout{{
{LanguageCode::JA, KeyboardLayout::Japanese},
{LanguageCode::EN_US, KeyboardLayout::EnglishUs},
{LanguageCode::FR, KeyboardLayout::French},
@@ -68,10 +69,11 @@ constexpr std::array<std::pair<LanguageCode, KeyboardLayout>, 17> language_to_la
{LanguageCode::ES_419, KeyboardLayout::SpanishLatin},
{LanguageCode::ZH_HANS, KeyboardLayout::ChineseSimplified},
{LanguageCode::ZH_HANT, KeyboardLayout::ChineseTraditional},
{LanguageCode::PT_BR, KeyboardLayout::Portuguese},
}};
constexpr std::size_t pre4_0_0_max_entries = 15;
constexpr std::size_t post4_0_0_max_entries = 17;
constexpr std::size_t PRE_4_0_0_MAX_ENTRIES = 0xF;
constexpr std::size_t POST_4_0_0_MAX_ENTRIES = 0x40;
constexpr ResultCode ERR_INVALID_LANGUAGE{ErrorModule::Settings, 625};
@@ -81,9 +83,10 @@ void PushResponseLanguageCode(Kernel::HLERequestContext& ctx, std::size_t num_la
rb.Push(static_cast<u32>(num_language_codes));
}
void GetAvailableLanguageCodesImpl(Kernel::HLERequestContext& ctx, std::size_t max_size) {
void GetAvailableLanguageCodesImpl(Kernel::HLERequestContext& ctx, std::size_t max_entries) {
const std::size_t requested_amount = ctx.GetWriteBufferSize() / sizeof(LanguageCode);
const std::size_t copy_amount = std::min(requested_amount, max_size);
const std::size_t max_amount = std::min(requested_amount, max_entries);
const std::size_t copy_amount = std::min(available_language_codes.size(), max_amount);
const std::size_t copy_size = copy_amount * sizeof(LanguageCode);
ctx.WriteBuffer(available_language_codes.data(), copy_size);
@@ -118,7 +121,7 @@ LanguageCode GetLanguageCodeFromIndex(std::size_t index) {
void SET::GetAvailableLanguageCodes(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_SET, "called");
GetAvailableLanguageCodesImpl(ctx, pre4_0_0_max_entries);
GetAvailableLanguageCodesImpl(ctx, PRE_4_0_0_MAX_ENTRIES);
}
void SET::MakeLanguageCode(Kernel::HLERequestContext& ctx) {
@@ -140,19 +143,19 @@ void SET::MakeLanguageCode(Kernel::HLERequestContext& ctx) {
void SET::GetAvailableLanguageCodes2(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_SET, "called");
GetAvailableLanguageCodesImpl(ctx, post4_0_0_max_entries);
GetAvailableLanguageCodesImpl(ctx, POST_4_0_0_MAX_ENTRIES);
}
void SET::GetAvailableLanguageCodeCount(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_SET, "called");
PushResponseLanguageCode(ctx, pre4_0_0_max_entries);
PushResponseLanguageCode(ctx, PRE_4_0_0_MAX_ENTRIES);
}
void SET::GetAvailableLanguageCodeCount2(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_SET, "called");
PushResponseLanguageCode(ctx, post4_0_0_max_entries);
PushResponseLanguageCode(ctx, POST_4_0_0_MAX_ENTRIES);
}
void SET::GetQuestFlag(Kernel::HLERequestContext& ctx) {

View File

@@ -31,6 +31,7 @@ enum class LanguageCode : u64 {
ES_419 = 0x00003931342D7365,
ZH_HANS = 0x00736E61482D687A,
ZH_HANT = 0x00746E61482D687A,
PT_BR = 0x00000052422D7470,
};
LanguageCode GetLanguageCodeFromIndex(std::size_t idx);

View File

@@ -4,8 +4,6 @@
#include <algorithm>
#include <cstring>
#include <optional>
#include <utility>
#include "common/assert.h"
#include "common/atomic_ops.h"
@@ -14,12 +12,10 @@
#include "common/page_table.h"
#include "common/settings.h"
#include "common/swap.h"
#include "core/arm/arm_interface.h"
#include "core/core.h"
#include "core/device_memory.h"
#include "core/hle/kernel/k_page_table.h"
#include "core/hle/kernel/k_process.h"
#include "core/hle/kernel/physical_memory.h"
#include "core/memory.h"
#include "video_core/gpu.h"
@@ -62,17 +58,7 @@ struct Memory::Impl {
}
}
bool IsValidVirtualAddress(const Kernel::KProcess& process, const VAddr vaddr) const {
const auto& page_table = process.PageTable().PageTableImpl();
const auto [pointer, type] = page_table.pointers[vaddr >> PAGE_BITS].PointerType();
return pointer != nullptr || type == Common::PageType::RasterizerCachedMemory;
}
bool IsValidVirtualAddress(VAddr vaddr) const {
return IsValidVirtualAddress(*system.CurrentProcess(), vaddr);
}
u8* GetPointerFromRasterizerCachedMemory(VAddr vaddr) const {
[[nodiscard]] u8* GetPointerFromRasterizerCachedMemory(VAddr vaddr) const {
const PAddr paddr{current_page_table->backing_addr[vaddr >> PAGE_BITS]};
if (!paddr) {
@@ -82,18 +68,6 @@ struct Memory::Impl {
return system.DeviceMemory().GetPointer(paddr) + vaddr;
}
u8* GetPointer(const VAddr vaddr) const {
const uintptr_t raw_pointer = current_page_table->pointers[vaddr >> PAGE_BITS].Raw();
if (u8* const pointer = Common::PageTable::PageInfo::ExtractPointer(raw_pointer)) {
return pointer + vaddr;
}
const auto type = Common::PageTable::PageInfo::ExtractType(raw_pointer);
if (type == Common::PageType::RasterizerCachedMemory) {
return GetPointerFromRasterizerCachedMemory(vaddr);
}
return nullptr;
}
u8 Read8(const VAddr addr) {
return Read<u8>(addr);
}
@@ -179,7 +153,7 @@ struct Memory::Impl {
std::string string;
string.reserve(max_length);
for (std::size_t i = 0; i < max_length; ++i) {
const char c = Read8(vaddr);
const char c = Read<s8>(vaddr);
if (c == '\0') {
break;
}
@@ -190,15 +164,14 @@ struct Memory::Impl {
return string;
}
void ReadBlock(const Kernel::KProcess& process, const VAddr src_addr, void* dest_buffer,
const std::size_t size) {
void WalkBlock(const Kernel::KProcess& process, const VAddr addr, const std::size_t size,
auto on_unmapped, auto on_memory, auto on_rasterizer, auto increment) {
const auto& page_table = process.PageTable().PageTableImpl();
std::size_t remaining_size = size;
std::size_t page_index = src_addr >> PAGE_BITS;
std::size_t page_offset = src_addr & PAGE_MASK;
std::size_t page_index = addr >> PAGE_BITS;
std::size_t page_offset = addr & PAGE_MASK;
while (remaining_size > 0) {
while (remaining_size) {
const std::size_t copy_amount =
std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size);
const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
@@ -206,22 +179,18 @@ struct Memory::Impl {
const auto [pointer, type] = page_table.pointers[page_index].PointerType();
switch (type) {
case Common::PageType::Unmapped: {
LOG_ERROR(HW_Memory,
"Unmapped ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
current_vaddr, src_addr, size);
std::memset(dest_buffer, 0, copy_amount);
on_unmapped(copy_amount, current_vaddr);
break;
}
case Common::PageType::Memory: {
DEBUG_ASSERT(pointer);
const u8* const src_ptr = pointer + page_offset + (page_index << PAGE_BITS);
std::memcpy(dest_buffer, src_ptr, copy_amount);
u8* mem_ptr = pointer + page_offset + (page_index << PAGE_BITS);
on_memory(copy_amount, mem_ptr);
break;
}
case Common::PageType::RasterizerCachedMemory: {
const u8* const host_ptr{GetPointerFromRasterizerCachedMemory(current_vaddr)};
system.GPU().FlushRegion(current_vaddr, copy_amount);
std::memcpy(dest_buffer, host_ptr, copy_amount);
u8* const host_ptr{GetPointerFromRasterizerCachedMemory(current_vaddr)};
on_rasterizer(current_vaddr, copy_amount, host_ptr);
break;
}
default:
@@ -230,248 +199,122 @@ struct Memory::Impl {
page_index++;
page_offset = 0;
dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
increment(copy_amount);
remaining_size -= copy_amount;
}
}
void ReadBlockUnsafe(const Kernel::KProcess& process, const VAddr src_addr, void* dest_buffer,
const std::size_t size) {
const auto& page_table = process.PageTable().PageTableImpl();
std::size_t remaining_size = size;
std::size_t page_index = src_addr >> PAGE_BITS;
std::size_t page_offset = src_addr & PAGE_MASK;
while (remaining_size > 0) {
const std::size_t copy_amount =
std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size);
const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
const auto [pointer, type] = page_table.pointers[page_index].PointerType();
switch (type) {
case Common::PageType::Unmapped: {
template <bool UNSAFE>
void ReadBlockImpl(const Kernel::KProcess& process, const VAddr src_addr, void* dest_buffer,
const std::size_t size) {
WalkBlock(
process, src_addr, size,
[src_addr, size, &dest_buffer](const std::size_t copy_amount,
const VAddr current_vaddr) {
LOG_ERROR(HW_Memory,
"Unmapped ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
current_vaddr, src_addr, size);
std::memset(dest_buffer, 0, copy_amount);
break;
}
case Common::PageType::Memory: {
DEBUG_ASSERT(pointer);
const u8* const src_ptr = pointer + page_offset + (page_index << PAGE_BITS);
},
[&dest_buffer](const std::size_t copy_amount, const u8* const src_ptr) {
std::memcpy(dest_buffer, src_ptr, copy_amount);
break;
}
case Common::PageType::RasterizerCachedMemory: {
const u8* const host_ptr{GetPointerFromRasterizerCachedMemory(current_vaddr)};
},
[&system = system, &dest_buffer](const VAddr current_vaddr,
const std::size_t copy_amount,
const u8* const host_ptr) {
if constexpr (!UNSAFE) {
system.GPU().FlushRegion(current_vaddr, copy_amount);
}
std::memcpy(dest_buffer, host_ptr, copy_amount);
break;
}
default:
UNREACHABLE();
}
page_index++;
page_offset = 0;
dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
remaining_size -= copy_amount;
}
},
[&dest_buffer](const std::size_t copy_amount) {
dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
});
}
void ReadBlock(const VAddr src_addr, void* dest_buffer, const std::size_t size) {
ReadBlock(*system.CurrentProcess(), src_addr, dest_buffer, size);
ReadBlockImpl<false>(*system.CurrentProcess(), src_addr, dest_buffer, size);
}
void ReadBlockUnsafe(const VAddr src_addr, void* dest_buffer, const std::size_t size) {
ReadBlockUnsafe(*system.CurrentProcess(), src_addr, dest_buffer, size);
ReadBlockImpl<true>(*system.CurrentProcess(), src_addr, dest_buffer, size);
}
void WriteBlock(const Kernel::KProcess& process, const VAddr dest_addr, const void* src_buffer,
const std::size_t size) {
const auto& page_table = process.PageTable().PageTableImpl();
std::size_t remaining_size = size;
std::size_t page_index = dest_addr >> PAGE_BITS;
std::size_t page_offset = dest_addr & PAGE_MASK;
while (remaining_size > 0) {
const std::size_t copy_amount =
std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size);
const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
const auto [pointer, type] = page_table.pointers[page_index].PointerType();
switch (type) {
case Common::PageType::Unmapped: {
template <bool UNSAFE>
void WriteBlockImpl(const Kernel::KProcess& process, const VAddr dest_addr,
const void* src_buffer, const std::size_t size) {
WalkBlock(
process, dest_addr, size,
[dest_addr, size](const std::size_t copy_amount, const VAddr current_vaddr) {
LOG_ERROR(HW_Memory,
"Unmapped WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
current_vaddr, dest_addr, size);
break;
}
case Common::PageType::Memory: {
DEBUG_ASSERT(pointer);
u8* const dest_ptr = pointer + page_offset + (page_index << PAGE_BITS);
},
[&src_buffer](const std::size_t copy_amount, u8* const dest_ptr) {
std::memcpy(dest_ptr, src_buffer, copy_amount);
break;
}
case Common::PageType::RasterizerCachedMemory: {
u8* const host_ptr{GetPointerFromRasterizerCachedMemory(current_vaddr)};
system.GPU().InvalidateRegion(current_vaddr, copy_amount);
},
[&system = system, &src_buffer](const VAddr current_vaddr,
const std::size_t copy_amount, u8* const host_ptr) {
if constexpr (!UNSAFE) {
system.GPU().InvalidateRegion(current_vaddr, copy_amount);
}
std::memcpy(host_ptr, src_buffer, copy_amount);
break;
}
default:
UNREACHABLE();
}
page_index++;
page_offset = 0;
src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
remaining_size -= copy_amount;
}
}
void WriteBlockUnsafe(const Kernel::KProcess& process, const VAddr dest_addr,
const void* src_buffer, const std::size_t size) {
const auto& page_table = process.PageTable().PageTableImpl();
std::size_t remaining_size = size;
std::size_t page_index = dest_addr >> PAGE_BITS;
std::size_t page_offset = dest_addr & PAGE_MASK;
while (remaining_size > 0) {
const std::size_t copy_amount =
std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size);
const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
const auto [pointer, type] = page_table.pointers[page_index].PointerType();
switch (type) {
case Common::PageType::Unmapped: {
LOG_ERROR(HW_Memory,
"Unmapped WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
current_vaddr, dest_addr, size);
break;
}
case Common::PageType::Memory: {
DEBUG_ASSERT(pointer);
u8* const dest_ptr = pointer + page_offset + (page_index << PAGE_BITS);
std::memcpy(dest_ptr, src_buffer, copy_amount);
break;
}
case Common::PageType::RasterizerCachedMemory: {
u8* const host_ptr{GetPointerFromRasterizerCachedMemory(current_vaddr)};
std::memcpy(host_ptr, src_buffer, copy_amount);
break;
}
default:
UNREACHABLE();
}
page_index++;
page_offset = 0;
src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
remaining_size -= copy_amount;
}
},
[&src_buffer](const std::size_t copy_amount) {
src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
});
}
void WriteBlock(const VAddr dest_addr, const void* src_buffer, const std::size_t size) {
WriteBlock(*system.CurrentProcess(), dest_addr, src_buffer, size);
WriteBlockImpl<false>(*system.CurrentProcess(), dest_addr, src_buffer, size);
}
void WriteBlockUnsafe(const VAddr dest_addr, const void* src_buffer, const std::size_t size) {
WriteBlockUnsafe(*system.CurrentProcess(), dest_addr, src_buffer, size);
WriteBlockImpl<true>(*system.CurrentProcess(), dest_addr, src_buffer, size);
}
void ZeroBlock(const Kernel::KProcess& process, const VAddr dest_addr, const std::size_t size) {
const auto& page_table = process.PageTable().PageTableImpl();
std::size_t remaining_size = size;
std::size_t page_index = dest_addr >> PAGE_BITS;
std::size_t page_offset = dest_addr & PAGE_MASK;
while (remaining_size > 0) {
const std::size_t copy_amount =
std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size);
const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
const auto [pointer, type] = page_table.pointers[page_index].PointerType();
switch (type) {
case Common::PageType::Unmapped: {
WalkBlock(
process, dest_addr, size,
[dest_addr, size](const std::size_t copy_amount, const VAddr current_vaddr) {
LOG_ERROR(HW_Memory,
"Unmapped ZeroBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
current_vaddr, dest_addr, size);
break;
}
case Common::PageType::Memory: {
DEBUG_ASSERT(pointer);
u8* const dest_ptr = pointer + page_offset + (page_index << PAGE_BITS);
},
[](const std::size_t copy_amount, u8* const dest_ptr) {
std::memset(dest_ptr, 0, copy_amount);
break;
}
case Common::PageType::RasterizerCachedMemory: {
u8* const host_ptr{GetPointerFromRasterizerCachedMemory(current_vaddr)};
},
[&system = system](const VAddr current_vaddr, const std::size_t copy_amount,
u8* const host_ptr) {
system.GPU().InvalidateRegion(current_vaddr, copy_amount);
std::memset(host_ptr, 0, copy_amount);
break;
}
default:
UNREACHABLE();
}
page_index++;
page_offset = 0;
remaining_size -= copy_amount;
}
}
void ZeroBlock(const VAddr dest_addr, const std::size_t size) {
ZeroBlock(*system.CurrentProcess(), dest_addr, size);
},
[](const std::size_t copy_amount) {});
}
void CopyBlock(const Kernel::KProcess& process, VAddr dest_addr, VAddr src_addr,
const std::size_t size) {
const auto& page_table = process.PageTable().PageTableImpl();
std::size_t remaining_size = size;
std::size_t page_index = src_addr >> PAGE_BITS;
std::size_t page_offset = src_addr & PAGE_MASK;
while (remaining_size > 0) {
const std::size_t copy_amount =
std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size);
const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
const auto [pointer, type] = page_table.pointers[page_index].PointerType();
switch (type) {
case Common::PageType::Unmapped: {
WalkBlock(
process, dest_addr, size,
[this, &process, &dest_addr, &src_addr, size](const std::size_t copy_amount,
const VAddr current_vaddr) {
LOG_ERROR(HW_Memory,
"Unmapped CopyBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
current_vaddr, src_addr, size);
ZeroBlock(process, dest_addr, copy_amount);
break;
}
case Common::PageType::Memory: {
DEBUG_ASSERT(pointer);
const u8* src_ptr = pointer + page_offset + (page_index << PAGE_BITS);
WriteBlock(process, dest_addr, src_ptr, copy_amount);
break;
}
case Common::PageType::RasterizerCachedMemory: {
const u8* const host_ptr{GetPointerFromRasterizerCachedMemory(current_vaddr)};
},
[this, &process, &dest_addr](const std::size_t copy_amount, const u8* const src_ptr) {
WriteBlockImpl<false>(process, dest_addr, src_ptr, copy_amount);
},
[this, &system = system, &process, &dest_addr](
const VAddr current_vaddr, const std::size_t copy_amount, u8* const host_ptr) {
system.GPU().FlushRegion(current_vaddr, copy_amount);
WriteBlock(process, dest_addr, host_ptr, copy_amount);
break;
}
default:
UNREACHABLE();
}
page_index++;
page_offset = 0;
dest_addr += static_cast<VAddr>(copy_amount);
src_addr += static_cast<VAddr>(copy_amount);
remaining_size -= copy_amount;
}
}
void CopyBlock(VAddr dest_addr, VAddr src_addr, std::size_t size) {
return CopyBlock(*system.CurrentProcess(), dest_addr, src_addr, size);
WriteBlockImpl<false>(process, dest_addr, host_ptr, copy_amount);
},
[&dest_addr, &src_addr](const std::size_t copy_amount) {
dest_addr += static_cast<VAddr>(copy_amount);
src_addr += static_cast<VAddr>(copy_amount);
});
}
void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) {
@@ -514,7 +357,7 @@ struct Memory::Impl {
} else {
// Switch page type to uncached if now uncached
switch (page_type) {
case Common::PageType::Unmapped:
case Common::PageType::Unmapped: // NOLINT(bugprone-branch-clone)
// It is not necessary for a process to have this region mapped into its address
// space, for example, a system module need not have a VRAM mapping.
break;
@@ -597,6 +440,44 @@ struct Memory::Impl {
}
}
[[nodiscard]] u8* GetPointerImpl(VAddr vaddr, auto on_unmapped, auto on_rasterizer) const {
// AARCH64 masks the upper 16 bit of all memory accesses
vaddr &= 0xffffffffffffLL;
if (vaddr >= 1uLL << current_page_table->GetAddressSpaceBits()) {
on_unmapped();
return nullptr;
}
// Avoid adding any extra logic to this fast-path block
const uintptr_t raw_pointer = current_page_table->pointers[vaddr >> PAGE_BITS].Raw();
if (u8* const pointer = Common::PageTable::PageInfo::ExtractPointer(raw_pointer)) {
return &pointer[vaddr];
}
switch (Common::PageTable::PageInfo::ExtractType(raw_pointer)) {
case Common::PageType::Unmapped:
on_unmapped();
return nullptr;
case Common::PageType::Memory:
ASSERT_MSG(false, "Mapped memory page without a pointer @ 0x{:016X}", vaddr);
return nullptr;
case Common::PageType::RasterizerCachedMemory: {
u8* const host_ptr{GetPointerFromRasterizerCachedMemory(vaddr)};
on_rasterizer();
return host_ptr;
}
default:
UNREACHABLE();
}
return nullptr;
}
[[nodiscard]] u8* GetPointer(const VAddr vaddr) const {
return GetPointerImpl(
vaddr, [vaddr]() { LOG_ERROR(HW_Memory, "Unmapped GetPointer @ 0x{:016X}", vaddr); },
[]() {});
}
/**
* Reads a particular data type out of memory at the given virtual address.
*
@@ -610,39 +491,17 @@ struct Memory::Impl {
*/
template <typename T>
T Read(VAddr vaddr) {
// AARCH64 masks the upper 16 bit of all memory accesses
vaddr &= 0xffffffffffffLL;
if (vaddr >= 1uLL << current_page_table->GetAddressSpaceBits()) {
LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:08X}", sizeof(T) * 8, vaddr);
return 0;
T result = 0;
const u8* const ptr = GetPointerImpl(
vaddr,
[vaddr]() {
LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:016X}", sizeof(T) * 8, vaddr);
},
[&system = system, vaddr]() { system.GPU().FlushRegion(vaddr, sizeof(T)); });
if (ptr) {
std::memcpy(&result, ptr, sizeof(T));
}
// Avoid adding any extra logic to this fast-path block
const uintptr_t raw_pointer = current_page_table->pointers[vaddr >> PAGE_BITS].Raw();
if (const u8* const pointer = Common::PageTable::PageInfo::ExtractPointer(raw_pointer)) {
T value;
std::memcpy(&value, &pointer[vaddr], sizeof(T));
return value;
}
switch (Common::PageTable::PageInfo::ExtractType(raw_pointer)) {
case Common::PageType::Unmapped:
LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:08X}", sizeof(T) * 8, vaddr);
return 0;
case Common::PageType::Memory:
ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr);
break;
case Common::PageType::RasterizerCachedMemory: {
const u8* const host_ptr{GetPointerFromRasterizerCachedMemory(vaddr)};
system.GPU().FlushRegion(vaddr, sizeof(T));
T value;
std::memcpy(&value, host_ptr, sizeof(T));
return value;
}
default:
UNREACHABLE();
}
return {};
return result;
}
/**
@@ -656,110 +515,46 @@ struct Memory::Impl {
*/
template <typename T>
void Write(VAddr vaddr, const T data) {
// AARCH64 masks the upper 16 bit of all memory accesses
vaddr &= 0xffffffffffffLL;
if (vaddr >= 1uLL << current_page_table->GetAddressSpaceBits()) {
LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8,
static_cast<u32>(data), vaddr);
return;
}
// Avoid adding any extra logic to this fast-path block
const uintptr_t raw_pointer = current_page_table->pointers[vaddr >> PAGE_BITS].Raw();
if (u8* const pointer = Common::PageTable::PageInfo::ExtractPointer(raw_pointer)) {
std::memcpy(&pointer[vaddr], &data, sizeof(T));
return;
}
switch (Common::PageTable::PageInfo::ExtractType(raw_pointer)) {
case Common::PageType::Unmapped:
LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8,
static_cast<u32>(data), vaddr);
return;
case Common::PageType::Memory:
ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr);
break;
case Common::PageType::RasterizerCachedMemory: {
u8* const host_ptr{GetPointerFromRasterizerCachedMemory(vaddr)};
system.GPU().InvalidateRegion(vaddr, sizeof(T));
std::memcpy(host_ptr, &data, sizeof(T));
break;
}
default:
UNREACHABLE();
u8* const ptr = GetPointerImpl(
vaddr,
[vaddr, data]() {
LOG_ERROR(HW_Memory, "Unmapped Write{} @ 0x{:016X} = 0x{:016X}", sizeof(T) * 8,
vaddr, static_cast<u64>(data));
},
[&system = system, vaddr]() { system.GPU().InvalidateRegion(vaddr, sizeof(T)); });
if (ptr) {
std::memcpy(ptr, &data, sizeof(T));
}
}
template <typename T>
bool WriteExclusive(VAddr vaddr, const T data, const T expected) {
// AARCH64 masks the upper 16 bit of all memory accesses
vaddr &= 0xffffffffffffLL;
if (vaddr >= 1uLL << current_page_table->GetAddressSpaceBits()) {
LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8,
static_cast<u32>(data), vaddr);
return true;
}
const uintptr_t raw_pointer = current_page_table->pointers[vaddr >> PAGE_BITS].Raw();
if (u8* const pointer = Common::PageTable::PageInfo::ExtractPointer(raw_pointer)) {
// NOTE: Avoid adding any extra logic to this fast-path block
const auto volatile_pointer = reinterpret_cast<volatile T*>(&pointer[vaddr]);
u8* const ptr = GetPointerImpl(
vaddr,
[vaddr, data]() {
LOG_ERROR(HW_Memory, "Unmapped WriteExclusive{} @ 0x{:016X} = 0x{:016X}",
sizeof(T) * 8, vaddr, static_cast<u64>(data));
},
[&system = system, vaddr]() { system.GPU().InvalidateRegion(vaddr, sizeof(T)); });
if (ptr) {
const auto volatile_pointer = reinterpret_cast<volatile T*>(ptr);
return Common::AtomicCompareAndSwap(volatile_pointer, data, expected);
}
switch (Common::PageTable::PageInfo::ExtractType(raw_pointer)) {
case Common::PageType::Unmapped:
LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8,
static_cast<u32>(data), vaddr);
return true;
case Common::PageType::Memory:
ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr);
break;
case Common::PageType::RasterizerCachedMemory: {
u8* host_ptr{GetPointerFromRasterizerCachedMemory(vaddr)};
system.GPU().InvalidateRegion(vaddr, sizeof(T));
auto* pointer = reinterpret_cast<volatile T*>(&host_ptr);
return Common::AtomicCompareAndSwap(pointer, data, expected);
}
default:
UNREACHABLE();
}
return true;
}
bool WriteExclusive128(VAddr vaddr, const u128 data, const u128 expected) {
// AARCH64 masks the upper 16 bit of all memory accesses
vaddr &= 0xffffffffffffLL;
if (vaddr >= 1uLL << current_page_table->GetAddressSpaceBits()) {
LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8,
static_cast<u32>(data[0]), vaddr);
return true;
}
const uintptr_t raw_pointer = current_page_table->pointers[vaddr >> PAGE_BITS].Raw();
if (u8* const pointer = Common::PageTable::PageInfo::ExtractPointer(raw_pointer)) {
// NOTE: Avoid adding any extra logic to this fast-path block
const auto volatile_pointer = reinterpret_cast<volatile u64*>(&pointer[vaddr]);
u8* const ptr = GetPointerImpl(
vaddr,
[vaddr, data]() {
LOG_ERROR(HW_Memory, "Unmapped WriteExclusive128 @ 0x{:016X} = 0x{:016X}{:016X}",
vaddr, static_cast<u64>(data[1]), static_cast<u64>(data[0]));
},
[&system = system, vaddr]() { system.GPU().InvalidateRegion(vaddr, sizeof(u128)); });
if (ptr) {
const auto volatile_pointer = reinterpret_cast<volatile u64*>(ptr);
return Common::AtomicCompareAndSwap(volatile_pointer, data, expected);
}
switch (Common::PageTable::PageInfo::ExtractType(raw_pointer)) {
case Common::PageType::Unmapped:
LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}{:016X}", sizeof(data) * 8,
static_cast<u64>(data[1]), static_cast<u64>(data[0]), vaddr);
return true;
case Common::PageType::Memory:
ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr);
break;
case Common::PageType::RasterizerCachedMemory: {
u8* host_ptr{GetPointerFromRasterizerCachedMemory(vaddr)};
system.GPU().InvalidateRegion(vaddr, sizeof(u128));
auto* pointer = reinterpret_cast<volatile u64*>(&host_ptr);
return Common::AtomicCompareAndSwap(pointer, data, expected);
}
default:
UNREACHABLE();
}
return true;
}
@@ -789,12 +584,11 @@ void Memory::UnmapRegion(Common::PageTable& page_table, VAddr base, u64 size) {
impl->UnmapRegion(page_table, base, size);
}
bool Memory::IsValidVirtualAddress(const Kernel::KProcess& process, const VAddr vaddr) const {
return impl->IsValidVirtualAddress(process, vaddr);
}
bool Memory::IsValidVirtualAddress(const VAddr vaddr) const {
return impl->IsValidVirtualAddress(vaddr);
const Kernel::KProcess& process = *system.CurrentProcess();
const auto& page_table = process.PageTable().PageTableImpl();
const auto [pointer, type] = page_table.pointers[vaddr >> PAGE_BITS].PointerType();
return pointer != nullptr || type == Common::PageType::RasterizerCachedMemory;
}
u8* Memory::GetPointer(VAddr vaddr) {
@@ -863,64 +657,38 @@ std::string Memory::ReadCString(VAddr vaddr, std::size_t max_length) {
void Memory::ReadBlock(const Kernel::KProcess& process, const VAddr src_addr, void* dest_buffer,
const std::size_t size) {
impl->ReadBlock(process, src_addr, dest_buffer, size);
impl->ReadBlockImpl<false>(process, src_addr, dest_buffer, size);
}
void Memory::ReadBlock(const VAddr src_addr, void* dest_buffer, const std::size_t size) {
impl->ReadBlock(src_addr, dest_buffer, size);
}
void Memory::ReadBlockUnsafe(const Kernel::KProcess& process, const VAddr src_addr,
void* dest_buffer, const std::size_t size) {
impl->ReadBlockUnsafe(process, src_addr, dest_buffer, size);
}
void Memory::ReadBlockUnsafe(const VAddr src_addr, void* dest_buffer, const std::size_t size) {
impl->ReadBlockUnsafe(src_addr, dest_buffer, size);
}
void Memory::WriteBlock(const Kernel::KProcess& process, VAddr dest_addr, const void* src_buffer,
std::size_t size) {
impl->WriteBlock(process, dest_addr, src_buffer, size);
impl->WriteBlockImpl<false>(process, dest_addr, src_buffer, size);
}
void Memory::WriteBlock(const VAddr dest_addr, const void* src_buffer, const std::size_t size) {
impl->WriteBlock(dest_addr, src_buffer, size);
}
void Memory::WriteBlockUnsafe(const Kernel::KProcess& process, VAddr dest_addr,
const void* src_buffer, std::size_t size) {
impl->WriteBlockUnsafe(process, dest_addr, src_buffer, size);
}
void Memory::WriteBlockUnsafe(const VAddr dest_addr, const void* src_buffer,
const std::size_t size) {
impl->WriteBlockUnsafe(dest_addr, src_buffer, size);
}
void Memory::ZeroBlock(const Kernel::KProcess& process, VAddr dest_addr, std::size_t size) {
impl->ZeroBlock(process, dest_addr, size);
}
void Memory::ZeroBlock(VAddr dest_addr, std::size_t size) {
impl->ZeroBlock(dest_addr, size);
}
void Memory::CopyBlock(const Kernel::KProcess& process, VAddr dest_addr, VAddr src_addr,
const std::size_t size) {
impl->CopyBlock(process, dest_addr, src_addr, size);
}
void Memory::CopyBlock(VAddr dest_addr, VAddr src_addr, std::size_t size) {
impl->CopyBlock(dest_addr, src_addr, size);
}
void Memory::RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) {
impl->RasterizerMarkRegionCached(vaddr, size, cached);
}
bool IsKernelVirtualAddress(const VAddr vaddr) {
return KERNEL_REGION_VADDR <= vaddr && vaddr < KERNEL_REGION_END;
}
} // namespace Core::Memory

View File

@@ -39,11 +39,6 @@ enum : VAddr {
/// Application stack
DEFAULT_STACK_SIZE = 0x100000,
/// Kernel Virtual Address Range
KERNEL_REGION_VADDR = 0xFFFFFF8000000000,
KERNEL_REGION_SIZE = 0x7FFFE00000,
KERNEL_REGION_END = KERNEL_REGION_VADDR + KERNEL_REGION_SIZE,
};
/// Central class that handles all memory operations and state.
@@ -56,7 +51,7 @@ public:
Memory& operator=(const Memory&) = delete;
Memory(Memory&&) = default;
Memory& operator=(Memory&&) = default;
Memory& operator=(Memory&&) = delete;
/**
* Resets the state of the Memory system.
@@ -90,17 +85,6 @@ public:
*/
void UnmapRegion(Common::PageTable& page_table, VAddr base, u64 size);
/**
* Checks whether or not the supplied address is a valid virtual
* address for the given process.
*
* @param process The emulated process to check the address against.
* @param vaddr The virtual address to check the validity of.
*
* @returns True if the given virtual address is valid, false otherwise.
*/
bool IsValidVirtualAddress(const Kernel::KProcess& process, VAddr vaddr) const;
/**
* Checks whether or not the supplied address is a valid virtual
* address for the current process.
@@ -109,7 +93,7 @@ public:
*
* @returns True if the given virtual address is valid, false otherwise.
*/
bool IsValidVirtualAddress(VAddr vaddr) const;
[[nodiscard]] bool IsValidVirtualAddress(VAddr vaddr) const;
/**
* Gets a pointer to the given address.
@@ -134,7 +118,7 @@ public:
* @returns The pointer to the given address, if the address is valid.
* If the address is not valid, nullptr will be returned.
*/
const u8* GetPointer(VAddr vaddr) const;
[[nodiscard]] const u8* GetPointer(VAddr vaddr) const;
template <typename T>
const T* GetPointer(VAddr vaddr) const {
@@ -327,27 +311,6 @@ public:
void ReadBlock(const Kernel::KProcess& process, VAddr src_addr, void* dest_buffer,
std::size_t size);
/**
* Reads a contiguous block of bytes from a specified process' address space.
* This unsafe version does not trigger GPU flushing.
*
* @param process The process to read the data from.
* @param src_addr The virtual address to begin reading from.
* @param dest_buffer The buffer to place the read bytes into.
* @param size The amount of data to read, in bytes.
*
* @note If a size of 0 is specified, then this function reads nothing and
* no attempts to access memory are made at all.
*
* @pre dest_buffer must be at least size bytes in length, otherwise a
* buffer overrun will occur.
*
* @post The range [dest_buffer, size) contains the read bytes from the
* process' address space.
*/
void ReadBlockUnsafe(const Kernel::KProcess& process, VAddr src_addr, void* dest_buffer,
std::size_t size);
/**
* Reads a contiguous block of bytes from the current process' address space.
*
@@ -408,26 +371,6 @@ public:
void WriteBlock(const Kernel::KProcess& process, VAddr dest_addr, const void* src_buffer,
std::size_t size);
/**
* Writes a range of bytes into a given process' address space at the specified
* virtual address.
* This unsafe version does not invalidate GPU Memory.
*
* @param process The process to write data into the address space of.
* @param dest_addr The destination virtual address to begin writing the data at.
* @param src_buffer The data to write into the process' address space.
* @param size The size of the data to write, in bytes.
*
* @post The address range [dest_addr, size) in the process' address space
* contains the data that was within src_buffer.
*
* @post If an attempt is made to write into an unmapped region of memory, the writes
* will be ignored and an error will be logged.
*
*/
void WriteBlockUnsafe(const Kernel::KProcess& process, VAddr dest_addr, const void* src_buffer,
std::size_t size);
/**
* Writes a range of bytes into the current process' address space at the specified
* virtual address.
@@ -467,29 +410,6 @@ public:
*/
void WriteBlockUnsafe(VAddr dest_addr, const void* src_buffer, std::size_t size);
/**
* Fills the specified address range within a process' address space with zeroes.
*
* @param process The process that will have a portion of its memory zeroed out.
* @param dest_addr The starting virtual address of the range to zero out.
* @param size The size of the address range to zero out, in bytes.
*
* @post The range [dest_addr, size) within the process' address space is
* filled with zeroes.
*/
void ZeroBlock(const Kernel::KProcess& process, VAddr dest_addr, std::size_t size);
/**
* Fills the specified address range within the current process' address space with zeroes.
*
* @param dest_addr The starting virtual address of the range to zero out.
* @param size The size of the address range to zero out, in bytes.
*
* @post The range [dest_addr, size) within the current process' address space is
* filled with zeroes.
*/
void ZeroBlock(VAddr dest_addr, std::size_t size);
/**
* Copies data within a process' address space to another location within the
* same address space.
@@ -505,19 +425,6 @@ public:
void CopyBlock(const Kernel::KProcess& process, VAddr dest_addr, VAddr src_addr,
std::size_t size);
/**
* Copies data within the current process' address space to another location within the
* same address space.
*
* @param dest_addr The destination virtual address to begin copying the data into.
* @param src_addr The source virtual address to begin copying the data from.
* @param size The size of the data to copy, in bytes.
*
* @post The range [dest_addr, size) within the current process' address space
* contains the same data within the range [src_addr, size).
*/
void CopyBlock(VAddr dest_addr, VAddr src_addr, std::size_t size);
/**
* Marks each page within the specified address range as cached or uncached.
*
@@ -535,7 +442,4 @@ private:
std::unique_ptr<Impl> impl;
};
/// Determines if the given VAddr is a kernel address
bool IsKernelVirtualAddress(VAddr vaddr);
} // namespace Core::Memory

View File

@@ -570,7 +570,7 @@ std::pair<s32, Errno> Socket::SendTo(u32 flags, const std::vector<u8>& message,
ASSERT(flags == 0);
const sockaddr* to = nullptr;
const int tolen = addr ? 0 : sizeof(sockaddr);
const int tolen = addr ? sizeof(sockaddr) : 0;
sockaddr host_addr_in;
if (addr) {

View File

@@ -127,15 +127,15 @@ double PerfStats::GetLastFrameTimeScale() const {
return duration_cast<DoubleSecs>(previous_frame_length).count() / FRAME_LENGTH;
}
void FrameLimiter::DoFrameLimiting(microseconds current_system_time_us) {
if (!Settings::values.use_frame_limit.GetValue() ||
void SpeedLimiter::DoSpeedLimiting(microseconds current_system_time_us) {
if (!Settings::values.use_speed_limit.GetValue() ||
Settings::values.use_multi_core.GetValue()) {
return;
}
auto now = Clock::now();
const double sleep_scale = Settings::values.frame_limit.GetValue() / 100.0;
const double sleep_scale = Settings::values.speed_limit.GetValue() / 100.0;
// Max lag caused by slow frames. Shouldn't be more than the length of a frame at the current
// speed percent or it will clamp too much and prevent this from properly limiting to that
@@ -143,17 +143,17 @@ void FrameLimiter::DoFrameLimiting(microseconds current_system_time_us) {
// limiting
const microseconds max_lag_time_us = duration_cast<microseconds>(
std::chrono::duration<double, std::chrono::microseconds::period>(25ms / sleep_scale));
frame_limiting_delta_err += duration_cast<microseconds>(
speed_limiting_delta_err += duration_cast<microseconds>(
std::chrono::duration<double, std::chrono::microseconds::period>(
(current_system_time_us - previous_system_time_us) / sleep_scale));
frame_limiting_delta_err -= duration_cast<microseconds>(now - previous_walltime);
frame_limiting_delta_err =
std::clamp(frame_limiting_delta_err, -max_lag_time_us, max_lag_time_us);
speed_limiting_delta_err -= duration_cast<microseconds>(now - previous_walltime);
speed_limiting_delta_err =
std::clamp(speed_limiting_delta_err, -max_lag_time_us, max_lag_time_us);
if (frame_limiting_delta_err > microseconds::zero()) {
std::this_thread::sleep_for(frame_limiting_delta_err);
if (speed_limiting_delta_err > microseconds::zero()) {
std::this_thread::sleep_for(speed_limiting_delta_err);
auto now_after_sleep = Clock::now();
frame_limiting_delta_err -= duration_cast<microseconds>(now_after_sleep - now);
speed_limiting_delta_err -= duration_cast<microseconds>(now_after_sleep - now);
now = now_after_sleep;
}

View File

@@ -85,11 +85,11 @@ private:
double previous_fps = 0;
};
class FrameLimiter {
class SpeedLimiter {
public:
using Clock = std::chrono::high_resolution_clock;
void DoFrameLimiting(std::chrono::microseconds current_system_time_us);
void DoSpeedLimiting(std::chrono::microseconds current_system_time_us);
private:
/// Emulated system time (in microseconds) at the last limiter invocation
@@ -98,7 +98,7 @@ private:
Clock::time_point previous_walltime = Clock::now();
/// Accumulated difference between walltime and emulated time
std::chrono::microseconds frame_limiting_delta_err{0};
std::chrono::microseconds speed_limiting_delta_err{0};
};
} // namespace Core

View File

@@ -221,8 +221,8 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader,
TranslateRenderer(Settings::values.renderer_backend.GetValue()));
AddField(field_type, "Renderer_ResolutionFactor",
Settings::values.resolution_factor.GetValue());
AddField(field_type, "Renderer_UseFrameLimit", Settings::values.use_frame_limit.GetValue());
AddField(field_type, "Renderer_FrameLimit", Settings::values.frame_limit.GetValue());
AddField(field_type, "Renderer_UseSpeedLimit", Settings::values.use_speed_limit.GetValue());
AddField(field_type, "Renderer_SpeedLimit", Settings::values.speed_limit.GetValue());
AddField(field_type, "Renderer_UseDiskShaderCache",
Settings::values.use_disk_shader_cache.GetValue());
AddField(field_type, "Renderer_GPUAccuracyLevel",

View File

@@ -304,10 +304,10 @@ std::vector<std::unique_ptr<Polling::DevicePoller>> InputSubsystem::GetPollers([
}
std::string GenerateKeyboardParam(int key_code) {
Common::ParamPackage param{
{"engine", "keyboard"},
{"code", std::to_string(key_code)},
};
Common::ParamPackage param;
param.Set("engine", "keyboard");
param.Set("code", key_code);
param.Set("toggle", false);
return param.Serialize();
}

View File

@@ -57,6 +57,7 @@ Common::ParamPackage MouseButtonFactory::GetNextInput() const {
if (pad.button != MouseInput::MouseButton::Undefined) {
params.Set("engine", "mouse");
params.Set("button", static_cast<u16>(pad.button));
params.Set("toggle", false);
return params;
}
}

View File

@@ -82,6 +82,12 @@ public:
state.buttons.insert_or_assign(button, value);
}
void PreSetButton(int button) {
if (!state.buttons.contains(button)) {
SetButton(button, false);
}
}
void SetMotion(SDL_ControllerSensorEvent event) {
constexpr float gravity_constant = 9.80665f;
std::lock_guard lock{mutex};
@@ -155,9 +161,16 @@ public:
state.axes.insert_or_assign(axis, value);
}
float GetAxis(int axis, float range) const {
void PreSetAxis(int axis) {
if (!state.axes.contains(axis)) {
SetAxis(axis, 0);
}
}
float GetAxis(int axis, float range, float offset) const {
std::lock_guard lock{mutex};
return static_cast<float>(state.axes.at(axis)) / (32767.0f * range);
const float value = static_cast<float>(state.axes.at(axis)) / 32767.0f;
return (value + offset) / range;
}
bool RumblePlay(u16 amp_low, u16 amp_high) {
@@ -174,9 +187,10 @@ public:
return false;
}
std::tuple<float, float> GetAnalog(int axis_x, int axis_y, float range) const {
float x = GetAxis(axis_x, range);
float y = GetAxis(axis_y, range);
std::tuple<float, float> GetAnalog(int axis_x, int axis_y, float range, float offset_x,
float offset_y) const {
float x = GetAxis(axis_x, range, offset_x);
float y = GetAxis(axis_y, range, offset_y);
y = -y; // 3DS uses an y-axis inverse from SDL
// Make sure the coordinates are in the unit circle,
@@ -483,7 +497,7 @@ public:
trigger_if_greater(trigger_if_greater_) {}
bool GetStatus() const override {
const float axis_value = joystick->GetAxis(axis, 1.0f);
const float axis_value = joystick->GetAxis(axis, 1.0f, 0.0f);
if (trigger_if_greater) {
return axis_value > threshold;
}
@@ -500,12 +514,14 @@ private:
class SDLAnalog final : public Input::AnalogDevice {
public:
explicit SDLAnalog(std::shared_ptr<SDLJoystick> joystick_, int axis_x_, int axis_y_,
bool invert_x_, bool invert_y_, float deadzone_, float range_)
bool invert_x_, bool invert_y_, float deadzone_, float range_,
float offset_x_, float offset_y_)
: joystick(std::move(joystick_)), axis_x(axis_x_), axis_y(axis_y_), invert_x(invert_x_),
invert_y(invert_y_), deadzone(deadzone_), range(range_) {}
invert_y(invert_y_), deadzone(deadzone_), range(range_), offset_x(offset_x_),
offset_y(offset_y_) {}
std::tuple<float, float> GetStatus() const override {
auto [x, y] = joystick->GetAnalog(axis_x, axis_y, range);
auto [x, y] = joystick->GetAnalog(axis_x, axis_y, range, offset_x, offset_y);
const float r = std::sqrt((x * x) + (y * y));
if (invert_x) {
x = -x;
@@ -522,8 +538,8 @@ public:
}
std::tuple<float, float> GetRawStatus() const override {
const float x = joystick->GetAxis(axis_x, range);
const float y = joystick->GetAxis(axis_y, range);
const float x = joystick->GetAxis(axis_x, range, offset_x);
const float y = joystick->GetAxis(axis_y, range, offset_y);
return {x, -y};
}
@@ -555,6 +571,8 @@ private:
const bool invert_y;
const float deadzone;
const float range;
const float offset_x;
const float offset_y;
};
class SDLVibration final : public Input::VibrationDevice {
@@ -621,7 +639,7 @@ public:
trigger_if_greater(trigger_if_greater_) {}
Input::MotionStatus GetStatus() const override {
const float axis_value = joystick->GetAxis(axis, 1.0f);
const float axis_value = joystick->GetAxis(axis, 1.0f, 0.0f);
bool trigger = axis_value < threshold;
if (trigger_if_greater) {
trigger = axis_value > threshold;
@@ -720,13 +738,13 @@ public:
LOG_ERROR(Input, "Unknown direction {}", direction_name);
}
// This is necessary so accessing GetAxis with axis won't crash
joystick->SetAxis(axis, 0);
joystick->PreSetAxis(axis);
return std::make_unique<SDLAxisButton>(joystick, axis, threshold, trigger_if_greater);
}
const int button = params.Get("button", 0);
// This is necessary so accessing GetButton with button won't crash
joystick->SetButton(button, false);
joystick->PreSetButton(button);
return std::make_unique<SDLButton>(joystick, button, toggle);
}
@@ -757,13 +775,15 @@ public:
const std::string invert_y_value = params.Get("invert_y", "+");
const bool invert_x = invert_x_value == "-";
const bool invert_y = invert_y_value == "-";
const float offset_x = params.Get("offset_x", 0.0f);
const float offset_y = params.Get("offset_y", 0.0f);
auto joystick = state.GetSDLJoystickByGUID(guid, port);
// This is necessary so accessing GetAxis with axis_x and axis_y won't crash
joystick->SetAxis(axis_x, 0);
joystick->SetAxis(axis_y, 0);
joystick->PreSetAxis(axis_x);
joystick->PreSetAxis(axis_y);
return std::make_unique<SDLAnalog>(joystick, axis_x, axis_y, invert_x, invert_y, deadzone,
range);
range, offset_x, offset_y);
}
private:
@@ -844,13 +864,13 @@ public:
LOG_ERROR(Input, "Unknown direction {}", direction_name);
}
// This is necessary so accessing GetAxis with axis won't crash
joystick->SetAxis(axis, 0);
joystick->PreSetAxis(axis);
return std::make_unique<SDLAxisMotion>(joystick, axis, threshold, trigger_if_greater);
}
const int button = params.Get("button", 0);
// This is necessary so accessing GetButton with button won't crash
joystick->SetButton(button, false);
joystick->PreSetButton(button);
return std::make_unique<SDLButtonMotion>(joystick, button);
}
@@ -995,6 +1015,7 @@ Common::ParamPackage BuildButtonParamPackageForButton(int port, std::string guid
params.Set("port", port);
params.Set("guid", std::move(guid));
params.Set("button", button);
params.Set("toggle", false);
return params;
}
@@ -1134,13 +1155,15 @@ Common::ParamPackage BuildParamPackageForBinding(int port, const std::string& gu
}
Common::ParamPackage BuildParamPackageForAnalog(int port, const std::string& guid, int axis_x,
int axis_y) {
int axis_y, float offset_x, float offset_y) {
Common::ParamPackage params;
params.Set("engine", "sdl");
params.Set("port", port);
params.Set("guid", guid);
params.Set("axis_x", axis_x);
params.Set("axis_y", axis_y);
params.Set("offset_x", offset_x);
params.Set("offset_y", offset_y);
params.Set("invert_x", "+");
params.Set("invert_y", "+");
return params;
@@ -1342,24 +1365,39 @@ AnalogMapping SDLState::GetAnalogMappingForDevice(const Common::ParamPackage& pa
const auto& binding_left_y =
SDL_GameControllerGetBindForAxis(controller, SDL_CONTROLLER_AXIS_LEFTY);
if (params.Has("guid2")) {
joystick2->PreSetAxis(binding_left_x.value.axis);
joystick2->PreSetAxis(binding_left_y.value.axis);
const auto left_offset_x = -joystick2->GetAxis(binding_left_x.value.axis, 1.0f, 0);
const auto left_offset_y = -joystick2->GetAxis(binding_left_y.value.axis, 1.0f, 0);
mapping.insert_or_assign(
Settings::NativeAnalog::LStick,
BuildParamPackageForAnalog(joystick2->GetPort(), joystick2->GetGUID(),
binding_left_x.value.axis, binding_left_y.value.axis));
binding_left_x.value.axis, binding_left_y.value.axis,
left_offset_x, left_offset_y));
} else {
joystick->PreSetAxis(binding_left_x.value.axis);
joystick->PreSetAxis(binding_left_y.value.axis);
const auto left_offset_x = -joystick->GetAxis(binding_left_x.value.axis, 1.0f, 0);
const auto left_offset_y = -joystick->GetAxis(binding_left_y.value.axis, 1.0f, 0);
mapping.insert_or_assign(
Settings::NativeAnalog::LStick,
BuildParamPackageForAnalog(joystick->GetPort(), joystick->GetGUID(),
binding_left_x.value.axis, binding_left_y.value.axis));
binding_left_x.value.axis, binding_left_y.value.axis,
left_offset_x, left_offset_y));
}
const auto& binding_right_x =
SDL_GameControllerGetBindForAxis(controller, SDL_CONTROLLER_AXIS_RIGHTX);
const auto& binding_right_y =
SDL_GameControllerGetBindForAxis(controller, SDL_CONTROLLER_AXIS_RIGHTY);
joystick->PreSetAxis(binding_right_x.value.axis);
joystick->PreSetAxis(binding_right_y.value.axis);
const auto right_offset_x = -joystick->GetAxis(binding_right_x.value.axis, 1.0f, 0);
const auto right_offset_y = -joystick->GetAxis(binding_right_y.value.axis, 1.0f, 0);
mapping.insert_or_assign(Settings::NativeAnalog::RStick,
BuildParamPackageForAnalog(joystick->GetPort(), joystick->GetGUID(),
binding_right_x.value.axis,
binding_right_y.value.axis));
binding_right_y.value.axis, right_offset_x,
right_offset_y));
return mapping;
}
@@ -1563,8 +1601,9 @@ public:
}
if (const auto joystick = state.GetSDLJoystickBySDLID(event.jaxis.which)) {
// Set offset to zero since the joystick is not on center
auto params = BuildParamPackageForAnalog(joystick->GetPort(), joystick->GetGUID(),
first_axis, axis);
first_axis, axis, 0, 0);
first_axis = -1;
return params;
}

View File

@@ -350,7 +350,7 @@ std::string_view InputPrimitive(InputTopology topology) {
case InputTopology::Lines:
return "LINES";
case InputTopology::LinesAdjacency:
return "LINESS_ADJACENCY";
return "LINES_ADJACENCY";
case InputTopology::Triangles:
return "TRIANGLES";
case InputTopology::TrianglesAdjacency:

View File

@@ -4,7 +4,7 @@
#pragma once
#include <stdexcept>
#include <exception>
#include <string>
#include <string_view>
#include <utility>
@@ -17,7 +17,7 @@ class Exception : public std::exception {
public:
explicit Exception(std::string message) noexcept : err_message{std::move(message)} {}
const char* what() const noexcept override {
[[nodiscard]] const char* what() const noexcept override {
return err_message.c_str();
}
@@ -36,21 +36,21 @@ private:
class LogicError : public Exception {
public:
template <typename... Args>
LogicError(const char* message, Args&&... args)
explicit LogicError(const char* message, Args&&... args)
: Exception{fmt::format(fmt::runtime(message), std::forward<Args>(args)...)} {}
};
class RuntimeError : public Exception {
public:
template <typename... Args>
RuntimeError(const char* message, Args&&... args)
explicit RuntimeError(const char* message, Args&&... args)
: Exception{fmt::format(fmt::runtime(message), std::forward<Args>(args)...)} {}
};
class NotImplementedException : public Exception {
public:
template <typename... Args>
NotImplementedException(const char* message, Args&&... args)
explicit NotImplementedException(const char* message, Args&&... args)
: Exception{fmt::format(fmt::runtime(message), std::forward<Args>(args)...)} {
Append(" is not implemented");
}
@@ -59,7 +59,7 @@ public:
class InvalidArgument : public Exception {
public:
template <typename... Args>
InvalidArgument(const char* message, Args&&... args)
explicit InvalidArgument(const char* message, Args&&... args)
: Exception{fmt::format(fmt::runtime(message), std::forward<Args>(args)...)} {}
};

View File

@@ -327,8 +327,8 @@ public:
const Value& derivates, const Value& offset,
const F32& lod_clamp, TextureInstInfo info);
[[nodiscard]] Value ImageRead(const Value& handle, const Value& coords, TextureInstInfo info);
[[nodiscard]] void ImageWrite(const Value& handle, const Value& coords, const Value& color,
TextureInstInfo info);
void ImageWrite(const Value& handle, const Value& coords, const Value& color,
TextureInstInfo info);
[[nodiscard]] Value ImageAtomicIAdd(const Value& handle, const Value& coords,
const Value& value, TextureInstInfo info);

View File

@@ -57,6 +57,7 @@ public:
[[nodiscard]] IR::Inst* Inst() const;
[[nodiscard]] IR::Inst* InstRecursive() const;
[[nodiscard]] IR::Inst* TryInstRecursive() const;
[[nodiscard]] IR::Value Resolve() const;
[[nodiscard]] IR::Reg Reg() const;
[[nodiscard]] IR::Pred Pred() const;
@@ -197,8 +198,8 @@ public:
}
template <typename FlagsType>
requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v<FlagsType>)
[[nodiscard]] void SetFlags(FlagsType value) noexcept {
requires(sizeof(FlagsType) <= sizeof(u32) &&
std::is_trivially_copyable_v<FlagsType>) void SetFlags(FlagsType value) noexcept {
std::memcpy(&flags, &value, sizeof(value));
}
@@ -308,6 +309,13 @@ inline IR::Inst* Value::InstRecursive() const {
return inst;
}
inline IR::Inst* Value::TryInstRecursive() const {
if (IsIdentity()) {
return inst->Arg(0).TryInstRecursive();
}
return type == Type::Opaque ? inst : nullptr;
}
inline IR::Value Value::Resolve() const {
if (IsIdentity()) {
return inst->Arg(0).Resolve();

View File

@@ -73,7 +73,7 @@ Token OpcodeToken(Opcode opcode) {
return Token::PBK;
case Opcode::PCNT:
case Opcode::CONT:
return Token::PBK;
return Token::PCNT;
case Opcode::PEXIT:
case Opcode::EXIT:
return Token::PEXIT;

View File

@@ -111,6 +111,8 @@ void VisitUsages(Info& info, IR::Inst& inst) {
case IR::Opcode::ConvertF16U16:
case IR::Opcode::ConvertF16U32:
case IR::Opcode::ConvertF16U64:
case IR::Opcode::ConvertF16F32:
case IR::Opcode::ConvertF32F16:
case IR::Opcode::FPAbs16:
case IR::Opcode::FPAdd16:
case IR::Opcode::FPCeil16:

View File

@@ -3,6 +3,7 @@
// Refer to the license.txt file included.
#include <algorithm>
#include <functional>
#include <tuple>
#include <type_traits>
@@ -88,6 +89,26 @@ bool FoldWhenAllImmediates(IR::Inst& inst, Func&& func) {
return true;
}
/// Return true when all values in a range are equal
template <typename Range>
bool AreEqual(const Range& range) {
auto resolver{[](const auto& value) { return value.Resolve(); }};
auto equal{[](const IR::Value& lhs, const IR::Value& rhs) {
if (lhs == rhs) {
return true;
}
// Not equal, but try to match if they read the same constant buffer
if (!lhs.IsImmediate() && !rhs.IsImmediate() &&
lhs.Inst()->GetOpcode() == IR::Opcode::GetCbufU32 &&
rhs.Inst()->GetOpcode() == IR::Opcode::GetCbufU32 &&
lhs.Inst()->Arg(0) == rhs.Inst()->Arg(0) && lhs.Inst()->Arg(1) == rhs.Inst()->Arg(1)) {
return true;
}
return false;
}};
return std::ranges::adjacent_find(range, std::not_fn(equal), resolver) == std::end(range);
}
void FoldGetRegister(IR::Inst& inst) {
if (inst.Arg(0).Reg() == IR::Reg::RZ) {
inst.ReplaceUsesWith(IR::Value{u32{0}});
@@ -100,6 +121,157 @@ void FoldGetPred(IR::Inst& inst) {
}
}
/// Replaces the XMAD pattern generated by an integer FMA
bool FoldXmadMultiplyAdd(IR::Block& block, IR::Inst& inst) {
/*
* We are looking for this specific pattern:
* %6 = BitFieldUExtract %op_b, #0, #16
* %7 = BitFieldUExtract %op_a', #16, #16
* %8 = IMul32 %6, %7
* %10 = BitFieldUExtract %op_a', #0, #16
* %11 = BitFieldInsert %8, %10, #16, #16
* %15 = BitFieldUExtract %op_b, #0, #16
* %16 = BitFieldUExtract %op_a, #0, #16
* %17 = IMul32 %15, %16
* %18 = IAdd32 %17, %op_c
* %22 = BitFieldUExtract %op_b, #16, #16
* %23 = BitFieldUExtract %11, #16, #16
* %24 = IMul32 %22, %23
* %25 = ShiftLeftLogical32 %24, #16
* %26 = ShiftLeftLogical32 %11, #16
* %27 = IAdd32 %26, %18
* %result = IAdd32 %25, %27
*
* And replace it with:
* %temp = IMul32 %op_a, %op_b
* %result = IAdd32 %temp, %op_c
*
* This optimization has been proven safe by Nvidia's compiler logic being reversed.
* (If Nvidia generates this code from 'fma(a, b, c)', we can do the same in the reverse order.)
*/
const IR::Value zero{0u};
const IR::Value sixteen{16u};
IR::Inst* const _25{inst.Arg(0).TryInstRecursive()};
IR::Inst* const _27{inst.Arg(1).TryInstRecursive()};
if (!_25 || !_27) {
return false;
}
if (_27->GetOpcode() != IR::Opcode::IAdd32) {
return false;
}
if (_25->GetOpcode() != IR::Opcode::ShiftLeftLogical32 || _25->Arg(1) != sixteen) {
return false;
}
IR::Inst* const _24{_25->Arg(0).TryInstRecursive()};
if (!_24 || _24->GetOpcode() != IR::Opcode::IMul32) {
return false;
}
IR::Inst* const _22{_24->Arg(0).TryInstRecursive()};
IR::Inst* const _23{_24->Arg(1).TryInstRecursive()};
if (!_22 || !_23) {
return false;
}
if (_22->GetOpcode() != IR::Opcode::BitFieldUExtract) {
return false;
}
if (_23->GetOpcode() != IR::Opcode::BitFieldUExtract) {
return false;
}
if (_22->Arg(1) != sixteen || _22->Arg(2) != sixteen) {
return false;
}
if (_23->Arg(1) != sixteen || _23->Arg(2) != sixteen) {
return false;
}
IR::Inst* const _11{_23->Arg(0).TryInstRecursive()};
if (!_11 || _11->GetOpcode() != IR::Opcode::BitFieldInsert) {
return false;
}
if (_11->Arg(2) != sixteen || _11->Arg(3) != sixteen) {
return false;
}
IR::Inst* const _8{_11->Arg(0).TryInstRecursive()};
IR::Inst* const _10{_11->Arg(1).TryInstRecursive()};
if (!_8 || !_10) {
return false;
}
if (_8->GetOpcode() != IR::Opcode::IMul32) {
return false;
}
if (_10->GetOpcode() != IR::Opcode::BitFieldUExtract) {
return false;
}
IR::Inst* const _6{_8->Arg(0).TryInstRecursive()};
IR::Inst* const _7{_8->Arg(1).TryInstRecursive()};
if (!_6 || !_7) {
return false;
}
if (_6->GetOpcode() != IR::Opcode::BitFieldUExtract) {
return false;
}
if (_7->GetOpcode() != IR::Opcode::BitFieldUExtract) {
return false;
}
if (_6->Arg(1) != zero || _6->Arg(2) != sixteen) {
return false;
}
if (_7->Arg(1) != sixteen || _7->Arg(2) != sixteen) {
return false;
}
IR::Inst* const _26{_27->Arg(0).TryInstRecursive()};
IR::Inst* const _18{_27->Arg(1).TryInstRecursive()};
if (!_26 || !_18) {
return false;
}
if (_26->GetOpcode() != IR::Opcode::ShiftLeftLogical32 || _26->Arg(1) != sixteen) {
return false;
}
if (_26->Arg(0).InstRecursive() != _11) {
return false;
}
if (_18->GetOpcode() != IR::Opcode::IAdd32) {
return false;
}
IR::Inst* const _17{_18->Arg(0).TryInstRecursive()};
if (!_17 || _17->GetOpcode() != IR::Opcode::IMul32) {
return false;
}
IR::Inst* const _15{_17->Arg(0).TryInstRecursive()};
IR::Inst* const _16{_17->Arg(1).TryInstRecursive()};
if (!_15 || !_16) {
return false;
}
if (_15->GetOpcode() != IR::Opcode::BitFieldUExtract) {
return false;
}
if (_16->GetOpcode() != IR::Opcode::BitFieldUExtract) {
return false;
}
if (_15->Arg(1) != zero || _16->Arg(1) != zero || _10->Arg(1) != zero) {
return false;
}
if (_15->Arg(2) != sixteen || _16->Arg(2) != sixteen || _10->Arg(2) != sixteen) {
return false;
}
const std::array<IR::Value, 3> op_as{
_7->Arg(0).Resolve(),
_16->Arg(0).Resolve(),
_10->Arg(0).Resolve(),
};
const std::array<IR::Value, 3> op_bs{
_22->Arg(0).Resolve(),
_6->Arg(0).Resolve(),
_15->Arg(0).Resolve(),
};
const IR::U32 op_c{_18->Arg(1)};
if (!AreEqual(op_as) || !AreEqual(op_bs)) {
return false;
}
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
inst.ReplaceUsesWith(ir.IAdd(ir.IMul(IR::U32{op_as[0]}, IR::U32{op_bs[1]}), op_c));
return true;
}
/// Replaces the pattern generated by two XMAD multiplications
bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) {
/*
@@ -116,33 +288,31 @@ bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) {
*
* This optimization has been proven safe by LLVM and MSVC.
*/
const IR::Value lhs_arg{inst.Arg(0)};
const IR::Value rhs_arg{inst.Arg(1)};
if (lhs_arg.IsImmediate() || rhs_arg.IsImmediate()) {
IR::Inst* const lhs_shl{inst.Arg(0).TryInstRecursive()};
IR::Inst* const rhs_mul{inst.Arg(1).TryInstRecursive()};
if (!lhs_shl || !rhs_mul) {
return false;
}
IR::Inst* const lhs_shl{lhs_arg.InstRecursive()};
if (lhs_shl->GetOpcode() != IR::Opcode::ShiftLeftLogical32 ||
lhs_shl->Arg(1) != IR::Value{16U}) {
return false;
}
if (lhs_shl->Arg(0).IsImmediate()) {
IR::Inst* const lhs_mul{lhs_shl->Arg(0).TryInstRecursive()};
if (!lhs_mul) {
return false;
}
IR::Inst* const lhs_mul{lhs_shl->Arg(0).InstRecursive()};
IR::Inst* const rhs_mul{rhs_arg.InstRecursive()};
if (lhs_mul->GetOpcode() != IR::Opcode::IMul32 || rhs_mul->GetOpcode() != IR::Opcode::IMul32) {
return false;
}
if (lhs_mul->Arg(1).Resolve() != rhs_mul->Arg(1).Resolve()) {
return false;
}
const IR::U32 factor_b{lhs_mul->Arg(1)};
if (lhs_mul->Arg(0).IsImmediate() || rhs_mul->Arg(0).IsImmediate()) {
if (factor_b.Resolve() != rhs_mul->Arg(1).Resolve()) {
return false;
}
IR::Inst* const lhs_bfe{lhs_mul->Arg(0).TryInstRecursive()};
IR::Inst* const rhs_bfe{rhs_mul->Arg(0).TryInstRecursive()};
if (!lhs_bfe || !rhs_bfe) {
return false;
}
IR::Inst* const lhs_bfe{lhs_mul->Arg(0).InstRecursive()};
IR::Inst* const rhs_bfe{rhs_mul->Arg(0).InstRecursive()};
if (lhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) {
return false;
}
@@ -155,10 +325,10 @@ bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) {
if (rhs_bfe->Arg(1) != IR::Value{0U} || rhs_bfe->Arg(2) != IR::Value{16U}) {
return false;
}
if (lhs_bfe->Arg(0).Resolve() != rhs_bfe->Arg(0).Resolve()) {
const IR::U32 factor_a{lhs_bfe->Arg(0)};
if (factor_a.Resolve() != rhs_bfe->Arg(0).Resolve()) {
return false;
}
const IR::U32 factor_a{lhs_bfe->Arg(0)};
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
inst.ReplaceUsesWith(ir.IMul(factor_a, factor_b));
return true;
@@ -181,6 +351,9 @@ void FoldAdd(IR::Block& block, IR::Inst& inst) {
if (FoldXmadMultiply(block, inst)) {
return;
}
if (FoldXmadMultiplyAdd(block, inst)) {
return;
}
}
}
@@ -476,6 +649,10 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
return FoldInverseFunc(inst, IR::Opcode::UnpackHalf2x16);
case IR::Opcode::UnpackHalf2x16:
return FoldInverseFunc(inst, IR::Opcode::PackHalf2x16);
case IR::Opcode::PackFloat2x16:
return FoldInverseFunc(inst, IR::Opcode::UnpackFloat2x16);
case IR::Opcode::UnpackFloat2x16:
return FoldInverseFunc(inst, IR::Opcode::PackFloat2x16);
case IR::Opcode::SelectU1:
case IR::Opcode::SelectU8:
case IR::Opcode::SelectU16:

View File

@@ -63,6 +63,7 @@ private:
used_objects = std::exchange(rhs.used_objects, 0);
num_objects = std::exchange(rhs.num_objects, 0);
storage = std::move(rhs.storage);
return *this;
}
Chunk(Chunk&& rhs) noexcept

View File

@@ -1,5 +1,10 @@
add_subdirectory(host_shaders)
if(LIBVA_FOUND)
set_source_files_properties(command_classes/codecs/codec.cpp
PROPERTIES COMPILE_DEFINITIONS LIBVA_FOUND=1)
endif()
add_library(video_core STATIC
buffer_cache/buffer_base.h
buffer_cache/buffer_cache.cpp
@@ -92,6 +97,7 @@ add_library(video_core STATIC
renderer_opengl/gl_stream_buffer.h
renderer_opengl/gl_texture_cache.cpp
renderer_opengl/gl_texture_cache.h
renderer_opengl/gl_texture_cache_base.cpp
renderer_opengl/gl_query_cache.cpp
renderer_opengl/gl_query_cache.h
renderer_opengl/maxwell_to_gl.h
@@ -106,6 +112,8 @@ add_library(video_core STATIC
renderer_vulkan/maxwell_to_vk.cpp
renderer_vulkan/maxwell_to_vk.h
renderer_vulkan/pipeline_helper.h
renderer_vulkan/pipeline_statistics.cpp
renderer_vulkan/pipeline_statistics.h
renderer_vulkan/renderer_vulkan.h
renderer_vulkan/renderer_vulkan.cpp
renderer_vulkan/vk_blit_screen.cpp
@@ -148,6 +156,7 @@ add_library(video_core STATIC
renderer_vulkan/vk_swapchain.h
renderer_vulkan/vk_texture_cache.cpp
renderer_vulkan/vk_texture_cache.h
renderer_vulkan/vk_texture_cache_base.cpp
renderer_vulkan/vk_update_descriptor.cpp
renderer_vulkan/vk_update_descriptor.h
shader_cache.cpp
@@ -179,6 +188,7 @@ add_library(video_core STATIC
texture_cache/samples_helper.h
texture_cache/slot_vector.h
texture_cache/texture_cache.h
texture_cache/texture_cache_base.h
texture_cache/types.h
texture_cache/util.cpp
texture_cache/util.h

View File

@@ -817,7 +817,6 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
const std::size_t size = interval.upper() - interval.lower();
const VAddr cpu_addr = interval.lower();
ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
boost::container::small_vector<BufferCopy, 1> copies;
buffer.ForEachDownloadRangeAndClear(
cpu_addr, size, [&](u64 range_offset, u64 range_size) {
const VAddr buffer_addr = buffer.CpuAddr();

View File

@@ -2,7 +2,6 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <cstring>
#include <fstream>
#include <vector>
#include "common/assert.h"
@@ -17,10 +16,47 @@ extern "C" {
}
namespace Tegra {
#if defined(LIBVA_FOUND)
// Hardware acceleration code from FFmpeg/doc/examples/hw_decode.c originally under MIT license
namespace {
constexpr std::array<const char*, 2> VAAPI_DRIVERS = {
"i915",
"amdgpu",
};
AVPixelFormat GetHwFormat(AVCodecContext*, const AVPixelFormat* pix_fmts) {
for (const AVPixelFormat* p = pix_fmts; *p != AV_PIX_FMT_NONE; ++p) {
if (*p == AV_PIX_FMT_VAAPI) {
return AV_PIX_FMT_VAAPI;
}
}
LOG_INFO(Service_NVDRV, "Could not find compatible GPU AV format, falling back to CPU");
return *pix_fmts;
}
bool CreateVaapiHwdevice(AVBufferRef** av_hw_device) {
AVDictionary* hwdevice_options = nullptr;
av_dict_set(&hwdevice_options, "connection_type", "drm", 0);
for (const auto& driver : VAAPI_DRIVERS) {
av_dict_set(&hwdevice_options, "kernel_driver", driver, 0);
const int hwdevice_error = av_hwdevice_ctx_create(av_hw_device, AV_HWDEVICE_TYPE_VAAPI,
nullptr, hwdevice_options, 0);
if (hwdevice_error >= 0) {
LOG_INFO(Service_NVDRV, "Using VA-API with {}", driver);
av_dict_free(&hwdevice_options);
return true;
}
LOG_DEBUG(Service_NVDRV, "VA-API av_hwdevice_ctx_create failed {}", hwdevice_error);
}
LOG_DEBUG(Service_NVDRV, "VA-API av_hwdevice_ctx_create failed for all drivers");
av_dict_free(&hwdevice_options);
return false;
}
} // namespace
#endif
void AVFrameDeleter(AVFrame* ptr) {
av_frame_unref(ptr);
av_free(ptr);
av_frame_free(&ptr);
}
Codec::Codec(GPU& gpu_, const NvdecCommon::NvdecRegisters& regs)
@@ -32,19 +68,31 @@ Codec::~Codec() {
return;
}
// Free libav memory
AVFrame* av_frame{nullptr};
avcodec_send_packet(av_codec_ctx, nullptr);
av_frame = av_frame_alloc();
AVFrame* av_frame = av_frame_alloc();
avcodec_receive_frame(av_codec_ctx, av_frame);
avcodec_flush_buffers(av_codec_ctx);
av_frame_unref(av_frame);
av_free(av_frame);
av_frame_free(&av_frame);
avcodec_close(av_codec_ctx);
av_buffer_unref(&av_hw_device);
}
void Codec::InitializeHwdec() {
// Prioritize integrated GPU to mitigate bandwidth bottlenecks
#if defined(LIBVA_FOUND)
if (CreateVaapiHwdevice(&av_hw_device)) {
const auto hw_device_ctx = av_buffer_ref(av_hw_device);
ASSERT_MSG(hw_device_ctx, "av_buffer_ref failed");
av_codec_ctx->hw_device_ctx = hw_device_ctx;
av_codec_ctx->get_format = GetHwFormat;
return;
}
#endif
// TODO more GPU accelerated decoders
}
void Codec::Initialize() {
AVCodecID codec{AV_CODEC_ID_NONE};
AVCodecID codec;
switch (current_codec) {
case NvdecCommon::VideoCodec::H264:
codec = AV_CODEC_ID_H264;
@@ -53,22 +101,24 @@ void Codec::Initialize() {
codec = AV_CODEC_ID_VP9;
break;
default:
UNIMPLEMENTED_MSG("Unknown codec {}", current_codec);
return;
}
av_codec = avcodec_find_decoder(codec);
av_codec_ctx = avcodec_alloc_context3(av_codec);
av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0);
// TODO(ameerj): libavcodec gpu hw acceleration
InitializeHwdec();
if (!av_codec_ctx->hw_device_ctx) {
LOG_INFO(Service_NVDRV, "Using FFmpeg software decoding");
}
const auto av_error = avcodec_open2(av_codec_ctx, av_codec, nullptr);
if (av_error < 0) {
LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed.");
avcodec_close(av_codec_ctx);
av_buffer_unref(&av_hw_device);
return;
}
initialized = true;
return;
}
void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) {
@@ -80,36 +130,64 @@ void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) {
void Codec::Decode() {
const bool is_first_frame = !initialized;
if (!initialized) {
if (is_first_frame) {
Initialize();
}
bool vp9_hidden_frame = false;
AVPacket packet{};
av_init_packet(&packet);
std::vector<u8> frame_data;
if (current_codec == NvdecCommon::VideoCodec::H264) {
frame_data = h264_decoder->ComposeFrameHeader(state, is_first_frame);
} else if (current_codec == NvdecCommon::VideoCodec::Vp9) {
frame_data = vp9_decoder->ComposeFrameHeader(state);
vp9_hidden_frame = vp9_decoder->WasFrameHidden();
}
AVPacket packet{};
av_init_packet(&packet);
packet.data = frame_data.data();
packet.size = static_cast<s32>(frame_data.size());
avcodec_send_packet(av_codec_ctx, &packet);
if (!vp9_hidden_frame) {
// Only receive/store visible frames
AVFramePtr frame = AVFramePtr{av_frame_alloc(), AVFrameDeleter};
avcodec_receive_frame(av_codec_ctx, frame.get());
av_frames.push(std::move(frame));
// Limit queue to 10 frames. Workaround for ZLA decode and queue spam
if (av_frames.size() > 10) {
av_frames.pop();
}
if (const int ret = avcodec_send_packet(av_codec_ctx, &packet); ret) {
LOG_DEBUG(Service_NVDRV, "avcodec_send_packet error {}", ret);
return;
}
// Only receive/store visible frames
if (vp9_hidden_frame) {
return;
}
AVFrame* hw_frame = av_frame_alloc();
AVFrame* sw_frame = hw_frame;
ASSERT_MSG(hw_frame, "av_frame_alloc hw_frame failed");
if (const int ret = avcodec_receive_frame(av_codec_ctx, hw_frame); ret) {
LOG_DEBUG(Service_NVDRV, "avcodec_receive_frame error {}", ret);
av_frame_free(&hw_frame);
return;
}
if (!hw_frame->width || !hw_frame->height) {
LOG_WARNING(Service_NVDRV, "Zero width or height in frame");
av_frame_free(&hw_frame);
return;
}
#if defined(LIBVA_FOUND)
// Hardware acceleration code from FFmpeg/doc/examples/hw_decode.c under MIT license
if (hw_frame->format == AV_PIX_FMT_VAAPI) {
sw_frame = av_frame_alloc();
ASSERT_MSG(sw_frame, "av_frame_alloc sw_frame failed");
// Can't use AV_PIX_FMT_YUV420P and share code with software decoding in vic.cpp
// because Intel drivers crash unless using AV_PIX_FMT_NV12
sw_frame->format = AV_PIX_FMT_NV12;
const int transfer_data_ret = av_hwframe_transfer_data(sw_frame, hw_frame, 0);
ASSERT_MSG(!transfer_data_ret, "av_hwframe_transfer_data error {}", transfer_data_ret);
av_frame_free(&hw_frame);
}
#endif
if (sw_frame->format != AV_PIX_FMT_YUV420P && sw_frame->format != AV_PIX_FMT_NV12) {
UNIMPLEMENTED_MSG("Unexpected video format from host graphics: {}", sw_frame->format);
av_frame_free(&sw_frame);
return;
}
av_frames.push(AVFramePtr{sw_frame, AVFrameDeleter});
if (av_frames.size() > 10) {
LOG_TRACE(Service_NVDRV, "av_frames.push overflow dropped frame");
av_frames.pop();
}
}
@@ -119,7 +197,6 @@ AVFramePtr Codec::GetCurrentFrame() {
if (av_frames.empty()) {
return AVFramePtr{nullptr, AVFrameDeleter};
}
AVFramePtr frame = std::move(av_frames.front());
av_frames.pop();
return frame;
@@ -144,6 +221,5 @@ std::string_view Codec::GetCurrentCodecName() const {
default:
return "Unknown";
}
};
}
} // namespace Tegra

View File

@@ -22,7 +22,6 @@ extern "C" {
namespace Tegra {
class GPU;
struct VicRegisters;
void AVFrameDeleter(AVFrame* ptr);
using AVFramePtr = std::unique_ptr<AVFrame, decltype(&AVFrameDeleter)>;
@@ -55,10 +54,13 @@ public:
[[nodiscard]] std::string_view GetCurrentCodecName() const;
private:
void InitializeHwdec();
bool initialized{};
NvdecCommon::VideoCodec current_codec{NvdecCommon::VideoCodec::None};
AVCodec* av_codec{nullptr};
AVBufferRef* av_hw_device{nullptr};
AVCodecContext* av_codec_ctx{nullptr};
GPU& gpu;

View File

@@ -11,6 +11,9 @@
namespace Tegra::Decoder {
namespace {
constexpr u32 diff_update_probability = 252;
constexpr u32 frame_sync_code = 0x498342;
// Default compressed header probabilities once frame context resets
constexpr Vp9EntropyProbs default_probs{
.y_mode_prob{
@@ -361,8 +364,7 @@ Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state)
InsertEntropy(state.vp9_entropy_probs_offset, vp9_info.entropy);
// surface_luma_offset[0:3] contains the address of the reference frame offsets in the following
// order: last, golden, altref, current. It may be worthwhile to track the updates done here
// to avoid buffering frame data needed for reference frame updating in the header composition.
// order: last, golden, altref, current.
std::copy(state.surface_luma_offset.begin(), state.surface_luma_offset.begin() + 4,
vp9_info.frame_offsets.begin());
@@ -384,40 +386,25 @@ Vp9FrameContainer VP9::GetCurrentFrame(const NvdecCommon::NvdecRegisters& state)
gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, current_frame.bit_stream.data(),
current_frame.info.bitstream_size);
}
// Buffer two frames, saving the last show frame info
if (!next_next_frame.bit_stream.empty()) {
if (!next_frame.bit_stream.empty()) {
Vp9FrameContainer temp{
.info = current_frame.info,
.bit_stream = std::move(current_frame.bit_stream),
};
next_next_frame.info.show_frame = current_frame.info.last_frame_shown;
current_frame.info = next_next_frame.info;
current_frame.bit_stream = std::move(next_next_frame.bit_stream);
next_next_frame = std::move(temp);
if (!next_frame.bit_stream.empty()) {
Vp9FrameContainer temp2{
.info = current_frame.info,
.bit_stream = std::move(current_frame.bit_stream),
};
next_frame.info.show_frame = current_frame.info.last_frame_shown;
current_frame.info = next_frame.info;
current_frame.bit_stream = std::move(next_frame.bit_stream);
next_frame = std::move(temp2);
} else {
next_frame.info = current_frame.info;
next_frame.bit_stream = std::move(current_frame.bit_stream);
}
next_frame.info.show_frame = current_frame.info.last_frame_shown;
current_frame.info = next_frame.info;
current_frame.bit_stream = std::move(next_frame.bit_stream);
next_frame = std::move(temp);
} else {
next_next_frame.info = current_frame.info;
next_next_frame.bit_stream = std::move(current_frame.bit_stream);
next_frame.info = current_frame.info;
next_frame.bit_stream = current_frame.bit_stream;
}
return current_frame;
}
std::vector<u8> VP9::ComposeCompressedHeader() {
VpxRangeEncoder writer{};
const bool update_probs = current_frame_info.show_frame && !current_frame_info.is_key_frame;
const bool update_probs = !current_frame_info.is_key_frame && current_frame_info.show_frame;
if (!current_frame_info.lossless) {
if (static_cast<u32>(current_frame_info.transform_mode) >= 3) {
writer.Write(3, 2);
@@ -613,86 +600,64 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() {
// Reset context
prev_frame_probs = default_probs;
swap_next_golden = false;
swap_ref_indices = false;
loop_filter_ref_deltas.fill(0);
loop_filter_mode_deltas.fill(0);
// allow frames offsets to stabilize before checking for golden frames
grace_period = 4;
// On key frames, all frame slots are set to the current frame,
// so the value of the selected slot doesn't really matter.
frame_ctxs.fill({current_frame_number, false, default_probs});
frame_ctxs.fill(default_probs);
// intra only, meaning the frame can be recreated with no other references
current_frame_info.intra_only = true;
} else {
if (!current_frame_info.show_frame) {
uncomp_writer.WriteBit(current_frame_info.intra_only);
if (!current_frame_info.last_frame_was_key) {
swap_next_golden = !swap_next_golden;
}
} else {
current_frame_info.intra_only = false;
}
if (!current_frame_info.error_resilient_mode) {
uncomp_writer.WriteU(0, 2); // Reset frame context.
}
// Last, Golden, Altref frames
std::array<s32, 3> ref_frame_index{0, 1, 2};
// Set when next frame is hidden
// altref and golden references are swapped
if (swap_next_golden) {
ref_frame_index = std::array<s32, 3>{0, 2, 1};
const auto& curr_offsets = current_frame_info.frame_offsets;
const auto& next_offsets = next_frame.info.frame_offsets;
const bool ref_frames_different = curr_offsets[1] != curr_offsets[2];
const bool next_references_swap =
(next_offsets[1] == curr_offsets[2]) || (next_offsets[2] == curr_offsets[1]);
const bool needs_ref_swap = ref_frames_different && next_references_swap;
if (needs_ref_swap) {
swap_ref_indices = !swap_ref_indices;
}
union {
u32 raw;
BitField<0, 1, u32> refresh_last;
BitField<1, 2, u32> refresh_golden;
BitField<2, 1, u32> refresh_alt;
} refresh_frame_flags;
// update Last Frame
u64 refresh_frame_flags = 1;
// golden frame may refresh, determined if the next golden frame offset is changed
bool golden_refresh = false;
if (grace_period <= 0) {
for (s32 index = 1; index < 3; ++index) {
if (current_frame_info.frame_offsets[index] !=
next_frame.info.frame_offsets[index]) {
current_frame_info.refresh_frame[index] = true;
golden_refresh = true;
grace_period = 3;
}
refresh_frame_flags.raw = 0;
for (u32 index = 0; index < 3; ++index) {
// Refresh indices that use the current frame as an index
if (curr_offsets[3] == next_offsets[index]) {
refresh_frame_flags.raw |= 1u << index;
}
}
if (current_frame_info.show_frame &&
(!next_frame.info.show_frame || next_frame.info.is_key_frame)) {
// Update golden frame
refresh_frame_flags = swap_next_golden ? 2 : 4;
if (swap_ref_indices) {
const u32 temp = refresh_frame_flags.refresh_golden;
refresh_frame_flags.refresh_golden.Assign(refresh_frame_flags.refresh_alt.Value());
refresh_frame_flags.refresh_alt.Assign(temp);
}
if (!current_frame_info.show_frame) {
// Update altref
refresh_frame_flags = swap_next_golden ? 2 : 4;
} else if (golden_refresh) {
refresh_frame_flags = 3;
}
if (current_frame_info.intra_only) {
uncomp_writer.WriteU(frame_sync_code, 24);
uncomp_writer.WriteU(static_cast<s32>(refresh_frame_flags), 8);
uncomp_writer.WriteU(refresh_frame_flags.raw, 8);
uncomp_writer.WriteU(current_frame_info.frame_size.width - 1, 16);
uncomp_writer.WriteU(current_frame_info.frame_size.height - 1, 16);
uncomp_writer.WriteBit(false); // Render and frame size different.
} else {
uncomp_writer.WriteU(static_cast<s32>(refresh_frame_flags), 8);
for (s32 index = 1; index < 4; index++) {
const bool swap_indices = needs_ref_swap ^ swap_ref_indices;
const auto ref_frame_index = swap_indices ? std::array{0, 2, 1} : std::array{0, 1, 2};
uncomp_writer.WriteU(refresh_frame_flags.raw, 8);
for (size_t index = 1; index < 4; index++) {
uncomp_writer.WriteU(ref_frame_index[index - 1], 3);
uncomp_writer.WriteU(current_frame_info.ref_frame_sign_bias[index], 1);
}
uncomp_writer.WriteBit(true); // Frame size with refs.
uncomp_writer.WriteBit(false); // Render and frame size different.
uncomp_writer.WriteBit(current_frame_info.allow_high_precision_mv);
@@ -714,10 +679,9 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() {
frame_ctx_idx = 1;
}
uncomp_writer.WriteU(frame_ctx_idx, 2); // Frame context index.
prev_frame_probs =
frame_ctxs[frame_ctx_idx].probs; // reference probabilities for compressed header
frame_ctxs[frame_ctx_idx] = {current_frame_number, false, current_frame_info.entropy};
uncomp_writer.WriteU(frame_ctx_idx, 2); // Frame context index.
prev_frame_probs = frame_ctxs[frame_ctx_idx]; // reference probabilities for compressed header
frame_ctxs[frame_ctx_idx] = current_frame_info.entropy;
uncomp_writer.WriteU(current_frame_info.first_level, 6);
uncomp_writer.WriteU(current_frame_info.sharpness_level, 3);
@@ -812,7 +776,6 @@ const std::vector<u8>& VP9::ComposeFrameHeader(const NvdecCommon::NvdecRegisters
current_frame_info = curr_frame.info;
bitstream = std::move(curr_frame.bit_stream);
}
// The uncompressed header routine sets PrevProb parameters needed for the compressed header
auto uncomp_writer = ComposeUncompressedHeader();
std::vector<u8> compressed_header = ComposeCompressedHeader();
@@ -828,13 +791,6 @@ const std::vector<u8>& VP9::ComposeFrameHeader(const NvdecCommon::NvdecRegisters
frame.begin() + uncompressed_header.size());
std::copy(bitstream.begin(), bitstream.end(),
frame.begin() + uncompressed_header.size() + compressed_header.size());
// keep track of frame number
current_frame_number++;
grace_period--;
// don't display hidden frames
hidden = !current_frame_info.show_frame;
return frame;
}

View File

@@ -14,7 +14,6 @@
namespace Tegra {
class GPU;
enum class FrameType { KeyFrame = 0, InterFrame = 1 };
namespace Decoder {
/// The VpxRangeEncoder, and VpxBitStreamWriter classes are used to compose the
@@ -124,7 +123,7 @@ public:
/// Returns true if the most recent frame was a hidden frame.
[[nodiscard]] bool WasFrameHidden() const {
return hidden;
return !current_frame_info.show_frame;
}
private:
@@ -178,19 +177,12 @@ private:
std::array<s8, 4> loop_filter_ref_deltas{};
std::array<s8, 2> loop_filter_mode_deltas{};
bool hidden = false;
s64 current_frame_number = -2; // since we buffer 2 frames
s32 grace_period = 6; // frame offsets need to stabilize
std::array<FrameContexts, 4> frame_ctxs{};
Vp9FrameContainer next_frame{};
Vp9FrameContainer next_next_frame{};
bool swap_next_golden{};
std::array<Vp9EntropyProbs, 4> frame_ctxs{};
bool swap_ref_indices{};
Vp9PictureInfo current_frame_info{};
Vp9EntropyProbs prev_frame_probs{};
s32 diff_update_probability = 252;
s32 frame_sync_code = 0x498342;
};
} // namespace Decoder

View File

@@ -176,7 +176,7 @@ struct PictureInfo {
.frame_size_changed = (vp9_flags & FrameFlags::FrameSizeChanged) != 0,
.error_resilient_mode = (vp9_flags & FrameFlags::ErrorResilientMode) != 0,
.last_frame_shown = (vp9_flags & FrameFlags::LastShowFrame) != 0,
.show_frame = false,
.show_frame = true,
.ref_frame_sign_bias = ref_frame_sign_bias,
.base_q_index = base_q_index,
.y_dc_delta_q = y_dc_delta_q,
@@ -296,12 +296,6 @@ struct RefPoolElement {
bool refresh{};
};
struct FrameContexts {
s64 from;
bool adapted;
Vp9EntropyProbs probs;
};
#define ASSERT_POSITION(field_name, position) \
static_assert(offsetof(Vp9EntropyProbs, field_name) == position, \
"Field " #field_name " has invalid position")

View File

@@ -39,7 +39,7 @@ void Nvdec::Execute() {
codec->Decode();
break;
default:
UNIMPLEMENTED_MSG("Unknown codec {}", static_cast<u32>(codec->GetCurrentCodec()));
UNIMPLEMENTED_MSG("Codec {}", codec->GetCurrentCodecName());
break;
}
}

View File

@@ -46,11 +46,8 @@ void Vic::ProcessMethod(Method method, u32 argument) {
case Method::SetOutputSurfaceLumaOffset:
output_surface_luma_address = arg;
break;
case Method::SetOutputSurfaceChromaUOffset:
output_surface_chroma_u_address = arg;
break;
case Method::SetOutputSurfaceChromaVOffset:
output_surface_chroma_v_address = arg;
case Method::SetOutputSurfaceChromaOffset:
output_surface_chroma_address = arg;
break;
default:
break;
@@ -65,11 +62,10 @@ void Vic::Execute() {
const VicConfig config{gpu.MemoryManager().Read<u64>(config_struct_address + 0x20)};
const AVFramePtr frame_ptr = nvdec_processor->GetFrame();
const auto* frame = frame_ptr.get();
if (!frame || frame->width == 0 || frame->height == 0) {
if (!frame) {
return;
}
const VideoPixelFormat pixel_format =
static_cast<VideoPixelFormat>(config.pixel_format.Value());
const auto pixel_format = static_cast<VideoPixelFormat>(config.pixel_format.Value());
switch (pixel_format) {
case VideoPixelFormat::BGRA8:
case VideoPixelFormat::RGBA8: {
@@ -83,16 +79,18 @@ void Vic::Execute() {
sws_freeContext(scaler_ctx);
scaler_ctx = nullptr;
// FFmpeg returns all frames in YUV420, convert it into expected format
scaler_ctx =
sws_getContext(frame->width, frame->height, AV_PIX_FMT_YUV420P, frame->width,
frame->height, target_format, 0, nullptr, nullptr, nullptr);
// Frames are decoded into either YUV420 or NV12 formats. Convert to desired format
scaler_ctx = sws_getContext(frame->width, frame->height,
static_cast<AVPixelFormat>(frame->format), frame->width,
frame->height, target_format, 0, nullptr, nullptr, nullptr);
scaler_width = frame->width;
scaler_height = frame->height;
}
// Get Converted frame
const std::size_t linear_size = frame->width * frame->height * 4;
const u32 width = static_cast<u32>(frame->width);
const u32 height = static_cast<u32>(frame->height);
const std::size_t linear_size = width * height * 4;
// Only allocate frame_buffer once per stream, as the size is not expected to change
if (!converted_frame_buffer) {
@@ -109,11 +107,10 @@ void Vic::Execute() {
if (blk_kind != 0) {
// swizzle pitch linear to block linear
const u32 block_height = static_cast<u32>(config.block_linear_height_log2);
const auto size = Tegra::Texture::CalculateSize(true, 4, frame->width, frame->height, 1,
block_height, 0);
const auto size =
Tegra::Texture::CalculateSize(true, 4, width, height, 1, block_height, 0);
luma_buffer.resize(size);
Tegra::Texture::SwizzleSubrect(frame->width, frame->height, frame->width * 4,
frame->width, 4, luma_buffer.data(),
Tegra::Texture::SwizzleSubrect(width, height, width * 4, width, 4, luma_buffer.data(),
converted_frame_buffer.get(), block_height, 0, 0);
gpu.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(), size);
@@ -131,41 +128,65 @@ void Vic::Execute() {
const std::size_t surface_height = config.surface_height_minus1 + 1;
const auto frame_width = std::min(surface_width, static_cast<size_t>(frame->width));
const auto frame_height = std::min(surface_height, static_cast<size_t>(frame->height));
const std::size_t half_width = frame_width / 2;
const std::size_t half_height = frame_height / 2;
const std::size_t aligned_width = (surface_width + 0xff) & ~0xff;
const std::size_t aligned_width = (surface_width + 0xff) & ~0xffUL;
const auto* luma_ptr = frame->data[0];
const auto* chroma_b_ptr = frame->data[1];
const auto* chroma_r_ptr = frame->data[2];
const auto stride = static_cast<size_t>(frame->linesize[0]);
const auto half_stride = static_cast<size_t>(frame->linesize[1]);
luma_buffer.resize(aligned_width * surface_height);
chroma_buffer.resize(aligned_width * surface_height / 2);
// Populate luma buffer
const u8* luma_src = frame->data[0];
for (std::size_t y = 0; y < frame_height; ++y) {
const std::size_t src = y * stride;
const std::size_t dst = y * aligned_width;
for (std::size_t x = 0; x < frame_width; ++x) {
luma_buffer[dst + x] = luma_ptr[src + x];
luma_buffer[dst + x] = luma_src[src + x];
}
}
gpu.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(),
luma_buffer.size());
// Populate chroma buffer from both channels with interleaving.
for (std::size_t y = 0; y < half_height; ++y) {
const std::size_t src = y * half_stride;
const std::size_t dst = y * aligned_width;
// Chroma
const std::size_t half_height = frame_height / 2;
const auto half_stride = static_cast<size_t>(frame->linesize[1]);
for (std::size_t x = 0; x < half_width; ++x) {
chroma_buffer[dst + x * 2] = chroma_b_ptr[src + x];
chroma_buffer[dst + x * 2 + 1] = chroma_r_ptr[src + x];
switch (frame->format) {
case AV_PIX_FMT_YUV420P: {
// Frame from FFmpeg software
// Populate chroma buffer from both channels with interleaving.
const std::size_t half_width = frame_width / 2;
const u8* chroma_b_src = frame->data[1];
const u8* chroma_r_src = frame->data[2];
for (std::size_t y = 0; y < half_height; ++y) {
const std::size_t src = y * half_stride;
const std::size_t dst = y * aligned_width;
for (std::size_t x = 0; x < half_width; ++x) {
chroma_buffer[dst + x * 2] = chroma_b_src[src + x];
chroma_buffer[dst + x * 2 + 1] = chroma_r_src[src + x];
}
}
break;
}
gpu.MemoryManager().WriteBlock(output_surface_chroma_u_address, chroma_buffer.data(),
case AV_PIX_FMT_NV12: {
// Frame from VA-API hardware
// This is already interleaved so just copy
const u8* chroma_src = frame->data[1];
for (std::size_t y = 0; y < half_height; ++y) {
const std::size_t src = y * stride;
const std::size_t dst = y * aligned_width;
for (std::size_t x = 0; x < frame_width; ++x) {
chroma_buffer[dst + x] = chroma_src[src + x];
}
}
break;
}
default:
UNREACHABLE();
break;
}
gpu.MemoryManager().WriteBlock(output_surface_chroma_address, chroma_buffer.data(),
chroma_buffer.size());
break;
}

View File

@@ -22,8 +22,8 @@ public:
SetControlParams = 0x1c1,
SetConfigStructOffset = 0x1c2,
SetOutputSurfaceLumaOffset = 0x1c8,
SetOutputSurfaceChromaUOffset = 0x1c9,
SetOutputSurfaceChromaVOffset = 0x1ca
SetOutputSurfaceChromaOffset = 0x1c9,
SetOutputSurfaceChromaUnusedOffset = 0x1ca
};
explicit Vic(GPU& gpu, std::shared_ptr<Nvdec> nvdec_processor);
@@ -64,8 +64,7 @@ private:
GPUVAddr config_struct_address{};
GPUVAddr output_surface_luma_address{};
GPUVAddr output_surface_chroma_u_address{};
GPUVAddr output_surface_chroma_v_address{};
GPUVAddr output_surface_chroma_address{};
SwsContext* scaler_ctx{};
s32 scaler_width{};

View File

@@ -299,7 +299,7 @@ public:
};
private:
VideoCore::RasterizerInterface* rasterizer;
VideoCore::RasterizerInterface* rasterizer = nullptr;
/// Performs the copy from the source surface to the destination surface as configured in the
/// registers.

View File

@@ -227,7 +227,7 @@ private:
Core::System& system;
MemoryManager& memory_manager;
VideoCore::RasterizerInterface* rasterizer;
VideoCore::RasterizerInterface* rasterizer = nullptr;
std::vector<u8> read_buffer;
std::vector<u8> write_buffer;

View File

@@ -10,33 +10,27 @@
#define END_PUSH_CONSTANTS };
#define UNIFORM(n)
#define BINDING_INPUT_BUFFER 0
#define BINDING_ENC_BUFFER 1
#define BINDING_SWIZZLE_BUFFER 2
#define BINDING_OUTPUT_IMAGE 3
#define BINDING_OUTPUT_IMAGE 1
#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
#define BEGIN_PUSH_CONSTANTS
#define END_PUSH_CONSTANTS
#define UNIFORM(n) layout(location = n) uniform
#define BINDING_SWIZZLE_BUFFER 0
#define BINDING_INPUT_BUFFER 1
#define BINDING_ENC_BUFFER 2
#define BINDING_INPUT_BUFFER 0
#define BINDING_OUTPUT_IMAGE 0
#endif
layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in;
layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
BEGIN_PUSH_CONSTANTS
UNIFORM(1) uvec2 block_dims;
UNIFORM(2) uint bytes_per_block_log2;
UNIFORM(3) uint layer_stride;
UNIFORM(4) uint block_size;
UNIFORM(5) uint x_shift;
UNIFORM(6) uint block_height;
UNIFORM(7) uint block_height_mask;
UNIFORM(2) uint layer_stride;
UNIFORM(3) uint block_size;
UNIFORM(4) uint x_shift;
UNIFORM(5) uint block_height;
UNIFORM(6) uint block_height_mask;
END_PUSH_CONSTANTS
struct EncodingData {
@@ -55,45 +49,35 @@ struct TexelWeightParams {
bool void_extent_hdr;
};
// Swizzle data
layout(binding = BINDING_SWIZZLE_BUFFER, std430) readonly buffer SwizzleTable {
uint swizzle_table[];
};
layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU32 {
uint astc_data[];
};
// ASTC Encodings data
layout(binding = BINDING_ENC_BUFFER, std430) readonly buffer EncodingsValues {
EncodingData encoding_values[];
uvec4 astc_data[];
};
layout(binding = BINDING_OUTPUT_IMAGE, rgba8) uniform writeonly image2DArray dest_image;
const uint GOB_SIZE_X = 64;
const uint GOB_SIZE_Y = 8;
const uint GOB_SIZE_Z = 1;
const uint GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z;
const uint GOB_SIZE_X_SHIFT = 6;
const uint GOB_SIZE_Y_SHIFT = 3;
const uint GOB_SIZE_Z_SHIFT = 0;
const uint GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT;
const uint GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT;
const uvec2 SWIZZLE_MASK = uvec2(GOB_SIZE_X - 1, GOB_SIZE_Y - 1);
const int BLOCK_SIZE_IN_BYTES = 16;
const int BLOCK_INFO_ERROR = 0;
const int BLOCK_INFO_VOID_EXTENT_HDR = 1;
const int BLOCK_INFO_VOID_EXTENT_LDR = 2;
const int BLOCK_INFO_NORMAL = 3;
const uint BYTES_PER_BLOCK_LOG2 = 4;
const int JUST_BITS = 0;
const int QUINT = 1;
const int TRIT = 2;
// ASTC Encodings data, sorted in ascending order based on their BitLength value
// (see GetBitLength() function)
EncodingData encoding_values[22] = EncodingData[](
EncodingData(JUST_BITS, 0, 0, 0), EncodingData(JUST_BITS, 1, 0, 0), EncodingData(TRIT, 0, 0, 0),
EncodingData(JUST_BITS, 2, 0, 0), EncodingData(QUINT, 0, 0, 0), EncodingData(TRIT, 1, 0, 0),
EncodingData(JUST_BITS, 3, 0, 0), EncodingData(QUINT, 1, 0, 0), EncodingData(TRIT, 2, 0, 0),
EncodingData(JUST_BITS, 4, 0, 0), EncodingData(QUINT, 2, 0, 0), EncodingData(TRIT, 3, 0, 0),
EncodingData(JUST_BITS, 5, 0, 0), EncodingData(QUINT, 3, 0, 0), EncodingData(TRIT, 4, 0, 0),
EncodingData(JUST_BITS, 6, 0, 0), EncodingData(QUINT, 4, 0, 0), EncodingData(TRIT, 5, 0, 0),
EncodingData(JUST_BITS, 7, 0, 0), EncodingData(QUINT, 5, 0, 0), EncodingData(TRIT, 6, 0, 0),
EncodingData(JUST_BITS, 8, 0, 0)
);
// The following constants are expanded variants of the Replicate()
// function calls corresponding to the following arguments:
// value: index into the generated table
@@ -135,44 +119,37 @@ const uint REPLICATE_7_BIT_TO_8_TABLE[128] =
// Input ASTC texture globals
uint current_index = 0;
int bitsread = 0;
uint total_bitsread = 0;
uint local_buff[16];
int total_bitsread = 0;
uvec4 local_buff;
// Color data globals
uint color_endpoint_data[16];
uvec4 color_endpoint_data;
int color_bitsread = 0;
uint total_color_bitsread = 0;
int color_index = 0;
// Four values, two endpoints, four maximum paritions
uint color_values[32];
int colvals_index = 0;
// Weight data globals
uint texel_weight_data[16];
uvec4 texel_weight_data;
int texel_bitsread = 0;
uint total_texel_bitsread = 0;
int texel_index = 0;
bool texel_flag = false;
// Global "vectors" to be pushed into when decoding
EncodingData result_vector[100];
EncodingData result_vector[144];
int result_index = 0;
EncodingData texel_vector[100];
EncodingData texel_vector[144];
int texel_vector_index = 0;
uint unquantized_texel_weights[2][144];
uint SwizzleOffset(uvec2 pos) {
pos = pos & SWIZZLE_MASK;
return swizzle_table[pos.y * 64 + pos.x];
}
uint ReadTexel(uint offset) {
// extract the 8-bit value from the 32-bit packed data.
return bitfieldExtract(astc_data[offset / 4], int((offset * 8) & 24), 8);
uint x = pos.x;
uint y = pos.y;
return ((x % 64) / 32) * 256 + ((y % 8) / 2) * 64 + ((x % 32) / 16) * 32 +
(y % 2) * 16 + (x % 16);
}
// Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)]
@@ -278,14 +255,10 @@ uint Hash52(uint p) {
return p;
}
uint SelectPartition(uint seed, uint x, uint y, uint z, uint partition_count, bool small_block) {
if (partition_count == 1) {
return 0;
}
uint Select2DPartition(uint seed, uint x, uint y, uint partition_count, bool small_block) {
if (small_block) {
x <<= 1;
y <<= 1;
z <<= 1;
}
seed += (partition_count - 1) * 1024;
@@ -299,10 +272,6 @@ uint SelectPartition(uint seed, uint x, uint y, uint z, uint partition_count, bo
uint seed6 = uint((rnum >> 20) & 0xF);
uint seed7 = uint((rnum >> 24) & 0xF);
uint seed8 = uint((rnum >> 28) & 0xF);
uint seed9 = uint((rnum >> 18) & 0xF);
uint seed10 = uint((rnum >> 22) & 0xF);
uint seed11 = uint((rnum >> 26) & 0xF);
uint seed12 = uint(((rnum >> 30) | (rnum << 2)) & 0xF);
seed1 = (seed1 * seed1);
seed2 = (seed2 * seed2);
@@ -312,12 +281,8 @@ uint SelectPartition(uint seed, uint x, uint y, uint z, uint partition_count, bo
seed6 = (seed6 * seed6);
seed7 = (seed7 * seed7);
seed8 = (seed8 * seed8);
seed9 = (seed9 * seed9);
seed10 = (seed10 * seed10);
seed11 = (seed11 * seed11);
seed12 = (seed12 * seed12);
int sh1, sh2, sh3;
uint sh1, sh2;
if ((seed & 1) > 0) {
sh1 = (seed & 2) > 0 ? 4 : 5;
sh2 = (partition_count == 3) ? 6 : 5;
@@ -325,25 +290,19 @@ uint SelectPartition(uint seed, uint x, uint y, uint z, uint partition_count, bo
sh1 = (partition_count == 3) ? 6 : 5;
sh2 = (seed & 2) > 0 ? 4 : 5;
}
sh3 = (seed & 0x10) > 0 ? sh1 : sh2;
seed1 >>= sh1;
seed2 >>= sh2;
seed3 >>= sh1;
seed4 >>= sh2;
seed5 >>= sh1;
seed6 >>= sh2;
seed7 >>= sh1;
seed8 >>= sh2;
seed1 = (seed1 >> sh1);
seed2 = (seed2 >> sh2);
seed3 = (seed3 >> sh1);
seed4 = (seed4 >> sh2);
seed5 = (seed5 >> sh1);
seed6 = (seed6 >> sh2);
seed7 = (seed7 >> sh1);
seed8 = (seed8 >> sh2);
seed9 = (seed9 >> sh3);
seed10 = (seed10 >> sh3);
seed11 = (seed11 >> sh3);
seed12 = (seed12 >> sh3);
uint a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14);
uint b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10);
uint c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6);
uint d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2);
uint a = seed1 * x + seed2 * y + (rnum >> 14);
uint b = seed3 * x + seed4 * y + (rnum >> 10);
uint c = seed5 * x + seed6 * y + (rnum >> 6);
uint d = seed7 * x + seed8 * y + (rnum >> 2);
a &= 0x3F;
b &= 0x3F;
@@ -368,58 +327,37 @@ uint SelectPartition(uint seed, uint x, uint y, uint z, uint partition_count, bo
}
}
uint Select2DPartition(uint seed, uint x, uint y, uint partition_count, bool small_block) {
return SelectPartition(seed, x, y, 0, partition_count, small_block);
}
uint ReadBit() {
if (current_index >= local_buff.length()) {
uint ExtractBits(uvec4 payload, int offset, int bits) {
if (bits <= 0) {
return 0;
}
uint bit = bitfieldExtract(local_buff[current_index], bitsread, 1);
++bitsread;
++total_bitsread;
if (bitsread == 8) {
++current_index;
bitsread = 0;
int last_offset = offset + bits - 1;
int shifted_offset = offset >> 5;
if ((last_offset >> 5) == shifted_offset) {
return bitfieldExtract(payload[shifted_offset], offset & 31, bits);
}
return bit;
int first_bits = 32 - (offset & 31);
int result_first = int(bitfieldExtract(payload[shifted_offset], offset & 31, first_bits));
int result_second = int(bitfieldExtract(payload[shifted_offset + 1], 0, bits - first_bits));
return result_first | (result_second << first_bits);
}
uint StreamBits(uint num_bits) {
uint ret = 0;
for (uint i = 0; i < num_bits; i++) {
ret |= ((ReadBit() & 1) << i);
}
int int_bits = int(num_bits);
uint ret = ExtractBits(local_buff, total_bitsread, int_bits);
total_bitsread += int_bits;
return ret;
}
uint ReadColorBit() {
uint bit = 0;
if (texel_flag) {
bit = bitfieldExtract(texel_weight_data[texel_index], texel_bitsread, 1);
++texel_bitsread;
++total_texel_bitsread;
if (texel_bitsread == 8) {
++texel_index;
texel_bitsread = 0;
}
} else {
bit = bitfieldExtract(color_endpoint_data[color_index], color_bitsread, 1);
++color_bitsread;
++total_color_bitsread;
if (color_bitsread == 8) {
++color_index;
color_bitsread = 0;
}
}
return bit;
}
uint StreamColorBits(uint num_bits) {
uint ret = 0;
for (uint i = 0; i < num_bits; i++) {
ret |= ((ReadColorBit() & 1) << i);
int int_bits = int(num_bits);
if (texel_flag) {
ret = ExtractBits(texel_weight_data, texel_bitsread, int_bits);
texel_bitsread += int_bits;
} else {
ret = ExtractBits(color_endpoint_data, color_bitsread, int_bits);
color_bitsread += int_bits;
}
return ret;
}
@@ -596,22 +534,16 @@ void DecodeColorValues(uvec4 modes, uint num_partitions, uint color_data_bits) {
for (uint i = 0; i < num_partitions; i++) {
num_values += ((modes[i] >> 2) + 1) << 1;
}
int range = 256;
while (--range > 0) {
EncodingData val = encoding_values[range];
// Find the largest encoding that's within color_data_bits
// TODO(ameerj): profile with binary search
int range = 0;
while (++range < encoding_values.length()) {
uint bit_length = GetBitLength(num_values, range);
if (bit_length <= color_data_bits) {
while (--range > 0) {
EncodingData newval = encoding_values[range];
if (newval.encoding != val.encoding && newval.num_bits != val.num_bits) {
break;
}
}
++range;
if (bit_length > color_data_bits) {
break;
}
}
DecodeIntegerSequence(range, num_values);
DecodeIntegerSequence(range - 1, num_values);
uint out_index = 0;
for (int itr = 0; itr < result_index; ++itr) {
if (out_index >= num_values) {
@@ -1028,7 +960,7 @@ int FindLayout(uint mode) {
return 5;
}
TexelWeightParams DecodeBlockInfo(uint block_index) {
TexelWeightParams DecodeBlockInfo() {
TexelWeightParams params = TexelWeightParams(uvec2(0), 0, false, false, false, false);
uint mode = StreamBits(11);
if ((mode & 0x1ff) == 0x1fc) {
@@ -1110,10 +1042,10 @@ TexelWeightParams DecodeBlockInfo(uint block_index) {
}
weight_index -= 2;
if ((mode_layout != 9) && ((mode & 0x200) != 0)) {
const int max_weights[6] = int[6](9, 11, 15, 19, 23, 31);
const int max_weights[6] = int[6](7, 8, 9, 10, 11, 12);
params.max_weight = max_weights[weight_index];
} else {
const int max_weights[6] = int[6](1, 2, 3, 4, 5, 7);
const int max_weights[6] = int[6](1, 2, 3, 4, 5, 6);
params.max_weight = max_weights[weight_index];
}
return params;
@@ -1144,8 +1076,8 @@ void FillVoidExtentLDR(ivec3 coord) {
}
}
void DecompressBlock(ivec3 coord, uint block_index) {
TexelWeightParams params = DecodeBlockInfo(block_index);
void DecompressBlock(ivec3 coord) {
TexelWeightParams params = DecodeBlockInfo();
if (params.error_state) {
FillError(coord);
return;
@@ -1212,7 +1144,7 @@ void DecompressBlock(ivec3 coord, uint block_index) {
// Read color data...
uint color_data_bits = remaining_bits;
while (remaining_bits > 0) {
int nb = int(min(remaining_bits, 8U));
int nb = int(min(remaining_bits, 32U));
uint b = StreamBits(nb);
color_endpoint_data[ced_pointer] = uint(bitfieldExtract(b, 0, nb));
++ced_pointer;
@@ -1254,25 +1186,20 @@ void DecompressBlock(ivec3 coord, uint block_index) {
ComputeEndpoints(endpoints[i][0], endpoints[i][1], color_endpoint_mode[i]);
}
for (uint i = 0; i < 16; i++) {
texel_weight_data[i] = local_buff[i];
}
for (uint i = 0; i < 8; i++) {
#define REVERSE_BYTE(b) ((b * 0x0802U & 0x22110U) | (b * 0x8020U & 0x88440U)) * 0x10101U >> 16
uint a = REVERSE_BYTE(texel_weight_data[i]);
uint b = REVERSE_BYTE(texel_weight_data[15 - i]);
#undef REVERSE_BYTE
texel_weight_data[i] = uint(bitfieldExtract(b, 0, 8));
texel_weight_data[15 - i] = uint(bitfieldExtract(a, 0, 8));
}
texel_weight_data = local_buff;
texel_weight_data = bitfieldReverse(texel_weight_data).wzyx;
uint clear_byte_start =
(GetPackedBitSize(params.size, params.dual_plane, params.max_weight) >> 3) + 1;
texel_weight_data[clear_byte_start - 1] =
texel_weight_data[clear_byte_start - 1] &
uint byte_insert = ExtractBits(texel_weight_data, int(clear_byte_start - 1) * 8, 8) &
uint(
((1 << (GetPackedBitSize(params.size, params.dual_plane, params.max_weight) % 8)) - 1));
for (uint i = 0; i < 16 - clear_byte_start; i++) {
texel_weight_data[clear_byte_start + i] = 0U;
uint vec_index = (clear_byte_start - 1) >> 2;
texel_weight_data[vec_index] =
bitfieldInsert(texel_weight_data[vec_index], byte_insert, int((clear_byte_start - 1) % 4) * 8, 8);
for (uint i = clear_byte_start; i < 16; ++i) {
uint idx = i >> 2;
texel_weight_data[idx] = bitfieldInsert(texel_weight_data[idx], 0, int(i % 4) * 8, 8);
}
texel_flag = true; // use texel "vector" and bit stream in integer decoding
DecodeIntegerSequence(params.max_weight, GetNumWeightValues(params.size, params.dual_plane));
@@ -1281,8 +1208,11 @@ void DecompressBlock(ivec3 coord, uint block_index) {
for (uint j = 0; j < block_dims.y; j++) {
for (uint i = 0; i < block_dims.x; i++) {
uint local_partition = Select2DPartition(partition_index, i, j, num_partitions,
uint local_partition = 0;
if (num_partitions > 1) {
local_partition = Select2DPartition(partition_index, i, j, num_partitions,
(block_dims.y * block_dims.x) < 32);
}
vec4 p;
uvec4 C0 = ReplicateByteTo16(endpoints[local_partition][0]);
uvec4 C1 = ReplicateByteTo16(endpoints[local_partition][1]);
@@ -1303,7 +1233,7 @@ void DecompressBlock(ivec3 coord, uint block_index) {
void main() {
uvec3 pos = gl_GlobalInvocationID;
pos.x <<= bytes_per_block_log2;
pos.x <<= BYTES_PER_BLOCK_LOG2;
// Read as soon as possible due to its latency
const uint swizzle = SwizzleOffset(pos.xy);
@@ -1321,13 +1251,8 @@ void main() {
if (any(greaterThanEqual(coord, imageSize(dest_image)))) {
return;
}
uint block_index =
pos.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y + pos.y * gl_WorkGroupSize.x + pos.x;
current_index = 0;
bitsread = 0;
for (int i = 0; i < 16; i++) {
local_buff[i] = ReadTexel(offset + i);
}
DecompressBlock(coord, block_index);
local_buff = astc_data[offset / 16];
DecompressBlock(coord);
}

View File

@@ -19,9 +19,6 @@ RendererBase::~RendererBase() = default;
void RendererBase::RefreshBaseSettings() {
UpdateCurrentFramebufferLayout();
renderer_settings.use_framelimiter = Settings::values.use_frame_limit.GetValue();
renderer_settings.set_background_color = true;
}
void RendererBase::UpdateCurrentFramebufferLayout() {
@@ -30,7 +27,7 @@ void RendererBase::UpdateCurrentFramebufferLayout() {
render_window.UpdateCurrentFramebufferLayout(layout.width, layout.height);
}
void RendererBase::RequestScreenshot(void* data, std::function<void()> callback,
void RendererBase::RequestScreenshot(void* data, std::function<void(bool)> callback,
const Layout::FramebufferLayout& layout) {
if (renderer_settings.screenshot_requested) {
LOG_ERROR(Render, "A screenshot is already requested or in progress, ignoring the request");

View File

@@ -21,13 +21,10 @@ class GraphicsContext;
namespace VideoCore {
struct RendererSettings {
std::atomic_bool use_framelimiter{false};
std::atomic_bool set_background_color{false};
// Screenshot
std::atomic<bool> screenshot_requested{false};
void* screenshot_bits{};
std::function<void()> screenshot_complete_callback;
std::function<void(bool)> screenshot_complete_callback;
Layout::FramebufferLayout screenshot_framebuffer_layout;
};
@@ -83,7 +80,7 @@ public:
void RefreshBaseSettings();
/// Request a screenshot of the next frame
void RequestScreenshot(void* data, std::function<void()> callback,
void RequestScreenshot(void* data, std::function<void(bool)> callback,
const Layout::FramebufferLayout& layout);
protected:

View File

@@ -15,7 +15,7 @@
#include "video_core/renderer_opengl/gl_shader_util.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/shader_notify.h"
#include "video_core/texture_cache/texture_cache.h"
#include "video_core/texture_cache/texture_cache_base.h"
#if defined(_MSC_VER) && defined(NDEBUG)
#define LAMBDA_FORCEINLINE [[msvc::forceinline]]

View File

@@ -32,7 +32,7 @@
#include "video_core/renderer_opengl/maxwell_to_gl.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
#include "video_core/shader_cache.h"
#include "video_core/texture_cache/texture_cache.h"
#include "video_core/texture_cache/texture_cache_base.h"
namespace OpenGL {

View File

@@ -441,7 +441,6 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
std::array<const Shader::Info*, Maxwell::MaxShaderStage> infos{};
OGLProgram source_program;
std::array<std::string, 5> sources;
std::array<std::vector<u32>, 5> sources_spirv;
Shader::Backend::Bindings binding;

View File

@@ -18,10 +18,8 @@
#include "video_core/renderer_opengl/maxwell_to_gl.h"
#include "video_core/renderer_opengl/util_shaders.h"
#include "video_core/surface.h"
#include "video_core/texture_cache/format_lookup_table.h"
#include "video_core/texture_cache/formatter.h"
#include "video_core/texture_cache/samples_helper.h"
#include "video_core/texture_cache/texture_cache.h"
#include "video_core/textures/decoders.h"
namespace OpenGL {
namespace {

View File

@@ -12,7 +12,7 @@
#include "shader_recompiler/shader_info.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/util_shaders.h"
#include "video_core/texture_cache/texture_cache.h"
#include "video_core/texture_cache/texture_cache_base.h"
namespace OpenGL {

View File

@@ -0,0 +1,10 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/texture_cache/texture_cache.h"
namespace VideoCommon {
template class VideoCommon::TextureCache<OpenGL::TextureCacheParams>;
}

View File

@@ -328,12 +328,10 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
}
void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
if (renderer_settings.set_background_color) {
// Update background color before drawing
glClearColor(Settings::values.bg_red.GetValue() / 255.0f,
Settings::values.bg_green.GetValue() / 255.0f,
Settings::values.bg_blue.GetValue() / 255.0f, 1.0f);
}
// Update background color before drawing
glClearColor(Settings::values.bg_red.GetValue() / 255.0f,
Settings::values.bg_green.GetValue() / 255.0f,
Settings::values.bg_blue.GetValue() / 255.0f, 1.0f);
// Set projection matrix
const std::array ortho_matrix =
@@ -488,7 +486,7 @@ void RendererOpenGL::RenderScreenshot() {
glBindFramebuffer(GL_READ_FRAMEBUFFER, old_read_fb);
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, old_draw_fb);
renderer_settings.screenshot_complete_callback();
renderer_settings.screenshot_complete_callback(true);
renderer_settings.screenshot_requested = false;
}

View File

@@ -60,19 +60,14 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_)
copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)) {
const auto swizzle_table = Tegra::Texture::MakeSwizzleTable();
swizzle_table_buffer.Create();
astc_buffer.Create();
glNamedBufferStorage(swizzle_table_buffer.handle, sizeof(swizzle_table), &swizzle_table, 0);
glNamedBufferStorage(astc_buffer.handle, sizeof(ASTC_ENCODINGS_VALUES), &ASTC_ENCODINGS_VALUES,
0);
}
UtilShaders::~UtilShaders() = default;
void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
std::span<const VideoCommon::SwizzleParameters> swizzles) {
static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
static constexpr GLuint BINDING_INPUT_BUFFER = 1;
static constexpr GLuint BINDING_ENC_BUFFER = 2;
static constexpr GLuint BINDING_INPUT_BUFFER = 0;
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
const Extent2D tile_size{
@@ -80,34 +75,32 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
.height = VideoCore::Surface::DefaultBlockHeight(image.info.format),
};
program_manager.BindComputeProgram(astc_decoder_program.handle);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_buffer.handle);
glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
glUniform2ui(1, tile_size.width, tile_size.height);
// Ensure buffer data is valid before dispatching
glFlush();
for (const SwizzleParameters& swizzle : swizzles) {
const size_t input_offset = swizzle.buffer_offset + map.offset;
const u32 num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 32U);
const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 32U);
const u32 num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 8U);
const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 8U);
const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info);
ASSERT(params.origin == (std::array<u32, 3>{0, 0, 0}));
ASSERT(params.destination == (std::array<s32, 3>{0, 0, 0}));
ASSERT(params.bytes_per_block_log2 == 4);
glUniform1ui(2, params.bytes_per_block_log2);
glUniform1ui(3, params.layer_stride);
glUniform1ui(4, params.block_size);
glUniform1ui(5, params.x_shift);
glUniform1ui(6, params.block_height);
glUniform1ui(7, params.block_height_mask);
glUniform1ui(2, params.layer_stride);
glUniform1ui(3, params.block_size);
glUniform1ui(4, params.x_shift);
glUniform1ui(5, params.block_height);
glUniform1ui(6, params.block_height_mask);
glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), swizzle.level, GL_TRUE, 0,
GL_WRITE_ONLY, GL_RGBA8);
// ASTC texture data
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset,
image.guest_size_bytes - swizzle.buffer_offset);
glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), swizzle.level, GL_TRUE, 0,
GL_WRITE_ONLY, GL_RGBA8);
glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers);
}

View File

@@ -62,7 +62,6 @@ private:
ProgramManager& program_manager;
OGLBuffer swizzle_table_buffer;
OGLBuffer astc_buffer;
OGLProgram astc_decoder_program;
OGLProgram block_linear_unswizzle_2d_program;

View File

@@ -0,0 +1,100 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <string_view>
#include <fmt/format.h>
#include "common/common_types.h"
#include "common/logging/log.h"
#include "video_core/renderer_vulkan/pipeline_statistics.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
using namespace std::string_view_literals;
static u64 GetUint64(const VkPipelineExecutableStatisticKHR& statistic) {
switch (statistic.format) {
case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_INT64_KHR:
return static_cast<u64>(statistic.value.i64);
case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR:
return statistic.value.u64;
case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_FLOAT64_KHR:
return static_cast<u64>(statistic.value.f64);
default:
return 0;
}
}
PipelineStatistics::PipelineStatistics(const Device& device_) : device{device_} {}
void PipelineStatistics::Collect(VkPipeline pipeline) {
const auto& dev{device.GetLogical()};
const std::vector properties{dev.GetPipelineExecutablePropertiesKHR(pipeline)};
const u32 num_executables{static_cast<u32>(properties.size())};
for (u32 executable = 0; executable < num_executables; ++executable) {
const auto statistics{dev.GetPipelineExecutableStatisticsKHR(pipeline, executable)};
if (statistics.empty()) {
continue;
}
Stats stage_stats;
for (const auto& statistic : statistics) {
const char* const name{statistic.name};
if (name == "Binary Size"sv || name == "Code size"sv || name == "Instruction Count"sv) {
stage_stats.code_size = GetUint64(statistic);
} else if (name == "Register Count"sv) {
stage_stats.register_count = GetUint64(statistic);
} else if (name == "SGPRs"sv || name == "numUsedSgprs"sv) {
stage_stats.sgpr_count = GetUint64(statistic);
} else if (name == "VGPRs"sv || name == "numUsedVgprs"sv) {
stage_stats.vgpr_count = GetUint64(statistic);
} else if (name == "Branches"sv) {
stage_stats.branches_count = GetUint64(statistic);
} else if (name == "Basic Block Count"sv) {
stage_stats.basic_block_count = GetUint64(statistic);
}
}
std::lock_guard lock{mutex};
collected_stats.push_back(stage_stats);
}
}
void PipelineStatistics::Report() const {
double num{};
Stats total;
{
std::lock_guard lock{mutex};
for (const Stats& stats : collected_stats) {
total.code_size += stats.code_size;
total.register_count += stats.register_count;
total.sgpr_count += stats.sgpr_count;
total.vgpr_count += stats.vgpr_count;
total.branches_count += stats.branches_count;
total.basic_block_count += stats.basic_block_count;
}
num = static_cast<double>(collected_stats.size());
}
std::string report;
const auto add = [&](const char* fmt, u64 value) {
if (value > 0) {
report += fmt::format(fmt::runtime(fmt), static_cast<double>(value) / num);
}
};
add("Code size: {:9.03f}\n", total.code_size);
add("Register count: {:9.03f}\n", total.register_count);
add("SGPRs: {:9.03f}\n", total.sgpr_count);
add("VGPRs: {:9.03f}\n", total.vgpr_count);
add("Branches count: {:9.03f}\n", total.branches_count);
add("Basic blocks: {:9.03f}\n", total.basic_block_count);
LOG_INFO(Render_Vulkan,
"\nAverage pipeline statistics\n"
"==========================================\n"
"{}\n",
report);
}
} // namespace Vulkan

View File

@@ -0,0 +1,40 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <mutex>
#include <vector>
#include "common/common_types.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
class Device;
class PipelineStatistics {
public:
explicit PipelineStatistics(const Device& device_);
void Collect(VkPipeline pipeline);
void Report() const;
private:
struct Stats {
u64 code_size{};
u64 register_count{};
u64 sgpr_count{};
u64 vgpr_count{};
u64 branches_count{};
u64 basic_block_count{};
};
const Device& device;
mutable std::mutex mutex;
std::vector<Stats> collected_stats;
};
} // namespace Vulkan

View File

@@ -138,6 +138,7 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
const bool use_accelerated =
rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride);
const bool is_srgb = use_accelerated && screen_info.is_srgb;
RenderScreenshot(*framebuffer, use_accelerated);
bool has_been_recreated = false;
const auto recreate_swapchain = [&] {
@@ -162,7 +163,7 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
if (has_been_recreated) {
blit_screen.Recreate();
}
const VkSemaphore render_semaphore = blit_screen.Draw(*framebuffer, use_accelerated);
const VkSemaphore render_semaphore = blit_screen.DrawToSwapchain(*framebuffer, use_accelerated);
scheduler.Flush(render_semaphore);
scheduler.WaitWorker();
swapchain.Present(render_semaphore);
@@ -193,4 +194,153 @@ void RendererVulkan::Report() const {
telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions);
}
void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& framebuffer,
bool use_accelerated) {
if (!renderer_settings.screenshot_requested) {
return;
}
const Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout};
vk::Image staging_image = device.GetLogical().CreateImage(VkImageCreateInfo{
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.pNext = nullptr,
.flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT,
.imageType = VK_IMAGE_TYPE_2D,
.format = VK_FORMAT_B8G8R8A8_UNORM,
.extent =
{
.width = layout.width,
.height = layout.height,
.depth = 1,
},
.mipLevels = 1,
.arrayLayers = 1,
.samples = VK_SAMPLE_COUNT_1_BIT,
.tiling = VK_IMAGE_TILING_OPTIMAL,
.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT |
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
});
const auto image_commit = memory_allocator.Commit(staging_image, MemoryUsage::DeviceLocal);
const vk::ImageView dst_view = device.GetLogical().CreateImageView(VkImageViewCreateInfo{
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.image = *staging_image,
.viewType = VK_IMAGE_VIEW_TYPE_2D,
.format = screen_info.is_srgb ? VK_FORMAT_B8G8R8A8_SRGB : VK_FORMAT_B8G8R8A8_UNORM,
.components{
.r = VK_COMPONENT_SWIZZLE_IDENTITY,
.g = VK_COMPONENT_SWIZZLE_IDENTITY,
.b = VK_COMPONENT_SWIZZLE_IDENTITY,
.a = VK_COMPONENT_SWIZZLE_IDENTITY,
},
.subresourceRange{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
});
const VkExtent2D render_area{.width = layout.width, .height = layout.height};
const vk::Framebuffer screenshot_fb = blit_screen.CreateFramebuffer(*dst_view, render_area);
// Since we're not rendering to the screen, ignore the render semaphore.
void(blit_screen.Draw(framebuffer, *screenshot_fb, layout, render_area, use_accelerated));
const auto buffer_size = static_cast<VkDeviceSize>(layout.width * layout.height * 4);
const VkBufferCreateInfo dst_buffer_info{
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.size = buffer_size,
.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
};
const vk::Buffer dst_buffer = device.GetLogical().CreateBuffer(dst_buffer_info);
MemoryCommit dst_buffer_memory = memory_allocator.Commit(dst_buffer, MemoryUsage::Download);
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([&](vk::CommandBuffer cmdbuf) {
const VkImageMemoryBarrier read_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
.oldLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = *staging_image,
.subresourceRange{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = 0,
.levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
};
const VkImageMemoryBarrier image_write_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = 0,
.dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = *staging_image,
.subresourceRange{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = 0,
.levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
};
static constexpr VkMemoryBarrier memory_write_barrier{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
};
const VkBufferImageCopy copy{
.bufferOffset = 0,
.bufferRowLength = 0,
.bufferImageHeight = 0,
.imageSubresource{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.mipLevel = 0,
.baseArrayLayer = 0,
.layerCount = 1,
},
.imageOffset{.x = 0, .y = 0, .z = 0},
.imageExtent{
.width = layout.width,
.height = layout.height,
.depth = 1,
},
};
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
0, read_barrier);
cmdbuf.CopyImageToBuffer(*staging_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *dst_buffer,
copy);
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
0, memory_write_barrier, nullptr, image_write_barrier);
});
// Ensure the copy is fully completed before saving the screenshot
scheduler.Finish();
// Copy backing image data to the QImage screenshot buffer
const auto dst_memory_map = dst_buffer_memory.Map();
std::memcpy(renderer_settings.screenshot_bits, dst_memory_map.data(), dst_memory_map.size());
renderer_settings.screenshot_complete_callback(false);
renderer_settings.screenshot_requested = false;
}
} // namespace Vulkan

View File

@@ -54,6 +54,8 @@ public:
private:
void Report() const;
void RenderScreenshot(const Tegra::FramebufferConfig& framebuffer, bool use_accelerated);
Core::TelemetrySession& telemetry_session;
Core::Memory::Memory& cpu_memory;
Tegra::GPU& gpu;

View File

@@ -130,7 +130,10 @@ void VKBlitScreen::Recreate() {
CreateDynamicResources();
}
VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool use_accelerated) {
VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
const VkFramebuffer& host_framebuffer,
const Layout::FramebufferLayout layout, VkExtent2D render_area,
bool use_accelerated) {
RefreshResources(framebuffer);
// Finish any pending renderpass
@@ -145,8 +148,8 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
use_accelerated ? screen_info.image_view : *raw_image_views[image_index]);
BufferData data;
SetUniformData(data, framebuffer);
SetVertexData(data, framebuffer);
SetUniformData(data, layout);
SetVertexData(data, framebuffer, layout);
const std::span<u8> mapped_span = buffer_commit.Map();
std::memcpy(mapped_span.data(), &data, sizeof(data));
@@ -220,52 +223,75 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, write_barrier);
});
}
scheduler.Record([this, image_index, size = swapchain.GetSize()](vk::CommandBuffer cmdbuf) {
const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f;
const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f;
const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f;
const VkClearValue clear_color{
.color = {.float32 = {bg_red, bg_green, bg_blue, 1.0f}},
};
const VkRenderPassBeginInfo renderpass_bi{
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
.pNext = nullptr,
.renderPass = *renderpass,
.framebuffer = *framebuffers[image_index],
.renderArea =
{
.offset = {0, 0},
.extent = size,
},
.clearValueCount = 1,
.pClearValues = &clear_color,
};
const VkViewport viewport{
.x = 0.0f,
.y = 0.0f,
.width = static_cast<float>(size.width),
.height = static_cast<float>(size.height),
.minDepth = 0.0f,
.maxDepth = 1.0f,
};
const VkRect2D scissor{
.offset = {0, 0},
.extent = size,
};
cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE);
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
cmdbuf.SetViewport(0, viewport);
cmdbuf.SetScissor(0, scissor);
scheduler.Record(
[this, host_framebuffer, image_index, size = render_area](vk::CommandBuffer cmdbuf) {
const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f;
const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f;
const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f;
const VkClearValue clear_color{
.color = {.float32 = {bg_red, bg_green, bg_blue, 1.0f}},
};
const VkRenderPassBeginInfo renderpass_bi{
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
.pNext = nullptr,
.renderPass = *renderpass,
.framebuffer = host_framebuffer,
.renderArea =
{
.offset = {0, 0},
.extent = size,
},
.clearValueCount = 1,
.pClearValues = &clear_color,
};
const VkViewport viewport{
.x = 0.0f,
.y = 0.0f,
.width = static_cast<float>(size.width),
.height = static_cast<float>(size.height),
.minDepth = 0.0f,
.maxDepth = 1.0f,
};
const VkRect2D scissor{
.offset = {0, 0},
.extent = size,
};
cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE);
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
cmdbuf.SetViewport(0, viewport);
cmdbuf.SetScissor(0, scissor);
cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices));
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0,
descriptor_sets[image_index], {});
cmdbuf.Draw(4, 1, 0, 0);
cmdbuf.EndRenderPass();
});
cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices));
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0,
descriptor_sets[image_index], {});
cmdbuf.Draw(4, 1, 0, 0);
cmdbuf.EndRenderPass();
});
return *semaphores[image_index];
}
VkSemaphore VKBlitScreen::DrawToSwapchain(const Tegra::FramebufferConfig& framebuffer,
bool use_accelerated) {
const std::size_t image_index = swapchain.GetImageIndex();
const VkExtent2D render_area = swapchain.GetSize();
const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout();
return Draw(framebuffer, *framebuffers[image_index], layout, render_area, use_accelerated);
}
vk::Framebuffer VKBlitScreen::CreateFramebuffer(const VkImageView& image_view, VkExtent2D extent) {
return device.GetLogical().CreateFramebuffer(VkFramebufferCreateInfo{
.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.renderPass = *renderpass,
.attachmentCount = 1,
.pAttachments = &image_view,
.width = extent.width,
.height = extent.height,
.layers = 1,
});
}
void VKBlitScreen::CreateStaticResources() {
CreateShaders();
CreateSemaphores();
@@ -609,22 +635,9 @@ void VKBlitScreen::CreateFramebuffers() {
const VkExtent2D size{swapchain.GetSize()};
framebuffers.resize(image_count);
VkFramebufferCreateInfo ci{
.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.renderPass = *renderpass,
.attachmentCount = 1,
.pAttachments = nullptr,
.width = size.width,
.height = size.height,
.layers = 1,
};
for (std::size_t i = 0; i < image_count; ++i) {
const VkImageView image_view{swapchain.GetImageViewIndex(i)};
ci.pAttachments = &image_view;
framebuffers[i] = device.GetLogical().CreateFramebuffer(ci);
framebuffers[i] = CreateFramebuffer(image_view, size);
}
}
@@ -752,15 +765,13 @@ void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView imag
device.GetLogical().UpdateDescriptorSets(std::array{ubo_write, sampler_write}, {});
}
void VKBlitScreen::SetUniformData(BufferData& data,
const Tegra::FramebufferConfig& framebuffer) const {
const auto& layout = render_window.GetFramebufferLayout();
void VKBlitScreen::SetUniformData(BufferData& data, const Layout::FramebufferLayout layout) const {
data.uniform.modelview_matrix =
MakeOrthographicMatrix(static_cast<f32>(layout.width), static_cast<f32>(layout.height));
}
void VKBlitScreen::SetVertexData(BufferData& data,
const Tegra::FramebufferConfig& framebuffer) const {
void VKBlitScreen::SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer,
const Layout::FramebufferLayout layout) const {
const auto& framebuffer_transform_flags = framebuffer.transform_flags;
const auto& framebuffer_crop_rect = framebuffer.crop_rect;
@@ -798,7 +809,7 @@ void VKBlitScreen::SetVertexData(BufferData& data,
static_cast<f32>(screen_info.height);
}
const auto& screen = render_window.GetFramebufferLayout().screen;
const auto& screen = layout.screen;
const auto x = static_cast<f32>(screen.left);
const auto y = static_cast<f32>(screen.top);
const auto w = static_cast<f32>(screen.GetWidth());

View File

@@ -56,8 +56,16 @@ public:
void Recreate();
[[nodiscard]] VkSemaphore Draw(const Tegra::FramebufferConfig& framebuffer,
const VkFramebuffer& host_framebuffer,
const Layout::FramebufferLayout layout, VkExtent2D render_area,
bool use_accelerated);
[[nodiscard]] VkSemaphore DrawToSwapchain(const Tegra::FramebufferConfig& framebuffer,
bool use_accelerated);
[[nodiscard]] vk::Framebuffer CreateFramebuffer(const VkImageView& image_view,
VkExtent2D extent);
private:
struct BufferData;
@@ -81,8 +89,9 @@ private:
void CreateRawImages(const Tegra::FramebufferConfig& framebuffer);
void UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const;
void SetUniformData(BufferData& data, const Tegra::FramebufferConfig& framebuffer) const;
void SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer) const;
void SetUniformData(BufferData& data, const Layout::FramebufferLayout layout) const;
void SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer,
const Layout::FramebufferLayout layout) const;
u64 CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const;
u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer,

View File

@@ -358,7 +358,7 @@ void BufferCacheRuntime::ReserveNullBuffer() {
if (null_buffer) {
return;
}
null_buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
VkBufferCreateInfo create_info{
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
@@ -367,9 +367,13 @@ void BufferCacheRuntime::ReserveNullBuffer() {
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
});
};
if (device.IsExtTransformFeedbackSupported()) {
create_info.usage |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT;
}
null_buffer = device.GetLogical().CreateBuffer(create_info);
if (device.HasDebuggingToolAttached()) {
null_buffer.SetObjectNameEXT("Null index buffer");
null_buffer.SetObjectNameEXT("Null buffer");
}
null_buffer_commit = memory_allocator.Commit(null_buffer, MemoryUsage::DeviceLocal);

View File

@@ -30,16 +30,12 @@
namespace Vulkan {
using Tegra::Texture::SWIZZLE_TABLE;
using Tegra::Texture::ASTC::ASTC_ENCODINGS_VALUES;
using namespace Tegra::Texture::ASTC;
namespace {
constexpr u32 ASTC_BINDING_INPUT_BUFFER = 0;
constexpr u32 ASTC_BINDING_ENC_BUFFER = 1;
constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 2;
constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 3;
constexpr size_t ASTC_NUM_BINDINGS = 4;
constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 1;
constexpr size_t ASTC_NUM_BINDINGS = 2;
template <size_t size>
inline constexpr VkPushConstantRange COMPUTE_PUSH_CONSTANT_RANGE{
@@ -75,7 +71,7 @@ constexpr DescriptorBankInfo INPUT_OUTPUT_BANK_INFO{
.score = 2,
};
constexpr std::array<VkDescriptorSetLayoutBinding, 4> ASTC_DESCRIPTOR_SET_BINDINGS{{
constexpr std::array<VkDescriptorSetLayoutBinding, ASTC_NUM_BINDINGS> ASTC_DESCRIPTOR_SET_BINDINGS{{
{
.binding = ASTC_BINDING_INPUT_BUFFER,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
@@ -83,20 +79,6 @@ constexpr std::array<VkDescriptorSetLayoutBinding, 4> ASTC_DESCRIPTOR_SET_BINDIN
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = nullptr,
},
{
.binding = ASTC_BINDING_ENC_BUFFER,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = nullptr,
},
{
.binding = ASTC_BINDING_SWIZZLE_BUFFER,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = nullptr,
},
{
.binding = ASTC_BINDING_OUTPUT_IMAGE,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
@@ -108,12 +90,12 @@ constexpr std::array<VkDescriptorSetLayoutBinding, 4> ASTC_DESCRIPTOR_SET_BINDIN
constexpr DescriptorBankInfo ASTC_BANK_INFO{
.uniform_buffers = 0,
.storage_buffers = 3,
.storage_buffers = 1,
.texture_buffers = 0,
.image_buffers = 0,
.textures = 0,
.images = 1,
.score = 4,
.score = 2,
};
constexpr VkDescriptorUpdateTemplateEntryKHR INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE{
@@ -135,22 +117,6 @@ constexpr std::array<VkDescriptorUpdateTemplateEntryKHR, ASTC_NUM_BINDINGS>
.offset = ASTC_BINDING_INPUT_BUFFER * sizeof(DescriptorUpdateEntry),
.stride = sizeof(DescriptorUpdateEntry),
},
{
.dstBinding = ASTC_BINDING_ENC_BUFFER,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.offset = ASTC_BINDING_ENC_BUFFER * sizeof(DescriptorUpdateEntry),
.stride = sizeof(DescriptorUpdateEntry),
},
{
.dstBinding = ASTC_BINDING_SWIZZLE_BUFFER,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.offset = ASTC_BINDING_SWIZZLE_BUFFER * sizeof(DescriptorUpdateEntry),
.stride = sizeof(DescriptorUpdateEntry),
},
{
.dstBinding = ASTC_BINDING_OUTPUT_IMAGE,
.dstArrayElement = 0,
@@ -163,7 +129,6 @@ constexpr std::array<VkDescriptorUpdateTemplateEntryKHR, ASTC_NUM_BINDINGS>
struct AstcPushConstants {
std::array<u32, 2> blocks_dims;
u32 bytes_per_block_log2;
u32 layer_stride;
u32 block_size;
u32 x_shift;
@@ -258,10 +223,9 @@ std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer
update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices);
update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
const void* const descriptor_data{update_descriptor_queue.UpdateData()};
const VkBuffer buffer{staging.buffer};
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([this, buffer, descriptor_data, num_vertices](vk::CommandBuffer cmdbuf) {
scheduler.Record([this, descriptor_data, num_vertices](vk::CommandBuffer cmdbuf) {
static constexpr u32 DISPATCH_SIZE = 1024;
static constexpr VkMemoryBarrier WRITE_BARRIER{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
@@ -319,14 +283,14 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
const void* const descriptor_data{update_descriptor_queue.UpdateData()};
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([this, buffer = staging.buffer, descriptor_data, num_tri_vertices, base_vertex,
scheduler.Record([this, descriptor_data, num_tri_vertices, base_vertex,
index_shift](vk::CommandBuffer cmdbuf) {
static constexpr u32 DISPATCH_SIZE = 1024;
static constexpr VkMemoryBarrier WRITE_BARRIER{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
.dstAccessMask = VK_ACCESS_INDEX_READ_BIT,
};
const std::array push_constants{base_vertex, index_shift};
const VkDescriptorSet set = descriptor_allocator.Commit();
@@ -355,46 +319,6 @@ ASTCDecoderPass::ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_,
ASTCDecoderPass::~ASTCDecoderPass() = default;
void ASTCDecoderPass::MakeDataBuffer() {
constexpr size_t TOTAL_BUFFER_SIZE = sizeof(ASTC_ENCODINGS_VALUES) + sizeof(SWIZZLE_TABLE);
data_buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.size = TOTAL_BUFFER_SIZE,
.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
});
data_buffer_commit = memory_allocator.Commit(data_buffer, MemoryUsage::Upload);
const auto staging_ref = staging_buffer_pool.Request(TOTAL_BUFFER_SIZE, MemoryUsage::Upload);
std::memcpy(staging_ref.mapped_span.data(), &ASTC_ENCODINGS_VALUES,
sizeof(ASTC_ENCODINGS_VALUES));
// Tack on the swizzle table at the end of the buffer
std::memcpy(staging_ref.mapped_span.data() + sizeof(ASTC_ENCODINGS_VALUES), &SWIZZLE_TABLE,
sizeof(SWIZZLE_TABLE));
scheduler.Record([src = staging_ref.buffer, offset = staging_ref.offset, dst = *data_buffer,
TOTAL_BUFFER_SIZE](vk::CommandBuffer cmdbuf) {
static constexpr VkMemoryBarrier write_barrier{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
};
const VkBufferCopy copy{
.srcOffset = offset,
.dstOffset = 0,
.size = TOTAL_BUFFER_SIZE,
};
cmdbuf.CopyBuffer(src, dst, copy);
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
0, write_barrier);
});
}
void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
std::span<const VideoCommon::SwizzleParameters> swizzles) {
using namespace VideoCommon::Accelerated;
@@ -403,9 +327,6 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
VideoCore::Surface::DefaultBlockHeight(image.info.format),
};
scheduler.RequestOutsideRenderPassOperationContext();
if (!data_buffer) {
MakeDataBuffer();
}
const VkPipeline vk_pipeline = *pipeline;
const VkImageAspectFlags aspect_mask = image.AspectMask();
const VkImage vk_image = image.Handle();
@@ -437,16 +358,13 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
});
for (const VideoCommon::SwizzleParameters& swizzle : swizzles) {
const size_t input_offset = swizzle.buffer_offset + map.offset;
const u32 num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 32U);
const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 32U);
const u32 num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 8U);
const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 8U);
const u32 num_dispatches_z = image.info.resources.layers;
update_descriptor_queue.Acquire();
update_descriptor_queue.AddBuffer(map.buffer, input_offset,
image.guest_size_bytes - swizzle.buffer_offset);
update_descriptor_queue.AddBuffer(*data_buffer, 0, sizeof(ASTC_ENCODINGS_VALUES));
update_descriptor_queue.AddBuffer(*data_buffer, sizeof(ASTC_ENCODINGS_VALUES),
sizeof(SWIZZLE_TABLE));
update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level));
const void* const descriptor_data{update_descriptor_queue.UpdateData()};
@@ -454,11 +372,11 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info);
ASSERT(params.origin == (std::array<u32, 3>{0, 0, 0}));
ASSERT(params.destination == (std::array<s32, 3>{0, 0, 0}));
ASSERT(params.bytes_per_block_log2 == 4);
scheduler.Record([this, num_dispatches_x, num_dispatches_y, num_dispatches_z, block_dims,
params, descriptor_data](vk::CommandBuffer cmdbuf) {
const AstcPushConstants uniforms{
.blocks_dims = block_dims,
.bytes_per_block_log2 = params.bytes_per_block_log2,
.layer_stride = params.layer_stride,
.block_size = params.block_size,
.x_shift = params.x_shift,

View File

@@ -96,15 +96,10 @@ public:
std::span<const VideoCommon::SwizzleParameters> swizzles);
private:
void MakeDataBuffer();
VKScheduler& scheduler;
StagingBufferPool& staging_buffer_pool;
VKUpdateDescriptorQueue& update_descriptor_queue;
MemoryAllocator& memory_allocator;
vk::Buffer data_buffer;
MemoryCommit data_buffer_commit;
};
} // namespace Vulkan

View File

@@ -8,6 +8,7 @@
#include <boost/container/small_vector.hpp>
#include "video_core/renderer_vulkan/pipeline_helper.h"
#include "video_core/renderer_vulkan/pipeline_statistics.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
@@ -26,6 +27,7 @@ using Tegra::Texture::TexturePair;
ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue_,
Common::ThreadWorker* thread_worker,
PipelineStatistics* pipeline_statistics,
VideoCore::ShaderNotify* shader_notify, const Shader::Info& info_,
vk::ShaderModule spv_module_)
: device{device_}, update_descriptor_queue{update_descriptor_queue_}, info{info_},
@@ -36,7 +38,7 @@ ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descript
std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(),
uniform_buffer_sizes.begin());
auto func{[this, &descriptor_pool, shader_notify] {
auto func{[this, &descriptor_pool, shader_notify, pipeline_statistics] {
DescriptorLayoutBuilder builder{device};
builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT);
@@ -50,10 +52,14 @@ ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descript
.pNext = nullptr,
.requiredSubgroupSize = GuestWarpSize,
};
VkPipelineCreateFlags flags{};
if (device.IsKhrPipelineEexecutablePropertiesEnabled()) {
flags |= VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR;
}
pipeline = device.GetLogical().CreateComputePipeline({
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.flags = flags,
.stage{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.pNext = device.IsExtSubgroupSizeControlSupported() ? &subgroup_size_ci : nullptr,
@@ -67,6 +73,9 @@ ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descript
.basePipelineHandle = 0,
.basePipelineIndex = 0,
});
if (pipeline_statistics) {
pipeline_statistics->Collect(*pipeline);
}
std::lock_guard lock{build_mutex};
is_built = true;
build_condvar.notify_one();

View File

@@ -25,6 +25,7 @@ class ShaderNotify;
namespace Vulkan {
class Device;
class PipelineStatistics;
class VKScheduler;
class ComputePipeline {
@@ -32,6 +33,7 @@ public:
explicit ComputePipeline(const Device& device, DescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue,
Common::ThreadWorker* thread_worker,
PipelineStatistics* pipeline_statistics,
VideoCore::ShaderNotify* shader_notify, const Shader::Info& info,
vk::ShaderModule spv_module);

View File

@@ -11,6 +11,7 @@
#include "common/bit_field.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/renderer_vulkan/pipeline_helper.h"
#include "video_core/renderer_vulkan/pipeline_statistics.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_render_pass_cache.h"
@@ -217,8 +218,8 @@ GraphicsPipeline::GraphicsPipeline(
VKScheduler& scheduler_, BufferCache& buffer_cache_, TextureCache& texture_cache_,
VideoCore::ShaderNotify* shader_notify, const Device& device_, DescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* worker_thread,
RenderPassCache& render_pass_cache, const GraphicsPipelineCacheKey& key_,
std::array<vk::ShaderModule, NUM_STAGES> stages,
PipelineStatistics* pipeline_statistics, RenderPassCache& render_pass_cache,
const GraphicsPipelineCacheKey& key_, std::array<vk::ShaderModule, NUM_STAGES> stages,
const std::array<const Shader::Info*, NUM_STAGES>& infos)
: key{key_}, maxwell3d{maxwell3d_}, gpu_memory{gpu_memory_}, device{device_},
texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, scheduler{scheduler_},
@@ -235,7 +236,7 @@ GraphicsPipeline::GraphicsPipeline(
enabled_uniform_buffer_masks[stage] = info->constant_buffer_mask;
std::ranges::copy(info->constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin());
}
auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool] {
auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool, pipeline_statistics] {
DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)};
uses_push_descriptor = builder.CanUsePushDescriptor();
descriptor_set_layout = builder.CreateDescriptorSetLayout(uses_push_descriptor);
@@ -250,6 +251,9 @@ GraphicsPipeline::GraphicsPipeline(
const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(key.state))};
Validate();
MakePipeline(render_pass);
if (pipeline_statistics) {
pipeline_statistics->Collect(*pipeline);
}
std::lock_guard lock{build_mutex};
is_built = true;
@@ -782,10 +786,14 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
}
*/
}
VkPipelineCreateFlags flags{};
if (device.IsKhrPipelineEexecutablePropertiesEnabled()) {
flags |= VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR;
}
pipeline = device.GetLogical().CreateGraphicsPipeline({
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.flags = flags,
.stageCount = static_cast<u32>(shader_stages.size()),
.pStages = shader_stages.data(),
.pVertexInputState = &vertex_input_ci,

View File

@@ -60,6 +60,7 @@ struct hash<Vulkan::GraphicsPipelineCacheKey> {
namespace Vulkan {
class Device;
class PipelineStatistics;
class RenderPassCache;
class VKScheduler;
class VKUpdateDescriptorQueue;
@@ -73,8 +74,9 @@ public:
VKScheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache,
VideoCore::ShaderNotify* shader_notify, const Device& device,
DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue,
Common::ThreadWorker* worker_thread, RenderPassCache& render_pass_cache,
const GraphicsPipelineCacheKey& key, std::array<vk::ShaderModule, NUM_STAGES> stages,
Common::ThreadWorker* worker_thread, PipelineStatistics* pipeline_statistics,
RenderPassCache& render_pass_cache, const GraphicsPipelineCacheKey& key,
std::array<vk::ShaderModule, NUM_STAGES> stages,
const std::array<const Shader::Info*, NUM_STAGES>& infos);
GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = delete;

View File

@@ -29,6 +29,7 @@
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/renderer_vulkan/pipeline_helper.h"
#include "video_core/renderer_vulkan/pipeline_statistics.h"
#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
@@ -389,15 +390,19 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading
size_t total{};
size_t built{};
bool has_loaded{};
std::unique_ptr<PipelineStatistics> statistics;
} state;
if (device.IsKhrPipelineEexecutablePropertiesEnabled()) {
state.statistics = std::make_unique<PipelineStatistics>(device);
}
const auto load_compute{[&](std::ifstream& file, FileEnvironment env) {
ComputePipelineCacheKey key;
file.read(reinterpret_cast<char*>(&key), sizeof(key));
workers.QueueWork([this, key, env = std::move(env), &state, &callback]() mutable {
ShaderPools pools;
auto pipeline{CreateComputePipeline(pools, key, env, false)};
auto pipeline{CreateComputePipeline(pools, key, env, state.statistics.get(), false)};
std::lock_guard lock{state.mutex};
if (pipeline) {
compute_cache.emplace(key, std::move(pipeline));
@@ -425,7 +430,8 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading
for (auto& env : envs) {
env_ptrs.push_back(&env);
}
auto pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs), false)};
auto pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs),
state.statistics.get(), false)};
std::lock_guard lock{state.mutex};
graphics_cache.emplace(key, std::move(pipeline));
@@ -445,6 +451,10 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading
lock.unlock();
workers.WaitForRequests();
if (state.statistics) {
state.statistics->Report();
}
}
GraphicsPipeline* PipelineCache::CurrentGraphicsPipelineSlowPath() {
@@ -486,7 +496,8 @@ GraphicsPipeline* PipelineCache::BuiltPipeline(GraphicsPipeline* pipeline) const
std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
ShaderPools& pools, const GraphicsPipelineCacheKey& key,
std::span<Shader::Environment* const> envs, bool build_in_parallel) try {
std::span<Shader::Environment* const> envs, PipelineStatistics* statistics,
bool build_in_parallel) try {
LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash());
size_t env_index{0};
std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs;
@@ -540,7 +551,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr};
return std::make_unique<GraphicsPipeline>(
maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, &shader_notify, device,
descriptor_pool, update_descriptor_queue, thread_worker, render_pass_cache, key,
descriptor_pool, update_descriptor_queue, thread_worker, statistics, render_pass_cache, key,
std::move(modules), infos);
} catch (const Shader::Exception& exception) {
@@ -553,7 +564,8 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
GetGraphicsEnvironments(environments, graphics_key.unique_hashes);
main_pools.ReleaseContents();
auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(), true)};
auto pipeline{
CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(), nullptr, true)};
if (!pipeline || pipeline_cache_filename.empty()) {
return pipeline;
}
@@ -578,7 +590,7 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
env.SetCachedSize(shader->size_bytes);
main_pools.ReleaseContents();
auto pipeline{CreateComputePipeline(main_pools, key, env, true)};
auto pipeline{CreateComputePipeline(main_pools, key, env, nullptr, true)};
if (!pipeline || pipeline_cache_filename.empty()) {
return pipeline;
}
@@ -591,7 +603,7 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
ShaderPools& pools, const ComputePipelineCacheKey& key, Shader::Environment& env,
bool build_in_parallel) try {
PipelineStatistics* statistics, bool build_in_parallel) try {
LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash());
Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()};
@@ -605,8 +617,8 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
}
Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr};
return std::make_unique<ComputePipeline>(device, descriptor_pool, update_descriptor_queue,
thread_worker, &shader_notify, program.info,
std::move(spv_module));
thread_worker, statistics, &shader_notify,
program.info, std::move(spv_module));
} catch (const Shader::Exception& exception) {
LOG_ERROR(Render_Vulkan, "{}", exception.what());

View File

@@ -80,8 +80,9 @@ struct hash<Vulkan::ComputePipelineCacheKey> {
namespace Vulkan {
class ComputePipeline;
class Device;
class DescriptorPool;
class Device;
class PipelineStatistics;
class RasterizerVulkan;
class RenderPassCache;
class VKScheduler;
@@ -128,7 +129,8 @@ private:
std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline(
ShaderPools& pools, const GraphicsPipelineCacheKey& key,
std::span<Shader::Environment* const> envs, bool build_in_parallel);
std::span<Shader::Environment* const> envs, PipelineStatistics* statistics,
bool build_in_parallel);
std::unique_ptr<ComputePipeline> CreateComputePipeline(const ComputePipelineCacheKey& key,
const ShaderInfo* shader);
@@ -136,6 +138,7 @@ private:
std::unique_ptr<ComputePipeline> CreateComputePipeline(ShaderPools& pools,
const ComputePipelineCacheKey& key,
Shader::Environment& env,
PipelineStatistics* statistics,
bool build_in_parallel);
const Device& device;

View File

@@ -32,7 +32,7 @@
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/shader_cache.h"
#include "video_core/texture_cache/texture_cache.h"
#include "video_core/texture_cache/texture_cache_base.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
@@ -61,11 +61,16 @@ struct DrawParams {
VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t index) {
const auto& src = regs.viewport_transform[index];
const float width = src.scale_x * 2.0f;
const float height = src.scale_y * 2.0f;
float y = src.translate_y - src.scale_y;
float height = src.scale_y * 2.0f;
if (regs.screen_y_control.y_negate) {
y += height;
height = -height;
}
const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f;
VkViewport viewport{
.x = src.translate_x - src.scale_x,
.y = src.translate_y - src.scale_y,
.y = y,
.width = width != 0.0f ? width : 1.0f,
.height = height != 0.0f ? height : 1.0f,
.minDepth = src.translate_z - src.scale_z * reduce_z,

View File

@@ -61,11 +61,15 @@ std::optional<u32> FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& p
return std::nullopt;
}
u32 FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask) {
// Try to find a DEVICE_LOCAL_BIT type, Nvidia and AMD have a dedicated heap for this
std::optional<u32> type = FindMemoryTypeIndex(props, type_mask, STREAM_FLAGS);
if (type) {
return *type;
u32 FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask,
bool try_device_local) {
std::optional<u32> type;
if (try_device_local) {
// Try to find a DEVICE_LOCAL_BIT type, Nvidia and AMD have a dedicated heap for this
type = FindMemoryTypeIndex(props, type_mask, STREAM_FLAGS);
if (type) {
return *type;
}
}
// Otherwise try without the DEVICE_LOCAL_BIT
type = FindMemoryTypeIndex(props, type_mask, HOST_FLAGS);
@@ -115,12 +119,21 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem
.buffer = *stream_buffer,
};
const auto memory_properties = device.GetPhysical().GetMemoryProperties();
stream_memory = dev.AllocateMemory(VkMemoryAllocateInfo{
VkMemoryAllocateInfo stream_memory_info{
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
.pNext = make_dedicated ? &dedicated_info : nullptr,
.allocationSize = requirements.size,
.memoryTypeIndex = FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits),
});
.memoryTypeIndex =
FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits, true),
};
stream_memory = dev.TryAllocateMemory(stream_memory_info);
if (!stream_memory) {
LOG_INFO(Render_Vulkan, "Dynamic memory allocation failed, trying with system memory");
stream_memory_info.memoryTypeIndex =
FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits, false);
stream_memory = dev.AllocateMemory(stream_memory_info);
}
if (device.HasDebuggingToolAttached()) {
stream_memory.SetObjectNameEXT("Stream Buffer Memory");
}

View File

@@ -1,168 +0,0 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <limits>
#include <optional>
#include <tuple>
#include <vector>
#include "common/alignment.h"
#include "common/assert.h"
#include "common/literals.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
namespace {
using namespace Common::Literals;
constexpr VkBufferUsageFlags BUFFER_USAGE =
VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
constexpr u64 PREFERRED_STREAM_BUFFER_SIZE = 256_MiB;
/// Find a memory type with the passed requirements
std::optional<u32> FindMemoryType(const VkPhysicalDeviceMemoryProperties& properties,
VkMemoryPropertyFlags wanted,
u32 filter = std::numeric_limits<u32>::max()) {
for (u32 i = 0; i < properties.memoryTypeCount; ++i) {
const auto flags = properties.memoryTypes[i].propertyFlags;
if ((flags & wanted) == wanted && (filter & (1U << i)) != 0) {
return i;
}
}
return std::nullopt;
}
/// Get the preferred host visible memory type.
u32 GetMemoryType(const VkPhysicalDeviceMemoryProperties& properties,
u32 filter = std::numeric_limits<u32>::max()) {
// Prefer device local host visible allocations. Both AMD and Nvidia now provide one.
// Otherwise search for a host visible allocation.
static constexpr auto HOST_MEMORY =
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
static constexpr auto DYNAMIC_MEMORY = HOST_MEMORY | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
std::optional preferred_type = FindMemoryType(properties, DYNAMIC_MEMORY);
if (!preferred_type) {
preferred_type = FindMemoryType(properties, HOST_MEMORY);
ASSERT_MSG(preferred_type, "No host visible and coherent memory type found");
}
return preferred_type.value_or(0);
}
} // Anonymous namespace
VKStreamBuffer::VKStreamBuffer(const Device& device_, VKScheduler& scheduler_)
: device{device_}, scheduler{scheduler_} {
CreateBuffers();
ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE);
ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE);
}
VKStreamBuffer::~VKStreamBuffer() = default;
std::pair<u8*, u64> VKStreamBuffer::Map(u64 size, u64 alignment) {
ASSERT(size <= stream_buffer_size);
mapped_size = size;
if (alignment > 0) {
offset = Common::AlignUp(offset, alignment);
}
WaitPendingOperations(offset);
if (offset + size > stream_buffer_size) {
// The buffer would overflow, save the amount of used watches and reset the state.
invalidation_mark = current_watch_cursor;
current_watch_cursor = 0;
offset = 0;
// Swap watches and reset waiting cursors.
std::swap(previous_watches, current_watches);
wait_cursor = 0;
wait_bound = 0;
// Ensure that we don't wait for uncommitted fences.
scheduler.Flush();
}
return std::make_pair(memory.Map(offset, size), offset);
}
void VKStreamBuffer::Unmap(u64 size) {
ASSERT_MSG(size <= mapped_size, "Reserved size is too small");
memory.Unmap();
offset += size;
if (current_watch_cursor + 1 >= current_watches.size()) {
// Ensure that there are enough watches.
ReserveWatches(current_watches, WATCHES_RESERVE_CHUNK);
}
auto& watch = current_watches[current_watch_cursor++];
watch.upper_bound = offset;
watch.tick = scheduler.CurrentTick();
}
void VKStreamBuffer::CreateBuffers() {
const auto memory_properties = device.GetPhysical().GetMemoryProperties();
const u32 preferred_type = GetMemoryType(memory_properties);
const u32 preferred_heap = memory_properties.memoryTypes[preferred_type].heapIndex;
// Substract from the preferred heap size some bytes to avoid getting out of memory.
const VkDeviceSize heap_size = memory_properties.memoryHeaps[preferred_heap].size;
// As per DXVK's example, using `heap_size / 2`
const VkDeviceSize allocable_size = heap_size / 2;
buffer = device.GetLogical().CreateBuffer({
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.size = std::min(PREFERRED_STREAM_BUFFER_SIZE, allocable_size),
.usage = BUFFER_USAGE,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
});
const auto requirements = device.GetLogical().GetBufferMemoryRequirements(*buffer);
const u32 required_flags = requirements.memoryTypeBits;
stream_buffer_size = static_cast<u64>(requirements.size);
memory = device.GetLogical().AllocateMemory({
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
.pNext = nullptr,
.allocationSize = requirements.size,
.memoryTypeIndex = GetMemoryType(memory_properties, required_flags),
});
buffer.BindMemory(*memory, 0);
}
void VKStreamBuffer::ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size) {
watches.resize(watches.size() + grow_size);
}
void VKStreamBuffer::WaitPendingOperations(u64 requested_upper_bound) {
if (!invalidation_mark) {
return;
}
while (requested_upper_bound < wait_bound && wait_cursor < *invalidation_mark) {
auto& watch = previous_watches[wait_cursor];
wait_bound = watch.upper_bound;
scheduler.Wait(watch.tick);
++wait_cursor;
}
}
} // namespace Vulkan

View File

@@ -1,76 +0,0 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <optional>
#include <utility>
#include <vector>
#include "common/common_types.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
class Device;
class VKFenceWatch;
class VKScheduler;
class VKStreamBuffer final {
public:
explicit VKStreamBuffer(const Device& device, VKScheduler& scheduler);
~VKStreamBuffer();
/**
* Reserves a region of memory from the stream buffer.
* @param size Size to reserve.
* @returns A pair of a raw memory pointer (with offset added), and the buffer offset
*/
std::pair<u8*, u64> Map(u64 size, u64 alignment);
/// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
void Unmap(u64 size);
VkBuffer Handle() const noexcept {
return *buffer;
}
u64 Address() const noexcept {
return 0;
}
private:
struct Watch {
u64 tick{};
u64 upper_bound{};
};
/// Creates Vulkan buffer handles committing the required the required memory.
void CreateBuffers();
/// Increases the amount of watches available.
void ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size);
void WaitPendingOperations(u64 requested_upper_bound);
const Device& device; ///< Vulkan device manager.
VKScheduler& scheduler; ///< Command scheduler.
vk::Buffer buffer; ///< Mapped buffer.
vk::DeviceMemory memory; ///< Memory allocation.
u64 stream_buffer_size{}; ///< Stream buffer size.
u64 offset{}; ///< Buffer iterator.
u64 mapped_size{}; ///< Size reserved for the current copy.
std::vector<Watch> current_watches; ///< Watches recorded in the current iteration.
std::size_t current_watch_cursor{}; ///< Count of watches, reset on invalidation.
std::optional<std::size_t> invalidation_mark; ///< Number of watches used in the previous cycle.
std::vector<Watch> previous_watches; ///< Watches used in the previous iteration.
std::size_t wait_cursor{}; ///< Last watch being waited for completion.
u64 wait_bound{}; ///< Highest offset being watched for completion.
};
} // namespace Vulkan

View File

@@ -19,6 +19,8 @@
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/texture_cache/formatter.h"
#include "video_core/texture_cache/samples_helper.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"

View File

@@ -9,7 +9,7 @@
#include "shader_recompiler/shader_info.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/texture_cache/texture_cache.h"
#include "video_core/texture_cache/texture_cache_base.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"

View File

@@ -0,0 +1,10 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/texture_cache/texture_cache.h"
namespace VideoCommon {
template class VideoCommon::TextureCache<Vulkan::TextureCacheParams>;
}

View File

@@ -6,7 +6,7 @@
#include "common/assert.h"
#include "video_core/texture_cache/image_view_info.h"
#include "video_core/texture_cache/texture_cache.h"
#include "video_core/texture_cache/texture_cache_base.h"
#include "video_core/texture_cache/types.h"
#include "video_core/textures/texture.h"
@@ -14,6 +14,8 @@ namespace VideoCommon {
namespace {
using Tegra::Texture::TextureType;
constexpr u8 RENDER_TARGET_SWIZZLE = std::numeric_limits<u8>::max();
[[nodiscard]] u8 CastSwizzle(SwizzleSource source) {

View File

@@ -24,10 +24,10 @@ struct RenderTargets {
return std::ranges::any_of(color_buffer_ids, contains) || contains(depth_buffer_id);
}
std::array<ImageViewId, NUM_RT> color_buffer_ids;
ImageViewId depth_buffer_id;
std::array<ImageViewId, NUM_RT> color_buffer_ids{};
ImageViewId depth_buffer_id{};
std::array<u8, NUM_RT> draw_buffers{};
Extent2D size;
Extent2D size{};
};
} // namespace VideoCommon

View File

@@ -4,48 +4,11 @@
#pragma once
#include <algorithm>
#include <array>
#include <bit>
#include <memory>
#include <mutex>
#include <optional>
#include <span>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
#include <boost/container/small_vector.hpp>
#include "common/alignment.h"
#include "common/common_types.h"
#include "common/literals.h"
#include "common/logging/log.h"
#include "common/settings.h"
#include "video_core/compatible_formats.h"
#include "video_core/delayed_destruction_ring.h"
#include "video_core/dirty_flags.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/surface.h"
#include "video_core/texture_cache/descriptor_table.h"
#include "video_core/texture_cache/format_lookup_table.h"
#include "video_core/texture_cache/formatter.h"
#include "video_core/texture_cache/image_base.h"
#include "video_core/texture_cache/image_info.h"
#include "video_core/texture_cache/image_view_base.h"
#include "video_core/texture_cache/image_view_info.h"
#include "video_core/texture_cache/render_targets.h"
#include "video_core/texture_cache/samples_helper.h"
#include "video_core/texture_cache/slot_vector.h"
#include "video_core/texture_cache/types.h"
#include "video_core/texture_cache/util.h"
#include "video_core/textures/texture.h"
#include "video_core/texture_cache/texture_cache_base.h"
namespace VideoCommon {
@@ -61,352 +24,6 @@ using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
using VideoCore::Surface::SurfaceType;
using namespace Common::Literals;
template <class P>
class TextureCache {
/// Address shift for caching images into a hash table
static constexpr u64 PAGE_BITS = 20;
/// Enables debugging features to the texture cache
static constexpr bool ENABLE_VALIDATION = P::ENABLE_VALIDATION;
/// Implement blits as copies between framebuffers
static constexpr bool FRAMEBUFFER_BLITS = P::FRAMEBUFFER_BLITS;
/// True when some copies have to be emulated
static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES;
/// True when the API can provide info about the memory of the device.
static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO;
/// Image view ID for null descriptors
static constexpr ImageViewId NULL_IMAGE_VIEW_ID{0};
/// Sampler ID for bugged sampler ids
static constexpr SamplerId NULL_SAMPLER_ID{0};
static constexpr u64 DEFAULT_EXPECTED_MEMORY = 1_GiB;
static constexpr u64 DEFAULT_CRITICAL_MEMORY = 2_GiB;
using Runtime = typename P::Runtime;
using Image = typename P::Image;
using ImageAlloc = typename P::ImageAlloc;
using ImageView = typename P::ImageView;
using Sampler = typename P::Sampler;
using Framebuffer = typename P::Framebuffer;
struct BlitImages {
ImageId dst_id;
ImageId src_id;
PixelFormat dst_format;
PixelFormat src_format;
};
template <typename T>
struct IdentityHash {
[[nodiscard]] size_t operator()(T value) const noexcept {
return static_cast<size_t>(value);
}
};
public:
explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&, Tegra::Engines::Maxwell3D&,
Tegra::Engines::KeplerCompute&, Tegra::MemoryManager&);
/// Notify the cache that a new frame has been queued
void TickFrame();
/// Return a constant reference to the given image view id
[[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept;
/// Return a reference to the given image view id
[[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept;
/// Mark an image as modified from the GPU
void MarkModification(ImageId id) noexcept;
/// Fill image_view_ids with the graphics images in indices
void FillGraphicsImageViews(std::span<const u32> indices,
std::span<ImageViewId> image_view_ids);
/// Fill image_view_ids with the compute images in indices
void FillComputeImageViews(std::span<const u32> indices, std::span<ImageViewId> image_view_ids);
/// Get the sampler from the graphics descriptor table in the specified index
Sampler* GetGraphicsSampler(u32 index);
/// Get the sampler from the compute descriptor table in the specified index
Sampler* GetComputeSampler(u32 index);
/// Refresh the state for graphics image view and sampler descriptors
void SynchronizeGraphicsDescriptors();
/// Refresh the state for compute image view and sampler descriptors
void SynchronizeComputeDescriptors();
/// Update bound render targets and upload memory if necessary
/// @param is_clear True when the render targets are being used for clears
void UpdateRenderTargets(bool is_clear);
/// Find a framebuffer with the currently bound render targets
/// UpdateRenderTargets should be called before this
Framebuffer* GetFramebuffer();
/// Mark images in a range as modified from the CPU
void WriteMemory(VAddr cpu_addr, size_t size);
/// Download contents of host images to guest memory in a region
void DownloadMemory(VAddr cpu_addr, size_t size);
/// Remove images in a region
void UnmapMemory(VAddr cpu_addr, size_t size);
/// Remove images in a region
void UnmapGPUMemory(GPUVAddr gpu_addr, size_t size);
/// Blit an image with the given parameters
void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Surface& src,
const Tegra::Engines::Fermi2D::Config& copy);
/// Invalidate the contents of the color buffer index
/// These contents become unspecified, the cache can assume aggressive optimizations.
void InvalidateColorBuffer(size_t index);
/// Invalidate the contents of the depth buffer
/// These contents become unspecified, the cache can assume aggressive optimizations.
void InvalidateDepthBuffer();
/// Try to find a cached image view in the given CPU address
[[nodiscard]] ImageView* TryFindFramebufferImageView(VAddr cpu_addr);
/// Return true when there are uncommitted images to be downloaded
[[nodiscard]] bool HasUncommittedFlushes() const noexcept;
/// Return true when the caller should wait for async downloads
[[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept;
/// Commit asynchronous downloads
void CommitAsyncFlushes();
/// Pop asynchronous downloads
void PopAsyncFlushes();
/// Return true when a CPU region is modified from the GPU
[[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
std::mutex mutex;
private:
/// Iterate over all page indices in a range
template <typename Func>
static void ForEachCPUPage(VAddr addr, size_t size, Func&& func) {
static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>;
const u64 page_end = (addr + size - 1) >> PAGE_BITS;
for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) {
if constexpr (RETURNS_BOOL) {
if (func(page)) {
break;
}
} else {
func(page);
}
}
}
template <typename Func>
static void ForEachGPUPage(GPUVAddr addr, size_t size, Func&& func) {
static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>;
const u64 page_end = (addr + size - 1) >> PAGE_BITS;
for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) {
if constexpr (RETURNS_BOOL) {
if (func(page)) {
break;
}
} else {
func(page);
}
}
}
/// Runs the Garbage Collector.
void RunGarbageCollector();
/// Fills image_view_ids in the image views in indices
void FillImageViews(DescriptorTable<TICEntry>& table,
std::span<ImageViewId> cached_image_view_ids, std::span<const u32> indices,
std::span<ImageViewId> image_view_ids);
/// Find or create an image view in the guest descriptor table
ImageViewId VisitImageView(DescriptorTable<TICEntry>& table,
std::span<ImageViewId> cached_image_view_ids, u32 index);
/// Find or create a framebuffer with the given render target parameters
FramebufferId GetFramebufferId(const RenderTargets& key);
/// Refresh the contents (pixel data) of an image
void RefreshContents(Image& image, ImageId image_id);
/// Upload data from guest to an image
template <typename StagingBuffer>
void UploadImageContents(Image& image, StagingBuffer& staging_buffer);
/// Find or create an image view from a guest descriptor
[[nodiscard]] ImageViewId FindImageView(const TICEntry& config);
/// Create a new image view from a guest descriptor
[[nodiscard]] ImageViewId CreateImageView(const TICEntry& config);
/// Find or create an image from the given parameters
[[nodiscard]] ImageId FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
RelaxedOptions options = RelaxedOptions{});
/// Find an image from the given parameters
[[nodiscard]] ImageId FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
RelaxedOptions options);
/// Create an image from the given parameters
[[nodiscard]] ImageId InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
RelaxedOptions options);
/// Create a new image and join perfectly matching existing images
/// Remove joined images from the cache
[[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr);
/// Return a blit image pair from the given guest blit parameters
[[nodiscard]] BlitImages GetBlitImages(const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Surface& src);
/// Find or create a sampler from a guest descriptor sampler
[[nodiscard]] SamplerId FindSampler(const TSCEntry& config);
/// Find or create an image view for the given color buffer index
[[nodiscard]] ImageViewId FindColorBuffer(size_t index, bool is_clear);
/// Find or create an image view for the depth buffer
[[nodiscard]] ImageViewId FindDepthBuffer(bool is_clear);
/// Find or create a view for a render target with the given image parameters
[[nodiscard]] ImageViewId FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr,
bool is_clear);
/// Iterates over all the images in a region calling func
template <typename Func>
void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func);
template <typename Func>
void ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func);
template <typename Func>
void ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func);
/// Iterates over all the images in a region calling func
template <typename Func>
void ForEachSparseSegment(ImageBase& image, Func&& func);
/// Find or create an image view in the given image with the passed parameters
[[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info);
/// Register image in the page table
void RegisterImage(ImageId image);
/// Unregister image from the page table
void UnregisterImage(ImageId image);
/// Track CPU reads and writes for image
void TrackImage(ImageBase& image, ImageId image_id);
/// Stop tracking CPU reads and writes for image
void UntrackImage(ImageBase& image, ImageId image_id);
/// Delete image from the cache
void DeleteImage(ImageId image);
/// Remove image views references from the cache
void RemoveImageViewReferences(std::span<const ImageViewId> removed_views);
/// Remove framebuffers using the given image views from the cache
void RemoveFramebuffers(std::span<const ImageViewId> removed_views);
/// Mark an image as modified from the GPU
void MarkModification(ImageBase& image) noexcept;
/// Synchronize image aliases, copying data if needed
void SynchronizeAliases(ImageId image_id);
/// Prepare an image to be used
void PrepareImage(ImageId image_id, bool is_modification, bool invalidate);
/// Prepare an image view to be used
void PrepareImageView(ImageViewId image_view_id, bool is_modification, bool invalidate);
/// Execute copies from one image to the other, even if they are incompatible
void CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies);
/// Bind an image view as render target, downloading resources preemtively if needed
void BindRenderTarget(ImageViewId* old_id, ImageViewId new_id);
/// Create a render target from a given image and image view parameters
[[nodiscard]] std::pair<FramebufferId, ImageViewId> RenderTargetFromImage(
ImageId, const ImageViewInfo& view_info);
/// Returns true if the current clear parameters clear the whole image of a given image view
[[nodiscard]] bool IsFullClear(ImageViewId id);
Runtime& runtime;
VideoCore::RasterizerInterface& rasterizer;
Tegra::Engines::Maxwell3D& maxwell3d;
Tegra::Engines::KeplerCompute& kepler_compute;
Tegra::MemoryManager& gpu_memory;
DescriptorTable<TICEntry> graphics_image_table{gpu_memory};
DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory};
std::vector<SamplerId> graphics_sampler_ids;
std::vector<ImageViewId> graphics_image_view_ids;
DescriptorTable<TICEntry> compute_image_table{gpu_memory};
DescriptorTable<TSCEntry> compute_sampler_table{gpu_memory};
std::vector<SamplerId> compute_sampler_ids;
std::vector<ImageViewId> compute_image_view_ids;
RenderTargets render_targets;
std::unordered_map<TICEntry, ImageViewId> image_views;
std::unordered_map<TSCEntry, SamplerId> samplers;
std::unordered_map<RenderTargets, FramebufferId> framebuffers;
std::unordered_map<u64, std::vector<ImageMapId>, IdentityHash<u64>> page_table;
std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> gpu_page_table;
std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> sparse_page_table;
std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views;
VAddr virtual_invalid_space{};
bool has_deleted_images = false;
u64 total_used_memory = 0;
u64 minimum_memory;
u64 expected_memory;
u64 critical_memory;
SlotVector<Image> slot_images;
SlotVector<ImageMapView> slot_map_views;
SlotVector<ImageView> slot_image_views;
SlotVector<ImageAlloc> slot_image_allocs;
SlotVector<Sampler> slot_samplers;
SlotVector<Framebuffer> slot_framebuffers;
// TODO: This data structure is not optimal and it should be reworked
std::vector<ImageId> uncommitted_downloads;
std::queue<std::vector<ImageId>> committed_downloads;
static constexpr size_t TICKS_TO_DESTROY = 6;
DelayedDestructionRing<Image, TICKS_TO_DESTROY> sentenced_images;
DelayedDestructionRing<ImageView, TICKS_TO_DESTROY> sentenced_image_view;
DelayedDestructionRing<Framebuffer, TICKS_TO_DESTROY> sentenced_framebuffers;
std::unordered_map<GPUVAddr, ImageAllocId> image_allocs_table;
u64 modification_tick = 0;
u64 frame_tick = 0;
typename SlotVector<Image>::Iterator deletion_iterator;
};
template <class P>
TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_,
Tegra::Engines::Maxwell3D& maxwell3d_,
@@ -820,40 +437,6 @@ void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
}
}
template <class P>
void TextureCache<P>::InvalidateColorBuffer(size_t index) {
ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index];
color_buffer_id = FindColorBuffer(index, false);
if (!color_buffer_id) {
LOG_ERROR(HW_GPU, "Invalidating invalid color buffer in index={}", index);
return;
}
// When invalidating a color buffer, the old contents are no longer relevant
ImageView& color_buffer = slot_image_views[color_buffer_id];
Image& image = slot_images[color_buffer.image_id];
image.flags &= ~ImageFlagBits::CpuModified;
image.flags &= ~ImageFlagBits::GpuModified;
runtime.InvalidateColorBuffer(color_buffer, index);
}
template <class P>
void TextureCache<P>::InvalidateDepthBuffer() {
ImageViewId& depth_buffer_id = render_targets.depth_buffer_id;
depth_buffer_id = FindDepthBuffer(false);
if (!depth_buffer_id) {
LOG_ERROR(HW_GPU, "Invalidating invalid depth buffer");
return;
}
// When invalidating the depth buffer, the old contents are no longer relevant
ImageBase& image = slot_images[slot_image_views[depth_buffer_id].image_id];
image.flags &= ~ImageFlagBits::CpuModified;
image.flags &= ~ImageFlagBits::GpuModified;
ImageView& depth_buffer = slot_image_views[depth_buffer_id];
runtime.InvalidateDepthBuffer(depth_buffer);
}
template <class P>
typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView(VAddr cpu_addr) {
// TODO: Properly implement this

View File

@@ -0,0 +1,385 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <mutex>
#include <span>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "common/common_types.h"
#include "common/literals.h"
#include "video_core/compatible_formats.h"
#include "video_core/delayed_destruction_ring.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/surface.h"
#include "video_core/texture_cache/descriptor_table.h"
#include "video_core/texture_cache/image_base.h"
#include "video_core/texture_cache/image_info.h"
#include "video_core/texture_cache/image_view_info.h"
#include "video_core/texture_cache/render_targets.h"
#include "video_core/texture_cache/slot_vector.h"
#include "video_core/texture_cache/types.h"
#include "video_core/texture_cache/util.h"
#include "video_core/textures/texture.h"
namespace VideoCommon {
using Tegra::Texture::SwizzleSource;
using Tegra::Texture::TICEntry;
using Tegra::Texture::TSCEntry;
using VideoCore::Surface::GetFormatType;
using VideoCore::Surface::IsCopyCompatible;
using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::PixelFormatFromDepthFormat;
using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
using namespace Common::Literals;
template <class P>
class TextureCache {
/// Address shift for caching images into a hash table
static constexpr u64 PAGE_BITS = 20;
/// Enables debugging features to the texture cache
static constexpr bool ENABLE_VALIDATION = P::ENABLE_VALIDATION;
/// Implement blits as copies between framebuffers
static constexpr bool FRAMEBUFFER_BLITS = P::FRAMEBUFFER_BLITS;
/// True when some copies have to be emulated
static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES;
/// True when the API can provide info about the memory of the device.
static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO;
/// Image view ID for null descriptors
static constexpr ImageViewId NULL_IMAGE_VIEW_ID{0};
/// Sampler ID for bugged sampler ids
static constexpr SamplerId NULL_SAMPLER_ID{0};
static constexpr u64 DEFAULT_EXPECTED_MEMORY = 1_GiB;
static constexpr u64 DEFAULT_CRITICAL_MEMORY = 2_GiB;
using Runtime = typename P::Runtime;
using Image = typename P::Image;
using ImageAlloc = typename P::ImageAlloc;
using ImageView = typename P::ImageView;
using Sampler = typename P::Sampler;
using Framebuffer = typename P::Framebuffer;
struct BlitImages {
ImageId dst_id;
ImageId src_id;
PixelFormat dst_format;
PixelFormat src_format;
};
template <typename T>
struct IdentityHash {
[[nodiscard]] size_t operator()(T value) const noexcept {
return static_cast<size_t>(value);
}
};
public:
explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&, Tegra::Engines::Maxwell3D&,
Tegra::Engines::KeplerCompute&, Tegra::MemoryManager&);
/// Notify the cache that a new frame has been queued
void TickFrame();
/// Return a constant reference to the given image view id
[[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept;
/// Return a reference to the given image view id
[[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept;
/// Mark an image as modified from the GPU
void MarkModification(ImageId id) noexcept;
/// Fill image_view_ids with the graphics images in indices
void FillGraphicsImageViews(std::span<const u32> indices,
std::span<ImageViewId> image_view_ids);
/// Fill image_view_ids with the compute images in indices
void FillComputeImageViews(std::span<const u32> indices, std::span<ImageViewId> image_view_ids);
/// Get the sampler from the graphics descriptor table in the specified index
Sampler* GetGraphicsSampler(u32 index);
/// Get the sampler from the compute descriptor table in the specified index
Sampler* GetComputeSampler(u32 index);
/// Refresh the state for graphics image view and sampler descriptors
void SynchronizeGraphicsDescriptors();
/// Refresh the state for compute image view and sampler descriptors
void SynchronizeComputeDescriptors();
/// Update bound render targets and upload memory if necessary
/// @param is_clear True when the render targets are being used for clears
void UpdateRenderTargets(bool is_clear);
/// Find a framebuffer with the currently bound render targets
/// UpdateRenderTargets should be called before this
Framebuffer* GetFramebuffer();
/// Mark images in a range as modified from the CPU
void WriteMemory(VAddr cpu_addr, size_t size);
/// Download contents of host images to guest memory in a region
void DownloadMemory(VAddr cpu_addr, size_t size);
/// Remove images in a region
void UnmapMemory(VAddr cpu_addr, size_t size);
/// Remove images in a region
void UnmapGPUMemory(GPUVAddr gpu_addr, size_t size);
/// Blit an image with the given parameters
void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Surface& src,
const Tegra::Engines::Fermi2D::Config& copy);
/// Try to find a cached image view in the given CPU address
[[nodiscard]] ImageView* TryFindFramebufferImageView(VAddr cpu_addr);
/// Return true when there are uncommitted images to be downloaded
[[nodiscard]] bool HasUncommittedFlushes() const noexcept;
/// Return true when the caller should wait for async downloads
[[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept;
/// Commit asynchronous downloads
void CommitAsyncFlushes();
/// Pop asynchronous downloads
void PopAsyncFlushes();
/// Return true when a CPU region is modified from the GPU
[[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
std::mutex mutex;
private:
/// Iterate over all page indices in a range
template <typename Func>
static void ForEachCPUPage(VAddr addr, size_t size, Func&& func) {
static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>;
const u64 page_end = (addr + size - 1) >> PAGE_BITS;
for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) {
if constexpr (RETURNS_BOOL) {
if (func(page)) {
break;
}
} else {
func(page);
}
}
}
template <typename Func>
static void ForEachGPUPage(GPUVAddr addr, size_t size, Func&& func) {
static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>;
const u64 page_end = (addr + size - 1) >> PAGE_BITS;
for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) {
if constexpr (RETURNS_BOOL) {
if (func(page)) {
break;
}
} else {
func(page);
}
}
}
/// Runs the Garbage Collector.
void RunGarbageCollector();
/// Fills image_view_ids in the image views in indices
void FillImageViews(DescriptorTable<TICEntry>& table,
std::span<ImageViewId> cached_image_view_ids, std::span<const u32> indices,
std::span<ImageViewId> image_view_ids);
/// Find or create an image view in the guest descriptor table
ImageViewId VisitImageView(DescriptorTable<TICEntry>& table,
std::span<ImageViewId> cached_image_view_ids, u32 index);
/// Find or create a framebuffer with the given render target parameters
FramebufferId GetFramebufferId(const RenderTargets& key);
/// Refresh the contents (pixel data) of an image
void RefreshContents(Image& image, ImageId image_id);
/// Upload data from guest to an image
template <typename StagingBuffer>
void UploadImageContents(Image& image, StagingBuffer& staging_buffer);
/// Find or create an image view from a guest descriptor
[[nodiscard]] ImageViewId FindImageView(const TICEntry& config);
/// Create a new image view from a guest descriptor
[[nodiscard]] ImageViewId CreateImageView(const TICEntry& config);
/// Find or create an image from the given parameters
[[nodiscard]] ImageId FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
RelaxedOptions options = RelaxedOptions{});
/// Find an image from the given parameters
[[nodiscard]] ImageId FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
RelaxedOptions options);
/// Create an image from the given parameters
[[nodiscard]] ImageId InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
RelaxedOptions options);
/// Create a new image and join perfectly matching existing images
/// Remove joined images from the cache
[[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr);
/// Return a blit image pair from the given guest blit parameters
[[nodiscard]] BlitImages GetBlitImages(const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Surface& src);
/// Find or create a sampler from a guest descriptor sampler
[[nodiscard]] SamplerId FindSampler(const TSCEntry& config);
/// Find or create an image view for the given color buffer index
[[nodiscard]] ImageViewId FindColorBuffer(size_t index, bool is_clear);
/// Find or create an image view for the depth buffer
[[nodiscard]] ImageViewId FindDepthBuffer(bool is_clear);
/// Find or create a view for a render target with the given image parameters
[[nodiscard]] ImageViewId FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr,
bool is_clear);
/// Iterates over all the images in a region calling func
template <typename Func>
void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func);
template <typename Func>
void ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func);
template <typename Func>
void ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func);
/// Iterates over all the images in a region calling func
template <typename Func>
void ForEachSparseSegment(ImageBase& image, Func&& func);
/// Find or create an image view in the given image with the passed parameters
[[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info);
/// Register image in the page table
void RegisterImage(ImageId image);
/// Unregister image from the page table
void UnregisterImage(ImageId image);
/// Track CPU reads and writes for image
void TrackImage(ImageBase& image, ImageId image_id);
/// Stop tracking CPU reads and writes for image
void UntrackImage(ImageBase& image, ImageId image_id);
/// Delete image from the cache
void DeleteImage(ImageId image);
/// Remove image views references from the cache
void RemoveImageViewReferences(std::span<const ImageViewId> removed_views);
/// Remove framebuffers using the given image views from the cache
void RemoveFramebuffers(std::span<const ImageViewId> removed_views);
/// Mark an image as modified from the GPU
void MarkModification(ImageBase& image) noexcept;
/// Synchronize image aliases, copying data if needed
void SynchronizeAliases(ImageId image_id);
/// Prepare an image to be used
void PrepareImage(ImageId image_id, bool is_modification, bool invalidate);
/// Prepare an image view to be used
void PrepareImageView(ImageViewId image_view_id, bool is_modification, bool invalidate);
/// Execute copies from one image to the other, even if they are incompatible
void CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies);
/// Bind an image view as render target, downloading resources preemtively if needed
void BindRenderTarget(ImageViewId* old_id, ImageViewId new_id);
/// Create a render target from a given image and image view parameters
[[nodiscard]] std::pair<FramebufferId, ImageViewId> RenderTargetFromImage(
ImageId, const ImageViewInfo& view_info);
/// Returns true if the current clear parameters clear the whole image of a given image view
[[nodiscard]] bool IsFullClear(ImageViewId id);
Runtime& runtime;
VideoCore::RasterizerInterface& rasterizer;
Tegra::Engines::Maxwell3D& maxwell3d;
Tegra::Engines::KeplerCompute& kepler_compute;
Tegra::MemoryManager& gpu_memory;
DescriptorTable<TICEntry> graphics_image_table{gpu_memory};
DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory};
std::vector<SamplerId> graphics_sampler_ids;
std::vector<ImageViewId> graphics_image_view_ids;
DescriptorTable<TICEntry> compute_image_table{gpu_memory};
DescriptorTable<TSCEntry> compute_sampler_table{gpu_memory};
std::vector<SamplerId> compute_sampler_ids;
std::vector<ImageViewId> compute_image_view_ids;
RenderTargets render_targets;
std::unordered_map<TICEntry, ImageViewId> image_views;
std::unordered_map<TSCEntry, SamplerId> samplers;
std::unordered_map<RenderTargets, FramebufferId> framebuffers;
std::unordered_map<u64, std::vector<ImageMapId>, IdentityHash<u64>> page_table;
std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> gpu_page_table;
std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> sparse_page_table;
std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views;
VAddr virtual_invalid_space{};
bool has_deleted_images = false;
u64 total_used_memory = 0;
u64 minimum_memory;
u64 expected_memory;
u64 critical_memory;
SlotVector<Image> slot_images;
SlotVector<ImageMapView> slot_map_views;
SlotVector<ImageView> slot_image_views;
SlotVector<ImageAlloc> slot_image_allocs;
SlotVector<Sampler> slot_samplers;
SlotVector<Framebuffer> slot_framebuffers;
// TODO: This data structure is not optimal and it should be reworked
std::vector<ImageId> uncommitted_downloads;
std::queue<std::vector<ImageId>> committed_downloads;
static constexpr size_t TICKS_TO_DESTROY = 6;
DelayedDestructionRing<Image, TICKS_TO_DESTROY> sentenced_images;
DelayedDestructionRing<ImageView, TICKS_TO_DESTROY> sentenced_image_view;
DelayedDestructionRing<Framebuffer, TICKS_TO_DESTROY> sentenced_framebuffers;
std::unordered_map<GPUVAddr, ImageAllocId> image_allocs_table;
u64 modification_tick = 0;
u64 frame_tick = 0;
typename SlotVector<Image>::Iterator deletion_iterator;
};
} // namespace VideoCommon

View File

@@ -151,6 +151,76 @@ private:
const IntType& m_Bits;
};
enum class IntegerEncoding { JustBits, Quint, Trit };
struct IntegerEncodedValue {
constexpr IntegerEncodedValue() = default;
constexpr IntegerEncodedValue(IntegerEncoding encoding_, u32 num_bits_)
: encoding{encoding_}, num_bits{num_bits_} {}
constexpr bool MatchesEncoding(const IntegerEncodedValue& other) const {
return encoding == other.encoding && num_bits == other.num_bits;
}
// Returns the number of bits required to encode num_vals values.
u32 GetBitLength(u32 num_vals) const {
u32 total_bits = num_bits * num_vals;
if (encoding == IntegerEncoding::Trit) {
total_bits += (num_vals * 8 + 4) / 5;
} else if (encoding == IntegerEncoding::Quint) {
total_bits += (num_vals * 7 + 2) / 3;
}
return total_bits;
}
IntegerEncoding encoding{};
u32 num_bits = 0;
u32 bit_value = 0;
union {
u32 quint_value = 0;
u32 trit_value;
};
};
// Returns a new instance of this struct that corresponds to the
// can take no more than mav_value values
static constexpr IntegerEncodedValue CreateEncoding(u32 mav_value) {
while (mav_value > 0) {
u32 check = mav_value + 1;
// Is mav_value a power of two?
if (!(check & (check - 1))) {
return IntegerEncodedValue(IntegerEncoding::JustBits, std::popcount(mav_value));
}
// Is mav_value of the type 3*2^n - 1?
if ((check % 3 == 0) && !((check / 3) & ((check / 3) - 1))) {
return IntegerEncodedValue(IntegerEncoding::Trit, std::popcount(check / 3 - 1));
}
// Is mav_value of the type 5*2^n - 1?
if ((check % 5 == 0) && !((check / 5) & ((check / 5) - 1))) {
return IntegerEncodedValue(IntegerEncoding::Quint, std::popcount(check / 5 - 1));
}
// Apparently it can't be represented with a bounded integer sequence...
// just iterate.
mav_value--;
}
return IntegerEncodedValue(IntegerEncoding::JustBits, 0);
}
static constexpr std::array<IntegerEncodedValue, 256> MakeEncodedValues() {
std::array<IntegerEncodedValue, 256> encodings{};
for (std::size_t i = 0; i < encodings.size(); ++i) {
encodings[i] = CreateEncoding(static_cast<u32>(i));
}
return encodings;
}
static constexpr std::array<IntegerEncodedValue, 256> ASTC_ENCODINGS_VALUES = MakeEncodedValues();
namespace Tegra::Texture::ASTC {
using IntegerEncodedVector = boost::container::static_vector<
IntegerEncodedValue, 256,
@@ -521,35 +591,41 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) {
return params;
}
static void FillVoidExtentLDR(InputBitStream& strm, std::span<u32> outBuf, u32 blockWidth,
u32 blockHeight) {
// Don't actually care about the void extent, just read the bits...
for (s32 i = 0; i < 4; ++i) {
strm.ReadBits<13>();
// Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)]
// is the same as [(num_bits - 1):0] and repeats all the way down.
template <typename IntType>
static constexpr IntType Replicate(IntType val, u32 num_bits, u32 to_bit) {
if (num_bits == 0 || to_bit == 0) {
return 0;
}
// Decode the RGBA components and renormalize them to the range [0, 255]
u16 r = static_cast<u16>(strm.ReadBits<16>());
u16 g = static_cast<u16>(strm.ReadBits<16>());
u16 b = static_cast<u16>(strm.ReadBits<16>());
u16 a = static_cast<u16>(strm.ReadBits<16>());
u32 rgba = (r >> 8) | (g & 0xFF00) | (static_cast<u32>(b) & 0xFF00) << 8 |
(static_cast<u32>(a) & 0xFF00) << 16;
for (u32 j = 0; j < blockHeight; j++) {
for (u32 i = 0; i < blockWidth; i++) {
outBuf[j * blockWidth + i] = rgba;
const IntType v = val & static_cast<IntType>((1 << num_bits) - 1);
IntType res = v;
u32 reslen = num_bits;
while (reslen < to_bit) {
u32 comp = 0;
if (num_bits > to_bit - reslen) {
u32 newshift = to_bit - reslen;
comp = num_bits - newshift;
num_bits = newshift;
}
res = static_cast<IntType>(res << num_bits);
res = static_cast<IntType>(res | (v >> comp));
reslen += num_bits;
}
return res;
}
static void FillError(std::span<u32> outBuf, u32 blockWidth, u32 blockHeight) {
for (u32 j = 0; j < blockHeight; j++) {
for (u32 i = 0; i < blockWidth; i++) {
outBuf[j * blockWidth + i] = 0xFFFF00FF;
}
static constexpr std::size_t NumReplicateEntries(u32 num_bits) {
return std::size_t(1) << num_bits;
}
template <typename IntType, u32 num_bits, u32 to_bit>
static constexpr auto MakeReplicateTable() {
std::array<IntType, NumReplicateEntries(num_bits)> table{};
for (IntType value = 0; value < static_cast<IntType>(std::size(table)); ++value) {
table[value] = Replicate(value, num_bits, to_bit);
}
return table;
}
static constexpr auto REPLICATE_BYTE_TO_16_TABLE = MakeReplicateTable<u32, 8, 16>();
@@ -572,6 +648,9 @@ static constexpr auto REPLICATE_2_BIT_TO_8_TABLE = MakeReplicateTable<u32, 2, 8>
static constexpr auto REPLICATE_3_BIT_TO_8_TABLE = MakeReplicateTable<u32, 3, 8>();
static constexpr auto REPLICATE_4_BIT_TO_8_TABLE = MakeReplicateTable<u32, 4, 8>();
static constexpr auto REPLICATE_5_BIT_TO_8_TABLE = MakeReplicateTable<u32, 5, 8>();
static constexpr auto REPLICATE_6_BIT_TO_8_TABLE = MakeReplicateTable<u32, 6, 8>();
static constexpr auto REPLICATE_7_BIT_TO_8_TABLE = MakeReplicateTable<u32, 7, 8>();
static constexpr auto REPLICATE_8_BIT_TO_8_TABLE = MakeReplicateTable<u32, 8, 8>();
/// Use a precompiled table with the most common usages, if it's not in the expected range, fallback
/// to the runtime implementation
static constexpr u32 FastReplicateTo8(u32 value, u32 num_bits) {
@@ -1316,6 +1395,37 @@ static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const u32*& colorValues,
#undef READ_INT_VALUES
}
static void FillVoidExtentLDR(InputBitStream& strm, std::span<u32> outBuf, u32 blockWidth,
u32 blockHeight) {
// Don't actually care about the void extent, just read the bits...
for (s32 i = 0; i < 4; ++i) {
strm.ReadBits<13>();
}
// Decode the RGBA components and renormalize them to the range [0, 255]
u16 r = static_cast<u16>(strm.ReadBits<16>());
u16 g = static_cast<u16>(strm.ReadBits<16>());
u16 b = static_cast<u16>(strm.ReadBits<16>());
u16 a = static_cast<u16>(strm.ReadBits<16>());
u32 rgba = (r >> 8) | (g & 0xFF00) | (static_cast<u32>(b) & 0xFF00) << 8 |
(static_cast<u32>(a) & 0xFF00) << 16;
for (u32 j = 0; j < blockHeight; j++) {
for (u32 i = 0; i < blockWidth; i++) {
outBuf[j * blockWidth + i] = rgba;
}
}
}
static void FillError(std::span<u32> outBuf, u32 blockWidth, u32 blockHeight) {
for (u32 j = 0; j < blockHeight; j++) {
for (u32 i = 0; i < blockWidth; i++) {
outBuf[j * blockWidth + i] = 0xFFFF00FF;
}
}
}
static void DecompressBlock(std::span<const u8, 16> inBuf, const u32 blockWidth,
const u32 blockHeight, std::span<u32, 12 * 12> outBuf) {
InputBitStream strm(inBuf);

View File

@@ -9,117 +9,6 @@
namespace Tegra::Texture::ASTC {
enum class IntegerEncoding { JustBits, Quint, Trit };
struct IntegerEncodedValue {
constexpr IntegerEncodedValue() = default;
constexpr IntegerEncodedValue(IntegerEncoding encoding_, u32 num_bits_)
: encoding{encoding_}, num_bits{num_bits_} {}
constexpr bool MatchesEncoding(const IntegerEncodedValue& other) const {
return encoding == other.encoding && num_bits == other.num_bits;
}
// Returns the number of bits required to encode num_vals values.
u32 GetBitLength(u32 num_vals) const {
u32 total_bits = num_bits * num_vals;
if (encoding == IntegerEncoding::Trit) {
total_bits += (num_vals * 8 + 4) / 5;
} else if (encoding == IntegerEncoding::Quint) {
total_bits += (num_vals * 7 + 2) / 3;
}
return total_bits;
}
IntegerEncoding encoding{};
u32 num_bits = 0;
u32 bit_value = 0;
union {
u32 quint_value = 0;
u32 trit_value;
};
};
// Returns a new instance of this struct that corresponds to the
// can take no more than mav_value values
constexpr IntegerEncodedValue CreateEncoding(u32 mav_value) {
while (mav_value > 0) {
u32 check = mav_value + 1;
// Is mav_value a power of two?
if (!(check & (check - 1))) {
return IntegerEncodedValue(IntegerEncoding::JustBits, std::popcount(mav_value));
}
// Is mav_value of the type 3*2^n - 1?
if ((check % 3 == 0) && !((check / 3) & ((check / 3) - 1))) {
return IntegerEncodedValue(IntegerEncoding::Trit, std::popcount(check / 3 - 1));
}
// Is mav_value of the type 5*2^n - 1?
if ((check % 5 == 0) && !((check / 5) & ((check / 5) - 1))) {
return IntegerEncodedValue(IntegerEncoding::Quint, std::popcount(check / 5 - 1));
}
// Apparently it can't be represented with a bounded integer sequence...
// just iterate.
mav_value--;
}
return IntegerEncodedValue(IntegerEncoding::JustBits, 0);
}
constexpr std::array<IntegerEncodedValue, 256> MakeEncodedValues() {
std::array<IntegerEncodedValue, 256> encodings{};
for (std::size_t i = 0; i < encodings.size(); ++i) {
encodings[i] = CreateEncoding(static_cast<u32>(i));
}
return encodings;
}
constexpr std::array<IntegerEncodedValue, 256> ASTC_ENCODINGS_VALUES = MakeEncodedValues();
// Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)]
// is the same as [(num_bits - 1):0] and repeats all the way down.
template <typename IntType>
constexpr IntType Replicate(IntType val, u32 num_bits, u32 to_bit) {
if (num_bits == 0 || to_bit == 0) {
return 0;
}
const IntType v = val & static_cast<IntType>((1 << num_bits) - 1);
IntType res = v;
u32 reslen = num_bits;
while (reslen < to_bit) {
u32 comp = 0;
if (num_bits > to_bit - reslen) {
u32 newshift = to_bit - reslen;
comp = num_bits - newshift;
num_bits = newshift;
}
res = static_cast<IntType>(res << num_bits);
res = static_cast<IntType>(res | (v >> comp));
reslen += num_bits;
}
return res;
}
constexpr std::size_t NumReplicateEntries(u32 num_bits) {
return std::size_t(1) << num_bits;
}
template <typename IntType, u32 num_bits, u32 to_bit>
constexpr auto MakeReplicateTable() {
std::array<IntType, NumReplicateEntries(num_bits)> table{};
for (IntType value = 0; value < static_cast<IntType>(std::size(table)); ++value) {
table[value] = Replicate(value, num_bits, to_bit);
}
return table;
}
constexpr auto REPLICATE_6_BIT_TO_8_TABLE = MakeReplicateTable<u32, 6, 8>();
constexpr auto REPLICATE_7_BIT_TO_8_TABLE = MakeReplicateTable<u32, 7, 8>();
constexpr auto REPLICATE_8_BIT_TO_8_TABLE = MakeReplicateTable<u32, 8, 8>();
void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
uint32_t block_width, uint32_t block_height, std::span<uint8_t> output);

View File

@@ -18,9 +18,9 @@
namespace Tegra::Texture {
namespace {
template <bool TO_LINEAR>
void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
u32 height, u32 depth, u32 block_height, u32 block_depth, u32 stride_alignment) {
template <bool TO_LINEAR, u32 BYTES_PER_PIXEL>
void SwizzleImpl(std::span<u8> output, std::span<const u8> input, u32 width, u32 height, u32 depth,
u32 block_height, u32 block_depth, u32 stride_alignment) {
// The origin of the transformation can be configured here, leave it as zero as the current API
// doesn't expose it.
static constexpr u32 origin_x = 0;
@@ -28,9 +28,9 @@ void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixe
static constexpr u32 origin_z = 0;
// We can configure here a custom pitch
// As it's not exposed 'width * bpp' will be the expected pitch.
const u32 pitch = width * bytes_per_pixel;
const u32 stride = Common::AlignUpLog2(width, stride_alignment) * bytes_per_pixel;
// As it's not exposed 'width * BYTES_PER_PIXEL' will be the expected pitch.
const u32 pitch = width * BYTES_PER_PIXEL;
const u32 stride = Common::AlignUpLog2(width, stride_alignment) * BYTES_PER_PIXEL;
const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT);
const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth);
@@ -54,14 +54,14 @@ void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixe
((block_y & block_height_mask) << GOB_SIZE_SHIFT);
for (u32 column = 0; column < width; ++column) {
const u32 x = (column + origin_x) * bytes_per_pixel;
const u32 x = (column + origin_x) * BYTES_PER_PIXEL;
const u32 offset_x = (x >> GOB_SIZE_X_SHIFT) << x_shift;
const u32 base_swizzled_offset = offset_z + offset_y + offset_x;
const u32 swizzled_offset = base_swizzled_offset + table[x % GOB_SIZE_X];
const u32 unswizzled_offset =
slice * pitch * height + line * pitch + column * bytes_per_pixel;
slice * pitch * height + line * pitch + column * BYTES_PER_PIXEL;
if (const auto offset = (TO_LINEAR ? unswizzled_offset : swizzled_offset);
offset >= input.size()) {
@@ -73,11 +73,45 @@ void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixe
u8* const dst = &output[TO_LINEAR ? swizzled_offset : unswizzled_offset];
const u8* const src = &input[TO_LINEAR ? unswizzled_offset : swizzled_offset];
std::memcpy(dst, src, bytes_per_pixel);
std::memcpy(dst, src, BYTES_PER_PIXEL);
}
}
}
}
template <bool TO_LINEAR>
void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
u32 height, u32 depth, u32 block_height, u32 block_depth, u32 stride_alignment) {
switch (bytes_per_pixel) {
case 1:
return SwizzleImpl<TO_LINEAR, 1>(output, input, width, height, depth, block_height,
block_depth, stride_alignment);
case 2:
return SwizzleImpl<TO_LINEAR, 2>(output, input, width, height, depth, block_height,
block_depth, stride_alignment);
case 3:
return SwizzleImpl<TO_LINEAR, 3>(output, input, width, height, depth, block_height,
block_depth, stride_alignment);
case 4:
return SwizzleImpl<TO_LINEAR, 4>(output, input, width, height, depth, block_height,
block_depth, stride_alignment);
case 6:
return SwizzleImpl<TO_LINEAR, 6>(output, input, width, height, depth, block_height,
block_depth, stride_alignment);
case 8:
return SwizzleImpl<TO_LINEAR, 8>(output, input, width, height, depth, block_height,
block_depth, stride_alignment);
case 12:
return SwizzleImpl<TO_LINEAR, 12>(output, input, width, height, depth, block_height,
block_depth, stride_alignment);
case 16:
return SwizzleImpl<TO_LINEAR, 16>(output, input, width, height, depth, block_height,
block_depth, stride_alignment);
default:
UNREACHABLE_MSG("Invalid bytes_per_pixel={}", bytes_per_pixel);
}
}
} // Anonymous namespace
void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel,

View File

@@ -526,6 +526,17 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
SetNext(next, workgroup_layout);
}
VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR executable_properties;
if (khr_pipeline_executable_properties) {
LOG_INFO(Render_Vulkan, "Enabling shader feedback, expect slower shader build times");
executable_properties = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR,
.pNext = nullptr,
.pipelineExecutableInfo = VK_TRUE,
};
SetNext(next, executable_properties);
}
if (!ext_depth_range_unrestricted) {
LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted");
}
@@ -824,6 +835,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
bool has_khr_shader_float16_int8{};
bool has_khr_workgroup_memory_explicit_layout{};
bool has_khr_pipeline_executable_properties{};
bool has_ext_subgroup_size_control{};
bool has_ext_transform_feedback{};
bool has_ext_custom_border_color{};
@@ -878,6 +890,10 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME,
true);
}
if (Settings::values.renderer_shader_feedback) {
test(has_khr_pipeline_executable_properties,
VK_KHR_PIPELINE_EXECUTABLE_PROPERTIES_EXTENSION_NAME, false);
}
}
VkPhysicalDeviceFeatures2KHR features{};
features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR;
@@ -1033,6 +1049,19 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
khr_workgroup_memory_explicit_layout = true;
}
}
if (has_khr_pipeline_executable_properties) {
VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR executable_properties;
executable_properties.sType =
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR;
executable_properties.pNext = nullptr;
features.pNext = &executable_properties;
physical.GetFeatures2KHR(features);
if (executable_properties.pipelineExecutableInfo) {
extensions.push_back(VK_KHR_PIPELINE_EXECUTABLE_PROPERTIES_EXTENSION_NAME);
khr_pipeline_executable_properties = true;
}
}
if (khr_push_descriptor) {
VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor;
push_descriptor.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR;

View File

@@ -214,6 +214,11 @@ public:
return khr_push_descriptor;
}
/// Returns true if VK_KHR_pipeline_executable_properties is enabled.
bool IsKhrPipelineEexecutablePropertiesEnabled() const {
return khr_pipeline_executable_properties;
}
/// Returns true if the device supports VK_KHR_workgroup_memory_explicit_layout.
bool IsKhrWorkgroupMemoryExplicitLayoutSupported() const {
return khr_workgroup_memory_explicit_layout;
@@ -378,6 +383,7 @@ private:
bool khr_spirv_1_4{}; ///< Support for VK_KHR_spirv_1_4.
bool khr_workgroup_memory_explicit_layout{}; ///< Support for explicit workgroup layouts.
bool khr_push_descriptor{}; ///< Support for VK_KHR_push_descritor.
bool khr_pipeline_executable_properties{}; ///< Support for executable properties.
bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8.
bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax.
bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted.

Some files were not shown because too many files have changed in this diff Show More