Compare commits

..

184 Commits

Author SHA1 Message Date
Morph
dc61b7045b renderer_vulkan: Implement S8_UINT stencil format
It should be noted that on Windows, only nvidia gpus support this format natively as of this commit.
2021-11-18 00:05:51 -05:00
Morph
6dd6dc046c renderer_opengl: Implement S8_UINT stencil format 2021-11-17 15:05:07 -05:00
Morph
2348eb41f3 video_core: Add S8_UINT stencil format 2021-11-17 15:04:38 -05:00
bunnei
71313509f7 Merge pull request #7219 from FernandoS27/aristotles-right-testicle
Project A.R.T. Advanced Rendering Techniques
2021-11-16 18:52:11 -08:00
Morph
3154773c00 Merge pull request #7347 from lioncash/catch
CMakeLists: Update catch to 2.13.7
2021-11-16 21:26:11 -05:00
Fernando Sahmkow
1c8a3d8d29 TextureCache: Fix Automatic Anisotropic. 2021-11-17 03:15:08 +01:00
Lioncash
6e57c519e2 CMakeLists: Update catch to 2.13.7
Keeps the testing libraries up to date.
2021-11-16 20:04:25 -05:00
FernandoS27
1128cc35b9 TextureCache: OGL query device memory if possible. 2021-11-17 01:45:50 +01:00
Fernando Sahmkow
978f598ff6 TextureCache: Fix OGL cleaning 2021-11-17 00:59:46 +01:00
Fernando Sahmkow
282e04bffb TextureCache: Add automatic anisotropic filtering and refactor code. 2021-11-16 23:14:51 +01:00
Fernando Sahmkow
5230378709 TextureCache: Make a better Anisotropic setter. 2021-11-16 22:11:33 +01:00
Fernando Sahmkow
6c97ab571a Texture Cache: revert Image changes. 2021-11-16 22:11:33 +01:00
Fernando Sahmkow
6f98690963 ShaderCache: Better fix for Shuffling gl_FragCoord 2021-11-16 22:11:33 +01:00
FernandoS27
d46a71e786 HostShader: fix Gaussian filter. 2021-11-16 22:11:33 +01:00
FernandoS27
de1c8c5c2c Texture Cache/Shader decompiler: Resize PointSize on rescaling, refactor and make reaper more aggressive on 4GB GPUs. 2021-11-16 22:11:33 +01:00
ameerj
917b2466ad texture_cache: Refactor Render Target scaling function 2021-11-16 22:11:33 +01:00
ameerj
9fc1fa1b0d gl_resource_manager: Ensure non EXT_framebuffer objects are created 2021-11-16 22:11:33 +01:00
FernandoS27
099b0b3167 Texture Cache: Fix memory usage on ScaleDown. 2021-11-16 22:11:33 +01:00
FernandoS27
9189aacfe2 OpenGL: Fix viewport/Scissor scaling on downscaling. 2021-11-16 22:11:33 +01:00
FernandoS27
c97c46747d Vulkan: fix regression. 2021-11-16 22:11:33 +01:00
ameerj
87abab71ff host_shaders: Misc copyright/style changes 2021-11-16 22:11:33 +01:00
ameerj
864f2e0b81 configure_graphics.ui: Cleanup scaling options and fix duplicate name warning 2021-11-16 22:11:33 +01:00
ameerj
99124b7261 FSR: Fix GCC build errors 2021-11-16 22:11:33 +01:00
Marshall Mohror
bb03675485 Vulkan: Reimplement FSR constant generation functions to avoid GCC warnings 2021-11-16 22:11:33 +01:00
ameerj
47369faaab vk_blit_screen: Fix AA destruction order 2021-11-16 22:11:32 +01:00
Marshall Mohror
dcc5b4f6b0 Presentation: Only use FP16 in scaling shaders on supported devices in Vulkan 2021-11-16 22:11:32 +01:00
ameerj
a39e867c73 renderer_vulkan/blit_image: Use generic color state on Depth to Color blits
Fixes Bayonetta 2 on AMD
2021-11-16 22:11:32 +01:00
ameerj
282a4501d9 vk_texture_cache: Refactor 3D scaling helpers 2021-11-16 22:11:32 +01:00
ameerj
93c9eb196f gl_rasterizer: Fix ScissorTest and Clear when scaling 2021-11-16 22:11:32 +01:00
ameerj
172d4f1e3b gl_texture_cache: Simplify scaling procedures 2021-11-16 22:11:32 +01:00
Fernando Sahmkow
5c6fa88935 OpenGlTextureCache: Fix state invalidation on rescaling. 2021-11-16 22:11:32 +01:00
Fernando Sahmkow
c5dbd93adb VulkanBufferCache: Avoid adding barriers between multiple copies. 2021-11-16 22:11:32 +01:00
Fernando Sahmkow
99547d2656 HostShader: Fix gaussian and add attribution. 2021-11-16 22:11:32 +01:00
Fernando Sahmkow
a96c9c803b Yuzu UI: Add button for Anti Alias 2021-11-16 22:11:32 +01:00
Fernando Sahmkow
21a8ba0437 Vulkan: Fix FXAA in AMD. 2021-11-16 22:11:32 +01:00
Fernando Sahmkow
6cdfaee7b4 Texture Cache: Fix blitting. 2021-11-16 22:11:32 +01:00
FernandoS27
e6f1ed08fb Vulkan: Implement FXAA 2021-11-16 22:11:32 +01:00
Marshall Mohror
056894f07a OpenGL: fix FXAA with scaling 2021-11-16 22:11:32 +01:00
Marshall Mohror
48cf376462 OpenGL: Implement FXAA 2021-11-16 22:11:32 +01:00
Marshall Mohror
74e39ed6ee Frontend: Add anti-aliasing method setting 2021-11-16 22:11:32 +01:00
Marshall Mohror
510caeefb3 Settings: Add anti-aliasing method setting 2021-11-16 22:11:32 +01:00
FernandoS27
2eff80b47f QtGUI: Add button to toggle the filter. 2021-11-16 22:11:32 +01:00
FernandoS27
9e065b9c7d VideoCore: Add gaussian filtering. 2021-11-16 22:11:32 +01:00
FernandoS27
bf01b7993d TextureCache: Improve Reaper. 2021-11-16 22:11:32 +01:00
FernandoS27
bb3e95133d Vulkan: fix waiting on semaphore. 2021-11-16 22:11:32 +01:00
Marshall Mohror
916b882ea8 Update scaleforce to use FP16 2021-11-16 22:11:32 +01:00
FernandoS27
e7fc60406e VideoCore: Add more rescaling option. 2021-11-16 22:11:31 +01:00
FernandoS27
d37d10e7a7 TextureCache: fix rescaling in aliases and overlap joins. 2021-11-16 22:11:31 +01:00
Marshall Mohror
7506ac4118 Presentation: Fix turning FSR on and off in settings 2021-11-16 22:11:31 +01:00
Fernando Sahmkow
4ad22c7d2b Video Core: fix building for GCC. 2021-11-16 22:11:31 +01:00
FernandoS27
826a350e2b Vulkan Rasterizer: Fix clears on integer textures. 2021-11-16 22:11:31 +01:00
FernandoS27
150bc45401 Texture cache: fix Intel with rescaler. 2021-11-16 22:11:31 +01:00
FernandoS27
f3ff8bdc0e TextureCache: Fix blitting filter in Vulkan and correct viewport/scissor calculation when downscaling. 2021-11-16 22:11:31 +01:00
Fernando Sahmkow
3b61de74e6 Texture Cache: fix memory management and optimize scaled downloads, uploads. 2021-11-16 22:11:31 +01:00
Fernando Sahmkow
c2ca55c9d5 Texture Cache: ease the requirements of textures being blacklisted. 2021-11-16 22:11:31 +01:00
Fernando Sahmkow
50b4c774cb Vulkan: Fix Blit Depth Stencil 2021-11-16 22:11:31 +01:00
Fernando Sahmkow
425ab9ef4b Texture Cache: Fix downscaling and correct memory consumption. 2021-11-16 22:11:31 +01:00
Fernando Sahmkow
b60966041c Presentation: add Nearest Neighbor filter. 2021-11-16 22:11:31 +01:00
ameerj
77b0812d69 externals: Add only included ffx-fsr headers
The submodule adds a lot of unneeded bloat due to its addition of samples that contain large media files that are difficult to compress.
2021-11-16 22:11:31 +01:00
Marshall Mohror
37cb0377ae vulkan: Implement FidelityFX Super Resolution 2021-11-16 22:11:31 +01:00
FernandoS27
d4f5193bd3 Texture Cache: Rescale conversions between depth and color 2021-11-16 22:11:31 +01:00
Fernando Sahmkow
ef1dc42635 Texture cache: Fix memory consumption and ignore rating when a depth texture is rendered. 2021-11-16 22:11:31 +01:00
ameerj
618de4e787 vulkan: Fix rescaling push constant usage 2021-11-16 22:11:31 +01:00
Fernando Sahmkow
b7ccc58f23 Texture Cache: Fix downscaling on SMO. 2021-11-16 22:11:31 +01:00
ameerj
0f14c9379e texture_cache_base: Remove unused function declarations 2021-11-16 22:11:31 +01:00
ameerj
ca1db63116 yuzu: Fix build errors 2021-11-16 22:11:31 +01:00
ameerj
ebf36f23dd vk_texture_cache: Use 3D to scale images when blit is unsupported 2021-11-16 22:11:31 +01:00
ameerj
4de584005f texture_cache: Fix infinitely recursive ImageCanRescale check 2021-11-16 22:11:31 +01:00
ameerj
b1ae935f11 vk_texture_cache: Fix BlitScale of non-2D images 2021-11-16 22:11:31 +01:00
ameerj
abd07e4158 video_core: Refactor resolution scale function 2021-11-16 22:11:31 +01:00
ameerj
b14f2c7c82 texture_cache: Fix image resolves when src/dst are not both scaled 2021-11-16 22:11:30 +01:00
lat9nq
49c0c7efd2 yuzu_cmd: Read resolution_setup and scaling_filter from config
Also adds descriptions and the settings to the default config.
2021-11-16 22:11:30 +01:00
lat9nq
1c93476a80 video_core,yuzu: Move UpdateRescalingInfo call to video_core
This only needs to happen once per game boot, so we can just call it
during CreateGPU and be done with it, avoiding the need to call it in
the frontends.
2021-11-16 22:11:30 +01:00
ameerj
3233fa5dc8 gl_texture_cache: Disable scissor test when scaling textures
Fixes a bug on BOTW where some objects were no longer being rendered after blitting
2021-11-16 22:11:30 +01:00
ameerj
89a7e566c7 vk_texture_cache: Fix unsupported blit format error checking 2021-11-16 22:11:30 +01:00
ameerj
f8339cd703 vk_texture_cache: Fix early returns on unsupported scales 2021-11-16 22:11:30 +01:00
ameerj
31478c6c1b video_core: Misc resolution scaling related refactoring 2021-11-16 22:11:30 +01:00
ameerj
88ef04dbaf texture_cache: Refactor scaled image size calculation 2021-11-16 22:11:30 +01:00
Fernando Sahmkow
237a43004f Texture Cache: Fix calculations when scaling. 2021-11-16 22:11:30 +01:00
ameerj
e0a3830855 gl_texture_cache: Fix BGR pbo size for scaled textures 2021-11-16 22:11:30 +01:00
ameerj
581ea90062 rescaling_pass: Fix IR errors when unscalable texture types are encountered 2021-11-16 22:11:30 +01:00
Fernando Sahmkow
ea82bd4b7e Texture Cache: Fix Rescaling on Multisample 2021-11-16 22:11:30 +01:00
Fernando Sahmkow
19ca0c9ab5 TextureCache: Base fixes on rescaling. 2021-11-16 22:11:30 +01:00
ameerj
99eec162da rescaling_pass: Logic simplification and minor style cleanup 2021-11-16 22:11:30 +01:00
ameerj
276565973f rescaling_pass: Scale ImageFetch offset if it exists
Plus some code deduplication
2021-11-16 22:11:30 +01:00
ameerj
dd66384451 rescaling_pass: Enable PatchImageQueryDimensions on fragment stages 2021-11-16 22:11:30 +01:00
ameerj
36f261edef vk_texture_cache: Simplify scaled image management 2021-11-16 22:11:30 +01:00
ameerj
8183142cd4 gl_texture_cache: Fix scaling backup logic 2021-11-16 22:11:30 +01:00
ameerj
122ddeb7ff vk_rasterizer: Fix scaling on Y_NEGATE 2021-11-16 22:11:30 +01:00
ameerj
16017ac450 vk_texture_cache: Use nearest neighbor scaling when available 2021-11-16 22:11:30 +01:00
ameerj
27af298e78 gl_texture_cache: Fix depth and integer format scaling blits 2021-11-16 22:11:30 +01:00
ameerj
b027fac794 gl_texture_cache/rescaling_pass: minor cleanup 2021-11-16 22:11:30 +01:00
ameerj
c8a971be91 vk_texture_cache: Minor cleanup 2021-11-16 22:11:30 +01:00
ameerj
edb5844240 rescaling_pass: Fix and simplify shuffle/fragcoord pass 2021-11-16 22:11:30 +01:00
Fernando Sahmkow
b3a9c8f108 Shader: Don't rescale FragCoord if used by Shuffle 2021-11-16 22:11:30 +01:00
ameerj
6000fe69a4 image_info: Mark MSAA textures as non-rescalable
Blitting or resolving multisampled images requires the dimensions of the src and dst to be equal for valid usage, making them difficult for resolution scaling using the current implementation.
2021-11-16 22:11:30 +01:00
ameerj
80f8d4989e bootmanager: Fix screenshot resolution factor usage
Fixes screenshots at non integer scaling
2021-11-16 22:11:30 +01:00
ameerj
fcf2b2c78a gl_texture_cache: Simplify scaling
We don't need to reconstruct new textures every time we ScaleUp/ScaleDown. We can scale up once, and revert to the original texture whenever scaling down.
Fixes memory leaks due to glDeleteTextures being deferred for later handling on some drivers
2021-11-16 22:11:29 +01:00
ameerj
ae8d19d17e Renderers: Unify post processing filter shaders 2021-11-16 22:11:29 +01:00
ameerj
29710f3250 gl_texture_cache: fix scaling on upload 2021-11-16 22:11:29 +01:00
Fernando Sahmkow
a6b88e85bf Renderer: Implement Bicubic and ScaleForce filters. 2021-11-16 22:11:29 +01:00
Fernando Sahmkow
c5bbbf3902 Texture Cache: fix scaling on upload and stop scaling on base resolution. 2021-11-16 22:11:29 +01:00
ameerj
68e038404c shader, video_core: Fix GCC build errors 2021-11-16 22:11:29 +01:00
ameerj
65781f88f8 emit_spirv: Fix RescalingLayout alignment 2021-11-16 22:11:29 +01:00
Fernando Sahmkow
d7c9792169 TextureCache: Fix Buffer Views Scaling. 2021-11-16 22:11:29 +01:00
Fernando Sahmkow
dfa8291526 RescalingPass: Aggregate pixels on texelFetch while on Fragment Shader 2021-11-16 22:11:29 +01:00
Fernando Sahmkow
4b1393a691 Texture Cache: Correctly fix Blits Rescaling. 2021-11-16 22:11:29 +01:00
Fernando Sahmkow
8f78444de3 shader: Fix TextureSize check on rescaling. 2021-11-16 22:11:29 +01:00
ameerj
ed675cfd8c texture_cache: Disable dst_image scaling in BlitImage
Fixes scaling in Super Mario Party
2021-11-16 22:11:29 +01:00
ameerj
dc28284437 emit_spirv: Fix RescalingLayout alignment 2021-11-16 22:11:29 +01:00
ReinUsesLisp
e66d5b88a6 shader: Properly scale image reads and add GL SPIR-V support
Thanks for everything!
2021-11-16 22:11:29 +01:00
ReinUsesLisp
fc9bb3c3fe shader: Properly blacklist and scale image loads 2021-11-16 22:11:29 +01:00
ReinUsesLisp
c7a1cbad44 texture_cache: Add getter to query if image view is rescaled 2021-11-16 22:11:29 +01:00
ReinUsesLisp
526e47f148 vk_rasterizer: Minor style change 2021-11-16 22:11:29 +01:00
ReinUsesLisp
c9238555f7 gl_texture_cache: Fix scaling blits 2021-11-16 22:11:29 +01:00
ReinUsesLisp
cfeb161c7e glsl/glasm: Pass and use scaling parameters in shaders 2021-11-16 22:11:29 +01:00
ReinUsesLisp
4a512d6827 gl_rasterizer: Properly scale viewports and scissors 2021-11-16 22:11:29 +01:00
ameerj
05d98d9bbf gl_texture_cache: Fix multi layered texture Scale 2021-11-16 22:11:29 +01:00
ameerj
b6060873ce gl_compute_pipeline: Add downscale factor to shader uniforms 2021-11-16 22:11:29 +01:00
ameerj
9bc7b04ca5 gl_rasterizer: Fix rescale dirty state checking 2021-11-16 22:11:29 +01:00
ameerj
f086c82e1f gl_graphics_pipeline: Add downscale factor to shader uniforms 2021-11-16 22:11:28 +01:00
ReinUsesLisp
2182d25750 texture_cache: Fix blacklists on compute 2021-11-16 22:11:28 +01:00
ReinUsesLisp
56ccda1d99 texture_cache: Simplify image view queries and blacklisting 2021-11-16 22:11:28 +01:00
Fernando Sahmkow
48d81506a3 Vulkan: Fix downscaling Blit. 2021-11-16 22:11:28 +01:00
Fernando Sahmkow
07c564f38b Texture Cache: Implement Rating System. 2021-11-16 22:11:28 +01:00
Fernando Sahmkow
cee7eba64e OpenGL: set linear mag filter when blitting a downscaled image. 2021-11-16 22:11:28 +01:00
Fernando Sahmkow
117f8ee7a4 Vulkan: Fix AA when rescaling. 2021-11-16 22:11:28 +01:00
Fernando Sahmkow
0e8cf38f39 Texture Cache: Implement Blacklisting. 2021-11-16 22:11:28 +01:00
Morph
138d9d7eff main: Add resolution scale label in the status bar
Shows the resolution scale as "Scale: {}x" in the status bar, where {} is a floating point value representing the current resolution scaling factor.
2021-11-16 22:11:28 +01:00
ReinUsesLisp
d2388dd0d0 vulkan: Implement rescaling shader patching 2021-11-16 22:11:28 +01:00
ReinUsesLisp
dc72d4d4f5 vk_texture_cache: Properly scale blit source images 2021-11-16 22:11:28 +01:00
ReinUsesLisp
baf0993d5c vk_graphics_pipeline: Use Shader::NumDescriptors when possible 2021-11-16 22:11:28 +01:00
ReinUsesLisp
6f3a41abe2 opengl: Use Shader::NumDescriptors when possible 2021-11-16 22:11:28 +01:00
ReinUsesLisp
656adee630 spirv: Implement rescaling patching 2021-11-16 22:11:28 +01:00
ReinUsesLisp
01379c5e3c shader/rescaling_pass: Patch more instructions 2021-11-16 22:11:28 +01:00
ReinUsesLisp
c15332c44f shader: Add IsTextureScaled opcode 2021-11-16 22:11:28 +01:00
ReinUsesLisp
74efa57c1b texture_cache: Add image getters 2021-11-16 22:11:28 +01:00
ReinUsesLisp
c892359d1b shader: Add copy constructor to instructions 2021-11-16 22:11:28 +01:00
ReinUsesLisp
95761cc6a7 shader: Add integer division opcodes 2021-11-16 22:11:28 +01:00
ReinUsesLisp
43aa695a04 common/settings: Remove unused scaling options 2021-11-16 22:11:28 +01:00
ReinUsesLisp
e580299467 shader: Fix rescaling pass 2021-11-16 22:11:28 +01:00
ameerj
fad2c92a39 gl_texture_cache: Simplify rescaling 2021-11-16 22:11:28 +01:00
ameerj
d5143c83a9 texture_cache: Fix typo in aliased image rescaling 2021-11-16 22:11:28 +01:00
ReinUsesLisp
0fb4b84383 vk_texture_cache: Simplify and optimize scaling blits 2021-11-16 22:11:28 +01:00
ReinUsesLisp
520c4a44f6 vk_texture_cache: Fix scaling blit validation errors 2021-11-16 22:11:28 +01:00
ReinUsesLisp
1672e9ba09 shader: Fix resolution scaling pass 2021-11-16 22:11:27 +01:00
ReinUsesLisp
fb924ea85c shader: Add resolution down factor opcode 2021-11-16 22:11:27 +01:00
ameerj
fddf372c68 gl_texture_cache: Implement ScaleDown 2021-11-16 22:11:27 +01:00
ameerj
0a6c895af7 gl_texture_cache: Rescale fixes for multi-layered textures 2021-11-16 22:11:27 +01:00
Fernando Sahmkow
dfc65cd0a3 Texture Cache: Implement Rescaling on Aliases and Blits. 2021-11-16 22:11:27 +01:00
ReinUsesLisp
d464b122d5 Fix blits with mips 2021-11-16 22:11:27 +01:00
ReinUsesLisp
973f8f1d08 Fix blits 2021-11-16 22:11:27 +01:00
ameerj
de66a69ed4 renderer_gl: Resolution scaling fixes 2021-11-16 22:11:27 +01:00
Fernando Sahmkow
8704c93913 TextureCache: Fix rescaling of ImageCopies 2021-11-16 22:11:27 +01:00
Fernando Sahmkow
778700ff9d TextureCache: Modify Viewports/Scissors according to Rescale. 2021-11-16 22:11:27 +01:00
Fernando Sahmkow
71ca84d829 Settings: eliminate rescaling_factor. 2021-11-16 22:11:27 +01:00
Fernando Sahmkow
84f2aea896 Texture Cache: More rescaling fixes. 2021-11-16 22:11:27 +01:00
ameerj
10e5065a5c gl_texture_cache: WIP texture rescale 2021-11-16 22:11:27 +01:00
Fernando Sahmkow
ba18047e8d Texture Cache: Implement Vulkan UpScaling & DownScaling 2021-11-16 22:11:27 +01:00
Fernando Sahmkow
360e897ccd ShaderDecompiler: Add initial support for rescaling. 2021-11-16 22:11:27 +01:00
Fernando Sahmkow
37ef9c9130 Settings: Add resolution scaling to settings. 2021-11-16 22:11:27 +01:00
Fernando Sahmkow
22f4b290b6 VideoCore: Initial Setup for the Resolution Scaler. 2021-11-16 22:11:27 +01:00
Fernando S
720970c4c1 Merge pull request #7326 from ameerj/vp8
codecs: Implement VP8 video decoding support
2021-11-14 23:03:56 +01:00
bunnei
30442d8a89 Merge pull request #7260 from vonchenplus/spirv_support_legacy_attribute_v2
shader: Spirv support legacy attribute v2
2021-11-14 02:07:45 -08:00
bunnei
7f256392a1 Merge pull request #7305 from Morph1984/ci
ci: Print traceback on patch merge failure
2021-11-14 02:07:04 -08:00
bunnei
0eacc362dd Merge pull request #7272 from behunin/the-courteous-logger
Logging: Impl refactor
2021-11-13 00:03:54 -08:00
ameerj
c50f170597 codes: Rename ComposeFrameHeader to ComposeFrame
These functions were composing the entire frame, not just the headers. Rename to more accurately describe them.
2021-11-12 23:52:19 -05:00
ameerj
1994edfeb6 CMake: Enable VP8 ffmpeg decoders 2021-11-12 23:52:19 -05:00
ameerj
d35391b9f4 vp8: Implement header composition
Enables frame decoding with FFmpeg
2021-11-12 23:52:18 -05:00
ameerj
b39b33b1fe codecs: Add VP8 codec class 2021-11-12 19:49:45 -05:00
Mai M
852858c2cb Merge pull request #7320 from OatmealDome/homebrew-capabilities
program_metadata: Add default ThreadInfo capability
2021-11-11 00:26:59 -05:00
OatmealDome
50acc0da20 program_metadata: Add default ThreadInfo kernel capability 2021-11-10 22:01:05 -05:00
Fernando S
bdabd17c76 Merge pull request #7303 from Morph1984/swkbd-confirm-skip-textcheck
applets/swkbd: Skip text checking if the text has been confirmed
2021-11-10 13:42:11 +01:00
Morph
3af2117c88 applets/swkbd: Fix text check message encoding
The text check message can be encoded in UTF-8.
2021-11-08 14:57:53 -05:00
Morph
84934693cf ci: Print traceback on patch merge failure 2021-11-08 13:36:05 -05:00
Morph
1af499c15b applets/swkbd: Skip text checking if the text has been confirmed
Confirm means that the text has already been checked by the application to be correct, but is asking the user for confirmation.
The confirmation text itself seems to be corrupted though, this needs to be investigated.

Fixes the software keyboard in Famicom Detective Club: The Missing Heir
2021-11-08 13:05:50 -05:00
Narr the Reg
ced1302975 service/pctl: Stub EndFreeCommunication
- Used by Just Dance 2022
2021-11-05 12:16:19 -04:00
Feng Chen
70d51f72ec vulkan_device: Add missing vulkan image format R5G6B5 in GetFormatProperties
- Used by Dragon Quest Builders
2021-11-05 10:31:40 -04:00
Feng Chen
f2a4204245 Simply legacy attribute implement 2021-11-04 09:26:16 +08:00
Levi Behunin
d6b5f64484 Refactor Logging Impl
Loop on stop_token and remove final_entry in Entry.
Move Backend thread out of Impl Constructor to its own function.
Add Start function for backend thread.
Use stop token in PopWait and check if entry filename is nullptr before logging.
2021-11-01 18:36:27 -06:00
vonchenplus
49f9a44235 Support gl_FogFragCoord attribute 2021-10-31 23:34:17 +08:00
vonchenplus
36c21ff6cb Support gl_BackSecondaryColor attribute 2021-10-26 23:14:40 +08:00
vonchenplus
92bebecf46 Support gl_FrontSecondaryColor attribute 2021-10-26 23:09:44 +08:00
vonchenplus
b04c7b6343 Support gl_BackColor attribute 2021-10-26 23:01:57 +08:00
147 changed files with 9548 additions and 813 deletions

View File

@@ -1,7 +1,7 @@
# Download all pull requests as patches that match a specific label
# Usage: python download-patches-by-label.py <Label to Match> <Root Path Folder to DL to>
import requests, sys, json, urllib3.request, shutil, subprocess, os
import requests, sys, json, urllib3.request, shutil, subprocess, os, traceback
tagline = sys.argv[2]
@@ -33,4 +33,5 @@ try:
for i in range(1,30):
do_page(i)
except:
traceback.print_exc(file=sys.stdout)
sys.exit(-1)

View File

@@ -166,7 +166,7 @@ macro(yuzu_find_packages)
# Capitalization matters here. We need the naming to match the generated paths from Conan
set(REQUIRED_LIBS
# Cmake Pkg Prefix Version Conan Pkg
"Catch2 2.13 catch2/2.13.0"
"Catch2 2.13.7 catch2/2.13.7"
"fmt 8.0 fmt/8.0.0"
"lz4 1.8 lz4/1.9.2"
"nlohmann_json 3.8 nlohmann_json/3.8.0"
@@ -600,6 +600,7 @@ if (YUZU_USE_BUNDLED_FFMPEG)
${LIBVA_LIBRARIES})
set(FFmpeg_HWACCEL_FLAGS
--enable-hwaccel=h264_vaapi
--enable-hwaccel=vp8_vaapi
--enable-hwaccel=vp9_vaapi
--enable-libdrm)
list(APPEND FFmpeg_HWACCEL_INCLUDE_DIRS
@@ -620,6 +621,7 @@ if (YUZU_USE_BUNDLED_FFMPEG)
--enable-ffnvcodec
--enable-nvdec
--enable-hwaccel=h264_nvdec
--enable-hwaccel=vp8_nvdec
--enable-hwaccel=vp9_nvdec
--extra-cflags=-I${CUDA_INCLUDE_DIRS}
)
@@ -670,6 +672,7 @@ if (YUZU_USE_BUNDLED_FFMPEG)
--disable-postproc
--disable-swresample
--enable-decoder=h264
--enable-decoder=vp8
--enable-decoder=vp9
--cc="${CMAKE_C_COMPILER}"
--cxx="${CMAKE_CXX_COMPILER}"

2656
externals/FidelityFX-FSR/ffx-fsr/ffx_a.h vendored Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

19
externals/FidelityFX-FSR/license.txt vendored Normal file
View File

@@ -0,0 +1,19 @@
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

View File

@@ -6,6 +6,7 @@
#include <chrono>
#include <climits>
#include <exception>
#include <stop_token>
#include <thread>
#include <vector>
@@ -186,6 +187,10 @@ public:
initialization_in_progress_suppress_logging = false;
}
/// Starts the logging backend thread on the shared Impl instance.
/// NOTE(review): `instance` is dereferenced without a null check — presumably
/// Initialize() must have been called first; confirm against callers.
static void Start() {
instance->StartBackendThread();
}
Impl(const Impl&) = delete;
Impl& operator=(const Impl&) = delete;
@@ -201,7 +206,7 @@ public:
}
void PushEntry(Class log_class, Level log_level, const char* filename, unsigned int line_num,
const char* function, std::string message) {
const char* function, std::string&& message) {
if (!filter.CheckMessage(log_class, log_level))
return;
const Entry& entry =
@@ -211,40 +216,41 @@ public:
private:
Impl(const std::filesystem::path& file_backend_filename, const Filter& filter_)
: filter{filter_}, file_backend{file_backend_filename}, backend_thread{std::thread([this] {
Common::SetCurrentThreadName("yuzu:Log");
Entry entry;
const auto write_logs = [this, &entry]() {
ForEachBackend([&entry](Backend& backend) { backend.Write(entry); });
};
while (true) {
entry = message_queue.PopWait();
if (entry.final_entry) {
break;
}
write_logs();
}
// Drain the logging queue. Only writes out up to MAX_LOGS_TO_WRITE to prevent a
// case where a system is repeatedly spamming logs even on close.
int max_logs_to_write = filter.IsDebug() ? INT_MAX : 100;
while (max_logs_to_write-- && message_queue.Pop(entry)) {
write_logs();
}
})} {}
: filter{filter_}, file_backend{file_backend_filename} {}
/// Stops and joins the backend thread when the logger is destroyed.
/// NOTE(review): the thread is only created by StartBackendThread(); confirm that
/// destruction without a prior start cannot reach a join on a non-joinable thread.
~Impl() {
StopBackendThread();
}
/// Spawns the thread that forwards queued log entries to every backend.
///
/// The thread runs until a stop is requested on `stop`, then flushes a bounded
/// number of entries still in the queue before exiting.
/// NOTE(review): assigning to `backend_thread` while it is still joinable would
/// terminate the program, so this is presumably meant to be called once — confirm.
void StartBackendThread() {
backend_thread = std::thread([this] {
Common::SetCurrentThreadName("yuzu:Log");
// A single Entry is reused for every message; write_logs always emits the current one.
Entry entry;
const auto write_logs = [this, &entry]() {
ForEachBackend([&entry](Backend& backend) { backend.Write(entry); });
};
while (!stop.stop_requested()) {
entry = message_queue.PopWait(stop.get_token());
// Entries without a filename are not written; presumably PopWait returns an
// empty Entry when it is woken by the stop token rather than by a message — confirm.
if (entry.filename != nullptr) {
write_logs();
}
}
// Drain the logging queue. Only writes out up to 100 entries (unlimited when the
// filter is in debug mode) to prevent a case where a system is repeatedly spamming
// logs even on close.
int max_logs_to_write = filter.IsDebug() ? INT_MAX : 100;
while (max_logs_to_write-- && message_queue.Pop(entry)) {
write_logs();
}
});
}
void StopBackendThread() {
Entry stop_entry{};
stop_entry.final_entry = true;
message_queue.Push(stop_entry);
stop.request_stop();
backend_thread.join();
}
Entry CreateEntry(Class log_class, Level log_level, const char* filename, unsigned int line_nr,
const char* function, std::string message) const {
const char* function, std::string&& message) const {
using std::chrono::duration_cast;
using std::chrono::microseconds;
using std::chrono::steady_clock;
@@ -257,7 +263,6 @@ private:
.line_num = line_nr,
.function = function,
.message = std::move(message),
.final_entry = false,
};
}
@@ -278,8 +283,9 @@ private:
ColorConsoleBackend color_console_backend{};
FileBackend file_backend;
std::stop_source stop;
std::thread backend_thread;
MPSCQueue<Entry> message_queue{};
MPSCQueue<Entry, true> message_queue{};
std::chrono::steady_clock::time_point time_origin{std::chrono::steady_clock::now()};
};
} // namespace
@@ -288,6 +294,10 @@ void Initialize() {
Impl::Initialize();
}
/// Starts the logging backend thread; forwards to Impl::Start().
void Start() {
Impl::Start();
}
void DisableLoggingInTests() {
initialization_in_progress_suppress_logging = true;
}

View File

@@ -14,6 +14,8 @@ class Filter;
/// Initializes the logging system. This should be the first thing called in main.
void Initialize();
/// Starts the logging backend thread.
void Start();
void DisableLoggingInTests();
/**

View File

@@ -22,7 +22,6 @@ struct Entry {
unsigned int line_num = 0;
std::string function;
std::string message;
bool final_entry = false;
};
} // namespace Common::Log

View File

@@ -48,8 +48,8 @@ struct Rectangle {
}
[[nodiscard]] Rectangle<T> Scale(const float s) const {
return Rectangle{left, top, static_cast<T>(left + GetWidth() * s),
static_cast<T>(top + GetHeight() * s)};
return Rectangle{left, top, static_cast<T>(static_cast<float>(left + GetWidth()) * s),
static_cast<T>(static_cast<float>(top + GetHeight()) * s)};
}
};

View File

@@ -47,7 +47,9 @@ void LogSettings() {
log_setting("System_TimeZoneIndex", values.time_zone_index.GetValue());
log_setting("Core_UseMultiCore", values.use_multi_core.GetValue());
log_setting("CPU_Accuracy", values.cpu_accuracy.GetValue());
log_setting("Renderer_UseResolutionFactor", values.resolution_factor.GetValue());
log_setting("Renderer_UseResolutionScaling", values.resolution_setup.GetValue());
log_setting("Renderer_ScalingFilter", values.scaling_filter.GetValue());
log_setting("Renderer_AntiAliasing", values.anti_aliasing.GetValue());
log_setting("Renderer_UseSpeedLimit", values.use_speed_limit.GetValue());
log_setting("Renderer_SpeedLimit", values.speed_limit.GetValue());
log_setting("Renderer_UseDiskShaderCache", values.use_disk_shader_cache.GetValue());
@@ -105,6 +107,55 @@ float Volume() {
return values.volume.GetValue() / 100.0f;
}
void UpdateRescalingInfo() {
const auto setup = values.resolution_setup.GetValue();
auto& info = values.resolution_info;
info.downscale = false;
switch (setup) {
case ResolutionSetup::Res1_2X:
info.up_scale = 1;
info.down_shift = 1;
info.downscale = true;
break;
case ResolutionSetup::Res3_4X:
info.up_scale = 3;
info.down_shift = 2;
info.downscale = true;
break;
case ResolutionSetup::Res1X:
info.up_scale = 1;
info.down_shift = 0;
break;
case ResolutionSetup::Res2X:
info.up_scale = 2;
info.down_shift = 0;
break;
case ResolutionSetup::Res3X:
info.up_scale = 3;
info.down_shift = 0;
break;
case ResolutionSetup::Res4X:
info.up_scale = 4;
info.down_shift = 0;
break;
case ResolutionSetup::Res5X:
info.up_scale = 5;
info.down_shift = 0;
break;
case ResolutionSetup::Res6X:
info.up_scale = 6;
info.down_shift = 0;
break;
default:
UNREACHABLE();
info.up_scale = 1;
info.down_shift = 0;
}
info.up_factor = static_cast<f32>(info.up_scale) / (1U << info.down_shift);
info.down_factor = static_cast<f32>(1U << info.down_shift) / info.up_scale;
info.active = info.up_scale != 1 || info.down_shift != 0;
}
void RestoreGlobalState(bool is_powered_on) {
// If a game is running, DO NOT restore the global settings state
if (is_powered_on) {

View File

@@ -52,6 +52,56 @@ enum class NvdecEmulation : u32 {
GPU = 2,
};
/// Resolution scaling presets selectable through the "resolution_setup" setting.
/// UpdateRescalingInfo() turns each preset into an up_scale multiplier and a
/// down_shift (right shift), i.e. an effective factor of up_scale / 2^down_shift.
enum class ResolutionSetup : u32 {
Res1_2X = 0, // up_scale 1, down_shift 1 (1/2, marks the info as downscaling)
Res3_4X = 1, // up_scale 3, down_shift 2 (3/4, marks the info as downscaling)
Res1X = 2, // up_scale 1, down_shift 0 (default value of the setting)
Res2X = 3, // up_scale 2, down_shift 0
Res3X = 4, // up_scale 3, down_shift 0
Res4X = 5, // up_scale 4, down_shift 0
Res5X = 6, // up_scale 5, down_shift 0
Res6X = 7, // up_scale 6, down_shift 0
};
/// Filters selectable through the "scaling_filter" setting (the setting defaults to Bilinear).
enum class ScalingFilter : u32 {
NearestNeighbor = 0,
Bilinear = 1,
Bicubic = 2,
Gaussian = 3,
ScaleForce = 4,
Fsr = 5, // presumably AMD FidelityFX Super Resolution — confirm
// Alias of the highest-valued filter; presumably used as an upper bound by callers — confirm.
LastFilter = Fsr,
};
/// Anti-aliasing methods selectable through the "anti_aliasing" setting (the setting defaults to None).
enum class AntiAliasing : u32 {
None = 0,
Fxaa = 1,
// Alias of the highest-valued method; presumably used as an upper bound by callers — confirm.
LastAA = Fxaa,
};
/// Derived description of the active resolution scale, expressed as the integer
/// ratio up_scale / 2^down_shift together with its floating point equivalents.
struct ResolutionScalingInfo {
    u32 up_scale{1};      // Integer multiplier applied before shifting.
    u32 down_shift{0};    // Right shift applied after multiplying.
    f32 up_factor{1.0f};
    f32 down_factor{1.0f};
    bool active{};
    bool downscale{};

    /// Scales a signed value by up_scale / 2^down_shift.
    /// Zero stays zero; every other input yields a result of at least 1.
    s32 ScaleUp(s32 value) const {
        if (value == 0) {
            return 0;
        }
        const s32 multiplied = value * static_cast<s32>(up_scale);
        const s32 scaled = multiplied >> static_cast<s32>(down_shift);
        return std::max(scaled, 1);
    }

    /// Scales an unsigned value by up_scale / 2^down_shift.
    /// Zero stays zero; every other input yields a result of at least 1.
    u32 ScaleUp(u32 value) const {
        if (value == 0U) {
            return 0U;
        }
        const u32 scaled = (value * up_scale) >> down_shift;
        return std::max(scaled, 1U);
    }
};
/** The BasicSetting class is a simple resource manager. It defines a label and default value
* alongside the actual value of the setting for simpler and less-error prone use with frontend
* configurations. Setting a default value and label is required, though subclasses may deviate from
@@ -451,7 +501,10 @@ struct Values {
"disable_shader_loop_safety_checks"};
Setting<int> vulkan_device{0, "vulkan_device"};
Setting<u16> resolution_factor{1, "resolution_factor"};
ResolutionScalingInfo resolution_info{};
Setting<ResolutionSetup> resolution_setup{ResolutionSetup::Res1X, "resolution_setup"};
Setting<ScalingFilter> scaling_filter{ScalingFilter::Bilinear, "scaling_filter"};
Setting<AntiAliasing> anti_aliasing{AntiAliasing::None, "anti_aliasing"};
// *nix platforms may have issues with the borderless windowed fullscreen mode.
// Default to exclusive fullscreen on these platforms for now.
RangedSetting<FullscreenMode> fullscreen_mode{
@@ -462,7 +515,7 @@ struct Values {
#endif
FullscreenMode::Borderless, FullscreenMode::Exclusive, "fullscreen_mode"};
RangedSetting<int> aspect_ratio{0, 0, 3, "aspect_ratio"};
RangedSetting<int> max_anisotropy{0, 0, 4, "max_anisotropy"};
RangedSetting<int> max_anisotropy{0, 0, 5, "max_anisotropy"};
Setting<bool> use_speed_limit{true, "use_speed_limit"};
RangedSetting<u16> speed_limit{100, 0, 9999, "speed_limit"};
Setting<bool> use_disk_shader_cache{true, "use_disk_shader_cache"};
@@ -595,6 +648,8 @@ std::string GetTimeZoneString();
void LogSettings();
void UpdateRescalingInfo();
// Restore the global state of all applicable settings in the Values struct
void RestoreGlobalState(bool is_powered_on);

View File

@@ -53,13 +53,16 @@ Loader::ResultStatus ProgramMetadata::Load(VirtualFile file) {
}
/*static*/ ProgramMetadata ProgramMetadata::GetDefault() {
// Allow use of cores 0~3 and thread priorities 1~63.
constexpr u32 default_thread_info_capability = 0x30007F7;
ProgramMetadata result;
result.LoadManual(
true /*is_64_bit*/, FileSys::ProgramAddressSpaceType::Is39Bit /*address_space*/,
0x2c /*main_thread_prio*/, 0 /*main_thread_core*/, 0x00100000 /*main_thread_stack_size*/,
0 /*title_id*/, 0xFFFFFFFFFFFFFFFF /*filesystem_permissions*/,
0x1FE00000 /*system_resource_size*/, {} /*capabilities*/);
0x1FE00000 /*system_resource_size*/, {default_thread_info_capability} /*capabilities*/);
return result;
}

View File

@@ -16,7 +16,8 @@ DefaultSoftwareKeyboardApplet::~DefaultSoftwareKeyboardApplet() = default;
void DefaultSoftwareKeyboardApplet::InitializeKeyboard(
bool is_inline, KeyboardInitializeParameters initialize_parameters,
std::function<void(Service::AM::Applets::SwkbdResult, std::u16string)> submit_normal_callback_,
std::function<void(Service::AM::Applets::SwkbdResult, std::u16string, bool)>
submit_normal_callback_,
std::function<void(Service::AM::Applets::SwkbdReplyType, std::u16string, s32)>
submit_inline_callback_) {
if (is_inline) {
@@ -128,7 +129,7 @@ void DefaultSoftwareKeyboardApplet::ExitKeyboard() const {
}
void DefaultSoftwareKeyboardApplet::SubmitNormalText(std::u16string text) const {
submit_normal_callback(Service::AM::Applets::SwkbdResult::Ok, text);
submit_normal_callback(Service::AM::Applets::SwkbdResult::Ok, text, true);
}
void DefaultSoftwareKeyboardApplet::SubmitInlineText(std::u16string_view text) const {

View File

@@ -57,7 +57,7 @@ public:
virtual void InitializeKeyboard(
bool is_inline, KeyboardInitializeParameters initialize_parameters,
std::function<void(Service::AM::Applets::SwkbdResult, std::u16string)>
std::function<void(Service::AM::Applets::SwkbdResult, std::u16string, bool)>
submit_normal_callback_,
std::function<void(Service::AM::Applets::SwkbdReplyType, std::u16string, s32)>
submit_inline_callback_) = 0;
@@ -82,7 +82,7 @@ public:
void InitializeKeyboard(
bool is_inline, KeyboardInitializeParameters initialize_parameters,
std::function<void(Service::AM::Applets::SwkbdResult, std::u16string)>
std::function<void(Service::AM::Applets::SwkbdResult, std::u16string, bool)>
submit_normal_callback_,
std::function<void(Service::AM::Applets::SwkbdReplyType, std::u16string, s32)>
submit_inline_callback_) override;
@@ -106,7 +106,7 @@ private:
KeyboardInitializeParameters parameters;
mutable std::function<void(Service::AM::Applets::SwkbdResult, std::u16string)>
mutable std::function<void(Service::AM::Applets::SwkbdResult, std::u16string, bool)>
submit_normal_callback;
mutable std::function<void(Service::AM::Applets::SwkbdReplyType, std::u16string, s32)>
submit_inline_callback;

View File

@@ -44,16 +44,13 @@ FramebufferLayout DefaultFrameLayout(u32 width, u32 height) {
return res;
}
FramebufferLayout FrameLayoutFromResolutionScale(u32 res_scale) {
u32 width, height;
FramebufferLayout FrameLayoutFromResolutionScale(f32 res_scale) {
const bool is_docked = Settings::values.use_docked_mode.GetValue();
const u32 screen_width = is_docked ? ScreenDocked::Width : ScreenUndocked::Width;
const u32 screen_height = is_docked ? ScreenDocked::Height : ScreenUndocked::Height;
if (Settings::values.use_docked_mode.GetValue()) {
width = ScreenDocked::Width * res_scale;
height = ScreenDocked::Height * res_scale;
} else {
width = ScreenUndocked::Width * res_scale;
height = ScreenUndocked::Height * res_scale;
}
const u32 width = static_cast<u32>(static_cast<f32>(screen_width) * res_scale);
const u32 height = static_cast<u32>(static_cast<f32>(screen_height) * res_scale);
return DefaultFrameLayout(width, height);
}

View File

@@ -60,7 +60,7 @@ FramebufferLayout DefaultFrameLayout(u32 width, u32 height);
* Convenience method to get frame layout by resolution scale
* @param res_scale resolution scale factor
*/
FramebufferLayout FrameLayoutFromResolutionScale(u32 res_scale);
FramebufferLayout FrameLayoutFromResolutionScale(f32 res_scale);
/**
* Convenience method to determine emulation aspect ratio

View File

@@ -797,15 +797,11 @@ void ICommonStateGetter::GetDefaultDisplayResolution(Kernel::HLERequestContext&
rb.Push(ResultSuccess);
if (Settings::values.use_docked_mode.GetValue()) {
rb.Push(static_cast<u32>(Service::VI::DisplayResolution::DockedWidth) *
static_cast<u32>(Settings::values.resolution_factor.GetValue()));
rb.Push(static_cast<u32>(Service::VI::DisplayResolution::DockedHeight) *
static_cast<u32>(Settings::values.resolution_factor.GetValue()));
rb.Push(static_cast<u32>(Service::VI::DisplayResolution::DockedWidth));
rb.Push(static_cast<u32>(Service::VI::DisplayResolution::DockedHeight));
} else {
rb.Push(static_cast<u32>(Service::VI::DisplayResolution::UndockedWidth) *
static_cast<u32>(Settings::values.resolution_factor.GetValue()));
rb.Push(static_cast<u32>(Service::VI::DisplayResolution::UndockedHeight) *
static_cast<u32>(Settings::values.resolution_factor.GetValue()));
rb.Push(static_cast<u32>(Service::VI::DisplayResolution::UndockedWidth));
rb.Push(static_cast<u32>(Service::VI::DisplayResolution::UndockedHeight));
}
}

View File

@@ -109,13 +109,18 @@ void SoftwareKeyboard::Execute() {
ShowNormalKeyboard();
}
void SoftwareKeyboard::SubmitTextNormal(SwkbdResult result, std::u16string submitted_text) {
void SoftwareKeyboard::SubmitTextNormal(SwkbdResult result, std::u16string submitted_text,
bool confirmed) {
if (complete) {
return;
}
if (swkbd_config_common.use_text_check && result == SwkbdResult::Ok) {
SubmitForTextCheck(submitted_text);
if (confirmed) {
SubmitNormalOutputAndExit(result, submitted_text);
} else {
SubmitForTextCheck(submitted_text);
}
} else {
SubmitNormalOutputAndExit(result, submitted_text);
}
@@ -273,13 +278,21 @@ void SoftwareKeyboard::ProcessTextCheck() {
std::memcpy(&swkbd_text_check, text_check_data.data(), sizeof(SwkbdTextCheck));
std::u16string text_check_message =
swkbd_text_check.text_check_result == SwkbdTextCheckResult::Failure ||
swkbd_text_check.text_check_result == SwkbdTextCheckResult::Confirm
? Common::UTF16StringFromFixedZeroTerminatedBuffer(
swkbd_text_check.text_check_message.data(),
swkbd_text_check.text_check_message.size())
: u"";
std::u16string text_check_message = [this, &swkbd_text_check]() -> std::u16string {
if (swkbd_text_check.text_check_result == SwkbdTextCheckResult::Failure ||
swkbd_text_check.text_check_result == SwkbdTextCheckResult::Confirm) {
return swkbd_config_common.use_utf8
? Common::UTF8ToUTF16(Common::StringFromFixedZeroTerminatedBuffer(
reinterpret_cast<const char*>(
swkbd_text_check.text_check_message.data()),
swkbd_text_check.text_check_message.size() * sizeof(char16_t)))
: Common::UTF16StringFromFixedZeroTerminatedBuffer(
swkbd_text_check.text_check_message.data(),
swkbd_text_check.text_check_message.size());
} else {
return u"";
}
}();
LOG_INFO(Service_AM, "\nTextCheckResult: {}\nTextCheckMessage: {}",
GetTextCheckResultName(swkbd_text_check.text_check_result),
@@ -583,11 +596,12 @@ void SoftwareKeyboard::InitializeFrontendKeyboard() {
.disable_cancel_button{disable_cancel_button},
};
frontend.InitializeKeyboard(false, std::move(initialize_parameters),
[this](SwkbdResult result, std::u16string submitted_text) {
SubmitTextNormal(result, submitted_text);
},
{});
frontend.InitializeKeyboard(
false, std::move(initialize_parameters),
[this](SwkbdResult result, std::u16string submitted_text, bool confirmed) {
SubmitTextNormal(result, submitted_text, confirmed);
},
{});
}
}

View File

@@ -36,8 +36,9 @@ public:
*
* @param result SwkbdResult enum
* @param submitted_text UTF-16 encoded string
* @param confirmed Whether the text has been confirmed after TextCheckResult::Confirm
*/
void SubmitTextNormal(SwkbdResult result, std::u16string submitted_text);
void SubmitTextNormal(SwkbdResult result, std::u16string submitted_text, bool confirmed);
/**
* Submits the input text to the application.

View File

@@ -541,11 +541,8 @@ private:
switch (transaction) {
case TransactionId::Connect: {
IGBPConnectRequestParcel request{ctx.ReadBuffer()};
IGBPConnectResponseParcel response{
static_cast<u32>(static_cast<u32>(DisplayResolution::UndockedWidth) *
Settings::values.resolution_factor.GetValue()),
static_cast<u32>(static_cast<u32>(DisplayResolution::UndockedHeight) *
Settings::values.resolution_factor.GetValue())};
IGBPConnectResponseParcel response{static_cast<u32>(DisplayResolution::UndockedWidth),
static_cast<u32>(DisplayResolution::UndockedHeight)};
buffer_queue.Connect();
@@ -775,15 +772,11 @@ private:
rb.Push(ResultSuccess);
if (Settings::values.use_docked_mode.GetValue()) {
rb.Push(static_cast<u32>(Service::VI::DisplayResolution::DockedWidth) *
static_cast<u32>(Settings::values.resolution_factor.GetValue()));
rb.Push(static_cast<u32>(Service::VI::DisplayResolution::DockedHeight) *
static_cast<u32>(Settings::values.resolution_factor.GetValue()));
rb.Push(static_cast<u32>(Service::VI::DisplayResolution::DockedWidth));
rb.Push(static_cast<u32>(Service::VI::DisplayResolution::DockedHeight));
} else {
rb.Push(static_cast<u32>(Service::VI::DisplayResolution::UndockedWidth) *
static_cast<u32>(Settings::values.resolution_factor.GetValue()));
rb.Push(static_cast<u32>(Service::VI::DisplayResolution::UndockedHeight) *
static_cast<u32>(Settings::values.resolution_factor.GetValue()));
rb.Push(static_cast<u32>(Service::VI::DisplayResolution::UndockedWidth));
rb.Push(static_cast<u32>(Service::VI::DisplayResolution::UndockedHeight));
}
rb.PushRaw<float>(60.0f); // This wouldn't seem to be correct for 30 fps games.
@@ -1063,10 +1056,8 @@ private:
// This only returns the fixed values of 1280x720 and does not distinguish
// between docked and undocked dimensions. The resolution scaling factor is
// not applied to the values reported here.
rb.Push(static_cast<u64>(DisplayResolution::UndockedWidth) *
static_cast<u32>(Settings::values.resolution_factor.GetValue()));
rb.Push(static_cast<u64>(DisplayResolution::UndockedHeight) *
static_cast<u32>(Settings::values.resolution_factor.GetValue()));
rb.Push(static_cast<u64>(DisplayResolution::UndockedWidth));
rb.Push(static_cast<u64>(DisplayResolution::UndockedHeight));
}
void SetLayerScalingMode(Kernel::HLERequestContext& ctx) {
@@ -1099,8 +1090,6 @@ private:
LOG_WARNING(Service_VI, "(STUBBED) called");
DisplayInfo display_info;
display_info.width *= static_cast<u64>(Settings::values.resolution_factor.GetValue());
display_info.height *= static_cast<u64>(Settings::values.resolution_factor.GetValue());
ctx.WriteBuffer(&display_info, sizeof(DisplayInfo));
IPC::ResponseBuilder rb{ctx, 4};
rb.Push(ResultSuccess);

View File

@@ -229,8 +229,6 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader,
AddField(field_type, "Core_UseMultiCore", Settings::values.use_multi_core.GetValue());
AddField(field_type, "Renderer_Backend",
TranslateRenderer(Settings::values.renderer_backend.GetValue()));
AddField(field_type, "Renderer_ResolutionFactor",
Settings::values.resolution_factor.GetValue());
AddField(field_type, "Renderer_UseSpeedLimit", Settings::values.use_speed_limit.GetValue());
AddField(field_type, "Renderer_SpeedLimit", Settings::values.speed_limit.GetValue());
AddField(field_type, "Renderer_UseDiskShaderCache",

View File

@@ -221,6 +221,7 @@ add_library(shader_recompiler STATIC
ir_opt/lower_fp16_to_fp32.cpp
ir_opt/lower_int64_to_int32.cpp
ir_opt/passes.h
ir_opt/rescaling_pass.cpp
ir_opt/ssa_rewrite_pass.cpp
ir_opt/texture_pass.cpp
ir_opt/verification_pass.cpp

View File

@@ -14,6 +14,8 @@ struct Bindings {
u32 storage_buffer{};
u32 texture{};
u32 image{};
u32 texture_scaling_index{};
u32 image_scaling_index{};
};
} // namespace Shader::Backend

View File

@@ -6,6 +6,7 @@
#include "shader_recompiler/backend/bindings.h"
#include "shader_recompiler/backend/glasm/emit_context.h"
#include "shader_recompiler/backend/glasm/emit_glasm.h"
#include "shader_recompiler/frontend/ir/program.h"
#include "shader_recompiler/profile.h"
#include "shader_recompiler/runtime_info.h"
@@ -55,7 +56,8 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile
}
if (!runtime_info.glasm_use_storage_buffers) {
if (const size_t num = info.storage_buffers_descriptors.size(); num > 0) {
Add("PARAM c[{}]={{program.local[0..{}]}};", num, num - 1);
const size_t index{num + PROGRAM_LOCAL_PARAMETER_STORAGE_BUFFER_BASE};
Add("PARAM c[{}]={{program.local[0..{}]}};", index, index - 1);
}
}
stage = program.stage;

View File

@@ -448,6 +448,9 @@ std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, I
header += fmt::format("SHARED_MEMORY {};", program.shared_memory_size);
header += fmt::format("SHARED shared_mem[]={{program.sharedmem}};");
}
if (program.info.uses_rescaling_uniform) {
header += "PARAM scaling[1]={program.local[0..0]};";
}
header += "TEMP ";
for (size_t index = 0; index < ctx.reg_alloc.NumUsedRegisters(); ++index) {
header += fmt::format("R{},", index);

View File

@@ -13,6 +13,8 @@
namespace Shader::Backend::GLASM {
constexpr u32 PROGRAM_LOCAL_PARAMETER_STORAGE_BUFFER_BASE = 1;
[[nodiscard]] std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info,
IR::Program& program, Bindings& bindings);

View File

@@ -608,6 +608,24 @@ void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Re
ctx.Add("STOREIM.{} {},{},{},{};", format, image, color, coord, type);
}
/// Emits GLASM that tests whether the texture at the immediate `index` is rescaled.
/// The texture's bit (1 << index) is masked out of scaling[0].x and compared against zero.
/// Throws NotImplementedException when `index` is not an immediate.
void EmitIsTextureScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& index) {
    if (index.IsImmediate()) {
        const auto texture_bit = 1u << index.U32();
        ctx.Add("AND.U RC.x,scaling[0].x,{};"
                "SNE.S {},RC.x,0;",
                texture_bit, ctx.reg_alloc.Define(inst));
        return;
    }
    throw NotImplementedException("Non-constant texture rescaling");
}
/// Emits GLASM that tests whether the image at the immediate `index` is rescaled.
/// The image's bit (1 << index) is masked out of scaling[0].y and compared against zero.
/// Throws NotImplementedException when `index` is not an immediate.
void EmitIsImageScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& index) {
    if (!index.IsImmediate()) {
        // Name the resource kind actually being queried (images, not textures).
        throw NotImplementedException("Non-constant image rescaling");
    }
    ctx.Add("AND.U RC.x,scaling[0].y,{};"
            "SNE.S {},RC.x,0;",
            1u << index.U32(), ctx.reg_alloc.Define(inst));
}
void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
ScalarU32 value) {
ImageAtomic(ctx, inst, index, coord, value, "ADD.U32");

View File

@@ -72,6 +72,7 @@ void EmitInvocationId(EmitContext& ctx, IR::Inst& inst);
void EmitSampleId(EmitContext& ctx, IR::Inst& inst);
void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst);
void EmitYDirection(EmitContext& ctx, IR::Inst& inst);
void EmitResolutionDownFactor(EmitContext& ctx, IR::Inst& inst);
void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, ScalarU32 word_offset);
void EmitWriteLocal(EmitContext& ctx, ScalarU32 word_offset, ScalarU32 value);
void EmitUndefU1(EmitContext& ctx, IR::Inst& inst);
@@ -303,6 +304,8 @@ void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, Register a, Register b);
void EmitISub32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
void EmitISub64(EmitContext& ctx, IR::Inst& inst, Register a, Register b);
void EmitIMul32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
void EmitSDiv32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
void EmitUDiv32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b);
void EmitINeg32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
void EmitINeg64(EmitContext& ctx, IR::Inst& inst, Register value);
void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
@@ -553,6 +556,8 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord);
void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
Register color);
void EmitIsTextureScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& index);
void EmitIsImageScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& index);
void EmitBindlessImageAtomicIAdd32(EmitContext&);
void EmitBindlessImageAtomicSMin32(EmitContext&);
void EmitBindlessImageAtomicUMin32(EmitContext&);

View File

@@ -90,6 +90,14 @@ void EmitIMul32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
ctx.Add("MUL.S {}.x,{},{};", inst, a, b);
}
/// Emits a GLASM signed divide (DIV.S) of `a` by `b`, written to the x component of `inst`.
void EmitSDiv32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
ctx.Add("DIV.S {}.x,{},{};", inst, a, b);
}
/// Emits a GLASM unsigned divide (DIV.U) of `a` by `b`, written to the x component of `inst`.
void EmitUDiv32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b) {
ctx.Add("DIV.U {}.x,{},{};", inst, a, b);
}
void EmitINeg32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
if (value.type != Type::Register && static_cast<s32>(value.imm_u32) < 0) {
ctx.Add("MOV.S {},{};", inst, -static_cast<s32>(value.imm_u32));

View File

@@ -210,6 +210,10 @@ void EmitYDirection(EmitContext& ctx, IR::Inst& inst) {
ctx.Add("MOV.F {}.x,y_direction[0].w;", inst);
}
/// Emits a GLASM move of the z component of scaling[0] into the x component of `inst`.
void EmitResolutionDownFactor(EmitContext& ctx, IR::Inst& inst) {
ctx.Add("MOV.F {}.x,scaling[0].z;", inst);
}
void EmitUndefU1(EmitContext& ctx, IR::Inst& inst) {
ctx.Add("MOV.S {}.x,0;", inst);
}

View File

@@ -393,6 +393,9 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile
DefineGenericOutput(index, program.invocations);
}
}
if (info.uses_rescaling_uniform) {
header += "layout(location=0) uniform vec4 scaling;";
}
DefineConstantBuffers(bindings);
DefineStorageBuffers(bindings);
SetupImages(bindings);

View File

@@ -445,6 +445,10 @@ void EmitYDirection(EmitContext& ctx, IR::Inst& inst) {
ctx.AddF32("{}=gl_FrontMaterial.ambient.a;", inst);
}
/// Emits GLSL assigning the z component of the `scaling` uniform to `inst` as an F32.
void EmitResolutionDownFactor(EmitContext& ctx, IR::Inst& inst) {
ctx.AddF32("{}=scaling.z;", inst);
}
void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, std::string_view word_offset) {
ctx.AddU32("{}=lmem[{}];", inst, word_offset);
}

View File

@@ -612,6 +612,22 @@ void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value
value);
}
/// Emits GLSL that tests whether the texture at the immediate `index` is rescaled.
/// The texture's bit (1 << index) is masked out of ftou(scaling.x) and compared against zero.
/// Throws NotImplementedException when `index` is not an immediate.
void EmitIsTextureScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& index) {
    if (index.IsImmediate()) {
        const u32 texture_mask{1u << index.U32()};
        ctx.AddU1("{}=(ftou(scaling.x)&{})!=0;", inst, texture_mask);
        return;
    }
    throw NotImplementedException("Non-constant texture rescaling");
}
/// Emits GLSL that tests whether the image at the immediate `index` is rescaled.
/// The image's bit (1 << index) is masked out of ftou(scaling.y) and compared against zero.
/// Throws NotImplementedException when `index` is not an immediate.
void EmitIsImageScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& index) {
    if (!index.IsImmediate()) {
        // Name the resource kind actually being queried (images, not textures).
        throw NotImplementedException("Non-constant image rescaling");
    }
    const u32 image_index{index.U32()};
    ctx.AddU1("{}=(ftou(scaling.y)&{})!=0;", inst, 1u << image_index);
}
void EmitBindlessImageSampleImplicitLod(EmitContext&) {
NotImplemented();
}

View File

@@ -85,6 +85,7 @@ void EmitInvocationId(EmitContext& ctx, IR::Inst& inst);
void EmitSampleId(EmitContext& ctx, IR::Inst& inst);
void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst);
void EmitYDirection(EmitContext& ctx, IR::Inst& inst);
void EmitResolutionDownFactor(EmitContext& ctx, IR::Inst& inst);
void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, std::string_view word_offset);
void EmitWriteLocal(EmitContext& ctx, std::string_view word_offset, std::string_view value);
void EmitUndefU1(EmitContext& ctx, IR::Inst& inst);
@@ -362,6 +363,8 @@ void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::strin
void EmitISub32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
void EmitISub64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
void EmitIMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
void EmitSDiv32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
void EmitUDiv32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
void EmitINeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
void EmitINeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
@@ -627,6 +630,8 @@ void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
std::string_view coords);
void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
std::string_view coords, std::string_view color);
void EmitIsTextureScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& index);
void EmitIsImageScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& index);
void EmitBindlessImageAtomicIAdd32(EmitContext&);
void EmitBindlessImageAtomicSMin32(EmitContext&);
void EmitBindlessImageAtomicUMin32(EmitContext&);

View File

@@ -78,6 +78,14 @@ void EmitIMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::strin
ctx.AddU32("{}=uint({}*{});", inst, a, b);
}
/// Emits a GLSL signed divide: both operands are converted to int, divided, and the
/// quotient is converted back to uint before being assigned to `inst`.
void EmitSDiv32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
ctx.AddU32("{}=uint(int({})/int({}));", inst, a, b);
}
/// Emits a GLSL divide of `a` by `b` with no conversions, assigned to `inst` as a U32.
void EmitUDiv32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
ctx.AddU32("{}={}/{};", inst, a, b);
}
void EmitINeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
ctx.AddU32("{}=uint(-({}));", inst, value);
}

View File

@@ -7,11 +7,14 @@
#include <climits>
#include <string_view>
#include <boost/container/static_vector.hpp>
#include <fmt/format.h>
#include "common/common_types.h"
#include "common/div_ceil.h"
#include "shader_recompiler/backend/spirv/emit_context.h"
#include "shader_recompiler/backend/spirv/emit_spirv.h"
namespace Shader::Backend::SPIRV {
namespace {
@@ -430,15 +433,33 @@ Id DescType(EmitContext& ctx, Id sampled_type, Id pointer_type, u32 count) {
}
}
size_t FindNextUnusedLocation(const std::bitset<IR::NUM_GENERICS>& used_locations,
size_t start_offset) {
/// Finds the first clear bit in `used_locations` at or after `start_offset`, marks it as
/// used, stores its index back into `start_offset` and returns it.
/// Throws RuntimeError when no clear bit remains in that range.
size_t FindAndSetNextUnusedLocation(std::bitset<IR::NUM_GENERICS>& used_locations,
                                    size_t& start_offset) {
    size_t candidate{start_offset};
    while (candidate < used_locations.size() && used_locations.test(candidate)) {
        ++candidate;
    }
    if (candidate >= used_locations.size()) {
        throw RuntimeError("Unable to get an unused location for legacy attribute");
    }
    used_locations.set(candidate);
    start_offset = candidate;
    return candidate;
}
/// Defines an F32[4] input variable for a legacy attribute and decorates it with the
/// next unused location, which is reserved in `used_locations`; `start_offset` is
/// advanced to that location.
/// Returns the id of the new input variable.
Id DefineLegacyInput(EmitContext& ctx, std::bitset<IR::NUM_GENERICS>& used_locations,
                     size_t& start_offset) {
    const Id id{DefineInput(ctx, ctx.F32[4], true)};
    const size_t location = FindAndSetNextUnusedLocation(used_locations, start_offset);
    // Location is a 32-bit literal in SPIR-V; cast explicitly instead of passing a size_t.
    ctx.Decorate(id, spv::Decoration::Location, static_cast<u32>(location));
    return id;
}
/// Defines an F32[4] output variable for a legacy attribute and decorates it with the
/// next unused location, which is reserved in `used_locations`; `start_offset` is
/// advanced to that location. `invocations` is forwarded to DefineOutput.
/// Returns the id of the new output variable.
Id DefineLegacyOutput(EmitContext& ctx, std::bitset<IR::NUM_GENERICS>& used_locations,
                      size_t& start_offset, std::optional<u32> invocations) {
    const Id id{DefineOutput(ctx, ctx.F32[4], invocations)};
    const size_t location = FindAndSetNextUnusedLocation(used_locations, start_offset);
    // Location is a 32-bit literal in SPIR-V; cast explicitly instead of passing a size_t.
    ctx.Decorate(id, spv::Decoration::Location, static_cast<u32>(location));
    return id;
}
} // Anonymous namespace
void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) {
@@ -456,8 +477,9 @@ void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_vie
EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_info_,
IR::Program& program, Bindings& bindings)
: Sirit::Module(profile_.supported_spirv), profile{profile_},
runtime_info{runtime_info_}, stage{program.stage} {
: Sirit::Module(profile_.supported_spirv), profile{profile_}, runtime_info{runtime_info_},
stage{program.stage}, texture_rescaling_index{bindings.texture_scaling_index},
image_rescaling_index{bindings.image_scaling_index} {
const bool is_unified{profile.unified_descriptor_binding};
u32& uniform_binding{is_unified ? bindings.unified : bindings.uniform_buffer};
u32& storage_binding{is_unified ? bindings.unified : bindings.storage_buffer};
@@ -474,10 +496,11 @@ EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_inf
DefineStorageBuffers(program.info, storage_binding);
DefineTextureBuffers(program.info, texture_binding);
DefineImageBuffers(program.info, image_binding);
DefineTextures(program.info, texture_binding);
DefineImages(program.info, image_binding);
DefineTextures(program.info, texture_binding, bindings.texture_scaling_index);
DefineImages(program.info, image_binding, bindings.image_scaling_index);
DefineAttributeMemAccess(program.info);
DefineGlobalMemoryFunctions(program.info);
DefineRescalingInput(program.info);
}
EmitContext::~EmitContext() = default;
@@ -520,6 +543,64 @@ Id EmitContext::BitOffset16(const IR::Value& offset) {
return OpBitwiseAnd(U32[1], OpShiftLeftLogical(U32[1], Def(offset), Const(3u)), Const(16u));
}
/// Maps a legacy attribute to the input variable id that holds it.
/// Colour attributes resolve per R..A range, FogCoordinate to the fog input, and the
/// fixed-function texture attributes to one entry of input_fixed_fnc_textures per
/// group of four consecutive attributes.
/// Throws InvalidArgument for any attribute outside those groups.
Id EmitContext::InputLegacyAttribute(IR::Attribute attribute) {
    const auto within = [attribute](IR::Attribute first, IR::Attribute last) {
        return attribute >= first && attribute <= last;
    };
    if (within(IR::Attribute::ColorFrontDiffuseR, IR::Attribute::ColorFrontDiffuseA)) {
        return input_front_color;
    }
    if (within(IR::Attribute::ColorFrontSpecularR, IR::Attribute::ColorFrontSpecularA)) {
        return input_front_secondary_color;
    }
    if (within(IR::Attribute::ColorBackDiffuseR, IR::Attribute::ColorBackDiffuseA)) {
        return input_back_color;
    }
    if (within(IR::Attribute::ColorBackSpecularR, IR::Attribute::ColorBackSpecularA)) {
        return input_back_secondary_color;
    }
    if (attribute == IR::Attribute::FogCoordinate) {
        return input_fog_frag_coord;
    }
    if (within(IR::Attribute::FixedFncTexture0S, IR::Attribute::FixedFncTexture9Q)) {
        const u32 component_offset{static_cast<u32>(attribute) -
                                   static_cast<u32>(IR::Attribute::FixedFncTexture0S)};
        return input_fixed_fnc_textures[component_offset / 4];
    }
    throw InvalidArgument("Attribute is not legacy attribute {}", attribute);
}
/// Maps a legacy attribute to the output variable id that holds it.
/// Colour attributes resolve per R..A range, FogCoordinate to the fog output, and the
/// fixed-function texture attributes to one entry of output_fixed_fnc_textures per
/// group of four consecutive attributes.
/// Throws InvalidArgument for any attribute outside those groups.
Id EmitContext::OutputLegacyAttribute(IR::Attribute attribute) {
    const auto within = [attribute](IR::Attribute first, IR::Attribute last) {
        return attribute >= first && attribute <= last;
    };
    if (within(IR::Attribute::ColorFrontDiffuseR, IR::Attribute::ColorFrontDiffuseA)) {
        return output_front_color;
    }
    if (within(IR::Attribute::ColorFrontSpecularR, IR::Attribute::ColorFrontSpecularA)) {
        return output_front_secondary_color;
    }
    if (within(IR::Attribute::ColorBackDiffuseR, IR::Attribute::ColorBackDiffuseA)) {
        return output_back_color;
    }
    if (within(IR::Attribute::ColorBackSpecularR, IR::Attribute::ColorBackSpecularA)) {
        return output_back_secondary_color;
    }
    if (attribute == IR::Attribute::FogCoordinate) {
        return output_fog_frag_coord;
    }
    if (within(IR::Attribute::FixedFncTexture0S, IR::Attribute::FixedFncTexture9Q)) {
        const u32 component_offset{static_cast<u32>(attribute) -
                                   static_cast<u32>(IR::Attribute::FixedFncTexture0S)};
        return output_fixed_fnc_textures[component_offset / 4];
    }
    throw InvalidArgument("Attribute is not legacy attribute {}", attribute);
}
void EmitContext::DefineCommonTypes(const Info& info) {
void_id = TypeVoid();
@@ -920,6 +1001,73 @@ void EmitContext::DefineGlobalMemoryFunctions(const Info& info) {
define(&StorageDefinitions::U32x4, storage_types.U32x4, U32[4], sizeof(u32[4]));
}
/// Declares the rescaling input when the program uses the rescaling uniform.
/// Profiles with unified descriptor binding get the push-constant form; all others
/// get the uniform-constant form.
void EmitContext::DefineRescalingInput(const Info& info) {
    if (info.uses_rescaling_uniform) {
        if (!profile.unified_descriptor_binding) {
            DefineRescalingInputUniformConstant();
            return;
        }
        DefineRescalingInputPushConstant();
    }
}
/// Declares the rescaling data as a push-constant block named "ResolutionInfo".
/// The block holds an array of 4 u32 words for textures, an array of
/// NUM_IMAGE_SCALING_WORDS u32 words for images and, for every stage except compute,
/// a single f32 down factor. Member offsets are taken from RescalingLayout.
void EmitContext::DefineRescalingInputPushConstant() {
// At most three members: textures, images and the optional down factor.
boost::container::static_vector<Id, 3> members{};
// Running index; each member records the index it was assigned for later decoration.
u32 member_index{0};
rescaling_textures_type = TypeArray(U32[1], Const(4u));
Decorate(rescaling_textures_type, spv::Decoration::ArrayStride, 4u);
members.push_back(rescaling_textures_type);
rescaling_textures_member_index = member_index++;
rescaling_images_type = TypeArray(U32[1], Const(NUM_IMAGE_SCALING_WORDS));
Decorate(rescaling_images_type, spv::Decoration::ArrayStride, 4u);
members.push_back(rescaling_images_type);
rescaling_images_member_index = member_index++;
// The down factor member is omitted for compute stages.
if (stage != Stage::Compute) {
members.push_back(F32[1]);
rescaling_downfactor_member_index = member_index++;
}
const Id push_constant_struct{TypeStruct(std::span(members.data(), members.size()))};
Decorate(push_constant_struct, spv::Decoration::Block);
Name(push_constant_struct, "ResolutionInfo");
// Offsets mirror the field offsets of RescalingLayout.
MemberDecorate(push_constant_struct, rescaling_textures_member_index, spv::Decoration::Offset,
static_cast<u32>(offsetof(RescalingLayout, rescaling_textures)));
MemberName(push_constant_struct, rescaling_textures_member_index, "rescaling_textures");
MemberDecorate(push_constant_struct, rescaling_images_member_index, spv::Decoration::Offset,
static_cast<u32>(offsetof(RescalingLayout, rescaling_images)));
MemberName(push_constant_struct, rescaling_images_member_index, "rescaling_images");
if (stage != Stage::Compute) {
MemberDecorate(push_constant_struct, rescaling_downfactor_member_index,
spv::Decoration::Offset,
static_cast<u32>(offsetof(RescalingLayout, down_factor)));
MemberName(push_constant_struct, rescaling_downfactor_member_index, "down_factor");
}
const Id pointer_type{TypePointer(spv::StorageClass::PushConstant, push_constant_struct)};
rescaling_push_constants = AddGlobalVariable(pointer_type, spv::StorageClass::PushConstant);
Name(rescaling_push_constants, "rescaling_push_constants");
// NOTE(review): 0x00010400 is presumably SPIR-V 1.4, from which the variable is also
// listed in `interfaces` - confirm against the version encoding used by the profile.
if (profile.supported_spirv >= 0x00010400) {
interfaces.push_back(rescaling_push_constants);
}
}
void EmitContext::DefineRescalingInputUniformConstant() {
const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, F32[4])};
rescaling_uniform_constant =
AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant);
Decorate(rescaling_uniform_constant, spv::Decoration::Location, 0u);
if (profile.supported_spirv >= 0x00010400) {
interfaces.push_back(rescaling_uniform_constant);
}
}
void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) {
if (info.constant_buffer_descriptors.empty()) {
return;
@@ -1108,7 +1256,7 @@ void EmitContext::DefineImageBuffers(const Info& info, u32& binding) {
}
}
void EmitContext::DefineTextures(const Info& info, u32& binding) {
void EmitContext::DefineTextures(const Info& info, u32& binding, u32& scaling_index) {
textures.reserve(info.texture_descriptors.size());
for (const TextureDescriptor& desc : info.texture_descriptors) {
const Id image_type{ImageType(*this, desc)};
@@ -1130,13 +1278,14 @@ void EmitContext::DefineTextures(const Info& info, u32& binding) {
interfaces.push_back(id);
}
++binding;
++scaling_index;
}
if (info.uses_atomic_image_u32) {
image_u32 = TypePointer(spv::StorageClass::Image, U32[1]);
}
}
void EmitContext::DefineImages(const Info& info, u32& binding) {
void EmitContext::DefineImages(const Info& info, u32& binding, u32& scaling_index) {
images.reserve(info.image_descriptors.size());
for (const ImageDescriptor& desc : info.image_descriptors) {
if (desc.count != 1) {
@@ -1157,6 +1306,7 @@ void EmitContext::DefineImages(const Info& info, u32& binding) {
interfaces.push_back(id);
}
++binding;
++scaling_index;
}
}
@@ -1279,22 +1429,26 @@ void EmitContext::DefineInputs(const IR::Program& program) {
}
size_t previous_unused_location = 0;
if (loads.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) {
const size_t location = FindNextUnusedLocation(used_locations, previous_unused_location);
previous_unused_location = location;
used_locations.set(location);
const Id id{DefineInput(*this, F32[4], true)};
Decorate(id, spv::Decoration::Location, location);
input_front_color = id;
input_front_color = DefineLegacyInput(*this, used_locations, previous_unused_location);
}
if (loads.AnyComponent(IR::Attribute::ColorFrontSpecularR)) {
input_front_secondary_color =
DefineLegacyInput(*this, used_locations, previous_unused_location);
}
if (loads.AnyComponent(IR::Attribute::ColorBackDiffuseR)) {
input_back_color = DefineLegacyInput(*this, used_locations, previous_unused_location);
}
if (loads.AnyComponent(IR::Attribute::ColorBackSpecularR)) {
input_back_secondary_color =
DefineLegacyInput(*this, used_locations, previous_unused_location);
}
if (loads.AnyComponent(IR::Attribute::FogCoordinate)) {
input_fog_frag_coord = DefineLegacyInput(*this, used_locations, previous_unused_location);
}
for (size_t index = 0; index < NUM_FIXEDFNCTEXTURE; ++index) {
if (loads.AnyComponent(IR::Attribute::FixedFncTexture0S + index * 4)) {
const size_t location =
FindNextUnusedLocation(used_locations, previous_unused_location);
previous_unused_location = location;
used_locations.set(location);
const Id id{DefineInput(*this, F32[4], true)};
Decorate(id, spv::Decoration::Location, location);
input_fixed_fnc_textures[index] = id;
input_fixed_fnc_textures[index] =
DefineLegacyInput(*this, used_locations, previous_unused_location);
}
}
if (stage == Stage::TessellationEval) {
@@ -1356,22 +1510,29 @@ void EmitContext::DefineOutputs(const IR::Program& program) {
}
size_t previous_unused_location = 0;
if (info.stores.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) {
const size_t location = FindNextUnusedLocation(used_locations, previous_unused_location);
previous_unused_location = location;
used_locations.set(location);
const Id id{DefineOutput(*this, F32[4], invocations)};
Decorate(id, spv::Decoration::Location, static_cast<u32>(location));
output_front_color = id;
output_front_color =
DefineLegacyOutput(*this, used_locations, previous_unused_location, invocations);
}
if (info.stores.AnyComponent(IR::Attribute::ColorFrontSpecularR)) {
output_front_secondary_color =
DefineLegacyOutput(*this, used_locations, previous_unused_location, invocations);
}
if (info.stores.AnyComponent(IR::Attribute::ColorBackDiffuseR)) {
output_back_color =
DefineLegacyOutput(*this, used_locations, previous_unused_location, invocations);
}
if (info.stores.AnyComponent(IR::Attribute::ColorBackSpecularR)) {
output_back_secondary_color =
DefineLegacyOutput(*this, used_locations, previous_unused_location, invocations);
}
if (info.stores.AnyComponent(IR::Attribute::FogCoordinate)) {
output_fog_frag_coord =
DefineLegacyOutput(*this, used_locations, previous_unused_location, invocations);
}
for (size_t index = 0; index < NUM_FIXEDFNCTEXTURE; ++index) {
if (info.stores.AnyComponent(IR::Attribute::FixedFncTexture0S + index * 4)) {
const size_t location =
FindNextUnusedLocation(used_locations, previous_unused_location);
previous_unused_location = location;
used_locations.set(location);
const Id id{DefineOutput(*this, F32[4], invocations)};
Decorate(id, spv::Decoration::Location, location);
output_fixed_fnc_textures[index] = id;
output_fixed_fnc_textures[index] =
DefineLegacyOutput(*this, used_locations, previous_unused_location, invocations);
}
}
switch (stage) {

View File

@@ -113,6 +113,9 @@ public:
[[nodiscard]] Id BitOffset8(const IR::Value& offset);
[[nodiscard]] Id BitOffset16(const IR::Value& offset);
Id InputLegacyAttribute(IR::Attribute attribute);
Id OutputLegacyAttribute(IR::Attribute attribute);
Id Const(u32 value) {
return Constant(U32[1], value);
}
@@ -235,6 +238,16 @@ public:
Id indexed_load_func{};
Id indexed_store_func{};
Id rescaling_uniform_constant{};
Id rescaling_push_constants{};
Id rescaling_textures_type{};
Id rescaling_images_type{};
u32 rescaling_textures_member_index{};
u32 rescaling_images_member_index{};
u32 rescaling_downfactor_member_index{};
u32 texture_rescaling_index{};
u32 image_rescaling_index{};
Id local_memory{};
Id shared_memory_u8{};
@@ -269,12 +282,20 @@ public:
Id input_position{};
Id input_front_color{};
Id input_front_secondary_color{};
Id input_back_color{};
Id input_back_secondary_color{};
Id input_fog_frag_coord{};
std::array<Id, 10> input_fixed_fnc_textures{};
std::array<Id, 32> input_generics{};
Id output_point_size{};
Id output_position{};
Id output_front_color{};
Id output_front_secondary_color{};
Id output_back_color{};
Id output_back_secondary_color{};
Id output_fog_frag_coord{};
std::array<Id, 10> output_fixed_fnc_textures{};
std::array<std::array<GenericElementInfo, 4>, 32> output_generics{};
@@ -299,10 +320,13 @@ private:
void DefineStorageBuffers(const Info& info, u32& binding);
void DefineTextureBuffers(const Info& info, u32& binding);
void DefineImageBuffers(const Info& info, u32& binding);
void DefineTextures(const Info& info, u32& binding);
void DefineImages(const Info& info, u32& binding);
void DefineTextures(const Info& info, u32& binding, u32& scaling_index);
void DefineImages(const Info& info, u32& binding, u32& scaling_index);
void DefineAttributeMemAccess(const Info& info);
void DefineGlobalMemoryFunctions(const Info& info);
void DefineRescalingInput(const Info& info);
void DefineRescalingInputPushConstant();
void DefineRescalingInputUniformConstant();
void DefineInputs(const IR::Program& program);
void DefineOutputs(const IR::Program& program);

View File

@@ -16,6 +16,19 @@
namespace Shader::Backend::SPIRV {
// Number of u32 words reserved for per-texture rescaling flags.
constexpr u32 NUM_TEXTURE_SCALING_WORDS = 4;
// Number of u32 words reserved for per-image rescaling flags.
constexpr u32 NUM_IMAGE_SCALING_WORDS = 2;
// Combined word count of the texture and image rescaling arrays.
constexpr u32 NUM_TEXTURE_AND_IMAGE_SCALING_WORDS =
NUM_TEXTURE_SCALING_WORDS + NUM_IMAGE_SCALING_WORDS;
// Host-side mirror of the rescaling data block.
// Every member is declared alignas(16), so each one starts on a 16-byte boundary.
// NOTE(review): presumably this must match the push constant block declared by the SPIR-V
// backend - confirm against DefineRescalingInputPushConstant.
struct RescalingLayout {
// One flag bit per texture descriptor (consumers index it as word = index / 32, bit = index % 32).
alignas(16) std::array<u32, NUM_TEXTURE_SCALING_WORDS> rescaling_textures;
// One flag bit per image descriptor, addressed the same way as the texture words.
alignas(16) std::array<u32, NUM_IMAGE_SCALING_WORDS> rescaling_images;
// Resolution down factor; stored as u32 here while the shader side loads it as an F32.
// NOTE(review): looks like the float bit pattern is stored - confirm at the write site.
alignas(16) u32 down_factor;
};
// Byte offset of the first rescaling word (the texture array) inside RescalingLayout.
constexpr u32 RESCALING_LAYOUT_WORDS_OFFSET = offsetof(RescalingLayout, rescaling_textures);
// Byte offset of the down factor inside RescalingLayout.
constexpr u32 RESCALING_LAYOUT_DOWN_FACTOR_OFFSET = offsetof(RescalingLayout, down_factor);
[[nodiscard]] std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_info,
IR::Program& program, Bindings& bindings);

View File

@@ -43,23 +43,12 @@ Id AttrPointer(EmitContext& ctx, Id pointer_type, Id vertex, Id base, Args&&...
}
}
bool IsFixedFncTexture(IR::Attribute attribute) {
return attribute >= IR::Attribute::FixedFncTexture0S &&
attribute <= IR::Attribute::FixedFncTexture9Q;
}
u32 FixedFncTextureAttributeIndex(IR::Attribute attribute) {
if (!IsFixedFncTexture(attribute)) {
throw InvalidArgument("Attribute {} is not a FixedFncTexture", attribute);
}
return (static_cast<u32>(attribute) - static_cast<u32>(IR::Attribute::FixedFncTexture0S)) / 4u;
}
u32 FixedFncTextureAttributeElement(IR::Attribute attribute) {
if (!IsFixedFncTexture(attribute)) {
throw InvalidArgument("Attribute {} is not a FixedFncTexture", attribute);
}
return static_cast<u32>(attribute) % 4u;
bool IsLegacyAttribute(IR::Attribute attribute) {
return (attribute >= IR::Attribute::ColorFrontDiffuseR &&
attribute <= IR::Attribute::ColorBackSpecularA) ||
attribute == IR::Attribute::FogCoordinate ||
(attribute >= IR::Attribute::FixedFncTexture0S &&
attribute <= IR::Attribute::FixedFncTexture9Q);
}
template <typename... Args>
@@ -93,12 +82,16 @@ std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) {
return OutputAccessChain(ctx, ctx.output_f32, info.id, index_id);
}
}
if (IsFixedFncTexture(attr)) {
const u32 index{FixedFncTextureAttributeIndex(attr)};
const u32 element{FixedFncTextureAttributeElement(attr)};
const Id element_id{ctx.Const(element)};
return OutputAccessChain(ctx, ctx.output_f32, ctx.output_fixed_fnc_textures[index],
element_id);
if (IsLegacyAttribute(attr)) {
if (attr == IR::Attribute::FogCoordinate) {
return OutputAccessChain(ctx, ctx.output_f32, ctx.OutputLegacyAttribute(attr),
ctx.Const(0u));
} else {
const u32 element{static_cast<u32>(attr) % 4};
const Id element_id{ctx.Const(element)};
return OutputAccessChain(ctx, ctx.output_f32, ctx.OutputLegacyAttribute(attr),
element_id);
}
}
switch (attr) {
case IR::Attribute::PointSize:
@@ -111,14 +104,6 @@ std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) {
const Id element_id{ctx.Const(element)};
return OutputAccessChain(ctx, ctx.output_f32, ctx.output_position, element_id);
}
case IR::Attribute::ColorFrontDiffuseR:
case IR::Attribute::ColorFrontDiffuseG:
case IR::Attribute::ColorFrontDiffuseB:
case IR::Attribute::ColorFrontDiffuseA: {
const u32 element{static_cast<u32>(attr) % 4};
const Id element_id{ctx.Const(element)};
return OutputAccessChain(ctx, ctx.output_f32, ctx.output_front_color, element_id);
}
case IR::Attribute::ClipDistance0:
case IR::Attribute::ClipDistance1:
case IR::Attribute::ClipDistance2:
@@ -341,11 +326,17 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
const Id value{ctx.OpLoad(type->id, pointer)};
return type->needs_cast ? ctx.OpBitcast(ctx.F32[1], value) : value;
}
if (IsFixedFncTexture(attr)) {
const u32 index{FixedFncTextureAttributeIndex(attr)};
const Id attr_id{ctx.input_fixed_fnc_textures[index]};
const Id attr_ptr{AttrPointer(ctx, ctx.input_f32, vertex, attr_id, ctx.Const(element))};
return ctx.OpLoad(ctx.F32[1], attr_ptr);
if (IsLegacyAttribute(attr)) {
if (attr == IR::Attribute::FogCoordinate) {
const Id attr_ptr{AttrPointer(ctx, ctx.input_f32, vertex,
ctx.InputLegacyAttribute(attr), ctx.Const(0u))};
return ctx.OpLoad(ctx.F32[1], attr_ptr);
} else {
const Id element_id{ctx.Const(element)};
const Id attr_ptr{AttrPointer(ctx, ctx.input_f32, vertex,
ctx.InputLegacyAttribute(attr), element_id)};
return ctx.OpLoad(ctx.F32[1], attr_ptr);
}
}
switch (attr) {
case IR::Attribute::PrimitiveId:
@@ -356,13 +347,6 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
case IR::Attribute::PositionW:
return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position,
ctx.Const(element)));
case IR::Attribute::ColorFrontDiffuseR:
case IR::Attribute::ColorFrontDiffuseG:
case IR::Attribute::ColorFrontDiffuseB:
case IR::Attribute::ColorFrontDiffuseA: {
return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_front_color,
ctx.Const(element)));
}
case IR::Attribute::InstanceId:
if (ctx.profile.support_vertex_instance_id) {
return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id));
@@ -542,6 +526,18 @@ Id EmitYDirection(EmitContext& ctx) {
return ctx.Const(ctx.runtime_info.y_negate ? -1.0f : 1.0f);
}
/// Emits a load of the resolution down factor and returns it as a scalar F32.
/// With unified descriptor binding the value is read from the rescaling push constant block;
/// otherwise it is component 2 of the rescaling uniform vector.
Id EmitResolutionDownFactor(EmitContext& ctx) {
    if (!ctx.profile.unified_descriptor_binding) {
        const Id rescaling_vector{ctx.OpLoad(ctx.F32[4], ctx.rescaling_uniform_constant)};
        return ctx.OpCompositeExtract(ctx.F32[1], rescaling_vector, 2u);
    }
    const Id f32_push_constant_ptr{ctx.TypePointer(spv::StorageClass::PushConstant, ctx.F32[1])};
    const Id member{ctx.Const(ctx.rescaling_downfactor_member_index)};
    const Id factor_ptr{
        ctx.OpAccessChain(f32_push_constant_ptr, ctx.rescaling_push_constants, member)};
    return ctx.OpLoad(ctx.F32[1], factor_ptr);
}
Id EmitLoadLocal(EmitContext& ctx, Id word_offset) {
const Id pointer{ctx.OpAccessChain(ctx.private_u32, ctx.local_memory, word_offset)};
return ctx.OpLoad(ctx.U32[1], pointer);

View File

@@ -224,6 +224,36 @@ Id Emit(MethodPtrType sparse_ptr, MethodPtrType non_sparse_ptr, EmitContext& ctx
Decorate(ctx, inst, sample);
return ctx.OpCompositeExtract(result_type, sample, 1U);
}
/// Emits a test of the rescaling flag bit for descriptor `index + base_index`.
/// The flags live in the push constant member selected by `member_index`, packed 32 per u32 word:
/// word = flat_index / 32, bit = flat_index % 32.
/// @param ctx          Emit context owning the push constant block.
/// @param index        Descriptor index, either an immediate or a runtime value.
/// @param member_index Constant id of the push constant member holding the flag words.
/// @param base_index   Offset added to `index` before addressing the flag words.
/// @return Id of a U1 that is true when the flag bit is set.
Id IsScaled(EmitContext& ctx, const IR::Value& index, Id member_index, u32 base_index) {
    const Id push_constant_u32{ctx.TypePointer(spv::StorageClass::PushConstant, ctx.U32[1])};
    Id bit{};
    if (index.IsImmediate()) {
        // Use BitwiseAnd instead of BitfieldExtract for better codegen on Nvidia OpenGL.
        // LOP32I.NZ is used to set the predicate rather than BFE+ISETP.
        const u32 index_value{index.U32() + base_index};
        const Id word_index{ctx.Const(index_value / 32)};
        const Id bit_index_mask{ctx.Const(1u << (index_value % 32))};
        const Id pointer{ctx.OpAccessChain(push_constant_u32, ctx.rescaling_push_constants,
                                           member_index, word_index)};
        const Id word{ctx.OpLoad(ctx.U32[1], pointer)};
        bit = ctx.OpBitwiseAnd(ctx.U32[1], word, bit_index_mask);
    } else {
        Id index_value{ctx.Def(index)};
        if (base_index != 0) {
            index_value = ctx.OpIAdd(ctx.U32[1], index_value, ctx.Const(base_index));
        }
        // Mirror the immediate path: fetch the flag word first, then extract the bit from it.
        // Extracting from the index value itself would never consult the rescaling flags.
        const Id word_index{ctx.OpShiftRightLogical(ctx.U32[1], index_value, ctx.Const(5u))};
        const Id pointer{ctx.OpAccessChain(push_constant_u32, ctx.rescaling_push_constants,
                                           member_index, word_index)};
        const Id word{ctx.OpLoad(ctx.U32[1], pointer)};
        const Id bit_index{ctx.OpBitwiseAnd(ctx.U32[1], index_value, ctx.Const(31u))};
        bit = ctx.OpBitFieldUExtract(ctx.U32[1], word, bit_index, ctx.Const(1u));
    }
    return ctx.OpINotEqual(ctx.U1, bit, ctx.u32_zero_value);
}
/// Emits `((mask >> bit) & 1) != 0` and returns the id of the resulting U1.
Id BitTest(EmitContext& ctx, Id mask, Id bit) {
    const Id one{ctx.Const(1u)};
    const Id moved_to_lsb{ctx.OpShiftRightLogical(ctx.U32[1], mask, bit)};
    const Id lowest_bit{ctx.OpBitwiseAnd(ctx.U32[1], moved_to_lsb, one)};
    return ctx.OpINotEqual(ctx.U1, lowest_bit, ctx.u32_zero_value);
}
} // Anonymous namespace
Id EmitBindlessImageSampleImplicitLod(EmitContext&) {
@@ -470,4 +500,28 @@ void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id
ctx.OpImageWrite(Image(ctx, index, info), coords, color);
}
/// Emits a test of whether the texture at `index` is flagged as rescaled.
/// With unified descriptor binding the flag comes from the push constant texture words;
/// otherwise it is a bit of component 0 of the rescaling uniform, reinterpreted as U32.
Id EmitIsTextureScaled(EmitContext& ctx, const IR::Value& index) {
    if (!ctx.profile.unified_descriptor_binding) {
        const Id rescaling_vector{ctx.OpLoad(ctx.F32[4], ctx.rescaling_uniform_constant)};
        const Id texture_mask_f32{ctx.OpCompositeExtract(ctx.F32[1], rescaling_vector, 0u)};
        const Id texture_mask{ctx.OpBitcast(ctx.U32[1], texture_mask_f32)};
        return BitTest(ctx, texture_mask, ctx.Def(index));
    }
    const Id textures_member{ctx.Const(ctx.rescaling_textures_member_index)};
    return IsScaled(ctx, index, textures_member, ctx.texture_rescaling_index);
}
/// Emits a test of whether the image at `index` is flagged as rescaled.
/// With unified descriptor binding the flag comes from the push constant image words;
/// otherwise it is a bit of component 1 of the rescaling uniform, reinterpreted as U32.
Id EmitIsImageScaled(EmitContext& ctx, const IR::Value& index) {
    if (!ctx.profile.unified_descriptor_binding) {
        const Id rescaling_vector{ctx.OpLoad(ctx.F32[4], ctx.rescaling_uniform_constant)};
        const Id image_mask_f32{ctx.OpCompositeExtract(ctx.F32[1], rescaling_vector, 1u)};
        const Id image_mask{ctx.OpBitcast(ctx.U32[1], image_mask_f32)};
        return BitTest(ctx, image_mask, ctx.Def(index));
    }
    const Id images_member{ctx.Const(ctx.rescaling_images_member_index)};
    return IsScaled(ctx, index, images_member, ctx.image_rescaling_index);
}
} // namespace Shader::Backend::SPIRV

View File

@@ -75,6 +75,7 @@ Id EmitInvocationId(EmitContext& ctx);
Id EmitSampleId(EmitContext& ctx);
Id EmitIsHelperInvocation(EmitContext& ctx);
Id EmitYDirection(EmitContext& ctx);
Id EmitResolutionDownFactor(EmitContext& ctx);
Id EmitLoadLocal(EmitContext& ctx, Id word_offset);
void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value);
Id EmitUndefU1(EmitContext& ctx);
@@ -283,6 +284,8 @@ Id EmitIAdd64(EmitContext& ctx, Id a, Id b);
Id EmitISub32(EmitContext& ctx, Id a, Id b);
Id EmitISub64(EmitContext& ctx, Id a, Id b);
Id EmitIMul32(EmitContext& ctx, Id a, Id b);
Id EmitSDiv32(EmitContext& ctx, Id a, Id b);
Id EmitUDiv32(EmitContext& ctx, Id a, Id b);
Id EmitINeg32(EmitContext& ctx, Id value);
Id EmitINeg64(EmitContext& ctx, Id value);
Id EmitIAbs32(EmitContext& ctx, Id value);
@@ -510,6 +513,8 @@ Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, I
Id derivates, Id offset, Id lod_clamp);
Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords);
void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color);
Id EmitIsTextureScaled(EmitContext& ctx, const IR::Value& index);
Id EmitIsImageScaled(EmitContext& ctx, const IR::Value& index);
Id EmitBindlessImageAtomicIAdd32(EmitContext&);
Id EmitBindlessImageAtomicSMin32(EmitContext&);
Id EmitBindlessImageAtomicUMin32(EmitContext&);

View File

@@ -72,6 +72,14 @@ Id EmitIMul32(EmitContext& ctx, Id a, Id b) {
return ctx.OpIMul(ctx.U32[1], a, b);
}
/// Emits a 32-bit signed division `a / b` (OpSDiv) with a U32 result type.
Id EmitSDiv32(EmitContext& ctx, Id a, Id b) {
    const Id quotient{ctx.OpSDiv(ctx.U32[1], a, b)};
    return quotient;
}
/// Emits a 32-bit unsigned division `a / b` (OpUDiv) with a U32 result type.
Id EmitUDiv32(EmitContext& ctx, Id a, Id b) {
    const Id quotient{ctx.OpUDiv(ctx.U32[1], a, b)};
    return quotient;
}
Id EmitINeg32(EmitContext& ctx, Id value) {
return ctx.OpSNegate(ctx.U32[1], value);
}

View File

@@ -22,6 +22,11 @@ void Block::AppendNewInst(Opcode op, std::initializer_list<Value> args) {
PrependNewInst(end(), op, args);
}
/// Creates a copy of `base_inst` in the instruction pool and inserts it before `insertion_point`.
/// Returns the iterator produced by the insertion.
Block::iterator Block::PrependNewInst(iterator insertion_point, const Inst& base_inst) {
    Inst& clone{*inst_pool->Create(base_inst)};
    return instructions.insert(insertion_point, clone);
}
Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op,
std::initializer_list<Value> args, u32 flags) {
Inst* const inst{inst_pool->Create(op, flags)};

View File

@@ -40,6 +40,9 @@ public:
/// Appends a new instruction to the end of this basic block.
void AppendNewInst(Opcode op, std::initializer_list<Value> args);
/// Prepends a copy of an instruction to this basic block before the insertion point.
iterator PrependNewInst(iterator insertion_point, const Inst& base_inst);
/// Prepends a new instruction to this basic block before the insertion point.
iterator PrependNewInst(iterator insertion_point, Opcode op,
std::initializer_list<Value> args = {}, u32 flags = 0);

View File

@@ -375,6 +375,10 @@ F32 IREmitter::YDirection() {
return Inst<F32>(Opcode::YDirection);
}
/// Emits a ResolutionDownFactor instruction and returns its F32 result.
F32 IREmitter::ResolutionDownFactor() {
    const F32 down_factor{Inst<F32>(Opcode::ResolutionDownFactor)};
    return down_factor;
}
U32 IREmitter::LaneId() {
return Inst<U32>(Opcode::LaneId);
}
@@ -1141,6 +1145,10 @@ U32 IREmitter::IMul(const U32& a, const U32& b) {
return Inst<U32>(Opcode::IMul32, a, b);
}
/// Emits a 32-bit integer division `a / b`.
/// `is_signed` selects SDiv32 over UDiv32.
U32 IREmitter::IDiv(const U32& a, const U32& b, bool is_signed) {
    if (is_signed) {
        return Inst<U32>(Opcode::SDiv32, a, b);
    }
    return Inst<U32>(Opcode::UDiv32, a, b);
}
U32U64 IREmitter::INeg(const U32U64& value) {
switch (value.Type()) {
case Type::U32:
@@ -1938,6 +1946,14 @@ Value IREmitter::ImageAtomicExchange(const Value& handle, const Value& coords, c
return Inst(op, Flags{info}, handle, coords, value);
}
/// Emits an IsTextureScaled query for the texture descriptor at `index`.
U1 IREmitter::IsTextureScaled(const U32& index) {
    const U1 is_scaled{Inst<U1>(Opcode::IsTextureScaled, index)};
    return is_scaled;
}
/// Emits an IsImageScaled query for the image descriptor at `index`.
U1 IREmitter::IsImageScaled(const U32& index) {
    const U1 is_scaled{Inst<U1>(Opcode::IsImageScaled, index)};
    return is_scaled;
}
U1 IREmitter::VoteAll(const U1& value) {
return Inst<U1>(Opcode::VoteAll, value);
}

View File

@@ -102,6 +102,8 @@ public:
[[nodiscard]] U1 IsHelperInvocation();
[[nodiscard]] F32 YDirection();
[[nodiscard]] F32 ResolutionDownFactor();
[[nodiscard]] U32 LaneId();
[[nodiscard]] U32 LoadGlobalU8(const U64& address);
@@ -207,6 +209,7 @@ public:
[[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b);
[[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b);
[[nodiscard]] U32 IMul(const U32& a, const U32& b);
[[nodiscard]] U32 IDiv(const U32& a, const U32& b, bool is_signed = false);
[[nodiscard]] U32U64 INeg(const U32U64& value);
[[nodiscard]] U32 IAbs(const U32& value);
[[nodiscard]] U32U64 ShiftLeftLogical(const U32U64& base, const U32& shift);
@@ -356,6 +359,10 @@ public:
TextureInstInfo info);
[[nodiscard]] Value ImageAtomicExchange(const Value& handle, const Value& coords,
const Value& value, TextureInstInfo info);
[[nodiscard]] U1 IsTextureScaled(const U32& index);
[[nodiscard]] U1 IsImageScaled(const U32& index);
[[nodiscard]] U1 VoteAll(const U1& value);
[[nodiscard]] U1 VoteAny(const U1& value);
[[nodiscard]] U1 VoteEqual(const U1& value);

View File

@@ -47,6 +47,17 @@ Inst::Inst(IR::Opcode op_, u32 flags_) noexcept : op{op_}, flags{flags_} {
}
}
/// Copy-constructs an instruction with the same opcode, flags and arguments as `base`.
/// Arguments are installed through SetArg rather than copied as raw storage.
/// Throws NotImplementedException when `base` is a phi node.
Inst::Inst(const Inst& base) : op{base.op}, flags{base.flags} {
    const bool is_phi{base.op == Opcode::Phi};
    if (is_phi) {
        throw NotImplementedException("Copying phi node");
    }
    // Bring the regular argument storage to life before assigning into it.
    std::construct_at(&args);
    const size_t arg_count{base.NumArgs()};
    for (size_t arg = 0; arg != arg_count; ++arg) {
        SetArg(arg, base.Arg(arg));
    }
}
Inst::~Inst() {
if (op == Opcode::Phi) {
std::destroy_at(&phi_args);

View File

@@ -62,6 +62,7 @@ OPCODE(InvocationId, U32,
OPCODE(SampleId, U32, )
OPCODE(IsHelperInvocation, U1, )
OPCODE(YDirection, F32, )
OPCODE(ResolutionDownFactor, F32, )
// Undefined
OPCODE(UndefU1, U1, )
@@ -286,6 +287,8 @@ OPCODE(IAdd64, U64, U64,
OPCODE(ISub32, U32, U32, U32, )
OPCODE(ISub64, U64, U64, U64, )
OPCODE(IMul32, U32, U32, U32, )
OPCODE(SDiv32, U32, U32, U32, )
OPCODE(UDiv32, U32, U32, U32, )
OPCODE(INeg32, U32, U32, )
OPCODE(INeg64, U64, U64, )
OPCODE(IAbs32, U32, U32, )
@@ -490,6 +493,9 @@ OPCODE(ImageGradient, F32x4, Opaq
OPCODE(ImageRead, U32x4, Opaque, Opaque, )
OPCODE(ImageWrite, Void, Opaque, Opaque, U32x4, )
OPCODE(IsTextureScaled, U1, U32, )
OPCODE(IsImageScaled, U1, U32, )
// Atomic Image operations
OPCODE(BindlessImageAtomicIAdd32, U32, U32, Opaque, U32, )

View File

@@ -116,10 +116,10 @@ public:
class Inst : public boost::intrusive::list_base_hook<> {
public:
explicit Inst(IR::Opcode op_, u32 flags_) noexcept;
explicit Inst(const Inst& base);
~Inst();
Inst& operator=(const Inst&) = delete;
Inst(const Inst&) = delete;
Inst& operator=(Inst&&) = delete;
Inst(Inst&&) = delete;

View File

@@ -179,6 +179,10 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
Optimization::TexturePass(env, program);
Optimization::ConstantPropagationPass(program);
if (Settings::values.resolution_info.active) {
Optimization::RescalingPass(program);
}
Optimization::DeadCodeEliminationPass(program);
if (Settings::values.renderer_debug) {
Optimization::VerificationPass(program);

View File

@@ -430,6 +430,11 @@ void VisitUsages(Info& info, IR::Inst& inst) {
case IR::Opcode::IsHelperInvocation:
info.uses_is_helper_invocation = true;
break;
case IR::Opcode::ResolutionDownFactor:
case IR::Opcode::IsTextureScaled:
case IR::Opcode::IsImageScaled:
info.uses_rescaling_uniform = true;
break;
case IR::Opcode::LaneId:
info.uses_subgroup_invocation_id = true;
break;

View File

@@ -19,6 +19,7 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program);
void IdentityRemovalPass(IR::Program& program);
void LowerFp16ToFp32(IR::Program& program);
void LowerInt64ToInt32(IR::Program& program);
void RescalingPass(IR::Program& program);
void SsaRewritePass(IR::Program& program);
void TexturePass(Environment& env, IR::Program& program);
void VerificationPass(const IR::Program& program);

View File

@@ -0,0 +1,327 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/alignment.h"
#include "common/settings.h"
#include "shader_recompiler/environment.h"
#include "shader_recompiler/frontend/ir/ir_emitter.h"
#include "shader_recompiler/frontend/ir/modifiers.h"
#include "shader_recompiler/frontend/ir/program.h"
#include "shader_recompiler/frontend/ir/value.h"
#include "shader_recompiler/ir_opt/passes.h"
#include "shader_recompiler/shader_info.h"
namespace Shader::Optimization {
namespace {
/// Returns true only for the texture types this pass rescales: Color2D and ColorArray2D.
[[nodiscard]] bool IsTextureTypeRescalable(TextureType type) {
    // Every enumerator is listed explicitly instead of relying on a default label.
    switch (type) {
    case TextureType::Color1D:
    case TextureType::ColorArray1D:
    case TextureType::Color3D:
    case TextureType::ColorCube:
    case TextureType::ColorArrayCube:
    case TextureType::Buffer:
        return false;
    case TextureType::Color2D:
    case TextureType::ColorArray2D:
        return true;
    }
    return false;
}
/// Handles shuffles whose first argument is a BitCastU32F32 of a PositionX/PositionY attribute.
/// The originating GetAttribute is tagged with 0xDEADBEEF so Visit leaves it unpatched.
/// The shuffle is cloned, the clone is multiplied by the reciprocal of the resolution down
/// factor, and every use of the original shuffle is redirected to that product.
void VisitMark(IR::Block& block, IR::Inst& inst) {
    switch (inst.GetOpcode()) {
    case IR::Opcode::ShuffleIndex:
    case IR::Opcode::ShuffleUp:
    case IR::Opcode::ShuffleDown:
    case IR::Opcode::ShuffleButterfly:
        break;
    default:
        // Only shuffles are of interest here.
        return;
    }
    const IR::Value shuffled{inst.Arg(0)};
    if (shuffled.IsImmediate()) {
        return;
    }
    const IR::Inst* const cast{shuffled.InstRecursive()};
    if (cast->GetOpcode() != IR::Opcode::BitCastU32F32) {
        return;
    }
    const IR::Value cast_source{cast->Arg(0)};
    if (cast_source.IsImmediate()) {
        return;
    }
    IR::Inst* const producer{cast_source.InstRecursive()};
    if (producer->GetOpcode() != IR::Opcode::GetAttribute) {
        return;
    }
    const IR::Attribute attr{producer->Arg(0).Attribute()};
    if (attr != IR::Attribute::PositionX && attr != IR::Attribute::PositionY) {
        return;
    }
    // Tag the attribute load so the later Visit step does not downscale it.
    producer->SetFlags<u32>(0xDEADBEEF);

    const auto shuffle_it{IR::Block::InstructionList::s_iterator_to(inst)};
    IR::IREmitter ir{block, shuffle_it};
    const IR::F32 cloned_shuffle{&*block.PrependNewInst(shuffle_it, inst)};
    const IR::F32 up_factor{ir.FPRecip(ir.ResolutionDownFactor())};
    const IR::Value rescaled{ir.FPMul(cloned_shuffle, up_factor)};
    inst.ReplaceUsesWith(rescaled);
}
/// Replaces every use of a frag coord attribute load with the same attribute multiplied by
/// the resolution down factor. The new instructions are emitted before `inst`.
void PatchFragCoord(IR::Block& block, IR::Inst& inst) {
    const auto here{IR::Block::InstructionList::s_iterator_to(inst)};
    IR::IREmitter ir{block, here};
    const IR::F32 factor{ir.ResolutionDownFactor()};
    const IR::F32 raw_coord{ir.GetAttribute(inst.Arg(0).Attribute())};
    const IR::F32 patched_coord{ir.FPMul(raw_coord, factor)};
    inst.ReplaceUsesWith(patched_coord);
}
/// Rewrites argument 1 of a point size store so the stored value is multiplied by the
/// reciprocal of the resolution down factor.
void PatchPointSize(IR::Block& block, IR::Inst& inst) {
    const auto here{IR::Block::InstructionList::s_iterator_to(inst)};
    IR::IREmitter ir{block, here};
    const IR::F32 original_size{inst.Arg(1)};
    const IR::F32 inverse_down_factor{ir.FPRecip(ir.ResolutionDownFactor())};
    const IR::F32 resized{ir.FPMul(original_size, inverse_down_factor)};
    inst.SetArg(1, resized);
}
/// Emits `is_scaled ? ((value * up_scale) >> down_shift) : value`, where the shift is arithmetic.
/// The multiply is skipped when up_scale is 1 and the shift is skipped when down_shift is 0.
[[nodiscard]] IR::U32 Scale(IR::IREmitter& ir, const IR::U1& is_scaled, const IR::U32& value) {
    const u32 up_scale = Settings::values.resolution_info.up_scale;
    const u32 down_shift = Settings::values.resolution_info.down_shift;
    IR::U32 result{value};
    if (up_scale != 1) {
        result = ir.IMul(result, ir.Imm32(up_scale));
    }
    if (down_shift != 0) {
        result = ir.ShiftRightArithmetic(result, ir.Imm32(down_shift));
    }
    return IR::U32{ir.Select(is_scaled, result, value)};
}
/// Emits a coordinate rescale that keeps the sub-texel position taken from the `attrib` value:
///   scaled = value * up_factor + (coord - up_factor * floor(coord * down_factor))
/// where `coord` is the loaded attribute. Yields `scaled` converted to U32 when `is_scaled`
/// is set, otherwise `value` unchanged.
[[nodiscard]] IR::U32 SubScale(IR::IREmitter& ir, const IR::U1& is_scaled, const IR::U32& value,
                               const IR::Attribute attrib) {
    const IR::F32 up_factor{ir.Imm32(Settings::values.resolution_info.up_factor)};
    const IR::F16F32F64 value_as_float{ir.ConvertUToF(32, 32, value)};
    const IR::F32 base{ir.FPMul(value_as_float, up_factor)};
    const IR::F32 frag_coord{ir.GetAttribute(attrib)};
    const IR::F32 down_factor{ir.Imm32(Settings::values.resolution_info.down_factor)};
    const IR::F16F32F64 native_coord{ir.FPMul(frag_coord, down_factor)};
    const IR::F16F32F64 native_texel{ir.FPFloor(native_coord)};
    const IR::F32 floor{ir.FPMul(up_factor, native_texel)};
    const IR::F16F32F64 negated_floor{ir.FPNeg(floor)};
    const IR::F16F32F64 sub_texel{ir.FPAdd(frag_coord, negated_floor)};
    const IR::F16F32F64 deviation{ir.FPAdd(base, sub_texel)};
    return IR::U32{ir.Select(is_scaled, ir.ConvertFToU(32, deviation), value)};
}
/// Emits the inverse of Scale: `is_scaled ? ((value << down_shift) / up_scale) : value`.
/// The shift is skipped when down_shift is 0 and the division is skipped when up_scale is 1.
[[nodiscard]] IR::U32 DownScale(IR::IREmitter& ir, const IR::U1& is_scaled, const IR::U32& value) {
    const u32 down_shift = Settings::values.resolution_info.down_shift;
    const u32 up_scale = Settings::values.resolution_info.up_scale;
    IR::U32 result{value};
    if (down_shift != 0) {
        result = ir.ShiftLeftLogical(result, ir.Imm32(down_shift));
    }
    if (up_scale != 1) {
        result = ir.IDiv(result, ir.Imm32(up_scale));
    }
    return IR::U32{ir.Select(is_scaled, result, value)};
}
/// For 2D and 2D-array textures, replaces the uses of a dimensions query with a copy whose
/// width and height (components 0 and 1) are passed through DownScale.
/// Components 2 and 3 are forwarded untouched. Other texture types are left alone.
void PatchImageQueryDimensions(IR::Block& block, IR::Inst& inst) {
    const auto query_it{IR::Block::InstructionList::s_iterator_to(inst)};
    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
    const auto info{inst.Flags<IR::TextureInstInfo>()};
    // The scaled query is emitted for every texture type, even those that are not patched.
    const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))};
    switch (info.type) {
    case TextureType::Color1D:
    case TextureType::ColorArray1D:
    case TextureType::Color3D:
    case TextureType::ColorCube:
    case TextureType::ColorArrayCube:
    case TextureType::Buffer:
        // Nothing to patch here
        return;
    case TextureType::Color2D:
    case TextureType::ColorArray2D: {
        // Work on a clone so the original instruction can be redirected to the patched value.
        const IR::Value cloned_query{&*block.PrependNewInst(query_it, inst)};
        const IR::U32 raw_width{ir.CompositeExtract(cloned_query, 0)};
        const IR::U32 width{DownScale(ir, is_scaled, raw_width)};
        const IR::U32 raw_height{ir.CompositeExtract(cloned_query, 1)};
        const IR::U32 height{DownScale(ir, is_scaled, raw_height)};
        const IR::Value replacement{
            ir.CompositeConstruct(width, height, ir.CompositeExtract(cloned_query, 2),
                                  ir.CompositeExtract(cloned_query, 3))};
        inst.ReplaceUsesWith(replacement);
        return;
    }
    }
}
/// Passes components 0 and 1 of the integer composite at argument `index` through Scale and
/// writes the rebuilt composite back into `inst`. Does nothing when the argument is empty.
/// Only Color2D and ColorArray2D arguments are rewritten; array types keep component 2 as is.
void ScaleIntegerComposite(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled,
                           size_t index) {
    const IR::Value source{inst.Arg(index)};
    if (source.IsEmpty()) {
        return;
    }
    const auto info{inst.Flags<IR::TextureInstInfo>()};
    // The scaled components are emitted before the type is inspected.
    const IR::U32 raw_x{ir.CompositeExtract(source, 0)};
    const IR::U32 scaled_x{Scale(ir, is_scaled, raw_x)};
    const IR::U32 raw_y{ir.CompositeExtract(source, 1)};
    const IR::U32 scaled_y{Scale(ir, is_scaled, raw_y)};
    switch (info.type) {
    case TextureType::Color1D:
    case TextureType::ColorArray1D:
    case TextureType::Color3D:
    case TextureType::ColorCube:
    case TextureType::ColorArrayCube:
    case TextureType::Buffer:
        // Nothing to patch here
        return;
    case TextureType::Color2D:
        inst.SetArg(index, ir.CompositeConstruct(scaled_x, scaled_y));
        return;
    case TextureType::ColorArray2D: {
        const IR::U32 layer{ir.CompositeExtract(source, 2)};
        inst.SetArg(index, ir.CompositeConstruct(scaled_x, scaled_y, layer));
        return;
    }
    }
}
/// Rewrites the coordinate composite at argument 1 using SubScale, pairing component 0 with
/// PositionX and component 1 with PositionY.
/// Only Color2D and ColorArray2D are rewritten; array types keep component 2 as is.
void SubScaleCoord(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled) {
    const auto info{inst.Flags<IR::TextureInstInfo>()};
    const IR::Value coords{inst.Arg(1)};
    // Both components are extracted first, then scaled, before the type is inspected.
    const IR::U32 raw_x{ir.CompositeExtract(coords, 0)};
    const IR::U32 raw_y{ir.CompositeExtract(coords, 1)};
    const IR::U32 new_x{SubScale(ir, is_scaled, raw_x, IR::Attribute::PositionX)};
    const IR::U32 new_y{SubScale(ir, is_scaled, raw_y, IR::Attribute::PositionY)};
    switch (info.type) {
    case TextureType::Color1D:
    case TextureType::ColorArray1D:
    case TextureType::Color3D:
    case TextureType::ColorCube:
    case TextureType::ColorArrayCube:
    case TextureType::Buffer:
        // Nothing to patch here
        return;
    case TextureType::Color2D:
        inst.SetArg(1, ir.CompositeConstruct(new_x, new_y));
        return;
    case TextureType::ColorArray2D: {
        const IR::U32 layer{ir.CompositeExtract(coords, 2)};
        inst.SetArg(1, ir.CompositeConstruct(new_x, new_y, layer));
        return;
    }
    }
}
/// ImageFetch patch used by Visit for fragment shaders: coordinates go through SubScaleCoord
/// and the offset composite (argument 2) through ScaleIntegerComposite.
/// Texture types that are not rescalable are left untouched.
void SubScaleImageFetch(IR::Block& block, IR::Inst& inst) {
    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
    const auto info{inst.Flags<IR::TextureInstInfo>()};
    if (IsTextureTypeRescalable(info.type)) {
        const IR::U1 texture_is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))};
        SubScaleCoord(ir, inst, texture_is_scaled);
        // Scale ImageFetch offset
        ScaleIntegerComposite(ir, inst, texture_is_scaled, 2);
    }
}
/// ImageRead patch used by Visit for fragment shaders: coordinates go through SubScaleCoord,
/// gated on the image rescaling flag. Non-rescalable texture types are left untouched.
void SubScaleImageRead(IR::Block& block, IR::Inst& inst) {
    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
    const auto info{inst.Flags<IR::TextureInstInfo>()};
    if (IsTextureTypeRescalable(info.type)) {
        const IR::U1 image_is_scaled{ir.IsImageScaled(ir.Imm32(info.descriptor_index))};
        SubScaleCoord(ir, inst, image_is_scaled);
    }
}
/// ImageFetch patch used by Visit outside fragment shaders: both the coordinate composite
/// (argument 1) and the offset composite (argument 2) go through ScaleIntegerComposite.
/// Texture types that are not rescalable are left untouched.
void PatchImageFetch(IR::Block& block, IR::Inst& inst) {
    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
    const auto info{inst.Flags<IR::TextureInstInfo>()};
    if (IsTextureTypeRescalable(info.type)) {
        const IR::U1 texture_is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))};
        ScaleIntegerComposite(ir, inst, texture_is_scaled, 1);
        // Scale ImageFetch offset
        ScaleIntegerComposite(ir, inst, texture_is_scaled, 2);
    }
}
/// ImageRead patch used by Visit outside fragment shaders: the coordinate composite
/// (argument 1) goes through ScaleIntegerComposite, gated on the image rescaling flag.
/// Texture types that are not rescalable are left untouched.
void PatchImageRead(IR::Block& block, IR::Inst& inst) {
    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
    const auto info{inst.Flags<IR::TextureInstInfo>()};
    if (IsTextureTypeRescalable(info.type)) {
        const IR::U1 image_is_scaled{ir.IsImageScaled(ir.Imm32(info.descriptor_index))};
        ScaleIntegerComposite(ir, inst, image_is_scaled, 1);
    }
}
/// Dispatches one instruction to the matching rescaling patch:
///  - GetAttribute of PositionX/Y in fragment shaders, unless tagged 0xDEADBEEF by VisitMark
///  - SetAttribute of PointSize, unless tagged 0xDEADBEEF
///  - ImageQueryDimensions
///  - ImageFetch / ImageRead: sub-texel variant in fragment shaders, integer scaling otherwise
void Visit(const IR::Program& program, IR::Block& block, IR::Inst& inst) {
    const bool in_fragment_stage{program.stage == Stage::Fragment};
    switch (inst.GetOpcode()) {
    case IR::Opcode::GetAttribute: {
        const IR::Attribute attr{inst.Arg(0).Attribute()};
        const bool reads_frag_xy{attr == IR::Attribute::PositionX ||
                                 attr == IR::Attribute::PositionY};
        if (reads_frag_xy && in_fragment_stage && inst.Flags<u32>() != 0xDEADBEEF) {
            PatchFragCoord(block, inst);
        }
        return;
    }
    case IR::Opcode::SetAttribute: {
        const IR::Attribute attr{inst.Arg(0).Attribute()};
        if (attr == IR::Attribute::PointSize && inst.Flags<u32>() != 0xDEADBEEF) {
            PatchPointSize(block, inst);
        }
        return;
    }
    case IR::Opcode::ImageQueryDimensions:
        PatchImageQueryDimensions(block, inst);
        return;
    case IR::Opcode::ImageFetch:
        if (!in_fragment_stage) {
            PatchImageFetch(block, inst);
        } else {
            SubScaleImageFetch(block, inst);
        }
        return;
    case IR::Opcode::ImageRead:
        if (!in_fragment_stage) {
            PatchImageRead(block, inst);
        } else {
            SubScaleImageRead(block, inst);
        }
        return;
    default:
        return;
    }
}
} // Anonymous namespace
/// Runs the rescaling patches over every instruction of `program`, block by block in
/// post_order_blocks order. Fragment shaders first get a VisitMark sweep over the whole
/// program, so its tags are in place before the Visit sweep starts.
void RescalingPass(IR::Program& program) {
    if (program.stage == Stage::Fragment) {
        for (IR::Block* const marked_block : program.post_order_blocks) {
            for (IR::Inst& candidate : marked_block->Instructions()) {
                VisitMark(*marked_block, candidate);
            }
        }
    }
    for (IR::Block* const patched_block : program.post_order_blocks) {
        for (IR::Inst& candidate : patched_block->Instructions()) {
            Visit(program, *patched_block, candidate);
        }
    }
}
} // namespace Shader::Optimization

View File

@@ -172,6 +172,7 @@ struct Info {
bool uses_global_memory{};
bool uses_atomic_image_u32{};
bool uses_shadow_lod{};
bool uses_rescaling_uniform{};
IR::Type used_constant_buffer_types{};
IR::Type used_storage_buffer_types{};
@@ -190,4 +191,13 @@ struct Info {
ImageDescriptors image_descriptors;
};
/// Returns the sum of the `count` member over every element of `descriptors`.
/// An empty range yields 0.
template <typename Descriptors>
u32 NumDescriptors(const Descriptors& descriptors) {
    u32 total{};
    for (const auto& descriptor : descriptors) {
        total += descriptor.count;
    }
    return total;
}
} // namespace Shader

View File

@@ -15,6 +15,8 @@ add_library(video_core STATIC
command_classes/codecs/codec.h
command_classes/codecs/h264.cpp
command_classes/codecs/h264.h
command_classes/codecs/vp8.cpp
command_classes/codecs/vp8.h
command_classes/codecs/vp9.cpp
command_classes/codecs/vp9.h
command_classes/codecs/vp9_types.h
@@ -130,6 +132,8 @@ add_library(video_core STATIC
renderer_vulkan/vk_descriptor_pool.h
renderer_vulkan/vk_fence_manager.cpp
renderer_vulkan/vk_fence_manager.h
renderer_vulkan/vk_fsr.cpp
renderer_vulkan/vk_fsr.h
renderer_vulkan/vk_graphics_pipeline.cpp
renderer_vulkan/vk_graphics_pipeline.h
renderer_vulkan/vk_master_semaphore.cpp

View File

@@ -853,12 +853,14 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
}
if constexpr (USE_MEMORY_MAPS) {
auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes);
runtime.PreCopyBarrier();
for (auto& [copy, buffer_id] : downloads) {
// Have in mind the staging buffer offset for the copy
copy.dst_offset += download_staging.offset;
const std::array copies{copy};
runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies);
runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies, false);
}
runtime.PostCopyBarrier();
runtime.Finish();
for (const auto& [copy, buffer_id] : downloads) {
const Buffer& buffer = slot_buffers[buffer_id];

View File

@@ -8,6 +8,7 @@
#include "common/settings.h"
#include "video_core/command_classes/codecs/codec.h"
#include "video_core/command_classes/codecs/h264.h"
#include "video_core/command_classes/codecs/vp8.h"
#include "video_core/command_classes/codecs/vp9.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
@@ -46,6 +47,7 @@ void AVFrameDeleter(AVFrame* ptr) {
/// Constructs the codec front-end.
/// Binds the GPU and the NVDEC register block (both are kept by reference in the members
/// `gpu` and `state`) and eagerly creates one frame-composing helper per handled format:
/// H264, VP8 and VP9. Each helper is constructed with the same GPU reference.
Codec::Codec(GPU& gpu_, const NvdecCommon::NvdecRegisters& regs)
: gpu(gpu_), state{regs}, h264_decoder(std::make_unique<Decoder::H264>(gpu)),
vp8_decoder(std::make_unique<Decoder::VP8>(gpu)),
vp9_decoder(std::make_unique<Decoder::VP9>(gpu)) {}
Codec::~Codec() {
@@ -135,7 +137,9 @@ void Codec::Initialize() {
switch (current_codec) {
case NvdecCommon::VideoCodec::H264:
return AV_CODEC_ID_H264;
case NvdecCommon::VideoCodec::Vp9:
case NvdecCommon::VideoCodec::VP8:
return AV_CODEC_ID_VP8;
case NvdecCommon::VideoCodec::VP9:
return AV_CODEC_ID_VP9;
default:
UNIMPLEMENTED_MSG("Unknown codec {}", current_codec);
@@ -176,19 +180,27 @@ void Codec::Decode() {
return;
}
bool vp9_hidden_frame = false;
std::vector<u8> frame_data;
if (current_codec == NvdecCommon::VideoCodec::H264) {
frame_data = h264_decoder->ComposeFrameHeader(state, is_first_frame);
} else if (current_codec == NvdecCommon::VideoCodec::Vp9) {
frame_data = vp9_decoder->ComposeFrameHeader(state);
vp9_hidden_frame = vp9_decoder->WasFrameHidden();
}
const auto& frame_data = [&]() {
switch (current_codec) {
case Tegra::NvdecCommon::VideoCodec::H264:
return h264_decoder->ComposeFrame(state, is_first_frame);
case Tegra::NvdecCommon::VideoCodec::VP8:
return vp8_decoder->ComposeFrame(state);
case Tegra::NvdecCommon::VideoCodec::VP9:
vp9_decoder->ComposeFrame(state);
vp9_hidden_frame = vp9_decoder->WasFrameHidden();
return vp9_decoder->GetFrameBytes();
default:
UNREACHABLE();
return std::vector<u8>{};
}
}();
AVPacketPtr packet{av_packet_alloc(), AVPacketDeleter};
if (!packet) {
LOG_ERROR(Service_NVDRV, "av_packet_alloc failed");
return;
}
packet->data = frame_data.data();
packet->data = const_cast<u8*>(frame_data.data());
packet->size = static_cast<s32>(frame_data.size());
if (const int res = avcodec_send_packet(av_codec_ctx, packet.get()); res != 0) {
LOG_DEBUG(Service_NVDRV, "avcodec_send_packet error {}", res);
@@ -252,11 +264,11 @@ std::string_view Codec::GetCurrentCodecName() const {
return "None";
case NvdecCommon::VideoCodec::H264:
return "H264";
case NvdecCommon::VideoCodec::Vp8:
case NvdecCommon::VideoCodec::VP8:
return "VP8";
case NvdecCommon::VideoCodec::H265:
return "H265";
case NvdecCommon::VideoCodec::Vp9:
case NvdecCommon::VideoCodec::VP9:
return "VP9";
default:
return "Unknown";

View File

@@ -29,6 +29,7 @@ using AVFramePtr = std::unique_ptr<AVFrame, decltype(&AVFrameDeleter)>;
namespace Decoder {
class H264;
class VP8;
class VP9;
} // namespace Decoder
@@ -72,6 +73,7 @@ private:
GPU& gpu;
const NvdecCommon::NvdecRegisters& state;
std::unique_ptr<Decoder::H264> h264_decoder;
std::unique_ptr<Decoder::VP8> vp8_decoder;
std::unique_ptr<Decoder::VP9> vp9_decoder;
std::queue<AVFramePtr> av_frames{};

View File

@@ -45,8 +45,8 @@ H264::H264(GPU& gpu_) : gpu(gpu_) {}
H264::~H264() = default;
const std::vector<u8>& H264::ComposeFrameHeader(const NvdecCommon::NvdecRegisters& state,
bool is_first_frame) {
const std::vector<u8>& H264::ComposeFrame(const NvdecCommon::NvdecRegisters& state,
bool is_first_frame) {
H264DecoderContext context;
gpu.MemoryManager().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext));

View File

@@ -75,9 +75,9 @@ public:
explicit H264(GPU& gpu);
~H264();
/// Compose the H264 header of the frame for FFmpeg decoding
[[nodiscard]] const std::vector<u8>& ComposeFrameHeader(
const NvdecCommon::NvdecRegisters& state, bool is_first_frame = false);
/// Compose the H264 frame for FFmpeg decoding
[[nodiscard]] const std::vector<u8>& ComposeFrame(const NvdecCommon::NvdecRegisters& state,
bool is_first_frame = false);
private:
std::vector<u8> frame;

View File

@@ -0,0 +1,55 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <array>
#include <vector>
#include "video_core/command_classes/codecs/vp8.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
namespace Tegra::Decoder {
VP8::VP8(GPU& gpu_) : gpu(gpu_) {}
VP8::~VP8() = default;
/// Composes a VP8 frame for FFmpeg: a frame header built from the guest's picture info,
/// followed by the bitstream copied out of guest memory.
///
/// @param state NVDEC registers. picture_info_offset locates the VP8PictureInfo structure and
///              frame_bitstream_offset locates the compressed frame data.
/// @returns Reference to the internal `frame` buffer, which is resized and rewritten by every
///          call.
const std::vector<u8>& VP8::ComposeFrame(const NvdecCommon::NvdecRegisters& state) {
    VP8PictureInfo picture_info;
    gpu.MemoryManager().ReadBlock(state.picture_info_offset, &picture_info,
                                  sizeof(VP8PictureInfo));

    const bool key_frame = picture_info.key_frame == 1u;
    // 3 bytes of frame tag; key frames append 3 marker bytes and 2x16 bits of dimensions.
    const size_t header_bytes = key_frame ? 10u : 3u;
    const auto payload_bytes = static_cast<size_t>(picture_info.vld_buffer_size);
    frame.resize(header_bytes + payload_bytes);

    // Frame tag layout, based on page 30 of the VP8 specification.
    // https://datatracker.ietf.org/doc/rfc6386/
    //   bit  0     : frame type (0: keyframe, 1: interframe)
    //   bits 1-3   : version number
    //   bit  4     : show_frame flag (always set here)
    //   bits 5-23  : first partition size (19 bits)
    u32 frame_tag = key_frame ? 0u : 1u;
    frame_tag |= (picture_info.version & 7u) << 1u;
    frame_tag |= 1u << 4u;
    frame_tag |= (picture_info.first_part_size & 0x7ffffu) << 5u;

    // The tag is stored least significant byte first.
    frame[0] = static_cast<u8>(frame_tag & 0xffu);
    frame[1] = static_cast<u8>((frame_tag >> 8u) & 0xffu);
    frame[2] = static_cast<u8>((frame_tag >> 16u) & 0xffu);

    if (key_frame) {
        // Fixed three-byte marker that follows the tag on key frames.
        frame[3] = 0x9du;
        frame[4] = 0x01u;
        frame[5] = 0x2au;

        // TODO(ameerj): Horizontal/Vertical Scale
        // Each dimension is 16 bits: (2 bits scale << 14) | size (14 bits).
        // The scale bits are left at zero.
        const auto width = picture_info.frame_width;
        const auto height = picture_info.frame_height;
        frame[6] = static_cast<u8>(width & 0xff);
        frame[7] = static_cast<u8>(((width >> 8) & 0x3f));
        frame[8] = static_cast<u8>(height & 0xff);
        frame[9] = static_cast<u8>(((height >> 8) & 0x3f));
    }

    // Append the bitstream right behind the header.
    const u64 bitstream_address = state.frame_bitstream_offset;
    gpu.MemoryManager().ReadBlock(bitstream_address, frame.data() + header_bytes, payload_bytes);
    return frame;
}
} // namespace Tegra::Decoder

View File

@@ -0,0 +1,74 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <vector>
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "video_core/command_classes/nvdec_common.h"
namespace Tegra {
class GPU;
namespace Decoder {
/// Helper that turns the guest-provided VP8 picture information and bitstream into a byte
/// buffer that can be handed to FFmpeg.
class VP8 {
public:
explicit VP8(GPU& gpu);
~VP8();
/// Compose the VP8 frame for FFmpeg decoding.
/// The returned reference points at the internal `frame` buffer of this object.
[[nodiscard]] const std::vector<u8>& ComposeFrame(const NvdecCommon::NvdecRegisters& state);
private:
// Backing storage for the composed frame; ComposeFrame returns a reference to it.
std::vector<u8> frame;
GPU& gpu;
// Picture description that ComposeFrame reads from guest memory in a single block copy.
// The member order and the padding are part of that binary layout (checked by the
// static_assert below), so fields must not be reordered or resized.
struct VP8PictureInfo {
INSERT_PADDING_WORDS_NOINIT(14);
u16 frame_width; // actual frame width
u16 frame_height; // actual frame height
u8 key_frame;
u8 version;
// One byte of packed surface flags, also accessible whole through `raw`.
union {
u8 raw;
BitField<0, 2, u8> tile_format;
BitField<2, 3, u8> gob_height;
BitField<5, 3, u8> reserverd_surface_format;
};
u8 error_conceal_on; // 1: error conceal on; 0: off
u32 first_part_size; // the size of first partition(frame header and mb header partition)
u32 hist_buffer_size; // in units of 256
u32 vld_buffer_size; // in units of 1
// Current frame buffers
std::array<u32, 2> frame_stride; // [y_c]
u32 luma_top_offset; // offset of luma top field in units of 256
u32 luma_bot_offset; // offset of luma bottom field in units of 256
u32 luma_frame_offset; // offset of luma frame in units of 256
u32 chroma_top_offset; // offset of chroma top field in units of 256
u32 chroma_bot_offset; // offset of chroma bottom field in units of 256
u32 chroma_frame_offset; // offset of chroma frame in units of 256
INSERT_PADDING_BYTES_NOINIT(0x1c); // NvdecDisplayParams
// Decode picture buffer related
s8 current_output_memory_layout;
// output NV12/NV24 setting. index 0: golden; 1: altref; 2: last
std::array<s8, 3> output_memory_layout;
u8 segmentation_feature_data_update;
INSERT_PADDING_BYTES_NOINIT(3);
// ucode return result
u32 result_value;
std::array<u32, 8> partition_offset;
INSERT_PADDING_WORDS_NOINIT(3);
};
// Guards the binary layout above: the structure must be exactly 0xc0 bytes.
static_assert(sizeof(VP8PictureInfo) == 0xc0, "PictureInfo is an invalid size");
};
} // namespace Decoder
} // namespace Tegra

View File

@@ -770,7 +770,7 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() {
return uncomp_writer;
}
const std::vector<u8>& VP9::ComposeFrameHeader(const NvdecCommon::NvdecRegisters& state) {
void VP9::ComposeFrame(const NvdecCommon::NvdecRegisters& state) {
std::vector<u8> bitstream;
{
Vp9FrameContainer curr_frame = GetCurrentFrame(state);
@@ -792,7 +792,6 @@ const std::vector<u8>& VP9::ComposeFrameHeader(const NvdecCommon::NvdecRegisters
frame.begin() + uncompressed_header.size());
std::copy(bitstream.begin(), bitstream.end(),
frame.begin() + uncompressed_header.size() + compressed_header.size());
return frame;
}
VpxRangeEncoder::VpxRangeEncoder() {

View File

@@ -116,16 +116,20 @@ public:
VP9(VP9&&) = default;
VP9& operator=(VP9&&) = delete;
/// Composes the VP9 frame from the GPU state information. Based on the official VP9 spec
/// documentation
[[nodiscard]] const std::vector<u8>& ComposeFrameHeader(
const NvdecCommon::NvdecRegisters& state);
/// Composes the VP9 frame from the GPU state information.
/// Based on the official VP9 spec documentation
void ComposeFrame(const NvdecCommon::NvdecRegisters& state);
/// Returns true if the most recent frame was a hidden frame, i.e. when the show_frame field
/// of current_frame_info is false.
[[nodiscard]] bool WasFrameHidden() const {
return !current_frame_info.show_frame;
}
/// Returns a const reference to the composed frame data held in the `frame` member.
/// No copy is made; the reference aliases storage owned by this object.
/// NOTE(review): the contents presumably change on the next ComposeFrame call - confirm before
/// keeping the reference across frames.
[[nodiscard]] const std::vector<u8>& GetFrameBytes() const {
return frame;
}
private:
/// Generates compressed header probability updates in the bitstream writer
template <typename T, std::size_t N>

View File

@@ -35,7 +35,8 @@ AVFramePtr Nvdec::GetFrame() {
void Nvdec::Execute() {
switch (codec->GetCurrentCodec()) {
case NvdecCommon::VideoCodec::H264:
case NvdecCommon::VideoCodec::Vp9:
case NvdecCommon::VideoCodec::VP8:
case NvdecCommon::VideoCodec::VP9:
codec->Decode();
break;
default:

View File

@@ -13,9 +13,9 @@ namespace Tegra::NvdecCommon {
enum class VideoCodec : u64 {
None = 0x0,
H264 = 0x3,
Vp8 = 0x5,
VP8 = 0x5,
H265 = 0x7,
Vp9 = 0x9,
VP9 = 0x9,
};
// NVDEC should use a 32-bit address space, but is mapped to 64-bit,
@@ -50,7 +50,10 @@ struct NvdecRegisters {
u64 h264_last_surface_chroma_offset; ///< 0x0858
std::array<u64, 17> surface_luma_offset; ///< 0x0860
std::array<u64, 17> surface_chroma_offset; ///< 0x08E8
INSERT_PADDING_WORDS_NOINIT(132); ///< 0x0970
INSERT_PADDING_WORDS_NOINIT(68); ///< 0x0970
u64 vp8_prob_data_offset; ///< 0x0A80
u64 vp8_header_partition_buf_offset; ///< 0x0A88
INSERT_PADDING_WORDS_NOINIT(60); ///< 0x0A90
u64 vp9_entropy_probs_offset; ///< 0x0B80
u64 vp9_backward_updates_offset; ///< 0x0B88
u64 vp9_last_frame_segmap_offset; ///< 0x0B90
@@ -81,6 +84,8 @@ ASSERT_REG_POSITION(h264_last_surface_luma_offset, 0x10A);
ASSERT_REG_POSITION(h264_last_surface_chroma_offset, 0x10B);
ASSERT_REG_POSITION(surface_luma_offset, 0x10C);
ASSERT_REG_POSITION(surface_chroma_offset, 0x11D);
ASSERT_REG_POSITION(vp8_prob_data_offset, 0x150);
ASSERT_REG_POSITION(vp8_header_partition_buf_offset, 0x151);
ASSERT_REG_POSITION(vp9_entropy_probs_offset, 0x170);
ASSERT_REG_POSITION(vp9_backward_updates_offset, 0x171);
ASSERT_REG_POSITION(vp9_last_frame_segmap_offset, 0x172);

View File

@@ -29,6 +29,8 @@ enum : u8 {
ColorBuffer6,
ColorBuffer7,
ZetaBuffer,
RescaleViewports,
RescaleScissors,
VertexBuffers,
VertexBuffer0,

View File

@@ -83,6 +83,7 @@ enum class DepthFormat : u32 {
S8_UINT_Z24_UNORM = 0x14,
D24X8_UNORM = 0x15,
D24S8_UNORM = 0x16,
S8_UINT = 0x17,
D24C8_UNORM = 0x18,
D32_FLOAT_S8X24_UINT = 0x19,
};

View File

@@ -1,3 +1,11 @@
set(FIDELITYFX_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/externals/FidelityFX-FSR/ffx-fsr)
set(GLSL_INCLUDES
fidelityfx_fsr.comp
${FIDELITYFX_INCLUDE_DIR}/ffx_a.h
${FIDELITYFX_INCLUDE_DIR}/ffx_fsr1.h
)
set(SHADER_FILES
astc_decoder.comp
block_linear_unswizzle_2d.comp
@@ -5,14 +13,25 @@ set(SHADER_FILES
convert_depth_to_float.frag
convert_float_to_depth.frag
full_screen_triangle.vert
fxaa.frag
fxaa.vert
opengl_copy_bc4.comp
opengl_present.frag
opengl_present.vert
opengl_present_scaleforce.frag
pitch_unswizzle.comp
present_bicubic.frag
present_gaussian.frag
vulkan_blit_color_float.frag
vulkan_blit_depth_stencil.frag
vulkan_fidelityfx_fsr_easu_fp16.comp
vulkan_fidelityfx_fsr_easu_fp32.comp
vulkan_fidelityfx_fsr_rcas_fp16.comp
vulkan_fidelityfx_fsr_rcas_fp32.comp
vulkan_present.frag
vulkan_present.vert
vulkan_present_scaleforce_fp16.frag
vulkan_present_scaleforce_fp32.frag
vulkan_quad_indexed.comp
vulkan_uint8.comp
)
@@ -76,7 +95,7 @@ foreach(FILENAME IN ITEMS ${SHADER_FILES})
OUTPUT
${SPIRV_HEADER_FILE}
COMMAND
${GLSLANGVALIDATOR} -V ${QUIET_FLAG} ${GLSL_FLAGS} --variable-name ${SPIRV_VARIABLE_NAME} -o ${SPIRV_HEADER_FILE} ${SOURCE_FILE}
${GLSLANGVALIDATOR} -V ${QUIET_FLAG} -I"${FIDELITYFX_INCLUDE_DIR}" ${GLSL_FLAGS} --variable-name ${SPIRV_VARIABLE_NAME} -o ${SPIRV_HEADER_FILE} ${SOURCE_FILE}
MAIN_DEPENDENCY
${SOURCE_FILE}
)
@@ -84,9 +103,12 @@ foreach(FILENAME IN ITEMS ${SHADER_FILES})
endif()
endforeach()
set(SHADER_SOURCES ${SHADER_FILES})
list(APPEND SHADER_SOURCES ${GLSL_INCLUDES})
add_custom_target(host_shaders
DEPENDS
${SHADER_HEADERS}
SOURCES
${SHADER_FILES}
${SHADER_SOURCES}
)

View File

@@ -0,0 +1,116 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
//!#version 460 core
#extension GL_ARB_separate_shader_objects : enable
#extension GL_ARB_shading_language_420pack : enable
#extension GL_GOOGLE_include_directive : enable
#extension GL_EXT_shader_explicit_arithmetic_types : require
// FidelityFX Super Resolution Sample
//
// Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
layout( push_constant ) uniform constants {
uvec4 Const0;
uvec4 Const1;
uvec4 Const2;
uvec4 Const3;
};
layout(set=0,binding=0) uniform sampler2D InputTexture;
layout(set=0,binding=1,rgba16f) uniform image2D OutputTexture;
// Configuration and input callbacks for the FidelityFX headers.
// Two mutually exclusive variants are provided: a 32-bit float one (suffix F) when
// YUZU_USE_FP16 is not defined, and a 16-bit float one (suffix H, with A_HALF defined before
// ffx_a.h is included) when it is. Within each variant the EASU and RCAS callbacks are only
// compiled when USE_EASU / USE_RCAS evaluate to non-zero.
// NOTE(review): per the FSR 1.0 integration notes these defines and callbacks have to appear
// before ffx_fsr1.h is included - keep the ordering below intact.
#define A_GPU 1
#define A_GLSL 1
#ifndef YUZU_USE_FP16
#include "ffx_a.h"
#if USE_EASU
#define FSR_EASU_F 1
// Gather the red / green / blue channel (component 0 / 1 / 2) of the input texture at p.
AF4 FsrEasuRF(AF2 p) { AF4 res = textureGather(InputTexture, p, 0); return res; }
AF4 FsrEasuGF(AF2 p) { AF4 res = textureGather(InputTexture, p, 1); return res; }
AF4 FsrEasuBF(AF2 p) { AF4 res = textureGather(InputTexture, p, 2); return res; }
#endif
#if USE_RCAS
#define FSR_RCAS_F 1
// Fetch a single texel of mip level 0 at integer coordinate p.
AF4 FsrRcasLoadF(ASU2 p) { return texelFetch(InputTexture, ASU2(p), 0); }
// Input transform hook; intentionally empty, the fetched color is used unchanged.
void FsrRcasInputF(inout AF1 r, inout AF1 g, inout AF1 b) {}
#endif
#else
#define A_HALF
#include "ffx_a.h"
#if USE_EASU
#define FSR_EASU_H 1
// Same gathers as the 32-bit variant, with the result converted to the half-precision type.
AH4 FsrEasuRH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 0)); return res; }
AH4 FsrEasuGH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 1)); return res; }
AH4 FsrEasuBH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 2)); return res; }
#endif
#if USE_RCAS
#define FSR_RCAS_H 1
// Fetch a single texel of mip level 0 at p and convert it to the half-precision type.
AH4 FsrRcasLoadH(ASW2 p) { return AH4(texelFetch(InputTexture, ASU2(p), 0)); }
// Input transform hook; intentionally empty.
void FsrRcasInputH(inout AH1 r,inout AH1 g,inout AH1 b){}
#endif
#endif
#include "ffx_fsr1.h"
// Computes one output pixel at integer coordinate `pos` and stores it into OutputTexture.
// Which filter runs is selected at compile time:
//   USE_BILINEAR - plain textureLod sample at a coordinate derived from Const0 / Const1
//   USE_EASU     - FsrEasuF / FsrEasuH with all four push constants
//   USE_RCAS     - FsrRcasF / FsrRcasH with Const0 only
// The EASU and RCAS branches both declare a local named `c`, so at most one of them may be
// enabled in a single compilation. YUZU_USE_FP16 picks the half-precision entry points.
// The EASU and RCAS results are written with alpha forced to 1.
void CurrFilter(AU2 pos) {
#if USE_BILINEAR
AF2 pp = (AF2(pos) * AF2_AU2(Const0.xy) + AF2_AU2(Const0.zw)) * AF2_AU2(Const1.xy) + AF2(0.5, -0.5) * AF2_AU2(Const1.zw);
imageStore(OutputTexture, ASU2(pos), textureLod(InputTexture, pp, 0.0));
#endif
#if USE_EASU
#ifndef YUZU_USE_FP16
AF3 c;
FsrEasuF(c, pos, Const0, Const1, Const2, Const3);
imageStore(OutputTexture, ASU2(pos), AF4(c, 1));
#else
AH3 c;
FsrEasuH(c, pos, Const0, Const1, Const2, Const3);
imageStore(OutputTexture, ASU2(pos), AH4(c, 1));
#endif
#endif
#if USE_RCAS
#ifndef YUZU_USE_FP16
AF3 c;
FsrRcasF(c.r, c.g, c.b, pos, Const0);
imageStore(OutputTexture, ASU2(pos), AF4(c, 1));
#else
AH3 c;
FsrRcasH(c.r, c.g, c.b, pos, Const0);
imageStore(OutputTexture, ASU2(pos), AH4(c, 1));
#endif
#endif
}
layout(local_size_x=64) in;
// Each invocation filters four pixels: its remapped position inside the workgroup's tile and
// the three positions shifted by 8 in x, in x and y, and in y. Workgroup IDs are scaled by 16
// (<< 4), so one workgroup of 64 invocations covers 64 * 4 = 256 pixels.
void main() {
    // Do remapping of local xy in workgroup for a more PS-like swizzle pattern.
    AU2 origin = ARmp8x8(gl_LocalInvocationID.x) + AU2(gl_WorkGroupID.x << 4u, gl_WorkGroupID.y << 4u);
    // Same visiting order as a walk around the 2x2 quadrant grid: (0,0), (8,0), (8,8), (0,8).
    CurrFilter(origin);
    CurrFilter(origin + AU2(8u, 0u));
    CurrFilter(origin + AU2(8u, 8u));
    CurrFilter(origin + AU2(0u, 8u));
}

View File

@@ -0,0 +1,76 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
// Source code is adapted from
// https://www.geeks3d.com/20110405/fxaa-fast-approximate-anti-aliasing-demo-glsl-opengl-test-radeon-geforce/3/
#version 460
#ifdef VULKAN
#define BINDING_COLOR_TEXTURE 1
#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
#define BINDING_COLOR_TEXTURE 0
#endif
layout (location = 0) in vec4 posPos;
layout (location = 0) out vec4 frag_color;
layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D input_texture;
const float FXAA_SPAN_MAX = 8.0;
const float FXAA_REDUCE_MUL = 1.0 / 8.0;
const float FXAA_REDUCE_MIN = 1.0 / 128.0;
#define FxaaTexLod0(t, p) textureLod(t, p, 0.0)
#define FxaaTexOff(t, p, o) textureLodOffset(t, p, 0.0, o)
// FXAA resolve for one fragment.
// posPos.xy is the fragment's texture coordinate and posPos.zw the shifted coordinate used as
// anchor for the four corner taps. Returns the filtered RGB color.
vec3 FxaaPixelShader(vec4 posPos, sampler2D tex) {
    // Corner taps around posPos.zw and the middle tap at posPos.xy, all from mip level 0.
    vec3 north_west = FxaaTexLod0(tex, posPos.zw).xyz;
    vec3 north_east = FxaaTexOff(tex, posPos.zw, ivec2(1,0)).xyz;
    vec3 south_west = FxaaTexOff(tex, posPos.zw, ivec2(0,1)).xyz;
    vec3 south_east = FxaaTexOff(tex, posPos.zw, ivec2(1,1)).xyz;
    vec3 middle = FxaaTexLod0(tex, posPos.xy).xyz;

    // Luminance of every tap.
    vec3 luma_weights = vec3(0.299, 0.587, 0.114);
    float luma_nw = dot(north_west, luma_weights);
    float luma_ne = dot(north_east, luma_weights);
    float luma_sw = dot(south_west, luma_weights);
    float luma_se = dot(south_east, luma_weights);
    float luma_mid = dot(middle, luma_weights);

    // Luminance range of the neighbourhood; used at the end to reject the wide filter.
    float luma_lowest = min(luma_mid, min(min(luma_nw, luma_ne), min(luma_sw, luma_se)));
    float luma_highest = max(luma_mid, max(max(luma_nw, luma_ne), max(luma_sw, luma_se)));

    // Blur direction from the luminance differences between opposite tap pairs.
    vec2 dir = vec2(-((luma_nw + luma_ne) - (luma_sw + luma_se)),
                    ((luma_nw + luma_sw) - (luma_ne + luma_se)));

    // Scale the direction, limit it to +-FXAA_SPAN_MAX texels and convert it to UV units.
    float dir_reduce = max(
        (luma_nw + luma_ne + luma_sw + luma_se) * (0.25 * FXAA_REDUCE_MUL),
        FXAA_REDUCE_MIN);
    float inv_dir_min = 1.0/(min(abs(dir.x), abs(dir.y)) + dir_reduce);
    dir = min(vec2( FXAA_SPAN_MAX,  FXAA_SPAN_MAX),
          max(vec2(-FXAA_SPAN_MAX, -FXAA_SPAN_MAX),
          dir * inv_dir_min)) / textureSize(tex, 0);

    // Narrow filter: two taps along the direction.
    vec3 narrow = (1.0 / 2.0) * (
        FxaaTexLod0(tex, posPos.xy + dir * (1.0 / 3.0 - 0.5)).xyz +
        FxaaTexLod0(tex, posPos.xy + dir * (2.0 / 3.0 - 0.5)).xyz);
    // Wide filter: the narrow result blended with two taps further out.
    vec3 wide = narrow * (1.0 / 2.0) + (1.0 / 4.0) * (
        FxaaTexLod0(tex, posPos.xy + dir * (0.0 / 3.0 - 0.5)).xyz +
        FxaaTexLod0(tex, posPos.xy + dir * (3.0 / 3.0 - 0.5)).xyz);

    // Fall back to the narrow filter when the wide one leaves the local luminance range.
    float luma_wide = dot(wide, luma_weights);
    bool outside_range = (luma_wide < luma_lowest) || (luma_wide > luma_highest);
    return outside_range ? narrow : wide;
}
// Fragment entry point: writes the FXAA-filtered color of input_texture with alpha set to 1.
void main() {
frag_color = vec4(FxaaPixelShader(posPos, input_texture), 1.0);
}

View File

@@ -0,0 +1,38 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#version 460
out gl_PerVertex {
vec4 gl_Position;
};
const vec2 vertices[4] =
vec2[4](vec2(-1.0, 1.0), vec2(1.0, 1.0), vec2(-1.0, -1.0), vec2(1.0, -1.0));
layout (location = 0) out vec4 posPos;
#ifdef VULKAN
#define BINDING_COLOR_TEXTURE 0
#define VERTEX_ID gl_VertexIndex
#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
#define BINDING_COLOR_TEXTURE 0
#define VERTEX_ID gl_VertexID
#endif
layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D input_texture;
const float FXAA_SUBPIX_SHIFT = 0;
// Vertex entry point: emits one corner of the screen-covering quad and the two texture
// coordinates consumed by the FXAA fragment shader (xy: plain, zw: shifted by
// (0.5 + FXAA_SUBPIX_SHIFT) texels towards the origin).
void main() {
    vec2 corner = vertices[VERTEX_ID];
    gl_Position = vec4(corner, 0.0, 1.0);
    // Remap the corner from [-1, 1] to [0, 1].
    vec2 uv = (corner + 1.0) / 2.0;
    vec2 shifted_uv = uv - (0.5 + FXAA_SUBPIX_SHIFT) / textureSize(input_texture, 0);
    posPos = vec4(uv, shifted_uv);
}

View File

@@ -0,0 +1,130 @@
// MIT License
//
// Copyright (c) 2020 BreadFish64
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
// Adapted from https://github.com/BreadFish64/ScaleFish/tree/master/scaleforce
//! #version 460
#extension GL_ARB_separate_shader_objects : enable
#ifdef YUZU_USE_FP16
#extension GL_AMD_gpu_shader_half_float : enable
#extension GL_NV_gpu_shader5 : enable
#define lfloat float16_t
#define lvec2 f16vec2
#define lvec3 f16vec3
#define lvec4 f16vec4
#else
#define lfloat float
#define lvec2 vec2
#define lvec3 vec3
#define lvec4 vec4
#endif
#ifdef VULKAN
#define BINDING_COLOR_TEXTURE 1
#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
#define BINDING_COLOR_TEXTURE 0
#endif
layout (location = 0) in vec2 tex_coord;
layout (location = 0) out vec4 frag_color;
layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D input_texture;
const bool ignore_alpha = true;
// Distance between two colors, measured on their difference expressed as (Y, Cb, Cr).
// With ignore_alpha set only that length is returned; otherwise the length is weighted by
// both alpha values and combined with the alpha difference.
lfloat ColorDist1(lvec4 a, lvec4 b) {
    // https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.2020_conversion
    const lvec3 K = lvec3(0.2627, 0.6780, 0.0593);
    const lfloat scaleB = lfloat(0.5) / (lfloat(1.0) - K.b);
    const lfloat scaleR = lfloat(0.5) / (lfloat(1.0) - K.r);

    lvec4 delta = a - b;
    lfloat luma_delta = dot(delta.rgb, K);
    lfloat cb_delta = scaleB * (delta.b - luma_delta);
    lfloat cr_delta = scaleR * (delta.r - luma_delta);
    lfloat dist = length(lvec3(luma_delta, cb_delta, cr_delta));

    return ignore_alpha ? dist : sqrt(a.a * b.a * dist * dist + delta.a * delta.a);
}
// Distances from `ref` to four other colors, packed as (A, B, C, D) into x, y, z, w.
lvec4 ColorDist(lvec4 ref, lvec4 A, lvec4 B, lvec4 C, lvec4 D) {
    lvec4 distances;
    distances.x = ColorDist1(ref, A);
    distances.y = ColorDist1(ref, B);
    distances.z = ColorDist1(ref, C);
    distances.w = ColorDist1(ref, D);
    return distances;
}
// ScaleForce sample: looks at the 3x3 neighbourhood of tex_coord, measures how strongly the
// centre differs from each neighbour and re-samples the texture at a coordinate pushed away
// from the differing side. Returns the centre texel unchanged when all neighbours match it.
vec4 Scaleforce(sampler2D tex, vec2 tex_coord) {
    // 3x3 taps, named <row><column>: lo/mid/hi row (y - 1, y, y + 1), l/c/r column.
    lvec4 lo_l = lvec4(textureOffset(tex, tex_coord, ivec2(-1, -1)));
    lvec4 lo_c = lvec4(textureOffset(tex, tex_coord, ivec2(0, -1)));
    lvec4 lo_r = lvec4(textureOffset(tex, tex_coord, ivec2(1, -1)));
    lvec4 mid_l = lvec4(textureOffset(tex, tex_coord, ivec2(-1, 0)));
    lvec4 mid_c = lvec4(texture(tex, tex_coord));
    lvec4 mid_r = lvec4(textureOffset(tex, tex_coord, ivec2(1, 0)));
    lvec4 hi_l = lvec4(textureOffset(tex, tex_coord, ivec2(-1, 1)));
    lvec4 hi_c = lvec4(textureOffset(tex, tex_coord, ivec2(0, 1)));
    lvec4 hi_r = lvec4(textureOffset(tex, tex_coord, ivec2(1, 1)));

    // Distances to the centre; each component of one vector faces the opposite neighbour of
    // the same component in the other vector.
    lvec4 dist_hi = ColorDist(mid_c, hi_l, hi_c, hi_r, mid_r);
    lvec4 dist_lo = ColorDist(mid_c, lo_r, lo_c, lo_l, mid_l);

    // Calculate how different the centre is from the texels around it
    const lfloat plus_weight = lfloat(1.5);
    const lfloat cross_weight = lfloat(1.5);
    lfloat total_dist = dot(dist_hi + dist_lo, lvec4(cross_weight, plus_weight, cross_weight, plus_weight));

    // Flat neighbourhood: nothing to push away from.
    if (total_dist == lfloat(0.0)) {
        return mid_c;
    }

    // Add together all the distances with direction taken into account
    lvec4 signed_dist = dist_hi - dist_lo;
    lvec2 total_offset = signed_dist.wy * plus_weight + (signed_dist.zz + lvec2(-signed_dist.x, signed_dist.x)) * cross_weight;

    // When the image has thin points, they tend to split apart.
    // This is because the texels all around are different and total_offset reaches into clear areas.
    // This works pretty well to keep the offset in bounds for these cases.
    lfloat clamp_val = length(total_offset) / total_dist;
    vec2 final_offset = vec2(clamp(total_offset, -clamp_val, clamp_val)) / textureSize(tex, 0);

    return texture(tex, tex_coord - final_offset);
}
// Fragment entry point: outputs the ScaleForce result for this fragment's texture coordinate.
void main() {
frag_color = Scaleforce(input_texture, tex_coord);
}

View File

@@ -0,0 +1,67 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#version 460 core
#ifdef VULKAN
#define BINDING_COLOR_TEXTURE 1
#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
#define BINDING_COLOR_TEXTURE 0
#endif
layout (location = 0) in vec2 frag_tex_coord;
layout (location = 0) out vec4 color;
layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D color_texture;
// Four cubic filter weights for the fractional position v.
// The last weight is derived as 6 - (sum of the first three) before everything is scaled by
// 1/6, so the returned weights add up to 1.
vec4 cubic(float v) {
    vec4 n = vec4(1.0, 2.0, 3.0, 4.0) - v;
    vec4 cubes = n * n * n;

    vec4 weights;
    weights.x = cubes.x;
    weights.y = cubes.y - 4.0 * cubes.x;
    weights.z = cubes.z - 4.0 * cubes.y + 6.0 * cubes.x;
    weights.w = 6.0 - weights.x - weights.y - weights.z;
    return weights * (1.0 / 6.0);
}
// Bicubic texture lookup built from four texture() fetches.
// The cubic weights of neighbouring texels are merged pairwise: each fetch is placed between
// two texels according to their weight ratio, and the four fetches are then blended with the
// summed pair weights.
vec4 textureBicubic( sampler2D textureSampler, vec2 texCoords ) {
    vec2 size_in_texels = textureSize(textureSampler, 0);
    vec2 texel_size = 1.0 / size_in_texels;

    // Split the position (in texel units, relative to texel centres) into whole and fraction.
    vec2 texel_pos = texCoords * size_in_texels - 0.5;
    vec2 fraction = fract(texel_pos);
    vec2 whole = texel_pos - fraction;

    vec4 weights_x = cubic(fraction.x);
    vec4 weights_y = cubic(fraction.y);

    // xy: the two fetch positions along x, zw: the two fetch positions along y.
    vec4 pair_centres = whole.xxyy + vec2(-0.5, +1.5).xyxy;
    vec4 pair_weights = vec4(weights_x.xz + weights_x.yw, weights_y.xz + weights_y.yw);
    vec4 fetch_pos = pair_centres + vec4(weights_x.yw, weights_y.yw) / pair_weights;
    fetch_pos *= texel_size.xxyy;

    vec4 fetch_x0_y0 = texture(textureSampler, fetch_pos.xz);
    vec4 fetch_x1_y0 = texture(textureSampler, fetch_pos.yz);
    vec4 fetch_x0_y1 = texture(textureSampler, fetch_pos.xw);
    vec4 fetch_x1_y1 = texture(textureSampler, fetch_pos.yw);

    float blend_x = pair_weights.x / (pair_weights.x + pair_weights.y);
    float blend_y = pair_weights.z / (pair_weights.z + pair_weights.w);

    vec4 row_y1 = mix(fetch_x1_y1, fetch_x0_y1, blend_x);
    vec4 row_y0 = mix(fetch_x1_y0, fetch_x0_y0, blend_x);
    return mix(row_y1, row_y0, blend_y);
}
// Fragment entry point: outputs the bicubic-filtered RGB of color_texture with alpha set to 1.
void main() {
color = vec4(textureBicubic(color_texture, frag_tex_coord).rgb, 1.0f);
}

View File

@@ -0,0 +1,70 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
// Code adapted from the following sources:
// - https://learnopengl.com/Advanced-Lighting/Bloom
// - https://www.rastergrid.com/blog/2010/09/efficient-gaussian-blur-with-linear-sampling/
#version 460 core
#ifdef VULKAN
#define BINDING_COLOR_TEXTURE 1
#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
#define BINDING_COLOR_TEXTURE 0
#endif
layout(location = 0) in vec2 frag_tex_coord;
layout(location = 0) out vec4 color;
layout(binding = BINDING_COLOR_TEXTURE) uniform sampler2D color_texture;
const float offset[3] = float[](0.0, 1.3846153846, 3.2307692308);
const float weight[3] = float[](0.2270270270, 0.3162162162, 0.0702702703);
// Weighted sum of the taps above and below `coord` (tap 0, the centre, is not included).
// `norm` converts the tap offsets from texels to texture coordinates.
vec4 blurVertical(sampler2D textureSampler, vec2 coord, vec2 norm) {
    vec4 accum = vec4(0.0f);
    for (int tap = 1; tap < 3; tap++) {
        vec2 delta = vec2(0.0, offset[tap]) * norm;
        accum += texture(textureSampler, coord + delta) * weight[tap];
        accum += texture(textureSampler, coord - delta) * weight[tap];
    }
    return accum;
}
// Weighted sum of the taps left and right of `coord` (tap 0, the centre, is not included).
// `norm` converts the tap offsets from texels to texture coordinates.
vec4 blurHorizontal(sampler2D textureSampler, vec2 coord, vec2 norm) {
    vec4 accum = vec4(0.0f);
    for (int tap = 1; tap < 3; tap++) {
        vec2 delta = vec2(offset[tap], 0.0) * norm;
        accum += texture(textureSampler, coord + delta) * weight[tap];
        accum += texture(textureSampler, coord - delta) * weight[tap];
    }
    return accum;
}
// Weighted sum of the taps along one diagonal through `coord` (tap 0 is not included).
// The diagonal is chosen by the signs of `norm`, which also converts texels to texture
// coordinates.
vec4 blurDiagonal(sampler2D textureSampler, vec2 coord, vec2 norm) {
    vec4 accum = vec4(0.0f);
    for (int tap = 1; tap < 3; tap++) {
        vec2 delta = vec2(offset[tap], offset[tap]) * norm;
        accum += texture(textureSampler, coord + delta) * weight[tap];
        accum += texture(textureSampler, coord - delta) * weight[tap];
    }
    return accum;
}
// Fragment entry point: centre tap weighted by weight[0], plus the average of the horizontal,
// vertical and both diagonal blurs. Alpha is set to 1.
void main() {
    vec3 centre = texture(color_texture, vec2(frag_tex_coord)).rgb * weight[0];
    vec2 texel = 1.0f / textureSize(color_texture, 0);

    // TODO(Blinkhawk): This code can be optimized through shader group instructions.
    vec3 blur_h = blurHorizontal(color_texture, frag_tex_coord, texel).rgb;
    vec3 blur_v = blurVertical(color_texture, frag_tex_coord, texel).rgb;
    vec3 blur_diag_a = blurDiagonal(color_texture, frag_tex_coord, texel).rgb;
    // Flipping the y sign selects the other diagonal.
    vec3 blur_diag_b = blurDiagonal(color_texture, frag_tex_coord, texel * vec2(1.0, -1.0)).rgb;

    vec3 axis_blend = mix(blur_h, blur_v, 0.5f);
    vec3 diagonal_blend = mix(blur_diag_a, blur_diag_b, 0.5f);
    vec3 blurred = mix(axis_blend, diagonal_blend, 0.5f);
    color = vec4(blurred + centre, 1.0f);
}

View File

@@ -0,0 +1,11 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#version 460 core
#extension GL_GOOGLE_include_directive : enable
#define YUZU_USE_FP16
#define USE_EASU 1
#include "fidelityfx_fsr.comp"

View File

@@ -0,0 +1,10 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#version 460 core
#extension GL_GOOGLE_include_directive : enable
#define USE_EASU 1
#include "fidelityfx_fsr.comp"

View File

@@ -0,0 +1,11 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#version 460 core
#extension GL_GOOGLE_include_directive : enable
#define YUZU_USE_FP16
#define USE_RCAS 1
#include "fidelityfx_fsr.comp"

View File

@@ -0,0 +1,10 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#version 460 core
#extension GL_GOOGLE_include_directive : enable
#define USE_RCAS 1
#include "fidelityfx_fsr.comp"

View File

@@ -0,0 +1,7 @@
#version 460
#extension GL_GOOGLE_include_directive : enable
#define YUZU_USE_FP16
#include "opengl_present_scaleforce.frag"

View File

@@ -0,0 +1,5 @@
#version 460
#extension GL_GOOGLE_include_directive : enable
#include "opengl_present_scaleforce.frag"

View File

@@ -5,6 +5,7 @@
#include <algorithm>
#include <span>
#include "shader_recompiler/backend/glasm/emit_glasm.h"
#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_device.h"
@@ -229,8 +230,10 @@ void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buff
.padding = 0,
};
buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1,
reinterpret_cast<const GLuint*>(&ssbo));
glProgramLocalParametersI4uivNV(
PROGRAM_LUT[stage],
Shader::Backend::GLASM::PROGRAM_LOCAL_PARAMETER_STORAGE_BUFFER_BASE + binding_index, 1,
reinterpret_cast<const GLuint*>(&ssbo));
}
}
@@ -250,8 +253,10 @@ void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buf
.padding = 0,
};
buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
glProgramLocalParametersI4uivNV(GL_COMPUTE_PROGRAM_NV, binding_index, 1,
reinterpret_cast<const GLuint*>(&ssbo));
glProgramLocalParametersI4uivNV(
GL_COMPUTE_PROGRAM_NV,
Shader::Backend::GLASM::PROGRAM_LOCAL_PARAMETER_STORAGE_BUFFER_BASE + binding_index, 1,
reinterpret_cast<const GLuint*>(&ssbo));
}
}

View File

@@ -19,15 +19,6 @@ using VideoCommon::ImageId;
constexpr u32 MAX_TEXTURES = 64;
constexpr u32 MAX_IMAGES = 16;
/// Sums the `count` member of every descriptor in `range`.
/// @param range Iterable collection whose elements expose a `count` member.
/// @return Total of all `count` values, accumulated as a u32.
template <typename Range>
u32 AccumulateCount(const Range& range) {
    u32 total = 0;
    for (const auto& descriptor : range) {
        total = total + descriptor.count;
    }
    return total;
}
size_t ComputePipelineKey::Hash() const noexcept {
return static_cast<size_t>(
Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this));
@@ -58,17 +49,17 @@ ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cac
std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(),
uniform_buffer_sizes.begin());
num_texture_buffers = AccumulateCount(info.texture_buffer_descriptors);
num_image_buffers = AccumulateCount(info.image_buffer_descriptors);
num_texture_buffers = Shader::NumDescriptors(info.texture_buffer_descriptors);
num_image_buffers = Shader::NumDescriptors(info.image_buffer_descriptors);
const u32 num_textures{num_texture_buffers + AccumulateCount(info.texture_descriptors)};
const u32 num_textures{num_texture_buffers + Shader::NumDescriptors(info.texture_descriptors)};
ASSERT(num_textures <= MAX_TEXTURES);
const u32 num_images{num_image_buffers + AccumulateCount(info.image_descriptors)};
const u32 num_images{num_image_buffers + Shader::NumDescriptors(info.image_descriptors)};
ASSERT(num_images <= MAX_IMAGES);
const bool is_glasm{assembly_program.handle != 0};
const u32 num_storage_buffers{AccumulateCount(info.storage_buffers_descriptors)};
const u32 num_storage_buffers{Shader::NumDescriptors(info.storage_buffers_descriptors)};
use_storage_buffers =
!is_glasm || num_storage_buffers < device.GetMaxGLASMStorageBufferBlocks();
writes_global_memory = !use_storage_buffers &&
@@ -88,8 +79,7 @@ void ComputePipeline::Configure() {
}
texture_cache.SynchronizeComputeDescriptors();
std::array<ImageViewId, MAX_TEXTURES + MAX_IMAGES> image_view_ids;
boost::container::static_vector<u32, MAX_TEXTURES + MAX_IMAGES> image_view_indices;
boost::container::static_vector<VideoCommon::ImageViewInOut, MAX_TEXTURES + MAX_IMAGES> views;
std::array<GLuint, MAX_TEXTURES> samplers;
std::array<GLuint, MAX_TEXTURES> textures;
std::array<GLuint, MAX_IMAGES> images;
@@ -119,33 +109,39 @@ void ComputePipeline::Configure() {
}
return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
}};
const auto add_image{[&](const auto& desc) {
const auto add_image{[&](const auto& desc, bool blacklist) {
for (u32 index = 0; index < desc.count; ++index) {
const auto handle{read_handle(desc, index)};
image_view_indices.push_back(handle.first);
views.push_back({
.index = handle.first,
.blacklist = blacklist,
.id = {},
});
}
}};
for (const auto& desc : info.texture_buffer_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
const auto handle{read_handle(desc, index)};
image_view_indices.push_back(handle.first);
views.push_back({handle.first});
samplers[sampler_binding++] = 0;
}
}
std::ranges::for_each(info.image_buffer_descriptors, add_image);
for (const auto& desc : info.image_buffer_descriptors) {
add_image(desc, false);
}
for (const auto& desc : info.texture_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
const auto handle{read_handle(desc, index)};
image_view_indices.push_back(handle.first);
views.push_back({handle.first});
Sampler* const sampler = texture_cache.GetComputeSampler(handle.second);
samplers[sampler_binding++] = sampler->Handle();
}
}
std::ranges::for_each(info.image_descriptors, add_image);
const std::span indices_span(image_view_indices.data(), image_view_indices.size());
texture_cache.FillComputeImageViews(indices_span, image_view_ids);
for (const auto& desc : info.image_descriptors) {
add_image(desc, desc.is_written);
}
texture_cache.FillComputeImageViews(std::span(views.data(), views.size()));
if (assembly_program.handle != 0) {
program_manager.BindComputeAssemblyProgram(assembly_program.handle);
@@ -161,7 +157,7 @@ void ComputePipeline::Configure() {
if constexpr (is_image) {
is_written = desc.is_written;
}
ImageView& image_view{texture_cache.GetImageView(image_view_ids[texbuf_index])};
ImageView& image_view{texture_cache.GetImageView(views[texbuf_index].id)};
buffer_cache.BindComputeTextureBuffer(texbuf_index, image_view.GpuAddr(),
image_view.BufferSize(), image_view.format,
is_written, is_image);
@@ -177,23 +173,45 @@ void ComputePipeline::Configure() {
buffer_cache.runtime.SetImagePointers(textures.data(), images.data());
buffer_cache.BindHostComputeBuffers();
const ImageId* views_it{image_view_ids.data() + num_texture_buffers + num_image_buffers};
const VideoCommon::ImageViewInOut* views_it{views.data() + num_texture_buffers +
num_image_buffers};
texture_binding += num_texture_buffers;
image_binding += num_image_buffers;
u32 texture_scaling_mask{};
for (const auto& desc : info.texture_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
textures[texture_binding++] = image_view.Handle(desc.type);
ImageView& image_view{texture_cache.GetImageView((views_it++)->id)};
textures[texture_binding] = image_view.Handle(desc.type);
if (texture_cache.IsRescaling(image_view)) {
texture_scaling_mask |= 1u << texture_binding;
}
++texture_binding;
}
}
u32 image_scaling_mask{};
for (const auto& desc : info.image_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
ImageView& image_view{texture_cache.GetImageView((views_it++)->id)};
if (desc.is_written) {
texture_cache.MarkModification(image_view.image_id);
}
images[image_binding++] = image_view.StorageView(desc.type, desc.format);
images[image_binding] = image_view.StorageView(desc.type, desc.format);
if (texture_cache.IsRescaling(image_view)) {
image_scaling_mask |= 1u << image_binding;
}
++image_binding;
}
}
if (info.uses_rescaling_uniform) {
const f32 float_texture_scaling_mask{Common::BitCast<f32>(texture_scaling_mask)};
const f32 float_image_scaling_mask{Common::BitCast<f32>(image_scaling_mask)};
if (assembly_program.handle != 0) {
glProgramLocalParameter4fARB(GL_COMPUTE_PROGRAM_NV, 0, float_texture_scaling_mask,
float_image_scaling_mask, 0.0f, 0.0f);
} else {
glProgramUniform4f(source_program.handle, 0, float_texture_scaling_mask,
float_image_scaling_mask, 0.0f, 0.0f);
}
}
if (texture_binding != 0) {

View File

@@ -15,7 +15,7 @@
#include "video_core/renderer_opengl/gl_shader_util.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/shader_notify.h"
#include "video_core/texture_cache/texture_cache_base.h"
#include "video_core/texture_cache/texture_cache.h"
#if defined(_MSC_VER) && defined(NDEBUG)
#define LAMBDA_FORCEINLINE [[msvc::forceinline]]
@@ -27,6 +27,7 @@ namespace OpenGL {
namespace {
using Shader::ImageBufferDescriptor;
using Shader::ImageDescriptor;
using Shader::NumDescriptors;
using Shader::TextureBufferDescriptor;
using Shader::TextureDescriptor;
using Tegra::Texture::TexturePair;
@@ -35,15 +36,6 @@ using VideoCommon::ImageId;
constexpr u32 MAX_TEXTURES = 64;
constexpr u32 MAX_IMAGES = 8;
/// Returns the combined `count` of all descriptors contained in `range`.
/// @param range Iterable collection whose elements expose a `count` member.
/// @return Sum of every element's `count`, accumulated as a u32.
template <typename Range>
u32 AccumulateCount(const Range& range) {
    u32 sum = 0;
    for (const auto& entry : range) {
        sum = sum + entry.count;
    }
    return sum;
}
GLenum Stage(size_t stage_index) {
switch (stage_index) {
case 0:
@@ -204,23 +196,23 @@ GraphicsPipeline::GraphicsPipeline(
base_uniform_bindings[stage + 1] = base_uniform_bindings[stage];
base_storage_bindings[stage + 1] = base_storage_bindings[stage];
base_uniform_bindings[stage + 1] += AccumulateCount(info.constant_buffer_descriptors);
base_storage_bindings[stage + 1] += AccumulateCount(info.storage_buffers_descriptors);
base_uniform_bindings[stage + 1] += NumDescriptors(info.constant_buffer_descriptors);
base_storage_bindings[stage + 1] += NumDescriptors(info.storage_buffers_descriptors);
}
enabled_uniform_buffer_masks[stage] = info.constant_buffer_mask;
std::ranges::copy(info.constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin());
const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)};
const u32 num_tex_buffer_bindings{NumDescriptors(info.texture_buffer_descriptors)};
num_texture_buffers[stage] += num_tex_buffer_bindings;
num_textures += num_tex_buffer_bindings;
const u32 num_img_buffers_bindings{AccumulateCount(info.image_buffer_descriptors)};
const u32 num_img_buffers_bindings{NumDescriptors(info.image_buffer_descriptors)};
num_image_buffers[stage] += num_img_buffers_bindings;
num_images += num_img_buffers_bindings;
num_textures += AccumulateCount(info.texture_descriptors);
num_images += AccumulateCount(info.image_descriptors);
num_storage_buffers += AccumulateCount(info.storage_buffers_descriptors);
num_textures += NumDescriptors(info.texture_descriptors);
num_images += NumDescriptors(info.image_descriptors);
num_storage_buffers += NumDescriptors(info.storage_buffers_descriptors);
writes_global_memory |= std::ranges::any_of(
info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; });
@@ -288,10 +280,9 @@ GraphicsPipeline::GraphicsPipeline(
template <typename Spec>
void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
std::array<ImageId, MAX_TEXTURES + MAX_IMAGES> image_view_ids;
std::array<u32, MAX_TEXTURES + MAX_IMAGES> image_view_indices;
std::array<VideoCommon::ImageViewInOut, MAX_TEXTURES + MAX_IMAGES> views;
std::array<GLuint, MAX_TEXTURES> samplers;
size_t image_view_index{};
size_t views_index{};
GLsizei sampler_binding{};
texture_cache.SynchronizeGraphicsDescriptors();
@@ -336,30 +327,34 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
}
return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
}};
const auto add_image{[&](const auto& desc) {
const auto add_image{[&](const auto& desc, bool blacklist) LAMBDA_FORCEINLINE {
for (u32 index = 0; index < desc.count; ++index) {
const auto handle{read_handle(desc, index)};
image_view_indices[image_view_index++] = handle.first;
views[views_index++] = {
.index = handle.first,
.blacklist = blacklist,
.id = {},
};
}
}};
if constexpr (Spec::has_texture_buffers) {
for (const auto& desc : info.texture_buffer_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
const auto handle{read_handle(desc, index)};
image_view_indices[image_view_index++] = handle.first;
views[views_index++] = {handle.first};
samplers[sampler_binding++] = 0;
}
}
}
if constexpr (Spec::has_image_buffers) {
for (const auto& desc : info.image_buffer_descriptors) {
add_image(desc);
add_image(desc, false);
}
}
for (const auto& desc : info.texture_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
const auto handle{read_handle(desc, index)};
image_view_indices[image_view_index++] = handle.first;
views[views_index++] = {handle.first};
Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)};
samplers[sampler_binding++] = sampler->Handle();
@@ -367,7 +362,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
}
if constexpr (Spec::has_images) {
for (const auto& desc : info.image_descriptors) {
add_image(desc);
add_image(desc, desc.is_written);
}
}
}};
@@ -386,13 +381,12 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
if constexpr (Spec::enabled_stages[4]) {
config_stage(4);
}
const std::span indices_span(image_view_indices.data(), image_view_index);
texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
texture_cache.FillGraphicsImageViews<Spec::has_images>(std::span(views.data(), views_index));
texture_cache.UpdateRenderTargets(false);
state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
ImageId* texture_buffer_index{image_view_ids.data()};
VideoCommon::ImageViewInOut* texture_buffer_it{views.data()};
const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE {
size_t index{};
const auto add_buffer{[&](const auto& desc) {
@@ -402,12 +396,12 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
if constexpr (is_image) {
is_written = desc.is_written;
}
ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)};
ImageView& image_view{texture_cache.GetImageView(texture_buffer_it->id)};
buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(),
image_view.BufferSize(), image_view.format,
is_written, is_image);
++index;
++texture_buffer_index;
++texture_buffer_it;
}
}};
const Shader::Info& info{stage_infos[stage]};
@@ -423,13 +417,9 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
add_buffer(desc);
}
}
for (const auto& desc : info.texture_descriptors) {
texture_buffer_index += desc.count;
}
texture_buffer_it += Shader::NumDescriptors(info.texture_descriptors);
if constexpr (Spec::has_images) {
for (const auto& desc : info.image_descriptors) {
texture_buffer_index += desc.count;
}
texture_buffer_it += Shader::NumDescriptors(info.image_descriptors);
}
}};
if constexpr (Spec::enabled_stages[0]) {
@@ -453,12 +443,13 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
if (!is_built.load(std::memory_order::relaxed)) {
WaitForBuild();
}
if (assembly_programs[0].handle != 0) {
const bool use_assembly{assembly_programs[0].handle != 0};
if (use_assembly) {
program_manager.BindAssemblyPrograms(assembly_programs, enabled_stages_mask);
} else {
program_manager.BindSourcePrograms(source_programs);
}
const ImageId* views_it{image_view_ids.data()};
const VideoCommon::ImageViewInOut* views_it{views.data()};
GLsizei texture_binding = 0;
GLsizei image_binding = 0;
std::array<GLuint, MAX_TEXTURES> textures;
@@ -473,20 +464,49 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
views_it += num_texture_buffers[stage];
views_it += num_image_buffers[stage];
u32 texture_scaling_mask{};
u32 image_scaling_mask{};
u32 stage_texture_binding{};
u32 stage_image_binding{};
const auto& info{stage_infos[stage]};
for (const auto& desc : info.texture_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
textures[texture_binding++] = image_view.Handle(desc.type);
ImageView& image_view{texture_cache.GetImageView((views_it++)->id)};
textures[texture_binding] = image_view.Handle(desc.type);
if (texture_cache.IsRescaling(image_view)) {
texture_scaling_mask |= 1u << stage_texture_binding;
}
++texture_binding;
++stage_texture_binding;
}
}
for (const auto& desc : info.image_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
ImageView& image_view{texture_cache.GetImageView((views_it++)->id)};
if (desc.is_written) {
texture_cache.MarkModification(image_view.image_id);
}
images[image_binding++] = image_view.StorageView(desc.type, desc.format);
images[image_binding] = image_view.StorageView(desc.type, desc.format);
if (texture_cache.IsRescaling(image_view)) {
image_scaling_mask |= 1u << stage_image_binding;
}
++image_binding;
++stage_image_binding;
}
}
if (info.uses_rescaling_uniform) {
const f32 float_texture_scaling_mask{Common::BitCast<f32>(texture_scaling_mask)};
const f32 float_image_scaling_mask{Common::BitCast<f32>(image_scaling_mask)};
const bool is_rescaling{texture_cache.IsRescaling()};
const f32 config_down_factor{Settings::values.resolution_info.down_factor};
const f32 down_factor{is_rescaling ? config_down_factor : 1.0f};
if (use_assembly) {
glProgramLocalParameter4fARB(AssemblyStage(stage), 0, float_texture_scaling_mask,
float_image_scaling_mask, down_factor, 0.0f);
} else {
glProgramUniform4f(source_programs[stage].handle, 0, float_texture_scaling_mask,
float_image_scaling_mask, down_factor, 0.0f);
}
}
}};

View File

@@ -184,6 +184,10 @@ void RasterizerOpenGL::Clear() {
SyncRasterizeEnable();
SyncStencilTestState();
std::scoped_lock lock{texture_cache.mutex};
texture_cache.UpdateRenderTargets(true);
state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
SyncViewport();
if (regs.clear_flags.scissor) {
SyncScissorTest();
} else {
@@ -192,10 +196,6 @@ void RasterizerOpenGL::Clear() {
}
UNIMPLEMENTED_IF(regs.clear_flags.viewport);
std::scoped_lock lock{texture_cache.mutex};
texture_cache.UpdateRenderTargets(true);
state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
if (use_color) {
glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color);
}
@@ -214,8 +214,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
query_cache.UpdateCounters();
SyncState();
GraphicsPipeline* const pipeline{shader_cache.CurrentGraphicsPipeline()};
if (!pipeline) {
return;
@@ -223,6 +221,8 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
pipeline->Configure(is_indexed);
SyncState();
const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology);
BeginTransformFeedback(pipeline, primitive_mode);
@@ -533,7 +533,8 @@ void RasterizerOpenGL::SyncViewport() {
auto& flags = maxwell3d.dirty.flags;
const auto& regs = maxwell3d.regs;
const bool dirty_viewport = flags[Dirty::Viewports];
const bool rescale_viewports = flags[VideoCommon::Dirty::RescaleViewports];
const bool dirty_viewport = flags[Dirty::Viewports] || rescale_viewports;
const bool dirty_clip_control = flags[Dirty::ClipControl];
if (dirty_clip_control || flags[Dirty::FrontFace]) {
@@ -553,8 +554,7 @@ void RasterizerOpenGL::SyncViewport() {
}
glFrontFace(mode);
}
if (dirty_viewport || flags[Dirty::ClipControl]) {
if (dirty_viewport || dirty_clip_control) {
flags[Dirty::ClipControl] = false;
bool flip_y = false;
@@ -570,37 +570,58 @@ void RasterizerOpenGL::SyncViewport() {
state_tracker.ClipControl(origin, depth);
state_tracker.SetYNegate(regs.screen_y_control.y_negate != 0);
}
const bool is_rescaling{texture_cache.IsRescaling()};
const float scale = is_rescaling ? Settings::values.resolution_info.up_factor : 1.0f;
const auto conv = [scale](float value) -> GLfloat {
float new_value = value * scale;
if (scale < 1.0f) {
const bool sign = std::signbit(value);
new_value = std::round(std::abs(new_value));
new_value = sign ? -new_value : new_value;
}
return static_cast<GLfloat>(new_value);
};
if (dirty_viewport) {
flags[Dirty::Viewports] = false;
const bool force = flags[Dirty::ViewportTransform];
const bool force = flags[Dirty::ViewportTransform] || rescale_viewports;
flags[Dirty::ViewportTransform] = false;
flags[VideoCommon::Dirty::RescaleViewports] = false;
for (std::size_t i = 0; i < Maxwell::NumViewports; ++i) {
if (!force && !flags[Dirty::Viewport0 + i]) {
for (size_t index = 0; index < Maxwell::NumViewports; ++index) {
if (!force && !flags[Dirty::Viewport0 + index]) {
continue;
}
flags[Dirty::Viewport0 + i] = false;
flags[Dirty::Viewport0 + index] = false;
const auto& src = regs.viewport_transform[i];
const Common::Rectangle<f32> rect{src.GetRect()};
glViewportIndexedf(static_cast<GLuint>(i), rect.left, rect.bottom, rect.GetWidth(),
rect.GetHeight());
const auto& src = regs.viewport_transform[index];
GLfloat x = conv(src.translate_x - src.scale_x);
GLfloat y = conv(src.translate_y - src.scale_y);
GLfloat width = conv(src.scale_x * 2.0f);
GLfloat height = conv(src.scale_y * 2.0f);
if (height < 0) {
y += height;
height = -height;
}
glViewportIndexedf(static_cast<GLuint>(index), x, y, width != 0.0f ? width : 1.0f,
height != 0.0f ? height : 1.0f);
const GLdouble reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne;
const GLdouble near_depth = src.translate_z - src.scale_z * reduce_z;
const GLdouble far_depth = src.translate_z + src.scale_z;
if (device.HasDepthBufferFloat()) {
glDepthRangeIndexeddNV(static_cast<GLuint>(i), near_depth, far_depth);
glDepthRangeIndexeddNV(static_cast<GLuint>(index), near_depth, far_depth);
} else {
glDepthRangeIndexed(static_cast<GLuint>(i), near_depth, far_depth);
glDepthRangeIndexed(static_cast<GLuint>(index), near_depth, far_depth);
}
if (!GLAD_GL_NV_viewport_swizzle) {
continue;
}
glViewportSwizzleNV(static_cast<GLuint>(i), MaxwellToGL::ViewportSwizzle(src.swizzle.x),
glViewportSwizzleNV(static_cast<GLuint>(index),
MaxwellToGL::ViewportSwizzle(src.swizzle.x),
MaxwellToGL::ViewportSwizzle(src.swizzle.y),
MaxwellToGL::ViewportSwizzle(src.swizzle.z),
MaxwellToGL::ViewportSwizzle(src.swizzle.w));
@@ -903,14 +924,34 @@ void RasterizerOpenGL::SyncLogicOpState() {
void RasterizerOpenGL::SyncScissorTest() {
auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::Scissors]) {
if (!flags[Dirty::Scissors] && !flags[VideoCommon::Dirty::RescaleScissors]) {
return;
}
flags[Dirty::Scissors] = false;
const bool force = flags[VideoCommon::Dirty::RescaleScissors];
flags[VideoCommon::Dirty::RescaleScissors] = false;
const auto& regs = maxwell3d.regs;
const auto& resolution = Settings::values.resolution_info;
const bool is_rescaling{texture_cache.IsRescaling()};
const u32 up_scale = is_rescaling ? resolution.up_scale : 1U;
const u32 down_shift = is_rescaling ? resolution.down_shift : 0U;
const auto scale_up = [up_scale, down_shift](u32 value) -> u32 {
if (value == 0) {
return 0U;
}
const u32 upset = value * up_scale;
u32 acumm{};
if ((up_scale >> down_shift) == 0) {
acumm = upset % 2;
}
const u32 converted_value = upset >> down_shift;
return std::max<u32>(converted_value + acumm, 1U);
};
for (std::size_t index = 0; index < Maxwell::NumViewports; ++index) {
if (!flags[Dirty::Scissor0 + index]) {
if (!force && !flags[Dirty::Scissor0 + index]) {
continue;
}
flags[Dirty::Scissor0 + index] = false;
@@ -918,8 +959,8 @@ void RasterizerOpenGL::SyncScissorTest() {
const auto& src = regs.scissor_test[index];
if (src.enable) {
glEnablei(GL_SCISSOR_TEST, static_cast<GLuint>(index));
glScissorIndexed(static_cast<GLuint>(index), src.min_x, src.min_y,
src.max_x - src.min_x, src.max_y - src.min_y);
glScissorIndexed(static_cast<GLuint>(index), scale_up(src.min_x), scale_up(src.min_y),
scale_up(src.max_x - src.min_x), scale_up(src.max_y - src.min_y));
} else {
glDisablei(GL_SCISSOR_TEST, static_cast<GLuint>(index));
}
@@ -935,8 +976,9 @@ void RasterizerOpenGL::SyncPointState() {
oglEnable(GL_POINT_SPRITE, maxwell3d.regs.point_sprite_enable);
oglEnable(GL_PROGRAM_POINT_SIZE, maxwell3d.regs.vp_point_size.enable);
glPointSize(std::max(1.0f, maxwell3d.regs.point_size));
const bool is_rescaling{texture_cache.IsRescaling()};
const float scale = is_rescaling ? Settings::values.resolution_info.up_factor : 1.0f;
glPointSize(std::max(1.0f, maxwell3d.regs.point_size * scale));
}
void RasterizerOpenGL::SyncLineState() {

View File

@@ -166,7 +166,12 @@ void OGLFramebuffer::Create() {
return;
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
// Bind to READ_FRAMEBUFFER to stop Nvidia's driver from creating an EXT_framebuffer instead of
// a core framebuffer. EXT framebuffer attachments have to match in size and can be shared
// across contexts. yuzu doesn't share framebuffers across contexts and needs attachments with
// mismatching sizes, which is why core framebuffers are preferred.
glGenFramebuffers(1, &handle);
glBindFramebuffer(GL_READ_FRAMEBUFFER, handle);
}
void OGLFramebuffer::Release() {

View File

@@ -426,16 +426,14 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
// Normal path
programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info);
for (const auto& desc : programs[index].info.storage_buffers_descriptors) {
total_storage_buffers += desc.count;
}
total_storage_buffers +=
Shader::NumDescriptors(programs[index].info.storage_buffers_descriptors);
} else {
// VertexB path when VertexA is present.
auto& program_va{programs[0]};
auto program_vb{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
for (const auto& desc : program_vb.info.storage_buffers_descriptors) {
total_storage_buffers += desc.count;
}
total_storage_buffers +=
Shader::NumDescriptors(program_vb.info.storage_buffers_descriptors);
programs[index] = MergeDualVertexPrograms(program_va, program_vb, env);
}
}
@@ -510,10 +508,7 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(
Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()};
auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
u32 num_storage_buffers{};
for (const auto& desc : program.info.storage_buffers_descriptors) {
num_storage_buffers += desc.count;
}
const u32 num_storage_buffers{Shader::NumDescriptors(program.info.storage_buffers_descriptors)};
Shader::RuntimeInfo info;
info.glasm_use_storage_buffers = num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks();

View File

@@ -9,8 +9,8 @@
#include <glad/glad.h>
#include "common/literals.h"
#include "common/settings.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
@@ -42,6 +42,7 @@ using VideoCore::Surface::IsPixelFormatSRGB;
using VideoCore::Surface::MaxPixelFormat;
using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::SurfaceType;
using namespace Common::Literals;
struct CopyOrigin {
GLint level;
@@ -147,6 +148,8 @@ GLenum AttachmentType(PixelFormat format) {
switch (const SurfaceType type = VideoCore::Surface::GetFormatType(format); type) {
case SurfaceType::Depth:
return GL_DEPTH_ATTACHMENT;
case SurfaceType::Stencil:
return GL_STENCIL_ATTACHMENT;
case SurfaceType::DepthStencil:
return GL_DEPTH_STENCIL_ATTACHMENT;
default:
@@ -316,6 +319,52 @@ void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) {
}
}
/// Creates an OpenGL texture for `info` and allocates its immutable storage.
/// @param info Image description supplying the target, dimensions, levels, layers and samples.
/// @param gl_internal_format Internal format passed to the glTextureStorage* call.
/// @return The texture object; it is left uncreated when the target is GL_TEXTURE_BUFFER.
OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_format) {
    const GLenum target = ImageTarget(info);

    // Extents and resource counts converted to the types the GL entry points expect.
    const GLsizei width = info.size.width;
    const GLsizei height = info.size.height;
    const GLsizei depth = info.size.depth;
    const GLsizei num_layers = info.resources.layers;
    const GLsizei num_samples = info.num_samples;

    // The requested level count is clamped to the bit width of the image width.
    const int max_host_mip_levels = std::bit_width(info.size.width);
    const GLsizei num_levels = std::min(info.resources.levels, max_host_mip_levels);

    // Buffer targets never get a texture object; the handle stays zero for them.
    OGLTexture texture;
    GLuint handle = 0;
    if (target != GL_TEXTURE_BUFFER) {
        texture.Create(target);
        handle = texture.handle;
    }

    switch (target) {
    case GL_TEXTURE_1D_ARRAY:
        // 1D arrays use the 2D storage call with the layer count as the second dimension.
        glTextureStorage2D(handle, num_levels, gl_internal_format, width, num_layers);
        break;
    case GL_TEXTURE_RECTANGLE:
        glTextureStorage2D(handle, num_levels, gl_internal_format, width, height);
        break;
    case GL_TEXTURE_2D_ARRAY:
        glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, num_layers);
        break;
    case GL_TEXTURE_3D:
        glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, depth);
        break;
    case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: {
        // TODO: Where should 'fixedsamplelocations' come from?
        const auto [samples_x, samples_y] = SamplesLog2(info.num_samples);
        glTextureStorage3DMultisample(handle, num_samples, gl_internal_format, width >> samples_x,
                                      height >> samples_y, num_layers, GL_FALSE);
        break;
    }
    case GL_TEXTURE_BUFFER:
        UNREACHABLE();
        break;
    default:
        UNREACHABLE_MSG("Invalid target=0x{:x}", target);
        break;
    }
    return texture;
}
[[nodiscard]] bool IsPixelFormatBGR(PixelFormat format) {
switch (format) {
case PixelFormat::B5G6R5_UNORM:
@@ -359,7 +408,8 @@ ImageBufferMap::~ImageBufferMap() {
TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& program_manager,
StateTracker& state_tracker_)
: device{device_}, state_tracker{state_tracker_}, util_shaders(program_manager) {
: device{device_}, state_tracker{state_tracker_},
util_shaders(program_manager), resolution{Settings::values.resolution_info} {
static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D};
for (size_t i = 0; i < TARGETS.size(); ++i) {
const GLenum target = TARGETS[i];
@@ -426,6 +476,13 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager&
set_view(Shader::TextureType::ColorArray1D, null_image_1d_array.handle);
set_view(Shader::TextureType::ColorArray2D, null_image_view_2d_array.handle);
set_view(Shader::TextureType::ColorArrayCube, null_image_cube_array.handle);
if (resolution.active) {
for (size_t i = 0; i < rescale_draw_fbos.size(); ++i) {
rescale_draw_fbos[i].Create();
rescale_read_fbos[i].Create();
}
}
}
TextureCacheRuntime::~TextureCacheRuntime() = default;
@@ -442,6 +499,15 @@ ImageBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) {
return download_buffers.RequestMap(size, false);
}
/// Returns the amount of video memory, in bytes, reported for the device.
///
/// When GL_NVX_gpu_memory_info is available, the driver is queried for the currently available
/// video memory; the queried value is scaled by 1 KiB. When the extension is missing, or the
/// query yields a non-positive value, the 2 GiB minimum requirement is returned instead.
u64 TextureCacheRuntime::GetDeviceLocalMemory() const {
    constexpr u64 minimum_requirements = 2_GiB;
    if (!GLAD_GL_NVX_gpu_memory_info) {
        return minimum_requirements;
    }
    GLint cur_avail_mem_kb = 0;
    glGetIntegerv(GL_GPU_MEMORY_INFO_CURRENT_AVAILABLE_VIDMEM_NVX, &cur_avail_mem_kb);
    if (cur_avail_mem_kb <= 0) {
        // A negative GLint would wrap to an enormous u64, and zero means the query left the
        // value untouched or reported nothing usable; fall back to the minimum requirements.
        return minimum_requirements;
    }
    return static_cast<u64>(cur_avail_mem_kb) * 1_KiB;
}
void TextureCacheRuntime::CopyImage(Image& dst_image, Image& src_image,
std::span<const ImageCopy> copies) {
const GLuint dst_name = dst_image.Handle();
@@ -605,13 +671,13 @@ std::optional<size_t> TextureCacheRuntime::StagingBuffers::FindBuffer(size_t req
return found;
}
Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, GPUVAddr gpu_addr_,
Image::Image(TextureCacheRuntime& runtime_, const VideoCommon::ImageInfo& info_, GPUVAddr gpu_addr_,
VAddr cpu_addr_)
: VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_) {
if (CanBeAccelerated(runtime, info)) {
: VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), runtime{&runtime_} {
if (CanBeAccelerated(*runtime, info)) {
flags |= ImageFlagBits::AcceleratedUpload;
}
if (IsConverted(runtime.device, info.format, info.type)) {
if (IsConverted(runtime->device, info.format, info.type)) {
flags |= ImageFlagBits::Converted;
gl_internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8;
gl_format = GL_RGBA;
@@ -622,58 +688,25 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_,
gl_format = tuple.format;
gl_type = tuple.type;
}
const GLenum target = ImageTarget(info);
const GLsizei width = info.size.width;
const GLsizei height = info.size.height;
const GLsizei depth = info.size.depth;
const int max_host_mip_levels = std::bit_width(info.size.width);
const GLsizei num_levels = std::min(info.resources.levels, max_host_mip_levels);
const GLsizei num_layers = info.resources.layers;
const GLsizei num_samples = info.num_samples;
GLuint handle = 0;
if (target != GL_TEXTURE_BUFFER) {
texture.Create(target);
handle = texture.handle;
}
switch (target) {
case GL_TEXTURE_1D_ARRAY:
glTextureStorage2D(handle, num_levels, gl_internal_format, width, num_layers);
break;
case GL_TEXTURE_2D_ARRAY:
glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, num_layers);
break;
case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: {
// TODO: Where should 'fixedsamplelocations' come from?
const auto [samples_x, samples_y] = SamplesLog2(info.num_samples);
glTextureStorage3DMultisample(handle, num_samples, gl_internal_format, width >> samples_x,
height >> samples_y, num_layers, GL_FALSE);
break;
}
case GL_TEXTURE_RECTANGLE:
glTextureStorage2D(handle, num_levels, gl_internal_format, width, height);
break;
case GL_TEXTURE_3D:
glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, depth);
break;
case GL_TEXTURE_BUFFER:
UNREACHABLE();
break;
default:
UNREACHABLE_MSG("Invalid target=0x{:x}", target);
break;
}
if (runtime.device.HasDebuggingToolAttached()) {
texture = MakeImage(info, gl_internal_format);
current_texture = texture.handle;
if (runtime->device.HasDebuggingToolAttached()) {
const std::string name = VideoCommon::Name(*this);
glObjectLabel(target == GL_TEXTURE_BUFFER ? GL_BUFFER : GL_TEXTURE, handle,
static_cast<GLsizei>(name.size()), name.data());
glObjectLabel(ImageTarget(info) == GL_TEXTURE_BUFFER ? GL_BUFFER : GL_TEXTURE,
texture.handle, static_cast<GLsizei>(name.size()), name.data());
}
}
Image::Image(const VideoCommon::NullImageParams& params) : VideoCommon::ImageBase{params} {}
Image::~Image() = default;
void Image::UploadMemory(const ImageBufferMap& map,
std::span<const VideoCommon::BufferImageCopy> copies) {
const bool is_rescaled = True(flags & ImageFlagBits::Rescaled);
if (is_rescaled) {
ScaleDown(true);
}
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.buffer);
glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, map.offset, unswizzled_size_bytes);
@@ -693,12 +726,18 @@ void Image::UploadMemory(const ImageBufferMap& map,
}
CopyBufferToImage(copy, map.offset);
}
if (is_rescaled) {
ScaleUp();
}
}
void Image::DownloadMemory(ImageBufferMap& map,
std::span<const VideoCommon::BufferImageCopy> copies) {
const bool is_rescaled = True(flags & ImageFlagBits::Rescaled);
if (is_rescaled) {
ScaleDown();
}
glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API
glBindBuffer(GL_PIXEL_PACK_BUFFER, map.buffer);
glPixelStorei(GL_PACK_ALIGNMENT, 1);
@@ -716,6 +755,9 @@ void Image::DownloadMemory(ImageBufferMap& map,
}
CopyImageToBuffer(copy, map.offset);
}
if (is_rescaled) {
ScaleUp(true);
}
}
GLuint Image::StorageHandle() noexcept {
@@ -741,11 +783,11 @@ GLuint Image::StorageHandle() noexcept {
return store_view.handle;
}
store_view.Create();
glTextureView(store_view.handle, ImageTarget(info), texture.handle, GL_RGBA8, 0,
glTextureView(store_view.handle, ImageTarget(info), current_texture, GL_RGBA8, 0,
info.resources.levels, 0, info.resources.layers);
return store_view.handle;
default:
return texture.handle;
return current_texture;
}
}
@@ -849,6 +891,146 @@ void Image::CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t b
}
}
void Image::Scale(bool up_scale) {
const auto format_type = GetFormatType(info.format);
const GLenum attachment = [format_type] {
switch (format_type) {
case SurfaceType::ColorTexture:
return GL_COLOR_ATTACHMENT0;
case SurfaceType::Depth:
return GL_DEPTH_ATTACHMENT;
case SurfaceType::Stencil:
return GL_STENCIL_ATTACHMENT;
case SurfaceType::DepthStencil:
return GL_DEPTH_STENCIL_ATTACHMENT;
default:
UNREACHABLE();
return GL_COLOR_ATTACHMENT0;
}
}();
const GLenum mask = [format_type] {
switch (format_type) {
case SurfaceType::ColorTexture:
return GL_COLOR_BUFFER_BIT;
case SurfaceType::Depth:
return GL_DEPTH_BUFFER_BIT;
case SurfaceType::Stencil:
return GL_STENCIL_BUFFER_BIT;
case SurfaceType::DepthStencil:
return GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
default:
UNREACHABLE();
return GL_COLOR_BUFFER_BIT;
}
}();
const size_t fbo_index = [format_type] {
switch (format_type) {
case SurfaceType::ColorTexture:
return 0;
case SurfaceType::Depth:
return 1;
case SurfaceType::Stencil:
return 2;
case SurfaceType::DepthStencil:
return 3;
default:
UNREACHABLE();
return 0;
}
}();
const bool is_2d = info.type == ImageType::e2D;
const bool is_color{(mask & GL_COLOR_BUFFER_BIT) != 0};
// Integer formats must use NEAREST filter
const bool linear_color_format{is_color && !IsPixelFormatInteger(info.format)};
const GLenum filter = linear_color_format ? GL_LINEAR : GL_NEAREST;
const auto& resolution = runtime->resolution;
const u32 scaled_width = resolution.ScaleUp(info.size.width);
const u32 scaled_height = is_2d ? resolution.ScaleUp(info.size.height) : info.size.height;
const u32 original_width = info.size.width;
const u32 original_height = info.size.height;
if (!upscaled_backup.handle) {
auto dst_info = info;
dst_info.size.width = scaled_width;
dst_info.size.height = scaled_height;
upscaled_backup = MakeImage(dst_info, gl_internal_format);
}
const u32 src_width = up_scale ? original_width : scaled_width;
const u32 src_height = up_scale ? original_height : scaled_height;
const u32 dst_width = up_scale ? scaled_width : original_width;
const u32 dst_height = up_scale ? scaled_height : original_height;
const auto src_handle = up_scale ? texture.handle : upscaled_backup.handle;
const auto dst_handle = up_scale ? upscaled_backup.handle : texture.handle;
// TODO (ameerj): Investigate other GL states that affect blitting.
glDisablei(GL_SCISSOR_TEST, 0);
glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(dst_width),
static_cast<GLfloat>(dst_height));
const GLuint read_fbo = runtime->rescale_read_fbos[fbo_index].handle;
const GLuint draw_fbo = runtime->rescale_draw_fbos[fbo_index].handle;
for (s32 layer = 0; layer < info.resources.layers; ++layer) {
for (s32 level = 0; level < info.resources.levels; ++level) {
const u32 src_level_width = std::max(1u, src_width >> level);
const u32 src_level_height = std::max(1u, src_height >> level);
const u32 dst_level_width = std::max(1u, dst_width >> level);
const u32 dst_level_height = std::max(1u, dst_height >> level);
glNamedFramebufferTextureLayer(read_fbo, attachment, src_handle, level, layer);
glNamedFramebufferTextureLayer(draw_fbo, attachment, dst_handle, level, layer);
glBlitNamedFramebuffer(read_fbo, draw_fbo, 0, 0, src_level_width, src_level_height, 0,
0, dst_level_width, dst_level_height, mask, filter);
}
}
current_texture = dst_handle;
auto& state_tracker = runtime->GetStateTracker();
state_tracker.NotifyViewport0();
state_tracker.NotifyScissor0();
}
/// Switches the image to its upscaled texture.
///
/// @param ignore When true, only `current_texture` is redirected to the upscaled backup and
///               no blit is performed; when false, the contents are blitted via Scale(true).
/// @return true if the image was switched to the upscaled texture, false otherwise.
bool Image::ScaleUp(bool ignore) {
    const bool already_rescaled = True(flags & ImageFlagBits::Rescaled);
    // Both GL format and type being zero marks compressed textures.
    const bool is_compressed = gl_format == 0 && gl_type == 0;
    if (already_rescaled || is_compressed) {
        return false;
    }
    if (info.type == ImageType::Linear) {
        UNREACHABLE();
        return false;
    }

    // The flag is raised before the resolution check, so it is set even when scaling is inactive.
    flags |= ImageFlagBits::Rescaled;
    if (!runtime->resolution.active) {
        return false;
    }

    has_scaled = true;
    if (ignore) {
        current_texture = upscaled_backup.handle;
    } else {
        Scale(true);
    }
    return true;
}
/// Switches the image back to its native-resolution texture.
///
/// @param ignore When true, only `current_texture` is redirected to the native texture and
///               no blit is performed; when false, the contents are blitted via Scale(false).
/// @return true if the image was switched to the native texture, false otherwise.
bool Image::ScaleDown(bool ignore) {
    const bool not_rescaled = False(flags & ImageFlagBits::Rescaled);
    if (not_rescaled) {
        return false;
    }

    // The flag is cleared before the resolution check, so it is reset even when scaling is
    // inactive.
    flags &= ~ImageFlagBits::Rescaled;
    if (!runtime->resolution.active) {
        return false;
    }

    if (ignore) {
        current_texture = texture.handle;
    } else {
        Scale(false);
    }
    return true;
}
ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info,
ImageId image_id_, Image& image)
: VideoCommon::ImageViewBase{info, image.info, image_id_}, views{runtime.null_image_views} {
@@ -862,7 +1044,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
flat_range = info.range;
set_object_label = device.HasDebuggingToolAttached();
is_render_target = info.IsRenderTarget();
original_texture = image.texture.handle;
original_texture = image.Handle();
num_samples = image.info.num_samples;
if (!is_render_target) {
swizzle[0] = info.x_source;
@@ -950,9 +1132,11 @@ ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
const VideoCommon::ImageViewInfo& view_info)
: VideoCommon::ImageViewBase{info, view_info} {}
ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageParams& params)
ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageViewParams& params)
: VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {}
ImageView::~ImageView() = default;
GLuint ImageView::StorageView(Shader::TextureType texture_type, Shader::ImageFormat image_format) {
if (image_format == Shader::ImageFormat::Typeless) {
return Handle(texture_type);
@@ -1037,7 +1221,8 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const TSCEntry& config) {
glSamplerParameterfv(handle, GL_TEXTURE_BORDER_COLOR, config.BorderColor().data());
if (GLAD_GL_ARB_texture_filter_anisotropic || GLAD_GL_EXT_texture_filter_anisotropic) {
glSamplerParameterf(handle, GL_TEXTURE_MAX_ANISOTROPY, config.MaxAnisotropy());
const f32 max_anisotropy = std::clamp(config.MaxAnisotropy(), 1.0f, 16.0f);
glSamplerParameterf(handle, GL_TEXTURE_MAX_ANISOTROPY, max_anisotropy);
} else {
LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_anisotropic is required");
}
@@ -1056,13 +1241,8 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const TSCEntry& config) {
Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers,
ImageView* depth_buffer, const VideoCommon::RenderTargets& key) {
// Bind to READ_FRAMEBUFFER to stop Nvidia's driver from creating an EXT_framebuffer instead of
// a core framebuffer. EXT framebuffer attachments have to match in size and can be shared
// across contexts. yuzu doesn't share framebuffers across contexts and we need attachments with
// mismatching size, this is why core framebuffers are preferred.
GLuint handle;
glGenFramebuffers(1, &handle);
glBindFramebuffer(GL_READ_FRAMEBUFFER, handle);
framebuffer.Create();
GLuint handle = framebuffer.handle;
GLsizei num_buffers = 0;
std::array<GLenum, NUM_RT> gl_draw_buffers;
@@ -1082,10 +1262,20 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
}
if (const ImageView* const image_view = depth_buffer; image_view) {
if (GetFormatType(image_view->format) == SurfaceType::DepthStencil) {
buffer_bits |= GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
} else {
switch (GetFormatType(image_view->format)) {
case SurfaceType::Depth:
buffer_bits |= GL_DEPTH_BUFFER_BIT;
break;
case SurfaceType::Stencil:
buffer_bits |= GL_STENCIL_BUFFER_BIT;
break;
case SurfaceType::DepthStencil:
buffer_bits |= GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
break;
default:
UNREACHABLE();
buffer_bits |= GL_DEPTH_BUFFER_BIT;
break;
}
const GLenum attachment = AttachmentType(image_view->format);
AttachTexture(handle, attachment, image_view);
@@ -1110,31 +1300,31 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
const std::string name = VideoCommon::Name(key);
glObjectLabel(GL_FRAMEBUFFER, handle, static_cast<GLsizei>(name.size()), name.data());
}
framebuffer.handle = handle;
}
Framebuffer::~Framebuffer() = default;
void BGRCopyPass::CopyBGR(Image& dst_image, Image& src_image,
std::span<const VideoCommon::ImageCopy> copies) {
static constexpr VideoCommon::Offset3D zero_offset{0, 0, 0};
const u32 requested_pbo_size =
std::max(src_image.unswizzled_size_bytes, dst_image.unswizzled_size_bytes);
if (bgr_pbo_size < requested_pbo_size) {
bgr_pbo.Create();
bgr_pbo_size = requested_pbo_size;
glNamedBufferData(bgr_pbo.handle, bgr_pbo_size, nullptr, GL_STREAM_COPY);
}
const u32 img_bpp = BytesPerBlock(src_image.info.format);
for (const ImageCopy& copy : copies) {
ASSERT(copy.src_offset == zero_offset);
ASSERT(copy.dst_offset == zero_offset);
const u32 num_src_layers = static_cast<u32>(copy.src_subresource.num_layers);
const u32 copy_size = copy.extent.width * copy.extent.height * num_src_layers * img_bpp;
if (bgr_pbo_size < copy_size) {
bgr_pbo.Create();
bgr_pbo_size = copy_size;
glNamedBufferData(bgr_pbo.handle, bgr_pbo_size, nullptr, GL_STREAM_COPY);
}
// Copy from source to PBO
glPixelStorei(GL_PACK_ALIGNMENT, 1);
glPixelStorei(GL_PACK_ROW_LENGTH, copy.extent.width);
glBindBuffer(GL_PIXEL_PACK_BUFFER, bgr_pbo.handle);
glGetTextureSubImage(src_image.Handle(), 0, 0, 0, 0, copy.extent.width, copy.extent.height,
copy.src_subresource.num_layers, src_image.GlFormat(),
src_image.GlType(), static_cast<GLsizei>(bgr_pbo_size), nullptr);
num_src_layers, src_image.GlFormat(), src_image.GlType(),
static_cast<GLsizei>(bgr_pbo_size), nullptr);
// Copy from PBO to destination in desired GL format
glPixelStorei(GL_UNPACK_ALIGNMENT, 1);

View File

@@ -15,6 +15,10 @@
#include "video_core/texture_cache/image_view_base.h"
#include "video_core/texture_cache/texture_cache_base.h"
namespace Settings {
struct ResolutionScalingInfo;
}
namespace OpenGL {
class Device;
@@ -78,9 +82,11 @@ public:
ImageBufferMap DownloadStagingBuffer(size_t size);
u64 GetDeviceLocalMemory() const;
void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) {
void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view, bool rescaled) {
UNIMPLEMENTED();
}
@@ -110,6 +116,12 @@ public:
bool HasNativeASTC() const noexcept;
void TickFrame() {}
StateTracker& GetStateTracker() {
return state_tracker;
}
private:
struct StagingBuffers {
explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_);
@@ -149,6 +161,10 @@ private:
OGLTextureView null_image_view_cube;
std::array<GLuint, Shader::NUM_TEXTURE_TYPES> null_image_views{};
std::array<OGLFramebuffer, 4> rescale_draw_fbos;
std::array<OGLFramebuffer, 4> rescale_read_fbos;
const Settings::ResolutionScalingInfo& resolution;
};
class Image : public VideoCommon::ImageBase {
@@ -157,6 +173,7 @@ class Image : public VideoCommon::ImageBase {
public:
explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
VAddr cpu_addr);
explicit Image(const VideoCommon::NullImageParams&);
~Image();
@@ -174,7 +191,7 @@ public:
GLuint StorageHandle() noexcept;
GLuint Handle() const noexcept {
return texture.handle;
return current_texture;
}
GLuint GlFormat() const noexcept {
@@ -185,16 +202,25 @@ public:
return gl_type;
}
bool ScaleUp(bool ignore = false);
bool ScaleDown(bool ignore = false);
private:
void CopyBufferToImage(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset);
void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset);
void Scale(bool up_scale);
OGLTexture texture;
OGLTexture upscaled_backup;
OGLTextureView store_view;
GLenum gl_internal_format = GL_NONE;
GLenum gl_format = GL_NONE;
GLenum gl_type = GL_NONE;
TextureCacheRuntime* runtime{};
GLuint current_texture{};
};
class ImageView : public VideoCommon::ImageViewBase {
@@ -206,7 +232,15 @@ public:
const VideoCommon::ImageViewInfo&, GPUVAddr);
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
const VideoCommon::ImageViewInfo& view_info);
explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&);
explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageViewParams&);
~ImageView();
ImageView(const ImageView&) = delete;
ImageView& operator=(const ImageView&) = delete;
ImageView(ImageView&&) = default;
ImageView& operator=(ImageView&&) = default;
[[nodiscard]] GLuint StorageView(Shader::TextureType texture_type,
Shader::ImageFormat image_format);
@@ -276,6 +310,14 @@ public:
explicit Framebuffer(TextureCacheRuntime&, std::span<ImageView*, NUM_RT> color_buffers,
ImageView* depth_buffer, const VideoCommon::RenderTargets& key);
~Framebuffer();
Framebuffer(const Framebuffer&) = delete;
Framebuffer& operator=(const Framebuffer&) = delete;
Framebuffer(Framebuffer&&) = default;
Framebuffer& operator=(Framebuffer&&) = default;
[[nodiscard]] GLuint Handle() const noexcept {
return framebuffer.handle;
}
@@ -293,7 +335,7 @@ struct TextureCacheParams {
static constexpr bool ENABLE_VALIDATION = true;
static constexpr bool FRAMEBUFFER_BLITS = true;
static constexpr bool HAS_EMULATED_COPIES = true;
static constexpr bool HAS_DEVICE_MEMORY_INFO = false;
static constexpr bool HAS_DEVICE_MEMORY_INFO = true;
using Runtime = OpenGL::TextureCacheRuntime;
using Image = OpenGL::Image;

View File

@@ -108,6 +108,7 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> FORMAT_TAB
{GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT
{GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT
{GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM
{GL_STENCIL_INDEX8, GL_STENCIL, GL_UNSIGNED_BYTE}, // S8_UINT
{GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT
{GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM
{GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL,

View File

@@ -21,8 +21,13 @@
#include "core/memory.h"
#include "core/perf_stats.h"
#include "core/telemetry_session.h"
#include "video_core/host_shaders/fxaa_frag.h"
#include "video_core/host_shaders/fxaa_vert.h"
#include "video_core/host_shaders/opengl_present_frag.h"
#include "video_core/host_shaders/opengl_present_scaleforce_frag.h"
#include "video_core/host_shaders/opengl_present_vert.h"
#include "video_core/host_shaders/present_bicubic_frag.h"
#include "video_core/host_shaders/present_gaussian_frag.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
@@ -208,7 +213,9 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
framebuffer_crop_rect = framebuffer.crop_rect;
const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset};
if (rasterizer.AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride)) {
screen_info.was_accelerated =
rasterizer.AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride);
if (screen_info.was_accelerated) {
return;
}
@@ -251,12 +258,25 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color
void RendererOpenGL::InitOpenGLObjects() {
// Create shader programs
fxaa_vertex = CreateProgram(HostShaders::FXAA_VERT, GL_VERTEX_SHADER);
fxaa_fragment = CreateProgram(HostShaders::FXAA_FRAG, GL_FRAGMENT_SHADER);
present_vertex = CreateProgram(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER);
present_fragment = CreateProgram(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER);
present_bilinear_fragment = CreateProgram(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER);
present_bicubic_fragment = CreateProgram(HostShaders::PRESENT_BICUBIC_FRAG, GL_FRAGMENT_SHADER);
present_gaussian_fragment =
CreateProgram(HostShaders::PRESENT_GAUSSIAN_FRAG, GL_FRAGMENT_SHADER);
present_scaleforce_fragment =
CreateProgram(fmt::format("#version 460\n{}", HostShaders::OPENGL_PRESENT_SCALEFORCE_FRAG),
GL_FRAGMENT_SHADER);
// Generate presentation sampler
present_sampler.Create();
glSamplerParameteri(present_sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glSamplerParameteri(present_sampler.handle, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
present_sampler_nn.Create();
glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
// Generate VBO handle for drawing
vertex_buffer.Create();
@@ -274,6 +294,8 @@ void RendererOpenGL::InitOpenGLObjects() {
// Clear screen to black
LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture);
fxaa_framebuffer.Create();
}
void RendererOpenGL::AddTelemetryFields() {
@@ -325,18 +347,130 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
texture.resource.Release();
texture.resource.Create(GL_TEXTURE_2D);
glTextureStorage2D(texture.resource.handle, 1, internal_format, texture.width, texture.height);
fxaa_texture.Release();
fxaa_texture.Create(GL_TEXTURE_2D);
glTextureStorage2D(fxaa_texture.handle, 1, GL_RGBA16F,
Settings::values.resolution_info.ScaleUp(screen_info.texture.width),
Settings::values.resolution_info.ScaleUp(screen_info.texture.height));
glNamedFramebufferTexture(fxaa_framebuffer.handle, GL_COLOR_ATTACHMENT0, fxaa_texture.handle,
0);
}
void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
// TODO: Signal state tracker about these changes
state_tracker.NotifyScreenDrawVertexArray();
state_tracker.NotifyPolygonModes();
state_tracker.NotifyViewport0();
state_tracker.NotifyScissor0();
state_tracker.NotifyColorMask(0);
state_tracker.NotifyBlend0();
state_tracker.NotifyFramebuffer();
state_tracker.NotifyFrontFace();
state_tracker.NotifyCullTest();
state_tracker.NotifyDepthTest();
state_tracker.NotifyStencilTest();
state_tracker.NotifyPolygonOffset();
state_tracker.NotifyRasterizeEnable();
state_tracker.NotifyFramebufferSRGB();
state_tracker.NotifyLogicOp();
state_tracker.NotifyClipControl();
state_tracker.NotifyAlphaTest();
state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE);
// Update background color before drawing
glClearColor(Settings::values.bg_red.GetValue() / 255.0f,
Settings::values.bg_green.GetValue() / 255.0f,
Settings::values.bg_blue.GetValue() / 255.0f, 1.0f);
glEnable(GL_CULL_FACE);
glDisable(GL_COLOR_LOGIC_OP);
glDisable(GL_DEPTH_TEST);
glDisable(GL_STENCIL_TEST);
glDisable(GL_POLYGON_OFFSET_FILL);
glDisable(GL_RASTERIZER_DISCARD);
glDisable(GL_ALPHA_TEST);
glDisablei(GL_BLEND, 0);
glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
glCullFace(GL_BACK);
glFrontFace(GL_CW);
glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
glBindTextureUnit(0, screen_info.display_texture);
if (Settings::values.anti_aliasing.GetValue() == Settings::AntiAliasing::Fxaa) {
program_manager.BindPresentPrograms(fxaa_vertex.handle, fxaa_fragment.handle);
glEnablei(GL_SCISSOR_TEST, 0);
auto viewport_width = screen_info.texture.width;
auto scissor_width = framebuffer_crop_rect.GetWidth();
if (scissor_width <= 0) {
scissor_width = viewport_width;
}
auto viewport_height = screen_info.texture.height;
auto scissor_height = framebuffer_crop_rect.GetHeight();
if (scissor_height <= 0) {
scissor_height = viewport_height;
}
if (screen_info.was_accelerated) {
viewport_width = Settings::values.resolution_info.ScaleUp(viewport_width);
scissor_width = Settings::values.resolution_info.ScaleUp(scissor_width);
viewport_height = Settings::values.resolution_info.ScaleUp(viewport_height);
scissor_height = Settings::values.resolution_info.ScaleUp(scissor_height);
}
glScissorIndexed(0, 0, 0, scissor_width, scissor_height);
glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(viewport_width),
static_cast<GLfloat>(viewport_height));
glDepthRangeIndexed(0, 0.0, 0.0);
glBindSampler(0, present_sampler.handle);
GLint old_read_fb;
GLint old_draw_fb;
glGetIntegerv(GL_READ_FRAMEBUFFER_BINDING, &old_read_fb);
glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &old_draw_fb);
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, fxaa_framebuffer.handle);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
glBindFramebuffer(GL_READ_FRAMEBUFFER, old_read_fb);
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, old_draw_fb);
glBindTextureUnit(0, fxaa_texture.handle);
}
// Set projection matrix
const std::array ortho_matrix =
MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height));
program_manager.BindPresentPrograms(present_vertex.handle, present_fragment.handle);
GLuint fragment_handle;
const auto filter = Settings::values.scaling_filter.GetValue();
switch (filter) {
case Settings::ScalingFilter::NearestNeighbor:
fragment_handle = present_bilinear_fragment.handle;
break;
case Settings::ScalingFilter::Bilinear:
fragment_handle = present_bilinear_fragment.handle;
break;
case Settings::ScalingFilter::Bicubic:
fragment_handle = present_bicubic_fragment.handle;
break;
case Settings::ScalingFilter::Gaussian:
fragment_handle = present_gaussian_fragment.handle;
break;
case Settings::ScalingFilter::ScaleForce:
fragment_handle = present_scaleforce_fragment.handle;
break;
case Settings::ScalingFilter::Fsr:
LOG_WARNING(
Render_OpenGL,
"FidelityFX FSR Super Sampling is not supported in OpenGL, changing to ScaleForce");
fragment_handle = present_scaleforce_fragment.handle;
break;
default:
fragment_handle = present_bilinear_fragment.handle;
break;
}
program_manager.BindPresentPrograms(present_vertex.handle, fragment_handle);
glProgramUniformMatrix3x2fv(present_vertex.handle, ModelViewMatrixLocation, 1, GL_FALSE,
ortho_matrix.data());
@@ -370,6 +504,11 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) /
static_cast<f32>(screen_info.texture.height);
}
if (Settings::values.anti_aliasing.GetValue() == Settings::AntiAliasing::Fxaa &&
!screen_info.was_accelerated) {
scale_u /= Settings::values.resolution_info.up_factor;
scale_v /= Settings::values.resolution_info.up_factor;
}
const auto& screen = layout.screen;
const std::array vertices = {
@@ -380,47 +519,14 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
};
glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), std::data(vertices));
// TODO: Signal state tracker about these changes
state_tracker.NotifyScreenDrawVertexArray();
state_tracker.NotifyPolygonModes();
state_tracker.NotifyViewport0();
state_tracker.NotifyScissor0();
state_tracker.NotifyColorMask(0);
state_tracker.NotifyBlend0();
state_tracker.NotifyFramebuffer();
state_tracker.NotifyFrontFace();
state_tracker.NotifyCullTest();
state_tracker.NotifyDepthTest();
state_tracker.NotifyStencilTest();
state_tracker.NotifyPolygonOffset();
state_tracker.NotifyRasterizeEnable();
state_tracker.NotifyFramebufferSRGB();
state_tracker.NotifyLogicOp();
state_tracker.NotifyClipControl();
state_tracker.NotifyAlphaTest();
state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE);
glEnable(GL_CULL_FACE);
if (screen_info.display_srgb) {
glEnable(GL_FRAMEBUFFER_SRGB);
} else {
glDisable(GL_FRAMEBUFFER_SRGB);
}
glDisable(GL_COLOR_LOGIC_OP);
glDisable(GL_DEPTH_TEST);
glDisable(GL_STENCIL_TEST);
glDisable(GL_POLYGON_OFFSET_FILL);
glDisable(GL_RASTERIZER_DISCARD);
glDisable(GL_ALPHA_TEST);
glDisablei(GL_BLEND, 0);
glDisablei(GL_SCISSOR_TEST, 0);
glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
glCullFace(GL_BACK);
glFrontFace(GL_CW);
glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(layout.width),
static_cast<GLfloat>(layout.height));
glDepthRangeIndexed(0, 0.0, 0.0);
glEnableVertexAttribArray(PositionLocation);
glEnableVertexAttribArray(TexCoordLocation);
@@ -440,8 +546,11 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex));
}
glBindTextureUnit(0, screen_info.display_texture);
glBindSampler(0, present_sampler.handle);
if (Settings::values.scaling_filter.GetValue() != Settings::ScalingFilter::NearestNeighbor) {
glBindSampler(0, present_sampler.handle);
} else {
glBindSampler(0, present_sampler_nn.handle);
}
glClear(GL_COLOR_BUFFER_BIT);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);

View File

@@ -50,6 +50,7 @@ struct TextureInfo {
/// Structure used for storing information about the display target for the Switch screen
struct ScreenInfo {
GLuint display_texture{};
bool was_accelerated = false;
bool display_srgb{};
const Common::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f};
TextureInfo texture;
@@ -109,9 +110,15 @@ private:
// OpenGL object IDs
OGLSampler present_sampler;
OGLSampler present_sampler_nn;
OGLBuffer vertex_buffer;
OGLProgram fxaa_vertex;
OGLProgram fxaa_fragment;
OGLProgram present_vertex;
OGLProgram present_fragment;
OGLProgram present_bilinear_fragment;
OGLProgram present_bicubic_fragment;
OGLProgram present_gaussian_fragment;
OGLProgram present_scaleforce_fragment;
OGLFramebuffer screenshot_framebuffer;
// GPU address of the vertex buffer
@@ -119,6 +126,8 @@ private:
/// Display information for Switch screen
ScreenInfo screen_info;
OGLTexture fxaa_texture;
OGLFramebuffer fxaa_framebuffer;
/// OpenGL framebuffer data
std::vector<u8> gl_framebuffer_data;

View File

@@ -363,7 +363,7 @@ BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_,
BlitImageHelper::~BlitImageHelper() = default;
void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, VkImageView src_view,
const Region2D& dst_region, const Region2D& src_region,
Tegra::Engines::Fermi2D::Filter filter,
Tegra::Engines::Fermi2D::Operation operation) {
@@ -373,9 +373,8 @@ void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, const ImageV
.operation = operation,
};
const VkPipelineLayout layout = *one_texture_pipeline_layout;
const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D);
const VkSampler sampler = is_linear ? *linear_sampler : *nearest_sampler;
const VkPipeline pipeline = FindOrEmplacePipeline(key);
const VkPipeline pipeline = FindOrEmplaceColorPipeline(key);
scheduler.RequestRenderpass(dst_framebuffer);
scheduler.Record([this, dst_region, src_region, pipeline, layout, sampler,
src_view](vk::CommandBuffer cmdbuf) {
@@ -398,10 +397,13 @@ void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer,
Tegra::Engines::Fermi2D::Operation operation) {
ASSERT(filter == Tegra::Engines::Fermi2D::Filter::Point);
ASSERT(operation == Tegra::Engines::Fermi2D::Operation::SrcCopy);
const BlitImagePipelineKey key{
.renderpass = dst_framebuffer->RenderPass(),
.operation = operation,
};
const VkPipelineLayout layout = *two_textures_pipeline_layout;
const VkSampler sampler = *nearest_sampler;
const VkPipeline pipeline = BlitDepthStencilPipeline(dst_framebuffer->RenderPass());
const VkPipeline pipeline = FindOrEmplaceDepthStencilPipeline(key);
scheduler.RequestRenderpass(dst_framebuffer);
scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_depth_view,
src_stencil_view, this](vk::CommandBuffer cmdbuf) {
@@ -419,40 +421,45 @@ void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer,
}
void BlitImageHelper::ConvertD32ToR32(const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
const ImageView& src_image_view, u32 up_scale,
u32 down_shift) {
ConvertDepthToColorPipeline(convert_d32_to_r32_pipeline, dst_framebuffer->RenderPass());
Convert(*convert_d32_to_r32_pipeline, dst_framebuffer, src_image_view);
Convert(*convert_d32_to_r32_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift);
}
void BlitImageHelper::ConvertR32ToD32(const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
const ImageView& src_image_view, u32 up_scale,
u32 down_shift) {
ConvertColorToDepthPipeline(convert_r32_to_d32_pipeline, dst_framebuffer->RenderPass());
Convert(*convert_r32_to_d32_pipeline, dst_framebuffer, src_image_view);
Convert(*convert_r32_to_d32_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift);
}
void BlitImageHelper::ConvertD16ToR16(const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
const ImageView& src_image_view, u32 up_scale,
u32 down_shift) {
ConvertDepthToColorPipeline(convert_d16_to_r16_pipeline, dst_framebuffer->RenderPass());
Convert(*convert_d16_to_r16_pipeline, dst_framebuffer, src_image_view);
Convert(*convert_d16_to_r16_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift);
}
void BlitImageHelper::ConvertR16ToD16(const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
const ImageView& src_image_view, u32 up_scale,
u32 down_shift) {
ConvertColorToDepthPipeline(convert_r16_to_d16_pipeline, dst_framebuffer->RenderPass());
Convert(*convert_r16_to_d16_pipeline, dst_framebuffer, src_image_view);
Convert(*convert_r16_to_d16_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift);
}
void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
const ImageView& src_image_view, u32 up_scale, u32 down_shift) {
const VkPipelineLayout layout = *one_texture_pipeline_layout;
const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D);
const VkSampler sampler = *nearest_sampler;
const VkExtent2D extent{
.width = src_image_view.size.width,
.height = src_image_view.size.height,
.width = std::max((src_image_view.size.width * up_scale) >> down_shift, 1U),
.height = std::max((src_image_view.size.height * up_scale) >> down_shift, 1U),
};
scheduler.RequestRenderpass(dst_framebuffer);
scheduler.Record([pipeline, layout, sampler, src_view, extent, this](vk::CommandBuffer cmdbuf) {
scheduler.Record([pipeline, layout, sampler, src_view, extent, up_scale, down_shift,
this](vk::CommandBuffer cmdbuf) {
const VkOffset2D offset{
.x = 0,
.y = 0,
@@ -488,7 +495,7 @@ void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_frameb
scheduler.InvalidateState();
}
VkPipeline BlitImageHelper::FindOrEmplacePipeline(const BlitImagePipelineKey& key) {
VkPipeline BlitImageHelper::FindOrEmplaceColorPipeline(const BlitImagePipelineKey& key) {
const auto it = std::ranges::find(blit_color_keys, key);
if (it != blit_color_keys.end()) {
return *blit_color_pipelines[std::distance(blit_color_keys.begin(), it)];
@@ -542,12 +549,14 @@ VkPipeline BlitImageHelper::FindOrEmplacePipeline(const BlitImagePipelineKey& ke
return *blit_color_pipelines.back();
}
VkPipeline BlitImageHelper::BlitDepthStencilPipeline(VkRenderPass renderpass) {
if (blit_depth_stencil_pipeline) {
return *blit_depth_stencil_pipeline;
VkPipeline BlitImageHelper::FindOrEmplaceDepthStencilPipeline(const BlitImagePipelineKey& key) {
const auto it = std::ranges::find(blit_depth_stencil_keys, key);
if (it != blit_depth_stencil_keys.end()) {
return *blit_depth_stencil_pipelines[std::distance(blit_depth_stencil_keys.begin(), it)];
}
blit_depth_stencil_keys.push_back(key);
const std::array stages = MakeStages(*full_screen_vert, *blit_depth_stencil_frag);
blit_depth_stencil_pipeline = device.GetLogical().CreateGraphicsPipeline({
blit_depth_stencil_pipelines.push_back(device.GetLogical().CreateGraphicsPipeline({
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
@@ -560,15 +569,15 @@ VkPipeline BlitImageHelper::BlitDepthStencilPipeline(VkRenderPass renderpass) {
.pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
.pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
.pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
.pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO,
.pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO,
.pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO,
.layout = *two_textures_pipeline_layout,
.renderPass = renderpass,
.renderPass = key.renderpass,
.subpass = 0,
.basePipelineHandle = VK_NULL_HANDLE,
.basePipelineIndex = 0,
});
return *blit_depth_stencil_pipeline;
}));
return *blit_depth_stencil_pipelines.back();
}
void BlitImageHelper::ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass) {

View File

@@ -34,7 +34,7 @@ public:
StateTracker& state_tracker, DescriptorPool& descriptor_pool);
~BlitImageHelper();
void BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
void BlitColor(const Framebuffer* dst_framebuffer, VkImageView src_image_view,
const Region2D& dst_region, const Region2D& src_region,
Tegra::Engines::Fermi2D::Filter filter,
Tegra::Engines::Fermi2D::Operation operation);
@@ -44,21 +44,25 @@ public:
const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter,
Tegra::Engines::Fermi2D::Operation operation);
void ConvertD32ToR32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
void ConvertD32ToR32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
u32 up_scale, u32 down_shift);
void ConvertR32ToD32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
void ConvertR32ToD32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
u32 up_scale, u32 down_shift);
void ConvertD16ToR16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
void ConvertD16ToR16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
u32 up_scale, u32 down_shift);
void ConvertR16ToD16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
void ConvertR16ToD16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
u32 up_scale, u32 down_shift);
private:
void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
const ImageView& src_image_view);
const ImageView& src_image_view, u32 up_scale, u32 down_shift);
[[nodiscard]] VkPipeline FindOrEmplacePipeline(const BlitImagePipelineKey& key);
[[nodiscard]] VkPipeline FindOrEmplaceColorPipeline(const BlitImagePipelineKey& key);
[[nodiscard]] VkPipeline BlitDepthStencilPipeline(VkRenderPass renderpass);
[[nodiscard]] VkPipeline FindOrEmplaceDepthStencilPipeline(const BlitImagePipelineKey& key);
void ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass);
@@ -84,7 +88,8 @@ private:
std::vector<BlitImagePipelineKey> blit_color_keys;
std::vector<vk::Pipeline> blit_color_pipelines;
vk::Pipeline blit_depth_stencil_pipeline;
std::vector<BlitImagePipelineKey> blit_depth_stencil_keys;
std::vector<vk::Pipeline> blit_depth_stencil_pipelines;
vk::Pipeline convert_d32_to_r32_pipeline;
vk::Pipeline convert_r32_to_d32_pipeline;
vk::Pipeline convert_d16_to_r16_pipeline;

View File

@@ -208,6 +208,9 @@ struct FormatTuple {
{VK_FORMAT_D32_SFLOAT, Attachable}, // D32_FLOAT
{VK_FORMAT_D16_UNORM, Attachable}, // D16_UNORM
// Stencil formats
{VK_FORMAT_S8_UINT, Attachable}, // S8_UINT
// DepthStencil formats
{VK_FORMAT_D24_UNORM_S8_UINT, Attachable}, // D24_UNORM_S8_UINT
{VK_FORMAT_D24_UNORM_S8_UINT, Attachable}, // S8_UINT_D24_UNORM (emulated)

View File

@@ -10,6 +10,7 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "shader_recompiler/backend/spirv/emit_spirv.h"
#include "shader_recompiler/shader_info.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
@@ -20,6 +21,8 @@
namespace Vulkan {
using Shader::Backend::SPIRV::NUM_TEXTURE_AND_IMAGE_SCALING_WORDS;
class DescriptorLayoutBuilder {
public:
DescriptorLayoutBuilder(const Device& device_) : device{&device_} {}
@@ -68,18 +71,28 @@ public:
}
// Creates a pipeline layout for descriptor_set_layout with one push-constant range sized from
// Shader::Backend::SPIRV::RescalingLayout.
// NOTE(review): this text is a rendered diff; the two pushConstantRange* initializer pairs below
// are the pre-change (0 / nullptr) and post-change (1 / &range) forms of the same fields.
vk::PipelineLayout CreatePipelineLayout(VkDescriptorSetLayout descriptor_set_layout) const {
using Shader::Backend::SPIRV::RescalingLayout;
// Compute layouts leave sizeof(RescalingLayout::down_factor) bytes out of the range.
const u32 size_offset = is_compute ? sizeof(RescalingLayout::down_factor) : 0u;
const VkPushConstantRange range{
// is_compute is set by Add() when a compute stage was added.
.stageFlags = static_cast<VkShaderStageFlags>(
is_compute ? VK_SHADER_STAGE_COMPUTE_BIT : VK_SHADER_STAGE_ALL_GRAPHICS),
.offset = 0,
.size = static_cast<u32>(sizeof(RescalingLayout)) - size_offset,
};
return device->GetLogical().CreatePipelineLayout({
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
// NOTE(review): the count depends on the handle being non-null while the pointer depends on
// bindings.empty(); confirm the two conditions always agree.
.setLayoutCount = descriptor_set_layout ? 1U : 0U,
.pSetLayouts = bindings.empty() ? nullptr : &descriptor_set_layout,
.pushConstantRangeCount = 0,
.pPushConstantRanges = nullptr,
.pushConstantRangeCount = 1,
.pPushConstantRanges = &range,
});
}
void Add(const Shader::Info& info, VkShaderStageFlags stage) {
is_compute |= (stage & VK_SHADER_STAGE_COMPUTE_BIT) != 0;
Add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, stage, info.constant_buffer_descriptors);
Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage, info.storage_buffers_descriptors);
Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage, info.texture_buffer_descriptors);
@@ -115,6 +128,7 @@ private:
}
const Device* device{};
bool is_compute{};
boost::container::small_vector<VkDescriptorSetLayoutBinding, 32> bindings;
boost::container::small_vector<VkDescriptorUpdateTemplateEntryKHR, 32> entries;
u32 binding{};
@@ -122,31 +136,68 @@ private:
size_t offset{};
};
inline void PushImageDescriptors(const Shader::Info& info, const VkSampler*& samplers,
const ImageId*& image_view_ids, TextureCache& texture_cache,
VKUpdateDescriptorQueue& update_descriptor_queue) {
for (const auto& desc : info.texture_buffer_descriptors) {
image_view_ids += desc.count;
// Packs one "is rescaled" bit per texture and per image into an array of
// NUM_TEXTURE_AND_IMAGE_SCALING_WORDS 32-bit words, exposed through Data().
// Texture bits start at word 0; image bits start at word NUM_TEXTURE_SCALING_WORDS.
// NOTE(review): this text is a rendered diff; the two lines between PushTexture and PushImage
// (the `for` over info.image_buffer_descriptors) belong to the removed version of
// PushImageDescriptors, not to this class.
class RescalingPushConstant {
public:
explicit RescalingPushConstant() noexcept {}
// Records the next texture's flag: ORs the current bit into the current word when is_rescaled
// is true, then moves to the next bit.
void PushTexture(bool is_rescaled) noexcept {
*texture_ptr |= is_rescaled ? texture_bit : 0u;
texture_bit <<= 1u;
// The u32 bit becomes 0 once it is shifted out of the word: restart at bit 0 of the next word.
// NOTE(review): the pointer is advanced without a bounds check against the array size.
if (texture_bit == 0u) {
texture_bit = 1u;
++texture_ptr;
}
}
for (const auto& desc : info.image_buffer_descriptors) {
image_view_ids += desc.count;
// Records the next image's flag; same bit/word stepping as PushTexture, on the image cursor.
void PushImage(bool is_rescaled) noexcept {
*image_ptr |= is_rescaled ? image_bit : 0u;
image_bit <<= 1u;
if (image_bit == 0u) {
image_bit = 1u;
++image_ptr;
}
}
// Returns the packed words accumulated so far.
const std::array<u32, NUM_TEXTURE_AND_IMAGE_SCALING_WORDS>& Data() const noexcept {
return words;
}
private:
// Zero-initialized storage; the Push* methods only ever set bits.
std::array<u32, NUM_TEXTURE_AND_IMAGE_SCALING_WORDS> words{};
// Write cursors (current word) for textures and images.
u32* texture_ptr{words.data()};
u32* image_ptr{words.data() + Shader::Backend::SPIRV::NUM_TEXTURE_SCALING_WORDS};
// Single-bit masks selecting the next bit to write in the current word.
u32 texture_bit{1u};
u32 image_bit{1u};
};
// Queues descriptor updates for every texture and storage image in info, and records whether
// each one is rescaled in the rescaling push constant.
// samplers and views are cursors passed by reference and are advanced as entries are consumed.
// NOTE(review): this text is a rendered diff; the GetImageView(*(image_view_ids++)) lines are
// the pre-change forms of the GetImageView call that follows each of them.
inline void PushImageDescriptors(TextureCache& texture_cache,
VKUpdateDescriptorQueue& update_descriptor_queue,
const Shader::Info& info, RescalingPushConstant& rescaling,
const VkSampler*& samplers,
const VideoCommon::ImageViewInOut*& views) {
// Step the view cursor past the texture-buffer and image-buffer entries; they are not bound here.
const u32 num_texture_buffers = Shader::NumDescriptors(info.texture_buffer_descriptors);
const u32 num_image_buffers = Shader::NumDescriptors(info.image_buffer_descriptors);
views += num_texture_buffers;
views += num_image_buffers;
// Sampled textures: one view and one sampler are consumed per array element.
for (const auto& desc : info.texture_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
const VideoCommon::ImageViewId image_view_id{(views++)->id};
const VkSampler sampler{*(samplers++)};
ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))};
ImageView& image_view{texture_cache.GetImageView(image_view_id)};
const VkImageView vk_image_view{image_view.Handle(desc.type)};
update_descriptor_queue.AddSampledImage(vk_image_view, sampler);
rescaling.PushTexture(texture_cache.IsRescaling(image_view));
}
}
// Storage images: one view is consumed per array element.
for (const auto& desc : info.image_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))};
ImageView& image_view{texture_cache.GetImageView((views++)->id)};
// Images the shader writes are reported to the texture cache as modified.
if (desc.is_written) {
texture_cache.MarkModification(image_view.image_id);
}
const VkImageView vk_image_view{image_view.StorageView(desc.type, desc.format)};
update_descriptor_queue.AddImage(vk_image_view);
rescaling.PushImage(texture_cache.IsRescaling(image_view));
}
}
}

View File

@@ -12,14 +12,22 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "common/math_util.h"
#include "common/settings.h"
#include "core/core.h"
#include "core/frontend/emu_window.h"
#include "core/memory.h"
#include "video_core/gpu.h"
#include "video_core/host_shaders/fxaa_frag_spv.h"
#include "video_core/host_shaders/fxaa_vert_spv.h"
#include "video_core/host_shaders/present_bicubic_frag_spv.h"
#include "video_core/host_shaders/present_gaussian_frag_spv.h"
#include "video_core/host_shaders/vulkan_present_frag_spv.h"
#include "video_core/host_shaders/vulkan_present_scaleforce_fp16_frag_spv.h"
#include "video_core/host_shaders/vulkan_present_scaleforce_fp32_frag_spv.h"
#include "video_core/host_shaders/vulkan_present_vert_spv.h"
#include "video_core/renderer_vulkan/renderer_vulkan.h"
#include "video_core/renderer_vulkan/vk_blit_screen.h"
#include "video_core/renderer_vulkan/vk_fsr.h"
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
@@ -144,8 +152,8 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
scheduler.Wait(resource_ticks[image_index]);
resource_ticks[image_index] = scheduler.CurrentTick();
UpdateDescriptorSet(image_index,
use_accelerated ? screen_info.image_view : *raw_image_views[image_index]);
VkImageView source_image_view =
use_accelerated ? screen_info.image_view : *raw_image_views[image_index];
BufferData data;
SetUniformData(data, layout);
@@ -222,9 +230,134 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
read_barrier);
cmdbuf.CopyBufferToImage(*buffer, image, VK_IMAGE_LAYOUT_GENERAL, copy);
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, write_barrier);
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
0, write_barrier);
});
}
const auto anti_alias_pass = Settings::values.anti_aliasing.GetValue();
if (use_accelerated && anti_alias_pass != Settings::AntiAliasing::None) {
UpdateAADescriptorSet(image_index, source_image_view, false);
const u32 up_scale = Settings::values.resolution_info.up_scale;
const u32 down_shift = Settings::values.resolution_info.down_shift;
VkExtent2D size{
.width = (up_scale * framebuffer.width) >> down_shift,
.height = (up_scale * framebuffer.height) >> down_shift,
};
scheduler.Record([this, image_index, size, anti_alias_pass](vk::CommandBuffer cmdbuf) {
const VkImageMemoryBarrier base_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = 0,
.dstAccessMask = 0,
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = {},
.subresourceRange =
{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = 1,
},
};
{
VkImageMemoryBarrier fsr_write_barrier = base_barrier;
fsr_write_barrier.image = *aa_image;
fsr_write_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, fsr_write_barrier);
}
const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f;
const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f;
const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f;
const VkClearValue clear_color{
.color = {.float32 = {bg_red, bg_green, bg_blue, 1.0f}},
};
const VkRenderPassBeginInfo renderpass_bi{
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
.pNext = nullptr,
.renderPass = *aa_renderpass,
.framebuffer = *aa_framebuffer,
.renderArea =
{
.offset = {0, 0},
.extent = size,
},
.clearValueCount = 1,
.pClearValues = &clear_color,
};
const VkViewport viewport{
.x = 0.0f,
.y = 0.0f,
.width = static_cast<float>(size.width),
.height = static_cast<float>(size.height),
.minDepth = 0.0f,
.maxDepth = 1.0f,
};
const VkRect2D scissor{
.offset = {0, 0},
.extent = size,
};
cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE);
switch (anti_alias_pass) {
case Settings::AntiAliasing::Fxaa:
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *aa_pipeline);
break;
default:
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *aa_pipeline);
break;
}
cmdbuf.SetViewport(0, viewport);
cmdbuf.SetScissor(0, scissor);
cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices));
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *aa_pipeline_layout, 0,
aa_descriptor_sets[image_index], {});
cmdbuf.Draw(4, 1, 0, 0);
cmdbuf.EndRenderPass();
{
VkImageMemoryBarrier blit_read_barrier = base_barrier;
blit_read_barrier.image = *aa_image;
blit_read_barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
blit_read_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, blit_read_barrier);
}
});
source_image_view = *aa_image_view;
}
if (fsr) {
auto crop_rect = framebuffer.crop_rect;
if (crop_rect.GetWidth() == 0) {
crop_rect.right = framebuffer.width;
}
if (crop_rect.GetHeight() == 0) {
crop_rect.bottom = framebuffer.height;
}
crop_rect = crop_rect.Scale(Settings::values.resolution_info.up_factor);
VkExtent2D fsr_input_size{
.width = Settings::values.resolution_info.ScaleUp(framebuffer.width),
.height = Settings::values.resolution_info.ScaleUp(framebuffer.height),
};
VkImageView fsr_image_view =
fsr->Draw(scheduler, image_index, source_image_view, fsr_input_size, crop_rect);
UpdateDescriptorSet(image_index, fsr_image_view, true);
} else {
const bool is_nn =
Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::NearestNeighbor;
UpdateDescriptorSet(image_index, source_image_view, is_nn);
}
scheduler.Record(
[this, host_framebuffer, image_index, size = render_area](vk::CommandBuffer cmdbuf) {
const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f;
@@ -258,8 +391,28 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
.offset = {0, 0},
.extent = size,
};
const auto filter = Settings::values.scaling_filter.GetValue();
cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE);
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
switch (filter) {
case Settings::ScalingFilter::NearestNeighbor:
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *bilinear_pipeline);
break;
case Settings::ScalingFilter::Bilinear:
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *bilinear_pipeline);
break;
case Settings::ScalingFilter::Bicubic:
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *bicubic_pipeline);
break;
case Settings::ScalingFilter::Gaussian:
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *gaussian_pipeline);
break;
case Settings::ScalingFilter::ScaleForce:
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *scaleforce_pipeline);
break;
default:
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *bilinear_pipeline);
break;
}
cmdbuf.SetViewport(0, viewport);
cmdbuf.SetScissor(0, scissor);
@@ -281,11 +434,16 @@ VkSemaphore VKBlitScreen::DrawToSwapchain(const Tegra::FramebufferConfig& frameb
}
// Convenience overload: creates a framebuffer for image_view and extent against this object's
// own renderpass member by delegating to the three-argument overload.
vk::Framebuffer VKBlitScreen::CreateFramebuffer(const VkImageView& image_view, VkExtent2D extent) {
    vk::RenderPass& own_renderpass = renderpass;
    return CreateFramebuffer(image_view, extent, own_renderpass);
}
vk::Framebuffer VKBlitScreen::CreateFramebuffer(const VkImageView& image_view, VkExtent2D extent,
vk::RenderPass& rd) {
return device.GetLogical().CreateFramebuffer(VkFramebufferCreateInfo{
.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.renderPass = *renderpass,
.renderPass = *rd,
.attachmentCount = 1,
.pAttachments = &image_view,
.width = extent.width,
@@ -308,9 +466,21 @@ void VKBlitScreen::CreateDynamicResources() {
CreateRenderPass();
CreateFramebuffers();
CreateGraphicsPipeline();
fsr.reset();
if (Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::Fsr) {
CreateFSR();
}
}
void VKBlitScreen::RefreshResources(const Tegra::FramebufferConfig& framebuffer) {
if (Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::Fsr) {
if (!fsr) {
CreateFSR();
}
} else {
fsr.reset();
}
if (framebuffer.width == raw_width && framebuffer.height == raw_height && !raw_images.empty()) {
return;
}
@@ -324,7 +494,16 @@ void VKBlitScreen::RefreshResources(const Tegra::FramebufferConfig& framebuffer)
// Builds the shader modules used for presentation from the embedded SPIR-V blobs: the present
// vertex shader, the FXAA pair, and one fragment shader per scaling filter.
// NOTE(review): this text is a rendered diff; the fragment_shader assignment is the pre-change
// line, and bilinear_fragment_shader is built from the same VULKAN_PRESENT_FRAG_SPV blob.
void VKBlitScreen::CreateShaders() {
vertex_shader = BuildShader(device, VULKAN_PRESENT_VERT_SPV);
fragment_shader = BuildShader(device, VULKAN_PRESENT_FRAG_SPV);
fxaa_vertex_shader = BuildShader(device, FXAA_VERT_SPV);
fxaa_fragment_shader = BuildShader(device, FXAA_FRAG_SPV);
bilinear_fragment_shader = BuildShader(device, VULKAN_PRESENT_FRAG_SPV);
bicubic_fragment_shader = BuildShader(device, PRESENT_BICUBIC_FRAG_SPV);
gaussian_fragment_shader = BuildShader(device, PRESENT_GAUSSIAN_FRAG_SPV);
// ScaleForce ships in two variants; the FP16 one is used only when the device reports float16
// support, otherwise the FP32 one is built.
if (device.IsFloat16Supported()) {
scaleforce_fragment_shader = BuildShader(device, VULKAN_PRESENT_SCALEFORCE_FP16_FRAG_SPV);
} else {
scaleforce_fragment_shader = BuildShader(device, VULKAN_PRESENT_SCALEFORCE_FP32_FRAG_SPV);
}
}
void VKBlitScreen::CreateSemaphores() {
@@ -344,6 +523,13 @@ void VKBlitScreen::CreateDescriptorPool() {
},
}};
const std::array<VkDescriptorPoolSize, 1> pool_sizes_aa{{
{
.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.descriptorCount = static_cast<u32>(image_count * 2),
},
}};
const VkDescriptorPoolCreateInfo ci{
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
.pNext = nullptr,
@@ -353,19 +539,33 @@ void VKBlitScreen::CreateDescriptorPool() {
.pPoolSizes = pool_sizes.data(),
};
descriptor_pool = device.GetLogical().CreateDescriptorPool(ci);
const VkDescriptorPoolCreateInfo ci_aa{
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
.pNext = nullptr,
.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
.maxSets = static_cast<u32>(image_count),
.poolSizeCount = static_cast<u32>(pool_sizes_aa.size()),
.pPoolSizes = pool_sizes_aa.data(),
};
aa_descriptor_pool = device.GetLogical().CreateDescriptorPool(ci_aa);
}
// (Re)creates the renderpass member for the swapchain's image view format, using
// CreateRenderPassImpl with its remaining argument left at the default.
void VKBlitScreen::CreateRenderPass() {
    const auto swapchain_format = swapchain.GetImageViewFormat();
    renderpass = CreateRenderPassImpl(swapchain_format);
}
vk::RenderPass VKBlitScreen::CreateRenderPassImpl(VkFormat format, bool is_present) {
const VkAttachmentDescription color_attachment{
.flags = 0,
.format = swapchain.GetImageViewFormat(),
.format = format,
.samples = VK_SAMPLE_COUNT_1_BIT,
.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR,
.storeOp = VK_ATTACHMENT_STORE_OP_STORE,
.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE,
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
.finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
.finalLayout = is_present ? VK_IMAGE_LAYOUT_PRESENT_SRC_KHR : VK_IMAGE_LAYOUT_GENERAL,
};
const VkAttachmentReference color_attachment_ref{
@@ -408,7 +608,7 @@ void VKBlitScreen::CreateRenderPass() {
.pDependencies = &dependency,
};
renderpass = device.GetLogical().CreateRenderPass(renderpass_ci);
return device.GetLogical().CreateRenderPass(renderpass_ci);
}
void VKBlitScreen::CreateDescriptorSetLayout() {
@@ -429,6 +629,23 @@ void VKBlitScreen::CreateDescriptorSetLayout() {
},
}};
const std::array<VkDescriptorSetLayoutBinding, 2> layout_bindings_aa{{
{
.binding = 0,
.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_VERTEX_BIT,
.pImmutableSamplers = nullptr,
},
{
.binding = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
.pImmutableSamplers = nullptr,
},
}};
const VkDescriptorSetLayoutCreateInfo ci{
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
.pNext = nullptr,
@@ -437,11 +654,21 @@ void VKBlitScreen::CreateDescriptorSetLayout() {
.pBindings = layout_bindings.data(),
};
const VkDescriptorSetLayoutCreateInfo ci_aa{
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.bindingCount = static_cast<u32>(layout_bindings_aa.size()),
.pBindings = layout_bindings_aa.data(),
};
descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout(ci);
aa_descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout(ci_aa);
}
void VKBlitScreen::CreateDescriptorSets() {
const std::vector layouts(image_count, *descriptor_set_layout);
const std::vector layouts_aa(image_count, *aa_descriptor_set_layout);
const VkDescriptorSetAllocateInfo ai{
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
@@ -451,7 +678,16 @@ void VKBlitScreen::CreateDescriptorSets() {
.pSetLayouts = layouts.data(),
};
const VkDescriptorSetAllocateInfo ai_aa{
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
.pNext = nullptr,
.descriptorPool = *aa_descriptor_pool,
.descriptorSetCount = static_cast<u32>(image_count),
.pSetLayouts = layouts_aa.data(),
};
descriptor_sets = descriptor_pool.Allocate(ai);
aa_descriptor_sets = aa_descriptor_pool.Allocate(ai_aa);
}
void VKBlitScreen::CreatePipelineLayout() {
@@ -464,11 +700,21 @@ void VKBlitScreen::CreatePipelineLayout() {
.pushConstantRangeCount = 0,
.pPushConstantRanges = nullptr,
};
const VkPipelineLayoutCreateInfo ci_aa{
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.setLayoutCount = 1,
.pSetLayouts = aa_descriptor_set_layout.address(),
.pushConstantRangeCount = 0,
.pPushConstantRanges = nullptr,
};
pipeline_layout = device.GetLogical().CreatePipelineLayout(ci);
aa_pipeline_layout = device.GetLogical().CreatePipelineLayout(ci_aa);
}
void VKBlitScreen::CreateGraphicsPipeline() {
const std::array<VkPipelineShaderStageCreateInfo, 2> shader_stages{{
const std::array<VkPipelineShaderStageCreateInfo, 2> bilinear_shader_stages{{
{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.pNext = nullptr,
@@ -483,7 +729,70 @@ void VKBlitScreen::CreateGraphicsPipeline() {
.pNext = nullptr,
.flags = 0,
.stage = VK_SHADER_STAGE_FRAGMENT_BIT,
.module = *fragment_shader,
.module = *bilinear_fragment_shader,
.pName = "main",
.pSpecializationInfo = nullptr,
},
}};
const std::array<VkPipelineShaderStageCreateInfo, 2> bicubic_shader_stages{{
{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.stage = VK_SHADER_STAGE_VERTEX_BIT,
.module = *vertex_shader,
.pName = "main",
.pSpecializationInfo = nullptr,
},
{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.stage = VK_SHADER_STAGE_FRAGMENT_BIT,
.module = *bicubic_fragment_shader,
.pName = "main",
.pSpecializationInfo = nullptr,
},
}};
const std::array<VkPipelineShaderStageCreateInfo, 2> gaussian_shader_stages{{
{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.stage = VK_SHADER_STAGE_VERTEX_BIT,
.module = *vertex_shader,
.pName = "main",
.pSpecializationInfo = nullptr,
},
{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.stage = VK_SHADER_STAGE_FRAGMENT_BIT,
.module = *gaussian_fragment_shader,
.pName = "main",
.pSpecializationInfo = nullptr,
},
}};
const std::array<VkPipelineShaderStageCreateInfo, 2> scaleforce_shader_stages{{
{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.stage = VK_SHADER_STAGE_VERTEX_BIT,
.module = *vertex_shader,
.pName = "main",
.pSpecializationInfo = nullptr,
},
{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.stage = VK_SHADER_STAGE_FRAGMENT_BIT,
.module = *scaleforce_fragment_shader,
.pName = "main",
.pSpecializationInfo = nullptr,
},
@@ -583,12 +892,12 @@ void VKBlitScreen::CreateGraphicsPipeline() {
.pDynamicStates = dynamic_states.data(),
};
const VkGraphicsPipelineCreateInfo pipeline_ci{
const VkGraphicsPipelineCreateInfo bilinear_pipeline_ci{
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.stageCount = static_cast<u32>(shader_stages.size()),
.pStages = shader_stages.data(),
.stageCount = static_cast<u32>(bilinear_shader_stages.size()),
.pStages = bilinear_shader_stages.data(),
.pVertexInputState = &vertex_input_ci,
.pInputAssemblyState = &input_assembly_ci,
.pTessellationState = nullptr,
@@ -605,7 +914,76 @@ void VKBlitScreen::CreateGraphicsPipeline() {
.basePipelineIndex = 0,
};
pipeline = device.GetLogical().CreateGraphicsPipeline(pipeline_ci);
const VkGraphicsPipelineCreateInfo bicubic_pipeline_ci{
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.stageCount = static_cast<u32>(bicubic_shader_stages.size()),
.pStages = bicubic_shader_stages.data(),
.pVertexInputState = &vertex_input_ci,
.pInputAssemblyState = &input_assembly_ci,
.pTessellationState = nullptr,
.pViewportState = &viewport_state_ci,
.pRasterizationState = &rasterization_ci,
.pMultisampleState = &multisampling_ci,
.pDepthStencilState = nullptr,
.pColorBlendState = &color_blend_ci,
.pDynamicState = &dynamic_state_ci,
.layout = *pipeline_layout,
.renderPass = *renderpass,
.subpass = 0,
.basePipelineHandle = 0,
.basePipelineIndex = 0,
};
const VkGraphicsPipelineCreateInfo gaussian_pipeline_ci{
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.stageCount = static_cast<u32>(gaussian_shader_stages.size()),
.pStages = gaussian_shader_stages.data(),
.pVertexInputState = &vertex_input_ci,
.pInputAssemblyState = &input_assembly_ci,
.pTessellationState = nullptr,
.pViewportState = &viewport_state_ci,
.pRasterizationState = &rasterization_ci,
.pMultisampleState = &multisampling_ci,
.pDepthStencilState = nullptr,
.pColorBlendState = &color_blend_ci,
.pDynamicState = &dynamic_state_ci,
.layout = *pipeline_layout,
.renderPass = *renderpass,
.subpass = 0,
.basePipelineHandle = 0,
.basePipelineIndex = 0,
};
const VkGraphicsPipelineCreateInfo scaleforce_pipeline_ci{
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.stageCount = static_cast<u32>(scaleforce_shader_stages.size()),
.pStages = scaleforce_shader_stages.data(),
.pVertexInputState = &vertex_input_ci,
.pInputAssemblyState = &input_assembly_ci,
.pTessellationState = nullptr,
.pViewportState = &viewport_state_ci,
.pRasterizationState = &rasterization_ci,
.pMultisampleState = &multisampling_ci,
.pDepthStencilState = nullptr,
.pColorBlendState = &color_blend_ci,
.pDynamicState = &dynamic_state_ci,
.layout = *pipeline_layout,
.renderPass = *renderpass,
.subpass = 0,
.basePipelineHandle = 0,
.basePipelineIndex = 0,
};
bilinear_pipeline = device.GetLogical().CreateGraphicsPipeline(bilinear_pipeline_ci);
bicubic_pipeline = device.GetLogical().CreateGraphicsPipeline(bicubic_pipeline_ci);
gaussian_pipeline = device.GetLogical().CreateGraphicsPipeline(gaussian_pipeline_ci);
scaleforce_pipeline = device.GetLogical().CreateGraphicsPipeline(scaleforce_pipeline_ci);
}
void VKBlitScreen::CreateSampler() {
@@ -614,8 +992,29 @@ void VKBlitScreen::CreateSampler() {
.pNext = nullptr,
.flags = 0,
.magFilter = VK_FILTER_LINEAR,
.minFilter = VK_FILTER_LINEAR,
.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST,
.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
.mipLodBias = 0.0f,
.anisotropyEnable = VK_FALSE,
.maxAnisotropy = 0.0f,
.compareEnable = VK_FALSE,
.compareOp = VK_COMPARE_OP_NEVER,
.minLod = 0.0f,
.maxLod = 0.0f,
.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK,
.unnormalizedCoordinates = VK_FALSE,
};
const VkSamplerCreateInfo ci_nn{
.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.magFilter = VK_FILTER_NEAREST,
.minFilter = VK_FILTER_NEAREST,
.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR,
.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST,
.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
@@ -631,6 +1030,7 @@ void VKBlitScreen::CreateSampler() {
};
sampler = device.GetLogical().CreateSampler(ci);
nn_sampler = device.GetLogical().CreateSampler(ci_nn);
}
void VKBlitScreen::CreateFramebuffers() {
@@ -639,7 +1039,7 @@ void VKBlitScreen::CreateFramebuffers() {
for (std::size_t i = 0; i < image_count; ++i) {
const VkImageView image_view{swapchain.GetImageViewIndex(i)};
framebuffers[i] = CreateFramebuffer(image_view, size);
framebuffers[i] = CreateFramebuffer(image_view, size, renderpass);
}
}
@@ -649,6 +1049,11 @@ void VKBlitScreen::ReleaseRawImages() {
}
raw_images.clear();
raw_buffer_commits.clear();
aa_image_view.reset();
aa_image.reset();
aa_commit = MemoryCommit{};
buffer.reset();
buffer_commit = MemoryCommit{};
}
@@ -675,8 +1080,11 @@ void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer)
raw_image_views.resize(image_count);
raw_buffer_commits.resize(image_count);
for (size_t i = 0; i < image_count; ++i) {
raw_images[i] = device.GetLogical().CreateImage(VkImageCreateInfo{
const auto create_image = [&](bool used_on_framebuffer = false, u32 up_scale = 1,
u32 down_shift = 0) {
u32 extra_usages = used_on_framebuffer ? VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT
: VK_IMAGE_USAGE_TRANSFER_DST_BIT;
return device.GetLogical().CreateImage(VkImageCreateInfo{
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
@@ -684,26 +1092,30 @@ void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer)
.format = GetFormat(framebuffer),
.extent =
{
.width = framebuffer.width,
.height = framebuffer.height,
.width = (up_scale * framebuffer.width) >> down_shift,
.height = (up_scale * framebuffer.height) >> down_shift,
.depth = 1,
},
.mipLevels = 1,
.arrayLayers = 1,
.samples = VK_SAMPLE_COUNT_1_BIT,
.tiling = VK_IMAGE_TILING_LINEAR,
.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
.tiling = used_on_framebuffer ? VK_IMAGE_TILING_OPTIMAL : VK_IMAGE_TILING_LINEAR,
.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | extra_usages,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
});
raw_buffer_commits[i] = memory_allocator.Commit(raw_images[i], MemoryUsage::DeviceLocal);
raw_image_views[i] = device.GetLogical().CreateImageView(VkImageViewCreateInfo{
};
const auto create_commit = [&](vk::Image& image) {
return memory_allocator.Commit(image, MemoryUsage::DeviceLocal);
};
const auto create_image_view = [&](vk::Image& image) {
return device.GetLogical().CreateImageView(VkImageViewCreateInfo{
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.image = *raw_images[i],
.image = *image,
.viewType = VK_IMAGE_VIEW_TYPE_2D,
.format = GetFormat(framebuffer),
.components =
@@ -722,10 +1134,211 @@ void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer)
.layerCount = 1,
},
});
};
for (size_t i = 0; i < image_count; ++i) {
raw_images[i] = create_image();
raw_buffer_commits[i] = create_commit(raw_images[i]);
raw_image_views[i] = create_image_view(raw_images[i]);
}
// AA Resources
const u32 up_scale = Settings::values.resolution_info.up_scale;
const u32 down_shift = Settings::values.resolution_info.down_shift;
aa_image = create_image(true, up_scale, down_shift);
aa_commit = create_commit(aa_image);
aa_image_view = create_image_view(aa_image);
VkExtent2D size{
.width = (up_scale * framebuffer.width) >> down_shift,
.height = (up_scale * framebuffer.height) >> down_shift,
};
if (aa_renderpass) {
aa_framebuffer = CreateFramebuffer(*aa_image_view, size, aa_renderpass);
return;
}
aa_renderpass = CreateRenderPassImpl(GetFormat(framebuffer), false);
aa_framebuffer = CreateFramebuffer(*aa_image_view, size, aa_renderpass);
const std::array<VkPipelineShaderStageCreateInfo, 2> fxaa_shader_stages{{
{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.stage = VK_SHADER_STAGE_VERTEX_BIT,
.module = *fxaa_vertex_shader,
.pName = "main",
.pSpecializationInfo = nullptr,
},
{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.stage = VK_SHADER_STAGE_FRAGMENT_BIT,
.module = *fxaa_fragment_shader,
.pName = "main",
.pSpecializationInfo = nullptr,
},
}};
const auto vertex_binding_description = ScreenRectVertex::GetDescription();
const auto vertex_attrs_description = ScreenRectVertex::GetAttributes();
const VkPipelineVertexInputStateCreateInfo vertex_input_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.vertexBindingDescriptionCount = 1,
.pVertexBindingDescriptions = &vertex_binding_description,
.vertexAttributeDescriptionCount = u32{vertex_attrs_description.size()},
.pVertexAttributeDescriptions = vertex_attrs_description.data(),
};
const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
.primitiveRestartEnable = VK_FALSE,
};
const VkPipelineViewportStateCreateInfo viewport_state_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.viewportCount = 1,
.pViewports = nullptr,
.scissorCount = 1,
.pScissors = nullptr,
};
const VkPipelineRasterizationStateCreateInfo rasterization_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.depthClampEnable = VK_FALSE,
.rasterizerDiscardEnable = VK_FALSE,
.polygonMode = VK_POLYGON_MODE_FILL,
.cullMode = VK_CULL_MODE_NONE,
.frontFace = VK_FRONT_FACE_CLOCKWISE,
.depthBiasEnable = VK_FALSE,
.depthBiasConstantFactor = 0.0f,
.depthBiasClamp = 0.0f,
.depthBiasSlopeFactor = 0.0f,
.lineWidth = 1.0f,
};
const VkPipelineMultisampleStateCreateInfo multisampling_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
.sampleShadingEnable = VK_FALSE,
.minSampleShading = 0.0f,
.pSampleMask = nullptr,
.alphaToCoverageEnable = VK_FALSE,
.alphaToOneEnable = VK_FALSE,
};
const VkPipelineColorBlendAttachmentState color_blend_attachment{
.blendEnable = VK_FALSE,
.srcColorBlendFactor = VK_BLEND_FACTOR_ZERO,
.dstColorBlendFactor = VK_BLEND_FACTOR_ZERO,
.colorBlendOp = VK_BLEND_OP_ADD,
.srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
.dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
.alphaBlendOp = VK_BLEND_OP_ADD,
.colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
};
const VkPipelineColorBlendStateCreateInfo color_blend_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.logicOpEnable = VK_FALSE,
.logicOp = VK_LOGIC_OP_COPY,
.attachmentCount = 1,
.pAttachments = &color_blend_attachment,
.blendConstants = {0.0f, 0.0f, 0.0f, 0.0f},
};
static constexpr std::array dynamic_states{
VK_DYNAMIC_STATE_VIEWPORT,
VK_DYNAMIC_STATE_SCISSOR,
};
const VkPipelineDynamicStateCreateInfo dynamic_state_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.dynamicStateCount = static_cast<u32>(dynamic_states.size()),
.pDynamicStates = dynamic_states.data(),
};
const VkGraphicsPipelineCreateInfo fxaa_pipeline_ci{
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.stageCount = static_cast<u32>(fxaa_shader_stages.size()),
.pStages = fxaa_shader_stages.data(),
.pVertexInputState = &vertex_input_ci,
.pInputAssemblyState = &input_assembly_ci,
.pTessellationState = nullptr,
.pViewportState = &viewport_state_ci,
.pRasterizationState = &rasterization_ci,
.pMultisampleState = &multisampling_ci,
.pDepthStencilState = nullptr,
.pColorBlendState = &color_blend_ci,
.pDynamicState = &dynamic_state_ci,
.layout = *aa_pipeline_layout,
.renderPass = *aa_renderpass,
.subpass = 0,
.basePipelineHandle = 0,
.basePipelineIndex = 0,
};
// AA
aa_pipeline = device.GetLogical().CreateGraphicsPipeline(fxaa_pipeline_ci);
}
void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const {
void VKBlitScreen::UpdateAADescriptorSet(std::size_t image_index, VkImageView image_view,
bool nn) const {
const VkDescriptorImageInfo image_info{
.sampler = nn ? *nn_sampler : *sampler,
.imageView = image_view,
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
};
const VkWriteDescriptorSet sampler_write{
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.pNext = nullptr,
.dstSet = aa_descriptor_sets[image_index],
.dstBinding = 0,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.pImageInfo = &image_info,
.pBufferInfo = nullptr,
.pTexelBufferView = nullptr,
};
const VkWriteDescriptorSet sampler_write_2{
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.pNext = nullptr,
.dstSet = aa_descriptor_sets[image_index],
.dstBinding = 1,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.pImageInfo = &image_info,
.pBufferInfo = nullptr,
.pTexelBufferView = nullptr,
};
device.GetLogical().UpdateDescriptorSets(std::array{sampler_write, sampler_write_2}, {});
}
void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView image_view,
bool nn) const {
const VkDescriptorBufferInfo buffer_info{
.buffer = *buffer,
.offset = offsetof(BufferData, uniform),
@@ -746,7 +1359,7 @@ void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView imag
};
const VkDescriptorImageInfo image_info{
.sampler = *sampler,
.sampler = nn ? *nn_sampler : *sampler,
.imageView = image_view,
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
};
@@ -798,17 +1411,19 @@ void VKBlitScreen::SetVertexData(BufferData& data, const Tegra::FramebufferConfi
UNIMPLEMENTED_IF(framebuffer_crop_rect.top != 0);
UNIMPLEMENTED_IF(framebuffer_crop_rect.left != 0);
// Scale the output by the crop width/height. This is commonly used with 1280x720 rendering
// (e.g. handheld mode) on a 1920x1080 framebuffer.
f32 scale_u = 1.0f;
f32 scale_v = 1.0f;
if (framebuffer_crop_rect.GetWidth() > 0) {
scale_u = static_cast<f32>(framebuffer_crop_rect.GetWidth()) /
static_cast<f32>(screen_info.width);
}
if (framebuffer_crop_rect.GetHeight() > 0) {
scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) /
static_cast<f32>(screen_info.height);
// Scale the output by the crop width/height. This is commonly used with 1280x720 rendering
// (e.g. handheld mode) on a 1920x1080 framebuffer.
if (!fsr) {
if (framebuffer_crop_rect.GetWidth() > 0) {
scale_u = static_cast<f32>(framebuffer_crop_rect.GetWidth()) /
static_cast<f32>(screen_info.width);
}
if (framebuffer_crop_rect.GetHeight() > 0) {
scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) /
static_cast<f32>(screen_info.height);
}
}
const auto& screen = layout.screen;
@@ -822,6 +1437,15 @@ void VKBlitScreen::SetVertexData(BufferData& data, const Tegra::FramebufferConfi
data.vertices[3] = ScreenRectVertex(x + w, y + h, texcoords.bottom * scale_u, right * scale_v);
}
void VKBlitScreen::CreateFSR() {
const auto& layout = render_window.GetFramebufferLayout();
const VkExtent2D fsr_size{
.width = layout.screen.GetWidth(),
.height = layout.screen.GetHeight(),
};
fsr = std::make_unique<FSR>(device, memory_allocator, image_count, fsr_size);
}
/// Returns the size in bytes of `BufferData` plus one framebuffer-sized region per image
/// (`image_count` regions in total).
u64 VKBlitScreen::CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const {
    // `auto` keeps the product in the same type the original expression computed it in.
    const auto images_size = GetSizeInBytes(framebuffer) * image_count;
    return sizeof(BufferData) + images_size;
}

View File

@@ -34,6 +34,7 @@ namespace Vulkan {
struct ScreenInfo;
class Device;
class FSR;
class RasterizerVulkan;
class VKScheduler;
class VKSwapchain;
@@ -66,6 +67,9 @@ public:
[[nodiscard]] vk::Framebuffer CreateFramebuffer(const VkImageView& image_view,
VkExtent2D extent);
[[nodiscard]] vk::Framebuffer CreateFramebuffer(const VkImageView& image_view,
VkExtent2D extent, vk::RenderPass& rd);
private:
struct BufferData;
@@ -74,6 +78,7 @@ private:
void CreateSemaphores();
void CreateDescriptorPool();
void CreateRenderPass();
vk::RenderPass CreateRenderPassImpl(VkFormat, bool is_present = true);
void CreateDescriptorSetLayout();
void CreateDescriptorSets();
void CreatePipelineLayout();
@@ -88,11 +93,14 @@ private:
void CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer);
void CreateRawImages(const Tegra::FramebufferConfig& framebuffer);
void UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const;
void UpdateDescriptorSet(std::size_t image_index, VkImageView image_view, bool nn) const;
void UpdateAADescriptorSet(std::size_t image_index, VkImageView image_view, bool nn) const;
void SetUniformData(BufferData& data, const Layout::FramebufferLayout layout) const;
void SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer,
const Layout::FramebufferLayout layout) const;
void CreateFSR();
u64 CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const;
u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer,
std::size_t image_index) const;
@@ -107,14 +115,24 @@ private:
const VKScreenInfo& screen_info;
vk::ShaderModule vertex_shader;
vk::ShaderModule fragment_shader;
vk::ShaderModule fxaa_vertex_shader;
vk::ShaderModule fxaa_fragment_shader;
vk::ShaderModule bilinear_fragment_shader;
vk::ShaderModule bicubic_fragment_shader;
vk::ShaderModule gaussian_fragment_shader;
vk::ShaderModule scaleforce_fragment_shader;
vk::DescriptorPool descriptor_pool;
vk::DescriptorSetLayout descriptor_set_layout;
vk::PipelineLayout pipeline_layout;
vk::Pipeline pipeline;
vk::Pipeline nearest_neightbor_pipeline;
vk::Pipeline bilinear_pipeline;
vk::Pipeline bicubic_pipeline;
vk::Pipeline gaussian_pipeline;
vk::Pipeline scaleforce_pipeline;
vk::RenderPass renderpass;
std::vector<vk::Framebuffer> framebuffers;
vk::DescriptorSets descriptor_sets;
vk::Sampler nn_sampler;
vk::Sampler sampler;
vk::Buffer buffer;
@@ -126,8 +144,22 @@ private:
std::vector<vk::Image> raw_images;
std::vector<vk::ImageView> raw_image_views;
std::vector<MemoryCommit> raw_buffer_commits;
vk::DescriptorPool aa_descriptor_pool;
vk::DescriptorSetLayout aa_descriptor_set_layout;
vk::PipelineLayout aa_pipeline_layout;
vk::Pipeline aa_pipeline;
vk::RenderPass aa_renderpass;
vk::Framebuffer aa_framebuffer;
vk::DescriptorSets aa_descriptor_sets;
vk::Image aa_image;
vk::ImageView aa_image_view;
MemoryCommit aa_commit;
u32 raw_width = 0;
u32 raw_height = 0;
std::unique_ptr<FSR> fsr;
};
} // namespace Vulkan

View File

@@ -146,7 +146,7 @@ void BufferCacheRuntime::Finish() {
}
void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
std::span<const VideoCommon::BufferCopy> copies) {
std::span<const VideoCommon::BufferCopy> copies, bool barrier) {
static constexpr VkMemoryBarrier READ_BARRIER{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
.pNext = nullptr,
@@ -163,10 +163,42 @@ void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
boost::container::small_vector<VkBufferCopy, 3> vk_copies(copies.size());
std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy);
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) {
scheduler.Record([src_buffer, dst_buffer, vk_copies, barrier](vk::CommandBuffer cmdbuf) {
if (barrier) {
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT, 0, READ_BARRIER);
}
cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies);
if (barrier) {
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, WRITE_BARRIER);
}
});
}
/// Records a pipeline barrier from all commands to the transfer stage.
///
/// The barrier makes earlier memory writes available to subsequent transfer reads and
/// writes. This function records the barrier only; it takes no buffers and performs no
/// copy itself.
void BufferCacheRuntime::PreCopyBarrier() {
    static constexpr VkMemoryBarrier READ_BARRIER{
        .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
        .pNext = nullptr,
        .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
        .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
    };
    scheduler.RequestOutsideRenderPassOperationContext();
    // The lambda captures nothing: READ_BARRIER has static storage duration, and no
    // buffers or copy regions exist in this function.
    scheduler.Record([](vk::CommandBuffer cmdbuf) {
        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
                               0, READ_BARRIER);
    });
}
void BufferCacheRuntime::PostCopyBarrier() {
static constexpr VkMemoryBarrier WRITE_BARRIER{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
};
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([](vk::CommandBuffer cmdbuf) {
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
0, WRITE_BARRIER);
});

View File

@@ -69,8 +69,12 @@ public:
[[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size);
void PreCopyBarrier();
void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer,
std::span<const VideoCommon::BufferCopy> copies);
std::span<const VideoCommon::BufferCopy> copies, bool barrier = true);
void PostCopyBarrier();
void ClearBuffer(VkBuffer dest_buffer, u32 offset, size_t size, u32 value);

Some files were not shown because too many files have changed in this diff Show More