Compare commits

..

14 Commits

Author SHA1 Message Date
Merry
af6290ed12 dynarmic: Update and enable DYNARMIC_IGNORE_ASSERTS 2021-08-15 19:33:02 +01:00
Merry
1770503185 xbyak: Update include path 2021-08-15 19:26:38 +01:00
bunnei
87d63b858a Merge pull request #6861 from yzct12345/const-mempy-is-all-the-speed
decoders: Optimize memcpy for the other functions
2021-08-15 02:38:12 -07:00
bunnei
bdd617da03 Merge pull request #6868 from yzct12345/safe-threads-no-deadlocks
threadsafe_queue: Fix deadlock
2021-08-14 02:28:59 -07:00
yzct12345
0ba521e634 threadsafe_queue: Fix deadlock
This fixes a lost wakeup in SPSCQueue. If the reader is in just the right position, the writer's notification will be lost and this will be a problem if the writer then does something to wait on the reader.

This was discovered to affect my upcoming stacktrace PR. I don't think any performance decrease will be noticeable because an uncontended mutex is smart enough to skip the syscall. This PR might also resolve some rare deadlocks but I don't know of any examples.
2021-08-13 19:22:51 +00:00
bunnei
71d8d84b59 Merge pull request #6862 from german77/badsdl
input_common: Disable sdl raw input mode
2021-08-12 21:14:26 -07:00
bunnei
0509fe3377 Merge pull request #6838 from ameerj/sws-align
vic: Specify sws_scale height stride.
2021-08-12 11:28:33 -07:00
german77
2a2f0bfe9e input_common: Disable sdl raw input mode 2021-08-12 13:17:07 -05:00
yzct12345
430255caf8 decoders: Templates allow memcpy optimizations 2021-08-12 04:45:25 +00:00
Mai M
043904bae1 Merge pull request #6855 from german77/sdl16
externals: Update sdl2 to 2.0.16
2021-08-11 23:14:53 -04:00
Mai M
756d76d971 Merge pull request #6860 from lat9nq/ranged-settings-2
settings: Fix MSVC issues
2021-08-11 17:53:09 -04:00
lat9nq
5be2d6fd28 settings: Fix MSVC issues
According to https://stackoverflow.com/questions/469508, we run into an
MSVC bug (since VS 2005) when using diamond inheritance for
RangedSetting.

This explicitly implements those functions in RangedSetting. GetValue is
implemented as just calling the inherited version. The explicit
conversion operator is reimplemented. I opted for this over ignoring the
warning with a pragma since this specifies the inherited behavior, and I
have now less faith in MSVC to pick the right one.

In addition, we mark destructors as virtual to silence what I believe is
a fair MSVC compilation error.
2021-08-11 17:12:14 -04:00
german77
fe2e710003 externals: Update sdl2 to 2.0.16 2021-08-10 19:16:30 -05:00
ameerj
a779cede7c vic: Specify sws_scale height stride.
Silences a sws_scale runtime warning about unaligned strides.
2021-08-09 23:24:16 -04:00
13 changed files with 190 additions and 114 deletions

View File

@@ -376,7 +376,7 @@ if (ENABLE_SDL2)
if (YUZU_USE_BUNDLED_SDL2)
# Detect toolchain and platform
if ((MSVC_VERSION GREATER_EQUAL 1910 AND MSVC_VERSION LESS 1930) AND ARCHITECTURE_x86_64)
set(SDL2_VER "SDL2-2.0.15-prerelease")
set(SDL2_VER "SDL2-2.0.16")
else()
message(FATAL_ERROR "No bundled SDL2 binaries for your toolchain. Disable YUZU_USE_BUNDLED_SDL2 and provide your own.")
endif()
@@ -396,7 +396,7 @@ if (ENABLE_SDL2)
elseif (YUZU_USE_EXTERNAL_SDL2)
message(STATUS "Using SDL2 from externals.")
else()
find_package(SDL2 2.0.15 REQUIRED)
find_package(SDL2 2.0.16 REQUIRED)
# Some installations don't set SDL2_LIBRARIES
if("${SDL2_LIBRARIES}" STREQUAL "")

View File

@@ -7,7 +7,9 @@ include(DownloadExternals)
# xbyak
if (ARCHITECTURE_x86 OR ARCHITECTURE_x86_64)
add_library(xbyak INTERFACE)
target_include_directories(xbyak SYSTEM INTERFACE ./xbyak/xbyak)
file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/xbyak/include)
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/xbyak/xbyak DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/xbyak/include)
target_include_directories(xbyak SYSTEM INTERFACE ${CMAKE_CURRENT_BINARY_DIR}/xbyak/include)
target_compile_definitions(xbyak INTERFACE XBYAK_NO_OP_NAMES)
endif()
@@ -19,6 +21,7 @@ target_include_directories(catch-single-include INTERFACE catch/single_include)
if (ARCHITECTURE_x86_64)
set(DYNARMIC_TESTS OFF)
set(DYNARMIC_NO_BUNDLED_FMT ON)
set(DYNARMIC_IGNORE_ASSERTS ON CACHE BOOL "" FORCE)
add_subdirectory(dynarmic)
endif()

2
externals/SDL vendored

View File

@@ -75,7 +75,7 @@ public:
*/
explicit BasicSetting(const Type& default_val, const std::string& name)
: default_value{default_val}, global{default_val}, label{name} {}
~BasicSetting() = default;
virtual ~BasicSetting() = default;
/**
* Returns a reference to the setting's value.
@@ -161,7 +161,7 @@ public:
explicit BasicRangedSetting(const Type& default_val, const Type& min_val, const Type& max_val,
const std::string& name)
: BasicSetting<Type>{default_val, name}, minimum{min_val}, maximum{max_val} {}
~BasicRangedSetting() = default;
virtual ~BasicRangedSetting() = default;
/**
* Like BasicSetting's SetValue, except value is clamped to the range of the setting.
@@ -208,7 +208,7 @@ public:
*/
explicit Setting(const Type& default_val, const std::string& name)
: BasicSetting<Type>(default_val, name) {}
~Setting() = default;
virtual ~Setting() = default;
/**
* Tells this setting to represent either the global or custom setting when other member
@@ -237,13 +237,13 @@ public:
*
* @returns The required value of the setting
*/
[[nodiscard]] const Type& GetValue() const override {
[[nodiscard]] virtual const Type& GetValue() const override {
if (use_global) {
return this->global;
}
return custom;
}
[[nodiscard]] const Type& GetValue(bool need_global) const {
[[nodiscard]] virtual const Type& GetValue(bool need_global) const {
if (use_global || need_global) {
return this->global;
}
@@ -286,7 +286,7 @@ public:
*
* @returns A reference to the current setting value
*/
explicit operator const Type&() const override {
virtual explicit operator const Type&() const override {
if (use_global) {
return this->global;
}
@@ -318,7 +318,22 @@ public:
: BasicSetting<Type>{default_val, name},
BasicRangedSetting<Type>{default_val, min_val, max_val, name}, Setting<Type>{default_val,
name} {}
~RangedSetting() = default;
virtual ~RangedSetting() = default;
// The following are needed to avoid a MSVC bug
// (source: https://stackoverflow.com/questions/469508)
/// Explicit override that simply defers to Setting<Type>::GetValue().
[[nodiscard]] const Type& GetValue() const override {
    const Type& inherited_value = Setting<Type>::GetValue();
    return inherited_value;
}
/// Explicit override that simply defers to Setting<Type>::GetValue(bool),
/// passing need_global through unchanged.
[[nodiscard]] const Type& GetValue(bool need_global) const override {
    const Type& inherited_value = Setting<Type>::GetValue(need_global);
    return inherited_value;
}
/// Explicit conversion to the stored value: yields `custom` unless
/// `use_global` is set, in which case `global` is returned.
explicit operator const Type&() const override {
    if (!this->use_global) {
        return this->custom;
    }
    return this->global;
}
/**
* Like BasicSetting's SetValue, except value is clamped to the range of the setting. Sets the

View File

@@ -46,15 +46,13 @@ public:
ElementPtr* new_ptr = new ElementPtr();
write_ptr->next.store(new_ptr, std::memory_order_release);
write_ptr = new_ptr;
++size;
const size_t previous_size{size++};
// Acquire the mutex and then immediately release it as a fence.
// cv_mutex must be held or else there will be a missed wakeup if the other thread is in the
// line before cv.wait
// TODO(bunnei): This can be replaced with C++20 waitable atomics when properly supported.
// See discussion on https://github.com/yuzu-emu/yuzu/pull/3173 for details.
if (previous_size == 0) {
std::lock_guard lock{cv_mutex};
}
std::lock_guard lock{cv_mutex};
cv.notify_one();
}

View File

@@ -6,7 +6,7 @@
#include <bitset>
#include <initializer_list>
#include <xbyak.h>
#include <xbyak/xbyak.h>
#include "common/assert.h"
namespace Common::X64 {

View File

@@ -5,7 +5,7 @@
#pragma once
#include <type_traits>
#include <xbyak.h>
#include <xbyak/xbyak.h>
#include "common/x64/xbyak_abi.h"
namespace Common::X64 {

View File

@@ -889,6 +889,9 @@ SDLState::SDLState() {
RegisterFactory<VibrationDevice>("sdl", vibration_factory);
RegisterFactory<MotionDevice>("sdl", motion_factory);
// Disable raw input. When enabled this setting causes SDL to die when a web applet opens
SDL_SetHint(SDL_HINT_JOYSTICK_RAWINPUT, "0");
// Enable HIDAPI rumble. This prevents SDL from disabling motion on PS4 and PS5 controllers
SDL_SetHint(SDL_HINT_JOYSTICK_HIDAPI_PS4_RUMBLE, "1");
SDL_SetHint(SDL_HINT_JOYSTICK_HIDAPI_PS5_RUMBLE, "1");

View File

@@ -96,12 +96,11 @@ void Vic::Execute() {
if (!converted_frame_buffer) {
converted_frame_buffer = AVMallocPtr{static_cast<u8*>(av_malloc(linear_size)), av_free};
}
const int converted_stride{frame->width * 4};
const std::array<int, 4> converted_stride{frame->width * 4, frame->height * 4, 0, 0};
u8* const converted_frame_buf_addr{converted_frame_buffer.get()};
sws_scale(scaler_ctx, frame->data, frame->linesize, 0, frame->height,
&converted_frame_buf_addr, &converted_stride);
&converted_frame_buf_addr, converted_stride.data());
const u32 blk_kind = static_cast<u32>(config.block_linear_kind);
if (blk_kind != 0) {

View File

@@ -6,7 +6,7 @@
#include <array>
#include <bitset>
#include <xbyak.h>
#include <xbyak/xbyak.h>
#include "common/bit_field.h"
#include "common/common_types.h"
#include "common/x64/xbyak_abi.h"

View File

@@ -84,34 +84,107 @@ template <bool TO_LINEAR>
void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
u32 height, u32 depth, u32 block_height, u32 block_depth, u32 stride_alignment) {
switch (bytes_per_pixel) {
case 1:
return SwizzleImpl<TO_LINEAR, 1>(output, input, width, height, depth, block_height,
#define BPP_CASE(x) \
case x: \
return SwizzleImpl<TO_LINEAR, x>(output, input, width, height, depth, block_height, \
block_depth, stride_alignment);
case 2:
return SwizzleImpl<TO_LINEAR, 2>(output, input, width, height, depth, block_height,
block_depth, stride_alignment);
case 3:
return SwizzleImpl<TO_LINEAR, 3>(output, input, width, height, depth, block_height,
block_depth, stride_alignment);
case 4:
return SwizzleImpl<TO_LINEAR, 4>(output, input, width, height, depth, block_height,
block_depth, stride_alignment);
case 6:
return SwizzleImpl<TO_LINEAR, 6>(output, input, width, height, depth, block_height,
block_depth, stride_alignment);
case 8:
return SwizzleImpl<TO_LINEAR, 8>(output, input, width, height, depth, block_height,
block_depth, stride_alignment);
case 12:
return SwizzleImpl<TO_LINEAR, 12>(output, input, width, height, depth, block_height,
block_depth, stride_alignment);
case 16:
return SwizzleImpl<TO_LINEAR, 16>(output, input, width, height, depth, block_height,
block_depth, stride_alignment);
BPP_CASE(1)
BPP_CASE(2)
BPP_CASE(3)
BPP_CASE(4)
BPP_CASE(6)
BPP_CASE(8)
BPP_CASE(12)
BPP_CASE(16)
#undef BPP_CASE
default:
UNREACHABLE_MSG("Invalid bytes_per_pixel={}", bytes_per_pixel);
}
}
/// Copies a subrect_width x subrect_height rectangle of BYTES_PER_PIXEL-sized
/// texels from a pitch-addressed source (unswizzled_data) into a swizzled
/// destination (swizzled_data). The destination rectangle starts at
/// (offset_x, offset_y); destination byte offsets are derived from the GOB
/// constants and SWIZZLE_TABLE.
///
/// BYTES_PER_PIXEL is a template parameter so the per-texel memcpy has a
/// compile-time constant size.
template <u32 BYTES_PER_PIXEL>
void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
                    u8* swizzled_data, const u8* unswizzled_data, u32 block_height_bit,
                    u32 offset_x, u32 offset_y) {
    const u32 block_height = 1U << block_height_bit;
    // Number of texel rows covered by one block (GOB rows times GOBs per block).
    const auto rows_per_block = GOB_SIZE_Y * block_height;
    const u32 image_width_in_gobs =
        (swizzled_width * BYTES_PER_PIXEL + (GOB_SIZE_X - 1)) / GOB_SIZE_X;

    for (u32 row = 0; row < subrect_height; ++row) {
        const u32 dst_y = row + offset_y;

        // Row-dependent part of the destination address: start of the block
        // row, plus the GOB inside that block which contains dst_y.
        const auto block_row_base =
            (dst_y / rows_per_block) * GOB_SIZE * block_height * image_width_in_gobs;
        const auto gob_within_block = ((dst_y % rows_per_block) / GOB_SIZE_Y) * GOB_SIZE;
        const u32 gob_address_y = block_row_base + gob_within_block;

        const auto& row_table = SWIZZLE_TABLE[dst_y % GOB_SIZE_Y];
        const u32 source_row_offset = row * source_pitch;

        for (u32 col = 0; col < subrect_width; ++col) {
            // Destination X position expressed in bytes.
            const u32 dst_x_bytes = (col + offset_x) * BYTES_PER_PIXEL;

            const u32 gob_address =
                gob_address_y + (dst_x_bytes / GOB_SIZE_X) * GOB_SIZE * block_height;
            const u32 swizzled_offset = gob_address + row_table[dst_x_bytes % GOB_SIZE_X];
            const u32 unswizzled_offset = source_row_offset + col * BYTES_PER_PIXEL;

            std::memcpy(swizzled_data + swizzled_offset, unswizzled_data + unswizzled_offset,
                        BYTES_PER_PIXEL);
        }
    }
}
/// Copies a line_length_in x line_count rectangle of BYTES_PER_PIXEL-sized
/// texels out of a swizzled source (input) into a pitch-addressed destination
/// (output). The source rectangle starts at (origin_x, origin_y); source byte
/// offsets are derived from the GOB constants and SWIZZLE_TABLE.
///
/// Here block_height is used as a shift amount (log2), not as a GOB count.
template <u32 BYTES_PER_PIXEL>
void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 block_height,
                      u32 origin_x, u32 origin_y, u8* output, const u8* input) {
    const u32 x_shift = GOB_SIZE_SHIFT + block_height;
    const u32 block_height_mask = (1U << block_height) - 1;

    const u32 stride = width * BYTES_PER_PIXEL;
    const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) / GOB_SIZE_X;
    const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height);

    for (u32 row = 0; row < line_count; ++row) {
        const u32 src_y = row + origin_y;
        const u32 block_y = src_y >> GOB_SIZE_Y_SHIFT;

        // Row-dependent part of the source address: the block row, then the
        // GOB inside that block.
        const auto block_row_base = (block_y >> block_height) * block_size;
        const auto gob_within_block = (block_y & block_height_mask) << GOB_SIZE_SHIFT;
        const u32 src_offset_y = block_row_base + gob_within_block;

        const auto& row_table = SWIZZLE_TABLE[src_y % GOB_SIZE_Y];
        const u32 dest_row_offset = row * pitch;

        for (u32 col = 0; col < line_length_in; ++col) {
            // Source X position expressed in bytes.
            const u32 src_x = (col + origin_x) * BYTES_PER_PIXEL;
            const u32 src_offset_x = (src_x >> GOB_SIZE_X_SHIFT) << x_shift;

            const u32 swizzled_offset =
                src_offset_y + src_offset_x + row_table[src_x % GOB_SIZE_X];
            const u32 unswizzled_offset = dest_row_offset + col * BYTES_PER_PIXEL;

            std::memcpy(output + unswizzled_offset, input + swizzled_offset, BYTES_PER_PIXEL);
        }
    }
}
/// Copies line_count rows of line_length_in BYTES_PER_PIXEL-sized texels from
/// a pitch-addressed source (input) into a swizzled destination (output)
/// whose block size accounts for both block_height and block_depth.
///
/// Non-zero origins are reported through UNIMPLEMENTED_IF and are otherwise
/// ignored by the offset computation. The `height` parameter is not used.
template <u32 BYTES_PER_PIXEL>
void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 height,
                         u32 block_height, u32 block_depth, u32 origin_x, u32 origin_y, u8* output,
                         const u8* input) {
    UNIMPLEMENTED_IF(origin_x > 0);
    UNIMPLEMENTED_IF(origin_y > 0);

    const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height + block_depth;
    const u32 block_height_mask = (1U << block_height) - 1;

    const u32 stride = width * BYTES_PER_PIXEL;
    const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) / GOB_SIZE_X;
    const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth);

    for (u32 row = 0; row < line_count; ++row) {
        const u32 block_y = row / GOB_SIZE_Y;

        // Row-dependent part of the destination address: the block row, then
        // the GOB inside that block.
        const auto block_row_base = (block_y >> block_height) * block_size;
        const auto gob_within_block = (block_y & block_height_mask) * GOB_SIZE;
        const u32 dst_offset_y = block_row_base + gob_within_block;

        const auto& row_table = SWIZZLE_TABLE[row % GOB_SIZE_Y];

        for (u32 col = 0; col < line_length_in; ++col) {
            const u32 dst_offset =
                ((col / GOB_SIZE_X) << x_shift) + dst_offset_y + row_table[col % GOB_SIZE_X];
            const u32 src_offset = col * BYTES_PER_PIXEL + row * pitch;

            std::memcpy(output + dst_offset, input + src_offset, BYTES_PER_PIXEL);
        }
    }
}
} // Anonymous namespace
void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel,
@@ -131,81 +204,67 @@ void SwizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_p
void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
u32 bytes_per_pixel, u8* swizzled_data, const u8* unswizzled_data,
u32 block_height_bit, u32 offset_x, u32 offset_y) {
const u32 block_height = 1U << block_height_bit;
const u32 image_width_in_gobs =
(swizzled_width * bytes_per_pixel + (GOB_SIZE_X - 1)) / GOB_SIZE_X;
for (u32 line = 0; line < subrect_height; ++line) {
const u32 dst_y = line + offset_y;
const u32 gob_address_y =
(dst_y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs +
((dst_y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE;
const auto& table = SWIZZLE_TABLE[dst_y % GOB_SIZE_Y];
for (u32 x = 0; x < subrect_width; ++x) {
const u32 dst_x = x + offset_x;
const u32 gob_address =
gob_address_y + (dst_x * bytes_per_pixel / GOB_SIZE_X) * GOB_SIZE * block_height;
const u32 swizzled_offset = gob_address + table[(dst_x * bytes_per_pixel) % GOB_SIZE_X];
const u32 unswizzled_offset = line * source_pitch + x * bytes_per_pixel;
const u8* const source_line = unswizzled_data + unswizzled_offset;
u8* const dest_addr = swizzled_data + swizzled_offset;
std::memcpy(dest_addr, source_line, bytes_per_pixel);
}
switch (bytes_per_pixel) {
#define BPP_CASE(x) \
case x: \
return SwizzleSubrect<x>(subrect_width, subrect_height, source_pitch, swizzled_width, \
swizzled_data, unswizzled_data, block_height_bit, offset_x, \
offset_y);
BPP_CASE(1)
BPP_CASE(2)
BPP_CASE(3)
BPP_CASE(4)
BPP_CASE(6)
BPP_CASE(8)
BPP_CASE(12)
BPP_CASE(16)
#undef BPP_CASE
default:
UNREACHABLE_MSG("Invalid bytes_per_pixel={}", bytes_per_pixel);
}
}
void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 bytes_per_pixel,
u32 block_height, u32 origin_x, u32 origin_y, u8* output, const u8* input) {
const u32 stride = width * bytes_per_pixel;
const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) / GOB_SIZE_X;
const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height);
const u32 block_height_mask = (1U << block_height) - 1;
const u32 x_shift = GOB_SIZE_SHIFT + block_height;
for (u32 line = 0; line < line_count; ++line) {
const u32 src_y = line + origin_y;
const auto& table = SWIZZLE_TABLE[src_y % GOB_SIZE_Y];
const u32 block_y = src_y >> GOB_SIZE_Y_SHIFT;
const u32 src_offset_y = (block_y >> block_height) * block_size +
((block_y & block_height_mask) << GOB_SIZE_SHIFT);
for (u32 column = 0; column < line_length_in; ++column) {
const u32 src_x = (column + origin_x) * bytes_per_pixel;
const u32 src_offset_x = (src_x >> GOB_SIZE_X_SHIFT) << x_shift;
const u32 swizzled_offset = src_offset_y + src_offset_x + table[src_x % GOB_SIZE_X];
const u32 unswizzled_offset = line * pitch + column * bytes_per_pixel;
std::memcpy(output + unswizzled_offset, input + swizzled_offset, bytes_per_pixel);
}
switch (bytes_per_pixel) {
#define BPP_CASE(x) \
case x: \
return UnswizzleSubrect<x>(line_length_in, line_count, pitch, width, block_height, \
origin_x, origin_y, output, input);
BPP_CASE(1)
BPP_CASE(2)
BPP_CASE(3)
BPP_CASE(4)
BPP_CASE(6)
BPP_CASE(8)
BPP_CASE(12)
BPP_CASE(16)
#undef BPP_CASE
default:
UNREACHABLE_MSG("Invalid bytes_per_pixel={}", bytes_per_pixel);
}
}
void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 height,
u32 bytes_per_pixel, u32 block_height, u32 block_depth, u32 origin_x,
u32 origin_y, u8* output, const u8* input) {
UNIMPLEMENTED_IF(origin_x > 0);
UNIMPLEMENTED_IF(origin_y > 0);
const u32 stride = width * bytes_per_pixel;
const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) / GOB_SIZE_X;
const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth);
const u32 block_height_mask = (1U << block_height) - 1;
const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height + block_depth;
for (u32 line = 0; line < line_count; ++line) {
const auto& table = SWIZZLE_TABLE[line % GOB_SIZE_Y];
const u32 block_y = line / GOB_SIZE_Y;
const u32 dst_offset_y =
(block_y >> block_height) * block_size + (block_y & block_height_mask) * GOB_SIZE;
for (u32 x = 0; x < line_length_in; ++x) {
const u32 dst_offset =
((x / GOB_SIZE_X) << x_shift) + dst_offset_y + table[x % GOB_SIZE_X];
const u32 src_offset = x * bytes_per_pixel + line * pitch;
std::memcpy(output + dst_offset, input + src_offset, bytes_per_pixel);
}
switch (bytes_per_pixel) {
#define BPP_CASE(x) \
case x: \
return SwizzleSliceToVoxel<x>(line_length_in, line_count, pitch, width, height, \
block_height, block_depth, origin_x, origin_y, output, \
input);
BPP_CASE(1)
BPP_CASE(2)
BPP_CASE(3)
BPP_CASE(4)
BPP_CASE(6)
BPP_CASE(8)
BPP_CASE(12)
BPP_CASE(16)
#undef BPP_CASE
default:
UNREACHABLE_MSG("Invalid bytes_per_pixel={}", bytes_per_pixel);
}
}
@@ -228,7 +287,7 @@ void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32
u8* dest_addr = swizzle_data + swizzled_offset;
count++;
std::memcpy(dest_addr, source_line, 1);
*dest_addr = *source_line;
}
}
}

View File

@@ -1161,8 +1161,7 @@ void Device::CollectPhysicalMemoryInfo() {
}
void Device::CollectToolingInfo() {
if (!ext_tooling_info || true) {
// Disabled to work around https://github.com/yuzu-emu/yuzu/issues/6835
if (!ext_tooling_info) {
return;
}
const auto vkGetPhysicalDeviceToolPropertiesEXT =