Compare commits

..

11 Commits

Author SHA1 Message Date
Vedarius TopBAE1 Vincent A. Russell
a9ff3a232b Create python-publish.yml 2022-11-26 18:25:50 -06:00
bunnei
eabe45346f Merge pull request #9318 from goldenx86/glsl-ftw
Replace GLSL as the default OpenGL shader backend
2022-11-26 15:57:37 -08:00
Matías Locatti
701ca96827 Oops 2022-11-26 17:39:43 -03:00
Matías Locatti
26211ac339 Replace GLSL as the default OpenGL shader backend
GLASM is not very compatible with the latest games, and too many people have the special superpower to break their Vulkan support.
2022-11-26 17:27:04 -03:00
liamwhite
3e53d8138c Merge pull request #9288 from vonchenplus/deferred_draw
video_core: Fine tune maxwell drawing trigger mechanism
2022-11-26 09:35:45 -05:00
liamwhite
ddca512f3f Merge pull request #9307 from Morph1984/not-used-correctly
maxwell_to_vk: Fix format usage bits and add R16_SINT
2022-11-26 09:08:55 -05:00
liamwhite
e16d1b85f1 Merge pull request #9297 from Kelebek1/sink_oob
[audio_core] Fix an OoB with sample sinking
2022-11-25 12:53:29 -05:00
Morph
852de7a771 maxwell_to_vk: Add R16_SINT
This was somehow missed when the format was added to GL
2022-11-23 21:30:58 -05:00
Morph
ca154d466a maxwell_to_vk: Fix format usage bits
- VK_FORMAT_B8G8R8A8_UNORM supports the STORAGE_IMAGE_BIT
- VK_FORMAT_R4G4B4A4_UNORM_PACK16 does not support the COLOR_ATTACHMENT_BIT
2022-11-23 21:29:43 -05:00
Kelebek1
84d4da89a5 Use the maximum input index for samples buffer span size, not just the input count 2022-11-22 15:32:11 +00:00
FengChen
1d57851fc7 video_core: Optimize maxwell drawing trigger mechanism 2022-11-22 17:53:26 +08:00
9 changed files with 127 additions and 85 deletions

39
.github/workflows/python-publish.yml vendored Normal file
View File

@@ -0,0 +1,39 @@
# This workflow will upload a Python Package using Twine when a release is created
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries

# This workflow uses actions that are not certified by GitHub.
# They are provided by a third-party and are governed by
# separate terms of service, privacy policy, and support
# documentation.

name: Upload Python Package

# Triggered only when a release is published.
on:
  release:
    types: [published]

# The workflow token is limited to reading repository contents.
permissions:
  contents: read

jobs:
  deploy:

    runs-on: ubuntu-latest

    steps:
    - uses: actions/checkout@v3
    - name: Set up Python
      uses: actions/setup-python@v3
      with:
        python-version: '3.x'
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install build
    - name: Build package
      run: python -m build
    - name: Publish package
      # Third-party action referenced by full commit SHA rather than a tag.
      uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
      with:
        user: __token__
        # Read from the repository secret named PYPI_API_TOKEN.
        password: ${{ secrets.PYPI_API_TOKEN }}

View File

@@ -460,21 +460,23 @@ void CommandBuffer::GenerateDeviceSinkCommand(const s32 node_id, const s16 buffe
cmd.session_id = session_id;
cmd.input_count = parameter.input_count;
s16 max_input{0};
for (u32 i = 0; i < parameter.input_count; i++) {
cmd.inputs[i] = buffer_offset + parameter.inputs[i];
max_input = std::max(max_input, cmd.inputs[i]);
}
if (state.upsampler_info != nullptr) {
const auto size_{state.upsampler_info->sample_count * parameter.input_count};
const auto size_bytes{size_ * sizeof(s32)};
const auto addr{memory_pool->Translate(state.upsampler_info->samples_pos, size_bytes)};
cmd.sample_buffer = {reinterpret_cast<s32*>(addr),
parameter.input_count * state.upsampler_info->sample_count};
(max_input + 1) * state.upsampler_info->sample_count};
} else {
cmd.sample_buffer = samples_buffer;
}
cmd.input_count = parameter.input_count;
for (u32 i = 0; i < parameter.input_count; i++) {
cmd.inputs[i] = buffer_offset + parameter.inputs[i];
}
GenerateEnd<DeviceSinkCommand>(cmd);
}

View File

@@ -266,20 +266,19 @@ void SinkStream::ProcessAudioOutAndRender(std::span<s16> output_buffer, std::siz
}
void SinkStream::Stall() {
std::scoped_lock lk{stall_guard};
if (stalled_lock) {
if (stalled) {
return;
}
stalled_lock = system.StallProcesses();
stalled = true;
system.StallProcesses();
}
void SinkStream::Unstall() {
std::scoped_lock lk{stall_guard};
if (!stalled_lock) {
if (!stalled) {
return;
}
system.UnstallProcesses();
stalled_lock.unlock();
stalled = false;
}
} // namespace AudioCore::Sink

View File

@@ -6,7 +6,6 @@
#include <array>
#include <atomic>
#include <memory>
#include <mutex>
#include <span>
#include <vector>
@@ -241,8 +240,8 @@ private:
f32 system_volume{1.0f};
/// Set via IAudioDevice service calls
f32 device_volume{1.0f};
std::mutex stall_guard;
std::unique_lock<std::mutex> stalled_lock;
/// True if coretiming has been stalled
bool stalled{false};
};
using SinkStreamPtr = std::unique_ptr<SinkStream>;

View File

@@ -442,7 +442,7 @@ struct Values {
SwitchableSetting<NvdecEmulation> nvdec_emulation{NvdecEmulation::GPU, "nvdec_emulation"};
SwitchableSetting<bool> accelerate_astc{true, "accelerate_astc"};
SwitchableSetting<bool> use_vsync{true, "use_vsync"};
SwitchableSetting<ShaderBackend, true> shader_backend{ShaderBackend::GLASM, ShaderBackend::GLSL,
SwitchableSetting<ShaderBackend, true> shader_backend{ShaderBackend::GLSL, ShaderBackend::GLSL,
ShaderBackend::SPIRV, "shader_backend"};
SwitchableSetting<bool> use_asynchronous_shaders{false, "use_asynchronous_shaders"};
SwitchableSetting<bool> use_fast_gpu_time{true, "use_fast_gpu_time"};

View File

@@ -189,7 +189,7 @@ struct System::Impl {
kernel.Suspend(false);
core_timing.SyncPause(false);
is_paused.store(false, std::memory_order_relaxed);
is_paused = false;
return status;
}
@@ -200,13 +200,14 @@ struct System::Impl {
core_timing.SyncPause(true);
kernel.Suspend(true);
is_paused.store(true, std::memory_order_relaxed);
is_paused = true;
return status;
}
bool IsPaused() const {
return is_paused.load(std::memory_order_relaxed);
std::unique_lock lk(suspend_guard);
return is_paused;
}
std::unique_lock<std::mutex> StallProcesses() {
@@ -217,7 +218,7 @@ struct System::Impl {
}
void UnstallProcesses() {
if (!IsPaused()) {
if (!is_paused) {
core_timing.SyncPause(false);
kernel.Suspend(false);
}
@@ -464,7 +465,7 @@ struct System::Impl {
}
mutable std::mutex suspend_guard;
std::atomic_bool is_paused{};
bool is_paused{};
std::atomic<bool> is_shutting_down{};
Timing::CoreTiming core_timing;

View File

@@ -126,6 +126,7 @@ void Maxwell3D::InitializeRegisterDefaults() {
draw_command[MAXWELL3D_REG_INDEX(draw_inline_index)] = true;
draw_command[MAXWELL3D_REG_INDEX(inline_index_2x16.even)] = true;
draw_command[MAXWELL3D_REG_INDEX(inline_index_4x8.index0)] = true;
draw_command[MAXWELL3D_REG_INDEX(draw.instance_id)] = true;
}
void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool is_last_call) {
@@ -285,31 +286,58 @@ void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
ASSERT_MSG(method < Regs::NUM_REGS,
"Invalid Maxwell3D register, increase the size of the Regs structure");
const u32 argument = ProcessShadowRam(method, method_argument);
ProcessDirtyRegisters(method, argument);
if (draw_command[method]) {
regs.reg_array[method] = method_argument;
deferred_draw_method.push_back(method);
auto u32_to_u8 = [&](const u32 argument) {
inline_index_draw_indexes.push_back(static_cast<u8>(argument & 0x000000ff));
inline_index_draw_indexes.push_back(static_cast<u8>((argument & 0x0000ff00) >> 8));
inline_index_draw_indexes.push_back(static_cast<u8>((argument & 0x00ff0000) >> 16));
inline_index_draw_indexes.push_back(static_cast<u8>((argument & 0xff000000) >> 24));
auto update_inline_index = [&](const u32 index) {
inline_index_draw_indexes.push_back(static_cast<u8>(index & 0x000000ff));
inline_index_draw_indexes.push_back(static_cast<u8>((index & 0x0000ff00) >> 8));
inline_index_draw_indexes.push_back(static_cast<u8>((index & 0x00ff0000) >> 16));
inline_index_draw_indexes.push_back(static_cast<u8>((index & 0xff000000) >> 24));
draw_mode = DrawMode::InlineIndex;
};
if (MAXWELL3D_REG_INDEX(draw_inline_index) == method) {
u32_to_u8(method_argument);
} else if (MAXWELL3D_REG_INDEX(inline_index_2x16.even) == method) {
u32_to_u8(regs.inline_index_2x16.even);
u32_to_u8(regs.inline_index_2x16.odd);
} else if (MAXWELL3D_REG_INDEX(inline_index_4x8.index0) == method) {
u32_to_u8(regs.inline_index_4x8.index0);
u32_to_u8(regs.inline_index_4x8.index1);
u32_to_u8(regs.inline_index_4x8.index2);
u32_to_u8(regs.inline_index_4x8.index3);
switch (method) {
case MAXWELL3D_REG_INDEX(draw.end):
switch (draw_mode) {
case DrawMode::General:
ProcessDraw(1);
break;
case DrawMode::InlineIndex:
regs.index_buffer.count = static_cast<u32>(inline_index_draw_indexes.size() / 4);
regs.index_buffer.format = Regs::IndexFormat::UnsignedInt;
ProcessDraw(1);
inline_index_draw_indexes.clear();
break;
case DrawMode::Instance:
break;
}
break;
case MAXWELL3D_REG_INDEX(draw_inline_index):
update_inline_index(method_argument);
break;
case MAXWELL3D_REG_INDEX(inline_index_2x16.even):
update_inline_index(regs.inline_index_2x16.even);
update_inline_index(regs.inline_index_2x16.odd);
break;
case MAXWELL3D_REG_INDEX(inline_index_4x8.index0):
update_inline_index(regs.inline_index_4x8.index0);
update_inline_index(regs.inline_index_4x8.index1);
update_inline_index(regs.inline_index_4x8.index2);
update_inline_index(regs.inline_index_4x8.index3);
break;
case MAXWELL3D_REG_INDEX(draw.instance_id):
draw_mode =
(regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::Subsequent) ||
(regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::Unchanged)
? DrawMode::Instance
: DrawMode::General;
break;
}
} else {
ProcessDeferredDraw();
const u32 argument = ProcessShadowRam(method, method_argument);
ProcessDirtyRegisters(method, argument);
ProcessMethodCall(method, argument, method_argument, is_last_call);
}
}
@@ -620,57 +648,27 @@ void Maxwell3D::ProcessDraw(u32 instance_count) {
}
void Maxwell3D::ProcessDeferredDraw() {
if (deferred_draw_method.empty()) {
if (draw_mode != DrawMode::Instance || deferred_draw_method.empty()) {
return;
}
enum class DrawMode {
Undefined,
General,
Instance,
};
DrawMode draw_mode{DrawMode::Undefined};
u32 method_count = static_cast<u32>(deferred_draw_method.size());
u32 method = deferred_draw_method[method_count - 1];
if (MAXWELL3D_REG_INDEX(draw.end) != method) {
return;
}
draw_mode = (regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::Subsequent) ||
(regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::Unchanged)
? DrawMode::Instance
: DrawMode::General;
u32 instance_count = 0;
if (draw_mode == DrawMode::Instance) {
u32 vertex_buffer_count = 0;
u32 index_buffer_count = 0;
for (u32 index = 0; index < method_count; ++index) {
method = deferred_draw_method[index];
if (method == MAXWELL3D_REG_INDEX(vertex_buffer.count)) {
instance_count = ++vertex_buffer_count;
} else if (method == MAXWELL3D_REG_INDEX(index_buffer.count)) {
instance_count = ++index_buffer_count;
}
}
ASSERT_MSG(!(vertex_buffer_count && index_buffer_count),
"Instance both indexed and direct?");
} else {
instance_count = 1;
for (u32 index = 0; index < method_count; ++index) {
method = deferred_draw_method[index];
if (MAXWELL3D_REG_INDEX(draw_inline_index) == method ||
MAXWELL3D_REG_INDEX(inline_index_2x16.even) == method ||
MAXWELL3D_REG_INDEX(inline_index_4x8.index0) == method) {
regs.index_buffer.count = static_cast<u32>(inline_index_draw_indexes.size() / 4);
regs.index_buffer.format = Regs::IndexFormat::UnsignedInt;
break;
}
u32 instance_count = 1;
u32 vertex_buffer_count = 0;
u32 index_buffer_count = 0;
for (u32 index = 0; index < method_count; ++index) {
u32 method = deferred_draw_method[index];
if (method == MAXWELL3D_REG_INDEX(vertex_buffer.count)) {
instance_count = ++vertex_buffer_count;
} else if (method == MAXWELL3D_REG_INDEX(index_buffer.count)) {
instance_count = ++index_buffer_count;
}
}
ASSERT_MSG(!(vertex_buffer_count && index_buffer_count), "Instance both indexed and direct?");
ProcessDraw(instance_count);
deferred_draw_method.clear();
inline_index_draw_indexes.clear();
}
} // namespace Tegra::Engines

View File

@@ -3148,10 +3148,12 @@ private:
/// Handles use of topology overrides (e.g., to avoid using a topology assigned from a macro)
void ProcessTopologyOverride();
void ProcessDraw(u32 instance_count = 1);
/// Handles deferred draw (e.g., instance draw).
void ProcessDeferredDraw();
/// Handles a draw.
void ProcessDraw(u32 instance_count = 1);
/// Returns a query's value or an empty object if the value will be deferred through a cache.
std::optional<u64> GetQueryResult();
@@ -3178,6 +3180,8 @@ private:
std::array<bool, Regs::NUM_REGS> draw_command{};
std::vector<u32> deferred_draw_method;
enum class DrawMode : u32 { General = 0, Instance, InlineIndex };
DrawMode draw_mode{DrawMode::General};
};
#define ASSERT_REG_POSITION(field_name, position) \

View File

@@ -150,7 +150,7 @@ struct FormatTuple {
{VK_FORMAT_BC6H_UFLOAT_BLOCK}, // BC6H_UFLOAT
{VK_FORMAT_BC6H_SFLOAT_BLOCK}, // BC6H_SFLOAT
{VK_FORMAT_ASTC_4x4_UNORM_BLOCK}, // ASTC_2D_4X4_UNORM
{VK_FORMAT_B8G8R8A8_UNORM, Attachable}, // B8G8R8A8_UNORM
{VK_FORMAT_B8G8R8A8_UNORM, Attachable | Storage}, // B8G8R8A8_UNORM
{VK_FORMAT_R32G32B32A32_SFLOAT, Attachable | Storage}, // R32G32B32A32_FLOAT
{VK_FORMAT_R32G32B32A32_SINT, Attachable | Storage}, // R32G32B32A32_SINT
{VK_FORMAT_R32G32_SFLOAT, Attachable | Storage}, // R32G32_FLOAT
@@ -160,7 +160,7 @@ struct FormatTuple {
{VK_FORMAT_R16_UNORM, Attachable | Storage}, // R16_UNORM
{VK_FORMAT_R16_SNORM, Attachable | Storage}, // R16_SNORM
{VK_FORMAT_R16_UINT, Attachable | Storage}, // R16_UINT
{VK_FORMAT_UNDEFINED}, // R16_SINT
{VK_FORMAT_R16_SINT, Attachable | Storage}, // R16_SINT
{VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // R16G16_UNORM
{VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // R16G16_FLOAT
{VK_FORMAT_R16G16_UINT, Attachable | Storage}, // R16G16_UINT
@@ -184,7 +184,7 @@ struct FormatTuple {
{VK_FORMAT_BC2_SRGB_BLOCK}, // BC2_SRGB
{VK_FORMAT_BC3_SRGB_BLOCK}, // BC3_SRGB
{VK_FORMAT_BC7_SRGB_BLOCK}, // BC7_SRGB
{VK_FORMAT_R4G4B4A4_UNORM_PACK16, Attachable}, // A4B4G4R4_UNORM
{VK_FORMAT_R4G4B4A4_UNORM_PACK16}, // A4B4G4R4_UNORM
{VK_FORMAT_R4G4_UNORM_PACK8}, // G4R4_UNORM
{VK_FORMAT_ASTC_4x4_SRGB_BLOCK}, // ASTC_2D_4X4_SRGB
{VK_FORMAT_ASTC_8x8_SRGB_BLOCK}, // ASTC_2D_8X8_SRGB