Compare commits

..

39 Commits

Author SHA1 Message Date
z0z0z
637f8f9f1c Fix Linux 2019-05-11 15:36:01 -04:00
bunnei
7cb17834c7 Merge pull request #2437 from lioncash/audctl
service/audctl: Update documentation comments to be relative to 8.0.0
2019-05-09 13:24:13 -04:00
bunnei
f3317cf2db Merge pull request #2454 from lioncash/cflag
src/CMakeLists: Add /Zc:externConstexpr to the MSVC build flags
2019-05-09 13:23:49 -04:00
bunnei
daca045fcd Merge pull request #2442 from FernandoS27/astc-fix
Fix Layered ASTC Textures
2019-05-09 13:23:14 -04:00
bunnei
f69d3a6351 Merge pull request #2443 from ReinUsesLisp/skip-repeated-variants
gl_shader_disk_cache: Skip stored shader variants instead of asserting
2019-05-09 13:22:42 -04:00
bunnei
5907619a04 Merge pull request #2445 from FearlessTobi/port-4749
Port citra-emu/citra#4749: "web_service: Misc fixes"
2019-05-09 13:22:00 -04:00
bunnei
9567b3a293 Merge pull request #2458 from lioncash/hotkey
yuzu/hotkeys: Remove unnecessary constructor
2019-05-09 13:21:36 -04:00
bunnei
c6f3831320 Merge pull request #2456 from lioncash/qualifier
yuzu/compatdb: Remove unnecessary qualifiers
2019-05-09 13:21:04 -04:00
bunnei
8abf0add04 Merge pull request #2459 from lioncash/what
configure_dialog: Remove the Whats This? button from the dialog
2019-05-09 13:20:12 -04:00
bunnei
5b6571c170 Merge pull request #2453 from lioncash/enum
core/memory: Remove unused FlushMode enum
2019-05-09 13:19:49 -04:00
bunnei
c27b81cb85 Merge pull request #2429 from FernandoS27/compute
Corrections and Implementation on GPU Engines
2019-05-09 13:19:22 -04:00
bunnei
0e9a17b029 Merge pull request #2440 from lioncash/dynarmic
externals: Update dynarmic to master
2019-05-09 13:18:49 -04:00
Lioncash
f3c18d622e configure_dialog: Remove the Whats This? button from the dialog 2019-05-09 03:20:13 -04:00
Lioncash
8bdef4f951 yuzu/hotkeys: Remove unnecessary constructor
The behavior of the Hotkey constructor is already accomplished via in-class member
initializers, so the constructor is superfluous here.
2019-05-09 02:17:22 -04:00
Lioncash
a97120efc1 yuzu/compatdb: Remove unnecessary qualifiers
Keeps the code consistent in regards to how the buttons are referred to.
2019-05-09 01:08:06 -04:00
Lioncash
70c6506a7e src/CMakeLists: Add /Zc:externConstexpr to the MSVC build flags
The C++ standard allows constexpr variables declared with the extern
keyword to have external linkage. Previously MSVC wasn't abiding by
this. This just makes the compiler more standards compliant during
builds.

Given we currently don't make use of anything that would break by this,
this is safe to enable.
2019-05-07 14:06:22 -04:00
Lioncash
6ca7241bd9 src/CMakeLists: Vertically order compilation flags
Makes it much nicer to visually scan the options. This also starts the
flag descriptions from the same column for the same reason.
2019-05-07 14:05:48 -04:00
Lioncash
495a8d8d95 core/memory: Remove unused FlushMode enum
Recent changes to memory-related code resulted in this being unused, so
we can remove it.
2019-05-07 13:55:17 -04:00
Lioncash
0964444529 externals: Update dynarmic to master
Better instruction support has been added since the last update.
2019-05-07 10:39:25 -04:00
Merry
c63e68c480 Merge pull request #2451 from lioncash/travis
travis: Update to using Xcode 10.2
2019-05-07 15:36:13 +01:00
Lioncash
4aefd45193 travis: Update to using Xcode 10.2
Keeps the CI toolchain updated. This is also necessary for updating
dynarmic.
2019-05-07 06:40:30 -04:00
Rodrigo Locatti
6743982d28 Merge pull request #2447 from lioncash/dtor
core/frontend/emu_window: Make GraphicsContext's destructor virtual
2019-05-07 05:54:04 -03:00
Rodrigo Locatti
57db3f6763 Merge pull request #2448 from lioncash/pragma
common/zstd_compression: Remove #pragma once directive from source file
2019-05-07 05:51:37 -03:00
Rodrigo Locatti
a206418846 Merge pull request #2449 from lioncash/unused-var
gl_rasterizer: Silence unused variable warnings
2019-05-07 05:50:59 -03:00
zhupengfei
10c4f23953 core/telemetry_session: Only create the backend when we really need it
The backend is not used until we decide to submit the testcase/telemetry, and creating it early prevents users from updating the credentials properly while the games are running.
2019-05-04 19:45:48 +02:00
Lioncash
9e15193ef8 shader/decode/texture: Remove unused variable
This isn't used anywhere, so we can get rid of it.
2019-05-04 02:10:38 -04:00
Lioncash
08b270676b gl_rasterizer: Silence unused variable warning
Makes use of src, so it's not considered unused.
2019-05-04 02:00:17 -04:00
Lioncash
1230a0e7ce core/frontend/emu_window: Make GraphicsContext's destructor virtual
This class is used in a polymorphic context, so destruction of the
context will lead to undefined behavior if the destructor isn't virtual.
2019-05-04 01:47:38 -04:00
Fernando Sahmkow
e64c41efe8 Refactors and name corrections. 2019-05-01 15:31:39 -04:00
ReinUsesLisp
4aa081b4e7 gl_shader_disk_cache: Skip stored shader variants instead of asserting
Instead of asserting on already stored shader variants, silently skip them.
This shouldn't be happening but when a shader is invalidated and it is
not stored in the shader cache, this assert would hit and save that
shader anyways when the asserts are disabled.
2019-05-01 00:36:11 -03:00
Fernando Sahmkow
95261639fb Fix Layered ASTC Textures
By adding the missing layer offset in ASTC compression.
2019-04-30 23:02:31 -04:00
Lioncash
565fce71b1 service/audctl: Update documentation comments to be relative to 8.0.0
The state of these service calls are still the same in version 8.0.0.
2019-04-27 23:17:58 -04:00
Fernando Sahmkow
b3118ee316 Fixes and Corrections to DMA Engine 2019-04-23 15:28:18 -04:00
Fernando Sahmkow
f1e5314f1a Add Swizzle Parameters to the DMA engine 2019-04-23 11:21:00 -04:00
Fernando Sahmkow
e140e2ebc6 Add Documentation Headers to all the GPU Engines 2019-04-23 08:44:52 -04:00
Fernando Sahmkow
021d28c9b8 Corrections and styling 2019-04-23 08:02:24 -04:00
Fernando Sahmkow
701ce1c9d0 Implement Maxwell3D Data Upload 2019-04-22 19:27:36 -04:00
Fernando Sahmkow
e4ff140b99 Introduce skeleton of the GPU Compute Engine. 2019-04-22 19:05:43 -04:00
Fernando Sahmkow
a91d3fc639 Revamp Kepler Memory to use a subegine to manage uploads 2019-04-22 18:50:56 -04:00
31 changed files with 547 additions and 188 deletions

View File

@@ -24,7 +24,7 @@ matrix:
- os: osx
env: NAME="macos build"
sudo: false
osx_image: xcode10.1
osx_image: xcode10.2
install: "./.travis/macos/deps.sh"
script: "./.travis/macos/build.sh"
after_success: "./.travis/macos/upload.sh"

View File

@@ -7,6 +7,10 @@ include(DownloadExternals)
add_library(catch-single-include INTERFACE)
target_include_directories(catch-single-include INTERFACE catch/single_include)
# libfmt
add_subdirectory(fmt)
add_library(fmt::fmt ALIAS fmt)
# Dynarmic
if (ARCHITECTURE_x86_64)
set(DYNARMIC_TESTS OFF)
@@ -14,10 +18,6 @@ if (ARCHITECTURE_x86_64)
add_subdirectory(dynarmic)
endif()
# libfmt
add_subdirectory(fmt)
add_library(fmt::fmt ALIAS fmt)
# getopt
if (MSVC)
add_subdirectory(getopt)

View File

@@ -21,15 +21,27 @@ if (MSVC)
# Ensure that projects build with Unicode support.
add_definitions(-DUNICODE -D_UNICODE)
# /W3 - Level 3 warnings
# /MP - Multi-threaded compilation
# /Zi - Output debugging information
# /Zo - enhanced debug info for optimized builds
# /permissive- - enables stricter C++ standards conformance checks
# /EHsc - C++-only exception handling semantics
# /Zc:throwingNew - let codegen assume `operator new` will never return null
# /Zc:inline - let codegen omit inline functions in object files
add_compile_options(/W3 /MP /Zi /Zo /permissive- /EHsc /std:c++latest /Zc:throwingNew,inline)
# /W3 - Level 3 warnings
# /MP - Multi-threaded compilation
# /Zi - Output debugging information
# /Zo - Enhanced debug info for optimized builds
# /permissive- - Enables stricter C++ standards conformance checks
# /EHsc - C++-only exception handling semantics
# /Zc:externConstexpr - Allow extern constexpr variables to have external linkage, like the standard mandates
# /Zc:inline - Let codegen omit inline functions in object files
# /Zc:throwingNew - Let codegen assume `operator new` (without std::nothrow) will never return null
add_compile_options(
/W3
/MP
/Zi
/Zo
/permissive-
/EHsc
/std:c++latest
/Zc:externConstexpr
/Zc:inline
/Zc:throwingNew
)
# /GS- - No stack buffer overflow checks
add_compile_options("$<$<CONFIG:Release>:/GS->")

View File

@@ -10,6 +10,8 @@
namespace Core::Frontend {
GraphicsContext::~GraphicsContext() = default;
class EmuWindow::TouchState : public Input::Factory<Input::TouchDevice>,
public std::enable_shared_from_this<TouchState> {
public:

View File

@@ -19,6 +19,8 @@ namespace Core::Frontend {
*/
class GraphicsContext {
public:
virtual ~GraphicsContext();
/// Makes the graphics context current for the caller thread
virtual void MakeCurrent() = 0;

View File

@@ -50,7 +50,7 @@ void AudCtl::GetTargetVolumeMin(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Audio, "called.");
// This service function is currently hardcoded on the
// actual console to this value (as of 6.0.0).
// actual console to this value (as of 8.0.0).
constexpr s32 target_min_volume = 0;
IPC::ResponseBuilder rb{ctx, 3};
@@ -62,7 +62,7 @@ void AudCtl::GetTargetVolumeMax(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Audio, "called.");
// This service function is currently hardcoded on the
// actual console to this value (as of 6.0.0).
// actual console to this value (as of 8.0.0).
constexpr s32 target_max_volume = 15;
IPC::ResponseBuilder rb{ctx, 3};

View File

@@ -72,15 +72,6 @@ u8* GetPointer(VAddr vaddr);
std::string ReadCString(VAddr vaddr, std::size_t max_length);
enum class FlushMode {
/// Write back modified surfaces to RAM
Flush,
/// Remove region from the cache
Invalidate,
/// Write back modified surfaces to RAM, and also remove them from the cache
FlushAndInvalidate,
};
/**
* Mark each page touching the region as cached.
*/

View File

@@ -102,12 +102,6 @@ bool VerifyLogin(const std::string& username, const std::string& token) {
}
TelemetrySession::TelemetrySession() {
#ifdef ENABLE_WEB_SERVICE
backend = std::make_unique<WebService::TelemetryJson>(
Settings::values.web_api_url, Settings::values.yuzu_username, Settings::values.yuzu_token);
#else
backend = std::make_unique<Telemetry::NullVisitor>();
#endif
// Log one-time top-level information
AddField(Telemetry::FieldType::None, "TelemetryId", GetTelemetryId());
@@ -175,9 +169,14 @@ TelemetrySession::~TelemetrySession() {
.count()};
AddField(Telemetry::FieldType::Session, "Shutdown_Time", shutdown_time);
#ifdef ENABLE_WEB_SERVICE
auto backend = std::make_unique<WebService::TelemetryJson>(
Settings::values.web_api_url, Settings::values.yuzu_username, Settings::values.yuzu_token);
#else
auto backend = std::make_unique<Telemetry::NullVisitor>();
#endif
// Complete the session, submitting to web service if necessary
// This is just a placeholder to wrap up the session once the core completes and this is
// destroyed. This will be moved elsewhere once we are actually doing real I/O with the service.
field_collection.Accept(*backend);
if (Settings::values.enable_telemetry)
backend->Complete();
@@ -186,6 +185,8 @@ TelemetrySession::~TelemetrySession() {
bool TelemetrySession::SubmitTestcase() {
#ifdef ENABLE_WEB_SERVICE
auto backend = std::make_unique<WebService::TelemetryJson>(
Settings::values.web_api_url, Settings::values.yuzu_username, Settings::values.yuzu_token);
field_collection.Accept(*backend);
return backend->SubmitTestcase();
#else

View File

@@ -39,7 +39,6 @@ public:
private:
Telemetry::FieldCollection field_collection; ///< Tracks all added fields for the session
std::unique_ptr<Telemetry::VisitorInterface> backend; ///< Backend interface that logs fields
};
/**

View File

@@ -3,6 +3,8 @@ add_library(video_core STATIC
dma_pusher.h
debug_utils/debug_utils.cpp
debug_utils/debug_utils.h
engines/engine_upload.cpp
engines/engine_upload.h
engines/fermi_2d.cpp
engines/fermi_2d.h
engines/kepler_compute.cpp

View File

@@ -0,0 +1,48 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "video_core/engines/engine_upload.h"
#include "video_core/memory_manager.h"
#include "video_core/textures/decoders.h"
namespace Tegra::Engines::Upload {
State::State(MemoryManager& memory_manager, Registers& regs)
: memory_manager(memory_manager), regs(regs) {}
void State::ProcessExec(const bool is_linear) {
write_offset = 0;
copy_size = regs.line_length_in * regs.line_count;
inner_buffer.resize(copy_size);
this->is_linear = is_linear;
}
void State::ProcessData(const u32 data, const bool is_last_call) {
const u32 sub_copy_size = std::min(4U, copy_size - write_offset);
std::memcpy(&inner_buffer[write_offset], &data, sub_copy_size);
write_offset += sub_copy_size;
if (!is_last_call) {
return;
}
const GPUVAddr address{regs.dest.Address()};
if (is_linear) {
memory_manager.WriteBlock(address, inner_buffer.data(), copy_size);
} else {
UNIMPLEMENTED_IF(regs.dest.z != 0);
UNIMPLEMENTED_IF(regs.dest.depth != 1);
UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1);
UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1);
const std::size_t dst_size = Tegra::Texture::CalculateSize(
true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1);
tmp_buffer.resize(dst_size);
memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size);
Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x, regs.dest.y,
regs.dest.BlockHeight(), copy_size, inner_buffer.data(),
tmp_buffer.data());
memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size);
}
}
} // namespace Tegra::Engines::Upload

View File

@@ -0,0 +1,75 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <cstddef>
#include <vector>
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
namespace Tegra {
class MemoryManager;
}
namespace Tegra::Engines::Upload {
struct Registers {
u32 line_length_in;
u32 line_count;
struct {
u32 address_high;
u32 address_low;
u32 pitch;
union {
BitField<0, 4, u32> block_width;
BitField<4, 4, u32> block_height;
BitField<8, 4, u32> block_depth;
};
u32 width;
u32 height;
u32 depth;
u32 z;
u32 x;
u32 y;
GPUVAddr Address() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low);
}
u32 BlockWidth() const {
return 1U << block_width.Value();
}
u32 BlockHeight() const {
return 1U << block_height.Value();
}
u32 BlockDepth() const {
return 1U << block_depth.Value();
}
} dest;
};
class State {
public:
State(MemoryManager& memory_manager, Registers& regs);
~State() = default;
void ProcessExec(const bool is_linear);
void ProcessData(const u32 data, const bool is_last_call);
private:
u32 write_offset = 0;
u32 copy_size = 0;
std::vector<u8> inner_buffer;
std::vector<u8> tmp_buffer;
bool is_linear = false;
Registers& regs;
MemoryManager& memory_manager;
};
} // namespace Tegra::Engines::Upload

View File

@@ -21,6 +21,12 @@ class RasterizerInterface;
namespace Tegra::Engines {
/**
* This Engine is known as G80_2D. Documentation can be found in:
* https://github.com/envytools/envytools/blob/master/rnndb/graph/g80_2d.xml
* https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h
*/
#define FERMI2D_REG_INDEX(field_name) \
(offsetof(Tegra::Engines::Fermi2D::Regs, field_name) / sizeof(u32))

View File

@@ -4,12 +4,21 @@
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/core.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_base.h"
#include "video_core/textures/decoders.h"
namespace Tegra::Engines {
KeplerCompute::KeplerCompute(MemoryManager& memory_manager) : memory_manager{memory_manager} {}
KeplerCompute::KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
MemoryManager& memory_manager)
: system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, upload_state{
memory_manager,
regs.upload} {}
KeplerCompute::~KeplerCompute() = default;
@@ -20,14 +29,34 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) {
regs.reg_array[method_call.method] = method_call.argument;
switch (method_call.method) {
case KEPLER_COMPUTE_REG_INDEX(exec_upload): {
upload_state.ProcessExec(regs.exec_upload.linear != 0);
break;
}
case KEPLER_COMPUTE_REG_INDEX(data_upload): {
const bool is_last_call = method_call.IsLastCall();
upload_state.ProcessData(method_call.argument, is_last_call);
if (is_last_call) {
system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
}
break;
}
case KEPLER_COMPUTE_REG_INDEX(launch):
// Abort execution since compute shaders can be used to alter game memory (e.g. CUDA
// kernels)
UNREACHABLE_MSG("Compute shaders are not implemented");
ProcessLaunch();
break;
default:
break;
}
}
void KeplerCompute::ProcessLaunch() {
const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address();
memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description,
LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32));
const GPUVAddr code_loc = regs.code_loc.Address() + launch_description.program_start;
LOG_WARNING(HW_GPU, "Compute Kernel Execute at Address 0x{:016x}, STUBBED", code_loc);
}
} // namespace Tegra::Engines

View File

@@ -6,22 +6,40 @@
#include <array>
#include <cstddef>
#include <vector>
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "video_core/engines/engine_upload.h"
#include "video_core/gpu.h"
namespace Core {
class System;
}
namespace Tegra {
class MemoryManager;
}
namespace VideoCore {
class RasterizerInterface;
}
namespace Tegra::Engines {
/**
* This Engine is known as GK104_Compute. Documentation can be found in:
* https://github.com/envytools/envytools/blob/master/rnndb/graph/gk104_compute.xml
* https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nve4_compute.xml.h
*/
#define KEPLER_COMPUTE_REG_INDEX(field_name) \
(offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32))
class KeplerCompute final {
public:
explicit KeplerCompute(MemoryManager& memory_manager);
explicit KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
MemoryManager& memory_manager);
~KeplerCompute();
static constexpr std::size_t NumConstBuffers = 8;
@@ -31,30 +49,181 @@ public:
union {
struct {
INSERT_PADDING_WORDS(0xAF);
INSERT_PADDING_WORDS(0x60);
Upload::Registers upload;
struct {
union {
BitField<0, 1, u32> linear;
};
} exec_upload;
u32 data_upload;
INSERT_PADDING_WORDS(0x3F);
struct {
u32 address;
GPUVAddr Address() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address) << 8));
}
} launch_desc_loc;
INSERT_PADDING_WORDS(0x1);
u32 launch;
INSERT_PADDING_WORDS(0xC48);
INSERT_PADDING_WORDS(0x4A7);
struct {
u32 address_high;
u32 address_low;
u32 limit;
GPUVAddr Address() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
address_low);
}
} tsc;
INSERT_PADDING_WORDS(0x3);
struct {
u32 address_high;
u32 address_low;
u32 limit;
GPUVAddr Address() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
address_low);
}
} tic;
INSERT_PADDING_WORDS(0x22);
struct {
u32 address_high;
u32 address_low;
GPUVAddr Address() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
address_low);
}
} code_loc;
INSERT_PADDING_WORDS(0x3FE);
u32 texture_const_buffer_index;
INSERT_PADDING_WORDS(0x374);
};
std::array<u32, NUM_REGS> reg_array;
};
} regs{};
struct LaunchParams {
static constexpr std::size_t NUM_LAUNCH_PARAMETERS = 0x40;
INSERT_PADDING_WORDS(0x8);
u32 program_start;
INSERT_PADDING_WORDS(0x2);
BitField<30, 1, u32> linked_tsc;
BitField<0, 31, u32> grid_dim_x;
union {
BitField<0, 16, u32> grid_dim_y;
BitField<16, 16, u32> grid_dim_z;
};
INSERT_PADDING_WORDS(0x3);
BitField<0, 16, u32> shared_alloc;
BitField<0, 31, u32> block_dim_x;
union {
BitField<0, 16, u32> block_dim_y;
BitField<16, 16, u32> block_dim_z;
};
union {
BitField<0, 8, u32> const_buffer_enable_mask;
BitField<29, 2, u32> cache_layout;
} memory_config;
INSERT_PADDING_WORDS(0x8);
struct {
u32 address_low;
union {
BitField<0, 8, u32> address_high;
BitField<15, 17, u32> size;
};
GPUVAddr Address() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high.Value()) << 32) |
address_low);
}
} const_buffer_config[8];
union {
BitField<0, 20, u32> local_pos_alloc;
BitField<27, 5, u32> barrier_alloc;
};
union {
BitField<0, 20, u32> local_neg_alloc;
BitField<24, 5, u32> gpr_alloc;
};
INSERT_PADDING_WORDS(0x11);
} launch_description;
struct {
u32 write_offset = 0;
u32 copy_size = 0;
std::vector<u8> inner_buffer;
} state{};
static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32),
"KeplerCompute Regs has wrong size");
static_assert(sizeof(LaunchParams) == LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32),
"KeplerCompute LaunchParams has wrong size");
/// Write the value to the register identified by method.
void CallMethod(const GPU::MethodCall& method_call);
private:
Core::System& system;
VideoCore::RasterizerInterface& rasterizer;
MemoryManager& memory_manager;
Upload::State upload_state;
void ProcessLaunch();
};
#define ASSERT_REG_POSITION(field_name, position) \
static_assert(offsetof(KeplerCompute::Regs, field_name) == position * 4, \
"Field " #field_name " has invalid position")
#define ASSERT_LAUNCH_PARAM_POSITION(field_name, position) \
static_assert(offsetof(KeplerCompute::LaunchParams, field_name) == position * 4, \
"Field " #field_name " has invalid position")
ASSERT_REG_POSITION(upload, 0x60);
ASSERT_REG_POSITION(exec_upload, 0x6C);
ASSERT_REG_POSITION(data_upload, 0x6D);
ASSERT_REG_POSITION(launch, 0xAF);
ASSERT_REG_POSITION(tsc, 0x557);
ASSERT_REG_POSITION(tic, 0x55D);
ASSERT_REG_POSITION(code_loc, 0x582);
ASSERT_REG_POSITION(texture_const_buffer_index, 0x982);
ASSERT_LAUNCH_PARAM_POSITION(program_start, 0x8);
ASSERT_LAUNCH_PARAM_POSITION(grid_dim_x, 0xC);
ASSERT_LAUNCH_PARAM_POSITION(shared_alloc, 0x11);
ASSERT_LAUNCH_PARAM_POSITION(block_dim_x, 0x12);
ASSERT_LAUNCH_PARAM_POSITION(memory_config, 0x14);
ASSERT_LAUNCH_PARAM_POSITION(const_buffer_config, 0x1D);
#undef ASSERT_REG_POSITION

View File

@@ -14,9 +14,8 @@
namespace Tegra::Engines {
KeplerMemory::KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
MemoryManager& memory_manager)
: system{system}, rasterizer{rasterizer}, memory_manager{memory_manager} {}
KeplerMemory::KeplerMemory(Core::System& system, MemoryManager& memory_manager)
: system{system}, memory_manager{memory_manager}, upload_state{memory_manager, regs.upload} {}
KeplerMemory::~KeplerMemory() = default;
@@ -28,46 +27,18 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) {
switch (method_call.method) {
case KEPLERMEMORY_REG_INDEX(exec): {
ProcessExec();
upload_state.ProcessExec(regs.exec.linear != 0);
break;
}
case KEPLERMEMORY_REG_INDEX(data): {
ProcessData(method_call.argument, method_call.IsLastCall());
const bool is_last_call = method_call.IsLastCall();
upload_state.ProcessData(method_call.argument, is_last_call);
if (is_last_call) {
system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
}
break;
}
}
}
void KeplerMemory::ProcessExec() {
state.write_offset = 0;
state.copy_size = regs.line_length_in * regs.line_count;
state.inner_buffer.resize(state.copy_size);
}
void KeplerMemory::ProcessData(u32 data, bool is_last_call) {
const u32 sub_copy_size = std::min(4U, state.copy_size - state.write_offset);
std::memcpy(&state.inner_buffer[state.write_offset], &regs.data, sub_copy_size);
state.write_offset += sub_copy_size;
if (is_last_call) {
const GPUVAddr address{regs.dest.Address()};
if (regs.exec.linear != 0) {
memory_manager.WriteBlock(address, state.inner_buffer.data(), state.copy_size);
} else {
UNIMPLEMENTED_IF(regs.dest.z != 0);
UNIMPLEMENTED_IF(regs.dest.depth != 1);
UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1);
UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1);
const std::size_t dst_size = Tegra::Texture::CalculateSize(
true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1);
std::vector<u8> tmp_buffer(dst_size);
memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size);
Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x,
regs.dest.y, regs.dest.BlockHeight(), state.copy_size,
state.inner_buffer.data(), tmp_buffer.data());
memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size);
}
system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
}
}
} // namespace Tegra::Engines

View File

@@ -10,6 +10,7 @@
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "video_core/engines/engine_upload.h"
#include "video_core/gpu.h"
namespace Core {
@@ -20,19 +21,20 @@ namespace Tegra {
class MemoryManager;
}
namespace VideoCore {
class RasterizerInterface;
}
namespace Tegra::Engines {
/**
* This Engine is known as P2MF. Documentation can be found in:
* https://github.com/envytools/envytools/blob/master/rnndb/graph/gk104_p2mf.xml
* https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nve4_p2mf.xml.h
*/
#define KEPLERMEMORY_REG_INDEX(field_name) \
(offsetof(Tegra::Engines::KeplerMemory::Regs, field_name) / sizeof(u32))
class KeplerMemory final {
public:
KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
MemoryManager& memory_manager);
KeplerMemory(Core::System& system, MemoryManager& memory_manager);
~KeplerMemory();
/// Write the value to the register identified by method.
@@ -45,42 +47,7 @@ public:
struct {
INSERT_PADDING_WORDS(0x60);
u32 line_length_in;
u32 line_count;
struct {
u32 address_high;
u32 address_low;
u32 pitch;
union {
BitField<0, 4, u32> block_width;
BitField<4, 4, u32> block_height;
BitField<8, 4, u32> block_depth;
};
u32 width;
u32 height;
u32 depth;
u32 z;
u32 x;
u32 y;
GPUVAddr Address() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
address_low);
}
u32 BlockWidth() const {
return 1U << block_width.Value();
}
u32 BlockHeight() const {
return 1U << block_height.Value();
}
u32 BlockDepth() const {
return 1U << block_depth.Value();
}
} dest;
Upload::Registers upload;
struct {
union {
@@ -96,28 +63,17 @@ public:
};
} regs{};
struct {
u32 write_offset = 0;
u32 copy_size = 0;
std::vector<u8> inner_buffer;
} state{};
private:
Core::System& system;
VideoCore::RasterizerInterface& rasterizer;
MemoryManager& memory_manager;
void ProcessExec();
void ProcessData(u32 data, bool is_last_call);
Upload::State upload_state;
};
#define ASSERT_REG_POSITION(field_name, position) \
static_assert(offsetof(KeplerMemory::Regs, field_name) == position * 4, \
"Field " #field_name " has invalid position")
ASSERT_REG_POSITION(line_length_in, 0x60);
ASSERT_REG_POSITION(line_count, 0x61);
ASSERT_REG_POSITION(dest, 0x62);
ASSERT_REG_POSITION(upload, 0x60);
ASSERT_REG_POSITION(exec, 0x6C);
ASSERT_REG_POSITION(data, 0x6D);
#undef ASSERT_REG_POSITION

View File

@@ -20,8 +20,8 @@ constexpr u32 MacroRegistersStart = 0xE00;
Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
MemoryManager& memory_manager)
: system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, macro_interpreter{
*this} {
: system{system}, rasterizer{rasterizer}, memory_manager{memory_manager},
macro_interpreter{*this}, upload_state{memory_manager, regs.upload} {
InitializeRegisterDefaults();
}
@@ -253,6 +253,18 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
ProcessSyncPoint();
break;
}
case MAXWELL3D_REG_INDEX(exec_upload): {
upload_state.ProcessExec(regs.exec_upload.linear != 0);
break;
}
case MAXWELL3D_REG_INDEX(data_upload): {
const bool is_last_call = method_call.IsLastCall();
upload_state.ProcessData(method_call.argument, is_last_call);
if (is_last_call) {
dirty_flags.OnMemoryWrite();
}
break;
}
default:
break;
}

View File

@@ -14,6 +14,7 @@
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "common/math_util.h"
#include "video_core/engines/engine_upload.h"
#include "video_core/gpu.h"
#include "video_core/macro_interpreter.h"
#include "video_core/textures/texture.h"
@@ -32,6 +33,12 @@ class RasterizerInterface;
namespace Tegra::Engines {
/**
* This Engine is known as GF100_3D. Documentation can be found in:
* https://github.com/envytools/envytools/blob/master/rnndb/graph/gf100_3d.xml
* https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h
*/
#define MAXWELL3D_REG_INDEX(field_name) \
(offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32))
@@ -580,7 +587,18 @@ public:
u32 bind;
} macros;
INSERT_PADDING_WORDS(0x69);
INSERT_PADDING_WORDS(0x17);
Upload::Registers upload;
struct {
union {
BitField<0, 1, u32> linear;
};
} exec_upload;
u32 data_upload;
INSERT_PADDING_WORDS(0x44);
struct {
union {
@@ -1176,6 +1194,8 @@ private:
/// Interpreter for the macro codes uploaded to the GPU.
MacroInterpreter macro_interpreter;
Upload::State upload_state;
/// Retrieves information about a specific TIC entry from the TIC buffer.
Texture::TICEntry GetTICEntry(u32 tic_index) const;
@@ -1219,6 +1239,9 @@ private:
"Field " #field_name " has invalid position")
ASSERT_REG_POSITION(macros, 0x45);
ASSERT_REG_POSITION(upload, 0x60);
ASSERT_REG_POSITION(exec_upload, 0x6C);
ASSERT_REG_POSITION(data_upload, 0x6D);
ASSERT_REG_POSITION(sync_info, 0xB2);
ASSERT_REG_POSITION(tfb_enabled, 0x1D1);
ASSERT_REG_POSITION(rt, 0x200);

View File

@@ -83,57 +83,66 @@ void MaxwellDMA::HandleCopy() {
ASSERT(regs.exec.enable_2d == 1);
const std::size_t copy_size = regs.x_count * regs.y_count;
auto source_ptr{memory_manager.GetPointer(source)};
auto dst_ptr{memory_manager.GetPointer(dest)};
if (!source_ptr) {
LOG_ERROR(HW_GPU, "source_ptr is invalid");
return;
}
if (!dst_ptr) {
LOG_ERROR(HW_GPU, "dst_ptr is invalid");
return;
}
const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) {
// TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated
// copying.
rasterizer.FlushRegion(ToCacheAddr(source_ptr), src_size);
// We have to invalidate the destination region to evict any outdated surfaces from the
// cache. We do this before actually writing the new data because the destination address
// might contain a dirty surface that will have to be written back to memory.
rasterizer.InvalidateRegion(ToCacheAddr(dst_ptr), dst_size);
};
if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
ASSERT(regs.src_params.size_z == 1);
// If the input is tiled and the output is linear, deswizzle the input and copy it over.
const u32 src_bytes_per_pixel = regs.src_pitch / regs.src_params.size_x;
const std::size_t src_size = Texture::CalculateSize(
true, src_bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y,
regs.src_params.size_z, regs.src_params.BlockHeight(), regs.src_params.BlockDepth());
FlushAndInvalidate(regs.src_pitch * regs.src_params.size_y,
copy_size * src_bytes_per_pixel);
const std::size_t dst_size = regs.dst_pitch * regs.y_count;
if (read_buffer.size() < src_size) {
read_buffer.resize(src_size);
}
if (write_buffer.size() < dst_size) {
write_buffer.resize(dst_size);
}
memory_manager.ReadBlock(source, read_buffer.data(), src_size);
memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch,
regs.src_params.size_x, src_bytes_per_pixel, source_ptr, dst_ptr,
regs.src_params.BlockHeight(), regs.src_params.pos_x,
regs.src_params.pos_y);
regs.src_params.size_x, src_bytes_per_pixel, read_buffer.data(),
write_buffer.data(), regs.src_params.BlockHeight(),
regs.src_params.pos_x, regs.src_params.pos_y);
memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
} else {
ASSERT(regs.dst_params.size_z == 1);
ASSERT(regs.src_pitch == regs.x_count);
ASSERT(regs.dst_params.BlockDepth() == 1);
const u32 src_bpp = regs.src_pitch / regs.x_count;
const u32 src_bytes_per_pixel = regs.src_pitch / regs.x_count;
FlushAndInvalidate(regs.src_pitch * regs.y_count,
regs.dst_params.size_x * regs.dst_params.size_y * src_bpp);
const std::size_t dst_size = Texture::CalculateSize(
true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y,
regs.dst_params.size_z, regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth());
const std::size_t dst_layer_size = Texture::CalculateSize(
true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, 1,
regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth());
const std::size_t src_size = regs.src_pitch * regs.y_count;
if (read_buffer.size() < src_size) {
read_buffer.resize(src_size);
}
if (write_buffer.size() < dst_size) {
write_buffer.resize(dst_size);
}
memory_manager.ReadBlock(source, read_buffer.data(), src_size);
memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
// If the input is linear and the output is tiled, swizzle the input and copy it over.
Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x,
src_bpp, dst_ptr, source_ptr, regs.dst_params.BlockHeight());
src_bytes_per_pixel,
write_buffer.data() + dst_layer_size * regs.dst_params.pos_z,
read_buffer.data(), regs.dst_params.BlockHeight());
memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
}
}

View File

@@ -6,6 +6,7 @@
#include <array>
#include <cstddef>
#include <vector>
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
@@ -25,6 +26,11 @@ class RasterizerInterface;
namespace Tegra::Engines {
/**
* This Engine is known as GK104_Copy. Documentation can be found in:
* https://github.com/envytools/envytools/blob/master/rnndb/fifo/gk104_copy.xml
*/
class MaxwellDMA final {
public:
explicit MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
@@ -63,6 +69,16 @@ public:
static_assert(sizeof(Parameters) == 24, "Parameters has wrong size");
enum class ComponentMode : u32 {
Src0 = 0,
Src1 = 1,
Src2 = 2,
Src3 = 3,
Const0 = 4,
Const1 = 5,
Zero = 6,
};
enum class CopyMode : u32 {
None = 0,
Unk1 = 1,
@@ -128,7 +144,26 @@ public:
u32 x_count;
u32 y_count;
INSERT_PADDING_WORDS(0xBB);
INSERT_PADDING_WORDS(0xB8);
u32 const0;
u32 const1;
union {
BitField<0, 4, ComponentMode> component0;
BitField<4, 4, ComponentMode> component1;
BitField<8, 4, ComponentMode> component2;
BitField<12, 4, ComponentMode> component3;
BitField<16, 2, u32> component_size;
BitField<20, 3, u32> src_num_components;
BitField<24, 3, u32> dst_num_components;
u32 SrcBytePerPixel() const {
return src_num_components.Value() * component_size.Value();
}
u32 DstBytePerPixel() const {
return dst_num_components.Value() * component_size.Value();
}
} swizzle_config;
Parameters dst_params;
@@ -149,6 +184,9 @@ private:
MemoryManager& memory_manager;
std::vector<u8> read_buffer;
std::vector<u8> write_buffer;
/// Performs the copy from the source buffer to the destination buffer as configured in the
/// registers.
void HandleCopy();
@@ -165,6 +203,9 @@ ASSERT_REG_POSITION(src_pitch, 0x104);
ASSERT_REG_POSITION(dst_pitch, 0x105);
ASSERT_REG_POSITION(x_count, 0x106);
ASSERT_REG_POSITION(y_count, 0x107);
ASSERT_REG_POSITION(const0, 0x1C0);
ASSERT_REG_POSITION(const1, 0x1C1);
ASSERT_REG_POSITION(swizzle_config, 0x1C2);
ASSERT_REG_POSITION(dst_params, 0x1C3);
ASSERT_REG_POSITION(src_params, 0x1CA);

View File

@@ -35,9 +35,9 @@ GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{ren
dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager);
kepler_compute = std::make_unique<Engines::KeplerCompute>(*memory_manager);
kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager);
maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, rasterizer, *memory_manager);
kepler_memory = std::make_unique<Engines::KeplerMemory>(system, rasterizer, *memory_manager);
kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager);
}
GPU::~GPU() = default;

View File

@@ -922,8 +922,8 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
viewport.y = viewport_rect.bottom;
viewport.width = viewport_rect.GetWidth();
viewport.height = viewport_rect.GetHeight();
viewport.depth_range_far = regs.viewports[i].depth_range_far;
viewport.depth_range_near = regs.viewports[i].depth_range_near;
viewport.depth_range_far = src.depth_range_far;
viewport.depth_range_near = src.depth_range_near;
}
state.depth_clamp.far_plane = regs.view_volume_clip_control.depth_clamp_far != 0;
state.depth_clamp.near_plane = regs.view_volume_clip_control.depth_clamp_near != 0;

View File

@@ -475,7 +475,10 @@ void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) {
ASSERT_MSG(it != transferable.end(), "Saving shader usage without storing raw previously");
auto& usages{it->second};
ASSERT(usages.find(usage) == usages.end());
if (usages.find(usage) != usages.end()) {
// Skip this variant since the shader is already stored.
return;
}
usages.insert(usage);
FileUtil::IOFile file = AppendTransferableFile();

View File

@@ -541,7 +541,6 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
bool is_array, bool is_aoffi) {
const std::size_t coord_count = GetCoordCount(texture_type);
const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0);
const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0);
// If enabled arrays index is always stored in the gpr8 field
const u64 array_register = instr.gpr8.Value();

View File

@@ -1616,6 +1616,7 @@ namespace Tegra::Texture::ASTC {
std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height,
uint32_t depth, uint32_t block_width, uint32_t block_height) {
uint32_t blockIdx = 0;
std::size_t depth_offset = 0;
std::vector<uint8_t> outData(height * width * depth * 4);
for (uint32_t k = 0; k < depth; k++) {
for (uint32_t j = 0; j < height; j += block_height) {
@@ -1630,7 +1631,7 @@ std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t he
uint32_t decompWidth = std::min(block_width, width - i);
uint32_t decompHeight = std::min(block_height, height - j);
uint8_t* outRow = outData.data() + (j * width + i) * 4;
uint8_t* outRow = depth_offset + outData.data() + (j * width + i) * 4;
for (uint32_t jj = 0; jj < decompHeight; jj++) {
memcpy(outRow + jj * width * 4, uncompData + jj * block_width, decompWidth * 4);
}
@@ -1638,6 +1639,7 @@ std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t he
blockIdx++;
}
}
depth_offset += height * width * 4;
}
return outData;

View File

@@ -58,7 +58,7 @@ void CompatDB::Submit() {
button(NextButton)->setEnabled(false);
button(NextButton)->setText(tr("Submitting"));
button(QWizard::CancelButton)->setVisible(false);
button(CancelButton)->setVisible(false);
testcase_watcher.setFuture(QtConcurrent::run(
[] { return Core::System::GetInstance().TelemetrySession().SubmitTestcase(); }));
@@ -74,12 +74,12 @@ void CompatDB::OnTestcaseSubmitted() {
tr("An error occured while sending the Testcase"));
button(NextButton)->setEnabled(true);
button(NextButton)->setText(tr("Next"));
button(QWizard::CancelButton)->setVisible(true);
button(CancelButton)->setVisible(true);
} else {
next();
// older versions of QT don't support the "NoCancelButtonOnLastPage" option, this is a
// workaround
button(QWizard::CancelButton)->setVisible(false);
button(CancelButton)->setVisible(false);
}
}

View File

@@ -17,8 +17,12 @@ ConfigureDialog::ConfigureDialog(QWidget* parent, HotkeyRegistry& registry)
ui->hotkeysTab->Populate(registry);
this->setConfiguration();
this->PopulateSelectionList();
setWindowFlags(windowFlags() & ~Qt::WindowContextHelpButtonHint);
connect(ui->selectorList, &QListWidget::itemSelectionChanged, this,
&ConfigureDialog::UpdateVisibleTabs);
adjustSize();
ui->selectorList->setCurrentRow(0);

View File

@@ -67,8 +67,6 @@ public:
private:
struct Hotkey {
Hotkey() : shortcut(nullptr), context(Qt::WindowShortcut) {}
QKeySequence keyseq;
QShortcut* shortcut = nullptr;
Qt::ShortcutContext context = Qt::WindowShortcut;

View File

@@ -2129,6 +2129,11 @@ int main(int argc, char* argv[]) {
// generating shaders
setlocale(LC_ALL, "C");
// Environment variables
#ifdef __linux__
setenv("MESA_GL_VERSION_OVERRIDE", "4.5COMPAT", 0);
#endif
GMainWindow main_window;
// After settings have been loaded by GMainWindow, apply the filter
main_window.show();