Compare commits

...

26 Commits

Author SHA1 Message Date
Subv
9f6a5660e8 GPU: Allow using the old NV04 values for the depth test function.
These seem to be just a valid as the GL token values. Thanks @ReinUsesLisp

This restores graphical output to Disgaea 5
2018-07-05 13:01:31 -05:00
bunnei
637f9d780a Merge pull request #624 from Subv/f2f_round
GPU: Implemented the F2F 'round' rounding mode.
2018-07-05 11:30:29 -04:00
bunnei
956b5db52e Merge pull request #623 from Subv/vertex_types
GPU: Implement the Size_16_16 and Size_10_10_10_2 vertex attribute types
2018-07-05 11:30:01 -04:00
bunnei
8b815877a6 Merge pull request #622 from Subv/unused_tex
GPU: Ignore unused textures and corrected the TEX shader instruction decoding.
2018-07-05 11:29:17 -04:00
bunnei
1b0a74e23f Merge pull request #621 from Subv/psetp_
GPU: Implemented the PSETP shader instruction.
2018-07-05 11:28:50 -04:00
bunnei
9a3c0b161e Merge pull request #620 from Subv/depth_z32f
GPU: Implemented the 32 bit float depth buffer format.
2018-07-05 11:09:15 -04:00
Subv
d800a02b4b GPU: Implemented the F2F 'round' rounding mode.
It's implemented via the GLSL 'roundEven()' function.
2018-07-04 15:43:21 -05:00
Subv
ce39ae3e57 GPU: Implement the Size_16_16 and Size_10_10_10_2 vertex attribute types.
Both signed and unsigned variants.
2018-07-04 15:22:34 -05:00
Subv
4bda9693be GPU: Ignore textures that the GLSL compiler deemed unused when binding textures to the shaders. 2018-07-04 15:20:12 -05:00
Subv
c42b818cf9 GPU: Corrected the decoding for the TEX shader instruction. 2018-07-04 15:19:20 -05:00
Subv
53a55bd751 GPU: Implemented the PSETP shader instruction.
It's similar to the isetp and fsetp instructions but it works on predicates instead.
2018-07-04 15:15:03 -05:00
bunnei
2355460d7c Merge pull request #619 from Subv/flip_cull
GPU: Flip the triangle front face winding if the GPU is configured to not flip the triangles.
2018-07-04 12:13:38 -04:00
Subv
016e357c75 GPU: Implemented the 32 bit float depth buffer format. 2018-07-04 10:42:33 -05:00
Subv
c1bebdef5e GPU: Flip the triangle front face winding if the GPU is configured to not flip the triangles.
OpenGL's default behavior is already correct when the GPU is configured to flip the triangles.

This fixes 1-2 Switch's splash screen.
2018-07-04 10:26:46 -05:00
bunnei
81a44d38ee Merge pull request #618 from Subv/clear_used_buffers
GPU: Only configure the used framebuffers during clear.
2018-07-04 00:12:46 -04:00
Subv
5a9df3c675 GPU: Only configure the used framebuffers during clear.
Don't try to configure the color buffer if it is not being cleared, it may not be completely valid at this point.
2018-07-03 22:32:59 -05:00
bunnei
c996787d84 Merge pull request #609 from Subv/clear_buffers
GPU: Implemented the CLEAR_BUFFERS register.
2018-07-03 19:34:34 -04:00
bunnei
4030f600dc Merge pull request #616 from bunnei/s8z24
gl_rasterizer_cache: Implement PixelFormat S8Z24.
2018-07-03 18:26:31 -04:00
Subv
78443a7f29 GPU: Factor out the framebuffer configuration code for both Clear and Draw commands. 2018-07-03 16:56:47 -05:00
Subv
c1811ed3d1 GPU: Support clears that don't clear the color buffer. 2018-07-03 16:56:47 -05:00
Subv
be51120d23 GPU: Bind and clear the render target when the CLEAR_BUFFERS register is written to. 2018-07-03 16:56:44 -05:00
Subv
827bb08c91 GPU: Added registers for the CLEAR_BUFFERS and CLEAR_COLOR methods. 2018-07-03 16:56:31 -05:00
bunnei
c164f02c48 Merge pull request #613 from jroweboy/qt-style
Add qt windowsvistastyle dll to the build
2018-07-03 17:48:29 -04:00
bunnei
9da1552417 gl_rasterizer_cache: Implement PixelFormat S8Z24. 2018-07-03 14:58:13 -04:00
James Rowe
6ff20dc6a7 Add qt windowsvistastyle dll to the build 2018-07-03 14:44:13 -04:00
David
3dab0e284b Update AudioRenderer Voice Sections (#614)
* voice section updating

* fixed slight offset miscalculation

* fixed overflow
2018-07-03 13:09:10 -04:00
14 changed files with 455 additions and 53 deletions

View File

@@ -3,7 +3,9 @@ function(copy_yuzu_Qt5_deps target_dir)
set(DLL_DEST "${CMAKE_BINARY_DIR}/bin/$<CONFIG>/")
set(Qt5_DLL_DIR "${Qt5_DIR}/../../../bin")
set(Qt5_PLATFORMS_DIR "${Qt5_DIR}/../../../plugins/platforms/")
set(Qt5_STYLES_DIR "${Qt5_DIR}/../../../plugins/styles/")
set(PLATFORMS ${DLL_DEST}platforms/)
set(STYLES ${DLL_DEST}styles/)
windows_copy_files(${target_dir} ${Qt5_DLL_DIR} ${DLL_DEST}
icudt*.dll
icuin*.dll
@@ -14,4 +16,5 @@ function(copy_yuzu_Qt5_deps target_dir)
Qt5Widgets$<$<CONFIG:Debug>:d>.*
)
windows_copy_files(yuzu ${Qt5_PLATFORMS_DIR} ${PLATFORMS} qwindows$<$<CONFIG:Debug>:d>.*)
windows_copy_files(yuzu ${Qt5_STYLES_DIR} ${STYLES} qwindowsvistastyle$<$<CONFIG:Debug>:d>.*)
endfunction(copy_yuzu_Qt5_deps)

View File

@@ -116,6 +116,7 @@ after_build:
mkdir $RELEASE_DIST
mkdir $RELEASE_DIST/platforms
mkdir $RELEASE_DIST/styles
# copy the compiled binaries and other release files to the release folder
Get-ChildItem "$CMAKE_BINARY_DIR" -Filter "yuzu*.exe" | Copy-Item -destination $RELEASE_DIST
@@ -136,6 +137,9 @@ after_build:
# copy the qt windows plugin dll to platforms
Copy-Item -path "C:/msys64/mingw64/share/qt5/plugins/platforms/qwindows.dll" -force -destination "$RELEASE_DIST/platforms"
# copy the qt windows vista style dll to platforms
Copy-Item -path "C:/msys64/mingw64/share/qt5/plugins/styles/qwindowsvistastyle.dll" -force -destination "$RELEASE_DIST/styles"
7z a -tzip $MINGW_BUILD_ZIP $RELEASE_DIST\*
7z a $MINGW_SEVENZIP $RELEASE_DIST
}

View File

@@ -47,6 +47,7 @@ public:
// Start the audio event
CoreTiming::ScheduleEvent(audio_ticks, audio_event);
voice_status_list.reserve(worker_params.voice_count);
}
~IAudioRenderer() {
CoreTiming::UnscheduleEvent(audio_event, 0);
@@ -68,6 +69,12 @@ private:
buf.data() + sizeof(UpdateDataHeader) + config.behavior_size,
memory_pool_count * sizeof(MemoryPoolInfo));
std::vector<VoiceInfo> voice_info(worker_params.voice_count);
std::memcpy(voice_info.data(),
buf.data() + sizeof(UpdateDataHeader) + config.behavior_size +
config.memory_pools_size + config.voice_resource_size,
worker_params.voice_count * sizeof(VoiceInfo));
UpdateDataHeader response_data{worker_params};
ASSERT(ctx.GetWriteBufferSize() == response_data.total_size);
@@ -86,6 +93,23 @@ private:
std::memcpy(output.data() + sizeof(UpdateDataHeader), memory_pool.data(),
response_data.memory_pools_size);
for (unsigned i = 0; i < voice_info.size(); i++) {
if (voice_info[i].is_new) {
voice_status_list[i].played_sample_count = 0;
voice_status_list[i].wave_buffer_consumed = 0;
} else if (voice_info[i].play_state == (u8)PlayStates::Started) {
for (u32 buff_idx = 0; buff_idx < voice_info[i].wave_buffer_count; buff_idx++) {
voice_status_list[i].played_sample_count +=
(voice_info[i].wave_buffer[buff_idx].end_sample_offset -
voice_info[i].wave_buffer[buff_idx].start_sample_offset) /
2;
voice_status_list[i].wave_buffer_consumed++;
}
}
}
std::memcpy(output.data() + sizeof(UpdateDataHeader) + response_data.memory_pools_size,
voice_status_list.data(), response_data.voices_size);
ctx.WriteBuffer(output);
IPC::ResponseBuilder rb{ctx, 2};
@@ -130,6 +154,11 @@ private:
Released = 0x6,
};
enum class PlayStates : u8 {
Started = 0,
Stopped = 1,
};
struct MemoryPoolEntry {
MemoryPoolStates state;
u32_le unknown_4;
@@ -175,11 +204,69 @@ private:
};
static_assert(sizeof(UpdateDataHeader) == 0x40, "UpdateDataHeader has wrong size");
struct BiquadFilter {
u8 enable;
INSERT_PADDING_BYTES(1);
s16_le numerator[3];
s16_le denominator[2];
};
static_assert(sizeof(BiquadFilter) == 0xc, "BiquadFilter has wrong size");
struct WaveBuffer {
u64_le buffer_addr;
u64_le buffer_sz;
s32_le start_sample_offset;
s32_le end_sample_offset;
u8 loop;
u8 end_of_stream;
u8 sent_to_server;
INSERT_PADDING_BYTES(5);
u64 context_addr;
u64 context_sz;
INSERT_PADDING_BYTES(8);
};
static_assert(sizeof(WaveBuffer) == 0x38, "WaveBuffer has wrong size");
struct VoiceInfo {
u32_le id;
u32_le node_id;
u8 is_new;
u8 is_in_use;
u8 play_state;
u8 sample_format;
u32_le sample_rate;
u32_le priority;
u32_le sorting_order;
u32_le channel_count;
float_le pitch;
float_le volume;
BiquadFilter biquad_filter[2];
u32_le wave_buffer_count;
u16_le wave_buffer_head;
INSERT_PADDING_BYTES(6);
u64_le additional_params_addr;
u64_le additional_params_sz;
u32_le mix_id;
u32_le splitter_info_id;
WaveBuffer wave_buffer[4];
u32_le voice_channel_resource_ids[6];
INSERT_PADDING_BYTES(24);
};
static_assert(sizeof(VoiceInfo) == 0x170, "VoiceInfo is wrong size");
struct VoiceOutStatus {
u64_le played_sample_count;
u32_le wave_buffer_consumed;
INSERT_PADDING_WORDS(1);
};
static_assert(sizeof(VoiceOutStatus) == 0x10, "VoiceOutStatus has wrong size");
/// This is used to trigger the audio event callback.
CoreTiming::EventType* audio_event;
Kernel::SharedPtr<Kernel::Event> system_event;
AudioRendererParameter worker_params;
std::vector<VoiceOutStatus> voice_status_list;
};
class IAudioDevice final : public ServiceFramework<IAudioDevice> {

View File

@@ -126,6 +126,10 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
DrawArrays();
break;
}
case MAXWELL3D_REG_INDEX(clear_buffers): {
ProcessClearBuffers();
break;
}
case MAXWELL3D_REG_INDEX(query.query_get): {
ProcessQueryGet();
break;
@@ -415,5 +419,13 @@ bool Maxwell3D::IsShaderStageEnabled(Regs::ShaderStage stage) const {
UNREACHABLE();
}
void Maxwell3D::ProcessClearBuffers() {
ASSERT(regs.clear_buffers.R == regs.clear_buffers.G &&
regs.clear_buffers.R == regs.clear_buffers.B &&
regs.clear_buffers.R == regs.clear_buffers.A);
VideoCore::g_renderer->Rasterizer()->Clear();
}
} // namespace Engines
} // namespace Tegra

View File

@@ -281,14 +281,26 @@ public:
};
enum class ComparisonOp : u32 {
Never = 0,
Less = 1,
Equal = 2,
LessEqual = 3,
Greater = 4,
NotEqual = 5,
GreaterEqual = 6,
Always = 7,
// These values are used by Nouveau and most games, they correspond to the OpenGL token
// values for these operations.
Never = 0x200,
Less = 0x201,
Equal = 0x202,
LessEqual = 0x203,
Greater = 0x204,
NotEqual = 0x205,
GreaterEqual = 0x206,
Always = 0x207,
// These values are used by some games, they seem to be NV04 values.
NeverOld = 1,
LessOld = 2,
EqualOld = 3,
LessEqualOld = 4,
GreaterOld = 5,
NotEqualOld = 6,
GreaterEqualOld = 7,
AlwaysOld = 8,
};
struct Cull {
@@ -436,7 +448,12 @@ public:
u32 count;
} vertex_buffer;
INSERT_PADDING_WORDS(0x99);
INSERT_PADDING_WORDS(1);
float clear_color[4];
float clear_depth;
INSERT_PADDING_WORDS(0x93);
struct {
u32 address_high;
@@ -473,9 +490,11 @@ public:
u32 depth_write_enabled;
INSERT_PADDING_WORDS(0x8);
INSERT_PADDING_WORDS(0x7);
BitField<0, 3, ComparisonOp> depth_test_func;
u32 d3d_cull_mode;
ComparisonOp depth_test_func;
INSERT_PADDING_WORDS(0xB);
@@ -493,7 +512,13 @@ public:
u32 enable[NumRenderTargets];
} blend;
INSERT_PADDING_WORDS(0x2D);
INSERT_PADDING_WORDS(0xB);
union {
BitField<4, 1, u32> triangle_rast_flip;
} screen_y_control;
INSERT_PADDING_WORDS(0x21);
u32 vb_element_base;
@@ -523,7 +548,12 @@ public:
}
} tic;
INSERT_PADDING_WORDS(0x22);
INSERT_PADDING_WORDS(0x21);
union {
BitField<2, 1, u32> coord_origin;
BitField<3, 10, u32> enable;
} point_coord_replace;
struct {
u32 code_address_high;
@@ -584,7 +614,21 @@ public:
Cull cull;
INSERT_PADDING_WORDS(0x77);
INSERT_PADDING_WORDS(0x2B);
union {
u32 raw;
BitField<0, 1, u32> Z;
BitField<1, 1, u32> S;
BitField<2, 1, u32> R;
BitField<3, 1, u32> G;
BitField<4, 1, u32> B;
BitField<5, 1, u32> A;
BitField<6, 4, u32> RT;
BitField<10, 11, u32> layer;
} clear_buffers;
INSERT_PADDING_WORDS(0x4B);
struct {
u32 query_address_high;
@@ -766,6 +810,9 @@ private:
/// Handles writes to the macro uploading registers.
void ProcessMacroUpload(u32 data);
/// Handles a write to the CLEAR_BUFFERS register.
void ProcessClearBuffers();
/// Handles a write to the QUERY_GET register.
void ProcessQueryGet();
@@ -788,21 +835,27 @@ ASSERT_REG_POSITION(rt, 0x200);
ASSERT_REG_POSITION(viewport_transform[0], 0x280);
ASSERT_REG_POSITION(viewport, 0x300);
ASSERT_REG_POSITION(vertex_buffer, 0x35D);
ASSERT_REG_POSITION(clear_color[0], 0x360);
ASSERT_REG_POSITION(clear_depth, 0x364);
ASSERT_REG_POSITION(zeta, 0x3F8);
ASSERT_REG_POSITION(vertex_attrib_format[0], 0x458);
ASSERT_REG_POSITION(rt_control, 0x487);
ASSERT_REG_POSITION(depth_test_enable, 0x4B3);
ASSERT_REG_POSITION(independent_blend_enable, 0x4B9);
ASSERT_REG_POSITION(depth_write_enabled, 0x4BA);
ASSERT_REG_POSITION(d3d_cull_mode, 0x4C2);
ASSERT_REG_POSITION(depth_test_func, 0x4C3);
ASSERT_REG_POSITION(blend, 0x4CF);
ASSERT_REG_POSITION(screen_y_control, 0x4EB);
ASSERT_REG_POSITION(vb_element_base, 0x50D);
ASSERT_REG_POSITION(tsc, 0x557);
ASSERT_REG_POSITION(tic, 0x55D);
ASSERT_REG_POSITION(point_coord_replace, 0x581);
ASSERT_REG_POSITION(code_address, 0x582);
ASSERT_REG_POSITION(draw, 0x585);
ASSERT_REG_POSITION(index_array, 0x5F2);
ASSERT_REG_POSITION(cull, 0x646);
ASSERT_REG_POSITION(clear_buffers, 0x674);
ASSERT_REG_POSITION(query, 0x6C0);
ASSERT_REG_POSITION(vertex_array[0], 0x700);
ASSERT_REG_POSITION(independent_blend, 0x780);

View File

@@ -328,6 +328,19 @@ union Instruction {
BitField<49, 3, PredCondition> cond;
} isetp;
union {
BitField<0, 3, u64> pred0;
BitField<3, 3, u64> pred3;
BitField<12, 3, u64> pred12;
BitField<15, 1, u64> neg_pred12;
BitField<24, 2, PredOperation> cond;
BitField<29, 3, u64> pred29;
BitField<32, 1, u64> neg_pred29;
BitField<39, 3, u64> pred39;
BitField<42, 1, u64> neg_pred39;
BitField<45, 2, PredOperation> op;
} psetp;
union {
BitField<39, 3, u64> pred39;
BitField<42, 1, u64> neg_pred;
@@ -641,7 +654,7 @@ private:
INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"),
INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"),
INST("1100000000111---", Id::TEX, Type::Memory, "TEX"),
INST("110000----111---", Id::TEX, Type::Memory, "TEX"),
INST("1101111101001---", Id::TEXQ, Type::Memory, "TEXQ"),
INST("1101100---------", Id::TEXS, Type::Memory, "TEXS"),
INST("1101101---------", Id::TLDS, Type::Memory, "TLDS"),

View File

@@ -19,6 +19,9 @@ public:
/// Draw the current batch of vertex arrays
virtual void DrawArrays() = 0;
/// Clear the current framebuffer
virtual void Clear() = 0;
/// Notify rasterizer that the specified Maxwell register has been changed
virtual void NotifyMaxwellRegisterChanged(u32 method) = 0;

View File

@@ -297,11 +297,8 @@ bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) {
return true;
}
void RasterizerOpenGL::DrawArrays() {
if (accelerate_draw == AccelDraw::Disabled)
return;
MICROPROFILE_SCOPE(OpenGL_Drawing);
std::pair<Surface, Surface> RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb,
bool using_depth_fb) {
const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
// Sync the depth test state before configuring the framebuffer surfaces.
@@ -310,9 +307,6 @@ void RasterizerOpenGL::DrawArrays() {
// TODO(bunnei): Implement this
const bool has_stencil = false;
const bool using_color_fb = true;
const bool using_depth_fb = regs.zeta.Address() != 0;
const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()};
const bool write_color_fb =
@@ -344,11 +338,6 @@ void RasterizerOpenGL::DrawArrays() {
BindFramebufferSurfaces(color_surface, depth_surface, has_stencil);
SyncViewport(surfaces_rect);
SyncBlendState();
SyncCullMode();
// TODO(bunnei): Sync framebuffer_scale uniform here
// TODO(bunnei): Sync scissorbox uniform(s) here
// Viewport can have negative offsets or larger dimensions than our framebuffer sub-rect. Enable
// scissor test to prevent drawing outside of the framebuffer region
@@ -359,6 +348,66 @@ void RasterizerOpenGL::DrawArrays() {
state.scissor.height = draw_rect.GetHeight();
state.Apply();
// Only return the surface to be marked as dirty if writing to it is enabled.
return std::make_pair(write_color_fb ? color_surface : nullptr,
write_depth_fb ? depth_surface : nullptr);
}
void RasterizerOpenGL::Clear() {
const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
bool use_color_fb = false;
bool use_depth_fb = false;
GLbitfield clear_mask = 0;
if (regs.clear_buffers.R && regs.clear_buffers.G && regs.clear_buffers.B &&
regs.clear_buffers.A) {
clear_mask |= GL_COLOR_BUFFER_BIT;
use_color_fb = true;
}
if (regs.clear_buffers.Z) {
clear_mask |= GL_DEPTH_BUFFER_BIT;
use_depth_fb = true;
}
if (clear_mask == 0)
return;
auto [dirty_color_surface, dirty_depth_surface] =
ConfigureFramebuffers(use_color_fb, use_depth_fb);
// TODO(Subv): Support clearing only partial colors.
glClearColor(regs.clear_color[0], regs.clear_color[1], regs.clear_color[2],
regs.clear_color[3]);
glClearDepth(regs.clear_depth);
glClear(clear_mask);
// Mark framebuffer surfaces as dirty
if (dirty_color_surface != nullptr) {
res_cache.MarkSurfaceAsDirty(dirty_color_surface);
}
if (dirty_depth_surface != nullptr) {
res_cache.MarkSurfaceAsDirty(dirty_depth_surface);
}
}
void RasterizerOpenGL::DrawArrays() {
if (accelerate_draw == AccelDraw::Disabled)
return;
MICROPROFILE_SCOPE(OpenGL_Drawing);
const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
auto [dirty_color_surface, dirty_depth_surface] =
ConfigureFramebuffers(true, regs.zeta.Address() != 0);
SyncBlendState();
SyncCullMode();
// TODO(bunnei): Sync framebuffer_scale uniform here
// TODO(bunnei): Sync scissorbox uniform(s) here
// Draw the vertex batch
const bool is_indexed = accelerate_draw == AccelDraw::Indexed;
const u64 index_buffer_size{regs.index_array.count * regs.index_array.FormatSizeInBytes()};
@@ -439,11 +488,11 @@ void RasterizerOpenGL::DrawArrays() {
state.Apply();
// Mark framebuffer surfaces as dirty
if (color_surface != nullptr && write_color_fb) {
res_cache.MarkSurfaceAsDirty(color_surface);
if (dirty_color_surface != nullptr) {
res_cache.MarkSurfaceAsDirty(dirty_color_surface);
}
if (depth_surface != nullptr && write_depth_fb) {
res_cache.MarkSurfaceAsDirty(depth_surface);
if (dirty_depth_surface != nullptr) {
res_cache.MarkSurfaceAsDirty(dirty_depth_surface);
}
}
@@ -637,7 +686,10 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, GLuint program,
// Bind the uniform to the sampler.
GLint uniform = glGetUniformLocation(program, entry.GetName().c_str());
ASSERT(uniform != -1);
if (uniform == -1) {
continue;
}
glProgramUniform1i(program, uniform, current_bindpoint);
const auto texture = maxwell3d.GetStageTexture(entry.GetStage(), entry.GetOffset());
@@ -722,6 +774,16 @@ void RasterizerOpenGL::SyncCullMode() {
if (state.cull.enabled) {
state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face);
state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face);
// If the GPU is configured to flip the rasterized triangles, then we need to flip the
// notion of front and back. Note: We flip the triangles when the value of the register is 0
// because OpenGL already does it for us.
if (regs.screen_y_control.triangle_rast_flip == 0) {
if (state.cull.front_face == GL_CCW)
state.cull.front_face = GL_CW;
else if (state.cull.front_face == GL_CW)
state.cull.front_face = GL_CCW;
}
}
}

View File

@@ -7,6 +7,7 @@
#include <array>
#include <cstddef>
#include <memory>
#include <utility>
#include <vector>
#include <glad/glad.h>
#include "common/common_types.h"
@@ -28,6 +29,7 @@ public:
~RasterizerOpenGL() override;
void DrawArrays() override;
void Clear() override;
void NotifyMaxwellRegisterChanged(u32 method) override;
void FlushAll() override;
void FlushRegion(Tegra::GPUVAddr addr, u64 size) override;
@@ -81,6 +83,10 @@ private:
u32 border_color_a;
};
/// Configures the color and depth framebuffer states and returns the dirty <Color, Depth>
/// surfaces if writing was enabled.
std::pair<Surface, Surface> ConfigureFramebuffers(bool using_color_fb, bool using_depth_fb);
/// Binds the framebuffer color and depth surface
void BindFramebufferSurfaces(const Surface& color_surface, const Surface& depth_surface,
bool has_stencil);

View File

@@ -65,6 +65,25 @@ struct FormatTuple {
return params;
}
/*static*/ SurfaceParams SurfaceParams::CreateForDepthBuffer(
const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config, Tegra::GPUVAddr zeta_address,
Tegra::DepthFormat format) {
SurfaceParams params{};
params.addr = zeta_address;
params.is_tiled = true;
params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight;
params.pixel_format = PixelFormatFromDepthFormat(format);
params.component_type = ComponentTypeFromDepthFormat(format);
params.type = GetFormatType(params.pixel_format);
params.size_in_bytes = params.SizeInBytes();
params.width = config.width;
params.height = config.height;
params.unaligned_height = config.height;
params.size_in_bytes = params.SizeInBytes();
return params;
}
static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_format_tuples = {{
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // ABGR8
{GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, ComponentType::UNorm, false}, // B5G6R5
@@ -88,6 +107,9 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form
// DepthStencil formats
{GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm,
false}, // Z24S8
{GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm,
false}, // S8Z24
{GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F
}};
static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) {
@@ -131,13 +153,6 @@ MathUtil::Rectangle<u32> SurfaceParams::GetRect() const {
return {0, actual_height, width, 0};
}
static void ConvertASTCToRGBA8(std::vector<u8>& data, PixelFormat format, u32 width, u32 height) {
u32 block_width{};
u32 block_height{};
std::tie(block_width, block_height) = GetASTCBlockSize(format);
data = Tegra::Texture::ASTC::Decompress(data, width, height, block_width, block_height);
}
template <bool morton_to_gl, PixelFormat format>
void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, Tegra::GPUVAddr addr) {
constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / CHAR_BIT;
@@ -177,6 +192,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr),
MortonCopy<true, PixelFormat::DXT1>, MortonCopy<true, PixelFormat::DXT23>,
MortonCopy<true, PixelFormat::DXT45>, MortonCopy<true, PixelFormat::DXN1>,
MortonCopy<true, PixelFormat::ASTC_2D_4X4>, MortonCopy<true, PixelFormat::Z24S8>,
MortonCopy<true, PixelFormat::S8Z24>, MortonCopy<true, PixelFormat::Z32F>,
};
static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr),
@@ -197,6 +213,8 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr),
nullptr,
MortonCopy<false, PixelFormat::ABGR8>,
MortonCopy<false, PixelFormat::Z24S8>,
MortonCopy<false, PixelFormat::S8Z24>,
MortonCopy<false, PixelFormat::Z32F>,
};
// Allocate an uninitialized texture of appropriate size and format for the surface
@@ -234,6 +252,71 @@ CachedSurface::CachedSurface(const SurfaceParams& params) : params(params) {
rect.GetWidth(), rect.GetHeight());
}
static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) {
union S8Z24 {
BitField<0, 24, u32> z24;
BitField<24, 8, u32> s8;
};
static_assert(sizeof(S8Z24) == 4, "S8Z24 is incorrect size");
union Z24S8 {
BitField<0, 8, u32> s8;
BitField<8, 24, u32> z24;
};
static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size");
S8Z24 input_pixel{};
Z24S8 output_pixel{};
for (size_t y = 0; y < height; ++y) {
for (size_t x = 0; x < width; ++x) {
const size_t offset{y * width + x};
std::memcpy(&input_pixel, &data[offset], sizeof(S8Z24));
output_pixel.s8.Assign(input_pixel.s8);
output_pixel.z24.Assign(input_pixel.z24);
std::memcpy(&data[offset], &output_pixel, sizeof(Z24S8));
}
}
}
/**
* Helper function to perform software conversion (as needed) when loading a buffer from Switch
* memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or with
* typical desktop GPUs.
*/
static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelFormat pixel_format,
u32 width, u32 height) {
switch (pixel_format) {
case PixelFormat::ASTC_2D_4X4: {
// Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC.
u32 block_width{};
u32 block_height{};
std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format);
data = Tegra::Texture::ASTC::Decompress(data, width, height, block_width, block_height);
break;
}
case PixelFormat::S8Z24:
// Convert the S8Z24 depth format to Z24S8, as OpenGL does not support S8Z24.
ConvertS8Z24ToZ24S8(data, width, height);
break;
}
}
/**
* Helper function to perform software conversion (as needed) when flushing a buffer to Switch
* memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or with
* typical desktop GPUs.
*/
static void ConvertFormatAsNeeded_FlushGLBuffer(std::vector<u8>& /*data*/, PixelFormat pixel_format,
u32 /*width*/, u32 /*height*/) {
switch (pixel_format) {
case PixelFormat::ASTC_2D_4X4:
case PixelFormat::S8Z24:
LOG_CRITICAL(Render_OpenGL, "Unimplemented pixel_format={}",
static_cast<u32>(pixel_format));
UNREACHABLE();
break;
}
}
MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64, 192));
void CachedSurface::LoadGLBuffer() {
ASSERT(params.type != SurfaceType::Fill);
@@ -256,10 +339,7 @@ void CachedSurface::LoadGLBuffer() {
params.width, params.block_height, params.height, gl_buffer.data(), params.addr);
}
if (IsPixelFormatASTC(params.pixel_format)) {
// ASTC formats are converted to RGBA8 in software, as most PC GPUs do not support this
ConvertASTCToRGBA8(gl_buffer, params.pixel_format, params.width, params.height);
}
ConvertFormatAsNeeded_LoadGLBuffer(gl_buffer, params.pixel_format, params.width, params.height);
}
MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64));
@@ -272,6 +352,9 @@ void CachedSurface::FlushGLBuffer() {
MICROPROFILE_SCOPE(OpenGL_SurfaceFlush);
ConvertFormatAsNeeded_FlushGLBuffer(gl_buffer, params.pixel_format, params.width,
params.height);
if (!params.is_tiled) {
std::memcpy(dst_buffer, gl_buffer.data(), params.size_in_bytes);
} else {
@@ -399,15 +482,16 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(
LOG_WARNING(Render_OpenGL, "hard-coded for render target 0!");
// get color and depth surfaces
const SurfaceParams color_params{SurfaceParams::CreateForFramebuffer(regs.rt[0])};
SurfaceParams depth_params{color_params};
SurfaceParams color_params{};
SurfaceParams depth_params{};
if (using_color_fb) {
color_params = SurfaceParams::CreateForFramebuffer(regs.rt[0]);
}
if (using_depth_fb) {
depth_params.addr = regs.zeta.Address();
depth_params.pixel_format = SurfaceParams::PixelFormatFromDepthFormat(regs.zeta.format);
depth_params.component_type = SurfaceParams::ComponentTypeFromDepthFormat(regs.zeta.format);
depth_params.type = SurfaceParams::GetFormatType(depth_params.pixel_format);
depth_params.size_in_bytes = depth_params.SizeInBytes();
depth_params =
SurfaceParams::CreateForDepthBuffer(regs.rt[0], regs.zeta.Address(), regs.zeta.format);
}
MathUtil::Rectangle<u32> color_rect{};

View File

@@ -41,6 +41,8 @@ struct SurfaceParams {
// DepthStencil formats
Z24S8 = 13,
S8Z24 = 14,
Z32F = 15,
MaxDepthStencilFormat,
@@ -92,6 +94,8 @@ struct SurfaceParams {
4, // DXN1
4, // ASTC_2D_4X4
1, // Z24S8
1, // S8Z24
1, // Z32F
}};
ASSERT(static_cast<size_t>(format) < compression_factor_table.size());
@@ -117,6 +121,8 @@ struct SurfaceParams {
64, // DXN1
32, // ASTC_2D_4X4
32, // Z24S8
32, // S8Z24
32, // Z32F
}};
ASSERT(static_cast<size_t>(format) < bpp_table.size());
@@ -128,8 +134,12 @@ struct SurfaceParams {
static PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) {
switch (format) {
case Tegra::DepthFormat::S8_Z24_UNORM:
return PixelFormat::S8Z24;
case Tegra::DepthFormat::Z24_S8_UNORM:
return PixelFormat::Z24S8;
case Tegra::DepthFormat::Z32_FLOAT:
return PixelFormat::Z32F;
default:
LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
UNREACHABLE();
@@ -226,8 +236,12 @@ struct SurfaceParams {
static Tegra::DepthFormat DepthFormatFromPixelFormat(PixelFormat format) {
switch (format) {
case PixelFormat::S8Z24:
return Tegra::DepthFormat::S8_Z24_UNORM;
case PixelFormat::Z24S8:
return Tegra::DepthFormat::Z24_S8_UNORM;
case PixelFormat::Z32F:
return Tegra::DepthFormat::Z32_FLOAT;
default:
UNREACHABLE();
}
@@ -274,8 +288,11 @@ struct SurfaceParams {
static ComponentType ComponentTypeFromDepthFormat(Tegra::DepthFormat format) {
switch (format) {
case Tegra::DepthFormat::S8_Z24_UNORM:
case Tegra::DepthFormat::Z24_S8_UNORM:
return ComponentType::UNorm;
case Tegra::DepthFormat::Z32_FLOAT:
return ComponentType::Float;
default:
LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
UNREACHABLE();
@@ -318,13 +335,18 @@ struct SurfaceParams {
return addr <= (region_addr + region_size) && region_addr <= (addr + size_in_bytes);
}
/// Creates SurfaceParams from a texture configation
/// Creates SurfaceParams from a texture configuration
static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config);
/// Creates SurfaceParams from a framebuffer configation
/// Creates SurfaceParams from a framebuffer configuration
static SurfaceParams CreateForFramebuffer(
const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config);
/// Creates SurfaceParams for a depth buffer configuration
static SurfaceParams CreateForDepthBuffer(
const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config,
Tegra::GPUVAddr zeta_address, Tegra::DepthFormat format);
Tegra::GPUVAddr addr;
bool is_tiled;
u32 block_height;

View File

@@ -1213,6 +1213,9 @@ private:
switch (instr.conversion.f2f.rounding) {
case Tegra::Shader::F2fRoundingOp::None:
break;
case Tegra::Shader::F2fRoundingOp::Round:
op_a = "roundEven(" + op_a + ')';
break;
case Tegra::Shader::F2fRoundingOp::Floor:
op_a = "floor(" + op_a + ')';
break;
@@ -1477,6 +1480,36 @@ private:
}
break;
}
case OpCode::Type::PredicateSetPredicate: {
std::string op_a =
GetPredicateCondition(instr.psetp.pred12, instr.psetp.neg_pred12 != 0);
std::string op_b =
GetPredicateCondition(instr.psetp.pred29, instr.psetp.neg_pred29 != 0);
using Tegra::Shader::Pred;
// We can't use the constant predicate as destination.
ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
std::string second_pred =
GetPredicateCondition(instr.psetp.pred39, instr.psetp.neg_pred39 != 0);
std::string combiner = GetPredicateCombiner(instr.psetp.op);
std::string predicate =
'(' + op_a + ") " + GetPredicateCombiner(instr.psetp.cond) + " (" + op_b + ')';
// Set the primary predicate to the result of Predicate OP SecondPredicate
SetPredicate(instr.psetp.pred3,
'(' + predicate + ") " + combiner + " (" + second_pred + ')');
if (instr.psetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
// Set the secondary predicate to the result of !Predicate OP SecondPredicate,
// if enabled
SetPredicate(instr.psetp.pred0,
"!(" + predicate + ") " + combiner + " (" + second_pred + ')');
}
break;
}
case OpCode::Type::FloatSet: {
std::string op_a = instr.fset.neg_a ? "-" : "";
op_a += regs.GetRegisterAsFloat(instr.gpr8);

View File

@@ -29,6 +29,10 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
switch (attrib.size) {
case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
return GL_UNSIGNED_BYTE;
case Maxwell::VertexAttribute::Size::Size_16_16:
return GL_UNSIGNED_SHORT;
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
return GL_UNSIGNED_INT_2_10_10_10_REV;
}
LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
@@ -41,6 +45,10 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
switch (attrib.size) {
case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
return GL_BYTE;
case Maxwell::VertexAttribute::Size::Size_16_16:
return GL_SHORT;
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
return GL_INT_2_10_10_10_REV;
}
LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
@@ -203,20 +211,28 @@ inline GLenum SwizzleSource(Tegra::Texture::SwizzleSource source) {
inline GLenum ComparisonOp(Maxwell::ComparisonOp comparison) {
switch (comparison) {
case Maxwell::ComparisonOp::Never:
case Maxwell::ComparisonOp::NeverOld:
return GL_NEVER;
case Maxwell::ComparisonOp::Less:
case Maxwell::ComparisonOp::LessOld:
return GL_LESS;
case Maxwell::ComparisonOp::Equal:
case Maxwell::ComparisonOp::EqualOld:
return GL_EQUAL;
case Maxwell::ComparisonOp::LessEqual:
case Maxwell::ComparisonOp::LessEqualOld:
return GL_LEQUAL;
case Maxwell::ComparisonOp::Greater:
case Maxwell::ComparisonOp::GreaterOld:
return GL_GREATER;
case Maxwell::ComparisonOp::NotEqual:
case Maxwell::ComparisonOp::NotEqualOld:
return GL_NOTEQUAL;
case Maxwell::ComparisonOp::GreaterEqual:
case Maxwell::ComparisonOp::GreaterEqualOld:
return GL_GEQUAL;
case Maxwell::ComparisonOp::Always:
case Maxwell::ComparisonOp::AlwaysOld:
return GL_ALWAYS;
}
LOG_CRITICAL(Render_OpenGL, "Unimplemented comparison op={}", static_cast<u32>(comparison));

View File

@@ -76,7 +76,9 @@ u32 BytesPerPixel(TextureFormat format) {
static u32 DepthBytesPerPixel(DepthFormat format) {
switch (format) {
case DepthFormat::S8_Z24_UNORM:
case DepthFormat::Z24_S8_UNORM:
case DepthFormat::Z32_FLOAT:
return 4;
default:
UNIMPLEMENTED_MSG("Format not implemented");
@@ -129,7 +131,9 @@ std::vector<u8> UnswizzleDepthTexture(VAddr address, DepthFormat format, u32 wid
std::vector<u8> unswizzled_data(width * height * bytes_per_pixel);
switch (format) {
case DepthFormat::S8_Z24_UNORM:
case DepthFormat::Z24_S8_UNORM:
case DepthFormat::Z32_FLOAT:
CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data,
unswizzled_data.data(), true, block_height);
break;