Compare commits

..

2 Commits

Author SHA1 Message Date
Joshua Goins
f2c5af2de7 Allow vertical scrolling in input configuration
On screens that are of limited height (like the Steam Deck), certain
buttons of the input configuration are cut off and cannot be accessed.
Now a vertical scrollbar will show up when the minimum height is not
available.
2023-05-16 15:32:47 -04:00
Joshua Goins
e3df744955 Add a minimum size on ZL/ZR sliders for themes which have big handles
KDE Plasma's Breeze theme has a big slider handle, which gets cut off, so let's add
a minimum size to these sliders.
2023-05-16 15:25:48 -04:00
59 changed files with 586 additions and 2184 deletions

View File

@@ -139,7 +139,3 @@ if (NOT TARGET LLVM::Demangle)
target_sources(demangle PRIVATE demangle/ItaniumDemangle.cpp)
add_library(LLVM::Demangle ALIAS demangle)
endif()
add_library(stb STATIC)
target_include_directories(stb PUBLIC ./stb)
target_sources(stb PRIVATE stb/stb_dxt.cpp)

View File

@@ -1,765 +0,0 @@
// SPDX-FileCopyrightText: fabian "ryg" giesen
// SPDX-License-Identifier: MIT
// stb_dxt.h - v1.12 - DXT1/DXT5 compressor
#include <stb_dxt.h>
#include <stdlib.h>
#include <string.h>
#if !defined(STBD_FABS)
#include <math.h>
#endif
#ifndef STBD_FABS
#define STBD_FABS(x) fabs(x)
#endif
// Single-colour endpoint table for the 5-bit channels (red and blue).
// Indexed by an 8-bit channel value; element [v][0] is packed into the max16
// endpoint and element [v][1] into the min16 endpoint by the single-colour
// paths of stb__RefineBlock and stb__CompressColorBlock.
static const unsigned char stb__OMatch5[256][2] = {
{0, 0}, {0, 0}, {0, 1}, {0, 1}, {1, 0}, {1, 0}, {1, 0}, {1, 1}, {1, 1},
{1, 1}, {1, 2}, {0, 4}, {2, 1}, {2, 1}, {2, 1}, {2, 2}, {2, 2}, {2, 2},
{2, 3}, {1, 5}, {3, 2}, {3, 2}, {4, 0}, {3, 3}, {3, 3}, {3, 3}, {3, 4},
{3, 4}, {3, 4}, {3, 5}, {4, 3}, {4, 3}, {5, 2}, {4, 4}, {4, 4}, {4, 5},
{4, 5}, {5, 4}, {5, 4}, {5, 4}, {6, 3}, {5, 5}, {5, 5}, {5, 6}, {4, 8},
{6, 5}, {6, 5}, {6, 5}, {6, 6}, {6, 6}, {6, 6}, {6, 7}, {5, 9}, {7, 6},
{7, 6}, {8, 4}, {7, 7}, {7, 7}, {7, 7}, {7, 8}, {7, 8}, {7, 8}, {7, 9},
{8, 7}, {8, 7}, {9, 6}, {8, 8}, {8, 8}, {8, 9}, {8, 9}, {9, 8}, {9, 8},
{9, 8}, {10, 7}, {9, 9}, {9, 9}, {9, 10}, {8, 12}, {10, 9}, {10, 9}, {10, 9},
{10, 10}, {10, 10}, {10, 10}, {10, 11}, {9, 13}, {11, 10}, {11, 10}, {12, 8}, {11, 11},
{11, 11}, {11, 11}, {11, 12}, {11, 12}, {11, 12}, {11, 13}, {12, 11}, {12, 11}, {13, 10},
{12, 12}, {12, 12}, {12, 13}, {12, 13}, {13, 12}, {13, 12}, {13, 12}, {14, 11}, {13, 13},
{13, 13}, {13, 14}, {12, 16}, {14, 13}, {14, 13}, {14, 13}, {14, 14}, {14, 14}, {14, 14},
{14, 15}, {13, 17}, {15, 14}, {15, 14}, {16, 12}, {15, 15}, {15, 15}, {15, 15}, {15, 16},
{15, 16}, {15, 16}, {15, 17}, {16, 15}, {16, 15}, {17, 14}, {16, 16}, {16, 16}, {16, 17},
{16, 17}, {17, 16}, {17, 16}, {17, 16}, {18, 15}, {17, 17}, {17, 17}, {17, 18}, {16, 20},
{18, 17}, {18, 17}, {18, 17}, {18, 18}, {18, 18}, {18, 18}, {18, 19}, {17, 21}, {19, 18},
{19, 18}, {20, 16}, {19, 19}, {19, 19}, {19, 19}, {19, 20}, {19, 20}, {19, 20}, {19, 21},
{20, 19}, {20, 19}, {21, 18}, {20, 20}, {20, 20}, {20, 21}, {20, 21}, {21, 20}, {21, 20},
{21, 20}, {22, 19}, {21, 21}, {21, 21}, {21, 22}, {20, 24}, {22, 21}, {22, 21}, {22, 21},
{22, 22}, {22, 22}, {22, 22}, {22, 23}, {21, 25}, {23, 22}, {23, 22}, {24, 20}, {23, 23},
{23, 23}, {23, 23}, {23, 24}, {23, 24}, {23, 24}, {23, 25}, {24, 23}, {24, 23}, {25, 22},
{24, 24}, {24, 24}, {24, 25}, {24, 25}, {25, 24}, {25, 24}, {25, 24}, {26, 23}, {25, 25},
{25, 25}, {25, 26}, {24, 28}, {26, 25}, {26, 25}, {26, 25}, {26, 26}, {26, 26}, {26, 26},
{26, 27}, {25, 29}, {27, 26}, {27, 26}, {28, 24}, {27, 27}, {27, 27}, {27, 27}, {27, 28},
{27, 28}, {27, 28}, {27, 29}, {28, 27}, {28, 27}, {29, 26}, {28, 28}, {28, 28}, {28, 29},
{28, 29}, {29, 28}, {29, 28}, {29, 28}, {30, 27}, {29, 29}, {29, 29}, {29, 30}, {29, 30},
{30, 29}, {30, 29}, {30, 29}, {30, 30}, {30, 30}, {30, 30}, {30, 31}, {30, 31}, {31, 30},
{31, 30}, {31, 30}, {31, 31}, {31, 31},
};
// Single-colour endpoint table for the 6-bit channel (green).
// Indexed by an 8-bit channel value; element [v][0] is packed into the max16
// endpoint and element [v][1] into the min16 endpoint by the single-colour
// paths of stb__RefineBlock and stb__CompressColorBlock.
static const unsigned char stb__OMatch6[256][2] = {
{0, 0}, {0, 1}, {1, 0}, {1, 1}, {1, 1}, {1, 2}, {2, 1}, {2, 2}, {2, 2},
{2, 3}, {3, 2}, {3, 3}, {3, 3}, {3, 4}, {4, 3}, {4, 4}, {4, 4}, {4, 5},
{5, 4}, {5, 5}, {5, 5}, {5, 6}, {6, 5}, {6, 6}, {6, 6}, {6, 7}, {7, 6},
{7, 7}, {7, 7}, {7, 8}, {8, 7}, {8, 8}, {8, 8}, {8, 9}, {9, 8}, {9, 9},
{9, 9}, {9, 10}, {10, 9}, {10, 10}, {10, 10}, {10, 11}, {11, 10}, {8, 16}, {11, 11},
{11, 12}, {12, 11}, {9, 17}, {12, 12}, {12, 13}, {13, 12}, {11, 16}, {13, 13}, {13, 14},
{14, 13}, {12, 17}, {14, 14}, {14, 15}, {15, 14}, {14, 16}, {15, 15}, {15, 16}, {16, 14},
{16, 15}, {17, 14}, {16, 16}, {16, 17}, {17, 16}, {18, 15}, {17, 17}, {17, 18}, {18, 17},
{20, 14}, {18, 18}, {18, 19}, {19, 18}, {21, 15}, {19, 19}, {19, 20}, {20, 19}, {20, 20},
{20, 20}, {20, 21}, {21, 20}, {21, 21}, {21, 21}, {21, 22}, {22, 21}, {22, 22}, {22, 22},
{22, 23}, {23, 22}, {23, 23}, {23, 23}, {23, 24}, {24, 23}, {24, 24}, {24, 24}, {24, 25},
{25, 24}, {25, 25}, {25, 25}, {25, 26}, {26, 25}, {26, 26}, {26, 26}, {26, 27}, {27, 26},
{24, 32}, {27, 27}, {27, 28}, {28, 27}, {25, 33}, {28, 28}, {28, 29}, {29, 28}, {27, 32},
{29, 29}, {29, 30}, {30, 29}, {28, 33}, {30, 30}, {30, 31}, {31, 30}, {30, 32}, {31, 31},
{31, 32}, {32, 30}, {32, 31}, {33, 30}, {32, 32}, {32, 33}, {33, 32}, {34, 31}, {33, 33},
{33, 34}, {34, 33}, {36, 30}, {34, 34}, {34, 35}, {35, 34}, {37, 31}, {35, 35}, {35, 36},
{36, 35}, {36, 36}, {36, 36}, {36, 37}, {37, 36}, {37, 37}, {37, 37}, {37, 38}, {38, 37},
{38, 38}, {38, 38}, {38, 39}, {39, 38}, {39, 39}, {39, 39}, {39, 40}, {40, 39}, {40, 40},
{40, 40}, {40, 41}, {41, 40}, {41, 41}, {41, 41}, {41, 42}, {42, 41}, {42, 42}, {42, 42},
{42, 43}, {43, 42}, {40, 48}, {43, 43}, {43, 44}, {44, 43}, {41, 49}, {44, 44}, {44, 45},
{45, 44}, {43, 48}, {45, 45}, {45, 46}, {46, 45}, {44, 49}, {46, 46}, {46, 47}, {47, 46},
{46, 48}, {47, 47}, {47, 48}, {48, 46}, {48, 47}, {49, 46}, {48, 48}, {48, 49}, {49, 48},
{50, 47}, {49, 49}, {49, 50}, {50, 49}, {52, 46}, {50, 50}, {50, 51}, {51, 50}, {53, 47},
{51, 51}, {51, 52}, {52, 51}, {52, 52}, {52, 52}, {52, 53}, {53, 52}, {53, 53}, {53, 53},
{53, 54}, {54, 53}, {54, 54}, {54, 54}, {54, 55}, {55, 54}, {55, 55}, {55, 55}, {55, 56},
{56, 55}, {56, 56}, {56, 56}, {56, 57}, {57, 56}, {57, 57}, {57, 57}, {57, 58}, {58, 57},
{58, 58}, {58, 58}, {58, 59}, {59, 58}, {59, 59}, {59, 59}, {59, 60}, {60, 59}, {60, 60},
{60, 60}, {60, 61}, {61, 60}, {61, 61}, {61, 61}, {61, 62}, {62, 61}, {62, 62}, {62, 62},
{62, 63}, {63, 62}, {63, 63}, {63, 63},
};
// Shift-and-add stand-in for dividing the product a*b by 255 with rounding:
// bias the product by 128, fold its high byte back in, then drop the low byte.
static int stb__Mul8Bit(int a, int b) {
    const int biased = a * b + 128;
    const int folded = biased + (biased >> 8);
    return folded >> 8;
}
// Unpacks a 5:6:5 colour word into out[0..2] as 8-bit channels and clears out[3].
static void stb__From16Bit(unsigned char* out, unsigned short v) {
    const int red5 = (v >> 11) & 0x1f;
    const int green6 = (v >> 5) & 0x3f;
    const int blue5 = v & 0x1f;

    // Widen to 8 bits by replicating the top bits into the freed low bits.
    out[0] = static_cast<unsigned char>((red5 * 33) >> 2);
    out[1] = static_cast<unsigned char>((green6 * 65) >> 4);
    out[2] = static_cast<unsigned char>((blue5 * 33) >> 2);
    out[3] = 0;
}
// Packs three 8-bit channels into a 5:6:5 word, scaling each channel down with
// stb__Mul8Bit (by 31 for red/blue, by 63 for green).
static unsigned short stb__As16Bit(int r, int g, int b) {
    const int red5 = stb__Mul8Bit(r, 31);
    const int green6 = stb__Mul8Bit(g, 63);
    const int blue5 = stb__Mul8Bit(b, 31);
    return (unsigned short)((red5 << 11) + (green6 << 5) + blue5);
}
// Returns the value one third of the way from a towards b.
// The rounding behaviour is selected at compile time by STB_DXT_USE_ROUNDING_BIAS.
static int stb__Lerp13(int a, int b) {
#ifdef STB_DXT_USE_ROUNDING_BIAS
    // Biased variant: add roughly a third (0x55 / 255) of the difference.
    const int delta = b - a;
    return a + stb__Mul8Bit(delta, 0x55);
#else
    // Unbiased variant: plain truncating integer division.
    // ("/ 3" can be replaced by "* 0xaaab) >> 17" if the division is too slow.)
    const int weighted = 2 * a + b;
    return weighted / 3;
#endif
}
// Returns the midpoint of a and b (truncating integer division).
static int stb__Lerp12(int a, int b) {
    const int sum = a + b;
    return sum / 2;
}
// Applies stb__Lerp13 to the first three channels of p1/p2 and stores the
// results in out[0..2]; out[3] is left untouched.
static void stb__Lerp13RGB(unsigned char* out, unsigned char* p1, unsigned char* p2) {
    for (int ch = 0; ch < 3; ++ch) {
        out[ch] = (unsigned char)stb__Lerp13(p1[ch], p2[ch]);
    }
}
// Applies stb__Lerp12 to the first three channels of p1/p2 and stores the
// results in out[0..2]; out[3] is left untouched.
static void stb__Lerp12RGB(unsigned char* out, unsigned char* p1, unsigned char* p2) {
    for (int ch = 0; ch < 3; ++ch) {
        out[ch] = (unsigned char)stb__Lerp12(p1[ch], p2[ch]);
    }
}
/****************************************************************************/
// Fills a 4-entry palette (4 bytes per entry) from two 5:6:5 endpoints:
// entry 0 = c0, entry 1 = c1, entry 2 = one third from c0 to c1,
// entry 3 = one third from c1 to c0.
static void stb__Eval4Colors(unsigned char* color, unsigned short c0, unsigned short c1) {
    unsigned char* const first = color;
    unsigned char* const second = color + 4;

    stb__From16Bit(first, c0);
    stb__From16Bit(second, c1);
    stb__Lerp13RGB(color + 8, first, second);
    stb__Lerp13RGB(color + 12, second, first);
}
// Fills a 3-entry palette (4 bytes per entry) from two 5:6:5 endpoints:
// entry 0 = c0, entry 1 = c1, entry 2 = their midpoint.
static void stb__Eval3Colors(unsigned char* color, unsigned short c0, unsigned short c1) {
    unsigned char* const first = color;
    unsigned char* const second = color + 4;

    stb__From16Bit(first, c0);
    stb__From16Bit(second, c1);
    stb__Lerp12RGB(color + 8, first, second);
}
// Colour matching for the 4-colour palette.
//
// Every pixel of the 16-pixel RGBA block and every palette entry is projected
// onto the axis (palette[0] - palette[1]). The palette projections define three
// cut points (kept doubled so no division is needed), and each pixel receives
// the index of the palette entry whose interval its projection falls into.
// This 1-D approximation is not always optimal in terms of euclidean distance,
// but it is very close and much faster:
// http://cbloomrants.blogspot.com/2008/12/12-08-08-dxtc-summary.html
//
// Returns the 16 two-bit indices packed with pixel 0 in the lowest bits.
static unsigned int stb__MatchColorsBlock(unsigned char* block, unsigned char* color) {
    const int axisR = color[0] - color[4];
    const int axisG = color[1] - color[5];
    const int axisB = color[2] - color[6];

    int stop[4];
    for (int entry = 0; entry < 4; ++entry) {
        const unsigned char* c = color + entry * 4;
        stop[entry] = c[0] * axisR + c[1] * axisG + c[2] * axisB;
    }

    // Doubled midpoints between neighbouring palette entries along the axis.
    const int lowCut = stop[1] + stop[3];
    const int midCut = stop[3] + stop[2];
    const int highCut = stop[2] + stop[0];

    unsigned int packed = 0;
    for (int px = 16; px-- > 0;) {
        const unsigned char* p = block + px * 4;
        const int proj = (p[0] * axisR + p[1] * axisG + p[2] * axisB) * 2;

        unsigned int index;
        if (proj < midCut) {
            index = (proj < lowCut) ? 1u : 3u;
        } else {
            index = (proj < highCut) ? 2u : 0u;
        }
        packed = (packed << 2) | index;
    }
    return packed;
}
// Colour matching for the 3-colour + transparent palette.
//
// `block` holds 16 RGBA pixels; `color` holds three palette entries (4 bytes
// each): entry 0, entry 1 and entry 2, which is expected to lie between them
// (stb__Eval3Colors stores their midpoint there).
//
// Pixels with alpha == 0 receive index 3. Every other pixel is projected onto
// the axis (entry0 - entry1) and receives the index of the nearest palette
// entry along that axis. Because the axis points from entry 1 towards entry 0,
// the projections are ordered entry1 <= entry2 <= entry0, so:
//   projection below the entry1/entry2 midpoint  -> index 1
//   projection between the two midpoints         -> index 2
//   projection at or above the entry2/entry0 one -> index 0
//
// Returns the 16 two-bit indices packed with pixel 0 in the lowest bits.
static unsigned int stb__MatchColorsAlphaBlock(unsigned char* block, unsigned char* color) {
    const int dirr = color[0 * 4 + 0] - color[1 * 4 + 0];
    const int dirg = color[0 * 4 + 1] - color[1 * 4 + 1];
    const int dirb = color[0 * 4 + 2] - color[1 * 4 + 2];

    int stops[3];
    for (int i = 0; i < 3; i++)
        stops[i] = color[i * 4 + 0] * dirr + color[i * 4 + 1] * dirg + color[i * 4 + 2] * dirb;

    // Doubled midpoints, compared against doubled pixel projections.
    const int lowPoint = stops[1] + stops[2];  // boundary between entry 1 and entry 2
    const int highPoint = stops[2] + stops[0]; // boundary between entry 2 and entry 0

    unsigned int mask = 0;
    for (int i = 15; i >= 0; i--) {
        const unsigned char* px = block + i * 4;
        mask <<= 2;

        if (px[3] == 0) {
            mask |= 3; // transparent
            continue;
        }

        const int dot = (px[0] * dirr + px[1] * dirg + px[2] * dirb) * 2;
        if (dot < lowPoint)
            mask |= 1;
        else if (dot < highPoint)
            mask |= 2;
        // else: closest to entry 0, index bits stay 0
    }
    return mask;
}
// Exchanges the two endpoint words when *pmax16 is greater than *pmin16, so that
// on return *pmax16 <= *pmin16.
static void stb__ReorderColors(unsigned short* pmax16, unsigned short* pmin16) {
    const unsigned short first = *pmax16;
    const unsigned short second = *pmin16;
    if (second >= first) {
        return;
    }
    *pmax16 = second;
    *pmin16 = first;
}
// Ensures *pmax16 >= *pmin16. When the endpoints have to be exchanged, the low
// bit of every 2-bit index in *pmask is flipped as well (0<->1, 2<->3) so each
// pixel keeps referring to the same palette colour.
static void stb__FinalizeColors(unsigned short* pmax16, unsigned short* pmin16,
                                unsigned int* pmask) {
    const unsigned short first = *pmax16;
    const unsigned short second = *pmin16;
    if (first >= second) {
        return;
    }
    *pmax16 = second;
    *pmin16 = first;
    *pmask ^= 0x55555555;
}
// The color optimization function. (Clever code, part 1)
//
// Chooses two endpoint colours for a block of 16 RGBA pixels (4 bytes per
// pixel; only bytes 0..2 of each pixel are read here):
//   1. per-channel mean, minimum and maximum,
//   2. covariance of the three channels around the mean,
//   3. nIterPower rounds of power iteration, started from the per-channel
//      range, to estimate the principal axis,
//   4. the pixels with the smallest and largest projection onto that axis
//      become the endpoints.
// The endpoints are written as 5:6:5 words to *pmax16 / *pmin16 and then passed
// through stb__ReorderColors.
static void stb__OptimizeColorsBlock(unsigned char* block, unsigned short* pmax16,
unsigned short* pmin16) {
int mind, maxd;
unsigned char *minp, *maxp;
double magn;
int v_r, v_g, v_b;
static const int nIterPower = 4;
float covf[6], vfr, vfg, vfb;
// determine color distribution
int cov[6];
int mu[3], min[3], max[3];
int ch, i, iter;
for (ch = 0; ch < 3; ch++) {
// bp addresses channel `ch` of pixel 0; bp[i] with i = 4, 8, ... walks the pixels
const unsigned char* bp = ((const unsigned char*)block) + ch;
int muv, minv, maxv;
muv = minv = maxv = bp[0];
for (i = 4; i < 64; i += 4) {
muv += bp[i];
if (bp[i] < minv)
minv = bp[i];
else if (bp[i] > maxv)
maxv = bp[i];
}
// rounded mean over the 16 pixels
mu[ch] = (muv + 8) >> 4;
min[ch] = minv;
max[ch] = maxv;
}
// determine covariance matrix
// (symmetric, so only six entries are kept: rr, rg, rb, gg, gb, bb)
for (i = 0; i < 6; i++)
cov[i] = 0;
for (i = 0; i < 16; i++) {
int r = block[i * 4 + 0] - mu[0];
int g = block[i * 4 + 1] - mu[1];
int b = block[i * 4 + 2] - mu[2];
cov[0] += r * r;
cov[1] += r * g;
cov[2] += r * b;
cov[3] += g * g;
cov[4] += g * b;
cov[5] += b * b;
}
// convert covariance matrix to float, find principal axis via power iter
for (i = 0; i < 6; i++)
covf[i] = static_cast<float>(cov[i]) / 255.0f;
// starting vector: the per-channel value range
vfr = (float)(max[0] - min[0]);
vfg = (float)(max[1] - min[1]);
vfb = (float)(max[2] - min[2]);
for (iter = 0; iter < nIterPower; iter++) {
float r = vfr * covf[0] + vfg * covf[1] + vfb * covf[2];
float g = vfr * covf[1] + vfg * covf[3] + vfb * covf[4];
float b = vfr * covf[2] + vfg * covf[4] + vfb * covf[5];
vfr = r;
vfg = g;
vfb = b;
}
// magn = largest absolute component of the iterated vector
magn = STBD_FABS(vfr);
if (STBD_FABS(vfg) > magn)
magn = STBD_FABS(vfg);
if (STBD_FABS(vfb) > magn)
magn = STBD_FABS(vfb);
if (magn < 4.0f) { // too small, default to luminance
v_r = 299; // JPEG YCbCr luma coefs, scaled by 1000.
v_g = 587;
v_b = 114;
} else {
// rescale so the largest component has magnitude 512, then truncate to int
magn = 512.0 / magn;
v_r = (int)(vfr * magn);
v_g = (int)(vfg * magn);
v_b = (int)(vfb * magn);
}
minp = maxp = block;
mind = maxd = block[0] * v_r + block[1] * v_g + block[2] * v_b;
// Pick colors at extreme points
for (i = 1; i < 16; i++) {
int dot = block[i * 4 + 0] * v_r + block[i * 4 + 1] * v_g + block[i * 4 + 2] * v_b;
if (dot < mind) {
mind = dot;
minp = block + i * 4;
}
if (dot > maxd) {
maxd = dot;
maxp = block + i * 4;
}
}
*pmax16 = stb__As16Bit(maxp[0], maxp[1], maxp[2]);
*pmin16 = stb__As16Bit(minp[0], minp[1], minp[2]);
stb__ReorderColors(pmax16, pmin16);
}
// Endpoint selection for blocks that may contain fully transparent pixels.
//
// Works like stb__OptimizeColorsBlock, but every pixel whose alpha byte is 0 is
// left out of the statistics and of the endpoint search:
//   1. per-channel mean, minimum and maximum over the non-transparent pixels,
//   2. covariance of the three channels around the mean,
//   3. nIterPower rounds of power iteration to estimate the principal axis,
//   4. the non-transparent pixels with the smallest and largest projection onto
//      that axis become the endpoints.
//
// Outputs (5:6:5 words):
//   - no non-transparent pixel at all: *pmax16 = 0, *pmin16 = 0xffff;
//   - otherwise the two endpoints, nudged apart by one when they would be equal,
//     then passed through stb__ReorderColors.
static void stb__OptimizeColorsAlphaBlock(unsigned char* block, unsigned short* pmax16,
                                          unsigned short* pmin16) {
    int mind, maxd;
    unsigned char *minp, *maxp;
    double magn;
    int v_r, v_g, v_b;
    static const int nIterPower = 4;
    float covf[6], vfr, vfg, vfb;

    // determine color distribution
    int cov[6];
    int mu[3], min[3], max[3];
    int ch, i, iter;
    for (ch = 0; ch < 3; ch++) {
        const unsigned char* bp = ((const unsigned char*)block) + ch;
        int muv = 0, minv = 256, maxv = -1;
        int num = 0;
        for (i = 0; i < 64; i += 4) {
            // bp is offset by ch, so the alpha byte of this pixel is bp[i + 3 - ch]
            if (bp[i + 3 - ch] == 0) {
                continue;
            }
            muv += bp[i];
            // min and max start from sentinels, so both must be checked for
            // every sample (the first sample has to update both of them)
            if (bp[i] < minv)
                minv = bp[i];
            if (bp[i] > maxv)
                maxv = bp[i];
            num++;
        }
        if (num > 0) {
            mu[ch] = (muv + num / 2) / num; // rounded mean over the counted pixels
            min[ch] = minv;
            max[ch] = maxv;
        } else {
            // nothing counted: keep the statistics neutral instead of the sentinels
            mu[ch] = 0;
            min[ch] = 0;
            max[ch] = 0;
        }
    }

    // determine covariance matrix (rr, rg, rb, gg, gb, bb)
    for (i = 0; i < 6; i++)
        cov[i] = 0;
    for (i = 0; i < 16; i++) {
        if (block[i * 4 + 3] == 0) {
            continue;
        }
        int r = block[i * 4 + 0] - mu[0];
        int g = block[i * 4 + 1] - mu[1];
        int b = block[i * 4 + 2] - mu[2];
        cov[0] += r * r;
        cov[1] += r * g;
        cov[2] += r * b;
        cov[3] += g * g;
        cov[4] += g * b;
        cov[5] += b * b;
    }

    // convert covariance matrix to float, find principal axis via power iter
    for (i = 0; i < 6; i++)
        covf[i] = static_cast<float>(cov[i]) / 255.0f;
    vfr = (float)(max[0] - min[0]);
    vfg = (float)(max[1] - min[1]);
    vfb = (float)(max[2] - min[2]);
    for (iter = 0; iter < nIterPower; iter++) {
        float r = vfr * covf[0] + vfg * covf[1] + vfb * covf[2];
        float g = vfr * covf[1] + vfg * covf[3] + vfb * covf[4];
        float b = vfr * covf[2] + vfg * covf[4] + vfb * covf[5];
        vfr = r;
        vfg = g;
        vfb = b;
    }
    magn = STBD_FABS(vfr);
    if (STBD_FABS(vfg) > magn)
        magn = STBD_FABS(vfg);
    if (STBD_FABS(vfb) > magn)
        magn = STBD_FABS(vfb);
    if (magn < 4.0f) { // too small, default to luminance
        v_r = 299;     // JPEG YCbCr luma coefs, scaled by 1000.
        v_g = 587;
        v_b = 114;
    } else {
        magn = 512.0 / magn;
        v_r = (int)(vfr * magn);
        v_g = (int)(vfg * magn);
        v_b = (int)(vfb * magn);
    }

    minp = maxp = nullptr;
    mind = 0x7fffffff;
    maxd = -0x7fffffff - 1; // most negative int, written without an unsigned literal

    // Pick colors at extreme points
    for (i = 0; i < 16; i++) {
        if (block[i * 4 + 3] == 0) {
            continue;
        }
        int dot = block[i * 4 + 0] * v_r + block[i * 4 + 1] * v_g + block[i * 4 + 2] * v_b;
        if (dot < mind) {
            mind = dot;
            minp = block + i * 4;
        }
        if (dot > maxd) {
            maxd = dot;
            maxp = block + i * 4;
        }
    }

    if (!maxp) {
        // all alpha, no color
        *pmin16 = 0xffff;
        *pmax16 = 0;
    } else {
        // endpoint colors found
        *pmax16 = stb__As16Bit(maxp[0], maxp[1], maxp[2]);
        *pmin16 = stb__As16Bit(minp[0], minp[1], minp[2]);
        if (*pmax16 == *pmin16) {
            // modify the endpoints to indicate presence of an alpha block
            if (*pmax16 > 0) {
                (*pmax16)--;
            } else {
                (*pmin16)++;
            }
        }
        stb__ReorderColors(pmax16, pmin16);
    }
}
// Round-up thresholds for stb__Quantize5: after x * 31 is truncated to q, the
// quantizer bumps q by one when x is greater than entry q of this table.
static const float stb__midpoints5[32] = {
0.015686f, 0.047059f, 0.078431f, 0.111765f, 0.145098f, 0.176471f, 0.207843f, 0.241176f,
0.274510f, 0.305882f, 0.337255f, 0.370588f, 0.403922f, 0.435294f, 0.466667f, 0.5f,
0.533333f, 0.564706f, 0.596078f, 0.629412f, 0.662745f, 0.694118f, 0.725490f, 0.758824f,
0.792157f, 0.823529f, 0.854902f, 0.888235f, 0.921569f, 0.952941f, 0.984314f, 1.0f};
// Round-up thresholds for stb__Quantize6: after x * 63 is truncated to q, the
// quantizer bumps q by one when x is greater than entry q of this table.
static const float stb__midpoints6[64] = {
0.007843f, 0.023529f, 0.039216f, 0.054902f, 0.070588f, 0.086275f, 0.101961f, 0.117647f,
0.133333f, 0.149020f, 0.164706f, 0.180392f, 0.196078f, 0.211765f, 0.227451f, 0.245098f,
0.262745f, 0.278431f, 0.294118f, 0.309804f, 0.325490f, 0.341176f, 0.356863f, 0.372549f,
0.388235f, 0.403922f, 0.419608f, 0.435294f, 0.450980f, 0.466667f, 0.482353f, 0.500000f,
0.517647f, 0.533333f, 0.549020f, 0.564706f, 0.580392f, 0.596078f, 0.611765f, 0.627451f,
0.643137f, 0.658824f, 0.674510f, 0.690196f, 0.705882f, 0.721569f, 0.737255f, 0.754902f,
0.772549f, 0.788235f, 0.803922f, 0.819608f, 0.835294f, 0.850980f, 0.866667f, 0.882353f,
0.898039f, 0.913725f, 0.929412f, 0.945098f, 0.960784f, 0.976471f, 0.992157f, 1.0f};
// Quantizes x to a 5-bit level: x is clamped to [0, 1], scaled by 31 and
// truncated, then moved up one level when x exceeds the threshold stored in
// stb__midpoints5 for the truncated level.
static unsigned short stb__Quantize5(float x) {
    if (x < 0) {
        x = 0;
    } else if (x > 1) {
        x = 1;
    }
    unsigned short level = (unsigned short)(x * 31);
    if (x > stb__midpoints5[level]) {
        level += 1;
    }
    return level;
}
// Quantizes x to a 6-bit level: x is clamped to [0, 1], scaled by 63 and
// truncated, then moved up one level when x exceeds the threshold stored in
// stb__midpoints6 for the truncated level.
static unsigned short stb__Quantize6(float x) {
    if (x < 0) {
        x = 0;
    } else if (x > 1) {
        x = 1;
    }
    unsigned short level = (unsigned short)(x * 63);
    if (x > stb__midpoints6[level]) {
        level += 1;
    }
    return level;
}
// The refinement function. (Clever code, part 2)
// Tries to optimize colors to suit block contents better.
// (By solving a least squares system via normal equations+Cramer's rule)
//
// block  : 16 RGBA pixels (only bytes 0..2 of each pixel are read)
// pmax16 : in/out 5:6:5 endpoint
// pmin16 : in/out 5:6:5 endpoint
// mask   : current 2-bit palette index per pixel, pixel 0 in the lowest bits
//
// The new endpoints are stored through pmin16/pmax16 and then passed through
// stb__ReorderColors. Returns nonzero when the newly computed endpoints (as
// computed, before the reordering) differ from the incoming ones.
static int stb__RefineBlock(unsigned char* block, unsigned short* pmax16, unsigned short* pmin16,
unsigned int mask) {
// w1Tab[index] = weight (in thirds) of the first endpoint for that palette index
static const int w1Tab[4] = {3, 0, 2, 1};
// prods[index] packs w1*w1 (bits 16+), (3-w1)*(3-w1) (bits 8..15) and w1*(3-w1) (bits 0..7)
static const int prods[4] = {0x090000, 0x000900, 0x040102, 0x010402};
// ^some magic to save a lot of multiplies in the accumulating loop...
// (precomputed products of weights for least squares system, accumulated
// inside one 32-bit register)
float f;
unsigned short oldMin, oldMax, min16, max16;
int i, akku = 0, xx, xy, yy;
int At1_r, At1_g, At1_b;
int At2_r, At2_g, At2_b;
unsigned int cm = mask;
oldMin = *pmin16;
oldMax = *pmax16;
if ((mask ^ (mask << 2)) < 4) // all pixels have the same index?
{
// yes, linear system would be singular; solve using optimal
// single-color match on average color
// (accumulators start at 8 so the >> 4 below rounds the 16-pixel average)
int r = 8, g = 8, b = 8;
for (i = 0; i < 16; ++i) {
r += block[i * 4 + 0];
g += block[i * 4 + 1];
b += block[i * 4 + 2];
}
r >>= 4;
g >>= 4;
b >>= 4;
max16 = static_cast<unsigned short>((stb__OMatch5[r][0] << 11) | (stb__OMatch6[g][0] << 5) |
stb__OMatch5[b][0]);
min16 = static_cast<unsigned short>((stb__OMatch5[r][1] << 11) | (stb__OMatch6[g][1] << 5) |
stb__OMatch5[b][1]);
} else {
At1_r = At1_g = At1_b = 0;
At2_r = At2_g = At2_b = 0;
// accumulate the weighted pixel sums; cm is consumed two bits per pixel
for (i = 0; i < 16; ++i, cm >>= 2) {
int step = cm & 3;
int w1 = w1Tab[step];
int r = block[i * 4 + 0];
int g = block[i * 4 + 1];
int b = block[i * 4 + 2];
akku += prods[step];
At1_r += w1 * r;
At1_g += w1 * g;
At1_b += w1 * b;
At2_r += r;
At2_g += g;
At2_b += b;
}
// At2 held the plain pixel sums; turn them into sums weighted by (3 - w1)
At2_r = 3 * At2_r - At1_r;
At2_g = 3 * At2_g - At1_g;
At2_b = 3 * At2_b - At1_b;
// extract solutions and decide solvability
xx = akku >> 16;
yy = (akku >> 8) & 0xff;
xy = (akku >> 0) & 0xff;
// f = scale / determinant; the results below are fed to the [0, 1] quantizers
f = 3.0f / 255.0f / static_cast<float>(xx * yy - xy * xy);
max16 = static_cast<unsigned short>(
stb__Quantize5(static_cast<float>(At1_r * yy - At2_r * xy) * f) << 11);
max16 |= static_cast<unsigned short>(
stb__Quantize6(static_cast<float>(At1_g * yy - At2_g * xy) * f) << 5);
max16 |= static_cast<unsigned short>(
stb__Quantize5(static_cast<float>(At1_b * yy - At2_b * xy) * f) << 0);
min16 = static_cast<unsigned short>(
stb__Quantize5(static_cast<float>(At2_r * xx - At1_r * xy) * f) << 11);
min16 |= static_cast<unsigned short>(
stb__Quantize6(static_cast<float>(At2_g * xx - At1_g * xy) * f) << 5);
min16 |= static_cast<unsigned short>(
stb__Quantize5(static_cast<float>(At2_b * xx - At1_b * xy) * f) << 0);
}
*pmin16 = min16;
*pmax16 = max16;
stb__ReorderColors(pmax16, pmin16);
return oldMin != min16 || oldMax != max16;
}
// Color block compression
//
// Encodes 16 RGBA pixels (`block`, 64 bytes) into an 8-byte colour block at
// `dest`: two little-endian 5:6:5 endpoints followed by sixteen 2-bit indices.
//
// alpha : nonzero selects the 3-colour + transparent path, in which pixels
//         with alpha == 0 receive index 3
// mode  : bit flags; STB_DXT_HIGHQUAL allows a second refinement pass
static void stb__CompressColorBlock(unsigned char* dest, unsigned char* block, int alpha,
                                    int mode) {
    unsigned int mask;
    int i;
    unsigned short max16, min16;
    unsigned char color[4 * 4];
    const int refinecount = (mode & STB_DXT_HIGHQUAL) ? 2 : 1;

    // check if block is constant
    // (compared bytewise: `block` is a plain byte buffer, so reading it through
    // an unsigned int pointer would assume alignment it does not guarantee)
    for (i = 1; i < 16; i++)
        if (memcmp(block + i * 4, block, 4) != 0)
            break;

    if (i == 16 && block[3] == 0 && alpha) { // constant alpha
        mask = 0xffffffff;
        max16 = 0;
        min16 = 0xffff;
    } else if (i == 16) { // constant color
        int r = block[0], g = block[1], b = block[2];
        mask = 0xaaaaaaaa;
        max16 = static_cast<unsigned short>((stb__OMatch5[r][0] << 11) |
                                            (stb__OMatch6[g][0] << 5) | stb__OMatch5[b][0]);
        min16 = static_cast<unsigned short>((stb__OMatch5[r][1] << 11) |
                                            (stb__OMatch6[g][1] << 5) | stb__OMatch5[b][1]);
    } else if (alpha) {
        stb__OptimizeColorsAlphaBlock(block, &max16, &min16);
        stb__Eval3Colors(color, max16, min16);
        mask = stb__MatchColorsAlphaBlock(block, color);
    } else {
        // first step: PCA+map along principal axis
        stb__OptimizeColorsBlock(block, &max16, &min16);
        if (max16 != min16) {
            stb__Eval4Colors(color, max16, min16);
            mask = stb__MatchColorsBlock(block, color);
        } else
            mask = 0;

        // third step: refine (multiple times if requested)
        for (i = 0; i < refinecount; i++) {
            unsigned int lastmask = mask;
            if (stb__RefineBlock(block, &max16, &min16, mask)) {
                if (max16 != min16) {
                    stb__Eval4Colors(color, max16, min16);
                    mask = stb__MatchColorsBlock(block, color);
                } else {
                    mask = 0;
                    break;
                }
            }
            // stop early once refinement no longer changes the indices
            if (mask == lastmask)
                break;
        }
    }

    // write the color block
    // (the endpoint order is only normalised on the non-alpha path)
    if (!alpha)
        stb__FinalizeColors(&max16, &min16, &mask);
    dest[0] = (unsigned char)(max16);
    dest[1] = (unsigned char)(max16 >> 8);
    dest[2] = (unsigned char)(min16);
    dest[3] = (unsigned char)(min16 >> 8);
    dest[4] = (unsigned char)(mask);
    dest[5] = (unsigned char)(mask >> 8);
    dest[6] = (unsigned char)(mask >> 16);
    dest[7] = (unsigned char)(mask >> 24);
}
// Alpha block compression.
//
// Reads 16 alpha samples from src (sample k at src[k * stride]) and writes an
// 8-byte block to dest: the largest sample, the smallest sample, then sixteen
// 3-bit indices packed least-significant-bit first.
// Given this choice of extremes the indices are optimal, see
// http://fgiesen.wordpress.com/2009/12/15/dxt5-alpha-block-index-determination/
static void stb__CompressAlphaBlock(unsigned char* dest, unsigned char* src, int stride) {
    // find the extreme samples
    int lowest = src[0];
    int highest = src[0];
    for (int px = 1; px < 16; ++px) {
        const int sample = src[px * stride];
        if (sample < lowest) {
            lowest = sample;
        } else if (sample > highest) {
            highest = sample;
        }
    }

    dest[0] = (unsigned char)highest;
    dest[1] = (unsigned char)lowest;
    unsigned char* out = dest + 2;

    const int range = highest - lowest;
    const int range2 = range * 2;
    const int range4 = range * 4;
    const int bias = ((range < 8) ? (range - 1) : (range / 2 + 2)) - lowest * 7;

    int pending = 0; // bits produced but not yet written
    int filled = 0;  // number of valid bits in `pending`
    for (int px = 0; px < 16; ++px) {
        int scaled = src[px * stride] * 7 + bias;

        // linear level between 0 (sample == lowest) and 7 (sample == highest),
        // built one binary digit at a time
        int level = 0;
        if (scaled >= range4) {
            level = 4;
            scaled -= range4;
        }
        if (scaled >= range2) {
            level += 2;
            scaled -= range2;
        }
        if (scaled >= range) {
            level += 1;
        }

        // turn the linear level into the block's index order
        // (indices 0 and 1 are the two extreme values)
        int code = -level & 7;
        if (code < 2) {
            code ^= 1;
        }

        pending |= code << filled;
        filled += 3;
        if (filled >= 8) {
            *out++ = (unsigned char)pending;
            pending >>= 8;
            filled -= 8;
        }
    }
}
// Public entry point: encodes 16 RGBA pixels from src into an 8-byte colour
// block at dest. `alpha` and `mode` are forwarded to stb__CompressColorBlock.
void stb_compress_bc1_block(unsigned char* dest, const unsigned char* src, int alpha, int mode) {
    // The internal routine takes a non-const pointer.
    unsigned char* const pixels = const_cast<unsigned char*>(src);
    stb__CompressColorBlock(dest, pixels, alpha, mode);
}
// Public entry point: encodes 16 RGBA pixels from src into a 16-byte block at
// dest: an 8-byte alpha block followed by an 8-byte colour block.
void stb_compress_bc3_block(unsigned char* dest, const unsigned char* src, int mode) {
    // Alpha half: every fourth byte starting at the alpha channel.
    stb__CompressAlphaBlock(dest, const_cast<unsigned char*>(src) + 3, 4);

    // Colour half: work on a copy whose alpha is forced to opaque, because the
    // colour path's constant-block test compares whole pixels, alpha included.
    unsigned char opaque[16 * 4];
    memcpy(opaque, src, sizeof(opaque));
    for (int px = 0; px < 16; ++px) {
        opaque[px * 4 + 3] = 255;
    }
    stb__CompressColorBlock(dest + 8, opaque, 0, mode);
}

View File

@@ -1,36 +0,0 @@
// SPDX-FileCopyrightText: fabian "ryg" giesen
// SPDX-License-Identifier: MIT
// stb_dxt.h - v1.12 - DXT1/DXT5 compressor
#ifndef STB_INCLUDE_STB_DXT_H
#define STB_INCLUDE_STB_DXT_H
#ifdef __cplusplus
extern "C" {
#endif
#ifdef STB_DXT_STATIC
#define STBDDEF static
#else
#define STBDDEF extern
#endif
// compression mode (bitflags)
#define STB_DXT_NORMAL 0
#define STB_DXT_DITHER 1 // use dithering. was always dubious, now deprecated. does nothing!
#define STB_DXT_HIGHQUAL \
2 // high quality mode, does two refinement steps instead of 1. ~30-40% slower.
// Compresses one block of 16 pixels (64 bytes, four bytes per pixel) into 8
// bytes at dest. A nonzero `alpha` makes pixels whose alpha byte is 0 be
// encoded as transparent. `mode` is a combination of the STB_DXT_* flags above.
STBDDEF void stb_compress_bc1_block(unsigned char* dest,
const unsigned char* src_rgba_four_bytes_per_pixel, int alpha,
int mode);
// Compresses one block of 16 pixels (64 bytes, four bytes per pixel) into 16
// bytes at dest: 8 bytes of alpha data followed by 8 bytes of colour data.
// `mode` is a combination of the STB_DXT_* flags above.
STBDDEF void stb_compress_bc3_block(unsigned char* dest, const unsigned char* src, int mode);
#define STB_COMPRESS_DXT_BLOCK
#ifdef __cplusplus
}
#endif
#endif // STB_INCLUDE_STB_DXT_H

View File

@@ -154,11 +154,6 @@ void AudioRenderer::ThreadFunc() {
return;
case RenderMessage::AudioRenderer_Render: {
if (system.IsShuttingDown()) [[unlikely]] {
std::this_thread::sleep_for(std::chrono::milliseconds(5));
mailbox->ADSPSendMessage(RenderMessage::AudioRenderer_RenderResponse);
continue;
}
std::array<bool, MaxRendererSessions> buffers_reset{};
std::array<u64, MaxRendererSessions> render_times_taken{};
const auto start_time{system.CoreTiming().GetClockTicks()};

View File

@@ -27,7 +27,7 @@ bool SystemManager::InitializeUnsafe() {
if (!active) {
if (adsp.Start()) {
active = true;
thread = std::jthread([this](std::stop_token stop_token) { ThreadFunc(stop_token); });
thread = std::jthread([this](std::stop_token stop_token) { ThreadFunc(); });
}
}
@@ -39,7 +39,8 @@ void SystemManager::Stop() {
return;
}
active = false;
thread.request_stop();
update.store(true);
update.notify_all();
thread.join();
adsp.Stop();
}
@@ -84,12 +85,12 @@ bool SystemManager::Remove(System& system_) {
return true;
}
void SystemManager::ThreadFunc(std::stop_token stop_token) {
void SystemManager::ThreadFunc() {
static constexpr char name[]{"AudioRenderSystemManager"};
MicroProfileOnThreadCreate(name);
Common::SetCurrentThreadName(name);
Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
while (active && !stop_token.stop_requested()) {
while (active) {
{
std::scoped_lock l{mutex1};

View File

@@ -66,7 +66,13 @@ private:
/**
* Main thread responsible for command generation.
*/
void ThreadFunc(std::stop_token stop_token);
void ThreadFunc();
enum class StreamState {
Filling,
Steady,
Draining,
};
/// Core system
Core::System& core;
@@ -84,6 +90,8 @@ private:
ADSP::ADSP& adsp;
/// AudioRenderer mailbox for communication
ADSP::AudioRenderer_Mailbox* mailbox{};
/// Atomic for main thread to wait on
std::atomic<bool> update{};
};
} // namespace AudioCore::AudioRenderer

View File

@@ -271,8 +271,8 @@ u64 SinkStream::GetExpectedPlayedSampleCount() {
void SinkStream::WaitFreeSpace() {
std::unique_lock lk{release_mutex};
release_cv.wait_for(lk, std::chrono::milliseconds(5),
[this]() { return queued_buffers < max_queue_size; });
release_cv.wait(
lk, [this]() { return queued_buffers < max_queue_size || system.IsShuttingDown(); });
}
} // namespace AudioCore::Sink

View File

@@ -61,7 +61,6 @@ void LogSettings() {
log_setting("Renderer_NvdecEmulation", values.nvdec_emulation.GetValue());
log_setting("Renderer_AccelerateASTC", values.accelerate_astc.GetValue());
log_setting("Renderer_AsyncASTC", values.async_astc.GetValue());
log_setting("Renderer_AstcRecompression", values.astc_recompression.GetValue());
log_setting("Renderer_UseVsync", values.vsync_mode.GetValue());
log_setting("Renderer_UseReactiveFlushing", values.use_reactive_flushing.GetValue());
log_setting("Renderer_ShaderBackend", values.shader_backend.GetValue());
@@ -225,7 +224,6 @@ void RestoreGlobalState(bool is_powered_on) {
values.nvdec_emulation.SetGlobal(true);
values.accelerate_astc.SetGlobal(true);
values.async_astc.SetGlobal(true);
values.astc_recompression.SetGlobal(true);
values.use_reactive_flushing.SetGlobal(true);
values.shader_backend.SetGlobal(true);
values.use_asynchronous_shaders.SetGlobal(true);

View File

@@ -90,12 +90,6 @@ enum class AntiAliasing : u32 {
LastAA = Smaa,
};
enum class AstcRecompression : u32 {
Uncompressed = 0,
Bc1 = 1,
Bc3 = 2,
};
struct ResolutionScalingInfo {
u32 up_scale{1};
u32 down_shift{0};
@@ -479,9 +473,6 @@ struct Values {
SwitchableSetting<bool> use_vulkan_driver_pipeline_cache{true,
"use_vulkan_driver_pipeline_cache"};
SwitchableSetting<bool> enable_compute_pipelines{false, "enable_compute_pipelines"};
SwitchableSetting<AstcRecompression, true> astc_recompression{
AstcRecompression::Uncompressed, AstcRecompression::Uncompressed, AstcRecompression::Bc3,
"astc_recompression"};
SwitchableSetting<u8> bg_red{0, "bg_red"};
SwitchableSetting<u8> bg_green{0, "bg_green"};

View File

@@ -1283,14 +1283,9 @@ bool EmulatedController::HasNfc() const {
}
bool EmulatedController::WriteNfc(const std::vector<u8>& data) {
auto& nfc_output_device = output_devices[static_cast<std::size_t>(DeviceIndex::Right)];
auto& nfc_virtual_output_device = output_devices[3];
auto& nfc_output_device = output_devices[3];
if (nfc_output_device->SupportsNfc() != Common::Input::NfcState::NotSupported) {
return nfc_output_device->WriteNfcData(data) == Common::Input::NfcState::Success;
}
return nfc_virtual_output_device->WriteNfcData(data) == Common::Input::NfcState::Success;
return nfc_output_device->WriteNfcData(data) == Common::Input::NfcState::Success;
}
void EmulatedController::SetLedPattern() {

View File

@@ -144,10 +144,14 @@ private:
class KScopedMemoryBlockManagerAuditor {
public:
explicit KScopedMemoryBlockManagerAuditor(KMemoryBlockManager* m) : m_manager(m) {}
explicit KScopedMemoryBlockManagerAuditor(KMemoryBlockManager* m) : m_manager(m) {
ASSERT(m_manager->CheckState());
}
explicit KScopedMemoryBlockManagerAuditor(KMemoryBlockManager& m)
: KScopedMemoryBlockManagerAuditor(std::addressof(m)) {}
~KScopedMemoryBlockManagerAuditor() = default;
~KScopedMemoryBlockManagerAuditor() {
ASSERT(m_manager->CheckState());
}
private:
KMemoryBlockManager* m_manager;

View File

@@ -52,6 +52,9 @@ bool IsAmiiboValid(const EncryptedNTAG215File& ntag_file) {
if (ntag_file.compability_container != 0xEEFF10F1U) {
return false;
}
if (amiibo_data.constant_value != 0xA5) {
return false;
}
if (amiibo_data.model_info.tag_type != NFC::PackedTagType::Type2) {
return false;
}

View File

@@ -119,31 +119,18 @@ bool NfcDevice::LoadNfcTag(std::span<const u8> data) {
memcpy(&tag_data, data.data(), sizeof(NFP::EncryptedNTAG215File));
is_plain_amiibo = NFP::AmiiboCrypto::IsAmiiboValid(tag_data);
is_write_protected = false;
if (is_plain_amiibo) {
encrypted_tag_data = NFP::AmiiboCrypto::EncodedDataToNfcData(tag_data);
LOG_INFO(Service_NFP, "Using plain amiibo");
} else {
tag_data = {};
memcpy(&encrypted_tag_data, data.data(), sizeof(NFP::EncryptedNTAG215File));
}
device_state = DeviceState::TagFound;
deactivate_event->GetReadableEvent().Clear();
activate_event->Signal();
// Fallback for plain amiibos
if (is_plain_amiibo) {
LOG_INFO(Service_NFP, "Using plain amiibo");
encrypted_tag_data = NFP::AmiiboCrypto::EncodedDataToNfcData(tag_data);
return true;
}
// Fallback for encrypted amiibos without keys
if (!NFP::AmiiboCrypto::IsKeyAvailable()) {
LOG_INFO(Service_NFC, "Loading amiibo without keys");
memcpy(&encrypted_tag_data, data.data(), sizeof(NFP::EncryptedNTAG215File));
BuildAmiiboWithoutKeys();
is_plain_amiibo = true;
is_write_protected = true;
return true;
}
tag_data = {};
memcpy(&encrypted_tag_data, data.data(), sizeof(NFP::EncryptedNTAG215File));
return true;
}
@@ -359,15 +346,23 @@ Result NfcDevice::Mount(NFP::ModelType model_type, NFP::MountTarget mount_target
return ResultWrongDeviceState;
}
// The loaded amiibo is not encrypted
if (is_plain_amiibo) {
device_state = DeviceState::TagMounted;
mount_target = mount_target_;
return ResultSuccess;
}
if (!NFP::AmiiboCrypto::IsAmiiboValid(encrypted_tag_data)) {
LOG_ERROR(Service_NFP, "Not an amiibo");
return ResultNotAnAmiibo;
}
// The loaded amiibo is not encrypted
if (is_plain_amiibo) {
// Mark amiibos as read only when keys are missing
if (!NFP::AmiiboCrypto::IsKeyAvailable()) {
LOG_ERROR(Service_NFP, "No keys detected");
device_state = DeviceState::TagMounted;
mount_target = mount_target_;
mount_target = NFP::MountTarget::Rom;
return ResultSuccess;
}
@@ -426,11 +421,11 @@ Result NfcDevice::Flush() {
tag_data.write_counter++;
const auto result = FlushWithBreak(NFP::BreakType::Normal);
FlushWithBreak(NFP::BreakType::Normal);
is_data_moddified = false;
return result;
return ResultSuccess;
}
Result NfcDevice::FlushDebug() {
@@ -449,11 +444,11 @@ Result NfcDevice::FlushDebug() {
tag_data.write_counter++;
const auto result = FlushWithBreak(NFP::BreakType::Normal);
FlushWithBreak(NFP::BreakType::Normal);
is_data_moddified = false;
return result;
return ResultSuccess;
}
Result NfcDevice::FlushWithBreak(NFP::BreakType break_type) {
@@ -462,11 +457,6 @@ Result NfcDevice::FlushWithBreak(NFP::BreakType break_type) {
return ResultWrongDeviceState;
}
if (is_write_protected) {
LOG_ERROR(Service_NFP, "No keys available skipping write request");
return ResultSuccess;
}
std::vector<u8> data(sizeof(NFP::EncryptedNTAG215File));
if (is_plain_amiibo) {
memcpy(data.data(), &tag_data, sizeof(tag_data));
@@ -1043,6 +1033,7 @@ Result NfcDevice::GetAll(NFP::NfpData& data) const {
}
NFP::CommonInfo common_info{};
Service::Mii::MiiManager manager;
const u64 application_id = tag_data.application_id;
GetCommonInfo(common_info);
@@ -1258,28 +1249,6 @@ void NfcDevice::UpdateRegisterInfoCrc() {
tag_data.register_info_crc = crc.checksum();
}
// Fills tag_data from encrypted_tag_data (via NfcDataToEncodedData) and then overwrites the
// common, register and admin fields with fixed placeholder values.
// Takes no parameters and returns nothing; it works on the tag_data / encrypted_tag_data members.
void NfcDevice::BuildAmiiboWithoutKeys() {
Service::Mii::MiiManager manager;
// Reference to a sub-object of tag_data; tag_data is assigned (not re-created) on the next
// line, so the reference keeps pointing at the same member afterwards.
auto& settings = tag_data.settings;
tag_data = NFP::AmiiboCrypto::NfcDataToEncodedData(encrypted_tag_data);
// Common info
tag_data.write_counter = 0;
tag_data.amiibo_version = 0;
settings.write_date = GetAmiiboDate(GetCurrentPosixTime());
// Register info
SetAmiiboName(settings, {'y', 'u', 'z', 'u', 'A', 'm', 'i', 'i', 'b', 'o'});
settings.settings.font_region.Assign(0);
settings.init_date = GetAmiiboDate(GetCurrentPosixTime());
// Owner Mii is the manager's default Mii (index 0) converted from store data.
tag_data.owner_mii = manager.BuildFromStoreData(manager.BuildDefault(0));
// Admin info
// The amiibo is flagged as initialized, but its application data area is flagged as not.
settings.settings.amiibo_initialized.Assign(1);
settings.settings.appdata_initialized.Assign(0);
}
u64 NfcDevice::GetHandle() const {
// Generate a handle based of the npad id
return static_cast<u64>(npad_id);

View File

@@ -110,8 +110,6 @@ private:
void UpdateSettingsCrc();
void UpdateRegisterInfoCrc();
void BuildAmiiboWithoutKeys();
bool is_controller_set{};
int callback_key;
const Core::HID::NpadIdType npad_id;
@@ -130,7 +128,6 @@ private:
bool is_data_moddified{};
bool is_app_area_open{};
bool is_plain_amiibo{};
bool is_write_protected{};
NFP::MountTarget mount_target{NFP::MountTarget::None};
NFP::NTAG215File tag_data{};

View File

@@ -195,8 +195,8 @@ void Joycons::RegisterNewDevice(SDL_hid_device_info* device_info) {
OnMotionUpdate(port, type, id, value);
}},
.on_ring_data = {[this](f32 ring_data) { OnRingConUpdate(ring_data); }},
.on_amiibo_data = {[this, port, type](const std::vector<u8>& amiibo_data) {
OnAmiiboUpdate(port, type, amiibo_data);
.on_amiibo_data = {[this, port](const std::vector<u8>& amiibo_data) {
OnAmiiboUpdate(port, amiibo_data);
}},
.on_camera_data = {[this, port](const std::vector<u8>& camera_data,
Joycon::IrsResolution format) {
@@ -291,13 +291,9 @@ Common::Input::NfcState Joycons::SupportsNfc(const PadIdentifier& identifier_) c
return Common::Input::NfcState::Success;
};
Common::Input::NfcState Joycons::WriteNfcData(const PadIdentifier& identifier,
Common::Input::NfcState Joycons::WriteNfcData(const PadIdentifier& identifier_,
const std::vector<u8>& data) {
auto handle = GetHandle(identifier);
if (handle->WriteNfcData(data) != Joycon::DriverResult::Success) {
return Common::Input::NfcState::WriteFailed;
}
return Common::Input::NfcState::Success;
return Common::Input::NfcState::NotSupported;
};
Common::Input::DriverResult Joycons::SetPollingMode(const PadIdentifier& identifier,
@@ -402,9 +398,8 @@ void Joycons::OnRingConUpdate(f32 ring_data) {
SetAxis(identifier, 100, ring_data);
}
void Joycons::OnAmiiboUpdate(std::size_t port, Joycon::ControllerType type,
const std::vector<u8>& amiibo_data) {
const auto identifier = GetIdentifier(port, type);
void Joycons::OnAmiiboUpdate(std::size_t port, const std::vector<u8>& amiibo_data) {
const auto identifier = GetIdentifier(port, Joycon::ControllerType::Right);
const auto nfc_state = amiibo_data.empty() ? Common::Input::NfcState::AmiiboRemoved
: Common::Input::NfcState::NewAmiibo;
SetNfc(identifier, {nfc_state, amiibo_data});

View File

@@ -81,8 +81,7 @@ private:
void OnMotionUpdate(std::size_t port, Joycon::ControllerType type, int id,
const Joycon::MotionData& value);
void OnRingConUpdate(f32 ring_data);
void OnAmiiboUpdate(std::size_t port, Joycon::ControllerType type,
const std::vector<u8>& amiibo_data);
void OnAmiiboUpdate(std::size_t port, const std::vector<u8>& amiibo_data);
void OnCameraUpdate(std::size_t port, const std::vector<u8>& camera_data,
Joycon::IrsResolution format);

View File

@@ -492,26 +492,6 @@ DriverResult JoyconDriver::SetRingConMode() {
return result;
}
/// Writes `data` to the amiibo currently detected by this joycon.
///
/// @param data Raw tag contents forwarded unchanged to NfcProtocol::WriteAmiibo.
/// @return DriverResult::NotSupported when the controller has no NFC feature,
///         DriverResult::Disabled when the NFC protocol is not enabled,
///         DriverResult::ErrorWritingData when no amiibo is detected,
///         otherwise the result of NfcProtocol::WriteAmiibo.
DriverResult JoyconDriver::WriteNfcData(std::span<const u8> data) {
    std::scoped_lock lock{mutex};

    // Validate first. Previously disable_input_thread was raised before these checks and the
    // early returns never lowered it again, leaving the input thread disabled after a
    // rejected write request.
    if (!supported_features.nfc) {
        return DriverResult::NotSupported;
    }
    if (!nfc_protocol->IsEnabled()) {
        return DriverResult::Disabled;
    }
    if (!amiibo_detected) {
        return DriverResult::ErrorWritingData;
    }

    // The flag is raised only around the actual write and always lowered afterwards.
    disable_input_thread = true;
    const auto result = nfc_protocol->WriteAmiibo(data);
    disable_input_thread = false;

    return result;
}
bool JoyconDriver::IsConnected() const {
std::scoped_lock lock{mutex};
return is_connected.load();

View File

@@ -49,7 +49,6 @@ public:
DriverResult SetIrMode();
DriverResult SetNfcMode();
DriverResult SetRingConMode();
DriverResult WriteNfcData(std::span<const u8> data);
void SetCallbacks(const JoyconCallbacks& callbacks);

View File

@@ -265,7 +265,7 @@ DriverResult JoyconCommonProtocol::SendMCUData(ReportMode report_mode, MCUSubCom
DriverResult JoyconCommonProtocol::WaitSetMCUMode(ReportMode report_mode, MCUMode mode) {
MCUCommandResponse output{};
constexpr std::size_t MaxTries{16};
constexpr std::size_t MaxTries{8};
std::size_t tries{};
do {

View File

@@ -23,7 +23,6 @@ constexpr std::array<u8, 8> DefaultVibrationBuffer{0x0, 0x1, 0x40, 0x40, 0x0, 0x
using MacAddress = std::array<u8, 6>;
using SerialNumber = std::array<u8, 15>;
using TagUUID = std::array<u8, 7>;
enum class ControllerType : u8 {
None = 0x00,
@@ -277,13 +276,12 @@ enum class MCUPacketFlag : u8 {
LastCommandPacket = 0x08,
};
enum class NFCCommand : u8 {
enum class NFCReadCommand : u8 {
CancelAll = 0x00,
StartPolling = 0x01,
StopPolling = 0x02,
StartWaitingRecieve = 0x04,
ReadNtag = 0x06,
WriteNtag = 0x08,
Ntag = 0x06,
Mifare = 0x0F,
};
@@ -294,19 +292,14 @@ enum class NFCTagType : u8 {
// Identifiers for the read-block layouts understood by NfcProtocol::GetReadBlockCommand,
// which switches on this value to build the matching NFCReadBlockCommand.
// NOTE(review): the numeric values presumably relate to how many NTAG pages are covered —
// confirm against GetReadBlockCommand.
enum class NFCPages {
Block0 = 0,
Block3 = 3,
Block45 = 45,
Block135 = 135,
Block231 = 231,
};
// NFC state byte reported by the MCU. The protocol code casts mcu_data[6] of an MCU response
// to this enum and compares it against the state it is waiting for.
enum class NFCStatus : u8 {
Ready = 0x00,
Polling = 0x01,
LastPackage = 0x04,
WriteDone = 0x05,
TagLost = 0x07,
WriteReady = 0x09,
};
enum class IrsMode : u8 {
@@ -566,32 +559,13 @@ static_assert(sizeof(NFCReadBlockCommand) == 0x9, "NFCReadBlockCommand is an inv
struct NFCReadCommandData {
u8 unknown;
u8 uuid_length;
TagUUID uid;
u8 unknown_2;
std::array<u8, 6> uid;
NFCTagType tag_type;
NFCReadBlockCommand read_block;
};
static_assert(sizeof(NFCReadCommandData) == 0x13, "NFCReadCommandData is an invalid size");
// Packed to 1-byte alignment so the layout has no padding; the static_assert below pins the
// resulting size to 0x15 bytes.
#pragma pack(push, 1)
// Header placed at the start of an NFCWritePackage. Built by
// NfcProtocol::MakeAmiiboWritePackage, which copies magic, write_count and amiibo_version from
// bytes 16..19 of the tag dump and fills the unknownN fields with fixed constants.
struct NFCWriteCommandData {
u8 unknown;
u8 uuid_length;
TagUUID uid;
NFCTagType tag_type;
u8 unknown2;
u8 unknown3;
u8 unknown4;
u8 unknown5;
u8 unknown6;
u8 unknown7;
u8 unknown8;
u8 magic;
// Stored big-endian (u16_be).
u16_be write_count;
u8 amiibo_version;
};
static_assert(sizeof(NFCWriteCommandData) == 0x15, "NFCWriteCommandData is an invalid size");
#pragma pack(pop)
struct NFCPollingCommandData {
u8 enable_mifare;
u8 unknown_1;
@@ -602,9 +576,9 @@ struct NFCPollingCommandData {
static_assert(sizeof(NFCPollingCommandData) == 0x05, "NFCPollingCommandData is an invalid size");
struct NFCRequestState {
NFCCommand command_argument;
u8 block_id;
NFCReadCommand command_argument;
u8 packet_id;
INSERT_PADDING_BYTES(0x1);
MCUPacketFlag packet_flag;
u8 data_length;
union {
@@ -617,18 +591,6 @@ struct NFCRequestState {
};
static_assert(sizeof(NFCRequestState) == 0x26, "NFCRequestState is an invalid size");
// One contiguous run of tag data inside an NFCWritePackage.
// NfcProtocol::MakeAmiiboChunk fills it by copying data_size bytes starting at byte offset
// nfc_page * 4 of the dump; NfcProtocol::SerializeWritePackage emits nfc_page, data_size and
// then only the first data_size bytes of data.
struct NFCDataChunk {
u8 nfc_page;
u8 data_size;
// Fixed-capacity storage; only the first data_size bytes are meaningful.
std::array<u8, 0xFF> data;
};
// In-memory form of an amiibo write request before it is flattened by
// NfcProtocol::SerializeWritePackage. That function memcpy's the first
// sizeof(NFCWriteCommandData) + sizeof(number_of_chunks) bytes of this struct as the header,
// so command_data and number_of_chunks must stay first and contiguous.
struct NFCWritePackage {
NFCWriteCommandData command_data;
u8 number_of_chunks;
// MakeAmiiboWritePackage fills three entries; the fourth is left value-initialized.
std::array<NFCDataChunk, 4> data_chunks;
};
struct IrsConfigure {
MCUCommand command;
MCUSubCommand sub_command;

View File

@@ -34,12 +34,6 @@ DriverResult NfcProtocol::EnableNfc() {
result = ConfigureMCU(config);
}
if (result == DriverResult::Success) {
result = WaitSetMCUMode(ReportMode::NFC_IR_MODE_60HZ, MCUMode::NFC);
}
if (result == DriverResult::Success) {
result = WaitUntilNfcIs(NFCStatus::Ready);
}
return result;
}
@@ -62,20 +56,13 @@ DriverResult NfcProtocol::StartNFCPollingMode() {
LOG_DEBUG(Input, "Start NFC pooling Mode");
ScopedSetBlocking sb(this);
DriverResult result{DriverResult::Success};
TagFoundData tag_data{};
if (result == DriverResult::Success) {
MCUCommandResponse output{};
result = SendStopPollingRequest(output);
result = WaitSetMCUMode(ReportMode::NFC_IR_MODE_60HZ, MCUMode::NFC);
}
if (result == DriverResult::Success) {
result = WaitUntilNfcIs(NFCStatus::Ready);
}
if (result == DriverResult::Success) {
MCUCommandResponse output{};
result = SendStartPollingRequest(output);
}
if (result == DriverResult::Success) {
result = WaitUntilNfcIs(NFCStatus::Polling);
result = WaitUntilNfcIsReady();
}
if (result == DriverResult::Success) {
is_enabled = true;
@@ -90,94 +77,49 @@ DriverResult NfcProtocol::ScanAmiibo(std::vector<u8>& data) {
}
update_counter = 0;
LOG_DEBUG(Input, "Scan for amiibos");
LOG_DEBUG(Input, "Start NFC pooling Mode");
ScopedSetBlocking sb(this);
DriverResult result{DriverResult::Success};
TagFoundData tag_data{};
if (result == DriverResult::Success) {
result = IsTagInRange(tag_data);
result = StartPolling(tag_data);
}
if (result == DriverResult::Success) {
result = ReadTag(tag_data);
}
if (result == DriverResult::Success) {
result = WaitUntilNfcIsReady();
}
if (result == DriverResult::Success) {
result = StartPolling(tag_data, 7);
}
if (result == DriverResult::Success) {
std::string uuid_string;
for (auto& content : tag_data.uuid) {
uuid_string += fmt::format(" {:02x}", content);
}
LOG_INFO(Input, "Tag detected, type={}, uuid={}", tag_data.type, uuid_string);
result = GetAmiiboData(data);
}
return result;
}
/// Writes an amiibo dump to the tag currently in range.
///
/// Sequence: confirm a tag is in range and that its UUID matches the one embedded in `data`,
/// stop polling, wait for Ready, restart polling in its second-attempt form, wait for
/// WriteReady, send the data, wait for WriteDone, then stop polling again. The first step
/// that fails ends the sequence and its result is returned.
///
/// @param data Raw tag contents to write.
/// @return DriverResult::Success when every step succeeds, DriverResult::InvalidParameters when
///         the tag in range has a different UUID, otherwise the failing step's result.
DriverResult NfcProtocol::WriteAmiibo(std::span<const u8> data) {
    LOG_DEBUG(Input, "Write amiibo");
    ScopedSetBlocking sb(this);

    const TagUUID tag_uuid = GetTagUUID(data);
    TagFoundData tag_data{};

    if (const auto status = IsTagInRange(tag_data, 7); status != DriverResult::Success) {
        return status;
    }

    // Refuse to overwrite a tag other than the one the dump belongs to.
    if (tag_data.uuid != tag_uuid) {
        return DriverResult::InvalidParameters;
    }

    {
        MCUCommandResponse output{};
        if (const auto status = SendStopPollingRequest(output); status != DriverResult::Success) {
            return status;
        }
    }

    if (const auto status = WaitUntilNfcIs(NFCStatus::Ready); status != DriverResult::Success) {
        return status;
    }

    {
        MCUCommandResponse output{};
        if (const auto status = SendStartPollingRequest(output, true);
            status != DriverResult::Success) {
            return status;
        }
    }

    if (const auto status = WaitUntilNfcIs(NFCStatus::WriteReady);
        status != DriverResult::Success) {
        return status;
    }

    if (const auto status = WriteAmiiboData(tag_uuid, data); status != DriverResult::Success) {
        return status;
    }

    if (const auto status = WaitUntilNfcIs(NFCStatus::WriteDone);
        status != DriverResult::Success) {
        return status;
    }

    MCUCommandResponse output{};
    return SendStopPollingRequest(output);
}
bool NfcProtocol::HasAmiibo() {
if (update_counter++ < AMIIBO_UPDATE_DELAY) {
return true;
}
update_counter = 0;
ScopedSetBlocking sb(this);
DriverResult result{DriverResult::Success};
TagFoundData tag_data{};
if (result == DriverResult::Success) {
result = IsTagInRange(tag_data, 7);
result = StartPolling(tag_data);
}
return result == DriverResult::Success;
}
DriverResult NfcProtocol::WaitUntilNfcIs(NFCStatus status) {
DriverResult NfcProtocol::WaitUntilNfcIsReady() {
constexpr std::size_t timeout_limit = 10;
MCUCommandResponse output{};
std::size_t tries = 0;
do {
auto result = SendNextPackageRequest(output, {});
auto result = SendStartWaitingRecieveRequest(output);
if (result != DriverResult::Success) {
return result;
@@ -187,17 +129,18 @@ DriverResult NfcProtocol::WaitUntilNfcIs(NFCStatus status) {
}
} while (output.mcu_report != MCUReport::NFCState ||
(output.mcu_data[1] << 8) + output.mcu_data[0] != 0x0500 ||
output.mcu_data[5] != 0x31 || output.mcu_data[6] != static_cast<u8>(status));
output.mcu_data[5] != 0x31 || output.mcu_data[6] != 0x00);
return DriverResult::Success;
}
DriverResult NfcProtocol::IsTagInRange(TagFoundData& data, std::size_t timeout_limit) {
DriverResult NfcProtocol::StartPolling(TagFoundData& data, std::size_t timeout_limit) {
LOG_DEBUG(Input, "Start Polling for tag");
MCUCommandResponse output{};
std::size_t tries = 0;
do {
const auto result = SendNextPackageRequest(output, {});
const auto result = SendStartPollingRequest(output);
if (result != DriverResult::Success) {
return result;
}
@@ -206,31 +149,94 @@ DriverResult NfcProtocol::IsTagInRange(TagFoundData& data, std::size_t timeout_l
}
} while (output.mcu_report != MCUReport::NFCState ||
(output.mcu_data[1] << 8) + output.mcu_data[0] != 0x0500 ||
(output.mcu_data[6] != 0x09 && output.mcu_data[6] != 0x04));
output.mcu_data[6] != 0x09);
data.type = output.mcu_data[12];
data.uuid_size = std::min(output.mcu_data[14], static_cast<u8>(sizeof(TagUUID)));
data.uuid.resize(output.mcu_data[14]);
memcpy(data.uuid.data(), output.mcu_data.data() + 15, data.uuid.size());
return DriverResult::Success;
}
DriverResult NfcProtocol::GetAmiiboData(std::vector<u8>& ntag_data) {
constexpr std::size_t timeout_limit = 60;
DriverResult NfcProtocol::ReadTag(const TagFoundData& data) {
constexpr std::size_t timeout_limit = 10;
MCUCommandResponse output{};
std::size_t tries = 0;
u8 package_index = 0;
std::size_t ntag_buffer_pos = 0;
auto result = SendReadAmiiboRequest(output, NFCPages::Block135);
if (result != DriverResult::Success) {
return result;
std::string uuid_string;
for (auto& content : data.uuid) {
uuid_string += fmt::format(" {:02x}", content);
}
LOG_INFO(Input, "Tag detected, type={}, uuid={}", data.type, uuid_string);
tries = 0;
NFCPages ntag_pages = NFCPages::Block0;
// Read Tag data
while (tries++ < timeout_limit) {
result = SendNextPackageRequest(output, package_index);
while (true) {
auto result = SendReadAmiiboRequest(output, ntag_pages);
const auto nfc_status = static_cast<NFCStatus>(output.mcu_data[6]);
if (result != DriverResult::Success) {
return result;
}
if ((output.mcu_report == MCUReport::NFCReadData ||
output.mcu_report == MCUReport::NFCState) &&
nfc_status == NFCStatus::TagLost) {
return DriverResult::ErrorReadingData;
}
if (output.mcu_report == MCUReport::NFCReadData && output.mcu_data[1] == 0x07 &&
output.mcu_data[2] == 0x01) {
if (data.type != 2) {
continue;
}
switch (output.mcu_data[24]) {
case 0:
ntag_pages = NFCPages::Block135;
break;
case 3:
ntag_pages = NFCPages::Block45;
break;
case 4:
ntag_pages = NFCPages::Block231;
break;
default:
return DriverResult::ErrorReadingData;
}
continue;
}
if (output.mcu_report == MCUReport::NFCState && nfc_status == NFCStatus::LastPackage) {
// finished
SendStopPollingRequest(output);
return DriverResult::Success;
}
// Ignore other state reports
if (output.mcu_report == MCUReport::NFCState) {
continue;
}
if (tries++ > timeout_limit) {
return DriverResult::Timeout;
}
}
return DriverResult::Success;
}
DriverResult NfcProtocol::GetAmiiboData(std::vector<u8>& ntag_data) {
constexpr std::size_t timeout_limit = 10;
MCUCommandResponse output{};
std::size_t tries = 0;
NFCPages ntag_pages = NFCPages::Block135;
std::size_t ntag_buffer_pos = 0;
// Read Tag data
while (true) {
auto result = SendReadAmiiboRequest(output, ntag_pages);
const auto nfc_status = static_cast<NFCStatus>(output.mcu_data[6]);
if (result != DriverResult::Success) {
@@ -253,7 +259,6 @@ DriverResult NfcProtocol::GetAmiiboData(std::vector<u8>& ntag_data) {
memcpy(ntag_data.data() + ntag_buffer_pos, output.mcu_data.data() + 6,
payload_size);
}
package_index++;
continue;
}
@@ -261,99 +266,31 @@ DriverResult NfcProtocol::GetAmiiboData(std::vector<u8>& ntag_data) {
LOG_INFO(Input, "Finished reading amiibo");
return DriverResult::Success;
}
}
return DriverResult::Timeout;
}
DriverResult NfcProtocol::WriteAmiiboData(const TagUUID& tag_uuid, std::span<const u8> data) {
constexpr std::size_t timeout_limit = 60;
const auto nfc_data = MakeAmiiboWritePackage(tag_uuid, data);
const std::vector<u8> nfc_buffer_data = SerializeWritePackage(nfc_data);
std::span<const u8> buffer(nfc_buffer_data);
MCUCommandResponse output{};
u8 block_id = 1;
u8 package_index = 0;
std::size_t tries = 0;
std::size_t current_position = 0;
LOG_INFO(Input, "Writing amiibo data");
auto result = SendWriteAmiiboRequest(output, tag_uuid);
if (result != DriverResult::Success) {
return result;
}
// Read Tag data but ignore the actual sent data
while (tries++ < timeout_limit) {
result = SendNextPackageRequest(output, package_index);
const auto nfc_status = static_cast<NFCStatus>(output.mcu_data[6]);
if (result != DriverResult::Success) {
return result;
}
if ((output.mcu_report == MCUReport::NFCReadData ||
output.mcu_report == MCUReport::NFCState) &&
nfc_status == NFCStatus::TagLost) {
return DriverResult::ErrorReadingData;
}
if (output.mcu_report == MCUReport::NFCReadData && output.mcu_data[1] == 0x07) {
package_index++;
// Ignore other state reports
if (output.mcu_report == MCUReport::NFCState) {
continue;
}
if (output.mcu_report == MCUReport::NFCState && nfc_status == NFCStatus::LastPackage) {
LOG_INFO(Input, "Finished reading amiibo");
break;
if (tries++ > timeout_limit) {
return DriverResult::Timeout;
}
}
// Send Data. Nfc buffer size is 31, Send the data in smaller packages
while (current_position < buffer.size() && tries++ < timeout_limit) {
const std::size_t next_position =
std::min(current_position + sizeof(NFCRequestState::raw_data), buffer.size());
const std::size_t block_size = next_position - current_position;
const bool is_last_packet = block_size < sizeof(NFCRequestState::raw_data);
SendWriteDataAmiiboRequest(output, block_id, is_last_packet,
buffer.subspan(current_position, block_size));
const auto nfc_status = static_cast<NFCStatus>(output.mcu_data[6]);
if ((output.mcu_report == MCUReport::NFCReadData ||
output.mcu_report == MCUReport::NFCState) &&
nfc_status == NFCStatus::TagLost) {
return DriverResult::ErrorReadingData;
}
// Increase position when data is confirmed by the joycon
if (output.mcu_report == MCUReport::NFCState &&
(output.mcu_data[1] << 8) + output.mcu_data[0] == 0x0500 &&
output.mcu_data[3] == block_id) {
block_id++;
current_position = next_position;
}
}
return result;
return DriverResult::Success;
}
DriverResult NfcProtocol::SendStartPollingRequest(MCUCommandResponse& output,
bool is_second_attempt) {
DriverResult NfcProtocol::SendStartPollingRequest(MCUCommandResponse& output) {
NFCRequestState request{
.command_argument = NFCCommand::StartPolling,
.block_id = {},
.packet_id = {},
.command_argument = NFCReadCommand::StartPolling,
.packet_id = 0x0,
.packet_flag = MCUPacketFlag::LastCommandPacket,
.data_length = sizeof(NFCPollingCommandData),
.nfc_polling =
{
.enable_mifare = 0x00,
.unknown_1 = static_cast<u8>(is_second_attempt ? 0xe8 : 0x00),
.unknown_2 = static_cast<u8>(is_second_attempt ? 0x03 : 0x00),
.enable_mifare = 0x01,
.unknown_1 = 0x00,
.unknown_2 = 0x00,
.unknown_3 = 0x2c,
.unknown_4 = 0x01,
},
@@ -369,11 +306,10 @@ DriverResult NfcProtocol::SendStartPollingRequest(MCUCommandResponse& output,
DriverResult NfcProtocol::SendStopPollingRequest(MCUCommandResponse& output) {
NFCRequestState request{
.command_argument = NFCCommand::StopPolling,
.block_id = {},
.packet_id = {},
.command_argument = NFCReadCommand::StopPolling,
.packet_id = 0x0,
.packet_flag = MCUPacketFlag::LastCommandPacket,
.data_length = {},
.data_length = 0,
.raw_data = {},
.crc = {},
};
@@ -385,13 +321,12 @@ DriverResult NfcProtocol::SendStopPollingRequest(MCUCommandResponse& output) {
output);
}
DriverResult NfcProtocol::SendNextPackageRequest(MCUCommandResponse& output, u8 packet_id) {
DriverResult NfcProtocol::SendStartWaitingRecieveRequest(MCUCommandResponse& output) {
NFCRequestState request{
.command_argument = NFCCommand::StartWaitingRecieve,
.block_id = {},
.packet_id = packet_id,
.command_argument = NFCReadCommand::StartWaitingRecieve,
.packet_id = 0x0,
.packet_flag = MCUPacketFlag::LastCommandPacket,
.data_length = {},
.data_length = 0,
.raw_data = {},
.crc = {},
};
@@ -405,17 +340,17 @@ DriverResult NfcProtocol::SendNextPackageRequest(MCUCommandResponse& output, u8
DriverResult NfcProtocol::SendReadAmiiboRequest(MCUCommandResponse& output, NFCPages ntag_pages) {
NFCRequestState request{
.command_argument = NFCCommand::ReadNtag,
.block_id = {},
.packet_id = {},
.command_argument = NFCReadCommand::Ntag,
.packet_id = 0x0,
.packet_flag = MCUPacketFlag::LastCommandPacket,
.data_length = sizeof(NFCReadCommandData),
.nfc_read =
{
.unknown = 0xd0,
.uuid_length = sizeof(NFCReadCommandData::uid),
.uuid_length = 0x07,
.unknown_2 = 0x00,
.uid = {},
.tag_type = NFCTagType::Ntag215,
.tag_type = NFCTagType::AllTags,
.read_block = GetReadBlockCommand(ntag_pages),
},
.crc = {},
@@ -428,135 +363,12 @@ DriverResult NfcProtocol::SendReadAmiiboRequest(MCUCommandResponse& output, NFCP
output);
}
// Sends the request that opens an amiibo write for the tag identified by tag_uuid.
// The MCU's reply is stored in `output`; the return value is whatever SendMCUData returns.
// NOTE(review): command_argument is ReadNtag (with the Block3 read block) even though this is
// the write path — presumably a read of page 3 is the first step of the write handshake;
// confirm against the protocol notes.
DriverResult NfcProtocol::SendWriteAmiiboRequest(MCUCommandResponse& output,
const TagUUID& tag_uuid) {
NFCRequestState request{
.command_argument = NFCCommand::ReadNtag,
.block_id = {},
.packet_id = {},
.packet_flag = MCUPacketFlag::LastCommandPacket,
.data_length = sizeof(NFCReadCommandData),
.nfc_read =
{
.unknown = 0xd0,
.uuid_length = sizeof(NFCReadCommandData::uid),
.uid = tag_uuid,
.tag_type = NFCTagType::Ntag215,
.read_block = GetReadBlockCommand(NFCPages::Block3),
},
.crc = {},
};
// Flatten the request to bytes, then store the CRC8 of bytes 0..35 at byte 36.
std::array<u8, sizeof(NFCRequestState)> request_data{};
memcpy(request_data.data(), &request, sizeof(NFCRequestState));
request_data[36] = CalculateMCU_CRC8(request_data.data(), 36);
return SendMCUData(ReportMode::NFC_IR_MODE_60HZ, MCUSubCommand::ReadDeviceMode, request_data,
output);
}
// Sends one WriteNtag packet carrying a slice of the serialized write package.
//   output         - receives the MCU's reply.
//   block_id       - sequence number stored in the request's block_id field.
//   is_last_packet - selects LastCommandPacket instead of MorePacketsRemaining.
//   data           - payload; at most sizeof(NFCRequestState::raw_data) bytes are sent, any
//                    excess is silently dropped.
// Returns whatever SendMCUData returns.
DriverResult NfcProtocol::SendWriteDataAmiiboRequest(MCUCommandResponse& output, u8 block_id,
bool is_last_packet,
std::span<const u8> data) {
// Clamp to the capacity of raw_data so the memcpy below cannot overrun it.
const auto data_size = std::min(data.size(), sizeof(NFCRequestState::raw_data));
NFCRequestState request{
.command_argument = NFCCommand::WriteNtag,
.block_id = block_id,
.packet_id = {},
.packet_flag =
is_last_packet ? MCUPacketFlag::LastCommandPacket : MCUPacketFlag::MorePacketsRemaining,
.data_length = static_cast<u8>(data_size),
.raw_data = {},
.crc = {},
};
memcpy(request.raw_data.data(), data.data(), data_size);
// Flatten the request to bytes, then store the CRC8 of bytes 0..35 at byte 36.
std::array<u8, sizeof(NFCRequestState)> request_data{};
memcpy(request_data.data(), &request, sizeof(NFCRequestState));
request_data[36] = CalculateMCU_CRC8(request_data.data(), 36);
return SendMCUData(ReportMode::NFC_IR_MODE_60HZ, MCUSubCommand::ReadDeviceMode, request_data,
output);
}
std::vector<u8> NfcProtocol::SerializeWritePackage(const NFCWritePackage& package) const {
const std::size_t header_size =
sizeof(NFCWriteCommandData) + sizeof(NFCWritePackage::number_of_chunks);
std::vector<u8> serialized_data(header_size);
std::size_t start_index = 0;
memcpy(serialized_data.data(), &package, header_size);
start_index += header_size;
for (const auto& data_chunk : package.data_chunks) {
const std::size_t chunk_size =
sizeof(NFCDataChunk::nfc_page) + sizeof(NFCDataChunk::data_size) + data_chunk.data_size;
serialized_data.resize(start_index + chunk_size);
memcpy(serialized_data.data() + start_index, &data_chunk, chunk_size);
start_index += chunk_size;
}
return serialized_data;
}
// Builds the write package for a tag dump: a command header for tag_uuid plus three data
// chunks cut out of `data` by MakeAmiiboChunk.
//   tag_uuid - UUID stored in the command header.
//   data     - raw tag dump; bytes 16..19 supply magic, write_count and amiibo_version.
// NOTE(review): data[16]..data[19] are read without a size check, so `data` must hold at
// least 20 bytes — confirm that every caller guarantees this.
NFCWritePackage NfcProtocol::MakeAmiiboWritePackage(const TagUUID& tag_uuid,
std::span<const u8> data) const {
return {
.command_data{
.unknown = 0xd0,
.uuid_length = sizeof(NFCReadCommandData::uid),
.uid = tag_uuid,
.tag_type = NFCTagType::Ntag215,
.unknown2 = 0x00,
.unknown3 = 0x01,
.unknown4 = 0x04,
.unknown5 = 0xff,
.unknown6 = 0xff,
.unknown7 = 0xff,
.unknown8 = 0xff,
.magic = data[16],
// data[17] is the high byte and data[18] the low byte of the write counter.
.write_count = static_cast<u16>((data[17] << 8) + data[18]),
.amiibo_version = data[19],
},
.number_of_chunks = 3,
.data_chunks =
{
// Arguments are (starting page, byte count); the fourth array slot stays empty.
MakeAmiiboChunk(0x05, 0x20, data),
MakeAmiiboChunk(0x20, 0xf0, data),
MakeAmiiboChunk(0x5c, 0x98, data),
},
};
}
/// Cuts one chunk out of a tag dump.
///
/// @param page First page of the chunk; its byte offset in `data` is `page * 4`.
/// @param size Number of bytes to copy.
/// @param data Raw tag dump to copy from.
/// @return The populated chunk, or a value-initialized chunk when the requested range reaches
///         or passes the end of `data`.
NFCDataChunk NfcProtocol::MakeAmiiboChunk(u8 page, u8 size, std::span<const u8> data) const {
    constexpr u8 PAGE_SIZE = 4;
    const auto byte_offset = static_cast<std::size_t>(page * PAGE_SIZE);

    // Reject ranges that would not fit inside the dump.
    if (byte_offset + size >= data.size()) {
        return {};
    }

    NFCDataChunk result{};
    result.nfc_page = page;
    result.data_size = size;
    std::memcpy(result.data.data(), data.data() + byte_offset, size);
    return result;
}
NFCReadBlockCommand NfcProtocol::GetReadBlockCommand(NFCPages pages) const {
switch (pages) {
case NFCPages::Block0:
return {
.block_count = 1,
};
case NFCPages::Block3:
return {
.block_count = 1,
.blocks =
{
NFCReadBlock{0x03, 0x03},
},
};
case NFCPages::Block45:
return {
.block_count = 1,
@@ -591,17 +403,6 @@ NFCReadBlockCommand NfcProtocol::GetReadBlockCommand(NFCPages pages) const {
};
}
/// Extracts the 7-byte tag UUID from the start of a tag dump.
///
/// @param data Raw tag dump.
/// @return Bytes 0, 1, 2, 4, 5, 6 and 7 of `data`, or an all-zero UUID when `data` holds fewer
///         than 10 bytes.
TagUUID NfcProtocol::GetTagUUID(std::span<const u8> data) const {
    if (data.size() < 10) {
        return {};
    }

    TagUUID uuid{};
    // crc byte 3 is omitted in this operation
    uuid[0] = data[0];
    uuid[1] = data[1];
    uuid[2] = data[2];
    uuid[3] = data[4];
    uuid[4] = data[5];
    uuid[5] = data[6];
    uuid[6] = data[7];
    return uuid;
}
bool NfcProtocol::IsEnabled() const {
return is_enabled;
}

View File

@@ -27,8 +27,6 @@ public:
DriverResult ScanAmiibo(std::vector<u8>& data);
DriverResult WriteAmiibo(std::span<const u8> data);
bool HasAmiibo();
bool IsEnabled() const;
@@ -39,42 +37,27 @@ private:
struct TagFoundData {
u8 type;
u8 uuid_size;
TagUUID uuid;
std::vector<u8> uuid;
};
DriverResult WaitUntilNfcIs(NFCStatus status);
DriverResult WaitUntilNfcIsReady();
DriverResult IsTagInRange(TagFoundData& data, std::size_t timeout_limit = 1);
DriverResult StartPolling(TagFoundData& data, std::size_t timeout_limit = 1);
DriverResult ReadTag(const TagFoundData& data);
DriverResult GetAmiiboData(std::vector<u8>& data);
DriverResult WriteAmiiboData(const TagUUID& tag_uuid, std::span<const u8> data);
DriverResult SendStartPollingRequest(MCUCommandResponse& output,
bool is_second_attempt = false);
DriverResult SendStartPollingRequest(MCUCommandResponse& output);
DriverResult SendStopPollingRequest(MCUCommandResponse& output);
DriverResult SendNextPackageRequest(MCUCommandResponse& output, u8 packet_id);
DriverResult SendStartWaitingRecieveRequest(MCUCommandResponse& output);
DriverResult SendReadAmiiboRequest(MCUCommandResponse& output, NFCPages ntag_pages);
DriverResult SendWriteAmiiboRequest(MCUCommandResponse& output, const TagUUID& tag_uuid);
DriverResult SendWriteDataAmiiboRequest(MCUCommandResponse& output, u8 block_id,
bool is_last_packet, std::span<const u8> data);
std::vector<u8> SerializeWritePackage(const NFCWritePackage& package) const;
NFCWritePackage MakeAmiiboWritePackage(const TagUUID& tag_uuid, std::span<const u8> data) const;
NFCDataChunk MakeAmiiboChunk(u8 page, u8 size, std::span<const u8> data) const;
NFCReadBlockCommand GetReadBlockCommand(NFCPages pages) const;
TagUUID GetTagUUID(std::span<const u8> data) const;
bool is_enabled{};
std::size_t update_counter{};
};

View File

@@ -380,16 +380,13 @@ void InputEngine::TriggerOnMotionChange(const PadIdentifier& identifier, int mot
if (!configuring || !mapping_callback.on_data) {
return;
}
const auto old_value = GetMotion(identifier, motion);
bool is_active = false;
if (std::abs(value.accel_x - old_value.accel_x) > 1.5f ||
std::abs(value.accel_y - old_value.accel_y) > 1.5f ||
std::abs(value.accel_z - old_value.accel_z) > 1.5f) {
if (std::abs(value.accel_x) > 1.5f || std::abs(value.accel_y) > 1.5f ||
std::abs(value.accel_z) > 1.5f) {
is_active = true;
}
if (std::abs(value.gyro_x - old_value.gyro_x) > 0.6f ||
std::abs(value.gyro_y - old_value.gyro_y) > 0.6f ||
std::abs(value.gyro_z - old_value.gyro_z) > 0.6f) {
if (std::abs(value.gyro_x) > 0.6f || std::abs(value.gyro_y) > 0.6f ||
std::abs(value.gyro_z) > 0.6f) {
is_active = true;
}
if (!is_active) {

View File

@@ -30,7 +30,7 @@ void SHF(TranslatorVisitor& v, u64 insn, const IR::U32& shift, const IR::U32& hi
union {
u64 insn;
BitField<0, 8, IR::Reg> dest_reg;
BitField<8, 8, IR::Reg> lo_bits_reg;
BitField<0, 8, IR::Reg> lo_bits_reg;
BitField<37, 2, MaxShift> max_shift;
BitField<47, 1, u64> cc;
BitField<48, 2, u64> x_mode;

View File

@@ -246,14 +246,10 @@ add_library(video_core STATIC
texture_cache/util.h
textures/astc.h
textures/astc.cpp
textures/bcn.cpp
textures/bcn.h
textures/decoders.cpp
textures/decoders.h
textures/texture.cpp
textures/texture.h
textures/workers.cpp
textures/workers.h
transform_feedback.cpp
transform_feedback.h
video_core.cpp
@@ -279,7 +275,7 @@ add_library(video_core STATIC
create_target_directory_groups(video_core)
target_link_libraries(video_core PUBLIC common core)
target_link_libraries(video_core PUBLIC glad shader_recompiler stb)
target_link_libraries(video_core PUBLIC glad shader_recompiler)
if (YUZU_USE_BUNDLED_FFMPEG AND NOT WIN32)
add_dependencies(video_core ffmpeg-build)

View File

@@ -2,8 +2,6 @@
// SPDX-License-Identifier: GPL-3.0-or-later
#include "common/microprofile.h"
#include "video_core/buffer_cache/buffer_cache_base.h"
#include "video_core/control/channel_state_cache.inc"
namespace VideoCommon {
@@ -11,6 +9,4 @@ MICROPROFILE_DEFINE(GPU_PrepareBuffers, "GPU", "Prepare buffers", MP_RGB(224, 12
MICROPROFILE_DEFINE(GPU_BindUploadBuffers, "GPU", "Bind and upload buffers", MP_RGB(224, 128, 128));
MICROPROFILE_DEFINE(GPU_DownloadMemory, "GPU", "Download buffers", MP_RGB(224, 128, 128));
template class VideoCommon::ChannelSetupCaches<VideoCommon::BufferCacheChannelInfo>;
} // namespace VideoCommon

View File

@@ -30,8 +30,8 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
}
const s64 device_memory = static_cast<s64>(runtime.GetDeviceLocalMemory());
const s64 min_spacing_expected = device_memory - 1_GiB;
const s64 min_spacing_critical = device_memory - 512_MiB;
const s64 min_spacing_expected = device_memory - 1_GiB - 512_MiB;
const s64 min_spacing_critical = device_memory - 1_GiB;
const s64 mem_threshold = std::min(device_memory, TARGET_THRESHOLD);
const s64 min_vacancy_expected = (6 * mem_threshold) / 10;
const s64 min_vacancy_critical = (3 * mem_threshold) / 10;
@@ -64,22 +64,17 @@ void BufferCache<P>::RunGarbageCollector() {
template <class P>
void BufferCache<P>::TickFrame() {
// Calculate hits and shots and move hit bits to the right
const u32 hits = std::reduce(channel_state->uniform_cache_hits.begin(),
channel_state->uniform_cache_hits.end());
const u32 shots = std::reduce(channel_state->uniform_cache_shots.begin(),
channel_state->uniform_cache_shots.end());
std::copy_n(channel_state->uniform_cache_hits.begin(),
channel_state->uniform_cache_hits.size() - 1,
channel_state->uniform_cache_hits.begin() + 1);
std::copy_n(channel_state->uniform_cache_shots.begin(),
channel_state->uniform_cache_shots.size() - 1,
channel_state->uniform_cache_shots.begin() + 1);
channel_state->uniform_cache_hits[0] = 0;
channel_state->uniform_cache_shots[0] = 0;
const u32 hits = std::reduce(uniform_cache_hits.begin(), uniform_cache_hits.end());
const u32 shots = std::reduce(uniform_cache_shots.begin(), uniform_cache_shots.end());
std::copy_n(uniform_cache_hits.begin(), uniform_cache_hits.size() - 1,
uniform_cache_hits.begin() + 1);
std::copy_n(uniform_cache_shots.begin(), uniform_cache_shots.size() - 1,
uniform_cache_shots.begin() + 1);
uniform_cache_hits[0] = 0;
uniform_cache_shots[0] = 0;
const bool skip_preferred = hits * 256 < shots * 251;
channel_state->uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0;
uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0;
// If we can obtain the memory info, use it instead of the estimate.
if (runtime.CanReportMemoryUsage()) {
@@ -169,10 +164,10 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
BufferId buffer_a;
BufferId buffer_b;
do {
channel_state->has_deleted_buffers = false;
has_deleted_buffers = false;
buffer_a = FindBuffer(*cpu_src_address, static_cast<u32>(amount));
buffer_b = FindBuffer(*cpu_dest_address, static_cast<u32>(amount));
} while (channel_state->has_deleted_buffers);
} while (has_deleted_buffers);
auto& src_buffer = slot_buffers[buffer_a];
auto& dest_buffer = slot_buffers[buffer_b];
SynchronizeBuffer(src_buffer, *cpu_src_address, static_cast<u32>(amount));
@@ -277,30 +272,30 @@ void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr
.size = size,
.buffer_id = BufferId{},
};
channel_state->uniform_buffers[stage][index] = binding;
uniform_buffers[stage][index] = binding;
}
template <class P>
void BufferCache<P>::DisableGraphicsUniformBuffer(size_t stage, u32 index) {
channel_state->uniform_buffers[stage][index] = NULL_BINDING;
uniform_buffers[stage][index] = NULL_BINDING;
}
template <class P>
void BufferCache<P>::UpdateGraphicsBuffers(bool is_indexed) {
MICROPROFILE_SCOPE(GPU_PrepareBuffers);
do {
channel_state->has_deleted_buffers = false;
has_deleted_buffers = false;
DoUpdateGraphicsBuffers(is_indexed);
} while (channel_state->has_deleted_buffers);
} while (has_deleted_buffers);
}
template <class P>
void BufferCache<P>::UpdateComputeBuffers() {
MICROPROFILE_SCOPE(GPU_PrepareBuffers);
do {
channel_state->has_deleted_buffers = false;
has_deleted_buffers = false;
DoUpdateComputeBuffers();
} while (channel_state->has_deleted_buffers);
} while (has_deleted_buffers);
}
template <class P>
@@ -343,102 +338,98 @@ template <class P>
void BufferCache<P>::SetUniformBuffersState(const std::array<u32, NUM_STAGES>& mask,
const UniformBufferSizes* sizes) {
if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
if (channel_state->enabled_uniform_buffer_masks != mask) {
if (enabled_uniform_buffer_masks != mask) {
if constexpr (IS_OPENGL) {
channel_state->fast_bound_uniform_buffers.fill(0);
fast_bound_uniform_buffers.fill(0);
}
channel_state->dirty_uniform_buffers.fill(~u32{0});
channel_state->uniform_buffer_binding_sizes.fill({});
dirty_uniform_buffers.fill(~u32{0});
uniform_buffer_binding_sizes.fill({});
}
}
channel_state->enabled_uniform_buffer_masks = mask;
channel_state->uniform_buffer_sizes = sizes;
enabled_uniform_buffer_masks = mask;
uniform_buffer_sizes = sizes;
}
template <class P>
void BufferCache<P>::SetComputeUniformBufferState(u32 mask,
const ComputeUniformBufferSizes* sizes) {
channel_state->enabled_compute_uniform_buffer_mask = mask;
channel_state->compute_uniform_buffer_sizes = sizes;
enabled_compute_uniform_buffer_mask = mask;
compute_uniform_buffer_sizes = sizes;
}
template <class P>
void BufferCache<P>::UnbindGraphicsStorageBuffers(size_t stage) {
channel_state->enabled_storage_buffers[stage] = 0;
channel_state->written_storage_buffers[stage] = 0;
enabled_storage_buffers[stage] = 0;
written_storage_buffers[stage] = 0;
}
template <class P>
void BufferCache<P>::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index,
u32 cbuf_offset, bool is_written) {
channel_state->enabled_storage_buffers[stage] |= 1U << ssbo_index;
channel_state->written_storage_buffers[stage] |= (is_written ? 1U : 0U) << ssbo_index;
enabled_storage_buffers[stage] |= 1U << ssbo_index;
written_storage_buffers[stage] |= (is_written ? 1U : 0U) << ssbo_index;
const auto& cbufs = maxwell3d->state.shader_stages[stage];
const GPUVAddr ssbo_addr = cbufs.const_buffers[cbuf_index].address + cbuf_offset;
channel_state->storage_buffers[stage][ssbo_index] =
StorageBufferBinding(ssbo_addr, cbuf_index, is_written);
storage_buffers[stage][ssbo_index] = StorageBufferBinding(ssbo_addr, cbuf_index, is_written);
}
template <class P>
void BufferCache<P>::UnbindGraphicsTextureBuffers(size_t stage) {
channel_state->enabled_texture_buffers[stage] = 0;
channel_state->written_texture_buffers[stage] = 0;
channel_state->image_texture_buffers[stage] = 0;
enabled_texture_buffers[stage] = 0;
written_texture_buffers[stage] = 0;
image_texture_buffers[stage] = 0;
}
template <class P>
void BufferCache<P>::BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr,
u32 size, PixelFormat format, bool is_written,
bool is_image) {
channel_state->enabled_texture_buffers[stage] |= 1U << tbo_index;
channel_state->written_texture_buffers[stage] |= (is_written ? 1U : 0U) << tbo_index;
enabled_texture_buffers[stage] |= 1U << tbo_index;
written_texture_buffers[stage] |= (is_written ? 1U : 0U) << tbo_index;
if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
channel_state->image_texture_buffers[stage] |= (is_image ? 1U : 0U) << tbo_index;
image_texture_buffers[stage] |= (is_image ? 1U : 0U) << tbo_index;
}
channel_state->texture_buffers[stage][tbo_index] =
GetTextureBufferBinding(gpu_addr, size, format);
texture_buffers[stage][tbo_index] = GetTextureBufferBinding(gpu_addr, size, format);
}
template <class P>
void BufferCache<P>::UnbindComputeStorageBuffers() {
channel_state->enabled_compute_storage_buffers = 0;
channel_state->written_compute_storage_buffers = 0;
channel_state->image_compute_texture_buffers = 0;
enabled_compute_storage_buffers = 0;
written_compute_storage_buffers = 0;
image_compute_texture_buffers = 0;
}
template <class P>
void BufferCache<P>::BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset,
bool is_written) {
channel_state->enabled_compute_storage_buffers |= 1U << ssbo_index;
channel_state->written_compute_storage_buffers |= (is_written ? 1U : 0U) << ssbo_index;
enabled_compute_storage_buffers |= 1U << ssbo_index;
written_compute_storage_buffers |= (is_written ? 1U : 0U) << ssbo_index;
const auto& launch_desc = kepler_compute->launch_description;
ASSERT(((launch_desc.const_buffer_enable_mask >> cbuf_index) & 1) != 0);
const auto& cbufs = launch_desc.const_buffer_config;
const GPUVAddr ssbo_addr = cbufs[cbuf_index].Address() + cbuf_offset;
channel_state->compute_storage_buffers[ssbo_index] =
StorageBufferBinding(ssbo_addr, cbuf_index, is_written);
compute_storage_buffers[ssbo_index] = StorageBufferBinding(ssbo_addr, cbuf_index, is_written);
}
template <class P>
void BufferCache<P>::UnbindComputeTextureBuffers() {
channel_state->enabled_compute_texture_buffers = 0;
channel_state->written_compute_texture_buffers = 0;
channel_state->image_compute_texture_buffers = 0;
enabled_compute_texture_buffers = 0;
written_compute_texture_buffers = 0;
image_compute_texture_buffers = 0;
}
template <class P>
void BufferCache<P>::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size,
PixelFormat format, bool is_written, bool is_image) {
channel_state->enabled_compute_texture_buffers |= 1U << tbo_index;
channel_state->written_compute_texture_buffers |= (is_written ? 1U : 0U) << tbo_index;
enabled_compute_texture_buffers |= 1U << tbo_index;
written_compute_texture_buffers |= (is_written ? 1U : 0U) << tbo_index;
if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
channel_state->image_compute_texture_buffers |= (is_image ? 1U : 0U) << tbo_index;
image_compute_texture_buffers |= (is_image ? 1U : 0U) << tbo_index;
}
channel_state->compute_texture_buffers[tbo_index] =
GetTextureBufferBinding(gpu_addr, size, format);
compute_texture_buffers[tbo_index] = GetTextureBufferBinding(gpu_addr, size, format);
}
template <class P>
@@ -681,10 +672,10 @@ bool BufferCache<P>::IsRegionCpuModified(VAddr addr, size_t size) {
template <class P>
void BufferCache<P>::BindHostIndexBuffer() {
Buffer& buffer = slot_buffers[channel_state->index_buffer.buffer_id];
TouchBuffer(buffer, channel_state->index_buffer.buffer_id);
const u32 offset = buffer.Offset(channel_state->index_buffer.cpu_addr);
const u32 size = channel_state->index_buffer.size;
Buffer& buffer = slot_buffers[index_buffer.buffer_id];
TouchBuffer(buffer, index_buffer.buffer_id);
const u32 offset = buffer.Offset(index_buffer.cpu_addr);
const u32 size = index_buffer.size;
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
if (!draw_state.inline_index_draw_indexes.empty()) [[unlikely]] {
if constexpr (USE_MEMORY_MAPS) {
@@ -698,7 +689,7 @@ void BufferCache<P>::BindHostIndexBuffer() {
buffer.ImmediateUpload(0, draw_state.inline_index_draw_indexes);
}
} else {
SynchronizeBuffer(buffer, channel_state->index_buffer.cpu_addr, size);
SynchronizeBuffer(buffer, index_buffer.cpu_addr, size);
}
if constexpr (HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
const u32 new_offset =
@@ -715,7 +706,7 @@ template <class P>
void BufferCache<P>::BindHostVertexBuffers() {
auto& flags = maxwell3d->dirty.flags;
for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
const Binding& binding = channel_state->vertex_buffers[index];
const Binding& binding = vertex_buffers[index];
Buffer& buffer = slot_buffers[binding.buffer_id];
TouchBuffer(buffer, binding.buffer_id);
SynchronizeBuffer(buffer, binding.cpu_addr, binding.size);
@@ -738,19 +729,19 @@ void BufferCache<P>::BindHostDrawIndirectBuffers() {
SynchronizeBuffer(buffer, binding.cpu_addr, binding.size);
};
if (current_draw_indirect->include_count) {
bind_buffer(channel_state->count_buffer_binding);
bind_buffer(count_buffer_binding);
}
bind_buffer(channel_state->indirect_buffer_binding);
bind_buffer(indirect_buffer_binding);
}
template <class P>
void BufferCache<P>::BindHostGraphicsUniformBuffers(size_t stage) {
u32 dirty = ~0U;
if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
dirty = std::exchange(channel_state->dirty_uniform_buffers[stage], 0);
dirty = std::exchange(dirty_uniform_buffers[stage], 0);
}
u32 binding_index = 0;
ForEachEnabledBit(channel_state->enabled_uniform_buffer_masks[stage], [&](u32 index) {
ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) {
const bool needs_bind = ((dirty >> index) & 1) != 0;
BindHostGraphicsUniformBuffer(stage, index, binding_index, needs_bind);
if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
@@ -762,13 +753,13 @@ void BufferCache<P>::BindHostGraphicsUniformBuffers(size_t stage) {
template <class P>
void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index,
bool needs_bind) {
const Binding& binding = channel_state->uniform_buffers[stage][index];
const Binding& binding = uniform_buffers[stage][index];
const VAddr cpu_addr = binding.cpu_addr;
const u32 size = std::min(binding.size, (*channel_state->uniform_buffer_sizes)[stage][index]);
const u32 size = std::min(binding.size, (*uniform_buffer_sizes)[stage][index]);
Buffer& buffer = slot_buffers[binding.buffer_id];
TouchBuffer(buffer, binding.buffer_id);
const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID &&
size <= channel_state->uniform_buffer_skip_cache_size &&
size <= uniform_buffer_skip_cache_size &&
!memory_tracker.IsRegionGpuModified(cpu_addr, size);
if (use_fast_buffer) {
if constexpr (IS_OPENGL) {
@@ -776,11 +767,11 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
// Fast path for Nvidia
const bool should_fast_bind =
!HasFastUniformBufferBound(stage, binding_index) ||
channel_state->uniform_buffer_binding_sizes[stage][binding_index] != size;
uniform_buffer_binding_sizes[stage][binding_index] != size;
if (should_fast_bind) {
// We only have to bind when the currently bound buffer is not the fast version
channel_state->fast_bound_uniform_buffers[stage] |= 1U << binding_index;
channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size;
fast_bound_uniform_buffers[stage] |= 1U << binding_index;
uniform_buffer_binding_sizes[stage][binding_index] = size;
runtime.BindFastUniformBuffer(stage, binding_index, size);
}
const auto span = ImmediateBufferWithData(cpu_addr, size);
@@ -789,8 +780,8 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
}
}
if constexpr (IS_OPENGL) {
channel_state->fast_bound_uniform_buffers[stage] |= 1U << binding_index;
channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size;
fast_bound_uniform_buffers[stage] |= 1U << binding_index;
uniform_buffer_binding_sizes[stage][binding_index] = size;
}
// Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan
const std::span<u8> span = runtime.BindMappedUniformBuffer(stage, binding_index, size);
@@ -800,15 +791,15 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
// Classic cached path
const bool sync_cached = SynchronizeBuffer(buffer, cpu_addr, size);
if (sync_cached) {
++channel_state->uniform_cache_hits[0];
++uniform_cache_hits[0];
}
++channel_state->uniform_cache_shots[0];
++uniform_cache_shots[0];
// Skip binding if it's not needed and if the bound buffer is not the fast version
// This exists to avoid instances where the fast buffer is bound and a GPU write happens
needs_bind |= HasFastUniformBufferBound(stage, binding_index);
if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
needs_bind |= channel_state->uniform_buffer_binding_sizes[stage][binding_index] != size;
needs_bind |= uniform_buffer_binding_sizes[stage][binding_index] != size;
}
if (!needs_bind) {
return;
@@ -816,14 +807,14 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
const u32 offset = buffer.Offset(cpu_addr);
if constexpr (IS_OPENGL) {
// Fast buffer will be unbound
channel_state->fast_bound_uniform_buffers[stage] &= ~(1U << binding_index);
fast_bound_uniform_buffers[stage] &= ~(1U << binding_index);
// Mark the index as dirty if offset doesn't match
const bool is_copy_bind = offset != 0 && !runtime.SupportsNonZeroUniformOffset();
channel_state->dirty_uniform_buffers[stage] |= (is_copy_bind ? 1U : 0U) << index;
dirty_uniform_buffers[stage] |= (is_copy_bind ? 1U : 0U) << index;
}
if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size;
uniform_buffer_binding_sizes[stage][binding_index] = size;
}
if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
runtime.BindUniformBuffer(stage, binding_index, buffer, offset, size);
@@ -835,15 +826,15 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
template <class P>
void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) {
u32 binding_index = 0;
ForEachEnabledBit(channel_state->enabled_storage_buffers[stage], [&](u32 index) {
const Binding& binding = channel_state->storage_buffers[stage][index];
ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) {
const Binding& binding = storage_buffers[stage][index];
Buffer& buffer = slot_buffers[binding.buffer_id];
TouchBuffer(buffer, binding.buffer_id);
const u32 size = binding.size;
SynchronizeBuffer(buffer, binding.cpu_addr, size);
const u32 offset = buffer.Offset(binding.cpu_addr);
const bool is_written = ((channel_state->written_storage_buffers[stage] >> index) & 1) != 0;
const bool is_written = ((written_storage_buffers[stage] >> index) & 1) != 0;
if constexpr (NEEDS_BIND_STORAGE_INDEX) {
runtime.BindStorageBuffer(stage, binding_index, buffer, offset, size, is_written);
++binding_index;
@@ -855,8 +846,8 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) {
template <class P>
void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) {
ForEachEnabledBit(channel_state->enabled_texture_buffers[stage], [&](u32 index) {
const TextureBufferBinding& binding = channel_state->texture_buffers[stage][index];
ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) {
const TextureBufferBinding& binding = texture_buffers[stage][index];
Buffer& buffer = slot_buffers[binding.buffer_id];
const u32 size = binding.size;
SynchronizeBuffer(buffer, binding.cpu_addr, size);
@@ -864,7 +855,7 @@ void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) {
const u32 offset = buffer.Offset(binding.cpu_addr);
const PixelFormat format = binding.format;
if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
if (((channel_state->image_texture_buffers[stage] >> index) & 1) != 0) {
if (((image_texture_buffers[stage] >> index) & 1) != 0) {
runtime.BindImageBuffer(buffer, offset, size, format);
} else {
runtime.BindTextureBuffer(buffer, offset, size, format);
@@ -881,7 +872,7 @@ void BufferCache<P>::BindHostTransformFeedbackBuffers() {
return;
}
for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
const Binding& binding = channel_state->transform_feedback_buffers[index];
const Binding& binding = transform_feedback_buffers[index];
Buffer& buffer = slot_buffers[binding.buffer_id];
TouchBuffer(buffer, binding.buffer_id);
const u32 size = binding.size;
@@ -896,16 +887,15 @@ template <class P>
void BufferCache<P>::BindHostComputeUniformBuffers() {
if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
// Mark all uniform buffers as dirty
channel_state->dirty_uniform_buffers.fill(~u32{0});
channel_state->fast_bound_uniform_buffers.fill(0);
dirty_uniform_buffers.fill(~u32{0});
fast_bound_uniform_buffers.fill(0);
}
u32 binding_index = 0;
ForEachEnabledBit(channel_state->enabled_compute_uniform_buffer_mask, [&](u32 index) {
const Binding& binding = channel_state->compute_uniform_buffers[index];
ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) {
const Binding& binding = compute_uniform_buffers[index];
Buffer& buffer = slot_buffers[binding.buffer_id];
TouchBuffer(buffer, binding.buffer_id);
const u32 size =
std::min(binding.size, (*channel_state->compute_uniform_buffer_sizes)[index]);
const u32 size = std::min(binding.size, (*compute_uniform_buffer_sizes)[index]);
SynchronizeBuffer(buffer, binding.cpu_addr, size);
const u32 offset = buffer.Offset(binding.cpu_addr);
@@ -921,16 +911,15 @@ void BufferCache<P>::BindHostComputeUniformBuffers() {
template <class P>
void BufferCache<P>::BindHostComputeStorageBuffers() {
u32 binding_index = 0;
ForEachEnabledBit(channel_state->enabled_compute_storage_buffers, [&](u32 index) {
const Binding& binding = channel_state->compute_storage_buffers[index];
ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) {
const Binding& binding = compute_storage_buffers[index];
Buffer& buffer = slot_buffers[binding.buffer_id];
TouchBuffer(buffer, binding.buffer_id);
const u32 size = binding.size;
SynchronizeBuffer(buffer, binding.cpu_addr, size);
const u32 offset = buffer.Offset(binding.cpu_addr);
const bool is_written =
((channel_state->written_compute_storage_buffers >> index) & 1) != 0;
const bool is_written = ((written_compute_storage_buffers >> index) & 1) != 0;
if constexpr (NEEDS_BIND_STORAGE_INDEX) {
runtime.BindComputeStorageBuffer(binding_index, buffer, offset, size, is_written);
++binding_index;
@@ -942,8 +931,8 @@ void BufferCache<P>::BindHostComputeStorageBuffers() {
template <class P>
void BufferCache<P>::BindHostComputeTextureBuffers() {
ForEachEnabledBit(channel_state->enabled_compute_texture_buffers, [&](u32 index) {
const TextureBufferBinding& binding = channel_state->compute_texture_buffers[index];
ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) {
const TextureBufferBinding& binding = compute_texture_buffers[index];
Buffer& buffer = slot_buffers[binding.buffer_id];
const u32 size = binding.size;
SynchronizeBuffer(buffer, binding.cpu_addr, size);
@@ -951,7 +940,7 @@ void BufferCache<P>::BindHostComputeTextureBuffers() {
const u32 offset = buffer.Offset(binding.cpu_addr);
const PixelFormat format = binding.format;
if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
if (((channel_state->image_compute_texture_buffers >> index) & 1) != 0) {
if (((image_compute_texture_buffers >> index) & 1) != 0) {
runtime.BindImageBuffer(buffer, offset, size, format);
} else {
runtime.BindTextureBuffer(buffer, offset, size, format);
@@ -965,7 +954,7 @@ void BufferCache<P>::BindHostComputeTextureBuffers() {
template <class P>
void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) {
do {
channel_state->has_deleted_buffers = false;
has_deleted_buffers = false;
if (is_indexed) {
UpdateIndexBuffer();
}
@@ -979,7 +968,7 @@ void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) {
if (current_draw_indirect) {
UpdateDrawIndirect();
}
} while (channel_state->has_deleted_buffers);
} while (has_deleted_buffers);
}
template <class P>
@@ -1010,7 +999,7 @@ void BufferCache<P>::UpdateIndexBuffer() {
slot_buffers.erase(inline_buffer_id);
inline_buffer_id = CreateBuffer(0, buffer_size);
}
channel_state->index_buffer = Binding{
index_buffer = Binding{
.cpu_addr = 0,
.size = inline_index_size,
.buffer_id = inline_buffer_id,
@@ -1026,10 +1015,10 @@ void BufferCache<P>::UpdateIndexBuffer() {
(index_buffer_ref.count + index_buffer_ref.first) * index_buffer_ref.FormatSizeInBytes();
const u32 size = std::min(address_size, draw_size);
if (size == 0 || !cpu_addr) {
channel_state->index_buffer = NULL_BINDING;
index_buffer = NULL_BINDING;
return;
}
channel_state->index_buffer = Binding{
index_buffer = Binding{
.cpu_addr = *cpu_addr,
.size = size,
.buffer_id = FindBuffer(*cpu_addr, size),
@@ -1062,13 +1051,13 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) {
const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
u32 size = address_size; // TODO: Analyze stride and number of vertices
if (array.enable == 0 || size == 0 || !cpu_addr) {
channel_state->vertex_buffers[index] = NULL_BINDING;
vertex_buffers[index] = NULL_BINDING;
return;
}
if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end)) {
size = static_cast<u32>(gpu_memory->MaxContinuousRange(gpu_addr_begin, size));
}
channel_state->vertex_buffers[index] = Binding{
vertex_buffers[index] = Binding{
.cpu_addr = *cpu_addr,
.size = size,
.buffer_id = FindBuffer(*cpu_addr, size),
@@ -1090,24 +1079,23 @@ void BufferCache<P>::UpdateDrawIndirect() {
};
};
if (current_draw_indirect->include_count) {
update(current_draw_indirect->count_start_address, sizeof(u32),
channel_state->count_buffer_binding);
update(current_draw_indirect->count_start_address, sizeof(u32), count_buffer_binding);
}
update(current_draw_indirect->indirect_start_address, current_draw_indirect->buffer_size,
channel_state->indirect_buffer_binding);
indirect_buffer_binding);
}
template <class P>
void BufferCache<P>::UpdateUniformBuffers(size_t stage) {
ForEachEnabledBit(channel_state->enabled_uniform_buffer_masks[stage], [&](u32 index) {
Binding& binding = channel_state->uniform_buffers[stage][index];
ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) {
Binding& binding = uniform_buffers[stage][index];
if (binding.buffer_id) {
// Already updated
return;
}
// Mark as dirty
if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
channel_state->dirty_uniform_buffers[stage] |= 1U << index;
dirty_uniform_buffers[stage] |= 1U << index;
}
// Resolve buffer
binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
@@ -1116,10 +1104,10 @@ void BufferCache<P>::UpdateUniformBuffers(size_t stage) {
template <class P>
void BufferCache<P>::UpdateStorageBuffers(size_t stage) {
const u32 written_mask = channel_state->written_storage_buffers[stage];
ForEachEnabledBit(channel_state->enabled_storage_buffers[stage], [&](u32 index) {
const u32 written_mask = written_storage_buffers[stage];
ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) {
// Resolve buffer
Binding& binding = channel_state->storage_buffers[stage][index];
Binding& binding = storage_buffers[stage][index];
const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size);
binding.buffer_id = buffer_id;
// Mark buffer as written if needed
@@ -1131,11 +1119,11 @@ void BufferCache<P>::UpdateStorageBuffers(size_t stage) {
template <class P>
void BufferCache<P>::UpdateTextureBuffers(size_t stage) {
ForEachEnabledBit(channel_state->enabled_texture_buffers[stage], [&](u32 index) {
Binding& binding = channel_state->texture_buffers[stage][index];
ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) {
Binding& binding = texture_buffers[stage][index];
binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
// Mark buffer as written if needed
if (((channel_state->written_texture_buffers[stage] >> index) & 1) != 0) {
if (((written_texture_buffers[stage] >> index) & 1) != 0) {
MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size);
}
});
@@ -1158,11 +1146,11 @@ void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) {
const u32 size = binding.size;
const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
if (binding.enable == 0 || size == 0 || !cpu_addr) {
channel_state->transform_feedback_buffers[index] = NULL_BINDING;
transform_feedback_buffers[index] = NULL_BINDING;
return;
}
const BufferId buffer_id = FindBuffer(*cpu_addr, size);
channel_state->transform_feedback_buffers[index] = Binding{
transform_feedback_buffers[index] = Binding{
.cpu_addr = *cpu_addr,
.size = size,
.buffer_id = buffer_id,
@@ -1172,8 +1160,8 @@ void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) {
template <class P>
void BufferCache<P>::UpdateComputeUniformBuffers() {
ForEachEnabledBit(channel_state->enabled_compute_uniform_buffer_mask, [&](u32 index) {
Binding& binding = channel_state->compute_uniform_buffers[index];
ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) {
Binding& binding = compute_uniform_buffers[index];
binding = NULL_BINDING;
const auto& launch_desc = kepler_compute->launch_description;
if (((launch_desc.const_buffer_enable_mask >> index) & 1) != 0) {
@@ -1190,12 +1178,12 @@ void BufferCache<P>::UpdateComputeUniformBuffers() {
template <class P>
void BufferCache<P>::UpdateComputeStorageBuffers() {
ForEachEnabledBit(channel_state->enabled_compute_storage_buffers, [&](u32 index) {
ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) {
// Resolve buffer
Binding& binding = channel_state->compute_storage_buffers[index];
Binding& binding = compute_storage_buffers[index];
binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
// Mark as written if needed
if (((channel_state->written_compute_storage_buffers >> index) & 1) != 0) {
if (((written_compute_storage_buffers >> index) & 1) != 0) {
MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size);
}
});
@@ -1203,11 +1191,11 @@ void BufferCache<P>::UpdateComputeStorageBuffers() {
template <class P>
void BufferCache<P>::UpdateComputeTextureBuffers() {
ForEachEnabledBit(channel_state->enabled_compute_texture_buffers, [&](u32 index) {
Binding& binding = channel_state->compute_texture_buffers[index];
ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) {
Binding& binding = compute_texture_buffers[index];
binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
// Mark as written if needed
if (((channel_state->written_compute_texture_buffers >> index) & 1) != 0) {
if (((written_compute_texture_buffers >> index) & 1) != 0) {
MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size);
}
});
@@ -1622,13 +1610,13 @@ void BufferCache<P>::DeleteBuffer(BufferId buffer_id, bool do_not_mark) {
const auto replace = [scalar_replace](std::span<Binding> bindings) {
std::ranges::for_each(bindings, scalar_replace);
};
scalar_replace(channel_state->index_buffer);
replace(channel_state->vertex_buffers);
std::ranges::for_each(channel_state->uniform_buffers, replace);
std::ranges::for_each(channel_state->storage_buffers, replace);
replace(channel_state->transform_feedback_buffers);
replace(channel_state->compute_uniform_buffers);
replace(channel_state->compute_storage_buffers);
scalar_replace(index_buffer);
replace(vertex_buffers);
std::ranges::for_each(uniform_buffers, replace);
std::ranges::for_each(storage_buffers, replace);
replace(transform_feedback_buffers);
replace(compute_uniform_buffers);
replace(compute_storage_buffers);
// Mark the whole buffer as CPU written to stop tracking CPU writes
if (!do_not_mark) {
@@ -1646,8 +1634,8 @@ void BufferCache<P>::DeleteBuffer(BufferId buffer_id, bool do_not_mark) {
template <class P>
void BufferCache<P>::NotifyBufferDeletion() {
if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
channel_state->dirty_uniform_buffers.fill(~u32{0});
channel_state->uniform_buffer_binding_sizes.fill({});
dirty_uniform_buffers.fill(~u32{0});
uniform_buffer_binding_sizes.fill({});
}
auto& flags = maxwell3d->dirty.flags;
flags[Dirty::IndexBuffer] = true;
@@ -1655,12 +1643,13 @@ void BufferCache<P>::NotifyBufferDeletion() {
for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
flags[Dirty::VertexBuffer0 + index] = true;
}
channel_state->has_deleted_buffers = true;
has_deleted_buffers = true;
}
template <class P>
Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr, u32 cbuf_index,
bool is_written) const {
typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr,
u32 cbuf_index,
bool is_written) const {
const GPUVAddr gpu_addr = gpu_memory->Read<u64>(ssbo_addr);
const auto size = [&]() {
const bool is_nvn_cbuf = cbuf_index == 0;
@@ -1675,7 +1664,7 @@ Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr, u32 cbuf_index,
// cbufs, which do not store the sizes adjacent to the addresses, so use the fully
// mapped buffer size for now.
const u32 memory_layout_size = static_cast<u32>(gpu_memory->GetMemoryLayoutSize(gpu_addr));
return std::min(memory_layout_size, static_cast<u32>(8_MiB));
return memory_layout_size;
}();
const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
if (!cpu_addr || size == 0) {
@@ -1692,8 +1681,8 @@ Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr, u32 cbuf_index,
}
template <class P>
TextureBufferBinding BufferCache<P>::GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size,
PixelFormat format) {
typename BufferCache<P>::TextureBufferBinding BufferCache<P>::GetTextureBufferBinding(
GPUVAddr gpu_addr, u32 size, PixelFormat format) {
const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
TextureBufferBinding binding;
if (!cpu_addr || size == 0) {
@@ -1732,7 +1721,7 @@ std::span<u8> BufferCache<P>::ImmediateBuffer(size_t wanted_capacity) {
template <class P>
bool BufferCache<P>::HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept {
if constexpr (IS_OPENGL) {
return ((channel_state->fast_bound_uniform_buffers[stage] >> binding_index) & 1) != 0;
return ((fast_bound_uniform_buffers[stage] >> binding_index) & 1) != 0;
} else {
// Only OpenGL has fast uniform buffers
return false;
@@ -1741,14 +1730,14 @@ bool BufferCache<P>::HasFastUniformBufferBound(size_t stage, u32 binding_index)
template <class P>
std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectCount() {
auto& buffer = slot_buffers[channel_state->count_buffer_binding.buffer_id];
return std::make_pair(&buffer, buffer.Offset(channel_state->count_buffer_binding.cpu_addr));
auto& buffer = slot_buffers[count_buffer_binding.buffer_id];
return std::make_pair(&buffer, buffer.Offset(count_buffer_binding.cpu_addr));
}
template <class P>
std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectBuffer() {
auto& buffer = slot_buffers[channel_state->indirect_buffer_binding.buffer_id];
return std::make_pair(&buffer, buffer.Offset(channel_state->indirect_buffer_binding.cpu_addr));
auto& buffer = slot_buffers[indirect_buffer_binding.buffer_id];
return std::make_pair(&buffer, buffer.Offset(indirect_buffer_binding.cpu_addr));
}
} // namespace VideoCommon

View File

@@ -86,78 +86,8 @@ enum class ObtainBufferOperation : u32 {
MarkQuery = 3,
};
static constexpr BufferId NULL_BUFFER_ID{0};
static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB);
struct Binding {
VAddr cpu_addr{};
u32 size{};
BufferId buffer_id;
};
struct TextureBufferBinding : Binding {
PixelFormat format;
};
static constexpr Binding NULL_BINDING{
.cpu_addr = 0,
.size = 0,
.buffer_id = NULL_BUFFER_ID,
};
class BufferCacheChannelInfo : public ChannelInfo {
public:
BufferCacheChannelInfo() = delete;
BufferCacheChannelInfo(Tegra::Control::ChannelState& state) noexcept : ChannelInfo(state) {}
BufferCacheChannelInfo(const BufferCacheChannelInfo& state) = delete;
BufferCacheChannelInfo& operator=(const BufferCacheChannelInfo&) = delete;
Binding index_buffer;
std::array<Binding, NUM_VERTEX_BUFFERS> vertex_buffers;
std::array<std::array<Binding, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES> uniform_buffers;
std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers;
std::array<std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS>, NUM_STAGES> texture_buffers;
std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers;
Binding count_buffer_binding;
Binding indirect_buffer_binding;
std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers;
std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers;
std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS> compute_texture_buffers;
std::array<u32, NUM_STAGES> enabled_uniform_buffer_masks{};
u32 enabled_compute_uniform_buffer_mask = 0;
const UniformBufferSizes* uniform_buffer_sizes{};
const ComputeUniformBufferSizes* compute_uniform_buffer_sizes{};
std::array<u32, NUM_STAGES> enabled_storage_buffers{};
std::array<u32, NUM_STAGES> written_storage_buffers{};
u32 enabled_compute_storage_buffers = 0;
u32 written_compute_storage_buffers = 0;
std::array<u32, NUM_STAGES> enabled_texture_buffers{};
std::array<u32, NUM_STAGES> written_texture_buffers{};
std::array<u32, NUM_STAGES> image_texture_buffers{};
u32 enabled_compute_texture_buffers = 0;
u32 written_compute_texture_buffers = 0;
u32 image_compute_texture_buffers = 0;
std::array<u32, 16> uniform_cache_hits{};
std::array<u32, 16> uniform_cache_shots{};
u32 uniform_buffer_skip_cache_size = DEFAULT_SKIP_CACHE_SIZE;
bool has_deleted_buffers = false;
std::array<u32, NUM_STAGES> dirty_uniform_buffers{};
std::array<u32, NUM_STAGES> fast_bound_uniform_buffers{};
std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>
uniform_buffer_binding_sizes{};
};
template <class P>
class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInfo> {
template <typename P>
class BufferCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
// Page size for caching purposes.
// This is unrelated to the CPU page size and it can be changed as it seems optimal.
static constexpr u32 CACHING_PAGEBITS = 16;
@@ -174,6 +104,8 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInf
static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS;
static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = P::IMPLEMENTS_ASYNC_DOWNLOADS;
static constexpr BufferId NULL_BUFFER_ID{0};
static constexpr s64 DEFAULT_EXPECTED_MEMORY = 512_MiB;
static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB;
static constexpr s64 TARGET_THRESHOLD = 4_GiB;
@@ -217,6 +149,8 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInf
using OverlapSection = boost::icl::inter_section<int>;
using OverlapCounter = boost::icl::split_interval_map<VAddr, int>;
struct Empty {};
struct OverlapResult {
std::vector<BufferId> ids;
VAddr begin;
@@ -224,7 +158,25 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInf
bool has_stream_leap = false;
};
struct Binding {
VAddr cpu_addr{};
u32 size{};
BufferId buffer_id;
};
struct TextureBufferBinding : Binding {
PixelFormat format;
};
static constexpr Binding NULL_BINDING{
.cpu_addr = 0,
.size = 0,
.buffer_id = NULL_BUFFER_ID,
};
public:
static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB);
explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
Core::Memory::Memory& cpu_memory_, Runtime& runtime_);
@@ -544,6 +496,51 @@ private:
u32 last_index_count = 0;
Binding index_buffer;
std::array<Binding, NUM_VERTEX_BUFFERS> vertex_buffers;
std::array<std::array<Binding, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES> uniform_buffers;
std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers;
std::array<std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS>, NUM_STAGES> texture_buffers;
std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers;
Binding count_buffer_binding;
Binding indirect_buffer_binding;
std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers;
std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers;
std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS> compute_texture_buffers;
std::array<u32, NUM_STAGES> enabled_uniform_buffer_masks{};
u32 enabled_compute_uniform_buffer_mask = 0;
const UniformBufferSizes* uniform_buffer_sizes{};
const ComputeUniformBufferSizes* compute_uniform_buffer_sizes{};
std::array<u32, NUM_STAGES> enabled_storage_buffers{};
std::array<u32, NUM_STAGES> written_storage_buffers{};
u32 enabled_compute_storage_buffers = 0;
u32 written_compute_storage_buffers = 0;
std::array<u32, NUM_STAGES> enabled_texture_buffers{};
std::array<u32, NUM_STAGES> written_texture_buffers{};
std::array<u32, NUM_STAGES> image_texture_buffers{};
u32 enabled_compute_texture_buffers = 0;
u32 written_compute_texture_buffers = 0;
u32 image_compute_texture_buffers = 0;
std::array<u32, 16> uniform_cache_hits{};
std::array<u32, 16> uniform_cache_shots{};
u32 uniform_buffer_skip_cache_size = DEFAULT_SKIP_CACHE_SIZE;
bool has_deleted_buffers = false;
std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, std::array<u32, NUM_STAGES>, Empty>
dirty_uniform_buffers{};
std::conditional_t<IS_OPENGL, std::array<u32, NUM_STAGES>, Empty> fast_bound_uniform_buffers{};
std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS,
std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>, Empty>
uniform_buffer_binding_sizes{};
MemoryTracker memory_tracker;
IntervalSet uncommitted_ranges;
IntervalSet common_ranges;

View File

@@ -117,7 +117,7 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_)
for (auto& stage_uniforms : fast_uniforms) {
for (OGLBuffer& buffer : stage_uniforms) {
buffer.Create();
glNamedBufferData(buffer.handle, VideoCommon::DEFAULT_SKIP_CACHE_SIZE, nullptr,
glNamedBufferData(buffer.handle, BufferCache::DEFAULT_SKIP_CACHE_SIZE, nullptr,
GL_STREAM_DRAW);
}
}

View File

@@ -233,8 +233,6 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4
const VideoCommon::ImageInfo& info) {
if (IsPixelFormatASTC(info.format) && info.size.depth == 1 && !runtime.HasNativeASTC()) {
return Settings::values.accelerate_astc.GetValue() &&
Settings::values.astc_recompression.GetValue() ==
Settings::AstcRecompression::Uncompressed &&
!Settings::values.async_astc.GetValue();
}
// Disable other accelerated uploads for now as they don't implement swizzled uploads
@@ -439,19 +437,6 @@ OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_form
return GL_R32UI;
}
[[nodiscard]] GLenum SelectAstcFormat(PixelFormat format, bool is_srgb) {
switch (Settings::values.astc_recompression.GetValue()) {
case Settings::AstcRecompression::Bc1:
return is_srgb ? GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT : GL_COMPRESSED_RGBA_S3TC_DXT1_EXT;
break;
case Settings::AstcRecompression::Bc3:
return is_srgb ? GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT : GL_COMPRESSED_RGBA_S3TC_DXT5_EXT;
break;
default:
return is_srgb ? GL_SRGB8_ALPHA8 : GL_RGBA8;
}
}
} // Anonymous namespace
ImageBufferMap::~ImageBufferMap() {
@@ -754,16 +739,9 @@ Image::Image(TextureCacheRuntime& runtime_, const VideoCommon::ImageInfo& info_,
if (IsConverted(runtime->device, info.format, info.type)) {
flags |= ImageFlagBits::Converted;
flags |= ImageFlagBits::CostlyLoad;
const bool is_srgb = IsPixelFormatSRGB(info.format);
gl_internal_format = is_srgb ? GL_SRGB8_ALPHA8 : GL_RGBA8;
gl_internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8;
gl_format = GL_RGBA;
gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
if (IsPixelFormatASTC(info.format)) {
gl_internal_format = SelectAstcFormat(info.format, is_srgb);
gl_format = GL_NONE;
}
} else {
const auto& tuple = MaxwellToGL::GetFormatTuple(info.format);
gl_internal_format = tuple.internal_format;
@@ -1152,12 +1130,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
views{runtime.null_image_views} {
const Device& device = runtime.device;
if (True(image.flags & ImageFlagBits::Converted)) {
const bool is_srgb = IsPixelFormatSRGB(info.format);
internal_format = is_srgb ? GL_SRGB8_ALPHA8 : GL_RGBA8;
if (IsPixelFormatASTC(info.format)) {
internal_format = SelectAstcFormat(info.format, is_srgb);
}
internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8;
} else {
internal_format = MaxwellToGL::GetFormatTuple(format).internal_format;
}

View File

@@ -144,10 +144,6 @@ public:
return state_tracker;
}
void BarrierFeedbackLoop() const noexcept {
// OpenGL does not require a barrier for attachment feedback loops.
}
private:
struct StagingBuffers {
explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_);

View File

@@ -6,7 +6,6 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "common/logging/log.h"
#include "common/settings.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/surface.h"
@@ -238,25 +237,14 @@ FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with
PixelFormat pixel_format) {
ASSERT(static_cast<size_t>(pixel_format) < std::size(tex_format_tuples));
FormatTuple tuple = tex_format_tuples[static_cast<size_t>(pixel_format)];
// Transcode on hardware that doesn't support ASTC natively
// Use A8B8G8R8_UNORM on hardware that doesn't support ASTC natively
if (!device.IsOptimalAstcSupported() && VideoCore::Surface::IsPixelFormatASTC(pixel_format)) {
const bool is_srgb = with_srgb && VideoCore::Surface::IsPixelFormatSRGB(pixel_format);
switch (Settings::values.astc_recompression.GetValue()) {
case Settings::AstcRecompression::Uncompressed:
if (is_srgb) {
tuple.format = VK_FORMAT_A8B8G8R8_SRGB_PACK32;
} else {
tuple.format = VK_FORMAT_A8B8G8R8_UNORM_PACK32;
tuple.usage |= Storage;
}
break;
case Settings::AstcRecompression::Bc1:
tuple.format = is_srgb ? VK_FORMAT_BC1_RGBA_SRGB_BLOCK : VK_FORMAT_BC1_RGBA_UNORM_BLOCK;
break;
case Settings::AstcRecompression::Bc3:
tuple.format = is_srgb ? VK_FORMAT_BC3_SRGB_BLOCK : VK_FORMAT_BC3_UNORM_BLOCK;
break;
if (is_srgb) {
tuple.format = VK_FORMAT_A8B8G8R8_SRGB_PACK32;
} else {
tuple.format = VK_FORMAT_A8B8G8R8_UNORM_PACK32;
tuple.usage |= Storage;
}
}
const bool attachable = (tuple.usage & Attachable) != 0;

View File

@@ -481,13 +481,12 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
if constexpr (Spec::enabled_stages[4]) {
prepare_stage(4);
}
texture_cache.UpdateRenderTargets(false);
texture_cache.CheckFeedbackLoop(views);
ConfigureDraw(rescaling, render_area);
}
void GraphicsPipeline::ConfigureDraw(const RescalingPushConstant& rescaling,
const RenderAreaPushConstant& render_area) {
texture_cache.UpdateRenderTargets(false);
scheduler.RequestRenderpass(texture_cache.GetFramebuffer());
if (!is_built.load(std::memory_order::relaxed)) {

View File

@@ -10,16 +10,11 @@
namespace Vulkan {
constexpr u64 FENCE_RESERVE_SIZE = 8;
MasterSemaphore::MasterSemaphore(const Device& device_) : device(device_) {
if (!device.HasTimelineSemaphore()) {
static constexpr VkFenceCreateInfo fence_ci{
.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, .pNext = nullptr, .flags = 0};
free_queue.resize(FENCE_RESERVE_SIZE);
std::ranges::generate(free_queue,
[&] { return device.GetLogical().CreateFence(fence_ci); });
wait_thread = std::jthread([this](std::stop_token token) { WaitThread(token); });
fence = device.GetLogical().CreateFence(fence_ci);
return;
}
@@ -172,53 +167,16 @@ VkResult MasterSemaphore::SubmitQueueFence(vk::CommandBuffer& cmdbuf, VkSemaphor
.pSignalSemaphores = &signal_semaphore,
};
auto fence = GetFreeFence();
auto result = device.GetGraphicsQueue().Submit(submit_info, *fence);
if (result == VK_SUCCESS) {
std::scoped_lock lock{wait_mutex};
wait_queue.emplace(host_tick, std::move(fence));
wait_cv.notify_one();
fence.Wait();
fence.Reset();
gpu_tick.store(host_tick);
gpu_tick.notify_all();
}
return result;
}
void MasterSemaphore::WaitThread(std::stop_token token) {
while (!token.stop_requested()) {
u64 host_tick;
vk::Fence fence;
{
std::unique_lock lock{wait_mutex};
Common::CondvarWait(wait_cv, lock, token, [this] { return !wait_queue.empty(); });
if (token.stop_requested()) {
return;
}
std::tie(host_tick, fence) = std::move(wait_queue.front());
wait_queue.pop();
}
fence.Wait();
fence.Reset();
gpu_tick.store(host_tick);
gpu_tick.notify_all();
std::scoped_lock lock{free_mutex};
free_queue.push_front(std::move(fence));
}
}
vk::Fence MasterSemaphore::GetFreeFence() {
std::scoped_lock lock{free_mutex};
if (free_queue.empty()) {
static constexpr VkFenceCreateInfo fence_ci{
.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, .pNext = nullptr, .flags = 0};
return device.GetLogical().CreateFence(fence_ci);
}
auto fence = std::move(free_queue.back());
free_queue.pop_back();
return fence;
}
} // namespace Vulkan

View File

@@ -5,10 +5,8 @@
#include <atomic>
#include <condition_variable>
#include <deque>
#include <mutex>
#include <thread>
#include <queue>
#include "common/common_types.h"
#include "common/polyfill_thread.h"
@@ -19,8 +17,6 @@ namespace Vulkan {
class Device;
class MasterSemaphore {
using Waitable = std::pair<u64, vk::Fence>;
public:
explicit MasterSemaphore(const Device& device);
~MasterSemaphore();
@@ -61,22 +57,13 @@ private:
VkResult SubmitQueueFence(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore,
VkSemaphore wait_semaphore, u64 host_tick);
void WaitThread(std::stop_token token);
vk::Fence GetFreeFence();
private:
const Device& device; ///< Device.
vk::Fence fence; ///< Fence.
vk::Semaphore semaphore; ///< Timeline semaphore.
std::atomic<u64> gpu_tick{0}; ///< Current known GPU tick.
std::atomic<u64> current_tick{1}; ///< Current logical tick.
std::mutex wait_mutex;
std::mutex free_mutex;
std::condition_variable_any wait_cv;
std::queue<Waitable> wait_queue; ///< Queue for the fences to be waited on by the wait thread.
std::deque<vk::Fence> free_queue; ///< Holds available fences for submission.
std::jthread debug_thread; ///< Debug thread to workaround validation layer bugs.
std::jthread wait_thread; ///< Helper thread that waits for submitted fences.
};
} // namespace Vulkan

View File

@@ -34,8 +34,8 @@ VkSurfaceFormatKHR ChooseSwapSurfaceFormat(vk::Span<VkSurfaceFormatKHR> formats)
return found != formats.end() ? *found : formats[0];
}
static VkPresentModeKHR ChooseSwapPresentMode(bool has_imm, bool has_mailbox,
bool has_fifo_relaxed) {
static constexpr VkPresentModeKHR ChooseSwapPresentMode(bool has_imm, bool has_mailbox,
bool has_fifo_relaxed) {
// Mailbox doesn't lock the application like FIFO (vsync)
// FIFO present mode locks the framerate to the monitor's refresh rate
Settings::VSyncMode setting = [has_imm, has_mailbox]() {

View File

@@ -861,10 +861,6 @@ VkBuffer TextureCacheRuntime::GetTemporaryBuffer(size_t needed_size) {
return *buffers[level];
}
void TextureCacheRuntime::BarrierFeedbackLoop() {
scheduler.RequestOutsideRenderPassOperationContext();
}
void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src,
std::span<const VideoCommon::ImageCopy> copies) {
std::vector<VkBufferImageCopy> vk_in_copies(copies.size());
@@ -1272,9 +1268,7 @@ Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu
if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported()) {
if (Settings::values.async_astc.GetValue()) {
flags |= VideoCommon::ImageFlagBits::AsynchronousDecode;
} else if (Settings::values.astc_recompression.GetValue() ==
Settings::AstcRecompression::Uncompressed &&
Settings::values.accelerate_astc.GetValue() && info.size.depth == 1) {
} else if (Settings::values.accelerate_astc.GetValue() && info.size.depth == 1) {
flags |= VideoCommon::ImageFlagBits::AcceleratedUpload;
}
flags |= VideoCommon::ImageFlagBits::Converted;
@@ -1289,9 +1283,7 @@ Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu
.usage = VK_IMAGE_USAGE_STORAGE_BIT,
};
current_image = *original_image;
if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported() &&
Settings::values.astc_recompression.GetValue() ==
Settings::AstcRecompression::Uncompressed) {
if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported()) {
const auto& device = runtime->device.GetLogical();
storage_image_views.reserve(info.resources.levels);
for (s32 level = 0; level < info.resources.levels; ++level) {

View File

@@ -103,8 +103,6 @@ public:
[[nodiscard]] VkBuffer GetTemporaryBuffer(size_t needed_size);
void BarrierFeedbackLoop();
const Device& device;
Scheduler& scheduler;
MemoryAllocator& memory_allocator;

View File

@@ -155,7 +155,7 @@ void ImageBase::CheckAliasState() {
flags &= ~ImageFlagBits::Alias;
}
bool AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id) {
void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id) {
static constexpr auto OPTIONS = RelaxedOptions::Size | RelaxedOptions::Format;
ASSERT(lhs.info.type == rhs.info.type);
std::optional<SubresourceBase> base;
@@ -169,7 +169,7 @@ bool AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_i
}
if (!base) {
LOG_ERROR(HW_GPU, "Image alias should have been flipped");
return false;
return;
}
const PixelFormat lhs_format = lhs.info.format;
const PixelFormat rhs_format = rhs.info.format;
@@ -248,13 +248,12 @@ bool AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_i
}
ASSERT(lhs_alias.copies.empty() == rhs_alias.copies.empty());
if (lhs_alias.copies.empty()) {
return false;
return;
}
lhs.aliased_images.push_back(std::move(lhs_alias));
rhs.aliased_images.push_back(std::move(rhs_alias));
lhs.flags &= ~ImageFlagBits::IsRescalable;
rhs.flags &= ~ImageFlagBits::IsRescalable;
return true;
}
} // namespace VideoCommon

View File

@@ -142,6 +142,6 @@ struct ImageAllocBase {
std::vector<ImageId> images;
};
bool AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id);
void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id);
} // namespace VideoCommon

View File

@@ -49,8 +49,8 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface&
if constexpr (HAS_DEVICE_MEMORY_INFO) {
const s64 device_memory = static_cast<s64>(runtime.GetDeviceLocalMemory());
const s64 min_spacing_expected = device_memory - 1_GiB;
const s64 min_spacing_critical = device_memory - 512_MiB;
const s64 min_spacing_expected = device_memory - 1_GiB - 512_MiB;
const s64 min_spacing_critical = device_memory - 1_GiB;
const s64 mem_threshold = std::min(device_memory, TARGET_THRESHOLD);
const s64 min_vacancy_expected = (6 * mem_threshold) / 10;
const s64 min_vacancy_critical = (3 * mem_threshold) / 10;
@@ -86,12 +86,10 @@ void TextureCache<P>::RunGarbageCollector() {
// used by the async decoder thread.
return false;
}
if (!aggressive_mode && True(image.flags & ImageFlagBits::CostlyLoad)) {
return false;
}
const bool must_download =
image.IsSafeDownload() && False(image.flags & ImageFlagBits::BadOverlap);
if (!high_priority_mode && must_download) {
if (!high_priority_mode &&
(must_download || True(image.flags & ImageFlagBits::CostlyLoad))) {
return false;
}
if (must_download) {
@@ -139,6 +137,7 @@ void TextureCache<P>::TickFrame() {
TickAsyncDecode();
runtime.TickFrame();
critical_gc = 0;
++frame_tick;
if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
@@ -184,42 +183,6 @@ void TextureCache<P>::FillComputeImageViews(std::span<ImageViewInOut> views) {
views);
}
template <class P>
void TextureCache<P>::CheckFeedbackLoop(std::span<const ImageViewInOut> views) {
const bool requires_barrier = [&] {
for (const auto& view : views) {
if (!view.id) {
continue;
}
auto& image_view = slot_image_views[view.id];
// Check color targets
for (const auto& ct_view_id : render_targets.color_buffer_ids) {
if (ct_view_id) {
auto& ct_view = slot_image_views[ct_view_id];
if (image_view.image_id == ct_view.image_id) {
return true;
}
}
}
// Check zeta target
if (render_targets.depth_buffer_id) {
auto& zt_view = slot_image_views[render_targets.depth_buffer_id];
if (image_view.image_id == zt_view.image_id) {
return true;
}
}
}
return false;
}();
if (requires_barrier) {
runtime.BarrierFeedbackLoop();
}
}
template <class P>
typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) {
if (index > channel_state->graphics_sampler_table.Limit()) {
@@ -1311,18 +1274,17 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
const size_t size_bytes = CalculateGuestSizeInBytes(new_info);
const bool broken_views = runtime.HasBrokenTextureViewFormats();
const bool native_bgr = runtime.HasNativeBgr();
join_overlap_ids.clear();
join_overlaps_found.clear();
join_left_aliased_ids.clear();
join_right_aliased_ids.clear();
join_ignore_textures.clear();
join_bad_overlap_ids.clear();
join_copies_to_do.clear();
join_alias_indices.clear();
boost::container::small_vector<ImageId, 4> overlap_ids;
std::unordered_set<ImageId> overlaps_found;
boost::container::small_vector<ImageId, 4> left_aliased_ids;
boost::container::small_vector<ImageId, 4> right_aliased_ids;
std::unordered_set<ImageId> ignore_textures;
boost::container::small_vector<ImageId, 4> bad_overlap_ids;
boost::container::small_vector<ImageId, 4> all_siblings;
const bool this_is_linear = info.type == ImageType::Linear;
const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) {
if (True(overlap.flags & ImageFlagBits::Remapped)) {
join_ignore_textures.insert(overlap_id);
ignore_textures.insert(overlap_id);
return;
}
const bool overlap_is_linear = overlap.info.type == ImageType::Linear;
@@ -1332,11 +1294,11 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
if (this_is_linear && overlap_is_linear) {
if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) {
// Alias linear images with the same pitch
join_left_aliased_ids.push_back(overlap_id);
left_aliased_ids.push_back(overlap_id);
}
return;
}
join_overlaps_found.insert(overlap_id);
overlaps_found.insert(overlap_id);
static constexpr bool strict_size = true;
const std::optional<OverlapResult> solution = ResolveOverlap(
new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views, native_bgr);
@@ -1344,33 +1306,33 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
gpu_addr = solution->gpu_addr;
cpu_addr = solution->cpu_addr;
new_info.resources = solution->resources;
join_overlap_ids.push_back(overlap_id);
join_copies_to_do.emplace_back(JoinCopy{false, overlap_id});
overlap_ids.push_back(overlap_id);
all_siblings.push_back(overlap_id);
return;
}
static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format;
const ImageBase new_image_base(new_info, gpu_addr, cpu_addr);
if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) {
join_left_aliased_ids.push_back(overlap_id);
left_aliased_ids.push_back(overlap_id);
overlap.flags |= ImageFlagBits::Alias;
join_copies_to_do.emplace_back(JoinCopy{true, overlap_id});
all_siblings.push_back(overlap_id);
} else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options,
broken_views, native_bgr)) {
join_right_aliased_ids.push_back(overlap_id);
right_aliased_ids.push_back(overlap_id);
overlap.flags |= ImageFlagBits::Alias;
join_copies_to_do.emplace_back(JoinCopy{true, overlap_id});
all_siblings.push_back(overlap_id);
} else {
join_bad_overlap_ids.push_back(overlap_id);
bad_overlap_ids.push_back(overlap_id);
}
};
ForEachImageInRegion(cpu_addr, size_bytes, region_check);
const auto region_check_gpu = [&](ImageId overlap_id, ImageBase& overlap) {
if (!join_overlaps_found.contains(overlap_id)) {
if (!overlaps_found.contains(overlap_id)) {
if (True(overlap.flags & ImageFlagBits::Remapped)) {
join_ignore_textures.insert(overlap_id);
ignore_textures.insert(overlap_id);
}
if (overlap.gpu_addr == gpu_addr && overlap.guest_size_bytes == size_bytes) {
join_ignore_textures.insert(overlap_id);
ignore_textures.insert(overlap_id);
}
}
};
@@ -1378,11 +1340,11 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
bool can_rescale = info.rescaleable;
bool any_rescaled = false;
for (const auto& copy : join_copies_to_do) {
for (const ImageId sibling_id : all_siblings) {
if (!can_rescale) {
break;
}
Image& sibling = slot_images[copy.id];
Image& sibling = slot_images[sibling_id];
can_rescale &= ImageCanRescale(sibling);
any_rescaled |= True(sibling.flags & ImageFlagBits::Rescaled);
}
@@ -1390,13 +1352,13 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
can_rescale &= any_rescaled;
if (can_rescale) {
for (const auto& copy : join_copies_to_do) {
Image& sibling = slot_images[copy.id];
for (const ImageId sibling_id : all_siblings) {
Image& sibling = slot_images[sibling_id];
ScaleUp(sibling);
}
} else {
for (const auto& copy : join_copies_to_do) {
Image& sibling = slot_images[copy.id];
for (const ImageId sibling_id : all_siblings) {
Image& sibling = slot_images[sibling_id];
ScaleDown(sibling);
}
}
@@ -1408,7 +1370,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
new_image.flags |= ImageFlagBits::Sparse;
}
for (const ImageId overlap_id : join_ignore_textures) {
for (const ImageId overlap_id : ignore_textures) {
Image& overlap = slot_images[overlap_id];
if (True(overlap.flags & ImageFlagBits::GpuModified)) {
UNIMPLEMENTED();
@@ -1429,60 +1391,14 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
ScaleDown(new_image);
}
std::ranges::sort(join_copies_to_do, [this](const JoinCopy& lhs, const JoinCopy& rhs) {
const ImageBase& lhs_image = slot_images[lhs.id];
const ImageBase& rhs_image = slot_images[rhs.id];
std::ranges::sort(overlap_ids, [this](const ImageId lhs, const ImageId rhs) {
const ImageBase& lhs_image = slot_images[lhs];
const ImageBase& rhs_image = slot_images[rhs];
return lhs_image.modification_tick < rhs_image.modification_tick;
});
ImageBase& new_image_base = new_image;
for (const ImageId aliased_id : join_right_aliased_ids) {
ImageBase& aliased = slot_images[aliased_id];
size_t alias_index = new_image_base.aliased_images.size();
if (!AddImageAlias(new_image_base, aliased, new_image_id, aliased_id)) {
continue;
}
join_alias_indices.emplace(aliased_id, alias_index);
new_image.flags |= ImageFlagBits::Alias;
}
for (const ImageId aliased_id : join_left_aliased_ids) {
ImageBase& aliased = slot_images[aliased_id];
size_t alias_index = new_image_base.aliased_images.size();
if (!AddImageAlias(aliased, new_image_base, aliased_id, new_image_id)) {
continue;
}
join_alias_indices.emplace(aliased_id, alias_index);
new_image.flags |= ImageFlagBits::Alias;
}
for (const ImageId aliased_id : join_bad_overlap_ids) {
ImageBase& aliased = slot_images[aliased_id];
aliased.overlapping_images.push_back(new_image_id);
new_image.overlapping_images.push_back(aliased_id);
if (aliased.info.resources.levels == 1 && aliased.info.block.depth == 0 &&
aliased.overlapping_images.size() > 1) {
aliased.flags |= ImageFlagBits::BadOverlap;
}
if (new_image.info.resources.levels == 1 && new_image.info.block.depth == 0 &&
new_image.overlapping_images.size() > 1) {
new_image.flags |= ImageFlagBits::BadOverlap;
}
}
for (const auto& copy_object : join_copies_to_do) {
Image& overlap = slot_images[copy_object.id];
if (copy_object.is_alias) {
if (!overlap.IsSafeDownload()) {
continue;
}
const auto alias_pointer = join_alias_indices.find(copy_object.id);
if (alias_pointer == join_alias_indices.end()) {
continue;
}
const AliasedImage& aliased = new_image.aliased_images[alias_pointer->second];
CopyImage(new_image_id, aliased.id, aliased.copies);
new_image.modification_tick = overlap.modification_tick;
continue;
}
for (const ImageId overlap_id : overlap_ids) {
Image& overlap = slot_images[overlap_id];
if (True(overlap.flags & ImageFlagBits::GpuModified)) {
new_image.flags |= ImageFlagBits::GpuModified;
const auto& resolution = Settings::values.resolution_info;
@@ -1495,15 +1411,35 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
} else {
runtime.CopyImage(new_image, overlap, std::move(copies));
}
new_image.modification_tick = overlap.modification_tick;
}
if (True(overlap.flags & ImageFlagBits::Tracked)) {
UntrackImage(overlap, copy_object.id);
UntrackImage(overlap, overlap_id);
}
UnregisterImage(overlap_id);
DeleteImage(overlap_id);
}
ImageBase& new_image_base = new_image;
for (const ImageId aliased_id : right_aliased_ids) {
ImageBase& aliased = slot_images[aliased_id];
AddImageAlias(new_image_base, aliased, new_image_id, aliased_id);
new_image.flags |= ImageFlagBits::Alias;
}
for (const ImageId aliased_id : left_aliased_ids) {
ImageBase& aliased = slot_images[aliased_id];
AddImageAlias(aliased, new_image_base, aliased_id, new_image_id);
new_image.flags |= ImageFlagBits::Alias;
}
for (const ImageId aliased_id : bad_overlap_ids) {
ImageBase& aliased = slot_images[aliased_id];
aliased.overlapping_images.push_back(new_image_id);
new_image.overlapping_images.push_back(aliased_id);
if (aliased.info.resources.levels == 1 && aliased.overlapping_images.size() > 1) {
aliased.flags |= ImageFlagBits::BadOverlap;
}
if (new_image.info.resources.levels == 1 && new_image.overlapping_images.size() > 1) {
new_image.flags |= ImageFlagBits::BadOverlap;
}
UnregisterImage(copy_object.id);
DeleteImage(copy_object.id);
}
RegisterImage(new_image_id);
return new_image_id;
}
@@ -1533,7 +1469,7 @@ std::optional<typename TextureCache<P>::BlitImages> TextureCache<P>::GetBlitImag
if (!copy.must_accelerate) {
do {
if (!src_id && !dst_id) {
return std::nullopt;
break;
}
if (src_id && True(slot_images[src_id].flags & ImageFlagBits::GpuModified)) {
break;
@@ -1911,6 +1847,10 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
}
total_used_memory += Common::AlignUp(tentative_size, 1024);
if (total_used_memory > critical_memory && critical_gc < GC_EMERGENCY_COUNTS) {
RunGarbageCollector();
critical_gc++;
}
image.lru_index = lru_cache.Insert(image_id, frame_tick);
ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, image_id](u64 page) {

View File

@@ -10,9 +10,7 @@
#include <span>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include <boost/container/small_vector.hpp>
#include <queue>
#include "common/common_types.h"
@@ -150,9 +148,6 @@ public:
/// Fill image_view_ids with the compute images in indices
void FillComputeImageViews(std::span<ImageViewInOut> views);
/// Handle feedback loops during draws.
void CheckFeedbackLoop(std::span<const ImageViewInOut> views);
/// Get the sampler from the graphics descriptor table in the specified index
Sampler* GetGraphicsSampler(u32 index);
@@ -429,6 +424,7 @@ private:
u64 minimum_memory;
u64 expected_memory;
u64 critical_memory;
size_t critical_gc;
struct BufferDownload {
GPUVAddr address;
@@ -478,20 +474,6 @@ private:
Common::ThreadWorker texture_decode_worker{1, "TextureDecoder"};
std::vector<std::unique_ptr<AsyncDecodeContext>> async_decodes;
// Join caching
boost::container::small_vector<ImageId, 4> join_overlap_ids;
std::unordered_set<ImageId> join_overlaps_found;
boost::container::small_vector<ImageId, 4> join_left_aliased_ids;
boost::container::small_vector<ImageId, 4> join_right_aliased_ids;
std::unordered_set<ImageId> join_ignore_textures;
boost::container::small_vector<ImageId, 4> join_bad_overlap_ids;
struct JoinCopy {
bool is_alias;
ImageId id;
};
boost::container::small_vector<JoinCopy, 4> join_copies_to_do;
std::unordered_map<ImageId, size_t> join_alias_indices;
};
} // namespace VideoCommon

View File

@@ -18,8 +18,6 @@
#include "common/bit_util.h"
#include "common/common_types.h"
#include "common/div_ceil.h"
#include "common/scratch_buffer.h"
#include "common/settings.h"
#include "video_core/compatible_formats.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
@@ -30,7 +28,6 @@
#include "video_core/texture_cache/samples_helper.h"
#include "video_core/texture_cache/util.h"
#include "video_core/textures/astc.h"
#include "video_core/textures/bcn.h"
#include "video_core/textures/decoders.h"
namespace VideoCommon {
@@ -123,9 +120,7 @@ template <u32 GOB_EXTENT>
return {
.width = AdjustMipBlockSize<GOB_SIZE_X>(num_tiles.width, block_size.width, level),
.height = AdjustMipBlockSize<GOB_SIZE_Y>(num_tiles.height, block_size.height, level),
.depth = level == 0
? block_size.depth
: AdjustMipBlockSize<GOB_SIZE_Z>(num_tiles.depth, block_size.depth, level),
.depth = AdjustMipBlockSize<GOB_SIZE_Z>(num_tiles.depth, block_size.depth, level),
};
}
@@ -167,13 +162,6 @@ template <u32 GOB_EXTENT>
}
[[nodiscard]] constexpr Extent3D TileShift(const LevelInfo& info, u32 level) {
if (level == 0) {
return Extent3D{
.width = info.block.width,
.height = info.block.height,
.depth = info.block.depth,
};
}
const Extent3D blocks = NumLevelBlocks(info, level);
return Extent3D{
.width = AdjustTileSize(info.block.width, GOB_SIZE_X, blocks.width),
@@ -597,21 +585,6 @@ u32 CalculateConvertedSizeBytes(const ImageInfo& info) noexcept {
return info.size.width * BytesPerBlock(info.format);
}
static constexpr Extent2D TILE_SIZE{1, 1};
if (IsPixelFormatASTC(info.format) && Settings::values.astc_recompression.GetValue() !=
Settings::AstcRecompression::Uncompressed) {
const u32 bpp_div =
Settings::values.astc_recompression.GetValue() == Settings::AstcRecompression::Bc1 ? 2
: 1;
// NumBlocksPerLayer doesn't account for this correctly, so we have to do it manually.
u32 output_size = 0;
for (s32 i = 0; i < info.resources.levels; i++) {
const auto mip_size = AdjustMipSize(info.size, i);
const u32 plane_dim =
Common::AlignUp(mip_size.width, 4U) * Common::AlignUp(mip_size.height, 4U);
output_size += (plane_dim * info.size.depth * info.resources.layers) / bpp_div;
}
return output_size;
}
return NumBlocksPerLayer(info, TILE_SIZE) * info.resources.layers * CONVERTED_BYTES_PER_BLOCK;
}
@@ -912,7 +885,6 @@ BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output,
std::span<BufferImageCopy> copies) {
u32 output_offset = 0;
Common::ScratchBuffer<u8> decode_scratch;
const Extent2D tile_size = DefaultBlockSize(info.format);
for (BufferImageCopy& copy : copies) {
@@ -923,58 +895,22 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8
ASSERT(copy.image_extent == mip_size);
ASSERT(copy.buffer_row_length == Common::AlignUp(mip_size.width, tile_size.width));
ASSERT(copy.buffer_image_height == Common::AlignUp(mip_size.height, tile_size.height));
const auto input_offset = input.subspan(copy.buffer_offset);
if (IsPixelFormatASTC(info.format)) {
Tegra::Texture::ASTC::Decompress(
input.subspan(copy.buffer_offset), copy.image_extent.width,
copy.image_extent.height,
copy.image_subresource.num_layers * copy.image_extent.depth, tile_size.width,
tile_size.height, output.subspan(output_offset));
} else {
DecompressBC4(input.subspan(copy.buffer_offset), copy.image_extent,
output.subspan(output_offset));
}
copy.buffer_offset = output_offset;
copy.buffer_row_length = mip_size.width;
copy.buffer_image_height = mip_size.height;
const auto recompression_setting = Settings::values.astc_recompression.GetValue();
const bool astc = IsPixelFormatASTC(info.format);
if (astc && recompression_setting == Settings::AstcRecompression::Uncompressed) {
Tegra::Texture::ASTC::Decompress(
input_offset, copy.image_extent.width, copy.image_extent.height,
copy.image_subresource.num_layers * copy.image_extent.depth, tile_size.width,
tile_size.height, output.subspan(output_offset));
output_offset += copy.image_extent.width * copy.image_extent.height *
copy.image_subresource.num_layers * CONVERTED_BYTES_PER_BLOCK;
} else if (astc) {
// BC1 uses 0.5 bytes per texel
// BC3 uses 1 byte per texel
const auto compress = recompression_setting == Settings::AstcRecompression::Bc1
? Tegra::Texture::BCN::CompressBC1
: Tegra::Texture::BCN::CompressBC3;
const auto bpp_div = recompression_setting == Settings::AstcRecompression::Bc1 ? 2 : 1;
const u32 plane_dim = copy.image_extent.width * copy.image_extent.height;
const u32 level_size = plane_dim * copy.image_extent.depth *
copy.image_subresource.num_layers * CONVERTED_BYTES_PER_BLOCK;
decode_scratch.resize_destructive(level_size);
Tegra::Texture::ASTC::Decompress(
input_offset, copy.image_extent.width, copy.image_extent.height,
copy.image_subresource.num_layers * copy.image_extent.depth, tile_size.width,
tile_size.height, decode_scratch);
compress(decode_scratch, copy.image_extent.width, copy.image_extent.height,
copy.image_subresource.num_layers * copy.image_extent.depth,
output.subspan(output_offset));
const u32 aligned_plane_dim = Common::AlignUp(copy.image_extent.width, 4) *
Common::AlignUp(copy.image_extent.height, 4);
copy.buffer_size =
(aligned_plane_dim * copy.image_extent.depth * copy.image_subresource.num_layers) /
bpp_div;
output_offset += static_cast<u32>(copy.buffer_size);
} else {
DecompressBC4(input_offset, copy.image_extent, output.subspan(output_offset));
output_offset += copy.image_extent.width * copy.image_extent.height *
copy.image_subresource.num_layers * CONVERTED_BYTES_PER_BLOCK;
}
output_offset += copy.image_extent.width * copy.image_extent.height *
copy.image_subresource.num_layers * CONVERTED_BYTES_PER_BLOCK;
}
}
@@ -1297,9 +1233,7 @@ u32 MapSizeBytes(const ImageBase& image) {
static_assert(CalculateLevelSize(LevelInfo{{1920, 1080, 1}, {0, 2, 0}, {1, 1}, 2, 0}, 0) ==
0x7f8000);
static_assert(CalculateLevelSize(LevelInfo{{32, 32, 1}, {0, 0, 4}, {1, 1}, 4, 0}, 0) == 0x40000);
static_assert(CalculateLevelSize(LevelInfo{{128, 8, 1}, {0, 4, 0}, {1, 1}, 4, 0}, 0) == 0x40000);
static_assert(CalculateLevelSize(LevelInfo{{32, 32, 1}, {0, 0, 4}, {1, 1}, 4, 0}, 0) == 0x4000);
static_assert(CalculateLevelOffset(PixelFormat::R8_SINT, {1920, 1080, 1}, {0, 2, 0}, 0, 7) ==
0x2afc00);

View File

@@ -16,8 +16,8 @@
#include "common/alignment.h"
#include "common/common_types.h"
#include "common/polyfill_ranges.h"
#include "common/thread_worker.h"
#include "video_core/textures/astc.h"
#include "video_core/textures/workers.h"
class InputBitStream {
public:
@@ -1656,7 +1656,8 @@ void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height,
const u32 rows = Common::DivideUp(height, block_height);
const u32 cols = Common::DivideUp(width, block_width);
Common::ThreadWorker& workers{GetThreadWorkers()};
static Common::ThreadWorker workers{std::max(std::thread::hardware_concurrency(), 2U) / 2,
"ASTCDecompress"};
for (u32 z = 0; z < depth; ++z) {
const u32 depth_offset = z * height * width * 4;

View File

@@ -1,87 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <stb_dxt.h>
#include <string.h>
#include "common/alignment.h"
#include "video_core/textures/bcn.h"
#include "video_core/textures/workers.h"
namespace Tegra::Texture::BCN {
using BCNCompressor = void(u8* block_output, const u8* block_input, bool any_alpha);
/// Compresses tightly packed 4-byte-per-texel RGBA data into 4x4 block-compressed output.
///
/// Every row of 4x4 blocks is queued as one job on the shared transcode worker pool; the
/// function waits for all jobs of a plane before moving on to the next plane.
///
/// @tparam BytesPerBlock  Size in bytes of one encoded 4x4 block written by `f`.
/// @tparam ThresholdAlpha When true, alpha is binarised: texels with alpha >= 128 become fully
///                        opaque, all other texels are zeroed and reported through the
///                        `any_alpha` flag handed to `f`.
/// @param data   Source texels, `depth` consecutive planes of `width * height` texels.
/// @param width  Plane width in texels; does not have to be a multiple of 4.
/// @param height Plane height in texels; does not have to be a multiple of 4.
/// @param depth  Number of planes to encode.
/// @param output Destination buffer, indexed as
///               `DivideUp(width, 4) * DivideUp(height, 4) * BytesPerBlock` bytes per plane.
/// @param f      Block encoder invoked once per 4x4 block.
template <u32 BytesPerBlock, bool ThresholdAlpha = false>
void CompressBCN(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
                 std::span<uint8_t> output, BCNCompressor f) {
    constexpr u8 alpha_threshold = 128;
    constexpr u32 bytes_per_px = 4;

    // All offsets are computed in size_t so that very large surfaces cannot wrap around in
    // 32-bit arithmetic before being used as an index.
    const size_t plane_dim = static_cast<size_t>(width) * height;
    // Output strides depend only on the image dimensions, so compute them once up front
    // instead of once per block.
    const size_t bytes_per_row = static_cast<size_t>(BytesPerBlock) * Common::DivideUp(width, 4U);
    const size_t bytes_per_plane = bytes_per_row * Common::DivideUp(height, 4U);

    Common::ThreadWorker& workers{GetThreadWorkers()};

    for (u32 z = 0; z < depth; z++) {
        for (u32 y = 0; y < height; y += 4) {
            auto compress_row = [z, y, width, height, plane_dim, bytes_per_row, bytes_per_plane, f,
                                 data, output]() {
                u8* const row_output =
                    output.data() + z * bytes_per_plane + (y / 4) * bytes_per_row;

                for (u32 x = 0; x < width; x += 4) {
                    // Gather 4x4 block of RGBA texels
                    u8 input_colors[4][4][4];
                    bool any_alpha = false;

                    for (u32 j = 0; j < 4; j++) {
                        for (u32 i = 0; i < 4; i++) {
                            u8* const texel = input_colors[j][i];

                            // Texels past the right/bottom edge are padded with zeroes.
                            if ((x + i >= width) || (y + j >= height)) {
                                memset(texel, 0, bytes_per_px);
                                continue;
                            }

                            const size_t coord =
                                (z * plane_dim + static_cast<size_t>(y + j) * width + (x + i)) *
                                bytes_per_px;

                            if constexpr (ThresholdAlpha) {
                                if (data[coord + 3] >= alpha_threshold) {
                                    texel[0] = data[coord + 0];
                                    texel[1] = data[coord + 1];
                                    texel[2] = data[coord + 2];
                                    texel[3] = 255;
                                } else {
                                    any_alpha = true;
                                    memset(texel, 0, bytes_per_px);
                                }
                            } else {
                                memcpy(texel, &data[coord], bytes_per_px);
                            }
                        }
                    }

                    f(row_output + static_cast<size_t>(x / 4) * BytesPerBlock,
                      &input_colors[0][0][0], any_alpha);
                }
            };
            workers.QueueWork(std::move(compress_row));
        }
        // Finish the current plane before queueing the next one.
        workers.WaitForRequests();
    }
}
/// Encodes RGBA texels as BC1 using stb_dxt in its normal quality mode.
///
/// Blocks are 8 bytes each. Alpha thresholding is enabled, so the encoder is told per block
/// whether any texel fell below the alpha threshold.
void CompressBC1(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
                 std::span<uint8_t> output) {
    // Captureless lambda, so it converts to the plain function type CompressBCN expects.
    const auto encode_block = [](u8* dst, const u8* src, bool has_alpha) {
        stb_compress_bc1_block(dst, src, has_alpha, STB_DXT_NORMAL);
    };
    CompressBCN<8, true>(data, width, height, depth, output, encode_block);
}
/// Encodes RGBA texels as BC3 using stb_dxt in its normal quality mode.
///
/// Blocks are 16 bytes each. Alpha is passed through unmodified (no thresholding), so the
/// per-block alpha flag supplied by CompressBCN is ignored.
void CompressBC3(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
                 std::span<uint8_t> output) {
    // Captureless lambda, so it converts to the plain function type CompressBCN expects.
    const auto encode_block = [](u8* dst, const u8* src, bool /*any_alpha*/) {
        stb_compress_bc3_block(dst, src, STB_DXT_NORMAL);
    };
    CompressBCN<16, false>(data, width, height, depth, output, encode_block);
}
} // namespace Tegra::Texture::BCN

View File

@@ -1,17 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <span>
#include <stdint.h>
// Helpers that re-encode decoded RGBA texels (4 bytes per texel) into BCn blocks.
namespace Tegra::Texture::BCN {
/// Compresses `depth` planes of `width` x `height` RGBA texels from `data` into BC1 blocks
/// (8 bytes per 4x4 block) written to `output`. Alpha is thresholded: texels with
/// alpha >= 128 are treated as opaque, the rest are cleared. Dimensions need not be multiples
/// of 4; partial blocks are zero padded.
void CompressBC1(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
std::span<uint8_t> output);
/// Compresses `depth` planes of `width` x `height` RGBA texels from `data` into BC3 blocks
/// (16 bytes per 4x4 block) written to `output`. Alpha is kept as is. Dimensions need not be
/// multiples of 4; partial blocks are zero padded.
void CompressBC3(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
std::span<uint8_t> output);
} // namespace Tegra::Texture::BCN

View File

@@ -1,15 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "video_core/textures/workers.h"
namespace Tegra::Texture {
/// Returns the worker pool shared by the texture transcoding code.
///
/// The pool is a function-local static, so it is constructed on the first call and the same
/// instance is returned afterwards.
Common::ThreadWorker& GetThreadWorkers() {
    static Common::ThreadWorker pool{
        [] {
            // Half of the reported hardware threads, but never fewer than one; this also
            // covers hardware_concurrency() reporting 0.
            const unsigned hw_threads = std::thread::hardware_concurrency();
            return std::max(hw_threads, 2U) / 2;
        }(),
        "ImageTranscode"};
    return pool;
}
} // namespace Tegra::Texture

View File

@@ -1,12 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/thread_worker.h"
namespace Tegra::Texture {
/// Returns a reference to the worker pool shared by the texture decoding/encoding code, so
/// that the individual transcoders do not each create their own pool.
Common::ThreadWorker& GetThreadWorkers();
}

View File

@@ -406,14 +406,6 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
features.extended_dynamic_state3.extendedDynamicState3ColorBlendEnable = false;
features.extended_dynamic_state3.extendedDynamicState3ColorBlendEquation = false;
dynamic_state3_blending = false;
const u32 version = (properties.properties.driverVersion << 3) >> 3;
if (version < VK_MAKE_API_VERSION(0, 23, 1, 0)) {
LOG_WARNING(Render_Vulkan,
"RADV versions older than 23.1.0 have broken depth clamp dynamic state");
features.extended_dynamic_state3.extendedDynamicState3DepthClampEnable = false;
dynamic_state3_enables = false;
}
}
if (extensions.vertex_input_dynamic_state && is_radv) {
// TODO(ameerj): Blacklist only offending driver versions
@@ -471,18 +463,6 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
LOG_WARNING(Render_Vulkan, "ANV driver does not support native BGR format");
must_emulate_bgr565 = true;
}
if (extensions.push_descriptor && is_intel_anv) {
const u32 version = (properties.properties.driverVersion << 3) >> 3;
if (version >= VK_MAKE_API_VERSION(0, 22, 3, 0) &&
version < VK_MAKE_API_VERSION(0, 23, 2, 0)) {
// Disable VK_KHR_push_descriptor due to
// mesa/mesa/-/commit/ff91c5ca42bc80aa411cb3fd8f550aa6fdd16bdc
LOG_WARNING(Render_Vulkan,
"ANV drivers 22.3.0 to 23.1.0 have broken VK_KHR_push_descriptor");
extensions.push_descriptor = false;
loaded_extensions.erase(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
}
}
if (is_mvk) {
LOG_WARNING(Render_Vulkan,
"MVK driver breaks when using more than 16 vertex attributes/bindings");
@@ -1002,11 +982,6 @@ u64 Device::GetDeviceMemoryUsage() const {
}
void Device::CollectPhysicalMemoryInfo() {
// Account for resolution scaling in memory limits
const size_t normal_memory = 6_GiB;
const size_t scaler_memory = 1_GiB * Settings::values.resolution_info.ScaleUp(1);
// Calculate limits using memory budget
VkPhysicalDeviceMemoryBudgetPropertiesEXT budget{};
budget.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT;
const auto mem_info =
@@ -1036,12 +1011,11 @@ void Device::CollectPhysicalMemoryInfo() {
if (!is_integrated) {
const u64 reserve_memory = std::min<u64>(device_access_memory / 8, 1_GiB);
device_access_memory -= reserve_memory;
device_access_memory = std::min<u64>(device_access_memory, normal_memory + scaler_memory);
return;
}
const s64 available_memory = static_cast<s64>(device_access_memory - device_initial_usage);
device_access_memory = static_cast<u64>(std::max<s64>(
std::min<s64>(available_memory - 8_GiB, 4_GiB), std::min<s64>(local_memory, 4_GiB)));
std::min<s64>(available_memory - 8_GiB, 4_GiB), static_cast<s64>(local_memory)));
}
void Device::CollectToolingInfo() {

View File

@@ -711,7 +711,6 @@ void Config::ReadRendererValues() {
ReadGlobalSetting(Settings::values.nvdec_emulation);
ReadGlobalSetting(Settings::values.accelerate_astc);
ReadGlobalSetting(Settings::values.async_astc);
ReadGlobalSetting(Settings::values.astc_recompression);
ReadGlobalSetting(Settings::values.use_reactive_flushing);
ReadGlobalSetting(Settings::values.shader_backend);
ReadGlobalSetting(Settings::values.use_asynchronous_shaders);
@@ -1360,10 +1359,6 @@ void Config::SaveRendererValues() {
Settings::values.nvdec_emulation.UsingGlobal());
WriteGlobalSetting(Settings::values.accelerate_astc);
WriteGlobalSetting(Settings::values.async_astc);
WriteSetting(QString::fromStdString(Settings::values.astc_recompression.GetLabel()),
static_cast<u32>(Settings::values.astc_recompression.GetValue(global)),
static_cast<u32>(Settings::values.astc_recompression.GetDefault()),
Settings::values.astc_recompression.UsingGlobal());
WriteGlobalSetting(Settings::values.use_reactive_flushing);
WriteSetting(QString::fromStdString(Settings::values.shader_backend.GetLabel()),
static_cast<u32>(Settings::values.shader_backend.GetValue(global)),

View File

@@ -208,4 +208,3 @@ Q_DECLARE_METATYPE(Settings::ScalingFilter);
Q_DECLARE_METATYPE(Settings::AntiAliasing);
Q_DECLARE_METATYPE(Settings::RendererBackend);
Q_DECLARE_METATYPE(Settings::ShaderBackend);
Q_DECLARE_METATYPE(Settings::AstcRecompression);

View File

@@ -27,7 +27,6 @@ void ConfigureGraphicsAdvanced::SetConfiguration() {
ui->async_present->setEnabled(runtime_lock);
ui->renderer_force_max_clock->setEnabled(runtime_lock);
ui->async_astc->setEnabled(runtime_lock);
ui->astc_recompression_combobox->setEnabled(runtime_lock);
ui->use_asynchronous_shaders->setEnabled(runtime_lock);
ui->anisotropic_filtering_combobox->setEnabled(runtime_lock);
ui->enable_compute_pipelines_checkbox->setEnabled(runtime_lock);
@@ -48,20 +47,14 @@ void ConfigureGraphicsAdvanced::SetConfiguration() {
static_cast<int>(Settings::values.gpu_accuracy.GetValue()));
ui->anisotropic_filtering_combobox->setCurrentIndex(
Settings::values.max_anisotropy.GetValue());
ui->astc_recompression_combobox->setCurrentIndex(
static_cast<int>(Settings::values.astc_recompression.GetValue()));
} else {
ConfigurationShared::SetPerGameSetting(ui->gpu_accuracy, &Settings::values.gpu_accuracy);
ConfigurationShared::SetPerGameSetting(ui->anisotropic_filtering_combobox,
&Settings::values.max_anisotropy);
ConfigurationShared::SetPerGameSetting(ui->astc_recompression_combobox,
&Settings::values.astc_recompression);
ConfigurationShared::SetHighlight(ui->label_gpu_accuracy,
!Settings::values.gpu_accuracy.UsingGlobal());
ConfigurationShared::SetHighlight(ui->af_label,
!Settings::values.max_anisotropy.UsingGlobal());
ConfigurationShared::SetHighlight(ui->label_astc_recompression,
!Settings::values.astc_recompression.UsingGlobal());
}
}
@@ -78,8 +71,6 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() {
ui->use_reactive_flushing, use_reactive_flushing);
ConfigurationShared::ApplyPerGameSetting(&Settings::values.async_astc, ui->async_astc,
async_astc);
ConfigurationShared::ApplyPerGameSetting(&Settings::values.astc_recompression,
ui->astc_recompression_combobox);
ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_shaders,
ui->use_asynchronous_shaders,
use_asynchronous_shaders);
@@ -114,8 +105,6 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {
Settings::values.renderer_force_max_clock.UsingGlobal());
ui->use_reactive_flushing->setEnabled(Settings::values.use_reactive_flushing.UsingGlobal());
ui->async_astc->setEnabled(Settings::values.async_astc.UsingGlobal());
ui->astc_recompression_combobox->setEnabled(
Settings::values.astc_recompression.UsingGlobal());
ui->use_asynchronous_shaders->setEnabled(
Settings::values.use_asynchronous_shaders.UsingGlobal());
ui->use_fast_gpu_time->setEnabled(Settings::values.use_fast_gpu_time.UsingGlobal());
@@ -155,9 +144,6 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {
ConfigurationShared::SetColoredComboBox(
ui->anisotropic_filtering_combobox, ui->af_label,
static_cast<int>(Settings::values.max_anisotropy.GetValue(true)));
ConfigurationShared::SetColoredComboBox(
ui->astc_recompression_combobox, ui->label_astc_recompression,
static_cast<int>(Settings::values.astc_recompression.GetValue(true)));
}
void ConfigureGraphicsAdvanced::ExposeComputeOption() {

View File

@@ -69,50 +69,6 @@
</layout>
</widget>
</item>
<item>
<widget class="QWidget" name="astc_recompression_layout" native="true">
<layout class="QHBoxLayout" name="horizontalLayout_3">
<property name="leftMargin">
<number>0</number>
</property>
<property name="topMargin">
<number>0</number>
</property>
<property name="rightMargin">
<number>0</number>
</property>
<property name="bottomMargin">
<number>0</number>
</property>
<item>
<widget class="QLabel" name="label_astc_recompression">
<property name="text">
<string>ASTC recompression:</string>
</property>
</widget>
</item>
<item>
<widget class="QComboBox" name="astc_recompression_combobox">
<item>
<property name="text">
<string>Uncompressed (Best quality)</string>
</property>
</item>
<item>
<property name="text">
<string>BC1 (Low quality)</string>
</property>
</item>
<item>
<property name="text">
<string>BC3 (Medium quality)</string>
</property>
</item>
</widget>
</item>
</layout>
</widget>
</item>
<item>
<widget class="QCheckBox" name="async_present">
<property name="text">

View File

@@ -3,6 +3,7 @@
#include <memory>
#include <thread>
#include <QScrollArea>
#include "core/core.h"
#include "core/hid/emulated_controller.h"
@@ -112,7 +113,13 @@ void ConfigureInput::Initialize(InputCommon::InputSubsystem* input_subsystem,
for (std::size_t i = 0; i < player_tabs.size(); ++i) {
player_tabs[i]->setLayout(new QHBoxLayout(player_tabs[i]));
player_tabs[i]->layout()->addWidget(player_controllers[i]);
auto scroll_area = new QScrollArea(player_tabs[i]);
player_tabs[i]->layout()->addWidget(scroll_area);
scroll_area->setWidget(player_controllers[i]);
scroll_area->setWidgetResizable(true);
scroll_area->setFrameShape(QFrame::Shape::NoFrame);
connect(player_controllers[i], &ConfigureInputPlayer::Connected, [&, i](bool is_connected) {
// Ensures that the controllers are always connected in sequential order
if (is_connected) {

View File

@@ -10,6 +10,18 @@
<height>487</height>
</rect>
</property>
<property name="sizePolicy">
<sizepolicy hsizetype="Ignored" vsizetype="Minimum">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="minimumSize">
<size>
<width>0</width>
<height>743</height>
</size>
</property>
<property name="windowTitle">
<string>Configure Input</string>
</property>
@@ -1327,22 +1339,28 @@
</property>
</widget>
</item>
<item>
<widget class="QSlider" name="sliderZLThreshold">
<property name="maximumSize">
<size>
<width>70</width>
<height>15</height>
</size>
</property>
<property name="maximum">
<number>100</number>
</property>
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
</widget>
</item>
<item>
<widget class="QSlider" name="sliderZLThreshold">
<property name="minimumSize">
<size>
<width>0</width>
<height>20</height>
</size>
</property>
<property name="maximumSize">
<size>
<width>70</width>
<height>20</height>
</size>
</property>
<property name="maximum">
<number>100</number>
</property>
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
</widget>
</item>
</layout>
</widget>
</item>
@@ -1774,22 +1792,28 @@
</property>
</widget>
</item>
<item>
<widget class="QSlider" name="sliderZRThreshold">
<property name="maximumSize">
<size>
<width>70</width>
<height>15</height>
</size>
</property>
<property name="maximum">
<number>100</number>
</property>
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
</widget>
</item>
<item>
<widget class="QSlider" name="sliderZRThreshold">
<property name="minimumSize">
<size>
<width>0</width>
<height>20</height>
</size>
</property>
<property name="maximumSize">
<size>
<width>70</width>
<height>20</height>
</size>
</property>
<property name="maximum">
<number>100</number>
</property>
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
</widget>
</item>
</layout>
</widget>
</item>

View File

@@ -318,7 +318,6 @@ void Config::ReadValues() {
ReadSetting("Renderer", Settings::values.nvdec_emulation);
ReadSetting("Renderer", Settings::values.accelerate_astc);
ReadSetting("Renderer", Settings::values.async_astc);
ReadSetting("Renderer", Settings::values.astc_recompression);
ReadSetting("Renderer", Settings::values.use_fast_gpu_time);
ReadSetting("Renderer", Settings::values.use_vulkan_driver_pipeline_cache);

View File

@@ -360,10 +360,6 @@ accelerate_astc =
# 0 (default): Off, 1: On
async_astc =
# Recompress ASTC textures to a different format.
# 0 (default): Uncompressed, 1: BC1 (Low quality), 2: BC3 (Medium quality)
async_astc =
# Turns on the speed limiter, which will limit the emulation speed to the desired speed limit value
# 0: Off, 1: On (default)
use_speed_limit =

View File

@@ -227,7 +227,7 @@ int main(int argc, char** argv) {
};
while (optind < argc) {
int arg = getopt_long(argc, argv, "g:fhvp::c:u:", long_options, &option_index);
int arg = getopt_long(argc, argv, "g:fhvp::c:", long_options, &option_index);
if (arg != -1) {
switch (static_cast<char>(arg)) {
case 'c':
@@ -283,7 +283,7 @@ int main(int argc, char** argv) {
break;
case 'u':
selected_user = atoi(optarg);
break;
return 0;
case 'v':
PrintVersion();
return 0;