common_types: Convert typedefs to using aliases

May as well while we're making changes to this file.
common_types: Remove unnecessary check for whether or not __func__ is defined
2018-04-19 22:26:35 -04:00 · 2018-04-19 22:25:19 -04:00 · 2018-04-19 21:09:25 -04:00 · 2018-04-19 21:09:02 -04:00 · 2018-04-19 20:12:58 -04:00 · 2018-04-19 20:09:27 -04:00
63 changed files with 3296 additions and 849 deletions
--- a/README.md
+++ b/README.md
@@ -7,7 +7,7 @@ yuzu is an experimental open-source emulator for the Nintendo Switch from the cr

 It is written in C++ with portability in mind, with builds actively maintained for Windows, Linux and macOS. The emulator is currently only useful for homebrew development and research purposes.

-yuzu only emulates a subset of Switch hardware and therefore is generally only useful for running/debugging homebrew applications. At this time, yuzu does not run any commercial Switch games. yuzu can boot some games, to varying degrees of success, but does not implement any of the necessary GPU features to render 3D graphics.
+yuzu only emulates a subset of Switch hardware and therefore is generally only useful for running/debugging homebrew applications. At this time, yuzu cannot play any commercial games without major problems. yuzu can boot some games, to varying degrees of success, but does not implement any of the necessary GPU features to render 3D graphics.

 yuzu is licensed under the GPLv2 (or any later version). Refer to the license.txt file included.

--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -32,6 +32,8 @@ add_library(common STATIC
    break_points.cpp
    break_points.h
    chunk_file.h
+    cityhash.cpp
+    cityhash.h
    code_block.h
    color.h
    common_funcs.h
@@ -39,7 +41,6 @@ add_library(common STATIC
    common_types.h
    file_util.cpp
    file_util.h
-    hash.cpp
    hash.h
    linear_disk_cache.h
    logging/backend.cpp
--- a/src/common/bit_field.h
+++ b/src/common/bit_field.h
@@ -115,7 +115,7 @@ private:
    // assignment would copy the full storage value, rather than just the bits
    // relevant to this particular bit field.
    // We don't delete it because we want BitField to be trivially copyable.
-    BitField& operator=(const BitField&) = default;
+    constexpr BitField& operator=(const BitField&) = default;

    // StorageType is T for non-enum types and the underlying type of T if
    // T is an enumeration. Note that T is wrapped within an enable_if in the
@@ -166,20 +166,20 @@ public:
    // so that we can use this within unions
    constexpr BitField() = default;

-    FORCE_INLINE operator T() const {
+    constexpr FORCE_INLINE operator T() const {
        return Value();
    }

-    FORCE_INLINE void Assign(const T& value) {
+    constexpr FORCE_INLINE void Assign(const T& value) {
        storage = (storage & ~mask) | FormatValue(value);
    }

-    FORCE_INLINE T Value() const {
+    constexpr T Value() const {
        return ExtractValue(storage);
    }

    // TODO: we may want to change this to explicit operator bool() if it's bug-free in VS2015
-    FORCE_INLINE bool ToBool() const {
+    constexpr FORCE_INLINE bool ToBool() const {
        return Value() != 0;
    }

@@ -192,11 +192,6 @@ private:
    static_assert(position < 8 * sizeof(T), "Invalid position");
    static_assert(bits <= 8 * sizeof(T), "Invalid number of bits");
    static_assert(bits > 0, "Invalid number of bits");
-    static_assert(std::is_pod<T>::value, "Invalid base type");
+    static_assert(std::is_trivially_copyable_v<T>, "T must be trivially copyable in a BitField");
 };
 #pragma pack()
-
-#if (__GNUC__ >= 5) || defined(__clang__) || defined(_MSC_VER)
-static_assert(std::is_trivially_copyable<BitField<0, 1, unsigned>>::value,
-              "BitField must be trivially copyable");
-#endif
--- a/src/common/cityhash.cpp
+++ b/src/common/cityhash.cpp
@@ -0,0 +1,340 @@
+// Copyright (c) 2011 Google, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//
+// CityHash, by Geoff Pike and Jyrki Alakuijala
+//
+// This file provides CityHash64() and related functions.
+//
+// It's probably possible to create even faster hash functions by
+// writing a program that systematically explores some of the space of
+// possible hash functions, by using SIMD instructions, or by
+// compromising on hash quality.
+
+#include <algorithm>
+#include <string.h> // for memcpy and memset
+#include "cityhash.h"
+#include "common/swap.h"
+
+// #include "config.h"
+#ifdef __GNUC__
+#define HAVE_BUILTIN_EXPECT 1
+#endif
+#ifdef COMMON_BIG_ENDIAN
+#define WORDS_BIGENDIAN 1
+#endif
+
+using namespace std;
+
+typedef uint8_t uint8;
+typedef uint32_t uint32;
+typedef uint64_t uint64;
+
+namespace Common {
+
+// Copies sizeof(uint64) bytes starting at p into a uint64 with memcpy, so p does not need to be
+// suitably aligned for a direct load.
+static uint64 UNALIGNED_LOAD64(const char* p) {
+    uint64 word;
+    memcpy(&word, p, sizeof(uint64));
+    return word;
+}
+
+// Copies sizeof(uint32) bytes starting at p into a uint32 with memcpy, so p does not need to be
+// suitably aligned for a direct load.
+static uint32 UNALIGNED_LOAD32(const char* p) {
+    uint32 word;
+    memcpy(&word, p, sizeof(uint32));
+    return word;
+}
+
+#ifdef WORDS_BIGENDIAN
+#define uint32_in_expected_order(x) (swap32(x))
+#define uint64_in_expected_order(x) (swap64(x))
+#else
+#define uint32_in_expected_order(x) (x)
+#define uint64_in_expected_order(x) (x)
+#endif
+
+#if !defined(LIKELY)
+#if HAVE_BUILTIN_EXPECT
+#define LIKELY(x) (__builtin_expect(!!(x), 1))
+#else
+#define LIKELY(x) (x)
+#endif
+#endif
+
+// Loads a 64-bit word from p and passes it through uint64_in_expected_order, which is a byte
+// swap when WORDS_BIGENDIAN is defined and a no-op otherwise.
+static uint64 Fetch64(const char* p) {
+    const uint64 raw = UNALIGNED_LOAD64(p);
+    return uint64_in_expected_order(raw);
+}
+
+// Loads a 32-bit word from p and passes it through uint32_in_expected_order, which is a byte
+// swap when WORDS_BIGENDIAN is defined and a no-op otherwise.
+static uint32 Fetch32(const char* p) {
+    const uint32 raw = UNALIGNED_LOAD32(p);
+    return uint32_in_expected_order(raw);
+}
+
+// Some primes between 2^63 and 2^64 for various uses.
+static const uint64 k0 = 0xc3a5c85c97cb3127ULL;
+static const uint64 k1 = 0xb492b66fbe98f273ULL;
+static const uint64 k2 = 0x9ae16a3b2f90404fULL;
+
+// Bitwise right rotate of a 64-bit value.  Normally this will compile to a single
+// instruction, especially if the shift is a manifest constant.
+static uint64 Rotate(uint64 val, int shift) {
+    // A zero rotation is returned as-is: the general formula would shift left by 64,
+    // which yields an undefined result.
+    if (shift == 0) {
+        return val;
+    }
+    const uint64 shifted_down = val >> shift;
+    const uint64 wrapped_around = val << (64 - shift);
+    return shifted_down | wrapped_around;
+}
+
+// XORs val with a copy of itself shifted right by 47 bits.
+static uint64 ShiftMix(uint64 val) {
+    const uint64 folded = val >> 47;
+    return val ^ folded;
+}
+
+// Reduces the pair (u, v) to 64 bits by packing it into a uint128 and calling Hash128to64.
+static uint64 HashLen16(uint64 u, uint64 v) {
+    const uint128 packed(u, v);
+    return Hash128to64(packed);
+}
+
+// Reduces the pair (u, v) to 64 bits using the caller-supplied multiplier mul.
+static uint64 HashLen16(uint64 u, uint64 v, uint64 mul) {
+    // Murmur-inspired hashing.
+    uint64 first = (u ^ v) * mul;
+    first ^= first >> 47;
+    uint64 second = (v ^ first) * mul;
+    second ^= second >> 47;
+    return second * mul;
+}
+
+// Hashes the len bytes at s, picking a scheme by length. The callers in this file only pass
+// len <= 16.
+static uint64 HashLen0to16(const char* s, size_t len) {
+    // 8 bytes or more: one 8-byte load from the front and one ending at the last byte.
+    // The two loads overlap when len < 16.
+    if (len >= 8) {
+        uint64 mul = k2 + len * 2;
+        uint64 a = Fetch64(s) + k2;
+        uint64 b = Fetch64(s + len - 8);
+        uint64 c = Rotate(b, 37) * mul + a;
+        uint64 d = (Rotate(a, 25) + b) * mul;
+        return HashLen16(c, d, mul);
+    }
+    // 4..7 bytes: one 4-byte load from the front and one ending at the last byte.
+    if (len >= 4) {
+        uint64 mul = k2 + len * 2;
+        uint64 a = Fetch32(s);
+        return HashLen16(len + (a << 3), Fetch32(s + len - 4), mul);
+    }
+    // 1..3 bytes: mix the first, middle (s[len >> 1]) and last byte with the length.
+    if (len > 0) {
+        uint8 a = s[0];
+        uint8 b = s[len >> 1];
+        uint8 c = s[len - 1];
+        uint32 y = static_cast<uint32>(a) + (static_cast<uint32>(b) << 8);
+        uint32 z = static_cast<uint32>(len) + (static_cast<uint32>(c) << 2);
+        return ShiftMix(y * k2 ^ z * k0) * k2;
+    }
+    // Empty input hashes to the constant k2.
+    return k2;
+}
+
+// This probably works well for 16-byte strings as well, but it may be overkill
+// in that case.
+// Reads the first 16 bytes (s, s + 8) and the last 16 bytes (s + len - 16, s + len - 8), so len
+// must be at least 16. CityHash64 calls this for 17 <= len <= 32.
+static uint64 HashLen17to32(const char* s, size_t len) {
+    uint64 mul = k2 + len * 2;
+    uint64 a = Fetch64(s) * k1;
+    uint64 b = Fetch64(s + 8);
+    uint64 c = Fetch64(s + len - 8) * mul;
+    uint64 d = Fetch64(s + len - 16) * k2;
+    return HashLen16(Rotate(a + b, 43) + Rotate(c, 30) + d, a + Rotate(b + k2, 18) + c, mul);
+}
+
+// Return a 16-byte hash for 48 bytes.  Quick and dirty.
+// Callers do best to use "random-looking" values for a and b.
+// w, x, y and z are four 64-bit input words; a and b are two 64-bit seeds. The result is
+// returned as a pair of 64-bit halves.
+static pair<uint64, uint64> WeakHashLen32WithSeeds(uint64 w, uint64 x, uint64 y, uint64 z, uint64 a,
+                                                   uint64 b) {
+    a += w;
+    b = Rotate(b + a + z, 21);
+    uint64 c = a; // keep a + w as it was before x and y are added in
+    a += x;
+    a += y;
+    b += Rotate(a, 44);
+    return make_pair(a + z, b + c);
+}
+
+// Return a 16-byte hash for s[0] ... s[31], a, and b.  Quick and dirty.
+static pair<uint64, uint64> WeakHashLen32WithSeeds(const char* s, uint64 a, uint64 b) {
+    // Split the 32 input bytes into four 64-bit words and defer to the word-based overload.
+    const uint64 w = Fetch64(s);
+    const uint64 x = Fetch64(s + 8);
+    const uint64 y = Fetch64(s + 16);
+    const uint64 z = Fetch64(s + 24);
+    return WeakHashLen32WithSeeds(w, x, y, z, a, b);
+}
+
+// Return an 8-byte hash for 33 to 64 bytes.
+// Reads the first 32 bytes (s .. s + 24) and the last 32 bytes (s + len - 32 .. s + len - 8);
+// the two windows overlap when len < 64.
+// NOTE(review): swap64 is not declared in this file; presumably it is the byte-swap helper
+// pulled in through common/swap.h - confirm.
+static uint64 HashLen33to64(const char* s, size_t len) {
+    uint64 mul = k2 + len * 2;
+    uint64 a = Fetch64(s) * k2;
+    uint64 b = Fetch64(s + 8);
+    uint64 c = Fetch64(s + len - 24);
+    uint64 d = Fetch64(s + len - 32);
+    uint64 e = Fetch64(s + 16) * k2;
+    uint64 f = Fetch64(s + 24) * 9;
+    uint64 g = Fetch64(s + len - 8);
+    uint64 h = Fetch64(s + len - 16) * mul;
+    uint64 u = Rotate(a + g, 43) + (Rotate(b, 30) + c) * 9;
+    uint64 v = ((a + g) ^ d) + f + 1;
+    uint64 w = swap64((u + v) * mul) + h;
+    uint64 x = Rotate(e + f, 42) + c;
+    uint64 y = (swap64((v + w) * mul) + g) * mul;
+    uint64 z = e + f + c;
+    a = swap64((x + z) * mul + y) + b;
+    b = ShiftMix((z + a) * mul + d + h) * mul;
+    return b + x;
+}
+
+// Computes a 64-bit hash of the len bytes at s. Inputs of up to 64 bytes are handed to a
+// length-specific helper; longer inputs go through the 64-byte chunk loop below.
+uint64 CityHash64(const char* s, size_t len) {
+    if (len <= 32) {
+        if (len <= 16) {
+            return HashLen0to16(s, len);
+        } else {
+            return HashLen17to32(s, len);
+        }
+    } else if (len <= 64) {
+        return HashLen33to64(s, len);
+    }
+
+    // For strings over 64 bytes we hash the end first, and then as we
+    // loop we keep 56 bytes of state: v, w, x, y, and z.
+    uint64 x = Fetch64(s + len - 40);
+    uint64 y = Fetch64(s + len - 16) + Fetch64(s + len - 56);
+    uint64 z = HashLen16(Fetch64(s + len - 48) + len, Fetch64(s + len - 24));
+    pair<uint64, uint64> v = WeakHashLen32WithSeeds(s + len - 64, len, z);
+    pair<uint64, uint64> w = WeakHashLen32WithSeeds(s + len - 32, y + k1, x);
+    x = x * k1 + Fetch64(s);
+
+    // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks.
+    // Because 1 is subtracted first, an input that is an exact multiple of 64 leaves its final
+    // chunk out of the loop; the last 64 bytes were already read by the end-relative loads above.
+    // len > 64 at this point, so the rounded value is at least 64 and the loop body always has a
+    // full chunk to read.
+    len = (len - 1) & ~static_cast<size_t>(63);
+    do {
+        x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
+        y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
+        x ^= w.second;
+        y += v.first + Fetch64(s + 40);
+        z = Rotate(z + w.first, 33) * k1;
+        v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
+        w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
+        std::swap(z, x);
+        s += 64;
+        len -= 64;
+    } while (len != 0);
+    // Fold the five state words down to the 64-bit result.
+    return HashLen16(HashLen16(v.first, w.first) + ShiftMix(y) * k1 + z,
+                     HashLen16(v.second, w.second) + x);
+}
+
+uint64 CityHash64WithSeed(const char* s, size_t len, uint64 seed) {
+    // Single-seed form: k2 is used as the first seed and the caller's value as the second.
+    const uint64 first_seed = k2;
+    return CityHash64WithSeeds(s, len, first_seed, seed);
+}
+
+uint64 CityHash64WithSeeds(const char* s, size_t len, uint64 seed0, uint64 seed1) {
+    // Hash the data without a seed first, then fold both seeds into that result.
+    const uint64 unseeded = CityHash64(s, len);
+    return HashLen16(unseeded - seed0, seed1);
+}
+
+// A subroutine for CityHash128().  Returns a decent 128-bit hash for strings
+// of any length representable in signed long.  Based on City and Murmur.
+// The only caller in this file, CityHash128WithSeed, passes len < 128.
+static uint128 CityMurmur(const char* s, size_t len, uint128 seed) {
+    uint64 a = Uint128Low64(seed);
+    uint64 b = Uint128High64(seed);
+    uint64 c = 0;
+    uint64 d = 0;
+    // l is the byte count beyond the first 16; it is signed so that short inputs go negative.
+    signed long l = static_cast<long>(len) - 16;
+    if (l <= 0) { // len <= 16
+        a = ShiftMix(a * k1) * k1;
+        c = b * k1 + HashLen0to16(s, len);
+        // Fetch64(s) is only used when at least 8 bytes are available.
+        d = ShiftMix(a + (len >= 8 ? Fetch64(s) : c));
+    } else { // len > 16
+        // The last 16 bytes are mixed in first, before s is advanced by the loop.
+        c = HashLen16(Fetch64(s + len - 8) + k1, a);
+        d = HashLen16(b + len, c + Fetch64(s + len - 16));
+        a += d;
+        // Consume the input from the front, 16 bytes per iteration.
+        do {
+            a ^= ShiftMix(Fetch64(s) * k1) * k1;
+            a *= k1;
+            b ^= a;
+            c ^= ShiftMix(Fetch64(s + 8) * k1) * k1;
+            c *= k1;
+            d ^= c;
+            s += 16;
+            l -= 16;
+        } while (l > 0);
+    }
+    a = HashLen16(a, c);
+    b = HashLen16(d, b);
+    return uint128(a ^ b, HashLen16(b, a));
+}
+
+// Computes a 128-bit hash of the len bytes at s, with the 128-bit seed mixed into the result.
+// Inputs shorter than 128 bytes are delegated to CityMurmur.
+uint128 CityHash128WithSeed(const char* s, size_t len, uint128 seed) {
+    if (len < 128) {
+        return CityMurmur(s, len, seed);
+    }
+
+    // We expect len >= 128 to be the common case.  Keep 56 bytes of state:
+    // v, w, x, y, and z.
+    pair<uint64, uint64> v, w;
+    uint64 x = Uint128Low64(seed);
+    uint64 y = Uint128High64(seed);
+    uint64 z = len * k1;
+    v.first = Rotate(y ^ k1, 49) * k1 + Fetch64(s);
+    v.second = Rotate(v.first, 42) * k1 + Fetch64(s + 8);
+    w.first = Rotate(y + z, 35) * k1 + x;
+    // len >= 128 here, so the 8-byte load at offset 88 is within the input.
+    w.second = Rotate(x + Fetch64(s + 88), 53) * k1;
+
+    // This is the same inner loop as CityHash64(), manually unrolled.
+    // Each pass consumes 128 bytes as two 64-byte rounds.
+    do {
+        x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
+        y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
+        x ^= w.second;
+        y += v.first + Fetch64(s + 40);
+        z = Rotate(z + w.first, 33) * k1;
+        v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
+        w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
+        std::swap(z, x);
+        s += 64;
+        x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
+        y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
+        x ^= w.second;
+        y += v.first + Fetch64(s + 40);
+        z = Rotate(z + w.first, 33) * k1;
+        v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
+        w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
+        std::swap(z, x);
+        s += 64;
+        len -= 128;
+    } while (LIKELY(len >= 128));
+    x += Rotate(v.first + z, 49) * k0;
+    y = y * k0 + Rotate(w.second, 37);
+    z = z * k0 + Rotate(w.first, 27);
+    w.first *= 9;
+    v.first *= k0;
+    // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s.
+    // tail_done grows by 32 per pass, so when len is not a multiple of 32 the last pass reads
+    // from before the current s, i.e. bytes the main loop already consumed.
+    for (size_t tail_done = 0; tail_done < len;) {
+        tail_done += 32;
+        y = Rotate(x + y, 42) * k0 + v.second;
+        w.first += Fetch64(s + len - tail_done + 16);
+        x = x * k0 + w.first;
+        z += w.second + Fetch64(s + len - tail_done);
+        w.second += v.first;
+        v = WeakHashLen32WithSeeds(s + len - tail_done, v.first + z, v.second);
+        v.first *= k0;
+    }
+    // At this point our 56 bytes of state should contain more than
+    // enough information for a strong 128-bit hash.  We use two
+    // different 56-byte-to-8-byte hashes to get a 16-byte final result.
+    x = HashLen16(x, v.first);
+    y = HashLen16(y + z, w.first);
+    return uint128(HashLen16(x + v.second, w.second) + y, HashLen16(x + w.second, y + v.second));
+}
+
+uint128 CityHash128(const char* s, size_t len) {
+    if (len < 16) {
+        // Too short to take a seed from the data itself; use the fixed seed (k0, k1).
+        return CityHash128WithSeed(s, len, uint128(k0, k1));
+    }
+    // The first 16 bytes become the seed and the remaining bytes are hashed with it.
+    const uint128 seed(Fetch64(s), Fetch64(s + 8) + k0);
+    return CityHash128WithSeed(s + 16, len - 16, seed);
+}
+
+} // namespace Common
--- a/src/common/cityhash.h
+++ b/src/common/cityhash.h
@@ -0,0 +1,110 @@
+// Copyright (c) 2011 Google, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//
+// CityHash, by Geoff Pike and Jyrki Alakuijala
+//
+// http://code.google.com/p/cityhash/
+//
+// This file provides a few functions for hashing strings.  All of them are
+// high-quality functions in the sense that they pass standard tests such
+// as Austin Appleby's SMHasher.  They are also fast.
+//
+// For 64-bit x86 code, on short strings, we don't know of anything faster than
+// CityHash64 that is of comparable quality.  We believe our nearest competitor
+// is Murmur3.  For 64-bit x86 code, CityHash64 is an excellent choice for hash
+// tables and most other hashing (excluding cryptography).
+//
+// For 64-bit x86 code, on long strings, the picture is more complicated.
+// On many recent Intel CPUs, such as Nehalem, Westmere, Sandy Bridge, etc.,
+// CityHashCrc128 appears to be faster than all competitors of comparable
+// quality.  CityHash128 is also good but not quite as fast.  We believe our
+// nearest competitor is Bob Jenkins' Spooky.  We don't have great data for
+// other 64-bit CPUs, but for long strings we know that Spooky is slightly
+// faster than CityHash on some relatively recent AMD x86-64 CPUs, for example.
+// Note that CityHashCrc128 is declared in citycrc.h.
+//
+// For 32-bit x86 code, we don't know of anything faster than CityHash32 that
+// is of comparable quality.  We believe our nearest competitor is Murmur3A.
+// (On 64-bit CPUs, it is typically faster to use the other CityHash variants.)
+//
+// Functions in the CityHash family are not suitable for cryptography.
+//
+// Please see CityHash's README file for more details on our performance
+// measurements and so on.
+//
+// WARNING: This code has been only lightly tested on big-endian platforms!
+// It is known to work well on little-endian platforms that have a small penalty
+// for unaligned reads, such as current Intel and AMD moderate-to-high-end CPUs.
+// It should work on all 32-bit and 64-bit platforms that allow unaligned reads;
+// bug reports are welcome.
+//
+// By the way, for some hash functions, given strings a and b, the hash
+// of a+b is easily derived from the hashes of a and b.  This property
+// doesn't hold for any hash functions in this file.
+
+#pragma once
+
+#include <utility>
+#include <stdint.h>
+#include <stdlib.h> // for size_t.
+
+namespace Common {
+
+typedef std::pair<uint64_t, uint64_t> uint128;
+
+// Returns the low 64 bits of x, which uint128 keeps in the pair's first member.
+inline uint64_t Uint128Low64(const uint128& x) {
+    return x.first;
+}
+// Returns the high 64 bits of x, which uint128 keeps in the pair's second member.
+inline uint64_t Uint128High64(const uint128& x) {
+    return x.second;
+}
+
+// Hash function for a byte array.
+uint64_t CityHash64(const char* buf, size_t len);
+
+// Hash function for a byte array.  For convenience, a 64-bit seed is also
+// hashed into the result.
+uint64_t CityHash64WithSeed(const char* buf, size_t len, uint64_t seed);
+
+// Hash function for a byte array.  For convenience, two seeds are also
+// hashed into the result.
+uint64_t CityHash64WithSeeds(const char* buf, size_t len, uint64_t seed0, uint64_t seed1);
+
+// Hash function for a byte array.
+uint128 CityHash128(const char* s, size_t len);
+
+// Hash function for a byte array.  For convenience, a 128-bit seed is also
+// hashed into the result.
+uint128 CityHash128WithSeed(const char* s, size_t len, uint128 seed);
+
+// Hash 128 input bits down to 64 bits of output.
+// This is intended to be a reasonably good hash function.
+inline uint64_t Hash128to64(const uint128& x) {
+    // Murmur-inspired hashing.
+    const uint64_t kMul = 0x9ddfea08eb382d69ULL;
+    const uint64_t low = Uint128Low64(x);
+    const uint64_t high = Uint128High64(x);
+    uint64_t a = (low ^ high) * kMul;
+    a ^= a >> 47;
+    uint64_t b = (high ^ a) * kMul;
+    b ^= b >> 47;
+    return b * kMul;
+}
+
+} // namespace Common
--- a/src/common/common_types.h
+++ b/src/common/common_types.h
@@ -27,29 +27,23 @@
 #include <array>
 #include <cstdint>

-#ifdef _MSC_VER
-#ifndef __func__
-#define __func__ __FUNCTION__
-#endif
-#endif
+using u8 = std::uint8_t;   ///< 8-bit unsigned byte
+using u16 = std::uint16_t; ///< 16-bit unsigned short
+using u32 = std::uint32_t; ///< 32-bit unsigned word
+using u64 = std::uint64_t; ///< 64-bit unsigned int

-typedef std::uint8_t u8;   ///< 8-bit unsigned byte
-typedef std::uint16_t u16; ///< 16-bit unsigned short
-typedef std::uint32_t u32; ///< 32-bit unsigned word
-typedef std::uint64_t u64; ///< 64-bit unsigned int
+using s8 = std::int8_t;   ///< 8-bit signed byte
+using s16 = std::int16_t; ///< 16-bit signed short
+using s32 = std::int32_t; ///< 32-bit signed word
+using s64 = std::int64_t; ///< 64-bit signed int

-typedef std::int8_t s8;   ///< 8-bit signed byte
-typedef std::int16_t s16; ///< 16-bit signed short
-typedef std::int32_t s32; ///< 32-bit signed word
-typedef std::int64_t s64; ///< 64-bit signed int
-
-typedef float f32;  ///< 32-bit floating point
-typedef double f64; ///< 64-bit floating point
+using f32 = float;  ///< 32-bit floating point
+using f64 = double; ///< 64-bit floating point

 // TODO: It would be nice to eventually replace these with strong types that prevent accidental
 // conversion between each other.
-typedef u64 VAddr; ///< Represents a pointer in the userspace virtual address space.
-typedef u64 PAddr; ///< Represents a pointer in the ARM11 physical address space.
+using VAddr = u64; ///< Represents a pointer in the userspace virtual address space.
+using PAddr = u64; ///< Represents a pointer in the ARM11 physical address space.

 using u128 = std::array<std::uint64_t, 2>;
 static_assert(sizeof(u128) == 16, "u128 must be 128 bits wide");
--- a/src/common/hash.cpp
+++ b/src/common/hash.cpp
@@ -1,141 +0,0 @@
-// Copyright 2015 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#if defined(_MSC_VER)
-#include <stdlib.h>
-#endif
-#include "common/common_funcs.h"
-#include "common/common_types.h"
-#include "common/hash.h"
-
-namespace Common {
-
-// MurmurHash3 was written by Austin Appleby, and is placed in the public
-// domain. The author hereby disclaims copyright to this source code.
-
-// Block read - if your platform needs to do endian-swapping or can only handle aligned reads, do
-// the conversion here
-static FORCE_INLINE u64 getblock64(const u64* p, size_t i) {
-    return p[i];
-}
-
-// Finalization mix - force all bits of a hash block to avalanche
-static FORCE_INLINE u64 fmix64(u64 k) {
-    k ^= k >> 33;
-    k *= 0xff51afd7ed558ccdllu;
-    k ^= k >> 33;
-    k *= 0xc4ceb9fe1a85ec53llu;
-    k ^= k >> 33;
-
-    return k;
-}
-
-// This is the 128-bit variant of the MurmurHash3 hash function that is targeted for 64-bit
-// platforms (MurmurHash3_x64_128). It was taken from:
-// https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp
-void MurmurHash3_128(const void* key, size_t len, u32 seed, void* out) {
-    const u8* data = (const u8*)key;
-    const size_t nblocks = len / 16;
-
-    u64 h1 = seed;
-    u64 h2 = seed;
-
-    const u64 c1 = 0x87c37b91114253d5llu;
-    const u64 c2 = 0x4cf5ad432745937fllu;
-
-    // Body
-
-    const u64* blocks = (const u64*)(data);
-
-    for (size_t i = 0; i < nblocks; i++) {
-        u64 k1 = getblock64(blocks, i * 2 + 0);
-        u64 k2 = getblock64(blocks, i * 2 + 1);
-
-        k1 *= c1;
-        k1 = _rotl64(k1, 31);
-        k1 *= c2;
-        h1 ^= k1;
-
-        h1 = _rotl64(h1, 27);
-        h1 += h2;
-        h1 = h1 * 5 + 0x52dce729;
-
-        k2 *= c2;
-        k2 = _rotl64(k2, 33);
-        k2 *= c1;
-        h2 ^= k2;
-
-        h2 = _rotl64(h2, 31);
-        h2 += h1;
-        h2 = h2 * 5 + 0x38495ab5;
-    }
-
-    // Tail
-
-    const u8* tail = (const u8*)(data + nblocks * 16);
-
-    u64 k1 = 0;
-    u64 k2 = 0;
-
-    switch (len & 15) {
-    case 15:
-        k2 ^= ((u64)tail[14]) << 48;
-    case 14:
-        k2 ^= ((u64)tail[13]) << 40;
-    case 13:
-        k2 ^= ((u64)tail[12]) << 32;
-    case 12:
-        k2 ^= ((u64)tail[11]) << 24;
-    case 11:
-        k2 ^= ((u64)tail[10]) << 16;
-    case 10:
-        k2 ^= ((u64)tail[9]) << 8;
-    case 9:
-        k2 ^= ((u64)tail[8]) << 0;
-        k2 *= c2;
-        k2 = _rotl64(k2, 33);
-        k2 *= c1;
-        h2 ^= k2;
-
-    case 8:
-        k1 ^= ((u64)tail[7]) << 56;
-    case 7:
-        k1 ^= ((u64)tail[6]) << 48;
-    case 6:
-        k1 ^= ((u64)tail[5]) << 40;
-    case 5:
-        k1 ^= ((u64)tail[4]) << 32;
-    case 4:
-        k1 ^= ((u64)tail[3]) << 24;
-    case 3:
-        k1 ^= ((u64)tail[2]) << 16;
-    case 2:
-        k1 ^= ((u64)tail[1]) << 8;
-    case 1:
-        k1 ^= ((u64)tail[0]) << 0;
-        k1 *= c1;
-        k1 = _rotl64(k1, 31);
-        k1 *= c2;
-        h1 ^= k1;
-    };
-
-    // Finalization
-
-    h1 ^= len;
-    h2 ^= len;
-
-    h1 += h2;
-    h2 += h1;
-
-    h1 = fmix64(h1);
-    h2 = fmix64(h2);
-
-    h1 += h2;
-    h2 += h1;
-
-    ((u64*)out)[0] = h1;
-    ((u64*)out)[1] = h2;
-}
-
-} // namespace Common
--- a/src/common/hash.h
+++ b/src/common/hash.h
@@ -5,12 +5,12 @@
 #pragma once

 #include <cstddef>
+#include <cstring>
+#include <type_traits>
+#include "common/cityhash.h"
 #include "common/common_types.h"

 namespace Common {

-void MurmurHash3_128(const void* key, size_t len, u32 seed, void* out);
-
 /**
 * Computes a 64-bit hash over the specified block of data
 * @param data Block of data to compute hash over
@@ -18,9 +18,54 @@ void MurmurHash3_128(const void* key, size_t len, u32 seed, void* out);
 * @returns 64-bit hash value that was computed over the data block
 */
 static inline u64 ComputeHash64(const void* data, size_t len) {
-    u64 res[2];
-    MurmurHash3_128(data, len, 0, res);
-    return res[0];
+    // CityHash64 takes a char pointer, so view the untyped buffer as bytes
+    const char* const bytes = static_cast<const char*>(data);
+    return CityHash64(bytes, len);
 }

+/**
+ * Computes a 64-bit hash over the bytes of a struct. All sizeof(T) bytes are hashed, padding
+ * included, so T must either contain no padding or have had its padding set to a known value
+ * (by memsetting the struct to 0 before filling it in). T must also be trivially copyable.
+ */
+template <typename T>
+static inline u64 ComputeStructHash64(const T& data) {
+    static_assert(std::is_trivially_copyable<T>::value,
+                  "Type passed to ComputeStructHash64 must be trivially copyable");
+    const void* const raw = &data;
+    return ComputeHash64(raw, sizeof(T));
+}
+
+/// Wraps a trivial struct T and zero-fills it on construction, so that its padding bytes hold a
+/// known value before the struct is hashed or compared byte by byte.
+template <typename T>
+struct HashableStruct {
+    // T needs a trivial default constructor as well as being trivially copyable: the memset in
+    // the constructor below would overwrite anything a member initializer had set up
+    static_assert(std::is_trivial<T>::value, "Type passed to HashableStruct must be trivial");
+    /*
+     * We use a union because "implicitly-defined copy/move constructor for a union X copies the
+     * object representation of X." and "implicitly-defined copy assignment operator for a union X
+     * copies the object representation (3.9) of X." = Bytewise copy instead of memberwise copy.
+     * This is important because the padding bytes are included in the hash and comparison between
+     * objects.
+     */
+    union {
+        T state;
+    };
+
+    HashableStruct() {
+        // Zero every byte of the state, padding included, so hashing is deterministic
+        std::memset(&state, 0, sizeof(state));
+    }
+
+    /// Bytewise equality over the whole state, padding included
+    bool operator==(const HashableStruct<T>& o) const {
+        const int difference = std::memcmp(&state, &o.state, sizeof(state));
+        return difference == 0;
+    }
+
+    /// Bytewise inequality over the whole state, padding included
+    bool operator!=(const HashableStruct<T>& o) const {
+        return std::memcmp(&state, &o.state, sizeof(state)) != 0;
+    }
+
+    /// Hashes the bytes of the state with ComputeStructHash64
+    size_t Hash() const {
+        return Common::ComputeStructHash64(state);
+    }
+};
+
 } // namespace Common
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -12,6 +12,8 @@ add_library(core STATIC
    file_sys/errors.h
    file_sys/filesystem.cpp
    file_sys/filesystem.h
+    file_sys/partition_filesystem.cpp
+    file_sys/partition_filesystem.h
    file_sys/path_parser.cpp
    file_sys/path_parser.h
    file_sys/program_metadata.cpp
--- a/src/core/file_sys/disk_filesystem.cpp
+++ b/src/core/file_sys/disk_filesystem.cpp
@@ -57,10 +57,14 @@ ResultVal<std::unique_ptr<StorageBackend>> Disk_FileSystem::OpenFile(const std::
        std::make_unique<Disk_Storage>(std::move(file)));
 }

-ResultCode Disk_FileSystem::DeleteFile(const Path& path) const {
-    LOG_WARNING(Service_FS, "(STUBBED) called");
-    // TODO(bunnei): Use correct error code
-    return ResultCode(-1);
+/**
+ * Deletes the file at the given path.
+ * @param path Path of the file to delete
+ * @return ERROR_PATH_NOT_FOUND if nothing exists at path, RESULT_SUCCESS once the file has been
+ *         removed, or a generic failure code if the deletion itself fails
+ */
+ResultCode Disk_FileSystem::DeleteFile(const std::string& path) const {
+    // NOTE(review): path is handed to FileUtil unchanged - confirm callers pass a full host path
+    if (!FileUtil::Exists(path)) {
+        return ERROR_PATH_NOT_FOUND;
+    }
+
+    // Do not report success when the deletion itself failed
+    // NOTE(review): assumes FileUtil::Delete returns false on failure - confirm
+    if (!FileUtil::Delete(path)) {
+        // TODO: Use correct error code
+        return ResultCode(-1);
+    }
+
+    return RESULT_SUCCESS;
 }

 ResultCode Disk_FileSystem::RenameFile(const Path& src_path, const Path& dest_path) const {
--- a/src/core/file_sys/disk_filesystem.h
+++ b/src/core/file_sys/disk_filesystem.h
@@ -25,7 +25,7 @@ public:

    ResultVal<std::unique_ptr<StorageBackend>> OpenFile(const std::string& path,
                                                        Mode mode) const override;
-    ResultCode DeleteFile(const Path& path) const override;
+    ResultCode DeleteFile(const std::string& path) const override;
    ResultCode RenameFile(const Path& src_path, const Path& dest_path) const override;
    ResultCode DeleteDirectory(const Path& path) const override;
    ResultCode DeleteDirectoryRecursively(const Path& path) const override;
--- a/src/core/file_sys/filesystem.h
+++ b/src/core/file_sys/filesystem.h
@@ -97,7 +97,7 @@ public:
     * @param path Path relative to the archive
     * @return Result of the operation
     */
-    virtual ResultCode DeleteFile(const Path& path) const = 0;
+    virtual ResultCode DeleteFile(const std::string& path) const = 0;

    /**
     * Create a directory specified by its path
--- a/src/core/file_sys/partition_filesystem.cpp
+++ b/src/core/file_sys/partition_filesystem.cpp
@@ -0,0 +1,200 @@
+// Copyright 2018 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <cinttypes>
+#include <cstring>
+#include <utility>
+#include "common/file_util.h"
+#include "common/logging/log.h"
+#include "core/file_sys/partition_filesystem.h"
+#include "core/loader/loader.h"
+
+namespace FileSys {
+
+/**
+ * Loads the PFS/HFS metadata (header, entry table and string table) from a file on disk.
+ * Only the metadata is read into memory, never the file contents that follow it.
+ * @param file_path Path of the file that contains the partition
+ * @param offset Byte offset of the partition header within the file
+ * @return ResultStatus::Success if the metadata was parsed, ResultStatus::Error otherwise
+ */
+Loader::ResultStatus PartitionFilesystem::Load(const std::string& file_path, size_t offset) {
+    FileUtil::IOFile file(file_path, "rb");
+    if (!file.IsOpen())
+        return Loader::ResultStatus::Error;
+
+    // At least be as large as the header, measured from the requested offset
+    const auto file_size = file.GetSize();
+    if (file_size < offset || file_size - offset < sizeof(Header))
+        return Loader::ResultStatus::Error;
+
+    // For cartridges, HFSs can get very large, so we need to calculate the size up to
+    // the actual content itself instead of just blindly reading in the entire file.
+    // The header sits at `offset`, so seek there before reading it.
+    file.Seek(offset, SEEK_SET);
+    Header header;
+    if (!file.ReadBytes(&header, sizeof(Header)))
+        return Loader::ResultStatus::Error;
+
+    const bool hfs = (memcmp(header.magic.data(), "HFS", 3) == 0);
+    const u64 entry_size = hfs ? sizeof(HFSEntry) : sizeof(PFSEntry);
+    const u64 metadata_size =
+        sizeof(Header) + (header.num_entries * entry_size) + header.strtab_size;
+
+    // A corrupt header must not make us allocate more than the file can actually provide
+    if (file_size - offset < metadata_size)
+        return Loader::ResultStatus::Error;
+
+    // Actually read in now...
+    file.Seek(offset, SEEK_SET);
+    std::vector<u8> file_data(static_cast<size_t>(metadata_size));
+
+    if (!file.ReadBytes(file_data.data(), file_data.size()))
+        return Loader::ResultStatus::Error;
+
+    Loader::ResultStatus result = Load(file_data);
+    if (result != Loader::ResultStatus::Success)
+        LOG_ERROR(Service_FS, "Failed to load PFS from file %s!", file_path.c_str());
+
+    return result;
+}
+
+/**
+ * Parses the PFS/HFS metadata out of an in-memory buffer.
+ * The object is only modified when the whole buffer validates successfully.
+ * @param file_data Buffer holding at least the header, entry table and string table
+ * @param offset Byte offset of the partition header within file_data
+ * @return ResultStatus::Success if the metadata was parsed, ResultStatus::Error otherwise
+ */
+Loader::ResultStatus PartitionFilesystem::Load(const std::vector<u8>& file_data, size_t offset) {
+    // Check the offset before subtracting so the unsigned arithmetic cannot wrap around
+    if (offset > file_data.size() || file_data.size() - offset < sizeof(Header))
+        return Loader::ResultStatus::Error;
+
+    // Parse into locals and commit to the members only once everything has been validated
+    Header header;
+    memcpy(&header, &file_data[offset], sizeof(Header));
+    const bool hfs = (memcmp(header.magic.data(), "HFS", 3) == 0);
+
+    const size_t entry_size = hfs ? sizeof(HFSEntry) : sizeof(PFSEntry);
+    const size_t num_entries = header.num_entries;
+    const size_t strtab_size = header.strtab_size;
+
+    // The entry table and the string table must both lie inside the supplied buffer
+    const size_t available = file_data.size() - offset - sizeof(Header);
+    if (num_entries > available / entry_size ||
+        strtab_size > available - (num_entries * entry_size))
+        return Loader::ResultStatus::Error;
+
+    const size_t entries_offset = offset + sizeof(Header);
+    const size_t strtab_offset = entries_offset + (num_entries * entry_size);
+    const u8* const strtab_begin = file_data.data() + strtab_offset;
+    const u8* const strtab_end = strtab_begin + strtab_size;
+
+    std::vector<FileEntry> entries;
+    entries.reserve(num_entries);
+    // size_t index: a 16-bit counter would wrap forever once num_entries exceeds 0xFFFF
+    for (size_t i = 0; i < num_entries; i++) {
+        FileEntry entry;
+        memcpy(&entry.fs_entry, &file_data[entries_offset + (i * entry_size)], sizeof(FSEntry));
+
+        // Names are NUL-terminated; never scan past the end of the string table for one
+        const size_t name_offset = entry.fs_entry.strtab_offset;
+        if (name_offset >= strtab_size)
+            return Loader::ResultStatus::Error;
+        const u8* const name_begin = strtab_begin + name_offset;
+        entry.name.assign(name_begin, std::find(name_begin, strtab_end, u8{0}));
+
+        entries.push_back(std::move(entry));
+    }
+
+    pfs_header = header;
+    is_hfs = hfs;
+    pfs_entries = std::move(entries);
+    content_offset = strtab_offset + strtab_size;
+
+    return Loader::ResultStatus::Success;
+}
+
+/// Returns the number of entries parsed by the last successful Load().
+u32 PartitionFilesystem::GetNumEntries() const {
+    return static_cast<u32>(pfs_entries.size());
+}
+
+/**
+ * Gets the position of an entry's data.
+ * @param index Zero-based index of the entry
+ * @return End of the metadata plus the entry's stored offset, or 0 if index is out of range
+ */
+u64 PartitionFilesystem::GetEntryOffset(int index) const {
+    if (index < 0 || static_cast<size_t>(index) >= pfs_entries.size())
+        return 0;
+
+    return content_offset + pfs_entries[index].fs_entry.offset;
+}
+
+/**
+ * Gets the size of an entry's data.
+ * @param index Zero-based index of the entry
+ * @return Size stored in the entry, or 0 if index is out of range
+ */
+u64 PartitionFilesystem::GetEntrySize(int index) const {
+    if (index < 0 || static_cast<size_t>(index) >= pfs_entries.size())
+        return 0;
+
+    return pfs_entries[index].fs_entry.size;
+}
+
+/**
+ * Gets the name of an entry.
+ * @param index Zero-based index of the entry
+ * @return Name read from the string table, or an empty string if index is out of range
+ */
+std::string PartitionFilesystem::GetEntryName(int index) const {
+    if (index < 0 || static_cast<size_t>(index) >= pfs_entries.size())
+        return "";
+
+    return pfs_entries[index].name;
+}
+
+/**
+ * Gets the position of the data of the first entry with the given name.
+ * @param name Entry name to look up
+ * @return End of the metadata plus the entry's stored offset, or 0 if no entry matches
+ */
+u64 PartitionFilesystem::GetFileOffset(const std::string& name) const {
+    for (const auto& entry : pfs_entries) {
+        if (entry.name == name)
+            return content_offset + entry.fs_entry.offset;
+    }
+
+    return 0;
+}
+
+/**
+ * Gets the size of the first entry with the given name.
+ * @param name Entry name to look up
+ * @return Size stored in the entry, or 0 if no entry matches
+ */
+u64 PartitionFilesystem::GetFileSize(const std::string& name) const {
+    for (const auto& entry : pfs_entries) {
+        if (entry.name == name)
+            return entry.fs_entry.size;
+    }
+
+    return 0;
+}
+
+/// Logs the magic, the entry count and every entry's name, size and offset at debug level.
+void PartitionFilesystem::Print() const {
+    NGLOG_DEBUG(Service_FS, "Magic:                  {:.4}", pfs_header.magic.data());
+    NGLOG_DEBUG(Service_FS, "Files:                  {}", pfs_header.num_entries);
+    for (size_t i = 0; i < pfs_entries.size(); i++) {
+        NGLOG_DEBUG(Service_FS, " > File {}:              {} (0x{:X} bytes, at 0x{:X})", i,
+                    pfs_entries[i].name.c_str(), pfs_entries[i].fs_entry.size,
+                    GetFileOffset(pfs_entries[i].name));
+    }
+}
+} // namespace FileSys
--- a/src/core/file_sys/partition_filesystem.h
+++ b/src/core/file_sys/partition_filesystem.h
@@ -0,0 +1,113 @@
+// Copyright 2018 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <string>
+#include <vector>
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+#include "common/swap.h"
+
+namespace Loader {
+enum class ResultStatus;
+}
+
+namespace FileSys {
+
+/**
+ * Helper which implements an interface to parse PFS/HFS filesystems.
+ * Data can either be loaded from a file path or data with an offset into it.
+ */
+class PartitionFilesystem {
+public:
+    /**
+     * Loads the partition metadata from a file on disk.
+     * @param file_path Path of the file that contains the partition
+     * @param offset Byte offset of the partition within the file
+     * @return Result of the operation
+     */
+    Loader::ResultStatus Load(const std::string& file_path, size_t offset = 0);
+
+    /**
+     * Loads the partition metadata from a buffer.
+     * @param file_data Buffer that contains the partition
+     * @param offset Byte offset of the partition within the buffer
+     * @return Result of the operation
+     */
+    Loader::ResultStatus Load(const std::vector<u8>& file_data, size_t offset = 0);
+
+    /// Number of entries in the loaded partition
+    u32 GetNumEntries() const;
+    /// Position of the data of the entry at the given index
+    u64 GetEntryOffset(int index) const;
+    /// Size of the data of the entry at the given index
+    u64 GetEntrySize(int index) const;
+    /// Name of the entry at the given index
+    std::string GetEntryName(int index) const;
+    /// Position of the data of the first entry with the given name, or 0 if there is none
+    u64 GetFileOffset(const std::string& name) const;
+    /// Size of the data of the first entry with the given name, or 0 if there is none
+    u64 GetFileSize(const std::string& name) const;
+
+    /// Logs the magic, the entry count and every entry at debug level
+    void Print() const;
+
+private:
+    /// On-disk header found at the start of both PFS and HFS partitions
+    struct Header {
+        std::array<char, 4> magic;
+        u32_le num_entries;
+        u32_le strtab_size;
+        INSERT_PADDING_BYTES(0x4);
+    };
+
+    static_assert(sizeof(Header) == 0x10, "PFS/HFS header structure size is wrong");
+
+#pragma pack(push, 1)
+    /// Entry fields shared by the PFS and HFS layouts
+    struct FSEntry {
+        u64_le offset;
+        u64_le size;
+        u32_le strtab_offset;
+    };
+
+    static_assert(sizeof(FSEntry) == 0x14, "FS entry structure size is wrong");
+
+    /// On-disk entry of a PFS partition
+    struct PFSEntry {
+        FSEntry fs_entry;
+        INSERT_PADDING_BYTES(0x4);
+    };
+
+    static_assert(sizeof(PFSEntry) == 0x18, "PFS entry structure size is wrong");
+
+    /// On-disk entry of an HFS partition, which additionally carries hash information
+    struct HFSEntry {
+        FSEntry fs_entry;
+        u32_le hash_region_size;
+        INSERT_PADDING_BYTES(0x8);
+        std::array<char, 0x20> hash;
+    };
+
+    static_assert(sizeof(HFSEntry) == 0x40, "HFS entry structure size is wrong");
+
+#pragma pack(pop)
+
+    /// In-memory entry: the raw entry fields plus the name resolved from the string table
+    struct FileEntry {
+        FSEntry fs_entry;
+        std::string name;
+    };
+
+    // Members are value-initialized so that the getters are well defined before any Load()
+    Header pfs_header{};
+    bool is_hfs = false;
+    size_t content_offset = 0;
+
+    std::vector<FileEntry> pfs_entries;
+};
+
+} // namespace FileSys
--- a/src/core/file_sys/romfs_filesystem.cpp
+++ b/src/core/file_sys/romfs_filesystem.cpp
@@ -20,7 +20,7 @@ ResultVal<std::unique_ptr<StorageBackend>> RomFS_FileSystem::OpenFile(const std:
        std::make_unique<RomFS_Storage>(romfs_file, data_offset, data_size));
 }

-ResultCode RomFS_FileSystem::DeleteFile(const Path& path) const {
+ResultCode RomFS_FileSystem::DeleteFile(const std::string& path) const {
    LOG_CRITICAL(Service_FS, "Attempted to delete a file from an ROMFS archive (%s).",
                 GetName().c_str());
    // TODO(bunnei): Use correct error code
--- a/src/core/file_sys/romfs_filesystem.h
+++ b/src/core/file_sys/romfs_filesystem.h
@@ -31,7 +31,7 @@ public:

    ResultVal<std::unique_ptr<StorageBackend>> OpenFile(const std::string& path,
                                                        Mode mode) const override;
-    ResultCode DeleteFile(const Path& path) const override;
+    ResultCode DeleteFile(const std::string& path) const override;
    ResultCode RenameFile(const Path& src_path, const Path& dest_path) const override;
    ResultCode DeleteDirectory(const Path& path) const override;
    ResultCode DeleteDirectoryRecursively(const Path& path) const override;
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -861,14 +861,14 @@ static const FunctionDef SVC_Table[] = {
    {0x2B, nullptr, "FlushDataCache"},
    {0x2C, nullptr, "MapPhysicalMemory"},
    {0x2D, nullptr, "UnmapPhysicalMemory"},
-    {0x2E, nullptr, "Unknown"},
+    {0x2E, nullptr, "GetNextThreadInfo"},
    {0x2F, nullptr, "GetLastThreadInfo"},
    {0x30, nullptr, "GetResourceLimitLimitValue"},
    {0x31, nullptr, "GetResourceLimitCurrentValue"},
    {0x32, SvcWrap<SetThreadActivity>, "SetThreadActivity"},
    {0x33, SvcWrap<GetThreadContext>, "GetThreadContext"},
-    {0x34, nullptr, "Unknown"},
-    {0x35, nullptr, "Unknown"},
+    {0x34, nullptr, "WaitForAddress"},
+    {0x35, nullptr, "SignalToAddress"},
    {0x36, nullptr, "Unknown"},
    {0x37, nullptr, "Unknown"},
    {0x38, nullptr, "Unknown"},
@@ -876,7 +876,7 @@ static const FunctionDef SVC_Table[] = {
    {0x3A, nullptr, "Unknown"},
    {0x3B, nullptr, "Unknown"},
    {0x3C, nullptr, "DumpInfo"},
-    {0x3D, nullptr, "Unknown"},
+    {0x3D, nullptr, "DumpInfoNew"},
    {0x3E, nullptr, "Unknown"},
    {0x3F, nullptr, "Unknown"},
    {0x40, nullptr, "CreateSession"},
@@ -887,9 +887,9 @@ static const FunctionDef SVC_Table[] = {
    {0x45, nullptr, "CreateEvent"},
    {0x46, nullptr, "Unknown"},
    {0x47, nullptr, "Unknown"},
-    {0x48, nullptr, "Unknown"},
-    {0x49, nullptr, "Unknown"},
-    {0x4A, nullptr, "Unknown"},
+    {0x48, nullptr, "AllocateUnsafeMemory"},
+    {0x49, nullptr, "FreeUnsafeMemory"},
+    {0x4A, nullptr, "SetUnsafeAllocationLimit"},
    {0x4B, nullptr, "CreateJitMemory"},
    {0x4C, nullptr, "MapJitMemory"},
    {0x4D, nullptr, "SleepSystem"},
@@ -926,7 +926,7 @@ static const FunctionDef SVC_Table[] = {
    {0x6C, nullptr, "SetHardwareBreakPoint"},
    {0x6D, nullptr, "GetDebugThreadParam"},
    {0x6E, nullptr, "Unknown"},
-    {0x6F, nullptr, "Unknown"},
+    {0x6F, nullptr, "GetMemoryInfo"},
    {0x70, nullptr, "CreatePort"},
    {0x71, nullptr, "ManageNamedPort"},
    {0x72, nullptr, "ConnectToPort"},
--- a/src/core/hle/kernel/vm_manager.cpp
+++ b/src/core/hle/kernel/vm_manager.cpp
@@ -380,7 +380,7 @@ void VMManager::UpdatePageTableForVMA(const VirtualMemoryArea& vma) {

 u64 VMManager::GetTotalMemoryUsage() {
    LOG_WARNING(Kernel, "(STUBBED) called");
-    return 0xBE000000;
+    return 0xF8000000;
 }

 u64 VMManager::GetTotalHeapUsage() {
--- a/src/core/hle/service/acc/acc.cpp
+++ b/src/core/hle/service/acc/acc.cpp
@@ -38,7 +38,10 @@ class IProfile final : public ServiceFramework<IProfile> {
 public:
    IProfile() : ServiceFramework("IProfile") {
        static const FunctionInfo functions[] = {
+            {0, nullptr, "Get"},
            {1, &IProfile::GetBase, "GetBase"},
+            {10, nullptr, "GetImageSize"},
+            {11, nullptr, "LoadImage"},
        };
        RegisterHandlers(functions);
    }
@@ -59,6 +62,11 @@ public:
        static const FunctionInfo functions[] = {
            {0, &IManagerForApplication::CheckAvailability, "CheckAvailability"},
            {1, &IManagerForApplication::GetAccountId, "GetAccountId"},
+            {2, nullptr, "EnsureIdTokenCacheAsync"},
+            {3, nullptr, "LoadIdTokenCache"},
+            {130, nullptr, "GetNintendoAccountUserResourceCacheForApplication"},
+            {150, nullptr, "CreateAuthorizationRequest"},
+            {160, nullptr, "StoreOpenContext"},
        };
        RegisterHandlers(functions);
    }
--- a/src/core/hle/service/filesystem/fsp_srv.cpp
+++ b/src/core/hle/service/filesystem/fsp_srv.cpp
@@ -236,7 +236,7 @@ public:
        : ServiceFramework("IFileSystem"), backend(std::move(backend)) {
        static const FunctionInfo functions[] = {
            {0, &IFileSystem::CreateFile, "CreateFile"},
-            {1, nullptr, "DeleteFile"},
+            {1, &IFileSystem::DeleteFile, "DeleteFile"},
            {2, &IFileSystem::CreateDirectory, "CreateDirectory"},
            {3, nullptr, "DeleteDirectory"},
            {4, nullptr, "DeleteDirectoryRecursively"},
@@ -273,6 +273,20 @@ public:
        rb.Push(backend->CreateFile(name, size));
    }

+    void DeleteFile(Kernel::HLERequestContext& ctx) {
+        IPC::RequestParser rp{ctx};
+
+        auto file_buffer = ctx.ReadBuffer();
+        auto end = std::find(file_buffer.begin(), file_buffer.end(), '\0');
+
+        std::string name(file_buffer.begin(), end);
+
+        LOG_DEBUG(Service_FS, "called file %s", name.c_str());
+
+        IPC::ResponseBuilder rb{ctx, 2};
+        rb.Push(backend->DeleteFile(name));
+    }
+
    void CreateDirectory(Kernel::HLERequestContext& ctx) {
        IPC::RequestParser rp{ctx};

--- a/src/core/hle/service/ns/pl_u.cpp
+++ b/src/core/hle/service/ns/pl_u.cpp
@@ -47,10 +47,10 @@ PL_U::PL_U() : ServiceFramework("pl:u") {
    FileUtil::CreateFullPath(filepath); // Create path if not already created
    FileUtil::IOFile file(filepath, "rb");

+    shared_font = std::make_shared<std::vector<u8>>(SHARED_FONT_MEM_SIZE);
    if (file.IsOpen()) {
        // Read shared font data
        ASSERT(file.GetSize() == SHARED_FONT_MEM_SIZE);
-        shared_font = std::make_shared<std::vector<u8>>(static_cast<size_t>(file.GetSize()));
        file.ReadBytes(shared_font->data(), shared_font->size());
    } else {
        LOG_WARNING(Service_NS, "Unable to load shared font: %s", filepath.c_str());
@@ -97,22 +97,19 @@ void PL_U::GetSharedMemoryAddressOffset(Kernel::HLERequestContext& ctx) {
 }

 void PL_U::GetSharedMemoryNativeHandle(Kernel::HLERequestContext& ctx) {
-    if (shared_font != nullptr) {
-        // TODO(bunnei): This is a less-than-ideal solution to load a RAM dump of the Switch shared
-        // font data. This (likely) relies on exact address, size, and offsets from the original
-        // dump. In the future, we need to replace this with a more robust solution.
+    // TODO(bunnei): This is a less-than-ideal solution to load a RAM dump of the Switch shared
+    // font data. This (likely) relies on exact address, size, and offsets from the original
+    // dump. In the future, we need to replace this with a more robust solution.

-        // Map backing memory for the font data
-        Core::CurrentProcess()->vm_manager.MapMemoryBlock(SHARED_FONT_MEM_VADDR, shared_font, 0,
-                                                          SHARED_FONT_MEM_SIZE,
-                                                          Kernel::MemoryState::Shared);
+    // Map backing memory for the font data
+    Core::CurrentProcess()->vm_manager.MapMemoryBlock(
+        SHARED_FONT_MEM_VADDR, shared_font, 0, SHARED_FONT_MEM_SIZE, Kernel::MemoryState::Shared);

-        // Create shared font memory object
-        shared_font_mem = Kernel::SharedMemory::Create(
-            Core::CurrentProcess(), SHARED_FONT_MEM_SIZE, Kernel::MemoryPermission::ReadWrite,
-            Kernel::MemoryPermission::Read, SHARED_FONT_MEM_VADDR, Kernel::MemoryRegion::BASE,
-            "PL_U:shared_font_mem");
-    }
+    // Create shared font memory object
+    shared_font_mem = Kernel::SharedMemory::Create(
+        Core::CurrentProcess(), SHARED_FONT_MEM_SIZE, Kernel::MemoryPermission::ReadWrite,
+        Kernel::MemoryPermission::Read, SHARED_FONT_MEM_VADDR, Kernel::MemoryRegion::BASE,
+        "PL_U:shared_font_mem");

    LOG_DEBUG(Service_NS, "called");
    IPC::ResponseBuilder rb{ctx, 2, 1};
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -5,6 +5,7 @@
 #include <algorithm>

 #include "common/alignment.h"
+#include "common/microprofile.h"
 #include "common/scope_exit.h"
 #include "core/core.h"
 #include "core/core_timing.h"
@@ -128,6 +129,8 @@ void NVFlinger::Compose() {
        // Search for a queued buffer and acquire it
        auto buffer = buffer_queue->AcquireBuffer();

+        MicroProfileFlip();
+
        if (buffer == boost::none) {
            // There was no queued buffer to draw, render previous frame
            Core::System::GetInstance().perf_stats.EndGameFrame();
--- a/src/core/hle/service/set/set.cpp
+++ b/src/core/hle/service/set/set.cpp
@@ -36,6 +36,7 @@ SET::SET() : ServiceFramework("set") {
        {5, nullptr, "GetAvailableLanguageCodes2"},
        {6, nullptr, "GetAvailableLanguageCodeCount2"},
        {7, nullptr, "GetKeyCodeMap"},
+        {8, nullptr, "GetQuestFlag"},
    };
    RegisterHandlers(functions);
 }
--- a/src/core/hle/service/set/set_cal.cpp
+++ b/src/core/hle/service/set/set_cal.cpp
@@ -32,6 +32,15 @@ SET_CAL::SET_CAL() : ServiceFramework("set:cal") {
        {21, nullptr, "GetEticketDeviceKey"},
        {22, nullptr, "GetSpeakerParameter"},
        {23, nullptr, "GetLcdVendorId"},
+        {24, nullptr, "GetEciDeviceCertificate2"},
+        {25, nullptr, "GetEciDeviceKey2"},
+        {26, nullptr, "GetAmiiboKey"},
+        {27, nullptr, "GetAmiiboEcqvCertificate"},
+        {28, nullptr, "GetAmiiboEcdsaCertificate"},
+        {29, nullptr, "GetAmiiboEcqvBlsKey"},
+        {30, nullptr, "GetAmiiboEcqvBlsCertificate"},
+        {31, nullptr, "GetAmiiboEcqvBlsRootCertificate"},
+        {32, nullptr, "GetUnknownId"},
    };
    RegisterHandlers(functions);
 }
--- a/src/core/hle/service/set/set_sys.cpp
+++ b/src/core/hle/service/set/set_sys.cpp
@@ -27,6 +27,7 @@ SET_SYS::SET_SYS() : ServiceFramework("set:sys") {
        {2, nullptr, "GetNetworkSettings"},
        {3, nullptr, "GetFirmwareVersion"},
        {4, nullptr, "GetFirmwareVersion2"},
+        {5, nullptr, "GetFirmwareVersionDigest"},
        {7, nullptr, "GetLockScreenFlag"},
        {8, nullptr, "SetLockScreenFlag"},
        {9, nullptr, "GetBacklightSettings"},
@@ -159,6 +160,15 @@ SET_SYS::SET_SYS() : ServiceFramework("set:sys") {
        {138, nullptr, "GetWebInspectorFlag"},
        {139, nullptr, "GetAllowedSslHosts"},
        {140, nullptr, "GetHostFsMountPoint"},
+        {141, nullptr, "GetRequiresRunRepairTimeReviser"},
+        {142, nullptr, "SetRequiresRunRepairTimeReviser"},
+        {143, nullptr, "SetBlePairingSettings"},
+        {144, nullptr, "GetBlePairingSettings"},
+        {145, nullptr, "GetConsoleSixAxisSensorAngularVelocityTimeBias"},
+        {146, nullptr, "SetConsoleSixAxisSensorAngularVelocityTimeBias"},
+        {147, nullptr, "GetConsoleSixAxisSensorAngularAcceleration"},
+        {148, nullptr, "SetConsoleSixAxisSensorAngularAcceleration"},
+        {149, nullptr, "GetRebootlessSystemUpdateVersion"},
    };
    RegisterHandlers(functions);
 }
--- a/src/core/hle/service/sockets/bsd.cpp
+++ b/src/core/hle/service/sockets/bsd.cpp
@@ -79,9 +79,34 @@ BSD::BSD(const char* name) : ServiceFramework(name) {
        {0, &BSD::RegisterClient, "RegisterClient"},
        {1, &BSD::StartMonitoring, "StartMonitoring"},
        {2, &BSD::Socket, "Socket"},
+        {3, nullptr, "SocketExempt"},
+        {4, nullptr, "Open"},
+        {5, nullptr, "Select"},
+        {6, nullptr, "Poll"},
+        {7, nullptr, "Sysctl"},
+        {8, nullptr, "Recv"},
+        {9, nullptr, "RecvFrom"},
+        {10, nullptr, "Send"},
        {11, &BSD::SendTo, "SendTo"},
+        {12, nullptr, "Accept"},
+        {13, nullptr, "Bind"},
        {14, &BSD::Connect, "Connect"},
+        {15, nullptr, "GetPeerName"},
+        {16, nullptr, "GetSockName"},
+        {17, nullptr, "GetSockOpt"},
+        {18, nullptr, "Listen"},
+        {19, nullptr, "Ioctl"},
+        {20, nullptr, "Fcntl"},
+        {21, nullptr, "SetSockOpt"},
+        {22, nullptr, "Shutdown"},
+        {23, nullptr, "ShutdownAllSockets"},
+        {24, nullptr, "Write"},
+        {25, nullptr, "Read"},
        {26, &BSD::Close, "Close"},
+        {27, nullptr, "DuplicateSocket"},
+        {28, nullptr, "GetResourceStatistics"},
+        {29, nullptr, "RecvMMsg"},
+        {30, nullptr, "SendMMsg"},
    };
    RegisterHandlers(functions);
 }
--- a/src/core/hle/service/sockets/sfdnsres.cpp
+++ b/src/core/hle/service/sockets/sfdnsres.cpp
@@ -30,6 +30,7 @@ SFDNSRES::SFDNSRES() : ServiceFramework("sfdnsres") {
        {7, nullptr, "GetNameInfo"},
        {8, nullptr, "RequestCancelHandle"},
        {9, nullptr, "CancelSocketCall"},
+        {11, nullptr, "ClearDnsIpServerAddressArray"},
    };
    RegisterHandlers(functions);
 }
--- a/src/core/hle/service/spl/spl.cpp
+++ b/src/core/hle/service/spl/spl.cpp
@@ -33,6 +33,12 @@ SPL::SPL(std::shared_ptr<Module> module) : Module::Interface(std::move(module),
        {23, nullptr, "GetSplWaitEvent"},
        {24, nullptr, "SetSharedData"},
        {25, nullptr, "GetSharedData"},
+        {26, nullptr, "ImportSslRsaKey"},
+        {27, nullptr, "SecureExpModWithSslKey"},
+        {28, nullptr, "ImportEsRsaKey"},
+        {29, nullptr, "SecureExpModWithEsKey"},
+        {30, nullptr, "EncryptManuRsaKeyForImport"},
+        {31, nullptr, "GetPackage2Hash"},
    };
    RegisterHandlers(functions);
 }
--- a/src/core/hle/service/time/time.cpp
+++ b/src/core/hle/service/time/time.cpp
@@ -20,7 +20,11 @@ public:
    ISystemClock() : ServiceFramework("ISystemClock") {
        static const FunctionInfo functions[] = {
            {0, &ISystemClock::GetCurrentTime, "GetCurrentTime"},
-            {2, &ISystemClock::GetSystemClockContext, "GetSystemClockContext"}};
+            {1, nullptr, "SetCurrentTime"},
+            {2, &ISystemClock::GetSystemClockContext, "GetSystemClockContext"},
+            {3, nullptr, "SetSystemClockContext"},
+
+        };
        RegisterHandlers(functions);
    }

--- a/src/core/hle/service/time/time_s.cpp
+++ b/src/core/hle/service/time/time_s.cpp
@@ -14,6 +14,17 @@ TIME_S::TIME_S(std::shared_ptr<Module> time) : Module::Interface(std::move(time)
        {2, &TIME_S::GetStandardSteadyClock, "GetStandardSteadyClock"},
        {3, &TIME_S::GetTimeZoneService, "GetTimeZoneService"},
        {4, &TIME_S::GetStandardLocalSystemClock, "GetStandardLocalSystemClock"},
+        {5, nullptr, "GetEphemeralNetworkSystemClock"},
+        {50, nullptr, "SetStandardSteadyClockInternalOffset"},
+        {100, nullptr, "IsStandardUserSystemClockAutomaticCorrectionEnabled"},
+        {101, nullptr, "SetStandardUserSystemClockAutomaticCorrectionEnabled"},
+        {102, nullptr, "GetStandardUserSystemClockInitialYear"},
+        {200, nullptr, "IsStandardNetworkSystemClockAccuracySufficient"},
+        {300, nullptr, "CalculateMonotonicSystemClockBaseTimePoint"},
+        {400, nullptr, "GetClockSnapshot"},
+        {401, nullptr, "GetClockSnapshotFromSystemClockContext"},
+        {500, nullptr, "CalculateStandardUserSystemClockDifferenceByUser"},
+        {501, nullptr, "CalculateSpanBetween"},
    };
    RegisterHandlers(functions);
 }
--- a/src/core/hle/service/time/time_u.cpp
+++ b/src/core/hle/service/time/time_u.cpp
@@ -14,6 +14,17 @@ TIME_U::TIME_U(std::shared_ptr<Module> time) : Module::Interface(std::move(time)
        {2, &TIME_U::GetStandardSteadyClock, "GetStandardSteadyClock"},
        {3, &TIME_U::GetTimeZoneService, "GetTimeZoneService"},
        {4, &TIME_U::GetStandardLocalSystemClock, "GetStandardLocalSystemClock"},
+        {5, nullptr, "GetEphemeralNetworkSystemClock"},
+        {50, nullptr, "SetStandardSteadyClockInternalOffset"},
+        {100, nullptr, "IsStandardUserSystemClockAutomaticCorrectionEnabled"},
+        {101, nullptr, "SetStandardUserSystemClockAutomaticCorrectionEnabled"},
+        {102, nullptr, "GetStandardUserSystemClockInitialYear"},
+        {200, nullptr, "IsStandardNetworkSystemClockAccuracySufficient"},
+        {300, nullptr, "CalculateMonotonicSystemClockBaseTimePoint"},
+        {400, nullptr, "GetClockSnapshot"},
+        {401, nullptr, "GetClockSnapshotFromSystemClockContext"},
+        {500, nullptr, "CalculateStandardUserSystemClockDifferenceByUser"},
+        {501, nullptr, "CalculateSpanBetween"},
    };
    RegisterHandlers(functions);
 }
--- a/src/core/hle/service/vi/vi.cpp
+++ b/src/core/hle/service/vi/vi.cpp
@@ -580,7 +580,48 @@ public:
    ISystemDisplayService() : ServiceFramework("ISystemDisplayService") {
        static const FunctionInfo functions[] = {
            {1200, nullptr, "GetZOrderCountMin"},
+            {1202, nullptr, "GetZOrderCountMax"},
+            {1203, nullptr, "GetDisplayLogicalResolution"},
+            {1204, nullptr, "SetDisplayMagnification"},
+            {2201, nullptr, "SetLayerPosition"},
+            {2203, nullptr, "SetLayerSize"},
+            {2204, nullptr, "GetLayerZ"},
            {2205, &ISystemDisplayService::SetLayerZ, "SetLayerZ"},
+            {2207, nullptr, "SetLayerVisibility"},
+            {2209, nullptr, "SetLayerAlpha"},
+            {2312, nullptr, "CreateStrayLayer"},
+            {2400, nullptr, "OpenIndirectLayer"},
+            {2401, nullptr, "CloseIndirectLayer"},
+            {2402, nullptr, "FlipIndirectLayer"},
+            {3000, nullptr, "ListDisplayModes"},
+            {3001, nullptr, "ListDisplayRgbRanges"},
+            {3002, nullptr, "ListDisplayContentTypes"},
+            {3200, nullptr, "GetDisplayMode"},
+            {3201, nullptr, "SetDisplayMode"},
+            {3202, nullptr, "GetDisplayUnderscan"},
+            {3203, nullptr, "SetDisplayUnderscan"},
+            {3204, nullptr, "GetDisplayContentType"},
+            {3205, nullptr, "SetDisplayContentType"},
+            {3206, nullptr, "GetDisplayRgbRange"},
+            {3207, nullptr, "SetDisplayRgbRange"},
+            {3208, nullptr, "GetDisplayCmuMode"},
+            {3209, nullptr, "SetDisplayCmuMode"},
+            {3210, nullptr, "GetDisplayContrastRatio"},
+            {3211, nullptr, "SetDisplayContrastRatio"},
+            {3214, nullptr, "GetDisplayGamma"},
+            {3215, nullptr, "SetDisplayGamma"},
+            {3216, nullptr, "GetDisplayCmuLuma"},
+            {3217, nullptr, "SetDisplayCmuLuma"},
+            {8225, nullptr, "GetSharedBufferMemoryHandleId"},
+            {8250, nullptr, "OpenSharedLayer"},
+            {8251, nullptr, "CloseSharedLayer"},
+            {8252, nullptr, "ConnectSharedLayer"},
+            {8253, nullptr, "DisconnectSharedLayer"},
+            {8254, nullptr, "AcquireSharedFrameBuffer"},
+            {8255, nullptr, "PresentSharedFrameBuffer"},
+            {8256, nullptr, "GetSharedFrameBufferAcquirableEvent"},
+            {8257, nullptr, "FillSharedFrameBufferColor"},
+            {8258, nullptr, "CancelSharedFrameBuffer"},
        };
        RegisterHandlers(functions);
    }
@@ -603,10 +644,72 @@ public:
    explicit IManagerDisplayService(std::shared_ptr<NVFlinger::NVFlinger> nv_flinger)
        : ServiceFramework("IManagerDisplayService"), nv_flinger(std::move(nv_flinger)) {
        static const FunctionInfo functions[] = {
+            {200, nullptr, "AllocateProcessHeapBlock"},
+            {201, nullptr, "FreeProcessHeapBlock"},
            {1020, &IManagerDisplayService::CloseDisplay, "CloseDisplay"},
            {1102, nullptr, "GetDisplayResolution"},
            {2010, &IManagerDisplayService::CreateManagedLayer, "CreateManagedLayer"},
+            {2011, nullptr, "DestroyManagedLayer"},
+            {2050, nullptr, "CreateIndirectLayer"},
+            {2051, nullptr, "DestroyIndirectLayer"},
+            {2052, nullptr, "CreateIndirectProducerEndPoint"},
+            {2053, nullptr, "DestroyIndirectProducerEndPoint"},
+            {2054, nullptr, "CreateIndirectConsumerEndPoint"},
+            {2055, nullptr, "DestroyIndirectConsumerEndPoint"},
+            {2300, nullptr, "AcquireLayerTexturePresentingEvent"},
+            {2301, nullptr, "ReleaseLayerTexturePresentingEvent"},
+            {2302, nullptr, "GetDisplayHotplugEvent"},
+            {2402, nullptr, "GetDisplayHotplugState"},
+            {2501, nullptr, "GetCompositorErrorInfo"},
+            {2601, nullptr, "GetDisplayErrorEvent"},
+            {4201, nullptr, "SetDisplayAlpha"},
+            {4203, nullptr, "SetDisplayLayerStack"},
+            {4205, nullptr, "SetDisplayPowerState"},
+            {4206, nullptr, "SetDefaultDisplay"},
            {6000, &IManagerDisplayService::AddToLayerStack, "AddToLayerStack"},
+            {6001, nullptr, "RemoveFromLayerStack"},
+            {6002, nullptr, "SetLayerVisibility"},
+            {6003, nullptr, "SetLayerConfig"},
+            {6004, nullptr, "AttachLayerPresentationTracer"},
+            {6005, nullptr, "DetachLayerPresentationTracer"},
+            {6006, nullptr, "StartLayerPresentationRecording"},
+            {6007, nullptr, "StopLayerPresentationRecording"},
+            {6008, nullptr, "StartLayerPresentationFenceWait"},
+            {6009, nullptr, "StopLayerPresentationFenceWait"},
+            {6010, nullptr, "GetLayerPresentationAllFencesExpiredEvent"},
+            {7000, nullptr, "SetContentVisibility"},
+            {8000, nullptr, "SetConductorLayer"},
+            {8100, nullptr, "SetIndirectProducerFlipOffset"},
+            {8200, nullptr, "CreateSharedBufferStaticStorage"},
+            {8201, nullptr, "CreateSharedBufferTransferMemory"},
+            {8202, nullptr, "DestroySharedBuffer"},
+            {8203, nullptr, "BindSharedLowLevelLayerToManagedLayer"},
+            {8204, nullptr, "BindSharedLowLevelLayerToIndirectLayer"},
+            {8207, nullptr, "UnbindSharedLowLevelLayer"},
+            {8208, nullptr, "ConnectSharedLowLevelLayerToSharedBuffer"},
+            {8209, nullptr, "DisconnectSharedLowLevelLayerFromSharedBuffer"},
+            {8210, nullptr, "CreateSharedLayer"},
+            {8211, nullptr, "DestroySharedLayer"},
+            {8216, nullptr, "AttachSharedLayerToLowLevelLayer"},
+            {8217, nullptr, "ForceDetachSharedLayerFromLowLevelLayer"},
+            {8218, nullptr, "StartDetachSharedLayerFromLowLevelLayer"},
+            {8219, nullptr, "FinishDetachSharedLayerFromLowLevelLayer"},
+            {8220, nullptr, "GetSharedLayerDetachReadyEvent"},
+            {8221, nullptr, "GetSharedLowLevelLayerSynchronizedEvent"},
+            {8222, nullptr, "CheckSharedLowLevelLayerSynchronized"},
+            {8223, nullptr, "RegisterSharedBufferImporterAruid"},
+            {8224, nullptr, "UnregisterSharedBufferImporterAruid"},
+            {8227, nullptr, "CreateSharedBufferProcessHeap"},
+            {8228, nullptr, "GetSharedLayerLayerStacks"},
+            {8229, nullptr, "SetSharedLayerLayerStacks"},
+            {8291, nullptr, "PresentDetachedSharedFrameBufferToLowLevelLayer"},
+            {8292, nullptr, "FillDetachedSharedFrameBufferColor"},
+            {8293, nullptr, "GetDetachedSharedFrameBufferImage"},
+            {8294, nullptr, "SetDetachedSharedFrameBufferImage"},
+            {8295, nullptr, "CopyDetachedSharedFrameBufferImage"},
+            {8296, nullptr, "SetDetachedSharedFrameBufferSubImage"},
+            {8297, nullptr, "GetSharedFrameBufferContentParameter"},
+            {8298, nullptr, "ExpandStartupLogoOnSharedFrameBuffer"},
        };
        RegisterHandlers(functions);
    }
@@ -825,13 +928,21 @@ IApplicationDisplayService::IApplicationDisplayService(
         "GetIndirectDisplayTransactionService"},
        {1000, &IApplicationDisplayService::ListDisplays, "ListDisplays"},
        {1010, &IApplicationDisplayService::OpenDisplay, "OpenDisplay"},
+        {1011, nullptr, "OpenDefaultDisplay"},
        {1020, &IApplicationDisplayService::CloseDisplay, "CloseDisplay"},
+        {1101, nullptr, "SetDisplayEnabled"},
        {1102, &IApplicationDisplayService::GetDisplayResolution, "GetDisplayResolution"},
-        {2101, &IApplicationDisplayService::SetLayerScalingMode, "SetLayerScalingMode"},
        {2020, &IApplicationDisplayService::OpenLayer, "OpenLayer"},
+        {2021, nullptr, "CloseLayer"},
        {2030, &IApplicationDisplayService::CreateStrayLayer, "CreateStrayLayer"},
        {2031, &IApplicationDisplayService::DestroyStrayLayer, "DestroyStrayLayer"},
+        {2101, &IApplicationDisplayService::SetLayerScalingMode, "SetLayerScalingMode"},
+        {2102, nullptr, "ConvertScalingMode"},
+        {2450, nullptr, "GetIndirectLayerImageMap"},
+        {2451, nullptr, "GetIndirectLayerImageCropMap"},
+        {2460, nullptr, "GetIndirectLayerImageRequiredMemoryInfo"},
        {5202, &IApplicationDisplayService::GetDisplayVsyncEvent, "GetDisplayVsyncEvent"},
+        {5203, nullptr, "GetDisplayVsyncEventForDebug"},
    };
    RegisterHandlers(functions);
 }
--- a/src/core/hle/service/vi/vi_u.cpp
+++ b/src/core/hle/service/vi/vi_u.cpp
@@ -11,7 +11,6 @@ VI_U::VI_U(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger>
    : Module::Interface(std::move(module), "vi:u", std::move(nv_flinger)) {
    static const FunctionInfo functions[] = {
        {0, &VI_U::GetDisplayService, "GetDisplayService"},
-        {3, nullptr, "GetDisplayServiceWithProxyNameExchange"},
    };
    RegisterHandlers(functions);
 }
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -9,6 +9,7 @@ add_library(video_core STATIC
    engines/maxwell_3d.h
    engines/maxwell_compute.cpp
    engines/maxwell_compute.h
+    engines/shader_bytecode.h
    gpu.cpp
    gpu.h
    macro_interpreter.cpp
@@ -27,6 +28,8 @@ add_library(video_core STATIC
    renderer_opengl/gl_shader_decompiler.h
    renderer_opengl/gl_shader_gen.cpp
    renderer_opengl/gl_shader_gen.h
+    renderer_opengl/gl_shader_manager.cpp
+    renderer_opengl/gl_shader_manager.h
    renderer_opengl/gl_shader_util.cpp
    renderer_opengl/gl_shader_util.h
    renderer_opengl/gl_state.cpp
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -74,8 +74,6 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {

    regs.reg_array[method] = value;

-#define MAXWELL3D_REG_INDEX(field_name) (offsetof(Regs, field_name) / sizeof(u32))
-
    switch (method) {
    case MAXWELL3D_REG_INDEX(code_address.code_address_high):
    case MAXWELL3D_REG_INDEX(code_address.code_address_low): {
@@ -136,7 +134,7 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
        break;
    }

-#undef MAXWELL3D_REG_INDEX
+    VideoCore::g_renderer->Rasterizer()->NotifyMaxwellRegisterChanged(method);

    if (debug_context) {
        debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandProcessed, nullptr);
@@ -165,6 +163,7 @@ void Maxwell3D::ProcessQueryGet() {
 void Maxwell3D::DrawArrays() {
    LOG_DEBUG(HW_GPU, "called, topology=%d, count=%d", regs.draw.topology.Value(),
              regs.vertex_buffer.count);
+    ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?");

    auto debug_context = Core::System::GetInstance().GetGPUDebugContext();

@@ -176,7 +175,8 @@ void Maxwell3D::DrawArrays() {
        debug_context->OnEvent(Tegra::DebugContext::Event::FinishedPrimitiveBatch, nullptr);
    }

-    VideoCore::g_renderer->Rasterizer()->AccelerateDrawBatch(false /*is_indexed*/);
+    const bool is_indexed{regs.index_array.count && !regs.vertex_buffer.count};
+    VideoCore::g_renderer->Rasterizer()->AccelerateDrawBatch(is_indexed);
 }

 void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) {
@@ -218,10 +218,12 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
    Texture::TICEntry tic_entry;
    Memory::ReadBlock(tic_address_cpu, &tic_entry, sizeof(Texture::TICEntry));

-    ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear,
-               "TIC versions other than BlockLinear are unimplemented");
+    ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear ||
+                   tic_entry.header_version == Texture::TICHeaderVersion::Pitch,
+               "TIC versions other than BlockLinear or Pitch are unimplemented");

-    ASSERT_MSG(tic_entry.texture_type == Texture::TextureType::Texture2D,
+    ASSERT_MSG((tic_entry.texture_type == Texture::TextureType::Texture2D) ||
+                   (tic_entry.texture_type == Texture::TextureType::Texture2DNoMipmap),
               "Texture types other than Texture2D are unimplemented");

    auto r_type = tic_entry.r_type.Value();
@@ -301,5 +303,26 @@ u32 Maxwell3D::GetRegisterValue(u32 method) const {
    return regs.reg_array[method];
 }

+bool Maxwell3D::IsShaderStageEnabled(Regs::ShaderStage stage) const {
+    // The Vertex stage is always enabled.
+    if (stage == Regs::ShaderStage::Vertex)
+        return true;
+
+    switch (stage) {
+    case Regs::ShaderStage::TesselationControl:
+        return regs.shader_config[static_cast<size_t>(Regs::ShaderProgram::TesselationControl)]
+                   .enable != 0;
+    case Regs::ShaderStage::TesselationEval:
+        return regs.shader_config[static_cast<size_t>(Regs::ShaderProgram::TesselationEval)]
+                   .enable != 0;
+    case Regs::ShaderStage::Geometry:
+        return regs.shader_config[static_cast<size_t>(Regs::ShaderProgram::Geometry)].enable != 0;
+    case Regs::ShaderStage::Fragment:
+        return regs.shader_config[static_cast<size_t>(Regs::ShaderProgram::Fragment)].enable != 0;
+    }
+
+    UNREACHABLE();
+}
+
 } // namespace Engines
 } // namespace Tegra
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -20,6 +20,9 @@
 namespace Tegra {
 namespace Engines {

+#define MAXWELL3D_REG_INDEX(field_name)                                                            \
+    (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32))
+
 class Maxwell3D final {
 public:
    explicit Maxwell3D(MemoryManager& memory_manager);
@@ -248,6 +251,52 @@ public:
            Patches = 0xe,
        };

+        enum class IndexFormat : u32 {
+            UnsignedByte = 0x0,
+            UnsignedShort = 0x1,
+            UnsignedInt = 0x2,
+        };
+
+        struct Blend {
+            enum class Equation : u32 {
+                Add = 1,
+                Subtract = 2,
+                ReverseSubtract = 3,
+                Min = 4,
+                Max = 5,
+            };
+
+            enum class Factor : u32 {
+                Zero = 0x1,
+                One = 0x2,
+                SourceColor = 0x3,
+                OneMinusSourceColor = 0x4,
+                SourceAlpha = 0x5,
+                OneMinusSourceAlpha = 0x6,
+                DestAlpha = 0x7,
+                OneMinusDestAlpha = 0x8,
+                DestColor = 0x9,
+                OneMinusDestColor = 0xa,
+                SourceAlphaSaturate = 0xb,
+                Source1Color = 0x10,
+                OneMinusSource1Color = 0x11,
+                Source1Alpha = 0x12,
+                OneMinusSource1Alpha = 0x13,
+                ConstantColor = 0x61,
+                OneMinusConstantColor = 0x62,
+                ConstantAlpha = 0x63,
+                OneMinusConstantAlpha = 0x64,
+            };
+
+            u32 separate_alpha;
+            Equation equation_rgb;
+            Factor factor_source_rgb;
+            Factor factor_dest_rgb;
+            Equation equation_a;
+            Factor factor_source_a;
+            Factor factor_dest_a;
+        };
+
        union {
            struct {
                INSERT_PADDING_WORDS(0x200);
@@ -270,7 +319,15 @@ public:
                    }
                } rt[NumRenderTargets];

-                INSERT_PADDING_WORDS(0x80);
+                struct {
+                    f32 scale_x;
+                    f32 scale_y;
+                    f32 scale_z;
+                    u32 translate_x;
+                    u32 translate_y;
+                    u32 translate_z;
+                    INSERT_PADDING_WORDS(2);
+                } viewport_transform[NumViewports];

                struct {
                    union {
@@ -375,7 +432,42 @@ public:
                    };
                } draw;

-                INSERT_PADDING_WORDS(0x139);
+                INSERT_PADDING_WORDS(0x6B);
+
+                struct {
+                    u32 start_addr_high;
+                    u32 start_addr_low;
+                    u32 end_addr_high;
+                    u32 end_addr_low;
+                    IndexFormat format;
+                    u32 first;
+                    u32 count;
+
+                    unsigned FormatSizeInBytes() const {
+                        switch (format) {
+                        case IndexFormat::UnsignedByte:
+                            return 1;
+                        case IndexFormat::UnsignedShort:
+                            return 2;
+                        case IndexFormat::UnsignedInt:
+                            return 4;
+                        }
+                        UNREACHABLE();
+                    }
+
+                    GPUVAddr StartAddress() const {
+                        return static_cast<GPUVAddr>(
+                            (static_cast<GPUVAddr>(start_addr_high) << 32) | start_addr_low);
+                    }
+
+                    GPUVAddr EndAddress() const {
+                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(end_addr_high) << 32) |
+                                                     end_addr_low);
+                    }
+                } index_array;
+
+                INSERT_PADDING_WORDS(0xC7);
+
                struct {
                    u32 query_address_high;
                    u32 query_address_low;
@@ -410,7 +502,9 @@ public:
                    }
                } vertex_array[NumVertexArrays];

-                INSERT_PADDING_WORDS(0x40);
+                Blend blend;
+
+                INSERT_PADDING_WORDS(0x39);

                struct {
                    u32 limit_high;
@@ -427,14 +521,11 @@ public:
                        BitField<0, 1, u32> enable;
                        BitField<4, 4, ShaderProgram> program;
                    };
-                    u32 start_id;
-                    INSERT_PADDING_WORDS(1);
-                    u32 gpr_alloc;
-                    ShaderStage type;
-                    INSERT_PADDING_WORDS(9);
+                    u32 offset;
+                    INSERT_PADDING_WORDS(14);
                } shader_config[MaxShaderProgram];

-                INSERT_PADDING_WORDS(0x8C);
+                INSERT_PADDING_WORDS(0x80);

                struct {
                    u32 cb_size;
@@ -507,6 +598,7 @@ public:
    };

    State state{};
+    MemoryManager& memory_manager;

    /// Reads a register value located at the input method address
    u32 GetRegisterValue(u32 method) const;
@@ -520,9 +612,10 @@ public:
    /// Returns a list of enabled textures for the specified shader stage.
    std::vector<Texture::FullTextureInfo> GetStageTextures(Regs::ShaderStage stage) const;

-private:
-    MemoryManager& memory_manager;
+    /// Returns whether the specified shader stage is enabled or not.
+    bool IsShaderStageEnabled(Regs::ShaderStage stage) const;

+private:
    std::unordered_map<u32, std::vector<u32>> uploaded_macros;

    /// Macro method that is currently being executed / being fed parameters.
@@ -564,6 +657,7 @@ private:
                  "Field " #field_name " has invalid position")

 ASSERT_REG_POSITION(rt, 0x200);
+ASSERT_REG_POSITION(viewport_transform[0], 0x280);
 ASSERT_REG_POSITION(viewport, 0x300);
 ASSERT_REG_POSITION(vertex_buffer, 0x35D);
 ASSERT_REG_POSITION(zeta, 0x3F8);
@@ -573,8 +667,10 @@ ASSERT_REG_POSITION(tsc, 0x557);
 ASSERT_REG_POSITION(tic, 0x55D);
 ASSERT_REG_POSITION(code_address, 0x582);
 ASSERT_REG_POSITION(draw, 0x585);
+ASSERT_REG_POSITION(index_array, 0x5F2);
 ASSERT_REG_POSITION(query, 0x6C0);
 ASSERT_REG_POSITION(vertex_array[0], 0x700);
+ASSERT_REG_POSITION(blend, 0x780);
 ASSERT_REG_POSITION(vertex_array_limit[0], 0x7C0);
 ASSERT_REG_POSITION(shader_config[0], 0x800);
 ASSERT_REG_POSITION(const_buffer, 0x8E0);
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -0,0 +1,361 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <cstring>
+#include <map>
+#include <string>
+#include "common/bit_field.h"
+
+namespace Tegra {
+namespace Shader {
+
+struct Register {
+    constexpr Register() = default;
+
+    constexpr Register(u64 value) : value(value) {}
+
+    constexpr operator u64() const {
+        return value;
+    }
+
+    template <typename T>
+    constexpr u64 operator-(const T& oth) const {
+        return value - oth;
+    }
+
+    template <typename T>
+    constexpr u64 operator&(const T& oth) const {
+        return value & oth;
+    }
+
+    constexpr u64 operator&(const Register& oth) const {
+        return value & oth.value;
+    }
+
+    constexpr u64 operator~() const {
+        return ~value;
+    }
+
+private:
+    u64 value{};
+};
+
+union Attribute {
+    Attribute() = default;
+
+    constexpr explicit Attribute(u64 value) : value(value) {}
+
+    enum class Index : u64 {
+        Position = 7,
+        Attribute_0 = 8,
+    };
+
+    union {
+        BitField<22, 2, u64> element;
+        BitField<24, 6, Index> index;
+        BitField<47, 3, u64> size;
+    } fmt20;
+
+    union {
+        BitField<30, 2, u64> element;
+        BitField<32, 6, Index> index;
+    } fmt28;
+
+    BitField<39, 8, u64> reg;
+    u64 value{};
+};
+
+union Sampler {
+    Sampler() = default;
+
+    constexpr explicit Sampler(u64 value) : value(value) {}
+
+    enum class Index : u64 {
+        Sampler_0 = 8,
+    };
+
+    BitField<36, 13, Index> index;
+    u64 value{};
+};
+
+union Uniform {
+    BitField<20, 14, u64> offset;
+    BitField<34, 5, u64> index;
+};
+
+union OpCode {
+    enum class Id : u64 {
+        TEXS = 0x6C,
+        IPA = 0xE0,
+        FMUL32_IMM = 0x1E,
+        FFMA_IMM = 0x65,
+        FFMA_CR = 0x93,
+        FFMA_RC = 0xA3,
+        FFMA_RR = 0xB3,
+
+        FADD_C = 0x98B,
+        FMUL_C = 0x98D,
+        MUFU = 0xA10,
+        FADD_R = 0xB8B,
+        FMUL_R = 0xB8D,
+        LD_A = 0x1DFB,
+        ST_A = 0x1DFE,
+
+        FSETP_R = 0x5BB,
+        FSETP_C = 0x4BB,
+        EXIT = 0xE30,
+        KIL = 0xE33,
+
+        FMUL_IMM = 0x70D,
+        FMUL_IMM_x = 0x72D,
+        FADD_IMM = 0x70B,
+        FADD_IMM_x = 0x72B,
+    };
+
+    enum class Type {
+        Trivial,
+        Arithmetic,
+        Ffma,
+        Flow,
+        Memory,
+        Unknown,
+    };
+
+    struct Info {
+        Type type;
+        std::string name;
+    };
+
+    OpCode() = default;
+
+    constexpr OpCode(Id value) : value(static_cast<u64>(value)) {}
+
+    constexpr OpCode(u64 value) : value{value} {}
+
+    constexpr Id EffectiveOpCode() const {
+        switch (op1) {
+        case Id::TEXS:
+            return op1;
+        }
+
+        switch (op2) {
+        case Id::IPA:
+        case Id::FMUL32_IMM:
+            return op2;
+        }
+
+        switch (op3) {
+        case Id::FFMA_IMM:
+        case Id::FFMA_CR:
+        case Id::FFMA_RC:
+        case Id::FFMA_RR:
+            return op3;
+        }
+
+        switch (op4) {
+        case Id::EXIT:
+        case Id::FSETP_R:
+        case Id::FSETP_C:
+        case Id::KIL:
+            return op4;
+        }
+
+        switch (op5) {
+        case Id::MUFU:
+        case Id::LD_A:
+        case Id::ST_A:
+        case Id::FADD_R:
+        case Id::FADD_C:
+        case Id::FMUL_R:
+        case Id::FMUL_C:
+            return op5;
+
+        case Id::FMUL_IMM:
+        case Id::FMUL_IMM_x:
+            return Id::FMUL_IMM;
+
+        case Id::FADD_IMM:
+        case Id::FADD_IMM_x:
+            return Id::FADD_IMM;
+        }
+
+        return static_cast<Id>(value);
+    }
+
+    static const Info& GetInfo(const OpCode& opcode) {
+        static const std::map<Id, Info> info_table{BuildInfoTable()};
+        const auto& search{info_table.find(opcode.EffectiveOpCode())};
+        if (search != info_table.end()) {
+            return search->second;
+        }
+
+        static const Info unknown{Type::Unknown, "UNK"};
+        return unknown;
+    }
+
+    constexpr operator Id() const {
+        return static_cast<Id>(value);
+    }
+
+    constexpr OpCode operator<<(size_t bits) const {
+        return value << bits;
+    }
+
+    constexpr OpCode operator>>(size_t bits) const {
+        return value >> bits;
+    }
+
+    template <typename T>
+    constexpr u64 operator-(const T& oth) const {
+        return value - oth;
+    }
+
+    constexpr u64 operator&(const OpCode& oth) const {
+        return value & oth.value;
+    }
+
+    constexpr u64 operator~() const {
+        return ~value;
+    }
+
+    static std::map<Id, Info> BuildInfoTable() {
+        std::map<Id, Info> info_table;
+        info_table[Id::TEXS] = {Type::Memory, "texs"};
+        info_table[Id::LD_A] = {Type::Memory, "ld_a"};
+        info_table[Id::ST_A] = {Type::Memory, "st_a"};
+        info_table[Id::MUFU] = {Type::Arithmetic, "mufu"};
+        info_table[Id::FFMA_IMM] = {Type::Ffma, "ffma_imm"};
+        info_table[Id::FFMA_CR] = {Type::Ffma, "ffma_cr"};
+        info_table[Id::FFMA_RC] = {Type::Ffma, "ffma_rc"};
+        info_table[Id::FFMA_RR] = {Type::Ffma, "ffma_rr"};
+        info_table[Id::FADD_R] = {Type::Arithmetic, "fadd_r"};
+        info_table[Id::FADD_C] = {Type::Arithmetic, "fadd_c"};
+        info_table[Id::FADD_IMM] = {Type::Arithmetic, "fadd_imm"};
+        info_table[Id::FMUL_R] = {Type::Arithmetic, "fmul_r"};
+        info_table[Id::FMUL_C] = {Type::Arithmetic, "fmul_c"};
+        info_table[Id::FMUL_IMM] = {Type::Arithmetic, "fmul_imm"};
+        info_table[Id::FMUL32_IMM] = {Type::Arithmetic, "fmul32_imm"};
+        info_table[Id::FSETP_C] = {Type::Arithmetic, "fsetp_c"};
+        info_table[Id::FSETP_R] = {Type::Arithmetic, "fsetp_r"};
+        info_table[Id::EXIT] = {Type::Trivial, "exit"};
+        info_table[Id::IPA] = {Type::Trivial, "ipa"};
+        info_table[Id::KIL] = {Type::Flow, "kil"};
+        return info_table;
+    }
+
+    BitField<57, 7, Id> op1;
+    BitField<56, 8, Id> op2;
+    BitField<55, 9, Id> op3;
+    BitField<52, 12, Id> op4;
+    BitField<51, 13, Id> op5;
+    u64 value{};
+};
+static_assert(sizeof(OpCode) == 0x8, "Incorrect structure size");
+
+} // namespace Shader
+} // namespace Tegra
+
+namespace std {
+
+// TODO(bunnei): The below is forbidden by the C++ standard, but works fine. See #330.
+template <>
+struct make_unsigned<Tegra::Shader::Attribute> {
+    using type = Tegra::Shader::Attribute;
+};
+
+template <>
+struct make_unsigned<Tegra::Shader::Register> {
+    using type = Tegra::Shader::Register;
+};
+
+template <>
+struct make_unsigned<Tegra::Shader::OpCode> {
+    using type = Tegra::Shader::OpCode;
+};
+
+} // namespace std
+
+namespace Tegra {
+namespace Shader {
+
+enum class Pred : u64 {
+    UnusedIndex = 0x7,
+    NeverExecute = 0xf,
+};
+
+enum class SubOp : u64 {
+    Cos = 0x0,
+    Sin = 0x1,
+    Ex2 = 0x2,
+    Lg2 = 0x3,
+    Rcp = 0x4,
+    Rsq = 0x5,
+    Min = 0x8,
+};
+
+union Instruction {
+    Instruction& operator=(const Instruction& instr) {
+        hex = instr.hex;
+        return *this;
+    }
+
+    OpCode opcode;
+    BitField<0, 8, Register> gpr0;
+    BitField<8, 8, Register> gpr8;
+    BitField<16, 4, Pred> pred;
+    BitField<20, 8, Register> gpr20;
+    BitField<20, 7, SubOp> sub_op;
+    BitField<28, 8, Register> gpr28;
+    BitField<39, 8, Register> gpr39;
+
+    union {
+        BitField<20, 19, u64> imm20_19;
+        BitField<20, 32, u64> imm20_32;
+        BitField<45, 1, u64> negate_b;
+        BitField<46, 1, u64> abs_a;
+        BitField<48, 1, u64> negate_a;
+        BitField<49, 1, u64> abs_b;
+        BitField<50, 1, u64> abs_d;
+        BitField<56, 1, u64> negate_imm;
+
+        float GetImm20_19() const {
+            float result{};
+            u32 imm{static_cast<u32>(imm20_19)};
+            imm <<= 12;
+            imm |= negate_imm ? 0x80000000 : 0;
+            std::memcpy(&result, &imm, sizeof(imm));
+            return result;
+        }
+
+        float GetImm20_32() const {
+            float result{};
+            u32 imm{static_cast<u32>(imm20_32)};
+            std::memcpy(&result, &imm, sizeof(imm));
+            return result;
+        }
+    } alu;
+
+    union {
+        BitField<48, 1, u64> negate_b;
+        BitField<49, 1, u64> negate_c;
+    } ffma;
+
+    BitField<61, 1, u64> is_b_imm;
+    BitField<60, 1, u64> is_b_gpr;
+    BitField<59, 1, u64> is_c_gpr;
+
+    Attribute attribute;
+    Uniform uniform;
+    Sampler sampler;
+
+    u64 hex;
+};
+static_assert(sizeof(Instruction) == 0x8, "Incorrect structure size");
+static_assert(std::is_standard_layout<Instruction>::value,
+              "Structure does not have standard layout");
+
+} // namespace Shader
+} // namespace Tegra
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -15,7 +15,10 @@ namespace Tegra {

 enum class RenderTargetFormat : u32 {
    NONE = 0x0,
+    RGBA16_FLOAT = 0xCA,
+    RGB10_A2_UNORM = 0xD1,
    RGBA8_UNORM = 0xD5,
+    RGBA8_SRGB = 0xD6,
 };

 class DebugContext;
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -19,7 +19,7 @@ public:
    virtual void DrawArrays() = 0;

    /// Notify rasterizer that the specified Maxwell register has been changed
-    virtual void NotifyMaxwellRegisterChanged(u32 id) = 0;
+    virtual void NotifyMaxwellRegisterChanged(u32 method) = 0;

    /// Notify rasterizer that all caches should be flushed to Switch memory
    virtual void FlushAll() = 0;
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -34,33 +34,7 @@ MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192));
 MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255));
 MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100));

-enum class UniformBindings : GLuint { Common, VS, FS };
-
-static void SetShaderUniformBlockBinding(GLuint shader, const char* name, UniformBindings binding,
-                                         size_t expected_size) {
-    GLuint ub_index = glGetUniformBlockIndex(shader, name);
-    if (ub_index != GL_INVALID_INDEX) {
-        GLint ub_size = 0;
-        glGetActiveUniformBlockiv(shader, ub_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size);
-        ASSERT_MSG(ub_size == expected_size,
-                   "Uniform block size did not match! Got %d, expected %zu",
-                   static_cast<int>(ub_size), expected_size);
-        glUniformBlockBinding(shader, ub_index, static_cast<GLuint>(binding));
-    }
-}
-
-static void SetShaderUniformBlockBindings(GLuint shader) {
-    SetShaderUniformBlockBinding(shader, "shader_data", UniformBindings::Common,
-                                 sizeof(RasterizerOpenGL::UniformData));
-    SetShaderUniformBlockBinding(shader, "vs_config", UniformBindings::VS,
-                                 sizeof(RasterizerOpenGL::VSUniformData));
-    SetShaderUniformBlockBinding(shader, "fs_config", UniformBindings::FS,
-                                 sizeof(RasterizerOpenGL::FSUniformData));
-}
-
 RasterizerOpenGL::RasterizerOpenGL() {
-    shader_dirty = true;
-
    has_ARB_buffer_storage = false;
    has_ARB_direct_state_access = false;
    has_ARB_separate_shader_objects = false;
@@ -72,6 +46,14 @@ RasterizerOpenGL::RasterizerOpenGL() {
        state.texture_units[i].sampler = texture_samplers[i].sampler.handle;
    }

+    // Create SSBOs
+    for (size_t stage = 0; stage < ssbos.size(); ++stage) {
+        for (size_t buffer = 0; buffer < ssbos[stage].size(); ++buffer) {
+            ssbos[stage][buffer].Create();
+            state.draw.const_buffers[stage][buffer].ssbo = ssbos[stage][buffer].handle;
+        }
+    }
+
    GLint ext_num;
    glGetIntegerv(GL_NUM_EXTENSIONS, &ext_num);
    for (GLint i = 0; i < ext_num; i++) {
@@ -88,6 +70,8 @@ RasterizerOpenGL::RasterizerOpenGL() {
        }
    }

+    ASSERT_MSG(has_ARB_separate_shader_objects, "has_ARB_separate_shader_objects is unsupported");
+
    // Clipping plane 0 is always enabled for PICA fixed clip plane z <= 0
    state.clip_distance[0] = true;

@@ -102,36 +86,30 @@ RasterizerOpenGL::RasterizerOpenGL() {
    state.draw.uniform_buffer = uniform_buffer.handle;
    state.Apply();

-    glBufferData(GL_UNIFORM_BUFFER, sizeof(UniformData), nullptr, GL_STATIC_DRAW);
-    glBindBufferBase(GL_UNIFORM_BUFFER, 0, uniform_buffer.handle);
-
-    uniform_block_data.dirty = true;
-
    // Create render framebuffer
    framebuffer.Create();

-    if (has_ARB_separate_shader_objects) {
-        hw_vao.Create();
-        hw_vao_enabled_attributes.fill(false);
+    hw_vao.Create();
+    hw_vao_enabled_attributes.fill(false);

-        stream_buffer = OGLStreamBuffer::MakeBuffer(has_ARB_buffer_storage, GL_ARRAY_BUFFER);
-        stream_buffer->Create(STREAM_BUFFER_SIZE, STREAM_BUFFER_SIZE / 2);
-        state.draw.vertex_buffer = stream_buffer->GetHandle();
+    stream_buffer = OGLStreamBuffer::MakeBuffer(has_ARB_buffer_storage, GL_ARRAY_BUFFER);
+    stream_buffer->Create(STREAM_BUFFER_SIZE, STREAM_BUFFER_SIZE / 2);
+    state.draw.vertex_buffer = stream_buffer->GetHandle();

-        pipeline.Create();
-        state.draw.program_pipeline = pipeline.handle;
-        state.draw.shader_program = 0;
-        state.draw.vertex_array = hw_vao.handle;
-        state.Apply();
+    shader_program_manager = std::make_unique<GLShader::ProgramManager>();
+    state.draw.shader_program = 0;
+    state.draw.vertex_array = hw_vao.handle;
+    state.Apply();

-        glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, stream_buffer->GetHandle());
+    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, stream_buffer->GetHandle());

-        vs_uniform_buffer.Create();
-        glBindBuffer(GL_UNIFORM_BUFFER, vs_uniform_buffer.handle);
-        glBufferData(GL_UNIFORM_BUFFER, sizeof(VSUniformData), nullptr, GL_STREAM_COPY);
-        glBindBufferBase(GL_UNIFORM_BUFFER, 1, vs_uniform_buffer.handle);
-    } else {
-        UNREACHABLE();
+    for (unsigned index = 0; index < uniform_buffers.size(); ++index) {
+        auto& buffer = uniform_buffers[index];
+        buffer.Create();
+        glBindBuffer(GL_UNIFORM_BUFFER, buffer.handle);
+        glBufferData(GL_UNIFORM_BUFFER, sizeof(GLShader::MaxwellUniformData), nullptr,
+                     GL_STREAM_COPY);
+        glBindBufferBase(GL_UNIFORM_BUFFER, index, buffer.handle);
    }

    accelerate_draw = AccelDraw::Disabled;
@@ -149,17 +127,6 @@ RasterizerOpenGL::~RasterizerOpenGL() {
    }
 }

-void RasterizerOpenGL::AnalyzeVertexArray(bool is_indexed) {
-    const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
-
-    if (is_indexed) {
-        UNREACHABLE();
-    }
-
-    // TODO(bunnei): Add support for 1+ vertex arrays
-    vs_input_size = regs.vertex_buffer.count * regs.vertex_array[0].stride;
-}
-
 void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) {
    MICROPROFILE_SCOPE(OpenGL_VAO);
    const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
@@ -171,6 +138,7 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) {

    // TODO(bunnei): Add support for 1+ vertex arrays
    const auto& vertex_array{regs.vertex_array[0]};
+    const auto& vertex_array_limit{regs.vertex_array_limit[0]};
    ASSERT_MSG(vertex_array.enable, "vertex array 0 is disabled?");
    ASSERT_MSG(!vertex_array.divisor, "vertex array 0 divisor is unimplemented!");
    for (unsigned index = 1; index < Maxwell::NumVertexArrays; ++index) {
@@ -183,6 +151,10 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) {
    // to avoid OpenGL errors.
    for (unsigned index = 0; index < 16; ++index) {
        auto& attrib = regs.vertex_attrib_format[index];
+        NGLOG_DEBUG(HW_GPU, "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}",
+                    index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(),
+                    attrib.offset.Value(), attrib.IsNormalized());
+
        glVertexAttribPointer(index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib),
                              attrib.IsNormalized() ? GL_TRUE : GL_FALSE, vertex_array.stride,
                              reinterpret_cast<GLvoid*>(buffer_offset + attrib.offset));
@@ -191,7 +163,7 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) {
    }

    // Copy vertex array data
-    const u32 data_size{vertex_array.stride * regs.vertex_buffer.count};
+    const u64 data_size{vertex_array_limit.LimitAddress() - vertex_array.StartAddress() + 1};
    const VAddr data_addr{memory_manager->PhysicalToVirtualAddress(vertex_array.StartAddress())};
    res_cache.FlushRegion(data_addr, data_size, nullptr);
    Memory::ReadBlock(data_addr, array_ptr, data_size);
@@ -200,26 +172,89 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) {
    buffer_offset += data_size;
 }

-void RasterizerOpenGL::SetupVertexShader(VSUniformData* ub_ptr, GLintptr buffer_offset) {
-    MICROPROFILE_SCOPE(OpenGL_VS);
-    LOG_CRITICAL(Render_OpenGL, "Emulated shaders are not supported! Using a passthrough shader.");
-    glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, current_shader->shader.handle);
-}
+void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size_t ptr_pos) {
+    // Helper function for uploading uniform data
+    const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) {
+        if (has_ARB_direct_state_access) {
+            glCopyNamedBufferSubData(stream_buffer->GetHandle(), handle, offset, 0, size);
+        } else {
+            glBindBuffer(GL_COPY_WRITE_BUFFER, handle);
+            glCopyBufferSubData(GL_ARRAY_BUFFER, GL_COPY_WRITE_BUFFER, offset, 0, size);
+        }
+    };

-void RasterizerOpenGL::SetupFragmentShader(FSUniformData* ub_ptr, GLintptr buffer_offset) {
-    MICROPROFILE_SCOPE(OpenGL_FS);
-    UNREACHABLE();
+    auto& gpu = Core::System().GetInstance().GPU().Maxwell3D();
+    ASSERT_MSG(!gpu.regs.shader_config[0].enable, "VertexA is unsupported!");
+
+    // Next available bindpoint to use when uploading the const buffers to the GLSL shaders.
+    u32 current_constbuffer_bindpoint = 0;
+
+    for (unsigned index = 1; index < Maxwell::MaxShaderProgram; ++index) {
+        ptr_pos += sizeof(GLShader::MaxwellUniformData);
+
+        auto& shader_config = gpu.regs.shader_config[index];
+        const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)};
+
+        const auto& stage = index - 1; // Stage indices are 0 - 4
+
+        const bool is_enabled = gpu.IsShaderStageEnabled(static_cast<Maxwell::ShaderStage>(stage));
+
+        // Skip stages that are not enabled
+        if (!is_enabled) {
+            continue;
+        }
+
+        // Upload uniform data as one UBO per stage
+        const GLintptr ubo_offset = buffer_offset + static_cast<GLintptr>(ptr_pos);
+        copy_buffer(uniform_buffers[stage].handle, ubo_offset,
+                    sizeof(GLShader::MaxwellUniformData));
+        GLShader::MaxwellUniformData* ub_ptr =
+            reinterpret_cast<GLShader::MaxwellUniformData*>(&buffer_ptr[ptr_pos]);
+        ub_ptr->SetFromRegs(gpu.state.shader_stages[stage]);
+
+        // Fetch program code from memory
+        GLShader::ProgramCode program_code;
+        const u64 gpu_address{gpu.regs.code_address.CodeAddress() + shader_config.offset};
+        const VAddr cpu_address{gpu.memory_manager.PhysicalToVirtualAddress(gpu_address)};
+        Memory::ReadBlock(cpu_address, program_code.data(), program_code.size() * sizeof(u64));
+        GLShader::ShaderSetup setup{std::move(program_code)};
+
+        GLShader::ShaderEntries shader_resources;
+
+        switch (program) {
+        case Maxwell::ShaderProgram::VertexB: {
+            GLShader::MaxwellVSConfig vs_config{setup};
+            shader_resources =
+                shader_program_manager->UseProgrammableVertexShader(vs_config, setup);
+            break;
+        }
+        case Maxwell::ShaderProgram::Fragment: {
+            GLShader::MaxwellFSConfig fs_config{setup};
+            shader_resources =
+                shader_program_manager->UseProgrammableFragmentShader(fs_config, setup);
+            break;
+        }
+        default:
+            LOG_CRITICAL(HW_GPU, "Unimplemented shader index=%d, enable=%d, offset=0x%08X", index,
+                         shader_config.enable.Value(), shader_config.offset);
+            UNREACHABLE();
+        }
+
+        GLuint gl_stage_program = shader_program_manager->GetCurrentProgramStage(
+            static_cast<Maxwell::ShaderStage>(stage));
+
+        // Configure the const buffers for this shader stage.
+        current_constbuffer_bindpoint =
+            SetupConstBuffers(static_cast<Maxwell::ShaderStage>(stage), gl_stage_program,
+                              current_constbuffer_bindpoint, shader_resources.const_buffer_entries);
+    }
+
+    shader_program_manager->UseTrivialGeometryShader();
 }

 bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) {
-    if (!has_ARB_separate_shader_objects) {
-        UNREACHABLE();
-        return false;
-    }
-
    accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays;
    DrawArrays();
-
    return true;
 }

@@ -280,18 +315,6 @@ void RasterizerOpenGL::DrawArrays() {
    // Sync and bind the texture surfaces
    BindTextures();

-    // Sync and bind the shader
-    if (shader_dirty) {
-        SetShader();
-        shader_dirty = false;
-    }
-
-    // Sync the uniform data
-    if (uniform_block_data.dirty) {
-        glBufferSubData(GL_UNIFORM_BUFFER, 0, sizeof(UniformData), &uniform_block_data.data);
-        uniform_block_data.dirty = false;
-    }
-
    // Viewport can have negative offsets or larger dimensions than our framebuffer sub-rect. Enable
    // scissor test to prevent drawing outside of the framebuffer region
    state.scissor.enabled = true;
@@ -303,15 +326,22 @@ void RasterizerOpenGL::DrawArrays() {

    // Draw the vertex batch
    const bool is_indexed = accelerate_draw == AccelDraw::Indexed;
-    AnalyzeVertexArray(is_indexed);
+    const u64 index_buffer_size{regs.index_array.count * regs.index_array.FormatSizeInBytes()};
+    const unsigned vertex_num{is_indexed ? regs.index_array.count : regs.vertex_buffer.count};
+
+    // TODO(bunnei): Add support for 1+ vertex arrays
+    vs_input_size = vertex_num * regs.vertex_array[0].stride;
+
    state.draw.vertex_buffer = stream_buffer->GetHandle();
    state.Apply();

    size_t buffer_size = static_cast<size_t>(vs_input_size);
    if (is_indexed) {
-        UNREACHABLE();
+        buffer_size = Common::AlignUp(buffer_size, 4) + index_buffer_size;
    }
-    buffer_size += sizeof(VSUniformData);
+
+    // Uniform space for the 5 shader stages
+    buffer_size += sizeof(GLShader::MaxwellUniformData) * Maxwell::MaxShaderStage;

    size_t ptr_pos = 0;
    u8* buffer_ptr;
@@ -322,36 +352,37 @@ void RasterizerOpenGL::DrawArrays() {
    SetupVertexArray(buffer_ptr, buffer_offset);
    ptr_pos += vs_input_size;

+    // If indexed mode, copy the index buffer
    GLintptr index_buffer_offset = 0;
    if (is_indexed) {
-        UNREACHABLE();
+        ptr_pos = Common::AlignUp(ptr_pos, 4);
+        // Copy the guest indices into buffer_ptr (GetInstance() is a static accessor)
+        const auto& memory_manager = Core::System::GetInstance().GPU().memory_manager;
+        const VAddr index_data_addr{
+            memory_manager->PhysicalToVirtualAddress(regs.index_array.StartAddress())};
+        Memory::ReadBlock(index_data_addr, &buffer_ptr[ptr_pos], index_buffer_size);
+        // Record where the indices start so the draw call below can reference them
+        index_buffer_offset = buffer_offset + static_cast<GLintptr>(ptr_pos);
+        ptr_pos += index_buffer_size;
    }

-    SetupVertexShader(reinterpret_cast<VSUniformData*>(&buffer_ptr[ptr_pos]),
-                      buffer_offset + static_cast<GLintptr>(ptr_pos));
-    const GLintptr vs_ubo_offset = buffer_offset + static_cast<GLintptr>(ptr_pos);
-    ptr_pos += sizeof(VSUniformData);
+    SetupShaders(buffer_ptr, buffer_offset, ptr_pos);

    stream_buffer->Unmap();

-    const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) {
-        if (has_ARB_direct_state_access) {
-            glCopyNamedBufferSubData(stream_buffer->GetHandle(), handle, offset, 0, size);
-        } else {
-            glBindBuffer(GL_COPY_WRITE_BUFFER, handle);
-            glCopyBufferSubData(GL_ARRAY_BUFFER, GL_COPY_WRITE_BUFFER, offset, 0, size);
-        }
-    };
-
-    copy_buffer(vs_uniform_buffer.handle, vs_ubo_offset, sizeof(VSUniformData));
-
-    glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, current_shader->shader.handle);
+    shader_program_manager->ApplyTo(state);
+    state.Apply();

+    const GLenum primitive_mode{MaxwellToGL::PrimitiveTopology(regs.draw.topology)};
    if (is_indexed) {
-        UNREACHABLE();
+        const GLint index_min{static_cast<GLint>(regs.index_array.first)};
+        const GLint index_max{static_cast<GLint>(regs.index_array.first + regs.index_array.count)};
+        glDrawRangeElementsBaseVertex(primitive_mode, index_min, index_max, regs.index_array.count,
+                                      MaxwellToGL::IndexFormat(regs.index_array.format),
+                                      reinterpret_cast<const void*>(index_buffer_offset),
+                                      -index_min);
    } else {
-        glDrawArrays(MaxwellToGL::PrimitiveTopology(regs.draw.topology), 0,
-                     regs.vertex_buffer.count);
+        glDrawArrays(primitive_mode, 0, regs.vertex_buffer.count);
    }

    // Disable scissor test
@@ -384,7 +415,7 @@ void RasterizerOpenGL::DrawArrays() {

 void RasterizerOpenGL::BindTextures() {
    using Regs = Tegra::Engines::Maxwell3D::Regs;
-    auto maxwell3d = Core::System::GetInstance().GPU().Get3DEngine();
+    auto& maxwell3d = Core::System::GetInstance().GPU().Get3DEngine();

    // Each Maxwell shader stage can have an arbitrary number of textures, but we're limited to a
    // certain number in OpenGL. We try to only use the minimum amount of host textures by not
@@ -415,7 +446,36 @@ void RasterizerOpenGL::BindTextures() {
    }
 }

-void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 id) {}
+/// Copies a changed Maxwell3D blend register into the cached OpenGL blend state.
+/// @param method Register index, compared against MAXWELL3D_REG_INDEX values; indices that
+///               match none of the blend registers below are ignored.
+void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 method) {
+    // GetInstance() is static: call it on the type rather than on a temporary Core::System.
+    const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+    switch (method) {
+    case MAXWELL3D_REG_INDEX(blend.separate_alpha):
+        ASSERT_MSG(false, "unimplemented");
+        break;
+    case MAXWELL3D_REG_INDEX(blend.equation_rgb):
+        state.blend.rgb_equation = MaxwellToGL::BlendEquation(regs.blend.equation_rgb);
+        break;
+    case MAXWELL3D_REG_INDEX(blend.factor_source_rgb):
+        state.blend.src_rgb_func = MaxwellToGL::BlendFunc(regs.blend.factor_source_rgb);
+        break;
+    case MAXWELL3D_REG_INDEX(blend.factor_dest_rgb):
+        state.blend.dst_rgb_func = MaxwellToGL::BlendFunc(regs.blend.factor_dest_rgb);
+        break;
+    case MAXWELL3D_REG_INDEX(blend.equation_a):
+        state.blend.a_equation = MaxwellToGL::BlendEquation(regs.blend.equation_a);
+        break;
+    case MAXWELL3D_REG_INDEX(blend.factor_source_a):
+        state.blend.src_a_func = MaxwellToGL::BlendFunc(regs.blend.factor_source_a);
+        break;
+    case MAXWELL3D_REG_INDEX(blend.factor_dest_a):
+        state.blend.dst_a_func = MaxwellToGL::BlendFunc(regs.blend.factor_dest_a);
+        break;
+    }
+}

 void RasterizerOpenGL::FlushAll() {
    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
@@ -467,9 +523,12 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& framebu
    src_params.width = std::min(framebuffer.width, pixel_stride);
    src_params.height = framebuffer.height;
    src_params.stride = pixel_stride;
-    src_params.is_tiled = false;
+    src_params.is_tiled = true;
+    src_params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight;
    src_params.pixel_format =
        SurfaceParams::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format);
+    src_params.component_type =
+        SurfaceParams::ComponentTypeFromGPUPixelFormat(framebuffer.pixel_format);
    src_params.UpdateParams();

    MathUtil::Rectangle<u32> src_rect;
@@ -531,70 +590,53 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntr
    }
 }

-void RasterizerOpenGL::SetShader() {
-    // TODO(bunnei): The below sets up a static test shader for passing untransformed vertices to
-    // OpenGL for rendering. This should be removed/replaced when we start emulating Maxwell
-    // shaders.
+u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, GLuint program,
+                                        u32 current_bindpoint,
+                                        const std::vector<GLShader::ConstBufferEntry>& entries) {
+    auto& gpu = Core::System::GetInstance().GPU();
+    auto& maxwell3d = gpu.Get3DEngine();

-    static constexpr char vertex_shader[] = R"(
-#version 150 core
+    ASSERT_MSG(maxwell3d.IsShaderStageEnabled(stage),
+               "Attempted to upload constbuffer of disabled shader stage");

-in vec2 vert_position;
-in vec2 vert_tex_coord;
-out vec2 frag_tex_coord;
-
-void main() {
-    // Multiply input position by the rotscale part of the matrix and then manually translate by
-    // the last column. This is equivalent to using a full 3x3 matrix and expanding the vector
-    // to `vec3(vert_position.xy, 1.0)`
-    gl_Position = vec4(mat2(mat3x2(0.0015625f, 0.0, 0.0, -0.0027778, -1.0, 1.0)) * vert_position + mat3x2(0.0015625f, 0.0, 0.0, -0.0027778, -1.0, 1.0)[2], 0.0, 1.0);
-    frag_tex_coord = vert_tex_coord;
-}
-)";
-
-    static constexpr char fragment_shader[] = R"(
-#version 150 core
-
-in vec2 frag_tex_coord;
-out vec4 color;
-
-uniform sampler2D tex[32];
-
-void main() {
-    color = texture(tex[0], frag_tex_coord);
-}
-)";
-
-    if (current_shader) {
-        return;
+    // Reset all buffer draw state for this stage.
+    for (auto& buffer : state.draw.const_buffers[static_cast<size_t>(stage)]) {
+        buffer.bindpoint = 0;
+        buffer.enabled = false;
    }

-    LOG_CRITICAL(Render_OpenGL, "Emulated shaders are not supported! Using a passthrough shader.");
+    // Upload only the enabled buffers from the 16 constbuffers of each shader stage
+    auto& shader_stage = maxwell3d.state.shader_stages[static_cast<size_t>(stage)];

-    current_shader = &test_shader;
-    if (has_ARB_separate_shader_objects) {
-        test_shader.shader.Create(vertex_shader, nullptr, fragment_shader, {}, true);
-        glActiveShaderProgram(pipeline.handle, test_shader.shader.handle);
-    } else {
-        UNREACHABLE();
+    for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
+        const auto& used_buffer = entries[bindpoint];
+        const auto& buffer = shader_stage.const_buffers[used_buffer.GetIndex()];
+        auto& buffer_draw_state =
+            state.draw.const_buffers[static_cast<size_t>(stage)][used_buffer.GetIndex()];
+
+        ASSERT_MSG(buffer.enabled, "Attempted to upload disabled constbuffer");
+        buffer_draw_state.enabled = true;
+        buffer_draw_state.bindpoint = current_bindpoint + bindpoint;
+        // Read the guest buffer contents into a host-side staging vector
+        VAddr addr = gpu.memory_manager->PhysicalToVirtualAddress(buffer.address);
+        std::vector<u8> data(used_buffer.GetSize() * sizeof(float));
+        Memory::ReadBlock(addr, data.data(), data.size());
+        // Upload the staged contents into this buffer's SSBO
+        glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer_draw_state.ssbo);
+        glBufferData(GL_SHADER_STORAGE_BUFFER, data.size(), data.data(), GL_DYNAMIC_DRAW);
+        glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
+
+        // Now configure the bindpoint of the buffer inside the shader. The lookup yields
+        // GL_INVALID_INDEX when the program has no storage block with this name.
+        const GLuint index = glGetProgramResourceIndex(program, GL_SHADER_STORAGE_BLOCK,
+                                                       used_buffer.GetName().c_str());
+        if (index != GL_INVALID_INDEX)
+            glShaderStorageBlockBinding(program, index, buffer_draw_state.bindpoint);
    }

-    state.draw.shader_program = test_shader.shader.handle;
    state.Apply();

-    for (u32 texture = 0; texture < texture_samplers.size(); ++texture) {
-        // Set the texture samplers to correspond to different texture units
-        std::string uniform_name = "tex[" + std::to_string(texture) + "]";
-        GLint uniform_tex = glGetUniformLocation(test_shader.shader.handle, uniform_name.c_str());
-        if (uniform_tex != -1) {
-            glUniform1i(uniform_tex, TextureUnits::MaxwellTexture(texture).id);
-        }
-    }
-
-    if (has_ARB_separate_shader_objects) {
-        state.draw.shader_program = 0;
-        state.Apply();
-    }
+    return current_bindpoint + static_cast<u32>(entries.size());
 }

 void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface,
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -15,10 +15,12 @@
 #include "common/common_types.h"
 #include "common/hash.h"
 #include "common/vector_math.h"
+#include "video_core/engines/maxwell_3d.h"
 #include "video_core/rasterizer_interface.h"
 #include "video_core/renderer_opengl/gl_rasterizer_cache.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
 #include "video_core/renderer_opengl/gl_shader_gen.h"
+#include "video_core/renderer_opengl/gl_shader_manager.h"
 #include "video_core/renderer_opengl/gl_state.h"
 #include "video_core/renderer_opengl/gl_stream_buffer.h"

@@ -30,7 +32,7 @@ public:
    ~RasterizerOpenGL() override;

    void DrawArrays() override;
-    void NotifyMaxwellRegisterChanged(u32 id) override;
+    void NotifyMaxwellRegisterChanged(u32 method) override;
    void FlushAll() override;
    void FlushRegion(VAddr addr, u64 size) override;
    void InvalidateRegion(VAddr addr, u64 size) override;
@@ -45,7 +47,7 @@ public:
    /// OpenGL shader generated for a given Maxwell register state
    struct MaxwellShader {
        /// OpenGL shader resource
-        OGLShader shader;
+        OGLProgram shader;
    };

    struct VertexShader {
@@ -56,34 +58,6 @@ public:
        OGLShader shader;
    };

-    /// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
-    // NOTE: Always keep a vec4 at the end. The GL spec is not clear wether the alignment at
-    //       the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
-    //       Not following that rule will cause problems on some AMD drivers.
-    struct UniformData {};
-
-    // static_assert(
-    //    sizeof(UniformData) == 0x460,
-    //    "The size of the UniformData structure has changed, update the structure in the shader");
-    static_assert(sizeof(UniformData) < 16384,
-                  "UniformData structure must be less than 16kb as per the OpenGL spec");
-
-    struct VSUniformData {};
-    // static_assert(
-    //    sizeof(VSUniformData) == 1856,
-    //    "The size of the VSUniformData structure has changed, update the structure in the
-    //    shader");
-    static_assert(sizeof(VSUniformData) < 16384,
-                  "VSUniformData structure must be less than 16kb as per the OpenGL spec");
-
-    struct FSUniformData {};
-    // static_assert(
-    //    sizeof(FSUniformData) == 1856,
-    //    "The size of the FSUniformData structure has changed, update the structure in the
-    //    shader");
-    static_assert(sizeof(FSUniformData) < 16384,
-                  "FSUniformData structure must be less than 16kb as per the OpenGL spec");
-
 private:
    class SamplerInfo {
    public:
@@ -113,6 +87,18 @@ private:
    /// Binds the required textures to OpenGL before drawing a batch.
    void BindTextures();

+    /**
+     * Configures the current constbuffers to use for the draw command.
+     * @param stage The shader stage to configure buffers for.
+     * @param program The OpenGL program object that contains the specified stage.
+     * @param current_bindpoint The offset at which to start counting new buffer bindpoints.
+     * @param entries Vector describing the buffers that are actually used in the guest shader.
+     * @returns The next available bindpoint for use in the next shader stage.
+     */
+    u32 SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, GLuint program,
+                          u32 current_bindpoint,
+                          const std::vector<GLShader::ConstBufferEntry>& entries);
+
    /// Syncs the viewport to match the guest state
    void SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect, u16 res_scale);

@@ -122,9 +108,6 @@ private:
    /// Syncs the clip coefficients to match the guest state
    void SyncClipCoef();

-    /// Sets the OpenGL shader in accordance with the current guest state
-    void SetShader();
-
    /// Syncs the cull mode to match the guest state
    void SyncCullMode();

@@ -152,23 +135,16 @@ private:

    RasterizerCacheOpenGL res_cache;

-    /// Shader used for test renderering - to be removed once we have emulated shaders
-    MaxwellShader test_shader{};
-
-    const MaxwellShader* current_shader{};
-    bool shader_dirty{};
-
-    struct {
-        UniformData data;
-        bool dirty;
-    } uniform_block_data = {};
-
-    OGLPipeline pipeline;
+    std::unique_ptr<GLShader::ProgramManager> shader_program_manager;
    OGLVertexArray sw_vao;
    OGLVertexArray hw_vao;
    std::array<bool, 16> hw_vao_enabled_attributes;

-    std::array<SamplerInfo, 32> texture_samplers;
+    std::array<SamplerInfo, GLShader::NumTextureSamplers> texture_samplers;
+    std::array<std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers>,
+               Tegra::Engines::Maxwell3D::Regs::MaxShaderStage>
+        ssbos;
+
    static constexpr size_t VERTEX_BUFFER_SIZE = 128 * 1024 * 1024;
    std::unique_ptr<OGLStreamBuffer> vertex_buffer;
    OGLBuffer uniform_buffer;
@@ -179,22 +155,11 @@ private:

    GLsizeiptr vs_input_size;

-    void AnalyzeVertexArray(bool is_indexed);
    void SetupVertexArray(u8* array_ptr, GLintptr buffer_offset);

-    OGLBuffer vs_uniform_buffer;
-    std::unordered_map<GLShader::MaxwellVSConfig, VertexShader*> vs_shader_map;
-    std::unordered_map<std::string, VertexShader> vs_shader_cache;
-    OGLShader vs_default_shader;
+    std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::MaxShaderStage> uniform_buffers;

-    void SetupVertexShader(VSUniformData* ub_ptr, GLintptr buffer_offset);
-
-    OGLBuffer fs_uniform_buffer;
-    std::unordered_map<GLShader::MaxwellFSConfig, FragmentShader*> fs_shader_map;
-    std::unordered_map<std::string, FragmentShader> fs_shader_cache;
-    OGLShader fs_default_shader;
-
-    void SetupFragmentShader(FSUniformData* ub_ptr, GLintptr buffer_offset);
+    void SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size_t ptr_pos);

    enum class AccelDraw { Disabled, Arrays, Indexed };
    AccelDraw accelerate_draw;
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -36,6 +36,7 @@

 using SurfaceType = SurfaceParams::SurfaceType;
 using PixelFormat = SurfaceParams::PixelFormat;
+using ComponentType = SurfaceParams::ComponentType;

 struct FormatTuple {
    GLint internal_format;
@@ -47,26 +48,24 @@ struct FormatTuple {
    u32 compression_factor;
 };

-static constexpr std::array<FormatTuple, 1> fb_format_tuples = {{
-    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, false, 1}, // RGBA8
+static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_format_tuples = {{
+    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false, 1},                     // ABGR8
+    {GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, false, 1},                        // B5G6R5
+    {GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true, 16},   // DXT1
+    {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true, 16}, // DXT23
+    {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true, 16}, // DXT45
 }};

-static constexpr std::array<FormatTuple, 2> tex_format_tuples = {{
-    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, false, 1},                       // RGBA8
-    {GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true, 16}, // DXT1
-}};
-
-static const FormatTuple& GetFormatTuple(PixelFormat pixel_format) {
+static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) {
    const SurfaceType type = SurfaceParams::GetFormatType(pixel_format);
-    if (type == SurfaceType::Color) {
-        ASSERT(static_cast<size_t>(pixel_format) < fb_format_tuples.size());
-        return fb_format_tuples[static_cast<unsigned int>(pixel_format)];
+    if (type == SurfaceType::ColorTexture) {
+        ASSERT(static_cast<size_t>(pixel_format) < tex_format_tuples.size());
+        // For now only UNORM components are supported
+        ASSERT(component_type == ComponentType::UNorm);
+        return tex_format_tuples[static_cast<unsigned int>(pixel_format)];
    } else if (type == SurfaceType::Depth || type == SurfaceType::DepthStencil) {
        // TODO(Subv): Implement depth formats
        ASSERT_MSG(false, "Unimplemented");
-    } else if (type == SurfaceType::Texture) {
-        ASSERT(static_cast<size_t>(pixel_format) < tex_format_tuples.size());
-        return tex_format_tuples[static_cast<unsigned int>(pixel_format)];
    }

    UNREACHABLE();
@@ -85,56 +84,42 @@ static u16 GetResolutionScaleFactor() {
 }

 template <bool morton_to_gl, PixelFormat format>
-static void MortonCopyTile(u32 stride, u8* tile_buffer, u8* gl_buffer) {
+void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, VAddr base, VAddr start,
+                VAddr end) {
    constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / 8;
    constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format);
-    for (u32 y = 0; y < 8; ++y) {
-        for (u32 x = 0; x < 8; ++x) {
-            u8* tile_ptr = tile_buffer + VideoCore::MortonInterleave(x, y) * bytes_per_pixel;
-            u8* gl_ptr = gl_buffer + ((7 - y) * stride + x) * gl_bytes_per_pixel;
-            if (morton_to_gl) {
-                std::memcpy(gl_ptr, tile_ptr, bytes_per_pixel);
-            } else {
-                std::memcpy(tile_ptr, gl_ptr, bytes_per_pixel);
-            }
-        }
+
+    if (morton_to_gl) {
+        auto data = Tegra::Texture::UnswizzleTexture(
+            base, SurfaceParams::TextureFormatFromPixelFormat(format), stride, height,
+            block_height);
+        std::memcpy(gl_buffer, data.data(), data.size());
+    } else {
+        // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check
+        // the configuration for this and perform more generic un/swizzle
+        LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!");
+        VideoCore::MortonCopyPixels128(stride, height, bytes_per_pixel, gl_bytes_per_pixel,
+                                       Memory::GetPointer(base), gl_buffer, morton_to_gl);
    }
 }

-template <bool morton_to_gl, PixelFormat format>
-void MortonCopy(u32 stride, u32 height, u8* gl_buffer, VAddr base, VAddr start, VAddr end) {
-    constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / 8;
-    constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format);
-
-    // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check the
-    // configuration for this and perform more generic un/swizzle
-    LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!");
-    VideoCore::MortonCopyPixels128(stride, height, bytes_per_pixel, gl_bytes_per_pixel,
-                                   Memory::GetPointer(base), gl_buffer, morton_to_gl);
-}
-
-template <>
-void MortonCopy<true, PixelFormat::DXT1>(u32 stride, u32 height, u8* gl_buffer, VAddr base,
-                                         VAddr start, VAddr end) {
-    constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(PixelFormat::DXT1) / 8;
-    constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(PixelFormat::DXT1);
-
-    // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check the
-    // configuration for this and perform more generic un/swizzle
-    LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!");
-    auto data =
-        Tegra::Texture::UnswizzleTexture(base, Tegra::Texture::TextureFormat::DXT1, stride, height);
-    std::memcpy(gl_buffer, data.data(), data.size());
-}
-
-static constexpr std::array<void (*)(u32, u32, u8*, VAddr, VAddr, VAddr), 2> morton_to_gl_fns = {
-    MortonCopy<true, PixelFormat::RGBA8>,
-    MortonCopy<true, PixelFormat::DXT1>,
+static constexpr std::array<void (*)(u32, u32, u32, u8*, VAddr, VAddr, VAddr),
+                            SurfaceParams::MaxPixelFormat>
+    morton_to_gl_fns = {
+        MortonCopy<true, PixelFormat::ABGR8>, MortonCopy<true, PixelFormat::B5G6R5>,
+        MortonCopy<true, PixelFormat::DXT1>,  MortonCopy<true, PixelFormat::DXT23>,
+        MortonCopy<true, PixelFormat::DXT45>,
 };

-static constexpr std::array<void (*)(u32, u32, u8*, VAddr, VAddr, VAddr), 2> gl_to_morton_fns = {
-    MortonCopy<false, PixelFormat::RGBA8>,
-    MortonCopy<false, PixelFormat::DXT1>,
+static constexpr std::array<void (*)(u32, u32, u32, u8*, VAddr, VAddr, VAddr),
+                            SurfaceParams::MaxPixelFormat>
+    gl_to_morton_fns = {
+        MortonCopy<false, PixelFormat::ABGR8>,
+        MortonCopy<false, PixelFormat::B5G6R5>,
+        // TODO(Subv): Swizzling the DXT1/DXT23/DXT45 formats is not yet supported
+        nullptr,
+        nullptr,
+        nullptr,
 };

 // Allocate an uninitialized texture of appropriate size and format for the surface
@@ -183,7 +168,7 @@ static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rec

    u32 buffers = 0;

-    if (type == SurfaceType::Color || type == SurfaceType::Texture) {
+    if (type == SurfaceType::ColorTexture) {
        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, src_tex,
                               0);
        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
@@ -311,15 +296,18 @@ MathUtil::Rectangle<u32> SurfaceParams::GetScaledSubRect(const SurfaceParams& su

 bool SurfaceParams::ExactMatch(const SurfaceParams& other_surface) const {
    return std::tie(other_surface.addr, other_surface.width, other_surface.height,
-                    other_surface.stride, other_surface.pixel_format, other_surface.is_tiled) ==
-               std::tie(addr, width, height, stride, pixel_format, is_tiled) &&
+                    other_surface.stride, other_surface.block_height, other_surface.pixel_format,
+                    other_surface.component_type,
+                    other_surface.is_tiled) == std::tie(addr, width, height, stride, block_height,
+                                                        pixel_format, component_type, is_tiled) &&
           pixel_format != PixelFormat::Invalid;
 }

 bool SurfaceParams::CanSubRect(const SurfaceParams& sub_surface) const {
    return sub_surface.addr >= addr && sub_surface.end <= end &&
           sub_surface.pixel_format == pixel_format && pixel_format != PixelFormat::Invalid &&
-           sub_surface.is_tiled == is_tiled &&
+           sub_surface.is_tiled == is_tiled && sub_surface.block_height == block_height &&
+           sub_surface.component_type == component_type &&
           (sub_surface.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 &&
           (sub_surface.stride == stride || sub_surface.height <= (is_tiled ? 8u : 1u)) &&
           GetSubRect(sub_surface).left + sub_surface.width <= stride;
@@ -328,7 +316,8 @@ bool SurfaceParams::CanSubRect(const SurfaceParams& sub_surface) const {
 bool SurfaceParams::CanExpand(const SurfaceParams& expanded_surface) const {
    return pixel_format != PixelFormat::Invalid && pixel_format == expanded_surface.pixel_format &&
           addr <= expanded_surface.end && expanded_surface.addr <= end &&
-           is_tiled == expanded_surface.is_tiled && stride == expanded_surface.stride &&
+           is_tiled == expanded_surface.is_tiled && block_height == expanded_surface.block_height &&
+           component_type == expanded_surface.component_type && stride == expanded_surface.stride &&
           (std::max(expanded_surface.addr, addr) - std::min(expanded_surface.addr, addr)) %
                   BytesInPixels(stride * (is_tiled ? 8 : 1)) ==
               0;
@@ -339,6 +328,10 @@ bool SurfaceParams::CanTexCopy(const SurfaceParams& texcopy_params) const {
        end < texcopy_params.end) {
        return false;
    }
+    if (texcopy_params.block_height != block_height ||
+        texcopy_params.component_type != component_type)
+        return false;
+
    if (texcopy_params.width != texcopy_params.stride) {
        const u32 tile_stride = static_cast<u32>(BytesInPixels(stride * (is_tiled ? 8 : 1)));
        return (texcopy_params.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 &&
@@ -481,18 +474,13 @@ void CachedSurface::LoadGLBuffer(VAddr load_start, VAddr load_end) {
    const u64 start_offset = load_start - addr;

    if (!is_tiled) {
-        ASSERT(type == SurfaceType::Color);
        const u32 bytes_per_pixel{GetFormatBpp() >> 3};

-        // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check
-        // the configuration for this and perform more generic un/swizzle
-        LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!");
-        VideoCore::MortonCopyPixels128(width, height, bytes_per_pixel, 4,
-                                       texture_src_data + start_offset, &gl_buffer[start_offset],
-                                       true);
+        std::memcpy(&gl_buffer[start_offset], texture_src_data + start_offset,
+                    bytes_per_pixel * width * height);
    } else {
-        morton_to_gl_fns[static_cast<size_t>(pixel_format)](stride, height, &gl_buffer[0], addr,
-                                                            load_start, load_end);
+        morton_to_gl_fns[static_cast<size_t>(pixel_format)](
+            stride, block_height, height, &gl_buffer[0], addr, load_start, load_end);
    }
 }

@@ -533,11 +521,13 @@ void CachedSurface::FlushGLBuffer(VAddr flush_start, VAddr flush_end) {
        if (backup_bytes)
            std::memcpy(&dst_buffer[coarse_start_offset], &backup_data[0], backup_bytes);
    } else if (!is_tiled) {
-        ASSERT(type == SurfaceType::Color);
        std::memcpy(dst_buffer + start_offset, &gl_buffer[start_offset], flush_end - flush_start);
    } else {
-        gl_to_morton_fns[static_cast<size_t>(pixel_format)](stride, height, &gl_buffer[0], addr,
-                                                            flush_start, flush_end);
+        // gl_to_morton_fns holds nullptr for formats whose swizzling is not implemented yet
+        // (the DXT entries), so check the entry before calling through it.
+        const auto gl_to_morton = gl_to_morton_fns[static_cast<size_t>(pixel_format)];
+        ASSERT_MSG(gl_to_morton != nullptr, "Unimplemented swizzle for this pixel format");
+        gl_to_morton(stride, block_height, height, &gl_buffer[0], addr, flush_start, flush_end);
    }
 }

@@ -556,7 +543,7 @@ void CachedSurface::UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint
    GLint y0 = static_cast<GLint>(rect.bottom);
    size_t buffer_offset = (y0 * stride + x0) * GetGLBytesPerPixel(pixel_format);

-    const FormatTuple& tuple = GetFormatTuple(pixel_format);
+    const FormatTuple& tuple = GetFormatTuple(pixel_format, component_type);
    GLuint target_tex = texture.handle;

    // If not 1x scale, create 1x texture that we will blit from to replace texture subrect in
@@ -629,7 +616,7 @@ void CachedSurface::DownloadGLTexture(const MathUtil::Rectangle<u32>& rect, GLui
    OpenGLState prev_state = state;
    SCOPE_EXIT({ prev_state.Apply(); });

-    const FormatTuple& tuple = GetFormatTuple(pixel_format);
+    const FormatTuple& tuple = GetFormatTuple(pixel_format, component_type);

    // Ensure no bad interactions with GL_PACK_ALIGNMENT
    ASSERT(stride * GetGLBytesPerPixel(pixel_format) % 4 == 0);
@@ -662,7 +649,7 @@ void CachedSurface::DownloadGLTexture(const MathUtil::Rectangle<u32>& rect, GLui
        state.draw.read_framebuffer = read_fb_handle;
        state.Apply();

-        if (type == SurfaceType::Color || type == SurfaceType::Texture) {
+        if (type == SurfaceType::ColorTexture) {
            glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
                                   texture.handle, 0);
            glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
@@ -818,7 +805,7 @@ void main() {
    color = texelFetch(tbo, tbo_offset).rabg;
 }
 )";
-    d24s8_abgr_shader.Create(vs_source, nullptr, fs_source);
+    d24s8_abgr_shader.CreateFromSource(vs_source, nullptr, fs_source);

    OpenGLState state = OpenGLState::GetCurState();
    GLuint old_program = state.draw.shader_program;
@@ -1041,9 +1028,25 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextu
    params.height = config.tic.Height();
    params.is_tiled = config.tic.IsTiled();
    params.pixel_format = SurfaceParams::PixelFormatFromTextureFormat(config.tic.format);
+
+    // TODO(Subv): Different types per component are not supported.
+    ASSERT(config.tic.r_type.Value() == config.tic.g_type.Value() &&
+           config.tic.r_type.Value() == config.tic.b_type.Value() &&
+           config.tic.r_type.Value() == config.tic.a_type.Value());
+
+    params.component_type = SurfaceParams::ComponentTypeFromTexture(config.tic.r_type.Value());
+
+    if (config.tic.IsTiled()) {
+        params.block_height = config.tic.BlockHeight();
+    } else {
+        // Use the texture-provided stride value if the texture isn't tiled.
+        params.stride = params.PixelsInBytes(config.tic.Pitch());
+    }
+
    params.UpdateParams();

-    if (config.tic.Width() % 8 != 0 || config.tic.Height() % 8 != 0) {
+    if (config.tic.Width() % 8 != 0 || config.tic.Height() % 8 != 0 ||
+        params.stride != params.width) {
        Surface src_surface;
        MathUtil::Rectangle<u32> rect;
        std::tie(src_surface, rect) = GetSurfaceSubRect(params, ScaleMatch::Ignore, true);
@@ -1094,10 +1097,13 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(
    color_params.res_scale = resolution_scale_factor;
    color_params.width = config.width;
    color_params.height = config.height;
+    // TODO(Subv): Can framebuffers use a different block height?
+    color_params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight;
    SurfaceParams depth_params = color_params;

    color_params.addr = memory_manager->PhysicalToVirtualAddress(config.Address());
    color_params.pixel_format = SurfaceParams::PixelFormatFromRenderTargetFormat(config.format);
+    color_params.component_type = SurfaceParams::ComponentTypeFromRenderTarget(config.format);
    color_params.UpdateParams();

    ASSERT_MSG(!using_depth_fb, "depth buffer is unimplemented");
@@ -1293,7 +1299,6 @@ void RasterizerCacheOpenGL::InvalidateRegion(VAddr addr, u64 size, const Surface
    const SurfaceInterval invalid_interval(addr, addr + size);

    if (region_owner != nullptr) {
-        ASSERT(region_owner->type != SurfaceType::Texture);
        ASSERT(addr >= region_owner->addr && addr + size <= region_owner->end);
        // Surfaces can't have a gap
        ASSERT(region_owner->width == region_owner->stride);
@@ -1355,7 +1360,8 @@ Surface RasterizerCacheOpenGL::CreateSurface(const SurfaceParams& params) {

    surface->gl_buffer_size = 0;
    surface->invalid_regions.insert(surface->GetInterval());
-    AllocateSurfaceTexture(surface->texture.handle, GetFormatTuple(surface->pixel_format),
+    AllocateSurfaceTexture(surface->texture.handle,
+                           GetFormatTuple(surface->pixel_format, surface->component_type),
                           surface->GetScaledWidth(), surface->GetScaledHeight());

    return surface;
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -52,27 +52,45 @@ enum class ScaleMatch {

 struct SurfaceParams {
    enum class PixelFormat {
-        RGBA8 = 0,
-        DXT1 = 1,
+        ABGR8 = 0,
+        B5G6R5 = 1,
+        DXT1 = 2,
+        DXT23 = 3,
+        DXT45 = 4,
+
+        Max,
        Invalid = 255,
    };

+    static constexpr size_t MaxPixelFormat = static_cast<size_t>(PixelFormat::Max);
+
+    enum class ComponentType {
+        Invalid = 0,
+        SNorm = 1,
+        UNorm = 2,
+        SInt = 3,
+        UInt = 4,
+        Float = 5,
+    };
+
    enum class SurfaceType {
-        Color = 0,
-        Texture = 1,
-        Depth = 2,
-        DepthStencil = 3,
-        Fill = 4,
-        Invalid = 5
+        ColorTexture = 0,
+        Depth = 1,
+        DepthStencil = 2,
+        Fill = 3,
+        Invalid = 4,
    };

    static constexpr unsigned int GetFormatBpp(PixelFormat format) {
        if (format == PixelFormat::Invalid)
            return 0;

-        constexpr std::array<unsigned int, 2> bpp_table = {
-            32, // RGBA8
-            64, // DXT1
+        constexpr std::array<unsigned int, MaxPixelFormat> bpp_table = {
+            32,  // ABGR8
+            16,  // B5G6R5
+            64,  // DXT1
+            128, // DXT23
+            128, // DXT45
        };

        ASSERT(static_cast<size_t>(format) < bpp_table.size());
@@ -85,8 +103,9 @@ struct SurfaceParams {
    static PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) {
        switch (format) {
        case Tegra::RenderTargetFormat::RGBA8_UNORM:
-            return PixelFormat::RGBA8;
+            return PixelFormat::ABGR8;
        default:
+            NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
            UNREACHABLE();
        }
    }
@@ -94,8 +113,9 @@ struct SurfaceParams {
    static PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) {
        switch (format) {
        case Tegra::FramebufferConfig::PixelFormat::ABGR8:
-            return PixelFormat::RGBA8;
+            return PixelFormat::ABGR8;
        default:
+            NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
            UNREACHABLE();
        }
    }
@@ -104,10 +124,69 @@ struct SurfaceParams {
        // TODO(Subv): Properly implement this
        switch (format) {
        case Tegra::Texture::TextureFormat::A8R8G8B8:
-            return PixelFormat::RGBA8;
+            return PixelFormat::ABGR8;
+        case Tegra::Texture::TextureFormat::B5G6R5:
+            return PixelFormat::B5G6R5;
        case Tegra::Texture::TextureFormat::DXT1:
            return PixelFormat::DXT1;
+        case Tegra::Texture::TextureFormat::DXT23:
+            return PixelFormat::DXT23;
+        case Tegra::Texture::TextureFormat::DXT45:
+            return PixelFormat::DXT45;
        default:
+            NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
+            UNREACHABLE();
+        }
+    }
+
+    static Tegra::Texture::TextureFormat TextureFormatFromPixelFormat(PixelFormat format) {
+        // TODO(Subv): Properly implement this
+        switch (format) {
+        case PixelFormat::ABGR8:
+            return Tegra::Texture::TextureFormat::A8R8G8B8;
+        case PixelFormat::B5G6R5:
+            return Tegra::Texture::TextureFormat::B5G6R5;
+        case PixelFormat::DXT1:
+            return Tegra::Texture::TextureFormat::DXT1;
+        case PixelFormat::DXT23:
+            return Tegra::Texture::TextureFormat::DXT23;
+        case PixelFormat::DXT45:
+            return Tegra::Texture::TextureFormat::DXT45;
+        default:
+            UNREACHABLE();
+        }
+    }
+
+    static ComponentType ComponentTypeFromTexture(Tegra::Texture::ComponentType type) {
+        // TODO(Subv): Implement more component types
+        switch (type) {
+        case Tegra::Texture::ComponentType::UNORM:
+            return ComponentType::UNorm;
+        default:
+            NGLOG_CRITICAL(HW_GPU, "Unimplemented component type={}", static_cast<u32>(type));
+            UNREACHABLE();
+        }
+    }
+
+    static ComponentType ComponentTypeFromRenderTarget(Tegra::RenderTargetFormat format) {
+        // TODO(Subv): Implement more render targets
+        switch (format) {
+        case Tegra::RenderTargetFormat::RGBA8_UNORM:
+        case Tegra::RenderTargetFormat::RGB10_A2_UNORM:
+            return ComponentType::UNorm;
+        default:
+            NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
+            UNREACHABLE();
+        }
+    }
+
+    static ComponentType ComponentTypeFromGPUPixelFormat(
+        Tegra::FramebufferConfig::PixelFormat format) {
+        switch (format) {
+        case Tegra::FramebufferConfig::PixelFormat::ABGR8:
+            return ComponentType::UNorm;
+        default:
+            NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
            UNREACHABLE();
        }
    }
@@ -116,8 +195,7 @@ struct SurfaceParams {
        SurfaceType a_type = GetFormatType(pixel_format_a);
        SurfaceType b_type = GetFormatType(pixel_format_b);

-        if ((a_type == SurfaceType::Color || a_type == SurfaceType::Texture) &&
-            (b_type == SurfaceType::Color || b_type == SurfaceType::Texture)) {
+        if (a_type == SurfaceType::ColorTexture && b_type == SurfaceType::ColorTexture) {
            return true;
        }

@@ -133,12 +211,8 @@ struct SurfaceParams {
    }

    static SurfaceType GetFormatType(PixelFormat pixel_format) {
-        if ((unsigned int)pixel_format <= static_cast<unsigned int>(PixelFormat::RGBA8)) {
-            return SurfaceType::Color;
-        }
-
-        if ((unsigned int)pixel_format <= static_cast<unsigned int>(PixelFormat::DXT1)) {
-            return SurfaceType::Texture;
+        if (static_cast<size_t>(pixel_format) < MaxPixelFormat) {
+            return SurfaceType::ColorTexture;
        }

        // TODO(Subv): Implement the other formats
@@ -210,11 +284,13 @@ struct SurfaceParams {
    u32 width = 0;
    u32 height = 0;
    u32 stride = 0;
+    u32 block_height = 0;
    u16 res_scale = 1;

    bool is_tiled = false;
    PixelFormat pixel_format = PixelFormat::Invalid;
    SurfaceType type = SurfaceType::Invalid;
+    ComponentType component_type = ComponentType::Invalid;
 };

 struct CachedSurface : SurfaceParams {
@@ -334,7 +410,7 @@ private:
    OGLVertexArray attributeless_vao;
    OGLBuffer d24s8_abgr_buffer;
    GLsizeiptr d24s8_abgr_buffer_size;
-    OGLShader d24s8_abgr_shader;
+    OGLProgram d24s8_abgr_shader;
    GLint d24s8_abgr_tbo_size_u_id;
    GLint d24s8_abgr_viewport_u_id;
 };
--- a/src/video_core/renderer_opengl/gl_resource_manager.h
+++ b/src/video_core/renderer_opengl/gl_resource_manager.h
@@ -13,14 +13,16 @@
 class OGLTexture : private NonCopyable {
 public:
    OGLTexture() = default;
-    OGLTexture(OGLTexture&& o) {
-        std::swap(handle, o.handle);
-    }
+
+    OGLTexture(OGLTexture&& o) : handle(std::exchange(o.handle, 0)) {}
+
    ~OGLTexture() {
        Release();
    }
+
    OGLTexture& operator=(OGLTexture&& o) {
-        std::swap(handle, o.handle);
+        Release();
+        handle = std::exchange(o.handle, 0);
        return *this;
    }

@@ -46,14 +48,16 @@ public:
 class OGLSampler : private NonCopyable {
 public:
    OGLSampler() = default;
-    OGLSampler(OGLSampler&& o) {
-        std::swap(handle, o.handle);
-    }
+
+    OGLSampler(OGLSampler&& o) : handle(std::exchange(o.handle, 0)) {}
+
    ~OGLSampler() {
        Release();
    }
+
    OGLSampler& operator=(OGLSampler&& o) {
-        std::swap(handle, o.handle);
+        Release();
+        handle = std::exchange(o.handle, 0);
        return *this;
    }

@@ -79,25 +83,71 @@ public:
 class OGLShader : private NonCopyable {
 public:
    OGLShader() = default;
-    OGLShader(OGLShader&& o) {
-        std::swap(handle, o.handle);
-    }
+
+    OGLShader(OGLShader&& o) : handle(std::exchange(o.handle, 0)) {}
+
    ~OGLShader() {
        Release();
    }
+
    OGLShader& operator=(OGLShader&& o) {
-        std::swap(handle, o.handle);
+        Release();
+        handle = std::exchange(o.handle, 0);
        return *this;
    }

-    /// Creates a new internal OpenGL resource and stores the handle
-    void Create(const char* vert_shader, const char* geo_shader, const char* frag_shader,
-                const std::vector<const char*>& feedback_vars = {},
-                bool separable_program = false) {
+    void Create(const char* source, GLenum type) {
        if (handle != 0)
            return;
-        handle = GLShader::LoadProgram(vert_shader, geo_shader, frag_shader, feedback_vars,
-                                       separable_program);
+        if (source == nullptr)
+            return;
+        handle = GLShader::LoadShader(source, type);
+    }
+
+    void Release() {
+        if (handle == 0)
+            return;
+        glDeleteShader(handle);
+        handle = 0;
+    }
+
+    GLuint handle = 0;
+};
+
+class OGLProgram : private NonCopyable {
+public:
+    OGLProgram() = default;
+
+    OGLProgram(OGLProgram&& o) : handle(std::exchange(o.handle, 0)) {}
+
+    ~OGLProgram() {
+        Release();
+    }
+
+    OGLProgram& operator=(OGLProgram&& o) {
+        Release();
+        handle = std::exchange(o.handle, 0);
+        return *this;
+    }
+
+    template <typename... T>
+    void Create(bool separable_program, T... shaders) {
+        if (handle != 0)
+            return;
+        handle = GLShader::LoadProgram(separable_program, shaders...);
+    }
+
+    /// Creates a new internal OpenGL resource and stores the handle
+    void CreateFromSource(const char* vert_shader, const char* geo_shader, const char* frag_shader,
+                          bool separable_program = false) {
+        OGLShader vert, geo, frag;
+        if (vert_shader)
+            vert.Create(vert_shader, GL_VERTEX_SHADER);
+        if (geo_shader)
+            geo.Create(geo_shader, GL_GEOMETRY_SHADER);
+        if (frag_shader)
+            frag.Create(frag_shader, GL_FRAGMENT_SHADER);
+        Create(separable_program, vert.handle, geo.handle, frag.handle);
    }

    /// Deletes the internal OpenGL resource
@@ -148,14 +198,16 @@ public:
 class OGLBuffer : private NonCopyable {
 public:
    OGLBuffer() = default;
-    OGLBuffer(OGLBuffer&& o) {
-        std::swap(handle, o.handle);
-    }
+
+    OGLBuffer(OGLBuffer&& o) : handle(std::exchange(o.handle, 0)) {}
+
    ~OGLBuffer() {
        Release();
    }
+
    OGLBuffer& operator=(OGLBuffer&& o) {
-        std::swap(handle, o.handle);
+        Release();
+        handle = std::exchange(o.handle, 0);
        return *this;
    }

@@ -214,14 +266,16 @@ public:
 class OGLVertexArray : private NonCopyable {
 public:
    OGLVertexArray() = default;
-    OGLVertexArray(OGLVertexArray&& o) {
-        std::swap(handle, o.handle);
-    }
+
+    OGLVertexArray(OGLVertexArray&& o) : handle(std::exchange(o.handle, 0)) {}
+
    ~OGLVertexArray() {
        Release();
    }
+
    OGLVertexArray& operator=(OGLVertexArray&& o) {
-        std::swap(handle, o.handle);
+        Release();
+        handle = std::exchange(o.handle, 0);
        return *this;
    }

@@ -247,14 +301,16 @@ public:
 class OGLFramebuffer : private NonCopyable {
 public:
    OGLFramebuffer() = default;
-    OGLFramebuffer(OGLFramebuffer&& o) {
-        std::swap(handle, o.handle);
-    }
+
+    OGLFramebuffer(OGLFramebuffer&& o) : handle(std::exchange(o.handle, 0)) {}
+
    ~OGLFramebuffer() {
        Release();
    }
+
    OGLFramebuffer& operator=(OGLFramebuffer&& o) {
-        std::swap(handle, o.handle);
+        Release();
+        handle = std::exchange(o.handle, 0);
        return *this;
    }

--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -2,57 +2,642 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

+#include <map>
+#include <set>
 #include <string>
-#include <queue>
+#include <string_view>
 #include "common/assert.h"
 #include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
 #include "video_core/renderer_opengl/gl_shader_decompiler.h"

-namespace Maxwell3D {
-namespace Shader {
+namespace GLShader {
 namespace Decompiler {

+using Tegra::Shader::Attribute;
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+using Tegra::Shader::Register;
+using Tegra::Shader::Sampler;
+using Tegra::Shader::SubOp;
+using Tegra::Shader::Uniform;
+
 constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH;

-class Impl {
+class DecompileFail : public std::runtime_error {
 public:
-    Impl(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>& program_code,
-         const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>& swizzle_data, u32 main_offset,
-         const std::function<std::string(u32)>& inputreg_getter,
-         const std::function<std::string(u32)>& outputreg_getter, bool sanitize_mul,
-         const std::string& emit_cb, const std::string& setemit_cb)
-        : program_code(program_code), swizzle_data(swizzle_data), main_offset(main_offset),
-          inputreg_getter(inputreg_getter), outputreg_getter(outputreg_getter),
-          sanitize_mul(sanitize_mul), emit_cb(emit_cb), setemit_cb(setemit_cb) {}
+    using std::runtime_error::runtime_error;
+};

-    std::string Decompile() {
-        UNREACHABLE();
-        return {};
+/// Describes the behaviour of the code path between a given entry point and a return point.
+enum class ExitMethod {
+    Undetermined, ///< Internal value. Only occurs when analyzing a JMP loop.
+    AlwaysReturn, ///< All code paths reach the return point.
+    Conditional,  ///< Code path reaches the return point or an END instruction conditionally.
+    AlwaysEnd,    ///< All code paths reach an END instruction.
+};
+
+/// A subroutine is a range of code referenced by a CALL, IF or LOOP instruction.
+struct Subroutine {
+    /// Generates a name suitable for GLSL source code.
+    std::string GetName() const {
+        return "sub_" + std::to_string(begin) + "_" + std::to_string(end);
+    }
+
+    u32 begin;              ///< Entry point of the subroutine.
+    u32 end;                ///< Return point of the subroutine.
+    ExitMethod exit_method; ///< Exit method of the subroutine.
+    std::set<u32> labels;   ///< Addresses referenced by JMP instructions.
+
+    bool operator<(const Subroutine& rhs) const {
+        return std::tie(begin, end) < std::tie(rhs.begin, rhs.end);
+    }
+};
+
+/// Analyzes shader code and produces a set of subroutines.
+class ControlFlowAnalyzer {
+public:
+    ControlFlowAnalyzer(const ProgramCode& program_code, u32 main_offset)
+        : program_code(program_code) {
+
+        // Recursively finds all subroutines.
+        const Subroutine& program_main = AddSubroutine(main_offset, PROGRAM_END);
+        if (program_main.exit_method != ExitMethod::AlwaysEnd)
+            throw DecompileFail("Program does not always end");
+    }
+
+    std::set<Subroutine> GetSubroutines() {
+        return std::move(subroutines);
    }

 private:
-    const std::array<u32, MAX_PROGRAM_CODE_LENGTH>& program_code;
-    const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>& swizzle_data;
-    u32 main_offset;
-    const std::function<std::string(u32)>& inputreg_getter;
-    const std::function<std::string(u32)>& outputreg_getter;
-    bool sanitize_mul;
-    const std::string& emit_cb;
-    const std::string& setemit_cb;
+    const ProgramCode& program_code;
+    std::set<Subroutine> subroutines;
+    std::map<std::pair<u32, u32>, ExitMethod> exit_method_map;
+
+    /// Adds and analyzes a new subroutine if it is not added yet.
+    const Subroutine& AddSubroutine(u32 begin, u32 end) {
+        auto iter = subroutines.find(Subroutine{begin, end});
+        if (iter != subroutines.end())
+            return *iter;
+
+        Subroutine subroutine{begin, end};
+        subroutine.exit_method = Scan(begin, end, subroutine.labels);
+        if (subroutine.exit_method == ExitMethod::Undetermined)
+            throw DecompileFail("Recursive function detected");
+        return *subroutines.insert(std::move(subroutine)).first;
+    }
+
+    /// Scans a range of code for labels and determines the exit method.
+    ExitMethod Scan(u32 begin, u32 end, std::set<u32>& labels) {
+        auto [iter, inserted] =
+            exit_method_map.emplace(std::make_pair(begin, end), ExitMethod::Undetermined);
+        ExitMethod& exit_method = iter->second;
+        if (!inserted)
+            return exit_method;
+
+        for (u32 offset = begin; offset != end && offset != PROGRAM_END; ++offset) {
+            const Instruction instr = {program_code[offset]};
+            switch (instr.opcode.EffectiveOpCode()) {
+            case OpCode::Id::EXIT: {
+                return exit_method = ExitMethod::AlwaysEnd;
+            }
+            }
+        }
+        return exit_method = ExitMethod::AlwaysReturn;
+    }
 };

-std::string DecompileProgram(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>& program_code,
-                             const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>& swizzle_data,
-                             u32 main_offset,
-                             const std::function<std::string(u32)>& inputreg_getter,
-                             const std::function<std::string(u32)>& outputreg_getter,
-                             bool sanitize_mul, const std::string& emit_cb,
-                             const std::string& setemit_cb) {
-    Impl impl(program_code, swizzle_data, main_offset, inputreg_getter, outputreg_getter,
-              sanitize_mul, emit_cb, setemit_cb);
-    return impl.Decompile();
+class ShaderWriter {
+public:
+    void AddLine(std::string_view text) {
+        DEBUG_ASSERT(scope >= 0);
+        if (!text.empty()) {
+            AppendIndentation();
+        }
+        shader_source += text;
+        AddNewLine();
+    }
+
+    void AddLine(char character) {
+        DEBUG_ASSERT(scope >= 0);
+        AppendIndentation();
+        shader_source += character;
+        AddNewLine();
+    }
+
+    void AddNewLine() {
+        DEBUG_ASSERT(scope >= 0);
+        shader_source += '\n';
+    }
+
+    std::string GetResult() {
+        return std::move(shader_source);
+    }
+
+    int scope = 0;
+
+private:
+    void AppendIndentation() {
+        shader_source.append(static_cast<size_t>(scope) * 4, ' ');
+    }
+
+    std::string shader_source;
+};
+
+class GLSLGenerator {
+public:
+    GLSLGenerator(const std::set<Subroutine>& subroutines, const ProgramCode& program_code,
+                  u32 main_offset, Maxwell3D::Regs::ShaderStage stage)
+        : subroutines(subroutines), program_code(program_code), main_offset(main_offset),
+          stage(stage) {
+
+        Generate();
+    }
+
+    std::string GetShaderCode() {
+        return declarations.GetResult() + shader.GetResult();
+    }
+
+    /// Returns entries in the shader that are useful for external functions
+    ShaderEntries GetEntries() const {
+        return {GetConstBuffersDeclarations()};
+    }
+
+private:
+    /// Gets the Subroutine object corresponding to the specified address.
+    const Subroutine& GetSubroutine(u32 begin, u32 end) const {
+        auto iter = subroutines.find(Subroutine{begin, end});
+        ASSERT(iter != subroutines.end());
+        return *iter;
+    }
+
+    /// Generates code representing an input attribute register.
+    std::string GetInputAttribute(Attribute::Index attribute) {
+        switch (attribute) {
+        case Attribute::Index::Position:
+            return "position";
+        default:
+            const u32 index{static_cast<u32>(attribute) -
+                            static_cast<u32>(Attribute::Index::Attribute_0)};
+            if (attribute >= Attribute::Index::Attribute_0) {
+                declr_input_attribute.insert(attribute);
+                return "input_attribute_" + std::to_string(index);
+            }
+
+            NGLOG_CRITICAL(HW_GPU, "Unhandled input attribute: {}", index);
+            UNREACHABLE();
+        }
+    }
+
+    /// Generates code representing an output attribute register.
+    std::string GetOutputAttribute(Attribute::Index attribute) {
+        switch (attribute) {
+        case Attribute::Index::Position:
+            return "position";
+        default:
+            const u32 index{static_cast<u32>(attribute) -
+                            static_cast<u32>(Attribute::Index::Attribute_0)};
+            if (attribute >= Attribute::Index::Attribute_0) {
+                declr_output_attribute.insert(attribute);
+                return "output_attribute_" + std::to_string(index);
+            }
+
+            NGLOG_CRITICAL(HW_GPU, "Unhandled output attribute: {}", index);
+            UNREACHABLE();
+        }
+    }
+
+    /// Generates code representing a 19-bit immediate value
+    static std::string GetImmediate19(const Instruction& instr) {
+        return std::to_string(instr.alu.GetImm20_19());
+    }
+
+    /// Generates code representing a 32-bit immediate value
+    static std::string GetImmediate32(const Instruction& instr) {
+        return std::to_string(instr.alu.GetImm20_32());
+    }
+
+    /// Generates code representing a temporary (GPR) register.
+    std::string GetRegister(const Register& reg, unsigned elem = 0) {
+        if (stage == Maxwell3D::Regs::ShaderStage::Fragment && reg < 4) {
+            // GPRs 0-3 are output color for the fragment shader
+            return std::string{"color."} + "rgba"[(reg + elem) & 3];
+        }
+
+        return *declr_register.insert("register_" + std::to_string(reg + elem)).first;
+    }
+
+    /// Generates code representing a uniform (C buffer) register.
+    std::string GetUniform(const Uniform& reg) {
+        declr_const_buffers[reg.index].MarkAsUsed(static_cast<unsigned>(reg.index),
+                                                  static_cast<unsigned>(reg.offset), stage);
+        return 'c' + std::to_string(reg.index) + '[' + std::to_string(reg.offset) + ']';
+    }
+
+    /// Generates code representing a texture sampler.
+    std::string GetSampler(const Sampler& sampler) const {
+        // TODO(Subv): Support more than just texture sampler 0
+        ASSERT_MSG(sampler.index == Sampler::Index::Sampler_0, "unsupported");
+        const unsigned index{static_cast<unsigned>(sampler.index.Value()) -
+                             static_cast<unsigned>(Sampler::Index::Sampler_0)};
+        return "tex[" + std::to_string(index) + "]";
+    }
+
+    /**
+     * Adds code that calls a subroutine.
+     * @param subroutine the subroutine to call.
+     */
+    void CallSubroutine(const Subroutine& subroutine) {
+        if (subroutine.exit_method == ExitMethod::AlwaysEnd) {
+            shader.AddLine(subroutine.GetName() + "();");
+            shader.AddLine("return true;");
+        } else if (subroutine.exit_method == ExitMethod::Conditional) {
+            shader.AddLine("if (" + subroutine.GetName() + "()) { return true; }");
+        } else {
+            shader.AddLine(subroutine.GetName() + "();");
+        }
+    }
+
+    /**
+     * Writes code that does an assignment operation.
+     * @param reg the destination register code.
+     * @param value the code representing the value to assign.
+     */
+    void SetDest(u64 elem, const std::string& reg, const std::string& value,
+                 u64 dest_num_components, u64 value_num_components, bool is_abs = false) {
+        std::string swizzle = ".";
+        swizzle += "xyzw"[elem];
+
+        std::string dest = reg + (dest_num_components != 1 ? swizzle : "");
+        std::string src = "(" + value + ")" + (value_num_components != 1 ? swizzle : "");
+        src = is_abs ? "abs(" + src + ")" : src;
+
+        shader.AddLine(dest + " = " + src + ";");
+    }
+
+    /**
+     * Compiles a single instruction from Tegra to GLSL.
+     * @param offset the offset of the Tegra shader instruction.
+     * @return the offset of the next instruction to execute. Usually it is the current offset
+     * + 1. If the current instruction always terminates the program, returns PROGRAM_END.
+     */
+    u32 CompileInstr(u32 offset) {
+        const Instruction instr = {program_code[offset]};
+
+        shader.AddLine("// " + std::to_string(offset) + ": " + OpCode::GetInfo(instr.opcode).name);
+
+        switch (OpCode::GetInfo(instr.opcode).type) {
+        case OpCode::Type::Arithmetic: {
+            std::string dest = GetRegister(instr.gpr0);
+            std::string op_a = instr.alu.negate_a ? "-" : "";
+            op_a += GetRegister(instr.gpr8);
+            if (instr.alu.abs_a) {
+                op_a = "abs(" + op_a + ")";
+            }
+
+            std::string op_b = instr.alu.negate_b ? "-" : "";
+
+            if (instr.is_b_imm) {
+                op_b += GetImmediate19(instr);
+            } else {
+                if (instr.is_b_gpr) {
+                    op_b += GetRegister(instr.gpr20);
+                } else {
+                    op_b += GetUniform(instr.uniform);
+                }
+            }
+
+            if (instr.alu.abs_b) {
+                op_b = "abs(" + op_b + ")";
+            }
+
+            switch (instr.opcode.EffectiveOpCode()) {
+            case OpCode::Id::FMUL_C:
+            case OpCode::Id::FMUL_R:
+            case OpCode::Id::FMUL_IMM: {
+                SetDest(0, dest, op_a + " * " + op_b, 1, 1, instr.alu.abs_d);
+                break;
+            }
+            case OpCode::Id::FMUL32_IMM: {
+                // fmul32i doesn't have abs or neg bits.
+                SetDest(0, dest, GetRegister(instr.gpr8) + " * " + GetImmediate32(instr), 1, 1);
+                break;
+            }
+            case OpCode::Id::FADD_C:
+            case OpCode::Id::FADD_R:
+            case OpCode::Id::FADD_IMM: {
+                SetDest(0, dest, op_a + " + " + op_b, 1, 1, instr.alu.abs_d);
+                break;
+            }
+            case OpCode::Id::MUFU: {
+                switch (instr.sub_op) {
+                case SubOp::Cos:
+                    SetDest(0, dest, "cos(" + op_a + ")", 1, 1, instr.alu.abs_d);
+                    break;
+                case SubOp::Sin:
+                    SetDest(0, dest, "sin(" + op_a + ")", 1, 1, instr.alu.abs_d);
+                    break;
+                case SubOp::Ex2:
+                    SetDest(0, dest, "exp2(" + op_a + ")", 1, 1, instr.alu.abs_d);
+                    break;
+                case SubOp::Lg2:
+                    SetDest(0, dest, "log2(" + op_a + ")", 1, 1, instr.alu.abs_d);
+                    break;
+                case SubOp::Rcp:
+                    SetDest(0, dest, "1.0 / " + op_a, 1, 1, instr.alu.abs_d);
+                    break;
+                case SubOp::Rsq:
+                    SetDest(0, dest, "inversesqrt(" + op_a + ")", 1, 1, instr.alu.abs_d);
+                    break;
+                case SubOp::Min:
+                    SetDest(0, dest, "min(" + op_a + "," + op_b + ")", 1, 1, instr.alu.abs_d);
+                    break;
+                default:
+                    NGLOG_CRITICAL(HW_GPU, "Unhandled MUFU sub op: {}",
+                                   static_cast<unsigned>(instr.sub_op.Value()));
+                    UNREACHABLE();
+                }
+                break;
+            }
+            default: {
+                NGLOG_CRITICAL(HW_GPU, "Unhandled arithmetic instruction: {} ({}): {}",
+                               static_cast<unsigned>(instr.opcode.EffectiveOpCode()),
+                               OpCode::GetInfo(instr.opcode).name, instr.hex);
+                UNREACHABLE();
+            }
+            }
+            break;
+        }
+        case OpCode::Type::Ffma: {
+            std::string dest = GetRegister(instr.gpr0);
+            std::string op_a = GetRegister(instr.gpr8);
+            std::string op_b = instr.ffma.negate_b ? "-" : "";
+            std::string op_c = instr.ffma.negate_c ? "-" : "";
+
+            switch (instr.opcode.EffectiveOpCode()) {
+            case OpCode::Id::FFMA_CR: {
+                op_b += GetUniform(instr.uniform);
+                op_c += GetRegister(instr.gpr39);
+                break;
+            }
+            case OpCode::Id::FFMA_RR: {
+                op_b += GetRegister(instr.gpr20);
+                op_c += GetRegister(instr.gpr39);
+                break;
+            }
+            case OpCode::Id::FFMA_RC: {
+                op_b += GetRegister(instr.gpr39);
+                op_c += GetUniform(instr.uniform);
+                break;
+            }
+            case OpCode::Id::FFMA_IMM: {
+                op_b += GetImmediate19(instr);
+                op_c += GetRegister(instr.gpr39);
+                break;
+            }
+            default: {
+                NGLOG_CRITICAL(HW_GPU, "Unhandled FFMA instruction: {} ({}): {}",
+                               static_cast<unsigned>(instr.opcode.EffectiveOpCode()),
+                               OpCode::GetInfo(instr.opcode).name, instr.hex);
+                UNREACHABLE();
+            }
+            }
+
+            SetDest(0, dest, op_a + " * " + op_b + " + " + op_c, 1, 1);
+            break;
+        }
+        case OpCode::Type::Memory: {
+            std::string gpr0 = GetRegister(instr.gpr0);
+            const Attribute::Index attribute = instr.attribute.fmt20.index;
+
+            switch (instr.opcode.EffectiveOpCode()) {
+            case OpCode::Id::LD_A: {
+                ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested");
+                SetDest(instr.attribute.fmt20.element, gpr0, GetInputAttribute(attribute), 1, 4);
+                break;
+            }
+            case OpCode::Id::ST_A: {
+                ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested");
+                SetDest(instr.attribute.fmt20.element, GetOutputAttribute(attribute), gpr0, 4, 1);
+                break;
+            }
+            case OpCode::Id::TEXS: {
+                ASSERT_MSG(instr.attribute.fmt20.size == 4, "untested");
+                const std::string op_a = GetRegister(instr.gpr8);
+                const std::string op_b = GetRegister(instr.gpr20);
+                const std::string sampler = GetSampler(instr.sampler);
+                const std::string coord = "vec2 coords = vec2(" + op_a + ", " + op_b + ");";
+                // Add an extra scope and declare the texture coords inside to prevent overwriting
+                // them in case they are used as outputs of the texs instruction.
+                shader.AddLine("{");
+                ++shader.scope;
+                shader.AddLine(coord);
+                const std::string texture = "texture(" + sampler + ", coords)";
+                for (unsigned elem = 0; elem < instr.attribute.fmt20.size; ++elem) {
+                    SetDest(elem, GetRegister(instr.gpr0, elem), texture, 1, 4);
+                }
+                --shader.scope;
+                shader.AddLine("}");
+                break;
+            }
+            default: {
+                NGLOG_CRITICAL(HW_GPU, "Unhandled memory instruction: {} ({}): {}",
+                               static_cast<unsigned>(instr.opcode.EffectiveOpCode()),
+                               OpCode::GetInfo(instr.opcode).name, instr.hex);
+                UNREACHABLE();
+            }
+            }
+            break;
+        }
+
+        default: {
+            switch (instr.opcode.EffectiveOpCode()) {
+            case OpCode::Id::EXIT: {
+                shader.AddLine("return true;");
+                offset = PROGRAM_END - 1;
+                break;
+            }
+            case OpCode::Id::IPA: {
+                const auto& attribute = instr.attribute.fmt28;
+                std::string dest = GetRegister(instr.gpr0);
+                SetDest(attribute.element, dest, GetInputAttribute(attribute.index), 1, 4);
+                break;
+            }
+            default: {
+                NGLOG_CRITICAL(HW_GPU, "Unhandled instruction: {} ({}): {}",
+                               static_cast<unsigned>(instr.opcode.EffectiveOpCode()),
+                               OpCode::GetInfo(instr.opcode).name, instr.hex);
+                UNREACHABLE();
+            }
+            }
+
+            break;
+        }
+        }
+
+        return offset + 1;
+    }
+
+    /**
+     * Translates the instructions in [begin, end) from Tegra shader code to GLSL.
+     * @param begin offset of the first instruction to compile.
+     * @param end exclusive stop offset; if begin > end, PROGRAM_END is used as the limit.
+     * @return offset of the next instruction to compile (PROGRAM_END once the program ends).
+     */
+    u32 CompileRange(u32 begin, u32 end) {
+        const u32 limit = begin > end ? PROGRAM_END : end;
+        u32 next_offset = begin;
+        while (next_offset < limit) {
+            next_offset = CompileInstr(next_offset);
+        }
+        return next_offset;
+    }
+
+    void Generate() {
+        // Forward-declare all subroutines; their definitions are only emitted after exec_shader()
+        for (const auto& subroutine : subroutines) {
+            shader.AddLine("bool " + subroutine.GetName() + "();");
+        }
+        shader.AddNewLine();
+
+        // Add the main entry point
+        shader.AddLine("bool exec_shader() {");
+        ++shader.scope;
+        CallSubroutine(GetSubroutine(main_offset, PROGRAM_END));
+        --shader.scope;
+        shader.AddLine("}\n");
+
+        // Add definitions for all subroutines
+        for (const auto& subroutine : subroutines) {
+            std::set<u32> labels = subroutine.labels; // local copy: labels may be added below
+
+            shader.AddLine("bool " + subroutine.GetName() + "() {");
+            ++shader.scope;
+
+            if (labels.empty()) { // no labels: the whole subroutine is compiled as one range
+                if (CompileRange(subroutine.begin, subroutine.end) != PROGRAM_END) {
+                    shader.AddLine("return false;"); // the EXIT opcode emits `return true;`
+                }
+            } else { // labels become the cases of a switch on jmp_to inside an endless loop
+                labels.insert(subroutine.begin);
+                shader.AddLine("uint jmp_to = " + std::to_string(subroutine.begin) + "u;");
+                shader.AddLine("while (true) {");
+                ++shader.scope;
+
+                shader.AddLine("switch (jmp_to) {");
+
+                for (auto label : labels) {
+                    shader.AddLine("case " + std::to_string(label) + "u: {");
+                    ++shader.scope;
+                    // Each case runs up to the next label, or to the subroutine end
+                    auto next_it = labels.lower_bound(label + 1);
+                    u32 next_label = next_it == labels.end() ? subroutine.end : *next_it;
+
+                    u32 compile_end = CompileRange(label, next_label);
+                    if (compile_end > next_label && compile_end != PROGRAM_END) {
+                        // This happens only when there is a label inside an IF/LOOP block
+                        shader.AddLine("{ jmp_to = " + std::to_string(compile_end) + "u; break; }");
+                        labels.emplace(compile_end); // the loop also visits this new label
+                    }
+
+                    --shader.scope;
+                    shader.AddLine('}');
+                }
+
+                shader.AddLine("default: return false;");
+                shader.AddLine('}');
+
+                --shader.scope;
+                shader.AddLine('}');
+
+                shader.AddLine("return false;");
+            }
+
+            --shader.scope;
+            shader.AddLine("}\n");
+
+            DEBUG_ASSERT(shader.scope == 0);
+        }
+
+        GenerateDeclarations(); // presumably last: compiling fills the declr_* sets
+    }
+
+    /// Returns copies of the constant buffer entries for which IsUsed() is true
+    std::vector<ConstBufferEntry> GetConstBuffersDeclarations() const {
+        std::vector<ConstBufferEntry> result;
+        std::copy_if(declr_const_buffers.begin(), declr_const_buffers.end(),
+                     std::back_inserter(result), [](const auto& entry) { return entry.IsUsed(); });
+        return result;
+    }
+
+    /// Emits the GLSL declarations for everything recorded in the declr_* members: registers,
+    /// input/output attributes and one std430 buffer block per used constant buffer
+    void GenerateDeclarations() {
+        for (const auto& reg : declr_register) {
+            declarations.AddLine("float " + reg + " = 0.0;");
+        }
+        declarations.AddNewLine();
+
+        for (const auto& index : declr_input_attribute) {
+            // TODO(bunnei): Use proper number of elements for these
+            declarations.AddLine("layout(location = " +
+                                 std::to_string(static_cast<u32>(index) -
+                                                static_cast<u32>(Attribute::Index::Attribute_0)) +
+                                 ") in vec4 " + GetInputAttribute(index) + ";");
+        }
+        declarations.AddNewLine();
+
+        for (const auto& index : declr_output_attribute) {
+            // TODO(bunnei): Use proper number of elements for these
+            declarations.AddLine("layout(location = " +
+                                 std::to_string(static_cast<u32>(index) -
+                                                static_cast<u32>(Attribute::Index::Attribute_0)) +
+                                 ") out vec4 " + GetOutputAttribute(index) + ";");
+        }
+        declarations.AddNewLine();
+
+        // Each used constant buffer is exposed as an unsized float array named c<index>
+        for (const auto& entry : GetConstBuffersDeclarations()) {
+            declarations.AddLine("layout(std430) buffer " + entry.GetName());
+            declarations.AddLine('{');
+            declarations.AddLine("    float c" + std::to_string(entry.GetIndex()) + "[];");
+            declarations.AddLine("};");
+            declarations.AddNewLine();
+        }
+    }
+
+private:
+    const std::set<Subroutine>& subroutines;
+    const ProgramCode& program_code;
+    const u32 main_offset;
+    Maxwell3D::Regs::ShaderStage stage;
+
+    ShaderWriter shader;
+    ShaderWriter declarations;
+
+    // Declarations
+    std::set<std::string> declr_register;
+    std::set<Attribute::Index> declr_input_attribute;
+    std::set<Attribute::Index> declr_output_attribute;
+    std::array<ConstBufferEntry, Maxwell3D::Regs::MaxConstBuffers> declr_const_buffers;
+}; // class GLSLGenerator
+
+std::string GetCommonDeclarations() {
+    return std::string{"bool exec_shader();"}; // prototype of the generated shader entry point
+}
+
+boost::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, u32 main_offset,
+                                                Maxwell3D::Regs::ShaderStage stage) {
+    try {
+        auto subroutines = ControlFlowAnalyzer(program_code, main_offset).GetSubroutines();
+        GLSLGenerator generator(subroutines, program_code, main_offset, stage);
+        return ProgramResult{generator.GetShaderCode(), generator.GetEntries()};
+    } catch (const DecompileFail& failure) {
+        NGLOG_ERROR(HW_GPU, "Shader decompilation failed: {}", failure.what());
+        return boost::none; // a DecompileFail is reported to the caller as an empty optional
+    }
 }

 } // namespace Decompiler
-} // namespace Shader
-} // namespace Maxwell3D
+} // namespace GLShader
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -5,23 +5,20 @@
 #include <array>
 #include <functional>
 #include <string>
+#include <boost/optional.hpp>
 #include "common/common_types.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/renderer_opengl/gl_shader_gen.h"

-namespace Maxwell3D {
-namespace Shader {
+namespace GLShader {
 namespace Decompiler {

-constexpr size_t MAX_PROGRAM_CODE_LENGTH{0x100000};
-constexpr size_t MAX_SWIZZLE_DATA_LENGTH{0x100000};
+using Tegra::Engines::Maxwell3D;

-std::string DecompileProgram(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>& program_code,
-                             const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>& swizzle_data,
-                             u32 main_offset,
-                             const std::function<std::string(u32)>& inputreg_getter,
-                             const std::function<std::string(u32)>& outputreg_getter,
-                             bool sanitize_mul, const std::string& emit_cb = "",
-                             const std::string& setemit_cb = "");
+std::string GetCommonDeclarations();
+
+boost::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, u32 main_offset,
+                                                Maxwell3D::Regs::ShaderStage stage);

 } // namespace Decompiler
-} // namespace Shader
-} // namespace Maxwell3D
+} // namespace GLShader
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -3,18 +3,74 @@
 // Refer to the license.txt file included.

 #include "common/assert.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/renderer_opengl/gl_shader_decompiler.h"
 #include "video_core/renderer_opengl/gl_shader_gen.h"

 namespace GLShader {

-std::string GenerateVertexShader(const MaxwellVSConfig& config) {
-    UNREACHABLE();
-    return {};
+using Tegra::Engines::Maxwell3D;
+
+static constexpr u32 PROGRAM_OFFSET{10};
+
+ProgramResult GenerateVertexShader(const ShaderSetup& setup, const MaxwellVSConfig& config) {
+    std::string source = "#version 430 core\n"
+                         "#extension GL_ARB_separate_shader_objects : enable\n\n";
+    source += Decompiler::GetCommonDeclarations();
+    // A failed decompilation falls back to a default-constructed (empty) ProgramResult
+    ProgramResult decompiled = Decompiler::DecompileProgram(setup.program_code, PROGRAM_OFFSET,
+                                                            Maxwell3D::Regs::ShaderStage::Vertex)
+                                   .get_value_or({});
+    source += R"(
+
+out gl_PerVertex {
+    vec4 gl_Position;
+};
+
+out vec4 position;
+
+layout (std140) uniform vs_config {
+    vec4 viewport_flip;
+};
+
+void main() {
+    exec_shader();
+
+    // Viewport can be flipped, which is unsupported by glViewport
+    position.xy *= viewport_flip.xy;
+    gl_Position = position;
+}
+)";
+    source += decompiled.first;
+    return {source, decompiled.second};
 }

-std::string GenerateFragmentShader(const MaxwellFSConfig& config) {
-    UNREACHABLE();
-    return {};
+ProgramResult GenerateFragmentShader(const ShaderSetup& setup, const MaxwellFSConfig& config) {
+    std::string source = "#version 430 core\n"
+                         "#extension GL_ARB_separate_shader_objects : enable\n\n";
+    source += Decompiler::GetCommonDeclarations();
+    // A failed decompilation falls back to a default-constructed (empty) ProgramResult
+    ProgramResult decompiled = Decompiler::DecompileProgram(setup.program_code, PROGRAM_OFFSET,
+                                                            Maxwell3D::Regs::ShaderStage::Fragment)
+                                   .get_value_or({});
+    source += R"(
+
+in vec4 position;
+out vec4 color;
+
+layout (std140) uniform fs_config {
+    vec4 viewport_flip;
+};
+
+uniform sampler2D tex[32];
+
+void main() {
+    exec_shader();
+}
+
+)";
+    source += decompiled.first;
+    return {source, decompiled.second};
 }

 } // namespace GLShader
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -4,46 +4,113 @@

 #pragma once

-#include <cstring>
+#include <array>
 #include <string>
 #include <type_traits>
+#include <utility>
+#include <vector>
+#include "common/common_types.h"
 #include "common/hash.h"

 namespace GLShader {

-enum Attributes {
-    ATTRIBUTE_POSITION,
-    ATTRIBUTE_COLOR,
-    ATTRIBUTE_TEXCOORD0,
-    ATTRIBUTE_TEXCOORD1,
-    ATTRIBUTE_TEXCOORD2,
-    ATTRIBUTE_TEXCOORD0_W,
-    ATTRIBUTE_NORMQUAT,
-    ATTRIBUTE_VIEW,
+constexpr size_t MAX_PROGRAM_CODE_LENGTH{0x1000};
+
+using ProgramCode = std::array<u64, MAX_PROGRAM_CODE_LENGTH>;
+
+class ConstBufferEntry {
+    using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+    // Records how one constant buffer is used: its index, stage and largest offset seen.
+public:
+    void MarkAsUsed(unsigned index, unsigned offset, Maxwell::ShaderStage stage) {
+        is_used = true;
+        this->index = index;
+        this->stage = stage;
+        max_offset = std::max(max_offset, offset); // remember the largest offset seen so far
+    }
+
+    bool IsUsed() const {
+        return is_used;
+    }
+
+    unsigned GetIndex() const {
+        return index;
+    }
+
+    unsigned GetSize() const {
+        return max_offset + 1; // one past the largest offset passed to MarkAsUsed()
+    }
+
+    std::string GetName() const {
+        return BufferBaseNames[static_cast<size_t>(stage)] + std::to_string(index);
+    }
+
+private:
+    static constexpr std::array<const char*, Maxwell::MaxShaderStage> BufferBaseNames = {
+        "buffer_vs_c", "buffer_tessc_c", "buffer_tesse_c", "buffer_gs_c", "buffer_fs_c",
+    };
+
+    bool is_used{};
+    unsigned index{};
+    unsigned max_offset{};
+    Maxwell::ShaderStage stage{}; // value-initialized so GetName() never reads garbage
+};
+
+struct ShaderEntries {
+    std::vector<ConstBufferEntry> const_buffer_entries; // NOTE(review): presumably used ones only
+};
+
+using ProgramResult = std::pair<std::string, ShaderEntries>;
+
+struct ShaderSetup {
+    ShaderSetup(ProgramCode&& program_code) : program_code(std::move(program_code)) {}
+
+    ProgramCode program_code;
+    bool program_code_hash_dirty = true;
+
+    /// Returns the hash of program_code, recomputing it only while the dirty flag is set
+    u64 GetProgramCodeHash() {
+        if (std::exchange(program_code_hash_dirty, false)) {
+            program_code_hash = Common::ComputeHash64(&program_code, sizeof(program_code));
+        }
+        return program_code_hash;
+    }
+
+private:
+    u64 program_code_hash{};
 };

 struct MaxwellShaderConfigCommon {
-    explicit MaxwellShaderConfigCommon(){};
+    void Init(ShaderSetup& setup) {
+        program_hash = setup.GetProgramCodeHash(); // recomputed by setup only while it is dirty
+    }
+
+    u64 program_hash; // hash of the shader's program code, assigned by Init()
 };

-struct MaxwellVSConfig : MaxwellShaderConfigCommon {
-    explicit MaxwellVSConfig() : MaxwellShaderConfigCommon() {}
-
-    bool operator==(const MaxwellVSConfig& o) const {
-        return std::memcmp(this, &o, sizeof(MaxwellVSConfig)) == 0;
-    };
+struct MaxwellVSConfig : Common::HashableStruct<MaxwellShaderConfigCommon> {
+    explicit MaxwellVSConfig(ShaderSetup& setup) {
+        state.Init(setup); // presumably MaxwellShaderConfigCommon::Init: stores the code hash
+    }
 };

-struct MaxwellFSConfig : MaxwellShaderConfigCommon {
-    explicit MaxwellFSConfig() : MaxwellShaderConfigCommon() {}
-
-    bool operator==(const MaxwellFSConfig& o) const {
-        return std::memcmp(this, &o, sizeof(MaxwellFSConfig)) == 0;
-    };
+struct MaxwellFSConfig : Common::HashableStruct<MaxwellShaderConfigCommon> {
+    explicit MaxwellFSConfig(ShaderSetup& setup) {
+        state.Init(setup); // presumably MaxwellShaderConfigCommon::Init: stores the code hash
+    }
 };

-std::string GenerateVertexShader(const MaxwellVSConfig& config);
-std::string GenerateFragmentShader(const MaxwellFSConfig& config);
+/**
+ * Generates the GLSL vertex shader program source code for the given VS program
+ * @returns ProgramResult holding the GLSL source code and the shader's entries
+ */
+ProgramResult GenerateVertexShader(const ShaderSetup& setup, const MaxwellVSConfig& config);
+
+/**
+ * Generates the GLSL fragment shader program source code for the given FS program
+ * @returns ProgramResult holding the GLSL source code and the shader's entries
+ */
+ProgramResult GenerateFragmentShader(const ShaderSetup& setup, const MaxwellFSConfig& config);

 } // namespace GLShader

@@ -52,14 +119,14 @@ namespace std {
 template <>
 struct hash<GLShader::MaxwellVSConfig> {
    size_t operator()(const GLShader::MaxwellVSConfig& k) const {
-        return Common::ComputeHash64(&k, sizeof(GLShader::MaxwellVSConfig));
+        return k.Hash();
    }
 };

 template <>
 struct hash<GLShader::MaxwellFSConfig> {
    size_t operator()(const GLShader::MaxwellFSConfig& k) const {
-        return Common::ComputeHash64(&k, sizeof(GLShader::MaxwellFSConfig));
+        return k.Hash();
    }
 };

--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -0,0 +1,64 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "core/core.h"
+#include "core/hle/kernel/process.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/renderer_opengl/gl_shader_manager.h"
+
+namespace GLShader {
+
+namespace Impl {
+void SetShaderUniformBlockBinding(GLuint shader, const char* name,
+                                  Maxwell3D::Regs::ShaderStage binding, size_t expected_size) {
+    const GLuint ub_index = glGetUniformBlockIndex(shader, name);
+    if (ub_index == GL_INVALID_INDEX) {
+        return; // the program has no active uniform block with this name
+    }
+    GLint ub_size = 0;
+    glGetActiveUniformBlockiv(shader, ub_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size);
+    ASSERT_MSG(ub_size == expected_size, "Uniform block size did not match! Got %d, expected %zu",
+               static_cast<int>(ub_size), expected_size);
+    glUniformBlockBinding(shader, ub_index, static_cast<GLuint>(binding));
+}
+
+void SetShaderUniformBlockBindings(GLuint shader) {
+    using Stage = Maxwell3D::Regs::ShaderStage;
+    constexpr size_t block_size = sizeof(MaxwellUniformData);
+    // Every stage's config block is expected to have the size of MaxwellUniformData
+    SetShaderUniformBlockBinding(shader, "vs_config", Stage::Vertex, block_size);
+    SetShaderUniformBlockBinding(shader, "gs_config", Stage::Geometry, block_size);
+    SetShaderUniformBlockBinding(shader, "fs_config", Stage::Fragment, block_size);
+}
+
+void SetShaderSamplerBindings(GLuint shader) {
+    // Make `shader` the current program in the tracked GL state while its uniforms are set
+    OpenGLState state = OpenGLState::GetCurState();
+    const GLuint previous_program = std::exchange(state.draw.shader_program, shader);
+    state.Apply();
+
+    // Set the texture samplers to correspond to different texture units
+    for (u32 index = 0; index < NumTextureSamplers; ++index) {
+        const std::string sampler_name = "tex[" + std::to_string(index) + "]";
+        const GLint location = glGetUniformLocation(shader, sampler_name.c_str());
+        if (location != -1) {
+            glUniform1i(location, TextureUnits::MaxwellTexture(index).id);
+        }
+    }
+
+    state.draw.shader_program = previous_program;
+    state.Apply();
+}
+
+} // namespace Impl
+
+void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage) {
+    const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+    // Derives viewport_flip from viewport 0's scale: -1 for a negative (flipped) axis, else 1
+    // TODO(bunnei): Support more than one viewport
+    viewport_flip[0] = regs.viewport_transform[0].scale_x < 0.0 ? -1.0 : 1.0;
+    viewport_flip[1] = regs.viewport_transform[0].scale_y < 0.0 ? -1.0 : 1.0;
+}
+
+} // namespace GLShader
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -0,0 +1,175 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <tuple>
+#include <unordered_map>
+#include <boost/functional/hash.hpp>
+#include <glad/glad.h>
+#include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/renderer_opengl/gl_shader_gen.h"
+#include "video_core/renderer_opengl/maxwell_to_gl.h"
+
+namespace GLShader {
+
+/// Number of OpenGL texture samplers that can be used in the fragment shader
+static constexpr size_t NumTextureSamplers = 32;
+
+using Tegra::Engines::Maxwell3D;
+
+namespace Impl {
+void SetShaderUniformBlockBindings(GLuint shader);
+void SetShaderSamplerBindings(GLuint shader);
+} // namespace Impl
+
+/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
+// NOTE: Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
+//       the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
+//       Not following that rule will cause problems on some AMD drivers.
+struct MaxwellUniformData {
+    void SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage);
+    alignas(16) GLvec4 viewport_flip; // x and y are set to -1 or 1 by SetFromRegs()
+};
+static_assert(sizeof(MaxwellUniformData) == 16, "MaxwellUniformData structure size is incorrect");
+static_assert(sizeof(MaxwellUniformData) < 16384,
+              "MaxwellUniformData structure must be less than 16kb as per the OpenGL spec");
+
+class OGLShaderStage {
+public:
+    OGLShaderStage() = default;
+    /// Builds this stage's GL program from program_result's source and keeps its entries
+    void Create(const ProgramResult& program_result, GLenum type) {
+        OGLShader shader;
+        shader.Create(program_result.first.c_str(), type);
+        program.Create(true, shader.handle); // NOTE(review): `true` presumably = separable program
+        Impl::SetShaderUniformBlockBindings(program.handle);
+        Impl::SetShaderSamplerBindings(program.handle);
+        entries = program_result.second;
+    }
+    GLuint GetHandle() const {
+        return program.handle;
+    }
+
+    ShaderEntries GetEntries() const {
+        return entries; // returned by value, i.e. a copy of the stored entries
+    }
+
+private:
+    OGLProgram program;
+    ShaderEntries entries;
+};
+
+// TODO(wwylele): beautify this doc
+// This is a shader cache designed for translating Maxwell shader programs to GLSL shaders.
+// The double cache is needed because different KeyConfigType values, which include a hash of the
+// code region (including its leftover unused code), can generate the same GLSL code.
+template <typename KeyConfigType,
+          ProgramResult (*CodeGenerator)(const ShaderSetup&, const KeyConfigType&),
+          GLenum ShaderType>
+class ShaderCache {
+public:
+    ShaderCache() = default;
+
+    using Result = std::pair<GLuint, ShaderEntries>;
+
+    /// Returns the program handle and entries for `key`, generating the shader on a cache miss
+    Result Get(const KeyConfigType& key, const ShaderSetup& setup) {
+        const auto map_it = shader_map.find(key);
+        if (map_it != shader_map.end()) {
+            return {map_it->second->GetHandle(), map_it->second->GetEntries()};
+        }
+        ProgramResult program = CodeGenerator(setup, key);
+        // try_emplace only constructs a stage when this GLSL source has not been seen yet
+        auto [iter, new_shader] = shader_cache.try_emplace(program.first);
+        OGLShaderStage& cached_shader = iter->second;
+        if (new_shader) {
+            cached_shader.Create(program, ShaderType);
+        }
+        shader_map[key] = &cached_shader; // unordered_map never relocates its elements
+        return {cached_shader.GetHandle(), program.second};
+    }
+
+private:
+    std::unordered_map<KeyConfigType, OGLShaderStage*> shader_map;
+    std::unordered_map<std::string, OGLShaderStage> shader_cache;
+};
+
+using VertexShaders = ShaderCache<MaxwellVSConfig, &GenerateVertexShader, GL_VERTEX_SHADER>;
+
+using FragmentShaders = ShaderCache<MaxwellFSConfig, &GenerateFragmentShader, GL_FRAGMENT_SHADER>;
+
+class ProgramManager {
+public:
+    ProgramManager() {
+        pipeline.Create(); // pipeline object that ApplyTo() binds the stage programs to
+    }
+
+    ShaderEntries UseProgrammableVertexShader(const MaxwellVSConfig& config,
+                                              const ShaderSetup& setup) { // no copy of the code
+        ShaderEntries result;
+        std::tie(current.vs, result) = vertex_shaders.Get(config, setup);
+        return result;
+    }
+
+    ShaderEntries UseProgrammableFragmentShader(const MaxwellFSConfig& config,
+                                                const ShaderSetup& setup) { // no copy of the code
+        ShaderEntries result;
+        std::tie(current.fs, result) = fragment_shaders.Get(config, setup);
+        return result;
+    }
+
+    GLuint GetCurrentProgramStage(Maxwell3D::Regs::ShaderStage stage) {
+        switch (stage) {
+        case Maxwell3D::Regs::ShaderStage::Vertex:
+            return current.vs;
+        case Maxwell3D::Regs::ShaderStage::Fragment:
+            return current.fs;
+        }
+        UNREACHABLE();
+        return 0; // only reached if UNREACHABLE() returns; avoids flowing off the end
+    }
+
+    void UseTrivialGeometryShader() {
+        current.gs = 0;
+    }
+
+    void ApplyTo(OpenGLState& state) {
+        // Workaround for AMD bug: detach every stage before attaching the current ones
+        glUseProgramStages(pipeline.handle,
+                           GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | GL_FRAGMENT_SHADER_BIT,
+                           0);
+
+        glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, current.vs);
+        glUseProgramStages(pipeline.handle, GL_GEOMETRY_SHADER_BIT, current.gs);
+        glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, current.fs);
+        state.draw.shader_program = 0; // the pipeline is selected instead of a single program
+        state.draw.program_pipeline = pipeline.handle;
+    }
+
+private:
+    struct ShaderTuple {
+        GLuint vs = 0, gs = 0, fs = 0;
+        bool operator==(const ShaderTuple& rhs) const {
+            return std::tie(vs, gs, fs) == std::tie(rhs.vs, rhs.gs, rhs.fs);
+        }
+        struct Hash {
+            std::size_t operator()(const ShaderTuple& tuple) const {
+                std::size_t hash = 0;
+                boost::hash_combine(hash, tuple.vs);
+                boost::hash_combine(hash, tuple.gs);
+                boost::hash_combine(hash, tuple.fs);
+                return hash;
+            }
+        };
+    };
+    ShaderTuple current;
+    VertexShaders vertex_shaders;
+    FragmentShaders fragment_shaders;
+
+    std::unordered_map<ShaderTuple, OGLProgram, ShaderTuple::Hash> program_cache;
+    OGLPipeline pipeline;
+};
+
+} // namespace GLShader
--- a/src/video_core/renderer_opengl/gl_shader_util.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_util.cpp
@@ -10,156 +10,41 @@

 namespace GLShader {

-GLuint LoadProgram(const char* vertex_shader, const char* geometry_shader,
-                   const char* fragment_shader, const std::vector<const char*>& feedback_vars,
-                   bool separable_program) {
-    // Create the shaders
-    GLuint vertex_shader_id = vertex_shader ? glCreateShader(GL_VERTEX_SHADER) : 0;
-    GLuint geometry_shader_id = geometry_shader ? glCreateShader(GL_GEOMETRY_SHADER) : 0;
-    GLuint fragment_shader_id = fragment_shader ? glCreateShader(GL_FRAGMENT_SHADER) : 0;
+GLuint LoadShader(const char* source, GLenum type) {
+    const char* debug_type;
+    switch (type) {
+    case GL_VERTEX_SHADER:
+        debug_type = "vertex";
+        break;
+    case GL_GEOMETRY_SHADER:
+        debug_type = "geometry";
+        break;
+    case GL_FRAGMENT_SHADER:
+        debug_type = "fragment";
+        break;
+    default:
+        UNREACHABLE();
+    }
+    GLuint shader_id = glCreateShader(type);
+    glShaderSource(shader_id, 1, &source, nullptr);
+    NGLOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type);
+    glCompileShader(shader_id);

    GLint result = GL_FALSE;
-    int info_log_length;
-
-    if (vertex_shader) {
-        // Compile Vertex Shader
-        LOG_DEBUG(Render_OpenGL, "Compiling vertex shader...");
-
-        glShaderSource(vertex_shader_id, 1, &vertex_shader, nullptr);
-        glCompileShader(vertex_shader_id);
-
-        // Check Vertex Shader
-        glGetShaderiv(vertex_shader_id, GL_COMPILE_STATUS, &result);
-        glGetShaderiv(vertex_shader_id, GL_INFO_LOG_LENGTH, &info_log_length);
-
-        if (info_log_length > 1) {
-            std::vector<char> vertex_shader_error(info_log_length);
-            glGetShaderInfoLog(vertex_shader_id, info_log_length, nullptr, &vertex_shader_error[0]);
-            if (result == GL_TRUE) {
-                LOG_DEBUG(Render_OpenGL, "%s", &vertex_shader_error[0]);
-            } else {
-                LOG_CRITICAL(Render_OpenGL, "Error compiling vertex shader:\n%s",
-                             &vertex_shader_error[0]);
-            }
-        }
-    }
-
-    if (geometry_shader) {
-        // Compile Geometry Shader
-        LOG_DEBUG(Render_OpenGL, "Compiling geometry shader...");
-
-        glShaderSource(geometry_shader_id, 1, &geometry_shader, nullptr);
-        glCompileShader(geometry_shader_id);
-
-        // Check Geometry Shader
-        glGetShaderiv(geometry_shader_id, GL_COMPILE_STATUS, &result);
-        glGetShaderiv(geometry_shader_id, GL_INFO_LOG_LENGTH, &info_log_length);
-
-        if (info_log_length > 1) {
-            std::vector<char> geometry_shader_error(info_log_length);
-            glGetShaderInfoLog(geometry_shader_id, info_log_length, nullptr,
-                               &geometry_shader_error[0]);
-            if (result == GL_TRUE) {
-                LOG_DEBUG(Render_OpenGL, "%s", &geometry_shader_error[0]);
-            } else {
-                LOG_CRITICAL(Render_OpenGL, "Error compiling geometry shader:\n%s",
-                             &geometry_shader_error[0]);
-            }
-        }
-    }
-
-    if (fragment_shader) {
-        // Compile Fragment Shader
-        LOG_DEBUG(Render_OpenGL, "Compiling fragment shader...");
-
-        glShaderSource(fragment_shader_id, 1, &fragment_shader, nullptr);
-        glCompileShader(fragment_shader_id);
-
-        // Check Fragment Shader
-        glGetShaderiv(fragment_shader_id, GL_COMPILE_STATUS, &result);
-        glGetShaderiv(fragment_shader_id, GL_INFO_LOG_LENGTH, &info_log_length);
-
-        if (info_log_length > 1) {
-            std::vector<char> fragment_shader_error(info_log_length);
-            glGetShaderInfoLog(fragment_shader_id, info_log_length, nullptr,
-                               &fragment_shader_error[0]);
-            if (result == GL_TRUE) {
-                LOG_DEBUG(Render_OpenGL, "%s", &fragment_shader_error[0]);
-            } else {
-                LOG_CRITICAL(Render_OpenGL, "Error compiling fragment shader:\n%s",
-                             &fragment_shader_error[0]);
-            }
-        }
-    }
-
-    // Link the program
-    LOG_DEBUG(Render_OpenGL, "Linking program...");
-
-    GLuint program_id = glCreateProgram();
-    if (vertex_shader) {
-        glAttachShader(program_id, vertex_shader_id);
-    }
-    if (geometry_shader) {
-        glAttachShader(program_id, geometry_shader_id);
-    }
-    if (fragment_shader) {
-        glAttachShader(program_id, fragment_shader_id);
-    }
-
-    if (!feedback_vars.empty()) {
-        auto varyings = feedback_vars;
-        glTransformFeedbackVaryings(program_id, static_cast<GLsizei>(feedback_vars.size()),
-                                    &varyings[0], GL_INTERLEAVED_ATTRIBS);
-    }
-
-    if (separable_program) {
-        glProgramParameteri(program_id, GL_PROGRAM_SEPARABLE, GL_TRUE);
-    }
-
-    glLinkProgram(program_id);
-
-    // Check the program
-    glGetProgramiv(program_id, GL_LINK_STATUS, &result);
-    glGetProgramiv(program_id, GL_INFO_LOG_LENGTH, &info_log_length);
+    GLint info_log_length;
+    glGetShaderiv(shader_id, GL_COMPILE_STATUS, &result);
+    glGetShaderiv(shader_id, GL_INFO_LOG_LENGTH, &info_log_length);

    if (info_log_length > 1) {
-        std::vector<char> program_error(info_log_length);
-        glGetProgramInfoLog(program_id, info_log_length, nullptr, &program_error[0]);
+        std::string shader_error(info_log_length, ' ');
+        glGetShaderInfoLog(shader_id, info_log_length, nullptr, &shader_error[0]);
        if (result == GL_TRUE) {
-            LOG_DEBUG(Render_OpenGL, "%s", &program_error[0]);
+            NGLOG_DEBUG(Render_OpenGL, "{}", shader_error);
        } else {
-            LOG_CRITICAL(Render_OpenGL, "Error linking shader:\n%s", &program_error[0]);
+            NGLOG_ERROR(Render_OpenGL, "Error compiling {} shader:\n{}", debug_type, shader_error);
        }
    }
-
-    // If the program linking failed at least one of the shaders was probably bad
-    if (result == GL_FALSE) {
-        if (vertex_shader) {
-            LOG_CRITICAL(Render_OpenGL, "Vertex shader:\n%s", vertex_shader);
-        }
-        if (geometry_shader) {
-            LOG_CRITICAL(Render_OpenGL, "Geometry shader:\n%s", geometry_shader);
-        }
-        if (fragment_shader) {
-            LOG_CRITICAL(Render_OpenGL, "Fragment shader:\n%s", fragment_shader);
-        }
-    }
-    ASSERT_MSG(result == GL_TRUE, "Shader not linked");
-
-    if (vertex_shader) {
-        glDetachShader(program_id, vertex_shader_id);
-        glDeleteShader(vertex_shader_id);
-    }
-    if (geometry_shader) {
-        glDetachShader(program_id, geometry_shader_id);
-        glDeleteShader(geometry_shader_id);
-    }
-    if (fragment_shader) {
-        glDetachShader(program_id, fragment_shader_id);
-        glDeleteShader(fragment_shader_id);
-    }
-
-    return program_id;
+    return shader_id;
 }

 } // namespace GLShader
--- a/src/video_core/renderer_opengl/gl_shader_util.h
+++ b/src/video_core/renderer_opengl/gl_shader_util.h
@@ -6,18 +6,60 @@

 #include <vector>
 #include <glad/glad.h>
+#include "common/assert.h"
+#include "common/logging/log.h"

 namespace GLShader {

 /**
- * Utility function to create and compile an OpenGL GLSL shader program (vertex + fragment shader)
- * @param vertex_shader String of the GLSL vertex shader program
- * @param geometry_shader String of the GLSL geometry shader program
- * @param fragment_shader String of the GLSL fragment shader program
- * @returns Handle of the newly created OpenGL shader object
+ * Utility function to create and compile an OpenGL GLSL shader
+ * @param source String of the GLSL shader program
+ * @param type Type of the shader (GL_VERTEX_SHADER, GL_GEOMETRY_SHADER or GL_FRAGMENT_SHADER)
 */
-GLuint LoadProgram(const char* vertex_shader, const char* geometry_shader,
-                   const char* fragment_shader, const std::vector<const char*>& feedback_vars = {},
-                   bool separable_program = false);
+GLuint LoadShader(const char* source, GLenum type);
+
+/**
+ * Utility function to create and link an OpenGL GLSL program from previously compiled shaders
+ * @param separable_program whether to create a separable program
+ * @param shaders IDs of the shaders to attach to the program (IDs equal to 0 are skipped)
+ * @returns Handle of the newly created OpenGL program object
+ */
+template <typename... T>
+GLuint LoadProgram(bool separable_program, T... shaders) {
+    // Link the program
+    NGLOG_DEBUG(Render_OpenGL, "Linking program...");
+
+    GLuint program_id = glCreateProgram();
+
+    ((shaders == 0 ? (void)0 : glAttachShader(program_id, shaders)), ...);
+
+    if (separable_program) {
+        glProgramParameteri(program_id, GL_PROGRAM_SEPARABLE, GL_TRUE);
+    }
+
+    glLinkProgram(program_id);
+
+    // Check the program
+    GLint result = GL_FALSE;
+    GLint info_log_length;
+    glGetProgramiv(program_id, GL_LINK_STATUS, &result);
+    glGetProgramiv(program_id, GL_INFO_LOG_LENGTH, &info_log_length);
+
+    if (info_log_length > 1) {
+        std::string program_error(info_log_length, ' ');
+        glGetProgramInfoLog(program_id, info_log_length, nullptr, &program_error[0]);
+        if (result == GL_TRUE) {
+            NGLOG_DEBUG(Render_OpenGL, "{}", program_error);
+        } else {
+            NGLOG_ERROR(Render_OpenGL, "Error linking shader:\n{}", program_error);
+        }
+    }
+
+    ASSERT_MSG(result == GL_TRUE, "Shader not linked");
+
+    ((shaders == 0 ? (void)0 : glDetachShader(program_id, shaders)), ...);
+
+    return program_id;
+}

 } // namespace GLShader
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -202,6 +202,20 @@ void OpenGLState::Apply() const {
        }
    }

+    // Constbuffers
+    for (u32 stage = 0; stage < draw.const_buffers.size(); ++stage) {
+        for (u32 buffer_id = 0; buffer_id < draw.const_buffers[stage].size(); ++buffer_id) {
+            auto& current = cur_state.draw.const_buffers[stage][buffer_id];
+            auto& new_state = draw.const_buffers[stage][buffer_id];
+            if (current.enabled != new_state.enabled || current.bindpoint != new_state.bindpoint ||
+                current.ssbo != new_state.ssbo) {
+                if (new_state.enabled) {
+                    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, new_state.bindpoint, new_state.ssbo);
+                }
+            }
+        }
+    }
+
    // Lighting LUTs
    if (lighting_lut.texture_buffer != cur_state.lighting_lut.texture_buffer) {
        glActiveTexture(TextureUnits::LightingLUT.Enum());
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -123,6 +123,12 @@ public:
        GLuint uniform_buffer;   // GL_UNIFORM_BUFFER_BINDING
        GLuint shader_program;   // GL_CURRENT_PROGRAM
        GLuint program_pipeline; // GL_PROGRAM_PIPELINE_BINDING
+        struct ConstBufferConfig {
+            bool enabled = false;
+            GLuint bindpoint;
+            GLuint ssbo;
+        };
+        std::array<std::array<ConstBufferConfig, 16>, 5> const_buffers{};
    } draw;

    struct {
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -10,6 +10,14 @@
 #include "common/logging/log.h"
 #include "video_core/engines/maxwell_3d.h"

+using GLvec2 = std::array<GLfloat, 2>;
+using GLvec3 = std::array<GLfloat, 3>;
+using GLvec4 = std::array<GLfloat, 4>;
+
+using GLuvec2 = std::array<GLuint, 2>;
+using GLuvec3 = std::array<GLuint, 3>;
+using GLuvec4 = std::array<GLuint, 4>;
+
 namespace MaxwellToGL {

 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
@@ -23,7 +31,7 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
            return GL_UNSIGNED_BYTE;
        }

-        LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size=%s", attrib.SizeString().c_str());
+        NGLOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
        UNREACHABLE();
        return {};
    }
@@ -32,17 +40,33 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
        return GL_FLOAT;
    }

-    LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex type=%s", attrib.TypeString().c_str());
+    NGLOG_CRITICAL(Render_OpenGL, "Unimplemented vertex type={}", attrib.TypeString());
+    UNREACHABLE();
+    return {};
+}
+
+inline GLenum IndexFormat(Maxwell::IndexFormat index_format) {
+    switch (index_format) {
+    case Maxwell::IndexFormat::UnsignedByte:
+        return GL_UNSIGNED_BYTE;
+    case Maxwell::IndexFormat::UnsignedShort:
+        return GL_UNSIGNED_SHORT;
+    case Maxwell::IndexFormat::UnsignedInt:
+        return GL_UNSIGNED_INT;
+    }
+    NGLOG_CRITICAL(Render_OpenGL, "Unimplemented index_format={}", static_cast<u32>(index_format));
    UNREACHABLE();
    return {};
 }

 inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
    switch (topology) {
+    case Maxwell::PrimitiveTopology::Triangles:
+        return GL_TRIANGLES;
    case Maxwell::PrimitiveTopology::TriangleStrip:
        return GL_TRIANGLE_STRIP;
    }
-    LOG_CRITICAL(Render_OpenGL, "Unimplemented primitive topology=%d", topology);
+    NGLOG_CRITICAL(Render_OpenGL, "Unimplemented topology={}", static_cast<u32>(topology));
    UNREACHABLE();
    return {};
 }
@@ -54,18 +78,90 @@ inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode) {
    case Tegra::Texture::TextureFilter::Nearest:
        return GL_NEAREST;
    }
-    LOG_CRITICAL(Render_OpenGL, "Unimplemented texture filter mode=%u",
-                 static_cast<u32>(filter_mode));
+    NGLOG_CRITICAL(Render_OpenGL, "Unimplemented texture filter mode={}",
+                   static_cast<u32>(filter_mode));
    UNREACHABLE();
    return {};
 }

 inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) {
    switch (wrap_mode) {
+    case Tegra::Texture::WrapMode::Wrap:
+        return GL_REPEAT;
    case Tegra::Texture::WrapMode::ClampToEdge:
        return GL_CLAMP_TO_EDGE;
+    case Tegra::Texture::WrapMode::ClampOGL:
+        // TODO(Subv): GL_CLAMP was removed as of OpenGL 3.1, to implement GL_CLAMP, we can use
+        // GL_CLAMP_TO_BORDER to get the border color of the texture, and then sample the edge to
+        // manually mix them. However the shader part of this is not yet implemented.
+        return GL_CLAMP_TO_BORDER;
    }
-    LOG_CRITICAL(Render_OpenGL, "Unimplemented texture wrap mode=%u", static_cast<u32>(wrap_mode));
+    NGLOG_CRITICAL(Render_OpenGL, "Unimplemented texture wrap mode={}",
+                   static_cast<u32>(wrap_mode));
+    UNREACHABLE();
+    return {};
+}
+
+inline GLenum BlendEquation(Maxwell::Blend::Equation equation) {
+    switch (equation) {
+    case Maxwell::Blend::Equation::Add:
+        return GL_FUNC_ADD;
+    case Maxwell::Blend::Equation::Subtract:
+        return GL_FUNC_SUBTRACT;
+    case Maxwell::Blend::Equation::ReverseSubtract:
+        return GL_FUNC_REVERSE_SUBTRACT;
+    case Maxwell::Blend::Equation::Min:
+        return GL_MIN;
+    case Maxwell::Blend::Equation::Max:
+        return GL_MAX;
+    }
+    NGLOG_CRITICAL(Render_OpenGL, "Unimplemented blend equation={}", static_cast<u32>(equation));
+    UNREACHABLE();
+    return {};
+}
+
+inline GLenum BlendFunc(Maxwell::Blend::Factor factor) {
+    switch (factor) {
+    case Maxwell::Blend::Factor::Zero:
+        return GL_ZERO;
+    case Maxwell::Blend::Factor::One:
+        return GL_ONE;
+    case Maxwell::Blend::Factor::SourceColor:
+        return GL_SRC_COLOR;
+    case Maxwell::Blend::Factor::OneMinusSourceColor:
+        return GL_ONE_MINUS_SRC_COLOR;
+    case Maxwell::Blend::Factor::SourceAlpha:
+        return GL_SRC_ALPHA;
+    case Maxwell::Blend::Factor::OneMinusSourceAlpha:
+        return GL_ONE_MINUS_SRC_ALPHA;
+    case Maxwell::Blend::Factor::DestAlpha:
+        return GL_DST_ALPHA;
+    case Maxwell::Blend::Factor::OneMinusDestAlpha:
+        return GL_ONE_MINUS_DST_ALPHA;
+    case Maxwell::Blend::Factor::DestColor:
+        return GL_DST_COLOR;
+    case Maxwell::Blend::Factor::OneMinusDestColor:
+        return GL_ONE_MINUS_DST_COLOR;
+    case Maxwell::Blend::Factor::SourceAlphaSaturate:
+        return GL_SRC_ALPHA_SATURATE;
+    case Maxwell::Blend::Factor::Source1Color:
+        return GL_SRC1_COLOR;
+    case Maxwell::Blend::Factor::OneMinusSource1Color:
+        return GL_ONE_MINUS_SRC1_COLOR;
+    case Maxwell::Blend::Factor::Source1Alpha:
+        return GL_SRC1_ALPHA;
+    case Maxwell::Blend::Factor::OneMinusSource1Alpha:
+        return GL_ONE_MINUS_SRC1_ALPHA;
+    case Maxwell::Blend::Factor::ConstantColor:
+        return GL_CONSTANT_COLOR;
+    case Maxwell::Blend::Factor::OneMinusConstantColor:
+        return GL_ONE_MINUS_CONSTANT_COLOR;
+    case Maxwell::Blend::Factor::ConstantAlpha:
+        return GL_CONSTANT_ALPHA;
+    case Maxwell::Blend::Factor::OneMinusConstantAlpha:
+        return GL_ONE_MINUS_CONSTANT_ALPHA;
+    }
+    NGLOG_CRITICAL(Render_OpenGL, "Unimplemented blend factor={}", static_cast<u32>(factor));
    UNREACHABLE();
    return {};
 }
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -57,7 +57,7 @@ uniform sampler2D color_texture;
 void main() {
    // Swap RGBA -> ABGR so we don't have to do this on the CPU. This needs to change if we have to
    // support more framebuffer pixel formats.
-    color = texture(color_texture, frag_tex_coord).abgr;
+    color = texture(color_texture, frag_tex_coord);
 }
 )";

@@ -210,7 +210,7 @@ void RendererOpenGL::InitOpenGLObjects() {
                 0.0f);

    // Link shaders and get variable locations
-    shader.Create(vertex_shader, nullptr, fragment_shader);
+    shader.CreateFromSource(vertex_shader, nullptr, fragment_shader);
    state.draw.shader_program = shader.handle;
    state.Apply();
    uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix");
@@ -311,10 +311,10 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
        }

    std::array<ScreenRectVertex, 4> vertices = {{
-        ScreenRectVertex(x, y, texcoords.top, right),
-        ScreenRectVertex(x + w, y, texcoords.bottom, right),
-        ScreenRectVertex(x, y + h, texcoords.top, left),
-        ScreenRectVertex(x + w, y + h, texcoords.bottom, left),
+        ScreenRectVertex(x, y, texcoords.top, left),
+        ScreenRectVertex(x + w, y, texcoords.bottom, left),
+        ScreenRectVertex(x, y + h, texcoords.top, right),
+        ScreenRectVertex(x + w, y + h, texcoords.bottom, right),
    }};

    state.texture_units[0].texture_2d = screen_info.display_texture;
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -72,7 +72,7 @@ private:
    // OpenGL object IDs
    OGLVertexArray vertex_array;
    OGLBuffer vertex_buffer;
-    OGLShader shader;
+    OGLProgram shader;

    /// Display information for Switch screen
    ScreenInfo screen_info;
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -48,31 +48,39 @@ u32 BytesPerPixel(TextureFormat format) {
    case TextureFormat::DXT1:
        // In this case a 'pixel' actually refers to a 4x4 tile.
        return 8;
+    case TextureFormat::DXT23:
+    case TextureFormat::DXT45:
+        // In this case a 'pixel' actually refers to a 4x4 tile.
+        return 16;
    case TextureFormat::A8R8G8B8:
        return 4;
+    case TextureFormat::B5G6R5:
+        return 2;
    default:
        UNIMPLEMENTED_MSG("Format not implemented");
        break;
    }
 }

-std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height) {
+std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height,
+                                 u32 block_height) {
    u8* data = Memory::GetPointer(address);
    u32 bytes_per_pixel = BytesPerPixel(format);

-    static constexpr u32 DefaultBlockHeight = 16;
-
    std::vector<u8> unswizzled_data(width * height * bytes_per_pixel);

    switch (format) {
    case TextureFormat::DXT1:
-        // In the DXT1 format, each 4x4 tile is swizzled instead of just individual pixel values.
+    case TextureFormat::DXT23:
+    case TextureFormat::DXT45:
+        // In the DXT formats, each 4x4 tile is swizzled instead of just individual pixel values.
        CopySwizzledData(width / 4, height / 4, bytes_per_pixel, bytes_per_pixel, data,
-                         unswizzled_data.data(), true, DefaultBlockHeight);
+                         unswizzled_data.data(), true, block_height);
        break;
    case TextureFormat::A8R8G8B8:
+    case TextureFormat::B5G6R5:
        CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data,
-                         unswizzled_data.data(), true, DefaultBlockHeight);
+                         unswizzled_data.data(), true, block_height);
        break;
    default:
        UNIMPLEMENTED_MSG("Format not implemented");
@@ -89,7 +97,10 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat
    // TODO(Subv): Implement.
    switch (format) {
    case TextureFormat::DXT1:
+    case TextureFormat::DXT23:
+    case TextureFormat::DXT45:
    case TextureFormat::A8R8G8B8:
+    case TextureFormat::B5G6R5:
        // TODO(Subv): For the time being just forward the same data without any decoding.
        rgba_data = texture_data;
        break;
--- a/src/video_core/textures/decoders.h
+++ b/src/video_core/textures/decoders.h
@@ -14,7 +14,8 @@ namespace Texture {
 /**
 * Unswizzles a swizzled texture without changing its format.
 */
-std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height);
+std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height,
+                                 u32 block_height = TICEntry::DefaultBlockHeight);

 /**
 * Decodes an unswizzled texture into a A8R8G8B8 texture.
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -4,6 +4,7 @@

 #pragma once

+#include "common/assert.h"
 #include "common/bit_field.h"
 #include "common/common_funcs.h"
 #include "common/common_types.h"
@@ -13,8 +14,11 @@ namespace Tegra {
 namespace Texture {

 enum class TextureFormat : u32 {
-    A8R8G8B8 = 8,
+    A8R8G8B8 = 0x8,
+    B5G6R5 = 0x15,
    DXT1 = 0x24,
+    DXT23 = 0x25,
+    DXT45 = 0x26,
 };

 enum class TextureType : u32 {
@@ -55,6 +59,8 @@ union TextureHandle {
 static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size");

 struct TICEntry {
+    static constexpr u32 DefaultBlockHeight = 16;
+
    union {
        u32 raw;
        BitField<0, 7, TextureFormat> format;
@@ -68,7 +74,12 @@ struct TICEntry {
        BitField<0, 16, u32> address_high;
        BitField<21, 3, TICHeaderVersion> header_version;
    };
-    INSERT_PADDING_BYTES(4);
+    union {
+        BitField<3, 3, u32> block_height;
+
+        // High 16 bits of the pitch value
+        BitField<0, 16, u32> pitch_high;
+    };
    union {
        BitField<0, 16, u32> width_minus_1;
        BitField<23, 4, TextureType> texture_type;
@@ -80,6 +91,13 @@ struct TICEntry {
        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low);
    }

+    u32 Pitch() const {
+        ASSERT(header_version == TICHeaderVersion::Pitch ||
+               header_version == TICHeaderVersion::PitchColorKey);
+        // The pitch value is 21 bits, and is 32B aligned.
+        return pitch_high << 5;
+    }
+
    u32 Width() const {
        return width_minus_1 + 1;
    }
@@ -88,6 +106,13 @@ struct TICEntry {
        return height_minus_1 + 1;
    }

+    u32 BlockHeight() const {
+        ASSERT(header_version == TICHeaderVersion::BlockLinear ||
+               header_version == TICHeaderVersion::BlockLinearColorKey);
+        // The block height is stored in log2 format.
+        return 1 << block_height;
+    }
+
    bool IsTiled() const {
        return header_version == TICHeaderVersion::BlockLinear ||
               header_version == TICHeaderVersion::BlockLinearColorKey;
--- a/src/video_core/utils.h
+++ b/src/video_core/utils.h
@@ -151,7 +151,7 @@ static inline void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixe
            const u32 coarse_y = y & ~127;
            u32 morton_offset =
                GetMortonOffset128(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel;
-            u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel;
+            u32 gl_pixel_index = (x + y * width) * gl_bytes_per_pixel;

            data_ptrs[morton_to_gl] = morton_data + morton_offset;
            data_ptrs[!morton_to_gl] = &gl_data[gl_pixel_index];
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -44,6 +44,15 @@
 Q_IMPORT_PLUGIN(QWindowsIntegrationPlugin);
 #endif

+#ifdef _WIN32
+extern "C" {
+// tells Nvidia and AMD drivers to use the dedicated GPU by default on laptops with switchable
+// graphics
+__declspec(dllexport) unsigned long NvOptimusEnablement = 0x00000001;
+__declspec(dllexport) int AmdPowerXpressRequestHighPerformance = 1;
+}
+#endif
+
 /**
 * "Callouts" are one-time instructional messages shown to the user. In the config settings, there
 * is a bitfield "callout_flags" options, used to track if a message has already been shown to the
@@ -776,9 +785,11 @@ void GMainWindow::closeEvent(QCloseEvent* event) {
        return;
    }

-    UISettings::values.geometry = saveGeometry();
+    if (ui.action_Fullscreen->isChecked()) {
+        UISettings::values.geometry = saveGeometry();
+        UISettings::values.renderwindow_geometry = render_window->saveGeometry();
+    }
    UISettings::values.state = saveState();
-    UISettings::values.renderwindow_geometry = render_window->saveGeometry();
 #if MICROPROFILE_ENABLED
    UISettings::values.microprofile_geometry = microProfileDialog->saveGeometry();
    UISettings::values.microprofile_visible = microProfileDialog->isVisible();
--- a/src/yuzu_cmd/yuzu.cpp
+++ b/src/yuzu_cmd/yuzu.cpp
@@ -37,6 +37,15 @@
 #include "yuzu_cmd/config.h"
 #include "yuzu_cmd/emu_window/emu_window_sdl2.h"

+#ifdef _WIN32
+extern "C" {
+// tells Nvidia and AMD drivers to use the dedicated GPU by default on laptops with switchable
+// graphics
+__declspec(dllexport) unsigned long NvOptimusEnablement = 0x00000001;
+__declspec(dllexport) int AmdPowerXpressRequestHighPerformance = 1;
+}
+#endif
+
 static void PrintHelp(const char* argv0) {
    std::cout << "Usage: " << argv0
              << " [options] <filename>\n"
Author	SHA1	Message	Date
Lioncash	16ffecd8fb	common_types: Convert typedefs to using aliases May as well while we're making changes to this file.	2018-04-19 22:26:35 -04:00
Lioncash	e8e5041955	common_types: Remove unnecessary check for whether or not __func__ is defined VS has supported this for quite a while.	2018-04-19 22:25:19 -04:00
bunnei	17ad56c1dc	Merge pull request #356 from lioncash/shader glsl_shader_decompiler: Minor API changes to ShaderWriter	2018-04-19 21:09:25 -04:00
bunnei	f633b0c875	Merge pull request #355 from Subv/shader_instr ShaderGen: Fixed TEXS overriding its own texcoords and implemented fmul32i	2018-04-19 21:09:02 -04:00
Lioncash	e3b6f6c016	glsl_shader_decompiler: Use std::string_view instead of std::string for AddLine() This function doesn't need to take ownership of the string data being given to it, considering all we do is append the characters to the internal string instance. Instead, use a string view to simply reference the string data without any potential heap allocation. Now anything that is a raw const char* won't need to be converted to a std::string before appending.	2018-04-19 20:12:58 -04:00
Lioncash	412b31ad72	glsl_shader_decompiler: Add AddNewLine() function to ShaderWriter Avoids constructing a std::string just to append a newline character	2018-04-19 20:09:27 -04:00
Lioncash	aa26baa3db	glsl_shader_decompiler: Add char overload for ShaderWriter's AddLine() Avoids constructing a std::string just to append a character.	2018-04-19 20:04:09 -04:00
Lioncash	4ef392906b	glsl_shader_decompiler: Append indentation without constructing a separate std::string The interface of std::string already lets us append N copies of a character to an existing string.	2018-04-19 19:59:25 -04:00
James Rowe	3f49210234	Merge pull request #348 from jlachniet/patch-1 Technically, yuzu can boot commercial games	2018-04-19 14:07:17 -06:00
Subv	fe84842137	ShaderGen: Implemented the fmul32i shader instruction.	2018-04-19 13:46:32 -05:00
Subv	5367935d35	ShaderGen: Fixed a case where the TEXS instruction would use the same registers for the input and the output. It will now save the coords before writing the outputs in a subscope.	2018-04-19 13:33:17 -05:00
N00byKing	8a47e7e493	Implement Pull #3528 from citra: use nvidia graphics automatically on laptops with optimus (with AMD support) (#271) * Port 3528: use nvidia graphics automatically on laptops with optimus * Force dedicated AMD Card for switchable Graphics * Ran clang-format	2018-04-19 12:22:26 -06:00
James Rowe	e90a12f80c	Merge pull request #352 from bunnei/fix-microprofile nvflinger: Call MicroProfileFlip on NVFlinger::Compose.	2018-04-19 12:18:47 -06:00
bunnei	d019bb16f6	Merge pull request #353 from Subv/compressed_formats GPU: Add support for the DXT23 and DXT45 compressed texture formats.	2018-04-19 09:02:19 -04:00
Subv	057170928c	GPU: Add support for the DXT23 and DXT45 compressed texture formats.	2018-04-18 20:48:53 -05:00
bunnei	de18592179	nvflinger: Call MicroProfileFlip on NVFlinger::Compose.	2018-04-18 20:28:50 -04:00
bunnei	60e6e8953e	Merge pull request #351 from Subv/tex_formats GPU: Implemented the B5G6R5 format.	2018-04-18 20:20:51 -04:00
Subv	2985056340	GPU: Implemented the B5G6R5 format.	2018-04-18 18:16:45 -05:00
bunnei	ce4f159b1c	gl_shader_gen: Support vertical/horizontal viewport flipping. (#347) * gl_shader_gen: Support vertical/horizontal viewport flipping. * fixup! gl_shader_gen: Support vertical/horizontal viewport flipping.	2018-04-18 16:42:40 -04:00
bunnei	6a999cf800	Merge pull request #350 from Subv/tex_components GPU: Fixed the incorrect component order in ABGR8 textures.	2018-04-18 15:36:00 -04:00
Subv	43d98ca8fe	GLCache: Added boilerplate code to make supporting configurable texture component types. For now only the UNORM type is supported.	2018-04-18 14:17:28 -05:00
Subv	5b3fab6766	GLCache: Unify texture and framebuffer formats when converting to OpenGL.	2018-04-18 14:17:28 -05:00
Subv	b2c1672e10	GPU: Texture format 8 and framebuffer format 0xD5 are actually ABGR8.	2018-04-18 14:17:27 -05:00
bunnei	d3f9ea90e7	Merge pull request #349 from Subv/texturing GPU: Support non-tiled textures and configurable block height.	2018-04-18 14:46:10 -04:00
Subv	48d4efbd69	GPU: Pitch textures are now supported, don't assert when encountering them.	2018-04-18 12:52:53 -05:00
Subv	a3e82e8e1f	GLCache: Take into account the texture's block height when caching and unswizzling.	2018-04-18 12:52:53 -05:00
Subv	ac09b5a2e9	GLCache: Added a function to convert cached PixelFormats back to texture formats. TODO: The way we handle cached formats must change, framebuffer and texture formats are too different to keep them in the same place.	2018-04-18 12:52:52 -05:00
Subv	6b63aaa5b4	GPU: Allow using a configurable block height when unswizzling textures.	2018-04-18 12:52:51 -05:00
Subv	db5f2bfa7e	GPU/TIC: Added the pitch and block height fields to the TIC structure.	2018-04-18 11:38:39 -05:00
jlachniet	f600f6eebd	Technically, yuzu can boot commercial games Clarifies that yuzu cannot play commercial games to any reasonable extent, rather than not at all.	2018-04-18 09:18:41 -04:00
bunnei	c93ea96366	Merge pull request #346 from bunnei/misc-gpu-improvements Misc gpu improvements	2018-04-17 22:17:07 -04:00
bunnei	71b4a3b9f6	Merge pull request #344 from bunnei/shader-decompiler-p2 Shader decompiler changes part 2	2018-04-17 22:10:53 -04:00
bunnei	9dc0d13ba5	Merge pull request #345 from bunnei/blending renderer_opengl: Implement BlendEquation and BlendFunc.	2018-04-17 21:45:36 -04:00
bunnei	7222d9a4c3	gl_rasterizer_cache: Add missing LOG statements.	2018-04-17 21:44:36 -04:00
bunnei	9df8e924fb	texture: Add missing formats.	2018-04-17 21:41:36 -04:00
bunnei	3ed8a1cac7	gpu: Add several framebuffer formats to RenderTargetFormat.	2018-04-17 21:40:38 -04:00
bunnei	4a8eb6745e	maxwell3d: Allow Texture2DNoMipmap as Texture2D.	2018-04-17 21:39:15 -04:00
bunnei	531c25386e	shader_bytecode: Make ctor's constexpr and explicit.	2018-04-17 21:27:07 -04:00
bunnei	174cba5c58	renderer_opengl: Implement BlendEquation and BlendFunc.	2018-04-17 18:11:48 -04:00
bunnei	e59126809c	bit_field: Remove is_pod check, add is_trivially_copyable_v.	2018-04-17 18:00:18 -04:00
bunnei	1f6fe062ca	gl_shader_decompiler: Fix warnings with MarkAsUsed.	2018-04-17 16:36:44 -04:00
bunnei	ed542a7309	gl_shader_decompiler: Cleanup logging, updating to NGLOG_*.	2018-04-17 16:36:44 -04:00
bunnei	ef2d5ab0c1	gl_shader_decompiler: Implement several MUFU subops and abs_d.	2018-04-17 16:36:43 -04:00
bunnei	59f4ff4659	gl_shader_decompiler: Fix swizzle in GetRegister.	2018-04-17 16:36:42 -04:00
bunnei	5a28dce9eb	gl_shader_decompiler: Implement FMUL/FADD/FFMA immediate instructions.	2018-04-17 16:36:42 -04:00
bunnei	8d4899d6ea	gl_shader_decompiler: Allow vertex position to be used in fragment shader.	2018-04-17 16:36:40 -04:00
bunnei	95144cc39c	gl_shader_decompiler: Implement IPA instruction.	2018-04-17 16:36:39 -04:00
bunnei	8b4443c966	gl_shader_decompiler: Add support for TEXS instruction.	2018-04-17 16:36:38 -04:00
bunnei	5ba71369ac	gl_shader_decompiler: Use fragment output color for GPR 0-3.	2018-04-17 15:25:54 -04:00
bunnei	5d529698c9	gl_shader_decompiler: Partially implement MUFU.	2018-04-17 15:25:54 -04:00
bunnei	5b9bcbf438	Merge pull request #341 from shinyquagsire23/pfs-hfs-impl file_sys: Add HFS/PFS helper component	2018-04-17 14:39:20 -04:00
bunnei	2b082e2710	Merge pull request #343 from Subv/tex_wrap_4 GPU: Implement some wrap modes	2018-04-17 12:25:24 -04:00
shinyquagsire23	de580ccdd5	file_sys: Use NGLOG	2018-04-17 09:55:29 -06:00
Hexagon12	e52a87b98a	Various service name fixes - part 2 (rebased) (#322) * Updated ACC with more service names * Updated SVC with more service names * Updated set with more service names * Updated sockets with more service names * Updated SPL with more service names * Updated time with more service names * Updated vi with more service names	2018-04-17 11:37:43 -04:00
bunnei	0905dc1ff4	Merge pull request #342 from bunnei/indexed-verts Implement indexed mode rendering	2018-04-17 11:34:22 -04:00
Subv	636ad34707	MaxwellToGL: Implemented tex wrap mode 1 (Wrap, GL_REPEAT).	2018-04-17 10:17:18 -05:00
Subv	7fc516cc1a	MaxwellToGL: Added a TODO and partial implementation of maxwell wrap mode 4 (Clamp, GL_CLAMP). This clamp mode was removed from OpenGL as of 3.1, we can emulate it by using GL_CLAMP_TO_BORDER to get the border color of the texture, and then manually sampling the edge to mix them in the fragment shader.	2018-04-17 10:16:50 -05:00
bunnei	77bdc49343	gl_rendering: Use NGLOG* for changed code.	2018-04-16 21:23:28 -04:00
bunnei	1a1af3fda3	gl_rasterizer: Implement indexed vertex mode.	2018-04-16 21:10:15 -04:00
shinyquagsire23	83aa38b239	file_sys: tweaks	2018-04-16 06:51:59 -06:00
shinyquagsire23	c03795300a	file_sys: Add HFS/PFS helper component	2018-04-16 04:36:25 -06:00
bunnei	44e09ba807	Merge pull request #338 from bunnei/unrequire-shared-font pl_u: Use empty shared font if none is available.	2018-04-15 16:54:36 -04:00
bunnei	d6d7d0989c	Merge pull request #337 from Subv/used_buffers GPU: Don't use explicit binding points when uploading the constbuffers to opengl	2018-04-15 16:30:57 -04:00
bunnei	ac628f139d	pl_u: Use empty shared font if none is available. - Makes games work in lieu of shared_font.bin.	2018-04-15 16:15:34 -04:00
Subv	477aab5960	GPU: Use the same buffer names in the generated GLSL and the buffer uploading code.	2018-04-15 15:02:50 -05:00
Subv	14ac40436e	GPU: Don't use explicit binding points when uploading the constbuffers to opengl. The bindpoints will now be dynamically calculated based on the number of buffers used by the previous shader stage.	2018-04-15 14:14:57 -05:00
bunnei	b8825fbf10	Merge pull request #335 from bunnei/delete-file fsp_srv: Implement DeleteFile.	2018-04-15 15:13:02 -04:00
bunnei	b60834ac41	Merge pull request #334 from Subv/used_buffers GPU: Use the buffer hints from the shader decompiler to upload only the necessary const buffers for each shader stage	2018-04-15 13:17:30 -04:00
bunnei	bddad50dd4	fsp_srv: Implement DeleteFile. - Used by Binding of Isaac.	2018-04-15 13:15:18 -04:00
Subv	e128e90350	GPU: Don't use GetPointer when uploading the constbuffer data to the GPU.	2018-04-15 11:18:09 -05:00
Subv	7da47da66e	GPU: Use the buffer hints from the shader decompiler to upload only the necessary const buffers for each shader stage.	2018-04-15 11:15:54 -05:00
bunnei	34264879b3	Merge pull request #333 from bunnei/const-buff-hints shaders: Expose hints about used const buffers.	2018-04-15 12:12:17 -04:00
bunnei	73d9c494ea	shaders: Expose hints about used const buffers.	2018-04-15 11:50:10 -04:00
bunnei	bb0c3fc828	Merge pull request #328 from Subv/constbuffers GPU: Upload the shader Constant Buffers as SSBOs to the GPU	2018-04-15 03:19:57 -04:00
Subv	c9b511da08	GPU: Upload the entirety of each constbuffer for each shader stage as SSBOs. We're going to need the shader generator to give us a mapping of the actual used const buffers to properly bind them to the shader.	2018-04-14 23:02:05 -05:00
Subv	1957640ea2	GPU: Allow configuring ssbos in the opengl state manager.	2018-04-14 22:54:23 -05:00
Subv	ae58e46036	GPU: Added a function to determine whether a shader stage is enabled or not.	2018-04-14 22:54:23 -05:00
bunnei	2b9a6b3281	Merge pull request #332 from bunnei/fix-total-mem-usage vm_manager: Increase GetTotalMemoryUsage value.	2018-04-14 22:29:19 -04:00
bunnei	43f0f163e1	vm_manager: Increase GetTotalMemoryUsage value. - Gets Binding of Isaac running.	2018-04-14 22:04:10 -04:00
bunnei	9f66cae865	Merge pull request #327 from adityaruplaha/fullscreen-fix Fix the stuck in fullscreen bug	2018-04-14 21:24:36 -04:00
bunnei	778be45103	Merge pull request #331 from bunnei/fsp-flush fsp_srv: Implement IFile::Flush.	2018-04-14 21:21:34 -04:00
bunnei	fdca7b5f7a	Merge pull request #329 from bunnei/shader-gen-part-1 OpenGL shader generation part 1	2018-04-14 20:40:39 -04:00
bunnei	1b41b875dc	shaders: Add NumTextureSamplers const, remove unused #pragma.	2018-04-14 18:50:06 -04:00
bunnei	e6224fec27	shaders: Address PR review feedback.	2018-04-14 16:01:41 -04:00
bunnei	eabeedf6af	gl_shader_decompiler: Cleanup log statements.	2018-04-14 16:01:41 -04:00
bunnei	0d408b965b	shaders: Fix GCC and clang build issues.	2018-04-14 16:01:40 -04:00
bunnei	86135864da	gl_shader_decompiler: Implement negate, abs, etc. and lots of cleanup.	2018-04-14 16:01:40 -04:00
bunnei	7639667562	shader_bytecode: Add FSETP and KIL to GetInfo.	2018-04-14 16:01:40 -04:00
bunnei	5a47832221	shader_bytecode: Add SubOp decoding.	2018-04-14 16:01:40 -04:00
bunnei	50023bdae7	gl_shader_decompiler: Add shader stage hint.	2018-04-14 16:01:39 -04:00
bunnei	a992aac5eb	renderer_opengl: Fix Morton copy byteswap, etc.	2018-04-14 16:01:39 -04:00
adityaruplaha	958c98bdae	Fix the stuck in fullscreen bug (Original PR: citra-emu/citra#3611 )	2018-04-14 16:41:56 +05:30
bunnei	0ca8fce9d0	gl_shader_manager: Implement SetShaderSamplerBindings.	2018-04-13 23:48:30 -04:00
bunnei	beddc8afd2	gl_rasterizer: Generate shaders and upload uniforms.	2018-04-13 23:48:29 -04:00
bunnei	85d77a3d24	gl_shader_decompiler: Basic impl. for very simple vertex shaders. - Tested with Puyo Puyo Tetris and Cave Story+	2018-04-13 23:48:28 -04:00
bunnei	51f37f5061	gl_shader_manager: Cleanup and consolidate uniform handling.	2018-04-13 23:48:28 -04:00
bunnei	35aca0bf1f	maxwell_3d: Make memory_manager public.	2018-04-13 23:48:27 -04:00
bunnei	33bb53571b	maxwell_3d: Fix shader_config decodings.	2018-04-13 23:48:26 -04:00
bunnei	5617831d5f	gl_rasterizer: Use shader program manager, remove test shader.	2018-04-13 23:48:26 -04:00
bunnei	459826a705	renderer_opengl: Add gl_shader_manager class.	2018-04-13 23:48:25 -04:00
bunnei	8aa21a03b3	maxwell_to_gl: Add a few types, etc.	2018-04-13 23:48:24 -04:00
bunnei	10953495c1	gl_shader_gen: Add hashable setup/config structs.	2018-04-13 23:48:23 -04:00
bunnei	2fcbb35ad2	gl_shader_util: Add missing includes.	2018-04-13 23:48:23 -04:00
bunnei	45fd7c4a37	common: Port cityhash code from Citra.	2018-04-13 23:48:22 -04:00
bunnei	da1114ca59	renderer_opengl: Use OGLProgram instead of OGLShader.	2018-04-13 23:48:21 -04:00
bunnei	4f2b2d0bc5	gl_shader_util: Grab latest upstream.	2018-04-13 23:48:21 -04:00
bunnei	dbfd106ba0	gl_resource_manager: Grab latest upstream.	2018-04-13 23:48:20 -04:00
bunnei	ed7e597b44	gl_shader_decompiler: Add skeleton code from Citra for shader analysis.	2018-04-13 23:48:20 -04:00
bunnei	4e7e0f8112	shader_bytecode: Add initial module for shader decoding.	2018-04-13 23:48:19 -04:00
bunnei	0315fe8c3d	bit_field: Make all methods constexpr.	2018-04-13 23:48:18 -04:00