Eliminate G8R8 conversion; the native format is actually R8G8

Merge pull request #1806 from ReinUsesLisp/morton-fixup
morton: Fixup compiler warning
2018-11-27 07:57:28 -04:00 · 2018-11-26 23:22:59 -05:00 · 2018-11-26 23:19:40 -05:00 · 2018-11-26 22:59:51 -05:00 · 2018-11-26 21:23:15 -05:00 · 2018-11-26 21:23:11 -05:00
29 changed files with 395 additions and 441 deletions
--- a/src/core/gdbstub/gdbstub.cpp
+++ b/src/core/gdbstub/gdbstub.cpp
@@ -282,7 +282,7 @@ static void FpuWrite(std::size_t id, u128 val, Kernel::Thread* thread = nullptr)
    if (id >= UC_ARM64_REG_Q0 && id < FPCR_REGISTER) {
        thread_context.vector_registers[id - UC_ARM64_REG_Q0] = val;
    } else if (id == FPCR_REGISTER) {
-        thread_context.fpcr = val[0];
+        thread_context.fpcr = static_cast<u32>(val[0]);
    }
 }

--- a/src/core/hle/kernel/resource_limit.h
+++ b/src/core/hle/kernel/resource_limit.h
@@ -14,7 +14,7 @@ namespace Kernel {

 class KernelCore;

-enum class ResourceType {
+enum class ResourceType : u32 {
    PhysicalMemory,
    Threads,
    Events,
@@ -25,6 +25,10 @@ enum class ResourceType {
    ResourceTypeCount
 };

+constexpr bool IsValidResourceType(ResourceType type) {
+    return type < ResourceType::ResourceTypeCount;
+}
+
 class ResourceLimit final : public Object {
 public:
    /**
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -105,6 +105,38 @@ ResultCode MapUnmapMemorySanityChecks(const VMManager& vm_manager, VAddr dst_add

    return RESULT_SUCCESS;
 }
+
+enum class ResourceLimitValueType {
+    CurrentValue,
+    LimitValue,
+};
+
+ResultVal<s64> RetrieveResourceLimitValue(Handle resource_limit, u32 resource_type,
+                                          ResourceLimitValueType value_type) {
+    const auto type = static_cast<ResourceType>(resource_type);
+    if (!IsValidResourceType(type)) {
+        LOG_ERROR(Kernel_SVC, "Invalid resource limit type: '{}'", resource_type);
+        return ERR_INVALID_ENUM_VALUE;
+    }
+
+    const auto& kernel = Core::System::GetInstance().Kernel();
+    const auto* const current_process = kernel.CurrentProcess();
+    ASSERT(current_process != nullptr);
+
+    const auto resource_limit_object =
+        current_process->GetHandleTable().Get<ResourceLimit>(resource_limit);
+    if (!resource_limit_object) {
+        LOG_ERROR(Kernel_SVC, "Handle to non-existent resource limit instance used. Handle={:08X}",
+                  resource_limit);
+        return ERR_INVALID_HANDLE;
+    }
+
+    if (value_type == ResourceLimitValueType::CurrentValue) {
+        return MakeResult(resource_limit_object->GetCurrentResourceValue(type));
+    }
+
+    return MakeResult(resource_limit_object->GetMaxResourceValue(type));
+}
 } // Anonymous namespace

 /// Set the process heap to a given Size. It can both extend and shrink the heap.
@@ -1346,6 +1378,87 @@ static ResultCode GetProcessInfo(u64* out, Handle process_handle, u32 type) {
    return RESULT_SUCCESS;
 }

+static ResultCode CreateResourceLimit(Handle* out_handle) {
+    LOG_DEBUG(Kernel_SVC, "called");
+
+    auto& kernel = Core::System::GetInstance().Kernel();
+    auto resource_limit = ResourceLimit::Create(kernel);
+
+    auto* const current_process = kernel.CurrentProcess();
+    ASSERT(current_process != nullptr);
+
+    const auto handle = current_process->GetHandleTable().Create(std::move(resource_limit));
+    if (handle.Failed()) {
+        return handle.Code();
+    }
+
+    *out_handle = *handle;
+    return RESULT_SUCCESS;
+}
+
+static ResultCode GetResourceLimitLimitValue(u64* out_value, Handle resource_limit,
+                                             u32 resource_type) {
+    LOG_DEBUG(Kernel_SVC, "called. Handle={:08X}, Resource type={}", resource_limit, resource_type);
+
+    const auto limit_value = RetrieveResourceLimitValue(resource_limit, resource_type,
+                                                        ResourceLimitValueType::LimitValue);
+    if (limit_value.Failed()) {
+        return limit_value.Code();
+    }
+
+    *out_value = static_cast<u64>(*limit_value);
+    return RESULT_SUCCESS;
+}
+
+static ResultCode GetResourceLimitCurrentValue(u64* out_value, Handle resource_limit,
+                                               u32 resource_type) {
+    LOG_DEBUG(Kernel_SVC, "called. Handle={:08X}, Resource type={}", resource_limit, resource_type);
+
+    const auto current_value = RetrieveResourceLimitValue(resource_limit, resource_type,
+                                                          ResourceLimitValueType::CurrentValue);
+    if (current_value.Failed()) {
+        return current_value.Code();
+    }
+
+    *out_value = static_cast<u64>(*current_value);
+    return RESULT_SUCCESS;
+}
+
+static ResultCode SetResourceLimitLimitValue(Handle resource_limit, u32 resource_type, u64 value) {
+    LOG_DEBUG(Kernel_SVC, "called. Handle={:08X}, Resource type={}, Value={}", resource_limit,
+              resource_type, value);
+
+    const auto type = static_cast<ResourceType>(resource_type);
+    if (!IsValidResourceType(type)) {
+        LOG_ERROR(Kernel_SVC, "Invalid resource limit type: '{}'", resource_type);
+        return ERR_INVALID_ENUM_VALUE;
+    }
+
+    auto& kernel = Core::System::GetInstance().Kernel();
+    auto* const current_process = kernel.CurrentProcess();
+    ASSERT(current_process != nullptr);
+
+    auto resource_limit_object =
+        current_process->GetHandleTable().Get<ResourceLimit>(resource_limit);
+    if (!resource_limit_object) {
+        LOG_ERROR(Kernel_SVC, "Handle to non-existent resource limit instance used. Handle={:08X}",
+                  resource_limit);
+        return ERR_INVALID_HANDLE;
+    }
+
+    const auto set_result = resource_limit_object->SetLimitValue(type, static_cast<s64>(value));
+    if (set_result.IsError()) {
+        LOG_ERROR(
+            Kernel_SVC,
+            "Attempted to lower resource limit ({}) for category '{}' below its current value ({})",
+            resource_limit_object->GetMaxResourceValue(type), resource_type,
+            resource_limit_object->GetCurrentResourceValue(type));
+        return set_result;
+    }
+
+    return RESULT_SUCCESS;
+}
+
 namespace {
 struct FunctionDef {
    using Func = void();
@@ -1405,8 +1518,8 @@ static const FunctionDef SVC_Table[] = {
    {0x2D, nullptr, "UnmapPhysicalMemory"},
    {0x2E, nullptr, "GetFutureThreadInfo"},
    {0x2F, nullptr, "GetLastThreadInfo"},
-    {0x30, nullptr, "GetResourceLimitLimitValue"},
-    {0x31, nullptr, "GetResourceLimitCurrentValue"},
+    {0x30, SvcWrap<GetResourceLimitLimitValue>, "GetResourceLimitLimitValue"},
+    {0x31, SvcWrap<GetResourceLimitCurrentValue>, "GetResourceLimitCurrentValue"},
    {0x32, SvcWrap<SetThreadActivity>, "SetThreadActivity"},
    {0x33, SvcWrap<GetThreadContext>, "GetThreadContext"},
    {0x34, SvcWrap<WaitForAddress>, "WaitForAddress"},
@@ -1482,8 +1595,8 @@ static const FunctionDef SVC_Table[] = {
    {0x7A, nullptr, "StartProcess"},
    {0x7B, nullptr, "TerminateProcess"},
    {0x7C, SvcWrap<GetProcessInfo>, "GetProcessInfo"},
-    {0x7D, nullptr, "CreateResourceLimit"},
-    {0x7E, nullptr, "SetResourceLimitLimitValue"},
+    {0x7D, SvcWrap<CreateResourceLimit>, "CreateResourceLimit"},
+    {0x7E, SvcWrap<SetResourceLimitLimitValue>, "SetResourceLimitLimitValue"},
    {0x7F, nullptr, "CallSecureMonitor"},
 };

--- a/src/core/hle/kernel/svc_wrap.h
+++ b/src/core/hle/kernel/svc_wrap.h
@@ -43,6 +43,14 @@ void SvcWrap() {
    FuncReturn(func(static_cast<u32>(Param(0)), static_cast<u32>(Param(1))).raw);
 }

+template <ResultCode func(u32*)>
+void SvcWrap() {
+    u32 param = 0;
+    const u32 retval = func(&param).raw;
+    Core::CurrentArmInterface().SetReg(1, param);
+    FuncReturn(retval);
+}
+
 template <ResultCode func(u32*, u32)>
 void SvcWrap() {
    u32 param_1 = 0;
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -30,8 +30,6 @@ add_library(video_core STATIC
    renderer_base.h
    renderer_opengl/gl_buffer_cache.cpp
    renderer_opengl/gl_buffer_cache.h
-    renderer_opengl/gl_global_cache.cpp
-    renderer_opengl/gl_global_cache.h
    renderer_opengl/gl_primitive_assembler.cpp
    renderer_opengl/gl_primitive_assembler.h
    renderer_opengl/gl_rasterizer.cpp
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -34,6 +34,9 @@ MICROPROFILE_DEFINE(ProcessCommandLists, "GPU", "Execute command buffer", MP_RGB
 void GPU::ProcessCommandLists(const std::vector<CommandListHeader>& commands) {
    MICROPROFILE_SCOPE(ProcessCommandLists);

+    // On entering GPU code, assume all memory may be touched by the ARM core.
+    maxwell_3d->dirty_flags.OnMemoryWrite();
+
    auto WriteReg = [this](u32 method, u32 subchannel, u32 value, u32 remaining_params) {
        LOG_TRACE(HW_GPU,
                  "Processing method {:08X} on subchannel {} value "
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -2,8 +2,10 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

+#include "core/core.h"
 #include "core/memory.h"
 #include "video_core/engines/fermi_2d.h"
+#include "video_core/engines/maxwell_3d.h"
 #include "video_core/rasterizer_interface.h"
 #include "video_core/textures/decoders.h"

@@ -47,6 +49,9 @@ void Fermi2D::HandleSurfaceCopy() {
    u32 dst_bytes_per_pixel = RenderTargetBytesPerPixel(regs.dst.format);

    if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst)) {
+        // All copies here update the main memory, so mark all rasterizer states as invalid.
+        Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
+
        rasterizer.FlushRegion(source_cpu, src_bytes_per_pixel * regs.src.width * regs.src.height);
        // We have to invalidate the destination region to evict any outdated surfaces from the
        // cache. We do this before actually writing the new data because the destination address
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -3,8 +3,10 @@
 // Refer to the license.txt file included.

 #include "common/logging/log.h"
+#include "core/core.h"
 #include "core/memory.h"
 #include "video_core/engines/kepler_memory.h"
+#include "video_core/engines/maxwell_3d.h"
 #include "video_core/rasterizer_interface.h"

 namespace Tegra::Engines {
@@ -47,6 +49,7 @@ void KeplerMemory::ProcessData(u32 data) {
    rasterizer.InvalidateRegion(dest_address, sizeof(u32));

    Memory::Write32(dest_address, data);
+    Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite();

    state.write_offset++;
 }
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -135,10 +135,24 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {

    if (regs.reg_array[method] != value) {
        regs.reg_array[method] = value;
+        // Vertex format
        if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) &&
            method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
            dirty_flags.vertex_attrib_format = true;
        }
+
+        // Vertex buffer
+        if (method >= MAXWELL3D_REG_INDEX(vertex_array) &&
+            method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) {
+            dirty_flags.vertex_array |= 1u << ((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2);
+        } else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) &&
+                   method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) {
+            dirty_flags.vertex_array |=
+                1u << ((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1);
+        } else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) &&
+                   method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) {
+            dirty_flags.vertex_array |= 1u << (method - MAXWELL3D_REG_INDEX(instanced_arrays));
+        }
    }

    switch (method) {
@@ -270,6 +284,7 @@ void Maxwell3D::ProcessQueryGet() {
            query_result.timestamp = CoreTiming::GetTicks();
            Memory::WriteBlock(*address, &query_result, sizeof(query_result));
        }
+        dirty_flags.OnMemoryWrite();
        break;
    }
    default:
@@ -319,11 +334,6 @@ void Maxwell3D::DrawArrays() {
    }
 }

-bool operator<(const Maxwell3D::GlobalMemoryDescriptor& lhs,
-               const Maxwell3D::GlobalMemoryDescriptor& rhs) {
-    return std::tie(lhs.cbuf_index, lhs.cbuf_offset) < std::tie(rhs.cbuf_index, rhs.cbuf_offset);
-}
-
 void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) {
    // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage.
    auto& shader = state.shader_stages[static_cast<std::size_t>(stage)];
@@ -351,6 +361,7 @@ void Maxwell3D::ProcessCBData(u32 value) {
        memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos);

    Memory::Write32(*address, value);
+    dirty_flags.OnMemoryWrite();

    // Increment the current buffer position.
    regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4;
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -5,7 +5,6 @@
 #pragma once

 #include <array>
-#include <set>
 #include <unordered_map>
 #include <vector>
 #include "common/assert.h"
@@ -32,12 +31,6 @@ public:
    explicit Maxwell3D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager);
    ~Maxwell3D() = default;

-    /// Structure representing a global memory region
-    struct GlobalMemoryDescriptor {
-        u64 cbuf_index;
-        u64 cbuf_offset;
-    };
-
    /// Register structure of the Maxwell3D engine.
    /// TODO(Subv): This structure will need to be made bigger as more registers are discovered.
    struct Regs {
@@ -597,10 +590,18 @@ public:

                float clear_color[4];
                float clear_depth;
+
                INSERT_PADDING_WORDS(0x3);
+
                s32 clear_stencil;

-                INSERT_PADDING_WORDS(0x17);
+                INSERT_PADDING_WORDS(0x7);
+
+                u32 polygon_offset_point_enable;
+                u32 polygon_offset_line_enable;
+                u32 polygon_offset_fill_enable;
+
+                INSERT_PADDING_WORDS(0xD);

                std::array<ScissorTest, NumViewports> scissor_test;

@@ -735,6 +736,7 @@ public:
                u32 frag_color_clamp;

                union {
+                    BitField<0, 1, u32> y_negate;
                    BitField<4, 1, u32> triangle_rast_flip;
                } screen_y_control;

@@ -768,7 +770,11 @@ public:
                    }
                } tsc;

-                INSERT_PADDING_WORDS(0x3);
+                INSERT_PADDING_WORDS(0x1);
+
+                float polygon_offset_factor;
+
+                INSERT_PADDING_WORDS(0x1);

                struct {
                    u32 tic_address_high;
@@ -793,7 +799,9 @@ public:

                u32 framebuffer_srgb;

-                INSERT_PADDING_WORDS(0x12);
+                float polygon_offset_units;
+
+                INSERT_PADDING_WORDS(0x11);

                union {
                    BitField<2, 1, u32> coord_origin;
@@ -870,7 +878,9 @@ public:

                INSERT_PADDING_WORDS(0x7);

-                INSERT_PADDING_WORDS(0x20);
+                INSERT_PADDING_WORDS(0x1F);
+
+                float polygon_offset_clamp;

                struct {
                    u32 is_instanced[NumVertexArrays];
@@ -886,7 +896,13 @@ public:

                Cull cull;

-                INSERT_PADDING_WORDS(0x28);
+                u32 pixel_center_integer;
+
+                INSERT_PADDING_WORDS(0x1);
+
+                u32 viewport_transform_enabled;
+
+                INSERT_PADDING_WORDS(0x25);

                struct {
                    u32 enable;
@@ -1044,8 +1060,6 @@ public:

        std::array<ShaderStageInfo, Regs::MaxShaderStage> shader_stages;
        u32 current_instance = 0; ///< Current instance to be used to simulate instanced rendering.
-
-        std::set<GlobalMemoryDescriptor> global_memory_uniforms;
    };

    State state{};
@@ -1053,6 +1067,11 @@ public:

    struct DirtyFlags {
        bool vertex_attrib_format = true;
+        u32 vertex_array = 0xFFFFFFFF;
+
+        void OnMemoryWrite() {
+            vertex_array = 0xFFFFFFFF;
+        }
    };

    DirtyFlags dirty_flags;
@@ -1078,9 +1097,6 @@ public:
        return macro_memory;
    }

-    std::string CreateGlobalMemoryRegion(std::tuple<u64, u64, u64> iadd_data);
-    std::set<std::pair<u64, u64>> ListGlobalMemoryRegions() const;
-
 private:
    void InitializeRegisterDefaults();

@@ -1135,9 +1151,6 @@ private:
    void DrawArrays();
 };

-bool operator<(const Maxwell3D::GlobalMemoryDescriptor& lhs,
-               const Maxwell3D::GlobalMemoryDescriptor& rhs);
-
 #define ASSERT_REG_POSITION(field_name, position)                                                  \
    static_assert(offsetof(Maxwell3D::Regs, field_name) == position * 4,                           \
                  "Field " #field_name " has invalid position")
@@ -1151,6 +1164,9 @@ ASSERT_REG_POSITION(vertex_buffer, 0x35D);
 ASSERT_REG_POSITION(clear_color[0], 0x360);
 ASSERT_REG_POSITION(clear_depth, 0x364);
 ASSERT_REG_POSITION(clear_stencil, 0x368);
+ASSERT_REG_POSITION(polygon_offset_point_enable, 0x370);
+ASSERT_REG_POSITION(polygon_offset_line_enable, 0x371);
+ASSERT_REG_POSITION(polygon_offset_fill_enable, 0x372);
 ASSERT_REG_POSITION(scissor_test, 0x380);
 ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5);
 ASSERT_REG_POSITION(stencil_back_mask, 0x3D6);
@@ -1189,6 +1205,7 @@ ASSERT_REG_POSITION(point_size, 0x546);
 ASSERT_REG_POSITION(zeta_enable, 0x54E);
 ASSERT_REG_POSITION(multisample_control, 0x54F);
 ASSERT_REG_POSITION(tsc, 0x557);
+ASSERT_REG_POSITION(polygon_offset_factor, 0x55b);
 ASSERT_REG_POSITION(tic, 0x55D);
 ASSERT_REG_POSITION(stencil_two_side_enable, 0x565);
 ASSERT_REG_POSITION(stencil_back_op_fail, 0x566);
@@ -1196,13 +1213,17 @@ ASSERT_REG_POSITION(stencil_back_op_zfail, 0x567);
 ASSERT_REG_POSITION(stencil_back_op_zpass, 0x568);
 ASSERT_REG_POSITION(stencil_back_func_func, 0x569);
 ASSERT_REG_POSITION(framebuffer_srgb, 0x56E);
+ASSERT_REG_POSITION(polygon_offset_units, 0x56F);
 ASSERT_REG_POSITION(point_coord_replace, 0x581);
 ASSERT_REG_POSITION(code_address, 0x582);
 ASSERT_REG_POSITION(draw, 0x585);
 ASSERT_REG_POSITION(primitive_restart, 0x591);
 ASSERT_REG_POSITION(index_array, 0x5F2);
+ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F);
 ASSERT_REG_POSITION(instanced_arrays, 0x620);
 ASSERT_REG_POSITION(cull, 0x646);
+ASSERT_REG_POSITION(pixel_center_integer, 0x649);
+ASSERT_REG_POSITION(viewport_transform_enabled, 0x64B);
 ASSERT_REG_POSITION(logic_op, 0x671);
 ASSERT_REG_POSITION(clear_buffers, 0x674);
 ASSERT_REG_POSITION(color_mask, 0x680);
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -2,7 +2,9 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

+#include "core/core.h"
 #include "core/memory.h"
+#include "video_core/engines/maxwell_3d.h"
 #include "video_core/engines/maxwell_dma.h"
 #include "video_core/rasterizer_interface.h"
 #include "video_core/textures/decoders.h"
@@ -54,6 +56,9 @@ void MaxwellDMA::HandleCopy() {
        return;
    }

+    // All copies here update the main memory, so mark all rasterizer states as invalid.
+    Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
+
    if (regs.exec.is_dst_linear && regs.exec.is_src_linear) {
        // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D
        // buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count,
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -1,4 +1,4 @@
-// Copyright 2018 yuzu Emulator Project
+// Copyright 2018 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

@@ -208,8 +208,6 @@ enum class UniformType : u64 {
    SignedShort = 3,
    Single = 4,
    Double = 5,
-    Quad = 6,
-    UnsignedQuad = 7,
 };

 enum class StoreType : u64 {
@@ -781,12 +779,6 @@ union Instruction {
        BitField<44, 2, u64> unknown;
    } st_l;

-    union {
-        BitField<48, 3, UniformType> type;
-        BitField<46, 2, u64> cache_mode;
-        BitField<20, 24, s64> offset_immediate;
-    } ld_g;
-
    union {
        BitField<0, 3, u64> pred0;
        BitField<3, 3, u64> pred3;
--- a/src/video_core/morton.cpp
+++ b/src/video_core/morton.cpp
@@ -183,13 +183,14 @@ static constexpr ConversionArray linear_to_morton_fns = {
    // clang-format on
 };

-constexpr MortonCopyFn GetSwizzleFunction(MortonSwizzleMode mode, Surface::PixelFormat format) {
+static MortonCopyFn GetSwizzleFunction(MortonSwizzleMode mode, Surface::PixelFormat format) {
    switch (mode) {
    case MortonSwizzleMode::MortonToLinear:
        return morton_to_linear_fns[static_cast<std::size_t>(format)];
    case MortonSwizzleMode::LinearToMorton:
        return linear_to_morton_fns[static_cast<std::size_t>(format)];
    }
+    UNREACHABLE();
 }

 /// 8x8 Z-Order coordinate from 2D coordinates
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -76,7 +76,7 @@ std::tuple<u8*, GLintptr> OGLBufferCache::ReserveMemory(std::size_t size, std::s
    return std::make_tuple(uploaded_ptr, uploaded_offset);
 }

-void OGLBufferCache::Map(std::size_t max_size) {
+bool OGLBufferCache::Map(std::size_t max_size) {
    bool invalidate;
    std::tie(buffer_ptr, buffer_offset_base, invalidate) =
        stream_buffer.Map(static_cast<GLsizeiptr>(max_size), 4);
@@ -85,6 +85,7 @@ void OGLBufferCache::Map(std::size_t max_size) {
    if (invalidate) {
        InvalidateAll();
    }
+    return invalidate;
 }

 void OGLBufferCache::Unmap() {
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -50,7 +50,7 @@ public:
    /// Reserves memory to be used by host's CPU. Returns mapped address and offset.
    std::tuple<u8*, GLintptr> ReserveMemory(std::size_t size, std::size_t alignment = 4);

-    void Map(std::size_t max_size);
+    bool Map(std::size_t max_size);
    void Unmap();

    GLuint GetHandle() const;
--- a/src/video_core/renderer_opengl/gl_global_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_global_cache.cpp
@@ -1,96 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/assert.h"
-#include "core/core.h"
-#include "core/memory.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/renderer_opengl/gl_global_cache.h"
-#include "video_core/renderer_opengl/gl_rasterizer.h"
-#include "video_core/renderer_opengl/gl_shader_cache.h"
-#include "video_core/renderer_opengl/gl_shader_manager.h"
-#include "video_core/renderer_opengl/utils.h"
-
-namespace OpenGL {
-
-CachedGlobalRegion::CachedGlobalRegion(VAddr addr, u32 size) : addr{addr}, size{size} {
-    buffer.Create();
-    LabelGLObject(GL_BUFFER, buffer.handle, addr);
-}
-
-/// Helper function to get the maximum size we can use for an OpenGL uniform block
-static u32 GetMaxUniformBlockSize() {
-    GLint max_size{};
-    glGetIntegerv(GL_MAX_UNIFORM_BLOCK_SIZE, &max_size);
-    return static_cast<u32>(max_size);
-}
-
-void CachedGlobalRegion::Reload(u32 size_) {
-    static const u32 max_size{GetMaxUniformBlockSize()};
-
-    size = size_;
-    if (size > max_size) {
-        size = max_size;
-        LOG_CRITICAL(HW_GPU, "Global region size {} exceeded max UBO size of {}!", size_, max_size);
-    }
-
-    glBindBuffer(GL_UNIFORM_BUFFER, buffer.handle);
-    glBufferData(GL_UNIFORM_BUFFER, size, Memory::GetPointer(addr), GL_DYNAMIC_DRAW);
-}
-
-GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32 size) const {
-    auto search{reserve.find(addr)};
-    if (search == reserve.end()) {
-        return {};
-    }
-    return search->second;
-}
-
-GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(VAddr addr, u32 size) {
-    GlobalRegion region{TryGetReservedGlobalRegion(addr, size)};
-    if (!region) {
-        // No reserved surface available, create a new one and reserve it
-        region = std::make_shared<CachedGlobalRegion>(addr, size);
-        ReserveGlobalRegion(region);
-    }
-    region->Reload(size);
-    return region;
-}
-
-void GlobalRegionCacheOpenGL::ReserveGlobalRegion(const GlobalRegion& region) {
-    reserve[region->GetAddr()] = region;
-}
-
-GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer)
-    : RasterizerCache{rasterizer} {}
-
-GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion(
-    const Tegra::Engines::Maxwell3D::GlobalMemoryDescriptor& global_region,
-    Tegra::Engines::Maxwell3D::Regs::ShaderStage stage) {
-    auto& gpu{Core::System::GetInstance().GPU()};
-    const auto cbufs = gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)];
-    const auto cbuf_addr{gpu.MemoryManager().GpuToCpuAddress(
-        cbufs.const_buffers[global_region.cbuf_index].address + global_region.cbuf_offset)};
-
-    ASSERT(cbuf_addr);
-
-    const auto actual_addr_gpu = Memory::Read64(*cbuf_addr);
-    const auto size = Memory::Read32(*cbuf_addr + 8);
-    const auto actual_addr{gpu.MemoryManager().GpuToCpuAddress(actual_addr_gpu)};
-
-    ASSERT(actual_addr);
-
-    // Look up global region in the cache based on address
-    GlobalRegion region{TryGet(*actual_addr)};
-
-    if (!region) {
-        // No global region found - create a new one
-        region = GetUncachedGlobalRegion(*actual_addr, size);
-        Register(region);
-    }
-
-    return region;
-}
-
-} // namespace OpenGL
--- a/src/video_core/renderer_opengl/gl_global_cache.h
+++ b/src/video_core/renderer_opengl/gl_global_cache.h
@@ -1,89 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <memory>
-#include <string>
-#include <unordered_map>
-#include <fmt/format.h>
-#include "common/common_types.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/rasterizer_cache.h"
-#include "video_core/renderer_opengl/gl_resource_manager.h"
-
-namespace OpenGL {
-
-class RasterizerOpenGL;
-class CachedGlobalRegion;
-using GlobalRegion = std::shared_ptr<CachedGlobalRegion>;
-
-/// Helper class for caching global region uniform locations
-class CachedGlobalRegionUniform {
-public:
-    explicit CachedGlobalRegionUniform(std::size_t index) : index{index} {}
-
-    std::string GetName() const {
-        return fmt::format("global_memory_region_declblock_{}", index);
-    }
-
-    u32 GetHash() const {
-        // This needs to be unique from ConstBufferEntry::GetHash and SamplerEntry::GetHash
-        return (static_cast<u32>(index) << 16) | 0xFFFF;
-    }
-
-private:
-    std::size_t index{};
-};
-
-class CachedGlobalRegion final : public RasterizerCacheObject {
-public:
-    CachedGlobalRegion(VAddr addr, u32 size);
-
-    /// Gets the address of the shader in guest memory, required for cache management
-    VAddr GetAddr() const {
-        return addr;
-    }
-
-    /// Gets the size of the shader in guest memory, required for cache management
-    std::size_t GetSizeInBytes() const {
-        return size;
-    }
-
-    /// Gets the GL program handle for the buffer
-    GLuint GetBufferHandle() const {
-        return buffer.handle;
-    }
-
-    /// Reloads the global region from guest memory
-    void Reload(u32 size_);
-
-    // We do not have to flush this cache as things in it are never modified by us.
-    void Flush() override {}
-
-private:
-    VAddr addr;
-    u32 size;
-
-    OGLBuffer buffer;
-};
-
-class GlobalRegionCacheOpenGL final : public RasterizerCache<GlobalRegion> {
-public:
-    explicit GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer);
-
-    /// Gets the current specified shader stage program
-    GlobalRegion GetGlobalRegion(
-        const Tegra::Engines::Maxwell3D::GlobalMemoryDescriptor& descriptor,
-        Tegra::Engines::Maxwell3D::Regs::ShaderStage stage);
-
-private:
-    GlobalRegion TryGetReservedGlobalRegion(VAddr addr, u32 size) const;
-    GlobalRegion GetUncachedGlobalRegion(VAddr addr, u32 size);
-    void ReserveGlobalRegion(const GlobalRegion& region);
-
-    std::unordered_map<VAddr, GlobalRegion> reserve;
-};
-
-} // namespace OpenGL
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -81,7 +81,7 @@ struct DrawParameters {

 RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo& info)
    : res_cache{*this}, shader_cache{*this}, emu_window{window}, screen_info{info},
-      buffer_cache(*this, STREAM_BUFFER_SIZE), global_cache{*this} {
+      buffer_cache(*this, STREAM_BUFFER_SIZE) {
    // Create sampler objects
    for (std::size_t i = 0; i < texture_samplers.size(); ++i) {
        texture_samplers[i].Create();
@@ -176,15 +176,25 @@ void RasterizerOpenGL::SetupVertexFormat() {
    }
    state.draw.vertex_array = VAO.handle;
    state.ApplyVertexBufferState();
+
+    // Rebinding the VAO invalidates the vertex buffer bindings.
+    gpu.dirty_flags.vertex_array = 0xFFFFFFFF;
 }

 void RasterizerOpenGL::SetupVertexBuffer() {
-    MICROPROFILE_SCOPE(OpenGL_VB);
-    const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
+    auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
    const auto& regs = gpu.regs;

+    if (!gpu.dirty_flags.vertex_array)
+        return;
+
+    MICROPROFILE_SCOPE(OpenGL_VB);
+
    // Upload all guest vertex arrays sequentially to our buffer
    for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
+        if (~gpu.dirty_flags.vertex_array & (1u << index))
+            continue;
+
        const auto& vertex_array = regs.vertex_array[index];
        if (!vertex_array.IsEnabled())
            continue;
@@ -211,6 +221,8 @@ void RasterizerOpenGL::SetupVertexBuffer() {

    // Implicit set by glBindVertexBuffer. Stupid glstate handling...
    state.draw.vertex_buffer = buffer_cache.GetHandle();
+
+    gpu.dirty_flags.vertex_array = 0;
 }

 DrawParameters RasterizerOpenGL::SetupDraw() {
@@ -267,7 +279,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {

    // Next available bindpoints to use when uploading the const buffers and textures to the GLSL
    // shaders. The constbuffer bindpoint starts after the shader stage configuration bind points.
-    u32 current_buffer_bindpoint = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage;
+    u32 current_constbuffer_bindpoint = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage;
    u32 current_texture_bindpoint = 0;

    for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
@@ -321,14 +333,9 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
        }

        // Configure the const buffers for this shader stage.
-        current_buffer_bindpoint =
+        current_constbuffer_bindpoint =
            SetupConstBuffers(static_cast<Maxwell::ShaderStage>(stage), shader, primitive_mode,
-                              current_buffer_bindpoint);
-
-        // Configure global memory regions for this shader stage.
-        current_buffer_bindpoint =
-            SetupGlobalRegions(static_cast<Maxwell::ShaderStage>(stage), shader, primitive_mode,
-                               current_buffer_bindpoint);
+                              current_constbuffer_bindpoint);

        // Configure the textures for this shader stage.
        current_texture_bindpoint = SetupTextures(static_cast<Maxwell::ShaderStage>(stage), shader,
@@ -605,7 +612,7 @@ void RasterizerOpenGL::DrawArrays() {
        return;

    MICROPROFILE_SCOPE(OpenGL_Drawing);
-    const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
+    auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
    const auto& regs = gpu.regs;

    ScopeAcquireGLContext acquire_context{emu_window};
@@ -625,7 +632,7 @@ void RasterizerOpenGL::DrawArrays() {
    SyncTransformFeedback();
    SyncPointState();
    CheckAlphaTests();
-
+    SyncPolygonOffset();
    // TODO(bunnei): Sync framebuffer_scale uniform here
    // TODO(bunnei): Sync scissorbox uniform(s) here

@@ -658,7 +665,11 @@ void RasterizerOpenGL::DrawArrays() {
    // Add space for at least 18 constant buffers
    buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment);

-    buffer_cache.Map(buffer_size);
+    bool invalidate = buffer_cache.Map(buffer_size);
+    if (invalidate) {
+        // As all cached buffers are invalidated, we need to recheck their state.
+        gpu.dirty_flags.vertex_attrib_format = 0xFFFFFFFF;
+    }

    SetupVertexFormat();
    SetupVertexBuffer();
@@ -700,7 +711,6 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
    res_cache.InvalidateRegion(addr, size);
    shader_cache.InvalidateRegion(addr, size);
-    global_cache.InvalidateRegion(addr, size);
    buffer_cache.InvalidateRegion(addr, size);
 }

@@ -925,29 +935,6 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shad
    return current_bindpoint + static_cast<u32>(entries.size());
 }

-u32 RasterizerOpenGL::SetupGlobalRegions(Maxwell::ShaderStage stage, Shader& shader,
-                                         GLenum primitive_mode, u32 current_bindpoint) {
-    std::size_t global_region_index{};
-    const auto& maxwell3d{Core::System::GetInstance().GPU().Maxwell3D()};
-    for (const auto& global_region : maxwell3d.state.global_memory_uniforms) {
-        const auto& region{
-            global_cache.GetGlobalRegion(global_region, static_cast<Maxwell::ShaderStage>(stage))};
-        const GLenum b_index{
-            shader->GetProgramResourceIndex(CachedGlobalRegionUniform{global_region_index})};
-
-        if (b_index != GL_INVALID_INDEX) {
-            glBindBufferBase(GL_UNIFORM_BUFFER, current_bindpoint, region->GetBufferHandle());
-            glUniformBlockBinding(shader->GetProgramHandle(primitive_mode), b_index,
-                                  current_bindpoint);
-            ++current_bindpoint;
-        }
-
-        ++global_region_index;
-    }
-
-    return current_bindpoint;
-}
-
 u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader,
                                    GLenum primitive_mode, u32 current_unit) {
    MICROPROFILE_SCOPE(OpenGL_Texture);
@@ -998,13 +985,25 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader,

 void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
    const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
-    for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumViewports; i++) {
-        const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()};
+    const bool geometry_shaders_enabled =
+        regs.IsShaderConfigEnabled(static_cast<size_t>(Maxwell::ShaderProgram::Geometry));
+    const std::size_t viewport_count =
+        geometry_shaders_enabled ? Tegra::Engines::Maxwell3D::Regs::NumViewports : 1;
+    for (std::size_t i = 0; i < viewport_count; i++) {
        auto& viewport = current_state.viewports[i];
-        viewport.x = viewport_rect.left;
-        viewport.y = viewport_rect.bottom;
-        viewport.width = viewport_rect.GetWidth();
-        viewport.height = viewport_rect.GetHeight();
+        const auto& src = regs.viewports[i];
+        if (regs.viewport_transform_enabled) {
+            const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()};
+            viewport.x = viewport_rect.left;
+            viewport.y = viewport_rect.bottom;
+            viewport.width = viewport_rect.GetWidth();
+            viewport.height = viewport_rect.GetHeight();
+        } else {
+            viewport.x = src.x;
+            viewport.y = src.y;
+            viewport.width = src.width;
+            viewport.height = src.height;
+        }
        viewport.depth_range_far = regs.viewports[i].depth_range_far;
        viewport.depth_range_near = regs.viewports[i].depth_range_near;
    }
@@ -1178,7 +1177,11 @@ void RasterizerOpenGL::SyncLogicOpState() {

 void RasterizerOpenGL::SyncScissorTest(OpenGLState& current_state) {
    const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
-    for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumViewports; i++) {
+    const bool geometry_shaders_enabled =
+        regs.IsShaderConfigEnabled(static_cast<size_t>(Maxwell::ShaderProgram::Geometry));
+    const std::size_t viewport_count =
+        geometry_shaders_enabled ? Tegra::Engines::Maxwell3D::Regs::NumViewports : 1;
+    for (std::size_t i = 0; i < viewport_count; i++) {
        const auto& src = regs.scissor_test[i];
        auto& dst = current_state.viewports[i].scissor;
        dst.enabled = (src.enable != 0);
@@ -1208,6 +1211,16 @@ void RasterizerOpenGL::SyncPointState() {
    state.point.size = regs.point_size;
 }

+void RasterizerOpenGL::SyncPolygonOffset() { // Mirrors the Maxwell3D polygon-offset regs into state
+    const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+    state.polygon_offset.fill_enable = regs.polygon_offset_fill_enable != 0; // non-zero => enabled
+    state.polygon_offset.line_enable = regs.polygon_offset_line_enable != 0;
+    state.polygon_offset.point_enable = regs.polygon_offset_point_enable != 0;
+    state.polygon_offset.units = regs.polygon_offset_units; // assigned directly from the register
+    state.polygon_offset.factor = regs.polygon_offset_factor;
+    state.polygon_offset.clamp = regs.polygon_offset_clamp;
+} // Only the tracked state object is written; this function issues no GL calls itself.
+
 void RasterizerOpenGL::CheckAlphaTests() {
    const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;

--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -23,7 +23,6 @@
 #include "video_core/rasterizer_cache.h"
 #include "video_core/rasterizer_interface.h"
 #include "video_core/renderer_opengl/gl_buffer_cache.h"
-#include "video_core/renderer_opengl/gl_global_cache.h"
 #include "video_core/renderer_opengl/gl_primitive_assembler.h"
 #include "video_core/renderer_opengl/gl_rasterizer_cache.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
@@ -119,7 +118,7 @@ private:
                               bool using_depth_fb = true, bool preserve_contents = true,
                               std::optional<std::size_t> single_color_target = {});

-    /**
+    /*
     * Configures the current constbuffers to use for the draw command.
     * @param stage The shader stage to configure buffers for.
     * @param shader The shader object that contains the specified stage.
@@ -129,17 +128,7 @@ private:
    u32 SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, Shader& shader,
                          GLenum primitive_mode, u32 current_bindpoint);

-    /**
-     * Configures the current global memory regions to use for the draw command.
-     * @param stage The shader stage to configure buffers for.
-     * @param shader The shader object that contains the specified stage.
-     * @param current_bindpoint The offset at which to start counting new buffer bindpoints.
-     * @returns The next available bindpoint for use in the next shader stage.
-     */
-    u32 SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, Shader& shader,
-                           GLenum primitive_mode, u32 current_bindpoint);
-
-    /**
+    /*
     * Configures the current textures to use for the draw command.
     * @param stage The shader stage to configure textures for.
     * @param shader The shader object that contains the specified stage.
@@ -194,6 +183,9 @@ private:
    /// Syncs Color Mask
    void SyncColorMask();

+    /// Syncs the polygon offsets
+    void SyncPolygonOffset();
+
    /// Check asserts for alpha testing.
    void CheckAlphaTests();

@@ -204,7 +196,6 @@ private:

    RasterizerCacheOpenGL res_cache;
    ShaderCacheOpenGL shader_cache;
-    GlobalRegionCacheOpenGL global_cache;

    Core::Frontend::EmuWindow& emu_window;

--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -738,18 +738,6 @@ static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height, bo
    }
 }

-static void ConvertG8R8ToR8G8(std::vector<u8>& data, u32 width, u32 height) {
-    constexpr auto bpp{GetBytesPerPixel(PixelFormat::G8R8U)};
-    for (std::size_t y = 0; y < height; ++y) {
-        for (std::size_t x = 0; x < width; ++x) {
-            const std::size_t offset{bpp * (y * width + x)};
-            const u8 temp{data[offset]};
-            data[offset] = data[offset + 1];
-            data[offset + 1] = temp;
-        }
-    }
-}
-
 /**
 * Helper function to perform software conversion (as needed) when loading a buffer from Switch
 * memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or with
@@ -782,12 +770,6 @@ static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelForma
        // Convert the S8Z24 depth format to Z24S8, as OpenGL does not support S8Z24.
        ConvertS8Z24ToZ24S8(data, width, height, false);
        break;
-
-    case PixelFormat::G8R8U:
-    case PixelFormat::G8R8S:
-        // Convert the G8R8 color format to R8G8, as OpenGL does not support G8R8.
-        ConvertG8R8ToR8G8(data, width, height);
-        break;
    }
 }

--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -98,6 +98,18 @@ CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type)
    }
 }

+GLuint CachedShader::GetProgramResourceIndex(const GLShader::ConstBufferEntry& buffer) { // memoized
+    const auto search{resource_cache.find(buffer.GetHash())}; // cache is keyed by the entry's hash
+    if (search == resource_cache.end()) { // miss: ask GL for the uniform block index by name
+        const GLuint index{
+            glGetProgramResourceIndex(program.handle, GL_UNIFORM_BLOCK, buffer.GetName().c_str())};
+        resource_cache[buffer.GetHash()] = index; // whatever GL returned is stored unconditionally
+        return index;
+    }
+
+    return search->second; // hit: reuse the previously queried index
+}
+
 GLint CachedShader::GetUniformLocation(const GLShader::SamplerEntry& sampler) {
    const auto search{uniform_cache.find(sampler.GetHash())};
    if (search == uniform_cache.end()) {
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -71,18 +71,7 @@ public:
    }

    /// Gets the GL program resource location for the specified resource, caching as needed
-    template <typename T>
-    GLuint GetProgramResourceIndex(const T& buffer) {
-        const auto& search{resource_cache.find(buffer.GetHash())};
-        if (search == resource_cache.end()) {
-            const GLuint index{glGetProgramResourceIndex(program.handle, GL_UNIFORM_BLOCK,
-                                                         buffer.GetName().c_str())};
-            resource_cache[buffer.GetHash()] = index;
-            return index;
-        }
-
-        return search->second;
-    }
+    GLuint GetProgramResourceIndex(const GLShader::ConstBufferEntry& buffer);

    /// Gets the GL uniform location for the specified resource, caching as needed
    GLint GetUniformLocation(const GLShader::SamplerEntry& sampler);
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -13,7 +13,6 @@

 #include "common/assert.h"
 #include "common/common_types.h"
-#include "core/core.h"
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/engines/shader_header.h"
 #include "video_core/renderer_opengl/gl_rasterizer.h"
@@ -586,7 +585,6 @@ public:
        GenerateInputAttrs();
        GenerateOutputAttrs();
        GenerateConstBuffers();
-        GenerateGlobalRegions();
        GenerateSamplers();
        GenerateGeometry();
    }
@@ -708,21 +706,6 @@ private:
        declarations.AddNewLine();
    }

-    /// Generates declarations for global memory regions.
-    void GenerateGlobalRegions() {
-        const auto& regions{
-            Core::System::GetInstance().GPU().Maxwell3D().state.global_memory_uniforms};
-        for (std::size_t i = 0; i < regions.size(); ++i) {
-            declarations.AddLine("layout(std140) uniform " +
-                                 fmt::format("global_memory_region_declblock_{}", i));
-            declarations.AddLine('{');
-            declarations.AddLine("    vec4 global_memory_region_" + std::to_string(i) + "[0x400];");
-            declarations.AddLine("};");
-            declarations.AddNewLine();
-        }
-        declarations.AddNewLine();
-    }
-
    /// Generates declarations for samplers.
    void GenerateSamplers() {
        const auto& samplers = GetSamplers();
@@ -884,7 +867,8 @@ private:
            // vertex shader, and what's the value of the fourth element when inside a Tess Eval
            // shader.
            ASSERT(stage == Maxwell3D::Regs::ShaderStage::Vertex);
-            return "vec4(0, 0, uintBitsToFloat(instance_id.x), uintBitsToFloat(gl_VertexID))";
+            // Config pack's first value is instance_id.
+            return "vec4(0, 0, uintBitsToFloat(config_pack[0]), uintBitsToFloat(gl_VertexID))";
        case Attribute::Index::FrontFacing:
            // TODO(Subv): Find out what the values are for the other elements.
            ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment);
@@ -1851,11 +1835,6 @@ private:
                } else {
                    op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
                                            GLSLRegister::Type::Integer);
-                    if (opcode->get().GetId() == OpCode::Id::IADD_C) {
-                        s_last_iadd = last_iadd;
-                        last_iadd = IADDReference{instr.gpr8.Value(), instr.cbuf34.index,
-                                                  instr.cbuf34.offset};
-                    }
                }
            }

@@ -3148,72 +3127,6 @@ private:
                shader.AddLine('}');
                break;
            }
-            case OpCode::Id::LDG: {
-                // Determine number of GPRs to fill with data
-                u64 count = 1;
-
-                switch (instr.ld_g.type) {
-                case Tegra::Shader::UniformType::Single:
-                    count = 1;
-                    break;
-                case Tegra::Shader::UniformType::Double:
-                    count = 2;
-                    break;
-                case Tegra::Shader::UniformType::Quad:
-                case Tegra::Shader::UniformType::UnsignedQuad:
-                    count = 4;
-                    break;
-                default:
-                    UNREACHABLE_MSG("Unimplemented LDG size!");
-                }
-
-                auto [gpr_index, index, offset] = last_iadd;
-
-                // The last IADD might be the upper u32 of address, so instead take the one before
-                // that.
-                if (gpr_index == Register::ZeroIndex) {
-                    gpr_index = s_last_iadd.out;
-                    index = s_last_iadd.cbuf_index;
-                    offset = s_last_iadd.cbuf_offset;
-                }
-
-                const auto gpr = regs.GetRegisterAsInteger(gpr_index);
-                const auto constbuffer =
-                    regs.GetUniform(index, offset, GLSLRegister::Type::UnsignedInteger);
-
-                Core::System::GetInstance().GPU().Maxwell3D().state.global_memory_uniforms.insert(
-                    {index, offset * 4});
-                const auto memory = fmt::format("global_memory_region_{}",
-                                                Core::System::GetInstance()
-                                                        .GPU()
-                                                        .Maxwell3D()
-                                                        .state.global_memory_uniforms.size() -
-                                                    1);
-
-                const auto immediate = std::to_string(instr.ld_g.offset_immediate.Value());
-                const auto o_register = regs.GetRegisterAsInteger(instr.gpr8, 0, false);
-                const auto address = "( " + immediate + " + " + o_register + " )";
-                const auto base_sub = address + " - " + constbuffer;
-
-                // New scope to prevent potential conflicts
-                shader.AddLine('{');
-                ++shader.scope;
-
-                shader.AddLine("uint final_offset = " + base_sub + ";");
-                for (std::size_t out = 0; out < count; ++out) {
-                    const u64 reg_id = instr.gpr0.Value() + out;
-                    const auto this_memory =
-                        fmt::format("{}[(final_offset + {}) / 16][((final_offset + {}) / 4) % 4]",
-                                    memory, out * 4, out * 4);
-
-                    regs.SetRegisterToFloat(reg_id, 0, this_memory, 1, 1);
-                }
-
-                --shader.scope;
-                shader.AddLine('}');
-
-                break;
-            }
            default: {
                UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
            }
@@ -3488,6 +3401,10 @@ private:
                regs.SetRegisterToInteger(instr.gpr0, false, 0, predicate + " ? 0xFFFFFFFF : 0", 1,
                                          1);
            }
+            if (instr.generates_cc.Value() != 0) {
+                regs.SetInternalFlag(InternalFlag::ZeroFlag, predicate);
+                LOG_WARNING(HW_GPU, "FSET Condition Code is incomplete");
+            }
            break;
        }
        case OpCode::Type::IntegerSet: {
@@ -3741,6 +3658,11 @@ private:
                    regs.SetRegisterToInteger(instr.gpr0, false, 0, "0u", 1, 1);
                    break;
                }
+                case Tegra::Shader::SystemVariable::Ydirection: {
+                    // Config pack's third value is Y_NEGATE's state.
+                    regs.SetRegisterToFloat(instr.gpr0, 0, "uintBitsToFloat(config_pack[2])", 1, 1);
+                    break;
+                }
                default: {
                    UNIMPLEMENTED_MSG("Unhandled system move: {}",
                                      static_cast<u32>(instr.sys20.Value()));
@@ -3764,11 +3686,17 @@ private:
                                     "BRA with constant buffers are not implemented");

                const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
-                UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T,
-                                     "BRA condition code used: {}", static_cast<u32>(cc));
-
                const u32 target = offset + instr.bra.GetBranchTarget();
-                shader.AddLine("{ jmp_to = " + std::to_string(target) + "u; break; }");
+                if (cc != Tegra::Shader::ConditionCode::T) {
+                    const std::string condition_code = regs.GetConditionCode(cc);
+                    shader.AddLine("if (" + condition_code + "){");
+                    shader.scope++;
+                    shader.AddLine("{ jmp_to = " + std::to_string(target) + "u; break; }");
+                    shader.scope--;
+                    shader.AddLine('}');
+                } else {
+                    shader.AddLine("{ jmp_to = " + std::to_string(target) + "u; break; }");
+                }
                break;
            }
            case OpCode::Id::IPA: {
@@ -4011,18 +3939,9 @@ private:
    ShaderWriter declarations;
    GLSLRegisterManager regs{shader, declarations, stage, suffix, header};

-    struct IADDReference {
-        Register out;
-        u64 cbuf_index;
-        u64 cbuf_offset;
-    };
-
-    IADDReference last_iadd{};
-    IADDReference s_last_iadd{};
-
    // Declarations
    std::set<std::string> declr_predicates;
-};
+}; // end of class

 std::string GetCommonDeclarations() {
    return fmt::format("#define MAX_CONSTBUFFER_ELEMENTS {}\n",
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -24,8 +24,7 @@ layout (location = 0) out vec4 position;

 layout(std140) uniform vs_config {
    vec4 viewport_flip;
-    uvec4 instance_id;
-    uvec4 flip_stage;
+    uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
    uvec4 alpha_test;
 };
 )";
@@ -63,7 +62,8 @@ void main() {
    out += R"(

    // Check if the flip stage is VertexB
-    if (flip_stage[0] == 1) {
+    // Config pack's second value is flip_stage
+    if (config_pack[1] == 1) {
        // Viewport can be flipped, which is unsupported by glViewport
        position.xy *= viewport_flip.xy;
    }
@@ -71,7 +71,7 @@ void main() {

    // TODO(bunnei): This is likely a hack, position.w should be interpolated as 1.0
    // For now, this is here to bring order in lieu of proper emulation
-    if (flip_stage[0] == 1) {
+    if (config_pack[1] == 1) {
        position.w = 1.0;
    }
 }
@@ -101,8 +101,7 @@ layout (location = 0) out vec4 position;

 layout (std140) uniform gs_config {
    vec4 viewport_flip;
-    uvec4 instance_id;
-    uvec4 flip_stage;
+    uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
    uvec4 alpha_test;
 };

@@ -139,8 +138,7 @@ layout (location = 0) in vec4 position;

 layout (std140) uniform fs_config {
    vec4 viewport_flip;
-    uvec4 instance_id;
-    uvec4 flip_stage;
+    uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
    uvec4 alpha_test;
 };

--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -57,8 +57,7 @@ public:
    }

    u32 GetHash() const {
-        // This needs to be unique from CachedGlobalRegionUniform::GetHash
-        return (static_cast<u32>(stage) << 12) | index;
+        return (static_cast<u32>(stage) << 16) | index;
    }

 private:
@@ -139,8 +138,7 @@ public:
    }

    u32 GetHash() const {
-        // This needs to be unique from CachedGlobalRegionUniform::GetHash
-        return (static_cast<u32>(stage) << 12) | static_cast<u32>(sampler_index);
+        return (static_cast<u32>(stage) << 16) | static_cast<u32>(sampler_index);
    }

    static std::string GetArrayName(Maxwell::ShaderStage stage) {
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -27,16 +27,18 @@ void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& sh
    alpha_test.func = func;
    alpha_test.ref = regs.alpha_test_ref;

-    // We only assign the instance to the first component of the vector, the rest is just padding.
-    instance_id[0] = state.current_instance;
+    instance_id = state.current_instance;

    // Assign in which stage the position has to be flipped
    // (the last stage before the fragment shader).
    if (gpu.regs.shader_config[static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry)].enable) {
-        flip_stage[0] = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry);
+        flip_stage = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry);
    } else {
-        flip_stage[0] = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::VertexB);
+        flip_stage = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::VertexB);
    }
+
+    // Y_NEGATE controls what value S2R returns for the Y_DIRECTION system value.
+    y_direction = regs.screen_y_control.y_negate == 0 ? 1.f : -1.f;
 }

 } // namespace OpenGL::GLShader
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -21,8 +21,11 @@ using Tegra::Engines::Maxwell3D;
 struct MaxwellUniformData {
    void SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage);
    alignas(16) GLvec4 viewport_flip;
-    alignas(16) GLuvec4 instance_id;
-    alignas(16) GLuvec4 flip_stage;
+    struct alignas(16) { // one 16-byte slot, read by the shaders as "uvec4 config_pack"
+        GLuint instance_id; // config_pack[0]
+        GLuint flip_stage; // config_pack[1]
+        GLfloat y_direction; // config_pack[2]; shader recovers the float via uintBitsToFloat
+    }; // no explicit fourth member: the remaining bytes of the slot are alignment padding
    struct alignas(16) {
        GLuint enabled;
        GLuint func;
@@ -30,7 +33,7 @@ struct MaxwellUniformData {
        GLuint padding;
    } alpha_test;
 };
-static_assert(sizeof(MaxwellUniformData) == 64, "MaxwellUniformData structure size is incorrect");
+static_assert(sizeof(MaxwellUniformData) == 48, "MaxwellUniformData structure size is incorrect");
 static_assert(sizeof(MaxwellUniformData) < 16384,
              "MaxwellUniformData structure must be less than 16kb as per the OpenGL spec");

--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -92,6 +92,13 @@ OpenGLState::OpenGLState() {

    point.size = 1;
    fragment_color_clamp.enabled = false;
+
+    polygon_offset.fill_enable = false;
+    polygon_offset.line_enable = false;
+    polygon_offset.point_enable = false;
+    polygon_offset.factor = 0.0f;
+    polygon_offset.units = 0.0f;
+    polygon_offset.clamp = 0.0f;
 }

 void OpenGLState::ApplyDefaultState() {
@@ -406,6 +413,55 @@ void OpenGLState::ApplyLogicOp() const {
    }
 }

+void OpenGLState::ApplyPolygonOffset() const {
+    // Emits only the polygon-offset GL calls whose tracked value differs from cur_state.
+    const bool fill_enable_changed =
+        polygon_offset.fill_enable != cur_state.polygon_offset.fill_enable;
+    const bool line_enable_changed =
+        polygon_offset.line_enable != cur_state.polygon_offset.line_enable;
+    const bool point_enable_changed =
+        polygon_offset.point_enable != cur_state.polygon_offset.point_enable;
+    const bool factor_changed = polygon_offset.factor != cur_state.polygon_offset.factor;
+    const bool units_changed = polygon_offset.units != cur_state.polygon_offset.units;
+    const bool clamp_changed = polygon_offset.clamp != cur_state.polygon_offset.clamp;
+
+    if (fill_enable_changed) { // toggle GL_POLYGON_OFFSET_FILL
+        if (polygon_offset.fill_enable) {
+            glEnable(GL_POLYGON_OFFSET_FILL);
+        } else {
+            glDisable(GL_POLYGON_OFFSET_FILL);
+        }
+    }
+
+    if (line_enable_changed) { // toggle GL_POLYGON_OFFSET_LINE
+        if (polygon_offset.line_enable) {
+            glEnable(GL_POLYGON_OFFSET_LINE);
+        } else {
+            glDisable(GL_POLYGON_OFFSET_LINE);
+        }
+    }
+
+    if (point_enable_changed) { // toggle GL_POLYGON_OFFSET_POINT
+        if (polygon_offset.point_enable) {
+            glEnable(GL_POLYGON_OFFSET_POINT);
+        } else {
+            glDisable(GL_POLYGON_OFFSET_POINT);
+        }
+    }
+
+    // Upload changed values even while every mode is disabled: Apply() copies this state into
+    // cur_state afterwards, so a skipped upload would never be detected as a change again.
+    if (factor_changed || units_changed || clamp_changed) {
+        if (GLAD_GL_EXT_polygon_offset_clamp && polygon_offset.clamp != 0) { // clamp needs the ext
+            glPolygonOffsetClamp(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp);
+        } else {
+            glPolygonOffset(polygon_offset.factor, polygon_offset.units);
+            UNIMPLEMENTED_IF_MSG(polygon_offset.clamp != 0,
+                                 "Unimplemented Depth polygon offset clamp.");
+        }
+    }
+}
+
 void OpenGLState::ApplyTextures() const {
    for (std::size_t i = 0; i < std::size(texture_units); ++i) {
        const auto& texture_unit = texture_units[i];
@@ -532,6 +588,7 @@ void OpenGLState::Apply() const {
    ApplyLogicOp();
    ApplyTextures();
    ApplySamplers();
+    ApplyPolygonOffset();
    cur_state = *this;
 }

--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -176,6 +176,15 @@ public:
        float size; // GL_POINT_SIZE
    } point;

+    struct { // polygon (depth) offset state, applied by ApplyPolygonOffset()
+        bool point_enable; // GL_POLYGON_OFFSET_POINT
+        bool line_enable; // GL_POLYGON_OFFSET_LINE
+        bool fill_enable; // GL_POLYGON_OFFSET_FILL
+        GLfloat units; // "units" argument of glPolygonOffset / glPolygonOffsetClamp
+        GLfloat factor; // "factor" argument of glPolygonOffset / glPolygonOffsetClamp
+        GLfloat clamp; // "clamp" argument of glPolygonOffsetClamp; 0 takes the unclamped path
+    } polygon_offset;
+
    std::array<bool, 2> clip_distance; // GL_CLIP_DISTANCE

    OpenGLState();
@@ -226,6 +235,7 @@ private:
    void ApplyLogicOp() const;
    void ApplyTextures() const;
    void ApplySamplers() const;
+    void ApplyPolygonOffset() const;
 };

 } // namespace OpenGL
Author	SHA1	Message	Date
FernandoS27	6dea93dabc	Eliminate G8R8 conversion, native is actualy R8G8	2018-11-27 07:57:28 -04:00
bunnei	1cd40f107f	Merge pull request #1806 from ReinUsesLisp/morton-fixup morton: Fixup compiler warning	2018-11-26 23:22:59 -05:00
bunnei	11c17465d8	Merge pull request #1804 from lioncash/cast gdbstub: Silence value truncation warning within FpuWrite()	2018-11-26 23:19:40 -05:00
bunnei	852a462df3	Merge pull request #1805 from lioncash/resource svc: Implement svcCreateResourceLimit, svcGetResourceLimitCurrentValue(), svcGetResourceLimitLimitValue(), and svcSetResourceLimitLimitValue()	2018-11-26 22:59:51 -05:00
Lioncash	5905162e36	svc: Implement svcSetResourceLimitLimitValue() The opposite of the getter functions, this function sets the limit value for a particular ResourceLimit resource category, with the restriction that the new limit value must be equal to or greater than the current resource value. If this is violated, then ERR_INVALID_STATE is returned. e.g. Assume: current[Events] = 10; limit[Events] = 20; a call to this service function lowering the limit value to 10 would be fine, however, attempting to lower it to 9 in this case would cause an invalid state error.	2018-11-26 21:23:15 -05:00
Lioncash	eb5596044d	svc: Implement svcGetResourceLimitCurrentValue() This kernel service function is essentially the exact same as svcGetResourceLimitLimitValue(), with the only difference being that it retrieves the current value for a given resource category using the provided resource limit handle, rather than retrieving the limiting value of that resource limit instance. Given these are exactly the same and only differ on returned values, we can extract the existing code for svcGetResourceLimitLimitValue() to handle both values.	2018-11-26 21:23:11 -05:00
ReinUsesLisp	237c2026e9	morton: Fixup compiler warning	2018-11-26 23:22:57 -03:00
Lioncash	1d6399c222	svc: Implement svcGetResourceLimitLimitValue() This kernel service function retrieves the maximum allowable value for a provided resource category for a given resource limit instance. Given we already have the functionality added to the resource limit instance itself, it's sufficient to just hook it up. The error scenarios for this are: 1. If an invalid resource category type is provided, then ERR_INVALID_ENUM is returned. 2. If an invalid handle is provided, then ERR_INVALID_HANDLE is returned (bad thing goes in, bad thing goes out, as one would expect). If neither of the above error cases occur, then the out parameter is provided with the maximum limit value for the given category and success is returned.	2018-11-26 21:12:13 -05:00
Lioncash	4ef2af8c98	svc: Implement svcCreateResourceLimit() This function simply creates a ResourceLimit instance and attempts to create a handle for it within the current process' handle table. If the kernel fails to either create the ResourceLimit instance or create a handle for the ResourceLimit instance, it returns a failure code (OUT_OF_RESOURCE, and HANDLE_TABLE_FULL respectively). Finally, it exits by providing the output parameter with the handle value for the ResourceLimit instance and returning that it was successful. Note: We do not return OUT_OF_RESOURCE because, if yuzu runs out of available memory, then new will currently throw. We could allocate the kernel instance with std::nothrow, however this would be inconsistent with how all other kernel objects are currently allocated.	2018-11-26 21:10:31 -05:00
Lioncash	474c745502	gdbstub: Silence value truncation warning within FpuWrite() Previously this would cause an implicit truncation warning about assigning a u64 value to a u32 value without an explicit cast.	2018-11-26 19:58:23 -05:00
bunnei	1856d0ee8a	Merge pull request #1794 from Tinob/master Add support for viewport_transfom_enable register	2018-11-26 18:34:09 -05:00
bunnei	67a154e23d	Merge pull request #1723 from degasus/dirty_flags gl_rasterizer: Skip VB upload if the state is clean.	2018-11-26 18:33:22 -05:00
Marcos	cb8d51e37e	GPU States: Implement Polygon Offset. This is used in SMO all the time. (#1784 ) * GPU States: Implement Polygon Offset. This is used in SMO all the time. * Clang Format fixes. * Initialize polygon_offset in the constructor.	2018-11-26 18:31:44 -05:00
bunnei	7684f4d0cf	Merge pull request #1713 from FernandoS27/bra-cc Implemented BRA CC conditional and FSET CC Setting	2018-11-26 18:28:03 -05:00
bunnei	a41943dc55	Merge pull request #1798 from ReinUsesLisp/y-direction gl_shader_decompiler: Implement S2R's Y_DIRECTION	2018-11-26 18:25:42 -05:00
Rodolfo Bogado	415e8383ba	Limit the amount of viewports tested for state changes only to the usable ones	2018-11-25 12:18:29 -03:00
ReinUsesLisp	924e834b8f	gl_shader_decompiler: Implement S2R's Y_DIRECTION	2018-11-25 04:37:29 -03:00
FernandoS27	33afff1870	Implemented BRA CC conditional and FSET CC Setting	2018-11-24 13:25:54 -04:00
Rodolfo Bogado	13f6a603c2	Add support for viewport_transfom_enable register	2018-11-24 13:17:48 -03:00
Markus Wick	97f5c4ffd3	gl_rasterizer: Skip VB upload if the state is clean.	2018-11-17 14:28:54 +01:00