Compare commits

..

1 Commits

Author SHA1 Message Date
David Marcec
8b2dcd743c Changed UNIMPLEMENTED_MSG -> LOG_CRITICAL for missing services
Easier debugging for canary logs
2019-01-04 20:38:29 +11:00
140 changed files with 5608 additions and 9695 deletions

View File

@@ -1,27 +1,16 @@
<!--
Please keep in mind yuzu is EXPERIMENTAL SOFTWARE.
Please read the FAQ:
https://yuzu-emu.org/wiki/faq/
Please read the FAQ: https://yuzu-emu.org/wiki/faq/
THIS IS NOT A SUPPORT FORUM, FOR SUPPORT GO TO:
https://community.citra-emu.org/
When submitting an issue, please do the following:
If the FAQ does not answer your question, please go to:
https://community.citra-emu.org/
When submitting an issue, please check the following:
- You have read the above.
- You have provided the version (commit hash) of yuzu you are using.
- You have provided sufficient detail for the issue to be reproduced.
- You have provided system specs (if relevant).
- Please also provide:
- For any issues, a log file
- Provide the version (commit hash) of yuzu you are using.
- Provide sufficient detail for the issue to be reproduced.
- Provide:
- For crashes, a backtrace.
- For graphical issues, comparison screenshots with real hardware.
- For emulation inaccuracies, a test-case (if able).
-->

View File

@@ -1,12 +1,12 @@
#!/bin/bash -ex
apt-get update
apt-get install --no-install-recommends -y build-essential git libqt5opengl5-dev libsdl2-dev libssl-dev python qtbase5-dev qtwebengine5-dev wget cmake ninja-build ccache
apt-get install --no-install-recommends -y build-essential git libqt5opengl5-dev libsdl2-dev libssl-dev python qtbase5-dev wget cmake ninja-build ccache
cd /yuzu
mkdir build && cd build
cmake .. -DYUZU_USE_BUNDLED_UNICORN=ON -DYUZU_USE_QT_WEB_ENGINE=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=/usr/lib/ccache/gcc -DCMAKE_CXX_COMPILER=/usr/lib/ccache/g++ -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${ENABLE_COMPATIBILITY_REPORTING:-"OFF"} -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DUSE_DISCORD_PRESENCE=ON -G Ninja
cmake .. -DYUZU_USE_BUNDLED_UNICORN=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=/usr/lib/ccache/gcc -DCMAKE_CXX_COMPILER=/usr/lib/ccache/g++ -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${ENABLE_COMPATIBILITY_REPORTING:-"OFF"} -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DUSE_DISCORD_PRESENCE=ON -G Ninja
ninja
ccache -s

View File

@@ -9,7 +9,7 @@ export PATH="/usr/local/opt/ccache/libexec:$PATH"
mkdir build && cd build
cmake --version
cmake .. -DYUZU_USE_BUNDLED_UNICORN=ON -DYUZU_USE_QT_WEB_ENGINE=ON -DCMAKE_BUILD_TYPE=Release -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${ENABLE_COMPATIBILITY_REPORTING:-"OFF"} -DUSE_DISCORD_PRESENCE=ON
cmake .. -DYUZU_USE_BUNDLED_UNICORN=ON -DCMAKE_BUILD_TYPE=Release -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${ENABLE_COMPATIBILITY_REPORTING:-"OFF"} -DUSE_DISCORD_PRESENCE=ON
make -j4
ccache -s

View File

@@ -19,8 +19,6 @@ option(ENABLE_WEB_SERVICE "Enable web services (telemetry, etc.)" ON)
option(YUZU_USE_BUNDLED_UNICORN "Build/Download bundled Unicorn" ON)
option(YUZU_USE_QT_WEB_ENGINE "Use QtWebEngine for web applet implementation" OFF)
option(ENABLE_CUBEB "Enables the cubeb audio backend" ON)
option(USE_DISCORD_PRESENCE "Enables Discord Rich Presence" OFF)
@@ -304,7 +302,7 @@ endif()
if (ENABLE_QT)
if (YUZU_USE_BUNDLED_QT)
if ((MSVC_VERSION GREATER_EQUAL 1910 AND MSVC_VERSION LESS 1920) AND ARCHITECTURE_x86_64)
set(QT_VER qt-5.12.0-msvc2017_64)
set(QT_VER qt-5.10.0-msvc2015_64)
else()
message(FATAL_ERROR "No bundled Qt binaries for your toolchain. Disable YUZU_USE_BUNDLED_QT and provide your own.")
endif()
@@ -321,10 +319,6 @@ if (ENABLE_QT)
endif()
find_package(Qt5 REQUIRED COMPONENTS Widgets OpenGL ${QT_PREFIX_HINT})
if (YUZU_USE_QT_WEB_ENGINE)
find_package(Qt5 REQUIRED COMPONENTS WebEngineCore WebEngineWidgets ${QT_PREFIX_HINT})
endif ()
endif()
# Platform-specific library requirements

View File

@@ -5,7 +5,6 @@ function(copy_yuzu_Qt5_deps target_dir)
set(Qt5_PLATFORMS_DIR "${Qt5_DIR}/../../../plugins/platforms/")
set(Qt5_STYLES_DIR "${Qt5_DIR}/../../../plugins/styles/")
set(Qt5_IMAGEFORMATS_DIR "${Qt5_DIR}/../../../plugins/imageformats/")
set(Qt5_RESOURCES_DIR "${Qt5_DIR}/../../../resources/")
set(PLATFORMS ${DLL_DEST}platforms/)
set(STYLES ${DLL_DEST}styles/)
set(IMAGEFORMATS ${DLL_DEST}imageformats/)
@@ -18,35 +17,7 @@ function(copy_yuzu_Qt5_deps target_dir)
Qt5OpenGL$<$<CONFIG:Debug>:d>.*
Qt5Widgets$<$<CONFIG:Debug>:d>.*
)
if (YUZU_USE_QT_WEB_ENGINE)
windows_copy_files(${target_dir} ${Qt5_DLL_DIR} ${DLL_DEST}
Qt5Network$<$<CONFIG:Debug>:d>.*
Qt5Positioning$<$<CONFIG:Debug>:d>.*
Qt5PrintSupport$<$<CONFIG:Debug>:d>.*
Qt5Qml$<$<CONFIG:Debug>:d>.*
Qt5Quick$<$<CONFIG:Debug>:d>.*
Qt5QuickWidgets$<$<CONFIG:Debug>:d>.*
Qt5WebChannel$<$<CONFIG:Debug>:d>.*
Qt5WebEngine$<$<CONFIG:Debug>:d>.*
Qt5WebEngineCore$<$<CONFIG:Debug>:d>.*
Qt5WebEngineWidgets$<$<CONFIG:Debug>:d>.*
QtWebEngineProcess$<$<CONFIG:Debug>:d>.*
)
windows_copy_files(${target_dir} ${Qt5_RESOURCES_DIR} ${DLL_DEST}
qtwebengine_resources.pak
qtwebengine_devtools_resources.pak
qtwebengine_resources_100p.pak
qtwebengine_resources_200p.pak
icudtl.dat
)
endif ()
windows_copy_files(yuzu ${Qt5_PLATFORMS_DIR} ${PLATFORMS} qwindows$<$<CONFIG:Debug>:d>.*)
windows_copy_files(yuzu ${Qt5_STYLES_DIR} ${STYLES} qwindowsvistastyle$<$<CONFIG:Debug>:d>.*)
windows_copy_files(yuzu ${Qt5_IMAGEFORMATS_DIR} ${IMAGEFORMATS}
qjpeg$<$<CONFIG:Debug>:d>.*
qgif$<$<CONFIG:Debug>:d>.*
)
windows_copy_files(yuzu ${Qt5_IMAGEFORMATS_DIR} ${IMAGEFORMATS} qjpeg$<$<CONFIG:Debug>:d>.*)
endfunction(copy_yuzu_Qt5_deps)

View File

@@ -42,7 +42,7 @@ before_build:
$COMPAT = if ($env:ENABLE_COMPATIBILITY_REPORTING -eq $null) {0} else {$env:ENABLE_COMPATIBILITY_REPORTING}
if ($env:BUILD_TYPE -eq 'msvc') {
# redirect stderr and change the exit code to prevent powershell from cancelling the build if cmake prints a warning
cmd /C 'cmake -G "Visual Studio 15 2017 Win64" -DYUZU_USE_BUNDLED_QT=1 -DYUZU_USE_BUNDLED_SDL2=1 -DYUZU_USE_BUNDLED_UNICORN=1 -DYUZU_USE_QT_WEB_ENGINE=ON -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${COMPAT} -DUSE_DISCORD_PRESENCE=ON .. 2>&1 && exit 0'
cmd /C 'cmake -G "Visual Studio 15 2017 Win64" -DYUZU_USE_BUNDLED_QT=1 -DYUZU_USE_BUNDLED_SDL2=1 -DYUZU_USE_BUNDLED_UNICORN=1 -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${COMPAT} -DUSE_DISCORD_PRESENCE=ON .. 2>&1 && exit 0'
} else {
C:\msys64\usr\bin\bash.exe -lc "cmake -G 'MSYS Makefiles' -DYUZU_BUILD_UNICORN=1 -DCMAKE_BUILD_TYPE=Release -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${COMPAT} -DUSE_DISCORD_PRESENCE=ON .. 2>&1"
}
@@ -94,7 +94,6 @@ after_build:
Copy-Item "$BUILD_DIR\*" -Destination $RELEASE_DIST -Recurse
rm "$RELEASE_DIST\*.exe"
Get-ChildItem "$BUILD_DIR" -Recurse -Filter "yuzu*.exe" | Copy-Item -destination $RELEASE_DIST
Get-ChildItem "$BUILD_DIR" -Recurse -Filter "QtWebEngineProcess*.exe" | Copy-Item -destination $RELEASE_DIST
Copy-Item .\license.txt -Destination $RELEASE_DIST
Copy-Item .\README.md -Destination $RELEASE_DIST
7z a -tzip $MSVC_BUILD_ZIP $RELEASE_DIST\*

View File

@@ -68,7 +68,7 @@ static void VolumeAdjustSamples(std::vector<s16>& samples) {
}
// Implementation of a volume slider with a dynamic range of 60 dB
const float volume_scale_factor = volume == 0 ? 0 : std::exp(6.90775f * volume) * 0.001f;
const float volume_scale_factor{std::exp(6.90775f * volume) * 0.001f};
for (auto& sample : samples) {
sample = static_cast<s16>(sample * volume_scale_factor);
}

View File

@@ -53,8 +53,8 @@ std::size_t TimeStretcher::Process(const s16* in, std::size_t num_in, s16* out,
const double lpf_gain = 1.0 - std::exp(-time_delta / lpf_time_scale);
m_stretch_ratio += lpf_gain * (current_ratio - m_stretch_ratio);
// Place a lower limit of 5% speed. When a game boots up, there will be
// many silence samples. These do not need to be timestretched.
// Place a lower limit of 5% speed. When a game boots up, there will be
// many silence samples. These do not need to be timestretched.
m_stretch_ratio = std::max(m_stretch_ratio, 0.05);
m_sound_touch.setTempo(m_stretch_ratio);

View File

@@ -88,15 +88,11 @@ add_library(core STATIC
frontend/applets/profile_select.h
frontend/applets/software_keyboard.cpp
frontend/applets/software_keyboard.h
frontend/applets/web_browser.cpp
frontend/applets/web_browser.h
frontend/emu_window.cpp
frontend/emu_window.h
frontend/framebuffer_layout.cpp
frontend/framebuffer_layout.h
frontend/input.h
frontend/scope_acquire_window_context.cpp
frontend/scope_acquire_window_context.h
gdbstub/gdbstub.cpp
gdbstub/gdbstub.h
hle/ipc.h
@@ -177,8 +173,6 @@ add_library(core STATIC
hle/service/am/applets/software_keyboard.h
hle/service/am/applets/stub_applet.cpp
hle/service/am/applets/stub_applet.h
hle/service/am/applets/web_browser.cpp
hle/service/am/applets/web_browser.h
hle/service/am/idle.cpp
hle/service/am/idle.h
hle/service/am/omm.cpp

View File

@@ -30,11 +30,8 @@
#include "core/hle/service/sm/sm.h"
#include "core/loader/loader.h"
#include "core/perf_stats.h"
#include "core/settings.h"
#include "core/telemetry_session.h"
#include "frontend/applets/profile_select.h"
#include "frontend/applets/software_keyboard.h"
#include "frontend/applets/web_browser.h"
#include "video_core/debug_utils/debug_utils.h"
#include "video_core/gpu.h"
#include "video_core/renderer_base.h"
@@ -97,11 +94,6 @@ struct System::Impl {
CoreTiming::Init();
kernel.Initialize();
const auto current_time = std::chrono::duration_cast<std::chrono::seconds>(
std::chrono::system_clock::now().time_since_epoch());
Settings::values.custom_rtc_differential =
Settings::values.custom_rtc.value_or(current_time) - current_time;
// Create a default fs if one doesn't already exist.
if (virtual_filesystem == nullptr)
virtual_filesystem = std::make_shared<FileSys::RealVfsFilesystem>();
@@ -111,8 +103,6 @@ struct System::Impl {
profile_selector = std::make_unique<Core::Frontend::DefaultProfileSelectApplet>();
if (software_keyboard == nullptr)
software_keyboard = std::make_unique<Core::Frontend::DefaultSoftwareKeyboardApplet>();
if (web_browser == nullptr)
web_browser = std::make_unique<Core::Frontend::DefaultWebBrowserApplet>();
auto main_process = Kernel::Process::Create(kernel, "main");
kernel.MakeCurrentProcess(main_process.get());
@@ -209,11 +199,6 @@ struct System::Impl {
// Close app loader
app_loader.reset();
// Clear all applets
profile_selector.reset();
software_keyboard.reset();
web_browser.reset();
LOG_DEBUG(Core, "Shutdown OK");
}
@@ -248,7 +233,6 @@ struct System::Impl {
/// Frontend applets
std::unique_ptr<Core::Frontend::ProfileSelectApplet> profile_selector;
std::unique_ptr<Core::Frontend::SoftwareKeyboardApplet> software_keyboard;
std::unique_ptr<Core::Frontend::WebBrowserApplet> web_browser;
/// Service manager
std::shared_ptr<Service::SM::ServiceManager> service_manager;
@@ -443,34 +427,22 @@ std::shared_ptr<FileSys::VfsFilesystem> System::GetFilesystem() const {
return impl->virtual_filesystem;
}
void System::SetProfileSelector(std::unique_ptr<Frontend::ProfileSelectApplet> applet) {
void System::SetProfileSelector(std::unique_ptr<Core::Frontend::ProfileSelectApplet> applet) {
impl->profile_selector = std::move(applet);
}
const Frontend::ProfileSelectApplet& System::GetProfileSelector() const {
const Core::Frontend::ProfileSelectApplet& System::GetProfileSelector() const {
return *impl->profile_selector;
}
void System::SetSoftwareKeyboard(std::unique_ptr<Frontend::SoftwareKeyboardApplet> applet) {
void System::SetSoftwareKeyboard(std::unique_ptr<Core::Frontend::SoftwareKeyboardApplet> applet) {
impl->software_keyboard = std::move(applet);
}
const Frontend::SoftwareKeyboardApplet& System::GetSoftwareKeyboard() const {
const Core::Frontend::SoftwareKeyboardApplet& System::GetSoftwareKeyboard() const {
return *impl->software_keyboard;
}
void System::SetWebBrowser(std::unique_ptr<Frontend::WebBrowserApplet> applet) {
impl->web_browser = std::move(applet);
}
Frontend::WebBrowserApplet& System::GetWebBrowser() {
return *impl->web_browser;
}
const Frontend::WebBrowserApplet& System::GetWebBrowser() const {
return *impl->web_browser;
}
System::ResultStatus System::Init(Frontend::EmuWindow& emu_window) {
return impl->Init(*this, emu_window);
}

View File

@@ -11,12 +11,11 @@
#include "common/common_types.h"
#include "core/file_sys/vfs_types.h"
#include "core/hle/kernel/object.h"
#include "frontend/applets/profile_select.h"
namespace Core::Frontend {
class EmuWindow;
class ProfileSelectApplet;
class SoftwareKeyboardApplet;
class WebBrowserApplet;
} // namespace Core::Frontend
namespace FileSys {
@@ -243,18 +242,13 @@ public:
std::shared_ptr<FileSys::VfsFilesystem> GetFilesystem() const;
void SetProfileSelector(std::unique_ptr<Frontend::ProfileSelectApplet> applet);
void SetProfileSelector(std::unique_ptr<Core::Frontend::ProfileSelectApplet> applet);
const Frontend::ProfileSelectApplet& GetProfileSelector() const;
const Core::Frontend::ProfileSelectApplet& GetProfileSelector() const;
void SetSoftwareKeyboard(std::unique_ptr<Frontend::SoftwareKeyboardApplet> applet);
void SetSoftwareKeyboard(std::unique_ptr<Core::Frontend::SoftwareKeyboardApplet> applet);
const Frontend::SoftwareKeyboardApplet& GetSoftwareKeyboard() const;
void SetWebBrowser(std::unique_ptr<Frontend::WebBrowserApplet> applet);
Frontend::WebBrowserApplet& GetWebBrowser();
const Frontend::WebBrowserApplet& GetWebBrowser() const;
const Core::Frontend::SoftwareKeyboardApplet& GetSoftwareKeyboard() const;
private:
System();

View File

@@ -359,8 +359,6 @@ bool NCA::ReadPFS0Section(const NCASectionHeader& section, const NCASectionTable
dirs.push_back(std::move(npfs));
if (IsDirectoryExeFS(dirs.back()))
exefs = dirs.back();
else if (IsDirectoryLogoPartition(dirs.back()))
logo = dirs.back();
} else {
if (has_rights_id)
status = Loader::ResultStatus::ErrorIncorrectTitlekeyOrTitlekek;
@@ -548,8 +546,4 @@ u64 NCA::GetBaseIVFCOffset() const {
return ivfc_offset;
}
VirtualDir NCA::GetLogoPartition() const {
return logo;
}
} // namespace FileSys

View File

@@ -74,13 +74,6 @@ inline bool IsDirectoryExeFS(const std::shared_ptr<VfsDirectory>& pfs) {
return pfs->GetFile("main") != nullptr && pfs->GetFile("main.npdm") != nullptr;
}
inline bool IsDirectoryLogoPartition(const VirtualDir& pfs) {
// NintendoLogo is the static image in the top left corner while StartupMovie is the animation
// in the bottom right corner.
return pfs->GetFile("NintendoLogo.png") != nullptr &&
pfs->GetFile("StartupMovie.gif") != nullptr;
}
// An implementation of VfsDirectory that represents a Nintendo Content Archive (NCA) conatiner.
// After construction, use GetStatus to determine if the file is valid and ready to be used.
class NCA : public ReadOnlyVfsDirectory {
@@ -109,8 +102,6 @@ public:
// Returns the base ivfc offset used in BKTR patching.
u64 GetBaseIVFCOffset() const;
VirtualDir GetLogoPartition() const;
private:
bool CheckSupportedNCA(const NCAHeader& header);
bool HandlePotentialHeaderDecryption();
@@ -131,7 +122,6 @@ private:
VirtualFile romfs = nullptr;
VirtualDir exefs = nullptr;
VirtualDir logo = nullptr;
VirtualFile file;
VirtualFile bktr_base_romfs;
u64 ivfc_offset = 0;

View File

@@ -39,4 +39,27 @@ static_assert(sizeof(Entry) == 0x310, "Directory Entry struct isn't exactly 0x31
static_assert(offsetof(Entry, type) == 0x304, "Wrong offset for type in Entry.");
static_assert(offsetof(Entry, file_size) == 0x308, "Wrong offset for file_size in Entry.");
class DirectoryBackend : NonCopyable {
public:
DirectoryBackend() {}
virtual ~DirectoryBackend() {}
/**
* List files contained in the directory
* @param count Number of entries to return at once in entries
* @param entries Buffer to read data into
* @return Number of entries listed
*/
virtual u64 Read(const u64 count, Entry* entries) = 0;
/// Returns the number of entries still left to read.
virtual u64 GetEntryCount() const = 0;
/**
* Close the directory
* @return true if the directory closed correctly
*/
virtual bool Close() const = 0;
};
} // namespace FileSys

View File

@@ -119,9 +119,6 @@ VirtualDir ExtractRomFS(VirtualFile file, RomFSExtractionType type) {
VirtualDir out = std::move(root);
if (type == RomFSExtractionType::SingleDiscard)
return out->GetSubdirectories().front();
while (out->GetSubdirectories().size() == 1 && out->GetFiles().empty()) {
if (out->GetSubdirectories().front()->GetName() == "data" &&
type == RomFSExtractionType::Truncated)

View File

@@ -33,9 +33,8 @@ struct IVFCHeader {
static_assert(sizeof(IVFCHeader) == 0xE0, "IVFCHeader has incorrect size.");
enum class RomFSExtractionType {
Full, // Includes data directory
Truncated, // Traverses into data directory
SingleDiscard, // Traverses into the first subdirectory of root
Full, // Includes data directory
Truncated, // Traverses into data directory
};
// Converts a RomFS binary blob to VFS Filesystem

View File

@@ -1,24 +0,0 @@
// Copyright 2018 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/logging/log.h"
#include "core/frontend/applets/web_browser.h"
namespace Core::Frontend {
WebBrowserApplet::~WebBrowserApplet() = default;
DefaultWebBrowserApplet::~DefaultWebBrowserApplet() = default;
void DefaultWebBrowserApplet::OpenPage(std::string_view filename,
std::function<void()> unpack_romfs_callback,
std::function<void()> finished_callback) {
LOG_INFO(Service_AM,
"(STUBBED) called - No suitable web browser implementation found to open website page "
"at '{}'!",
filename);
finished_callback();
}
} // namespace Core::Frontend

View File

@@ -1,28 +0,0 @@
// Copyright 2018 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <functional>
#include <string_view>
namespace Core::Frontend {
class WebBrowserApplet {
public:
virtual ~WebBrowserApplet();
virtual void OpenPage(std::string_view url, std::function<void()> unpack_romfs_callback,
std::function<void()> finished_callback) = 0;
};
class DefaultWebBrowserApplet final : public WebBrowserApplet {
public:
~DefaultWebBrowserApplet() override;
void OpenPage(std::string_view url, std::function<void()> unpack_romfs_callback,
std::function<void()> finished_callback) override;
};
} // namespace Core::Frontend

View File

@@ -1,18 +0,0 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "core/frontend/emu_window.h"
#include "core/frontend/scope_acquire_window_context.h"
namespace Core::Frontend {
ScopeAcquireWindowContext::ScopeAcquireWindowContext(Core::Frontend::EmuWindow& emu_window_)
: emu_window{emu_window_} {
emu_window.MakeCurrent();
}
ScopeAcquireWindowContext::~ScopeAcquireWindowContext() {
emu_window.DoneCurrent();
}
} // namespace Core::Frontend

View File

@@ -1,23 +0,0 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/common_types.h"
namespace Core::Frontend {
class EmuWindow;
/// Helper class to acquire/release window context within a given scope
class ScopeAcquireWindowContext : NonCopyable {
public:
explicit ScopeAcquireWindowContext(Core::Frontend::EmuWindow& window);
~ScopeAcquireWindowContext();
private:
Core::Frontend::EmuWindow& emu_window;
};
} // namespace Core::Frontend

View File

@@ -216,11 +216,6 @@ private:
/// Push ///
template <>
inline void ResponseBuilder::Push(s32 value) {
cmdbuf[index++] = static_cast<u32>(value);
}
template <>
inline void ResponseBuilder::Push(u32 value) {
cmdbuf[index++] = value;
@@ -239,22 +234,6 @@ inline void ResponseBuilder::Push(ResultCode value) {
Push<u32>(0);
}
template <>
inline void ResponseBuilder::Push(s8 value) {
PushRaw(value);
}
template <>
inline void ResponseBuilder::Push(s16 value) {
PushRaw(value);
}
template <>
inline void ResponseBuilder::Push(s64 value) {
Push(static_cast<u32>(value));
Push(static_cast<u32>(value >> 32));
}
template <>
inline void ResponseBuilder::Push(u8 value) {
PushRaw(value);

View File

@@ -36,6 +36,7 @@ enum class HandleType : u32 {
enum class ResetType {
OneShot, ///< Reset automatically on object acquisition
Sticky, ///< Never reset automatically
Pulse, ///< Reset automatically on wakeup
};
class Object : NonCopyable {

View File

@@ -46,6 +46,9 @@ ResultCode ReadableEvent::Reset() {
void ReadableEvent::WakeupAllWaitingThreads() {
WaitObject::WakeupAllWaitingThreads();
if (reset_type == ResetType::Pulse)
signaled = false;
}
} // namespace Kernel

View File

@@ -597,7 +597,6 @@ enum class BreakType : u32 {
PostNROLoad = 4,
PreNROUnload = 5,
PostNROUnload = 6,
CppException = 7,
};
struct BreakReason {
@@ -670,9 +669,6 @@ static void Break(u32 reason, u64 info1, u64 info2) {
"Signalling debugger, Unloaded an NRO at 0x{:016X} with size 0x{:016X}", info1,
info2);
break;
case BreakType::CppException:
LOG_CRITICAL(Debug_Emulated, "Signalling debugger. Uncaught C++ exception encountered.");
break;
default:
LOG_WARNING(
Debug_Emulated,

View File

@@ -68,6 +68,9 @@ void Timer::Clear() {
void Timer::WakeupAllWaitingThreads() {
WaitObject::WakeupAllWaitingThreads();
if (reset_type == ResetType::Pulse)
signaled = false;
}
void Timer::Signal(int cycles_late) {

View File

@@ -23,7 +23,6 @@
#include "core/hle/service/am/applets/profile_select.h"
#include "core/hle/service/am/applets/software_keyboard.h"
#include "core/hle/service/am/applets/stub_applet.h"
#include "core/hle/service/am/applets/web_browser.h"
#include "core/hle/service/am/idle.h"
#include "core/hle/service/am/omm.h"
#include "core/hle/service/am/spsm.h"
@@ -39,13 +38,11 @@
namespace Service::AM {
constexpr ResultCode ERR_NO_DATA_IN_CHANNEL{ErrorModule::AM, 0x2};
constexpr ResultCode ERR_NO_MESSAGES{ErrorModule::AM, 0x3};
constexpr ResultCode ERR_SIZE_OUT_OF_BOUNDS{ErrorModule::AM, 0x1F7};
enum class AppletId : u32 {
ProfileSelect = 0x10,
SoftwareKeyboard = 0x11,
LibAppletOff = 0x17,
};
constexpr u32 POP_LAUNCH_PARAMETER_MAGIC = 0xC79497CA;
@@ -463,17 +460,9 @@ void ICommonStateGetter::GetEventHandle(Kernel::HLERequestContext& ctx) {
void ICommonStateGetter::ReceiveMessage(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_AM, "called");
const auto message = msg_queue->PopMessage();
IPC::ResponseBuilder rb{ctx, 3};
if (message == AppletMessageQueue::AppletMessage::NoMessage) {
LOG_ERROR(Service_AM, "Message queue is empty");
rb.Push(ERR_NO_MESSAGES);
rb.PushEnum<AppletMessageQueue::AppletMessage>(message);
return;
}
rb.Push(RESULT_SUCCESS);
rb.PushEnum<AppletMessageQueue::AppletMessage>(message);
rb.PushEnum<AppletMessageQueue::AppletMessage>(msg_queue->PopMessage());
}
void ICommonStateGetter::GetCurrentFocusState(Kernel::HLERequestContext& ctx) {
@@ -732,9 +721,9 @@ void IStorageAccessor::Write(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const u64 offset{rp.Pop<u64>()};
const std::vector<u8> data{ctx.ReadBuffer()};
LOG_DEBUG(Service_AM, "called, offset={}", offset);
LOG_DEBUG(Service_AM, "called, offset={}, size={}", offset, data.size());
const std::vector<u8> data{ctx.ReadBuffer()};
if (data.size() > backing.buffer.size() - offset) {
LOG_ERROR(Service_AM,
@@ -755,9 +744,9 @@ void IStorageAccessor::Read(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const u64 offset{rp.Pop<u64>()};
const std::size_t size{ctx.GetWriteBufferSize()};
LOG_DEBUG(Service_AM, "called, offset={}", offset);
LOG_DEBUG(Service_AM, "called, offset={}, size={}", offset, size);
const std::size_t size{ctx.GetWriteBufferSize()};
if (size > backing.buffer.size() - offset) {
LOG_ERROR(Service_AM, "offset is out of bounds, backing_buffer_sz={}, size={}, offset={}",
@@ -793,8 +782,6 @@ static std::shared_ptr<Applets::Applet> GetAppletFromId(AppletId id) {
return std::make_shared<Applets::ProfileSelect>();
case AppletId::SoftwareKeyboard:
return std::make_shared<Applets::SoftwareKeyboard>();
case AppletId::LibAppletOff:
return std::make_shared<Applets::WebBrowser>();
default:
LOG_ERROR(Service_AM, "Unimplemented AppletId [{:08X}]! -- Falling back to stub!",
static_cast<u32>(id));

View File

@@ -7,7 +7,7 @@
#include "common/assert.h"
#include "common/string_util.h"
#include "core/core.h"
#include "core/frontend/applets/profile_select.h"
#include "core/frontend/applets/software_keyboard.h"
#include "core/hle/service/am/am.h"
#include "core/hle/service/am/applets/profile_select.h"

View File

@@ -1,190 +0,0 @@
// Copyright 2018 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <array>
#include <cstring>
#include <vector>
#include "common/assert.h"
#include "common/common_funcs.h"
#include "common/common_paths.h"
#include "common/file_util.h"
#include "common/hex_util.h"
#include "common/logging/log.h"
#include "common/string_util.h"
#include "core/core.h"
#include "core/file_sys/content_archive.h"
#include "core/file_sys/mode.h"
#include "core/file_sys/nca_metadata.h"
#include "core/file_sys/registered_cache.h"
#include "core/file_sys/romfs.h"
#include "core/file_sys/vfs_types.h"
#include "core/frontend/applets/web_browser.h"
#include "core/hle/kernel/process.h"
#include "core/hle/service/am/applets/web_browser.h"
#include "core/hle/service/filesystem/filesystem.h"
#include "core/loader/loader.h"
namespace Service::AM::Applets {
// TODO(DarkLordZach): There are other arguments in the WebBuffer structure that are currently not
// parsed, for example footer mode and left stick mode. Some of these are not particularly relevant,
// but some may be worth an implementation.
constexpr u16 WEB_ARGUMENT_URL_TYPE = 0x6;
struct WebBufferHeader {
u16 count;
INSERT_PADDING_BYTES(6);
};
static_assert(sizeof(WebBufferHeader) == 0x8, "WebBufferHeader has incorrect size.");
struct WebArgumentHeader {
u16 type;
u16 size;
u32 offset;
};
static_assert(sizeof(WebArgumentHeader) == 0x8, "WebArgumentHeader has incorrect size.");
struct WebArgumentResult {
u32 result_code;
std::array<char, 0x1000> last_url;
u64 last_url_size;
};
static_assert(sizeof(WebArgumentResult) == 0x1010, "WebArgumentResult has incorrect size.");
static std::vector<u8> GetArgumentDataForTagType(const std::vector<u8>& data, u16 type) {
WebBufferHeader header;
ASSERT(sizeof(WebBufferHeader) <= data.size());
std::memcpy(&header, data.data(), sizeof(WebBufferHeader));
u64 offset = sizeof(WebBufferHeader);
for (u16 i = 0; i < header.count; ++i) {
WebArgumentHeader arg;
ASSERT(offset + sizeof(WebArgumentHeader) <= data.size());
std::memcpy(&arg, data.data() + offset, sizeof(WebArgumentHeader));
offset += sizeof(WebArgumentHeader);
if (arg.type == type) {
std::vector<u8> out(arg.size);
offset += arg.offset;
ASSERT(offset + arg.size <= data.size());
std::memcpy(out.data(), data.data() + offset, out.size());
return out;
}
offset += arg.offset + arg.size;
}
return {};
}
static FileSys::VirtualFile GetManualRomFS() {
auto& loader{Core::System::GetInstance().GetAppLoader()};
FileSys::VirtualFile out;
if (loader.ReadManualRomFS(out) == Loader::ResultStatus::Success)
return out;
const auto& installed{FileSystem::GetUnionContents()};
const auto res = installed.GetEntry(Core::System::GetInstance().CurrentProcess()->GetTitleID(),
FileSys::ContentRecordType::Manual);
if (res != nullptr)
return res->GetRomFS();
return nullptr;
}
WebBrowser::WebBrowser() = default;
WebBrowser::~WebBrowser() = default;
void WebBrowser::Initialize() {
Applet::Initialize();
complete = false;
temporary_dir.clear();
filename.clear();
status = RESULT_SUCCESS;
const auto web_arg_storage = broker.PopNormalDataToApplet();
ASSERT(web_arg_storage != nullptr);
const auto& web_arg = web_arg_storage->GetData();
const auto url_data = GetArgumentDataForTagType(web_arg, WEB_ARGUMENT_URL_TYPE);
filename = Common::StringFromFixedZeroTerminatedBuffer(
reinterpret_cast<const char*>(url_data.data()), url_data.size());
temporary_dir = FileUtil::SanitizePath(FileUtil::GetUserPath(FileUtil::UserPath::CacheDir) +
"web_applet_manual",
FileUtil::DirectorySeparator::PlatformDefault);
FileUtil::DeleteDirRecursively(temporary_dir);
manual_romfs = GetManualRomFS();
if (manual_romfs == nullptr) {
status = ResultCode(-1);
LOG_ERROR(Service_AM, "Failed to find manual for current process!");
}
filename =
FileUtil::SanitizePath(temporary_dir + DIR_SEP + "html-document" + DIR_SEP + filename,
FileUtil::DirectorySeparator::PlatformDefault);
}
bool WebBrowser::TransactionComplete() const {
return complete;
}
ResultCode WebBrowser::GetStatus() const {
return status;
}
void WebBrowser::ExecuteInteractive() {
UNIMPLEMENTED_MSG("Unexpected interactive data recieved!");
}
void WebBrowser::Execute() {
if (complete)
return;
if (status != RESULT_SUCCESS) {
complete = true;
return;
}
auto& frontend{Core::System::GetInstance().GetWebBrowser()};
frontend.OpenPage(filename, [this] { UnpackRomFS(); }, [this] { Finalize(); });
}
void WebBrowser::UnpackRomFS() {
if (unpacked)
return;
ASSERT(manual_romfs != nullptr);
const auto dir =
FileSys::ExtractRomFS(manual_romfs, FileSys::RomFSExtractionType::SingleDiscard);
const auto& vfs{Core::System::GetInstance().GetFilesystem()};
const auto temp_dir = vfs->CreateDirectory(temporary_dir, FileSys::Mode::ReadWrite);
FileSys::VfsRawCopyD(dir, temp_dir);
unpacked = true;
}
void WebBrowser::Finalize() {
complete = true;
WebArgumentResult out{};
out.result_code = 0;
out.last_url_size = 0;
std::vector<u8> data(sizeof(WebArgumentResult));
std::memcpy(data.data(), &out, sizeof(WebArgumentResult));
broker.PushNormalDataFromApplet(IStorage{data});
broker.SignalStateChanged();
FileUtil::DeleteDirRecursively(temporary_dir);
}
} // namespace Service::AM::Applets

View File

@@ -1,44 +0,0 @@
// Copyright 2018 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "core/file_sys/vfs_types.h"
#include "core/hle/service/am/am.h"
#include "core/hle/service/am/applets/applets.h"
namespace Service::AM::Applets {
class WebBrowser final : public Applet {
public:
WebBrowser();
~WebBrowser() override;
void Initialize() override;
bool TransactionComplete() const override;
ResultCode GetStatus() const override;
void ExecuteInteractive() override;
void Execute() override;
// Callback to be fired when the frontend needs the manual RomFS unpacked to temporary
// directory. This is a blocking call and may take a while as some manuals can be up to 100MB in
// size. Attempting to access files at filename before invocation is likely to not work.
void UnpackRomFS();
// Callback to be fired when the frontend is finished browsing. This will delete the temporary
// manual RomFS extracted files, so ensure this is only called at actual finalization.
void Finalize();
private:
bool complete = false;
bool unpacked = false;
ResultCode status = RESULT_SUCCESS;
FileSys::VirtualFile manual_romfs;
std::string temporary_dir;
std::string filename;
};
} // namespace Service::AM::Applets

View File

@@ -410,8 +410,6 @@ void Controller_NPad::OnUpdate(u8* data, std::size_t data_len) {
libnx_entry.pad.pad_states.raw = pad_state.pad_states.raw;
libnx_entry.pad.l_stick = pad_state.l_stick;
libnx_entry.pad.r_stick = pad_state.r_stick;
press_state |= static_cast<u32>(pad_state.pad_states.raw);
}
std::memcpy(data + NPAD_OFFSET, shared_memory_entries.data(),
shared_memory_entries.size() * sizeof(NPadEntry));
@@ -638,10 +636,6 @@ void Controller_NPad::ClearAllControllers() {
});
}
u32 Controller_NPad::GetAndResetPressState() {
return std::exchange(press_state, 0);
}
bool Controller_NPad::IsControllerSupported(NPadControllerType controller) const {
const bool support_handheld =
std::find(supported_npad_id_types.begin(), supported_npad_id_types.end(), NPAD_HANDHELD) !=

View File

@@ -124,10 +124,6 @@ public:
void ConnectAllDisconnectedControllers();
void ClearAllControllers();
// Logical OR for all buttons presses on all controllers
// Specifically for cheat engine and other features.
u32 GetAndResetPressState();
static std::size_t NPadIdToIndex(u32 npad_id);
static u32 IndexToNPad(std::size_t index);
@@ -296,8 +292,6 @@ private:
bool is_connected;
};
u32 press_state{};
NPadType style{};
std::array<NPadEntry, 10> shared_memory_entries{};
std::array<

File diff suppressed because it is too large Load Diff

View File

@@ -4,122 +4,12 @@
#pragma once
#include "controllers/controller_base.h"
#include "core/hle/service/service.h"
namespace CoreTiming {
struct EventType;
}
namespace Kernel {
class SharedMemory;
}
namespace SM {
class ServiceManager;
}
namespace Service::HID {
enum class HidController : std::size_t {
DebugPad,
Touchscreen,
Mouse,
Keyboard,
XPad,
Unknown1,
Unknown2,
Unknown3,
SixAxisSensor,
NPad,
Gesture,
MaxControllers,
};
class IAppletResource final : public ServiceFramework<IAppletResource> {
public:
IAppletResource();
~IAppletResource() override;
void ActivateController(HidController controller);
void DeactivateController(HidController controller);
template <typename T>
T& GetController(HidController controller) {
return static_cast<T&>(*controllers[static_cast<size_t>(controller)]);
}
template <typename T>
const T& GetController(HidController controller) const {
return static_cast<T&>(*controllers[static_cast<size_t>(controller)]);
}
private:
template <typename T>
void MakeController(HidController controller) {
controllers[static_cast<std::size_t>(controller)] = std::make_unique<T>();
}
void GetSharedMemoryHandle(Kernel::HLERequestContext& ctx);
void UpdateControllers(u64 userdata, int cycles_late);
Kernel::SharedPtr<Kernel::SharedMemory> shared_mem;
CoreTiming::EventType* pad_update_event;
std::array<std::unique_ptr<ControllerBase>, static_cast<size_t>(HidController::MaxControllers)>
controllers{};
};
class Hid final : public ServiceFramework<Hid> {
public:
Hid();
~Hid() override;
std::shared_ptr<IAppletResource> GetAppletResource();
private:
void CreateAppletResource(Kernel::HLERequestContext& ctx);
void ActivateXpad(Kernel::HLERequestContext& ctx);
void ActivateDebugPad(Kernel::HLERequestContext& ctx);
void ActivateTouchScreen(Kernel::HLERequestContext& ctx);
void ActivateMouse(Kernel::HLERequestContext& ctx);
void ActivateKeyboard(Kernel::HLERequestContext& ctx);
void ActivateGesture(Kernel::HLERequestContext& ctx);
void ActivateNpadWithRevision(Kernel::HLERequestContext& ctx);
void StartSixAxisSensor(Kernel::HLERequestContext& ctx);
void SetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx);
void IsSixAxisSensorAtRest(Kernel::HLERequestContext& ctx);
void SetSupportedNpadStyleSet(Kernel::HLERequestContext& ctx);
void GetSupportedNpadStyleSet(Kernel::HLERequestContext& ctx);
void SetSupportedNpadIdType(Kernel::HLERequestContext& ctx);
void ActivateNpad(Kernel::HLERequestContext& ctx);
void AcquireNpadStyleSetUpdateEventHandle(Kernel::HLERequestContext& ctx);
void DisconnectNpad(Kernel::HLERequestContext& ctx);
void GetPlayerLedPattern(Kernel::HLERequestContext& ctx);
void SetNpadJoyHoldType(Kernel::HLERequestContext& ctx);
void GetNpadJoyHoldType(Kernel::HLERequestContext& ctx);
void SetNpadJoyAssignmentModeSingleByDefault(Kernel::HLERequestContext& ctx);
void BeginPermitVibrationSession(Kernel::HLERequestContext& ctx);
void EndPermitVibrationSession(Kernel::HLERequestContext& ctx);
void SendVibrationValue(Kernel::HLERequestContext& ctx);
void SendVibrationValues(Kernel::HLERequestContext& ctx);
void GetActualVibrationValue(Kernel::HLERequestContext& ctx);
void SetNpadJoyAssignmentModeDual(Kernel::HLERequestContext& ctx);
void MergeSingleJoyAsDualJoy(Kernel::HLERequestContext& ctx);
void SetNpadHandheldActivationMode(Kernel::HLERequestContext& ctx);
void GetVibrationDeviceInfo(Kernel::HLERequestContext& ctx);
void CreateActiveVibrationDeviceList(Kernel::HLERequestContext& ctx);
void ActivateConsoleSixAxisSensor(Kernel::HLERequestContext& ctx);
void StartConsoleSixAxisSensor(Kernel::HLERequestContext& ctx);
void StopSixAxisSensor(Kernel::HLERequestContext& ctx);
void SetIsPalmaAllConnectable(Kernel::HLERequestContext& ctx);
void SetPalmaBoostMode(Kernel::HLERequestContext& ctx);
std::shared_ptr<IAppletResource> applet_resource;
};
/// Reload input devices. Used when input configuration changed
void ReloadInputDevices();

View File

@@ -20,8 +20,7 @@ namespace Service::NFP {
namespace ErrCodes {
constexpr ResultCode ERR_TAG_FAILED(ErrorModule::NFP,
-1); // TODO(ogniK): Find the actual error code
constexpr ResultCode ERR_NO_APPLICATION_AREA(ErrorModule::NFP, 152);
} // namespace ErrCodes
}
Module::Interface::Interface(std::shared_ptr<Module> module, const char* name)
: ServiceFramework(name), module(std::move(module)) {
@@ -293,9 +292,10 @@ private:
}
void OpenApplicationArea(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_NFP, "(STUBBED) called");
LOG_DEBUG(Service_NFP, "called");
// We don't need to worry about this since we can just open the file
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(ErrCodes::ERR_NO_APPLICATION_AREA);
rb.Push(RESULT_SUCCESS);
}
void GetApplicationAreaSize(Kernel::HLERequestContext& ctx) {

View File

@@ -13,7 +13,7 @@ public:
explicit BootMode() : ServiceFramework{"pm:bm"} {
static const FunctionInfo functions[] = {
{0, &BootMode::GetBootMode, "GetBootMode"},
{1, &BootMode::SetMaintenanceBoot, "SetMaintenanceBoot"},
{1, nullptr, "SetMaintenanceBoot"},
};
RegisterHandlers(functions);
}
@@ -24,19 +24,8 @@ private:
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(RESULT_SUCCESS);
rb.PushEnum(boot_mode);
rb.Push<u32>(static_cast<u32>(SystemBootMode::Normal)); // Normal boot mode
}
void SetMaintenanceBoot(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_PM, "called");
boot_mode = SystemBootMode::Maintenance;
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
SystemBootMode boot_mode = SystemBootMode::Normal;
};
class DebugMonitor final : public ServiceFramework<DebugMonitor> {

View File

@@ -9,12 +9,7 @@ class ServiceManager;
}
namespace Service::PM {
enum class SystemBootMode {
Normal,
Maintenance,
};
enum class SystemBootMode : u32 { Normal = 0, Maintenance = 1 };
/// Registers all PM services with the specified service manager.
void InstallInterfaces(SM::ServiceManager& service_manager);

View File

@@ -148,7 +148,7 @@ void ServiceFrameworkBase::ReportUnimplementedFunction(Kernel::HLERequestContext
}
buf.push_back('}');
UNIMPLEMENTED_MSG("Unknown / unimplemented {}", fmt::to_string(buf));
LOG_CRITICAL(Service, "Unknown / unimplemented {}", fmt::to_string(buf));
}
void ServiceFrameworkBase::InvokeRequest(Kernel::HLERequestContext& ctx) {

View File

@@ -12,16 +12,9 @@
#include "core/hle/kernel/client_session.h"
#include "core/hle/service/time/interface.h"
#include "core/hle/service/time/time.h"
#include "core/settings.h"
namespace Service::Time {
static std::chrono::seconds GetSecondsSinceEpoch() {
return std::chrono::duration_cast<std::chrono::seconds>(
std::chrono::system_clock::now().time_since_epoch()) +
Settings::values.custom_rtc_differential;
}
static void PosixToCalendar(u64 posix_time, CalendarTime& calendar_time,
CalendarAdditionalInfo& additional_info,
[[maybe_unused]] const TimeZoneRule& /*rule*/) {
@@ -75,7 +68,9 @@ public:
private:
void GetCurrentTime(Kernel::HLERequestContext& ctx) {
const s64 time_since_epoch{GetSecondsSinceEpoch().count()};
const s64 time_since_epoch{std::chrono::duration_cast<std::chrono::seconds>(
std::chrono::system_clock::now().time_since_epoch())
.count()};
LOG_DEBUG(Service_Time, "called");
IPC::ResponseBuilder rb{ctx, 4};
@@ -271,7 +266,10 @@ void Module::Interface::GetClockSnapshot(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto initial_type = rp.PopRaw<u8>();
const s64 time_since_epoch{GetSecondsSinceEpoch().count()};
const s64 time_since_epoch{std::chrono::duration_cast<std::chrono::seconds>(
std::chrono::system_clock::now().time_since_epoch())
.count()};
const std::time_t time(time_since_epoch);
const std::tm* tm = std::localtime(&time);
if (tm == nullptr) {

View File

@@ -32,9 +32,6 @@
namespace Service::VI {
constexpr ResultCode ERR_OPERATION_FAILED{ErrorModule::VI, 1};
constexpr ResultCode ERR_UNSUPPORTED{ErrorModule::VI, 6};
struct DisplayInfo {
/// The name of this particular display.
char display_name[0x40]{"Default"};
@@ -704,14 +701,13 @@ private:
rb.Push(RESULT_SUCCESS);
}
// This function currently does nothing but return a success error code in
// the vi library itself, so do the same thing, but log out the passed in values.
void SetLayerVisibility(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const u64 layer_id = rp.Pop<u64>();
const bool visibility = rp.Pop<bool>();
LOG_DEBUG(Service_VI, "called, layer_id=0x{:08X}, visibility={}", layer_id, visibility);
LOG_WARNING(Service_VI, "(STUBBED) called, layer_id=0x{:08X}, visibility={}", layer_id,
visibility);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
@@ -878,22 +874,6 @@ public:
~IApplicationDisplayService() = default;
private:
enum class ConvertedScaleMode : u64 {
Freeze = 0,
ScaleToWindow = 1,
ScaleAndCrop = 2,
None = 3,
PreserveAspectRatio = 4,
};
enum class NintendoScaleMode : u32 {
None = 0,
Freeze = 1,
ScaleToWindow = 2,
ScaleAndCrop = 3,
PreserveAspectRatio = 4,
};
void GetRelayService(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_VI, "(STUBBED) called");
@@ -978,43 +958,33 @@ private:
IPC::RequestParser rp{ctx};
const u64 display_id = rp.Pop<u64>();
LOG_DEBUG(Service_VI, "called. display_id=0x{:016X}", display_id);
LOG_WARNING(Service_VI, "(STUBBED) called. display_id=0x{:016X}", display_id);
IPC::ResponseBuilder rb{ctx, 6};
rb.Push(RESULT_SUCCESS);
// This only returns the fixed values of 1280x720 and makes no distinguishing
// between docked and undocked dimensions. We take the liberty of applying
// the resolution scaling factor here.
rb.Push(static_cast<u64>(DisplayResolution::UndockedWidth) *
static_cast<u32>(Settings::values.resolution_factor));
rb.Push(static_cast<u64>(DisplayResolution::UndockedHeight) *
static_cast<u32>(Settings::values.resolution_factor));
if (Settings::values.use_docked_mode) {
rb.Push(static_cast<u64>(DisplayResolution::DockedWidth) *
static_cast<u32>(Settings::values.resolution_factor));
rb.Push(static_cast<u64>(DisplayResolution::DockedHeight) *
static_cast<u32>(Settings::values.resolution_factor));
} else {
rb.Push(static_cast<u64>(DisplayResolution::UndockedWidth) *
static_cast<u32>(Settings::values.resolution_factor));
rb.Push(static_cast<u64>(DisplayResolution::UndockedHeight) *
static_cast<u32>(Settings::values.resolution_factor));
}
}
void SetLayerScalingMode(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto scaling_mode = rp.PopEnum<NintendoScaleMode>();
const u32 scaling_mode = rp.Pop<u32>();
const u64 unknown = rp.Pop<u64>();
LOG_DEBUG(Service_VI, "called. scaling_mode=0x{:08X}, unknown=0x{:016X}",
static_cast<u32>(scaling_mode), unknown);
LOG_WARNING(Service_VI, "(STUBBED) called. scaling_mode=0x{:08X}, unknown=0x{:016X}",
scaling_mode, unknown);
IPC::ResponseBuilder rb{ctx, 2};
if (scaling_mode > NintendoScaleMode::PreserveAspectRatio) {
LOG_ERROR(Service_VI, "Invalid scaling mode provided.");
rb.Push(ERR_OPERATION_FAILED);
return;
}
if (scaling_mode != NintendoScaleMode::ScaleToWindow &&
scaling_mode != NintendoScaleMode::PreserveAspectRatio) {
LOG_ERROR(Service_VI, "Unsupported scaling mode supplied.");
rb.Push(ERR_UNSUPPORTED);
return;
}
rb.Push(RESULT_SUCCESS);
}
@@ -1095,37 +1065,51 @@ private:
rb.PushCopyObjects(vsync_event);
}
enum class ConvertedScaleMode : u64 {
None = 0, // VI seems to name this as "Unknown" but lots of games pass it, assume it's no
// scaling/default
Freeze = 1,
ScaleToWindow = 2,
Crop = 3,
NoCrop = 4,
};
// This struct is different, currently it's 1:1 but this might change in the future.
enum class NintendoScaleMode : u32 {
None = 0,
Freeze = 1,
ScaleToWindow = 2,
Crop = 3,
NoCrop = 4,
};
void ConvertScalingMode(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto mode = rp.PopEnum<NintendoScaleMode>();
auto mode = rp.PopEnum<NintendoScaleMode>();
LOG_DEBUG(Service_VI, "called mode={}", static_cast<u32>(mode));
const auto converted_mode = ConvertScalingModeImpl(mode);
if (converted_mode.Succeeded()) {
IPC::ResponseBuilder rb{ctx, 4};
rb.Push(RESULT_SUCCESS);
rb.PushEnum(*converted_mode);
} else {
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(converted_mode.Code());
}
}
static ResultVal<ConvertedScaleMode> ConvertScalingModeImpl(NintendoScaleMode mode) {
IPC::ResponseBuilder rb{ctx, 4};
rb.Push(RESULT_SUCCESS);
switch (mode) {
case NintendoScaleMode::None:
return MakeResult(ConvertedScaleMode::None);
rb.PushEnum(ConvertedScaleMode::None);
break;
case NintendoScaleMode::Freeze:
return MakeResult(ConvertedScaleMode::Freeze);
rb.PushEnum(ConvertedScaleMode::Freeze);
break;
case NintendoScaleMode::ScaleToWindow:
return MakeResult(ConvertedScaleMode::ScaleToWindow);
case NintendoScaleMode::ScaleAndCrop:
return MakeResult(ConvertedScaleMode::ScaleAndCrop);
case NintendoScaleMode::PreserveAspectRatio:
return MakeResult(ConvertedScaleMode::PreserveAspectRatio);
rb.PushEnum(ConvertedScaleMode::ScaleToWindow);
break;
case NintendoScaleMode::Crop:
rb.PushEnum(ConvertedScaleMode::Crop);
break;
case NintendoScaleMode::NoCrop:
rb.PushEnum(ConvertedScaleMode::NoCrop);
break;
default:
return ERR_OPERATION_FAILED;
UNIMPLEMENTED_MSG("Unknown scaling mode {}", static_cast<u32>(mode));
rb.PushEnum(ConvertedScaleMode::None);
break;
}
}

View File

@@ -178,8 +178,6 @@ public:
/**
* Get the banner (typically banner section) of the application
* In the context of NX, this is the animation that displays in the bottom right of the screen
* when a game boots. Stored in GIF format.
* @param buffer Reference to buffer to store data
* @return ResultStatus result of function
*/
@@ -189,8 +187,6 @@ public:
/**
* Get the logo (typically logo section) of the application
* In the context of NX, this is the static image that displays in the top left of the screen
* when a game boots. Stored in JPEG format.
* @param buffer Reference to buffer to store data
* @return ResultStatus result of function
*/
@@ -263,15 +259,6 @@ public:
return ResultStatus::ErrorNotImplemented;
}
/**
* Get the RomFS of the manual of the application
* @param file The raw manual RomFS of the game
* @return ResultStatus result of function
*/
virtual ResultStatus ReadManualRomFS(FileSys::VirtualFile& file) {
return ResultStatus::ErrorNotImplemented;
}
protected:
FileSys::VirtualFile file;
bool is_loaded = false;

View File

@@ -79,13 +79,4 @@ u64 AppLoader_NAX::ReadRomFSIVFCOffset() const {
ResultStatus AppLoader_NAX::ReadProgramId(u64& out_program_id) {
return nca_loader->ReadProgramId(out_program_id);
}
ResultStatus AppLoader_NAX::ReadBanner(std::vector<u8>& buffer) {
return nca_loader->ReadBanner(buffer);
}
ResultStatus AppLoader_NAX::ReadLogo(std::vector<u8>& buffer) {
return nca_loader->ReadLogo(buffer);
}
} // namespace Loader

View File

@@ -39,9 +39,6 @@ public:
u64 ReadRomFSIVFCOffset() const override;
ResultStatus ReadProgramId(u64& out_program_id) override;
ResultStatus ReadBanner(std::vector<u8>& buffer) override;
ResultStatus ReadLogo(std::vector<u8>& buffer) override;
private:
std::unique_ptr<FileSys::NAX> nax;
std::unique_ptr<AppLoader_NCA> nca_loader;

View File

@@ -84,23 +84,4 @@ ResultStatus AppLoader_NCA::ReadProgramId(u64& out_program_id) {
return ResultStatus::Success;
}
ResultStatus AppLoader_NCA::ReadBanner(std::vector<u8>& buffer) {
if (nca == nullptr || nca->GetStatus() != ResultStatus::Success)
return ResultStatus::ErrorNotInitialized;
const auto logo = nca->GetLogoPartition();
if (logo == nullptr)
return ResultStatus::ErrorNoIcon;
buffer = logo->GetFile("StartupMovie.gif")->ReadAllBytes();
return ResultStatus::Success;
}
ResultStatus AppLoader_NCA::ReadLogo(std::vector<u8>& buffer) {
if (nca == nullptr || nca->GetStatus() != ResultStatus::Success)
return ResultStatus::ErrorNotInitialized;
const auto logo = nca->GetLogoPartition();
if (logo == nullptr)
return ResultStatus::ErrorNoIcon;
buffer = logo->GetFile("NintendoLogo.png")->ReadAllBytes();
return ResultStatus::Success;
}
} // namespace Loader

View File

@@ -39,9 +39,6 @@ public:
u64 ReadRomFSIVFCOffset() const override;
ResultStatus ReadProgramId(u64& out_program_id) override;
ResultStatus ReadBanner(std::vector<u8>& buffer) override;
ResultStatus ReadLogo(std::vector<u8>& buffer) override;
private:
std::unique_ptr<FileSys::NCA> nca;
std::unique_ptr<AppLoader_DeconstructedRomDirectory> directory_loader;

View File

@@ -158,21 +158,4 @@ ResultStatus AppLoader_NSP::ReadControlData(FileSys::NACP& nacp) {
nacp = *nacp_file;
return ResultStatus::Success;
}
ResultStatus AppLoader_NSP::ReadManualRomFS(FileSys::VirtualFile& file) {
const auto nca = nsp->GetNCA(nsp->GetProgramTitleID(), FileSys::ContentRecordType::Manual);
if (nsp->GetStatus() != ResultStatus::Success || nca == nullptr)
return ResultStatus::ErrorNoRomFS;
file = nca->GetRomFS();
return file == nullptr ? ResultStatus::ErrorNoRomFS : ResultStatus::Success;
}
ResultStatus AppLoader_NSP::ReadBanner(std::vector<u8>& buffer) {
return secondary_loader->ReadBanner(buffer);
}
ResultStatus AppLoader_NSP::ReadLogo(std::vector<u8>& buffer) {
return secondary_loader->ReadLogo(buffer);
}
} // namespace Loader

View File

@@ -44,10 +44,6 @@ public:
ResultStatus ReadIcon(std::vector<u8>& buffer) override;
ResultStatus ReadTitle(std::string& title) override;
ResultStatus ReadControlData(FileSys::NACP& nacp) override;
ResultStatus ReadManualRomFS(FileSys::VirtualFile& file) override;
ResultStatus ReadBanner(std::vector<u8>& buffer) override;
ResultStatus ReadLogo(std::vector<u8>& buffer) override;
private:
std::unique_ptr<FileSys::NSP> nsp;

View File

@@ -128,21 +128,4 @@ ResultStatus AppLoader_XCI::ReadControlData(FileSys::NACP& control) {
return ResultStatus::Success;
}
ResultStatus AppLoader_XCI::ReadManualRomFS(FileSys::VirtualFile& file) {
const auto nca = xci->GetSecurePartitionNSP()->GetNCA(xci->GetProgramTitleID(),
FileSys::ContentRecordType::Manual);
if (xci->GetStatus() != ResultStatus::Success || nca == nullptr)
return ResultStatus::ErrorXCIMissingPartition;
file = nca->GetRomFS();
return file == nullptr ? ResultStatus::ErrorNoRomFS : ResultStatus::Success;
}
ResultStatus AppLoader_XCI::ReadBanner(std::vector<u8>& buffer) {
return nca_loader->ReadBanner(buffer);
}
ResultStatus AppLoader_XCI::ReadLogo(std::vector<u8>& buffer) {
return nca_loader->ReadLogo(buffer);
}
} // namespace Loader

View File

@@ -44,10 +44,6 @@ public:
ResultStatus ReadIcon(std::vector<u8>& buffer) override;
ResultStatus ReadTitle(std::string& title) override;
ResultStatus ReadControlData(FileSys::NACP& control) override;
ResultStatus ReadManualRomFS(FileSys::VirtualFile& file) override;
ResultStatus ReadBanner(std::vector<u8>& buffer) override;
ResultStatus ReadLogo(std::vector<u8>& buffer) override;
private:
std::unique_ptr<FileSys::XCI> xci;

View File

@@ -74,33 +74,4 @@ void Apply() {
Service::HID::ReloadInputDevices();
}
template <typename T>
void LogSetting(const std::string& name, const T& value) {
LOG_INFO(Config, "{}: {}", name, value);
}
void LogSettings() {
LOG_INFO(Config, "yuzu Configuration:");
LogSetting("System_UseDockedMode", Settings::values.use_docked_mode);
LogSetting("System_EnableNfc", Settings::values.enable_nfc);
LogSetting("System_RngSeed", Settings::values.rng_seed.value_or(0));
LogSetting("System_CurrentUser", Settings::values.current_user);
LogSetting("System_LanguageIndex", Settings::values.language_index);
LogSetting("Core_UseCpuJit", Settings::values.use_cpu_jit);
LogSetting("Core_UseMultiCore", Settings::values.use_multi_core);
LogSetting("Renderer_UseResolutionFactor", Settings::values.resolution_factor);
LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit);
LogSetting("Renderer_FrameLimit", Settings::values.frame_limit);
LogSetting("Renderer_UseAccurateGpuEmulation", Settings::values.use_accurate_gpu_emulation);
LogSetting("Audio_OutputEngine", Settings::values.sink_id);
LogSetting("Audio_EnableAudioStretching", Settings::values.enable_audio_stretching);
LogSetting("Audio_OutputDevice", Settings::values.audio_device_id);
LogSetting("DataStorage_UseVirtualSd", Settings::values.use_virtual_sd);
LogSetting("DataStorage_NandDir", Settings::values.nand_dir);
LogSetting("DataStorage_SdmcDir", Settings::values.sdmc_dir);
LogSetting("Debugging_UseGdbstub", Settings::values.use_gdbstub);
LogSetting("Debugging_GdbstubPort", Settings::values.gdbstub_port);
LogSetting("Debugging_ProgramArgs", Settings::values.program_args);
}
} // namespace Settings

View File

@@ -6,7 +6,6 @@
#include <array>
#include <atomic>
#include <chrono>
#include <map>
#include <optional>
#include <string>
@@ -351,11 +350,6 @@ struct Values {
bool use_docked_mode;
bool enable_nfc;
std::optional<u32> rng_seed;
// Measured in seconds since epoch
std::optional<std::chrono::seconds> custom_rtc;
// Set on game boot, reset on stop. Seconds difference between current time and `custom_rtc`
std::chrono::seconds custom_rtc_differential;
s32 current_user;
s32 language_index;
@@ -425,5 +419,4 @@ struct Values {
} extern values;
void Apply();
void LogSettings();
} // namespace Settings

View File

@@ -30,8 +30,6 @@ add_library(video_core STATIC
renderer_base.h
renderer_opengl/gl_buffer_cache.cpp
renderer_opengl/gl_buffer_cache.h
renderer_opengl/gl_global_cache.cpp
renderer_opengl/gl_global_cache.h
renderer_opengl/gl_primitive_assembler.cpp
renderer_opengl/gl_primitive_assembler.h
renderer_opengl/gl_rasterizer.cpp
@@ -59,35 +57,6 @@ add_library(video_core STATIC
renderer_opengl/renderer_opengl.h
renderer_opengl/utils.cpp
renderer_opengl/utils.h
shader/decode/arithmetic.cpp
shader/decode/arithmetic_immediate.cpp
shader/decode/bfe.cpp
shader/decode/bfi.cpp
shader/decode/shift.cpp
shader/decode/arithmetic_integer.cpp
shader/decode/arithmetic_integer_immediate.cpp
shader/decode/arithmetic_half.cpp
shader/decode/arithmetic_half_immediate.cpp
shader/decode/ffma.cpp
shader/decode/hfma2.cpp
shader/decode/conversion.cpp
shader/decode/memory.cpp
shader/decode/float_set_predicate.cpp
shader/decode/integer_set_predicate.cpp
shader/decode/half_set_predicate.cpp
shader/decode/predicate_set_register.cpp
shader/decode/predicate_set_predicate.cpp
shader/decode/register_set_predicate.cpp
shader/decode/float_set.cpp
shader/decode/integer_set.cpp
shader/decode/half_set.cpp
shader/decode/video.cpp
shader/decode/xmad.cpp
shader/decode/other.cpp
shader/decode.cpp
shader/shader_ir.cpp
shader/shader_ir.h
shader/track.cpp
surface.cpp
surface.h
textures/astc.cpp

View File

@@ -37,7 +37,6 @@ void Maxwell3D::InitializeRegisterDefaults() {
regs.viewports[viewport].depth_range_near = 0.0f;
regs.viewports[viewport].depth_range_far = 1.0f;
}
// Doom and Bomberman seems to use the uninitialized registers and just enable blend
// so initialize blend registers with sane values
regs.blend.equation_rgb = Regs::Blend::Equation::Add;
@@ -67,7 +66,6 @@ void Maxwell3D::InitializeRegisterDefaults() {
regs.stencil_back_func_func = Regs::ComparisonOp::Always;
regs.stencil_back_func_mask = 0xFFFFFFFF;
regs.stencil_back_mask = 0xFFFFFFFF;
// TODO(Rodrigo): Most games do not set a point size. I think this is a case of a
// register carrying a default value. Assume it's OpenGL's default (1).
regs.point_size = 1.0f;
@@ -80,9 +78,6 @@ void Maxwell3D::InitializeRegisterDefaults() {
regs.color_mask[color_mask].B.Assign(1);
regs.color_mask[color_mask].A.Assign(1);
}
// Commercial games seem to assume this value is enabled and nouveau sets this value manually.
regs.rt_separate_frag_data = 1;
}
void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {
@@ -140,33 +135,6 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
if (regs.reg_array[method_call.method] != method_call.argument) {
regs.reg_array[method_call.method] = method_call.argument;
// Color buffers
constexpr u32 first_rt_reg = MAXWELL3D_REG_INDEX(rt);
constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32);
if (method_call.method >= first_rt_reg &&
method_call.method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) {
const std::size_t rt_index = (method_call.method - first_rt_reg) / registers_per_rt;
dirty_flags.color_buffer |= 1u << static_cast<u32>(rt_index);
}
// Zeta buffer
constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32);
if (method_call.method == MAXWELL3D_REG_INDEX(zeta_enable) ||
method_call.method == MAXWELL3D_REG_INDEX(zeta_width) ||
method_call.method == MAXWELL3D_REG_INDEX(zeta_height) ||
(method_call.method >= MAXWELL3D_REG_INDEX(zeta) &&
method_call.method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) {
dirty_flags.zeta_buffer = true;
}
// Shader
constexpr u32 shader_registers_count =
sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32);
if (method_call.method >= MAXWELL3D_REG_INDEX(shader_config[0]) &&
method_call.method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) {
dirty_flags.shaders = true;
}
// Vertex format
if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) &&
method_call.method <

View File

@@ -1089,18 +1089,10 @@ public:
MemoryManager& memory_manager;
struct DirtyFlags {
u8 color_buffer = 0xFF;
bool zeta_buffer = true;
bool shaders = true;
bool vertex_attrib_format = true;
u32 vertex_array = 0xFFFFFFFF;
void OnMemoryWrite() {
color_buffer = 0xFF;
zeta_buffer = true;
shaders = true;
vertex_array = 0xFFFFFFFF;
}
};

View File

@@ -208,8 +208,6 @@ enum class UniformType : u64 {
SignedShort = 3,
Single = 4,
Double = 5,
Quad = 6,
UnsignedQuad = 7,
};
enum class StoreType : u64 {
@@ -399,10 +397,6 @@ struct IpaMode {
bool operator!=(const IpaMode& a) const {
return !operator==(a);
}
bool operator<(const IpaMode& a) const {
return std::tie(interpolation_mode, sampling_mode) <
std::tie(a.interpolation_mode, a.sampling_mode);
}
};
enum class SystemVariable : u64 {
@@ -650,7 +644,6 @@ union Instruction {
BitField<37, 2, HalfPrecision> precision;
BitField<32, 1, u64> saturate;
BitField<31, 1, u64> negate_b;
BitField<30, 1, u64> negate_c;
BitField<35, 2, HalfType> type_c;
} rr;
@@ -786,12 +779,6 @@ union Instruction {
BitField<44, 2, u64> unknown;
} st_l;
union {
BitField<48, 3, UniformType> type;
BitField<46, 2, u64> cache_mode;
BitField<20, 24, s64> immediate_offset;
} ldg;
union {
BitField<0, 3, u64> pred0;
BitField<3, 3, u64> pred3;
@@ -1444,7 +1431,6 @@ public:
PredicateSetRegister,
RegisterSetPredicate,
Conversion,
Video,
Xmad,
Unknown,
};
@@ -1576,8 +1562,8 @@ private:
INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),
INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"),
INST("01011111--------", Id::VMAD, Type::Video, "VMAD"),
INST("0101000011110---", Id::VSETP, Type::Video, "VSETP"),
INST("01011111--------", Id::VMAD, Type::Trivial, "VMAD"),
INST("0101000011110---", Id::VSETP, Type::Trivial, "VSETP"),
INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"),
INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"),
INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"),

View File

@@ -106,7 +106,7 @@ struct Header {
} ps;
};
u64 GetLocalMemorySize() const {
u64 GetLocalMemorySize() {
return (common1.shader_local_memory_low_size |
(common2.shader_local_memory_high_size << 24));
}

View File

@@ -3,8 +3,6 @@
// Refer to the license.txt file included.
#include "common/assert.h"
#include "core/core_timing.h"
#include "core/memory.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/engines/kepler_memory.h"
#include "video_core/engines/maxwell_3d.h"
@@ -126,36 +124,9 @@ u32 DepthFormatBytesPerPixel(DepthFormat format) {
}
}
// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
// their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4.
// So the values you see in docs might be multiplied by 4.
enum class BufferMethods {
BindObject = 0x0,
Nop = 0x2,
SemaphoreAddressHigh = 0x4,
SemaphoreAddressLow = 0x5,
SemaphoreSequence = 0x6,
SemaphoreTrigger = 0x7,
NotifyIntr = 0x8,
WrcacheFlush = 0x9,
Unk28 = 0xA,
Unk2c = 0xB,
RefCnt = 0x14,
SemaphoreAcquire = 0x1A,
SemaphoreRelease = 0x1B,
Unk70 = 0x1C,
Unk74 = 0x1D,
Unk78 = 0x1E,
Unk7c = 0x1F,
Yield = 0x20,
NonPullerMethods = 0x40,
};
enum class GpuSemaphoreOperation {
AcquireEqual = 0x1,
WriteLong = 0x2,
AcquireGequal = 0x4,
AcquireMask = 0x8,
BindObject = 0,
CountBufferMethods = 0x40,
};
void GPU::CallMethod(const MethodCall& method_call) {
@@ -164,78 +135,20 @@ void GPU::CallMethod(const MethodCall& method_call) {
ASSERT(method_call.subchannel < bound_engines.size());
if (ExecuteMethodOnEngine(method_call)) {
CallEngineMethod(method_call);
} else {
CallPullerMethod(method_call);
if (method_call.method == static_cast<u32>(BufferMethods::BindObject)) {
// Bind the current subchannel to the desired engine id.
LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel,
method_call.argument);
bound_engines[method_call.subchannel] = static_cast<EngineID>(method_call.argument);
return;
}
}
bool GPU::ExecuteMethodOnEngine(const MethodCall& method_call) {
const auto method = static_cast<BufferMethods>(method_call.method);
return method >= BufferMethods::NonPullerMethods;
}
if (method_call.method < static_cast<u32>(BufferMethods::CountBufferMethods)) {
// TODO(Subv): Research and implement these methods.
LOG_ERROR(HW_GPU, "Special buffer methods other than Bind are not implemented");
return;
}
void GPU::CallPullerMethod(const MethodCall& method_call) {
regs.reg_array[method_call.method] = method_call.argument;
const auto method = static_cast<BufferMethods>(method_call.method);
switch (method) {
case BufferMethods::BindObject: {
ProcessBindMethod(method_call);
break;
}
case BufferMethods::Nop:
case BufferMethods::SemaphoreAddressHigh:
case BufferMethods::SemaphoreAddressLow:
case BufferMethods::SemaphoreSequence:
case BufferMethods::RefCnt:
break;
case BufferMethods::SemaphoreTrigger: {
ProcessSemaphoreTriggerMethod();
break;
}
case BufferMethods::NotifyIntr: {
// TODO(Kmather73): Research and implement this method.
LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented");
break;
}
case BufferMethods::WrcacheFlush: {
// TODO(Kmather73): Research and implement this method.
LOG_ERROR(HW_GPU, "Special puller engine method WrcacheFlush not implemented");
break;
}
case BufferMethods::Unk28: {
// TODO(Kmather73): Research and implement this method.
LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented");
break;
}
case BufferMethods::Unk2c: {
// TODO(Kmather73): Research and implement this method.
LOG_ERROR(HW_GPU, "Special puller engine method Unk2c not implemented");
break;
}
case BufferMethods::SemaphoreAcquire: {
ProcessSemaphoreAcquire();
break;
}
case BufferMethods::SemaphoreRelease: {
ProcessSemaphoreRelease();
break;
}
case BufferMethods::Yield: {
// TODO(Kmather73): Research and implement this method.
LOG_ERROR(HW_GPU, "Special puller engine method Yield not implemented");
break;
}
default:
LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented",
static_cast<u32>(method));
break;
}
}
void GPU::CallEngineMethod(const MethodCall& method_call) {
const EngineID engine = bound_engines[method_call.subchannel];
switch (engine) {
@@ -259,76 +172,4 @@ void GPU::CallEngineMethod(const MethodCall& method_call) {
}
}
void GPU::ProcessBindMethod(const MethodCall& method_call) {
// Bind the current subchannel to the desired engine id.
LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel,
method_call.argument);
bound_engines[method_call.subchannel] = static_cast<EngineID>(method_call.argument);
}
void GPU::ProcessSemaphoreTriggerMethod() {
const auto semaphoreOperationMask = 0xF;
const auto op =
static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask);
if (op == GpuSemaphoreOperation::WriteLong) {
auto address = memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress());
struct Block {
u32 sequence;
u32 zeros = 0;
u64 timestamp;
};
Block block{};
block.sequence = regs.semaphore_sequence;
// TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
// CoreTiming
block.timestamp = CoreTiming::GetTicks();
Memory::WriteBlock(*address, &block, sizeof(block));
} else {
const auto address =
memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress());
const u32 word = Memory::Read32(*address);
if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) ||
(op == GpuSemaphoreOperation::AcquireGequal &&
static_cast<s32>(word - regs.semaphore_sequence) > 0) ||
(op == GpuSemaphoreOperation::AcquireMask && (word & regs.semaphore_sequence))) {
// Nothing to do in this case
} else {
regs.acquire_source = true;
regs.acquire_value = regs.semaphore_sequence;
if (op == GpuSemaphoreOperation::AcquireEqual) {
regs.acquire_active = true;
regs.acquire_mode = false;
} else if (op == GpuSemaphoreOperation::AcquireGequal) {
regs.acquire_active = true;
regs.acquire_mode = true;
} else if (op == GpuSemaphoreOperation::AcquireMask) {
// TODO(kemathe) The acquire mask operation waits for a value that, ANDed with
// semaphore_sequence, gives a non-0 result
LOG_ERROR(HW_GPU, "Invalid semaphore operation AcquireMask not implemented");
} else {
LOG_ERROR(HW_GPU, "Invalid semaphore operation");
}
}
}
}
void GPU::ProcessSemaphoreRelease() {
const auto address = memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress());
Memory::Write32(*address, regs.semaphore_release);
}
void GPU::ProcessSemaphoreAcquire() {
const auto address = memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress());
const u32 word = Memory::Read32(*address);
const auto value = regs.semaphore_acquire;
if (word != value) {
regs.acquire_active = true;
regs.acquire_value = value;
// TODO(kemathe73) figure out how to do the acquire_timeout
regs.acquire_mode = false;
regs.acquire_source = false;
}
}
} // namespace Tegra

View File

@@ -156,46 +156,6 @@ public:
/// Returns a const reference to the GPU DMA pusher.
const Tegra::DmaPusher& DmaPusher() const;
struct Regs {
static constexpr size_t NUM_REGS = 0x100;
union {
struct {
INSERT_PADDING_WORDS(0x4);
struct {
u32 address_high;
u32 address_low;
GPUVAddr SmaphoreAddress() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
address_low);
}
} smaphore_address;
u32 semaphore_sequence;
u32 semaphore_trigger;
INSERT_PADDING_WORDS(0xC);
// The puser and the puller share the reference counter, the pusher only has read
// access
u32 reference_count;
INSERT_PADDING_WORDS(0x5);
u32 semaphore_acquire;
u32 semaphore_release;
INSERT_PADDING_WORDS(0xE4);
// Puller state
u32 acquire_mode;
u32 acquire_source;
u32 acquire_active;
u32 acquire_timeout;
u32 acquire_value;
};
std::array<u32, NUM_REGS> reg_array;
};
} regs{};
private:
std::unique_ptr<Tegra::DmaPusher> dma_pusher;
std::unique_ptr<Tegra::MemoryManager> memory_manager;
@@ -213,37 +173,6 @@ private:
std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
/// Inline memory engine
std::unique_ptr<Engines::KeplerMemory> kepler_memory;
void ProcessBindMethod(const MethodCall& method_call);
void ProcessSemaphoreTriggerMethod();
void ProcessSemaphoreRelease();
void ProcessSemaphoreAcquire();
// Calls a GPU puller method.
void CallPullerMethod(const MethodCall& method_call);
// Calls a GPU engine method.
void CallEngineMethod(const MethodCall& method_call);
// Determines where the method should be executed.
bool ExecuteMethodOnEngine(const MethodCall& method_call);
};
#define ASSERT_REG_POSITION(field_name, position) \
static_assert(offsetof(GPU::Regs, field_name) == position * 4, \
"Field " #field_name " has invalid position")
ASSERT_REG_POSITION(smaphore_address, 0x4);
ASSERT_REG_POSITION(semaphore_sequence, 0x6);
ASSERT_REG_POSITION(semaphore_trigger, 0x7);
ASSERT_REG_POSITION(reference_count, 0x14);
ASSERT_REG_POSITION(semaphore_acquire, 0x1A);
ASSERT_REG_POSITION(semaphore_release, 0x1B);
ASSERT_REG_POSITION(acquire_mode, 0x100);
ASSERT_REG_POSITION(acquire_source, 0x101);
ASSERT_REG_POSITION(acquire_active, 0x102);
ASSERT_REG_POSITION(acquire_timeout, 0x103);
ASSERT_REG_POSITION(acquire_value, 0x104);
#undef ASSERT_REG_POSITION
} // namespace Tegra

View File

@@ -4,7 +4,6 @@
#pragma once
#include <functional>
#include "common/common_types.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/gpu.h"
@@ -12,14 +11,6 @@
namespace VideoCore {
enum class LoadCallbackStage {
Prepare,
Decompile,
Build,
Complete,
};
using DiskResourceLoadCallback = std::function<void(LoadCallbackStage, std::size_t, std::size_t)>;
class RasterizerInterface {
public:
virtual ~RasterizerInterface() {}

View File

@@ -14,7 +14,7 @@
namespace OpenGL {
OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size)
: RasterizerCache{rasterizer}, stream_buffer(size, true) {}
: RasterizerCache{rasterizer}, stream_buffer(GL_ARRAY_BUFFER, size) {}
GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size,
std::size_t alignment, bool cache) {

View File

@@ -1,94 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <glad/glad.h>
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/core.h"
#include "core/memory.h"
#include "video_core/renderer_opengl/gl_global_cache.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/utils.h"
namespace OpenGL {
CachedGlobalRegion::CachedGlobalRegion(VAddr addr, u32 size) : addr{addr}, size{size} {
buffer.Create();
// Bind and unbind the buffer so it gets allocated by the driver
glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
LabelGLObject(GL_BUFFER, buffer.handle, addr, "GlobalMemory");
}
void CachedGlobalRegion::Reload(u32 size_) {
constexpr auto max_size = static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize);
size = size_;
if (size > max_size) {
size = max_size;
LOG_CRITICAL(HW_GPU, "Global region size {} exceeded the expected size {}!", size_,
max_size);
}
// TODO(Rodrigo): Get rid of Memory::GetPointer with a staging buffer
glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle);
glBufferData(GL_SHADER_STORAGE_BUFFER, size, Memory::GetPointer(addr), GL_DYNAMIC_DRAW);
}
GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32 size) const {
const auto search{reserve.find(addr)};
if (search == reserve.end()) {
return {};
}
return search->second;
}
GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(VAddr addr, u32 size) {
GlobalRegion region{TryGetReservedGlobalRegion(addr, size)};
if (!region) {
// No reserved surface available, create a new one and reserve it
region = std::make_shared<CachedGlobalRegion>(addr, size);
ReserveGlobalRegion(region);
}
region->Reload(size);
return region;
}
void GlobalRegionCacheOpenGL::ReserveGlobalRegion(const GlobalRegion& region) {
reserve[region->GetAddr()] = region;
}
GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer)
: RasterizerCache{rasterizer} {}
GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion(
const GLShader::GlobalMemoryEntry& global_region,
Tegra::Engines::Maxwell3D::Regs::ShaderStage stage) {
auto& gpu{Core::System::GetInstance().GPU()};
const auto cbufs = gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)];
const auto cbuf_addr = gpu.MemoryManager().GpuToCpuAddress(
cbufs.const_buffers[global_region.GetCbufIndex()].address + global_region.GetCbufOffset());
ASSERT(cbuf_addr);
const auto actual_addr_gpu = Memory::Read64(*cbuf_addr);
const auto size = Memory::Read32(*cbuf_addr + 8);
const auto actual_addr = gpu.MemoryManager().GpuToCpuAddress(actual_addr_gpu);
ASSERT(actual_addr);
// Look up global region in the cache based on address
GlobalRegion region = TryGet(*actual_addr);
if (!region) {
// No global region found - create a new one
region = GetUncachedGlobalRegion(*actual_addr, size);
Register(region);
}
return region;
}
} // namespace OpenGL

View File

@@ -1,78 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include <unordered_map>
#include <glad/glad.h>
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace OpenGL {
namespace GLShader {
class GlobalMemoryEntry;
} // namespace GLShader
class RasterizerOpenGL;
class CachedGlobalRegion;
using GlobalRegion = std::shared_ptr<CachedGlobalRegion>;
class CachedGlobalRegion final : public RasterizerCacheObject {
public:
explicit CachedGlobalRegion(VAddr addr, u32 size);
/// Gets the address of the shader in guest memory, required for cache management
VAddr GetAddr() const {
return addr;
}
/// Gets the size of the shader in guest memory, required for cache management
std::size_t GetSizeInBytes() const {
return size;
}
/// Gets the GL program handle for the buffer
GLuint GetBufferHandle() const {
return buffer.handle;
}
/// Reloads the global region from guest memory
void Reload(u32 size_);
// TODO(Rodrigo): When global memory is written (STG), implement flushing
void Flush() override {
UNIMPLEMENTED();
}
private:
VAddr addr{};
u32 size{};
OGLBuffer buffer;
};
class GlobalRegionCacheOpenGL final : public RasterizerCache<GlobalRegion> {
public:
explicit GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer);
/// Gets the current specified shader stage program
GlobalRegion GetGlobalRegion(const GLShader::GlobalMemoryEntry& descriptor,
Tegra::Engines::Maxwell3D::Regs::ShaderStage stage);
private:
GlobalRegion TryGetReservedGlobalRegion(VAddr addr, u32 size) const;
GlobalRegion GetUncachedGlobalRegion(VAddr addr, u32 size);
void ReserveGlobalRegion(const GlobalRegion& region);
std::unordered_map<VAddr, GlobalRegion> reserve;
};
} // namespace OpenGL

View File

@@ -101,7 +101,7 @@ struct FramebufferCacheKey {
RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo& info)
: res_cache{*this}, shader_cache{*this}, emu_window{window}, screen_info{info},
buffer_cache(*this, STREAM_BUFFER_SIZE), global_cache{*this} {
buffer_cache(*this, STREAM_BUFFER_SIZE) {
// Create sampler objects
for (std::size_t i = 0; i < texture_samplers.size(); ++i) {
texture_samplers[i].Create();
@@ -135,31 +135,27 @@ void RasterizerOpenGL::CheckExtensions() {
}
}
GLuint RasterizerOpenGL::SetupVertexFormat() {
void RasterizerOpenGL::SetupVertexFormat() {
auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
const auto& regs = gpu.regs;
if (!gpu.dirty_flags.vertex_attrib_format) {
return state.draw.vertex_array;
}
if (!gpu.dirty_flags.vertex_attrib_format)
return;
gpu.dirty_flags.vertex_attrib_format = false;
MICROPROFILE_SCOPE(OpenGL_VAO);
auto [iter, is_cache_miss] = vertex_array_cache.try_emplace(regs.vertex_attrib_format);
auto& vao_entry = iter->second;
auto& VAO = iter->second;
if (is_cache_miss) {
vao_entry.Create();
const GLuint vao = vao_entry.handle;
VAO.Create();
state.draw.vertex_array = VAO.handle;
state.ApplyVertexBufferState();
// Eventhough we are using DSA to create this vertex array, there is a bug on Intel's blob
// that fails to properly create the vertex array if it's not bound even after creating it
// with glCreateVertexArrays
state.draw.vertex_array = vao;
state.ApplyVertexArrayState();
glVertexArrayElementBuffer(vao, buffer_cache.GetHandle());
// The index buffer binding is stored within the VAO. Stupid OpenGL, but easy to work
// around.
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer_cache.GetHandle());
// Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL.
// Enables the first 16 vertex attributes always, as we don't know which ones are actually
@@ -167,7 +163,7 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
// for now to avoid OpenGL errors.
// TODO(Subv): Analyze the shader to identify which attributes are actually used and don't
// assume every shader uses them all.
for (u32 index = 0; index < 16; ++index) {
for (unsigned index = 0; index < 16; ++index) {
const auto& attrib = regs.vertex_attrib_format[index];
// Ignore invalid attributes.
@@ -182,29 +178,28 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
ASSERT(buffer.IsEnabled());
glEnableVertexArrayAttrib(vao, index);
glEnableVertexAttribArray(index);
if (attrib.type == Tegra::Engines::Maxwell3D::Regs::VertexAttribute::Type::SignedInt ||
attrib.type ==
Tegra::Engines::Maxwell3D::Regs::VertexAttribute::Type::UnsignedInt) {
glVertexArrayAttribIFormat(vao, index, attrib.ComponentCount(),
MaxwellToGL::VertexType(attrib), attrib.offset);
glVertexAttribIFormat(index, attrib.ComponentCount(),
MaxwellToGL::VertexType(attrib), attrib.offset);
} else {
glVertexArrayAttribFormat(
vao, index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib),
attrib.IsNormalized() ? GL_TRUE : GL_FALSE, attrib.offset);
glVertexAttribFormat(index, attrib.ComponentCount(),
MaxwellToGL::VertexType(attrib),
attrib.IsNormalized() ? GL_TRUE : GL_FALSE, attrib.offset);
}
glVertexArrayAttribBinding(vao, index, attrib.buffer);
glVertexAttribBinding(index, attrib.buffer);
}
}
state.draw.vertex_array = VAO.handle;
state.ApplyVertexBufferState();
// Rebinding the VAO invalidates the vertex buffer bindings.
gpu.dirty_flags.vertex_array = 0xFFFFFFFF;
state.draw.vertex_array = vao_entry.handle;
return vao_entry.handle;
}
void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
void RasterizerOpenGL::SetupVertexBuffer() {
auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
const auto& regs = gpu.regs;
@@ -222,7 +217,7 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
if (!vertex_array.IsEnabled())
continue;
const Tegra::GPUVAddr start = vertex_array.StartAddress();
Tegra::GPUVAddr start = vertex_array.StartAddress();
const Tegra::GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
ASSERT(end > start);
@@ -230,18 +225,21 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
const GLintptr vertex_buffer_offset = buffer_cache.UploadMemory(start, size);
// Bind the vertex array to the buffer at the current offset.
glVertexArrayVertexBuffer(vao, index, buffer_cache.GetHandle(), vertex_buffer_offset,
vertex_array.stride);
glBindVertexBuffer(index, buffer_cache.GetHandle(), vertex_buffer_offset,
vertex_array.stride);
if (regs.instanced_arrays.IsInstancingEnabled(index) && vertex_array.divisor != 0) {
// Enable vertex buffer instancing with the specified divisor.
glVertexArrayBindingDivisor(vao, index, vertex_array.divisor);
glVertexBindingDivisor(index, vertex_array.divisor);
} else {
// Disable the vertex buffer instancing.
glVertexArrayBindingDivisor(vao, index, 0);
glVertexBindingDivisor(index, 0);
}
}
// Implicit set by glBindVertexBuffer. Stupid glstate handling...
state.draw.vertex_buffer = buffer_cache.GetHandle();
gpu.dirty_flags.vertex_array = 0;
}
@@ -295,9 +293,12 @@ DrawParameters RasterizerOpenGL::SetupDraw() {
void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
MICROPROFILE_SCOPE(OpenGL_Shader);
auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
BaseBindings base_bindings;
// Next available bindpoints to use when uploading the const buffers and textures to the GLSL
// shaders. The constbuffer bindpoint starts after the shader stage configuration bind points.
u32 current_constbuffer_bindpoint = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage;
u32 current_texture_bindpoint = 0;
std::array<bool, Maxwell::NumClipDistances> clip_distances{};
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
@@ -321,42 +322,50 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
const GLintptr offset = buffer_cache.UploadHostMemory(
&ubo, sizeof(ubo), static_cast<std::size_t>(uniform_buffer_alignment));
// Bind the emulation info buffer
glBindBufferRange(GL_UNIFORM_BUFFER, base_bindings.cbuf, buffer_cache.GetHandle(), offset,
static_cast<GLsizeiptr>(sizeof(ubo)));
// Bind the buffer
glBindBufferRange(GL_UNIFORM_BUFFER, static_cast<GLuint>(stage), buffer_cache.GetHandle(),
offset, static_cast<GLsizeiptr>(sizeof(ubo)));
Shader shader{shader_cache.GetStageProgram(program)};
const auto [program_handle, next_bindings] =
shader->GetProgramHandle(primitive_mode, base_bindings);
switch (program) {
case Maxwell::ShaderProgram::VertexA:
case Maxwell::ShaderProgram::VertexB:
shader_program_manager->UseProgrammableVertexShader(program_handle);
case Maxwell::ShaderProgram::VertexB: {
shader_program_manager->UseProgrammableVertexShader(
shader->GetProgramHandle(primitive_mode));
break;
case Maxwell::ShaderProgram::Geometry:
shader_program_manager->UseProgrammableGeometryShader(program_handle);
}
case Maxwell::ShaderProgram::Geometry: {
shader_program_manager->UseProgrammableGeometryShader(
shader->GetProgramHandle(primitive_mode));
break;
case Maxwell::ShaderProgram::Fragment:
shader_program_manager->UseProgrammableFragmentShader(program_handle);
}
case Maxwell::ShaderProgram::Fragment: {
shader_program_manager->UseProgrammableFragmentShader(
shader->GetProgramHandle(primitive_mode));
break;
}
default:
LOG_CRITICAL(HW_GPU, "Unimplemented shader index={}, enable={}, offset=0x{:08X}", index,
shader_config.enable.Value(), shader_config.offset);
UNREACHABLE();
}
const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage);
SetupConstBuffers(stage_enum, shader, program_handle, base_bindings);
SetupGlobalRegions(stage_enum, shader, program_handle, base_bindings);
SetupTextures(stage_enum, shader, program_handle, base_bindings);
// Configure the const buffers for this shader stage.
current_constbuffer_bindpoint =
SetupConstBuffers(static_cast<Maxwell::ShaderStage>(stage), shader, primitive_mode,
current_constbuffer_bindpoint);
// Configure the textures for this shader stage.
current_texture_bindpoint = SetupTextures(static_cast<Maxwell::ShaderStage>(stage), shader,
primitive_mode, current_texture_bindpoint);
// Workaround for Intel drivers.
// When a clip distance is enabled but not set in the shader it crops parts of the screen
// (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the
// clip distances only when it's written by a shader stage.
for (std::size_t i = 0; i < Maxwell::NumClipDistances; ++i) {
clip_distances[i] = clip_distances[i] || shader->GetShaderEntries().clip_distances[i];
clip_distances[i] |= shader->GetShaderEntries().clip_distances[i];
}
// When VertexA is enabled, we have dual vertex shaders
@@ -364,13 +373,9 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
// VertexB was combined with VertexA, so we skip the VertexB iteration
index++;
}
base_bindings = next_bindings;
}
SyncClipEnabled(clip_distances);
gpu.dirty_flags.shaders = false;
}
void RasterizerOpenGL::SetupCachedFramebuffer(const FramebufferCacheKey& fbkey,
@@ -481,26 +486,17 @@ void RasterizerOpenGL::ConfigureFramebuffers(OpenGLState& current_state, bool us
bool using_depth_fb, bool preserve_contents,
std::optional<std::size_t> single_color_target) {
MICROPROFILE_SCOPE(OpenGL_Framebuffer);
const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
const auto& regs = gpu.regs;
const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents,
single_color_target};
if (fb_config_state == current_framebuffer_config_state && gpu.dirty_flags.color_buffer == 0 &&
!gpu.dirty_flags.zeta_buffer) {
// Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or
// single color targets). This is done because the guest registers may not change but the
// host framebuffer may contain different attachments
return;
}
current_framebuffer_config_state = fb_config_state;
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
Surface depth_surface;
if (using_depth_fb) {
depth_surface = res_cache.GetDepthBufferSurface(preserve_contents);
}
UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0);
// TODO(bunnei): Figure out how the below register works. According to envytools, this should be
// used to enable multiple render targets. However, it is left unset on all games that I have
// tested.
UNIMPLEMENTED_IF(regs.rt_separate_frag_data != 0);
// Bind the framebuffer surfaces
current_state.framebuffer_srgb.enabled = regs.framebuffer_srgb != 0;
@@ -634,6 +630,8 @@ void RasterizerOpenGL::Clear() {
return;
}
ScopeAcquireGLContext acquire_context{emu_window};
ConfigureFramebuffers(clear_state, use_color, use_depth || use_stencil, false,
regs.clear_buffers.RT.Value());
if (regs.clear_flags.scissor) {
@@ -667,6 +665,8 @@ void RasterizerOpenGL::DrawArrays() {
auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
const auto& regs = gpu.regs;
ScopeAcquireGLContext acquire_context{emu_window};
ConfigureFramebuffers(state);
SyncColorMask();
SyncFragmentColorClampState();
@@ -689,6 +689,9 @@ void RasterizerOpenGL::DrawArrays() {
// Draw the vertex batch
const bool is_indexed = accelerate_draw == AccelDraw::Indexed;
state.draw.vertex_buffer = buffer_cache.GetHandle();
state.ApplyVertexBufferState();
std::size_t buffer_size = CalculateVertexArraysSize();
// Add space for index buffer (keeping in mind non-core primitives)
@@ -718,9 +721,8 @@ void RasterizerOpenGL::DrawArrays() {
gpu.dirty_flags.vertex_array = 0xFFFFFFFF;
}
const GLuint vao = SetupVertexFormat();
SetupVertexBuffer(vao);
SetupVertexFormat();
SetupVertexBuffer();
DrawParameters params = SetupDraw();
SetupShaders(params.primitive_mode);
@@ -759,7 +761,6 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
res_cache.InvalidateRegion(addr, size);
shader_cache.InvalidateRegion(addr, size);
global_cache.InvalidateRegion(addr, size);
buffer_cache.InvalidateRegion(addr, size);
}
@@ -915,14 +916,13 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntr
}
}
void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
const Shader& shader, GLuint program_handle,
BaseBindings base_bindings) {
u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shader,
GLenum primitive_mode, u32 current_bindpoint) {
MICROPROFILE_SCOPE(OpenGL_UBO);
const auto& gpu = Core::System::GetInstance().GPU();
const auto& maxwell3d = gpu.Maxwell3D();
const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<std::size_t>(stage)];
const auto& entries = shader->GetShaderEntries().const_buffers;
const auto& entries = shader->GetShaderEntries().const_buffer_entries;
constexpr u64 max_binds = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers;
std::array<GLuint, max_binds> bind_buffers;
@@ -965,73 +965,75 @@ void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::Shader
size = Common::AlignUp(size, sizeof(GLvec4));
ASSERT_MSG(size <= MaxConstbufferSize, "Constbuffer too big");
const GLintptr const_buffer_offset = buffer_cache.UploadMemory(
GLintptr const_buffer_offset = buffer_cache.UploadMemory(
buffer.address, size, static_cast<std::size_t>(uniform_buffer_alignment));
// Now configure the bindpoint of the buffer inside the shader
glUniformBlockBinding(shader->GetProgramHandle(primitive_mode),
shader->GetProgramResourceIndex(used_buffer),
current_bindpoint + bindpoint);
// Prepare values for multibind
bind_buffers[bindpoint] = buffer_cache.GetHandle();
bind_offsets[bindpoint] = const_buffer_offset;
bind_sizes[bindpoint] = size;
}
// The first binding is reserved for emulation values
const GLuint ubo_base_binding = base_bindings.cbuf + 1;
glBindBuffersRange(GL_UNIFORM_BUFFER, ubo_base_binding, static_cast<GLsizei>(entries.size()),
glBindBuffersRange(GL_UNIFORM_BUFFER, current_bindpoint, static_cast<GLsizei>(entries.size()),
bind_buffers.data(), bind_offsets.data(), bind_sizes.data());
return current_bindpoint + static_cast<u32>(entries.size());
}
void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
const Shader& shader, GLenum primitive_mode,
BaseBindings base_bindings) {
// TODO(Rodrigo): Use ARB_multi_bind here
const auto& entries = shader->GetShaderEntries().global_memory_entries;
for (u32 bindpoint = 0; bindpoint < static_cast<u32>(entries.size()); ++bindpoint) {
const auto& entry = entries[bindpoint];
const u32 current_bindpoint = base_bindings.gmem + bindpoint;
const auto& region = global_cache.GetGlobalRegion(entry, stage);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, current_bindpoint, region->GetBufferHandle());
}
}
void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader,
GLuint program_handle, BaseBindings base_bindings) {
u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader,
GLenum primitive_mode, u32 current_unit) {
MICROPROFILE_SCOPE(OpenGL_Texture);
const auto& gpu = Core::System::GetInstance().GPU();
const auto& maxwell3d = gpu.Maxwell3D();
const auto& entries = shader->GetShaderEntries().samplers;
const auto& entries = shader->GetShaderEntries().texture_samplers;
ASSERT_MSG(base_bindings.sampler + entries.size() <= std::size(state.texture_units),
ASSERT_MSG(current_unit + entries.size() <= std::size(state.texture_units),
"Exceeded the number of active textures.");
for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
const auto& entry = entries[bindpoint];
const u32 current_bindpoint = base_bindings.sampler + bindpoint;
auto& unit = state.texture_units[current_bindpoint];
const u32 current_bindpoint = current_unit + bindpoint;
// Bind the uniform to the sampler.
glProgramUniform1i(shader->GetProgramHandle(primitive_mode),
shader->GetUniformLocation(entry), current_bindpoint);
const auto texture = maxwell3d.GetStageTexture(entry.GetStage(), entry.GetOffset());
if (!texture.enabled) {
unit.texture = 0;
state.texture_units[current_bindpoint].texture = 0;
continue;
}
texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc);
Surface surface = res_cache.GetTextureSurface(texture, entry);
if (surface != nullptr) {
unit.texture =
const GLuint handle =
entry.IsArray() ? surface->TextureLayer().handle : surface->Texture().handle;
unit.target = entry.IsArray() ? surface->TargetLayer() : surface->Target();
unit.swizzle.r = MaxwellToGL::SwizzleSource(texture.tic.x_source);
unit.swizzle.g = MaxwellToGL::SwizzleSource(texture.tic.y_source);
unit.swizzle.b = MaxwellToGL::SwizzleSource(texture.tic.z_source);
unit.swizzle.a = MaxwellToGL::SwizzleSource(texture.tic.w_source);
const GLenum target = entry.IsArray() ? surface->TargetLayer() : surface->Target();
state.texture_units[current_bindpoint].texture = handle;
state.texture_units[current_bindpoint].target = target;
state.texture_units[current_bindpoint].swizzle.r =
MaxwellToGL::SwizzleSource(texture.tic.x_source);
state.texture_units[current_bindpoint].swizzle.g =
MaxwellToGL::SwizzleSource(texture.tic.y_source);
state.texture_units[current_bindpoint].swizzle.b =
MaxwellToGL::SwizzleSource(texture.tic.z_source);
state.texture_units[current_bindpoint].swizzle.a =
MaxwellToGL::SwizzleSource(texture.tic.w_source);
} else {
// Can occur when texture addr is null or its memory is unmapped/invalid
unit.texture = 0;
state.texture_units[current_bindpoint].texture = 0;
}
}
return current_unit + static_cast<u32>(entries.size());
}
void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {

View File

@@ -23,7 +23,6 @@
#include "video_core/rasterizer_cache.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_global_cache.h"
#include "video_core/renderer_opengl/gl_primitive_assembler.h"
#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
@@ -67,10 +66,6 @@ public:
static_assert(MaxConstbufferSize % sizeof(GLvec4) == 0,
"The maximum size of a constbuffer must be a multiple of the size of GLvec4");
static constexpr std::size_t MaxGlobalMemorySize = 0x10000;
static_assert(MaxGlobalMemorySize % sizeof(float) == 0,
"The maximum size of a global memory must be a multiple of the size of float");
private:
class SamplerInfo {
public:
@@ -99,23 +94,6 @@ private:
float max_anisotropic = 1.0f;
};
struct FramebufferConfigState {
bool using_color_fb{};
bool using_depth_fb{};
bool preserve_contents{};
std::optional<std::size_t> single_color_target;
bool operator==(const FramebufferConfigState& rhs) const {
return std::tie(using_color_fb, using_depth_fb, preserve_contents,
single_color_target) == std::tie(rhs.using_color_fb, rhs.using_depth_fb,
rhs.preserve_contents,
rhs.single_color_target);
}
bool operator!=(const FramebufferConfigState& rhs) const {
return !operator==(rhs);
}
};
/**
* Configures the color and depth framebuffer states.
* @param use_color_fb If true, configure color framebuffers.
@@ -127,18 +105,25 @@ private:
bool using_depth_fb = true, bool preserve_contents = true,
std::optional<std::size_t> single_color_target = {});
/// Configures the current constbuffers to use for the draw command.
void SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader,
GLuint program_handle, BaseBindings base_bindings);
/*
* Configures the current constbuffers to use for the draw command.
* @param stage The shader stage to configure buffers for.
* @param shader The shader object that contains the specified stage.
* @param current_bindpoint The offset at which to start counting new buffer bindpoints.
* @returns The next available bindpoint for use in the next shader stage.
*/
u32 SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, Shader& shader,
GLenum primitive_mode, u32 current_bindpoint);
/// Configures the current global memory entries to use for the draw command.
void SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
const Shader& shader, GLenum primitive_mode,
BaseBindings base_bindings);
/// Configures the current textures to use for the draw command.
void SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader,
GLuint program_handle, BaseBindings base_bindings);
/*
* Configures the current textures to use for the draw command.
* @param stage The shader stage to configure textures for.
* @param shader The shader object that contains the specified stage.
* @param current_unit The offset at which to start counting unused texture units.
* @returns The next available bindpoint for use in the next shader stage.
*/
u32 SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, Shader& shader,
GLenum primitive_mode, u32 current_unit);
/// Syncs the viewport and depth range to match the guest state
void SyncViewport(OpenGLState& current_state);
@@ -200,7 +185,6 @@ private:
RasterizerCacheOpenGL res_cache;
ShaderCacheOpenGL shader_cache;
GlobalRegionCacheOpenGL global_cache;
Core::Frontend::EmuWindow& emu_window;
@@ -213,7 +197,6 @@ private:
vertex_array_cache;
std::map<FramebufferCacheKey, OGLFramebuffer> framebuffer_cache;
FramebufferConfigState current_framebuffer_config_state;
std::array<SamplerInfo, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> texture_samplers;
@@ -226,10 +209,8 @@ private:
std::size_t CalculateIndexBufferSize() const;
/// Updates and returns a vertex array object representing current vertex format
GLuint SetupVertexFormat();
void SetupVertexBuffer(GLuint vao);
void SetupVertexFormat();
void SetupVertexBuffer();
DrawParameters SetupDraw();

View File

@@ -452,7 +452,7 @@ static void CopySurface(const Surface& src_surface, const Surface& dst_surface,
const std::size_t buffer_size = std::max(src_params.size_in_bytes, dst_params.size_in_bytes);
glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo_handle);
glBufferData(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, GL_STREAM_COPY);
glBufferData(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, GL_STREAM_DRAW);
if (source_format.compressed) {
glGetCompressedTextureImage(src_surface->Texture().handle, src_attachment,
static_cast<GLsizei>(src_params.size_in_bytes), nullptr);
@@ -919,16 +919,9 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextu
}
Surface RasterizerCacheOpenGL::GetDepthBufferSurface(bool preserve_contents) {
auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()};
const auto& regs{gpu.regs};
if (!gpu.dirty_flags.zeta_buffer) {
return last_depth_buffer;
}
gpu.dirty_flags.zeta_buffer = false;
const auto& regs{Core::System::GetInstance().GPU().Maxwell3D().regs};
if (!regs.zeta.Address() || !regs.zeta_enable) {
return last_depth_buffer = {};
return {};
}
SurfaceParams depth_params{SurfaceParams::CreateForDepthBuffer(
@@ -936,31 +929,25 @@ Surface RasterizerCacheOpenGL::GetDepthBufferSurface(bool preserve_contents) {
regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height,
regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)};
return last_depth_buffer = GetSurface(depth_params, preserve_contents);
return GetSurface(depth_params, preserve_contents);
}
Surface RasterizerCacheOpenGL::GetColorBufferSurface(std::size_t index, bool preserve_contents) {
auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()};
const auto& regs{gpu.regs};
if ((gpu.dirty_flags.color_buffer & (1u << static_cast<u32>(index))) == 0) {
return last_color_buffers[index];
}
gpu.dirty_flags.color_buffer &= ~(1u << static_cast<u32>(index));
const auto& regs{Core::System::GetInstance().GPU().Maxwell3D().regs};
ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
if (index >= regs.rt_control.count) {
return last_color_buffers[index] = {};
return {};
}
if (regs.rt[index].Address() == 0 || regs.rt[index].format == Tegra::RenderTargetFormat::NONE) {
return last_color_buffers[index] = {};
return {};
}
const SurfaceParams color_params{SurfaceParams::CreateForFramebuffer(index)};
return last_color_buffers[index] = GetSurface(color_params, preserve_contents);
return GetSurface(color_params, preserve_contents);
}
void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {

View File

@@ -396,9 +396,6 @@ private:
/// Use a Pixel Buffer Object to download the previous texture and then upload it to the new one
/// using the new format.
OGLBuffer copy_pbo;
std::array<Surface, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> last_color_buffers;
Surface last_depth_buffer;
};
} // namespace OpenGL

View File

@@ -117,7 +117,7 @@ void OGLBuffer::Create() {
return;
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
glCreateBuffers(1, &handle);
glGenBuffers(1, &handle);
}
void OGLBuffer::Release() {
@@ -126,6 +126,7 @@ void OGLBuffer::Release() {
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteBuffers(1, &handle);
OpenGLState::GetCurState().ResetBuffer(handle).Apply();
handle = 0;
}
@@ -151,7 +152,7 @@ void OGLVertexArray::Create() {
return;
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
glCreateVertexArrays(1, &handle);
glGenVertexArrays(1, &handle);
}
void OGLVertexArray::Release() {

View File

@@ -10,15 +10,11 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/utils.h"
#include "video_core/shader/shader_ir.h"
namespace OpenGL {
using VideoCommon::Shader::ProgramCode;
/// Gets the address for the specified shader stage program
static VAddr GetShaderAddress(Maxwell::ShaderProgram program) {
const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
@@ -28,31 +24,42 @@ static VAddr GetShaderAddress(Maxwell::ShaderProgram program) {
}
/// Gets the shader program code from memory for the specified address
static ProgramCode GetShaderCode(VAddr addr) {
ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
static GLShader::ProgramCode GetShaderCode(VAddr addr) {
GLShader::ProgramCode program_code(GLShader::MAX_PROGRAM_CODE_LENGTH);
Memory::ReadBlock(addr, program_code.data(), program_code.size() * sizeof(u64));
return program_code;
}
/// Gets the shader type from a Maxwell program type
constexpr GLenum GetShaderType(Maxwell::ShaderProgram program_type) {
switch (program_type) {
case Maxwell::ShaderProgram::VertexA:
case Maxwell::ShaderProgram::VertexB:
return GL_VERTEX_SHADER;
case Maxwell::ShaderProgram::Geometry:
return GL_GEOMETRY_SHADER;
case Maxwell::ShaderProgram::Fragment:
return GL_FRAGMENT_SHADER;
default:
return GL_NONE;
/// Helper function to set shader uniform block bindings for a single shader stage
static void SetShaderUniformBlockBinding(GLuint shader, const char* name,
Maxwell::ShaderStage binding, std::size_t expected_size) {
const GLuint ub_index = glGetUniformBlockIndex(shader, name);
if (ub_index == GL_INVALID_INDEX) {
return;
}
GLint ub_size = 0;
glGetActiveUniformBlockiv(shader, ub_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size);
ASSERT_MSG(static_cast<std::size_t>(ub_size) == expected_size,
"Uniform block size did not match! Got {}, expected {}", ub_size, expected_size);
glUniformBlockBinding(shader, ub_index, static_cast<GLuint>(binding));
}
/// Sets shader uniform block bindings for an entire shader program
static void SetShaderUniformBlockBindings(GLuint shader) {
SetShaderUniformBlockBinding(shader, "vs_config", Maxwell::ShaderStage::Vertex,
sizeof(GLShader::MaxwellUniformData));
SetShaderUniformBlockBinding(shader, "gs_config", Maxwell::ShaderStage::Geometry,
sizeof(GLShader::MaxwellUniformData));
SetShaderUniformBlockBinding(shader, "fs_config", Maxwell::ShaderStage::Fragment,
sizeof(GLShader::MaxwellUniformData));
}
CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type)
: addr{addr}, program_type{program_type}, setup{GetShaderCode(addr)} {
GLShader::ProgramResult program_result;
GLenum gl_type{};
switch (program_type) {
case Maxwell::ShaderProgram::VertexA:
@@ -63,14 +70,17 @@ CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type)
case Maxwell::ShaderProgram::VertexB:
CalculateProperties();
program_result = GLShader::GenerateVertexShader(setup);
gl_type = GL_VERTEX_SHADER;
break;
case Maxwell::ShaderProgram::Geometry:
CalculateProperties();
program_result = GLShader::GenerateGeometryShader(setup);
gl_type = GL_GEOMETRY_SHADER;
break;
case Maxwell::ShaderProgram::Fragment:
CalculateProperties();
program_result = GLShader::GenerateFragmentShader(setup);
gl_type = GL_FRAGMENT_SHADER;
break;
default:
LOG_CRITICAL(HW_GPU, "Unimplemented program_type={}", static_cast<u32>(program_type));
@@ -78,105 +88,59 @@ CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type)
return;
}
code = program_result.first;
entries = program_result.second;
shader_length = entries.shader_length;
}
std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive_mode,
BaseBindings base_bindings) {
GLuint handle{};
if (program_type == Maxwell::ShaderProgram::Geometry) {
handle = GetGeometryShader(primitive_mode, base_bindings);
if (program_type != Maxwell::ShaderProgram::Geometry) {
OGLShader shader;
shader.Create(program_result.first.c_str(), gl_type);
program.Create(true, shader.handle);
SetShaderUniformBlockBindings(program.handle);
LabelGLObject(GL_PROGRAM, program.handle, addr);
} else {
const auto [entry, is_cache_miss] = programs.try_emplace(base_bindings);
auto& program = entry->second;
if (is_cache_miss) {
std::string source = AllocateBindings(base_bindings);
source += code;
OGLShader shader;
shader.Create(source.c_str(), GetShaderType(program_type));
program.Create(true, shader.handle);
LabelGLObject(GL_PROGRAM, program.handle, addr);
}
handle = program.handle;
}
// Add const buffer and samplers offset reserved by this shader. One UBO binding is reserved for
// emulation values
base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()) + 1;
base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size());
base_bindings.sampler += static_cast<u32>(entries.samplers.size());
return {handle, base_bindings};
}
std::string CachedShader::AllocateBindings(BaseBindings base_bindings) {
std::string code = "#version 430 core\n";
code += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++);
for (const auto& cbuf : entries.const_buffers) {
code += fmt::format("#define CBUF_BINDING_{} {}\n", cbuf.GetIndex(), base_bindings.cbuf++);
}
for (const auto& gmem : entries.global_memory_entries) {
code += fmt::format("#define GMEM_BINDING_{}_{} {}\n", gmem.GetCbufIndex(),
gmem.GetCbufOffset(), base_bindings.gmem++);
}
for (const auto& sampler : entries.samplers) {
code += fmt::format("#define SAMPLER_BINDING_{} {}\n", sampler.GetIndex(),
base_bindings.sampler++);
}
return code;
}
GLuint CachedShader::GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings) {
const auto [entry, is_cache_miss] = geometry_programs.try_emplace(base_bindings);
auto& programs = entry->second;
switch (primitive_mode) {
case GL_POINTS:
return LazyGeometryProgram(programs.points, base_bindings, "points", 1, "ShaderPoints");
case GL_LINES:
case GL_LINE_STRIP:
return LazyGeometryProgram(programs.lines, base_bindings, "lines", 2, "ShaderLines");
case GL_LINES_ADJACENCY:
case GL_LINE_STRIP_ADJACENCY:
return LazyGeometryProgram(programs.lines_adjacency, base_bindings, "lines_adjacency", 4,
"ShaderLinesAdjacency");
case GL_TRIANGLES:
case GL_TRIANGLE_STRIP:
case GL_TRIANGLE_FAN:
return LazyGeometryProgram(programs.triangles, base_bindings, "triangles", 3,
"ShaderTriangles");
case GL_TRIANGLES_ADJACENCY:
case GL_TRIANGLE_STRIP_ADJACENCY:
return LazyGeometryProgram(programs.triangles_adjacency, base_bindings,
"triangles_adjacency", 6, "ShaderTrianglesAdjacency");
default:
UNREACHABLE_MSG("Unknown primitive mode.");
return LazyGeometryProgram(programs.points, base_bindings, "points", 1, "ShaderPoints");
// Store shader's code to lazily build it on draw
geometry_programs.code = program_result.first;
}
}
GLuint CachedShader::LazyGeometryProgram(OGLProgram& target_program, BaseBindings base_bindings,
GLuint CachedShader::GetProgramResourceIndex(const GLShader::ConstBufferEntry& buffer) {
const auto search{resource_cache.find(buffer.GetHash())};
if (search == resource_cache.end()) {
const GLuint index{
glGetProgramResourceIndex(program.handle, GL_UNIFORM_BLOCK, buffer.GetName().c_str())};
resource_cache[buffer.GetHash()] = index;
return index;
}
return search->second;
}
GLint CachedShader::GetUniformLocation(const GLShader::SamplerEntry& sampler) {
const auto search{uniform_cache.find(sampler.GetHash())};
if (search == uniform_cache.end()) {
const GLint index{glGetUniformLocation(program.handle, sampler.GetName().c_str())};
uniform_cache[sampler.GetHash()] = index;
return index;
}
return search->second;
}
GLuint CachedShader::LazyGeometryProgram(OGLProgram& target_program,
const std::string& glsl_topology, u32 max_vertices,
const std::string& debug_name) {
if (target_program.handle != 0) {
return target_program.handle;
}
std::string source = AllocateBindings(base_bindings);
std::string source = "#version 430 core\n";
source += "layout (" + glsl_topology + ") in;\n";
source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n';
source += code;
source += geometry_programs.code;
OGLShader shader;
shader.Create(source.c_str(), GL_GEOMETRY_SHADER);
target_program.Create(true, shader.handle);
SetShaderUniformBlockBindings(target_program.handle);
LabelGLObject(GL_PROGRAM, target_program.handle, addr, debug_name);
return target_program.handle;
};
@@ -224,10 +188,6 @@ void CachedShader::CalculateProperties() {
ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer) : RasterizerCache{rasterizer} {}
Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
if (!Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.shaders) {
return last_shaders[static_cast<u32>(program)];
}
const VAddr program_addr{GetShaderAddress(program)};
// Look up shader in the cache based on address
@@ -239,7 +199,7 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
Register(shader);
}
return last_shaders[static_cast<u32>(program)] = shader;
return shader;
}
} // namespace OpenGL

View File

@@ -4,18 +4,13 @@
#pragma once
#include <array>
#include <map>
#include <memory>
#include <tuple>
#include <glad/glad.h>
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_gen.h"
namespace OpenGL {
@@ -26,16 +21,6 @@ class RasterizerOpenGL;
using Shader = std::shared_ptr<CachedShader>;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
struct BaseBindings {
u32 cbuf{};
u32 gmem{};
u32 sampler{};
bool operator<(const BaseBindings& rhs) const {
return std::tie(cbuf, gmem, sampler) < std::tie(rhs.cbuf, rhs.gmem, rhs.sampler);
}
};
class CachedShader final : public RasterizerCacheObject {
public:
CachedShader(VAddr addr, Maxwell::ShaderProgram program_type);
@@ -57,45 +42,70 @@ public:
}
/// Gets the GL program handle for the shader
std::tuple<GLuint, BaseBindings> GetProgramHandle(GLenum primitive_mode,
BaseBindings base_bindings);
GLuint GetProgramHandle(GLenum primitive_mode) {
if (program_type != Maxwell::ShaderProgram::Geometry) {
return program.handle;
}
switch (primitive_mode) {
case GL_POINTS:
return LazyGeometryProgram(geometry_programs.points, "points", 1, "ShaderPoints");
case GL_LINES:
case GL_LINE_STRIP:
return LazyGeometryProgram(geometry_programs.lines, "lines", 2, "ShaderLines");
case GL_LINES_ADJACENCY:
case GL_LINE_STRIP_ADJACENCY:
return LazyGeometryProgram(geometry_programs.lines_adjacency, "lines_adjacency", 4,
"ShaderLinesAdjacency");
case GL_TRIANGLES:
case GL_TRIANGLE_STRIP:
case GL_TRIANGLE_FAN:
return LazyGeometryProgram(geometry_programs.triangles, "triangles", 3,
"ShaderTriangles");
case GL_TRIANGLES_ADJACENCY:
case GL_TRIANGLE_STRIP_ADJACENCY:
return LazyGeometryProgram(geometry_programs.triangles_adjacency, "triangles_adjacency",
6, "ShaderTrianglesAdjacency");
default:
UNREACHABLE_MSG("Unknown primitive mode.");
return LazyGeometryProgram(geometry_programs.points, "points", 1, "ShaderPoints");
}
}
/// Gets the GL program resource location for the specified resource, caching as needed
GLuint GetProgramResourceIndex(const GLShader::ConstBufferEntry& buffer);
/// Gets the GL uniform location for the specified resource, caching as needed
GLint GetUniformLocation(const GLShader::SamplerEntry& sampler);
private:
/// Generates a geometry shader or returns one that already exists.
GLuint LazyGeometryProgram(OGLProgram& target_program, const std::string& glsl_topology,
u32 max_vertices, const std::string& debug_name);
void CalculateProperties();
VAddr addr;
std::size_t shader_length;
Maxwell::ShaderProgram program_type;
GLShader::ShaderSetup setup;
GLShader::ShaderEntries entries;
// Non-geometry program.
OGLProgram program;
// Geometry programs. These are needed because GLSL needs an input topology but it's not
// declared by the hardware. Workaround this issue by generating a different shader per input
// topology class.
struct GeometryPrograms {
struct {
std::string code;
OGLProgram points;
OGLProgram lines;
OGLProgram lines_adjacency;
OGLProgram triangles;
OGLProgram triangles_adjacency;
};
} geometry_programs;
std::string AllocateBindings(BaseBindings base_bindings);
GLuint GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings);
/// Generates a geometry shader or returns one that already exists.
GLuint LazyGeometryProgram(OGLProgram& target_program, BaseBindings base_bindings,
const std::string& glsl_topology, u32 max_vertices,
const std::string& debug_name);
void CalculateProperties();
VAddr addr{};
std::size_t shader_length{};
Maxwell::ShaderProgram program_type{};
GLShader::ShaderSetup setup;
GLShader::ShaderEntries entries;
std::string code;
std::map<BaseBindings, OGLProgram> programs;
std::map<BaseBindings, GeometryPrograms> geometry_programs;
std::map<u32, GLuint> cbuf_resource_cache;
std::map<u32, GLuint> gmem_resource_cache;
std::map<u32, GLuint> resource_cache;
std::map<u32, GLint> uniform_cache;
};
@@ -105,9 +115,6 @@ public:
/// Gets the current specified shader stage program
Shader GetStageProgram(Maxwell::ShaderProgram program);
private:
std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
};
} // namespace OpenGL

File diff suppressed because it is too large Load Diff

View File

@@ -5,106 +5,21 @@
#pragma once
#include <array>
#include <functional>
#include <optional>
#include <string>
#include <utility>
#include <vector>
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/shader/shader_ir.h"
#include "video_core/renderer_opengl/gl_shader_gen.h"
namespace VideoCommon::Shader {
class ShaderIR;
}
namespace OpenGL::GLShader::Decompiler {
namespace OpenGL::GLShader {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer {
public:
explicit ConstBufferEntry(const VideoCommon::Shader::ConstBuffer& entry,
Maxwell::ShaderStage stage, const std::string& name, u32 index)
: VideoCommon::Shader::ConstBuffer{entry}, stage{stage}, name{name}, index{index} {}
const std::string& GetName() const {
return name;
}
Maxwell::ShaderStage GetStage() const {
return stage;
}
u32 GetIndex() const {
return index;
}
private:
std::string name;
Maxwell::ShaderStage stage{};
u32 index{};
};
class SamplerEntry : public VideoCommon::Shader::Sampler {
public:
explicit SamplerEntry(const VideoCommon::Shader::Sampler& entry, Maxwell::ShaderStage stage,
const std::string& name)
: VideoCommon::Shader::Sampler{entry}, stage{stage}, name{name} {}
const std::string& GetName() const {
return name;
}
Maxwell::ShaderStage GetStage() const {
return stage;
}
private:
std::string name;
Maxwell::ShaderStage stage{};
};
class GlobalMemoryEntry {
public:
explicit GlobalMemoryEntry(u32 cbuf_index, u32 cbuf_offset, Maxwell::ShaderStage stage,
std::string name)
: cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset}, stage{stage}, name{std::move(name)} {}
u32 GetCbufIndex() const {
return cbuf_index;
}
u32 GetCbufOffset() const {
return cbuf_offset;
}
const std::string& GetName() const {
return name;
}
Maxwell::ShaderStage GetStage() const {
return stage;
}
private:
u32 cbuf_index{};
u32 cbuf_offset{};
Maxwell::ShaderStage stage{};
std::string name;
};
struct ShaderEntries {
std::vector<ConstBufferEntry> const_buffers;
std::vector<SamplerEntry> samplers;
std::vector<GlobalMemoryEntry> global_memory_entries;
std::array<bool, Maxwell::NumClipDistances> clip_distances{};
std::size_t shader_length{};
};
using ProgramResult = std::pair<std::string, ShaderEntries>;
using Tegra::Engines::Maxwell3D;
std::string GetCommonDeclarations();
ProgramResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage,
const std::string& suffix);
std::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, u32 main_offset,
Maxwell3D::Regs::ShaderStage stage,
const std::string& suffix);
} // namespace OpenGL::GLShader
} // namespace OpenGL::GLShader::Decompiler

View File

@@ -7,57 +7,63 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_gen.h"
#include "video_core/shader/shader_ir.h"
namespace OpenGL::GLShader {
using Tegra::Engines::Maxwell3D;
using VideoCommon::Shader::ProgramCode;
using VideoCommon::Shader::ShaderIR;
static constexpr u32 PROGRAM_OFFSET{10};
ProgramResult GenerateVertexShader(const ShaderSetup& setup) {
std::string out = "#version 430 core\n";
out += "#extension GL_ARB_separate_shader_objects : enable\n\n";
const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n";
out += "// Shader Unique Id: VS" + id + "\n\n";
out += GetCommonDeclarations();
out += Decompiler::GetCommonDeclarations();
out += R"(
layout (location = 0) out vec4 position;
layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config {
layout(std140) uniform vs_config {
vec4 viewport_flip;
uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
uvec4 alpha_test;
};
)";
ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
ProgramResult program = Decompile(program_ir, Maxwell3D::Regs::ShaderStage::Vertex, "vertex");
if (setup.IsDualProgram()) {
out += "bool exec_vertex_b();\n";
}
ProgramResult program =
Decompiler::DecompileProgram(setup.program.code, PROGRAM_OFFSET,
Maxwell3D::Regs::ShaderStage::Vertex, "vertex")
.value_or(ProgramResult());
out += program.first;
if (setup.IsDualProgram()) {
ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET);
ProgramResult program_b =
Decompile(program_ir_b, Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b");
Decompiler::DecompileProgram(setup.program.code_b, PROGRAM_OFFSET,
Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b")
.value_or(ProgramResult());
out += program_b.first;
}
out += R"(
void main() {
position = vec4(0.0, 0.0, 0.0, 0.0);
execute_vertex();
exec_vertex();
)";
if (setup.IsDualProgram()) {
out += " execute_vertex_b();";
out += " exec_vertex_b();";
}
out += R"(
// Check if the flip stage is VertexB
// Config pack's second value is flip_stage
if (config_pack[1] == 1) {
@@ -71,62 +77,73 @@ void main() {
if (config_pack[1] == 1) {
position.w = 1.0;
}
})";
}
)";
return {out, program.second};
}
ProgramResult GenerateGeometryShader(const ShaderSetup& setup) {
const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
// Version is intentionally skipped in shader generation, it's added by the lazy compilation.
std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n";
const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
out += "// Shader Unique Id: GS" + id + "\n\n";
out += GetCommonDeclarations();
out += Decompiler::GetCommonDeclarations();
out += "bool exec_geometry();\n";
ProgramResult program =
Decompiler::DecompileProgram(setup.program.code, PROGRAM_OFFSET,
Maxwell3D::Regs::ShaderStage::Geometry, "geometry")
.value_or(ProgramResult());
out += R"(
out gl_PerVertex {
vec4 gl_Position;
};
layout (location = 0) in vec4 gs_position[];
layout (location = 0) out vec4 position;
layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config {
layout (std140) uniform gs_config {
vec4 viewport_flip;
uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
uvec4 alpha_test;
};
)";
ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
ProgramResult program =
Decompile(program_ir, Maxwell3D::Regs::ShaderStage::Geometry, "geometry");
out += program.first;
out += R"(
void main() {
execute_geometry();
};)";
exec_geometry();
}
)";
out += program.first;
return {out, program.second};
}
ProgramResult GenerateFragmentShader(const ShaderSetup& setup) {
std::string out = "#version 430 core\n";
out += "#extension GL_ARB_separate_shader_objects : enable\n\n";
const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n";
out += "// Shader Unique Id: FS" + id + "\n\n";
out += GetCommonDeclarations();
out += Decompiler::GetCommonDeclarations();
out += "bool exec_fragment();\n";
ProgramResult program =
Decompiler::DecompileProgram(setup.program.code, PROGRAM_OFFSET,
Maxwell3D::Regs::ShaderStage::Fragment, "fragment")
.value_or(ProgramResult());
out += R"(
layout (location = 0) out vec4 FragColor0;
layout (location = 1) out vec4 FragColor1;
layout (location = 2) out vec4 FragColor2;
layout (location = 3) out vec4 FragColor3;
layout (location = 4) out vec4 FragColor4;
layout (location = 5) out vec4 FragColor5;
layout (location = 6) out vec4 FragColor6;
layout (location = 7) out vec4 FragColor7;
layout(location = 0) out vec4 FragColor0;
layout(location = 1) out vec4 FragColor1;
layout(location = 2) out vec4 FragColor2;
layout(location = 3) out vec4 FragColor3;
layout(location = 4) out vec4 FragColor4;
layout(location = 5) out vec4 FragColor5;
layout(location = 6) out vec4 FragColor6;
layout(location = 7) out vec4 FragColor7;
layout (location = 0) in vec4 position;
layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config {
layout (std140) uniform fs_config {
vec4 viewport_flip;
uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
uvec4 alpha_test;
@@ -156,20 +173,12 @@ bool AlphaFunc(in float value) {
}
}
)";
ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
ProgramResult program =
Decompile(program_ir, Maxwell3D::Regs::ShaderStage::Fragment, "fragment");
out += program.first;
out += R"(
void main() {
execute_fragment();
exec_fragment();
}
)";
out += program.first;
return {out, program.second};
}
} // namespace OpenGL::GLShader
} // namespace OpenGL::GLShader

View File

@@ -10,12 +10,164 @@
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/shader/shader_ir.h"
namespace OpenGL::GLShader {
using VideoCommon::Shader::ProgramCode;
constexpr std::size_t MAX_PROGRAM_CODE_LENGTH{0x1000};
using ProgramCode = std::vector<u64>;
enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 };
class ConstBufferEntry {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
public:
void MarkAsUsed(u64 index, u64 offset, Maxwell::ShaderStage stage) {
is_used = true;
this->index = static_cast<unsigned>(index);
this->stage = stage;
max_offset = std::max(max_offset, static_cast<unsigned>(offset));
}
void MarkAsUsedIndirect(u64 index, Maxwell::ShaderStage stage) {
is_used = true;
is_indirect = true;
this->index = static_cast<unsigned>(index);
this->stage = stage;
}
bool IsUsed() const {
return is_used;
}
bool IsIndirect() const {
return is_indirect;
}
unsigned GetIndex() const {
return index;
}
unsigned GetSize() const {
return max_offset + 1;
}
std::string GetName() const {
return BufferBaseNames[static_cast<std::size_t>(stage)] + std::to_string(index);
}
u32 GetHash() const {
return (static_cast<u32>(stage) << 16) | index;
}
private:
static constexpr std::array<const char*, Maxwell::MaxShaderStage> BufferBaseNames = {
"buffer_vs_c", "buffer_tessc_c", "buffer_tesse_c", "buffer_gs_c", "buffer_fs_c",
};
bool is_used{};
bool is_indirect{};
unsigned index{};
unsigned max_offset{};
Maxwell::ShaderStage stage;
};
class SamplerEntry {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
public:
SamplerEntry(Maxwell::ShaderStage stage, std::size_t offset, std::size_t index,
Tegra::Shader::TextureType type, bool is_array, bool is_shadow)
: offset(offset), stage(stage), sampler_index(index), type(type), is_array(is_array),
is_shadow(is_shadow) {}
std::size_t GetOffset() const {
return offset;
}
std::size_t GetIndex() const {
return sampler_index;
}
Maxwell::ShaderStage GetStage() const {
return stage;
}
std::string GetName() const {
return std::string(TextureSamplerNames[static_cast<std::size_t>(stage)]) + '_' +
std::to_string(sampler_index);
}
std::string GetTypeString() const {
using Tegra::Shader::TextureType;
std::string glsl_type;
switch (type) {
case TextureType::Texture1D:
glsl_type = "sampler1D";
break;
case TextureType::Texture2D:
glsl_type = "sampler2D";
break;
case TextureType::Texture3D:
glsl_type = "sampler3D";
break;
case TextureType::TextureCube:
glsl_type = "samplerCube";
break;
default:
UNIMPLEMENTED();
}
if (is_array)
glsl_type += "Array";
if (is_shadow)
glsl_type += "Shadow";
return glsl_type;
}
Tegra::Shader::TextureType GetType() const {
return type;
}
bool IsArray() const {
return is_array;
}
bool IsShadow() const {
return is_shadow;
}
u32 GetHash() const {
return (static_cast<u32>(stage) << 16) | static_cast<u32>(sampler_index);
}
static std::string GetArrayName(Maxwell::ShaderStage stage) {
return TextureSamplerNames[static_cast<std::size_t>(stage)];
}
private:
static constexpr std::array<const char*, Maxwell::MaxShaderStage> TextureSamplerNames = {
"tex_vs", "tex_tessc", "tex_tesse", "tex_gs", "tex_fs",
};
/// Offset in TSC memory from which to read the sampler object, as specified by the sampling
/// instruction.
std::size_t offset;
Maxwell::ShaderStage stage; ///< Shader stage where this sampler was used.
std::size_t sampler_index; ///< Value used to index into the generated GLSL sampler array.
Tegra::Shader::TextureType type; ///< The type used to sample this texture (Texture2D, etc)
bool is_array; ///< Whether the texture is being sampled as an array texture or not.
bool is_shadow; ///< Whether the texture is being sampled as a depth texture or not.
};
struct ShaderEntries {
std::vector<ConstBufferEntry> const_buffer_entries;
std::vector<SamplerEntry> texture_samplers;
std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> clip_distances;
std::size_t shader_length;
};
using ProgramResult = std::pair<std::string, ShaderEntries>;
struct ShaderSetup {
explicit ShaderSetup(ProgramCode program_code) {

View File

@@ -83,6 +83,8 @@ OpenGLState::OpenGLState() {
draw.read_framebuffer = 0;
draw.draw_framebuffer = 0;
draw.vertex_array = 0;
draw.vertex_buffer = 0;
draw.uniform_buffer = 0;
draw.shader_program = 0;
draw.program_pipeline = 0;
@@ -503,6 +505,7 @@ void OpenGLState::ApplySamplers() const {
}
void OpenGLState::ApplyFramebufferState() const {
// Framebuffer
if (draw.read_framebuffer != cur_state.draw.read_framebuffer) {
glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer);
}
@@ -511,10 +514,16 @@ void OpenGLState::ApplyFramebufferState() const {
}
}
void OpenGLState::ApplyVertexArrayState() const {
void OpenGLState::ApplyVertexBufferState() const {
// Vertex array
if (draw.vertex_array != cur_state.draw.vertex_array) {
glBindVertexArray(draw.vertex_array);
}
// Vertex buffer
if (draw.vertex_buffer != cur_state.draw.vertex_buffer) {
glBindBuffer(GL_ARRAY_BUFFER, draw.vertex_buffer);
}
}
void OpenGLState::ApplyDepthClamp() const {
@@ -534,7 +543,11 @@ void OpenGLState::ApplyDepthClamp() const {
void OpenGLState::Apply() const {
ApplyFramebufferState();
ApplyVertexArrayState();
ApplyVertexBufferState();
// Uniform buffer
if (draw.uniform_buffer != cur_state.draw.uniform_buffer) {
glBindBuffer(GL_UNIFORM_BUFFER, draw.uniform_buffer);
}
// Shader program
if (draw.shader_program != cur_state.draw.shader_program) {
@@ -625,6 +638,16 @@ OpenGLState& OpenGLState::ResetPipeline(GLuint handle) {
return *this;
}
OpenGLState& OpenGLState::ResetBuffer(GLuint handle) {
if (draw.vertex_buffer == handle) {
draw.vertex_buffer = 0;
}
if (draw.uniform_buffer == handle) {
draw.uniform_buffer = 0;
}
return *this;
}
OpenGLState& OpenGLState::ResetVertexArray(GLuint handle) {
if (draw.vertex_array == handle) {
draw.vertex_array = 0;

View File

@@ -154,6 +154,8 @@ public:
GLuint read_framebuffer; // GL_READ_FRAMEBUFFER_BINDING
GLuint draw_framebuffer; // GL_DRAW_FRAMEBUFFER_BINDING
GLuint vertex_array; // GL_VERTEX_ARRAY_BINDING
GLuint vertex_buffer; // GL_ARRAY_BUFFER_BINDING
GLuint uniform_buffer; // GL_UNIFORM_BUFFER_BINDING
GLuint shader_program; // GL_CURRENT_PROGRAM
GLuint program_pipeline; // GL_PROGRAM_PIPELINE_BINDING
} draw;
@@ -204,10 +206,10 @@ public:
}
/// Apply this state as the current OpenGL state
void Apply() const;
/// Apply only the state affecting the framebuffer
/// Apply only the state afecting the framebuffer
void ApplyFramebufferState() const;
/// Apply only the state affecting the vertex array
void ApplyVertexArrayState() const;
/// Apply only the state afecting the vertex buffer
void ApplyVertexBufferState() const;
/// Set the initial OpenGL state
static void ApplyDefaultState();
/// Resets any references to the given resource
@@ -215,6 +217,7 @@ public:
OpenGLState& ResetSampler(GLuint handle);
OpenGLState& ResetProgram(GLuint handle);
OpenGLState& ResetPipeline(GLuint handle);
OpenGLState& ResetBuffer(GLuint handle);
OpenGLState& ResetVertexArray(GLuint handle);
OpenGLState& ResetFramebuffer(GLuint handle);
void EmulateViewportWithScissor();

View File

@@ -15,12 +15,13 @@ MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
namespace OpenGL {
OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent)
: buffer_size(size) {
OGLStreamBuffer::OGLStreamBuffer(GLenum target, GLsizeiptr size, bool prefer_coherent)
: gl_target(target), buffer_size(size) {
gl_buffer.Create();
glBindBuffer(gl_target, gl_buffer.handle);
GLsizeiptr allocate_size = size;
if (vertex_data_usage) {
if (target == GL_ARRAY_BUFFER) {
// On AMD GPU there is a strange crash in indexed drawing. The crash happens when the buffer
// read position is near the end and is an out-of-bound access to the vertex buffer. This is
// probably a bug in the driver and is related to the usage of vec3<byte> attributes in the
@@ -34,17 +35,18 @@ OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool p
coherent = prefer_coherent;
const GLbitfield flags =
GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | (coherent ? GL_MAP_COHERENT_BIT : 0);
glNamedBufferStorage(gl_buffer.handle, allocate_size, nullptr, flags);
mapped_ptr = static_cast<u8*>(glMapNamedBufferRange(
gl_buffer.handle, 0, buffer_size, flags | (coherent ? 0 : GL_MAP_FLUSH_EXPLICIT_BIT)));
glBufferStorage(gl_target, allocate_size, nullptr, flags);
mapped_ptr = static_cast<u8*>(glMapBufferRange(
gl_target, 0, buffer_size, flags | (coherent ? 0 : GL_MAP_FLUSH_EXPLICIT_BIT)));
} else {
glNamedBufferData(gl_buffer.handle, allocate_size, nullptr, GL_STREAM_DRAW);
glBufferData(gl_target, allocate_size, nullptr, GL_STREAM_DRAW);
}
}
OGLStreamBuffer::~OGLStreamBuffer() {
if (persistent) {
glUnmapNamedBuffer(gl_buffer.handle);
glBindBuffer(gl_target, gl_buffer.handle);
glUnmapBuffer(gl_target);
}
gl_buffer.Release();
}
@@ -72,7 +74,7 @@ std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr a
invalidate = true;
if (persistent) {
glUnmapNamedBuffer(gl_buffer.handle);
glUnmapBuffer(gl_target);
}
}
@@ -82,7 +84,7 @@ std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr a
(coherent ? GL_MAP_COHERENT_BIT : GL_MAP_FLUSH_EXPLICIT_BIT) |
(invalidate ? GL_MAP_INVALIDATE_BUFFER_BIT : GL_MAP_UNSYNCHRONIZED_BIT);
mapped_ptr = static_cast<u8*>(
glMapNamedBufferRange(gl_buffer.handle, buffer_pos, buffer_size - buffer_pos, flags));
glMapBufferRange(gl_target, buffer_pos, buffer_size - buffer_pos, flags));
mapped_offset = buffer_pos;
}
@@ -93,11 +95,11 @@ void OGLStreamBuffer::Unmap(GLsizeiptr size) {
ASSERT(size <= mapped_size);
if (!coherent && size > 0) {
glFlushMappedNamedBufferRange(gl_buffer.handle, buffer_pos - mapped_offset, size);
glFlushMappedBufferRange(gl_target, buffer_pos - mapped_offset, size);
}
if (!persistent) {
glUnmapNamedBuffer(gl_buffer.handle);
glUnmapBuffer(gl_target);
}
buffer_pos += size;

View File

@@ -13,7 +13,7 @@ namespace OpenGL {
class OGLStreamBuffer : private NonCopyable {
public:
explicit OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent = false);
explicit OGLStreamBuffer(GLenum target, GLsizeiptr size, bool prefer_coherent = false);
~OGLStreamBuffer();
GLuint GetHandle() const;
@@ -33,6 +33,7 @@ public:
private:
OGLBuffer gl_buffer;
GLenum gl_target;
bool coherent = false;
bool persistent = false;

View File

@@ -14,7 +14,6 @@
#include "core/core.h"
#include "core/core_timing.h"
#include "core/frontend/emu_window.h"
#include "core/frontend/scope_acquire_window_context.h"
#include "core/memory.h"
#include "core/perf_stats.h"
#include "core/settings.h"
@@ -98,6 +97,18 @@ static std::array<GLfloat, 3 * 2> MakeOrthographicMatrix(const float width, cons
return matrix;
}
ScopeAcquireGLContext::ScopeAcquireGLContext(Core::Frontend::EmuWindow& emu_window_)
: emu_window{emu_window_} {
if (Settings::values.use_multi_core) {
emu_window.MakeCurrent();
}
}
ScopeAcquireGLContext::~ScopeAcquireGLContext() {
if (Settings::values.use_multi_core) {
emu_window.DoneCurrent();
}
}
RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& window)
: VideoCore::RendererBase{window} {}
@@ -106,6 +117,7 @@ RendererOpenGL::~RendererOpenGL() = default;
/// Swap buffers (render frame)
void RendererOpenGL::SwapBuffers(
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
ScopeAcquireGLContext acquire_context{render_window};
Core::System::GetInstance().GetPerfStats().EndSystemFrame();
@@ -233,20 +245,20 @@ void RendererOpenGL::InitOpenGLObjects() {
// Generate VAO
vertex_array.Create();
state.draw.vertex_array = vertex_array.handle;
state.draw.vertex_buffer = vertex_buffer.handle;
state.draw.uniform_buffer = 0;
state.Apply();
// Attach vertex data to VAO
glNamedBufferData(vertex_buffer.handle, sizeof(ScreenRectVertex) * 4, nullptr, GL_STREAM_DRAW);
glVertexArrayAttribFormat(vertex_array.handle, attrib_position, 2, GL_FLOAT, GL_FALSE,
offsetof(ScreenRectVertex, position));
glVertexArrayAttribFormat(vertex_array.handle, attrib_tex_coord, 2, GL_FLOAT, GL_FALSE,
offsetof(ScreenRectVertex, tex_coord));
glVertexArrayAttribBinding(vertex_array.handle, attrib_position, 0);
glVertexArrayAttribBinding(vertex_array.handle, attrib_tex_coord, 0);
glEnableVertexArrayAttrib(vertex_array.handle, attrib_position);
glEnableVertexArrayAttrib(vertex_array.handle, attrib_tex_coord);
glVertexArrayVertexBuffer(vertex_array.handle, 0, vertex_buffer.handle, 0,
sizeof(ScreenRectVertex));
glBufferData(GL_ARRAY_BUFFER, sizeof(ScreenRectVertex) * 4, nullptr, GL_STREAM_DRAW);
glVertexAttribPointer(attrib_position, 2, GL_FLOAT, GL_FALSE, sizeof(ScreenRectVertex),
(GLvoid*)offsetof(ScreenRectVertex, position));
glVertexAttribPointer(attrib_tex_coord, 2, GL_FLOAT, GL_FALSE, sizeof(ScreenRectVertex),
(GLvoid*)offsetof(ScreenRectVertex, tex_coord));
glEnableVertexAttribArray(attrib_position);
glEnableVertexAttribArray(attrib_tex_coord);
// Allocate textures for the screen
screen_info.texture.resource.Create();
@@ -358,12 +370,14 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
state.texture_units[0].texture = screen_info.display_texture;
state.texture_units[0].swizzle = {GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA};
// Workaround brigthness problems in SMO by enabling sRGB in the final output
// if it has been used in the frame. Needed because of this bug in QT: QTBUG-50987
// if it has been used in the frame
// Needed because of this bug in QT
// QTBUG-50987
state.framebuffer_srgb.enabled = OpenGLState::GetsRGBUsed();
state.Apply();
glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), vertices.data());
glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(vertices), vertices.data());
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
// Restore default state
// restore default state
state.framebuffer_srgb.enabled = false;
state.texture_units[0].texture = 0;
state.Apply();
@@ -494,7 +508,7 @@ static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum
/// Initialize the renderer
bool RendererOpenGL::Init() {
Core::Frontend::ScopeAcquireWindowContext acquire_context{render_window};
ScopeAcquireGLContext acquire_context{render_window};
if (GLAD_GL_KHR_debug) {
glEnable(GL_DEBUG_OUTPUT);

View File

@@ -39,6 +39,16 @@ struct ScreenInfo {
TextureInfo texture;
};
/// Helper class to acquire/release OpenGL context within a given scope
class ScopeAcquireGLContext : NonCopyable {
public:
explicit ScopeAcquireGLContext(Core::Frontend::EmuWindow& window);
~ScopeAcquireGLContext();
private:
Core::Frontend::EmuWindow& emu_window;
};
class RendererOpenGL : public VideoCore::RendererBase {
public:
explicit RendererOpenGL(Core::Frontend::EmuWindow& window);

View File

@@ -1,206 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <cstring>
#include <set>
#include <fmt/format.h>
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/engines/shader_header.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
namespace {
/// Merges exit method of two parallel branches.
constexpr ExitMethod ParallelExit(ExitMethod a, ExitMethod b) {
if (a == ExitMethod::Undetermined) {
return b;
}
if (b == ExitMethod::Undetermined) {
return a;
}
if (a == b) {
return a;
}
return ExitMethod::Conditional;
}
/**
* Returns whether the instruction at the specified offset is a 'sched' instruction.
* Sched instructions always appear before a sequence of 3 instructions.
*/
constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) {
constexpr u32 SchedPeriod = 4;
u32 absolute_offset = offset - main_offset;
return (absolute_offset % SchedPeriod) == 0;
}
} // namespace
void ShaderIR::Decode() {
std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
std::set<u32> labels;
const ExitMethod exit_method = Scan(main_offset, MAX_PROGRAM_LENGTH, labels);
if (exit_method != ExitMethod::AlwaysEnd) {
UNREACHABLE_MSG("Program does not always end");
}
if (labels.empty()) {
basic_blocks.insert({main_offset, DecodeRange(main_offset, MAX_PROGRAM_LENGTH)});
return;
}
labels.insert(main_offset);
for (const u32 label : labels) {
const auto next_it = labels.lower_bound(label + 1);
const u32 next_label = next_it == labels.end() ? MAX_PROGRAM_LENGTH : *next_it;
basic_blocks.insert({label, DecodeRange(label, next_label)});
}
}
ExitMethod ShaderIR::Scan(u32 begin, u32 end, std::set<u32>& labels) {
const auto [iter, inserted] =
exit_method_map.emplace(std::make_pair(begin, end), ExitMethod::Undetermined);
ExitMethod& exit_method = iter->second;
if (!inserted)
return exit_method;
for (u32 offset = begin; offset != end && offset != MAX_PROGRAM_LENGTH; ++offset) {
coverage_begin = std::min(coverage_begin, offset);
coverage_end = std::max(coverage_end, offset + 1);
const Instruction instr = {program_code[offset]};
const auto opcode = OpCode::Decode(instr);
if (!opcode)
continue;
switch (opcode->get().GetId()) {
case OpCode::Id::EXIT: {
// The EXIT instruction can be predicated, which means that the shader can conditionally
// end on this instruction. We have to consider the case where the condition is not met
// and check the exit method of that other basic block.
using Tegra::Shader::Pred;
if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) {
return exit_method = ExitMethod::AlwaysEnd;
} else {
const ExitMethod not_met = Scan(offset + 1, end, labels);
return exit_method = ParallelExit(ExitMethod::AlwaysEnd, not_met);
}
}
case OpCode::Id::BRA: {
const u32 target = offset + instr.bra.GetBranchTarget();
labels.insert(target);
const ExitMethod no_jmp = Scan(offset + 1, end, labels);
const ExitMethod jmp = Scan(target, end, labels);
return exit_method = ParallelExit(no_jmp, jmp);
}
case OpCode::Id::SSY:
case OpCode::Id::PBK: {
// The SSY and PBK use a similar encoding as the BRA instruction.
UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
"Constant buffer branching is not supported");
const u32 target = offset + instr.bra.GetBranchTarget();
labels.insert(target);
// Continue scanning for an exit method.
break;
}
}
}
return exit_method = ExitMethod::AlwaysReturn;
}
BasicBlock ShaderIR::DecodeRange(u32 begin, u32 end) {
BasicBlock basic_block;
for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) {
pc = DecodeInstr(basic_block, pc);
}
return basic_block;
}
u32 ShaderIR::DecodeInstr(BasicBlock& bb, u32 pc) {
// Ignore sched instructions when generating code.
if (IsSchedInstruction(pc, main_offset)) {
return pc + 1;
}
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
// Decoding failure
if (!opcode) {
UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value);
return pc + 1;
}
bb.push_back(
Comment(fmt::format("{}: {} (0x{:016x})", pc, opcode->get().GetName(), instr.value)));
using Tegra::Shader::Pred;
UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute,
"NeverExecute predicate not implemented");
static const std::map<OpCode::Type, u32 (ShaderIR::*)(BasicBlock&, const BasicBlock&, u32)>
decoders = {
{OpCode::Type::Arithmetic, &ShaderIR::DecodeArithmetic},
{OpCode::Type::ArithmeticImmediate, &ShaderIR::DecodeArithmeticImmediate},
{OpCode::Type::Bfe, &ShaderIR::DecodeBfe},
{OpCode::Type::Bfi, &ShaderIR::DecodeBfi},
{OpCode::Type::Shift, &ShaderIR::DecodeShift},
{OpCode::Type::ArithmeticInteger, &ShaderIR::DecodeArithmeticInteger},
{OpCode::Type::ArithmeticIntegerImmediate, &ShaderIR::DecodeArithmeticIntegerImmediate},
{OpCode::Type::ArithmeticHalf, &ShaderIR::DecodeArithmeticHalf},
{OpCode::Type::ArithmeticHalfImmediate, &ShaderIR::DecodeArithmeticHalfImmediate},
{OpCode::Type::Ffma, &ShaderIR::DecodeFfma},
{OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2},
{OpCode::Type::Conversion, &ShaderIR::DecodeConversion},
{OpCode::Type::Memory, &ShaderIR::DecodeMemory},
{OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate},
{OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate},
{OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate},
{OpCode::Type::PredicateSetRegister, &ShaderIR::DecodePredicateSetRegister},
{OpCode::Type::PredicateSetPredicate, &ShaderIR::DecodePredicateSetPredicate},
{OpCode::Type::RegisterSetPredicate, &ShaderIR::DecodeRegisterSetPredicate},
{OpCode::Type::FloatSet, &ShaderIR::DecodeFloatSet},
{OpCode::Type::IntegerSet, &ShaderIR::DecodeIntegerSet},
{OpCode::Type::HalfSet, &ShaderIR::DecodeHalfSet},
{OpCode::Type::Video, &ShaderIR::DecodeVideo},
{OpCode::Type::Xmad, &ShaderIR::DecodeXmad},
};
std::vector<Node> tmp_block;
if (const auto decoder = decoders.find(opcode->get().GetType()); decoder != decoders.end()) {
pc = (this->*decoder->second)(tmp_block, bb, pc);
} else {
pc = DecodeOther(tmp_block, bb, pc);
}
// Some instructions (like SSY) don't have a predicate field, they are always unconditionally
// executed.
const bool can_be_predicated = OpCode::IsPredicatedInstruction(opcode->get().GetId());
const auto pred_index = static_cast<u32>(instr.pred.pred_index);
if (can_be_predicated && pred_index != static_cast<u32>(Pred::UnusedIndex)) {
bb.push_back(
Conditional(GetPredicate(pred_index, instr.negate_pred != 0), std::move(tmp_block)));
} else {
for (auto& node : tmp_block) {
bb.push_back(std::move(node));
}
}
return pc + 1;
}
} // namespace VideoCommon::Shader

View File

@@ -1,155 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
using Tegra::Shader::SubOp;
u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, const BasicBlock& code, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
Node op_a = GetRegister(instr.gpr8);
Node op_b = [&]() -> Node {
if (instr.is_b_imm) {
return GetImmediate19(instr);
} else if (instr.is_b_gpr) {
return GetRegister(instr.gpr20);
} else {
return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
}
}();
switch (opcode->get().GetId()) {
case OpCode::Id::MOV_C:
case OpCode::Id::MOV_R: {
// MOV does not have neither 'abs' nor 'neg' bits.
SetRegister(bb, instr.gpr0, op_b);
break;
}
case OpCode::Id::FMUL_C:
case OpCode::Id::FMUL_R:
case OpCode::Id::FMUL_IMM: {
// FMUL does not have 'abs' bits and only the second operand has a 'neg' bit.
UNIMPLEMENTED_IF_MSG(instr.fmul.tab5cb8_2 != 0, "FMUL tab5cb8_2({}) is not implemented",
instr.fmul.tab5cb8_2.Value());
UNIMPLEMENTED_IF_MSG(
instr.fmul.tab5c68_0 != 1, "FMUL tab5cb8_0({}) is not implemented",
instr.fmul.tab5c68_0.Value()); // SMO typical sends 1 here which seems to be the default
op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b);
// TODO(Rodrigo): Should precise be used when there's a postfactor?
Node value = Operation(OperationCode::FMul, PRECISE, op_a, op_b);
if (instr.fmul.postfactor != 0) {
auto postfactor = static_cast<s32>(instr.fmul.postfactor);
// Postfactor encoded as 3-bit 1's complement in instruction, interpreted with below
// logic.
if (postfactor >= 4) {
postfactor = 7 - postfactor;
} else {
postfactor = 0 - postfactor;
}
if (postfactor > 0) {
value = Operation(OperationCode::FMul, NO_PRECISE, value,
Immediate(static_cast<f32>(1 << postfactor)));
} else {
value = Operation(OperationCode::FDiv, NO_PRECISE, value,
Immediate(static_cast<f32>(1 << -postfactor)));
}
}
value = GetSaturatedFloat(value, instr.alu.saturate_d);
SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
SetRegister(bb, instr.gpr0, value);
break;
}
case OpCode::Id::FADD_C:
case OpCode::Id::FADD_R:
case OpCode::Id::FADD_IMM: {
op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b);
value = GetSaturatedFloat(value, instr.alu.saturate_d);
SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
SetRegister(bb, instr.gpr0, value);
break;
}
case OpCode::Id::MUFU: {
op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
Node value = [&]() {
switch (instr.sub_op) {
case SubOp::Cos:
return Operation(OperationCode::FCos, PRECISE, op_a);
case SubOp::Sin:
return Operation(OperationCode::FSin, PRECISE, op_a);
case SubOp::Ex2:
return Operation(OperationCode::FExp2, PRECISE, op_a);
case SubOp::Lg2:
return Operation(OperationCode::FLog2, PRECISE, op_a);
case SubOp::Rcp:
return Operation(OperationCode::FDiv, PRECISE, Immediate(1.0f), op_a);
case SubOp::Rsq:
return Operation(OperationCode::FInverseSqrt, PRECISE, op_a);
case SubOp::Sqrt:
return Operation(OperationCode::FSqrt, PRECISE, op_a);
default:
UNIMPLEMENTED_MSG("Unhandled MUFU sub op={0:x}",
static_cast<unsigned>(instr.sub_op.Value()));
return Immediate(0);
}
}();
value = GetSaturatedFloat(value, instr.alu.saturate_d);
SetRegister(bb, instr.gpr0, value);
break;
}
case OpCode::Id::FMNMX_C:
case OpCode::Id::FMNMX_R:
case OpCode::Id::FMNMX_IMM: {
op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
const Node condition = GetPredicate(instr.alu.fmnmx.pred, instr.alu.fmnmx.negate_pred != 0);
const Node min = Operation(OperationCode::FMin, NO_PRECISE, op_a, op_b);
const Node max = Operation(OperationCode::FMax, NO_PRECISE, op_a, op_b);
const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max);
SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
SetRegister(bb, instr.gpr0, value);
break;
}
case OpCode::Id::RRO_C:
case OpCode::Id::RRO_R:
case OpCode::Id::RRO_IMM: {
// Currently RRO is only implemented as a register move.
op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
SetRegister(bb, instr.gpr0, op_b);
LOG_WARNING(HW_GPU, "RRO instruction is incomplete");
break;
}
default:
UNIMPLEMENTED_MSG("Unhandled arithmetic instruction: {}", opcode->get().GetName());
}
return pc;
}
} // namespace VideoCommon::Shader

View File

@@ -1,70 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
u32 ShaderIR::DecodeArithmeticHalf(BasicBlock& bb, const BasicBlock& code, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
if (opcode->get().GetId() == OpCode::Id::HADD2_C ||
opcode->get().GetId() == OpCode::Id::HADD2_R) {
UNIMPLEMENTED_IF(instr.alu_half.ftz != 0);
}
UNIMPLEMENTED_IF_MSG(instr.alu_half.saturate != 0, "Half float saturation not implemented");
const bool negate_a =
opcode->get().GetId() != OpCode::Id::HMUL2_R && instr.alu_half.negate_a != 0;
const bool negate_b =
opcode->get().GetId() != OpCode::Id::HMUL2_C && instr.alu_half.negate_b != 0;
const Node op_a = GetOperandAbsNegHalf(GetRegister(instr.gpr8), instr.alu_half.abs_a, negate_a);
// instr.alu_half.type_a
Node op_b = [&]() {
switch (opcode->get().GetId()) {
case OpCode::Id::HADD2_C:
case OpCode::Id::HMUL2_C:
return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
case OpCode::Id::HADD2_R:
case OpCode::Id::HMUL2_R:
return GetRegister(instr.gpr20);
default:
UNREACHABLE();
return Immediate(0);
}
}();
op_b = GetOperandAbsNegHalf(op_b, instr.alu_half.abs_b, negate_b);
Node value = [&]() {
MetaHalfArithmetic meta{true, {instr.alu_half_imm.type_a, instr.alu_half.type_b}};
switch (opcode->get().GetId()) {
case OpCode::Id::HADD2_C:
case OpCode::Id::HADD2_R:
return Operation(OperationCode::HAdd, meta, op_a, op_b);
case OpCode::Id::HMUL2_C:
case OpCode::Id::HMUL2_R:
return Operation(OperationCode::HMul, meta, op_a, op_b);
default:
UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName());
return Immediate(0);
}
}();
value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half.merge);
SetRegister(bb, instr.gpr0, value);
return pc;
}
} // namespace VideoCommon::Shader

View File

@@ -1,51 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
u32 ShaderIR::DecodeArithmeticHalfImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) {
UNIMPLEMENTED_IF(instr.alu_half_imm.ftz != 0);
} else {
UNIMPLEMENTED_IF(instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None);
}
UNIMPLEMENTED_IF_MSG(instr.alu_half_imm.saturate != 0,
"Half float immediate saturation not implemented");
Node op_a = GetRegister(instr.gpr8);
op_a = GetOperandAbsNegHalf(op_a, instr.alu_half_imm.abs_a, instr.alu_half_imm.negate_a);
const Node op_b = UnpackHalfImmediate(instr, true);
Node value = [&]() {
MetaHalfArithmetic meta{true, {instr.alu_half_imm.type_a}};
switch (opcode->get().GetId()) {
case OpCode::Id::HADD2_IMM:
return Operation(OperationCode::HAdd, meta, op_a, op_b);
case OpCode::Id::HMUL2_IMM:
return Operation(OperationCode::HMul, meta, op_a, op_b);
default:
UNREACHABLE();
return Immediate(0);
}
}();
value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half_imm.merge);
SetRegister(bb, instr.gpr0, value);
return pc;
}
} // namespace VideoCommon::Shader

View File

@@ -1,52 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
u32 ShaderIR::DecodeArithmeticImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
switch (opcode->get().GetId()) {
case OpCode::Id::MOV32_IMM: {
SetRegister(bb, instr.gpr0, GetImmediate32(instr));
break;
}
case OpCode::Id::FMUL32_IMM: {
Node value =
Operation(OperationCode::FMul, PRECISE, GetRegister(instr.gpr8), GetImmediate32(instr));
value = GetSaturatedFloat(value, instr.fmul32.saturate);
SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc);
SetRegister(bb, instr.gpr0, value);
break;
}
case OpCode::Id::FADD32I: {
const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fadd32i.abs_a,
instr.fadd32i.negate_a);
const Node op_b = GetOperandAbsNegFloat(GetImmediate32(instr), instr.fadd32i.abs_b,
instr.fadd32i.negate_b);
const Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b);
SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc);
SetRegister(bb, instr.gpr0, value);
break;
}
default:
UNIMPLEMENTED_MSG("Unhandled arithmetic immediate instruction: {}",
opcode->get().GetName());
}
return pc;
}
} // namespace VideoCommon::Shader

View File

@@ -1,287 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
using Tegra::Shader::IAdd3Height;
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
using Tegra::Shader::Pred;
using Tegra::Shader::Register;
u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, const BasicBlock& code, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
Node op_a = GetRegister(instr.gpr8);
Node op_b = [&]() {
if (instr.is_b_imm) {
return Immediate(instr.alu.GetSignedImm20_20());
} else if (instr.is_b_gpr) {
return GetRegister(instr.gpr20);
} else {
return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
}
}();
switch (opcode->get().GetId()) {
case OpCode::Id::IADD_C:
case OpCode::Id::IADD_R:
case OpCode::Id::IADD_IMM: {
UNIMPLEMENTED_IF_MSG(instr.alu.saturate_d, "IADD saturation not implemented");
op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true);
op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true);
const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b);
SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc);
SetRegister(bb, instr.gpr0, value);
break;
}
case OpCode::Id::IADD3_C:
case OpCode::Id::IADD3_R:
case OpCode::Id::IADD3_IMM: {
Node op_c = GetRegister(instr.gpr39);
const auto ApplyHeight = [&](IAdd3Height height, Node value) {
switch (height) {
case IAdd3Height::None:
return value;
case IAdd3Height::LowerHalfWord:
return BitfieldExtract(value, 0, 16);
case IAdd3Height::UpperHalfWord:
return BitfieldExtract(value, 16, 16);
default:
UNIMPLEMENTED_MSG("Unhandled IADD3 height: {}", static_cast<u32>(height));
return Immediate(0);
}
};
if (opcode->get().GetId() == OpCode::Id::IADD3_R) {
op_a = ApplyHeight(instr.iadd3.height_a, op_a);
op_b = ApplyHeight(instr.iadd3.height_b, op_b);
op_c = ApplyHeight(instr.iadd3.height_c, op_c);
}
op_a = GetOperandAbsNegInteger(op_a, false, instr.iadd3.neg_a, true);
op_b = GetOperandAbsNegInteger(op_b, false, instr.iadd3.neg_b, true);
op_c = GetOperandAbsNegInteger(op_c, false, instr.iadd3.neg_c, true);
const Node value = [&]() {
const Node add_ab = Operation(OperationCode::IAdd, NO_PRECISE, op_a, op_b);
if (opcode->get().GetId() != OpCode::Id::IADD3_R) {
return Operation(OperationCode::IAdd, NO_PRECISE, add_ab, op_c);
}
const Node shifted = [&]() {
switch (instr.iadd3.mode) {
case Tegra::Shader::IAdd3Mode::RightShift:
// TODO(tech4me): According to
// https://envytools.readthedocs.io/en/latest/hw/graph/maxwell/cuda/int.html?highlight=iadd3
// The addition between op_a and op_b should be done in uint33, more
// investigation required
return Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, add_ab,
Immediate(16));
case Tegra::Shader::IAdd3Mode::LeftShift:
return Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, add_ab,
Immediate(16));
default:
return add_ab;
}
}();
return Operation(OperationCode::IAdd, NO_PRECISE, shifted, op_c);
}();
SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
SetRegister(bb, instr.gpr0, value);
break;
}
case OpCode::Id::ISCADD_C:
case OpCode::Id::ISCADD_R:
case OpCode::Id::ISCADD_IMM: {
UNIMPLEMENTED_IF_MSG(instr.generates_cc,
"Condition codes generation in ISCADD is not implemented");
op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true);
op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true);
const Node shift = Immediate(static_cast<u32>(instr.alu_integer.shift_amount));
const Node shifted_a = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, shift);
const Node value = Operation(OperationCode::IAdd, NO_PRECISE, shifted_a, op_b);
SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
SetRegister(bb, instr.gpr0, value);
break;
}
case OpCode::Id::POPC_C:
case OpCode::Id::POPC_R:
case OpCode::Id::POPC_IMM: {
if (instr.popc.invert) {
op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b);
}
const Node value = Operation(OperationCode::IBitCount, PRECISE, op_b);
SetRegister(bb, instr.gpr0, value);
break;
}
case OpCode::Id::SEL_C:
case OpCode::Id::SEL_R:
case OpCode::Id::SEL_IMM: {
const Node condition = GetPredicate(instr.sel.pred, instr.sel.neg_pred != 0);
const Node value = Operation(OperationCode::Select, PRECISE, condition, op_a, op_b);
SetRegister(bb, instr.gpr0, value);
break;
}
case OpCode::Id::LOP_C:
case OpCode::Id::LOP_R:
case OpCode::Id::LOP_IMM: {
if (instr.alu.lop.invert_a)
op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a);
if (instr.alu.lop.invert_b)
op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b);
WriteLogicOperation(bb, instr.gpr0, instr.alu.lop.operation, op_a, op_b,
instr.alu.lop.pred_result_mode, instr.alu.lop.pred48,
instr.generates_cc);
break;
}
case OpCode::Id::LOP3_C:
case OpCode::Id::LOP3_R:
case OpCode::Id::LOP3_IMM: {
const Node op_c = GetRegister(instr.gpr39);
const Node lut = [&]() {
if (opcode->get().GetId() == OpCode::Id::LOP3_R) {
return Immediate(instr.alu.lop3.GetImmLut28());
} else {
return Immediate(instr.alu.lop3.GetImmLut48());
}
}();
WriteLop3Instruction(bb, instr.gpr0, op_a, op_b, op_c, lut, instr.generates_cc);
break;
}
case OpCode::Id::IMNMX_C:
case OpCode::Id::IMNMX_R:
case OpCode::Id::IMNMX_IMM: {
UNIMPLEMENTED_IF(instr.imnmx.exchange != Tegra::Shader::IMinMaxExchange::None);
const bool is_signed = instr.imnmx.is_signed;
const Node condition = GetPredicate(instr.imnmx.pred, instr.imnmx.negate_pred != 0);
const Node min = SignedOperation(OperationCode::IMin, is_signed, NO_PRECISE, op_a, op_b);
const Node max = SignedOperation(OperationCode::IMax, is_signed, NO_PRECISE, op_a, op_b);
const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max);
SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
SetRegister(bb, instr.gpr0, value);
break;
}
case OpCode::Id::LEA_R2:
case OpCode::Id::LEA_R1:
case OpCode::Id::LEA_IMM:
case OpCode::Id::LEA_RZ:
case OpCode::Id::LEA_HI: {
const auto [op_a, op_b, op_c] = [&]() -> std::tuple<Node, Node, Node> {
switch (opcode->get().GetId()) {
case OpCode::Id::LEA_R2: {
return {GetRegister(instr.gpr20), GetRegister(instr.gpr39),
Immediate(static_cast<u32>(instr.lea.r2.entry_a))};
}
case OpCode::Id::LEA_R1: {
const bool neg = instr.lea.r1.neg != 0;
return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
GetRegister(instr.gpr20),
Immediate(static_cast<u32>(instr.lea.r1.entry_a))};
}
case OpCode::Id::LEA_IMM: {
const bool neg = instr.lea.imm.neg != 0;
return {Immediate(static_cast<u32>(instr.lea.imm.entry_a)),
GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
Immediate(static_cast<u32>(instr.lea.imm.entry_b))};
}
case OpCode::Id::LEA_RZ: {
const bool neg = instr.lea.rz.neg != 0;
return {GetConstBuffer(instr.lea.rz.cb_index, instr.lea.rz.cb_offset),
GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
Immediate(static_cast<u32>(instr.lea.rz.entry_a))};
}
case OpCode::Id::LEA_HI:
default:
UNIMPLEMENTED_MSG("Unhandled LEA subinstruction: {}", opcode->get().GetName());
return {Immediate(static_cast<u32>(instr.lea.imm.entry_a)), GetRegister(instr.gpr8),
Immediate(static_cast<u32>(instr.lea.imm.entry_b))};
}
}();
UNIMPLEMENTED_IF_MSG(instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex),
"Unhandled LEA Predicate");
const Node shifted_c =
Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, Immediate(1), op_c);
const Node mul_bc = Operation(OperationCode::IMul, NO_PRECISE, op_b, shifted_c);
const Node value = Operation(OperationCode::IAdd, NO_PRECISE, op_a, mul_bc);
SetRegister(bb, instr.gpr0, value);
break;
}
default:
UNIMPLEMENTED_MSG("Unhandled ArithmeticInteger instruction: {}", opcode->get().GetName());
}
return pc;
}
void ShaderIR::WriteLop3Instruction(BasicBlock& bb, Register dest, Node op_a, Node op_b, Node op_c,
Node imm_lut, bool sets_cc) {
constexpr u32 lop_iterations = 32;
const Node one = Immediate(1);
const Node two = Immediate(2);
Node value{};
for (u32 i = 0; i < lop_iterations; ++i) {
const Node shift_amount = Immediate(i);
const Node a = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_c, shift_amount);
const Node pack_0 = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, one);
const Node b = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_b, shift_amount);
const Node c = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, b, one);
const Node pack_1 = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, c, one);
const Node d = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_a, shift_amount);
const Node e = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, d, one);
const Node pack_2 = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, e, two);
const Node pack_01 = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, pack_0, pack_1);
const Node pack_012 = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, pack_01, pack_2);
const Node shifted_bit =
Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, imm_lut, pack_012);
const Node bit = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, shifted_bit, one);
const Node right =
Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, bit, shift_amount);
if (i > 0) {
value = Operation(OperationCode::IBitwiseOr, NO_PRECISE, value, right);
} else {
value = right;
}
}
SetInternalFlagsFromInteger(bb, value, sets_cc);
SetRegister(bb, dest, value);
}
} // namespace VideoCommon::Shader

View File

@@ -1,96 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::LogicOperation;
using Tegra::Shader::OpCode;
using Tegra::Shader::Pred;
using Tegra::Shader::PredicateResultMode;
using Tegra::Shader::Register;
u32 ShaderIR::DecodeArithmeticIntegerImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
Node op_a = GetRegister(instr.gpr8);
Node op_b = Immediate(static_cast<s32>(instr.alu.imm20_32));
switch (opcode->get().GetId()) {
case OpCode::Id::IADD32I: {
UNIMPLEMENTED_IF_MSG(instr.iadd32i.saturate, "IADD32I saturation is not implemented");
op_a = GetOperandAbsNegInteger(op_a, false, instr.iadd32i.negate_a, true);
const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b);
SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc);
SetRegister(bb, instr.gpr0, value);
break;
}
case OpCode::Id::LOP32I: {
if (instr.alu.lop32i.invert_a)
op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a);
if (instr.alu.lop32i.invert_b)
op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b);
WriteLogicOperation(bb, instr.gpr0, instr.alu.lop32i.operation, op_a, op_b,
PredicateResultMode::None, Pred::UnusedIndex, instr.op_32.generates_cc);
break;
}
default:
UNIMPLEMENTED_MSG("Unhandled ArithmeticIntegerImmediate instruction: {}",
opcode->get().GetName());
}
return pc;
}
void ShaderIR::WriteLogicOperation(BasicBlock& bb, Register dest, LogicOperation logic_op,
Node op_a, Node op_b, PredicateResultMode predicate_mode,
Pred predicate, bool sets_cc) {
const Node result = [&]() {
switch (logic_op) {
case LogicOperation::And:
return Operation(OperationCode::IBitwiseAnd, PRECISE, op_a, op_b);
case LogicOperation::Or:
return Operation(OperationCode::IBitwiseOr, PRECISE, op_a, op_b);
case LogicOperation::Xor:
return Operation(OperationCode::IBitwiseXor, PRECISE, op_a, op_b);
case LogicOperation::PassB:
return op_b;
default:
UNIMPLEMENTED_MSG("Unimplemented logic operation={}", static_cast<u32>(logic_op));
return Immediate(0);
}
}();
SetInternalFlagsFromInteger(bb, result, sets_cc);
SetRegister(bb, dest, result);
// Write the predicate value depending on the predicate mode.
switch (predicate_mode) {
case PredicateResultMode::None:
// Do nothing.
return;
case PredicateResultMode::NotZero: {
// Set the predicate to true if the result is not zero.
const Node compare = Operation(OperationCode::LogicalINotEqual, result, Immediate(0));
SetPredicate(bb, static_cast<u64>(predicate), compare);
break;
}
default:
UNIMPLEMENTED_MSG("Unimplemented predicate result mode: {}",
static_cast<u32>(predicate_mode));
}
}
} // namespace VideoCommon::Shader

View File

@@ -1,49 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
u32 ShaderIR::DecodeBfe(BasicBlock& bb, const BasicBlock& code, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
UNIMPLEMENTED_IF(instr.bfe.negate_b);
Node op_a = GetRegister(instr.gpr8);
op_a = GetOperandAbsNegInteger(op_a, false, instr.bfe.negate_a, false);
switch (opcode->get().GetId()) {
case OpCode::Id::BFE_IMM: {
UNIMPLEMENTED_IF_MSG(instr.generates_cc,
"Condition codes generation in BFE is not implemented");
const Node inner_shift_imm = Immediate(static_cast<u32>(instr.bfe.GetLeftShiftValue()));
const Node outer_shift_imm =
Immediate(static_cast<u32>(instr.bfe.GetLeftShiftValue() + instr.bfe.shift_position));
const Node inner_shift =
Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, inner_shift_imm);
const Node outer_shift =
Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, inner_shift, outer_shift_imm);
SetInternalFlagsFromInteger(bb, outer_shift, instr.generates_cc);
SetRegister(bb, instr.gpr0, outer_shift);
break;
}
default:
UNIMPLEMENTED_MSG("Unhandled BFE instruction: {}", opcode->get().GetName());
}
return pc;
}
} // namespace VideoCommon::Shader

View File

@@ -1,41 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
u32 ShaderIR::DecodeBfi(BasicBlock& bb, const BasicBlock& code, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
const auto [base, packed_shift] = [&]() -> std::tuple<Node, Node> {
switch (opcode->get().GetId()) {
case OpCode::Id::BFI_IMM_R:
return {GetRegister(instr.gpr39), Immediate(instr.alu.GetSignedImm20_20())};
default:
UNREACHABLE();
return {Immediate(0), Immediate(0)};
}
}();
const Node insert = GetRegister(instr.gpr8);
const Node offset = BitfieldExtract(packed_shift, 0, 8);
const Node bits = BitfieldExtract(packed_shift, 8, 8);
const Node value =
Operation(OperationCode::UBitfieldInsert, PRECISE, base, insert, offset, bits);
SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
SetRegister(bb, instr.gpr0, value);
return pc;
}
} // namespace VideoCommon::Shader

View File

@@ -1,149 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
using Tegra::Shader::Register;
u32 ShaderIR::DecodeConversion(BasicBlock& bb, const BasicBlock& code, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
switch (opcode->get().GetId()) {
case OpCode::Id::I2I_R: {
UNIMPLEMENTED_IF(instr.conversion.selector);
const bool input_signed = instr.conversion.is_input_signed;
const bool output_signed = instr.conversion.is_output_signed;
Node value = GetRegister(instr.gpr20);
value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed);
value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, instr.conversion.negate_a,
input_signed);
if (input_signed != output_signed) {
value = SignedOperation(OperationCode::ICastUnsigned, output_signed, NO_PRECISE, value);
}
SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
SetRegister(bb, instr.gpr0, value);
break;
}
case OpCode::Id::I2F_R:
case OpCode::Id::I2F_C: {
UNIMPLEMENTED_IF(instr.conversion.dest_size != Register::Size::Word);
UNIMPLEMENTED_IF(instr.conversion.selector);
UNIMPLEMENTED_IF_MSG(instr.generates_cc,
"Condition codes generation in I2F is not implemented");
Node value = [&]() {
if (instr.is_b_gpr) {
return GetRegister(instr.gpr20);
} else {
return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
}
}();
const bool input_signed = instr.conversion.is_input_signed;
value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed);
value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, false, input_signed);
value = SignedOperation(OperationCode::FCastInteger, input_signed, PRECISE, value);
value = GetOperandAbsNegFloat(value, false, instr.conversion.negate_a);
SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
SetRegister(bb, instr.gpr0, value);
break;
}
case OpCode::Id::F2F_R:
case OpCode::Id::F2F_C: {
UNIMPLEMENTED_IF(instr.conversion.dest_size != Register::Size::Word);
UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word);
UNIMPLEMENTED_IF_MSG(instr.generates_cc,
"Condition codes generation in F2F is not implemented");
Node value = [&]() {
if (instr.is_b_gpr) {
return GetRegister(instr.gpr20);
} else {
return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
}
}();
value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a);
value = [&]() {
switch (instr.conversion.f2f.rounding) {
case Tegra::Shader::F2fRoundingOp::None:
return value;
case Tegra::Shader::F2fRoundingOp::Round:
return Operation(OperationCode::FRoundEven, PRECISE, value);
case Tegra::Shader::F2fRoundingOp::Floor:
return Operation(OperationCode::FFloor, PRECISE, value);
case Tegra::Shader::F2fRoundingOp::Ceil:
return Operation(OperationCode::FCeil, PRECISE, value);
case Tegra::Shader::F2fRoundingOp::Trunc:
return Operation(OperationCode::FTrunc, PRECISE, value);
}
UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}",
static_cast<u32>(instr.conversion.f2f.rounding.Value()));
return Immediate(0);
}();
value = GetSaturatedFloat(value, instr.alu.saturate_d);
SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
SetRegister(bb, instr.gpr0, value);
break;
}
case OpCode::Id::F2I_R:
case OpCode::Id::F2I_C: {
UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word);
UNIMPLEMENTED_IF_MSG(instr.generates_cc,
"Condition codes generation in F2I is not implemented");
Node value = [&]() {
if (instr.is_b_gpr) {
return GetRegister(instr.gpr20);
} else {
return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
}
}();
value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a);
value = [&]() {
switch (instr.conversion.f2i.rounding) {
case Tegra::Shader::F2iRoundingOp::None:
return value;
case Tegra::Shader::F2iRoundingOp::Floor:
return Operation(OperationCode::FFloor, PRECISE, value);
case Tegra::Shader::F2iRoundingOp::Ceil:
return Operation(OperationCode::FCeil, PRECISE, value);
case Tegra::Shader::F2iRoundingOp::Trunc:
return Operation(OperationCode::FTrunc, PRECISE, value);
default:
UNIMPLEMENTED_MSG("Unimplemented F2I rounding mode {}",
static_cast<u32>(instr.conversion.f2i.rounding.Value()));
return Immediate(0);
}
}();
const bool is_signed = instr.conversion.is_output_signed;
value = SignedOperation(OperationCode::ICastFloat, is_signed, PRECISE, value);
value = ConvertIntegerSize(value, instr.conversion.dest_size, is_signed);
SetRegister(bb, instr.gpr0, value);
break;
}
default:
UNIMPLEMENTED_MSG("Unhandled conversion instruction: {}", opcode->get().GetName());
}
return pc;
}
} // namespace VideoCommon::Shader

View File

@@ -1,59 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
u32 ShaderIR::DecodeFfma(BasicBlock& bb, const BasicBlock& code, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented");
UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_0 != 1, "FFMA tab5980_0({}) not implemented",
instr.ffma.tab5980_0.Value()); // Seems to be 1 by default based on SMO
UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_1 != 0, "FFMA tab5980_1({}) not implemented",
instr.ffma.tab5980_1.Value());
const Node op_a = GetRegister(instr.gpr8);
auto [op_b, op_c] = [&]() -> std::tuple<Node, Node> {
switch (opcode->get().GetId()) {
case OpCode::Id::FFMA_CR: {
return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset),
GetRegister(instr.gpr39)};
}
case OpCode::Id::FFMA_RR:
return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)};
case OpCode::Id::FFMA_RC: {
return {GetRegister(instr.gpr39),
GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)};
}
case OpCode::Id::FFMA_IMM:
return {GetImmediate19(instr), GetRegister(instr.gpr39)};
default:
UNIMPLEMENTED_MSG("Unhandled FFMA instruction: {}", opcode->get().GetName());
return {Immediate(0), Immediate(0)};
}
}();
op_b = GetOperandAbsNegFloat(op_b, false, instr.ffma.negate_b);
op_c = GetOperandAbsNegFloat(op_c, false, instr.ffma.negate_c);
Node value = Operation(OperationCode::FFma, PRECISE, op_a, op_b, op_c);
value = GetSaturatedFloat(value, instr.alu.saturate_d);
SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
SetRegister(bb, instr.gpr0, value);
return pc;
}
} // namespace VideoCommon::Shader

View File

@@ -1,58 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
u32 ShaderIR::DecodeFloatSet(BasicBlock& bb, const BasicBlock& code, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fset.abs_a != 0,
instr.fset.neg_a != 0);
Node op_b = [&]() {
if (instr.is_b_imm) {
return GetImmediate19(instr);
} else if (instr.is_b_gpr) {
return GetRegister(instr.gpr20);
} else {
return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
}
}();
op_b = GetOperandAbsNegFloat(op_b, instr.fset.abs_b != 0, instr.fset.neg_b != 0);
// The fset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the
// condition is true, and to 0 otherwise.
const Node second_pred = GetPredicate(instr.fset.pred39, instr.fset.neg_pred != 0);
const OperationCode combiner = GetPredicateCombiner(instr.fset.op);
const Node first_pred = GetPredicateComparisonFloat(instr.fset.cond, op_a, op_b);
const Node predicate = Operation(combiner, first_pred, second_pred);
const Node true_value = instr.fset.bf ? Immediate(1.0f) : Immediate(-1);
const Node false_value = instr.fset.bf ? Immediate(0.0f) : Immediate(0);
const Node value =
Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value);
if (instr.fset.bf) {
SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
} else {
SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
}
SetRegister(bb, instr.gpr0, value);
return pc;
}
} // namespace VideoCommon::Shader

View File

@@ -1,56 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
using Tegra::Shader::Pred;
u32 ShaderIR::DecodeFloatSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fsetp.abs_a != 0,
instr.fsetp.neg_a != 0);
Node op_b = [&]() {
if (instr.is_b_imm) {
return GetImmediate19(instr);
} else if (instr.is_b_gpr) {
return GetRegister(instr.gpr20);
} else {
return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
}
}();
op_b = GetOperandAbsNegFloat(op_b, instr.fsetp.abs_b, false);
// We can't use the constant predicate as destination.
ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
const Node predicate = GetPredicateComparisonFloat(instr.fsetp.cond, op_a, op_b);
const Node second_pred = GetPredicate(instr.fsetp.pred39, instr.fsetp.neg_pred != 0);
const OperationCode combiner = GetPredicateCombiner(instr.fsetp.op);
const Node value = Operation(combiner, predicate, second_pred);
// Set the primary predicate to the result of Predicate OP SecondPredicate
SetPredicate(bb, instr.fsetp.pred3, value);
if (instr.fsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
// Set the secondary predicate to the result of !Predicate OP SecondPredicate,
// if enabled
const Node negated_pred = Operation(OperationCode::LogicalNegate, predicate);
const Node second_value = Operation(combiner, negated_pred, second_pred);
SetPredicate(bb, instr.fsetp.pred0, second_value);
}
return pc;
}
} // namespace VideoCommon::Shader

View File

@@ -1,67 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <array>
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
u32 ShaderIR::DecodeHalfSet(BasicBlock& bb, const BasicBlock& code, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
UNIMPLEMENTED_IF(instr.hset2.ftz != 0);
// instr.hset2.type_a
// instr.hset2.type_b
Node op_a = GetRegister(instr.gpr8);
Node op_b = [&]() {
switch (opcode->get().GetId()) {
case OpCode::Id::HSET2_R:
return GetRegister(instr.gpr20);
default:
UNREACHABLE();
return Immediate(0);
}
}();
op_a = GetOperandAbsNegHalf(op_a, instr.hset2.abs_a, instr.hset2.negate_a);
op_b = GetOperandAbsNegHalf(op_b, instr.hset2.abs_b, instr.hset2.negate_b);
const Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred);
MetaHalfArithmetic meta{false, {instr.hset2.type_a, instr.hset2.type_b}};
const Node comparison_pair = GetPredicateComparisonHalf(instr.hset2.cond, meta, op_a, op_b);
const OperationCode combiner = GetPredicateCombiner(instr.hset2.op);
// HSET2 operates on each half float in the pack.
std::array<Node, 2> values;
for (u32 i = 0; i < 2; ++i) {
const u32 raw_value = instr.hset2.bf ? 0x3c00 : 0xffff;
const Node true_value = Immediate(raw_value << (i * 16));
const Node false_value = Immediate(0);
const Node comparison =
Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i));
const Node predicate = Operation(combiner, comparison, second_pred);
values[i] =
Operation(OperationCode::Select, NO_PRECISE, predicate, true_value, false_value);
}
const Node value = Operation(OperationCode::UBitwiseOr, NO_PRECISE, values[0], values[1]);
SetRegister(bb, instr.gpr0, value);
return pc;
}
} // namespace VideoCommon::Shader

View File

@@ -1,62 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
using Tegra::Shader::Pred;
u32 ShaderIR::DecodeHalfSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
UNIMPLEMENTED_IF(instr.hsetp2.ftz != 0);
Node op_a = GetRegister(instr.gpr8);
op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a);
const Node op_b = [&]() {
switch (opcode->get().GetId()) {
case OpCode::Id::HSETP2_R:
return GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.abs_a,
instr.hsetp2.negate_b);
default:
UNREACHABLE();
return Immediate(0);
}
}();
// We can't use the constant predicate as destination.
ASSERT(instr.hsetp2.pred3 != static_cast<u64>(Pred::UnusedIndex));
const Node second_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred != 0);
const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op);
const OperationCode pair_combiner =
instr.hsetp2.h_and ? OperationCode::LogicalAll2 : OperationCode::LogicalAny2;
MetaHalfArithmetic meta = {false, {instr.hsetp2.type_a, instr.hsetp2.type_b}};
const Node comparison = GetPredicateComparisonHalf(instr.hsetp2.cond, meta, op_a, op_b);
const Node first_pred = Operation(pair_combiner, comparison);
// Set the primary predicate to the result of Predicate OP SecondPredicate
const Node value = Operation(combiner, first_pred, second_pred);
SetPredicate(bb, instr.hsetp2.pred3, value);
if (instr.hsetp2.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
// Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled
const Node negated_pred = Operation(OperationCode::LogicalNegate, first_pred);
SetPredicate(bb, instr.hsetp2.pred0, Operation(combiner, negated_pred, second_pred));
}
return pc;
}
} // namespace VideoCommon::Shader

View File

@@ -1,76 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <tuple>
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
using Tegra::Shader::HalfPrecision;
using Tegra::Shader::HalfType;
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
u32 ShaderIR::DecodeHfma2(BasicBlock& bb, const BasicBlock& code, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) {
UNIMPLEMENTED_IF(instr.hfma2.rr.precision != HalfPrecision::None);
} else {
UNIMPLEMENTED_IF(instr.hfma2.precision != HalfPrecision::None);
}
constexpr auto identity = HalfType::H0_H1;
const HalfType type_a = instr.hfma2.type_a;
const Node op_a = GetRegister(instr.gpr8);
bool neg_b{}, neg_c{};
auto [saturate, type_b, op_b, type_c,
op_c] = [&]() -> std::tuple<bool, HalfType, Node, HalfType, Node> {
switch (opcode->get().GetId()) {
case OpCode::Id::HFMA2_CR:
neg_b = instr.hfma2.negate_b;
neg_c = instr.hfma2.negate_c;
return {instr.hfma2.saturate, instr.hfma2.type_b,
GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset), instr.hfma2.type_reg39,
GetRegister(instr.gpr39)};
case OpCode::Id::HFMA2_RC:
neg_b = instr.hfma2.negate_b;
neg_c = instr.hfma2.negate_c;
return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39),
instr.hfma2.type_b, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)};
case OpCode::Id::HFMA2_RR:
neg_b = instr.hfma2.rr.negate_b;
neg_c = instr.hfma2.rr.negate_c;
return {instr.hfma2.rr.saturate, instr.hfma2.type_b, GetRegister(instr.gpr20),
instr.hfma2.rr.type_c, GetRegister(instr.gpr39)};
case OpCode::Id::HFMA2_IMM_R:
neg_c = instr.hfma2.negate_c;
return {instr.hfma2.saturate, identity, UnpackHalfImmediate(instr, true),
instr.hfma2.type_reg39, GetRegister(instr.gpr39)};
default:
return {false, identity, Immediate(0), identity, Immediate(0)};
}
}();
UNIMPLEMENTED_IF_MSG(saturate, "HFMA2 saturation is not implemented");
op_b = GetOperandAbsNegHalf(op_b, false, neg_b);
op_c = GetOperandAbsNegHalf(op_c, false, neg_c);
MetaHalfArithmetic meta{true, {type_a, type_b, type_c}};
Node value = Operation(OperationCode::HFma, meta, op_a, op_b, op_c);
value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge);
SetRegister(bb, instr.gpr0, value);
return pc;
}
} // namespace VideoCommon::Shader

View File

@@ -1,50 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
u32 ShaderIR::DecodeIntegerSet(BasicBlock& bb, const BasicBlock& code, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
const Node op_a = GetRegister(instr.gpr8);
const Node op_b = [&]() {
if (instr.is_b_imm) {
return Immediate(instr.alu.GetSignedImm20_20());
} else if (instr.is_b_gpr) {
return GetRegister(instr.gpr20);
} else {
return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
}
}();
// The iset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the condition
// is true, and to 0 otherwise.
const Node second_pred = GetPredicate(instr.iset.pred39, instr.iset.neg_pred != 0);
const Node first_pred =
GetPredicateComparisonInteger(instr.iset.cond, instr.iset.is_signed, op_a, op_b);
const OperationCode combiner = GetPredicateCombiner(instr.iset.op);
const Node predicate = Operation(combiner, first_pred, second_pred);
const Node true_value = instr.iset.bf ? Immediate(1.0f) : Immediate(-1);
const Node false_value = instr.iset.bf ? Immediate(0.0f) : Immediate(0);
const Node value =
Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value);
SetRegister(bb, instr.gpr0, value);
return pc;
}
} // namespace VideoCommon::Shader

View File

@@ -1,53 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
using Tegra::Shader::Pred;
u32 ShaderIR::DecodeIntegerSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
const Node op_a = GetRegister(instr.gpr8);
const Node op_b = [&]() {
if (instr.is_b_imm) {
return Immediate(instr.alu.GetSignedImm20_20());
} else if (instr.is_b_gpr) {
return GetRegister(instr.gpr20);
} else {
return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
}
}();
// We can't use the constant predicate as destination.
ASSERT(instr.isetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
const Node second_pred = GetPredicate(instr.isetp.pred39, instr.isetp.neg_pred != 0);
const Node predicate =
GetPredicateComparisonInteger(instr.isetp.cond, instr.isetp.is_signed, op_a, op_b);
// Set the primary predicate to the result of Predicate OP SecondPredicate
const OperationCode combiner = GetPredicateCombiner(instr.isetp.op);
const Node value = Operation(combiner, predicate, second_pred);
SetPredicate(bb, instr.isetp.pred3, value);
if (instr.isetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
// Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled
const Node negated_pred = Operation(OperationCode::LogicalNegate, predicate);
SetPredicate(bb, instr.isetp.pred0, Operation(combiner, negated_pred, second_pred));
}
return pc;
}
} // namespace VideoCommon::Shader

View File

@@ -1,737 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <vector>
#include <fmt/format.h>
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
using Tegra::Shader::Attribute;
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
using Tegra::Shader::Register;
using Tegra::Shader::TextureMiscMode;
using Tegra::Shader::TextureProcessMode;
using Tegra::Shader::TextureType;
static std::size_t GetCoordCount(TextureType texture_type) {
switch (texture_type) {
case TextureType::Texture1D:
return 1;
case TextureType::Texture2D:
return 2;
case TextureType::Texture3D:
case TextureType::TextureCube:
return 3;
default:
UNIMPLEMENTED_MSG("Unhandled texture type: {}", static_cast<u32>(texture_type));
return 0;
}
}
u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
switch (opcode->get().GetId()) {
case OpCode::Id::LD_A: {
// Note: Shouldn't this be interp mode flat? As in no interpolation made.
UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
"Indirect attribute loads are not supported");
UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
"Unaligned attribute loads are not supported");
Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Perspective,
Tegra::Shader::IpaSampleMode::Default};
u64 next_element = instr.attribute.fmt20.element;
auto next_index = static_cast<u64>(instr.attribute.fmt20.index.Value());
const auto LoadNextElement = [&](u32 reg_offset) {
const Node buffer = GetRegister(instr.gpr39);
const Node attribute = GetInputAttribute(static_cast<Attribute::Index>(next_index),
next_element, input_mode, buffer);
SetRegister(bb, instr.gpr0.Value() + reg_offset, attribute);
// Load the next attribute element into the following register. If the element
// to load goes beyond the vec4 size, load the first element of the next
// attribute.
next_element = (next_element + 1) % 4;
next_index = next_index + (next_element == 0 ? 1 : 0);
};
const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
LoadNextElement(reg_offset);
}
break;
}
case OpCode::Id::LD_C: {
UNIMPLEMENTED_IF(instr.ld_c.unknown != 0);
Node index = GetRegister(instr.gpr8);
const Node op_a =
GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.offset + 0, index);
switch (instr.ld_c.type.Value()) {
case Tegra::Shader::UniformType::Single:
SetRegister(bb, instr.gpr0, op_a);
break;
case Tegra::Shader::UniformType::Double: {
const Node op_b =
GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.offset + 4, index);
SetTemporal(bb, 0, op_a);
SetTemporal(bb, 1, op_b);
SetRegister(bb, instr.gpr0, GetTemporal(0));
SetRegister(bb, instr.gpr0.Value() + 1, GetTemporal(1));
break;
}
default:
UNIMPLEMENTED_MSG("Unhandled type: {}", static_cast<unsigned>(instr.ld_c.type.Value()));
}
break;
}
case OpCode::Id::LD_L: {
UNIMPLEMENTED_IF_MSG(instr.ld_l.unknown == 1, "LD_L Unhandled mode: {}",
static_cast<unsigned>(instr.ld_l.unknown.Value()));
const Node index = Operation(OperationCode::IAdd, GetRegister(instr.gpr8),
Immediate(static_cast<s32>(instr.smem_imm)));
const Node lmem = GetLocalMemory(index);
switch (instr.ldst_sl.type.Value()) {
case Tegra::Shader::StoreType::Bytes32:
SetRegister(bb, instr.gpr0, lmem);
break;
default:
UNIMPLEMENTED_MSG("LD_L Unhandled type: {}",
static_cast<unsigned>(instr.ldst_sl.type.Value()));
}
break;
}
case OpCode::Id::LDG: {
const u32 count = [&]() {
switch (instr.ldg.type) {
case Tegra::Shader::UniformType::Single:
return 1;
case Tegra::Shader::UniformType::Double:
return 2;
case Tegra::Shader::UniformType::Quad:
case Tegra::Shader::UniformType::UnsignedQuad:
return 4;
default:
UNIMPLEMENTED_MSG("Unimplemented LDG size!");
return 1;
}
}();
const Node addr_register = GetRegister(instr.gpr8);
const Node base_address = TrackCbuf(addr_register, code, static_cast<s64>(code.size()));
const auto cbuf = std::get_if<CbufNode>(base_address);
ASSERT(cbuf != nullptr);
const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset());
ASSERT(cbuf_offset_imm != nullptr);
const auto cbuf_offset = cbuf_offset_imm->GetValue() * 4;
bb.push_back(Comment(
fmt::format("Base address is c[0x{:x}][0x{:x}]", cbuf->GetIndex(), cbuf_offset)));
const GlobalMemoryBase descriptor{cbuf->GetIndex(), cbuf_offset};
used_global_memory_bases.insert(descriptor);
const Node immediate_offset =
Immediate(static_cast<u32>(instr.ldg.immediate_offset.Value()));
const Node base_real_address =
Operation(OperationCode::UAdd, NO_PRECISE, immediate_offset, addr_register);
for (u32 i = 0; i < count; ++i) {
const Node it_offset = Immediate(i * 4);
const Node real_address =
Operation(OperationCode::UAdd, NO_PRECISE, base_real_address, it_offset);
const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor));
SetTemporal(bb, i, gmem);
}
for (u32 i = 0; i < count; ++i) {
SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
}
break;
}
case OpCode::Id::ST_A: {
UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
"Indirect attribute loads are not supported");
UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
"Unaligned attribute loads are not supported");
u64 next_element = instr.attribute.fmt20.element;
auto next_index = static_cast<u64>(instr.attribute.fmt20.index.Value());
const auto StoreNextElement = [&](u32 reg_offset) {
const auto dest = GetOutputAttribute(static_cast<Attribute::Index>(next_index),
next_element, GetRegister(instr.gpr39));
const auto src = GetRegister(instr.gpr0.Value() + reg_offset);
bb.push_back(Operation(OperationCode::Assign, dest, src));
// Load the next attribute element into the following register. If the element
// to load goes beyond the vec4 size, load the first element of the next
// attribute.
next_element = (next_element + 1) % 4;
next_index = next_index + (next_element == 0 ? 1 : 0);
};
const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
StoreNextElement(reg_offset);
}
break;
}
case OpCode::Id::ST_L: {
UNIMPLEMENTED_IF_MSG(instr.st_l.unknown == 0, "ST_L Unhandled mode: {}",
static_cast<u32>(instr.st_l.unknown.Value()));
const Node index = Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8),
Immediate(static_cast<s32>(instr.smem_imm)));
switch (instr.ldst_sl.type.Value()) {
case Tegra::Shader::StoreType::Bytes32:
SetLocalMemory(bb, index, GetRegister(instr.gpr0));
break;
default:
UNIMPLEMENTED_MSG("ST_L Unhandled type: {}",
static_cast<u32>(instr.ldst_sl.type.Value()));
}
break;
}
case OpCode::Id::TEX: {
UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
"AOFFI is not implemented");
if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
}
const TextureType texture_type{instr.tex.texture_type};
const bool is_array = instr.tex.array != 0;
const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC);
const auto process_mode = instr.tex.GetTextureProcessMode();
WriteTexInstructionFloat(
bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array));
break;
}
case OpCode::Id::TEXS: {
const TextureType texture_type{instr.texs.GetTextureType()};
const bool is_array{instr.texs.IsArrayTexture()};
const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC);
const auto process_mode = instr.texs.GetTextureProcessMode();
if (instr.texs.UsesMiscMode(TextureMiscMode::NODEP)) {
LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete");
}
const Node4 components =
GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array);
if (instr.texs.fp32_flag) {
WriteTexsInstructionFloat(bb, instr, components);
} else {
WriteTexsInstructionHalfFloat(bb, instr, components);
}
break;
}
case OpCode::Id::TLD4: {
ASSERT(instr.tld4.array == 0);
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI),
"AOFFI is not implemented");
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
"NDV is not implemented");
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP),
"PTP is not implemented");
if (instr.tld4.UsesMiscMode(TextureMiscMode::NODEP)) {
LOG_WARNING(HW_GPU, "TLD4.NODEP implementation is incomplete");
}
const auto texture_type = instr.tld4.texture_type.Value();
const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC);
const bool is_array = instr.tld4.array != 0;
WriteTexInstructionFloat(bb, instr,
GetTld4Code(instr, texture_type, depth_compare, is_array));
break;
}
case OpCode::Id::TLD4S: {
UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI),
"AOFFI is not implemented");
if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) {
LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete");
}
const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
const Node op_a = GetRegister(instr.gpr8);
const Node op_b = GetRegister(instr.gpr20);
std::vector<Node> coords;
// TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
if (depth_compare) {
// Note: TLD4S coordinate encoding works just like TEXS's
const Node op_y = GetRegister(instr.gpr8.Value() + 1);
coords.push_back(op_a);
coords.push_back(op_y);
coords.push_back(op_b);
} else {
coords.push_back(op_a);
coords.push_back(op_b);
}
const auto num_coords = static_cast<u32>(coords.size());
coords.push_back(Immediate(static_cast<u32>(instr.tld4s.component)));
const auto& sampler =
GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare);
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto params = coords;
MetaTexture meta{sampler, element, num_coords};
values[element] =
Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params));
}
WriteTexsInstructionFloat(bb, instr, values);
break;
}
case OpCode::Id::TXQ: {
if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) {
LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete");
}
// TODO: The new commits on the texture refactor, change the way samplers work.
// Sadly, not all texture instructions specify the type of texture their sampler
// uses. This must be fixed at a later instance.
const auto& sampler =
GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false);
switch (instr.txq.query_type) {
case Tegra::Shader::TextureQueryType::Dimension: {
for (u32 element = 0; element < 4; ++element) {
MetaTexture meta{sampler, element};
const Node value = Operation(OperationCode::F4TextureQueryDimensions,
std::move(meta), GetRegister(instr.gpr8));
SetTemporal(bb, element, value);
}
for (u32 i = 0; i < 4; ++i) {
SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
}
break;
}
default:
UNIMPLEMENTED_MSG("Unhandled texture query type: {}",
static_cast<u32>(instr.txq.query_type.Value()));
}
break;
}
case OpCode::Id::TMML: {
UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
"NDV is not implemented");
if (instr.tmml.UsesMiscMode(TextureMiscMode::NODEP)) {
LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete");
}
auto texture_type = instr.tmml.texture_type.Value();
const bool is_array = instr.tmml.array != 0;
const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
std::vector<Node> coords;
// TODO: Add coordinates for different samplers once other texture types are implemented.
switch (texture_type) {
case TextureType::Texture1D:
coords.push_back(GetRegister(instr.gpr8));
break;
case TextureType::Texture2D:
coords.push_back(GetRegister(instr.gpr8.Value() + 0));
coords.push_back(GetRegister(instr.gpr8.Value() + 1));
break;
default:
UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<u32>(texture_type));
// Fallback to interpreting as a 2D texture for now
coords.push_back(GetRegister(instr.gpr8.Value() + 0));
coords.push_back(GetRegister(instr.gpr8.Value() + 1));
texture_type = TextureType::Texture2D;
}
for (u32 element = 0; element < 2; ++element) {
auto params = coords;
MetaTexture meta_texture{sampler, element, static_cast<u32>(coords.size())};
const Node value =
Operation(OperationCode::F4TextureQueryLod, meta_texture, std::move(params));
SetTemporal(bb, element, value);
}
for (u32 element = 0; element < 2; ++element) {
SetRegister(bb, instr.gpr0.Value() + element, GetTemporal(element));
}
break;
}
case OpCode::Id::TLDS: {
const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()};
const bool is_array{instr.tlds.IsArrayTexture()};
UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI),
"AOFFI is not implemented");
UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented");
if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) {
LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete");
}
WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array));
break;
}
default:
UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
}
return pc;
}
const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type,
bool is_array, bool is_shadow) {
const auto offset = static_cast<std::size_t>(sampler.index.Value());
// If this sampler has already been used, return the existing mapping.
const auto itr =
std::find_if(used_samplers.begin(), used_samplers.end(),
[&](const Sampler& entry) { return entry.GetOffset() == offset; });
if (itr != used_samplers.end()) {
ASSERT(itr->GetType() == type && itr->IsArray() == is_array &&
itr->IsShadow() == is_shadow);
return *itr;
}
// Otherwise create a new mapping for this sampler
const std::size_t next_index = used_samplers.size();
const Sampler entry{offset, next_index, type, is_array, is_shadow};
return *used_samplers.emplace(entry).first;
}
void ShaderIR::WriteTexInstructionFloat(BasicBlock& bb, Instruction instr,
const Node4& components) {
u32 dest_elem = 0;
for (u32 elem = 0; elem < 4; ++elem) {
if (!instr.tex.IsComponentEnabled(elem)) {
// Skip disabled components
continue;
}
SetTemporal(bb, dest_elem++, components[elem]);
}
// After writing values in temporals, move them to the real registers
for (u32 i = 0; i < dest_elem; ++i) {
SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
}
}
void ShaderIR::WriteTexsInstructionFloat(BasicBlock& bb, Instruction instr,
const Node4& components) {
// TEXS has two destination registers and a swizzle. The first two elements in the swizzle
// go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
u32 dest_elem = 0;
for (u32 component = 0; component < 4; ++component) {
if (!instr.texs.IsComponentEnabled(component))
continue;
SetTemporal(bb, dest_elem++, components[component]);
}
for (u32 i = 0; i < dest_elem; ++i) {
if (i < 2) {
// Write the first two swizzle components to gpr0 and gpr0+1
SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporal(i));
} else {
ASSERT(instr.texs.HasTwoDestinations());
// Write the rest of the swizzle components to gpr28 and gpr28+1
SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporal(i));
}
}
}
void ShaderIR::WriteTexsInstructionHalfFloat(BasicBlock& bb, Instruction instr,
const Node4& components) {
// TEXS.F16 destionation registers are packed in two registers in pairs (just like any half
// float instruction).
Node4 values;
u32 dest_elem = 0;
for (u32 component = 0; component < 4; ++component) {
if (!instr.texs.IsComponentEnabled(component))
continue;
values[dest_elem++] = components[component];
}
if (dest_elem == 0)
return;
std::generate(values.begin() + dest_elem, values.end(), [&]() { return Immediate(0); });
const Node first_value = Operation(OperationCode::HPack2, values[0], values[1]);
if (dest_elem <= 2) {
SetRegister(bb, instr.gpr0, first_value);
return;
}
SetTemporal(bb, 0, first_value);
SetTemporal(bb, 1, Operation(OperationCode::HPack2, values[2], values[3]));
SetRegister(bb, instr.gpr0, GetTemporal(0));
SetRegister(bb, instr.gpr28, GetTemporal(1));
}
Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
TextureProcessMode process_mode, bool depth_compare, bool is_array,
std::size_t array_offset, std::size_t bias_offset,
std::vector<Node>&& coords) {
UNIMPLEMENTED_IF_MSG(
(texture_type == TextureType::Texture3D && (is_array || depth_compare)) ||
(texture_type == TextureType::TextureCube && is_array && depth_compare),
"This method is not supported.");
const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);
const bool lod_needed = process_mode == TextureProcessMode::LZ ||
process_mode == TextureProcessMode::LL ||
process_mode == TextureProcessMode::LLA;
// LOD selection (either via bias or explicit textureLod) not supported in GL for
// sampler2DArrayShadow and samplerCubeArrayShadow.
const bool gl_lod_supported =
!((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && depth_compare) ||
(texture_type == Tegra::Shader::TextureType::TextureCube && is_array && depth_compare));
const OperationCode read_method =
lod_needed && gl_lod_supported ? OperationCode::F4TextureLod : OperationCode::F4Texture;
UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported);
std::optional<u32> array_offset_value;
if (is_array)
array_offset_value = static_cast<u32>(array_offset);
const auto coords_count = static_cast<u32>(coords.size());
if (process_mode != TextureProcessMode::None && gl_lod_supported) {
if (process_mode == TextureProcessMode::LZ) {
coords.push_back(Immediate(0.0f));
} else {
// If present, lod or bias are always stored in the register indexed by the gpr20
// field with an offset depending on the usage of the other registers
coords.push_back(GetRegister(instr.gpr20.Value() + bias_offset));
}
}
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto params = coords;
MetaTexture meta{sampler, element, coords_count, array_offset_value};
values[element] = Operation(read_method, std::move(meta), std::move(params));
}
return values;
}
Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
TextureProcessMode process_mode, bool depth_compare, bool is_array) {
const bool lod_bias_enabled =
(process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5);
// If enabled arrays index is always stored in the gpr8 field
const u64 array_register = instr.gpr8.Value();
// First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
const u64 coord_register = array_register + (is_array ? 1 : 0);
std::vector<Node> coords;
for (std::size_t i = 0; i < coord_count; ++i) {
coords.push_back(GetRegister(coord_register + i));
}
// 1D.DC in opengl the 2nd component is ignored.
if (depth_compare && !is_array && texture_type == TextureType::Texture1D) {
coords.push_back(Immediate(0.0f));
}
std::size_t array_offset{};
if (is_array) {
array_offset = coords.size();
coords.push_back(GetRegister(array_register));
}
if (depth_compare) {
// Depth is always stored in the register signaled by gpr20
// or in the next register if lod or bias are used
const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
coords.push_back(GetRegister(depth_register));
}
// Fill ignored coordinates
while (coords.size() < total_coord_count) {
coords.push_back(Immediate(0));
}
return GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, array_offset,
0, std::move(coords));
}
Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
TextureProcessMode process_mode, bool depth_compare, bool is_array) {
const bool lod_bias_enabled =
(process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4);
// If enabled arrays index is always stored in the gpr8 field
const u64 array_register = instr.gpr8.Value();
// First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used
const u64 coord_register = array_register + (is_array ? 1 : 0);
const u64 last_coord_register =
(is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2))
? static_cast<u64>(instr.gpr20.Value())
: coord_register + 1;
std::vector<Node> coords;
for (std::size_t i = 0; i < coord_count; ++i) {
const bool last = (i == (coord_count - 1)) && (coord_count > 1);
coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
}
std::size_t array_offset{};
if (is_array) {
array_offset = coords.size();
coords.push_back(GetRegister(array_register));
}
if (depth_compare) {
// Depth is always stored in the register signaled by gpr20
// or in the next register if lod or bias are used
const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
coords.push_back(GetRegister(depth_register));
}
// Fill ignored coordinates
while (coords.size() < total_coord_count) {
coords.push_back(Immediate(0));
}
return GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, array_offset,
(coord_count > 2 ? 1 : 0), std::move(coords));
}
Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
bool is_array) {
const std::size_t coord_count = GetCoordCount(texture_type);
const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0);
const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0);
// If enabled arrays index is always stored in the gpr8 field
const u64 array_register = instr.gpr8.Value();
// First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
const u64 coord_register = array_register + (is_array ? 1 : 0);
std::vector<Node> coords;
for (size_t i = 0; i < coord_count; ++i) {
coords.push_back(GetRegister(coord_register + i));
}
std::optional<u32> array_offset;
if (is_array) {
array_offset = static_cast<u32>(coords.size());
coords.push_back(GetRegister(array_register));
}
const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto params = coords;
MetaTexture meta{sampler, element, static_cast<u32>(coords.size()), array_offset};
values[element] =
Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params));
}
return values;
}
Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) {
const std::size_t type_coord_count = GetCoordCount(texture_type);
const std::size_t total_coord_count = type_coord_count + (is_array ? 1 : 0);
const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;
// If enabled arrays index is always stored in the gpr8 field
const u64 array_register = instr.gpr8.Value();
// if is array gpr20 is used
const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value();
const u64 last_coord_register =
((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array
? static_cast<u64>(instr.gpr20.Value())
: coord_register + 1;
std::vector<Node> coords;
for (std::size_t i = 0; i < type_coord_count; ++i) {
const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1);
coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
}
std::optional<u32> array_offset;
if (is_array) {
array_offset = static_cast<u32>(coords.size());
coords.push_back(GetRegister(array_register));
}
const auto coords_count = static_cast<u32>(coords.size());
if (lod_enabled) {
// When lod is used always is in grp20
coords.push_back(GetRegister(instr.gpr20));
} else {
coords.push_back(Immediate(0));
}
const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto params = coords;
MetaTexture meta{sampler, element, coords_count, array_offset};
values[element] =
Operation(OperationCode::F4TexelFetch, std::move(meta), std::move(params));
}
return values;
}
std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled,
std::size_t max_coords, std::size_t max_inputs) {
const std::size_t coord_count = GetCoordCount(texture_type);
std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0);
const std::size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0);
if (total_coord_count > max_coords || total_reg_count > max_inputs) {
UNIMPLEMENTED_MSG("Unsupported Texture operation");
total_coord_count = std::min(total_coord_count, max_coords);
}
// 1D.DC OpenGL is using a vec3 but 2nd component is ignored later.
total_coord_count +=
(depth_compare && !is_array && texture_type == TextureType::Texture1D) ? 1 : 0;
return {coord_count, total_coord_count};
}
} // namespace VideoCommon::Shader

View File

@@ -1,178 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
using Tegra::Shader::ConditionCode;
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
using Tegra::Shader::Register;
u32 ShaderIR::DecodeOther(BasicBlock& bb, const BasicBlock& code, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
switch (opcode->get().GetId()) {
case OpCode::Id::EXIT: {
const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "EXIT condition code used: {}",
static_cast<u32>(cc));
switch (instr.flow.cond) {
case Tegra::Shader::FlowCondition::Always:
bb.push_back(Operation(OperationCode::Exit));
if (instr.pred.pred_index == static_cast<u64>(Tegra::Shader::Pred::UnusedIndex)) {
// If this is an unconditional exit then just end processing here,
// otherwise we have to account for the possibility of the condition
// not being met, so continue processing the next instruction.
pc = MAX_PROGRAM_LENGTH - 1;
}
break;
case Tegra::Shader::FlowCondition::Fcsm_Tr:
// TODO(bunnei): What is this used for? If we assume this conditon is not
// satisifed, dual vertex shaders in Farming Simulator make more sense
UNIMPLEMENTED_MSG("Skipping unknown FlowCondition::Fcsm_Tr");
break;
default:
UNIMPLEMENTED_MSG("Unhandled flow condition: {}",
static_cast<u32>(instr.flow.cond.Value()));
}
break;
}
case OpCode::Id::KIL: {
UNIMPLEMENTED_IF(instr.flow.cond != Tegra::Shader::FlowCondition::Always);
const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "KIL condition code used: {}",
static_cast<u32>(cc));
bb.push_back(Operation(OperationCode::Discard));
break;
}
case OpCode::Id::MOV_SYS: {
switch (instr.sys20) {
case Tegra::Shader::SystemVariable::InvocationInfo: {
LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete");
SetRegister(bb, instr.gpr0, Immediate(0u));
break;
}
case Tegra::Shader::SystemVariable::Ydirection: {
// Config pack's third value is Y_NEGATE's state.
SetRegister(bb, instr.gpr0, Operation(OperationCode::YNegate));
break;
}
default:
UNIMPLEMENTED_MSG("Unhandled system move: {}", static_cast<u32>(instr.sys20.Value()));
}
break;
}
case OpCode::Id::BRA: {
UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
"BRA with constant buffers are not implemented");
const u32 target = pc + instr.bra.GetBranchTarget();
const Node branch = Operation(OperationCode::Branch, Immediate(target));
const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
if (cc != Tegra::Shader::ConditionCode::T) {
bb.push_back(Conditional(GetConditionCode(cc), {branch}));
} else {
bb.push_back(branch);
}
break;
}
case OpCode::Id::SSY: {
UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
"Constant buffer flow is not supported");
// The SSY opcode tells the GPU where to re-converge divergent execution paths, it sets the
// target of the jump that the SYNC instruction will make. The SSY opcode has a similar
// structure to the BRA opcode.
const u32 target = pc + instr.bra.GetBranchTarget();
bb.push_back(Operation(OperationCode::PushFlowStack, Immediate(target)));
break;
}
case OpCode::Id::PBK: {
UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
"Constant buffer PBK is not supported");
// PBK pushes to a stack the address where BRK will jump to. This shares stack with SSY but
// using SYNC on a PBK address will kill the shader execution. We don't emulate this because
// it's very unlikely a driver will emit such invalid shader.
const u32 target = pc + instr.bra.GetBranchTarget();
bb.push_back(Operation(OperationCode::PushFlowStack, Immediate(target)));
break;
}
case OpCode::Id::SYNC: {
const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "SYNC condition code used: {}",
static_cast<u32>(cc));
// The SYNC opcode jumps to the address previously set by the SSY opcode
bb.push_back(Operation(OperationCode::PopFlowStack));
break;
}
case OpCode::Id::BRK: {
const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "BRK condition code used: {}",
static_cast<u32>(cc));
// The BRK opcode jumps to the address previously set by the PBK opcode
bb.push_back(Operation(OperationCode::PopFlowStack));
break;
}
case OpCode::Id::IPA: {
const auto& attribute = instr.attribute.fmt28;
const Tegra::Shader::IpaMode input_mode{instr.ipa.interp_mode.Value(),
instr.ipa.sample_mode.Value()};
const Node attr = GetInputAttribute(attribute.index, attribute.element, input_mode);
const Node value = GetSaturatedFloat(attr, instr.ipa.saturate);
SetRegister(bb, instr.gpr0, value);
break;
}
case OpCode::Id::OUT_R: {
UNIMPLEMENTED_IF_MSG(instr.gpr20.Value() != Register::ZeroIndex,
"Stream buffer is not supported");
if (instr.out.emit) {
// gpr0 is used to store the next address and gpr8 contains the address to emit.
// Hardware uses pointers here but we just ignore it
bb.push_back(Operation(OperationCode::EmitVertex));
SetRegister(bb, instr.gpr0, Immediate(0));
}
if (instr.out.cut) {
bb.push_back(Operation(OperationCode::EndPrimitive));
}
break;
}
case OpCode::Id::ISBERD: {
UNIMPLEMENTED_IF(instr.isberd.o != 0);
UNIMPLEMENTED_IF(instr.isberd.skew != 0);
UNIMPLEMENTED_IF(instr.isberd.shift != Tegra::Shader::IsberdShift::None);
UNIMPLEMENTED_IF(instr.isberd.mode != Tegra::Shader::IsberdMode::None);
LOG_WARNING(HW_GPU, "ISBERD instruction is incomplete");
SetRegister(bb, instr.gpr0, GetRegister(instr.gpr8));
break;
}
case OpCode::Id::DEPBAR: {
LOG_WARNING(HW_GPU, "DEPBAR instruction is stubbed");
break;
}
default:
UNIMPLEMENTED_MSG("Unhandled instruction: {}", opcode->get().GetName());
}
return pc;
}
} // namespace VideoCommon::Shader

View File

@@ -1,67 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
using Tegra::Shader::Pred;
u32 ShaderIR::DecodePredicateSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
switch (opcode->get().GetId()) {
case OpCode::Id::PSETP: {
const Node op_a = GetPredicate(instr.psetp.pred12, instr.psetp.neg_pred12 != 0);
const Node op_b = GetPredicate(instr.psetp.pred29, instr.psetp.neg_pred29 != 0);
// We can't use the constant predicate as destination.
ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
const Node second_pred = GetPredicate(instr.psetp.pred39, instr.psetp.neg_pred39 != 0);
const OperationCode combiner = GetPredicateCombiner(instr.psetp.op);
const Node predicate = Operation(combiner, op_a, op_b);
// Set the primary predicate to the result of Predicate OP SecondPredicate
SetPredicate(bb, instr.psetp.pred3, Operation(combiner, predicate, second_pred));
if (instr.psetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
// Set the secondary predicate to the result of !Predicate OP SecondPredicate, if
// enabled
SetPredicate(bb, instr.psetp.pred0,
Operation(combiner, Operation(OperationCode::LogicalNegate, predicate),
second_pred));
}
break;
}
case OpCode::Id::CSETP: {
const Node pred = GetPredicate(instr.csetp.pred39, instr.csetp.neg_pred39 != 0);
const Node condition_code = GetConditionCode(instr.csetp.cc);
const OperationCode combiner = GetPredicateCombiner(instr.csetp.op);
if (instr.csetp.pred3 != static_cast<u64>(Pred::UnusedIndex)) {
SetPredicate(bb, instr.csetp.pred3, Operation(combiner, condition_code, pred));
}
if (instr.csetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
const Node neg_cc = Operation(OperationCode::LogicalNegate, condition_code);
SetPredicate(bb, instr.csetp.pred0, Operation(combiner, neg_cc, pred));
}
break;
}
default:
UNIMPLEMENTED_MSG("Unhandled predicate instruction: {}", opcode->get().GetName());
}
return pc;
}
} // namespace VideoCommon::Shader

Some files were not shown because too many files have changed in this diff Show More