Compare commits
110 Commits
__refs_pul
...
__refs_pul
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5980aa1e51 | ||
|
|
f601f25bcc | ||
|
|
27e10e0442 | ||
|
|
6738fb5fef | ||
|
|
0a67416971 | ||
|
|
369be67039 | ||
|
|
aa599ac709 | ||
|
|
a2edb27158 | ||
|
|
f470bcb826 | ||
|
|
7a35178ee2 | ||
|
|
45c162444d | ||
|
|
6c4985edc9 | ||
|
|
5d369112d9 | ||
|
|
63bda67a34 | ||
|
|
d4b95bfc25 | ||
|
|
5e457bf258 | ||
|
|
4be61013a1 | ||
|
|
5ad889f6fd | ||
|
|
7826f0afd9 | ||
|
|
8cdbfe69b1 | ||
|
|
0ff4a5fa39 | ||
|
|
fec32fed18 | ||
|
|
a081dea8ab | ||
|
|
0d3db58657 | ||
|
|
f2e7b29c14 | ||
|
|
223a535f3f | ||
|
|
c3218c110f | ||
|
|
bebbdc2067 | ||
|
|
60926ac16b | ||
|
|
44d87ff641 | ||
|
|
b56e7f870a | ||
|
|
e2d7dda166 | ||
|
|
2a4044a858 | ||
|
|
6b0d017675 | ||
|
|
56bca83bde | ||
|
|
bbecd13697 | ||
|
|
725ba6cf63 | ||
|
|
1bdb59fc6e | ||
|
|
b77a1ed67a | ||
|
|
afa8096df5 | ||
|
|
3477b92289 | ||
|
|
2ac7472d3f | ||
|
|
0f54b541f4 | ||
|
|
5818959e54 | ||
|
|
913b7a6872 | ||
|
|
a9943222f2 | ||
|
|
5c1e1a148e | ||
|
|
5d31bab69a | ||
|
|
4882c058fd | ||
|
|
0ec9da2f9f | ||
|
|
b9e1db1312 | ||
|
|
bbc5b5d62d | ||
|
|
093e5440e2 | ||
|
|
4d82158274 | ||
|
|
ea5602b959 | ||
|
|
31fe859fe5 | ||
|
|
73b37886c1 | ||
|
|
d4fc560c05 | ||
|
|
bb67091c77 | ||
|
|
0eb0c24269 | ||
|
|
79c382fafd | ||
|
|
521fb325aa | ||
|
|
2a94745500 | ||
|
|
8fc806e88a | ||
|
|
70624e1c1d | ||
|
|
072a9796f5 | ||
|
|
aca40de224 | ||
|
|
f2549739d1 | ||
|
|
a1845d1dd3 | ||
|
|
2de7649311 | ||
|
|
e7c6045a03 | ||
|
|
dc4a93594c | ||
|
|
e7a88f0ab3 | ||
|
|
34357b110c | ||
|
|
cfb3db1a32 | ||
|
|
d45fed3030 | ||
|
|
01b21ee1e8 | ||
|
|
d5533b440c | ||
|
|
926b80102f | ||
|
|
459fce3a8f | ||
|
|
8a6fc529a9 | ||
|
|
c218ae4b02 | ||
|
|
8af6e6a052 | ||
|
|
697206092e | ||
|
|
c9d886c84e | ||
|
|
ca6f08e3b1 | ||
|
|
ce64a9fab9 | ||
|
|
b901cd584e | ||
|
|
d0966b9f7c | ||
|
|
1689784c19 | ||
|
|
13a8fde3ad | ||
|
|
56c7912159 | ||
|
|
eb6f55d880 | ||
|
|
79a23ca5f0 | ||
|
|
83050c9495 | ||
|
|
f7691ebe57 | ||
|
|
7ecf64257a | ||
|
|
9cdc576f60 | ||
|
|
1fa21fa192 | ||
|
|
32c0212b24 | ||
|
|
2bcae41a73 | ||
|
|
02ab844934 | ||
|
|
d14fbfb9b5 | ||
|
|
345f852bdb | ||
|
|
8155b12d3d | ||
|
|
f8ba72d491 | ||
|
|
b54fb8fc4c | ||
|
|
a6d2f52fc3 | ||
|
|
2b9d4088ec | ||
|
|
2e39c20da5 |
15
.ci/scripts/common/post-upload.sh
Normal file
15
.ci/scripts/common/post-upload.sh
Normal file
@@ -0,0 +1,15 @@
|
||||
#!/bin/bash -ex
|
||||
|
||||
# Copy documentation
|
||||
cp license.txt "$REV_NAME"
|
||||
cp README.md "$REV_NAME"
|
||||
|
||||
tar $COMPRESSION_FLAGS "$ARCHIVE_NAME" "$REV_NAME"
|
||||
|
||||
mv "$REV_NAME" $RELEASE_NAME
|
||||
|
||||
7z a "$REV_NAME.7z" $RELEASE_NAME
|
||||
|
||||
# move the compiled archive into the artifacts directory to be uploaded by travis releases
|
||||
mv "$ARCHIVE_NAME" artifacts/
|
||||
mv "$REV_NAME.7z" artifacts/
|
||||
6
.ci/scripts/common/pre-upload.sh
Normal file
6
.ci/scripts/common/pre-upload.sh
Normal file
@@ -0,0 +1,6 @@
|
||||
#!/bin/bash -ex
|
||||
|
||||
GITDATE="`git show -s --date=short --format='%ad' | sed 's/-//g'`"
|
||||
GITREV="`git show -s --format='%h'`"
|
||||
|
||||
mkdir -p artifacts
|
||||
6
.ci/scripts/format/docker.sh
Normal file
6
.ci/scripts/format/docker.sh
Normal file
@@ -0,0 +1,6 @@
|
||||
#!/bin/bash -ex
|
||||
|
||||
# Run clang-format
|
||||
cd /yuzu
|
||||
chmod a+x ./.ci/scripts/format/script.sh
|
||||
./.ci/scripts/format/script.sh
|
||||
4
.ci/scripts/format/exec.sh
Normal file
4
.ci/scripts/format/exec.sh
Normal file
@@ -0,0 +1,4 @@
|
||||
#!/bin/bash -ex
|
||||
|
||||
chmod a+x ./.ci/scripts/format/docker.sh
|
||||
docker run -v $(pwd):/yuzu yuzuemu/build-environments:linux-clang-format /bin/bash -ex /yuzu/.ci/scripts/format/docker.sh
|
||||
37
.ci/scripts/format/script.sh
Normal file
37
.ci/scripts/format/script.sh
Normal file
@@ -0,0 +1,37 @@
|
||||
#!/bin/bash -ex
|
||||
|
||||
if grep -nrI '\s$' src *.yml *.txt *.md Doxyfile .gitignore .gitmodules .ci* dist/*.desktop \
|
||||
dist/*.svg dist/*.xml; then
|
||||
echo Trailing whitespace found, aborting
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Default clang-format points to default 3.5 version one
|
||||
CLANG_FORMAT=clang-format-6.0
|
||||
$CLANG_FORMAT --version
|
||||
|
||||
if [ "$TRAVIS_EVENT_TYPE" = "pull_request" ]; then
|
||||
# Get list of every file modified in this pull request
|
||||
files_to_lint="$(git diff --name-only --diff-filter=ACMRTUXB $TRAVIS_COMMIT_RANGE | grep '^src/[^.]*[.]\(cpp\|h\)$' || true)"
|
||||
else
|
||||
# Check everything for branch pushes
|
||||
files_to_lint="$(find src/ -name '*.cpp' -or -name '*.h')"
|
||||
fi
|
||||
|
||||
# Turn off tracing for this because it's too verbose
|
||||
set +x
|
||||
|
||||
for f in $files_to_lint; do
|
||||
d=$(diff -u "$f" <($CLANG_FORMAT "$f") || true)
|
||||
if ! [ -z "$d" ]; then
|
||||
echo "!!! $f not compliant to coding style, here is the fix:"
|
||||
echo "$d"
|
||||
fail=1
|
||||
fi
|
||||
done
|
||||
|
||||
set -x
|
||||
|
||||
if [ "$fail" = 1 ]; then
|
||||
exit 1
|
||||
fi
|
||||
14
.ci/scripts/linux/docker.sh
Normal file
14
.ci/scripts/linux/docker.sh
Normal file
@@ -0,0 +1,14 @@
|
||||
#!/bin/bash -ex
|
||||
|
||||
cd /yuzu
|
||||
|
||||
ccache -s
|
||||
|
||||
mkdir build || true && cd build
|
||||
cmake .. -G Ninja -DYUZU_USE_BUNDLED_UNICORN=ON -DYUZU_USE_QT_WEB_ENGINE=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=/usr/lib/ccache/gcc -DCMAKE_CXX_COMPILER=/usr/lib/ccache/g++ -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${ENABLE_COMPATIBILITY_REPORTING:-"OFF"} -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DUSE_DISCORD_PRESENCE=ON
|
||||
|
||||
ninja
|
||||
|
||||
ccache -s
|
||||
|
||||
ctest -VV -C Release
|
||||
5
.ci/scripts/linux/exec.sh
Normal file
5
.ci/scripts/linux/exec.sh
Normal file
@@ -0,0 +1,5 @@
|
||||
#!/bin/bash -ex
|
||||
|
||||
mkdir -p "ccache" || true
|
||||
chmod a+x ./.ci/scripts/linux/docker.sh
|
||||
docker run -e ENABLE_COMPATIBILITY_REPORTING -e CCACHE_DIR=/yuzu/ccache -v $(pwd):/yuzu yuzuemu/build-environments:linux-fresh /bin/bash /yuzu/.ci/scripts/linux/docker.sh
|
||||
14
.ci/scripts/linux/upload.sh
Normal file
14
.ci/scripts/linux/upload.sh
Normal file
@@ -0,0 +1,14 @@
|
||||
#!/bin/bash -ex
|
||||
|
||||
. .ci/scripts/common/pre-upload.sh
|
||||
|
||||
REV_NAME="yuzu-linux-${GITDATE}-${GITREV}"
|
||||
ARCHIVE_NAME="${REV_NAME}.tar.xz"
|
||||
COMPRESSION_FLAGS="-cJvf"
|
||||
|
||||
mkdir "$REV_NAME"
|
||||
|
||||
cp build/bin/yuzu-cmd "$REV_NAME"
|
||||
cp build/bin/yuzu "$REV_NAME"
|
||||
|
||||
. .ci/scripts/common/post-upload.sh
|
||||
28
.ci/scripts/merge/apply-patches-by-label.py
Normal file
28
.ci/scripts/merge/apply-patches-by-label.py
Normal file
@@ -0,0 +1,28 @@
|
||||
# Download all pull requests as patches that match a specific label
|
||||
# Usage: python download-patches-by-label.py <Label to Match> <Root Path Folder to DL to>
|
||||
|
||||
import requests, sys, json, urllib3.request, shutil, subprocess
|
||||
|
||||
http = urllib3.PoolManager()
|
||||
dl_list = {}
|
||||
|
||||
def check_individual(labels):
|
||||
for label in labels:
|
||||
if (label["name"] == sys.argv[1]):
|
||||
return True
|
||||
return False
|
||||
|
||||
try:
|
||||
url = 'https://api.github.com/repos/yuzu-emu/yuzu/pulls'
|
||||
response = requests.get(url)
|
||||
if (response.ok):
|
||||
j = json.loads(response.content)
|
||||
for pr in j:
|
||||
if (check_individual(pr["labels"])):
|
||||
pn = pr["number"]
|
||||
print("Matched PR# %s" % pn)
|
||||
print(subprocess.check_output(["git", "fetch", "https://github.com/yuzu-emu/yuzu.git", "pull/%s/head:pr-%s" % (pn, pn), "-f"]))
|
||||
print(subprocess.check_output(["git", "merge", "--squash", "pr-%s" % pn]))
|
||||
print(subprocess.check_output(["git", "commit", "-m\"Merge PR %s\"" % pn]))
|
||||
except:
|
||||
sys.exit(-1)
|
||||
18
.ci/scripts/merge/check-label-presence.py
Normal file
18
.ci/scripts/merge/check-label-presence.py
Normal file
@@ -0,0 +1,18 @@
|
||||
# Checks to see if the specified pull request # has the specified tag
|
||||
# Usage: python check-label-presence.py <Pull Request ID> <Name of Label>
|
||||
|
||||
import requests, json, sys
|
||||
|
||||
try:
|
||||
url = 'https://api.github.com/repos/yuzu-emu/yuzu/issues/%s' % sys.argv[1]
|
||||
response = requests.get(url)
|
||||
if (response.ok):
|
||||
j = json.loads(response.content)
|
||||
for label in j["labels"]:
|
||||
if label["name"] == sys.argv[2]:
|
||||
print('##vso[task.setvariable variable=enabletesting;]true')
|
||||
sys.exit()
|
||||
except:
|
||||
sys.exit(-1)
|
||||
|
||||
print('##vso[task.setvariable variable=enabletesting;]false')
|
||||
2
.ci/scripts/merge/yuzubot-git-config.sh
Normal file
2
.ci/scripts/merge/yuzubot-git-config.sh
Normal file
@@ -0,0 +1,2 @@
|
||||
git config --global user.email "yuzu@yuzu-emu.org"
|
||||
git config --global user.name "yuzubot"
|
||||
50
.ci/scripts/windows/docker.sh
Normal file
50
.ci/scripts/windows/docker.sh
Normal file
@@ -0,0 +1,50 @@
|
||||
#!/bin/bash -ex
|
||||
|
||||
cd /yuzu
|
||||
|
||||
ccache -s
|
||||
|
||||
# Dirty hack to trick unicorn makefile into believing we are in a MINGW system
|
||||
mv /bin/uname /bin/uname1 && echo -e '#!/bin/sh\necho MINGW64' >> /bin/uname
|
||||
chmod +x /bin/uname
|
||||
|
||||
# Dirty hack to trick unicorn makefile into believing we have cmd
|
||||
echo '' >> /bin/cmd
|
||||
chmod +x /bin/cmd
|
||||
|
||||
mkdir build || true && cd build
|
||||
cmake .. -G Ninja -DCMAKE_TOOLCHAIN_FILE="$(pwd)/../CMakeModules/MinGWCross.cmake" -DUSE_CCACHE=ON -DYUZU_USE_BUNDLED_UNICORN=ON -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DCMAKE_BUILD_TYPE=Release
|
||||
ninja
|
||||
|
||||
# Clean up the dirty hacks
|
||||
rm /bin/uname && mv /bin/uname1 /bin/uname
|
||||
rm /bin/cmd
|
||||
|
||||
ccache -s
|
||||
|
||||
echo "Tests skipped"
|
||||
#ctest -VV -C Release
|
||||
|
||||
echo 'Prepare binaries...'
|
||||
cd ..
|
||||
mkdir package
|
||||
|
||||
QT_PLATFORM_DLL_PATH='/usr/x86_64-w64-mingw32/lib/qt5/plugins/platforms/'
|
||||
find build/ -name "yuzu*.exe" -exec cp {} 'package' \;
|
||||
|
||||
# copy Qt plugins
|
||||
mkdir package/platforms
|
||||
cp "${QT_PLATFORM_DLL_PATH}/qwindows.dll" package/platforms/
|
||||
cp -rv "${QT_PLATFORM_DLL_PATH}/../mediaservice/" package/
|
||||
cp -rv "${QT_PLATFORM_DLL_PATH}/../imageformats/" package/
|
||||
rm -f package/mediaservice/*d.dll
|
||||
|
||||
for i in package/*.exe; do
|
||||
# we need to process pdb here, however, cv2pdb
|
||||
# does not work here, so we just simply strip all the debug symbols
|
||||
x86_64-w64-mingw32-strip "${i}"
|
||||
done
|
||||
|
||||
pip3 install pefile
|
||||
python3 .ci/scripts/windows/scan_dll.py package/*.exe "package/"
|
||||
python3 .ci/scripts/windows/scan_dll.py package/imageformats/*.dll "package/"
|
||||
5
.ci/scripts/windows/exec.sh
Normal file
5
.ci/scripts/windows/exec.sh
Normal file
@@ -0,0 +1,5 @@
|
||||
#!/bin/bash -ex
|
||||
|
||||
mkdir -p "ccache" || true
|
||||
chmod a+x ./.ci/scripts/windows/docker.sh
|
||||
docker run -e CCACHE_DIR=/yuzu/ccache -v $(pwd):/yuzu yuzuemu/build-environments:linux-mingw /bin/bash -ex /yuzu/.ci/scripts/windows/docker.sh
|
||||
106
.ci/scripts/windows/scan_dll.py
Normal file
106
.ci/scripts/windows/scan_dll.py
Normal file
@@ -0,0 +1,106 @@
|
||||
import pefile
|
||||
import sys
|
||||
import re
|
||||
import os
|
||||
import queue
|
||||
import shutil
|
||||
|
||||
# constant definitions
|
||||
KNOWN_SYS_DLLS = ['WINMM.DLL', 'MSVCRT.DLL', 'VERSION.DLL', 'MPR.DLL',
|
||||
'DWMAPI.DLL', 'UXTHEME.DLL', 'DNSAPI.DLL', 'IPHLPAPI.DLL']
|
||||
# below is for Ubuntu 18.04 with specified PPA enabled, if you are using
|
||||
# other distro or different repositories, change the following accordingly
|
||||
DLL_PATH = [
|
||||
'/usr/x86_64-w64-mingw32/bin/',
|
||||
'/usr/x86_64-w64-mingw32/lib/',
|
||||
'/usr/lib/gcc/x86_64-w64-mingw32/7.3-posix/'
|
||||
]
|
||||
|
||||
missing = []
|
||||
|
||||
|
||||
def parse_imports(file_name):
|
||||
results = []
|
||||
pe = pefile.PE(file_name, fast_load=True)
|
||||
pe.parse_data_directories()
|
||||
|
||||
for entry in pe.DIRECTORY_ENTRY_IMPORT:
|
||||
current = entry.dll.decode()
|
||||
current_u = current.upper() # b/c Windows is often case insensitive
|
||||
# here we filter out system dlls
|
||||
# dll w/ names like *32.dll are likely to be system dlls
|
||||
if current_u.upper() not in KNOWN_SYS_DLLS and not re.match(string=current_u, pattern=r'.*32\.DLL'):
|
||||
results.append(current)
|
||||
|
||||
return results
|
||||
|
||||
|
||||
def parse_imports_recursive(file_name, path_list=[]):
|
||||
q = queue.Queue() # create a FIFO queue
|
||||
# file_name can be a string or a list for the convience
|
||||
if isinstance(file_name, str):
|
||||
q.put(file_name)
|
||||
elif isinstance(file_name, list):
|
||||
for i in file_name:
|
||||
q.put(i)
|
||||
full_list = []
|
||||
while q.qsize():
|
||||
current = q.get_nowait()
|
||||
print('> %s' % current)
|
||||
deps = parse_imports(current)
|
||||
# if this dll does not have any import, ignore it
|
||||
if not deps:
|
||||
continue
|
||||
for dep in deps:
|
||||
# the dependency already included in the list, skip
|
||||
if dep in full_list:
|
||||
continue
|
||||
# find the requested dll in the provided paths
|
||||
full_path = find_dll(dep)
|
||||
if not full_path:
|
||||
missing.append(dep)
|
||||
continue
|
||||
full_list.append(dep)
|
||||
q.put(full_path)
|
||||
path_list.append(full_path)
|
||||
return full_list
|
||||
|
||||
|
||||
def find_dll(name):
|
||||
for path in DLL_PATH:
|
||||
for root, _, files in os.walk(path):
|
||||
for f in files:
|
||||
if name.lower() == f.lower():
|
||||
return os.path.join(root, f)
|
||||
|
||||
|
||||
def deploy(name, dst, dry_run=False):
|
||||
dlls_path = []
|
||||
parse_imports_recursive(name, dlls_path)
|
||||
for dll_entry in dlls_path:
|
||||
if not dry_run:
|
||||
shutil.copy(dll_entry, dst)
|
||||
else:
|
||||
print('[Dry-Run] Copy %s to %s' % (dll_entry, dst))
|
||||
print('Deploy completed.')
|
||||
return dlls_path
|
||||
|
||||
|
||||
def main():
|
||||
if len(sys.argv) < 3:
|
||||
print('Usage: %s [files to examine ...] [target deploy directory]')
|
||||
return 1
|
||||
to_deploy = sys.argv[1:-1]
|
||||
tgt_dir = sys.argv[-1]
|
||||
if not os.path.isdir(tgt_dir):
|
||||
print('%s is not a directory.' % tgt_dir)
|
||||
return 1
|
||||
print('Scanning dependencies...')
|
||||
deploy(to_deploy, tgt_dir)
|
||||
if missing:
|
||||
print('Following DLLs are not found: %s' % ('\n'.join(missing)))
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
13
.ci/scripts/windows/upload.sh
Normal file
13
.ci/scripts/windows/upload.sh
Normal file
@@ -0,0 +1,13 @@
|
||||
#!/bin/bash -ex
|
||||
|
||||
. .ci/scripts/common/pre-upload.sh
|
||||
|
||||
REV_NAME="yuzu-windows-mingw-${GITDATE}-${GITREV}"
|
||||
ARCHIVE_NAME="${REV_NAME}.tar.gz"
|
||||
COMPRESSION_FLAGS="-czvf"
|
||||
|
||||
mkdir "$REV_NAME"
|
||||
# get around the permission issues
|
||||
cp -r package/* "$REV_NAME"
|
||||
|
||||
. .ci/scripts/common/post-upload.sh
|
||||
21
.ci/templates/build-single.yml
Normal file
21
.ci/templates/build-single.yml
Normal file
@@ -0,0 +1,21 @@
|
||||
parameters:
|
||||
artifactSource: 'true'
|
||||
|
||||
steps:
|
||||
- task: DockerInstaller@0
|
||||
displayName: 'Prepare Environment'
|
||||
inputs:
|
||||
dockerVersion: '17.09.0-ce'
|
||||
- task: CacheBeta@0
|
||||
displayName: 'Cache Build System'
|
||||
inputs:
|
||||
key: yuzu-v1-$(BuildName)-$(BuildSuffix)-$(CacheSuffix)
|
||||
path: $(System.DefaultWorkingDirectory)/ccache
|
||||
cacheHitVar: CACHE_RESTORED
|
||||
- script: chmod a+x ./.ci/scripts/$(ScriptFolder)/exec.sh && ./.ci/scripts/$(ScriptFolder)/exec.sh
|
||||
displayName: 'Build'
|
||||
- script: chmod a+x ./.ci/scripts/$(ScriptFolder)/upload.sh && ./.ci/scripts/$(ScriptFolder)/upload.sh
|
||||
displayName: 'Package Artifacts'
|
||||
- publish: artifacts
|
||||
artifact: 'yuzu-$(BuildName)-$(BuildSuffix)'
|
||||
displayName: 'Upload Artifacts'
|
||||
22
.ci/templates/build-standard.yml
Normal file
22
.ci/templates/build-standard.yml
Normal file
@@ -0,0 +1,22 @@
|
||||
jobs:
|
||||
- job: build
|
||||
displayName: 'standard'
|
||||
pool:
|
||||
vmImage: ubuntu-latest
|
||||
strategy:
|
||||
maxParallel: 10
|
||||
matrix:
|
||||
windows:
|
||||
BuildSuffix: 'windows-mingw'
|
||||
ScriptFolder: 'windows'
|
||||
linux:
|
||||
BuildSuffix: 'linux'
|
||||
ScriptFolder: 'linux'
|
||||
steps:
|
||||
- template: ./sync-source.yml
|
||||
parameters:
|
||||
artifactSource: $(parameters.artifactSource)
|
||||
needSubmodules: 'true'
|
||||
- template: ./build-single.yml
|
||||
parameters:
|
||||
artifactSource: 'false'
|
||||
30
.ci/templates/build-testing.yml
Normal file
30
.ci/templates/build-testing.yml
Normal file
@@ -0,0 +1,30 @@
|
||||
jobs:
|
||||
- job: build_test
|
||||
displayName: 'testing'
|
||||
pool:
|
||||
vmImage: ubuntu-latest
|
||||
strategy:
|
||||
maxParallel: 10
|
||||
matrix:
|
||||
windows:
|
||||
BuildSuffix: 'windows-testing'
|
||||
ScriptFolder: 'windows'
|
||||
steps:
|
||||
- task: PythonScript@0
|
||||
condition: eq(variables['Build.Reason'], 'PullRequest')
|
||||
displayName: 'Determine Testing Status'
|
||||
inputs:
|
||||
scriptSource: 'filePath'
|
||||
scriptPath: '../scripts/merge/check-label-presence.py'
|
||||
arguments: '$(System.PullRequest.PullRequestNumber) create-testing-build'
|
||||
- ${{ if eq(variables.enabletesting, 'true') }}:
|
||||
- template: ./sync-source.yml
|
||||
parameters:
|
||||
artifactSource: $(parameters.artifactSource)
|
||||
needSubmodules: 'true'
|
||||
- template: ./mergebot.yml
|
||||
parameters:
|
||||
matchLabel: 'testing-merge'
|
||||
- template: ./build-single.yml
|
||||
parameters:
|
||||
artifactSource: 'false'
|
||||
14
.ci/templates/format-check.yml
Normal file
14
.ci/templates/format-check.yml
Normal file
@@ -0,0 +1,14 @@
|
||||
parameters:
|
||||
artifactSource: 'true'
|
||||
|
||||
steps:
|
||||
- template: ./sync-source.yml
|
||||
parameters:
|
||||
artifactSource: $(parameters.artifactSource)
|
||||
needSubmodules: 'false'
|
||||
- task: DockerInstaller@0
|
||||
displayName: 'Prepare Environment'
|
||||
inputs:
|
||||
dockerVersion: '17.09.0-ce'
|
||||
- script: chmod a+x ./.ci/scripts/format/exec.sh && ./.ci/scripts/format/exec.sh
|
||||
displayName: 'Verify Formatting'
|
||||
46
.ci/templates/merge.yml
Normal file
46
.ci/templates/merge.yml
Normal file
@@ -0,0 +1,46 @@
|
||||
jobs:
|
||||
- job: merge
|
||||
displayName: 'pull requests'
|
||||
steps:
|
||||
- checkout: self
|
||||
submodules: recursive
|
||||
- template: ./mergebot.yml
|
||||
parameters:
|
||||
matchLabel: '$(BuildName)-merge'
|
||||
- task: ArchiveFiles@2
|
||||
displayName: 'Package Source'
|
||||
inputs:
|
||||
rootFolderOrFile: '$(System.DefaultWorkingDirectory)'
|
||||
includeRootFolder: false
|
||||
archiveType: '7z'
|
||||
archiveFile: '$(Build.ArtifactStagingDirectory)/yuzu-$(BuildName)-source.7z'
|
||||
- task: PublishPipelineArtifact@1
|
||||
displayName: 'Upload Artifacts'
|
||||
inputs:
|
||||
targetPath: '$(Build.ArtifactStagingDirectory)/yuzu-$(BuildName)-source.7z'
|
||||
artifact: 'yuzu-$(BuildName)-source'
|
||||
replaceExistingArchive: true
|
||||
- job: upload_source
|
||||
displayName: 'upload'
|
||||
dependsOn: merge
|
||||
steps:
|
||||
- template: ./sync-source.yml
|
||||
parameters:
|
||||
artifactSource: 'true'
|
||||
needSubmodules: 'true'
|
||||
- script: chmod a+x $(System.DefaultWorkingDirectory)/.ci/scripts/merge/yuzubot-git-config.sh && $(System.DefaultWorkingDirectory)/.ci/scripts/merge/yuzubot-git-config.sh
|
||||
displayName: 'Apply Git Configuration'
|
||||
- script: git tag -a $(BuildName)-$(Build.BuildId) -m "yuzu $(BuildName) $(Build.BuildNumber) $(Build.DefinitionName)"
|
||||
displayName: 'Tag Source'
|
||||
- script: git remote add other $(GitRepoPushChangesURL)
|
||||
displayName: 'Register Repository'
|
||||
- script: git push --follow-tags --force other HEAD:$(GitPushBranch)
|
||||
displayName: 'Update Code'
|
||||
- script: git rev-list -n 1 $(BuildName)-$(Build.BuildId) > $(Build.ArtifactStagingDirectory)/tag-commit.sha
|
||||
displayName: 'Calculate Release Point'
|
||||
- task: PublishPipelineArtifact@1
|
||||
displayName: 'Upload Release Point'
|
||||
inputs:
|
||||
targetPath: '$(Build.ArtifactStagingDirectory)/tag-commit.sha'
|
||||
artifact: 'yuzu-$(BuildName)-release-point'
|
||||
replaceExistingArchive: true
|
||||
15
.ci/templates/mergebot.yml
Normal file
15
.ci/templates/mergebot.yml
Normal file
@@ -0,0 +1,15 @@
|
||||
parameters:
|
||||
matchLabel: 'dummy-merge'
|
||||
|
||||
steps:
|
||||
- script: mkdir $(System.DefaultWorkingDirectory)/patches && pip install requests urllib3
|
||||
displayName: 'Prepare Environment'
|
||||
- script: chmod a+x $(System.DefaultWorkingDirectory)/.ci/scripts/merge/yuzubot-git-config.sh && $(System.DefaultWorkingDirectory)/.ci/scripts/merge/yuzubot-git-config.sh
|
||||
displayName: 'Apply Git Configuration'
|
||||
- task: PythonScript@0
|
||||
displayName: 'Discover, Download, and Apply Patches'
|
||||
inputs:
|
||||
scriptSource: 'filePath'
|
||||
scriptPath: '.ci/scripts/merge/apply-patches-by-label.py'
|
||||
arguments: '${{ parameters.matchLabel }} patches'
|
||||
workingDirectory: '$(System.DefaultWorkingDirectory)'
|
||||
16
.ci/templates/retrieve-artifact-source.yml
Normal file
16
.ci/templates/retrieve-artifact-source.yml
Normal file
@@ -0,0 +1,16 @@
|
||||
steps:
|
||||
- checkout: none
|
||||
- task: DownloadPipelineArtifact@2
|
||||
displayName: 'Download Source'
|
||||
inputs:
|
||||
artifactName: 'yuzu-$(BuildName)-source'
|
||||
buildType: 'current'
|
||||
targetPath: '$(Build.ArtifactStagingDirectory)'
|
||||
- script: rm -rf $(System.DefaultWorkingDirectory) && mkdir $(System.DefaultWorkingDirectory)
|
||||
displayName: 'Clean Working Directory'
|
||||
- task: ExtractFiles@1
|
||||
displayName: 'Prepare Source'
|
||||
inputs:
|
||||
archiveFilePatterns: '$(Build.ArtifactStagingDirectory)/*.7z'
|
||||
destinationFolder: '$(System.DefaultWorkingDirectory)'
|
||||
cleanDestinationFolder: false
|
||||
11
.ci/templates/retrieve-master-source.yml
Normal file
11
.ci/templates/retrieve-master-source.yml
Normal file
@@ -0,0 +1,11 @@
|
||||
parameters:
|
||||
needSubmodules: 'true'
|
||||
|
||||
steps:
|
||||
- checkout: self
|
||||
displayName: 'Checkout Recursive'
|
||||
submodules: recursive
|
||||
# condition: eq(parameters.needSubmodules, 'true')
|
||||
#- checkout: self
|
||||
# displayName: 'Checkout Fast'
|
||||
# condition: ne(parameters.needSubmodules, 'true')
|
||||
7
.ci/templates/sync-source.yml
Normal file
7
.ci/templates/sync-source.yml
Normal file
@@ -0,0 +1,7 @@
|
||||
steps:
|
||||
- ${{ if eq(parameters.artifactSource, 'true') }}:
|
||||
- template: ./retrieve-artifact-source.yml
|
||||
- ${{ if ne(parameters.artifactSource, 'true') }}:
|
||||
- template: ./retrieve-master-source.yml
|
||||
parameters:
|
||||
needSubmodules: $(parameters.needSubmodules)
|
||||
@@ -1,19 +1,23 @@
|
||||
# Starter pipeline
|
||||
# Start with a minimal pipeline that you can customize to build and deploy your code.
|
||||
# Add steps that build, run tests, deploy, and more:
|
||||
# https://aka.ms/yaml
|
||||
|
||||
trigger:
|
||||
- master
|
||||
|
||||
pool:
|
||||
vmImage: 'ubuntu-latest'
|
||||
|
||||
steps:
|
||||
- script: echo Hello, world!
|
||||
displayName: 'Run a one-line script'
|
||||
|
||||
- script: |
|
||||
echo Add other tasks to build, test, and deploy your project.
|
||||
echo See https://aka.ms/yaml
|
||||
displayName: 'Run a multi-line script'
|
||||
stages:
|
||||
- stage: merge
|
||||
displayName: 'merge'
|
||||
jobs:
|
||||
- template: ./templates/merge.yml
|
||||
- stage: format
|
||||
dependsOn: merge
|
||||
displayName: 'format'
|
||||
jobs:
|
||||
- job: format
|
||||
displayName: 'clang'
|
||||
pool:
|
||||
vmImage: ubuntu-latest
|
||||
steps:
|
||||
- template: ./templates/format-check.yml
|
||||
- stage: build
|
||||
displayName: 'build'
|
||||
dependsOn: format
|
||||
jobs:
|
||||
- template: ./templates/build-standard.yml
|
||||
|
||||
18
.ci/yuzu-verify.yml
Normal file
18
.ci/yuzu-verify.yml
Normal file
@@ -0,0 +1,18 @@
|
||||
stages:
|
||||
- stage: format
|
||||
displayName: 'format'
|
||||
jobs:
|
||||
- job: format
|
||||
displayName: 'clang'
|
||||
pool:
|
||||
vmImage: ubuntu-latest
|
||||
steps:
|
||||
- template: ./templates/format-check.yml
|
||||
parameters:
|
||||
artifactSource: 'false'
|
||||
- stage: build
|
||||
displayName: 'build'
|
||||
dependsOn: format
|
||||
jobs:
|
||||
- template: ./templates/build-standard.yml
|
||||
- template: ./templates/build-testing.yml
|
||||
19
.ci/yuzu.yml
19
.ci/yuzu.yml
@@ -1,19 +0,0 @@
|
||||
# Starter pipeline
|
||||
# Start with a minimal pipeline that you can customize to build and deploy your code.
|
||||
# Add steps that build, run tests, deploy, and more:
|
||||
# https://aka.ms/yaml
|
||||
|
||||
trigger:
|
||||
- master
|
||||
|
||||
pool:
|
||||
vmImage: 'ubuntu-latest'
|
||||
|
||||
steps:
|
||||
- script: echo Hello, world!
|
||||
displayName: 'Run a one-line script'
|
||||
|
||||
- script: |
|
||||
echo Add other tasks to build, test, and deploy your project.
|
||||
echo See https://aka.ms/yaml
|
||||
displayName: 'Run a multi-line script'
|
||||
@@ -82,6 +82,8 @@ set(HASH_FILES
|
||||
"${VIDEO_CORE}/shader/decode/shift.cpp"
|
||||
"${VIDEO_CORE}/shader/decode/video.cpp"
|
||||
"${VIDEO_CORE}/shader/decode/xmad.cpp"
|
||||
"${VIDEO_CORE}/shader/control_flow.cpp"
|
||||
"${VIDEO_CORE}/shader/control_flow.h"
|
||||
"${VIDEO_CORE}/shader/decode.cpp"
|
||||
"${VIDEO_CORE}/shader/node.h"
|
||||
"${VIDEO_CORE}/shader/node_helper.cpp"
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
yuzu emulator
|
||||
yuzu emulator
|
||||
=============
|
||||
[](https://travis-ci.org/yuzu-emu/yuzu)
|
||||
[](https://ci.appveyor.com/project/bunnei/yuzu)
|
||||
[](https://dev.azure.com/yuzu-emu/yuzu/)
|
||||
|
||||
yuzu is an experimental open-source emulator for the Nintendo Switch from the creators of [Citra](https://citra-emu.org/).
|
||||
|
||||
|
||||
@@ -73,13 +73,15 @@ private:
|
||||
EffectInStatus info{};
|
||||
};
|
||||
AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, AudioRendererParameter params,
|
||||
Kernel::SharedPtr<Kernel::WritableEvent> buffer_event)
|
||||
Kernel::SharedPtr<Kernel::WritableEvent> buffer_event,
|
||||
std::size_t instance_number)
|
||||
: worker_params{params}, buffer_event{buffer_event}, voices(params.voice_count),
|
||||
effects(params.effect_count) {
|
||||
|
||||
audio_out = std::make_unique<AudioCore::AudioOut>();
|
||||
stream = audio_out->OpenStream(core_timing, STREAM_SAMPLE_RATE, STREAM_NUM_CHANNELS,
|
||||
"AudioRenderer", [=]() { buffer_event->Signal(); });
|
||||
fmt::format("AudioRenderer-Instance{}", instance_number),
|
||||
[=]() { buffer_event->Signal(); });
|
||||
audio_out->StartStream(stream);
|
||||
|
||||
QueueMixedBuffer(0);
|
||||
|
||||
@@ -215,7 +215,8 @@ static_assert(sizeof(UpdateDataHeader) == 0x40, "UpdateDataHeader has wrong size
|
||||
class AudioRenderer {
|
||||
public:
|
||||
AudioRenderer(Core::Timing::CoreTiming& core_timing, AudioRendererParameter params,
|
||||
Kernel::SharedPtr<Kernel::WritableEvent> buffer_event);
|
||||
Kernel::SharedPtr<Kernel::WritableEvent> buffer_event,
|
||||
std::size_t instance_number);
|
||||
~AudioRenderer();
|
||||
|
||||
std::vector<u8> UpdateAudioRenderer(const std::vector<u8>& input_params);
|
||||
|
||||
@@ -56,6 +56,8 @@ add_custom_command(OUTPUT scm_rev.cpp
|
||||
"${VIDEO_CORE}/shader/decode/shift.cpp"
|
||||
"${VIDEO_CORE}/shader/decode/video.cpp"
|
||||
"${VIDEO_CORE}/shader/decode/xmad.cpp"
|
||||
"${VIDEO_CORE}/shader/control_flow.cpp"
|
||||
"${VIDEO_CORE}/shader/control_flow.h"
|
||||
"${VIDEO_CORE}/shader/decode.cpp"
|
||||
"${VIDEO_CORE}/shader/node.h"
|
||||
"${VIDEO_CORE}/shader/node_helper.cpp"
|
||||
|
||||
@@ -44,13 +44,6 @@ public:
|
||||
/// Step CPU by one instruction
|
||||
virtual void Step() = 0;
|
||||
|
||||
/// Maps a backing memory region for the CPU
|
||||
virtual void MapBackingMemory(VAddr address, std::size_t size, u8* memory,
|
||||
Kernel::VMAPermission perms) = 0;
|
||||
|
||||
/// Unmaps a region of memory that was previously mapped using MapBackingMemory
|
||||
virtual void UnmapMemory(VAddr address, std::size_t size) = 0;
|
||||
|
||||
/// Clear all instruction cache
|
||||
virtual void ClearInstructionCache() = 0;
|
||||
|
||||
|
||||
@@ -177,15 +177,6 @@ ARM_Dynarmic::ARM_Dynarmic(System& system, ExclusiveMonitor& exclusive_monitor,
|
||||
|
||||
ARM_Dynarmic::~ARM_Dynarmic() = default;
|
||||
|
||||
void ARM_Dynarmic::MapBackingMemory(u64 address, std::size_t size, u8* memory,
|
||||
Kernel::VMAPermission perms) {
|
||||
inner_unicorn.MapBackingMemory(address, size, memory, perms);
|
||||
}
|
||||
|
||||
void ARM_Dynarmic::UnmapMemory(u64 address, std::size_t size) {
|
||||
inner_unicorn.UnmapMemory(address, size);
|
||||
}
|
||||
|
||||
void ARM_Dynarmic::SetPC(u64 pc) {
|
||||
jit->SetPC(pc);
|
||||
}
|
||||
|
||||
@@ -23,9 +23,6 @@ public:
|
||||
ARM_Dynarmic(System& system, ExclusiveMonitor& exclusive_monitor, std::size_t core_index);
|
||||
~ARM_Dynarmic() override;
|
||||
|
||||
void MapBackingMemory(VAddr address, std::size_t size, u8* memory,
|
||||
Kernel::VMAPermission perms) override;
|
||||
void UnmapMemory(u64 address, std::size_t size) override;
|
||||
void SetPC(u64 pc) override;
|
||||
u64 GetPC() const override;
|
||||
u64 GetReg(int index) const override;
|
||||
|
||||
@@ -50,11 +50,14 @@ static void CodeHook(uc_engine* uc, uint64_t address, uint32_t size, void* user_
|
||||
|
||||
static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int size, u64 value,
|
||||
void* user_data) {
|
||||
auto* const system = static_cast<System*>(user_data);
|
||||
|
||||
ARM_Interface::ThreadContext ctx{};
|
||||
Core::CurrentArmInterface().SaveContext(ctx);
|
||||
system->CurrentArmInterface().SaveContext(ctx);
|
||||
ASSERT_MSG(false, "Attempted to read from unmapped memory: 0x{:X}, pc=0x{:X}, lr=0x{:X}", addr,
|
||||
ctx.pc, ctx.cpu_registers[30]);
|
||||
return {};
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
ARM_Unicorn::ARM_Unicorn(System& system) : system{system} {
|
||||
@@ -65,7 +68,7 @@ ARM_Unicorn::ARM_Unicorn(System& system) : system{system} {
|
||||
|
||||
uc_hook hook{};
|
||||
CHECKED(uc_hook_add(uc, &hook, UC_HOOK_INTR, (void*)InterruptHook, this, 0, -1));
|
||||
CHECKED(uc_hook_add(uc, &hook, UC_HOOK_MEM_INVALID, (void*)UnmappedMemoryHook, this, 0, -1));
|
||||
CHECKED(uc_hook_add(uc, &hook, UC_HOOK_MEM_INVALID, (void*)UnmappedMemoryHook, &system, 0, -1));
|
||||
if (GDBStub::IsServerEnabled()) {
|
||||
CHECKED(uc_hook_add(uc, &hook, UC_HOOK_CODE, (void*)CodeHook, this, 0, -1));
|
||||
last_bkpt_hit = false;
|
||||
@@ -76,15 +79,6 @@ ARM_Unicorn::~ARM_Unicorn() {
|
||||
CHECKED(uc_close(uc));
|
||||
}
|
||||
|
||||
void ARM_Unicorn::MapBackingMemory(VAddr address, std::size_t size, u8* memory,
|
||||
Kernel::VMAPermission perms) {
|
||||
CHECKED(uc_mem_map_ptr(uc, address, size, static_cast<u32>(perms), memory));
|
||||
}
|
||||
|
||||
void ARM_Unicorn::UnmapMemory(VAddr address, std::size_t size) {
|
||||
CHECKED(uc_mem_unmap(uc, address, size));
|
||||
}
|
||||
|
||||
void ARM_Unicorn::SetPC(u64 pc) {
|
||||
CHECKED(uc_reg_write(uc, UC_ARM64_REG_PC, &pc));
|
||||
}
|
||||
|
||||
@@ -18,9 +18,6 @@ public:
|
||||
explicit ARM_Unicorn(System& system);
|
||||
~ARM_Unicorn() override;
|
||||
|
||||
void MapBackingMemory(VAddr address, std::size_t size, u8* memory,
|
||||
Kernel::VMAPermission perms) override;
|
||||
void UnmapMemory(VAddr address, std::size_t size) override;
|
||||
void SetPC(u64 pc) override;
|
||||
u64 GetPC() const override;
|
||||
u64 GetReg(int index) const override;
|
||||
|
||||
@@ -327,10 +327,6 @@ private:
|
||||
static System s_instance;
|
||||
};
|
||||
|
||||
inline ARM_Interface& CurrentArmInterface() {
|
||||
return System::GetInstance().CurrentArmInterface();
|
||||
}
|
||||
|
||||
inline Kernel::Process* CurrentProcess() {
|
||||
return System::GetInstance().CurrentProcess();
|
||||
}
|
||||
|
||||
@@ -53,16 +53,12 @@ bool CpuBarrier::Rendezvous() {
|
||||
Cpu::Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier,
|
||||
std::size_t core_index)
|
||||
: cpu_barrier{cpu_barrier}, core_timing{system.CoreTiming()}, core_index{core_index} {
|
||||
if (Settings::values.cpu_jit_enabled) {
|
||||
#ifdef ARCHITECTURE_x86_64
|
||||
arm_interface = std::make_unique<ARM_Dynarmic>(system, exclusive_monitor, core_index);
|
||||
arm_interface = std::make_unique<ARM_Dynarmic>(system, exclusive_monitor, core_index);
|
||||
#else
|
||||
arm_interface = std::make_unique<ARM_Unicorn>(system);
|
||||
LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available");
|
||||
arm_interface = std::make_unique<ARM_Unicorn>(system);
|
||||
LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available");
|
||||
#endif
|
||||
} else {
|
||||
arm_interface = std::make_unique<ARM_Unicorn>(system);
|
||||
}
|
||||
|
||||
scheduler = std::make_unique<Kernel::Scheduler>(system, *arm_interface);
|
||||
}
|
||||
@@ -70,15 +66,12 @@ Cpu::Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_ba
|
||||
Cpu::~Cpu() = default;
|
||||
|
||||
std::unique_ptr<ExclusiveMonitor> Cpu::MakeExclusiveMonitor(std::size_t num_cores) {
|
||||
if (Settings::values.cpu_jit_enabled) {
|
||||
#ifdef ARCHITECTURE_x86_64
|
||||
return std::make_unique<DynarmicExclusiveMonitor>(num_cores);
|
||||
return std::make_unique<DynarmicExclusiveMonitor>(num_cores);
|
||||
#else
|
||||
return nullptr; // TODO(merry): Passthrough exclusive monitor
|
||||
// TODO(merry): Passthrough exclusive monitor
|
||||
return nullptr;
|
||||
#endif
|
||||
} else {
|
||||
return nullptr; // TODO(merry): Passthrough exclusive monitor
|
||||
}
|
||||
}
|
||||
|
||||
void Cpu::RunLoop(bool tight_loop) {
|
||||
|
||||
@@ -94,6 +94,10 @@ u64 ProgramMetadata::GetFilesystemPermissions() const {
|
||||
return aci_file_access.permissions;
|
||||
}
|
||||
|
||||
u32 ProgramMetadata::GetSystemResourceSize() const {
|
||||
return npdm_header.system_resource_size;
|
||||
}
|
||||
|
||||
const ProgramMetadata::KernelCapabilityDescriptors& ProgramMetadata::GetKernelCapabilities() const {
|
||||
return aci_kernel_capabilities;
|
||||
}
|
||||
|
||||
@@ -58,6 +58,7 @@ public:
|
||||
u32 GetMainThreadStackSize() const;
|
||||
u64 GetTitleID() const;
|
||||
u64 GetFilesystemPermissions() const;
|
||||
u32 GetSystemResourceSize() const;
|
||||
const KernelCapabilityDescriptors& GetKernelCapabilities() const;
|
||||
|
||||
void Print() const;
|
||||
@@ -76,7 +77,8 @@ private:
|
||||
u8 reserved_3;
|
||||
u8 main_thread_priority;
|
||||
u8 main_thread_cpu;
|
||||
std::array<u8, 8> reserved_4;
|
||||
std::array<u8, 4> reserved_4;
|
||||
u32_le system_resource_size;
|
||||
u32_le process_category;
|
||||
u32_le main_stack_size;
|
||||
std::array<u8, 0x10> application_name;
|
||||
|
||||
@@ -129,20 +129,17 @@ u64 Process::GetTotalPhysicalMemoryAvailable() const {
|
||||
return vm_manager.GetTotalPhysicalMemoryAvailable();
|
||||
}
|
||||
|
||||
u64 Process::GetTotalPhysicalMemoryAvailableWithoutMmHeap() const {
|
||||
// TODO: Subtract the personal heap size from this when the
|
||||
// personal heap is implemented.
|
||||
return GetTotalPhysicalMemoryAvailable();
|
||||
u64 Process::GetTotalPhysicalMemoryAvailableWithoutSystemResource() const {
|
||||
return GetTotalPhysicalMemoryAvailable() - GetSystemResourceSize();
|
||||
}
|
||||
|
||||
u64 Process::GetTotalPhysicalMemoryUsed() const {
|
||||
return vm_manager.GetCurrentHeapSize() + main_thread_stack_size + code_memory_size;
|
||||
return vm_manager.GetCurrentHeapSize() + main_thread_stack_size + code_memory_size +
|
||||
GetSystemResourceUsage();
|
||||
}
|
||||
|
||||
u64 Process::GetTotalPhysicalMemoryUsedWithoutMmHeap() const {
|
||||
// TODO: Subtract the personal heap size from this when the
|
||||
// personal heap is implemented.
|
||||
return GetTotalPhysicalMemoryUsed();
|
||||
u64 Process::GetTotalPhysicalMemoryUsedWithoutSystemResource() const {
|
||||
return GetTotalPhysicalMemoryUsed() - GetSystemResourceUsage();
|
||||
}
|
||||
|
||||
void Process::RegisterThread(const Thread* thread) {
|
||||
@@ -172,6 +169,7 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) {
|
||||
program_id = metadata.GetTitleID();
|
||||
ideal_core = metadata.GetMainThreadCore();
|
||||
is_64bit_process = metadata.Is64BitProgram();
|
||||
system_resource_size = metadata.GetSystemResourceSize();
|
||||
|
||||
vm_manager.Reset(metadata.GetAddressSpaceType());
|
||||
|
||||
@@ -186,19 +184,11 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) {
|
||||
}
|
||||
|
||||
void Process::Run(s32 main_thread_priority, u64 stack_size) {
|
||||
// The kernel always ensures that the given stack size is page aligned.
|
||||
main_thread_stack_size = Common::AlignUp(stack_size, Memory::PAGE_SIZE);
|
||||
|
||||
// Allocate and map the main thread stack
|
||||
// TODO(bunnei): This is heap area that should be allocated by the kernel and not mapped as part
|
||||
// of the user address space.
|
||||
const VAddr mapping_address = vm_manager.GetTLSIORegionEndAddress() - main_thread_stack_size;
|
||||
vm_manager
|
||||
.MapMemoryBlock(mapping_address, std::make_shared<std::vector<u8>>(main_thread_stack_size),
|
||||
0, main_thread_stack_size, MemoryState::Stack)
|
||||
.Unwrap();
|
||||
AllocateMainThreadStack(stack_size);
|
||||
tls_region_address = CreateTLSRegion();
|
||||
|
||||
vm_manager.LogLayout();
|
||||
|
||||
ChangeStatus(ProcessStatus::Running);
|
||||
|
||||
SetupMainThread(*this, kernel, main_thread_priority);
|
||||
@@ -228,6 +218,9 @@ void Process::PrepareForTermination() {
|
||||
stop_threads(system.Scheduler(2).GetThreadList());
|
||||
stop_threads(system.Scheduler(3).GetThreadList());
|
||||
|
||||
FreeTLSRegion(tls_region_address);
|
||||
tls_region_address = 0;
|
||||
|
||||
ChangeStatus(ProcessStatus::Exited);
|
||||
}
|
||||
|
||||
@@ -327,4 +320,16 @@ void Process::ChangeStatus(ProcessStatus new_status) {
|
||||
WakeupAllWaitingThreads();
|
||||
}
|
||||
|
||||
void Process::AllocateMainThreadStack(u64 stack_size) {
|
||||
// The kernel always ensures that the given stack size is page aligned.
|
||||
main_thread_stack_size = Common::AlignUp(stack_size, Memory::PAGE_SIZE);
|
||||
|
||||
// Allocate and map the main thread stack
|
||||
const VAddr mapping_address = vm_manager.GetTLSIORegionEndAddress() - main_thread_stack_size;
|
||||
vm_manager
|
||||
.MapMemoryBlock(mapping_address, std::make_shared<std::vector<u8>>(main_thread_stack_size),
|
||||
0, main_thread_stack_size, MemoryState::Stack)
|
||||
.Unwrap();
|
||||
}
|
||||
|
||||
} // namespace Kernel
|
||||
|
||||
@@ -135,6 +135,11 @@ public:
|
||||
return mutex;
|
||||
}
|
||||
|
||||
/// Gets the address to the process' dedicated TLS region.
|
||||
VAddr GetTLSRegionAddress() const {
|
||||
return tls_region_address;
|
||||
}
|
||||
|
||||
/// Gets the current status of the process
|
||||
ProcessStatus GetStatus() const {
|
||||
return status;
|
||||
@@ -168,8 +173,24 @@ public:
|
||||
return capabilities.GetPriorityMask();
|
||||
}
|
||||
|
||||
u32 IsVirtualMemoryEnabled() const {
|
||||
return is_virtual_address_memory_enabled;
|
||||
/// Gets the amount of secure memory to allocate for memory management.
|
||||
u32 GetSystemResourceSize() const {
|
||||
return system_resource_size;
|
||||
}
|
||||
|
||||
/// Gets the amount of secure memory currently in use for memory management.
|
||||
u32 GetSystemResourceUsage() const {
|
||||
// On hardware, this returns the amount of system resource memory that has
|
||||
// been used by the kernel. This is problematic for Yuzu to emulate, because
|
||||
// system resource memory is used for page tables -- and yuzu doesn't really
|
||||
// have a way to calculate how much memory is required for page tables for
|
||||
// the current process at any given time.
|
||||
// TODO: Is this even worth implementing? Games may retrieve this value via
|
||||
// an SDK function that gets used + available system resource size for debug
|
||||
// or diagnostic purposes. However, it seems unlikely that a game would make
|
||||
// decisions based on how much system memory is dedicated to its page tables.
|
||||
// Is returning a value other than zero wise?
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// Whether this process is an AArch64 or AArch32 process.
|
||||
@@ -196,15 +217,15 @@ public:
|
||||
u64 GetTotalPhysicalMemoryAvailable() const;
|
||||
|
||||
/// Retrieves the total physical memory available to this process in bytes,
|
||||
/// without the size of the personal heap added to it.
|
||||
u64 GetTotalPhysicalMemoryAvailableWithoutMmHeap() const;
|
||||
/// without the size of the personal system resource heap added to it.
|
||||
u64 GetTotalPhysicalMemoryAvailableWithoutSystemResource() const;
|
||||
|
||||
/// Retrieves the total physical memory used by this process in bytes.
|
||||
u64 GetTotalPhysicalMemoryUsed() const;
|
||||
|
||||
/// Retrieves the total physical memory used by this process in bytes,
|
||||
/// without the size of the personal heap added to it.
|
||||
u64 GetTotalPhysicalMemoryUsedWithoutMmHeap() const;
|
||||
/// without the size of the personal system resource heap added to it.
|
||||
u64 GetTotalPhysicalMemoryUsedWithoutSystemResource() const;
|
||||
|
||||
/// Gets the list of all threads created with this process as their owner.
|
||||
const std::list<const Thread*>& GetThreadList() const {
|
||||
@@ -280,6 +301,9 @@ private:
|
||||
/// a process signal.
|
||||
void ChangeStatus(ProcessStatus new_status);
|
||||
|
||||
/// Allocates the main thread stack for the process, given the stack size in bytes.
|
||||
void AllocateMainThreadStack(u64 stack_size);
|
||||
|
||||
/// Memory manager for this process.
|
||||
Kernel::VMManager vm_manager;
|
||||
|
||||
@@ -298,12 +322,16 @@ private:
|
||||
/// Title ID corresponding to the process
|
||||
u64 program_id = 0;
|
||||
|
||||
/// Specifies additional memory to be reserved for the process's memory management by the
|
||||
/// system. When this is non-zero, secure memory is allocated and used for page table allocation
|
||||
/// instead of using the normal global page tables/memory block management.
|
||||
u32 system_resource_size = 0;
|
||||
|
||||
/// Resource limit descriptor for this process
|
||||
SharedPtr<ResourceLimit> resource_limit;
|
||||
|
||||
/// The ideal CPU core for this process, threads are scheduled on this core by default.
|
||||
u8 ideal_core = 0;
|
||||
u32 is_virtual_address_memory_enabled = 0;
|
||||
|
||||
/// The Thread Local Storage area is allocated as processes create threads,
|
||||
/// each TLS area is 0x200 bytes, so one page (0x1000) is split up in 8 parts, and each part
|
||||
@@ -338,6 +366,9 @@ private:
|
||||
/// variable related facilities.
|
||||
Mutex mutex;
|
||||
|
||||
/// Address indicating the location of the process' dedicated TLS region.
|
||||
VAddr tls_region_address = 0;
|
||||
|
||||
/// Random values for svcGetInfo RandomEntropy
|
||||
std::array<u64, RANDOM_ENTROPY_SIZE> random_entropy{};
|
||||
|
||||
|
||||
@@ -318,7 +318,14 @@ static ResultCode UnmapMemory(Core::System& system, VAddr dst_addr, VAddr src_ad
|
||||
return result;
|
||||
}
|
||||
|
||||
return vm_manager.UnmapRange(dst_addr, size);
|
||||
const auto unmap_res = vm_manager.UnmapRange(dst_addr, size);
|
||||
|
||||
// Reprotect the source mapping on success
|
||||
if (unmap_res.IsSuccess()) {
|
||||
ASSERT(vm_manager.ReprotectRange(src_addr, size, VMAPermission::ReadWrite).IsSuccess());
|
||||
}
|
||||
|
||||
return unmap_res;
|
||||
}
|
||||
|
||||
/// Connect to an OS service given the port name, returns the handle to the port to out
|
||||
@@ -729,16 +736,16 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
|
||||
StackRegionBaseAddr = 14,
|
||||
StackRegionSize = 15,
|
||||
// 3.0.0+
|
||||
IsVirtualAddressMemoryEnabled = 16,
|
||||
PersonalMmHeapUsage = 17,
|
||||
SystemResourceSize = 16,
|
||||
SystemResourceUsage = 17,
|
||||
TitleId = 18,
|
||||
// 4.0.0+
|
||||
PrivilegedProcessId = 19,
|
||||
// 5.0.0+
|
||||
UserExceptionContextAddr = 20,
|
||||
// 6.0.0+
|
||||
TotalPhysicalMemoryAvailableWithoutMmHeap = 21,
|
||||
TotalPhysicalMemoryUsedWithoutMmHeap = 22,
|
||||
TotalPhysicalMemoryAvailableWithoutSystemResource = 21,
|
||||
TotalPhysicalMemoryUsedWithoutSystemResource = 22,
|
||||
};
|
||||
|
||||
const auto info_id_type = static_cast<GetInfoType>(info_id);
|
||||
@@ -756,12 +763,12 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
|
||||
case GetInfoType::StackRegionSize:
|
||||
case GetInfoType::TotalPhysicalMemoryAvailable:
|
||||
case GetInfoType::TotalPhysicalMemoryUsed:
|
||||
case GetInfoType::IsVirtualAddressMemoryEnabled:
|
||||
case GetInfoType::PersonalMmHeapUsage:
|
||||
case GetInfoType::SystemResourceSize:
|
||||
case GetInfoType::SystemResourceUsage:
|
||||
case GetInfoType::TitleId:
|
||||
case GetInfoType::UserExceptionContextAddr:
|
||||
case GetInfoType::TotalPhysicalMemoryAvailableWithoutMmHeap:
|
||||
case GetInfoType::TotalPhysicalMemoryUsedWithoutMmHeap: {
|
||||
case GetInfoType::TotalPhysicalMemoryAvailableWithoutSystemResource:
|
||||
case GetInfoType::TotalPhysicalMemoryUsedWithoutSystemResource: {
|
||||
if (info_sub_id != 0) {
|
||||
return ERR_INVALID_ENUM_VALUE;
|
||||
}
|
||||
@@ -822,8 +829,13 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
|
||||
*result = process->GetTotalPhysicalMemoryUsed();
|
||||
return RESULT_SUCCESS;
|
||||
|
||||
case GetInfoType::IsVirtualAddressMemoryEnabled:
|
||||
*result = process->IsVirtualMemoryEnabled();
|
||||
case GetInfoType::SystemResourceSize:
|
||||
*result = process->GetSystemResourceSize();
|
||||
return RESULT_SUCCESS;
|
||||
|
||||
case GetInfoType::SystemResourceUsage:
|
||||
LOG_WARNING(Kernel_SVC, "(STUBBED) Attempted to query system resource usage");
|
||||
*result = process->GetSystemResourceUsage();
|
||||
return RESULT_SUCCESS;
|
||||
|
||||
case GetInfoType::TitleId:
|
||||
@@ -831,17 +843,15 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
|
||||
return RESULT_SUCCESS;
|
||||
|
||||
case GetInfoType::UserExceptionContextAddr:
|
||||
LOG_WARNING(Kernel_SVC,
|
||||
"(STUBBED) Attempted to query user exception context address, returned 0");
|
||||
*result = 0;
|
||||
*result = process->GetTLSRegionAddress();
|
||||
return RESULT_SUCCESS;
|
||||
|
||||
case GetInfoType::TotalPhysicalMemoryAvailableWithoutMmHeap:
|
||||
*result = process->GetTotalPhysicalMemoryAvailable();
|
||||
case GetInfoType::TotalPhysicalMemoryAvailableWithoutSystemResource:
|
||||
*result = process->GetTotalPhysicalMemoryAvailableWithoutSystemResource();
|
||||
return RESULT_SUCCESS;
|
||||
|
||||
case GetInfoType::TotalPhysicalMemoryUsedWithoutMmHeap:
|
||||
*result = process->GetTotalPhysicalMemoryUsedWithoutMmHeap();
|
||||
case GetInfoType::TotalPhysicalMemoryUsedWithoutSystemResource:
|
||||
*result = process->GetTotalPhysicalMemoryUsedWithoutSystemResource();
|
||||
return RESULT_SUCCESS;
|
||||
|
||||
default:
|
||||
@@ -946,6 +956,86 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
|
||||
}
|
||||
}
|
||||
|
||||
/// Maps memory at a desired address
|
||||
static ResultCode MapPhysicalMemory(Core::System& system, VAddr addr, u64 size) {
|
||||
LOG_DEBUG(Kernel_SVC, "called, addr=0x{:016X}, size=0x{:X}", addr, size);
|
||||
|
||||
if (!Common::Is4KBAligned(addr)) {
|
||||
LOG_ERROR(Kernel_SVC, "Address is not aligned to 4KB, 0x{:016X}", addr);
|
||||
return ERR_INVALID_ADDRESS;
|
||||
}
|
||||
|
||||
if (!Common::Is4KBAligned(size)) {
|
||||
LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, 0x{:X}", size);
|
||||
return ERR_INVALID_SIZE;
|
||||
}
|
||||
|
||||
if (size == 0) {
|
||||
LOG_ERROR(Kernel_SVC, "Size is zero");
|
||||
return ERR_INVALID_SIZE;
|
||||
}
|
||||
|
||||
if (!(addr < addr + size)) {
|
||||
LOG_ERROR(Kernel_SVC, "Size causes 64-bit overflow of address");
|
||||
return ERR_INVALID_MEMORY_RANGE;
|
||||
}
|
||||
|
||||
Process* const current_process = system.Kernel().CurrentProcess();
|
||||
auto& vm_manager = current_process->VMManager();
|
||||
|
||||
if (current_process->GetSystemResourceSize() == 0) {
|
||||
LOG_ERROR(Kernel_SVC, "System Resource Size is zero");
|
||||
return ERR_INVALID_STATE;
|
||||
}
|
||||
|
||||
if (!vm_manager.IsWithinMapRegion(addr, size)) {
|
||||
LOG_ERROR(Kernel_SVC, "Range not within map region");
|
||||
return ERR_INVALID_MEMORY_RANGE;
|
||||
}
|
||||
|
||||
return vm_manager.MapPhysicalMemory(addr, size);
|
||||
}
|
||||
|
||||
/// Unmaps memory previously mapped via MapPhysicalMemory
|
||||
static ResultCode UnmapPhysicalMemory(Core::System& system, VAddr addr, u64 size) {
|
||||
LOG_DEBUG(Kernel_SVC, "called, addr=0x{:016X}, size=0x{:X}", addr, size);
|
||||
|
||||
if (!Common::Is4KBAligned(addr)) {
|
||||
LOG_ERROR(Kernel_SVC, "Address is not aligned to 4KB, 0x{:016X}", addr);
|
||||
return ERR_INVALID_ADDRESS;
|
||||
}
|
||||
|
||||
if (!Common::Is4KBAligned(size)) {
|
||||
LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, 0x{:X}", size);
|
||||
return ERR_INVALID_SIZE;
|
||||
}
|
||||
|
||||
if (size == 0) {
|
||||
LOG_ERROR(Kernel_SVC, "Size is zero");
|
||||
return ERR_INVALID_SIZE;
|
||||
}
|
||||
|
||||
if (!(addr < addr + size)) {
|
||||
LOG_ERROR(Kernel_SVC, "Size causes 64-bit overflow of address");
|
||||
return ERR_INVALID_MEMORY_RANGE;
|
||||
}
|
||||
|
||||
Process* const current_process = system.Kernel().CurrentProcess();
|
||||
auto& vm_manager = current_process->VMManager();
|
||||
|
||||
if (current_process->GetSystemResourceSize() == 0) {
|
||||
LOG_ERROR(Kernel_SVC, "System Resource Size is zero");
|
||||
return ERR_INVALID_STATE;
|
||||
}
|
||||
|
||||
if (!vm_manager.IsWithinMapRegion(addr, size)) {
|
||||
LOG_ERROR(Kernel_SVC, "Range not within map region");
|
||||
return ERR_INVALID_MEMORY_RANGE;
|
||||
}
|
||||
|
||||
return vm_manager.UnmapPhysicalMemory(addr, size);
|
||||
}
|
||||
|
||||
/// Sets the thread activity
|
||||
static ResultCode SetThreadActivity(Core::System& system, Handle handle, u32 activity) {
|
||||
LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, activity=0x{:08X}", handle, activity);
|
||||
@@ -1647,8 +1737,8 @@ static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_var
|
||||
// Wait for an address (via Address Arbiter)
|
||||
static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type, s32 value,
|
||||
s64 timeout) {
|
||||
LOG_WARNING(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, timeout={}",
|
||||
address, type, value, timeout);
|
||||
LOG_TRACE(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, timeout={}", address,
|
||||
type, value, timeout);
|
||||
|
||||
// If the passed address is a kernel virtual address, return invalid memory state.
|
||||
if (Memory::IsKernelVirtualAddress(address)) {
|
||||
@@ -1670,8 +1760,8 @@ static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type,
|
||||
// Signals to an address (via Address Arbiter)
|
||||
static ResultCode SignalToAddress(Core::System& system, VAddr address, u32 type, s32 value,
|
||||
s32 num_to_wake) {
|
||||
LOG_WARNING(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, num_to_wake=0x{:X}",
|
||||
address, type, value, num_to_wake);
|
||||
LOG_TRACE(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, num_to_wake=0x{:X}",
|
||||
address, type, value, num_to_wake);
|
||||
|
||||
// If the passed address is a kernel virtual address, return invalid memory state.
|
||||
if (Memory::IsKernelVirtualAddress(address)) {
|
||||
@@ -2303,8 +2393,8 @@ static const FunctionDef SVC_Table[] = {
|
||||
{0x29, SvcWrap<GetInfo>, "GetInfo"},
|
||||
{0x2A, nullptr, "FlushEntireDataCache"},
|
||||
{0x2B, nullptr, "FlushDataCache"},
|
||||
{0x2C, nullptr, "MapPhysicalMemory"},
|
||||
{0x2D, nullptr, "UnmapPhysicalMemory"},
|
||||
{0x2C, SvcWrap<MapPhysicalMemory>, "MapPhysicalMemory"},
|
||||
{0x2D, SvcWrap<UnmapPhysicalMemory>, "UnmapPhysicalMemory"},
|
||||
{0x2E, nullptr, "GetFutureThreadInfo"},
|
||||
{0x2F, nullptr, "GetLastThreadInfo"},
|
||||
{0x30, SvcWrap<GetResourceLimitLimitValue>, "GetResourceLimitLimitValue"},
|
||||
|
||||
@@ -32,6 +32,11 @@ void SvcWrap(Core::System& system) {
|
||||
FuncReturn(system, func(system, Param(system, 0)).raw);
|
||||
}
|
||||
|
||||
template <ResultCode func(Core::System&, u64, u64)>
|
||||
void SvcWrap(Core::System& system) {
|
||||
FuncReturn(system, func(system, Param(system, 0), Param(system, 1)).raw);
|
||||
}
|
||||
|
||||
template <ResultCode func(Core::System&, u32)>
|
||||
void SvcWrap(Core::System& system) {
|
||||
FuncReturn(system, func(system, static_cast<u32>(Param(system, 0))).raw);
|
||||
|
||||
@@ -8,10 +8,11 @@
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "common/memory_hook.h"
|
||||
#include "core/arm/arm_interface.h"
|
||||
#include "core/core.h"
|
||||
#include "core/file_sys/program_metadata.h"
|
||||
#include "core/hle/kernel/errors.h"
|
||||
#include "core/hle/kernel/process.h"
|
||||
#include "core/hle/kernel/resource_limit.h"
|
||||
#include "core/hle/kernel/vm_manager.h"
|
||||
#include "core/memory.h"
|
||||
#include "core/memory_setup.h"
|
||||
@@ -49,10 +50,14 @@ bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const {
|
||||
type != next.type) {
|
||||
return false;
|
||||
}
|
||||
if (type == VMAType::AllocatedMemoryBlock &&
|
||||
(backing_block != next.backing_block || offset + size != next.offset)) {
|
||||
if ((attribute & MemoryAttribute::DeviceMapped) == MemoryAttribute::DeviceMapped) {
|
||||
// TODO: Can device mapped memory be merged sanely?
|
||||
// Not merging it may cause inaccuracies versus hardware when memory layout is queried.
|
||||
return false;
|
||||
}
|
||||
if (type == VMAType::AllocatedMemoryBlock) {
|
||||
return true;
|
||||
}
|
||||
if (type == VMAType::BackingMemory && backing_memory + size != next.backing_memory) {
|
||||
return false;
|
||||
}
|
||||
@@ -100,7 +105,7 @@ bool VMManager::IsValidHandle(VMAHandle handle) const {
|
||||
ResultVal<VMManager::VMAHandle> VMManager::MapMemoryBlock(VAddr target,
|
||||
std::shared_ptr<std::vector<u8>> block,
|
||||
std::size_t offset, u64 size,
|
||||
MemoryState state) {
|
||||
MemoryState state, VMAPermission perm) {
|
||||
ASSERT(block != nullptr);
|
||||
ASSERT(offset + size <= block->size());
|
||||
|
||||
@@ -109,17 +114,8 @@ ResultVal<VMManager::VMAHandle> VMManager::MapMemoryBlock(VAddr target,
|
||||
VirtualMemoryArea& final_vma = vma_handle->second;
|
||||
ASSERT(final_vma.size == size);
|
||||
|
||||
system.ArmInterface(0).MapBackingMemory(target, size, block->data() + offset,
|
||||
VMAPermission::ReadWriteExecute);
|
||||
system.ArmInterface(1).MapBackingMemory(target, size, block->data() + offset,
|
||||
VMAPermission::ReadWriteExecute);
|
||||
system.ArmInterface(2).MapBackingMemory(target, size, block->data() + offset,
|
||||
VMAPermission::ReadWriteExecute);
|
||||
system.ArmInterface(3).MapBackingMemory(target, size, block->data() + offset,
|
||||
VMAPermission::ReadWriteExecute);
|
||||
|
||||
final_vma.type = VMAType::AllocatedMemoryBlock;
|
||||
final_vma.permissions = VMAPermission::ReadWrite;
|
||||
final_vma.permissions = perm;
|
||||
final_vma.state = state;
|
||||
final_vma.backing_block = std::move(block);
|
||||
final_vma.offset = offset;
|
||||
@@ -137,11 +133,6 @@ ResultVal<VMManager::VMAHandle> VMManager::MapBackingMemory(VAddr target, u8* me
|
||||
VirtualMemoryArea& final_vma = vma_handle->second;
|
||||
ASSERT(final_vma.size == size);
|
||||
|
||||
system.ArmInterface(0).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute);
|
||||
system.ArmInterface(1).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute);
|
||||
system.ArmInterface(2).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute);
|
||||
system.ArmInterface(3).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute);
|
||||
|
||||
final_vma.type = VMAType::BackingMemory;
|
||||
final_vma.permissions = VMAPermission::ReadWrite;
|
||||
final_vma.state = state;
|
||||
@@ -230,11 +221,6 @@ ResultCode VMManager::UnmapRange(VAddr target, u64 size) {
|
||||
|
||||
ASSERT(FindVMA(target)->second.size >= size);
|
||||
|
||||
system.ArmInterface(0).UnmapMemory(target, size);
|
||||
system.ArmInterface(1).UnmapMemory(target, size);
|
||||
system.ArmInterface(2).UnmapMemory(target, size);
|
||||
system.ArmInterface(3).UnmapMemory(target, size);
|
||||
|
||||
return RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -308,6 +294,166 @@ ResultVal<VAddr> VMManager::SetHeapSize(u64 size) {
|
||||
return MakeResult<VAddr>(heap_region_base);
|
||||
}
|
||||
|
||||
ResultCode VMManager::MapPhysicalMemory(VAddr target, u64 size) {
|
||||
const auto end_addr = target + size;
|
||||
const auto last_addr = end_addr - 1;
|
||||
VAddr cur_addr = target;
|
||||
|
||||
ResultCode result = RESULT_SUCCESS;
|
||||
|
||||
// Check how much memory we've already mapped.
|
||||
const auto mapped_size_result = SizeOfAllocatedVMAsInRange(target, size);
|
||||
if (mapped_size_result.Failed()) {
|
||||
return mapped_size_result.Code();
|
||||
}
|
||||
|
||||
// If we've already mapped the desired amount, return early.
|
||||
const std::size_t mapped_size = *mapped_size_result;
|
||||
if (mapped_size == size) {
|
||||
return RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
// Check that we can map the memory we want.
|
||||
const auto res_limit = system.CurrentProcess()->GetResourceLimit();
|
||||
const u64 physmem_remaining = res_limit->GetMaxResourceValue(ResourceType::PhysicalMemory) -
|
||||
res_limit->GetCurrentResourceValue(ResourceType::PhysicalMemory);
|
||||
if (physmem_remaining < (size - mapped_size)) {
|
||||
return ERR_RESOURCE_LIMIT_EXCEEDED;
|
||||
}
|
||||
|
||||
// Keep track of the memory regions we unmap.
|
||||
std::vector<std::pair<u64, u64>> mapped_regions;
|
||||
|
||||
// Iterate, trying to map memory.
|
||||
{
|
||||
cur_addr = target;
|
||||
|
||||
auto iter = FindVMA(target);
|
||||
ASSERT_MSG(iter != vma_map.end(), "MapPhysicalMemory iter != end");
|
||||
|
||||
while (true) {
|
||||
const auto& vma = iter->second;
|
||||
const auto vma_start = vma.base;
|
||||
const auto vma_end = vma_start + vma.size;
|
||||
const auto vma_last = vma_end - 1;
|
||||
|
||||
// Map the memory block
|
||||
const auto map_size = std::min(end_addr - cur_addr, vma_end - cur_addr);
|
||||
if (vma.state == MemoryState::Unmapped) {
|
||||
const auto map_res =
|
||||
MapMemoryBlock(cur_addr, std::make_shared<std::vector<u8>>(map_size, 0), 0,
|
||||
map_size, MemoryState::Heap, VMAPermission::ReadWrite);
|
||||
result = map_res.Code();
|
||||
if (result.IsError()) {
|
||||
break;
|
||||
}
|
||||
|
||||
mapped_regions.emplace_back(cur_addr, map_size);
|
||||
}
|
||||
|
||||
// Break once we hit the end of the range.
|
||||
if (last_addr <= vma_last) {
|
||||
break;
|
||||
}
|
||||
|
||||
// Advance to the next block.
|
||||
cur_addr = vma_end;
|
||||
iter = FindVMA(cur_addr);
|
||||
ASSERT_MSG(iter != vma_map.end(), "MapPhysicalMemory iter != end");
|
||||
}
|
||||
}
|
||||
|
||||
// If we failed, unmap memory.
|
||||
if (result.IsError()) {
|
||||
for (const auto [unmap_address, unmap_size] : mapped_regions) {
|
||||
ASSERT_MSG(UnmapRange(unmap_address, unmap_size).IsSuccess(),
|
||||
"MapPhysicalMemory un-map on error");
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// Update amount of mapped physical memory.
|
||||
physical_memory_mapped += size - mapped_size;
|
||||
|
||||
return RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
ResultCode VMManager::UnmapPhysicalMemory(VAddr target, u64 size) {
|
||||
const auto end_addr = target + size;
|
||||
const auto last_addr = end_addr - 1;
|
||||
VAddr cur_addr = target;
|
||||
|
||||
ResultCode result = RESULT_SUCCESS;
|
||||
|
||||
// Check how much memory is currently mapped.
|
||||
const auto mapped_size_result = SizeOfUnmappablePhysicalMemoryInRange(target, size);
|
||||
if (mapped_size_result.Failed()) {
|
||||
return mapped_size_result.Code();
|
||||
}
|
||||
|
||||
// If we've already unmapped all the memory, return early.
|
||||
const std::size_t mapped_size = *mapped_size_result;
|
||||
if (mapped_size == 0) {
|
||||
return RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
// Keep track of the memory regions we unmap.
|
||||
std::vector<std::pair<u64, u64>> unmapped_regions;
|
||||
|
||||
// Try to unmap regions.
|
||||
{
|
||||
cur_addr = target;
|
||||
|
||||
auto iter = FindVMA(target);
|
||||
ASSERT_MSG(iter != vma_map.end(), "UnmapPhysicalMemory iter != end");
|
||||
|
||||
while (true) {
|
||||
const auto& vma = iter->second;
|
||||
const auto vma_start = vma.base;
|
||||
const auto vma_end = vma_start + vma.size;
|
||||
const auto vma_last = vma_end - 1;
|
||||
|
||||
// Unmap the memory block
|
||||
const auto unmap_size = std::min(end_addr - cur_addr, vma_end - cur_addr);
|
||||
if (vma.state == MemoryState::Heap) {
|
||||
result = UnmapRange(cur_addr, unmap_size);
|
||||
if (result.IsError()) {
|
||||
break;
|
||||
}
|
||||
|
||||
unmapped_regions.emplace_back(cur_addr, unmap_size);
|
||||
}
|
||||
|
||||
// Break once we hit the end of the range.
|
||||
if (last_addr <= vma_last) {
|
||||
break;
|
||||
}
|
||||
|
||||
// Advance to the next block.
|
||||
cur_addr = vma_end;
|
||||
iter = FindVMA(cur_addr);
|
||||
ASSERT_MSG(iter != vma_map.end(), "UnmapPhysicalMemory iter != end");
|
||||
}
|
||||
}
|
||||
|
||||
// If we failed, re-map regions.
|
||||
// TODO: Preserve memory contents?
|
||||
if (result.IsError()) {
|
||||
for (const auto [map_address, map_size] : unmapped_regions) {
|
||||
const auto remap_res =
|
||||
MapMemoryBlock(map_address, std::make_shared<std::vector<u8>>(map_size, 0), 0,
|
||||
map_size, MemoryState::Heap, VMAPermission::None);
|
||||
ASSERT_MSG(remap_res.Succeeded(), "UnmapPhysicalMemory re-map on error");
|
||||
}
|
||||
}
|
||||
|
||||
// Update mapped amount
|
||||
physical_memory_mapped -= mapped_size;
|
||||
|
||||
return RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
ResultCode VMManager::MapCodeMemory(VAddr dst_address, VAddr src_address, u64 size) {
|
||||
constexpr auto ignore_attribute = MemoryAttribute::LockedForIPC | MemoryAttribute::DeviceMapped;
|
||||
const auto src_check_result = CheckRangeState(
|
||||
@@ -455,7 +601,7 @@ ResultCode VMManager::MirrorMemory(VAddr dst_addr, VAddr src_addr, u64 size, Mem
|
||||
// Protect mirror with permissions from old region
|
||||
Reprotect(new_vma, vma->second.permissions);
|
||||
// Remove permissions from old region
|
||||
Reprotect(vma, VMAPermission::None);
|
||||
ReprotectRange(src_addr, size, VMAPermission::None);
|
||||
|
||||
return RESULT_SUCCESS;
|
||||
}
|
||||
@@ -588,14 +734,14 @@ VMManager::VMAIter VMManager::SplitVMA(VMAIter vma_handle, u64 offset_in_vma) {
|
||||
VMManager::VMAIter VMManager::MergeAdjacent(VMAIter iter) {
|
||||
const VMAIter next_vma = std::next(iter);
|
||||
if (next_vma != vma_map.end() && iter->second.CanBeMergedWith(next_vma->second)) {
|
||||
iter->second.size += next_vma->second.size;
|
||||
MergeAdjacentVMA(iter->second, next_vma->second);
|
||||
vma_map.erase(next_vma);
|
||||
}
|
||||
|
||||
if (iter != vma_map.begin()) {
|
||||
VMAIter prev_vma = std::prev(iter);
|
||||
if (prev_vma->second.CanBeMergedWith(iter->second)) {
|
||||
prev_vma->second.size += iter->second.size;
|
||||
MergeAdjacentVMA(prev_vma->second, iter->second);
|
||||
vma_map.erase(iter);
|
||||
iter = prev_vma;
|
||||
}
|
||||
@@ -604,6 +750,38 @@ VMManager::VMAIter VMManager::MergeAdjacent(VMAIter iter) {
|
||||
return iter;
|
||||
}
|
||||
|
||||
void VMManager::MergeAdjacentVMA(VirtualMemoryArea& left, const VirtualMemoryArea& right) {
|
||||
ASSERT(left.CanBeMergedWith(right));
|
||||
|
||||
// Always merge allocated memory blocks, even when they don't share the same backing block.
|
||||
if (left.type == VMAType::AllocatedMemoryBlock &&
|
||||
(left.backing_block != right.backing_block || left.offset + left.size != right.offset)) {
|
||||
// Check if we can save work.
|
||||
if (left.offset == 0 && left.size == left.backing_block->size()) {
|
||||
// Fast case: left is an entire backing block.
|
||||
left.backing_block->insert(left.backing_block->end(),
|
||||
right.backing_block->begin() + right.offset,
|
||||
right.backing_block->begin() + right.offset + right.size);
|
||||
} else {
|
||||
// Slow case: make a new memory block for left and right.
|
||||
auto new_memory = std::make_shared<std::vector<u8>>();
|
||||
new_memory->insert(new_memory->end(), left.backing_block->begin() + left.offset,
|
||||
left.backing_block->begin() + left.offset + left.size);
|
||||
new_memory->insert(new_memory->end(), right.backing_block->begin() + right.offset,
|
||||
right.backing_block->begin() + right.offset + right.size);
|
||||
left.backing_block = new_memory;
|
||||
left.offset = 0;
|
||||
}
|
||||
|
||||
// Page table update is needed, because backing memory changed.
|
||||
left.size += right.size;
|
||||
UpdatePageTableForVMA(left);
|
||||
} else {
|
||||
// Just update the size.
|
||||
left.size += right.size;
|
||||
}
|
||||
}
|
||||
|
||||
void VMManager::UpdatePageTableForVMA(const VirtualMemoryArea& vma) {
|
||||
switch (vma.type) {
|
||||
case VMAType::Free:
|
||||
@@ -778,6 +956,84 @@ VMManager::CheckResults VMManager::CheckRangeState(VAddr address, u64 size, Memo
|
||||
std::make_tuple(initial_state, initial_permissions, initial_attributes & ~ignore_mask));
|
||||
}
|
||||
|
||||
ResultVal<std::size_t> VMManager::SizeOfAllocatedVMAsInRange(VAddr address,
|
||||
std::size_t size) const {
|
||||
const VAddr end_addr = address + size;
|
||||
const VAddr last_addr = end_addr - 1;
|
||||
std::size_t mapped_size = 0;
|
||||
|
||||
VAddr cur_addr = address;
|
||||
auto iter = FindVMA(cur_addr);
|
||||
ASSERT_MSG(iter != vma_map.end(), "SizeOfAllocatedVMAsInRange iter != end");
|
||||
|
||||
while (true) {
|
||||
const auto& vma = iter->second;
|
||||
const VAddr vma_start = vma.base;
|
||||
const VAddr vma_end = vma_start + vma.size;
|
||||
const VAddr vma_last = vma_end - 1;
|
||||
|
||||
// Add size if relevant.
|
||||
if (vma.state != MemoryState::Unmapped) {
|
||||
mapped_size += std::min(end_addr - cur_addr, vma_end - cur_addr);
|
||||
}
|
||||
|
||||
// Break once we hit the end of the range.
|
||||
if (last_addr <= vma_last) {
|
||||
break;
|
||||
}
|
||||
|
||||
// Advance to the next block.
|
||||
cur_addr = vma_end;
|
||||
iter = std::next(iter);
|
||||
ASSERT_MSG(iter != vma_map.end(), "SizeOfAllocatedVMAsInRange iter != end");
|
||||
}
|
||||
|
||||
return MakeResult(mapped_size);
|
||||
}
|
||||
|
||||
ResultVal<std::size_t> VMManager::SizeOfUnmappablePhysicalMemoryInRange(VAddr address,
|
||||
std::size_t size) const {
|
||||
const VAddr end_addr = address + size;
|
||||
const VAddr last_addr = end_addr - 1;
|
||||
std::size_t mapped_size = 0;
|
||||
|
||||
VAddr cur_addr = address;
|
||||
auto iter = FindVMA(cur_addr);
|
||||
ASSERT_MSG(iter != vma_map.end(), "SizeOfUnmappablePhysicalMemoryInRange iter != end");
|
||||
|
||||
while (true) {
|
||||
const auto& vma = iter->second;
|
||||
const auto vma_start = vma.base;
|
||||
const auto vma_end = vma_start + vma.size;
|
||||
const auto vma_last = vma_end - 1;
|
||||
const auto state = vma.state;
|
||||
const auto attr = vma.attribute;
|
||||
|
||||
// Memory within region must be free or mapped heap.
|
||||
if (!((state == MemoryState::Heap && attr == MemoryAttribute::None) ||
|
||||
(state == MemoryState::Unmapped))) {
|
||||
return ERR_INVALID_ADDRESS_STATE;
|
||||
}
|
||||
|
||||
// Add size if relevant.
|
||||
if (state != MemoryState::Unmapped) {
|
||||
mapped_size += std::min(end_addr - cur_addr, vma_end - cur_addr);
|
||||
}
|
||||
|
||||
// Break once we hit the end of the range.
|
||||
if (last_addr <= vma_last) {
|
||||
break;
|
||||
}
|
||||
|
||||
// Advance to the next block.
|
||||
cur_addr = vma_end;
|
||||
iter = std::next(iter);
|
||||
ASSERT_MSG(iter != vma_map.end(), "SizeOfUnmappablePhysicalMemoryInRange iter != end");
|
||||
}
|
||||
|
||||
return MakeResult(mapped_size);
|
||||
}
|
||||
|
||||
u64 VMManager::GetTotalPhysicalMemoryAvailable() const {
|
||||
LOG_WARNING(Kernel, "(STUBBED) called");
|
||||
return 0xF8000000;
|
||||
|
||||
@@ -349,7 +349,8 @@ public:
|
||||
* @param state MemoryState tag to attach to the VMA.
|
||||
*/
|
||||
ResultVal<VMAHandle> MapMemoryBlock(VAddr target, std::shared_ptr<std::vector<u8>> block,
|
||||
std::size_t offset, u64 size, MemoryState state);
|
||||
std::size_t offset, u64 size, MemoryState state,
|
||||
VMAPermission perm = VMAPermission::ReadWrite);
|
||||
|
||||
/**
|
||||
* Maps an unmanaged host memory pointer at a given address.
|
||||
@@ -450,6 +451,34 @@ public:
|
||||
///
|
||||
ResultVal<VAddr> SetHeapSize(u64 size);
|
||||
|
||||
/// Maps memory at a given address.
|
||||
///
|
||||
/// @param addr The virtual address to map memory at.
|
||||
/// @param size The amount of memory to map.
|
||||
///
|
||||
/// @note The destination address must lie within the Map region.
|
||||
///
|
||||
/// @note This function requires that SystemResourceSize be non-zero,
|
||||
/// however, this is just because if it were not then the
|
||||
/// resulting page tables could be exploited on hardware by
|
||||
/// a malicious program. SystemResource usage does not need
|
||||
/// to be explicitly checked or updated here.
|
||||
ResultCode MapPhysicalMemory(VAddr target, u64 size);
|
||||
|
||||
/// Unmaps memory at a given address.
|
||||
///
|
||||
/// @param addr The virtual address to unmap memory at.
|
||||
/// @param size The amount of memory to unmap.
|
||||
///
|
||||
/// @note The destination address must lie within the Map region.
|
||||
///
|
||||
/// @note This function requires that SystemResourceSize be non-zero,
|
||||
/// however, this is just because if it were not then the
|
||||
/// resulting page tables could be exploited on hardware by
|
||||
/// a malicious program. SystemResource usage does not need
|
||||
/// to be explicitly checked or updated here.
|
||||
ResultCode UnmapPhysicalMemory(VAddr target, u64 size);
|
||||
|
||||
/// Maps a region of memory as code memory.
|
||||
///
|
||||
/// @param dst_address The base address of the region to create the aliasing memory region.
|
||||
@@ -657,6 +686,11 @@ private:
|
||||
*/
|
||||
VMAIter MergeAdjacent(VMAIter vma);
|
||||
|
||||
/**
|
||||
* Merges two adjacent VMAs.
|
||||
*/
|
||||
void MergeAdjacentVMA(VirtualMemoryArea& left, const VirtualMemoryArea& right);
|
||||
|
||||
/// Updates the pages corresponding to this VMA so they match the VMA's attributes.
|
||||
void UpdatePageTableForVMA(const VirtualMemoryArea& vma);
|
||||
|
||||
@@ -701,6 +735,13 @@ private:
|
||||
MemoryAttribute attribute_mask, MemoryAttribute attribute,
|
||||
MemoryAttribute ignore_mask) const;
|
||||
|
||||
/// Gets the amount of memory currently mapped (state != Unmapped) in a range.
|
||||
ResultVal<std::size_t> SizeOfAllocatedVMAsInRange(VAddr address, std::size_t size) const;
|
||||
|
||||
/// Gets the amount of memory unmappable by UnmapPhysicalMemory in a range.
|
||||
ResultVal<std::size_t> SizeOfUnmappablePhysicalMemoryInRange(VAddr address,
|
||||
std::size_t size) const;
|
||||
|
||||
/**
|
||||
* A map covering the entirety of the managed address space, keyed by the `base` field of each
|
||||
* VMA. It must always be modified by splitting or merging VMAs, so that the invariant
|
||||
@@ -742,6 +783,11 @@ private:
|
||||
// end of the range. This is essentially 'base_address + current_size'.
|
||||
VAddr heap_end = 0;
|
||||
|
||||
// The current amount of memory mapped via MapPhysicalMemory.
|
||||
// This is used here (and in Nintendo's kernel) only for debugging, and does not impact
|
||||
// any behavior.
|
||||
u64 physical_memory_mapped = 0;
|
||||
|
||||
Core::System& system;
|
||||
};
|
||||
} // namespace Kernel
|
||||
|
||||
@@ -25,7 +25,8 @@ namespace Service::Audio {
|
||||
|
||||
class IAudioRenderer final : public ServiceFramework<IAudioRenderer> {
|
||||
public:
|
||||
explicit IAudioRenderer(AudioCore::AudioRendererParameter audren_params)
|
||||
explicit IAudioRenderer(AudioCore::AudioRendererParameter audren_params,
|
||||
const std::size_t instance_number)
|
||||
: ServiceFramework("IAudioRenderer") {
|
||||
// clang-format off
|
||||
static const FunctionInfo functions[] = {
|
||||
@@ -48,8 +49,8 @@ public:
|
||||
auto& system = Core::System::GetInstance();
|
||||
system_event = Kernel::WritableEvent::CreateEventPair(
|
||||
system.Kernel(), Kernel::ResetType::Manual, "IAudioRenderer:SystemEvent");
|
||||
renderer = std::make_unique<AudioCore::AudioRenderer>(system.CoreTiming(), audren_params,
|
||||
system_event.writable);
|
||||
renderer = std::make_unique<AudioCore::AudioRenderer>(
|
||||
system.CoreTiming(), audren_params, system_event.writable, instance_number);
|
||||
}
|
||||
|
||||
private:
|
||||
@@ -607,7 +608,7 @@ void AudRenU::OpenAudioRendererImpl(Kernel::HLERequestContext& ctx) {
|
||||
IPC::ResponseBuilder rb{ctx, 2, 0, 1};
|
||||
|
||||
rb.Push(RESULT_SUCCESS);
|
||||
rb.PushIpcInterface<IAudioRenderer>(params);
|
||||
rb.PushIpcInterface<IAudioRenderer>(params, audren_instance_count++);
|
||||
}
|
||||
|
||||
bool AudRenU::IsFeatureSupported(AudioFeatures feature, u32_le revision) const {
|
||||
|
||||
@@ -33,6 +33,7 @@ private:
|
||||
};
|
||||
|
||||
bool IsFeatureSupported(AudioFeatures feature, u32_le revision) const;
|
||||
std::size_t audren_instance_count = 0;
|
||||
};
|
||||
|
||||
} // namespace Service::Audio
|
||||
|
||||
@@ -345,14 +345,16 @@ public:
|
||||
vm_manager
|
||||
.MirrorMemory(*map_address, nro_address, nro_size, Kernel::MemoryState::ModuleCode)
|
||||
.IsSuccess());
|
||||
ASSERT(vm_manager.UnmapRange(nro_address, nro_size).IsSuccess());
|
||||
ASSERT(vm_manager.ReprotectRange(nro_address, nro_size, Kernel::VMAPermission::None)
|
||||
.IsSuccess());
|
||||
|
||||
if (bss_size > 0) {
|
||||
ASSERT(vm_manager
|
||||
.MirrorMemory(*map_address + nro_size, bss_address, bss_size,
|
||||
Kernel::MemoryState::ModuleCode)
|
||||
.IsSuccess());
|
||||
ASSERT(vm_manager.UnmapRange(bss_address, bss_size).IsSuccess());
|
||||
ASSERT(vm_manager.ReprotectRange(bss_address, bss_size, Kernel::VMAPermission::None)
|
||||
.IsSuccess());
|
||||
}
|
||||
|
||||
vm_manager.ReprotectRange(*map_address, header.text_size,
|
||||
@@ -364,7 +366,8 @@ public:
|
||||
|
||||
Core::System::GetInstance().InvalidateCpuInstructionCaches();
|
||||
|
||||
nro.insert_or_assign(*map_address, NROInfo{hash, nro_size + bss_size});
|
||||
nro.insert_or_assign(*map_address,
|
||||
NROInfo{hash, nro_address, nro_size, bss_address, bss_size});
|
||||
|
||||
IPC::ResponseBuilder rb{ctx, 4};
|
||||
rb.Push(RESULT_SUCCESS);
|
||||
@@ -409,9 +412,23 @@ public:
|
||||
}
|
||||
|
||||
auto& vm_manager = Core::CurrentProcess()->VMManager();
|
||||
const auto& nro_size = iter->second.size;
|
||||
const auto& nro_info = iter->second;
|
||||
|
||||
ASSERT(vm_manager.UnmapRange(nro_address, nro_size).IsSuccess());
|
||||
// Unmap the mirrored memory
|
||||
ASSERT(
|
||||
vm_manager.UnmapRange(nro_address, nro_info.nro_size + nro_info.bss_size).IsSuccess());
|
||||
|
||||
// Reprotect the source memory
|
||||
ASSERT(vm_manager
|
||||
.ReprotectRange(nro_info.nro_address, nro_info.nro_size,
|
||||
Kernel::VMAPermission::ReadWrite)
|
||||
.IsSuccess());
|
||||
if (nro_info.bss_size > 0) {
|
||||
ASSERT(vm_manager
|
||||
.ReprotectRange(nro_info.bss_address, nro_info.bss_size,
|
||||
Kernel::VMAPermission::ReadWrite)
|
||||
.IsSuccess());
|
||||
}
|
||||
|
||||
Core::System::GetInstance().InvalidateCpuInstructionCaches();
|
||||
|
||||
@@ -473,7 +490,10 @@ private:
|
||||
|
||||
struct NROInfo {
|
||||
SHA256Hash hash;
|
||||
u64 size;
|
||||
VAddr nro_address;
|
||||
u64 nro_size;
|
||||
VAddr bss_address;
|
||||
u64 bss_size;
|
||||
};
|
||||
|
||||
bool initialized = false;
|
||||
|
||||
@@ -85,7 +85,6 @@ void LogSettings() {
|
||||
LogSetting("System_RngSeed", Settings::values.rng_seed.value_or(0));
|
||||
LogSetting("System_CurrentUser", Settings::values.current_user);
|
||||
LogSetting("System_LanguageIndex", Settings::values.language_index);
|
||||
LogSetting("Core_CpuJitEnabled", Settings::values.cpu_jit_enabled);
|
||||
LogSetting("Core_UseMultiCore", Settings::values.use_multi_core);
|
||||
LogSetting("Renderer_UseResolutionFactor", Settings::values.resolution_factor);
|
||||
LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit);
|
||||
|
||||
@@ -378,7 +378,6 @@ struct Values {
|
||||
std::atomic_bool is_device_reload_pending{true};
|
||||
|
||||
// Core
|
||||
bool cpu_jit_enabled;
|
||||
bool use_multi_core;
|
||||
|
||||
// Data Storage
|
||||
|
||||
@@ -168,7 +168,6 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader) {
|
||||
AddField(Telemetry::FieldType::UserConfig, "Audio_SinkId", Settings::values.sink_id);
|
||||
AddField(Telemetry::FieldType::UserConfig, "Audio_EnableAudioStretching",
|
||||
Settings::values.enable_audio_stretching);
|
||||
AddField(Telemetry::FieldType::UserConfig, "Core_UseCpuJit", Settings::values.cpu_jit_enabled);
|
||||
AddField(Telemetry::FieldType::UserConfig, "Core_UseMultiCore",
|
||||
Settings::values.use_multi_core);
|
||||
AddField(Telemetry::FieldType::UserConfig, "Renderer_ResolutionFactor",
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
add_library(video_core STATIC
|
||||
buffer_cache.h
|
||||
dma_pusher.cpp
|
||||
dma_pusher.h
|
||||
debug_utils/debug_utils.cpp
|
||||
@@ -43,8 +44,6 @@ add_library(video_core STATIC
|
||||
renderer_opengl/gl_device.h
|
||||
renderer_opengl/gl_framebuffer_cache.cpp
|
||||
renderer_opengl/gl_framebuffer_cache.h
|
||||
renderer_opengl/gl_global_cache.cpp
|
||||
renderer_opengl/gl_global_cache.h
|
||||
renderer_opengl/gl_rasterizer.cpp
|
||||
renderer_opengl/gl_rasterizer.h
|
||||
renderer_opengl/gl_resource_manager.cpp
|
||||
@@ -103,6 +102,8 @@ add_library(video_core STATIC
|
||||
shader/decode/video.cpp
|
||||
shader/decode/xmad.cpp
|
||||
shader/decode/other.cpp
|
||||
shader/control_flow.cpp
|
||||
shader/control_flow.h
|
||||
shader/decode.cpp
|
||||
shader/node_helper.cpp
|
||||
shader/node_helper.h
|
||||
|
||||
299
src/video_core/buffer_cache.h
Normal file
299
src/video_core/buffer_cache.h
Normal file
@@ -0,0 +1,299 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "common/alignment.h"
|
||||
#include "common/common_types.h"
|
||||
#include "core/core.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/rasterizer_cache.h"
|
||||
|
||||
namespace VideoCore {
|
||||
class RasterizerInterface;
|
||||
}
|
||||
|
||||
namespace VideoCommon {
|
||||
|
||||
template <typename BufferStorageType>
|
||||
class CachedBuffer final : public RasterizerCacheObject {
|
||||
public:
|
||||
explicit CachedBuffer(VAddr cpu_addr, u8* host_ptr)
|
||||
: RasterizerCacheObject{host_ptr}, host_ptr{host_ptr}, cpu_addr{cpu_addr} {}
|
||||
~CachedBuffer() override = default;
|
||||
|
||||
VAddr GetCpuAddr() const override {
|
||||
return cpu_addr;
|
||||
}
|
||||
|
||||
std::size_t GetSizeInBytes() const override {
|
||||
return size;
|
||||
}
|
||||
|
||||
u8* GetWritableHostPtr() const {
|
||||
return host_ptr;
|
||||
}
|
||||
|
||||
std::size_t GetSize() const {
|
||||
return size;
|
||||
}
|
||||
|
||||
std::size_t GetCapacity() const {
|
||||
return capacity;
|
||||
}
|
||||
|
||||
bool IsInternalized() const {
|
||||
return is_internal;
|
||||
}
|
||||
|
||||
const BufferStorageType& GetBuffer() const {
|
||||
return buffer;
|
||||
}
|
||||
|
||||
void SetSize(std::size_t new_size) {
|
||||
size = new_size;
|
||||
}
|
||||
|
||||
void SetInternalState(bool is_internal_) {
|
||||
is_internal = is_internal_;
|
||||
}
|
||||
|
||||
BufferStorageType ExchangeBuffer(BufferStorageType buffer_, std::size_t new_capacity) {
|
||||
capacity = new_capacity;
|
||||
std::swap(buffer, buffer_);
|
||||
return buffer_;
|
||||
}
|
||||
|
||||
private:
|
||||
u8* host_ptr{};
|
||||
VAddr cpu_addr{};
|
||||
std::size_t size{};
|
||||
std::size_t capacity{};
|
||||
bool is_internal{};
|
||||
BufferStorageType buffer;
|
||||
};
|
||||
|
||||
template <typename BufferStorageType, typename BufferType, typename StreamBuffer>
|
||||
class BufferCache : public RasterizerCache<std::shared_ptr<CachedBuffer<BufferStorageType>>> {
|
||||
public:
|
||||
using Buffer = std::shared_ptr<CachedBuffer<BufferStorageType>>;
|
||||
using BufferInfo = std::pair<const BufferType*, u64>;
|
||||
|
||||
explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
|
||||
std::unique_ptr<StreamBuffer> stream_buffer)
|
||||
: RasterizerCache<Buffer>{rasterizer}, system{system},
|
||||
stream_buffer{std::move(stream_buffer)}, stream_buffer_handle{
|
||||
this->stream_buffer->GetHandle()} {}
|
||||
~BufferCache() = default;
|
||||
|
||||
void Unregister(const Buffer& entry) override {
|
||||
std::lock_guard lock{RasterizerCache<Buffer>::mutex};
|
||||
if (entry->IsInternalized()) {
|
||||
internalized_entries.erase(entry->GetCacheAddr());
|
||||
}
|
||||
ReserveBuffer(entry);
|
||||
RasterizerCache<Buffer>::Unregister(entry);
|
||||
}
|
||||
|
||||
void TickFrame() {
|
||||
marked_for_destruction_index =
|
||||
(marked_for_destruction_index + 1) % marked_for_destruction_ring_buffer.size();
|
||||
MarkedForDestruction().clear();
|
||||
}
|
||||
|
||||
BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
|
||||
bool internalize = false, bool is_written = false) {
|
||||
std::lock_guard lock{RasterizerCache<Buffer>::mutex};
|
||||
|
||||
auto& memory_manager = system.GPU().MemoryManager();
|
||||
const auto host_ptr = memory_manager.GetPointer(gpu_addr);
|
||||
if (!host_ptr) {
|
||||
return {GetEmptyBuffer(size), 0};
|
||||
}
|
||||
const auto cache_addr = ToCacheAddr(host_ptr);
|
||||
|
||||
// Cache management is a big overhead, so only cache entries with a given size.
|
||||
// TODO: Figure out which size is the best for given games.
|
||||
constexpr std::size_t max_stream_size = 0x800;
|
||||
if (!internalize && size < max_stream_size &&
|
||||
internalized_entries.find(cache_addr) == internalized_entries.end()) {
|
||||
return StreamBufferUpload(host_ptr, size, alignment);
|
||||
}
|
||||
|
||||
auto entry = RasterizerCache<Buffer>::TryGet(cache_addr);
|
||||
if (!entry) {
|
||||
return FixedBufferUpload(gpu_addr, host_ptr, size, internalize, is_written);
|
||||
}
|
||||
|
||||
if (entry->GetSize() < size) {
|
||||
IncreaseBufferSize(entry, size);
|
||||
}
|
||||
if (is_written) {
|
||||
entry->MarkAsModified(true, *this);
|
||||
}
|
||||
return {ToHandle(entry->GetBuffer()), 0};
|
||||
}
|
||||
|
||||
/// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset.
|
||||
BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size,
|
||||
std::size_t alignment = 4) {
|
||||
std::lock_guard lock{RasterizerCache<Buffer>::mutex};
|
||||
return StreamBufferUpload(raw_pointer, size, alignment);
|
||||
}
|
||||
|
||||
void Map(std::size_t max_size) {
|
||||
std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4);
|
||||
buffer_offset = buffer_offset_base;
|
||||
}
|
||||
|
||||
/// Finishes the upload stream, returns true on bindings invalidation.
|
||||
bool Unmap() {
|
||||
stream_buffer->Unmap(buffer_offset - buffer_offset_base);
|
||||
return std::exchange(invalidated, false);
|
||||
}
|
||||
|
||||
virtual const BufferType* GetEmptyBuffer(std::size_t size) = 0;
|
||||
|
||||
protected:
|
||||
void FlushObjectInner(const Buffer& entry) override {
|
||||
DownloadBufferData(entry->GetBuffer(), 0, entry->GetSize(), entry->GetWritableHostPtr());
|
||||
}
|
||||
|
||||
virtual BufferStorageType CreateBuffer(std::size_t size) = 0;
|
||||
|
||||
virtual const BufferType* ToHandle(const BufferStorageType& storage) = 0;
|
||||
|
||||
virtual void UploadBufferData(const BufferStorageType& buffer, std::size_t offset,
|
||||
std::size_t size, const u8* data) = 0;
|
||||
|
||||
virtual void DownloadBufferData(const BufferStorageType& buffer, std::size_t offset,
|
||||
std::size_t size, u8* data) = 0;
|
||||
|
||||
virtual void CopyBufferData(const BufferStorageType& src, const BufferStorageType& dst,
|
||||
std::size_t src_offset, std::size_t dst_offset,
|
||||
std::size_t size) = 0;
|
||||
|
||||
private:
|
||||
BufferInfo StreamBufferUpload(const void* raw_pointer, std::size_t size,
|
||||
std::size_t alignment) {
|
||||
AlignBuffer(alignment);
|
||||
const std::size_t uploaded_offset = buffer_offset;
|
||||
std::memcpy(buffer_ptr, raw_pointer, size);
|
||||
|
||||
buffer_ptr += size;
|
||||
buffer_offset += size;
|
||||
return {&stream_buffer_handle, uploaded_offset};
|
||||
}
|
||||
|
||||
BufferInfo FixedBufferUpload(GPUVAddr gpu_addr, u8* host_ptr, std::size_t size,
|
||||
bool internalize, bool is_written) {
|
||||
auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
|
||||
const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
|
||||
ASSERT(cpu_addr);
|
||||
|
||||
auto entry = GetUncachedBuffer(*cpu_addr, host_ptr);
|
||||
entry->SetSize(size);
|
||||
entry->SetInternalState(internalize);
|
||||
RasterizerCache<Buffer>::Register(entry);
|
||||
|
||||
if (internalize) {
|
||||
internalized_entries.emplace(ToCacheAddr(host_ptr));
|
||||
}
|
||||
if (is_written) {
|
||||
entry->MarkAsModified(true, *this);
|
||||
}
|
||||
|
||||
if (entry->GetCapacity() < size) {
|
||||
MarkedForDestruction().push_back(entry->ExchangeBuffer(CreateBuffer(size), size));
|
||||
}
|
||||
|
||||
UploadBufferData(entry->GetBuffer(), 0, size, host_ptr);
|
||||
return {ToHandle(entry->GetBuffer()), 0};
|
||||
}
|
||||
|
||||
void IncreaseBufferSize(Buffer& entry, std::size_t new_size) {
|
||||
const std::size_t old_size = entry->GetSize();
|
||||
if (entry->GetCapacity() < new_size) {
|
||||
const auto& old_buffer = entry->GetBuffer();
|
||||
auto new_buffer = CreateBuffer(new_size);
|
||||
|
||||
// Copy bits from the old buffer to the new buffer.
|
||||
CopyBufferData(old_buffer, new_buffer, 0, 0, old_size);
|
||||
MarkedForDestruction().push_back(
|
||||
entry->ExchangeBuffer(std::move(new_buffer), new_size));
|
||||
|
||||
// This buffer could have been used
|
||||
invalidated = true;
|
||||
}
|
||||
// Upload the new bits.
|
||||
const std::size_t size_diff = new_size - old_size;
|
||||
UploadBufferData(entry->GetBuffer(), old_size, size_diff, entry->GetHostPtr() + old_size);
|
||||
|
||||
// Update entry's size in the object and in the cache.
|
||||
Unregister(entry);
|
||||
|
||||
entry->SetSize(new_size);
|
||||
RasterizerCache<Buffer>::Register(entry);
|
||||
}
|
||||
|
||||
Buffer GetUncachedBuffer(VAddr cpu_addr, u8* host_ptr) {
|
||||
if (auto entry = TryGetReservedBuffer(host_ptr)) {
|
||||
return entry;
|
||||
}
|
||||
return std::make_shared<CachedBuffer<BufferStorageType>>(cpu_addr, host_ptr);
|
||||
}
|
||||
|
||||
Buffer TryGetReservedBuffer(u8* host_ptr) {
|
||||
const auto it = buffer_reserve.find(ToCacheAddr(host_ptr));
|
||||
if (it == buffer_reserve.end()) {
|
||||
return {};
|
||||
}
|
||||
auto& reserve = it->second;
|
||||
auto entry = reserve.back();
|
||||
reserve.pop_back();
|
||||
return entry;
|
||||
}
|
||||
|
||||
void ReserveBuffer(Buffer entry) {
|
||||
buffer_reserve[entry->GetCacheAddr()].push_back(std::move(entry));
|
||||
}
|
||||
|
||||
void AlignBuffer(std::size_t alignment) {
|
||||
// Align the offset, not the mapped pointer
|
||||
const std::size_t offset_aligned = Common::AlignUp(buffer_offset, alignment);
|
||||
buffer_ptr += offset_aligned - buffer_offset;
|
||||
buffer_offset = offset_aligned;
|
||||
}
|
||||
|
||||
std::vector<BufferStorageType>& MarkedForDestruction() {
|
||||
return marked_for_destruction_ring_buffer[marked_for_destruction_index];
|
||||
}
|
||||
|
||||
Core::System& system;
|
||||
|
||||
std::unique_ptr<StreamBuffer> stream_buffer;
|
||||
BufferType stream_buffer_handle{};
|
||||
|
||||
bool invalidated = false;
|
||||
|
||||
u8* buffer_ptr = nullptr;
|
||||
u64 buffer_offset = 0;
|
||||
u64 buffer_offset_base = 0;
|
||||
|
||||
std::size_t marked_for_destruction_index = 0;
|
||||
std::array<std::vector<BufferStorageType>, 4> marked_for_destruction_ring_buffer;
|
||||
|
||||
std::unordered_set<CacheAddr> internalized_entries;
|
||||
std::unordered_map<CacheAddr, std::vector<Buffer>> buffer_reserve;
|
||||
};
|
||||
|
||||
} // namespace VideoCommon
|
||||
@@ -22,7 +22,7 @@ void DmaPusher::DispatchCalls() {
|
||||
MICROPROFILE_SCOPE(DispatchCalls);
|
||||
|
||||
// On entering GPU code, assume all memory may be touched by the ARM core.
|
||||
gpu.Maxwell3D().dirty_flags.OnMemoryWrite();
|
||||
gpu.Maxwell3D().dirty.OnMemoryWrite();
|
||||
|
||||
dma_pushbuffer_subindex = 0;
|
||||
|
||||
|
||||
@@ -37,7 +37,7 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) {
|
||||
const bool is_last_call = method_call.IsLastCall();
|
||||
upload_state.ProcessData(method_call.argument, is_last_call);
|
||||
if (is_last_call) {
|
||||
system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
|
||||
system.GPU().Maxwell3D().dirty.OnMemoryWrite();
|
||||
}
|
||||
break;
|
||||
}
|
||||
@@ -50,13 +50,14 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) {
|
||||
}
|
||||
|
||||
void KeplerCompute::ProcessLaunch() {
|
||||
|
||||
const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address();
|
||||
memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description,
|
||||
LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32));
|
||||
|
||||
const GPUVAddr code_loc = regs.code_loc.Address() + launch_description.program_start;
|
||||
LOG_WARNING(HW_GPU, "Compute Kernel Execute at Address 0x{:016x}, STUBBED", code_loc);
|
||||
const GPUVAddr code_addr = regs.code_loc.Address() + launch_description.program_start;
|
||||
LOG_TRACE(HW_GPU, "Compute invocation launched at address 0x{:016x}", code_addr);
|
||||
|
||||
rasterizer.DispatchCompute(code_addr);
|
||||
}
|
||||
|
||||
} // namespace Tegra::Engines
|
||||
|
||||
@@ -34,7 +34,7 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) {
|
||||
const bool is_last_call = method_call.IsLastCall();
|
||||
upload_state.ProcessData(method_call.argument, is_last_call);
|
||||
if (is_last_call) {
|
||||
system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
|
||||
system.GPU().Maxwell3D().dirty.OnMemoryWrite();
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -22,6 +22,7 @@ Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& raste
|
||||
MemoryManager& memory_manager)
|
||||
: system{system}, rasterizer{rasterizer}, memory_manager{memory_manager},
|
||||
macro_interpreter{*this}, upload_state{memory_manager, regs.upload} {
|
||||
InitDirtySettings();
|
||||
InitializeRegisterDefaults();
|
||||
}
|
||||
|
||||
@@ -69,6 +70,10 @@ void Maxwell3D::InitializeRegisterDefaults() {
|
||||
regs.stencil_back_func_mask = 0xFFFFFFFF;
|
||||
regs.stencil_back_mask = 0xFFFFFFFF;
|
||||
|
||||
regs.depth_test_func = Regs::ComparisonOp::Always;
|
||||
regs.cull.front_face = Regs::Cull::FrontFace::CounterClockWise;
|
||||
regs.cull.cull_face = Regs::Cull::CullFace::Back;
|
||||
|
||||
// TODO(Rodrigo): Most games do not set a point size. I think this is a case of a
|
||||
// register carrying a default value. Assume it's OpenGL's default (1).
|
||||
regs.point_size = 1.0f;
|
||||
@@ -86,6 +91,159 @@ void Maxwell3D::InitializeRegisterDefaults() {
|
||||
regs.rt_separate_frag_data = 1;
|
||||
}
|
||||
|
||||
#define DIRTY_REGS_POS(field_name) (offsetof(Maxwell3D::DirtyRegs, field_name))
|
||||
|
||||
void Maxwell3D::InitDirtySettings() {
|
||||
const auto set_block = [this](const u32 start, const u32 range, const u8 position) {
|
||||
const auto start_itr = dirty_pointers.begin() + start;
|
||||
const auto end_itr = start_itr + range;
|
||||
std::fill(start_itr, end_itr, position);
|
||||
};
|
||||
dirty.regs.fill(true);
|
||||
|
||||
// Init Render Targets
|
||||
constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32);
|
||||
constexpr u32 rt_start_reg = MAXWELL3D_REG_INDEX(rt);
|
||||
constexpr u32 rt_end_reg = rt_start_reg + registers_per_rt * 8;
|
||||
u32 rt_dirty_reg = DIRTY_REGS_POS(render_target);
|
||||
for (u32 rt_reg = rt_start_reg; rt_reg < rt_end_reg; rt_reg += registers_per_rt) {
|
||||
set_block(rt_reg, registers_per_rt, rt_dirty_reg);
|
||||
rt_dirty_reg++;
|
||||
}
|
||||
constexpr u32 depth_buffer_flag = DIRTY_REGS_POS(depth_buffer);
|
||||
dirty_pointers[MAXWELL3D_REG_INDEX(zeta_enable)] = depth_buffer_flag;
|
||||
dirty_pointers[MAXWELL3D_REG_INDEX(zeta_width)] = depth_buffer_flag;
|
||||
dirty_pointers[MAXWELL3D_REG_INDEX(zeta_height)] = depth_buffer_flag;
|
||||
constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32);
|
||||
constexpr u32 zeta_reg = MAXWELL3D_REG_INDEX(zeta);
|
||||
set_block(zeta_reg, registers_in_zeta, depth_buffer_flag);
|
||||
|
||||
// Init Vertex Arrays
|
||||
constexpr u32 vertex_array_start = MAXWELL3D_REG_INDEX(vertex_array);
|
||||
constexpr u32 vertex_array_size = sizeof(regs.vertex_array[0]) / sizeof(u32);
|
||||
constexpr u32 vertex_array_end = vertex_array_start + vertex_array_size * Regs::NumVertexArrays;
|
||||
u32 va_reg = DIRTY_REGS_POS(vertex_array);
|
||||
u32 vi_reg = DIRTY_REGS_POS(vertex_instance);
|
||||
for (u32 vertex_reg = vertex_array_start; vertex_reg < vertex_array_end;
|
||||
vertex_reg += vertex_array_size) {
|
||||
set_block(vertex_reg, 3, va_reg);
|
||||
// The divisor concerns vertex array instances
|
||||
dirty_pointers[vertex_reg + 3] = vi_reg;
|
||||
va_reg++;
|
||||
vi_reg++;
|
||||
}
|
||||
constexpr u32 vertex_limit_start = MAXWELL3D_REG_INDEX(vertex_array_limit);
|
||||
constexpr u32 vertex_limit_size = sizeof(regs.vertex_array_limit[0]) / sizeof(u32);
|
||||
constexpr u32 vertex_limit_end = vertex_limit_start + vertex_limit_size * Regs::NumVertexArrays;
|
||||
va_reg = DIRTY_REGS_POS(vertex_array);
|
||||
for (u32 vertex_reg = vertex_limit_start; vertex_reg < vertex_limit_end;
|
||||
vertex_reg += vertex_limit_size) {
|
||||
set_block(vertex_reg, vertex_limit_size, va_reg);
|
||||
va_reg++;
|
||||
}
|
||||
constexpr u32 vertex_instance_start = MAXWELL3D_REG_INDEX(instanced_arrays);
|
||||
constexpr u32 vertex_instance_size =
|
||||
sizeof(regs.instanced_arrays.is_instanced[0]) / sizeof(u32);
|
||||
constexpr u32 vertex_instance_end =
|
||||
vertex_instance_start + vertex_instance_size * Regs::NumVertexArrays;
|
||||
vi_reg = DIRTY_REGS_POS(vertex_instance);
|
||||
for (u32 vertex_reg = vertex_instance_start; vertex_reg < vertex_instance_end;
|
||||
vertex_reg += vertex_instance_size) {
|
||||
set_block(vertex_reg, vertex_instance_size, vi_reg);
|
||||
vi_reg++;
|
||||
}
|
||||
set_block(MAXWELL3D_REG_INDEX(vertex_attrib_format), regs.vertex_attrib_format.size(),
|
||||
DIRTY_REGS_POS(vertex_attrib_format));
|
||||
|
||||
// Init Shaders
|
||||
constexpr u32 shader_registers_count =
|
||||
sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32);
|
||||
set_block(MAXWELL3D_REG_INDEX(shader_config[0]), shader_registers_count,
|
||||
DIRTY_REGS_POS(shaders));
|
||||
|
||||
// State
|
||||
|
||||
// Viewport
|
||||
constexpr u32 viewport_dirty_reg = DIRTY_REGS_POS(viewport);
|
||||
constexpr u32 viewport_start = MAXWELL3D_REG_INDEX(viewports);
|
||||
constexpr u32 viewport_size = sizeof(regs.viewports) / sizeof(u32);
|
||||
set_block(viewport_start, viewport_size, viewport_dirty_reg);
|
||||
constexpr u32 view_volume_start = MAXWELL3D_REG_INDEX(view_volume_clip_control);
|
||||
constexpr u32 view_volume_size = sizeof(regs.view_volume_clip_control) / sizeof(u32);
|
||||
set_block(view_volume_start, view_volume_size, viewport_dirty_reg);
|
||||
|
||||
// Viewport transformation
|
||||
constexpr u32 viewport_trans_start = MAXWELL3D_REG_INDEX(viewport_transform);
|
||||
constexpr u32 viewport_trans_size = sizeof(regs.viewport_transform) / sizeof(u32);
|
||||
set_block(viewport_trans_start, viewport_trans_size, DIRTY_REGS_POS(viewport_transform));
|
||||
|
||||
// Cullmode
|
||||
constexpr u32 cull_mode_start = MAXWELL3D_REG_INDEX(cull);
|
||||
constexpr u32 cull_mode_size = sizeof(regs.cull) / sizeof(u32);
|
||||
set_block(cull_mode_start, cull_mode_size, DIRTY_REGS_POS(cull_mode));
|
||||
|
||||
// Screen y control
|
||||
dirty_pointers[MAXWELL3D_REG_INDEX(screen_y_control)] = DIRTY_REGS_POS(screen_y_control);
|
||||
|
||||
// Primitive Restart
|
||||
constexpr u32 primitive_restart_start = MAXWELL3D_REG_INDEX(primitive_restart);
|
||||
constexpr u32 primitive_restart_size = sizeof(regs.primitive_restart) / sizeof(u32);
|
||||
set_block(primitive_restart_start, primitive_restart_size, DIRTY_REGS_POS(primitive_restart));
|
||||
|
||||
// Depth Test
|
||||
constexpr u32 depth_test_dirty_reg = DIRTY_REGS_POS(depth_test);
|
||||
dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_enable)] = depth_test_dirty_reg;
|
||||
dirty_pointers[MAXWELL3D_REG_INDEX(depth_write_enabled)] = depth_test_dirty_reg;
|
||||
dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_func)] = depth_test_dirty_reg;
|
||||
|
||||
// Stencil Test
|
||||
constexpr u32 stencil_test_dirty_reg = DIRTY_REGS_POS(stencil_test);
|
||||
dirty_pointers[MAXWELL3D_REG_INDEX(stencil_enable)] = stencil_test_dirty_reg;
|
||||
dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_func)] = stencil_test_dirty_reg;
|
||||
dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_ref)] = stencil_test_dirty_reg;
|
||||
dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_mask)] = stencil_test_dirty_reg;
|
||||
dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_fail)] = stencil_test_dirty_reg;
|
||||
dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_zfail)] = stencil_test_dirty_reg;
|
||||
dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_zpass)] = stencil_test_dirty_reg;
|
||||
dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_mask)] = stencil_test_dirty_reg;
|
||||
dirty_pointers[MAXWELL3D_REG_INDEX(stencil_two_side_enable)] = stencil_test_dirty_reg;
|
||||
dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_func)] = stencil_test_dirty_reg;
|
||||
dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_ref)] = stencil_test_dirty_reg;
|
||||
dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_mask)] = stencil_test_dirty_reg;
|
||||
dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_fail)] = stencil_test_dirty_reg;
|
||||
dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_zfail)] = stencil_test_dirty_reg;
|
||||
dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_zpass)] = stencil_test_dirty_reg;
|
||||
dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_mask)] = stencil_test_dirty_reg;
|
||||
|
||||
// Color Mask
|
||||
constexpr u32 color_mask_dirty_reg = DIRTY_REGS_POS(color_mask);
|
||||
dirty_pointers[MAXWELL3D_REG_INDEX(color_mask_common)] = color_mask_dirty_reg;
|
||||
set_block(MAXWELL3D_REG_INDEX(color_mask), sizeof(regs.color_mask) / sizeof(u32),
|
||||
color_mask_dirty_reg);
|
||||
// Blend State
|
||||
constexpr u32 blend_state_dirty_reg = DIRTY_REGS_POS(blend_state);
|
||||
set_block(MAXWELL3D_REG_INDEX(blend_color), sizeof(regs.blend_color) / sizeof(u32),
|
||||
blend_state_dirty_reg);
|
||||
dirty_pointers[MAXWELL3D_REG_INDEX(independent_blend_enable)] = blend_state_dirty_reg;
|
||||
set_block(MAXWELL3D_REG_INDEX(blend), sizeof(regs.blend) / sizeof(u32), blend_state_dirty_reg);
|
||||
set_block(MAXWELL3D_REG_INDEX(independent_blend), sizeof(regs.independent_blend) / sizeof(u32),
|
||||
blend_state_dirty_reg);
|
||||
|
||||
// Scissor State
|
||||
constexpr u32 scissor_test_dirty_reg = DIRTY_REGS_POS(scissor_test);
|
||||
set_block(MAXWELL3D_REG_INDEX(scissor_test), sizeof(regs.scissor_test) / sizeof(u32),
|
||||
scissor_test_dirty_reg);
|
||||
|
||||
// Polygon Offset
|
||||
constexpr u32 polygon_offset_dirty_reg = DIRTY_REGS_POS(polygon_offset);
|
||||
dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_fill_enable)] = polygon_offset_dirty_reg;
|
||||
dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_line_enable)] = polygon_offset_dirty_reg;
|
||||
dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_point_enable)] = polygon_offset_dirty_reg;
|
||||
dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_units)] = polygon_offset_dirty_reg;
|
||||
dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_factor)] = polygon_offset_dirty_reg;
|
||||
dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_clamp)] = polygon_offset_dirty_reg;
|
||||
}
|
||||
|
||||
void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {
|
||||
// Reset the current macro.
|
||||
executing_macro = 0;
|
||||
@@ -108,6 +266,14 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
|
||||
|
||||
const u32 method = method_call.method;
|
||||
|
||||
if (method == cb_data_state.current) {
|
||||
regs.reg_array[method] = method_call.argument;
|
||||
ProcessCBData(method_call.argument);
|
||||
return;
|
||||
} else if (cb_data_state.current != null_cb_data) {
|
||||
FinishCBData();
|
||||
}
|
||||
|
||||
// It is an error to write to a register other than the current macro's ARG register before it
|
||||
// has finished execution.
|
||||
if (executing_macro != 0) {
|
||||
@@ -143,49 +309,19 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
|
||||
|
||||
if (regs.reg_array[method] != method_call.argument) {
|
||||
regs.reg_array[method] = method_call.argument;
|
||||
// Color buffers
|
||||
constexpr u32 first_rt_reg = MAXWELL3D_REG_INDEX(rt);
|
||||
constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32);
|
||||
if (method >= first_rt_reg &&
|
||||
method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) {
|
||||
const std::size_t rt_index = (method - first_rt_reg) / registers_per_rt;
|
||||
dirty_flags.color_buffer.set(rt_index);
|
||||
}
|
||||
|
||||
// Zeta buffer
|
||||
constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32);
|
||||
if (method == MAXWELL3D_REG_INDEX(zeta_enable) ||
|
||||
method == MAXWELL3D_REG_INDEX(zeta_width) ||
|
||||
method == MAXWELL3D_REG_INDEX(zeta_height) ||
|
||||
(method >= MAXWELL3D_REG_INDEX(zeta) &&
|
||||
method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) {
|
||||
dirty_flags.zeta_buffer = true;
|
||||
}
|
||||
|
||||
// Shader
|
||||
constexpr u32 shader_registers_count =
|
||||
sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32);
|
||||
if (method >= MAXWELL3D_REG_INDEX(shader_config[0]) &&
|
||||
method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) {
|
||||
dirty_flags.shaders = true;
|
||||
}
|
||||
|
||||
// Vertex format
|
||||
if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) &&
|
||||
method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
|
||||
dirty_flags.vertex_attrib_format = true;
|
||||
}
|
||||
|
||||
// Vertex buffer
|
||||
if (method >= MAXWELL3D_REG_INDEX(vertex_array) &&
|
||||
method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * Regs::NumVertexArrays) {
|
||||
dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2);
|
||||
} else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) &&
|
||||
method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * Regs::NumVertexArrays) {
|
||||
dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1);
|
||||
} else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) &&
|
||||
method < MAXWELL3D_REG_INDEX(instanced_arrays) + Regs::NumVertexArrays) {
|
||||
dirty_flags.vertex_array.set(method - MAXWELL3D_REG_INDEX(instanced_arrays));
|
||||
const std::size_t dirty_reg = dirty_pointers[method];
|
||||
if (dirty_reg) {
|
||||
dirty.regs[dirty_reg] = true;
|
||||
if (dirty_reg >= DIRTY_REGS_POS(vertex_array) &&
|
||||
dirty_reg < DIRTY_REGS_POS(vertex_array_buffers)) {
|
||||
dirty.vertex_array_buffers = true;
|
||||
} else if (dirty_reg >= DIRTY_REGS_POS(vertex_instance) &&
|
||||
dirty_reg < DIRTY_REGS_POS(vertex_instances)) {
|
||||
dirty.vertex_instances = true;
|
||||
} else if (dirty_reg >= DIRTY_REGS_POS(render_target) &&
|
||||
dirty_reg < DIRTY_REGS_POS(render_settings)) {
|
||||
dirty.render_settings = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -214,7 +350,7 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]):
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]):
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]): {
|
||||
ProcessCBData(method_call.argument);
|
||||
StartCBData(method);
|
||||
break;
|
||||
}
|
||||
case MAXWELL3D_REG_INDEX(cb_bind[0].raw_config): {
|
||||
@@ -261,7 +397,7 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
|
||||
const bool is_last_call = method_call.IsLastCall();
|
||||
upload_state.ProcessData(method_call.argument, is_last_call);
|
||||
if (is_last_call) {
|
||||
dirty_flags.OnMemoryWrite();
|
||||
dirty.OnMemoryWrite();
|
||||
}
|
||||
break;
|
||||
}
|
||||
@@ -333,7 +469,6 @@ void Maxwell3D::ProcessQueryGet() {
|
||||
query_result.timestamp = system.CoreTiming().GetTicks();
|
||||
memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
|
||||
}
|
||||
dirty_flags.OnMemoryWrite();
|
||||
break;
|
||||
}
|
||||
default:
|
||||
@@ -405,23 +540,39 @@ void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) {
|
||||
}
|
||||
|
||||
void Maxwell3D::ProcessCBData(u32 value) {
|
||||
const u32 id = cb_data_state.id;
|
||||
cb_data_state.buffer[id][cb_data_state.counter] = value;
|
||||
// Increment the current buffer position.
|
||||
regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4;
|
||||
cb_data_state.counter++;
|
||||
}
|
||||
|
||||
void Maxwell3D::StartCBData(u32 method) {
|
||||
constexpr u32 first_cb_data = MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]);
|
||||
cb_data_state.start_pos = regs.const_buffer.cb_pos;
|
||||
cb_data_state.id = method - first_cb_data;
|
||||
cb_data_state.current = method;
|
||||
cb_data_state.counter = 0;
|
||||
ProcessCBData(regs.const_buffer.cb_data[cb_data_state.id]);
|
||||
}
|
||||
|
||||
void Maxwell3D::FinishCBData() {
|
||||
// Write the input value to the current const buffer at the current position.
|
||||
const GPUVAddr buffer_address = regs.const_buffer.BufferAddress();
|
||||
ASSERT(buffer_address != 0);
|
||||
|
||||
// Don't allow writing past the end of the buffer.
|
||||
ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size);
|
||||
ASSERT(regs.const_buffer.cb_pos <= regs.const_buffer.cb_size);
|
||||
|
||||
const GPUVAddr address{buffer_address + regs.const_buffer.cb_pos};
|
||||
const GPUVAddr address{buffer_address + cb_data_state.start_pos};
|
||||
const std::size_t size = regs.const_buffer.cb_pos - cb_data_state.start_pos;
|
||||
|
||||
u8* ptr{memory_manager.GetPointer(address)};
|
||||
rasterizer.InvalidateRegion(ToCacheAddr(ptr), sizeof(u32));
|
||||
memory_manager.Write<u32>(address, value);
|
||||
const u32 id = cb_data_state.id;
|
||||
memory_manager.WriteBlock(address, cb_data_state.buffer[id].data(), size);
|
||||
dirty.OnMemoryWrite();
|
||||
|
||||
dirty_flags.OnMemoryWrite();
|
||||
|
||||
// Increment the current buffer position.
|
||||
regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4;
|
||||
cb_data_state.id = null_cb_data;
|
||||
cb_data_state.current = null_cb_data;
|
||||
}
|
||||
|
||||
Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
|
||||
|
||||
@@ -67,6 +67,7 @@ public:
|
||||
static constexpr std::size_t MaxShaderStage = 5;
|
||||
// Maximum number of const buffers per shader stage.
|
||||
static constexpr std::size_t MaxConstBuffers = 18;
|
||||
static constexpr std::size_t MaxConstBufferSize = 0x10000;
|
||||
|
||||
enum class QueryMode : u32 {
|
||||
Write = 0,
|
||||
@@ -1123,23 +1124,77 @@ public:
|
||||
|
||||
State state{};
|
||||
|
||||
struct DirtyFlags {
|
||||
std::bitset<8> color_buffer{0xFF};
|
||||
std::bitset<32> vertex_array{0xFFFFFFFF};
|
||||
struct DirtyRegs {
|
||||
static constexpr std::size_t NUM_REGS = 256;
|
||||
union {
|
||||
struct {
|
||||
bool null_dirty;
|
||||
|
||||
bool vertex_attrib_format = true;
|
||||
bool zeta_buffer = true;
|
||||
bool shaders = true;
|
||||
// Vertex Attributes
|
||||
bool vertex_attrib_format;
|
||||
|
||||
// Vertex Arrays
|
||||
std::array<bool, 32> vertex_array;
|
||||
|
||||
bool vertex_array_buffers;
|
||||
|
||||
// Vertex Instances
|
||||
std::array<bool, 32> vertex_instance;
|
||||
|
||||
bool vertex_instances;
|
||||
|
||||
// Render Targets
|
||||
std::array<bool, 8> render_target;
|
||||
bool depth_buffer;
|
||||
|
||||
bool render_settings;
|
||||
|
||||
// Shaders
|
||||
bool shaders;
|
||||
|
||||
// Rasterizer State
|
||||
bool viewport;
|
||||
bool clip_coefficient;
|
||||
bool cull_mode;
|
||||
bool primitive_restart;
|
||||
bool depth_test;
|
||||
bool stencil_test;
|
||||
bool blend_state;
|
||||
bool scissor_test;
|
||||
bool transform_feedback;
|
||||
bool color_mask;
|
||||
bool polygon_offset;
|
||||
|
||||
// Complementary
|
||||
bool viewport_transform;
|
||||
bool screen_y_control;
|
||||
|
||||
bool memory_general;
|
||||
};
|
||||
std::array<bool, NUM_REGS> regs;
|
||||
};
|
||||
|
||||
void ResetVertexArrays() {
|
||||
vertex_array.fill(true);
|
||||
vertex_array_buffers = true;
|
||||
}
|
||||
|
||||
void ResetRenderTargets() {
|
||||
depth_buffer = true;
|
||||
render_target.fill(true);
|
||||
render_settings = true;
|
||||
}
|
||||
|
||||
void OnMemoryWrite() {
|
||||
zeta_buffer = true;
|
||||
shaders = true;
|
||||
color_buffer.set();
|
||||
vertex_array.set();
|
||||
memory_general = true;
|
||||
ResetRenderTargets();
|
||||
ResetVertexArrays();
|
||||
}
|
||||
};
|
||||
|
||||
DirtyFlags dirty_flags;
|
||||
} dirty{};
|
||||
|
||||
std::array<u8, Regs::NUM_REGS> dirty_pointers{};
|
||||
|
||||
/// Reads a register value located at the input method address
|
||||
u32 GetRegisterValue(u32 method) const;
|
||||
@@ -1191,6 +1246,15 @@ private:
|
||||
/// Interpreter for the macro codes uploaded to the GPU.
|
||||
MacroInterpreter macro_interpreter;
|
||||
|
||||
static constexpr u32 null_cb_data = 0xFFFFFFFF;
|
||||
struct {
|
||||
std::array<std::array<u32, 0x4000>, 16> buffer;
|
||||
u32 current{null_cb_data};
|
||||
u32 id{null_cb_data};
|
||||
u32 start_pos{};
|
||||
u32 counter{};
|
||||
} cb_data_state;
|
||||
|
||||
Upload::State upload_state;
|
||||
|
||||
/// Retrieves information about a specific TIC entry from the TIC buffer.
|
||||
@@ -1199,6 +1263,8 @@ private:
|
||||
/// Retrieves information about a specific TSC entry from the TSC buffer.
|
||||
Texture::TSCEntry GetTSCEntry(u32 tsc_index) const;
|
||||
|
||||
void InitDirtySettings();
|
||||
|
||||
/**
|
||||
* Call a macro on this engine.
|
||||
* @param method Method to call
|
||||
@@ -1222,7 +1288,9 @@ private:
|
||||
void ProcessSyncPoint();
|
||||
|
||||
/// Handles a write to the CB_DATA[i] register.
|
||||
void StartCBData(u32 method);
|
||||
void ProcessCBData(u32 value);
|
||||
void FinishCBData();
|
||||
|
||||
/// Handles a write to the CB_BIND register.
|
||||
void ProcessCBBind(Regs::ShaderStage stage);
|
||||
|
||||
@@ -58,7 +58,7 @@ void MaxwellDMA::HandleCopy() {
|
||||
}
|
||||
|
||||
// All copies here update the main memory, so mark all rasterizer states as invalid.
|
||||
system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
|
||||
system.GPU().Maxwell3D().dirty.OnMemoryWrite();
|
||||
|
||||
if (regs.exec.is_dst_linear && regs.exec.is_src_linear) {
|
||||
// When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D
|
||||
|
||||
@@ -78,7 +78,7 @@ union Attribute {
|
||||
constexpr explicit Attribute(u64 value) : value(value) {}
|
||||
|
||||
enum class Index : u64 {
|
||||
PointSize = 6,
|
||||
LayerViewportPointSize = 6,
|
||||
Position = 7,
|
||||
Attribute_0 = 8,
|
||||
Attribute_31 = 39,
|
||||
@@ -931,8 +931,6 @@ union Instruction {
|
||||
} csetp;
|
||||
|
||||
union {
|
||||
BitField<35, 4, PredCondition> cond;
|
||||
BitField<49, 1, u64> h_and;
|
||||
BitField<6, 1, u64> ftz;
|
||||
BitField<45, 2, PredOperation> op;
|
||||
BitField<3, 3, u64> pred3;
|
||||
@@ -940,9 +938,21 @@ union Instruction {
|
||||
BitField<43, 1, u64> negate_a;
|
||||
BitField<44, 1, u64> abs_a;
|
||||
BitField<47, 2, HalfType> type_a;
|
||||
BitField<31, 1, u64> negate_b;
|
||||
BitField<30, 1, u64> abs_b;
|
||||
BitField<28, 2, HalfType> type_b;
|
||||
union {
|
||||
BitField<35, 4, PredCondition> cond;
|
||||
BitField<49, 1, u64> h_and;
|
||||
BitField<31, 1, u64> negate_b;
|
||||
BitField<30, 1, u64> abs_b;
|
||||
BitField<28, 2, HalfType> type_b;
|
||||
} reg;
|
||||
union {
|
||||
BitField<56, 1, u64> negate_b;
|
||||
BitField<54, 1, u64> abs_b;
|
||||
} cbuf;
|
||||
union {
|
||||
BitField<49, 4, PredCondition> cond;
|
||||
BitField<53, 1, u64> h_and;
|
||||
} cbuf_and_imm;
|
||||
BitField<42, 1, u64> neg_pred;
|
||||
BitField<39, 3, u64> pred39;
|
||||
} hsetp2;
|
||||
@@ -1278,6 +1288,7 @@ union Instruction {
|
||||
union {
|
||||
BitField<49, 1, u64> nodep_flag;
|
||||
BitField<53, 4, u64> texture_info;
|
||||
BitField<59, 1, u64> fp32_flag;
|
||||
|
||||
TextureType GetTextureType() const {
|
||||
// The TLDS instruction has a weird encoding for the texture type.
|
||||
@@ -1367,6 +1378,20 @@ union Instruction {
|
||||
}
|
||||
} bra;
|
||||
|
||||
union {
|
||||
BitField<20, 24, u64> target;
|
||||
BitField<5, 1, u64> constant_buffer;
|
||||
|
||||
s32 GetBranchExtend() const {
|
||||
// Sign extend the branch target offset
|
||||
u32 mask = 1U << (24 - 1);
|
||||
u32 value = static_cast<u32>(target);
|
||||
// The branch offset is relative to the next instruction and is stored in bytes, so
|
||||
// divide it by the size of an instruction and add 1 to it.
|
||||
return static_cast<s32>((value ^ mask) - mask) / sizeof(Instruction) + 1;
|
||||
}
|
||||
} brx;
|
||||
|
||||
union {
|
||||
BitField<39, 1, u64> emit; // EmitVertex
|
||||
BitField<40, 1, u64> cut; // EndPrimitive
|
||||
@@ -1464,6 +1489,7 @@ public:
|
||||
BFE_IMM,
|
||||
BFI_IMM_R,
|
||||
BRA,
|
||||
BRX,
|
||||
PBK,
|
||||
LD_A,
|
||||
LD_L,
|
||||
@@ -1532,7 +1558,9 @@ public:
|
||||
HFMA2_RC,
|
||||
HFMA2_RR,
|
||||
HFMA2_IMM_R,
|
||||
HSETP2_C,
|
||||
HSETP2_R,
|
||||
HSETP2_IMM,
|
||||
HSET2_R,
|
||||
POPC_C,
|
||||
POPC_R,
|
||||
@@ -1738,6 +1766,7 @@ private:
|
||||
INST("111000101001----", Id::SSY, Type::Flow, "SSY"),
|
||||
INST("111000101010----", Id::PBK, Type::Flow, "PBK"),
|
||||
INST("111000100100----", Id::BRA, Type::Flow, "BRA"),
|
||||
INST("111000100101----", Id::BRX, Type::Flow, "BRX"),
|
||||
INST("1111000011111---", Id::SYNC, Type::Flow, "SYNC"),
|
||||
INST("111000110100---", Id::BRK, Type::Flow, "BRK"),
|
||||
INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"),
|
||||
@@ -1760,7 +1789,7 @@ private:
|
||||
INST("1101111101010---", Id::TXQ_B, Type::Texture, "TXQ_B"),
|
||||
INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"),
|
||||
INST("11011100--11----", Id::TLD, Type::Texture, "TLD"),
|
||||
INST("1101101---------", Id::TLDS, Type::Texture, "TLDS"),
|
||||
INST("1101-01---------", Id::TLDS, Type::Texture, "TLDS"),
|
||||
INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"),
|
||||
INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"),
|
||||
INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"),
|
||||
@@ -1814,7 +1843,9 @@ private:
|
||||
INST("01100---1-------", Id::HFMA2_RC, Type::Hfma2, "HFMA2_RC"),
|
||||
INST("0101110100000---", Id::HFMA2_RR, Type::Hfma2, "HFMA2_RR"),
|
||||
INST("01110---0-------", Id::HFMA2_IMM_R, Type::Hfma2, "HFMA2_R_IMM"),
|
||||
INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP_R"),
|
||||
INST("0111111-1-------", Id::HSETP2_C, Type::HalfSetPredicate, "HSETP2_C"),
|
||||
INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"),
|
||||
INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"),
|
||||
INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"),
|
||||
INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
|
||||
INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"),
|
||||
|
||||
@@ -31,7 +31,7 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) {
|
||||
|
||||
GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{renderer} {
|
||||
auto& rasterizer{renderer.Rasterizer()};
|
||||
memory_manager = std::make_unique<Tegra::MemoryManager>(rasterizer);
|
||||
memory_manager = std::make_unique<Tegra::MemoryManager>(system, rasterizer);
|
||||
dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
|
||||
maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
|
||||
fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager);
|
||||
@@ -50,6 +50,14 @@ const Engines::Maxwell3D& GPU::Maxwell3D() const {
|
||||
return *maxwell_3d;
|
||||
}
|
||||
|
||||
Engines::KeplerCompute& GPU::KeplerCompute() {
|
||||
return *kepler_compute;
|
||||
}
|
||||
|
||||
const Engines::KeplerCompute& GPU::KeplerCompute() const {
|
||||
return *kepler_compute;
|
||||
}
|
||||
|
||||
MemoryManager& GPU::MemoryManager() {
|
||||
return *memory_manager;
|
||||
}
|
||||
|
||||
@@ -155,6 +155,12 @@ public:
|
||||
/// Returns a const reference to the Maxwell3D GPU engine.
|
||||
const Engines::Maxwell3D& Maxwell3D() const;
|
||||
|
||||
/// Returns a reference to the KeplerCompute GPU engine.
|
||||
Engines::KeplerCompute& KeplerCompute();
|
||||
|
||||
/// Returns a reference to the KeplerCompute GPU engine.
|
||||
const Engines::KeplerCompute& KeplerCompute() const;
|
||||
|
||||
/// Returns a reference to the GPU memory manager.
|
||||
Tegra::MemoryManager& MemoryManager();
|
||||
|
||||
|
||||
@@ -4,14 +4,18 @@
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "common/microprofile.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/macro_interpreter.h"
|
||||
|
||||
MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192));
|
||||
|
||||
namespace Tegra {
|
||||
|
||||
MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {}
|
||||
|
||||
void MacroInterpreter::Execute(u32 offset, std::vector<u32> parameters) {
|
||||
MICROPROFILE_SCOPE(MacroInterp);
|
||||
Reset();
|
||||
registers[1] = parameters[0];
|
||||
this->parameters = std::move(parameters);
|
||||
|
||||
@@ -5,13 +5,17 @@
|
||||
#include "common/alignment.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "core/core.h"
|
||||
#include "core/hle/kernel/process.h"
|
||||
#include "core/hle/kernel/vm_manager.h"
|
||||
#include "core/memory.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
|
||||
namespace Tegra {
|
||||
|
||||
MemoryManager::MemoryManager(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {
|
||||
MemoryManager::MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
|
||||
: rasterizer{rasterizer}, system{system} {
|
||||
std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr);
|
||||
std::fill(page_table.attributes.begin(), page_table.attributes.end(),
|
||||
Common::PageType::Unmapped);
|
||||
@@ -49,6 +53,11 @@ GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, u64 size) {
|
||||
const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)};
|
||||
|
||||
MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), aligned_size, cpu_addr);
|
||||
ASSERT(system.CurrentProcess()
|
||||
->VMManager()
|
||||
.SetMemoryAttribute(cpu_addr, size, Kernel::MemoryAttribute::DeviceMapped,
|
||||
Kernel::MemoryAttribute::DeviceMapped)
|
||||
.IsSuccess());
|
||||
|
||||
return gpu_addr;
|
||||
}
|
||||
@@ -59,7 +68,11 @@ GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size)
|
||||
const u64 aligned_size{Common::AlignUp(size, page_size)};
|
||||
|
||||
MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), aligned_size, cpu_addr);
|
||||
|
||||
ASSERT(system.CurrentProcess()
|
||||
->VMManager()
|
||||
.SetMemoryAttribute(cpu_addr, size, Kernel::MemoryAttribute::DeviceMapped,
|
||||
Kernel::MemoryAttribute::DeviceMapped)
|
||||
.IsSuccess());
|
||||
return gpu_addr;
|
||||
}
|
||||
|
||||
@@ -68,9 +81,16 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) {
|
||||
|
||||
const u64 aligned_size{Common::AlignUp(size, page_size)};
|
||||
const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))};
|
||||
const auto cpu_addr = GpuToCpuAddress(gpu_addr);
|
||||
ASSERT(cpu_addr);
|
||||
|
||||
rasterizer.FlushAndInvalidateRegion(cache_addr, aligned_size);
|
||||
UnmapRange(gpu_addr, aligned_size);
|
||||
ASSERT(system.CurrentProcess()
|
||||
->VMManager()
|
||||
.SetMemoryAttribute(cpu_addr.value(), size, Kernel::MemoryAttribute::DeviceMapped,
|
||||
Kernel::MemoryAttribute::None)
|
||||
.IsSuccess());
|
||||
|
||||
return gpu_addr;
|
||||
}
|
||||
|
||||
@@ -14,6 +14,10 @@ namespace VideoCore {
|
||||
class RasterizerInterface;
|
||||
}
|
||||
|
||||
namespace Core {
|
||||
class System;
|
||||
}
|
||||
|
||||
namespace Tegra {
|
||||
|
||||
/**
|
||||
@@ -47,7 +51,7 @@ struct VirtualMemoryArea {
|
||||
|
||||
class MemoryManager final {
|
||||
public:
|
||||
explicit MemoryManager(VideoCore::RasterizerInterface& rasterizer);
|
||||
explicit MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer);
|
||||
~MemoryManager();
|
||||
|
||||
GPUVAddr AllocateSpace(u64 size, u64 align);
|
||||
@@ -173,6 +177,8 @@ private:
|
||||
Common::PageTable page_table{page_bits};
|
||||
VMAMap vma_map;
|
||||
VideoCore::RasterizerInterface& rasterizer;
|
||||
|
||||
Core::System& system;
|
||||
};
|
||||
|
||||
} // namespace Tegra
|
||||
|
||||
@@ -34,6 +34,9 @@ public:
|
||||
/// Clear the current framebuffer
|
||||
virtual void Clear() = 0;
|
||||
|
||||
/// Dispatches a compute shader invocation
|
||||
virtual void DispatchCompute(GPUVAddr code_addr) = 0;
|
||||
|
||||
/// Notify rasterizer that all caches should be flushed to Switch memory
|
||||
virtual void FlushAll() = 0;
|
||||
|
||||
@@ -47,6 +50,9 @@ public:
|
||||
/// and invalidated
|
||||
virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0;
|
||||
|
||||
/// Notify rasterizer that a frame is about to finish
|
||||
virtual void TickFrame() = 0;
|
||||
|
||||
/// Attempt to use a faster method to perform a surface copy
|
||||
virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
|
||||
const Tegra::Engines::Fermi2D::Regs::Surface& dst,
|
||||
|
||||
@@ -2,103 +2,57 @@
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <cstring>
|
||||
#include <memory>
|
||||
|
||||
#include "common/alignment.h"
|
||||
#include "core/core.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "video_core/renderer_opengl/gl_buffer_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_rasterizer.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset,
|
||||
std::size_t alignment, u8* host_ptr)
|
||||
: RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size}, offset{offset},
|
||||
alignment{alignment} {}
|
||||
OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
|
||||
std::size_t stream_size)
|
||||
: VideoCommon::BufferCache<OGLBuffer, GLuint, OGLStreamBuffer>{
|
||||
rasterizer, system, std::make_unique<OGLStreamBuffer>(stream_size, true)} {}
|
||||
|
||||
OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size)
|
||||
: RasterizerCache{rasterizer}, stream_buffer(size, true) {}
|
||||
OGLBufferCache::~OGLBufferCache() = default;
|
||||
|
||||
GLintptr OGLBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment,
|
||||
bool cache) {
|
||||
std::lock_guard lock{mutex};
|
||||
auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
|
||||
|
||||
// Cache management is a big overhead, so only cache entries with a given size.
|
||||
// TODO: Figure out which size is the best for given games.
|
||||
cache &= size >= 2048;
|
||||
|
||||
const auto& host_ptr{memory_manager.GetPointer(gpu_addr)};
|
||||
if (cache) {
|
||||
auto entry = TryGet(host_ptr);
|
||||
if (entry) {
|
||||
if (entry->GetSize() >= size && entry->GetAlignment() == alignment) {
|
||||
return entry->GetOffset();
|
||||
}
|
||||
Unregister(entry);
|
||||
}
|
||||
}
|
||||
|
||||
AlignBuffer(alignment);
|
||||
const GLintptr uploaded_offset = buffer_offset;
|
||||
|
||||
if (!host_ptr) {
|
||||
return uploaded_offset;
|
||||
}
|
||||
|
||||
std::memcpy(buffer_ptr, host_ptr, size);
|
||||
buffer_ptr += size;
|
||||
buffer_offset += size;
|
||||
|
||||
if (cache) {
|
||||
auto entry = std::make_shared<CachedBufferEntry>(
|
||||
*memory_manager.GpuToCpuAddress(gpu_addr), size, uploaded_offset, alignment, host_ptr);
|
||||
Register(entry);
|
||||
}
|
||||
|
||||
return uploaded_offset;
|
||||
OGLBuffer OGLBufferCache::CreateBuffer(std::size_t size) {
|
||||
OGLBuffer buffer;
|
||||
buffer.Create();
|
||||
glNamedBufferData(buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW);
|
||||
return buffer;
|
||||
}
|
||||
|
||||
GLintptr OGLBufferCache::UploadHostMemory(const void* raw_pointer, std::size_t size,
|
||||
std::size_t alignment) {
|
||||
std::lock_guard lock{mutex};
|
||||
AlignBuffer(alignment);
|
||||
std::memcpy(buffer_ptr, raw_pointer, size);
|
||||
const GLintptr uploaded_offset = buffer_offset;
|
||||
|
||||
buffer_ptr += size;
|
||||
buffer_offset += size;
|
||||
return uploaded_offset;
|
||||
const GLuint* OGLBufferCache::ToHandle(const OGLBuffer& buffer) {
|
||||
return &buffer.handle;
|
||||
}
|
||||
|
||||
bool OGLBufferCache::Map(std::size_t max_size) {
|
||||
bool invalidate;
|
||||
std::tie(buffer_ptr, buffer_offset_base, invalidate) =
|
||||
stream_buffer.Map(static_cast<GLsizeiptr>(max_size), 4);
|
||||
buffer_offset = buffer_offset_base;
|
||||
|
||||
if (invalidate) {
|
||||
InvalidateAll();
|
||||
}
|
||||
return invalidate;
|
||||
const GLuint* OGLBufferCache::GetEmptyBuffer(std::size_t) {
|
||||
static const GLuint null_buffer = 0;
|
||||
return &null_buffer;
|
||||
}
|
||||
|
||||
void OGLBufferCache::Unmap() {
|
||||
stream_buffer.Unmap(buffer_offset - buffer_offset_base);
|
||||
void OGLBufferCache::UploadBufferData(const OGLBuffer& buffer, std::size_t offset, std::size_t size,
|
||||
const u8* data) {
|
||||
glNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset),
|
||||
static_cast<GLsizeiptr>(size), data);
|
||||
}
|
||||
|
||||
GLuint OGLBufferCache::GetHandle() const {
|
||||
return stream_buffer.GetHandle();
|
||||
void OGLBufferCache::DownloadBufferData(const OGLBuffer& buffer, std::size_t offset,
|
||||
std::size_t size, u8* data) {
|
||||
glGetNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset),
|
||||
static_cast<GLsizeiptr>(size), data);
|
||||
}
|
||||
|
||||
void OGLBufferCache::AlignBuffer(std::size_t alignment) {
|
||||
// Align the offset, not the mapped pointer
|
||||
const GLintptr offset_aligned =
|
||||
static_cast<GLintptr>(Common::AlignUp(static_cast<std::size_t>(buffer_offset), alignment));
|
||||
buffer_ptr += offset_aligned - buffer_offset;
|
||||
buffer_offset = offset_aligned;
|
||||
void OGLBufferCache::CopyBufferData(const OGLBuffer& src, const OGLBuffer& dst,
|
||||
std::size_t src_offset, std::size_t dst_offset,
|
||||
std::size_t size) {
|
||||
glCopyNamedBufferSubData(src.handle, dst.handle, static_cast<GLintptr>(src_offset),
|
||||
static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size));
|
||||
}
|
||||
|
||||
} // namespace OpenGL
|
||||
|
||||
@@ -4,80 +4,44 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <memory>
|
||||
#include <tuple>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/buffer_cache.h"
|
||||
#include "video_core/rasterizer_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_stream_buffer.h"
|
||||
|
||||
namespace Core {
|
||||
class System;
|
||||
}
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
class OGLStreamBuffer;
|
||||
class RasterizerOpenGL;
|
||||
|
||||
class CachedBufferEntry final : public RasterizerCacheObject {
|
||||
class OGLBufferCache final : public VideoCommon::BufferCache<OGLBuffer, GLuint, OGLStreamBuffer> {
|
||||
public:
|
||||
explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset,
|
||||
std::size_t alignment, u8* host_ptr);
|
||||
explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
|
||||
std::size_t stream_size);
|
||||
~OGLBufferCache();
|
||||
|
||||
VAddr GetCpuAddr() const override {
|
||||
return cpu_addr;
|
||||
}
|
||||
|
||||
std::size_t GetSizeInBytes() const override {
|
||||
return size;
|
||||
}
|
||||
|
||||
std::size_t GetSize() const {
|
||||
return size;
|
||||
}
|
||||
|
||||
GLintptr GetOffset() const {
|
||||
return offset;
|
||||
}
|
||||
|
||||
std::size_t GetAlignment() const {
|
||||
return alignment;
|
||||
}
|
||||
|
||||
private:
|
||||
VAddr cpu_addr{};
|
||||
std::size_t size{};
|
||||
GLintptr offset{};
|
||||
std::size_t alignment{};
|
||||
};
|
||||
|
||||
class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
|
||||
public:
|
||||
explicit OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size);
|
||||
|
||||
/// Uploads data from a guest GPU address. Returns host's buffer offset where it's been
|
||||
/// allocated.
|
||||
GLintptr UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
|
||||
bool cache = true);
|
||||
|
||||
/// Uploads from a host memory. Returns host's buffer offset where it's been allocated.
|
||||
GLintptr UploadHostMemory(const void* raw_pointer, std::size_t size, std::size_t alignment = 4);
|
||||
|
||||
bool Map(std::size_t max_size);
|
||||
void Unmap();
|
||||
|
||||
GLuint GetHandle() const;
|
||||
const GLuint* GetEmptyBuffer(std::size_t) override;
|
||||
|
||||
protected:
|
||||
void AlignBuffer(std::size_t alignment);
|
||||
OGLBuffer CreateBuffer(std::size_t size) override;
|
||||
|
||||
// We do not have to flush this cache as things in it are never modified by us.
|
||||
void FlushObjectInner(const std::shared_ptr<CachedBufferEntry>& object) override {}
|
||||
const GLuint* ToHandle(const OGLBuffer& buffer) override;
|
||||
|
||||
private:
|
||||
OGLStreamBuffer stream_buffer;
|
||||
void UploadBufferData(const OGLBuffer& buffer, std::size_t offset, std::size_t size,
|
||||
const u8* data) override;
|
||||
|
||||
u8* buffer_ptr = nullptr;
|
||||
GLintptr buffer_offset = 0;
|
||||
GLintptr buffer_offset_base = 0;
|
||||
void DownloadBufferData(const OGLBuffer& buffer, std::size_t offset, std::size_t size,
|
||||
u8* data) override;
|
||||
|
||||
void CopyBufferData(const OGLBuffer& src, const OGLBuffer& dst, std::size_t src_offset,
|
||||
std::size_t dst_offset, std::size_t size) override;
|
||||
};
|
||||
|
||||
} // namespace OpenGL
|
||||
|
||||
@@ -24,8 +24,10 @@ T GetInteger(GLenum pname) {
|
||||
|
||||
Device::Device() {
|
||||
uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
|
||||
shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
|
||||
max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
|
||||
max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
|
||||
has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array;
|
||||
has_variable_aoffi = TestVariableAoffi();
|
||||
has_component_indexing_bug = TestComponentIndexingBug();
|
||||
}
|
||||
@@ -34,6 +36,7 @@ Device::Device(std::nullptr_t) {
|
||||
uniform_buffer_alignment = 0;
|
||||
max_vertex_attributes = 16;
|
||||
max_varyings = 15;
|
||||
has_vertex_viewport_layer = true;
|
||||
has_variable_aoffi = true;
|
||||
has_component_indexing_bug = false;
|
||||
}
|
||||
|
||||
@@ -18,6 +18,10 @@ public:
|
||||
return uniform_buffer_alignment;
|
||||
}
|
||||
|
||||
std::size_t GetShaderStorageBufferAlignment() const {
|
||||
return shader_storage_alignment;
|
||||
}
|
||||
|
||||
u32 GetMaxVertexAttributes() const {
|
||||
return max_vertex_attributes;
|
||||
}
|
||||
@@ -26,6 +30,10 @@ public:
|
||||
return max_varyings;
|
||||
}
|
||||
|
||||
bool HasVertexViewportLayer() const {
|
||||
return has_vertex_viewport_layer;
|
||||
}
|
||||
|
||||
bool HasVariableAoffi() const {
|
||||
return has_variable_aoffi;
|
||||
}
|
||||
@@ -39,8 +47,10 @@ private:
|
||||
static bool TestComponentIndexingBug();
|
||||
|
||||
std::size_t uniform_buffer_alignment{};
|
||||
std::size_t shader_storage_alignment{};
|
||||
u32 max_vertex_attributes{};
|
||||
u32 max_varyings{};
|
||||
bool has_vertex_viewport_layer{};
|
||||
bool has_variable_aoffi{};
|
||||
bool has_component_indexing_bug{};
|
||||
};
|
||||
|
||||
@@ -1,102 +0,0 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "common/logging/log.h"
|
||||
#include "core/core.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_global_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_rasterizer.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
|
||||
#include "video_core/renderer_opengl/utils.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
CachedGlobalRegion::CachedGlobalRegion(VAddr cpu_addr, u8* host_ptr, u32 size, u32 max_size)
|
||||
: RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, host_ptr{host_ptr}, size{size},
|
||||
max_size{max_size} {
|
||||
buffer.Create();
|
||||
LabelGLObject(GL_BUFFER, buffer.handle, cpu_addr, "GlobalMemory");
|
||||
}
|
||||
|
||||
CachedGlobalRegion::~CachedGlobalRegion() = default;
|
||||
|
||||
void CachedGlobalRegion::Reload(u32 size_) {
|
||||
size = size_;
|
||||
if (size > max_size) {
|
||||
size = max_size;
|
||||
LOG_CRITICAL(HW_GPU, "Global region size {} exceeded the supported size {}!", size_,
|
||||
max_size);
|
||||
}
|
||||
glNamedBufferData(buffer.handle, size, host_ptr, GL_STREAM_DRAW);
|
||||
}
|
||||
|
||||
void CachedGlobalRegion::Flush() {
|
||||
LOG_DEBUG(Render_OpenGL, "Flushing {} bytes to CPU memory address 0x{:16}", size, cpu_addr);
|
||||
glGetNamedBufferSubData(buffer.handle, 0, static_cast<GLsizeiptr>(size), host_ptr);
|
||||
}
|
||||
|
||||
GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const {
|
||||
const auto search{reserve.find(addr)};
|
||||
if (search == reserve.end()) {
|
||||
return {};
|
||||
}
|
||||
return search->second;
|
||||
}
|
||||
|
||||
GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr,
|
||||
u32 size) {
|
||||
GlobalRegion region{TryGetReservedGlobalRegion(ToCacheAddr(host_ptr), size)};
|
||||
if (!region) {
|
||||
// No reserved surface available, create a new one and reserve it
|
||||
auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
|
||||
const auto cpu_addr{memory_manager.GpuToCpuAddress(addr)};
|
||||
ASSERT(cpu_addr);
|
||||
|
||||
region = std::make_shared<CachedGlobalRegion>(*cpu_addr, host_ptr, size, max_ssbo_size);
|
||||
ReserveGlobalRegion(region);
|
||||
}
|
||||
region->Reload(size);
|
||||
return region;
|
||||
}
|
||||
|
||||
void GlobalRegionCacheOpenGL::ReserveGlobalRegion(GlobalRegion region) {
|
||||
reserve.insert_or_assign(region->GetCacheAddr(), std::move(region));
|
||||
}
|
||||
|
||||
GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer)
|
||||
: RasterizerCache{rasterizer} {
|
||||
GLint max_ssbo_size_;
|
||||
glGetIntegerv(GL_MAX_SHADER_STORAGE_BLOCK_SIZE, &max_ssbo_size_);
|
||||
max_ssbo_size = static_cast<u32>(max_ssbo_size_);
|
||||
}
|
||||
|
||||
GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion(
|
||||
const GLShader::GlobalMemoryEntry& global_region,
|
||||
Tegra::Engines::Maxwell3D::Regs::ShaderStage stage) {
|
||||
std::lock_guard lock{mutex};
|
||||
|
||||
auto& gpu{Core::System::GetInstance().GPU()};
|
||||
auto& memory_manager{gpu.MemoryManager()};
|
||||
const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<std::size_t>(stage)]};
|
||||
const auto addr{cbufs.const_buffers[global_region.GetCbufIndex()].address +
|
||||
global_region.GetCbufOffset()};
|
||||
const auto actual_addr{memory_manager.Read<u64>(addr)};
|
||||
const auto size{memory_manager.Read<u32>(addr + 8)};
|
||||
|
||||
// Look up global region in the cache based on address
|
||||
const auto& host_ptr{memory_manager.GetPointer(actual_addr)};
|
||||
GlobalRegion region{TryGet(host_ptr)};
|
||||
|
||||
if (!region) {
|
||||
// No global region found - create a new one
|
||||
region = GetUncachedGlobalRegion(actual_addr, host_ptr, size);
|
||||
Register(region);
|
||||
}
|
||||
|
||||
return region;
|
||||
}
|
||||
|
||||
} // namespace OpenGL
|
||||
@@ -1,82 +0,0 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/rasterizer_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
namespace GLShader {
|
||||
class GlobalMemoryEntry;
|
||||
}
|
||||
|
||||
class RasterizerOpenGL;
|
||||
class CachedGlobalRegion;
|
||||
using GlobalRegion = std::shared_ptr<CachedGlobalRegion>;
|
||||
|
||||
class CachedGlobalRegion final : public RasterizerCacheObject {
|
||||
public:
|
||||
explicit CachedGlobalRegion(VAddr cpu_addr, u8* host_ptr, u32 size, u32 max_size);
|
||||
~CachedGlobalRegion();
|
||||
|
||||
VAddr GetCpuAddr() const override {
|
||||
return cpu_addr;
|
||||
}
|
||||
|
||||
std::size_t GetSizeInBytes() const override {
|
||||
return size;
|
||||
}
|
||||
|
||||
/// Gets the GL program handle for the buffer
|
||||
GLuint GetBufferHandle() const {
|
||||
return buffer.handle;
|
||||
}
|
||||
|
||||
/// Reloads the global region from guest memory
|
||||
void Reload(u32 size_);
|
||||
|
||||
void Flush();
|
||||
|
||||
private:
|
||||
VAddr cpu_addr{};
|
||||
u8* host_ptr{};
|
||||
u32 size{};
|
||||
u32 max_size{};
|
||||
|
||||
OGLBuffer buffer;
|
||||
};
|
||||
|
||||
class GlobalRegionCacheOpenGL final : public RasterizerCache<GlobalRegion> {
|
||||
public:
|
||||
explicit GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer);
|
||||
|
||||
/// Gets the current specified shader stage program
|
||||
GlobalRegion GetGlobalRegion(const GLShader::GlobalMemoryEntry& descriptor,
|
||||
Tegra::Engines::Maxwell3D::Regs::ShaderStage stage);
|
||||
|
||||
protected:
|
||||
void FlushObjectInner(const GlobalRegion& object) override {
|
||||
object->Flush();
|
||||
}
|
||||
|
||||
private:
|
||||
GlobalRegion TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const;
|
||||
GlobalRegion GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr, u32 size);
|
||||
void ReserveGlobalRegion(GlobalRegion region);
|
||||
|
||||
std::unordered_map<CacheAddr, GlobalRegion> reserve;
|
||||
u32 max_ssbo_size{};
|
||||
};
|
||||
|
||||
} // namespace OpenGL
|
||||
@@ -4,6 +4,7 @@
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <bitset>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
@@ -19,7 +20,9 @@
|
||||
#include "core/core.h"
|
||||
#include "core/hle/kernel/process.h"
|
||||
#include "core/settings.h"
|
||||
#include "video_core/engines/kepler_compute.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_rasterizer.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_gen.h"
|
||||
@@ -80,16 +83,31 @@ struct DrawParameters {
|
||||
}
|
||||
};
|
||||
|
||||
static std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
|
||||
const GLShader::ConstBufferEntry& entry) {
|
||||
if (!entry.IsIndirect()) {
|
||||
return entry.GetSize();
|
||||
}
|
||||
|
||||
if (buffer.size > Maxwell::MaxConstBufferSize) {
|
||||
LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", buffer.size,
|
||||
Maxwell::MaxConstBufferSize);
|
||||
return Maxwell::MaxConstBufferSize;
|
||||
}
|
||||
|
||||
return buffer.size;
|
||||
}
|
||||
|
||||
RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
|
||||
ScreenInfo& info)
|
||||
: texture_cache{system, *this, device}, shader_cache{*this, system, emu_window, device},
|
||||
global_cache{*this}, system{system}, screen_info{info},
|
||||
buffer_cache(*this, STREAM_BUFFER_SIZE) {
|
||||
system{system}, screen_info{info}, buffer_cache{*this, system, STREAM_BUFFER_SIZE} {
|
||||
OpenGLState::ApplyDefaultState();
|
||||
|
||||
shader_program_manager = std::make_unique<GLShader::ProgramManager>();
|
||||
state.draw.shader_program = 0;
|
||||
state.Apply();
|
||||
clear_framebuffer.Create();
|
||||
|
||||
LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here");
|
||||
CheckExtensions();
|
||||
@@ -109,10 +127,10 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
|
||||
auto& gpu = system.GPU().Maxwell3D();
|
||||
const auto& regs = gpu.regs;
|
||||
|
||||
if (!gpu.dirty_flags.vertex_attrib_format) {
|
||||
if (!gpu.dirty.vertex_attrib_format) {
|
||||
return state.draw.vertex_array;
|
||||
}
|
||||
gpu.dirty_flags.vertex_attrib_format = false;
|
||||
gpu.dirty.vertex_attrib_format = false;
|
||||
|
||||
MICROPROFILE_SCOPE(OpenGL_VAO);
|
||||
|
||||
@@ -129,8 +147,6 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
|
||||
state.draw.vertex_array = vao;
|
||||
state.ApplyVertexArrayState();
|
||||
|
||||
glVertexArrayElementBuffer(vao, buffer_cache.GetHandle());
|
||||
|
||||
// Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL.
|
||||
// Enables the first 16 vertex attributes always, as we don't know which ones are actually
|
||||
// used until shader time. Note, Tegra technically supports 32, but we're capping this to 16
|
||||
@@ -168,7 +184,7 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
|
||||
}
|
||||
|
||||
// Rebinding the VAO invalidates the vertex buffer bindings.
|
||||
gpu.dirty_flags.vertex_array.set();
|
||||
gpu.dirty.ResetVertexArrays();
|
||||
|
||||
state.draw.vertex_array = vao_entry.handle;
|
||||
return vao_entry.handle;
|
||||
@@ -176,17 +192,20 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
|
||||
|
||||
void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
|
||||
auto& gpu = system.GPU().Maxwell3D();
|
||||
const auto& regs = gpu.regs;
|
||||
|
||||
if (gpu.dirty_flags.vertex_array.none())
|
||||
if (!gpu.dirty.vertex_array_buffers)
|
||||
return;
|
||||
gpu.dirty.vertex_array_buffers = false;
|
||||
|
||||
const auto& regs = gpu.regs;
|
||||
|
||||
MICROPROFILE_SCOPE(OpenGL_VB);
|
||||
|
||||
// Upload all guest vertex arrays sequentially to our buffer
|
||||
for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
|
||||
if (!gpu.dirty_flags.vertex_array[index])
|
||||
if (!gpu.dirty.vertex_array[index])
|
||||
continue;
|
||||
gpu.dirty.vertex_array[index] = false;
|
||||
gpu.dirty.vertex_instance[index] = false;
|
||||
|
||||
const auto& vertex_array = regs.vertex_array[index];
|
||||
if (!vertex_array.IsEnabled())
|
||||
@@ -197,11 +216,11 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
|
||||
|
||||
ASSERT(end > start);
|
||||
const u64 size = end - start + 1;
|
||||
const GLintptr vertex_buffer_offset = buffer_cache.UploadMemory(start, size);
|
||||
const auto [vertex_buffer, vertex_buffer_offset] = buffer_cache.UploadMemory(start, size);
|
||||
|
||||
// Bind the vertex array to the buffer at the current offset.
|
||||
glVertexArrayVertexBuffer(vao, index, buffer_cache.GetHandle(), vertex_buffer_offset,
|
||||
vertex_array.stride);
|
||||
vertex_array_pushbuffer.SetVertexBuffer(index, vertex_buffer, vertex_buffer_offset,
|
||||
vertex_array.stride);
|
||||
|
||||
if (regs.instanced_arrays.IsInstancingEnabled(index) && vertex_array.divisor != 0) {
|
||||
// Enable vertex buffer instancing with the specified divisor.
|
||||
@@ -211,11 +230,47 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
|
||||
glVertexArrayBindingDivisor(vao, index, 0);
|
||||
}
|
||||
}
|
||||
|
||||
gpu.dirty_flags.vertex_array.reset();
|
||||
}
|
||||
|
||||
DrawParameters RasterizerOpenGL::SetupDraw() {
|
||||
void RasterizerOpenGL::SetupVertexInstances(GLuint vao) {
|
||||
auto& gpu = system.GPU().Maxwell3D();
|
||||
|
||||
if (!gpu.dirty.vertex_instances)
|
||||
return;
|
||||
gpu.dirty.vertex_instances = false;
|
||||
|
||||
const auto& regs = gpu.regs;
|
||||
// Upload all guest vertex arrays sequentially to our buffer
|
||||
for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
|
||||
if (!gpu.dirty.vertex_instance[index])
|
||||
continue;
|
||||
|
||||
gpu.dirty.vertex_instance[index] = false;
|
||||
|
||||
if (regs.instanced_arrays.IsInstancingEnabled(index) &&
|
||||
regs.vertex_array[index].divisor != 0) {
|
||||
// Enable vertex buffer instancing with the specified divisor.
|
||||
glVertexArrayBindingDivisor(vao, index, regs.vertex_array[index].divisor);
|
||||
} else {
|
||||
// Disable the vertex buffer instancing.
|
||||
glVertexArrayBindingDivisor(vao, index, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
GLintptr RasterizerOpenGL::SetupIndexBuffer() {
|
||||
if (accelerate_draw != AccelDraw::Indexed) {
|
||||
return 0;
|
||||
}
|
||||
MICROPROFILE_SCOPE(OpenGL_Index);
|
||||
const auto& regs = system.GPU().Maxwell3D().regs;
|
||||
const std::size_t size = CalculateIndexBufferSize();
|
||||
const auto [buffer, offset] = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size);
|
||||
vertex_array_pushbuffer.SetIndexBuffer(buffer);
|
||||
return offset;
|
||||
}
|
||||
|
||||
DrawParameters RasterizerOpenGL::SetupDraw(GLintptr index_buffer_offset) {
|
||||
const auto& gpu = system.GPU().Maxwell3D();
|
||||
const auto& regs = gpu.regs;
|
||||
const bool is_indexed = accelerate_draw == AccelDraw::Indexed;
|
||||
@@ -227,11 +282,9 @@ DrawParameters RasterizerOpenGL::SetupDraw() {
|
||||
params.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology);
|
||||
|
||||
if (is_indexed) {
|
||||
MICROPROFILE_SCOPE(OpenGL_Index);
|
||||
params.index_format = MaxwellToGL::IndexFormat(regs.index_array.format);
|
||||
params.count = regs.index_array.count;
|
||||
params.index_buffer_offset =
|
||||
buffer_cache.UploadMemory(regs.index_array.IndexStart(), CalculateIndexBufferSize());
|
||||
params.index_buffer_offset = index_buffer_offset;
|
||||
params.base_vertex = static_cast<GLint>(regs.vb_element_base);
|
||||
} else {
|
||||
params.count = regs.vertex_buffer.count;
|
||||
@@ -247,10 +300,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
|
||||
BaseBindings base_bindings;
|
||||
std::array<bool, Maxwell::NumClipDistances> clip_distances{};
|
||||
|
||||
// Prepare packed bindings
|
||||
bind_ubo_pushbuffer.Setup(base_bindings.cbuf);
|
||||
bind_ssbo_pushbuffer.Setup(base_bindings.gmem);
|
||||
|
||||
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
|
||||
const auto& shader_config = gpu.regs.shader_config[index];
|
||||
const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)};
|
||||
@@ -271,18 +320,17 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
|
||||
|
||||
GLShader::MaxwellUniformData ubo{};
|
||||
ubo.SetFromRegs(gpu, stage);
|
||||
const GLintptr offset =
|
||||
const auto [buffer, offset] =
|
||||
buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
|
||||
|
||||
// Bind the emulation info buffer
|
||||
bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), offset,
|
||||
static_cast<GLsizeiptr>(sizeof(ubo)));
|
||||
bind_ubo_pushbuffer.Push(buffer, offset, static_cast<GLsizeiptr>(sizeof(ubo)));
|
||||
|
||||
Shader shader{shader_cache.GetStageProgram(program)};
|
||||
|
||||
const auto stage_enum{static_cast<Maxwell::ShaderStage>(stage)};
|
||||
const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage);
|
||||
SetupDrawConstBuffers(stage_enum, shader);
|
||||
SetupGlobalRegions(stage_enum, shader);
|
||||
SetupDrawGlobalMemory(stage_enum, shader);
|
||||
const auto texture_buffer_usage{SetupTextures(stage_enum, shader, base_bindings)};
|
||||
|
||||
const ProgramVariant variant{base_bindings, primitive_mode, texture_buffer_usage};
|
||||
@@ -321,12 +369,9 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
|
||||
base_bindings = next_bindings;
|
||||
}
|
||||
|
||||
bind_ubo_pushbuffer.Bind();
|
||||
bind_ssbo_pushbuffer.Bind();
|
||||
|
||||
SyncClipEnabled(clip_distances);
|
||||
|
||||
gpu.dirty_flags.shaders = false;
|
||||
gpu.dirty.shaders = false;
|
||||
}
|
||||
|
||||
std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
|
||||
@@ -409,13 +454,13 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
|
||||
|
||||
const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents,
|
||||
single_color_target};
|
||||
if (fb_config_state == current_framebuffer_config_state &&
|
||||
gpu.dirty_flags.color_buffer.none() && !gpu.dirty_flags.zeta_buffer) {
|
||||
if (fb_config_state == current_framebuffer_config_state && !gpu.dirty.render_settings) {
|
||||
// Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or
|
||||
// single color targets). This is done because the guest registers may not change but the
|
||||
// host framebuffer may contain different attachments
|
||||
return current_depth_stencil_usage;
|
||||
}
|
||||
gpu.dirty.render_settings = false;
|
||||
current_framebuffer_config_state = fb_config_state;
|
||||
|
||||
texture_cache.GuardRenderTargets(true);
|
||||
@@ -504,13 +549,65 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
|
||||
return current_depth_stencil_usage = {static_cast<bool>(depth_surface), fbkey.stencil_enable};
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::ConfigureClearFramebuffer(OpenGLState& current_state, bool using_color_fb,
|
||||
bool using_depth_fb, bool using_stencil_fb) {
|
||||
auto& gpu = system.GPU().Maxwell3D();
|
||||
const auto& regs = gpu.regs;
|
||||
|
||||
texture_cache.GuardRenderTargets(true);
|
||||
View color_surface{};
|
||||
if (using_color_fb) {
|
||||
color_surface = texture_cache.GetColorBufferSurface(regs.clear_buffers.RT, false);
|
||||
}
|
||||
View depth_surface{};
|
||||
if (using_depth_fb || using_stencil_fb) {
|
||||
depth_surface = texture_cache.GetDepthBufferSurface(false);
|
||||
}
|
||||
texture_cache.GuardRenderTargets(false);
|
||||
|
||||
current_state.draw.draw_framebuffer = clear_framebuffer.handle;
|
||||
current_state.ApplyFramebufferState();
|
||||
|
||||
if (color_surface) {
|
||||
color_surface->Attach(GL_COLOR_ATTACHMENT0, GL_DRAW_FRAMEBUFFER);
|
||||
} else {
|
||||
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
|
||||
}
|
||||
|
||||
if (depth_surface) {
|
||||
const auto& params = depth_surface->GetSurfaceParams();
|
||||
switch (params.type) {
|
||||
case VideoCore::Surface::SurfaceType::Depth: {
|
||||
depth_surface->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER);
|
||||
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
|
||||
break;
|
||||
}
|
||||
case VideoCore::Surface::SurfaceType::DepthStencil: {
|
||||
depth_surface->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER);
|
||||
break;
|
||||
}
|
||||
default: { UNIMPLEMENTED(); }
|
||||
}
|
||||
} else {
|
||||
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
|
||||
0);
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::Clear() {
|
||||
const auto& regs = system.GPU().Maxwell3D().regs;
|
||||
bool use_color{};
|
||||
bool use_depth{};
|
||||
bool use_stencil{};
|
||||
|
||||
OpenGLState clear_state;
|
||||
OpenGLState prev_state{OpenGLState::GetCurState()};
|
||||
SCOPE_EXIT({
|
||||
prev_state.AllDirty();
|
||||
prev_state.Apply();
|
||||
});
|
||||
|
||||
OpenGLState clear_state{OpenGLState::GetCurState()};
|
||||
clear_state.SetDefaultViewports();
|
||||
if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
|
||||
regs.clear_buffers.A) {
|
||||
use_color = true;
|
||||
@@ -530,6 +627,7 @@ void RasterizerOpenGL::Clear() {
|
||||
// true.
|
||||
clear_state.depth.test_enabled = true;
|
||||
clear_state.depth.test_func = GL_ALWAYS;
|
||||
clear_state.depth.write_mask = GL_TRUE;
|
||||
}
|
||||
if (regs.clear_buffers.S) {
|
||||
ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear stencil but buffer is not enabled!");
|
||||
@@ -566,8 +664,9 @@ void RasterizerOpenGL::Clear() {
|
||||
return;
|
||||
}
|
||||
|
||||
const auto [clear_depth, clear_stencil] = ConfigureFramebuffers(
|
||||
clear_state, use_color, use_depth || use_stencil, false, regs.clear_buffers.RT.Value());
|
||||
ConfigureClearFramebuffer(clear_state, use_color, use_depth, use_stencil);
|
||||
|
||||
SyncViewport(clear_state);
|
||||
if (regs.clear_flags.scissor) {
|
||||
SyncScissorTest(clear_state);
|
||||
}
|
||||
@@ -576,21 +675,18 @@ void RasterizerOpenGL::Clear() {
|
||||
clear_state.EmulateViewportWithScissor();
|
||||
}
|
||||
|
||||
clear_state.ApplyColorMask();
|
||||
clear_state.ApplyDepth();
|
||||
clear_state.ApplyStencilTest();
|
||||
clear_state.ApplyViewport();
|
||||
clear_state.ApplyFramebufferState();
|
||||
clear_state.AllDirty();
|
||||
clear_state.Apply();
|
||||
|
||||
if (use_color) {
|
||||
glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color);
|
||||
glClearBufferfv(GL_COLOR, 0, regs.clear_color);
|
||||
}
|
||||
|
||||
if (clear_depth && clear_stencil) {
|
||||
if (use_depth && use_stencil) {
|
||||
glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil);
|
||||
} else if (clear_depth) {
|
||||
} else if (use_depth) {
|
||||
glClearBufferfv(GL_DEPTH, 0, ®s.clear_depth);
|
||||
} else if (clear_stencil) {
|
||||
} else if (use_stencil) {
|
||||
glClearBufferiv(GL_STENCIL, 0, ®s.clear_stencil);
|
||||
}
|
||||
}
|
||||
@@ -634,26 +730,47 @@ void RasterizerOpenGL::DrawArrays() {
|
||||
Maxwell::MaxShaderStage;
|
||||
|
||||
// Add space for at least 18 constant buffers
|
||||
buffer_size +=
|
||||
Maxwell::MaxConstBuffers * (MaxConstbufferSize + device.GetUniformBufferAlignment());
|
||||
buffer_size += Maxwell::MaxConstBuffers *
|
||||
(Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
|
||||
|
||||
const bool invalidate = buffer_cache.Map(buffer_size);
|
||||
if (invalidate) {
|
||||
// As all cached buffers are invalidated, we need to recheck their state.
|
||||
gpu.dirty_flags.vertex_array.set();
|
||||
}
|
||||
// Prepare the vertex array.
|
||||
buffer_cache.Map(buffer_size);
|
||||
|
||||
// Prepare vertex array format.
|
||||
const GLuint vao = SetupVertexFormat();
|
||||
SetupVertexBuffer(vao);
|
||||
vertex_array_pushbuffer.Setup(vao);
|
||||
|
||||
DrawParameters params = SetupDraw();
|
||||
// Upload vertex and index data.
|
||||
SetupVertexBuffer(vao);
|
||||
SetupVertexInstances(vao);
|
||||
const GLintptr index_buffer_offset = SetupIndexBuffer();
|
||||
|
||||
// Setup draw parameters. It will automatically choose what glDraw* method to use.
|
||||
const DrawParameters params = SetupDraw(index_buffer_offset);
|
||||
|
||||
// Prepare packed bindings.
|
||||
bind_ubo_pushbuffer.Setup(0);
|
||||
bind_ssbo_pushbuffer.Setup(0);
|
||||
|
||||
// Setup shaders and their used resources.
|
||||
texture_cache.GuardSamplers(true);
|
||||
SetupShaders(params.primitive_mode);
|
||||
texture_cache.GuardSamplers(false);
|
||||
|
||||
ConfigureFramebuffers(state);
|
||||
|
||||
buffer_cache.Unmap();
|
||||
// Signal the buffer cache that we are not going to upload more things.
|
||||
const bool invalidate = buffer_cache.Unmap();
|
||||
|
||||
// Now that we are no longer uploading data, we can safely bind the buffers to OpenGL.
|
||||
vertex_array_pushbuffer.Bind();
|
||||
bind_ubo_pushbuffer.Bind();
|
||||
bind_ssbo_pushbuffer.Bind();
|
||||
|
||||
if (invalidate) {
|
||||
// As all cached buffers are invalidated, we need to recheck their state.
|
||||
gpu.dirty.ResetVertexArrays();
|
||||
}
|
||||
|
||||
shader_program_manager->ApplyTo(state);
|
||||
state.Apply();
|
||||
@@ -665,6 +782,46 @@ void RasterizerOpenGL::DrawArrays() {
|
||||
params.DispatchDraw();
|
||||
|
||||
accelerate_draw = AccelDraw::Disabled;
|
||||
gpu.dirty.memory_general = false;
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
|
||||
if (!GLAD_GL_ARB_compute_variable_group_size) {
|
||||
LOG_ERROR(Render_OpenGL, "Compute is currently not supported on this device due to the "
|
||||
"lack of GL_ARB_compute_variable_group_size");
|
||||
return;
|
||||
}
|
||||
|
||||
auto kernel = shader_cache.GetComputeKernel(code_addr);
|
||||
const auto [program, next_bindings] = kernel->GetProgramHandle({});
|
||||
state.draw.shader_program = program;
|
||||
state.draw.program_pipeline = 0;
|
||||
|
||||
const std::size_t buffer_size =
|
||||
Tegra::Engines::KeplerCompute::NumConstBuffers *
|
||||
(Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
|
||||
buffer_cache.Map(buffer_size);
|
||||
|
||||
bind_ubo_pushbuffer.Setup(0);
|
||||
bind_ssbo_pushbuffer.Setup(0);
|
||||
|
||||
SetupComputeConstBuffers(kernel);
|
||||
SetupComputeGlobalMemory(kernel);
|
||||
|
||||
// TODO(Rodrigo): Bind images and samplers
|
||||
|
||||
buffer_cache.Unmap();
|
||||
|
||||
bind_ubo_pushbuffer.Bind();
|
||||
bind_ssbo_pushbuffer.Bind();
|
||||
|
||||
state.ApplyShaderProgram();
|
||||
state.ApplyProgramPipeline();
|
||||
|
||||
const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
|
||||
glDispatchComputeGroupSizeARB(launch_desc.grid_dim_x, launch_desc.grid_dim_y,
|
||||
launch_desc.grid_dim_z, launch_desc.block_dim_x,
|
||||
launch_desc.block_dim_y, launch_desc.block_dim_z);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::FlushAll() {}
|
||||
@@ -675,7 +832,7 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) {
|
||||
return;
|
||||
}
|
||||
texture_cache.FlushRegion(addr, size);
|
||||
global_cache.FlushRegion(addr, size);
|
||||
buffer_cache.FlushRegion(addr, size);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
|
||||
@@ -685,7 +842,6 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
|
||||
}
|
||||
texture_cache.InvalidateRegion(addr, size);
|
||||
shader_cache.InvalidateRegion(addr, size);
|
||||
global_cache.InvalidateRegion(addr, size);
|
||||
buffer_cache.InvalidateRegion(addr, size);
|
||||
}
|
||||
|
||||
@@ -696,6 +852,10 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
|
||||
InvalidateRegion(addr, size);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::TickFrame() {
|
||||
buffer_cache.TickFrame();
|
||||
}
|
||||
|
||||
bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
|
||||
const Tegra::Engines::Fermi2D::Regs::Surface& dst,
|
||||
const Tegra::Engines::Fermi2D::Config& copy_config) {
|
||||
@@ -737,14 +897,25 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
|
||||
void RasterizerOpenGL::SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
|
||||
const Shader& shader) {
|
||||
MICROPROFILE_SCOPE(OpenGL_UBO);
|
||||
const auto stage_index = static_cast<std::size_t>(stage);
|
||||
const auto& shader_stage = system.GPU().Maxwell3D().state.shader_stages[stage_index];
|
||||
const auto& entries = shader->GetShaderEntries().const_buffers;
|
||||
const auto& stages = system.GPU().Maxwell3D().state.shader_stages;
|
||||
const auto& shader_stage = stages[static_cast<std::size_t>(stage)];
|
||||
for (const auto& entry : shader->GetShaderEntries().const_buffers) {
|
||||
const auto& buffer = shader_stage.const_buffers[entry.GetIndex()];
|
||||
SetupConstBuffer(buffer, entry);
|
||||
}
|
||||
}
|
||||
|
||||
// Upload only the enabled buffers from the 16 constbuffers of each shader stage
|
||||
for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
|
||||
const auto& entry = entries[bindpoint];
|
||||
SetupConstBuffer(shader_stage.const_buffers[entry.GetIndex()], entry);
|
||||
void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) {
|
||||
MICROPROFILE_SCOPE(OpenGL_UBO);
|
||||
const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
|
||||
for (const auto& entry : kernel->GetShaderEntries().const_buffers) {
|
||||
const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
|
||||
const std::bitset<8> mask = launch_desc.memory_config.const_buffer_enable_mask.Value();
|
||||
Tegra::Engines::ConstBufferInfo buffer;
|
||||
buffer.address = config.Address();
|
||||
buffer.size = config.size;
|
||||
buffer.enabled = mask[entry.GetIndex()];
|
||||
SetupConstBuffer(buffer, entry);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -752,49 +923,52 @@ void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& b
|
||||
const GLShader::ConstBufferEntry& entry) {
|
||||
if (!buffer.enabled) {
|
||||
// Set values to zero to unbind buffers
|
||||
bind_ubo_pushbuffer.Push(0, 0, 0);
|
||||
bind_ubo_pushbuffer.Push(buffer_cache.GetEmptyBuffer(sizeof(float)), 0, sizeof(float));
|
||||
return;
|
||||
}
|
||||
|
||||
std::size_t size;
|
||||
if (entry.IsIndirect()) {
|
||||
// Buffer is accessed indirectly, so upload the entire thing
|
||||
size = buffer.size;
|
||||
|
||||
if (size > MaxConstbufferSize) {
|
||||
LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", size,
|
||||
MaxConstbufferSize);
|
||||
size = MaxConstbufferSize;
|
||||
}
|
||||
} else {
|
||||
// Buffer is accessed directly, upload just what we use
|
||||
size = entry.GetSize();
|
||||
}
|
||||
|
||||
// Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140
|
||||
// UBO alignment requirements.
|
||||
size = Common::AlignUp(size, sizeof(GLvec4));
|
||||
ASSERT_MSG(size <= MaxConstbufferSize, "Constant buffer is too big");
|
||||
const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4));
|
||||
|
||||
const std::size_t alignment = device.GetUniformBufferAlignment();
|
||||
const GLintptr offset = buffer_cache.UploadMemory(buffer.address, size, alignment);
|
||||
bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), offset, size);
|
||||
const auto alignment = device.GetUniformBufferAlignment();
|
||||
const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment);
|
||||
bind_ubo_pushbuffer.Push(cbuf, offset, size);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
|
||||
const Shader& shader) {
|
||||
const auto& entries = shader->GetShaderEntries().global_memory_entries;
|
||||
for (std::size_t bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
|
||||
const auto& entry{entries[bindpoint]};
|
||||
const auto& region{global_cache.GetGlobalRegion(entry, stage)};
|
||||
if (entry.IsWritten()) {
|
||||
region->MarkAsModified(true, global_cache);
|
||||
}
|
||||
bind_ssbo_pushbuffer.Push(region->GetBufferHandle(), 0,
|
||||
static_cast<GLsizeiptr>(region->GetSizeInBytes()));
|
||||
void RasterizerOpenGL::SetupDrawGlobalMemory(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
|
||||
const Shader& shader) {
|
||||
auto& gpu{system.GPU()};
|
||||
auto& memory_manager{gpu.MemoryManager()};
|
||||
const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<std::size_t>(stage)]};
|
||||
for (const auto& entry : shader->GetShaderEntries().global_memory_entries) {
|
||||
const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()};
|
||||
const auto gpu_addr{memory_manager.Read<u64>(addr)};
|
||||
const auto size{memory_manager.Read<u32>(addr + 8)};
|
||||
SetupGlobalMemory(entry, gpu_addr, size);
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) {
|
||||
auto& gpu{system.GPU()};
|
||||
auto& memory_manager{gpu.MemoryManager()};
|
||||
const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config};
|
||||
for (const auto& entry : kernel->GetShaderEntries().global_memory_entries) {
|
||||
const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()};
|
||||
const auto gpu_addr{memory_manager.Read<u64>(addr)};
|
||||
const auto size{memory_manager.Read<u32>(addr + 8)};
|
||||
SetupGlobalMemory(entry, gpu_addr, size);
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entry,
|
||||
GPUVAddr gpu_addr, std::size_t size) {
|
||||
const auto alignment{device.GetShaderStorageBufferAlignment()};
|
||||
const auto [ssbo, buffer_offset] =
|
||||
buffer_cache.UploadMemory(gpu_addr, size, alignment, true, entry.IsWritten());
|
||||
bind_ssbo_pushbuffer.Push(ssbo, buffer_offset, static_cast<GLsizeiptr>(size));
|
||||
}
|
||||
|
||||
TextureBufferUsage RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader,
|
||||
BaseBindings base_bindings) {
|
||||
MICROPROFILE_SCOPE(OpenGL_Texture);
|
||||
@@ -883,10 +1057,11 @@ void RasterizerOpenGL::SyncClipCoef() {
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncCullMode() {
|
||||
const auto& regs = system.GPU().Maxwell3D().regs;
|
||||
auto& maxwell3d = system.GPU().Maxwell3D();
|
||||
|
||||
const auto& regs = maxwell3d.regs;
|
||||
|
||||
state.cull.enabled = regs.cull.enabled != 0;
|
||||
|
||||
if (state.cull.enabled) {
|
||||
state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face);
|
||||
state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face);
|
||||
@@ -919,16 +1094,21 @@ void RasterizerOpenGL::SyncDepthTestState() {
|
||||
state.depth.test_enabled = regs.depth_test_enable != 0;
|
||||
state.depth.write_mask = regs.depth_write_enabled ? GL_TRUE : GL_FALSE;
|
||||
|
||||
if (!state.depth.test_enabled)
|
||||
if (!state.depth.test_enabled) {
|
||||
return;
|
||||
}
|
||||
|
||||
state.depth.test_func = MaxwellToGL::ComparisonOp(regs.depth_test_func);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncStencilTestState() {
|
||||
const auto& regs = system.GPU().Maxwell3D().regs;
|
||||
state.stencil.test_enabled = regs.stencil_enable != 0;
|
||||
auto& maxwell3d = system.GPU().Maxwell3D();
|
||||
if (!maxwell3d.dirty.stencil_test) {
|
||||
return;
|
||||
}
|
||||
const auto& regs = maxwell3d.regs;
|
||||
|
||||
state.stencil.test_enabled = regs.stencil_enable != 0;
|
||||
if (!regs.stencil_enable) {
|
||||
return;
|
||||
}
|
||||
@@ -957,10 +1137,17 @@ void RasterizerOpenGL::SyncStencilTestState() {
|
||||
state.stencil.back.action_depth_fail = GL_KEEP;
|
||||
state.stencil.back.action_depth_pass = GL_KEEP;
|
||||
}
|
||||
state.MarkDirtyStencilState();
|
||||
maxwell3d.dirty.stencil_test = false;
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncColorMask() {
|
||||
const auto& regs = system.GPU().Maxwell3D().regs;
|
||||
auto& maxwell3d = system.GPU().Maxwell3D();
|
||||
if (!maxwell3d.dirty.color_mask) {
|
||||
return;
|
||||
}
|
||||
const auto& regs = maxwell3d.regs;
|
||||
|
||||
const std::size_t count =
|
||||
regs.independent_blend_enable ? Tegra::Engines::Maxwell3D::Regs::NumRenderTargets : 1;
|
||||
for (std::size_t i = 0; i < count; i++) {
|
||||
@@ -971,6 +1158,9 @@ void RasterizerOpenGL::SyncColorMask() {
|
||||
dest.blue_enabled = (source.B == 0) ? GL_FALSE : GL_TRUE;
|
||||
dest.alpha_enabled = (source.A == 0) ? GL_FALSE : GL_TRUE;
|
||||
}
|
||||
|
||||
state.MarkDirtyColorMask();
|
||||
maxwell3d.dirty.color_mask = false;
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncMultiSampleState() {
|
||||
@@ -985,7 +1175,11 @@ void RasterizerOpenGL::SyncFragmentColorClampState() {
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncBlendState() {
|
||||
const auto& regs = system.GPU().Maxwell3D().regs;
|
||||
auto& maxwell3d = system.GPU().Maxwell3D();
|
||||
if (!maxwell3d.dirty.blend_state) {
|
||||
return;
|
||||
}
|
||||
const auto& regs = maxwell3d.regs;
|
||||
|
||||
state.blend_color.red = regs.blend_color.r;
|
||||
state.blend_color.green = regs.blend_color.g;
|
||||
@@ -1008,6 +1202,8 @@ void RasterizerOpenGL::SyncBlendState() {
|
||||
for (std::size_t i = 1; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
|
||||
state.blend[i].enabled = false;
|
||||
}
|
||||
maxwell3d.dirty.blend_state = false;
|
||||
state.MarkDirtyBlendState();
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -1024,6 +1220,9 @@ void RasterizerOpenGL::SyncBlendState() {
|
||||
blend.src_a_func = MaxwellToGL::BlendFunc(src.factor_source_a);
|
||||
blend.dst_a_func = MaxwellToGL::BlendFunc(src.factor_dest_a);
|
||||
}
|
||||
|
||||
state.MarkDirtyBlendState();
|
||||
maxwell3d.dirty.blend_state = false;
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncLogicOpState() {
|
||||
@@ -1075,13 +1274,21 @@ void RasterizerOpenGL::SyncPointState() {
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncPolygonOffset() {
|
||||
const auto& regs = system.GPU().Maxwell3D().regs;
|
||||
auto& maxwell3d = system.GPU().Maxwell3D();
|
||||
if (!maxwell3d.dirty.polygon_offset) {
|
||||
return;
|
||||
}
|
||||
const auto& regs = maxwell3d.regs;
|
||||
|
||||
state.polygon_offset.fill_enable = regs.polygon_offset_fill_enable != 0;
|
||||
state.polygon_offset.line_enable = regs.polygon_offset_line_enable != 0;
|
||||
state.polygon_offset.point_enable = regs.polygon_offset_point_enable != 0;
|
||||
state.polygon_offset.units = regs.polygon_offset_units;
|
||||
state.polygon_offset.factor = regs.polygon_offset_factor;
|
||||
state.polygon_offset.clamp = regs.polygon_offset_clamp;
|
||||
|
||||
state.MarkDirtyPolygonOffset();
|
||||
maxwell3d.dirty.polygon_offset = false;
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncAlphaTest() {
|
||||
|
||||
@@ -24,7 +24,6 @@
|
||||
#include "video_core/renderer_opengl/gl_buffer_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_device.h"
|
||||
#include "video_core/renderer_opengl/gl_framebuffer_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_global_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_sampler_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_cache.h"
|
||||
@@ -59,10 +58,12 @@ public:
|
||||
|
||||
void DrawArrays() override;
|
||||
void Clear() override;
|
||||
void DispatchCompute(GPUVAddr code_addr) override;
|
||||
void FlushAll() override;
|
||||
void FlushRegion(CacheAddr addr, u64 size) override;
|
||||
void InvalidateRegion(CacheAddr addr, u64 size) override;
|
||||
void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
|
||||
void TickFrame() override;
|
||||
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
|
||||
const Tegra::Engines::Fermi2D::Regs::Surface& dst,
|
||||
const Tegra::Engines::Fermi2D::Config& copy_config) override;
|
||||
@@ -73,11 +74,6 @@ public:
|
||||
void LoadDiskResources(const std::atomic_bool& stop_loading,
|
||||
const VideoCore::DiskResourceLoadCallback& callback) override;
|
||||
|
||||
/// Maximum supported size that a constbuffer can have in bytes.
|
||||
static constexpr std::size_t MaxConstbufferSize = 0x10000;
|
||||
static_assert(MaxConstbufferSize % sizeof(GLvec4) == 0,
|
||||
"The maximum size of a constbuffer must be a multiple of the size of GLvec4");
|
||||
|
||||
private:
|
||||
struct FramebufferConfigState {
|
||||
bool using_color_fb{};
|
||||
@@ -113,17 +109,30 @@ private:
|
||||
OpenGLState& current_state, bool using_color_fb = true, bool using_depth_fb = true,
|
||||
bool preserve_contents = true, std::optional<std::size_t> single_color_target = {});
|
||||
|
||||
void ConfigureClearFramebuffer(OpenGLState& current_state, bool using_color_fb,
|
||||
bool using_depth_fb, bool using_stencil_fb);
|
||||
|
||||
/// Configures the current constbuffers to use for the draw command.
|
||||
void SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
|
||||
const Shader& shader);
|
||||
|
||||
/// Configures the current constbuffers to use for the kernel invocation.
|
||||
void SetupComputeConstBuffers(const Shader& kernel);
|
||||
|
||||
/// Configures a constant buffer.
|
||||
void SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& buffer,
|
||||
const GLShader::ConstBufferEntry& entry);
|
||||
|
||||
/// Configures the current global memory entries to use for the draw command.
|
||||
void SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
|
||||
const Shader& shader);
|
||||
void SetupDrawGlobalMemory(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
|
||||
const Shader& shader);
|
||||
|
||||
/// Configures the current global memory entries to use for the kernel invocation.
|
||||
void SetupComputeGlobalMemory(const Shader& kernel);
|
||||
|
||||
/// Configures a constant buffer.
|
||||
void SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
|
||||
std::size_t size);
|
||||
|
||||
/// Configures the current textures to use for the draw command. Returns shaders texture buffer
|
||||
/// usage.
|
||||
@@ -191,7 +200,6 @@ private:
|
||||
|
||||
TextureCacheOpenGL texture_cache;
|
||||
ShaderCacheOpenGL shader_cache;
|
||||
GlobalRegionCacheOpenGL global_cache;
|
||||
SamplerCacheOpenGL sampler_cache;
|
||||
FramebufferCacheOpenGL framebuffer_cache;
|
||||
|
||||
@@ -210,6 +218,7 @@ private:
|
||||
static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
|
||||
OGLBufferCache buffer_cache;
|
||||
|
||||
VertexArrayPushBuffer vertex_array_pushbuffer;
|
||||
BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER};
|
||||
BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER};
|
||||
|
||||
@@ -221,14 +230,19 @@ private:
|
||||
GLuint SetupVertexFormat();
|
||||
|
||||
void SetupVertexBuffer(GLuint vao);
|
||||
void SetupVertexInstances(GLuint vao);
|
||||
|
||||
DrawParameters SetupDraw();
|
||||
GLintptr SetupIndexBuffer();
|
||||
|
||||
DrawParameters SetupDraw(GLintptr index_buffer_offset);
|
||||
|
||||
void SetupShaders(GLenum primitive_mode);
|
||||
|
||||
enum class AccelDraw { Disabled, Arrays, Indexed };
|
||||
AccelDraw accelerate_draw = AccelDraw::Disabled;
|
||||
|
||||
OGLFramebuffer clear_framebuffer;
|
||||
|
||||
using CachedPageMap = boost::icl::interval_map<u64, int>;
|
||||
CachedPageMap cached_pages;
|
||||
};
|
||||
|
||||
@@ -23,13 +23,13 @@ namespace OpenGL {
|
||||
|
||||
using VideoCommon::Shader::ProgramCode;
|
||||
|
||||
// One UBO is always reserved for emulation values
|
||||
constexpr u32 RESERVED_UBOS = 1;
|
||||
// One UBO is always reserved for emulation values on staged shaders
|
||||
constexpr u32 STAGE_RESERVED_UBOS = 1;
|
||||
|
||||
struct UnspecializedShader {
|
||||
std::string code;
|
||||
GLShader::ShaderEntries entries;
|
||||
Maxwell::ShaderProgram program_type;
|
||||
ProgramType program_type;
|
||||
};
|
||||
|
||||
namespace {
|
||||
@@ -55,15 +55,17 @@ ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr g
|
||||
}
|
||||
|
||||
/// Gets the shader type from a Maxwell program type
|
||||
constexpr GLenum GetShaderType(Maxwell::ShaderProgram program_type) {
|
||||
constexpr GLenum GetShaderType(ProgramType program_type) {
|
||||
switch (program_type) {
|
||||
case Maxwell::ShaderProgram::VertexA:
|
||||
case Maxwell::ShaderProgram::VertexB:
|
||||
case ProgramType::VertexA:
|
||||
case ProgramType::VertexB:
|
||||
return GL_VERTEX_SHADER;
|
||||
case Maxwell::ShaderProgram::Geometry:
|
||||
case ProgramType::Geometry:
|
||||
return GL_GEOMETRY_SHADER;
|
||||
case Maxwell::ShaderProgram::Fragment:
|
||||
case ProgramType::Fragment:
|
||||
return GL_FRAGMENT_SHADER;
|
||||
case ProgramType::Compute:
|
||||
return GL_COMPUTE_SHADER;
|
||||
default:
|
||||
return GL_NONE;
|
||||
}
|
||||
@@ -100,6 +102,25 @@ constexpr std::tuple<const char*, const char*, u32> GetPrimitiveDescription(GLen
|
||||
}
|
||||
}
|
||||
|
||||
ProgramType GetProgramType(Maxwell::ShaderProgram program) {
|
||||
switch (program) {
|
||||
case Maxwell::ShaderProgram::VertexA:
|
||||
return ProgramType::VertexA;
|
||||
case Maxwell::ShaderProgram::VertexB:
|
||||
return ProgramType::VertexB;
|
||||
case Maxwell::ShaderProgram::TesselationControl:
|
||||
return ProgramType::TessellationControl;
|
||||
case Maxwell::ShaderProgram::TesselationEval:
|
||||
return ProgramType::TessellationEval;
|
||||
case Maxwell::ShaderProgram::Geometry:
|
||||
return ProgramType::Geometry;
|
||||
case Maxwell::ShaderProgram::Fragment:
|
||||
return ProgramType::Fragment;
|
||||
}
|
||||
UNREACHABLE();
|
||||
return {};
|
||||
}
|
||||
|
||||
/// Calculates the size of a program stream
|
||||
std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) {
|
||||
constexpr std::size_t start_offset = 10;
|
||||
@@ -128,11 +149,13 @@ std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) {
|
||||
}
|
||||
|
||||
/// Hashes one (or two) program streams
|
||||
u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode& code,
|
||||
const ProgramCode& code_b) {
|
||||
u64 unique_identifier =
|
||||
Common::CityHash64(reinterpret_cast<const char*>(code.data()), CalculateProgramSize(code));
|
||||
if (program_type != Maxwell::ShaderProgram::VertexA) {
|
||||
u64 GetUniqueIdentifier(ProgramType program_type, const ProgramCode& code,
|
||||
const ProgramCode& code_b, std::size_t size_a = 0, std::size_t size_b = 0) {
|
||||
if (size_a == 0) {
|
||||
size_a = CalculateProgramSize(code);
|
||||
}
|
||||
u64 unique_identifier = Common::CityHash64(reinterpret_cast<const char*>(code.data()), size_a);
|
||||
if (program_type != ProgramType::VertexA) {
|
||||
return unique_identifier;
|
||||
}
|
||||
// VertexA programs include two programs
|
||||
@@ -140,50 +163,67 @@ u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode&
|
||||
std::size_t seed = 0;
|
||||
boost::hash_combine(seed, unique_identifier);
|
||||
|
||||
const u64 identifier_b = Common::CityHash64(reinterpret_cast<const char*>(code_b.data()),
|
||||
CalculateProgramSize(code_b));
|
||||
if (size_b == 0) {
|
||||
size_b = CalculateProgramSize(code_b);
|
||||
}
|
||||
const u64 identifier_b =
|
||||
Common::CityHash64(reinterpret_cast<const char*>(code_b.data()), size_b);
|
||||
boost::hash_combine(seed, identifier_b);
|
||||
return static_cast<u64>(seed);
|
||||
}
|
||||
|
||||
/// Creates an unspecialized program from code streams
|
||||
GLShader::ProgramResult CreateProgram(const Device& device, Maxwell::ShaderProgram program_type,
|
||||
GLShader::ProgramResult CreateProgram(const Device& device, ProgramType program_type,
|
||||
ProgramCode program_code, ProgramCode program_code_b) {
|
||||
GLShader::ShaderSetup setup(program_code);
|
||||
if (program_type == Maxwell::ShaderProgram::VertexA) {
|
||||
setup.program.size_a = CalculateProgramSize(program_code);
|
||||
setup.program.size_b = 0;
|
||||
if (program_type == ProgramType::VertexA) {
|
||||
// VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders.
|
||||
// Conventional HW does not support this, so we combine VertexA and VertexB into one
|
||||
// stage here.
|
||||
setup.SetProgramB(program_code_b);
|
||||
setup.program.size_b = CalculateProgramSize(program_code_b);
|
||||
}
|
||||
setup.program.unique_identifier =
|
||||
GetUniqueIdentifier(program_type, program_code, program_code_b);
|
||||
setup.program.unique_identifier = GetUniqueIdentifier(
|
||||
program_type, program_code, program_code_b, setup.program.size_a, setup.program.size_b);
|
||||
|
||||
switch (program_type) {
|
||||
case Maxwell::ShaderProgram::VertexA:
|
||||
case Maxwell::ShaderProgram::VertexB:
|
||||
case ProgramType::VertexA:
|
||||
case ProgramType::VertexB:
|
||||
return GLShader::GenerateVertexShader(device, setup);
|
||||
case Maxwell::ShaderProgram::Geometry:
|
||||
case ProgramType::Geometry:
|
||||
return GLShader::GenerateGeometryShader(device, setup);
|
||||
case Maxwell::ShaderProgram::Fragment:
|
||||
case ProgramType::Fragment:
|
||||
return GLShader::GenerateFragmentShader(device, setup);
|
||||
case ProgramType::Compute:
|
||||
return GLShader::GenerateComputeShader(device, setup);
|
||||
default:
|
||||
LOG_CRITICAL(HW_GPU, "Unimplemented program_type={}", static_cast<u32>(program_type));
|
||||
UNREACHABLE();
|
||||
UNIMPLEMENTED_MSG("Unimplemented program_type={}", static_cast<u32>(program_type));
|
||||
return {};
|
||||
}
|
||||
}
|
||||
|
||||
CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries,
|
||||
Maxwell::ShaderProgram program_type, const ProgramVariant& variant,
|
||||
ProgramType program_type, const ProgramVariant& variant,
|
||||
bool hint_retrievable = false) {
|
||||
auto base_bindings{variant.base_bindings};
|
||||
const auto primitive_mode{variant.primitive_mode};
|
||||
const auto texture_buffer_usage{variant.texture_buffer_usage};
|
||||
|
||||
std::string source = "#version 430 core\n"
|
||||
"#extension GL_ARB_separate_shader_objects : enable\n\n";
|
||||
source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++);
|
||||
"#extension GL_ARB_separate_shader_objects : enable\n";
|
||||
if (entries.shader_viewport_layer_array) {
|
||||
source += "#extension GL_ARB_shader_viewport_layer_array : enable\n";
|
||||
}
|
||||
if (program_type == ProgramType::Compute) {
|
||||
source += "#extension GL_ARB_compute_variable_group_size : require\n";
|
||||
}
|
||||
source += '\n';
|
||||
|
||||
if (program_type != ProgramType::Compute) {
|
||||
source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++);
|
||||
}
|
||||
|
||||
for (const auto& cbuf : entries.const_buffers) {
|
||||
source +=
|
||||
@@ -210,13 +250,16 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn
|
||||
source += fmt::format("#define SAMPLER_{}_IS_BUFFER", i);
|
||||
}
|
||||
|
||||
if (program_type == Maxwell::ShaderProgram::Geometry) {
|
||||
if (program_type == ProgramType::Geometry) {
|
||||
const auto [glsl_topology, debug_name, max_vertices] =
|
||||
GetPrimitiveDescription(primitive_mode);
|
||||
|
||||
source += "layout (" + std::string(glsl_topology) + ") in;\n";
|
||||
source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n';
|
||||
}
|
||||
if (program_type == ProgramType::Compute) {
|
||||
source += "layout (local_size_variable) in;\n";
|
||||
}
|
||||
|
||||
source += code;
|
||||
|
||||
@@ -244,7 +287,7 @@ std::set<GLenum> GetSupportedFormats() {
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
CachedShader::CachedShader(const ShaderParameters& params, Maxwell::ShaderProgram program_type,
|
||||
CachedShader::CachedShader(const ShaderParameters& params, ProgramType program_type,
|
||||
GLShader::ProgramResult result)
|
||||
: RasterizerCacheObject{params.host_ptr}, host_ptr{params.host_ptr}, cpu_addr{params.cpu_addr},
|
||||
unique_identifier{params.unique_identifier}, program_type{program_type},
|
||||
@@ -257,29 +300,50 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
|
||||
ProgramCode&& program_code_b) {
|
||||
const auto code_size{CalculateProgramSize(program_code)};
|
||||
const auto code_size_b{CalculateProgramSize(program_code_b)};
|
||||
auto result{CreateProgram(params.device, program_type, program_code, program_code_b)};
|
||||
auto result{
|
||||
CreateProgram(params.device, GetProgramType(program_type), program_code, program_code_b)};
|
||||
if (result.first.empty()) {
|
||||
// TODO(Rodrigo): Unimplemented shader stages hit here, avoid using these for now
|
||||
return {};
|
||||
}
|
||||
|
||||
params.disk_cache.SaveRaw(ShaderDiskCacheRaw(
|
||||
params.unique_identifier, program_type, static_cast<u32>(code_size / sizeof(u64)),
|
||||
static_cast<u32>(code_size_b / sizeof(u64)), std::move(program_code),
|
||||
std::move(program_code_b)));
|
||||
params.unique_identifier, GetProgramType(program_type),
|
||||
static_cast<u32>(code_size / sizeof(u64)), static_cast<u32>(code_size_b / sizeof(u64)),
|
||||
std::move(program_code), std::move(program_code_b)));
|
||||
|
||||
return std::shared_ptr<CachedShader>(new CachedShader(params, program_type, std::move(result)));
|
||||
return std::shared_ptr<CachedShader>(
|
||||
new CachedShader(params, GetProgramType(program_type), std::move(result)));
|
||||
}
|
||||
|
||||
Shader CachedShader::CreateStageFromCache(const ShaderParameters& params,
|
||||
Maxwell::ShaderProgram program_type,
|
||||
GLShader::ProgramResult result) {
|
||||
return std::shared_ptr<CachedShader>(new CachedShader(params, program_type, std::move(result)));
|
||||
return std::shared_ptr<CachedShader>(
|
||||
new CachedShader(params, GetProgramType(program_type), std::move(result)));
|
||||
}
|
||||
|
||||
Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code) {
|
||||
auto result{CreateProgram(params.device, ProgramType::Compute, code, {})};
|
||||
|
||||
const auto code_size{CalculateProgramSize(code)};
|
||||
params.disk_cache.SaveRaw(ShaderDiskCacheRaw(params.unique_identifier, ProgramType::Compute,
|
||||
static_cast<u32>(code_size / sizeof(u64)), 0,
|
||||
std::move(code), {}));
|
||||
|
||||
return std::shared_ptr<CachedShader>(
|
||||
new CachedShader(params, ProgramType::Compute, std::move(result)));
|
||||
}
|
||||
|
||||
Shader CachedShader::CreateKernelFromCache(const ShaderParameters& params,
|
||||
GLShader::ProgramResult result) {
|
||||
return std::shared_ptr<CachedShader>(
|
||||
new CachedShader(params, ProgramType::Compute, std::move(result)));
|
||||
}
|
||||
|
||||
std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVariant& variant) {
|
||||
GLuint handle{};
|
||||
if (program_type == Maxwell::ShaderProgram::Geometry) {
|
||||
if (program_type == ProgramType::Geometry) {
|
||||
handle = GetGeometryShader(variant);
|
||||
} else {
|
||||
const auto [entry, is_cache_miss] = programs.try_emplace(variant);
|
||||
@@ -297,8 +361,11 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVar
|
||||
handle = program->handle;
|
||||
}
|
||||
|
||||
auto base_bindings{variant.base_bindings};
|
||||
base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()) + RESERVED_UBOS;
|
||||
auto base_bindings = variant.base_bindings;
|
||||
base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size());
|
||||
if (program_type != ProgramType::Compute) {
|
||||
base_bindings.cbuf += STAGE_RESERVED_UBOS;
|
||||
}
|
||||
base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size());
|
||||
base_bindings.sampler += static_cast<u32>(entries.samplers.size());
|
||||
|
||||
@@ -561,7 +628,7 @@ std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecia
|
||||
}
|
||||
|
||||
Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
|
||||
if (!system.GPU().Maxwell3D().dirty_flags.shaders) {
|
||||
if (!system.GPU().Maxwell3D().dirty.shaders) {
|
||||
return last_shaders[static_cast<std::size_t>(program)];
|
||||
}
|
||||
|
||||
@@ -578,13 +645,15 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
|
||||
// No shader found - create a new one
|
||||
ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)};
|
||||
ProgramCode program_code_b;
|
||||
if (program == Maxwell::ShaderProgram::VertexA) {
|
||||
const bool is_program_a{program == Maxwell::ShaderProgram::VertexA};
|
||||
if (is_program_a) {
|
||||
const GPUVAddr program_addr_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)};
|
||||
program_code_b = GetShaderCode(memory_manager, program_addr_b,
|
||||
memory_manager.GetPointer(program_addr_b));
|
||||
}
|
||||
|
||||
const auto unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b);
|
||||
const auto unique_identifier =
|
||||
GetUniqueIdentifier(GetProgramType(program), program_code, program_code_b);
|
||||
const auto cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)};
|
||||
const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr,
|
||||
host_ptr, unique_identifier};
|
||||
@@ -601,4 +670,30 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
|
||||
return last_shaders[static_cast<std::size_t>(program)] = shader;
|
||||
}
|
||||
|
||||
Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
|
||||
auto& memory_manager{system.GPU().MemoryManager()};
|
||||
const auto host_ptr{memory_manager.GetPointer(code_addr)};
|
||||
auto kernel = TryGet(host_ptr);
|
||||
if (kernel) {
|
||||
return kernel;
|
||||
}
|
||||
|
||||
// No kernel found - create a new one
|
||||
auto code{GetShaderCode(memory_manager, code_addr, host_ptr)};
|
||||
const auto unique_identifier{GetUniqueIdentifier(ProgramType::Compute, code, {})};
|
||||
const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)};
|
||||
const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr,
|
||||
host_ptr, unique_identifier};
|
||||
|
||||
const auto found = precompiled_shaders.find(unique_identifier);
|
||||
if (found == precompiled_shaders.end()) {
|
||||
kernel = CachedShader::CreateKernelFromMemory(params, std::move(code));
|
||||
} else {
|
||||
kernel = CachedShader::CreateKernelFromCache(params, found->second);
|
||||
}
|
||||
|
||||
Register(kernel);
|
||||
return kernel;
|
||||
}
|
||||
|
||||
} // namespace OpenGL
|
||||
|
||||
@@ -61,6 +61,11 @@ public:
|
||||
Maxwell::ShaderProgram program_type,
|
||||
GLShader::ProgramResult result);
|
||||
|
||||
static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code);
|
||||
|
||||
static Shader CreateKernelFromCache(const ShaderParameters& params,
|
||||
GLShader::ProgramResult result);
|
||||
|
||||
VAddr GetCpuAddr() const override {
|
||||
return cpu_addr;
|
||||
}
|
||||
@@ -78,7 +83,7 @@ public:
|
||||
std::tuple<GLuint, BaseBindings> GetProgramHandle(const ProgramVariant& variant);
|
||||
|
||||
private:
|
||||
explicit CachedShader(const ShaderParameters& params, Maxwell::ShaderProgram program_type,
|
||||
explicit CachedShader(const ShaderParameters& params, ProgramType program_type,
|
||||
GLShader::ProgramResult result);
|
||||
|
||||
// Geometry programs. These are needed because GLSL needs an input topology but it's not
|
||||
@@ -104,7 +109,7 @@ private:
|
||||
u8* host_ptr{};
|
||||
VAddr cpu_addr{};
|
||||
u64 unique_identifier{};
|
||||
Maxwell::ShaderProgram program_type{};
|
||||
ProgramType program_type{};
|
||||
ShaderDiskCacheOpenGL& disk_cache;
|
||||
const PrecompiledPrograms& precompiled_programs;
|
||||
|
||||
@@ -132,6 +137,9 @@ public:
|
||||
/// Gets the current specified shader stage program
|
||||
Shader GetStageProgram(Maxwell::ShaderProgram program);
|
||||
|
||||
/// Gets a compute kernel in the passed address
|
||||
Shader GetComputeKernel(GPUVAddr code_addr);
|
||||
|
||||
protected:
|
||||
// We do not have to flush this cache as things in it are never modified by us.
|
||||
void FlushObjectInner(const Shader& object) override {}
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
#include "common/alignment.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/renderer_opengl/gl_device.h"
|
||||
#include "video_core/renderer_opengl/gl_rasterizer.h"
|
||||
@@ -36,7 +37,6 @@ using namespace std::string_literals;
|
||||
using namespace VideoCommon::Shader;
|
||||
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage;
|
||||
using Operation = const OperationNode&;
|
||||
|
||||
enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat };
|
||||
@@ -46,7 +46,7 @@ using TextureArgument = std::pair<Type, Node>;
|
||||
using TextureIR = std::variant<TextureAoffi, TextureArgument>;
|
||||
|
||||
constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
|
||||
static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float));
|
||||
static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float));
|
||||
|
||||
class ShaderWriter {
|
||||
public:
|
||||
@@ -161,9 +161,13 @@ std::string FlowStackTopName(MetaStackClass stack) {
|
||||
return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack));
|
||||
}
|
||||
|
||||
constexpr bool IsVertexShader(ProgramType stage) {
|
||||
return stage == ProgramType::VertexA || stage == ProgramType::VertexB;
|
||||
}
|
||||
|
||||
class GLSLDecompiler final {
|
||||
public:
|
||||
explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderStage stage,
|
||||
explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ProgramType stage,
|
||||
std::string suffix)
|
||||
: device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {}
|
||||
|
||||
@@ -191,10 +195,12 @@ public:
|
||||
|
||||
// TODO(Subv): Figure out the actual depth of the flow stack, for now it seems
|
||||
// unlikely that shaders will use 20 nested SSYs and PBKs.
|
||||
constexpr u32 FLOW_STACK_SIZE = 20;
|
||||
for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) {
|
||||
code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE);
|
||||
code.AddLine("uint {} = 0u;", FlowStackTopName(stack));
|
||||
if (!ir.IsFlowStackDisabled()) {
|
||||
constexpr u32 FLOW_STACK_SIZE = 20;
|
||||
for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) {
|
||||
code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE);
|
||||
code.AddLine("uint {} = 0u;", FlowStackTopName(stack));
|
||||
}
|
||||
}
|
||||
|
||||
code.AddLine("while (true) {{");
|
||||
@@ -244,24 +250,22 @@ public:
|
||||
usage.is_read, usage.is_written);
|
||||
}
|
||||
entries.clip_distances = ir.GetClipDistances();
|
||||
entries.shader_viewport_layer_array =
|
||||
IsVertexShader(stage) && (ir.UsesLayer() || ir.UsesViewportIndex());
|
||||
entries.shader_length = ir.GetLength();
|
||||
return entries;
|
||||
}
|
||||
|
||||
private:
|
||||
using OperationDecompilerFn = std::string (GLSLDecompiler::*)(Operation);
|
||||
using OperationDecompilersArray =
|
||||
std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>;
|
||||
|
||||
void DeclareVertex() {
|
||||
if (stage != ShaderStage::Vertex)
|
||||
if (!IsVertexShader(stage))
|
||||
return;
|
||||
|
||||
DeclareVertexRedeclarations();
|
||||
}
|
||||
|
||||
void DeclareGeometry() {
|
||||
if (stage != ShaderStage::Geometry) {
|
||||
if (stage != ProgramType::Geometry) {
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -280,22 +284,35 @@ private:
|
||||
}
|
||||
|
||||
void DeclareVertexRedeclarations() {
|
||||
bool clip_distances_declared = false;
|
||||
|
||||
code.AddLine("out gl_PerVertex {{");
|
||||
++code.scope;
|
||||
|
||||
code.AddLine("vec4 gl_Position;");
|
||||
|
||||
for (const auto o : ir.GetOutputAttributes()) {
|
||||
if (o == Attribute::Index::PointSize)
|
||||
code.AddLine("float gl_PointSize;");
|
||||
if (!clip_distances_declared && (o == Attribute::Index::ClipDistances0123 ||
|
||||
o == Attribute::Index::ClipDistances4567)) {
|
||||
for (const auto attribute : ir.GetOutputAttributes()) {
|
||||
if (attribute == Attribute::Index::ClipDistances0123 ||
|
||||
attribute == Attribute::Index::ClipDistances4567) {
|
||||
code.AddLine("float gl_ClipDistance[];");
|
||||
clip_distances_declared = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!IsVertexShader(stage) || device.HasVertexViewportLayer()) {
|
||||
if (ir.UsesLayer()) {
|
||||
code.AddLine("int gl_Layer;");
|
||||
}
|
||||
if (ir.UsesViewportIndex()) {
|
||||
code.AddLine("int gl_ViewportIndex;");
|
||||
}
|
||||
} else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && IsVertexShader(stage) &&
|
||||
!device.HasVertexViewportLayer()) {
|
||||
LOG_ERROR(
|
||||
Render_OpenGL,
|
||||
"GL_ARB_shader_viewport_layer_array is not available and its required by a shader");
|
||||
}
|
||||
|
||||
if (ir.UsesPointSize()) {
|
||||
code.AddLine("float gl_PointSize;");
|
||||
}
|
||||
|
||||
--code.scope;
|
||||
code.AddLine("}};");
|
||||
@@ -323,11 +340,16 @@ private:
|
||||
}
|
||||
|
||||
void DeclareLocalMemory() {
|
||||
if (const u64 local_memory_size = header.GetLocalMemorySize(); local_memory_size > 0) {
|
||||
const auto element_count = Common::AlignUp(local_memory_size, 4) / 4;
|
||||
code.AddLine("float {}[{}];", GetLocalMemory(), element_count);
|
||||
code.AddNewLine();
|
||||
// TODO(Rodrigo): Unstub kernel local memory size and pass it from a register at
|
||||
// specialization time.
|
||||
const u64 local_memory_size =
|
||||
stage == ProgramType::Compute ? 0x400 : header.GetLocalMemorySize();
|
||||
if (local_memory_size == 0) {
|
||||
return;
|
||||
}
|
||||
const auto element_count = Common::AlignUp(local_memory_size, 4) / 4;
|
||||
code.AddLine("float {}[{}];", GetLocalMemory(), element_count);
|
||||
code.AddNewLine();
|
||||
}
|
||||
|
||||
void DeclareInternalFlags() {
|
||||
@@ -381,12 +403,12 @@ private:
|
||||
const u32 location{GetGenericAttributeIndex(index)};
|
||||
|
||||
std::string name{GetInputAttribute(index)};
|
||||
if (stage == ShaderStage::Geometry) {
|
||||
if (stage == ProgramType::Geometry) {
|
||||
name = "gs_" + name + "[]";
|
||||
}
|
||||
|
||||
std::string suffix;
|
||||
if (stage == ShaderStage::Fragment) {
|
||||
if (stage == ProgramType::Fragment) {
|
||||
const auto input_mode{header.ps.GetAttributeUse(location)};
|
||||
if (skip_unused && input_mode == AttributeUse::Unused) {
|
||||
return;
|
||||
@@ -398,7 +420,7 @@ private:
|
||||
}
|
||||
|
||||
void DeclareOutputAttributes() {
|
||||
if (ir.HasPhysicalAttributes() && stage != ShaderStage::Fragment) {
|
||||
if (ir.HasPhysicalAttributes() && stage != ProgramType::Fragment) {
|
||||
for (u32 i = 0; i < GetNumPhysicalVaryings(); ++i) {
|
||||
DeclareOutputAttribute(ToGenericAttribute(i));
|
||||
}
|
||||
@@ -520,7 +542,7 @@ private:
|
||||
constexpr u32 element_stride{4};
|
||||
const u32 address{generic_base + index * generic_stride + element * element_stride};
|
||||
|
||||
const bool declared{stage != ShaderStage::Fragment ||
|
||||
const bool declared{stage != ProgramType::Fragment ||
|
||||
header.ps.GetAttributeUse(index) != AttributeUse::Unused};
|
||||
const std::string value{declared ? ReadAttribute(attribute, element) : "0"};
|
||||
code.AddLine("case 0x{:x}: return {};", address, value);
|
||||
@@ -624,7 +646,7 @@ private:
|
||||
}
|
||||
|
||||
if (const auto abuf = std::get_if<AbufNode>(&*node)) {
|
||||
UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ShaderStage::Geometry,
|
||||
UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ProgramType::Geometry,
|
||||
"Physical attributes in geometry shaders are not implemented");
|
||||
if (abuf->IsPhysicalBuffer()) {
|
||||
return fmt::format("readPhysicalAttribute(ftou({}))",
|
||||
@@ -679,6 +701,9 @@ private:
|
||||
}
|
||||
|
||||
if (const auto lmem = std::get_if<LmemNode>(&*node)) {
|
||||
if (stage == ProgramType::Compute) {
|
||||
LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders");
|
||||
}
|
||||
return fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress()));
|
||||
}
|
||||
|
||||
@@ -708,7 +733,7 @@ private:
|
||||
|
||||
std::string ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) {
|
||||
const auto GeometryPass = [&](std::string_view name) {
|
||||
if (stage == ShaderStage::Geometry && buffer) {
|
||||
if (stage == ProgramType::Geometry && buffer) {
|
||||
// TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games
|
||||
// set an 0x80000000 index for those and the shader fails to build. Find out why
|
||||
// this happens and what's its intent.
|
||||
@@ -720,10 +745,10 @@ private:
|
||||
switch (attribute) {
|
||||
case Attribute::Index::Position:
|
||||
switch (stage) {
|
||||
case ShaderStage::Geometry:
|
||||
case ProgramType::Geometry:
|
||||
return fmt::format("gl_in[ftou({})].gl_Position{}", Visit(buffer),
|
||||
GetSwizzle(element));
|
||||
case ShaderStage::Fragment:
|
||||
case ProgramType::Fragment:
|
||||
return element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element));
|
||||
default:
|
||||
UNREACHABLE();
|
||||
@@ -744,7 +769,7 @@ private:
|
||||
// TODO(Subv): Find out what the values are for the first two elements when inside a
|
||||
// vertex shader, and what's the value of the fourth element when inside a Tess Eval
|
||||
// shader.
|
||||
ASSERT(stage == ShaderStage::Vertex);
|
||||
ASSERT(IsVertexShader(stage));
|
||||
switch (element) {
|
||||
case 2:
|
||||
// Config pack's first value is instance_id.
|
||||
@@ -756,7 +781,7 @@ private:
|
||||
return "0";
|
||||
case Attribute::Index::FrontFacing:
|
||||
// TODO(Subv): Find out what the values are for the other elements.
|
||||
ASSERT(stage == ShaderStage::Fragment);
|
||||
ASSERT(stage == ProgramType::Fragment);
|
||||
switch (element) {
|
||||
case 3:
|
||||
return "itof(gl_FrontFacing ? -1 : 0)";
|
||||
@@ -778,7 +803,7 @@ private:
|
||||
return value;
|
||||
}
|
||||
// There's a bug in NVidia's proprietary drivers that makes precise fail on fragment shaders
|
||||
const std::string precise = stage != ShaderStage::Fragment ? "precise " : "";
|
||||
const std::string precise = stage != ProgramType::Fragment ? "precise " : "";
|
||||
|
||||
const std::string temporary = code.GenerateTemporary();
|
||||
code.AddLine("{}float {} = {};", precise, temporary, value);
|
||||
@@ -803,6 +828,45 @@ private:
|
||||
return CastOperand(VisitOperand(operation, operand_index), type);
|
||||
}
|
||||
|
||||
std::optional<std::pair<std::string, bool>> GetOutputAttribute(const AbufNode* abuf) {
|
||||
switch (const auto attribute = abuf->GetIndex()) {
|
||||
case Attribute::Index::Position:
|
||||
return std::make_pair("gl_Position"s + GetSwizzle(abuf->GetElement()), false);
|
||||
case Attribute::Index::LayerViewportPointSize:
|
||||
switch (abuf->GetElement()) {
|
||||
case 0:
|
||||
UNIMPLEMENTED();
|
||||
return {};
|
||||
case 1:
|
||||
if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) {
|
||||
return {};
|
||||
}
|
||||
return std::make_pair("gl_Layer", true);
|
||||
case 2:
|
||||
if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) {
|
||||
return {};
|
||||
}
|
||||
return std::make_pair("gl_ViewportIndex", true);
|
||||
case 3:
|
||||
UNIMPLEMENTED_MSG("Requires some state changes for gl_PointSize to work in shader");
|
||||
return std::make_pair("gl_PointSize", false);
|
||||
}
|
||||
return {};
|
||||
case Attribute::Index::ClipDistances0123:
|
||||
return std::make_pair(fmt::format("gl_ClipDistance[{}]", abuf->GetElement()), false);
|
||||
case Attribute::Index::ClipDistances4567:
|
||||
return std::make_pair(fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4),
|
||||
false);
|
||||
default:
|
||||
if (IsGenericAttribute(attribute)) {
|
||||
return std::make_pair(
|
||||
GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement()), false);
|
||||
}
|
||||
UNIMPLEMENTED_MSG("Unhandled output attribute: {}", static_cast<u32>(attribute));
|
||||
return {};
|
||||
}
|
||||
}
|
||||
|
||||
std::string CastOperand(const std::string& value, Type type) const {
|
||||
switch (type) {
|
||||
case Type::Bool:
|
||||
@@ -999,6 +1063,8 @@ private:
|
||||
const Node& src = operation[1];
|
||||
|
||||
std::string target;
|
||||
bool is_integer = false;
|
||||
|
||||
if (const auto gpr = std::get_if<GprNode>(&*dest)) {
|
||||
if (gpr->GetIndex() == Register::ZeroIndex) {
|
||||
// Writing to Register::ZeroIndex is a no op
|
||||
@@ -1007,27 +1073,16 @@ private:
|
||||
target = GetRegister(gpr->GetIndex());
|
||||
} else if (const auto abuf = std::get_if<AbufNode>(&*dest)) {
|
||||
UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer());
|
||||
|
||||
target = [&]() -> std::string {
|
||||
switch (const auto attribute = abuf->GetIndex(); abuf->GetIndex()) {
|
||||
case Attribute::Index::Position:
|
||||
return "gl_Position"s + GetSwizzle(abuf->GetElement());
|
||||
case Attribute::Index::PointSize:
|
||||
return "gl_PointSize";
|
||||
case Attribute::Index::ClipDistances0123:
|
||||
return fmt::format("gl_ClipDistance[{}]", abuf->GetElement());
|
||||
case Attribute::Index::ClipDistances4567:
|
||||
return fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4);
|
||||
default:
|
||||
if (IsGenericAttribute(attribute)) {
|
||||
return GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement());
|
||||
}
|
||||
UNIMPLEMENTED_MSG("Unhandled output attribute: {}",
|
||||
static_cast<u32>(attribute));
|
||||
return "0";
|
||||
}
|
||||
}();
|
||||
const auto result = GetOutputAttribute(abuf);
|
||||
if (!result) {
|
||||
return {};
|
||||
}
|
||||
target = result->first;
|
||||
is_integer = result->second;
|
||||
} else if (const auto lmem = std::get_if<LmemNode>(&*dest)) {
|
||||
if (stage == ProgramType::Compute) {
|
||||
LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders");
|
||||
}
|
||||
target = fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress()));
|
||||
} else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
|
||||
const std::string real = Visit(gmem->GetRealAddress());
|
||||
@@ -1038,7 +1093,11 @@ private:
|
||||
UNREACHABLE_MSG("Assign called without a proper target");
|
||||
}
|
||||
|
||||
code.AddLine("{} = {};", target, Visit(src));
|
||||
if (is_integer) {
|
||||
code.AddLine("{} = ftoi({});", target, Visit(src));
|
||||
} else {
|
||||
code.AddLine("{} = {};", target, Visit(src));
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
@@ -1351,14 +1410,10 @@ private:
|
||||
return fmt::format("{}[{}]", pair, VisitOperand(operation, 1, Type::Uint));
|
||||
}
|
||||
|
||||
std::string LogicalAll2(Operation operation) {
|
||||
std::string LogicalAnd2(Operation operation) {
|
||||
return GenerateUnary(operation, "all", Type::Bool, Type::Bool2);
|
||||
}
|
||||
|
||||
std::string LogicalAny2(Operation operation) {
|
||||
return GenerateUnary(operation, "any", Type::Bool, Type::Bool2);
|
||||
}
|
||||
|
||||
template <bool with_nan>
|
||||
std::string GenerateHalfComparison(Operation operation, const std::string& compare_op) {
|
||||
const std::string comparison{GenerateBinaryCall(operation, compare_op, Type::Bool2,
|
||||
@@ -1555,6 +1610,14 @@ private:
|
||||
return {};
|
||||
}
|
||||
|
||||
std::string BranchIndirect(Operation operation) {
|
||||
const std::string op_a = VisitOperand(operation, 0, Type::Uint);
|
||||
|
||||
code.AddLine("jmp_to = {};", op_a);
|
||||
code.AddLine("break;");
|
||||
return {};
|
||||
}
|
||||
|
||||
std::string PushFlowStack(Operation operation) {
|
||||
const auto stack = std::get<MetaStackClass>(operation.GetMeta());
|
||||
const auto target = std::get_if<ImmediateNode>(&*operation[0]);
|
||||
@@ -1573,7 +1636,7 @@ private:
|
||||
}
|
||||
|
||||
std::string Exit(Operation operation) {
|
||||
if (stage != ShaderStage::Fragment) {
|
||||
if (stage != ProgramType::Fragment) {
|
||||
code.AddLine("return;");
|
||||
return {};
|
||||
}
|
||||
@@ -1624,7 +1687,7 @@ private:
|
||||
}
|
||||
|
||||
std::string EmitVertex(Operation operation) {
|
||||
ASSERT_MSG(stage == ShaderStage::Geometry,
|
||||
ASSERT_MSG(stage == ProgramType::Geometry,
|
||||
"EmitVertex is expected to be used in a geometry shader.");
|
||||
|
||||
// If a geometry shader is attached, it will always flip (it's the last stage before
|
||||
@@ -1635,7 +1698,7 @@ private:
|
||||
}
|
||||
|
||||
std::string EndPrimitive(Operation operation) {
|
||||
ASSERT_MSG(stage == ShaderStage::Geometry,
|
||||
ASSERT_MSG(stage == ProgramType::Geometry,
|
||||
"EndPrimitive is expected to be used in a geometry shader.");
|
||||
|
||||
code.AddLine("EndPrimitive();");
|
||||
@@ -1657,7 +1720,7 @@ private:
|
||||
return "utof(gl_WorkGroupID"s + GetSwizzle(element) + ')';
|
||||
}
|
||||
|
||||
static constexpr OperationDecompilersArray operation_decompilers = {
|
||||
static constexpr std::array operation_decompilers = {
|
||||
&GLSLDecompiler::Assign,
|
||||
|
||||
&GLSLDecompiler::Select,
|
||||
@@ -1741,8 +1804,7 @@ private:
|
||||
&GLSLDecompiler::LogicalXor,
|
||||
&GLSLDecompiler::LogicalNegate,
|
||||
&GLSLDecompiler::LogicalPick2,
|
||||
&GLSLDecompiler::LogicalAll2,
|
||||
&GLSLDecompiler::LogicalAny2,
|
||||
&GLSLDecompiler::LogicalAnd2,
|
||||
|
||||
&GLSLDecompiler::LogicalLessThan<Type::Float>,
|
||||
&GLSLDecompiler::LogicalEqual<Type::Float>,
|
||||
@@ -1789,6 +1851,7 @@ private:
|
||||
&GLSLDecompiler::ImageStore,
|
||||
|
||||
&GLSLDecompiler::Branch,
|
||||
&GLSLDecompiler::BranchIndirect,
|
||||
&GLSLDecompiler::PushFlowStack,
|
||||
&GLSLDecompiler::PopFlowStack,
|
||||
&GLSLDecompiler::Exit,
|
||||
@@ -1805,6 +1868,7 @@ private:
|
||||
&GLSLDecompiler::WorkGroupId<1>,
|
||||
&GLSLDecompiler::WorkGroupId<2>,
|
||||
};
|
||||
static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
|
||||
|
||||
std::string GetRegister(u32 index) const {
|
||||
return GetDeclarationWithSuffix(index, "gpr");
|
||||
@@ -1869,7 +1933,7 @@ private:
|
||||
}
|
||||
|
||||
u32 GetNumPhysicalInputAttributes() const {
|
||||
return stage == ShaderStage::Vertex ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings();
|
||||
return IsVertexShader(stage) ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings();
|
||||
}
|
||||
|
||||
u32 GetNumPhysicalAttributes() const {
|
||||
@@ -1882,7 +1946,7 @@ private:
|
||||
|
||||
const Device& device;
|
||||
const ShaderIR& ir;
|
||||
const ShaderStage stage;
|
||||
const ProgramType stage;
|
||||
const std::string suffix;
|
||||
const Header header;
|
||||
|
||||
@@ -1913,7 +1977,7 @@ std::string GetCommonDeclarations() {
|
||||
MAX_CONSTBUFFER_ELEMENTS);
|
||||
}
|
||||
|
||||
ProgramResult Decompile(const Device& device, const ShaderIR& ir, Maxwell::ShaderStage stage,
|
||||
ProgramResult Decompile(const Device& device, const ShaderIR& ir, ProgramType stage,
|
||||
const std::string& suffix) {
|
||||
GLSLDecompiler decompiler(device, ir, stage, suffix);
|
||||
decompiler.Decompile();
|
||||
|
||||
@@ -12,14 +12,26 @@
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
|
||||
namespace OpenGL {
|
||||
class Device;
|
||||
}
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
class ShaderIR;
|
||||
}
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
class Device;
|
||||
|
||||
enum class ProgramType : u32 {
|
||||
VertexA = 0,
|
||||
VertexB = 1,
|
||||
TessellationControl = 2,
|
||||
TessellationEval = 3,
|
||||
Geometry = 4,
|
||||
Fragment = 5,
|
||||
Compute = 6
|
||||
};
|
||||
|
||||
} // namespace OpenGL
|
||||
|
||||
namespace OpenGL::GLShader {
|
||||
|
||||
struct ShaderEntries;
|
||||
@@ -78,12 +90,13 @@ struct ShaderEntries {
|
||||
std::vector<ImageEntry> images;
|
||||
std::vector<GlobalMemoryEntry> global_memory_entries;
|
||||
std::array<bool, Maxwell::NumClipDistances> clip_distances{};
|
||||
bool shader_viewport_layer_array{};
|
||||
std::size_t shader_length{};
|
||||
};
|
||||
|
||||
std::string GetCommonDeclarations();
|
||||
|
||||
ProgramResult Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
|
||||
Maxwell::ShaderStage stage, const std::string& suffix);
|
||||
ProgramType stage, const std::string& suffix);
|
||||
|
||||
} // namespace OpenGL::GLShader
|
||||
|
||||
@@ -51,7 +51,7 @@ ShaderCacheVersionHash GetShaderCacheVersionHash() {
|
||||
|
||||
} // namespace
|
||||
|
||||
ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, Maxwell::ShaderProgram program_type,
|
||||
ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type,
|
||||
u32 program_code_size, u32 program_code_size_b,
|
||||
ProgramCode program_code, ProgramCode program_code_b)
|
||||
: unique_identifier{unique_identifier}, program_type{program_type},
|
||||
@@ -373,6 +373,12 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
|
||||
}
|
||||
}
|
||||
|
||||
bool shader_viewport_layer_array{};
|
||||
if (!LoadObjectFromPrecompiled(shader_viewport_layer_array)) {
|
||||
return {};
|
||||
}
|
||||
entry.entries.shader_viewport_layer_array = shader_viewport_layer_array;
|
||||
|
||||
u64 shader_length{};
|
||||
if (!LoadObjectFromPrecompiled(shader_length)) {
|
||||
return {};
|
||||
@@ -445,6 +451,10 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std:
|
||||
}
|
||||
}
|
||||
|
||||
if (!SaveObjectToPrecompiled(entries.shader_viewport_layer_array)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!SaveObjectToPrecompiled(static_cast<u64>(entries.shader_length))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -18,7 +18,6 @@
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "core/file_sys/vfs_vector.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_gen.h"
|
||||
|
||||
namespace Core {
|
||||
@@ -34,14 +33,11 @@ namespace OpenGL {
|
||||
struct ShaderDiskCacheUsage;
|
||||
struct ShaderDiskCacheDump;
|
||||
|
||||
using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>;
|
||||
|
||||
using ProgramCode = std::vector<u64>;
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
|
||||
using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>;
|
||||
using TextureBufferUsage = std::bitset<64>;
|
||||
|
||||
/// Allocated bindings used by an OpenGL shader program.
|
||||
/// Allocated bindings used by an OpenGL shader program
|
||||
struct BaseBindings {
|
||||
u32 cbuf{};
|
||||
u32 gmem{};
|
||||
@@ -126,7 +122,7 @@ namespace OpenGL {
|
||||
/// Describes a shader how it's used by the guest GPU
|
||||
class ShaderDiskCacheRaw {
|
||||
public:
|
||||
explicit ShaderDiskCacheRaw(u64 unique_identifier, Maxwell::ShaderProgram program_type,
|
||||
explicit ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type,
|
||||
u32 program_code_size, u32 program_code_size_b,
|
||||
ProgramCode program_code, ProgramCode program_code_b);
|
||||
ShaderDiskCacheRaw();
|
||||
@@ -141,30 +137,13 @@ public:
|
||||
}
|
||||
|
||||
bool HasProgramA() const {
|
||||
return program_type == Maxwell::ShaderProgram::VertexA;
|
||||
return program_type == ProgramType::VertexA;
|
||||
}
|
||||
|
||||
Maxwell::ShaderProgram GetProgramType() const {
|
||||
ProgramType GetProgramType() const {
|
||||
return program_type;
|
||||
}
|
||||
|
||||
Maxwell::ShaderStage GetProgramStage() const {
|
||||
switch (program_type) {
|
||||
case Maxwell::ShaderProgram::VertexA:
|
||||
case Maxwell::ShaderProgram::VertexB:
|
||||
return Maxwell::ShaderStage::Vertex;
|
||||
case Maxwell::ShaderProgram::TesselationControl:
|
||||
return Maxwell::ShaderStage::TesselationControl;
|
||||
case Maxwell::ShaderProgram::TesselationEval:
|
||||
return Maxwell::ShaderStage::TesselationEval;
|
||||
case Maxwell::ShaderProgram::Geometry:
|
||||
return Maxwell::ShaderStage::Geometry;
|
||||
case Maxwell::ShaderProgram::Fragment:
|
||||
return Maxwell::ShaderStage::Fragment;
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
const ProgramCode& GetProgramCode() const {
|
||||
return program_code;
|
||||
}
|
||||
@@ -175,7 +154,7 @@ public:
|
||||
|
||||
private:
|
||||
u64 unique_identifier{};
|
||||
Maxwell::ShaderProgram program_type{};
|
||||
ProgramType program_type{};
|
||||
u32 program_code_size{};
|
||||
u32 program_code_size_b{};
|
||||
|
||||
|
||||
@@ -14,7 +14,8 @@ using Tegra::Engines::Maxwell3D;
|
||||
using VideoCommon::Shader::ProgramCode;
|
||||
using VideoCommon::Shader::ShaderIR;
|
||||
|
||||
static constexpr u32 PROGRAM_OFFSET{10};
|
||||
static constexpr u32 PROGRAM_OFFSET = 10;
|
||||
static constexpr u32 COMPUTE_OFFSET = 0;
|
||||
|
||||
ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup) {
|
||||
const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
|
||||
@@ -29,17 +30,15 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config {
|
||||
};
|
||||
|
||||
)";
|
||||
const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
|
||||
ProgramResult program =
|
||||
Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Vertex, "vertex");
|
||||
|
||||
const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a);
|
||||
const auto stage = setup.IsDualProgram() ? ProgramType::VertexA : ProgramType::VertexB;
|
||||
ProgramResult program = Decompile(device, program_ir, stage, "vertex");
|
||||
out += program.first;
|
||||
|
||||
if (setup.IsDualProgram()) {
|
||||
const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET);
|
||||
ProgramResult program_b =
|
||||
Decompile(device, program_ir_b, Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b");
|
||||
|
||||
const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET, setup.program.size_b);
|
||||
ProgramResult program_b = Decompile(device, program_ir_b, ProgramType::VertexB, "vertex_b");
|
||||
out += program_b.first;
|
||||
}
|
||||
|
||||
@@ -80,9 +79,9 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config {
|
||||
};
|
||||
|
||||
)";
|
||||
const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
|
||||
ProgramResult program =
|
||||
Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Geometry, "geometry");
|
||||
|
||||
const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a);
|
||||
ProgramResult program = Decompile(device, program_ir, ProgramType::Geometry, "geometry");
|
||||
out += program.first;
|
||||
|
||||
out += R"(
|
||||
@@ -115,10 +114,8 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config {
|
||||
};
|
||||
|
||||
)";
|
||||
const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
|
||||
ProgramResult program =
|
||||
Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Fragment, "fragment");
|
||||
|
||||
const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a);
|
||||
ProgramResult program = Decompile(device, program_ir, ProgramType::Fragment, "fragment");
|
||||
out += program.first;
|
||||
|
||||
out += R"(
|
||||
@@ -130,4 +127,22 @@ void main() {
|
||||
return {std::move(out), std::move(program.second)};
|
||||
}
|
||||
|
||||
ProgramResult GenerateComputeShader(const Device& device, const ShaderSetup& setup) {
|
||||
const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
|
||||
|
||||
std::string out = "// Shader Unique Id: CS" + id + "\n\n";
|
||||
out += GetCommonDeclarations();
|
||||
|
||||
const ShaderIR program_ir(setup.program.code, COMPUTE_OFFSET, setup.program.size_a);
|
||||
ProgramResult program = Decompile(device, program_ir, ProgramType::Compute, "compute");
|
||||
out += program.first;
|
||||
|
||||
out += R"(
|
||||
void main() {
|
||||
execute_compute();
|
||||
}
|
||||
)";
|
||||
return {std::move(out), std::move(program.second)};
|
||||
}
|
||||
|
||||
} // namespace OpenGL::GLShader
|
||||
|
||||
@@ -27,6 +27,8 @@ struct ShaderSetup {
|
||||
ProgramCode code;
|
||||
ProgramCode code_b; // Used for dual vertex shaders
|
||||
u64 unique_identifier;
|
||||
std::size_t size_a;
|
||||
std::size_t size_b;
|
||||
} program;
|
||||
|
||||
/// Used in scenarios where we have a dual vertex shaders
|
||||
@@ -52,4 +54,7 @@ ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& se
|
||||
/// Generates the GLSL fragment shader program source code for the given FS program
|
||||
ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup);
|
||||
|
||||
/// Generates the GLSL compute shader program source code for the given CS program
|
||||
ProgramResult GenerateComputeShader(const Device& device, const ShaderSetup& setup);
|
||||
|
||||
} // namespace OpenGL::GLShader
|
||||
|
||||
@@ -10,21 +10,25 @@
|
||||
|
||||
namespace OpenGL::GLShader {
|
||||
|
||||
GLuint LoadShader(const char* source, GLenum type) {
|
||||
const char* debug_type;
|
||||
namespace {
|
||||
const char* GetStageDebugName(GLenum type) {
|
||||
switch (type) {
|
||||
case GL_VERTEX_SHADER:
|
||||
debug_type = "vertex";
|
||||
break;
|
||||
return "vertex";
|
||||
case GL_GEOMETRY_SHADER:
|
||||
debug_type = "geometry";
|
||||
break;
|
||||
return "geometry";
|
||||
case GL_FRAGMENT_SHADER:
|
||||
debug_type = "fragment";
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
return "fragment";
|
||||
case GL_COMPUTE_SHADER:
|
||||
return "compute";
|
||||
}
|
||||
UNIMPLEMENTED();
|
||||
return "unknown";
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
GLuint LoadShader(const char* source, GLenum type) {
|
||||
const char* debug_type = GetStageDebugName(type);
|
||||
const GLuint shader_id = glCreateShader(type);
|
||||
glShaderSource(shader_id, 1, &source, nullptr);
|
||||
LOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type);
|
||||
|
||||
@@ -6,8 +6,11 @@
|
||||
#include <glad/glad.h>
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "common/microprofile.h"
|
||||
#include "video_core/renderer_opengl/gl_state.h"
|
||||
|
||||
MICROPROFILE_DEFINE(OpenGL_State, "OpenGL", "State Change", MP_RGB(192, 128, 128));
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
@@ -162,6 +165,25 @@ OpenGLState::OpenGLState() {
|
||||
alpha_test.ref = 0.0f;
|
||||
}
|
||||
|
||||
void OpenGLState::SetDefaultViewports() {
|
||||
for (auto& item : viewports) {
|
||||
item.x = 0;
|
||||
item.y = 0;
|
||||
item.width = 0;
|
||||
item.height = 0;
|
||||
item.depth_range_near = 0.0f;
|
||||
item.depth_range_far = 1.0f;
|
||||
item.scissor.enabled = false;
|
||||
item.scissor.x = 0;
|
||||
item.scissor.y = 0;
|
||||
item.scissor.width = 0;
|
||||
item.scissor.height = 0;
|
||||
}
|
||||
|
||||
depth_clamp.far_plane = false;
|
||||
depth_clamp.near_plane = false;
|
||||
}
|
||||
|
||||
void OpenGLState::ApplyDefaultState() {
|
||||
glEnable(GL_BLEND);
|
||||
glDisable(GL_FRAMEBUFFER_SRGB);
|
||||
@@ -523,7 +545,8 @@ void OpenGLState::ApplySamplers() const {
|
||||
}
|
||||
}
|
||||
|
||||
void OpenGLState::Apply() const {
|
||||
void OpenGLState::Apply() {
|
||||
MICROPROFILE_SCOPE(OpenGL_State);
|
||||
ApplyFramebufferState();
|
||||
ApplyVertexArrayState();
|
||||
ApplyShaderProgram();
|
||||
@@ -532,19 +555,31 @@ void OpenGLState::Apply() const {
|
||||
ApplyPointSize();
|
||||
ApplyFragmentColorClamp();
|
||||
ApplyMultisample();
|
||||
if (dirty.color_mask) {
|
||||
ApplyColorMask();
|
||||
dirty.color_mask = false;
|
||||
}
|
||||
ApplyDepthClamp();
|
||||
ApplyColorMask();
|
||||
ApplyViewport();
|
||||
ApplyStencilTest();
|
||||
if (dirty.stencil_state) {
|
||||
ApplyStencilTest();
|
||||
dirty.stencil_state = false;
|
||||
}
|
||||
ApplySRgb();
|
||||
ApplyCulling();
|
||||
ApplyDepth();
|
||||
ApplyPrimitiveRestart();
|
||||
ApplyBlending();
|
||||
if (dirty.blend_state) {
|
||||
ApplyBlending();
|
||||
dirty.blend_state = false;
|
||||
}
|
||||
ApplyLogicOp();
|
||||
ApplyTextures();
|
||||
ApplySamplers();
|
||||
ApplyPolygonOffset();
|
||||
if (dirty.polygon_offset) {
|
||||
ApplyPolygonOffset();
|
||||
dirty.polygon_offset = false;
|
||||
}
|
||||
ApplyAlphaTest();
|
||||
}
|
||||
|
||||
|
||||
@@ -195,8 +195,9 @@ public:
|
||||
s_rgb_used = false;
|
||||
}
|
||||
|
||||
void SetDefaultViewports();
|
||||
/// Apply this state as the current OpenGL state
|
||||
void Apply() const;
|
||||
void Apply();
|
||||
|
||||
void ApplyFramebufferState() const;
|
||||
void ApplyVertexArrayState() const;
|
||||
@@ -237,11 +238,41 @@ public:
|
||||
/// Viewport does not affects glClearBuffer so emulate viewport using scissor test
|
||||
void EmulateViewportWithScissor();
|
||||
|
||||
void MarkDirtyBlendState() {
|
||||
dirty.blend_state = true;
|
||||
}
|
||||
|
||||
void MarkDirtyStencilState() {
|
||||
dirty.stencil_state = true;
|
||||
}
|
||||
|
||||
void MarkDirtyPolygonOffset() {
|
||||
dirty.polygon_offset = true;
|
||||
}
|
||||
|
||||
void MarkDirtyColorMask() {
|
||||
dirty.color_mask = true;
|
||||
}
|
||||
|
||||
void AllDirty() {
|
||||
dirty.blend_state = true;
|
||||
dirty.stencil_state = true;
|
||||
dirty.polygon_offset = true;
|
||||
dirty.color_mask = true;
|
||||
}
|
||||
|
||||
private:
|
||||
static OpenGLState cur_state;
|
||||
|
||||
// Workaround for sRGB problems caused by QT not supporting srgb output
|
||||
static bool s_rgb_used;
|
||||
struct {
|
||||
bool blend_state;
|
||||
bool stencil_state;
|
||||
bool viewport_state;
|
||||
bool polygon_offset;
|
||||
bool color_mask;
|
||||
} dirty{};
|
||||
};
|
||||
|
||||
} // namespace OpenGL
|
||||
|
||||
@@ -31,6 +31,8 @@ using VideoCore::Surface::SurfaceType;
|
||||
|
||||
MICROPROFILE_DEFINE(OpenGL_Texture_Upload, "OpenGL", "Texture Upload", MP_RGB(128, 192, 128));
|
||||
MICROPROFILE_DEFINE(OpenGL_Texture_Download, "OpenGL", "Texture Download", MP_RGB(128, 192, 128));
|
||||
MICROPROFILE_DEFINE(OpenGL_Texture_Buffer_Copy, "OpenGL", "Texture Buffer Copy",
|
||||
MP_RGB(128, 192, 128));
|
||||
|
||||
namespace {
|
||||
|
||||
@@ -483,11 +485,15 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view,
|
||||
const auto& dst_params{dst_view->GetSurfaceParams()};
|
||||
|
||||
OpenGLState prev_state{OpenGLState::GetCurState()};
|
||||
SCOPE_EXIT({ prev_state.Apply(); });
|
||||
SCOPE_EXIT({
|
||||
prev_state.AllDirty();
|
||||
prev_state.Apply();
|
||||
});
|
||||
|
||||
OpenGLState state;
|
||||
state.draw.read_framebuffer = src_framebuffer.handle;
|
||||
state.draw.draw_framebuffer = dst_framebuffer.handle;
|
||||
state.AllDirty();
|
||||
state.Apply();
|
||||
|
||||
u32 buffers{};
|
||||
@@ -535,6 +541,7 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view,
|
||||
}
|
||||
|
||||
void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) {
|
||||
MICROPROFILE_SCOPE(OpenGL_Texture_Buffer_Copy);
|
||||
const auto& src_params = src_surface->GetSurfaceParams();
|
||||
const auto& dst_params = dst_surface->GetSurfaceParams();
|
||||
UNIMPLEMENTED_IF(src_params.num_levels > 1 || dst_params.num_levels > 1);
|
||||
|
||||
@@ -101,7 +101,6 @@ RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::Syst
|
||||
|
||||
RendererOpenGL::~RendererOpenGL() = default;
|
||||
|
||||
/// Swap buffers (render frame)
|
||||
void RendererOpenGL::SwapBuffers(
|
||||
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
|
||||
|
||||
@@ -109,6 +108,7 @@ void RendererOpenGL::SwapBuffers(
|
||||
|
||||
// Maintain the rasterizer's state as a priority
|
||||
OpenGLState prev_state = OpenGLState::GetCurState();
|
||||
state.AllDirty();
|
||||
state.Apply();
|
||||
|
||||
if (framebuffer) {
|
||||
@@ -130,6 +130,8 @@ void RendererOpenGL::SwapBuffers(
|
||||
|
||||
DrawScreen(render_window.GetFramebufferLayout());
|
||||
|
||||
rasterizer->TickFrame();
|
||||
|
||||
render_window.SwapBuffers();
|
||||
}
|
||||
|
||||
@@ -139,6 +141,7 @@ void RendererOpenGL::SwapBuffers(
|
||||
system.GetPerfStats().BeginSystemFrame();
|
||||
|
||||
// Restore the rasterizer state
|
||||
prev_state.AllDirty();
|
||||
prev_state.Apply();
|
||||
}
|
||||
|
||||
@@ -205,6 +208,7 @@ void RendererOpenGL::InitOpenGLObjects() {
|
||||
// Link shaders and get variable locations
|
||||
shader.CreateFromSource(vertex_shader, nullptr, fragment_shader);
|
||||
state.draw.shader_program = shader.handle;
|
||||
state.AllDirty();
|
||||
state.Apply();
|
||||
uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix");
|
||||
uniform_color_texture = glGetUniformLocation(shader.handle, "color_texture");
|
||||
@@ -262,7 +266,6 @@ void RendererOpenGL::CreateRasterizer() {
|
||||
if (rasterizer) {
|
||||
return;
|
||||
}
|
||||
// Initialize sRGB Usage
|
||||
OpenGLState::ClearsRGBUsed();
|
||||
rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info);
|
||||
}
|
||||
@@ -338,12 +341,14 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
|
||||
// Workaround brigthness problems in SMO by enabling sRGB in the final output
|
||||
// if it has been used in the frame. Needed because of this bug in QT: QTBUG-50987
|
||||
state.framebuffer_srgb.enabled = OpenGLState::GetsRGBUsed();
|
||||
state.AllDirty();
|
||||
state.Apply();
|
||||
glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), vertices.data());
|
||||
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
|
||||
// Restore default state
|
||||
state.framebuffer_srgb.enabled = false;
|
||||
state.texture_units[0].texture = 0;
|
||||
state.AllDirty();
|
||||
state.Apply();
|
||||
// Clear sRGB state for the next frame
|
||||
OpenGLState::ClearsRGBUsed();
|
||||
@@ -388,6 +393,7 @@ void RendererOpenGL::CaptureScreenshot() {
|
||||
GLuint old_read_fb = state.draw.read_framebuffer;
|
||||
GLuint old_draw_fb = state.draw.draw_framebuffer;
|
||||
state.draw.read_framebuffer = state.draw.draw_framebuffer = screenshot_framebuffer.handle;
|
||||
state.AllDirty();
|
||||
state.Apply();
|
||||
|
||||
Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout};
|
||||
@@ -407,6 +413,7 @@ void RendererOpenGL::CaptureScreenshot() {
|
||||
screenshot_framebuffer.Release();
|
||||
state.draw.read_framebuffer = old_read_fb;
|
||||
state.draw.draw_framebuffer = old_draw_fb;
|
||||
state.AllDirty();
|
||||
state.Apply();
|
||||
glDeleteRenderbuffers(1, &renderbuffer);
|
||||
|
||||
|
||||
@@ -13,29 +13,67 @@
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
VertexArrayPushBuffer::VertexArrayPushBuffer() = default;
|
||||
|
||||
VertexArrayPushBuffer::~VertexArrayPushBuffer() = default;
|
||||
|
||||
void VertexArrayPushBuffer::Setup(GLuint vao_) {
|
||||
vao = vao_;
|
||||
index_buffer = nullptr;
|
||||
vertex_buffers.clear();
|
||||
}
|
||||
|
||||
void VertexArrayPushBuffer::SetIndexBuffer(const GLuint* buffer) {
|
||||
index_buffer = buffer;
|
||||
}
|
||||
|
||||
void VertexArrayPushBuffer::SetVertexBuffer(GLuint binding_index, const GLuint* buffer,
|
||||
GLintptr offset, GLsizei stride) {
|
||||
vertex_buffers.push_back(Entry{binding_index, buffer, offset, stride});
|
||||
}
|
||||
|
||||
void VertexArrayPushBuffer::Bind() {
|
||||
if (index_buffer) {
|
||||
glVertexArrayElementBuffer(vao, *index_buffer);
|
||||
}
|
||||
|
||||
// TODO(Rodrigo): Find a way to ARB_multi_bind this
|
||||
for (const auto& entry : vertex_buffers) {
|
||||
glVertexArrayVertexBuffer(vao, entry.binding_index, *entry.buffer, entry.offset,
|
||||
entry.stride);
|
||||
}
|
||||
}
|
||||
|
||||
BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {}
|
||||
|
||||
BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default;
|
||||
|
||||
void BindBuffersRangePushBuffer::Setup(GLuint first_) {
|
||||
first = first_;
|
||||
buffers.clear();
|
||||
buffer_pointers.clear();
|
||||
offsets.clear();
|
||||
sizes.clear();
|
||||
}
|
||||
|
||||
void BindBuffersRangePushBuffer::Push(GLuint buffer, GLintptr offset, GLsizeiptr size) {
|
||||
buffers.push_back(buffer);
|
||||
void BindBuffersRangePushBuffer::Push(const GLuint* buffer, GLintptr offset, GLsizeiptr size) {
|
||||
buffer_pointers.push_back(buffer);
|
||||
offsets.push_back(offset);
|
||||
sizes.push_back(size);
|
||||
}
|
||||
|
||||
void BindBuffersRangePushBuffer::Bind() const {
|
||||
const std::size_t count{buffers.size()};
|
||||
void BindBuffersRangePushBuffer::Bind() {
|
||||
// Ensure sizes are valid.
|
||||
const std::size_t count{buffer_pointers.size()};
|
||||
DEBUG_ASSERT(count == offsets.size() && count == sizes.size());
|
||||
if (count == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Dereference buffers.
|
||||
buffers.resize(count);
|
||||
std::transform(buffer_pointers.begin(), buffer_pointers.end(), buffers.begin(),
|
||||
[](const GLuint* pointer) { return *pointer; });
|
||||
|
||||
glBindBuffersRange(target, first, static_cast<GLsizei>(count), buffers.data(), offsets.data(),
|
||||
sizes.data());
|
||||
}
|
||||
|
||||
@@ -11,20 +11,49 @@
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
class BindBuffersRangePushBuffer {
|
||||
class VertexArrayPushBuffer final {
|
||||
public:
|
||||
BindBuffersRangePushBuffer(GLenum target);
|
||||
explicit VertexArrayPushBuffer();
|
||||
~VertexArrayPushBuffer();
|
||||
|
||||
void Setup(GLuint vao_);
|
||||
|
||||
void SetIndexBuffer(const GLuint* buffer);
|
||||
|
||||
void SetVertexBuffer(GLuint binding_index, const GLuint* buffer, GLintptr offset,
|
||||
GLsizei stride);
|
||||
|
||||
void Bind();
|
||||
|
||||
private:
|
||||
struct Entry {
|
||||
GLuint binding_index{};
|
||||
const GLuint* buffer{};
|
||||
GLintptr offset{};
|
||||
GLsizei stride{};
|
||||
};
|
||||
|
||||
GLuint vao{};
|
||||
const GLuint* index_buffer{};
|
||||
std::vector<Entry> vertex_buffers;
|
||||
};
|
||||
|
||||
class BindBuffersRangePushBuffer final {
|
||||
public:
|
||||
explicit BindBuffersRangePushBuffer(GLenum target);
|
||||
~BindBuffersRangePushBuffer();
|
||||
|
||||
void Setup(GLuint first_);
|
||||
|
||||
void Push(GLuint buffer, GLintptr offset, GLsizeiptr size);
|
||||
void Push(const GLuint* buffer, GLintptr offset, GLsizeiptr size);
|
||||
|
||||
void Bind() const;
|
||||
void Bind();
|
||||
|
||||
private:
|
||||
GLenum target;
|
||||
GLuint first;
|
||||
GLenum target{};
|
||||
GLuint first{};
|
||||
std::vector<const GLuint*> buffer_pointers;
|
||||
|
||||
std::vector<GLuint> buffers;
|
||||
std::vector<GLintptr> offsets;
|
||||
std::vector<GLsizeiptr> sizes;
|
||||
|
||||
@@ -205,10 +205,6 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
using OperationDecompilerFn = Id (SPIRVDecompiler::*)(Operation);
|
||||
using OperationDecompilersArray =
|
||||
std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>;
|
||||
|
||||
static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount);
|
||||
|
||||
void AllocateBindings() {
|
||||
@@ -430,20 +426,17 @@ private:
|
||||
instance_index = DeclareBuiltIn(spv::BuiltIn::InstanceIndex, spv::StorageClass::Input,
|
||||
t_in_uint, "instance_index");
|
||||
|
||||
bool is_point_size_declared = false;
|
||||
bool is_clip_distances_declared = false;
|
||||
for (const auto index : ir.GetOutputAttributes()) {
|
||||
if (index == Attribute::Index::PointSize) {
|
||||
is_point_size_declared = true;
|
||||
} else if (index == Attribute::Index::ClipDistances0123 ||
|
||||
index == Attribute::Index::ClipDistances4567) {
|
||||
if (index == Attribute::Index::ClipDistances0123 ||
|
||||
index == Attribute::Index::ClipDistances4567) {
|
||||
is_clip_distances_declared = true;
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<Id> members;
|
||||
members.push_back(t_float4);
|
||||
if (is_point_size_declared) {
|
||||
if (ir.UsesPointSize()) {
|
||||
members.push_back(t_float);
|
||||
}
|
||||
if (is_clip_distances_declared) {
|
||||
@@ -466,7 +459,7 @@ private:
|
||||
|
||||
position_index = MemberDecorateBuiltIn(spv::BuiltIn::Position, "position", true);
|
||||
point_size_index =
|
||||
MemberDecorateBuiltIn(spv::BuiltIn::PointSize, "point_size", is_point_size_declared);
|
||||
MemberDecorateBuiltIn(spv::BuiltIn::PointSize, "point_size", ir.UsesPointSize());
|
||||
clip_distances_index = MemberDecorateBuiltIn(spv::BuiltIn::ClipDistance, "clip_distances",
|
||||
is_clip_distances_declared);
|
||||
|
||||
@@ -712,7 +705,8 @@ private:
|
||||
case Attribute::Index::Position:
|
||||
return AccessElement(t_out_float, per_vertex, position_index,
|
||||
abuf->GetElement());
|
||||
case Attribute::Index::PointSize:
|
||||
case Attribute::Index::LayerViewportPointSize:
|
||||
UNIMPLEMENTED_IF(abuf->GetElement() != 3);
|
||||
return AccessElement(t_out_float, per_vertex, point_size_index);
|
||||
case Attribute::Index::ClipDistances0123:
|
||||
return AccessElement(t_out_float, per_vertex, clip_distances_index,
|
||||
@@ -806,12 +800,7 @@ private:
|
||||
return {};
|
||||
}
|
||||
|
||||
Id LogicalAll2(Operation operation) {
|
||||
UNIMPLEMENTED();
|
||||
return {};
|
||||
}
|
||||
|
||||
Id LogicalAny2(Operation operation) {
|
||||
Id LogicalAnd2(Operation operation) {
|
||||
UNIMPLEMENTED();
|
||||
return {};
|
||||
}
|
||||
@@ -949,6 +938,14 @@ private:
|
||||
return {};
|
||||
}
|
||||
|
||||
Id BranchIndirect(Operation operation) {
|
||||
const Id op_a = VisitOperand<Type::Uint>(operation, 0);
|
||||
|
||||
Emit(OpStore(jmp_to, op_a));
|
||||
BranchingOp([&]() { Emit(OpBranch(continue_label)); });
|
||||
return {};
|
||||
}
|
||||
|
||||
Id PushFlowStack(Operation operation) {
|
||||
const auto target = std::get_if<ImmediateNode>(&*operation[0]);
|
||||
ASSERT(target);
|
||||
@@ -1200,7 +1197,7 @@ private:
|
||||
return {};
|
||||
}
|
||||
|
||||
static constexpr OperationDecompilersArray operation_decompilers = {
|
||||
static constexpr std::array operation_decompilers = {
|
||||
&SPIRVDecompiler::Assign,
|
||||
|
||||
&SPIRVDecompiler::Ternary<&Module::OpSelect, Type::Float, Type::Bool, Type::Float,
|
||||
@@ -1285,8 +1282,7 @@ private:
|
||||
&SPIRVDecompiler::Binary<&Module::OpLogicalNotEqual, Type::Bool>,
|
||||
&SPIRVDecompiler::Unary<&Module::OpLogicalNot, Type::Bool>,
|
||||
&SPIRVDecompiler::LogicalPick2,
|
||||
&SPIRVDecompiler::LogicalAll2,
|
||||
&SPIRVDecompiler::LogicalAny2,
|
||||
&SPIRVDecompiler::LogicalAnd2,
|
||||
|
||||
&SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool, Type::Float>,
|
||||
&SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool, Type::Float>,
|
||||
@@ -1334,6 +1330,7 @@ private:
|
||||
&SPIRVDecompiler::ImageStore,
|
||||
|
||||
&SPIRVDecompiler::Branch,
|
||||
&SPIRVDecompiler::BranchIndirect,
|
||||
&SPIRVDecompiler::PushFlowStack,
|
||||
&SPIRVDecompiler::PopFlowStack,
|
||||
&SPIRVDecompiler::Exit,
|
||||
@@ -1350,6 +1347,7 @@ private:
|
||||
&SPIRVDecompiler::WorkGroupId<1>,
|
||||
&SPIRVDecompiler::WorkGroupId<2>,
|
||||
};
|
||||
static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
|
||||
|
||||
const VKDevice& device;
|
||||
const ShaderIR& ir;
|
||||
|
||||
476
src/video_core/shader/control_flow.cpp
Normal file
476
src/video_core/shader/control_flow.cpp
Normal file
@@ -0,0 +1,476 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <list>
|
||||
#include <map>
|
||||
#include <stack>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/shader/control_flow.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
using Tegra::Shader::Instruction;
|
||||
using Tegra::Shader::OpCode;
|
||||
|
||||
constexpr s32 unassigned_branch = -2;
|
||||
|
||||
struct Query {
|
||||
u32 address{};
|
||||
std::stack<u32> ssy_stack{};
|
||||
std::stack<u32> pbk_stack{};
|
||||
};
|
||||
|
||||
struct BlockStack {
|
||||
BlockStack() = default;
|
||||
BlockStack(const BlockStack& b) = default;
|
||||
BlockStack(const Query& q) : ssy_stack{q.ssy_stack}, pbk_stack{q.pbk_stack} {}
|
||||
std::stack<u32> ssy_stack{};
|
||||
std::stack<u32> pbk_stack{};
|
||||
};
|
||||
|
||||
struct BlockBranchInfo {
|
||||
Condition condition{};
|
||||
s32 address{exit_branch};
|
||||
bool kill{};
|
||||
bool is_sync{};
|
||||
bool is_brk{};
|
||||
bool ignore{};
|
||||
};
|
||||
|
||||
struct BlockInfo {
|
||||
u32 start{};
|
||||
u32 end{};
|
||||
bool visited{};
|
||||
BlockBranchInfo branch{};
|
||||
|
||||
bool IsInside(const u32 address) const {
|
||||
return start <= address && address <= end;
|
||||
}
|
||||
};
|
||||
|
||||
struct CFGRebuildState {
|
||||
explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size,
|
||||
const u32 start)
|
||||
: program_code{program_code}, program_size{program_size}, start{start} {}
|
||||
|
||||
u32 start{};
|
||||
std::vector<BlockInfo> block_info{};
|
||||
std::list<u32> inspect_queries{};
|
||||
std::list<Query> queries{};
|
||||
std::unordered_map<u32, u32> registered{};
|
||||
std::unordered_set<u32> labels{};
|
||||
std::map<u32, u32> ssy_labels{};
|
||||
std::map<u32, u32> pbk_labels{};
|
||||
std::unordered_map<u32, BlockStack> stacks{};
|
||||
const ProgramCode& program_code;
|
||||
const std::size_t program_size;
|
||||
};
|
||||
|
||||
enum class BlockCollision : u32 { None, Found, Inside };
|
||||
|
||||
std::pair<BlockCollision, u32> TryGetBlock(CFGRebuildState& state, u32 address) {
|
||||
const auto& blocks = state.block_info;
|
||||
for (u32 index = 0; index < blocks.size(); index++) {
|
||||
if (blocks[index].start == address) {
|
||||
return {BlockCollision::Found, index};
|
||||
}
|
||||
if (blocks[index].IsInside(address)) {
|
||||
return {BlockCollision::Inside, index};
|
||||
}
|
||||
}
|
||||
return {BlockCollision::None, -1};
|
||||
}
|
||||
|
||||
struct ParseInfo {
|
||||
BlockBranchInfo branch_info{};
|
||||
u32 end_address{};
|
||||
};
|
||||
|
||||
BlockInfo& CreateBlockInfo(CFGRebuildState& state, u32 start, u32 end) {
|
||||
auto& it = state.block_info.emplace_back();
|
||||
it.start = start;
|
||||
it.end = end;
|
||||
const u32 index = static_cast<u32>(state.block_info.size() - 1);
|
||||
state.registered.insert({start, index});
|
||||
return it;
|
||||
}
|
||||
|
||||
Pred GetPredicate(u32 index, bool negated) {
|
||||
return static_cast<Pred>(index + (negated ? 8 : 0));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns whether the instruction at the specified offset is a 'sched' instruction.
|
||||
* Sched instructions always appear before a sequence of 3 instructions.
|
||||
*/
|
||||
constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) {
|
||||
constexpr u32 SchedPeriod = 4;
|
||||
u32 absolute_offset = offset - main_offset;
|
||||
|
||||
return (absolute_offset % SchedPeriod) == 0;
|
||||
}
|
||||
|
||||
enum class ParseResult : u32 {
|
||||
ControlCaught,
|
||||
BlockEnd,
|
||||
AbnormalFlow,
|
||||
};
|
||||
|
||||
std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) {
|
||||
u32 offset = static_cast<u32>(address);
|
||||
const u32 end_address = static_cast<u32>(state.program_size / sizeof(Instruction));
|
||||
ParseInfo parse_info{};
|
||||
|
||||
const auto insert_label = [](CFGRebuildState& state, u32 address) {
|
||||
const auto pair = state.labels.emplace(address);
|
||||
if (pair.second) {
|
||||
state.inspect_queries.push_back(address);
|
||||
}
|
||||
};
|
||||
|
||||
while (true) {
|
||||
if (offset >= end_address) {
|
||||
// ASSERT_OR_EXECUTE can't be used, as it ignores the break
|
||||
ASSERT_MSG(false, "Shader passed the current limit!");
|
||||
parse_info.branch_info.address = exit_branch;
|
||||
parse_info.branch_info.ignore = false;
|
||||
break;
|
||||
}
|
||||
if (state.registered.count(offset) != 0) {
|
||||
parse_info.branch_info.address = offset;
|
||||
parse_info.branch_info.ignore = true;
|
||||
break;
|
||||
}
|
||||
if (IsSchedInstruction(offset, state.start)) {
|
||||
offset++;
|
||||
continue;
|
||||
}
|
||||
const Instruction instr = {state.program_code[offset]};
|
||||
const auto opcode = OpCode::Decode(instr);
|
||||
if (!opcode || opcode->get().GetType() != OpCode::Type::Flow) {
|
||||
offset++;
|
||||
continue;
|
||||
}
|
||||
|
||||
switch (opcode->get().GetId()) {
|
||||
case OpCode::Id::EXIT: {
|
||||
const auto pred_index = static_cast<u32>(instr.pred.pred_index);
|
||||
parse_info.branch_info.condition.predicate =
|
||||
GetPredicate(pred_index, instr.negate_pred != 0);
|
||||
if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
|
||||
offset++;
|
||||
continue;
|
||||
}
|
||||
const ConditionCode cc = instr.flow_condition_code;
|
||||
parse_info.branch_info.condition.cc = cc;
|
||||
if (cc == ConditionCode::F) {
|
||||
offset++;
|
||||
continue;
|
||||
}
|
||||
parse_info.branch_info.address = exit_branch;
|
||||
parse_info.branch_info.kill = false;
|
||||
parse_info.branch_info.is_sync = false;
|
||||
parse_info.branch_info.is_brk = false;
|
||||
parse_info.branch_info.ignore = false;
|
||||
parse_info.end_address = offset;
|
||||
|
||||
return {ParseResult::ControlCaught, parse_info};
|
||||
}
|
||||
case OpCode::Id::BRA: {
|
||||
if (instr.bra.constant_buffer != 0) {
|
||||
return {ParseResult::AbnormalFlow, parse_info};
|
||||
}
|
||||
const auto pred_index = static_cast<u32>(instr.pred.pred_index);
|
||||
parse_info.branch_info.condition.predicate =
|
||||
GetPredicate(pred_index, instr.negate_pred != 0);
|
||||
if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
|
||||
offset++;
|
||||
continue;
|
||||
}
|
||||
const ConditionCode cc = instr.flow_condition_code;
|
||||
parse_info.branch_info.condition.cc = cc;
|
||||
if (cc == ConditionCode::F) {
|
||||
offset++;
|
||||
continue;
|
||||
}
|
||||
const u32 branch_offset = offset + instr.bra.GetBranchTarget();
|
||||
if (branch_offset == 0) {
|
||||
parse_info.branch_info.address = exit_branch;
|
||||
} else {
|
||||
parse_info.branch_info.address = branch_offset;
|
||||
}
|
||||
insert_label(state, branch_offset);
|
||||
parse_info.branch_info.kill = false;
|
||||
parse_info.branch_info.is_sync = false;
|
||||
parse_info.branch_info.is_brk = false;
|
||||
parse_info.branch_info.ignore = false;
|
||||
parse_info.end_address = offset;
|
||||
|
||||
return {ParseResult::ControlCaught, parse_info};
|
||||
}
|
||||
case OpCode::Id::SYNC: {
|
||||
const auto pred_index = static_cast<u32>(instr.pred.pred_index);
|
||||
parse_info.branch_info.condition.predicate =
|
||||
GetPredicate(pred_index, instr.negate_pred != 0);
|
||||
if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
|
||||
offset++;
|
||||
continue;
|
||||
}
|
||||
const ConditionCode cc = instr.flow_condition_code;
|
||||
parse_info.branch_info.condition.cc = cc;
|
||||
if (cc == ConditionCode::F) {
|
||||
offset++;
|
||||
continue;
|
||||
}
|
||||
parse_info.branch_info.address = unassigned_branch;
|
||||
parse_info.branch_info.kill = false;
|
||||
parse_info.branch_info.is_sync = true;
|
||||
parse_info.branch_info.is_brk = false;
|
||||
parse_info.branch_info.ignore = false;
|
||||
parse_info.end_address = offset;
|
||||
|
||||
return {ParseResult::ControlCaught, parse_info};
|
||||
}
|
||||
case OpCode::Id::BRK: {
|
||||
const auto pred_index = static_cast<u32>(instr.pred.pred_index);
|
||||
parse_info.branch_info.condition.predicate =
|
||||
GetPredicate(pred_index, instr.negate_pred != 0);
|
||||
if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
|
||||
offset++;
|
||||
continue;
|
||||
}
|
||||
const ConditionCode cc = instr.flow_condition_code;
|
||||
parse_info.branch_info.condition.cc = cc;
|
||||
if (cc == ConditionCode::F) {
|
||||
offset++;
|
||||
continue;
|
||||
}
|
||||
parse_info.branch_info.address = unassigned_branch;
|
||||
parse_info.branch_info.kill = false;
|
||||
parse_info.branch_info.is_sync = false;
|
||||
parse_info.branch_info.is_brk = true;
|
||||
parse_info.branch_info.ignore = false;
|
||||
parse_info.end_address = offset;
|
||||
|
||||
return {ParseResult::ControlCaught, parse_info};
|
||||
}
|
||||
case OpCode::Id::KIL: {
|
||||
const auto pred_index = static_cast<u32>(instr.pred.pred_index);
|
||||
parse_info.branch_info.condition.predicate =
|
||||
GetPredicate(pred_index, instr.negate_pred != 0);
|
||||
if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
|
||||
offset++;
|
||||
continue;
|
||||
}
|
||||
const ConditionCode cc = instr.flow_condition_code;
|
||||
parse_info.branch_info.condition.cc = cc;
|
||||
if (cc == ConditionCode::F) {
|
||||
offset++;
|
||||
continue;
|
||||
}
|
||||
parse_info.branch_info.address = exit_branch;
|
||||
parse_info.branch_info.kill = true;
|
||||
parse_info.branch_info.is_sync = false;
|
||||
parse_info.branch_info.is_brk = false;
|
||||
parse_info.branch_info.ignore = false;
|
||||
parse_info.end_address = offset;
|
||||
|
||||
return {ParseResult::ControlCaught, parse_info};
|
||||
}
|
||||
case OpCode::Id::SSY: {
|
||||
const u32 target = offset + instr.bra.GetBranchTarget();
|
||||
insert_label(state, target);
|
||||
state.ssy_labels.emplace(offset, target);
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::PBK: {
|
||||
const u32 target = offset + instr.bra.GetBranchTarget();
|
||||
insert_label(state, target);
|
||||
state.pbk_labels.emplace(offset, target);
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::BRX: {
|
||||
return {ParseResult::AbnormalFlow, parse_info};
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
offset++;
|
||||
}
|
||||
parse_info.branch_info.kill = false;
|
||||
parse_info.branch_info.is_sync = false;
|
||||
parse_info.branch_info.is_brk = false;
|
||||
parse_info.end_address = offset - 1;
|
||||
return {ParseResult::BlockEnd, parse_info};
|
||||
}
|
||||
|
||||
bool TryInspectAddress(CFGRebuildState& state) {
|
||||
if (state.inspect_queries.empty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const u32 address = state.inspect_queries.front();
|
||||
state.inspect_queries.pop_front();
|
||||
const auto [result, block_index] = TryGetBlock(state, address);
|
||||
switch (result) {
|
||||
case BlockCollision::Found: {
|
||||
return true;
|
||||
}
|
||||
case BlockCollision::Inside: {
|
||||
// This case is the tricky one:
|
||||
// We need to Split the block in 2 sepparate blocks
|
||||
const u32 end = state.block_info[block_index].end;
|
||||
BlockInfo& new_block = CreateBlockInfo(state, address, end);
|
||||
BlockInfo& current_block = state.block_info[block_index];
|
||||
current_block.end = address - 1;
|
||||
new_block.branch = current_block.branch;
|
||||
BlockBranchInfo forward_branch{};
|
||||
forward_branch.address = address;
|
||||
forward_branch.ignore = true;
|
||||
current_block.branch = forward_branch;
|
||||
return true;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
const auto [parse_result, parse_info] = ParseCode(state, address);
|
||||
if (parse_result == ParseResult::AbnormalFlow) {
|
||||
// if it's AbnormalFlow, we end it as false, ending the CFG reconstruction
|
||||
return false;
|
||||
}
|
||||
|
||||
BlockInfo& block_info = CreateBlockInfo(state, address, parse_info.end_address);
|
||||
block_info.branch = parse_info.branch_info;
|
||||
if (parse_info.branch_info.condition.IsUnconditional()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
const u32 fallthrough_address = parse_info.end_address + 1;
|
||||
state.inspect_queries.push_front(fallthrough_address);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool TryQuery(CFGRebuildState& state) {
|
||||
const auto gather_labels = [](std::stack<u32>& cc, std::map<u32, u32>& labels,
|
||||
BlockInfo& block) {
|
||||
auto gather_start = labels.lower_bound(block.start);
|
||||
const auto gather_end = labels.upper_bound(block.end);
|
||||
while (gather_start != gather_end) {
|
||||
cc.push(gather_start->second);
|
||||
gather_start++;
|
||||
}
|
||||
};
|
||||
if (state.queries.empty()) {
|
||||
return false;
|
||||
}
|
||||
Query& q = state.queries.front();
|
||||
const u32 block_index = state.registered[q.address];
|
||||
BlockInfo& block = state.block_info[block_index];
|
||||
// If the block is visted, check if the stacks match, else gather the ssy/pbk
|
||||
// labels into the current stack and look if the branch at the end of the block
|
||||
// consumes a label. Schedule new queries accordingly
|
||||
if (block.visited) {
|
||||
BlockStack& stack = state.stacks[q.address];
|
||||
const bool all_okay = (stack.ssy_stack.size() == 0 || q.ssy_stack == stack.ssy_stack) &&
|
||||
(stack.pbk_stack.size() == 0 || q.pbk_stack == stack.pbk_stack);
|
||||
state.queries.pop_front();
|
||||
return all_okay;
|
||||
}
|
||||
block.visited = true;
|
||||
state.stacks[q.address] = BlockStack{q};
|
||||
Query q2(q);
|
||||
state.queries.pop_front();
|
||||
gather_labels(q2.ssy_stack, state.ssy_labels, block);
|
||||
gather_labels(q2.pbk_stack, state.pbk_labels, block);
|
||||
if (!block.branch.condition.IsUnconditional()) {
|
||||
q2.address = block.end + 1;
|
||||
state.queries.push_back(q2);
|
||||
}
|
||||
Query conditional_query{q2};
|
||||
if (block.branch.is_sync) {
|
||||
if (block.branch.address == unassigned_branch) {
|
||||
block.branch.address = conditional_query.ssy_stack.top();
|
||||
}
|
||||
conditional_query.ssy_stack.pop();
|
||||
}
|
||||
if (block.branch.is_brk) {
|
||||
if (block.branch.address == unassigned_branch) {
|
||||
block.branch.address = conditional_query.pbk_stack.top();
|
||||
}
|
||||
conditional_query.pbk_stack.pop();
|
||||
}
|
||||
conditional_query.address = block.branch.address;
|
||||
state.queries.push_back(conditional_query);
|
||||
return true;
|
||||
}
|
||||
|
||||
std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 program_size,
|
||||
u32 start_address) {
|
||||
CFGRebuildState state{program_code, program_size, start_address};
|
||||
// Inspect Code and generate blocks
|
||||
state.labels.clear();
|
||||
state.labels.emplace(start_address);
|
||||
state.inspect_queries.push_back(state.start);
|
||||
while (!state.inspect_queries.empty()) {
|
||||
if (!TryInspectAddress(state)) {
|
||||
return {};
|
||||
}
|
||||
}
|
||||
// Decompile Stacks
|
||||
Query start_query{};
|
||||
start_query.address = state.start;
|
||||
state.queries.push_back(start_query);
|
||||
bool decompiled = true;
|
||||
while (!state.queries.empty()) {
|
||||
if (!TryQuery(state)) {
|
||||
decompiled = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Sort and organize results
|
||||
std::sort(state.block_info.begin(), state.block_info.end(),
|
||||
[](const BlockInfo& a, const BlockInfo& b) -> bool { return a.start < b.start; });
|
||||
ShaderCharacteristics result_out{};
|
||||
result_out.decompilable = decompiled;
|
||||
result_out.start = start_address;
|
||||
result_out.end = start_address;
|
||||
for (auto& block : state.block_info) {
|
||||
ShaderBlock new_block{};
|
||||
new_block.start = block.start;
|
||||
new_block.end = block.end;
|
||||
new_block.ignore_branch = block.branch.ignore;
|
||||
if (!new_block.ignore_branch) {
|
||||
new_block.branch.cond = block.branch.condition;
|
||||
new_block.branch.kills = block.branch.kill;
|
||||
new_block.branch.address = block.branch.address;
|
||||
}
|
||||
result_out.end = std::max(result_out.end, block.end);
|
||||
result_out.blocks.push_back(new_block);
|
||||
}
|
||||
if (result_out.decompilable) {
|
||||
result_out.labels = std::move(state.labels);
|
||||
return {result_out};
|
||||
}
|
||||
// If it's not decompilable, merge the unlabelled blocks together
|
||||
auto back = result_out.blocks.begin();
|
||||
auto next = std::next(back);
|
||||
while (next != result_out.blocks.end()) {
|
||||
if (state.labels.count(next->start) == 0 && next->start == back->end + 1) {
|
||||
back->end = next->end;
|
||||
next = result_out.blocks.erase(next);
|
||||
continue;
|
||||
}
|
||||
back = next;
|
||||
next++;
|
||||
}
|
||||
return {result_out};
|
||||
}
|
||||
} // namespace VideoCommon::Shader
|
||||
63
src/video_core/shader/control_flow.h
Normal file
63
src/video_core/shader/control_flow.h
Normal file
@@ -0,0 +1,63 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstring>
|
||||
#include <list>
|
||||
#include <optional>
|
||||
#include <unordered_set>
|
||||
|
||||
#include "video_core/engines/shader_bytecode.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
using Tegra::Shader::ConditionCode;
|
||||
using Tegra::Shader::Pred;
|
||||
|
||||
constexpr s32 exit_branch = -1;
|
||||
|
||||
struct Condition {
|
||||
Pred predicate{Pred::UnusedIndex};
|
||||
ConditionCode cc{ConditionCode::T};
|
||||
|
||||
bool IsUnconditional() const {
|
||||
return predicate == Pred::UnusedIndex && cc == ConditionCode::T;
|
||||
}
|
||||
bool operator==(const Condition& other) const {
|
||||
return std::tie(predicate, cc) == std::tie(other.predicate, other.cc);
|
||||
}
|
||||
};
|
||||
|
||||
struct ShaderBlock {
|
||||
u32 start{};
|
||||
u32 end{};
|
||||
bool ignore_branch{};
|
||||
struct Branch {
|
||||
Condition cond{};
|
||||
bool kills{};
|
||||
s32 address{};
|
||||
bool operator==(const Branch& b) const {
|
||||
return std::tie(cond, kills, address) == std::tie(b.cond, b.kills, b.address);
|
||||
}
|
||||
} branch{};
|
||||
bool operator==(const ShaderBlock& sb) const {
|
||||
return std::tie(start, end, ignore_branch, branch) ==
|
||||
std::tie(sb.start, sb.end, sb.ignore_branch, sb.branch);
|
||||
}
|
||||
};
|
||||
|
||||
struct ShaderCharacteristics {
|
||||
std::list<ShaderBlock> blocks{};
|
||||
bool decompilable{};
|
||||
u32 start{};
|
||||
u32 end{};
|
||||
std::unordered_set<u32> labels{};
|
||||
};
|
||||
|
||||
std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 program_size,
|
||||
u32 start_address);
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -11,6 +11,7 @@
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/shader_bytecode.h"
|
||||
#include "video_core/engines/shader_header.h"
|
||||
#include "video_core/shader/control_flow.h"
|
||||
#include "video_core/shader/node_helper.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
|
||||
@@ -21,20 +22,6 @@ using Tegra::Shader::OpCode;
|
||||
|
||||
namespace {
|
||||
|
||||
/// Merges exit method of two parallel branches.
|
||||
constexpr ExitMethod ParallelExit(ExitMethod a, ExitMethod b) {
|
||||
if (a == ExitMethod::Undetermined) {
|
||||
return b;
|
||||
}
|
||||
if (b == ExitMethod::Undetermined) {
|
||||
return a;
|
||||
}
|
||||
if (a == b) {
|
||||
return a;
|
||||
}
|
||||
return ExitMethod::Conditional;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns whether the instruction at the specified offset is a 'sched' instruction.
|
||||
* Sched instructions always appear before a sequence of 3 instructions.
|
||||
@@ -51,87 +38,106 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) {
|
||||
void ShaderIR::Decode() {
|
||||
std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
|
||||
|
||||
std::set<u32> labels;
|
||||
const ExitMethod exit_method = Scan(main_offset, MAX_PROGRAM_LENGTH, labels);
|
||||
if (exit_method != ExitMethod::AlwaysEnd) {
|
||||
UNREACHABLE_MSG("Program does not always end");
|
||||
}
|
||||
|
||||
if (labels.empty()) {
|
||||
basic_blocks.insert({main_offset, DecodeRange(main_offset, MAX_PROGRAM_LENGTH)});
|
||||
disable_flow_stack = false;
|
||||
const auto info = ScanFlow(program_code, program_size, main_offset);
|
||||
if (info) {
|
||||
const auto& shader_info = *info;
|
||||
coverage_begin = shader_info.start;
|
||||
coverage_end = shader_info.end;
|
||||
if (shader_info.decompilable) {
|
||||
disable_flow_stack = true;
|
||||
const auto insert_block = [this](NodeBlock& nodes, u32 label) {
|
||||
if (label == exit_branch) {
|
||||
return;
|
||||
}
|
||||
basic_blocks.insert({label, nodes});
|
||||
};
|
||||
const auto& blocks = shader_info.blocks;
|
||||
NodeBlock current_block;
|
||||
u32 current_label = exit_branch;
|
||||
for (auto& block : blocks) {
|
||||
if (shader_info.labels.count(block.start) != 0) {
|
||||
insert_block(current_block, current_label);
|
||||
current_block.clear();
|
||||
current_label = block.start;
|
||||
}
|
||||
if (!block.ignore_branch) {
|
||||
DecodeRangeInner(current_block, block.start, block.end);
|
||||
InsertControlFlow(current_block, block);
|
||||
} else {
|
||||
DecodeRangeInner(current_block, block.start, block.end + 1);
|
||||
}
|
||||
}
|
||||
insert_block(current_block, current_label);
|
||||
return;
|
||||
}
|
||||
LOG_WARNING(HW_GPU, "Flow Stack Removing Failed! Falling back to old method");
|
||||
// we can't decompile it, fallback to standard method
|
||||
for (const auto& block : shader_info.blocks) {
|
||||
basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)});
|
||||
}
|
||||
return;
|
||||
}
|
||||
LOG_WARNING(HW_GPU, "Flow Analysis Failed! Falling back to brute force compiling");
|
||||
|
||||
labels.insert(main_offset);
|
||||
|
||||
for (const u32 label : labels) {
|
||||
const auto next_it = labels.lower_bound(label + 1);
|
||||
const u32 next_label = next_it == labels.end() ? MAX_PROGRAM_LENGTH : *next_it;
|
||||
|
||||
basic_blocks.insert({label, DecodeRange(label, next_label)});
|
||||
// Now we need to deal with an undecompilable shader. We need to brute force
|
||||
// a shader that captures every position.
|
||||
coverage_begin = main_offset;
|
||||
const u32 shader_end = static_cast<u32>(program_size / sizeof(u64));
|
||||
coverage_end = shader_end;
|
||||
for (u32 label = main_offset; label < shader_end; label++) {
|
||||
basic_blocks.insert({label, DecodeRange(label, label + 1)});
|
||||
}
|
||||
}
|
||||
|
||||
ExitMethod ShaderIR::Scan(u32 begin, u32 end, std::set<u32>& labels) {
|
||||
const auto [iter, inserted] =
|
||||
exit_method_map.emplace(std::make_pair(begin, end), ExitMethod::Undetermined);
|
||||
ExitMethod& exit_method = iter->second;
|
||||
if (!inserted)
|
||||
return exit_method;
|
||||
|
||||
for (u32 offset = begin; offset != end && offset != MAX_PROGRAM_LENGTH; ++offset) {
|
||||
coverage_begin = std::min(coverage_begin, offset);
|
||||
coverage_end = std::max(coverage_end, offset + 1);
|
||||
|
||||
const Instruction instr = {program_code[offset]};
|
||||
const auto opcode = OpCode::Decode(instr);
|
||||
if (!opcode)
|
||||
continue;
|
||||
switch (opcode->get().GetId()) {
|
||||
case OpCode::Id::EXIT: {
|
||||
// The EXIT instruction can be predicated, which means that the shader can conditionally
|
||||
// end on this instruction. We have to consider the case where the condition is not met
|
||||
// and check the exit method of that other basic block.
|
||||
using Tegra::Shader::Pred;
|
||||
if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) {
|
||||
return exit_method = ExitMethod::AlwaysEnd;
|
||||
} else {
|
||||
const ExitMethod not_met = Scan(offset + 1, end, labels);
|
||||
return exit_method = ParallelExit(ExitMethod::AlwaysEnd, not_met);
|
||||
}
|
||||
}
|
||||
case OpCode::Id::BRA: {
|
||||
const u32 target = offset + instr.bra.GetBranchTarget();
|
||||
labels.insert(target);
|
||||
const ExitMethod no_jmp = Scan(offset + 1, end, labels);
|
||||
const ExitMethod jmp = Scan(target, end, labels);
|
||||
return exit_method = ParallelExit(no_jmp, jmp);
|
||||
}
|
||||
case OpCode::Id::SSY:
|
||||
case OpCode::Id::PBK: {
|
||||
// The SSY and PBK use a similar encoding as the BRA instruction.
|
||||
UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
|
||||
"Constant buffer branching is not supported");
|
||||
const u32 target = offset + instr.bra.GetBranchTarget();
|
||||
labels.insert(target);
|
||||
// Continue scanning for an exit method.
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
return exit_method = ExitMethod::AlwaysReturn;
|
||||
}
|
||||
|
||||
NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) {
|
||||
NodeBlock basic_block;
|
||||
for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) {
|
||||
pc = DecodeInstr(basic_block, pc);
|
||||
}
|
||||
DecodeRangeInner(basic_block, begin, end);
|
||||
return basic_block;
|
||||
}
|
||||
|
||||
void ShaderIR::DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end) {
|
||||
for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) {
|
||||
pc = DecodeInstr(bb, pc);
|
||||
}
|
||||
}
|
||||
|
||||
void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) {
|
||||
const auto apply_conditions = [&](const Condition& cond, Node n) -> Node {
|
||||
Node result = n;
|
||||
if (cond.cc != ConditionCode::T) {
|
||||
result = Conditional(GetConditionCode(cond.cc), {result});
|
||||
}
|
||||
if (cond.predicate != Pred::UnusedIndex) {
|
||||
u32 pred = static_cast<u32>(cond.predicate);
|
||||
const bool is_neg = pred > 7;
|
||||
if (is_neg) {
|
||||
pred -= 8;
|
||||
}
|
||||
result = Conditional(GetPredicate(pred, is_neg), {result});
|
||||
}
|
||||
return result;
|
||||
};
|
||||
if (block.branch.address < 0) {
|
||||
if (block.branch.kills) {
|
||||
Node n = Operation(OperationCode::Discard);
|
||||
n = apply_conditions(block.branch.cond, n);
|
||||
bb.push_back(n);
|
||||
global_code.push_back(n);
|
||||
return;
|
||||
}
|
||||
Node n = Operation(OperationCode::Exit);
|
||||
n = apply_conditions(block.branch.cond, n);
|
||||
bb.push_back(n);
|
||||
global_code.push_back(n);
|
||||
return;
|
||||
}
|
||||
Node n = Operation(OperationCode::Branch, Immediate(block.branch.address));
|
||||
n = apply_conditions(block.branch.cond, n);
|
||||
bb.push_back(n);
|
||||
global_code.push_back(n);
|
||||
}
|
||||
|
||||
u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
|
||||
// Ignore sched instructions when generating code.
|
||||
if (IsSchedInstruction(pc, main_offset)) {
|
||||
@@ -140,15 +146,18 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
|
||||
|
||||
const Instruction instr = {program_code[pc]};
|
||||
const auto opcode = OpCode::Decode(instr);
|
||||
const u32 nv_address = ConvertAddressToNvidiaSpace(pc);
|
||||
|
||||
// Decoding failure
|
||||
if (!opcode) {
|
||||
UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value);
|
||||
bb.push_back(Comment(fmt::format("{:05x} Unimplemented Shader instruction (0x{:016x})",
|
||||
nv_address, instr.value)));
|
||||
return pc + 1;
|
||||
}
|
||||
|
||||
bb.push_back(
|
||||
Comment(fmt::format("{}: {} (0x{:016x})", pc, opcode->get().GetName(), instr.value)));
|
||||
bb.push_back(Comment(
|
||||
fmt::format("{:05x} {} (0x{:016x})", nv_address, opcode->get().GetName(), instr.value)));
|
||||
|
||||
using Tegra::Shader::Pred;
|
||||
UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute,
|
||||
|
||||
@@ -23,38 +23,51 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
|
||||
Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a);
|
||||
op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a);
|
||||
|
||||
Node op_b = [&]() {
|
||||
switch (opcode->get().GetId()) {
|
||||
case OpCode::Id::HSETP2_R:
|
||||
return GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.abs_a,
|
||||
instr.hsetp2.negate_b);
|
||||
default:
|
||||
UNREACHABLE();
|
||||
return Immediate(0);
|
||||
}
|
||||
}();
|
||||
op_b = UnpackHalfFloat(op_b, instr.hsetp2.type_b);
|
||||
|
||||
// We can't use the constant predicate as destination.
|
||||
ASSERT(instr.hsetp2.pred3 != static_cast<u64>(Pred::UnusedIndex));
|
||||
|
||||
const Node second_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred != 0);
|
||||
Tegra::Shader::PredCondition cond{};
|
||||
bool h_and{};
|
||||
Node op_b{};
|
||||
switch (opcode->get().GetId()) {
|
||||
case OpCode::Id::HSETP2_C:
|
||||
cond = instr.hsetp2.cbuf_and_imm.cond;
|
||||
h_and = instr.hsetp2.cbuf_and_imm.h_and;
|
||||
op_b = GetOperandAbsNegHalf(GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset),
|
||||
instr.hsetp2.cbuf.abs_b, instr.hsetp2.cbuf.negate_b);
|
||||
break;
|
||||
case OpCode::Id::HSETP2_IMM:
|
||||
cond = instr.hsetp2.cbuf_and_imm.cond;
|
||||
h_and = instr.hsetp2.cbuf_and_imm.h_and;
|
||||
op_b = UnpackHalfImmediate(instr, true);
|
||||
break;
|
||||
case OpCode::Id::HSETP2_R:
|
||||
cond = instr.hsetp2.reg.cond;
|
||||
h_and = instr.hsetp2.reg.h_and;
|
||||
op_b =
|
||||
UnpackHalfFloat(GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.reg.abs_b,
|
||||
instr.hsetp2.reg.negate_b),
|
||||
instr.hsetp2.reg.type_b);
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
op_b = Immediate(0);
|
||||
}
|
||||
|
||||
const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op);
|
||||
const OperationCode pair_combiner =
|
||||
instr.hsetp2.h_and ? OperationCode::LogicalAll2 : OperationCode::LogicalAny2;
|
||||
const Node pred39 = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred);
|
||||
|
||||
const Node comparison = GetPredicateComparisonHalf(instr.hsetp2.cond, op_a, op_b);
|
||||
const Node first_pred = Operation(pair_combiner, comparison);
|
||||
const auto Write = [&](u64 dest, Node src) {
|
||||
SetPredicate(bb, dest, Operation(combiner, std::move(src), pred39));
|
||||
};
|
||||
|
||||
// Set the primary predicate to the result of Predicate OP SecondPredicate
|
||||
const Node value = Operation(combiner, first_pred, second_pred);
|
||||
SetPredicate(bb, instr.hsetp2.pred3, value);
|
||||
|
||||
if (instr.hsetp2.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
|
||||
// Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled
|
||||
const Node negated_pred = Operation(OperationCode::LogicalNegate, first_pred);
|
||||
SetPredicate(bb, instr.hsetp2.pred0, Operation(combiner, negated_pred, second_pred));
|
||||
const Node comparison = GetPredicateComparisonHalf(cond, op_a, op_b);
|
||||
const u64 first = instr.hsetp2.pred0;
|
||||
const u64 second = instr.hsetp2.pred3;
|
||||
if (h_and) {
|
||||
const Node joined = Operation(OperationCode::LogicalAnd2, comparison);
|
||||
Write(first, joined);
|
||||
Write(second, Operation(OperationCode::LogicalNegate, joined));
|
||||
} else {
|
||||
Write(first, Operation(OperationCode::LogicalPick2, comparison, Immediate(0u)));
|
||||
Write(second, Operation(OperationCode::LogicalPick2, comparison, Immediate(1u)));
|
||||
}
|
||||
|
||||
return pc;
|
||||
|
||||
@@ -95,12 +95,8 @@ const Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::Image
|
||||
const Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg,
|
||||
Tegra::Shader::ImageType type) {
|
||||
const Node image_register{GetRegister(reg)};
|
||||
const Node base_image{
|
||||
const auto [base_image, cbuf_index, cbuf_offset]{
|
||||
TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()))};
|
||||
const auto cbuf{std::get_if<CbufNode>(&*base_image)};
|
||||
const auto cbuf_offset_imm{std::get_if<ImmediateNode>(&*cbuf->GetOffset())};
|
||||
const auto cbuf_offset{cbuf_offset_imm->GetValue()};
|
||||
const auto cbuf_index{cbuf->GetIndex()};
|
||||
const auto cbuf_key{(static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset)};
|
||||
|
||||
// If this image has already been used, return the existing mapping.
|
||||
|
||||
@@ -95,10 +95,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
|
||||
const Node op_b =
|
||||
GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 4, index);
|
||||
|
||||
SetTemporal(bb, 0, op_a);
|
||||
SetTemporal(bb, 1, op_b);
|
||||
SetRegister(bb, instr.gpr0, GetTemporal(0));
|
||||
SetRegister(bb, instr.gpr0.Value() + 1, GetTemporal(1));
|
||||
SetTemporary(bb, 0, op_a);
|
||||
SetTemporary(bb, 1, op_b);
|
||||
SetRegister(bb, instr.gpr0, GetTemporary(0));
|
||||
SetRegister(bb, instr.gpr0.Value() + 1, GetTemporary(1));
|
||||
break;
|
||||
}
|
||||
default:
|
||||
@@ -136,9 +136,9 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
|
||||
}
|
||||
}();
|
||||
for (u32 i = 0; i < count; ++i)
|
||||
SetTemporal(bb, i, GetLmem(i * 4));
|
||||
SetTemporary(bb, i, GetLmem(i * 4));
|
||||
for (u32 i = 0; i < count; ++i)
|
||||
SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
|
||||
SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
|
||||
break;
|
||||
}
|
||||
default:
|
||||
@@ -172,10 +172,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
|
||||
Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
|
||||
const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
|
||||
|
||||
SetTemporal(bb, i, gmem);
|
||||
SetTemporary(bb, i, gmem);
|
||||
}
|
||||
for (u32 i = 0; i < count; ++i) {
|
||||
SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
|
||||
SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
|
||||
}
|
||||
break;
|
||||
}
|
||||
@@ -253,11 +253,11 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
|
||||
TrackAndGetGlobalMemory(bb, instr, true);
|
||||
|
||||
// Encode in temporary registers like this: real_base_address, {registers_to_be_written...}
|
||||
SetTemporal(bb, 0, real_address_base);
|
||||
SetTemporary(bb, 0, real_address_base);
|
||||
|
||||
const u32 count = GetUniformTypeElementsCount(type);
|
||||
for (u32 i = 0; i < count; ++i) {
|
||||
SetTemporal(bb, i + 1, GetRegister(instr.gpr0.Value() + i));
|
||||
SetTemporary(bb, i + 1, GetRegister(instr.gpr0.Value() + i));
|
||||
}
|
||||
for (u32 i = 0; i < count; ++i) {
|
||||
const Node it_offset = Immediate(i * 4);
|
||||
@@ -265,7 +265,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
|
||||
Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
|
||||
const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
|
||||
|
||||
bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporal(i + 1)));
|
||||
bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporary(i + 1)));
|
||||
}
|
||||
break;
|
||||
}
|
||||
@@ -297,18 +297,13 @@ std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackAndGetGlobalMemory(NodeB
|
||||
const auto addr_register{GetRegister(instr.gmem.gpr)};
|
||||
const auto immediate_offset{static_cast<u32>(instr.gmem.offset)};
|
||||
|
||||
const Node base_address{
|
||||
TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()))};
|
||||
const auto cbuf = std::get_if<CbufNode>(&*base_address);
|
||||
ASSERT(cbuf != nullptr);
|
||||
const auto cbuf_offset_imm = std::get_if<ImmediateNode>(&*cbuf->GetOffset());
|
||||
ASSERT(cbuf_offset_imm != nullptr);
|
||||
const auto cbuf_offset = cbuf_offset_imm->GetValue();
|
||||
const auto [base_address, index, offset] =
|
||||
TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()));
|
||||
ASSERT(base_address != nullptr);
|
||||
|
||||
bb.push_back(
|
||||
Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", cbuf->GetIndex(), cbuf_offset)));
|
||||
bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset)));
|
||||
|
||||
const GlobalMemoryBase descriptor{cbuf->GetIndex(), cbuf_offset};
|
||||
const GlobalMemoryBase descriptor{index, offset};
|
||||
const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor);
|
||||
auto& usage = entry->second;
|
||||
if (is_write) {
|
||||
|
||||
@@ -91,11 +91,46 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::BRA: {
|
||||
UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
|
||||
"BRA with constant buffers are not implemented");
|
||||
Node branch;
|
||||
if (instr.bra.constant_buffer == 0) {
|
||||
const u32 target = pc + instr.bra.GetBranchTarget();
|
||||
branch = Operation(OperationCode::Branch, Immediate(target));
|
||||
} else {
|
||||
const u32 target = pc + 1;
|
||||
const Node op_a = GetConstBuffer(instr.cbuf36.index, instr.cbuf36.GetOffset());
|
||||
const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true,
|
||||
PRECISE, op_a, Immediate(3));
|
||||
const Node operand =
|
||||
Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
|
||||
branch = Operation(OperationCode::BranchIndirect, operand);
|
||||
}
|
||||
|
||||
const u32 target = pc + instr.bra.GetBranchTarget();
|
||||
const Node branch = Operation(OperationCode::Branch, Immediate(target));
|
||||
const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
|
||||
if (cc != Tegra::Shader::ConditionCode::T) {
|
||||
bb.push_back(Conditional(GetConditionCode(cc), {branch}));
|
||||
} else {
|
||||
bb.push_back(branch);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::BRX: {
|
||||
Node operand;
|
||||
if (instr.brx.constant_buffer != 0) {
|
||||
const s32 target = pc + 1;
|
||||
const Node index = GetRegister(instr.gpr8);
|
||||
const Node op_a =
|
||||
GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index);
|
||||
const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true,
|
||||
PRECISE, op_a, Immediate(3));
|
||||
operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
|
||||
} else {
|
||||
const s32 target = pc + instr.brx.GetBranchExtend();
|
||||
const Node op_a = GetRegister(instr.gpr8);
|
||||
const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true,
|
||||
PRECISE, op_a, Immediate(3));
|
||||
operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
|
||||
}
|
||||
const Node branch = Operation(OperationCode::BranchIndirect, operand);
|
||||
|
||||
const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
|
||||
if (cc != Tegra::Shader::ConditionCode::T) {
|
||||
@@ -109,6 +144,10 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
|
||||
UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
|
||||
"Constant buffer flow is not supported");
|
||||
|
||||
if (disable_flow_stack) {
|
||||
break;
|
||||
}
|
||||
|
||||
// The SSY opcode tells the GPU where to re-converge divergent execution paths with SYNC.
|
||||
const u32 target = pc + instr.bra.GetBranchTarget();
|
||||
bb.push_back(
|
||||
@@ -119,6 +158,10 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
|
||||
UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
|
||||
"Constant buffer PBK is not supported");
|
||||
|
||||
if (disable_flow_stack) {
|
||||
break;
|
||||
}
|
||||
|
||||
// PBK pushes to a stack the address where BRK will jump to.
|
||||
const u32 target = pc + instr.bra.GetBranchTarget();
|
||||
bb.push_back(
|
||||
@@ -130,6 +173,10 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
|
||||
UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "SYNC condition code used: {}",
|
||||
static_cast<u32>(cc));
|
||||
|
||||
if (disable_flow_stack) {
|
||||
break;
|
||||
}
|
||||
|
||||
// The SYNC opcode jumps to the address previously set by the SSY opcode
|
||||
bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Ssy));
|
||||
break;
|
||||
@@ -138,6 +185,9 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
|
||||
const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
|
||||
UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "BRK condition code used: {}",
|
||||
static_cast<u32>(cc));
|
||||
if (disable_flow_stack) {
|
||||
break;
|
||||
}
|
||||
|
||||
// The BRK opcode jumps to the address previously set by the PBK opcode
|
||||
bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Pbk));
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user