Compare commits

..

26 Commits

Author SHA1 Message Date
Fernando Sahmkow
f58ee3f15f ShaderDecompiler: Add a debug option to dump the game's shaders. 2022-01-04 02:39:00 +01:00
Fernando S
da8e0f6571 Merge pull request #7648 from bunnei/thread-pinning
core: hle: kernel: Implement thread pinning.
2022-01-03 02:01:26 +01:00
Fernando S
3fa9702952 Merge pull request #7624 from ameerj/intel-msaa-scale
vk_texture_cache: Use 3D scale helpers for MSAA texture scaling on Intel Windows drivers
2022-01-03 00:40:14 +01:00
Fernando S
ae7da0b12d Merge pull request #7629 from ameerj/nv-driver-fixes
shaders: Add fixes for NVIDIA drivers 495+
2022-01-03 00:39:59 +01:00
Fernando S
214b9fc9a7 Merge pull request #7659 from ameerj/overlap-overflow
texture_cache/util: Fix s32 overflow when resolving overlaps
2022-01-01 22:10:29 +01:00
ameerj
951c61aeaa texture_cache/util: Fix s32 overflow when resolving overlaps 2021-12-31 20:03:22 -05:00
Mai M
eb7d361657 Merge pull request #7654 from Morph1984/dynarmic
externals: Update dynarmic to 28714ee7
2021-12-31 02:49:16 -05:00
Morph
af89f7683d externals: Update dynarmic to 28714ee7
Reduces compilation times on MSVC.
2021-12-30 22:28:27 -05:00
bunnei
667a8ae163 Merge pull request #7647 from german77/toad
core/hid: Fix controller type validation
2021-12-30 16:54:35 -08:00
bunnei
3a89723d97 core: hle: kernel: Implement thread pinning.
- We largely had the mechanics in place for thread pinning, this change hooks these up.
- Validated with tests https://github.com/Atmosphere-NX/Atmosphere/blob/master/tests/TestSvc/source/test_thread_pinning.cpp.
2021-12-30 15:50:45 -08:00
german77
9ee5c4ec56 core/hid: Fix controller type validation 2021-12-29 22:51:53 -06:00
bunnei
5e58271903 Merge pull request #7635 from bunnei/set-heap-size
core: hle: kernel: Updated implementation of svcSetHeapSize.
2021-12-29 20:30:12 -08:00
bunnei
279c7bcc1a Merge pull request #7618 from goldenx86/patch-4
Increase boost requirement to 1.78.0
2021-12-28 16:25:37 -08:00
Matías Locatti
c7235e67ef Empty spaces 2021-12-28 18:50:51 -03:00
Matías Locatti
840d5520d2 Changes to avoid warnings in SSE4.2 optimized SPIR-V 2021-12-28 17:35:55 -03:00
bunnei
091463a429 core: hle: kernel: Updated implementation of svcSetHeapSize.
- Updates our svcSetHeapSize with latest HOS, furthermore allowing heap size to properly be extended/shrunk.
- Validated with tests https://github.com/Atmosphere-NX/Atmosphere/blob/master/tests/TestSvc/source/test_set_heap_size.cpp.
2021-12-28 01:25:20 -08:00
bunnei
f67605e6aa Merge pull request #7622 from ameerj/vk-rescale-invalid-ptr
vk_texture_cache: Fix invalidated pointer access
2021-12-28 00:46:37 -08:00
bunnei
9a0648ff0a Merge pull request #7621 from bunnei/set-mem-perm
core: hle: kernel: Implement SetMemoryPermission.
2021-12-27 23:33:11 -08:00
bunnei
c9e4acc4e2 Merge pull request #7630 from ameerj/glasm-get-int
emit_glasm_context_get_set: Fix GetAttribute return value type.
2021-12-27 16:35:11 -08:00
bunnei
292dfac25e Merge pull request #7620 from bunnei/kernel-thread-x18
core: hle: kernel: KThread: X18 should be a cryptographically random number.
2021-12-25 00:42:54 -08:00
ameerj
37addf7a94 emit_glasm_context_get_set: Fix GetAttribute return value type.
GetAttribute expects an F32 result type at the IR level, this fixes the return value of attributes which were not returning an F32
2021-12-24 20:45:07 -05:00
ameerj
f9e0681d59 vk_texture_cache: Use 3D scale helpers for MSAA texture scaling on Intel Windows drivers
Fixes a crash when scaling MSAA textures in titles such as Sonic Colors Ultimate.
2021-12-23 22:35:19 -05:00
ameerj
481b210c0d vk_texture_cache: Fix invalidated pointer access
The vulkan ImageView held a reference to its source image for rescale status checking. This pointer is sometimes invalidated when the texture cache slot_images container is resized.
To avoid an invalid pointer dereference, the ImageView now holds a reference to the container itself.
2021-12-23 20:55:48 -05:00
bunnei
4e7a6639d2 core: hle: kernel: Implement SetMemoryPermission.
- Not seen in any games yet, but validated with kernel tests.
2021-12-23 01:10:36 -08:00
bunnei
a0c7d93b84 core: hle: kernel: KThread: X18 should be a cryptographically random number.
- This was added with firmware 11.0.0 (https://switchbrew.org/wiki/11.0.0).
    - X18 is OR'd by kernel with 1, to make sure it is odd.
2021-12-23 00:03:39 -08:00
Matías Locatti
e0193e2be5 Increase boost requirement to 1.78.0
Liu's finding, this allows to build yuzu on VS 2022.
Ignore at will.
2021-12-22 16:10:21 -03:00
40 changed files with 544 additions and 152 deletions

View File

@@ -229,7 +229,7 @@ elseif (${CMAKE_SYSTEM_NAME} STREQUAL "Linux" OR YUZU_USE_BUNDLED_BOOST)
include_directories(SYSTEM "${Boost_INCLUDE_DIRS}")
else()
message(STATUS "Boost 1.73.0 or newer not found, falling back to Conan")
list(APPEND CONAN_REQUIRED_LIBS "boost/1.73.0")
list(APPEND CONAN_REQUIRED_LIBS "boost/1.78.0")
endif()
# Attempt to locate any packages that are required and report the missing ones in CONAN_REQUIRED_LIBS

View File

@@ -597,6 +597,7 @@ struct Values {
BasicSetting<std::string> program_args{std::string(), "program_args"};
BasicSetting<bool> dump_exefs{false, "dump_exefs"};
BasicSetting<bool> dump_nso{false, "dump_nso"};
BasicSetting<bool> dump_shaders{false, "dump_shaders"};
BasicSetting<bool> enable_fs_access_log{false, "enable_fs_access_log"};
BasicSetting<bool> reporting_services{false, "reporting_services"};
BasicSetting<bool> quest_flag{false, "quest_flag"};

View File

@@ -187,6 +187,8 @@ add_library(core STATIC
hle/kernel/k_event.h
hle/kernel/k_handle_table.cpp
hle/kernel/k_handle_table.h
hle/kernel/k_interrupt_manager.cpp
hle/kernel/k_interrupt_manager.h
hle/kernel/k_light_condition_variable.cpp
hle/kernel/k_light_condition_variable.h
hle/kernel/k_light_lock.cpp

View File

@@ -45,26 +45,26 @@ void DefaultControllerApplet::ReconfigureControllers(std::function<void()> callb
// Pro Controller -> Dual Joycons -> Left Joycon/Right Joycon -> Handheld
if (parameters.allow_pro_controller) {
controller->SetNpadStyleIndex(Core::HID::NpadStyleIndex::ProController);
controller->Connect();
controller->Connect(true);
} else if (parameters.allow_dual_joycons) {
controller->SetNpadStyleIndex(Core::HID::NpadStyleIndex::JoyconDual);
controller->Connect();
controller->Connect(true);
} else if (parameters.allow_left_joycon && parameters.allow_right_joycon) {
// Assign left joycons to even player indices and right joycons to odd player indices.
// We do this since Captain Toad Treasure Tracker expects a left joycon for Player 1 and
// a right Joycon for Player 2 in 2 Player Assist mode.
if (index % 2 == 0) {
controller->SetNpadStyleIndex(Core::HID::NpadStyleIndex::JoyconLeft);
controller->Connect();
controller->Connect(true);
} else {
controller->SetNpadStyleIndex(Core::HID::NpadStyleIndex::JoyconRight);
controller->Connect();
controller->Connect(true);
}
} else if (index == 0 && parameters.enable_single_mode && parameters.allow_handheld &&
!Settings::values.use_docked_mode.GetValue()) {
// We should *never* reach here under any normal circumstances.
controller->SetNpadStyleIndex(Core::HID::NpadStyleIndex::Handheld);
controller->Connect();
controller->Connect(true);
} else {
UNREACHABLE_MSG("Unable to add a new controller based on the given parameters!");
}

View File

@@ -886,8 +886,9 @@ void EmulatedController::SetSupportedNpadStyleTag(NpadStyleTag supported_styles)
}
}
bool EmulatedController::IsControllerSupported() const {
switch (npad_type) {
bool EmulatedController::IsControllerSupported(bool use_temporary_value) const {
const auto type = is_configuring && use_temporary_value ? tmp_npad_type : npad_type;
switch (type) {
case NpadStyleIndex::ProController:
return supported_style_tag.fullkey;
case NpadStyleIndex::Handheld:
@@ -915,9 +916,10 @@ bool EmulatedController::IsControllerSupported() const {
}
}
void EmulatedController::Connect() {
if (!IsControllerSupported()) {
LOG_ERROR(Service_HID, "Controller type {} is not supported", npad_type);
void EmulatedController::Connect(bool use_temporary_value) {
if (!IsControllerSupported(use_temporary_value)) {
const auto type = is_configuring && use_temporary_value ? tmp_npad_type : npad_type;
LOG_ERROR(Service_HID, "Controller type {} is not supported", type);
return;
}
{

View File

@@ -167,8 +167,11 @@ public:
*/
void SetSupportedNpadStyleTag(NpadStyleTag supported_styles);
/// Sets the connected status to true
void Connect();
/**
* Sets the connected status to true
* @param use_temporary_value If true tmp_npad_type will be used
*/
void Connect(bool use_temporary_value = false);
/// Sets the connected status to false
void Disconnect();
@@ -319,9 +322,10 @@ private:
/**
* Checks the current controller type against the supported_style_tag
* @param use_temporary_value If true tmp_npad_type will be used
* @return true if the controller is supported
*/
bool IsControllerSupported() const;
bool IsControllerSupported(bool use_temporary_value = false) const;
/**
* Updates the button status of the controller

View File

@@ -9,6 +9,7 @@
#include "core/hle/kernel/global_scheduler_context.h"
#include "core/hle/kernel/k_scheduler.h"
#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/physical_core.h"
namespace Kernel {
@@ -42,6 +43,11 @@ void GlobalSchedulerContext::PreemptThreads() {
for (u32 core_id = 0; core_id < Core::Hardware::NUM_CPU_CORES; core_id++) {
const u32 priority = preemption_priorities[core_id];
kernel.Scheduler(core_id).RotateScheduledQueue(core_id, priority);
// Signal an interrupt occurred. For core 3, this is a certainty, as preemption will result
// in the rotator thread being scheduled. For cores 0-2, this is to simulate or system
// interrupts that may have occurred.
kernel.PhysicalCore(core_id).Interrupt();
}
}

View File

@@ -0,0 +1,34 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "core/hle/kernel/k_interrupt_manager.h"
#include "core/hle/kernel/k_process.h"
#include "core/hle/kernel/k_scheduler.h"
#include "core/hle/kernel/k_thread.h"
#include "core/hle/kernel/kernel.h"
namespace Kernel::KInterruptManager {
void HandleInterrupt(KernelCore& kernel, s32 core_id) {
auto* process = kernel.CurrentProcess();
if (!process) {
return;
}
auto& scheduler = kernel.Scheduler(core_id);
auto& current_thread = *scheduler.GetCurrentThread();
// If the user disable count is set, we may need to pin the current thread.
if (current_thread.GetUserDisableCount() && !process->GetPinnedThread(core_id)) {
KScopedSchedulerLock sl{kernel};
// Pin the current thread.
process->PinCurrentThread(core_id);
// Set the interrupt flag for the thread.
scheduler.GetCurrentThread()->SetInterruptFlag();
}
}
} // namespace Kernel::KInterruptManager

View File

@@ -0,0 +1,17 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/common_types.h"
namespace Kernel {
class KernelCore;
namespace KInterruptManager {
void HandleInterrupt(KernelCore& kernel, s32 core_id);
}
} // namespace Kernel

View File

@@ -120,7 +120,7 @@ static_assert(static_cast<u32>(KMemoryState::CodeOut) == 0x00402015);
enum class KMemoryPermission : u8 {
None = 0,
Mask = static_cast<u8>(~None),
All = static_cast<u8>(~None),
Read = 1 << 0,
Write = 1 << 1,

View File

@@ -264,9 +264,9 @@ ResultCode KPageTable::InitializeForProcess(FileSys::ProgramAddressSpaceType as_
ASSERT(heap_last < stack_start || stack_last < heap_start);
ASSERT(heap_last < kmap_start || kmap_last < heap_start);
current_heap_addr = heap_region_start;
heap_capacity = 0;
physical_memory_usage = 0;
current_heap_end = heap_region_start;
max_heap_size = 0;
mapped_physical_memory_size = 0;
memory_pool = pool;
page_table_impl.Resize(address_space_width, PageBits);
@@ -306,7 +306,7 @@ ResultCode KPageTable::MapProcessCodeMemory(VAddr dst_addr, VAddr src_addr, std:
KMemoryState state{};
KMemoryPermission perm{};
CASCADE_CODE(CheckMemoryState(&state, &perm, nullptr, src_addr, size, KMemoryState::All,
KMemoryState::Normal, KMemoryPermission::Mask,
KMemoryState::Normal, KMemoryPermission::All,
KMemoryPermission::ReadAndWrite, KMemoryAttribute::Mask,
KMemoryAttribute::None, KMemoryAttribute::IpcAndDeviceMapped));
@@ -465,7 +465,7 @@ ResultCode KPageTable::MapPhysicalMemory(VAddr addr, std::size_t size) {
MapPhysicalMemory(page_linked_list, addr, end_addr);
physical_memory_usage += remaining_size;
mapped_physical_memory_size += remaining_size;
const std::size_t num_pages{size / PageSize};
block_manager->Update(addr, num_pages, KMemoryState::Free, KMemoryPermission::None,
@@ -507,7 +507,7 @@ ResultCode KPageTable::UnmapPhysicalMemory(VAddr addr, std::size_t size) {
auto process{system.Kernel().CurrentProcess()};
process->GetResourceLimit()->Release(LimitableResource::PhysicalMemory, mapped_size);
physical_memory_usage -= mapped_size;
mapped_physical_memory_size -= mapped_size;
return ResultSuccess;
}
@@ -554,7 +554,7 @@ ResultCode KPageTable::Map(VAddr dst_addr, VAddr src_addr, std::size_t size) {
KMemoryState src_state{};
CASCADE_CODE(CheckMemoryState(
&src_state, nullptr, nullptr, src_addr, size, KMemoryState::FlagCanAlias,
KMemoryState::FlagCanAlias, KMemoryPermission::Mask, KMemoryPermission::ReadAndWrite,
KMemoryState::FlagCanAlias, KMemoryPermission::All, KMemoryPermission::ReadAndWrite,
KMemoryAttribute::Mask, KMemoryAttribute::None, KMemoryAttribute::IpcAndDeviceMapped));
if (IsRegionMapped(dst_addr, size)) {
@@ -593,7 +593,7 @@ ResultCode KPageTable::Unmap(VAddr dst_addr, VAddr src_addr, std::size_t size) {
KMemoryState src_state{};
CASCADE_CODE(CheckMemoryState(
&src_state, nullptr, nullptr, src_addr, size, KMemoryState::FlagCanAlias,
KMemoryState::FlagCanAlias, KMemoryPermission::Mask, KMemoryPermission::None,
KMemoryState::FlagCanAlias, KMemoryPermission::All, KMemoryPermission::None,
KMemoryAttribute::Mask, KMemoryAttribute::Locked, KMemoryAttribute::IpcAndDeviceMapped));
KMemoryPermission dst_perm{};
@@ -784,7 +784,7 @@ ResultCode KPageTable::ReserveTransferMemory(VAddr addr, std::size_t size, KMemo
CASCADE_CODE(CheckMemoryState(
&state, nullptr, &attribute, addr, size,
KMemoryState::FlagCanTransfer | KMemoryState::FlagReferenceCounted,
KMemoryState::FlagCanTransfer | KMemoryState::FlagReferenceCounted, KMemoryPermission::Mask,
KMemoryState::FlagCanTransfer | KMemoryState::FlagReferenceCounted, KMemoryPermission::All,
KMemoryPermission::ReadAndWrite, KMemoryAttribute::Mask, KMemoryAttribute::None,
KMemoryAttribute::IpcAndDeviceMapped));
@@ -806,6 +806,33 @@ ResultCode KPageTable::ResetTransferMemory(VAddr addr, std::size_t size) {
KMemoryAttribute::Locked, KMemoryAttribute::IpcAndDeviceMapped));
block_manager->Update(addr, size / PageSize, state, KMemoryPermission::ReadAndWrite);
return ResultSuccess;
}
ResultCode KPageTable::SetMemoryPermission(VAddr addr, std::size_t size,
Svc::MemoryPermission svc_perm) {
const size_t num_pages = size / PageSize;
// Lock the table.
std::lock_guard lock{page_table_lock};
// Verify we can change the memory permission.
KMemoryState old_state;
KMemoryPermission old_perm;
R_TRY(this->CheckMemoryState(
std::addressof(old_state), std::addressof(old_perm), nullptr, addr, size,
KMemoryState::FlagCanReprotect, KMemoryState::FlagCanReprotect, KMemoryPermission::None,
KMemoryPermission::None, KMemoryAttribute::All, KMemoryAttribute::None));
// Determine new perm.
const KMemoryPermission new_perm = ConvertToKMemoryPermission(svc_perm);
R_SUCCEED_IF(old_perm == new_perm);
// Perform mapping operation.
R_TRY(Operate(addr, num_pages, new_perm, OperationType::ChangePermissions));
// Update the blocks.
block_manager->Update(addr, num_pages, old_state, new_perm, KMemoryAttribute::None);
return ResultSuccess;
}
@@ -832,61 +859,125 @@ ResultCode KPageTable::SetMemoryAttribute(VAddr addr, std::size_t size, KMemoryA
return ResultSuccess;
}
ResultCode KPageTable::SetHeapCapacity(std::size_t new_heap_capacity) {
ResultCode KPageTable::SetMaxHeapSize(std::size_t size) {
// Lock the table.
std::lock_guard lock{page_table_lock};
heap_capacity = new_heap_capacity;
// Only process page tables are allowed to set heap size.
ASSERT(!this->IsKernel());
max_heap_size = size;
return ResultSuccess;
}
ResultVal<VAddr> KPageTable::SetHeapSize(std::size_t size) {
ResultCode KPageTable::SetHeapSize(VAddr* out, std::size_t size) {
// Try to perform a reduction in heap, instead of an extension.
VAddr cur_address{};
std::size_t allocation_size{};
{
// Lock the table.
std::lock_guard lk(page_table_lock);
if (size > heap_region_end - heap_region_start) {
return ResultOutOfMemory;
// Validate that setting heap size is possible at all.
R_UNLESS(!is_kernel, ResultOutOfMemory);
R_UNLESS(size <= static_cast<std::size_t>(heap_region_end - heap_region_start),
ResultOutOfMemory);
R_UNLESS(size <= max_heap_size, ResultOutOfMemory);
if (size < GetHeapSize()) {
// The size being requested is less than the current size, so we need to free the end of
// the heap.
// Validate memory state.
std::size_t num_allocator_blocks;
R_TRY(this->CheckMemoryState(std::addressof(num_allocator_blocks),
heap_region_start + size, GetHeapSize() - size,
KMemoryState::All, KMemoryState::Normal,
KMemoryPermission::All, KMemoryPermission::ReadAndWrite,
KMemoryAttribute::All, KMemoryAttribute::None));
// Unmap the end of the heap.
const auto num_pages = (GetHeapSize() - size) / PageSize;
R_TRY(Operate(heap_region_start + size, num_pages, KMemoryPermission::None,
OperationType::Unmap));
// Release the memory from the resource limit.
system.Kernel().CurrentProcess()->GetResourceLimit()->Release(
LimitableResource::PhysicalMemory, num_pages * PageSize);
// Apply the memory block update.
block_manager->Update(heap_region_start + size, num_pages, KMemoryState::Free,
KMemoryPermission::None, KMemoryAttribute::None);
// Update the current heap end.
current_heap_end = heap_region_start + size;
// Set the output.
*out = heap_region_start;
return ResultSuccess;
} else if (size == GetHeapSize()) {
// The size requested is exactly the current size.
*out = heap_region_start;
return ResultSuccess;
} else {
// We have to allocate memory. Determine how much to allocate and where while the table
// is locked.
cur_address = current_heap_end;
allocation_size = size - GetHeapSize();
}
}
const u64 previous_heap_size{GetHeapSize()};
// Reserve memory for the heap extension.
KScopedResourceReservation memory_reservation(
system.Kernel().CurrentProcess()->GetResourceLimit(), LimitableResource::PhysicalMemory,
allocation_size);
R_UNLESS(memory_reservation.Succeeded(), ResultLimitReached);
UNIMPLEMENTED_IF_MSG(previous_heap_size > size, "Heap shrink is unimplemented");
// Allocate pages for the heap extension.
KPageLinkedList page_linked_list;
R_TRY(system.Kernel().MemoryManager().Allocate(page_linked_list, allocation_size / PageSize,
memory_pool));
// Increase the heap size
// Map the pages.
{
std::lock_guard lock{page_table_lock};
// Lock the table.
std::lock_guard lk(page_table_lock);
const u64 delta{size - previous_heap_size};
// Ensure that the heap hasn't changed since we began executing.
ASSERT(cur_address == current_heap_end);
// Reserve memory for the heap extension.
KScopedResourceReservation memory_reservation(
system.Kernel().CurrentProcess()->GetResourceLimit(), LimitableResource::PhysicalMemory,
delta);
// Check the memory state.
std::size_t num_allocator_blocks{};
R_TRY(this->CheckMemoryState(std::addressof(num_allocator_blocks), current_heap_end,
allocation_size, KMemoryState::All, KMemoryState::Free,
KMemoryPermission::None, KMemoryPermission::None,
KMemoryAttribute::None, KMemoryAttribute::None));
if (!memory_reservation.Succeeded()) {
LOG_ERROR(Kernel, "Could not reserve heap extension of size {:X} bytes", delta);
return ResultLimitReached;
// Map the pages.
const auto num_pages = allocation_size / PageSize;
R_TRY(Operate(current_heap_end, num_pages, page_linked_list, OperationType::MapGroup));
// Clear all the newly allocated pages.
for (std::size_t cur_page = 0; cur_page < num_pages; ++cur_page) {
std::memset(system.Memory().GetPointer(current_heap_end + (cur_page * PageSize)), 0,
PageSize);
}
KPageLinkedList page_linked_list;
const std::size_t num_pages{delta / PageSize};
CASCADE_CODE(
system.Kernel().MemoryManager().Allocate(page_linked_list, num_pages, memory_pool));
if (IsRegionMapped(current_heap_addr, delta)) {
return ResultInvalidCurrentMemory;
}
CASCADE_CODE(
Operate(current_heap_addr, num_pages, page_linked_list, OperationType::MapGroup));
// Succeeded in allocation, commit the resource reservation
// We succeeded, so commit our memory reservation.
memory_reservation.Commit();
block_manager->Update(current_heap_addr, num_pages, KMemoryState::Normal,
KMemoryPermission::ReadAndWrite);
// Apply the memory block update.
block_manager->Update(current_heap_end, num_pages, KMemoryState::Normal,
KMemoryPermission::ReadAndWrite, KMemoryAttribute::None);
current_heap_addr = heap_region_start + size;
// Update the current heap end.
current_heap_end = heap_region_start + size;
// Set the output.
*out = heap_region_start;
return ResultSuccess;
}
return heap_region_start;
}
ResultVal<VAddr> KPageTable::AllocateAndMapMemory(std::size_t needed_num_pages, std::size_t align,
@@ -978,7 +1069,7 @@ ResultCode KPageTable::LockForCodeMemory(VAddr addr, std::size_t size) {
if (const ResultCode result{CheckMemoryState(
nullptr, &old_perm, nullptr, addr, size, KMemoryState::FlagCanCodeMemory,
KMemoryState::FlagCanCodeMemory, KMemoryPermission::Mask,
KMemoryState::FlagCanCodeMemory, KMemoryPermission::All,
KMemoryPermission::UserReadWrite, KMemoryAttribute::All, KMemoryAttribute::None)};
result.IsError()) {
return result;
@@ -1031,9 +1122,8 @@ ResultCode KPageTable::InitializeMemoryLayout(VAddr start, VAddr end) {
bool KPageTable::IsRegionMapped(VAddr address, u64 size) {
return CheckMemoryState(address, size, KMemoryState::All, KMemoryState::Free,
KMemoryPermission::Mask, KMemoryPermission::None,
KMemoryAttribute::Mask, KMemoryAttribute::None,
KMemoryAttribute::IpcAndDeviceMapped)
KMemoryPermission::All, KMemoryPermission::None, KMemoryAttribute::Mask,
KMemoryAttribute::None, KMemoryAttribute::IpcAndDeviceMapped)
.IsError();
}

View File

@@ -47,10 +47,11 @@ public:
KMemoryInfo QueryInfo(VAddr addr);
ResultCode ReserveTransferMemory(VAddr addr, std::size_t size, KMemoryPermission perm);
ResultCode ResetTransferMemory(VAddr addr, std::size_t size);
ResultCode SetMemoryPermission(VAddr addr, std::size_t size, Svc::MemoryPermission perm);
ResultCode SetMemoryAttribute(VAddr addr, std::size_t size, KMemoryAttribute mask,
KMemoryAttribute value);
ResultCode SetHeapCapacity(std::size_t new_heap_capacity);
ResultVal<VAddr> SetHeapSize(std::size_t size);
ResultCode SetMaxHeapSize(std::size_t size);
ResultCode SetHeapSize(VAddr* out, std::size_t size);
ResultVal<VAddr> AllocateAndMapMemory(std::size_t needed_num_pages, std::size_t align,
bool is_map_only, VAddr region_start,
std::size_t region_num_pages, KMemoryState state,
@@ -182,14 +183,15 @@ public:
constexpr VAddr GetAliasCodeRegionSize() const {
return alias_code_region_end - alias_code_region_start;
}
size_t GetNormalMemorySize() {
std::lock_guard lk(page_table_lock);
return GetHeapSize() + mapped_physical_memory_size;
}
constexpr std::size_t GetAddressSpaceWidth() const {
return address_space_width;
}
constexpr std::size_t GetHeapSize() {
return current_heap_addr - heap_region_start;
}
constexpr std::size_t GetTotalHeapSize() {
return GetHeapSize() + physical_memory_usage;
constexpr std::size_t GetHeapSize() const {
return current_heap_end - heap_region_start;
}
constexpr bool IsInsideAddressSpace(VAddr address, std::size_t size) const {
return address_space_start <= address && address + size - 1 <= address_space_end - 1;
@@ -269,10 +271,8 @@ private:
VAddr code_region_end{};
VAddr alias_code_region_start{};
VAddr alias_code_region_end{};
VAddr current_heap_addr{};
std::size_t heap_capacity{};
std::size_t physical_memory_usage{};
std::size_t mapped_physical_memory_size{};
std::size_t max_heap_size{};
std::size_t max_physical_memory_size{};
std::size_t address_space_width{};

View File

@@ -172,7 +172,7 @@ void KProcess::DecrementThreadCount() {
u64 KProcess::GetTotalPhysicalMemoryAvailable() const {
const u64 capacity{resource_limit->GetFreeValue(LimitableResource::PhysicalMemory) +
page_table->GetTotalHeapSize() + GetSystemResourceSize() + image_size +
page_table->GetNormalMemorySize() + GetSystemResourceSize() + image_size +
main_thread_stack_size};
if (const auto pool_size = kernel.MemoryManager().GetSize(KMemoryManager::Pool::Application);
capacity != pool_size) {
@@ -189,7 +189,7 @@ u64 KProcess::GetTotalPhysicalMemoryAvailableWithoutSystemResource() const {
}
u64 KProcess::GetTotalPhysicalMemoryUsed() const {
return image_size + main_thread_stack_size + page_table->GetTotalHeapSize() +
return image_size + main_thread_stack_size + page_table->GetNormalMemorySize() +
GetSystemResourceSize();
}
@@ -220,30 +220,28 @@ bool KProcess::ReleaseUserException(KThread* thread) {
}
}
void KProcess::PinCurrentThread() {
void KProcess::PinCurrentThread(s32 core_id) {
ASSERT(kernel.GlobalSchedulerContext().IsLocked());
// Get the current thread.
const s32 core_id = GetCurrentCoreId(kernel);
KThread* cur_thread = GetCurrentThreadPointer(kernel);
KThread* cur_thread = kernel.Scheduler(static_cast<std::size_t>(core_id)).GetCurrentThread();
// If the thread isn't terminated, pin it.
if (!cur_thread->IsTerminationRequested()) {
// Pin it.
PinThread(core_id, cur_thread);
cur_thread->Pin();
cur_thread->Pin(core_id);
// An update is needed.
KScheduler::SetSchedulerUpdateNeeded(kernel);
}
}
void KProcess::UnpinCurrentThread() {
void KProcess::UnpinCurrentThread(s32 core_id) {
ASSERT(kernel.GlobalSchedulerContext().IsLocked());
// Get the current thread.
const s32 core_id = GetCurrentCoreId(kernel);
KThread* cur_thread = GetCurrentThreadPointer(kernel);
KThread* cur_thread = kernel.Scheduler(static_cast<std::size_t>(core_id)).GetCurrentThread();
// Unpin it.
cur_thread->Unpin();
@@ -410,8 +408,8 @@ void KProcess::Run(s32 main_thread_priority, u64 stack_size) {
resource_limit->Reserve(LimitableResource::Threads, 1);
resource_limit->Reserve(LimitableResource::PhysicalMemory, main_thread_stack_size);
const std::size_t heap_capacity{memory_usage_capacity - main_thread_stack_size - image_size};
ASSERT(!page_table->SetHeapCapacity(heap_capacity).IsError());
const std::size_t heap_capacity{memory_usage_capacity - (main_thread_stack_size + image_size)};
ASSERT(!page_table->SetMaxHeapSize(heap_capacity).IsError());
ChangeStatus(ProcessStatus::Running);

View File

@@ -345,8 +345,8 @@ public:
bool IsSignaled() const override;
void PinCurrentThread();
void UnpinCurrentThread();
void PinCurrentThread(s32 core_id);
void UnpinCurrentThread(s32 core_id);
void UnpinThread(KThread* thread);
KLightLock& GetStateLock() {

View File

@@ -15,6 +15,7 @@
#include "core/core.h"
#include "core/core_timing.h"
#include "core/cpu_manager.h"
#include "core/hle/kernel/k_interrupt_manager.h"
#include "core/hle/kernel/k_process.h"
#include "core/hle/kernel/k_scheduler.h"
#include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h"
@@ -53,6 +54,13 @@ void KScheduler::RescheduleCores(KernelCore& kernel, u64 cores_pending_reschedul
}
cores_pending_reschedule &= ~(1ULL << core);
}
for (std::size_t core_id = 0; core_id < Core::Hardware::NUM_CPU_CORES; ++core_id) {
if (kernel.PhysicalCore(core_id).IsInterrupted()) {
KInterruptManager::HandleInterrupt(kernel, static_cast<s32>(core_id));
}
}
if (must_context_switch) {
auto core_scheduler = kernel.CurrentScheduler();
kernel.ExitSVCProfile();

View File

@@ -3,6 +3,7 @@
// Refer to the license.txt file included.
#include <algorithm>
#include <atomic>
#include <cinttypes>
#include <optional>
#include <vector>
@@ -26,12 +27,14 @@
#include "core/hle/kernel/k_resource_limit.h"
#include "core/hle/kernel/k_scheduler.h"
#include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h"
#include "core/hle/kernel/k_system_control.h"
#include "core/hle/kernel/k_thread.h"
#include "core/hle/kernel/k_thread_queue.h"
#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/svc_results.h"
#include "core/hle/kernel/time_manager.h"
#include "core/hle/result.h"
#include "core/memory.h"
#ifdef ARCHITECTURE_x86_64
#include "core/arm/dynarmic/arm_dynarmic_32.h"
@@ -50,6 +53,7 @@ static void ResetThreadContext64(Core::ARM_Interface::ThreadContext64& context,
VAddr entry_point, u64 arg) {
context = {};
context.cpu_registers[0] = arg;
context.cpu_registers[18] = Kernel::KSystemControl::GenerateRandomU64() | 1;
context.pc = entry_point;
context.sp = stack_top;
// TODO(merry): Perform a hardware test to determine the below value.
@@ -61,6 +65,13 @@ namespace Kernel {
namespace {
struct ThreadLocalRegion {
static constexpr std::size_t MessageBufferSize = 0x100;
std::array<u32, MessageBufferSize / sizeof(u32)> message_buffer;
std::atomic_uint16_t disable_count;
std::atomic_uint16_t interrupt_flag;
};
class ThreadQueueImplForKThreadSleep final : public KThreadQueueWithoutEndWait {
public:
explicit ThreadQueueImplForKThreadSleep(KernelCore& kernel_)
@@ -344,7 +355,7 @@ void KThread::StartTermination() {
if (parent != nullptr) {
parent->ReleaseUserException(this);
if (parent->GetPinnedThread(GetCurrentCoreId(kernel)) == this) {
parent->UnpinCurrentThread();
parent->UnpinCurrentThread(core_id);
}
}
@@ -370,7 +381,7 @@ void KThread::StartTermination() {
this->Close();
}
void KThread::Pin() {
void KThread::Pin(s32 current_core) {
ASSERT(kernel.GlobalSchedulerContext().IsLocked());
// Set ourselves as pinned.
@@ -387,7 +398,6 @@ void KThread::Pin() {
// Bind ourselves to this core.
const s32 active_core = GetActiveCore();
const s32 current_core = GetCurrentCoreId(kernel);
SetActiveCore(current_core);
physical_ideal_core_id = current_core;
@@ -480,6 +490,36 @@ void KThread::Unpin() {
}
}
u16 KThread::GetUserDisableCount() const {
if (!IsUserThread()) {
// We only emulate TLS for user threads
return {};
}
auto& memory = kernel.System().Memory();
return memory.Read16(tls_address + offsetof(ThreadLocalRegion, disable_count));
}
void KThread::SetInterruptFlag() {
if (!IsUserThread()) {
// We only emulate TLS for user threads
return;
}
auto& memory = kernel.System().Memory();
memory.Write16(tls_address + offsetof(ThreadLocalRegion, interrupt_flag), 1);
}
void KThread::ClearInterruptFlag() {
if (!IsUserThread()) {
// We only emulate TLS for user threads
return;
}
auto& memory = kernel.System().Memory();
memory.Write16(tls_address + offsetof(ThreadLocalRegion, interrupt_flag), 0);
}
ResultCode KThread::GetCoreMask(s32* out_ideal_core, u64* out_affinity_mask) {
KScopedSchedulerLock sl{kernel};

View File

@@ -307,6 +307,10 @@ public:
return parent != nullptr;
}
u16 GetUserDisableCount() const;
void SetInterruptFlag();
void ClearInterruptFlag();
[[nodiscard]] KThread* GetLockOwner() const {
return lock_owner;
}
@@ -490,7 +494,7 @@ public:
this->GetStackParameters().disable_count--;
}
void Pin();
void Pin(s32 current_core);
void Unpin();

View File

@@ -135,24 +135,15 @@ enum class ResourceLimitValueType {
} // Anonymous namespace
/// Set the process heap to a given Size. It can both extend and shrink the heap.
static ResultCode SetHeapSize(Core::System& system, VAddr* heap_addr, u64 heap_size) {
LOG_TRACE(Kernel_SVC, "called, heap_size=0x{:X}", heap_size);
static ResultCode SetHeapSize(Core::System& system, VAddr* out_address, u64 size) {
LOG_TRACE(Kernel_SVC, "called, heap_size=0x{:X}", size);
// Size must be a multiple of 0x200000 (2MB) and be equal to or less than 8GB.
if ((heap_size % 0x200000) != 0) {
LOG_ERROR(Kernel_SVC, "The heap size is not a multiple of 2MB, heap_size=0x{:016X}",
heap_size);
return ResultInvalidSize;
}
// Validate size.
R_UNLESS(Common::IsAligned(size, HeapSizeAlignment), ResultInvalidSize);
R_UNLESS(size < MainMemorySizeMax, ResultInvalidSize);
if (heap_size >= 0x200000000) {
LOG_ERROR(Kernel_SVC, "The heap size is not less than 8GB, heap_size=0x{:016X}", heap_size);
return ResultInvalidSize;
}
auto& page_table{system.Kernel().CurrentProcess()->PageTable()};
CASCADE_RESULT(*heap_addr, page_table.SetHeapSize(heap_size));
// Set the heap size.
R_TRY(system.Kernel().CurrentProcess()->PageTable().SetHeapSize(out_address, size));
return ResultSuccess;
}
@@ -164,6 +155,36 @@ static ResultCode SetHeapSize32(Core::System& system, u32* heap_addr, u32 heap_s
return result;
}
constexpr bool IsValidSetMemoryPermission(MemoryPermission perm) {
switch (perm) {
case MemoryPermission::None:
case MemoryPermission::Read:
case MemoryPermission::ReadWrite:
return true;
default:
return false;
}
}
static ResultCode SetMemoryPermission(Core::System& system, VAddr address, u64 size,
MemoryPermission perm) {
// Validate address / size.
R_UNLESS(Common::IsAligned(address, PageSize), ResultInvalidAddress);
R_UNLESS(Common::IsAligned(size, PageSize), ResultInvalidSize);
R_UNLESS(size > 0, ResultInvalidSize);
R_UNLESS((address < address + size), ResultInvalidCurrentMemory);
// Validate the permission.
R_UNLESS(IsValidSetMemoryPermission(perm), ResultInvalidNewMemoryPermission);
// Validate that the region is in range for the current process.
auto& page_table = system.Kernel().CurrentProcess()->PageTable();
R_UNLESS(page_table.Contains(address, size), ResultInvalidCurrentMemory);
// Set the memory attribute.
return page_table.SetMemoryPermission(address, size, perm);
}
static ResultCode SetMemoryAttribute(Core::System& system, VAddr address, u64 size, u32 mask,
u32 attribute) {
LOG_DEBUG(Kernel_SVC,
@@ -2006,6 +2027,25 @@ static ResultCode SignalToAddress(Core::System& system, VAddr address, Svc::Sign
count);
}
static void SynchronizePreemptionState(Core::System& system) {
auto& kernel = system.Kernel();
// Lock the scheduler.
KScopedSchedulerLock sl{kernel};
// If the current thread is pinned, unpin it.
KProcess* cur_process = system.Kernel().CurrentProcess();
const auto core_id = GetCurrentCoreId(kernel);
if (cur_process->GetPinnedThread(core_id) == GetCurrentThreadPointer(kernel)) {
// Clear the current thread's interrupt flag.
GetCurrentThread(kernel).ClearInterruptFlag();
// Unpin the current thread.
cur_process->UnpinCurrentThread(core_id);
}
}
static ResultCode SignalToAddress32(Core::System& system, u32 address, Svc::SignalType signal_type,
s32 value, s32 count) {
return SignalToAddress(system, address, signal_type, value, count);
@@ -2724,7 +2764,7 @@ static const FunctionDef SVC_Table_32[] = {
static const FunctionDef SVC_Table_64[] = {
{0x00, nullptr, "Unknown"},
{0x01, SvcWrap64<SetHeapSize>, "SetHeapSize"},
{0x02, nullptr, "SetMemoryPermission"},
{0x02, SvcWrap64<SetMemoryPermission>, "SetMemoryPermission"},
{0x03, SvcWrap64<SetMemoryAttribute>, "SetMemoryAttribute"},
{0x04, SvcWrap64<MapMemory>, "MapMemory"},
{0x05, SvcWrap64<UnmapMemory>, "UnmapMemory"},
@@ -2776,7 +2816,7 @@ static const FunctionDef SVC_Table_64[] = {
{0x33, SvcWrap64<GetThreadContext>, "GetThreadContext"},
{0x34, SvcWrap64<WaitForAddress>, "WaitForAddress"},
{0x35, SvcWrap64<SignalToAddress>, "SignalToAddress"},
{0x36, nullptr, "SynchronizePreemptionState"},
{0x36, SvcWrap64<SynchronizePreemptionState>, "SynchronizePreemptionState"},
{0x37, nullptr, "Unknown"},
{0x38, nullptr, "Unknown"},
{0x39, nullptr, "Unknown"},

View File

@@ -5,6 +5,7 @@
#pragma once
#include "common/common_types.h"
#include "common/literals.h"
namespace Kernel {
using Handle = u32;
@@ -12,9 +13,13 @@ using Handle = u32;
namespace Kernel::Svc {
using namespace Common::Literals;
constexpr s32 ArgumentHandleCountMax = 0x40;
constexpr u32 HandleWaitMask{1u << 30};
constexpr inline std::size_t HeapSizeAlignment = 2_MiB;
constexpr inline Handle InvalidHandle = Handle(0);
enum PseudoHandle : Handle {

View File

@@ -249,6 +249,14 @@ void SvcWrap64(Core::System& system) {
func(system, Param(system, 0), Param(system, 1), static_cast<u32>(Param(system, 2))).raw);
}
// Used by SetMemoryPermission
template <ResultCode func(Core::System&, u64, u64, Svc::MemoryPermission)>
void SvcWrap64(Core::System& system) {
FuncReturn(system, func(system, Param(system, 0), Param(system, 1),
static_cast<Svc::MemoryPermission>(Param(system, 2)))
.raw);
}
// Used by MapSharedMemory
template <ResultCode func(Core::System&, Handle, u64, u64, Svc::MemoryPermission)>
void SvcWrap64(Core::System& system) {

View File

@@ -86,7 +86,7 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, Scal
}
switch (attr) {
case IR::Attribute::PrimitiveId:
ctx.Add("MOV.S {}.x,primitive.id;", inst);
ctx.Add("MOV.F {}.x,primitive.id;", inst);
break;
case IR::Attribute::PositionX:
case IR::Attribute::PositionY:
@@ -113,13 +113,13 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, Scal
ctx.Add("MOV.F {}.x,vertex.tesscoord.{};", inst, swizzle);
break;
case IR::Attribute::InstanceId:
ctx.Add("MOV.S {}.x,{}.instance;", inst, ctx.attrib_name);
ctx.Add("MOV.F {}.x,{}.instance;", inst, ctx.attrib_name);
break;
case IR::Attribute::VertexId:
ctx.Add("MOV.S {}.x,{}.id;", inst, ctx.attrib_name);
ctx.Add("MOV.F {}.x,{}.id;", inst, ctx.attrib_name);
break;
case IR::Attribute::FrontFace:
ctx.Add("CMP.S {}.x,{}.facing.x,0,-1;", inst, ctx.attrib_name);
ctx.Add("CMP.F {}.x,{}.facing.x,0,-1;", inst, ctx.attrib_name);
break;
default:
throw NotImplementedException("Get attribute {}", attr);

View File

@@ -30,11 +30,20 @@ struct FuncTraits<ReturnType_ (*)(Args...)> {
using ArgType = std::tuple_element_t<I, std::tuple<Args...>>;
};
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable : 4702) // Ignore unreachable code warning
#endif
template <auto func, typename... Args>
void SetDefinition(EmitContext& ctx, IR::Inst* inst, Args... args) {
inst->SetDefinition<Id>(func(ctx, std::forward<Args>(args)...));
}
#ifdef _MSC_VER
#pragma warning(pop)
#endif
template <typename ArgType>
ArgType Arg(EmitContext& ctx, const IR::Value& arg) {
if constexpr (std::is_same_v<ArgType, Id>) {

View File

@@ -31,6 +31,8 @@ public:
[[nodiscard]] virtual std::array<u32, 3> WorkgroupSize() const = 0;
virtual void Dump(u64 hash) = 0;
[[nodiscard]] const ProgramHeader& SPH() const noexcept {
return sph;
}

View File

@@ -425,6 +425,11 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))};
Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0);
if (Settings::values.dump_shaders) {
env.Dump(key.unique_hashes[index]);
}
if (!uses_vertex_a || index != 1) {
// Normal path
programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info);
@@ -511,8 +516,12 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(
LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash());
Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()};
auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
if (Settings::values.dump_shaders) {
env.Dump(key.Hash());
}
auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
const u32 num_storage_buffers{Shader::NumDescriptors(program.info.storage_buffers_descriptors)};
Shader::RuntimeInfo info;
info.glasm_use_storage_buffers = num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks();

View File

@@ -1047,7 +1047,7 @@ bool Image::ScaleDown(bool ignore) {
}
ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info,
ImageId image_id_, Image& image)
ImageId image_id_, Image& image, const SlotVector<Image>&)
: VideoCommon::ImageViewBase{info, image.info, image_id_}, views{runtime.null_image_views} {
const Device& device = runtime.device;
if (True(image.flags & ImageFlagBits::Converted)) {

View File

@@ -36,6 +36,7 @@ using VideoCommon::ImageViewType;
using VideoCommon::NUM_RT;
using VideoCommon::Region2D;
using VideoCommon::RenderTargets;
using VideoCommon::SlotVector;
struct ImageBufferMap {
~ImageBufferMap();
@@ -234,7 +235,8 @@ class ImageView : public VideoCommon::ImageViewBase {
friend Image;
public:
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&);
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&,
const SlotVector<Image>&);
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&,
const VideoCommon::ImageViewInfo&, GPUVAddr);
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,

View File

@@ -517,6 +517,9 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))};
Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0);
if (Settings::values.dump_shaders) {
env.Dump(key.unique_hashes[index]);
}
if (!uses_vertex_a || index != 1) {
// Normal path
programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info);
@@ -613,6 +616,12 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash());
Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()};
// Dump it before error.
if (Settings::values.dump_shaders) {
env.Dump(key.Hash());
}
auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
const std::vector<u32> code{EmitSPIRV(profile, program)};
device.SaveShader(code);

View File

@@ -1344,7 +1344,6 @@ bool Image::ScaleUp(bool ignore) {
return false;
}
has_scaled = true;
const auto& device = runtime->device;
if (!scaled_image) {
const bool is_2d = info.type == ImageType::e2D;
const u32 scaled_width = resolution.ScaleUp(info.size.width);
@@ -1352,7 +1351,7 @@ bool Image::ScaleUp(bool ignore) {
auto scaled_info = info;
scaled_info.size.width = scaled_width;
scaled_info.size.height = scaled_height;
scaled_image = MakeImage(device, scaled_info);
scaled_image = MakeImage(runtime->device, scaled_info);
auto& allocator = runtime->memory_allocator;
scaled_commit = MemoryCommit(allocator.Commit(scaled_image, MemoryUsage::DeviceLocal));
ignore = false;
@@ -1361,18 +1360,13 @@ bool Image::ScaleUp(bool ignore) {
if (ignore) {
return true;
}
if (aspect_mask == 0) {
aspect_mask = ImageAspectMask(info.format);
}
static constexpr auto OPTIMAL_FORMAT = FormatType::Optimal;
const PixelFormat format = StorageFormat(info.format);
const auto vk_format = MaxwellToVK::SurfaceFormat(device, OPTIMAL_FORMAT, false, format).format;
const auto blit_usage = VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
if (device.IsFormatSupported(vk_format, blit_usage, OPTIMAL_FORMAT)) {
BlitScale(*scheduler, *original_image, *scaled_image, info, aspect_mask, resolution);
} else {
if (NeedsScaleHelper()) {
return BlitScaleHelper(true);
} else {
BlitScale(*scheduler, *original_image, *scaled_image, info, aspect_mask, resolution);
}
return true;
}
@@ -1394,15 +1388,10 @@ bool Image::ScaleDown(bool ignore) {
if (aspect_mask == 0) {
aspect_mask = ImageAspectMask(info.format);
}
static constexpr auto OPTIMAL_FORMAT = FormatType::Optimal;
const PixelFormat format = StorageFormat(info.format);
const auto& device = runtime->device;
const auto vk_format = MaxwellToVK::SurfaceFormat(device, OPTIMAL_FORMAT, false, format).format;
const auto blit_usage = VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
if (device.IsFormatSupported(vk_format, blit_usage, OPTIMAL_FORMAT)) {
BlitScale(*scheduler, *scaled_image, *original_image, info, aspect_mask, resolution, false);
} else {
if (NeedsScaleHelper()) {
return BlitScaleHelper(false);
} else {
BlitScale(*scheduler, *scaled_image, *original_image, info, aspect_mask, resolution, false);
}
return true;
}
@@ -1470,11 +1459,24 @@ bool Image::BlitScaleHelper(bool scale_up) {
return true;
}
bool Image::NeedsScaleHelper() const {
const auto& device = runtime->device;
const bool needs_msaa_helper = info.num_samples > 1 && device.CantBlitMSAA();
if (needs_msaa_helper) {
return true;
}
static constexpr auto OPTIMAL_FORMAT = FormatType::Optimal;
const PixelFormat format = StorageFormat(info.format);
const auto vk_format = MaxwellToVK::SurfaceFormat(device, OPTIMAL_FORMAT, false, format).format;
const auto blit_usage = VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
const bool needs_blit_helper = !device.IsFormatSupported(vk_format, blit_usage, OPTIMAL_FORMAT);
return needs_blit_helper;
}
ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info,
ImageId image_id_, Image& image)
: VideoCommon::ImageViewBase{info, image.info, image_id_}, device{&runtime.device},
src_image{&image}, image_handle{image.Handle()},
samples(ConvertSampleCount(image.info.num_samples)) {
image_handle{image.Handle()}, samples(ConvertSampleCount(image.info.num_samples)) {
using Shader::TextureType;
const VkImageAspectFlags aspect_mask = ImageViewAspectMask(info);
@@ -1557,6 +1559,12 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
}
}
ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info,
ImageId image_id_, Image& image, const SlotVector<Image>& slot_imgs)
: ImageView{runtime, info, image_id_, image} {
slot_images = &slot_imgs;
}
ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_)
: VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_},
@@ -1613,10 +1621,12 @@ VkImageView ImageView::StorageView(Shader::TextureType texture_type,
}
bool ImageView::IsRescaled() const noexcept {
if (!src_image) {
if (!slot_images) {
return false;
}
return src_image->IsRescaled();
const auto& slots = *slot_images;
const auto& src_image = slots[image_id];
return src_image.IsRescaled();
}
vk::ImageView ImageView::MakeView(VkFormat vk_format, VkImageAspectFlags aspect_mask) {

View File

@@ -23,6 +23,7 @@ using VideoCommon::ImageId;
using VideoCommon::NUM_RT;
using VideoCommon::Region2D;
using VideoCommon::RenderTargets;
using VideoCommon::SlotVector;
using VideoCore::Surface::PixelFormat;
class ASTCDecoderPass;
@@ -148,6 +149,8 @@ public:
private:
bool BlitScaleHelper(bool scale_up);
bool NeedsScaleHelper() const;
VKScheduler* scheduler{};
TextureCacheRuntime* runtime{};
@@ -170,6 +173,8 @@ private:
class ImageView : public VideoCommon::ImageViewBase {
public:
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&);
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&,
const SlotVector<Image>&);
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&,
const VideoCommon::ImageViewInfo&, GPUVAddr);
explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageViewParams&);
@@ -226,7 +231,7 @@ private:
[[nodiscard]] vk::ImageView MakeView(VkFormat vk_format, VkImageAspectFlags aspect_mask);
const Device* device = nullptr;
const Image* src_image{};
const SlotVector<Image>* slot_images = nullptr;
std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> image_views;
std::unique_ptr<StorageViews> storage_views;

View File

@@ -3,6 +3,7 @@
// Refer to the license.txt file included.
#include <algorithm>
#include <bit>
#include <filesystem>
#include <fstream>
#include <memory>
@@ -14,6 +15,7 @@
#include "common/common_types.h"
#include "common/div_ceil.h"
#include "common/fs/fs.h"
#include "common/fs/path_util.h"
#include "common/logging/log.h"
#include "shader_recompiler/environment.h"
#include "video_core/engines/kepler_compute.h"
@@ -57,6 +59,47 @@ static Shader::TextureType ConvertType(const Tegra::Texture::TICEntry& entry) {
}
}
static std::string_view StageToPrefix(Shader::Stage stage) {
switch (stage) {
case Shader::Stage::VertexB:
return "VB";
case Shader::Stage::TessellationControl:
return "TC";
case Shader::Stage::TessellationEval:
return "TE";
case Shader::Stage::Geometry:
return "GS";
case Shader::Stage::Fragment:
return "FS";
case Shader::Stage::Compute:
return "CS";
case Shader::Stage::VertexA:
return "VA";
default:
return "UK";
}
}
static void DumpImpl(u64 hash, const u64* code, u32 read_highest, u32 read_lowest,
u32 initial_offset, Shader::Stage stage) {
const auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::DumpDir)};
const auto base_dir{shader_dir / "shaders"};
if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir)) {
LOG_ERROR(Common_Filesystem, "Failed to create shader dump directories");
return;
}
const auto prefix = StageToPrefix(stage);
const auto name{base_dir / fmt::format("{}{:016x}.ash", prefix, hash)};
const size_t real_size = read_highest - read_lowest + initial_offset;
const size_t padding_needed = ((32 - (real_size % 32)) % 32);
std::fstream shader_file(name, std::ios::out | std::ios::binary);
const size_t jump_index = initial_offset / sizeof(u64);
shader_file.write(reinterpret_cast<const char*>(code + jump_index), real_size);
for (size_t i = 0; i < padding_needed; i++) {
shader_file.put(0);
}
}
GenericEnvironment::GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_,
u32 start_address_)
: gpu_memory{&gpu_memory_}, program_base{program_base_} {
@@ -128,6 +171,10 @@ u64 GenericEnvironment::CalculateHash() const {
return Common::CityHash64(data.get(), size);
}
void GenericEnvironment::Dump(u64 hash) {
DumpImpl(hash, code.data(), read_highest, read_lowest, initial_offset, stage);
}
void GenericEnvironment::Serialize(std::ofstream& file) const {
const u64 code_size{static_cast<u64>(CachedSize())};
const u64 num_texture_types{static_cast<u64>(texture_types.size())};
@@ -207,6 +254,7 @@ GraphicsEnvironment::GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_,
u32 start_address_)
: GenericEnvironment{gpu_memory_, program_base_, start_address_}, maxwell3d{&maxwell3d_} {
gpu_memory->ReadBlock(program_base + start_address, &sph, sizeof(sph));
initial_offset = sizeof(sph);
gp_passthrough_mask = maxwell3d->regs.gp_passthrough_mask;
switch (program) {
case Maxwell::ShaderProgram::VertexA:
@@ -323,14 +371,20 @@ void FileEnvironment::Deserialize(std::ifstream& file) {
if (stage == Shader::Stage::Compute) {
file.read(reinterpret_cast<char*>(&workgroup_size), sizeof(workgroup_size))
.read(reinterpret_cast<char*>(&shared_memory_size), sizeof(shared_memory_size));
initial_offset = 0;
} else {
file.read(reinterpret_cast<char*>(&sph), sizeof(sph));
initial_offset = sizeof(sph);
if (stage == Shader::Stage::Geometry) {
file.read(reinterpret_cast<char*>(&gp_passthrough_mask), sizeof(gp_passthrough_mask));
}
}
}
void FileEnvironment::Dump(u64 [[maybe_unused]] hash) {
DumpImpl(hash, code.get(), read_highest, read_lowest, initial_offset, stage);
}
u64 FileEnvironment::ReadInstruction(u32 address) {
if (address < read_lowest || address > read_highest) {
throw Shader::LogicError("Out of bounds address {}", address);

View File

@@ -57,6 +57,8 @@ public:
[[nodiscard]] u64 CalculateHash() const;
void Dump(u64 hash) override;
void Serialize(std::ofstream& file) const;
protected:
@@ -82,6 +84,7 @@ protected:
u32 cached_lowest = std::numeric_limits<u32>::max();
u32 cached_highest = 0;
u32 initial_offset = 0;
bool has_unbound_instructions = false;
};
@@ -149,6 +152,8 @@ public:
[[nodiscard]] std::array<u32, 3> WorkgroupSize() const override;
void Dump(u64 hash) override;
private:
std::unique_ptr<u64[]> code;
std::unordered_map<u32, Shader::TextureType> texture_types;
@@ -159,6 +164,7 @@ private:
u32 texture_bound{};
u32 read_lowest{};
u32 read_highest{};
u32 initial_offset{};
};
void SerializePipeline(std::span<const char> key, std::span<const GenericEnvironment* const> envs,

View File

@@ -1397,7 +1397,8 @@ ImageViewId TextureCache<P>::FindOrEmplaceImageView(ImageId image_id, const Imag
if (const ImageViewId image_view_id = image.FindView(info); image_view_id) {
return image_view_id;
}
const ImageViewId image_view_id = slot_image_views.insert(runtime, info, image_id, image);
const ImageViewId image_view_id =
slot_image_views.insert(runtime, info, image_id, image, slot_images);
image.InsertView(info, image_view_id);
return image_view_id;
}

View File

@@ -364,14 +364,14 @@ template <u32 GOB_EXTENT>
[[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapRightAddress2D(
const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) {
const u32 layer_stride = new_info.layer_stride;
const s32 new_size = layer_stride * new_info.resources.layers;
const s32 diff = static_cast<s32>(overlap.gpu_addr - gpu_addr);
const u64 layer_stride = new_info.layer_stride;
const u64 new_size = layer_stride * new_info.resources.layers;
const u64 diff = overlap.gpu_addr - gpu_addr;
if (diff > new_size) {
return std::nullopt;
}
const s32 base_layer = diff / layer_stride;
const s32 mip_offset = diff % layer_stride;
const s32 base_layer = static_cast<s32>(diff / layer_stride);
const s32 mip_offset = static_cast<s32>(diff % layer_stride);
const std::array offsets = CalculateMipLevelOffsets(new_info);
const auto end = offsets.begin() + new_info.resources.levels;
const auto it = std::find(offsets.begin(), end, static_cast<u32>(mip_offset));

View File

@@ -638,15 +638,20 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
}
}
if (ext_vertex_input_dynamic_state && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) {
const bool is_intel_windows = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS;
if (ext_vertex_input_dynamic_state && is_intel_windows) {
LOG_WARNING(Render_Vulkan, "Blacklisting Intel for VK_EXT_vertex_input_dynamic_state");
ext_vertex_input_dynamic_state = false;
}
if (is_float16_supported && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) {
if (is_float16_supported && is_intel_windows) {
// Intel's compiler crashes when using fp16 on Astral Chain, disable it for the time being.
LOG_WARNING(Render_Vulkan, "Blacklisting Intel proprietary from float16 math");
is_float16_supported = false;
}
if (is_intel_windows) {
LOG_WARNING(Render_Vulkan, "Intel proprietary drivers do not support MSAA image blits");
cant_blit_msaa = true;
}
supports_d24_depth =
IsFormatSupported(VK_FORMAT_D24_UNORM_S8_UINT,

View File

@@ -350,6 +350,10 @@ public:
return supports_d24_depth;
}
bool CantBlitMSAA() const {
return cant_blit_msaa;
}
private:
/// Checks if the physical device is suitable.
void CheckSuitability(bool requires_swapchain) const;
@@ -443,6 +447,7 @@ private:
bool has_renderdoc{}; ///< Has RenderDoc attached
bool has_nsight_graphics{}; ///< Has Nsight Graphics attached
bool supports_d24_depth{}; ///< Supports D24 depth buffers.
bool cant_blit_msaa{}; ///< Does not support MSAA<->MSAA blitting.
// Telemetry parameters
std::string vendor_name; ///< Device's driver name.

View File

@@ -33,7 +33,7 @@ void UpdateController(Core::HID::EmulatedController* controller,
}
controller->SetNpadStyleIndex(controller_type);
if (connected) {
controller->Connect();
controller->Connect(true);
}
}

View File

@@ -51,6 +51,8 @@ void ConfigureDebug::SetConfiguration() {
ui->enable_cpu_debugging->setChecked(Settings::values.cpu_debug_mode.GetValue());
ui->enable_nsight_aftermath->setEnabled(runtime_lock);
ui->enable_nsight_aftermath->setChecked(Settings::values.enable_nsight_aftermath.GetValue());
ui->dump_shaders->setEnabled(runtime_lock);
ui->dump_shaders->setChecked(Settings::values.dump_shaders.GetValue());
ui->disable_macro_jit->setEnabled(runtime_lock);
ui->disable_macro_jit->setChecked(Settings::values.disable_macro_jit.GetValue());
ui->disable_loop_safety_checks->setEnabled(runtime_lock);
@@ -73,6 +75,7 @@ void ConfigureDebug::ApplyConfiguration() {
Settings::values.renderer_shader_feedback = ui->enable_shader_feedback->isChecked();
Settings::values.cpu_debug_mode = ui->enable_cpu_debugging->isChecked();
Settings::values.enable_nsight_aftermath = ui->enable_nsight_aftermath->isChecked();
Settings::values.dump_shaders = ui->dump_shaders->isChecked();
Settings::values.disable_shader_loop_safety_checks =
ui->disable_loop_safety_checks->isChecked();
Settings::values.disable_macro_jit = ui->disable_macro_jit->isChecked();

View File

@@ -105,6 +105,19 @@
</property>
</widget>
</item>
<item row="2" column="1">
<widget class="QCheckBox" name="dump_shaders">
<property name="enabled">
<bool>true</bool>
</property>
<property name="toolTip">
<string>When checked, it will dump all the original assembler shaders from the disk shader cache or game as found</string>
</property>
<property name="text">
<string>Dump Game Shaders</string>
</property>
</widget>
</item>
<item row="0" column="1">
<widget class="QCheckBox" name="disable_macro_jit">
<property name="enabled">

View File

@@ -599,11 +599,11 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i
if (is_connected) {
if (type == Core::HID::NpadStyleIndex::Handheld) {
emulated_controller_p1->Disconnect();
emulated_controller_handheld->Connect();
emulated_controller_handheld->Connect(true);
emulated_controller = emulated_controller_handheld;
} else {
emulated_controller_handheld->Disconnect();
emulated_controller_p1->Connect();
emulated_controller_p1->Connect(true);
emulated_controller = emulated_controller_p1;
}
}
@@ -718,7 +718,7 @@ void ConfigureInputPlayer::LoadConfiguration() {
void ConfigureInputPlayer::ConnectPlayer(bool connected) {
ui->groupConnectedController->setChecked(connected);
if (connected) {
emulated_controller->Connect();
emulated_controller->Connect(true);
} else {
emulated_controller->Disconnect();
}