Compare commits

..

22 Commits

Author SHA1 Message Date
Lody
bd11b10298 shader: rewrite LOP3.LUT
shader: opt
2022-03-07 12:15:34 +08:00
Ameer J
370e480c8c gl_graphics_pipeline: Improve shader builder synchronization using fences (#7969)
* gl_graphics_pipeline: Improve shader builder synchronization

Make use of GLsync objects to ensure better synchronization between shader builder threads and the main context

* gl_graphics_pipeline: Make built_fence access threadsafe

* gl_graphics_pipeline: Use GLsync objects only when building in parallel

* gl_graphics_pipeline: Replace GetSync calls with non-blocking waits

The spec states that a ClientWait on a Fence object ensures the changes propagate to the calling context
2022-03-06 16:46:49 +01:00
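A minimal sketch of the fence pattern this commit describes, assuming a GL 3.2+ context; the helper names (InsertBuildFence, IsBuildReady) are placeholders for illustration, not yuzu's gl_graphics_pipeline API.

#include <glad/glad.h>  // or any loader exposing core GL sync objects

// Builder context: place a fence after the program is linked, and flush so the
// fence command is actually submitted to the GL server.
GLsync InsertBuildFence() {
    GLsync fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
    glFlush();
    return fence;
}

// Main context: poll with a zero timeout instead of blocking. Per the spec, a
// successful client wait also makes the builder's changes visible here.
bool IsBuildReady(GLsync fence) {
    const GLenum res = glClientWaitSync(fence, GL_SYNC_FLUSH_COMMANDS_BIT, 0);
    // The caller is expected to glDeleteSync(fence) once this reports ready.
    return res == GL_ALREADY_SIGNALED || res == GL_CONDITION_SATISFIED;
}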
Fernando S
5192c64991 Merge pull request #7973 from Morph1984/debug-crash
host_memory: Fix fastmem crashes in debug builds
2022-03-06 04:49:27 +01:00
bunnei
a31c195749 Merge pull request #7935 from Wunkolo/logging-join-fix
logging: Convert `backend_thread` into an `std::jthread`
2022-03-02 19:09:26 -08:00
bunnei
3ab82e7582 Merge pull request #7956 from bunnei/improve-mem-manager
Kernel Memory Updates (Part 4): Revamp KMemoryManager & other fixes
2022-03-02 17:55:51 -08:00
Morph
b33f23cc46 host_memory: Fix fastmem crashes in debug builds
It is possible for virtual_offset to be non-zero when the iterator is at the beginning; in that case std::prev(it) may still be evaluated, leading to a crash in debug builds.

Co-Authored-By: Fernando S. <1731197+FernandoS27@users.noreply.github.com>
2022-03-02 18:36:59 -05:00
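As a rough illustration of the bug described above (a simplified stand-in using std::set, not the actual host_memory code; the real hunk appears in the first file diff below): with a || b, b is evaluated whenever a is false, so the old form could call std::prev on a begin iterator, while the ternary form cannot.

#include <iterator>
#include <set>

// Old shape: std::prev(it) is evaluated whenever is_root is false, which
// includes the it == s.begin() && offset != 0 case (checked-iterator crash).
bool OldShape(const std::set<int>& s, std::set<int>::iterator it, int offset) {
    const bool is_root = it == s.begin() && offset == 0;
    return is_root || *std::prev(it) == offset;
}

// New shape: std::prev(it) is only reached when it != s.begin().
bool NewShape(const std::set<int>& s, std::set<int>::iterator it, int offset) {
    return it == s.begin() ? offset == 0 : *std::prev(it) == offset;
}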
Fernando S
e06a133717 Merge pull request #7959 from merryhime/cmpxchg
dynarmic: Inline exclusive memory accesses
2022-03-01 22:50:52 +01:00
Mai M
3c47570563 Merge pull request #7967 from zhaobot/tx-update-20220301023432
Update translations (2022-03-01)
2022-03-01 00:50:28 -05:00
merry
ec9689f200 dynarmic: Update to latest master 2022-02-28 20:10:13 +00:00
bunnei
14d28a043d hle: kernel: Re-create memory layout at initialization.
- As the memory layout can only be derived once.
2022-02-27 18:00:09 -08:00
bunnei
16e5954fcb hle: kernel: Remove unused pool locals. 2022-02-27 18:00:09 -08:00
bunnei
f87f076162 hle: kernel: k_memory_manager: Rework for latest kernel behavior.
- Updates the KMemoryManager implementation against latest documentation.
- Reworks KMemoryLayout to be accessed throughout the kernel.
- Fixes an issue with pool sizes being incorrectly reported.
2022-02-27 18:00:09 -08:00
Wunkolo
913c2bd2cb logging: Convert backend_thread into an std::jthread
Was getting an unhandled `invalid_argument` [exception](https://en.cppreference.com/w/cpp/thread/thread/join) during
shutdown on my linux machine. This removes the need for a `StopBackendThread` function entirely since `jthread`
[automatically handles both checking if the thread is joinable and stopping the token before attempting to join](https://en.cppreference.com/w/cpp/thread/jthread/~jthread) in the case that `StartBackendThread` was never called.
2022-02-27 16:23:52 -08:00
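A minimal sketch of the std::jthread pattern this commit adopts; the worker body is a placeholder (the real backend pops log entries with a stop_token-aware PopWait), shown only to make the shutdown behaviour concrete.

#include <chrono>
#include <stop_token>
#include <thread>

struct BackendWorker {
    // ~jthread requests stop on its internal stop_source and then joins if the
    // thread is joinable, so no explicit StopBackendThread()/join() is needed,
    // even if the worker thread never did any real work.
    std::jthread thread{[](std::stop_token stop) {
        while (!stop.stop_requested()) {
            // Placeholder for a stop_token-aware blocking pop, e.g. PopWait(stop).
            std::this_thread::sleep_for(std::chrono::milliseconds(10));
        }
    }};
};

int main() {
    BackendWorker worker;  // leaving scope stops and joins the thread cleanly
}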
merry
16784e5bb3 dynarmic: Inline exclusive memory accesses
Inlines implementation of exclusive instructions into JITted code,
improving performance of applications relying heavily on these
instructions.

We also fastmem these instructions for additional speed, with
support for appropriate recompilation on fastmem failure.

An unsafe optimization to disable the intercore global_monitor is also
provided, should one wish to rely solely on cmpxchg semantics for
safety.

See also: merryhime/dynarmic#664
2022-02-27 19:40:05 +00:00
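As a conceptual illustration only (not dynarmic's generated code): the load/store-exclusive pair can be modelled with a compare-and-exchange, which is the cmpxchg semantics the unsafe ignore-global-monitor option falls back on.

#include <atomic>
#include <cstdint>

// Sketch of LDXR/STXR-style semantics over a guest word. The value observed at
// the exclusive load is re-checked at store time; the store succeeds only if
// no other core has written the location in between.
struct ExclusiveAccess {
    uint32_t expected = 0;

    uint32_t Load(const std::atomic<uint32_t>& mem) {
        expected = mem.load(std::memory_order_acquire);
        return expected;
    }

    // Returns true on success (the STXR status-0 case), false on failure.
    bool Store(std::atomic<uint32_t>& mem, uint32_t desired) {
        return mem.compare_exchange_strong(expected, desired,
                                           std::memory_order_acq_rel,
                                           std::memory_order_relaxed);
    }
};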
bunnei
adbb9c2b00 hle: kernel: k_page_heap: GetPhysicalAddr can be const. 2022-02-27 10:34:02 -08:00
bunnei
f7e65eb971 hle: kernel: k_page_heap: Remove superfluous constexpr. 2022-02-27 10:34:02 -08:00
bunnei
06e2b76c75 hle: kernel: k_page_heap: Various updates and improvements.
- KPageHeap tracks physical addresses, not virtual addresses.
- Various updates and improvements to match latest documentation for this type.
2022-02-27 10:34:02 -08:00
bunnei
5d1a81520c hle: kernel: Add initial_process.h header. 2022-02-27 10:34:02 -08:00
bunnei
a6496deeed hle: kernel: board: nx: Add k_memory_layout.h header. 2022-02-27 10:34:02 -08:00
bunnei
9b5e7971dc hle: kernel: k_system_control: Add GetRealMemorySize and update GetKernelPhysicalBaseAddress. 2022-02-27 10:34:02 -08:00
bunnei
18e77a54c3 hle: kernel: k_memory_layout: Add GetPhysicalLinearRegion. 2022-02-27 10:34:02 -08:00
bunnei
06a21ac229 hle: kernel: k_memory_region_types: Update for new regions. 2022-02-27 10:34:02 -08:00
37 changed files with 1111 additions and 478 deletions

View File

@@ -327,8 +327,8 @@ private:
bool IsNiechePlaceholder(size_t virtual_offset, size_t length) const {
const auto it = placeholders.upper_bound({virtual_offset, virtual_offset + length});
if (it != placeholders.end() && it->lower() == virtual_offset + length) {
const bool is_root = it == placeholders.begin() && virtual_offset == 0;
return is_root || std::prev(it)->upper() == virtual_offset;
return it == placeholders.begin() ? virtual_offset == 0
: std::prev(it)->upper() == virtual_offset;
}
return false;
}

View File

@@ -218,19 +218,17 @@ private:
Impl(const std::filesystem::path& file_backend_filename, const Filter& filter_)
: filter{filter_}, file_backend{file_backend_filename} {}
~Impl() {
StopBackendThread();
}
~Impl() = default;
void StartBackendThread() {
backend_thread = std::thread([this] {
backend_thread = std::jthread([this](std::stop_token stop_token) {
Common::SetCurrentThreadName("yuzu:Log");
Entry entry;
const auto write_logs = [this, &entry]() {
ForEachBackend([&entry](Backend& backend) { backend.Write(entry); });
};
while (!stop.stop_requested()) {
entry = message_queue.PopWait(stop.get_token());
while (!stop_token.stop_requested()) {
entry = message_queue.PopWait(stop_token);
if (entry.filename != nullptr) {
write_logs();
}
@@ -244,11 +242,6 @@ private:
});
}
void StopBackendThread() {
stop.request_stop();
backend_thread.join();
}
Entry CreateEntry(Class log_class, Level log_level, const char* filename, unsigned int line_nr,
const char* function, std::string&& message) const {
using std::chrono::duration_cast;
@@ -283,8 +276,7 @@ private:
ColorConsoleBackend color_console_backend{};
FileBackend file_backend;
std::stop_source stop;
std::thread backend_thread;
std::jthread backend_thread;
MPSCQueue<Entry, true> message_queue{};
std::chrono::steady_clock::time_point time_origin{std::chrono::steady_clock::now()};
};

View File

@@ -176,6 +176,7 @@ void RestoreGlobalState(bool is_powered_on) {
values.cpuopt_unsafe_ignore_standard_fpcr.SetGlobal(true);
values.cpuopt_unsafe_inaccurate_nan.SetGlobal(true);
values.cpuopt_unsafe_fastmem_check.SetGlobal(true);
values.cpuopt_unsafe_ignore_global_monitor.SetGlobal(true);
// Renderer
values.renderer_backend.SetGlobal(true);

View File

@@ -484,12 +484,15 @@ struct Values {
BasicSetting<bool> cpuopt_misc_ir{true, "cpuopt_misc_ir"};
BasicSetting<bool> cpuopt_reduce_misalign_checks{true, "cpuopt_reduce_misalign_checks"};
BasicSetting<bool> cpuopt_fastmem{true, "cpuopt_fastmem"};
BasicSetting<bool> cpuopt_fastmem_exclusives{true, "cpuopt_fastmem_exclusives"};
BasicSetting<bool> cpuopt_recompile_exclusives{true, "cpuopt_recompile_exclusives"};
Setting<bool> cpuopt_unsafe_unfuse_fma{true, "cpuopt_unsafe_unfuse_fma"};
Setting<bool> cpuopt_unsafe_reduce_fp_error{true, "cpuopt_unsafe_reduce_fp_error"};
Setting<bool> cpuopt_unsafe_ignore_standard_fpcr{true, "cpuopt_unsafe_ignore_standard_fpcr"};
Setting<bool> cpuopt_unsafe_inaccurate_nan{true, "cpuopt_unsafe_inaccurate_nan"};
Setting<bool> cpuopt_unsafe_fastmem_check{true, "cpuopt_unsafe_fastmem_check"};
Setting<bool> cpuopt_unsafe_ignore_global_monitor{true, "cpuopt_unsafe_ignore_global_monitor"};
// Renderer
RangedSetting<RendererBackend> renderer_backend{

View File

@@ -152,6 +152,7 @@ add_library(core STATIC
hle/api_version.h
hle/ipc.h
hle/ipc_helpers.h
hle/kernel/board/nintendo/nx/k_memory_layout.h
hle/kernel/board/nintendo/nx/k_system_control.cpp
hle/kernel/board/nintendo/nx/k_system_control.h
hle/kernel/board/nintendo/nx/secure_monitor.h
@@ -164,6 +165,7 @@ add_library(core STATIC
hle/kernel/hle_ipc.h
hle/kernel/init/init_slab_setup.cpp
hle/kernel/init/init_slab_setup.h
hle/kernel/initial_process.h
hle/kernel/k_address_arbiter.cpp
hle/kernel/k_address_arbiter.h
hle/kernel/k_address_space_info.cpp

View File

@@ -137,6 +137,8 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
config.page_table_pointer_mask_bits = Common::PageTable::ATTRIBUTE_BITS;
config.detect_misaligned_access_via_page_table = 16 | 32 | 64 | 128;
config.only_detect_misalignment_via_page_table_on_page_boundary = true;
config.fastmem_exclusive_access = true;
config.recompile_on_exclusive_fastmem_failure = true;
// Multi-process state
config.processor_id = core_index;
@@ -178,6 +180,12 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
if (!Settings::values.cpuopt_fastmem) {
config.fastmem_pointer = nullptr;
}
if (!Settings::values.cpuopt_fastmem_exclusives) {
config.fastmem_exclusive_access = false;
}
if (!Settings::values.cpuopt_recompile_exclusives) {
config.recompile_on_exclusive_fastmem_failure = false;
}
}
// Unsafe optimizations
@@ -195,6 +203,9 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
if (Settings::values.cpuopt_unsafe_inaccurate_nan) {
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN;
}
if (Settings::values.cpuopt_unsafe_ignore_global_monitor) {
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor;
}
}
// Curated optimizations
@@ -203,6 +214,7 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA;
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreStandardFPCRValue;
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN;
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor;
}
return std::make_unique<Dynarmic::A32::Jit>(config);

View File

@@ -185,6 +185,9 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
config.fastmem_pointer = page_table->fastmem_arena;
config.fastmem_address_space_bits = address_space_bits;
config.silently_mirror_fastmem = false;
config.fastmem_exclusive_access = true;
config.recompile_on_exclusive_fastmem_failure = true;
}
// Multi-process state
@@ -237,6 +240,12 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
if (!Settings::values.cpuopt_fastmem) {
config.fastmem_pointer = nullptr;
}
if (!Settings::values.cpuopt_fastmem_exclusives) {
config.fastmem_exclusive_access = false;
}
if (!Settings::values.cpuopt_recompile_exclusives) {
config.recompile_on_exclusive_fastmem_failure = false;
}
}
// Unsafe optimizations
@@ -254,6 +263,9 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
if (Settings::values.cpuopt_unsafe_fastmem_check) {
config.fastmem_address_space_bits = 64;
}
if (Settings::values.cpuopt_unsafe_ignore_global_monitor) {
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor;
}
}
// Curated optimizations
@@ -262,6 +274,7 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA;
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN;
config.fastmem_address_space_bits = 64;
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor;
}
return std::make_shared<Dynarmic::A64::Jit>(config);

View File

@@ -37,8 +37,8 @@ u128 DynarmicExclusiveMonitor::ExclusiveRead128(std::size_t core_index, VAddr ad
});
}
void DynarmicExclusiveMonitor::ClearExclusive() {
monitor.Clear();
void DynarmicExclusiveMonitor::ClearExclusive(std::size_t core_index) {
monitor.ClearProcessor(core_index);
}
bool DynarmicExclusiveMonitor::ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) {

View File

@@ -29,7 +29,7 @@ public:
u32 ExclusiveRead32(std::size_t core_index, VAddr addr) override;
u64 ExclusiveRead64(std::size_t core_index, VAddr addr) override;
u128 ExclusiveRead128(std::size_t core_index, VAddr addr) override;
void ClearExclusive() override;
void ClearExclusive(std::size_t core_index) override;
bool ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) override;
bool ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) override;

View File

@@ -23,7 +23,7 @@ public:
virtual u32 ExclusiveRead32(std::size_t core_index, VAddr addr) = 0;
virtual u64 ExclusiveRead64(std::size_t core_index, VAddr addr) = 0;
virtual u128 ExclusiveRead128(std::size_t core_index, VAddr addr) = 0;
virtual void ClearExclusive() = 0;
virtual void ClearExclusive(std::size_t core_index) = 0;
virtual bool ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) = 0;
virtual bool ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) = 0;

View File

@@ -0,0 +1,13 @@
// Copyright 2022 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/common_types.h"
namespace Kernel {
constexpr inline PAddr MainMemoryAddress = 0x80000000;
} // namespace Kernel

View File

@@ -39,6 +39,10 @@ Smc::MemoryArrangement GetMemoryArrangeForInit() {
}
} // namespace
size_t KSystemControl::Init::GetRealMemorySize() {
return GetIntendedMemorySize();
}
// Initialization.
size_t KSystemControl::Init::GetIntendedMemorySize() {
switch (GetMemorySizeForInit()) {
@@ -53,7 +57,13 @@ size_t KSystemControl::Init::GetIntendedMemorySize() {
}
PAddr KSystemControl::Init::GetKernelPhysicalBaseAddress(u64 base_address) {
return base_address;
const size_t real_dram_size = KSystemControl::Init::GetRealMemorySize();
const size_t intended_dram_size = KSystemControl::Init::GetIntendedMemorySize();
if (intended_dram_size * 2 < real_dram_size) {
return base_address;
} else {
return base_address + ((real_dram_size - intended_dram_size) / 2);
}
}
bool KSystemControl::Init::ShouldIncreaseThreadResourceLimit() {

View File

@@ -13,6 +13,7 @@ public:
class Init {
public:
// Initialization.
static std::size_t GetRealMemorySize();
static std::size_t GetIntendedMemorySize();
static PAddr GetKernelPhysicalBaseAddress(u64 base_address);
static bool ShouldIncreaseThreadResourceLimit();

View File

@@ -0,0 +1,23 @@
// Copyright 2022 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/common_types.h"
#include "common/literals.h"
#include "core/hle/kernel/board/nintendo/nx/k_memory_layout.h"
#include "core/hle/kernel/board/nintendo/nx/k_system_control.h"
namespace Kernel {
using namespace Common::Literals;
constexpr std::size_t InitialProcessBinarySizeMax = 12_MiB;
static inline PAddr GetInitialProcessBinaryPhysicalAddress() {
return Kernel::Board::Nintendo::Nx::KSystemControl::Init::GetKernelPhysicalBaseAddress(
MainMemoryAddress);
}
} // namespace Kernel

View File

@@ -49,7 +49,7 @@ bool DecrementIfLessThan(Core::System& system, s32* out, VAddr address, s32 valu
}
} else {
// Otherwise, clear our exclusive hold and finish
monitor.ClearExclusive();
monitor.ClearExclusive(current_core);
}
// We're done.
@@ -78,7 +78,7 @@ bool UpdateIfEqual(Core::System& system, s32* out, VAddr address, s32 value, s32
}
} else {
// Otherwise, clear our exclusive hold and finish.
monitor.ClearExclusive();
monitor.ClearExclusive(current_core);
}
// We're done.

View File

@@ -173,6 +173,10 @@ public:
return Dereference(FindVirtualLinear(address));
}
const KMemoryRegion& GetPhysicalLinearRegion(PAddr address) const {
return Dereference(FindPhysicalLinear(address));
}
const KMemoryRegion* GetPhysicalKernelTraceBufferRegion() const {
return GetPhysicalMemoryRegionTree().FindFirstDerived(KMemoryRegionType_KernelTraceBuffer);
}

View File

@@ -10,189 +10,412 @@
#include "common/scope_exit.h"
#include "core/core.h"
#include "core/device_memory.h"
#include "core/hle/kernel/initial_process.h"
#include "core/hle/kernel/k_memory_manager.h"
#include "core/hle/kernel/k_page_linked_list.h"
#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/svc_results.h"
#include "core/memory.h"
namespace Kernel {
KMemoryManager::KMemoryManager(Core::System& system_) : system{system_} {}
namespace {
std::size_t KMemoryManager::Impl::Initialize(Pool new_pool, u64 start_address, u64 end_address) {
const auto size{end_address - start_address};
// Calculate metadata sizes
const auto ref_count_size{(size / PageSize) * sizeof(u16)};
const auto optimize_map_size{(Common::AlignUp((size / PageSize), 64) / 64) * sizeof(u64)};
const auto manager_size{Common::AlignUp(optimize_map_size + ref_count_size, PageSize)};
const auto page_heap_size{KPageHeap::CalculateManagementOverheadSize(size)};
const auto total_metadata_size{manager_size + page_heap_size};
ASSERT(manager_size <= total_metadata_size);
ASSERT(Common::IsAligned(total_metadata_size, PageSize));
// Setup region
pool = new_pool;
// Initialize the manager's KPageHeap
heap.Initialize(start_address, size, page_heap_size);
// Free the memory to the heap
heap.Free(start_address, size / PageSize);
// Update the heap's used size
heap.UpdateUsedSize();
return total_metadata_size;
constexpr KMemoryManager::Pool GetPoolFromMemoryRegionType(u32 type) {
if ((type | KMemoryRegionType_DramApplicationPool) == type) {
return KMemoryManager::Pool::Application;
} else if ((type | KMemoryRegionType_DramAppletPool) == type) {
return KMemoryManager::Pool::Applet;
} else if ((type | KMemoryRegionType_DramSystemPool) == type) {
return KMemoryManager::Pool::System;
} else if ((type | KMemoryRegionType_DramSystemNonSecurePool) == type) {
return KMemoryManager::Pool::SystemNonSecure;
} else {
UNREACHABLE_MSG("InvalidMemoryRegionType for conversion to Pool");
return {};
}
}
void KMemoryManager::InitializeManager(Pool pool, u64 start_address, u64 end_address) {
ASSERT(pool < Pool::Count);
managers[static_cast<std::size_t>(pool)].Initialize(pool, start_address, end_address);
} // namespace
KMemoryManager::KMemoryManager(Core::System& system_)
: system{system_}, pool_locks{
KLightLock{system_.Kernel()},
KLightLock{system_.Kernel()},
KLightLock{system_.Kernel()},
KLightLock{system_.Kernel()},
} {}
void KMemoryManager::Initialize(VAddr management_region, size_t management_region_size) {
// Clear the management region to zero.
const VAddr management_region_end = management_region + management_region_size;
// Reset our manager count.
num_managers = 0;
// Traverse the virtual memory layout tree, initializing each manager as appropriate.
while (num_managers != MaxManagerCount) {
// Locate the region that should initialize the current manager.
PAddr region_address = 0;
size_t region_size = 0;
Pool region_pool = Pool::Count;
for (const auto& it : system.Kernel().MemoryLayout().GetPhysicalMemoryRegionTree()) {
// We only care about regions that we need to create managers for.
if (!it.IsDerivedFrom(KMemoryRegionType_DramUserPool)) {
continue;
}
// We want to initialize the managers in order.
if (it.GetAttributes() != num_managers) {
continue;
}
const PAddr cur_start = it.GetAddress();
const PAddr cur_end = it.GetEndAddress();
// Validate the region.
ASSERT(cur_end != 0);
ASSERT(cur_start != 0);
ASSERT(it.GetSize() > 0);
// Update the region's extents.
if (region_address == 0) {
region_address = cur_start;
region_size = it.GetSize();
region_pool = GetPoolFromMemoryRegionType(it.GetType());
} else {
ASSERT(cur_start == region_address + region_size);
// Update the size.
region_size = cur_end - region_address;
ASSERT(GetPoolFromMemoryRegionType(it.GetType()) == region_pool);
}
}
// If we didn't find a region, we're done.
if (region_size == 0) {
break;
}
// Initialize a new manager for the region.
Impl* manager = std::addressof(managers[num_managers++]);
ASSERT(num_managers <= managers.size());
const size_t cur_size = manager->Initialize(region_address, region_size, management_region,
management_region_end, region_pool);
management_region += cur_size;
ASSERT(management_region <= management_region_end);
// Insert the manager into the pool list.
const auto region_pool_index = static_cast<u32>(region_pool);
if (pool_managers_tail[region_pool_index] == nullptr) {
pool_managers_head[region_pool_index] = manager;
} else {
pool_managers_tail[region_pool_index]->SetNext(manager);
manager->SetPrev(pool_managers_tail[region_pool_index]);
}
pool_managers_tail[region_pool_index] = manager;
}
// Free each region to its corresponding heap.
size_t reserved_sizes[MaxManagerCount] = {};
const PAddr ini_start = GetInitialProcessBinaryPhysicalAddress();
const PAddr ini_end = ini_start + InitialProcessBinarySizeMax;
const PAddr ini_last = ini_end - 1;
for (const auto& it : system.Kernel().MemoryLayout().GetPhysicalMemoryRegionTree()) {
if (it.IsDerivedFrom(KMemoryRegionType_DramUserPool)) {
// Get the manager for the region.
auto index = it.GetAttributes();
auto& manager = managers[index];
const PAddr cur_start = it.GetAddress();
const PAddr cur_last = it.GetLastAddress();
const PAddr cur_end = it.GetEndAddress();
if (cur_start <= ini_start && ini_last <= cur_last) {
// Free memory before the ini to the heap.
if (cur_start != ini_start) {
manager.Free(cur_start, (ini_start - cur_start) / PageSize);
}
// Open/reserve the ini memory.
manager.OpenFirst(ini_start, InitialProcessBinarySizeMax / PageSize);
reserved_sizes[it.GetAttributes()] += InitialProcessBinarySizeMax;
// Free memory after the ini to the heap.
if (ini_last != cur_last) {
ASSERT(cur_end != 0);
manager.Free(ini_end, cur_end - ini_end);
}
} else {
// Ensure there's no partial overlap with the ini image.
if (cur_start <= ini_last) {
ASSERT(cur_last < ini_start);
} else {
// Otherwise, check the region for general validity.
ASSERT(cur_end != 0);
}
// Free the memory to the heap.
manager.Free(cur_start, it.GetSize() / PageSize);
}
}
}
// Update the used size for all managers.
for (size_t i = 0; i < num_managers; ++i) {
managers[i].SetInitialUsedHeapSize(reserved_sizes[i]);
}
}
VAddr KMemoryManager::AllocateAndOpenContinuous(std::size_t num_pages, std::size_t align_pages,
u32 option) {
// Early return if we're allocating no pages
PAddr KMemoryManager::AllocateAndOpenContinuous(size_t num_pages, size_t align_pages, u32 option) {
// Early return if we're allocating no pages.
if (num_pages == 0) {
return {};
return 0;
}
// Lock the pool that we're allocating from
// Lock the pool that we're allocating from.
const auto [pool, dir] = DecodeOption(option);
const auto pool_index{static_cast<std::size_t>(pool)};
std::lock_guard lock{pool_locks[pool_index]};
KScopedLightLock lk(pool_locks[static_cast<std::size_t>(pool)]);
// Choose a heap based on our page size request
const s32 heap_index{KPageHeap::GetAlignedBlockIndex(num_pages, align_pages)};
// Choose a heap based on our page size request.
const s32 heap_index = KPageHeap::GetAlignedBlockIndex(num_pages, align_pages);
// Loop, trying to iterate from each block
// TODO (bunnei): Support multiple managers
Impl& chosen_manager{managers[pool_index]};
VAddr allocated_block{chosen_manager.AllocateBlock(heap_index, false)};
// If we failed to allocate, quit now
if (!allocated_block) {
return {};
// Loop, trying to iterate from each block.
Impl* chosen_manager = nullptr;
PAddr allocated_block = 0;
for (chosen_manager = this->GetFirstManager(pool, dir); chosen_manager != nullptr;
chosen_manager = this->GetNextManager(chosen_manager, dir)) {
allocated_block = chosen_manager->AllocateBlock(heap_index, true);
if (allocated_block != 0) {
break;
}
}
// If we allocated more than we need, free some
const auto allocated_pages{KPageHeap::GetBlockNumPages(heap_index)};
// If we failed to allocate, quit now.
if (allocated_block == 0) {
return 0;
}
// If we allocated more than we need, free some.
const size_t allocated_pages = KPageHeap::GetBlockNumPages(heap_index);
if (allocated_pages > num_pages) {
chosen_manager.Free(allocated_block + num_pages * PageSize, allocated_pages - num_pages);
chosen_manager->Free(allocated_block + num_pages * PageSize, allocated_pages - num_pages);
}
// Open the first reference to the pages.
chosen_manager->OpenFirst(allocated_block, num_pages);
return allocated_block;
}
ResultCode KMemoryManager::Allocate(KPageLinkedList& page_list, std::size_t num_pages, Pool pool,
Direction dir, u32 heap_fill_value) {
ASSERT(page_list.GetNumPages() == 0);
ResultCode KMemoryManager::AllocatePageGroupImpl(KPageLinkedList* out, size_t num_pages, Pool pool,
Direction dir, bool random) {
// Choose a heap based on our page size request.
const s32 heap_index = KPageHeap::GetBlockIndex(num_pages);
R_UNLESS(0 <= heap_index, ResultOutOfMemory);
// Early return if we're allocating no pages
if (num_pages == 0) {
return ResultSuccess;
}
// Lock the pool that we're allocating from
const auto pool_index{static_cast<std::size_t>(pool)};
std::lock_guard lock{pool_locks[pool_index]};
// Choose a heap based on our page size request
const s32 heap_index{KPageHeap::GetBlockIndex(num_pages)};
if (heap_index < 0) {
return ResultOutOfMemory;
}
// TODO (bunnei): Support multiple managers
Impl& chosen_manager{managers[pool_index]};
// Ensure that we don't leave anything un-freed
auto group_guard = detail::ScopeExit([&] {
for (const auto& it : page_list.Nodes()) {
const auto min_num_pages{std::min<size_t>(
it.GetNumPages(), (chosen_manager.GetEndAddress() - it.GetAddress()) / PageSize)};
chosen_manager.Free(it.GetAddress(), min_num_pages);
// Ensure that we don't leave anything un-freed.
auto group_guard = SCOPE_GUARD({
for (const auto& it : out->Nodes()) {
auto& manager = this->GetManager(system.Kernel().MemoryLayout(), it.GetAddress());
const size_t num_pages_to_free =
std::min(it.GetNumPages(), (manager.GetEndAddress() - it.GetAddress()) / PageSize);
manager.Free(it.GetAddress(), num_pages_to_free);
}
});
// Keep allocating until we've allocated all our pages
for (s32 index{heap_index}; index >= 0 && num_pages > 0; index--) {
const auto pages_per_alloc{KPageHeap::GetBlockNumPages(index)};
while (num_pages >= pages_per_alloc) {
// Allocate a block
VAddr allocated_block{chosen_manager.AllocateBlock(index, false)};
if (!allocated_block) {
break;
}
// Safely add it to our group
{
auto block_guard = detail::ScopeExit(
[&] { chosen_manager.Free(allocated_block, pages_per_alloc); });
if (const ResultCode result{page_list.AddBlock(allocated_block, pages_per_alloc)};
result.IsError()) {
return result;
// Keep allocating until we've allocated all our pages.
for (s32 index = heap_index; index >= 0 && num_pages > 0; index--) {
const size_t pages_per_alloc = KPageHeap::GetBlockNumPages(index);
for (Impl* cur_manager = this->GetFirstManager(pool, dir); cur_manager != nullptr;
cur_manager = this->GetNextManager(cur_manager, dir)) {
while (num_pages >= pages_per_alloc) {
// Allocate a block.
PAddr allocated_block = cur_manager->AllocateBlock(index, random);
if (allocated_block == 0) {
break;
}
block_guard.Cancel();
}
// Safely add it to our group.
{
auto block_guard =
SCOPE_GUARD({ cur_manager->Free(allocated_block, pages_per_alloc); });
R_TRY(out->AddBlock(allocated_block, pages_per_alloc));
block_guard.Cancel();
}
num_pages -= pages_per_alloc;
num_pages -= pages_per_alloc;
}
}
}
// Clear allocated memory.
for (const auto& it : page_list.Nodes()) {
std::memset(system.DeviceMemory().GetPointer(it.GetAddress()), heap_fill_value,
it.GetSize());
}
// Only succeed if we allocated as many pages as we wanted
if (num_pages) {
return ResultOutOfMemory;
}
// Only succeed if we allocated as many pages as we wanted.
R_UNLESS(num_pages == 0, ResultOutOfMemory);
// We succeeded!
group_guard.Cancel();
return ResultSuccess;
}
ResultCode KMemoryManager::Free(KPageLinkedList& page_list, std::size_t num_pages, Pool pool,
Direction dir, u32 heap_fill_value) {
// Early return if we're freeing no pages
if (!num_pages) {
return ResultSuccess;
}
ResultCode KMemoryManager::AllocateAndOpen(KPageLinkedList* out, size_t num_pages, u32 option) {
ASSERT(out != nullptr);
ASSERT(out->GetNumPages() == 0);
// Lock the pool that we're freeing from
const auto pool_index{static_cast<std::size_t>(pool)};
std::lock_guard lock{pool_locks[pool_index]};
// Early return if we're allocating no pages.
R_SUCCEED_IF(num_pages == 0);
// TODO (bunnei): Support multiple managers
Impl& chosen_manager{managers[pool_index]};
// Lock the pool that we're allocating from.
const auto [pool, dir] = DecodeOption(option);
KScopedLightLock lk(pool_locks[static_cast<size_t>(pool)]);
// Free all of the pages
for (const auto& it : page_list.Nodes()) {
const auto min_num_pages{std::min<size_t>(
it.GetNumPages(), (chosen_manager.GetEndAddress() - it.GetAddress()) / PageSize)};
chosen_manager.Free(it.GetAddress(), min_num_pages);
// Allocate the page group.
R_TRY(this->AllocatePageGroupImpl(out, num_pages, pool, dir, false));
// Open the first reference to the pages.
for (const auto& block : out->Nodes()) {
PAddr cur_address = block.GetAddress();
size_t remaining_pages = block.GetNumPages();
while (remaining_pages > 0) {
// Get the manager for the current address.
auto& manager = this->GetManager(system.Kernel().MemoryLayout(), cur_address);
// Process part or all of the block.
const size_t cur_pages =
std::min(remaining_pages, manager.GetPageOffsetToEnd(cur_address));
manager.OpenFirst(cur_address, cur_pages);
// Advance.
cur_address += cur_pages * PageSize;
remaining_pages -= cur_pages;
}
}
return ResultSuccess;
}
std::size_t KMemoryManager::Impl::CalculateManagementOverheadSize(std::size_t region_size) {
const std::size_t ref_count_size = (region_size / PageSize) * sizeof(u16);
const std::size_t optimize_map_size =
ResultCode KMemoryManager::AllocateAndOpenForProcess(KPageLinkedList* out, size_t num_pages,
u32 option, u64 process_id, u8 fill_pattern) {
ASSERT(out != nullptr);
ASSERT(out->GetNumPages() == 0);
// Decode the option.
const auto [pool, dir] = DecodeOption(option);
// Allocate the memory.
{
// Lock the pool that we're allocating from.
KScopedLightLock lk(pool_locks[static_cast<size_t>(pool)]);
// Allocate the page group.
R_TRY(this->AllocatePageGroupImpl(out, num_pages, pool, dir, false));
// Open the first reference to the pages.
for (const auto& block : out->Nodes()) {
PAddr cur_address = block.GetAddress();
size_t remaining_pages = block.GetNumPages();
while (remaining_pages > 0) {
// Get the manager for the current address.
auto& manager = this->GetManager(system.Kernel().MemoryLayout(), cur_address);
// Process part or all of the block.
const size_t cur_pages =
std::min(remaining_pages, manager.GetPageOffsetToEnd(cur_address));
manager.OpenFirst(cur_address, cur_pages);
// Advance.
cur_address += cur_pages * PageSize;
remaining_pages -= cur_pages;
}
}
}
// Set all the allocated memory.
for (const auto& block : out->Nodes()) {
std::memset(system.DeviceMemory().GetPointer(block.GetAddress()), fill_pattern,
block.GetSize());
}
return ResultSuccess;
}
void KMemoryManager::Open(PAddr address, size_t num_pages) {
// Repeatedly open references until we've done so for all pages.
while (num_pages) {
auto& manager = this->GetManager(system.Kernel().MemoryLayout(), address);
const size_t cur_pages = std::min(num_pages, manager.GetPageOffsetToEnd(address));
{
KScopedLightLock lk(pool_locks[static_cast<size_t>(manager.GetPool())]);
manager.Open(address, cur_pages);
}
num_pages -= cur_pages;
address += cur_pages * PageSize;
}
}
void KMemoryManager::Close(PAddr address, size_t num_pages) {
// Repeatedly close references until we've done so for all pages.
while (num_pages) {
auto& manager = this->GetManager(system.Kernel().MemoryLayout(), address);
const size_t cur_pages = std::min(num_pages, manager.GetPageOffsetToEnd(address));
{
KScopedLightLock lk(pool_locks[static_cast<size_t>(manager.GetPool())]);
manager.Close(address, cur_pages);
}
num_pages -= cur_pages;
address += cur_pages * PageSize;
}
}
void KMemoryManager::Close(const KPageLinkedList& pg) {
for (const auto& node : pg.Nodes()) {
Close(node.GetAddress(), node.GetNumPages());
}
}
void KMemoryManager::Open(const KPageLinkedList& pg) {
for (const auto& node : pg.Nodes()) {
Open(node.GetAddress(), node.GetNumPages());
}
}
size_t KMemoryManager::Impl::Initialize(PAddr address, size_t size, VAddr management,
VAddr management_end, Pool p) {
// Calculate management sizes.
const size_t ref_count_size = (size / PageSize) * sizeof(u16);
const size_t optimize_map_size = CalculateOptimizedProcessOverheadSize(size);
const size_t manager_size = Common::AlignUp(optimize_map_size + ref_count_size, PageSize);
const size_t page_heap_size = KPageHeap::CalculateManagementOverheadSize(size);
const size_t total_management_size = manager_size + page_heap_size;
ASSERT(manager_size <= total_management_size);
ASSERT(management + total_management_size <= management_end);
ASSERT(Common::IsAligned(total_management_size, PageSize));
// Setup region.
pool = p;
management_region = management;
page_reference_counts.resize(
Kernel::Board::Nintendo::Nx::KSystemControl::Init::GetIntendedMemorySize() / PageSize);
ASSERT(Common::IsAligned(management_region, PageSize));
// Initialize the manager's KPageHeap.
heap.Initialize(address, size, management + manager_size, page_heap_size);
return total_management_size;
}
size_t KMemoryManager::Impl::CalculateManagementOverheadSize(size_t region_size) {
const size_t ref_count_size = (region_size / PageSize) * sizeof(u16);
const size_t optimize_map_size =
(Common::AlignUp((region_size / PageSize), Common::BitSize<u64>()) /
Common::BitSize<u64>()) *
sizeof(u64);
const std::size_t manager_meta_size =
Common::AlignUp(optimize_map_size + ref_count_size, PageSize);
const std::size_t page_heap_size = KPageHeap::CalculateManagementOverheadSize(region_size);
const size_t manager_meta_size = Common::AlignUp(optimize_map_size + ref_count_size, PageSize);
const size_t page_heap_size = KPageHeap::CalculateManagementOverheadSize(region_size);
return manager_meta_size + page_heap_size;
}

View File

@@ -5,11 +5,12 @@
#pragma once
#include <array>
#include <mutex>
#include <tuple>
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "core/hle/kernel/k_light_lock.h"
#include "core/hle/kernel/k_memory_layout.h"
#include "core/hle/kernel/k_page_heap.h"
#include "core/hle/result.h"
@@ -52,22 +53,33 @@ public:
explicit KMemoryManager(Core::System& system_);
constexpr std::size_t GetSize(Pool pool) const {
return managers[static_cast<std::size_t>(pool)].GetSize();
void Initialize(VAddr management_region, size_t management_region_size);
constexpr size_t GetSize(Pool pool) const {
constexpr Direction GetSizeDirection = Direction::FromFront;
size_t total = 0;
for (auto* manager = this->GetFirstManager(pool, GetSizeDirection); manager != nullptr;
manager = this->GetNextManager(manager, GetSizeDirection)) {
total += manager->GetSize();
}
return total;
}
void InitializeManager(Pool pool, u64 start_address, u64 end_address);
PAddr AllocateAndOpenContinuous(size_t num_pages, size_t align_pages, u32 option);
ResultCode AllocateAndOpen(KPageLinkedList* out, size_t num_pages, u32 option);
ResultCode AllocateAndOpenForProcess(KPageLinkedList* out, size_t num_pages, u32 option,
u64 process_id, u8 fill_pattern);
VAddr AllocateAndOpenContinuous(size_t num_pages, size_t align_pages, u32 option);
ResultCode Allocate(KPageLinkedList& page_list, std::size_t num_pages, Pool pool, Direction dir,
u32 heap_fill_value = 0);
ResultCode Free(KPageLinkedList& page_list, std::size_t num_pages, Pool pool, Direction dir,
u32 heap_fill_value = 0);
static constexpr size_t MaxManagerCount = 10;
static constexpr std::size_t MaxManagerCount = 10;
void Close(PAddr address, size_t num_pages);
void Close(const KPageLinkedList& pg);
void Open(PAddr address, size_t num_pages);
void Open(const KPageLinkedList& pg);
public:
static std::size_t CalculateManagementOverheadSize(std::size_t region_size) {
static size_t CalculateManagementOverheadSize(size_t region_size) {
return Impl::CalculateManagementOverheadSize(region_size);
}
@@ -100,17 +112,26 @@ private:
Impl() = default;
~Impl() = default;
std::size_t Initialize(Pool new_pool, u64 start_address, u64 end_address);
size_t Initialize(PAddr address, size_t size, VAddr management, VAddr management_end,
Pool p);
VAddr AllocateBlock(s32 index, bool random) {
return heap.AllocateBlock(index, random);
}
void Free(VAddr addr, std::size_t num_pages) {
void Free(VAddr addr, size_t num_pages) {
heap.Free(addr, num_pages);
}
constexpr std::size_t GetSize() const {
void SetInitialUsedHeapSize(size_t reserved_size) {
heap.SetInitialUsedSize(reserved_size);
}
constexpr Pool GetPool() const {
return pool;
}
constexpr size_t GetSize() const {
return heap.GetSize();
}
@@ -122,10 +143,88 @@ private:
return heap.GetEndAddress();
}
static std::size_t CalculateManagementOverheadSize(std::size_t region_size);
constexpr size_t GetPageOffset(PAddr address) const {
return heap.GetPageOffset(address);
}
static constexpr std::size_t CalculateOptimizedProcessOverheadSize(
std::size_t region_size) {
constexpr size_t GetPageOffsetToEnd(PAddr address) const {
return heap.GetPageOffsetToEnd(address);
}
constexpr void SetNext(Impl* n) {
next = n;
}
constexpr void SetPrev(Impl* n) {
prev = n;
}
constexpr Impl* GetNext() const {
return next;
}
constexpr Impl* GetPrev() const {
return prev;
}
void OpenFirst(PAddr address, size_t num_pages) {
size_t index = this->GetPageOffset(address);
const size_t end = index + num_pages;
while (index < end) {
const RefCount ref_count = (++page_reference_counts[index]);
ASSERT(ref_count == 1);
index++;
}
}
void Open(PAddr address, size_t num_pages) {
size_t index = this->GetPageOffset(address);
const size_t end = index + num_pages;
while (index < end) {
const RefCount ref_count = (++page_reference_counts[index]);
ASSERT(ref_count > 1);
index++;
}
}
void Close(PAddr address, size_t num_pages) {
size_t index = this->GetPageOffset(address);
const size_t end = index + num_pages;
size_t free_start = 0;
size_t free_count = 0;
while (index < end) {
ASSERT(page_reference_counts[index] > 0);
const RefCount ref_count = (--page_reference_counts[index]);
// Keep track of how many zero refcounts we see in a row, to minimize calls to free.
if (ref_count == 0) {
if (free_count > 0) {
free_count++;
} else {
free_start = index;
free_count = 1;
}
} else {
if (free_count > 0) {
this->Free(heap.GetAddress() + free_start * PageSize, free_count);
free_count = 0;
}
}
index++;
}
if (free_count > 0) {
this->Free(heap.GetAddress() + free_start * PageSize, free_count);
}
}
static size_t CalculateManagementOverheadSize(size_t region_size);
static constexpr size_t CalculateOptimizedProcessOverheadSize(size_t region_size) {
return (Common::AlignUp((region_size / PageSize), Common::BitSize<u64>()) /
Common::BitSize<u64>()) *
sizeof(u64);
@@ -135,13 +234,45 @@ private:
using RefCount = u16;
KPageHeap heap;
std::vector<RefCount> page_reference_counts;
VAddr management_region{};
Pool pool{};
Impl* next{};
Impl* prev{};
};
private:
Impl& GetManager(const KMemoryLayout& memory_layout, PAddr address) {
return managers[memory_layout.GetPhysicalLinearRegion(address).GetAttributes()];
}
const Impl& GetManager(const KMemoryLayout& memory_layout, PAddr address) const {
return managers[memory_layout.GetPhysicalLinearRegion(address).GetAttributes()];
}
constexpr Impl* GetFirstManager(Pool pool, Direction dir) const {
return dir == Direction::FromBack ? pool_managers_tail[static_cast<size_t>(pool)]
: pool_managers_head[static_cast<size_t>(pool)];
}
constexpr Impl* GetNextManager(Impl* cur, Direction dir) const {
if (dir == Direction::FromBack) {
return cur->GetPrev();
} else {
return cur->GetNext();
}
}
ResultCode AllocatePageGroupImpl(KPageLinkedList* out, size_t num_pages, Pool pool,
Direction dir, bool random);
private:
Core::System& system;
std::array<std::mutex, static_cast<std::size_t>(Pool::Count)> pool_locks;
std::array<KLightLock, static_cast<size_t>(Pool::Count)> pool_locks;
std::array<Impl*, MaxManagerCount> pool_managers_head{};
std::array<Impl*, MaxManagerCount> pool_managers_tail{};
std::array<Impl, MaxManagerCount> managers;
size_t num_managers{};
};
} // namespace Kernel

View File

@@ -14,7 +14,8 @@
namespace Kernel {
enum KMemoryRegionType : u32 {
KMemoryRegionAttr_CarveoutProtected = 0x04000000,
KMemoryRegionAttr_CarveoutProtected = 0x02000000,
KMemoryRegionAttr_Uncached = 0x04000000,
KMemoryRegionAttr_DidKernelMap = 0x08000000,
KMemoryRegionAttr_ShouldKernelMap = 0x10000000,
KMemoryRegionAttr_UserReadOnly = 0x20000000,
@@ -239,6 +240,11 @@ static_assert(KMemoryRegionType_VirtualDramHeapBase.GetValue() == 0x1A);
static_assert(KMemoryRegionType_VirtualDramKernelPtHeap.GetValue() == 0x2A);
static_assert(KMemoryRegionType_VirtualDramKernelTraceBuffer.GetValue() == 0x4A);
// UNUSED: .DeriveSparse(2, 2, 0);
constexpr auto KMemoryRegionType_VirtualDramUnknownDebug =
KMemoryRegionType_Dram.DeriveSparse(2, 2, 1);
static_assert(KMemoryRegionType_VirtualDramUnknownDebug.GetValue() == (0x52));
constexpr auto KMemoryRegionType_VirtualDramKernelInitPt =
KMemoryRegionType_VirtualDramHeapBase.Derive(3, 0);
constexpr auto KMemoryRegionType_VirtualDramPoolManagement =
@@ -330,6 +336,8 @@ constexpr KMemoryRegionType GetTypeForVirtualLinearMapping(u32 type_id) {
return KMemoryRegionType_VirtualDramKernelTraceBuffer;
} else if (KMemoryRegionType_DramKernelPtHeap.IsAncestorOf(type_id)) {
return KMemoryRegionType_VirtualDramKernelPtHeap;
} else if ((type_id | KMemoryRegionAttr_ShouldKernelMap) == type_id) {
return KMemoryRegionType_VirtualDramUnknownDebug;
} else {
return KMemoryRegionType_Dram;
}

View File

@@ -7,35 +7,51 @@
namespace Kernel {
void KPageHeap::Initialize(VAddr address, std::size_t size, std::size_t metadata_size) {
// Check our assumptions
ASSERT(Common::IsAligned((address), PageSize));
void KPageHeap::Initialize(PAddr address, size_t size, VAddr management_address,
size_t management_size, const size_t* block_shifts,
size_t num_block_shifts) {
// Check our assumptions.
ASSERT(Common::IsAligned(address, PageSize));
ASSERT(Common::IsAligned(size, PageSize));
ASSERT(0 < num_block_shifts && num_block_shifts <= NumMemoryBlockPageShifts);
const VAddr management_end = management_address + management_size;
// Set our members
heap_address = address;
heap_size = size;
// Set our members.
m_heap_address = address;
m_heap_size = size;
m_num_blocks = num_block_shifts;
// Setup bitmaps
metadata.resize(metadata_size / sizeof(u64));
u64* cur_bitmap_storage{metadata.data()};
for (std::size_t i = 0; i < MemoryBlockPageShifts.size(); i++) {
const std::size_t cur_block_shift{MemoryBlockPageShifts[i]};
const std::size_t next_block_shift{
(i != MemoryBlockPageShifts.size() - 1) ? MemoryBlockPageShifts[i + 1] : 0};
cur_bitmap_storage = blocks[i].Initialize(heap_address, heap_size, cur_block_shift,
next_block_shift, cur_bitmap_storage);
// Setup bitmaps.
m_management_data.resize(management_size / sizeof(u64));
u64* cur_bitmap_storage{m_management_data.data()};
for (size_t i = 0; i < num_block_shifts; i++) {
const size_t cur_block_shift = block_shifts[i];
const size_t next_block_shift = (i != num_block_shifts - 1) ? block_shifts[i + 1] : 0;
cur_bitmap_storage = m_blocks[i].Initialize(m_heap_address, m_heap_size, cur_block_shift,
next_block_shift, cur_bitmap_storage);
}
// Ensure we didn't overextend our bounds.
ASSERT(VAddr(cur_bitmap_storage) <= management_end);
}
VAddr KPageHeap::AllocateBlock(s32 index, bool random) {
const std::size_t needed_size{blocks[index].GetSize()};
size_t KPageHeap::GetNumFreePages() const {
size_t num_free = 0;
for (s32 i{index}; i < static_cast<s32>(MemoryBlockPageShifts.size()); i++) {
if (const VAddr addr{blocks[i].PopBlock(random)}; addr) {
if (const std::size_t allocated_size{blocks[i].GetSize()};
allocated_size > needed_size) {
Free(addr + needed_size, (allocated_size - needed_size) / PageSize);
for (size_t i = 0; i < m_num_blocks; i++) {
num_free += m_blocks[i].GetNumFreePages();
}
return num_free;
}
PAddr KPageHeap::AllocateBlock(s32 index, bool random) {
const size_t needed_size = m_blocks[index].GetSize();
for (s32 i = index; i < static_cast<s32>(m_num_blocks); i++) {
if (const PAddr addr = m_blocks[i].PopBlock(random); addr != 0) {
if (const size_t allocated_size = m_blocks[i].GetSize(); allocated_size > needed_size) {
this->Free(addr + needed_size, (allocated_size - needed_size) / PageSize);
}
return addr;
}
@@ -44,34 +60,34 @@ VAddr KPageHeap::AllocateBlock(s32 index, bool random) {
return 0;
}
void KPageHeap::FreeBlock(VAddr block, s32 index) {
void KPageHeap::FreeBlock(PAddr block, s32 index) {
do {
block = blocks[index++].PushBlock(block);
block = m_blocks[index++].PushBlock(block);
} while (block != 0);
}
void KPageHeap::Free(VAddr addr, std::size_t num_pages) {
// Freeing no pages is a no-op
void KPageHeap::Free(PAddr addr, size_t num_pages) {
// Freeing no pages is a no-op.
if (num_pages == 0) {
return;
}
// Find the largest block size that we can free, and free as many as possible
s32 big_index{static_cast<s32>(MemoryBlockPageShifts.size()) - 1};
const VAddr start{addr};
const VAddr end{(num_pages * PageSize) + addr};
VAddr before_start{start};
VAddr before_end{start};
VAddr after_start{end};
VAddr after_end{end};
// Find the largest block size that we can free, and free as many as possible.
s32 big_index = static_cast<s32>(m_num_blocks) - 1;
const PAddr start = addr;
const PAddr end = addr + num_pages * PageSize;
PAddr before_start = start;
PAddr before_end = start;
PAddr after_start = end;
PAddr after_end = end;
while (big_index >= 0) {
const std::size_t block_size{blocks[big_index].GetSize()};
const VAddr big_start{Common::AlignUp((start), block_size)};
const VAddr big_end{Common::AlignDown((end), block_size)};
const size_t block_size = m_blocks[big_index].GetSize();
const PAddr big_start = Common::AlignUp(start, block_size);
const PAddr big_end = Common::AlignDown(end, block_size);
if (big_start < big_end) {
// Free as many big blocks as we can
for (auto block{big_start}; block < big_end; block += block_size) {
FreeBlock(block, big_index);
// Free as many big blocks as we can.
for (auto block = big_start; block < big_end; block += block_size) {
this->FreeBlock(block, big_index);
}
before_end = big_start;
after_start = big_end;
@@ -81,31 +97,31 @@ void KPageHeap::Free(VAddr addr, std::size_t num_pages) {
}
ASSERT(big_index >= 0);
// Free space before the big blocks
for (s32 i{big_index - 1}; i >= 0; i--) {
const std::size_t block_size{blocks[i].GetSize()};
// Free space before the big blocks.
for (s32 i = big_index - 1; i >= 0; i--) {
const size_t block_size = m_blocks[i].GetSize();
while (before_start + block_size <= before_end) {
before_end -= block_size;
FreeBlock(before_end, i);
this->FreeBlock(before_end, i);
}
}
// Free space after the big blocks
for (s32 i{big_index - 1}; i >= 0; i--) {
const std::size_t block_size{blocks[i].GetSize()};
// Free space after the big blocks.
for (s32 i = big_index - 1; i >= 0; i--) {
const size_t block_size = m_blocks[i].GetSize();
while (after_start + block_size <= after_end) {
FreeBlock(after_start, i);
this->FreeBlock(after_start, i);
after_start += block_size;
}
}
}
std::size_t KPageHeap::CalculateManagementOverheadSize(std::size_t region_size) {
std::size_t overhead_size = 0;
for (std::size_t i = 0; i < MemoryBlockPageShifts.size(); i++) {
const std::size_t cur_block_shift{MemoryBlockPageShifts[i]};
const std::size_t next_block_shift{
(i != MemoryBlockPageShifts.size() - 1) ? MemoryBlockPageShifts[i + 1] : 0};
size_t KPageHeap::CalculateManagementOverheadSize(size_t region_size, const size_t* block_shifts,
size_t num_block_shifts) {
size_t overhead_size = 0;
for (size_t i = 0; i < num_block_shifts; i++) {
const size_t cur_block_shift = block_shifts[i];
const size_t next_block_shift = (i != num_block_shifts - 1) ? block_shifts[i + 1] : 0;
overhead_size += KPageHeap::Block::CalculateManagementOverheadSize(
region_size, cur_block_shift, next_block_shift);
}

View File

@@ -23,54 +23,73 @@ public:
KPageHeap() = default;
~KPageHeap() = default;
constexpr VAddr GetAddress() const {
return heap_address;
constexpr PAddr GetAddress() const {
return m_heap_address;
}
constexpr std::size_t GetSize() const {
return heap_size;
constexpr size_t GetSize() const {
return m_heap_size;
}
constexpr VAddr GetEndAddress() const {
return GetAddress() + GetSize();
constexpr PAddr GetEndAddress() const {
return this->GetAddress() + this->GetSize();
}
constexpr std::size_t GetPageOffset(VAddr block) const {
return (block - GetAddress()) / PageSize;
constexpr size_t GetPageOffset(PAddr block) const {
return (block - this->GetAddress()) / PageSize;
}
constexpr size_t GetPageOffsetToEnd(PAddr block) const {
return (this->GetEndAddress() - block) / PageSize;
}
void Initialize(VAddr heap_address, std::size_t heap_size, std::size_t metadata_size);
VAddr AllocateBlock(s32 index, bool random);
void Free(VAddr addr, std::size_t num_pages);
void UpdateUsedSize() {
used_size = heap_size - (GetNumFreePages() * PageSize);
void Initialize(PAddr heap_address, size_t heap_size, VAddr management_address,
size_t management_size) {
return this->Initialize(heap_address, heap_size, management_address, management_size,
MemoryBlockPageShifts.data(), NumMemoryBlockPageShifts);
}
static std::size_t CalculateManagementOverheadSize(std::size_t region_size);
size_t GetFreeSize() const {
return this->GetNumFreePages() * PageSize;
}
static constexpr s32 GetAlignedBlockIndex(std::size_t num_pages, std::size_t align_pages) {
const auto target_pages{std::max(num_pages, align_pages)};
for (std::size_t i = 0; i < NumMemoryBlockPageShifts; i++) {
if (target_pages <=
(static_cast<std::size_t>(1) << MemoryBlockPageShifts[i]) / PageSize) {
void SetInitialUsedSize(size_t reserved_size) {
// Check that the reserved size is valid.
const size_t free_size = this->GetNumFreePages() * PageSize;
ASSERT(m_heap_size >= free_size + reserved_size);
// Set the initial used size.
m_initial_used_size = m_heap_size - free_size - reserved_size;
}
PAddr AllocateBlock(s32 index, bool random);
void Free(PAddr addr, size_t num_pages);
static size_t CalculateManagementOverheadSize(size_t region_size) {
return CalculateManagementOverheadSize(region_size, MemoryBlockPageShifts.data(),
NumMemoryBlockPageShifts);
}
static constexpr s32 GetAlignedBlockIndex(size_t num_pages, size_t align_pages) {
const size_t target_pages = std::max(num_pages, align_pages);
for (size_t i = 0; i < NumMemoryBlockPageShifts; i++) {
if (target_pages <= (size_t(1) << MemoryBlockPageShifts[i]) / PageSize) {
return static_cast<s32>(i);
}
}
return -1;
}
static constexpr s32 GetBlockIndex(std::size_t num_pages) {
for (s32 i{static_cast<s32>(NumMemoryBlockPageShifts) - 1}; i >= 0; i--) {
if (num_pages >= (static_cast<std::size_t>(1) << MemoryBlockPageShifts[i]) / PageSize) {
static constexpr s32 GetBlockIndex(size_t num_pages) {
for (s32 i = static_cast<s32>(NumMemoryBlockPageShifts) - 1; i >= 0; i--) {
if (num_pages >= (size_t(1) << MemoryBlockPageShifts[i]) / PageSize) {
return i;
}
}
return -1;
}
static constexpr std::size_t GetBlockSize(std::size_t index) {
return static_cast<std::size_t>(1) << MemoryBlockPageShifts[index];
static constexpr size_t GetBlockSize(size_t index) {
return size_t(1) << MemoryBlockPageShifts[index];
}
static constexpr std::size_t GetBlockNumPages(std::size_t index) {
static constexpr size_t GetBlockNumPages(size_t index) {
return GetBlockSize(index) / PageSize;
}
@@ -83,114 +102,116 @@ private:
Block() = default;
~Block() = default;
constexpr std::size_t GetShift() const {
return block_shift;
constexpr size_t GetShift() const {
return m_block_shift;
}
constexpr std::size_t GetNextShift() const {
return next_block_shift;
constexpr size_t GetNextShift() const {
return m_next_block_shift;
}
constexpr std::size_t GetSize() const {
return static_cast<std::size_t>(1) << GetShift();
constexpr size_t GetSize() const {
return u64(1) << this->GetShift();
}
constexpr std::size_t GetNumPages() const {
return GetSize() / PageSize;
constexpr size_t GetNumPages() const {
return this->GetSize() / PageSize;
}
constexpr std::size_t GetNumFreeBlocks() const {
return bitmap.GetNumBits();
constexpr size_t GetNumFreeBlocks() const {
return m_bitmap.GetNumBits();
}
constexpr std::size_t GetNumFreePages() const {
return GetNumFreeBlocks() * GetNumPages();
constexpr size_t GetNumFreePages() const {
return this->GetNumFreeBlocks() * this->GetNumPages();
}
u64* Initialize(VAddr addr, std::size_t size, std::size_t bs, std::size_t nbs,
u64* bit_storage) {
// Set shifts
block_shift = bs;
next_block_shift = nbs;
u64* Initialize(PAddr addr, size_t size, size_t bs, size_t nbs, u64* bit_storage) {
// Set shifts.
m_block_shift = bs;
m_next_block_shift = nbs;
// Align up the address
VAddr end{addr + size};
const auto align{(next_block_shift != 0) ? (1ULL << next_block_shift)
: (1ULL << block_shift)};
addr = Common::AlignDown((addr), align);
end = Common::AlignUp((end), align);
// Align up the address.
PAddr end = addr + size;
const size_t align = (m_next_block_shift != 0) ? (u64(1) << m_next_block_shift)
: (u64(1) << m_block_shift);
addr = Common::AlignDown(addr, align);
end = Common::AlignUp(end, align);
heap_address = addr;
end_offset = (end - addr) / (1ULL << block_shift);
return bitmap.Initialize(bit_storage, end_offset);
m_heap_address = addr;
m_end_offset = (end - addr) / (u64(1) << m_block_shift);
return m_bitmap.Initialize(bit_storage, m_end_offset);
}
VAddr PushBlock(VAddr address) {
// Set the bit for the free block
std::size_t offset{(address - heap_address) >> GetShift()};
bitmap.SetBit(offset);
PAddr PushBlock(PAddr address) {
// Set the bit for the free block.
size_t offset = (address - m_heap_address) >> this->GetShift();
m_bitmap.SetBit(offset);
// If we have a next shift, try to clear the blocks below and return the address
if (GetNextShift()) {
const auto diff{1ULL << (GetNextShift() - GetShift())};
// If we have a next shift, try to clear the blocks below this one and return the new
// address.
if (this->GetNextShift()) {
const size_t diff = u64(1) << (this->GetNextShift() - this->GetShift());
offset = Common::AlignDown(offset, diff);
if (bitmap.ClearRange(offset, diff)) {
return heap_address + (offset << GetShift());
if (m_bitmap.ClearRange(offset, diff)) {
return m_heap_address + (offset << this->GetShift());
}
}
// We couldn't coalesce, or we're already as big as possible
return 0;
// We couldn't coalesce, or we're already as big as possible.
return {};
}
VAddr PopBlock(bool random) {
// Find a free block
const s64 soffset{bitmap.FindFreeBlock(random)};
PAddr PopBlock(bool random) {
// Find a free block.
s64 soffset = m_bitmap.FindFreeBlock(random);
if (soffset < 0) {
return 0;
return {};
}
const auto offset{static_cast<std::size_t>(soffset)};
const size_t offset = static_cast<size_t>(soffset);
// Update our tracking and return it
bitmap.ClearBit(offset);
return heap_address + (offset << GetShift());
// Update our tracking and return it.
m_bitmap.ClearBit(offset);
return m_heap_address + (offset << this->GetShift());
}
static constexpr std::size_t CalculateManagementOverheadSize(std::size_t region_size,
std::size_t cur_block_shift,
std::size_t next_block_shift) {
const auto cur_block_size{(1ULL << cur_block_shift)};
const auto next_block_size{(1ULL << next_block_shift)};
const auto align{(next_block_shift != 0) ? next_block_size : cur_block_size};
public:
static constexpr size_t CalculateManagementOverheadSize(size_t region_size,
size_t cur_block_shift,
size_t next_block_shift) {
const size_t cur_block_size = (u64(1) << cur_block_shift);
const size_t next_block_size = (u64(1) << next_block_shift);
const size_t align = (next_block_shift != 0) ? next_block_size : cur_block_size;
return KPageBitmap::CalculateManagementOverheadSize(
(align * 2 + Common::AlignUp(region_size, align)) / cur_block_size);
}
private:
KPageBitmap bitmap;
VAddr heap_address{};
uintptr_t end_offset{};
std::size_t block_shift{};
std::size_t next_block_shift{};
KPageBitmap m_bitmap;
PAddr m_heap_address{};
uintptr_t m_end_offset{};
size_t m_block_shift{};
size_t m_next_block_shift{};
};
constexpr std::size_t GetNumFreePages() const {
std::size_t num_free{};
private:
void Initialize(PAddr heap_address, size_t heap_size, VAddr management_address,
size_t management_size, const size_t* block_shifts, size_t num_block_shifts);
size_t GetNumFreePages() const;
for (const auto& block : blocks) {
num_free += block.GetNumFreePages();
}
void FreeBlock(PAddr block, s32 index);
return num_free;
}
void FreeBlock(VAddr block, s32 index);
static constexpr std::size_t NumMemoryBlockPageShifts{7};
static constexpr std::array<std::size_t, NumMemoryBlockPageShifts> MemoryBlockPageShifts{
static constexpr size_t NumMemoryBlockPageShifts{7};
static constexpr std::array<size_t, NumMemoryBlockPageShifts> MemoryBlockPageShifts{
0xC, 0x10, 0x15, 0x16, 0x19, 0x1D, 0x1E,
};
VAddr heap_address{};
std::size_t heap_size{};
std::size_t used_size{};
std::array<Block, NumMemoryBlockPageShifts> blocks{};
std::vector<u64> metadata;
private:
static size_t CalculateManagementOverheadSize(size_t region_size, const size_t* block_shifts,
size_t num_block_shifts);
private:
PAddr m_heap_address{};
size_t m_heap_size{};
size_t m_initial_used_size{};
size_t m_num_blocks{};
std::array<Block, NumMemoryBlockPageShifts> m_blocks{};
std::vector<u64> m_management_data;
};
} // namespace Kernel
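
As a rough illustration of how the bitmap-backed Block above coalesces freed pages into the next-larger block size, here is a self-contained sketch; the block sizes, names, and single-level structure are assumptions chosen for the example, not the kernel's actual layout.

// Minimal sketch of the free-block bitmap idea used by Block above (illustrative only).
#include <bitset>
#include <cstddef>
#include <cstdint>
#include <iostream>

namespace sketch {

constexpr std::size_t BlockShift = 12;     // 4 KiB blocks
constexpr std::size_t NextBlockShift = 16; // coalesce into 64 KiB super-blocks
constexpr std::size_t NumBlocks = 1024;

std::bitset<NumBlocks> free_bits;
std::uintptr_t heap_address = 0x8000'0000;

// Mark a block free; if its whole aligned group of 2^(NextBlockShift - BlockShift)
// buddies is now free, clear the group and report the larger block's address.
std::uintptr_t PushBlock(std::uintptr_t address) {
    const std::size_t offset = (address - heap_address) >> BlockShift;
    free_bits.set(offset);

    const std::size_t group = std::size_t{1} << (NextBlockShift - BlockShift); // 16
    const std::size_t base = offset & ~(group - 1);                            // AlignDown
    for (std::size_t i = 0; i < group; ++i) {
        if (!free_bits.test(base + i)) {
            return 0; // could not coalesce yet
        }
    }
    for (std::size_t i = 0; i < group; ++i) {
        free_bits.reset(base + i); // hand the range to the next-larger block level
    }
    return heap_address + (base << BlockShift);
}

} // namespace sketch

int main() {
    using namespace sketch;
    // Free 15 of the 16 buddies: no coalescing happens yet.
    for (std::size_t i = 1; i < 16; ++i) {
        PushBlock(heap_address + (i << BlockShift));
    }
    // Freeing the last buddy yields one 64 KiB block at the group base.
    std::cout << std::hex << PushBlock(heap_address) << '\n'; // prints 80000000
    return 0;
}

In the same spirit, CalculateManagementOverheadSize above only asks KPageBitmap how many words are needed to track (align * 2 + Common::AlignUp(region_size, align)) / cur_block_size bits; for a 4 MiB region with 4 KiB blocks coalescing into 64 KiB blocks that works out to (128 KiB + 4 MiB) / 4 KiB = 1056 bits.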

View File

@@ -273,11 +273,12 @@ ResultCode KPageTable::MapProcessCode(VAddr addr, std::size_t num_pages, KMemory
R_TRY(this->CheckMemoryState(addr, size, KMemoryState::All, KMemoryState::Free,
KMemoryPermission::None, KMemoryPermission::None,
KMemoryAttribute::None, KMemoryAttribute::None));
KPageLinkedList pg;
R_TRY(system.Kernel().MemoryManager().AllocateAndOpen(
&pg, num_pages,
KMemoryManager::EncodeOption(KMemoryManager::Pool::Application, allocation_option)));
KPageLinkedList page_linked_list;
R_TRY(system.Kernel().MemoryManager().Allocate(page_linked_list, num_pages, memory_pool,
allocation_option));
R_TRY(Operate(addr, num_pages, page_linked_list, OperationType::MapGroup));
R_TRY(Operate(addr, num_pages, pg, OperationType::MapGroup));
block_manager->Update(addr, num_pages, state, perm);
@@ -443,9 +444,10 @@ ResultCode KPageTable::MapPhysicalMemory(VAddr address, std::size_t size) {
R_UNLESS(memory_reservation.Succeeded(), ResultLimitReached);
// Allocate pages for the new memory.
KPageLinkedList page_linked_list;
R_TRY(system.Kernel().MemoryManager().Allocate(
page_linked_list, (size - mapped_size) / PageSize, memory_pool, allocation_option));
KPageLinkedList pg;
R_TRY(system.Kernel().MemoryManager().AllocateAndOpenForProcess(
&pg, (size - mapped_size) / PageSize,
KMemoryManager::EncodeOption(memory_pool, allocation_option), 0, 0));
// Map the memory.
{
@@ -547,7 +549,7 @@ ResultCode KPageTable::MapPhysicalMemory(VAddr address, std::size_t size) {
});
// Iterate over the memory.
auto pg_it = page_linked_list.Nodes().begin();
auto pg_it = pg.Nodes().begin();
PAddr pg_phys_addr = pg_it->GetAddress();
size_t pg_pages = pg_it->GetNumPages();
@@ -571,7 +573,7 @@ ResultCode KPageTable::MapPhysicalMemory(VAddr address, std::size_t size) {
// Check if we're at the end of the physical block.
if (pg_pages == 0) {
// Ensure there are more pages to map.
ASSERT(pg_it != page_linked_list.Nodes().end());
ASSERT(pg_it != pg.Nodes().end());
// Advance our physical block.
++pg_it;
@@ -841,10 +843,14 @@ ResultCode KPageTable::UnmapPhysicalMemory(VAddr address, std::size_t size) {
process->GetResourceLimit()->Release(LimitableResource::PhysicalMemory, mapped_size);
// Update memory blocks.
system.Kernel().MemoryManager().Free(pg, size / PageSize, memory_pool, allocation_option);
block_manager->Update(address, size / PageSize, KMemoryState::Free, KMemoryPermission::None,
KMemoryAttribute::None);
// TODO(bunnei): This is a workaround until the next set of changes, where we add reference
// counting for mapped pages. Until then, we must manually close the reference to the page
// group.
system.Kernel().MemoryManager().Close(pg);
// We succeeded.
remap_guard.Cancel();
@@ -1270,9 +1276,16 @@ ResultCode KPageTable::SetHeapSize(VAddr* out, std::size_t size) {
R_UNLESS(memory_reservation.Succeeded(), ResultLimitReached);
// Allocate pages for the heap extension.
KPageLinkedList page_linked_list;
R_TRY(system.Kernel().MemoryManager().Allocate(page_linked_list, allocation_size / PageSize,
memory_pool, allocation_option));
KPageLinkedList pg;
R_TRY(system.Kernel().MemoryManager().AllocateAndOpen(
&pg, allocation_size / PageSize,
KMemoryManager::EncodeOption(memory_pool, allocation_option)));
// Clear all the newly allocated pages.
for (const auto& it : pg.Nodes()) {
std::memset(system.DeviceMemory().GetPointer(it.GetAddress()), heap_fill_value,
it.GetSize());
}
// Map the pages.
{
@@ -1291,7 +1304,7 @@ ResultCode KPageTable::SetHeapSize(VAddr* out, std::size_t size) {
// Map the pages.
const auto num_pages = allocation_size / PageSize;
R_TRY(Operate(current_heap_end, num_pages, page_linked_list, OperationType::MapGroup));
R_TRY(Operate(current_heap_end, num_pages, pg, OperationType::MapGroup));
// Clear all the newly allocated pages.
for (std::size_t cur_page = 0; cur_page < num_pages; ++cur_page) {
@@ -1339,8 +1352,9 @@ ResultVal<VAddr> KPageTable::AllocateAndMapMemory(std::size_t needed_num_pages,
R_TRY(Operate(addr, needed_num_pages, perm, OperationType::Map, map_addr));
} else {
KPageLinkedList page_group;
R_TRY(system.Kernel().MemoryManager().Allocate(page_group, needed_num_pages, memory_pool,
allocation_option));
R_TRY(system.Kernel().MemoryManager().AllocateAndOpenForProcess(
&page_group, needed_num_pages,
KMemoryManager::EncodeOption(memory_pool, allocation_option), 0, 0));
R_TRY(Operate(addr, needed_num_pages, page_group, OperationType::MapGroup));
}
@@ -1547,7 +1561,7 @@ ResultCode KPageTable::Operate(VAddr addr, std::size_t num_pages, KMemoryPermiss
return ResultSuccess;
}
constexpr VAddr KPageTable::GetRegionAddress(KMemoryState state) const {
VAddr KPageTable::GetRegionAddress(KMemoryState state) const {
switch (state) {
case KMemoryState::Free:
case KMemoryState::Kernel:
@@ -1583,7 +1597,7 @@ constexpr VAddr KPageTable::GetRegionAddress(KMemoryState state) const {
}
}
constexpr std::size_t KPageTable::GetRegionSize(KMemoryState state) const {
std::size_t KPageTable::GetRegionSize(KMemoryState state) const {
switch (state) {
case KMemoryState::Free:
case KMemoryState::Kernel:

View File

@@ -102,8 +102,8 @@ private:
OperationType operation);
ResultCode Operate(VAddr addr, std::size_t num_pages, KMemoryPermission perm,
OperationType operation, PAddr map_addr = 0);
constexpr VAddr GetRegionAddress(KMemoryState state) const;
constexpr std::size_t GetRegionSize(KMemoryState state) const;
VAddr GetRegionAddress(KMemoryState state) const;
std::size_t GetRegionSize(KMemoryState state) const;
ResultCode CheckMemoryStateContiguous(std::size_t* out_blocks_needed, VAddr addr,
std::size_t size, KMemoryState state_mask,
@@ -254,8 +254,7 @@ public:
return !IsOutsideASLRRegion(address, size);
}
PAddr GetPhysicalAddr(VAddr addr) {
ASSERT(IsLockedByCurrentThread());
PAddr GetPhysicalAddr(VAddr addr) const {
const auto backing_addr = page_table_impl.backing_addr[addr >> PageBits];
ASSERT(backing_addr);
return backing_addr + addr;
@@ -311,6 +310,8 @@ private:
bool is_kernel{};
bool is_aslr_enabled{};
u32 heap_fill_value{};
KMemoryManager::Pool memory_pool{KMemoryManager::Pool::Application};
KMemoryManager::Direction allocation_option{KMemoryManager::Direction::FromFront};
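
GetPhysicalAddr above resolves a virtual address by adding it to a per-page entry in page_table_impl.backing_addr, so each entry effectively stores the physical-minus-virtual delta for its page. A small standalone sketch of that lookup, with the mapping values chosen only for the example:

#include <cassert>
#include <cstdint>
#include <vector>

// Each page slot stores (physical_base - virtual_base) for its page, so
// phys = backing_addr[vaddr >> PageBits] + vaddr, as in GetPhysicalAddr above.
constexpr std::uint64_t PageBits = 12;
constexpr std::uint64_t PageSize = 1ULL << PageBits;

int main() {
    std::vector<std::uint64_t> backing_addr(32, 0);
    const std::uint64_t vaddr_base = 0x10000;      // assumed mapping for the example
    const std::uint64_t paddr_base = 0x8001'0000;  // assumed physical backing
    backing_addr[vaddr_base >> PageBits] = paddr_base - vaddr_base;

    const std::uint64_t vaddr = vaddr_base + 0x123;
    const std::uint64_t phys = backing_addr[vaddr >> PageBits] + vaddr;
    assert(phys == paddr_base + 0x123);
    return 0;
}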

View File

@@ -70,13 +70,12 @@ struct KernelCore::Impl {
// Derive the initial memory layout from the emulated board
Init::InitializeSlabResourceCounts(kernel);
KMemoryLayout memory_layout;
DeriveInitialMemoryLayout(memory_layout);
Init::InitializeSlabHeaps(system, memory_layout);
DeriveInitialMemoryLayout();
Init::InitializeSlabHeaps(system, *memory_layout);
// Initialize kernel memory and resources.
InitializeSystemResourceLimit(kernel, system.CoreTiming(), memory_layout);
InitializeMemoryLayout(memory_layout);
InitializeSystemResourceLimit(kernel, system.CoreTiming());
InitializeMemoryLayout();
InitializePageSlab();
InitializeSchedulers();
InitializeSuspendThreads();
@@ -219,12 +218,11 @@ struct KernelCore::Impl {
// Creates the default system resource limit
void InitializeSystemResourceLimit(KernelCore& kernel,
const Core::Timing::CoreTiming& core_timing,
const KMemoryLayout& memory_layout) {
const Core::Timing::CoreTiming& core_timing) {
system_resource_limit = KResourceLimit::Create(system.Kernel());
system_resource_limit->Initialize(&core_timing);
const auto [total_size, kernel_size] = memory_layout.GetTotalAndKernelMemorySizes();
const auto [total_size, kernel_size] = memory_layout->GetTotalAndKernelMemorySizes();
// If setting the default system values fails, then something seriously wrong has occurred.
ASSERT(system_resource_limit->SetLimitValue(LimitableResource::PhysicalMemory, total_size)
@@ -353,16 +351,18 @@ struct KernelCore::Impl {
return schedulers[thread_id]->GetCurrentThread();
}
void DeriveInitialMemoryLayout(KMemoryLayout& memory_layout) {
void DeriveInitialMemoryLayout() {
memory_layout = std::make_unique<KMemoryLayout>();
// Insert the root region for the virtual memory tree, from which all other regions will
// derive.
memory_layout.GetVirtualMemoryRegionTree().InsertDirectly(
memory_layout->GetVirtualMemoryRegionTree().InsertDirectly(
KernelVirtualAddressSpaceBase,
KernelVirtualAddressSpaceBase + KernelVirtualAddressSpaceSize - 1);
// Insert the root region for the physical memory tree, from which all other regions will
// derive.
memory_layout.GetPhysicalMemoryRegionTree().InsertDirectly(
memory_layout->GetPhysicalMemoryRegionTree().InsertDirectly(
KernelPhysicalAddressSpaceBase,
KernelPhysicalAddressSpaceBase + KernelPhysicalAddressSpaceSize - 1);
@@ -379,7 +379,7 @@ struct KernelCore::Impl {
if (!(kernel_region_start + KernelRegionSize - 1 <= KernelVirtualAddressSpaceLast)) {
kernel_region_size = KernelVirtualAddressSpaceEnd - kernel_region_start;
}
ASSERT(memory_layout.GetVirtualMemoryRegionTree().Insert(
ASSERT(memory_layout->GetVirtualMemoryRegionTree().Insert(
kernel_region_start, kernel_region_size, KMemoryRegionType_Kernel));
// Setup the code region.
@@ -388,11 +388,11 @@ struct KernelCore::Impl {
Common::AlignDown(code_start_virt_addr, CodeRegionAlign);
constexpr VAddr code_region_end = Common::AlignUp(code_end_virt_addr, CodeRegionAlign);
constexpr size_t code_region_size = code_region_end - code_region_start;
ASSERT(memory_layout.GetVirtualMemoryRegionTree().Insert(
ASSERT(memory_layout->GetVirtualMemoryRegionTree().Insert(
code_region_start, code_region_size, KMemoryRegionType_KernelCode));
// Setup board-specific device physical regions.
Init::SetupDevicePhysicalMemoryRegions(memory_layout);
Init::SetupDevicePhysicalMemoryRegions(*memory_layout);
// Determine the amount of space needed for the misc region.
size_t misc_region_needed_size;
@@ -401,7 +401,7 @@ struct KernelCore::Impl {
misc_region_needed_size = Core::Hardware::NUM_CPU_CORES * (3 * (PageSize + PageSize));
// Account for each auto-map device.
for (const auto& region : memory_layout.GetPhysicalMemoryRegionTree()) {
for (const auto& region : memory_layout->GetPhysicalMemoryRegionTree()) {
if (region.HasTypeAttribute(KMemoryRegionAttr_ShouldKernelMap)) {
// Check that the region is valid.
ASSERT(region.GetEndAddress() != 0);
@@ -426,22 +426,22 @@ struct KernelCore::Impl {
// Setup the misc region.
const VAddr misc_region_start =
memory_layout.GetVirtualMemoryRegionTree().GetRandomAlignedRegion(
memory_layout->GetVirtualMemoryRegionTree().GetRandomAlignedRegion(
misc_region_size, MiscRegionAlign, KMemoryRegionType_Kernel);
ASSERT(memory_layout.GetVirtualMemoryRegionTree().Insert(
ASSERT(memory_layout->GetVirtualMemoryRegionTree().Insert(
misc_region_start, misc_region_size, KMemoryRegionType_KernelMisc));
// Setup the stack region.
constexpr size_t StackRegionSize = 14_MiB;
constexpr size_t StackRegionAlign = KernelAslrAlignment;
const VAddr stack_region_start =
memory_layout.GetVirtualMemoryRegionTree().GetRandomAlignedRegion(
memory_layout->GetVirtualMemoryRegionTree().GetRandomAlignedRegion(
StackRegionSize, StackRegionAlign, KMemoryRegionType_Kernel);
ASSERT(memory_layout.GetVirtualMemoryRegionTree().Insert(
ASSERT(memory_layout->GetVirtualMemoryRegionTree().Insert(
stack_region_start, StackRegionSize, KMemoryRegionType_KernelStack));
// Determine the size of the resource region.
const size_t resource_region_size = memory_layout.GetResourceRegionSizeForInit();
const size_t resource_region_size = memory_layout->GetResourceRegionSizeForInit();
// Determine the size of the slab region.
const size_t slab_region_size =
@@ -458,23 +458,23 @@ struct KernelCore::Impl {
Common::AlignUp(code_end_phys_addr + slab_region_size, SlabRegionAlign) -
Common::AlignDown(code_end_phys_addr, SlabRegionAlign);
const VAddr slab_region_start =
memory_layout.GetVirtualMemoryRegionTree().GetRandomAlignedRegion(
memory_layout->GetVirtualMemoryRegionTree().GetRandomAlignedRegion(
slab_region_needed_size, SlabRegionAlign, KMemoryRegionType_Kernel) +
(code_end_phys_addr % SlabRegionAlign);
ASSERT(memory_layout.GetVirtualMemoryRegionTree().Insert(
ASSERT(memory_layout->GetVirtualMemoryRegionTree().Insert(
slab_region_start, slab_region_size, KMemoryRegionType_KernelSlab));
// Setup the temp region.
constexpr size_t TempRegionSize = 128_MiB;
constexpr size_t TempRegionAlign = KernelAslrAlignment;
const VAddr temp_region_start =
memory_layout.GetVirtualMemoryRegionTree().GetRandomAlignedRegion(
memory_layout->GetVirtualMemoryRegionTree().GetRandomAlignedRegion(
TempRegionSize, TempRegionAlign, KMemoryRegionType_Kernel);
ASSERT(memory_layout.GetVirtualMemoryRegionTree().Insert(temp_region_start, TempRegionSize,
KMemoryRegionType_KernelTemp));
ASSERT(memory_layout->GetVirtualMemoryRegionTree().Insert(temp_region_start, TempRegionSize,
KMemoryRegionType_KernelTemp));
// Automatically map in devices that have auto-map attributes.
for (auto& region : memory_layout.GetPhysicalMemoryRegionTree()) {
for (auto& region : memory_layout->GetPhysicalMemoryRegionTree()) {
// We only care about kernel regions.
if (!region.IsDerivedFrom(KMemoryRegionType_Kernel)) {
continue;
@@ -501,21 +501,21 @@ struct KernelCore::Impl {
const size_t map_size =
Common::AlignUp(region.GetEndAddress(), PageSize) - map_phys_addr;
const VAddr map_virt_addr =
memory_layout.GetVirtualMemoryRegionTree().GetRandomAlignedRegionWithGuard(
memory_layout->GetVirtualMemoryRegionTree().GetRandomAlignedRegionWithGuard(
map_size, PageSize, KMemoryRegionType_KernelMisc, PageSize);
ASSERT(memory_layout.GetVirtualMemoryRegionTree().Insert(
ASSERT(memory_layout->GetVirtualMemoryRegionTree().Insert(
map_virt_addr, map_size, KMemoryRegionType_KernelMiscMappedDevice));
region.SetPairAddress(map_virt_addr + region.GetAddress() - map_phys_addr);
}
Init::SetupDramPhysicalMemoryRegions(memory_layout);
Init::SetupDramPhysicalMemoryRegions(*memory_layout);
// Insert a physical region for the kernel code region.
ASSERT(memory_layout.GetPhysicalMemoryRegionTree().Insert(
ASSERT(memory_layout->GetPhysicalMemoryRegionTree().Insert(
code_start_phys_addr, code_region_size, KMemoryRegionType_DramKernelCode));
// Insert a physical region for the kernel slab region.
ASSERT(memory_layout.GetPhysicalMemoryRegionTree().Insert(
ASSERT(memory_layout->GetPhysicalMemoryRegionTree().Insert(
slab_start_phys_addr, slab_region_size, KMemoryRegionType_DramKernelSlab));
// Determine size available for kernel page table heaps, requiring > 8 MB.
@@ -524,12 +524,12 @@ struct KernelCore::Impl {
ASSERT(page_table_heap_size / 4_MiB > 2);
// Insert a physical region for the kernel page table heap region
ASSERT(memory_layout.GetPhysicalMemoryRegionTree().Insert(
ASSERT(memory_layout->GetPhysicalMemoryRegionTree().Insert(
slab_end_phys_addr, page_table_heap_size, KMemoryRegionType_DramKernelPtHeap));
// All DRAM regions that we haven't tagged by this point will be mapped under the linear
// mapping. Tag them.
for (auto& region : memory_layout.GetPhysicalMemoryRegionTree()) {
for (auto& region : memory_layout->GetPhysicalMemoryRegionTree()) {
if (region.GetType() == KMemoryRegionType_Dram) {
// Check that the region is valid.
ASSERT(region.GetEndAddress() != 0);
@@ -541,7 +541,7 @@ struct KernelCore::Impl {
// Get the linear region extents.
const auto linear_extents =
memory_layout.GetPhysicalMemoryRegionTree().GetDerivedRegionExtents(
memory_layout->GetPhysicalMemoryRegionTree().GetDerivedRegionExtents(
KMemoryRegionAttr_LinearMapped);
ASSERT(linear_extents.GetEndAddress() != 0);
@@ -553,7 +553,7 @@ struct KernelCore::Impl {
Common::AlignUp(linear_extents.GetEndAddress(), LinearRegionAlign) -
aligned_linear_phys_start;
const VAddr linear_region_start =
memory_layout.GetVirtualMemoryRegionTree().GetRandomAlignedRegionWithGuard(
memory_layout->GetVirtualMemoryRegionTree().GetRandomAlignedRegionWithGuard(
linear_region_size, LinearRegionAlign, KMemoryRegionType_None, LinearRegionAlign);
const u64 linear_region_phys_to_virt_diff = linear_region_start - aligned_linear_phys_start;
@@ -562,7 +562,7 @@ struct KernelCore::Impl {
{
PAddr cur_phys_addr = 0;
u64 cur_size = 0;
for (auto& region : memory_layout.GetPhysicalMemoryRegionTree()) {
for (auto& region : memory_layout->GetPhysicalMemoryRegionTree()) {
if (!region.HasTypeAttribute(KMemoryRegionAttr_LinearMapped)) {
continue;
}
@@ -581,55 +581,49 @@ struct KernelCore::Impl {
const VAddr region_virt_addr =
region.GetAddress() + linear_region_phys_to_virt_diff;
ASSERT(memory_layout.GetVirtualMemoryRegionTree().Insert(
ASSERT(memory_layout->GetVirtualMemoryRegionTree().Insert(
region_virt_addr, region.GetSize(),
GetTypeForVirtualLinearMapping(region.GetType())));
region.SetPairAddress(region_virt_addr);
KMemoryRegion* virt_region =
memory_layout.GetVirtualMemoryRegionTree().FindModifiable(region_virt_addr);
memory_layout->GetVirtualMemoryRegionTree().FindModifiable(region_virt_addr);
ASSERT(virt_region != nullptr);
virt_region->SetPairAddress(region.GetAddress());
}
}
// Insert regions for the initial page table region.
ASSERT(memory_layout.GetPhysicalMemoryRegionTree().Insert(
ASSERT(memory_layout->GetPhysicalMemoryRegionTree().Insert(
resource_end_phys_addr, KernelPageTableHeapSize, KMemoryRegionType_DramKernelInitPt));
ASSERT(memory_layout.GetVirtualMemoryRegionTree().Insert(
ASSERT(memory_layout->GetVirtualMemoryRegionTree().Insert(
resource_end_phys_addr + linear_region_phys_to_virt_diff, KernelPageTableHeapSize,
KMemoryRegionType_VirtualDramKernelInitPt));
// All linear-mapped DRAM regions that we haven't tagged by this point will be allocated to
// some pool partition. Tag them.
for (auto& region : memory_layout.GetPhysicalMemoryRegionTree()) {
for (auto& region : memory_layout->GetPhysicalMemoryRegionTree()) {
if (region.GetType() == (KMemoryRegionType_Dram | KMemoryRegionAttr_LinearMapped)) {
region.SetType(KMemoryRegionType_DramPoolPartition);
}
}
// Setup all other memory regions needed to arrange the pool partitions.
Init::SetupPoolPartitionMemoryRegions(memory_layout);
Init::SetupPoolPartitionMemoryRegions(*memory_layout);
// Cache all linear regions in their own trees for faster access, later.
memory_layout.InitializeLinearMemoryRegionTrees(aligned_linear_phys_start,
linear_region_start);
memory_layout->InitializeLinearMemoryRegionTrees(aligned_linear_phys_start,
linear_region_start);
}
void InitializeMemoryLayout(const KMemoryLayout& memory_layout) {
const auto system_pool = memory_layout.GetKernelSystemPoolRegionPhysicalExtents();
const auto applet_pool = memory_layout.GetKernelAppletPoolRegionPhysicalExtents();
const auto application_pool = memory_layout.GetKernelApplicationPoolRegionPhysicalExtents();
void InitializeMemoryLayout() {
const auto system_pool = memory_layout->GetKernelSystemPoolRegionPhysicalExtents();
// Initialize memory managers
// Initialize the memory manager.
memory_manager = std::make_unique<KMemoryManager>(system);
memory_manager->InitializeManager(KMemoryManager::Pool::Application,
application_pool.GetAddress(),
application_pool.GetEndAddress());
memory_manager->InitializeManager(KMemoryManager::Pool::Applet, applet_pool.GetAddress(),
applet_pool.GetEndAddress());
memory_manager->InitializeManager(KMemoryManager::Pool::System, system_pool.GetAddress(),
system_pool.GetEndAddress());
const auto& management_region = memory_layout->GetPoolManagementRegion();
ASSERT(management_region.GetEndAddress() != 0);
memory_manager->Initialize(management_region.GetAddress(), management_region.GetSize());
// Setup memory regions for emulated processes
// TODO(bunnei): These should not be hardcoded regions initialized within the kernel
@@ -770,6 +764,9 @@ struct KernelCore::Impl {
Kernel::KSharedMemory* irs_shared_mem{};
Kernel::KSharedMemory* time_shared_mem{};
// Memory layout
std::unique_ptr<KMemoryLayout> memory_layout;
// Threads used for services
std::unordered_set<std::shared_ptr<Kernel::ServiceThread>> service_threads;
Common::ThreadWorker service_threads_manager;
@@ -1135,6 +1132,10 @@ const KWorkerTaskManager& KernelCore::WorkerTaskManager() const {
return impl->worker_task_manager;
}
const KMemoryLayout& KernelCore::MemoryLayout() const {
return *impl->memory_layout;
}
bool KernelCore::IsPhantomModeForSingleCore() const {
return impl->IsPhantomModeForSingleCore();
}
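
Much of the layout derivation above is power-of-two alignment arithmetic on region addresses. A minimal sketch of the AlignUp/AlignDown math behind expressions such as the slab_region_needed_size computation, using stand-in constants rather than the kernel's real values:

#include <cassert>
#include <cstdint>

// Power-of-two alignment helpers in the spirit of Common::AlignUp/AlignDown.
constexpr std::uint64_t AlignDown(std::uint64_t value, std::uint64_t align) {
    return value & ~(align - 1);
}
constexpr std::uint64_t AlignUp(std::uint64_t value, std::uint64_t align) {
    return AlignDown(value + align - 1, align);
}

int main() {
    constexpr std::uint64_t SlabRegionAlign = 0x200000;      // 2 MiB, an assumed value
    constexpr std::uint64_t code_end_phys_addr = 0x8057'3000; // assumed address
    constexpr std::uint64_t slab_region_size = 0x647000;      // assumed size

    // Mirrors the slab_region_needed_size computation above: pad the slab region so it
    // can start at code_end_phys_addr's offset within an aligned window.
    constexpr std::uint64_t needed =
        AlignUp(code_end_phys_addr + slab_region_size, SlabRegionAlign) -
        AlignDown(code_end_phys_addr, SlabRegionAlign);
    static_assert(needed >= slab_region_size);
    assert(needed % SlabRegionAlign == 0);
    return 0;
}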

View File

@@ -41,6 +41,7 @@ class KClientSession;
class KEvent;
class KHandleTable;
class KLinkedListNode;
class KMemoryLayout;
class KMemoryManager;
class KPort;
class KProcess;
@@ -350,6 +351,9 @@ public:
/// Gets the current worker task manager, used for dispatching KThread/KProcess tasks.
const KWorkerTaskManager& WorkerTaskManager() const;
/// Gets the memory layout.
const KMemoryLayout& MemoryLayout() const;
private:
friend class KProcess;
friend class KThread;

View File

@@ -2,6 +2,30 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
// This files contains code from Ryujinx
// A copy of the code can be obtained from https://github.com/Ryujinx/Ryujinx
// The sections using code from Ryujinx are marked with a link to the original version
// MIT License
//
// Copyright (c) Ryujinx Team and Contributors
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
// associated documentation files (the "Software"), to deal in the Software without restriction,
// including without limitation the rights to use, copy, modify, merge, publish, distribute,
// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
@@ -13,59 +37,87 @@ namespace {
// Emulate GPU's LOP3.LUT (three-input logic op with 8-bit truth table)
IR::U32 ApplyLUT(IR::IREmitter& ir, const IR::U32& a, const IR::U32& b, const IR::U32& c,
u64 ttbl) {
IR::U32 r{ir.Imm32(0)};
const IR::U32 not_a{ir.BitwiseNot(a)};
const IR::U32 not_b{ir.BitwiseNot(b)};
const IR::U32 not_c{ir.BitwiseNot(c)};
if (ttbl & 0x01) {
// r |= ~a & ~b & ~c;
const auto lhs{ir.BitwiseAnd(not_a, not_b)};
const auto rhs{ir.BitwiseAnd(lhs, not_c)};
r = ir.BitwiseOr(r, rhs);
std::optional<IR::U32> value;
// Encode into gray code.
u32 map = ttbl & 1;
map |= ((ttbl >> 1) & 1) << 4;
map |= ((ttbl >> 2) & 1) << 1;
map |= ((ttbl >> 3) & 1) << 5;
map |= ((ttbl >> 4) & 1) << 3;
map |= ((ttbl >> 5) & 1) << 7;
map |= ((ttbl >> 6) & 1) << 2;
map |= ((ttbl >> 7) & 1) << 6;
u32 visited = 0;
for (u32 index = 0; index < 8 && visited != 0xff; index++) {
if ((map & (1 << index)) == 0) {
continue;
}
const auto RotateLeft4 = [](u32 value, u32 shift) {
return ((value << shift) | (value >> (4 - shift))) & 0xf;
};
u32 mask = 0;
for (u32 size = 4; size != 0; size >>= 1) {
mask = RotateLeft4((1 << size) - 1, index & 3) << (index & 4);
if ((map & mask) == mask) {
break;
}
}
// The mask should wrap, if we are on the high row, shift to low etc.
const u32 mask2 = (index & 4) != 0 ? mask >> 4 : mask << 4;
if ((map & mask2) == mask2) {
mask |= mask2;
}
if ((mask & visited) == mask) {
continue;
}
const bool not_a = (mask & 0x33) != 0;
const bool not_b = (mask & 0x99) != 0;
const bool not_c = (mask & 0x0f) != 0;
const bool a_changes = (mask & 0xcc) != 0 && not_a;
const bool b_changes = (mask & 0x66) != 0 && not_b;
const bool c_changes = (mask & 0xf0) != 0 && not_c;
std::optional<IR::U32> local_value;
const auto And = [&](const IR::U32& source, bool inverted) {
IR::U32 result = inverted ? ir.BitwiseNot(source) : source;
if (local_value) {
local_value = ir.BitwiseAnd(*local_value, result);
} else {
local_value = result;
}
};
if (!a_changes) {
And(a, not_a);
}
if (!b_changes) {
And(b, not_b);
}
if (!c_changes) {
And(c, not_c);
}
if (value) {
value = ir.BitwiseOr(*value, *local_value);
} else {
value = local_value;
}
visited |= mask;
}
if (ttbl & 0x02) {
// r |= ~a & ~b & c;
const auto lhs{ir.BitwiseAnd(not_a, not_b)};
const auto rhs{ir.BitwiseAnd(lhs, c)};
r = ir.BitwiseOr(r, rhs);
}
if (ttbl & 0x04) {
// r |= ~a & b & ~c;
const auto lhs{ir.BitwiseAnd(not_a, b)};
const auto rhs{ir.BitwiseAnd(lhs, not_c)};
r = ir.BitwiseOr(r, rhs);
}
if (ttbl & 0x08) {
// r |= ~a & b & c;
const auto lhs{ir.BitwiseAnd(not_a, b)};
const auto rhs{ir.BitwiseAnd(lhs, c)};
r = ir.BitwiseOr(r, rhs);
}
if (ttbl & 0x10) {
// r |= a & ~b & ~c;
const auto lhs{ir.BitwiseAnd(a, not_b)};
const auto rhs{ir.BitwiseAnd(lhs, not_c)};
r = ir.BitwiseOr(r, rhs);
}
if (ttbl & 0x20) {
// r |= a & ~b & c;
const auto lhs{ir.BitwiseAnd(a, not_b)};
const auto rhs{ir.BitwiseAnd(lhs, c)};
r = ir.BitwiseOr(r, rhs);
}
if (ttbl & 0x40) {
// r |= a & b & ~c;
const auto lhs{ir.BitwiseAnd(a, b)};
const auto rhs{ir.BitwiseAnd(lhs, not_c)};
r = ir.BitwiseOr(r, rhs);
}
if (ttbl & 0x80) {
// r |= a & b & c;
const auto lhs{ir.BitwiseAnd(a, b)};
const auto rhs{ir.BitwiseAnd(lhs, c)};
r = ir.BitwiseOr(r, rhs);
}
return r;
return *value;
}
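For reference, LOP3.LUT selects each result bit from an 8-entry truth table indexed by the corresponding bits of a, b and c (a as the high index bit, matching the minterm order of the removed code). Both the old per-minterm expansion and the new grouped emitter realize this mapping; a scalar model for sanity-checking, where Lop3Reference is a hypothetical helper rather than part of the shader recompiler:

#include <cassert>
#include <cstdint>

// Scalar reference for LOP3.LUT semantics: bit i of the result is taken from
// truth-table entry (a_i << 2) | (b_i << 1) | c_i.
static std::uint32_t Lop3Reference(std::uint32_t a, std::uint32_t b, std::uint32_t c,
                                   std::uint32_t ttbl) {
    std::uint32_t r = 0;
    for (int bit = 0; bit < 32; ++bit) {
        const std::uint32_t index =
            (((a >> bit) & 1) << 2) | (((b >> bit) & 1) << 1) | ((c >> bit) & 1);
        r |= ((ttbl >> index) & 1U) << bit;
    }
    return r;
}

int main() {
    const std::uint32_t a = 0x12345678, b = 0x0F0F0F0F, c = 0xFFFF0000;
    assert(Lop3Reference(a, b, c, 0xC0) == (a & b));     // bits 6..7 set -> a & b
    assert(Lop3Reference(a, b, c, 0x96) == (a ^ b ^ c)); // odd-parity truth table
    assert(Lop3Reference(a, b, c, 0xFE) == (a | b | c)); // any input set
    return 0;
}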
IR::U32 LOP3(TranslatorVisitor& v, u64 insn, const IR::U32& op_b, const IR::U32& op_c, u64 lut) {

View File

@@ -243,10 +243,6 @@ GraphicsPipeline::GraphicsPipeline(
case Settings::ShaderBackend::GLASM:
if (!sources[stage].empty()) {
assembly_programs[stage] = CompileProgram(sources[stage], AssemblyStage(stage));
if (in_parallel) {
// Make sure program is built before continuing when building in parallel
glGetString(GL_PROGRAM_ERROR_STRING_NV);
}
}
break;
case Settings::ShaderBackend::SPIRV:
@@ -256,20 +252,18 @@ GraphicsPipeline::GraphicsPipeline(
break;
}
}
if (in_parallel && backend != Settings::ShaderBackend::GLASM) {
// Make sure programs have built if we are building shaders in parallel
for (OGLProgram& program : source_programs) {
if (program.handle != 0) {
GLint status{};
glGetProgramiv(program.handle, GL_LINK_STATUS, &status);
}
}
if (in_parallel) {
std::lock_guard lock{built_mutex};
built_fence.Create();
// Flush this context to ensure compilation commands and fence are in the GPU pipe.
glFlush();
built_condvar.notify_one();
} else {
is_built = true;
}
if (shader_notify) {
shader_notify->MarkShaderComplete();
}
is_built = true;
built_condvar.notify_one();
}};
if (thread_worker) {
thread_worker->QueueWork(std::move(func));
@@ -440,7 +434,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
buffer_cache.UpdateGraphicsBuffers(is_indexed);
buffer_cache.BindHostGeometryBuffers(is_indexed);
if (!is_built.load(std::memory_order::relaxed)) {
if (!IsBuilt()) {
WaitForBuild();
}
const bool use_assembly{assembly_programs[0].handle != 0};
@@ -585,8 +579,26 @@ void GraphicsPipeline::GenerateTransformFeedbackState() {
}
void GraphicsPipeline::WaitForBuild() {
std::unique_lock lock{built_mutex};
built_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); });
if (built_fence.handle == 0) {
std::unique_lock lock{built_mutex};
built_condvar.wait(lock, [this] { return built_fence.handle != 0; });
}
ASSERT(glClientWaitSync(built_fence.handle, 0, GL_TIMEOUT_IGNORED) != GL_WAIT_FAILED);
is_built = true;
}
bool GraphicsPipeline::IsBuilt() noexcept {
if (is_built) {
return true;
}
if (built_fence.handle == 0) {
return false;
}
// Timeout of zero means this is non-blocking
const auto sync_status = glClientWaitSync(built_fence.handle, 0, 0);
ASSERT(sync_status != GL_WAIT_FAILED);
is_built = sync_status != GL_TIMEOUT_EXPIRED;
return is_built;
}
} // namespace OpenGL
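
The build-tracking change above amounts to: queue the compile and link commands on the worker context, place a fence behind them, glFlush so both reach the driver, then have the render thread poll the fence with a zero timeout (IsBuilt) or wait on it (WaitForBuild). A minimal sketch of that pattern against raw GL, assuming a current context and any GL 3.2+ loader; error handling omitted:

#include <glad/glad.h> // assumed loader; any loader exposing GL sync objects works

// Producer (worker context): submit work, then publish a fence behind it.
GLsync PublishFence() {
    // ... glCompileShader / glLinkProgram calls for the pipeline go here ...
    GLsync fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
    glFlush(); // make sure the commands and the fence reach the driver/GPU
    return fence;
}

// Consumer (render context): non-blocking completion check, mirroring IsBuilt().
bool IsSignaled(GLsync fence) {
    if (fence == nullptr) {
        return false; // fence not published yet
    }
    const GLenum status = glClientWaitSync(fence, 0, 0); // zero timeout: just poll
    return status == GL_ALREADY_SIGNALED || status == GL_CONDITION_SATISFIED;
}

// Consumer, blocking variant mirroring WaitForBuild(): wait with no timeout.
void WaitSignaled(GLsync fence) {
    const GLenum status = glClientWaitSync(fence, 0, GL_TIMEOUT_IGNORED);
    // GL_WAIT_FAILED would indicate an error; the pipeline asserts on it above.
    (void)status;
}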

View File

@@ -100,9 +100,7 @@ public:
return writes_global_memory;
}
[[nodiscard]] bool IsBuilt() const noexcept {
return is_built.load(std::memory_order::relaxed);
}
[[nodiscard]] bool IsBuilt() noexcept;
template <typename Spec>
static auto MakeConfigureSpecFunc() {
@@ -154,7 +152,8 @@ private:
std::mutex built_mutex;
std::condition_variable built_condvar;
std::atomic_bool is_built{false};
OGLSync built_fence{};
bool is_built{false};
};
} // namespace OpenGL

View File

@@ -609,6 +609,7 @@ void Config::ReadCpuValues() {
ReadGlobalSetting(Settings::values.cpuopt_unsafe_ignore_standard_fpcr);
ReadGlobalSetting(Settings::values.cpuopt_unsafe_inaccurate_nan);
ReadGlobalSetting(Settings::values.cpuopt_unsafe_fastmem_check);
ReadGlobalSetting(Settings::values.cpuopt_unsafe_ignore_global_monitor);
if (global) {
ReadBasicSetting(Settings::values.cpu_debug_mode);
@@ -621,6 +622,8 @@ void Config::ReadCpuValues() {
ReadBasicSetting(Settings::values.cpuopt_misc_ir);
ReadBasicSetting(Settings::values.cpuopt_reduce_misalign_checks);
ReadBasicSetting(Settings::values.cpuopt_fastmem);
ReadBasicSetting(Settings::values.cpuopt_fastmem_exclusives);
ReadBasicSetting(Settings::values.cpuopt_recompile_exclusives);
}
qt_config->endGroup();
@@ -1139,6 +1142,7 @@ void Config::SaveCpuValues() {
WriteGlobalSetting(Settings::values.cpuopt_unsafe_ignore_standard_fpcr);
WriteGlobalSetting(Settings::values.cpuopt_unsafe_inaccurate_nan);
WriteGlobalSetting(Settings::values.cpuopt_unsafe_fastmem_check);
WriteGlobalSetting(Settings::values.cpuopt_unsafe_ignore_global_monitor);
if (global) {
WriteBasicSetting(Settings::values.cpu_debug_mode);

View File

@@ -36,6 +36,7 @@ void ConfigureCpu::SetConfiguration() {
ui->cpuopt_unsafe_ignore_standard_fpcr->setEnabled(runtime_lock);
ui->cpuopt_unsafe_inaccurate_nan->setEnabled(runtime_lock);
ui->cpuopt_unsafe_fastmem_check->setEnabled(runtime_lock);
ui->cpuopt_unsafe_ignore_global_monitor->setEnabled(runtime_lock);
ui->cpuopt_unsafe_unfuse_fma->setChecked(Settings::values.cpuopt_unsafe_unfuse_fma.GetValue());
ui->cpuopt_unsafe_reduce_fp_error->setChecked(
@@ -46,6 +47,8 @@ void ConfigureCpu::SetConfiguration() {
Settings::values.cpuopt_unsafe_inaccurate_nan.GetValue());
ui->cpuopt_unsafe_fastmem_check->setChecked(
Settings::values.cpuopt_unsafe_fastmem_check.GetValue());
ui->cpuopt_unsafe_ignore_global_monitor->setChecked(
Settings::values.cpuopt_unsafe_ignore_global_monitor.GetValue());
if (Settings::IsConfiguringGlobal()) {
ui->accuracy->setCurrentIndex(static_cast<int>(Settings::values.cpu_accuracy.GetValue()));
@@ -82,6 +85,9 @@ void ConfigureCpu::ApplyConfiguration() {
ConfigurationShared::ApplyPerGameSetting(&Settings::values.cpuopt_unsafe_fastmem_check,
ui->cpuopt_unsafe_fastmem_check,
cpuopt_unsafe_fastmem_check);
ConfigurationShared::ApplyPerGameSetting(&Settings::values.cpuopt_unsafe_ignore_global_monitor,
ui->cpuopt_unsafe_ignore_global_monitor,
cpuopt_unsafe_ignore_global_monitor);
}
void ConfigureCpu::changeEvent(QEvent* event) {
@@ -120,4 +126,7 @@ void ConfigureCpu::SetupPerGameUI() {
ConfigurationShared::SetColoredTristate(ui->cpuopt_unsafe_fastmem_check,
Settings::values.cpuopt_unsafe_fastmem_check,
cpuopt_unsafe_fastmem_check);
ConfigurationShared::SetColoredTristate(ui->cpuopt_unsafe_ignore_global_monitor,
Settings::values.cpuopt_unsafe_ignore_global_monitor,
cpuopt_unsafe_ignore_global_monitor);
}

View File

@@ -45,6 +45,7 @@ private:
ConfigurationShared::CheckState cpuopt_unsafe_ignore_standard_fpcr;
ConfigurationShared::CheckState cpuopt_unsafe_inaccurate_nan;
ConfigurationShared::CheckState cpuopt_unsafe_fastmem_check;
ConfigurationShared::CheckState cpuopt_unsafe_ignore_global_monitor;
const Core::System& system;
};

View File

@@ -150,6 +150,18 @@
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="cpuopt_unsafe_ignore_global_monitor">
<property name="toolTip">
<string>
&lt;div&gt;This option improves speed by relying only on the semantics of cmpxchg to ensure safety of exclusive access instructions. Please note this may result in deadlocks and other race conditions.&lt;/div&gt;
</string>
</property>
<property name="text">
<string>Ignore global monitor</string>
</property>
</widget>
</item>
</layout>
</widget>
</item>

View File

@@ -44,6 +44,12 @@ void ConfigureCpuDebug::SetConfiguration() {
Settings::values.cpuopt_reduce_misalign_checks.GetValue());
ui->cpuopt_fastmem->setEnabled(runtime_lock);
ui->cpuopt_fastmem->setChecked(Settings::values.cpuopt_fastmem.GetValue());
ui->cpuopt_fastmem_exclusives->setEnabled(runtime_lock);
ui->cpuopt_fastmem_exclusives->setChecked(
Settings::values.cpuopt_fastmem_exclusives.GetValue());
ui->cpuopt_recompile_exclusives->setEnabled(runtime_lock);
ui->cpuopt_recompile_exclusives->setChecked(
Settings::values.cpuopt_recompile_exclusives.GetValue());
}
void ConfigureCpuDebug::ApplyConfiguration() {
@@ -56,6 +62,8 @@ void ConfigureCpuDebug::ApplyConfiguration() {
Settings::values.cpuopt_misc_ir = ui->cpuopt_misc_ir->isChecked();
Settings::values.cpuopt_reduce_misalign_checks = ui->cpuopt_reduce_misalign_checks->isChecked();
Settings::values.cpuopt_fastmem = ui->cpuopt_fastmem->isChecked();
Settings::values.cpuopt_fastmem_exclusives = ui->cpuopt_fastmem_exclusives->isChecked();
Settings::values.cpuopt_recompile_exclusives = ui->cpuopt_recompile_exclusives->isChecked();
}
void ConfigureCpuDebug::changeEvent(QEvent* event) {

View File

@@ -144,7 +144,34 @@
</string>
</property>
<property name="text">
<string>Enable Host MMU Emulation</string>
<string>Enable Host MMU Emulation (general memory instructions)</string>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="cpuopt_fastmem_exclusives">
<property name="toolTip">
<string>
&lt;div style=&quot;white-space: nowrap&quot;&gt;This optimization speeds up exclusive memory accesses by the guest program.&lt;/div&gt;
&lt;div style=&quot;white-space: nowrap&quot;&gt;Enabling it causes guest exclusive memory reads/writes to be done directly into memory and make use of Host's MMU.&lt;/div&gt;
&lt;div style=&quot;white-space: nowrap&quot;&gt;Disabling this forces all exclusive memory accesses to use Software MMU Emulation.&lt;/div&gt;
</string>
</property>
<property name="text">
<string>Enable Host MMU Emulation (exclusive memory instructions)</string>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="cpuopt_recompile_exclusives">
<property name="toolTip">
<string>
&lt;div style=&quot;white-space: nowrap&quot;&gt;This optimization speeds up exclusive memory accesses by the guest program.&lt;/div&gt;
&lt;div style=&quot;white-space: nowrap&quot;&gt;Enabling it reduces the overhead of fastmem failure of exclusive memory accesses.&lt;/div&gt;
</string>
</property>
<property name="text">
<string>Enable recompilation of exclusive memory instructions</string>
</property>
</widget>
</item>

View File

@@ -280,11 +280,14 @@ void Config::ReadValues() {
ReadSetting("Cpu", Settings::values.cpuopt_misc_ir);
ReadSetting("Cpu", Settings::values.cpuopt_reduce_misalign_checks);
ReadSetting("Cpu", Settings::values.cpuopt_fastmem);
ReadSetting("Cpu", Settings::values.cpuopt_fastmem_exclusives);
ReadSetting("Cpu", Settings::values.cpuopt_recompile_exclusives);
ReadSetting("Cpu", Settings::values.cpuopt_unsafe_unfuse_fma);
ReadSetting("Cpu", Settings::values.cpuopt_unsafe_reduce_fp_error);
ReadSetting("Cpu", Settings::values.cpuopt_unsafe_ignore_standard_fpcr);
ReadSetting("Cpu", Settings::values.cpuopt_unsafe_inaccurate_nan);
ReadSetting("Cpu", Settings::values.cpuopt_unsafe_fastmem_check);
ReadSetting("Cpu", Settings::values.cpuopt_unsafe_ignore_global_monitor);
// Renderer
ReadSetting("Renderer", Settings::values.renderer_backend);

View File

@@ -174,6 +174,14 @@ cpuopt_reduce_misalign_checks =
# 0: Disabled, 1 (default): Enabled
cpuopt_fastmem =
# Enable Host MMU Emulation for exclusive memory instructions (faster guest memory access)
# 0: Disabled, 1 (default): Enabled
cpuopt_fastmem_exclusives =
# Enable fallback on failure of fastmem of exclusive memory instructions (faster guest memory access)
# 0: Disabled, 1 (default): Enabled
cpuopt_recompile_exclusives =
# Enable unfuse FMA (improve performance on CPUs without FMA)
# Only enabled if cpu_accuracy is set to Unsafe. Automatically chosen with cpu_accuracy = Auto-select.
# 0: Disabled, 1 (default): Enabled
@@ -199,6 +207,11 @@ cpuopt_unsafe_inaccurate_nan =
# 0: Disabled, 1 (default): Enabled
cpuopt_unsafe_fastmem_check =
# Enable faster exclusive instructions
# Only enabled if cpu_accuracy is set to Unsafe. Automatically chosen with cpu_accuracy = Auto-select.
# 0: Disabled, 1 (default): Enabled
cpuopt_unsafe_ignore_global_monitor =
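For example, a configuration that spells out the new keys explicitly (values follow the 0/1 convention documented above; this is an illustrative excerpt, not part of the shipped default file):

[Cpu]
# Host MMU emulation for exclusive memory instructions
cpuopt_fastmem_exclusives = 1
# Fall back to recompilation when fastmem fails for exclusive instructions
cpuopt_recompile_exclusives = 1
# Only takes effect when cpu_accuracy selects the Unsafe preset
cpuopt_unsafe_ignore_global_monitor = 1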
[Renderer]
# Which backend API to use.
# 0 (default): OpenGL, 1: Vulkan