Compare commits

..

3 Commits

Author SHA1 Message Date
Lioncash
19632d2421 kernel/svc: Make svcCreateThread/svcStartThread/svcSleepThread/svcExitThread calls show up in the debug log
These are actually quite important indicators of thread lifetimes, so
they should be going into the debug log, rather than being treated as
misc info and delegated to the trace log.
2019-04-29 01:38:27 -04:00
Lioncash
d672c6e759 kernel/svc: Reorganize svcSetThreadCoreMask()
Makes the code much nicer to follow in terms of behavior and control
flow. It also fixes a few bugs in the implementation.

Notably, the thread's owner process shouldn't be accessed in order to
retrieve the core mask or ideal core. This should be done through the
current running process. The only reason this bug wasn't encountered yet
is because we currently only support running one process, and thus every
owner process will be the current process.

We also weren't checking against the process' CPU core mask to see if an
allowed core is specified or not.

With this out of the way, it'll be less noisy to implement proper
handling of the affinity flags internally within the kernel thread
instances.
2019-04-29 01:38:27 -04:00
Lioncash
69a2003a8e kernel/thread: Update thread processor ID flags
Adds the missing flags to the enum and documents them.
2019-04-29 01:37:51 -04:00
7 changed files with 79 additions and 147 deletions

View File

@@ -1208,7 +1208,7 @@ static void ExitProcess(Core::System& system) {
/// Creates a new thread
static ResultCode CreateThread(Core::System& system, Handle* out_handle, VAddr entry_point, u64 arg,
VAddr stack_top, u32 priority, s32 processor_id) {
LOG_TRACE(Kernel_SVC,
LOG_DEBUG(Kernel_SVC,
"called entrypoint=0x{:08X}, arg=0x{:08X}, stacktop=0x{:08X}, "
"threadpriority=0x{:08X}, processorid=0x{:08X} : created handle=0x{:08X}",
entry_point, arg, stack_top, priority, processor_id, *out_handle);
@@ -1266,7 +1266,7 @@ static ResultCode CreateThread(Core::System& system, Handle* out_handle, VAddr e
/// Starts the thread for the provided handle
static ResultCode StartThread(Core::System& system, Handle thread_handle) {
LOG_TRACE(Kernel_SVC, "called thread=0x{:08X}", thread_handle);
LOG_DEBUG(Kernel_SVC, "called thread=0x{:08X}", thread_handle);
const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
const SharedPtr<Thread> thread = handle_table.Get<Thread>(thread_handle);
@@ -1289,7 +1289,7 @@ static ResultCode StartThread(Core::System& system, Handle thread_handle) {
/// Called when a thread exits
static void ExitThread(Core::System& system) {
LOG_TRACE(Kernel_SVC, "called, pc=0x{:08X}", system.CurrentArmInterface().GetPC());
LOG_DEBUG(Kernel_SVC, "called, pc=0x{:08X}", system.CurrentArmInterface().GetPC());
auto* const current_thread = system.CurrentScheduler().GetCurrentThread();
current_thread->Stop();
@@ -1299,7 +1299,7 @@ static void ExitThread(Core::System& system) {
/// Sleep the current thread
static void SleepThread(Core::System& system, s64 nanoseconds) {
LOG_TRACE(Kernel_SVC, "called nanoseconds={}", nanoseconds);
LOG_DEBUG(Kernel_SVC, "called nanoseconds={}", nanoseconds);
enum class SleepType : s64 {
YieldWithoutLoadBalancing = 0,
@@ -1744,11 +1744,51 @@ static ResultCode GetThreadCoreMask(Core::System& system, Handle thread_handle,
}
static ResultCode SetThreadCoreMask(Core::System& system, Handle thread_handle, u32 core,
u64 mask) {
LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, mask=0x{:016X}, core=0x{:X}", thread_handle,
mask, core);
u64 affinity_mask) {
LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, core=0x{:X}, affinity_mask=0x{:016X}",
thread_handle, core, affinity_mask);
const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
const auto* const current_process = system.Kernel().CurrentProcess();
if (core == static_cast<u32>(THREADPROCESSORID_IDEAL)) {
const u8 ideal_cpu_core = current_process->GetIdealCore();
ASSERT(ideal_cpu_core != static_cast<u8>(THREADPROCESSORID_IDEAL));
// Set the target CPU to the ideal core specified by the process.
core = ideal_cpu_core;
affinity_mask = 1ULL << core;
} else {
const u64 core_mask = current_process->GetCoreMask();
if ((core_mask | affinity_mask) != core_mask) {
LOG_ERROR(
Kernel_SVC,
"Invalid processor ID specified (core_mask=0x{:08X}, affinity_mask=0x{:016X})",
core_mask, affinity_mask);
return ERR_INVALID_PROCESSOR_ID;
}
if (affinity_mask == 0) {
LOG_ERROR(Kernel_SVC, "Specfified affinity mask is zero.");
return ERR_INVALID_COMBINATION;
}
if (core < Core::NUM_CPU_CORES) {
if ((affinity_mask & (1ULL << core)) == 0) {
LOG_ERROR(Kernel_SVC,
"Core is not enabled for the current mask, core={}, mask={:016X}", core,
affinity_mask);
return ERR_INVALID_COMBINATION;
}
} else if (core != static_cast<u32>(THREADPROCESSORID_DONT_CARE) &&
core != static_cast<u32>(THREADPROCESSORID_DONT_UPDATE)) {
LOG_ERROR(Kernel_SVC, "Invalid processor ID specified (core={}).", core);
return ERR_INVALID_PROCESSOR_ID;
}
}
const auto& handle_table = current_process->GetHandleTable();
const SharedPtr<Thread> thread = handle_table.Get<Thread>(thread_handle);
if (!thread) {
LOG_ERROR(Kernel_SVC, "Thread handle does not exist, thread_handle=0x{:08X}",
@@ -1756,40 +1796,7 @@ static ResultCode SetThreadCoreMask(Core::System& system, Handle thread_handle,
return ERR_INVALID_HANDLE;
}
if (core == static_cast<u32>(THREADPROCESSORID_IDEAL)) {
const u8 ideal_cpu_core = thread->GetOwnerProcess()->GetIdealCore();
ASSERT(ideal_cpu_core != static_cast<u8>(THREADPROCESSORID_IDEAL));
// Set the target CPU to the ideal core specified by the process.
core = ideal_cpu_core;
mask = 1ULL << core;
}
if (mask == 0) {
LOG_ERROR(Kernel_SVC, "Mask is 0");
return ERR_INVALID_COMBINATION;
}
/// This value is used to only change the affinity mask without changing the current ideal core.
static constexpr u32 OnlyChangeMask = static_cast<u32>(-3);
if (core == OnlyChangeMask) {
core = thread->GetIdealCore();
} else if (core >= Core::NUM_CPU_CORES && core != static_cast<u32>(-1)) {
LOG_ERROR(Kernel_SVC, "Invalid core specified, got {}", core);
return ERR_INVALID_PROCESSOR_ID;
}
// Error out if the input core isn't enabled in the input mask.
if (core < Core::NUM_CPU_CORES && (mask & (1ull << core)) == 0) {
LOG_ERROR(Kernel_SVC, "Core is not enabled for the current mask, core={}, mask={:016X}",
core, mask);
return ERR_INVALID_COMBINATION;
}
thread->ChangeCore(core, mask);
thread->ChangeCore(core, affinity_mask);
return RESULT_SUCCESS;
}

View File

@@ -30,12 +30,21 @@ enum ThreadPriority : u32 {
};
enum ThreadProcessorId : s32 {
THREADPROCESSORID_IDEAL = -2, ///< Run thread on the ideal core specified by the process.
THREADPROCESSORID_0 = 0, ///< Run thread on core 0
THREADPROCESSORID_1 = 1, ///< Run thread on core 1
THREADPROCESSORID_2 = 2, ///< Run thread on core 2
THREADPROCESSORID_3 = 3, ///< Run thread on core 3
THREADPROCESSORID_MAX = 4, ///< Processor ID must be less than this
/// Indicates that no particular processor core is preferred.
THREADPROCESSORID_DONT_CARE = -1,
/// Run thread on the ideal core specified by the process.
THREADPROCESSORID_IDEAL = -2,
/// Indicates that the preferred processor ID shouldn't be updated in
/// a core mask setting operation.
THREADPROCESSORID_DONT_UPDATE = -3,
THREADPROCESSORID_0 = 0, ///< Run thread on core 0
THREADPROCESSORID_1 = 1, ///< Run thread on core 1
THREADPROCESSORID_2 = 2, ///< Run thread on core 2
THREADPROCESSORID_3 = 3, ///< Run thread on core 3
THREADPROCESSORID_MAX = 4, ///< Processor ID must be less than this
/// Allowed CPU mask
THREADPROCESSORID_DEFAULT_MASK = (1 << THREADPROCESSORID_0) | (1 << THREADPROCESSORID_1) |

View File

@@ -57,8 +57,8 @@ bool DmaPusher::Step() {
// Push buffer non-empty, read a word
command_headers.resize(command_list_header.size);
gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(),
command_list_header.size * sizeof(u32));
gpu.MemoryManager().ReadBlock(dma_get, command_headers.data(),
command_list_header.size * sizeof(u32));
for (const CommandHeader& command_header : command_headers) {

View File

@@ -418,7 +418,7 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
const GPUVAddr tic_address_gpu{regs.tic.TICAddress() + tic_index * sizeof(Texture::TICEntry)};
Texture::TICEntry tic_entry;
memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
memory_manager.ReadBlock(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear ||
tic_entry.header_version == Texture::TICHeaderVersion::Pitch,
@@ -439,7 +439,7 @@ Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const {
const GPUVAddr tsc_address_gpu{regs.tsc.TSCAddress() + tsc_index * sizeof(Texture::TSCEntry)};
Texture::TSCEntry tsc_entry;
memory_manager.ReadBlockUnsafe(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry));
memory_manager.ReadBlock(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry));
return tsc_entry;
}

View File

@@ -199,15 +199,7 @@ const u8* MemoryManager::GetPointer(GPUVAddr addr) const {
return {};
}
bool MemoryManager::IsBlockContinous(const GPUVAddr start, const std::size_t size) {
const GPUVAddr end = start + size;
const auto host_ptr_start = reinterpret_cast<std::uintptr_t>(GetPointer(start));
const auto host_ptr_end = reinterpret_cast<std::uintptr_t>(GetPointer(end));
const std::size_t range = static_cast<std::size_t>(host_ptr_end - host_ptr_start);
return range == size;
}
void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::size_t size) const {
void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const {
std::size_t remaining_size{size};
std::size_t page_index{src_addr >> page_bits};
std::size_t page_offset{src_addr & page_mask};
@@ -234,30 +226,7 @@ void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::s
}
}
void MemoryManager::ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer,
const std::size_t size) const {
std::size_t remaining_size{size};
std::size_t page_index{src_addr >> page_bits};
std::size_t page_offset{src_addr & page_mask};
while (remaining_size > 0) {
const std::size_t copy_amount{
std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
const u8* page_pointer = page_table.pointers[page_index];
if (page_pointer) {
const u8* src_ptr{page_pointer + page_offset};
std::memcpy(dest_buffer, src_ptr, copy_amount);
} else {
std::memset(dest_buffer, 0, copy_amount);
}
page_index++;
page_offset = 0;
dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
remaining_size -= copy_amount;
}
}
void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const std::size_t size) {
void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size) {
std::size_t remaining_size{size};
std::size_t page_index{dest_addr >> page_bits};
std::size_t page_offset{dest_addr & page_mask};
@@ -284,28 +253,7 @@ void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const
}
}
void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer,
const std::size_t size) {
std::size_t remaining_size{size};
std::size_t page_index{dest_addr >> page_bits};
std::size_t page_offset{dest_addr & page_mask};
while (remaining_size > 0) {
const std::size_t copy_amount{
std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
u8* page_pointer = page_table.pointers[page_index];
if (page_pointer) {
u8* dest_ptr{page_pointer + page_offset};
std::memcpy(dest_ptr, src_buffer, copy_amount);
}
page_index++;
page_offset = 0;
src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
remaining_size -= copy_amount;
}
}
void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) {
void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size) {
std::size_t remaining_size{size};
std::size_t page_index{src_addr >> page_bits};
std::size_t page_offset{src_addr & page_mask};
@@ -333,12 +281,6 @@ void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::
}
}
void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) {
std::vector<u8> tmp_buffer(size);
ReadBlockUnsafe(src_addr, tmp_buffer.data(), size);
WriteBlockUnsafe(dest_addr, tmp_buffer.data(), size);
}
void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type,
VAddr backing_addr) {
LOG_DEBUG(HW_GPU, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * page_size,

View File

@@ -65,32 +65,9 @@ public:
u8* GetPointer(GPUVAddr addr);
const u8* GetPointer(GPUVAddr addr) const;
// Returns true if the block is continous in host memory, false otherwise
bool IsBlockContinous(const GPUVAddr start, const std::size_t size);
/**
* ReadBlock and WriteBlock are full read and write operations over virtual
* GPU Memory. It's important to use these when GPU memory may not be continous
* in the Host Memory counterpart. Note: This functions cause Host GPU Memory
* Flushes and Invalidations, respectively to each operation.
*/
void ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::size_t size) const;
void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const std::size_t size);
void CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size);
/**
* ReadBlockUnsafe and WriteBlockUnsafe are special versions of ReadBlock and
* WriteBlock respectively. In this versions, no flushing or invalidation is actually
* done and their performance is similar to a memcpy. This functions can be used
* on either of this 2 scenarios instead of their safe counterpart:
* - Memory which is sure to never be represented in the Host GPU.
* - Memory Managed by a Cache Manager. Example: Texture Flushing should use
* WriteBlockUnsafe instead of WriteBlock since it shouldn't invalidate the texture
* being flushed.
*/
void ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer, const std::size_t size) const;
void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, const std::size_t size);
void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size);
void ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const;
void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size);
void CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size);
private:
using VMAMap = std::map<GPUVAddr, VirtualMemoryArea>;

View File

@@ -38,15 +38,13 @@ GPUVAddr GetShaderAddress(Maxwell::ShaderProgram program) {
}
/// Gets the shader program code from memory for the specified address
ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr gpu_addr,
const u8* host_ptr) {
ProgramCode GetShaderCode(const u8* host_ptr) {
ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
ASSERT_OR_EXECUTE(host_ptr != nullptr, {
std::fill(program_code.begin(), program_code.end(), 0);
return program_code;
});
memory_manager.ReadBlockUnsafe(gpu_addr, program_code.data(),
program_code.size() * sizeof(u64));
std::memcpy(program_code.data(), host_ptr, program_code.size() * sizeof(u64));
return program_code;
}
@@ -499,12 +497,11 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
if (!shader) {
// No shader found - create a new one
ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)};
ProgramCode program_code{GetShaderCode(host_ptr)};
ProgramCode program_code_b;
if (program == Maxwell::ShaderProgram::VertexA) {
const GPUVAddr program_addr_b{GetShaderAddress(Maxwell::ShaderProgram::VertexB)};
program_code_b = GetShaderCode(memory_manager, program_addr_b,
memory_manager.GetPointer(program_addr_b));
program_code_b = GetShaderCode(
memory_manager.GetPointer(GetShaderAddress(Maxwell::ShaderProgram::VertexB)));
}
const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b);
const VAddr cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)};