Compare commits

..

19 Commits

Author SHA1 Message Date
ameerj
d956fb3c7c emit_glsl_warp: Fix shuffle ops for 64-thread warp sizes 2021-08-31 16:11:25 -04:00
ameerj
5b45dfe971 emit_glsl_warp: Fix ballot related ops for 64-thread warp sizes 2021-08-31 16:11:25 -04:00
ameerj
a5d9dcf3d9 emit_spirv_warp: Fix shuffle ops for 64-thread warp sizes 2021-08-31 13:40:39 -04:00
ameerj
95213270ef emit_spirv_warp: Fix ballot related ops for 64-thread warp sizes 2021-08-31 13:40:12 -04:00
bunnei
956171f024 Merge pull request #6897 from FernandoS27/pineapple-does-not-belong-in-pizza
Project <tentative title>: Rework Garbage Collection.
2021-08-31 09:11:21 -07:00
bunnei
122eb3cbd0 Merge pull request #6928 from ameerj/spirv-get-frontface
emit_spirv_context_get_set: Fix Get FrontFace return value
2021-08-30 18:16:31 -07:00
bunnei
ec19d9594f Merge pull request #6879 from ameerj/decoder-assert
vk_blit_screen: Fix non-accelerated texture size calculation
2021-08-30 15:24:04 -07:00
bunnei
5f19b66189 Merge pull request #6905 from Morph1984/nifm-misc
nifm/network_interface: Cleanup and populate fields in GetCurrentNetworkProfile
2021-08-29 00:04:58 -07:00
bunnei
4e88989435 Merge pull request #6921 from ameerj/vp9-unused
vp9_types: Remove unusued VP9 info struct members
2021-08-28 22:20:09 -07:00
Morph
7d2265b6a8 Merge pull request #6927 from german77/ngct
ngct: Stub NGCT:U service
2021-08-28 23:03:29 -04:00
german77
f134a5e56c ngct: Stub NGCT:U service 2021-08-27 14:15:34 -05:00
Morph
c1e2063c0d service: nifm: Populate fields in GetCurrentNetworkProfile
Populates the current_address, subnet_mask, and gateway fields from the selected network interface.
2021-08-27 02:10:59 -04:00
Morph
878d0225c5 service: nifm: Cleanup GetCurrentIpConfigInfo 2021-08-27 02:10:58 -04:00
Morph
871e1c6315 network_interface: Cleanup code 2021-08-27 02:10:58 -04:00
Morph
a32a7dacf4 network_interface: Replace default return value with std::nullopt 2021-08-27 02:10:58 -04:00
ameerj
6e407c02d8 emit_spirv_context_get_set: Fix Get FrontFace return value
The IR expects GetAttribute to return an F32 value. This case was returning a U32 instead.
2021-08-26 21:37:34 -04:00
ameerj
eb2624ed65 vp9_types: Minor refactor of VP9 info structs. 2021-08-25 21:42:43 -04:00
ameerj
3de38c9a70 vp9_types: Remove unused Vp9PictureInfo members 2021-08-25 21:29:22 -04:00
ameerj
537c6ac8fe vk_blit_screen: Fix non-accelerated texture size calculation
Addresses the potential OOB access in UnswizzleTexture.
2021-08-16 14:28:10 -04:00
15 changed files with 381 additions and 275 deletions

View File

@@ -111,6 +111,7 @@ bool ParseFilterRule(Filter& instance, Iterator begin, Iterator end) {
SUB(Service, NCM) \
SUB(Service, NFC) \
SUB(Service, NFP) \
SUB(Service, NGCT) \
SUB(Service, NIFM) \
SUB(Service, NIM) \
SUB(Service, NPNS) \

View File

@@ -81,6 +81,7 @@ enum class Class : u8 {
Service_NCM, ///< The NCM service
Service_NFC, ///< The NFC (Near-field communication) service
Service_NFP, ///< The NFP service
Service_NGCT, ///< The NGCT (No Good Content for Terra) service
Service_NIFM, ///< The NIFM (Network interface) service
Service_NIM, ///< The NIM service
Service_NPNS, ///< The NPNS service

View File

@@ -452,6 +452,8 @@ add_library(core STATIC
hle/service/nfp/nfp.h
hle/service/nfp/nfp_user.cpp
hle/service/nfp/nfp_user.h
hle/service/ngct/ngct.cpp
hle/service/ngct/ngct.h
hle/service/nifm/nifm.cpp
hle/service/nifm/nifm.h
hle/service/nim/nim.cpp

View File

@@ -0,0 +1,46 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included
#include "common/string_util.h"
#include "core/core.h"
#include "core/hle/ipc_helpers.h"
#include "core/hle/service/ngct/ngct.h"
#include "core/hle/service/service.h"
namespace Service::NGCT {
class IService final : public ServiceFramework<IService> {
public:
explicit IService(Core::System& system_) : ServiceFramework{system_, "ngct:u"} {
// clang-format off
static const FunctionInfo functions[] = {
{0, nullptr, "Match"},
{1, &IService::Filter, "Filter"},
};
// clang-format on
RegisterHandlers(functions);
}
private:
void Filter(Kernel::HLERequestContext& ctx) {
const auto buffer = ctx.ReadBuffer();
const auto text = Common::StringFromFixedZeroTerminatedBuffer(
reinterpret_cast<const char*>(buffer.data()), buffer.size());
LOG_WARNING(Service_NGCT, "(STUBBED) called, text={}", text);
// Return the same string since we don't censor anything
ctx.WriteBuffer(buffer);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(ResultSuccess);
}
};
void InstallInterfaces(SM::ServiceManager& service_manager, Core::System& system) {
std::make_shared<IService>(system)->InstallAsService(system.ServiceManager());
}
} // namespace Service::NGCT

View File

@@ -0,0 +1,20 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included
#pragma once
namespace Core {
class System;
}
namespace Service::SM {
class ServiceManager;
}
namespace Service::NGCT {
/// Registers all NGCT services with the specified service manager.
void InstallInterfaces(SM::ServiceManager& service_manager, Core::System& system);
} // namespace Service::NGCT

View File

@@ -277,37 +277,45 @@ private:
void GetCurrentNetworkProfile(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_NIFM, "(STUBBED) called");
const SfNetworkProfileData network_profile_data{
.ip_setting_data{
.ip_address_setting{
.is_automatic{true},
.current_address{192, 168, 1, 100},
.subnet_mask{255, 255, 255, 0},
.gateway{192, 168, 1, 1},
const auto net_iface = Network::GetSelectedNetworkInterface();
const SfNetworkProfileData network_profile_data = [&net_iface] {
if (!net_iface) {
return SfNetworkProfileData{};
}
return SfNetworkProfileData{
.ip_setting_data{
.ip_address_setting{
.is_automatic{true},
.current_address{Network::TranslateIPv4(net_iface->ip_address)},
.subnet_mask{Network::TranslateIPv4(net_iface->subnet_mask)},
.gateway{Network::TranslateIPv4(net_iface->gateway)},
},
.dns_setting{
.is_automatic{true},
.primary_dns{1, 1, 1, 1},
.secondary_dns{1, 0, 0, 1},
},
.proxy_setting{
.enabled{false},
.port{},
.proxy_server{},
.automatic_auth_enabled{},
.user{},
.password{},
},
.mtu{1500},
},
.dns_setting{
.is_automatic{true},
.primary_dns{1, 1, 1, 1},
.secondary_dns{1, 0, 0, 1},
.uuid{0xdeadbeef, 0xdeadbeef},
.network_name{"yuzu Network"},
.wireless_setting_data{
.ssid_length{12},
.ssid{"yuzu Network"},
.passphrase{"yuzupassword"},
},
.proxy_setting{
.enabled{false},
.port{},
.proxy_server{},
.automatic_auth_enabled{},
.user{},
.password{},
},
.mtu{1500},
},
.uuid{0xdeadbeef, 0xdeadbeef},
.network_name{"yuzu Network"},
.wireless_setting_data{
.ssid_length{12},
.ssid{"yuzu Network"},
.passphrase{"yuzupassword"},
},
};
};
}();
ctx.WriteBuffer(network_profile_data);
@@ -352,38 +360,33 @@ private:
LOG_WARNING(Service_NIFM, "(STUBBED) called");
struct IpConfigInfo {
IpAddressSetting ip_address_setting;
DnsSetting dns_setting;
IpAddressSetting ip_address_setting{};
DnsSetting dns_setting{};
};
static_assert(sizeof(IpConfigInfo) == sizeof(IpAddressSetting) + sizeof(DnsSetting),
"IpConfigInfo has incorrect size.");
IpConfigInfo ip_config_info{
.ip_address_setting{
.is_automatic{true},
.current_address{0, 0, 0, 0},
.subnet_mask{255, 255, 255, 0},
.gateway{192, 168, 1, 1},
},
.dns_setting{
.is_automatic{true},
.primary_dns{1, 1, 1, 1},
.secondary_dns{1, 0, 0, 1},
},
};
const auto net_iface = Network::GetSelectedNetworkInterface();
const auto iface = Network::GetSelectedNetworkInterface();
if (iface) {
ip_config_info.ip_address_setting =
IpAddressSetting{.is_automatic{true},
.current_address{Network::TranslateIPv4(iface->ip_address)},
.subnet_mask{Network::TranslateIPv4(iface->subnet_mask)},
.gateway{Network::TranslateIPv4(iface->gateway)}};
const IpConfigInfo ip_config_info = [&net_iface] {
if (!net_iface) {
return IpConfigInfo{};
}
} else {
LOG_ERROR(Service_NIFM,
"Couldn't get host network configuration info, using default values");
}
return IpConfigInfo{
.ip_address_setting{
.is_automatic{true},
.current_address{Network::TranslateIPv4(net_iface->ip_address)},
.subnet_mask{Network::TranslateIPv4(net_iface->subnet_mask)},
.gateway{Network::TranslateIPv4(net_iface->gateway)},
},
.dns_setting{
.is_automatic{true},
.primary_dns{1, 1, 1, 1},
.secondary_dns{1, 0, 0, 1},
},
};
}();
IPC::ResponseBuilder rb{ctx, 2 + (sizeof(IpConfigInfo) + 3) / sizeof(u32)};
rb.Push(ResultSuccess);

View File

@@ -46,6 +46,7 @@
#include "core/hle/service/ncm/ncm.h"
#include "core/hle/service/nfc/nfc.h"
#include "core/hle/service/nfp/nfp.h"
#include "core/hle/service/ngct/ngct.h"
#include "core/hle/service/nifm/nifm.h"
#include "core/hle/service/nim/nim.h"
#include "core/hle/service/npns/npns.h"
@@ -271,6 +272,7 @@ Services::Services(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system
NCM::InstallInterfaces(*sm, system);
NFC::InstallInterfaces(*sm, system);
NFP::InstallInterfaces(*sm, system);
NGCT::InstallInterfaces(*sm, system);
NIFM::InstallInterfaces(*sm, system);
NIM::InstallInterfaces(*sm, system);
NPNS::InstallInterfaces(*sm, system);

View File

@@ -37,73 +37,73 @@ std::vector<NetworkInterface> GetAvailableNetworkInterfaces() {
AF_INET, GAA_FLAG_SKIP_MULTICAST | GAA_FLAG_SKIP_DNS_SERVER | GAA_FLAG_INCLUDE_GATEWAYS,
nullptr, adapter_addresses.data(), &buf_size);
if (ret == ERROR_BUFFER_OVERFLOW) {
adapter_addresses.resize((buf_size / sizeof(IP_ADAPTER_ADDRESSES)) + 1);
} else {
if (ret != ERROR_BUFFER_OVERFLOW) {
break;
}
adapter_addresses.resize((buf_size / sizeof(IP_ADAPTER_ADDRESSES)) + 1);
}
if (ret == NO_ERROR) {
std::vector<NetworkInterface> result;
for (auto current_address = adapter_addresses.data(); current_address != nullptr;
current_address = current_address->Next) {
if (current_address->FirstUnicastAddress == nullptr ||
current_address->FirstUnicastAddress->Address.lpSockaddr == nullptr) {
continue;
}
if (current_address->OperStatus != IfOperStatusUp) {
continue;
}
const auto ip_addr = Common::BitCast<struct sockaddr_in>(
*current_address->FirstUnicastAddress->Address.lpSockaddr)
.sin_addr;
ULONG mask = 0;
if (ConvertLengthToIpv4Mask(current_address->FirstUnicastAddress->OnLinkPrefixLength,
&mask) != NO_ERROR) {
LOG_ERROR(Network, "Failed to convert IPv4 prefix length to subnet mask");
continue;
}
struct in_addr gateway = {.S_un{.S_addr{0}}};
if (current_address->FirstGatewayAddress != nullptr &&
current_address->FirstGatewayAddress->Address.lpSockaddr != nullptr) {
gateway = Common::BitCast<struct sockaddr_in>(
*current_address->FirstGatewayAddress->Address.lpSockaddr)
.sin_addr;
}
result.push_back(NetworkInterface{
.name{Common::UTF16ToUTF8(std::wstring{current_address->FriendlyName})},
.ip_address{ip_addr},
.subnet_mask = in_addr{.S_un{.S_addr{mask}}},
.gateway = gateway});
}
return result;
} else {
if (ret != NO_ERROR) {
LOG_ERROR(Network, "Failed to get network interfaces with GetAdaptersAddresses");
return {};
}
std::vector<NetworkInterface> result;
for (auto current_address = adapter_addresses.data(); current_address != nullptr;
current_address = current_address->Next) {
if (current_address->FirstUnicastAddress == nullptr ||
current_address->FirstUnicastAddress->Address.lpSockaddr == nullptr) {
continue;
}
if (current_address->OperStatus != IfOperStatusUp) {
continue;
}
const auto ip_addr = Common::BitCast<struct sockaddr_in>(
*current_address->FirstUnicastAddress->Address.lpSockaddr)
.sin_addr;
ULONG mask = 0;
if (ConvertLengthToIpv4Mask(current_address->FirstUnicastAddress->OnLinkPrefixLength,
&mask) != NO_ERROR) {
LOG_ERROR(Network, "Failed to convert IPv4 prefix length to subnet mask");
continue;
}
struct in_addr gateway = {.S_un{.S_addr{0}}};
if (current_address->FirstGatewayAddress != nullptr &&
current_address->FirstGatewayAddress->Address.lpSockaddr != nullptr) {
gateway = Common::BitCast<struct sockaddr_in>(
*current_address->FirstGatewayAddress->Address.lpSockaddr)
.sin_addr;
}
result.emplace_back(NetworkInterface{
.name{Common::UTF16ToUTF8(std::wstring{current_address->FriendlyName})},
.ip_address{ip_addr},
.subnet_mask = in_addr{.S_un{.S_addr{mask}}},
.gateway = gateway});
}
return result;
}
#else
std::vector<NetworkInterface> GetAvailableNetworkInterfaces() {
std::vector<NetworkInterface> result;
struct ifaddrs* ifaddr = nullptr;
if (getifaddrs(&ifaddr) != 0) {
LOG_ERROR(Network, "Failed to get network interfaces with getifaddrs: {}",
std::strerror(errno));
return result;
return {};
}
std::vector<NetworkInterface> result;
for (auto ifa = ifaddr; ifa != nullptr; ifa = ifa->ifa_next) {
if (ifa->ifa_addr == nullptr || ifa->ifa_netmask == nullptr) {
continue;
@@ -117,55 +117,62 @@ std::vector<NetworkInterface> GetAvailableNetworkInterfaces() {
continue;
}
std::uint32_t gateway{0};
u32 gateway{};
std::ifstream file{"/proc/net/route"};
if (file.is_open()) {
// ignore header
file.ignore(std::numeric_limits<std::streamsize>::max(), '\n');
bool gateway_found = false;
for (std::string line; std::getline(file, line);) {
std::istringstream iss{line};
std::string iface_name{};
iss >> iface_name;
if (iface_name != ifa->ifa_name) {
continue;
}
iss >> std::hex;
std::uint32_t dest{0};
iss >> dest;
if (dest != 0) {
// not the default route
continue;
}
iss >> gateway;
std::uint16_t flags{0};
iss >> flags;
// flag RTF_GATEWAY (defined in <linux/route.h>)
if ((flags & 0x2) == 0) {
continue;
}
gateway_found = true;
break;
}
if (!gateway_found) {
gateway = 0;
}
} else {
if (!file.is_open()) {
LOG_ERROR(Network, "Failed to open \"/proc/net/route\"");
result.emplace_back(NetworkInterface{
.name{ifa->ifa_name},
.ip_address{Common::BitCast<struct sockaddr_in>(*ifa->ifa_addr).sin_addr},
.subnet_mask{Common::BitCast<struct sockaddr_in>(*ifa->ifa_netmask).sin_addr},
.gateway{in_addr{.s_addr = gateway}}});
continue;
}
result.push_back(NetworkInterface{
// ignore header
file.ignore(std::numeric_limits<std::streamsize>::max(), '\n');
bool gateway_found = false;
for (std::string line; std::getline(file, line);) {
std::istringstream iss{line};
std::string iface_name;
iss >> iface_name;
if (iface_name != ifa->ifa_name) {
continue;
}
iss >> std::hex;
u32 dest{};
iss >> dest;
if (dest != 0) {
// not the default route
continue;
}
iss >> gateway;
u16 flags{};
iss >> flags;
// flag RTF_GATEWAY (defined in <linux/route.h>)
if ((flags & 0x2) == 0) {
continue;
}
gateway_found = true;
break;
}
if (!gateway_found) {
gateway = 0;
}
result.emplace_back(NetworkInterface{
.name{ifa->ifa_name},
.ip_address{Common::BitCast<struct sockaddr_in>(*ifa->ifa_addr).sin_addr},
.subnet_mask{Common::BitCast<struct sockaddr_in>(*ifa->ifa_netmask).sin_addr},
@@ -180,11 +187,11 @@ std::vector<NetworkInterface> GetAvailableNetworkInterfaces() {
#endif
std::optional<NetworkInterface> GetSelectedNetworkInterface() {
const std::string& selected_network_interface = Settings::values.network_interface.GetValue();
const auto& selected_network_interface = Settings::values.network_interface.GetValue();
const auto network_interfaces = Network::GetAvailableNetworkInterfaces();
if (network_interfaces.size() == 0) {
LOG_ERROR(Network, "GetAvailableNetworkInterfaces returned no interfaces");
return {};
return std::nullopt;
}
const auto res =
@@ -192,12 +199,12 @@ std::optional<NetworkInterface> GetSelectedNetworkInterface() {
return iface.name == selected_network_interface;
});
if (res != network_interfaces.end()) {
return *res;
} else {
if (res == network_interfaces.end()) {
LOG_ERROR(Network, "Couldn't find selected interface \"{}\"", selected_network_interface);
return {};
return std::nullopt;
}
return *res;
}
} // namespace Network

View File

@@ -11,6 +11,8 @@
namespace Shader::Backend::GLSL {
namespace {
constexpr char THREAD_ID[]{"gl_SubGroupInvocationARB"};
void SetInBoundsFlag(EmitContext& ctx, IR::Inst& inst) {
IR::Inst* const in_bounds{inst.GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)};
if (!in_bounds) {
@@ -43,84 +45,100 @@ void UseShuffleNv(EmitContext& ctx, IR::Inst& inst, std::string_view shfl_op,
ctx.AddU32("{}={}({},{},{},shfl_in_bounds);", inst, shfl_op, value, index, width);
SetInBoundsFlag(ctx, inst);
}
std::string_view BallotIndex(EmitContext& ctx) {
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
return ".x";
}
return "[gl_SubGroupInvocationARB>>5]";
}
std::string GetMask(EmitContext& ctx, std::string_view mask) {
const auto ballot_index{BallotIndex(ctx)};
return fmt::format("uint(uvec2({}){})", mask, ballot_index);
}
} // Anonymous namespace
void EmitLaneId(EmitContext& ctx, IR::Inst& inst) {
ctx.AddU32("{}=gl_SubGroupInvocationARB&31u;", inst);
ctx.AddU32("{}={}&31u;", inst, THREAD_ID);
}
void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred);
} else {
const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")};
const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)};
ctx.AddU1("{}=({}&{})=={};", inst, ballot, active_mask, active_mask);
return;
}
const auto ballot_index{BallotIndex(ctx)};
const auto active_mask{fmt::format("uvec2(ballotARB(true)){}", ballot_index)};
const auto ballot{fmt::format("uvec2(ballotARB({})){}", pred, ballot_index)};
ctx.AddU1("{}=({}&{})=={};", inst, ballot, active_mask, active_mask);
}
void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
ctx.AddU1("{}=anyInvocationARB({});", inst, pred);
} else {
const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")};
const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)};
ctx.AddU1("{}=({}&{})!=0u;", inst, ballot, active_mask, active_mask);
return;
}
const auto ballot_index{BallotIndex(ctx)};
const auto active_mask{fmt::format("uvec2(ballotARB(true)){}", ballot_index)};
const auto ballot{fmt::format("uvec2(ballotARB({})){}", pred, ballot_index)};
ctx.AddU1("{}=({}&{})!=0u;", inst, ballot, active_mask, active_mask);
}
void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred);
} else {
const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")};
const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)};
const auto value{fmt::format("({}^{})", ballot, active_mask)};
ctx.AddU1("{}=({}==0)||({}=={});", inst, value, value, active_mask);
return;
}
const auto ballot_index{BallotIndex(ctx)};
const auto active_mask{fmt::format("uvec2(ballotARB(true)){}", ballot_index)};
const auto ballot{fmt::format("uvec2(ballotARB({})){}", pred, ballot_index)};
const auto value{fmt::format("({}^{})", ballot, active_mask)};
ctx.AddU1("{}=({}==0)||({}=={});", inst, value, value, active_mask);
}
void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
ctx.AddU32("{}=uvec2(ballotARB({})).x;", inst, pred);
} else {
ctx.AddU32("{}=uvec2(ballotARB({}))[gl_SubGroupInvocationARB];", inst, pred);
}
const auto ballot_index{BallotIndex(ctx)};
ctx.AddU32("{}=uvec2(ballotARB({})){};", inst, pred, ballot_index);
}
void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst) {
ctx.AddU32("{}=uint(gl_SubGroupEqMaskARB.x);", inst);
ctx.AddU32("{}={};", inst, GetMask(ctx, "gl_SubGroupEqMaskARB"));
}
void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst) {
ctx.AddU32("{}=uint(gl_SubGroupLtMaskARB.x);", inst);
ctx.AddU32("{}={};", inst, GetMask(ctx, "gl_SubGroupLtMaskARB"));
}
void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst) {
ctx.AddU32("{}=uint(gl_SubGroupLeMaskARB.x);", inst);
ctx.AddU32("{}={};", inst, GetMask(ctx, "gl_SubGroupLeMaskARB"));
}
void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst) {
ctx.AddU32("{}=uint(gl_SubGroupGtMaskARB.x);", inst);
ctx.AddU32("{}={};", inst, GetMask(ctx, "gl_SubGroupGtMaskARB"));
}
void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst) {
ctx.AddU32("{}=uint(gl_SubGroupGeMaskARB.x);", inst);
ctx.AddU32("{}={};", inst, GetMask(ctx, "gl_SubGroupGeMaskARB"));
}
void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value,
std::string_view index, std::string_view clamp,
std::string_view segmentation_mask) {
std::string_view index, std::string_view clamp, std::string_view seg_mask) {
if (ctx.profile.support_gl_warp_intrinsics) {
UseShuffleNv(ctx, inst, "shuffleNV", value, index, clamp, segmentation_mask);
UseShuffleNv(ctx, inst, "shuffleNV", value, index, clamp, seg_mask);
return;
}
const auto not_seg_mask{fmt::format("(~{})", segmentation_mask)};
const auto thread_id{"gl_SubGroupInvocationARB"};
const auto min_thread_id{ComputeMinThreadId(thread_id, segmentation_mask)};
const auto max_thread_id{ComputeMaxThreadId(min_thread_id, clamp, not_seg_mask)};
const bool big_warp{ctx.profile.warp_size_potentially_larger_than_guest};
const auto is_upper_partition{"int(gl_SubGroupInvocationARB)>=32"};
const auto upper_index{fmt::format("{}?{}+32:{}", is_upper_partition, index, index)};
const auto upper_clamp{fmt::format("{}?{}+32:{}", is_upper_partition, clamp, clamp)};
const auto lhs{fmt::format("({}&{})", index, not_seg_mask)};
const auto not_seg_mask{fmt::format("(~{})", seg_mask)};
const auto min_thread_id{ComputeMinThreadId(THREAD_ID, seg_mask)};
const auto max_thread_id{
ComputeMaxThreadId(min_thread_id, big_warp ? upper_clamp : clamp, not_seg_mask)};
const auto lhs{fmt::format("({}&{})", big_warp ? upper_index : index, not_seg_mask)};
const auto src_thread_id{fmt::format("({})|({})", lhs, min_thread_id)};
ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id);
SetInBoundsFlag(ctx, inst);
@@ -128,29 +146,34 @@ void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value,
}
void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index,
std::string_view clamp, std::string_view segmentation_mask) {
std::string_view clamp, std::string_view seg_mask) {
if (ctx.profile.support_gl_warp_intrinsics) {
UseShuffleNv(ctx, inst, "shuffleUpNV", value, index, clamp, segmentation_mask);
UseShuffleNv(ctx, inst, "shuffleUpNV", value, index, clamp, seg_mask);
return;
}
const auto thread_id{"gl_SubGroupInvocationARB"};
const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)};
const auto src_thread_id{fmt::format("({}-{})", thread_id, index)};
const bool big_warp{ctx.profile.warp_size_potentially_larger_than_guest};
const auto is_upper_partition{"int(gl_SubGroupInvocationARB)>=32"};
const auto upper_clamp{fmt::format("{}?{}+32:{}", is_upper_partition, clamp, clamp)};
const auto max_thread_id{GetMaxThreadId(THREAD_ID, big_warp ? upper_clamp : clamp, seg_mask)};
const auto src_thread_id{fmt::format("({}-{})", THREAD_ID, index)};
ctx.Add("shfl_in_bounds=int({})>=int({});", src_thread_id, max_thread_id);
SetInBoundsFlag(ctx, inst);
ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value);
}
void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value,
std::string_view index, std::string_view clamp,
std::string_view segmentation_mask) {
std::string_view index, std::string_view clamp, std::string_view seg_mask) {
if (ctx.profile.support_gl_warp_intrinsics) {
UseShuffleNv(ctx, inst, "shuffleDownNV", value, index, clamp, segmentation_mask);
UseShuffleNv(ctx, inst, "shuffleDownNV", value, index, clamp, seg_mask);
return;
}
const auto thread_id{"gl_SubGroupInvocationARB"};
const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)};
const auto src_thread_id{fmt::format("({}+{})", thread_id, index)};
const bool big_warp{ctx.profile.warp_size_potentially_larger_than_guest};
const auto is_upper_partition{"int(gl_SubGroupInvocationARB)>=32"};
const auto upper_clamp{fmt::format("{}?{}+32:{}", is_upper_partition, clamp, clamp)};
const auto max_thread_id{GetMaxThreadId(THREAD_ID, big_warp ? upper_clamp : clamp, seg_mask)};
const auto src_thread_id{fmt::format("({}+{})", THREAD_ID, index)};
ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id);
SetInBoundsFlag(ctx, inst);
ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value);
@@ -158,14 +181,17 @@ void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value,
void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value,
std::string_view index, std::string_view clamp,
std::string_view segmentation_mask) {
std::string_view seg_mask) {
if (ctx.profile.support_gl_warp_intrinsics) {
UseShuffleNv(ctx, inst, "shuffleXorNV", value, index, clamp, segmentation_mask);
UseShuffleNv(ctx, inst, "shuffleXorNV", value, index, clamp, seg_mask);
return;
}
const auto thread_id{"gl_SubGroupInvocationARB"};
const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)};
const auto src_thread_id{fmt::format("({}^{})", thread_id, index)};
const bool big_warp{ctx.profile.warp_size_potentially_larger_than_guest};
const auto is_upper_partition{"int(gl_SubGroupInvocationARB)>=32"};
const auto upper_clamp{fmt::format("{}?{}+32:{}", is_upper_partition, clamp, clamp)};
const auto max_thread_id{GetMaxThreadId(THREAD_ID, big_warp ? upper_clamp : clamp, seg_mask)};
const auto src_thread_id{fmt::format("({}^{})", THREAD_ID, index)};
ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id);
SetInBoundsFlag(ctx, inst);
ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value);

View File

@@ -333,8 +333,9 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
return ctx.OpBitcast(ctx.F32[1], ctx.OpISub(ctx.U32[1], index, base));
}
case IR::Attribute::FrontFace:
return ctx.OpSelect(ctx.U32[1], ctx.OpLoad(ctx.U1, ctx.front_face),
ctx.Const(std::numeric_limits<u32>::max()), ctx.u32_zero_value);
return ctx.OpSelect(ctx.F32[1], ctx.OpLoad(ctx.U1, ctx.front_face),
ctx.OpBitcast(ctx.F32[1], ctx.Const(std::numeric_limits<u32>::max())),
ctx.f32_zero_value);
case IR::Attribute::PointSpriteS:
return ctx.OpLoad(ctx.F32[1],
ctx.OpAccessChain(ctx.input_f32, ctx.point_coord, ctx.u32_zero_value));

View File

@@ -7,8 +7,13 @@
namespace Shader::Backend::SPIRV {
namespace {
Id GetThreadId(EmitContext& ctx) {
return ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id);
}
Id WarpExtract(EmitContext& ctx, Id value) {
const Id local_index{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
const Id thread_id{GetThreadId(ctx)};
const Id local_index{ctx.OpShiftRightArithmetic(ctx.U32[1], thread_id, ctx.Const(5U))};
return ctx.OpVectorExtractDynamic(ctx.U32[1], value, local_index);
}
@@ -48,10 +53,17 @@ Id SelectValue(EmitContext& ctx, Id in_range, Id value, Id src_thread_id) {
return ctx.OpSelect(ctx.U32[1], in_range,
ctx.OpSubgroupReadInvocationKHR(ctx.U32[1], value, src_thread_id), value);
}
Id GetUpperClamp(EmitContext& ctx, Id invocation_id, Id clamp) {
const Id thirty_two{ctx.Const(32u)};
const Id is_upper_partition{ctx.OpSGreaterThanEqual(ctx.U1, invocation_id, thirty_two)};
const Id upper_clamp{ctx.OpIAdd(ctx.U32[1], thirty_two, clamp)};
return ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_clamp, clamp);
}
} // Anonymous namespace
Id EmitLaneId(EmitContext& ctx) {
const Id id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
const Id id{GetThreadId(ctx)};
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
return id;
}
@@ -123,7 +135,15 @@ Id EmitSubgroupGeMask(EmitContext& ctx) {
Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
Id segmentation_mask) {
const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)};
const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
const Id thread_id{GetThreadId(ctx)};
if (ctx.profile.warp_size_potentially_larger_than_guest) {
const Id thirty_two{ctx.Const(32u)};
const Id is_upper_partition{ctx.OpSGreaterThanEqual(ctx.U1, thread_id, thirty_two)};
const Id upper_index{ctx.OpIAdd(ctx.U32[1], thirty_two, index)};
const Id upper_clamp{ctx.OpIAdd(ctx.U32[1], thirty_two, clamp)};
index = ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_index, index);
clamp = ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_clamp, clamp);
}
const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)};
const Id max_thread_id{ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask)};
@@ -137,7 +157,10 @@ Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id cla
Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
Id segmentation_mask) {
const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
const Id thread_id{GetThreadId(ctx)};
if (ctx.profile.warp_size_potentially_larger_than_guest) {
clamp = GetUpperClamp(ctx, thread_id, clamp);
}
const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
const Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)};
const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)};
@@ -148,7 +171,10 @@ Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
Id segmentation_mask) {
const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
const Id thread_id{GetThreadId(ctx)};
if (ctx.profile.warp_size_potentially_larger_than_guest) {
clamp = GetUpperClamp(ctx, thread_id, clamp);
}
const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
const Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)};
const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
@@ -159,7 +185,10 @@ Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clam
Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
Id segmentation_mask) {
const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
const Id thread_id{GetThreadId(ctx)};
if (ctx.profile.warp_size_potentially_larger_than_guest) {
clamp = GetUpperClamp(ctx, thread_id, clamp);
}
const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
const Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)};
const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};

View File

@@ -742,6 +742,7 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() {
uncomp_writer.WriteDeltaQ(current_frame_info.uv_dc_delta_q);
uncomp_writer.WriteDeltaQ(current_frame_info.uv_ac_delta_q);
ASSERT(!current_frame_info.segment_enabled);
uncomp_writer.WriteBit(false); // Segmentation enabled (TODO).
const s32 min_tile_cols_log2 = CalcMinLog2TileCols(current_frame_info.frame_size.width);

View File

@@ -22,7 +22,7 @@ struct Vp9FrameDimensions {
};
static_assert(sizeof(Vp9FrameDimensions) == 0x8, "Vp9 Vp9FrameDimensions is an invalid size");
enum FrameFlags : u32 {
enum class FrameFlags : u32 {
IsKeyFrame = 1 << 0,
LastFrameIsKeyFrame = 1 << 1,
FrameSizeChanged = 1 << 2,
@@ -30,6 +30,7 @@ enum FrameFlags : u32 {
LastShowFrame = 1 << 4,
IntraOnly = 1 << 5,
};
DECLARE_ENUM_FLAG_OPERATORS(FrameFlags)
enum class TxSize {
Tx4x4 = 0, // 4x4 transform
@@ -92,44 +93,34 @@ struct Vp9EntropyProbs {
static_assert(sizeof(Vp9EntropyProbs) == 0x7B4, "Vp9EntropyProbs is an invalid size");
struct Vp9PictureInfo {
bool is_key_frame;
bool intra_only;
bool last_frame_was_key;
bool frame_size_changed;
bool error_resilient_mode;
bool last_frame_shown;
bool show_frame;
u32 bitstream_size;
std::array<u64, 4> frame_offsets;
std::array<s8, 4> ref_frame_sign_bias;
s32 base_q_index;
s32 y_dc_delta_q;
s32 uv_dc_delta_q;
s32 uv_ac_delta_q;
bool lossless;
s32 transform_mode;
bool allow_high_precision_mv;
s32 interp_filter;
s32 reference_mode;
s8 comp_fixed_ref;
std::array<s8, 2> comp_var_ref;
s32 log2_tile_cols;
s32 log2_tile_rows;
bool segment_enabled;
bool segment_map_update;
bool segment_map_temporal_update;
s32 segment_abs_delta;
std::array<u32, 8> segment_feature_enable;
std::array<std::array<s16, 4>, 8> segment_feature_data;
bool mode_ref_delta_enabled;
bool use_prev_in_find_mv_refs;
std::array<s8, 4> ref_deltas;
std::array<s8, 2> mode_deltas;
Vp9EntropyProbs entropy;
Vp9FrameDimensions frame_size;
u8 first_level;
u8 sharpness_level;
u32 bitstream_size;
std::array<u64, 4> frame_offsets;
std::array<bool, 4> refresh_frame;
bool is_key_frame;
bool intra_only;
bool last_frame_was_key;
bool error_resilient_mode;
bool last_frame_shown;
bool show_frame;
bool lossless;
bool allow_high_precision_mv;
bool segment_enabled;
bool mode_ref_delta_enabled;
};
struct Vp9FrameContainer {
@@ -145,7 +136,7 @@ struct PictureInfo {
Vp9FrameDimensions golden_frame_size; ///< 0x50
Vp9FrameDimensions alt_frame_size; ///< 0x58
Vp9FrameDimensions current_frame_size; ///< 0x60
u32 vp9_flags; ///< 0x68
FrameFlags vp9_flags; ///< 0x68
std::array<s8, 4> ref_frame_sign_bias; ///< 0x6C
u8 first_level; ///< 0x70
u8 sharpness_level; ///< 0x71
@@ -158,60 +149,43 @@ struct PictureInfo {
u8 allow_high_precision_mv; ///< 0x78
u8 interp_filter; ///< 0x79
u8 reference_mode; ///< 0x7A
s8 comp_fixed_ref; ///< 0x7B
std::array<s8, 2> comp_var_ref; ///< 0x7C
INSERT_PADDING_BYTES_NOINIT(3); ///< 0x7B
u8 log2_tile_cols; ///< 0x7E
u8 log2_tile_rows; ///< 0x7F
Segmentation segmentation; ///< 0x80
LoopFilter loop_filter; ///< 0xE4
INSERT_PADDING_BYTES_NOINIT(5); ///< 0xEB
u32 surface_params; ///< 0xF0
INSERT_PADDING_WORDS_NOINIT(3); ///< 0xF4
INSERT_PADDING_BYTES_NOINIT(21); ///< 0xEB
[[nodiscard]] Vp9PictureInfo Convert() const {
return {
.is_key_frame = (vp9_flags & FrameFlags::IsKeyFrame) != 0,
.intra_only = (vp9_flags & FrameFlags::IntraOnly) != 0,
.last_frame_was_key = (vp9_flags & FrameFlags::LastFrameIsKeyFrame) != 0,
.frame_size_changed = (vp9_flags & FrameFlags::FrameSizeChanged) != 0,
.error_resilient_mode = (vp9_flags & FrameFlags::ErrorResilientMode) != 0,
.last_frame_shown = (vp9_flags & FrameFlags::LastShowFrame) != 0,
.show_frame = true,
.bitstream_size = bitstream_size,
.frame_offsets{},
.ref_frame_sign_bias = ref_frame_sign_bias,
.base_q_index = base_q_index,
.y_dc_delta_q = y_dc_delta_q,
.uv_dc_delta_q = uv_dc_delta_q,
.uv_ac_delta_q = uv_ac_delta_q,
.lossless = lossless != 0,
.transform_mode = tx_mode,
.allow_high_precision_mv = allow_high_precision_mv != 0,
.interp_filter = interp_filter,
.reference_mode = reference_mode,
.comp_fixed_ref = comp_fixed_ref,
.comp_var_ref = comp_var_ref,
.log2_tile_cols = log2_tile_cols,
.log2_tile_rows = log2_tile_rows,
.segment_enabled = segmentation.enabled != 0,
.segment_map_update = segmentation.update_map != 0,
.segment_map_temporal_update = segmentation.temporal_update != 0,
.segment_abs_delta = segmentation.abs_delta,
.segment_feature_enable = segmentation.feature_mask,
.segment_feature_data = segmentation.feature_data,
.mode_ref_delta_enabled = loop_filter.mode_ref_delta_enabled != 0,
.use_prev_in_find_mv_refs = !(vp9_flags == (FrameFlags::ErrorResilientMode)) &&
!(vp9_flags == (FrameFlags::FrameSizeChanged)) &&
!(vp9_flags == (FrameFlags::IntraOnly)) &&
(vp9_flags == (FrameFlags::LastShowFrame)) &&
!(vp9_flags == (FrameFlags::LastFrameIsKeyFrame)),
.ref_deltas = loop_filter.ref_deltas,
.mode_deltas = loop_filter.mode_deltas,
.entropy{},
.frame_size = current_frame_size,
.first_level = first_level,
.sharpness_level = sharpness_level,
.bitstream_size = bitstream_size,
.frame_offsets{},
.refresh_frame{},
.is_key_frame = True(vp9_flags & FrameFlags::IsKeyFrame),
.intra_only = True(vp9_flags & FrameFlags::IntraOnly),
.last_frame_was_key = True(vp9_flags & FrameFlags::LastFrameIsKeyFrame),
.error_resilient_mode = True(vp9_flags & FrameFlags::ErrorResilientMode),
.last_frame_shown = True(vp9_flags & FrameFlags::LastShowFrame),
.show_frame = true,
.lossless = lossless != 0,
.allow_high_precision_mv = allow_high_precision_mv != 0,
.segment_enabled = segmentation.enabled != 0,
.mode_ref_delta_enabled = loop_filter.mode_ref_delta_enabled != 0,
};
}
};
@@ -316,7 +290,6 @@ ASSERT_POSITION(last_frame_size, 0x48);
ASSERT_POSITION(first_level, 0x70);
ASSERT_POSITION(segmentation, 0x80);
ASSERT_POSITION(loop_filter, 0xE4);
ASSERT_POSITION(surface_params, 0xF0);
#undef ASSERT_POSITION
#define ASSERT_POSITION(field_name, position) \

View File

@@ -159,11 +159,13 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset;
const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr);
const size_t size_bytes = GetSizeInBytes(framebuffer);
// TODO(Rodrigo): Read this from HLE
constexpr u32 block_height_log2 = 4;
const u32 bytes_per_pixel = GetBytesPerPixel(framebuffer);
const u64 size_bytes{Tegra::Texture::CalculateSize(true, bytes_per_pixel,
framebuffer.stride, framebuffer.height,
1, block_height_log2, 0)};
Tegra::Texture::UnswizzleTexture(
mapped_span.subspan(image_offset, size_bytes), std::span(host_ptr, size_bytes),
bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0);

View File

@@ -63,14 +63,6 @@ void SwizzleImpl(std::span<u8> output, std::span<const u8> input, u32 width, u32
const u32 unswizzled_offset =
slice * pitch * height + line * pitch + column * BYTES_PER_PIXEL;
if (const auto offset = (TO_LINEAR ? unswizzled_offset : swizzled_offset);
offset >= input.size()) {
// TODO(Rodrigo): This is an out of bounds access that should never happen. To
// avoid crashing the emulator, break.
ASSERT_MSG(false, "offset {} exceeds input size {}!", offset, input.size());
break;
}
u8* const dst = &output[TO_LINEAR ? swizzled_offset : unswizzled_offset];
const u8* const src = &input[TO_LINEAR ? unswizzled_offset : swizzled_offset];