Merge remote-tracking branch 'upstream/main'

This commit is contained in:
microsoftv 2024-08-14 17:24:16 -04:00
commit 89e564a400
25 changed files with 242 additions and 281 deletions

View file

@ -421,6 +421,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
src/shader_recompiler/ir/passes/dead_code_elimination_pass.cpp
src/shader_recompiler/ir/passes/identity_removal_pass.cpp
src/shader_recompiler/ir/passes/ir_passes.h
src/shader_recompiler/ir/passes/lower_shared_mem_to_registers.cpp
src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp

View file

@ -74,13 +74,22 @@ s32 PS4_SYSV_ABI sceKernelAvailableDirectMemorySize(u64 searchStart, u64 searchE
size_t* sizeOut) {
LOG_WARNING(Kernel_Vmm, "called searchStart = {:#x}, searchEnd = {:#x}, alignment = {:#x}",
searchStart, searchEnd, alignment);
if (searchEnd <= searchStart) {
return ORBIS_KERNEL_ERROR_EINVAL;
}
if (searchEnd > SCE_KERNEL_MAIN_DMEM_SIZE) {
return ORBIS_KERNEL_ERROR_EINVAL;
}
auto* memory = Core::Memory::Instance();
PAddr physAddr;
s32 size = memory->DirectQueryAvailable(searchStart, searchEnd, alignment, &physAddr, sizeOut);
s32 result =
memory->DirectQueryAvailable(searchStart, searchEnd, alignment, &physAddr, sizeOut);
*physAddrOut = static_cast<u64>(physAddr);
return size;
return result;
}
s32 PS4_SYSV_ABI sceKernelVirtualQuery(const void* addr, int flags, OrbisVirtualQueryInfo* info,

View file

@ -1094,6 +1094,19 @@ int PS4_SYSV_ABI scePthreadAttrGetstack(ScePthreadAttr* attr, void** addr, size_
return SCE_KERNEL_ERROR_EINVAL;
}
int PS4_SYSV_ABI scePthreadAttrSetstack(ScePthreadAttr* attr, void* addr, size_t size) {
if (attr == nullptr || *attr == nullptr || addr == nullptr || size < 0x4000) {
return ORBIS_KERNEL_ERROR_EINVAL;
}
int result = pthread_attr_setstack(&(*attr)->pth_attr, addr, size);
LOG_INFO(Kernel_Pthread, "scePthreadAttrSetstack: result = {}", result);
if (result == 0) {
return ORBIS_OK;
}
return ORBIS_KERNEL_ERROR_EINVAL;
}
int PS4_SYSV_ABI scePthreadJoin(ScePthread thread, void** res) {
int result = pthread_join(thread->pth, res);
LOG_INFO(Kernel_Pthread, "scePthreadJoin result = {}", result);
@ -1550,6 +1563,7 @@ void pthreadSymbolsRegister(Core::Loader::SymbolsResolver* sym) {
LIB_FUNCTION("B5GmVDKwpn0", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_yield);
LIB_FUNCTION("-quPa4SEJUw", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrGetstack);
LIB_FUNCTION("Bvn74vj6oLo", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrSetstack);
LIB_FUNCTION("Ru36fiTtJzA", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrGetstackaddr);
LIB_FUNCTION("-fA+7ZlGDQs", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrGetstacksize);
LIB_FUNCTION("14bOACANTBo", "libkernel", 1, "libkernel", 1, 1, scePthreadOnce);

View file

@ -974,8 +974,11 @@ int PS4_SYSV_ABI sceNpGetGamePresenceStatusA() {
return ORBIS_OK;
}
int PS4_SYSV_ABI sceNpGetNpId() {
LOG_ERROR(Lib_NpManager, "(STUBBED) called");
int PS4_SYSV_ABI sceNpGetNpId(OrbisUserServiceUserId userId, OrbisNpId* npId) {
LOG_ERROR(Lib_NpManager, "(DUMMY) called");
std::string name = "shadps4";
strcpy(npId->handle.data, name.c_str());
return ORBIS_OK;
}

View file

@ -11,6 +11,22 @@ class SymbolsResolver;
namespace Libraries::NpManager {
constexpr int ORBIS_NP_ONLINEID_MAX_LENGTH = 16;
typedef int OrbisUserServiceUserId;
struct OrbisNpOnlineId {
char data[ORBIS_NP_ONLINEID_MAX_LENGTH];
char term;
char dummy[3];
};
struct OrbisNpId {
OrbisNpOnlineId handle;
u8 opt[8];
u8 reserved[8];
};
int PS4_SYSV_ABI Func_EF4378573542A508();
int PS4_SYSV_ABI _sceNpIpcCreateMemoryFromKernel();
int PS4_SYSV_ABI _sceNpIpcCreateMemoryFromPool();
@ -204,7 +220,7 @@ int PS4_SYSV_ABI sceNpGetAccountLanguage2();
int PS4_SYSV_ABI sceNpGetAccountLanguageA();
int PS4_SYSV_ABI sceNpGetGamePresenceStatus();
int PS4_SYSV_ABI sceNpGetGamePresenceStatusA();
int PS4_SYSV_ABI sceNpGetNpId();
int PS4_SYSV_ABI sceNpGetNpId(OrbisUserServiceUserId userId, OrbisNpId* npId);
int PS4_SYSV_ABI sceNpGetNpReachabilityState();
int PS4_SYSV_ABI sceNpGetOnlineId();
int PS4_SYSV_ABI sceNpGetParentalControlInfo();

View file

@ -273,10 +273,10 @@ int MemoryManager::VirtualQuery(VAddr addr, int flags,
std::scoped_lock lk{mutex};
auto it = FindVMA(addr);
if (!it->second.IsMapped() && flags == 1) {
if (it->second.type == VMAType::Free && flags == 1) {
it++;
}
if (!it->second.IsMapped()) {
if (it->second.type == VMAType::Free) {
LOG_WARNING(Kernel_Vmm, "VirtualQuery on free memory region");
return ORBIS_KERNEL_ERROR_EACCES;
}

View file

@ -214,6 +214,10 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) {
}
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 element) {
if (attr == IR::Attribute::Position1) {
LOG_WARNING(Render_Vulkan, "Ignoring pos1 export");
return;
}
const Id pointer{OutputAttrPointer(ctx, attr, element)};
ctx.OpStore(pointer, ctx.OpBitcast(ctx.F32[1], value));
}

View file

@ -17,113 +17,133 @@ struct ImageOperands {
operands.push_back(value);
}
void AddOffset(EmitContext& ctx, const IR::Value& offset,
bool can_use_runtime_offsets = false) {
if (offset.IsEmpty()) {
return;
}
if (offset.IsImmediate()) {
const s32 operand = offset.U32();
Add(spv::ImageOperandsMask::ConstOffset, ctx.ConstS32(operand));
return;
}
IR::Inst* const inst{offset.InstRecursive()};
if (inst->AreAllArgsImmediates()) {
switch (inst->GetOpcode()) {
case IR::Opcode::CompositeConstructU32x2:
Add(spv::ImageOperandsMask::ConstOffset,
ctx.ConstS32(static_cast<s32>(inst->Arg(0).U32()),
static_cast<s32>(inst->Arg(1).U32())));
return;
case IR::Opcode::CompositeConstructU32x3:
Add(spv::ImageOperandsMask::ConstOffset,
ctx.ConstS32(static_cast<s32>(inst->Arg(0).U32()),
static_cast<s32>(inst->Arg(1).U32()),
static_cast<s32>(inst->Arg(2).U32())));
return;
default:
break;
}
}
if (can_use_runtime_offsets) {
Add(spv::ImageOperandsMask::Offset, ctx.Def(offset));
} else {
LOG_WARNING(Render_Vulkan,
"Runtime offset provided to unsupported image sample instruction");
}
}
spv::ImageOperandsMask mask{};
boost::container::static_vector<Id, 4> operands;
};
Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias,
Id offset) {
const IR::Value& offset) {
const auto& texture = ctx.images[handle & 0xFFFF];
const Id image = ctx.OpLoad(texture.image_type, texture.id);
const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]);
const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler);
ImageOperands operands;
if (Sirit::ValidId(bias)) {
operands.Add(spv::ImageOperandsMask::Bias, bias);
}
if (Sirit::ValidId(offset)) {
operands.Add(spv::ImageOperandsMask::Offset, offset);
}
operands.Add(spv::ImageOperandsMask::Bias, bias);
operands.AddOffset(ctx, offset);
return ctx.OpImageSampleImplicitLod(ctx.F32[4], sampled_image, coords, operands.mask,
operands.operands);
}
Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod,
Id offset) {
const IR::Value& offset) {
const auto& texture = ctx.images[handle & 0xFFFF];
const Id image = ctx.OpLoad(texture.image_type, texture.id);
const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]);
const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler);
ImageOperands operands;
if (Sirit::ValidId(lod)) {
operands.Add(spv::ImageOperandsMask::Lod, lod);
}
if (Sirit::ValidId(offset)) {
operands.Add(spv::ImageOperandsMask::Offset, offset);
}
operands.Add(spv::ImageOperandsMask::Lod, lod);
operands.AddOffset(ctx, offset);
return ctx.OpImageSampleExplicitLod(ctx.F32[4], sampled_image, coords, operands.mask,
operands.operands);
}
Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref,
Id bias, Id offset) {
Id bias, const IR::Value& offset) {
const auto& texture = ctx.images[handle & 0xFFFF];
const Id image = ctx.OpLoad(texture.image_type, texture.id);
const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]);
const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler);
ImageOperands operands;
if (Sirit::ValidId(bias)) {
operands.Add(spv::ImageOperandsMask::Bias, bias);
}
if (Sirit::ValidId(offset)) {
operands.Add(spv::ImageOperandsMask::Offset, offset);
}
operands.Add(spv::ImageOperandsMask::Bias, bias);
operands.AddOffset(ctx, offset);
return ctx.OpImageSampleDrefImplicitLod(ctx.F32[1], sampled_image, coords, dref, operands.mask,
operands.operands);
}
Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref,
Id lod, Id offset) {
Id lod, const IR::Value& offset) {
const auto& texture = ctx.images[handle & 0xFFFF];
const Id image = ctx.OpLoad(texture.image_type, texture.id);
const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]);
const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler);
ImageOperands operands;
if (Sirit::ValidId(lod)) {
operands.Add(spv::ImageOperandsMask::Lod, lod);
}
if (Sirit::ValidId(offset)) {
operands.Add(spv::ImageOperandsMask::Offset, offset);
}
operands.AddOffset(ctx, offset);
operands.Add(spv::ImageOperandsMask::Lod, lod);
return ctx.OpImageSampleDrefExplicitLod(ctx.F32[1], sampled_image, coords, dref, operands.mask,
operands.operands);
}
Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, Id offset2) {
Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
const IR::Value& offset) {
const auto& texture = ctx.images[handle & 0xFFFF];
const Id image = ctx.OpLoad(texture.image_type, texture.id);
const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]);
const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler);
const u32 comp = inst->Flags<IR::TextureInstInfo>().gather_comp.Value();
ImageOperands operands;
operands.Add(spv::ImageOperandsMask::Offset, offset);
operands.AddOffset(ctx, offset);
return ctx.OpImageGather(ctx.F32[4], sampled_image, coords, ctx.ConstU32(comp), operands.mask,
operands.operands);
}
Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset,
Id offset2, Id dref) {
Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
const IR::Value& offset, Id dref) {
const auto& texture = ctx.images[handle & 0xFFFF];
const Id image = ctx.OpLoad(texture.image_type, texture.id);
const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]);
const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler);
ImageOperands operands;
operands.Add(spv::ImageOperandsMask::Offset, offset);
operands.AddOffset(ctx, offset);
return ctx.OpImageDrefGather(ctx.F32[4], sampled_image, coords, dref, operands.mask,
operands.operands);
}
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, Id lod,
Id ms) {
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, const IR::Value& offset,
Id lod, Id ms) {
const auto& texture = ctx.images[handle & 0xFFFF];
const Id image = ctx.OpLoad(texture.image_type, texture.id);
const Id result_type = texture.data_types->Get(4);
if (Sirit::ValidId(lod)) {
return ctx.OpBitcast(ctx.F32[4], ctx.OpImageFetch(result_type, image, coords,
spv::ImageOperandsMask::Lod, lod));
} else {
return ctx.OpBitcast(ctx.F32[4], ctx.OpImageFetch(result_type, image, coords));
}
ImageOperands operands;
operands.AddOffset(ctx, offset);
operands.Add(spv::ImageOperandsMask::Lod, lod);
return ctx.OpBitcast(
ctx.F32[4], ctx.OpImageFetch(result_type, image, coords, operands.mask, operands.operands));
}
Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod, bool skip_mips) {

View file

@ -104,15 +104,9 @@ Id EmitUndefU8(EmitContext& ctx);
Id EmitUndefU16(EmitContext& ctx);
Id EmitUndefU32(EmitContext& ctx);
Id EmitUndefU64(EmitContext& ctx);
Id EmitLoadSharedU8(EmitContext& ctx, Id offset);
Id EmitLoadSharedS8(EmitContext& ctx, Id offset);
Id EmitLoadSharedU16(EmitContext& ctx, Id offset);
Id EmitLoadSharedS16(EmitContext& ctx, Id offset);
Id EmitLoadSharedU32(EmitContext& ctx, Id offset);
Id EmitLoadSharedU64(EmitContext& ctx, Id offset);
Id EmitLoadSharedU128(EmitContext& ctx, Id offset);
void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value);
void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value);
void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value);
void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value);
void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value);
@ -369,18 +363,19 @@ Id EmitConvertU16U32(EmitContext& ctx, Id value);
Id EmitConvertU32U16(EmitContext& ctx, Id value);
Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias,
Id offset);
const IR::Value& offset);
Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod,
Id offset);
const IR::Value& offset);
Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref,
Id bias, Id offset);
Id bias, const IR::Value& offset);
Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref,
Id lod, Id offset);
Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, Id offset2);
Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset,
Id offset2, Id dref);
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, Id lod,
Id ms);
Id lod, const IR::Value& offset);
Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
const IR::Value& offset);
Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
const IR::Value& offset, Id dref);
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, const IR::Value& offset,
Id lod, Id ms);
Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod, bool skip_mips);
Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords);
Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,

View file

@ -5,99 +5,25 @@
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
namespace Shader::Backend::SPIRV {
namespace {
Id Pointer(EmitContext& ctx, Id pointer_type, Id array, Id offset, u32 shift) {
const Id shift_id{ctx.ConstU32(shift)};
const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
return ctx.OpAccessChain(pointer_type, array, ctx.u32_zero_value, index);
}
Id Word(EmitContext& ctx, Id offset) {
Id EmitLoadSharedU32(EmitContext& ctx, Id offset) {
const Id shift_id{ctx.ConstU32(2U)};
const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
return ctx.OpLoad(ctx.U32[1], pointer);
}
std::pair<Id, Id> ExtractArgs(EmitContext& ctx, Id offset, u32 mask, u32 count) {
const Id shift{ctx.OpShiftLeftLogical(ctx.U32[1], offset, ctx.ConstU32(3U))};
const Id bit{ctx.OpBitwiseAnd(ctx.U32[1], shift, ctx.ConstU32(mask))};
const Id count_id{ctx.ConstU32(count)};
return {bit, count_id};
}
} // Anonymous namespace
Id EmitLoadSharedU8(EmitContext& ctx, Id offset) {
if (ctx.profile.support_explicit_workgroup_layout) {
const Id pointer{
ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer));
} else {
const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)};
return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count);
}
}
Id EmitLoadSharedS8(EmitContext& ctx, Id offset) {
if (ctx.profile.support_explicit_workgroup_layout) {
const Id pointer{
ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer));
} else {
const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)};
return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count);
}
}
Id EmitLoadSharedU16(EmitContext& ctx, Id offset) {
if (ctx.profile.support_explicit_workgroup_layout) {
const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer));
} else {
const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)};
return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count);
}
}
Id EmitLoadSharedS16(EmitContext& ctx, Id offset) {
if (ctx.profile.support_explicit_workgroup_layout) {
const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer));
} else {
const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)};
return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count);
}
}
Id EmitLoadSharedU32(EmitContext& ctx, Id offset) {
if (ctx.profile.support_explicit_workgroup_layout) {
const Id pointer{Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2)};
return ctx.OpLoad(ctx.U32[1], pointer);
} else {
return Word(ctx, offset);
}
}
Id EmitLoadSharedU64(EmitContext& ctx, Id offset) {
if (ctx.profile.support_explicit_workgroup_layout) {
const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)};
return ctx.OpLoad(ctx.U32[2], pointer);
} else {
const Id shift_id{ctx.ConstU32(2U)};
const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
const Id next_index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.ConstU32(1U))};
const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, base_index)};
const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_index)};
return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer),
ctx.OpLoad(ctx.U32[1], rhs_pointer));
}
const Id shift_id{ctx.ConstU32(2U)};
const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
const Id next_index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.ConstU32(1U))};
const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, base_index)};
const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_index)};
return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer),
ctx.OpLoad(ctx.U32[1], rhs_pointer));
}
Id EmitLoadSharedU128(EmitContext& ctx, Id offset) {
if (ctx.profile.support_explicit_workgroup_layout) {
const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)};
return ctx.OpLoad(ctx.U32[4], pointer);
}
const Id shift_id{ctx.ConstU32(2U)};
const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
std::array<Id, 4> values{};
@ -109,35 +35,14 @@ Id EmitLoadSharedU128(EmitContext& ctx, Id offset) {
return ctx.OpCompositeConstruct(ctx.U32[4], values);
}
void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value) {
const Id pointer{
ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
ctx.OpStore(pointer, ctx.OpUConvert(ctx.U8, value));
}
void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value) {
const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
ctx.OpStore(pointer, ctx.OpUConvert(ctx.U16, value));
}
void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value) {
Id pointer{};
if (ctx.profile.support_explicit_workgroup_layout) {
pointer = Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2);
} else {
const Id shift{ctx.ConstU32(2U)};
const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset);
}
const Id shift{ctx.ConstU32(2U)};
const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
const Id pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset);
ctx.OpStore(pointer, value);
}
void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) {
if (ctx.profile.support_explicit_workgroup_layout) {
const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)};
ctx.OpStore(pointer, value);
return;
}
const Id shift{ctx.ConstU32(2U)};
const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
const Id next_offset{ctx.OpIAdd(ctx.U32[1], word_offset, ctx.ConstU32(1U))};
@ -148,11 +53,6 @@ void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) {
}
void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value) {
if (ctx.profile.support_explicit_workgroup_layout) {
const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)};
ctx.OpStore(pointer, value);
return;
}
const Id shift{ctx.ConstU32(2U)};
const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
for (u32 i = 0; i < 4; ++i) {

View file

@ -407,6 +407,10 @@ spv::ImageFormat GetFormat(const AmdGpu::Image& image) {
image.GetNumberFmt() == AmdGpu::NumberFormat::Float) {
return spv::ImageFormat::Rgba16f;
}
if (image.GetDataFmt() == AmdGpu::DataFormat::Format16_16_16_16 &&
image.GetNumberFmt() == AmdGpu::NumberFormat::Unorm) {
return spv::ImageFormat::Rgba16;
}
if (image.GetDataFmt() == AmdGpu::DataFormat::Format8 &&
image.GetNumberFmt() == AmdGpu::NumberFormat::Unorm) {
return spv::ImageFormat::R8;
@ -513,43 +517,9 @@ void EmitContext::DefineSharedMemory() {
if (info.shared_memory_size == 0) {
info.shared_memory_size = DefaultSharedMemSize;
}
const auto make{[&](Id element_type, u32 element_size) {
const u32 num_elements{Common::DivCeil(info.shared_memory_size, element_size)};
const Id array_type{TypeArray(element_type, ConstU32(num_elements))};
Decorate(array_type, spv::Decoration::ArrayStride, element_size);
const Id struct_type{TypeStruct(array_type)};
MemberDecorate(struct_type, 0U, spv::Decoration::Offset, 0U);
Decorate(struct_type, spv::Decoration::Block);
const Id pointer{TypePointer(spv::StorageClass::Workgroup, struct_type)};
const Id element_pointer{TypePointer(spv::StorageClass::Workgroup, element_type)};
const Id variable{AddGlobalVariable(pointer, spv::StorageClass::Workgroup)};
Decorate(variable, spv::Decoration::Aliased);
interfaces.push_back(variable);
return std::make_tuple(variable, element_pointer, pointer);
}};
if (profile.support_explicit_workgroup_layout) {
AddExtension("SPV_KHR_workgroup_memory_explicit_layout");
AddCapability(spv::Capability::WorkgroupMemoryExplicitLayoutKHR);
if (info.uses_shared_u8) {
AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout8BitAccessKHR);
std::tie(shared_memory_u8, shared_u8, std::ignore) = make(U8, 1);
}
if (info.uses_shared_u16) {
AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout16BitAccessKHR);
std::tie(shared_memory_u16, shared_u16, std::ignore) = make(U16, 2);
}
std::tie(shared_memory_u32, shared_u32, shared_memory_u32_type) = make(U32[1], 4);
std::tie(shared_memory_u32x2, shared_u32x2, std::ignore) = make(U32[2], 8);
std::tie(shared_memory_u32x4, shared_u32x4, std::ignore) = make(U32[4], 16);
return;
}
const u32 num_elements{Common::DivCeil(info.shared_memory_size, 4U)};
const Id type{TypeArray(U32[1], ConstU32(num_elements))};
shared_memory_u32_type = TypePointer(spv::StorageClass::Workgroup, type);
shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]);
shared_memory_u32 = AddGlobalVariable(shared_memory_u32_type, spv::StorageClass::Workgroup);
interfaces.push_back(shared_memory_u32);

View file

@ -253,10 +253,10 @@ void Translator::IMAGE_GATHER(const GcnInst& inst) {
const IR::Value texel = [&]() -> IR::Value {
const IR::F32 lod = flags.test(MimgModifier::Level0) ? ir.Imm32(0.f) : IR::F32{};
if (!flags.test(MimgModifier::Pcf)) {
return ir.ImageGather(handle, body, offset, {}, info);
return ir.ImageGather(handle, body, offset, info);
}
ASSERT(mimg.dmask & 1); // should be always 1st (R) component
return ir.ImageGatherDref(handle, body, offset, {}, dref, info);
return ir.ImageGatherDref(handle, body, offset, dref, info);
}();
// For gather4 instructions dmask selects which component to read and must have

View file

@ -259,10 +259,6 @@ void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, u32 comp
Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset) {
switch (bit_size) {
case 8:
return Inst<U32>(is_signed ? Opcode::LoadSharedS8 : Opcode::LoadSharedU8, offset);
case 16:
return Inst<U32>(is_signed ? Opcode::LoadSharedS16 : Opcode::LoadSharedU16, offset);
case 32:
return Inst<U32>(Opcode::LoadSharedU32, offset);
case 64:
@ -276,12 +272,6 @@ Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset) {
void IREmitter::WriteShared(int bit_size, const Value& value, const U32& offset) {
switch (bit_size) {
case 8:
Inst(Opcode::WriteSharedU8, offset, value);
break;
case 16:
Inst(Opcode::WriteSharedU16, offset, value);
break;
case 32:
Inst(Opcode::WriteSharedU32, offset, value);
break;
@ -1465,13 +1455,13 @@ F32 IREmitter::ImageSampleDrefExplicitLod(const Value& handle, const Value& body
}
Value IREmitter::ImageGather(const Value& handle, const Value& coords, const Value& offset,
const Value& offset2, TextureInstInfo info) {
return Inst(Opcode::ImageGather, Flags{info}, handle, coords, offset, offset2);
TextureInstInfo info) {
return Inst(Opcode::ImageGather, Flags{info}, handle, coords, offset);
}
Value IREmitter::ImageGatherDref(const Value& handle, const Value& coords, const Value& offset,
const Value& offset2, const F32& dref, TextureInstInfo info) {
return Inst(Opcode::ImageGatherDref, Flags{info}, handle, coords, offset, offset2, dref);
const F32& dref, TextureInstInfo info) {
return Inst(Opcode::ImageGatherDref, Flags{info}, handle, coords, offset, dref);
}
Value IREmitter::ImageFetch(const Value& handle, const Value& coords, const Value& offset,

View file

@ -283,18 +283,17 @@ public:
const F32& dref, const U32& offset,
TextureInstInfo info);
[[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod,
const IR::U1& skip_mips);
[[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod,
const IR::U1& skip_mips, TextureInstInfo info);
[[nodiscard]] Value ImageQueryDimension(const Value& handle, const U32& lod,
const U1& skip_mips);
[[nodiscard]] Value ImageQueryDimension(const Value& handle, const U32& lod,
const U1& skip_mips, TextureInstInfo info);
[[nodiscard]] Value ImageQueryLod(const Value& handle, const Value& coords,
TextureInstInfo info);
[[nodiscard]] Value ImageGather(const Value& handle, const Value& coords, const Value& offset,
const Value& offset2, TextureInstInfo info);
TextureInstInfo info);
[[nodiscard]] Value ImageGatherDref(const Value& handle, const Value& coords,
const Value& offset, const Value& offset2, const F32& dref,
TextureInstInfo info);
const Value& offset, const F32& dref, TextureInstInfo info);
[[nodiscard]] Value ImageFetch(const Value& handle, const Value& coords, const Value& offset,
const U32& lod, const U32& multisampling, TextureInstInfo info);
[[nodiscard]] Value ImageGradient(const Value& handle, const Value& coords,

View file

@ -59,8 +59,6 @@ bool Inst::MayHaveSideEffects() const noexcept {
case Opcode::WriteSharedU128:
case Opcode::WriteSharedU64:
case Opcode::WriteSharedU32:
case Opcode::WriteSharedU16:
case Opcode::WriteSharedU8:
case Opcode::ImageWrite:
case Opcode::ImageAtomicIAdd32:
case Opcode::ImageAtomicSMin32:

View file

@ -26,15 +26,9 @@ OPCODE(WorkgroupMemoryBarrier, Void,
OPCODE(DeviceMemoryBarrier, Void, )
// Shared memory operations
OPCODE(LoadSharedU8, U32, U32, )
OPCODE(LoadSharedS8, U32, U32, )
OPCODE(LoadSharedU16, U32, U32, )
OPCODE(LoadSharedS16, U32, U32, )
OPCODE(LoadSharedU32, U32, U32, )
OPCODE(LoadSharedU64, U32x2, U32, )
OPCODE(LoadSharedU128, U32x4, U32, )
OPCODE(WriteSharedU8, Void, U32, U32, )
OPCODE(WriteSharedU16, Void, U32, U32, )
OPCODE(WriteSharedU32, Void, U32, U32, )
OPCODE(WriteSharedU64, Void, U32, U32x2, )
OPCODE(WriteSharedU128, Void, U32, U32x4, )
@ -311,12 +305,12 @@ OPCODE(ConvertU16U32, U16, U32,
OPCODE(ConvertU32U16, U32, U16, )
// Image operations
OPCODE(ImageSampleImplicitLod, F32x4, Opaque, Opaque, F32, U32, )
OPCODE(ImageSampleExplicitLod, F32x4, Opaque, Opaque, U32, U32, )
OPCODE(ImageSampleDrefImplicitLod, F32, Opaque, Opaque, Opaque, F32, U32, )
OPCODE(ImageSampleDrefExplicitLod, F32, Opaque, Opaque, Opaque, U32, U32, )
OPCODE(ImageGather, F32x4, Opaque, Opaque, Opaque, Opaque, )
OPCODE(ImageGatherDref, F32x4, Opaque, Opaque, Opaque, Opaque, F32, )
OPCODE(ImageSampleImplicitLod, F32x4, Opaque, Opaque, F32, Opaque, )
OPCODE(ImageSampleExplicitLod, F32x4, Opaque, Opaque, U32, Opaque, )
OPCODE(ImageSampleDrefImplicitLod, F32, Opaque, Opaque, Opaque, F32, Opaque, )
OPCODE(ImageSampleDrefExplicitLod, F32, Opaque, Opaque, Opaque, U32, Opaque, )
OPCODE(ImageGather, F32x4, Opaque, Opaque, Opaque, )
OPCODE(ImageGatherDref, F32x4, Opaque, Opaque, Opaque, F32, )
OPCODE(ImageFetch, F32x4, Opaque, Opaque, Opaque, U32, Opaque, )
OPCODE(ImageQueryDimensions, U32x4, Opaque, U32, U1, )
OPCODE(ImageQueryLod, F32x4, Opaque, Opaque, )

View file

@ -14,5 +14,6 @@ void DeadCodeEliminationPass(IR::Program& program);
void ConstantPropagationPass(IR::BlockList& program);
void ResourceTrackingPass(IR::Program& program);
void CollectShaderInfoPass(IR::Program& program);
void LowerSharedMemToRegisters(IR::Program& program);
} // namespace Shader::Optimization

View file

@ -0,0 +1,39 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <boost/container/small_vector.hpp>
#include "shader_recompiler/ir/program.h"
namespace Shader::Optimization {
void LowerSharedMemToRegisters(IR::Program& program) {
boost::container::small_vector<IR::Inst*, 8> ds_writes;
Info& info{program.info};
for (IR::Block* const block : program.blocks) {
for (IR::Inst& inst : block->Instructions()) {
const auto opcode = inst.GetOpcode();
if (opcode == IR::Opcode::WriteSharedU32 || opcode == IR::Opcode::WriteSharedU64) {
ds_writes.emplace_back(&inst);
continue;
}
if (opcode == IR::Opcode::LoadSharedU32 || opcode == IR::Opcode::LoadSharedU64) {
// Search for write instruction with same offset
const IR::Inst* prod = inst.Arg(0).InstRecursive();
const auto it = std::ranges::find_if(ds_writes, [&](const IR::Inst* write) {
const IR::Inst* write_prod = write->Arg(0).InstRecursive();
return write_prod->Arg(1).U32() == prod->Arg(1).U32() &&
write_prod->Arg(0) == prod->Arg(0);
});
ASSERT(it != ds_writes.end());
// Replace data read with value written.
inst.ReplaceUsesWith((*it)->Arg(1));
}
}
}
// We should have eliminated everything. Invalidate data write instructions.
for (const auto inst : ds_writes) {
inst->Invalidate();
}
}
} // namespace Shader::Optimization

View file

@ -171,6 +171,22 @@ bool IsImageStorageInstruction(const IR::Inst& inst) {
}
}
u32 ImageOffsetArgumentPosition(const IR::Inst& inst) {
switch (inst.GetOpcode()) {
case IR::Opcode::ImageGather:
case IR::Opcode::ImageGatherDref:
return 2;
case IR::Opcode::ImageSampleExplicitLod:
case IR::Opcode::ImageSampleImplicitLod:
return 3;
case IR::Opcode::ImageSampleDrefExplicitLod:
case IR::Opcode::ImageSampleDrefImplicitLod:
return 4;
default:
UNREACHABLE();
}
}
class Descriptors {
public:
explicit Descriptors(Info& info_)
@ -574,33 +590,29 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
if (inst_info.has_offset) {
// The offsets are six-bit signed integers: X=[5:0], Y=[13:8], and Z=[21:16].
const u32 arg_pos = [&]() -> u32 {
switch (inst.GetOpcode()) {
case IR::Opcode::ImageGather:
case IR::Opcode::ImageGatherDref:
return 2;
case IR::Opcode::ImageSampleExplicitLod:
case IR::Opcode::ImageSampleImplicitLod:
return 3;
case IR::Opcode::ImageSampleDrefExplicitLod:
case IR::Opcode::ImageSampleDrefImplicitLod:
return 4;
default:
break;
}
return inst_info.is_depth ? 4 : 3;
}();
const u32 arg_pos = ImageOffsetArgumentPosition(inst);
const IR::Value arg = inst.Arg(arg_pos);
ASSERT_MSG(arg.Type() == IR::Type::U32, "Unexpected offset type");
const auto f = [&](IR::Value value, u32 offset) -> auto {
const auto read = [&](u32 offset) -> auto {
return ir.BitFieldExtract(IR::U32{arg}, ir.Imm32(offset), ir.Imm32(6), true);
};
const auto x = f(arg, 0);
const auto y = f(arg, 8);
const auto z = f(arg, 16);
const IR::Value value = ir.CompositeConstruct(x, y, z);
inst.SetArg(arg_pos, value);
switch (image.GetType()) {
case AmdGpu::ImageType::Color1D:
case AmdGpu::ImageType::Color1DArray:
inst.SetArg(arg_pos, read(0));
break;
case AmdGpu::ImageType::Color2D:
case AmdGpu::ImageType::Color2DArray:
inst.SetArg(arg_pos, ir.CompositeConstruct(read(0), read(8)));
break;
case AmdGpu::ImageType::Color3D:
inst.SetArg(arg_pos, ir.CompositeConstruct(read(0), read(8), read(16)));
break;
default:
UNREACHABLE();
}
}
if (inst_info.has_lod_clamp) {

View file

@ -16,18 +16,6 @@ void Visit(Info& info, IR::Inst& inst) {
info.stores.Set(inst.Arg(0).Attribute(), inst.Arg(2).U32());
break;
}
case IR::Opcode::LoadSharedS8:
case IR::Opcode::LoadSharedU8:
case IR::Opcode::WriteSharedU8:
info.uses_shared_u8 = true;
info.uses_shared = true;
break;
case IR::Opcode::LoadSharedS16:
case IR::Opcode::LoadSharedU16:
case IR::Opcode::WriteSharedU16:
info.uses_shared_u16 = true;
info.uses_shared = true;
break;
case IR::Opcode::LoadSharedU32:
case IR::Opcode::LoadSharedU64:
case IR::Opcode::WriteSharedU32:

View file

@ -58,6 +58,9 @@ IR::Program TranslateProgram(Common::ObjectPool<IR::Inst>& inst_pool,
Shader::Optimization::SsaRewritePass(program.post_order_blocks);
Shader::Optimization::ResourceTrackingPass(program);
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
if (program.info.stage != Stage::Compute) {
Shader::Optimization::LowerSharedMemToRegisters(program);
}
Shader::Optimization::IdentityRemovalPass(program.blocks);
Shader::Optimization::DeadCodeEliminationPass(program);
Shader::Optimization::CollectShaderInfoPass(program);

View file

@ -195,8 +195,6 @@ struct Info {
bool has_image_query{};
bool uses_group_quad{};
bool uses_shared{};
bool uses_shared_u8{};
bool uses_shared_u16{};
bool uses_fp16{};
bool uses_step_rates{};
bool translation_failed{}; // indicates that shader has unsupported instructions

View file

@ -341,6 +341,7 @@ std::span<const vk::Format> GetAllFormats() {
vk::Format::eR32Sint,
vk::Format::eR32Uint,
vk::Format::eBc6HUfloatBlock,
vk::Format::eBc6HSfloatBlock,
vk::Format::eR16G16Unorm,
vk::Format::eR16G16B16A16Sscaled,
vk::Format::eR16G16Sscaled,
@ -542,6 +543,9 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
if (data_format == AmdGpu::DataFormat::FormatBc6 && num_format == AmdGpu::NumberFormat::Unorm) {
return vk::Format::eBc6HUfloatBlock;
}
if (data_format == AmdGpu::DataFormat::FormatBc6 && num_format == AmdGpu::NumberFormat::Snorm) {
return vk::Format::eBc6HSfloatBlock;
}
if (data_format == AmdGpu::DataFormat::Format8_8_8_8 &&
num_format == AmdGpu::NumberFormat::Sint) {
return vk::Format::eR8G8B8A8Sint;

View file

@ -93,6 +93,7 @@ bool Swapchain::AcquireNextImage() {
case vk::Result::eSuboptimalKHR:
case vk::Result::eErrorSurfaceLostKHR:
case vk::Result::eErrorOutOfDateKHR:
case vk::Result::eErrorUnknown:
needs_recreation = true;
break;
default:

View file

@ -186,6 +186,7 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) {
case vk::Format::eR32Sfloat:
case vk::Format::eR32Uint:
case vk::Format::eR16G16Sfloat:
case vk::Format::eR16G16Unorm:
return vk::Format::eR32Uint;
case vk::Format::eBc1RgbaSrgbBlock:
case vk::Format::eBc1RgbaUnormBlock:
@ -193,6 +194,7 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) {
case vk::Format::eR32G32Sfloat:
case vk::Format::eR32G32Uint:
case vk::Format::eR16G16B16A16Unorm:
case vk::Format::eR16G16B16A16Uint:
case vk::Format::eR16G16B16A16Sfloat:
return vk::Format::eR32G32Uint;
case vk::Format::eBc2SrgbBlock: