Mirror of https://github.com/shadps4-emu/shadPS4.git, synced 2025-04-20 03:24:49 +00:00
Merge branch 'main' into m4aac
Commit 2411018eac: 20 changed files with 263 additions and 87 deletions

.gitmodules (vendored, 2 changes)

@@ -97,7 +97,7 @@
 	shallow = true
 [submodule "externals/MoltenVK/SPIRV-Cross"]
 	path = externals/MoltenVK/SPIRV-Cross
-	url = https://github.com/KhronosGroup/SPIRV-Cross
+	url = https://github.com/billhollings/SPIRV-Cross
 	shallow = true
 [submodule "externals/MoltenVK/MoltenVK"]
 	path = externals/MoltenVK/MoltenVK

README.md

@@ -29,8 +29,8 @@ SPDX-License-Identifier: GPL-2.0-or-later
 ### GPU
 
 - A graphics card with at least 1GB of VRAM
-- Keep your graphics drivers up to date
-- Vulkan 1.3 support (required)
+- Up-to-date graphics drivers
+- Vulkan 1.3 with the `VK_KHR_swapchain` and `VK_KHR_push_descriptor` extensions
 
 ### RAM
 

externals/MoltenVK/MoltenVK (vendored, 2 changes)

@@ -1 +1 @@
-Subproject commit 83510e0f3835c3c43651dda087305abc42572e17
+Subproject commit 067fc6c85b02f37dfda58eeda49d8458e093ed60

externals/MoltenVK/SPIRV-Cross (vendored, 2 changes)

@@ -1 +1 @@
-Subproject commit 68300dc07ac3dc592dbbdb87e02d5180f984ad12
+Subproject commit 185833a61cbe29ce3bfb5a499ffb3dfeaee3bbe7

@@ -21,6 +21,15 @@ Id SharedAtomicU32(EmitContext& ctx, Id offset, Id value,
     return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value);
 }
 
+Id SharedAtomicU32_IncDec(EmitContext& ctx, Id offset,
+                          Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id)) {
+    const Id shift_id{ctx.ConstU32(2U)};
+    const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
+    const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
+    const auto [scope, semantics]{AtomicArgs(ctx)};
+    return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics);
+}
+
 Id BufferAtomicU32BoundsCheck(EmitContext& ctx, Id index, Id buffer_size, auto emit_func) {
     if (Sirit::ValidId(buffer_size)) {
         // Bounds checking enabled, wrap in a conditional branch to make sure that
@@ -99,6 +108,18 @@ Id EmitSharedAtomicXor32(EmitContext& ctx, Id offset, Id value) {
     return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicXor);
 }
 
+Id EmitSharedAtomicISub32(EmitContext& ctx, Id offset, Id value) {
+    return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicISub);
+}
+
+Id EmitSharedAtomicIIncrement32(EmitContext& ctx, Id offset) {
+    return SharedAtomicU32_IncDec(ctx, offset, &Sirit::Module::OpAtomicIIncrement);
+}
+
+Id EmitSharedAtomicIDecrement32(EmitContext& ctx, Id offset) {
+    return SharedAtomicU32_IncDec(ctx, offset, &Sirit::Module::OpAtomicIDecrement);
+}
+
 Id EmitBufferAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
     return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicIAdd);
 }

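The helper added above folds both unary shared atomics into one routine by taking a pointer to a Sirit::Module member function; the byte offset becomes a word index via the shift by 2 before the access chain is built. A minimal stand-alone sketch of that pointer-to-member dispatch (Module and EmitIncDec here are illustrative stand-ins, not the real Sirit or shadPS4 APIs):

    #include <cstdio>

    // Stand-in for Sirit::Module: two unary "atomics" with identical signatures.
    struct Module {
        int OpAtomicIIncrement(int word) { return word + 1; }
        int OpAtomicIDecrement(int word) { return word - 1; }
    };

    // One helper serves both opcodes; the caller selects the operation by
    // passing a pointer-to-member-function, as SharedAtomicU32_IncDec does.
    int EmitIncDec(Module& m, int word, int (Module::*atomic_func)(int)) {
        return (m.*atomic_func)(word);
    }

    int main() {
        Module m;
        std::printf("%d\n", EmitIncDec(m, 41, &Module::OpAtomicIIncrement)); // 42
        std::printf("%d\n", EmitIncDec(m, 43, &Module::OpAtomicIDecrement)); // 42
        return 0;
    }
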
@@ -130,6 +130,10 @@ Id EmitSharedAtomicSMin32(EmitContext& ctx, Id offset, Id value);
 Id EmitSharedAtomicAnd32(EmitContext& ctx, Id offset, Id value);
 Id EmitSharedAtomicOr32(EmitContext& ctx, Id offset, Id value);
 Id EmitSharedAtomicXor32(EmitContext& ctx, Id offset, Id value);
+Id EmitSharedAtomicIIncrement32(EmitContext& ctx, Id offset);
+Id EmitSharedAtomicIDecrement32(EmitContext& ctx, Id offset);
+Id EmitSharedAtomicISub32(EmitContext& ctx, Id offset, Id value);
+
 Id EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2);
 Id EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3);
 Id EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4);
@@ -338,8 +342,8 @@ Id EmitIAdd64(EmitContext& ctx, Id a, Id b);
 Id EmitIAddCary32(EmitContext& ctx, Id a, Id b);
 Id EmitISub32(EmitContext& ctx, Id a, Id b);
 Id EmitISub64(EmitContext& ctx, Id a, Id b);
-Id EmitSMulExt(EmitContext& ctx, Id a, Id b);
-Id EmitUMulExt(EmitContext& ctx, Id a, Id b);
+Id EmitSMulHi(EmitContext& ctx, Id a, Id b);
+Id EmitUMulHi(EmitContext& ctx, Id a, Id b);
 Id EmitIMul32(EmitContext& ctx, Id a, Id b);
 Id EmitIMul64(EmitContext& ctx, Id a, Id b);
 Id EmitSDiv32(EmitContext& ctx, Id a, Id b);

@@ -72,12 +72,17 @@ Id EmitISub64(EmitContext& ctx, Id a, Id b) {
     return ctx.OpISub(ctx.U64, a, b);
 }
 
-Id EmitSMulExt(EmitContext& ctx, Id a, Id b) {
-    return ctx.OpSMulExtended(ctx.full_result_i32x2, a, b);
+Id EmitSMulHi(EmitContext& ctx, Id a, Id b) {
+    const auto signed_a{ctx.OpBitcast(ctx.S32[1], a)};
+    const auto signed_b{ctx.OpBitcast(ctx.S32[1], b)};
+    const auto mul_ext{ctx.OpSMulExtended(ctx.full_result_i32x2, signed_a, signed_b)};
+    const auto signed_hi{ctx.OpCompositeExtract(ctx.S32[1], mul_ext, 1)};
+    return ctx.OpBitcast(ctx.U32[1], signed_hi);
 }
 
-Id EmitUMulExt(EmitContext& ctx, Id a, Id b) {
-    return ctx.OpUMulExtended(ctx.full_result_u32x2, a, b);
+Id EmitUMulHi(EmitContext& ctx, Id a, Id b) {
+    const auto mul_ext{ctx.OpUMulExtended(ctx.full_result_u32x2, a, b)};
+    return ctx.OpCompositeExtract(ctx.U32[1], mul_ext, 1);
 }
 
 Id EmitIMul32(EmitContext& ctx, Id a, Id b) {

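Both new emitters return only the upper 32 bits of the 64-bit product; the signed path bitcasts through S32 because signed and unsigned products differ in their high half. A reference model of the intended semantics (a sketch, not the SPIR-V path):

    #include <cstdint>
    #include <cstdio>

    uint32_t UMulHi(uint32_t a, uint32_t b) {
        // High half of the unsigned 64-bit product.
        return static_cast<uint32_t>((uint64_t(a) * uint64_t(b)) >> 32);
    }

    uint32_t SMulHi(uint32_t a, uint32_t b) {
        // Sign-extend first, as the bitcasts to S32 above do.
        const int64_t prod = int64_t(int32_t(a)) * int64_t(int32_t(b));
        return static_cast<uint32_t>(uint64_t(prod) >> 32);
    }

    int main() {
        // 0xFFFFFFFF is 2^32 - 1 unsigned but -1 signed, so the high halves differ.
        std::printf("%08x\n", UMulHi(0xFFFFFFFFu, 2u)); // 00000001
        std::printf("%08x\n", SMulHi(0xFFFFFFFFu, 2u)); // ffffffff
        return 0;
    }
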
@@ -3569,19 +3569,19 @@ constexpr std::array<InstFormat, 112> InstructionFormatMIMG = {{
     {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
      ScalarType::Float32},
     // 65 = IMAGE_GATHER4_CL
-    {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
-     ScalarType::Undefined},
+    {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
+     ScalarType::Float32},
     {},
     {},
     // 68 = IMAGE_GATHER4_L
-    {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
-     ScalarType::Undefined},
+    {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
+     ScalarType::Float32},
     // 69 = IMAGE_GATHER4_B
-    {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
-     ScalarType::Undefined},
+    {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
+     ScalarType::Float32},
     // 70 = IMAGE_GATHER4_B_CL
-    {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
-     ScalarType::Undefined},
+    {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
+     ScalarType::Float32},
     // 71 = IMAGE_GATHER4_LZ
     {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
      ScalarType::Float32},
@@ -3589,19 +3589,19 @@ constexpr std::array<InstFormat, 112> InstructionFormatMIMG = {{
     {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
      ScalarType::Float32},
     // 73 = IMAGE_GATHER4_C_CL
-    {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
-     ScalarType::Undefined},
+    {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
+     ScalarType::Float32},
     {},
     {},
     // 76 = IMAGE_GATHER4_C_L
-    {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
-     ScalarType::Undefined},
+    {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
+     ScalarType::Float32},
     // 77 = IMAGE_GATHER4_C_B
-    {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
-     ScalarType::Undefined},
+    {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
+     ScalarType::Float32},
     // 78 = IMAGE_GATHER4_C_B_CL
-    {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
-     ScalarType::Undefined},
+    {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
+     ScalarType::Float32},
     // 79 = IMAGE_GATHER4_C_LZ
     {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
      ScalarType::Float32},
@@ -3609,19 +3609,19 @@ constexpr std::array<InstFormat, 112> InstructionFormatMIMG = {{
     {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
      ScalarType::Float32},
     // 81 = IMAGE_GATHER4_CL_O
-    {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
-     ScalarType::Undefined},
+    {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
+     ScalarType::Float32},
     {},
     {},
     // 84 = IMAGE_GATHER4_L_O
-    {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
-     ScalarType::Undefined},
+    {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
+     ScalarType::Float32},
     // 85 = IMAGE_GATHER4_B_O
-    {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
-     ScalarType::Undefined},
+    {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
+     ScalarType::Float32},
     // 86 = IMAGE_GATHER4_B_CL_O
-    {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
-     ScalarType::Undefined},
+    {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
+     ScalarType::Float32},
     // 87 = IMAGE_GATHER4_LZ_O
     {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
      ScalarType::Float32},
@@ -3629,19 +3629,19 @@ constexpr std::array<InstFormat, 112> InstructionFormatMIMG = {{
     {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
      ScalarType::Float32},
     // 89 = IMAGE_GATHER4_C_CL_O
-    {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
-     ScalarType::Undefined},
+    {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
+     ScalarType::Float32},
     {},
     {},
     // 92 = IMAGE_GATHER4_C_L_O
-    {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
-     ScalarType::Undefined},
+    {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
+     ScalarType::Float32},
     // 93 = IMAGE_GATHER4_C_B_O
-    {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
-     ScalarType::Undefined},
+    {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
+     ScalarType::Float32},
     // 94 = IMAGE_GATHER4_C_B_CL_O
-    {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
-     ScalarType::Undefined},
+    {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
+     ScalarType::Float32},
     // 95 = IMAGE_GATHER4_C_LZ_O
     {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
      ScalarType::Float32},

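Each table entry above is an InstFormat aggregate initialized positionally. A compilable sketch of a plausible field layout, inferred from the initializer order (the meaning of the two numeric fields is an assumption, not confirmed by this diff):

    #include <cstdint>
    using u32 = std::uint32_t;

    enum class InstClass { VectorMemImgSmp };
    enum class InstCategory { VectorMemory };
    enum class ScalarType { Undefined, Uint32, Float32 };

    struct InstFormat {
        InstClass inst_class;
        InstCategory inst_category;
        u32 src_operands;    // assumed meaning of the first numeric field (4 above)
        u32 dst_operands;    // assumed meaning of the second numeric field (1 above)
        ScalarType src_type; // Undefined -> Uint32 in the hunks above
        ScalarType dst_type; // Undefined -> Float32 in the hunks above
    };

    // The change in miniature: GATHER4 variants gain concrete operand types.
    constexpr InstFormat image_gather4_cl{InstClass::VectorMemImgSmp,
                                          InstCategory::VectorMemory, 4, 1,
                                          ScalarType::Uint32, ScalarType::Float32};

    int main() {
        return image_gather4_cl.src_type == ScalarType::Uint32 ? 0 : 1;
    }
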
@@ -13,6 +13,12 @@ void Translator::EmitDataShare(const GcnInst& inst) {
     // DS
     case Opcode::DS_ADD_U32:
         return DS_ADD_U32(inst, false);
+    case Opcode::DS_SUB_U32:
+        return DS_SUB_U32(inst, false);
+    case Opcode::DS_INC_U32:
+        return DS_INC_U32(inst, false);
+    case Opcode::DS_DEC_U32:
+        return DS_DEC_U32(inst, false);
     case Opcode::DS_MIN_I32:
         return DS_MIN_U32(inst, true, false);
     case Opcode::DS_MAX_I32:
@@ -35,6 +41,8 @@ void Translator::EmitDataShare(const GcnInst& inst) {
         return DS_WRITE(32, false, true, true, inst);
     case Opcode::DS_ADD_RTN_U32:
         return DS_ADD_U32(inst, true);
+    case Opcode::DS_SUB_RTN_U32:
+        return DS_SUB_U32(inst, true);
     case Opcode::DS_MIN_RTN_U32:
         return DS_MIN_U32(inst, false, true);
     case Opcode::DS_MAX_RTN_U32:
@@ -67,6 +75,8 @@ void Translator::EmitDataShare(const GcnInst& inst) {
         return DS_READ(64, false, false, false, inst);
     case Opcode::DS_READ2_B64:
         return DS_READ(64, false, true, false, inst);
+    case Opcode::DS_READ2ST64_B64:
+        return DS_READ(64, false, true, true, inst);
     default:
         LogMissingOpcode(inst);
     }
@@ -226,6 +236,40 @@ void Translator::DS_SWIZZLE_B32(const GcnInst& inst) {
     SetDst(inst.dst[0], ir.QuadShuffle(src, index));
 }
 
+void Translator::DS_INC_U32(const GcnInst& inst, bool rtn) {
+    const IR::U32 addr{GetSrc(inst.src[0])};
+    const IR::U32 offset =
+        ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
+    const IR::U32 addr_offset = ir.IAdd(addr, offset);
+    const IR::Value original_val = ir.SharedAtomicIIncrement(addr_offset);
+    if (rtn) {
+        SetDst(inst.dst[0], IR::U32{original_val});
+    }
+}
+
+void Translator::DS_DEC_U32(const GcnInst& inst, bool rtn) {
+    const IR::U32 addr{GetSrc(inst.src[0])};
+    const IR::U32 offset =
+        ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
+    const IR::U32 addr_offset = ir.IAdd(addr, offset);
+    const IR::Value original_val = ir.SharedAtomicIDecrement(addr_offset);
+    if (rtn) {
+        SetDst(inst.dst[0], IR::U32{original_val});
+    }
+}
+
+void Translator::DS_SUB_U32(const GcnInst& inst, bool rtn) {
+    const IR::U32 addr{GetSrc(inst.src[0])};
+    const IR::U32 data{GetSrc(inst.src[1])};
+    const IR::U32 offset =
+        ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
+    const IR::U32 addr_offset = ir.IAdd(addr, offset);
+    const IR::Value original_val = ir.SharedAtomicISub(addr_offset, data);
+    if (rtn) {
+        SetDst(inst.dst[0], IR::U32{original_val});
+    }
+}
+
 void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, bool stride64,
                          const GcnInst& inst) {
     const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))};

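All three new DS_* handlers form their address the same way: the instruction's two immediate offset bytes are packed into a 16-bit byte offset and added to the address operand before the shared atomic is emitted. The packing in isolation (a sketch of the offset0/offset1 fold used above):

    #include <cstdint>
    #include <cstdio>

    std::uint32_t DsImmOffset(std::uint8_t offset0, std::uint8_t offset1) {
        // offset1 supplies bits [15:8], offset0 supplies bits [7:0].
        return (std::uint32_t(offset1) << 8u) + std::uint32_t(offset0);
    }

    int main() {
        std::printf("0x%x\n", DsImmOffset(0x10, 0x02)); // 0x210
        return 0;
    }
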
@@ -30,6 +30,8 @@ void Translator::EmitScalarAlu(const GcnInst& inst) {
         return S_SUB_I32(inst);
     case Opcode::S_ADDC_U32:
         return S_ADDC_U32(inst);
+    case Opcode::S_SUBB_U32:
+        return S_SUBB_U32(inst);
     case Opcode::S_MIN_I32:
         return S_MIN_U32(true, inst);
     case Opcode::S_MIN_U32:
@@ -110,6 +112,8 @@ void Translator::EmitScalarAlu(const GcnInst& inst) {
         return S_FF1_I32_B32(inst);
     case Opcode::S_FF1_I32_B64:
         return S_FF1_I32_B64(inst);
+    case Opcode::S_FLBIT_I32_B32:
+        return S_FLBIT_I32_B32(inst);
     case Opcode::S_BITSET0_B32:
         return S_BITSET_B32(inst, 0);
     case Opcode::S_BITSET1_B32:
@@ -236,6 +240,17 @@ void Translator::S_SUB_U32(const GcnInst& inst) {
     ir.SetScc(ir.IGreaterThan(src1, src0, false));
 }
 
+void Translator::S_SUBB_U32(const GcnInst& inst) {
+    const IR::U32 src0{GetSrc(inst.src[0])};
+    const IR::U32 src1{GetSrc(inst.src[1])};
+    const IR::U32 borrow{ir.Select(ir.GetScc(), ir.Imm32(1U), ir.Imm32(0U))};
+    const IR::U32 result{ir.ISub(ir.ISub(src0, src1), borrow)};
+    SetDst(inst.dst[0], result);
+
+    const IR::U32 sum_with_borrow{ir.IAdd(src1, borrow)};
+    ir.SetScc(ir.ILessThan(src0, sum_with_borrow, false));
+}
+
 void Translator::S_ADD_I32(const GcnInst& inst) {
     const IR::U32 src0{GetSrc(inst.src[0])};
     const IR::U32 src1{GetSrc(inst.src[1])};

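S_SUBB_U32 subtracts with the incoming borrow taken from SCC, then produces the outgoing borrow by comparing src0 against src1 + borrow with ordinary 32-bit wraparound, mirroring the IR sequence above. The same computation as a standalone model (a sketch):

    #include <cstdint>
    #include <cstdio>

    struct SubbResult {
        std::uint32_t value;
        bool scc; // borrow out
    };

    SubbResult Subb(std::uint32_t src0, std::uint32_t src1, bool scc_in) {
        const std::uint32_t borrow = scc_in ? 1u : 0u;
        const std::uint32_t sum_with_borrow = src1 + borrow; // wraps, as in the IR
        return {src0 - src1 - borrow, src0 < sum_with_borrow};
    }

    int main() {
        const SubbResult r = Subb(5u, 5u, true);     // 5 - 5 - 1 underflows
        std::printf("%u %d\n", r.value, int(r.scc)); // 4294967295 1
        return 0;
    }
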
@@ -660,6 +675,17 @@ void Translator::S_FF1_I32_B64(const GcnInst& inst) {
     SetDst(inst.dst[0], result);
 }
 
+void Translator::S_FLBIT_I32_B32(const GcnInst& inst) {
+    const IR::U32 src0{GetSrc(inst.src[0])};
+    // Gcn wants the MSB position counting from the left, but SPIR-V counts from the rightmost (LSB)
+    // position
+    const IR::U32 msb_pos = ir.FindUMsb(src0);
+    const IR::U32 pos_from_left = ir.ISub(ir.Imm32(31), msb_pos);
+    // Select 0xFFFFFFFF if src0 was 0
+    const IR::U1 cond = ir.INotEqual(src0, ir.Imm32(0));
+    SetDst(inst.dst[0], IR::U32{ir.Select(cond, pos_from_left, ir.Imm32(~0U))});
+}
+
 void Translator::S_BITSET_B32(const GcnInst& inst, u32 bit_value) {
     const IR::U32 old_value{GetSrc(inst.dst[0])};
     const IR::U32 offset{ir.BitFieldExtract(GetSrc(inst.src[0]), ir.Imm32(0U), ir.Imm32(5U))};

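S_FLBIT_I32_B32 reports the most significant set bit counted from bit 31 downward, which is why the translation subtracts FindUMsb's LSB-relative answer from 31 and substitutes ~0U for a zero input. A reference model of those semantics (a sketch):

    #include <cstdint>
    #include <cstdio>

    std::uint32_t Flbit(std::uint32_t src0) {
        if (src0 == 0) {
            return ~0u; // no bit set: all ones, as the Select above produces
        }
        std::uint32_t msb = 31;
        while (!(src0 & (1u << msb))) {
            --msb;
        }
        return 31 - msb; // count of leading zeros
    }

    int main() {
        std::printf("%u\n", Flbit(0x00800000u)); // 8 (bit 23 is the MSB)
        std::printf("%u\n", Flbit(0u));          // 4294967295
        return 0;
    }
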
@@ -80,6 +80,7 @@
     // SOP2
     void S_ADD_U32(const GcnInst& inst);
     void S_SUB_U32(const GcnInst& inst);
+    void S_SUBB_U32(const GcnInst& inst);
     void S_ADD_I32(const GcnInst& inst);
     void S_SUB_I32(const GcnInst& inst);
     void S_ADDC_U32(const GcnInst& inst);
@@ -119,6 +120,7 @@
     void S_BCNT1_I32_B64(const GcnInst& inst);
     void S_FF1_I32_B32(const GcnInst& inst);
     void S_FF1_I32_B64(const GcnInst& inst);
+    void S_FLBIT_I32_B32(const GcnInst& inst);
     void S_BITSET_B32(const GcnInst& inst, u32 bit_value);
     void S_GETPC_B64(u32 pc, const GcnInst& inst);
     void S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& inst);
@@ -273,6 +275,9 @@
     void DS_READ(int bit_size, bool is_signed, bool is_pair, bool stride64, const GcnInst& inst);
     void DS_APPEND(const GcnInst& inst);
     void DS_CONSUME(const GcnInst& inst);
+    void DS_SUB_U32(const GcnInst& inst, bool rtn);
+    void DS_INC_U32(const GcnInst& inst, bool rtn);
+    void DS_DEC_U32(const GcnInst& inst, bool rtn);
 
     // Buffer Memory
     // MUBUF / MTBUF

@@ -394,6 +394,8 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
         return V_MUL_HI_U32(false, inst);
     case Opcode::V_MUL_LO_I32:
         return V_MUL_LO_U32(inst);
+    case Opcode::V_MUL_HI_I32:
+        return V_MUL_HI_U32(true, inst);
     case Opcode::V_MAD_U64_U32:
         return V_MAD_U64_U32(inst);
     case Opcode::V_NOP:
@@ -1279,7 +1281,7 @@ void Translator::V_MUL_LO_U32(const GcnInst& inst) {
 void Translator::V_MUL_HI_U32(bool is_signed, const GcnInst& inst) {
     const IR::U32 src0{GetSrc(inst.src[0])};
     const IR::U32 src1{GetSrc(inst.src[1])};
-    const IR::U32 hi{ir.CompositeExtract(ir.IMulExt(src0, src1, is_signed), 1)};
+    const IR::U32 hi{ir.IMulHi(src0, src1, is_signed)};
     SetDst(inst.dst[0], hi);
 }
 

@@ -357,6 +357,18 @@ U32 IREmitter::SharedAtomicXor(const U32& address, const U32& data) {
     return Inst<U32>(Opcode::SharedAtomicXor32, address, data);
 }
 
+U32 IREmitter::SharedAtomicIIncrement(const U32& address) {
+    return Inst<U32>(Opcode::SharedAtomicIIncrement32, address);
+}
+
+U32 IREmitter::SharedAtomicIDecrement(const U32& address) {
+    return Inst<U32>(Opcode::SharedAtomicIDecrement32, address);
+}
+
+U32 IREmitter::SharedAtomicISub(const U32& address, const U32& data) {
+    return Inst<U32>(Opcode::SharedAtomicISub32, address, data);
+}
+
 U32 IREmitter::ReadConst(const Value& base, const U32& offset) {
     return Inst<U32>(Opcode::ReadConst, base, offset);
 }
@@ -1388,8 +1400,8 @@ U32U64 IREmitter::ISub(const U32U64& a, const U32U64& b) {
     }
 }
 
-IR::Value IREmitter::IMulExt(const U32& a, const U32& b, bool is_signed) {
-    return Inst(is_signed ? Opcode::SMulExt : Opcode::UMulExt, a, b);
+U32 IREmitter::IMulHi(const U32& a, const U32& b, bool is_signed) {
+    return Inst<U32>(is_signed ? Opcode::SMulHi : Opcode::UMulHi, a, b);
 }
 
 U32U64 IREmitter::IMul(const U32U64& a, const U32U64& b) {

@@ -106,6 +106,10 @@
     [[nodiscard]] U32 SharedAtomicOr(const U32& address, const U32& data);
     [[nodiscard]] U32 SharedAtomicXor(const U32& address, const U32& data);
 
+    [[nodiscard]] U32 SharedAtomicIIncrement(const U32& address);
+    [[nodiscard]] U32 SharedAtomicIDecrement(const U32& address);
+    [[nodiscard]] U32 SharedAtomicISub(const U32& address, const U32& data);
+
     [[nodiscard]] U32 ReadConst(const Value& base, const U32& offset);
     [[nodiscard]] U32 ReadConstBuffer(const Value& handle, const U32& index);
 
@@ -240,7 +244,7 @@
     [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b);
     [[nodiscard]] Value IAddCary(const U32& a, const U32& b);
     [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b);
-    [[nodiscard]] Value IMulExt(const U32& a, const U32& b, bool is_signed = false);
+    [[nodiscard]] U32 IMulHi(const U32& a, const U32& b, bool is_signed = false);
     [[nodiscard]] U32U64 IMul(const U32U64& a, const U32U64& b);
     [[nodiscard]] U32 IDiv(const U32& a, const U32& b, bool is_signed = false);
     [[nodiscard]] U32 IMod(const U32& a, const U32& b, bool is_signed = false);

@@ -44,6 +44,9 @@ OPCODE(SharedAtomicUMax32, U32, U32,
 OPCODE(SharedAtomicAnd32, U32, U32, U32, )
 OPCODE(SharedAtomicOr32, U32, U32, U32, )
 OPCODE(SharedAtomicXor32, U32, U32, U32, )
+OPCODE(SharedAtomicISub32, U32, U32, U32, )
+OPCODE(SharedAtomicIIncrement32, U32, U32, )
+OPCODE(SharedAtomicIDecrement32, U32, U32, )
 
 // Context getters/setters
 OPCODE(GetUserData, U32, ScalarReg, )
@@ -317,8 +320,8 @@ OPCODE(ISub32, U32, U32,
 OPCODE(ISub64, U64, U64, U64, )
 OPCODE(IMul32, U32, U32, U32, )
 OPCODE(IMul64, U64, U64, U64, )
-OPCODE(SMulExt, U32x2, U32, U32, )
-OPCODE(UMulExt, U32x2, U32, U32, )
+OPCODE(SMulHi, U32, U32, U32, )
+OPCODE(UMulHi, U32, U32, U32, )
 OPCODE(SDiv32, U32, U32, U32, )
 OPCODE(UDiv32, U32, U32, U32, )
 OPCODE(SMod32, U32, U32, U32, )

@@ -235,9 +235,12 @@ std::pair<const IR::Inst*, bool> TryDisableAnisoLod0(const IR::Inst* inst) {
     return {prod2, true};
 }
 
-SharpLocation TrackSharp(const IR::Inst* inst, const Shader::Info& info) {
+SharpLocation AttemptTrackSharp(const IR::Inst* inst, auto& visited_insts) {
     // Search until we find a potential sharp source.
-    const auto pred = [](const IR::Inst* inst) -> std::optional<const IR::Inst*> {
+    const auto pred = [&visited_insts](const IR::Inst* inst) -> std::optional<const IR::Inst*> {
+        if (std::ranges::find(visited_insts, inst) != visited_insts.end()) {
+            return std::nullopt;
+        }
         if (inst->GetOpcode() == IR::Opcode::GetUserData ||
             inst->GetOpcode() == IR::Opcode::ReadConst) {
             return inst;
@@ -247,6 +250,7 @@ SharpLocation TrackSharp(const IR::Inst* inst, const Shader::Info& info) {
     const auto result = IR::BreadthFirstSearch(inst, pred);
     ASSERT_MSG(result, "Unable to track sharp source");
     inst = result.value();
+    visited_insts.emplace_back(inst);
     if (inst->GetOpcode() == IR::Opcode::GetUserData) {
         return static_cast<u32>(inst->Arg(0).ScalarReg());
     } else {
@@ -256,6 +260,29 @@ SharpLocation TrackSharp(const IR::Inst* inst, const Shader::Info& info) {
     }
 }
 
+/// Tracks a sharp with validation of the chosen data type.
+template <typename DataType>
+std::pair<SharpLocation, DataType> TrackSharp(const IR::Inst* inst, const Info& info) {
+    boost::container::small_vector<const IR::Inst*, 4> visited_insts{};
+    while (true) {
+        const auto prev_size = visited_insts.size();
+        const auto sharp = AttemptTrackSharp(inst, visited_insts);
+        if (const auto data = info.ReadUdSharp<DataType>(sharp); data.Valid()) {
+            return std::make_pair(sharp, data);
+        }
+        if (prev_size == visited_insts.size()) {
+            // No change in visited instructions, we've run out of paths.
+            UNREACHABLE_MSG("Unable to find valid sharp.");
+        }
+    }
+}
+
+/// Tracks a sharp without data validation.
+SharpLocation TrackSharp(const IR::Inst* inst, const Info& info) {
+    boost::container::static_vector<const IR::Inst*, 1> visited_insts{};
+    return AttemptTrackSharp(inst, visited_insts);
+}
+
 s32 TryHandleInlineCbuf(IR::Inst& inst, Info& info, Descriptors& descriptors,
                         AmdGpu::Buffer& cbuf) {
 
@@ -293,8 +320,8 @@ void PatchBufferSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors&
     if (binding = TryHandleInlineCbuf(inst, info, descriptors, buffer); binding == -1) {
         IR::Inst* handle = inst.Arg(0).InstRecursive();
         IR::Inst* producer = handle->Arg(0).InstRecursive();
-        const auto sharp = TrackSharp(producer, info);
-        buffer = info.ReadUdSharp<AmdGpu::Buffer>(sharp);
+        SharpLocation sharp;
+        std::tie(sharp, buffer) = TrackSharp<AmdGpu::Buffer>(producer, info);
         binding = descriptors.Add(BufferResource{
             .sharp_idx = sharp,
             .used_types = BufferDataType(inst, buffer.GetNumberFmt()),

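The validated TrackSharp repeats the breadth-first search while growing an exclusion list, so each attempt surfaces the next candidate sharp; when an attempt leaves the list unchanged, every path has been tried. The retry-with-exclusion pattern in isolation (a sketch; AttemptTrack and the integer candidates are illustrative, not shadPS4 APIs):

    #include <algorithm>
    #include <cstdio>
    #include <optional>
    #include <vector>

    // Returns the first candidate not yet visited, recording it; nullopt when exhausted.
    std::optional<int> AttemptTrack(const std::vector<int>& candidates,
                                    std::vector<int>& visited) {
        for (int c : candidates) {
            if (std::find(visited.begin(), visited.end(), c) == visited.end()) {
                visited.push_back(c);
                return c;
            }
        }
        return std::nullopt; // list unchanged: no paths left
    }

    int main() {
        const std::vector<int> candidates{3, 7, 42};
        std::vector<int> visited;
        while (const auto c = AttemptTrack(candidates, visited)) {
            if (*c == 42) { // stand-in for ReadUdSharp(...).Valid()
                std::printf("valid sharp: %d\n", *c);
                return 0;
            }
        }
        std::printf("no valid sharp\n");
        return 1;
    }
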
@@ -242,18 +242,22 @@ bool Instance::CreateDevice() {
 
     // These extensions are promoted by Vulkan 1.3, but for greater compatibility we use Vulkan 1.2
     // with extensions.
-    add_extension(VK_KHR_FORMAT_FEATURE_FLAGS_2_EXTENSION_NAME);
-    add_extension(VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME);
-    add_extension(VK_EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION_EXTENSION_NAME);
-    add_extension(VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME);
-    add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME);
-    add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME);
-    add_extension(VK_EXT_TOOLING_INFO_EXTENSION_NAME);
-    const bool maintenance4 = add_extension(VK_KHR_MAINTENANCE_4_EXTENSION_NAME);
+    ASSERT(add_extension(VK_KHR_FORMAT_FEATURE_FLAGS_2_EXTENSION_NAME));
+    ASSERT(add_extension(VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME));
+    ASSERT(add_extension(VK_EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION_EXTENSION_NAME));
+    ASSERT(add_extension(VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME));
+    ASSERT(add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME));
+    ASSERT(add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME));
+    ASSERT(add_extension(VK_EXT_TOOLING_INFO_EXTENSION_NAME) ||
+           driver_id == vk::DriverId::eIntelProprietaryWindows);
+    ASSERT(add_extension(VK_KHR_MAINTENANCE_4_EXTENSION_NAME));
 
-    add_extension(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
-    add_extension(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
-    add_extension(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME);
+    // Required
+    ASSERT(add_extension(VK_KHR_SWAPCHAIN_EXTENSION_NAME));
+    ASSERT(add_extension(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME));
+
+    // Optional
+    depth_range_unrestricted = add_extension(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME);
     dynamic_state_3 = add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME);
     if (dynamic_state_3) {
         dynamic_state_3_features =
@@ -422,9 +426,6 @@ bool Instance::CreateDevice() {
 #endif
     };
 
-    if (!maintenance4) {
-        device_chain.unlink<vk::PhysicalDeviceMaintenance4FeaturesKHR>();
-    }
     if (!custom_border_color) {
         device_chain.unlink<vk::PhysicalDeviceCustomBorderColorFeaturesEXT>();
     }

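This hunk leans on add_extension returning whether the device actually advertises the extension: required extensions are wrapped in ASSERT, optional ones feed boolean feature flags such as depth_range_unrestricted. The idiom in miniature (DeviceInfo and its lists are illustrative, not the real Instance class):

    #include <algorithm>
    #include <cassert>
    #include <string_view>
    #include <vector>

    struct DeviceInfo {
        std::vector<std::string_view> available; // advertised by the physical device
        std::vector<std::string_view> enabled;   // requested at device creation

        // Enables the extension and reports success only if it is available.
        bool add_extension(std::string_view name) {
            if (std::find(available.begin(), available.end(), name) ==
                available.end()) {
                return false;
            }
            enabled.push_back(name);
            return true;
        }
    };

    int main() {
        DeviceInfo info;
        info.available = {"VK_KHR_swapchain", "VK_EXT_depth_range_unrestricted"};
        assert(info.add_extension("VK_KHR_swapchain")); // required: must succeed
        const bool depth_range_unrestricted =
            info.add_extension("VK_EXT_depth_range_unrestricted"); // optional
        return depth_range_unrestricted ? 0 : 1;
    }
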
@@ -104,6 +104,11 @@
         return depth_clip_control;
     }
 
+    /// Returns true when VK_EXT_depth_range_unrestricted is supported
+    bool IsDepthRangeUnrestrictedSupported() const {
+        return depth_range_unrestricted;
+    }
+
     /// Returns true when the extendedDynamicState3ColorWriteMask feature of
     /// VK_EXT_extended_dynamic_state3 is supported.
     bool IsDynamicColorWriteMaskSupported() const {
@@ -340,6 +345,7 @@
     bool custom_border_color{};
     bool fragment_shader_barycentric{};
     bool depth_clip_control{};
+    bool depth_range_unrestricted{};
     bool dynamic_state_3{};
     bool vertex_input_dynamic_state{};
     bool robustness2{};

@@ -930,12 +930,17 @@ bool Rasterizer::IsMapped(VAddr addr, u64 size) {
         // There is no memory, so not mapped.
         return false;
     }
-    return mapped_ranges.find(boost::icl::interval<VAddr>::right_open(addr, addr + size)) !=
-           mapped_ranges.end();
+    const auto range = decltype(mapped_ranges)::interval_type::right_open(addr, addr + size);
+
+    std::shared_lock lock{mapped_ranges_mutex};
+    return boost::icl::contains(mapped_ranges, range);
 }
 
 void Rasterizer::MapMemory(VAddr addr, u64 size) {
-    mapped_ranges += boost::icl::interval<VAddr>::right_open(addr, addr + size);
+    {
+        std::unique_lock lock{mapped_ranges_mutex};
+        mapped_ranges += decltype(mapped_ranges)::interval_type::right_open(addr, addr + size);
+    }
     page_manager.OnGpuMap(addr, size);
 }
 
@@ -943,7 +948,10 @@ void Rasterizer::UnmapMemory(VAddr addr, u64 size) {
     buffer_cache.InvalidateMemory(addr, size);
     texture_cache.UnmapMemory(addr, size);
     page_manager.OnGpuUnmap(addr, size);
-    mapped_ranges -= boost::icl::interval<VAddr>::right_open(addr, addr + size);
+    {
+        std::unique_lock lock{mapped_ranges_mutex};
+        mapped_ranges -= decltype(mapped_ranges)::interval_type::right_open(addr, addr + size);
+    }
 }
 
 void Rasterizer::UpdateDynamicState(const GraphicsPipeline& pipeline) const {

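The locking scheme introduced here lets many readers query the interval set concurrently while map and unmap take exclusive ownership. The same structure in miniature, reusing the boost::icl types from the hunk (a sketch, not the full Rasterizer):

    #include <boost/icl/interval_set.hpp>
    #include <cstdint>
    #include <mutex>
    #include <shared_mutex>

    using VAddr = std::uint64_t;

    struct MappedRanges {
        boost::icl::interval_set<VAddr> ranges;
        std::shared_mutex mutex;

        void Map(VAddr addr, std::uint64_t size) {
            std::unique_lock lock{mutex}; // exclusive: mutates the set
            ranges += decltype(ranges)::interval_type::right_open(addr, addr + size);
        }

        bool IsMapped(VAddr addr, std::uint64_t size) {
            const auto range =
                decltype(ranges)::interval_type::right_open(addr, addr + size);
            std::shared_lock lock{mutex}; // shared: concurrent readers are fine
            return boost::icl::contains(ranges, range);
        }
    };

    int main() {
        MappedRanges mr;
        mr.Map(0x1000, 0x2000);
        return mr.IsMapped(0x1800, 0x100) ? 0 : 1;
    }
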
@@ -1010,32 +1018,37 @@ void Rasterizer::UpdateViewportScissorState() const {
 
     const auto zoffset = vp_ctl.zoffset_enable ? vp.zoffset : 0.f;
     const auto zscale = vp_ctl.zscale_enable ? vp.zscale : 1.f;
+
+    vk::Viewport viewport = {
+        .minDepth = zoffset - zscale * reduce_z,
+        .maxDepth = zscale + zoffset,
+    };
+    if (!instance.IsDepthRangeUnrestrictedSupported()) {
+        // Unrestricted depth range not supported by device. Make best attempt
+        // by restricting to valid range.
+        viewport.minDepth = std::max(viewport.minDepth, 0.f);
+        viewport.maxDepth = std::min(viewport.maxDepth, 1.f);
+    }
     if (regs.IsClipDisabled()) {
         // In case if clipping is disabled we patch the shader to convert vertex position
         // from screen space coordinates to NDC by defining a render space as full hardware
         // window range [0..16383, 0..16383] and setting the viewport to its size.
-        viewports.push_back({
-            .x = 0.f,
-            .y = 0.f,
-            .width = float(std::min<u32>(instance.GetMaxViewportWidth(), 16_KB)),
-            .height = float(std::min<u32>(instance.GetMaxViewportHeight(), 16_KB)),
-            .minDepth = zoffset - zscale * reduce_z,
-            .maxDepth = zscale + zoffset,
-        });
+        viewport.x = 0.f;
+        viewport.y = 0.f;
+        viewport.width = float(std::min<u32>(instance.GetMaxViewportWidth(), 16_KB));
+        viewport.height = float(std::min<u32>(instance.GetMaxViewportHeight(), 16_KB));
     } else {
         const auto xoffset = vp_ctl.xoffset_enable ? vp.xoffset : 0.f;
         const auto xscale = vp_ctl.xscale_enable ? vp.xscale : 1.f;
         const auto yoffset = vp_ctl.yoffset_enable ? vp.yoffset : 0.f;
         const auto yscale = vp_ctl.yscale_enable ? vp.yscale : 1.f;
-        viewports.push_back({
-            .x = xoffset - xscale,
-            .y = yoffset - yscale,
-            .width = xscale * 2.0f,
-            .height = yscale * 2.0f,
-            .minDepth = zoffset - zscale * reduce_z,
-            .maxDepth = zscale + zoffset,
-        });
+
+        viewport.x = xoffset - xscale;
+        viewport.y = yoffset - yscale;
+        viewport.width = xscale * 2.0f;
+        viewport.height = yscale * 2.0f;
     }
+    viewports.push_back(viewport);
 
     auto vp_scsr = scsr;
     if (regs.mode_control.vport_scissor_enable) {

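Without VK_EXT_depth_range_unrestricted, Vulkan requires minDepth and maxDepth to stay within [0, 1], so the refactored path clamps the GCN-derived range as a best effort before the single push_back. The clamp arithmetic in isolation (a sketch with made-up register values):

    #include <algorithm>
    #include <cstdio>

    int main() {
        const float zoffset = 0.5f, zscale = 1.0f, reduce_z = 1.0f;
        float min_depth = zoffset - zscale * reduce_z; // -0.5
        float max_depth = zscale + zoffset;            // 1.5
        const bool depth_range_unrestricted = false;   // assume extension missing
        if (!depth_range_unrestricted) {
            min_depth = std::max(min_depth, 0.f);
            max_depth = std::min(max_depth, 1.f);
        }
        std::printf("%.1f %.1f\n", min_depth, max_depth); // 0.0 1.0
        return 0;
    }
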
@@ -3,6 +3,8 @@
 
 #pragma once
 
+#include <shared_mutex>
+
 #include "video_core/buffer_cache/buffer_cache.h"
 #include "video_core/page_manager.h"
 #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
@@ -106,6 +108,7 @@
     AmdGpu::Liverpool* liverpool;
     Core::MemoryManager* memory;
     boost::icl::interval_set<VAddr> mapped_ranges;
+    std::shared_mutex mapped_ranges_mutex;
     PipelineCache pipeline_cache;
 
     boost::container::static_vector<