Merge branch 'main' into http-part2

This commit is contained in:
georgemoralis 2025-04-03 16:33:17 +03:00 committed by GitHub
commit 04e8497a89
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 168 additions and 34 deletions

2
externals/sirit vendored

@ -1 +1 @@
Subproject commit 8b9b12c2089505ac8b10fa56bf56b3ed49d9d7b0
Subproject commit 427a42c9ed99b38204d9107bc3dc14e92458acf1

View file

@ -75,6 +75,28 @@ Id EmitFPMin64(EmitContext& ctx, Id a, Id b) {
return ctx.OpFMin(ctx.F64[1], a, b);
}
/// Emits the minimum of three 32-bit float operands.
/// When the profile reports trinary min/max support a single FMin3AMD is
/// emitted; otherwise the result is built from two nested binary FMin ops.
Id EmitFPMinTri32(EmitContext& ctx, Id a, Id b, Id c) {
    const Id f32{ctx.F32[1]};
    if (!ctx.profile.supports_trinary_minmax) {
        // Fallback: min(a, min(b, c)).
        const Id min_bc{ctx.OpFMin(f32, b, c)};
        return ctx.OpFMin(f32, a, min_bc);
    }
    return ctx.OpFMin3AMD(f32, a, b, c);
}
/// Emits the maximum of three 32-bit float operands.
/// When the profile reports trinary min/max support a single FMax3AMD is
/// emitted; otherwise the result is built from two nested binary FMax ops.
Id EmitFPMaxTri32(EmitContext& ctx, Id a, Id b, Id c) {
    const Id f32{ctx.F32[1]};
    if (!ctx.profile.supports_trinary_minmax) {
        // Fallback: max(a, max(b, c)).
        const Id max_bc{ctx.OpFMax(f32, b, c)};
        return ctx.OpFMax(f32, a, max_bc);
    }
    return ctx.OpFMax3AMD(f32, a, b, c);
}
/// Emits the median (middle value) of three 32-bit float operands.
/// When the profile reports trinary min/max support a single FMid3AMD is
/// emitted; otherwise the median is composed from binary min/max as
/// max(min(a, b), min(max(a, b), c)).
Id EmitFPMedTri32(EmitContext& ctx, Id a, Id b, Id c) {
    const Id f32{ctx.F32[1]};
    if (!ctx.profile.supports_trinary_minmax) {
        const Id max_ab{ctx.OpFMax(f32, a, b)};
        const Id capped{ctx.OpFMin(f32, max_ab, c)};
        const Id min_ab{ctx.OpFMin(f32, a, b)};
        return ctx.OpFMax(f32, min_ab, capped);
    }
    return ctx.OpFMid3AMD(f32, a, b, c);
}
/// Emits a 16-bit float multiply of `a` and `b` and passes the result
/// through Decorate() together with the originating IR instruction.
Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
    const Id product{ctx.OpFMul(ctx.F16[1], a, b)};
    return Decorate(ctx, inst, product);
}

View file

@ -247,6 +247,9 @@ Id EmitFPMax32(EmitContext& ctx, Id a, Id b, bool is_legacy = false);
Id EmitFPMax64(EmitContext& ctx, Id a, Id b);
Id EmitFPMin32(EmitContext& ctx, Id a, Id b, bool is_legacy = false);
Id EmitFPMin64(EmitContext& ctx, Id a, Id b);
// Three-operand 32-bit float min / max / median. Their definitions emit the
// AMD trinary instruction when ctx.profile.supports_trinary_minmax is set and
// fall back to a chain of binary min/max operations otherwise.
Id EmitFPMinTri32(EmitContext& ctx, Id a, Id b, Id c);
Id EmitFPMaxTri32(EmitContext& ctx, Id a, Id b, Id c);
Id EmitFPMedTri32(EmitContext& ctx, Id a, Id b, Id c);
Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
@ -372,6 +375,12 @@ Id EmitSMin32(EmitContext& ctx, Id a, Id b);
Id EmitUMin32(EmitContext& ctx, Id a, Id b);
Id EmitSMax32(EmitContext& ctx, Id a, Id b);
Id EmitUMax32(EmitContext& ctx, Id a, Id b);
// Three-operand 32-bit integer min / max / median, in signed (S) and
// unsigned (U) variants. Their definitions emit the AMD trinary instruction
// when ctx.profile.supports_trinary_minmax is set and fall back to a chain of
// binary min/max operations otherwise.
Id EmitSMinTri32(EmitContext& ctx, Id a, Id b, Id c);
Id EmitUMinTri32(EmitContext& ctx, Id a, Id b, Id c);
Id EmitSMaxTri32(EmitContext& ctx, Id a, Id b, Id c);
Id EmitUMaxTri32(EmitContext& ctx, Id a, Id b, Id c);
Id EmitSMedTri32(EmitContext& ctx, Id a, Id b, Id c);
Id EmitUMedTri32(EmitContext& ctx, Id a, Id b, Id c);
Id EmitSClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max);
Id EmitUClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max);
Id EmitSLessThan32(EmitContext& ctx, Id lhs, Id rhs);

View file

@ -256,6 +256,50 @@ Id EmitUMax32(EmitContext& ctx, Id a, Id b) {
return ctx.OpUMax(ctx.U32[1], a, b);
}
/// Emits the signed minimum of three 32-bit integer operands.
/// When the profile reports trinary min/max support a single SMin3AMD is
/// emitted; otherwise the result is built from two nested binary SMin ops.
Id EmitSMinTri32(EmitContext& ctx, Id a, Id b, Id c) {
    const Id u32{ctx.U32[1]};
    if (!ctx.profile.supports_trinary_minmax) {
        // Fallback: min(a, min(b, c)).
        const Id min_bc{ctx.OpSMin(u32, b, c)};
        return ctx.OpSMin(u32, a, min_bc);
    }
    return ctx.OpSMin3AMD(u32, a, b, c);
}
/// Emits the unsigned minimum of three 32-bit integer operands.
/// When the profile reports trinary min/max support a single UMin3AMD is
/// emitted; otherwise the result is built from two nested binary UMin ops.
Id EmitUMinTri32(EmitContext& ctx, Id a, Id b, Id c) {
    const Id u32{ctx.U32[1]};
    if (!ctx.profile.supports_trinary_minmax) {
        // Fallback: min(a, min(b, c)).
        const Id min_bc{ctx.OpUMin(u32, b, c)};
        return ctx.OpUMin(u32, a, min_bc);
    }
    return ctx.OpUMin3AMD(u32, a, b, c);
}
/// Emits the signed maximum of three 32-bit integer operands.
/// When the profile reports trinary min/max support a single SMax3AMD is
/// emitted; otherwise the result is built from two nested binary SMax ops.
Id EmitSMaxTri32(EmitContext& ctx, Id a, Id b, Id c) {
    const Id u32{ctx.U32[1]};
    if (!ctx.profile.supports_trinary_minmax) {
        // Fallback: max(a, max(b, c)).
        const Id max_bc{ctx.OpSMax(u32, b, c)};
        return ctx.OpSMax(u32, a, max_bc);
    }
    return ctx.OpSMax3AMD(u32, a, b, c);
}
/// Emits the unsigned maximum of three 32-bit integer operands.
/// When the profile reports trinary min/max support a single UMax3AMD is
/// emitted; otherwise the result is built from two nested binary UMax ops.
Id EmitUMaxTri32(EmitContext& ctx, Id a, Id b, Id c) {
    const Id u32{ctx.U32[1]};
    if (!ctx.profile.supports_trinary_minmax) {
        // Fallback: max(a, max(b, c)).
        const Id max_bc{ctx.OpUMax(u32, b, c)};
        return ctx.OpUMax(u32, a, max_bc);
    }
    return ctx.OpUMax3AMD(u32, a, b, c);
}
/// Emits the signed median (middle value) of three 32-bit integer operands.
/// When the profile reports trinary min/max support a single SMid3AMD is
/// emitted; otherwise the median is composed from binary min/max as
/// max(min(a, b), min(max(a, b), c)).
Id EmitSMedTri32(EmitContext& ctx, Id a, Id b, Id c) {
    const Id u32{ctx.U32[1]};
    if (!ctx.profile.supports_trinary_minmax) {
        const Id max_ab{ctx.OpSMax(u32, a, b)};
        const Id capped{ctx.OpSMin(u32, max_ab, c)};
        const Id min_ab{ctx.OpSMin(u32, a, b)};
        return ctx.OpSMax(u32, min_ab, capped);
    }
    return ctx.OpSMid3AMD(u32, a, b, c);
}
/// Emits the unsigned median (middle value) of three 32-bit integer operands.
/// When the profile reports trinary min/max support a single UMid3AMD is
/// emitted; otherwise the median is composed from binary min/max as
/// max(min(a, b), min(max(a, b), c)).
Id EmitUMedTri32(EmitContext& ctx, Id a, Id b, Id c) {
    const Id u32{ctx.U32[1]};
    if (!ctx.profile.supports_trinary_minmax) {
        const Id max_ab{ctx.OpUMax(u32, a, b)};
        const Id capped{ctx.OpUMin(u32, max_ab, c)};
        const Id min_ab{ctx.OpUMin(u32, a, b)};
        return ctx.OpUMax(u32, min_ab, capped);
    }
    return ctx.OpUMid3AMD(u32, a, b, c);
}
Id EmitSClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max) {
Id result{};
if (ctx.profile.has_broken_spirv_clamp) {

View file

@ -238,13 +238,11 @@ public:
void V_FMA_F32(const GcnInst& inst);
void V_FMA_F64(const GcnInst& inst);
void V_MIN3_F32(const GcnInst& inst);
void V_MIN3_I32(const GcnInst& inst);
void V_MIN3_U32(const GcnInst& inst);
void V_MIN3_U32(bool is_signed, const GcnInst& inst);
void V_MAX3_F32(const GcnInst& inst);
void V_MAX3_U32(bool is_signed, const GcnInst& inst);
void V_MED3_F32(const GcnInst& inst);
void V_MED3_I32(const GcnInst& inst);
void V_MED3_U32(const GcnInst& inst);
void V_MED3_U32(bool is_signed, const GcnInst& inst);
void V_SAD(const GcnInst& inst);
void V_SAD_U32(const GcnInst& inst);
void V_CVT_PK_U16_U32(const GcnInst& inst);

View file

@ -359,9 +359,9 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
case Opcode::V_MIN3_F32:
return V_MIN3_F32(inst);
case Opcode::V_MIN3_I32:
return V_MIN3_I32(inst);
return V_MIN3_U32(true, inst);
case Opcode::V_MIN3_U32:
return V_MIN3_U32(inst);
return V_MIN3_U32(false, inst);
case Opcode::V_MAX3_F32:
return V_MAX3_F32(inst);
case Opcode::V_MAX3_I32:
@ -371,9 +371,9 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
case Opcode::V_MED3_F32:
return V_MED3_F32(inst);
case Opcode::V_MED3_I32:
return V_MED3_I32(inst);
return V_MED3_U32(true, inst);
case Opcode::V_MED3_U32:
return V_MED3_U32(inst);
return V_MED3_U32(false, inst);
case Opcode::V_SAD_U32:
return V_SAD_U32(inst);
case Opcode::V_CVT_PK_U16_U32:
@ -1166,59 +1166,42 @@ void Translator::V_MIN3_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
const IR::F32 src2{GetSrc<IR::F32>(inst.src[2])};
SetDst(inst.dst[0], ir.FPMin(src0, ir.FPMin(src1, src2)));
SetDst(inst.dst[0], ir.FPMinTri(src0, src1, src2));
}
void Translator::V_MIN3_I32(const GcnInst& inst) {
void Translator::V_MIN3_U32(bool is_signed, const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])};
const IR::U32 src1{GetSrc(inst.src[1])};
const IR::U32 src2{GetSrc(inst.src[2])};
SetDst(inst.dst[0], ir.SMin(src0, ir.SMin(src1, src2)));
}
void Translator::V_MIN3_U32(const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])};
const IR::U32 src1{GetSrc(inst.src[1])};
const IR::U32 src2{GetSrc(inst.src[2])};
SetDst(inst.dst[0], ir.UMin(src0, ir.UMin(src1, src2)));
SetDst(inst.dst[0], ir.IMinTri(src0, src1, src2, is_signed));
}
/// Translates V_MAX3_F32: reads the three float sources and stores their
/// maximum into dst[0].
void Translator::V_MAX3_F32(const GcnInst& inst) {
    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
    const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
    const IR::F32 src2{GetSrc<IR::F32>(inst.src[2])};
    // Write dst[0] exactly once, using the trinary IR op. A second store built
    // from nested binary FPMax calls would only be overwritten by this one.
    SetDst(inst.dst[0], ir.FPMaxTri(src0, src1, src2));
}
/// Translates the integer V_MAX3 instructions: reads the three 32-bit sources
/// and stores their maximum into dst[0].
/// @param is_signed Forwarded to IMaxTri to choose the signed or unsigned
///                  comparison.
/// @param inst      Decoded instruction supplying src[0..2] and dst[0].
void Translator::V_MAX3_U32(bool is_signed, const GcnInst& inst) {
    const IR::U32 src0{GetSrc(inst.src[0])};
    const IR::U32 src1{GetSrc(inst.src[1])};
    const IR::U32 src2{GetSrc(inst.src[2])};
    // Write dst[0] exactly once, using the trinary IR op. A second store built
    // from nested binary IMax calls would only be overwritten by this one.
    SetDst(inst.dst[0], ir.IMaxTri(src0, src1, src2, is_signed));
}
/// Translates V_MED3_F32: reads the three float sources and stores their
/// median (middle value) into dst[0].
void Translator::V_MED3_F32(const GcnInst& inst) {
    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
    const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
    const IR::F32 src2{GetSrc<IR::F32>(inst.src[2])};
    // Write dst[0] exactly once, using the trinary IR op. A hand-built
    // max(min(a, b), min(max(a, b), c)) store would only be overwritten by
    // this one.
    SetDst(inst.dst[0], ir.FPMedTri(src0, src1, src2));
}
void Translator::V_MED3_I32(const GcnInst& inst) {
void Translator::V_MED3_U32(bool is_signed, const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])};
const IR::U32 src1{GetSrc(inst.src[1])};
const IR::U32 src2{GetSrc(inst.src[2])};
const IR::U32 mmx = ir.SMin(ir.SMax(src0, src1), src2);
SetDst(inst.dst[0], ir.SMax(ir.SMin(src0, src1), mmx));
}
void Translator::V_MED3_U32(const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])};
const IR::U32 src1{GetSrc(inst.src[1])};
const IR::U32 src2{GetSrc(inst.src[2])};
const IR::U32 mmx = ir.UMin(ir.UMax(src0, src1), src2);
SetDst(inst.dst[0], ir.UMax(ir.UMin(src0, src1), mmx));
SetDst(inst.dst[0], ir.IMedTri(src0, src1, src2, is_signed));
}
void Translator::V_SAD(const GcnInst& inst) {

View file

@ -1336,6 +1336,18 @@ F32F64 IREmitter::FPMin(const F32F64& lhs, const F32F64& rhs, bool is_legacy) {
}
}
/// Appends an FPMinTri32 instruction taking `a`, `b` and `c` as operands.
/// Only the 32-bit opcode is emitted; the operand types are not inspected here.
F32F64 IREmitter::FPMinTri(const F32F64& a, const F32F64& b, const F32F64& c) {
    const F32 result{Inst<F32>(Opcode::FPMinTri32, a, b, c)};
    return result;
}
/// Appends an FPMaxTri32 instruction taking `a`, `b` and `c` as operands.
/// Only the 32-bit opcode is emitted; the operand types are not inspected here.
F32F64 IREmitter::FPMaxTri(const F32F64& a, const F32F64& b, const F32F64& c) {
    const F32 result{Inst<F32>(Opcode::FPMaxTri32, a, b, c)};
    return result;
}
/// Appends an FPMedTri32 instruction taking `a`, `b` and `c` as operands.
/// Only the 32-bit opcode is emitted; the operand types are not inspected here.
F32F64 IREmitter::FPMedTri(const F32F64& a, const F32F64& b, const F32F64& c) {
    const F32 result{Inst<F32>(Opcode::FPMedTri32, a, b, c)};
    return result;
}
U32U64 IREmitter::IAdd(const U32U64& a, const U32U64& b) {
if (a.Type() != b.Type()) {
UNREACHABLE_MSG("Mismatching types {} and {}", a.Type(), b.Type());
@ -1567,6 +1579,42 @@ U32 IREmitter::IMax(const U32& a, const U32& b, bool is_signed) {
return is_signed ? SMax(a, b) : UMax(a, b);
}
/// Appends an SMinTri32 (signed three-operand minimum) instruction.
U32 IREmitter::SMinTri(const U32& a, const U32& b, const U32& c) {
    const U32 result{Inst<U32>(Opcode::SMinTri32, a, b, c)};
    return result;
}
/// Appends a UMinTri32 (unsigned three-operand minimum) instruction.
U32 IREmitter::UMinTri(const U32& a, const U32& b, const U32& c) {
    const U32 result{Inst<U32>(Opcode::UMinTri32, a, b, c)};
    return result;
}
/// Three-operand integer minimum; dispatches to SMinTri when `is_signed` is
/// true and to UMinTri otherwise.
U32 IREmitter::IMinTri(const U32& a, const U32& b, const U32& c, bool is_signed) {
    if (is_signed) {
        return SMinTri(a, b, c);
    }
    return UMinTri(a, b, c);
}
/// Appends an SMaxTri32 (signed three-operand maximum) instruction.
U32 IREmitter::SMaxTri(const U32& a, const U32& b, const U32& c) {
    const U32 result{Inst<U32>(Opcode::SMaxTri32, a, b, c)};
    return result;
}
/// Appends a UMaxTri32 (unsigned three-operand maximum) instruction.
U32 IREmitter::UMaxTri(const U32& a, const U32& b, const U32& c) {
    const U32 result{Inst<U32>(Opcode::UMaxTri32, a, b, c)};
    return result;
}
/// Three-operand integer maximum; dispatches to SMaxTri when `is_signed` is
/// true and to UMaxTri otherwise.
U32 IREmitter::IMaxTri(const U32& a, const U32& b, const U32& c, bool is_signed) {
    if (is_signed) {
        return SMaxTri(a, b, c);
    }
    return UMaxTri(a, b, c);
}
/// Appends an SMedTri32 (signed three-operand median) instruction.
U32 IREmitter::SMedTri(const U32& a, const U32& b, const U32& c) {
    const U32 result{Inst<U32>(Opcode::SMedTri32, a, b, c)};
    return result;
}
/// Appends a UMedTri32 (unsigned three-operand median) instruction.
U32 IREmitter::UMedTri(const U32& a, const U32& b, const U32& c) {
    const U32 result{Inst<U32>(Opcode::UMedTri32, a, b, c)};
    return result;
}
/// Three-operand integer median; dispatches to SMedTri when `is_signed` is
/// true and to UMedTri otherwise.
U32 IREmitter::IMedTri(const U32& a, const U32& b, const U32& c, bool is_signed) {
    if (is_signed) {
        return SMedTri(a, b, c);
    }
    return UMedTri(a, b, c);
}
/// Appends an SClamp32 instruction with `value`, `min` and `max` as operands.
U32 IREmitter::SClamp(const U32& value, const U32& min, const U32& max) {
    const U32 clamped{Inst<U32>(Opcode::SClamp32, value, min, max)};
    return clamped;
}

View file

@ -233,6 +233,9 @@ public:
[[nodiscard]] U1 FPUnordered(const F32F64& lhs, const F32F64& rhs);
[[nodiscard]] F32F64 FPMax(const F32F64& lhs, const F32F64& rhs, bool is_legacy = false);
[[nodiscard]] F32F64 FPMin(const F32F64& lhs, const F32F64& rhs, bool is_legacy = false);
// Three-operand float min / max / median. The definitions always emit the
// 32-bit opcodes (FPMinTri32 / FPMaxTri32 / FPMedTri32).
[[nodiscard]] F32F64 FPMinTri(const F32F64& a, const F32F64& b, const F32F64& c);
[[nodiscard]] F32F64 FPMaxTri(const F32F64& a, const F32F64& b, const F32F64& c);
[[nodiscard]] F32F64 FPMedTri(const F32F64& a, const F32F64& b, const F32F64& c);
[[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b);
[[nodiscard]] Value IAddCary(const U32& a, const U32& b);
@ -266,6 +269,15 @@ public:
[[nodiscard]] U32 SMax(const U32& a, const U32& b);
[[nodiscard]] U32 UMax(const U32& a, const U32& b);
[[nodiscard]] U32 IMax(const U32& a, const U32& b, bool is_signed);
// Three-operand integer min / max / median. The S* and U* members emit the
// signed and unsigned 32-bit opcode respectively; the I* members select
// between the two based on is_signed.
[[nodiscard]] U32 SMinTri(const U32& a, const U32& b, const U32& c);
[[nodiscard]] U32 UMinTri(const U32& a, const U32& b, const U32& c);
[[nodiscard]] U32 IMinTri(const U32& a, const U32& b, const U32& c, bool is_signed);
[[nodiscard]] U32 SMaxTri(const U32& a, const U32& b, const U32& c);
[[nodiscard]] U32 UMaxTri(const U32& a, const U32& b, const U32& c);
[[nodiscard]] U32 IMaxTri(const U32& a, const U32& b, const U32& c, bool is_signed);
[[nodiscard]] U32 SMedTri(const U32& a, const U32& b, const U32& c);
[[nodiscard]] U32 UMedTri(const U32& a, const U32& b, const U32& c);
[[nodiscard]] U32 IMedTri(const U32& a, const U32& b, const U32& c, bool is_signed);
[[nodiscard]] U32 SClamp(const U32& value, const U32& min, const U32& max);
[[nodiscard]] U32 UClamp(const U32& value, const U32& min, const U32& max);

View file

@ -241,6 +241,9 @@ OPCODE(FPMax32, F32, F32,
OPCODE(FPMax64, F64, F64, F64, )
OPCODE(FPMin32, F32, F32, F32, U1, )
OPCODE(FPMin64, F64, F64, F64, )
// Three-operand 32-bit float min / max / median.
// NOTE(review): the columns after the name appear to be the result type
// followed by the operand types - confirm against the OPCODE macro.
OPCODE(FPMinTri32, F32, F32, F32, F32, )
OPCODE(FPMaxTri32, F32, F32, F32, F32, )
OPCODE(FPMedTri32, F32, F32, F32, F32, )
OPCODE(FPMul32, F32, F32, F32, )
OPCODE(FPMul64, F64, F64, F64, )
OPCODE(FPDiv32, F32, F32, F32, )
@ -350,6 +353,12 @@ OPCODE(SMin32, U32, U32,
OPCODE(UMin32, U32, U32, U32, )
OPCODE(SMax32, U32, U32, U32, )
OPCODE(UMax32, U32, U32, U32, )
// Three-operand 32-bit integer min / max / median, signed (S) and unsigned (U).
// NOTE(review): the columns after the name appear to be the result type
// followed by the operand types - confirm against the OPCODE macro.
OPCODE(SMinTri32, U32, U32, U32, U32, )
OPCODE(UMinTri32, U32, U32, U32, U32, )
OPCODE(SMaxTri32, U32, U32, U32, U32, )
OPCODE(UMaxTri32, U32, U32, U32, U32, )
OPCODE(SMedTri32, U32, U32, U32, U32, )
OPCODE(UMedTri32, U32, U32, U32, U32, )
OPCODE(SClamp32, U32, U32, U32, U32, )
OPCODE(UClamp32, U32, U32, U32, U32, )
OPCODE(SLessThan32, U1, U32, U32, )

View file

@ -26,6 +26,7 @@ struct Profile {
bool support_legacy_vertex_attributes{};
bool supports_image_load_store_lod{};
bool supports_native_cube_calc{};
bool supports_trinary_minmax{};
bool supports_robust_buffer_access{};
bool has_broken_spirv_clamp{};
bool lower_left_origin_mode{};

View file

@ -276,6 +276,7 @@ bool Instance::CreateDevice() {
shader_stencil_export = add_extension(VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME);
image_load_store_lod = add_extension(VK_AMD_SHADER_IMAGE_LOAD_STORE_LOD_EXTENSION_NAME);
amd_gcn_shader = add_extension(VK_AMD_GCN_SHADER_EXTENSION_NAME);
amd_shader_trinary_minmax = add_extension(VK_AMD_SHADER_TRINARY_MINMAX_EXTENSION_NAME);
const bool calibrated_timestamps =
TRACY_GPU_ENABLED ? add_extension(VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME) : false;

View file

@ -145,6 +145,11 @@ public:
return amd_gcn_shader;
}
/// Returns the stored VK_AMD_shader_trinary_minmax support flag
/// (true when the extension is supported).
bool IsAmdShaderTrinaryMinMaxSupported() const {
    const bool supported{amd_shader_trinary_minmax};
    return supported;
}
/// Returns true when geometry shaders are supported by the device
bool IsGeometryStageSupported() const {
return features.geometryShader;
@ -333,6 +338,7 @@ private:
bool shader_stencil_export{};
bool image_load_store_lod{};
bool amd_gcn_shader{};
bool amd_shader_trinary_minmax{};
bool portability_subset{};
};

View file

@ -201,6 +201,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
.support_legacy_vertex_attributes = instance_.IsLegacyVertexAttributesSupported(),
.supports_image_load_store_lod = instance_.IsImageLoadStoreLodSupported(),
.supports_native_cube_calc = instance_.IsAmdGcnShaderSupported(),
.supports_trinary_minmax = instance_.IsAmdShaderTrinaryMinMaxSupported(),
.supports_robust_buffer_access = instance_.IsRobustBufferAccess2Supported(),
.needs_manual_interpolation = instance.IsFragmentShaderBarycentricSupported() &&
instance.GetDriverID() == vk::DriverId::eNvidiaProprietary,