Misc Cleanups (#2579)

-dont do trivial phi removal during SRT pass, that's now done in
ssa_rewrite
-remove unused variable when walking tess attributes
-fix some tess comments
This commit is contained in:
baggins183 2025-03-02 11:52:32 -08:00 committed by GitHub
parent d59536a71c
commit 7a4244ac8b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 24 additions and 57 deletions

View file

@ -75,10 +75,12 @@ namespace Shader::Optimization {
*
* output_patch_stride and output_cp_stride are usually compile time constants in the gcn
*
* Hull shaders can probably also read output control points corresponding to other threads, like
* shared memory (but we havent seen this yet).
* ^ This is an UNREACHABLE for now. We may need to insert additional barriers if this happens.
* They should also be able to read PatchConst values,
* Hull shaders can also read output control points corresponding to other threads.
* In HLSL style, this should only be possible in the Patch Constant function.
* TODO we may need to insert additional barriers if sync is free/more permissive
* on AMD LDS HW
* They should also be able to read output PatchConst values,
* although not sure if this happens in practice.
*
* To determine which type of attribute (input, output, patchconst) we the check the users of
@ -101,22 +103,22 @@ namespace Shader::Optimization {
* layout (location = 0) in vec4 in_attrs[][NUM_INPUT_ATTRIBUTES];
*
* Here the NUM_INPUT_ATTRIBUTES is derived from the ls_stride member of the TessConstants V#.
* We divide ls_stride (in bytes) by 16 to get the number of vec4 attributes.
* For TES, the number of attributes comes from hs_cp_stride / 16.
* We take ALIGN_UP(ls_stride, 16) / 16 to get the number of vec4 attributes.
* For TES, NUM_INPUT_ATTRIBUTES is ALIGN_UP(hs_cp_stride, 16) / 16.
* The first (outer) dimension is unsized but corresponds to the number of vertices in the hs input
* patch (for Hull) or the hs output patch (for Domain).
*
* For input reads in TCS or TES, we emit SPIR-V like:
* float value = in_attrs[addr / ls_stride][(addr % ls_stride) >> 4][(addr & 0xF) >> 2];
* float value = in_attrs[addr / ls_stride][(addr % ls_stride) >> 4][(addr % ls_stride) >> 2];
*
* For output writes, we assume the control point index is InvocationId, since high level languages
* impose that restriction (although maybe it's technically possible on hardware). So SPIR-V looks
* like this:
* layout (location = 0) in vec4 in_attrs[][NUM_OUTPUT_ATTRIBUTES];
* out_attrs[InvocationId][(addr % hs_cp_stride) >> 4][(addr & 0xF) >> 2] = value;
* out_attrs[InvocationId][(addr % hs_cp_stride) >> 4][(addr % hs_cp_stride) >> 2] = value;
*
* NUM_OUTPUT_ATTRIBUTES is derived by hs_cp_stride / 16, so it can link with the TES in_attrs
* variable.
* NUM_OUTPUT_ATTRIBUTES is derived by ALIGN_UP(hs_cp_stride, 16) / 16, so it matches
* NUM_INPUT_ATTRIBUTES of the TES.
*
* Another challenge is the fact that the GCN shader needs to address attributes from LDS as a whole
* which contains the attributes from many patches. On the other hand, higher level shading
@ -235,12 +237,11 @@ private:
case IR::Opcode::Phi: {
struct PhiCounter {
u16 seq_num;
u8 unique_edge;
u8 counter;
u16 unique_edge;
};
PhiCounter count = inst->Flags<PhiCounter>();
ASSERT_MSG(count.counter == 0 || count.unique_edge == use.operand);
ASSERT_MSG(count.seq_num == 0 || count.unique_edge == use.operand);
// the point of seq_num is to tell us if we've already traversed this
// phi on the current walk. Alternatively we could keep a set of phi's
// seen on the current walk. This is to handle phi cycles
@ -249,13 +250,11 @@ private:
count.seq_num = seq_num;
// Mark the phi as having been traversed originally through this edge
count.unique_edge = use.operand;
count.counter = inc;
} else if (count.seq_num < seq_num) {
count.seq_num = seq_num;
// For now, assume we are visiting this phi via the same edge
// as on other walks. If not, some dataflow analysis might be necessary
ASSERT(count.unique_edge == use.operand);
count.counter += inc;
} else {
// count.seq_num == seq_num
// there's a cycle, and we've already been here on this walk

View file

@ -52,24 +52,16 @@ private:
switch (inst->GetOpcode()) {
case IR::Opcode::Phi: {
// hack to get to parity with main
// Need to fix ssa_rewrite pass to remove certain phis
std::optional<IR::Value> source = TryRemoveTrivialPhi(inst);
if (!source) {
const auto pred = [](IR::Inst* inst) -> std::optional<IR::Inst*> {
if (inst->GetOpcode() == IR::Opcode::GetUserData ||
inst->GetOpcode() == IR::Opcode::CompositeConstructU32x2 ||
inst->GetOpcode() == IR::Opcode::ReadConst) {
return inst;
}
return std::nullopt;
};
source = IR::BreadthFirstSearch(inst, pred).transform([](auto inst) {
return IR::Value{inst};
});
ASSERT(source);
}
vn = GetValueNumber(source.value());
const auto pred = [](IR::Inst* inst) -> std::optional<IR::Inst*> {
if (inst->GetOpcode() == IR::Opcode::GetUserData ||
inst->GetOpcode() == IR::Opcode::CompositeConstructU32x2 ||
inst->GetOpcode() == IR::Opcode::ReadConst) {
return inst;
}
return std::nullopt;
};
IR::Inst* source = IR::BreadthFirstSearch(inst, pred).value();
vn = GetValueNumber(source);
value_numbers[IR::Value(inst)] = vn;
break;
}
@ -117,30 +109,6 @@ private:
return iv;
}
// Temp workaround for something like this:
// [0000555558a5baf8] %297 = Phi [ %24, {Block $1} ], [ %297, {Block $5} ] (uses: 4)
// [0000555558a4e038] %305 = CompositeConstructU32x2 %297, %296 (uses: 4)
// [0000555558a4e0a8] %306 = ReadConst %305, #0 (uses: 2)
// Should probably be fixed in ssa_rewrite
std::optional<IR::Value> TryRemoveTrivialPhi(IR::Inst* phi) {
IR::Value single_source{};
for (auto i = 0; i < phi->NumArgs(); i++) {
IR::Value v = phi->Arg(i).Resolve();
if (v == IR::Value(phi)) {
continue;
}
if (!single_source.IsEmpty() && single_source != v) {
return std::nullopt;
}
single_source = v;
}
ASSERT(!single_source.IsEmpty());
phi->ReplaceUsesWith(single_source);
return single_source;
}
struct HashInstVector {
size_t operator()(const InstVector& iv) const {
u32 h = 0;