diff --git a/AK/SIMDExtras.h b/AK/SIMDExtras.h index 081ab2cf786..41ce029ce44 100644 --- a/AK/SIMDExtras.h +++ b/AK/SIMDExtras.h @@ -218,11 +218,10 @@ ALWAYS_INLINE static T shuffle_or_0_impl(T a, Control control, IndexSequence; if constexpr (__has_builtin(__builtin_shuffle)) { - // GCC does a very bad job at optimizing the masking, while not recognizing the shuffle idiom - // So we jinx its __builtin_shuffle to work with out of bounds indices - // TODO: verify that this masking logic is correct (for machines with __builtin_shuffle) - auto mask = (control >= 0) | (control < N); - return __builtin_shuffle(a, control & mask) & ~mask; + auto vector = __builtin_shuffle(a, control); + for (size_t i = 0; i < N; ++i) + vector[i] = control[i] < 0 || control[i] >= N ? 0 : vector[i]; + return vector; } // 1. Set all out of bounds values to ~0 // Note: This is done so that the optimization mentioned down below works