mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-08-09 17:49:40 +00:00
LibWasm: Fix SIMD shuffle and swizzle
`swizzle` had its two operands swapped, and the boolean logic used to build the vector mask was incorrect in the internal `shuffle_or_0` implementation. `shuffle` was previously implemented as a dynamic swizzle, even though the spec defines it as taking an immediate operand for its lane indices.
This commit is contained in:
parent
d841742c35
commit
9cc3e7d32d
Notes:
github-actions[bot]
2024-07-24 21:24:08 +00:00
Author: https://github.com/dzfrias
Commit: 9cc3e7d32d
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/794
Reviewed-by: https://github.com/alimpfard
3 changed files with 15 additions and 9 deletions
|
@ -220,13 +220,14 @@ ALWAYS_INLINE static T shuffle_or_0_impl(T a, Control control, IndexSequence<Idx
|
||||||
if constexpr (__has_builtin(__builtin_shuffle)) {
|
if constexpr (__has_builtin(__builtin_shuffle)) {
|
||||||
// GCC does a very bad job at optimizing the masking, while not recognizing the shuffle idiom
|
// GCC does a very bad job at optimizing the masking, while not recognizing the shuffle idiom
|
||||||
// So we jinx its __builtin_shuffle to work with out of bounds indices
|
// So we jinx its __builtin_shuffle to work with out of bounds indices
|
||||||
|
// TODO: verify that this masking logic is correct (for machines with __builtin_shuffle)
|
||||||
auto mask = (control >= 0) | (control < N);
|
auto mask = (control >= 0) | (control < N);
|
||||||
return __builtin_shuffle(a, control & mask) & ~mask;
|
return __builtin_shuffle(a, control & mask) & ~mask;
|
||||||
}
|
}
|
||||||
// 1. Set all out of bounds values to ~0
|
// 1. Set all out of bounds values to ~0
|
||||||
// Note: This is done so that the optimization mentioned down below works
|
// Note: This is done so that the optimization mentioned down below works
|
||||||
// Note: Vector compares result in bitmasks, aka all 1s or all 0s per element
|
// Note: Vector compares result in bitmasks, aka all 1s or all 0s per element
|
||||||
control |= ~((control > 0) | (control < N));
|
control |= ~((control >= 0) & (control < N));
|
||||||
// 2. Selectively set out of bounds values to 0
|
// 2. Selectively set out of bounds values to 0
|
||||||
// Note: Clang successfully optimizes this to a few instructions on x86-ssse3, GCC does not
|
// Note: Clang successfully optimizes this to a few instructions on x86-ssse3, GCC does not
|
||||||
// Vector Optimizations/Instruction-Selection on ArmV8 seem to not be as powerful as of Clang18
|
// Vector Optimizations/Instruction-Selection on ArmV8 seem to not be as powerful as of Clang18
|
||||||
|
|
|
@ -1289,12 +1289,17 @@ void BytecodeInterpreter::interpret(Configuration& configuration, InstructionPoi
|
||||||
case Instructions::f64x2_splat.value():
|
case Instructions::f64x2_splat.value():
|
||||||
return pop_and_push_m_splat<64, NativeFloatingType>(configuration, instruction);
|
return pop_and_push_m_splat<64, NativeFloatingType>(configuration, instruction);
|
||||||
case Instructions::i8x16_shuffle.value(): {
|
case Instructions::i8x16_shuffle.value(): {
|
||||||
auto indices = pop_vector<u8, MakeSigned>(configuration);
|
auto& arg = instruction.arguments().get<Instruction::ShuffleArgument>();
|
||||||
TRAP_IF_NOT(indices.has_value());
|
auto b = *pop_vector<u8, MakeUnsigned>(configuration);
|
||||||
auto vector = peek_vector<u8, MakeSigned>(configuration);
|
auto a = *pop_vector<u8, MakeUnsigned>(configuration);
|
||||||
TRAP_IF_NOT(vector.has_value());
|
using VectorType = Native128ByteVectorOf<u8, MakeUnsigned>;
|
||||||
auto result = shuffle_vector(vector.value(), indices.value());
|
VectorType result;
|
||||||
configuration.stack().peek() = Value(result);
|
for (size_t i = 0; i < 16; ++i)
|
||||||
|
if (arg.lanes[i] < 16)
|
||||||
|
result[i] = a[arg.lanes[i]];
|
||||||
|
else
|
||||||
|
result[i] = b[arg.lanes[i] - 16];
|
||||||
|
configuration.stack().push(Value(bit_cast<u128>(result)));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
case Instructions::v128_store.value():
|
case Instructions::v128_store.value():
|
||||||
|
|
|
@ -239,8 +239,8 @@ struct VectorSwizzle {
|
||||||
auto operator()(u128 c1, u128 c2) const
|
auto operator()(u128 c1, u128 c2) const
|
||||||
{
|
{
|
||||||
// https://webassembly.github.io/spec/core/bikeshed/#-mathsfi8x16hrefsyntax-instr-vecmathsfswizzle%E2%91%A0
|
// https://webassembly.github.io/spec/core/bikeshed/#-mathsfi8x16hrefsyntax-instr-vecmathsfswizzle%E2%91%A0
|
||||||
auto i = bit_cast<Native128ByteVectorOf<i8, MakeSigned>>(c2);
|
auto i = bit_cast<Native128ByteVectorOf<i8, MakeSigned>>(c1);
|
||||||
auto j = bit_cast<Native128ByteVectorOf<i8, MakeSigned>>(c1);
|
auto j = bit_cast<Native128ByteVectorOf<i8, MakeSigned>>(c2);
|
||||||
auto result = shuffle_or_0(i, j);
|
auto result = shuffle_or_0(i, j);
|
||||||
return bit_cast<u128>(result);
|
return bit_cast<u128>(result);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue