diff --git a/AK/SIMDExtras.h b/AK/SIMDExtras.h index 3147f2fb3a8..081ab2cf786 100644 --- a/AK/SIMDExtras.h +++ b/AK/SIMDExtras.h @@ -220,13 +220,14 @@ ALWAYS_INLINE static T shuffle_or_0_impl(T a, Control control, IndexSequence= 0) | (control < N); return __builtin_shuffle(a, control & mask) & ~mask; } // 1. Set all out of bounds values to ~0 // Note: This is done so that the optimization mentioned down below works // Note: Vector compares result in bitmasks, aka all 1s or all 0s per element - control |= ~((control > 0) | (control < N)); + control |= ~((control >= 0) & (control < N)); // 2. Selectively set out of bounds values to 0 // Note: Clang successfully optimizes this to a few instructions on x86-ssse3, GCC does not // Vector Optimizations/Instruction-Selection on ArmV8 seem to not be as powerful as of Clang18 diff --git a/Userland/Libraries/LibWasm/AbstractMachine/BytecodeInterpreter.cpp b/Userland/Libraries/LibWasm/AbstractMachine/BytecodeInterpreter.cpp index bf6cf86d708..28dd42b8216 100644 --- a/Userland/Libraries/LibWasm/AbstractMachine/BytecodeInterpreter.cpp +++ b/Userland/Libraries/LibWasm/AbstractMachine/BytecodeInterpreter.cpp @@ -1289,12 +1289,17 @@ void BytecodeInterpreter::interpret(Configuration& configuration, InstructionPoi case Instructions::f64x2_splat.value(): return pop_and_push_m_splat<64, NativeFloatingType>(configuration, instruction); case Instructions::i8x16_shuffle.value(): { - auto indices = pop_vector(configuration); - TRAP_IF_NOT(indices.has_value()); - auto vector = peek_vector(configuration); - TRAP_IF_NOT(vector.has_value()); - auto result = shuffle_vector(vector.value(), indices.value()); - configuration.stack().peek() = Value(result); + auto& arg = instruction.arguments().get(); + auto b = *pop_vector(configuration); + auto a = *pop_vector(configuration); + using VectorType = Native128ByteVectorOf; + VectorType result; + for (size_t i = 0; i < 16; ++i) + if (arg.lanes[i] < 16) + result[i] = a[arg.lanes[i]]; + else + result[i] = b[arg.lanes[i] - 16]; + configuration.stack().push(Value(bit_cast(result))); return; } case Instructions::v128_store.value(): diff --git a/Userland/Libraries/LibWasm/AbstractMachine/Operators.h b/Userland/Libraries/LibWasm/AbstractMachine/Operators.h index e421cf8af11..ad9843e03a9 100644 --- a/Userland/Libraries/LibWasm/AbstractMachine/Operators.h +++ b/Userland/Libraries/LibWasm/AbstractMachine/Operators.h @@ -239,8 +239,8 @@ struct VectorSwizzle { auto operator()(u128 c1, u128 c2) const { // https://webassembly.github.io/spec/core/bikeshed/#-mathsfi8x16hrefsyntax-instr-vecmathsfswizzle%E2%91%A0 - auto i = bit_cast>(c2); - auto j = bit_cast>(c1); + auto i = bit_cast>(c1); + auto j = bit_cast>(c2); auto result = shuffle_or_0(i, j); return bit_cast(result); }