LibCrypto: Improve efficiency of UnsignedBigInteger::shift_left

Before:
- a separate Word element allocation of the underlying Vector<Word> was
necessary for every new word in a multi-word shift
- two additional temporary UnsignedBigInteger buffers were allocated
and passed through, including in downstream calls (e.g. Multiplication)
- an additional allocation and word shift for the carry
- FIXME note seems to point to some of these issues

After:
- main change is in LibCrypto/BigInt/Algorithms/BitwiseOperations.cpp
- one single allocation per call, using shift_left_by_n_words
- only the input "number" and "output" need to be allocated by the
  caller
- downstream calls are adapted not to allocate or pass temporary
  buffers
- noticeable performance improvement when running TestBigInteger:
  0.41-0.42s (before) to 0.28-0.29s (after) Intel Core i9 laptop

Bonus: remove unused variables from UnsignedBigInteger::divided_by
- These were likely cut-and-paste artifacts from
  UnsignedBigInteger::multiplied_by; not caught by "unused-varible".

NOTE: making this change in a separate commit than shift_right, even if
it touches the same file BitwiseOperations.cpp since:
- it is a "bonus" addition: not necessary for fixing the shift_right
  bug, but logically unrelated to the shift_right code
- it brings a chain of downstream interface modifications (7 files),
  unrelated to shift_right
This commit is contained in:
Manuel Zahariev 2025-01-08 08:51:40 -08:00 committed by Ali Mohammad Pur
parent 05cfbdd6fb
commit d2ea77c099
Notes: github-actions[bot] 2025-03-23 18:34:27 +00:00
9 changed files with 60 additions and 74 deletions

View file

@ -166,58 +166,43 @@ FLATTEN ErrorOr<void> UnsignedBigIntegerAlgorithms::bitwise_not_fill_to_one_base
FLATTEN void UnsignedBigIntegerAlgorithms::shift_left_without_allocation(
UnsignedBigInteger const& number,
size_t num_bits,
UnsignedBigInteger& temp_result,
UnsignedBigInteger& temp_plus,
UnsignedBigInteger& output)
{
MUST(try_shift_left_without_allocation(number, num_bits, temp_result, temp_plus, output));
MUST(try_shift_left_without_allocation(number, num_bits, output));
}
/**
* Complexity : O(N + num_bits % 8) where N is the number of words in the number
* Shift method :
* Start by shifting by whole words in num_bits (by putting missing words at the start),
* then shift the number's words two by two by the remaining amount of bits.
* Complexity : O(N) where N is the number of words in the number
*/
FLATTEN ErrorOr<void> UnsignedBigIntegerAlgorithms::try_shift_left_without_allocation(
UnsignedBigInteger const& number,
size_t num_bits,
UnsignedBigInteger& temp_result,
UnsignedBigInteger& temp_plus,
UnsignedBigInteger& output)
{
// We can only do shift operations on individual words
// where the shift amount is <= size of word (32).
// But we do know how to shift by a multiple of word size (e.g 64=32*2)
// So we first shift the result by how many whole words fit in 'num_bits'
TRY(try_shift_left_by_n_words(number, num_bits / UnsignedBigInteger::BITS_IN_WORD, temp_result));
size_t const bit_shift = num_bits % UnsignedBigInteger::BITS_IN_WORD;
size_t const bit_shift_complement = UnsignedBigInteger::BITS_IN_WORD - bit_shift;
output.set_to(temp_result);
size_t const zero_based_index_of_highest_set_bit_in_hiword = (number.one_based_index_of_highest_set_bit() - 1) % UnsignedBigInteger::BITS_IN_WORD;
// And now we shift by the leftover amount of bits
num_bits %= UnsignedBigInteger::BITS_IN_WORD;
// true if the high word is a result of the bit_shift
bool const hiword_shift = (bit_shift + zero_based_index_of_highest_set_bit_in_hiword) >= UnsignedBigInteger::BITS_IN_WORD;
size_t const word_shift = num_bits / UnsignedBigInteger::BITS_IN_WORD;
if (num_bits == 0) {
TRY(try_shift_left_by_n_words(number, word_shift + (hiword_shift ? 1 : 0), output));
if (bit_shift == 0) // shifting left by an exact number of words)
return {};
UnsignedBigInteger::Word carry = 0;
for (size_t i = 0; i < number.length(); ++i) {
size_t const output_index = i + word_shift;
output.m_words[output_index] = (number.m_words.at(i) << bit_shift) | carry;
carry = (number.m_words.at(i) >> bit_shift_complement);
}
for (size_t i = 0; i < temp_result.length(); ++i) {
u32 current_word_of_temp_result = shift_left_get_one_word(temp_result, num_bits, i);
output.m_words[i] = current_word_of_temp_result;
}
// Shifting the last word can produce a carry
u32 carry_word = shift_left_get_one_word(temp_result, num_bits, temp_result.length());
if (carry_word != 0) {
// output += (carry_word << temp_result.length())
// FIXME : Using temp_plus this way to transform carry_word into a bigint is not
// efficient nor pretty. Maybe we should have an "add_with_shift" method ?
temp_plus.set_to_0();
temp_plus.m_words.append(carry_word);
TRY(try_shift_left_by_n_words(temp_plus, temp_result.length(), temp_result));
add_into_accumulator_without_allocation(output, temp_result);
}
if (hiword_shift)
output.m_words[output.length() - 1] = carry;
return {};
}

View file

@ -46,8 +46,6 @@ void UnsignedBigIntegerAlgorithms::extended_GCD_without_allocation(
UnsignedBigInteger& temp_quotient,
UnsignedBigInteger& temp_1,
UnsignedBigInteger& temp_2,
UnsignedBigInteger& temp_shift_result,
UnsignedBigInteger& temp_shift_plus,
UnsignedBigInteger& temp_shift,
UnsignedBigInteger& temp_r,
UnsignedBigInteger& temp_s,
@ -66,7 +64,7 @@ void UnsignedBigIntegerAlgorithms::extended_GCD_without_allocation(
divide_without_allocation(gcd, temp_r, temp_quotient, temp_1);
temp_2.set_to(temp_r);
multiply_without_allocation(temp_quotient, temp_r, temp_shift_result, temp_shift_plus, temp_shift, temp_1);
multiply_without_allocation(temp_quotient, temp_r, temp_shift, temp_1);
while (gcd < temp_1) {
add_into_accumulator_without_allocation(gcd, b);
}
@ -75,7 +73,7 @@ void UnsignedBigIntegerAlgorithms::extended_GCD_without_allocation(
// (old_s, s) := (s, old_s quotient × s)
temp_2.set_to(temp_s);
multiply_without_allocation(temp_quotient, temp_s, temp_shift_result, temp_shift_plus, temp_shift, temp_1);
multiply_without_allocation(temp_quotient, temp_s, temp_shift, temp_1);
while (x < temp_1) {
add_into_accumulator_without_allocation(x, b);
}
@ -84,7 +82,7 @@ void UnsignedBigIntegerAlgorithms::extended_GCD_without_allocation(
// (old_t, t) := (t, old_t quotient × t)
temp_2.set_to(temp_t);
multiply_without_allocation(temp_quotient, temp_t, temp_shift_result, temp_shift_plus, temp_shift, temp_1);
multiply_without_allocation(temp_quotient, temp_t, temp_shift, temp_1);
while (y < temp_1) {
add_into_accumulator_without_allocation(y, b);
}

View file

@ -19,14 +19,12 @@ void UnsignedBigIntegerAlgorithms::modular_inverse_without_allocation(
UnsignedBigInteger& temp_quotient,
UnsignedBigInteger& temp_1,
UnsignedBigInteger& temp_2,
UnsignedBigInteger& temp_shift_result,
UnsignedBigInteger& temp_shift_plus,
UnsignedBigInteger& temp_shift,
UnsignedBigInteger& temp_r,
UnsignedBigInteger& temp_s,
UnsignedBigInteger& temp_t)
{
extended_GCD_without_allocation(a, b, result, temp_y, temp_gcd, temp_quotient, temp_1, temp_2, temp_shift_result, temp_shift_plus, temp_shift, temp_r, temp_s, temp_t);
extended_GCD_without_allocation(a, b, result, temp_y, temp_gcd, temp_quotient, temp_1, temp_2, temp_shift, temp_r, temp_s, temp_t);
divide_without_allocation(result, b, temp_quotient, temp_1);
add_into_accumulator_without_allocation(temp_1, b);

View file

@ -14,8 +14,6 @@ void UnsignedBigIntegerAlgorithms::destructive_modular_power_without_allocation(
UnsignedBigInteger& base,
UnsignedBigInteger const& m,
UnsignedBigInteger& temp_1,
UnsignedBigInteger& temp_2,
UnsignedBigInteger& temp_3,
UnsignedBigInteger& temp_multiply,
UnsignedBigInteger& temp_quotient,
UnsignedBigInteger& temp_remainder,
@ -25,7 +23,7 @@ void UnsignedBigIntegerAlgorithms::destructive_modular_power_without_allocation(
while (!(ep < 1)) {
if (ep.words()[0] % 2 == 1) {
// exp = (exp * base) % m;
multiply_without_allocation(exp, base, temp_1, temp_2, temp_3, temp_multiply);
multiply_without_allocation(exp, base, temp_1, temp_multiply);
divide_without_allocation(temp_multiply, m, temp_quotient, temp_remainder);
exp.set_to(temp_remainder);
}
@ -34,7 +32,7 @@ void UnsignedBigIntegerAlgorithms::destructive_modular_power_without_allocation(
ep.set_to(ep.shift_right(1));
// base = (base * base) % m;
multiply_without_allocation(base, base, temp_1, temp_2, temp_3, temp_multiply);
multiply_without_allocation(base, base, temp_1, temp_multiply);
divide_without_allocation(temp_multiply, m, temp_quotient, temp_remainder);
base.set_to(temp_remainder);

View file

@ -20,8 +20,6 @@ namespace Crypto {
FLATTEN void UnsignedBigIntegerAlgorithms::multiply_without_allocation(
UnsignedBigInteger const& left,
UnsignedBigInteger const& right,
UnsignedBigInteger& temp_shift_result,
UnsignedBigInteger& temp_shift_plus,
UnsignedBigInteger& temp_shift,
UnsignedBigInteger& output)
{
@ -37,7 +35,7 @@ FLATTEN void UnsignedBigIntegerAlgorithms::multiply_without_allocation(
size_t shift_amount = word_index * UnsignedBigInteger::BITS_IN_WORD + bit_index;
// output += (right << shift_amount);
shift_left_without_allocation(right, shift_amount, temp_shift_result, temp_shift_plus, temp_shift);
shift_left_without_allocation(right, shift_amount, temp_shift);
add_into_accumulator_without_allocation(output, temp_shift);
}
}

View file

@ -20,19 +20,20 @@ public:
static void bitwise_or_without_allocation(UnsignedBigInteger const& left, UnsignedBigInteger const& right, UnsignedBigInteger& output);
static void bitwise_and_without_allocation(UnsignedBigInteger const& left, UnsignedBigInteger const& right, UnsignedBigInteger& output);
static void bitwise_xor_without_allocation(UnsignedBigInteger const& left, UnsignedBigInteger const& right, UnsignedBigInteger& output);
static void shift_left_without_allocation(UnsignedBigInteger const& number, size_t bits_to_shift_by, UnsignedBigInteger& temp_result, UnsignedBigInteger& temp_plus, UnsignedBigInteger& output);
static void shift_left_without_allocation(UnsignedBigInteger const& number, size_t bits_to_shift_by, UnsignedBigInteger& output);
static void shift_right_without_allocation(UnsignedBigInteger const& number, size_t num_bits, UnsignedBigInteger& output);
static void multiply_without_allocation(UnsignedBigInteger const& left, UnsignedBigInteger const& right, UnsignedBigInteger& temp_shift_result, UnsignedBigInteger& temp_shift_plus, UnsignedBigInteger& temp_shift, UnsignedBigInteger& output);
static void multiply_without_allocation(UnsignedBigInteger const& left, UnsignedBigInteger const& right, UnsignedBigInteger& temp_shift, UnsignedBigInteger& output);
static void divide_without_allocation(UnsignedBigInteger const& numerator, UnsignedBigInteger const& denominator, UnsignedBigInteger& quotient, UnsignedBigInteger& remainder);
static void divide_u16_without_allocation(UnsignedBigInteger const& numerator, UnsignedBigInteger::Word denominator, UnsignedBigInteger& quotient, UnsignedBigInteger& remainder);
static ErrorOr<void> bitwise_not_fill_to_one_based_index_without_allocation(UnsignedBigInteger const& left, size_t, UnsignedBigInteger& output);
static ErrorOr<void> try_shift_left_without_allocation(UnsignedBigInteger const& number, size_t bits_to_shift_by, UnsignedBigInteger& temp_result, UnsignedBigInteger& temp_plus, UnsignedBigInteger& output);
static ErrorOr<void> try_shift_left_without_allocation(UnsignedBigInteger const& number, size_t bits_to_shift_by, UnsignedBigInteger& output);
static void extended_GCD_without_allocation(UnsignedBigInteger const& a, UnsignedBigInteger const& b, UnsignedBigInteger& x, UnsignedBigInteger& y, UnsignedBigInteger& gcd, UnsignedBigInteger& temp_quotient, UnsignedBigInteger& temp_1, UnsignedBigInteger& temp_2, UnsignedBigInteger& temp_shift, UnsignedBigInteger& temp_r, UnsignedBigInteger& temp_s, UnsignedBigInteger& temp_t);
static void extended_GCD_without_allocation(UnsignedBigInteger const& a, UnsignedBigInteger const& b, UnsignedBigInteger& x, UnsignedBigInteger& y, UnsignedBigInteger& gcd, UnsignedBigInteger& temp_quotient, UnsignedBigInteger& temp_1, UnsignedBigInteger& temp_2, UnsignedBigInteger& temp_shift_result, UnsignedBigInteger& temp_shift_plus, UnsignedBigInteger& temp_shift, UnsignedBigInteger& temp_r, UnsignedBigInteger& temp_s, UnsignedBigInteger& temp_t);
static void destructive_GCD_without_allocation(UnsignedBigInteger& temp_a, UnsignedBigInteger& temp_b, UnsignedBigInteger& temp_quotient, UnsignedBigInteger& temp_remainder, UnsignedBigInteger& output);
static void modular_inverse_without_allocation(UnsignedBigInteger const& a, UnsignedBigInteger const& b, UnsignedBigInteger& result, UnsignedBigInteger& temp_y, UnsignedBigInteger& temp_gcd, UnsignedBigInteger& temp_quotient, UnsignedBigInteger& temp_1, UnsignedBigInteger& temp_2, UnsignedBigInteger& temp_shift_result, UnsignedBigInteger& temp_shift_plus, UnsignedBigInteger& temp_shift, UnsignedBigInteger& temp_r, UnsignedBigInteger& temp_s, UnsignedBigInteger& temp_t);
static void destructive_modular_power_without_allocation(UnsignedBigInteger& ep, UnsignedBigInteger& base, UnsignedBigInteger const& m, UnsignedBigInteger& temp_1, UnsignedBigInteger& temp_2, UnsignedBigInteger& temp_3, UnsignedBigInteger& temp_multiply, UnsignedBigInteger& temp_quotient, UnsignedBigInteger& temp_remainder, UnsignedBigInteger& result);
static void modular_inverse_without_allocation(UnsignedBigInteger const& a, UnsignedBigInteger const& b, UnsignedBigInteger& result, UnsignedBigInteger& temp_y, UnsignedBigInteger& temp_gcd, UnsignedBigInteger& temp_quotient, UnsignedBigInteger& temp_1, UnsignedBigInteger& temp_2, UnsignedBigInteger& temp_shift, UnsignedBigInteger& temp_r, UnsignedBigInteger& temp_s, UnsignedBigInteger& temp_t);
static void destructive_modular_power_without_allocation(UnsignedBigInteger& ep, UnsignedBigInteger& base, UnsignedBigInteger const& m, UnsignedBigInteger& temp_1, UnsignedBigInteger& temp_multiply, UnsignedBigInteger& temp_quotient, UnsignedBigInteger& temp_remainder, UnsignedBigInteger& result);
static void montgomery_modular_power_with_minimal_allocations(UnsignedBigInteger const& base, UnsignedBigInteger const& exponent, UnsignedBigInteger const& modulo, UnsignedBigInteger& temp_z0, UnsignedBigInteger& temp_rr, UnsignedBigInteger& temp_one, UnsignedBigInteger& temp_z, UnsignedBigInteger& temp_zz, UnsignedBigInteger& temp_x, UnsignedBigInteger& temp_extra, UnsignedBigInteger& result);
private:

View file

@ -489,10 +489,8 @@ FLATTEN ErrorOr<UnsignedBigInteger> UnsignedBigInteger::try_bitwise_not_fill_to_
FLATTEN ErrorOr<UnsignedBigInteger> UnsignedBigInteger::try_shift_left(size_t num_bits) const
{
UnsignedBigInteger output;
UnsignedBigInteger temp_result;
UnsignedBigInteger temp_plus;
TRY(UnsignedBigIntegerAlgorithms::try_shift_left_without_allocation(*this, num_bits, temp_result, temp_plus, output));
TRY(UnsignedBigIntegerAlgorithms::try_shift_left_without_allocation(*this, num_bits, output));
return output;
}
@ -541,11 +539,9 @@ FLATTEN UnsignedBigInteger UnsignedBigInteger::as_n_bits(size_t n) const
FLATTEN UnsignedBigInteger UnsignedBigInteger::multiplied_by(UnsignedBigInteger const& other) const
{
UnsignedBigInteger result;
UnsignedBigInteger temp_shift_result;
UnsignedBigInteger temp_shift_plus;
UnsignedBigInteger temp_shift;
UnsignedBigIntegerAlgorithms::multiply_without_allocation(*this, other, temp_shift_result, temp_shift_plus, temp_shift, result);
UnsignedBigIntegerAlgorithms::multiply_without_allocation(*this, other, temp_shift, result);
return result;
}
@ -562,11 +558,6 @@ FLATTEN UnsignedDivisionResult UnsignedBigInteger::divided_by(UnsignedBigInteger
return UnsignedDivisionResult { quotient, remainder };
}
UnsignedBigInteger temp_shift_result;
UnsignedBigInteger temp_shift_plus;
UnsignedBigInteger temp_shift;
UnsignedBigInteger temp_minus;
UnsignedBigIntegerAlgorithms::divide_without_allocation(*this, divisor, quotient, remainder);
return UnsignedDivisionResult { quotient, remainder };

View file

@ -31,14 +31,12 @@ UnsignedBigInteger ModularInverse(UnsignedBigInteger const& a, UnsignedBigIntege
UnsignedBigInteger temp_quotient;
UnsignedBigInteger temp_1;
UnsignedBigInteger temp_2;
UnsignedBigInteger temp_shift_result;
UnsignedBigInteger temp_shift_plus;
UnsignedBigInteger temp_shift;
UnsignedBigInteger temp_r;
UnsignedBigInteger temp_s;
UnsignedBigInteger temp_t;
UnsignedBigIntegerAlgorithms::modular_inverse_without_allocation(a, b, result, temp_y, temp_gcd, temp_quotient, temp_1, temp_2, temp_shift_result, temp_shift_plus, temp_shift, temp_r, temp_s, temp_t);
UnsignedBigIntegerAlgorithms::modular_inverse_without_allocation(a, b, result, temp_y, temp_gcd, temp_quotient, temp_1, temp_2, temp_shift, temp_r, temp_s, temp_t);
return result;
}
@ -67,13 +65,11 @@ UnsignedBigInteger ModularPower(UnsignedBigInteger const& b, UnsignedBigInteger
UnsignedBigInteger result;
UnsignedBigInteger temp_1;
UnsignedBigInteger temp_2;
UnsignedBigInteger temp_3;
UnsignedBigInteger temp_multiply;
UnsignedBigInteger temp_quotient;
UnsignedBigInteger temp_remainder;
UnsignedBigIntegerAlgorithms::destructive_modular_power_without_allocation(ep, base, m, temp_1, temp_2, temp_3, temp_multiply, temp_quotient, temp_remainder, result);
UnsignedBigIntegerAlgorithms::destructive_modular_power_without_allocation(ep, base, m, temp_1, temp_multiply, temp_quotient, temp_remainder, result);
return result;
}
@ -111,7 +107,7 @@ UnsignedBigInteger LCM(UnsignedBigInteger const& a, UnsignedBigInteger const& b)
// output = (a / gcd_output) * b
UnsignedBigIntegerAlgorithms::divide_without_allocation(a, gcd_output, temp_quotient, temp_remainder);
UnsignedBigIntegerAlgorithms::multiply_without_allocation(temp_quotient, b, temp_1, temp_2, temp_3, output);
UnsignedBigIntegerAlgorithms::multiply_without_allocation(temp_quotient, b, temp_1, output);
dbgln_if(NT_DEBUG, "quot: {} rem: {} out: {}", temp_quotient, temp_remainder, output);

View file

@ -1,5 +1,6 @@
/*
* Copyright (c) 2021, Peter Bocan <me@pbocan.net>
* Copyright (c) 2025, Manuel Zahariev <manuel@duck.com>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
@ -571,6 +572,26 @@ TEST_CASE(test_signed_bigint_bitwise_xor)
EXPECT_EQ(num2.bitwise_xor(num2), "0"_sbigint);
}
TEST_CASE(test_bigint_shift_left)
{
Crypto::UnsignedBigInteger const num(Vector<u32> { 0x22222222, 0xffffffff });
size_t const tests = 8;
AK::Tuple<size_t, Vector<u32>> results[] = {
{ 0, { 0x22222222, 0xffffffff } },
{ 8, { 0x22222200, 0xffffff22, 0x000000ff } },
{ 16, { 0x22220000, 0xffff2222, 0x0000ffff } },
{ 32, { 0x00000000, 0x22222222, 0xffffffff } },
{ 36, { 0x00000000, 0x22222220, 0xfffffff2, 0x0000000f } },
{ 40, { 0x00000000, 0x22222200, 0xffffff22, 0x000000ff } },
{ 64, { 0x00000000, 0x00000000, 0x22222222, 0xffffffff } },
{ 68, { 0x00000000, 0x00000000, 0x22222220, 0xfffffff2, 0x0000000f } },
};
for (size_t i = 0; i < tests; ++i)
EXPECT_EQ(num.shift_left(results[i].get<0>()).words(), results[i].get<1>());
}
TEST_CASE(test_bigint_shift_right)
{
Crypto::UnsignedBigInteger const num1(Vector<u32> { 0x100, 0x20, 0x4, 0x2, 0x1 });