LibWeb/Infra: Use UTF-16 code unit lengths in is_code_unit_prefix

is_code_unit_prefix() indexes both strings by UTF-16 code unit, but its
bounds checks called length() on the UTF-8 StringView arguments. This patch
wraps each converted buffer in a Utf16View once, uses length_in_code_units()
for both bounds checks, and adds Tests/LibWeb/TestStrings.cpp covering ASCII,
empty, and non-ASCII inputs.

diff --git a/Libraries/LibWeb/Infra/Strings.cpp b/Libraries/LibWeb/Infra/Strings.cpp
index 5ae62a53c00..dee54bd23dc 100644
--- a/Libraries/LibWeb/Infra/Strings.cpp
+++ b/Libraries/LibWeb/Infra/Strings.cpp
@@ -69,10 +69,12 @@ ErrorOr<String> strip_and_collapse_whitespace(StringView string)
 }
 
 // https://infra.spec.whatwg.org/#code-unit-prefix
-bool is_code_unit_prefix(StringView potential_prefix, StringView input)
+bool is_code_unit_prefix(StringView potential_prefix_utf8, StringView input_utf8)
 {
-    auto potential_prefix_utf16 = MUST(utf8_to_utf16(potential_prefix));
-    auto input_utf16 = MUST(utf8_to_utf16(input));
+    auto potential_prefix_utf16_bytes = MUST(utf8_to_utf16(potential_prefix_utf8));
+    auto input_utf16_bytes = MUST(utf8_to_utf16(input_utf8));
+    Utf16View potential_prefix { potential_prefix_utf16_bytes };
+    Utf16View input { input_utf16_bytes };
 
     // 1. Let i be 0.
     size_t i = 0;
@@ -80,18 +82,18 @@ bool is_code_unit_prefix(StringView potential_prefix, StringView input)
     // 2. While true:
     while (true) {
         // 1. If i is greater than or equal to potentialPrefix’s length, then return true.
-        if (i >= potential_prefix.length())
+        if (i >= potential_prefix.length_in_code_units())
             return true;
 
         // 2. If i is greater than or equal to input’s length, then return false.
-        if (i >= input.length())
+        if (i >= input.length_in_code_units())
             return false;
 
         // 3. Let potentialPrefixCodeUnit be the ith code unit of potentialPrefix.
-        auto potential_prefix_code_unit = Utf16View(potential_prefix_utf16).code_unit_at(i);
+        auto potential_prefix_code_unit = potential_prefix.code_unit_at(i);
 
         // 4. Let inputCodeUnit be the ith code unit of input.
-        auto input_code_unit = Utf16View(input_utf16).code_unit_at(i);
+        auto input_code_unit = input.code_unit_at(i);
 
         // 5. Return false if potentialPrefixCodeUnit is not inputCodeUnit.
         if (potential_prefix_code_unit != input_code_unit)
diff --git a/Tests/LibWeb/CMakeLists.txt b/Tests/LibWeb/CMakeLists.txt
index fa0c06e3782..92c7f3c5bcf 100644
--- a/Tests/LibWeb/CMakeLists.txt
+++ b/Tests/LibWeb/CMakeLists.txt
@@ -9,6 +9,7 @@ set(TEST_SOURCES
     TestMicrosyntax.cpp
     TestMimeSniff.cpp
     TestNumbers.cpp
+    TestStrings.cpp
 )
 
 foreach(source IN LISTS TEST_SOURCES)
diff --git a/Tests/LibWeb/TestStrings.cpp b/Tests/LibWeb/TestStrings.cpp
new file mode 100644
index 00000000000..4a36a697e6b
--- /dev/null
+++ b/Tests/LibWeb/TestStrings.cpp
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2025, Shannon Booth
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <LibTest/TestCase.h>
+#include <LibWeb/Infra/Strings.h>
+
+TEST_CASE(is_code_unit_prefix)
+{
+
+    // Basic prefix match
+    EXPECT(Web::Infra::is_code_unit_prefix("abc"sv, "abcde"sv));
+
+    // Exact match
+    EXPECT(Web::Infra::is_code_unit_prefix("abc"sv, "abc"sv));
+
+    // Empty prefix
+    EXPECT(Web::Infra::is_code_unit_prefix(""sv, "abc"sv));
+
+    // Empty input string
+    EXPECT(!Web::Infra::is_code_unit_prefix("abc"sv, ""sv));
+
+    // Both strings empty
+    EXPECT(Web::Infra::is_code_unit_prefix(""sv, ""sv));
+
+    // Prefix longer than input string
+    EXPECT(!Web::Infra::is_code_unit_prefix("abcdef"sv, "abc"sv));
+
+    // Non-ASCII characters
+    EXPECT(Web::Infra::is_code_unit_prefix("こんにちは"sv, "こんにちは世界"sv));
+    EXPECT(!Web::Infra::is_code_unit_prefix("世界"sv, "こんにちは世界"sv));
+
+    EXPECT(Web::Infra::is_code_unit_prefix("こ"sv, "こん"sv));
+    EXPECT(!Web::Infra::is_code_unit_prefix("こん"sv, "こ"sv));
+
+    // Special characters
+    EXPECT(Web::Infra::is_code_unit_prefix("!@#"sv, "!@#$%^"sv));
+    EXPECT(!Web::Infra::is_code_unit_prefix("!@#$"sv, "!@#"sv));
+
+    // Case sensitivity
+    EXPECT(!Web::Infra::is_code_unit_prefix("abc"sv, "ABC"sv));
+    EXPECT(!Web::Infra::is_code_unit_prefix("ABC"sv, "abc"sv));
+}