LibWeb: Only use code unit length for is_code_unit_prefix

Fixes a crash in the included test.
This commit is contained in:
Shannon Booth 2025-05-12 20:12:30 +12:00 committed by Jelle Raaijmakers
commit 74334ea1ce
Notes: github-actions[bot] 2025-05-12 09:21:40 +00:00
3 changed files with 55 additions and 7 deletions

View file

@ -69,10 +69,12 @@ ErrorOr<String> strip_and_collapse_whitespace(StringView string)
} }
// https://infra.spec.whatwg.org/#code-unit-prefix // https://infra.spec.whatwg.org/#code-unit-prefix
bool is_code_unit_prefix(StringView potential_prefix, StringView input) bool is_code_unit_prefix(StringView potential_prefix_utf8, StringView input_utf8)
{ {
auto potential_prefix_utf16 = MUST(utf8_to_utf16(potential_prefix)); auto potential_prefix_utf16_bytes = MUST(utf8_to_utf16(potential_prefix_utf8));
auto input_utf16 = MUST(utf8_to_utf16(input)); auto input_utf16_bytes = MUST(utf8_to_utf16(input_utf8));
Utf16View potential_prefix { potential_prefix_utf16_bytes };
Utf16View input { input_utf16_bytes };
// 1. Let i be 0. // 1. Let i be 0.
size_t i = 0; size_t i = 0;
@ -80,18 +82,18 @@ bool is_code_unit_prefix(StringView potential_prefix, StringView input)
// 2. While true: // 2. While true:
while (true) { while (true) {
// 1. If i is greater than or equal to potentialPrefix's length, then return true. // 1. If i is greater than or equal to potentialPrefix's length, then return true.
if (i >= potential_prefix.length()) if (i >= potential_prefix.length_in_code_units())
return true; return true;
// 2. If i is greater than or equal to input's length, then return false. // 2. If i is greater than or equal to input's length, then return false.
if (i >= input.length()) if (i >= input.length_in_code_units())
return false; return false;
// 3. Let potentialPrefixCodeUnit be the ith code unit of potentialPrefix. // 3. Let potentialPrefixCodeUnit be the ith code unit of potentialPrefix.
auto potential_prefix_code_unit = Utf16View(potential_prefix_utf16).code_unit_at(i); auto potential_prefix_code_unit = potential_prefix.code_unit_at(i);
// 4. Let inputCodeUnit be the ith code unit of input. // 4. Let inputCodeUnit be the ith code unit of input.
auto input_code_unit = Utf16View(input_utf16).code_unit_at(i); auto input_code_unit = input.code_unit_at(i);
// 5. Return false if potentialPrefixCodeUnit is not inputCodeUnit. // 5. Return false if potentialPrefixCodeUnit is not inputCodeUnit.
if (potential_prefix_code_unit != input_code_unit) if (potential_prefix_code_unit != input_code_unit)

View file

@ -9,6 +9,7 @@ set(TEST_SOURCES
TestMicrosyntax.cpp TestMicrosyntax.cpp
TestMimeSniff.cpp TestMimeSniff.cpp
TestNumbers.cpp TestNumbers.cpp
TestStrings.cpp
) )
foreach(source IN LISTS TEST_SOURCES) foreach(source IN LISTS TEST_SOURCES)

View file

@ -0,0 +1,45 @@
/*
* Copyright (c) 2025, Shannon Booth <shannon@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <LibTest/TestCase.h>
#include <LibWeb/Infra/Strings.h>
TEST_CASE(is_code_unit_prefix)
{
    using Web::Infra::is_code_unit_prefix;

    // The empty string is expected to be a prefix of anything, itself included.
    EXPECT(is_code_unit_prefix(""sv, ""sv));
    EXPECT(is_code_unit_prefix(""sv, "abc"sv));
    EXPECT(is_code_unit_prefix(""sv, "こん"sv));

    // A non-empty candidate is never a prefix of the empty string.
    EXPECT(!is_code_unit_prefix("abc"sv, ""sv));
    EXPECT(!is_code_unit_prefix("こん"sv, ""sv));

    // ASCII: proper prefix and exact match succeed; an over-long candidate fails.
    EXPECT(is_code_unit_prefix("abc"sv, "abcde"sv));
    EXPECT(is_code_unit_prefix("abc"sv, "abc"sv));
    EXPECT(!is_code_unit_prefix("abcdef"sv, "abc"sv));

    // Punctuation is compared like any other code unit.
    EXPECT(is_code_unit_prefix("!@#"sv, "!@#$%^"sv));
    EXPECT(!is_code_unit_prefix("!@#$"sv, "!@#"sv));

    // Multi-byte (non-ASCII) text: a leading run matches, a trailing run does not.
    EXPECT(is_code_unit_prefix("こんにちは"sv, "こんにちは世界"sv));
    EXPECT(!is_code_unit_prefix("世界"sv, "こんにちは世界"sv));

    // The comparison is case-sensitive in both directions.
    EXPECT(!is_code_unit_prefix("abc"sv, "ABC"sv));
    EXPECT(!is_code_unit_prefix("ABC"sv, "abc"sv));
}