AK+Everywhere: Change StringView case conversions to return String

There's a bit of a UTF-8 assumption with this change. But nearly every
caller of these methods was immediately creating a String from the
resulting ByteString anyway.
This commit is contained in:
Timothy Flynn 2025-04-06 10:19:35 -04:00 committed by Andreas Kling
parent 05627b6f45
commit 0a256b0a9a
Notes: github-actions[bot] 2025-04-07 15:45:50 +00:00
15 changed files with 57 additions and 56 deletions

View file

@ -73,6 +73,12 @@ public:
[[nodiscard]] ALWAYS_INLINE FlatPtr raw(Badge<FlyString>) const { return bit_cast<FlatPtr>(m_impl); }
[[nodiscard]] ALWAYS_INLINE FlatPtr raw(Badge<String>) const { return bit_cast<FlatPtr>(m_impl); }
// Badge<StringView>-gated overload of replace_with_new_string: it ignores the
// badge value itself and forwards `byte_count` and `callback` unchanged to the
// two-argument overload declared further down in this class.
// Returns whatever ErrorOr<void> that overload produces.
template<typename Func>
ALWAYS_INLINE ErrorOr<void> replace_with_new_string(Badge<StringView>, size_t byte_count, Func&& callback)
{
// Perfect-forward the callback so its value category is preserved.
return replace_with_new_string(byte_count, forward<Func>(callback));
}
protected:
template<typename Func>
ErrorOr<void> replace_with_new_string(size_t byte_count, Func&& callback)

View file

@ -75,28 +75,6 @@ NonnullRefPtr<StringImpl const> StringImpl::create(ReadonlyBytes bytes, ShouldCh
return StringImpl::create(reinterpret_cast<char const*>(bytes.data()), bytes.size(), shouldChomp);
}
// Creates a StringImpl of `length` bytes in which every byte of `cstring` has
// been passed through to_ascii_lowercase(). A zero `length` returns the shared
// empty StringImpl instead of allocating.
NonnullRefPtr<StringImpl const> StringImpl::create_lowercased(char const* cstring, size_t length)
{
    if (length == 0)
        return the_empty_stringimpl();

    // create_uninitialized() hands back the writable storage through `destination`.
    char* destination;
    auto lowercased = create_uninitialized(length, destination);
    for (size_t index = 0; index != length; ++index)
        destination[index] = static_cast<char>(to_ascii_lowercase(cstring[index]));
    return lowercased;
}
// Creates a StringImpl of `length` bytes in which every byte of `cstring` has
// been passed through to_ascii_uppercase(). A zero `length` returns the shared
// empty StringImpl instead of allocating.
NonnullRefPtr<StringImpl const> StringImpl::create_uppercased(char const* cstring, size_t length)
{
    if (length == 0)
        return the_empty_stringimpl();

    // create_uninitialized() hands back the writable storage through `destination`.
    char* destination;
    auto uppercased = create_uninitialized(length, destination);
    for (size_t index = 0; index != length; ++index)
        destination[index] = static_cast<char>(to_ascii_uppercase(cstring[index]));
    return uppercased;
}
unsigned StringImpl::case_insensitive_hash() const
{
return case_insensitive_string_hash(characters(), length());

View file

@ -28,8 +28,6 @@ public:
static NonnullRefPtr<StringImpl const> create(char const* cstring, ShouldChomp = NoChomp);
static NonnullRefPtr<StringImpl const> create(char const* cstring, size_t length, ShouldChomp = NoChomp);
static NonnullRefPtr<StringImpl const> create(ReadonlyBytes, ShouldChomp = NoChomp);
static NonnullRefPtr<StringImpl const> create_lowercased(char const* cstring, size_t length);
static NonnullRefPtr<StringImpl const> create_uppercased(char const* cstring, size_t length);
void operator delete(void* ptr)
{

View file

@ -488,7 +488,7 @@ ByteString to_snakecase(StringView str)
return builder.to_byte_string();
}
ByteString to_titlecase(StringView str)
String to_titlecase(StringView str)
{
StringBuilder builder;
bool next_is_upper = true;
@ -501,7 +501,7 @@ ByteString to_titlecase(StringView str)
next_is_upper = ch == ' ';
}
return builder.to_byte_string();
return MUST(builder.to_string());
}
// Finishes the replacing algorithm once it is known that at least one

View file

@ -105,7 +105,7 @@ enum class SearchDirection {
Optional<size_t> find_any_of(StringView haystack, StringView needles, SearchDirection);
ByteString to_snakecase(StringView);
ByteString to_titlecase(StringView);
String to_titlecase(StringView);
ByteString replace(StringView, StringView needle, StringView replacement, ReplaceMode);
ErrorOr<String> replace(String const&, StringView needle, StringView replacement, ReplaceMode);

View file

@ -7,12 +7,14 @@
#include <AK/AnyOf.h>
#include <AK/ByteBuffer.h>
#include <AK/ByteString.h>
#include <AK/Enumerate.h>
#include <AK/Find.h>
#include <AK/FlyString.h>
#include <AK/Function.h>
#include <AK/String.h>
#include <AK/StringBuilder.h>
#include <AK/StringView.h>
#include <AK/Utf8View.h>
#include <AK/Vector.h>
#include <simdutf.h>
@ -204,17 +206,37 @@ bool StringView::is_ascii() const
return simdutf::validate_ascii(characters_without_null_termination(), length());
}
ByteString StringView::to_lowercase_string() const
String StringView::to_ascii_lowercase_string() const
{
return StringImpl::create_lowercased(characters_without_null_termination(), length()).release_nonnull();
VERIFY(Utf8View { *this }.validate());
String result;
MUST(result.replace_with_new_string({}, length(), [&](Bytes buffer) -> ErrorOr<void> {
for (auto [i, character] : enumerate(bytes()))
buffer[i] = static_cast<u8>(AK::to_ascii_lowercase(character));
return {};
}));
return result;
}
ByteString StringView::to_uppercase_string() const
String StringView::to_ascii_uppercase_string() const
{
return StringImpl::create_uppercased(characters_without_null_termination(), length()).release_nonnull();
VERIFY(Utf8View { *this }.validate());
String result;
MUST(result.replace_with_new_string({}, length(), [&](Bytes buffer) -> ErrorOr<void> {
for (auto [i, character] : enumerate(bytes()))
buffer[i] = static_cast<u8>(AK::to_ascii_uppercase(character));
return {};
}));
return result;
}
ByteString StringView::to_titlecase_string() const
String StringView::to_ascii_titlecase_string() const
{
return StringUtils::to_titlecase(*this);
}

View file

@ -105,9 +105,9 @@ public:
[[nodiscard]] StringView trim(StringView characters, TrimMode mode = TrimMode::Both) const { return StringUtils::trim(*this, characters, mode); }
[[nodiscard]] StringView trim_whitespace(TrimMode mode = TrimMode::Both) const { return StringUtils::trim_whitespace(*this, mode); }
[[nodiscard]] ByteString to_lowercase_string() const;
[[nodiscard]] ByteString to_uppercase_string() const;
[[nodiscard]] ByteString to_titlecase_string() const;
[[nodiscard]] String to_ascii_lowercase_string() const;
[[nodiscard]] String to_ascii_uppercase_string() const;
[[nodiscard]] String to_ascii_titlecase_string() const;
[[nodiscard]] Optional<size_t> find(char needle, size_t start = 0) const
{

View file

@ -89,7 +89,7 @@ static ByteString convert_enumeration_value_to_cpp_enum_member(ByteString const&
auto word = lexer.consume_while([](auto c) { return is_ascii_alphanumeric(c); });
if (!word.is_empty()) {
builder.append(word.to_titlecase_string());
builder.append(word.to_ascii_titlecase_string());
} else {
auto non_alnum_string = lexer.consume_while([](auto c) { return !is_ascii_alphanumeric(c); });
if (!non_alnum_string.is_empty())

View file

@ -103,7 +103,7 @@ ThrowCompletionOr<Value> canonical_code_for_display_names(VM& vm, DisplayNames::
return vm.throw_completion<RangeError>(ErrorType::OptionIsNotValidValue, code, "region"sv);
// b. Return the ASCII-uppercase of code.
return PrimitiveString::create(vm, code.to_uppercase_string());
return PrimitiveString::create(vm, code.to_ascii_uppercase_string());
}
// 3. If type is "script", then
@ -119,7 +119,7 @@ ThrowCompletionOr<Value> canonical_code_for_display_names(VM& vm, DisplayNames::
// c. Let first be the ASCII-uppercase of the substring of code from 0 to 1.
// d. Let rest be the ASCII-lowercase of the substring of code from 1.
// e. Return the string-concatenation of first and rest.
return PrimitiveString::create(vm, code.to_titlecase_string());
return PrimitiveString::create(vm, code.to_ascii_titlecase_string());
}
// 4. If type is "calendar", then
@ -133,7 +133,7 @@ ThrowCompletionOr<Value> canonical_code_for_display_names(VM& vm, DisplayNames::
return vm.throw_completion<RangeError>(ErrorType::OptionIsNotValidValue, code, "calendar"sv);
// c. Return the ASCII-lowercase of code.
return PrimitiveString::create(vm, code.to_lowercase_string());
return PrimitiveString::create(vm, code.to_ascii_lowercase_string());
}
// 5. If type is "dateTimeField", then
@ -154,7 +154,7 @@ ThrowCompletionOr<Value> canonical_code_for_display_names(VM& vm, DisplayNames::
return vm.throw_completion<RangeError>(ErrorType::OptionIsNotValidValue, code, "currency"sv);
// 8. Return the ASCII-uppercase of code.
return PrimitiveString::create(vm, code.to_uppercase_string());
return PrimitiveString::create(vm, code.to_ascii_uppercase_string());
}
// 12.5.2 IsValidDateTimeFieldCode ( field ), https://tc39.es/ecma402/#sec-isvaliddatetimefieldcode

View file

@ -527,10 +527,8 @@ static ErrorOr<String> domain_to_ascii(StringView domain, bool be_strict)
}
}
if (!slow_path) {
auto lowercase_domain = domain.to_lowercase_string();
return String::from_utf8_without_validation(lowercase_domain.bytes());
}
if (!slow_path)
return domain.to_ascii_lowercase_string();
}
Unicode::IDNA::ToAsciiOptions const options {

View file

@ -227,7 +227,7 @@ WebIDL::ExceptionOr<bool> DOMTokenList::supports(StringView token)
return WebIDL::SimpleException { WebIDL::SimpleExceptionType::TypeError, MUST(String::formatted("Attribute {} does not define any supported tokens", m_associated_attribute)) };
// 2. Let lowercase token be a copy of token, in ASCII lowercase.
auto lowercase_token = token.to_lowercase_string();
auto lowercase_token = token.to_ascii_lowercase_string();
// 3. If lowercase token is present in supported tokens, return true.
if (supported_tokens->contains_slow(lowercase_token))

View file

@ -2072,17 +2072,16 @@ HTML::EnvironmentSettingsObject& Document::relevant_settings_object() const
}
// https://dom.spec.whatwg.org/#dom-document-createelement
WebIDL::ExceptionOr<GC::Ref<Element>> Document::create_element(String const& a_local_name, Variant<String, ElementCreationOptions> const& options)
WebIDL::ExceptionOr<GC::Ref<Element>> Document::create_element(String const& local_name, Variant<String, ElementCreationOptions> const& options)
{
auto local_name = a_local_name.to_byte_string();
// 1. If localName does not match the Name production, then throw an "InvalidCharacterError" DOMException.
if (!is_valid_name(a_local_name))
if (!is_valid_name(local_name))
return WebIDL::InvalidCharacterError::create(realm(), "Invalid character in tag name."_string);
// 2. If this is an HTML document, then set localName to localName in ASCII lowercase.
if (document_type() == Type::HTML)
local_name = local_name.to_lowercase();
auto local_name_lower = document_type() == Type::HTML
? local_name.to_ascii_lowercase()
: local_name;
// 3. Let is be null.
Optional<String> is_value;
@ -2100,7 +2099,7 @@ WebIDL::ExceptionOr<GC::Ref<Element>> Document::create_element(String const& a_l
namespace_ = Namespace::HTML;
// 6. Return the result of creating an element given this, localName, namespace, null, is, and with the synchronous custom elements flag set.
return TRY(DOM::create_element(*this, FlyString::from_utf8_without_validation(local_name.bytes()), move(namespace_), {}, move(is_value), true));
return TRY(DOM::create_element(*this, FlyString::from_utf8_without_validation(local_name_lower.bytes()), move(namespace_), {}, move(is_value), true));
}
// https://dom.spec.whatwg.org/#dom-document-createelementns

View file

@ -32,7 +32,7 @@ WebIDL::ExceptionOr<GC::Ref<TextDecoder>> TextDecoder::construct_impl(JS::Realm&
// 3. Set this’s encoding to encoding.
// https://encoding.spec.whatwg.org/#dom-textdecoder-encoding
// The encoding getter steps are to return this’s encoding’s name, ASCII lowercased.
auto lowercase_encoding_name = MUST(String::from_byte_string(encoding.value().to_lowercase_string()));
auto lowercase_encoding_name = encoding.value().to_ascii_lowercase_string();
// 4. If options["fatal"] is true, then set this’s error mode to "fatal".
auto fatal = options.value_or({}).fatal;

View file

@ -68,7 +68,7 @@ TokenizedFeature::Map tokenize_open_features(StringView features)
lexer.ignore_while(is_feature_separator);
// 4. Collect a sequence of code points that are not feature separators from features given position. Set name to the collected characters, converted to ASCII lowercase.
name = MUST(String::from_byte_string(lexer.consume_until(is_feature_separator).to_lowercase_string()));
name = lexer.consume_until(is_feature_separator).to_ascii_lowercase_string();
// 5. Set name to the result of normalizing the feature name name.
name = normalize_feature_name(name);
@ -85,7 +85,7 @@ TokenizedFeature::Map tokenize_open_features(StringView features)
lexer.ignore_while([](auto character) { return Infra::is_ascii_whitespace(character) || character == '='; });
// 2. Collect a sequence of code points that are not feature separators code points from features given position. Set value to the collected code points, converted to ASCII lowercase.
value = MUST(String::from_byte_string(lexer.consume_until(is_feature_separator).to_lowercase_string()));
value = lexer.consume_until(is_feature_separator).to_ascii_lowercase_string();
// 8. If name is not the empty string, then set tokenizedFeatures[name] to value.
if (!name.is_empty())

View file

@ -288,7 +288,7 @@ static void generate_get_parameter(SourceGenerator& generator, int webgl_version
return JS::@type_name@::create(m_realm, @element_count@, array_buffer);
)~~~");
} else if (type_name == "WebGLProgram"sv || type_name == "WebGLBuffer"sv || type_name == "WebGLTexture"sv || type_name == "WebGLFramebuffer"sv || type_name == "WebGLRenderbuffer"sv) {
impl_generator.set("stored_name", name_and_type.name.to_lowercase_string());
impl_generator.set("stored_name", name_and_type.name.to_ascii_lowercase_string());
impl_generator.append(R"~~~(
if (!m_@stored_name@)
return JS::js_null();