LibJS: Cache UTF-16 strings on the VM

We were already caching UTF-8 and byte strings, so let's add a cache
for UTF-16 strings as well. This is particularly profitable whenever we
run regular expressions, since the output of regex execution is a set of
UTF-16 strings.

Note that this is a weak cache like the other JS string caches, meaning
that strings are removed from the cache as they are garbage collected.

This avoids billions of PrimitiveString allocations across a run of WPT,
significantly reducing GC activity.
This commit is contained in:
Andreas Kling 2024-10-24 23:02:10 +02:00 committed by Tim Flynn
parent e89d889219
commit 206479b2b5
Notes: github-actions[bot] 2024-10-25 00:42:38 +00:00
4 changed files with 53 additions and 1 deletions

View file

@ -47,6 +47,8 @@ PrimitiveString::~PrimitiveString()
{
// On destruction, remove this string's entry from each VM-level string cache,
// once per representation (UTF-8, UTF-16, byte string) it currently holds.
if (has_utf8_string())
vm().string_cache().remove(*m_utf8_string);
// The UTF-16 cache is keyed by the Utf16String itself, like the other two caches.
if (has_utf16_string())
vm().utf16_string_cache().remove(*m_utf16_string);
if (has_byte_string())
vm().byte_string_cache().remove(*m_byte_string);
}
@ -167,7 +169,13 @@ NonnullGCPtr<PrimitiveString> PrimitiveString::create(VM& vm, Utf16String string
return vm.single_ascii_character_string(static_cast<u8>(code_unit));
}
return vm.heap().allocate_without_realm<PrimitiveString>(move(string));
auto& string_cache = vm.utf16_string_cache();
if (auto it = string_cache.find(string); it != string_cache.end())
return *it->value;
auto new_string = vm.heap().allocate_without_realm<PrimitiveString>(string);
string_cache.set(move(string), new_string);
return *new_string;
}
NonnullGCPtr<PrimitiveString> PrimitiveString::create(VM& vm, String string)

View file

@ -55,6 +55,13 @@ Utf16View Utf16StringImpl::view() const
return Utf16View { m_string };
}
// Computes the hash of the UTF-16 code units by hashing their raw bytes.
// An empty string always hashes to 0.
u32 Utf16StringImpl::compute_hash() const
{
    if (m_string.is_empty())
        return 0;

    // string_hash() operates on bytes, so view the u16 buffer as chars and
    // scale the length from code units to bytes.
    auto const* bytes = reinterpret_cast<char const*>(m_string.data());
    auto const byte_count = m_string.size() * sizeof(u16);
    return string_hash(bytes, byte_count);
}
}
Utf16String Utf16String::create()

View file

@ -29,10 +29,24 @@ public:
Utf16Data const& string() const;
Utf16View view() const;
// Returns the hash of this string. The value is computed on first use via
// compute_hash() and then cached in m_hash (guarded by m_has_hash).
[[nodiscard]] u32 hash() const
{
    if (m_has_hash)
        return m_hash;

    m_hash = compute_hash();
    m_has_hash = true;
    return m_hash;
}
// Two impls are equal when their underlying UTF-16 data compares equal.
[[nodiscard]] bool operator==(Utf16StringImpl const& other) const
{
    return string() == other.string();
}
private:
Utf16StringImpl() = default;
explicit Utf16StringImpl(Utf16Data string);
[[nodiscard]] u32 compute_hash() const;
mutable bool m_has_hash { false };
mutable u32 m_hash { 0 };
Utf16Data m_string;
};
@ -57,6 +71,14 @@ public:
size_t length_in_code_units() const;
bool is_empty() const;
// Forwards to the hash of the underlying string impl.
[[nodiscard]] u32 hash() const
{
    return m_string->hash();
}
// Equality check with a cheap first step: compare the m_string handles
// (presumably an identity check on the shared impl — confirm against the
// handle type), and only fall back to comparing the impl contents when
// the handles differ.
[[nodiscard]] bool operator==(Utf16String const& other) const
{
    return m_string == other.m_string || *m_string == *other.m_string;
}
private:
explicit Utf16String(NonnullRefPtr<Detail::Utf16StringImpl>);
@ -64,3 +86,12 @@ private:
};
}
namespace AK {

// Hashing traits for JS::Utf16String; hashing delegates to Utf16String::hash().
// Everything else is inherited from DefaultTraits.
template<>
struct Traits<JS::Utf16String> : public DefaultTraits<JS::Utf16String> {
    static unsigned hash(JS::Utf16String const& string)
    {
        return string.hash();
    }
};

}

View file

@ -75,6 +75,11 @@ public:
return m_byte_string_cache;
}
// Returns the VM's cache mapping UTF-16 strings to their PrimitiveString.
// The reference is mutable: callers look up, insert and remove entries through it.
HashMap<Utf16String, GCPtr<PrimitiveString>>& utf16_string_cache()
{
return m_utf16_string_cache;
}
PrimitiveString& empty_string() { return *m_empty_string; }
PrimitiveString& single_ascii_character_string(u8 character)
@ -298,6 +303,7 @@ private:
HashMap<String, GCPtr<PrimitiveString>> m_string_cache;
HashMap<ByteString, GCPtr<PrimitiveString>> m_byte_string_cache;
HashMap<Utf16String, GCPtr<PrimitiveString>> m_utf16_string_cache;
Heap m_heap;