From 206479b2b5fc2641a619eb0d05c1185d869ef844 Mon Sep 17 00:00:00 2001
From: Andreas Kling
Date: Thu, 24 Oct 2024 23:02:10 +0200
Subject: [PATCH] LibJS: Cache UTF-16 strings on the VM

We were already caching UTF-8 and byte strings, so let's add a cache
for UTF-16 strings as well. This is particularly profitable whenever we
run regular expressions, since the output of regex execution is a set
of UTF-16 strings.

Note that this is a weak cache like the other JS string caches, meaning
that strings are removed from the cache as they are garbage collected.

This avoids billions of PrimitiveString allocations across a run of
WPT, significantly reducing GC activity.
---
 .../LibJS/Runtime/PrimitiveString.cpp         | 10 +++++-
 .../Libraries/LibJS/Runtime/Utf16String.cpp   |  7 +++++
 .../Libraries/LibJS/Runtime/Utf16String.h     | 31 +++++++++++++++++++
 Userland/Libraries/LibJS/Runtime/VM.h         |  6 ++++
 4 files changed, 53 insertions(+), 1 deletion(-)

diff --git a/Userland/Libraries/LibJS/Runtime/PrimitiveString.cpp b/Userland/Libraries/LibJS/Runtime/PrimitiveString.cpp
index 68e7fd6e6b2..8eeb3c602e2 100644
--- a/Userland/Libraries/LibJS/Runtime/PrimitiveString.cpp
+++ b/Userland/Libraries/LibJS/Runtime/PrimitiveString.cpp
@@ -47,6 +47,8 @@ PrimitiveString::~PrimitiveString()
 {
     if (has_utf8_string())
         vm().string_cache().remove(*m_utf8_string);
+    if (has_utf16_string())
+        vm().utf16_string_cache().remove(*m_utf16_string);
     if (has_byte_string())
         vm().byte_string_cache().remove(*m_byte_string);
 }
@@ -167,7 +169,13 @@ NonnullGCPtr<PrimitiveString> PrimitiveString::create(VM& vm, Utf16String string
             return vm.single_ascii_character_string(static_cast<u8>(code_unit));
     }
 
-    return vm.heap().allocate_without_realm<PrimitiveString>(move(string));
+    auto& string_cache = vm.utf16_string_cache();
+    if (auto it = string_cache.find(string); it != string_cache.end())
+        return *it->value;
+
+    auto new_string = vm.heap().allocate_without_realm<PrimitiveString>(string);
+    string_cache.set(move(string), new_string);
+    return *new_string;
 }
 
 NonnullGCPtr<PrimitiveString> PrimitiveString::create(VM& vm, String string)
diff --git a/Userland/Libraries/LibJS/Runtime/Utf16String.cpp b/Userland/Libraries/LibJS/Runtime/Utf16String.cpp
index 295165e8f8c..2bd5a6c04ba 100644
--- a/Userland/Libraries/LibJS/Runtime/Utf16String.cpp
+++ b/Userland/Libraries/LibJS/Runtime/Utf16String.cpp
@@ -55,6 +55,13 @@ Utf16View Utf16StringImpl::view() const
     return Utf16View { m_string };
 }
 
+u32 Utf16StringImpl::compute_hash() const
+{
+    if (m_string.is_empty())
+        return 0;
+    return string_hash((char const*)m_string.data(), m_string.size() * sizeof(u16));
+}
+
 }
 
 Utf16String Utf16String::create()
diff --git a/Userland/Libraries/LibJS/Runtime/Utf16String.h b/Userland/Libraries/LibJS/Runtime/Utf16String.h
index 4e08afa4aa8..233a562ace6 100644
--- a/Userland/Libraries/LibJS/Runtime/Utf16String.h
+++ b/Userland/Libraries/LibJS/Runtime/Utf16String.h
@@ -29,10 +29,24 @@ public:
     Utf16Data const& string() const;
     Utf16View view() const;
 
+    [[nodiscard]] u32 hash() const
+    {
+        if (!m_has_hash) {
+            m_hash = compute_hash();
+            m_has_hash = true;
+        }
+        return m_hash;
+    }
+    [[nodiscard]] bool operator==(Utf16StringImpl const& other) const { return string() == other.string(); }
+
 private:
     Utf16StringImpl() = default;
     explicit Utf16StringImpl(Utf16Data string);
 
+    [[nodiscard]] u32 compute_hash() const;
+
+    mutable bool m_has_hash { false };
+    mutable u32 m_hash { 0 };
     Utf16Data m_string;
 };
 
@@ -57,6 +71,14 @@ public:
     size_t length_in_code_units() const;
     bool is_empty() const;
 
+    [[nodiscard]] u32 hash() const { return m_string->hash(); }
+    [[nodiscard]] bool operator==(Utf16String const& other) const
+    {
+        if (m_string == other.m_string)
+            return true;
+        return *m_string == *other.m_string;
+    }
+
 private:
     explicit Utf16String(NonnullRefPtr<Detail::Utf16StringImpl>);
 
@@ -64,3 +86,12 @@ private:
 };
 
 }
+
+namespace AK {
+
+template<>
+struct Traits<JS::Utf16String> : public DefaultTraits<JS::Utf16String> {
+    static unsigned hash(JS::Utf16String const& s) { return s.hash(); }
+};
+
+}
diff --git a/Userland/Libraries/LibJS/Runtime/VM.h b/Userland/Libraries/LibJS/Runtime/VM.h
index d39085111fc..ee6a8a4b73d 100644
--- a/Userland/Libraries/LibJS/Runtime/VM.h
+++ b/Userland/Libraries/LibJS/Runtime/VM.h
@@ -75,6 +75,11 @@ public:
         return m_byte_string_cache;
     }
 
+    HashMap<Utf16String, GCPtr<PrimitiveString>>& utf16_string_cache()
+    {
+        return m_utf16_string_cache;
+    }
+
     PrimitiveString& empty_string() { return *m_empty_string; }
 
     PrimitiveString& single_ascii_character_string(u8 character)
@@ -298,6 +303,7 @@ private:
 
     HashMap<String, GCPtr<PrimitiveString>> m_string_cache;
     HashMap<ByteString, GCPtr<PrimitiveString>> m_byte_string_cache;
+    HashMap<Utf16String, GCPtr<PrimitiveString>> m_utf16_string_cache;
 
     Heap m_heap;
 