From 12f177e9e94ca71c619ec1171078e8635debde09 Mon Sep 17 00:00:00 2001 From: Timothy Flynn Date: Wed, 19 Jun 2024 09:02:21 -0400 Subject: [PATCH] LibWeb: Port text segmentation to the ICU text segmenter --- .../Libraries/LibWeb/DOM/CharacterData.cpp | 17 ++++++++++++++++ Userland/Libraries/LibWeb/DOM/CharacterData.h | 7 ++++++- Userland/Libraries/LibWeb/DOM/Position.cpp | 8 +++----- .../LibWeb/HTML/CanvasRenderingContext2D.cpp | 20 +++++++++---------- .../LibWeb/Page/EditEventHandler.cpp | 8 ++++---- 5 files changed, 40 insertions(+), 20 deletions(-) diff --git a/Userland/Libraries/LibWeb/DOM/CharacterData.cpp b/Userland/Libraries/LibWeb/DOM/CharacterData.cpp index cf6acd33d0b..16e4d66762d 100644 --- a/Userland/Libraries/LibWeb/DOM/CharacterData.cpp +++ b/Userland/Libraries/LibWeb/DOM/CharacterData.cpp @@ -4,6 +4,7 @@ * SPDX-License-Identifier: BSD-2-Clause */ +#include #include #include #include @@ -22,6 +23,8 @@ CharacterData::CharacterData(Document& document, NodeType type, String const& da { } +CharacterData::~CharacterData() = default; + void CharacterData::initialize(JS::Realm& realm) { Base::initialize(realm); @@ -124,6 +127,10 @@ WebIDL::ExceptionOr CharacterData::replace_data(size_t offset, size_t coun static_cast(*layout_node).invalidate_text_for_rendering(); document().set_needs_layout(); + + if (m_segmenter) + m_segmenter->set_segmented_text(m_data); + return {}; } @@ -148,4 +155,14 @@ WebIDL::ExceptionOr CharacterData::delete_data(size_t offset, size_t count return replace_data(offset, count, String {}); } +Locale::Segmenter& CharacterData::segmenter() +{ + if (!m_segmenter) { + m_segmenter = Locale::Segmenter::create(Locale::SegmenterGranularity::Grapheme); + m_segmenter->set_segmented_text(m_data); + } + + return *m_segmenter; +} + } diff --git a/Userland/Libraries/LibWeb/DOM/CharacterData.h b/Userland/Libraries/LibWeb/DOM/CharacterData.h index bd90a0d369c..e0abe9deccf 100644 --- a/Userland/Libraries/LibWeb/DOM/CharacterData.h +++ b/Userland/Libraries/LibWeb/DOM/CharacterData.h @@ -7,6 +7,7 @@ #pragma once #include +#include #include #include #include @@ -22,7 +23,7 @@ class CharacterData JS_DECLARE_ALLOCATOR(CharacterData); public: - virtual ~CharacterData() override = default; + virtual ~CharacterData() override; String const& data() const { return m_data; } void set_data(String const&); @@ -40,6 +41,8 @@ public: WebIDL::ExceptionOr delete_data(size_t offset_in_utf16_code_units, size_t count_in_utf16_code_units); WebIDL::ExceptionOr replace_data(size_t offset_in_utf16_code_units, size_t count_in_utf16_code_units, String const&); + Locale::Segmenter& segmenter(); + protected: CharacterData(Document&, NodeType, String const&); @@ -47,6 +50,8 @@ protected: private: String m_data; + + OwnPtr m_segmenter; }; } diff --git a/Userland/Libraries/LibWeb/DOM/Position.cpp b/Userland/Libraries/LibWeb/DOM/Position.cpp index d4d0a7ff0ec..58de160aabe 100644 --- a/Userland/Libraries/LibWeb/DOM/Position.cpp +++ b/Userland/Libraries/LibWeb/DOM/Position.cpp @@ -6,7 +6,7 @@ */ #include -#include +#include #include #include #include @@ -40,9 +40,8 @@ bool Position::increment_offset() return false; auto& node = verify_cast(*m_node); - auto text = Utf8View(node.data()); - if (auto offset = Unicode::next_grapheme_segmentation_boundary(text, m_offset); offset.has_value()) { + if (auto offset = node.segmenter().next_boundary(m_offset); offset.has_value()) { m_offset = *offset; return true; } @@ -57,9 +56,8 @@ bool Position::decrement_offset() return false; auto& node = verify_cast(*m_node); - auto text = Utf8View(node.data()); - if (auto offset = Unicode::previous_grapheme_segmentation_boundary(text, m_offset); offset.has_value()) { + if (auto offset = node.segmenter().previous_boundary(m_offset); offset.has_value()) { m_offset = *offset; return true; } diff --git a/Userland/Libraries/LibWeb/HTML/CanvasRenderingContext2D.cpp b/Userland/Libraries/LibWeb/HTML/CanvasRenderingContext2D.cpp index fd876d9b1a2..af00eba96d3 100644 --- a/Userland/Libraries/LibWeb/HTML/CanvasRenderingContext2D.cpp +++ b/Userland/Libraries/LibWeb/HTML/CanvasRenderingContext2D.cpp @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include #include @@ -468,7 +468,7 @@ CanvasRenderingContext2D::PreparedText CanvasRenderingContext2D::prepare_text(By for (auto c : text) { builder.append(Infra::is_ascii_whitespace(c) ? ' ' : c); } - auto replaced_text = builder.string_view(); + auto replaced_text = MUST(builder.to_string()); // 3. Let font be the current font of target, as given by that object's font attribute. auto font = current_font(); @@ -497,8 +497,6 @@ CanvasRenderingContext2D::PreparedText CanvasRenderingContext2D::prepare_text(By size_t width = font->width(text.view()); size_t height = font->pixel_size(); - Utf8View replaced_text_view { replaced_text }; - // 6. If maxWidth was provided and the hypothetical width of the inline box in the hypothetical line box is greater than maxWidth CSS pixels, then change font to have a more condensed font (if one is available or if a reasonably readable one can be synthesized by applying a horizontal scale factor to the font) or a smaller font, and return to the previous step. // FIXME: Record the font size used for this piece of text, and actually retry with a smaller size if needed. @@ -520,17 +518,19 @@ CanvasRenderingContext2D::PreparedText CanvasRenderingContext2D::prepare_text(By // 8. Let result be an array constructed by iterating over each glyph in the inline box from left to right (if any), adding to the array, for each glyph, the shape of the glyph as it is in the inline box, positioned on a coordinate space using CSS pixels with its origin is at the anchor point. PreparedText prepared_text { {}, physical_alignment, { 0, 0, static_cast(width), static_cast(height) } }; - prepared_text.glyphs.ensure_capacity(replaced_text.length()); + prepared_text.glyphs.ensure_capacity(replaced_text.bytes_as_string_view().length()); - size_t previous_grapheme_boundary = 0; - Unicode::for_each_grapheme_segmentation_boundary(replaced_text_view, [&](auto boundary) { + auto segmenter = Locale::Segmenter::create(Locale::SegmenterGranularity::Grapheme); + + size_t previous_boundary = 0; + segmenter->for_each_boundary(replaced_text, [&](auto boundary) { if (boundary == 0) return IterationDecision::Continue; - auto glyph_view = replaced_text_view.substring_view(previous_grapheme_boundary, boundary - previous_grapheme_boundary); - auto glyph = String::from_utf8(glyph_view.as_string()).release_value_but_fixme_should_propagate_errors(); - + auto glyph = MUST(replaced_text.substring_from_byte_offset(previous_boundary, boundary - previous_boundary)); prepared_text.glyphs.append({ move(glyph), { static_cast(boundary), 0 } }); + + previous_boundary = boundary; return IterationDecision::Continue; }); diff --git a/Userland/Libraries/LibWeb/Page/EditEventHandler.cpp b/Userland/Libraries/LibWeb/Page/EditEventHandler.cpp index e43974f69ba..e2ec6f9eb98 100644 --- a/Userland/Libraries/LibWeb/Page/EditEventHandler.cpp +++ b/Userland/Libraries/LibWeb/Page/EditEventHandler.cpp @@ -6,7 +6,7 @@ #include #include -#include +#include #include #include #include @@ -22,15 +22,15 @@ void EditEventHandler::handle_delete_character_after(JS::NonnullGCPtr(*cursor_position->node()); auto& text = node.data(); - auto next_grapheme_offset = Unicode::next_grapheme_segmentation_boundary(Utf8View { text }, cursor_position->offset()); - if (!next_grapheme_offset.has_value()) { + auto next_offset = node.segmenter().next_boundary(cursor_position->offset()); + if (!next_offset.has_value()) { // FIXME: Move to the next node and delete the first character there. return; } StringBuilder builder; builder.append(text.bytes_as_string_view().substring_view(0, cursor_position->offset())); - builder.append(text.bytes_as_string_view().substring_view(*next_grapheme_offset)); + builder.append(text.bytes_as_string_view().substring_view(*next_offset)); node.set_data(MUST(builder.to_string())); m_navigable->did_edit({});