LibJS: Port Intl.Segmenter to the ICU text segmenter

This also lets us fully implement detecting if a segment is word-like,
although that is not tested by test262.
This commit is contained in:
Timothy Flynn 2024-06-18 18:51:06 -04:00 committed by Andreas Kling
commit 14071c52f9
Notes: sideshowbarker 2024-07-17 03:45:48 +09:00
12 changed files with 95 additions and 162 deletions

View file

@ -9,6 +9,7 @@
#include <AK/String.h>
#include <LibJS/Runtime/Object.h>
#include <LibLocale/Segmenter.h>
namespace JS::Intl {
@ -17,34 +18,34 @@ class Segmenter final : public Object {
// Object backing Intl.Segmenter in the JS::Intl namespace. It carries the [[Locale]] and
// [[SegmenterGranularity]] slots plus an owned ::Locale::Segmenter.
// NOTE(review): this text is a diff rendering without +/- markers, so several declarations
// appear twice (old and new form). Presumably the first of each pair is the removed line and
// the second the added one -- confirm against the actual commit.
JS_DECLARE_ALLOCATOR(Segmenter);
public:
// Class-local granularity enum.
// NOTE(review): presumably the pre-change declaration, superseded by
// ::Locale::SegmenterGranularity, which the later accessors and member use -- confirm.
enum class SegmenterGranularity {
Grapheme,
Word,
Sentence,
};
virtual ~Segmenter() override = default;
// [[Locale]] accessors. The setter takes the String by value and moves it into m_locale.
String const& locale() const { return m_locale; }
void set_locale(String locale) { m_locale = move(locale); }
// Granularity accessors using the class-local enum; the setter and the string getter are
// declared here without a body.
// NOTE(review): presumably the removed side of the diff -- confirm.
SegmenterGranularity segmenter_granularity() const { return m_segmenter_granularity; }
void set_segmenter_granularity(StringView);
StringView segmenter_granularity_string() const;
// Granularity accessors using ::Locale::SegmenterGranularity. The string <-> enum conversion is
// delegated to ::Locale::segmenter_granularity_from_string() / segmenter_granularity_to_string().
::Locale::SegmenterGranularity segmenter_granularity() const { return m_segmenter_granularity; }
void set_segmenter_granularity(StringView segmenter_granularity) { m_segmenter_granularity = ::Locale::segmenter_granularity_from_string(segmenter_granularity); }
StringView segmenter_granularity_string() const { return ::Locale::segmenter_granularity_to_string(m_segmenter_granularity); }
// Access to the owned segmenter. The getter dereferences m_segmenter, an OwnPtr with no
// initializer visible here -- presumably set_segmenter() must be called before segmenter();
// verify against the constructor/caller.
::Locale::Segmenter const& segmenter() const { return *m_segmenter; }
void set_segmenter(NonnullOwnPtr<::Locale::Segmenter> segmenter) { m_segmenter = move(segmenter); }
private:
explicit Segmenter(Object& prototype);
// Data members. m_locale and m_segmenter_granularity each appear twice below.
// NOTE(review): the two m_locale lines are identical in this rendering; presumably they
// differed only in comment-alignment whitespace in the real diff -- confirm.
// The granularity member defaults to Grapheme in both forms.
String m_locale; // [[Locale]]
SegmenterGranularity m_segmenter_granularity { SegmenterGranularity::Grapheme }; // [[SegmenterGranularity]]
String m_locale; // [[Locale]]
::Locale::SegmenterGranularity m_segmenter_granularity { ::Locale::SegmenterGranularity::Grapheme }; // [[SegmenterGranularity]]
// Non-standard. Stores the ICU segmenter for the Intl object's segmentation options.
OwnPtr<::Locale::Segmenter> m_segmenter;
};
// Declaration only; the definition is not in this view. Takes a segmenter, a UTF-16 view and a
// start/end index pair, and returns an Object wrapped in ThrowCompletionOr.
// NOTE(review): two forms are shown because the diff markers are missing. One takes the
// JS::Intl Segmenter with double indices, the other a ::Locale::Segmenter with size_t indices;
// presumably the former is the removed line and the latter the added one -- confirm.
ThrowCompletionOr<NonnullGCPtr<Object>> create_segment_data_object(VM&, Segmenter const&, Utf16View const&, double start_index, double end_index);
ThrowCompletionOr<NonnullGCPtr<Object>> create_segment_data_object(VM&, ::Locale::Segmenter const&, Utf16View const&, size_t start_index, size_t end_index);
// Direction argument accepted by find_boundary().
// NOTE(review): presumably selects whether the boundary before or after the start index is
// searched for -- the definition of find_boundary() is not visible here; confirm.
enum class Direction {
Before,
After,
};
// Declaration only; the definition is not in this view. Takes a segmenter, a UTF-16 view, a
// start index and a Direction, and returns an index-typed value.
// NOTE(review): two forms are shown because the diff markers are missing. One takes a const
// JS::Intl Segmenter and works in double; the other takes a non-const ::Locale::Segmenter& and
// works in size_t. Presumably the former is the removed line and the latter the added one --
// confirm. Note that the size_t form takes the segmenter by mutable reference.
double find_boundary(Segmenter const&, Utf16View const&, double start_index, Direction);
size_t find_boundary(::Locale::Segmenter&, Utf16View const&, size_t start_index, Direction);
}