mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-09-21 00:38:56 +00:00
LibUnicode: Generate and use code point composition mappings
These allow us to binary search the code point compositions based on the first code point being combined, which makes the search close to O(log N) instead of O(N).
This commit is contained in:
parent
e227bf0f71
commit
945c58c7c1
Notes:
sideshowbarker
2024-07-17 02:39:10 +09:00
Author: https://github.com/IdanHo
Commit: 945c58c7c1
Pull-request: https://github.com/SerenityOS/serenity/pull/23868
Issue: https://github.com/SerenityOS/serenity/issues/23863
Reviewed-by: https://github.com/trflynn89 ✅
3 changed files with 80 additions and 20 deletions
|
@ -20,7 +20,7 @@ struct Unicode::CodePointDecomposition { };
|
|||
namespace Unicode {
|
||||
|
||||
// Weak-linkage fallback: always reports that no decomposition is available.
// NOTE(review): presumably superseded by a strong definition from the generated Unicode data - confirm.
Optional<CodePointDecomposition const> __attribute__((weak)) code_point_decomposition(u32)
{
    return {};
}
|
||||
// Weak-linkage fallback: always reports that there is no decomposition at the given index.
// NOTE(review): presumably superseded by a strong definition from the generated Unicode data - confirm.
Optional<CodePointDecomposition const> __attribute__((weak)) code_point_decomposition_by_index(size_t)
{
    return {};
}
|
||||
// Weak-linkage fallback: always reports that the two code points have no composition.
// NOTE(review): presumably superseded by a strong definition from the generated Unicode data - confirm.
Optional<u32> __attribute__((weak)) code_point_composition(u32, u32)
{
    return {};
}
|
||||
|
||||
NormalizationForm normalization_form_from_string(StringView form)
|
||||
{
|
||||
|
@ -126,20 +126,9 @@ static u32 combine_hangul_code_points(u32 a, u32 b)
|
|||
static u32 combine_code_points([[maybe_unused]] u32 a, [[maybe_unused]] u32 b)
|
||||
{
|
||||
#if ENABLE_UNICODE_DATA
|
||||
Array<u32, 2> const points { a, b };
|
||||
|
||||
// FIXME: Do something better than linear search to find reverse mappings.
|
||||
for (size_t index = 0;; ++index) {
|
||||
auto mapping_maybe = Unicode::code_point_decomposition_by_index(index);
|
||||
if (!mapping_maybe.has_value())
|
||||
break;
|
||||
auto& mapping = mapping_maybe.value();
|
||||
if (mapping.tag == CompatibilityFormattingTag::Canonical && mapping.decomposition == points) {
|
||||
if (code_point_has_property(mapping.code_point, Property::Full_Composition_Exclusion))
|
||||
continue;
|
||||
return mapping.code_point;
|
||||
}
|
||||
}
|
||||
auto composition = code_point_composition(a, b);
|
||||
if (composition.has_value())
|
||||
return composition.value();
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
|
|
|
@ -17,7 +17,7 @@
|
|||
namespace Unicode {
|
||||
|
||||
// Returns the decomposition record for `code_point`, or an empty Optional when none is available.
// NOTE(review): exact lookup semantics live in the generated Unicode data - confirm there.
Optional<CodePointDecomposition const> code_point_decomposition(u32 code_point);
|
||||
// Returns the decomposition record stored at position `index`, or an empty Optional when there is none
// (callers iterate from 0 until an empty result is returned).
Optional<CodePointDecomposition const> code_point_decomposition_by_index(size_t index);
|
||||
// Returns the code point that `first_code_point` followed by `second_code_point` composes to,
// or an empty Optional when the pair has no composition.
// NOTE(review): presumably only canonical, non-excluded compositions are included - confirm against the generator.
Optional<u32> code_point_composition(u32 first_code_point, u32 second_code_point);
|
||||
|
||||
enum class NormalizationForm {
|
||||
NFD,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue