AK: Add UnicodeUtils with Unicode-related helper functions

This introduces the UnicodeUtils file, which contains helper functions
related to Unicode. This is in contrast to StringUtils, whose functions
are not directly related to Unicode and are, in theory,
encoding-agnostic.
This commit is contained in:
Max Wipfli 2021-05-20 12:56:38 +02:00 committed by Andreas Kling
commit 3c2565da94
Notes: sideshowbarker 2024-07-18 17:40:53 +09:00
2 changed files with 57 additions and 0 deletions

37
AK/UnicodeUtils.cpp Normal file
View file

@ -0,0 +1,37 @@
/*
* Copyright (c) 2021, Max Wipfli <mail@maxwipfli.ch>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/Array.h>
#include <AK/Optional.h>
#include <AK/StringView.h>
#include <AK/UnicodeUtils.h>
namespace AK::UnicodeUtils {
// Returns the short alias (e.g. "NUL", "ESC", "CSI") of a Unicode control code point.
//
// Covered code points:
//   - 0x00..0x1f: looked up in the ASCII controls table,
//   - 0x7f:       "DEL",
//   - 0x80..0x9f: looked up in the C1 controls table.
// For every other code point an empty Optional is returned.
Optional<StringView> get_unicode_control_code_point_alias(u32 code_point)
{
    // Indexed directly by code point (0x00..0x1f).
    static constexpr Array<StringView, 32> ascii_controls_lookup_table = {
        "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL",
        "BS", "HT", "LF", "VT", "FF", "CR", "SO", "SI",
        "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB",
        "CAN", "EM", "SUB", "ESC", "FS", "GS", "RS", "US"
    };

    // Indexed by (code point - 0x80), i.e. covers 0x80..0x9f.
    // NOTE(review): "XXX" (0x80, 0x81, 0x99) looks like a placeholder for code points
    // without an abbreviation in the Unicode code chart; NameAliases.txt lists
    // PAD / HOP / SGC for them. Kept as-is to preserve existing output; confirm intent.
    static constexpr Array<StringView, 32> c1_controls_lookup_table = {
        "XXX", "XXX", "BPH", "NBH", "IND", "NEL", "SSA", "ESA",
        "HTS", "HTJ", "VTS", "PLD", "PLU", "RI", "SS2", "SS3",
        "DCS", "PU1", "PU2", "STS", "CCH", "MW", "SPA", "EPA",
        "SOS", "XXX", "SCI", "CSI", "ST", "OSC", "PM", "APC"
    };

    // U+007F DELETE is a control code point as well, but it lies outside both
    // contiguous table ranges, so it is handled as a single special case.
    static constexpr StringView delete_alias { "DEL" };

    if (code_point < 0x20)
        return ascii_controls_lookup_table[code_point];
    if (code_point == 0x7f)
        return delete_alias;
    if (code_point >= 0x80 && code_point < 0xa0)
        return c1_controls_lookup_table[code_point - 0x80];

    // Not a control code point: no alias.
    return {};
}
}