LibTextCodec+LibURL: Implement utf-8 and euc-jp encoders

Implements the corresponding encoders, selects the appropriate one when
encoding URL search params. If an encoder for the given encoding could
not be found, fallback to utf-8.
This commit is contained in:
BenJilks 2024-08-05 16:03:53 +01:00 committed by Tim Ledbetter
commit 72d0e3284b
Notes: github-actions[bot] 2024-08-08 16:51:38 +00:00
11 changed files with 260 additions and 22 deletions

View file

@ -21,6 +21,7 @@ struct LookupTable {
u32 max_code_point;
Vector<u32> code_points;
bool generate_accessor;
bool generate_inverse_accessor;
};
struct LookupTables {
@ -33,7 +34,12 @@ enum class GenerateAccessor {
Yes,
};
LookupTable prepare_table(JsonArray const& data, GenerateAccessor generate_accessor = GenerateAccessor::No)
enum class GenerateInverseAccessor {
No,
Yes,
};
LookupTable prepare_table(JsonArray const& data, GenerateAccessor generate_accessor = GenerateAccessor::No, GenerateInverseAccessor generate_inverse_accessor = GenerateInverseAccessor::No)
{
Vector<u32> code_points;
code_points.ensure_capacity(data.size());
@ -58,7 +64,7 @@ LookupTable prepare_table(JsonArray const& data, GenerateAccessor generate_acces
} else {
VERIFY(first_pointer == 0);
}
return { first_pointer, max, move(code_points), generate_accessor == GenerateAccessor::Yes };
return { first_pointer, max, move(code_points), generate_accessor == GenerateAccessor::Yes, generate_inverse_accessor == GenerateInverseAccessor::Yes };
}
void generate_table(SourceGenerator generator, StringView name, LookupTable& table)
@ -81,6 +87,8 @@ void generate_table(SourceGenerator generator, StringView name, LookupTable& tab
generator.appendln("\n};");
if (table.generate_accessor)
generator.appendln("Optional<u32> index_@name@_code_point(u32 pointer);");
if (table.generate_inverse_accessor)
generator.appendln("Optional<u32> code_point_@name@_index(u32 code_point);");
}
ErrorOr<void> generate_header_file(LookupTables& tables, Core::File& file)
@ -155,6 +163,42 @@ Optional<u32> index_@name@_code_point(u32 pointer)
}
}
void generate_inverse_table_accessor(SourceGenerator generator, StringView name, LookupTable& table)
{
generator.set("name", name);
generator.set("first_pointer", MUST(String::number(table.first_pointer)));
generator.set("size", MUST(String::number(table.code_points.size())));
// FIXME - Doing a linear search here is really slow, should be generating
// some kind of reverse lookup table.
if (table.first_pointer > 0) {
generator.append(R"~~~(
Optional<u32> code_point_@name@_index(u32 code_point)
{
for (u32 i = 0; i < s_@name@_index.size(); ++i) {
if (s_@name@_index[i] == code_point) {
return s_@name@_index_first_pointer + i;
}
}
return {};
}
)~~~");
} else {
generator.append(R"~~~(
Optional<u32> code_point_@name@_index(u32 code_point)
{
for (u32 i = 0; i < s_@name@_index.size(); ++i) {
if (s_@name@_index[i] == code_point) {
return i;
}
}
return {};
}
)~~~");
}
}
ErrorOr<void> generate_implementation_file(LookupTables& tables, Core::File& file)
{
StringBuilder builder;
@ -169,6 +213,8 @@ namespace TextCodec {
for (auto& [key, table] : tables.indexes) {
if (table.generate_accessor)
generate_table_accessor(generator.fork(), key, table);
if (table.generate_inverse_accessor)
generate_inverse_table_accessor(generator.fork(), key, table);
}
generator.appendln("\n}");
@ -222,7 +268,7 @@ ErrorOr<int> serenity_main(Main::Arguments arguments)
.indexes = {
{ "gb18030"sv, move(gb18030_table) },
{ "big5"sv, prepare_table(data.get("big5"sv)->as_array(), GenerateAccessor::Yes) },
{ "jis0208"sv, prepare_table(data.get("jis0208"sv)->as_array(), GenerateAccessor::Yes) },
{ "jis0208"sv, prepare_table(data.get("jis0208"sv)->as_array(), GenerateAccessor::Yes, GenerateInverseAccessor::Yes) },
{ "jis0212"sv, prepare_table(data.get("jis0212"sv)->as_array(), GenerateAccessor::Yes) },
{ "euc_kr"sv, prepare_table(data.get("euc-kr"sv)->as_array(), GenerateAccessor::Yes) },
{ "ibm866"sv, prepare_table(data.get("ibm866"sv)->as_array()) },