mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-07-28 19:59:17 +00:00
Meta: Rewrite GenerateEncodingIndexes in python
Some checks are pending
CI / macOS, arm64, Sanitizer_CI, Clang (push) Waiting to run
CI / Linux, x86_64, Fuzzers_CI, Clang (push) Waiting to run
CI / Linux, x86_64, Sanitizer_CI, GNU (push) Waiting to run
CI / Linux, x86_64, Sanitizer_CI, Clang (push) Waiting to run
Package the js repl as a binary artifact / Linux, arm64 (push) Waiting to run
Package the js repl as a binary artifact / macOS, arm64 (push) Waiting to run
Package the js repl as a binary artifact / Linux, x86_64 (push) Waiting to run
Run test262 and test-wasm / run_and_update_results (push) Waiting to run
Lint Code / lint (push) Waiting to run
Label PRs with merge conflicts / auto-labeler (push) Waiting to run
Push notes / build (push) Waiting to run
Some checks are pending
CI / macOS, arm64, Sanitizer_CI, Clang (push) Waiting to run
CI / Linux, x86_64, Fuzzers_CI, Clang (push) Waiting to run
CI / Linux, x86_64, Sanitizer_CI, GNU (push) Waiting to run
CI / Linux, x86_64, Sanitizer_CI, Clang (push) Waiting to run
Package the js repl as a binary artifact / Linux, arm64 (push) Waiting to run
Package the js repl as a binary artifact / macOS, arm64 (push) Waiting to run
Package the js repl as a binary artifact / Linux, x86_64 (push) Waiting to run
Run test262 and test-wasm / run_and_update_results (push) Waiting to run
Lint Code / lint (push) Waiting to run
Label PRs with merge conflicts / auto-labeler (push) Waiting to run
Push notes / build (push) Waiting to run
This commit is contained in:
parent
3836d0e219
commit
838ca8d172
Notes:
github-actions[bot]
2025-06-26 13:59:08 +00:00
Author: https://github.com/ayeteadoe
Commit: 838ca8d172
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/5027
Reviewed-by: https://github.com/ADKaster ✅
Reviewed-by: https://github.com/R-Goc
Reviewed-by: https://github.com/jdahlin
5 changed files with 309 additions and 325 deletions
|
@ -1,17 +1,17 @@
|
|||
function (generate_encoding_indexes)
|
||||
function(generate_encoding_indexes)
|
||||
set(LIBTEXTCODEC_INPUT_FOLDER "${CMAKE_CURRENT_SOURCE_DIR}")
|
||||
|
||||
# indexes.json can be found at https://encoding.spec.whatwg.org/indexes.json
|
||||
invoke_generator(
|
||||
"LookupTables.cpp"
|
||||
Lagom::GenerateEncodingIndexes
|
||||
"${LIBTEXTCODEC_INPUT_FOLDER}/indexes.json"
|
||||
"LookupTables.h"
|
||||
"LookupTables.cpp"
|
||||
arguments -j "${LIBTEXTCODEC_INPUT_FOLDER}/indexes.json"
|
||||
invoke_py_generator(
|
||||
"LookupTables.cpp"
|
||||
"generate_encoding_indexes.py"
|
||||
"${LIBTEXTCODEC_INPUT_FOLDER}/indexes.json"
|
||||
"LookupTables.h"
|
||||
"LookupTables.cpp"
|
||||
arguments -j "${LIBTEXTCODEC_INPUT_FOLDER}/indexes.json"
|
||||
)
|
||||
|
||||
if (ENABLE_INSTALL_HEADERS)
|
||||
if(ENABLE_INSTALL_HEADERS)
|
||||
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/LookupTables.h" DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/LibTextCodec/")
|
||||
endif()
|
||||
endfunction()
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
add_subdirectory(IPCCompiler)
|
||||
add_subdirectory(LibTextCodec)
|
||||
add_subdirectory(LibURL)
|
||||
add_subdirectory(LibWeb)
|
||||
|
|
|
@ -1 +0,0 @@
|
|||
lagom_tool(GenerateEncodingIndexes SOURCES GenerateEncodingIndexes.cpp LIBS LibMain)
|
|
@ -1,314 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2024, Simon Wanner <simon@skyrising.xyz>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include <AK/Array.h>
|
||||
#include <AK/JsonObject.h>
|
||||
#include <AK/NumericLimits.h>
|
||||
#include <AK/SourceGenerator.h>
|
||||
#include <AK/StringBuilder.h>
|
||||
#include <AK/StringView.h>
|
||||
#include <AK/Vector.h>
|
||||
#include <LibCore/ArgsParser.h>
|
||||
#include <LibCore/File.h>
|
||||
#include <LibMain/Main.h>
|
||||
|
||||
namespace {
|
||||
|
||||
// One encoding index prepared for code generation, plus the flags that select
// which accessor functions are emitted for it.
struct LookupTable {
    // Number of null entries skipped at the start of the index, before the first stored value.
    u32 first_pointer;
    // Largest value appended to code_points while the table was built;
    // generate_table() uses it to choose between u16 and u32 storage.
    u32 max_code_point;
    // Stored values; null entries after the first stored value are kept as 0xfffd.
    Vector<u32> code_points;
    // When true, index_<name>_code_point() is declared and defined.
    bool generate_accessor;
    // When true, code_point_<name>_index() is declared and defined.
    bool generate_inverse_accessor;
};
|
||||
|
||||
// Everything the header and implementation generators consume.
struct LookupTables {
    // The "gb18030-ranges" JSON array. Held by reference, so the parsed JSON
    // object it belongs to has to stay alive while this struct is in use.
    JsonArray const& gb18030_ranges;
    // Table name (used inside generated identifiers) -> prepared table, in insertion order.
    OrderedHashMap<StringView, LookupTable> indexes;
};
|
||||
|
||||
// Tells prepare_table() whether the table should get an index_<name>_code_point() accessor.
enum class GenerateAccessor {
    No,
    Yes,
};
|
||||
|
||||
// Tells prepare_table() whether the table should get a code_point_<name>_index() accessor.
enum class GenerateInverseAccessor {
    No,
    Yes,
};
|
||||
|
||||
// Builds a LookupTable from one index array of indexes.json.
//
// Null entries before the first value are not stored; they are counted in
// first_pointer. Null entries after the first value are stored as 0xfffd.
// When an accessor is requested, trailing 0xfffd entries are dropped again;
// otherwise the index is required to have no leading null entries.
LookupTable prepare_table(JsonArray const& data, GenerateAccessor generate_accessor = GenerateAccessor::No, GenerateInverseAccessor generate_inverse_accessor = GenerateInverseAccessor::No)
{
    Vector<u32> code_points;
    code_points.ensure_capacity(data.size());
    u32 max = 0;
    u32 first_pointer = 0;
    for (auto const& entry : data.values()) {
        if (entry.is_null() && code_points.is_empty()) {
            // Leading holes only shift the first valid pointer.
            first_pointer++;
            continue;
        }
        // Holes after the first value keep their slot and are marked with 0xfffd.
        u32 const code_point = entry.is_null() ? 0xfffd : entry.as_integer<u32>();
        code_points.append(code_point);
        max = AK::max(max, code_point);
    }
    if (generate_accessor == GenerateAccessor::Yes) {
        // The generated accessor bounds-checks against the array size, so trailing holes
        // do not need to be stored. The emptiness check keeps last() from being called on
        // an empty vector when the index is empty or consists only of holes / 0xfffd.
        while (!code_points.is_empty() && code_points.last() == 0xfffd)
            code_points.take_last();
    } else {
        // Without an accessor nothing applies the first_pointer offset, so it must be zero.
        VERIFY(first_pointer == 0);
    }
    return { first_pointer, max, move(code_points), generate_accessor == GenerateAccessor::Yes, generate_inverse_accessor == GenerateInverseAccessor::Yes };
}
|
||||
|
||||
// Emits the header-side text for one table: the optional first-pointer constant,
// the constexpr array holding the values (16 per row), and the declarations of
// whichever accessors the table asks for.
void generate_table(SourceGenerator generator, StringView name, LookupTable& table)
{
    auto const entry_count = table.code_points.size();
    bool const needs_u32 = table.max_code_point > NumericLimits<u16>::max();

    generator.set("name", name);
    generator.set("value_type", needs_u32 ? "u32" : "u16");
    generator.set("first_pointer", String::number(table.first_pointer));
    generator.set("size", String::number(entry_count));

    if (table.first_pointer > 0)
        generator.appendln("static constexpr u32 s_@name@_index_first_pointer = @first_pointer@;");

    generator.append("static constexpr Array<@value_type@, @size@> s_@name@_index {\n ");
    for (size_t position = 0; position < entry_count; ++position) {
        // A separator goes in front of every element but the first; a new row starts after every 16 values.
        if (position != 0)
            generator.append(position % 16 == 0 ? ",\n "sv : ", "sv);
        generator.append(MUST(String::formatted("{:#04x}", table.code_points[position])));
    }
    generator.appendln("\n};");

    if (table.generate_accessor)
        generator.appendln("Optional<u32> index_@name@_code_point(u32 pointer);");
    if (table.generate_inverse_accessor)
        generator.appendln("Optional<u32> code_point_@name@_index(u32 code_point);");
}
|
||||
|
||||
// Writes the generated header to `file`: the Gb18030RangeEntry struct, the
// s_gb18030_ranges array, and one generate_table() section per entry of tables.indexes.
// Returns an error if writing to the file fails.
ErrorOr<void> generate_header_file(LookupTables& tables, Core::File& file)
{
    StringBuilder builder;
    SourceGenerator generator { builder };

    generator.set("gb18030_ranges_size", String::number(tables.gb18030_ranges.size()));

    generator.append(R"~~~(
#pragma once

#include <AK/Array.h>
#include <AK/Types.h>

namespace TextCodec {

struct Gb18030RangeEntry {
u32 pointer;
u32 code_point;
};

static constexpr Array<Gb18030RangeEntry, @gb18030_ranges_size@> s_gb18030_ranges { {
)~~~");

    // Each range is read as a two-element array: [pointer, code point].
    for (auto const& range : tables.gb18030_ranges.values()) {
        auto const& pair = range.as_array();
        generator.appendln(MUST(String::formatted(" {{ {}, {:#04x} }},", pair[0].as_integer<u32>(), pair[1].as_integer<u32>())));
    }
    generator.appendln("} };\n");

    // Iterate by reference: taking each entry by value would copy the table's whole code point vector.
    for (auto& [name, table] : tables.indexes)
        generate_table(generator.fork(), name, table);

    generator.append("\n");
    generator.appendln("}");

    TRY(file.write_until_depleted(generator.as_string_view().bytes()));
    return {};
}
|
||||
|
||||
// Emits the definition of index_<name>_code_point(pointer) for one table.
// The generated function returns an empty Optional for out-of-range pointers and
// for slots holding 0xfffd. Tables with a non-zero first_pointer get a variant
// that subtracts s_<name>_index_first_pointer before indexing.
void generate_table_accessor(SourceGenerator generator, StringView name, LookupTable& table)
{
    generator.set("name", name);
    generator.set("first_pointer", String::number(table.first_pointer));
    generator.set("size", String::number(table.code_points.size()));

    bool const starts_at_zero = table.first_pointer == 0;
    if (starts_at_zero) {
        generator.append(R"~~~(
Optional<u32> index_@name@_code_point(u32 pointer)
{
if (pointer >= s_@name@_index.size())
return {};
auto value = s_@name@_index[pointer];
if (value == 0xfffd)
return {};
return value;
}
)~~~");
        return;
    }

    generator.append(R"~~~(
Optional<u32> index_@name@_code_point(u32 pointer)
{
if (pointer < s_@name@_index_first_pointer || pointer - s_@name@_index_first_pointer >= s_@name@_index.size())
return {};
auto value = s_@name@_index[pointer - s_@name@_index_first_pointer];
if (value == 0xfffd)
return {};
return value;
}
)~~~");
}
|
||||
|
||||
// Emits the definition of code_point_<name>_index(code_point) for one table.
// The generated function scans the array from the front and returns the pointer of
// the first matching slot (offset by s_<name>_index_first_pointer when the table
// has one), or an empty Optional if the code point is not present.
void generate_inverse_table_accessor(SourceGenerator generator, StringView name, LookupTable& table)
{
    generator.set("name", name);
    generator.set("first_pointer", String::number(table.first_pointer));
    generator.set("size", String::number(table.code_points.size()));

    // FIXME - Doing a linear search here is really slow, should be generating
    // some kind of reverse lookup table.

    bool const starts_at_zero = table.first_pointer == 0;
    if (starts_at_zero) {
        generator.append(R"~~~(
Optional<u32> code_point_@name@_index(u32 code_point)
{
for (u32 i = 0; i < s_@name@_index.size(); ++i) {
if (s_@name@_index[i] == code_point) {
return i;
}
}
return {};
}
)~~~");
        return;
    }

    generator.append(R"~~~(
Optional<u32> code_point_@name@_index(u32 code_point)
{
for (u32 i = 0; i < s_@name@_index.size(); ++i) {
if (s_@name@_index[i] == code_point) {
return s_@name@_index_first_pointer + i;
}
}
return {};
}
)~~~");
}
|
||||
|
||||
// Writes the generated implementation file to `file`: the include of
// LookupTables.h followed by the accessor definitions each table requests,
// all inside namespace TextCodec. Returns an error if writing to the file fails.
ErrorOr<void> generate_implementation_file(LookupTables& tables, Core::File& file)
{
    StringBuilder output;
    SourceGenerator generator { output };

    generator.append(R"~~~(
#include <LibTextCodec/LookupTables.h>

namespace TextCodec {
)~~~");

    for (auto& entry : tables.indexes) {
        auto& table = entry.value;
        // The forward accessor is emitted before the inverse one for the same table.
        if (table.generate_accessor)
            generate_table_accessor(generator.fork(), entry.key, table);
        if (table.generate_inverse_accessor)
            generate_inverse_table_accessor(generator.fork(), entry.key, table);
    }

    generator.appendln("\n}");

    auto const contents = generator.as_string_view();
    TRY(file.write_until_depleted(contents.bytes()));
    return {};
}
|
||||
|
||||
} // end anonymous namespace
|
||||
|
||||
// Entry point: parses the -h / -c / -j options, reads the JSON index file,
// prepares every lookup table and writes the generated header and implementation files.
ErrorOr<int> serenity_main(Main::Arguments arguments)
{
    StringView json_path;
    StringView generated_header_path;
    StringView generated_implementation_path;

    Core::ArgsParser args_parser;
    args_parser.add_option(generated_header_path, "Path to the lookup table header file to generate", "generated-header-path", 'h', "generated-header-path");
    args_parser.add_option(generated_implementation_path, "Path to the lookup table implementation file to generate", "generated-implementation-path", 'c', "generated-implementation-path");
    args_parser.add_option(json_path, "Path to the JSON file to read from", "json-path", 'j', "json-path");
    args_parser.parse(arguments);

    auto json_file = TRY(Core::File::open(json_path, Core::File::OpenMode::Read));
    auto json_data = TRY(json_file->read_until_eof());
    auto data = TRY(JsonValue::from_string(json_data)).as_object();

    // Looks up one top-level array of the JSON document; the key is expected to exist.
    auto index = [&](StringView key) -> JsonArray const& {
        return data.get(key)->as_array();
    };

    auto gb18030_table = prepare_table(index("gb18030"sv), GenerateAccessor::Yes, GenerateInverseAccessor::Yes);

    // FIXME: Encoding specification is not updated to GB-18030-2022 yet (https://github.com/whatwg/encoding/issues/312)
    // NOTE: See https://commits.webkit.org/264918@main
    struct Override {
        size_t position;
        u32 code_point;
    };
    static constexpr Override gb18030_overrides[] = {
        { 7182, 0xfe10 },
        { 7183, 0xfe12 },
        { 7184, 0xfe11 },
        { 7185, 0xfe13 },
        { 7186, 0xfe14 },
        { 7187, 0xfe15 },
        { 7188, 0xfe16 },
        { 7201, 0xfe17 },
        { 7202, 0xfe18 },
        { 7208, 0xfe19 },
        { 23775, 0x9fb4 },
        { 23783, 0x9fb5 },
        { 23788, 0x9fb6 },
        { 23789, 0x9fb7 },
        { 23795, 0x9fb8 },
        { 23812, 0x9fb9 },
        { 23829, 0x9fba },
        { 23845, 0x9fbb },
    };
    for (auto const& override_entry : gb18030_overrides)
        gb18030_table.code_points[override_entry.position] = override_entry.code_point;

    LookupTables tables {
        .gb18030_ranges = index("gb18030-ranges"sv),
        .indexes = {
            { "gb18030"sv, move(gb18030_table) },
            { "big5"sv, prepare_table(index("big5"sv), GenerateAccessor::Yes) },
            { "jis0208"sv, prepare_table(index("jis0208"sv), GenerateAccessor::Yes, GenerateInverseAccessor::Yes) },
            { "jis0212"sv, prepare_table(index("jis0212"sv), GenerateAccessor::Yes) },
            { "euc_kr"sv, prepare_table(index("euc-kr"sv), GenerateAccessor::Yes, GenerateInverseAccessor::Yes) },
            { "ibm866"sv, prepare_table(index("ibm866"sv)) },
            { "iso_2022_jp_katakana"sv, prepare_table(index("iso-2022-jp-katakana"sv), GenerateAccessor::Yes) },
            { "iso_8859_2"sv, prepare_table(index("iso-8859-2"sv)) },
            { "iso_8859_3"sv, prepare_table(index("iso-8859-3"sv)) },
            { "iso_8859_4"sv, prepare_table(index("iso-8859-4"sv)) },
            { "iso_8859_5"sv, prepare_table(index("iso-8859-5"sv)) },
            { "iso_8859_6"sv, prepare_table(index("iso-8859-6"sv)) },
            { "iso_8859_7"sv, prepare_table(index("iso-8859-7"sv)) },
            { "iso_8859_8"sv, prepare_table(index("iso-8859-8"sv)) },
            { "iso_8859_10"sv, prepare_table(index("iso-8859-10"sv)) },
            { "iso_8859_13"sv, prepare_table(index("iso-8859-13"sv)) },
            { "iso_8859_14"sv, prepare_table(index("iso-8859-14"sv)) },
            { "iso_8859_15"sv, prepare_table(index("iso-8859-15"sv)) },
            { "iso_8859_16"sv, prepare_table(index("iso-8859-16"sv)) },
            { "koi8_r"sv, prepare_table(index("koi8-r"sv)) },
            { "koi8_u"sv, prepare_table(index("koi8-u"sv)) },
            { "macintosh"sv, prepare_table(index("macintosh"sv)) },
            { "windows_874"sv, prepare_table(index("windows-874"sv)) },
            { "windows_1250"sv, prepare_table(index("windows-1250"sv)) },
            { "windows_1251"sv, prepare_table(index("windows-1251"sv)) },
            { "windows_1252"sv, prepare_table(index("windows-1252"sv)) },
            { "windows_1253"sv, prepare_table(index("windows-1253"sv)) },
            { "windows_1254"sv, prepare_table(index("windows-1254"sv)) },
            { "windows_1255"sv, prepare_table(index("windows-1255"sv)) },
            { "windows_1256"sv, prepare_table(index("windows-1256"sv)) },
            { "windows_1257"sv, prepare_table(index("windows-1257"sv)) },
            { "windows_1258"sv, prepare_table(index("windows-1258"sv)) },
            { "x_mac_cyrillic"sv, prepare_table(index("x-mac-cyrillic"sv)) },
        },
    };

    auto generated_header_file = TRY(Core::File::open(generated_header_path, Core::File::OpenMode::Write));
    auto generated_implementation_file = TRY(Core::File::open(generated_implementation_path, Core::File::OpenMode::Write));

    TRY(generate_header_file(tables, *generated_header_file));
    TRY(generate_implementation_file(tables, *generated_implementation_file));

    return 0;
}
|
300
Meta/generate_encoding_indexes.py
Normal file
300
Meta/generate_encoding_indexes.py
Normal file
|
@ -0,0 +1,300 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
# Copyright (c) 2024, Simon Wanner <simon@skyrising.xyz>
|
||||
# Copyright (c) 2025, ayeteadoe <ayeteadoe@gmail.com>
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-2-Clause
|
||||
|
||||
import argparse
|
||||
import json
|
||||
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
|
||||
class GenerateAccessor(Enum):
    """Whether a table gets a forward ``index_<name>_code_point`` accessor."""

    NO = False
    YES = True


class GenerateInverseAccessor(Enum):
    """Whether a table gets an inverse ``code_point_<name>_index`` accessor."""

    NO = False
    YES = True


@dataclass
class LookupTable:
    """One encoding index, ready to be emitted as a C++ array."""

    # Number of leading null entries that were skipped instead of stored.
    first_pointer: int
    # Largest value appended while reading the index; selects u16 vs u32 storage.
    max_code_point: int
    # Stored values; null entries after the first stored value are kept as 0xFFFD.
    code_points: list[int]
    # Kept as plain bools: members of a plain Enum are always truthy, so storing
    # GenerateAccessor.NO here would still pass an `if table.generate_accessor:` check.
    generate_accessor: bool
    generate_inverse_accessor: bool


@dataclass
class LookupTables:
    """Everything the header and implementation generators consume."""

    # The "gb18030-ranges" array from the JSON; each entry is read as [pointer, code_point].
    gb18030_ranges: list[Any]
    # Name used inside generated identifiers -> table, in emission order.
    indexes: dict[str, LookupTable]


def prepare_table(
    data: list[Any],
    generate_accessor: GenerateAccessor = GenerateAccessor.NO,
    generate_inverse_accessor: GenerateInverseAccessor = GenerateInverseAccessor.NO,
) -> LookupTable:
    """Build a LookupTable from one index array of indexes.json.

    Null entries before the first value are not stored; they are counted in
    ``first_pointer``. Null entries after the first value are stored as 0xFFFD.
    When an accessor is requested, trailing 0xFFFD entries are dropped again;
    otherwise the index must not start with null entries.

    :param data: the JSON array (integers and nulls).
    :param generate_accessor: request the forward accessor for this table.
    :param generate_inverse_accessor: request the inverse accessor for this table.
    :return: the prepared table, with both requests stored as booleans.
    """
    code_points: list[int] = []
    max_code_point = 0
    first_pointer = 0

    for entry in data:
        if entry is None and not code_points:
            # Leading holes only shift the first valid pointer.
            first_pointer += 1
            continue
        code_points.append(0xFFFD if entry is None else int(entry))
        max_code_point = max(max_code_point, code_points[-1])

    wants_accessor = generate_accessor == GenerateAccessor.YES
    wants_inverse_accessor = generate_inverse_accessor == GenerateInverseAccessor.YES

    if wants_accessor:
        # The generated accessor bounds-checks against the array size, so
        # trailing holes do not need to be stored.
        while code_points and code_points[-1] == 0xFFFD:
            code_points.pop()
    else:
        # Without an accessor nothing applies the first_pointer offset.
        assert first_pointer == 0, "index without accessor must not start with null entries"

    return LookupTable(
        first_pointer=first_pointer,
        max_code_point=max_code_point,
        code_points=code_points,
        generate_accessor=wants_accessor,
        generate_inverse_accessor=wants_inverse_accessor,
    )


def generate_table(name: str, table: LookupTable) -> str:
    """Return the header text for one table.

    The text holds the optional first-pointer constant, the constexpr array
    (16 values per row) and the declarations of the accessors the table requests.
    """
    value_type = "u32" if table.max_code_point > 0xFFFF else "u16"
    size = len(table.code_points)

    lines = []

    if table.first_pointer > 0:
        lines.append(f"static constexpr u32 s_{name}_index_first_pointer = {table.first_pointer};")

    lines.append(f"static constexpr Array<{value_type}, {size}> s_{name}_index {{")

    # Join values with ", " inside a row and ",\n " between rows, so no stray
    # spaces end up in front of the commas.
    formatted_points = [f"0x{point:04x}" for point in table.code_points]
    rows = [", ".join(formatted_points[start : start + 16]) for start in range(0, size, 16)]
    lines.append(" " + ",\n ".join(rows))
    lines.append("};")

    if table.generate_accessor:
        lines.append(f"Optional<u32> index_{name}_code_point(u32 pointer);")

    if table.generate_inverse_accessor:
        lines.append(f"Optional<u32> code_point_{name}_index(u32 code_point);")

    return "\n".join(lines)


def generate_header_file(tables: LookupTables, output_path: Path) -> None:
    """Write the generated header: the gb18030 ranges array followed by every table."""
    gb18030_ranges_size = len(tables.gb18030_ranges)

    content = f"""#pragma once

#include <AK/Array.h>
#include <AK/Types.h>

namespace TextCodec {{

struct Gb18030RangeEntry {{
u32 pointer;
u32 code_point;
}};

static constexpr Array<Gb18030RangeEntry, {gb18030_ranges_size}> s_gb18030_ranges {{ {{
"""

    for range_entry in tables.gb18030_ranges:
        pointer = range_entry[0]
        code_point = range_entry[1]
        content += f" {{ {pointer}, 0x{code_point:04x} }},\n"

    content += "} };\n\n"

    for name, table in tables.indexes.items():
        content += generate_table(name, table) + "\n\n"

    content += "}\n"

    with open(output_path, "w", encoding="utf-8") as f:
        f.write(content)


def generate_table_accessor(name: str, table: LookupTable) -> str:
    """Return the definition of ``index_<name>_code_point(pointer)``.

    The generated function returns an empty Optional for out-of-range pointers
    and for slots holding 0xfffd. Tables with a non-zero ``first_pointer``
    subtract ``s_<name>_index_first_pointer`` before indexing.
    """
    if table.first_pointer > 0:
        offset = f"pointer - s_{name}_index_first_pointer"
        out_of_range = f"pointer < s_{name}_index_first_pointer || {offset} >= s_{name}_index.size()"
    else:
        offset = "pointer"
        out_of_range = f"pointer >= s_{name}_index.size()"

    return f"""
Optional<u32> index_{name}_code_point(u32 pointer)
{{
if ({out_of_range})
return {{}};
auto value = s_{name}_index[{offset}];
if (value == 0xfffd)
return {{}};
return value;
}}
"""


def generate_inverse_table_accessor(name: str, table: LookupTable) -> str:
    """Return the definition of ``code_point_<name>_index(code_point)``.

    The generated function scans the array from the front and returns the
    pointer of the first match (offset by ``s_<name>_index_first_pointer`` when
    the table has one), or an empty Optional if the code point is absent.
    """
    # FIXME: The generated code does a linear search; a reverse lookup table would be faster.
    pointer = f"s_{name}_index_first_pointer + i" if table.first_pointer > 0 else "i"

    return f"""
Optional<u32> code_point_{name}_index(u32 code_point)
{{
for (u32 i = 0; i < s_{name}_index.size(); ++i) {{
if (s_{name}_index[i] == code_point) {{
return {pointer};
}}
}}
return {{}};
}}
"""


def generate_implementation_file(tables: LookupTables, output_path: Path) -> None:
    """Write the generated implementation file with the accessors each table requests."""
    content = """
#include <LibTextCodec/LookupTables.h>

namespace TextCodec {
"""

    for name, table in tables.indexes.items():
        if table.generate_accessor:
            content += generate_table_accessor(name, table)
        if table.generate_inverse_accessor:
            content += generate_inverse_table_accessor(name, table)

    content += "\n}\n"

    with open(output_path, "w", encoding="utf-8") as f:
        f.write(content)


# indexes.json keys in emission order. The name used in generated identifiers
# is the key with "-" replaced by "_".
_INDEX_KEYS = (
    "gb18030",
    "big5",
    "jis0208",
    "jis0212",
    "euc-kr",
    "ibm866",
    "iso-2022-jp-katakana",
    "iso-8859-2",
    "iso-8859-3",
    "iso-8859-4",
    "iso-8859-5",
    "iso-8859-6",
    "iso-8859-7",
    "iso-8859-8",
    "iso-8859-10",
    "iso-8859-13",
    "iso-8859-14",
    "iso-8859-15",
    "iso-8859-16",
    "koi8-r",
    "koi8-u",
    "macintosh",
    "windows-874",
    "windows-1250",
    "windows-1251",
    "windows-1252",
    "windows-1253",
    "windows-1254",
    "windows-1255",
    "windows-1256",
    "windows-1257",
    "windows-1258",
    "x-mac-cyrillic",
)

# Tables that get index_<name>_code_point().
_ACCESSOR_KEYS = frozenset({"gb18030", "big5", "jis0208", "jis0212", "euc-kr", "iso-2022-jp-katakana"})

# Tables that additionally get code_point_<name>_index().
_INVERSE_ACCESSOR_KEYS = frozenset({"gb18030", "jis0208", "euc-kr"})

# FIXME: Update JSON to match GB-18030-2022 Encoding specification (https://github.com/whatwg/encoding/issues/312)
# NOTE: See https://commits.webkit.org/264918@main
_GB18030_UPDATES = {
    7182: 0xFE10,
    7183: 0xFE12,
    7184: 0xFE11,
    7185: 0xFE13,
    7186: 0xFE14,
    7187: 0xFE15,
    7188: 0xFE16,
    7201: 0xFE17,
    7202: 0xFE18,
    7208: 0xFE19,
    23775: 0x9FB4,
    23783: 0x9FB5,
    23788: 0x9FB6,
    23789: 0x9FB7,
    23795: 0x9FB8,
    23812: 0x9FB9,
    23829: 0x9FBA,
    23845: 0x9FBB,
}


def _build_lookup_tables(data: dict[str, Any]) -> LookupTables:
    """Prepare every table listed in _INDEX_KEYS from the parsed indexes.json document."""
    indexes: dict[str, LookupTable] = {}
    for key in _INDEX_KEYS:
        indexes[key.replace("-", "_")] = prepare_table(
            data[key],
            GenerateAccessor.YES if key in _ACCESSOR_KEYS else GenerateAccessor.NO,
            GenerateInverseAccessor.YES if key in _INVERSE_ACCESSOR_KEYS else GenerateInverseAccessor.NO,
        )

    # Positions index the stored gb18030 values; positions past the end are skipped.
    gb18030_points = indexes["gb18030"].code_points
    for index, value in _GB18030_UPDATES.items():
        if index < len(gb18030_points):
            gb18030_points[index] = value

    return LookupTables(gb18030_ranges=data["gb18030-ranges"], indexes=indexes)


def main():
    """Parse -h / -c / -j, read the JSON indexes and write both generated files."""
    # add_help=False frees "-h" so it can name the generated header path.
    parser = argparse.ArgumentParser(description="Generate text codec lookup tables", add_help=False)
    parser.add_argument("-h", required=True)
    parser.add_argument("-c", required=True)
    parser.add_argument("-j", required=True)

    args = parser.parse_args()

    with open(args.j, "r", encoding="utf-8") as f:
        data = json.load(f)

    tables = _build_lookup_tables(data)

    generate_header_file(tables, Path(args.h))
    generate_implementation_file(tables, Path(args.c))


if __name__ == "__main__":
    main()
|
Loading…
Add table
Add a link
Reference in a new issue