From 2682f370dd530926cf2b839912d955d73c907180 Mon Sep 17 00:00:00 2001 From: ayeteadoe Date: Thu, 26 Jun 2025 10:01:39 -0700 Subject: [PATCH] Meta: Rewrite GeneratePublicSuffixData in python --- Meta/CMake/public_suffix.cmake | 4 +- .../Lagom/Tools/CodeGenerators/CMakeLists.txt | 1 - .../CodeGenerators/LibURL/CMakeLists.txt | 1 - .../LibURL/GeneratePublicSuffixData.cpp | 191 ------------------ Meta/generate_public_suffix_data.py | 156 ++++++++++++++ 5 files changed, 158 insertions(+), 195 deletions(-) delete mode 100644 Meta/Lagom/Tools/CodeGenerators/LibURL/CMakeLists.txt delete mode 100644 Meta/Lagom/Tools/CodeGenerators/LibURL/GeneratePublicSuffixData.cpp create mode 100644 Meta/generate_public_suffix_data.py diff --git a/Meta/CMake/public_suffix.cmake b/Meta/CMake/public_suffix.cmake index b955e984e34..533dea83f16 100644 --- a/Meta/CMake/public_suffix.cmake +++ b/Meta/CMake/public_suffix.cmake @@ -10,9 +10,9 @@ if (ENABLE_NETWORK_DOWNLOADS) else() message(STATUS "Skipping download of ${PUBLIC_SUFFIX_DATA_URL}, expecting it to be in ${PUBLIC_SUFFIX_DATA_PATH}") endif() -invoke_cpp_generator( +invoke_py_generator( "PublicSuffixData" - Lagom::GeneratePublicSuffixData + "generate_public_suffix_data.py" "${PUBLIC_SUFFIX_PATH}/" "${PUBLIC_SUFFIX_DATA_HEADER}" "${PUBLIC_SUFFIX_DATA_IMPLEMENTATION}" diff --git a/Meta/Lagom/Tools/CodeGenerators/CMakeLists.txt b/Meta/Lagom/Tools/CodeGenerators/CMakeLists.txt index fbfe2a7e739..29b843d90f2 100644 --- a/Meta/Lagom/Tools/CodeGenerators/CMakeLists.txt +++ b/Meta/Lagom/Tools/CodeGenerators/CMakeLists.txt @@ -1,3 +1,2 @@ add_subdirectory(IPCCompiler) -add_subdirectory(LibURL) add_subdirectory(LibWeb) diff --git a/Meta/Lagom/Tools/CodeGenerators/LibURL/CMakeLists.txt b/Meta/Lagom/Tools/CodeGenerators/LibURL/CMakeLists.txt deleted file mode 100644 index 35dec51cc60..00000000000 --- a/Meta/Lagom/Tools/CodeGenerators/LibURL/CMakeLists.txt +++ /dev/null @@ -1 +0,0 @@ -lagom_tool(GeneratePublicSuffixData SOURCES 
GeneratePublicSuffixData.cpp LIBS LibMain) diff --git a/Meta/Lagom/Tools/CodeGenerators/LibURL/GeneratePublicSuffixData.cpp b/Meta/Lagom/Tools/CodeGenerators/LibURL/GeneratePublicSuffixData.cpp deleted file mode 100644 index 3a064e6b3f8..00000000000 --- a/Meta/Lagom/Tools/CodeGenerators/LibURL/GeneratePublicSuffixData.cpp +++ /dev/null @@ -1,191 +0,0 @@ -/* - * Copyright (c) 2023, Cameron Youell - * - * SPDX-License-Identifier: BSD-2-Clause - */ - -#include -#include -#include -#include -#include - -ErrorOr generate_header_file(Core::InputBufferedFile&, Core::File&); -ErrorOr generate_implementation_file(Core::InputBufferedFile&, Core::File&); - -static ErrorOr> open_file(StringView path, Core::File::OpenMode mode) -{ - if (path.is_empty()) - return Error::from_string_literal("Provided path is empty, please provide all command line options"); - - auto file = TRY(Core::File::open(path, mode)); - return Core::InputBufferedFile::create(move(file)); -} - -ErrorOr serenity_main(Main::Arguments arguments) -{ - StringView generated_header_path; - StringView generated_implementation_path; - StringView public_suffix_list_path; - - Core::ArgsParser args_parser; - args_parser.add_option(generated_header_path, "Path to the header file to generate", "generated-header-path", 'h', "generated-header-path"); - args_parser.add_option(generated_implementation_path, "Path to the implementation file to generate", "generated-implementation-path", 'c', "generated-implementation-path"); - args_parser.add_option(public_suffix_list_path, "Path to the public suffix list", "public-suffix-list-path", 'p', "public-suffix-list-path"); - args_parser.parse(arguments); - - auto identifier_data = TRY(open_file(public_suffix_list_path, Core::File::OpenMode::Read)); - - auto generated_header_file = TRY(Core::File::open(generated_header_path, Core::File::OpenMode::Write)); - auto generated_implementation_file = TRY(Core::File::open(generated_implementation_path, Core::File::OpenMode::Write)); - - 
TRY(generate_header_file(*identifier_data, *generated_header_file)); - TRY(generate_implementation_file(*identifier_data, *generated_implementation_file)); - - return 0; -} - -ErrorOr generate_header_file(Core::InputBufferedFile&, Core::File& file) -{ - StringBuilder builder; - SourceGenerator generator { builder }; - generator.append(R"~~~( -#pragma once - -#include -#include -#include - -namespace URL { - -class PublicSuffixData { -protected: - PublicSuffixData(); - -public: - PublicSuffixData(PublicSuffixData const&) = delete; - PublicSuffixData& operator=(PublicSuffixData const&) = delete; - - static PublicSuffixData* the() - { - static PublicSuffixData* s_the; - if (!s_the) - s_the = new PublicSuffixData; - return s_the; - } - - bool is_public_suffix(StringView host); - Optional get_public_suffix(StringView string); - -private: - Trie m_dictionary; -}; - -} - -)~~~"); - - TRY(file.write_until_depleted(generator.as_string_view().bytes())); - return {}; -} - -ErrorOr generate_implementation_file(Core::InputBufferedFile& input, Core::File& file) -{ - StringBuilder builder; - SourceGenerator generator { builder }; - generator.append(R"~~~( -#include -#include -#include - -namespace URL { - -static constexpr auto s_public_suffixes = Array {)~~~"); - - Array buffer {}; - - while (TRY(input.can_read_line())) { - auto line = TRY(input.read_line(buffer)); - - if (line.starts_with("//"sv) || line.is_empty()) - continue; - - auto view = line.split_view("."sv); - view.reverse(); - - auto val = MUST(String::join("."sv, view)); - - generator.set("line", val); - generator.append(R"~~~( - "@line@"sv,)~~~"); - } - - generator.append(R"~~~( -}; - -PublicSuffixData::PublicSuffixData() - : m_dictionary('/') -{ - // FIXME: Reduce the depth of this trie - for (auto str : s_public_suffixes) { - MUST(m_dictionary.insert(str.begin(), str.end(), Empty {}, [](auto const&, auto const&) -> Optional { return {}; })); - } -} - -bool PublicSuffixData::is_public_suffix(StringView host) -{ - 
auto it = host.begin(); - auto& node = m_dictionary.traverse_until_last_accessible_node(it, host.end()); - return it.is_end() && node.has_metadata(); -} - -Optional PublicSuffixData::get_public_suffix(StringView string) -{ - auto input = string.split_view('.'); - input.reverse(); - - StringBuilder overall_search_string; - StringBuilder search_string; - for (auto part : input) { - search_string.clear(); - search_string.append(overall_search_string.string_view()); - search_string.append(part); - - if (is_public_suffix(search_string.string_view())) { - overall_search_string.append(part); - overall_search_string.append('.'); - continue; - } - - search_string.clear(); - search_string.append(overall_search_string.string_view()); - search_string.append('.'); - - if (is_public_suffix(search_string.string_view())) { - overall_search_string.append(part); - overall_search_string.append('.'); - continue; - } - - break; - } - - auto view = overall_search_string.string_view().split_view('.'); - view.reverse(); - - StringBuilder return_string_builder; - return_string_builder.join('.', view); - - if (return_string_builder.is_empty()) - return Optional {}; - - return MUST(return_string_builder.to_string()); -} - -} - -)~~~"); - - TRY(file.write_until_depleted(generator.as_string_view().bytes())); - return {}; -} diff --git a/Meta/generate_public_suffix_data.py b/Meta/generate_public_suffix_data.py new file mode 100644 index 00000000000..64058f00ae3 --- /dev/null +++ b/Meta/generate_public_suffix_data.py @@ -0,0 +1,156 @@ +#!/usr/bin/env python3 + +# Copyright (c) 2023, Cameron Youell +# Copyright (c) 2025, ayeteadoe +# +# SPDX-License-Identifier: BSD-2-Clause + +import argparse + +from pathlib import Path + + +def generate_header_file(output_path: Path) -> None: + content = """#pragma once + +#include +#include +#include + +namespace URL { + +class PublicSuffixData { +protected: + PublicSuffixData(); + +public: + PublicSuffixData(PublicSuffixData const&) = delete; + 
PublicSuffixData& operator=(PublicSuffixData const&) = delete; + + static PublicSuffixData* the() + { + static PublicSuffixData* s_the; + if (!s_the) + s_the = new PublicSuffixData; + return s_the; + } + + bool is_public_suffix(StringView host); + Optional get_public_suffix(StringView string); + +private: + Trie m_dictionary; +}; + +} + +""" + + with open(output_path, "w", encoding="utf-8") as f: + f.write(content) + + +def generate_implementation_file(input_path: Path, output_path: Path) -> None: + content = """#include +#include +#include + +namespace URL { + +static constexpr auto s_public_suffixes = Array {""" + + with open(input_path, "r", encoding="utf-8") as f: + for line in f: + line = line.strip() + + if line.startswith("//") or not line: + continue + + reversed_line = ".".join(line.split(".")[::-1]) + content += f'\n "{reversed_line}"sv,' + + content += """ +}; + +PublicSuffixData::PublicSuffixData() + : m_dictionary('/') +{ + // FIXME: Reduce the depth of this trie + for (auto str : s_public_suffixes) { + MUST(m_dictionary.insert(str.begin(), str.end(), Empty {}, [](auto const&, auto const&) -> Optional { return {}; })); + } +} + +bool PublicSuffixData::is_public_suffix(StringView host) +{ + auto it = host.begin(); + auto& node = m_dictionary.traverse_until_last_accessible_node(it, host.end()); + return it.is_end() && node.has_metadata(); +} + +Optional PublicSuffixData::get_public_suffix(StringView string) +{ + auto input = string.split_view('.'); + input.reverse(); + + StringBuilder overall_search_string; + StringBuilder search_string; + for (auto part : input) { + search_string.clear(); + search_string.append(overall_search_string.string_view()); + search_string.append(part); + + if (is_public_suffix(search_string.string_view())) { + overall_search_string.append(part); + overall_search_string.append('.'); + continue; + } + + search_string.clear(); + search_string.append(overall_search_string.string_view()); + search_string.append('.'); + + if 
(is_public_suffix(search_string.string_view())) { + overall_search_string.append(part); + overall_search_string.append('.'); + continue; + } + + break; + } + + auto view = overall_search_string.string_view().split_view('.'); + view.reverse(); + + StringBuilder return_string_builder; + return_string_builder.join('.', view); + if (return_string_builder.is_empty()) + return Optional {}; + return MUST(return_string_builder.to_string()); +} + +} + +""" + + with open(output_path, "w", encoding="utf-8") as f: + f.write(content) + + +def main(): + parser = argparse.ArgumentParser(description="Generate public suffix data files", add_help=False) + parser.add_argument("-H", action="help", help="Show this help message and exit") + parser.add_argument("-h", "-generated-header-path", required=True, help="Path to the header file to generate") + parser.add_argument( + "-c", "-generated-implementation-path", required=True, help="Path to the implementation file to generate" + ) + parser.add_argument("-p", "-public-suffix-list-path", required=True, help="Path to the public suffix list") + + args = parser.parse_args() + + generate_header_file(Path(args.h)) + generate_implementation_file(Path(args.p), Path(args.c)) + + +if __name__ == "__main__": + main()