diff --git a/Libraries/LibIPC/Decoder.cpp b/Libraries/LibIPC/Decoder.cpp
index a75537be758..e32d7f7f762 100644
--- a/Libraries/LibIPC/Decoder.cpp
+++ b/Libraries/LibIPC/Decoder.cpp
@@ -111,6 +111,13 @@ ErrorOr<URL::Origin> decode(Decoder& decoder)
     return URL::Origin { move(scheme), move(host), port };
 }
 
+template<>
+ErrorOr<URL::Host> decode(Decoder& decoder)
+{
+    auto value = TRY(decoder.decode<URL::Host::VariantType>());
+    return URL::Host { move(value) };
+}
+
 template<>
 ErrorOr<File> decode(Decoder& decoder)
 {
diff --git a/Libraries/LibIPC/Decoder.h b/Libraries/LibIPC/Decoder.h
index af730c46f74..709ad106eaa 100644
--- a/Libraries/LibIPC/Decoder.h
+++ b/Libraries/LibIPC/Decoder.h
@@ -108,6 +108,9 @@ ErrorOr<URL::URL> decode(Decoder&);
 template<>
 ErrorOr<URL::Origin> decode(Decoder&);
 
+template<>
+ErrorOr<URL::Host> decode(Decoder&);
+
 template<>
 ErrorOr<File> decode(Decoder&);
 
diff --git a/Libraries/LibIPC/Encoder.cpp b/Libraries/LibIPC/Encoder.cpp
index b23c73a4776..5ebae06f557 100644
--- a/Libraries/LibIPC/Encoder.cpp
+++ b/Libraries/LibIPC/Encoder.cpp
@@ -131,6 +131,13 @@ ErrorOr<void> encode(Encoder& encoder, URL::Origin const& origin)
     return {};
 }
 
+template<>
+ErrorOr<void> encode(Encoder& encoder, URL::Host const& host)
+{
+    TRY(encoder.encode(host.value()));
+    return {};
+}
+
 template<>
 ErrorOr<void> encode(Encoder& encoder, File const& file)
 {
diff --git a/Libraries/LibIPC/Encoder.h b/Libraries/LibIPC/Encoder.h
index 3900e07d0c4..bedb0103f60 100644
--- a/Libraries/LibIPC/Encoder.h
+++ b/Libraries/LibIPC/Encoder.h
@@ -107,6 +107,9 @@ ErrorOr<void> encode(Encoder&, URL::URL const&);
 template<>
 ErrorOr<void> encode(Encoder&, URL::Origin const&);
 
+template<>
+ErrorOr<void> encode(Encoder&, URL::Host const&);
+
 template<>
 ErrorOr<void> encode(Encoder&, File const&);
 
diff --git a/Libraries/LibURL/CMakeLists.txt b/Libraries/LibURL/CMakeLists.txt
index 2edcded7413..8b41bdc67c9 100644
--- a/Libraries/LibURL/CMakeLists.txt
+++ b/Libraries/LibURL/CMakeLists.txt
@@ -1,6 +1,7 @@
 include(public_suffix)
 
 set(SOURCES
+    Host.cpp
     Origin.cpp
     Parser.cpp
     URL.cpp
diff --git a/Libraries/LibURL/Host.cpp b/Libraries/LibURL/Host.cpp
new file mode 100644
index 00000000000..9edd9b52087
--- /dev/null
+++ b/Libraries/LibURL/Host.cpp
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2021, Max Wipfli
+ * Copyright (c) 2023-2024, Shannon Booth
+ * Copyright (c) 2024, Sam Atkins
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <LibURL/Host.h>
+
+namespace URL {
+
+Host::Host(VariantType&& value)
+    : m_value(move(value))
+{
+}
+
+Host::Host(String&& string)
+    : m_value(move(string))
+{
+}
+
+// https://url.spec.whatwg.org/#concept-ipv4-serializer
+static String serialize_ipv4_address(IPv4Address address)
+{
+    // 1. Let output be the empty string.
+    // NOTE: Array to avoid prepend.
+    Array<u8, 4> output;
+
+    // 2. Let n be the value of address.
+    u32 n = address;
+
+    // 3. For each i in the range 1 to 4, inclusive:
+    for (size_t i = 0; i <= 3; ++i) {
+        // 1. Prepend n % 256, serialized, to output.
+        output[3 - i] = n % 256;
+
+        // 2. If i is not 4, then prepend U+002E (.) to output.
+        // NOTE: done at end
+
+        // 3. Set n to floor(n / 256).
+        n /= 256;
+    }
+
+    // 4. Return output.
+    return MUST(String::formatted("{}.{}.{}.{}", output[0], output[1], output[2], output[3]));
+}
+
+// https://url.spec.whatwg.org/#concept-ipv6-serializer
+static void serialize_ipv6_address(IPv6Address const& address, StringBuilder& output)
+{
+    // 1. Let output be the empty string.
+
+    // 2. Let compress be an index to the first IPv6 piece in the first longest sequences of address’s IPv6 pieces that are 0.
+    Optional<size_t> compress;
+    size_t longest_sequence_length = 0;
+    size_t current_sequence_length = 0;
+    size_t current_sequence_start = 0;
+    for (size_t i = 0; i < 8; ++i) {
+        if (address[i] == 0) {
+            if (current_sequence_length == 0)
+                current_sequence_start = i;
+            ++current_sequence_length;
+        } else {
+            if (current_sequence_length > longest_sequence_length) {
+                longest_sequence_length = current_sequence_length;
+                compress = current_sequence_start;
+            }
+            current_sequence_length = 0;
+        }
+    }
+
+    if (current_sequence_length > longest_sequence_length) {
+        longest_sequence_length = current_sequence_length;
+        compress = current_sequence_start;
+    }
+
+    // 3. If there is no sequence of address’s IPv6 pieces that are 0 that is longer than 1, then set compress to null.
+    if (longest_sequence_length <= 1)
+        compress = {};
+
+    // 4. Let ignore0 be false.
+    auto ignore0 = false;
+
+    // 5. For each pieceIndex in the range 0 to 7, inclusive:
+    for (size_t piece_index = 0; piece_index <= 7; ++piece_index) {
+        // 1. If ignore0 is true and address[pieceIndex] is 0, then continue.
+        if (ignore0 && address[piece_index] == 0)
+            continue;
+
+        // 2. Otherwise, if ignore0 is true, set ignore0 to false.
+        if (ignore0)
+            ignore0 = false;
+
+        // 3. If compress is pieceIndex, then:
+        if (compress == piece_index) {
+            // 1. Let separator be "::" if pieceIndex is 0, and U+003A (:) otherwise.
+            auto separator = piece_index == 0 ? "::"sv : ":"sv;
+
+            // 2. Append separator to output.
+            output.append(separator);
+
+            // 3. Set ignore0 to true and continue.
+            ignore0 = true;
+            continue;
+        }
+
+        // 4. Append address[pieceIndex], represented as the shortest possible lowercase hexadecimal number, to output.
+        output.appendff("{:x}", address[piece_index]);
+
+        // 5. If pieceIndex is not 7, then append U+003A (:) to output.
+        if (piece_index != 7)
+            output.append(':');
+    }
+
+    // 6. Return output.
+}
+
+// https://url.spec.whatwg.org/#concept-domain
+bool Host::is_domain() const
+{
+    // A domain is a non-empty ASCII string that identifies a realm within a network.
+    return m_value.has<String>() && !m_value.get<String>().is_empty();
+}
+
+// https://url.spec.whatwg.org/#empty-host
+bool Host::is_empty_host() const
+{
+    // An empty host is the empty string.
+    return m_value.has<String>() && m_value.get<String>().is_empty();
+}
+
+// https://url.spec.whatwg.org/#concept-host-serializer
+String Host::serialize() const
+{
+    return m_value.visit(
+        // 1. If host is an IPv4 address, return the result of running the IPv4 serializer on host.
+        [](IPv4Address const& address) {
+            return serialize_ipv4_address(address);
+        },
+        // 2. Otherwise, if host is an IPv6 address, return U+005B ([), followed by the result of running the IPv6 serializer on host, followed by U+005D (]).
+        [](IPv6Address const& address) {
+            StringBuilder output;
+            output.append('[');
+            serialize_ipv6_address(address, output);
+            output.append(']');
+            return output.to_string_without_validation();
+        },
+        // 3. Otherwise, host is a domain, opaque host, or empty host, return host.
+        [](String const& string) {
+            return string;
+        });
+}
+
+}
diff --git a/Libraries/LibURL/Host.h b/Libraries/LibURL/Host.h
index e615d5fb264..cfa1e89db8a 100644
--- a/Libraries/LibURL/Host.h
+++ b/Libraries/LibURL/Host.h
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2024, Shannon Booth
+ * Copyright (c) 2024, Sam Atkins
  *
  * SPDX-License-Identifier: BSD-2-Clause
  */
@@ -26,6 +27,29 @@ using IPv6Address = Array<u16, 8>;
 // https://url.spec.whatwg.org/#concept-host
 // A host is a domain, an IP address, an opaque host, or an empty host. Typically a host serves as a network address,
 // but it is sometimes used as opaque identifier in URLs where a network address is not necessary.
-using Host = Variant<IPv4Address, IPv6Address, String>;
+class Host {
+public:
+    using VariantType = Variant<IPv4Address, IPv6Address, String>;
+    Host(VariantType&&);
+    Host(String&&);
+
+    bool is_domain() const;
+    bool is_empty_host() const;
+
+    template<typename T>
+    bool has() const { return m_value.template has<T>(); }
+
+    template<typename T>
+    T const& get() const { return m_value.template get<T>(); }
+
+    bool operator==(Host const& other) const = default;
+
+    VariantType const& value() const { return m_value; }
+
+    String serialize() const;
+
+private:
+    VariantType m_value;
+};
 
 }
diff --git a/Libraries/LibURL/Origin.cpp b/Libraries/LibURL/Origin.cpp
index 82943ece5b7..c808b32b560 100644
--- a/Libraries/LibURL/Origin.cpp
+++ b/Libraries/LibURL/Origin.cpp
@@ -24,7 +24,7 @@ String Origin::serialize() const
     result.append("://"sv);
 
     // 4. Append origin's host, serialized, to result.
-    result.append(MUST(Parser::serialize_host(host())));
+    result.append(host().serialize());
 
     // 5. If origin's port is non-null, append a U+003A COLON character (:), and origin's port, serialized, to result.
     if (port().has_value()) {
@@ -50,7 +50,7 @@ unsigned Traits<URL::Origin>::hash(URL::Origin const& origin)
     if (origin.port().has_value())
         hash = pair_int_hash(hash, *origin.port());
 
-    hash = pair_int_hash(hash, URL::Parser::serialize_host(origin.host()).release_value_but_fixme_should_propagate_errors().hash());
+    hash = pair_int_hash(hash, origin.host().serialize().hash());
 
     return hash;
 }
diff --git a/Libraries/LibURL/Parser.cpp b/Libraries/LibURL/Parser.cpp
index ecb2df72b2f..f5279da2d3c 100644
--- a/Libraries/LibURL/Parser.cpp
+++ b/Libraries/LibURL/Parser.cpp
@@ -236,102 +236,6 @@ static Optional<IPv4Address> parse_ipv4_address(StringView input)
     return ipv4;
 }
 
-// https://url.spec.whatwg.org/#concept-ipv4-serializer
-static ErrorOr<String> serialize_ipv4_address(IPv4Address address)
-{
-    // 1. Let output be the empty string.
-    // NOTE: Array to avoid prepend.
-    Array<u8, 4> output;
-
-    // 2. Let n be the value of address.
-    u32 n = address;
-
-    // 3. For each i in the range 1 to 4, inclusive:
-    for (size_t i = 0; i <= 3; ++i) {
-        // 1. Prepend n % 256, serialized, to output.
-        output[3 - i] = n % 256;
-
-        // 2. If i is not 4, then prepend U+002E (.) to output.
-        // NOTE: done at end
-
-        // 3. Set n to floor(n / 256).
-        n /= 256;
-    }
-
-    // 4. Return output.
-    return String::formatted("{}.{}.{}.{}", output[0], output[1], output[2], output[3]);
-}
-
-// https://url.spec.whatwg.org/#concept-ipv6-serializer
-static void serialize_ipv6_address(IPv6Address const& address, StringBuilder& output)
-{
-    // 1. Let output be the empty string.
-
-    // 2. Let compress be an index to the first IPv6 piece in the first longest sequences of address’s IPv6 pieces that are 0.
-    Optional<size_t> compress;
-    size_t longest_sequence_length = 0;
-    size_t current_sequence_length = 0;
-    size_t current_sequence_start = 0;
-    for (size_t i = 0; i < 8; ++i) {
-        if (address[i] == 0) {
-            if (current_sequence_length == 0)
-                current_sequence_start = i;
-            ++current_sequence_length;
-        } else {
-            if (current_sequence_length > longest_sequence_length) {
-                longest_sequence_length = current_sequence_length;
-                compress = current_sequence_start;
-            }
-            current_sequence_length = 0;
-        }
-    }
-
-    if (current_sequence_length > longest_sequence_length) {
-        longest_sequence_length = current_sequence_length;
-        compress = current_sequence_start;
-    }
-
-    // 3. If there is no sequence of address’s IPv6 pieces that are 0 that is longer than 1, then set compress to null.
-    if (longest_sequence_length <= 1)
-        compress = {};
-
-    // 4. Let ignore0 be false.
-    auto ignore0 = false;
-
-    // 5. For each pieceIndex in the range 0 to 7, inclusive:
-    for (size_t piece_index = 0; piece_index <= 7; ++piece_index) {
-        // 1. If ignore0 is true and address[pieceIndex] is 0, then continue.
-        if (ignore0 && address[piece_index] == 0)
-            continue;
-
-        // 2. Otherwise, if ignore0 is true, set ignore0 to false.
-        if (ignore0)
-            ignore0 = false;
-
-        // 3. If compress is pieceIndex, then:
-        if (compress == piece_index) {
-            // 1. Let separator be "::" if pieceIndex is 0, and U+003A (:) otherwise.
-            auto separator = piece_index == 0 ? "::"sv : ":"sv;
-
-            // 2. Append separator to output.
-            output.append(separator);
-
-            // 3. Set ignore0 to true and continue.
-            ignore0 = true;
-            continue;
-        }
-
-        // 4. Append address[pieceIndex], represented as the shortest possible lowercase hexadecimal number, to output.
-        output.appendff("{:x}", address[piece_index]);
-
-        // 5. If pieceIndex is not 7, then append U+003A (:) to output.
-        if (piece_index != 7)
-            output.append(':');
-    }
-
-    // 6. Return output.
-}
-
 // https://url.spec.whatwg.org/#concept-ipv6-parser
 static Optional<IPv6Address> parse_ipv6_address(StringView input)
 {
@@ -654,7 +558,7 @@ static Optional<Host> parse_host(StringView input, bool is_opaque = false)
         auto address = parse_ipv6_address(input.substring_view(1, input.length() - 2));
         if (!address.has_value())
             return {};
-        return address.release_value();
+        return Host { address.release_value() };
     }
 
     // 2. If isOpaque is true, then return the result of opaque-host parsing input.
@@ -690,35 +594,13 @@ static Optional<Host> parse_host(StringView input, bool is_opaque = false)
         if (!ipv4_host.has_value())
             return {};
 
-        return ipv4_host.release_value();
+        return Host { ipv4_host.release_value() };
     }
 
     // 9. Return asciiDomain.
     return ascii_domain;
 }
 
-// https://url.spec.whatwg.org/#concept-host-serializer
-ErrorOr<String> Parser::serialize_host(Host const& host)
-{
-    // 1. If host is an IPv4 address, return the result of running the IPv4 serializer on host.
-    if (host.has<IPv4Address>())
-        return serialize_ipv4_address(host.get<IPv4Address>());
-
-    // 2. Otherwise, if host is an IPv6 address, return U+005B ([), followed by the result of running the IPv6 serializer on host, followed by U+005D (]).
-    if (host.has<IPv6Address>()) {
-        StringBuilder output;
-        TRY(output.try_append('['));
-        serialize_ipv6_address(host.get<IPv6Address>(), output);
-        TRY(output.try_append(']'));
-        return output.to_string();
-    }
-
-    // 3. Otherwise, host is a domain, opaque host, or empty host, return host.
-    if (host.has<String>())
-        return host.get<String>();
-    return String {};
-}
-
 // https://url.spec.whatwg.org/#start-with-a-windows-drive-letter
 constexpr bool starts_with_windows_drive_letter(StringView input)
 {
@@ -953,7 +835,7 @@ URL Parser::basic_parse(StringView raw_input, Optional const& base_url, URL
                     return *url;
 
                 // 4. If url’s scheme is "file" and its host is an empty host, then return.
-                if (url->scheme() == "file"sv && url->host() == String {})
+                if (url->scheme() == "file"sv && url->host().has_value() && url->host()->is_empty_host())
                     return *url;
             }
 
diff --git a/Libraries/LibURL/Parser.h b/Libraries/LibURL/Parser.h
index b605993247a..c7070569e1b 100644
--- a/Libraries/LibURL/Parser.h
+++ b/Libraries/LibURL/Parser.h
@@ -63,9 +63,6 @@ public:
     // https://url.spec.whatwg.org/#string-percent-encode-after-encoding
     static String percent_encode_after_encoding(TextCodec::Encoder&, StringView input, PercentEncodeSet percent_encode_set, bool space_as_plus = false);
 
-    // https://url.spec.whatwg.org/#concept-host-serializer
-    static ErrorOr<String> serialize_host(Host const&);
-
     // https://url.spec.whatwg.org/#shorten-a-urls-path
     static void shorten_urls_path(URL&);
 };
diff --git a/Libraries/LibURL/URL.cpp b/Libraries/LibURL/URL.cpp
index b36bc83d747..80d76ee57ac 100644
--- a/Libraries/LibURL/URL.cpp
+++ b/Libraries/LibURL/URL.cpp
@@ -88,7 +88,7 @@ void URL::set_host(Host host)
 // https://url.spec.whatwg.org/#concept-host-serializer
 ErrorOr<String> URL::serialized_host() const
 {
-    return Parser::serialize_host(m_data->host.value());
+    return m_data->host->serialize();
 }
 
 void URL::set_port(Optional<u16> port)
@@ -119,7 +119,8 @@ void URL::append_path(StringView path)
 bool URL::cannot_have_a_username_or_password_or_port() const
 {
     // A URL cannot have a username/password/port if its host is null or the empty string, or its scheme is "file".
-    return !m_data->host.has_value() || m_data->host == String {} || m_data->scheme == "file"sv;
+
+    return !m_data->host.has_value() || m_data->host->is_empty_host() || m_data->scheme == "file"sv;
 }
 
 // FIXME: This is by no means complete.
diff --git a/Libraries/LibWeb/DOM/Document.cpp b/Libraries/LibWeb/DOM/Document.cpp
index 6670e9a3260..969307488f0 100644
--- a/Libraries/LibWeb/DOM/Document.cpp
+++ b/Libraries/LibWeb/DOM/Document.cpp
@@ -3194,7 +3194,7 @@ String Document::domain() const
         return String {};
 
     // 3. Return effectiveDomain, serialized.
-    return MUST(URL::Parser::serialize_host(effective_domain.release_value()));
+    return effective_domain->serialize();
 }
 
 void Document::set_domain(String const& domain)
diff --git a/Libraries/LibWeb/Fetch/Fetching/Fetching.cpp b/Libraries/LibWeb/Fetch/Fetching/Fetching.cpp
index b439c831a95..afd2ca7953d 100644
--- a/Libraries/LibWeb/Fetch/Fetching/Fetching.cpp
+++ b/Libraries/LibWeb/Fetch/Fetching/Fetching.cpp
@@ -322,7 +322,7 @@ WebIDL::ExceptionOr> main_fetch(JS::Realm& realm, Infra
             // - request’s current URL’s scheme is "http"
             request->current_url().scheme() == "http"sv
             // - request’s current URL’s host is a domain
-            && request->current_url().host().has_value() && DOMURL::host_is_domain(request->current_url().host().value())
+            && request->current_url().host().has_value() && request->current_url().host()->is_domain()
             // FIXME: - Matching request’s current URL’s host per Known HSTS Host Domain Name Matching results in either a
             // superdomain match with an asserted includeSubDomains directive or a congruent match (with or without an
             // asserted includeSubDomains directive) [HSTS]; or DNS resolution for the request finds a matching HTTPS RR
diff --git a/Libraries/LibWeb/HTML/WorkerLocation.cpp b/Libraries/LibWeb/HTML/WorkerLocation.cpp
index f4902c64930..273ecc0e1e7 100644
--- a/Libraries/LibWeb/HTML/WorkerLocation.cpp
+++ b/Libraries/LibWeb/HTML/WorkerLocation.cpp
@@ -60,8 +60,6 @@ WebIDL::ExceptionOr<String> WorkerLocation::host() const
 // https://html.spec.whatwg.org/multipage/workers.html#dom-workerlocation-hostname
 WebIDL::ExceptionOr<String> WorkerLocation::hostname() const
 {
-    auto& vm = realm().vm();
-
     // The hostname getter steps are:
     // 1. Let host be this's WorkerGlobalScope object's url's host.
     auto const& host = m_global_scope->url().host();
@@ -71,7 +69,7 @@ WebIDL::ExceptionOr<String> WorkerLocation::hostname() const
         return String {};
 
     // 3. Return host, serialized.
-    return TRY_OR_THROW_OOM(vm, URL::Parser::serialize_host(host.value()));
+    return host->serialize();
 }
 
 // https://html.spec.whatwg.org/multipage/workers.html#dom-workerlocation-port