mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-07-31 21:29:06 +00:00
LibTextCodec+LibURL: Implement utf-8
and euc-jp
encoders
Implements the corresponding encoders, selects the appropriate one when encoding URL search params. If an encoder for the given encoding could not be found, fallback to utf-8.
This commit is contained in:
parent
82a63e350c
commit
72d0e3284b
Notes:
github-actions[bot]
2024-08-08 16:51:38 +00:00
Author: https://github.com/BenJilks
Commit: 72d0e3284b
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/975
Reviewed-by: https://github.com/shannonbooth
Reviewed-by: https://github.com/skyrising
Reviewed-by: https://github.com/tcl3 ✅
11 changed files with 260 additions and 22 deletions
|
@ -14,6 +14,8 @@
|
|||
#include <AK/StringBuilder.h>
|
||||
#include <AK/StringUtils.h>
|
||||
#include <AK/Utf8View.h>
|
||||
#include <LibTextCodec/Decoder.h>
|
||||
#include <LibTextCodec/Encoder.h>
|
||||
#include <LibURL/Parser.h>
|
||||
#include <LibUnicode/IDNA.h>
|
||||
|
||||
|
@ -768,18 +770,17 @@ void Parser::shorten_urls_path(URL& url)
|
|||
}
|
||||
|
||||
// https://url.spec.whatwg.org/#string-percent-encode-after-encoding
|
||||
ErrorOr<String> Parser::percent_encode_after_encoding(StringView input, PercentEncodeSet percent_encode_set, bool space_as_plus)
|
||||
ErrorOr<String> Parser::percent_encode_after_encoding(TextCodec::Encoder& encoder, StringView input, PercentEncodeSet percent_encode_set, bool space_as_plus)
|
||||
{
|
||||
// NOTE: This is written somewhat ad-hoc since we don't yet implement the Encoding spec.
|
||||
|
||||
// 1. Let encodeOutput be an empty I/O queue.
|
||||
StringBuilder output;
|
||||
|
||||
// 3. For each byte of encodeOutput converted to a byte sequence:
|
||||
for (u8 byte : input) {
|
||||
TRY(encoder.process(Utf8View(input), [&](u8 byte) -> ErrorOr<void> {
|
||||
// 1. If spaceAsPlus is true and byte is 0x20 (SP), then append U+002B (+) to output and continue.
|
||||
if (space_as_plus && byte == ' ') {
|
||||
output.append('+');
|
||||
continue;
|
||||
return {};
|
||||
}
|
||||
|
||||
// 2. Let isomorph be a code point whose value is byte’s value.
|
||||
|
@ -796,7 +797,9 @@ ErrorOr<String> Parser::percent_encode_after_encoding(StringView input, PercentE
|
|||
else {
|
||||
output.appendff("%{:02X}", byte);
|
||||
}
|
||||
}
|
||||
|
||||
return {};
|
||||
}));
|
||||
|
||||
// 6. Return output.
|
||||
return output.to_string();
|
||||
|
@ -851,7 +854,9 @@ URL Parser::basic_parse(StringView raw_input, Optional<URL> const& base_url, Opt
|
|||
// 4. Let state be state override if given, or scheme start state otherwise.
|
||||
State state = state_override.value_or(State::SchemeStart);
|
||||
|
||||
// FIXME: 5. Set encoding to the result of getting an output encoding from encoding.
|
||||
// 5. Set encoding to the result of getting an output encoding from encoding.
|
||||
auto encoder = TextCodec::encoder_for("utf-8"sv);
|
||||
VERIFY(encoder.has_value());
|
||||
|
||||
// 6. Let buffer be the empty string.
|
||||
StringBuilder buffer;
|
||||
|
@ -1684,7 +1689,7 @@ URL Parser::basic_parse(StringView raw_input, Optional<URL> const& base_url, Opt
|
|||
auto query_percent_encode_set = url->is_special() ? PercentEncodeSet::SpecialQuery : PercentEncodeSet::Query;
|
||||
|
||||
// 2. Percent-encode after encoding, with encoding, buffer, and queryPercentEncodeSet, and append the result to url’s query.
|
||||
url->m_data->query = percent_encode_after_encoding(buffer.string_view(), query_percent_encode_set).release_value_but_fixme_should_propagate_errors();
|
||||
url->m_data->query = percent_encode_after_encoding(*encoder, buffer.string_view(), query_percent_encode_set).release_value_but_fixme_should_propagate_errors();
|
||||
|
||||
// 3. Set buffer to the empty string.
|
||||
buffer.clear();
|
||||
|
@ -1726,7 +1731,7 @@ URL Parser::basic_parse(StringView raw_input, Optional<URL> const& base_url, Opt
|
|||
// NOTE: The percent-encode is done on EOF on the entire buffer.
|
||||
buffer.append_code_point(code_point);
|
||||
} else {
|
||||
url->m_data->fragment = percent_encode_after_encoding(buffer.string_view(), PercentEncodeSet::Fragment).release_value_but_fixme_should_propagate_errors();
|
||||
url->m_data->fragment = percent_encode_after_encoding(*encoder, buffer.string_view(), PercentEncodeSet::Fragment).release_value_but_fixme_should_propagate_errors();
|
||||
buffer.clear();
|
||||
}
|
||||
break;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue