mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-08-07 08:39:22 +00:00
LibURL: Percent decode over byte sequence
Decode the input byte by byte instead of iterating over UTF-8 code points. This follows the spec more closely and is also more performant.
This commit is contained in:
parent
fd25fea3ab
commit
0b4670fb7c
Notes:
github-actions[bot]
2024-12-05 16:31:03 +00:00
Author: https://github.com/shannonbooth
Commit: 0b4670fb7c
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/2675
1 changed file with 25 additions and 12 deletions
|
@ -475,23 +475,36 @@ String percent_encode(StringView input, PercentEncodeSet set, SpaceAsPlus space_
|
||||||
return MUST(builder.to_string());
|
return MUST(builder.to_string());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// https://url.spec.whatwg.org/#percent-decode
|
||||||
ByteString percent_decode(StringView input)
|
ByteString percent_decode(StringView input)
|
||||||
{
|
{
|
||||||
if (!input.contains('%'))
|
if (!input.contains('%'))
|
||||||
return input;
|
return input;
|
||||||
|
|
||||||
|
// 1. Let output be an empty byte sequence.
|
||||||
StringBuilder builder;
|
StringBuilder builder;
|
||||||
Utf8View utf8_view(input);
|
|
||||||
for (auto it = utf8_view.begin(); !it.done(); ++it) {
|
// 2. For each byte byte in input:
|
||||||
if (*it != '%') {
|
for (size_t i = 0; i < input.length(); ++i) {
|
||||||
builder.append_code_point(*it);
|
// 1. If byte is not 0x25 (%), then append byte to output.
|
||||||
} else if (!is_ascii_hex_digit(it.peek(1).value_or(0)) || !is_ascii_hex_digit(it.peek(2).value_or(0))) {
|
if (input[i] != '%') {
|
||||||
builder.append_code_point(*it);
|
builder.append(input[i]);
|
||||||
} else {
|
}
|
||||||
++it;
|
// 2. Otherwise, if byte is 0x25 (%) and the next two bytes after byte in input are not in the ranges 0x30 (0)
|
||||||
u8 byte = parse_ascii_hex_digit(*it) << 4;
|
// to 0x39 (9), 0x41 (A) to 0x46 (F), and 0x61 (a) to 0x66 (f), all inclusive, append byte to output.
|
||||||
++it;
|
else if (i + 2 >= input.length() || !is_ascii_hex_digit(input[i + 1]) || !is_ascii_hex_digit(input[i + 2])) {
|
||||||
byte += parse_ascii_hex_digit(*it);
|
builder.append(input[i]);
|
||||||
builder.append(byte);
|
}
|
||||||
|
// 3. Otherwise:
|
||||||
|
else {
|
||||||
|
// 1. Let bytePoint be the two bytes after byte in input, decoded, and then interpreted as hexadecimal number.
|
||||||
|
u8 byte_point = (parse_ascii_hex_digit(input[i + 1]) << 4) | parse_ascii_hex_digit(input[i + 2]);
|
||||||
|
|
||||||
|
// 2. Append a byte whose value is bytePoint to output.
|
||||||
|
builder.append(byte_point);
|
||||||
|
|
||||||
|
// 3. Skip the next two bytes in input.
|
||||||
|
i += 2;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return builder.to_byte_string();
|
return builder.to_byte_string();
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue