mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-08-01 13:49:16 +00:00
LibTextCodec+LibURL: Implement utf-8
and euc-jp
encoders
Implements the corresponding encoders and selects the appropriate one when encoding URL search params. If an encoder for the given encoding cannot be found, fall back to UTF-8.
This commit is contained in:
parent
82a63e350c
commit
72d0e3284b
Notes:
github-actions[bot]
2024-08-08 16:51:38 +00:00
Author: https://github.com/BenJilks
Commit: 72d0e3284b
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/975
Reviewed-by: https://github.com/shannonbooth
Reviewed-by: https://github.com/skyrising
Reviewed-by: https://github.com/tcl3 ✅
11 changed files with 260 additions and 22 deletions
|
@ -21,6 +21,7 @@ struct LookupTable {
|
||||||
u32 max_code_point;
|
u32 max_code_point;
|
||||||
Vector<u32> code_points;
|
Vector<u32> code_points;
|
||||||
bool generate_accessor;
|
bool generate_accessor;
|
||||||
|
bool generate_inverse_accessor;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct LookupTables {
|
struct LookupTables {
|
||||||
|
@ -33,7 +34,12 @@ enum class GenerateAccessor {
|
||||||
Yes,
|
Yes,
|
||||||
};
|
};
|
||||||
|
|
||||||
LookupTable prepare_table(JsonArray const& data, GenerateAccessor generate_accessor = GenerateAccessor::No)
|
// Selects whether prepare_table() marks a table as needing a generated
// code_point_<name>_index() accessor (code point -> pointer lookup).
enum class GenerateInverseAccessor {
    No,
    Yes,
};
|
||||||
|
|
||||||
|
LookupTable prepare_table(JsonArray const& data, GenerateAccessor generate_accessor = GenerateAccessor::No, GenerateInverseAccessor generate_inverse_accessor = GenerateInverseAccessor::No)
|
||||||
{
|
{
|
||||||
Vector<u32> code_points;
|
Vector<u32> code_points;
|
||||||
code_points.ensure_capacity(data.size());
|
code_points.ensure_capacity(data.size());
|
||||||
|
@ -58,7 +64,7 @@ LookupTable prepare_table(JsonArray const& data, GenerateAccessor generate_acces
|
||||||
} else {
|
} else {
|
||||||
VERIFY(first_pointer == 0);
|
VERIFY(first_pointer == 0);
|
||||||
}
|
}
|
||||||
return { first_pointer, max, move(code_points), generate_accessor == GenerateAccessor::Yes };
|
return { first_pointer, max, move(code_points), generate_accessor == GenerateAccessor::Yes, generate_inverse_accessor == GenerateInverseAccessor::Yes };
|
||||||
}
|
}
|
||||||
|
|
||||||
void generate_table(SourceGenerator generator, StringView name, LookupTable& table)
|
void generate_table(SourceGenerator generator, StringView name, LookupTable& table)
|
||||||
|
@ -81,6 +87,8 @@ void generate_table(SourceGenerator generator, StringView name, LookupTable& tab
|
||||||
generator.appendln("\n};");
|
generator.appendln("\n};");
|
||||||
if (table.generate_accessor)
|
if (table.generate_accessor)
|
||||||
generator.appendln("Optional<u32> index_@name@_code_point(u32 pointer);");
|
generator.appendln("Optional<u32> index_@name@_code_point(u32 pointer);");
|
||||||
|
if (table.generate_inverse_accessor)
|
||||||
|
generator.appendln("Optional<u32> code_point_@name@_index(u32 code_point);");
|
||||||
}
|
}
|
||||||
|
|
||||||
ErrorOr<void> generate_header_file(LookupTables& tables, Core::File& file)
|
ErrorOr<void> generate_header_file(LookupTables& tables, Core::File& file)
|
||||||
|
@ -155,6 +163,42 @@ Optional<u32> index_@name@_code_point(u32 pointer)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits the definition of `Optional<u32> code_point_<name>_index(u32 code_point)`,
// which maps a code point back to its pointer in the `s_<name>_index` table.
//
// generator: forked SourceGenerator that receives the generated code.
// name:      identifier of the index, substituted for @name@ in the template.
// table:     only `first_pointer` is consulted; when it is non-zero the generated
//            function adds `s_<name>_index_first_pointer` to the array position.
//
// The generated function scans the table front to back and returns on the first
// match, or an empty Optional when the code point is not present.
void generate_inverse_table_accessor(SourceGenerator generator, StringView name, LookupTable& table)
{
    generator.set("name", name);

    // FIXME: Doing a linear search here is really slow, should be generating
    //        some kind of reverse lookup table.

    // Shared prologue: function header and the scan loop up to the match.
    generator.append(R"~~~(
Optional<u32> code_point_@name@_index(u32 code_point)
{
    for (u32 i = 0; i < s_@name@_index.size(); ++i) {
        if (s_@name@_index[i] == code_point) {)~~~");

    // The only part that differs between tables: whether the array position has
    // to be rebased by the table's first pointer.
    if (table.first_pointer > 0) {
        generator.append(R"~~~(
            return s_@name@_index_first_pointer + i;)~~~");
    } else {
        generator.append(R"~~~(
            return i;)~~~");
    }

    // Shared epilogue: close the loop and report "not found".
    generator.append(R"~~~(
        }
    }
    return {};
}
)~~~");
}
|
||||||
|
|
||||||
ErrorOr<void> generate_implementation_file(LookupTables& tables, Core::File& file)
|
ErrorOr<void> generate_implementation_file(LookupTables& tables, Core::File& file)
|
||||||
{
|
{
|
||||||
StringBuilder builder;
|
StringBuilder builder;
|
||||||
|
@ -169,6 +213,8 @@ namespace TextCodec {
|
||||||
for (auto& [key, table] : tables.indexes) {
|
for (auto& [key, table] : tables.indexes) {
|
||||||
if (table.generate_accessor)
|
if (table.generate_accessor)
|
||||||
generate_table_accessor(generator.fork(), key, table);
|
generate_table_accessor(generator.fork(), key, table);
|
||||||
|
if (table.generate_inverse_accessor)
|
||||||
|
generate_inverse_table_accessor(generator.fork(), key, table);
|
||||||
}
|
}
|
||||||
|
|
||||||
generator.appendln("\n}");
|
generator.appendln("\n}");
|
||||||
|
@ -222,7 +268,7 @@ ErrorOr<int> serenity_main(Main::Arguments arguments)
|
||||||
.indexes = {
|
.indexes = {
|
||||||
{ "gb18030"sv, move(gb18030_table) },
|
{ "gb18030"sv, move(gb18030_table) },
|
||||||
{ "big5"sv, prepare_table(data.get("big5"sv)->as_array(), GenerateAccessor::Yes) },
|
{ "big5"sv, prepare_table(data.get("big5"sv)->as_array(), GenerateAccessor::Yes) },
|
||||||
{ "jis0208"sv, prepare_table(data.get("jis0208"sv)->as_array(), GenerateAccessor::Yes) },
|
{ "jis0208"sv, prepare_table(data.get("jis0208"sv)->as_array(), GenerateAccessor::Yes, GenerateInverseAccessor::Yes) },
|
||||||
{ "jis0212"sv, prepare_table(data.get("jis0212"sv)->as_array(), GenerateAccessor::Yes) },
|
{ "jis0212"sv, prepare_table(data.get("jis0212"sv)->as_array(), GenerateAccessor::Yes) },
|
||||||
{ "euc_kr"sv, prepare_table(data.get("euc-kr"sv)->as_array(), GenerateAccessor::Yes) },
|
{ "euc_kr"sv, prepare_table(data.get("euc-kr"sv)->as_array(), GenerateAccessor::Yes) },
|
||||||
{ "ibm866"sv, prepare_table(data.get("ibm866"sv)->as_array()) },
|
{ "ibm866"sv, prepare_table(data.get("ibm866"sv)->as_array()) },
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
set(TEST_SOURCES
|
set(TEST_SOURCES
|
||||||
TestTextDecoders.cpp
|
TestTextDecoders.cpp
|
||||||
|
TestTextEncoders.cpp
|
||||||
)
|
)
|
||||||
|
|
||||||
foreach(source IN LISTS TEST_SOURCES)
|
foreach(source IN LISTS TEST_SOURCES)
|
||||||
|
|
45
Tests/LibTextCodec/TestTextEncoders.cpp
Normal file
45
Tests/LibTextCodec/TestTextEncoders.cpp
Normal file
|
@ -0,0 +1,45 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2024, Ben Jilks <benjyjilks@gmail.com>
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: BSD-2-Clause
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <LibTest/TestCase.h>
|
||||||
|
#include <LibTextCodec/Encoder.h>
|
||||||
|
|
||||||
|
TEST_CASE(test_utf8_encode)
{
    // Unicode character U+1F600 GRINNING FACE
    auto input = "\U0001F600"sv;
    TextCodec::UTF8Encoder utf8_encoder;

    // Gather every byte handed to the callback so the full output can be checked.
    Vector<u8> encoded;
    auto collect = [&](u8 b) {
        return encoded.try_append(b);
    };
    MUST(utf8_encoder.process(Utf8View(input), collect));

    // Expected output: F0 9F 98 80.
    EXPECT(encoded.size() == 4);
    EXPECT(encoded[0] == 0xF0);
    EXPECT(encoded[1] == 0x9F);
    EXPECT(encoded[2] == 0x98);
    EXPECT(encoded[3] == 0x80);
}
|
||||||
|
|
||||||
|
TEST_CASE(test_euc_jp_encoder)
{
    // U+A5 Yen Sign
    // U+3088 Hiragana Letter Yo
    // U+30C4 Katakana Letter Tu
    auto input = "\U000000A5\U00003088\U000030C4"sv;
    TextCodec::EUCJPEncoder euc_jp_encoder;

    // Gather every byte handed to the callback so the full output can be checked.
    Vector<u8> encoded;
    auto collect = [&](u8 b) {
        return encoded.try_append(b);
    };
    MUST(euc_jp_encoder.process(Utf8View(input), collect));

    // Expected output: one byte for the yen sign, then two bytes per kana.
    EXPECT(encoded.size() == 5);
    EXPECT(encoded[0] == 0x5C);
    EXPECT(encoded[1] == 0xA4);
    EXPECT(encoded[2] == 0xE8);
    EXPECT(encoded[3] == 0xA5);
    EXPECT(encoded[4] == 0xC4);
}
|
|
@ -2,6 +2,7 @@ include(libtextcodec_generators)
|
||||||
|
|
||||||
set(SOURCES
|
set(SOURCES
|
||||||
Decoder.cpp
|
Decoder.cpp
|
||||||
|
Encoder.cpp
|
||||||
)
|
)
|
||||||
|
|
||||||
generate_encoding_indexes()
|
generate_encoding_indexes()
|
||||||
|
|
103
Userland/Libraries/LibTextCodec/Encoder.cpp
Normal file
103
Userland/Libraries/LibTextCodec/Encoder.cpp
Normal file
|
@ -0,0 +1,103 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2024, Ben Jilks <benjyjilks@gmail.com>
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: BSD-2-Clause
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <AK/Error.h>
|
||||||
|
#include <AK/Utf8View.h>
|
||||||
|
#include <LibTextCodec/Decoder.h>
|
||||||
|
#include <LibTextCodec/Encoder.h>
|
||||||
|
#include <LibTextCodec/LookupTables.h>
|
||||||
|
|
||||||
|
namespace TextCodec {
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
UTF8Encoder s_utf8_encoder;
|
||||||
|
EUCJPEncoder s_euc_jp_encoder;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the encoder registered for `encoding`, compared ASCII case-insensitively.
// Only "utf-8" and "euc-jp" are recognised; anything else is logged and yields an
// empty Optional.
Optional<Encoder&> encoder_for_exact_name(StringView encoding)
{
    auto const matches = [&](StringView candidate) {
        return encoding.equals_ignoring_ascii_case(candidate);
    };

    if (matches("utf-8"sv))
        return s_utf8_encoder;

    if (matches("euc-jp"sv))
        return s_euc_jp_encoder;

    dbgln("TextCodec: No encoder implemented for encoding '{}'", encoding);
    return {};
}
|
||||||
|
|
||||||
|
// Resolves `label` with get_standardized_encoding() and returns the matching
// encoder, or an empty Optional when the label is unknown or no encoder exists.
Optional<Encoder&> encoder_for(StringView label)
{
    auto standardized_name = get_standardized_encoding(label);
    if (!standardized_name.has_value())
        return Optional<Encoder&> {};

    return encoder_for_exact_name(standardized_name.value());
}
|
||||||
|
|
||||||
|
// https://encoding.spec.whatwg.org/#utf-8-encoder
|
||||||
|
ErrorOr<void> UTF8Encoder::process(Utf8View input, Function<ErrorOr<void>(u8)> on_byte)
|
||||||
|
{
|
||||||
|
ReadonlyBytes bytes { input.bytes(), input.byte_length() };
|
||||||
|
for (auto byte : bytes)
|
||||||
|
TRY(on_byte(byte));
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
|
||||||
|
// https://encoding.spec.whatwg.org/#euc-jp-encoder
|
||||||
|
ErrorOr<void> EUCJPEncoder::process(Utf8View input, Function<ErrorOr<void>(u8)> on_byte)
|
||||||
|
{
|
||||||
|
for (auto item : input) {
|
||||||
|
// 1. If code point is end-of-queue, return finished.
|
||||||
|
|
||||||
|
// 2. If code point is an ASCII code point, return a byte whose value is code point.
|
||||||
|
if (is_ascii(item)) {
|
||||||
|
TRY(on_byte(static_cast<u8>(item)));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 3. If code point is U+00A5, return byte 0x5C.
|
||||||
|
if (item == 0x00A5) {
|
||||||
|
TRY(on_byte(static_cast<u8>(0x5C)));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 4. If code point is U+203E, return byte 0x7E.
|
||||||
|
if (item == 0x203E) {
|
||||||
|
TRY(on_byte(static_cast<u8>(0x7E)));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 5. If code point is in the range U+FF61 to U+FF9F, inclusive, return two bytes whose values are 0x8E and code point − 0xFF61 + 0xA1.
|
||||||
|
if (item >= 0xFF61 && item <= 0xFF9F) {
|
||||||
|
TRY(on_byte(0x8E));
|
||||||
|
TRY(on_byte(static_cast<u8>(item - 0xFF61 + 0xA1)));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 6. If code point is U+2212, set it to U+FF0D.
|
||||||
|
if (item == 0x2212)
|
||||||
|
item = 0xFF0D;
|
||||||
|
|
||||||
|
// 7. Let pointer be the index pointer for code point in index jis0208.
|
||||||
|
auto pointer = code_point_jis0208_index(item);
|
||||||
|
|
||||||
|
// 8. If pointer is null, return error with code point.
|
||||||
|
if (!pointer.has_value()) {
|
||||||
|
// TODO: Report error.
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 9. Let lead be pointer / 94 + 0xA1.
|
||||||
|
auto lead = *pointer / 94 + 0xA1;
|
||||||
|
|
||||||
|
// 10. Let trail be pointer % 94 + 0xA1.
|
||||||
|
auto trail = *pointer % 94 + 0xA1;
|
||||||
|
|
||||||
|
// 11. Return two bytes whose values are lead and trail.
|
||||||
|
TRY(on_byte(static_cast<u8>(lead)));
|
||||||
|
TRY(on_byte(static_cast<u8>(trail)));
|
||||||
|
}
|
||||||
|
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
35
Userland/Libraries/LibTextCodec/Encoder.h
Normal file
35
Userland/Libraries/LibTextCodec/Encoder.h
Normal file
|
@ -0,0 +1,35 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2024, Ben Jilks <benjyjilks@gmail.com>
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: BSD-2-Clause
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <AK/Forward.h>
|
||||||
|
#include <AK/Function.h>
|
||||||
|
|
||||||
|
namespace TextCodec {
|
||||||
|
|
||||||
|
class Encoder {
|
||||||
|
public:
|
||||||
|
virtual ErrorOr<void> process(Utf8View, Function<ErrorOr<void>(u8)> on_byte) = 0;
|
||||||
|
|
||||||
|
protected:
|
||||||
|
virtual ~Encoder() = default;
|
||||||
|
};
|
||||||
|
|
||||||
|
class UTF8Encoder final : public Encoder {
|
||||||
|
public:
|
||||||
|
virtual ErrorOr<void> process(Utf8View, Function<ErrorOr<void>(u8)> on_byte) override;
|
||||||
|
};
|
||||||
|
|
||||||
|
class EUCJPEncoder final : public Encoder {
|
||||||
|
public:
|
||||||
|
virtual ErrorOr<void> process(Utf8View, Function<ErrorOr<void>(u8)> on_byte) override;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Looks up an encoder by encoding name; empty when no encoder is available for it.
Optional<Encoder&> encoder_for_exact_name(StringView encoding);

// Looks up an encoder by encoding label; empty when no encoder is available for it.
Optional<Encoder&> encoder_for(StringView label);
|
||||||
|
|
||||||
|
}
|
|
@ -4,4 +4,4 @@ set(SOURCES
|
||||||
)
|
)
|
||||||
|
|
||||||
serenity_lib(LibURL url)
|
serenity_lib(LibURL url)
|
||||||
target_link_libraries(LibURL PRIVATE LibUnicode)
|
target_link_libraries(LibURL PRIVATE LibUnicode LibTextCodec)
|
||||||
|
|
|
@ -14,6 +14,8 @@
|
||||||
#include <AK/StringBuilder.h>
|
#include <AK/StringBuilder.h>
|
||||||
#include <AK/StringUtils.h>
|
#include <AK/StringUtils.h>
|
||||||
#include <AK/Utf8View.h>
|
#include <AK/Utf8View.h>
|
||||||
|
#include <LibTextCodec/Decoder.h>
|
||||||
|
#include <LibTextCodec/Encoder.h>
|
||||||
#include <LibURL/Parser.h>
|
#include <LibURL/Parser.h>
|
||||||
#include <LibUnicode/IDNA.h>
|
#include <LibUnicode/IDNA.h>
|
||||||
|
|
||||||
|
@ -768,18 +770,17 @@ void Parser::shorten_urls_path(URL& url)
|
||||||
}
|
}
|
||||||
|
|
||||||
// https://url.spec.whatwg.org/#string-percent-encode-after-encoding
|
// https://url.spec.whatwg.org/#string-percent-encode-after-encoding
|
||||||
ErrorOr<String> Parser::percent_encode_after_encoding(StringView input, PercentEncodeSet percent_encode_set, bool space_as_plus)
|
ErrorOr<String> Parser::percent_encode_after_encoding(TextCodec::Encoder& encoder, StringView input, PercentEncodeSet percent_encode_set, bool space_as_plus)
|
||||||
{
|
{
|
||||||
// NOTE: This is written somewhat ad-hoc since we don't yet implement the Encoding spec.
|
// 1. Let encodeOutput be an empty I/O queue.
|
||||||
|
|
||||||
StringBuilder output;
|
StringBuilder output;
|
||||||
|
|
||||||
// 3. For each byte of encodeOutput converted to a byte sequence:
|
// 3. For each byte of encodeOutput converted to a byte sequence:
|
||||||
for (u8 byte : input) {
|
TRY(encoder.process(Utf8View(input), [&](u8 byte) -> ErrorOr<void> {
|
||||||
// 1. If spaceAsPlus is true and byte is 0x20 (SP), then append U+002B (+) to output and continue.
|
// 1. If spaceAsPlus is true and byte is 0x20 (SP), then append U+002B (+) to output and continue.
|
||||||
if (space_as_plus && byte == ' ') {
|
if (space_as_plus && byte == ' ') {
|
||||||
output.append('+');
|
output.append('+');
|
||||||
continue;
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
// 2. Let isomorph be a code point whose value is byte’s value.
|
// 2. Let isomorph be a code point whose value is byte’s value.
|
||||||
|
@ -796,7 +797,9 @@ ErrorOr<String> Parser::percent_encode_after_encoding(StringView input, PercentE
|
||||||
else {
|
else {
|
||||||
output.appendff("%{:02X}", byte);
|
output.appendff("%{:02X}", byte);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
return {};
|
||||||
|
}));
|
||||||
|
|
||||||
// 6. Return output.
|
// 6. Return output.
|
||||||
return output.to_string();
|
return output.to_string();
|
||||||
|
@ -851,7 +854,9 @@ URL Parser::basic_parse(StringView raw_input, Optional<URL> const& base_url, Opt
|
||||||
// 4. Let state be state override if given, or scheme start state otherwise.
|
// 4. Let state be state override if given, or scheme start state otherwise.
|
||||||
State state = state_override.value_or(State::SchemeStart);
|
State state = state_override.value_or(State::SchemeStart);
|
||||||
|
|
||||||
// FIXME: 5. Set encoding to the result of getting an output encoding from encoding.
|
// 5. Set encoding to the result of getting an output encoding from encoding.
|
||||||
|
auto encoder = TextCodec::encoder_for("utf-8"sv);
|
||||||
|
VERIFY(encoder.has_value());
|
||||||
|
|
||||||
// 6. Let buffer be the empty string.
|
// 6. Let buffer be the empty string.
|
||||||
StringBuilder buffer;
|
StringBuilder buffer;
|
||||||
|
@ -1684,7 +1689,7 @@ URL Parser::basic_parse(StringView raw_input, Optional<URL> const& base_url, Opt
|
||||||
auto query_percent_encode_set = url->is_special() ? PercentEncodeSet::SpecialQuery : PercentEncodeSet::Query;
|
auto query_percent_encode_set = url->is_special() ? PercentEncodeSet::SpecialQuery : PercentEncodeSet::Query;
|
||||||
|
|
||||||
// 2. Percent-encode after encoding, with encoding, buffer, and queryPercentEncodeSet, and append the result to url’s query.
|
// 2. Percent-encode after encoding, with encoding, buffer, and queryPercentEncodeSet, and append the result to url’s query.
|
||||||
url->m_data->query = percent_encode_after_encoding(buffer.string_view(), query_percent_encode_set).release_value_but_fixme_should_propagate_errors();
|
url->m_data->query = percent_encode_after_encoding(*encoder, buffer.string_view(), query_percent_encode_set).release_value_but_fixme_should_propagate_errors();
|
||||||
|
|
||||||
// 3. Set buffer to the empty string.
|
// 3. Set buffer to the empty string.
|
||||||
buffer.clear();
|
buffer.clear();
|
||||||
|
@ -1726,7 +1731,7 @@ URL Parser::basic_parse(StringView raw_input, Optional<URL> const& base_url, Opt
|
||||||
// NOTE: The percent-encode is done on EOF on the entire buffer.
|
// NOTE: The percent-encode is done on EOF on the entire buffer.
|
||||||
buffer.append_code_point(code_point);
|
buffer.append_code_point(code_point);
|
||||||
} else {
|
} else {
|
||||||
url->m_data->fragment = percent_encode_after_encoding(buffer.string_view(), PercentEncodeSet::Fragment).release_value_but_fixme_should_propagate_errors();
|
url->m_data->fragment = percent_encode_after_encoding(*encoder, buffer.string_view(), PercentEncodeSet::Fragment).release_value_but_fixme_should_propagate_errors();
|
||||||
buffer.clear();
|
buffer.clear();
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -9,6 +9,7 @@
|
||||||
|
|
||||||
#include <AK/Optional.h>
|
#include <AK/Optional.h>
|
||||||
#include <AK/StringView.h>
|
#include <AK/StringView.h>
|
||||||
|
#include <LibTextCodec/Encoder.h>
|
||||||
#include <LibURL/URL.h>
|
#include <LibURL/URL.h>
|
||||||
|
|
||||||
namespace URL {
|
namespace URL {
|
||||||
|
@ -60,7 +61,7 @@ public:
|
||||||
static URL basic_parse(StringView input, Optional<URL> const& base_url = {}, Optional<URL> url = {}, Optional<State> state_override = {});
|
static URL basic_parse(StringView input, Optional<URL> const& base_url = {}, Optional<URL> url = {}, Optional<State> state_override = {});
|
||||||
|
|
||||||
// https://url.spec.whatwg.org/#string-percent-encode-after-encoding
|
// https://url.spec.whatwg.org/#string-percent-encode-after-encoding
|
||||||
static ErrorOr<String> percent_encode_after_encoding(StringView input, PercentEncodeSet percent_encode_set, bool space_as_plus = false);
|
static ErrorOr<String> percent_encode_after_encoding(TextCodec::Encoder&, StringView input, PercentEncodeSet percent_encode_set, bool space_as_plus = false);
|
||||||
|
|
||||||
// https://url.spec.whatwg.org/#concept-host-serializer
|
// https://url.spec.whatwg.org/#concept-host-serializer
|
||||||
static ErrorOr<String> serialize_host(Host const&);
|
static ErrorOr<String> serialize_host(Host const&);
|
||||||
|
|
|
@ -9,6 +9,7 @@
|
||||||
#include <AK/StringBuilder.h>
|
#include <AK/StringBuilder.h>
|
||||||
#include <AK/Utf8View.h>
|
#include <AK/Utf8View.h>
|
||||||
#include <LibTextCodec/Decoder.h>
|
#include <LibTextCodec/Decoder.h>
|
||||||
|
#include <LibTextCodec/Encoder.h>
|
||||||
#include <LibURL/Parser.h>
|
#include <LibURL/Parser.h>
|
||||||
#include <LibWeb/Bindings/ExceptionOrUtils.h>
|
#include <LibWeb/Bindings/ExceptionOrUtils.h>
|
||||||
#include <LibWeb/Bindings/Intrinsics.h>
|
#include <LibWeb/Bindings/Intrinsics.h>
|
||||||
|
@ -47,6 +48,12 @@ ErrorOr<String> url_encode(Vector<QueryParam> const& tuples, StringView encoding
|
||||||
// 1. Set encoding to the result of getting an output encoding from encoding.
|
// 1. Set encoding to the result of getting an output encoding from encoding.
|
||||||
encoding = TextCodec::get_output_encoding(encoding);
|
encoding = TextCodec::get_output_encoding(encoding);
|
||||||
|
|
||||||
|
auto encoder = TextCodec::encoder_for(encoding);
|
||||||
|
if (!encoder.has_value()) {
|
||||||
|
// NOTE: Fall back to the default UTF-8 encoder.
|
||||||
|
encoder = TextCodec::encoder_for("utf-8"sv);
|
||||||
|
}
|
||||||
|
|
||||||
// 2. Let output be the empty string.
|
// 2. Let output be the empty string.
|
||||||
StringBuilder output;
|
StringBuilder output;
|
||||||
|
|
||||||
|
@ -55,12 +62,10 @@ ErrorOr<String> url_encode(Vector<QueryParam> const& tuples, StringView encoding
|
||||||
// 1. Assert: tuple’s name and tuple’s value are scalar value strings.
|
// 1. Assert: tuple’s name and tuple’s value are scalar value strings.
|
||||||
|
|
||||||
// 2. Let name be the result of running percent-encode after encoding with encoding, tuple’s name, the application/x-www-form-urlencoded percent-encode set, and true.
|
// 2. Let name be the result of running percent-encode after encoding with encoding, tuple’s name, the application/x-www-form-urlencoded percent-encode set, and true.
|
||||||
// FIXME: URL::Parser does not currently implement encoding.
|
auto name = TRY(URL::Parser::percent_encode_after_encoding(*encoder, tuple.name, URL::PercentEncodeSet::ApplicationXWWWFormUrlencoded, true));
|
||||||
auto name = TRY(URL::Parser::percent_encode_after_encoding(tuple.name, URL::PercentEncodeSet::ApplicationXWWWFormUrlencoded, true));
|
|
||||||
|
|
||||||
// 3. Let value be the result of running percent-encode after encoding with encoding, tuple’s value, the application/x-www-form-urlencoded percent-encode set, and true.
|
// 3. Let value be the result of running percent-encode after encoding with encoding, tuple’s value, the application/x-www-form-urlencoded percent-encode set, and true.
|
||||||
// FIXME: URL::Parser does not currently implement encoding.
|
auto value = TRY(URL::Parser::percent_encode_after_encoding(*encoder, tuple.value, URL::PercentEncodeSet::ApplicationXWWWFormUrlencoded, true));
|
||||||
auto value = TRY(URL::Parser::percent_encode_after_encoding(tuple.value, URL::PercentEncodeSet::ApplicationXWWWFormUrlencoded, true));
|
|
||||||
|
|
||||||
// 4. If output is not the empty string, then append U+0026 (&) to output.
|
// 4. If output is not the empty string, then append U+0026 (&) to output.
|
||||||
if (!output.is_empty())
|
if (!output.is_empty())
|
||||||
|
|
|
@ -160,10 +160,6 @@ WebIDL::ExceptionOr<void> HTMLFormElement::submit_form(JS::NonnullGCPtr<HTMLElem
|
||||||
|
|
||||||
// 6. Let encoding be the result of picking an encoding for the form.
|
// 6. Let encoding be the result of picking an encoding for the form.
|
||||||
auto encoding = TRY_OR_THROW_OOM(vm, pick_an_encoding());
|
auto encoding = TRY_OR_THROW_OOM(vm, pick_an_encoding());
|
||||||
if (encoding != "UTF-8"sv) {
|
|
||||||
dbgln("FIXME: Support encodings other than UTF-8 in form submission. Returning from form submission.");
|
|
||||||
return {};
|
|
||||||
}
|
|
||||||
|
|
||||||
// 7. Let entry list be the result of constructing the entry list with form, submitter, and encoding.
|
// 7. Let entry list be the result of constructing the entry list with form, submitter, and encoding.
|
||||||
auto entry_list_or_null = TRY(construct_entry_list(realm, *this, submitter, encoding));
|
auto entry_list_or_null = TRY(construct_entry_list(realm, *this, submitter, encoding));
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue