mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-08-08 09:09:43 +00:00
LibURL+Everywhere: Only percent decode URL paths when actually needed
Web specs do not return through javascript percent decoded URL path components - but we were doing this in a number of places due to the default behaviour of URL::serialize_path. Since percent encoded URL paths may not contain valid UTF-8 - this was resulting in us crashing in these places. For example - on an HTMLAnchorElement when retrieving the pathname for the URL of: http://ladybird.org/foo%C2%91%91 To fix this make the URL class only return the percent encoded serialized path, matching the URL spec. When the decoded path is required instead explicitly call URL::percent_decode. This fixes a crash running WPT URL tests for the anchor element on: https://wpt.live/url/a-element.html
This commit is contained in:
parent
ffe070d7f9
commit
cc55732332
Notes:
github-actions[bot]
2024-08-05 07:59:02 +00:00
Author: https://github.com/shannonbooth
Commit: cc55732332
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/966
18 changed files with 38 additions and 38 deletions
|
@ -159,7 +159,8 @@ TEST_CASE(file_url_with_encoded_characters)
|
||||||
URL::URL url("file:///my/file/test%23file.txt"sv);
|
URL::URL url("file:///my/file/test%23file.txt"sv);
|
||||||
EXPECT(url.is_valid());
|
EXPECT(url.is_valid());
|
||||||
EXPECT_EQ(url.scheme(), "file");
|
EXPECT_EQ(url.scheme(), "file");
|
||||||
EXPECT_EQ(url.serialize_path(), "/my/file/test#file.txt");
|
EXPECT_EQ(url.serialize_path(), "/my/file/test%23file.txt");
|
||||||
|
EXPECT_EQ(URL::percent_decode(url.serialize_path()), "/my/file/test#file.txt");
|
||||||
EXPECT(!url.query().has_value());
|
EXPECT(!url.query().has_value());
|
||||||
EXPECT(!url.fragment().has_value());
|
EXPECT(!url.fragment().has_value());
|
||||||
}
|
}
|
||||||
|
@ -330,7 +331,8 @@ TEST_CASE(unicode)
|
||||||
{
|
{
|
||||||
URL::URL url { "http://example.com/_ünicöde_téxt_©"sv };
|
URL::URL url { "http://example.com/_ünicöde_téxt_©"sv };
|
||||||
EXPECT(url.is_valid());
|
EXPECT(url.is_valid());
|
||||||
EXPECT_EQ(url.serialize_path(), "/_ünicöde_téxt_©");
|
EXPECT_EQ(url.serialize_path(), "/_%C3%BCnic%C3%B6de_t%C3%A9xt_%C2%A9");
|
||||||
|
EXPECT_EQ(URL::percent_decode(url.serialize_path()), "/_ünicöde_téxt_©");
|
||||||
EXPECT(!url.query().has_value());
|
EXPECT(!url.query().has_value());
|
||||||
EXPECT(!url.fragment().has_value());
|
EXPECT(!url.fragment().has_value());
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
/foo%C2%91%91
|
|
@ -0,0 +1,8 @@
|
||||||
|
<a id="anchor" href="http://ladybird.org/foo%C2%91%91"></a>
|
||||||
|
<script src="../include.js"></script>
|
||||||
|
<script>
|
||||||
|
test(() => {
|
||||||
|
const anchorElement = document.getElementById('anchor');
|
||||||
|
println(anchorElement.pathname);
|
||||||
|
})
|
||||||
|
</script>
|
|
@ -236,12 +236,12 @@ bool is_special_scheme(StringView scheme)
|
||||||
}
|
}
|
||||||
|
|
||||||
// https://url.spec.whatwg.org/#url-path-serializer
|
// https://url.spec.whatwg.org/#url-path-serializer
|
||||||
ByteString URL::serialize_path(ApplyPercentDecoding apply_percent_decoding) const
|
String URL::serialize_path() const
|
||||||
{
|
{
|
||||||
// 1. If url has an opaque path, then return url’s path.
|
// 1. If url has an opaque path, then return url’s path.
|
||||||
// FIXME: Reimplement this step once we modernize the URL implementation to meet the spec.
|
// FIXME: Reimplement this step once we modernize the URL implementation to meet the spec.
|
||||||
if (cannot_be_a_base_url())
|
if (cannot_be_a_base_url())
|
||||||
return m_data->paths[0].to_byte_string();
|
return m_data->paths[0];
|
||||||
|
|
||||||
// 2. Let output be the empty string.
|
// 2. Let output be the empty string.
|
||||||
StringBuilder output;
|
StringBuilder output;
|
||||||
|
@ -249,11 +249,11 @@ ByteString URL::serialize_path(ApplyPercentDecoding apply_percent_decoding) cons
|
||||||
// 3. For each segment of url’s path: append U+002F (/) followed by segment to output.
|
// 3. For each segment of url’s path: append U+002F (/) followed by segment to output.
|
||||||
for (auto const& segment : m_data->paths) {
|
for (auto const& segment : m_data->paths) {
|
||||||
output.append('/');
|
output.append('/');
|
||||||
output.append(apply_percent_decoding == ApplyPercentDecoding::Yes ? percent_decode(segment) : segment.to_byte_string());
|
output.append(segment);
|
||||||
}
|
}
|
||||||
|
|
||||||
// 4. Return output.
|
// 4. Return output.
|
||||||
return output.to_byte_string();
|
return output.to_string_without_validation();
|
||||||
}
|
}
|
||||||
|
|
||||||
// https://url.spec.whatwg.org/#concept-url-serializer
|
// https://url.spec.whatwg.org/#concept-url-serializer
|
||||||
|
|
|
@ -53,11 +53,6 @@ using IPv6Address = Array<u16, 8>;
|
||||||
// but it is sometimes used as opaque identifier in URLs where a network address is not necessary.
|
// but it is sometimes used as opaque identifier in URLs where a network address is not necessary.
|
||||||
using Host = Variant<IPv4Address, IPv6Address, String, Empty>;
|
using Host = Variant<IPv4Address, IPv6Address, String, Empty>;
|
||||||
|
|
||||||
enum class ApplyPercentDecoding {
|
|
||||||
Yes,
|
|
||||||
No
|
|
||||||
};
|
|
||||||
|
|
||||||
// https://w3c.github.io/FileAPI/#blob-url-entry
|
// https://w3c.github.io/FileAPI/#blob-url-entry
|
||||||
// NOTE: This represents the raw bytes behind a 'Blob' (and does not yet support a MediaSourceQuery).
|
// NOTE: This represents the raw bytes behind a 'Blob' (and does not yet support a MediaSourceQuery).
|
||||||
struct BlobURLEntry {
|
struct BlobURLEntry {
|
||||||
|
@ -161,7 +156,7 @@ public:
|
||||||
m_data->paths.append(String {});
|
m_data->paths.append(String {});
|
||||||
}
|
}
|
||||||
|
|
||||||
ByteString serialize_path(ApplyPercentDecoding = ApplyPercentDecoding::Yes) const;
|
String serialize_path() const;
|
||||||
ByteString serialize(ExcludeFragment = ExcludeFragment::No) const;
|
ByteString serialize(ExcludeFragment = ExcludeFragment::No) const;
|
||||||
ByteString serialize_for_display() const;
|
ByteString serialize_for_display() const;
|
||||||
ByteString to_byte_string() const { return serialize(); }
|
ByteString to_byte_string() const { return serialize(); }
|
||||||
|
|
|
@ -373,12 +373,10 @@ void DOMURL::set_port(String const& port)
|
||||||
}
|
}
|
||||||
|
|
||||||
// https://url.spec.whatwg.org/#dom-url-pathname
|
// https://url.spec.whatwg.org/#dom-url-pathname
|
||||||
WebIDL::ExceptionOr<String> DOMURL::pathname() const
|
String DOMURL::pathname() const
|
||||||
{
|
{
|
||||||
auto& vm = realm().vm();
|
|
||||||
|
|
||||||
// The pathname getter steps are to return the result of URL path serializing this’s URL.
|
// The pathname getter steps are to return the result of URL path serializing this’s URL.
|
||||||
return TRY_OR_THROW_OOM(vm, String::from_byte_string(m_url.serialize_path(URL::ApplyPercentDecoding::No)));
|
return m_url.serialize_path();
|
||||||
}
|
}
|
||||||
|
|
||||||
// https://url.spec.whatwg.org/#ref-for-dom-url-pathname%E2%91%A0
|
// https://url.spec.whatwg.org/#ref-for-dom-url-pathname%E2%91%A0
|
||||||
|
|
|
@ -55,7 +55,7 @@ public:
|
||||||
WebIDL::ExceptionOr<String> port() const;
|
WebIDL::ExceptionOr<String> port() const;
|
||||||
void set_port(String const&);
|
void set_port(String const&);
|
||||||
|
|
||||||
WebIDL::ExceptionOr<String> pathname() const;
|
String pathname() const;
|
||||||
void set_pathname(String const&);
|
void set_pathname(String const&);
|
||||||
|
|
||||||
Optional<String> const& fragment() const { return m_url.fragment(); }
|
Optional<String> const& fragment() const { return m_url.fragment(); }
|
||||||
|
|
|
@ -291,10 +291,8 @@ String HTMLHyperlinkElementUtils::pathname() const
|
||||||
if (!m_url.has_value())
|
if (!m_url.has_value())
|
||||||
return String {};
|
return String {};
|
||||||
|
|
||||||
// 4. If url's cannot-be-a-base-URL is true, then return url's path[0].
|
// 4. Return the result of URL path serializing url.
|
||||||
// 5. If url's path is empty, then return the empty string.
|
return m_url->serialize_path();
|
||||||
// 6. Return "/", followed by the strings in url's path (including empty strings), separated from each other by "/".
|
|
||||||
return MUST(String::from_byte_string(m_url->serialize_path()));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// https://html.spec.whatwg.org/multipage/links.html#dom-hyperlink-pathname
|
// https://html.spec.whatwg.org/multipage/links.html#dom-hyperlink-pathname
|
||||||
|
|
|
@ -248,15 +248,13 @@ WebIDL::ExceptionOr<void> Location::set_port(String const&)
|
||||||
// https://html.spec.whatwg.org/multipage/history.html#dom-location-pathname
|
// https://html.spec.whatwg.org/multipage/history.html#dom-location-pathname
|
||||||
WebIDL::ExceptionOr<String> Location::pathname() const
|
WebIDL::ExceptionOr<String> Location::pathname() const
|
||||||
{
|
{
|
||||||
auto& vm = this->vm();
|
|
||||||
|
|
||||||
// 1. If this's relevant Document is non-null and its origin is not same origin-domain with the entry settings object's origin, then throw a "SecurityError" DOMException.
|
// 1. If this's relevant Document is non-null and its origin is not same origin-domain with the entry settings object's origin, then throw a "SecurityError" DOMException.
|
||||||
auto const relevant_document = this->relevant_document();
|
auto const relevant_document = this->relevant_document();
|
||||||
if (relevant_document && !relevant_document->origin().is_same_origin_domain(entry_settings_object().origin()))
|
if (relevant_document && !relevant_document->origin().is_same_origin_domain(entry_settings_object().origin()))
|
||||||
return WebIDL::SecurityError::create(realm(), "Location's relevant document is not same origin-domain with the entry settings object's origin"_fly_string);
|
return WebIDL::SecurityError::create(realm(), "Location's relevant document is not same origin-domain with the entry settings object's origin"_fly_string);
|
||||||
|
|
||||||
// 2. Return the result of URL path serializing this Location object's url.
|
// 2. Return the result of URL path serializing this Location object's url.
|
||||||
return TRY_OR_THROW_OOM(vm, String::from_byte_string(url().serialize_path()));
|
return url().serialize_path();
|
||||||
}
|
}
|
||||||
|
|
||||||
WebIDL::ExceptionOr<void> Location::set_pathname(String const&)
|
WebIDL::ExceptionOr<void> Location::set_pathname(String const&)
|
||||||
|
|
|
@ -92,11 +92,10 @@ WebIDL::ExceptionOr<String> WorkerLocation::port() const
|
||||||
}
|
}
|
||||||
|
|
||||||
// https://html.spec.whatwg.org/multipage/workers.html#dom-workerlocation-pathname
|
// https://html.spec.whatwg.org/multipage/workers.html#dom-workerlocation-pathname
|
||||||
WebIDL::ExceptionOr<String> WorkerLocation::pathname() const
|
String WorkerLocation::pathname() const
|
||||||
{
|
{
|
||||||
auto& vm = realm().vm();
|
|
||||||
// The pathname getter steps are to return the result of URL path serializing this's WorkerGlobalScope object's url.
|
// The pathname getter steps are to return the result of URL path serializing this's WorkerGlobalScope object's url.
|
||||||
return TRY_OR_THROW_OOM(vm, String::from_byte_string(m_global_scope->url().serialize_path()));
|
return m_global_scope->url().serialize_path();
|
||||||
}
|
}
|
||||||
|
|
||||||
// https://html.spec.whatwg.org/multipage/workers.html#dom-workerlocation-search
|
// https://html.spec.whatwg.org/multipage/workers.html#dom-workerlocation-search
|
||||||
|
|
|
@ -24,7 +24,7 @@ public:
|
||||||
WebIDL::ExceptionOr<String> host() const;
|
WebIDL::ExceptionOr<String> host() const;
|
||||||
WebIDL::ExceptionOr<String> hostname() const;
|
WebIDL::ExceptionOr<String> hostname() const;
|
||||||
WebIDL::ExceptionOr<String> port() const;
|
WebIDL::ExceptionOr<String> port() const;
|
||||||
WebIDL::ExceptionOr<String> pathname() const;
|
String pathname() const;
|
||||||
WebIDL::ExceptionOr<String> search() const;
|
WebIDL::ExceptionOr<String> search() const;
|
||||||
WebIDL::ExceptionOr<String> hash() const;
|
WebIDL::ExceptionOr<String> hash() const;
|
||||||
|
|
||||||
|
|
|
@ -42,7 +42,7 @@ ErrorOr<String> load_error_page(URL::URL const& url, StringView error_message)
|
||||||
ErrorOr<String> load_file_directory_page(URL::URL const& url)
|
ErrorOr<String> load_file_directory_page(URL::URL const& url)
|
||||||
{
|
{
|
||||||
// Generate HTML contents entries table
|
// Generate HTML contents entries table
|
||||||
auto lexical_path = LexicalPath(url.serialize_path());
|
auto lexical_path = LexicalPath(URL::percent_decode(url.serialize_path()));
|
||||||
Core::DirIterator dt(lexical_path.string(), Core::DirIterator::Flags::SkipParentAndBaseDir);
|
Core::DirIterator dt(lexical_path.string(), Core::DirIterator::Flags::SkipParentAndBaseDir);
|
||||||
Vector<ByteString> names;
|
Vector<ByteString> names;
|
||||||
while (dt.has_next())
|
while (dt.has_next())
|
||||||
|
|
|
@ -102,7 +102,7 @@ void Resource::did_load(Badge<ResourceLoader>, ReadonlyBytes data, HTTP::HeaderM
|
||||||
if (content_type_options.value_or("").equals_ignoring_ascii_case("nosniff"sv)) {
|
if (content_type_options.value_or("").equals_ignoring_ascii_case("nosniff"sv)) {
|
||||||
m_mime_type = "text/plain";
|
m_mime_type = "text/plain";
|
||||||
} else {
|
} else {
|
||||||
m_mime_type = Core::guess_mime_type_based_on_filename(url().serialize_path());
|
m_mime_type = Core::guess_mime_type_based_on_filename(URL::percent_decode(url().serialize_path()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -311,7 +311,7 @@ void ResourceLoader::load(LoadRequest& request, SuccessCallback success_callback
|
||||||
}
|
}
|
||||||
|
|
||||||
auto data = resource.value()->data();
|
auto data = resource.value()->data();
|
||||||
auto response_headers = response_headers_for_file(url.serialize_path(), resource.value()->modified_time());
|
auto response_headers = response_headers_for_file(URL::percent_decode(url.serialize_path()), resource.value()->modified_time());
|
||||||
|
|
||||||
log_success(request);
|
log_success(request);
|
||||||
success_callback(data, response_headers, {});
|
success_callback(data, response_headers, {});
|
||||||
|
@ -328,7 +328,7 @@ void ResourceLoader::load(LoadRequest& request, SuccessCallback success_callback
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
FileRequest file_request(url.serialize_path(), [this, success_callback = move(success_callback), error_callback = move(error_callback), request, respond_directory_page](ErrorOr<i32> file_or_error) {
|
FileRequest file_request(URL::percent_decode(url.serialize_path()), [this, success_callback = move(success_callback), error_callback = move(error_callback), request, respond_directory_page](ErrorOr<i32> file_or_error) {
|
||||||
--m_pending_loads;
|
--m_pending_loads;
|
||||||
if (on_load_counter_change)
|
if (on_load_counter_change)
|
||||||
on_load_counter_change();
|
on_load_counter_change();
|
||||||
|
@ -376,7 +376,7 @@ void ResourceLoader::load(LoadRequest& request, SuccessCallback success_callback
|
||||||
}
|
}
|
||||||
|
|
||||||
auto data = maybe_data.release_value();
|
auto data = maybe_data.release_value();
|
||||||
auto response_headers = response_headers_for_file(request.url().serialize_path(), st_or_error.value().st_mtime);
|
auto response_headers = response_headers_for_file(URL::percent_decode(request.url().serialize_path()), st_or_error.value().st_mtime);
|
||||||
|
|
||||||
log_success(request);
|
log_success(request);
|
||||||
success_callback(data, response_headers, {});
|
success_callback(data, response_headers, {});
|
||||||
|
|
|
@ -26,7 +26,7 @@ ByteString ConnectionInfo::resource_name() const
|
||||||
// The "resource-name" can be constructed by concatenating the following:
|
// The "resource-name" can be constructed by concatenating the following:
|
||||||
StringBuilder builder;
|
StringBuilder builder;
|
||||||
// "/" if the path component is empty
|
// "/" if the path component is empty
|
||||||
auto path = m_url.serialize_path();
|
auto path = URL::percent_decode(m_url.serialize_path());
|
||||||
if (path.is_empty())
|
if (path.is_empty())
|
||||||
builder.append('/');
|
builder.append('/');
|
||||||
// The path component
|
// The path component
|
||||||
|
|
|
@ -269,7 +269,7 @@ String CookieJar::default_path(const URL::URL& url)
|
||||||
// https://tools.ietf.org/html/rfc6265#section-5.1.4
|
// https://tools.ietf.org/html/rfc6265#section-5.1.4
|
||||||
|
|
||||||
// 1. Let uri-path be the path portion of the request-uri if such a portion exists (and empty otherwise).
|
// 1. Let uri-path be the path portion of the request-uri if such a portion exists (and empty otherwise).
|
||||||
auto uri_path = url.serialize_path();
|
auto uri_path = URL::percent_decode(url.serialize_path());
|
||||||
|
|
||||||
// 2. If the uri-path is empty or if the first character of the uri-path is not a %x2F ("/") character, output %x2F ("/") and skip the remaining steps.
|
// 2. If the uri-path is empty or if the first character of the uri-path is not a %x2F ("/") character, output %x2F ("/") and skip the remaining steps.
|
||||||
if (uri_path.is_empty() || (uri_path[0] != '/'))
|
if (uri_path.is_empty() || (uri_path[0] != '/'))
|
||||||
|
@ -283,6 +283,7 @@ String CookieJar::default_path(const URL::URL& url)
|
||||||
return "/"_string;
|
return "/"_string;
|
||||||
|
|
||||||
// 4. Output the characters of the uri-path from the first character up to, but not including, the right-most %x2F ("/").
|
// 4. Output the characters of the uri-path from the first character up to, but not including, the right-most %x2F ("/").
|
||||||
|
// FIXME: The path might not be valid UTF-8.
|
||||||
return MUST(String::from_utf8(uri_path.substring_view(0, last_separator)));
|
return MUST(String::from_utf8(uri_path.substring_view(0, last_separator)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -380,7 +380,7 @@ static ErrorOr<TestResult> run_ref_test(HeadlessWebContentView& view, URL::URL c
|
||||||
|
|
||||||
if (dump_failed_ref_tests) {
|
if (dump_failed_ref_tests) {
|
||||||
warnln("\033[33;1mRef test {} failed; dumping screenshots\033[0m", url);
|
warnln("\033[33;1mRef test {} failed; dumping screenshots\033[0m", url);
|
||||||
auto title = LexicalPath::title(url.serialize_path());
|
auto title = LexicalPath::title(URL::percent_decode(url.serialize_path()));
|
||||||
auto dump_screenshot = [&](Gfx::Bitmap& bitmap, StringView path) -> ErrorOr<void> {
|
auto dump_screenshot = [&](Gfx::Bitmap& bitmap, StringView path) -> ErrorOr<void> {
|
||||||
auto screenshot_file = TRY(Core::File::open(path, Core::File::OpenMode::Write));
|
auto screenshot_file = TRY(Core::File::open(path, Core::File::OpenMode::Write));
|
||||||
auto encoded_data = TRY(Gfx::PNGWriter::encode(bitmap));
|
auto encoded_data = TRY(Gfx::PNGWriter::encode(bitmap));
|
||||||
|
|
|
@ -371,7 +371,7 @@ static auto parse(StringView contents)
|
||||||
if (url.scheme() != "file")
|
if (url.scheme() != "file")
|
||||||
return Error::from_string_literal("NYI: Nonlocal entity");
|
return Error::from_string_literal("NYI: Nonlocal entity");
|
||||||
|
|
||||||
auto file = TRY(Core::File::open(url.serialize_path(), Core::File::OpenMode::Read));
|
auto file = TRY(Core::File::open(URL::percent_decode(url.serialize_path()), Core::File::OpenMode::Read));
|
||||||
return ByteString::copy(TRY(file->read_until_eof()));
|
return ByteString::copy(TRY(file->read_until_eof()));
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
@ -440,7 +440,7 @@ static void do_run_tests(XML::Document& document)
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto file_path = url.serialize_path();
|
auto file_path = URL::percent_decode(url.serialize_path());
|
||||||
auto file_result = Core::File::open(file_path, Core::File::OpenMode::Read);
|
auto file_result = Core::File::open(file_path, Core::File::OpenMode::Read);
|
||||||
if (file_result.is_error()) {
|
if (file_result.is_error()) {
|
||||||
warnln("Read error for {}: {}", file_path, file_result.error());
|
warnln("Read error for {}: {}", file_path, file_result.error());
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue