mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-05-01 00:38:48 +00:00
LibWeb: Set consistent positions for the start and end of HTML tags
To illustrate the previous behavior, consider these tags and their start
and end positions (shown inclusively below):

    Start tag:   End tag:
    <span>       </span>
     ^ start      ^ start
         ^end          ^end

The start position of a tag is the first ASCII-alpha code point after
the opening brace. The start position of a close tag is the slash just
before the first ASCII-alpha code point. And the end position of both
is the closing brace.

So the opening brace is not included in the emitted tag, but the closing
brace is. And the end tag including the slash is an oddity that had to be
worked around in its only use case (syntax highlighting).

We now consistently exclude the braces from the emitted tag, and also
exclude the slash from the end tag, so that it does not need to be
accounted for in syntax highlighting. That is, we now have:

    Start tag:   End tag:
    <span>       </span>
     ^ start       ^ start
        ^end          ^end

The tokenizer unit test has been extended to test these positions.
This commit is contained in:
parent
70a87795e4
commit
5b2bc90b50
Notes:
sideshowbarker
2024-07-17 07:48:42 +09:00
Author: https://github.com/trflynn89
Commit: 5b2bc90b50
Pull-request: https://github.com/SerenityOS/serenity/pull/20756
3 changed files with 39 additions and 47 deletions
|
@ -347,7 +347,6 @@ _StartOfFunction:
|
|||
ON('>')
|
||||
{
|
||||
m_current_token.set_tag_name(consume_current_builder());
|
||||
m_current_token.set_end_position({}, nth_last_position(1));
|
||||
SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data);
|
||||
}
|
||||
ON_ASCII_UPPER_ALPHA
|
||||
|
@ -366,7 +365,6 @@ _StartOfFunction:
|
|||
ON_EOF
|
||||
{
|
||||
log_parse_error();
|
||||
m_current_token.set_end_position({}, nth_last_position(0));
|
||||
EMIT_EOF;
|
||||
}
|
||||
ANYTHING_ELSE
|
||||
|
@ -2773,19 +2771,9 @@ bool HTMLTokenizer::consume_next_if_match(StringView string, CaseSensitivity cas
|
|||
void HTMLTokenizer::create_new_token(HTMLToken::Type type)
|
||||
{
|
||||
m_current_token = { type };
|
||||
size_t offset = 0;
|
||||
switch (type) {
|
||||
case HTMLToken::Type::StartTag:
|
||||
offset = 1;
|
||||
break;
|
||||
case HTMLToken::Type::EndTag:
|
||||
offset = 2;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
m_current_token.set_start_position({}, nth_last_position(offset));
|
||||
auto is_start_or_end_tag = type == HTMLToken::Type::StartTag || type == HTMLToken::Type::EndTag;
|
||||
m_current_token.set_start_position({}, nth_last_position(is_start_or_end_tag ? 1 : 0));
|
||||
}
|
||||
|
||||
HTMLTokenizer::HTMLTokenizer()
|
||||
|
@ -2855,7 +2843,9 @@ void HTMLTokenizer::will_emit(HTMLToken& token)
|
|||
{
|
||||
if (token.is_start_tag())
|
||||
m_last_emitted_start_tag_name = token.tag_name();
|
||||
token.set_end_position({}, nth_last_position(0));
|
||||
|
||||
auto is_start_or_end_tag = token.type() == HTMLToken::Type::StartTag || token.type() == HTMLToken::Type::EndTag;
|
||||
token.set_end_position({}, nth_last_position(is_start_or_end_tag ? 1 : 0));
|
||||
}
|
||||
|
||||
bool HTMLTokenizer::current_end_tag_token_is_appropriate() const
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue