mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-04-26 14:28:49 +00:00
LibWeb: Fix character references losing characters in certain situations
This fixes 4 issues:
- RECONSUME_IN_RETURN_STATE was functionally equivalent to SWITCH_TO_RETURN_STATE, which caused us to lose characters. For example, &test= would lose the =
- & characters by themselves would be lost. For example, 1 & 2 would become 1 2. This is because we forgot to flush characters in the ANYTHING_ELSE path in CharacterReference
- Named character references didn't work at all in attributes. This is because there was a path that was checking the entity code points instead of the entity itself. Plus, the path that was checking the entity itself wasn't quite spec compliant.
- If we fail to match a named character reference, the first character is lost. For example, &test would become &est. However, this relies on a little hack, since I can't wrap my head around how to change the code to do as the spec says. The hack is to reconsume in AmbiguousAmpersand instead of just switching to it.

Fixes #3957
This commit is contained in:
parent
bca7be2aef
commit
6ffcd53479
Notes:
sideshowbarker
2024-07-19 00:26:09 +09:00
Author: https://github.com/Lubrsi
Commit: 6ffcd53479
Pull-request: https://github.com/SerenityOS/serenity/pull/4630
Issue: https://github.com/SerenityOS/serenity/issues/3957
1 changed files with 13 additions and 16 deletions
|
@ -71,11 +71,13 @@ namespace Web::HTML {
|
|||
goto _StartOfFunction; \
|
||||
} while (0)
|
||||
|
||||
#define RECONSUME_IN_RETURN_STATE \
|
||||
do { \
|
||||
will_reconsume_in(m_return_state); \
|
||||
m_state = m_return_state; \
|
||||
goto _StartOfFunction; \
|
||||
#define RECONSUME_IN_RETURN_STATE \
|
||||
do { \
|
||||
will_reconsume_in(m_return_state); \
|
||||
m_state = m_return_state; \
|
||||
if (current_input_character.has_value()) \
|
||||
m_utf8_iterator = m_prev_utf8_iterator; \
|
||||
goto _StartOfFunction; \
|
||||
} while (0)
|
||||
|
||||
#define SWITCH_TO_AND_EMIT_CURRENT_TOKEN(new_state) \
|
||||
|
@ -1525,6 +1527,7 @@ _StartOfFunction:
|
|||
}
|
||||
ANYTHING_ELSE
|
||||
{
|
||||
FLUSH_CODEPOINTS_CONSUMED_AS_A_CHARACTER_REFERENCE;
|
||||
RECONSUME_IN_RETURN_STATE;
|
||||
}
|
||||
}
|
||||
|
@ -1544,17 +1547,9 @@ _StartOfFunction:
|
|||
for (auto ch : match.value().entity)
|
||||
m_temporary_buffer.append(ch);
|
||||
|
||||
if (consumed_as_part_of_an_attribute() && match.value().code_points.last() != ';') {
|
||||
auto next = peek_code_point(0);
|
||||
if (next.has_value() && (next.value() == '=' || isalnum(next.value()))) {
|
||||
FLUSH_CODEPOINTS_CONSUMED_AS_A_CHARACTER_REFERENCE;
|
||||
SWITCH_TO_RETURN_STATE;
|
||||
}
|
||||
}
|
||||
|
||||
if (consumed_as_part_of_an_attribute() && match.value().entity.ends_with(';')) {
|
||||
if (consumed_as_part_of_an_attribute() && !match.value().entity.ends_with(';')) {
|
||||
auto next_code_point = peek_code_point(0);
|
||||
if (next_code_point.has_value() && next_code_point.value() == '=') {
|
||||
if (next_code_point.has_value() && (next_code_point.value() == '=' || isalnum(next_code_point.value()))) {
|
||||
FLUSH_CODEPOINTS_CONSUMED_AS_A_CHARACTER_REFERENCE;
|
||||
SWITCH_TO_RETURN_STATE;
|
||||
}
|
||||
|
@ -1571,7 +1566,9 @@ _StartOfFunction:
|
|||
SWITCH_TO_RETURN_STATE;
|
||||
} else {
|
||||
FLUSH_CODEPOINTS_CONSUMED_AS_A_CHARACTER_REFERENCE;
|
||||
SWITCH_TO(AmbiguousAmpersand);
|
||||
// FIXME: This should be SWITCH_TO, but we always lose the first character on this path, so just reconsume it.
|
||||
// I can't wrap my head around how to do it as the spec says.
|
||||
RECONSUME_IN(AmbiguousAmpersand);
|
||||
}
|
||||
}
|
||||
END_STATE
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue