mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-04-21 03:55:24 +00:00
LibWeb: Allow HTML tokenizer to emit more than one token
Tokens are now put on a queue when emitted, and we always pop from that queue when returning from next_token().
This commit is contained in:
parent
0f2b3cd280
commit
ecd25ce6c7
Notes:
sideshowbarker
2024-07-19 06:06:40 +09:00
Author: https://github.com/awesomekling Commit: https://github.com/SerenityOS/serenity/commit/ecd25ce6c7d
2 changed files with 26 additions and 10 deletions
|
@ -58,7 +58,16 @@
|
|||
will_switch_to(State::new_state); \
|
||||
m_state = State::new_state; \
|
||||
will_emit(m_current_token); \
|
||||
return m_current_token; \
|
||||
m_queued_tokens.enqueue(m_current_token); \
|
||||
return m_queued_tokens.dequeue(); \
|
||||
} while (0)
|
||||
|
||||
// Queues a Character token holding `codepoint`, then switches to `new_state`
// and jumps to its label without returning from next_token().
//
// The token is built the same way EMIT_CHARACTER builds it (create_new_token +
// append to m_comment_or_character.data), and will_emit() is invoked before
// enqueueing, matching the other emit macros in this file. Previously the
// `codepoint` argument was ignored and whatever m_current_token happened to
// hold was enqueued instead.
//
// Because this macro does not return, the queued token is handed to the caller
// by a later dequeue (either a subsequent emit macro or the queue check at the
// top of next_token()).
//
// NOTE: this overwrites m_current_token, just as EMIT_CHARACTER does.
#define EMIT_CHARACTER_AND_RECONSUME_IN(codepoint, new_state)              \
    do {                                                                   \
        create_new_token(HTMLToken::Type::Character);                      \
        m_current_token.m_comment_or_character.data.append(codepoint);     \
        will_emit(m_current_token);                                        \
        m_queued_tokens.enqueue(m_current_token);                          \
        will_reconsume_in(State::new_state);                               \
        m_state = State::new_state;                                        \
        goto new_state;                                                    \
    } while (0)
|
||||
|
||||
// Decrements m_cursor by one. The trailing semicolon is part of the expansion,
// so the macro forms a complete statement on its own.
// NOTE(review): presumably this undoes the advance performed when the current
// input character was read — confirm against next_codepoint().
#define DONT_CONSUME_NEXT_INPUT_CHARACTER --m_cursor;
|
||||
|
@ -90,21 +99,23 @@
|
|||
m_has_emitted_eof = true; \
|
||||
create_new_token(HTMLToken::Type::EndOfFile); \
|
||||
will_emit(m_current_token); \
|
||||
return m_current_token; \
|
||||
m_queued_tokens.enqueue(m_current_token); \
|
||||
return m_queued_tokens.dequeue(); \
|
||||
} while (0)
|
||||
|
||||
#define EMIT_CURRENT_TOKEN \
|
||||
do { \
|
||||
will_emit(m_current_token); \
|
||||
return m_current_token; \
|
||||
#define EMIT_CURRENT_TOKEN \
|
||||
do { \
|
||||
will_emit(m_current_token); \
|
||||
m_queued_tokens.enqueue(m_current_token); \
|
||||
return m_queued_tokens.dequeue(); \
|
||||
} while (0)
|
||||
|
||||
#define EMIT_CHARACTER(codepoint) \
|
||||
do { \
|
||||
create_new_token(HTMLToken::Type::Character); \
|
||||
m_current_token.m_comment_or_character.data.append(codepoint); \
|
||||
will_emit(m_current_token); \
|
||||
return m_current_token; \
|
||||
m_queued_tokens.enqueue(m_current_token); \
|
||||
return m_queued_tokens.dequeue(); \
|
||||
} while (0)
|
||||
|
||||
#define EMIT_CURRENT_CHARACTER \
|
||||
|
@ -141,6 +152,9 @@ Optional<u32> HTMLTokenizer::peek_codepoint(size_t offset) const
|
|||
|
||||
Optional<HTMLToken> HTMLTokenizer::next_token()
|
||||
{
|
||||
if (!m_queued_tokens.is_empty())
|
||||
return m_queued_tokens.dequeue();
|
||||
|
||||
for (;;) {
|
||||
auto current_input_character = next_codepoint();
|
||||
switch (m_state) {
|
||||
|
@ -1270,8 +1284,7 @@ Optional<HTMLToken> HTMLTokenizer::next_token()
|
|||
}
|
||||
ANYTHING_ELSE
|
||||
{
|
||||
EMIT_CHARACTER('<');
|
||||
RECONSUME_IN(ScriptData);
|
||||
EMIT_CHARACTER_AND_RECONSUME_IN('<', ScriptData);
|
||||
}
|
||||
}
|
||||
END_STATE
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <AK/Queue.h>
|
||||
#include <AK/StringView.h>
|
||||
#include <AK/Types.h>
|
||||
#include <LibWeb/Forward.h>
|
||||
|
@ -165,5 +166,7 @@ private:
|
|||
HTMLToken m_last_emitted_start_tag;
|
||||
|
||||
bool m_has_emitted_eof { false };
|
||||
|
||||
Queue<HTMLToken> m_queued_tokens;
|
||||
};
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue