LibWeb: Add basic support for dynamic markup insertion

This implements basic support for dynamic markup insertion, adding
 * Document::open()
 * Document::write(Vector<String> const&)
 * Document::writeln(Vector<String> const&)
 * Document::close()

The HTMLParser is modified to make it possible to create a
script-created parser which initially only contains a HTMLTokenizer
without any data. Additionally, the HTMLParser::run method gains an
overload which does not modify the Document and does not run
HTMLParser::the_end() so that we can reenter the parser at a later time.
Furthermore, all FIXMEs that concern the insertion point, which is
defined in the HTMLTokenizer, are implemented. Additionally, the following
member variables of the HTMLParser are now exposed by getter functions:
 * m_tokenizer
 * m_aborted
 * m_script_nesting_level

The HTMLTokenizer is modified so that it contains an insertion
point which keeps track of where the next input from the Document::write
functions will be inserted. The insertion point is implemented as the
character offset into m_decoded_input and a boolean describing if the
insertion point is defined. Functions to update, check and {re}store the
insertion point are also added.
The function HTMLTokenizer::insert_eof is added to tell a script-created
parser that Document::close() was called and that HTMLParser::the_end()
should be called.
Lastly, an explicit default constructor is added to HTMLTokenizer to
create an empty HTMLTokenizer into which data can be inserted.
This commit is contained in:
Lorenz Steinert 2022-02-19 15:58:21 +01:00 committed by Andreas Kling
parent d29d9462e9
commit db789813c9
Notes: sideshowbarker 2024-07-17 22:55:25 +09:00
7 changed files with 282 additions and 19 deletions

View file

@ -2780,6 +2780,15 @@ void HTMLTokenizer::create_new_token(HTMLToken::Type type)
m_current_token.set_start_position({}, nth_last_position(offset));
}
// Builds a tokenizer that holds no input yet. Text can be supplied afterwards
// through insert_input_at_insertion_point().
HTMLTokenizer::HTMLTokenizer()
{
    // Start from an empty decoded buffer and view it as UTF-8.
    m_decoded_input = "";
    m_utf8_view = Utf8View(m_decoded_input);

    // The current and the previous iterator both start at the beginning of the view.
    auto const start_of_input = m_utf8_view.begin();
    m_prev_utf8_iterator = start_of_input;
    m_utf8_iterator = start_of_input;

    // Seed the source position list with its initial (0, 0) entry.
    m_source_positions.empend(0u, 0u);
}
// Constructs a tokenizer over `input`. The bytes are converted to UTF-8 with
// the decoder that TextCodec::decoder_for() returns for `encoding`, and the
// iterators are positioned at the start of the decoded text.
HTMLTokenizer::HTMLTokenizer(StringView input, String const& encoding)
{
auto* decoder = TextCodec::decoder_for(encoding);
// NOTE(review): the diff hunk marker on the next line splits this definition;
// source lines between the two hunks may not be shown here.
@ -2787,9 +2796,37 @@ HTMLTokenizer::HTMLTokenizer(StringView input, String const& encoding)
m_decoded_input = decoder->to_utf8(input);
m_utf8_view = Utf8View(m_decoded_input);
// Both the current and the previous iterator start at the beginning of the view.
m_utf8_iterator = m_utf8_view.begin();
m_prev_utf8_iterator = m_utf8_view.begin();
// Seed the source position list with its initial (0, 0) entry.
m_source_positions.empend(0u, 0u);
}
// Splices `input` into the decoded input at the current insertion point and
// advances the insertion point past the inserted text.
//
// The decoded input string is rebuilt, so the UTF-8 view and every iterator
// into it must be re-created against the new string. Iterator positions are
// carried over as byte offsets.
// NOTE(review): the saved offsets are relative to the buffer before the
// insertion, so this assumes the insertion point is not located before the
// iterator positions - TODO confirm against callers.
void HTMLTokenizer::insert_input_at_insertion_point(String const& input)
{
    // Remember where both iterators are before the backing string is replaced.
    auto utf8_iterator_byte_offset = m_utf8_view.byte_offset_of(m_utf8_iterator);
    auto prev_utf8_iterator_byte_offset = m_utf8_view.byte_offset_of(m_prev_utf8_iterator);

    // FIXME: Implement a InputStream to handle insertion_point and iterators.
    StringBuilder builder {};
    builder.append(m_decoded_input.substring(0, m_insertion_point.position));
    builder.append(input);
    builder.append(m_decoded_input.substring(m_insertion_point.position));
    m_decoded_input = builder.build();

    // Re-anchor the view and both iterators on the new string. The previous
    // iterator must be rebased as well; otherwise it would keep referring to
    // the string that was just replaced.
    m_utf8_view = Utf8View(m_decoded_input);
    m_utf8_iterator = m_utf8_view.iterator_at_byte_offset(utf8_iterator_byte_offset);
    m_prev_utf8_iterator = m_utf8_view.iterator_at_byte_offset(prev_utf8_iterator_byte_offset);

    // The next insertion goes directly after the text that was just inserted.
    m_insertion_point.position += input.length();
}
// Records that an explicit end-of-file has been inserted by setting
// m_explicit_eof_inserted; is_eof_inserted() reports this flag.
void HTMLTokenizer::insert_eof()
{
m_explicit_eof_inserted = true;
}
// Returns the m_explicit_eof_inserted flag, which insert_eof() sets to true.
bool HTMLTokenizer::is_eof_inserted()
{
return m_explicit_eof_inserted;
}
void HTMLTokenizer::will_switch_to([[maybe_unused]] State new_state)
{
dbgln_if(TOKENIZER_TRACE_DEBUG, "[{}] Switch to {}", state_name(m_state), state_name(new_state));