mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-05-10 21:22:53 +00:00
LibWeb: Plumb content encoding into the new HTML parser
We still don't handle non-ASCII input correctly, but at least now we'll convert e.g. ISO-8859-1 to UTF-8 before starting to tokenize. This patch also makes "view source" work with the new parser. :^)
This commit is contained in:
parent
772b51038e
commit
5e53c45113
Notes:
sideshowbarker
2024-07-19 06:02:35 +09:00
Author: https://github.com/awesomekling
Commit: 5e53c45113
6 changed files with 18 additions and 9 deletions
|
@@ -24,6 +24,7 @@
|
|||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <LibTextCodec/Decoder.h>
|
||||
#include <LibWeb/Parser/Entities.h>
|
||||
#include <LibWeb/Parser/HTMLToken.h>
|
||||
#include <LibWeb/Parser/HTMLTokenizer.h>
|
||||
|
@@ -1711,9 +1712,12 @@ void HTMLTokenizer::create_new_token(HTMLToken::Type type)
|
|||
m_current_token.m_type = type;
|
||||
}
|
||||
|
||||
HTMLTokenizer::HTMLTokenizer(const StringView& input)
|
||||
: m_input(input)
|
||||
HTMLTokenizer::HTMLTokenizer(const StringView& input, const String& encoding)
|
||||
{
|
||||
auto* decoder = TextCodec::decoder_for(encoding);
|
||||
ASSERT(decoder);
|
||||
m_decoded_input = decoder->to_utf8(input);
|
||||
m_input = m_decoded_input;
|
||||
}
|
||||
|
||||
void HTMLTokenizer::will_switch_to([[maybe_unused]] State new_state)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue