mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-05-01 08:48:49 +00:00
LibTextCodec: Start fleshing out a simple text codec library
We're starting with a very basic decoding API and only ISO-8859-1 and UTF-8 decoding (and UTF-8 decoding is really a no-op since String is expected to be UTF-8.)
This commit is contained in:
parent
f3676ebef5
commit
e09b83c60c
Notes:
sideshowbarker
2024-07-19 07:00:24 +09:00
Author: https://github.com/awesomekling
Commit: e09b83c60c
10 changed files with 148 additions and 21 deletions
|
@ -27,6 +27,7 @@
|
|||
#include <AK/Function.h>
|
||||
#include <AK/NonnullRefPtrVector.h>
|
||||
#include <AK/StringBuilder.h>
|
||||
#include <LibTextCodec/Decoder.h>
|
||||
#include <LibWeb/DOM/Comment.h>
|
||||
#include <LibWeb/DOM/DocumentFragment.h>
|
||||
#include <LibWeb/DOM/DocumentType.h>
|
||||
|
@ -385,21 +386,9 @@ static bool parse_html_document(const StringView& html, Document& document, Pare
|
|||
|
||||
// Converts `input`, interpreted as text in the encoding named by `encoding`,
// into a UTF-8 String.
//
// The conversion is delegated to LibTextCodec: the decoder that
// TextCodec::decoder_for() returns for `encoding` performs the work. This
// replaces the earlier hand-rolled implementation, which special-cased
// "utf-8" and "iso-8859-1" and substituted '?' for every ISO-8859-1 byte
// >= 0x80.
//
// NOTE(review): which encoding names decoder_for() recognizes is not visible
// from here — confirm against LibTextCodec before passing new labels.
String to_utf8(const StringView& input, const String& encoding)
{
    auto* decoder = TextCodec::decoder_for(encoding);
    // There is no fallback path: callers must pass an encoding for which a
    // decoder exists, otherwise this assertion fires.
    ASSERT(decoder);
    return decoder->to_utf8(input);
}
|
||||
|
||||
RefPtr<DocumentFragment> parse_html_fragment(Document& document, const StringView& raw_html, const String& encoding)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue