mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-04-27 14:58:46 +00:00
LibTextCodec: Start fleshing out a simple text codec library
We're starting with a very basic decoding API and only ISO-8859-1 and UTF-8 decoding (and UTF-8 decoding is really a no-op since String is expected to be UTF-8.)
This commit is contained in:
parent
f3676ebef5
commit
e09b83c60c
Notes:
sideshowbarker
2024-07-19 07:00:24 +09:00
Author: https://github.com/awesomekling
Commit: e09b83c60c
10 changed files with 148 additions and 21 deletions
|
@ -7,7 +7,7 @@ OBJS = \
|
||||||
|
|
||||||
PROGRAM = Browser
|
PROGRAM = Browser
|
||||||
|
|
||||||
LIB_DEPS = Web JS GUI Gfx IPC Protocol Core
|
LIB_DEPS = Web JS TextCodec GUI Gfx IPC Protocol Core
|
||||||
|
|
||||||
main.cpp: ../../Libraries/LibWeb/CSS/PropertyID.h
|
main.cpp: ../../Libraries/LibWeb/CSS/PropertyID.h
|
||||||
../../Libraries/LibWeb/CSS/PropertyID.h:
|
../../Libraries/LibWeb/CSS/PropertyID.h:
|
||||||
|
|
|
@ -7,6 +7,6 @@ OBJS = \
|
||||||
|
|
||||||
PROGRAM = Help
|
PROGRAM = Help
|
||||||
|
|
||||||
LIB_DEPS = GUI Web JS Gfx Markdown IPC Protocol Thread Pthread Core
|
LIB_DEPS = GUI Web TextCodec JS Gfx Markdown IPC Protocol Thread Pthread Core
|
||||||
|
|
||||||
include ../../Makefile.common
|
include ../../Makefile.common
|
||||||
|
|
|
@ -11,6 +11,6 @@ OBJS = \
|
||||||
|
|
||||||
PROGRAM = IRCClient
|
PROGRAM = IRCClient
|
||||||
|
|
||||||
LIB_DEPS = Web JS GUI Gfx Protocol IPC Thread Pthread Core
|
LIB_DEPS = Web TextCodec JS GUI Gfx Protocol IPC Thread Pthread Core
|
||||||
|
|
||||||
include ../../Makefile.common
|
include ../../Makefile.common
|
||||||
|
|
|
@ -4,6 +4,6 @@ OBJS = \
|
||||||
|
|
||||||
PROGRAM = TextEditor
|
PROGRAM = TextEditor
|
||||||
|
|
||||||
LIB_DEPS = Web Markdown GUI Gfx VT Protocol IPC Thread Pthread Core JS
|
LIB_DEPS = Web TextCodec Markdown GUI Gfx VT Protocol IPC Thread Pthread Core JS
|
||||||
|
|
||||||
include ../../Makefile.common
|
include ../../Makefile.common
|
||||||
|
|
|
@ -18,6 +18,6 @@ OBJS = \
|
||||||
|
|
||||||
PROGRAM = HackStudio
|
PROGRAM = HackStudio
|
||||||
|
|
||||||
LIB_DEPS = GUI Web VT Protocol Markdown Gfx IPC Thread Pthread Core JS Debug
|
LIB_DEPS = GUI Web TextCodec VT Protocol Markdown Gfx IPC Thread Pthread Core JS Debug
|
||||||
|
|
||||||
include ../../Makefile.common
|
include ../../Makefile.common
|
||||||
|
|
73
Libraries/LibTextCodec/Decoder.cpp
Normal file
73
Libraries/LibTextCodec/Decoder.cpp
Normal file
|
@ -0,0 +1,73 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||||
|
* list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <AK/String.h>
|
||||||
|
#include <AK/StringBuilder.h>
|
||||||
|
#include <LibTextCodec/Decoder.h>
|
||||||
|
|
||||||
|
namespace TextCodec {
|
||||||
|
|
||||||
|
// Returns the shared Latin1Decoder instance.
// The instance is allocated on the first call and is never deleted here.
Latin1Decoder& latin1_decoder()
{
    static Latin1Decoder* s_instance = nullptr;
    if (s_instance == nullptr)
        s_instance = new Latin1Decoder;
    return *s_instance;
}
|
||||||
|
|
||||||
|
// Returns the shared UTF8Decoder instance.
// The instance is allocated on the first call and is never deleted here.
UTF8Decoder& utf8_decoder()
{
    static UTF8Decoder* s_instance = nullptr;
    if (s_instance == nullptr)
        s_instance = new UTF8Decoder;
    return *s_instance;
}
|
||||||
|
|
||||||
|
// Looks up the decoder for an encoding name.
// The name is compared case-insensitively against "iso-8859-1" and "utf-8".
// Returns nullptr when the name matches neither.
Decoder* decoder_for(const String& encoding)
{
    Decoder* match = nullptr;
    if (encoding.equals_ignoring_case("iso-8859-1"))
        match = &latin1_decoder();
    else if (encoding.equals_ignoring_case("utf-8"))
        match = &utf8_decoder();
    return match;
}
|
||||||
|
|
||||||
|
// Passes the input through unchanged: the bytes are copied into a String
// without any validation or transformation.
String UTF8Decoder::to_utf8(const StringView& input)
{
    String output = input;
    return output;
}
|
||||||
|
|
||||||
|
// Converts ISO-8859-1 (Latin-1) bytes to UTF-8.
//
// Every Latin-1 byte value is numerically equal to its Unicode code point
// (U+0000..U+00FF). Bytes below 0x80 are ASCII and are copied through as-is;
// bytes 0x80..0xFF are emitted as the two-byte UTF-8 sequence
// 110000xx 10xxxxxx instead of being replaced by '?'.
String Latin1Decoder::to_utf8(const StringView& input)
{
    StringBuilder builder(input.length());
    for (size_t i = 0; i < input.length(); ++i) {
        u8 ch = input[i];
        if (ch < 0x80) {
            builder.append(static_cast<char>(ch));
            continue;
        }
        // Lead byte carries the top two bits, continuation byte the low six.
        builder.append(static_cast<char>(0xc0 | (ch >> 6)));
        builder.append(static_cast<char>(0x80 | (ch & 0x3f)));
    }
    return builder.to_string();
}
|
||||||
|
|
||||||
|
}
|
50
Libraries/LibTextCodec/Decoder.h
Normal file
50
Libraries/LibTextCodec/Decoder.h
Normal file
|
@ -0,0 +1,50 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||||
|
* list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <AK/Forward.h>
|
||||||
|
|
||||||
|
namespace TextCodec {
|
||||||
|
|
||||||
|
class Decoder {
|
||||||
|
public:
|
||||||
|
virtual String to_utf8(const StringView&) = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
class UTF8Decoder final : public Decoder {
|
||||||
|
public:
|
||||||
|
virtual String to_utf8(const StringView&) override;
|
||||||
|
};
|
||||||
|
|
||||||
|
class Latin1Decoder final : public Decoder {
|
||||||
|
public:
|
||||||
|
virtual String to_utf8(const StringView&) override;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Returns the decoder for the given encoding name, matched case-insensitively
// against "iso-8859-1" and "utf-8". Returns nullptr for any other name, so
// callers must check the result before using it.
Decoder* decoder_for(const String& encoding);
|
||||||
|
|
||||||
|
}
|
15
Libraries/LibTextCodec/Makefile
Normal file
15
Libraries/LibTextCodec/Makefile
Normal file
|
@ -0,0 +1,15 @@
|
||||||
|
OBJS = \
|
||||||
|
Decoder.o
|
||||||
|
|
||||||
|
LIBRARY = libtextcodec.a
|
||||||
|
|
||||||
|
install:
|
||||||
|
for dir in .; do \
|
||||||
|
mkdir -p $(SERENITY_BASE_DIR)/Root/usr/include/LibTextCodec/$$dir; \
|
||||||
|
cp $$dir/*.h $(SERENITY_BASE_DIR)/Root/usr/include/LibTextCodec/$$dir/; \
|
||||||
|
done
|
||||||
|
cp $(LIBRARY) $(SERENITY_BASE_DIR)/Root/usr/lib/
|
||||||
|
|
||||||
|
include ../../Makefile.common
|
||||||
|
|
||||||
|
include ../../Makefile.subdir
|
|
@ -27,6 +27,7 @@
|
||||||
#include <AK/Function.h>
|
#include <AK/Function.h>
|
||||||
#include <AK/NonnullRefPtrVector.h>
|
#include <AK/NonnullRefPtrVector.h>
|
||||||
#include <AK/StringBuilder.h>
|
#include <AK/StringBuilder.h>
|
||||||
|
#include <LibTextCodec/Decoder.h>
|
||||||
#include <LibWeb/DOM/Comment.h>
|
#include <LibWeb/DOM/Comment.h>
|
||||||
#include <LibWeb/DOM/DocumentFragment.h>
|
#include <LibWeb/DOM/DocumentFragment.h>
|
||||||
#include <LibWeb/DOM/DocumentType.h>
|
#include <LibWeb/DOM/DocumentType.h>
|
||||||
|
@ -385,21 +386,9 @@ static bool parse_html_document(const StringView& html, Document& document, Pare
|
||||||
|
|
||||||
String to_utf8(const StringView& input, const String& encoding)
|
String to_utf8(const StringView& input, const String& encoding)
|
||||||
{
|
{
|
||||||
String output;
|
auto* decoder = TextCodec::decoder_for(encoding);
|
||||||
if (encoding == "utf-8") {
|
ASSERT(decoder);
|
||||||
output = input;
|
return decoder->to_utf8(input);
|
||||||
} else if (encoding == "iso-8859-1") {
|
|
||||||
StringBuilder builder(input.length());
|
|
||||||
for (size_t i = 0; i < input.length(); ++i) {
|
|
||||||
u8 ch = input[i];
|
|
||||||
builder.append(ch >= 0x80 ? '?' : ch);
|
|
||||||
}
|
|
||||||
output = builder.to_string();
|
|
||||||
} else {
|
|
||||||
dbg() << "Unknown encoding " << encoding;
|
|
||||||
ASSERT_NOT_REACHED();
|
|
||||||
}
|
|
||||||
return output;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
RefPtr<DocumentFragment> parse_html_fragment(Document& document, const StringView& raw_html, const String& encoding)
|
RefPtr<DocumentFragment> parse_html_fragment(Document& document, const StringView& raw_html, const String& encoding)
|
||||||
|
|
|
@ -4,7 +4,7 @@ APPS = ${SRCS:.cpp=}
|
||||||
|
|
||||||
EXTRA_CLEAN = $(APPS)
|
EXTRA_CLEAN = $(APPS)
|
||||||
|
|
||||||
LIB_DEPS = Crypto TLS Web GUI Gfx Audio Protocol IPC Thread Pthread PCIDB Markdown JS Core Line X86 Debug
|
LIB_DEPS = Crypto TLS Web TextCodec GUI Gfx Audio Protocol IPC Thread Pthread PCIDB Markdown JS Core Line X86 Debug
|
||||||
|
|
||||||
include ../Makefile.common
|
include ../Makefile.common
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue