mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-04-25 22:08:59 +00:00
All "Simple Fonts" in PDF (all but Type0 fonts) have the property that glyphs are selected with single-byte character codes. This means that the Encoding objects should use u8 for representing these character codes. Moreover, and as mentioned in a previous commit, there is no need to store the unicode code point associated with a character (which was in turn wrongly associated with a glyph). This commit greatly simplifies the Encoding class. Namely it: * Removes the unnecessary CharDescriptor class. * Changes the internal maps to be u8 -> FlyString and vice-versa, effectively providing two-way lookups. * Adds a new method to set a two-way u8 -> FlyString mapping and uses it in all possible places. * Simplifies the creation of Encoding objects. * Changes how the WinAnsi special treatment for bullet points is implemented.
171 lines
5.2 KiB
C++
171 lines
5.2 KiB
C++
/*
|
|
* Copyright (c) 2022, Matthew Olsson <mattco@serenityos.org>
|
|
* Copyright (c) 2022, Julian Offenhäuser <offenhaeuser@protonmail.com>
|
|
*
|
|
* SPDX-License-Identifier: BSD-2-Clause
|
|
*/
|
|
|
|
#include <AK/Utf8View.h>
|
|
#include <LibPDF/CommonNames.h>
|
|
#include <LibPDF/Encoding.h>
|
|
|
|
namespace PDF {
|
|
|
|
// Allocates a fresh Encoding with no mappings and hands ownership to a NonnullRefPtr.
NonnullRefPtr<Encoding> Encoding::create()
{
    auto* fresh_encoding = new Encoding();
    return adopt_ref(*fresh_encoding);
}
|
|
|
|
// Builds an Encoding from the object stored under a font's /Encoding key.
//
// `obj` is either:
//   * a name, selecting one of the predefined encodings handled below, or
//   * a dictionary describing a custom encoding: an optional /BaseEncoding (StandardEncoding is
//     used when it is absent) plus an optional /Differences array of the form
//     [ code name name ... code name ... ], where each name is assigned to consecutive codes
//     starting at the preceding integer.
//
// The object comes from the document being read, so anything unexpected is reported through the
// returned PDFErrorOr rather than by asserting.
PDFErrorOr<NonnullRefPtr<Encoding>> Encoding::from_object(Document* document, NonnullRefPtr<Object> const& obj)
{
    if (obj->is<NameObject>()) {
        auto name = obj->cast<NameObject>()->name();
        if (name == "StandardEncoding")
            return standard_encoding();
        if (name == "MacRomanEncoding")
            return mac_encoding();
        if (name == "WinAnsiEncoding")
            return windows_encoding();

        return Error { Error::Type::MalformedPDF, "Unknown or unsupported encoding name" };
    }

    if (!obj->is<DictObject>())
        return Error { Error::Type::MalformedPDF, "Encoding is neither a name nor a dictionary" };

    // Make a custom encoding
    auto dict = obj->cast<DictObject>();

    RefPtr<Encoding> base_encoding;
    if (dict->contains(CommonNames::BaseEncoding)) {
        // Propagate lookup failures to the caller instead of aborting on them.
        auto base_encoding_obj = TRY(dict->get_object(document, CommonNames::BaseEncoding));
        base_encoding = TRY(Encoding::from_object(document, base_encoding_obj));
    } else {
        base_encoding = Encoding::standard_encoding();
    }

    // Start from a private copy of the base tables so the shared predefined encodings stay untouched.
    auto encoding = adopt_ref(*new Encoding());
    encoding->m_descriptors = base_encoding->m_descriptors;
    encoding->m_name_mapping = base_encoding->m_name_mapping;

    // /Differences is optional; without it the custom encoding is just a copy of its base.
    if (!dict->contains(CommonNames::Differences))
        return encoding;

    auto differences_array = TRY(dict->get_array(document, CommonNames::Differences));

    // Character codes are a single byte (CharCodeType). The running code is tracked in a wider
    // type so that values past 0xFF can be detected instead of wrapping around onto low codes.
    constexpr u32 max_char_code = 0xFF;
    u32 next_char_code = 0;
    bool seen_start_code = false;

    for (auto& item : *differences_array) {
        if (item.has_u32()) {
            next_char_code = static_cast<u32>(item.to_int());
            seen_start_code = true;
            continue;
        }

        if (!item.has<NonnullRefPtr<Object>>())
            return Error { Error::Type::MalformedPDF, "Unexpected item in encoding Differences array" };
        if (!seen_start_code)
            return Error { Error::Type::MalformedPDF, "Encoding Differences array does not start with a character code" };

        auto& object = item.get<NonnullRefPtr<Object>>();
        if (!object->is<NameObject>())
            return Error { Error::Type::MalformedPDF, "Encoding Differences array contains a non-name object" };

        // Codes that do not fit in a byte can never be looked up, so such entries are skipped
        // (while still advancing the running code) rather than being truncated into range.
        if (next_char_code <= max_char_code)
            encoding->set(static_cast<CharCodeType>(next_char_code), object->cast<NameObject>()->name());
        next_char_code++;
    }

    return encoding;
}
|
|
|
|
// Records a two-way association between a character code and a glyph name:
// one entry in the name -> code map and one in the code -> name map.
void Encoding::set(CharCodeType char_code, DeprecatedFlyString const& glyph_name)
{
    // Reverse direction (glyph name -> character code).
    m_name_mapping.set(glyph_name, char_code);
    // Forward direction (character code -> glyph name).
    m_descriptors.set(char_code, glyph_name);
}
|
|
|
|
// Returns the shared encoding built from the `standard_code` column of the Latin character set
// table. The table is filled in on the first call; later calls return the same instance.
NonnullRefPtr<Encoding> Encoding::standard_encoding()
{
    static NonnullRefPtr<Encoding> s_shared = adopt_ref(*new Encoding());

    // A non-empty table means an earlier call already populated it.
    if (!s_shared->m_descriptors.is_empty())
        return s_shared;

#define ENUMERATE(glyph, standard_code, mac_code, win_code, pdf_code) \
    s_shared->set(standard_code, #glyph);
    ENUMERATE_LATIN_CHARACTER_SET(ENUMERATE)
#undef ENUMERATE

    return s_shared;
}
|
|
|
|
// Returns the shared encoding built from the `mac_code` column of the Latin character set
// table. The table is filled in on the first call; later calls return the same instance.
NonnullRefPtr<Encoding> Encoding::mac_encoding()
{
    static NonnullRefPtr<Encoding> s_shared = adopt_ref(*new Encoding());

    // A non-empty table means an earlier call already populated it.
    if (!s_shared->m_descriptors.is_empty())
        return s_shared;

#define ENUMERATE(glyph, standard_code, mac_code, win_code, pdf_code) \
    s_shared->set(mac_code, #glyph);
    ENUMERATE_LATIN_CHARACTER_SET(ENUMERATE)
#undef ENUMERATE

    return s_shared;
}
|
|
|
|
// Returns the shared encoding built from the `win_code` column of the Latin character set table,
// with every otherwise unused code above 040 (octal) resolving to the bullet glyph.
// The table is filled in on the first call; later calls return the same instance.
NonnullRefPtr<Encoding> Encoding::windows_encoding()
{
    static NonnullRefPtr<Encoding> encoding = adopt_ref(*new Encoding());
    if (encoding->m_descriptors.is_empty()) {
#define ENUMERATE(name, standard_code, mac_code, win_code, pdf_code) \
    encoding->set(win_code, #name);
        ENUMERATE_LATIN_CHARACTER_SET(ENUMERATE)
#undef ENUMERATE

        // PDF Annex D table D.2, note 3:
        // In WinAnsiEncoding, all unused codes greater than 40 (octal) map to the bullet character. However, only
        // code 225 (octal) shall be specifically assigned to the bullet character; other codes are subject to future re-assignment.
        //
        // Only the code -> name direction is filled in for these fallback codes. Going through set() would also
        // rewrite the reverse "bullet" -> code entry on every iteration, leaving it pointing at the lowest unused
        // code instead of the code assigned by the character set table above.
        //
        // Since CharCodeType is u8 *and* we need to include 255, we iterate in reverse order to have more readable code.
        DeprecatedFlyString const bullet_name = "bullet";
        for (CharCodeType char_code = 255; char_code > 040; char_code--) {
            if (!encoding->m_descriptors.contains(char_code))
                encoding->m_descriptors.set(char_code, bullet_name);
        }
    }
    return encoding;
}
|
|
|
|
// Returns the shared encoding built from the `pdf_code` column of the Latin character set
// table. The table is filled in on the first call; later calls return the same instance.
NonnullRefPtr<Encoding> Encoding::pdf_doc_encoding()
{
    static NonnullRefPtr<Encoding> s_shared = adopt_ref(*new Encoding());

    // A non-empty table means an earlier call already populated it.
    if (!s_shared->m_descriptors.is_empty())
        return s_shared;

#define ENUMERATE(glyph, standard_code, mac_code, win_code, pdf_code) \
    s_shared->set(pdf_code, #glyph);
    ENUMERATE_LATIN_CHARACTER_SET(ENUMERATE)
#undef ENUMERATE

    return s_shared;
}
|
|
|
|
// Returns the shared encoding built from the Symbol character set table.
// The table is filled in on the first call; later calls return the same instance.
NonnullRefPtr<Encoding> Encoding::symbol_encoding()
{
    static NonnullRefPtr<Encoding> s_shared = adopt_ref(*new Encoding());

    // A non-empty table means an earlier call already populated it.
    if (!s_shared->m_descriptors.is_empty())
        return s_shared;

#define ENUMERATE(glyph, char_code) \
    s_shared->set(char_code, #glyph);
    ENUMERATE_SYMBOL_CHARACTER_SET(ENUMERATE)
#undef ENUMERATE

    return s_shared;
}
|
|
|
|
// Returns the shared encoding built from the Zapf Dingbats character set table.
// The table is filled in on the first call; later calls return the same instance.
NonnullRefPtr<Encoding> Encoding::zapf_encoding()
{
    static NonnullRefPtr<Encoding> s_shared = adopt_ref(*new Encoding());

    // A non-empty table means an earlier call already populated it.
    if (!s_shared->m_descriptors.is_empty())
        return s_shared;

#define ENUMERATE(glyph, char_code) \
    s_shared->set(char_code, #glyph);
    ENUMERATE_ZAPF_DINGBATS_CHARACTER_SET(ENUMERATE)
#undef ENUMERATE

    return s_shared;
}
|
|
|
|
// Reverse lookup: returns the character code recorded for the glyph called `name`.
// A name with no entry in the name -> code map yields 0.
u16 Encoding::get_char_code(DeprecatedString const& name) const
{
    auto entry = m_name_mapping.find(name);
    if (entry == m_name_mapping.end())
        return 0;

    return entry->value;
}
|
|
|
|
}
|