mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-04-26 06:18:59 +00:00
In PDF's fonts, encoding objects are used to translate bytes into fonts' glyphs. Fonts (the ones we currently support) organise their glyphs in such a way that they are accessed by name, and thus encodings translate between a byte sequence and a glyph name. Note that at no point does this translation include a Unicode character, and therefore assigning a character to a glyph in the Encoding object is the wrong thing to do. Moreover, using the code point for this character during the byte-sequence-to-glyph translation sequence is double-wrong. This commit removes the characters associated with each translation in the built-in Encoding objects. In order to keep commits short and sweet, I'm currently simply removing the character from the enumeration, leaving the old structure this information was held in intact. Instead, I'm filling the "code_point" member with a zero, and filling both mappings (which will be changed later on too) with the glyph name and the associated char code.
184 lines
5.8 KiB
C++
184 lines
5.8 KiB
C++
/*
|
|
* Copyright (c) 2022, Matthew Olsson <mattco@serenityos.org>
|
|
* Copyright (c) 2022, Julian Offenhäuser <offenhaeuser@protonmail.com>
|
|
*
|
|
* SPDX-License-Identifier: BSD-2-Clause
|
|
*/
|
|
|
|
#include <AK/Utf8View.h>
|
|
#include <LibPDF/CommonNames.h>
|
|
#include <LibPDF/Encoding.h>
|
|
|
|
namespace PDF {
|
|
|
|
// Builds an encoding from a ready-made table that maps char codes to glyph
// descriptors. The reverse table (glyph name -> char code) is derived from it.
PDFErrorOr<NonnullRefPtr<Encoding>> Encoding::create(HashMap<u16, CharDescriptor> descriptors)
{
    auto encoding = adopt_ref(*new Encoding());
    encoding->m_descriptors = descriptors;

    // get_char_code() answers from m_name_mapping, so the value stored here has to
    // be the char code (the key of the descriptor table), the same way the
    // built-in encodings fill it in. The descriptor's code_point member is not a
    // char code and must not be used for this.
    for (auto& descriptor : descriptors)
        encoding->m_name_mapping.set(descriptor.value.name, descriptor.key);

    return encoding;
}
|
|
|
|
// Builds an Encoding from a font's encoding object.
//
// `obj` is either a name selecting one of the predefined encodings, or an
// encoding dictionary with an optional /BaseEncoding (the standard encoding is
// used when it is absent) and an optional /Differences array. The array is a
// sequence of integers and names: an integer sets the current char code, and
// every following name is assigned to the current char code, which is then
// incremented.
//
// Everything read here comes from the document, so unexpected input is
// reported through the returned PDFErrorOr instead of aborting.
PDFErrorOr<NonnullRefPtr<Encoding>> Encoding::from_object(Document* document, NonnullRefPtr<Object> const& obj)
{
    if (obj->is<NameObject>()) {
        auto name = obj->cast<NameObject>()->name();
        if (name == "StandardEncoding")
            return standard_encoding();
        if (name == "MacRomanEncoding")
            return mac_encoding();
        if (name == "WinAnsiEncoding")
            return windows_encoding();

        return Error { Error::Type::MalformedPDF, "Unknown or unsupported font encoding name" };
    }

    // Make a custom encoding
    if (!obj->is<DictObject>())
        return Error { Error::Type::MalformedPDF, "Font encoding is neither a name nor a dictionary" };
    auto dict = obj->cast<DictObject>();

    RefPtr<Encoding> base_encoding;
    if (dict->contains(CommonNames::BaseEncoding)) {
        auto base_encoding_obj = TRY(dict->get_object(document, CommonNames::BaseEncoding));
        base_encoding = TRY(Encoding::from_object(document, base_encoding_obj));
    } else {
        base_encoding = Encoding::standard_encoding();
    }

    auto encoding = adopt_ref(*new Encoding());

    encoding->m_descriptors = base_encoding->descriptors();
    encoding->m_name_mapping = base_encoding->name_mapping();

    // Without /Differences the result is simply a copy of the base encoding.
    if (!dict->contains(CommonNames::Differences))
        return encoding;

    auto differences_array = TRY(dict->get_array(document, CommonNames::Differences));

    u16 current_code_point = 0;
    bool seen_code = false;

    for (auto& item : *differences_array) {
        if (item.has_u32()) {
            current_code_point = item.to_int();
            seen_code = true;
            continue;
        }

        // A name is only meaningful once a starting char code has been given.
        if (!seen_code || !item.has<NonnullRefPtr<Object>>())
            return Error { Error::Type::MalformedPDF, "Malformed Differences array in font encoding" };

        auto& object = item.get<NonnullRefPtr<Object>>();
        if (!object->is<NameObject>())
            return Error { Error::Type::MalformedPDF, "Differences array entry is neither an integer nor a name" };
        auto name = object->cast<NameObject>()->name();

        // Look the name up through the read-only accessor: the base encoding may be
        // one of the shared built-in instances and must not gain entries here.
        encoding->m_descriptors.set(current_code_point, { name, base_encoding->get_char_code(name) });
        // Keep the reverse table in step with the overridden char code.
        encoding->m_name_mapping.set(name, current_code_point);
        current_code_point++;
    }

    return encoding;
}
|
|
|
|
// Returns the shared encoding built from the standard_code column of
// ENUMERATE_LATIN_CHARACTER_SET. Both tables are filled while the descriptor
// table is still empty; later calls hand back the same instance.
NonnullRefPtr<Encoding> Encoding::standard_encoding()
{
    static NonnullRefPtr<Encoding> s_standard = adopt_ref(*new Encoding());
    if (!s_standard->m_descriptors.is_empty())
        return s_standard;

#define REGISTER_GLYPH(glyph, standard_code, mac_code, win_code, pdf_code) \
    s_standard->m_descriptors.set(standard_code, { #glyph, 0 });           \
    s_standard->m_name_mapping.set(#glyph, standard_code);
    ENUMERATE_LATIN_CHARACTER_SET(REGISTER_GLYPH)
#undef REGISTER_GLYPH

    return s_standard;
}
|
|
|
|
// Returns the shared encoding built from the mac_code column of
// ENUMERATE_LATIN_CHARACTER_SET. Both tables are filled while the descriptor
// table is still empty; later calls hand back the same instance.
NonnullRefPtr<Encoding> Encoding::mac_encoding()
{
    static NonnullRefPtr<Encoding> s_mac_roman = adopt_ref(*new Encoding());
    if (!s_mac_roman->m_descriptors.is_empty())
        return s_mac_roman;

#define REGISTER_GLYPH(glyph, standard_code, mac_code, win_code, pdf_code) \
    s_mac_roman->m_descriptors.set(mac_code, { #glyph, 0 });               \
    s_mac_roman->m_name_mapping.set(#glyph, mac_code);
    ENUMERATE_LATIN_CHARACTER_SET(REGISTER_GLYPH)
#undef REGISTER_GLYPH

    return s_mac_roman;
}
|
|
|
|
// Returns the shared encoding built from the win_code column of
// ENUMERATE_LATIN_CHARACTER_SET. Both tables are filled while the descriptor
// table is still empty; later calls hand back the same instance. This is the
// only built-in that sets m_windows, which should_map_to_bullet() consults.
NonnullRefPtr<Encoding> Encoding::windows_encoding()
{
    static NonnullRefPtr<Encoding> s_win_ansi = adopt_ref(*new Encoding());
    if (!s_win_ansi->m_descriptors.is_empty())
        return s_win_ansi;

#define REGISTER_GLYPH(glyph, standard_code, mac_code, win_code, pdf_code) \
    s_win_ansi->m_descriptors.set(win_code, { #glyph, 0 });                \
    s_win_ansi->m_name_mapping.set(#glyph, win_code);
    ENUMERATE_LATIN_CHARACTER_SET(REGISTER_GLYPH)
#undef REGISTER_GLYPH
    s_win_ansi->m_windows = true;

    return s_win_ansi;
}
|
|
|
|
// Returns the shared encoding built from the pdf_code column of
// ENUMERATE_LATIN_CHARACTER_SET. Both tables are filled while the descriptor
// table is still empty; later calls hand back the same instance.
NonnullRefPtr<Encoding> Encoding::pdf_doc_encoding()
{
    static NonnullRefPtr<Encoding> s_pdf_doc = adopt_ref(*new Encoding());
    if (!s_pdf_doc->m_descriptors.is_empty())
        return s_pdf_doc;

#define REGISTER_GLYPH(glyph, standard_code, mac_code, win_code, pdf_code) \
    s_pdf_doc->m_descriptors.set(pdf_code, { #glyph, 0 });                 \
    s_pdf_doc->m_name_mapping.set(#glyph, pdf_code);
    ENUMERATE_LATIN_CHARACTER_SET(REGISTER_GLYPH)
#undef REGISTER_GLYPH

    return s_pdf_doc;
}
|
|
|
|
// Returns the shared encoding built from ENUMERATE_SYMBOL_CHARACTER_SET.
// Both tables are filled while the descriptor table is still empty; later
// calls hand back the same instance.
NonnullRefPtr<Encoding> Encoding::symbol_encoding()
{
    static NonnullRefPtr<Encoding> s_symbol = adopt_ref(*new Encoding());
    if (!s_symbol->m_descriptors.is_empty())
        return s_symbol;

#define REGISTER_GLYPH(glyph, char_code)                  \
    s_symbol->m_descriptors.set(char_code, { #glyph, 0 }); \
    s_symbol->m_name_mapping.set(#glyph, char_code);
    ENUMERATE_SYMBOL_CHARACTER_SET(REGISTER_GLYPH)
#undef REGISTER_GLYPH

    return s_symbol;
}
|
|
|
|
// Returns the shared encoding built from ENUMERATE_ZAPF_DINGBATS_CHARACTER_SET.
// Both tables are filled while the descriptor table is still empty; later
// calls hand back the same instance.
NonnullRefPtr<Encoding> Encoding::zapf_encoding()
{
    static NonnullRefPtr<Encoding> s_zapf = adopt_ref(*new Encoding());
    if (!s_zapf->m_descriptors.is_empty())
        return s_zapf;

#define REGISTER_GLYPH(glyph, char_code)                \
    s_zapf->m_descriptors.set(char_code, { #glyph, 0 }); \
    s_zapf->m_name_mapping.set(#glyph, char_code);
    ENUMERATE_ZAPF_DINGBATS_CHARACTER_SET(REGISTER_GLYPH)
#undef REGISTER_GLYPH

    return s_zapf;
}
|
|
|
|
// Returns the descriptor registered for `char_code`, or a shared
// default-constructed descriptor when the encoding has no entry for that code.
//
// The lookup must not add anything to m_descriptors: should_map_to_bullet()
// decides whether a code is unused by checking that table, and the built-in
// encodings are shared instances.
CharDescriptor const& Encoding::get_char_code_descriptor(u16 char_code) const
{
    static CharDescriptor const s_missing_descriptor {};

    auto descriptor_iterator = m_descriptors.find(char_code);
    if (descriptor_iterator != m_descriptors.end())
        return descriptor_iterator->value;
    return s_missing_descriptor;
}
|
|
|
|
// Reverse lookup: returns the value stored for the glyph `name` in the name
// mapping. Unknown names yield 0, so a result of 0 does not by itself tell
// whether the name was found.
u16 Encoding::get_char_code(DeprecatedString const& name) const
{
    auto entry = m_name_mapping.find(name);
    if (entry == m_name_mapping.end())
        return 0;
    return entry->value;
}
|
|
|
|
// Tells whether `char_code` is an unused WinAnsiEncoding code that should be
// shown as the bullet character.
bool Encoding::should_map_to_bullet(u16 char_code) const
{
    // PDF Annex D table D.2, note 3:
    // In WinAnsiEncoding, all unused codes greater than 40 (octal) map to the bullet character. However, only
    // code 225 (octal) shall be specifically assigned to the bullet character; other codes are subject to future re-assignment.
    if (!m_windows)
        return false;
    if (char_code <= 040)
        return false;
    // "Unused" means the encoding has no descriptor registered for this code.
    return !m_descriptors.contains(char_code);
}
|
|
|
|
}
|