LibPDF: Create basic object structure

This commit is the start of LibPDF, and introduces some basic structure
objects. This emulates LibJS's Value structure, where Value is a simple
class that can contain a pointer to a more complex Object class with
more data. All of the basic PDF objects have a representation.
This commit is contained in:
Matthew Olsson 2021-04-30 18:23:17 -07:00 committed by Andreas Kling
parent af9a7b1374
commit a8f5b6aaa3
Notes: sideshowbarker 2024-07-18 18:23:57 +09:00
10 changed files with 637 additions and 0 deletions

93
Base/res/pdf/complex.pdf Normal file
View file

@ -0,0 +1,93 @@
%PDF-1.1
1 0 obj
<< /Kids [2 0 R 3 0 R] /Type /Pages /Count 3 >>
endobj
4 0 obj
<< >>
stream
1. 0.000000 0.000000 1. 50. 770. cm BT /F0 36. Tf (Page One) Tj ET
endstream
endobj
2 0 obj
<<
/Rotate 0
/Parent 1 0 R
/Resources
<< /Font << /F0 << /BaseFont /Times-Italic /Subtype /Type1 /Type /Font >> >> >>
/MediaBox [0.000000 0.000000 595.275590551 841.88976378]
/Type /Page
/Contents [4 0 R]
>>
endobj
5 0 obj
<< /PageLayout /TwoColumnLeft /Pages 1 0 R /Type /Catalog >>
endobj
6 0 obj
<<
/Rotate 0
/Parent 3 0 R
/Resources
<< /Font << /F0 << /BaseFont /Times-Italic /Subtype /Type1 /Type /Font >> >> >>
/MediaBox [0.000000 0.000000 595.275590551 841.88976378]
/Type /Page
/Contents [7 0 R]
>>
endobj
3 0 obj
<< /Parent 1 0 R /Kids [8 0 R 6 0 R] /Count 2 /Type /Pages >>
endobj
8 0 obj
<<
/Rotate 270
/Parent 3 0 R
/Resources
<< /Font << /F0 << /BaseFont /Times-Italic /Subtype /Type1 /Type /Font >> >> >>
/MediaBox [0.000000 0.000000 595.275590551 841.88976378]
/Type /Page
/Contents [9 0 R]
>>
endobj
9 0 obj
<< >>
stream
q 1. 0.000000 0.000000 1. 50. 770. cm BT /F0 36. Tf (Page Two) Tj ET Q
1. 0.000000 0.000000 1. 50. 750 cm BT /F0 16 Tf ((Rotated by 270 degrees)) Tj ET
endstream
endobj
7 0 obj
<< >>
stream
1. 0.000000 0.000000 1. 50. 770. cm BT /F0 36. Tf (Page Three) Tj ET
endstream
endobj
10 0 obj
<<
/Title (PDF Explained Example)
/Author (John Whitington)
/Producer (Manually Created)
/ModDate (D:20110313002346Z)
/CreationDate (D:2011)
>>
endobj xref
0 11
0000000000 65536 f
0000000009 00000 n
0000000177 00000 n
0000000731 00000 n
0000000072 00000 n
0000000416 00000 n
0000000492 00000 n
0000001239 00000 n
0000000808 00000 n
0000001049 00000 n
0000001346 00000 n
trailer
<<
/Info 10 0 R
/Root 5 0 R
/Size 11
/ID [<75ff22189ceac848dfa2afec93deee03> <75ff22189ceac848dfa2afec93deee03>]
>>
startxref
1516
%%EOF

BIN
Base/res/pdf/linearized.pdf Normal file

Binary file not shown.

Binary file not shown.

View file

@ -26,6 +26,7 @@ add_subdirectory(LibLine)
add_subdirectory(LibM)
add_subdirectory(LibMarkdown)
add_subdirectory(LibPCIDB)
add_subdirectory(LibPDF)
add_subdirectory(LibProtocol)
add_subdirectory(LibPthread)
add_subdirectory(LibRegex)

View file

@ -0,0 +1,7 @@
# Source files that make up LibPDF.
set(SOURCES Object.cpp Value.cpp)

# serenity_lib() is provided by the surrounding build system; presumably it
# turns SOURCES into the LibPDF target (confirm against its definition).
serenity_lib(LibPDF pdf)

# LibPDF links against LibC and LibCore.
target_link_libraries(LibPDF LibC LibCore)

View file

@ -0,0 +1,33 @@
/*
* Copyright (c) 2021, Matthew Olsson <mattco@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
namespace PDF {
// Forward declarations for types that are not part of the enumerated lists below.
class Document;
class Object;
// X-macro list: invokes V(ClassName, snake_case_name) once for each object class
// listed here. The second argument is the suffix used to build names such as
// is_<name>() in Object.h.
#define ENUMERATE_DIRECT_OBJECT_TYPES(V) \
V(StringObject, string) \
V(NameObject, name) \
V(ArrayObject, array) \
V(DictObject, dict) \
V(StreamObject, stream) \
V(IndirectValue, indirect_value)
// Everything in ENUMERATE_DIRECT_OBJECT_TYPES plus IndirectValueRef.
#define ENUMERATE_OBJECT_TYPES(V) \
ENUMERATE_DIRECT_OBJECT_TYPES(V) \
V(IndirectValueRef, indirect_value_ref)
// Forward-declare every enumerated object class; the helper macro is removed
// again right after use so it does not leak to includers.
#define FORWARD_DECL(class_name, _) class class_name;
ENUMERATE_OBJECT_TYPES(FORWARD_DECL)
#undef FORWARD_DECL
// Satisfied by any type T for which IsBaseOf<Object, T> holds.
template<typename T>
concept IsObject = IsBaseOf<Object, T>;
}

View file

@ -0,0 +1,116 @@
/*
* Copyright (c) 2021, Matthew Olsson <mattco@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/Hex.h>
#include <LibPDF/Object.h>
namespace PDF {
// Appends one indentation unit to `builder` for each of the `indent` levels.
// A zero or negative `indent` appends nothing.
static void append_indent(StringBuilder& builder, int indent)
{
    int remaining_levels = indent;
    while (remaining_levels > 0) {
        builder.append(" ");
        --remaining_levels;
    }
}
// Renders the string in parentheses, or as upper-cased hex inside angle
// brackets when it is flagged as binary. The indent level is unused.
String StringObject::to_string(int) const
{
    if (!is_binary())
        return String::formatted("({})", string());

    auto hex = encode_hex(string().bytes()).to_uppercase();
    return String::formatted("<{}>", hex);
}
// Renders the name prefixed with a slash. The indent level is unused.
String NameObject::to_string(int) const
{
    return String::formatted("/{}", name());
}
// Renders the array as "[", one element per line at `indent + 1`, and a
// closing "]" at `indent`. Elements are separated by ",\n".
String ArrayObject::to_string(int indent) const
{
    const int element_indent = indent + 1;

    StringBuilder builder;
    builder.append("[\n");
    bool first = true;
    for (auto& element : elements()) {
        if (!first)
            builder.append(",\n");
        first = false;
        append_indent(builder, element_indent);
        // The element starts at `element_indent`, so a multi-line element
        // (nested array, dict, ...) has to be rendered relative to that level;
        // otherwise its closing bracket ends up left of its opening one.
        builder.appendff("{}", element.to_string(element_indent));
    }
    builder.append('\n');
    append_indent(builder, indent);
    builder.append(']');
    return builder.to_string();
}
// Renders the dictionary as "<<", one "/key value" entry per line at
// `indent + 1`, and a closing ">>" at `indent`. Entries are separated by ",\n".
String DictObject::to_string(int indent) const
{
    const int entry_indent = indent + 1;

    StringBuilder out;
    out.append("<<\n");

    bool needs_separator = false;
    for (auto& [key, value] : map()) {
        if (needs_separator)
            out.append(",\n");
        else
            needs_separator = true;

        append_indent(out, entry_indent);
        out.appendff("/{} {}", key, value.to_string(entry_indent));
    }

    out.append('\n');
    append_indent(out, indent);
    out.append(">>");
    return out.to_string();
}
// Renders the stream as "stream", followed by its dictionary, the payload as
// a hex dump wrapped at 60 characters per line, and a closing "endstream".
String StreamObject::to_string(int indent) const
{
    // Maximum number of hex characters emitted per output line.
    constexpr size_t hex_chars_per_line = 60;

    StringBuilder builder;
    builder.append("stream\n");
    append_indent(builder, indent);
    builder.appendff("{}\n", dict()->to_string(indent + 1));
    append_indent(builder, indent + 1);

    // Slice the hex dump through a view instead of re-copying the remaining
    // text for every emitted line. `hex` owns the characters and outlives
    // `remaining`.
    auto hex = encode_hex(bytes());
    StringView remaining = hex.view();
    while (remaining.length() > hex_chars_per_line) {
        builder.appendff("{}\n", remaining.substring_view(0, hex_chars_per_line));
        // Continuation lines use the same indentation as the first hex line.
        append_indent(builder, indent + 1);
        remaining = remaining.substring_view(hex_chars_per_line);
    }
    // Final (possibly empty or exactly full) line.
    builder.appendff("{}\n", remaining);

    append_indent(builder, indent);
    builder.append("endstream");
    return builder.to_string();
}
// Renders "<index> <generation> obj", the wrapped value on its own line at
// `indent + 1`, and a closing "endobj" at `indent`.
String IndirectValue::to_string(int indent) const
{
    const int inner_indent = indent + 1;

    StringBuilder out;
    out.appendff("{} {} obj\n", index(), generation_index());

    append_indent(out, inner_indent);
    out.appendff("{}\n", value().to_string(inner_indent));

    append_indent(out, indent);
    out.append("endobj");
    return out.to_string();
}
// Renders the reference as "<index> <generation> R". The indent level is unused.
String IndirectValueRef::to_string(int) const
{
    StringBuilder out;
    out.appendff("{} {} R", index(), generation_index());
    return out.to_string();
}
}

View file

@ -0,0 +1,193 @@
/*
* Copyright (c) 2021, Matthew Olsson <mattco@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/FlyString.h>
#include <AK/Format.h>
#include <AK/HashMap.h>
#include <AK/RefCounted.h>
#include <LibPDF/Forward.h>
#include <LibPDF/Value.h>
namespace PDF {
// Ref-counted abstract base class of the PDF object classes declared in this
// header. It stores a generation index and exposes type-query helpers plus a
// textual rendering hook.
class Object : public RefCounted<Object> {
public:
virtual ~Object() = default;
// Generation index stored on this object; 0 unless set_generation_index() was called.
[[nodiscard]] ALWAYS_INLINE u32 generation_index() const { return m_generation_index; }
ALWAYS_INLINE void set_generation_index(u32 generation_index) { m_generation_index = generation_index; }
// Generates one `virtual bool is_<name>() const` per entry of
// ENUMERATE_OBJECT_TYPES. Each returns false here; the matching subclass
// overrides its own query to return true.
#define DEFINE_ID(_, name) \
virtual bool is_##name() const { return false; }
ENUMERATE_OBJECT_TYPES(DEFINE_ID)
#undef DEFINE_ID
// Returns a textual rendering of the object. `indent` is the nesting level
// used by implementations whose output spans several lines.
virtual String to_string(int indent) const = 0;
private:
u32 m_generation_index { 0 };
};
// A string object: the string itself plus a flag recording whether it is
// binary (to_string() renders binary strings as hex).
class StringObject final : public Object {
public:
    StringObject(String string, bool is_binary)
        : m_string(move(string))
        , m_is_binary(is_binary)
    {
    }

    ~StringObject() override = default;

    // Object interface.
    String to_string(int indent) const override;
    ALWAYS_INLINE bool is_string() const override { return true; }

    // Accessors.
    [[nodiscard]] ALWAYS_INLINE bool is_binary() const
    {
        return m_is_binary;
    }

    [[nodiscard]] ALWAYS_INLINE const String& string() const
    {
        return m_string;
    }

private:
    String m_string;
    bool m_is_binary;
};
// A name object, stored as a FlyString (without the leading slash that
// to_string() adds when rendering).
class NameObject final : public Object {
public:
    explicit NameObject(FlyString name)
        : m_name(move(name))
    {
    }

    ~NameObject() override = default;

    // Object interface.
    String to_string(int indent) const override;
    ALWAYS_INLINE bool is_name() const override { return true; }

    // Returns a copy of the stored name.
    [[nodiscard]] ALWAYS_INLINE FlyString name() const
    {
        return m_name;
    }

private:
    FlyString m_name;
};
// An array object: an ordered list of Values.
class ArrayObject final : public Object {
public:
    explicit ArrayObject(Vector<Value> elements)
        : m_elements(move(elements))
    {
    }

    ~ArrayObject() override = default;

    // Returns the stored elements by const reference, so that merely
    // inspecting or iterating the array does not copy the vector (and take a
    // new reference on every contained object). The reference stays valid for
    // as long as this ArrayObject is alive; callers that need an independent
    // copy can still write `auto copy = array.elements();`.
    [[nodiscard]] ALWAYS_INLINE const Vector<Value>& elements() const { return m_elements; }

    ALWAYS_INLINE bool is_array() const override { return true; }
    String to_string(int indent) const override;

private:
    Vector<Value> m_elements;
};
// A dictionary object: a map from names (FlyString keys) to Values.
class DictObject final : public Object {
public:
    explicit DictObject(HashMap<FlyString, Value> map)
        : m_map(move(map))
    {
    }

    ~DictObject() override = default;

    // Returns the stored map by const reference, so that a lookup or an
    // iteration does not copy the whole hash map. The reference stays valid
    // for as long as this DictObject is alive; callers that need an
    // independent copy can still write `auto copy = dict.map();`.
    [[nodiscard]] ALWAYS_INLINE const HashMap<FlyString, Value>& map() const { return m_map; }

    ALWAYS_INLINE bool is_dict() const override { return true; }
    String to_string(int indent) const override;

private:
    HashMap<FlyString, Value> m_map;
};
// A stream object: a dictionary plus a read-only view of the stream's bytes.
// Only the view is stored here, not a copy of the bytes.
class StreamObject final : public Object {
public:
    StreamObject(const NonnullRefPtr<DictObject>& dict, const ReadonlyBytes& bytes)
        : m_dict(dict)
        , m_bytes(bytes)
    {
    }

    ~StreamObject() override = default;

    // Object interface.
    String to_string(int indent) const override;
    ALWAYS_INLINE bool is_stream() const override { return true; }

    // Accessors.
    [[nodiscard]] ALWAYS_INLINE const ReadonlyBytes& bytes() const
    {
        return m_bytes;
    }

    [[nodiscard]] ALWAYS_INLINE NonnullRefPtr<DictObject> dict() const
    {
        return m_dict;
    }

private:
    NonnullRefPtr<DictObject> m_dict;
    ReadonlyBytes m_bytes;
};
// A value stored under an (index, generation index) pair; to_string() renders
// it in the "<index> <generation> obj ... endobj" form.
class IndirectValue final : public Object {
public:
    IndirectValue(u32 index, u32 generation_index, const Value& value)
        : m_index(index)
        , m_value(value)
    {
        // The generation index lives in the Object base class.
        set_generation_index(generation_index);
    }

    ~IndirectValue() override = default;

    // Object interface.
    String to_string(int indent) const override;
    ALWAYS_INLINE bool is_indirect_value() const override { return true; }

    // Accessors.
    [[nodiscard]] ALWAYS_INLINE const Value& value() const
    {
        return m_value;
    }

    [[nodiscard]] ALWAYS_INLINE u32 index() const
    {
        return m_index;
    }

private:
    u32 m_index;
    Value m_value;
};
// A reference to an indirect value, identified by an (index, generation
// index) pair; to_string() renders it as "<index> <generation> R".
class IndirectValueRef final : public Object {
public:
    IndirectValueRef(u32 index, u32 generation_index)
        : m_index(index)
    {
        // The generation index lives in the Object base class.
        set_generation_index(generation_index);
    }

    ~IndirectValueRef() override = default;

    // Object interface.
    String to_string(int indent) const override;
    ALWAYS_INLINE bool is_indirect_value_ref() const override { return true; }

    [[nodiscard]] ALWAYS_INLINE u32 index() const
    {
        return m_index;
    }

private:
    u32 m_index;
};
}
namespace AK {
// Formats any type satisfying PDF::IsObject by printing its to_string(0)
// rendering through the StringView formatter.
template<PDF::IsObject T>
struct Formatter<T> : Formatter<StringView> {
    void format(FormatBuilder& builder, const T& object)
    {
        const String rendered = object.to_string(0);
        Formatter<StringView>::format(builder, rendered);
    }
};
// Formats a NonnullRefPtr to a PDF object by forwarding the pointee to the
// formatter for the object type itself.
template<PDF::IsObject T>
struct Formatter<NonnullRefPtr<T>> : Formatter<T> {
    void format(FormatBuilder& builder, const NonnullRefPtr<T>& object)
    {
        const T& pointee = *object;
        Formatter<T>::format(builder, pointee);
    }
};
}

View file

@ -0,0 +1,60 @@
/*
* Copyright (c) 2021, Matthew Olsson <mattco@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <LibPDF/Object.h>
#include <LibPDF/Value.h>
namespace PDF {
// Drops the reference held on the Object, if this value holds one.
Value::~Value()
{
    if (!is_object())
        return;
    m_as_object->unref();
}
// Copies the type tag and payload of `other` into this value. When `other`
// holds an Object, an additional reference is taken on it.
Value& Value::operator=(const Value& other)
{
    // Self-assignment must be a no-op. Without this guard, assigning an
    // object-holding Value to itself would take an extra reference that the
    // single destructor call never gives back.
    if (this == &other)
        return *this;

    // NOTE(review): an Object already held by *this is not unref'd here.
    // Releasing it would require this->m_type to be initialised on every path
    // that reaches this operator, including the copy constructor, which
    // delegates here.
    m_type = other.m_type;
    switch (m_type) {
    case Type::Null:
        break;
    case Type::Bool:
        m_as_bool = other.m_as_bool;
        break;
    case Type::Int:
        m_as_int = other.m_as_int;
        break;
    case Type::Float:
        m_as_float = other.m_as_float;
        break;
    case Type::Object:
        m_as_object = other.m_as_object;
        if (m_as_object)
            m_as_object->ref();
        break;
    }
    return *this;
}
// Renders the value as text: "null", "true"/"false", the number, or the held
// object's own rendering at the given indent level.
String Value::to_string(int indent) const
{
    if (is_null())
        return "null";
    if (is_bool())
        return as_bool() ? "true" : "false";
    if (is_int())
        return String::number(as_int());
    if (is_float())
        return String::number(as_float());
    if (is_object())
        return as_object()->to_string(indent);

    // Every Type enumerator is handled above.
    VERIFY_NOT_REACHED();
}
}

View file

@ -0,0 +1,134 @@
/*
* Copyright (c) 2021, Matthew Olsson <mattco@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/Format.h>
namespace PDF {
class Object;
// A small tagged union: either an inline primitive (null, bool, int, float)
// or a pointer to a ref-counted Object on which this Value holds a reference.
class Value {
public:
    // Constructs a null value.
    Value()
        : m_type(Type::Null)
    {
    }

    Value(bool b)
        : m_type(Type::Bool)
    {
        m_as_bool = b;
    }

    Value(int i)
        : m_type(Type::Int)
    {
        m_as_int = i;
    }

    Value(float f)
        : m_type(Type::Float)
    {
        m_as_float = f;
    }

    // Wraps an object. An extra reference is taken here because `obj` drops
    // its own when it goes out of scope; ~Value() releases the extra one.
    template<IsObject T>
    Value(NonnullRefPtr<T> obj)
        : m_type(Type::Object)
    {
        obj->ref();
        m_as_object = obj;
    }

    // Start from a well-defined (null) state before delegating to the copy
    // assignment operator, so the tag is never indeterminate while it runs.
    Value(const Value& other)
        : m_type(Type::Null)
    {
        *this = other;
    }

    ~Value();

    Value& operator=(const Value& other);

    [[nodiscard]] ALWAYS_INLINE bool is_null() const { return m_type == Type::Null; }
    [[nodiscard]] ALWAYS_INLINE bool is_bool() const { return m_type == Type::Bool; }
    [[nodiscard]] ALWAYS_INLINE bool is_int() const { return m_type == Type::Int; }
    [[nodiscard]] ALWAYS_INLINE bool is_float() const { return m_type == Type::Float; }
    [[nodiscard]] ALWAYS_INLINE bool is_number() const { return is_int() || is_float(); }
    [[nodiscard]] ALWAYS_INLINE bool is_object() const { return m_type == Type::Object; }

    // The as_*() accessors assert that the value has the requested type.
    [[nodiscard]] ALWAYS_INLINE bool as_bool() const
    {
        VERIFY(is_bool());
        return m_as_bool;
    }

    [[nodiscard]] ALWAYS_INLINE int as_int() const
    {
        VERIFY(is_int());
        return m_as_int;
    }

    // Returns the value as an int, truncating a float. Asserts (through
    // as_float()) if the value is not a number.
    [[nodiscard]] ALWAYS_INLINE int to_int() const
    {
        if (is_int())
            return as_int();
        return static_cast<int>(as_float());
    }

    [[nodiscard]] ALWAYS_INLINE float as_float() const
    {
        VERIFY(is_float());
        return m_as_float;
    }

    // Returns the value as a float, converting an int. Asserts (through
    // as_int()) if the value is not a number.
    [[nodiscard]] ALWAYS_INLINE float to_float() const
    {
        if (is_float())
            return as_float();
        return static_cast<float>(as_int());
    }

    // Returns the held object. Like the other as_*() accessors this asserts
    // the type first; without the check a non-object value would reinterpret
    // the union's primitive payload as a pointer and dereference it.
    [[nodiscard]] ALWAYS_INLINE NonnullRefPtr<Object> as_object() const
    {
        VERIFY(is_object());
        return *m_as_object;
    }

    // True for every value except null.
    [[nodiscard]] ALWAYS_INLINE explicit operator bool() const { return !is_null(); }

    [[nodiscard]] String to_string(int indent = 0) const;

private:
    enum class Type {
        Null,
        Bool,
        Int,
        Float,
        Object,
    };

    // Payload; which member is active is recorded in m_type.
    union {
        bool m_as_bool;
        int m_as_int;
        float m_as_float;
        Object* m_as_object;
    };

    Type m_type { Type::Null };
};
}
namespace AK {
// Formats a PDF::Value by printing its to_string() rendering (default indent)
// through the StringView formatter.
template<>
struct Formatter<PDF::Value> : Formatter<StringView> {
    void format(FormatBuilder& builder, const PDF::Value& value)
    {
        const String rendered = value.to_string();
        Formatter<StringView>::format(builder, rendered);
    }
};
}