LibPDF: Handle indirect reference resolving during parsing more robustly

If `Document::resolve()` was called during parsing, it'd change the
reader's current position, so the parsing code that called it would
then end up at an unexpected position in the file.

Parser.cpp already had special-case recovery when a stream's length
was stored in an indirect reference.

Commit ead02da98ac70c ("/JBIG2Globals") in #23503 added another case
where we could resolve indirect reference during parsing, but wasn't
aware of having to save and restore the reader position for that.

Put the save/restore code in `DocumentParser::parse_object_with_index`
instead, right before the place that ultimately changes the reader's
position during `Document::resolve`. This fixes `/JBIG2Globals` and
lets us remove the special-case code for `/Length` handling.

Since this is kind of subtle, include a test.
This commit is contained in:
Nico Weber 2024-03-19 08:40:38 -04:00 committed by Tim Flynn
commit 0374c1eb3b
Notes: sideshowbarker 2024-07-16 23:23:26 +09:00
4 changed files with 33 additions and 2 deletions

View file

@ -12,6 +12,7 @@ set(TEST_FILES
complex.pdf complex.pdf
encoding.pdf encoding.pdf
encryption_nocopy.pdf encryption_nocopy.pdf
jbig2-globals.pdf
linearized.pdf linearized.pdf
non-linearized.pdf non-linearized.pdf
oss-fuzz-testcase-62065.pdf oss-fuzz-testcase-62065.pdf

View file

@ -121,6 +121,18 @@ TEST_CASE(encrypted_object_stream)
EXPECT_EQ(MUST(info_dict.creator()).value(), "Acrobat PDFMaker 9.1 voor Word"); EXPECT_EQ(MUST(info_dict.creator()).value(), "Acrobat PDFMaker 9.1 voor Word");
} }
TEST_CASE(resolve_indirect_reference_during_parsing)
{
auto file = MUST(Core::MappedFile::map("jbig2-globals.pdf"sv));
auto document = MUST(PDF::Document::create(file->bytes()));
MUST(document->initialize());
EXPECT_EQ(document->get_page_count(), 1U);
auto jbig2_stream_value = MUST(document->get_or_load_value(5));
auto jbig2_stream = MUST(document->resolve_to<PDF::StreamObject>(jbig2_stream_value));
EXPECT_EQ(jbig2_stream->bytes().size(), 20'000U);
}
TEST_CASE(malformed_pdf_document) TEST_CASE(malformed_pdf_document)
{ {
Array test_inputs = { Array test_inputs = {

View file

@ -70,11 +70,31 @@ PDFErrorOr<Value> DocumentParser::parse_object_with_index(u32 index)
if (!m_xref_table->is_object_in_use(index)) if (!m_xref_table->is_object_in_use(index))
return nullptr; return nullptr;
// If this is called to resolve an indirect object reference while parsing another object,
// make sure to restore the current position after parsing the indirect object, so that the
// parser can keep parsing the original object stream afterwards.
// parse_compressed_object_with_index() also moves the reader's position, so this needs
// to be before the potential call to parse_compressed_object_with_index().
class SavePoint {
public:
SavePoint(Reader& reader)
: m_reader(reader)
{
m_reader.save();
}
~SavePoint() { m_reader.load(); }
private:
Reader& m_reader;
};
SavePoint restore_current_position { m_reader };
if (m_xref_table->is_object_compressed(index)) if (m_xref_table->is_object_compressed(index))
// The object can be found in a object stream // The object can be found in a object stream
return parse_compressed_object_with_index(index); return parse_compressed_object_with_index(index);
auto byte_offset = m_xref_table->byte_offset_for_object(index); auto byte_offset = m_xref_table->byte_offset_for_object(index);
m_reader.move_to(byte_offset); m_reader.move_to(byte_offset);
auto indirect_value = TRY(parse_indirect_value()); auto indirect_value = TRY(parse_indirect_value());
VERIFY(indirect_value->index() == index); VERIFY(indirect_value->index() == index);

View file

@ -483,9 +483,7 @@ PDFErrorOr<NonnullRefPtr<StreamObject>> Parser::parse_stream(NonnullRefPtr<DictO
auto maybe_length = dict->get(CommonNames::Length); auto maybe_length = dict->get(CommonNames::Length);
if (maybe_length.has_value() && m_document->can_resolve_references()) { if (maybe_length.has_value() && m_document->can_resolve_references()) {
// The PDF writer has kindly provided us with the direct length of the stream // The PDF writer has kindly provided us with the direct length of the stream
m_reader.save();
auto length = TRY(m_document->resolve_to<int>(maybe_length.value())); auto length = TRY(m_document->resolve_to<int>(maybe_length.value()));
m_reader.load();
bytes = m_reader.bytes().slice(m_reader.offset(), length); bytes = m_reader.bytes().slice(m_reader.offset(), length);
m_reader.move_by(length); m_reader.move_by(length);
m_reader.consume_whitespace(); m_reader.consume_whitespace();