mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-07-16 05:51:55 +00:00
Text can be rendered in various ways in PDFs: Filled, stroked, both filled and stroked, set as clipping path, hidden, or some combinations thereof. We don't implement any of this at the moment except "filled". Hidden text is used in scanned documents: The image of the scan is drawn in the background, and then OCRd text is "drawn" as hidden on top of the scanned bitmap. That way, the (hidden) text can be selected and copied, and it looks like you're selecting text from the scanned bitmap. Find-in-page also works similarly. (We currently have neither text selection nor find-in-page, but one day we will.) Now that we have pretty good support for CCITT and are growing some support for JBIG2, we now draw both the scanned background image as well as the foreground text. They're not always perfectly aligned. This change makes it so that we don't render text that's marked as hidden. (We still do most of the coordinate math, which will probably come in handy at some point when we implement text selection.) This makes these scanned documents appear as they're supposed to appear (at least in documents where we manage to decode the background bitmap). This also adds a debug option to force rendering of hidden text.
97 lines
4.4 KiB
C++
97 lines
4.4 KiB
C++
/*
|
|
* Copyright (c) 2023, Rodrigo Tobar <rtobarc@gmail.com>.
|
|
*
|
|
* SPDX-License-Identifier: BSD-2-Clause
|
|
*/
|
|
|
|
#include <LibGfx/Forward.h>
|
|
#include <LibPDF/CommonNames.h>
|
|
#include <LibPDF/Error.h>
|
|
#include <LibPDF/Fonts/PDFFont.h>
|
|
#include <LibPDF/Fonts/SimpleFont.h>
|
|
#include <LibPDF/Fonts/TrueTypeFont.h>
|
|
#include <LibPDF/Fonts/Type1Font.h>
|
|
#include <LibPDF/Renderer.h>
|
|
|
|
namespace PDF {
|
|
|
|
PDFErrorOr<void> SimpleFont::initialize(Document* document, NonnullRefPtr<DictObject> const& dict, float font_size)
|
|
{
|
|
TRY(PDFFont::initialize(document, dict, font_size));
|
|
if (dict->contains(CommonNames::Encoding)) {
|
|
auto encoding_object = MUST(dict->get_object(document, CommonNames::Encoding));
|
|
m_encoding = TRY(Encoding::from_object(document, encoding_object));
|
|
}
|
|
|
|
if (dict->contains(CommonNames::ToUnicode))
|
|
m_to_unicode = TRY(dict->get_stream(document, CommonNames::ToUnicode));
|
|
|
|
if (dict->contains(CommonNames::FirstChar) && dict->contains(CommonNames::LastChar) && dict->contains(CommonNames::Widths)) {
|
|
auto first_char = dict->get_value(CommonNames::FirstChar).get<int>();
|
|
auto last_char = dict->get_value(CommonNames::LastChar).get<int>();
|
|
auto widths_array = TRY(dict->get_array(document, CommonNames::Widths));
|
|
|
|
VERIFY(widths_array->size() == static_cast<size_t>(last_char - first_char + 1));
|
|
|
|
for (size_t i = 0; i < widths_array->size(); i++)
|
|
m_widths.set(first_char + i, widths_array->at(i).to_int());
|
|
}
|
|
|
|
if (dict->contains(CommonNames::FontDescriptor)) {
|
|
auto descriptor = TRY(dict->get_dict(document, CommonNames::FontDescriptor));
|
|
if (descriptor->contains(CommonNames::MissingWidth))
|
|
m_missing_width = descriptor->get_value(CommonNames::MissingWidth).to_int();
|
|
}
|
|
|
|
return {};
|
|
}
|
|
|
|
PDFErrorOr<Gfx::FloatPoint> SimpleFont::draw_string(Gfx::Painter& painter, Gfx::FloatPoint glyph_position, ByteString const& string, Renderer const& renderer)
|
|
{
|
|
auto horizontal_scaling = renderer.text_state().horizontal_scaling;
|
|
|
|
auto const& text_rendering_matrix = renderer.calculate_text_rendering_matrix();
|
|
|
|
// TrueType fonts are prescaled to text_rendering_matrix.x_scale() * text_state().font_size / horizontal_scaling,
|
|
// cf `Renderer::text_set_font()`. That's the width we get back from `get_glyph_width()` if we use a fallback
|
|
// (or built-in) font. Scale the width size too, so the m_width.get() codepath is consistent.
|
|
auto const font_size = text_rendering_matrix.x_scale() * renderer.text_state().font_size / horizontal_scaling;
|
|
|
|
auto character_spacing = renderer.text_state().character_spacing;
|
|
auto word_spacing = renderer.text_state().word_spacing;
|
|
|
|
for (auto char_code : string.bytes()) {
|
|
// Use the width specified in the font's dictionary if available,
|
|
// and use the default width for the given font otherwise.
|
|
float glyph_width;
|
|
if (auto width = m_widths.get(char_code); width.has_value())
|
|
glyph_width = font_size * width.value() * m_font_matrix.x_scale();
|
|
else if (auto width = get_glyph_width(char_code); width.has_value())
|
|
glyph_width = width.value();
|
|
else
|
|
glyph_width = font_size * m_missing_width * m_font_matrix.x_scale();
|
|
|
|
if (renderer.text_state().rendering_mode != TextRenderingMode::Invisible || renderer.show_hidden_text()) {
|
|
Gfx::FloatPoint glyph_render_position = text_rendering_matrix.map(glyph_position);
|
|
TRY(draw_glyph(painter, glyph_render_position, glyph_width, char_code, renderer));
|
|
}
|
|
|
|
// glyph_width is scaled by `text_rendering_matrix.x_scale() * renderer.text_state().font_size / horizontal_scaling`,
|
|
// but it should only be scaled by `renderer.text_state().font_size`.
|
|
// FIXME: Having to divide here isn't pretty. Refactor things so that this isn't needed.
|
|
auto tx = glyph_width / text_rendering_matrix.x_scale() * horizontal_scaling;
|
|
tx += character_spacing;
|
|
|
|
// ISO 32000 (PDF 2.0), 9.3.3 Wordspacing
|
|
// "Word spacing shall be applied to every occurrence of the single-byte character code 32
|
|
// in a string when using a simple font (including Type 3) or a composite font that defines
|
|
// code 32 as a single-byte code."
|
|
if (char_code == ' ')
|
|
tx += word_spacing;
|
|
|
|
glyph_position += { tx, 0.0f };
|
|
}
|
|
return glyph_position;
|
|
}
|
|
|
|
}
|