mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-04-22 12:35:14 +00:00
LibMarkdown: Rewrite Inline text parser to be more forgiving
The previous Text::parse was not able to give up on parsing a textual element and just leave it as plain text. Because this is a very important part of Markdown, I fully rewrote the parser to support this without having to backtrack. The parser now also has some other little features, such as delimiter runs and flanking.
This commit is contained in:
parent
80e58dab9a
commit
ec9f892899
Notes:
sideshowbarker
2024-07-18 04:10:58 +09:00
Author: https://github.com/petelliott Commit: https://github.com/SerenityOS/serenity/commit/ec9f892899c Pull-request: https://github.com/SerenityOS/serenity/pull/9928 Reviewed-by: https://github.com/BenWiederhake ✅ Reviewed-by: https://github.com/alimpfard
10 changed files with 462 additions and 397 deletions
|
@ -10,51 +10,24 @@
|
|||
|
||||
namespace Markdown {
|
||||
|
||||
Text::Style CodeBlock::style() const
|
||||
{
|
||||
if (m_style_spec.spans().is_empty())
|
||||
return {};
|
||||
return m_style_spec.spans()[0].style;
|
||||
}
|
||||
|
||||
String CodeBlock::style_language() const
|
||||
{
|
||||
if (m_style_spec.spans().is_empty())
|
||||
return {};
|
||||
return m_style_spec.spans()[0].text;
|
||||
}
|
||||
|
||||
String CodeBlock::render_to_html() const
|
||||
{
|
||||
StringBuilder builder;
|
||||
|
||||
String style_language = this->style_language();
|
||||
Text::Style style = this->style();
|
||||
|
||||
builder.append("<pre>");
|
||||
|
||||
if (style.strong)
|
||||
builder.append("<b>");
|
||||
if (style.emph)
|
||||
builder.append("<em>");
|
||||
|
||||
if (style_language.is_empty())
|
||||
if (m_language.is_empty())
|
||||
builder.append("<code>");
|
||||
else
|
||||
builder.appendff("<code class=\"{}\">", escape_html_entities(style_language));
|
||||
builder.appendff("<code class=\"{}\">", escape_html_entities(m_language));
|
||||
|
||||
if (style_language == "js")
|
||||
if (m_language == "js")
|
||||
builder.append(JS::MarkupGenerator::html_from_source(m_code));
|
||||
else
|
||||
builder.append(escape_html_entities(m_code));
|
||||
|
||||
builder.append("\n</code>");
|
||||
|
||||
if (style.emph)
|
||||
builder.append("</em>");
|
||||
if (style.strong)
|
||||
builder.append("</b>");
|
||||
|
||||
builder.append("</pre>\n");
|
||||
|
||||
return builder.build();
|
||||
|
@ -64,28 +37,7 @@ String CodeBlock::render_for_terminal(size_t) const
|
|||
{
|
||||
StringBuilder builder;
|
||||
|
||||
Text::Style style = this->style();
|
||||
bool needs_styling = style.strong || style.emph;
|
||||
if (needs_styling) {
|
||||
builder.append("\033[");
|
||||
bool first = true;
|
||||
if (style.strong) {
|
||||
builder.append('1');
|
||||
first = false;
|
||||
}
|
||||
if (style.emph) {
|
||||
if (!first)
|
||||
builder.append(';');
|
||||
builder.append('4');
|
||||
}
|
||||
builder.append('m');
|
||||
}
|
||||
|
||||
builder.append(m_code);
|
||||
|
||||
if (needs_styling)
|
||||
builder.append("\033[0m");
|
||||
|
||||
builder.append("\n\n");
|
||||
|
||||
return builder.build();
|
||||
|
@ -102,21 +54,7 @@ OwnPtr<CodeBlock> CodeBlock::parse(Vector<StringView>::ConstIterator& lines)
|
|||
if (!line.starts_with(tick_tick_tick))
|
||||
return {};
|
||||
|
||||
// Our Markdown extension: we allow
|
||||
// specifying a style and a language
|
||||
// for a code block, like so:
|
||||
//
|
||||
// ```**sh**
|
||||
// $ echo hello friends!
|
||||
// ````
|
||||
//
|
||||
// The code block will be made bold,
|
||||
// and if possible syntax-highlighted
|
||||
// as appropriate for a shell script.
|
||||
StringView style_spec = line.substring_view(3, line.length() - 3);
|
||||
auto spec = Text::parse(style_spec);
|
||||
if (!spec.has_value())
|
||||
return {};
|
||||
|
||||
++lines;
|
||||
|
||||
|
@ -136,7 +74,7 @@ OwnPtr<CodeBlock> CodeBlock::parse(Vector<StringView>::ConstIterator& lines)
|
|||
first = false;
|
||||
}
|
||||
|
||||
return make<CodeBlock>(move(spec.value()), builder.build());
|
||||
return make<CodeBlock>(style_spec, builder.build());
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -14,9 +14,9 @@ namespace Markdown {
|
|||
|
||||
class CodeBlock final : public Block {
|
||||
public:
|
||||
CodeBlock(Text&& style_spec, const String& code)
|
||||
CodeBlock(const String& language, const String& code)
|
||||
: m_code(move(code))
|
||||
, m_style_spec(move(style_spec))
|
||||
, m_language(language)
|
||||
{
|
||||
}
|
||||
virtual ~CodeBlock() override { }
|
||||
|
@ -26,11 +26,8 @@ public:
|
|||
static OwnPtr<CodeBlock> parse(Vector<StringView>::ConstIterator& lines);
|
||||
|
||||
private:
|
||||
String style_language() const;
|
||||
Text::Style style() const;
|
||||
|
||||
String m_code;
|
||||
Text m_style_spec;
|
||||
String m_language;
|
||||
};
|
||||
|
||||
}
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
/*
|
||||
* Copyright (c) 2019-2020, Sergey Bugaev <bugaevc@serenityos.org>
|
||||
* Copyright (c) 2021, Peter Elliott <pelliott@serenityos.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
@ -75,15 +76,16 @@ OwnPtr<Document> Document::parse(const StringView& str)
|
|||
auto lines = lines_vec.begin();
|
||||
auto document = make<Document>();
|
||||
auto& blocks = document->m_blocks;
|
||||
NonnullOwnPtrVector<Paragraph::Line> paragraph_lines;
|
||||
StringBuilder paragraph_text;
|
||||
|
||||
auto flush_paragraph = [&] {
|
||||
if (paragraph_lines.is_empty())
|
||||
if (paragraph_text.is_empty())
|
||||
return;
|
||||
auto paragraph = make<Paragraph>(move(paragraph_lines));
|
||||
auto paragraph = make<Paragraph>(Text::parse(paragraph_text.build()));
|
||||
document->m_blocks.append(move(paragraph));
|
||||
paragraph_lines.clear();
|
||||
paragraph_text.clear();
|
||||
};
|
||||
|
||||
while (true) {
|
||||
if (lines.is_end())
|
||||
break;
|
||||
|
@ -98,7 +100,7 @@ OwnPtr<Document> Document::parse(const StringView& str)
|
|||
|| helper<Heading>(lines, blocks) || helper<HorizontalRule>(lines, blocks);
|
||||
|
||||
if (any) {
|
||||
if (!paragraph_lines.is_empty()) {
|
||||
if (!paragraph_text.is_empty()) {
|
||||
auto last_block = document->m_blocks.take_last();
|
||||
flush_paragraph();
|
||||
document->m_blocks.append(move(last_block));
|
||||
|
@ -106,15 +108,11 @@ OwnPtr<Document> Document::parse(const StringView& str)
|
|||
continue;
|
||||
}
|
||||
|
||||
auto line = Paragraph::Line::parse(lines);
|
||||
if (!line)
|
||||
return {};
|
||||
|
||||
paragraph_lines.append(line.release_nonnull());
|
||||
paragraph_text.append(*lines++);
|
||||
paragraph_text.append("\n");
|
||||
}
|
||||
|
||||
if (!paragraph_lines.is_empty())
|
||||
flush_paragraph();
|
||||
flush_paragraph();
|
||||
|
||||
return document;
|
||||
}
|
||||
|
|
|
@ -53,10 +53,7 @@ OwnPtr<Heading> Heading::parse(Vector<StringView>::ConstIterator& lines)
|
|||
|
||||
StringView title_view = line.substring_view(level + 1, line.length() - level - 1);
|
||||
auto text = Text::parse(title_view);
|
||||
if (!text.has_value())
|
||||
return {};
|
||||
|
||||
auto heading = make<Heading>(move(text.value()), level);
|
||||
auto heading = make<Heading>(move(text), level);
|
||||
|
||||
++lines;
|
||||
return heading;
|
||||
|
|
|
@ -59,10 +59,7 @@ OwnPtr<List> List::parse(Vector<StringView>::ConstIterator& lines)
|
|||
return true;
|
||||
|
||||
auto text = Text::parse(item_builder.string_view());
|
||||
if (!text.has_value())
|
||||
return false;
|
||||
|
||||
items.append(move(text.value()));
|
||||
items.append(move(text));
|
||||
|
||||
item_builder.clear();
|
||||
return true;
|
||||
|
|
|
@ -13,13 +13,7 @@ String Paragraph::render_to_html() const
|
|||
{
|
||||
StringBuilder builder;
|
||||
builder.append("<p>");
|
||||
bool first = true;
|
||||
for (auto& line : m_lines) {
|
||||
if (!first)
|
||||
builder.append('\n');
|
||||
first = false;
|
||||
builder.append(line.text().render_to_html().trim(" \t"));
|
||||
}
|
||||
builder.append(m_text.render_to_html());
|
||||
builder.append("</p>\n");
|
||||
return builder.build();
|
||||
}
|
||||
|
@ -27,26 +21,9 @@ String Paragraph::render_to_html() const
|
|||
String Paragraph::render_for_terminal(size_t) const
|
||||
{
|
||||
StringBuilder builder;
|
||||
bool first = true;
|
||||
for (auto& line : m_lines) {
|
||||
if (!first)
|
||||
builder.append(' ');
|
||||
first = false;
|
||||
builder.append(line.text().render_for_terminal());
|
||||
}
|
||||
builder.append(m_text.render_for_terminal());
|
||||
builder.append("\n\n");
|
||||
return builder.build();
|
||||
}
|
||||
|
||||
OwnPtr<Paragraph::Line> Paragraph::Line::parse(Vector<StringView>::ConstIterator& lines)
|
||||
{
|
||||
if (lines.is_end())
|
||||
return {};
|
||||
|
||||
auto text = Text::parse(*lines++);
|
||||
if (!text.has_value())
|
||||
return {};
|
||||
|
||||
return make<Paragraph::Line>(text.release_value());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -15,22 +15,8 @@ namespace Markdown {
|
|||
|
||||
class Paragraph final : public Block {
|
||||
public:
|
||||
class Line {
|
||||
public:
|
||||
explicit Line(Text&& text)
|
||||
: m_text(move(text))
|
||||
{
|
||||
}
|
||||
|
||||
static OwnPtr<Line> parse(Vector<StringView>::ConstIterator& lines);
|
||||
const Text& text() const { return m_text; }
|
||||
|
||||
private:
|
||||
Text m_text;
|
||||
};
|
||||
|
||||
Paragraph(NonnullOwnPtrVector<Line>&& lines)
|
||||
: m_lines(move(lines))
|
||||
Paragraph(Text text)
|
||||
: m_text(move(text))
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -40,7 +26,7 @@ public:
|
|||
virtual String render_for_terminal(size_t view_width = 0) const override;
|
||||
|
||||
private:
|
||||
NonnullOwnPtrVector<Line> m_lines;
|
||||
Text m_text;
|
||||
};
|
||||
|
||||
}
|
||||
|
|
|
@ -16,9 +16,7 @@ String Table::render_for_terminal(size_t view_width) const
|
|||
StringBuilder builder;
|
||||
|
||||
auto write_aligned = [&](const auto& text, auto width, auto alignment) {
|
||||
size_t original_length = 0;
|
||||
for (auto& span : text.spans())
|
||||
original_length += span.text.length();
|
||||
size_t original_length = text.terminal_length();
|
||||
auto string = text.render_for_terminal();
|
||||
if (alignment == Alignment::Center) {
|
||||
auto padding_length = (width - original_length) / 2;
|
||||
|
@ -137,11 +135,8 @@ OwnPtr<Table> Table::parse(Vector<StringView>::ConstIterator& lines)
|
|||
table->m_columns.resize(header_delimiters.size());
|
||||
|
||||
for (size_t i = 0; i < header_segments.size(); ++i) {
|
||||
auto text_option = Text::parse(header_segments[i]);
|
||||
if (!text_option.has_value())
|
||||
return {}; // An invalid 'text' in the header should just fail the table parse.
|
||||
auto text = Text::parse(header_segments[i]);
|
||||
|
||||
auto text = text_option.release_value();
|
||||
auto& column = table->m_columns[i];
|
||||
|
||||
column.header = move(text);
|
||||
|
@ -199,16 +194,10 @@ OwnPtr<Table> Table::parse(Vector<StringView>::ConstIterator& lines)
|
|||
if (i >= segments.size()) {
|
||||
// Ran out of segments, but still have headers.
|
||||
// Just make an empty cell.
|
||||
table->m_columns[i].rows.append(Text { "" });
|
||||
table->m_columns[i].rows.append(Text::parse(""));
|
||||
} else {
|
||||
auto text_option = Text::parse(segments[i]);
|
||||
// We treat an invalid 'text' as a literal.
|
||||
if (text_option.has_value()) {
|
||||
auto text = text_option.release_value();
|
||||
table->m_columns[i].rows.append(move(text));
|
||||
} else {
|
||||
table->m_columns[i].rows.append(Text { segments[i] });
|
||||
}
|
||||
auto text = Text::parse(segments[i]);
|
||||
table->m_columns[i].rows.append(move(text));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
/*
|
||||
* Copyright (c) 2019-2020, Sergey Bugaev <bugaevc@serenityos.org>
|
||||
* Copyright (c) 2021, Peter Elliott <pelliott@serenityos.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
@ -8,268 +9,362 @@
|
|||
#include <AK/ScopeGuard.h>
|
||||
#include <AK/StringBuilder.h>
|
||||
#include <LibMarkdown/Text.h>
|
||||
#include <ctype.h>
|
||||
#include <string.h>
|
||||
|
||||
namespace Markdown {
|
||||
|
||||
static String unescape(const StringView& text)
|
||||
void Text::EmphasisNode::render_to_html(StringBuilder& builder) const
|
||||
{
|
||||
StringBuilder builder;
|
||||
for (size_t i = 0; i < text.length(); ++i) {
|
||||
if (text[i] == '\\' && i != text.length() - 1) {
|
||||
builder.append(text[i + 1]);
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
builder.append(text[i]);
|
||||
}
|
||||
return builder.build();
|
||||
builder.append((strong) ? "<strong>" : "<em>");
|
||||
child->render_to_html(builder);
|
||||
builder.append((strong) ? "</strong>" : "</em>");
|
||||
}
|
||||
|
||||
Text::Text(String&& text)
|
||||
void Text::EmphasisNode::render_for_terminal(StringBuilder&) const
|
||||
{
|
||||
m_spans.append({ move(text), Style {} });
|
||||
// FIXME.
|
||||
}
|
||||
|
||||
size_t Text::EmphasisNode::terminal_length() const
|
||||
{
|
||||
return child->terminal_length();
|
||||
}
|
||||
|
||||
void Text::CodeNode::render_to_html(StringBuilder& builder) const
|
||||
{
|
||||
builder.append("<code>");
|
||||
code->render_to_html(builder);
|
||||
builder.append("</code>");
|
||||
}
|
||||
|
||||
void Text::CodeNode::render_for_terminal(StringBuilder&) const
|
||||
{
|
||||
// FIXME.
|
||||
}
|
||||
|
||||
size_t Text::CodeNode::terminal_length() const
|
||||
{
|
||||
return code->terminal_length();
|
||||
}
|
||||
|
||||
void Text::TextNode::render_to_html(StringBuilder& builder) const
|
||||
{
|
||||
builder.append(escape_html_entities(text));
|
||||
}
|
||||
|
||||
void Text::TextNode::render_for_terminal(StringBuilder&) const
|
||||
{
|
||||
// FIXME.
|
||||
}
|
||||
|
||||
size_t Text::TextNode::terminal_length() const
|
||||
{
|
||||
return text.length();
|
||||
}
|
||||
|
||||
void Text::LinkNode::render_to_html(StringBuilder& builder) const
|
||||
{
|
||||
if (is_image) {
|
||||
builder.append("<img src=\"");
|
||||
href->render_to_html(builder);
|
||||
builder.append("\" alt=\"");
|
||||
text->render_to_html(builder);
|
||||
builder.append("\" >");
|
||||
} else {
|
||||
builder.append("<a href=\"");
|
||||
href->render_to_html(builder);
|
||||
builder.append("\">");
|
||||
text->render_to_html(builder);
|
||||
builder.append("</a>");
|
||||
}
|
||||
}
|
||||
|
||||
void Text::LinkNode::render_for_terminal(StringBuilder&) const
|
||||
{
|
||||
// FIXME.
|
||||
}
|
||||
|
||||
size_t Text::LinkNode::terminal_length() const
|
||||
{
|
||||
return text->terminal_length();
|
||||
}
|
||||
|
||||
void Text::MultiNode::render_to_html(StringBuilder& builder) const
|
||||
{
|
||||
for (auto& child : children) {
|
||||
child.render_to_html(builder);
|
||||
}
|
||||
}
|
||||
|
||||
void Text::MultiNode::render_for_terminal(StringBuilder&) const
|
||||
{
|
||||
// FIXME.
|
||||
}
|
||||
|
||||
size_t Text::MultiNode::terminal_length() const
|
||||
{
|
||||
size_t length = 0;
|
||||
for (auto& child : children) {
|
||||
length += child.terminal_length();
|
||||
}
|
||||
return length;
|
||||
}
|
||||
|
||||
size_t Text::terminal_length() const
|
||||
{
|
||||
return m_node->terminal_length();
|
||||
}
|
||||
|
||||
String Text::render_to_html() const
|
||||
{
|
||||
StringBuilder builder;
|
||||
|
||||
Vector<String> open_tags;
|
||||
Style current_style;
|
||||
|
||||
for (auto& span : m_spans) {
|
||||
struct TagAndFlag {
|
||||
String tag;
|
||||
bool Style::*flag;
|
||||
};
|
||||
TagAndFlag tags_and_flags[] = {
|
||||
{ "em", &Style::emph },
|
||||
{ "b", &Style::strong },
|
||||
{ "code", &Style::code }
|
||||
};
|
||||
auto it = open_tags.find_if([&](const String& open_tag) {
|
||||
if (open_tag == "a" && current_style.href != span.style.href)
|
||||
return true;
|
||||
if (open_tag == "img" && current_style.img != span.style.img)
|
||||
return true;
|
||||
for (auto& tag_and_flag : tags_and_flags) {
|
||||
if (open_tag == tag_and_flag.tag && !(span.style.*tag_and_flag.flag))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
});
|
||||
|
||||
if (!it.is_end()) {
|
||||
// We found an open tag that should
|
||||
// not be open for the new span. Close
|
||||
// it and all the open tags that follow
|
||||
// it.
|
||||
for (ssize_t j = open_tags.size() - 1; j >= static_cast<ssize_t>(it.index()); --j) {
|
||||
auto& tag = open_tags[j];
|
||||
if (tag == "img") {
|
||||
builder.append("\" />");
|
||||
current_style.img = {};
|
||||
continue;
|
||||
}
|
||||
builder.appendff("</{}>", tag);
|
||||
if (tag == "a") {
|
||||
current_style.href = {};
|
||||
continue;
|
||||
}
|
||||
for (auto& tag_and_flag : tags_and_flags)
|
||||
if (tag == tag_and_flag.tag)
|
||||
current_style.*tag_and_flag.flag = false;
|
||||
}
|
||||
open_tags.shrink(it.index());
|
||||
}
|
||||
if (current_style.href.is_null() && !span.style.href.is_null()) {
|
||||
open_tags.append("a");
|
||||
builder.appendff("<a href=\"{}\">", span.style.href);
|
||||
}
|
||||
if (current_style.img.is_null() && !span.style.img.is_null()) {
|
||||
open_tags.append("img");
|
||||
builder.appendff("<img src=\"{}\" alt=\"", span.style.img);
|
||||
}
|
||||
for (auto& tag_and_flag : tags_and_flags) {
|
||||
if (current_style.*tag_and_flag.flag != span.style.*tag_and_flag.flag) {
|
||||
open_tags.append(tag_and_flag.tag);
|
||||
builder.appendff("<{}>", tag_and_flag.tag);
|
||||
}
|
||||
}
|
||||
|
||||
current_style = span.style;
|
||||
builder.append(escape_html_entities(span.text));
|
||||
}
|
||||
|
||||
for (ssize_t i = open_tags.size() - 1; i >= 0; --i) {
|
||||
auto& tag = open_tags[i];
|
||||
if (tag == "img") {
|
||||
builder.append("\" />");
|
||||
continue;
|
||||
}
|
||||
builder.appendff("</{}>", tag);
|
||||
}
|
||||
|
||||
return builder.build();
|
||||
m_node->render_to_html(builder);
|
||||
return builder.build().trim(" \n\t");
|
||||
}
|
||||
|
||||
String Text::render_for_terminal() const
|
||||
{
|
||||
StringBuilder builder;
|
||||
|
||||
for (auto& span : m_spans) {
|
||||
bool needs_styling = span.style.strong || span.style.emph || span.style.code;
|
||||
if (needs_styling) {
|
||||
builder.append("\033[");
|
||||
bool first = true;
|
||||
if (span.style.strong || span.style.code) {
|
||||
builder.append('1');
|
||||
first = false;
|
||||
}
|
||||
if (span.style.emph) {
|
||||
if (!first)
|
||||
builder.append(';');
|
||||
builder.append('4');
|
||||
}
|
||||
builder.append('m');
|
||||
}
|
||||
|
||||
if (!span.style.href.is_null()) {
|
||||
if (strstr(span.style.href.characters(), "://") != nullptr) {
|
||||
builder.append("\033]8;;");
|
||||
builder.append(span.style.href);
|
||||
builder.append("\033\\");
|
||||
}
|
||||
}
|
||||
|
||||
builder.append(span.text.characters());
|
||||
|
||||
if (needs_styling)
|
||||
builder.append("\033[0m");
|
||||
|
||||
if (!span.style.href.is_null()) {
|
||||
// When rendering for the terminal, ignore any
|
||||
// non-absolute links, because the user has no
|
||||
// chance to follow them anyway.
|
||||
if (strstr(span.style.href.characters(), "://") != nullptr) {
|
||||
builder.appendff(" <{}>", span.style.href);
|
||||
builder.append("\033]8;;\033\\");
|
||||
}
|
||||
}
|
||||
if (!span.style.img.is_null()) {
|
||||
if (strstr(span.style.img.characters(), "://") != nullptr) {
|
||||
builder.appendff(" <{}>", span.style.img);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return builder.build();
|
||||
m_node->render_for_terminal(builder);
|
||||
return builder.build().trim(" \n\t");
|
||||
}
|
||||
|
||||
Optional<Text> Text::parse(const StringView& str)
|
||||
Text Text::parse(StringView const& str)
|
||||
{
|
||||
Style current_style;
|
||||
size_t current_span_start = 0;
|
||||
int first_span_in_the_current_link = -1;
|
||||
bool current_link_is_actually_img = false;
|
||||
Vector<Span> spans;
|
||||
Text text;
|
||||
auto const tokens = tokenize(str);
|
||||
auto iterator = tokens.begin();
|
||||
text.m_node = parse_sequence(iterator, false);
|
||||
return text;
|
||||
}
|
||||
|
||||
auto append_span_if_needed = [&](size_t offset) {
|
||||
VERIFY(current_span_start <= offset);
|
||||
if (current_span_start != offset) {
|
||||
Span span {
|
||||
unescape(str.substring_view(current_span_start, offset - current_span_start)),
|
||||
current_style
|
||||
};
|
||||
spans.append(move(span));
|
||||
current_span_start = offset;
|
||||
}
|
||||
Vector<Text::Token> Text::tokenize(StringView const& str)
|
||||
{
|
||||
Vector<Token> tokens;
|
||||
StringBuilder current_token;
|
||||
|
||||
auto flush_token = [&](bool left_flanking, bool right_flanking, bool is_run) {
|
||||
if (current_token.is_empty())
|
||||
return;
|
||||
|
||||
tokens.append({
|
||||
current_token.build(),
|
||||
left_flanking,
|
||||
right_flanking,
|
||||
is_run,
|
||||
});
|
||||
current_token.clear();
|
||||
};
|
||||
|
||||
for (size_t offset = 0; offset < str.length(); offset++) {
|
||||
for (size_t offset = 0; offset < str.length(); ++offset) {
|
||||
auto has = [&](StringView const& seq) {
|
||||
if (offset + seq.length() > str.length())
|
||||
return false;
|
||||
|
||||
return str.substring_view(offset, seq.length()) == seq;
|
||||
};
|
||||
|
||||
auto expect = [&](StringView const& seq) {
|
||||
VERIFY(has(seq));
|
||||
flush_token(false, false, false);
|
||||
current_token.append(seq);
|
||||
flush_token(false, false, false);
|
||||
offset += seq.length() - 1;
|
||||
};
|
||||
|
||||
char ch = str[offset];
|
||||
|
||||
bool is_escape = ch == '\\';
|
||||
if (is_escape && offset != str.length() - 1) {
|
||||
offset++;
|
||||
continue;
|
||||
if (ch == '\\' && offset + 1 < str.length()) {
|
||||
current_token.append(str[offset + 1]);
|
||||
++offset;
|
||||
} else if (ch == '*' || ch == '_' || ch == '`') {
|
||||
flush_token(false, false, false);
|
||||
|
||||
char delim = ch;
|
||||
size_t run_offset;
|
||||
for (run_offset = offset; run_offset < str.length() && str[run_offset] == delim; ++run_offset) {
|
||||
current_token.append(str[run_offset]);
|
||||
}
|
||||
|
||||
bool left_flanking = run_offset < str.length() && !isspace(str[run_offset]);
|
||||
bool right_flanking = offset > 0 && !isspace(str[offset - 1]);
|
||||
flush_token(left_flanking, right_flanking, true);
|
||||
offset = run_offset - 1;
|
||||
|
||||
} else if (ch == '\n') {
|
||||
flush_token(false, false, false);
|
||||
current_token.append(ch);
|
||||
flush_token(false, false, false);
|
||||
} else if (has("[")) {
|
||||
expect("[");
|
||||
} else if (has("![")) {
|
||||
expect("![");
|
||||
} else if (has("](")) {
|
||||
expect("](");
|
||||
} else if (has(")")) {
|
||||
expect(")");
|
||||
} else {
|
||||
current_token.append(ch);
|
||||
}
|
||||
}
|
||||
flush_token(false, false, false);
|
||||
return tokens;
|
||||
}
|
||||
|
||||
NonnullOwnPtr<Text::MultiNode> Text::parse_sequence(Vector<Token>::ConstIterator& tokens, bool in_link)
|
||||
{
|
||||
auto node = make<MultiNode>();
|
||||
|
||||
for (; !tokens.is_end(); ++tokens) {
|
||||
if (tokens->is_run) {
|
||||
switch (tokens->run_char()) {
|
||||
case '*':
|
||||
case '_':
|
||||
node->children.append(parse_emph(tokens, in_link));
|
||||
break;
|
||||
case '`':
|
||||
node->children.append(parse_code(tokens));
|
||||
break;
|
||||
}
|
||||
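} else if (!in_link && (*tokens == "[" || *tokens == "![")) {
|
||||
node->children.append(parse_link(tokens));
|
||||
} else if (in_link && *tokens == "](") {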
|
||||
return node;
|
||||
} else {
|
||||
node->children.append(make<TextNode>(tokens->data));
|
||||
}
|
||||
|
||||
bool is_special_character = false;
|
||||
is_special_character |= ch == '`';
|
||||
if (!current_style.code)
|
||||
is_special_character |= ch == '*' || ch == '_' || ch == '[' || ch == ']' || (ch == '!' && offset + 1 < str.length() && str[offset + 1] == '[');
|
||||
if (!is_special_character)
|
||||
continue;
|
||||
if (in_link && !tokens.is_end() && *tokens == "](")
|
||||
return node;
|
||||
|
||||
append_span_if_needed(offset);
|
||||
if (tokens.is_end())
|
||||
break;
|
||||
}
|
||||
return node;
|
||||
}
|
||||
|
||||
switch (ch) {
|
||||
case '`':
|
||||
current_style.code = !current_style.code;
|
||||
break;
|
||||
case '*':
|
||||
case '_':
|
||||
if (offset + 1 < str.length() && str[offset + 1] == ch) {
|
||||
offset++;
|
||||
current_style.strong = !current_style.strong;
|
||||
} else {
|
||||
current_style.emph = !current_style.emph;
|
||||
}
|
||||
break;
|
||||
case '!':
|
||||
current_link_is_actually_img = true;
|
||||
break;
|
||||
case '[':
|
||||
if constexpr (MARKDOWN_DEBUG) {
|
||||
if (first_span_in_the_current_link != -1)
|
||||
dbgln("Dropping the outer link");
|
||||
}
|
||||
first_span_in_the_current_link = spans.size();
|
||||
break;
|
||||
case ']': {
|
||||
if (first_span_in_the_current_link == -1) {
|
||||
dbgln_if(MARKDOWN_DEBUG, "Unmatched ]");
|
||||
continue;
|
||||
}
|
||||
ScopeGuard guard = [&] {
|
||||
first_span_in_the_current_link = -1;
|
||||
current_link_is_actually_img = false;
|
||||
};
|
||||
if (offset + 2 >= str.length() || str[offset + 1] != '(')
|
||||
continue;
|
||||
offset += 2;
|
||||
size_t start_of_href = offset;
|
||||
bool Text::can_open(Token const& opening)
|
||||
{
|
||||
return (opening.run_char() == '*' && opening.left_flanking) || (opening.run_char() == '_' && opening.left_flanking && !opening.right_flanking);
|
||||
}
|
||||
|
||||
do
|
||||
offset++;
|
||||
while (offset < str.length() && str[offset] != ')');
|
||||
if (offset == str.length())
|
||||
offset--;
|
||||
bool Text::can_close_for(Token const& opening, Text::Token const& closing)
|
||||
{
|
||||
if (opening.run_char() != closing.run_char())
|
||||
return false;
|
||||
|
||||
const StringView href = str.substring_view(start_of_href, offset - start_of_href);
|
||||
for (size_t i = first_span_in_the_current_link; i < spans.size(); i++) {
|
||||
if (current_link_is_actually_img)
|
||||
spans[i].style.img = href;
|
||||
else
|
||||
spans[i].style.href = href;
|
||||
if (opening.run_length() != closing.run_length())
|
||||
return false;
|
||||
|
||||
return (opening.run_char() == '*' && closing.right_flanking) || (opening.run_char() == '_' && !closing.left_flanking && closing.right_flanking);
|
||||
}
|
||||
|
||||
NonnullOwnPtr<Text::Node> Text::parse_emph(Vector<Token>::ConstIterator& tokens, bool in_link)
|
||||
{
|
||||
auto opening = *tokens;
|
||||
|
||||
// Check that the opening delimiter run is properly flanking.
|
||||
if (!can_open(opening))
|
||||
return make<TextNode>(opening.data);
|
||||
|
||||
auto child = make<MultiNode>();
|
||||
for (++tokens; !tokens.is_end(); ++tokens) {
|
||||
if (tokens->is_run) {
|
||||
if (can_close_for(opening, *tokens)) {
|
||||
return make<EmphasisNode>(opening.run_length() >= 2, move(child));
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
VERIFY_NOT_REACHED();
|
||||
|
||||
switch (tokens->run_char()) {
|
||||
case '*':
|
||||
case '_':
|
||||
child->children.append(parse_emph(tokens, in_link));
|
||||
break;
|
||||
case '`':
|
||||
child->children.append(parse_code(tokens));
|
||||
break;
|
||||
}
|
||||
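} else if (*tokens == "[" || *tokens == "![") {
|
||||
child->children.append(parse_link(tokens));
|
||||
} else if (in_link && *tokens == "](") {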
|
||||
child->children.prepend(make<TextNode>(opening.data));
|
||||
return child;
|
||||
} else {
|
||||
child->children.append(make<TextNode>(tokens->data));
|
||||
}
|
||||
|
||||
// We've processed the character as a special, so the next offset will
|
||||
// start after it. Note that explicit continue statements skip over this
|
||||
// line, effectively treating the character as not special.
|
||||
current_span_start = offset + 1;
|
||||
if (in_link && !tokens.is_end() && *tokens == "](") {
|
||||
child->children.prepend(make<TextNode>(opening.data));
|
||||
return child;
|
||||
}
|
||||
|
||||
if (tokens.is_end())
|
||||
break;
|
||||
}
|
||||
child->children.prepend(make<TextNode>(opening.data));
|
||||
return child;
|
||||
}
|
||||
|
||||
NonnullOwnPtr<Text::Node> Text::parse_code(Vector<Token>::ConstIterator& tokens)
|
||||
{
|
||||
auto opening = *tokens;
|
||||
|
||||
auto is_closing = [&](Token const& token) {
|
||||
return token.is_run && token.run_char() == '`' && token.run_length() == opening.run_length();
|
||||
};
|
||||
|
||||
bool is_all_whitespace = true;
|
||||
auto code = make<MultiNode>();
|
||||
for (auto iterator = tokens + 1; !iterator.is_end(); ++iterator) {
|
||||
if (is_closing(*iterator)) {
|
||||
tokens = iterator;
|
||||
|
||||
// Strip first and last space, when appropriate.
|
||||
if (!is_all_whitespace) {
|
||||
auto& first = dynamic_cast<TextNode&>(code->children.first());
|
||||
auto& last = dynamic_cast<TextNode&>(code->children.last());
|
||||
if (first.text.starts_with(" ") && last.text.ends_with(" ")) {
|
||||
first.text = first.text.substring(1);
|
||||
last.text = last.text.substring(0, last.text.length() - 1);
|
||||
}
|
||||
}
|
||||
|
||||
return make<CodeNode>(move(code));
|
||||
}
|
||||
|
||||
is_all_whitespace = is_all_whitespace && iterator->data.is_whitespace();
|
||||
code->children.append(make<TextNode>((*iterator == "\n") ? " " : iterator->data));
|
||||
}
|
||||
|
||||
append_span_if_needed(str.length());
|
||||
|
||||
return Text(move(spans));
|
||||
return make<TextNode>(opening.data);
|
||||
}
|
||||
|
||||
NonnullOwnPtr<Text::Node> Text::parse_link(Vector<Token>::ConstIterator& tokens)
|
||||
{
|
||||
auto opening = *tokens++;
|
||||
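bool is_image = opening == "![";
|
||||
|
||||
auto link_text = parse_sequence(tokens, true);
|
||||
|
||||
if (tokens.is_end() || *tokens != "](") {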
|
||||
link_text->children.prepend(make<TextNode>(opening.data));
|
||||
return link_text;
|
||||
}
|
||||
auto seperator = *tokens;
|
||||
VERIFY(seperator == "](");
|
||||
|
||||
auto address = make<MultiNode>();
|
||||
for (auto iterator = tokens + 1; !iterator.is_end(); ++iterator) {
|
||||
if (*iterator == ")") {
|
||||
tokens = iterator;
|
||||
return make<LinkNode>(is_image, move(link_text), move(address));
|
||||
}
|
||||
|
||||
address->children.append(make<TextNode>(iterator->data));
|
||||
}
|
||||
|
||||
link_text->children.prepend(make<TextNode>(opening.data));
|
||||
link_text->children.append(make<TextNode>(seperator.data));
|
||||
return link_text;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
/*
|
||||
* Copyright (c) 2019-2020, Sergey Bugaev <bugaevc@serenityos.org>
|
||||
* Copyright (c) 2021, Peter Elliott <pelliott@serenityos.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
@ -7,48 +8,138 @@
|
|||
#pragma once
|
||||
|
||||
#include <AK/Noncopyable.h>
|
||||
#include <AK/NonnullOwnPtrVector.h>
|
||||
#include <AK/OwnPtr.h>
|
||||
#include <AK/String.h>
|
||||
#include <AK/Vector.h>
|
||||
|
||||
namespace Markdown {
|
||||
|
||||
class Text final {
|
||||
AK_MAKE_NONCOPYABLE(Text);
|
||||
|
||||
public:
|
||||
struct Style {
|
||||
bool emph { false };
|
||||
bool strong { false };
|
||||
bool code { false };
|
||||
String href;
|
||||
String img;
|
||||
class Node {
|
||||
public:
|
||||
virtual void render_to_html(StringBuilder& builder) const = 0;
|
||||
virtual void render_for_terminal(StringBuilder& builder) const = 0;
|
||||
virtual size_t terminal_length() const = 0;
|
||||
|
||||
virtual ~Node() { }
|
||||
};
|
||||
|
||||
struct Span {
|
||||
class EmphasisNode : public Node {
|
||||
public:
|
||||
bool strong;
|
||||
NonnullOwnPtr<Node> child;
|
||||
|
||||
EmphasisNode(bool strong, NonnullOwnPtr<Node> child)
|
||||
: strong(strong)
|
||||
, child(move(child))
|
||||
{
|
||||
}
|
||||
|
||||
virtual void render_to_html(StringBuilder& builder) const override;
|
||||
virtual void render_for_terminal(StringBuilder& builder) const override;
|
||||
virtual size_t terminal_length() const override;
|
||||
};
|
||||
|
||||
class CodeNode : public Node {
|
||||
public:
|
||||
NonnullOwnPtr<Node> code;
|
||||
|
||||
CodeNode(NonnullOwnPtr<Node> code)
|
||||
: code(move(code))
|
||||
{
|
||||
}
|
||||
|
||||
virtual void render_to_html(StringBuilder& builder) const override;
|
||||
virtual void render_for_terminal(StringBuilder& builder) const override;
|
||||
virtual size_t terminal_length() const override;
|
||||
};
|
||||
|
||||
class TextNode : public Node {
|
||||
public:
|
||||
String text;
|
||||
Style style;
|
||||
|
||||
TextNode(StringView const& text)
|
||||
: text(text)
|
||||
{
|
||||
}
|
||||
|
||||
virtual void render_to_html(StringBuilder& builder) const override;
|
||||
virtual void render_for_terminal(StringBuilder& builder) const override;
|
||||
virtual size_t terminal_length() const override;
|
||||
};
|
||||
|
||||
explicit Text(String&& text);
|
||||
Text(Text&& text) = default;
|
||||
Text() = default;
|
||||
class LinkNode : public Node {
|
||||
public:
|
||||
bool is_image;
|
||||
NonnullOwnPtr<Node> text;
|
||||
NonnullOwnPtr<Node> href;
|
||||
|
||||
Text& operator=(Text&&) = default;
|
||||
LinkNode(bool is_image, NonnullOwnPtr<Node> text, NonnullOwnPtr<Node> href)
|
||||
: is_image(is_image)
|
||||
, text(move(text))
|
||||
, href(move(href))
|
||||
{
|
||||
}
|
||||
|
||||
const Vector<Span>& spans() const { return m_spans; }
|
||||
virtual void render_to_html(StringBuilder& builder) const override;
|
||||
virtual void render_for_terminal(StringBuilder& builder) const override;
|
||||
virtual size_t terminal_length() const override;
|
||||
};
|
||||
|
||||
class MultiNode : public Node {
|
||||
public:
|
||||
NonnullOwnPtrVector<Node> children;
|
||||
|
||||
virtual void render_to_html(StringBuilder& builder) const override;
|
||||
virtual void render_for_terminal(StringBuilder& builder) const override;
|
||||
virtual size_t terminal_length() const override;
|
||||
};
|
||||
|
||||
size_t terminal_length() const;
|
||||
|
||||
String render_to_html() const;
|
||||
String render_for_terminal() const;
|
||||
|
||||
static Optional<Text> parse(const StringView&);
|
||||
static Text parse(StringView const&);
|
||||
|
||||
private:
|
||||
Text(Vector<Span>&& spans)
|
||||
: m_spans(move(spans))
|
||||
{
|
||||
}
|
||||
struct Token {
|
||||
String data;
|
||||
// Flanking basically means that a delimiter run has a non-whitespace,
|
||||
// non-punctuation character on the corresponding side. For a more exact
|
||||
// definition, see the CommonMark spec.
|
||||
bool left_flanking;
|
||||
bool right_flanking;
|
||||
// is_run indicates that this token is a 'delimiter run'. A delimiter
|
||||
// run occurs when several of the same syntactical character ('`', '_',
|
||||
// or '*') occur in a row.
|
||||
bool is_run;
|
||||
|
||||
Vector<Span> m_spans;
|
||||
char run_char() const
|
||||
{
|
||||
VERIFY(is_run);
|
||||
return data[0];
|
||||
}
|
||||
char run_length() const
|
||||
{
|
||||
VERIFY(is_run);
|
||||
return data.length();
|
||||
}
|
||||
bool operator==(StringView const& str) const { return str == data; }
|
||||
};
|
||||
|
||||
static Vector<Token> tokenize(StringView const&);
|
||||
|
||||
static bool can_open(Token const& opening);
|
||||
static bool can_close_for(Token const& opening, Token const& closing);
|
||||
|
||||
static NonnullOwnPtr<MultiNode> parse_sequence(Vector<Token>::ConstIterator& tokens, bool in_link);
|
||||
static NonnullOwnPtr<Node> parse_emph(Vector<Token>::ConstIterator& tokens, bool in_link);
|
||||
static NonnullOwnPtr<Node> parse_code(Vector<Token>::ConstIterator& tokens);
|
||||
static NonnullOwnPtr<Node> parse_link(Vector<Token>::ConstIterator& tokens);
|
||||
|
||||
OwnPtr<Node> m_node;
|
||||
};
|
||||
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue