LibGUI: Rework GML parser into a lexer+parser

This will make it easier to add GML syntax highlighting. :^)
This commit is contained in:
Andreas Kling 2020-12-21 12:32:27 +01:00
parent 5f51d85184
commit 3d3084f088
Notes: sideshowbarker 2024-07-19 00:41:30 +09:00
4 changed files with 323 additions and 49 deletions

View file

@ -30,6 +30,7 @@ set(SOURCES
FileSystemModel.cpp
FilteringProxyModel.cpp
Frame.cpp
GMLLexer.cpp
GMLParser.cpp
GroupBox.cpp
HeaderView.cpp

View file

@ -0,0 +1,180 @@
/*
* Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "GMLLexer.h"
#include <AK/Vector.h>
#include <ctype.h>
namespace GUI {
GMLLexer::GMLLexer(const StringView& input)
: m_input(input)
{
}
// Returns the character `offset` positions ahead of the cursor without
// consuming it, or 0 when that position is at or past the end of the input.
char GMLLexer::peek(size_t offset) const
{
    const size_t lookahead_index = m_index + offset;
    if (lookahead_index < m_input.length())
        return m_input[lookahead_index];
    return 0;
}
// Consumes and returns the character under the cursor. The caller must make
// sure input remains (asserted).
//
// m_previous_position is set to the position of the consumed character, and
// m_position moves to the following character: a '\n' starts a new line at
// column 0, anything else advances the column by one.
char GMLLexer::consume()
{
    ASSERT(m_index < m_input.length());
    m_previous_position = m_position;

    const char consumed = m_input[m_index];
    ++m_index;

    if (consumed != '\n') {
        ++m_position.column;
        return consumed;
    }

    ++m_position.line;
    m_position.column = 0;
    return consumed;
}
// Returns true if `ch` may begin an identifier: an alphabetic character or '_'.
static bool is_valid_identifier_start(char ch)
{
    // <ctype.h> classifiers are undefined for negative values other than EOF,
    // so convert through unsigned char before classifying.
    return isalpha(static_cast<unsigned char>(ch)) != 0 || ch == '_';
}
// Returns true if `ch` may appear after the first character of an identifier:
// an alphanumeric character or '_'.
static bool is_valid_identifier_character(char ch)
{
    // <ctype.h> classifiers are undefined for negative values other than EOF,
    // so convert through unsigned char before classifying.
    return isalnum(static_cast<unsigned char>(ch)) != 0 || ch == '_';
}
// Returns true if `ch` may begin a class name (the character following '@'):
// an alphabetic character or '_'.
static bool is_valid_class_start(char ch)
{
    // <ctype.h> classifiers are undefined for negative values other than EOF,
    // so convert through unsigned char before classifying.
    return isalpha(static_cast<unsigned char>(ch)) != 0 || ch == '_';
}
// Returns true if `ch` may appear inside a class name: an alphanumeric
// character, '_', or ':' (so namespaced names such as GUI::Widget lex as one
// token).
static bool is_valid_class_character(char ch)
{
    // <ctype.h> classifiers are undefined for negative values other than EOF,
    // so convert through unsigned char before classifying.
    return isalnum(static_cast<unsigned char>(ch)) != 0 || ch == '_' || ch == ':';
}
// Tokenizes the input from the current cursor to the end and returns the
// tokens in source order. Whitespace between tokens produces no token.
//
// For every token, m_view is the slice of the input it covers, m_start is the
// position of its first character, and m_end is the position of the last
// character consumed before the token was committed.
Vector<GMLToken> GMLLexer::lex()
{
    Vector<GMLToken> tokens;

    size_t token_start_index = 0;
    GMLPosition token_start_position { 0, 0 };

    // <ctype.h> classifiers are undefined for negative values other than EOF,
    // so convert through unsigned char before classifying.
    auto is_space = [](char ch) {
        return isspace(static_cast<unsigned char>(ch)) != 0;
    };

    // Remembers where the token that is about to be consumed starts.
    auto begin_token = [&] {
        token_start_index = m_index;
        token_start_position = m_position;
    };

    // Appends a token covering everything consumed since begin_token().
    auto commit_token = [&](GMLToken::Type type) {
        GMLToken token;
        token.m_view = m_input.substring_view(token_start_index, m_index - token_start_index);
        token.m_type = type;
        token.m_start = token_start_position;
        token.m_end = m_previous_position;
        tokens.append(token);
    };

    // Emits a token consisting of exactly the character under the cursor.
    auto emit_single_character_token = [&](GMLToken::Type type) {
        begin_token();
        consume();
        commit_token(type);
    };

    auto skip_whitespace = [&] {
        while (is_space(peek()))
            consume();
    };

    // Consumes up to, but not including, the next '\n' (or the end of input).
    auto consume_until_end_of_line = [&] {
        while (peek() && peek() != '\n')
            consume();
    };

    // Lexes "@Name" as a ClassMarker token ('@') followed by a ClassName token.
    auto consume_class = [&] {
        emit_single_character_token(GMLToken::Type::ClassMarker);
        begin_token();
        while (is_valid_class_character(peek()))
            consume();
        commit_token(GMLToken::Type::ClassName);
    };

    while (m_index < m_input.length()) {
        if (is_space(peek())) {
            skip_whitespace();
            continue;
        }

        // C++ style comments
        if (peek(0) == '/' && peek(1) == '/') {
            begin_token();
            consume_until_end_of_line();
            commit_token(GMLToken::Type::Comment);
            continue;
        }

        if (peek() == '{') {
            emit_single_character_token(GMLToken::Type::LeftCurly);
            continue;
        }

        if (peek() == '}') {
            emit_single_character_token(GMLToken::Type::RightCurly);
            continue;
        }

        if (peek(0) == '@' && is_valid_class_start(peek(1))) {
            consume_class();
            continue;
        }

        if (is_valid_identifier_start(peek())) {
            begin_token();
            consume();
            while (is_valid_identifier_character(peek()))
                consume();
            commit_token(GMLToken::Type::Identifier);
            continue;
        }

        if (peek() == ':') {
            emit_single_character_token(GMLToken::Type::Colon);
            skip_whitespace();

            // A property value is either a nested object ("@Class ...") or the
            // rest of the line, handed to the parser as a raw JSON value.
            if (peek(0) == '@' && is_valid_class_start(peek(1))) {
                consume_class();
            } else {
                begin_token();
                consume_until_end_of_line();
                commit_token(GMLToken::Type::JsonValue);
            }
            continue;
        }

        // Anything else becomes a one-character Unknown token. Starting the
        // token here is what keeps its view and start position from leaking
        // over from whatever token was begun last.
        emit_single_character_token(GMLToken::Type::Unknown);
    }

    return tokens;
}
}

View file

@ -0,0 +1,90 @@
/*
* Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <AK/StringView.h>
namespace GUI {
// X-macro listing every GML token type. Define __TOKEN(x) before expanding
// FOR_EACH_TOKEN_TYPE and #undef it afterwards; GMLToken expands this list to
// generate both its Type enumerators and the names returned by to_string().
#define FOR_EACH_TOKEN_TYPE \
__TOKEN(Unknown) \
__TOKEN(Comment) \
__TOKEN(ClassMarker) \
__TOKEN(ClassName) \
__TOKEN(LeftCurly) \
__TOKEN(RightCurly) \
__TOKEN(Identifier) \
__TOKEN(Colon) \
__TOKEN(JsonValue)
// A location in GML source text. GMLLexer starts counting at { 0, 0 } and, on
// every '\n', increments `line` and resets `column` to 0, so both fields are
// zero-based.
struct GMLPosition {
    // Default to the start of the input so that a default-constructed position
    // never carries indeterminate values. The struct remains an aggregate, so
    // `GMLPosition { line, column }` keeps working.
    size_t line { 0 };
    size_t column { 0 };
};
// A single token produced by GMLLexer: its type, the slice of the input it
// covers, and where it starts and ends in the source text.
struct GMLToken {
// One enumerator per entry in FOR_EACH_TOKEN_TYPE.
enum class Type {
#define __TOKEN(x) x,
FOR_EACH_TOKEN_TYPE
#undef __TOKEN
};
// Returns the enumerator name of m_type as a string literal (e.g. "Comment").
// Asserts if m_type holds a value that is not a declared enumerator.
const char* to_string() const
{
switch (m_type) {
#define __TOKEN(x) \
case Type::x: \
return #x;
FOR_EACH_TOKEN_TYPE
#undef __TOKEN
}
ASSERT_NOT_REACHED();
}
Type m_type { Type::Unknown };
// The characters of the lexer input that make up this token.
StringView m_view;
// Position of the token's first character.
GMLPosition m_start;
// Position of the last character the lexer consumed before committing the token.
GMLPosition m_end;
};
// Splits GML source text into a flat list of GMLToken values.
class GMLLexer {
public:
// Creates a lexer over the given input; the view is stored in m_input.
GMLLexer(const StringView&);
// Tokenizes the input from the current cursor to the end and returns the tokens in order.
Vector<GMLToken> lex();
private:
// Returns the character `offset` positions ahead of the cursor, or 0 past the end of the input.
char peek(size_t offset = 0) const;
// Returns the character under the cursor and advances the cursor and position tracking.
char consume();
StringView m_input;
// Index into m_input of the next character to be consumed.
size_t m_index { 0 };
// Position of the most recently consumed character.
GMLPosition m_previous_position { 0, 0 };
// Position of the character at m_index.
GMLPosition m_position { 0, 0 };
};
}

View file

@ -27,87 +27,81 @@
#include <AK/GenericLexer.h>
#include <AK/JsonObject.h>
#include <AK/JsonValue.h>
#include <AK/Queue.h>
#include <LibGUI/GMLLexer.h>
#include <LibGUI/GMLParser.h>
#include <ctype.h>
namespace GUI {
static bool is_valid_class_name_character(char ch)
{
return isalpha(ch) || ch == ':';
}
static bool is_valid_property_name_character(char ch)
{
return isalpha(ch) || ch == '_';
}
static void swallow_whitespace(GenericLexer& scanner)
{
scanner.consume_while([](auto ch) { return isspace(ch); });
}
static Optional<JsonValue> parse_core_object(GenericLexer& scanner)
static Optional<JsonValue> parse_core_object(Queue<GMLToken>& tokens)
{
JsonObject object;
JsonArray children;
// '@Foo' means new Core::Object of class Foo
if (!scanner.consume_specific('@')) {
dbgln("Expected '@'");
auto peek = [&] {
if (tokens.is_empty())
return GMLToken::Type::Unknown;
return tokens.head().m_type;
};
if (peek() != GMLToken::Type::ClassMarker) {
dbgln("Expected class marker");
return {};
}
auto class_name = scanner.consume_while([](auto ch) { return is_valid_class_name_character(ch); });
object.set("class", JsonValue(class_name));
tokens.dequeue();
swallow_whitespace(scanner);
if (!scanner.consume_specific('{')) {
dbgln("Expected '{{'");
if (peek() != GMLToken::Type::ClassName) {
dbgln("Expected class name");
return {};
}
swallow_whitespace(scanner);
auto class_name = tokens.dequeue();
object.set("class", JsonValue(class_name.m_view));
if (peek() != GMLToken::Type::LeftCurly) {
dbgln("Expected {{");
return {};
}
tokens.dequeue();
for (;;) {
swallow_whitespace(scanner);
if (scanner.peek() == '}') {
if (peek() == GMLToken::Type::RightCurly) {
// End of object
break;
}
if (scanner.peek() == '@') {
if (peek() == GMLToken::Type::ClassMarker) {
// It's a child object.
auto value = parse_core_object(scanner);
if (!value.has_value())
auto value = parse_core_object(tokens);
if (!value.has_value()) {
dbgln("Parsing child object failed");
return {};
}
if (!value.value().is_object()) {
dbgln("Expected child to be Core::Object");
return {};
}
children.append(value.release_value());
} else {
} else if (peek() == GMLToken::Type::Identifier) {
// It's a property.
auto property_name = scanner.consume_while([](auto ch) { return is_valid_property_name_character(ch); });
swallow_whitespace(scanner);
auto property_name = tokens.dequeue();
if (property_name.is_empty()) {
if (property_name.m_view.is_empty()) {
dbgln("Expected non-empty property name");
return {};
}
if (!scanner.consume_specific(':')) {
if (peek() != GMLToken::Type::Colon) {
dbgln("Expected ':'");
return {};
}
swallow_whitespace(scanner);
tokens.dequeue();
JsonValue value;
if (scanner.peek() == '@') {
auto parsed_value = parse_core_object(scanner);
if (peek() == GMLToken::Type::ClassMarker) {
auto parsed_value = parse_core_object(tokens);
if (!parsed_value.has_value())
return {};
if (!parsed_value.value().is_object()) {
@ -115,23 +109,27 @@ static Optional<JsonValue> parse_core_object(GenericLexer& scanner)
return {};
}
value = parsed_value.release_value();
} else {
auto value_string = scanner.consume_line();
auto parsed_value = JsonValue::from_string(value_string);
} else if (peek() == GMLToken::Type::JsonValue) {
auto value_string = tokens.dequeue();
auto parsed_value = JsonValue::from_string(value_string.m_view);
if (!parsed_value.has_value()) {
dbgln("Expected property to be JSON value");
return {};
}
value = parsed_value.release_value();
}
object.set(property_name, move(value));
object.set(property_name.m_view, move(value));
} else {
dbgln("Expected child, property, or }}");
return {};
}
}
if (!scanner.consume_specific('}')) {
dbgln("Expected '}'");
if (peek() != GMLToken::Type::RightCurly) {
dbgln("Expected }}");
return {};
}
tokens.dequeue();
if (!children.is_empty())
object.set("children", move(children));
@ -141,8 +139,13 @@ static Optional<JsonValue> parse_core_object(GenericLexer& scanner)
JsonValue parse_gml(const StringView& string)
{
GenericLexer scanner(string);
auto root = parse_core_object(scanner);
auto lexer = GMLLexer(string);
Queue<GMLToken> tokens;
for (auto& token : lexer.lex())
tokens.enqueue(token);
auto root = parse_core_object(tokens);
if (!root.has_value())
return JsonValue();