mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-07-16 05:51:55 +00:00
LibGUI: Rework GML parser into a lexer+parser
This will make it easier to add GML syntax highlighting. :^)
This commit is contained in:
parent
5f51d85184
commit
3d3084f088
Notes:
sideshowbarker
2024-07-19 00:41:30 +09:00
Author: https://github.com/awesomekling
Commit: 3d3084f088
4 changed files with 323 additions and 49 deletions
|
@ -30,6 +30,7 @@ set(SOURCES
|
|||
FileSystemModel.cpp
|
||||
FilteringProxyModel.cpp
|
||||
Frame.cpp
|
||||
GMLLexer.cpp
|
||||
GMLParser.cpp
|
||||
GroupBox.cpp
|
||||
HeaderView.cpp
|
||||
|
|
180
Libraries/LibGUI/GMLLexer.cpp
Normal file
180
Libraries/LibGUI/GMLLexer.cpp
Normal file
|
@ -0,0 +1,180 @@
|
|||
/*
|
||||
* Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "GMLLexer.h"
|
||||
#include <AK/Vector.h>
|
||||
#include <ctype.h>
|
||||
|
||||
namespace GUI {
|
||||
|
||||
// Creates a lexer over `input`. Only the view is stored here; the index and
// position members keep their in-class default values.
GMLLexer::GMLLexer(const StringView& input)
    : m_input(input)
{
}
|
||||
|
||||
char GMLLexer::peek(size_t offset) const
|
||||
{
|
||||
if ((m_index + offset) >= m_input.length())
|
||||
return 0;
|
||||
return m_input[m_index + offset];
|
||||
}
|
||||
|
||||
char GMLLexer::consume()
|
||||
{
|
||||
ASSERT(m_index < m_input.length());
|
||||
char ch = m_input[m_index++];
|
||||
m_previous_position = m_position;
|
||||
if (ch == '\n') {
|
||||
m_position.line++;
|
||||
m_position.column = 0;
|
||||
} else {
|
||||
m_position.column++;
|
||||
}
|
||||
return ch;
|
||||
}
|
||||
|
||||
// Returns true if `ch` may begin an identifier: a letter or an underscore.
static bool is_valid_identifier_start(char ch)
{
    // The <ctype.h> functions require a value representable as unsigned char
    // (or EOF); passing a negative plain char is undefined behavior.
    return isalpha(static_cast<unsigned char>(ch)) != 0 || ch == '_';
}
|
||||
|
||||
// Returns true if `ch` may appear inside an identifier: a letter, a digit or
// an underscore.
static bool is_valid_identifier_character(char ch)
{
    // The <ctype.h> functions require a value representable as unsigned char
    // (or EOF); passing a negative plain char is undefined behavior.
    return isalnum(static_cast<unsigned char>(ch)) != 0 || ch == '_';
}
|
||||
|
||||
// Returns true if `ch` may begin a class name (the character right after
// '@'): a letter or an underscore.
static bool is_valid_class_start(char ch)
{
    // The <ctype.h> functions require a value representable as unsigned char
    // (or EOF); passing a negative plain char is undefined behavior.
    return isalpha(static_cast<unsigned char>(ch)) != 0 || ch == '_';
}
|
||||
|
||||
// Returns true if `ch` may appear inside a class name: a letter, a digit, an
// underscore, or ':' (so that namespaced names such as GUI::Widget lex as a
// single token).
static bool is_valid_class_character(char ch)
{
    // The <ctype.h> functions require a value representable as unsigned char
    // (or EOF); passing a negative plain char is undefined behavior.
    return isalnum(static_cast<unsigned char>(ch)) != 0 || ch == '_' || ch == ':';
}
|
||||
|
||||
// Splits the remaining input into GML tokens and returns them in source order.
//
// Token rules, tried in this order at each position:
//   - whitespace is skipped and produces no token
//   - "//" up to (not including) the end of the line -> Comment
//   - '{' / '}'                                      -> LeftCurly / RightCurly
//   - '@' followed by a valid class start            -> ClassMarker + ClassName
//   - identifier                                     -> Identifier
//   - ':'                                            -> Colon, followed by either a
//     class (ClassMarker + ClassName) or the rest of the line as a JsonValue
//   - any other single character                     -> Unknown
//
// Each token's m_view is a sub-view of the lexer's input. m_start is the
// position of the token's first character and m_end is the position that was
// current just before the most recently consumed character.
Vector<GMLToken> GMLLexer::lex()
{
    Vector<GMLToken> tokens;

    size_t token_start_index = 0;
    GMLPosition token_start_position { 0, 0 };

    // <ctype.h> functions have undefined behavior for negative values other
    // than EOF, so never hand them a plain (possibly signed) char.
    auto is_whitespace = [](char ch) {
        return isspace(static_cast<unsigned char>(ch)) != 0;
    };

    auto begin_token = [&] {
        token_start_index = m_index;
        token_start_position = m_position;
    };

    auto commit_token = [&](auto type) {
        GMLToken token;
        token.m_view = m_input.substring_view(token_start_index, m_index - token_start_index);
        token.m_type = type;
        token.m_start = token_start_position;
        token.m_end = m_previous_position;
        tokens.append(token);
    };

    // Emits a token consisting of exactly the next character.
    auto consume_single_character_token = [&](auto type) {
        begin_token();
        consume();
        commit_token(type);
    };

    auto skip_whitespace = [&] {
        while (is_whitespace(peek()))
            consume();
    };

    // Emits everything up to (not including) the next newline as one token.
    auto consume_rest_of_line = [&](auto type) {
        begin_token();
        while (peek() && peek() != '\n')
            consume();
        commit_token(type);
    };

    // Expects peek() == '@'; emits the marker and then the class name.
    auto consume_class = [&] {
        consume_single_character_token(GMLToken::Type::ClassMarker);
        begin_token();
        while (is_valid_class_character(peek()))
            consume();
        commit_token(GMLToken::Type::ClassName);
    };

    auto at_class = [&] {
        return peek(0) == '@' && is_valid_class_start(peek(1));
    };

    while (m_index < m_input.length()) {
        if (is_whitespace(peek(0))) {
            skip_whitespace();
            continue;
        }

        // C++ style comments
        if (peek(0) == '/' && peek(1) == '/') {
            consume_rest_of_line(GMLToken::Type::Comment);
            continue;
        }

        if (peek(0) == '{') {
            consume_single_character_token(GMLToken::Type::LeftCurly);
            continue;
        }

        if (peek(0) == '}') {
            consume_single_character_token(GMLToken::Type::RightCurly);
            continue;
        }

        if (at_class()) {
            consume_class();
            continue;
        }

        if (is_valid_identifier_start(peek(0))) {
            begin_token();
            consume();
            while (is_valid_identifier_character(peek(0)))
                consume();
            commit_token(GMLToken::Type::Identifier);
            continue;
        }

        if (peek(0) == ':') {
            consume_single_character_token(GMLToken::Type::Colon);

            skip_whitespace();

            // A property value is either a nested object or raw text that the
            // parser later interprets as JSON; the text is not validated here.
            if (at_class())
                consume_class();
            else
                consume_rest_of_line(GMLToken::Type::JsonValue);
            continue;
        }

        // Anything else is a one-character Unknown token. The token must be
        // started here; otherwise its view and start position would still
        // refer to the beginning of the previously lexed token.
        consume_single_character_token(GMLToken::Type::Unknown);
    }

    return tokens;
}
|
||||
|
||||
}
|
90
Libraries/LibGUI/GMLLexer.h
Normal file
90
Libraries/LibGUI/GMLLexer.h
Normal file
|
@ -0,0 +1,90 @@
|
|||
/*
|
||||
* Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <AK/StringView.h>
|
||||
|
||||
namespace GUI {
|
||||
|
||||
// X-macro listing every GML token type, in declaration order.
// Define __TOKEN(x) before expanding FOR_EACH_TOKEN_TYPE and #undef it
// afterwards; GMLToken expands this list to generate both the members of
// GMLToken::Type and the names returned by GMLToken::to_string().
#define FOR_EACH_TOKEN_TYPE \
|
||||
__TOKEN(Unknown) \
|
||||
__TOKEN(Comment) \
|
||||
__TOKEN(ClassMarker) \
|
||||
__TOKEN(ClassName) \
|
||||
__TOKEN(LeftCurly) \
|
||||
__TOKEN(RightCurly) \
|
||||
__TOKEN(Identifier) \
|
||||
__TOKEN(Colon) \
|
||||
__TOKEN(JsonValue)
|
||||
|
||||
// A location in GML source text, as tracked by GMLLexer. Both counters start
// at 0; the lexer bumps `line` and resets `column` on '\n', and bumps
// `column` for every other character.
//
// The members are zero-initialized by default so that a default-constructed
// position never carries indeterminate values. The struct stays an aggregate,
// so `GMLPosition { line, column }` keeps working.
struct GMLPosition {
    size_t line { 0 };
    size_t column { 0 };
};
|
||||
|
||||
struct GMLToken {
|
||||
enum class Type {
|
||||
#define __TOKEN(x) x,
|
||||
FOR_EACH_TOKEN_TYPE
|
||||
#undef __TOKEN
|
||||
};
|
||||
|
||||
const char* to_string() const
|
||||
{
|
||||
switch (m_type) {
|
||||
#define __TOKEN(x) \
|
||||
case Type::x: \
|
||||
return #x;
|
||||
FOR_EACH_TOKEN_TYPE
|
||||
#undef __TOKEN
|
||||
}
|
||||
ASSERT_NOT_REACHED();
|
||||
}
|
||||
|
||||
Type m_type { Type::Unknown };
|
||||
StringView m_view;
|
||||
GMLPosition m_start;
|
||||
GMLPosition m_end;
|
||||
};
|
||||
|
||||
// Tokenizer for GML source text: construct it over the text, then call lex()
// to obtain the tokens.
class GMLLexer {
|
||||
public:
|
||||
// Stores the given view as the text to lex; lexing starts at index 0.
GMLLexer(const StringView&);
|
||||
|
||||
// Lexes from the current index to the end of the input and returns the
// tokens in source order. Whitespace produces no tokens.
Vector<GMLToken> lex();
|
||||
|
||||
private:
|
||||
// Returns the character `offset` characters past the current index without
// consuming it, or 0 if that is at or beyond the end of the input.
char peek(size_t offset = 0) const;
|
||||
// Returns the character at the current index and advances past it, updating
// m_previous_position and m_position. Asserts that input remains.
char consume();
|
||||
|
||||
// The text being lexed; token views are sub-views of it.
StringView m_input;
|
||||
// Index into m_input of the next character to consume.
size_t m_index { 0 };
|
||||
// Value m_position had just before the most recent consume(); lex() records
// it as a token's end position.
GMLPosition m_previous_position { 0, 0 };
|
||||
// Line/column of the next character to consume; both start at 0.
GMLPosition m_position { 0, 0 };
|
||||
};
|
||||
|
||||
}
|
|
@ -27,87 +27,81 @@
|
|||
#include <AK/GenericLexer.h>
|
||||
#include <AK/JsonObject.h>
|
||||
#include <AK/JsonValue.h>
|
||||
#include <AK/Queue.h>
|
||||
#include <LibGUI/GMLLexer.h>
|
||||
#include <LibGUI/GMLParser.h>
|
||||
#include <ctype.h>
|
||||
|
||||
namespace GUI {
|
||||
|
||||
static bool is_valid_class_name_character(char ch)
|
||||
{
|
||||
return isalpha(ch) || ch == ':';
|
||||
}
|
||||
|
||||
static bool is_valid_property_name_character(char ch)
|
||||
{
|
||||
return isalpha(ch) || ch == '_';
|
||||
}
|
||||
|
||||
static void swallow_whitespace(GenericLexer& scanner)
|
||||
{
|
||||
scanner.consume_while([](auto ch) { return isspace(ch); });
|
||||
}
|
||||
|
||||
static Optional<JsonValue> parse_core_object(GenericLexer& scanner)
|
||||
static Optional<JsonValue> parse_core_object(Queue<GMLToken>& tokens)
|
||||
{
|
||||
JsonObject object;
|
||||
JsonArray children;
|
||||
|
||||
// '@Foo' means new Core::Object of class Foo
|
||||
if (!scanner.consume_specific('@')) {
|
||||
dbgln("Expected '@'");
|
||||
auto peek = [&] {
|
||||
if (tokens.is_empty())
|
||||
return GMLToken::Type::Unknown;
|
||||
return tokens.head().m_type;
|
||||
};
|
||||
|
||||
if (peek() != GMLToken::Type::ClassMarker) {
|
||||
dbgln("Expected class marker");
|
||||
return {};
|
||||
}
|
||||
|
||||
auto class_name = scanner.consume_while([](auto ch) { return is_valid_class_name_character(ch); });
|
||||
object.set("class", JsonValue(class_name));
|
||||
tokens.dequeue();
|
||||
|
||||
swallow_whitespace(scanner);
|
||||
|
||||
if (!scanner.consume_specific('{')) {
|
||||
dbgln("Expected '{{'");
|
||||
if (peek() != GMLToken::Type::ClassName) {
|
||||
dbgln("Expected class name");
|
||||
return {};
|
||||
}
|
||||
|
||||
swallow_whitespace(scanner);
|
||||
auto class_name = tokens.dequeue();
|
||||
object.set("class", JsonValue(class_name.m_view));
|
||||
|
||||
if (peek() != GMLToken::Type::LeftCurly) {
|
||||
dbgln("Expected {{");
|
||||
return {};
|
||||
}
|
||||
tokens.dequeue();
|
||||
|
||||
for (;;) {
|
||||
swallow_whitespace(scanner);
|
||||
|
||||
if (scanner.peek() == '}') {
|
||||
if (peek() == GMLToken::Type::RightCurly) {
|
||||
// End of object
|
||||
break;
|
||||
}
|
||||
|
||||
if (scanner.peek() == '@') {
|
||||
if (peek() == GMLToken::Type::ClassMarker) {
|
||||
// It's a child object.
|
||||
auto value = parse_core_object(scanner);
|
||||
if (!value.has_value())
|
||||
auto value = parse_core_object(tokens);
|
||||
if (!value.has_value()) {
|
||||
dbgln("Parsing child object failed");
|
||||
return {};
|
||||
}
|
||||
if (!value.value().is_object()) {
|
||||
dbgln("Expected child to be Core::Object");
|
||||
return {};
|
||||
}
|
||||
children.append(value.release_value());
|
||||
} else {
|
||||
} else if (peek() == GMLToken::Type::Identifier) {
|
||||
// It's a property.
|
||||
auto property_name = scanner.consume_while([](auto ch) { return is_valid_property_name_character(ch); });
|
||||
swallow_whitespace(scanner);
|
||||
auto property_name = tokens.dequeue();
|
||||
|
||||
if (property_name.is_empty()) {
|
||||
if (property_name.m_view.is_empty()) {
|
||||
dbgln("Expected non-empty property name");
|
||||
return {};
|
||||
}
|
||||
|
||||
if (!scanner.consume_specific(':')) {
|
||||
if (peek() != GMLToken::Type::Colon) {
|
||||
dbgln("Expected ':'");
|
||||
return {};
|
||||
}
|
||||
|
||||
swallow_whitespace(scanner);
|
||||
tokens.dequeue();
|
||||
|
||||
JsonValue value;
|
||||
if (scanner.peek() == '@') {
|
||||
auto parsed_value = parse_core_object(scanner);
|
||||
if (peek() == GMLToken::Type::ClassMarker) {
|
||||
auto parsed_value = parse_core_object(tokens);
|
||||
if (!parsed_value.has_value())
|
||||
return {};
|
||||
if (!parsed_value.value().is_object()) {
|
||||
|
@ -115,23 +109,27 @@ static Optional<JsonValue> parse_core_object(GenericLexer& scanner)
|
|||
return {};
|
||||
}
|
||||
value = parsed_value.release_value();
|
||||
} else {
|
||||
auto value_string = scanner.consume_line();
|
||||
auto parsed_value = JsonValue::from_string(value_string);
|
||||
} else if (peek() == GMLToken::Type::JsonValue) {
|
||||
auto value_string = tokens.dequeue();
|
||||
auto parsed_value = JsonValue::from_string(value_string.m_view);
|
||||
if (!parsed_value.has_value()) {
|
||||
dbgln("Expected property to be JSON value");
|
||||
return {};
|
||||
}
|
||||
value = parsed_value.release_value();
|
||||
}
|
||||
object.set(property_name, move(value));
|
||||
object.set(property_name.m_view, move(value));
|
||||
} else {
|
||||
dbgln("Expected child, property, or }}");
|
||||
return {};
|
||||
}
|
||||
}
|
||||
|
||||
if (!scanner.consume_specific('}')) {
|
||||
dbgln("Expected '}'");
|
||||
if (peek() != GMLToken::Type::RightCurly) {
|
||||
dbgln("Expected }}");
|
||||
return {};
|
||||
}
|
||||
tokens.dequeue();
|
||||
|
||||
if (!children.is_empty())
|
||||
object.set("children", move(children));
|
||||
|
@ -141,8 +139,13 @@ static Optional<JsonValue> parse_core_object(GenericLexer& scanner)
|
|||
|
||||
JsonValue parse_gml(const StringView& string)
|
||||
{
|
||||
GenericLexer scanner(string);
|
||||
auto root = parse_core_object(scanner);
|
||||
auto lexer = GMLLexer(string);
|
||||
|
||||
Queue<GMLToken> tokens;
|
||||
for (auto& token : lexer.lex())
|
||||
tokens.enqueue(token);
|
||||
|
||||
auto root = parse_core_object(tokens);
|
||||
|
||||
if (!root.has_value())
|
||||
return JsonValue();
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue