mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-07-28 19:59:17 +00:00
LibWeb: Add start of HTML Tokenizer in Swift
Currently it's just a Token class.
This commit is contained in:
parent
d0bc266c55
commit
fb074f9d0c
Notes:
github-actions[bot]
2024-08-24 01:18:22 +00:00
Author: https://github.com/ADKaster
Commit: fb074f9d0c
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/1130
Reviewed-by: https://github.com/AtkinsSJ
Reviewed-by: https://github.com/alimpfard
Reviewed-by: https://github.com/dzfrias
5 changed files with 169 additions and 1 deletions
|
@ -20,4 +20,9 @@ if (ENABLE_SWIFT)
|
||||||
target_link_libraries(TestLibWebSwiftBindings PRIVATE AK LibWeb)
|
target_link_libraries(TestLibWebSwiftBindings PRIVATE AK LibWeb)
|
||||||
target_compile_options(TestLibWebSwiftBindings PRIVATE -parse-as-library)
|
target_compile_options(TestLibWebSwiftBindings PRIVATE -parse-as-library)
|
||||||
add_test(NAME TestLibWebSwiftBindings COMMAND TestLibWebSwiftBindings)
|
add_test(NAME TestLibWebSwiftBindings COMMAND TestLibWebSwiftBindings)
|
||||||
|
|
||||||
|
add_executable(TestHTMLTokenizerSwift TestHTMLTokenizerSwift.swift)
|
||||||
|
target_link_libraries(TestHTMLTokenizerSwift PRIVATE AK LibWeb)
|
||||||
|
target_compile_options(TestHTMLTokenizerSwift PRIVATE -parse-as-library)
|
||||||
|
add_test(NAME TestHTMLTokenizerSwift COMMAND TestHTMLTokenizerSwift)
|
||||||
endif()
|
endif()
|
||||||
|
|
58
Tests/LibWeb/TestHTMLTokenizerSwift.swift
Normal file
58
Tests/LibWeb/TestHTMLTokenizerSwift.swift
Normal file
|
@ -0,0 +1,58 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2024, Andrew Kaster <andrew@ladybird.org>
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: BSD-2-Clause
|
||||||
|
*/
|
||||||
|
|
||||||
|
import AK
|
||||||
|
import LibWeb
|
||||||
|
import SwiftLibWeb
|
||||||
|
import Foundation
|
||||||
|
|
||||||
|
/// Text sink that forwards everything handed to it to the process's standard error.
///
/// Intended to be passed as the `to:` target of `print(_:to:)`.
class StandardError: TextOutputStream {
    /// Writes `string` to standard error as UTF-8 bytes.
    ///
    /// The write is performed with `try!`, so a failing write traps.
    func write(_ string: Swift.String) {
        let stderrHandle = FileHandle.standardError
        let utf8Bytes = Data(string.utf8)
        try! stderrHandle.write(contentsOf: utf8Bytes)
    }
}
|
||||||
|
|
||||||
|
/// Executable smoke tests for the Swift `HTMLToken` type.
///
/// Every check uses `precondition`, so a failing check traps and the process
/// exits abnormally. Progress messages are printed to standard error.
@main
struct TestHTMLTokenizerSwift {

    /// Checks that `isCharacter()` follows the token's current `type`.
    static func testTokenTypes() {
        var standardError = StandardError()
        print("Testing HTMLToken types...", to: &standardError)

        let defaultToken = HTMLToken()
        // Negative case first: a token that has not been given a character type
        // must not report itself as one. Without this, an `isCharacter()` that
        // always returned true would pass the test.
        precondition(!defaultToken.isCharacter())

        defaultToken.type = .Character(codePoint: "a")
        precondition(defaultToken.isCharacter())

        print("HTMLToken types pass", to: &standardError)
    }

    /// Checks `isParserWhitespace()` against the five whitespace characters and
    /// a handful of ordinary letters.
    static func testParserWhitespace() {
        var standardError = StandardError()
        print("Testing HTMLToken parser whitespace...", to: &standardError)

        // Tab, line feed, carriage return, form feed and space count as whitespace.
        for codePoint: Character in ["\t", "\n", "\r", "\u{000C}", " "] {
            let token = HTMLToken(type: .Character(codePoint: codePoint))
            precondition(token.isParserWhitespace())
        }

        for codePoint: Character in ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"] {
            let token = HTMLToken(type: .Character(codePoint: codePoint))
            precondition(!token.isParserWhitespace())
        }

        print("HTMLToken parser whitespace pass", to: &standardError)
    }

    /// Entry point: runs each test in turn and reports overall success.
    static func main() {
        var standardError = StandardError()
        print("Starting test suite...", to: &standardError)

        testTokenTypes()
        testParserWhitespace()

        print("All tests pass", to: &standardError)
    }
}
|
|
@ -44,4 +44,4 @@ struct TestLibWebSwiftBindings {
|
||||||
|
|
||||||
print("All tests pass", to: &standardError)
|
print("All tests pass", to: &standardError)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -794,4 +794,31 @@ if (ENABLE_SWIFT)
|
||||||
list(APPEND LIBWEB_ALL_GENERATED_HEADERS ${generated_headers})
|
list(APPEND LIBWEB_ALL_GENERATED_HEADERS ${generated_headers})
|
||||||
|
|
||||||
generate_clang_module_map(LibWeb GENERATED_FILES ${LIBWEB_ALL_GENERATED_HEADERS})
|
generate_clang_module_map(LibWeb GENERATED_FILES ${LIBWEB_ALL_GENERATED_HEADERS})
|
||||||
|
|
||||||
|
target_compile_features(LibWeb PUBLIC cxx_std_23)
|
||||||
|
|
||||||
|
target_sources(LibWeb PRIVATE
|
||||||
|
HTML/Parser/HTMLToken.swift
|
||||||
|
)
|
||||||
|
target_compile_definitions(LibWeb PRIVATE LIBWEB_USE_SWIFT)
|
||||||
|
set_target_properties(LibWeb PROPERTIES Swift_MODULE_NAME "SwiftLibWeb")
|
||||||
|
|
||||||
|
# FIXME: These should be pulled automatically from interface compile options for the target
|
||||||
|
set(VFS_OVERLAY_OPTIONS
|
||||||
|
-Xcc -ivfsoverlay${CMAKE_CURRENT_BINARY_DIR}/vfs_overlay.yaml
|
||||||
|
-Xcc -ivfsoverlay${CMAKE_CURRENT_BINARY_DIR}/../LibGfx/vfs_overlay.yaml
|
||||||
|
-Xcc -ivfsoverlay${Lagom_BINARY_DIR}/AK/vfs_overlay.yaml
|
||||||
|
)
|
||||||
|
get_target_property(LIBWEB_NATIVE_DIRS LibWeb INCLUDE_DIRECTORIES)
|
||||||
|
_swift_generate_cxx_header(LibWeb "LibWeb-Swift.h"
|
||||||
|
SEARCH_PATHS ${LIBWEB_NATIVE_DIRS}
|
||||||
|
COMPILE_OPTIONS ${VFS_OVERLAY_OPTIONS}
|
||||||
|
)
|
||||||
|
|
||||||
|
# FIXME: https://gitlab.kitware.com/cmake/cmake/-/issues/26175
|
||||||
|
if (APPLE)
|
||||||
|
add_custom_command(TARGET LibWeb POST_BUILD
|
||||||
|
COMMAND install_name_tool -id @rpath/liblagom-web.0.dylib "$<TARGET_FILE:LibWeb>"
|
||||||
|
)
|
||||||
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
78
Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.swift
Normal file
78
Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.swift
Normal file
|
@ -0,0 +1,78 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2024, Andrew Kaster <andrew@ladybird.org>
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: BSD-2-Clause
|
||||||
|
*/
|
||||||
|
|
||||||
|
/// A single HTML token.
///
/// The token's kind and payload are stored together in `type`. `startPosition`
/// and `endPosition` carry `Position` values for the token.
public class HTMLToken {
    /// A location described by a line, a column and a byte offset.
    public struct Position {
        public var line: UInt
        public var column: UInt
        public var byteOffset: UInt

        /// Creates a position; every component defaults to zero.
        ///
        /// Declared explicitly because the synthesized memberwise initializer of
        /// a public struct is internal and cannot be called from other modules.
        public init(line: UInt = 0, column: UInt = 0, byteOffset: UInt = 0) {
            self.line = line
            self.column = column
            self.byteOffset = byteOffset
        }
    }

    /// One attribute carried by a `StartTag` or `EndTag` token.
    public struct Attribute {
        public var prefix: String?
        public var localName: String
        public var namespace_: String?
        public var value: String
        public var nameStartPosition: Position
        public var nameEndPosition: Position
        public var valueStartPosition: Position
        public var valueEndPosition: Position

        /// Creates an attribute.
        ///
        /// Parameter order matches the stored properties, so call sites written
        /// against the synthesized memberwise initializer keep compiling. The
        /// optional strings default to `nil` and the positions to `Position()`.
        public init(
            prefix: String? = nil,
            localName: String,
            namespace_: String? = nil,
            value: String,
            nameStartPosition: Position = Position(),
            nameEndPosition: Position = Position(),
            valueStartPosition: Position = Position(),
            valueEndPosition: Position = Position()
        ) {
            self.prefix = prefix
            self.localName = localName
            self.namespace_ = namespace_
            self.value = value
            self.nameStartPosition = nameStartPosition
            self.nameEndPosition = nameEndPosition
            self.valueStartPosition = valueStartPosition
            self.valueEndPosition = valueEndPosition
        }
    }

    /// The kind of a token together with the data specific to that kind.
    public enum TokenType {
        case Invalid
        case DOCTYPE(
            name: String?,
            publicIdentifier: String?,
            systemIdentifier: String?,
            forceQuirksMode: Bool)
        case StartTag(
            tagName: String,
            selfClosing: Bool,
            selfClosingAcknowledged: Bool,
            attributes: [Attribute])
        case EndTag(
            tagName: String,
            selfClosing: Bool,
            selfClosingAcknowledged: Bool,
            attributes: [Attribute])
        case Comment(data: String)
        case Character(codePoint: Character)
        case EndOfFile
    }

    /// Returns `true` when this token's `type` is `.Character`.
    public func isCharacter() -> Bool {
        if case .Character = type {
            return true
        }
        return false
    }

    /// Returns `true` when this character token holds tab, line feed, form feed,
    /// carriage return or space.
    ///
    /// - Precondition: The token is a character token (`isCharacter()`).
    public func isParserWhitespace() -> Bool {
        precondition(isCharacter(), "isParserWhitespace() called on non-character token")

        guard case .Character(let codePoint) = type else {
            return false
        }

        // NOTE: The parser considers '\r' to be whitespace, while the tokenizer does not.
        switch codePoint {
        case "\t", "\n", "\u{000C}" /* \f */, "\r", " ":
            return true
        default:
            return false
        }
    }

    /// Kind and payload of the token; `.Invalid` until assigned.
    public var type = TokenType.Invalid
    /// Start position of the token; defaults to `Position()`.
    public var startPosition = Position()
    /// End position of the token; defaults to `Position()`.
    public var endPosition = Position()

    /// Creates an `.Invalid` token with default positions.
    public init() {}

    /// Creates a token of the given `type` with default positions.
    public init(type: TokenType) {
        self.type = type
    }
}
|
Loading…
Add table
Add a link
Reference in a new issue