mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-07-28 19:59:17 +00:00
LibWeb: Add start of HTML Tokenizer in Swift
Currently it's just a Token class.
This commit is contained in:
parent
d0bc266c55
commit
fb074f9d0c
Notes:
github-actions[bot]
2024-08-24 01:18:22 +00:00
Author: https://github.com/ADKaster
Commit: fb074f9d0c
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/1130
Reviewed-by: https://github.com/AtkinsSJ
Reviewed-by: https://github.com/alimpfard
Reviewed-by: https://github.com/dzfrias
5 changed files with 169 additions and 1 deletions
78
Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.swift
Normal file
78
Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.swift
Normal file
|
@ -0,0 +1,78 @@
|
|||
/*
|
||||
* Copyright (c) 2024, Andrew Kaster <andrew@ladybird.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
/// A single token of the HTML tokenizer: a `TokenType` payload plus the
/// source positions where the token starts and ends.
public class HTMLToken {
    /// A location in the input, tracked as line, column and byte offset.
    ///
    /// All three fields default to zero.
    // NOTE(review): whether line/column are zero- or one-based is decided by
    // the code that fills these in, which is not part of this file — confirm.
    public struct Position {
        public var line = UInt()
        public var column = UInt()
        public var byteOffset = UInt()

        /// Creates a position. Mirrors the synthesized memberwise initializer
        /// (same labels, order and defaults) but is usable outside the module.
        public init(line: UInt = 0, column: UInt = 0, byteOffset: UInt = 0) {
            self.line = line
            self.column = column
            self.byteOffset = byteOffset
        }
    }

    /// One attribute carried by a start or end tag, together with the source
    /// positions of its name and value.
    public struct Attribute {
        public var prefix: String?
        public var localName: String
        // NOTE(review): the trailing underscore presumably avoids a clash with
        // a reserved word on the C++ side of the interop boundary — confirm.
        public var namespace_: String?
        public var value: String
        public var nameStartPosition: Position
        public var nameEndPosition: Position
        public var valueStartPosition: Position
        public var valueEndPosition: Position

        /// Creates an attribute. Mirrors the synthesized memberwise
        /// initializer (same labels and order; the optional fields default to
        /// `nil`) but is usable outside the module.
        public init(
            prefix: String? = nil,
            localName: String,
            namespace_: String? = nil,
            value: String,
            nameStartPosition: Position,
            nameEndPosition: Position,
            valueStartPosition: Position,
            valueEndPosition: Position
        ) {
            self.prefix = prefix
            self.localName = localName
            self.namespace_ = namespace_
            self.value = value
            self.nameStartPosition = nameStartPosition
            self.nameEndPosition = nameEndPosition
            self.valueStartPosition = valueStartPosition
            self.valueEndPosition = valueEndPosition
        }
    }

    /// The kind of a token and the data that kind carries.
    ///
    /// `Invalid` is the state of a token created with `init()`.
    public enum TokenType {
        case Invalid
        case DOCTYPE(
            name: String?,
            publicIdentifier: String?,
            systemIdentifier: String?,
            forceQuirksMode: Bool)
        case StartTag(
            tagName: String,
            selfClosing: Bool,
            selfClosingAcknowledged: Bool,
            attributes: [Attribute])
        case EndTag(
            tagName: String,
            selfClosing: Bool,
            selfClosingAcknowledged: Bool,
            attributes: [Attribute])
        case Comment(data: String)
        case Character(codePoint: Character)
        case EndOfFile
    }

    /// Returns `true` when this token's type is `.Character`.
    public func isCharacter() -> Bool {
        if case .Character = type {
            return true
        }
        return false
    }

    /// Returns `true` when this character token is tab, line feed, form feed,
    /// carriage return or space.
    ///
    /// - Precondition: `isCharacter()` is `true`; calling this on any other
    ///   token type is a programmer error and traps.
    public func isParserWhitespace() -> Bool {
        precondition(isCharacter(), "isParserWhitespace() called on non-character token")

        // NOTE: The parser considers '\r' to be whitespace, while the tokenizer does not.
        switch type {
        case .Character(codePoint: "\t"),
             .Character(codePoint: "\n"),
             .Character(codePoint: "\u{000C}"), // \f
             .Character(codePoint: "\r"),
             .Character(codePoint: " "):
            return true
        default:
            return false
        }
    }

    /// The token's kind and payload; `.Invalid` until assigned.
    public var type = TokenType.Invalid
    /// Where the token begins in the input.
    public var startPosition = Position()
    /// Where the token ends in the input.
    public var endPosition = Position()

    /// Creates an `.Invalid` token with zeroed positions.
    public init() {}

    /// Creates a token of the given type with zeroed positions.
    public init(type: TokenType) {
        self.type = type
    }
}
|
Loading…
Add table
Add a link
Reference in a new issue