LibJS: Support LegacyOctalEscapeSequence in string literals

https://tc39.es/ecma262/#sec-additional-syntax-string-literals

The syntax and semantics of 11.8.4 is extended as follows except that
this extension is not allowed for strict mode code:

Syntax

    EscapeSequence::
        CharacterEscapeSequence
        LegacyOctalEscapeSequence
        NonOctalDecimalEscapeSequence
        HexEscapeSequence
        UnicodeEscapeSequence

    LegacyOctalEscapeSequence::
        OctalDigit [lookahead ∉ OctalDigit]
        ZeroToThree OctalDigit [lookahead ∉ OctalDigit]
        FourToSeven OctalDigit
        ZeroToThree OctalDigit OctalDigit

    ZeroToThree :: one of
        0 1 2 3

    FourToSeven :: one of
        4 5 6 7

    NonOctalDecimalEscapeSequence :: one of
        8 9

This definition of EscapeSequence is not used in strict mode or when
parsing TemplateCharacter.

Note

It is possible for string literals to precede a Use Strict Directive
that places the enclosing code in strict mode, and implementations must
take care to not use this extended definition of EscapeSequence with
such literals. For example, attempting to parse the following source
text must fail:

function invalid() { "\7"; "use strict"; }
This commit is contained in:
Linus Groh 2020-10-24 13:30:57 +01:00 committed by Andreas Kling
parent 9f036959e8
commit 4fb96afafc
Notes: sideshowbarker 2024-07-19 01:46:30 +09:00
5 changed files with 104 additions and 11 deletions

View file

@ -836,23 +836,41 @@ NonnullRefPtr<ArrayExpression> Parser::parse_array_expression()
return create_ast_node<ArrayExpression>(move(elements));
}
NonnullRefPtr<StringLiteral> Parser::parse_string_literal(Token token)
NonnullRefPtr<StringLiteral> Parser::parse_string_literal(Token token, bool in_template_literal)
{
auto status = Token::StringValueStatus::Ok;
auto string = token.string_value(status);
if (status != Token::StringValueStatus::Ok) {
String message;
if (status == Token::StringValueStatus::MalformedHexEscape || status == Token::StringValueStatus::MalformedUnicodeEscape) {
if (status == Token::StringValueStatus::LegacyOctalEscapeSequence) {
m_parser_state.m_string_legacy_octal_escape_sequence_in_scope = true;
if (in_template_literal)
message = "Octal escape sequence not allowed in template literal";
else if (m_parser_state.m_strict_mode)
message = "Octal escape sequence in string literal not allowed in strict mode";
} else if (status == Token::StringValueStatus::MalformedHexEscape || status == Token::StringValueStatus::MalformedUnicodeEscape) {
auto type = status == Token::StringValueStatus::MalformedUnicodeEscape ? "unicode" : "hexadecimal";
message = String::formatted("Malformed {} escape sequence", type);
} else if (status == Token::StringValueStatus::UnicodeEscapeOverflow) {
message = "Unicode code_point must not be greater than 0x10ffff in escape sequence";
} else {
ASSERT_NOT_REACHED();
}
if (!message.is_empty())
syntax_error(message, token.line_number(), token.line_column());
}
// It is possible for string literals to precede a Use Strict Directive that places the
// enclosing code in strict mode, and implementations must take care to not use this
// extended definition of EscapeSequence with such literals. For example, attempting to
// parse the following source text must fail:
//
// function invalid() { "\7"; "use strict"; }
if (m_parser_state.m_string_legacy_octal_escape_sequence_in_scope && string == "use strict")
syntax_error("Octal escape sequence in string literal not allowed in strict mode");
if (m_parser_state.m_use_strict_directive == UseStrictDirectiveState::Looking) {
if (string == "use strict" && token.type() != TokenType::TemplateLiteralString) {
m_parser_state.m_use_strict_directive = UseStrictDirectiveState::Found;
@ -884,7 +902,7 @@ NonnullRefPtr<TemplateLiteral> Parser::parse_template_literal(bool is_tagged)
while (!done() && !match(TokenType::TemplateLiteralEnd) && !match(TokenType::UnterminatedTemplateLiteral)) {
if (match(TokenType::TemplateLiteralString)) {
auto token = consume();
expressions.append(parse_string_literal(token));
expressions.append(parse_string_literal(token, true));
if (is_tagged)
raw_strings.append(create_ast_node<StringLiteral>(token.value()));
} else if (match(TokenType::TemplateLiteralExprStart)) {
@ -1249,6 +1267,7 @@ NonnullRefPtr<BlockStatement> Parser::parse_block_statement(bool& is_strict)
first = false;
}
m_parser_state.m_strict_mode = initial_strict_mode_state;
m_parser_state.m_string_legacy_octal_escape_sequence_in_scope = false;
consume(TokenType::CurlyClose);
block->add_variables(m_parser_state.m_let_scopes.last());
block->add_functions(m_parser_state.m_function_scopes.last());

View file

@ -87,7 +87,7 @@ public:
NonnullRefPtr<RegExpLiteral> parse_regexp_literal();
NonnullRefPtr<ObjectExpression> parse_object_expression();
NonnullRefPtr<ArrayExpression> parse_array_expression();
NonnullRefPtr<StringLiteral> parse_string_literal(Token token);
NonnullRefPtr<StringLiteral> parse_string_literal(Token token, bool in_template_literal = false);
NonnullRefPtr<TemplateLiteral> parse_template_literal(bool is_tagged);
NonnullRefPtr<Expression> parse_secondary_expression(NonnullRefPtr<Expression>, int min_precedence, Associativity associate = Associativity::Right);
NonnullRefPtr<CallExpression> parse_call_expression(NonnullRefPtr<Expression>);
@ -184,6 +184,7 @@ private:
bool m_in_function_context { false };
bool m_in_break_context { false };
bool m_in_continue_context { false };
bool m_string_legacy_octal_escape_sequence_in_scope { false };
explicit ParserState(Lexer);
};

View file

@ -13,3 +13,32 @@ test("unicode escapes", () => {
expect(`\u{1f41e}`).toBe("🐞");
expect("\u00ff").toBe(String.fromCharCode(0xff));
});
// Tests for legacy octal escape sequences (and the non-octal decimal escapes \8 and \9)
// in string literals: https://tc39.es/ecma262/#sec-additional-syntax-string-literals
describe("octal escapes", () => {
// The escapes in this test are written directly in this file's source, so they are
// decoded when this file itself is parsed; the file therefore has to be parsed as
// non-strict code for these literals to be accepted.
test("basic functionality", () => {
// A single octal digit with no octal digit after it: the code point with that value.
expect("\1").toBe("\u0001");
expect("\2").toBe("\u0002");
expect("\3").toBe("\u0003");
expect("\4").toBe("\u0004");
expect("\5").toBe("\u0005");
expect("\6").toBe("\u0006");
expect("\7").toBe("\u0007");
// 8 and 9 are not octal digits; the escape is expected to yield the digit itself.
expect("\8").toBe("8");
expect("\9").toBe("9");
// "\12" is taken as a two-digit octal escape (octal 12 == 10, i.e. "\n") because the
// following "8" is not an octal digit; the "8" stays a literal character.
expect("\128").toBe("\n8");
// Three-digit escapes starting with 0-3, e.g. octal 141 == 97 == "a".
expect("\141bc").toBe("abc");
expect("f\157o\142a\162").toBe("foobar");
expect("\123\145\162\145\156\151\164\171\117\123").toBe("SerenityOS");
});
// From here on the code under test is passed as a string, so "\\123" in this file is
// the two-character-plus-digits sequence \123 in the source handed to toEval().
test("syntax error in template literal", () => {
// Octal escapes are never allowed inside a template literal.
expect("`\\123`").not.toEval();
});
test("syntax error in strict mode", () => {
// Octal escape after a "use strict" directive, with either quote style.
expect("'use strict'; '\\123'").not.toEval();
expect('"use strict"; "\\123"').not.toEval();
// Special case, string literal precedes use strict directive
// NOTE(review): `somethingElse;` sits between the octal literal and 'use strict'.
// ECMA-262 defines a directive prologue as only the leading run of string-literal
// statements, so this 'use strict' may not actually be a directive - confirm that
// rejecting this source is intended rather than e.g. "'\\123'; 'use strict'".
expect("'\\123'; somethingElse; 'use strict'").not.toEval();
});
});

View file

@ -1,5 +1,6 @@
/*
* Copyright (c) 2020, Stephan Unverwerth <s.unverwerth@gmx.de>
* Copyright (c) 2020, Linus Groh <mail@linusgroh.de>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -103,8 +104,19 @@ String Token::string_value(StringValueStatus& status) const
{
ASSERT(type() == TokenType::StringLiteral || type() == TokenType::TemplateLiteralString);
auto is_template = type() == TokenType::TemplateLiteralString;
auto offset = is_template ? 0 : 1;
auto offset = type() == TokenType::TemplateLiteralString ? 0 : 1;
size_t i;
auto lookahead = [&]<typename T>(T fn, size_t distance = 1) -> bool {
if (i + distance >= m_value.length() - offset)
return false;
return fn(m_value[i + distance]);
};
auto is_octal_digit = [](char c) {
return c >= '0' && c <= '7';
};
auto encoding_failure = [&status](StringValueStatus parse_status) -> String {
status = parse_status;
@ -112,7 +124,7 @@ String Token::string_value(StringValueStatus& status) const
};
StringBuilder builder;
for (size_t i = offset; i < m_value.length() - offset; ++i) {
for (i = offset; i < m_value.length() - offset; ++i) {
if (m_value[i] == '\\' && i + 1 < m_value.length() - offset) {
i++;
switch (m_value[i]) {
@ -134,9 +146,6 @@ String Token::string_value(StringValueStatus& status) const
case 'v':
builder.append('\v');
break;
case '0':
builder.append((char)0);
break;
case '\'':
builder.append('\'');
break;
@ -200,9 +209,43 @@ String Token::string_value(StringValueStatus& status) const
builder.append(m_value[i]);
break;
}
if (m_value[i] == '0' && !lookahead(isdigit)) {
builder.append((char)0);
break;
}
// FIXME: Also parse octal. Should anything else generate a syntax error?
builder.append(m_value[i]);
// In non-strict mode LegacyOctalEscapeSequence is allowed in strings:
// https://tc39.es/ecma262/#sec-additional-syntax-string-literals
String octal_str;
// OctalDigit [lookahead ∉ OctalDigit]
if (is_octal_digit(m_value[i]) && !lookahead(is_octal_digit)) {
status = StringValueStatus::LegacyOctalEscapeSequence;
octal_str = String(&m_value[i], 1);
}
// ZeroToThree OctalDigit [lookahead ∉ OctalDigit]
else if (m_value[i] >= '0' && m_value[i] <= '3' && lookahead(is_octal_digit) && !lookahead(is_octal_digit, 2)) {
status = StringValueStatus::LegacyOctalEscapeSequence;
octal_str = String(m_value.substring_view(i, 2));
i++;
}
// FourToSeven OctalDigit
else if (m_value[i] >= '4' && m_value[i] <= '7' && lookahead(is_octal_digit)) {
status = StringValueStatus::LegacyOctalEscapeSequence;
octal_str = String(m_value.substring_view(i, 2));
i++;
}
// ZeroToThree OctalDigit OctalDigit
else if (m_value[i] >= '0' && m_value[i] <= '3' && lookahead(is_octal_digit) && lookahead(is_octal_digit, 2)) {
status = StringValueStatus::LegacyOctalEscapeSequence;
octal_str = String(m_value.substring_view(i, 3));
i += 2;
}
if (status == StringValueStatus::LegacyOctalEscapeSequence)
builder.append_code_point(strtoul(octal_str.characters(), nullptr, 8));
else
builder.append(m_value[i]);
}
} else {
builder.append(m_value[i]);

View file

@ -208,6 +208,7 @@ public:
MalformedHexEscape,
MalformedUnicodeEscape,
UnicodeEscapeOverflow,
LegacyOctalEscapeSequence,
};
String string_value(StringValueStatus& status) const;