mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-08-10 01:59:31 +00:00
LibJS: Store RegExp flags as a bitmask
This avoids having to do O(n) contains() in the various flag accessors. Yields a ~20% speed-up on the following microbenchmark: const re = /foo/dgimsvy; for (let i = 0; i < 1_000_000; ++i) re.flags;
This commit is contained in:
parent
6b82ab06fd
commit
257ebea364
Notes:
github-actions[bot]
2024-10-26 13:43:53 +00:00
Author: https://github.com/awesomekling
Commit: 257ebea364
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/1972
Reviewed-by: https://github.com/trflynn89
5 changed files with 53 additions and 15 deletions
|
@ -140,14 +140,14 @@
|
||||||
__JS_ENUMERATE(dispose, dispose)
|
__JS_ENUMERATE(dispose, dispose)
|
||||||
|
|
||||||
#define JS_ENUMERATE_REGEXP_FLAGS \
|
#define JS_ENUMERATE_REGEXP_FLAGS \
|
||||||
__JS_ENUMERATE(hasIndices, has_indices, d) \
|
__JS_ENUMERATE(HasIndices, hasIndices, has_indices, d) \
|
||||||
__JS_ENUMERATE(global, global, g) \
|
__JS_ENUMERATE(Global, global, global, g) \
|
||||||
__JS_ENUMERATE(ignoreCase, ignore_case, i) \
|
__JS_ENUMERATE(IgnoreCase, ignoreCase, ignore_case, i) \
|
||||||
__JS_ENUMERATE(multiline, multiline, m) \
|
__JS_ENUMERATE(Multiline, multiline, multiline, m) \
|
||||||
__JS_ENUMERATE(dotAll, dot_all, s) \
|
__JS_ENUMERATE(DotAll, dotAll, dot_all, s) \
|
||||||
__JS_ENUMERATE(unicodeSets, unicode_sets, v) \
|
__JS_ENUMERATE(UnicodeSets, unicodeSets, unicode_sets, v) \
|
||||||
__JS_ENUMERATE(unicode, unicode, u) \
|
__JS_ENUMERATE(Unicode, unicode, unicode, u) \
|
||||||
__JS_ENUMERATE(sticky, sticky, y)
|
__JS_ENUMERATE(Sticky, sticky, sticky, y)
|
||||||
|
|
||||||
namespace JS {
|
namespace JS {
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2020, Matthew Olsson <mattco@serenityos.org>
|
* Copyright (c) 2020, Matthew Olsson <mattco@serenityos.org>
|
||||||
|
* Copyright (c) 2024, Andreas Kling <andreas@ladybird.org>
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: BSD-2-Clause
|
* SPDX-License-Identifier: BSD-2-Clause
|
||||||
*/
|
*/
|
||||||
|
@ -160,10 +161,29 @@ RegExpObject::RegExpObject(Object& prototype)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Converts a RegExp flags string (e.g. "gi") into a RegExpObject::Flags bitmask.
// Each character that matches a flag letter listed in JS_ENUMERATE_REGEXP_FLAGS sets
// the corresponding Flags bit; any other character is ignored. The resulting mask lets
// callers test for a flag with a bit check instead of searching the flags string.
static RegExpObject::Flags to_flag_bits(StringView flags)
|
||||||
|
{
|
||||||
|
// Start with no bits set.
RegExpObject::Flags flag_bits = static_cast<RegExpObject::Flags>(0);
|
||||||
|
for (auto ch : flags) {
|
||||||
|
switch (ch) {
|
||||||
|
// Expand one `case` per entry of JS_ENUMERATE_REGEXP_FLAGS: `#flag_char` stringizes
// the flag letter and `[0]` picks its character to use as the case label.
#define __JS_ENUMERATE(FlagName, flagName, flag_name, flag_char) \
|
||||||
|
case #flag_char[0]: \
|
||||||
|
flag_bits |= RegExpObject::Flags::FlagName; \
|
||||||
|
break;
|
||||||
|
JS_ENUMERATE_REGEXP_FLAGS
|
||||||
|
#undef __JS_ENUMERATE
|
||||||
|
// Characters that are not a known flag letter contribute no bit.
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return flag_bits;
|
||||||
|
}
|
||||||
|
|
||||||
RegExpObject::RegExpObject(Regex<ECMA262> regex, ByteString pattern, ByteString flags, Object& prototype)
|
RegExpObject::RegExpObject(Regex<ECMA262> regex, ByteString pattern, ByteString flags, Object& prototype)
|
||||||
: Object(ConstructWithPrototypeTag::Tag, prototype)
|
: Object(ConstructWithPrototypeTag::Tag, prototype)
|
||||||
, m_pattern(move(pattern))
|
, m_pattern(move(pattern))
|
||||||
, m_flags(move(flags))
|
, m_flags(move(flags))
|
||||||
|
, m_flag_bits(to_flag_bits(m_flags))
|
||||||
, m_regex(move(regex))
|
, m_regex(move(regex))
|
||||||
{
|
{
|
||||||
VERIFY(m_regex->parser_result.error == regex::Error::NoError);
|
VERIFY(m_regex->parser_result.error == regex::Error::NoError);
|
||||||
|
@ -228,6 +248,7 @@ ThrowCompletionOr<NonnullGCPtr<RegExpObject>> RegExpObject::regexp_initialize(VM
|
||||||
m_pattern = move(pattern);
|
m_pattern = move(pattern);
|
||||||
|
|
||||||
// 17. Set obj.[[OriginalFlags]] to F.
|
// 17. Set obj.[[OriginalFlags]] to F.
|
||||||
|
m_flag_bits = to_flag_bits(flags);
|
||||||
m_flags = move(flags);
|
m_flags = move(flags);
|
||||||
|
|
||||||
// 18. Let capturingGroupsCount be CountLeftCapturingParensWithin(parseResult).
|
// 18. Let capturingGroupsCount be CountLeftCapturingParensWithin(parseResult).
|
||||||
|
|
|
@ -1,11 +1,13 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2020, Matthew Olsson <mattco@serenityos.org>
|
* Copyright (c) 2020, Matthew Olsson <mattco@serenityos.org>
|
||||||
|
* Copyright (c) 2024, Andreas Kling <andreas@ladybird.org>
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: BSD-2-Clause
|
* SPDX-License-Identifier: BSD-2-Clause
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <AK/EnumBits.h>
|
||||||
#include <AK/Optional.h>
|
#include <AK/Optional.h>
|
||||||
#include <AK/Result.h>
|
#include <AK/Result.h>
|
||||||
#include <LibJS/Runtime/Object.h>
|
#include <LibJS/Runtime/Object.h>
|
||||||
|
@ -37,6 +39,17 @@ public:
|
||||||
| regex::ECMAScriptFlags::BrowserExtended
|
| regex::ECMAScriptFlags::BrowserExtended
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Bitmask representation of a RegExp's flags, one bit per flag.
// Enumerator names match the first argument of each JS_ENUMERATE_REGEXP_FLAGS entry,
// whose flag letters are: HasIndices 'd', Global 'g', IgnoreCase 'i', Multiline 'm',
// DotAll 's', UnicodeSets 'v', Unicode 'u', Sticky 'y'.
enum class Flags {
|
||||||
|
HasIndices = 1 << 0,
|
||||||
|
Global = 1 << 1,
|
||||||
|
IgnoreCase = 1 << 2,
|
||||||
|
Multiline = 1 << 3,
|
||||||
|
DotAll = 1 << 4,
|
||||||
|
UnicodeSets = 1 << 5,
|
||||||
|
Unicode = 1 << 6,
|
||||||
|
Sticky = 1 << 7,
|
||||||
|
};
|
||||||
|
|
||||||
static NonnullGCPtr<RegExpObject> create(Realm&);
|
static NonnullGCPtr<RegExpObject> create(Realm&);
|
||||||
static NonnullGCPtr<RegExpObject> create(Realm&, Regex<ECMA262> regex, ByteString pattern, ByteString flags);
|
static NonnullGCPtr<RegExpObject> create(Realm&, Regex<ECMA262> regex, ByteString pattern, ByteString flags);
|
||||||
|
|
||||||
|
@ -48,6 +61,7 @@ public:
|
||||||
|
|
||||||
ByteString const& pattern() const { return m_pattern; }
|
ByteString const& pattern() const { return m_pattern; }
|
||||||
ByteString const& flags() const { return m_flags; }
|
ByteString const& flags() const { return m_flags; }
|
||||||
|
Flags flag_bits() const { return m_flag_bits; }
|
||||||
Regex<ECMA262> const& regex() { return *m_regex; }
|
Regex<ECMA262> const& regex() { return *m_regex; }
|
||||||
Regex<ECMA262> const& regex() const { return *m_regex; }
|
Regex<ECMA262> const& regex() const { return *m_regex; }
|
||||||
Realm& realm() { return *m_realm; }
|
Realm& realm() { return *m_realm; }
|
||||||
|
@ -64,10 +78,13 @@ private:
|
||||||
|
|
||||||
ByteString m_pattern;
|
ByteString m_pattern;
|
||||||
ByteString m_flags;
|
ByteString m_flags;
|
||||||
|
Flags m_flag_bits { 0 };
|
||||||
bool m_legacy_features_enabled { false }; // [[LegacyFeaturesEnabled]]
|
bool m_legacy_features_enabled { false }; // [[LegacyFeaturesEnabled]]
|
||||||
// Note: This is initialized in RegExpAlloc, but will be non-null afterwards
|
// Note: This is initialized in RegExpAlloc, but will be non-null afterwards
|
||||||
GCPtr<Realm> m_realm; // [[Realm]]
|
GCPtr<Realm> m_realm; // [[Realm]]
|
||||||
Optional<Regex<ECMA262>> m_regex;
|
Optional<Regex<ECMA262>> m_regex;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
AK_ENUM_BITWISE_OPERATORS(RegExpObject::Flags);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -50,7 +50,7 @@ void RegExpPrototype::initialize(Realm& realm)
|
||||||
define_native_accessor(realm, vm.names.flags, flags, {}, Attribute::Configurable);
|
define_native_accessor(realm, vm.names.flags, flags, {}, Attribute::Configurable);
|
||||||
define_native_accessor(realm, vm.names.source, source, {}, Attribute::Configurable);
|
define_native_accessor(realm, vm.names.source, source, {}, Attribute::Configurable);
|
||||||
|
|
||||||
#define __JS_ENUMERATE(flagName, flag_name, flag_char) \
|
#define __JS_ENUMERATE(FlagName, flagName, flag_name, flag_char) \
|
||||||
define_native_accessor(realm, vm.names.flagName, flag_name, {}, Attribute::Configurable);
|
define_native_accessor(realm, vm.names.flagName, flag_name, {}, Attribute::Configurable);
|
||||||
JS_ENUMERATE_REGEXP_FLAGS
|
JS_ENUMERATE_REGEXP_FLAGS
|
||||||
#undef __JS_ENUMERATE
|
#undef __JS_ENUMERATE
|
||||||
|
@ -441,7 +441,7 @@ size_t advance_string_index(Utf16View const& string, size_t index, bool unicode)
|
||||||
// 22.2.6.15 get RegExp.prototype.sticky, https://tc39.es/ecma262/#sec-get-regexp.prototype.sticky
|
// 22.2.6.15 get RegExp.prototype.sticky, https://tc39.es/ecma262/#sec-get-regexp.prototype.sticky
|
||||||
// 22.2.6.18 get RegExp.prototype.unicode, https://tc39.es/ecma262/#sec-get-regexp.prototype.unicode
|
// 22.2.6.18 get RegExp.prototype.unicode, https://tc39.es/ecma262/#sec-get-regexp.prototype.unicode
|
||||||
// 22.2.6.19 get RegExp.prototype.unicodeSets, https://tc39.es/ecma262/#sec-get-regexp.prototype.unicodesets
|
// 22.2.6.19 get RegExp.prototype.unicodeSets, https://tc39.es/ecma262/#sec-get-regexp.prototype.unicodesets
|
||||||
#define __JS_ENUMERATE(flagName, flag_name, flag_char) \
|
#define __JS_ENUMERATE(FlagName, flagName, flag_name, flag_char) \
|
||||||
JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::flag_name) \
|
JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::flag_name) \
|
||||||
{ \
|
{ \
|
||||||
auto& realm = *vm.current_realm(); \
|
auto& realm = *vm.current_realm(); \
|
||||||
|
@ -456,10 +456,10 @@ size_t advance_string_index(Utf16View const& string, size_t index, bool unicode)
|
||||||
return vm.throw_completion<TypeError>(ErrorType::NotAnObjectOfType, "RegExp"); \
|
return vm.throw_completion<TypeError>(ErrorType::NotAnObjectOfType, "RegExp"); \
|
||||||
} \
|
} \
|
||||||
/* 3. Let flags be R.[[OriginalFlags]]. */ \
|
/* 3. Let flags be R.[[OriginalFlags]]. */ \
|
||||||
auto const& flags = static_cast<RegExpObject&>(*regexp_object).flags(); \
|
auto flags = static_cast<RegExpObject&>(*regexp_object).flag_bits(); \
|
||||||
/* 4. If flags contains codeUnit, return true. */ \
|
/* 4. If flags contains codeUnit, return true. */ \
|
||||||
/* 5. Return false. */ \
|
/* 5. Return false. */ \
|
||||||
return Value(flags.contains(#flag_char##sv)); \
|
return Value(has_flag(flags, RegExpObject::Flags::FlagName)); \
|
||||||
}
|
}
|
||||||
JS_ENUMERATE_REGEXP_FLAGS
|
JS_ENUMERATE_REGEXP_FLAGS
|
||||||
#undef __JS_ENUMERATE
|
#undef __JS_ENUMERATE
|
||||||
|
@ -505,7 +505,7 @@ JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::flags)
|
||||||
// 17. If unicodeSets is true, append the code unit 0x0076 (LATIN SMALL LETTER V) as the last code unit of result.
|
// 17. If unicodeSets is true, append the code unit 0x0076 (LATIN SMALL LETTER V) as the last code unit of result.
|
||||||
// 18. Let sticky be ToBoolean(? Get(R, "sticky")).
|
// 18. Let sticky be ToBoolean(? Get(R, "sticky")).
|
||||||
// 19. If sticky is true, append the code unit 0x0079 (LATIN SMALL LETTER Y) as the last code unit of result.
|
// 19. If sticky is true, append the code unit 0x0079 (LATIN SMALL LETTER Y) as the last code unit of result.
|
||||||
#define __JS_ENUMERATE(flagName, flag_name, flag_char) \
|
#define __JS_ENUMERATE(FlagName, flagName, flag_name, flag_char) \
|
||||||
auto flag_##flag_name = TRY(regexp_object->get(vm.names.flagName)); \
|
auto flag_##flag_name = TRY(regexp_object->get(vm.names.flagName)); \
|
||||||
if (flag_##flag_name.to_boolean()) \
|
if (flag_##flag_name.to_boolean()) \
|
||||||
builder.append(#flag_char##sv);
|
builder.append(#flag_char##sv);
|
||||||
|
|
|
@ -38,7 +38,7 @@ private:
|
||||||
JS_DECLARE_NATIVE_FUNCTION(to_string);
|
JS_DECLARE_NATIVE_FUNCTION(to_string);
|
||||||
JS_DECLARE_NATIVE_FUNCTION(compile);
|
JS_DECLARE_NATIVE_FUNCTION(compile);
|
||||||
|
|
||||||
#define __JS_ENUMERATE(_, flag_name, ...) \
|
#define __JS_ENUMERATE(FlagName, flagName, flag_name, ...) \
|
||||||
JS_DECLARE_NATIVE_FUNCTION(flag_name);
|
JS_DECLARE_NATIVE_FUNCTION(flag_name);
|
||||||
JS_ENUMERATE_REGEXP_FLAGS
|
JS_ENUMERATE_REGEXP_FLAGS
|
||||||
#undef __JS_ENUMERATE
|
#undef __JS_ENUMERATE
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue