Everywhere: Replace ctype.h to avoid narrowing conversions

This replaces ctype.h with CharacterTypes.h everywhere I could find
issues with narrowing conversions. While using it will probably make
sense almost everywhere in the future, the most critical places should
have been addressed.
This commit is contained in:
Max Wipfli 2021-06-01 21:18:08 +02:00 committed by Andreas Kling
parent 1c9d87c455
commit bc8d16ad28
Notes: sideshowbarker 2024-07-18 16:57:29 +09:00
16 changed files with 153 additions and 266 deletions

View file

@ -5,6 +5,7 @@
* SPDX-License-Identifier: BSD-2-Clause * SPDX-License-Identifier: BSD-2-Clause
*/ */
#include <AK/CharacterTypes.h>
#include <AK/Debug.h> #include <AK/Debug.h>
#include <AK/LexicalPath.h> #include <AK/LexicalPath.h>
#include <AK/StringBuilder.h> #include <AK/StringBuilder.h>
@ -14,26 +15,6 @@
namespace AK { namespace AK {
// Returns true when code_point is an ASCII letter, i.e. lies in 'a'-'z' or 'A'-'Z'.
constexpr bool is_ascii_alpha(u32 code_point)
{
    const bool is_lowercase_letter = code_point >= 'a' && code_point <= 'z';
    const bool is_uppercase_letter = code_point >= 'A' && code_point <= 'Z';
    return is_lowercase_letter || is_uppercase_letter;
}
// Returns true when code_point is one of the ASCII decimal digits '0'-'9'.
constexpr bool is_ascii_digit(u32 code_point)
{
    return code_point >= '0' && code_point <= '9';
}
// Returns true when code_point is an ASCII letter or an ASCII decimal digit.
constexpr bool is_ascii_alphanumeric(u32 code_point)
{
    if (is_ascii_digit(code_point))
        return true;
    return is_ascii_alpha(code_point);
}
// Returns true when code_point is an ASCII hexadecimal digit: '0'-'9', 'a'-'f' or 'A'-'F'.
constexpr bool is_ascii_hex_digit(u32 code_point)
{
    if (is_ascii_digit(code_point))
        return true;
    const bool is_lowercase_hex_letter = 'a' <= code_point && code_point <= 'f';
    const bool is_uppercase_hex_letter = 'A' <= code_point && code_point <= 'F';
    return is_lowercase_hex_letter || is_uppercase_hex_letter;
}
// FIXME: It could make sense to force users of URL to use URLParser::parse() explicitly instead of using a constructor. // FIXME: It could make sense to force users of URL to use URLParser::parse() explicitly instead of using a constructor.
URL::URL(StringView const& string) URL::URL(StringView const& string)
: URL(URLParser::parse({}, string)) : URL(URLParser::parse({}, string))
@ -403,17 +384,6 @@ String URL::percent_encode(StringView const& input, URL::PercentEncodeSet set)
return builder.to_string(); return builder.to_string();
} }
// Converts a single ASCII hexadecimal digit to its numeric value:
// '0'-'9' map to 0-9, 'a'-'f' and 'A'-'F' map to 10-15.
// Any other byte reaches VERIFY_NOT_REACHED().
constexpr u8 parse_hex_digit(u8 digit)
{
    if ('0' <= digit && digit <= '9')
        return digit - '0';
    if ('a' <= digit && digit <= 'f')
        return 10 + (digit - 'a');
    if ('A' <= digit && digit <= 'F')
        return 10 + (digit - 'A');
    VERIFY_NOT_REACHED();
}
String URL::percent_decode(StringView const& input) String URL::percent_decode(StringView const& input)
{ {
if (!input.contains('%')) if (!input.contains('%'))
@ -427,9 +397,9 @@ String URL::percent_decode(StringView const& input)
builder.append_code_point(*it); builder.append_code_point(*it);
} else { } else {
++it; ++it;
u8 byte = parse_hex_digit(*it) << 4; u8 byte = parse_ascii_hex_digit(*it) << 4;
++it; ++it;
byte += parse_hex_digit(*it); byte += parse_ascii_hex_digit(*it);
builder.append(byte); builder.append(byte);
} }
} }

View file

@ -4,6 +4,7 @@
* SPDX-License-Identifier: BSD-2-Clause * SPDX-License-Identifier: BSD-2-Clause
*/ */
#include <AK/CharacterTypes.h>
#include <AK/Debug.h> #include <AK/Debug.h>
#include <AK/Optional.h> #include <AK/Optional.h>
#include <AK/SourceLocation.h> #include <AK/SourceLocation.h>
@ -15,26 +16,6 @@
namespace AK { namespace AK {
// Returns true when code_point is an ASCII letter, i.e. lies in 'a'-'z' or 'A'-'Z'.
constexpr bool is_ascii_alpha(u32 code_point)
{
    const bool is_lowercase_letter = code_point >= 'a' && code_point <= 'z';
    const bool is_uppercase_letter = code_point >= 'A' && code_point <= 'Z';
    return is_lowercase_letter || is_uppercase_letter;
}
// Returns true when code_point is one of the ASCII decimal digits '0'-'9'.
constexpr bool is_ascii_digit(u32 code_point)
{
    return code_point >= '0' && code_point <= '9';
}
// Returns true when code_point is an ASCII letter or an ASCII decimal digit.
constexpr bool is_ascii_alphanumeric(u32 code_point)
{
    if (is_ascii_digit(code_point))
        return true;
    return is_ascii_alpha(code_point);
}
// Returns true when code_point is an ASCII hexadecimal digit: '0'-'9', 'a'-'f' or 'A'-'F'.
constexpr bool is_ascii_hex_digit(u32 code_point)
{
    if (is_ascii_digit(code_point))
        return true;
    const bool is_lowercase_hex_letter = 'a' <= code_point && code_point <= 'f';
    const bool is_uppercase_hex_letter = 'A' <= code_point && code_point <= 'F';
    return is_lowercase_hex_letter || is_uppercase_hex_letter;
}
constexpr bool is_url_code_point(u32 code_point) constexpr bool is_url_code_point(u32 code_point)
{ {
// FIXME: [...] and code points in the range U+00A0 to U+10FFFD, inclusive, excluding surrogates and noncharacters. // FIXME: [...] and code points in the range U+00A0 to U+10FFFD, inclusive, excluding surrogates and noncharacters.

View file

@ -4,12 +4,23 @@
* SPDX-License-Identifier: BSD-2-Clause * SPDX-License-Identifier: BSD-2-Clause
*/ */
#include <AK/CharacterTypes.h>
#include <LibGUI/EditingEngine.h> #include <LibGUI/EditingEngine.h>
#include <LibGUI/Event.h> #include <LibGUI/Event.h>
#include <LibGUI/TextEditor.h> #include <LibGUI/TextEditor.h>
namespace GUI { namespace GUI {
// Word-character test used by the word-motion helpers in this file:
// accepts '_' in addition to whatever is_ascii_alphanumeric() accepts.
constexpr bool is_vim_alphanumeric(u32 code_point)
{
    if (code_point == '_')
        return true;
    return is_ascii_alphanumeric(code_point);
}
// Punctuation test used by the word-motion helpers in this file:
// accepts whatever is_ascii_punctuation() accepts, except '_',
// which is_vim_alphanumeric() treats as a word character instead.
constexpr bool is_vim_punctuation(u32 code_point)
{
    if (code_point == '_')
        return false;
    return is_ascii_punctuation(code_point);
}
EditingEngine::~EditingEngine() EditingEngine::~EditingEngine()
{ {
} }
@ -379,15 +390,7 @@ TextPosition EditingEngine::find_beginning_of_next_word()
* If the end of the input is reached, jump there * If the end of the input is reached, jump there
*/ */
auto vim_isalnum = [](int c) { bool started_on_punct = is_vim_punctuation(m_editor->current_line().to_utf8().characters()[m_editor->cursor().column()]);
return c == '_' || isalnum(c);
};
auto vim_ispunct = [](int c) {
return c != '_' && ispunct(c);
};
bool started_on_punct = vim_ispunct(m_editor->current_line().to_utf8().characters()[m_editor->cursor().column()]);
bool has_seen_whitespace = false; bool has_seen_whitespace = false;
bool is_first_line = true; bool is_first_line = true;
auto& lines = m_editor->lines(); auto& lines = m_editor->lines();
@ -409,18 +412,18 @@ TextPosition EditingEngine::find_beginning_of_next_word()
const u32* line_chars = line.view().code_points(); const u32* line_chars = line.view().code_points();
const u32 current_char = line_chars[column_index]; const u32 current_char = line_chars[column_index];
if (started_on_punct && vim_isalnum(current_char)) { if (started_on_punct && is_vim_alphanumeric(current_char)) {
return { line_index, column_index }; return { line_index, column_index };
} }
if (vim_ispunct(current_char) && !started_on_punct) { if (is_vim_punctuation(current_char) && !started_on_punct) {
return { line_index, column_index }; return { line_index, column_index };
} }
if (isspace(current_char)) if (is_ascii_space(current_char))
has_seen_whitespace = true; has_seen_whitespace = true;
if (has_seen_whitespace && (vim_isalnum(current_char) || vim_ispunct(current_char))) { if (has_seen_whitespace && (is_vim_alphanumeric(current_char) || is_vim_punctuation(current_char))) {
return { line_index, column_index }; return { line_index, column_index };
} }
@ -449,14 +452,6 @@ TextPosition EditingEngine::find_end_of_next_word()
* If the end of the input is reached, jump there * If the end of the input is reached, jump there
*/ */
auto vim_isalnum = [](int c) {
return c == '_' || isalnum(c);
};
auto vim_ispunct = [](int c) {
return c != '_' && ispunct(c);
};
bool is_first_line = true; bool is_first_line = true;
bool is_first_iteration = true; bool is_first_iteration = true;
auto& lines = m_editor->lines(); auto& lines = m_editor->lines();
@ -481,7 +476,7 @@ TextPosition EditingEngine::find_end_of_next_word()
const u32* line_chars = line.view().code_points(); const u32* line_chars = line.view().code_points();
const u32 current_char = line_chars[column_index]; const u32 current_char = line_chars[column_index];
if (column_index == lines.at(line_index).length() - 1 && !is_first_iteration && (vim_isalnum(current_char) || vim_ispunct(current_char))) if (column_index == lines.at(line_index).length() - 1 && !is_first_iteration && (is_vim_alphanumeric(current_char) || is_vim_punctuation(current_char)))
return { line_index, column_index }; return { line_index, column_index };
else if (column_index == lines.at(line_index).length() - 1) { else if (column_index == lines.at(line_index).length() - 1) {
is_first_iteration = false; is_first_iteration = false;
@ -490,10 +485,10 @@ TextPosition EditingEngine::find_end_of_next_word()
const u32 next_char = line_chars[column_index + 1]; const u32 next_char = line_chars[column_index + 1];
if (!is_first_iteration && vim_isalnum(current_char) && (isspace(next_char) || vim_ispunct(next_char))) if (!is_first_iteration && is_vim_alphanumeric(current_char) && (is_ascii_space(next_char) || is_vim_punctuation(next_char)))
return { line_index, column_index }; return { line_index, column_index };
if (!is_first_iteration && vim_ispunct(current_char) && (isspace(next_char) || vim_isalnum(next_char))) if (!is_first_iteration && is_vim_punctuation(current_char) && (is_ascii_space(next_char) || is_vim_alphanumeric(next_char)))
return { line_index, column_index }; return { line_index, column_index };
if (line_index == lines.size() - 1 && column_index == line.length() - 1) { if (line_index == lines.size() - 1 && column_index == line.length() - 1) {
@ -513,15 +508,7 @@ void EditingEngine::move_to_end_of_next_word()
TextPosition EditingEngine::find_end_of_previous_word() TextPosition EditingEngine::find_end_of_previous_word()
{ {
auto vim_isalnum = [](int c) { bool started_on_punct = is_vim_punctuation(m_editor->current_line().to_utf8().characters()[m_editor->cursor().column()]);
return c == '_' || isalnum(c);
};
auto vim_ispunct = [](int c) {
return c != '_' && ispunct(c);
};
bool started_on_punct = vim_ispunct(m_editor->current_line().to_utf8().characters()[m_editor->cursor().column()]);
bool is_first_line = true; bool is_first_line = true;
bool has_seen_whitespace = false; bool has_seen_whitespace = false;
auto& lines = m_editor->lines(); auto& lines = m_editor->lines();
@ -545,19 +532,19 @@ TextPosition EditingEngine::find_end_of_previous_word()
const u32* line_chars = line.view().code_points(); const u32* line_chars = line.view().code_points();
const u32 current_char = line_chars[column_index]; const u32 current_char = line_chars[column_index];
if (started_on_punct && vim_isalnum(current_char)) { if (started_on_punct && is_vim_alphanumeric(current_char)) {
return { line_index, column_index }; return { line_index, column_index };
} }
if (vim_ispunct(current_char) && !started_on_punct) { if (is_vim_punctuation(current_char) && !started_on_punct) {
return { line_index, column_index }; return { line_index, column_index };
} }
if (isspace(current_char)) { if (is_ascii_space(current_char)) {
has_seen_whitespace = true; has_seen_whitespace = true;
} }
if (has_seen_whitespace && (vim_isalnum(current_char) || vim_ispunct(current_char))) { if (has_seen_whitespace && (is_vim_alphanumeric(current_char) || is_vim_punctuation(current_char))) {
return { line_index, column_index }; return { line_index, column_index };
} }
@ -580,14 +567,6 @@ void EditingEngine::move_to_end_of_previous_word()
TextPosition EditingEngine::find_beginning_of_previous_word() TextPosition EditingEngine::find_beginning_of_previous_word()
{ {
auto vim_isalnum = [](int c) {
return c == '_' || isalnum(c);
};
auto vim_ispunct = [](int c) {
return c != '_' && ispunct(c);
};
bool is_first_iterated_line = true; bool is_first_iterated_line = true;
bool is_first_iteration = true; bool is_first_iteration = true;
auto& lines = m_editor->lines(); auto& lines = m_editor->lines();
@ -618,7 +597,7 @@ TextPosition EditingEngine::find_beginning_of_previous_word()
const u32* line_chars = line.view().code_points(); const u32* line_chars = line.view().code_points();
const u32 current_char = line_chars[column_index]; const u32 current_char = line_chars[column_index];
if (column_index == 0 && !is_first_iteration && (vim_isalnum(current_char) || vim_ispunct(current_char))) { if (column_index == 0 && !is_first_iteration && (is_vim_alphanumeric(current_char) || is_vim_punctuation(current_char))) {
return { line_index, column_index }; return { line_index, column_index };
} else if (line_index == 0 && column_index == 0) { } else if (line_index == 0 && column_index == 0) {
return { line_index, column_index }; return { line_index, column_index };
@ -629,10 +608,10 @@ TextPosition EditingEngine::find_beginning_of_previous_word()
const u32 next_char = line_chars[column_index - 1]; const u32 next_char = line_chars[column_index - 1];
if (!is_first_iteration && vim_isalnum(current_char) && (isspace(next_char) || vim_ispunct(next_char))) if (!is_first_iteration && is_vim_alphanumeric(current_char) && (is_ascii_space(next_char) || is_vim_punctuation(next_char)))
return { line_index, column_index }; return { line_index, column_index };
if (!is_first_iteration && vim_ispunct(current_char) && (isspace(next_char) || vim_isalnum(next_char))) if (!is_first_iteration && is_vim_punctuation(current_char) && (is_ascii_space(next_char) || is_vim_alphanumeric(next_char)))
return { line_index, column_index }; return { line_index, column_index };
is_first_iteration = false; is_first_iteration = false;

View file

@ -5,6 +5,7 @@
*/ */
#include <AK/Badge.h> #include <AK/Badge.h>
#include <AK/CharacterTypes.h>
#include <AK/ScopeGuard.h> #include <AK/ScopeGuard.h>
#include <AK/StringBuilder.h> #include <AK/StringBuilder.h>
#include <AK/Utf8View.h> #include <AK/Utf8View.h>
@ -12,7 +13,6 @@
#include <LibGUI/TextDocument.h> #include <LibGUI/TextDocument.h>
#include <LibGUI/TextEditor.h> #include <LibGUI/TextEditor.h>
#include <LibRegex/Regex.h> #include <LibRegex/Regex.h>
#include <ctype.h>
namespace GUI { namespace GUI {
@ -104,7 +104,7 @@ size_t TextDocumentLine::first_non_whitespace_column() const
{ {
for (size_t i = 0; i < length(); ++i) { for (size_t i = 0; i < length(); ++i) {
auto code_point = code_points()[i]; auto code_point = code_points()[i];
if (!isspace(code_point)) if (!is_ascii_space(code_point))
return i; return i;
} }
return length(); return length();
@ -114,7 +114,7 @@ Optional<size_t> TextDocumentLine::last_non_whitespace_column() const
{ {
for (ssize_t i = length() - 1; i >= 0; --i) { for (ssize_t i = length() - 1; i >= 0; --i) {
auto code_point = code_points()[i]; auto code_point = code_points()[i];
if (!isspace(code_point)) if (!is_ascii_space(code_point))
return i; return i;
} }
return {}; return {};
@ -124,7 +124,7 @@ bool TextDocumentLine::ends_in_whitespace() const
{ {
if (!length()) if (!length())
return false; return false;
return isspace(code_points()[length() - 1]); return is_ascii_space(code_points()[length() - 1]);
} }
bool TextDocumentLine::can_select() const bool TextDocumentLine::can_select() const
@ -638,11 +638,11 @@ TextPosition TextDocument::first_word_break_before(const TextPosition& position,
if (target.column() == line.length()) if (target.column() == line.length())
modifier = 1; modifier = 1;
auto is_start_alphanumeric = isalnum(line.code_points()[target.column() - modifier]); auto is_start_alphanumeric = is_ascii_alphanumeric(line.code_points()[target.column() - modifier]);
while (target.column() > 0) { while (target.column() > 0) {
auto prev_code_point = line.code_points()[target.column() - 1]; auto prev_code_point = line.code_points()[target.column() - 1];
if ((is_start_alphanumeric && !isalnum(prev_code_point)) || (!is_start_alphanumeric && isalnum(prev_code_point))) if ((is_start_alphanumeric && !is_ascii_alphanumeric(prev_code_point)) || (!is_start_alphanumeric && is_ascii_alphanumeric(prev_code_point)))
break; break;
target.set_column(target.column() - 1); target.set_column(target.column() - 1);
} }
@ -662,11 +662,11 @@ TextPosition TextDocument::first_word_break_after(const TextPosition& position)
return TextPosition(position.line() + 1, 0); return TextPosition(position.line() + 1, 0);
} }
auto is_start_alphanumeric = isalnum(line.code_points()[target.column()]); auto is_start_alphanumeric = is_ascii_alphanumeric(line.code_points()[target.column()]);
while (target.column() < line.length()) { while (target.column() < line.length()) {
auto next_code_point = line.code_points()[target.column()]; auto next_code_point = line.code_points()[target.column()];
if ((is_start_alphanumeric && !isalnum(next_code_point)) || (!is_start_alphanumeric && isalnum(next_code_point))) if ((is_start_alphanumeric && !is_ascii_alphanumeric(next_code_point)) || (!is_start_alphanumeric && is_ascii_alphanumeric(next_code_point)))
break; break;
target.set_column(target.column() + 1); target.set_column(target.column() + 1);
} }

View file

@ -4,6 +4,7 @@
* SPDX-License-Identifier: BSD-2-Clause * SPDX-License-Identifier: BSD-2-Clause
*/ */
#include <AK/CharacterTypes.h>
#include <AK/Debug.h> #include <AK/Debug.h>
#include <AK/ScopeGuard.h> #include <AK/ScopeGuard.h>
#include <AK/StringBuilder.h> #include <AK/StringBuilder.h>
@ -25,7 +26,6 @@
#include <LibGfx/FontDatabase.h> #include <LibGfx/FontDatabase.h>
#include <LibGfx/Palette.h> #include <LibGfx/Palette.h>
#include <LibSyntax/Highlighter.h> #include <LibSyntax/Highlighter.h>
#include <ctype.h>
#include <fcntl.h> #include <fcntl.h>
#include <math.h> #include <math.h>
#include <stdio.h> #include <stdio.h>
@ -1232,12 +1232,12 @@ size_t TextEditor::number_of_selected_words() const
bool in_word = false; bool in_word = false;
auto selected_text = this->selected_text(); auto selected_text = this->selected_text();
for (char c : selected_text) { for (char c : selected_text) {
if (in_word && isspace(c)) { if (in_word && is_ascii_space(c)) {
in_word = false; in_word = false;
word_count++; word_count++;
continue; continue;
} }
if (!in_word && !isspace(c)) if (!in_word && !is_ascii_space(c))
in_word = true; in_word = true;
} }
if (in_word) if (in_word)
@ -1561,7 +1561,7 @@ void TextEditor::recompute_visual_lines(size_t line_index)
auto glyph_spacing = font().glyph_spacing(); auto glyph_spacing = font().glyph_spacing();
for (size_t i = 0; i < line.length(); ++i) { for (size_t i = 0; i < line.length(); ++i) {
auto code_point = line.code_points()[i]; auto code_point = line.code_points()[i];
if (isspace(code_point)) { if (is_ascii_space(code_point)) {
last_whitespace_index = i; last_whitespace_index = i;
line_width_since_last_whitespace = 0; line_width_since_last_whitespace = 0;
} }

View file

@ -5,6 +5,7 @@
* SPDX-License-Identifier: BSD-2-Clause * SPDX-License-Identifier: BSD-2-Clause
*/ */
#include <AK/CharacterTypes.h>
#include <AK/Hex.h> #include <AK/Hex.h>
#include <AK/Platform.h> #include <AK/Platform.h>
#include <AK/TemporaryChange.h> #include <AK/TemporaryChange.h>
@ -56,7 +57,6 @@
#include <LibJS/Runtime/TypedArrayConstructor.h> #include <LibJS/Runtime/TypedArrayConstructor.h>
#include <LibJS/Runtime/TypedArrayPrototype.h> #include <LibJS/Runtime/TypedArrayPrototype.h>
#include <LibJS/Runtime/Value.h> #include <LibJS/Runtime/Value.h>
#include <ctype.h>
namespace JS { namespace JS {
@ -249,16 +249,10 @@ JS_DEFINE_NATIVE_FUNCTION(GlobalObject::parse_int)
} }
// parse_digit: maps one code point to its digit value for the given radix, or returns an empty Optional
// when the code point is not a valid digit in that radix.
// NOTE(review): the replacement body only lets code points through that pass is_ascii_hex_digit()
// (presumably 0-9, a-f, A-F), whereas the removed body mapped every islower()/isupper() letter to
// 10 + offset. Digits 'g'-'z' / 'G'-'Z', needed for radices above 16, therefore appear to be rejected
// by the new code - confirm this narrowing is intended.
auto parse_digit = [&](u32 code_point, i32 radix) -> Optional<i32> { auto parse_digit = [&](u32 code_point, i32 radix) -> Optional<i32> {
i32 digit = -1; if (!is_ascii_hex_digit(code_point) || radix <= 0)
return {};
if (isdigit(code_point)) auto digit = parse_ascii_hex_digit(code_point);
digit = code_point - '0'; if (digit >= (u32)radix)
else if (islower(code_point))
digit = 10 + (code_point - 'a');
else if (isupper(code_point))
digit = 10 + (code_point - 'A');
if (digit == -1 || digit >= radix)
return {}; return {};
return digit; return digit;
}; };

View file

@ -6,6 +6,7 @@
*/ */
#include "Editor.h" #include "Editor.h"
#include <AK/CharacterTypes.h>
#include <AK/Debug.h> #include <AK/Debug.h>
#include <AK/GenericLexer.h> #include <AK/GenericLexer.h>
#include <AK/JsonObject.h> #include <AK/JsonObject.h>
@ -19,7 +20,6 @@
#include <LibCore/EventLoop.h> #include <LibCore/EventLoop.h>
#include <LibCore/File.h> #include <LibCore/File.h>
#include <LibCore/Notifier.h> #include <LibCore/Notifier.h>
#include <ctype.h>
#include <errno.h> #include <errno.h>
#include <signal.h> #include <signal.h>
#include <stdio.h> #include <stdio.h>
@ -1338,7 +1338,7 @@ void Editor::refresh_display()
auto print_character_at = [this](size_t i) { auto print_character_at = [this](size_t i) {
StringBuilder builder; StringBuilder builder;
auto c = m_buffer[i]; auto c = m_buffer[i];
bool should_print_masked = isascii(c) && iscntrl(c) && c != '\n'; bool should_print_masked = is_ascii_control(c) && c != '\n';
bool should_print_caret = c < 64 && should_print_masked; bool should_print_caret = c < 64 && should_print_masked;
if (should_print_caret) if (should_print_caret)
builder.appendff("^{:c}", c + 64); builder.appendff("^{:c}", c + 64);
@ -1722,7 +1722,7 @@ Editor::VTState Editor::actual_rendered_string_length_step(StringMetrics& metric
current_line.length = 0; current_line.length = 0;
return state; return state;
} }
if (isascii(c) && iscntrl(c) && c != '\n') if (is_ascii_control(c) && c != '\n')
current_line.masked_chars.append({ index, 1, c < 64 ? 2u : 4u }); // if the character cannot be represented as ^c, represent it as \xbb. current_line.masked_chars.append({ index, 1, c < 64 ? 2u : 4u }); // if the character cannot be represented as ^c, represent it as \xbb.
// FIXME: This will not support anything sophisticated // FIXME: This will not support anything sophisticated
++current_line.length; ++current_line.length;
@ -1740,7 +1740,7 @@ Editor::VTState Editor::actual_rendered_string_length_step(StringMetrics& metric
// FIXME: This does not support non-VT (aside from set-title) escapes // FIXME: This does not support non-VT (aside from set-title) escapes
return state; return state;
case Bracket: case Bracket:
if (isdigit(c)) { if (is_ascii_digit(c)) {
return BracketArgsSemi; return BracketArgsSemi;
} }
return state; return state;
@ -1748,7 +1748,7 @@ Editor::VTState Editor::actual_rendered_string_length_step(StringMetrics& metric
if (c == ';') { if (c == ';') {
return Bracket; return Bracket;
} }
if (!isdigit(c)) if (!is_ascii_digit(c))
state = Free; state = Free;
return state; return state;
case Title: case Title:
@ -1848,7 +1848,7 @@ Vector<size_t, 2> Editor::vt_dsr()
m_incomplete_data.append(c); m_incomplete_data.append(c);
continue; continue;
case SawBracket: case SawBracket:
if (isdigit(c)) { if (is_ascii_digit(c)) {
state = InFirstCoordinate; state = InFirstCoordinate;
coordinate_buffer.append(c); coordinate_buffer.append(c);
continue; continue;
@ -1856,7 +1856,7 @@ Vector<size_t, 2> Editor::vt_dsr()
m_incomplete_data.append(c); m_incomplete_data.append(c);
continue; continue;
case InFirstCoordinate: case InFirstCoordinate:
if (isdigit(c)) { if (is_ascii_digit(c)) {
coordinate_buffer.append(c); coordinate_buffer.append(c);
continue; continue;
} }
@ -1872,7 +1872,7 @@ Vector<size_t, 2> Editor::vt_dsr()
m_incomplete_data.append(c); m_incomplete_data.append(c);
continue; continue;
case SawSemicolon: case SawSemicolon:
if (isdigit(c)) { if (is_ascii_digit(c)) {
state = InSecondCoordinate; state = InSecondCoordinate;
coordinate_buffer.append(c); coordinate_buffer.append(c);
continue; continue;
@ -1880,7 +1880,7 @@ Vector<size_t, 2> Editor::vt_dsr()
m_incomplete_data.append(c); m_incomplete_data.append(c);
continue; continue;
case InSecondCoordinate: case InSecondCoordinate:
if (isdigit(c)) { if (is_ascii_digit(c)) {
coordinate_buffer.append(c); coordinate_buffer.append(c);
continue; continue;
} }

View file

@ -4,6 +4,7 @@
* SPDX-License-Identifier: BSD-2-Clause * SPDX-License-Identifier: BSD-2-Clause
*/ */
#include <AK/CharacterTypes.h>
#include <AK/FileStream.h> #include <AK/FileStream.h>
#include <AK/ScopeGuard.h> #include <AK/ScopeGuard.h>
#include <AK/ScopedValueRollback.h> #include <AK/ScopedValueRollback.h>
@ -11,7 +12,6 @@
#include <AK/TemporaryChange.h> #include <AK/TemporaryChange.h>
#include <LibCore/File.h> #include <LibCore/File.h>
#include <LibLine/Editor.h> #include <LibLine/Editor.h>
#include <ctype.h>
#include <stdio.h> #include <stdio.h>
#include <sys/wait.h> #include <sys/wait.h>
#include <unistd.h> #include <unistd.h>
@ -84,7 +84,7 @@ void Editor::cursor_left_word()
for (;;) { for (;;) {
if (m_cursor == 0) if (m_cursor == 0)
break; break;
if (skipped_at_least_one_character && !isalnum(m_buffer[m_cursor - 1])) // stop *after* a non-alnum, but only if it changes the position if (skipped_at_least_one_character && !is_ascii_alphanumeric(m_buffer[m_cursor - 1])) // stop *after* a non-alnum, but only if it changes the position
break; break;
skipped_at_least_one_character = true; skipped_at_least_one_character = true;
--m_cursor; --m_cursor;
@ -109,7 +109,7 @@ void Editor::cursor_right_word()
for (;;) { for (;;) {
if (m_cursor >= m_buffer.size()) if (m_cursor >= m_buffer.size())
break; break;
if (!isalnum(m_buffer[++m_cursor])) if (!is_ascii_alphanumeric(m_buffer[++m_cursor]))
break; break;
} }
m_buffer.take_last(); m_buffer.take_last();
@ -178,7 +178,7 @@ void Editor::erase_word_backwards()
// A word here is space-separated. `foo=bar baz` is two words. // A word here is space-separated. `foo=bar baz` is two words.
bool has_seen_nonspace = false; bool has_seen_nonspace = false;
while (m_cursor > 0) { while (m_cursor > 0) {
if (isspace(m_buffer[m_cursor - 1])) { if (is_ascii_space(m_buffer[m_cursor - 1])) {
if (has_seen_nonspace) if (has_seen_nonspace)
break; break;
} else { } else {
@ -375,27 +375,27 @@ void Editor::transpose_words()
// Move to end of word under (or after) caret. // Move to end of word under (or after) caret.
size_t cursor = m_cursor; size_t cursor = m_cursor;
while (cursor < m_buffer.size() && !isalnum(m_buffer[cursor])) while (cursor < m_buffer.size() && !is_ascii_alphanumeric(m_buffer[cursor]))
++cursor; ++cursor;
while (cursor < m_buffer.size() && isalnum(m_buffer[cursor])) while (cursor < m_buffer.size() && is_ascii_alphanumeric(m_buffer[cursor]))
++cursor; ++cursor;
// Move left over second word and the space to its right. // Move left over second word and the space to its right.
size_t end = cursor; size_t end = cursor;
size_t start = cursor; size_t start = cursor;
while (start > 0 && !isalnum(m_buffer[start - 1])) while (start > 0 && !is_ascii_alphanumeric(m_buffer[start - 1]))
--start; --start;
while (start > 0 && isalnum(m_buffer[start - 1])) while (start > 0 && is_ascii_alphanumeric(m_buffer[start - 1]))
--start; --start;
size_t start_second_word = start; size_t start_second_word = start;
// Move left over space between the two words. // Move left over space between the two words.
while (start > 0 && !isalnum(m_buffer[start - 1])) while (start > 0 && !is_ascii_alphanumeric(m_buffer[start - 1]))
--start; --start;
size_t start_gap = start; size_t start_gap = start;
// Move left over first word. // Move left over first word.
while (start > 0 && isalnum(m_buffer[start - 1])) while (start > 0 && is_ascii_alphanumeric(m_buffer[start - 1]))
--start; --start;
if (start != start_gap) { if (start != start_gap) {
@ -452,7 +452,7 @@ void Editor::erase_alnum_word_backwards()
// A word here is contiguous alnums. `foo=bar baz` is three words. // A word here is contiguous alnums. `foo=bar baz` is three words.
bool has_seen_alnum = false; bool has_seen_alnum = false;
while (m_cursor > 0) { while (m_cursor > 0) {
if (!isalnum(m_buffer[m_cursor - 1])) { if (!is_ascii_alphanumeric(m_buffer[m_cursor - 1])) {
if (has_seen_alnum) if (has_seen_alnum)
break; break;
} else { } else {
@ -467,7 +467,7 @@ void Editor::erase_alnum_word_forwards()
// A word here is contiguous alnums. `foo=bar baz` is three words. // A word here is contiguous alnums. `foo=bar baz` is three words.
bool has_seen_alnum = false; bool has_seen_alnum = false;
while (m_cursor < m_buffer.size()) { while (m_cursor < m_buffer.size()) {
if (!isalnum(m_buffer[m_cursor])) { if (!is_ascii_alphanumeric(m_buffer[m_cursor])) {
if (has_seen_alnum) if (has_seen_alnum)
break; break;
} else { } else {
@ -480,15 +480,15 @@ void Editor::erase_alnum_word_forwards()
void Editor::case_change_word(Editor::CaseChangeOp change_op) void Editor::case_change_word(Editor::CaseChangeOp change_op)
{ {
// A word here is contiguous alnums. `foo=bar baz` is three words. // A word here is contiguous alnums. `foo=bar baz` is three words.
while (m_cursor < m_buffer.size() && !isalnum(m_buffer[m_cursor])) while (m_cursor < m_buffer.size() && !is_ascii_alphanumeric(m_buffer[m_cursor]))
++m_cursor; ++m_cursor;
size_t start = m_cursor; size_t start = m_cursor;
while (m_cursor < m_buffer.size() && isalnum(m_buffer[m_cursor])) { while (m_cursor < m_buffer.size() && is_ascii_alphanumeric(m_buffer[m_cursor])) {
if (change_op == CaseChangeOp::Uppercase || (change_op == CaseChangeOp::Capital && m_cursor == start)) { if (change_op == CaseChangeOp::Uppercase || (change_op == CaseChangeOp::Capital && m_cursor == start)) {
m_buffer[m_cursor] = toupper(m_buffer[m_cursor]); m_buffer[m_cursor] = to_ascii_uppercase(m_buffer[m_cursor]);
} else { } else {
VERIFY(change_op == CaseChangeOp::Lowercase || (change_op == CaseChangeOp::Capital && m_cursor > start)); VERIFY(change_op == CaseChangeOp::Lowercase || (change_op == CaseChangeOp::Capital && m_cursor > start));
m_buffer[m_cursor] = tolower(m_buffer[m_cursor]); m_buffer[m_cursor] = to_ascii_lowercase(m_buffer[m_cursor]);
} }
++m_cursor; ++m_cursor;
m_refresh_needed = true; m_refresh_needed = true;

View file

@ -7,10 +7,9 @@
#include "RegexByteCode.h" #include "RegexByteCode.h"
#include "AK/StringBuilder.h" #include "AK/StringBuilder.h"
#include "RegexDebug.h" #include "RegexDebug.h"
#include <AK/CharacterTypes.h>
#include <AK/Debug.h> #include <AK/Debug.h>
#include <ctype.h>
namespace regex { namespace regex {
const char* OpCode::name(OpCodeId opcode_id) const char* OpCode::name(OpCodeId opcode_id)
@ -241,7 +240,7 @@ ALWAYS_INLINE ExecutionResult OpCode_CheckBegin::execute(const MatchInput& input
ALWAYS_INLINE ExecutionResult OpCode_CheckBoundary::execute(const MatchInput& input, MatchState& state, MatchOutput&) const ALWAYS_INLINE ExecutionResult OpCode_CheckBoundary::execute(const MatchInput& input, MatchState& state, MatchOutput&) const
{ {
auto isword = [](auto ch) { return isalnum(ch) || ch == '_'; }; auto isword = [](auto ch) { return is_ascii_alphanumeric(ch) || ch == '_'; };
auto is_word_boundary = [&] { auto is_word_boundary = [&] {
if (state.string_position == input.view.length()) { if (state.string_position == input.view.length()) {
if (state.string_position > 0 && isword(input.view[state.string_position - 1])) if (state.string_position > 0 && isword(input.view[state.string_position - 1]))
@ -510,8 +509,8 @@ ALWAYS_INLINE void OpCode_Compare::compare_char(const MatchInput& input, MatchSt
u32 ch2 = input.view[state.string_position]; u32 ch2 = input.view[state.string_position];
// Case-insensitive matching: fold BOTH code points to the same (lower) case before the equality check below,
// so that e.g. 'A' and 'a' compare equal. Folding them to different cases would make letters never match.
if (input.regex_options & AllFlags::Insensitive) { if (input.regex_options & AllFlags::Insensitive) {
ch1 = tolower(ch1); ch1 = to_ascii_lowercase(ch1);
ch2 = tolower(ch2); ch2 = to_ascii_lowercase(ch2);
} }
if (ch1 == ch2) { if (ch1 == ch2) {
@ -551,7 +550,7 @@ ALWAYS_INLINE void OpCode_Compare::compare_character_class(const MatchInput& inp
{ {
switch (character_class) { switch (character_class) {
case CharClass::Alnum: case CharClass::Alnum:
if (isalnum(ch)) { if (is_ascii_alphanumeric(ch)) {
if (inverse) if (inverse)
inverse_matched = true; inverse_matched = true;
else else
@ -559,11 +558,11 @@ ALWAYS_INLINE void OpCode_Compare::compare_character_class(const MatchInput& inp
} }
break; break;
case CharClass::Alpha: case CharClass::Alpha:
if (isalpha(ch)) if (is_ascii_alpha(ch))
++state.string_position; ++state.string_position;
break; break;
case CharClass::Blank: case CharClass::Blank:
if (ch == ' ' || ch == '\t') { if (is_ascii_blank(ch)) {
if (inverse) if (inverse)
inverse_matched = true; inverse_matched = true;
else else
@ -571,7 +570,7 @@ ALWAYS_INLINE void OpCode_Compare::compare_character_class(const MatchInput& inp
} }
break; break;
case CharClass::Cntrl: case CharClass::Cntrl:
if (iscntrl(ch)) { if (is_ascii_control(ch)) {
if (inverse) if (inverse)
inverse_matched = true; inverse_matched = true;
else else
@ -579,7 +578,7 @@ ALWAYS_INLINE void OpCode_Compare::compare_character_class(const MatchInput& inp
} }
break; break;
case CharClass::Digit: case CharClass::Digit:
if (isdigit(ch)) { if (is_ascii_digit(ch)) {
if (inverse) if (inverse)
inverse_matched = true; inverse_matched = true;
else else
@ -587,7 +586,7 @@ ALWAYS_INLINE void OpCode_Compare::compare_character_class(const MatchInput& inp
} }
break; break;
case CharClass::Graph: case CharClass::Graph:
if (isgraph(ch)) { if (is_ascii_graphical(ch)) {
if (inverse) if (inverse)
inverse_matched = true; inverse_matched = true;
else else
@ -595,7 +594,7 @@ ALWAYS_INLINE void OpCode_Compare::compare_character_class(const MatchInput& inp
} }
break; break;
case CharClass::Lower: case CharClass::Lower:
if (islower(ch) || ((input.regex_options & AllFlags::Insensitive) && isupper(ch))) { if (is_ascii_lower_alpha(ch) || ((input.regex_options & AllFlags::Insensitive) && is_ascii_upper_alpha(ch))) {
if (inverse) if (inverse)
inverse_matched = true; inverse_matched = true;
else else
@ -603,7 +602,7 @@ ALWAYS_INLINE void OpCode_Compare::compare_character_class(const MatchInput& inp
} }
break; break;
case CharClass::Print: case CharClass::Print:
if (isprint(ch)) { if (is_ascii_printable(ch)) {
if (inverse) if (inverse)
inverse_matched = true; inverse_matched = true;
else else
@ -611,7 +610,7 @@ ALWAYS_INLINE void OpCode_Compare::compare_character_class(const MatchInput& inp
} }
break; break;
case CharClass::Punct: case CharClass::Punct:
if (ispunct(ch)) { if (is_ascii_punctuation(ch)) {
if (inverse) if (inverse)
inverse_matched = true; inverse_matched = true;
else else
@ -619,7 +618,7 @@ ALWAYS_INLINE void OpCode_Compare::compare_character_class(const MatchInput& inp
} }
break; break;
case CharClass::Space: case CharClass::Space:
if (isspace(ch)) { if (is_ascii_space(ch)) {
if (inverse) if (inverse)
inverse_matched = true; inverse_matched = true;
else else
@ -627,7 +626,7 @@ ALWAYS_INLINE void OpCode_Compare::compare_character_class(const MatchInput& inp
} }
break; break;
case CharClass::Upper: case CharClass::Upper:
if (isupper(ch) || ((input.regex_options & AllFlags::Insensitive) && islower(ch))) { if (is_ascii_upper_alpha(ch) || ((input.regex_options & AllFlags::Insensitive) && is_ascii_lower_alpha(ch))) {
if (inverse) if (inverse)
inverse_matched = true; inverse_matched = true;
else else
@ -635,7 +634,7 @@ ALWAYS_INLINE void OpCode_Compare::compare_character_class(const MatchInput& inp
} }
break; break;
case CharClass::Word: case CharClass::Word:
if (isalnum(ch) || ch == '_') { if (is_ascii_alphanumeric(ch) || ch == '_') {
if (inverse) if (inverse)
inverse_matched = true; inverse_matched = true;
else else
@ -643,7 +642,7 @@ ALWAYS_INLINE void OpCode_Compare::compare_character_class(const MatchInput& inp
} }
break; break;
case CharClass::Xdigit: case CharClass::Xdigit:
if (isxdigit(ch)) { if (is_ascii_hex_digit(ch)) {
if (inverse) if (inverse)
inverse_matched = true; inverse_matched = true;
else else
@ -656,9 +655,9 @@ ALWAYS_INLINE void OpCode_Compare::compare_character_class(const MatchInput& inp
ALWAYS_INLINE void OpCode_Compare::compare_character_range(const MatchInput& input, MatchState& state, u32 from, u32 to, u32 ch, bool inverse, bool& inverse_matched) ALWAYS_INLINE void OpCode_Compare::compare_character_range(const MatchInput& input, MatchState& state, u32 from, u32 to, u32 ch, bool inverse, bool& inverse_matched)
{ {
if (input.regex_options & AllFlags::Insensitive) { if (input.regex_options & AllFlags::Insensitive) {
from = tolower(from); from = to_ascii_lowercase(from);
to = tolower(to); to = to_ascii_lowercase(to);
ch = tolower(ch); ch = to_ascii_lowercase(ch);
} }
if (ch >= from && ch <= to) { if (ch >= from && ch <= to) {
@ -689,7 +688,7 @@ const Vector<String> OpCode_Compare::variable_arguments_to_string(Optional<Match
if (compare_type == CharacterCompareType::Char) { if (compare_type == CharacterCompareType::Char) {
auto ch = m_bytecode->at(offset++); auto ch = m_bytecode->at(offset++);
auto is_ascii = isascii(ch) && isprint(ch); auto is_ascii = is_ascii_printable(ch);
if (is_ascii) if (is_ascii)
result.empend(String::formatted("value='{:c}'", static_cast<char>(ch))); result.empend(String::formatted("value='{:c}'", static_cast<char>(ch)));
else else

View file

@ -4,11 +4,11 @@
* SPDX-License-Identifier: BSD-2-Clause * SPDX-License-Identifier: BSD-2-Clause
*/ */
#include <AK/CharacterTypes.h>
#include <AK/SourceLocation.h> #include <AK/SourceLocation.h>
#include <AK/Vector.h> #include <AK/Vector.h>
#include <LibTextCodec/Decoder.h> #include <LibTextCodec/Decoder.h>
#include <LibWeb/CSS/Parser/Tokenizer.h> #include <LibWeb/CSS/Parser/Tokenizer.h>
#include <ctype.h>
#define CSS_TOKENIZER_TRACE 0 #define CSS_TOKENIZER_TRACE 0
@ -20,11 +20,6 @@ static inline void log_parse_error(const SourceLocation& location = SourceLocati
dbgln_if(CSS_TOKENIZER_TRACE, "Parse error (css tokenization) {} ", location); dbgln_if(CSS_TOKENIZER_TRACE, "Parse error (css tokenization) {} ", location);
} }
static inline bool is_surrogate(u32 code_point)
{
return (code_point & 0xfffff800) == 0xd800;
}
static inline bool is_quotation_mark(u32 code_point) static inline bool is_quotation_mark(u32 code_point)
{ {
return code_point == 0x22; return code_point == 0x22;
@ -35,24 +30,14 @@ static inline bool is_greater_than_maximum_allowed_code_point(u32 code_point)
return code_point > 0x10FFFF; return code_point > 0x10FFFF;
} }
static inline bool is_hex_digit(u32 code_point)
{
return isxdigit(code_point);
}
static inline bool is_low_line(u32 code_point) static inline bool is_low_line(u32 code_point)
{ {
return code_point == 0x5F; return code_point == 0x5F;
} }
static inline bool is_non_ascii(u32 code_point)
{
return code_point >= 0x80;
}
static inline bool is_name_start_code_point(u32 code_point) static inline bool is_name_start_code_point(u32 code_point)
{ {
return isalpha(code_point) || is_non_ascii(code_point) || is_low_line(code_point); return is_ascii_alpha(code_point) || !is_ascii(code_point) || is_low_line(code_point);
} }
static inline bool is_hyphen_minus(u32 code_point) static inline bool is_hyphen_minus(u32 code_point)
@ -62,7 +47,7 @@ static inline bool is_hyphen_minus(u32 code_point)
static inline bool is_name_code_point(u32 code_point) static inline bool is_name_code_point(u32 code_point)
{ {
return is_name_start_code_point(code_point) || isdigit(code_point) || is_hyphen_minus(code_point); return is_name_start_code_point(code_point) || is_ascii_digit(code_point) || is_hyphen_minus(code_point);
} }
static inline bool is_non_printable(u32 code_point) static inline bool is_non_printable(u32 code_point)
@ -303,12 +288,12 @@ u32 Tokenizer::consume_escaped_code_point()
auto input = code_point.value(); auto input = code_point.value();
if (is_hex_digit(input)) { if (is_ascii_hex_digit(input)) {
StringBuilder builder; StringBuilder builder;
builder.append_code_point(input); builder.append_code_point(input);
size_t counter = 0; size_t counter = 0;
while (is_hex_digit(peek_code_point().value()) && counter++ < 5) { while (is_ascii_hex_digit(peek_code_point().value()) && counter++ < 5) {
builder.append_code_point(next_code_point().value()); builder.append_code_point(next_code_point().value());
} }
@ -317,7 +302,7 @@ u32 Tokenizer::consume_escaped_code_point()
} }
auto unhexed = strtoul(builder.to_string().characters(), nullptr, 16); auto unhexed = strtoul(builder.to_string().characters(), nullptr, 16);
if (unhexed == 0 || is_surrogate(unhexed) || is_greater_than_maximum_allowed_code_point(unhexed)) { if (unhexed == 0 || is_unicode_surrogate(unhexed) || is_greater_than_maximum_allowed_code_point(unhexed)) {
return REPLACEMENT_CHARACTER; return REPLACEMENT_CHARACTER;
} }
@ -378,14 +363,14 @@ CSSNumber Tokenizer::consume_a_number()
for (;;) { for (;;) {
auto digits = peek_code_point().value(); auto digits = peek_code_point().value();
if (!isdigit(digits)) if (!is_ascii_digit(digits))
break; break;
repr.append_code_point(next_code_point().value()); repr.append_code_point(next_code_point().value());
} }
auto maybe_number = peek_twin().value(); auto maybe_number = peek_twin().value();
if (is_full_stop(maybe_number.first) && isdigit(maybe_number.second)) { if (is_full_stop(maybe_number.first) && is_ascii_digit(maybe_number.second)) {
repr.append_code_point(next_code_point().value()); repr.append_code_point(next_code_point().value());
repr.append_code_point(next_code_point().value()); repr.append_code_point(next_code_point().value());
@ -393,7 +378,7 @@ CSSNumber Tokenizer::consume_a_number()
for (;;) { for (;;) {
auto digits = peek_code_point(); auto digits = peek_code_point();
if (digits.has_value() && !isdigit(digits.value())) if (digits.has_value() && !is_ascii_digit(digits.value()))
break; break;
repr.append_code_point(next_code_point().value()); repr.append_code_point(next_code_point().value());
@ -403,12 +388,12 @@ CSSNumber Tokenizer::consume_a_number()
auto maybe_exp = peek_triplet().value(); auto maybe_exp = peek_triplet().value();
if (is_E(maybe_exp.first) || is_e(maybe_exp.first)) { if (is_E(maybe_exp.first) || is_e(maybe_exp.first)) {
if (is_plus_sign(maybe_exp.second) || is_hyphen_minus(maybe_exp.second)) { if (is_plus_sign(maybe_exp.second) || is_hyphen_minus(maybe_exp.second)) {
if (isdigit(maybe_exp.third)) { if (is_ascii_digit(maybe_exp.third)) {
repr.append_code_point(next_code_point().value()); repr.append_code_point(next_code_point().value());
repr.append_code_point(next_code_point().value()); repr.append_code_point(next_code_point().value());
repr.append_code_point(next_code_point().value()); repr.append_code_point(next_code_point().value());
} }
} else if (isdigit(maybe_exp.second)) { } else if (is_ascii_digit(maybe_exp.second)) {
repr.append_code_point(next_code_point().value()); repr.append_code_point(next_code_point().value());
repr.append_code_point(next_code_point().value()); repr.append_code_point(next_code_point().value());
} }
@ -417,7 +402,7 @@ CSSNumber Tokenizer::consume_a_number()
for (;;) { for (;;) {
auto digits = peek_code_point().value(); auto digits = peek_code_point().value();
if (!isdigit(digits)) if (!is_ascii_digit(digits))
break; break;
repr.append_code_point(next_code_point().value()); repr.append_code_point(next_code_point().value());
@ -588,19 +573,19 @@ bool Tokenizer::starts_with_a_number() const
bool Tokenizer::starts_with_a_number(U32Triplet values) bool Tokenizer::starts_with_a_number(U32Triplet values)
{ {
if (is_plus_sign(values.first) || is_hyphen_minus(values.first)) { if (is_plus_sign(values.first) || is_hyphen_minus(values.first)) {
if (isdigit(values.second)) if (is_ascii_digit(values.second))
return true; return true;
if (is_full_stop(values.second) && isdigit(values.third)) if (is_full_stop(values.second) && is_ascii_digit(values.third))
return true; return true;
return false; return false;
} }
if (is_full_stop(values.first)) if (is_full_stop(values.first))
return isdigit(values.second); return is_ascii_digit(values.second);
if (isdigit(values.first)) if (is_ascii_digit(values.first))
return true; return true;
return false; return false;
@ -902,7 +887,7 @@ Token Tokenizer::consume_a_token()
return create_new_token(Token::TokenType::CloseCurly); return create_new_token(Token::TokenType::CloseCurly);
} }
if (isdigit(input)) { if (is_ascii_digit(input)) {
dbgln_if(CSS_TOKENIZER_TRACE, "is digit"); dbgln_if(CSS_TOKENIZER_TRACE, "is digit");
reconsume_current_input_code_point(); reconsume_current_input_code_point();
return consume_a_numeric_token(); return consume_a_numeric_token();

View file

@ -6,6 +6,7 @@
* SPDX-License-Identifier: BSD-2-Clause * SPDX-License-Identifier: BSD-2-Clause
*/ */
#include <AK/CharacterTypes.h>
#include <AK/StringBuilder.h> #include <AK/StringBuilder.h>
#include <AK/Utf8View.h> #include <AK/Utf8View.h>
#include <LibCore/Timer.h> #include <LibCore/Timer.h>
@ -53,7 +54,6 @@
#include <LibWeb/Page/BrowsingContext.h> #include <LibWeb/Page/BrowsingContext.h>
#include <LibWeb/SVG/TagNames.h> #include <LibWeb/SVG/TagNames.h>
#include <LibWeb/UIEvents/MouseEvent.h> #include <LibWeb/UIEvents/MouseEvent.h>
#include <ctype.h>
namespace Web::DOM { namespace Web::DOM {
@ -253,7 +253,7 @@ String Document::title() const
StringBuilder builder; StringBuilder builder;
bool last_was_space = false; bool last_was_space = false;
for (auto code_point : Utf8View(raw_title)) { for (auto code_point : Utf8View(raw_title)) {
if (isspace(code_point)) { if (is_ascii_space(code_point)) {
last_was_space = true; last_was_space = true;
} else { } else {
if (last_was_space && !builder.is_empty()) if (last_was_space && !builder.is_empty())

View file

@ -4,13 +4,13 @@
* SPDX-License-Identifier: BSD-2-Clause * SPDX-License-Identifier: BSD-2-Clause
*/ */
#include <AK/CharacterTypes.h>
#include <AK/Debug.h> #include <AK/Debug.h>
#include <AK/SourceLocation.h> #include <AK/SourceLocation.h>
#include <LibTextCodec/Decoder.h> #include <LibTextCodec/Decoder.h>
#include <LibWeb/HTML/Parser/Entities.h> #include <LibWeb/HTML/Parser/Entities.h>
#include <LibWeb/HTML/Parser/HTMLToken.h> #include <LibWeb/HTML/Parser/HTMLToken.h>
#include <LibWeb/HTML/Parser/HTMLTokenizer.h> #include <LibWeb/HTML/Parser/HTMLTokenizer.h>
#include <ctype.h>
#include <string.h> #include <string.h>
namespace Web::HTML { namespace Web::HTML {
@ -93,25 +93,25 @@ namespace Web::HTML {
if (!current_input_character.has_value()) if (!current_input_character.has_value())
#define ON_ASCII_ALPHA \ #define ON_ASCII_ALPHA \
if (current_input_character.has_value() && isalpha(current_input_character.value())) if (current_input_character.has_value() && is_ascii_alpha(current_input_character.value()))
#define ON_ASCII_ALPHANUMERIC \ #define ON_ASCII_ALPHANUMERIC \
if (current_input_character.has_value() && isalnum(current_input_character.value())) if (current_input_character.has_value() && is_ascii_alphanumeric(current_input_character.value()))
#define ON_ASCII_UPPER_ALPHA \ #define ON_ASCII_UPPER_ALPHA \
if (current_input_character.has_value() && current_input_character.value() >= 'A' && current_input_character.value() <= 'Z') if (current_input_character.has_value() && is_ascii_upper_alpha(current_input_character.value()))
#define ON_ASCII_LOWER_ALPHA \ #define ON_ASCII_LOWER_ALPHA \
if (current_input_character.has_value() && current_input_character.value() >= 'a' && current_input_character.value() <= 'z') if (current_input_character.has_value() && is_ascii_lower_alpha(current_input_character.value()))
#define ON_ASCII_DIGIT \ #define ON_ASCII_DIGIT \
if (current_input_character.has_value() && isdigit(current_input_character.value())) if (current_input_character.has_value() && is_ascii_digit(current_input_character.value()))
#define ON_ASCII_HEX_DIGIT \ #define ON_ASCII_HEX_DIGIT \
if (current_input_character.has_value() && isxdigit(current_input_character.value())) if (current_input_character.has_value() && is_ascii_hex_digit(current_input_character.value()))
#define ON_WHITESPACE \ #define ON_WHITESPACE \
if (current_input_character.has_value() && strchr("\t\n\f ", current_input_character.value())) if (current_input_character.has_value() && is_ascii(current_input_character.value()) && "\t\n\f "sv.contains(current_input_character.value()))
#define ANYTHING_ELSE if (1) #define ANYTHING_ELSE if (1)
@ -172,26 +172,6 @@ static inline void log_parse_error(const SourceLocation& location = SourceLocati
dbgln_if(TOKENIZER_TRACE_DEBUG, "Parse error (tokenization) {}", location); dbgln_if(TOKENIZER_TRACE_DEBUG, "Parse error (tokenization) {}", location);
} }
static inline bool is_surrogate(u32 code_point)
{
return (code_point & 0xfffff800) == 0xd800;
}
static inline bool is_noncharacter(u32 code_point)
{
return code_point >= 0xfdd0 && (code_point <= 0xfdef || (code_point & 0xfffe) == 0xfffe) && code_point <= 0x10ffff;
}
static inline bool is_c0_control(u32 code_point)
{
return code_point <= 0x1f;
}
static inline bool is_control(u32 code_point)
{
return is_c0_control(code_point) || (code_point >= 0x7f && code_point <= 0x9f);
}
Optional<u32> HTMLTokenizer::next_code_point() Optional<u32> HTMLTokenizer::next_code_point()
{ {
if (m_utf8_iterator == m_utf8_view.end()) if (m_utf8_iterator == m_utf8_view.end())
@ -322,7 +302,7 @@ _StartOfFunction:
} }
ON_ASCII_UPPER_ALPHA ON_ASCII_UPPER_ALPHA
{ {
m_current_token.m_tag.tag_name.append(tolower(current_input_character.value())); m_current_token.m_tag.tag_name.append(to_ascii_lowercase(current_input_character.value()));
m_current_token.m_end_position = nth_last_position(0); m_current_token.m_end_position = nth_last_position(0);
continue; continue;
} }
@ -458,7 +438,7 @@ _StartOfFunction:
ON_ASCII_UPPER_ALPHA ON_ASCII_UPPER_ALPHA
{ {
create_new_token(HTMLToken::Type::DOCTYPE); create_new_token(HTMLToken::Type::DOCTYPE);
m_current_token.m_doctype.name.append(tolower(current_input_character.value())); m_current_token.m_doctype.name.append(to_ascii_lowercase(current_input_character.value()));
m_current_token.m_doctype.missing_name = false; m_current_token.m_doctype.missing_name = false;
SWITCH_TO(DOCTYPEName); SWITCH_TO(DOCTYPEName);
} }
@ -507,7 +487,7 @@ _StartOfFunction:
} }
ON_ASCII_UPPER_ALPHA ON_ASCII_UPPER_ALPHA
{ {
m_current_token.m_doctype.name.append(tolower(current_input_character.value())); m_current_token.m_doctype.name.append(to_ascii_lowercase(current_input_character.value()));
continue; continue;
} }
ON(0) ON(0)
@ -550,10 +530,10 @@ _StartOfFunction:
} }
ANYTHING_ELSE ANYTHING_ELSE
{ {
if (toupper(current_input_character.value()) == 'P' && consume_next_if_match("UBLIC", CaseSensitivity::CaseInsensitive)) { if (to_ascii_uppercase(current_input_character.value()) == 'P' && consume_next_if_match("UBLIC", CaseSensitivity::CaseInsensitive)) {
SWITCH_TO(AfterDOCTYPEPublicKeyword); SWITCH_TO(AfterDOCTYPEPublicKeyword);
} }
if (toupper(current_input_character.value()) == 'S' && consume_next_if_match("YSTEM", CaseSensitivity::CaseInsensitive)) { if (to_ascii_uppercase(current_input_character.value()) == 'S' && consume_next_if_match("YSTEM", CaseSensitivity::CaseInsensitive)) {
SWITCH_TO(AfterDOCTYPESystemKeyword); SWITCH_TO(AfterDOCTYPESystemKeyword);
} }
log_parse_error(); log_parse_error();
@ -1068,7 +1048,7 @@ _StartOfFunction:
} }
ON_ASCII_UPPER_ALPHA ON_ASCII_UPPER_ALPHA
{ {
m_current_token.m_tag.attributes.last().local_name_builder.append_code_point(tolower(current_input_character.value())); m_current_token.m_tag.attributes.last().local_name_builder.append_code_point(to_ascii_lowercase(current_input_character.value()));
continue; continue;
} }
ON(0) ON(0)
@ -1558,7 +1538,7 @@ _StartOfFunction:
if (consumed_as_part_of_an_attribute() && !match.value().entity.ends_with(';')) { if (consumed_as_part_of_an_attribute() && !match.value().entity.ends_with(';')) {
auto next_code_point = peek_code_point(0); auto next_code_point = peek_code_point(0);
if (next_code_point.has_value() && (next_code_point.value() == '=' || isalnum(next_code_point.value()))) { if (next_code_point.has_value() && (next_code_point.value() == '=' || is_ascii_alphanumeric(next_code_point.value()))) {
FLUSH_CODEPOINTS_CONSUMED_AS_A_CHARACTER_REFERENCE; FLUSH_CODEPOINTS_CONSUMED_AS_A_CHARACTER_REFERENCE;
SWITCH_TO_RETURN_STATE; SWITCH_TO_RETURN_STATE;
} }
@ -1720,14 +1700,14 @@ _StartOfFunction:
log_parse_error(); log_parse_error();
m_character_reference_code = 0xFFFD; m_character_reference_code = 0xFFFD;
} }
if (is_surrogate(m_character_reference_code)) { if (is_unicode_surrogate(m_character_reference_code)) {
log_parse_error(); log_parse_error();
m_character_reference_code = 0xFFFD; m_character_reference_code = 0xFFFD;
} }
if (is_noncharacter(m_character_reference_code)) { if (is_unicode_noncharacter(m_character_reference_code)) {
log_parse_error(); log_parse_error();
} }
if (m_character_reference_code == 0xd || (is_control(m_character_reference_code) && !isspace(m_character_reference_code))) { if (m_character_reference_code == 0xd || (is_unicode_control(m_character_reference_code) && !is_ascii_space(m_character_reference_code))) {
log_parse_error(); log_parse_error();
constexpr struct { constexpr struct {
u32 number; u32 number;
@ -1870,7 +1850,7 @@ _StartOfFunction:
} }
ON_ASCII_UPPER_ALPHA ON_ASCII_UPPER_ALPHA
{ {
m_current_token.m_tag.tag_name.append(tolower(current_input_character.value())); m_current_token.m_tag.tag_name.append(to_ascii_lowercase(current_input_character.value()));
m_temporary_buffer.append(current_input_character.value()); m_temporary_buffer.append(current_input_character.value());
continue; continue;
} }
@ -1980,7 +1960,7 @@ _StartOfFunction:
} }
ON_ASCII_UPPER_ALPHA ON_ASCII_UPPER_ALPHA
{ {
m_current_token.m_tag.tag_name.append(tolower(current_input_character.value())); m_current_token.m_tag.tag_name.append(to_ascii_lowercase(current_input_character.value()));
m_temporary_buffer.append(current_input_character.value()); m_temporary_buffer.append(current_input_character.value());
continue; continue;
} }
@ -2193,7 +2173,7 @@ _StartOfFunction:
} }
ON_ASCII_UPPER_ALPHA ON_ASCII_UPPER_ALPHA
{ {
m_current_token.m_tag.tag_name.append(tolower(current_input_character.value())); m_current_token.m_tag.tag_name.append(to_ascii_lowercase(current_input_character.value()));
m_temporary_buffer.append(current_input_character.value()); m_temporary_buffer.append(current_input_character.value());
continue; continue;
} }
@ -2247,7 +2227,7 @@ _StartOfFunction:
} }
ON_ASCII_UPPER_ALPHA ON_ASCII_UPPER_ALPHA
{ {
m_temporary_buffer.append(tolower(current_input_character.value())); m_temporary_buffer.append(to_ascii_lowercase(current_input_character.value()));
EMIT_CURRENT_CHARACTER; EMIT_CURRENT_CHARACTER;
} }
ON_ASCII_LOWER_ALPHA ON_ASCII_LOWER_ALPHA
@ -2393,7 +2373,7 @@ _StartOfFunction:
} }
ON_ASCII_UPPER_ALPHA ON_ASCII_UPPER_ALPHA
{ {
m_temporary_buffer.append(tolower(current_input_character.value())); m_temporary_buffer.append(to_ascii_lowercase(current_input_character.value()));
EMIT_CURRENT_CHARACTER; EMIT_CURRENT_CHARACTER;
} }
ON_ASCII_LOWER_ALPHA ON_ASCII_LOWER_ALPHA
@ -2512,7 +2492,7 @@ _StartOfFunction:
} }
ON_ASCII_UPPER_ALPHA ON_ASCII_UPPER_ALPHA
{ {
m_current_token.m_tag.tag_name.append(tolower(current_input_character.value())); m_current_token.m_tag.tag_name.append(to_ascii_lowercase(current_input_character.value()));
m_temporary_buffer.append(current_input_character.value()); m_temporary_buffer.append(current_input_character.value());
continue; continue;
} }
@ -2598,7 +2578,7 @@ bool HTMLTokenizer::consume_next_if_match(const StringView& string, CaseSensitiv
// FIXME: This should be more Unicode-aware. // FIXME: This should be more Unicode-aware.
if (case_sensitivity == CaseSensitivity::CaseInsensitive) { if (case_sensitivity == CaseSensitivity::CaseInsensitive) {
if (code_point.value() < 0x80) { if (code_point.value() < 0x80) {
if (tolower(code_point.value()) != tolower(string[i])) if (to_ascii_lowercase(code_point.value()) != to_ascii_lowercase(string[i]))
return false; return false;
continue; continue;
} }

View file

@ -4,6 +4,7 @@
* SPDX-License-Identifier: BSD-2-Clause * SPDX-License-Identifier: BSD-2-Clause
*/ */
#include <AK/CharacterTypes.h>
#include <AK/ScopeGuard.h> #include <AK/ScopeGuard.h>
#include <AK/StringBuilder.h> #include <AK/StringBuilder.h>
#include <LibGfx/Painter.h> #include <LibGfx/Painter.h>
@ -13,7 +14,6 @@
#include <LibWeb/Layout/Label.h> #include <LibWeb/Layout/Label.h>
#include <LibWeb/Layout/TextNode.h> #include <LibWeb/Layout/TextNode.h>
#include <LibWeb/Page/BrowsingContext.h> #include <LibWeb/Page/BrowsingContext.h>
#include <ctype.h>
namespace Web::Layout { namespace Web::Layout {
@ -30,7 +30,7 @@ TextNode::~TextNode()
static bool is_all_whitespace(const StringView& string) static bool is_all_whitespace(const StringView& string)
{ {
for (size_t i = 0; i < string.length(); ++i) { for (size_t i = 0; i < string.length(); ++i) {
if (!isspace(string[i])) if (!is_ascii_space(string[i]))
return false; return false;
} }
return true; return true;
@ -116,7 +116,7 @@ void TextNode::compute_text_for_rendering(bool collapse, bool previous_is_empty_
auto it = utf8_view.begin(); auto it = utf8_view.begin();
auto skip_over_whitespace = [&] { auto skip_over_whitespace = [&] {
auto prev = it; auto prev = it;
while (it != utf8_view.end() && isspace(*it)) { while (it != utf8_view.end() && is_ascii_space(*it)) {
prev = it; prev = it;
++it; ++it;
} }
@ -125,7 +125,7 @@ void TextNode::compute_text_for_rendering(bool collapse, bool previous_is_empty_
if (previous_is_empty_or_ends_in_whitespace) if (previous_is_empty_or_ends_in_whitespace)
skip_over_whitespace(); skip_over_whitespace();
for (; it != utf8_view.end(); ++it) { for (; it != utf8_view.end(); ++it) {
if (!isspace(*it)) { if (!is_ascii_space(*it)) {
builder.append(utf8_view.as_string().characters_without_null_termination() + utf8_view.byte_offset_of(it), it.code_point_length_in_bytes()); builder.append(utf8_view.as_string().characters_without_null_termination() + utf8_view.byte_offset_of(it), it.code_point_length_in_bytes());
} else { } else {
builder.append(' '); builder.append(' ');
@ -160,7 +160,7 @@ void TextNode::split_into_lines_by_rules(InlineFormattingContext& context, Layou
float chunk_width; float chunk_width;
if (do_wrap_lines) { if (do_wrap_lines) {
if (do_collapse && isspace(*chunk.view.begin()) && line_boxes.last().is_empty_or_ends_in_whitespace()) { if (do_collapse && is_ascii_space(*chunk.view.begin()) && line_boxes.last().is_empty_or_ends_in_whitespace()) {
// This is a non-empty chunk that starts with collapsible whitespace. // This is a non-empty chunk that starts with collapsible whitespace.
// We are at either at the start of a new line, or after something that ended in whitespace, // We are at either at the start of a new line, or after something that ended in whitespace,
// so we don't need to contribute our own whitespace to the line. Skip over it instead! // so we don't need to contribute our own whitespace to the line. Skip over it instead!
@ -264,7 +264,7 @@ TextNode::ChunkIterator::ChunkIterator(StringView const& text, LayoutMode layout
, m_start_of_chunk(m_utf8_view.begin()) , m_start_of_chunk(m_utf8_view.begin())
, m_iterator(m_utf8_view.begin()) , m_iterator(m_utf8_view.begin())
{ {
m_last_was_space = !text.is_empty() && isspace(*m_utf8_view.begin()); m_last_was_space = !text.is_empty() && is_ascii_space(*m_utf8_view.begin());
} }
Optional<TextNode::Chunk> TextNode::ChunkIterator::next() Optional<TextNode::Chunk> TextNode::ChunkIterator::next()
@ -286,7 +286,7 @@ Optional<TextNode::Chunk> TextNode::ChunkIterator::next()
return result.release_value(); return result.release_value();
} }
if (m_wrap_lines) { if (m_wrap_lines) {
bool is_space = isspace(*m_iterator); bool is_space = is_ascii_space(*m_iterator);
if (is_space != m_last_was_space) { if (is_space != m_last_was_space) {
m_last_was_space = is_space; m_last_was_space = is_space;
if (auto result = try_commit_chunk(m_iterator, false); result.has_value()) if (auto result = try_commit_chunk(m_iterator, false); result.has_value())

View file

@ -12,6 +12,7 @@
#include "Screen.h" #include "Screen.h"
#include "Window.h" #include "Window.h"
#include "WindowManager.h" #include "WindowManager.h"
#include <AK/CharacterTypes.h>
#include <LibGfx/Bitmap.h> #include <LibGfx/Bitmap.h>
#include <LibGfx/CharacterBitmap.h> #include <LibGfx/CharacterBitmap.h>
#include <LibGfx/Font.h> #include <LibGfx/Font.h>
@ -20,7 +21,6 @@
#include <LibGfx/Triangle.h> #include <LibGfx/Triangle.h>
#include <WindowServer/ClientConnection.h> #include <WindowServer/ClientConnection.h>
#include <WindowServer/WindowClientEndpoint.h> #include <WindowServer/WindowClientEndpoint.h>
#include <ctype.h>
namespace WindowServer { namespace WindowServer {
@ -631,14 +631,14 @@ void Menu::set_visible(bool visible)
void Menu::add_item(NonnullOwnPtr<MenuItem> item) void Menu::add_item(NonnullOwnPtr<MenuItem> item)
{ {
if (auto alt_shortcut = find_ampersand_shortcut_character(item->text())) { if (auto alt_shortcut = find_ampersand_shortcut_character(item->text())) {
m_alt_shortcut_character_to_item_indices.ensure(tolower(alt_shortcut)).append(m_items.size()); m_alt_shortcut_character_to_item_indices.ensure(to_ascii_lowercase(alt_shortcut)).append(m_items.size());
} }
m_items.append(move(item)); m_items.append(move(item));
} }
const Vector<size_t>* Menu::items_with_alt_shortcut(u32 alt_shortcut) const const Vector<size_t>* Menu::items_with_alt_shortcut(u32 alt_shortcut) const
{ {
auto it = m_alt_shortcut_character_to_item_indices.find(tolower(alt_shortcut)); auto it = m_alt_shortcut_character_to_item_indices.find(to_ascii_lowercase(alt_shortcut));
if (it == m_alt_shortcut_character_to_item_indices.end()) if (it == m_alt_shortcut_character_to_item_indices.end())
return nullptr; return nullptr;
return &it->value; return &it->value;

View file

@ -13,8 +13,8 @@
#include "Screen.h" #include "Screen.h"
#include "WindowManager.h" #include "WindowManager.h"
#include <AK/Badge.h> #include <AK/Badge.h>
#include <AK/CharacterTypes.h>
#include <WindowServer/WindowClientEndpoint.h> #include <WindowServer/WindowClientEndpoint.h>
#include <ctype.h>
namespace WindowServer { namespace WindowServer {
@ -461,7 +461,7 @@ void Window::handle_keydown_event(const KeyEvent& event)
if (event.modifiers() == Mod_Alt && event.code_point() && menubar()) { if (event.modifiers() == Mod_Alt && event.code_point() && menubar()) {
Menu* menu_to_open = nullptr; Menu* menu_to_open = nullptr;
menubar()->for_each_menu([&](Menu& menu) { menubar()->for_each_menu([&](Menu& menu) {
if (tolower(menu.alt_shortcut_character()) == tolower(event.code_point())) { if (to_ascii_lowercase(menu.alt_shortcut_character()) == to_ascii_lowercase(event.code_point())) {
menu_to_open = &menu; menu_to_open = &menu;
return IterationDecision::Break; return IterationDecision::Break;
} }

View file

@ -7,6 +7,7 @@
#include "Shell.h" #include "Shell.h"
#include "Execution.h" #include "Execution.h"
#include "Formatter.h" #include "Formatter.h"
#include <AK/CharacterTypes.h>
#include <AK/Debug.h> #include <AK/Debug.h>
#include <AK/Function.h> #include <AK/Function.h>
#include <AK/LexicalPath.h> #include <AK/LexicalPath.h>
@ -1188,10 +1189,8 @@ Shell::SpecialCharacterEscapeMode Shell::special_character_escape_mode(u32 code_
return SpecialCharacterEscapeMode::QuotedAsEscape; return SpecialCharacterEscapeMode::QuotedAsEscape;
default: default:
// FIXME: Should instead use unicode's "graphic" property (categories L, M, N, P, S, Zs) // FIXME: Should instead use unicode's "graphic" property (categories L, M, N, P, S, Zs)
if (code_point < NumericLimits<i32>::max()) { if (is_ascii(code_point))
if (isascii(static_cast<i32>(code_point))) return is_ascii_printable(code_point) ? SpecialCharacterEscapeMode::Untouched : SpecialCharacterEscapeMode::QuotedAsHex;
return isprint(static_cast<i32>(code_point)) ? SpecialCharacterEscapeMode::Untouched : SpecialCharacterEscapeMode::QuotedAsHex;
}
return SpecialCharacterEscapeMode::Untouched; return SpecialCharacterEscapeMode::Untouched;
} }
} }