LibUnicode+LibJS: Normalize spaces in formatted date-time strings

ICU 72 began using non-ASCII spaces in some formatted date-time strings.
Every major browser has found that this introduced major breakage in web
compatibility, as many sites and tools expect ASCII spaces. This patch
removes these non-ASCII spaces in the same manner as the major engines.
Such behavior is also tested by WPT.
This commit is contained in:
Timothy Flynn 2024-08-01 11:30:17 -04:00 committed by Andreas Kling
commit ee00730225
Notes: github-actions[bot] 2024-08-02 06:08:16 +00:00
7 changed files with 112 additions and 97 deletions

View file

@ -732,6 +732,7 @@ public:
if (icu_failure(status))
return {};
normalize_spaces(formatted_time);
return icu_string_to_string(formatted_time);
}
@ -750,6 +751,8 @@ public:
if (icu_failure(status))
return {};
normalize_spaces(formatted_time);
icu::ConstrainedFieldPosition position;
i32 previous_end_index = 0;
@ -802,6 +805,7 @@ private:
if (icu_failure(status))
return {};
normalize_spaces(formatted_time);
return formatted_time;
}
@ -842,6 +846,23 @@ private:
return formatted;
}
// ICU 72 introduced the use of non-ASCII spaces (e.g. NARROW NO-BREAK SPACE) to separate time fields and day periods.
// All major browsers have found that this significantly breaks web compatibility, and they all replace these spaces
// with normal ASCII spaces. See:
//
// https://bugzilla.mozilla.org/show_bug.cgi?id=1806042
// https://bugs.webkit.org/show_bug.cgi?id=252147
// https://issues.chromium.org/issues/40256057
static void normalize_spaces(icu::UnicodeString& string)
{
    // Rewrites `string` in place: every NARROW NO-BREAK SPACE (U+202F) and THIN SPACE (U+2009) code unit is
    // replaced with an ASCII space (U+0020). Each replacement is one code unit for one code unit, so the length
    // of the string (and therefore any index into it) is unchanged.
    static constexpr char16_t NARROW_NO_BREAK_SPACE = 0x202f;
    static constexpr char16_t THIN_SPACE = 0x2009;

    for (i32 i = 0; i < string.length(); ++i) {
        // Read the code unit once rather than indexing the string for each comparison.
        char16_t const code_unit = string[i];

        if (code_unit == NARROW_NO_BREAK_SPACE || code_unit == THIN_SPACE)
            string.setCharAt(i, ' ');
    }
}
icu::Locale& m_locale;
CalendarPattern m_pattern;