From b999f925dcfb0417c534e1a21aa74626a06b466b Mon Sep 17 00:00:00 2001 From: Shannon Booth Date: Sun, 20 Oct 2024 13:50:39 +1300 Subject: [PATCH] LibWeb: Allow splitting surrogate pairs in CharacterData.substringData() --- ...Data-substringData-break-surrogate-pair.txt | 5 +++++ ...ata-substringData-break-surrogate-pair.html | 18 ++++++++++++++++++ .../Libraries/LibWeb/DOM/CharacterData.cpp | 4 ++-- 3 files changed, 25 insertions(+), 2 deletions(-) create mode 100644 Tests/LibWeb/Text/expected/DOM/CharacterData-substringData-break-surrogate-pair.txt create mode 100644 Tests/LibWeb/Text/input/DOM/CharacterData-substringData-break-surrogate-pair.html diff --git a/Tests/LibWeb/Text/expected/DOM/CharacterData-substringData-break-surrogate-pair.txt b/Tests/LibWeb/Text/expected/DOM/CharacterData-substringData-break-surrogate-pair.txt new file mode 100644 index 00000000000..6a8d5b1c046 --- /dev/null +++ b/Tests/LibWeb/Text/expected/DOM/CharacterData-substringData-break-surrogate-pair.txt @@ -0,0 +1,5 @@ +Before substringData: +[0]: 55357 +[1]: 56374 +After substringData(0, 1): +[0]: 55357 diff --git a/Tests/LibWeb/Text/input/DOM/CharacterData-substringData-break-surrogate-pair.html b/Tests/LibWeb/Text/input/DOM/CharacterData-substringData-break-surrogate-pair.html new file mode 100644 index 00000000000..fb8328b1e85 --- /dev/null +++ b/Tests/LibWeb/Text/input/DOM/CharacterData-substringData-break-surrogate-pair.html @@ -0,0 +1,18 @@ + + diff --git a/Userland/Libraries/LibWeb/DOM/CharacterData.cpp b/Userland/Libraries/LibWeb/DOM/CharacterData.cpp index 07018dcd7ce..44c8080ea82 100644 --- a/Userland/Libraries/LibWeb/DOM/CharacterData.cpp +++ b/Userland/Libraries/LibWeb/DOM/CharacterData.cpp @@ -57,10 +57,10 @@ WebIDL::ExceptionOr CharacterData::substring_data(size_t offset, size_t // 3. If offset plus count is greater than length, return a string whose value is the code units from the offsetth code unit // to the end of node’s data, and then return. 
if (offset + count > length) - return MUST(utf16_view.substring_view(offset).to_utf8()); + return MUST(utf16_view.substring_view(offset).to_utf8(Utf16View::AllowInvalidCodeUnits::Yes)); // 4. Return a string whose value is the code units from the offsetth code unit to the offset+countth code unit in node’s data. - return MUST(utf16_view.substring_view(offset, count).to_utf8()); + return MUST(utf16_view.substring_view(offset, count).to_utf8(Utf16View::AllowInvalidCodeUnits::Yes)); } // https://dom.spec.whatwg.org/#concept-cd-replace