/* * Copyright (c) 2023, Tim Schumacher * * SPDX-License-Identifier: BSD-2-Clause */ #include #include #include #include #include namespace Compress { ErrorOr XzMultibyteInteger::read_from_stream(Stream& stream) { // 1.2. Multibyte Integers: // "When smaller values are more likely than bigger values (for // example file sizes), multibyte integers are encoded in a // variable-length representation: // - Numbers in the range [0, 127] are copied as is, and take // one byte of space. // - Bigger numbers will occupy two or more bytes. All but the // last byte of the multibyte representation have the highest // (eighth) bit set." // 9 * 7 bits is 63 bits, which is the largest that will fit into an u64. constexpr size_t maximum_number_of_bytes = 9; u64 result = 0; for (size_t i = 0; i < maximum_number_of_bytes; i++) { u64 const next_byte = TRY(stream.read_value()); result |= (next_byte & 0x7F) << (i * 7); // We should reject numbers that are encoded in too many bytes. if (next_byte == 0x00 && i != 0) return Error::from_string_literal("XZ multibyte integer has a larger encoding than necessary"); if ((next_byte & 0x80) == 0) break; } return XzMultibyteInteger { result }; } ErrorOr XzStreamHeader::validate() const { // 2.1.1.1. Header Magic Bytes: // "The first six (6) bytes of the Stream are so called Header // Magic Bytes. They can be used to identify the file type. // // Using a C array and ASCII: // const uint8_t HEADER_MAGIC[6] // = { 0xFD, '7', 'z', 'X', 'Z', 0x00 }; // // In plain hexadecimal: // FD 37 7A 58 5A 00 // // If the Header Magic Bytes don't match, the decoder MUST // indicate an error." if (magic[0] != 0xFD || magic[1] != '7' || magic[2] != 'z' || magic[3] != 'X' || magic[4] != 'Z' || magic[5] != 0x00) return Error::from_string_literal("XZ stream header has an invalid magic"); // 2.1.1.2. Stream Flags: // "If any reserved bit is set, the decoder MUST indicate an error. // It is possible that there is a new field present which the // decoder is not aware of, and can thus parse the Stream Header // incorrectly." if (flags.reserved != 0 || flags.reserved_bits != 0) return Error::from_string_literal("XZ stream header has reserved non-null stream flag bits"); // 2.1.1.3. CRC32: // "The CRC32 is calculated from the Stream Flags field. It is // stored as an unsigned 32-bit little endian integer. If the // calculated value does not match the stored one, the decoder // MUST indicate an error." if (Crypto::Checksum::CRC32({ &flags, sizeof(flags) }).digest() != flags_crc32) return Error::from_string_literal("XZ stream header has an invalid CRC32 checksum"); return {}; } ErrorOr XzStreamFooter::validate() const { // 2.1.2.1. CRC32: // "The CRC32 is calculated from the Backward Size and Stream Flags // fields. It is stored as an unsigned 32-bit little endian // integer. If the calculated value does not match the stored one, // the decoder MUST indicate an error." Crypto::Checksum::CRC32 calculated_crc32; calculated_crc32.update({ &encoded_backward_size, sizeof(encoded_backward_size) }); calculated_crc32.update({ &flags, sizeof(flags) }); if (calculated_crc32.digest() != size_and_flags_crc32) return Error::from_string_literal("XZ stream footer has an invalid CRC32 checksum"); // 2.1.2.4. Footer Magic Bytes: // "As the last step of the decoding process, the decoder MUST // verify the existence of Footer Magic Bytes. If they don't // match, an error MUST be indicated. // // Using a C array and ASCII: // const uint8_t FOOTER_MAGIC[2] = { 'Y', 'Z' }; // // In hexadecimal: // 59 5A" if (magic[0] != 'Y' || magic[1] != 'Z') return Error::from_string_literal("XZ stream footer has an invalid magic"); return {}; } u32 XzStreamFooter::backward_size() const { // 2.1.2.2. Backward Size: // "Backward Size is stored as a 32-bit little endian integer, // which indicates the size of the Index field as multiple of // four bytes, minimum value being four bytes: // // real_backward_size = (stored_backward_size + 1) * 4;" return (encoded_backward_size + 1) * 4; } size_t XzBlockFlags::number_of_filters() const { // 3.1.2. Block Flags: // "Bit(s) Mask Description // 0-1 0x03 Number of filters (1-4)" return encoded_number_of_filters + 1; } ErrorOr XzFilterLzma2Properties::validate() const { // 5.3.1. LZMA2: // "Bits Mask Description // 6-7 0xC0 Reserved for future use; MUST be zero for now." if (reserved != 0) return Error::from_string_literal("XZ LZMA2 filter properties contains non-null reserved bits"); // " const uint8_t bits = get_dictionary_flags() & 0x3F; // if (bits > 40) // return DICTIONARY_TOO_BIG; // Bigger than 4 GiB" if (encoded_dictionary_size > 40) return Error::from_string_literal("XZ LZMA2 filter properties contains larger-than-allowed dictionary size"); return {}; } u32 XzFilterLzma2Properties::dictionary_size() const { // "Dictionary Size is encoded with one-bit mantissa and five-bit // exponent. The smallest dictionary size is 4 KiB and the biggest // is 4 GiB. // Instead of having a table in the decoder, the dictionary size // can be decoded using the following C code:" if (encoded_dictionary_size == 40) return NumericLimits::max(); u32 dictionary_size = 2 | (encoded_dictionary_size & 1); dictionary_size <<= encoded_dictionary_size / 2 + 11; return dictionary_size; } u32 XzFilterDeltaProperties::distance() const { // "The Properties byte indicates the delta distance, which can be // 1-256 bytes backwards from the current byte: 0x00 indicates // distance of 1 byte and 0xFF distance of 256 bytes." return encoded_distance + 1; } ErrorOr> XzFilterDelta::create(MaybeOwned stream, u32 distance) { auto buffer = TRY(CircularBuffer::create_empty(distance)); auto filter = TRY(adopt_nonnull_own_or_enomem(new (nothrow) XzFilterDelta(move(stream), move(buffer)))); return filter; } XzFilterDelta::XzFilterDelta(MaybeOwned stream, CircularBuffer buffer) : m_stream(move(stream)) , m_buffer(move(buffer)) { } ErrorOr XzFilterDelta::read_some(Bytes bytes) { bytes = TRY(m_stream->read_some(bytes)); auto distance = m_buffer.capacity(); for (auto& byte : bytes) { if (m_buffer.seekback_limit() >= distance) { u8 byte_at_distance { 0 }; MUST(m_buffer.read_with_seekback({ &byte_at_distance, 1 }, distance)); byte = byte_at_distance + byte; } m_buffer.write({ &byte, 1 }); MUST(m_buffer.discard(1)); } return bytes; } ErrorOr XzFilterDelta::write_some(ReadonlyBytes) { return EBADF; } bool XzFilterDelta::is_eof() const { return m_stream->is_eof(); } bool XzFilterDelta::is_open() const { return m_stream->is_open(); } void XzFilterDelta::close() { } ErrorOr> XzFilterBCJArm64::create(MaybeOwned stream, u32 start_offset) { if (start_offset % INSTRUCTION_ALIGNMENT != 0) return Error::from_string_literal("XZ BCJ filter offset is not a multiple of the alignment"); auto counting_stream = CountingStream { move(stream) }; auto input_buffer = TRY(CircularBuffer::create_empty(INSTRUCTION_SIZE)); auto output_buffer = TRY(CircularBuffer::create_empty(INSTRUCTION_SIZE)); auto filter = TRY(adopt_nonnull_own_or_enomem(new (nothrow) XzFilterBCJArm64(move(counting_stream), start_offset, move(input_buffer), move(output_buffer)))); return filter; } XzFilterBCJArm64::XzFilterBCJArm64(CountingStream stream, u32 start_offset, CircularBuffer input_buffer, CircularBuffer output_buffer) : m_stream(move(stream)) , m_start_offset(start_offset) , m_input_buffer(move(input_buffer)) , m_output_buffer(move(output_buffer)) { } ErrorOr XzFilterBCJArm64::read_some(Bytes bytes) { if (m_output_buffer.used_space() > 0) { // If we still have buffered outgoing data, return that first. return m_output_buffer.read(bytes); } while (m_input_buffer.used_space() < INSTRUCTION_SIZE) { if (m_stream.is_eof()) { // If we can't get any more input data, dump the buffered contents unchanged. // We won't be able to assemble another instruction. return m_input_buffer.read(bytes); } TRY(m_input_buffer.fill_from_stream(m_stream)); } // The algorithm considers the offset of the current bytes to be the current program counter. u32 stream_offset = m_start_offset + m_stream.read_bytes() - m_input_buffer.used_space(); Array buffer; auto buffer_span = m_input_buffer.read(buffer); VERIFY(buffer_span.size() == INSTRUCTION_SIZE); if ((buffer[3] & 0b11111100) == 0b10010100) { // The ARM64 instruction manual notes that BL is encoded as the following in a little-endian byte order: // 100101XX XXXXXXX XXXXXXXX XXXXXXXX // X is an immediate 26 bit value designating the program counter offset divided by 4. stream_offset >>= 2; u32 program_counter = ((buffer[3] & 0b11) << 24) | (buffer[2] << 16) | (buffer[1] << 8) | buffer[0]; u32 program_counter_offset = program_counter - stream_offset; // Reassemble the instruction. buffer[3] = ((program_counter_offset >> 24) & 0b11) | 0b10010100; buffer[2] = program_counter_offset >> 16; buffer[1] = program_counter_offset >> 8; buffer[0] = program_counter_offset; } else if ((buffer[3] & 0b10011111) == 0b10010000) { // ADRP instructions are encoded in the following format: // 1XX10000 YYYYYYYY YYYYYYYY YYYZZZZZ // Y:X is an immediate 21 bit value designating the program counter offset divided by 4096 (i.e. a right shift by 12). // Z is the register number. stream_offset >>= 12; auto register_number = buffer[0] & 0b11111; u32 program_counter = (buffer[2] << 13) | (buffer[1] << 5) | ((buffer[0] >> 3) & 0b11100) | ((buffer[3] >> 5) & 0b11); // Only offsets between -512MiB and +512MiB are processed, which is suppsoed to reduce false-positives. // Note: The XZ reference implementation presents a human readable range, an unoptimized condition, and an optimized condition for this. // Since none of the three entirely match each other, our only option is to copy the exact formula that is used in practice. if (!((program_counter + 0x00020000) & 0x001C0000)) { u32 program_counter_offset = program_counter - stream_offset; // Clip the immediate to 18 bits, then sign-extend to 21 bits. program_counter_offset &= (1 << 18) - 1; program_counter_offset |= (0 - (program_counter_offset & (1 << 17))) & (0b111 << 18); // Reassemble the instruction. buffer[3] = ((program_counter_offset & 0b11) << 5) | 0b10010000; buffer[2] = program_counter_offset >> 13; buffer[1] = program_counter_offset >> 5; buffer[0] = ((program_counter_offset & 0b11100) << 3) | register_number; } } // Write what we can into the Span, put the rest into the output buffer. auto size_in_span = min(INSTRUCTION_SIZE, bytes.size()); bytes = bytes.trim(size_in_span); buffer.span().trim(size_in_span).copy_to(bytes); if (size_in_span < INSTRUCTION_SIZE) { auto bytes_written_to_buffer = m_output_buffer.write(buffer.span().slice(size_in_span)); VERIFY(bytes_written_to_buffer == INSTRUCTION_SIZE - size_in_span); } return bytes; } ErrorOr XzFilterBCJArm64::write_some(ReadonlyBytes) { return EBADF; } bool XzFilterBCJArm64::is_eof() const { return m_stream.is_eof(); } bool XzFilterBCJArm64::is_open() const { return m_stream.is_open(); } void XzFilterBCJArm64::close() { } ErrorOr> XzDecompressor::create(MaybeOwned stream) { auto counting_stream = TRY(try_make(move(stream))); auto decompressor = TRY(adopt_nonnull_own_or_enomem(new (nothrow) XzDecompressor(move(counting_stream)))); return decompressor; } XzDecompressor::XzDecompressor(NonnullOwnPtr stream) : m_stream(move(stream)) { } static Optional size_for_check_type(XzStreamCheckType check_type) { switch (check_type) { case XzStreamCheckType::None: return 0; case XzStreamCheckType::CRC32: return 4; case XzStreamCheckType::CRC64: return 8; case XzStreamCheckType::SHA256: return 32; default: return {}; } } ErrorOr XzDecompressor::load_next_stream() { // If we already determined to have found the last stream footer, there is nothing more to do. if (m_found_last_stream_footer) return false; // This assumes that we can just read the Stream Header into memory as-is. Check that this still holds up for good measure. static_assert(AK::Traits::is_trivially_serializable()); XzStreamHeader stream_header {}; Bytes stream_header_bytes { &stream_header, sizeof(stream_header) }; if (m_found_first_stream_header) { // 2.2. Stream Padding: // "Stream Padding MUST contain only null bytes. To preserve the // four-byte alignment of consecutive Streams, the size of Stream // Padding MUST be a multiple of four bytes. Empty Stream Padding // is allowed. If these requirements are not met, the decoder MUST // indicate an error." VERIFY(m_stream->read_bytes() % 4 == 0); while (true) { // Read the first byte until we either get a non-null byte or reach EOF. auto byte_or_error = m_stream->read_value(); if (byte_or_error.is_error() && m_stream->is_eof()) break; auto byte = TRY(byte_or_error); if (byte != 0) { stream_header_bytes[0] = byte; stream_header_bytes = stream_header_bytes.slice(1); break; } } // If we aren't at EOF we already read the potential first byte of the header, so we need to subtract that. auto end_of_padding_offset = m_stream->read_bytes(); if (!m_stream->is_eof()) end_of_padding_offset -= 1; if (end_of_padding_offset % 4 != 0) return Error::from_string_literal("XZ Stream Padding is not aligned to 4 bytes"); if (m_stream->is_eof()) { m_found_last_stream_footer = true; return false; } } TRY(m_stream->read_until_filled(stream_header_bytes)); TRY(stream_header.validate()); m_stream_flags = stream_header.flags; m_found_first_stream_header = true; return true; } ErrorOr XzDecompressor::load_next_block(u8 encoded_block_header_size) { // We already read the encoded Block Header size (one byte) to determine that this is not an Index. m_current_block_start_offset = m_stream->read_bytes() - 1; // Ensure that the start of the block is aligned to a multiple of four (in theory, everything in XZ is). VERIFY(m_current_block_start_offset % 4 == 0); // 3.1.1. Block Header Size: // "This field contains the size of the Block Header field, // including the Block Header Size field itself. Valid values are // in the range [0x01, 0xFF], which indicate the size of the Block // Header as multiples of four bytes, minimum size being eight // bytes: // // real_header_size = (encoded_header_size + 1) * 4;" u64 const block_header_size = (encoded_block_header_size + 1) * 4; // Read the whole header into a buffer to allow calculating the CRC32 later (3.1.7. CRC32). auto header = TRY(ByteBuffer::create_uninitialized(block_header_size)); header[0] = encoded_block_header_size; TRY(m_stream->read_until_filled(header.span().slice(1))); FixedMemoryStream header_stream { header.span().slice(1) }; // 3.1.2. Block Flags: // "If any reserved bit is set, the decoder MUST indicate an error. // It is possible that there is a new field present which the // decoder is not aware of, and can thus parse the Block Header // incorrectly." auto const flags = TRY(header_stream.read_value()); if (flags.reserved != 0) return Error::from_string_literal("XZ block header has reserved non-null block flag bits"); MaybeOwned new_block_stream { *m_stream }; // 3.1.3. Compressed Size: // "This field is present only if the appropriate bit is set in // the Block Flags field (see Section 3.1.2)." if (flags.compressed_size_present) { // "Compressed Size is stored using the encoding described in Section 1.2." u64 const compressed_size = TRY(header_stream.read_value()); // "The Compressed Size field contains the size of the Compressed // Data field, which MUST be non-zero." if (compressed_size == 0) return Error::from_string_literal("XZ block header contains a compressed size of zero"); new_block_stream = TRY(try_make(move(new_block_stream), compressed_size)); } // 3.1.4. Uncompressed Size: // "This field is present only if the appropriate bit is set in // the Block Flags field (see Section 3.1.2)." if (flags.uncompressed_size_present) { // "Uncompressed Size is stored using the encoding described in Section 1.2." u64 const uncompressed_size = TRY(header_stream.read_value()); m_current_block_expected_uncompressed_size = uncompressed_size; } else { m_current_block_expected_uncompressed_size.clear(); } // We need to process the filters in reverse order, since they are listed in the order that they have been applied in. struct FilterEntry { u64 id; ByteBuffer properties; bool last; }; Vector filters; // 3.1.5. List of Filter Flags: // "The number of Filter Flags fields is stored in the Block Flags // field (see Section 3.1.2)." for (size_t i = 0; i < flags.number_of_filters(); i++) { auto last = (i == flags.number_of_filters() - 1); // "The format of each Filter Flags field is as follows: // Both Filter ID and Size of Properties are stored using the // encoding described in Section 1.2." u64 const filter_id = TRY(header_stream.read_value()); u64 const size_of_properties = TRY(header_stream.read_value()); // "Size of Properties indicates the size of the Filter Properties field as bytes." auto filter_properties = TRY(ByteBuffer::create_uninitialized(size_of_properties)); TRY(header_stream.read_until_filled(filter_properties)); filters.empend(filter_id, move(filter_properties), last); } for (auto& filter : filters.in_reverse()) { // 5.3.1. LZMA2 if (filter.id == 0x21) { if (!filter.last) return Error::from_string_literal("XZ LZMA2 filter can only be the last filter"); if (filter.properties.size() < sizeof(XzFilterLzma2Properties)) return Error::from_string_literal("XZ LZMA2 filter has a smaller-than-needed properties size"); auto const* properties = reinterpret_cast(filter.properties.data()); TRY(properties->validate()); new_block_stream = TRY(Lzma2Decompressor::create_from_raw_stream(move(new_block_stream), properties->dictionary_size())); continue; } // 5.3.2. Branch/Call/Jump Filters for Executables if (filter.id == 0x0a) { if (filter.last) return Error::from_string_literal("XZ BCJ filter can only be a non-last filter"); u32 start_offset = 0; if (filter.properties.size() == 0) { // No start offset given. } else if (filter.properties.size() == sizeof(XzFilterBCJProperties)) { auto const* properties = reinterpret_cast(filter.properties.data()); start_offset = properties->start_offset; } else { return Error::from_string_literal("XZ BCJ filter has an unknown properties size"); } new_block_stream = TRY(XzFilterBCJArm64::create(move(new_block_stream), start_offset)); continue; } // 5.3.3. Delta if (filter.id == 0x03) { if (filter.last) return Error::from_string_literal("XZ Delta filter can only be a non-last filter"); if (filter.properties.size() < sizeof(XzFilterDeltaProperties)) return Error::from_string_literal("XZ Delta filter has a smaller-than-needed properties size"); auto const* properties = reinterpret_cast(filter.properties.data()); new_block_stream = TRY(XzFilterDelta::create(move(new_block_stream), properties->distance())); continue; } return Error::from_string_literal("XZ block header contains unknown filter ID"); } // 3.1.6. Header Padding: // "This field contains as many null byte as it is needed to make // the Block Header have the size specified in Block Header Size." constexpr size_t size_of_block_header_size = 1; constexpr size_t size_of_crc32 = 4; while (MUST(header_stream.tell()) < block_header_size - size_of_block_header_size - size_of_crc32) { auto const padding_byte = TRY(header_stream.read_value()); // "If any of the bytes are not null bytes, the decoder MUST // indicate an error." if (padding_byte != 0) return Error::from_string_literal("XZ block header padding contains non-null bytes"); } // 3.1.7. CRC32: // "The CRC32 is calculated over everything in the Block Header // field except the CRC32 field itself. Crypto::Checksum::CRC32 calculated_header_crc32 { header.span().trim(block_header_size - size_of_crc32) }; // It is stored as an unsigned 32-bit little endian integer. u32 const stored_header_crc32 = TRY(header_stream.read_value>()); // If the calculated value does not match the stored one, the decoder MUST indicate // an error." if (calculated_header_crc32.digest() != stored_header_crc32) return Error::from_string_literal("Stored XZ block header CRC32 does not match the stored CRC32"); m_current_block_stream = move(new_block_stream); m_current_block_uncompressed_size = 0; return {}; } ErrorOr XzDecompressor::finish_current_block() { auto unpadded_size = m_stream->read_bytes() - m_current_block_start_offset; // 3.3. Block Padding: // "Block Padding MUST contain 0-3 null bytes to make the size of // the Block a multiple of four bytes. This can be needed when // the size of Compressed Data is not a multiple of four." for (size_t i = 0; (unpadded_size + i) % 4 != 0; i++) { auto const padding_byte = TRY(m_stream->read_value()); // "If any of the bytes in Block Padding are not null bytes, the decoder // MUST indicate an error." if (padding_byte != 0) return Error::from_string_literal("XZ block contains a non-null padding byte"); } // 3.4. Check: // "The type and size of the Check field depends on which bits // are set in the Stream Flags field (see Section 2.1.1.2). // // The Check, when used, is calculated from the original // uncompressed data. If the calculated Check does not match the // stored one, the decoder MUST indicate an error. If the selected // type of Check is not supported by the decoder, it SHOULD // indicate a warning or error." auto const maybe_check_size = size_for_check_type(m_stream_flags->check_type); if (!maybe_check_size.has_value()) return Error::from_string_literal("XZ stream has an unknown check type"); // TODO: Block content checks are currently unimplemented as a whole, independent of the check type. // For now, we only make sure to remove the correct amount of bytes from the stream. TRY(m_stream->discard(*maybe_check_size)); unpadded_size += *maybe_check_size; if (m_current_block_expected_uncompressed_size.has_value()) { if (*m_current_block_expected_uncompressed_size != m_current_block_uncompressed_size) return Error::from_string_literal("Uncompressed size of XZ block does not match the expected value"); } TRY(m_processed_blocks.try_append({ .uncompressed_size = m_current_block_uncompressed_size, .unpadded_size = unpadded_size, })); return {}; } ErrorOr XzDecompressor::finish_current_stream() { // We already read the Index Indicator (one byte) to determine that this is an Index. auto const start_of_current_block = m_stream->read_bytes() - 1; // 4.2. Number of Records: // "This field indicates how many Records there are in the List // of Records field, and thus how many Blocks there are in the // Stream. The value is stored using the encoding described in // Section 1.2." u64 const number_of_records = TRY(m_stream->read_value()); if (m_processed_blocks.size() != number_of_records) return Error::from_string_literal("Number of Records in XZ Index does not match the number of processed Blocks"); // 4.3. List of Records: // "List of Records consists of as many Records as indicated by the // Number of Records field:" for (u64 i = 0; i < number_of_records; i++) { // "Each Record contains information about one Block: // // +===============+===================+ // | Unpadded Size | Uncompressed Size | // +===============+===================+" // 4.3.1. Unpadded Size: // "This field indicates the size of the Block excluding the Block // Padding field. That is, Unpadded Size is the size of the Block // Header, Compressed Data, and Check fields. Unpadded Size is // stored using the encoding described in Section 1.2." u64 const unpadded_size = TRY(m_stream->read_value()); // "The value MUST never be zero; with the current structure of Blocks, the // actual minimum value for Unpadded Size is five." if (unpadded_size < 5) return Error::from_string_literal("XZ index contains a record with an unpadded size of less than five"); // 4.3.2. Uncompressed Size: // "This field indicates the Uncompressed Size of the respective // Block as bytes. The value is stored using the encoding // described in Section 1.2." u64 const uncompressed_size = TRY(m_stream->read_value()); // 4.3. List of Records: // "If the decoder has decoded all the Blocks of the Stream, it // MUST verify that the contents of the Records match the real // Unpadded Size and Uncompressed Size of the respective Blocks." if (m_processed_blocks[i].uncompressed_size != uncompressed_size) return Error::from_string_literal("Uncompressed size of XZ Block does not match the Index"); if (m_processed_blocks[i].unpadded_size != unpadded_size) return Error::from_string_literal("Unpadded size of XZ Block does not match the Index"); } // 4.4. Index Padding: // "This field MUST contain 0-3 null bytes to pad the Index to // a multiple of four bytes. If any of the bytes are not null // bytes, the decoder MUST indicate an error." while ((m_stream->read_bytes() - start_of_current_block) % 4 != 0) { auto padding_byte = TRY(m_stream->read_value()); if (padding_byte != 0) return Error::from_string_literal("XZ index contains a non-null padding byte"); } // 4.5. CRC32: // "The CRC32 is calculated over everything in the Index field // except the CRC32 field itself. The CRC32 is stored as an // unsigned 32-bit little endian integer." u32 const index_crc32 = TRY(m_stream->read_value>()); // "If the calculated value does not match the stored one, the decoder MUST indicate // an error." // TODO: Validation of the index CRC32 is currently unimplemented. (void)index_crc32; auto const size_of_index = m_stream->read_bytes() - start_of_current_block; // According to the specification of a stream (2.1. Stream), the index is the last element in a stream, // followed by the stream footer (2.1.2. Stream Footer). auto const stream_footer = TRY(m_stream->read_value()); // This handles verifying the CRC32 (2.1.2.1. CRC32) and the magic bytes (2.1.2.4. Footer Magic Bytes). TRY(stream_footer.validate()); // 2.1.2.2. Backward Size: // "If the stored value does not match the real size of the Index // field, the decoder MUST indicate an error." if (stream_footer.backward_size() != size_of_index) return Error::from_string_literal("XZ index size does not match the stored size in the stream footer"); // 2.1.2.3. Stream Flags: // "This is a copy of the Stream Flags field from the Stream // Header. The information stored to Stream Flags is needed // when parsing the Stream backwards. The decoder MUST compare // the Stream Flags fields in both Stream Header and Stream // Footer, and indicate an error if they are not identical." if (ReadonlyBytes { &*m_stream_flags, sizeof(XzStreamFlags) } != ReadonlyBytes { &stream_footer.flags, sizeof(stream_footer.flags) }) return Error::from_string_literal("XZ stream header flags don't match the stream footer"); return {}; } ErrorOr XzDecompressor::read_some(Bytes bytes) { if (!m_stream_flags.has_value()) { if (!TRY(load_next_stream())) return bytes.trim(0); } if (!m_current_block_stream.has_value() || (*m_current_block_stream)->is_eof()) { if (m_current_block_stream.has_value()) { // We have already processed a block, so we weed to clean up trailing data before the next block starts. TRY(finish_current_block()); } // The first byte between Block Header (3.1.1. Block Header Size) and Index (4.1. Index Indicator) overlap. // Block header sizes have valid values in the range of [0x01, 0xFF], the only valid value for an Index Indicator is therefore 0x00. auto const encoded_block_header_size_or_index_indicator = TRY(m_stream->read_value()); if (encoded_block_header_size_or_index_indicator == 0x00) { // This is an Index, which is the last element before the stream footer. TRY(finish_current_stream()); // Another XZ Stream might follow, so we just unset the current information and continue on the next read. m_stream_flags.clear(); m_processed_blocks.clear(); return bytes.trim(0); } TRY(load_next_block(encoded_block_header_size_or_index_indicator)); } auto result = TRY((*m_current_block_stream)->read_some(bytes)); m_current_block_uncompressed_size += result.size(); return result; } ErrorOr XzDecompressor::write_some(ReadonlyBytes) { return Error::from_errno(EBADF); } bool XzDecompressor::is_eof() const { return m_found_last_stream_footer; } bool XzDecompressor::is_open() const { return true; } void XzDecompressor::close() { } }