From 9437b29b431154291f4e2f3db5e3aabb2541511c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=B6nke=20Holz?=
Date: Wed, 17 Apr 2024 21:38:49 +0200
Subject: [PATCH] LibELF+LibC: Add support for Variant I of the TLS data
 structures

We currently only support Variant II, which is used by x86-64. Variant I
is used by both AArch64 (when using the traditional non-TLSDESC model)
and RISC-V, although with small differences between the two.

The TLS layout for Variant I is essentially flipped: the static TLS
blocks are located after the thread pointer on Variant I, while on
Variant II they are located before it.

Some code using ELF TLS already worked on AArch64 and RISC-V even though
we only supported Variant II. This is because only the local-exec model
uses TLS offsets directly; the other models go through relocations or
__tls_get_addr().
---
 Userland/Libraries/LibC/tls.cpp              |  3 +-
 Userland/Libraries/LibELF/Arch/aarch64/tls.S |  2 +
 Userland/Libraries/LibELF/DynamicLinker.cpp  | 62 +++++++++++++-------
 Userland/Libraries/LibELF/DynamicLoader.cpp  | 30 +++++++---
 4 files changed, 67 insertions(+), 30 deletions(-)

diff --git a/Userland/Libraries/LibC/tls.cpp b/Userland/Libraries/LibC/tls.cpp
index 8eae23dc62b..c773cb6021c 100644
--- a/Userland/Libraries/LibC/tls.cpp
+++ b/Userland/Libraries/LibC/tls.cpp
@@ -5,6 +5,7 @@
  */
 
 #include
+#include <LibELF/Arch/tls.h>
 #include
 
 extern "C" {
@@ -21,6 +22,6 @@ extern "C" {
 // changed if we support dynamically allocated TLS blocks.
 void* __tls_get_addr(__tls_index* index)
 {
-    return reinterpret_cast<void*>(reinterpret_cast<FlatPtr>(__builtin_thread_pointer()) + index->ti_module + index->ti_offset);
+    return reinterpret_cast<void*>(reinterpret_cast<FlatPtr>(__builtin_thread_pointer()) + index->ti_module + index->ti_offset + ELF::TLS_DTV_OFFSET);
 }
 }
diff --git a/Userland/Libraries/LibELF/Arch/aarch64/tls.S b/Userland/Libraries/LibELF/Arch/aarch64/tls.S
index a24ba3392eb..5f5836679fd 100644
--- a/Userland/Libraries/LibELF/Arch/aarch64/tls.S
+++ b/Userland/Libraries/LibELF/Arch/aarch64/tls.S
@@ -60,4 +60,6 @@
 .type __tlsdesc_static,@function
 __tlsdesc_static:
     ldr x0, [x0, #8]
+    // The first static TLS block is 16 bytes after the thread pointer on AArch64.
+    add x0, x0, 16
     ret
diff --git a/Userland/Libraries/LibELF/DynamicLinker.cpp b/Userland/Libraries/LibELF/DynamicLinker.cpp
index 2a5d5817559..ac31004c468 100644
--- a/Userland/Libraries/LibELF/DynamicLinker.cpp
+++ b/Userland/Libraries/LibELF/DynamicLinker.cpp
@@ -58,6 +58,8 @@ struct TLSData {
     void* tls_template { nullptr };
     size_t tls_template_size { 0 };
     size_t alignment { 0 };
+    size_t static_tls_region_size { 0 };
+    size_t static_tls_region_alignment { 0 };
 };
 
 static TLSData s_tls_data;
@@ -131,10 +133,18 @@ static Result<NonnullRefPtr<DynamicLoader>, DlErrorMessage> map_library(ByteStri
 
     static size_t s_current_tls_offset = 0;
 
-    s_current_tls_offset -= loader->tls_size_of_current_object();
-    if (loader->tls_alignment_of_current_object())
-        s_current_tls_offset = align_down_to(s_current_tls_offset, loader->tls_alignment_of_current_object());
-    loader->set_tls_offset(s_current_tls_offset);
+    if constexpr (TLS_VARIANT == 1) {
+        if (loader->tls_alignment_of_current_object() != 0)
+            s_current_tls_offset = align_up_to(s_current_tls_offset, loader->tls_alignment_of_current_object());
+        loader->set_tls_offset(s_current_tls_offset);
+
+        s_current_tls_offset += loader->tls_size_of_current_object();
+    } else if constexpr (TLS_VARIANT == 2) {
+        s_current_tls_offset -= loader->tls_size_of_current_object();
+        if (loader->tls_alignment_of_current_object() != 0)
+            s_current_tls_offset = align_down_to(s_current_tls_offset, loader->tls_alignment_of_current_object());
+        loader->set_tls_offset(s_current_tls_offset);
+    }
 
     // This actually maps the library at the intended and final place.
     auto main_library_object = loader->map();
@@ -237,35 +247,37 @@ static Result<void, DlErrorMessage> map_dependencies(ByteString const& path)
     return {};
 }
 
-struct ThreadSpecificData {
-    ThreadSpecificData* self;
-};
-
 static ErrorOr<FlatPtr> __create_new_tls_region()
 {
-    auto static_tls_region_alignment = max(s_tls_data.alignment, alignof(ThreadSpecificData));
-    auto static_tls_region_size = align_up_to(s_tls_data.tls_template_size, static_tls_region_alignment) + sizeof(ThreadSpecificData);
-    void* thread_specific_ptr = serenity_mmap(nullptr, static_tls_region_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0, static_tls_region_alignment, "Static TLS Data");
-    if (thread_specific_ptr == MAP_FAILED)
+    void* static_tls_region = serenity_mmap(nullptr, s_tls_data.static_tls_region_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0, s_tls_data.static_tls_region_alignment, "Static TLS Data");
+    if (static_tls_region == MAP_FAILED)
         return Error::from_syscall("mmap"sv, -errno);
 
-    auto* thread_specific_data = bit_cast<ThreadSpecificData*>(bit_cast<FlatPtr>(thread_specific_ptr) + (align_up_to(s_tls_data.tls_template_size, static_tls_region_alignment)));
-    thread_specific_data->self = thread_specific_data;
+    auto thread_pointer = calculate_tp_value_from_static_tls_region_address(bit_cast<FlatPtr>(static_tls_region), s_tls_data.tls_template_size, s_tls_data.static_tls_region_alignment);
+    VERIFY(thread_pointer % s_tls_data.static_tls_region_alignment == 0);
 
-    auto* thread_local_storage = bit_cast<u8*>(bit_cast<FlatPtr>(thread_specific_data) - align_up_to(s_tls_data.tls_template_size, s_tls_data.alignment));
+    auto* tcb = get_tcb_pointer_from_thread_pointer(thread_pointer);
+
+    // FIXME: Add support for dynamically-allocated TLS blocks.
+    tcb->dynamic_thread_vector = nullptr;
+
+#if ARCH(X86_64)
+    tcb->thread_pointer = bit_cast<void*>(thread_pointer);
+#endif
+
+    auto* static_tls_blocks = get_pointer_to_first_static_tls_block_from_thread_pointer(thread_pointer, s_tls_data.tls_template_size, s_tls_data.static_tls_region_alignment);
 
     if (s_tls_data.tls_template_size != 0)
-        memcpy(thread_local_storage, s_tls_data.tls_template, s_tls_data.tls_template_size);
+        memcpy(static_tls_blocks, s_tls_data.tls_template, s_tls_data.tls_template_size);
 
-    return bit_cast<FlatPtr>(thread_specific_data);
+    return thread_pointer;
 }
 
 static ErrorOr<void> __free_tls_region(FlatPtr thread_pointer)
 {
-    auto static_tls_region_alignment = max(s_tls_data.alignment, alignof(ThreadSpecificData));
-    auto static_tls_region_size = align_up_to(s_tls_data.tls_template_size, static_tls_region_alignment) + sizeof(ThreadSpecificData);
+    auto* static_tls_region = get_pointer_to_static_tls_region_from_thread_pointer(thread_pointer, s_tls_data.tls_template_size, s_tls_data.static_tls_region_alignment);
 
-    if (munmap(bit_cast<void*>(bit_cast<FlatPtr>(thread_pointer) - align_up_to(s_tls_data.tls_template_size, s_tls_data.alignment)), static_tls_region_size) != 0)
+    if (munmap(static_tls_region, s_tls_data.static_tls_region_size) != 0)
         return Error::from_syscall("mmap"sv, -errno);
 
     return {};
@@ -273,6 +285,12 @@ static ErrorOr<void> __free_tls_region(FlatPtr thread_pointer)
 
 static void allocate_tls()
 {
+    // FIXME: Use the max p_align of all TLS segments.
+    //        We currently pass s_tls_data.static_tls_region_alignment as the alignment to mmap,
+    //        so we would have to manually insert padding, as mmap only accepts alignments that
+    //        are multiples of PAGE_SIZE. Or instead use aligned_alloc/posix_memalign?
+    s_tls_data.alignment = PAGE_SIZE;
+
     for (auto const& data : s_loaders) {
         dbgln_if(DYNAMIC_LOAD_DEBUG, "{}: TLS Size: {}, TLS Alignment: {}", data.key, data.value->tls_size_of_current_object(), data.value->tls_alignment_of_current_object());
         s_tls_data.total_tls_size += data.value->tls_size_of_current_object() + data.value->tls_alignment_of_current_object();
@@ -282,7 +300,6 @@ static void allocate_tls()
         return;
 
     s_tls_data.tls_template_size = align_up_to(s_tls_data.total_tls_size, PAGE_SIZE);
-    s_tls_data.alignment = PAGE_SIZE;
 
     s_tls_data.tls_template = mmap_with_name(nullptr, s_tls_data.tls_template_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0, "TLS Template");
     if (s_tls_data.tls_template == MAP_FAILED) {
@@ -290,6 +307,9 @@ static void allocate_tls()
         VERIFY_NOT_REACHED();
     }
 
+    s_tls_data.static_tls_region_alignment = max(s_tls_data.alignment, sizeof(ThreadControlBlock));
+    s_tls_data.static_tls_region_size = calculate_static_tls_region_size(s_tls_data.tls_template_size, s_tls_data.static_tls_region_alignment);
+
     auto tls_template = Bytes(s_tls_data.tls_template, s_tls_data.tls_template_size);
 
     // Initialize TLS data
diff --git a/Userland/Libraries/LibELF/DynamicLoader.cpp b/Userland/Libraries/LibELF/DynamicLoader.cpp
index 599a842ef3d..f9962e5fcaf 100644
--- a/Userland/Libraries/LibELF/DynamicLoader.cpp
+++ b/Userland/Libraries/LibELF/DynamicLoader.cpp
@@ -12,6 +12,7 @@
 #include
 #include
 #include
+#include <LibELF/Arch/tls.h>
 #include
 #include
 #include
@@ -654,10 +655,16 @@ DynamicLoader::RelocationResult DynamicLoader::do_direct_relocation(DynamicObjec
         auto [dynamic_object_of_symbol, symbol_value] = maybe_resolution.value();
 
         size_t addend = relocation.addend_used() ? relocation.addend() : *patch_ptr;
-        *patch_ptr = addend + dynamic_object_of_symbol.tls_offset().value() + symbol_value;
+        *patch_ptr = addend + dynamic_object_of_symbol.tls_offset().value() + symbol_value + TLS_TP_STATIC_TLS_BLOCK_OFFSET;
+
+        if constexpr (TLS_VARIANT == 1) {
+            // Until offset TLS_TP_STATIC_TLS_BLOCK_OFFSET there's the thread's ThreadControlBlock; we don't want to collide with it.
+            VERIFY(static_cast<ssize_t>(*patch_ptr) >= static_cast<ssize_t>(TLS_TP_STATIC_TLS_BLOCK_OFFSET));
+        } else if constexpr (TLS_VARIANT == 2) {
+            // At offset 0 there's the thread's ThreadControlBlock; we don't want to collide with it.
+            VERIFY(static_cast<ssize_t>(*patch_ptr) < 0);
+        }
 
-        // At offset 0 there's the thread's ThreadSpecificData structure, we don't want to collide with it.
-        VERIFY(static_cast<ssize_t>(*patch_ptr) < 0);
         break;
     }
     case TLS_DTPMOD: {
@@ -676,7 +683,7 @@ DynamicLoader::RelocationResult DynamicLoader::do_direct_relocation(DynamicObjec
             break;
 
         size_t addend = relocation.addend_used() ? relocation.addend() : *patch_ptr;
-        *patch_ptr = addend + maybe_resolution->value;
+        *patch_ptr = addend + maybe_resolution->value - TLS_DTV_OFFSET + TLS_TP_STATIC_TLS_BLOCK_OFFSET;
         break;
     }
 #ifdef HAS_TLSDESC_SUPPORT
@@ -765,14 +772,21 @@ void DynamicLoader::copy_initial_tls_data_into(Bytes buffer) const
         // only included in the "size in memory" metric, and is expected to not be touched or read from, as
         // it is not present in the image and zeroed out in-memory. We will still check that the buffer has
         // space for both the initialized and the uninitialized data.
-        // Note: The m_tls_offset here is (of course) negative.
         // TODO: Is the initialized data always in the beginning of the TLS segment, or should we walk the
         //       sections to figure that out?
-        size_t tls_start_in_buffer = buffer.size() + m_tls_offset;
 
+        VERIFY(program_header.size_in_image() <= program_header.size_in_memory());
         VERIFY(program_header.size_in_memory() <= m_tls_size_of_current_object);
-        VERIFY(tls_start_in_buffer + program_header.size_in_memory() <= buffer.size());
-        memcpy(buffer.data() + tls_start_in_buffer, static_cast<u8 const*>(m_file_data) + program_header.offset(), program_header.size_in_image());
+
+        if constexpr (TLS_VARIANT == 1) {
+            size_t tls_start_in_buffer = m_tls_offset;
+            VERIFY(tls_start_in_buffer + program_header.size_in_memory() <= buffer.size());
+            memcpy(buffer.data() + tls_start_in_buffer, static_cast<u8 const*>(m_file_data) + program_header.offset(), program_header.size_in_image());
+        } else if constexpr (TLS_VARIANT == 2) {
+            size_t tls_start_in_buffer = buffer.size() + m_tls_offset;
+            VERIFY(tls_start_in_buffer + program_header.size_in_memory() <= buffer.size());
+            memcpy(buffer.data() + tls_start_in_buffer, static_cast<u8 const*>(m_file_data) + program_header.offset(), program_header.size_in_image());
+        }
 
         return IterationDecision::Break;
     });
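
As a companion to the new if-constexpr logic in map_library(), here is a
standalone C++ sketch (not part of the patch) of the same offset-assignment
arithmetic for both variants. align_up_to()/align_down_to() are simplified
stand-ins for AK's helpers, and the module sizes and alignments are invented:

    // Standalone sketch (not from the patch) of map_library()'s offset assignment.
    #include <cstddef>
    #include <cstdio>

    static ptrdiff_t align_up_to(ptrdiff_t value, size_t alignment)
    {
        // Assumes power-of-two alignments, like the real helpers.
        return (value + static_cast<ptrdiff_t>(alignment) - 1) & ~static_cast<ptrdiff_t>(alignment - 1);
    }

    static ptrdiff_t align_down_to(ptrdiff_t value, size_t alignment)
    {
        return value & ~static_cast<ptrdiff_t>(alignment - 1);
    }

    int main()
    {
        struct Module { size_t size; size_t alignment; } modules[] = { { 0x30, 16 }, { 0x8, 8 } };

        // Variant I: blocks are laid out upwards, starting right after the TCB,
        // so every module gets a non-negative offset.
        ptrdiff_t offset = 0;
        for (auto& m : modules) {
            offset = align_up_to(offset, m.alignment);
            printf("Variant I:  module at tp + TLS_TP_STATIC_TLS_BLOCK_OFFSET + %td\n", offset);
            offset += static_cast<ptrdiff_t>(m.size);
        }

        // Variant II: blocks are laid out downwards from the thread pointer,
        // so every module gets a negative offset.
        offset = 0;
        for (auto& m : modules) {
            offset -= static_cast<ptrdiff_t>(m.size);
            offset = align_down_to(offset, m.alignment);
            printf("Variant II: module at tp %td\n", offset);
        }
    }

Under Variant I every module lands at a non-negative offset past the thread
control block, while under Variant II every module lands at a negative offset
below the thread pointer, which is exactly what the VERIFY calls in
do_direct_relocation() assert.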
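The per-architecture helpers this patch calls (calculate_static_tls_region_size(),
calculate_tp_value_from_static_tls_region_address(),
get_tcb_pointer_from_thread_pointer(), and friends) live in LibELF's
per-architecture tls.h headers and are not part of this diff. For Variant II
their arithmetic has to match the code deleted from __create_new_tls_region()
and __free_tls_region(); the following definitions are a hypothetical
restatement of that shape, with AK's FlatPtr and alignment helper stubbed out:

    // Hypothetical Variant II helpers matching the arithmetic of the deleted code;
    // the real definitions live in LibELF/Arch/*/tls.h and may differ in detail.
    #include <cstddef>
    #include <cstdint>

    using FlatPtr = uintptr_t; // stand-in for AK's FlatPtr

    static size_t align_up_to(size_t value, size_t alignment)
    {
        return (value + alignment - 1) & ~(alignment - 1);
    }

    struct ThreadControlBlock {
    #ifdef __x86_64__
        // Assumed layout: the x86-64 psABI expects %fs:0 to hold the thread
        // pointer itself, so keep the self-pointer as the first member.
        void* thread_pointer;
    #endif
        void* dynamic_thread_vector;
    };

    // Variant II region layout: [ static TLS blocks | TCB ], with the thread
    // pointer (and the TCB) placed right after the aligned TLS blocks.
    static size_t calculate_static_tls_region_size(size_t tls_template_size, size_t alignment)
    {
        return align_up_to(tls_template_size, alignment) + sizeof(ThreadControlBlock);
    }

    static FlatPtr calculate_tp_value_from_static_tls_region_address(FlatPtr region, size_t tls_template_size, size_t alignment)
    {
        return region + align_up_to(tls_template_size, alignment);
    }

    static ThreadControlBlock* get_tcb_pointer_from_thread_pointer(FlatPtr tp)
    {
        return reinterpret_cast<ThreadControlBlock*>(tp);
    }

    static void* get_pointer_to_static_tls_region_from_thread_pointer(FlatPtr tp, size_t tls_template_size, size_t alignment)
    {
        return reinterpret_cast<void*>(tp - align_up_to(tls_template_size, alignment));
    }

    static uint8_t* get_pointer_to_first_static_tls_block_from_thread_pointer(FlatPtr tp, size_t tls_template_size, size_t alignment)
    {
        return reinterpret_cast<uint8_t*>(tp - align_up_to(tls_template_size, alignment));
    }

For Variant I the same helpers would instead place the ThreadControlBlock at
the start of the region and return a thread pointer sitting just in front of
the first static TLS block (16 bytes in front of it on AArch64, as the tls.S
comment above states).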
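The ELF::TLS_DTV_OFFSET term that __tls_get_addr() now adds exists because some
psABIs bias DTV-relative offsets: RISC-V specifies a 0x800 bias (so a signed
12-bit immediate can reach more of a TLS block), while AArch64 and x86-64 use 0.
The dynamic loader stores the biased value when it applies the relocation above
(the "- TLS_DTV_OFFSET" term), and __tls_get_addr() cancels the bias again. A
minimal sketch with an invented offset:

    // Sketch of the TLS_DTV_OFFSET round trip; unsigned wraparound makes the
    // bias cancel exactly even when the intermediate value "underflows".
    #include <cassert>
    #include <cstddef>

    int main()
    {
        constexpr size_t TLS_DTV_OFFSET = 0x800; // RISC-V bias; 0 on AArch64 and x86-64
        size_t real_offset = 0x10;               // where the variable lives in its block

        // The dynamic loader stores the biased offset while applying the relocation...
        size_t stored_offset = real_offset - TLS_DTV_OFFSET;

        // ...and __tls_get_addr() adds the bias back before forming the address.
        assert(stored_offset + TLS_DTV_OFFSET == real_offset);
    }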