diff --git a/rpcs3/Emu/RSX/Common/BufferUtils.cpp b/rpcs3/Emu/RSX/Common/BufferUtils.cpp index 0250daead2..4c668290af 100644 --- a/rpcs3/Emu/RSX/Common/BufferUtils.cpp +++ b/rpcs3/Emu/RSX/Common/BufferUtils.cpp @@ -404,57 +404,77 @@ namespace } }; -template -NEVER_INLINE std::tuple upload_untouched_skip_restart(std::span> src, std::span dst, T restart_index) -{ - T min_index = index_limit(); - T max_index = 0; - u32 written = 0; - u32 length = ::size32(src); - - for (u32 i = written; i < length; ++i) + template + NEVER_INLINE std::tuple upload_untouched_skip_restart(std::span> src, std::span dst, T restart_index) { - T index = src[i]; - if (index != restart_index) + T min_index = index_limit(); + T max_index = 0; + u32 written = 0; + u32 length = ::size32(src); + + for (u32 i = written; i < length; ++i) { - dst[written++] = min_max(min_index, max_index, index); + T index = src[i]; + if (index != restart_index) + { + dst[written++] = min_max(min_index, max_index, index); + } } + + return std::make_tuple(min_index, max_index, written); } - return std::make_tuple(min_index, max_index, written); -} - -template -std::tuple upload_untouched(std::span> src, std::span dst, rsx::primitive_type draw_mode, bool is_primitive_restart_enabled, u32 primitive_restart_index) -{ - if (!is_primitive_restart_enabled) + template + std::tuple upload_untouched(std::span> src, std::span dst, rsx::primitive_type draw_mode, bool is_primitive_restart_enabled, u32 primitive_restart_index) { - return untouched_impl::upload_untouched(src, dst); - } - else if constexpr (std::is_same_v) - { - if (primitive_restart_index > 0xffff) + if (!is_primitive_restart_enabled) { return untouched_impl::upload_untouched(src, dst); } + else if constexpr (std::is_same_v) + { + if (primitive_restart_index > 0xffff) + { + return untouched_impl::upload_untouched(src, dst); + } + else if (is_primitive_disjointed(draw_mode)) + { + return upload_untouched_skip_restart(src, dst, static_cast(primitive_restart_index)); 
+ } + else + { + return primitive_restart_impl::upload_untouched(src, dst, static_cast(primitive_restart_index)); + } + } else if (is_primitive_disjointed(draw_mode)) { - return upload_untouched_skip_restart(src, dst, static_cast(primitive_restart_index)); + return upload_untouched_skip_restart(src, dst, primitive_restart_index); } else { - return primitive_restart_impl::upload_untouched(src, dst, static_cast(primitive_restart_index)); + return primitive_restart_impl::upload_untouched(src, dst, primitive_restart_index); } } - else if (is_primitive_disjointed(draw_mode)) + + // Writes the ascending sequence 0, 1, 2, ... into the first count entries of dst as 16-bit values + void iota16(u16* dst, u32 count) { - return upload_untouched_skip_restart(src, dst, primitive_restart_index); + unsigned i = 0; +#if defined(ARCH_X64) || defined(ARCH_ARM64) + const unsigned step = 8; // We do 8 entries per step + const __m128i vec_step = _mm_set1_epi16(8); // Constant to increment the raw values + __m128i values = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0); + __m128i* vec_ptr = reinterpret_cast<__m128i*>(dst); + + for (; (i + step) <= count; i += step, vec_ptr++) + { + _mm_stream_si128(vec_ptr, values); // NOTE(review): non-temporal store needs a 16-byte aligned address; confirm every caller passes an aligned dst + values = _mm_add_epi16(values, vec_step); // The intrinsic returns the sum and leaves its operands untouched, so store it back + } +#endif + for (; i < count; ++i) + dst[i] = i; } - else - { - return primitive_restart_impl::upload_untouched(src, dst, primitive_restart_index); - } -} template std::tuple expand_indexed_triangle_fan(std::span> src, std::span dst, bool is_primitive_restart_enabled, u32 primitive_restart_index) @@ -624,8 +644,7 @@ void write_index_array_for_non_indexed_non_native_primitive_to_buffer(char* dst, switch (draw_mode) { case rsx::primitive_type::line_loop: - for (unsigned i = 0; i < count; ++i) - typedDst[i] = i; + iota16(typedDst, count); typedDst[count] = 0; return; case rsx::primitive_type::triangle_fan: diff --git a/rpcs3/Emu/RSX/Common/ring_buffer_helper.h b/rpcs3/Emu/RSX/Common/ring_buffer_helper.h index 46e99d928a..05914b54ae 100644 --- a/rpcs3/Emu/RSX/Common/ring_buffer_helper.h +++ b/rpcs3/Emu/RSX/Common/ring_buffer_helper.h @@ -20,29 +20,33 
@@ protected: template bool can_alloc(usz size) const { - usz alloc_size = utils::align(size, Alignment); - usz aligned_put_pos = utils::align(m_put_pos, Alignment); - if (aligned_put_pos + alloc_size < m_size) + const usz alloc_size = utils::align(size, Alignment); + const usz aligned_put_pos = utils::align(m_put_pos, Alignment); + const usz alloc_end = aligned_put_pos + alloc_size; + + if (alloc_end < m_size) [[ likely ]] { - // range before get - if (aligned_put_pos + alloc_size < m_get_pos) + // Range before get + if (alloc_end < m_get_pos) return true; - // range after get + + // Range after get if (aligned_put_pos > m_get_pos) return true; + return false; } - else - { - // ..]....[..get.. - if (aligned_put_pos < m_get_pos) - return false; - // ..get..]...[... - // Actually all resources extending beyond heap space starts at 0 - if (alloc_size > m_get_pos) - return false; - return true; - } + + // ..]....[..get.. + if (aligned_put_pos < m_get_pos) + return false; + + // ..get..]...[... 
+ // Actually all resources extending beyond heap space starts at 0 + if (alloc_size > m_get_pos) + return false; + + return true; } // Grow the buffer to hold at least size bytes @@ -53,10 +57,9 @@ protected: } usz m_size; - usz m_put_pos; // Start of free space - usz m_min_guard_size; //If an allocation touches the guard region, reset the heap to avoid going over budget - usz m_current_allocated_size; - usz m_largest_allocated_pool; + usz m_put_pos; // Start of free space + usz m_get_pos; // End of free space + usz m_min_guard_size; // If an allocation touches the guard region, reset the heap to avoid going over budget char* m_name; public: @@ -65,8 +68,6 @@ public: data_heap(const data_heap&) = delete; data_heap(data_heap&&) = delete; - usz m_get_pos; // End of free space - void init(usz heap_size, const char* buffer_name = "unnamed", usz min_guard_size=0x10000) { m_name = const_cast(buffer_name); @@ -75,10 +76,8 @@ public: m_put_pos = 0; m_get_pos = heap_size - 1; - //allocation stats + // Allocation stats m_min_guard_size = min_guard_size; - m_current_allocated_size = 0; - m_largest_allocated_pool = 0; } template @@ -89,24 +88,45 @@ public: if (!can_alloc(size) && !grow(alloc_size)) { - fmt::throw_exception("[%s] Working buffer not big enough, buffer_length=%d allocated=%d requested=%d guard=%d largest_pool=%d", - m_name, m_size, m_current_allocated_size, size, m_min_guard_size, m_largest_allocated_pool); + fmt::throw_exception("[%s] Working buffer not big enough, buffer_length=%d requested=%d guard=%d", + m_name, m_size, size, m_min_guard_size); } - const usz block_length = (aligned_put_pos - m_put_pos) + alloc_size; - m_current_allocated_size += block_length; - m_largest_allocated_pool = std::max(m_largest_allocated_pool, block_length); - - if (aligned_put_pos + alloc_size < m_size) + const usz alloc_end = aligned_put_pos + alloc_size; + if (alloc_end < m_size) { - m_put_pos = aligned_put_pos + alloc_size; + m_put_pos = alloc_end; return aligned_put_pos; } - 
else + m_put_pos = alloc_size; + return 0; + } + + /* + * For use in cases where we take a fixed amount each time. Returns the offset where the block starts, same as alloc(). + */ + template + usz static_alloc() + { + static_assert((Size & (Alignment - 1)) == 0); + ensure((m_put_pos & (Alignment - 1)) == 0); + + if (!can_alloc(Size) && !grow(Size)) { - m_put_pos = alloc_size; - return 0; + fmt::throw_exception("[%s] Working buffer not big enough, buffer_length=%d requested=%d guard=%d", + m_name, m_size, Size, m_min_guard_size); } + + const usz alloc_start = m_put_pos; + if (alloc_start + Size < m_size) + { + m_put_pos = alloc_start + Size; + return alloc_start; // Start of the block, not its end; callers map() from this offset + } + + m_put_pos = Size; + return 0; } /** @@ -117,30 +137,25 @@ public: return (m_put_pos > 0) ? m_put_pos - 1 : m_size - 1; } + inline void set_get_pos(usz value) + { + m_get_pos = value; + } + virtual bool is_critical() const { - const usz guard_length = std::max(m_min_guard_size, m_largest_allocated_pool); - return (m_current_allocated_size + guard_length) >= m_size; + return m_min_guard_size >= m_size; } void reset_allocation_stats() { - m_current_allocated_size = 0; - m_largest_allocated_pool = 0; m_get_pos = get_current_put_pos_minus_one(); } // Updates the current_allocated_size metrics - void notify() + inline void notify() { - if (m_get_pos == umax) - m_current_allocated_size = 0; - else if (m_get_pos < m_put_pos) - m_current_allocated_size = (m_put_pos - m_get_pos - 1); - else if (m_get_pos > m_put_pos) - m_current_allocated_size = (m_put_pos + (m_size - m_get_pos - 1)); - else - fmt::throw_exception("m_put_pos == m_get_pos!"); + // @unused } usz size() const diff --git a/rpcs3/Emu/RSX/VK/VKDataHeapManager.cpp b/rpcs3/Emu/RSX/VK/VKDataHeapManager.cpp index b3d797d67f..e577b01c61 100644 --- a/rpcs3/Emu/RSX/VK/VKDataHeapManager.cpp +++ b/rpcs3/Emu/RSX/VK/VKDataHeapManager.cpp @@ -41,7 +41,7 @@ namespace vk::data_heap_manager continue; } - heap->m_get_pos = found->second; + heap->set_get_pos(found->second); heap->notify(); } } diff --git 
a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index a900098a50..04c49bc17c 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -1162,7 +1162,6 @@ void VKGSRender::check_heap_status(u32 flags) { heap_critical = false; u32 test = 1u << std::countr_zero(flags); - do { switch (flags & test) @@ -2046,7 +2045,7 @@ void VKGSRender::load_program_env() check_heap_status(VK_HEAP_CHECK_VERTEX_ENV_STORAGE); // Vertex state - const auto mem = m_vertex_env_ring_info.alloc<256>(256); + const auto mem = m_vertex_env_ring_info.static_alloc<256>(); auto buf = static_cast(m_vertex_env_ring_info.map(mem, 148)); m_draw_processor.fill_scale_offset_data(buf, false); @@ -2134,7 +2133,7 @@ void VKGSRender::load_program_env() { check_heap_status(VK_HEAP_CHECK_FRAGMENT_ENV_STORAGE); - auto mem = m_fragment_env_ring_info.alloc<256>(256); + auto mem = m_fragment_env_ring_info.static_alloc<256>(); auto buf = m_fragment_env_ring_info.map(mem, 32); m_draw_processor.fill_fragment_state_buffer(buf, current_fragment_program); @@ -2146,7 +2145,7 @@ void VKGSRender::load_program_env() { check_heap_status(VK_HEAP_CHECK_TEXTURE_ENV_STORAGE); - auto mem = m_fragment_texture_params_ring_info.alloc<256>(768); + auto mem = m_fragment_texture_params_ring_info.static_alloc<256, 768>(); auto buf = m_fragment_texture_params_ring_info.map(mem, 768); current_fragment_program.texture_params.write_to(buf, current_fp_metadata.referenced_textures_mask); @@ -2158,7 +2157,7 @@ void VKGSRender::load_program_env() { check_heap_status(VK_HEAP_CHECK_FRAGMENT_ENV_STORAGE); - auto mem = m_raster_env_ring_info.alloc<256>(256); + auto mem = m_raster_env_ring_info.static_alloc<256>(); auto buf = m_raster_env_ring_info.map(mem, 128); std::memcpy(buf, rsx::method_registers.polygon_stipple_pattern(), 128);