From 10fe14e783e91b89f7fe4e4f1f624fbb61b05487 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Thu, 4 Apr 2024 02:34:40 +0300 Subject: [PATCH] rsx: Separate register context from RSX thread --- rpcs3/Emu/RSX/NV47/common.cpp | 66 + rpcs3/Emu/RSX/NV47/common.h | 60 + rpcs3/Emu/RSX/NV47/context.h | 30 + rpcs3/Emu/RSX/NV47/context_accessors.define.h | 3 + rpcs3/Emu/RSX/NV47/context_accessors.undef.h | 3 + rpcs3/Emu/RSX/NV47/nv3089.cpp | 655 ++++++ rpcs3/Emu/RSX/NV47/nv3089.h | 10 + rpcs3/Emu/RSX/NV47/nv308a.cpp | 159 ++ rpcs3/Emu/RSX/NV47/nv308a.h | 14 + rpcs3/Emu/RSX/NV47/nv406e.cpp | 125 ++ rpcs3/Emu/RSX/NV47/nv406e.h | 15 + rpcs3/Emu/RSX/NV47/nv4097.cpp | 629 ++++++ rpcs3/Emu/RSX/NV47/nv4097.h | 238 +++ rpcs3/Emu/RSX/NV47/nv47.h | 7 + rpcs3/Emu/RSX/RSXDisAsm.cpp | 2 +- rpcs3/Emu/RSX/RSXFIFO.cpp | 23 +- rpcs3/Emu/RSX/RSXThread.cpp | 2 +- rpcs3/Emu/RSX/rsx_methods.cpp | 1879 +---------------- rpcs3/Emu/RSX/rsx_methods.h | 4 +- rpcs3/emucore.vcxproj | 14 + rpcs3/emucore.vcxproj.filters | 45 + 21 files changed, 2149 insertions(+), 1834 deletions(-) create mode 100644 rpcs3/Emu/RSX/NV47/common.cpp create mode 100644 rpcs3/Emu/RSX/NV47/common.h create mode 100644 rpcs3/Emu/RSX/NV47/context.h create mode 100644 rpcs3/Emu/RSX/NV47/context_accessors.define.h create mode 100644 rpcs3/Emu/RSX/NV47/context_accessors.undef.h create mode 100644 rpcs3/Emu/RSX/NV47/nv3089.cpp create mode 100644 rpcs3/Emu/RSX/NV47/nv3089.h create mode 100644 rpcs3/Emu/RSX/NV47/nv308a.cpp create mode 100644 rpcs3/Emu/RSX/NV47/nv308a.h create mode 100644 rpcs3/Emu/RSX/NV47/nv406e.cpp create mode 100644 rpcs3/Emu/RSX/NV47/nv406e.h create mode 100644 rpcs3/Emu/RSX/NV47/nv4097.cpp create mode 100644 rpcs3/Emu/RSX/NV47/nv4097.h create mode 100644 rpcs3/Emu/RSX/NV47/nv47.h diff --git a/rpcs3/Emu/RSX/NV47/common.cpp b/rpcs3/Emu/RSX/NV47/common.cpp new file mode 100644 index 0000000000..319a7c41bf --- /dev/null +++ b/rpcs3/Emu/RSX/NV47/common.cpp @@ -0,0 +1,66 @@ +#include "stdafx.h" +#include "common.h" + 
+#include "Emu/RSX/RSXThread.h" + +#define RSX(ctx) ctx->rsxthr +#define REGS(ctx) (&rsx::method_registers) + +namespace rsx +{ + namespace util + { + void push_vertex_data(rsx::context* ctx, u32 attrib_index, u32 channel_select, int count, rsx::vertex_base_type vtype, u32 value) + { + if (RSX(ctx)->in_begin_end) + { + // Update to immediate mode register/array + // NOTE: Push buffers still behave like register writes. + // You do not need to specify each attribute for each vertex, the register is referenced instead. + // This is classic OpenGL 1.x behavior as I remember. + RSX(ctx)->append_to_push_buffer(attrib_index, count, channel_select, vtype, value); + } + + auto& info = REGS(ctx)->register_vertex_info[attrib_index]; + + info.type = vtype; + info.size = count; + info.frequency = 0; + info.stride = 0; + REGS(ctx)->register_vertex_info[attrib_index].data[channel_select] = value; + } + + void push_draw_parameter_change(rsx::context* ctx, rsx::command_barrier_type type, u32 reg, u32 arg) + { + if (REGS(ctx)->latch == arg || + !RSX(ctx)->in_begin_end || + REGS(ctx)->current_draw_clause.empty()) + { + return; + } + + // Defer the change. Rollback... 
+ REGS(ctx)->decode(reg, REGS(ctx)->latch); + + // Insert the barrier type requested by the caller to reinsert the value later + REGS(ctx)->current_draw_clause.insert_command_barrier(type, arg); + } + + u32 get_report_data_impl(rsx::context* ctx, u32 offset) + { + u32 location = 0; + blit_engine::context_dma report_dma = REGS(ctx)->context_dma_report(); + + switch (report_dma) + { + case blit_engine::context_dma::to_memory_get_report: location = CELL_GCM_CONTEXT_DMA_REPORT_LOCATION_LOCAL; break; + case blit_engine::context_dma::report_location_main: location = CELL_GCM_CONTEXT_DMA_REPORT_LOCATION_MAIN; break; + case blit_engine::context_dma::memory_host_buffer: location = CELL_GCM_CONTEXT_DMA_MEMORY_HOST_BUFFER; break; + default: + return vm::addr_t(0); + } + + return vm::cast(get_address(offset, location)); + } + } +} \ No newline at end of file diff --git a/rpcs3/Emu/RSX/NV47/common.h b/rpcs3/Emu/RSX/NV47/common.h new file mode 100644 index 0000000000..6fc50edbd1 --- /dev/null +++ b/rpcs3/Emu/RSX/NV47/common.h @@ -0,0 +1,60 @@ +#pragma once + +#include +#include "context.h" +#include "context_accessors.define.h" + +namespace rsx +{ + enum command_barrier_type : u32; + enum vertex_base_type; + + namespace util + { + u32 get_report_data_impl(rsx::context* ctx, u32 offset); + + void push_vertex_data(rsx::context* ctx, u32 attrib_index, u32 channel_select, int count, rsx::vertex_base_type vtype, u32 value); + + void push_draw_parameter_change(rsx::context* ctx, rsx::command_barrier_type type, u32 reg, u32 arg); + + template + void write_gcm_label(context* ctx, u32 address, u32 data) + { + const bool is_flip_sema = (address == (RSX(ctx)->label_addr + 0x10) || address == (RSX(ctx)->device_addr + 0x30)); + if (!is_flip_sema) + { + // First, queue the GPU work. If it flushes the queue for us, the following routines will be faster. 
+ const bool handled = RSX(ctx)->get_backend_config().supports_host_gpu_labels && RSX(ctx)->release_GCM_label(address, data); + + if (vm::_ref(address).val == data) + { + // It's a no-op to write the same value (although there is a delay in real-hw so it's more accurate to allow GPU label in this case) + return; + } + + if constexpr (FlushDMA) + { + // If the backend handled the request, this call will basically be a NOP + g_fxo->get().sync(); + } + + if constexpr (FlushPipe) + { + // Manually flush the pipeline. + // It is possible to stream report writes using the host GPU, but that generates too much submit traffic. + RSX(ctx)->sync(); + } + + if (handled) + { + // Backend will handle it, nothing to write. + return; + } + } + + vm::_ref(address).val = data; + } + } +} + +#include "context_accessors.undef.h" diff --git a/rpcs3/Emu/RSX/NV47/context.h b/rpcs3/Emu/RSX/NV47/context.h new file mode 100644 index 0000000000..24f67bdfae --- /dev/null +++ b/rpcs3/Emu/RSX/NV47/context.h @@ -0,0 +1,30 @@ +#pragma once + +#include + +namespace rsx +{ + class thread; + +#if 0 + // TODO: Separate GRAPH context from RSX state + struct GRAPH_context + { + u32 id; + std::array registers; + + GRAPH_context(u32 ctx_id) + : id(ctx_id) + { + std::fill(registers.begin(), registers.end(), 0); + } + }; +#endif + + struct context + { + thread* rsxthr; + // GRAPH_context* graph; + rsx_state* register_state; + }; +} diff --git a/rpcs3/Emu/RSX/NV47/context_accessors.define.h b/rpcs3/Emu/RSX/NV47/context_accessors.define.h new file mode 100644 index 0000000000..b423f11cd9 --- /dev/null +++ b/rpcs3/Emu/RSX/NV47/context_accessors.define.h @@ -0,0 +1,3 @@ +#define RSX(ctx) ctx->rsxthr +#define REGS(ctx) ctx->register_state +#define RSX_CAPTURE_EVENT(name) if (RSX(ctx)->capture_current_frame) { RSX(ctx)->capture_frame(name); } diff --git a/rpcs3/Emu/RSX/NV47/context_accessors.undef.h b/rpcs3/Emu/RSX/NV47/context_accessors.undef.h new file mode 100644 index 0000000000..4fd31eec4e --- /dev/null 
+++ b/rpcs3/Emu/RSX/NV47/context_accessors.undef.h @@ -0,0 +1,3 @@ +#undef RSX +#undef REGS +#undef RSX_CAPTURE_EVENT diff --git a/rpcs3/Emu/RSX/NV47/nv3089.cpp b/rpcs3/Emu/RSX/NV47/nv3089.cpp new file mode 100644 index 0000000000..97f837ecf8 --- /dev/null +++ b/rpcs3/Emu/RSX/NV47/nv3089.cpp @@ -0,0 +1,655 @@ +#include "stdafx.h" +#include "nv3089.h" + +#include "Emu/RSX/RSXThread.h" + +#include "context_accessors.define.h" + +namespace rsx +{ + namespace nv3089 + { + static std::tuple decode_transfer_registers(context* ctx) + { + blit_src_info src_info = {}; + blit_dst_info dst_info = {}; + + const rsx::blit_engine::transfer_operation operation = REGS(ctx)->blit_engine_operation(); + + const u16 out_x = REGS(ctx)->blit_engine_output_x(); + const u16 out_y = REGS(ctx)->blit_engine_output_y(); + const u16 out_w = REGS(ctx)->blit_engine_output_width(); + const u16 out_h = REGS(ctx)->blit_engine_output_height(); + + const u16 in_w = REGS(ctx)->blit_engine_input_width(); + const u16 in_h = REGS(ctx)->blit_engine_input_height(); + + const blit_engine::transfer_origin in_origin = REGS(ctx)->blit_engine_input_origin(); + auto src_color_format = REGS(ctx)->blit_engine_src_color_format(); + + const f32 scale_x = REGS(ctx)->blit_engine_ds_dx(); + const f32 scale_y = REGS(ctx)->blit_engine_dt_dy(); + + // Clipping + // Validate that clipping rect will fit onto both src and dst regions + const u16 clip_w = std::min(REGS(ctx)->blit_engine_clip_width(), out_w); + const u16 clip_h = std::min(REGS(ctx)->blit_engine_clip_height(), out_h); + + // Check both clip dimensions and dst dimensions + if (clip_w == 0 || clip_h == 0) + { + rsx_log.warning("NV3089_IMAGE_IN: Operation NOPed out due to empty regions"); + return { false, src_info, dst_info }; + } + + if (in_w == 0 || in_h == 0) + { + // Input cant be an empty region + fmt::throw_exception("NV3089_IMAGE_IN_SIZE: Invalid blit dimensions passed (in_w=%d, in_h=%d)", in_w, in_h); + } + + u16 clip_x = REGS(ctx)->blit_engine_clip_x(); 
+ u16 clip_y = REGS(ctx)->blit_engine_clip_y(); + + //Fit onto dst + if (clip_x && (out_x + clip_x + clip_w) > out_w) clip_x = 0; + if (clip_y && (out_y + clip_y + clip_h) > out_h) clip_y = 0; + + u16 in_pitch = REGS(ctx)->blit_engine_input_pitch(); + + switch (in_origin) + { + case blit_engine::transfer_origin::corner: + case blit_engine::transfer_origin::center: + break; + default: + rsx_log.warning("NV3089_IMAGE_IN_SIZE: unknown origin (%d)", static_cast(in_origin)); + } + + if (operation != rsx::blit_engine::transfer_operation::srccopy) + { + rsx_log.error("NV3089_IMAGE_IN_SIZE: unknown operation (0x%x)", REGS(ctx)->registers[NV3089_SET_OPERATION]); + RSX(ctx)->recover_fifo(); + return { false, src_info, dst_info }; + } + + if (!src_color_format) + { + rsx_log.error("NV3089_IMAGE_IN_SIZE: unknown src color format (0x%x)", REGS(ctx)->registers[NV3089_SET_COLOR_FORMAT]); + RSX(ctx)->recover_fifo(); + return { false, src_info, dst_info }; + } + + const u32 src_offset = REGS(ctx)->blit_engine_input_offset(); + const u32 src_dma = REGS(ctx)->blit_engine_input_location(); + + u32 dst_offset; + u32 dst_dma = 0; + rsx::blit_engine::transfer_destination_format dst_color_format; + u32 out_pitch = 0; + [[maybe_unused]] u32 out_alignment = 64; + bool is_block_transfer = false; + + switch (REGS(ctx)->blit_engine_context_surface()) + { + case blit_engine::context_surface::surface2d: + { + dst_dma = REGS(ctx)->blit_engine_output_location_nv3062(); + dst_offset = REGS(ctx)->blit_engine_output_offset_nv3062(); + out_pitch = REGS(ctx)->blit_engine_output_pitch_nv3062(); + out_alignment = REGS(ctx)->blit_engine_output_alignment_nv3062(); + is_block_transfer = fcmp(scale_x, 1.f) && fcmp(scale_y, 1.f); + + if (auto dst_fmt = REGS(ctx)->blit_engine_nv3062_color_format(); !dst_fmt) + { + rsx_log.error("NV3089_IMAGE_IN_SIZE: unknown NV3062 dst color format (0x%x)", REGS(ctx)->registers[NV3062_SET_COLOR_FORMAT]); + RSX(ctx)->recover_fifo(); + return { false, src_info, dst_info }; + } + 
else + { + dst_color_format = dst_fmt; + } + + break; + } + case blit_engine::context_surface::swizzle2d: + { + dst_dma = REGS(ctx)->blit_engine_nv309E_location(); + dst_offset = REGS(ctx)->blit_engine_nv309E_offset(); + + if (auto dst_fmt = REGS(ctx)->blit_engine_output_format_nv309E(); !dst_fmt) + { + rsx_log.error("NV3089_IMAGE_IN_SIZE: unknown NV309E dst color format (0x%x)", REGS(ctx)->registers[NV309E_SET_FORMAT]); + RSX(ctx)->recover_fifo(); + return { false, src_info, dst_info }; + } + else + { + dst_color_format = dst_fmt; + } + + break; + } + default: + rsx_log.error("NV3089_IMAGE_IN_SIZE: unknown m_context_surface (0x%x)", static_cast(REGS(ctx)->blit_engine_context_surface())); + return { false, src_info, dst_info }; + } + + const u32 in_bpp = (src_color_format == rsx::blit_engine::transfer_source_format::r5g6b5) ? 2 : 4; // bytes per pixel + const u32 out_bpp = (dst_color_format == rsx::blit_engine::transfer_destination_format::r5g6b5) ? 2 : 4; + + if (out_pitch == 0) + { + out_pitch = out_bpp * out_w; + } + + if (in_pitch == 0) + { + in_pitch = in_bpp * in_w; + } + + if (in_bpp != out_bpp) + { + is_block_transfer = false; + } + + u16 in_x, in_y; + if (in_origin == blit_engine::transfer_origin::center) + { + // Convert to normal u,v addressing. 
Under this scheme offset of 1 is actually half-way inside pixel 0 + const float x = std::max(REGS(ctx)->blit_engine_in_x(), 0.5f); + const float y = std::max(REGS(ctx)->blit_engine_in_y(), 0.5f); + in_x = static_cast(std::floor(x - 0.5f)); + in_y = static_cast(std::floor(y - 0.5f)); + } + else + { + in_x = static_cast(std::floor(REGS(ctx)->blit_engine_in_x())); + in_y = static_cast(std::floor(REGS(ctx)->blit_engine_in_y())); + } + + // Check for subpixel addressing + if (scale_x < 1.f) + { + float dst_x = in_x * scale_x; + in_x = static_cast(std::floor(dst_x) / scale_x); + } + + if (scale_y < 1.f) + { + float dst_y = in_y * scale_y; + in_y = static_cast(std::floor(dst_y) / scale_y); + } + + const u32 in_offset = in_x * in_bpp + in_pitch * in_y; + const u32 out_offset = out_x * out_bpp + out_pitch * out_y; + + const u32 src_line_length = (in_w * in_bpp); + + u32 src_address = 0; + const u32 dst_address = get_address(dst_offset, dst_dma, 1); // TODO: Add size + + if (is_block_transfer && (clip_h == 1 || (in_pitch == out_pitch && src_line_length == in_pitch))) + { + const u32 nb_lines = std::min(clip_h, in_h); + const u32 data_length = nb_lines * src_line_length; + + if (src_address = get_address(src_offset, src_dma, data_length); + !src_address || !dst_address) + { + RSX(ctx)->recover_fifo(); + return { false, src_info, dst_info }; + } + + RSX(ctx)->invalidate_fragment_program(dst_dma, dst_offset, data_length); + + if (const auto result = RSX(ctx)->read_barrier(src_address, data_length, false); + result == rsx::result_zcull_intr) + { + if (RSX(ctx)->copy_zcull_stats(src_address, data_length, dst_address) == data_length) + { + // All writes deferred + return { false, src_info, dst_info }; + } + } + } + else + { + const u16 read_h = std::min(static_cast(clip_h / scale_y), in_h); + const u32 data_length = in_pitch * (read_h - 1) + src_line_length; + + if (src_address = get_address(src_offset, src_dma, data_length); + !src_address || !dst_address) + { + 
RSX(ctx)->recover_fifo(); + return { false, src_info, dst_info }; + } + + RSX(ctx)->invalidate_fragment_program(dst_dma, dst_offset, data_length); + RSX(ctx)->read_barrier(src_address, data_length, true); + } + + if (src_address == dst_address && + in_w == clip_w && in_h == clip_h && + in_pitch == out_pitch && + rsx::fcmp(scale_x, 1.f) && rsx::fcmp(scale_y, 1.f)) + { + // NULL operation + rsx_log.warning("NV3089_IMAGE_IN: Operation writes memory onto itself with no modification (move-to-self). Will ignore."); + return { false, src_info, dst_info }; + } + + u8* pixels_src = vm::_ptr(src_address + in_offset); + u8* pixels_dst = vm::_ptr(dst_address + out_offset); + + if (dst_color_format != rsx::blit_engine::transfer_destination_format::r5g6b5 && + dst_color_format != rsx::blit_engine::transfer_destination_format::a8r8g8b8) + { + fmt::throw_exception("NV3089_IMAGE_IN_SIZE: unknown dst_color_format (%d)", static_cast(dst_color_format)); + } + + if (src_color_format != rsx::blit_engine::transfer_source_format::r5g6b5 && + src_color_format != rsx::blit_engine::transfer_source_format::a8r8g8b8) + { + // Alpha has no meaning in both formats + if (src_color_format == rsx::blit_engine::transfer_source_format::x8r8g8b8) + { + src_color_format = rsx::blit_engine::transfer_source_format::a8r8g8b8; + } + else + { + // TODO: Support more formats + fmt::throw_exception("NV3089_IMAGE_IN_SIZE: unknown src_color_format (%d)", static_cast(*src_color_format)); + } + } + + u32 convert_w = static_cast(std::abs(scale_x) * in_w); + u32 convert_h = static_cast(std::abs(scale_y) * in_h); + + if (convert_w == 0 || convert_h == 0) + { + rsx_log.error("NV3089_IMAGE_IN: Invalid dimensions or scaling factor. 
Request ignored (ds_dx=%f, dt_dy=%f)", + REGS(ctx)->blit_engine_ds_dx(), REGS(ctx)->blit_engine_dt_dy()); + return { false, src_info, dst_info }; + } + + src_info.format = src_color_format; + src_info.origin = in_origin; + src_info.width = in_w; + src_info.height = in_h; + src_info.pitch = in_pitch; + src_info.bpp = in_bpp; + src_info.offset_x = in_x; + src_info.offset_y = in_y; + src_info.dma = src_dma; + src_info.rsx_address = src_address; + src_info.pixels = pixels_src; + + dst_info.format = dst_color_format; + dst_info.width = convert_w; + dst_info.height = convert_h; + dst_info.clip_x = clip_x; + dst_info.clip_y = clip_y; + dst_info.clip_width = clip_w; + dst_info.clip_height = clip_h; + dst_info.offset_x = out_x; + dst_info.offset_y = out_y; + dst_info.pitch = out_pitch; + dst_info.bpp = out_bpp; + dst_info.scale_x = scale_x; + dst_info.scale_y = scale_y; + dst_info.dma = dst_dma; + dst_info.rsx_address = dst_address; + dst_info.pixels = pixels_dst; + dst_info.swizzled = (REGS(ctx)->blit_engine_context_surface() == blit_engine::context_surface::swizzle2d); + + return { true, src_info, dst_info }; + } + + void linear_copy( + const blit_dst_info& dst, + const blit_src_info& src, + u16 out_w, + u16 out_h, + u32 slice_h, + AVPixelFormat ffmpeg_src_format, + AVPixelFormat ffmpeg_dst_format, + bool need_convert, + bool need_clip, + bool src_is_modified, + bool interpolate) + { + std::vector temp2; + + if (!need_convert) [[ likely ]] + { + const bool is_overlapping = !src_is_modified && dst.dma == src.dma && [&]() -> bool + { + const auto src_range = utils::address_range::start_length(src.rsx_address, src.pitch * (src.height - 1) + (src.bpp * src.width)); + const auto dst_range = utils::address_range::start_length(dst.rsx_address, dst.pitch * (dst.clip_height - 1) + (dst.bpp * dst.clip_width)); + return src_range.overlaps(dst_range); + }(); + + if (is_overlapping) [[ unlikely ]] + { + if (need_clip) + { + temp2.resize(dst.pitch * dst.clip_height); + 
clip_image_may_overlap(dst.pixels, src.pixels, dst.clip_x, dst.clip_y, dst.clip_width, dst.clip_height, dst.bpp, src.pitch, dst.pitch, temp2.data()); + return; + } + + if (dst.pitch != src.pitch || dst.pitch != dst.bpp * out_w) + { + const u32 buffer_pitch = dst.bpp * out_w; + temp2.resize(buffer_pitch * out_h); + std::add_pointer_t buf = temp2.data(), pixels = src.pixels; + + // Read the whole buffer from source + for (u32 y = 0; y < out_h; ++y) + { + std::memcpy(buf, pixels, buffer_pitch); + pixels += src.pitch; + buf += buffer_pitch; + } + + buf = temp2.data(), pixels = dst.pixels; + + // Write to destination + for (u32 y = 0; y < out_h; ++y) + { + std::memcpy(pixels, buf, buffer_pitch); + pixels += dst.pitch; + buf += buffer_pitch; + } + + return; + } + + std::memmove(dst.pixels, src.pixels, dst.pitch * out_h); + return; + } + + if (need_clip) [[ unlikely ]] + { + clip_image(dst.pixels, src.pixels, dst.clip_x, dst.clip_y, dst.clip_width, dst.clip_height, dst.bpp, src.pitch, dst.pitch); + return; + } + + if (dst.pitch != src.pitch || dst.pitch != dst.bpp * out_w) [[ unlikely ]] + { + u8* dst_pixels = dst.pixels, * src_pixels = src.pixels; + + for (u32 y = 0; y < out_h; ++y) + { + std::memcpy(dst_pixels, src_pixels, out_w * dst.bpp); + dst_pixels += dst.pitch; + src_pixels += src.pitch; + } + + return; + } + + std::memcpy(dst.pixels, src.pixels, dst.pitch * out_h); + return; + } + + if (need_clip) [[ unlikely ]] + { + temp2.resize(dst.pitch * std::max(dst.height, dst.clip_height)); + + convert_scale_image(temp2.data(), ffmpeg_dst_format, dst.width, dst.height, dst.pitch, + src.pixels, ffmpeg_src_format, src.width, src.height, src.pitch, slice_h, interpolate); + + clip_image(dst.pixels, temp2.data(), dst.clip_x, dst.clip_y, dst.clip_width, dst.clip_height, dst.bpp, dst.pitch, dst.pitch); + return; + } + + convert_scale_image(dst.pixels, ffmpeg_dst_format, out_w, out_h, dst.pitch, + src.pixels, ffmpeg_src_format, src.width, src.height, src.pitch, slice_h, + 
interpolate); + } + + std::vector swizzled_copy_1( + const blit_dst_info& dst, + const blit_src_info& src, + u16 out_w, + u16 out_h, + u32 slice_h, + AVPixelFormat ffmpeg_src_format, + AVPixelFormat ffmpeg_dst_format, + bool need_convert, + bool need_clip, + bool interpolate) + { + std::vector temp2, temp3; + + if (need_clip) + { + temp3.resize(dst.pitch * dst.clip_height); + + if (need_convert) + { + temp2.resize(dst.pitch * std::max(dst.height, dst.clip_height)); + + convert_scale_image(temp2.data(), ffmpeg_dst_format, dst.width, dst.height, dst.pitch, + src.pixels, ffmpeg_src_format, src.width, src.height, src.pitch, slice_h, + interpolate); + + clip_image(temp3.data(), temp2.data(), dst.clip_x, dst.clip_y, dst.clip_width, dst.clip_height, dst.bpp, dst.pitch, dst.pitch); + return temp3; + } + + clip_image(temp3.data(), src.pixels, dst.clip_x, dst.clip_y, dst.clip_width, dst.clip_height, dst.bpp, src.pitch, dst.pitch); + return temp3; + } + + if (need_convert) + { + temp3.resize(dst.pitch * out_h); + + convert_scale_image(temp3.data(), ffmpeg_dst_format, out_w, out_h, dst.pitch, + src.pixels, ffmpeg_src_format, src.width, src.height, src.pitch, slice_h, + interpolate); + + return temp3; + } + + return {}; + } + + void swizzled_copy_2( + u8* linear_pixels, + u8* swizzled_pixels, + u32 linear_pitch, + u16 out_w, + u16 out_h, + u8 out_bpp) + { + // TODO: Validate these claims. Are the registers always correctly initialized? Should we trust them at all? + // It looks like rsx may ignore the requested swizzle size and just always + // round up to nearest power of 2 + /* + u8 sw_width_log2 = REGS(ctx)->nv309e_sw_width_log2(); + u8 sw_height_log2 = REGS(ctx)->nv309e_sw_height_log2(); + + // 0 indicates height of 1 pixel + sw_height_log2 = sw_height_log2 == 0 ? 
1 : sw_height_log2; + + // swizzle based on destination size + u16 sw_width = 1 << sw_width_log2; + u16 sw_height = 1 << sw_height_log2; + */ + + std::vector sw_temp; + + u32 sw_width = next_pow2(out_w); + u32 sw_height = next_pow2(out_h); + + // Check and pad texture out if we are given non power of 2 output + if (sw_width != out_w || sw_height != out_h) + { + sw_temp.resize(out_bpp * sw_width * sw_height); + + switch (out_bpp) + { + case 1: + pad_texture(linear_pixels, sw_temp.data(), out_w, out_h, sw_width, sw_height); + break; + case 2: + pad_texture(linear_pixels, sw_temp.data(), out_w, out_h, sw_width, sw_height); + break; + case 4: + pad_texture(linear_pixels, sw_temp.data(), out_w, out_h, sw_width, sw_height); + break; + } + + linear_pixels = sw_temp.data(); + } + + switch (out_bpp) + { + case 1: + convert_linear_swizzle(linear_pixels, swizzled_pixels, sw_width, sw_height, linear_pitch); + break; + case 2: + convert_linear_swizzle(linear_pixels, swizzled_pixels, sw_width, sw_height, linear_pitch); + break; + case 4: + convert_linear_swizzle(linear_pixels, swizzled_pixels, sw_width, sw_height, linear_pitch); + break; + } + } + + std::vector _mirror_transform(const blit_src_info& src, bool flip_x, bool flip_y) + { + std::vector temp1; + if (!flip_x && !flip_y) + { + return temp1; + } + + const u32 packed_pitch = src.width * src.bpp; + temp1.resize(packed_pitch * src.height); + + const s32 stride_y = (flip_y ? 
-1 : 1) * static_cast(src.pitch); + + for (u32 y = 0; y < src.height; ++y) + { + u8* dst_pixels = temp1.data() + (packed_pitch * y); + u8* src_pixels = src.pixels + (static_cast(y) * stride_y); + + if (flip_x) + { + if (src.bpp == 4) [[ likely ]] + { + rsx::memcpy_r(dst_pixels, src_pixels, src.width); + continue; + } + + rsx::memcpy_r(dst_pixels, src_pixels, src.width); + continue; + } + + std::memcpy(dst_pixels, src_pixels, packed_pitch); + } + + return temp1; + } + + void image_in(context* ctx, u32 /*reg*/, u32 /*arg*/) + { + auto [success, src, dst] = decode_transfer_registers(ctx); + if (!success) + { + return; + } + + // Decode extra params before locking + const blit_engine::transfer_interpolator in_inter = REGS(ctx)->blit_engine_input_inter(); + const u16 out_w = REGS(ctx)->blit_engine_output_width(); + const u16 out_h = REGS(ctx)->blit_engine_output_height(); + + // Lock here. RSX cannot execute any locking operations from this point, including ZCULL read barriers + auto res = ::rsx::reservation_lock( + dst.rsx_address, dst.pitch * dst.clip_height, + src.rsx_address, src.pitch * src.height); + + if (!g_cfg.video.force_cpu_blit_processing && + (dst.dma == CELL_GCM_CONTEXT_DMA_MEMORY_FRAME_BUFFER || src.dma == CELL_GCM_CONTEXT_DMA_MEMORY_FRAME_BUFFER) && + RSX(ctx)->scaled_image_from_memory(src, dst, in_inter == blit_engine::transfer_interpolator::foh)) + { + // HW-accelerated blit + return; + } + + std::vector mirror_tmp; + bool src_is_temp = false; + + // Flip source if needed + if (dst.scale_y < 0 || dst.scale_x < 0) + { + mirror_tmp = _mirror_transform(src, dst.scale_x < 0, dst.scale_y < 0); + src.pixels = mirror_tmp.data(); + src.pitch = src.width * src.bpp; + src_is_temp = true; + } + + const AVPixelFormat in_format = (src.format == rsx::blit_engine::transfer_source_format::r5g6b5) ? AV_PIX_FMT_RGB565BE : AV_PIX_FMT_ARGB; + const AVPixelFormat out_format = (dst.format == rsx::blit_engine::transfer_destination_format::r5g6b5) ? 
AV_PIX_FMT_RGB565BE : AV_PIX_FMT_ARGB; + + const bool need_clip = + dst.clip_width != src.width || + dst.clip_height != src.height || + dst.clip_x > 0 || dst.clip_y > 0 || + dst.width != out_w || dst.height != out_h; + + const bool need_convert = out_format != in_format || !rsx::fcmp(fabsf(dst.scale_x), 1.f) || !rsx::fcmp(fabsf(dst.scale_y), 1.f); + const u32 slice_h = static_cast(std::ceil(static_cast(dst.clip_height + dst.clip_y) / fabsf(dst.scale_y))); // Mirroring was already applied to the source; a negative scale must not produce a negative slice height + const bool interpolate = in_inter == blit_engine::transfer_interpolator::foh; + + auto real_dst = dst.pixels; + const auto tiled_region = RSX(ctx)->get_tiled_memory_region(utils::address_range::start_length(dst.rsx_address, dst.pitch * dst.clip_height)); + std::vector tmp; + + if (tiled_region) + { + tmp.resize(tiled_region.tile->size); + real_dst = dst.pixels; + dst.pixels = tmp.data(); + } + + if (REGS(ctx)->blit_engine_context_surface() != blit_engine::context_surface::swizzle2d) + { + linear_copy(dst, src, out_w, out_h, slice_h, in_format, out_format, need_convert, need_clip, src_is_temp, interpolate); + } + else + { + const auto swz_temp = swizzled_copy_1(dst, src, out_w, out_h, slice_h, in_format, out_format, need_convert, need_clip, interpolate); + auto pixels_src = swz_temp.empty() ? src.pixels : swz_temp.data(); + + swizzled_copy_2(const_cast(pixels_src), dst.pixels, src.pitch, out_w, out_h, dst.bpp); + } + + if (tiled_region) + { + const auto tile_func = dst.bpp == 4 + ? 
rsx::tile_texel_data32 + : rsx::tile_texel_data16; + + tile_func( + real_dst, + dst.pixels, + tiled_region.base_address, + dst.rsx_address - tiled_region.base_address, + tiled_region.tile->size, + tiled_region.tile->bank, + tiled_region.tile->pitch, + dst.clip_width, + dst.clip_height + ); + } + } + } +} diff --git a/rpcs3/Emu/RSX/NV47/nv3089.h b/rpcs3/Emu/RSX/NV47/nv3089.h new file mode 100644 index 0000000000..e54b4a48c5 --- /dev/null +++ b/rpcs3/Emu/RSX/NV47/nv3089.h @@ -0,0 +1,10 @@ +#pragma once +#include "context.h" + +namespace rsx +{ + namespace nv3089 + { + void image_in(context* ctx, u32 reg, u32 arg); + } +} diff --git a/rpcs3/Emu/RSX/NV47/nv308a.cpp b/rpcs3/Emu/RSX/NV47/nv308a.cpp new file mode 100644 index 0000000000..049f39d192 --- /dev/null +++ b/rpcs3/Emu/RSX/NV47/nv308a.cpp @@ -0,0 +1,159 @@ +#include "stdafx.h" +#include "nv308a.h" + +#include "Emu/RSX/RSXThread.h" + +#include "context_accessors.define.h" + +namespace rsx +{ + namespace nv308a + { + void color::impl(context* ctx, u32 reg, u32) + { + const u32 out_x_max = REGS(ctx)->nv308a_size_out_x(); + const u32 index = reg - NV308A_COLOR; + + if (index >= out_x_max) + { + // Skip + return; + } + + // Get position of the current command arg + [[maybe_unused]] const u32 src_offset = RSX(ctx)->fifo_ctrl->get_pos(); + + // FIFO args count including this one + const u32 fifo_args_cnt = RSX(ctx)->fifo_ctrl->get_remaining_args_count() + 1; + + // The range of methods this function is responsible for + const u32 method_range = std::min(0x700 - index, out_x_max - index); + + // Get limit imposed by FIFO PUT (if put is behind get it will result in a number ignored by min) + const u32 fifo_read_limit = static_cast(((RSX(ctx)->ctrl->put & ~3ull) - (RSX(ctx)->fifo_ctrl->get_pos())) / 4); + + u32 count = std::min({ fifo_args_cnt, fifo_read_limit, method_range }); + + const u32 dst_dma = REGS(ctx)->blit_engine_output_location_nv3062(); + const u32 dst_offset = REGS(ctx)->blit_engine_output_offset_nv3062(); + const 
u32 out_pitch = REGS(ctx)->blit_engine_output_pitch_nv3062(); + + const u32 x = REGS(ctx)->nv308a_x() + index; + const u32 y = REGS(ctx)->nv308a_y(); + + const auto fifo_span = RSX(ctx)->fifo_ctrl->get_current_arg_ptr(); + + if (fifo_span.size() < count) + { + count = ::size32(fifo_span); + } + + // Skip "handled methods" + RSX(ctx)->fifo_ctrl->skip_methods(count - 1); + + // 308A::COLOR can be used to create custom sync primitives. + // Hide this behind strict mode due to the potential performance implications. + if (count == 1 && g_cfg.video.strict_rendering_mode && !g_cfg.video.relaxed_zcull_sync) + { + RSX(ctx)->sync(); + } + + switch (*REGS(ctx)->blit_engine_nv3062_color_format()) + { + case blit_engine::transfer_destination_format::a8r8g8b8: + case blit_engine::transfer_destination_format::y32: + { + // Bit cast - optimize to mem copy + + const u32 data_length = count * 4; + + const auto dst_address = get_address(dst_offset + (x * 4) + (out_pitch * y), dst_dma, data_length); + + if (!dst_address) + { + RSX(ctx)->recover_fifo(); + return; + } + + const auto dst = vm::_ptr(dst_address); + const auto src = reinterpret_cast(fifo_span.data()); + + rsx::reservation_lock rsx_lock(dst_address, data_length); + + if (RSX(ctx)->fifo_ctrl->last_cmd() & RSX_METHOD_NON_INCREMENT_CMD_MASK) [[unlikely]] + { + // Move last 32 bits + reinterpret_cast(dst)[0] = reinterpret_cast(src)[count - 1]; + RSX(ctx)->invalidate_fragment_program(dst_dma, dst_offset, 4); + } + else + { + if (dst_dma & CELL_GCM_LOCATION_MAIN) + { + // May overlap + std::memmove(dst, src, data_length); + } + else + { + // Never overlaps + std::memcpy(dst, src, data_length); + } + + RSX(ctx)->invalidate_fragment_program(dst_dma, dst_offset, count * 4); + } + + break; + } + case blit_engine::transfer_destination_format::r5g6b5: + { + const auto data_length = count * 2; + + const auto dst_address = get_address(dst_offset + (x * 2) + (y * out_pitch), dst_dma, data_length); + const auto dst = 
vm::_ptr(dst_address); + const auto src = utils::bless>(fifo_span.data()); + + if (!dst_address) + { + RSX(ctx)->recover_fifo(); + return; + } + + rsx::reservation_lock rsx_lock(dst_address, data_length); + + auto convert = [](u32 input) -> u16 + { + // Input is considered to be ARGB8. Keep the top 5/6/5 bits of each channel so that 0xFF maps to the field maximum instead of overflowing into the neighbouring field + u32 r = (input >> 16) & 0xFF; + u32 g = (input >> 8) & 0xFF; + u32 b = input & 0xFF; + + r >>= 3; + g >>= 2; + b >>= 3; + return static_cast((r << 11) | (g << 5) | b); + }; + + if (RSX(ctx)->fifo_ctrl->last_cmd() & RSX_METHOD_NON_INCREMENT_CMD_MASK) [[unlikely]] + { + // Move last 16 bits + dst[0] = convert(src[count - 1]); + RSX(ctx)->invalidate_fragment_program(dst_dma, dst_offset, 2); + break; + } + + for (u32 i = 0; i < count; i++) + { + dst[i] = convert(src[i]); + } + + RSX(ctx)->invalidate_fragment_program(dst_dma, dst_offset, count * 2); + break; + } + default: + { + fmt::throw_exception("Unreachable"); + } + } + } + } +} diff --git a/rpcs3/Emu/RSX/NV47/nv308a.h b/rpcs3/Emu/RSX/NV47/nv308a.h new file mode 100644 index 0000000000..eb28063c2b --- /dev/null +++ b/rpcs3/Emu/RSX/NV47/nv308a.h @@ -0,0 +1,14 @@ +#pragma once + +#include "context.h" + +namespace rsx +{ + namespace nv308a + { + struct color + { + static void impl(context* ctx, u32 reg, u32 arg); + }; + } +} diff --git a/rpcs3/Emu/RSX/NV47/nv406e.cpp b/rpcs3/Emu/RSX/NV47/nv406e.cpp new file mode 100644 index 0000000000..3c28acfcef --- /dev/null +++ b/rpcs3/Emu/RSX/NV47/nv406e.cpp @@ -0,0 +1,125 @@ +#include "stdafx.h" +#include "nv406e.h" +#include "common.h" + +#include "Emu/RSX/RSXThread.h" + +#include "context_accessors.define.h" + +namespace rsx +{ + namespace nv406e + { + void set_reference(context* ctx, u32 /*reg*/, u32 arg) + { + RSX(ctx)->sync(); + + // Write ref+get (get will be written again with the same value at command end) + auto& dma = vm::_ref(RSX(ctx)->dma_address); + dma.get.release(RSX(ctx)->fifo_ctrl->get_pos()); + dma.ref.store(arg); + } + + void 
semaphore_acquire(context* ctx, u32 /*reg*/, u32 arg) + { + RSX(ctx)->sync_point_request.release(true); + const u32 addr = get_address(REGS(ctx)->semaphore_offset_406e(), REGS(ctx)->semaphore_context_dma_406e()); + + const auto& sema = vm::_ref(addr).val; + + if (sema == arg) + { + // Flip semaphore doesnt need wake-up delay + if (addr != RSX(ctx)->label_addr + 0x10) + { + RSX(ctx)->flush_fifo(); + RSX(ctx)->fifo_wake_delay(2); + } + + return; + } + else + { + RSX(ctx)->flush_fifo(); + } + + u64 start = rsx::uclock(); + u64 last_check_val = start; + + while (sema != arg) + { + if (RSX(ctx)->test_stopped()) + { + RSX(ctx)->state += cpu_flag::again; + return; + } + + if (const auto tdr = static_cast(g_cfg.video.driver_recovery_timeout)) + { + const u64 current = rsx::uclock(); + + if (current - last_check_val > 20'000) + { + // Suspicious amnount of time has passed + // External pause such as debuggers' pause or operating system sleep may have taken place + // Ignore it + start += current - last_check_val; + } + + last_check_val = current; + + if ((current - start) > tdr) + { + // If longer than driver timeout force exit + rsx_log.error("nv406e::semaphore_acquire has timed out. semaphore_address=0x%X", addr); + break; + } + } + + RSX(ctx)->cpu_wait({}); + } + + RSX(ctx)->fifo_wake_delay(); + RSX(ctx)->performance_counters.idle_time += (rsx::uclock() - start); + } + + void semaphore_release(context* ctx, u32 /*reg*/, u32 arg) + { + const u32 offset = REGS(ctx)->semaphore_offset_406e(); + + if (offset % 4) + { + rsx_log.warning("NV406E semaphore release is using unaligned semaphore, ignoring. 
(offset=0x%x)", offset); + return; + } + + const u32 ctxt = REGS(ctx)->semaphore_context_dma_406e(); + + // By avoiding doing this on flip's semaphore release + // We allow last gcm's registers reset to occur in case of a crash + if (const bool is_flip_sema = (offset == 0x10 && ctxt == CELL_GCM_CONTEXT_DMA_SEMAPHORE_R); + !is_flip_sema) + { + RSX(ctx)->sync_point_request.release(true); + } + + const u32 addr = get_address(offset, ctxt); + + // TODO: Check if possible to write on reservations + if (RSX(ctx)->label_addr >> 28 != addr >> 28) + { + rsx_log.error("NV406E semaphore unexpected address. Please report to the developers. (offset=0x%x, addr=0x%x)", offset, addr); + RSX(ctx)->recover_fifo(); + return; + } + + if (addr == RSX(ctx)->device_addr + 0x30 && !arg) + { + // HW flip synchronization related, 1 is not written without display queue command (TODO: make it behave as real hw) + arg = 1; + } + + util::write_gcm_label(ctx, addr, arg); + } + } +} diff --git a/rpcs3/Emu/RSX/NV47/nv406e.h b/rpcs3/Emu/RSX/NV47/nv406e.h new file mode 100644 index 0000000000..426228741c --- /dev/null +++ b/rpcs3/Emu/RSX/NV47/nv406e.h @@ -0,0 +1,15 @@ +#pragma once + +#include "context.h" + +namespace rsx +{ + namespace nv406e + { + void set_reference(context* ctx, u32 reg, u32 arg); + + void semaphore_acquire(context* ctx, u32 reg, u32 arg); + + void semaphore_release(context* ctx, u32 reg, u32 arg); + } +} diff --git a/rpcs3/Emu/RSX/NV47/nv4097.cpp b/rpcs3/Emu/RSX/NV47/nv4097.cpp new file mode 100644 index 0000000000..c9155f5dbc --- /dev/null +++ b/rpcs3/Emu/RSX/NV47/nv4097.cpp @@ -0,0 +1,629 @@ +#include "stdafx.h" +#include "nv4097.h" + +#include "Emu/RSX/RSXThread.h" +#include "Emu/RSX/Common/BufferUtils.h" + +#define RSX(ctx) ctx->rsxthr +#define REGS(ctx) (&rsx::method_registers) +#define RSX_CAPTURE_EVENT(name) if (RSX(ctx)->capture_current_frame) { RSX(ctx)->capture_frame(name); } + +namespace rsx +{ + template struct vertex_data_type_from_element_type; + template<> struct 
vertex_data_type_from_element_type { static const vertex_base_type type = vertex_base_type::f; }; + template<> struct vertex_data_type_from_element_type { static const vertex_base_type type = vertex_base_type::sf; }; + template<> struct vertex_data_type_from_element_type { static const vertex_base_type type = vertex_base_type::ub; }; + template<> struct vertex_data_type_from_element_type { static const vertex_base_type type = vertex_base_type::s32k; }; + template<> struct vertex_data_type_from_element_type { static const vertex_base_type type = vertex_base_type::s1; }; + + namespace nv4097 + { + ///// Program management + + void set_shader_program_dirty(context* ctx, u32, u32) + { + RSX(ctx)->m_graphics_state |= rsx::pipeline_state::fragment_program_ucode_dirty; + } + + void set_transform_constant::impl(context* ctx, u32 reg, u32 arg) + { + const u32 index = reg - NV4097_SET_TRANSFORM_CONSTANT; + const u32 constant_id = index / 4; + const u8 subreg = index % 4; + + // FIFO args count including this one + const u32 fifo_args_cnt = RSX(ctx)->fifo_ctrl->get_remaining_args_count() + 1; + + // The range of methods this function resposible to + const u32 method_range = 32 - index; + + // Get limit imposed by FIFO PUT (if put is behind get it will result in a number ignored by min) + const u32 fifo_read_limit = static_cast(((RSX(ctx)->ctrl->put & ~3ull) - (RSX(ctx)->fifo_ctrl->get_pos())) / 4); + + const u32 count = std::min({ fifo_args_cnt, fifo_read_limit, method_range }); + + const u32 load = REGS(ctx)->transform_constant_load(); + + u32 rcount = count; + if (const u32 max = (load + constant_id) * 4 + count + subreg, limit = 468 * 4; max > limit) + { + // Ignore addresses outside the usable [0, 467] range + rsx_log.warning("Invalid transform register index (load=%u, index=%u, count=%u)", load, index, count); + + if ((max - count) < limit) + rcount -= max - limit; + else + rcount = 0; + } + + const auto values = ®S(ctx)->transform_constants[load + constant_id][subreg]; 
+ + const auto fifo_span = RSX(ctx)->fifo_ctrl->get_current_arg_ptr(); + + if (fifo_span.size() < rcount) + { + rcount = ::size32(fifo_span); + } + + if (RSX(ctx)->m_graphics_state & rsx::pipeline_state::transform_constants_dirty) + { + // Minor optimization: don't compare values if we already know we need invalidation + copy_data_swap_u32(values, fifo_span.data(), rcount); + } + else + { + if (copy_data_swap_u32_cmp(values, fifo_span.data(), rcount)) + { + // Transform constants invalidation is expensive (~8k bytes per update) + RSX(ctx)->m_graphics_state |= rsx::pipeline_state::transform_constants_dirty; + } + } + + RSX(ctx)->fifo_ctrl->skip_methods(rcount - 1); + } + + void set_transform_program::impl(context* ctx, u32 reg, u32 arg) + { + const u32 index = reg - NV4097_SET_TRANSFORM_PROGRAM; + + // FIFO args count including this one + const u32 fifo_args_cnt = RSX(ctx)->fifo_ctrl->get_remaining_args_count() + 1; + + // The range of methods this function resposible to + const u32 method_range = 32 - index; + + // Get limit imposed by FIFO PUT (if put is behind get it will result in a number ignored by min) + const u32 fifo_read_limit = static_cast(((RSX(ctx)->ctrl->put & ~3ull) - (RSX(ctx)->fifo_ctrl->get_pos())) / 4); + + const u32 count = std::min({ fifo_args_cnt, fifo_read_limit, method_range }); + + const u32 load_pos = REGS(ctx)->transform_program_load(); + + u32 rcount = count; + + if (const u32 max = load_pos * 4 + rcount + (index % 4); + max > max_vertex_program_instructions * 4) + { + rsx_log.warning("Program buffer overflow! 
Attempted to write %u VP instructions.", max / 4); + rcount -= max - (max_vertex_program_instructions * 4); + } + + const auto fifo_span = RSX(ctx)->fifo_ctrl->get_current_arg_ptr(); + + if (fifo_span.size() < rcount) + { + rcount = ::size32(fifo_span); + } + + copy_data_swap_u32(®S(ctx)->transform_program[load_pos * 4 + index % 4], fifo_span.data(), rcount); + + RSX(ctx)->m_graphics_state |= rsx::pipeline_state::vertex_program_ucode_dirty; + REGS(ctx)->transform_program_load_set(load_pos + ((rcount + index % 4) / 4)); + RSX(ctx)->fifo_ctrl->skip_methods(rcount - 1); + } + + ///// Texture management + + ///// Surface management + + void set_surface_dirty_bit(context* ctx, u32 reg, u32 arg) + { + if (arg == REGS(ctx)->latch) + { + return; + } + + switch (reg) + { + case NV4097_SET_SURFACE_COLOR_TARGET: + RSX(ctx)->m_graphics_state |= rsx::pipeline_state::pipeline_config_dirty; + break; + case NV4097_SET_SURFACE_CLIP_VERTICAL: + case NV4097_SET_SURFACE_CLIP_HORIZONTAL: + RSX(ctx)->m_graphics_state |= rsx::pipeline_state::vertex_state_dirty; + break; + default: + break; + } + + RSX(ctx)->m_graphics_state.set(rtt_config_dirty); + RSX(ctx)->m_graphics_state.clear(rtt_config_contested); + } + + void set_surface_format(context* ctx, u32 reg, u32 arg) + { + // The high bits of this register are just log2(dimension), ignore them + if ((arg & 0xFFFF) == (REGS(ctx)->latch & 0xFFFF)) + { + return; + } + + // The important parameters have changed (format, type, antialias) + RSX(ctx)->m_graphics_state |= rsx::pipeline_state::pipeline_config_dirty; + + // Check if we need to also update fragment state + const auto current = REGS(ctx)->decode(arg); + const auto previous = REGS(ctx)->decode(REGS(ctx)->latch); + + if (*current.antialias() != *previous.antialias() || // Antialias control has changed, update ROP parameters + current.is_integer_color_format() != previous.is_integer_color_format()) // The type of color format also requires ROP control update + { + 
RSX(ctx)->m_graphics_state |= rsx::pipeline_state::fragment_state_dirty; + } + + set_surface_dirty_bit(ctx, reg, arg); + } + + void set_surface_options_dirty_bit(context* ctx, u32 reg, u32 arg) + { + if (arg != REGS(ctx)->latch) + { + RSX(ctx)->on_framebuffer_options_changed(reg); + RSX(ctx)->m_graphics_state |= rsx::pipeline_config_dirty; + } + } + + void set_color_mask(context* ctx, u32 reg, u32 arg) + { + if (arg == REGS(ctx)->latch) + { + return; + } + + if (REGS(ctx)->decode(arg).is_invalid()) [[ unlikely ]] + { + REGS(ctx)->decode(reg, REGS(ctx)->latch); + } + else + { + set_surface_options_dirty_bit(ctx, reg, arg); + } + } + + void set_stencil_op(context* ctx, u32 reg, u32 arg) + { + if (arg == REGS(ctx)->latch) + { + return; + } + + const auto typed = to_stencil_op(arg); + if (typed) [[ likely ]] + { + set_surface_options_dirty_bit(ctx, reg, arg); + } + else + { + REGS(ctx)->decode(reg, REGS(ctx)->latch); + } + } + + ///// Draw call setup (vertex, etc) + + void set_array_element16(context* ctx, u32, u32 arg) + { + if (RSX(ctx)->in_begin_end) + { + RSX(ctx)->append_array_element(arg & 0xFFFF); + RSX(ctx)->append_array_element(arg >> 16); + } + } + + void set_array_element32(context* ctx, u32, u32 arg) + { + if (RSX(ctx)->in_begin_end) + RSX(ctx)->append_array_element(arg); + } + + void draw_arrays(context* /*rsx*/, u32 /*reg*/, u32 arg) + { + REGS(ctx)->current_draw_clause.command = rsx::draw_command::array; + rsx::registers_decoder::decoded_type v(arg); + + REGS(ctx)->current_draw_clause.append(v.start(), v.count()); + } + + void draw_index_array(context* /*rsx*/, u32 /*reg*/, u32 arg) + { + REGS(ctx)->current_draw_clause.command = rsx::draw_command::indexed; + rsx::registers_decoder::decoded_type v(arg); + + REGS(ctx)->current_draw_clause.append(v.start(), v.count()); + } + + void draw_inline_array(context* /*rsx*/, u32 /*reg*/, u32 arg) + { + arg = std::bit_cast>(arg); + REGS(ctx)->current_draw_clause.command = rsx::draw_command::inlined_array; + 
REGS(ctx)->current_draw_clause.inline_vertex_array.push_back(arg); + } + + void set_transform_program_start(context* ctx, u32 reg, u32) + { + if (REGS(ctx)->registers[reg] != REGS(ctx)->latch) + { + RSX(ctx)->m_graphics_state |= rsx::pipeline_state::vertex_program_ucode_dirty; + } + } + + void set_vertex_attribute_output_mask(context* ctx, u32 reg, u32) + { + if (REGS(ctx)->registers[reg] != REGS(ctx)->latch) + { + RSX(ctx)->m_graphics_state |= rsx::pipeline_state::vertex_program_state_dirty; + } + } + + void set_vertex_base_offset(context* ctx, u32 reg, u32 arg) + { + util::push_draw_parameter_change(ctx, vertex_base_modifier_barrier, reg, arg); + } + + void set_index_base_offset(context* ctx, u32 reg, u32 arg) + { + util::push_draw_parameter_change(ctx, index_base_modifier_barrier, reg, arg); + } + + void check_index_array_dma(context* ctx, u32 reg, u32 arg) + { + // Check if either location or index type are invalid + if (arg & ~(CELL_GCM_LOCATION_MAIN | (CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16 << 4))) + { + // Ignore invalid value, recover + REGS(ctx)->registers[reg] = REGS(ctx)->latch; + RSX(ctx)->recover_fifo(); + + rsx_log.error("Invalid NV4097_SET_INDEX_ARRAY_DMA value: 0x%x", arg); + } + } + + ///// Drawing + + void set_begin_end(context* ctx, u32 /*reg*/, u32 arg) + { + // Ignore upper bits + if (const u8 prim = static_cast(arg)) + { + const auto primitive_type = to_primitive_type(prim); + if (!primitive_type) + { + RSX(ctx)->in_begin_end = true; + + rsx_log.warning("Invalid NV4097_SET_BEGIN_END value: 0x%x", arg); + return; + } + + REGS(ctx)->current_draw_clause.reset(primitive_type); + RSX(ctx)->begin(); + return; + } + + // Check if we have immediate mode vertex data in a driver-local buffer + if (REGS(ctx)->current_draw_clause.command == rsx::draw_command::none) + { + const u32 push_buffer_vertices_count = RSX(ctx)->get_push_buffer_vertex_count(); + const u32 push_buffer_index_count = RSX(ctx)->get_push_buffer_index_count(); + + // Need to set this flag 
since it overrides some register contents + REGS(ctx)->current_draw_clause.is_immediate_draw = true; + + if (push_buffer_index_count) + { + REGS(ctx)->current_draw_clause.command = rsx::draw_command::indexed; + REGS(ctx)->current_draw_clause.append(0, push_buffer_index_count); + } + else if (push_buffer_vertices_count) + { + REGS(ctx)->current_draw_clause.command = rsx::draw_command::array; + REGS(ctx)->current_draw_clause.append(0, push_buffer_vertices_count); + } + } + else + { + REGS(ctx)->current_draw_clause.is_immediate_draw = false; + } + + if (!REGS(ctx)->current_draw_clause.empty()) + { + REGS(ctx)->current_draw_clause.compile(); + + if (g_cfg.video.disable_video_output) + { + RSX(ctx)->execute_nop_draw(); + RSX(ctx)->rsx::thread::end(); + return; + } + + RSX(ctx)->end(); + } + else + { + RSX(ctx)->in_begin_end = false; + } + + if (RSX(ctx)->pause_on_draw && RSX(ctx)->pause_on_draw.exchange(false)) + { + RSX(ctx)->state -= cpu_flag::dbg_step; + RSX(ctx)->state += cpu_flag::dbg_pause; + RSX(ctx)->check_state(); + } + } + + void clear(context* ctx, u32 /*reg*/, u32 arg) + { + RSX(ctx)->clear_surface(arg); + + RSX_CAPTURE_EVENT("clear"); + } + + void clear_zcull(context* ctx, u32 /*reg*/, u32 /*arg*/) + { + RSX_CAPTURE_EVENT("clear zcull memory"); + } + + void set_face_property(context* ctx, u32 reg, u32 arg) + { + if (reg == REGS(ctx)->latch) + { + return; + } + + bool valid; + switch (reg) + { + case NV4097_SET_CULL_FACE: + valid = !!to_cull_face(arg); break; + case NV4097_SET_FRONT_FACE: + valid = !!to_front_face(arg); break; + default: + valid = false; break; + } + + if (valid) [[ likely ]] + { + RSX(ctx)->m_graphics_state |= rsx::pipeline_config_dirty; + } + else + { + REGS(ctx)->registers[reg] = REGS(ctx)->latch; + } + } + + void set_blend_equation(context* ctx, u32 reg, u32 arg) + { + if (reg == REGS(ctx)->latch) + { + return; + } + + if (to_blend_equation(arg & 0xFFFF) && + to_blend_equation((arg >> 16) & 0xFFFF)) [[ likely ]] + { + 
RSX(ctx)->m_graphics_state |= rsx::pipeline_config_dirty; + } + else + { + REGS(ctx)->decode(reg, REGS(ctx)->latch); + } + } + + void set_blend_factor(context* ctx, u32 reg, u32 arg) + { + if (reg == REGS(ctx)->latch) + { + return; + } + + if (to_blend_factor(arg & 0xFFFF) && + to_blend_factor((arg >> 16) & 0xFFFF)) [[ likely ]] + { + RSX(ctx)->m_graphics_state |= rsx::pipeline_config_dirty; + } + else + { + REGS(ctx)->decode(reg, REGS(ctx)->latch); + } + } + + ///// Reports + + void get_report(context* ctx, u32 /*reg*/, u32 arg) + { + u8 type = arg >> 24; + u32 offset = arg & 0xffffff; + + auto address_ptr = util::get_report_data_impl(ctx, offset); + if (!address_ptr) + { + rsx_log.error("Bad argument passed to NV4097_GET_REPORT, arg=0x%X", arg); + return; + } + + switch (type) + { + case CELL_GCM_ZPASS_PIXEL_CNT: + case CELL_GCM_ZCULL_STATS: + case CELL_GCM_ZCULL_STATS1: + case CELL_GCM_ZCULL_STATS2: + case CELL_GCM_ZCULL_STATS3: + RSX(ctx)->get_zcull_stats(type, vm::cast(address_ptr)); + break; + default: + rsx_log.error("NV4097_GET_REPORT: Bad type %d", type); + + vm::_ref>(address_ptr).atomic_op([&](CellGcmReportData& data) + { + data.timer = RSX(ctx)->timestamp(); + data.padding = 0; + }); + break; + } + } + + void clear_report_value(context* ctx, u32 /*reg*/, u32 arg) + { + switch (arg) + { + case CELL_GCM_ZPASS_PIXEL_CNT: + case CELL_GCM_ZCULL_STATS: + break; + default: + rsx_log.error("NV4097_CLEAR_REPORT_VALUE: Bad type: %d", arg); + break; + } + + RSX(ctx)->clear_zcull_stats(arg); + } + + void set_render_mode(context* ctx, u32, u32 arg) + { + const u32 mode = arg >> 24; + switch (mode) + { + case 1: + RSX(ctx)->disable_conditional_rendering(); + return; + case 2: + break; + default: + rsx_log.error("Unknown render mode %d", mode); + return; + } + + const u32 offset = arg & 0xffffff; + auto address_ptr = util::get_report_data_impl(ctx, offset); + + if (!address_ptr) + { + rsx_log.error("Bad argument passed to NV4097_SET_RENDER_ENABLE, arg=0x%X", arg); + 
return; + } + + // Defer conditional render evaluation + RSX(ctx)->enable_conditional_rendering(vm::cast(address_ptr)); + } + + void set_zcull_render_enable(context* ctx, u32, u32) + { + RSX(ctx)->notify_zcull_info_changed(); + } + + void set_zcull_stats_enable(context* ctx, u32, u32) + { + RSX(ctx)->notify_zcull_info_changed(); + } + + void set_zcull_pixel_count_enable(context* ctx, u32, u32) + { + RSX(ctx)->notify_zcull_info_changed(); + } + + ///// Misc (sync objects, etc) + + void set_notify(context* ctx, u32 /*reg*/, u32 /*arg*/) + { + const u32 location = REGS(ctx)->context_dma_notify(); + const u32 index = (location & 0x7) ^ 0x7; + + if ((location & ~7) != (CELL_GCM_CONTEXT_DMA_NOTIFY_MAIN_0 & ~7)) + { + if (rsx_log.trace) + rsx_log.trace("NV4097_NOTIFY: invalid context = 0x%x", REGS(ctx)->context_dma_notify()); + return; + } + + const u32 addr = RSX(ctx)->iomap_table.get_addr(0xf100000 + (index * 0x40)); + + ensure(addr != umax); + + vm::_ref>(addr).store( + { + RSX(ctx)->timestamp(), + 0 + }); + } + + void texture_read_semaphore_release(context* ctx, u32 /*reg*/, u32 arg) + { + // Pipeline barrier seems to be equivalent to a SHADER_READ stage barrier. + // Ideally the GPU only needs to have cached all textures declared up to this point before writing the label. + + // lle-gcm likes to inject system reserved semaphores, presumably for system/vsh usage + // Avoid calling render to avoid any havoc(flickering) they may cause from invalid flush/write + const u32 offset = REGS(ctx)->semaphore_offset_4097(); + + if (offset % 16) + { + rsx_log.error("NV4097 semaphore using unaligned offset, recovering. (offset=0x%x)", offset); + RSX(ctx)->recover_fifo(); + return; + } + + const u32 addr = get_address(offset, REGS(ctx)->semaphore_context_dma_4097()); + + if (RSX(ctx)->label_addr >> 28 != addr >> 28) + { + rsx_log.error("NV4097 semaphore unexpected address. Please report to the developers. 
(offset=0x%x, addr=0x%x)", offset, addr); + } + + if (g_cfg.video.strict_rendering_mode) [[ unlikely ]] + { + util::write_gcm_label(ctx, addr, arg); + } + else + { + util::write_gcm_label(ctx, addr, arg); + } + } + + void back_end_write_semaphore_release(context* ctx, u32 /*reg*/, u32 arg) + { + // Full pipeline barrier. GPU must flush pipeline before writing the label + + const u32 offset = REGS(ctx)->semaphore_offset_4097(); + + if (offset % 16) + { + rsx_log.error("NV4097 semaphore using unaligned offset, recovering. (offset=0x%x)", offset); + RSX(ctx)->recover_fifo(); + return; + } + + const u32 addr = get_address(offset, REGS(ctx)->semaphore_context_dma_4097()); + + if (RSX(ctx)->label_addr >> 28 != addr >> 28) + { + rsx_log.error("NV4097 semaphore unexpected address. Please report to the developers. (offset=0x%x, addr=0x%x)", offset, addr); + } + + const u32 val = (arg & 0xff00ff00) | ((arg & 0xff) << 16) | ((arg >> 16) & 0xff); + util::write_gcm_label(ctx, addr, val); + } + + void sync(context* ctx, u32, u32) + { + RSX(ctx)->sync(); + } + } +} diff --git a/rpcs3/Emu/RSX/NV47/nv4097.h b/rpcs3/Emu/RSX/NV47/nv4097.h new file mode 100644 index 0000000000..6007563434 --- /dev/null +++ b/rpcs3/Emu/RSX/NV47/nv4097.h @@ -0,0 +1,238 @@ +// NV47 3D Engine +#pragma once + +#include "common.h" + +namespace rsx +{ + enum command_barrier_type; + enum vertex_base_type; + + namespace nv4097 + { + void clear(context* ctx, u32 reg, u32 arg); + + void clear_zcull(context* ctx, u32 reg, u32 arg); + + void set_face_property(context* ctx, u32 reg, u32 arg); + + void set_notify(context* ctx, u32 reg, u32 arg); + + void texture_read_semaphore_release(context* ctx, u32 reg, u32 arg); + + void back_end_write_semaphore_release(context* ctx, u32 reg, u32 arg); + + void set_array_element16(context* ctx, u32, u32 arg); + + void set_array_element32(context* ctx, u32, u32 arg); + + void draw_arrays(context* /*rsx*/, u32 reg, u32 arg); + + void draw_index_array(context* /*rsx*/, u32 reg, 
u32 arg); + + void draw_inline_array(context* /*rsx*/, u32 reg, u32 arg); + + void set_transform_program_start(context* ctx, u32 reg, u32); + + void set_vertex_attribute_output_mask(context* ctx, u32 reg, u32); + + void set_begin_end(context* ctxthr, u32 reg, u32 arg); + + void get_report(context* ctx, u32 reg, u32 arg); + + void clear_report_value(context* ctx, u32 reg, u32 arg); + + void set_render_mode(context* ctx, u32, u32 arg); + + void set_zcull_render_enable(context* ctx, u32, u32); + + void set_zcull_stats_enable(context* ctx, u32, u32); + + void set_zcull_pixel_count_enable(context* ctx, u32, u32); + + void sync(context* ctx, u32, u32); + + void set_shader_program_dirty(context* ctx, u32, u32); + + void set_surface_dirty_bit(context* ctx, u32 reg, u32 arg); + + void set_surface_format(context* ctx, u32 reg, u32 arg); + + void set_surface_options_dirty_bit(context* ctx, u32 reg, u32 arg); + + void set_color_mask(context* ctx, u32 reg, u32 arg); + + void set_stencil_op(context* ctx, u32 reg, u32 arg); + + void set_vertex_base_offset(context* ctx, u32 reg, u32 arg); + + void set_index_base_offset(context* ctx, u32 reg, u32 arg); + + void check_index_array_dma(context* ctx, u32 reg, u32 arg); + + void set_blend_equation(context* ctx, u32 reg, u32 arg); + + void set_blend_factor(context* ctx, u32 reg, u32 arg); + +#define RSX(ctx) ctx->rsxthr +#define REGS(ctx) (&rsx::method_registers) + + /** + * id = base method register + * index = register index in method + * count = element count per attribute + * register_count = number of registers consumed per attribute. 
E.g 3-element methods have padding + */ + template + void set_vertex_data_impl(context* ctx, u32 arg) + { + static constexpr usz increment_per_array_index = (register_count * sizeof(type)) / sizeof(u32); + + static constexpr usz attribute_index = index / increment_per_array_index; + static constexpr usz vertex_subreg = index % increment_per_array_index; + + constexpr auto vtype = vertex_data_type_from_element_type::type; + static_assert(vtype != rsx::vertex_base_type::cmp); + static_assert(vtype != rsx::vertex_base_type::ub256); + + // Convert LE data to BE layout + if constexpr (sizeof(type) == 4) + { + arg = std::bit_cast>(arg); + } + else if constexpr (sizeof(type) == 2) + { + // 2 16-bit values packed in 1 32-bit word + const auto be_data = std::bit_cast>(arg); + + // After u32 swap, the components are in the wrong position + arg = (be_data << 16) | (be_data >> 16); + } + + util::push_vertex_data(attribute_index, vertex_subreg, count, vtype); + } + + template + struct set_vertex_data4ub_m + { + static void impl(context* ctx, u32 reg, u32 arg) + { + set_vertex_data_impl(ctx, arg); + } + }; + + template + struct set_vertex_data1f_m + { + static void impl(context* ctx, u32 reg, u32 arg) + { + set_vertex_data_impl(ctx, arg); + } + }; + + template + struct set_vertex_data2f_m + { + static void impl(context* ctx, u32 reg, u32 arg) + { + set_vertex_data_impl(ctx, arg); + } + }; + + template + struct set_vertex_data3f_m + { + static void impl(context* ctx, u32 reg, u32 arg) + { + //Register alignment is only 1, 2, or 4 (Rachet & Clank 2) + set_vertex_data_impl(ctx, arg); + } + }; + + template + struct set_vertex_data4f_m + { + static void impl(context* ctx, u32 reg, u32 arg) + { + set_vertex_data_impl(ctx, arg); + } + }; + + template + struct set_vertex_data2s_m + { + static void impl(context* ctx, u32 reg, u32 arg) + { + set_vertex_data_impl(ctx, arg); + } + }; + + template + struct set_vertex_data4s_m + { + static void impl(context* ctx, u32 reg, u32 arg) + { + 
set_vertex_data_impl(ctx, arg); + } + }; + + template + struct set_vertex_data_scaled4s_m + { + static void impl(context* ctx, u32 reg, u32 arg) + { + set_vertex_data_impl(ctx, arg); + } + }; + + struct set_transform_constant + { + static void impl(context* ctx, u32 reg, u32 arg); + }; + + struct set_transform_program + { + static void impl(context* ctx, u32 reg, u32 arg); + }; + + template + struct set_vertex_array_offset + { + static void impl(context* ctx, u32 reg, u32 arg) + { + util::push_draw_parameter_change(ctx, vertex_array_offset_modifier_barrier, reg, arg); + } + }; + + template + struct set_texture_dirty_bit + { + static void impl(context* ctx, u32 reg, u32 arg) + { + RSX(ctx)->m_textures_dirty[index] = true; + + if (RSX(ctx)->current_fp_metadata.referenced_textures_mask & (1 << index)) + { + RSX(ctx)->m_graphics_state |= rsx::pipeline_state::fragment_program_state_dirty; + } + } + }; + + template + struct set_vertex_texture_dirty_bit + { + static void impl(context* ctx, u32 reg, u32 arg) + { + RSX(ctx)->m_vertex_textures_dirty[index] = true; + + if (RSX(ctx)->current_vp_metadata.referenced_textures_mask & (1 << index)) + { + RSX(ctx)->m_graphics_state |= rsx::pipeline_state::vertex_program_state_dirty; + } + } + }; + +#undef RSX +#undef REGS + } +} diff --git a/rpcs3/Emu/RSX/NV47/nv47.h b/rpcs3/Emu/RSX/NV47/nv47.h new file mode 100644 index 0000000000..8a27f4d14c --- /dev/null +++ b/rpcs3/Emu/RSX/NV47/nv47.h @@ -0,0 +1,7 @@ +// 3D Engine definitions +#pragma once + +#include "nv3089.h" +#include "nv308a.h" +#include "nv406e.h" +#include "nv4097.h" diff --git a/rpcs3/Emu/RSX/RSXDisAsm.cpp b/rpcs3/Emu/RSX/RSXDisAsm.cpp index 81ba45c586..e1ac01c001 100644 --- a/rpcs3/Emu/RSX/RSXDisAsm.cpp +++ b/rpcs3/Emu/RSX/RSXDisAsm.cpp @@ -8,7 +8,7 @@ namespace rsx { - void invalid_method(thread*, u32, u32); + void invalid_method(context*, u32, u32); } u32 RSXDisAsm::disasm(u32 pc) diff --git a/rpcs3/Emu/RSX/RSXFIFO.cpp b/rpcs3/Emu/RSX/RSXFIFO.cpp index 
637db20913..825bf88e5f 100644 --- a/rpcs3/Emu/RSX/RSXFIFO.cpp +++ b/rpcs3/Emu/RSX/RSXFIFO.cpp @@ -7,6 +7,8 @@ #include "Core/RSXReservationLock.hpp" #include "Emu/Memory/vm_reservation.h" #include "Emu/Cell/lv2/sys_rsx.h" +#include "NV47/context.h" + #include "util/asm.hpp" #include @@ -808,6 +810,9 @@ namespace rsx } } + // FIXME: This should be properly managed + rsx::context ctx{ .rsxthr = this, .register_state = &method_registers }; + if (m_flattener.is_enabled()) [[unlikely]] { switch(m_flattener.test(command)) @@ -819,15 +824,15 @@ namespace rsx case FIFO::EMIT_END: { // Emit end command to close existing scope - //ensure(in_begin_end); - methods[NV4097_SET_BEGIN_END](this, NV4097_SET_BEGIN_END, 0); + AUDIT(in_begin_end); + methods[NV4097_SET_BEGIN_END](&ctx, NV4097_SET_BEGIN_END, 0); break; } case FIFO::EMIT_BARRIER: { - //ensure(in_begin_end); - methods[NV4097_SET_BEGIN_END](this, NV4097_SET_BEGIN_END, 0); - methods[NV4097_SET_BEGIN_END](this, NV4097_SET_BEGIN_END, m_flattener.get_primitive()); + AUDIT(in_begin_end); + methods[NV4097_SET_BEGIN_END](&ctx, NV4097_SET_BEGIN_END, 0); + methods[NV4097_SET_BEGIN_END](&ctx, NV4097_SET_BEGIN_END, m_flattener.get_primitive()); break; } default: @@ -846,19 +851,19 @@ namespace rsx const u32 reg = (command.reg & 0xffff) >> 2; const u32 value = command.value; - method_registers.decode(reg, value); + ctx.register_state->decode(reg, value); if (auto method = methods[reg]) { - method(this, reg, value); + method(&ctx, reg, value); if (state & cpu_flag::again) { - method_registers.decode(reg, method_registers.register_previous_value); + ctx.register_state->decode(reg, ctx.register_state->latch); break; } } - else if (method_registers.register_previous_value != value) + else if (ctx.register_state->latch != value) { // Something changed, set signal flags if any specified m_graphics_state |= state_signals[reg]; diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index 498433d776..0d195a18f2 100644 --- 
a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -3383,7 +3383,7 @@ namespace rsx return fifo_ctrl->last_cmd(); } - void invalid_method(thread*, u32, u32); + void invalid_method(context*, u32, u32); void thread::dump_regs(std::string& result, std::any& /*custom_data*/) const { diff --git a/rpcs3/Emu/RSX/rsx_methods.cpp b/rpcs3/Emu/RSX/rsx_methods.cpp index 9ddce9c959..706d07219a 100644 --- a/rpcs3/Emu/RSX/rsx_methods.cpp +++ b/rpcs3/Emu/RSX/rsx_methods.cpp @@ -10,6 +10,9 @@ #include "Emu/Cell/lv2/sys_rsx.h" #include "Emu/RSX/Common/BufferUtils.h" +#include "Emu/RSX/NV47/nv47.h" +#include "Emu/RSX/NV47/context_accessors.define.h" + namespace rsx { rsx_state method_registers; @@ -17,1807 +20,36 @@ namespace rsx std::array methods{}; std::array state_signals{}; - void invalid_method(thread* rsx, u32 reg, u32 arg) + void invalid_method(context* ctx, u32 reg, u32 arg) { //Don't throw, gather information and ignore broken/garbage commands //TODO: Investigate why these commands are executed at all. (Heap corruption? Alignment padding?) - const u32 cmd = rsx->get_fifo_cmd(); + const u32 cmd = RSX(ctx)->get_fifo_cmd(); rsx_log.error("Invalid RSX method 0x%x (arg=0x%x, start=0x%x, count=0x%x, non-inc=%s)", reg << 2, arg, cmd & 0xfffc, (cmd >> 18) & 0x7ff, !!(cmd & RSX_METHOD_NON_INCREMENT_CMD)); if (g_cfg.core.rsx_fifo_accuracy != rsx_fifo_mode::as_ps3) { - rsx->recover_fifo(); + RSX(ctx)->recover_fifo(); } } - static void trace_method(thread* /*rsx*/, u32 reg, u32 arg) + static void trace_method(context* /*ctx*/, u32 reg, u32 arg) { // For unknown yet valid methods rsx_log.trace("RSX method 0x%x (arg=0x%x)", reg << 2, arg); } - template - void write_gcm_label(thread* rsx, u32 address, u32 data) - { - const bool is_flip_sema = (address == (rsx->label_addr + 0x10) || address == (rsx->device_addr + 0x30)); - if (!is_flip_sema) - { - // First, queue the GPU work. If it flushes the queue for us, the following routines will be faster. 
- const bool handled = rsx->get_backend_config().supports_host_gpu_labels && rsx->release_GCM_label(address, data); - - if (vm::_ref(address).val == data) - { - // It's a no-op to write the same value (although there is a delay in real-hw so it's more accurate to allow GPU label in this case) - return; - } - - if constexpr (FlushDMA) - { - // If the backend handled the request, this call will basically be a NOP - g_fxo->get().sync(); - } - - if constexpr (FlushPipe) - { - // Manually flush the pipeline. - // It is possible to stream report writes using the host GPU, but that generates too much submit traffic. - rsx->sync(); - } - - if (handled) - { - // Backend will handle it, nothing to write. - return; - } - } - - vm::_ref(address).val = data; - } - - template struct vertex_data_type_from_element_type; - template<> struct vertex_data_type_from_element_type { static const vertex_base_type type = vertex_base_type::f; }; - template<> struct vertex_data_type_from_element_type { static const vertex_base_type type = vertex_base_type::sf; }; - template<> struct vertex_data_type_from_element_type { static const vertex_base_type type = vertex_base_type::ub; }; - template<> struct vertex_data_type_from_element_type { static const vertex_base_type type = vertex_base_type::s32k; }; - template<> struct vertex_data_type_from_element_type { static const vertex_base_type type = vertex_base_type::s1; }; - - namespace nv406e - { - void set_reference(thread* rsx, u32 /*reg*/, u32 arg) - { - rsx->sync(); - - // Write ref+get (get will be written again with the same value at command end) - auto& dma = vm::_ref(rsx->dma_address); - dma.get.release(rsx->fifo_ctrl->get_pos()); - dma.ref.store(arg); - } - - void semaphore_acquire(thread* rsx, u32 /*reg*/, u32 arg) - { - rsx->sync_point_request.release(true); - const u32 addr = get_address(method_registers.semaphore_offset_406e(), method_registers.semaphore_context_dma_406e()); - - const auto& sema = vm::_ref(addr).val; - - if (sema == 
arg) - { - // Flip semaphore doesnt need wake-up delay - if (addr != rsx->label_addr + 0x10) - { - rsx->flush_fifo(); - rsx->fifo_wake_delay(2); - } - - return; - } - else - { - rsx->flush_fifo(); - } - - u64 start = rsx::uclock(); - u64 last_check_val = start; - - while (sema != arg) - { - if (rsx->test_stopped()) - { - rsx->state += cpu_flag::again; - return; - } - - if (const auto tdr = static_cast(g_cfg.video.driver_recovery_timeout)) - { - const u64 current = rsx::uclock(); - - if (current - last_check_val > 20'000) - { - // Suspicious amnount of time has passed - // External pause such as debuggers' pause or operating system sleep may have taken place - // Ignore it - start += current - last_check_val; - } - - last_check_val = current; - - if ((current - start) > tdr) - { - // If longer than driver timeout force exit - rsx_log.error("nv406e::semaphore_acquire has timed out. semaphore_address=0x%X", addr); - break; - } - } - - rsx->cpu_wait({}); - } - - rsx->fifo_wake_delay(); - rsx->performance_counters.idle_time += (rsx::uclock() - start); - } - - void semaphore_release(thread* rsx, u32 /*reg*/, u32 arg) - { - const u32 offset = method_registers.semaphore_offset_406e(); - - if (offset % 4) - { - rsx_log.warning("NV406E semaphore release is using unaligned semaphore, ignoring. (offset=0x%x)", offset); - return; - } - - const u32 ctxt = method_registers.semaphore_context_dma_406e(); - - // By avoiding doing this on flip's semaphore release - // We allow last gcm's registers reset to occur in case of a crash - if (const bool is_flip_sema = (offset == 0x10 && ctxt == CELL_GCM_CONTEXT_DMA_SEMAPHORE_R); - !is_flip_sema) - { - rsx->sync_point_request.release(true); - } - - const u32 addr = get_address(offset, ctxt); - - // TODO: Check if possible to write on reservations - if (rsx->label_addr >> 28 != addr >> 28) - { - rsx_log.error("NV406E semaphore unexpected address. Please report to the developers. 
(offset=0x%x, addr=0x%x)", offset, addr); - rsx->recover_fifo(); - return; - } - - if (addr == rsx->device_addr + 0x30 && !arg) - { - // HW flip synchronization related, 1 is not written without display queue command (TODO: make it behave as real hw) - arg = 1; - } - - write_gcm_label(rsx, addr, arg); - } - } - - namespace nv4097 - { - void clear(thread* rsx, u32 /*reg*/, u32 arg) - { - rsx->clear_surface(arg); - - if (rsx->capture_current_frame) - { - rsx->capture_frame("clear"); - } - } - - void clear_zcull(thread* rsx, u32 /*reg*/, u32 /*arg*/) - { - if (rsx->capture_current_frame) - { - rsx->capture_frame("clear zcull memory"); - } - } - - void set_face_property(thread* rsx, u32 reg, u32 arg) - { - if (reg == method_registers.register_previous_value) - { - return; - } - - bool valid; - switch (reg) - { - case NV4097_SET_CULL_FACE: - valid = !!to_cull_face(arg); break; - case NV4097_SET_FRONT_FACE: - valid = !!to_front_face(arg); break; - default: - valid = false; break; - } - - if (valid) [[ likely ]] - { - rsx->m_graphics_state |= rsx::pipeline_config_dirty; - } - else - { - method_registers.registers[reg] = method_registers.register_previous_value; - } - } - - void set_notify(thread* rsx, u32 /*reg*/, u32 /*arg*/) - { - const u32 location = method_registers.context_dma_notify(); - const u32 index = (location & 0x7) ^ 0x7; - - if ((location & ~7) != (CELL_GCM_CONTEXT_DMA_NOTIFY_MAIN_0 & ~7)) - { - if (rsx_log.trace) - rsx_log.trace("NV4097_NOTIFY: invalid context = 0x%x", method_registers.context_dma_notify()); - return; - } - - const u32 addr = rsx->iomap_table.get_addr(0xf100000 + (index * 0x40)); - - ensure(addr != umax); - - vm::_ref>(addr).store( - { - rsx->timestamp(), - 0 - }); - } - - void texture_read_semaphore_release(thread* rsx, u32 /*reg*/, u32 arg) - { - // Pipeline barrier seems to be equivalent to a SHADER_READ stage barrier. - // Ideally the GPU only needs to have cached all textures declared up to this point before writing the label. 
- - // lle-gcm likes to inject system reserved semaphores, presumably for system/vsh usage - // Avoid calling render to avoid any havoc(flickering) they may cause from invalid flush/write - const u32 offset = method_registers.semaphore_offset_4097(); - - if (offset % 16) - { - rsx_log.error("NV4097 semaphore using unaligned offset, recovering. (offset=0x%x)", offset); - rsx->recover_fifo(); - return; - } - - const u32 addr = get_address(offset, method_registers.semaphore_context_dma_4097()); - - if (rsx->label_addr >> 28 != addr >> 28) - { - rsx_log.error("NV4097 semaphore unexpected address. Please report to the developers. (offset=0x%x, addr=0x%x)", offset, addr); - } - - if (g_cfg.video.strict_rendering_mode) [[ unlikely ]] - { - write_gcm_label(rsx, addr, arg); - } - else - { - write_gcm_label(rsx, addr, arg); - } - } - - void back_end_write_semaphore_release(thread* rsx, u32 /*reg*/, u32 arg) - { - // Full pipeline barrier. GPU must flush pipeline before writing the label - - const u32 offset = method_registers.semaphore_offset_4097(); - - if (offset % 16) - { - rsx_log.error("NV4097 semaphore using unaligned offset, recovering. (offset=0x%x)", offset); - rsx->recover_fifo(); - return; - } - - const u32 addr = get_address(offset, method_registers.semaphore_context_dma_4097()); - - if (rsx->label_addr >> 28 != addr >> 28) - { - rsx_log.error("NV4097 semaphore unexpected address. Please report to the developers. (offset=0x%x, addr=0x%x)", offset, addr); - } - - const u32 val = (arg & 0xff00ff00) | ((arg & 0xff) << 16) | ((arg >> 16) & 0xff); - write_gcm_label(rsx, addr, val); - } - - /** - * id = base method register - * index = register index in method - * count = element count per attribute - * register_count = number of registers consumed per attribute. 
E.g 3-element methods have padding - */ - template - void set_vertex_data_impl(thread* rsx, u32 arg) - { - static constexpr usz increment_per_array_index = (register_count * sizeof(type)) / sizeof(u32); - - static constexpr usz attribute_index = index / increment_per_array_index; - static constexpr usz vertex_subreg = index % increment_per_array_index; - - constexpr auto vtype = vertex_data_type_from_element_type::type; - static_assert(vtype != rsx::vertex_base_type::cmp); - static_assert(vtype != rsx::vertex_base_type::ub256); - - // Convert LE data to BE layout - if constexpr (sizeof(type) == 4) - { - arg = std::bit_cast>(arg); - } - else if constexpr (sizeof(type) == 2) - { - // 2 16-bit values packed in 1 32-bit word - const auto be_data = std::bit_cast>(arg); - - // After u32 swap, the components are in the wrong position - arg = (be_data << 16) | (be_data >> 16); - } - - if (rsx->in_begin_end) - { - // Update to immediate mode register/array - // NOTE: Push buffers still behave like register writes. - // You do not need to specify each attribute for each vertex, the register is referenced instead. - // This is classic OpenGL 1.x behavior as I remember. 
- rsx->append_to_push_buffer(attribute_index, count, vertex_subreg, vtype, arg); - } - - auto& info = rsx::method_registers.register_vertex_info[attribute_index]; - - info.type = vtype; - info.size = count; - info.frequency = 0; - info.stride = 0; - rsx::method_registers.register_vertex_info[attribute_index].data[vertex_subreg] = arg; - } - - template - struct set_vertex_data4ub_m - { - static void impl(thread* rsx, u32 /*reg*/, u32 arg) - { - set_vertex_data_impl(rsx, arg); - } - }; - - template - struct set_vertex_data1f_m - { - static void impl(thread* rsx, u32 /*reg*/, u32 arg) - { - set_vertex_data_impl(rsx, arg); - } - }; - - template - struct set_vertex_data2f_m - { - static void impl(thread* rsx, u32 /*reg*/, u32 arg) - { - set_vertex_data_impl(rsx, arg); - } - }; - - template - struct set_vertex_data3f_m - { - static void impl(thread* rsx, u32 /*reg*/, u32 arg) - { - //Register alignment is only 1, 2, or 4 (Rachet & Clank 2) - set_vertex_data_impl(rsx, arg); - } - }; - - template - struct set_vertex_data4f_m - { - static void impl(thread* rsx, u32 /*reg*/, u32 arg) - { - set_vertex_data_impl(rsx, arg); - } - }; - - template - struct set_vertex_data2s_m - { - static void impl(thread* rsx, u32 /*reg*/, u32 arg) - { - set_vertex_data_impl(rsx, arg); - } - }; - - template - struct set_vertex_data4s_m - { - static void impl(thread* rsx, u32 /*reg*/, u32 arg) - { - set_vertex_data_impl(rsx, arg); - } - }; - - template - struct set_vertex_data_scaled4s_m - { - static void impl(thread* rsx, u32 /*reg*/, u32 arg) - { - set_vertex_data_impl(rsx, arg); - } - }; - - void set_array_element16(thread* rsx, u32, u32 arg) - { - if (rsx->in_begin_end) - { - rsx->append_array_element(arg & 0xFFFF); - rsx->append_array_element(arg >> 16); - } - } - - void set_array_element32(thread* rsx, u32, u32 arg) - { - if (rsx->in_begin_end) - rsx->append_array_element(arg); - } - - void draw_arrays(thread* /*rsx*/, u32 /*reg*/, u32 arg) - { - 
rsx::method_registers.current_draw_clause.command = rsx::draw_command::array; - rsx::registers_decoder::decoded_type v(arg); - - rsx::method_registers.current_draw_clause.append(v.start(), v.count()); - } - - void draw_index_array(thread* /*rsx*/, u32 /*reg*/, u32 arg) - { - rsx::method_registers.current_draw_clause.command = rsx::draw_command::indexed; - rsx::registers_decoder::decoded_type v(arg); - - rsx::method_registers.current_draw_clause.append(v.start(), v.count()); - } - - void draw_inline_array(thread* /*rsx*/, u32 /*reg*/, u32 arg) - { - arg = std::bit_cast>(arg); - rsx::method_registers.current_draw_clause.command = rsx::draw_command::inlined_array; - rsx::method_registers.current_draw_clause.inline_vertex_array.push_back(arg); - } - - struct set_transform_constant - { - static void impl(thread* rsx, u32 _reg, u32 /*arg*/) - { - const u32 index = _reg - NV4097_SET_TRANSFORM_CONSTANT; - const u32 reg = index / 4; - const u8 subreg = index % 4; - - // FIFO args count including this one - const u32 fifo_args_cnt = rsx->fifo_ctrl->get_remaining_args_count() + 1; - - // The range of methods this function resposible to - const u32 method_range = 32 - index; - - // Get limit imposed by FIFO PUT (if put is behind get it will result in a number ignored by min) - const u32 fifo_read_limit = static_cast(((rsx->ctrl->put & ~3ull) - (rsx->fifo_ctrl->get_pos())) / 4); - - const u32 count = std::min({fifo_args_cnt, fifo_read_limit, method_range}); - - const u32 load = rsx::method_registers.transform_constant_load(); - - u32 rcount = count; - if (const u32 max = (load + reg) * 4 + count + subreg, limit = 468 * 4; max > limit) - { - // Ignore addresses outside the usable [0, 467] range - rsx_log.warning("Invalid transform register index (load=%u, index=%u, count=%u)", load, index, count); - - if ((max - count) < limit) - rcount -= max - limit; - else - rcount = 0; - } - - const auto values = &rsx::method_registers.transform_constants[load + reg][subreg]; - - const auto 
fifo_span = rsx->fifo_ctrl->get_current_arg_ptr(); - - if (fifo_span.size() < rcount) - { - rcount = ::size32(fifo_span); - } - - if (rsx->m_graphics_state & rsx::pipeline_state::transform_constants_dirty) - { - // Minor optimization: don't compare values if we already know we need invalidation - copy_data_swap_u32(values, fifo_span.data(), rcount); - } - else - { - if (copy_data_swap_u32_cmp(values, fifo_span.data(), rcount)) - { - // Transform constants invalidation is expensive (~8k bytes per update) - rsx->m_graphics_state |= rsx::pipeline_state::transform_constants_dirty; - } - } - - rsx->fifo_ctrl->skip_methods(rcount - 1); - } - }; - - struct set_transform_program - { - static void impl(thread* rsx, u32 reg, u32 /*arg*/) - { - const u32 index = reg - NV4097_SET_TRANSFORM_PROGRAM; - - // FIFO args count including this one - const u32 fifo_args_cnt = rsx->fifo_ctrl->get_remaining_args_count() + 1; - - // The range of methods this function resposible to - const u32 method_range = 32 - index; - - // Get limit imposed by FIFO PUT (if put is behind get it will result in a number ignored by min) - const u32 fifo_read_limit = static_cast(((rsx->ctrl->put & ~3ull) - (rsx->fifo_ctrl->get_pos())) / 4); - - const u32 count = std::min({fifo_args_cnt, fifo_read_limit, method_range}); - - const u32 load_pos = rsx::method_registers.transform_program_load(); - - u32 rcount = count; - - if (const u32 max = load_pos * 4 + rcount + (index % 4); - max > max_vertex_program_instructions * 4) - { - rsx_log.warning("Program buffer overflow! 
Attempted to write %u VP instructions.", max / 4); - rcount -= max - (max_vertex_program_instructions * 4); - } - - const auto fifo_span = rsx->fifo_ctrl->get_current_arg_ptr(); - - if (fifo_span.size() < rcount) - { - rcount = ::size32(fifo_span); - } - - copy_data_swap_u32(&rsx::method_registers.transform_program[load_pos * 4 + index % 4], fifo_span.data(), rcount); - - rsx->m_graphics_state |= rsx::pipeline_state::vertex_program_ucode_dirty; - rsx::method_registers.transform_program_load_set(load_pos + ((rcount + index % 4) / 4)); - rsx->fifo_ctrl->skip_methods(rcount - 1); - } - }; - - void set_transform_program_start(thread* rsx, u32 reg, u32) - { - if (method_registers.registers[reg] != method_registers.register_previous_value) - { - rsx->m_graphics_state |= rsx::pipeline_state::vertex_program_ucode_dirty; - } - } - - void set_vertex_attribute_output_mask(thread* rsx, u32 reg, u32) - { - if (method_registers.registers[reg] != method_registers.register_previous_value) - { - rsx->m_graphics_state |= rsx::pipeline_state::vertex_program_state_dirty; - } - } - - void set_begin_end(thread* rsxthr, u32 /*reg*/, u32 arg) - { - // Ignore upper bits - if (const u8 prim = static_cast(arg)) - { - const auto primitive_type = to_primitive_type(prim); - if (!primitive_type) - { - rsxthr->in_begin_end = true; - - rsx_log.warning("Invalid NV4097_SET_BEGIN_END value: 0x%x", arg); - return; - } - - rsx::method_registers.current_draw_clause.reset(primitive_type); - rsxthr->begin(); - return; - } - - // Check if we have immediate mode vertex data in a driver-local buffer - if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::none) - { - const u32 push_buffer_vertices_count = rsxthr->get_push_buffer_vertex_count(); - const u32 push_buffer_index_count = rsxthr->get_push_buffer_index_count(); - - // Need to set this flag since it overrides some register contents - rsx::method_registers.current_draw_clause.is_immediate_draw = true; - - if 
(push_buffer_index_count) - { - rsx::method_registers.current_draw_clause.command = rsx::draw_command::indexed; - rsx::method_registers.current_draw_clause.append(0, push_buffer_index_count); - } - else if (push_buffer_vertices_count) - { - rsx::method_registers.current_draw_clause.command = rsx::draw_command::array; - rsx::method_registers.current_draw_clause.append(0, push_buffer_vertices_count); - } - } - else - { - rsx::method_registers.current_draw_clause.is_immediate_draw = false; - } - - if (!rsx::method_registers.current_draw_clause.empty()) - { - rsx::method_registers.current_draw_clause.compile(); - - if (g_cfg.video.disable_video_output) - { - rsxthr->execute_nop_draw(); - rsxthr->rsx::thread::end(); - return; - } - - rsxthr->end(); - } - else - { - rsxthr->in_begin_end = false; - } - - if (rsxthr->pause_on_draw && rsxthr->pause_on_draw.exchange(false)) - { - rsxthr->state -= cpu_flag::dbg_step; - rsxthr->state += cpu_flag::dbg_pause; - rsxthr->check_state(); - } - } - - vm::addr_t get_report_data_impl(u32 offset) - { - u32 location = 0; - blit_engine::context_dma report_dma = method_registers.context_dma_report(); - - switch (report_dma) - { - case blit_engine::context_dma::to_memory_get_report: location = CELL_GCM_CONTEXT_DMA_REPORT_LOCATION_LOCAL; break; - case blit_engine::context_dma::report_location_main: location = CELL_GCM_CONTEXT_DMA_REPORT_LOCATION_MAIN; break; - case blit_engine::context_dma::memory_host_buffer: location = CELL_GCM_CONTEXT_DMA_MEMORY_HOST_BUFFER; break; - default: - return vm::addr_t(0); - } - - return vm::cast(get_address(offset, location)); - } - - void get_report(thread* rsx, u32 /*reg*/, u32 arg) - { - u8 type = arg >> 24; - u32 offset = arg & 0xffffff; - - auto address_ptr = get_report_data_impl(offset); - if (!address_ptr) - { - rsx_log.error("Bad argument passed to NV4097_GET_REPORT, arg=0x%X", arg); - return; - } - - switch (type) - { - case CELL_GCM_ZPASS_PIXEL_CNT: - case CELL_GCM_ZCULL_STATS: - case 
CELL_GCM_ZCULL_STATS1: - case CELL_GCM_ZCULL_STATS2: - case CELL_GCM_ZCULL_STATS3: - rsx->get_zcull_stats(type, address_ptr); - break; - default: - rsx_log.error("NV4097_GET_REPORT: Bad type %d", type); - - vm::_ref>(address_ptr).atomic_op([&](CellGcmReportData& data) - { - data.timer = rsx->timestamp(); - data.padding = 0; - }); - break; - } - } - - void clear_report_value(thread* rsx, u32 /*reg*/, u32 arg) - { - switch (arg) - { - case CELL_GCM_ZPASS_PIXEL_CNT: - case CELL_GCM_ZCULL_STATS: - break; - default: - rsx_log.error("NV4097_CLEAR_REPORT_VALUE: Bad type: %d", arg); - break; - } - - rsx->clear_zcull_stats(arg); - } - - void set_render_mode(thread* rsx, u32, u32 arg) - { - const u32 mode = arg >> 24; - switch (mode) - { - case 1: - rsx->disable_conditional_rendering(); - return; - case 2: - break; - default: - rsx_log.error("Unknown render mode %d", mode); - return; - } - - const u32 offset = arg & 0xffffff; - auto address_ptr = get_report_data_impl(offset); - - if (!address_ptr) - { - rsx_log.error("Bad argument passed to NV4097_SET_RENDER_ENABLE, arg=0x%X", arg); - return; - } - - // Defer conditional render evaluation - rsx->enable_conditional_rendering(address_ptr); - } - - void set_zcull_render_enable(thread* rsx, u32, u32) - { - rsx->notify_zcull_info_changed(); - } - - void set_zcull_stats_enable(thread* rsx, u32, u32) - { - rsx->notify_zcull_info_changed(); - } - - void set_zcull_pixel_count_enable(thread* rsx, u32, u32) - { - rsx->notify_zcull_info_changed(); - } - - void sync(thread* rsx, u32, u32) - { - rsx->sync(); - } - - void set_shader_program_dirty(thread* rsx, u32, u32) - { - rsx->m_graphics_state |= rsx::pipeline_state::fragment_program_ucode_dirty; - } - - void set_surface_dirty_bit(thread* rsx, u32 reg, u32 arg) - { - if (arg == method_registers.register_previous_value) - { - return; - } - - switch (reg) - { - case NV4097_SET_SURFACE_COLOR_TARGET: - rsx->m_graphics_state |= rsx::pipeline_state::pipeline_config_dirty; - break; - case 
NV4097_SET_SURFACE_CLIP_VERTICAL: - case NV4097_SET_SURFACE_CLIP_HORIZONTAL: - rsx->m_graphics_state |= rsx::pipeline_state::vertex_state_dirty; - break; - default: - break; - } - - rsx->m_graphics_state.set(rtt_config_dirty); - rsx->m_graphics_state.clear(rtt_config_contested); - } - - void set_surface_format(thread* rsx, u32 reg, u32 arg) - { - // The high bits of this register are just log2(dimension), ignore them - if ((arg & 0xFFFF) == (method_registers.register_previous_value & 0xFFFF)) - { - return; - } - - // The important parameters have changed (format, type, antialias) - rsx->m_graphics_state |= rsx::pipeline_state::pipeline_config_dirty; - - // Check if we need to also update fragment state - const auto current = method_registers.decode(arg); - const auto previous = method_registers.decode(method_registers.register_previous_value); - - if (*current.antialias() != *previous.antialias() || // Antialias control has changed, update ROP parameters - current.is_integer_color_format() != previous.is_integer_color_format()) // The type of color format also requires ROP control update - { - rsx->m_graphics_state |= rsx::pipeline_state::fragment_state_dirty; - } - - set_surface_dirty_bit(rsx, reg, arg); - } - - void set_surface_options_dirty_bit(thread* rsx, u32 reg, u32 arg) - { - if (arg != method_registers.register_previous_value) - { - rsx->on_framebuffer_options_changed(reg); - rsx->m_graphics_state |= rsx::pipeline_config_dirty; - } - } - - void set_color_mask(thread* rsx, u32 reg, u32 arg) - { - if (arg == method_registers.register_previous_value) - { - return; - } - - if (method_registers.decode(arg).is_invalid()) [[ unlikely ]] - { - method_registers.decode(reg, method_registers.register_previous_value); - } - else - { - set_surface_options_dirty_bit(rsx, reg, arg); - } - } - - void set_stencil_op(thread* rsx, u32 reg, u32 arg) - { - if (arg == method_registers.register_previous_value) - { - return; - } - - const auto typed = to_stencil_op(arg); - if 
(typed) [[ likely ]] - { - set_surface_options_dirty_bit(rsx, reg, arg); - } - else - { - method_registers.decode(reg, method_registers.register_previous_value); - } - } - - void set_vertex_base_offset(thread* rsx, u32 reg, u32 arg) - { - if (rsx->in_begin_end && - !rsx::method_registers.current_draw_clause.empty() && - reg != method_registers.register_previous_value) - { - // Revert change to queue later - method_registers.decode(reg, method_registers.register_previous_value); - - // Insert base mofifier barrier - method_registers.current_draw_clause.insert_command_barrier(vertex_base_modifier_barrier, arg); - } - } - - void set_index_base_offset(thread* rsx, u32 reg, u32 arg) - { - if (rsx->in_begin_end && - !rsx::method_registers.current_draw_clause.empty() && - reg != method_registers.register_previous_value) - { - // Revert change to queue later - method_registers.decode(reg, method_registers.register_previous_value); - - // Insert base mofifier barrier - method_registers.current_draw_clause.insert_command_barrier(index_base_modifier_barrier, arg); - } - } - - template - struct set_vertex_array_offset - { - static void impl(thread* rsx, u32 reg, u32 arg) - { - if (rsx->in_begin_end && - !rsx::method_registers.current_draw_clause.empty() && - reg != method_registers.register_previous_value) - { - // Revert change to queue later - method_registers.decode(reg, method_registers.register_previous_value); - - // Insert offset mofifier barrier - method_registers.current_draw_clause.insert_command_barrier(vertex_array_offset_modifier_barrier, arg, index); - } - } - }; - - void check_index_array_dma(thread* rsx, u32 reg, u32 arg) - { - // Check if either location or index type are invalid - if (arg & ~(CELL_GCM_LOCATION_MAIN | (CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16 << 4))) - { - // Ignore invalid value, recover - method_registers.registers[reg] = method_registers.register_previous_value; - rsx->recover_fifo(); - - rsx_log.error("Invalid NV4097_SET_INDEX_ARRAY_DMA value: 
0x%x", arg); - } - } - - void set_blend_equation(thread* rsx, u32 reg, u32 arg) - { - if (reg == method_registers.register_previous_value) - { - return; - } - - if (to_blend_equation(arg & 0xFFFF) && - to_blend_equation((arg >> 16) & 0xFFFF)) [[ likely ]] - { - rsx->m_graphics_state |= rsx::pipeline_config_dirty; - } - else - { - method_registers.decode(reg, method_registers.register_previous_value); - } - } - - void set_blend_factor(thread* rsx, u32 reg, u32 arg) - { - if (reg == method_registers.register_previous_value) - { - return; - } - - if (to_blend_factor(arg & 0xFFFF) && - to_blend_factor((arg >> 16) & 0xFFFF)) [[ likely ]] - { - rsx->m_graphics_state |= rsx::pipeline_config_dirty; - } - else - { - method_registers.decode(reg, method_registers.register_previous_value); - } - } - - template - struct set_texture_dirty_bit - { - static void impl(thread* rsx, u32 /*reg*/, u32 /*arg*/) - { - rsx->m_textures_dirty[index] = true; - - if (rsx->current_fp_metadata.referenced_textures_mask & (1 << index)) - { - rsx->m_graphics_state |= rsx::pipeline_state::fragment_program_state_dirty; - } - } - }; - - template - struct set_vertex_texture_dirty_bit - { - static void impl(thread* rsx, u32 /*reg*/, u32 /*arg*/) - { - rsx->m_vertex_textures_dirty[index] = true; - - if (rsx->current_vp_metadata.referenced_textures_mask & (1 << index)) - { - rsx->m_graphics_state |= rsx::pipeline_state::vertex_program_state_dirty; - } - } - }; - } - - namespace nv308a - { - struct color - { - static void impl(thread* rsx, u32 reg, u32) - { - const u32 out_x_max = method_registers.nv308a_size_out_x(); - const u32 index = reg - NV308A_COLOR; - - if (index >= out_x_max) - { - // Skip - return; - } - - // Get position of the current command arg - [[maybe_unused]] const u32 src_offset = rsx->fifo_ctrl->get_pos(); - - // FIFO args count including this one - const u32 fifo_args_cnt = rsx->fifo_ctrl->get_remaining_args_count() + 1; - - // The range of methods this function resposible to - const 
u32 method_range = std::min(0x700 - index, out_x_max - index); - - // Get limit imposed by FIFO PUT (if put is behind get it will result in a number ignored by min) - const u32 fifo_read_limit = static_cast(((rsx->ctrl->put & ~3ull) - (rsx->fifo_ctrl->get_pos())) / 4); - - u32 count = std::min({fifo_args_cnt, fifo_read_limit, method_range}); - - const u32 dst_dma = method_registers.blit_engine_output_location_nv3062(); - const u32 dst_offset = method_registers.blit_engine_output_offset_nv3062(); - const u32 out_pitch = method_registers.blit_engine_output_pitch_nv3062(); - - const u32 x = method_registers.nv308a_x() + index; - const u32 y = method_registers.nv308a_y(); - - const auto fifo_span = rsx->fifo_ctrl->get_current_arg_ptr(); - - if (fifo_span.size() < count) - { - count = ::size32(fifo_span); - } - - // Skip "handled methods" - rsx->fifo_ctrl->skip_methods(count - 1); - - // 308A::COLOR can be used to create custom sync primitives. - // Hide this behind strict mode due to the potential performance implications. 
- if (count == 1 && g_cfg.video.strict_rendering_mode && !g_cfg.video.relaxed_zcull_sync) - { - rsx->sync(); - } - - switch (*method_registers.blit_engine_nv3062_color_format()) - { - case blit_engine::transfer_destination_format::a8r8g8b8: - case blit_engine::transfer_destination_format::y32: - { - // Bit cast - optimize to mem copy - - const u32 data_length = count * 4; - - const auto dst_address = get_address(dst_offset + (x * 4) + (out_pitch * y), dst_dma, data_length); - - if (!dst_address) - { - rsx->recover_fifo(); - return; - } - - const auto dst = vm::_ptr(dst_address); - const auto src = reinterpret_cast(fifo_span.data()); - - rsx::reservation_lock rsx_lock(dst_address, data_length); - - if (rsx->fifo_ctrl->last_cmd() & RSX_METHOD_NON_INCREMENT_CMD_MASK) [[unlikely]] - { - // Move last 32 bits - reinterpret_cast(dst)[0] = reinterpret_cast(src)[count - 1]; - rsx->invalidate_fragment_program(dst_dma, dst_offset, 4); - } - else - { - if (dst_dma & CELL_GCM_LOCATION_MAIN) - { - // May overlap - std::memmove(dst, src, data_length); - } - else - { - // Never overlaps - std::memcpy(dst, src, data_length); - } - - rsx->invalidate_fragment_program(dst_dma, dst_offset, count * 4); - } - - break; - } - case blit_engine::transfer_destination_format::r5g6b5: - { - const auto data_length = count * 2; - - const auto dst_address = get_address(dst_offset + (x * 2) + (y * out_pitch), dst_dma, data_length); - const auto dst = vm::_ptr(dst_address); - const auto src = utils::bless>(fifo_span.data()); - - if (!dst_address) - { - rsx->recover_fifo(); - return; - } - - rsx::reservation_lock rsx_lock(dst_address, data_length); - - auto convert = [](u32 input) -> u16 - { - // Input is considered to be ARGB8 - u32 r = (input >> 16) & 0xFF; - u32 g = (input >> 8) & 0xFF; - u32 b = input & 0xFF; - - r = (r * 32) / 255; - g = (g * 64) / 255; - b = (b * 32) / 255; - return static_cast((r << 11) | (g << 5) | b); - }; - - if (rsx->fifo_ctrl->last_cmd() & 
RSX_METHOD_NON_INCREMENT_CMD_MASK) [[unlikely]] - { - // Move last 16 bits - dst[0] = convert(src[count - 1]); - rsx->invalidate_fragment_program(dst_dma, dst_offset, 2); - break; - } - - for (u32 i = 0; i < count; i++) - { - dst[i] = convert(src[i]); - } - - rsx->invalidate_fragment_program(dst_dma, dst_offset, count * 2); - break; - } - default: - { - fmt::throw_exception("Unreachable"); - } - } - } - }; - } - - namespace nv3089 - { - std::tuple _decode_transfer_registers(thread* rsx) - { - blit_src_info src_info = {}; - blit_dst_info dst_info = {}; - - const rsx::blit_engine::transfer_operation operation = method_registers.blit_engine_operation(); - - const u16 out_x = method_registers.blit_engine_output_x(); - const u16 out_y = method_registers.blit_engine_output_y(); - const u16 out_w = method_registers.blit_engine_output_width(); - const u16 out_h = method_registers.blit_engine_output_height(); - - const u16 in_w = method_registers.blit_engine_input_width(); - const u16 in_h = method_registers.blit_engine_input_height(); - - const blit_engine::transfer_origin in_origin = method_registers.blit_engine_input_origin(); - auto src_color_format = method_registers.blit_engine_src_color_format(); - - const f32 scale_x = method_registers.blit_engine_ds_dx(); - const f32 scale_y = method_registers.blit_engine_dt_dy(); - - // Clipping - // Validate that clipping rect will fit onto both src and dst regions - const u16 clip_w = std::min(method_registers.blit_engine_clip_width(), out_w); - const u16 clip_h = std::min(method_registers.blit_engine_clip_height(), out_h); - - // Check both clip dimensions and dst dimensions - if (clip_w == 0 || clip_h == 0) - { - rsx_log.warning("NV3089_IMAGE_IN: Operation NOPed out due to empty regions"); - return { false, src_info, dst_info }; - } - - if (in_w == 0 || in_h == 0) - { - // Input cant be an empty region - fmt::throw_exception("NV3089_IMAGE_IN_SIZE: Invalid blit dimensions passed (in_w=%d, in_h=%d)", in_w, in_h); - } - - u16 
clip_x = method_registers.blit_engine_clip_x(); - u16 clip_y = method_registers.blit_engine_clip_y(); - - //Fit onto dst - if (clip_x && (out_x + clip_x + clip_w) > out_w) clip_x = 0; - if (clip_y && (out_y + clip_y + clip_h) > out_h) clip_y = 0; - - u16 in_pitch = method_registers.blit_engine_input_pitch(); - - switch (in_origin) - { - case blit_engine::transfer_origin::corner: - case blit_engine::transfer_origin::center: - break; - default: - rsx_log.warning("NV3089_IMAGE_IN_SIZE: unknown origin (%d)", static_cast(in_origin)); - } - - if (operation != rsx::blit_engine::transfer_operation::srccopy) - { - rsx_log.error("NV3089_IMAGE_IN_SIZE: unknown operation (0x%x)", method_registers.registers[NV3089_SET_OPERATION]); - rsx->recover_fifo(); - return { false, src_info, dst_info }; - } - - if (!src_color_format) - { - rsx_log.error("NV3089_IMAGE_IN_SIZE: unknown src color format (0x%x)", method_registers.registers[NV3089_SET_COLOR_FORMAT]); - rsx->recover_fifo(); - return { false, src_info, dst_info }; - } - - const u32 src_offset = method_registers.blit_engine_input_offset(); - const u32 src_dma = method_registers.blit_engine_input_location(); - - u32 dst_offset; - u32 dst_dma = 0; - rsx::blit_engine::transfer_destination_format dst_color_format; - u32 out_pitch = 0; - [[maybe_unused]] u32 out_alignment = 64; - bool is_block_transfer = false; - - switch (method_registers.blit_engine_context_surface()) - { - case blit_engine::context_surface::surface2d: - { - dst_dma = method_registers.blit_engine_output_location_nv3062(); - dst_offset = method_registers.blit_engine_output_offset_nv3062(); - out_pitch = method_registers.blit_engine_output_pitch_nv3062(); - out_alignment = method_registers.blit_engine_output_alignment_nv3062(); - is_block_transfer = fcmp(scale_x, 1.f) && fcmp(scale_y, 1.f); - - if (auto dst_fmt = method_registers.blit_engine_nv3062_color_format(); !dst_fmt) - { - rsx_log.error("NV3089_IMAGE_IN_SIZE: unknown NV3062 dst color format (0x%x)", 
method_registers.registers[NV3062_SET_COLOR_FORMAT]); - rsx->recover_fifo(); - return { false, src_info, dst_info }; - } - else - { - dst_color_format = dst_fmt; - } - - break; - } - case blit_engine::context_surface::swizzle2d: - { - dst_dma = method_registers.blit_engine_nv309E_location(); - dst_offset = method_registers.blit_engine_nv309E_offset(); - - if (auto dst_fmt = method_registers.blit_engine_output_format_nv309E(); !dst_fmt) - { - rsx_log.error("NV3089_IMAGE_IN_SIZE: unknown NV309E dst color format (0x%x)", method_registers.registers[NV309E_SET_FORMAT]); - rsx->recover_fifo(); - return { false, src_info, dst_info }; - } - else - { - dst_color_format = dst_fmt; - } - - break; - } - default: - rsx_log.error("NV3089_IMAGE_IN_SIZE: unknown m_context_surface (0x%x)", static_cast(method_registers.blit_engine_context_surface())); - return { false, src_info, dst_info }; - } - - const u32 in_bpp = (src_color_format == rsx::blit_engine::transfer_source_format::r5g6b5) ? 2 : 4; // bytes per pixel - const u32 out_bpp = (dst_color_format == rsx::blit_engine::transfer_destination_format::r5g6b5) ? 2 : 4; - - if (out_pitch == 0) - { - out_pitch = out_bpp * out_w; - } - - if (in_pitch == 0) - { - in_pitch = in_bpp * in_w; - } - - if (in_bpp != out_bpp) - { - is_block_transfer = false; - } - - u16 in_x, in_y; - if (in_origin == blit_engine::transfer_origin::center) - { - // Convert to normal u,v addressing. 
Under this scheme offset of 1 is actually half-way inside pixel 0 - const float x = std::max(method_registers.blit_engine_in_x(), 0.5f); - const float y = std::max(method_registers.blit_engine_in_y(), 0.5f); - in_x = static_cast(std::floor(x - 0.5f)); - in_y = static_cast(std::floor(y - 0.5f)); - } - else - { - in_x = static_cast(std::floor(method_registers.blit_engine_in_x())); - in_y = static_cast(std::floor(method_registers.blit_engine_in_y())); - } - - // Check for subpixel addressing - if (scale_x < 1.f) - { - float dst_x = in_x * scale_x; - in_x = static_cast(std::floor(dst_x) / scale_x); - } - - if (scale_y < 1.f) - { - float dst_y = in_y * scale_y; - in_y = static_cast(std::floor(dst_y) / scale_y); - } - - const u32 in_offset = in_x * in_bpp + in_pitch * in_y; - const u32 out_offset = out_x * out_bpp + out_pitch * out_y; - - const u32 src_line_length = (in_w * in_bpp); - - u32 src_address = 0; - const u32 dst_address = get_address(dst_offset, dst_dma, 1); // TODO: Add size - - if (is_block_transfer && (clip_h == 1 || (in_pitch == out_pitch && src_line_length == in_pitch))) - { - const u32 nb_lines = std::min(clip_h, in_h); - const u32 data_length = nb_lines * src_line_length; - - if (src_address = get_address(src_offset, src_dma, data_length); - !src_address || !dst_address) - { - rsx->recover_fifo(); - return { false, src_info, dst_info }; - } - - rsx->invalidate_fragment_program(dst_dma, dst_offset, data_length); - - if (const auto result = rsx->read_barrier(src_address, data_length, false); - result == rsx::result_zcull_intr) - { - if (rsx->copy_zcull_stats(src_address, data_length, dst_address) == data_length) - { - // All writes deferred - return { false, src_info, dst_info }; - } - } - } - else - { - const u16 read_h = std::min(static_cast(clip_h / scale_y), in_h); - const u32 data_length = in_pitch * (read_h - 1) + src_line_length; - - if (src_address = get_address(src_offset, src_dma, data_length); - !src_address || !dst_address) - { - 
rsx->recover_fifo(); - return { false, src_info, dst_info }; - } - - rsx->invalidate_fragment_program(dst_dma, dst_offset, data_length); - rsx->read_barrier(src_address, data_length, true); - } - - if (src_address == dst_address && - in_w == clip_w && in_h == clip_h && - in_pitch == out_pitch && - rsx::fcmp(scale_x, 1.f) && rsx::fcmp(scale_y, 1.f)) - { - // NULL operation - rsx_log.warning("NV3089_IMAGE_IN: Operation writes memory onto itself with no modification (move-to-self). Will ignore."); - return { false, src_info, dst_info }; - } - - u8* pixels_src = vm::_ptr(src_address + in_offset); - u8* pixels_dst = vm::_ptr(dst_address + out_offset); - - if (dst_color_format != rsx::blit_engine::transfer_destination_format::r5g6b5 && - dst_color_format != rsx::blit_engine::transfer_destination_format::a8r8g8b8) - { - fmt::throw_exception("NV3089_IMAGE_IN_SIZE: unknown dst_color_format (%d)", static_cast(dst_color_format)); - } - - if (src_color_format != rsx::blit_engine::transfer_source_format::r5g6b5 && - src_color_format != rsx::blit_engine::transfer_source_format::a8r8g8b8) - { - // Alpha has no meaning in both formats - if (src_color_format == rsx::blit_engine::transfer_source_format::x8r8g8b8) - { - src_color_format = rsx::blit_engine::transfer_source_format::a8r8g8b8; - } - else - { - // TODO: Support more formats - fmt::throw_exception("NV3089_IMAGE_IN_SIZE: unknown src_color_format (%d)", static_cast(*src_color_format)); - } - } - - u32 convert_w = static_cast(std::abs(scale_x) * in_w); - u32 convert_h = static_cast(std::abs(scale_y) * in_h); - - if (convert_w == 0 || convert_h == 0) - { - rsx_log.error("NV3089_IMAGE_IN: Invalid dimensions or scaling factor. 
Request ignored (ds_dx=%f, dt_dy=%f)", - method_registers.blit_engine_ds_dx(), method_registers.blit_engine_dt_dy()); - return { false, src_info, dst_info }; - } - - src_info.format = src_color_format; - src_info.origin = in_origin; - src_info.width = in_w; - src_info.height = in_h; - src_info.pitch = in_pitch; - src_info.bpp = in_bpp; - src_info.offset_x = in_x; - src_info.offset_y = in_y; - src_info.dma = src_dma; - src_info.rsx_address = src_address; - src_info.pixels = pixels_src; - - dst_info.format = dst_color_format; - dst_info.width = convert_w; - dst_info.height = convert_h; - dst_info.clip_x = clip_x; - dst_info.clip_y = clip_y; - dst_info.clip_width = clip_w; - dst_info.clip_height = clip_h; - dst_info.offset_x = out_x; - dst_info.offset_y = out_y; - dst_info.pitch = out_pitch; - dst_info.bpp = out_bpp; - dst_info.scale_x = scale_x; - dst_info.scale_y = scale_y; - dst_info.dma = dst_dma; - dst_info.rsx_address = dst_address; - dst_info.pixels = pixels_dst; - dst_info.swizzled = (method_registers.blit_engine_context_surface() == blit_engine::context_surface::swizzle2d); - - return { true, src_info, dst_info }; - } - - void _linear_copy( - const blit_dst_info& dst, - const blit_src_info& src, - u16 out_w, - u16 out_h, - u32 slice_h, - AVPixelFormat ffmpeg_src_format, - AVPixelFormat ffmpeg_dst_format, - bool need_convert, - bool need_clip, - bool src_is_modified, - bool interpolate) - { - std::vector temp2; - - if (!need_convert) [[ likely ]] - { - const bool is_overlapping = !src_is_modified && dst.dma == src.dma && [&]() -> bool - { - const auto src_range = utils::address_range::start_length(src.rsx_address, src.pitch * (src.height - 1) + (src.bpp * src.width)); - const auto dst_range = utils::address_range::start_length(dst.rsx_address, dst.pitch * (dst.clip_height - 1) + (dst.bpp * dst.clip_width)); - return src_range.overlaps(dst_range); - }(); - - if (is_overlapping) [[ unlikely ]] - { - if (need_clip) - { - temp2.resize(dst.pitch * dst.clip_height); 
- clip_image_may_overlap(dst.pixels, src.pixels, dst.clip_x, dst.clip_y, dst.clip_width, dst.clip_height, dst.bpp, src.pitch, dst.pitch, temp2.data()); - return; - } - - if (dst.pitch != src.pitch || dst.pitch != dst.bpp * out_w) - { - const u32 buffer_pitch = dst.bpp * out_w; - temp2.resize(buffer_pitch * out_h); - std::add_pointer_t buf = temp2.data(), pixels = src.pixels; - - // Read the whole buffer from source - for (u32 y = 0; y < out_h; ++y) - { - std::memcpy(buf, pixels, buffer_pitch); - pixels += src.pitch; - buf += buffer_pitch; - } - - buf = temp2.data(), pixels = dst.pixels; - - // Write to destination - for (u32 y = 0; y < out_h; ++y) - { - std::memcpy(pixels, buf, buffer_pitch); - pixels += dst.pitch; - buf += buffer_pitch; - } - - return; - } - - std::memmove(dst.pixels, src.pixels, dst.pitch * out_h); - return; - } - - if (need_clip) [[ unlikely ]] - { - clip_image(dst.pixels, src.pixels, dst.clip_x, dst.clip_y, dst.clip_width, dst.clip_height, dst.bpp, src.pitch, dst.pitch); - return; - } - - if (dst.pitch != src.pitch || dst.pitch != dst.bpp * out_w) [[ unlikely ]] - { - u8 *dst_pixels = dst.pixels, *src_pixels = src.pixels; - - for (u32 y = 0; y < out_h; ++y) - { - std::memcpy(dst_pixels, src_pixels, out_w * dst.bpp); - dst_pixels += dst.pitch; - src_pixels += src.pitch; - } - - return; - } - - std::memcpy(dst.pixels, src.pixels, dst.pitch * out_h); - return; - } - - if (need_clip) [[ unlikely ]] - { - temp2.resize(dst.pitch * std::max(dst.height, dst.clip_height)); - - convert_scale_image(temp2.data(), ffmpeg_dst_format, dst.width, dst.height, dst.pitch, - src.pixels, ffmpeg_src_format, src.width, src.height, src.pitch, slice_h, interpolate); - - clip_image(dst.pixels, temp2.data(), dst.clip_x, dst.clip_y, dst.clip_width, dst.clip_height, dst.bpp, dst.pitch, dst.pitch); - return; - } - - convert_scale_image(dst.pixels, ffmpeg_dst_format, out_w, out_h, dst.pitch, - src.pixels, ffmpeg_src_format, src.width, src.height, src.pitch, slice_h, - 
interpolate); - } - - std::vector _swizzled_copy_1( - const blit_dst_info& dst, - const blit_src_info& src, - u16 out_w, - u16 out_h, - u32 slice_h, - AVPixelFormat ffmpeg_src_format, - AVPixelFormat ffmpeg_dst_format, - bool need_convert, - bool need_clip, - bool interpolate) - { - std::vector temp2, temp3; - - if (need_clip) - { - temp3.resize(dst.pitch * dst.clip_height); - - if (need_convert) - { - temp2.resize(dst.pitch * std::max(dst.height, dst.clip_height)); - - convert_scale_image(temp2.data(), ffmpeg_dst_format, dst.width, dst.height, dst.pitch, - src.pixels, ffmpeg_src_format, src.width, src.height, src.pitch, slice_h, - interpolate); - - clip_image(temp3.data(), temp2.data(), dst.clip_x, dst.clip_y, dst.clip_width, dst.clip_height, dst.bpp, dst.pitch, dst.pitch); - return temp3; - } - - clip_image(temp3.data(), src.pixels, dst.clip_x, dst.clip_y, dst.clip_width, dst.clip_height, dst.bpp, src.pitch, dst.pitch); - return temp3; - } - - if (need_convert) - { - temp3.resize(dst.pitch * out_h); - - convert_scale_image(temp3.data(), ffmpeg_dst_format, out_w, out_h, dst.pitch, - src.pixels, ffmpeg_src_format, src.width, src.height, src.pitch, slice_h, - interpolate); - - return temp3; - } - - return {}; - } - - void _swizzled_copy_2( - u8* linear_pixels, - u8* swizzled_pixels, - u32 linear_pitch, - u16 out_w, - u16 out_h, - u8 out_bpp) - { - // TODO: Validate these claims. Are the registers always correctly initialized? Should we trust them at all? - // It looks like rsx may ignore the requested swizzle size and just always - // round up to nearest power of 2 - /* - u8 sw_width_log2 = method_registers.nv309e_sw_width_log2(); - u8 sw_height_log2 = method_registers.nv309e_sw_height_log2(); - - // 0 indicates height of 1 pixel - sw_height_log2 = sw_height_log2 == 0 ? 
1 : sw_height_log2; - - // swizzle based on destination size - u16 sw_width = 1 << sw_width_log2; - u16 sw_height = 1 << sw_height_log2; - */ - - std::vector sw_temp; - - u32 sw_width = next_pow2(out_w); - u32 sw_height = next_pow2(out_h); - - // Check and pad texture out if we are given non power of 2 output - if (sw_width != out_w || sw_height != out_h) - { - sw_temp.resize(out_bpp * sw_width * sw_height); - - switch (out_bpp) - { - case 1: - pad_texture(linear_pixels, sw_temp.data(), out_w, out_h, sw_width, sw_height); - break; - case 2: - pad_texture(linear_pixels, sw_temp.data(), out_w, out_h, sw_width, sw_height); - break; - case 4: - pad_texture(linear_pixels, sw_temp.data(), out_w, out_h, sw_width, sw_height); - break; - } - - linear_pixels = sw_temp.data(); - } - - switch (out_bpp) - { - case 1: - convert_linear_swizzle(linear_pixels, swizzled_pixels, sw_width, sw_height, linear_pitch); - break; - case 2: - convert_linear_swizzle(linear_pixels, swizzled_pixels, sw_width, sw_height, linear_pitch); - break; - case 4: - convert_linear_swizzle(linear_pixels, swizzled_pixels, sw_width, sw_height, linear_pitch); - break; - } - } - - std::vector _mirror_transform(const blit_src_info& src, bool flip_x, bool flip_y) - { - std::vector temp1; - if (!flip_x && !flip_y) - { - return temp1; - } - - const u32 packed_pitch = src.width * src.bpp; - temp1.resize(packed_pitch * src.height); - - const s32 stride_y = (flip_y ? 
-1 : 1) * static_cast(src.pitch); - - for (u32 y = 0; y < src.height; ++y) - { - u8* dst_pixels = temp1.data() + (packed_pitch * y); - u8* src_pixels = src.pixels + (static_cast(y) * stride_y); - - if (flip_x) - { - if (src.bpp == 4) [[ likely ]] - { - rsx::memcpy_r(dst_pixels, src_pixels, src.width); - continue; - } - - rsx::memcpy_r(dst_pixels, src_pixels, src.width); - continue; - } - - std::memcpy(dst_pixels, src_pixels, packed_pitch); - } - - return temp1; - } - - void image_in(thread* rsx, u32 /*reg*/, u32 /*arg*/) - { - auto [success, src, dst] = _decode_transfer_registers(rsx); - if (!success) - { - return; - } - - // Decode extra params before locking - const blit_engine::transfer_interpolator in_inter = method_registers.blit_engine_input_inter(); - const u16 out_w = method_registers.blit_engine_output_width(); - const u16 out_h = method_registers.blit_engine_output_height(); - - // Lock here. RSX cannot execute any locking operations from this point, including ZCULL read barriers - auto res = ::rsx::reservation_lock( - dst.rsx_address, dst.pitch * dst.clip_height, - src.rsx_address, src.pitch * src.height); - - if (!g_cfg.video.force_cpu_blit_processing && - (dst.dma == CELL_GCM_CONTEXT_DMA_MEMORY_FRAME_BUFFER || src.dma == CELL_GCM_CONTEXT_DMA_MEMORY_FRAME_BUFFER) && - rsx->scaled_image_from_memory(src, dst, in_inter == blit_engine::transfer_interpolator::foh)) - { - // HW-accelerated blit - return; - } - - std::vector mirror_tmp; - bool src_is_temp = false; - - // Flip source if needed - if (dst.scale_y < 0 || dst.scale_x < 0) - { - mirror_tmp = _mirror_transform(src, dst.scale_x < 0, dst.scale_y < 0); - src.pixels = mirror_tmp.data(); - src.pitch = src.width * src.bpp; - src_is_temp = true; - } - - const AVPixelFormat in_format = (src.format == rsx::blit_engine::transfer_source_format::r5g6b5) ? AV_PIX_FMT_RGB565BE : AV_PIX_FMT_ARGB; - const AVPixelFormat out_format = (dst.format == rsx::blit_engine::transfer_destination_format::r5g6b5) ? 
AV_PIX_FMT_RGB565BE : AV_PIX_FMT_ARGB; - - const bool need_clip = - dst.clip_width != src.width || - dst.clip_height != src.height || - dst.clip_x > 0 || dst.clip_y > 0 || - dst.width != out_w || dst.height != out_h; - - const bool need_convert = out_format != in_format || !rsx::fcmp(fabsf(dst.scale_x), 1.f) || !rsx::fcmp(fabsf(dst.scale_y), 1.f); - const u32 slice_h = static_cast(std::ceil(static_cast(dst.clip_height + dst.clip_y) / dst.scale_y)); - const bool interpolate = in_inter == blit_engine::transfer_interpolator::foh; - - auto real_dst = dst.pixels; - const auto tiled_region = rsx->get_tiled_memory_region(utils::address_range::start_length(dst.rsx_address, dst.pitch * dst.clip_height)); - std::vector tmp; - - if (tiled_region) - { - tmp.resize(tiled_region.tile->size); - real_dst = dst.pixels; - dst.pixels = tmp.data(); - } - - if (method_registers.blit_engine_context_surface() != blit_engine::context_surface::swizzle2d) - { - _linear_copy(dst, src, out_w, out_h, slice_h, in_format, out_format, need_convert, need_clip, src_is_temp, interpolate); - } - else - { - const auto swz_temp = _swizzled_copy_1(dst, src, out_w, out_h, slice_h, in_format, out_format, need_convert, need_clip, interpolate); - auto pixels_src = swz_temp.empty() ? src.pixels : swz_temp.data(); - - _swizzled_copy_2(const_cast(pixels_src), dst.pixels, src.pitch, out_w, out_h, dst.bpp); - } - - if (tiled_region) - { - const auto tile_func = dst.bpp == 4 - ? 
rsx::tile_texel_data32 - : rsx::tile_texel_data16; - - tile_func( - real_dst, - dst.pixels, - tiled_region.base_address, - dst.rsx_address - tiled_region.base_address, - tiled_region.tile->size, - tiled_region.tile->bank, - tiled_region.tile->pitch, - dst.clip_width, - dst.clip_height - ); - } - } - } - namespace nv0039 { - void buffer_notify(thread* rsx, u32, u32 arg) + void buffer_notify(context* ctx, u32, u32 arg) { - s32 in_pitch = method_registers.nv0039_input_pitch(); - s32 out_pitch = method_registers.nv0039_output_pitch(); - const u32 line_length = method_registers.nv0039_line_length(); - const u32 line_count = method_registers.nv0039_line_count(); - const u8 out_format = method_registers.nv0039_output_format(); - const u8 in_format = method_registers.nv0039_input_format(); + s32 in_pitch = REGS(ctx)->nv0039_input_pitch(); + s32 out_pitch = REGS(ctx)->nv0039_output_pitch(); + const u32 line_length = REGS(ctx)->nv0039_line_length(); + const u32 line_count = REGS(ctx)->nv0039_line_count(); + const u8 out_format = REGS(ctx)->nv0039_output_format(); + const u8 in_format = REGS(ctx)->nv0039_input_format(); const u32 notify = arg; if (!line_count || !line_length) @@ -1830,11 +62,11 @@ namespace rsx rsx_log.trace("NV0039_BUFFER_NOTIFY: pitch(in=0x%x, out=0x%x), line(len=0x%x, cnt=0x%x), fmt(in=0x%x, out=0x%x), notify=0x%x", in_pitch, out_pitch, line_length, line_count, in_format, out_format, notify); - u32 src_offset = method_registers.nv0039_input_offset(); - u32 src_dma = method_registers.nv0039_input_location(); + u32 src_offset = REGS(ctx)->nv0039_input_offset(); + u32 src_dma = REGS(ctx)->nv0039_input_location(); - u32 dst_offset = method_registers.nv0039_output_offset(); - u32 dst_dma = method_registers.nv0039_output_location(); + u32 dst_offset = REGS(ctx)->nv0039_output_offset(); + u32 dst_dma = REGS(ctx)->nv0039_output_location(); const bool is_block_transfer = (in_pitch == out_pitch && out_pitch + 0u == line_length); const auto read_address = 
get_address(src_offset, src_dma); @@ -1842,13 +74,13 @@ namespace rsx const auto read_length = in_pitch * (line_count - 1) + line_length; const auto write_length = out_pitch * (line_count - 1) + line_length; - rsx->invalidate_fragment_program(dst_dma, dst_offset, write_length); + RSX(ctx)->invalidate_fragment_program(dst_dma, dst_offset, write_length); - if (const auto result = rsx->read_barrier(read_address, read_length, !is_block_transfer); + if (const auto result = RSX(ctx)->read_barrier(read_address, read_length, !is_block_transfer); result == rsx::result_zcull_intr) { // This transfer overlaps will zcull data pool - if (rsx->copy_zcull_stats(read_address, read_length, write_address) == write_length) + if (RSX(ctx)->copy_zcull_stats(read_address, read_length, write_address) == write_length) { // All writes deferred return; @@ -1938,53 +170,53 @@ namespace rsx } } - void flip_command(thread* rsx, u32, u32 arg) + void flip_command(context* ctx, u32, u32 arg) { - ensure(rsx->isHLE); + ensure(RSX(ctx)->isHLE); - if (rsx->vblank_at_flip != umax) + if (RSX(ctx)->vblank_at_flip != umax) { - rsx->flip_notification_count++; + RSX(ctx)->flip_notification_count++; } - if (auto ptr = rsx->queue_handler) + if (auto ptr = RSX(ctx)->queue_handler) { - rsx->intr_thread->cmd_list + RSX(ctx)->intr_thread->cmd_list ({ { ppu_cmd::set_args, 1 }, u64{1}, { ppu_cmd::lle_call, ptr }, { ppu_cmd::sleep, 0 } }); - rsx->intr_thread->cmd_notify++; - rsx->intr_thread->cmd_notify.notify_one(); + RSX(ctx)->intr_thread->cmd_notify++; + RSX(ctx)->intr_thread->cmd_notify.notify_one(); } - rsx->reset(); - rsx->on_frame_end(arg); - rsx->request_emu_flip(arg); - vm::_ref>(rsx->label_addr + 0x10).store(u128{}); + RSX(ctx)->reset(); + RSX(ctx)->on_frame_end(arg); + RSX(ctx)->request_emu_flip(arg); + vm::_ref>(RSX(ctx)->label_addr + 0x10).store(u128{}); } - void user_command(thread* rsx, u32, u32 arg) + void user_command(context* ctx, u32, u32 arg) { - if (!rsx->isHLE) + if (!RSX(ctx)->isHLE) { 
sys_rsx_context_attribute(0x55555555, 0xFEF, 0, arg, 0, 0); return; } - if (auto ptr = rsx->user_handler) + if (auto ptr = RSX(ctx)->user_handler) { - rsx->intr_thread->cmd_list + RSX(ctx)->intr_thread->cmd_list ({ { ppu_cmd::set_args, 1 }, u64{arg}, { ppu_cmd::lle_call, ptr }, { ppu_cmd::sleep, 0 } }); - rsx->intr_thread->cmd_notify++; - rsx->intr_thread->cmd_notify.notify_one(); + RSX(ctx)->intr_thread->cmd_notify++; + RSX(ctx)->intr_thread->cmd_notify.notify_one(); } } @@ -1993,7 +225,7 @@ namespace rsx template struct driver_flip { - static void impl(thread*, u32 /*reg*/, u32 arg) + static void impl(context*, u32 /*reg*/, u32 arg) { sys_rsx_context_attribute(0x55555555, 0x102, index, arg, 0, 0); } @@ -2002,11 +234,11 @@ namespace rsx template struct queue_flip { - static void impl(thread* rsx, u32 /*reg*/, u32 arg) + static void impl(context* ctx, u32 /*reg*/, u32 arg) { - if (rsx->vblank_at_flip != umax) + if (RSX(ctx)->vblank_at_flip != umax) { - rsx->flip_notification_count++; + RSX(ctx)->flip_notification_count++; } sys_rsx_context_attribute(0x55555555, 0x103, index, arg, 0, 0); @@ -2016,14 +248,14 @@ namespace rsx namespace fifo { - void draw_barrier(thread* rsx, u32, u32) + void draw_barrier(context* ctx, u32, u32) { - if (rsx->in_begin_end) + if (RSX(ctx)->in_begin_end) { - if (!method_registers.current_draw_clause.is_disjoint_primitive) + if (!REGS(ctx)->current_draw_clause.is_disjoint_primitive) { // Enable primitive barrier request - method_registers.current_draw_clause.primitive_barrier_enable = true; + REGS(ctx)->current_draw_clause.primitive_barrier_enable = true; } } } @@ -2587,7 +819,12 @@ namespace rsx registers[NV308A_SIZE_OUT] = 0x0; registers[NV308A_SIZE_IN] = 0x0; registers[NV406E_SET_REFERENCE] = umax; - if (auto rsx = Emu.IsStopped() ? nullptr : get_current_renderer(); rsx && rsx->ctrl) rsx->ctrl->ref = u32{umax}; + + if (auto rsx = Emu.IsStopped() ? 
nullptr : get_current_renderer(); rsx && rsx->ctrl) + { + // FIXME: Multi-context unaware + rsx->ctrl->ref = u32{ umax }; + } } { @@ -3098,7 +1335,7 @@ namespace rsx void rsx_state::decode(u32 reg, u32 value) { // Store new value and save previous - register_previous_value = std::exchange(registers[reg], value); + latch = std::exchange(registers[reg], value); } bool rsx_state::test(u32 reg, u32 value) const @@ -3192,17 +1429,17 @@ namespace rsx break; case index_base_modifier_barrier: // Change index base offset - method_registers.decode(NV4097_SET_VERTEX_DATA_BASE_INDEX, barrier.arg); + REGS(ctx)->decode(NV4097_SET_VERTEX_DATA_BASE_INDEX, barrier.arg); result |= index_base_changed; break; case vertex_base_modifier_barrier: // Change vertex base offset - method_registers.decode(NV4097_SET_VERTEX_DATA_BASE_OFFSET, barrier.arg); + REGS(ctx)->decode(NV4097_SET_VERTEX_DATA_BASE_OFFSET, barrier.arg); result |= vertex_base_changed; break; case vertex_array_offset_modifier_barrier: // Change vertex array offset - method_registers.decode(NV4097_SET_VERTEX_DATA_ARRAY_OFFSET + barrier.index, barrier.arg); + REGS(ctx)->decode(NV4097_SET_VERTEX_DATA_ARRAY_OFFSET + barrier.index, barrier.arg); result |= vertex_arrays_changed; break; default: @@ -3737,7 +1974,7 @@ namespace rsx // FIFO bind(FIFO::FIFO_DRAW_BARRIER >> 2, fifo::draw_barrier); - method_registers.init(); + REGS(ctx)->init(); return true; }(); diff --git a/rpcs3/Emu/RSX/rsx_methods.h b/rpcs3/Emu/RSX/rsx_methods.h index 7347c24b81..fd3db6dcfc 100644 --- a/rpcs3/Emu/RSX/rsx_methods.h +++ b/rpcs3/Emu/RSX/rsx_methods.h @@ -375,7 +375,7 @@ namespace rsx } }; - using rsx_method_t = void(*)(class thread*, u32 reg, u32 arg); + using rsx_method_t = void(*)(class context*, u32 reg, u32 arg); //TODO union alignas(4) method_registers_t @@ -442,7 +442,7 @@ namespace rsx { public: std::array registers{}; - u32 register_previous_value{}; + u32 latch{}; template using decoded_type = typename registers_decoder::decoded_type; diff 
--git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index e7f0442c80..20d40bc3a2 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -95,6 +95,11 @@ + + + + + @@ -584,6 +589,15 @@ + + + + + + + + + diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters index 665e056f92..12889ce92d 100644 --- a/rpcs3/emucore.vcxproj.filters +++ b/rpcs3/emucore.vcxproj.filters @@ -97,6 +97,9 @@ {cab197c1-581c-49db-9d8b-670335b44cb2} + + {213387bd-09c5-4247-8fb0-b3cae06ba34b} + @@ -1210,6 +1213,21 @@ Emu\GPU\RSX\Program + + Emu\GPU\RSX\NV47 + + + Emu\GPU\RSX\NV47 + + + Emu\GPU\RSX\NV47 + + + Emu\GPU\RSX\NV47 + + + Emu\GPU\RSX\NV47 + @@ -2449,6 +2467,33 @@ Emu\GPU\RSX\Program + + Emu\GPU\RSX\NV47 + + + Emu\GPU\RSX\NV47 + + + Emu\GPU\RSX\NV47 + + + Emu\GPU\RSX\NV47 + + + Emu\GPU\RSX\NV47 + + + Emu\GPU\RSX\NV47 + + + Emu\GPU\RSX\NV47 + + + Emu\GPU\RSX\NV47 + + + Emu\GPU\RSX\NV47 +