diff --git a/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.cpp b/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.cpp index 0e8b16e7bf..8d56dd2fee 100644 --- a/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.cpp @@ -157,6 +157,7 @@ std::string FragmentProgramDecompiler::Format(const std::string& code) { "$1", [this]() -> std::string {return GetSRC(src1);} },//std::bind(std::mem_fn(&GLFragmentDecompilerThread::GetSRC), this, src1) }, { "$2", [this]() -> std::string {return GetSRC(src2);} },//std::bind(std::mem_fn(&GLFragmentDecompilerThread::GetSRC), this, src2) }, { "$t", std::bind(std::mem_fn(&FragmentProgramDecompiler::AddTex), this) }, + { "$_i", [this]() -> std::string {return std::to_string(dst.tex_num);} }, { "$m", std::bind(std::mem_fn(&FragmentProgramDecompiler::GetMask), this) }, { "$ifcond ", [this]() -> std::string { @@ -166,7 +167,7 @@ std::string FragmentProgramDecompiler::Format(const std::string& code) } }, { "$cond", std::bind(std::mem_fn(&FragmentProgramDecompiler::GetCond), this) }, - { "$c", std::bind(std::mem_fn(&FragmentProgramDecompiler::AddConst), this) } + { "$_c", std::bind(std::mem_fn(&FragmentProgramDecompiler::AddConst), this) } }; return fmt::replace_all(code, repl_list); diff --git a/rpcs3/Emu/RSX/Common/TextureUtils.cpp b/rpcs3/Emu/RSX/Common/TextureUtils.cpp index 554ea424af..2322be1176 100644 --- a/rpcs3/Emu/RSX/Common/TextureUtils.cpp +++ b/rpcs3/Emu/RSX/Common/TextureUtils.cpp @@ -363,6 +363,34 @@ u8 get_format_block_size_in_texel(int format) LOG_ERROR(RSX, "Unimplemented block size in texels for texture format: 0x%x", format); return 1; } +} + +u8 get_format_block_size_in_bytes(rsx::surface_color_format format) +{ + switch (format) + { + case rsx::surface_color_format::b8: + return 1; + case rsx::surface_color_format::g8b8: + case rsx::surface_color_format::r5g6b5: + case rsx::surface_color_format::x1r5g5b5_o1r5g5b5: + case rsx::surface_color_format::x1r5g5b5_z1r5g5b5: + return 2; + case rsx::surface_color_format::a8b8g8r8: + case rsx::surface_color_format::a8r8g8b8: + case rsx::surface_color_format::x8b8g8r8_o8b8g8r8: + case rsx::surface_color_format::x8b8g8r8_z8b8g8r8: + case rsx::surface_color_format::x8r8g8b8_o8r8g8b8: + case rsx::surface_color_format::x8r8g8b8_z8r8g8b8: + case rsx::surface_color_format::x32: + return 4; + case rsx::surface_color_format::w16z16y16x16: + return 8; + case rsx::surface_color_format::w32z32y32x32: + return 16; + default: + fmt::throw_exception("Invalid color format 0x%x" HERE, (u32)format); + } } static size_t get_placed_texture_storage_size(u16 width, u16 height, u32 depth, u8 format, u16 mipmap, bool cubemap, size_t row_pitch_alignement, size_t mipmap_alignment) diff --git a/rpcs3/Emu/RSX/Common/TextureUtils.h b/rpcs3/Emu/RSX/Common/TextureUtils.h index 9865a340de..5de0dbc2f9 100644 --- a/rpcs3/Emu/RSX/Common/TextureUtils.h +++ b/rpcs3/Emu/RSX/Common/TextureUtils.h @@ -32,6 +32,7 @@ void upload_texture_subresource(gsl::span dst_buffer, const rsx_subre u8 get_format_block_size_in_bytes(int format); u8 get_format_block_size_in_texel(int format); +u8 get_format_block_size_in_bytes(rsx::surface_color_format format); /** * Get number of bytes occupied by texture in RSX mem diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index 40644fd83d..96a275c73b 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -88,16 +88,7 @@ void D3D12GSRender::upload_and_bind_scale_offset_matrix(size_t descriptorIndex) // Separate constant buffer void *mapped_buffer = m_buffer_data.map(CD3DX12_RANGE(heap_offset, heap_offset + 256)); fill_scale_offset_data(mapped_buffer); - int is_alpha_tested = rsx::method_registers.alpha_test_enabled(); - u8 alpha_ref_raw = rsx::method_registers.alpha_ref(); - float alpha_ref = alpha_ref_raw / 255.f; - memcpy((char*)mapped_buffer + 16 * sizeof(float), &is_alpha_tested, sizeof(int)); - memcpy((char*)mapped_buffer + 17 * sizeof(float), &alpha_ref, sizeof(float)); - f32 fogp0 = rsx::method_registers.fog_params_0(); - f32 fogp1 = rsx::method_registers.fog_params_1(); - memcpy((char*)mapped_buffer + 18 * sizeof(float), &fogp0, sizeof(float)); - memcpy((char*)mapped_buffer + 19 * sizeof(float), &fogp1, sizeof(float)); - + fill_fragment_state_buffer((char *)mapped_buffer + 64, m_fragment_program); m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + 256)); D3D12_CONSTANT_BUFFER_VIEW_DESC constant_buffer_view_desc = { diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Formats.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Formats.cpp index 351acb4f89..159eb4b4d8 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Formats.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Formats.cpp @@ -172,6 +172,35 @@ DXGI_FORMAT get_texture_format(u8 format) fmt::throw_exception("Invalid texture format (0x%x)" HERE, (u32)format); } +UCHAR get_dxgi_texel_size(DXGI_FORMAT format) +{ + switch (format) + { + case DXGI_FORMAT_R8_UNORM: + return 1; + case DXGI_FORMAT_B5G5R5A1_UNORM: + case DXGI_FORMAT_B5G6R5_UNORM: + case DXGI_FORMAT_R8G8_UNORM: + case DXGI_FORMAT_D16_UNORM: + case DXGI_FORMAT_R16_UNORM: + case DXGI_FORMAT_R16_TYPELESS: + return 2; + case DXGI_FORMAT_B8G8R8X8_UNORM: + case DXGI_FORMAT_R8G8B8A8_UNORM: + case DXGI_FORMAT_R32_FLOAT: + case DXGI_FORMAT_D24_UNORM_S8_UINT: + case DXGI_FORMAT_R24_UNORM_X8_TYPELESS: + case DXGI_FORMAT_R24G8_TYPELESS: + return 4; + case DXGI_FORMAT_R16G16B16A16_FLOAT: + return 8; + case DXGI_FORMAT_R32G32B32A32_FLOAT: + return 16; + } + + fmt::throw_exception("Unsupported DXGI format 0x%X" HERE, (u32)format); +} + UINT get_texture_max_aniso(rsx::texture_max_anisotropy aniso) { switch (aniso) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Formats.h b/rpcs3/Emu/RSX/D3D12/D3D12Formats.h index 43309513ec..57baea3c40 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Formats.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12Formats.h @@ -88,6 +88,11 @@ DXGI_FORMAT get_depth_stencil_typeless_surface_format(rsx::surface_depth_format */ DXGI_FORMAT get_depth_samplable_surface_format(rsx::surface_depth_format format); +/** +* Get block size in bytes for a DXGI_FORMAT +*/ +UCHAR get_dxgi_texel_size(DXGI_FORMAT format); + /** * Convert front face value to bool value telling wheter front face is counterclockwise or not */ diff --git a/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp index be2b3e5b9e..035df2f307 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp @@ -38,10 +38,11 @@ void D3D12FragmentDecompiler::insertHeader(std::stringstream & OS) OS << "cbuffer SCALE_OFFSET : register(b0)" << std::endl; OS << "{" << std::endl; OS << " float4x4 scaleOffsetMat;" << std::endl; - OS << " int isAlphaTested;" << std::endl; - OS << " float alphaRef;" << std::endl; OS << " float fog_param0;\n"; OS << " float fog_param1;\n"; + OS << " int isAlphaTested;" << std::endl; + OS << " float alphaRef;" << std::endl; + OS << " float4 texture_parameters[16];\n"; OS << "};" << std::endl; } @@ -255,9 +256,10 @@ void D3D12FragmentDecompiler::insertMainStart(std::stringstream & OS) OS << " float2 " << PI.name << "_scale = float2(1., 1.);" << std::endl; continue; } + OS << " float2 " << PI.name << "_dim;" << std::endl; OS << " " << PI.name << ".GetDimensions(" << PI.name << "_dim.x, " << PI.name << "_dim.y);" << std::endl; - OS << " float2 " << PI.name << "_scale = float2(1., 1.) / " << PI.name << "_dim;" << std::endl; + OS << " float2 " << PI.name << "_scale = texture_parameters[" << textureIndex << "] / " << PI.name << "_dim;" << std::endl; } } } diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp index 2fb4bf779b..0b644962a2 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp @@ -40,28 +40,23 @@ void Shader::Compile(const std::string &code, SHADER_TYPE st) void D3D12GSRender::load_program() { - m_vertex_program = get_current_vertex_program(); - m_fragment_program = get_current_fragment_program(); - - for (int i = 0; i < 16; ++i) + auto rtt_lookup_func = [this](u32 texaddr, bool is_depth) -> std::tuple { - auto &tex = rsx::method_registers.fragment_textures[i]; - if (tex.enabled()) - { - const u32 texaddr = rsx::get_address(tex.offset(), tex.location()); - if (m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr)) - { - if (m_rtts.get_texture_from_render_target_if_applicable(texaddr)) - continue; + ID3D12Resource *surface = nullptr; + if (!is_depth) + surface = m_rtts.get_texture_from_render_target_if_applicable(texaddr); + else + surface = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr); - u32 format = tex.format() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); - if (format == CELL_GCM_TEXTURE_A8R8G8B8 || format == CELL_GCM_TEXTURE_D8R8G8B8) - { - m_fragment_program.redirected_textures |= (1 << i); - } - } - } - } + if (!surface) return std::make_tuple(false, 0); + + D3D12_RESOURCE_DESC desc = surface->GetDesc(); + u16 native_pitch = get_dxgi_texel_size(desc.Format) * (u16)desc.Width; + return std::make_tuple(true, native_pitch); + }; + + m_vertex_program = get_current_vertex_program(); + m_fragment_program = get_current_fragment_program(rtt_lookup_func); D3D12PipelineProperties prop = {}; prop.Topology = get_primitive_topology_type(rsx::method_registers.current_draw_clause.primitive); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp index 4cd4aabad0..8a249f0913 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp @@ -31,10 +31,11 @@ void D3D12VertexProgramDecompiler::insertHeader(std::stringstream &OS) OS << "cbuffer SCALE_OFFSET : register(b0)" << std::endl; OS << "{" << std::endl; OS << " float4x4 scaleOffsetMat;" << std::endl; + OS << " float fog_param0;" << std::endl; + OS << " float fog_param1;" << std::endl; OS << " int isAlphaTested;" << std::endl; OS << " float alphaRef;" << std::endl; - OS << " float fog_param0;\n"; - OS << " float fog_param1;\n"; + OS << " float4 texture_parameters[16];" << std::endl; OS << "};" << std::endl; } diff --git a/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp b/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp index 98b3c8ebfc..c2a4baa8f2 100644 --- a/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp +++ b/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp @@ -30,15 +30,6 @@ std::string GLFragmentDecompilerThread::compareFunction(COMPARE f, const std::st void GLFragmentDecompilerThread::insertHeader(std::stringstream & OS) { OS << "#version 420" << std::endl; - - OS << "layout(std140, binding = 0) uniform ScaleOffsetBuffer\n"; - OS << "{\n"; - OS << " mat4 scaleOffsetMat;\n"; - OS << " float fog_param0;\n"; - OS << " float fog_param1;\n"; - OS << " uint alpha_test;\n"; - OS << " float alpha_ref;\n"; - OS << "};\n"; } void GLFragmentDecompilerThread::insertIntputs(std::stringstream & OS) @@ -136,8 +127,12 @@ void GLFragmentDecompilerThread::insertConstants(std::stringstream & OS) OS << " " << PT.type << " " << PI.name << ";" << std::endl; } - // A dummy value otherwise it's invalid to create an empty uniform buffer - OS << " vec4 void_value;" << std::endl; + // Fragment state parameters + OS << " float fog_param0;\n"; + OS << " float fog_param1;\n"; + OS << " uint alpha_test;\n"; + OS << " float alpha_ref;\n"; + OS << " vec4 texture_parameters[16];\n"; //sampling: x,y scaling and (unused) offsets data OS << "};" << std::endl; } @@ -189,7 +184,7 @@ namespace } if (prog.unnormalized_coords & (1 << index)) - OS << "\t" << vec_type << " tex" << index << "_coord_scale = 1. / textureSize(tex" << index << ", 0);\n"; + OS << "\t" << vec_type << " tex" << index << "_coord_scale = texture_parameters[" << index << "].xy / textureSize(tex" << index << ", 0);\n"; else OS << "\t" << vec_type << " tex" << index << "_coord_scale = " << vec_type << "(1.);\n"; } diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 564d568cae..6b5a7afbe1 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -699,8 +699,23 @@ bool GLGSRender::do_method(u32 cmd, u32 arg) bool GLGSRender::load_program() { + auto rtt_lookup_func = [this](u32 texaddr, bool is_depth) -> std::tuple + { + gl::render_target *surface = nullptr; + if (!is_depth) + surface = m_rtts.get_texture_from_render_target_if_applicable(texaddr); + else + surface = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr); + + if (!surface) return std::make_tuple(false, 0); + return std::make_tuple(true, surface->get_native_pitch()); + }; + RSXVertexProgram vertex_program = get_current_vertex_program(); - RSXFragmentProgram fragment_program = get_current_fragment_program(); + RSXFragmentProgram fragment_program = get_current_fragment_program(rtt_lookup_func); + + std::array rtt_scaling; + u32 unnormalized_rtts = 0; for (auto &vtx : vertex_program.rsx_vertex_inputs) { @@ -714,27 +729,6 @@ bool GLGSRender::load_program() } } - for (int i = 0; i < 16; ++i) - { - auto &tex = rsx::method_registers.fragment_textures[i]; - if (tex.enabled()) - { - const u32 texaddr = rsx::get_address(tex.offset(), tex.location()); - if (m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr)) - { - //Ignore this rtt since we have an aloasing color texture that will be used - if (m_rtts.get_texture_from_render_target_if_applicable(texaddr)) - continue; - - u32 format = tex.format() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); - if (format == CELL_GCM_TEXTURE_A8R8G8B8 || format == CELL_GCM_TEXTURE_D8R8G8B8) - { - fragment_program.redirected_textures |= (1 << i); - } - } - } - } - auto old_program = m_program; m_program = &m_prog_buffer.getGraphicPipelineState(vertex_program, fragment_program, nullptr); m_program->use(); @@ -779,7 +773,7 @@ bool GLGSRender::load_program() m_transform_constants_dirty = false; u32 fragment_constants_size = m_prog_buffer.get_fragment_constants_buffer_size(fragment_program); - fragment_constants_size = std::max(32U, fragment_constants_size); + u32 fragment_buffer_size = fragment_constants_size + (17 * 4 * sizeof(float)); u32 max_buffer_sz = 512 + 8192 + align(fragment_constants_size, m_uniform_buffer_offset_align); if (manually_flush_ring_buffers) @@ -796,16 +790,6 @@ bool GLGSRender::load_program() scale_offset_offset = mapping.second; fill_scale_offset_data(buf, false); - // Fragment state - u32 is_alpha_tested = rsx::method_registers.alpha_test_enabled(); - float alpha_ref = rsx::method_registers.alpha_ref() / 255.f; - f32 fog0 = rsx::method_registers.fog_params_0(); - f32 fog1 = rsx::method_registers.fog_params_1(); - memcpy(buf + 16 * sizeof(float), &fog0, sizeof(float)); - memcpy(buf + 17 * sizeof(float), &fog1, sizeof(float)); - memcpy(buf + 18 * sizeof(float), &is_alpha_tested, sizeof(u32)); - memcpy(buf + 19 * sizeof(float), &alpha_ref, sizeof(float)); - // Vertex constants mapping = m_uniform_ring_buffer->alloc_from_heap(8192, m_uniform_buffer_offset_align); buf = static_cast(mapping.first); @@ -813,20 +797,18 @@ bool GLGSRender::load_program() fill_vertex_program_constants_data(buf); // Fragment constants + mapping = m_uniform_ring_buffer->alloc_from_heap(fragment_buffer_size, m_uniform_buffer_offset_align); + buf = static_cast(mapping.first); + fragment_constants_offset = mapping.second; if (fragment_constants_size) - { - mapping = m_uniform_ring_buffer->alloc_from_heap(fragment_constants_size, m_uniform_buffer_offset_align); - buf = static_cast(mapping.first); - fragment_constants_offset = mapping.second; m_prog_buffer.fill_fragment_constants_buffer({ reinterpret_cast(buf), gsl::narrow(fragment_constants_size) }, fragment_program); - } + + // Fragment state + fill_fragment_state_buffer(buf+fragment_constants_size, fragment_program); m_uniform_ring_buffer->bind_range(0, scale_offset_offset, 512); m_uniform_ring_buffer->bind_range(1, vertex_constants_offset, 8192); - if (fragment_constants_size) - { - m_uniform_ring_buffer->bind_range(2, fragment_constants_offset, fragment_constants_size); - } + m_uniform_ring_buffer->bind_range(2, fragment_constants_offset, fragment_buffer_size); if (manually_flush_ring_buffers) m_uniform_ring_buffer->unmap(); diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.h b/rpcs3/Emu/RSX/GL/GLRenderTargets.h index 6a59086b3f..d67e5042c6 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.h +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.h @@ -8,24 +8,33 @@ namespace gl { class render_target : public texture { - bool is_cleared; + bool is_cleared = false; + u16 native_pitch = 0; public: - render_target() - { - is_cleared = false; - } + render_target() {} void set_cleared() { is_cleared = true; } - bool cleared() + bool cleared() const { return is_cleared; } + + // Internal pitch is the actual row length in bytes of the openGL texture + void set_native_pitch(u16 pitch) + { + native_pitch = pitch; + } + + u16 get_native_pitch() const + { + return native_pitch; + } }; } @@ -89,6 +98,7 @@ struct gl_render_target_traits auto format = rsx::internals::surface_color_format_to_gl(surface_color_format); result->recreate(gl::texture::target::texture2D); + result->set_native_pitch(width * format.channel_count * format.channel_size); __glcheck result->config() .size({ (int)width, (int)height }) @@ -129,6 +139,12 @@ struct gl_render_target_traits __glcheck result->pixel_pack_settings().aligment(1); __glcheck result->pixel_unpack_settings().aligment(1); + u16 native_pitch = width * 2; + if (surface_depth_format == rsx::surface_depth_format::z24s8) + native_pitch *= 2; + + result->set_native_pitch(native_pitch); + return result; } diff --git a/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp b/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp index d33ec0c7df..fff8cee9c7 100644 --- a/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp +++ b/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp @@ -34,10 +34,6 @@ void GLVertexDecompilerThread::insertHeader(std::stringstream &OS) OS << "layout(std140, binding = 0) uniform ScaleOffsetBuffer" << std::endl; OS << "{" << std::endl; OS << " mat4 scaleOffsetMat;" << std::endl; - OS << " float fog_param0;\n"; - OS << " float fog_param1;\n"; - OS << " uint alpha_test;\n"; - OS << " float alpha_ref;\n"; OS << "};" << std::endl; } diff --git a/rpcs3/Emu/RSX/RSXFragmentProgram.h b/rpcs3/Emu/RSX/RSXFragmentProgram.h index 2583dc8daa..da8b59fac6 100644 --- a/rpcs3/Emu/RSX/RSXFragmentProgram.h +++ b/rpcs3/Emu/RSX/RSXFragmentProgram.h @@ -234,6 +234,7 @@ struct RSXFragmentProgram rsx::fog_mode fog_equation; u16 height; + float texture_pitch_scale[16]; u8 textures_alpha_kill[16]; u32 textures_zfunc[16]; diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index c967c6304d..dc79a378a7 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -524,6 +524,26 @@ namespace rsx stream_vector_from_memory((char*)buffer + entry.first * 4 * sizeof(float), (void*)entry.second.rgba); } + void thread::fill_fragment_state_buffer(void *buffer, const RSXFragmentProgram &fragment_program) + { + u32 *dst = static_cast(buffer); + + const u32 is_alpha_tested = rsx::method_registers.alpha_test_enabled(); + const float alpha_ref = rsx::method_registers.alpha_ref() / 255.f; + const f32 fog0 = rsx::method_registers.fog_params_0(); + const f32 fog1 = rsx::method_registers.fog_params_1(); + const float one = 1.f; + + stream_vector(dst, (u32&)fog0, (u32&)fog1, is_alpha_tested, (u32&)alpha_ref); + + size_t offset = 4; + for (int index = 0; index < 16; ++index) + { + stream_vector(&dst[offset], (u32&)fragment_program.texture_pitch_scale[index], (u32&)one, 0U, 0U); + offset += 4; + } + } + void thread::write_inline_array_to_buffer(void *dst_buffer) { u8* src = @@ -814,8 +834,7 @@ namespace rsx return result; } - - RSXFragmentProgram thread::get_current_fragment_program() const + RSXFragmentProgram thread::get_current_fragment_program(std::function(u32, bool)> get_surface_info) const { RSXFragmentProgram result = {}; u32 shader_program = rsx::method_registers.shader_program_address(); @@ -839,23 +858,52 @@ namespace rsx std::array texture_dimensions; for (u32 i = 0; i < rsx::limits::fragment_textures_count; ++i) { - if (!rsx::method_registers.fragment_textures[i].enabled()) + auto &tex = rsx::method_registers.fragment_textures[i]; + result.texture_pitch_scale[i] = 1.f; + + if (!tex.enabled()) { texture_dimensions[i] = texture_dimension_extended::texture_dimension_2d; result.textures_alpha_kill[i] = 0; result.textures_zfunc[i] = 0; } - else { - texture_dimensions[i] = rsx::method_registers.fragment_textures[i].get_extended_texture_dimension(); - result.textures_alpha_kill[i] = rsx::method_registers.fragment_textures[i].alpha_kill_enabled() ? 1 : 0; - result.textures_zfunc[i] = rsx::method_registers.fragment_textures[i].zfunc(); - } + texture_dimensions[i] = tex.get_extended_texture_dimension(); + result.textures_alpha_kill[i] = tex.alpha_kill_enabled() ? 1 : 0; + result.textures_zfunc[i] = tex.zfunc(); - if (rsx::method_registers.fragment_textures[i].enabled() && (rsx::method_registers.fragment_textures[i].format() & CELL_GCM_TEXTURE_UN)) - result.unnormalized_coords |= (1 << i); + const u32 texaddr = rsx::get_address(tex.offset(), tex.location()); + const u32 raw_format = tex.format(); + + if (raw_format & CELL_GCM_TEXTURE_UN) + result.unnormalized_coords |= (1 << i); + + bool surface_exists; + u16 surface_pitch; + + std::tie(surface_exists, surface_pitch) = get_surface_info(texaddr, false); + + if (surface_exists && surface_pitch) + { + if (raw_format & CELL_GCM_TEXTURE_UN) + result.texture_pitch_scale[i] = (float)surface_pitch / tex.pitch(); + } + else + { + std::tie(surface_exists, surface_pitch) = get_surface_info(texaddr, true); + if (surface_exists) + { + u32 format = raw_format & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); + if (format == CELL_GCM_TEXTURE_A8R8G8B8 || format == CELL_GCM_TEXTURE_D8R8G8B8) + { + result.redirected_textures |= (1 << i); + } + } + } + } } + result.set_texture_dimension(texture_dimensions); return result; diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index 3c48e8d2d3..4d2fc2ac10 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -215,7 +215,13 @@ namespace rsx std::array get_color_surface_addresses() const; u32 get_zeta_surface_address() const; RSXVertexProgram get_current_vertex_program() const; - RSXFragmentProgram get_current_fragment_program() const; + + /** + * Gets current fragment program and associated fragment state + * get_surface_info is a helper takes 2 parameters: rsx_texture_address and surface_is_depth + * returns whether surface is a render target and surface pitch in native format + */ + RSXFragmentProgram get_current_fragment_program(std::function(u32, bool)> get_surface_info) const; public: double fps_limit = 59.94; @@ -292,6 +298,12 @@ namespace rsx */ void fill_vertex_program_constants_data(void *buffer); + /** + * Fill buffer with fragment rasterization state. + * Fills current fog values, alpha test parameters and texture scaling parameters + */ + void fill_fragment_state_buffer(void *buffer, const RSXFragmentProgram &fragment_program); + /** * Write inlined array data to buffer. * The storage of inlined data looks different from memory stored arrays. diff --git a/rpcs3/Emu/RSX/VK/VKCommonDecompiler.cpp b/rpcs3/Emu/RSX/VK/VKCommonDecompiler.cpp index 3eb4804932..c90c18870d 100644 --- a/rpcs3/Emu/RSX/VK/VKCommonDecompiler.cpp +++ b/rpcs3/Emu/RSX/VK/VKCommonDecompiler.cpp @@ -56,11 +56,11 @@ namespace vk case FUNCTION::FUNCTION_TEXTURE_SAMPLE1D_GRAD: return "textureGrad($t, $0.x, $1.x, $2.y)"; case FUNCTION::FUNCTION_TEXTURE_SAMPLE2D: - return "texture($t, $0.xy)"; + return "texture($t, $0.xy * texture_parameters[$_i].xy)"; case FUNCTION::FUNCTION_TEXTURE_SAMPLE2D_PROJ: return "textureProj($t, $0.xyz, $1.x)"; // Note: $1.x is bias case FUNCTION::FUNCTION_TEXTURE_SAMPLE2D_LOD: - return "textureLod($t, $0.xy, $1.x)"; + return "textureLod($t, $0.xy * texture_parameters[$_i].xy, $1.x)"; case FUNCTION::FUNCTION_TEXTURE_SAMPLE2D_GRAD: return "textureGrad($t, $0.xyz, $1.x, $2.y)"; // Note: $1.x is bias case FUNCTION::FUNCTION_TEXTURE_SAMPLECUBE: diff --git a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp index 5eaad88749..a82a9b2ffb 100644 --- a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp @@ -31,23 +31,6 @@ void VKFragmentDecompilerThread::insertHeader(std::stringstream & OS) { OS << "#version 420" << std::endl; OS << "#extension GL_ARB_separate_shader_objects: enable" << std::endl << std::endl; - - OS << "layout(std140, set=0, binding = 0) uniform ScaleOffsetBuffer" << std::endl; - OS << "{" << std::endl; - OS << " mat4 scaleOffsetMat;" << std::endl; - OS << " float fog_param0;" << std::endl; - OS << " float fog_param1;" << std::endl; - OS << " uint alpha_test;" << std::endl; - OS << " float alpha_ref;" << std::endl; - OS << "};" << std::endl << std::endl; - - vk::glsl::program_input in; - in.location = 0; - in.domain = vk::glsl::glsl_fragment_program; - in.name = "ScaleOffsetBuffer"; - in.type = vk::glsl::input_type_uniform_buffer; - - inputs.push_back(in); } void VKFragmentDecompilerThread::insertIntputs(std::stringstream & OS) @@ -170,8 +153,11 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS) OS << " " << PT.type << " " << PI.name << ";" << std::endl; } - // A dummy value otherwise it's invalid to create an empty uniform buffer - OS << " vec4 void_value;" << std::endl; + OS << " float fog_param0;" << std::endl; + OS << " float fog_param1;" << std::endl; + OS << " uint alpha_test;" << std::endl; + OS << " float alpha_ref;" << std::endl; + OS << " vec4 texture_parameters[16];" << std::endl; OS << "};" << std::endl; vk::glsl::program_input in; diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index ba6ebe0afe..63bcfbc8db 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -980,28 +980,21 @@ bool VKGSRender::do_method(u32 cmd, u32 arg) bool VKGSRender::load_program() { - RSXVertexProgram vertex_program = get_current_vertex_program(); - RSXFragmentProgram fragment_program = get_current_fragment_program(); - - for (int i = 0; i < 16; ++i) + auto rtt_lookup_func = [this](u32 texaddr, bool is_depth) -> std::tuple { - auto &tex = rsx::method_registers.fragment_textures[i]; - if (tex.enabled()) - { - const u32 texaddr = rsx::get_address(tex.offset(), tex.location()); - if (m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr)) - { - if (m_rtts.get_texture_from_render_target_if_applicable(texaddr)) - continue; + vk::render_target *surface = nullptr; + if (!is_depth) + surface = m_rtts.get_texture_from_render_target_if_applicable(texaddr); + else + surface = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr); - u32 format = tex.format() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); - if (format == CELL_GCM_TEXTURE_A8R8G8B8 || format == CELL_GCM_TEXTURE_D8R8G8B8) - { - fragment_program.redirected_textures |= (1 << i); - } - } - } - } + if (!surface) return std::make_tuple(false, 0); + + return std::make_tuple(true, surface->native_pitch); + }; + + RSXVertexProgram vertex_program = get_current_vertex_program(); + RSXFragmentProgram fragment_program = get_current_fragment_program(rtt_lookup_func); vk::pipeline_props properties = {}; @@ -1183,16 +1176,6 @@ bool VKGSRender::load_program() stream_vector((char*)buf + 48, 0, 0, 0, (u32&)one); } - u32 is_alpha_tested = rsx::method_registers.alpha_test_enabled(); - u8 alpha_ref_raw = rsx::method_registers.alpha_ref(); - float alpha_ref = alpha_ref_raw / 255.f; - - f32 fog0 = rsx::method_registers.fog_params_0(); - f32 fog1 = rsx::method_registers.fog_params_1(); - memcpy((char*)buf + 64, &fog0, sizeof(float)); - memcpy((char*)buf + 68, &fog1, sizeof(float)); - memcpy((char*)buf + 72, &is_alpha_tested, sizeof(u32)); - memcpy((char*)buf + 76, &alpha_ref, sizeof(float)); m_uniform_buffer_ring_info.unmap(); const size_t vertex_constants_offset = m_uniform_buffer_ring_info.alloc<256>(512 * 4 * sizeof(float)); @@ -1201,18 +1184,19 @@ bool VKGSRender::load_program() m_uniform_buffer_ring_info.unmap(); const size_t fragment_constants_sz = m_prog_buffer.get_fragment_constants_buffer_size(fragment_program); - const size_t fragment_constants_offset = m_uniform_buffer_ring_info.alloc<256>(std::max(fragment_constants_sz, static_cast(32))); + const size_t fragment_buffer_sz = fragment_constants_sz + (17 * 4 * sizeof(float)); + const size_t fragment_constants_offset = m_uniform_buffer_ring_info.alloc<256>(fragment_buffer_sz); + buf = (u8*)m_uniform_buffer_ring_info.map(fragment_constants_offset, fragment_buffer_sz); if (fragment_constants_sz) - { - buf = (u8*)m_uniform_buffer_ring_info.map(fragment_constants_offset, fragment_constants_sz); m_prog_buffer.fill_fragment_constants_buffer({ reinterpret_cast(buf), ::narrow(fragment_constants_sz) }, fragment_program); - m_uniform_buffer_ring_info.unmap(); - } + + fill_fragment_state_buffer(buf+fragment_constants_sz, fragment_program); + m_uniform_buffer_ring_info.unmap(); m_program->bind_uniform({ m_uniform_buffer_ring_info.heap->value, scale_offset_offset, 256 }, SCALE_OFFSET_BIND_SLOT, descriptor_sets); m_program->bind_uniform({ m_uniform_buffer_ring_info.heap->value, vertex_constants_offset, 512 * 4 * sizeof(float) }, VERTEX_CONSTANT_BUFFERS_BIND_SLOT, descriptor_sets); - m_program->bind_uniform({ m_uniform_buffer_ring_info.heap->value, fragment_constants_offset, (fragment_constants_sz? fragment_constants_sz: 32) }, FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT, descriptor_sets); + m_program->bind_uniform({ m_uniform_buffer_ring_info.heap->value, fragment_constants_offset, fragment_buffer_sz }, FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT, descriptor_sets); return true; } diff --git a/rpcs3/Emu/RSX/VK/VKRenderTargets.h b/rpcs3/Emu/RSX/VK/VKRenderTargets.h index de6b5c38e2..8054532f81 100644 --- a/rpcs3/Emu/RSX/VK/VKRenderTargets.h +++ b/rpcs3/Emu/RSX/VK/VKRenderTargets.h @@ -4,24 +4,50 @@ #include "VKHelpers.h" #include "../GCM.h" #include "../Common/surface_store.h" +#include "../Common/TextureUtils.h" #include "VKFormats.h" +namespace vk +{ + struct render_target : public image + { + u16 native_pitch = 0; + + render_target(vk::render_device &dev, + uint32_t memory_type_index, + uint32_t access_flags, + VkImageType image_type, + VkFormat format, + uint32_t width, uint32_t height, uint32_t depth, + uint32_t mipmaps, uint32_t layers, + VkSampleCountFlagBits samples, + VkImageLayout initial_layout, + VkImageTiling tiling, + VkImageUsageFlags usage, + VkImageCreateFlags image_flags) + + :image(dev, memory_type_index, access_flags, image_type, format, width, height, depth, + mipmaps, layers, samples, initial_layout, tiling, usage, image_flags) + {} + }; +} + namespace rsx { struct vk_render_target_traits { - using surface_storage_type = std::unique_ptr; - using surface_type = vk::image*; + using surface_storage_type = std::unique_ptr; + using surface_type = vk::render_target*; using command_list_type = vk::command_buffer*; using download_buffer_object = void*; - static std::unique_ptr create_new_surface(u32 address, surface_color_format format, size_t width, size_t height, vk::render_device &device, vk::command_buffer *cmd, const vk::gpu_formats_support &support, const vk::memory_type_mapping &mem_mapping) + static std::unique_ptr create_new_surface(u32 address, surface_color_format format, size_t width, size_t height, vk::render_device &device, vk::command_buffer *cmd, const vk::gpu_formats_support &support, const vk::memory_type_mapping &mem_mapping) { auto fmt = vk::get_compatible_surface_format(format); VkFormat requested_format = fmt.first; - std::unique_ptr rtt; - rtt.reset(new vk::image(device, mem_mapping.device_local, + std::unique_ptr rtt; + rtt.reset(new vk::render_target(device, mem_mapping.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, VK_IMAGE_TYPE_2D, requested_format, @@ -45,10 +71,11 @@ namespace rsx change_image_layout(*cmd, rtt->value, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT)); rtt->native_layout = fmt.second; + rtt->native_pitch = width * get_format_block_size_in_bytes(format); return rtt; } - static std::unique_ptr create_new_surface(u32 address, surface_depth_format format, size_t width, size_t height, vk::render_device &device, vk::command_buffer *cmd, const vk::gpu_formats_support &support, const vk::memory_type_mapping &mem_mapping) + static std::unique_ptr create_new_surface(u32 address, surface_depth_format format, size_t width, size_t height, vk::render_device &device, vk::command_buffer *cmd, const vk::gpu_formats_support &support, const vk::memory_type_mapping &mem_mapping) { VkFormat requested_format = vk::get_compatible_depth_surface_format(support, format); VkImageSubresourceRange range = vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_DEPTH_BIT); @@ -56,8 +83,8 @@ namespace rsx if (requested_format != VK_FORMAT_D16_UNORM) range.aspectMask |= VK_IMAGE_ASPECT_STENCIL_BIT; - std::unique_ptr ds; - ds.reset(new vk::image(device, mem_mapping.device_local, + std::unique_ptr ds; + ds.reset(new vk::render_target(device, mem_mapping.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, VK_IMAGE_TYPE_2D, requested_format, @@ -73,37 +100,40 @@ namespace rsx //Clear new surface.. VkClearDepthStencilValue clear_depth = {}; - clear_depth.depth = 1.f; clear_depth.stencil = 0; vkCmdClearDepthStencilImage(*cmd, ds->value, VK_IMAGE_LAYOUT_GENERAL, &clear_depth, 1, &range); change_image_layout(*cmd, ds->value, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, range); + ds->native_pitch = width * 2; + if (format == rsx::surface_depth_format::z24s8) + ds->native_pitch *= 2; + return ds; } - static void prepare_rtt_for_drawing(vk::command_buffer* pcmd, vk::image *surface) + static void prepare_rtt_for_drawing(vk::command_buffer* pcmd, vk::render_target *surface) { // surface->change_layout(*pcmd, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); } - static void prepare_rtt_for_sampling(vk::command_buffer* pcmd, vk::image *surface) + static void prepare_rtt_for_sampling(vk::command_buffer* pcmd, vk::render_target *surface) { // surface->change_layout(*pcmd, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); } - static void prepare_ds_for_drawing(vk::command_buffer* pcmd, vk::image *surface) + static void prepare_ds_for_drawing(vk::command_buffer* pcmd, vk::render_target *surface) { // surface->change_layout(*pcmd, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); } - static void prepare_ds_for_sampling(vk::command_buffer* pcmd, vk::image *surface) + static void prepare_ds_for_sampling(vk::command_buffer* pcmd, vk::render_target *surface) { // surface->change_layout(*pcmd, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); } - static bool rtt_has_format_width_height(const std::unique_ptr &rtt, surface_color_format format, size_t width, size_t height) + static bool rtt_has_format_width_height(const std::unique_ptr &rtt, surface_color_format format, size_t width, size_t height) { VkFormat fmt = vk::get_compatible_surface_format(format).first; @@ -115,7 +145,7 @@ namespace rsx return false; } - static bool ds_has_format_width_height(const std::unique_ptr &ds, surface_depth_format format, size_t width, size_t height) + static bool ds_has_format_width_height(const std::unique_ptr &ds, surface_depth_format format, size_t width, size_t height) { // TODO: check format //VkFormat fmt = vk::get_compatible_depth_surface_format(format); @@ -152,7 +182,7 @@ namespace rsx { } - static vk::image *get(const std::unique_ptr &tex) + static vk::render_target *get(const std::unique_ptr &tex) { return tex.get(); } diff --git a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp index 457a118c97..9cfb163920 100644 --- a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp @@ -33,10 +33,6 @@ void VKVertexDecompilerThread::insertHeader(std::stringstream &OS) OS << "layout(std140, set = 0, binding = 0) uniform ScaleOffsetBuffer" << std::endl; OS << "{" << std::endl; OS << " mat4 scaleOffsetMat;" << std::endl; - OS << " float fog_param0;\n"; - OS << " float fog_param1;\n"; - OS << " uint alpha_test;\n"; - OS << " float alpha_ref;\n"; OS << "};" << std::endl; vk::glsl::program_input in;