From 976d707596048f1d2f8dee6d05a2f1b1204ef24a Mon Sep 17 00:00:00 2001 From: vlj Date: Sun, 10 May 2015 01:05:00 +0200 Subject: [PATCH 001/343] d3d12: Start implementation --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 599 ++++++++++++++++++++ rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 88 +++ rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h | 11 + rpcs3/Emu/RSX/GSManager.cpp | 6 + rpcs3/Gui/MainFrame.cpp | 4 +- rpcs3/emucore.vcxproj | 12 +- rpcs3/emucore.vcxproj.filters | 12 + 7 files changed, 727 insertions(+), 5 deletions(-) create mode 100644 rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp create mode 100644 rpcs3/Emu/RSX/D3D12/D3D12GSRender.h create mode 100644 rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp new file mode 100644 index 0000000000..c3c6207f2c --- /dev/null +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -0,0 +1,599 @@ +#include "stdafx.h" +#include "D3D12GSRender.h" +#include +#include + +static void check(HRESULT hr) +{ + if (hr != 0) + abort(); +} + +D3D12GSRender::D3D12GSRender() + : GSRender() +{ + // Enable d3d debug layer + Microsoft::WRL::ComPtr debugInterface; + D3D12GetDebugInterface(IID_PPV_ARGS(&debugInterface)); + debugInterface->EnableDebugLayer(); + + // Create adapter + Microsoft::WRL::ComPtr dxgiFactory; + check(CreateDXGIFactory(IID_PPV_ARGS(&dxgiFactory))); + IDXGIAdapter* warpAdapter; + check(dxgiFactory->EnumWarpAdapter(IID_PPV_ARGS(&warpAdapter))); + check(D3D12CreateDevice(warpAdapter, D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&m_device))); + + // Queues + D3D12_COMMAND_QUEUE_DESC copyQueueDesc = {}, graphicQueueDesc = {}; + copyQueueDesc.Type = D3D12_COMMAND_LIST_TYPE_COPY; + graphicQueueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT; + check(m_device->CreateCommandQueue(&copyQueueDesc, IID_PPV_ARGS(&m_commandQueueCopy))); + check(m_device->CreateCommandQueue(&graphicQueueDesc, IID_PPV_ARGS(&m_commandQueueGraphic))); +} + +D3D12GSRender::~D3D12GSRender() +{ + // NOTE: Should 
be released only if no command are in flight ! + m_commandQueueGraphic->Release(); + m_commandQueueCopy->Release(); + m_device->Release(); +} + +void D3D12GSRender::Close() +{ +} + + +void D3D12GSRender::InitDrawBuffers() +{ + // if (!m_fbo.IsCreated() || RSXThread::m_width != last_width || RSXThread::m_height != last_height || last_depth_format != m_surface_depth_format) + { + /* + LOG_WARNING(RSX, "New FBO (%dx%d)", RSXThread::m_width, RSXThread::m_height); + last_width = RSXThread::m_width; + last_height = RSXThread::m_height; + last_depth_format = m_surface_depth_format; + + m_fbo.Create(); + checkForGlError("m_fbo.Create"); + m_fbo.Bind(); + + m_rbo.Create(4 + 1); + checkForGlError("m_rbo.Create"); + + for (int i = 0; i < 4; ++i) + { + m_rbo.Bind(i); + m_rbo.Storage(GL_RGBA, RSXThread::m_width, RSXThread::m_height); + checkForGlError("m_rbo.Storage(GL_RGBA)"); + } + + m_rbo.Bind(4); + + switch (m_surface_depth_format) + { + case 0: + { + // case 0 found in BLJM60410-[Suzukaze no Melt - Days in the Sanctuary] + // [E : RSXThread]: Bad depth format! 
(0) + // [E : RSXThread]: glEnable: opengl error 0x0506 + // [E : RSXThread]: glDrawArrays: opengl error 0x0506 + m_rbo.Storage(GL_DEPTH_COMPONENT, RSXThread::m_width, RSXThread::m_height); + checkForGlError("m_rbo.Storage(GL_DEPTH_COMPONENT)"); + break; + } + + case CELL_GCM_SURFACE_Z16: + { + m_rbo.Storage(GL_DEPTH_COMPONENT16, RSXThread::m_width, RSXThread::m_height); + checkForGlError("m_rbo.Storage(GL_DEPTH_COMPONENT16)"); + + m_fbo.Renderbuffer(GL_DEPTH_ATTACHMENT, m_rbo.GetId(4)); + checkForGlError("m_fbo.Renderbuffer(GL_DEPTH_ATTACHMENT)"); + break; + } + + + case CELL_GCM_SURFACE_Z24S8: + { + m_rbo.Storage(GL_DEPTH24_STENCIL8, RSXThread::m_width, RSXThread::m_height); + checkForGlError("m_rbo.Storage(GL_DEPTH24_STENCIL8)"); + + m_fbo.Renderbuffer(GL_DEPTH_ATTACHMENT, m_rbo.GetId(4)); + checkForGlError("m_fbo.Renderbuffer(GL_DEPTH_ATTACHMENT)"); + + m_fbo.Renderbuffer(GL_STENCIL_ATTACHMENT, m_rbo.GetId(4)); + checkForGlError("m_fbo.Renderbuffer(GL_STENCIL_ATTACHMENT)"); + + break; + + } + + + default: + { + LOG_ERROR(RSX, "Bad depth format! 
(%d)", m_surface_depth_format); + assert(0); + break; + } + } + + for (int i = 0; i < 4; ++i) + { + m_fbo.Renderbuffer(GL_COLOR_ATTACHMENT0 + i, m_rbo.GetId(i)); + checkForGlError(fmt::Format("m_fbo.Renderbuffer(GL_COLOR_ATTACHMENT%d)", i)); + } + */ + //m_fbo.Renderbuffer(GL_DEPTH_ATTACHMENT, m_rbo.GetId(4)); + //checkForGlError("m_fbo.Renderbuffer(GL_DEPTH_ATTACHMENT)"); + + //if (m_surface_depth_format == 2) + //{ + // m_fbo.Renderbuffer(GL_STENCIL_ATTACHMENT, m_rbo.GetId(4)); + // checkForGlError("m_fbo.Renderbuffer(GL_STENCIL_ATTACHMENT)"); + //} + } + /* + if (!m_set_surface_clip_horizontal) + { + m_surface_clip_x = 0; + m_surface_clip_w = RSXThread::m_width; + } + + if (!m_set_surface_clip_vertical) + { + m_surface_clip_y = 0; + m_surface_clip_h = RSXThread::m_height; + } + + m_fbo.Bind(); + + static const GLenum draw_buffers[] = { GL_COLOR_ATTACHMENT0, GL_COLOR_ATTACHMENT1, GL_COLOR_ATTACHMENT2, GL_COLOR_ATTACHMENT3 }; + + switch (m_surface_color_target) + { + case CELL_GCM_SURFACE_TARGET_NONE: break; + + case CELL_GCM_SURFACE_TARGET_0: + { + glDrawBuffer(draw_buffers[0]); + checkForGlError("glDrawBuffer(0)"); + break; + } + + case CELL_GCM_SURFACE_TARGET_1: + { + glDrawBuffer(draw_buffers[1]); + checkForGlError("glDrawBuffer(1)"); + break; + } + + case CELL_GCM_SURFACE_TARGET_MRT1: + { + glDrawBuffers(2, draw_buffers); + checkForGlError("glDrawBuffers(2)"); + break; + } + + case CELL_GCM_SURFACE_TARGET_MRT2: + { + glDrawBuffers(3, draw_buffers); + checkForGlError("glDrawBuffers(3)"); + break; + } + + case CELL_GCM_SURFACE_TARGET_MRT3: + { + glDrawBuffers(4, draw_buffers); + checkForGlError("glDrawBuffers(4)"); + break; + } + + default: + { + LOG_ERROR(RSX, "Bad surface color target: %d", m_surface_color_target); + break; + } + + } + + if (m_read_buffer) + { + u32 format = GL_BGRA; + CellGcmDisplayInfo* buffers = vm::get_ptr(m_gcm_buffers_addr); + u32 addr = GetAddress(buffers[m_gcm_current_buffer].offset, CELL_GCM_LOCATION_LOCAL); + u32 width = 
buffers[m_gcm_current_buffer].width; + u32 height = buffers[m_gcm_current_buffer].height; + glDrawPixels(width, height, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8, vm::get_ptr(addr)); + }*/ +} + +void D3D12GSRender::OnInit() +{ +} + +void D3D12GSRender::OnInitThread() +{ +} + +void D3D12GSRender::OnExitThread() +{ +} + +void D3D12GSRender::OnReset() +{ +} + +void D3D12GSRender::ExecCMD(u32 cmd) +{ +} + +void D3D12GSRender::ExecCMD() +{ + ID3D12CommandAllocator *commandAllocator; + m_device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&commandAllocator)); + ID3D12CommandList *commandList; + m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, commandAllocator, nullptr, IID_PPV_ARGS(&commandList)); + + + //return; +/* if (!LoadProgram()) + { + LOG_ERROR(RSX, "LoadProgram failed."); + Emu.Pause(); + return; + } + + InitDrawBuffers(); + + if (m_set_color_mask) + { + glColorMask(m_color_mask_r, m_color_mask_g, m_color_mask_b, m_color_mask_a); + checkForGlError("glColorMask"); + } + + if (!m_indexed_array.m_count && !m_draw_array_count) + { + u32 min_vertex_size = ~0; + for (auto &i : m_vertex_data) + { + if (!i.size) + continue; + + u32 vertex_size = i.data.size() / (i.size * i.GetTypeSize()); + + if (min_vertex_size > vertex_size) + min_vertex_size = vertex_size; + } + + m_draw_array_count = min_vertex_size; + m_draw_array_first = 0; + } + + Enable(m_set_depth_test, GL_DEPTH_TEST); + Enable(m_set_alpha_test, GL_ALPHA_TEST); + Enable(m_set_blend || m_set_blend_mrt1 || m_set_blend_mrt2 || m_set_blend_mrt3, GL_BLEND); + Enable(m_set_scissor_horizontal && m_set_scissor_vertical, GL_SCISSOR_TEST); + Enable(m_set_logic_op, GL_LOGIC_OP); + Enable(m_set_cull_face, GL_CULL_FACE); + Enable(m_set_dither, GL_DITHER); + Enable(m_set_stencil_test, GL_STENCIL_TEST); + Enable(m_set_line_smooth, GL_LINE_SMOOTH); + Enable(m_set_poly_smooth, GL_POLYGON_SMOOTH); + Enable(m_set_point_sprite_control, GL_POINT_SPRITE); + Enable(m_set_specular, GL_LIGHTING); + 
Enable(m_set_poly_offset_fill, GL_POLYGON_OFFSET_FILL); + Enable(m_set_poly_offset_line, GL_POLYGON_OFFSET_LINE); + Enable(m_set_poly_offset_point, GL_POLYGON_OFFSET_POINT); + Enable(m_set_restart_index, GL_PRIMITIVE_RESTART); + Enable(m_set_line_stipple, GL_LINE_STIPPLE); + Enable(m_set_polygon_stipple, GL_POLYGON_STIPPLE); + + if (!is_intel_vendor) + { + Enable(m_set_depth_bounds_test, GL_DEPTH_BOUNDS_TEST_EXT); + } + + if (m_set_clip_plane) + { + Enable(m_clip_plane_0, GL_CLIP_PLANE0); + Enable(m_clip_plane_1, GL_CLIP_PLANE1); + Enable(m_clip_plane_2, GL_CLIP_PLANE2); + Enable(m_clip_plane_3, GL_CLIP_PLANE3); + Enable(m_clip_plane_4, GL_CLIP_PLANE4); + Enable(m_clip_plane_5, GL_CLIP_PLANE5); + + checkForGlError("m_set_clip_plane"); + } + + checkForGlError("glEnable"); + + if (m_set_front_polygon_mode) + { + glPolygonMode(GL_FRONT, m_front_polygon_mode); + checkForGlError("glPolygonMode(Front)"); + } + + if (m_set_back_polygon_mode) + { + glPolygonMode(GL_BACK, m_back_polygon_mode); + checkForGlError("glPolygonMode(Back)"); + } + + if (m_set_point_size) + { + glPointSize(m_point_size); + checkForGlError("glPointSize"); + } + + if (m_set_poly_offset_mode) + { + glPolygonOffset(m_poly_offset_scale_factor, m_poly_offset_bias); + checkForGlError("glPolygonOffset"); + } + + if (m_set_logic_op) + { + glLogicOp(m_logic_op); + checkForGlError("glLogicOp"); + } + + if (m_set_scissor_horizontal && m_set_scissor_vertical) + { + glScissor(m_scissor_x, m_scissor_y, m_scissor_w, m_scissor_h); + checkForGlError("glScissor"); + } + + if (m_set_two_sided_stencil_test_enable) + { + if (m_set_stencil_fail && m_set_stencil_zfail && m_set_stencil_zpass) + { + glStencilOpSeparate(GL_FRONT, m_stencil_fail, m_stencil_zfail, m_stencil_zpass); + checkForGlError("glStencilOpSeparate"); + } + + if (m_set_stencil_mask) + { + glStencilMaskSeparate(GL_FRONT, m_stencil_mask); + checkForGlError("glStencilMaskSeparate"); + } + + if (m_set_stencil_func && m_set_stencil_func_ref && 
m_set_stencil_func_mask) + { + glStencilFuncSeparate(GL_FRONT, m_stencil_func, m_stencil_func_ref, m_stencil_func_mask); + checkForGlError("glStencilFuncSeparate"); + } + + if (m_set_back_stencil_fail && m_set_back_stencil_zfail && m_set_back_stencil_zpass) + { + glStencilOpSeparate(GL_BACK, m_back_stencil_fail, m_back_stencil_zfail, m_back_stencil_zpass); + checkForGlError("glStencilOpSeparate(GL_BACK)"); + } + + if (m_set_back_stencil_mask) + { + glStencilMaskSeparate(GL_BACK, m_back_stencil_mask); + checkForGlError("glStencilMaskSeparate(GL_BACK)"); + } + + if (m_set_back_stencil_func && m_set_back_stencil_func_ref && m_set_back_stencil_func_mask) + { + glStencilFuncSeparate(GL_BACK, m_back_stencil_func, m_back_stencil_func_ref, m_back_stencil_func_mask); + checkForGlError("glStencilFuncSeparate(GL_BACK)"); + } + } + else + { + if (m_set_stencil_fail && m_set_stencil_zfail && m_set_stencil_zpass) + { + glStencilOp(m_stencil_fail, m_stencil_zfail, m_stencil_zpass); + checkForGlError("glStencilOp"); + } + + if (m_set_stencil_mask) + { + glStencilMask(m_stencil_mask); + checkForGlError("glStencilMask"); + } + + if (m_set_stencil_func && m_set_stencil_func_ref && m_set_stencil_func_mask) + { + glStencilFunc(m_stencil_func, m_stencil_func_ref, m_stencil_func_mask); + checkForGlError("glStencilFunc"); + } + } + + // TODO: Use other glLightModel functions? + glLightModeli(GL_LIGHT_MODEL_TWO_SIDE, m_set_two_side_light_enable ? 
GL_TRUE : GL_FALSE); + checkForGlError("glLightModeli"); + + if (m_set_shade_mode) + { + glShadeModel(m_shade_mode); + checkForGlError("glShadeModel"); + } + + if (m_set_depth_mask) + { + glDepthMask(m_depth_mask); + checkForGlError("glDepthMask"); + } + + if (m_set_depth_func) + { + glDepthFunc(m_depth_func); + checkForGlError("glDepthFunc"); + } + + if (m_set_depth_bounds && !is_intel_vendor) + { + glDepthBoundsEXT(m_depth_bounds_min, m_depth_bounds_max); + checkForGlError("glDepthBounds"); + } + + if (m_set_clip) + { + glDepthRangef(m_clip_min, m_clip_max); + checkForGlError("glDepthRangef"); + } + + if (m_set_line_width) + { + glLineWidth(m_line_width); + checkForGlError("glLineWidth"); + } + + if (m_set_line_stipple) + { + glLineStipple(m_line_stipple_factor, m_line_stipple_pattern); + checkForGlError("glLineStipple"); + } + + if (m_set_polygon_stipple) + { + glPolygonStipple((const GLubyte*)m_polygon_stipple_pattern); + checkForGlError("glPolygonStipple"); + } + + if (m_set_blend_equation) + { + glBlendEquationSeparate(m_blend_equation_rgb, m_blend_equation_alpha); + checkForGlError("glBlendEquationSeparate"); + } + + if (m_set_blend_sfactor && m_set_blend_dfactor) + { + glBlendFuncSeparate(m_blend_sfactor_rgb, m_blend_dfactor_rgb, m_blend_sfactor_alpha, m_blend_dfactor_alpha); + checkForGlError("glBlendFuncSeparate"); + } + + if (m_set_blend_color) + { + glBlendColor(m_blend_color_r, m_blend_color_g, m_blend_color_b, m_blend_color_a); + checkForGlError("glBlendColor"); + } + + if (m_set_cull_face) + { + glCullFace(m_cull_face); + checkForGlError("glCullFace"); + } + + if (m_set_front_face) + { + glFrontFace(m_front_face); + checkForGlError("glFrontFace"); + } + + if (m_set_alpha_func && m_set_alpha_ref) + { + glAlphaFunc(m_alpha_func, m_alpha_ref); + checkForGlError("glAlphaFunc"); + } + + if (m_set_fog_mode) + { + glFogi(GL_FOG_MODE, m_fog_mode); + checkForGlError("glFogi(GL_FOG_MODE)"); + } + + if (m_set_fog_params) + { + glFogf(GL_FOG_START, 
m_fog_param0); + checkForGlError("glFogf(GL_FOG_START)"); + glFogf(GL_FOG_END, m_fog_param1); + checkForGlError("glFogf(GL_FOG_END)"); + } + + if (m_set_restart_index) + { + glPrimitiveRestartIndex(m_restart_index); + checkForGlError("glPrimitiveRestartIndex"); + } + + if (m_indexed_array.m_count && m_draw_array_count) + { + LOG_WARNING(RSX, "m_indexed_array.m_count && draw_array_count"); + } + + for (u32 i = 0; i < m_textures_count; ++i) + { + if (!m_textures[i].IsEnabled()) continue; + + glActiveTexture(GL_TEXTURE0 + i); + checkForGlError("glActiveTexture"); + m_gl_textures[i].Create(); + m_gl_textures[i].Bind(); + checkForGlError(fmt::Format("m_gl_textures[%d].Bind", i)); + m_program.SetTex(i); + m_gl_textures[i].Init(m_textures[i]); + checkForGlError(fmt::Format("m_gl_textures[%d].Init", i)); + } + + for (u32 i = 0; i < m_textures_count; ++i) + { + if (!m_vertex_textures[i].IsEnabled()) continue; + + glActiveTexture(GL_TEXTURE0 + m_textures_count + i); + checkForGlError("glActiveTexture"); + m_gl_vertex_textures[i].Create(); + m_gl_vertex_textures[i].Bind(); + checkForGlError(fmt::Format("m_gl_vertex_textures[%d].Bind", i)); + m_program.SetVTex(i); + m_gl_vertex_textures[i].Init(m_vertex_textures[i]); + checkForGlError(fmt::Format("m_gl_vertex_textures[%d].Init", i)); + } + + m_vao.Bind(); + + if (m_indexed_array.m_count) + { + LoadVertexData(m_indexed_array.index_min, m_indexed_array.index_max - m_indexed_array.index_min + 1); + } + + if (m_indexed_array.m_count || m_draw_array_count) + { + EnableVertexData(m_indexed_array.m_count ? 
true : false); + + InitVertexData(); + InitFragmentData(); + } + + if (m_indexed_array.m_count) + { + switch (m_indexed_array.m_type) + { + case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_32: + glDrawElements(m_draw_mode - 1, m_indexed_array.m_count, GL_UNSIGNED_INT, nullptr); + checkForGlError("glDrawElements #4"); + break; + + case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16: + glDrawElements(m_draw_mode - 1, m_indexed_array.m_count, GL_UNSIGNED_SHORT, nullptr); + checkForGlError("glDrawElements #2"); + break; + + default: + LOG_ERROR(RSX, "Bad indexed array type (%d)", m_indexed_array.m_type); + break; + } + + DisableVertexData(); + m_indexed_array.Reset(); + } + + if (m_draw_array_count) + { + //LOG_WARNING(RSX,"glDrawArrays(%d,%d,%d)", m_draw_mode - 1, m_draw_array_first, m_draw_array_count); + glDrawArrays(m_draw_mode - 1, 0, m_draw_array_count); + checkForGlError("glDrawArrays"); + DisableVertexData(); + } + + WriteBuffers();*/ +} + +void D3D12GSRender::Flip() +{ +} diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h new file mode 100644 index 0000000000..a8c1c43d67 --- /dev/null +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -0,0 +1,88 @@ +#pragma once + +#ifdef _WIN32 +#include +#include "rpcs3/Ini.h" +#include "Utilities/rPlatform.h" // only for rImage +#include "Utilities/File.h" +#include "Utilities/Log.h" +#include "Emu/Memory/Memory.h" +#include "Emu/System.h" +#include "Emu/RSX/GSRender.h" + +#include "D3D12RenderTargetSets.h" + +#pragma comment (lib, "d3d12.lib") +#pragma comment (lib, "dxgi.lib") +#pragma comment (lib, "d3dcompiler.lib") + + +class D3D12GSRender //TODO: find out why this used to inherit from wxWindow + : //public wxWindow + /*,*/ public GSRender +{ +private: + // std::vector m_vdata; + // std::vector m_post_draw_objs; + + // GLProgram m_program; + int m_fp_buf_num; + int m_vp_buf_num; + // GLProgramBuffer m_prog_buffer; + + // GLFragmentProgram m_fragment_prog; + // GLVertexProgram m_vertex_prog; + + // GLTexture 
m_gl_textures[m_textures_count]; + // GLTexture m_gl_vertex_textures[m_textures_count]; + + // GLvao m_vao; + // GLvbo m_vbo; + // GLrbo m_rbo; + D3D12RenderTargetSets m_fbo; + ID3D12Device* m_device; + ID3D12CommandQueue *m_commandQueueCopy; + ID3D12CommandQueue *m_commandQueueGraphic; + + void* m_context; + +public: + // GSFrameBase* m_frame; + u32 m_draw_frames; + u32 m_skip_frames; + + D3D12GSRender(); + virtual ~D3D12GSRender(); + +private: + virtual void Close() override; + /* void EnableVertexData(bool indexed_draw = false); + void DisableVertexData(); + void InitVertexData(); + void InitFragmentData(); + + void Enable(bool enable, const u32 cap); + + bool LoadProgram(); + void WriteBuffers(); + void WriteDepthBuffer(); + void WriteColorBuffers(); + void WriteColorBufferA(); + void WriteColorBufferB(); + void WriteColorBufferC(); + void WriteColorBufferD(); + + void DrawObjects();*/ + void InitDrawBuffers(); + +protected: + virtual void OnInit() override; + virtual void OnInitThread() override; + virtual void OnExitThread() override; + virtual void OnReset() override; + virtual void ExecCMD(u32 cmd) override; + virtual void ExecCMD() override; + virtual void Flip() override; +}; + +#endif \ No newline at end of file diff --git a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h new file mode 100644 index 0000000000..7f7378558f --- /dev/null +++ b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h @@ -0,0 +1,11 @@ +#pragma once + + +class D3D12RenderTargetSets +{ +public: + bool IsCreated() + { + return false; + } +}; \ No newline at end of file diff --git a/rpcs3/Emu/RSX/GSManager.cpp b/rpcs3/Emu/RSX/GSManager.cpp index 1c3d02d2f7..6a0588ec81 100644 --- a/rpcs3/Emu/RSX/GSManager.cpp +++ b/rpcs3/Emu/RSX/GSManager.cpp @@ -7,6 +7,9 @@ #include "GSManager.h" #include "Null/NullGSRender.h" #include "GL/GLGSRender.h" +#ifdef WIN32 +#include "D3D12/D3D12GSRender.h" +#endif void GSInfo::Init() { @@ -34,6 +37,9 @@ void 
GSManager::Init() default: case 0: m_render = new NullGSRender(); break; case 1: m_render = new GLGSRender(); break; +#ifdef WIN32 + case 2: m_render = new D3D12GSRender(); break; +#endif } //m_render->Init(GetInfo().outresolution.width, GetInfo().outresolution.height); } diff --git a/rpcs3/Gui/MainFrame.cpp b/rpcs3/Gui/MainFrame.cpp index 5a06624cf3..e9f0a6109c 100644 --- a/rpcs3/Gui/MainFrame.cpp +++ b/rpcs3/Gui/MainFrame.cpp @@ -474,7 +474,9 @@ void MainFrame::Config(wxCommandEvent& WXUNUSED(event)) cbox_gs_render->Append("Null"); cbox_gs_render->Append("OpenGL"); - //cbox_gs_render->Append("Software"); +#ifdef WIN32 + cbox_gs_render->Append("D3D12"); +#endif for(int i = 1; i < WXSIZEOF(ResolutionTable); ++i) { diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index 2830b24d70..4d595b4823 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -41,6 +41,7 @@ + @@ -498,6 +499,8 @@ + + @@ -719,7 +722,7 @@ Disabled false Use - _UNICODE;UNICODE;%(PreprocessorDefinitions) + _UNICODE;UNICODE;%(PreprocessorDefinitions);DX12_SUPPORT stdafx.h Async true @@ -758,7 +761,7 @@ Disabled false Use - _UNICODE;UNICODE;MSVC_CRT_MEMLEAK_DETECTION;%(PreprocessorDefinitions) + _UNICODE;UNICODE;MSVC_CRT_MEMLEAK_DETECTION;%(PreprocessorDefinitions);DX12_SUPPORT stdafx.h Async true @@ -782,6 +785,7 @@ true + _UNICODE;UNICODE;%(PreprocessorDefinitions);DX12_SUPPORT true @@ -799,7 +803,7 @@ Use stdafx.h Async - LLVM_AVAILABLE;%(PreprocessorDefinitions) + LLVM_AVAILABLE;%(PreprocessorDefinitions);DX12_SUPPORT true @@ -817,4 +821,4 @@ - \ No newline at end of file + diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters index e93306a72e..ca6c9fc43b 100644 --- a/rpcs3/emucore.vcxproj.filters +++ b/rpcs3/emucore.vcxproj.filters @@ -90,6 +90,9 @@ {2a8841dc-bce0-41bb-9fcb-5bf1f8dda213} + + {25818cb6-10d5-4ae3-8c5e-9dd79c306e53} + @@ -965,6 +968,9 @@ Emu\SysCalls\Modules + + Emu\GPU\RSX\D3D12 + @@ -1828,5 +1834,11 @@ Emu\SysCalls\Modules + + 
Emu\GPU\RSX\D3D12 + + + Emu\GPU\RSX\D3D12 + \ No newline at end of file From 0b5816d6d60902612a7b35ce42d6143d6ec37dcd Mon Sep 17 00:00:00 2001 From: vlj Date: Sun, 10 May 2015 02:21:43 +0200 Subject: [PATCH 002/343] d3d12: Create window --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 22 +++++-- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 24 +++++++ rpcs3/Gui/D3DGSFrame.cpp | 95 +++++++++++++++++++++++++++ rpcs3/Gui/D3DGSFrame.h | 31 +++++++++ rpcs3/Gui/GLGSFrame.cpp | 1 + rpcs3/rpcs3.cpp | 6 ++ rpcs3/rpcs3.vcxproj | 2 + rpcs3/rpcs3.vcxproj.filters | 6 ++ 8 files changed, 181 insertions(+), 6 deletions(-) create mode 100644 rpcs3/Gui/D3DGSFrame.cpp create mode 100644 rpcs3/Gui/D3DGSFrame.h diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index c3c6207f2c..cb1ea4298d 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -3,6 +3,13 @@ #include #include +GetGSFrameCb2 GetGSFrame = nullptr; + +void SetGetD3DGSFrameCallback(GetGSFrameCb2 value) +{ + GetGSFrame = value; +} + static void check(HRESULT hr) { if (hr != 0) @@ -30,6 +37,9 @@ D3D12GSRender::D3D12GSRender() graphicQueueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT; check(m_device->CreateCommandQueue(&copyQueueDesc, IID_PPV_ARGS(&m_commandQueueCopy))); check(m_device->CreateCommandQueue(&graphicQueueDesc, IID_PPV_ARGS(&m_commandQueueGraphic))); + + GSFrameBase2 *tmp = GetGSFrame(); + tmp->Show(); } D3D12GSRender::~D3D12GSRender() @@ -47,15 +57,15 @@ void D3D12GSRender::Close() void D3D12GSRender::InitDrawBuffers() { - // if (!m_fbo.IsCreated() || RSXThread::m_width != last_width || RSXThread::m_height != last_height || last_depth_format != m_surface_depth_format) + if (!m_fbo.IsCreated() || RSXThread::m_width != m_lastWidth || RSXThread::m_height != m_lastHeight || m_lastDepth != m_surface_depth_format) { - /* + LOG_WARNING(RSX, "New FBO (%dx%d)", RSXThread::m_width, RSXThread::m_height); - last_width = RSXThread::m_width; - last_height = 
RSXThread::m_height; - last_depth_format = m_surface_depth_format; + m_lastWidth = RSXThread::m_width; + m_lastHeight = RSXThread::m_height; + m_lastDepth = m_surface_depth_format; - m_fbo.Create(); +/* m_fbo.Create(); checkForGlError("m_fbo.Create"); m_fbo.Bind(); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index a8c1c43d67..4439ed8f47 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -16,6 +16,28 @@ #pragma comment (lib, "dxgi.lib") #pragma comment (lib, "d3dcompiler.lib") +class GSFrameBase2 +{ +public: + GSFrameBase2() {} + GSFrameBase2(const GSFrameBase2&) = delete; + virtual void Close() = 0; + + virtual bool IsShown() = 0; + virtual void Hide() = 0; + virtual void Show() = 0; + + virtual void* GetNewContext() = 0; + virtual void SetCurrent(void* ctx) = 0; + virtual void DeleteContext(void* ctx) = 0; + virtual void Flip(void* ctx) = 0; + +}; + +typedef GSFrameBase2*(*GetGSFrameCb2)(); + +void SetGetD3DGSFrameCallback(GetGSFrameCb2 value); + class D3D12GSRender //TODO: find out why this used to inherit from wxWindow : //public wxWindow @@ -44,6 +66,8 @@ private: ID3D12CommandQueue *m_commandQueueCopy; ID3D12CommandQueue *m_commandQueueGraphic; + size_t m_lastWidth, m_lastHeight, m_lastDepth; + void* m_context; public: diff --git a/rpcs3/Gui/D3DGSFrame.cpp b/rpcs3/Gui/D3DGSFrame.cpp new file mode 100644 index 0000000000..6d38812a8e --- /dev/null +++ b/rpcs3/Gui/D3DGSFrame.cpp @@ -0,0 +1,95 @@ +#include "stdafx_gui.h" +#include "Emu/Memory/Memory.h" +#include "Emu/System.h" +#include "D3DGSFrame.h" +#include "Utilities/Timer.h" + +D3DGSFrame::D3DGSFrame() + : GSFrame(nullptr, "GSFrame[OpenGL]") + , m_frames(0) +{ + canvas = new wxWindow(this, wxID_ANY); + canvas->SetSize(GetClientSize()); + + canvas->Bind(wxEVT_LEFT_DCLICK, &GSFrame::OnLeftDclick, this); +} + +D3DGSFrame::~D3DGSFrame() +{ +} + +void D3DGSFrame::Close() +{ + GSFrame::Close(); +} + +bool D3DGSFrame::IsShown() +{ 
+ return GSFrame::IsShown(); +} + +void D3DGSFrame::Hide() +{ + GSFrame::Hide(); +} + +void D3DGSFrame::Show() +{ + GSFrame::Show(); +} + +void* D3DGSFrame::GetNewContext() +{ + return nullptr;//new wxGLContext(GetCanvas()); +} + +void D3DGSFrame::SetCurrent(void* ctx) +{ +// GetCanvas()->SetCurrent(*(wxGLContext*)ctx); +} + +void D3DGSFrame::DeleteContext(void* ctx) +{ +// delete (wxGLContext*)ctx; +} + +void D3DGSFrame::Flip(void* context) +{ + if (!canvas) return; +// canvas->SetCurrent(*(wxGLContext*)context); + + static Timer fps_t; +// canvas->SwapBuffers(); + m_frames++; + + const std::string sub_title = Emu.GetTitle() + (Emu.GetTitleID().length() ? " [" + Emu.GetTitleID() + "] | " : " | "); + + if (fps_t.GetElapsedTimeInSec() >= 0.5) + { + // can freeze on exit + SetTitle(wxString(sub_title.c_str(), wxConvUTF8) + wxString::Format("FPS: %.2f", (double)m_frames / fps_t.GetElapsedTimeInSec())); + m_frames = 0; + fps_t.Start(); + } +} + +void D3DGSFrame::OnSize(wxSizeEvent& event) +{ + if (canvas) canvas->SetSize(GetClientSize()); + event.Skip(); +} + +void D3DGSFrame::SetViewport(int x, int y, u32 w, u32 h) +{ + /* + //ConLog.Warning("SetViewport(x=%d, y=%d, w=%d, h=%d)", x, y, w, h); + + const wxSize client = GetClientSize(); + const wxSize viewport = AspectRatio(client, wxSize(w, h)); + + const int vx = (client.GetX() - viewport.GetX()) / 2; + const int vy = (client.GetY() - viewport.GetY()) / 2; + + glViewport(vx + x, vy + y, viewport.GetWidth(), viewport.GetHeight()); + */ +} \ No newline at end of file diff --git a/rpcs3/Gui/D3DGSFrame.h b/rpcs3/Gui/D3DGSFrame.h new file mode 100644 index 0000000000..4925eeb7fa --- /dev/null +++ b/rpcs3/Gui/D3DGSFrame.h @@ -0,0 +1,31 @@ +#pragma once +#include "Emu/RSX/D3D12/D3D12GSRender.h" +#include "Gui/GSFrame.h" +#include "wx/window.h" + +struct D3DGSFrame : public GSFrame, public GSFrameBase2 +{ + wxWindow* canvas; + u32 m_frames; + + D3DGSFrame(); + ~D3DGSFrame(); + + virtual void Close() override; + + virtual bool 
IsShown() override; + virtual void Hide() override; + virtual void Show() override; + + virtual void* GetNewContext() override; + virtual void SetCurrent(void* ctx) override; + virtual void DeleteContext(void* ctx) override; + virtual void Flip(void* context) override; + + wxWindow* GetCanvas() const { return canvas; } + + virtual void SetViewport(int x, int y, u32 w, u32 h); + +private: + virtual void OnSize(wxSizeEvent& event); +}; \ No newline at end of file diff --git a/rpcs3/Gui/GLGSFrame.cpp b/rpcs3/Gui/GLGSFrame.cpp index 899b2de32d..a5419f5c3f 100644 --- a/rpcs3/Gui/GLGSFrame.cpp +++ b/rpcs3/Gui/GLGSFrame.cpp @@ -3,6 +3,7 @@ #include "Emu/Memory/Memory.h" #include "Emu/System.h" #include "GLGSFrame.h" +#include "D3DGSFrame.h" #include "Utilities/Timer.h" GLGSFrame::GLGSFrame() diff --git a/rpcs3/rpcs3.cpp b/rpcs3/rpcs3.cpp index 20a5c9d01f..69757d3786 100644 --- a/rpcs3/rpcs3.cpp +++ b/rpcs3/rpcs3.cpp @@ -26,6 +26,7 @@ #include "Gui/SaveDataDialog.h" #include "Gui/GLGSFrame.h" +#include "Gui/D3DGSFrame.h" #include #ifdef _WIN32 @@ -137,6 +138,11 @@ bool Rpcs3App::OnInit() return new GLGSFrame(); }); + SetGetD3DGSFrameCallback([]() ->GSFrameBase2* + { + return new D3DGSFrame(); + }); + g_msg_dialog.reset(new MsgDialogFrame); g_savedata_dialog.reset(new SaveDataDialogFrame); diff --git a/rpcs3/rpcs3.vcxproj b/rpcs3/rpcs3.vcxproj index f2e3fd3df3..648a0d7e93 100644 --- a/rpcs3/rpcs3.vcxproj +++ b/rpcs3/rpcs3.vcxproj @@ -177,6 +177,7 @@ + @@ -218,6 +219,7 @@ + diff --git a/rpcs3/rpcs3.vcxproj.filters b/rpcs3/rpcs3.vcxproj.filters index f1b37ef93a..a5efe10e7e 100644 --- a/rpcs3/rpcs3.vcxproj.filters +++ b/rpcs3/rpcs3.vcxproj.filters @@ -102,6 +102,9 @@ Gui + + Gui + @@ -207,6 +210,9 @@ Gui + + Gui + From 4f708ab9eec0d1d685765f895759ede1df23380c Mon Sep 17 00:00:00 2001 From: vlj Date: Sun, 10 May 2015 19:00:32 +0200 Subject: [PATCH 003/343] d3d12: Enable buffer flip --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 20 +++++++++++++++++--- 
rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 5 +++-- rpcs3/Gui/D3DGSFrame.cpp | 7 ++++++- rpcs3/Gui/D3DGSFrame.h | 3 ++- 4 files changed, 28 insertions(+), 7 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index cb1ea4298d..7454d229c0 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -38,8 +38,19 @@ D3D12GSRender::D3D12GSRender() check(m_device->CreateCommandQueue(&copyQueueDesc, IID_PPV_ARGS(&m_commandQueueCopy))); check(m_device->CreateCommandQueue(&graphicQueueDesc, IID_PPV_ARGS(&m_commandQueueGraphic))); - GSFrameBase2 *tmp = GetGSFrame(); - tmp->Show(); + m_frame = GetGSFrame(); + + DXGI_SWAP_CHAIN_DESC swapChain = {}; + swapChain.BufferCount = 2; + swapChain.Windowed = true; + swapChain.OutputWindow = m_frame->getHandle(); + swapChain.BufferDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + swapChain.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; + swapChain.SampleDesc.Count = 1; + swapChain.Flags = DXGI_SWAP_CHAIN_FLAG_ALLOW_MODE_SWITCH; + swapChain.SwapEffect = DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL; + + check(dxgiFactory->CreateSwapChain(m_commandQueueGraphic, &swapChain, (IDXGISwapChain**)&m_swapChain)); } D3D12GSRender::~D3D12GSRender() @@ -48,13 +59,14 @@ D3D12GSRender::~D3D12GSRender() m_commandQueueGraphic->Release(); m_commandQueueCopy->Release(); m_device->Release(); + m_swapChain->Release(); } void D3D12GSRender::Close() { + m_frame->Hide(); } - void D3D12GSRender::InitDrawBuffers() { if (!m_fbo.IsCreated() || RSXThread::m_width != m_lastWidth || RSXThread::m_height != m_lastHeight || m_lastDepth != m_surface_depth_format) @@ -221,6 +233,7 @@ void D3D12GSRender::InitDrawBuffers() void D3D12GSRender::OnInit() { + m_frame->Show(); } void D3D12GSRender::OnInitThread() @@ -606,4 +619,5 @@ void D3D12GSRender::ExecCMD() void D3D12GSRender::Flip() { + check(m_swapChain->Present(1, 0)); } diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h 
index 4439ed8f47..12e1c253a4 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -31,7 +31,7 @@ public: virtual void SetCurrent(void* ctx) = 0; virtual void DeleteContext(void* ctx) = 0; virtual void Flip(void* ctx) = 0; - + virtual HWND getHandle() const = 0; }; typedef GSFrameBase2*(*GetGSFrameCb2)(); @@ -65,13 +65,14 @@ private: ID3D12Device* m_device; ID3D12CommandQueue *m_commandQueueCopy; ID3D12CommandQueue *m_commandQueueGraphic; + struct IDXGISwapChain3 *m_swapChain; size_t m_lastWidth, m_lastHeight, m_lastDepth; void* m_context; public: - // GSFrameBase* m_frame; + GSFrameBase2 *m_frame; u32 m_draw_frames; u32 m_skip_frames; diff --git a/rpcs3/Gui/D3DGSFrame.cpp b/rpcs3/Gui/D3DGSFrame.cpp index 6d38812a8e..c2ede93df7 100644 --- a/rpcs3/Gui/D3DGSFrame.cpp +++ b/rpcs3/Gui/D3DGSFrame.cpp @@ -92,4 +92,9 @@ void D3DGSFrame::SetViewport(int x, int y, u32 w, u32 h) glViewport(vx + x, vy + y, viewport.GetWidth(), viewport.GetHeight()); */ -} \ No newline at end of file +} + +HWND D3DGSFrame::getHandle() const +{ + return canvas->GetHandle(); +} diff --git a/rpcs3/Gui/D3DGSFrame.h b/rpcs3/Gui/D3DGSFrame.h index 4925eeb7fa..51c395eee1 100644 --- a/rpcs3/Gui/D3DGSFrame.h +++ b/rpcs3/Gui/D3DGSFrame.h @@ -24,7 +24,8 @@ struct D3DGSFrame : public GSFrame, public GSFrameBase2 wxWindow* GetCanvas() const { return canvas; } - virtual void SetViewport(int x, int y, u32 w, u32 h); + virtual void SetViewport(int x, int y, u32 w, u32 h) override; + virtual HWND getHandle() const override; private: virtual void OnSize(wxSizeEvent& event); From 5e7531b9bfd711d8e73d46e56082c172161c3954 Mon Sep 17 00:00:00 2001 From: vlj Date: Sun, 10 May 2015 19:41:08 +0200 Subject: [PATCH 004/343] d3d12: Use ifdef to guard code --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 2 ++ rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 2 +- rpcs3/Emu/RSX/GSManager.cpp | 2 +- rpcs3/Gui/D3DGSFrame.cpp | 2 ++ rpcs3/Gui/D3DGSFrame.h | 6 +++++- rpcs3/Gui/MainFrame.cpp | 2 +- 
rpcs3/rpcs3.cpp | 2 ++ 7 files changed, 14 insertions(+), 4 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 7454d229c0..e2c9ed3085 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -1,4 +1,5 @@ #include "stdafx.h" +#if defined(DX12_SUPPORT) #include "D3D12GSRender.h" #include #include @@ -621,3 +622,4 @@ void D3D12GSRender::Flip() { check(m_swapChain->Present(1, 0)); } +#endif \ No newline at end of file diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 12e1c253a4..428d4f3acb 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -1,6 +1,6 @@ #pragma once +#if defined(DX12_SUPPORT) -#ifdef _WIN32 #include #include "rpcs3/Ini.h" #include "Utilities/rPlatform.h" // only for rImage diff --git a/rpcs3/Emu/RSX/GSManager.cpp b/rpcs3/Emu/RSX/GSManager.cpp index 6a0588ec81..cbddef535a 100644 --- a/rpcs3/Emu/RSX/GSManager.cpp +++ b/rpcs3/Emu/RSX/GSManager.cpp @@ -37,7 +37,7 @@ void GSManager::Init() default: case 0: m_render = new NullGSRender(); break; case 1: m_render = new GLGSRender(); break; -#ifdef WIN32 +#if defined(DX12_SUPPORT) case 2: m_render = new D3D12GSRender(); break; #endif } diff --git a/rpcs3/Gui/D3DGSFrame.cpp b/rpcs3/Gui/D3DGSFrame.cpp index c2ede93df7..e8b22fcd73 100644 --- a/rpcs3/Gui/D3DGSFrame.cpp +++ b/rpcs3/Gui/D3DGSFrame.cpp @@ -1,4 +1,5 @@ #include "stdafx_gui.h" +#if defined(DX12_SUPPORT) #include "Emu/Memory/Memory.h" #include "Emu/System.h" #include "D3DGSFrame.h" @@ -98,3 +99,4 @@ HWND D3DGSFrame::getHandle() const { return canvas->GetHandle(); } +#endif \ No newline at end of file diff --git a/rpcs3/Gui/D3DGSFrame.h b/rpcs3/Gui/D3DGSFrame.h index 51c395eee1..47de197d0a 100644 --- a/rpcs3/Gui/D3DGSFrame.h +++ b/rpcs3/Gui/D3DGSFrame.h @@ -1,4 +1,6 @@ #pragma once + +#if defined(DX12_SUPPORT) #include "Emu/RSX/D3D12/D3D12GSRender.h" #include 
"Gui/GSFrame.h" #include "wx/window.h" @@ -29,4 +31,6 @@ struct D3DGSFrame : public GSFrame, public GSFrameBase2 private: virtual void OnSize(wxSizeEvent& event); -}; \ No newline at end of file +}; + +#endif \ No newline at end of file diff --git a/rpcs3/Gui/MainFrame.cpp b/rpcs3/Gui/MainFrame.cpp index e9f0a6109c..ef725668a4 100644 --- a/rpcs3/Gui/MainFrame.cpp +++ b/rpcs3/Gui/MainFrame.cpp @@ -474,7 +474,7 @@ void MainFrame::Config(wxCommandEvent& WXUNUSED(event)) cbox_gs_render->Append("Null"); cbox_gs_render->Append("OpenGL"); -#ifdef WIN32 +#if defined(DX12_SUPPORT) cbox_gs_render->Append("D3D12"); #endif diff --git a/rpcs3/rpcs3.cpp b/rpcs3/rpcs3.cpp index 69757d3786..819e2aa7f4 100644 --- a/rpcs3/rpcs3.cpp +++ b/rpcs3/rpcs3.cpp @@ -138,10 +138,12 @@ bool Rpcs3App::OnInit() return new GLGSFrame(); }); +#if defined(DX12_SUPPORT) SetGetD3DGSFrameCallback([]() ->GSFrameBase2* { return new D3DGSFrame(); }); +#endif g_msg_dialog.reset(new MsgDialogFrame); g_savedata_dialog.reset(new SaveDataDialogFrame); From 777363140137cc91fdeb3ad33099c7699134ccab Mon Sep 17 00:00:00 2001 From: vlj Date: Sun, 10 May 2015 20:17:39 +0200 Subject: [PATCH 005/343] d3d12: backbuffer are properly cleared --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 91 ++++++++++++++++++++++++++- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 4 ++ 2 files changed, 92 insertions(+), 3 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index e2c9ed3085..90ede543d2 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -39,8 +39,12 @@ D3D12GSRender::D3D12GSRender() check(m_device->CreateCommandQueue(©QueueDesc, IID_PPV_ARGS(&m_commandQueueCopy))); check(m_device->CreateCommandQueue(&graphicQueueDesc, IID_PPV_ARGS(&m_commandQueueGraphic))); + // Create a global command allocator + m_device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&m_commandAllocator)); + m_frame = GetGSFrame(); + // Create 
swap chain and put them in a descriptor heap as rendertarget DXGI_SWAP_CHAIN_DESC swapChain = {}; swapChain.BufferCount = 2; swapChain.Windowed = true; @@ -52,11 +56,25 @@ D3D12GSRender::D3D12GSRender() swapChain.SwapEffect = DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL; check(dxgiFactory->CreateSwapChain(m_commandQueueGraphic, &swapChain, (IDXGISwapChain**)&m_swapChain)); + m_swapChain->GetBuffer(0, IID_PPV_ARGS(&m_backBuffer[0])); + m_swapChain->GetBuffer(1, IID_PPV_ARGS(&m_backBuffer[1])); + + D3D12_DESCRIPTOR_HEAP_DESC heapDesc = {}; + heapDesc.NumDescriptors = 1; + heapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV; + D3D12_RENDER_TARGET_VIEW_DESC rttDesc = {}; + rttDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; + rttDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + m_device->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(&m_backbufferAsRendertarget[0])); + m_device->CreateRenderTargetView(m_backBuffer[0], &rttDesc, m_backbufferAsRendertarget[0]->GetCPUDescriptorHandleForHeapStart()); + m_device->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(&m_backbufferAsRendertarget[1])); + m_device->CreateRenderTargetView(m_backBuffer[1], &rttDesc, m_backbufferAsRendertarget[1]->GetCPUDescriptorHandleForHeapStart()); } D3D12GSRender::~D3D12GSRender() { // NOTE: Should be released only if no command are in flight ! 
+ m_commandAllocator->Release(); m_commandQueueGraphic->Release(); m_commandQueueCopy->Release(); m_device->Release(); @@ -251,14 +269,71 @@ void D3D12GSRender::OnReset() void D3D12GSRender::ExecCMD(u32 cmd) { + assert(cmd == NV4097_CLEAR_SURFACE); + ID3D12GraphicsCommandList *commandList; + m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_commandAllocator, nullptr, IID_PPV_ARGS(&commandList)); + + D3D12_RESOURCE_BARRIER transition = {}; + transition.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + transition.Transition.pResource = m_backBuffer[m_swapChain->GetCurrentBackBufferIndex()]; + transition.Transition.StateBefore = D3D12_RESOURCE_STATE_PRESENT; + transition.Transition.StateAfter = D3D12_RESOURCE_STATE_RENDER_TARGET; + + commandList->ResourceBarrier(1, &transition); + +/* if (m_set_color_mask) + { + glColorMask(m_color_mask_r, m_color_mask_g, m_color_mask_b, m_color_mask_a); + checkForGlError("glColorMask"); + } + + if (m_set_scissor_horizontal && m_set_scissor_vertical) + { + glScissor(m_scissor_x, m_scissor_y, m_scissor_w, m_scissor_h); + checkForGlError("glScissor"); + } + + GLbitfield f = 0;*/ + + if (m_clear_surface_mask & 0x1) + { +// commandList->ClearDepthStencilView() +// glClearDepth(m_clear_surface_z / (float)0xffffff); + } + +/* if (m_clear_surface_mask & 0x2) + { + glClearStencil(m_clear_surface_s); + checkForGlError("glClearStencil"); + + f |= GL_STENCIL_BUFFER_BIT; + }*/ + + if (m_clear_surface_mask & 0xF0) + { + float clearColor[] = + { + m_clear_surface_color_r / 255.0f, + m_clear_surface_color_g / 255.0f, + m_clear_surface_color_b / 255.0f, + m_clear_surface_color_a / 255.0f + }; + commandList->ClearRenderTargetView(m_backbufferAsRendertarget[m_swapChain->GetCurrentBackBufferIndex()]->GetCPUDescriptorHandleForHeapStart(), clearColor, 0, nullptr); + } + + transition.Transition.StateBefore = D3D12_RESOURCE_STATE_RENDER_TARGET; + transition.Transition.StateAfter = D3D12_RESOURCE_STATE_PRESENT; + + 
commandList->ResourceBarrier(1, &transition); + + check(commandList->Close()); + m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**) &commandList); } void D3D12GSRender::ExecCMD() { - ID3D12CommandAllocator *commandAllocator; - m_device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&commandAllocator)); ID3D12CommandList *commandList; - m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, commandAllocator, nullptr, IID_PPV_ARGS(&commandList)); +// m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_commandAllocator, nullptr, IID_PPV_ARGS(&commandList)); //return; @@ -621,5 +696,15 @@ void D3D12GSRender::ExecCMD() void D3D12GSRender::Flip() { check(m_swapChain->Present(1, 0)); + // Wait execution is over + // TODO: It's suboptimal, we should use 2 command allocator + Microsoft::WRL::ComPtr fence; + m_device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&fence)); + HANDLE gfxqueuecompletion = CreateEvent(0, 0, 0, 0); + fence->SetEventOnCompletion(1, gfxqueuecompletion); + m_commandQueueGraphic->Signal(fence.Get(), 1); + WaitForSingleObject(gfxqueuecompletion, INFINITE); + CloseHandle(gfxqueuecompletion); + m_commandAllocator->Reset(); } #endif \ No newline at end of file diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 428d4f3acb..929fa0a75e 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -65,7 +65,11 @@ private: ID3D12Device* m_device; ID3D12CommandQueue *m_commandQueueCopy; ID3D12CommandQueue *m_commandQueueGraphic; + ID3D12CommandAllocator *m_commandAllocator; struct IDXGISwapChain3 *m_swapChain; + ID3D12Resource* m_backBuffer[2]; + + ID3D12DescriptorHeap *m_backbufferAsRendertarget[2]; size_t m_lastWidth, m_lastHeight, m_lastDepth; From b1c3e0915572a25aee2e8ab280953a14709d07d5 Mon Sep 17 00:00:00 2001 From: vlj Date: Sun, 10 May 2015 20:30:33 +0200 Subject: [PATCH 006/343] d3d12: Clean backbuffers and 
descriptors --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 90ede543d2..6f8599cbff 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -77,12 +77,17 @@ D3D12GSRender::~D3D12GSRender() m_commandAllocator->Release(); m_commandQueueGraphic->Release(); m_commandQueueCopy->Release(); - m_device->Release(); + m_backbufferAsRendertarget[0]->Release(); + m_backbufferAsRendertarget[1]->Release(); + m_backBuffer[0]->Release(); + m_backBuffer[1]->Release(); m_swapChain->Release(); + m_device->Release(); } void D3D12GSRender::Close() { + Stop(); m_frame->Hide(); } From 2f5a6eb993ac997c6a311e33e0e43ba517e95b90 Mon Sep 17 00:00:00 2001 From: vlj Date: Sun, 10 May 2015 20:37:27 +0200 Subject: [PATCH 007/343] d3d12: Fix gfxcommandlist leak --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 6 +++++- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 6f8599cbff..d8fb2c1b5b 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -276,7 +276,8 @@ void D3D12GSRender::ExecCMD(u32 cmd) { assert(cmd == NV4097_CLEAR_SURFACE); ID3D12GraphicsCommandList *commandList; - m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_commandAllocator, nullptr, IID_PPV_ARGS(&commandList)); + check(m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_commandAllocator, nullptr, IID_PPV_ARGS(&commandList))); + m_inflightCommandList.push_back(commandList); D3D12_RESOURCE_BARRIER transition = {}; transition.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; @@ -711,5 +712,8 @@ void D3D12GSRender::Flip() WaitForSingleObject(gfxqueuecompletion, INFINITE); CloseHandle(gfxqueuecompletion); m_commandAllocator->Reset(); + for 
(ID3D12GraphicsCommandList *gfxCommandList : m_inflightCommandList) + gfxCommandList->Release(); + m_inflightCommandList.clear(); } #endif \ No newline at end of file diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 929fa0a75e..31cdb433de 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -66,6 +66,7 @@ private: ID3D12CommandQueue *m_commandQueueCopy; ID3D12CommandQueue *m_commandQueueGraphic; ID3D12CommandAllocator *m_commandAllocator; + std::list m_inflightCommandList; struct IDXGISwapChain3 *m_swapChain; ID3D12Resource* m_backBuffer[2]; From 728736ccdd42956ad272a5ed1c51ec54ff285417 Mon Sep 17 00:00:00 2001 From: vlj Date: Sun, 10 May 2015 23:49:19 +0200 Subject: [PATCH 008/343] d3d12: Add fbo support and blit it before present --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 258 ++++++------------ rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 2 +- rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp | 139 ++++++++++ rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h | 27 +- 4 files changed, 245 insertions(+), 181 deletions(-) create mode 100644 rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index d8fb2c1b5b..4cb34e7545 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -18,7 +18,7 @@ static void check(HRESULT hr) } D3D12GSRender::D3D12GSRender() - : GSRender() + : GSRender(), m_fbo(nullptr) { // Enable d3d debug layer Microsoft::WRL::ComPtr debugInterface; @@ -93,7 +93,7 @@ void D3D12GSRender::Close() void D3D12GSRender::InitDrawBuffers() { - if (!m_fbo.IsCreated() || RSXThread::m_width != m_lastWidth || RSXThread::m_height != m_lastHeight || m_lastDepth != m_surface_depth_format) + if (m_fbo == nullptr || RSXThread::m_width != m_lastWidth || RSXThread::m_height != m_lastHeight || m_lastDepth != m_surface_depth_format) { LOG_WARNING(RSX, "New FBO (%dx%d)", 
RSXThread::m_width, RSXThread::m_height); @@ -101,158 +101,8 @@ void D3D12GSRender::InitDrawBuffers() m_lastHeight = RSXThread::m_height; m_lastDepth = m_surface_depth_format; -/* m_fbo.Create(); - checkForGlError("m_fbo.Create"); - m_fbo.Bind(); - - m_rbo.Create(4 + 1); - checkForGlError("m_rbo.Create"); - - for (int i = 0; i < 4; ++i) - { - m_rbo.Bind(i); - m_rbo.Storage(GL_RGBA, RSXThread::m_width, RSXThread::m_height); - checkForGlError("m_rbo.Storage(GL_RGBA)"); - } - - m_rbo.Bind(4); - - switch (m_surface_depth_format) - { - case 0: - { - // case 0 found in BLJM60410-[Suzukaze no Melt - Days in the Sanctuary] - // [E : RSXThread]: Bad depth format! (0) - // [E : RSXThread]: glEnable: opengl error 0x0506 - // [E : RSXThread]: glDrawArrays: opengl error 0x0506 - m_rbo.Storage(GL_DEPTH_COMPONENT, RSXThread::m_width, RSXThread::m_height); - checkForGlError("m_rbo.Storage(GL_DEPTH_COMPONENT)"); - break; - } - - case CELL_GCM_SURFACE_Z16: - { - m_rbo.Storage(GL_DEPTH_COMPONENT16, RSXThread::m_width, RSXThread::m_height); - checkForGlError("m_rbo.Storage(GL_DEPTH_COMPONENT16)"); - - m_fbo.Renderbuffer(GL_DEPTH_ATTACHMENT, m_rbo.GetId(4)); - checkForGlError("m_fbo.Renderbuffer(GL_DEPTH_ATTACHMENT)"); - break; - } - - - case CELL_GCM_SURFACE_Z24S8: - { - m_rbo.Storage(GL_DEPTH24_STENCIL8, RSXThread::m_width, RSXThread::m_height); - checkForGlError("m_rbo.Storage(GL_DEPTH24_STENCIL8)"); - - m_fbo.Renderbuffer(GL_DEPTH_ATTACHMENT, m_rbo.GetId(4)); - checkForGlError("m_fbo.Renderbuffer(GL_DEPTH_ATTACHMENT)"); - - m_fbo.Renderbuffer(GL_STENCIL_ATTACHMENT, m_rbo.GetId(4)); - checkForGlError("m_fbo.Renderbuffer(GL_STENCIL_ATTACHMENT)"); - - break; - - } - - - default: - { - LOG_ERROR(RSX, "Bad depth format! 
(%d)", m_surface_depth_format); - assert(0); - break; - } - } - - for (int i = 0; i < 4; ++i) - { - m_fbo.Renderbuffer(GL_COLOR_ATTACHMENT0 + i, m_rbo.GetId(i)); - checkForGlError(fmt::Format("m_fbo.Renderbuffer(GL_COLOR_ATTACHMENT%d)", i)); - } - */ - //m_fbo.Renderbuffer(GL_DEPTH_ATTACHMENT, m_rbo.GetId(4)); - //checkForGlError("m_fbo.Renderbuffer(GL_DEPTH_ATTACHMENT)"); - - //if (m_surface_depth_format == 2) - //{ - // m_fbo.Renderbuffer(GL_STENCIL_ATTACHMENT, m_rbo.GetId(4)); - // checkForGlError("m_fbo.Renderbuffer(GL_STENCIL_ATTACHMENT)"); - //} + m_fbo = new D3D12RenderTargetSets(m_device, (u8)m_lastDepth, m_lastWidth, m_lastHeight); } - /* - if (!m_set_surface_clip_horizontal) - { - m_surface_clip_x = 0; - m_surface_clip_w = RSXThread::m_width; - } - - if (!m_set_surface_clip_vertical) - { - m_surface_clip_y = 0; - m_surface_clip_h = RSXThread::m_height; - } - - m_fbo.Bind(); - - static const GLenum draw_buffers[] = { GL_COLOR_ATTACHMENT0, GL_COLOR_ATTACHMENT1, GL_COLOR_ATTACHMENT2, GL_COLOR_ATTACHMENT3 }; - - switch (m_surface_color_target) - { - case CELL_GCM_SURFACE_TARGET_NONE: break; - - case CELL_GCM_SURFACE_TARGET_0: - { - glDrawBuffer(draw_buffers[0]); - checkForGlError("glDrawBuffer(0)"); - break; - } - - case CELL_GCM_SURFACE_TARGET_1: - { - glDrawBuffer(draw_buffers[1]); - checkForGlError("glDrawBuffer(1)"); - break; - } - - case CELL_GCM_SURFACE_TARGET_MRT1: - { - glDrawBuffers(2, draw_buffers); - checkForGlError("glDrawBuffers(2)"); - break; - } - - case CELL_GCM_SURFACE_TARGET_MRT2: - { - glDrawBuffers(3, draw_buffers); - checkForGlError("glDrawBuffers(3)"); - break; - } - - case CELL_GCM_SURFACE_TARGET_MRT3: - { - glDrawBuffers(4, draw_buffers); - checkForGlError("glDrawBuffers(4)"); - break; - } - - default: - { - LOG_ERROR(RSX, "Bad surface color target: %d", m_surface_color_target); - break; - } - - } - - if (m_read_buffer) - { - u32 format = GL_BGRA; - CellGcmDisplayInfo* buffers = vm::get_ptr(m_gcm_buffers_addr); - u32 addr = 
GetAddress(buffers[m_gcm_current_buffer].offset, CELL_GCM_LOCATION_LOCAL); - u32 width = buffers[m_gcm_current_buffer].width; - u32 height = buffers[m_gcm_current_buffer].height; - glDrawPixels(width, height, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8, vm::get_ptr(addr)); - }*/ } void D3D12GSRender::OnInit() @@ -275,17 +125,13 @@ void D3D12GSRender::OnReset() void D3D12GSRender::ExecCMD(u32 cmd) { assert(cmd == NV4097_CLEAR_SURFACE); + + InitDrawBuffers(); + ID3D12GraphicsCommandList *commandList; check(m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_commandAllocator, nullptr, IID_PPV_ARGS(&commandList))); m_inflightCommandList.push_back(commandList); - D3D12_RESOURCE_BARRIER transition = {}; - transition.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - transition.Transition.pResource = m_backBuffer[m_swapChain->GetCurrentBackBufferIndex()]; - transition.Transition.StateBefore = D3D12_RESOURCE_STATE_PRESENT; - transition.Transition.StateAfter = D3D12_RESOURCE_STATE_RENDER_TARGET; - - commandList->ResourceBarrier(1, &transition); /* if (m_set_color_mask) { @@ -301,19 +147,12 @@ void D3D12GSRender::ExecCMD(u32 cmd) GLbitfield f = 0;*/ + // TODO: Merge depth and stencil clear when possible if (m_clear_surface_mask & 0x1) - { -// commandList->ClearDepthStencilView() -// glClearDepth(m_clear_surface_z / (float)0xffffff); - } + commandList->ClearDepthStencilView(m_fbo->getDSVCPUHandle(), D3D12_CLEAR_FLAG_DEPTH, m_clear_surface_z / (float)0xffffff, 0, 0, nullptr); -/* if (m_clear_surface_mask & 0x2) - { - glClearStencil(m_clear_surface_s); - checkForGlError("glClearStencil"); - - f |= GL_STENCIL_BUFFER_BIT; - }*/ + if (m_clear_surface_mask & 0x2) + commandList->ClearDepthStencilView(m_fbo->getDSVCPUHandle(), D3D12_CLEAR_FLAG_STENCIL, 0.f, m_clear_surface_s, 0, nullptr); if (m_clear_surface_mask & 0xF0) { @@ -324,13 +163,41 @@ void D3D12GSRender::ExecCMD(u32 cmd) m_clear_surface_color_b / 255.0f, m_clear_surface_color_a / 255.0f }; - 
commandList->ClearRenderTargetView(m_backbufferAsRendertarget[m_swapChain->GetCurrentBackBufferIndex()]->GetCPUDescriptorHandleForHeapStart(), clearColor, 0, nullptr); + switch (m_surface_color_target) + { + case CELL_GCM_SURFACE_TARGET_NONE: break; + + case CELL_GCM_SURFACE_TARGET_0: + commandList->ClearRenderTargetView(m_fbo->getRTTCPUHandle(0), clearColor, 0, nullptr); + break; + case CELL_GCM_SURFACE_TARGET_1: + commandList->ClearRenderTargetView(m_fbo->getRTTCPUHandle(1), clearColor, 0, nullptr); + break; + case CELL_GCM_SURFACE_TARGET_MRT1: + commandList->ClearRenderTargetView(m_fbo->getRTTCPUHandle(0), clearColor, 0, nullptr); + commandList->ClearRenderTargetView(m_fbo->getRTTCPUHandle(1), clearColor, 0, nullptr); + break; + case CELL_GCM_SURFACE_TARGET_MRT2: + commandList->ClearRenderTargetView(m_fbo->getRTTCPUHandle(0), clearColor, 0, nullptr); + commandList->ClearRenderTargetView(m_fbo->getRTTCPUHandle(1), clearColor, 0, nullptr); + commandList->ClearRenderTargetView(m_fbo->getRTTCPUHandle(2), clearColor, 0, nullptr); + break; + case CELL_GCM_SURFACE_TARGET_MRT3: + commandList->ClearRenderTargetView(m_fbo->getRTTCPUHandle(0), clearColor, 0, nullptr); + commandList->ClearRenderTargetView(m_fbo->getRTTCPUHandle(1), clearColor, 0, nullptr); + commandList->ClearRenderTargetView(m_fbo->getRTTCPUHandle(2), clearColor, 0, nullptr); + commandList->ClearRenderTargetView(m_fbo->getRTTCPUHandle(3), clearColor, 0, nullptr); + break; + default: + LOG_ERROR(RSX, "Bad surface color target: %d", m_surface_color_target); + } + } - transition.Transition.StateBefore = D3D12_RESOURCE_STATE_RENDER_TARGET; - transition.Transition.StateAfter = D3D12_RESOURCE_STATE_PRESENT; +// transition.Transition.StateBefore = D3D12_RESOURCE_STATE_RENDER_TARGET; +// transition.Transition.StateAfter = D3D12_RESOURCE_STATE_PRESENT; - commandList->ResourceBarrier(1, &transition); +// commandList->ResourceBarrier(1, &transition); check(commandList->Close()); 
m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**) &commandList); @@ -701,6 +568,47 @@ void D3D12GSRender::ExecCMD() void D3D12GSRender::Flip() { + ID3D12GraphicsCommandList *commandList; + m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_commandAllocator, nullptr, IID_PPV_ARGS(&commandList)); + m_inflightCommandList.push_back(commandList); + + switch (m_surface_color_target) + { + case CELL_GCM_SURFACE_TARGET_0: + case CELL_GCM_SURFACE_TARGET_1: + case CELL_GCM_SURFACE_TARGET_MRT1: + case CELL_GCM_SURFACE_TARGET_MRT2: + case CELL_GCM_SURFACE_TARGET_MRT3: + { + D3D12_RESOURCE_BARRIER barriers[2] = {}; + barriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barriers[0].Transition.pResource = m_backBuffer[m_swapChain->GetCurrentBackBufferIndex()]; + barriers[0].Transition.StateBefore = D3D12_RESOURCE_STATE_PRESENT; + barriers[0].Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_DEST; + + barriers[1].Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barriers[1].Transition.pResource = m_fbo->getRenderTargetTexture(0); + barriers[1].Transition.StateBefore = D3D12_RESOURCE_STATE_RENDER_TARGET; + barriers[1].Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE; + + commandList->ResourceBarrier(2, barriers); + D3D12_TEXTURE_COPY_LOCATION src = {}, dst = {}; + src.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, dst.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + src.SubresourceIndex = 0, dst.SubresourceIndex = 0; + src.pResource = m_fbo->getRenderTargetTexture(0), dst.pResource = m_backBuffer[m_swapChain->GetCurrentBackBufferIndex()]; + D3D12_BOX box = { 0, 0, 0, RSXThread::m_width, RSXThread::m_height, 1 }; + commandList->CopyTextureRegion(&dst, 0, 0, 0, &src, &box); + + barriers[0].Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; + barriers[0].Transition.StateAfter = D3D12_RESOURCE_STATE_PRESENT; + barriers[1].Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE; + barriers[1].Transition.StateAfter = 
D3D12_RESOURCE_STATE_RENDER_TARGET; + commandList->ResourceBarrier(2, barriers); + commandList->Close(); + m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)&commandList); + } + } + check(m_swapChain->Present(1, 0)); // Wait execution is over // TODO: It's suboptimal, we should use 2 command allocator diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 31cdb433de..2b2cffa755 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -61,7 +61,7 @@ private: // GLvao m_vao; // GLvbo m_vbo; // GLrbo m_rbo; - D3D12RenderTargetSets m_fbo; + D3D12RenderTargetSets *m_fbo; ID3D12Device* m_device; ID3D12CommandQueue *m_commandQueueCopy; ID3D12CommandQueue *m_commandQueueGraphic; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp new file mode 100644 index 0000000000..e6c34301cf --- /dev/null +++ b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp @@ -0,0 +1,139 @@ +#include "stdafx.h" +#if defined(DX12_SUPPORT) +#include "D3D12RenderTargetSets.h" +#include "rpcs3/Ini.h" +#include "Utilities/rPlatform.h" // only for rImage +#include "Utilities/File.h" +#include "Utilities/Log.h" +#include "Emu/Memory/Memory.h" +#include "Emu/System.h" +#include "Emu/RSX/GSRender.h" + +D3D12RenderTargetSets::D3D12RenderTargetSets(ID3D12Device *device, u8 surfaceDepthFormat, size_t width, size_t height) +{ + + D3D12_DESCRIPTOR_HEAP_DESC descriptorHeapDesc = {}; + descriptorHeapDesc.NumDescriptors = 1; + descriptorHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_DSV; + device->CreateDescriptorHeap(&descriptorHeapDesc, IID_PPV_ARGS(&m_depthStencilDescriptorHeap)); + + descriptorHeapDesc.NumDescriptors = 4; + descriptorHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV; + device->CreateDescriptorHeap(&descriptorHeapDesc, IID_PPV_ARGS(&m_rttDescriptorHeap)); + + // Every resource are committed for simplicity, later we could use heap + D3D12_HEAP_PROPERTIES heapProp 
= {}; + heapProp.Type = D3D12_HEAP_TYPE_DEFAULT; + D3D12_RESOURCE_DESC resourceDesc = {}; + resourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; + resourceDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL; + resourceDesc.Width = (UINT)width; + resourceDesc.Height = (UINT)height; + resourceDesc.SampleDesc.Count = 1; + resourceDesc.DepthOrArraySize = 1; + + switch (surfaceDepthFormat) + { + case 0: + break; + case CELL_GCM_SURFACE_Z16: + resourceDesc.Format = DXGI_FORMAT_R16_TYPELESS; + break; + case CELL_GCM_SURFACE_Z24S8: + resourceDesc.Format = DXGI_FORMAT_R24G8_TYPELESS; + break; + default: + LOG_ERROR(RSX, "Bad depth format! (%d)", surfaceDepthFormat); + assert(0); + } + + device->CreateCommittedResource( + &heapProp, + D3D12_HEAP_FLAG_NONE, + &resourceDesc, + D3D12_RESOURCE_STATE_DEPTH_WRITE, + nullptr, // TODO: Assign sensible default clearvalue here + IID_PPV_ARGS(&m_depthStencilTexture) + ); + D3D12_DEPTH_STENCIL_VIEW_DESC depthStencilViewDesc = {}; + switch (surfaceDepthFormat) + { + case 0: + break; + case CELL_GCM_SURFACE_Z16: + depthStencilViewDesc.Format = DXGI_FORMAT_D16_UNORM; + break; + case CELL_GCM_SURFACE_Z24S8: + depthStencilViewDesc.Format = DXGI_FORMAT_D24_UNORM_S8_UINT; + break; + default: + LOG_ERROR(RSX, "Bad depth format! 
(%d)", surfaceDepthFormat); + assert(0); + } + depthStencilViewDesc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2D; + device->CreateDepthStencilView(m_depthStencilTexture, &depthStencilViewDesc, m_depthStencilDescriptorHeap->GetCPUDescriptorHandleForHeapStart()); + + g_RTTIncrement = device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_RTV); + D3D12_CPU_DESCRIPTOR_HANDLE Handle = m_rttDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); + for (int i = 0; i < 4; ++i) + { + D3D12_RESOURCE_DESC resourceDesc = {}; + resourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; + resourceDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; + resourceDesc.Width = width; + resourceDesc.Height = height; + resourceDesc.DepthOrArraySize = 1; + resourceDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + resourceDesc.SampleDesc.Count = 1; + + device->CreateCommittedResource( + &heapProp, + D3D12_HEAP_FLAG_NONE, + &resourceDesc, + D3D12_RESOURCE_STATE_RENDER_TARGET, + nullptr, // TODO: Assign sensible default clearvalue here + IID_PPV_ARGS(&m_rtts[i]) + ); + + D3D12_RENDER_TARGET_VIEW_DESC rttViewDesc = {}; + rttViewDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; + rttViewDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + + device->CreateRenderTargetView(m_rtts[i], &rttViewDesc, Handle); + Handle.ptr += g_RTTIncrement; + } + + /*if (!m_set_surface_clip_horizontal) + { + m_surface_clip_x = 0; + m_surface_clip_w = RSXThread::m_width; + } + + if (!m_set_surface_clip_vertical) + { + m_surface_clip_y = 0; + m_surface_clip_h = RSXThread::m_height; + }*/ +} + +D3D12RenderTargetSets::~D3D12RenderTargetSets() +{ + +} + +D3D12_CPU_DESCRIPTOR_HANDLE D3D12RenderTargetSets::getRTTCPUHandle(u8 baseFBO) const +{ + D3D12_CPU_DESCRIPTOR_HANDLE Handle = m_rttDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); + Handle.ptr += baseFBO * g_RTTIncrement; + return Handle; +} + +D3D12_CPU_DESCRIPTOR_HANDLE D3D12RenderTargetSets::getDSVCPUHandle() const +{ + return 
m_depthStencilDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); +} +ID3D12Resource * D3D12RenderTargetSets::getRenderTargetTexture(u8 Id) const +{ + return m_rtts[Id]; +} +#endif \ No newline at end of file diff --git a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h index 7f7378558f..d6b292101e 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h @@ -1,11 +1,28 @@ #pragma once +#if defined(DX12_SUPPORT) +#include +/** + * Class that embeds a RenderTargetDescriptor view and eventually a DepthStencil Descriptor View. + * Used to imitate OpenGL FrameBuffer concept. + */ class D3D12RenderTargetSets { + size_t g_RTTIncrement; + ID3D12Resource *m_depthStencilTexture; + ID3D12Resource *m_rtts[4]; + ID3D12DescriptorHeap *m_rttDescriptorHeap; + ID3D12DescriptorHeap *m_depthStencilDescriptorHeap; public: - bool IsCreated() - { - return false; - } -}; \ No newline at end of file + D3D12RenderTargetSets(ID3D12Device *device, u8 surfaceDepthFormat, size_t width, size_t height); + ~D3D12RenderTargetSets(); + /** + * Return the base descriptor address for the give surface target. + * All rtt's view descriptor are contigous. 
+ */ + D3D12_CPU_DESCRIPTOR_HANDLE getRTTCPUHandle(u8 baseFBO) const; + D3D12_CPU_DESCRIPTOR_HANDLE getDSVCPUHandle() const; + ID3D12Resource *getRenderTargetTexture(u8 Id) const; +}; +#endif \ No newline at end of file From d5b4a31c377d62fabff2f5a890e6f09c30cc9ae5 Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 11 May 2015 01:56:48 +0200 Subject: [PATCH 009/343] d3d12: Start caching shaders --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 51 ++-- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 9 +- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp | 315 ++++++++++++++++++++ rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h | 17 ++ rpcs3/Emu/RSX/D3D12/DefaultPixelShader.hlsl | 4 + 5 files changed, 369 insertions(+), 27 deletions(-) create mode 100644 rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp create mode 100644 rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h create mode 100644 rpcs3/Emu/RSX/D3D12/DefaultPixelShader.hlsl diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 4cb34e7545..f9b456ea1d 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -18,7 +18,7 @@ static void check(HRESULT hr) } D3D12GSRender::D3D12GSRender() - : GSRender(), m_fbo(nullptr) + : GSRender(), m_fbo(nullptr), m_PSO(nullptr) { // Enable d3d debug layer Microsoft::WRL::ComPtr debugInterface; @@ -132,7 +132,6 @@ void D3D12GSRender::ExecCMD(u32 cmd) check(m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_commandAllocator, nullptr, IID_PPV_ARGS(&commandList))); m_inflightCommandList.push_back(commandList); - /* if (m_set_color_mask) { glColorMask(m_color_mask_r, m_color_mask_g, m_color_mask_b, m_color_mask_a); @@ -143,9 +142,7 @@ void D3D12GSRender::ExecCMD(u32 cmd) { glScissor(m_scissor_x, m_scissor_y, m_scissor_w, m_scissor_h); checkForGlError("glScissor"); - } - - GLbitfield f = 0;*/ + }*/ // TODO: Merge depth and stencil clear when possible if (m_clear_surface_mask & 0x1) @@ -191,26 +188,36 @@ void D3D12GSRender::ExecCMD(u32 
cmd) default: LOG_ERROR(RSX, "Bad surface color target: %d", m_surface_color_target); } - } -// transition.Transition.StateBefore = D3D12_RESOURCE_STATE_RENDER_TARGET; -// transition.Transition.StateAfter = D3D12_RESOURCE_STATE_PRESENT; - -// commandList->ResourceBarrier(1, &transition); - check(commandList->Close()); m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**) &commandList); } + +bool D3D12GSRender::LoadProgram() +{ + if (!m_cur_fragment_prog) + { + LOG_WARNING(RSX, "LoadProgram: m_cur_shader_prog == NULL"); + return false; + } + + m_cur_fragment_prog->ctrl = m_shader_ctrl; + + if (!m_cur_vertex_prog) + { + LOG_WARNING(RSX, "LoadProgram: m_cur_vertex_prog == NULL"); + return false; + } + + m_PSO = new D3D12PipelineState(m_device, m_cur_vertex_prog, m_cur_fragment_prog); + return true; +} + void D3D12GSRender::ExecCMD() { - ID3D12CommandList *commandList; -// m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_commandAllocator, nullptr, IID_PPV_ARGS(&commandList)); - - - //return; -/* if (!LoadProgram()) + if (!LoadProgram()) { LOG_ERROR(RSX, "LoadProgram failed."); Emu.Pause(); @@ -219,7 +226,10 @@ void D3D12GSRender::ExecCMD() InitDrawBuffers(); - if (m_set_color_mask) + ID3D12CommandList *commandList; +// m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_commandAllocator, nullptr, IID_PPV_ARGS(&commandList)); + +/* if (m_set_color_mask) { glColorMask(m_color_mask_r, m_color_mask_g, m_color_mask_b, m_color_mask_a); checkForGlError("glColorMask"); @@ -262,11 +272,6 @@ void D3D12GSRender::ExecCMD() Enable(m_set_line_stipple, GL_LINE_STIPPLE); Enable(m_set_polygon_stipple, GL_POLYGON_STIPPLE); - if (!is_intel_vendor) - { - Enable(m_set_depth_bounds_test, GL_DEPTH_BOUNDS_TEST_EXT); - } - if (m_set_clip_plane) { Enable(m_clip_plane_0, GL_CLIP_PLANE0); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 2b2cffa755..bbc86c13be 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ 
b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -11,10 +11,10 @@ #include "Emu/RSX/GSRender.h" #include "D3D12RenderTargetSets.h" +#include "D3D12PipelineState.h" #pragma comment (lib, "d3d12.lib") #pragma comment (lib, "dxgi.lib") -#pragma comment (lib, "d3dcompiler.lib") class GSFrameBase2 { @@ -47,7 +47,7 @@ private: // std::vector m_vdata; // std::vector m_post_draw_objs; - // GLProgram m_program; + D3D12PipelineState *m_PSO; int m_fp_buf_num; int m_vp_buf_num; // GLProgramBuffer m_prog_buffer; @@ -60,7 +60,6 @@ private: // GLvao m_vao; // GLvbo m_vbo; - // GLrbo m_rbo; D3D12RenderTargetSets *m_fbo; ID3D12Device* m_device; ID3D12CommandQueue *m_commandQueueCopy; @@ -86,6 +85,8 @@ public: private: virtual void Close() override; + + bool LoadProgram(); /* void EnableVertexData(bool indexed_draw = false); void DisableVertexData(); void InitVertexData(); @@ -93,7 +94,7 @@ private: void Enable(bool enable, const u32 cap); - bool LoadProgram(); + void WriteBuffers(); void WriteDepthBuffer(); void WriteColorBuffers(); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp new file mode 100644 index 0000000000..5c6052bcf3 --- /dev/null +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp @@ -0,0 +1,315 @@ +#include "stdafx.h" +#if defined (DX12_SUPPORT) + +#include "D3D12PipelineState.h" +#include "Emu/Memory/vm.h" +#include "Utilities/Log.h" +#include +#include +#include + +#pragma comment (lib, "d3dcompiler.lib") + +std::unordered_map > CachedShader; + +struct GLBufferInfo +{ + ID3D12PipelineState *prog_id; + u32 fp_id; + u32 vp_id; + std::vector fp_data; + std::vector vp_data; + std::string fp_shader; + std::string vp_shader; + Microsoft::WRL::ComPtr fp_bytecode; + Microsoft::WRL::ComPtr vp_bytecode; +}; + +// Copied from GL implementation + +enum class SHADER_TYPE +{ + SHADER_TYPE_VERTEX, + SHADER_TYPE_FRAGMENT +}; + +/** Storage for a shader +* Embeds the D3DBlob corresponding to +*/ +class Shader +{ +public: + Shader() : 
bytecode(nullptr) {} + ~Shader() {} + +// GLParamArray parr; + u32 id; + std::string shader; + Microsoft::WRL::ComPtr bytecode; + + /** + * Decompile a fragment shader located in the PS3's Memory. This function operates synchronously. + * @param prog RSXShaderProgram specifying the location and size of the shader in memory + */ +// void Decompile(RSXFragmentProgram& prog); + + /** + * Asynchronously decompile a fragment shader located in the PS3's Memory. + * When this function is called you must call Wait() before GetShaderText() will return valid data. + * @param prog RSXShaderProgram specifying the location and size of the shader in memory + */ +// void DecompileAsync(RSXFragmentProgram& prog); + + /** Wait for the decompiler task to complete decompilation. */ +// void Wait(); + + /** Compile the decompiled fragment shader into a format we can use with OpenGL. */ + void Compile(SHADER_TYPE st) + { + static const char VSstring[] = + "float4 main(float4 pos : POSITION) : SV_POSITION" + "{" + " return pos;" + "}"; + static const char FSstring[] = + "float4 main() : SV_TARGET" + "{" + "return float4(1.0f, 1.0f, 1.0f, 1.0f);" + "}"; + HRESULT hr; + Microsoft::WRL::ComPtr errorBlob; + switch (st) + { + case SHADER_TYPE::SHADER_TYPE_VERTEX: + hr = D3DCompile(VSstring, sizeof(VSstring), "test", nullptr, nullptr, "main", "vs_5_0", 0, 0, bytecode.GetAddressOf(), errorBlob.GetAddressOf()); + if (hr != S_OK) + LOG_ERROR(RSX, "VS build failed:%s", errorBlob->GetBufferPointer()); + break; + case SHADER_TYPE::SHADER_TYPE_FRAGMENT: + hr = D3DCompile(FSstring, sizeof(FSstring), "test", nullptr, nullptr, "main", "ps_5_0", 0, 0, bytecode.GetAddressOf(), errorBlob.GetAddressOf()); + if (hr != S_OK) + LOG_ERROR(RSX, "FS build failed:%s", errorBlob->GetBufferPointer()); + break; + } + } + +private: + /** Threaded fragment shader decompiler responsible for decompiling this program */ +// GLFragmentDecompilerThread* m_decompiler_thread; + + /** Deletes the shader and any stored 
information */ +// void Delete(); +}; + +// Could be improved with an (un)ordered map ? +class ProgramBuffer +{ + std::vector m_buf; +public: + int SearchFp(const RSXFragmentProgram& rsx_fp, Shader& shader) + { + int n = m_buf.size(); + for (int i = 0; i < m_buf.size(); ++i) + { + if (memcmp(&m_buf[i].fp_data[0], vm::get_ptr(rsx_fp.addr), m_buf[i].fp_data.size()) != 0) continue; + + shader.id = m_buf[i].fp_id; + shader.shader = m_buf[i].fp_shader.c_str(); + shader.bytecode = m_buf[i].fp_bytecode; + + return i; + } + + return -1; + } + + int SearchVp(const RSXVertexProgram& rsx_vp, Shader& shader) + { + for (u32 i = 0; i < m_buf.size(); ++i) + { + if (m_buf[i].vp_data.size() != rsx_vp.data.size()) continue; + if (memcmp(m_buf[i].vp_data.data(), rsx_vp.data.data(), rsx_vp.data.size() * 4) != 0) continue; + + shader.id = m_buf[i].vp_id; + shader.shader = m_buf[i].vp_shader.c_str(); + shader.bytecode = m_buf[i].vp_bytecode; + + return i; + } + + return -1; + } + + ID3D12PipelineState *GetProg(u32 fp, u32 vp) const + { + if (fp == vp) + { + /* + LOG_NOTICE(RSX, "Get program (%d):", fp); + LOG_NOTICE(RSX, "*** prog id = %d", m_buf[fp].prog_id); + LOG_NOTICE(RSX, "*** vp id = %d", m_buf[fp].vp_id); + LOG_NOTICE(RSX, "*** fp id = %d", m_buf[fp].fp_id); + + LOG_NOTICE(RSX, "*** vp shader = \n%s", m_buf[fp].vp_shader.wx_str()); + LOG_NOTICE(RSX, "*** fp shader = \n%s", m_buf[fp].fp_shader.wx_str()); + */ + return m_buf[fp].prog_id; + } + + for (u32 i = 0; i(rsx_fp.addr), vm::get_ptr(rsx_fp.addr + rsx_fp.size)); + new_buf.vp_data = rsx_vp.data; + new_buf.vp_bytecode = vp.bytecode; + + new_buf.vp_shader = vp.shader; + new_buf.fp_shader = fp.shader; + + m_buf.resize(m_buf.size() + 1); + m_buf.push_back(new_buf); + } +}; + +static ProgramBuffer g_cachedProgram; + +D3D12PipelineState::D3D12PipelineState(ID3D12Device *device, RSXVertexProgram *vertexShader, RSXFragmentProgram *fragmentShader) +{ + Shader m_vertex_prog, m_fragment_prog; + int m_fp_buf_num = 
g_cachedProgram.SearchFp(*fragmentShader, m_fragment_prog); + int m_vp_buf_num = g_cachedProgram.SearchVp(*vertexShader, m_vertex_prog); + + if (m_fp_buf_num == -1) + { + LOG_WARNING(RSX, "FP not found in buffer!"); +// m_fragment_prog.Decompile(*fragmentShader); + m_fragment_prog.Compile(SHADER_TYPE::SHADER_TYPE_FRAGMENT); + + // TODO: This shouldn't use current dir +// fs::file("./FragmentProgram.txt", o_write | o_create | o_trunc).write(m_fragment_prog.shader.c_str(), m_fragment_prog.shader.size()); + } + + if (m_vp_buf_num == -1) + { + LOG_WARNING(RSX, "VP not found in buffer!"); +// m_vertex_prog.Decompile(*vertexShader); + m_vertex_prog.Compile(SHADER_TYPE::SHADER_TYPE_VERTEX); + + // TODO: This shouldn't use current dir +// fs::file("./VertexProgram.txt", o_write | o_create | o_trunc).write(m_vertex_prog.shader.c_str(), m_vertex_prog.shader.size()); + } + + if (m_fp_buf_num != -1 && m_vp_buf_num != -1) + { +// m_program.id = m_prog_buffer.GetProg(m_fp_buf_num, m_vp_buf_num); + } + + if (false)//m_program.id) + { +/* // RSX Debugger: Check if this program was modified and update it + if (Ini.GSLogPrograms.GetValue()) + { + for (auto& program : m_debug_programs) + { + if (program.id == m_program.id && program.modified) + { + // TODO: This isn't working perfectly. 
Is there any better/shorter way to update the program + m_vertex_prog.shader = program.vp_shader; + m_fragment_prog.shader = program.fp_shader; + m_vertex_prog.Wait(); + m_vertex_prog.Compile(); + checkForGlError("m_vertex_prog.Compile"); + m_fragment_prog.Wait(); + m_fragment_prog.Compile(); + checkForGlError("m_fragment_prog.Compile"); + glAttachShader(m_program.id, m_vertex_prog.id); + glAttachShader(m_program.id, m_fragment_prog.id); + glLinkProgram(m_program.id); + checkForGlError("glLinkProgram"); + glDetachShader(m_program.id, m_vertex_prog.id); + glDetachShader(m_program.id, m_fragment_prog.id); + program.vp_id = m_vertex_prog.id; + program.fp_id = m_fragment_prog.id; + program.modified = false; + } + } + } + m_program.Use();*/ + } + else + { + D3D12_GRAPHICS_PIPELINE_STATE_DESC graphicPipelineStateDesc = {}; + + graphicPipelineStateDesc.VS.BytecodeLength = m_vertex_prog.bytecode->GetBufferSize(); + graphicPipelineStateDesc.VS.pShaderBytecode = m_vertex_prog.bytecode->GetBufferPointer(); + graphicPipelineStateDesc.PS.BytecodeLength = m_fragment_prog.bytecode->GetBufferSize(); + graphicPipelineStateDesc.PS.pShaderBytecode = m_fragment_prog.bytecode->GetBufferPointer(); + device->CreateGraphicsPipelineState(&graphicPipelineStateDesc, IID_PPV_ARGS(&m_pipelineStateObject)); + g_cachedProgram.Add(m_pipelineStateObject, m_fragment_prog, *fragmentShader, m_vertex_prog, *vertexShader); + /*m_program.Create(m_vertex_prog.id, m_fragment_prog.id); + checkForGlError("m_program.Create"); + m_prog_buffer.Add(m_program, m_fragment_prog, *m_cur_fragment_prog, m_vertex_prog, *m_cur_vertex_prog); + checkForGlError("m_prog_buffer.Add"); + m_program.Use(); + + // RSX Debugger + if (Ini.GSLogPrograms.GetValue()) + { + RSXDebuggerProgram program; + program.id = m_program.id; + program.vp_id = m_vertex_prog.id; + program.fp_id = m_fragment_prog.id; + program.vp_shader = m_vertex_prog.shader; + program.fp_shader = m_fragment_prog.shader; + m_debug_programs.push_back(program); + 
}*/ + } +} + +D3D12PipelineState::~D3D12PipelineState() +{ + +} + + +#endif \ No newline at end of file diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h new file mode 100644 index 0000000000..1916f1768c --- /dev/null +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h @@ -0,0 +1,17 @@ +#pragma once +#if defined (DX12_SUPPORT) + +#include +#include "Emu/RSX/RSXFragmentProgram.h" +#include "Emu/RSX/RSXVertexProgram.h" + +class D3D12PipelineState +{ + ID3D12PipelineState *m_pipelineStateObject; + ID3D12RootSignature *m_rootSignature; +public: + D3D12PipelineState(ID3D12Device *device, RSXVertexProgram *vertexShader, RSXFragmentProgram *fragmentShader); + ~D3D12PipelineState(); +}; + +#endif \ No newline at end of file diff --git a/rpcs3/Emu/RSX/D3D12/DefaultPixelShader.hlsl b/rpcs3/Emu/RSX/D3D12/DefaultPixelShader.hlsl new file mode 100644 index 0000000000..68075ef0c2 --- /dev/null +++ b/rpcs3/Emu/RSX/D3D12/DefaultPixelShader.hlsl @@ -0,0 +1,4 @@ +float4 main() : SV_TARGET +{ + return float4(1.0f, 1.0f, 1.0f, 1.0f); +} \ No newline at end of file From 3f24da47a0affbe964ae138563a2bdbd8f35644b Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 11 May 2015 14:43:28 +0200 Subject: [PATCH 010/343] d3d12: Update vs project files --- rpcs3/emucore.vcxproj | 6 ++++++ rpcs3/emucore.vcxproj.filters | 14 ++++++++++++++ rpcs3/rpcs3.vcxproj | 5 +++-- 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index 4d595b4823..125c782d6f 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -42,6 +42,8 @@ + + @@ -500,6 +502,7 @@ + @@ -627,6 +630,9 @@ + + + {C4A10229-4712-4BD2-B63E-50D93C67A038} emucore diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters index ca6c9fc43b..2e2b882156 100644 --- a/rpcs3/emucore.vcxproj.filters +++ b/rpcs3/emucore.vcxproj.filters @@ -971,6 +971,12 @@ Emu\GPU\RSX\D3D12 + + Emu\GPU\RSX\D3D12 + + + Emu\GPU\RSX\D3D12 + @@ -1840,5 
+1846,13 @@ Emu\GPU\RSX\D3D12 + + Emu\GPU\RSX\D3D12 + + + + + Emu\GPU\RSX\D3D12 + \ No newline at end of file diff --git a/rpcs3/rpcs3.vcxproj b/rpcs3/rpcs3.vcxproj index 648a0d7e93..3ac2b0fd1a 100644 --- a/rpcs3/rpcs3.vcxproj +++ b/rpcs3/rpcs3.vcxproj @@ -83,6 +83,7 @@ Async stdafx_gui.h $(IntDir)$(TargetName)_gui.pch + _UNICODE;UNICODE;%(PreprocessorDefinitions);DX12_SUPPORT true @@ -109,7 +110,7 @@ ProgramDatabase Use ..\wxWidgets\include\msvc - _UNICODE;UNICODE;MSVC_CRT_MEMLEAK_DETECTION;%(PreprocessorDefinitions) + _UNICODE;UNICODE;MSVC_CRT_MEMLEAK_DETECTION;%(PreprocessorDefinitions);DX12_SUPPORT Async stdafx_gui.h $(IntDir)$(TargetName)_gui.pch @@ -140,7 +141,7 @@ true ..\wxWidgets\include\msvc MultiThreadedDLL - WIN32;NDEBUG;_WINDOWS;%(PreprocessorDefinitions) + WIN32;NDEBUG;_WINDOWS;%(PreprocessorDefinitions);DX12_SUPPORT false Use Speed From c29616b341e70764f98e5ab3e406646f7613e006 Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 11 May 2015 18:04:47 +0200 Subject: [PATCH 011/343] d3d12: Fragment Program caching works But not VP... 
--- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp | 273 ++++++++++++------ .../RSX/D3D12/D3D12ProgramDisassembler.cpp | 268 +++++++++++++++++ .../Emu/RSX/D3D12/D3D12ProgramDisassembler.h | 6 + rpcs3/Emu/RSX/D3D12/DefaultPixelShader.hlsl | 4 - rpcs3/emucore.vcxproj | 6 - rpcs3/emucore.vcxproj.filters | 14 - 6 files changed, 462 insertions(+), 109 deletions(-) create mode 100644 rpcs3/Emu/RSX/D3D12/D3D12ProgramDisassembler.cpp create mode 100644 rpcs3/Emu/RSX/D3D12/D3D12ProgramDisassembler.h delete mode 100644 rpcs3/Emu/RSX/D3D12/DefaultPixelShader.hlsl diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp index 5c6052bcf3..f51d1a3d30 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp @@ -2,6 +2,7 @@ #if defined (DX12_SUPPORT) #include "D3D12PipelineState.h" +#include "D3D12ProgramDisassembler.h" #include "Emu/Memory/vm.h" #include "Utilities/Log.h" #include @@ -10,7 +11,7 @@ #pragma comment (lib, "d3dcompiler.lib") -std::unordered_map > CachedShader; + struct GLBufferInfo { @@ -25,8 +26,6 @@ struct GLBufferInfo Microsoft::WRL::ComPtr vp_bytecode; }; -// Copied from GL implementation - enum class SHADER_TYPE { SHADER_TYPE_VERTEX, @@ -42,26 +41,14 @@ public: Shader() : bytecode(nullptr) {} ~Shader() {} -// GLParamArray parr; - u32 id; - std::string shader; Microsoft::WRL::ComPtr bytecode; + std::vector RSXBinary; /** * Decompile a fragment shader located in the PS3's Memory. This function operates synchronously. * @param prog RSXShaderProgram specifying the location and size of the shader in memory */ -// void Decompile(RSXFragmentProgram& prog); - - /** - * Asynchronously decompile a fragment shader located in the PS3's Memory. - * When this function is called you must call Wait() before GetShaderText() will return valid data. 
- * @param prog RSXShaderProgram specifying the location and size of the shader in memory - */ -// void DecompileAsync(RSXFragmentProgram& prog); - - /** Wait for the decompiler task to complete decompilation. */ -// void Wait(); +// void Decompile(RSXFragmentProgram& prog) /** Compile the decompiled fragment shader into a format we can use with OpenGL. */ void Compile(SHADER_TYPE st) @@ -92,58 +79,171 @@ public: break; } } - -private: - /** Threaded fragment shader decompiler responsible for decompiling this program */ -// GLFragmentDecompilerThread* m_decompiler_thread; - - /** Deletes the shader and any stored information */ -// void Delete(); }; -// Could be improved with an (un)ordered map ? +// Based on +// https://github.com/AlexAltea/nucleus/blob/master/nucleus/gpu/rsx_pgraph.cpp +union qword +{ + u64 dword[2]; + u32 word[4]; +}; + +size_t getVPBinarySize(void *ptr) +{ + const qword *instBuffer = (const qword*)ptr; + size_t instIndex = 0; + while (true) + { + const qword& inst = instBuffer[instIndex]; + bool end = inst.word[0] & 0x1; + if (end) + return (instIndex + 1) * 4; + instIndex++; + } +} + +size_t getFPBinarySize(void *ptr) +{ + const qword *instBuffer = (const qword*)ptr; + size_t instIndex = 0; + while (true) + { + const qword& inst = instBuffer[instIndex]; + bool end = (inst.word[0] >> 8) & 0x1; + if (end) + return (instIndex + 1) * 4; + instIndex++; + } +} + +struct HashVertexProgram +{ + size_t operator()(const void *program) const + { + // 64-bit Fowler/Noll/Vo FNV-1a hash code + size_t hash = 0xCBF29CE484222325ULL; + const qword *instbuffer = (const qword*)program; + size_t instIndex = 0; + bool end = false; + return 0; + while (true) + { + const qword inst = instbuffer[instIndex]; + bool end = inst.word[0] >> 31; + if (end) + return hash; + hash ^= inst.dword[0]; + hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + (hash << 8) + (hash << 40); + hash ^= inst.dword[1]; + hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 
7) + (hash << 8) + (hash << 40); + instIndex++; + } + return 0; + } +}; + +struct HashFragmentProgram +{ + size_t operator()(const void *program) const + { + // 64-bit Fowler/Noll/Vo FNV-1a hash code + size_t hash = 0xCBF29CE484222325ULL; + const qword *instbuffer = (const qword*)program; + size_t instIndex = 0; + while (true) + { + const qword& inst = instbuffer[instIndex]; + bool end = (inst.word[0] >> 8) & 0x1; + if (end) + return hash; + hash ^= inst.dword[0]; + hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + (hash << 8) + (hash << 40); + hash ^= inst.dword[1]; + hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + (hash << 8) + (hash << 40); + instIndex++; + } + return 0; + } +}; + +struct VertexProgramCompare +{ + bool operator()(const void *binary1, const void *binary2) const + { + const qword *instBuffer1 = (const qword*)binary1; + const qword *instBuffer2 = (const qword*)binary2; + size_t instIndex = 0; + while (true) + { + const qword& inst1 = instBuffer1[instIndex]; + const qword& inst2 = instBuffer2[instIndex]; + bool end = (inst1.word[0] >> 31) && (inst2.word[0] >> 31); + if (end) + return true; + if (inst1.dword[0] != inst2.dword[0] || inst1.dword[1] != inst2.dword[1]) + return false; + instIndex++; + } + } +}; + +struct FragmentProgramCompare +{ + bool operator()(const void *binary1, const void *binary2) const + { + const qword *instBuffer1 = (const qword*)binary1; + const qword *instBuffer2 = (const qword*)binary2; + size_t instIndex = 0; + while (true) + { + const qword& inst1 = instBuffer1[instIndex]; + const qword& inst2 = instBuffer2[instIndex]; + bool end = ((inst1.word[0] >> 8) & 0x1) && ((inst2.word[0] >> 8) & 0x1); + if (end) + return true; + if (inst1.dword[0] != inst2.dword[0] || inst1.dword[1] != inst2.dword[1]) + return false; + instIndex++; + } + } +}; + +typedef std::unordered_map binary2VS; +typedef std::unordered_map binary2FS; +static int tmp = 0; class ProgramBuffer { - std::vector m_buf; public: - int 
SearchFp(const RSXFragmentProgram& rsx_fp, Shader& shader) + binary2VS cacheVS; + binary2FS cacheFS; + + bool SearchFp(const RSXFragmentProgram& rsx_fp, Shader& shader) { - int n = m_buf.size(); - for (int i = 0; i < m_buf.size(); ++i) + binary2FS::const_iterator It = cacheFS.find(vm::get_ptr(rsx_fp.addr)); + if (It != cacheFS.end()) { - if (memcmp(&m_buf[i].fp_data[0], vm::get_ptr(rsx_fp.addr), m_buf[i].fp_data.size()) != 0) continue; - - shader.id = m_buf[i].fp_id; - shader.shader = m_buf[i].fp_shader.c_str(); - shader.bytecode = m_buf[i].fp_bytecode; - - return i; + shader = It->second; + return true; } - - return -1; + return false; } - int SearchVp(const RSXVertexProgram& rsx_vp, Shader& shader) + bool SearchVp(const RSXVertexProgram& rsx_vp, Shader& shader) { - for (u32 i = 0; i < m_buf.size(); ++i) + binary2VS::const_iterator It = cacheVS.find((void*)rsx_vp.data.data()); + if (It != cacheVS.end()) { - if (m_buf[i].vp_data.size() != rsx_vp.data.size()) continue; - if (memcmp(m_buf[i].vp_data.data(), rsx_vp.data.data(), rsx_vp.data.size() * 4) != 0) continue; - - shader.id = m_buf[i].vp_id; - shader.shader = m_buf[i].vp_shader.c_str(); - shader.bytecode = m_buf[i].vp_bytecode; - - return i; + shader = It->second; + return true; } - - return -1; + return false; } - ID3D12PipelineState *GetProg(u32 fp, u32 vp) const +/* ID3D12PipelineState *GetProg(u32 fp, u32 vp) const { if (fp == vp) - { + {*/ /* LOG_NOTICE(RSX, "Get program (%d):", fp); LOG_NOTICE(RSX, "*** prog id = %d", m_buf[fp].prog_id); @@ -153,15 +253,15 @@ public: LOG_NOTICE(RSX, "*** vp shader = \n%s", m_buf[fp].vp_shader.wx_str()); LOG_NOTICE(RSX, "*** fp shader = \n%s", m_buf[fp].fp_shader.wx_str()); */ - return m_buf[fp].prog_id; +/* return m_buf[fp].prog_id; } for (u32 i = 0; i(rsx_fp.addr)); + void *fpShadowCopy = malloc(actualFPSize); + memcpy(fpShadowCopy, vm::get_ptr(rsx_fp.addr), actualFPSize); + cacheFS.insert(std::make_pair(fpShadowCopy, fp)); + } void Add(ID3D12PipelineState *prog, Shader& 
fp, RSXFragmentProgram& rsx_fp, Shader& vp, RSXVertexProgram& rsx_vp) { - GLBufferInfo new_buf = {}; - - LOG_NOTICE(RSX, "Add program (%d):", m_buf.size()); +/* LOG_NOTICE(RSX, "Add program (%d):", m_buf.size()); LOG_NOTICE(RSX, "*** prog id = %x", prog); LOG_NOTICE(RSX, "*** vp id = %d", vp.id); LOG_NOTICE(RSX, "*** fp id = %d", fp.id); @@ -191,23 +308,7 @@ public: LOG_NOTICE(RSX, "*** fp data size = %d", rsx_fp.size); LOG_NOTICE(RSX, "*** vp shader = \n%s", vp.shader.c_str()); - LOG_NOTICE(RSX, "*** fp shader = \n%s", fp.shader.c_str()); - - - new_buf.prog_id = prog; - new_buf.vp_id = vp.id; - new_buf.fp_id = fp.id; - new_buf.fp_bytecode = fp.bytecode; - - new_buf.fp_data.insert(new_buf.fp_data.end(), vm::get_ptr(rsx_fp.addr), vm::get_ptr(rsx_fp.addr + rsx_fp.size)); - new_buf.vp_data = rsx_vp.data; - new_buf.vp_bytecode = vp.bytecode; - - new_buf.vp_shader = vp.shader; - new_buf.fp_shader = fp.shader; - - m_buf.resize(m_buf.size() + 1); - m_buf.push_back(new_buf); + LOG_NOTICE(RSX, "*** fp shader = \n%s", fp.shader.c_str());*/ } }; @@ -216,30 +317,32 @@ static ProgramBuffer g_cachedProgram; D3D12PipelineState::D3D12PipelineState(ID3D12Device *device, RSXVertexProgram *vertexShader, RSXFragmentProgram *fragmentShader) { Shader m_vertex_prog, m_fragment_prog; - int m_fp_buf_num = g_cachedProgram.SearchFp(*fragmentShader, m_fragment_prog); - int m_vp_buf_num = g_cachedProgram.SearchVp(*vertexShader, m_vertex_prog); + bool m_fp_buf_num = g_cachedProgram.SearchFp(*fragmentShader, m_fragment_prog); + bool m_vp_buf_num = g_cachedProgram.SearchVp(*vertexShader, m_vertex_prog); - if (m_fp_buf_num == -1) + if (!m_fp_buf_num) { LOG_WARNING(RSX, "FP not found in buffer!"); -// m_fragment_prog.Decompile(*fragmentShader); +// Decompile(*fragmentShader); m_fragment_prog.Compile(SHADER_TYPE::SHADER_TYPE_FRAGMENT); + g_cachedProgram.AddFragmentProgram(m_fragment_prog, *fragmentShader); // TODO: This shouldn't use current dir -// fs::file("./FragmentProgram.txt", o_write | 
o_create | o_trunc).write(m_fragment_prog.shader.c_str(), m_fragment_prog.shader.size()); + //fs::file("./FragmentProgram.txt", o_write | o_create | o_trunc).write(m_fragment_prog.shader.c_str(), m_fragment_prog.shader.size()); } - if (m_vp_buf_num == -1) + if (!m_vp_buf_num) { LOG_WARNING(RSX, "VP not found in buffer!"); // m_vertex_prog.Decompile(*vertexShader); m_vertex_prog.Compile(SHADER_TYPE::SHADER_TYPE_VERTEX); + g_cachedProgram.AddVertexProgram(m_vertex_prog, *vertexShader); // TODO: This shouldn't use current dir // fs::file("./VertexProgram.txt", o_write | o_create | o_trunc).write(m_vertex_prog.shader.c_str(), m_vertex_prog.shader.size()); } - if (m_fp_buf_num != -1 && m_vp_buf_num != -1) +// if (m_fp_buf_num != -1 && m_vp_buf_num != -1) { // m_program.id = m_prog_buffer.GetProg(m_fp_buf_num, m_vp_buf_num); } @@ -280,11 +383,11 @@ D3D12PipelineState::D3D12PipelineState(ID3D12Device *device, RSXVertexProgram *v { D3D12_GRAPHICS_PIPELINE_STATE_DESC graphicPipelineStateDesc = {}; - graphicPipelineStateDesc.VS.BytecodeLength = m_vertex_prog.bytecode->GetBufferSize(); +/* graphicPipelineStateDesc.VS.BytecodeLength = m_vertex_prog.bytecode->GetBufferSize(); graphicPipelineStateDesc.VS.pShaderBytecode = m_vertex_prog.bytecode->GetBufferPointer(); graphicPipelineStateDesc.PS.BytecodeLength = m_fragment_prog.bytecode->GetBufferSize(); graphicPipelineStateDesc.PS.pShaderBytecode = m_fragment_prog.bytecode->GetBufferPointer(); - device->CreateGraphicsPipelineState(&graphicPipelineStateDesc, IID_PPV_ARGS(&m_pipelineStateObject)); + device->CreateGraphicsPipelineState(&graphicPipelineStateDesc, IID_PPV_ARGS(&m_pipelineStateObject));*/ g_cachedProgram.Add(m_pipelineStateObject, m_fragment_prog, *fragmentShader, m_vertex_prog, *vertexShader); /*m_program.Create(m_vertex_prog.id, m_fragment_prog.id); checkForGlError("m_program.Create"); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12ProgramDisassembler.cpp b/rpcs3/Emu/RSX/D3D12/D3D12ProgramDisassembler.cpp new file mode 100644 
index 0000000000..af3451cbfc --- /dev/null +++ b/rpcs3/Emu/RSX/D3D12/D3D12ProgramDisassembler.cpp @@ -0,0 +1,268 @@ +#include "stdafx.h" +#if defined (DX12_SUPPORT) +#include "D3D12ProgramDisassembler.h" +#include "Emu/Memory/vm.h" +#include "Utilities/Log.h" + +static u32 GetData(const u32 d) { return d << 16 | d >> 16; } + +void Decompile(RSXFragmentProgram& prog) +{ + auto data = vm::ptr::make(prog.addr); + size_t m_size = 0; + size_t m_location = 0; + size_t m_loop_count = 0; + size_t m_code_level = 1; + + enum + { + FORCE_NONE, + FORCE_SCT, + FORCE_SCB, + }; + + int forced_unit = FORCE_NONE; + + OPDEST operandDST; + + while (true) + { + operandDST.HEX = GetData(data[0]); +/* for (auto finded = std::find(m_end_offsets.begin(), m_end_offsets.end(), m_size); + finded != m_end_offsets.end(); + finded = std::find(m_end_offsets.begin(), m_end_offsets.end(), m_size)) + { + m_end_offsets.erase(finded); + m_code_level--; + AddCode("}"); + m_loop_count--; + }*/ + +/* for (auto finded = std::find(m_else_offsets.begin(), m_else_offsets.end(), m_size); + finded != m_else_offsets.end(); + finded = std::find(m_else_offsets.begin(), m_else_offsets.end(), m_size)) + { + m_else_offsets.erase(finded); + m_code_level--; + AddCode("}"); + AddCode("else"); + AddCode("{"); + m_code_level++; + } + + dst.HEX = GetData(data[0]); + src0.HEX = GetData(data[1]); + src1.HEX = GetData(data[2]); + src2.HEX = GetData(data[3]); + + m_offset = 4 * sizeof(u32); + + const u32 opcode = dst.opcode | (src1.opcode_is_branch << 6); + + auto SCT = [&]() + { + switch (opcode) + { + case RSX_FP_OPCODE_ADD: SetDst("($0 + $1)"); break; + case RSX_FP_OPCODE_DIV: SetDst("($0 / $1)"); break; + case RSX_FP_OPCODE_DIVSQ: SetDst("($0 / sqrt($1))"); break; + case RSX_FP_OPCODE_DP2: SetDst("vec4(dot($0.xy, $1.xy))"); break; + case RSX_FP_OPCODE_DP3: SetDst("vec4(dot($0.xyz, $1.xyz))"); break; + case RSX_FP_OPCODE_DP4: SetDst("vec4(dot($0, $1))"); break; + case RSX_FP_OPCODE_DP2A: SetDst("vec4($0.x * $1.x + $0.y * 
$1.y + $2.x)"); break; + case RSX_FP_OPCODE_MAD: SetDst("($0 * $1 + $2)"); break; + case RSX_FP_OPCODE_MAX: SetDst("max($0, $1)"); break; + case RSX_FP_OPCODE_MIN: SetDst("min($0, $1)"); break; + case RSX_FP_OPCODE_MOV: SetDst("$0"); break; + case RSX_FP_OPCODE_MUL: SetDst("($0 * $1)"); break; + case RSX_FP_OPCODE_RCP: SetDst("1 / $0"); break; + case RSX_FP_OPCODE_RSQ: SetDst("inversesqrt(abs($0))"); break; + case RSX_FP_OPCODE_SEQ: SetDst("vec4(equal($0, $1))"); break; + case RSX_FP_OPCODE_SFL: SetDst("vec4(0.0)"); break; + case RSX_FP_OPCODE_SGE: SetDst("vec4(greaterThanEqual($0, $1))"); break; + case RSX_FP_OPCODE_SGT: SetDst("vec4(greaterThan($0, $1))"); break; + case RSX_FP_OPCODE_SLE: SetDst("vec4(lessThanEqual($0, $1))"); break; + case RSX_FP_OPCODE_SLT: SetDst("vec4(lessThan($0, $1))"); break; + case RSX_FP_OPCODE_SNE: SetDst("vec4(notEqual($0, $1))"); break; + case RSX_FP_OPCODE_STR: SetDst("vec4(1.0)"); break; + + default: + return false; + } + + return true; + }; + + auto SCB = [&]() + { + switch (opcode) + { + case RSX_FP_OPCODE_ADD: SetDst("($0 + $1)"); break; + case RSX_FP_OPCODE_COS: SetDst("cos($0)"); break; + case RSX_FP_OPCODE_DP2: SetDst("vec4(dot($0.xy, $1.xy))"); break; + case RSX_FP_OPCODE_DP3: SetDst("vec4(dot($0.xyz, $1.xyz))"); break; + case RSX_FP_OPCODE_DP4: SetDst("vec4(dot($0, $1))"); break; + case RSX_FP_OPCODE_DP2A: SetDst("vec4($0.x * $1.x + $0.y * $1.y + $2.x)"); break; + case RSX_FP_OPCODE_DST: SetDst("vec4(distance($0, $1))"); break; + case RSX_FP_OPCODE_REFL: LOG_ERROR(RSX, "Unimplemented SCB instruction: REFL"); break; // TODO: Is this in the right category? + case RSX_FP_OPCODE_EX2: SetDst("exp2($0)"); break; + case RSX_FP_OPCODE_FLR: SetDst("floor($0)"); break; + case RSX_FP_OPCODE_FRC: SetDst("fract($0)"); break; + case RSX_FP_OPCODE_LIT: SetDst("vec4(1.0, $0.x, ($0.x > 0.0 ? exp($0.w * log2($0.y)) : 0.0), 1.0)"); break; + case RSX_FP_OPCODE_LIF: SetDst("vec4(1.0, $0.y, ($0.y > 0 ? 
pow(2.0, $0.w) : 0.0), 1.0)"); break; + case RSX_FP_OPCODE_LRP: LOG_ERROR(RSX, "Unimplemented SCB instruction: LRP"); break; // TODO: Is this in the right category? + case RSX_FP_OPCODE_LG2: SetDst("log2($0)"); break; + case RSX_FP_OPCODE_MAD: SetDst("($0 * $1 + $2)"); break; + case RSX_FP_OPCODE_MAX: SetDst("max($0, $1)"); break; + case RSX_FP_OPCODE_MIN: SetDst("min($0, $1)"); break; + case RSX_FP_OPCODE_MOV: SetDst("$0"); break; + case RSX_FP_OPCODE_MUL: SetDst("($0 * $1)"); break; + case RSX_FP_OPCODE_PK2: SetDst("packSnorm2x16($0)"); break; // TODO: More testing (Sonic The Hedgehog (NPUB-30442/NPEB-00478)) + case RSX_FP_OPCODE_PK4: SetDst("packSnorm4x8($0)"); break; // TODO: More testing (Sonic The Hedgehog (NPUB-30442/NPEB-00478)) + case RSX_FP_OPCODE_PK16: LOG_ERROR(RSX, "Unimplemented SCB instruction: PK16"); break; + case RSX_FP_OPCODE_PKB: LOG_ERROR(RSX, "Unimplemented SCB instruction: PKB"); break; + case RSX_FP_OPCODE_PKG: LOG_ERROR(RSX, "Unimplemented SCB instruction: PKG"); break; + case RSX_FP_OPCODE_SEQ: SetDst("vec4(equal($0, $1))"); break; + case RSX_FP_OPCODE_SFL: SetDst("vec4(0.0)"); break; + case RSX_FP_OPCODE_SGE: SetDst("vec4(greaterThanEqual($0, $1))"); break; + case RSX_FP_OPCODE_SGT: SetDst("vec4(greaterThan($0, $1))"); break; + case RSX_FP_OPCODE_SIN: SetDst("sin($0)"); break; + case RSX_FP_OPCODE_SLE: SetDst("vec4(lessThanEqual($0, $1))"); break; + case RSX_FP_OPCODE_SLT: SetDst("vec4(lessThan($0, $1))"); break; + case RSX_FP_OPCODE_SNE: SetDst("vec4(notEqual($0, $1))"); break; + case RSX_FP_OPCODE_STR: SetDst("vec4(1.0)"); break; + + default: + return false; + } + + return true; + }; + + auto TEX_SRB = [&]() + { + switch (opcode) + { + case RSX_FP_OPCODE_DDX: SetDst("dFdx($0)"); break; + case RSX_FP_OPCODE_DDY: SetDst("dFdy($0)"); break; + case RSX_FP_OPCODE_NRM: SetDst("normalize($0)"); break; + case RSX_FP_OPCODE_BEM: LOG_ERROR(RSX, "Unimplemented TEX_SRB instruction: BEM"); break; + case RSX_FP_OPCODE_TEX: SetDst("texture($t, 
$0.xy)"); break; + case RSX_FP_OPCODE_TEXBEM: SetDst("texture($t, $0.xy, $1.x)"); break; + case RSX_FP_OPCODE_TXP: SetDst("textureProj($t, $0.xyz, $1.x)"); break; //TODO: More testing (Sonic The Hedgehog (NPUB-30442/NPEB-00478) and The Simpsons Arcade Game (NPUB30563)) + case RSX_FP_OPCODE_TXPBEM: SetDst("textureProj($t, $0.xyz, $1.x)"); break; + case RSX_FP_OPCODE_TXD: LOG_ERROR(RSX, "Unimplemented TEX_SRB instruction: TXD"); break; + case RSX_FP_OPCODE_TXB: SetDst("texture($t, $0.xy, $1.x)"); break; + case RSX_FP_OPCODE_TXL: SetDst("textureLod($t, $0.xy, $1.x)"); break; + case RSX_FP_OPCODE_UP2: SetDst("unpackSnorm2x16($0)"); break; // TODO: More testing (Sonic The Hedgehog (NPUB-30442/NPEB-00478)) + case RSX_FP_OPCODE_UP4: SetDst("unpackSnorm4x8($0)"); break; // TODO: More testing (Sonic The Hedgehog (NPUB-30442/NPEB-00478)) + case RSX_FP_OPCODE_UP16: LOG_ERROR(RSX, "Unimplemented TEX_SRB instruction: UP16"); break; + case RSX_FP_OPCODE_UPB: LOG_ERROR(RSX, "Unimplemented TEX_SRB instruction: UPB"); break; + case RSX_FP_OPCODE_UPG: LOG_ERROR(RSX, "Unimplemented TEX_SRB instruction: UPG"); break; + + default: + return false; + } + + return true; + }; + + auto SIP = [&]() + { + switch (opcode) + { + case RSX_FP_OPCODE_BRK: SetDst("break"); break; + case RSX_FP_OPCODE_CAL: LOG_ERROR(RSX, "Unimplemented SIP instruction: CAL"); break; + case RSX_FP_OPCODE_FENCT: forced_unit = FORCE_SCT; break; + case RSX_FP_OPCODE_FENCB: forced_unit = FORCE_SCB; break; + case RSX_FP_OPCODE_IFE: + AddCode("if($cond)"); + m_else_offsets.push_back(src1.else_offset << 2); + m_end_offsets.push_back(src2.end_offset << 2); + AddCode("{"); + m_code_level++; + break; + case RSX_FP_OPCODE_LOOP: + if (!src0.exec_if_eq && !src0.exec_if_gr && !src0.exec_if_lt) + { + AddCode(fmt::Format("$ifcond for(int i%u = %u; i%u < %u; i%u += %u) {} //-> %u //LOOP", + m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment, src2.end_offset)); + } + else + { + 
AddCode(fmt::Format("$ifcond for(int i%u = %u; i%u < %u; i%u += %u) //LOOP", + m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment)); + m_loop_count++; + m_end_offsets.push_back(src2.end_offset << 2); + AddCode("{"); + m_code_level++; + } + break; + case RSX_FP_OPCODE_REP: + if (!src0.exec_if_eq && !src0.exec_if_gr && !src0.exec_if_lt) + { + AddCode(fmt::Format("$ifcond for(int i%u = %u; i%u < %u; i%u += %u) {} //-> %u //REP", + m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment, src2.end_offset)); + } + else + { + AddCode(fmt::Format("if($cond) for(int i%u = %u; i%u < %u; i%u += %u) //REP", + m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment)); + m_loop_count++; + m_end_offsets.push_back(src2.end_offset << 2); + AddCode("{"); + m_code_level++; + } + break; + case RSX_FP_OPCODE_RET: SetDst("return"); break; + + default: + return false; + } + + return true; + }; + + switch (opcode) + { + case RSX_FP_OPCODE_NOP: break; + case RSX_FP_OPCODE_KIL: SetDst("discard", false); break; + + default: + if (forced_unit == FORCE_NONE) + { + if (SIP()) break; + if (SCT()) break; + if (TEX_SRB()) break; + if (SCB()) break; + } + else if (forced_unit == FORCE_SCT) + { + forced_unit = FORCE_NONE; + if (SCT()) break; + } + else if (forced_unit == FORCE_SCB) + { + forced_unit = FORCE_NONE; + if (SCB()) break; + } + + LOG_ERROR(RSX, "Unknown/illegal instruction: 0x%x (forced unit %d)", opcode, forced_unit); + break; + } + + m_size += m_offset;*/ + + if (operandDST.end) break; + +// assert(m_offset % sizeof(u32) == 0); + data += 4 / sizeof(u32); + } + + // flush m_code_level + m_code_level = 1; +/* m_shader = BuildCode(); + main.clear(); + m_parr.params.clear();*/ +} +#endif \ No newline at end of file diff --git a/rpcs3/Emu/RSX/D3D12/D3D12ProgramDisassembler.h b/rpcs3/Emu/RSX/D3D12/D3D12ProgramDisassembler.h new file mode 100644 index 0000000000..011cef292e 
--- /dev/null +++ b/rpcs3/Emu/RSX/D3D12/D3D12ProgramDisassembler.h @@ -0,0 +1,6 @@ +#pragma once +#if defined (DX12_SUPPORT) +#include "Emu/RSX/RSXFragmentProgram.h" + +void Decompile(RSXFragmentProgram& prog); +#endif \ No newline at end of file diff --git a/rpcs3/Emu/RSX/D3D12/DefaultPixelShader.hlsl b/rpcs3/Emu/RSX/D3D12/DefaultPixelShader.hlsl deleted file mode 100644 index 68075ef0c2..0000000000 --- a/rpcs3/Emu/RSX/D3D12/DefaultPixelShader.hlsl +++ /dev/null @@ -1,4 +0,0 @@ -float4 main() : SV_TARGET -{ - return float4(1.0f, 1.0f, 1.0f, 1.0f); -} \ No newline at end of file diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index 125c782d6f..4d595b4823 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -42,8 +42,6 @@ - - @@ -502,7 +500,6 @@ - @@ -630,9 +627,6 @@ - - - {C4A10229-4712-4BD2-B63E-50D93C67A038} emucore diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters index 2e2b882156..ca6c9fc43b 100644 --- a/rpcs3/emucore.vcxproj.filters +++ b/rpcs3/emucore.vcxproj.filters @@ -971,12 +971,6 @@ Emu\GPU\RSX\D3D12 - - Emu\GPU\RSX\D3D12 - - - Emu\GPU\RSX\D3D12 - @@ -1846,13 +1840,5 @@ Emu\GPU\RSX\D3D12 - - Emu\GPU\RSX\D3D12 - - - - - Emu\GPU\RSX\D3D12 - \ No newline at end of file From 0ebc22101100371c29e7070baeeea665b7494a0f Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 11 May 2015 18:18:05 +0200 Subject: [PATCH 012/343] d3d12: VS Caching apparently fixed --- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp | 21 ++------------------- 1 file changed, 2 insertions(+), 19 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp index f51d1a3d30..173eb1066d 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp @@ -89,20 +89,6 @@ union qword u32 word[4]; }; -size_t getVPBinarySize(void *ptr) -{ - const qword *instBuffer = (const qword*)ptr; - size_t instIndex = 0; - while (true) - { - const qword& inst = 
instBuffer[instIndex]; - bool end = inst.word[0] & 0x1; - if (end) - return (instIndex + 1) * 4; - instIndex++; - } -} - size_t getFPBinarySize(void *ptr) { const qword *instBuffer = (const qword*)ptr; @@ -174,6 +160,7 @@ struct VertexProgramCompare const qword *instBuffer1 = (const qword*)binary1; const qword *instBuffer2 = (const qword*)binary2; size_t instIndex = 0; + return true; while (true) { const qword& inst1 = instBuffer1[instIndex]; @@ -280,13 +267,9 @@ public: void AddVertexProgram(const Shader& vp, RSXVertexProgram& rsx_vp) { - size_t actualVPSize = getVPBinarySize(rsx_vp.data.data()); + size_t actualVPSize = rsx_vp.data.size() * 4; void *fpShadowCopy = malloc(actualVPSize); memcpy(fpShadowCopy, rsx_vp.data.data(), actualVPSize); - int* tmpint = (int*)fpShadowCopy; - if (tmp++) - LOG_WARNING(RSX, "vp:%x %x %x %x\n", tmpint[0], tmpint[1], tmpint[2], tmpint[3]); - cacheVS.insert(std::make_pair(fpShadowCopy, vp)); } From e58292bb1151153e7975f26d0bcc7c2959c43fcf Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 11 May 2015 18:38:57 +0200 Subject: [PATCH 013/343] d3d12: PSO caching works --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 4 +- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 2 +- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp | 107 ++++++++------------- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h | 9 +- 4 files changed, 42 insertions(+), 80 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index f9b456ea1d..106ff9143b 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -211,7 +211,7 @@ bool D3D12GSRender::LoadProgram() return false; } - m_PSO = new D3D12PipelineState(m_device, m_cur_vertex_prog, m_cur_fragment_prog); + m_PSO = getGraphicPipelineState(m_device, m_cur_vertex_prog, m_cur_fragment_prog); return true; } @@ -226,7 +226,7 @@ void D3D12GSRender::ExecCMD() InitDrawBuffers(); - ID3D12CommandList *commandList; +// ID3D12CommandList *commandList; // 
m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_commandAllocator, nullptr, IID_PPV_ARGS(&commandList)); /* if (m_set_color_mask) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index bbc86c13be..c0a36a241d 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -47,7 +47,7 @@ private: // std::vector m_vdata; // std::vector m_post_draw_objs; - D3D12PipelineState *m_PSO; + ID3D12PipelineState *m_PSO; int m_fp_buf_num; int m_vp_buf_num; // GLProgramBuffer m_prog_buffer; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp index 173eb1066d..4e83f034ae 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp @@ -41,6 +41,7 @@ public: Shader() : bytecode(nullptr) {} ~Shader() {} + u32 Id; Microsoft::WRL::ComPtr bytecode; std::vector RSXBinary; @@ -198,13 +199,21 @@ struct FragmentProgramCompare typedef std::unordered_map binary2VS; typedef std::unordered_map binary2FS; -static int tmp = 0; + class ProgramBuffer { public: binary2VS cacheVS; binary2FS cacheFS; + // Key is vertex << 32 | fragment ids + std::unordered_map cachePSO; + + size_t currentShaderId; + + ProgramBuffer() : currentShaderId(0) + {} + bool SearchFp(const RSXFragmentProgram& rsx_fp, Shader& shader) { binary2FS::const_iterator It = cacheFS.find(vm::get_ptr(rsx_fp.addr)); @@ -227,78 +236,45 @@ public: return false; } -/* ID3D12PipelineState *GetProg(u32 fp, u32 vp) const + ID3D12PipelineState *GetProg(u32 fp, u32 vp) const { - if (fp == vp) - {*/ - /* - LOG_NOTICE(RSX, "Get program (%d):", fp); - LOG_NOTICE(RSX, "*** prog id = %d", m_buf[fp].prog_id); - LOG_NOTICE(RSX, "*** vp id = %d", m_buf[fp].vp_id); - LOG_NOTICE(RSX, "*** fp id = %d", m_buf[fp].fp_id); + u64 key = vp << 32 | fp; + std::unordered_map::const_iterator It = cachePSO.find(key); + if (It == cachePSO.end()) + return nullptr; + return It->second; + } - 
LOG_NOTICE(RSX, "*** vp shader = \n%s", m_buf[fp].vp_shader.wx_str()); - LOG_NOTICE(RSX, "*** fp shader = \n%s", m_buf[fp].fp_shader.wx_str()); - */ -/* return m_buf[fp].prog_id; - } - - for (u32 i = 0; i(rsx_fp.addr)); void *fpShadowCopy = malloc(actualFPSize); memcpy(fpShadowCopy, vm::get_ptr(rsx_fp.addr), actualFPSize); + fp.Id = currentShaderId++; cacheFS.insert(std::make_pair(fpShadowCopy, fp)); } - void Add(ID3D12PipelineState *prog, Shader& fp, RSXFragmentProgram& rsx_fp, Shader& vp, RSXVertexProgram& rsx_vp) + void Add(ID3D12PipelineState *prog, Shader& fp, Shader& vp) { -/* LOG_NOTICE(RSX, "Add program (%d):", m_buf.size()); - LOG_NOTICE(RSX, "*** prog id = %x", prog); - LOG_NOTICE(RSX, "*** vp id = %d", vp.id); - LOG_NOTICE(RSX, "*** fp id = %d", fp.id); - LOG_NOTICE(RSX, "*** vp data size = %d", rsx_vp.data.size() * 4); - LOG_NOTICE(RSX, "*** fp data size = %d", rsx_fp.size); - - LOG_NOTICE(RSX, "*** vp shader = \n%s", vp.shader.c_str()); - LOG_NOTICE(RSX, "*** fp shader = \n%s", fp.shader.c_str());*/ + u64 key = vp.Id << 32 | fp.Id; + cachePSO.insert(std::make_pair(key, prog)); } }; static ProgramBuffer g_cachedProgram; -D3D12PipelineState::D3D12PipelineState(ID3D12Device *device, RSXVertexProgram *vertexShader, RSXFragmentProgram *fragmentShader) +ID3D12PipelineState *getGraphicPipelineState(ID3D12Device *device, RSXVertexProgram *vertexShader, RSXFragmentProgram *fragmentShader) { + ID3D12PipelineState *result = nullptr; Shader m_vertex_prog, m_fragment_prog; bool m_fp_buf_num = g_cachedProgram.SearchFp(*fragmentShader, m_fragment_prog); bool m_vp_buf_num = g_cachedProgram.SearchVp(*vertexShader, m_vertex_prog); @@ -325,13 +301,12 @@ D3D12PipelineState::D3D12PipelineState(ID3D12Device *device, RSXVertexProgram *v // fs::file("./VertexProgram.txt", o_write | o_create | o_trunc).write(m_vertex_prog.shader.c_str(), m_vertex_prog.shader.size()); } -// if (m_fp_buf_num != -1 && m_vp_buf_num != -1) - { -// m_program.id = m_prog_buffer.GetProg(m_fp_buf_num, 
m_vp_buf_num); - } + if (m_fp_buf_num && m_vp_buf_num) + result = g_cachedProgram.GetProg(m_fragment_prog.Id, m_vertex_prog.Id); - if (false)//m_program.id) + if (result != nullptr) { + return result; /* // RSX Debugger: Check if this program was modified and update it if (Ini.GSLogPrograms.GetValue()) { @@ -364,22 +339,21 @@ D3D12PipelineState::D3D12PipelineState(ID3D12Device *device, RSXVertexProgram *v } else { + LOG_WARNING(RSX, "Add program :"); + LOG_WARNING(RSX, "*** vp id = %d", m_vertex_prog.Id); + LOG_WARNING(RSX, "*** fp id = %d", m_fragment_prog.Id); + D3D12_GRAPHICS_PIPELINE_STATE_DESC graphicPipelineStateDesc = {}; -/* graphicPipelineStateDesc.VS.BytecodeLength = m_vertex_prog.bytecode->GetBufferSize(); + graphicPipelineStateDesc.VS.BytecodeLength = m_vertex_prog.bytecode->GetBufferSize(); graphicPipelineStateDesc.VS.pShaderBytecode = m_vertex_prog.bytecode->GetBufferPointer(); graphicPipelineStateDesc.PS.BytecodeLength = m_fragment_prog.bytecode->GetBufferSize(); graphicPipelineStateDesc.PS.pShaderBytecode = m_fragment_prog.bytecode->GetBufferPointer(); - device->CreateGraphicsPipelineState(&graphicPipelineStateDesc, IID_PPV_ARGS(&m_pipelineStateObject));*/ - g_cachedProgram.Add(m_pipelineStateObject, m_fragment_prog, *fragmentShader, m_vertex_prog, *vertexShader); - /*m_program.Create(m_vertex_prog.id, m_fragment_prog.id); - checkForGlError("m_program.Create"); - m_prog_buffer.Add(m_program, m_fragment_prog, *m_cur_fragment_prog, m_vertex_prog, *m_cur_vertex_prog); - checkForGlError("m_prog_buffer.Add"); - m_program.Use(); + device->CreateGraphicsPipelineState(&graphicPipelineStateDesc, IID_PPV_ARGS(&result)); + g_cachedProgram.Add(result, m_fragment_prog, m_vertex_prog); // RSX Debugger - if (Ini.GSLogPrograms.GetValue()) + /*if (Ini.GSLogPrograms.GetValue()) { RSXDebuggerProgram program; program.id = m_program.id; @@ -392,10 +366,5 @@ D3D12PipelineState::D3D12PipelineState(ID3D12Device *device, RSXVertexProgram *v } } 
-D3D12PipelineState::~D3D12PipelineState() -{ - -} - #endif \ No newline at end of file diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h index 1916f1768c..f4b428f2a5 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h @@ -5,13 +5,6 @@ #include "Emu/RSX/RSXFragmentProgram.h" #include "Emu/RSX/RSXVertexProgram.h" -class D3D12PipelineState -{ - ID3D12PipelineState *m_pipelineStateObject; - ID3D12RootSignature *m_rootSignature; -public: - D3D12PipelineState(ID3D12Device *device, RSXVertexProgram *vertexShader, RSXFragmentProgram *fragmentShader); - ~D3D12PipelineState(); -}; +ID3D12PipelineState *getGraphicPipelineState(ID3D12Device *device, RSXVertexProgram *vertexShader, RSXFragmentProgram *fragmentShader); #endif \ No newline at end of file From 2734c987165fa481658b41098e9c79e987a68fac Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 11 May 2015 18:48:07 +0200 Subject: [PATCH 014/343] d3d12: PSO related reorganisation --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 2 +- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 1 + rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp | 376 ++++++--------------- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h | 155 ++++++++- 4 files changed, 265 insertions(+), 269 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 106ff9143b..a1b6d44602 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -211,7 +211,7 @@ bool D3D12GSRender::LoadProgram() return false; } - m_PSO = getGraphicPipelineState(m_device, m_cur_vertex_prog, m_cur_fragment_prog); + m_PSO = cachePSO.getGraphicPipelineState(m_device, m_cur_vertex_prog, m_cur_fragment_prog); return true; } diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index c0a36a241d..a1ee7947e8 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -47,6 +47,7 
@@ private: // std::vector m_vdata; // std::vector m_post_draw_objs; + PipelineStateObjectCache cachePSO; ID3D12PipelineState *m_PSO; int m_fp_buf_num; int m_vp_buf_num; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp index 4e83f034ae..7e74641c49 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp @@ -12,84 +12,6 @@ #pragma comment (lib, "d3dcompiler.lib") - -struct GLBufferInfo -{ - ID3D12PipelineState *prog_id; - u32 fp_id; - u32 vp_id; - std::vector fp_data; - std::vector vp_data; - std::string fp_shader; - std::string vp_shader; - Microsoft::WRL::ComPtr fp_bytecode; - Microsoft::WRL::ComPtr vp_bytecode; -}; - -enum class SHADER_TYPE -{ - SHADER_TYPE_VERTEX, - SHADER_TYPE_FRAGMENT -}; - -/** Storage for a shader -* Embeds the D3DBlob corresponding to -*/ -class Shader -{ -public: - Shader() : bytecode(nullptr) {} - ~Shader() {} - - u32 Id; - Microsoft::WRL::ComPtr bytecode; - std::vector RSXBinary; - - /** - * Decompile a fragment shader located in the PS3's Memory. This function operates synchronously. - * @param prog RSXShaderProgram specifying the location and size of the shader in memory - */ -// void Decompile(RSXFragmentProgram& prog) - - /** Compile the decompiled fragment shader into a format we can use with OpenGL. 
*/ - void Compile(SHADER_TYPE st) - { - static const char VSstring[] = - "float4 main(float4 pos : POSITION) : SV_POSITION" - "{" - " return pos;" - "}"; - static const char FSstring[] = - "float4 main() : SV_TARGET" - "{" - "return float4(1.0f, 1.0f, 1.0f, 1.0f);" - "}"; - HRESULT hr; - Microsoft::WRL::ComPtr errorBlob; - switch (st) - { - case SHADER_TYPE::SHADER_TYPE_VERTEX: - hr = D3DCompile(VSstring, sizeof(VSstring), "test", nullptr, nullptr, "main", "vs_5_0", 0, 0, bytecode.GetAddressOf(), errorBlob.GetAddressOf()); - if (hr != S_OK) - LOG_ERROR(RSX, "VS build failed:%s", errorBlob->GetBufferPointer()); - break; - case SHADER_TYPE::SHADER_TYPE_FRAGMENT: - hr = D3DCompile(FSstring, sizeof(FSstring), "test", nullptr, nullptr, "main", "ps_5_0", 0, 0, bytecode.GetAddressOf(), errorBlob.GetAddressOf()); - if (hr != S_OK) - LOG_ERROR(RSX, "FS build failed:%s", errorBlob->GetBufferPointer()); - break; - } - } -}; - -// Based on -// https://github.com/AlexAltea/nucleus/blob/master/nucleus/gpu/rsx_pgraph.cpp -union qword -{ - u64 dword[2]; - u32 word[4]; -}; - size_t getFPBinarySize(void *ptr) { const qword *instBuffer = (const qword*)ptr; @@ -104,187 +26,78 @@ size_t getFPBinarySize(void *ptr) } } -struct HashVertexProgram -{ - size_t operator()(const void *program) const - { - // 64-bit Fowler/Noll/Vo FNV-1a hash code - size_t hash = 0xCBF29CE484222325ULL; - const qword *instbuffer = (const qword*)program; - size_t instIndex = 0; - bool end = false; - return 0; - while (true) - { - const qword inst = instbuffer[instIndex]; - bool end = inst.word[0] >> 31; - if (end) - return hash; - hash ^= inst.dword[0]; - hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + (hash << 8) + (hash << 40); - hash ^= inst.dword[1]; - hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + (hash << 8) + (hash << 40); - instIndex++; - } - return 0; - } -}; -struct HashFragmentProgram -{ - size_t operator()(const void *program) const - { - // 64-bit Fowler/Noll/Vo 
FNV-1a hash code - size_t hash = 0xCBF29CE484222325ULL; - const qword *instbuffer = (const qword*)program; - size_t instIndex = 0; - while (true) - { - const qword& inst = instbuffer[instIndex]; - bool end = (inst.word[0] >> 8) & 0x1; - if (end) - return hash; - hash ^= inst.dword[0]; - hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + (hash << 8) + (hash << 40); - hash ^= inst.dword[1]; - hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + (hash << 8) + (hash << 40); - instIndex++; - } - return 0; - } -}; +PipelineStateObjectCache::PipelineStateObjectCache() : currentShaderId(0) +{} -struct VertexProgramCompare +bool PipelineStateObjectCache::SearchFp(const RSXFragmentProgram& rsx_fp, Shader& shader) { - bool operator()(const void *binary1, const void *binary2) const + binary2FS::const_iterator It = cacheFS.find(vm::get_ptr(rsx_fp.addr)); + if (It != cacheFS.end()) { - const qword *instBuffer1 = (const qword*)binary1; - const qword *instBuffer2 = (const qword*)binary2; - size_t instIndex = 0; + shader = It->second; return true; - while (true) - { - const qword& inst1 = instBuffer1[instIndex]; - const qword& inst2 = instBuffer2[instIndex]; - bool end = (inst1.word[0] >> 31) && (inst2.word[0] >> 31); - if (end) - return true; - if (inst1.dword[0] != inst2.dword[0] || inst1.dword[1] != inst2.dword[1]) - return false; - instIndex++; - } } -}; + return false; +} -struct FragmentProgramCompare +bool PipelineStateObjectCache::SearchVp(const RSXVertexProgram& rsx_vp, Shader& shader) { - bool operator()(const void *binary1, const void *binary2) const + binary2VS::const_iterator It = cacheVS.find((void*)rsx_vp.data.data()); + if (It != cacheVS.end()) { - const qword *instBuffer1 = (const qword*)binary1; - const qword *instBuffer2 = (const qword*)binary2; - size_t instIndex = 0; - while (true) - { - const qword& inst1 = instBuffer1[instIndex]; - const qword& inst2 = instBuffer2[instIndex]; - bool end = ((inst1.word[0] >> 8) & 0x1) && ((inst2.word[0] 
>> 8) & 0x1); - if (end) - return true; - if (inst1.dword[0] != inst2.dword[0] || inst1.dword[1] != inst2.dword[1]) - return false; - instIndex++; - } + shader = It->second; + return true; } -}; + return false; +} -typedef std::unordered_map binary2VS; -typedef std::unordered_map binary2FS; - -class ProgramBuffer +ID3D12PipelineState *PipelineStateObjectCache::GetProg(u32 fp, u32 vp) const { -public: - binary2VS cacheVS; - binary2FS cacheFS; + u64 key = vp << 32 | fp; + std::unordered_map::const_iterator It = cachePSO.find(key); + if (It == cachePSO.end()) + return nullptr; + return It->second; +} - // Key is vertex << 32 | fragment ids - std::unordered_map cachePSO; +void PipelineStateObjectCache::AddVertexProgram(Shader& vp, RSXVertexProgram& rsx_vp) +{ + size_t actualVPSize = rsx_vp.data.size() * 4; + void *fpShadowCopy = malloc(actualVPSize); + memcpy(fpShadowCopy, rsx_vp.data.data(), actualVPSize); + vp.Id = currentShaderId++; + cacheVS.insert(std::make_pair(fpShadowCopy, vp)); +} - size_t currentShaderId; +void PipelineStateObjectCache::AddFragmentProgram(Shader& fp, RSXFragmentProgram& rsx_fp) +{ + size_t actualFPSize = getFPBinarySize(vm::get_ptr(rsx_fp.addr)); + void *fpShadowCopy = malloc(actualFPSize); + memcpy(fpShadowCopy, vm::get_ptr(rsx_fp.addr), actualFPSize); + fp.Id = currentShaderId++; + cacheFS.insert(std::make_pair(fpShadowCopy, fp)); +} - ProgramBuffer() : currentShaderId(0) - {} +void PipelineStateObjectCache::Add(ID3D12PipelineState *prog, Shader& fp, Shader& vp) +{ + u64 key = vp.Id << 32 | fp.Id; + cachePSO.insert(std::make_pair(key, prog)); +} - bool SearchFp(const RSXFragmentProgram& rsx_fp, Shader& shader) - { - binary2FS::const_iterator It = cacheFS.find(vm::get_ptr(rsx_fp.addr)); - if (It != cacheFS.end()) - { - shader = It->second; - return true; - } - return false; - } - - bool SearchVp(const RSXVertexProgram& rsx_vp, Shader& shader) - { - binary2VS::const_iterator It = cacheVS.find((void*)rsx_vp.data.data()); - if (It != 
cacheVS.end()) - { - shader = It->second; - return true; - } - return false; - } - - ID3D12PipelineState *GetProg(u32 fp, u32 vp) const - { - u64 key = vp << 32 | fp; - std::unordered_map::const_iterator It = cachePSO.find(key); - if (It == cachePSO.end()) - return nullptr; - return It->second; - } - - void AddVertexProgram(Shader& vp, RSXVertexProgram& rsx_vp) - { - size_t actualVPSize = rsx_vp.data.size() * 4; - void *fpShadowCopy = malloc(actualVPSize); - memcpy(fpShadowCopy, rsx_vp.data.data(), actualVPSize); - vp.Id = currentShaderId++; - cacheVS.insert(std::make_pair(fpShadowCopy, vp)); - } - - void AddFragmentProgram(Shader& fp, RSXFragmentProgram& rsx_fp) - { - size_t actualFPSize = getFPBinarySize(vm::get_ptr(rsx_fp.addr)); - void *fpShadowCopy = malloc(actualFPSize); - memcpy(fpShadowCopy, vm::get_ptr(rsx_fp.addr), actualFPSize); - fp.Id = currentShaderId++; - cacheFS.insert(std::make_pair(fpShadowCopy, fp)); - } - - void Add(ID3D12PipelineState *prog, Shader& fp, Shader& vp) - { - u64 key = vp.Id << 32 | fp.Id; - cachePSO.insert(std::make_pair(key, prog)); - } -}; - -static ProgramBuffer g_cachedProgram; - -ID3D12PipelineState *getGraphicPipelineState(ID3D12Device *device, RSXVertexProgram *vertexShader, RSXFragmentProgram *fragmentShader) +ID3D12PipelineState *PipelineStateObjectCache::getGraphicPipelineState(ID3D12Device *device, RSXVertexProgram *vertexShader, RSXFragmentProgram *fragmentShader) { ID3D12PipelineState *result = nullptr; Shader m_vertex_prog, m_fragment_prog; - bool m_fp_buf_num = g_cachedProgram.SearchFp(*fragmentShader, m_fragment_prog); - bool m_vp_buf_num = g_cachedProgram.SearchVp(*vertexShader, m_vertex_prog); + bool m_fp_buf_num = SearchFp(*fragmentShader, m_fragment_prog); + bool m_vp_buf_num = SearchVp(*vertexShader, m_vertex_prog); if (!m_fp_buf_num) { LOG_WARNING(RSX, "FP not found in buffer!"); -// Decompile(*fragmentShader); + // Decompile(*fragmentShader); m_fragment_prog.Compile(SHADER_TYPE::SHADER_TYPE_FRAGMENT); - 
g_cachedProgram.AddFragmentProgram(m_fragment_prog, *fragmentShader); + AddFragmentProgram(m_fragment_prog, *fragmentShader); // TODO: This shouldn't use current dir //fs::file("./FragmentProgram.txt", o_write | o_create | o_trunc).write(m_fragment_prog.shader.c_str(), m_fragment_prog.shader.size()); @@ -293,49 +106,49 @@ ID3D12PipelineState *getGraphicPipelineState(ID3D12Device *device, RSXVertexProg if (!m_vp_buf_num) { LOG_WARNING(RSX, "VP not found in buffer!"); -// m_vertex_prog.Decompile(*vertexShader); + // m_vertex_prog.Decompile(*vertexShader); m_vertex_prog.Compile(SHADER_TYPE::SHADER_TYPE_VERTEX); - g_cachedProgram.AddVertexProgram(m_vertex_prog, *vertexShader); + AddVertexProgram(m_vertex_prog, *vertexShader); // TODO: This shouldn't use current dir // fs::file("./VertexProgram.txt", o_write | o_create | o_trunc).write(m_vertex_prog.shader.c_str(), m_vertex_prog.shader.size()); } if (m_fp_buf_num && m_vp_buf_num) - result = g_cachedProgram.GetProg(m_fragment_prog.Id, m_vertex_prog.Id); + result = GetProg(m_fragment_prog.Id, m_vertex_prog.Id); if (result != nullptr) { return result; -/* // RSX Debugger: Check if this program was modified and update it - if (Ini.GSLogPrograms.GetValue()) - { - for (auto& program : m_debug_programs) - { - if (program.id == m_program.id && program.modified) + /* // RSX Debugger: Check if this program was modified and update it + if (Ini.GSLogPrograms.GetValue()) { - // TODO: This isn't working perfectly. 
Is there any better/shorter way to update the program - m_vertex_prog.shader = program.vp_shader; - m_fragment_prog.shader = program.fp_shader; - m_vertex_prog.Wait(); - m_vertex_prog.Compile(); - checkForGlError("m_vertex_prog.Compile"); - m_fragment_prog.Wait(); - m_fragment_prog.Compile(); - checkForGlError("m_fragment_prog.Compile"); - glAttachShader(m_program.id, m_vertex_prog.id); - glAttachShader(m_program.id, m_fragment_prog.id); - glLinkProgram(m_program.id); - checkForGlError("glLinkProgram"); - glDetachShader(m_program.id, m_vertex_prog.id); - glDetachShader(m_program.id, m_fragment_prog.id); - program.vp_id = m_vertex_prog.id; - program.fp_id = m_fragment_prog.id; - program.modified = false; + for (auto& program : m_debug_programs) + { + if (program.id == m_program.id && program.modified) + { + // TODO: This isn't working perfectly. Is there any better/shorter way to update the program + m_vertex_prog.shader = program.vp_shader; + m_fragment_prog.shader = program.fp_shader; + m_vertex_prog.Wait(); + m_vertex_prog.Compile(); + checkForGlError("m_vertex_prog.Compile"); + m_fragment_prog.Wait(); + m_fragment_prog.Compile(); + checkForGlError("m_fragment_prog.Compile"); + glAttachShader(m_program.id, m_vertex_prog.id); + glAttachShader(m_program.id, m_fragment_prog.id); + glLinkProgram(m_program.id); + checkForGlError("glLinkProgram"); + glDetachShader(m_program.id, m_vertex_prog.id); + glDetachShader(m_program.id, m_fragment_prog.id); + program.vp_id = m_vertex_prog.id; + program.fp_id = m_fragment_prog.id; + program.modified = false; + } + } } - } - } - m_program.Use();*/ + m_program.Use();*/ } else { @@ -350,7 +163,7 @@ ID3D12PipelineState *getGraphicPipelineState(ID3D12Device *device, RSXVertexProg graphicPipelineStateDesc.PS.BytecodeLength = m_fragment_prog.bytecode->GetBufferSize(); graphicPipelineStateDesc.PS.pShaderBytecode = m_fragment_prog.bytecode->GetBufferPointer(); device->CreateGraphicsPipelineState(&graphicPipelineStateDesc, 
IID_PPV_ARGS(&result)); - g_cachedProgram.Add(result, m_fragment_prog, m_vertex_prog); + Add(result, m_fragment_prog, m_vertex_prog); // RSX Debugger /*if (Ini.GSLogPrograms.GetValue()) @@ -367,4 +180,33 @@ ID3D12PipelineState *getGraphicPipelineState(ID3D12Device *device, RSXVertexProg } -#endif \ No newline at end of file +#endif + +void Shader::Compile(SHADER_TYPE st) +{ + static const char VSstring[] = + "float4 main(float4 pos : POSITION) : SV_POSITION" + "{" + " return pos;" + "}"; + static const char FSstring[] = + "float4 main() : SV_TARGET" + "{" + "return float4(1.0f, 1.0f, 1.0f, 1.0f);" + "}"; + HRESULT hr; + Microsoft::WRL::ComPtr errorBlob; + switch (st) + { + case SHADER_TYPE::SHADER_TYPE_VERTEX: + hr = D3DCompile(VSstring, sizeof(VSstring), "test", nullptr, nullptr, "main", "vs_5_0", 0, 0, &bytecode, errorBlob.GetAddressOf()); + if (hr != S_OK) + LOG_ERROR(RSX, "VS build failed:%s", errorBlob->GetBufferPointer()); + break; + case SHADER_TYPE::SHADER_TYPE_FRAGMENT: + hr = D3DCompile(FSstring, sizeof(FSstring), "test", nullptr, nullptr, "main", "ps_5_0", 0, 0, &bytecode, errorBlob.GetAddressOf()); + if (hr != S_OK) + LOG_ERROR(RSX, "FS build failed:%s", errorBlob->GetBufferPointer()); + break; + } +} diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h index f4b428f2a5..81aca20d00 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h @@ -5,6 +5,159 @@ #include "Emu/RSX/RSXFragmentProgram.h" #include "Emu/RSX/RSXVertexProgram.h" -ID3D12PipelineState *getGraphicPipelineState(ID3D12Device *device, RSXVertexProgram *vertexShader, RSXFragmentProgram *fragmentShader); + +enum class SHADER_TYPE +{ + SHADER_TYPE_VERTEX, + SHADER_TYPE_FRAGMENT +}; + +/** Storage for a shader +* Embeds the D3DBlob corresponding to +*/ +class Shader +{ +public: + Shader() : bytecode(nullptr) {} + ~Shader() {} + + u32 Id; + ID3DBlob *bytecode; + + /** + * Decompile a fragment shader located 
in the PS3's Memory. This function operates synchronously. + * @param prog RSXShaderProgram specifying the location and size of the shader in memory + */ + // void Decompile(RSXFragmentProgram& prog) + + /** Compile the decompiled fragment shader into a format we can use with OpenGL. */ + void Compile(SHADER_TYPE st); +}; + +// Based on +// https://github.com/AlexAltea/nucleus/blob/master/nucleus/gpu/rsx_pgraph.cpp +union qword +{ + u64 dword[2]; + u32 word[4]; +}; + +struct HashVertexProgram +{ + size_t operator()(const void *program) const + { + // 64-bit Fowler/Noll/Vo FNV-1a hash code + size_t hash = 0xCBF29CE484222325ULL; + const qword *instbuffer = (const qword*)program; + size_t instIndex = 0; + bool end = false; + return 0; + while (true) + { + const qword inst = instbuffer[instIndex]; + bool end = inst.word[0] >> 31; + if (end) + return hash; + hash ^= inst.dword[0]; + hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + (hash << 8) + (hash << 40); + hash ^= inst.dword[1]; + hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + (hash << 8) + (hash << 40); + instIndex++; + } + return 0; + } +}; + +struct HashFragmentProgram +{ + size_t operator()(const void *program) const + { + // 64-bit Fowler/Noll/Vo FNV-1a hash code + size_t hash = 0xCBF29CE484222325ULL; + const qword *instbuffer = (const qword*)program; + size_t instIndex = 0; + while (true) + { + const qword& inst = instbuffer[instIndex]; + bool end = (inst.word[0] >> 8) & 0x1; + if (end) + return hash; + hash ^= inst.dword[0]; + hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + (hash << 8) + (hash << 40); + hash ^= inst.dword[1]; + hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + (hash << 8) + (hash << 40); + instIndex++; + } + return 0; + } +}; + +struct VertexProgramCompare +{ + bool operator()(const void *binary1, const void *binary2) const + { + const qword *instBuffer1 = (const qword*)binary1; + const qword *instBuffer2 = (const qword*)binary2; + 
size_t instIndex = 0; + return true; + while (true) + { + const qword& inst1 = instBuffer1[instIndex]; + const qword& inst2 = instBuffer2[instIndex]; + bool end = (inst1.word[0] >> 31) && (inst2.word[0] >> 31); + if (end) + return true; + if (inst1.dword[0] != inst2.dword[0] || inst1.dword[1] != inst2.dword[1]) + return false; + instIndex++; + } + } +}; + +struct FragmentProgramCompare +{ + bool operator()(const void *binary1, const void *binary2) const + { + const qword *instBuffer1 = (const qword*)binary1; + const qword *instBuffer2 = (const qword*)binary2; + size_t instIndex = 0; + while (true) + { + const qword& inst1 = instBuffer1[instIndex]; + const qword& inst2 = instBuffer2[instIndex]; + bool end = ((inst1.word[0] >> 8) & 0x1) && ((inst2.word[0] >> 8) & 0x1); + if (end) + return true; + if (inst1.dword[0] != inst2.dword[0] || inst1.dword[1] != inst2.dword[1]) + return false; + instIndex++; + } + } +}; + +typedef std::unordered_map binary2VS; +typedef std::unordered_map binary2FS; + +class PipelineStateObjectCache +{ +private: + size_t currentShaderId; + binary2VS cacheVS; + binary2FS cacheFS; + // Key is vertex << 32 | fragment ids + std::unordered_map cachePSO; + + bool SearchFp(const RSXFragmentProgram& rsx_fp, Shader& shader); + bool SearchVp(const RSXVertexProgram& rsx_vp, Shader& shader); + ID3D12PipelineState *GetProg(u32 fp, u32 vp) const; + void AddVertexProgram(Shader& vp, RSXVertexProgram& rsx_vp); + void AddFragmentProgram(Shader& fp, RSXFragmentProgram& rsx_fp); + void Add(ID3D12PipelineState *prog, Shader& fp, Shader& vp); +public: + PipelineStateObjectCache(); + ID3D12PipelineState *getGraphicPipelineState(ID3D12Device *device, RSXVertexProgram *vertexShader, RSXFragmentProgram *fragmentShader); +}; + + #endif \ No newline at end of file From 34bf82d81caab1e33877c86869bd9ee44786d290 Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 11 May 2015 18:52:32 +0200 Subject: [PATCH 015/343] d3d12: Add sensible default to PSO --- 
rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp | 44 ++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp index 7e74641c49..50762990f2 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp @@ -162,6 +162,50 @@ ID3D12PipelineState *PipelineStateObjectCache::getGraphicPipelineState(ID3D12Dev graphicPipelineStateDesc.VS.pShaderBytecode = m_vertex_prog.bytecode->GetBufferPointer(); graphicPipelineStateDesc.PS.BytecodeLength = m_fragment_prog.bytecode->GetBufferSize(); graphicPipelineStateDesc.PS.pShaderBytecode = m_fragment_prog.bytecode->GetBufferPointer(); + + // Sensible default value + static D3D12_RASTERIZER_DESC CD3D12_RASTERIZER_DESC = + { + D3D12_FILL_MODE_SOLID, + D3D12_CULL_MODE_BACK, + FALSE, + D3D12_DEFAULT_DEPTH_BIAS, + D3D12_DEFAULT_DEPTH_BIAS_CLAMP, + D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS, + TRUE, + FALSE, + FALSE, + 0, + D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF, + }; + + static D3D12_DEPTH_STENCIL_DESC CD3D12_DEPTH_STENCIL_DESC = + { + TRUE, + D3D12_DEPTH_WRITE_MASK_ALL, + D3D12_COMPARISON_FUNC_LESS_EQUAL, + FALSE, + D3D12_DEFAULT_STENCIL_READ_MASK, + D3D12_DEFAULT_STENCIL_WRITE_MASK, + }; + + static D3D12_BLEND_DESC CD3D12_BLEND_DESC = + { + FALSE, + FALSE, + { + FALSE,FALSE, + D3D12_BLEND_ONE, D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD, + D3D12_BLEND_ONE, D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD, + D3D12_LOGIC_OP_NOOP, + D3D12_COLOR_WRITE_ENABLE_ALL, + } + }; + + graphicPipelineStateDesc.BlendState = CD3D12_BLEND_DESC; + graphicPipelineStateDesc.DepthStencilState = CD3D12_DEPTH_STENCIL_DESC; + graphicPipelineStateDesc.RasterizerState = CD3D12_RASTERIZER_DESC; + device->CreateGraphicsPipelineState(&graphicPipelineStateDesc, IID_PPV_ARGS(&result)); Add(result, m_fragment_prog, m_vertex_prog); From c6e2f070ee95985b7848dad67598a4a9becf9031 Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 11 May 2015 19:08:39 +0200 
Subject: [PATCH 016/343] d3d12: Some fix for creation of PSO --- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp | 25 +++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp index 50762990f2..af1f7d9aa9 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp @@ -152,9 +152,9 @@ ID3D12PipelineState *PipelineStateObjectCache::getGraphicPipelineState(ID3D12Dev } else { - LOG_WARNING(RSX, "Add program :"); +/* LOG_WARNING(RSX, "Add program :"); LOG_WARNING(RSX, "*** vp id = %d", m_vertex_prog.Id); - LOG_WARNING(RSX, "*** fp id = %d", m_fragment_prog.Id); + LOG_WARNING(RSX, "*** fp id = %d", m_fragment_prog.Id);*/ D3D12_GRAPHICS_PIPELINE_STATE_DESC graphicPipelineStateDesc = {}; @@ -205,6 +205,17 @@ ID3D12PipelineState *PipelineStateObjectCache::getGraphicPipelineState(ID3D12Dev graphicPipelineStateDesc.BlendState = CD3D12_BLEND_DESC; graphicPipelineStateDesc.DepthStencilState = CD3D12_DEPTH_STENCIL_DESC; graphicPipelineStateDesc.RasterizerState = CD3D12_RASTERIZER_DESC; + graphicPipelineStateDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + + graphicPipelineStateDesc.NumRenderTargets = 1; + graphicPipelineStateDesc.RTVFormats[0] = DXGI_FORMAT_R8G8B8A8_UNORM; + graphicPipelineStateDesc.DSVFormat = DXGI_FORMAT_D16_UNORM; + +// graphicPipelineStateDesc.InputLayout.pInputElementDescs = VTXLayout::getInputAssemblyLayout(); +// graphicPipelineStateDesc.InputLayout.NumElements = (UINT)VTXLayout::getInputAssemblySize(); + graphicPipelineStateDesc.SampleDesc.Count = 1; + graphicPipelineStateDesc.SampleMask = UINT_MAX; + graphicPipelineStateDesc.NodeMask = 1; device->CreateGraphicsPipelineState(&graphicPipelineStateDesc, IID_PPV_ARGS(&result)); Add(result, m_fragment_prog, m_vertex_prog); @@ -229,11 +240,15 @@ ID3D12PipelineState *PipelineStateObjectCache::getGraphicPipelineState(ID3D12Dev void 
Shader::Compile(SHADER_TYPE st) { static const char VSstring[] = - "float4 main(float4 pos : POSITION) : SV_POSITION" - "{" - " return pos;" + "#define RS \"RootFlags( ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT)\"\n" + "[RootSignature(RS)]\n" + "float4 main(float4 pos : POSITION) : SV_POSITION\n" + "{\n" + " return pos;\n" "}"; static const char FSstring[] = + "#define RS \"RootFlags( ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT)\"\n" + "[RootSignature(RS)]\n" "float4 main() : SV_TARGET" "{" "return float4(1.0f, 1.0f, 1.0f, 1.0f);" From d2889786a224b4416d1223f6441cb4245fb32b1f Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 11 May 2015 19:18:02 +0200 Subject: [PATCH 017/343] d3d12: Some cleaning --- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp | 3 +++ rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h | 5 +++++ 2 files changed, 8 insertions(+) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp index af1f7d9aa9..e7010aee3a 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp @@ -30,6 +30,9 @@ size_t getFPBinarySize(void *ptr) PipelineStateObjectCache::PipelineStateObjectCache() : currentShaderId(0) {} +PipelineStateObjectCache::~PipelineStateObjectCache() +{} + bool PipelineStateObjectCache::SearchFp(const RSXFragmentProgram& rsx_fp, Shader& shader) { binary2FS::const_iterator It = cacheFS.find(vm::get_ptr(rsx_fp.addr)); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h index 81aca20d00..2b8edf061a 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h @@ -138,6 +138,10 @@ struct FragmentProgramCompare typedef std::unordered_map binary2VS; typedef std::unordered_map binary2FS; +/** + * Cache for shader blobs and Pipeline state object + * The class is responsible for creating the object so the state only has to call getGraphicPipelineState + */ class PipelineStateObjectCache { private: @@ -155,6 +159,7 @@ 
private: void Add(ID3D12PipelineState *prog, Shader& fp, Shader& vp); public: PipelineStateObjectCache(); + ~PipelineStateObjectCache(); ID3D12PipelineState *getGraphicPipelineState(ID3D12Device *device, RSXVertexProgram *vertexShader, RSXFragmentProgram *fragmentShader); }; From 722e6b8ac5791551d2ee99d0a08280ab48d49407 Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 11 May 2015 20:20:58 +0200 Subject: [PATCH 018/343] d3d12: Fix some warning and start adding IALayout support --- rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp | 4 + rpcs3/Emu/RSX/D3D12/D3D12Buffer.h | 4 + rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 295 +++++++++++++++++++-- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 20 +- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp | 25 +- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h | 3 +- rpcs3/emucore.vcxproj.filters | 6 + 7 files changed, 315 insertions(+), 42 deletions(-) create mode 100644 rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp create mode 100644 rpcs3/Emu/RSX/D3D12/D3D12Buffer.h diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp new file mode 100644 index 0000000000..643a4d4081 --- /dev/null +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -0,0 +1,4 @@ +#include "stdafx.h" +#if defined(DX12_SUPPORT) + +#endif \ No newline at end of file diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.h b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.h new file mode 100644 index 0000000000..1a857ae62c --- /dev/null +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.h @@ -0,0 +1,4 @@ +#pragma once +#if defined(DX12_SUPPORT) + +#endif \ No newline at end of file diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index a1b6d44602..8c140c8b1c 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -194,6 +194,267 @@ void D3D12GSRender::ExecCMD(u32 cmd) m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**) &commandList); } +// Where are these type defined ??? 
+static +DXGI_FORMAT getFormat(u8 type, u8 size) +{ + /*static const u32 gl_types[] = + { + GL_SHORT, + GL_FLOAT, + GL_HALF_FLOAT, + GL_UNSIGNED_BYTE, + GL_SHORT, + GL_FLOAT, // Needs conversion + GL_UNSIGNED_BYTE, + }; + + static const bool gl_normalized[] = + { + GL_TRUE, + GL_FALSE, + GL_FALSE, + GL_TRUE, + GL_FALSE, + GL_TRUE, + GL_FALSE, + };*/ + static const DXGI_FORMAT typeX1[] = + { + DXGI_FORMAT_R16_SNORM, + DXGI_FORMAT_R32_FLOAT, + DXGI_FORMAT_R16_FLOAT, + DXGI_FORMAT_R8_UNORM, + DXGI_FORMAT_R16_SINT, + DXGI_FORMAT_R32_FLOAT, + DXGI_FORMAT_R8_UINT + }; + static const DXGI_FORMAT typeX2[] = + { + DXGI_FORMAT_R16G16_SNORM, + DXGI_FORMAT_R32G32_FLOAT, + DXGI_FORMAT_R16G16_FLOAT, + DXGI_FORMAT_R8G8_UNORM, + DXGI_FORMAT_R16G16_SINT, + DXGI_FORMAT_R32G32_FLOAT, + DXGI_FORMAT_R8G8_UINT + }; + static const DXGI_FORMAT typeX3[] = + { + DXGI_FORMAT_R16G16B16A16_SNORM, + DXGI_FORMAT_R32G32B32_FLOAT, + DXGI_FORMAT_R16G16B16A16_FLOAT, + DXGI_FORMAT_R8G8B8A8_UNORM, + DXGI_FORMAT_R16G16B16A16_SINT, + DXGI_FORMAT_R32G32B32_FLOAT, + DXGI_FORMAT_R8G8B8A8_UINT + }; + static const DXGI_FORMAT typeX4[] = + { + DXGI_FORMAT_R16G16B16A16_SNORM, + DXGI_FORMAT_R32G32B32A32_FLOAT, + DXGI_FORMAT_R16G16B16A16_FLOAT, + DXGI_FORMAT_R8G8B8A8_UNORM, + DXGI_FORMAT_R16G16B16A16_SINT, + DXGI_FORMAT_R32G32B32A32_FLOAT, + DXGI_FORMAT_R8G8B8A8_UINT + }; + + switch (size) + { + case 1: + return typeX1[type]; + case 2: + return typeX2[type]; + case 3: + return typeX3[type]; + case 4: + return typeX4[type]; + } +} + +void D3D12GSRender::EnableVertexData(bool indexed_draw) +{ + static u32 offset_list[m_vertex_count]; + u32 cur_offset = 0; + + const u32 data_offset = indexed_draw ? 
0 : m_draw_array_first; + + for (u32 i = 0; i < m_vertex_count; ++i) + { + offset_list[i] = cur_offset; + + if (!m_vertex_data[i].IsEnabled()) continue; + const size_t item_size = m_vertex_data[i].GetTypeSize() * m_vertex_data[i].size; + const size_t data_size = m_vertex_data[i].data.size() - data_offset * item_size; + const u32 pos = m_vdata.size(); + + cur_offset += data_size; + m_vdata.resize(m_vdata.size() + data_size); + memcpy(&m_vdata[pos], &m_vertex_data[i].data[data_offset * item_size], data_size); + } + +/* m_vao.Create(); + m_vao.Bind(); + checkForGlError("initializing vao"); + + m_vbo.Create(indexed_draw ? 2 : 1); + m_vbo.Bind(0); + m_vbo.SetData(m_vdata.data(), m_vdata.size());*/ + + if (indexed_draw) + { + // TODO: Use default heap and upload data + D3D12_HEAP_PROPERTIES heapProp = {}; + heapProp.Type = D3D12_HEAP_TYPE_UPLOAD; + + D3D12_RESOURCE_DESC resDesc = {}; + resDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + resDesc.Width = (UINT)m_indexed_array.m_data.size(); + resDesc.Height = 1; + resDesc.DepthOrArraySize = 1; + resDesc.SampleDesc.Count = 1; + check(m_device->CreateCommittedResource( + &heapProp, + D3D12_HEAP_FLAG_NONE, + &resDesc, + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&m_indexBuffer) + )); + void *indexBufferMap; + check(m_indexBuffer->Map(0, nullptr, (void**)indexBufferMap)); + memcpy(indexBufferMap, m_indexed_array.m_data.data(), m_indexed_array.m_data.size()); + m_indexBuffer->Unmap(0, nullptr); + + D3D12_INDEX_BUFFER_VIEW indexBufferView = {}; + indexBufferView.SizeInBytes = (UINT)m_indexed_array.m_data.size(); + indexBufferView.BufferLocation = m_indexBuffer->GetGPUVirtualAddress(); + } + +#if DUMP_VERTEX_DATA + rFile dump("VertexDataArray.dump", rFile::write); +#endif + + m_IASet.clear(); + + for (u32 i = 0; i < m_vertex_count; ++i) + { + if (!m_vertex_data[i].IsEnabled()) continue; + +#if DUMP_VERTEX_DATA + dump.Write(wxString::Format("VertexData[%d]:\n", i)); + switch (m_vertex_data[i].type) + { + case 
CELL_GCM_VERTEX_S1: + for (u32 j = 0; j < m_vertex_data[i].data.size(); j += 2) + { + dump.Write(wxString::Format("%d\n", *(u16*)&m_vertex_data[i].data[j])); + if (!(((j + 2) / 2) % m_vertex_data[i].size)) dump.Write("\n"); + } + break; + + case CELL_GCM_VERTEX_F: + for (u32 j = 0; j < m_vertex_data[i].data.size(); j += 4) + { + dump.Write(wxString::Format("%.01f\n", *(float*)&m_vertex_data[i].data[j])); + if (!(((j + 4) / 4) % m_vertex_data[i].size)) dump.Write("\n"); + } + break; + + case CELL_GCM_VERTEX_SF: + for (u32 j = 0; j < m_vertex_data[i].data.size(); j += 2) + { + dump.Write(wxString::Format("%.01f\n", *(float*)&m_vertex_data[i].data[j])); + if (!(((j + 2) / 2) % m_vertex_data[i].size)) dump.Write("\n"); + } + break; + + case CELL_GCM_VERTEX_UB: + for (u32 j = 0; j < m_vertex_data[i].data.size(); ++j) + { + dump.Write(wxString::Format("%d\n", m_vertex_data[i].data[j])); + if (!((j + 1) % m_vertex_data[i].size)) dump.Write("\n"); + } + break; + + case CELL_GCM_VERTEX_S32K: + for (u32 j = 0; j < m_vertex_data[i].data.size(); j += 2) + { + dump.Write(wxString::Format("%d\n", *(u16*)&m_vertex_data[i].data[j])); + if (!(((j + 2) / 2) % m_vertex_data[i].size)) dump.Write("\n"); + } + break; + + // case CELL_GCM_VERTEX_CMP: + + case CELL_GCM_VERTEX_UB256: + for (u32 j = 0; j < m_vertex_data[i].data.size(); ++j) + { + dump.Write(wxString::Format("%d\n", m_vertex_data[i].data[j])); + if (!((j + 1) % m_vertex_data[i].size)) dump.Write("\n"); + } + break; + + default: + LOG_ERROR(HLE, "Bad cv type! 
%d", m_vertex_data[i].type); + return; + } + + dump.Write("\n"); +#endif + + if (m_vertex_data[i].type < 1 || m_vertex_data[i].type > 7) + { + LOG_ERROR(RSX, "GLGSRender::EnableVertexData: Bad vertex data type (%d)!", m_vertex_data[i].type); + } + + D3D12_INPUT_ELEMENT_DESC IAElement = {}; +/* if (!m_vertex_data[i].addr) + { + switch (m_vertex_data[i].type) + { + case CELL_GCM_VERTEX_S32K: + case CELL_GCM_VERTEX_S1: + switch (m_vertex_data[i].size) + { + case 1: glVertexAttrib1s(i, (GLshort&)m_vertex_data[i].data[0]); break; + case 2: glVertexAttrib2sv(i, (GLshort*)&m_vertex_data[i].data[0]); break; + case 3: glVertexAttrib3sv(i, (GLshort*)&m_vertex_data[i].data[0]); break; + case 4: glVertexAttrib4sv(i, (GLshort*)&m_vertex_data[i].data[0]); break; + } + break; + + case CELL_GCM_VERTEX_F: + switch (m_vertex_data[i].size) + { + case 1: glVertexAttrib1f(i, (GLfloat&)m_vertex_data[i].data[0]); break; + case 2: glVertexAttrib2fv(i, (GLfloat*)&m_vertex_data[i].data[0]); break; + case 3: glVertexAttrib3fv(i, (GLfloat*)&m_vertex_data[i].data[0]); break; + case 4: glVertexAttrib4fv(i, (GLfloat*)&m_vertex_data[i].data[0]); break; + } + break; + + case CELL_GCM_VERTEX_CMP: + case CELL_GCM_VERTEX_UB: + glVertexAttrib4ubv(i, (GLubyte*)&m_vertex_data[i].data[0]); + break; + } + + checkForGlError("glVertexAttrib"); + } + else*/ + { + IAElement.SemanticName = "TEXCOORD"; + IAElement.SemanticIndex = i; + IAElement.Format = getFormat(m_vertex_data[i].type - 1, m_vertex_data[i].size); + + IAElement.AlignedByteOffset = offset_list[i]; + } + m_IASet.push_back(IAElement); + } +} + bool D3D12GSRender::LoadProgram() { @@ -211,12 +472,25 @@ bool D3D12GSRender::LoadProgram() return false; } - m_PSO = cachePSO.getGraphicPipelineState(m_device, m_cur_vertex_prog, m_cur_fragment_prog); + m_PSO = m_cachePSO.getGraphicPipelineState(m_device, m_cur_vertex_prog, m_cur_fragment_prog, m_IASet); return true; } void D3D12GSRender::ExecCMD() { + if (m_indexed_array.m_count) + { + // 
LoadVertexData(m_indexed_array.index_min, m_indexed_array.index_max - m_indexed_array.index_min + 1); + } + + if (m_indexed_array.m_count || m_draw_array_count) + { + EnableVertexData(m_indexed_array.m_count ? true : false); + + // InitVertexData(); + // InitFragmentData(); + } + if (!LoadProgram()) { LOG_ERROR(RSX, "LoadProgram failed."); @@ -520,24 +794,9 @@ void D3D12GSRender::ExecCMD() m_program.SetVTex(i); m_gl_vertex_textures[i].Init(m_vertex_textures[i]); checkForGlError(fmt::Format("m_gl_vertex_textures[%d].Init", i)); - } + }*/ - m_vao.Bind(); - - if (m_indexed_array.m_count) - { - LoadVertexData(m_indexed_array.index_min, m_indexed_array.index_max - m_indexed_array.index_min + 1); - } - - if (m_indexed_array.m_count || m_draw_array_count) - { - EnableVertexData(m_indexed_array.m_count ? true : false); - - InitVertexData(); - InitFragmentData(); - } - - if (m_indexed_array.m_count) +/* if (m_indexed_array.m_count) { switch (m_indexed_array.m_type) { diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index a1ee7947e8..50419a5e42 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -12,6 +12,7 @@ #include "D3D12RenderTargetSets.h" #include "D3D12PipelineState.h" +#include "D3D12Buffer.h" #pragma comment (lib, "d3d12.lib") #pragma comment (lib, "dxgi.lib") @@ -39,18 +40,14 @@ typedef GSFrameBase2*(*GetGSFrameCb2)(); void SetGetD3DGSFrameCallback(GetGSFrameCb2 value); -class D3D12GSRender //TODO: find out why this used to inherit from wxWindow - : //public wxWindow - /*,*/ public GSRender +class D3D12GSRender : public GSRender { private: - // std::vector m_vdata; + std::vector m_vdata; // std::vector m_post_draw_objs; - PipelineStateObjectCache cachePSO; + PipelineStateObjectCache m_cachePSO; ID3D12PipelineState *m_PSO; - int m_fp_buf_num; - int m_vp_buf_num; // GLProgramBuffer m_prog_buffer; // GLFragmentProgram m_fragment_prog; @@ -59,6 +56,8 @@ private: // GLTexture 
m_gl_textures[m_textures_count]; // GLTexture m_gl_vertex_textures[m_textures_count]; + ID3D12Resource *m_indexBuffer; + std::vector m_IASet; // GLvao m_vao; // GLvbo m_vbo; D3D12RenderTargetSets *m_fbo; @@ -73,9 +72,6 @@ private: ID3D12DescriptorHeap *m_backbufferAsRendertarget[2]; size_t m_lastWidth, m_lastHeight, m_lastDepth; - - void* m_context; - public: GSFrameBase2 *m_frame; u32 m_draw_frames; @@ -88,8 +84,8 @@ private: virtual void Close() override; bool LoadProgram(); - /* void EnableVertexData(bool indexed_draw = false); - void DisableVertexData(); + void EnableVertexData(bool indexed_draw = false); + /*void DisableVertexData(); void InitVertexData(); void InitFragmentData(); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp index e7010aee3a..c08b247907 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp @@ -57,7 +57,8 @@ bool PipelineStateObjectCache::SearchVp(const RSXVertexProgram& rsx_vp, Shader& ID3D12PipelineState *PipelineStateObjectCache::GetProg(u32 fp, u32 vp) const { - u64 key = vp << 32 | fp; + u64 vpLong = vp; + u64 key = vpLong << 32 | fp; std::unordered_map::const_iterator It = cachePSO.find(key); if (It == cachePSO.end()) return nullptr; @@ -69,7 +70,7 @@ void PipelineStateObjectCache::AddVertexProgram(Shader& vp, RSXVertexProgram& rs size_t actualVPSize = rsx_vp.data.size() * 4; void *fpShadowCopy = malloc(actualVPSize); memcpy(fpShadowCopy, rsx_vp.data.data(), actualVPSize); - vp.Id = currentShaderId++; + vp.Id = (u32)currentShaderId++; cacheVS.insert(std::make_pair(fpShadowCopy, vp)); } @@ -78,17 +79,18 @@ void PipelineStateObjectCache::AddFragmentProgram(Shader& fp, RSXFragmentProgram size_t actualFPSize = getFPBinarySize(vm::get_ptr(rsx_fp.addr)); void *fpShadowCopy = malloc(actualFPSize); memcpy(fpShadowCopy, vm::get_ptr(rsx_fp.addr), actualFPSize); - fp.Id = currentShaderId++; + fp.Id = (u32)currentShaderId++; 
cacheFS.insert(std::make_pair(fpShadowCopy, fp)); } void PipelineStateObjectCache::Add(ID3D12PipelineState *prog, Shader& fp, Shader& vp) { - u64 key = vp.Id << 32 | fp.Id; + u64 vpLong = vp.Id; + u64 key = vpLong << 32 | fp.Id; cachePSO.insert(std::make_pair(key, prog)); } -ID3D12PipelineState *PipelineStateObjectCache::getGraphicPipelineState(ID3D12Device *device, RSXVertexProgram *vertexShader, RSXFragmentProgram *fragmentShader) +ID3D12PipelineState *PipelineStateObjectCache::getGraphicPipelineState(ID3D12Device *device, RSXVertexProgram *vertexShader, RSXFragmentProgram *fragmentShader, const std::vector &IASet) { ID3D12PipelineState *result = nullptr; Shader m_vertex_prog, m_fragment_prog; @@ -199,9 +201,9 @@ ID3D12PipelineState *PipelineStateObjectCache::getGraphicPipelineState(ID3D12Dev { FALSE,FALSE, D3D12_BLEND_ONE, D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD, - D3D12_BLEND_ONE, D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD, - D3D12_LOGIC_OP_NOOP, - D3D12_COLOR_WRITE_ENABLE_ALL, + D3D12_BLEND_ONE, D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD, + D3D12_LOGIC_OP_NOOP, + D3D12_COLOR_WRITE_ENABLE_ALL, } }; @@ -214,8 +216,8 @@ ID3D12PipelineState *PipelineStateObjectCache::getGraphicPipelineState(ID3D12Dev graphicPipelineStateDesc.RTVFormats[0] = DXGI_FORMAT_R8G8B8A8_UNORM; graphicPipelineStateDesc.DSVFormat = DXGI_FORMAT_D16_UNORM; -// graphicPipelineStateDesc.InputLayout.pInputElementDescs = VTXLayout::getInputAssemblyLayout(); -// graphicPipelineStateDesc.InputLayout.NumElements = (UINT)VTXLayout::getInputAssemblySize(); + graphicPipelineStateDesc.InputLayout.pInputElementDescs = IASet.data(); + graphicPipelineStateDesc.InputLayout.NumElements = (UINT)IASet.size(); graphicPipelineStateDesc.SampleDesc.Count = 1; graphicPipelineStateDesc.SampleMask = UINT_MAX; graphicPipelineStateDesc.NodeMask = 1; @@ -235,6 +237,7 @@ ID3D12PipelineState *PipelineStateObjectCache::getGraphicPipelineState(ID3D12Dev m_debug_programs.push_back(program); }*/ } + return result; } @@ -245,7 +248,7 @@ void 
Shader::Compile(SHADER_TYPE st) static const char VSstring[] = "#define RS \"RootFlags( ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT)\"\n" "[RootSignature(RS)]\n" - "float4 main(float4 pos : POSITION) : SV_POSITION\n" + "float4 main(float4 pos : TEXCOORD0) : SV_POSITION\n" "{\n" " return pos;\n" "}"; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h index 2b8edf061a..bddf433027 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h @@ -160,7 +160,8 @@ private: public: PipelineStateObjectCache(); ~PipelineStateObjectCache(); - ID3D12PipelineState *getGraphicPipelineState(ID3D12Device *device, RSXVertexProgram *vertexShader, RSXFragmentProgram *fragmentShader); + // Note: the last param is not taken into account if the PSO is not regenerated + ID3D12PipelineState *getGraphicPipelineState(ID3D12Device *device, RSXVertexProgram *vertexShader, RSXFragmentProgram *fragmentShader, const std::vector &IASet); }; diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters index ca6c9fc43b..07f2c03563 100644 --- a/rpcs3/emucore.vcxproj.filters +++ b/rpcs3/emucore.vcxproj.filters @@ -971,6 +971,9 @@ Emu\GPU\RSX\D3D12 + + Emu\GPU\RSX\D3D12 + @@ -1840,5 +1843,8 @@ Emu\GPU\RSX\D3D12 + + Emu\GPU\RSX\D3D12 + \ No newline at end of file From fedd35989cf3b195bc2e2bd4f447c7da10bb5f81 Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 11 May 2015 20:45:02 +0200 Subject: [PATCH 019/343] d3d12: Start writing vertex/index buffer support --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 42 ++++++++++++++-------- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 11 +----- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp | 4 +-- 3 files changed, 31 insertions(+), 26 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 8c140c8b1c..f3b090a3f0 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -293,27 +293,41 @@ void 
D3D12GSRender::EnableVertexData(bool indexed_draw) m_vdata.resize(m_vdata.size() + data_size); memcpy(&m_vdata[pos], &m_vertex_data[i].data[data_offset * item_size], data_size); } + // TODO: Use default heap and upload data + D3D12_HEAP_PROPERTIES heapProp = {}; + heapProp.Type = D3D12_HEAP_TYPE_UPLOAD; + D3D12_RESOURCE_DESC resDesc = {}; + resDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + resDesc.Width = (UINT)m_vdata.size(); + resDesc.Height = 1; + resDesc.DepthOrArraySize = 1; + resDesc.SampleDesc.Count = 1; + resDesc.MipLevels = 1; + resDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + check(m_device->CreateCommittedResource( + &heapProp, + D3D12_HEAP_FLAG_NONE, + &resDesc, + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&m_vertexBuffer) + )); + void *bufferMap; -/* m_vao.Create(); - m_vao.Bind(); - checkForGlError("initializing vao"); - - m_vbo.Create(indexed_draw ? 2 : 1); - m_vbo.Bind(0); - m_vbo.SetData(m_vdata.data(), m_vdata.size());*/ + check(m_vertexBuffer->Map(0, nullptr, (void**)&bufferMap)); + memcpy(bufferMap, m_vdata.data(), m_vdata.size()); + m_vertexBuffer->Unmap(0, nullptr); if (indexed_draw) { - // TODO: Use default heap and upload data - D3D12_HEAP_PROPERTIES heapProp = {}; - heapProp.Type = D3D12_HEAP_TYPE_UPLOAD; - D3D12_RESOURCE_DESC resDesc = {}; resDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; resDesc.Width = (UINT)m_indexed_array.m_data.size(); resDesc.Height = 1; resDesc.DepthOrArraySize = 1; resDesc.SampleDesc.Count = 1; + resDesc.MipLevels = 1; + resDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; check(m_device->CreateCommittedResource( &heapProp, D3D12_HEAP_FLAG_NONE, @@ -322,9 +336,9 @@ void D3D12GSRender::EnableVertexData(bool indexed_draw) nullptr, IID_PPV_ARGS(&m_indexBuffer) )); - void *indexBufferMap; - check(m_indexBuffer->Map(0, nullptr, (void**)indexBufferMap)); - memcpy(indexBufferMap, m_indexed_array.m_data.data(), m_indexed_array.m_data.size()); + + check(m_indexBuffer->Map(0, nullptr, 
(void**)&bufferMap)); + memcpy(bufferMap, m_indexed_array.m_data.data(), m_indexed_array.m_data.size()); m_indexBuffer->Unmap(0, nullptr); D3D12_INDEX_BUFFER_VIEW indexBufferView = {}; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 50419a5e42..9d4a6fa9a3 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -48,18 +48,12 @@ private: PipelineStateObjectCache m_cachePSO; ID3D12PipelineState *m_PSO; - // GLProgramBuffer m_prog_buffer; - - // GLFragmentProgram m_fragment_prog; - // GLVertexProgram m_vertex_prog; // GLTexture m_gl_textures[m_textures_count]; // GLTexture m_gl_vertex_textures[m_textures_count]; - ID3D12Resource *m_indexBuffer; + ID3D12Resource *m_indexBuffer, *m_vertexBuffer; std::vector m_IASet; - // GLvao m_vao; - // GLvbo m_vbo; D3D12RenderTargetSets *m_fbo; ID3D12Device* m_device; ID3D12CommandQueue *m_commandQueueCopy; @@ -89,9 +83,6 @@ private: void InitVertexData(); void InitFragmentData(); - void Enable(bool enable, const u32 cap); - - void WriteBuffers(); void WriteDepthBuffer(); void WriteColorBuffers(); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp index c08b247907..6927e4e451 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp @@ -157,9 +157,9 @@ ID3D12PipelineState *PipelineStateObjectCache::getGraphicPipelineState(ID3D12Dev } else { -/* LOG_WARNING(RSX, "Add program :"); + LOG_WARNING(RSX, "Add program :"); LOG_WARNING(RSX, "*** vp id = %d", m_vertex_prog.Id); - LOG_WARNING(RSX, "*** fp id = %d", m_fragment_prog.Id);*/ + LOG_WARNING(RSX, "*** fp id = %d", m_fragment_prog.Id); D3D12_GRAPHICS_PIPELINE_STATE_DESC graphicPipelineStateDesc = {}; From 14e932315498f611c515bfb71ecf33a8288d0a7b Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 11 May 2015 22:46:50 +0200 Subject: [PATCH 020/343] d3d12: make some shape appears --- 
rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 117 ++++++++++++++++++-------- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 1 + 2 files changed, 83 insertions(+), 35 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index f3b090a3f0..3f1f0b13ae 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -318,6 +318,11 @@ void D3D12GSRender::EnableVertexData(bool indexed_draw) memcpy(bufferMap, m_vdata.data(), m_vdata.size()); m_vertexBuffer->Unmap(0, nullptr); + m_vertexBufferView = {}; + m_vertexBufferView.BufferLocation = m_vertexBuffer->GetGPUVirtualAddress(); + m_vertexBufferView.SizeInBytes = (UINT)m_vdata.size(); + m_vertexBufferView.StrideInBytes = (UINT)cur_offset; + if (indexed_draw) { D3D12_RESOURCE_DESC resDesc = {}; @@ -492,6 +497,10 @@ bool D3D12GSRender::LoadProgram() void D3D12GSRender::ExecCMD() { + ID3D12GraphicsCommandList *commandList; + m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_commandAllocator, nullptr, IID_PPV_ARGS(&commandList)); + m_inflightCommandList.push_back(commandList); + if (m_indexed_array.m_count) { // LoadVertexData(m_indexed_array.index_min, m_indexed_array.index_max - m_indexed_array.index_min + 1); @@ -500,7 +509,7 @@ void D3D12GSRender::ExecCMD() if (m_indexed_array.m_count || m_draw_array_count) { EnableVertexData(m_indexed_array.m_count ? 
true : false); - + commandList->IASetVertexBuffers(0, 1, &m_vertexBufferView); // InitVertexData(); // InitFragmentData(); } @@ -511,11 +520,80 @@ void D3D12GSRender::ExecCMD() Emu.Pause(); return; } + commandList->SetPipelineState(m_PSO); InitDrawBuffers(); + switch (m_surface_color_target) + { + case CELL_GCM_SURFACE_TARGET_NONE: break; + case CELL_GCM_SURFACE_TARGET_0: + commandList->OMSetRenderTargets(1, &m_fbo->getRTTCPUHandle(0), true, nullptr); + break; + case CELL_GCM_SURFACE_TARGET_1: + commandList->OMSetRenderTargets(1, &m_fbo->getRTTCPUHandle(1), true, nullptr); + break; + case CELL_GCM_SURFACE_TARGET_MRT1: + commandList->OMSetRenderTargets(2, &m_fbo->getRTTCPUHandle(0), true, nullptr); + break; + case CELL_GCM_SURFACE_TARGET_MRT2: + commandList->OMSetRenderTargets(3, &m_fbo->getRTTCPUHandle(0), true, nullptr); + break; + case CELL_GCM_SURFACE_TARGET_MRT3: + commandList->OMSetRenderTargets(4, &m_fbo->getRTTCPUHandle(0), true, nullptr); + break; + default: + LOG_ERROR(RSX, "Bad surface color target: %d", m_surface_color_target); + } + D3D12_VIEWPORT viewport = + { + 0.f, + 0.f, + RSXThread::m_width, + RSXThread::m_height, + -1.f, + 1.f + }; + commandList->RSSetViewports(1, &viewport); + D3D12_RECT box = + { + 0, 0, + RSXThread::m_width, RSXThread::m_height, + }; + commandList->RSSetScissorRects(1, &box); -// ID3D12CommandList *commandList; -// m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_commandAllocator, nullptr, IID_PPV_ARGS(&commandList)); + commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + + if (m_indexed_array.m_count) + { +/* switch (m_indexed_array.m_type) + { + case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_32: + commandList->DrawIndexedInstanced + glDrawElements(m_draw_mode - 1, m_indexed_array.m_count, GL_UNSIGNED_INT, nullptr); + checkForGlError("glDrawElements #4"); + break; + + case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16: + glDrawElements(m_draw_mode - 1, m_indexed_array.m_count, GL_UNSIGNED_SHORT, nullptr); + 
checkForGlError("glDrawElements #2"); + break; + + default: + LOG_ERROR(RSX, "Bad indexed array type (%d)", m_indexed_array.m_type); + break; + } + + DisableVertexData(); + m_indexed_array.Reset();*/ + } + + if (m_draw_array_count) + { + //LOG_WARNING(RSX,"glDrawArrays(%d,%d,%d)", m_draw_mode - 1, m_draw_array_first, m_draw_array_count); + commandList->DrawInstanced(m_draw_array_first, 1, m_draw_array_count, 0); + } + check(commandList->Close()); + m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)&commandList); /* if (m_set_color_mask) { @@ -810,38 +888,7 @@ void D3D12GSRender::ExecCMD() checkForGlError(fmt::Format("m_gl_vertex_textures[%d].Init", i)); }*/ -/* if (m_indexed_array.m_count) - { - switch (m_indexed_array.m_type) - { - case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_32: - glDrawElements(m_draw_mode - 1, m_indexed_array.m_count, GL_UNSIGNED_INT, nullptr); - checkForGlError("glDrawElements #4"); - break; - - case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16: - glDrawElements(m_draw_mode - 1, m_indexed_array.m_count, GL_UNSIGNED_SHORT, nullptr); - checkForGlError("glDrawElements #2"); - break; - - default: - LOG_ERROR(RSX, "Bad indexed array type (%d)", m_indexed_array.m_type); - break; - } - - DisableVertexData(); - m_indexed_array.Reset(); - } - - if (m_draw_array_count) - { - //LOG_WARNING(RSX,"glDrawArrays(%d,%d,%d)", m_draw_mode - 1, m_draw_array_first, m_draw_array_count); - glDrawArrays(m_draw_mode - 1, 0, m_draw_array_count); - checkForGlError("glDrawArrays"); - DisableVertexData(); - } - - WriteBuffers();*/ +// WriteBuffers(); } void D3D12GSRender::Flip() diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 9d4a6fa9a3..1c335dadc3 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -52,6 +52,7 @@ private: // GLTexture m_gl_textures[m_textures_count]; // GLTexture m_gl_vertex_textures[m_textures_count]; + D3D12_VERTEX_BUFFER_VIEW m_vertexBufferView; ID3D12Resource 
*m_indexBuffer, *m_vertexBuffer; std::vector m_IASet; D3D12RenderTargetSets *m_fbo; From 8ec9f84c1bb20f5648893df568538cf4497dae0e Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 11 May 2015 23:03:42 +0200 Subject: [PATCH 021/343] d3d12; Factorise IALayout generation code --- rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp | 223 ++++++++++++++++++++++++++ rpcs3/Emu/RSX/D3D12/D3D12Buffer.h | 5 + rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 202 +---------------------- rpcs3/Emu/RSX/RSXThread.cpp | 2 +- rpcs3/Emu/RSX/RSXThread.h | 2 +- 5 files changed, 231 insertions(+), 203 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index 643a4d4081..20b607536c 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -1,4 +1,227 @@ #include "stdafx.h" #if defined(DX12_SUPPORT) +#include "D3D12Buffer.h" +#include "Utilities/Log.h" + +const int g_vertexCount = 32; + +// Where are these type defined ??? +static +DXGI_FORMAT getFormat(u8 type, u8 size) +{ + /*static const u32 gl_types[] = + { + GL_SHORT, + GL_FLOAT, + GL_HALF_FLOAT, + GL_UNSIGNED_BYTE, + GL_SHORT, + GL_FLOAT, // Needs conversion + GL_UNSIGNED_BYTE, + }; + + static const bool gl_normalized[] = + { + GL_TRUE, + GL_FALSE, + GL_FALSE, + GL_TRUE, + GL_FALSE, + GL_TRUE, + GL_FALSE, + };*/ + static const DXGI_FORMAT typeX1[] = + { + DXGI_FORMAT_R16_SNORM, + DXGI_FORMAT_R32_FLOAT, + DXGI_FORMAT_R16_FLOAT, + DXGI_FORMAT_R8_UNORM, + DXGI_FORMAT_R16_SINT, + DXGI_FORMAT_R32_FLOAT, + DXGI_FORMAT_R8_UINT + }; + static const DXGI_FORMAT typeX2[] = + { + DXGI_FORMAT_R16G16_SNORM, + DXGI_FORMAT_R32G32_FLOAT, + DXGI_FORMAT_R16G16_FLOAT, + DXGI_FORMAT_R8G8_UNORM, + DXGI_FORMAT_R16G16_SINT, + DXGI_FORMAT_R32G32_FLOAT, + DXGI_FORMAT_R8G8_UINT + }; + static const DXGI_FORMAT typeX3[] = + { + DXGI_FORMAT_R16G16B16A16_SNORM, + DXGI_FORMAT_R32G32B32_FLOAT, + DXGI_FORMAT_R16G16B16A16_FLOAT, + DXGI_FORMAT_R8G8B8A8_UNORM, + DXGI_FORMAT_R16G16B16A16_SINT, + 
DXGI_FORMAT_R32G32B32_FLOAT, + DXGI_FORMAT_R8G8B8A8_UINT + }; + static const DXGI_FORMAT typeX4[] = + { + DXGI_FORMAT_R16G16B16A16_SNORM, + DXGI_FORMAT_R32G32B32A32_FLOAT, + DXGI_FORMAT_R16G16B16A16_FLOAT, + DXGI_FORMAT_R8G8B8A8_UNORM, + DXGI_FORMAT_R16G16B16A16_SINT, + DXGI_FORMAT_R32G32B32A32_FLOAT, + DXGI_FORMAT_R8G8B8A8_UINT + }; + + switch (size) + { + case 1: + return typeX1[type]; + case 2: + return typeX2[type]; + case 3: + return typeX3[type]; + case 4: + return typeX4[type]; + } +} + +std::vector getIALayout(ID3D12Device *device, bool indexedDraw, const RSXVertexData *vertexData) +{ + std::vector result; + u32 offset_list[g_vertexCount]; + u32 cur_offset = 0; + + const u32 data_offset = indexedDraw ? 0 : 1;// m_draw_array_first; + + for (u32 i = 0; i < g_vertexCount; ++i) + { + offset_list[i] = cur_offset; + + if (!vertexData[i].IsEnabled()) continue; + const size_t item_size = vertexData[i].GetTypeSize() * vertexData[i].size; + const size_t data_size = vertexData[i].data.size() - data_offset * item_size; + cur_offset += data_size; + } + +#if DUMP_VERTEX_DATA + rFile dump("VertexDataArray.dump", rFile::write); +#endif + + for (u32 i = 0; i < g_vertexCount; ++i) + { + if (!vertexData[i].IsEnabled()) continue; + +#if DUMP_VERTEX_DATA + dump.Write(wxString::Format("VertexData[%d]:\n", i)); + switch (m_vertex_data[i].type) + { + case CELL_GCM_VERTEX_S1: + for (u32 j = 0; j < m_vertex_data[i].data.size(); j += 2) + { + dump.Write(wxString::Format("%d\n", *(u16*)&m_vertex_data[i].data[j])); + if (!(((j + 2) / 2) % m_vertex_data[i].size)) dump.Write("\n"); + } + break; + + case CELL_GCM_VERTEX_F: + for (u32 j = 0; j < m_vertex_data[i].data.size(); j += 4) + { + dump.Write(wxString::Format("%.01f\n", *(float*)&m_vertex_data[i].data[j])); + if (!(((j + 4) / 4) % m_vertex_data[i].size)) dump.Write("\n"); + } + break; + + case CELL_GCM_VERTEX_SF: + for (u32 j = 0; j < m_vertex_data[i].data.size(); j += 2) + { + dump.Write(wxString::Format("%.01f\n", 
*(float*)&m_vertex_data[i].data[j])); + if (!(((j + 2) / 2) % m_vertex_data[i].size)) dump.Write("\n"); + } + break; + + case CELL_GCM_VERTEX_UB: + for (u32 j = 0; j < m_vertex_data[i].data.size(); ++j) + { + dump.Write(wxString::Format("%d\n", m_vertex_data[i].data[j])); + if (!((j + 1) % m_vertex_data[i].size)) dump.Write("\n"); + } + break; + + case CELL_GCM_VERTEX_S32K: + for (u32 j = 0; j < m_vertex_data[i].data.size(); j += 2) + { + dump.Write(wxString::Format("%d\n", *(u16*)&m_vertex_data[i].data[j])); + if (!(((j + 2) / 2) % m_vertex_data[i].size)) dump.Write("\n"); + } + break; + + // case CELL_GCM_VERTEX_CMP: + + case CELL_GCM_VERTEX_UB256: + for (u32 j = 0; j < m_vertex_data[i].data.size(); ++j) + { + dump.Write(wxString::Format("%d\n", m_vertex_data[i].data[j])); + if (!((j + 1) % m_vertex_data[i].size)) dump.Write("\n"); + } + break; + + default: + LOG_ERROR(HLE, "Bad cv type! %d", m_vertex_data[i].type); + return; + } + + dump.Write("\n"); +#endif + + if (vertexData[i].type < 1 || vertexData[i].type > 7) + { + LOG_ERROR(RSX, "GLGSRender::EnableVertexData: Bad vertex data type (%d)!", vertexData[i].type); + } + + D3D12_INPUT_ELEMENT_DESC IAElement = {}; + /* if (!m_vertex_data[i].addr) + { + switch (m_vertex_data[i].type) + { + case CELL_GCM_VERTEX_S32K: + case CELL_GCM_VERTEX_S1: + switch (m_vertex_data[i].size) + { + case 1: glVertexAttrib1s(i, (GLshort&)m_vertex_data[i].data[0]); break; + case 2: glVertexAttrib2sv(i, (GLshort*)&m_vertex_data[i].data[0]); break; + case 3: glVertexAttrib3sv(i, (GLshort*)&m_vertex_data[i].data[0]); break; + case 4: glVertexAttrib4sv(i, (GLshort*)&m_vertex_data[i].data[0]); break; + } + break; + + case CELL_GCM_VERTEX_F: + switch (m_vertex_data[i].size) + { + case 1: glVertexAttrib1f(i, (GLfloat&)m_vertex_data[i].data[0]); break; + case 2: glVertexAttrib2fv(i, (GLfloat*)&m_vertex_data[i].data[0]); break; + case 3: glVertexAttrib3fv(i, (GLfloat*)&m_vertex_data[i].data[0]); break; + case 4: glVertexAttrib4fv(i, 
(GLfloat*)&m_vertex_data[i].data[0]); break; + } + break; + + case CELL_GCM_VERTEX_CMP: + case CELL_GCM_VERTEX_UB: + glVertexAttrib4ubv(i, (GLubyte*)&m_vertex_data[i].data[0]); + break; + } + + checkForGlError("glVertexAttrib"); + } + else*/ + { + IAElement.SemanticName = "TEXCOORD"; + IAElement.SemanticIndex = i; + IAElement.Format = getFormat(vertexData[i].type - 1, vertexData[i].size); + + IAElement.AlignedByteOffset = offset_list[i]; + } + result.push_back(IAElement); + } + return result; +} #endif \ No newline at end of file diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.h b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.h index 1a857ae62c..cb363e0f6d 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.h @@ -1,4 +1,9 @@ #pragma once #if defined(DX12_SUPPORT) +#include +#include "Emu/Memory/vm.h" +#include "Emu/RSX/RSXThread.h" + +std::vector getIALayout(ID3D12Device *device, bool indexedDraw, const RSXVertexData *vertexData); #endif \ No newline at end of file diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 3f1f0b13ae..e865f04546 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -194,87 +194,9 @@ void D3D12GSRender::ExecCMD(u32 cmd) m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**) &commandList); } -// Where are these type defined ??? 
-static -DXGI_FORMAT getFormat(u8 type, u8 size) -{ - /*static const u32 gl_types[] = - { - GL_SHORT, - GL_FLOAT, - GL_HALF_FLOAT, - GL_UNSIGNED_BYTE, - GL_SHORT, - GL_FLOAT, // Needs conversion - GL_UNSIGNED_BYTE, - }; - - static const bool gl_normalized[] = - { - GL_TRUE, - GL_FALSE, - GL_FALSE, - GL_TRUE, - GL_FALSE, - GL_TRUE, - GL_FALSE, - };*/ - static const DXGI_FORMAT typeX1[] = - { - DXGI_FORMAT_R16_SNORM, - DXGI_FORMAT_R32_FLOAT, - DXGI_FORMAT_R16_FLOAT, - DXGI_FORMAT_R8_UNORM, - DXGI_FORMAT_R16_SINT, - DXGI_FORMAT_R32_FLOAT, - DXGI_FORMAT_R8_UINT - }; - static const DXGI_FORMAT typeX2[] = - { - DXGI_FORMAT_R16G16_SNORM, - DXGI_FORMAT_R32G32_FLOAT, - DXGI_FORMAT_R16G16_FLOAT, - DXGI_FORMAT_R8G8_UNORM, - DXGI_FORMAT_R16G16_SINT, - DXGI_FORMAT_R32G32_FLOAT, - DXGI_FORMAT_R8G8_UINT - }; - static const DXGI_FORMAT typeX3[] = - { - DXGI_FORMAT_R16G16B16A16_SNORM, - DXGI_FORMAT_R32G32B32_FLOAT, - DXGI_FORMAT_R16G16B16A16_FLOAT, - DXGI_FORMAT_R8G8B8A8_UNORM, - DXGI_FORMAT_R16G16B16A16_SINT, - DXGI_FORMAT_R32G32B32_FLOAT, - DXGI_FORMAT_R8G8B8A8_UINT - }; - static const DXGI_FORMAT typeX4[] = - { - DXGI_FORMAT_R16G16B16A16_SNORM, - DXGI_FORMAT_R32G32B32A32_FLOAT, - DXGI_FORMAT_R16G16B16A16_FLOAT, - DXGI_FORMAT_R8G8B8A8_UNORM, - DXGI_FORMAT_R16G16B16A16_SINT, - DXGI_FORMAT_R32G32B32A32_FLOAT, - DXGI_FORMAT_R8G8B8A8_UINT - }; - - switch (size) - { - case 1: - return typeX1[type]; - case 2: - return typeX2[type]; - case 3: - return typeX3[type]; - case 4: - return typeX4[type]; - } -} - void D3D12GSRender::EnableVertexData(bool indexed_draw) { + m_IASet = getIALayout(m_device, indexed_draw, m_vertex_data); static u32 offset_list[m_vertex_count]; u32 cur_offset = 0; @@ -350,128 +272,6 @@ void D3D12GSRender::EnableVertexData(bool indexed_draw) indexBufferView.SizeInBytes = (UINT)m_indexed_array.m_data.size(); indexBufferView.BufferLocation = m_indexBuffer->GetGPUVirtualAddress(); } - -#if DUMP_VERTEX_DATA - rFile dump("VertexDataArray.dump", rFile::write); -#endif - - 
m_IASet.clear(); - - for (u32 i = 0; i < m_vertex_count; ++i) - { - if (!m_vertex_data[i].IsEnabled()) continue; - -#if DUMP_VERTEX_DATA - dump.Write(wxString::Format("VertexData[%d]:\n", i)); - switch (m_vertex_data[i].type) - { - case CELL_GCM_VERTEX_S1: - for (u32 j = 0; j < m_vertex_data[i].data.size(); j += 2) - { - dump.Write(wxString::Format("%d\n", *(u16*)&m_vertex_data[i].data[j])); - if (!(((j + 2) / 2) % m_vertex_data[i].size)) dump.Write("\n"); - } - break; - - case CELL_GCM_VERTEX_F: - for (u32 j = 0; j < m_vertex_data[i].data.size(); j += 4) - { - dump.Write(wxString::Format("%.01f\n", *(float*)&m_vertex_data[i].data[j])); - if (!(((j + 4) / 4) % m_vertex_data[i].size)) dump.Write("\n"); - } - break; - - case CELL_GCM_VERTEX_SF: - for (u32 j = 0; j < m_vertex_data[i].data.size(); j += 2) - { - dump.Write(wxString::Format("%.01f\n", *(float*)&m_vertex_data[i].data[j])); - if (!(((j + 2) / 2) % m_vertex_data[i].size)) dump.Write("\n"); - } - break; - - case CELL_GCM_VERTEX_UB: - for (u32 j = 0; j < m_vertex_data[i].data.size(); ++j) - { - dump.Write(wxString::Format("%d\n", m_vertex_data[i].data[j])); - if (!((j + 1) % m_vertex_data[i].size)) dump.Write("\n"); - } - break; - - case CELL_GCM_VERTEX_S32K: - for (u32 j = 0; j < m_vertex_data[i].data.size(); j += 2) - { - dump.Write(wxString::Format("%d\n", *(u16*)&m_vertex_data[i].data[j])); - if (!(((j + 2) / 2) % m_vertex_data[i].size)) dump.Write("\n"); - } - break; - - // case CELL_GCM_VERTEX_CMP: - - case CELL_GCM_VERTEX_UB256: - for (u32 j = 0; j < m_vertex_data[i].data.size(); ++j) - { - dump.Write(wxString::Format("%d\n", m_vertex_data[i].data[j])); - if (!((j + 1) % m_vertex_data[i].size)) dump.Write("\n"); - } - break; - - default: - LOG_ERROR(HLE, "Bad cv type! 
%d", m_vertex_data[i].type); - return; - } - - dump.Write("\n"); -#endif - - if (m_vertex_data[i].type < 1 || m_vertex_data[i].type > 7) - { - LOG_ERROR(RSX, "GLGSRender::EnableVertexData: Bad vertex data type (%d)!", m_vertex_data[i].type); - } - - D3D12_INPUT_ELEMENT_DESC IAElement = {}; -/* if (!m_vertex_data[i].addr) - { - switch (m_vertex_data[i].type) - { - case CELL_GCM_VERTEX_S32K: - case CELL_GCM_VERTEX_S1: - switch (m_vertex_data[i].size) - { - case 1: glVertexAttrib1s(i, (GLshort&)m_vertex_data[i].data[0]); break; - case 2: glVertexAttrib2sv(i, (GLshort*)&m_vertex_data[i].data[0]); break; - case 3: glVertexAttrib3sv(i, (GLshort*)&m_vertex_data[i].data[0]); break; - case 4: glVertexAttrib4sv(i, (GLshort*)&m_vertex_data[i].data[0]); break; - } - break; - - case CELL_GCM_VERTEX_F: - switch (m_vertex_data[i].size) - { - case 1: glVertexAttrib1f(i, (GLfloat&)m_vertex_data[i].data[0]); break; - case 2: glVertexAttrib2fv(i, (GLfloat*)&m_vertex_data[i].data[0]); break; - case 3: glVertexAttrib3fv(i, (GLfloat*)&m_vertex_data[i].data[0]); break; - case 4: glVertexAttrib4fv(i, (GLfloat*)&m_vertex_data[i].data[0]); break; - } - break; - - case CELL_GCM_VERTEX_CMP: - case CELL_GCM_VERTEX_UB: - glVertexAttrib4ubv(i, (GLubyte*)&m_vertex_data[i].data[0]); - break; - } - - checkForGlError("glVertexAttrib"); - } - else*/ - { - IAElement.SemanticName = "TEXCOORD"; - IAElement.SemanticIndex = i; - IAElement.Format = getFormat(m_vertex_data[i].type - 1, m_vertex_data[i].size); - - IAElement.AlignedByteOffset = offset_list[i]; - } - m_IASet.push_back(IAElement); - } } diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index 80cd1946f8..d3cd7129a4 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -118,7 +118,7 @@ void RSXVertexData::Load(u32 start, u32 count, u32 baseOffset, u32 baseIndex = 0 } } -u32 RSXVertexData::GetTypeSize() +u32 RSXVertexData::GetTypeSize() const { switch (type) { diff --git a/rpcs3/Emu/RSX/RSXThread.h 
b/rpcs3/Emu/RSX/RSXThread.h index 6cacabc967..ded727a469 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -37,7 +37,7 @@ struct RSXVertexData bool IsEnabled() const { return size > 0; } void Load(u32 start, u32 count, u32 baseOffset, u32 baseIndex); - u32 GetTypeSize(); + u32 GetTypeSize() const; }; struct RSXIndexArrayData From 9abaf80099a68b7d45b1b780c9f908f66fe42337 Mon Sep 17 00:00:00 2001 From: vlj Date: Tue, 12 May 2015 00:06:58 +0200 Subject: [PATCH 022/343] d3d12: Vertex element are not interleaved. --- rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp | 10 +-- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 93 ++++++++++++---------- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 4 +- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp | 2 +- 4 files changed, 57 insertions(+), 52 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index 20b607536c..ae3907e37c 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -90,16 +90,11 @@ std::vector getIALayout(ID3D12Device *device, bool ind u32 offset_list[g_vertexCount]; u32 cur_offset = 0; - const u32 data_offset = indexedDraw ? 
0 : 1;// m_draw_array_first; - for (u32 i = 0; i < g_vertexCount; ++i) { - offset_list[i] = cur_offset; - if (!vertexData[i].IsEnabled()) continue; const size_t item_size = vertexData[i].GetTypeSize() * vertexData[i].size; - const size_t data_size = vertexData[i].data.size() - data_offset * item_size; - cur_offset += data_size; + offset_list[i] = item_size; } #if DUMP_VERTEX_DATA @@ -215,9 +210,8 @@ std::vector getIALayout(ID3D12Device *device, bool ind { IAElement.SemanticName = "TEXCOORD"; IAElement.SemanticIndex = i; + IAElement.InputSlot = i; IAElement.Format = getFormat(vertexData[i].type - 1, vertexData[i].size); - - IAElement.AlignedByteOffset = offset_list[i]; } result.push_back(IAElement); } diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index e865f04546..9f48b0c19b 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -20,6 +20,7 @@ static void check(HRESULT hr) D3D12GSRender::D3D12GSRender() : GSRender(), m_fbo(nullptr), m_PSO(nullptr) { + memset(vertexBufferSize, 0, sizeof(vertexBufferSize)); // Enable d3d debug layer Microsoft::WRL::ComPtr debugInterface; D3D12GetDebugInterface(IID_PPV_ARGS(&debugInterface)); @@ -69,6 +70,31 @@ D3D12GSRender::D3D12GSRender() m_device->CreateRenderTargetView(m_backBuffer[0], &rttDesc, m_backbufferAsRendertarget[0]->GetCPUDescriptorHandleForHeapStart()); m_device->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(&m_backbufferAsRendertarget[1])); m_device->CreateRenderTargetView(m_backBuffer[1], &rttDesc, m_backbufferAsRendertarget[1]->GetCPUDescriptorHandleForHeapStart()); + + // Create global vertex buffers (1 MB, hopefully big enough...) 
+ D3D12_HEAP_PROPERTIES heapProp = {}; + heapProp.Type = D3D12_HEAP_TYPE_UPLOAD; + + D3D12_RESOURCE_DESC resDesc = {}; + resDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + resDesc.Width = (UINT)1024 * 1024; + resDesc.Height = 1; + resDesc.DepthOrArraySize = 1; + resDesc.SampleDesc.Count = 1; + resDesc.MipLevels = 1; + resDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + + for (unsigned i = 0; i < m_vertex_count; i++) + { + check(m_device->CreateCommittedResource( + &heapProp, + D3D12_HEAP_FLAG_NONE, + &resDesc, + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&m_vertexBuffer[i]) + )); + } } D3D12GSRender::~D3D12GSRender() @@ -197,57 +223,27 @@ void D3D12GSRender::ExecCMD(u32 cmd) void D3D12GSRender::EnableVertexData(bool indexed_draw) { m_IASet = getIALayout(m_device, indexed_draw, m_vertex_data); - static u32 offset_list[m_vertex_count]; - u32 cur_offset = 0; const u32 data_offset = indexed_draw ? 0 : m_draw_array_first; for (u32 i = 0; i < m_vertex_count; ++i) { - offset_list[i] = cur_offset; - if (!m_vertex_data[i].IsEnabled()) continue; const size_t item_size = m_vertex_data[i].GetTypeSize() * m_vertex_data[i].size; const size_t data_size = m_vertex_data[i].data.size() - data_offset * item_size; - const u32 pos = m_vdata.size(); - cur_offset += data_size; - m_vdata.resize(m_vdata.size() + data_size); - memcpy(&m_vdata[pos], &m_vertex_data[i].data[data_offset * item_size], data_size); + // TODO: Use default heap and upload data + void *bufferMap; + check(m_vertexBuffer[i]->Map(0, nullptr, (void**)&bufferMap)); + memcpy((char*)bufferMap + data_offset * item_size, &m_vertex_data[i].data[data_offset * item_size], data_size); + m_vertexBuffer[i]->Unmap(0, nullptr); + size_t newOffset = (data_offset + data_size) * item_size; + vertexBufferSize[i] = newOffset > vertexBufferSize[i] ? 
newOffset : vertexBufferSize[i]; } - // TODO: Use default heap and upload data - D3D12_HEAP_PROPERTIES heapProp = {}; - heapProp.Type = D3D12_HEAP_TYPE_UPLOAD; - D3D12_RESOURCE_DESC resDesc = {}; - resDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; - resDesc.Width = (UINT)m_vdata.size(); - resDesc.Height = 1; - resDesc.DepthOrArraySize = 1; - resDesc.SampleDesc.Count = 1; - resDesc.MipLevels = 1; - resDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; - check(m_device->CreateCommittedResource( - &heapProp, - D3D12_HEAP_FLAG_NONE, - &resDesc, - D3D12_RESOURCE_STATE_GENERIC_READ, - nullptr, - IID_PPV_ARGS(&m_vertexBuffer) - )); - void *bufferMap; - - check(m_vertexBuffer->Map(0, nullptr, (void**)&bufferMap)); - memcpy(bufferMap, m_vdata.data(), m_vdata.size()); - m_vertexBuffer->Unmap(0, nullptr); - - m_vertexBufferView = {}; - m_vertexBufferView.BufferLocation = m_vertexBuffer->GetGPUVirtualAddress(); - m_vertexBufferView.SizeInBytes = (UINT)m_vdata.size(); - m_vertexBufferView.StrideInBytes = (UINT)cur_offset; if (indexed_draw) { - D3D12_RESOURCE_DESC resDesc = {}; +/* D3D12_RESOURCE_DESC resDesc = {}; resDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; resDesc.Width = (UINT)m_indexed_array.m_data.size(); resDesc.Height = 1; @@ -270,7 +266,7 @@ void D3D12GSRender::EnableVertexData(bool indexed_draw) D3D12_INDEX_BUFFER_VIEW indexBufferView = {}; indexBufferView.SizeInBytes = (UINT)m_indexed_array.m_data.size(); - indexBufferView.BufferLocation = m_indexBuffer->GetGPUVirtualAddress(); + indexBufferView.BufferLocation = m_indexBuffer->GetGPUVirtualAddress();*/ } } @@ -309,7 +305,21 @@ void D3D12GSRender::ExecCMD() if (m_indexed_array.m_count || m_draw_array_count) { EnableVertexData(m_indexed_array.m_count ? 
true : false); - commandList->IASetVertexBuffers(0, 1, &m_vertexBufferView); + std::vector vertexBufferViews; + for (u32 i = 0; i < m_vertex_count; ++i) + { + if (!m_vertex_data[i].IsEnabled()) continue; + const size_t item_size = m_vertex_data[i].GetTypeSize() * m_vertex_data[i].size; + D3D12_VERTEX_BUFFER_VIEW vertexBufferView = {}; + + vertexBufferView.BufferLocation = m_vertexBuffer[i]->GetGPUVirtualAddress(); + vertexBufferView.SizeInBytes = (UINT)vertexBufferSize[i]; + vertexBufferView.StrideInBytes = (UINT)item_size; + vertexBufferViews.push_back(vertexBufferView); + + assert((m_draw_array_first + m_draw_array_count) * item_size <= vertexBufferSize[i]); + } + commandList->IASetVertexBuffers(0, vertexBufferViews.size(), vertexBufferViews.data()); // InitVertexData(); // InitFragmentData(); } @@ -390,7 +400,7 @@ void D3D12GSRender::ExecCMD() if (m_draw_array_count) { //LOG_WARNING(RSX,"glDrawArrays(%d,%d,%d)", m_draw_mode - 1, m_draw_array_first, m_draw_array_count); - commandList->DrawInstanced(m_draw_array_first, 1, m_draw_array_count, 0); + commandList->DrawInstanced(m_draw_array_count, 1, m_draw_array_first, 0); } check(commandList->Close()); m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)&commandList); @@ -748,5 +758,6 @@ void D3D12GSRender::Flip() for (ID3D12GraphicsCommandList *gfxCommandList : m_inflightCommandList) gfxCommandList->Release(); m_inflightCommandList.clear(); + memset(vertexBufferSize, 0, sizeof(vertexBufferSize)); } #endif \ No newline at end of file diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 1c335dadc3..e05cb2153d 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -43,6 +43,7 @@ void SetGetD3DGSFrameCallback(GetGSFrameCb2 value); class D3D12GSRender : public GSRender { private: + u32 vertexBufferSize[32]; std::vector m_vdata; // std::vector m_post_draw_objs; @@ -52,8 +53,7 @@ private: // GLTexture 
m_gl_textures[m_textures_count]; // GLTexture m_gl_vertex_textures[m_textures_count]; - D3D12_VERTEX_BUFFER_VIEW m_vertexBufferView; - ID3D12Resource *m_indexBuffer, *m_vertexBuffer; + ID3D12Resource *m_indexBuffer, *m_vertexBuffer[m_vertex_count]; std::vector m_IASet; D3D12RenderTargetSets *m_fbo; ID3D12Device* m_device; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp index 6927e4e451..f1ffcadb85 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp @@ -250,7 +250,7 @@ void Shader::Compile(SHADER_TYPE st) "[RootSignature(RS)]\n" "float4 main(float4 pos : TEXCOORD0) : SV_POSITION\n" "{\n" - " return pos;\n" + " return float4(pos.x, pos.y, 0., 1.);\n" "}"; static const char FSstring[] = "#define RS \"RootFlags( ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT)\"\n" From 646c908a93ec4fde6ebec5cbb5127923d0685a60 Mon Sep 17 00:00:00 2001 From: vlj Date: Tue, 12 May 2015 00:14:32 +0200 Subject: [PATCH 023/343] d3d12: Fix some warnings --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 8 ++++---- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 9f48b0c19b..1135d80fe0 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -319,7 +319,7 @@ void D3D12GSRender::ExecCMD() assert((m_draw_array_first + m_draw_array_count) * item_size <= vertexBufferSize[i]); } - commandList->IASetVertexBuffers(0, vertexBufferViews.size(), vertexBufferViews.data()); + commandList->IASetVertexBuffers(0, (UINT)vertexBufferViews.size(), vertexBufferViews.data()); // InitVertexData(); // InitFragmentData(); } @@ -358,8 +358,8 @@ void D3D12GSRender::ExecCMD() { 0.f, 0.f, - RSXThread::m_width, - RSXThread::m_height, + (float)RSXThread::m_width, + (float)RSXThread::m_height, -1.f, 1.f }; @@ -367,7 +367,7 @@ void D3D12GSRender::ExecCMD() 
D3D12_RECT box = { 0, 0, - RSXThread::m_width, RSXThread::m_height, + (LONG)RSXThread::m_width, (LONG)RSXThread::m_height, }; commandList->RSSetScissorRects(1, &box); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index e05cb2153d..5bcc64c487 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -43,7 +43,7 @@ void SetGetD3DGSFrameCallback(GetGSFrameCb2 value); class D3D12GSRender : public GSRender { private: - u32 vertexBufferSize[32]; + size_t vertexBufferSize[32]; std::vector m_vdata; // std::vector m_post_draw_objs; From 6f487f910c663038c434d0be6a9a142f60b395e1 Mon Sep 17 00:00:00 2001 From: vlj Date: Tue, 12 May 2015 21:55:20 +0200 Subject: [PATCH 024/343] d3d12: Fix build with DX12_SUPPORT not def --- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp index f1ffcadb85..3fa7ed3901 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp @@ -240,9 +240,6 @@ ID3D12PipelineState *PipelineStateObjectCache::getGraphicPipelineState(ID3D12Dev return result; } - -#endif - void Shader::Compile(SHADER_TYPE st) { static const char VSstring[] = @@ -275,3 +272,6 @@ void Shader::Compile(SHADER_TYPE st) break; } } + + +#endif \ No newline at end of file From 65fbc572216630b131e7b6ff818d82935977c7e8 Mon Sep 17 00:00:00 2001 From: vlj Date: Tue, 12 May 2015 23:41:03 +0200 Subject: [PATCH 025/343] d3d12: Enable constant buffer support --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 147 +++++++++++++++++++-- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 10 +- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp | 68 +++++++--- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h | 2 +- 4 files changed, 196 insertions(+), 31 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 
1135d80fe0..7aa469dc93 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -20,7 +20,9 @@ static void check(HRESULT hr) D3D12GSRender::D3D12GSRender() : GSRender(), m_fbo(nullptr), m_PSO(nullptr) { - memset(vertexBufferSize, 0, sizeof(vertexBufferSize)); + memset(m_vertexBufferSize, 0, sizeof(m_vertexBufferSize)); + m_constantsBufferOffset = 0; + m_constantsBufferIndex = 0; // Enable d3d debug layer Microsoft::WRL::ComPtr debugInterface; D3D12GetDebugInterface(IID_PPV_ARGS(&debugInterface)); @@ -95,6 +97,45 @@ D3D12GSRender::D3D12GSRender() IID_PPV_ARGS(&m_vertexBuffer[i]) )); } + + check(m_device->CreateCommittedResource( + &heapProp, + D3D12_HEAP_FLAG_NONE, + &resDesc, + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&m_constantsBuffer) + )); + + D3D12_DESCRIPTOR_HEAP_DESC descriptorHeapDesc = {}; + descriptorHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; + descriptorHeapDesc.NumDescriptors = 1000; // For safety + descriptorHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; + check(m_device->CreateDescriptorHeap(&descriptorHeapDesc, IID_PPV_ARGS(&m_constantsBufferDescriptorsHeap))); + + // Common root signature + D3D12_DESCRIPTOR_RANGE descriptorRange = {}; + descriptorRange.BaseShaderRegister = 0; + descriptorRange.NumDescriptors = 1; + descriptorRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV; + D3D12_ROOT_PARAMETER RP = {}; + RP.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + RP.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + RP.DescriptorTable.pDescriptorRanges = &descriptorRange; + RP.DescriptorTable.NumDescriptorRanges = 1; + + D3D12_ROOT_SIGNATURE_DESC rootSignatureDesc = {}; + rootSignatureDesc.Flags = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT; + rootSignatureDesc.NumParameters = 1; + rootSignatureDesc.pParameters = &RP; + + Microsoft::WRL::ComPtr rootSignatureBlob; + check(D3D12SerializeRootSignature(&rootSignatureDesc, 
D3D_ROOT_SIGNATURE_VERSION_1, &rootSignatureBlob, nullptr)); + + m_device->CreateRootSignature(0, + rootSignatureBlob->GetBufferPointer(), + rootSignatureBlob->GetBufferSize(), + IID_PPV_ARGS(&m_rootSignature)); } D3D12GSRender::~D3D12GSRender() @@ -238,7 +279,7 @@ void D3D12GSRender::EnableVertexData(bool indexed_draw) memcpy((char*)bufferMap + data_offset * item_size, &m_vertex_data[i].data[data_offset * item_size], data_size); m_vertexBuffer[i]->Unmap(0, nullptr); size_t newOffset = (data_offset + data_size) * item_size; - vertexBufferSize[i] = newOffset > vertexBufferSize[i] ? newOffset : vertexBufferSize[i]; + m_vertexBufferSize[i] = newOffset > m_vertexBufferSize[i] ? newOffset : m_vertexBufferSize[i]; } if (indexed_draw) @@ -270,6 +311,86 @@ void D3D12GSRender::EnableVertexData(bool indexed_draw) } } +void D3D12GSRender::FillVertexShaderConstantsBuffer() +{ + float scaleOffsetMat[16] = + { + 1.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 1.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 1.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f + }; + size_t currentoffset = 0; + void *constantsBufferMap; + // TODO: Use finer range + D3D12_RANGE range = { + m_constantsBufferOffset, + 1024 * 1024 - m_constantsBufferOffset + }; + check(m_constantsBuffer->Map(0, &range, &constantsBufferMap)); + + // Scale + scaleOffsetMat[0] = (float&)methodRegisters[NV4097_SET_VIEWPORT_SCALE + (0x4 * 0)] / (RSXThread::m_width / RSXThread::m_width_scale); + scaleOffsetMat[5] = (float&)methodRegisters[NV4097_SET_VIEWPORT_SCALE + (0x4 * 1)] / (RSXThread::m_height / RSXThread::m_height_scale); + scaleOffsetMat[10] = (float&)methodRegisters[NV4097_SET_VIEWPORT_SCALE + (0x4 * 2)]; + + // Offset + scaleOffsetMat[3] = (float&)methodRegisters[NV4097_SET_VIEWPORT_OFFSET + (0x4 * 0)] - (RSXThread::m_width / RSXThread::m_width_scale); + scaleOffsetMat[7] = (float&)methodRegisters[NV4097_SET_VIEWPORT_OFFSET + (0x4 * 1)] - (RSXThread::m_height / RSXThread::m_height_scale); + scaleOffsetMat[11] = (float&)methodRegisters[NV4097_SET_VIEWPORT_OFFSET 
+ (0x4 * 2)] - 1 / 2.0f; + + scaleOffsetMat[3] /= RSXThread::m_width / RSXThread::m_width_scale; + scaleOffsetMat[7] /= RSXThread::m_height / RSXThread::m_height_scale; + + memcpy((char*)constantsBufferMap + m_constantsBufferOffset + currentoffset, scaleOffsetMat, 16 * sizeof(float)); + currentoffset += 16 * sizeof(float); + + for (const RSXTransformConstant& c : m_transform_constants) + { + float vector[] = { c.x, c.y, c.z, c.w }; + memcpy((char*)constantsBufferMap + m_constantsBufferOffset + currentoffset, vector, 4 * sizeof(float)); + currentoffset += 4 * sizeof(float); + } + m_constantsBuffer->Unmap(0, &range); + // Align to 256 byte + currentoffset = (currentoffset + 255) & ~255; + + + D3D12_CONSTANT_BUFFER_VIEW_DESC constantBufferViewDesc = {}; + constantBufferViewDesc.BufferLocation = m_constantsBuffer->GetGPUVirtualAddress() + m_constantsBufferOffset; + constantBufferViewDesc.SizeInBytes = (UINT)currentoffset; + D3D12_CPU_DESCRIPTOR_HANDLE Handle = m_constantsBufferDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); + Handle.ptr += m_constantsBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + m_device->CreateConstantBufferView(&constantBufferViewDesc, Handle); + m_constantsBufferOffset += currentoffset; +} + +void D3D12GSRender::FillPixelShaderConstantsBuffer() +{ +/* if (!m_cur_fragment_prog) + { + LOG_ERROR(RSX, "InitFragmentData: m_cur_shader_prog == NULL"); + return; + } + + for (const RSXTransformConstant& c : m_fragment_constants) + { + u32 id = c.id - m_cur_fragment_prog->offset; + + //LOG_WARNING(RSX,"fc%u[0x%x - 0x%x] = (%f, %f, %f, %f)", id, c.id, m_cur_shader_prog->offset, c.x, c.y, c.z, c.w); + + const std::string name = fmt::Format("fc%u", id); + const int l = m_program.GetLocation(name); + checkForGlError("glGetUniformLocation " + name); + + glUniform4f(l, c.x, c.y, c.z, c.w); + checkForGlError("glUniform4f " + name + fmt::Format(" %u [%f %f %f %f]", l, c.x, c.y, c.z, c.w)); + } + + //if 
(m_fragment_constants.GetCount())*/ + // LOG_NOTICE(HLE, ""); +} + bool D3D12GSRender::LoadProgram() { @@ -287,7 +408,7 @@ bool D3D12GSRender::LoadProgram() return false; } - m_PSO = m_cachePSO.getGraphicPipelineState(m_device, m_cur_vertex_prog, m_cur_fragment_prog, m_IASet); + m_PSO = m_cachePSO.getGraphicPipelineState(m_device, m_rootSignature, m_cur_vertex_prog, m_cur_fragment_prog, m_IASet); return true; } @@ -297,6 +418,8 @@ void D3D12GSRender::ExecCMD() m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_commandAllocator, nullptr, IID_PPV_ARGS(&commandList)); m_inflightCommandList.push_back(commandList); + commandList->SetGraphicsRootSignature(m_rootSignature); + if (m_indexed_array.m_count) { // LoadVertexData(m_indexed_array.index_min, m_indexed_array.index_max - m_indexed_array.index_min + 1); @@ -313,15 +436,21 @@ void D3D12GSRender::ExecCMD() D3D12_VERTEX_BUFFER_VIEW vertexBufferView = {}; vertexBufferView.BufferLocation = m_vertexBuffer[i]->GetGPUVirtualAddress(); - vertexBufferView.SizeInBytes = (UINT)vertexBufferSize[i]; + vertexBufferView.SizeInBytes = (UINT)m_vertexBufferSize[i]; vertexBufferView.StrideInBytes = (UINT)item_size; vertexBufferViews.push_back(vertexBufferView); - assert((m_draw_array_first + m_draw_array_count) * item_size <= vertexBufferSize[i]); + assert((m_draw_array_first + m_draw_array_count) * item_size <= m_vertexBufferSize[i]); } commandList->IASetVertexBuffers(0, (UINT)vertexBufferViews.size(), vertexBufferViews.data()); - // InitVertexData(); - // InitFragmentData(); + FillVertexShaderConstantsBuffer(); + commandList->SetDescriptorHeaps(1, &m_constantsBufferDescriptorsHeap); + D3D12_GPU_DESCRIPTOR_HANDLE Handle = m_constantsBufferDescriptorsHeap->GetGPUDescriptorHandleForHeapStart(); + Handle.ptr += m_constantsBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + commandList->SetGraphicsRootDescriptorTable(0, Handle); + m_constantsBufferIndex++; + + 
FillPixelShaderConstantsBuffer(); } if (!LoadProgram()) @@ -758,6 +887,8 @@ void D3D12GSRender::Flip() for (ID3D12GraphicsCommandList *gfxCommandList : m_inflightCommandList) gfxCommandList->Release(); m_inflightCommandList.clear(); - memset(vertexBufferSize, 0, sizeof(vertexBufferSize)); + memset(m_vertexBufferSize, 0, sizeof(m_vertexBufferSize)); + m_constantsBufferOffset = 0; + m_constantsBufferIndex = 0; } #endif \ No newline at end of file diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 5bcc64c487..e987143e40 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -43,17 +43,21 @@ void SetGetD3DGSFrameCallback(GetGSFrameCb2 value); class D3D12GSRender : public GSRender { private: - size_t vertexBufferSize[32]; + size_t m_vertexBufferSize[32]; std::vector m_vdata; // std::vector m_post_draw_objs; PipelineStateObjectCache m_cachePSO; ID3D12PipelineState *m_PSO; + ID3D12RootSignature *m_rootSignature; // GLTexture m_gl_textures[m_textures_count]; // GLTexture m_gl_vertex_textures[m_textures_count]; ID3D12Resource *m_indexBuffer, *m_vertexBuffer[m_vertex_count]; + ID3D12Resource *m_constantsBuffer; + ID3D12DescriptorHeap *m_constantsBufferDescriptorsHeap; + size_t m_constantsBufferOffset, m_constantsBufferIndex; std::vector m_IASet; D3D12RenderTargetSets *m_fbo; ID3D12Device* m_device; @@ -80,9 +84,9 @@ private: bool LoadProgram(); void EnableVertexData(bool indexed_draw = false); + void FillVertexShaderConstantsBuffer(); + void FillPixelShaderConstantsBuffer(); /*void DisableVertexData(); - void InitVertexData(); - void InitFragmentData(); void WriteBuffers(); void WriteDepthBuffer(); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp index 3fa7ed3901..613a65fad6 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp @@ -90,7 +90,7 @@ void 
PipelineStateObjectCache::Add(ID3D12PipelineState *prog, Shader& fp, Shader cachePSO.insert(std::make_pair(key, prog)); } -ID3D12PipelineState *PipelineStateObjectCache::getGraphicPipelineState(ID3D12Device *device, RSXVertexProgram *vertexShader, RSXFragmentProgram *fragmentShader, const std::vector &IASet) +ID3D12PipelineState *PipelineStateObjectCache::getGraphicPipelineState(ID3D12Device *device, ID3D12RootSignature *rootSignature, RSXVertexProgram *vertexShader, RSXFragmentProgram *fragmentShader, const std::vector &IASet) { ID3D12PipelineState *result = nullptr; Shader m_vertex_prog, m_fragment_prog; @@ -163,10 +163,18 @@ ID3D12PipelineState *PipelineStateObjectCache::getGraphicPipelineState(ID3D12Dev D3D12_GRAPHICS_PIPELINE_STATE_DESC graphicPipelineStateDesc = {}; - graphicPipelineStateDesc.VS.BytecodeLength = m_vertex_prog.bytecode->GetBufferSize(); - graphicPipelineStateDesc.VS.pShaderBytecode = m_vertex_prog.bytecode->GetBufferPointer(); - graphicPipelineStateDesc.PS.BytecodeLength = m_fragment_prog.bytecode->GetBufferSize(); - graphicPipelineStateDesc.PS.pShaderBytecode = m_fragment_prog.bytecode->GetBufferPointer(); + if (m_vertex_prog.bytecode != nullptr) + { + graphicPipelineStateDesc.VS.BytecodeLength = m_vertex_prog.bytecode->GetBufferSize(); + graphicPipelineStateDesc.VS.pShaderBytecode = m_vertex_prog.bytecode->GetBufferPointer(); + } + if (m_fragment_prog.bytecode != nullptr) + { + graphicPipelineStateDesc.PS.BytecodeLength = m_fragment_prog.bytecode->GetBufferSize(); + graphicPipelineStateDesc.PS.pShaderBytecode = m_fragment_prog.bytecode->GetBufferPointer(); + } + + graphicPipelineStateDesc.pRootSignature = rootSignature; // Sensible default value static D3D12_RASTERIZER_DESC CD3D12_RASTERIZER_DESC = @@ -240,22 +248,44 @@ ID3D12PipelineState *PipelineStateObjectCache::getGraphicPipelineState(ID3D12Dev return result; } +#define TO_STRING(x) #x + void Shader::Compile(SHADER_TYPE st) { - static const char VSstring[] = - "#define RS \"RootFlags( 
ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT)\"\n" - "[RootSignature(RS)]\n" - "float4 main(float4 pos : TEXCOORD0) : SV_POSITION\n" - "{\n" - " return float4(pos.x, pos.y, 0., 1.);\n" - "}"; - static const char FSstring[] = - "#define RS \"RootFlags( ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT)\"\n" - "[RootSignature(RS)]\n" - "float4 main() : SV_TARGET" - "{" - "return float4(1.0f, 1.0f, 1.0f, 1.0f);" - "}"; + static const char VSstring[] = TO_STRING( + cbuffer CONSTANT : register(b0) + { + float4x4 scaleOffsetMat; + }; + + struct vertex { + float4 pos : TEXCOORD0; + float4 color : TEXCOORD3; + }; + + struct pixel { + float4 pos : SV_POSITION; + float4 color : TEXCOORD0; + }; + + pixel main(vertex In) + { + pixel Out; + Out.pos = mul(float4(In.pos.x, In.pos.y, 0., 1.), scaleOffsetMat); + Out.color = In.color; + return Out; + }); + + static const char FSstring[] = TO_STRING( + struct pixel { + float4 pos : SV_POSITION; + float4 color : TEXCOORD0; + }; + float4 main(pixel In) : SV_TARGET + { + return In.color; + }); + HRESULT hr; Microsoft::WRL::ComPtr errorBlob; switch (st) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h index bddf433027..f8fa6a5ea2 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h @@ -161,7 +161,7 @@ public: PipelineStateObjectCache(); ~PipelineStateObjectCache(); // Note: the last param is not taken into account if the PSO is not regenerated - ID3D12PipelineState *getGraphicPipelineState(ID3D12Device *device, RSXVertexProgram *vertexShader, RSXFragmentProgram *fragmentShader, const std::vector &IASet); + ID3D12PipelineState *getGraphicPipelineState(ID3D12Device *device, ID3D12RootSignature *rootSignature, RSXVertexProgram *vertexShader, RSXFragmentProgram *fragmentShader, const std::vector &IASet); }; From 28b964aeb023735cb5f06e5bbe8b32c883d9ea0e Mon Sep 17 00:00:00 2001 From: vlj Date: Wed, 13 May 2015 17:00:25 +0200 Subject: [PATCH 026/343] d3d12: Fix Input Layout 
--- rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index ae3907e37c..30be9c4e11 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -101,6 +101,7 @@ std::vector getIALayout(ID3D12Device *device, bool ind rFile dump("VertexDataArray.dump", rFile::write); #endif + size_t inputSlot = 0; for (u32 i = 0; i < g_vertexCount; ++i) { if (!vertexData[i].IsEnabled()) continue; @@ -210,8 +211,9 @@ std::vector getIALayout(ID3D12Device *device, bool ind { IAElement.SemanticName = "TEXCOORD"; IAElement.SemanticIndex = i; - IAElement.InputSlot = i; + IAElement.InputSlot = inputSlot; IAElement.Format = getFormat(vertexData[i].type - 1, vertexData[i].size); + inputSlot++; } result.push_back(IAElement); } From 1c2849a736a5a3389f8d885b8a423884a35256f1 Mon Sep 17 00:00:00 2001 From: vlj Date: Wed, 13 May 2015 17:01:15 +0200 Subject: [PATCH 027/343] d3d12: Remove useless class member --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 1 - 1 file changed, 1 deletion(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index e987143e40..80d8b0ea5d 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -44,7 +44,6 @@ class D3D12GSRender : public GSRender { private: size_t m_vertexBufferSize[32]; - std::vector m_vdata; // std::vector m_post_draw_objs; PipelineStateObjectCache m_cachePSO; From d627f9cb83beadfab4c30f1cbcc511898cff5eaa Mon Sep 17 00:00:00 2001 From: vlj Date: Wed, 13 May 2015 17:04:44 +0200 Subject: [PATCH 028/343] d3d12: Stop emulation if program compilation fails --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 7aa469dc93..fcfaec85db 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ 
b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -409,7 +409,7 @@ bool D3D12GSRender::LoadProgram() } m_PSO = m_cachePSO.getGraphicPipelineState(m_device, m_rootSignature, m_cur_vertex_prog, m_cur_fragment_prog, m_IASet); - return true; + return m_PSO != nullptr; } void D3D12GSRender::ExecCMD() From 411265d83a774da6ea68d8f6b04d5403da015743 Mon Sep 17 00:00:00 2001 From: vlj Date: Wed, 13 May 2015 17:20:43 +0200 Subject: [PATCH 029/343] d3d12: Use constant buffer content --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 14 +++++++++----- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp | 9 ++++++++- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index fcfaec85db..dadd5c8051 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -345,24 +345,28 @@ void D3D12GSRender::FillVertexShaderConstantsBuffer() memcpy((char*)constantsBufferMap + m_constantsBufferOffset + currentoffset, scaleOffsetMat, 16 * sizeof(float)); currentoffset += 16 * sizeof(float); + size_t bufferSize = currentoffset; + for (const RSXTransformConstant& c : m_transform_constants) { + size_t offset = c.id * 4 * sizeof(float) + currentoffset; float vector[] = { c.x, c.y, c.z, c.w }; - memcpy((char*)constantsBufferMap + m_constantsBufferOffset + currentoffset, vector, 4 * sizeof(float)); - currentoffset += 4 * sizeof(float); + memcpy((char*)constantsBufferMap + m_constantsBufferOffset + offset, vector, 4 * sizeof(float)); + size_t bufferSizeCandidate = offset + 4 * sizeof(float); + bufferSize = bufferSizeCandidate > bufferSize ? 
bufferSizeCandidate : bufferSize; } m_constantsBuffer->Unmap(0, &range); // Align to 256 byte - currentoffset = (currentoffset + 255) & ~255; + bufferSize = (bufferSize + 255) & ~255; D3D12_CONSTANT_BUFFER_VIEW_DESC constantBufferViewDesc = {}; constantBufferViewDesc.BufferLocation = m_constantsBuffer->GetGPUVirtualAddress() + m_constantsBufferOffset; - constantBufferViewDesc.SizeInBytes = (UINT)currentoffset; + constantBufferViewDesc.SizeInBytes = (UINT)bufferSize; D3D12_CPU_DESCRIPTOR_HANDLE Handle = m_constantsBufferDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); Handle.ptr += m_constantsBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); m_device->CreateConstantBufferView(&constantBufferViewDesc, Handle); - m_constantsBufferOffset += currentoffset; + m_constantsBufferOffset += bufferSize; } void D3D12GSRender::FillPixelShaderConstantsBuffer() diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp index 613a65fad6..66032a0dc7 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp @@ -256,6 +256,7 @@ void Shader::Compile(SHADER_TYPE st) cbuffer CONSTANT : register(b0) { float4x4 scaleOffsetMat; + float4 vc[468]; }; struct vertex { @@ -271,7 +272,13 @@ void Shader::Compile(SHADER_TYPE st) pixel main(vertex In) { pixel Out; - Out.pos = mul(float4(In.pos.x, In.pos.y, 0., 1.), scaleOffsetMat); + float4 pos = In.pos; + pos.w = dot(pos, vc[259]); + pos.z = dot(pos, vc[258]); + pos.y = dot(pos, vc[257]); + pos.x = dot(pos, vc[256]); + pos.z = 0; + Out.pos = mul(pos, scaleOffsetMat); Out.color = In.color; return Out; }); From 40e19e0c9513405c688f2c488ad6a0aaf857ac50 Mon Sep 17 00:00:00 2001 From: vlj Date: Wed, 13 May 2015 17:48:02 +0200 Subject: [PATCH 030/343] d3d12: Something strange is happening to depth... 
--- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp index 66032a0dc7..568c394945 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp @@ -277,7 +277,7 @@ void Shader::Compile(SHADER_TYPE st) pos.z = dot(pos, vc[258]); pos.y = dot(pos, vc[257]); pos.x = dot(pos, vc[256]); - pos.z = 0; + pos.z = -pos.z; Out.pos = mul(pos, scaleOffsetMat); Out.color = In.color; return Out; From 07ea311b3a125dbbe781bbed9f2d392ca15ce534 Mon Sep 17 00:00:00 2001 From: vlj Date: Wed, 13 May 2015 18:39:34 +0200 Subject: [PATCH 031/343] d3d12: Separate scaleOffset and classic constants --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 111 ++++++++++++++++++-------- 1 file changed, 79 insertions(+), 32 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index dadd5c8051..87d6b5fb56 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -23,6 +23,7 @@ D3D12GSRender::D3D12GSRender() memset(m_vertexBufferSize, 0, sizeof(m_vertexBufferSize)); m_constantsBufferOffset = 0; m_constantsBufferIndex = 0; + m_currentScaleOffsetBufferIndex = 0; // Enable d3d debug layer Microsoft::WRL::ComPtr debugInterface; D3D12GetDebugInterface(IID_PPV_ARGS(&debugInterface)); @@ -113,24 +114,51 @@ D3D12GSRender::D3D12GSRender() descriptorHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; check(m_device->CreateDescriptorHeap(&descriptorHeapDesc, IID_PPV_ARGS(&m_constantsBufferDescriptorsHeap))); + // Scale offset buffer + // Separate constant buffer + check(m_device->CreateCommittedResource( + &heapProp, + D3D12_HEAP_FLAG_NONE, + &resDesc, + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&m_scaleOffsetBuffer) + )); + descriptorHeapDesc = {}; + descriptorHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; + 
descriptorHeapDesc.NumDescriptors = 1000; // For safety + descriptorHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; + check(m_device->CreateDescriptorHeap(&descriptorHeapDesc, IID_PPV_ARGS(&m_scaleOffsetDescriptorHeap))); + + // Common root signature - D3D12_DESCRIPTOR_RANGE descriptorRange = {}; - descriptorRange.BaseShaderRegister = 0; - descriptorRange.NumDescriptors = 1; - descriptorRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV; - D3D12_ROOT_PARAMETER RP = {}; - RP.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; - RP.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; - RP.DescriptorTable.pDescriptorRanges = &descriptorRange; - RP.DescriptorTable.NumDescriptorRanges = 1; + D3D12_DESCRIPTOR_RANGE descriptorRange[2] = {}; + // Scale Offset data + descriptorRange[0].BaseShaderRegister = 0; + descriptorRange[0].NumDescriptors = 1; + descriptorRange[0].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV; + // Constants + descriptorRange[1].BaseShaderRegister = 1; + descriptorRange[1].NumDescriptors = 1; + descriptorRange[1].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV; + D3D12_ROOT_PARAMETER RP[2] = {}; + RP[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + RP[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + RP[0].DescriptorTable.pDescriptorRanges = &descriptorRange[0]; + RP[0].DescriptorTable.NumDescriptorRanges = 1; + RP[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + RP[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + RP[1].DescriptorTable.pDescriptorRanges = &descriptorRange[1]; + RP[1].DescriptorTable.NumDescriptorRanges = 1; D3D12_ROOT_SIGNATURE_DESC rootSignatureDesc = {}; rootSignatureDesc.Flags = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT; - rootSignatureDesc.NumParameters = 1; - rootSignatureDesc.pParameters = &RP; + rootSignatureDesc.NumParameters = 2; + rootSignatureDesc.pParameters = RP; Microsoft::WRL::ComPtr rootSignatureBlob; - check(D3D12SerializeRootSignature(&rootSignatureDesc, 
D3D_ROOT_SIGNATURE_VERSION_1, &rootSignatureBlob, nullptr)); + Microsoft::WRL::ComPtr errorBlob; + check(D3D12SerializeRootSignature(&rootSignatureDesc, D3D_ROOT_SIGNATURE_VERSION_1, &rootSignatureBlob, &errorBlob)); m_device->CreateRootSignature(0, rootSignatureBlob->GetBufferPointer(), @@ -311,7 +339,7 @@ void D3D12GSRender::EnableVertexData(bool indexed_draw) } } -void D3D12GSRender::FillVertexShaderConstantsBuffer() +void D3D12GSRender::setScaleOffset() { float scaleOffsetMat[16] = { @@ -320,14 +348,6 @@ void D3D12GSRender::FillVertexShaderConstantsBuffer() 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f }; - size_t currentoffset = 0; - void *constantsBufferMap; - // TODO: Use finer range - D3D12_RANGE range = { - m_constantsBufferOffset, - 1024 * 1024 - m_constantsBufferOffset - }; - check(m_constantsBuffer->Map(0, &range, &constantsBufferMap)); // Scale scaleOffsetMat[0] = (float&)methodRegisters[NV4097_SET_VIEWPORT_SCALE + (0x4 * 0)] / (RSXThread::m_width / RSXThread::m_width_scale); @@ -342,31 +362,49 @@ void D3D12GSRender::FillVertexShaderConstantsBuffer() scaleOffsetMat[3] /= RSXThread::m_width / RSXThread::m_width_scale; scaleOffsetMat[7] /= RSXThread::m_height / RSXThread::m_height_scale; - memcpy((char*)constantsBufferMap + m_constantsBufferOffset + currentoffset, scaleOffsetMat, 16 * sizeof(float)); - currentoffset += 16 * sizeof(float); + void *scaleOffsetMap; + size_t offset = m_currentScaleOffsetBufferIndex * 256; + D3D12_RANGE range = { + offset, + 1024 * 1024 - offset + }; + check(m_scaleOffsetBuffer->Map(0, &range, &scaleOffsetMap)); + memcpy((char*)scaleOffsetMap + offset, scaleOffsetMat, 16 * sizeof(float)); + m_scaleOffsetBuffer->Unmap(0, &range); - size_t bufferSize = currentoffset; + D3D12_CONSTANT_BUFFER_VIEW_DESC constantBufferViewDesc = {}; + constantBufferViewDesc.BufferLocation = m_scaleOffsetBuffer->GetGPUVirtualAddress() + offset; + constantBufferViewDesc.SizeInBytes = (UINT)256; + D3D12_CPU_DESCRIPTOR_HANDLE Handle = 
m_scaleOffsetDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); + Handle.ptr += m_currentScaleOffsetBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + m_device->CreateConstantBufferView(&constantBufferViewDesc, Handle); +} + +void D3D12GSRender::FillVertexShaderConstantsBuffer() +{ + size_t bufferSize = 0; + + void *constantsBufferMap; + check(m_constantsBuffer->Map(0, nullptr, &constantsBufferMap)); for (const RSXTransformConstant& c : m_transform_constants) { - size_t offset = c.id * 4 * sizeof(float) + currentoffset; + size_t offset = c.id * 4 * sizeof(float); float vector[] = { c.x, c.y, c.z, c.w }; - memcpy((char*)constantsBufferMap + m_constantsBufferOffset + offset, vector, 4 * sizeof(float)); + memcpy((char*)constantsBufferMap + offset, vector, 4 * sizeof(float)); size_t bufferSizeCandidate = offset + 4 * sizeof(float); bufferSize = bufferSizeCandidate > bufferSize ? bufferSizeCandidate : bufferSize; } - m_constantsBuffer->Unmap(0, &range); + m_constantsBuffer->Unmap(0, nullptr); // Align to 256 byte bufferSize = (bufferSize + 255) & ~255; - D3D12_CONSTANT_BUFFER_VIEW_DESC constantBufferViewDesc = {}; - constantBufferViewDesc.BufferLocation = m_constantsBuffer->GetGPUVirtualAddress() + m_constantsBufferOffset; + constantBufferViewDesc.BufferLocation = m_constantsBuffer->GetGPUVirtualAddress(); constantBufferViewDesc.SizeInBytes = (UINT)bufferSize; D3D12_CPU_DESCRIPTOR_HANDLE Handle = m_constantsBufferDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); Handle.ptr += m_constantsBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); m_device->CreateConstantBufferView(&constantBufferViewDesc, Handle); - m_constantsBufferOffset += bufferSize; } void D3D12GSRender::FillPixelShaderConstantsBuffer() @@ -447,11 +485,19 @@ void D3D12GSRender::ExecCMD() assert((m_draw_array_first + m_draw_array_count) * item_size <= m_vertexBufferSize[i]); } 
commandList->IASetVertexBuffers(0, (UINT)vertexBufferViews.size(), vertexBufferViews.data()); + + setScaleOffset(); + commandList->SetDescriptorHeaps(1, &m_scaleOffsetDescriptorHeap); + D3D12_GPU_DESCRIPTOR_HANDLE Handle = m_scaleOffsetDescriptorHeap->GetGPUDescriptorHandleForHeapStart(); + Handle.ptr += m_currentScaleOffsetBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + commandList->SetGraphicsRootDescriptorTable(0, Handle); + m_currentScaleOffsetBufferIndex++; + FillVertexShaderConstantsBuffer(); commandList->SetDescriptorHeaps(1, &m_constantsBufferDescriptorsHeap); - D3D12_GPU_DESCRIPTOR_HANDLE Handle = m_constantsBufferDescriptorsHeap->GetGPUDescriptorHandleForHeapStart(); + Handle = m_constantsBufferDescriptorsHeap->GetGPUDescriptorHandleForHeapStart(); Handle.ptr += m_constantsBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - commandList->SetGraphicsRootDescriptorTable(0, Handle); + commandList->SetGraphicsRootDescriptorTable(1, Handle); m_constantsBufferIndex++; FillPixelShaderConstantsBuffer(); @@ -894,5 +940,6 @@ void D3D12GSRender::Flip() memset(m_vertexBufferSize, 0, sizeof(m_vertexBufferSize)); m_constantsBufferOffset = 0; m_constantsBufferIndex = 0; + m_currentScaleOffsetBufferIndex = 0; } #endif \ No newline at end of file From b016fbc9e4f64952e1d1126c8e6f435b669d65cf Mon Sep 17 00:00:00 2001 From: vlj Date: Wed, 13 May 2015 18:55:18 +0200 Subject: [PATCH 032/343] d3d12: Add missing hunk + properly clean our D3D allocations --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 9 +++++ rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 6 ++++ rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp | 35 +++++++++++-------- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h | 11 +++--- rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp | 11 +++--- 5 files changed, 49 insertions(+), 23 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 
87d6b5fb56..a80791dc5c 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -174,6 +174,15 @@ D3D12GSRender::~D3D12GSRender() m_commandQueueCopy->Release(); m_backbufferAsRendertarget[0]->Release(); m_backbufferAsRendertarget[1]->Release(); + m_constantsBufferDescriptorsHeap->Release(); + m_scaleOffsetDescriptorHeap->Release(); + m_constantsBuffer->Release(); + m_scaleOffsetBuffer->Release(); + for (unsigned i = 0; i < 32; i++) + m_vertexBuffer[i]->Release(); + if (m_fbo) + delete m_fbo; + m_rootSignature->Release(); m_backBuffer[0]->Release(); m_backBuffer[1]->Release(); m_swapChain->Release(); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 80d8b0ea5d..0d091451a7 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -57,6 +57,11 @@ private: ID3D12Resource *m_constantsBuffer; ID3D12DescriptorHeap *m_constantsBufferDescriptorsHeap; size_t m_constantsBufferOffset, m_constantsBufferIndex; + + ID3D12Resource *m_scaleOffsetBuffer; + ID3D12DescriptorHeap *m_scaleOffsetDescriptorHeap; + size_t m_currentScaleOffsetBufferIndex; + std::vector m_IASet; D3D12RenderTargetSets *m_fbo; ID3D12Device* m_device; @@ -83,6 +88,7 @@ private: bool LoadProgram(); void EnableVertexData(bool indexed_draw = false); + void setScaleOffset(); void FillVertexShaderConstantsBuffer(); void FillPixelShaderConstantsBuffer(); /*void DisableVertexData(); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp index 568c394945..601f06fa45 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp @@ -27,16 +27,19 @@ size_t getFPBinarySize(void *ptr) } -PipelineStateObjectCache::PipelineStateObjectCache() : currentShaderId(0) +PipelineStateObjectCache::PipelineStateObjectCache() : m_currentShaderId(0) {} PipelineStateObjectCache::~PipelineStateObjectCache() -{} +{ + for (auto pair : 
m_cachePSO) + pair.second->Release(); +} bool PipelineStateObjectCache::SearchFp(const RSXFragmentProgram& rsx_fp, Shader& shader) { - binary2FS::const_iterator It = cacheFS.find(vm::get_ptr(rsx_fp.addr)); - if (It != cacheFS.end()) + binary2FS::const_iterator It = m_cacheFS.find(vm::get_ptr(rsx_fp.addr)); + if (It != m_cacheFS.end()) { shader = It->second; return true; @@ -46,8 +49,8 @@ bool PipelineStateObjectCache::SearchFp(const RSXFragmentProgram& rsx_fp, Shader bool PipelineStateObjectCache::SearchVp(const RSXVertexProgram& rsx_vp, Shader& shader) { - binary2VS::const_iterator It = cacheVS.find((void*)rsx_vp.data.data()); - if (It != cacheVS.end()) + binary2VS::const_iterator It = m_cacheVS.find((void*)rsx_vp.data.data()); + if (It != m_cacheVS.end()) { shader = It->second; return true; @@ -59,8 +62,8 @@ ID3D12PipelineState *PipelineStateObjectCache::GetProg(u32 fp, u32 vp) const { u64 vpLong = vp; u64 key = vpLong << 32 | fp; - std::unordered_map::const_iterator It = cachePSO.find(key); - if (It == cachePSO.end()) + std::unordered_map::const_iterator It = m_cachePSO.find(key); + if (It == m_cachePSO.end()) return nullptr; return It->second; } @@ -70,8 +73,8 @@ void PipelineStateObjectCache::AddVertexProgram(Shader& vp, RSXVertexProgram& rs size_t actualVPSize = rsx_vp.data.size() * 4; void *fpShadowCopy = malloc(actualVPSize); memcpy(fpShadowCopy, rsx_vp.data.data(), actualVPSize); - vp.Id = (u32)currentShaderId++; - cacheVS.insert(std::make_pair(fpShadowCopy, vp)); + vp.Id = (u32)m_currentShaderId++; + m_cacheVS.insert(std::make_pair(fpShadowCopy, vp)); } void PipelineStateObjectCache::AddFragmentProgram(Shader& fp, RSXFragmentProgram& rsx_fp) @@ -79,15 +82,15 @@ void PipelineStateObjectCache::AddFragmentProgram(Shader& fp, RSXFragmentProgram size_t actualFPSize = getFPBinarySize(vm::get_ptr(rsx_fp.addr)); void *fpShadowCopy = malloc(actualFPSize); memcpy(fpShadowCopy, vm::get_ptr(rsx_fp.addr), actualFPSize); - fp.Id = (u32)currentShaderId++; - 
cacheFS.insert(std::make_pair(fpShadowCopy, fp)); + fp.Id = (u32)m_currentShaderId++; + m_cacheFS.insert(std::make_pair(fpShadowCopy, fp)); } void PipelineStateObjectCache::Add(ID3D12PipelineState *prog, Shader& fp, Shader& vp) { u64 vpLong = vp.Id; u64 key = vpLong << 32 | fp.Id; - cachePSO.insert(std::make_pair(key, prog)); + m_cachePSO.insert(std::make_pair(key, prog)); } ID3D12PipelineState *PipelineStateObjectCache::getGraphicPipelineState(ID3D12Device *device, ID3D12RootSignature *rootSignature, RSXVertexProgram *vertexShader, RSXFragmentProgram *fragmentShader, const std::vector &IASet) @@ -253,9 +256,13 @@ ID3D12PipelineState *PipelineStateObjectCache::getGraphicPipelineState(ID3D12Dev void Shader::Compile(SHADER_TYPE st) { static const char VSstring[] = TO_STRING( - cbuffer CONSTANT : register(b0) + cbuffer SCALE_OFFSET : register(b0) { float4x4 scaleOffsetMat; + }; + + cbuffer CONSTANT : register(b1) + { float4 vc[468]; }; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h index f8fa6a5ea2..f3fb266d4b 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h @@ -4,6 +4,7 @@ #include #include "Emu/RSX/RSXFragmentProgram.h" #include "Emu/RSX/RSXVertexProgram.h" +#include enum class SHADER_TYPE @@ -22,7 +23,7 @@ public: ~Shader() {} u32 Id; - ID3DBlob *bytecode; + Microsoft::WRL::ComPtr bytecode; /** * Decompile a fragment shader located in the PS3's Memory. This function operates synchronously. 
@@ -145,11 +146,11 @@ typedef std::unordered_map cachePSO; + std::unordered_map m_cachePSO; bool SearchFp(const RSXFragmentProgram& rsx_fp, Shader& shader); bool SearchVp(const RSXVertexProgram& rsx_vp, Shader& shader); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp index e6c34301cf..f991353060 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp @@ -11,7 +11,6 @@ D3D12RenderTargetSets::D3D12RenderTargetSets(ID3D12Device *device, u8 surfaceDepthFormat, size_t width, size_t height) { - D3D12_DESCRIPTOR_HEAP_DESC descriptorHeapDesc = {}; descriptorHeapDesc.NumDescriptors = 1; descriptorHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_DSV; @@ -80,8 +79,8 @@ D3D12RenderTargetSets::D3D12RenderTargetSets(ID3D12Device *device, u8 surfaceDep D3D12_RESOURCE_DESC resourceDesc = {}; resourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; resourceDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; - resourceDesc.Width = width; - resourceDesc.Height = height; + resourceDesc.Width = (UINT)width; + resourceDesc.Height = (UINT)height; resourceDesc.DepthOrArraySize = 1; resourceDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; resourceDesc.SampleDesc.Count = 1; @@ -118,7 +117,11 @@ D3D12RenderTargetSets::D3D12RenderTargetSets(ID3D12Device *device, u8 surfaceDep D3D12RenderTargetSets::~D3D12RenderTargetSets() { - + for (unsigned i = 0; i < 4; i++) + m_rtts[i]->Release(); + m_rttDescriptorHeap->Release(); + m_depthStencilTexture->Release(); + m_depthStencilDescriptorHeap->Release(); } D3D12_CPU_DESCRIPTOR_HANDLE D3D12RenderTargetSets::getRTTCPUHandle(u8 baseFBO) const From da5b047c5821892cdd1f9c54873f75f6ebcb0349 Mon Sep 17 00:00:00 2001 From: vlj Date: Wed, 13 May 2015 19:08:47 +0200 Subject: [PATCH 033/343] d3d12: Change some variable name to better reflect their behavior --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 14 ++++++-------- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 2 
+- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index a80791dc5c..15066d83d8 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -21,7 +21,7 @@ D3D12GSRender::D3D12GSRender() : GSRender(), m_fbo(nullptr), m_PSO(nullptr) { memset(m_vertexBufferSize, 0, sizeof(m_vertexBufferSize)); - m_constantsBufferOffset = 0; + m_constantsBufferSize = 0; m_constantsBufferIndex = 0; m_currentScaleOffsetBufferIndex = 0; // Enable d3d debug layer @@ -391,8 +391,6 @@ void D3D12GSRender::setScaleOffset() void D3D12GSRender::FillVertexShaderConstantsBuffer() { - size_t bufferSize = 0; - void *constantsBufferMap; check(m_constantsBuffer->Map(0, nullptr, &constantsBufferMap)); @@ -402,15 +400,15 @@ void D3D12GSRender::FillVertexShaderConstantsBuffer() float vector[] = { c.x, c.y, c.z, c.w }; memcpy((char*)constantsBufferMap + offset, vector, 4 * sizeof(float)); size_t bufferSizeCandidate = offset + 4 * sizeof(float); - bufferSize = bufferSizeCandidate > bufferSize ? bufferSizeCandidate : bufferSize; + m_constantsBufferSize = bufferSizeCandidate > m_constantsBufferSize ? 
bufferSizeCandidate : m_constantsBufferSize; } m_constantsBuffer->Unmap(0, nullptr); - // Align to 256 byte - bufferSize = (bufferSize + 255) & ~255; + // make it multiple of 256 bytes + m_constantsBufferSize = (m_constantsBufferSize + 255) & ~255; D3D12_CONSTANT_BUFFER_VIEW_DESC constantBufferViewDesc = {}; constantBufferViewDesc.BufferLocation = m_constantsBuffer->GetGPUVirtualAddress(); - constantBufferViewDesc.SizeInBytes = (UINT)bufferSize; + constantBufferViewDesc.SizeInBytes = (UINT)m_constantsBufferSize; D3D12_CPU_DESCRIPTOR_HANDLE Handle = m_constantsBufferDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); Handle.ptr += m_constantsBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); m_device->CreateConstantBufferView(&constantBufferViewDesc, Handle); @@ -947,7 +945,7 @@ void D3D12GSRender::Flip() gfxCommandList->Release(); m_inflightCommandList.clear(); memset(m_vertexBufferSize, 0, sizeof(m_vertexBufferSize)); - m_constantsBufferOffset = 0; + m_constantsBufferSize = 0; m_constantsBufferIndex = 0; m_currentScaleOffsetBufferIndex = 0; } diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 0d091451a7..d400a0d4d9 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -56,7 +56,7 @@ private: ID3D12Resource *m_indexBuffer, *m_vertexBuffer[m_vertex_count]; ID3D12Resource *m_constantsBuffer; ID3D12DescriptorHeap *m_constantsBufferDescriptorsHeap; - size_t m_constantsBufferOffset, m_constantsBufferIndex; + size_t m_constantsBufferSize, m_constantsBufferIndex; ID3D12Resource *m_scaleOffsetBuffer; ID3D12DescriptorHeap *m_scaleOffsetDescriptorHeap; From 75219be066868d8413a4dee68db114299a5dc857 Mon Sep 17 00:00:00 2001 From: vlj Date: Wed, 13 May 2015 22:24:53 +0200 Subject: [PATCH 034/343] d3d12: cache PSO State too --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 5 ++- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp | 28 +++++++------ 
rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h | 47 +++++++++++++++++++--- 3 files changed, 62 insertions(+), 18 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 15066d83d8..15a4106367 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -457,7 +457,10 @@ bool D3D12GSRender::LoadProgram() return false; } - m_PSO = m_cachePSO.getGraphicPipelineState(m_device, m_rootSignature, m_cur_vertex_prog, m_cur_fragment_prog, m_IASet); + PipelineProperties prop = {}; + prop.Topology = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + + m_PSO = m_cachePSO.getGraphicPipelineState(m_device, m_rootSignature, m_cur_vertex_prog, m_cur_fragment_prog, prop, m_IASet); return m_PSO != nullptr; } diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp index 601f06fa45..c9eb3ece15 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp @@ -58,11 +58,9 @@ bool PipelineStateObjectCache::SearchVp(const RSXVertexProgram& rsx_vp, Shader& return false; } -ID3D12PipelineState *PipelineStateObjectCache::GetProg(u32 fp, u32 vp) const +ID3D12PipelineState *PipelineStateObjectCache::GetProg(const PSOKey &key) const { - u64 vpLong = vp; - u64 key = vpLong << 32 | fp; - std::unordered_map::const_iterator It = m_cachePSO.find(key); + std::unordered_map::const_iterator It = m_cachePSO.find(key); if (It == m_cachePSO.end()) return nullptr; return It->second; @@ -86,14 +84,18 @@ void PipelineStateObjectCache::AddFragmentProgram(Shader& fp, RSXFragmentProgram m_cacheFS.insert(std::make_pair(fpShadowCopy, fp)); } -void PipelineStateObjectCache::Add(ID3D12PipelineState *prog, Shader& fp, Shader& vp) +void PipelineStateObjectCache::Add(ID3D12PipelineState *prog, const PSOKey& PSOKey) { - u64 vpLong = vp.Id; - u64 key = vpLong << 32 | fp.Id; - m_cachePSO.insert(std::make_pair(key, prog)); + 
m_cachePSO.insert(std::make_pair(PSOKey, prog)); } -ID3D12PipelineState *PipelineStateObjectCache::getGraphicPipelineState(ID3D12Device *device, ID3D12RootSignature *rootSignature, RSXVertexProgram *vertexShader, RSXFragmentProgram *fragmentShader, const std::vector &IASet) +ID3D12PipelineState *PipelineStateObjectCache::getGraphicPipelineState( + ID3D12Device *device, + ID3D12RootSignature *rootSignature, + RSXVertexProgram *vertexShader, + RSXFragmentProgram *fragmentShader, + const PipelineProperties &pipelineProperties, + const std::vector &IASet) { ID3D12PipelineState *result = nullptr; Shader m_vertex_prog, m_fragment_prog; @@ -123,7 +125,9 @@ ID3D12PipelineState *PipelineStateObjectCache::getGraphicPipelineState(ID3D12Dev } if (m_fp_buf_num && m_vp_buf_num) - result = GetProg(m_fragment_prog.Id, m_vertex_prog.Id); + { + result = GetProg({ m_vertex_prog.Id, m_fragment_prog.Id, pipelineProperties }); + } if (result != nullptr) { @@ -221,7 +225,7 @@ ID3D12PipelineState *PipelineStateObjectCache::getGraphicPipelineState(ID3D12Dev graphicPipelineStateDesc.BlendState = CD3D12_BLEND_DESC; graphicPipelineStateDesc.DepthStencilState = CD3D12_DEPTH_STENCIL_DESC; graphicPipelineStateDesc.RasterizerState = CD3D12_RASTERIZER_DESC; - graphicPipelineStateDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + graphicPipelineStateDesc.PrimitiveTopologyType = pipelineProperties.Topology; graphicPipelineStateDesc.NumRenderTargets = 1; graphicPipelineStateDesc.RTVFormats[0] = DXGI_FORMAT_R8G8B8A8_UNORM; @@ -234,7 +238,7 @@ ID3D12PipelineState *PipelineStateObjectCache::getGraphicPipelineState(ID3D12Dev graphicPipelineStateDesc.NodeMask = 1; device->CreateGraphicsPipelineState(&graphicPipelineStateDesc, IID_PPV_ARGS(&result)); - Add(result, m_fragment_prog, m_vertex_prog); + Add(result, {m_vertex_prog.Id, m_fragment_prog.Id, pipelineProperties }); // RSX Debugger /*if (Ini.GSLogPrograms.GetValue()) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h 
b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h index f3fb266d4b..2e8f92fc35 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h @@ -13,6 +13,11 @@ enum class SHADER_TYPE SHADER_TYPE_FRAGMENT }; +struct PipelineProperties +{ + D3D12_PRIMITIVE_TOPOLOGY_TYPE Topology; +}; + /** Storage for a shader * Embeds the D3DBlob corresponding to */ @@ -139,6 +144,31 @@ struct FragmentProgramCompare typedef std::unordered_map binary2VS; typedef std::unordered_map binary2FS; +struct PSOKey +{ + u32 vpIdx; + u32 fpIdx; + PipelineProperties properties; +}; + +struct PSOKeyHash +{ + size_t operator()(const PSOKey &key) const + { + size_t hashValue = 0; + hashValue ^= std::hash()(key.vpIdx); + return hashValue; + } +}; + +struct PSOKeyCompare +{ + size_t operator()(const PSOKey &key1, const PSOKey &key2) const + { + return (key1.vpIdx == key2.vpIdx) && (key1.fpIdx == key2.fpIdx) && (key1.properties.Topology == key2.properties.Topology); + } +}; + /** * Cache for shader blobs and Pipeline state object * The class is responsible for creating the object so the state only has to call getGraphicPipelineState @@ -149,20 +179,27 @@ private: size_t m_currentShaderId; binary2VS m_cacheVS; binary2FS m_cacheFS; - // Key is vertex << 32 | fragment ids - std::unordered_map m_cachePSO; + + std::unordered_map m_cachePSO; bool SearchFp(const RSXFragmentProgram& rsx_fp, Shader& shader); bool SearchVp(const RSXVertexProgram& rsx_vp, Shader& shader); - ID3D12PipelineState *GetProg(u32 fp, u32 vp) const; + ID3D12PipelineState *GetProg(const PSOKey &psoKey) const; void AddVertexProgram(Shader& vp, RSXVertexProgram& rsx_vp); void AddFragmentProgram(Shader& fp, RSXFragmentProgram& rsx_fp); - void Add(ID3D12PipelineState *prog, Shader& fp, Shader& vp); + void Add(ID3D12PipelineState *prog, const PSOKey& PSOKey); public: PipelineStateObjectCache(); ~PipelineStateObjectCache(); // Note: the last param is not taken into account if the PSO is not regenerated - 
ID3D12PipelineState *getGraphicPipelineState(ID3D12Device *device, ID3D12RootSignature *rootSignature, RSXVertexProgram *vertexShader, RSXFragmentProgram *fragmentShader, const std::vector &IASet); + ID3D12PipelineState *getGraphicPipelineState( + ID3D12Device *device, + ID3D12RootSignature *rootSignature, + RSXVertexProgram *vertexShader, + RSXFragmentProgram *fragmentShader, + const PipelineProperties &pipelineProperties, + const std::vector &IASet + ); }; From d93e95b614510ea6b290c86f090e5671a130975e Mon Sep 17 00:00:00 2001 From: vlj Date: Wed, 13 May 2015 22:33:08 +0200 Subject: [PATCH 035/343] d3d12: Add support for various primitives --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 74 ++++++++++++++++++++++++++- 1 file changed, 72 insertions(+), 2 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 15a4106367..d1c3128946 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -458,7 +458,38 @@ bool D3D12GSRender::LoadProgram() } PipelineProperties prop = {}; - prop.Topology = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + /* + #define GL_POINTS 0x0000 + #define GL_LINES 0x0001 + #define GL_LINE_LOOP 0x0002 + #define GL_LINE_STRIP 0x0003 + #define GL_TRIANGLES 0x0004 + #define GL_TRIANGLE_STRIP 0x0005 + #define GL_TRIANGLE_FAN 0x0006 + #define GL_QUADS 0x0007 + #define GL_QUAD_STRIP 0x0008 + #define GL_POLYGON 0x0009 + */ + switch (m_draw_mode - 1) + { + case 0: + prop.Topology = D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT; + break; + case 1: + case 2: + case 3: + prop.Topology = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; + break; + case 4: + case 5: + case 6: + prop.Topology = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + break; + default: +// LOG_ERROR(RSX, "Unsupported primitive type"); + prop.Topology = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + break; + } m_PSO = m_cachePSO.getGraphicPipelineState(m_device, m_rootSignature, m_cur_vertex_prog, m_cur_fragment_prog, prop, m_IASet); return 
m_PSO != nullptr; @@ -560,7 +591,46 @@ void D3D12GSRender::ExecCMD() }; commandList->RSSetScissorRects(1, &box); - commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + /* + #define GL_POINTS 0x0000 + #define GL_LINES 0x0001 + #define GL_LINE_LOOP 0x0002 + #define GL_LINE_STRIP 0x0003 + #define GL_TRIANGLES 0x0004 + #define GL_TRIANGLE_STRIP 0x0005 + #define GL_TRIANGLE_FAN 0x0006 + #define GL_QUADS 0x0007 + #define GL_QUAD_STRIP 0x0008 + #define GL_POLYGON 0x0009 + */ + switch (m_draw_mode - 1) + { + case 0: + commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_POINTLIST); + break; + case 1: + commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_LINELIST); + break; + case 2: + commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_LINELIST_ADJ); + break; + case 3: + commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_LINESTRIP); + break; + case 4: + commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + break; + case 5: + commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); + break; + case 6: + commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ); + break; + default: + commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ); +// LOG_ERROR(RSX, "Unsupported primitive type"); + break; + } if (m_indexed_array.m_count) { From 87d51665dc684b7895943f1ad525476afe695587 Mon Sep 17 00:00:00 2001 From: vlj Date: Thu, 14 May 2015 17:16:48 +0200 Subject: [PATCH 036/343] d3d12: Fix some warning --- rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp | 4 ++-- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index 30be9c4e11..a8a1e2deba 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -94,7 +94,7 @@ std::vector getIALayout(ID3D12Device *device, bool ind { if 
(!vertexData[i].IsEnabled()) continue; const size_t item_size = vertexData[i].GetTypeSize() * vertexData[i].size; - offset_list[i] = item_size; + offset_list[i] = (u32)item_size; } #if DUMP_VERTEX_DATA @@ -211,7 +211,7 @@ std::vector getIALayout(ID3D12Device *device, bool ind { IAElement.SemanticName = "TEXCOORD"; IAElement.SemanticIndex = i; - IAElement.InputSlot = inputSlot; + IAElement.InputSlot = (UINT)inputSlot; IAElement.Format = getFormat(vertexData[i].type - 1, vertexData[i].size); inputSlot++; } diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp index c9eb3ece15..945154ba00 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp @@ -187,7 +187,7 @@ ID3D12PipelineState *PipelineStateObjectCache::getGraphicPipelineState( static D3D12_RASTERIZER_DESC CD3D12_RASTERIZER_DESC = { D3D12_FILL_MODE_SOLID, - D3D12_CULL_MODE_BACK, + D3D12_CULL_MODE_NONE, FALSE, D3D12_DEFAULT_DEPTH_BIAS, D3D12_DEFAULT_DEPTH_BIAS_CLAMP, From 3853dffce235b8061847c74734c1fbcd082fbba6 Mon Sep 17 00:00:00 2001 From: vlj Date: Thu, 14 May 2015 17:56:24 +0200 Subject: [PATCH 037/343] d3d12: Start working on Vertex program decompilation --- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp | 7 +- .../Emu/RSX/D3D12/VertexProgramDecompiler.cpp | 782 ++++++++++++++++++ rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.h | 296 +++++++ rpcs3/emucore.vcxproj.filters | 6 + 4 files changed, 1089 insertions(+), 2 deletions(-) create mode 100644 rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.cpp create mode 100644 rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.h diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp index 945154ba00..d54dc1af49 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp @@ -8,6 +8,8 @@ #include #include #include +#include "VertexProgramDecompiler.h" +#include "Utilities/File.h" #pragma comment (lib, 
"d3dcompiler.lib") @@ -116,12 +118,13 @@ ID3D12PipelineState *PipelineStateObjectCache::getGraphicPipelineState( if (!m_vp_buf_num) { LOG_WARNING(RSX, "VP not found in buffer!"); - // m_vertex_prog.Decompile(*vertexShader); + VertexDecompiler VS(vertexShader->data); + VS.Decompile(); m_vertex_prog.Compile(SHADER_TYPE::SHADER_TYPE_VERTEX); AddVertexProgram(m_vertex_prog, *vertexShader); // TODO: This shouldn't use current dir -// fs::file("./VertexProgram.txt", o_write | o_create | o_trunc).write(m_vertex_prog.shader.c_str(), m_vertex_prog.shader.size()); + fs::file("./VertexProgram.txt", o_write | o_create | o_trunc).write(VS.m_shader.c_str(), VS.m_shader.size()); } if (m_fp_buf_num && m_vp_buf_num) diff --git a/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.cpp new file mode 100644 index 0000000000..dd0056cdb7 --- /dev/null +++ b/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.cpp @@ -0,0 +1,782 @@ +#include "stdafx.h" +#if defined(DX12_SUPPORT) +#include "VertexProgramDecompiler.h" + +#include "Utilities/Log.h" +#include "Emu/System.h" + + +std::string VertexDecompiler::GetMask(bool is_sca) +{ + std::string ret; + + if (is_sca) + { + if (d3.sca_writemask_x) ret += "x"; + if (d3.sca_writemask_y) ret += "y"; + if (d3.sca_writemask_z) ret += "z"; + if (d3.sca_writemask_w) ret += "w"; + } + else + { + if (d3.vec_writemask_x) ret += "x"; + if (d3.vec_writemask_y) ret += "y"; + if (d3.vec_writemask_z) ret += "z"; + if (d3.vec_writemask_w) ret += "w"; + } + + return ret.empty() || ret == "xyzw" ? "" : ("." + ret); +} + +std::string VertexDecompiler::GetVecMask() +{ + return GetMask(false); +} + +std::string VertexDecompiler::GetScaMask() +{ + return GetMask(true); +} + +std::string VertexDecompiler::GetDST(bool isSca) +{ + std::string ret; + + switch (isSca ? 0x1f : d3.dst) + { + case 0x1f: + ret += m_parr.AddParam(PARAM_NONE, "vec4", std::string("tmp") + std::to_string(isSca ? 
d3.sca_dst_tmp : d0.dst_tmp)); + break; + + default: + if (d3.dst > 15) + LOG_ERROR(RSX, fmt::Format("dst index out of range: %u", d3.dst)); + ret += m_parr.AddParam(PARAM_NONE, "vec4", std::string("dst_reg") + std::to_string(d3.dst), d3.dst == 0 ? "vec4(0.0f, 0.0f, 0.0f, 1.0f)" : "vec4(0.0)"); + break; + } + + return ret; +} + +std::string VertexDecompiler::GetSRC(const u32 n) +{ + static const std::string reg_table[] = + { + "in_pos", "in_weight", "in_normal", + "in_diff_color", "in_spec_color", + "in_fog", + "in_point_size", "in_7", + "in_tc0", "in_tc1", "in_tc2", "in_tc3", + "in_tc4", "in_tc5", "in_tc6", "in_tc7" + }; + + std::string ret; + + switch (src[n].reg_type) + { + case 1: //temp + ret += m_parr.AddParam(PARAM_NONE, "vec4", "tmp" + std::to_string(src[n].tmp_src)); + break; + case 2: //input + if (d1.input_src < (sizeof(reg_table) / sizeof(reg_table[0]))) + { + ret += m_parr.AddParam(PARAM_IN, "vec4", reg_table[d1.input_src], d1.input_src); + } + else + { + LOG_ERROR(RSX, "Bad input src num: %d", fmt::by_value(d1.input_src)); + ret += m_parr.AddParam(PARAM_IN, "vec4", "in_unk", d1.input_src); + } + break; + case 3: //const + m_parr.AddParam(PARAM_UNIFORM, "vec4", std::string("vc[468]")); + ret += std::string("vc[") + std::to_string(d1.const_src) + (d3.index_const ? " + " + AddAddrReg() : "") + "]"; + break; + + default: + LOG_ERROR(RSX, fmt::Format("Bad src%u reg type: %d", n, fmt::by_value(src[n].reg_type))); + Emu.Pause(); + break; + } + + static const std::string f = "xyzw"; + + std::string swizzle; + + swizzle += f[src[n].swz_x]; + swizzle += f[src[n].swz_y]; + swizzle += f[src[n].swz_z]; + swizzle += f[src[n].swz_w]; + + if (swizzle != f) ret += '.' 
+ swizzle; + + bool abs; + + switch (n) + { + case 0: abs = d0.src0_abs; break; + case 1: abs = d0.src1_abs; break; + case 2: abs = d0.src2_abs; break; + } + + if (abs) ret = "abs(" + ret + ")"; + if (src[n].neg) ret = "-" + ret; + + return ret; +} + +void VertexDecompiler::SetDST(bool is_sca, std::string value) +{ + if (d0.cond == 0) return; + + enum + { + lt = 0x1, + eq = 0x2, + gt = 0x4, + }; + + std::string mask = GetMask(is_sca); + + value += mask; + + if (is_sca && d0.vec_result) + { + //value = "vec4(" + value + ")"; + } + + if (d0.staturate) + { + value = "clamp(" + value + ", 0.0, 1.0)"; + } + + std::string dest; + + if (d0.cond_update_enable_0 && d0.cond_update_enable_1) + { + dest = m_parr.AddParam(PARAM_NONE, "vec4", "cc" + std::to_string(d0.cond_reg_sel_1), "vec4(0.0)") + mask; + } + else if (d3.dst != 0x1f || (is_sca ? d3.sca_dst_tmp != 0x3f : d0.dst_tmp != 0x3f)) + { + dest = GetDST(is_sca) + mask; + } + + //std::string code; + //if (d0.cond_test_enable) + // code += "$ifcond "; + //code += dest + value; + //AddCode(code + ";"); + + AddCodeCond(Format(dest), value); +} + +std::string VertexDecompiler::GetFunc() +{ + std::string name = "func$a"; + + for (const auto& func : m_funcs) { + if (func.name.compare(name) == 0) { + return name + "()"; + } + } + + m_funcs.emplace_back(); + FuncInfo &idx = m_funcs.back(); + idx.offset = GetAddr(); + idx.name = name; + + return name + "()"; +} + +std::string VertexDecompiler::GetTex() +{ + return m_parr.AddParam(PARAM_UNIFORM, "sampler2D", std::string("vtex") + std::to_string(/*?.tex_num*/0)); +} + +std::string VertexDecompiler::Format(const std::string& code) +{ + const std::pair> repl_list[] = + { + { "$$", []() -> std::string { return "$"; } }, + { "$0", std::bind(std::mem_fn(&VertexDecompiler::GetSRC), this, 0) }, + { "$1", std::bind(std::mem_fn(&VertexDecompiler::GetSRC), this, 1) }, + { "$2", std::bind(std::mem_fn(&VertexDecompiler::GetSRC), this, 2) }, + { "$s", 
std::bind(std::mem_fn(&VertexDecompiler::GetSRC), this, 2) }, + { "$am", std::bind(std::mem_fn(&VertexDecompiler::AddAddrMask), this) }, + { "$a", std::bind(std::mem_fn(&VertexDecompiler::AddAddrReg), this) }, + + { "$t", std::bind(std::mem_fn(&VertexDecompiler::GetTex), this) }, + + { "$fa", [this]()->std::string { return std::to_string(GetAddr()); } }, + { "$f()", std::bind(std::mem_fn(&VertexDecompiler::GetFunc), this) }, + { "$ifcond ", [this]() -> std::string + { + const std::string& cond = GetCond(); + if (cond == "true") return ""; + return "if(" + cond + ") "; + } + }, + { "$cond", std::bind(std::mem_fn(&VertexDecompiler::GetCond), this) } + }; + + return fmt::replace_all(code, repl_list); +} + +std::string VertexDecompiler::GetCond() +{ + enum + { + lt = 0x1, + eq = 0x2, + gt = 0x4, + }; + + if (d0.cond == 0) return "false"; + if (d0.cond == (lt | gt | eq)) return "true"; + + static const char* cond_string_table[(lt | gt | eq) + 1] = + { + "error", + "lessThan", + "equal", + "lessThanEqual", + "greaterThan", + "notEqual", + "greaterThanEqual", + "error" + }; + + static const char f[4] = { 'x', 'y', 'z', 'w' }; + + std::string swizzle; + swizzle += f[d0.mask_x]; + swizzle += f[d0.mask_y]; + swizzle += f[d0.mask_z]; + swizzle += f[d0.mask_w]; + + swizzle = swizzle == "xyzw" ? "" : "." 
+ swizzle; + + return fmt::Format("any(%s(cc%d%s, vec4(0.0)%s))", cond_string_table[d0.cond], d0.cond_reg_sel_1, swizzle.c_str(), swizzle.c_str()); +} + +void VertexDecompiler::AddCodeCond(const std::string& dst, const std::string& src) +{ + enum + { + lt = 0x1, + eq = 0x2, + gt = 0x4, + }; + + + if (!d0.cond_test_enable || d0.cond == (lt | gt | eq)) + { + AddCode(dst + " = " + src + ";"); + return; + } + + if (d0.cond == 0) + { + AddCode("//" + dst + " = " + src + ";"); + return; + } + + static const char* cond_string_table[(lt | gt | eq) + 1] = + { + "error", + "lessThan", + "equal", + "lessThanEqual", + "greaterThan", + "notEqual", + "greaterThanEqual", + "error" + }; + + static const char f[4] = { 'x', 'y', 'z', 'w' }; + + std::string swizzle; + swizzle += f[d0.mask_x]; + swizzle += f[d0.mask_y]; + swizzle += f[d0.mask_z]; + swizzle += f[d0.mask_w]; + + swizzle = swizzle == "xyzw" ? "" : "." + swizzle; + + std::string cond = fmt::Format("%s(cc%d%s, vec4(0.0))", cond_string_table[d0.cond], d0.cond_reg_sel_1, swizzle.c_str()); + + ShaderVar dst_var(dst); + dst_var.symplify(); + + //const char *c_mask = f; + + if (dst_var.swizzles[0].length() == 1) + { + AddCode("if (" + cond + ".x) " + dst + " = vec4(" + src + ").x;"); + } + else + { + for (int i = 0; i < dst_var.swizzles[0].length(); ++i) + { + AddCode("if (" + cond + "." + f[i] + ") " + dst + "." + f[i] + " = " + src + "." 
+ f[i] + ";"); + } + } +} + + +std::string VertexDecompiler::AddAddrMask() +{ + static const char f[] = { 'x', 'y', 'z', 'w' }; + return std::string(".") + f[d0.addr_swz]; +} + +std::string VertexDecompiler::AddAddrReg() +{ + static const char f[] = { 'x', 'y', 'z', 'w' }; + return m_parr.AddParam(PARAM_NONE, "ivec4", "a" + std::to_string(d0.addr_reg_sel_1), "ivec4(0)") + AddAddrMask(); +} + +u32 VertexDecompiler::GetAddr() +{ + return (d2.iaddrh << 3) | d3.iaddrl; +} + +void VertexDecompiler::AddCode(const std::string& code) +{ + m_body.push_back(Format(code) + ";"); + m_cur_instr->body.push_back(Format(code)); +} + +void VertexDecompiler::SetDSTVec(const std::string& code) +{ + SetDST(false, code); +} + +void VertexDecompiler::SetDSTSca(const std::string& code) +{ + SetDST(true, code); +} + +std::string VertexDecompiler::BuildFuncBody(const FuncInfo& func) +{ + std::string result; + + for (uint i = func.offset; i 0; --i) + { + fp += fmt::Format("void %s();\n", m_funcs[i].name.c_str()); + } + + f = fmt::Format("void %s()\n{\n\t%s();\n%s\tgl_Position = gl_Position * scaleOffsetMat;\n}\n", + m_funcs[0].name.c_str(), m_funcs[1].name.c_str(), f.c_str()); + + std::string main_body; + for (uint i = 0, lvl = 1; i < m_instr_count; i++) + { + lvl -= m_instructions[i].close_scopes; + if (lvl < 1) lvl = 1; + //assert(lvl >= 1); + for (uint j = 0; j < m_instructions[i].put_close_scopes; ++j) + { + --lvl; + if (lvl < 1) lvl = 1; + main_body.append(lvl, '\t') += "}\n"; + } + + for (uint j = 0; j < m_instructions[i].do_count; ++j) + { + main_body.append(lvl, '\t') += "do\n"; + main_body.append(lvl, '\t') += "{\n"; + lvl++; + } + + for (uint j = 0; j < m_instructions[i].body.size(); ++j) + { + main_body.append(lvl, '\t') += m_instructions[i].body[j] + "\n"; + } + + lvl += m_instructions[i].open_scopes; + } + + f += fmt::Format("\nvoid %s()\n{\n%s}\n", m_funcs[1].name.c_str(), main_body.c_str()); + + for (uint i = 2; i& data) : + m_data(data) +{ + m_funcs.emplace_back(); + 
m_funcs[0].offset = 0; + m_funcs[0].name = "main"; + m_funcs.emplace_back(); + m_funcs[1].offset = 0; + m_funcs[1].name = "func0"; + //m_cur_func->body = "\tgl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);\n"; +} + +void VertexDecompiler::Decompile() +{ + m_parr.params.clear(); + m_instr_count = 0; + + for (int i = 0; i < m_max_instr_count; ++i) + { + m_instructions[i].reset(); + } + + bool is_has_BRA = false; + + for (u32 i = 1; m_instr_count < m_max_instr_count; m_instr_count++) + { + m_cur_instr = &m_instructions[m_instr_count]; + + if (is_has_BRA) + { + d3.HEX = m_data[i]; + i += 4; + } + else + { + d1.HEX = m_data[i++]; + + switch (d1.sca_opcode) + { + case 0x08: //BRA + LOG_ERROR(RSX, "BRA found. Please report to RPCS3 team."); + is_has_BRA = true; + m_jump_lvls.clear(); + d3.HEX = m_data[++i]; + i += 4; + break; + + case 0x09: //BRI + d2.HEX = m_data[i++]; + d3.HEX = m_data[i]; + i += 2; + m_jump_lvls.emplace(GetAddr()); + break; + + default: + d3.HEX = m_data[++i]; + i += 2; + break; + } + } + + if (d3.end) + { + m_instr_count++; + + if (i < m_data.size()) + { + LOG_ERROR(RSX, "Program end before buffer end."); + } + + break; + } + } + + uint jump_position = 0; + + if (is_has_BRA || !m_jump_lvls.empty()) + { + m_cur_instr = &m_instructions[0]; + AddCode("int jump_position = 0;"); + AddCode("while (true)"); + AddCode("{"); + m_cur_instr->open_scopes++; + + AddCode(fmt::Format("if (jump_position <= %u)", jump_position++)); + AddCode("{"); + m_cur_instr->open_scopes++; + } + + for (u32 i = 0; i < m_instr_count; ++i) + { + m_cur_instr = &m_instructions[i]; + + d0.HEX = m_data[i * 4 + 0]; + d1.HEX = m_data[i * 4 + 1]; + d2.HEX = m_data[i * 4 + 2]; + d3.HEX = m_data[i * 4 + 3]; + + src[0].src0l = d2.src0l; + src[0].src0h = d1.src0h; + src[1].src1 = d2.src1; + src[2].src2l = d3.src2l; + src[2].src2h = d2.src2h; + + if (i && (is_has_BRA || std::find(m_jump_lvls.begin(), m_jump_lvls.end(), i) != m_jump_lvls.end())) + { + m_cur_instr->close_scopes++; + AddCode("}"); + 
AddCode(""); + + AddCode(fmt::Format("if (jump_position <= %u)", jump_position++)); + AddCode("{"); + m_cur_instr->open_scopes++; + } + + if (!d1.sca_opcode && !d1.vec_opcode) + { + AddCode("//nop"); + } + + switch (d1.sca_opcode) + { + case RSX_SCA_OPCODE_NOP: break; + case RSX_SCA_OPCODE_MOV: SetDSTSca("$s"); break; + case RSX_SCA_OPCODE_RCP: SetDSTSca("(1.0 / $s)"); break; + case RSX_SCA_OPCODE_RCC: SetDSTSca("clamp(1.0 / $s, 5.42101e-20, 1.884467e19)"); break; + case RSX_SCA_OPCODE_RSQ: SetDSTSca("inversesqrt(abs($s))"); break; + case RSX_SCA_OPCODE_EXP: SetDSTSca("exp($s)"); break; + case RSX_SCA_OPCODE_LOG: SetDSTSca("log($s)"); break; + case RSX_SCA_OPCODE_LIT: SetDSTSca("vec4(1.0, $s.x, ($s.x > 0.0 ? exp($s.w * log2($s.y)) : 0.0), 1.0)"); break; + case RSX_SCA_OPCODE_BRA: + { + AddCode("$if ($cond)"); + AddCode("{"); + m_cur_instr->open_scopes++; + AddCode("jump_position = $a$am;"); + AddCode("continue;"); + m_cur_instr->close_scopes++; + AddCode("}"); + } + break; + /* This triggers opengl driver lost connection error code 7 + case RSX_SCA_OPCODE_BRI: // works differently (BRI o[1].x(TR) L0;) + { + uint jump_position; + + if (is_has_BRA) + { + jump_position = GetAddr(); + } + else + { + int addr = GetAddr(); + + jump_position = 0; + for (auto pos : m_jump_lvls) + { + if (addr == pos) + break; + + ++jump_position; + } + } + + AddCode("$ifcond "); + AddCode("{"); + m_cur_instr->open_scopes++; + AddCode(fmt::Format("jump_position = %u;", jump_position)); + AddCode("continue;"); + m_cur_instr->close_scopes++; + AddCode("}"); + } + break; + */ + case RSX_SCA_OPCODE_CAL: + // works same as BRI + AddCode("$ifcond $f(); //CAL"); + break; + case RSX_SCA_OPCODE_CLI: + // works same as BRI + AddCode("$ifcond $f(); //CLI"); + break; + case RSX_SCA_OPCODE_RET: + // works like BRI but shorter (RET o[1].x(TR);) + AddCode("$ifcond return;"); + break; + case RSX_SCA_OPCODE_LG2: SetDSTSca("log2($s)"); break; + case RSX_SCA_OPCODE_EX2: SetDSTSca("exp2($s)"); break; + case 
RSX_SCA_OPCODE_SIN: SetDSTSca("sin($s)"); break; + case RSX_SCA_OPCODE_COS: SetDSTSca("cos($s)"); break; + case RSX_SCA_OPCODE_BRB: + // works differently (BRB o[1].x !b0, L0;) + LOG_ERROR(RSX, "Unimplemented sca_opcode BRB"); + break; + case RSX_SCA_OPCODE_CLB: break; + // works same as BRB + LOG_ERROR(RSX, "Unimplemented sca_opcode CLB"); + break; + case RSX_SCA_OPCODE_PSH: break; + // works differently (PSH o[1].x A0;) + LOG_ERROR(RSX, "Unimplemented sca_opcode PSH"); + break; + case RSX_SCA_OPCODE_POP: break; + // works differently (POP o[1].x;) + LOG_ERROR(RSX, "Unimplemented sca_opcode POP"); + break; + + default: + AddCode(fmt::Format("//Unknown vp sca_opcode 0x%x", fmt::by_value(d1.sca_opcode))); + LOG_ERROR(RSX, "Unknown vp sca_opcode 0x%x", fmt::by_value(d1.sca_opcode)); + Emu.Pause(); + break; + } + + switch (d1.vec_opcode) + { + case RSX_VEC_OPCODE_NOP: break; + case RSX_VEC_OPCODE_MOV: SetDSTVec("$0"); break; + case RSX_VEC_OPCODE_MUL: SetDSTVec("($0 * $1)"); break; + case RSX_VEC_OPCODE_ADD: SetDSTVec("($0 + $2)"); break; + case RSX_VEC_OPCODE_MAD: SetDSTVec("($0 * $1 + $2)"); break; + case RSX_VEC_OPCODE_DP3: SetDSTVec("vec4(dot($0.xyz, $1.xyz))"); break; + case RSX_VEC_OPCODE_DPH: SetDSTVec("vec4(dot(vec4($0.xyz, 1.0), $1))"); break; + case RSX_VEC_OPCODE_DP4: SetDSTVec("vec4(dot($0, $1))"); break; + case RSX_VEC_OPCODE_DST: SetDSTVec("vec4(distance($0, $1))"); break; + case RSX_VEC_OPCODE_MIN: SetDSTVec("min($0, $1)"); break; + case RSX_VEC_OPCODE_MAX: SetDSTVec("max($0, $1)"); break; + case RSX_VEC_OPCODE_SLT: SetDSTVec("vec4(lessThan($0, $1))"); break; + case RSX_VEC_OPCODE_SGE: SetDSTVec("vec4(greaterThanEqual($0, $1))"); break; + case RSX_VEC_OPCODE_ARL: AddCode("$ifcond $a = ivec4($0)$am;"); break; + case RSX_VEC_OPCODE_FRC: SetDSTVec("fract($0)"); break; + case RSX_VEC_OPCODE_FLR: SetDSTVec("floor($0)"); break; + case RSX_VEC_OPCODE_SEQ: SetDSTVec("vec4(equal($0, $1))"); break; + case RSX_VEC_OPCODE_SFL: SetDSTVec("vec4(equal($0, 
vec4(0.0)))"); break; + case RSX_VEC_OPCODE_SGT: SetDSTVec("vec4(greaterThan($0, $1))"); break; + case RSX_VEC_OPCODE_SLE: SetDSTVec("vec4(lessThanEqual($0, $1))"); break; + case RSX_VEC_OPCODE_SNE: SetDSTVec("vec4(notEqual($0, $1))"); break; + case RSX_VEC_OPCODE_STR: SetDSTVec("vec4(equal($0, vec4(1.0)))"); break; + case RSX_VEC_OPCODE_SSG: SetDSTVec("sign($0)"); break; + case RSX_VEC_OPCODE_TXL: SetDSTVec("texture($t, $0.xy)"); break; + + default: + AddCode(fmt::Format("//Unknown vp opcode 0x%x", fmt::by_value(d1.vec_opcode))); + LOG_ERROR(RSX, "Unknown vp opcode 0x%x", fmt::by_value(d1.vec_opcode)); + Emu.Pause(); + break; + } + } + + if (is_has_BRA || !m_jump_lvls.empty()) + { + m_cur_instr = &m_instructions[m_instr_count - 1]; + m_cur_instr->close_scopes++; + AddCode("}"); + AddCode("break;"); + m_cur_instr->close_scopes++; + AddCode("}"); + } + + m_shader = BuildCode(); + + m_jump_lvls.clear(); + m_body.clear(); + if (m_funcs.size() > 2) + { + m_funcs.erase(m_funcs.begin() + 2, m_funcs.end()); + } +} + +#endif \ No newline at end of file diff --git a/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.h b/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.h new file mode 100644 index 0000000000..2b9c9a02a9 --- /dev/null +++ b/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.h @@ -0,0 +1,296 @@ +#pragma once +#if defined(DX12_SUPPORT) +#include "Emu/RSX/RSXVertexProgram.h" +#include +#include + +enum ParamFlag +{ + PARAM_IN, + PARAM_OUT, + PARAM_UNIFORM, + PARAM_CONST, + PARAM_NONE, +}; + +struct GLParamItem +{ + std::string name; + std::string location; + std::string value; + + GLParamItem(const std::string& _name, int _location, const std::string& _value = "") + : name(_name) + , value(_value) + { + if (_location > -1) + location = "layout (location = " + std::to_string(_location) + ") "; + else + location = ""; + } +}; + +struct ParamType +{ + const ParamFlag flag; + std::string type; + std::vector items; + + ParamType(const ParamFlag _flag, const std::string& _type) + : 
flag(_flag) + , type(_type) + { + } + + bool SearchName(const std::string& name) + { + for (u32 i = 0; i params; + + ParamType* SearchParam(const std::string& type) + { + for (u32 i = 0; iSearchName(name); + } + + std::string AddParam(const ParamFlag flag, std::string type, const std::string& name, const std::string& value) + { + type = GetParamFlag(flag) + type; + ParamType* t = SearchParam(type); + + if (t) + { + if (!t->SearchName(name)) t->items.emplace_back(name, -1, value); + } + else + { + const u32 num = params.size(); + params.emplace_back(flag, type); + params[num].items.emplace_back(name, -1, value); + } + + return name; + } + + std::string AddParam(const ParamFlag flag, std::string type, const std::string& name, int location = -1) + { + type = GetParamFlag(flag) + type; + ParamType* t = SearchParam(type); + + if (t) + { + if (!t->SearchName(name)) t->items.emplace_back(name, location); + } + else + { + const u32 num = params.size(); + params.emplace_back(flag, type); + params[num].items.emplace_back(name, location); + } + + return name; + } +}; + +class ShaderVar +{ +public: + std::string name; + std::vector swizzles; + + ShaderVar() = default; + ShaderVar(const std::string& var) + { + auto var_blocks = fmt::split(var, { "." }); + + if (var_blocks.size() == 0) + { + assert(0); + } + + name = var_blocks[0]; + + if (var_blocks.size() == 1) + { + swizzles.push_back("xyzw"); + } + else + { + swizzles = std::vector(var_blocks.begin() + 1, var_blocks.end()); + } + } + + int get_vector_size() const + { + return swizzles[swizzles.size() - 1].length(); + } + + ShaderVar& symplify() + { + std::unordered_map swizzle; + + static std::unordered_map pos_to_swizzle = + { + { 0, 'x' }, + { 1, 'y' }, + { 2, 'z' }, + { 3, 'w' } + }; + + for (auto &i : pos_to_swizzle) + { + swizzle[i.second] = swizzles[0].length() > i.first ? 
swizzles[0][i.first] : 0; + } + + for (int i = 1; i < swizzles.size(); ++i) + { + std::unordered_map new_swizzle; + + for (auto &sw : pos_to_swizzle) + { + new_swizzle[sw.second] = swizzle[swizzles[i].length() <= sw.first ? '\0' : swizzles[i][sw.first]]; + } + + swizzle = new_swizzle; + } + + swizzles.clear(); + std::string new_swizzle; + + for (auto &i : pos_to_swizzle) + { + if (swizzle[i.second] != '\0') + new_swizzle += swizzle[i.second]; + } + + swizzles.push_back(new_swizzle); + + return *this; + } + + std::string get() const + { + if (swizzles.size() == 1 && swizzles[0] == "xyzw") + { + return name; + } + + return name + "." + fmt::merge({ swizzles }, "."); + } +}; + +struct VertexDecompiler +{ + struct FuncInfo + { + u32 offset; + std::string name; + }; + + struct Instruction + { + std::vector body; + int open_scopes; + int close_scopes; + int put_close_scopes; + int do_count; + + void reset() + { + body.clear(); + put_close_scopes = open_scopes = close_scopes = do_count = 0; + } + }; + + static const size_t m_max_instr_count = 512; + Instruction m_instructions[m_max_instr_count]; + Instruction* m_cur_instr; + size_t m_instr_count; + + std::set m_jump_lvls; + std::vector m_body; + std::vector m_funcs; + + //wxString main; + + std::vector& m_data; + ParamArray m_parr; + + std::string GetMask(bool is_sca); + std::string GetVecMask(); + std::string GetScaMask(); + std::string GetDST(bool is_sca = false); + std::string GetSRC(const u32 n); + std::string GetFunc(); + std::string GetTex(); + std::string GetCond(); + std::string AddAddrMask(); + std::string AddAddrReg(); + u32 GetAddr(); + std::string Format(const std::string& code); + + void AddCodeCond(const std::string& dst, const std::string& src); + void AddCode(const std::string& code); + void SetDST(bool is_sca, std::string value); + void SetDSTVec(const std::string& code); + void SetDSTSca(const std::string& code); + std::string BuildFuncBody(const FuncInfo& func); + std::string BuildCode(); + +public: + 
std::string m_shader; + VertexDecompiler(std::vector& data); + void Decompile(); +}; +#endif \ No newline at end of file diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters index 07f2c03563..5674643b14 100644 --- a/rpcs3/emucore.vcxproj.filters +++ b/rpcs3/emucore.vcxproj.filters @@ -974,6 +974,9 @@ Emu\GPU\RSX\D3D12 + + Emu\GPU\RSX\D3D12 + @@ -1846,5 +1849,8 @@ Emu\GPU\RSX\D3D12 + + Emu\GPU\RSX\D3D12 + \ No newline at end of file From 5a1b756c14975dd6f64ccbaf01624997b3a9aa79 Mon Sep 17 00:00:00 2001 From: vlj Date: Thu, 14 May 2015 18:55:59 +0200 Subject: [PATCH 038/343] d3d12: Dump program parameters --- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp | 4 +- .../Emu/RSX/D3D12/VertexProgramDecompiler.cpp | 80 +++++++++++++---- rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.h | 89 ++++++------------- 3 files changed, 95 insertions(+), 78 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp index d54dc1af49..54601e7934 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp @@ -119,12 +119,12 @@ ID3D12PipelineState *PipelineStateObjectCache::getGraphicPipelineState( { LOG_WARNING(RSX, "VP not found in buffer!"); VertexDecompiler VS(vertexShader->data); - VS.Decompile(); + std::string shaderCode = VS.Decompile(); m_vertex_prog.Compile(SHADER_TYPE::SHADER_TYPE_VERTEX); AddVertexProgram(m_vertex_prog, *vertexShader); // TODO: This shouldn't use current dir - fs::file("./VertexProgram.txt", o_write | o_create | o_trunc).write(VS.m_shader.c_str(), VS.m_shader.size()); + fs::file("./VertexProgram.txt", o_write | o_create | o_trunc).write(shaderCode.c_str(), shaderCode.size()); } if (m_fp_buf_num && m_vp_buf_num) diff --git a/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.cpp index dd0056cdb7..789434e473 100644 --- a/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.cpp +++ 
b/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.cpp @@ -449,11 +449,6 @@ std::string VertexDecompiler::BuildCode() } std::string p; - - for (auto& param : m_parr.params) { - p += param.Format(); - } - std::string fp; for (int i = m_funcs.size() - 1; i > 0; --i) @@ -499,15 +494,66 @@ std::string VertexDecompiler::BuildCode() f += fmt::Format("\nvoid %s()\n{\n%s}\n", m_funcs[i].name.c_str(), BuildFuncBody(m_funcs[i]).c_str()); } - static const std::string& prot = - "#version 420\n" - "\n" - "uniform mat4 scaleOffsetMat = mat4(1.0);\n" - "%s\n" - "%s\n" - "%s"; + std::stringstream OS; + insertHeader(OS); - return fmt::Format(prot.c_str(), p.c_str(), fp.c_str(), f.c_str()); + insertInputs(OS, m_parr.params[PARAM_IN]); + OS << std::endl; + insertOutputs(OS, m_parr.params[PARAM_OUT]); + OS << std::endl; + insertConstants(OS, m_parr.params[PARAM_UNIFORM]); + OS << std::endl; + + + OS << fp.c_str() << std::endl; + OS << f.c_str() << std::endl; + + return OS.str(); +} + +void VertexDecompiler::insertHeader(std::stringstream &OS) +{ + OS << "cbuffer SCALE_OFFSET : register(b0)" << std::endl; + OS << "{" << std::endl; + OS << " float4x4 scaleOffsetMat;" << std::endl; + OS << "};" << std::endl; +} + +void VertexDecompiler::insertInputs(std::stringstream & OS, const std::vector& inputs) +{ + OS << "struct VertexInput" << std::endl; + OS << "{" << std::endl; + for (const ParamType PT : inputs) + { + for (const ParamItem &PI : PT.items) + OS << " " << PT.type << " " << PI.name << ": TEXCOORD" << PI.location << ";" << std::endl; + } + OS << "};" << std::endl; +} + +void VertexDecompiler::insertConstants(std::stringstream & OS, const std::vector & constants) +{ + OS << "cbuffer CONSTANT_BUFFER" << std::endl; + OS << "{" << std::endl; + for (const ParamType PT : constants) + { + for (const ParamItem &PI : PT.items) + OS << " " << PT.type << " " << PI.name << ";" << std::endl; + } + OS << "};" << std::endl; +} + +void VertexDecompiler::insertOutputs(std::stringstream & OS, const 
std::vector & outputs) +{ + OS << "struct PixelInput" << std::endl; + OS << "{" << std::endl; + OS << " float4 position : SV_POSITION;" << std::endl; + for (const ParamType PT : outputs) + { + for (const ParamItem &PI : PT.items) + OS << " " << PT.type << " " << PI.name << ": TEXCOORD" << PI.location << ";" << std::endl; + } + OS << "};" << std::endl; } VertexDecompiler::VertexDecompiler(std::vector& data) : @@ -522,9 +568,10 @@ VertexDecompiler::VertexDecompiler(std::vector& data) : //m_cur_func->body = "\tgl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);\n"; } -void VertexDecompiler::Decompile() +std::string VertexDecompiler::Decompile() { - m_parr.params.clear(); + for (unsigned i = 0; i < PARAM_COUNT; i++) + m_parr.params[i].clear(); m_instr_count = 0; for (int i = 0; i < m_max_instr_count; ++i) @@ -769,7 +816,7 @@ void VertexDecompiler::Decompile() AddCode("}"); } - m_shader = BuildCode(); + std::string result = BuildCode(); m_jump_lvls.clear(); m_body.clear(); @@ -777,6 +824,7 @@ void VertexDecompiler::Decompile() { m_funcs.erase(m_funcs.begin() + 2, m_funcs.end()); } + return result; } #endif \ No newline at end of file diff --git a/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.h b/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.h index 2b9c9a02a9..c0e020113a 100644 --- a/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.h +++ b/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.h @@ -1,8 +1,8 @@ #pragma once #if defined(DX12_SUPPORT) #include "Emu/RSX/RSXVertexProgram.h" -#include #include +#include enum ParamFlag { @@ -11,30 +11,27 @@ enum ParamFlag PARAM_UNIFORM, PARAM_CONST, PARAM_NONE, + PARAM_COUNT, }; -struct GLParamItem +struct ParamItem { std::string name; - std::string location; std::string value; + int location; - GLParamItem(const std::string& _name, int _location, const std::string& _value = "") + ParamItem(const std::string& _name, int _location, const std::string& _value = "") : name(_name) - , value(_value) - { - if (_location > -1) - location = "layout (location = " 
+ std::to_string(_location) + ") "; - else - location = ""; - } + , value(_value), + location(_location) + { } }; struct ParamType { const ParamFlag flag; std::string type; - std::vector items; + std::vector items; ParamType(const ParamFlag _flag, const std::string& _type) : flag(_flag) @@ -51,63 +48,32 @@ struct ParamType return false; } - - std::string Format() - { - std::string ret = ""; - - for (u32 n = 0; n params; + std::vector params[PARAM_COUNT]; - ParamType* SearchParam(const std::string& type) + ParamType* SearchParam(const ParamFlag &flag, const std::string& type) { - for (u32 i = 0; iSearchName(name); } std::string AddParam(const ParamFlag flag, std::string type, const std::string& name, const std::string& value) { - type = GetParamFlag(flag) + type; - ParamType* t = SearchParam(type); + ParamType* t = SearchParam(flag, type); if (t) { @@ -115,9 +81,9 @@ struct ParamArray } else { - const u32 num = params.size(); - params.emplace_back(flag, type); - params[num].items.emplace_back(name, -1, value); + const u32 num = params[flag].size(); + params[flag].emplace_back(flag, type); + params[flag][num].items.emplace_back(name, -1, value); } return name; @@ -125,8 +91,7 @@ struct ParamArray std::string AddParam(const ParamFlag flag, std::string type, const std::string& name, int location = -1) { - type = GetParamFlag(flag) + type; - ParamType* t = SearchParam(type); + ParamType* t = SearchParam(flag, type); if (t) { @@ -134,9 +99,9 @@ struct ParamArray } else { - const u32 num = params.size(); - params.emplace_back(flag, type); - params[num].items.emplace_back(name, location); + const u32 num = params[flag].size(); + params[flag].emplace_back(flag, type); + params[flag][num].items.emplace_back(name, location); } return name; @@ -288,9 +253,13 @@ struct VertexDecompiler std::string BuildFuncBody(const FuncInfo& func); std::string BuildCode(); +protected: + virtual void insertHeader(std::stringstream &OS); + virtual void insertInputs(std::stringstream &OS, const 
std::vector &inputs); + virtual void insertConstants(std::stringstream &OS, const std::vector &constants); + virtual void insertOutputs(std::stringstream &OS, const std::vector &outputs); public: - std::string m_shader; VertexDecompiler(std::vector& data); - void Decompile(); + std::string Decompile(); }; #endif \ No newline at end of file From 96c4e87bf5d0beed47b9455cb66ff4145778eedc Mon Sep 17 00:00:00 2001 From: vlj Date: Thu, 14 May 2015 19:25:49 +0200 Subject: [PATCH 039/343] d3d12: Some extra code again --- .../Emu/RSX/D3D12/VertexProgramDecompiler.cpp | 132 +++++++++--------- rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.h | 2 + 2 files changed, 69 insertions(+), 65 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.cpp index 789434e473..968732705d 100644 --- a/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.cpp @@ -390,72 +390,10 @@ std::string VertexDecompiler::BuildFuncBody(const FuncInfo& func) std::string VertexDecompiler::BuildCode() { - struct reg_info - { - std::string name; - bool need_declare; - std::string src_reg; - std::string src_reg_mask; - bool need_cast; - }; - - static const reg_info reg_table[] = - { - { "gl_Position", false, "dst_reg0", "", false }, - { "diff_color", true, "dst_reg1", "", false }, - { "spec_color", true, "dst_reg2", "", false }, - { "front_diff_color", true, "dst_reg3", "", false }, - { "front_spec_color", true, "dst_reg4", "", false }, - { "fogc", true, "dst_reg5", ".x", true }, - { "gl_ClipDistance[0]", false, "dst_reg5", ".y", false }, - { "gl_ClipDistance[1]", false, "dst_reg5", ".z", false }, - { "gl_ClipDistance[2]", false, "dst_reg5", ".w", false }, - { "gl_PointSize", false, "dst_reg6", ".x", false }, - { "gl_ClipDistance[3]", false, "dst_reg6", ".y", false }, - { "gl_ClipDistance[4]", false, "dst_reg6", ".z", false }, - { "gl_ClipDistance[5]", false, "dst_reg6", ".w", false }, - { "tc0", true, 
"dst_reg7", "", false }, - { "tc1", true, "dst_reg8", "", false }, - { "tc2", true, "dst_reg9", "", false }, - { "tc3", true, "dst_reg10", "", false }, - { "tc4", true, "dst_reg11", "", false }, - { "tc5", true, "dst_reg12", "", false }, - { "tc6", true, "dst_reg13", "", false }, - { "tc7", true, "dst_reg14", "", false }, - { "tc8", true, "dst_reg15", "", false }, - { "tc9", true, "dst_reg6", "", false } // In this line, dst_reg6 is correct since dst_reg goes from 0 to 15. - }; std::string f; - - for (auto &i : reg_table) - { - if (m_parr.HasParam(PARAM_NONE, "vec4", i.src_reg)) - { - if (i.need_declare) - { - m_parr.AddParam(PARAM_OUT, "vec4", i.name); - } - - if (i.need_cast) - { - f += "\t" + i.name + " = vec4(" + i.src_reg + i.src_reg_mask + ");\n"; - } - else - { - f += "\t" + i.name + " = " + i.src_reg + i.src_reg_mask + ";\n"; - } - } - } - - std::string p; std::string fp; - for (int i = m_funcs.size() - 1; i > 0; --i) - { - fp += fmt::Format("void %s();\n", m_funcs[i].name.c_str()); - } - f = fmt::Format("void %s()\n{\n\t%s();\n%s\tgl_Position = gl_Position * scaleOffsetMat;\n}\n", m_funcs[0].name.c_str(), m_funcs[1].name.c_str(), f.c_str()); @@ -504,9 +442,9 @@ std::string VertexDecompiler::BuildCode() insertConstants(OS, m_parr.params[PARAM_UNIFORM]); OS << std::endl; - - OS << fp.c_str() << std::endl; - OS << f.c_str() << std::endl; + insertMainStart(OS); + OS << main_body.c_str() << std::endl; + insertMainEnd(OS); return OS.str(); } @@ -556,6 +494,70 @@ void VertexDecompiler::insertOutputs(std::stringstream & OS, const std::vector

& data) : m_data(data) { diff --git a/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.h b/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.h index c0e020113a..86cbf2e33e 100644 --- a/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.h +++ b/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.h @@ -258,6 +258,8 @@ protected: virtual void insertInputs(std::stringstream &OS, const std::vector &inputs); virtual void insertConstants(std::stringstream &OS, const std::vector &constants); virtual void insertOutputs(std::stringstream &OS, const std::vector &outputs); + virtual void insertMainStart(std::stringstream &OS); + virtual void insertMainEnd(std::stringstream &OS); public: VertexDecompiler(std::vector& data); std::string Decompile(); From 3960555e451a2b45f971d6f55987df71eea3b0e5 Mon Sep 17 00:00:00 2001 From: vlj Date: Thu, 14 May 2015 19:48:49 +0200 Subject: [PATCH 040/343] d3d12: Use the compiler vertex program It works with the primitive sample. --- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp | 49 +++------------- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h | 2 +- .../Emu/RSX/D3D12/VertexProgramDecompiler.cpp | 57 ++++++++++++------- 3 files changed, 46 insertions(+), 62 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp index 54601e7934..1bc7f2d827 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp @@ -108,7 +108,7 @@ ID3D12PipelineState *PipelineStateObjectCache::getGraphicPipelineState( { LOG_WARNING(RSX, "FP not found in buffer!"); // Decompile(*fragmentShader); - m_fragment_prog.Compile(SHADER_TYPE::SHADER_TYPE_FRAGMENT); + m_fragment_prog.Compile("", SHADER_TYPE::SHADER_TYPE_FRAGMENT); AddFragmentProgram(m_fragment_prog, *fragmentShader); // TODO: This shouldn't use current dir @@ -120,7 +120,7 @@ ID3D12PipelineState *PipelineStateObjectCache::getGraphicPipelineState( LOG_WARNING(RSX, "VP not found in buffer!"); VertexDecompiler VS(vertexShader->data); std::string 
shaderCode = VS.Decompile(); - m_vertex_prog.Compile(SHADER_TYPE::SHADER_TYPE_VERTEX); + m_vertex_prog.Compile(shaderCode, SHADER_TYPE::SHADER_TYPE_VERTEX); AddVertexProgram(m_vertex_prog, *vertexShader); // TODO: This shouldn't use current dir @@ -260,51 +260,16 @@ ID3D12PipelineState *PipelineStateObjectCache::getGraphicPipelineState( #define TO_STRING(x) #x -void Shader::Compile(SHADER_TYPE st) +void Shader::Compile(const std::string &code, SHADER_TYPE st) { - static const char VSstring[] = TO_STRING( - cbuffer SCALE_OFFSET : register(b0) - { - float4x4 scaleOffsetMat; - }; - - cbuffer CONSTANT : register(b1) - { - float4 vc[468]; - }; - - struct vertex { - float4 pos : TEXCOORD0; - float4 color : TEXCOORD3; - }; - - struct pixel { - float4 pos : SV_POSITION; - float4 color : TEXCOORD0; - }; - - pixel main(vertex In) - { - pixel Out; - float4 pos = In.pos; - pos.w = dot(pos, vc[259]); - pos.z = dot(pos, vc[258]); - pos.y = dot(pos, vc[257]); - pos.x = dot(pos, vc[256]); - pos.z = -pos.z; - Out.pos = mul(pos, scaleOffsetMat); - Out.color = In.color; - return Out; - }); - static const char FSstring[] = TO_STRING( struct pixel { - float4 pos : SV_POSITION; - float4 color : TEXCOORD0; + float4 dst_reg0 : SV_POSITION; + float4 dst_reg1 : TEXCOORD0; }; float4 main(pixel In) : SV_TARGET { - return In.color; + return In.dst_reg1; }); HRESULT hr; @@ -312,7 +277,7 @@ void Shader::Compile(SHADER_TYPE st) switch (st) { case SHADER_TYPE::SHADER_TYPE_VERTEX: - hr = D3DCompile(VSstring, sizeof(VSstring), "test", nullptr, nullptr, "main", "vs_5_0", 0, 0, &bytecode, errorBlob.GetAddressOf()); + hr = D3DCompile(code.c_str(), code.size(), "test", nullptr, nullptr, "main", "vs_5_0", 0, 0, &bytecode, errorBlob.GetAddressOf()); if (hr != S_OK) LOG_ERROR(RSX, "VS build failed:%s", errorBlob->GetBufferPointer()); break; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h index 2e8f92fc35..1cf484fadc 100644 --- 
a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h @@ -37,7 +37,7 @@ public: // void Decompile(RSXFragmentProgram& prog) /** Compile the decompiled fragment shader into a format we can use with OpenGL. */ - void Compile(SHADER_TYPE st); + void Compile(const std::string &code, SHADER_TYPE st); }; // Based on diff --git a/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.cpp index 968732705d..ac2bc0e078 100644 --- a/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.cpp @@ -5,6 +5,13 @@ #include "Utilities/Log.h" #include "Emu/System.h" +static std::string typeName[] = +{ + "float", + "float2", + "float3", + "float4" +}; std::string VertexDecompiler::GetMask(bool is_sca) { @@ -45,13 +52,13 @@ std::string VertexDecompiler::GetDST(bool isSca) switch (isSca ? 0x1f : d3.dst) { case 0x1f: - ret += m_parr.AddParam(PARAM_NONE, "vec4", std::string("tmp") + std::to_string(isSca ? d3.sca_dst_tmp : d0.dst_tmp)); + ret += m_parr.AddParam(PARAM_NONE, typeName[3], std::string("tmp") + std::to_string(isSca ? d3.sca_dst_tmp : d0.dst_tmp)); break; default: if (d3.dst > 15) LOG_ERROR(RSX, fmt::Format("dst index out of range: %u", d3.dst)); - ret += m_parr.AddParam(PARAM_NONE, "vec4", std::string("dst_reg") + std::to_string(d3.dst), d3.dst == 0 ? "vec4(0.0f, 0.0f, 0.0f, 1.0f)" : "vec4(0.0)"); + ret += m_parr.AddParam(PARAM_NONE, typeName[3], std::string("dst_reg") + std::to_string(d3.dst), d3.dst == 0 ? 
typeName[3] + "(0.0f, 0.0f, 0.0f, 1.0f)" : typeName[3] + "(0.0, 0.0, 0.0, 0.0)"); break; } @@ -75,21 +82,21 @@ std::string VertexDecompiler::GetSRC(const u32 n) switch (src[n].reg_type) { case 1: //temp - ret += m_parr.AddParam(PARAM_NONE, "vec4", "tmp" + std::to_string(src[n].tmp_src)); + ret += m_parr.AddParam(PARAM_NONE, typeName[3], "tmp" + std::to_string(src[n].tmp_src)); break; case 2: //input if (d1.input_src < (sizeof(reg_table) / sizeof(reg_table[0]))) { - ret += m_parr.AddParam(PARAM_IN, "vec4", reg_table[d1.input_src], d1.input_src); + ret += m_parr.AddParam(PARAM_IN, typeName[3], reg_table[d1.input_src], d1.input_src); } else { LOG_ERROR(RSX, "Bad input src num: %d", fmt::by_value(d1.input_src)); - ret += m_parr.AddParam(PARAM_IN, "vec4", "in_unk", d1.input_src); + ret += m_parr.AddParam(PARAM_IN, typeName[3], "in_unk", d1.input_src); } break; case 3: //const - m_parr.AddParam(PARAM_UNIFORM, "vec4", std::string("vc[468]")); + m_parr.AddParam(PARAM_UNIFORM, typeName[3], std::string("vc[468]")); ret += std::string("vc[") + std::to_string(d1.const_src) + (d3.index_const ? " + " + AddAddrReg() : "") + "]"; break; @@ -154,7 +161,7 @@ void VertexDecompiler::SetDST(bool is_sca, std::string value) if (d0.cond_update_enable_0 && d0.cond_update_enable_1) { - dest = m_parr.AddParam(PARAM_NONE, "vec4", "cc" + std::to_string(d0.cond_reg_sel_1), "vec4(0.0)") + mask; + dest = m_parr.AddParam(PARAM_NONE, typeName[3], "cc" + std::to_string(d0.cond_reg_sel_1), typeName[3] + "(0.0)") + mask; } else if (d3.dst != 0x1f || (is_sca ? 
d3.sca_dst_tmp != 0x3f : d0.dst_tmp != 0x3f)) { @@ -312,7 +319,7 @@ void VertexDecompiler::AddCodeCond(const std::string& dst, const std::string& sr if (dst_var.swizzles[0].length() == 1) { - AddCode("if (" + cond + ".x) " + dst + " = vec4(" + src + ").x;"); + AddCode("if (" + cond + ".x) " + dst + " = " + typeName[3] + "(" + src + ").x;"); } else { @@ -437,7 +444,7 @@ std::string VertexDecompiler::BuildCode() insertInputs(OS, m_parr.params[PARAM_IN]); OS << std::endl; - insertOutputs(OS, m_parr.params[PARAM_OUT]); + insertOutputs(OS, m_parr.params[PARAM_NONE]); OS << std::endl; insertConstants(OS, m_parr.params[PARAM_UNIFORM]); OS << std::endl; @@ -471,7 +478,7 @@ void VertexDecompiler::insertInputs(std::stringstream & OS, const std::vector & constants) { - OS << "cbuffer CONSTANT_BUFFER" << std::endl; + OS << "cbuffer CONSTANT_BUFFER : register(b1)" << std::endl; OS << "{" << std::endl; for (const ParamType PT : constants) { @@ -485,11 +492,15 @@ void VertexDecompiler::insertOutputs(std::stringstream & OS, const std::vector

0.0 ? exp($s.w * log2($s.y)) : 0.0), 1.0)"); break; + case RSX_SCA_OPCODE_LIT: SetDSTSca(typeName[3] + "(1.0, $s.x, ($s.x > 0.0 ? exp($s.w * log2($s.y)) : 0.0), 1.0)"); break; case RSX_SCA_OPCODE_BRA: { AddCode("$if ($cond)"); @@ -782,7 +801,7 @@ std::string VertexDecompiler::Decompile() case RSX_VEC_OPCODE_MAD: SetDSTVec("($0 * $1 + $2)"); break; case RSX_VEC_OPCODE_DP3: SetDSTVec("vec4(dot($0.xyz, $1.xyz))"); break; case RSX_VEC_OPCODE_DPH: SetDSTVec("vec4(dot(vec4($0.xyz, 1.0), $1))"); break; - case RSX_VEC_OPCODE_DP4: SetDSTVec("vec4(dot($0, $1))"); break; + case RSX_VEC_OPCODE_DP4: SetDSTVec(typeName[3] + "(dot($0, $1), dot($0, $1), dot($0, $1), dot($0, $1))"); break; case RSX_VEC_OPCODE_DST: SetDSTVec("vec4(distance($0, $1))"); break; case RSX_VEC_OPCODE_MIN: SetDSTVec("min($0, $1)"); break; case RSX_VEC_OPCODE_MAX: SetDSTVec("max($0, $1)"); break; From 16f40fb69c34310d5dbe27540ff57e86f265b428 Mon Sep 17 00:00:00 2001 From: vlj Date: Thu, 14 May 2015 19:51:15 +0200 Subject: [PATCH 041/343] d3d12: Fix some warnings --- rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.cpp | 4 ++-- rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.h | 8 +++----- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.cpp index ac2bc0e078..11b925753e 100644 --- a/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.cpp @@ -410,14 +410,14 @@ std::string VertexDecompiler::BuildCode() lvl -= m_instructions[i].close_scopes; if (lvl < 1) lvl = 1; //assert(lvl >= 1); - for (uint j = 0; j < m_instructions[i].put_close_scopes; ++j) + for (int j = 0; j < m_instructions[i].put_close_scopes; ++j) { --lvl; if (lvl < 1) lvl = 1; main_body.append(lvl, '\t') += "}\n"; } - for (uint j = 0; j < m_instructions[i].do_count; ++j) + for (int j = 0; j < m_instructions[i].do_count; ++j) { main_body.append(lvl, '\t') += "do\n"; main_body.append(lvl, '\t') += "{\n"; diff 
--git a/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.h b/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.h index 86cbf2e33e..f141effdd0 100644 --- a/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.h +++ b/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.h @@ -81,9 +81,8 @@ struct ParamArray } else { - const u32 num = params[flag].size(); params[flag].emplace_back(flag, type); - params[flag][num].items.emplace_back(name, -1, value); + params[flag].back().items.emplace_back(name, -1, value); } return name; @@ -99,9 +98,8 @@ struct ParamArray } else { - const u32 num = params[flag].size(); params[flag].emplace_back(flag, type); - params[flag][num].items.emplace_back(name, location); + params[flag].back().items.emplace_back(name, location); } return name; @@ -136,7 +134,7 @@ public: } } - int get_vector_size() const + size_t get_vector_size() const { return swizzles[swizzles.size() - 1].length(); } From caab6cbc6005b72d91b2468ba9d8123c29c4853d Mon Sep 17 00:00:00 2001 From: vlj Date: Thu, 14 May 2015 19:52:13 +0200 Subject: [PATCH 042/343] d3d12: Some cleaning --- rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.cpp | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.cpp index 11b925753e..982f3f3aed 100644 --- a/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.cpp @@ -397,13 +397,6 @@ std::string VertexDecompiler::BuildFuncBody(const FuncInfo& func) std::string VertexDecompiler::BuildCode() { - - std::string f; - std::string fp; - - f = fmt::Format("void %s()\n{\n\t%s();\n%s\tgl_Position = gl_Position * scaleOffsetMat;\n}\n", - m_funcs[0].name.c_str(), m_funcs[1].name.c_str(), f.c_str()); - std::string main_body; for (uint i = 0, lvl = 1; i < m_instr_count; i++) { @@ -432,13 +425,6 @@ std::string VertexDecompiler::BuildCode() lvl += m_instructions[i].open_scopes; } - f += fmt::Format("\nvoid %s()\n{\n%s}\n", m_funcs[1].name.c_str(), 
main_body.c_str()); - - for (uint i = 2; i Date: Thu, 14 May 2015 20:13:47 +0200 Subject: [PATCH 043/343] d3d12: Move/clean code --- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp | 1 - .../RSX/D3D12/D3D12ProgramDisassembler.cpp | 268 ------------------ .../Emu/RSX/D3D12/D3D12ProgramDisassembler.h | 6 - rpcs3/Emu/RSX/D3D12/ShaderParam.cpp | 6 + rpcs3/Emu/RSX/D3D12/ShaderParam.h | 194 +++++++++++++ rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.h | 190 +------------ rpcs3/emucore.vcxproj | 9 + rpcs3/emucore.vcxproj.filters | 15 + 8 files changed, 225 insertions(+), 464 deletions(-) delete mode 100644 rpcs3/Emu/RSX/D3D12/D3D12ProgramDisassembler.cpp delete mode 100644 rpcs3/Emu/RSX/D3D12/D3D12ProgramDisassembler.h create mode 100644 rpcs3/Emu/RSX/D3D12/ShaderParam.cpp create mode 100644 rpcs3/Emu/RSX/D3D12/ShaderParam.h diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp index 1bc7f2d827..550eb01dc5 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp @@ -2,7 +2,6 @@ #if defined (DX12_SUPPORT) #include "D3D12PipelineState.h" -#include "D3D12ProgramDisassembler.h" #include "Emu/Memory/vm.h" #include "Utilities/Log.h" #include diff --git a/rpcs3/Emu/RSX/D3D12/D3D12ProgramDisassembler.cpp b/rpcs3/Emu/RSX/D3D12/D3D12ProgramDisassembler.cpp deleted file mode 100644 index af3451cbfc..0000000000 --- a/rpcs3/Emu/RSX/D3D12/D3D12ProgramDisassembler.cpp +++ /dev/null @@ -1,268 +0,0 @@ -#include "stdafx.h" -#if defined (DX12_SUPPORT) -#include "D3D12ProgramDisassembler.h" -#include "Emu/Memory/vm.h" -#include "Utilities/Log.h" - -static u32 GetData(const u32 d) { return d << 16 | d >> 16; } - -void Decompile(RSXFragmentProgram& prog) -{ - auto data = vm::ptr::make(prog.addr); - size_t m_size = 0; - size_t m_location = 0; - size_t m_loop_count = 0; - size_t m_code_level = 1; - - enum - { - FORCE_NONE, - FORCE_SCT, - FORCE_SCB, - }; - - int forced_unit = FORCE_NONE; - - OPDEST operandDST; 
- - while (true) - { - operandDST.HEX = GetData(data[0]); -/* for (auto finded = std::find(m_end_offsets.begin(), m_end_offsets.end(), m_size); - finded != m_end_offsets.end(); - finded = std::find(m_end_offsets.begin(), m_end_offsets.end(), m_size)) - { - m_end_offsets.erase(finded); - m_code_level--; - AddCode("}"); - m_loop_count--; - }*/ - -/* for (auto finded = std::find(m_else_offsets.begin(), m_else_offsets.end(), m_size); - finded != m_else_offsets.end(); - finded = std::find(m_else_offsets.begin(), m_else_offsets.end(), m_size)) - { - m_else_offsets.erase(finded); - m_code_level--; - AddCode("}"); - AddCode("else"); - AddCode("{"); - m_code_level++; - } - - dst.HEX = GetData(data[0]); - src0.HEX = GetData(data[1]); - src1.HEX = GetData(data[2]); - src2.HEX = GetData(data[3]); - - m_offset = 4 * sizeof(u32); - - const u32 opcode = dst.opcode | (src1.opcode_is_branch << 6); - - auto SCT = [&]() - { - switch (opcode) - { - case RSX_FP_OPCODE_ADD: SetDst("($0 + $1)"); break; - case RSX_FP_OPCODE_DIV: SetDst("($0 / $1)"); break; - case RSX_FP_OPCODE_DIVSQ: SetDst("($0 / sqrt($1))"); break; - case RSX_FP_OPCODE_DP2: SetDst("vec4(dot($0.xy, $1.xy))"); break; - case RSX_FP_OPCODE_DP3: SetDst("vec4(dot($0.xyz, $1.xyz))"); break; - case RSX_FP_OPCODE_DP4: SetDst("vec4(dot($0, $1))"); break; - case RSX_FP_OPCODE_DP2A: SetDst("vec4($0.x * $1.x + $0.y * $1.y + $2.x)"); break; - case RSX_FP_OPCODE_MAD: SetDst("($0 * $1 + $2)"); break; - case RSX_FP_OPCODE_MAX: SetDst("max($0, $1)"); break; - case RSX_FP_OPCODE_MIN: SetDst("min($0, $1)"); break; - case RSX_FP_OPCODE_MOV: SetDst("$0"); break; - case RSX_FP_OPCODE_MUL: SetDst("($0 * $1)"); break; - case RSX_FP_OPCODE_RCP: SetDst("1 / $0"); break; - case RSX_FP_OPCODE_RSQ: SetDst("inversesqrt(abs($0))"); break; - case RSX_FP_OPCODE_SEQ: SetDst("vec4(equal($0, $1))"); break; - case RSX_FP_OPCODE_SFL: SetDst("vec4(0.0)"); break; - case RSX_FP_OPCODE_SGE: SetDst("vec4(greaterThanEqual($0, $1))"); break; - case 
RSX_FP_OPCODE_SGT: SetDst("vec4(greaterThan($0, $1))"); break; - case RSX_FP_OPCODE_SLE: SetDst("vec4(lessThanEqual($0, $1))"); break; - case RSX_FP_OPCODE_SLT: SetDst("vec4(lessThan($0, $1))"); break; - case RSX_FP_OPCODE_SNE: SetDst("vec4(notEqual($0, $1))"); break; - case RSX_FP_OPCODE_STR: SetDst("vec4(1.0)"); break; - - default: - return false; - } - - return true; - }; - - auto SCB = [&]() - { - switch (opcode) - { - case RSX_FP_OPCODE_ADD: SetDst("($0 + $1)"); break; - case RSX_FP_OPCODE_COS: SetDst("cos($0)"); break; - case RSX_FP_OPCODE_DP2: SetDst("vec4(dot($0.xy, $1.xy))"); break; - case RSX_FP_OPCODE_DP3: SetDst("vec4(dot($0.xyz, $1.xyz))"); break; - case RSX_FP_OPCODE_DP4: SetDst("vec4(dot($0, $1))"); break; - case RSX_FP_OPCODE_DP2A: SetDst("vec4($0.x * $1.x + $0.y * $1.y + $2.x)"); break; - case RSX_FP_OPCODE_DST: SetDst("vec4(distance($0, $1))"); break; - case RSX_FP_OPCODE_REFL: LOG_ERROR(RSX, "Unimplemented SCB instruction: REFL"); break; // TODO: Is this in the right category? - case RSX_FP_OPCODE_EX2: SetDst("exp2($0)"); break; - case RSX_FP_OPCODE_FLR: SetDst("floor($0)"); break; - case RSX_FP_OPCODE_FRC: SetDst("fract($0)"); break; - case RSX_FP_OPCODE_LIT: SetDst("vec4(1.0, $0.x, ($0.x > 0.0 ? exp($0.w * log2($0.y)) : 0.0), 1.0)"); break; - case RSX_FP_OPCODE_LIF: SetDst("vec4(1.0, $0.y, ($0.y > 0 ? pow(2.0, $0.w) : 0.0), 1.0)"); break; - case RSX_FP_OPCODE_LRP: LOG_ERROR(RSX, "Unimplemented SCB instruction: LRP"); break; // TODO: Is this in the right category? 
- case RSX_FP_OPCODE_LG2: SetDst("log2($0)"); break; - case RSX_FP_OPCODE_MAD: SetDst("($0 * $1 + $2)"); break; - case RSX_FP_OPCODE_MAX: SetDst("max($0, $1)"); break; - case RSX_FP_OPCODE_MIN: SetDst("min($0, $1)"); break; - case RSX_FP_OPCODE_MOV: SetDst("$0"); break; - case RSX_FP_OPCODE_MUL: SetDst("($0 * $1)"); break; - case RSX_FP_OPCODE_PK2: SetDst("packSnorm2x16($0)"); break; // TODO: More testing (Sonic The Hedgehog (NPUB-30442/NPEB-00478)) - case RSX_FP_OPCODE_PK4: SetDst("packSnorm4x8($0)"); break; // TODO: More testing (Sonic The Hedgehog (NPUB-30442/NPEB-00478)) - case RSX_FP_OPCODE_PK16: LOG_ERROR(RSX, "Unimplemented SCB instruction: PK16"); break; - case RSX_FP_OPCODE_PKB: LOG_ERROR(RSX, "Unimplemented SCB instruction: PKB"); break; - case RSX_FP_OPCODE_PKG: LOG_ERROR(RSX, "Unimplemented SCB instruction: PKG"); break; - case RSX_FP_OPCODE_SEQ: SetDst("vec4(equal($0, $1))"); break; - case RSX_FP_OPCODE_SFL: SetDst("vec4(0.0)"); break; - case RSX_FP_OPCODE_SGE: SetDst("vec4(greaterThanEqual($0, $1))"); break; - case RSX_FP_OPCODE_SGT: SetDst("vec4(greaterThan($0, $1))"); break; - case RSX_FP_OPCODE_SIN: SetDst("sin($0)"); break; - case RSX_FP_OPCODE_SLE: SetDst("vec4(lessThanEqual($0, $1))"); break; - case RSX_FP_OPCODE_SLT: SetDst("vec4(lessThan($0, $1))"); break; - case RSX_FP_OPCODE_SNE: SetDst("vec4(notEqual($0, $1))"); break; - case RSX_FP_OPCODE_STR: SetDst("vec4(1.0)"); break; - - default: - return false; - } - - return true; - }; - - auto TEX_SRB = [&]() - { - switch (opcode) - { - case RSX_FP_OPCODE_DDX: SetDst("dFdx($0)"); break; - case RSX_FP_OPCODE_DDY: SetDst("dFdy($0)"); break; - case RSX_FP_OPCODE_NRM: SetDst("normalize($0)"); break; - case RSX_FP_OPCODE_BEM: LOG_ERROR(RSX, "Unimplemented TEX_SRB instruction: BEM"); break; - case RSX_FP_OPCODE_TEX: SetDst("texture($t, $0.xy)"); break; - case RSX_FP_OPCODE_TEXBEM: SetDst("texture($t, $0.xy, $1.x)"); break; - case RSX_FP_OPCODE_TXP: SetDst("textureProj($t, $0.xyz, $1.x)"); break; //TODO: 
More testing (Sonic The Hedgehog (NPUB-30442/NPEB-00478) and The Simpsons Arcade Game (NPUB30563)) - case RSX_FP_OPCODE_TXPBEM: SetDst("textureProj($t, $0.xyz, $1.x)"); break; - case RSX_FP_OPCODE_TXD: LOG_ERROR(RSX, "Unimplemented TEX_SRB instruction: TXD"); break; - case RSX_FP_OPCODE_TXB: SetDst("texture($t, $0.xy, $1.x)"); break; - case RSX_FP_OPCODE_TXL: SetDst("textureLod($t, $0.xy, $1.x)"); break; - case RSX_FP_OPCODE_UP2: SetDst("unpackSnorm2x16($0)"); break; // TODO: More testing (Sonic The Hedgehog (NPUB-30442/NPEB-00478)) - case RSX_FP_OPCODE_UP4: SetDst("unpackSnorm4x8($0)"); break; // TODO: More testing (Sonic The Hedgehog (NPUB-30442/NPEB-00478)) - case RSX_FP_OPCODE_UP16: LOG_ERROR(RSX, "Unimplemented TEX_SRB instruction: UP16"); break; - case RSX_FP_OPCODE_UPB: LOG_ERROR(RSX, "Unimplemented TEX_SRB instruction: UPB"); break; - case RSX_FP_OPCODE_UPG: LOG_ERROR(RSX, "Unimplemented TEX_SRB instruction: UPG"); break; - - default: - return false; - } - - return true; - }; - - auto SIP = [&]() - { - switch (opcode) - { - case RSX_FP_OPCODE_BRK: SetDst("break"); break; - case RSX_FP_OPCODE_CAL: LOG_ERROR(RSX, "Unimplemented SIP instruction: CAL"); break; - case RSX_FP_OPCODE_FENCT: forced_unit = FORCE_SCT; break; - case RSX_FP_OPCODE_FENCB: forced_unit = FORCE_SCB; break; - case RSX_FP_OPCODE_IFE: - AddCode("if($cond)"); - m_else_offsets.push_back(src1.else_offset << 2); - m_end_offsets.push_back(src2.end_offset << 2); - AddCode("{"); - m_code_level++; - break; - case RSX_FP_OPCODE_LOOP: - if (!src0.exec_if_eq && !src0.exec_if_gr && !src0.exec_if_lt) - { - AddCode(fmt::Format("$ifcond for(int i%u = %u; i%u < %u; i%u += %u) {} //-> %u //LOOP", - m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment, src2.end_offset)); - } - else - { - AddCode(fmt::Format("$ifcond for(int i%u = %u; i%u < %u; i%u += %u) //LOOP", - m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment)); - 
m_loop_count++; - m_end_offsets.push_back(src2.end_offset << 2); - AddCode("{"); - m_code_level++; - } - break; - case RSX_FP_OPCODE_REP: - if (!src0.exec_if_eq && !src0.exec_if_gr && !src0.exec_if_lt) - { - AddCode(fmt::Format("$ifcond for(int i%u = %u; i%u < %u; i%u += %u) {} //-> %u //REP", - m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment, src2.end_offset)); - } - else - { - AddCode(fmt::Format("if($cond) for(int i%u = %u; i%u < %u; i%u += %u) //REP", - m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment)); - m_loop_count++; - m_end_offsets.push_back(src2.end_offset << 2); - AddCode("{"); - m_code_level++; - } - break; - case RSX_FP_OPCODE_RET: SetDst("return"); break; - - default: - return false; - } - - return true; - }; - - switch (opcode) - { - case RSX_FP_OPCODE_NOP: break; - case RSX_FP_OPCODE_KIL: SetDst("discard", false); break; - - default: - if (forced_unit == FORCE_NONE) - { - if (SIP()) break; - if (SCT()) break; - if (TEX_SRB()) break; - if (SCB()) break; - } - else if (forced_unit == FORCE_SCT) - { - forced_unit = FORCE_NONE; - if (SCT()) break; - } - else if (forced_unit == FORCE_SCB) - { - forced_unit = FORCE_NONE; - if (SCB()) break; - } - - LOG_ERROR(RSX, "Unknown/illegal instruction: 0x%x (forced unit %d)", opcode, forced_unit); - break; - } - - m_size += m_offset;*/ - - if (operandDST.end) break; - -// assert(m_offset % sizeof(u32) == 0); - data += 4 / sizeof(u32); - } - - // flush m_code_level - m_code_level = 1; -/* m_shader = BuildCode(); - main.clear(); - m_parr.params.clear();*/ -} -#endif \ No newline at end of file diff --git a/rpcs3/Emu/RSX/D3D12/D3D12ProgramDisassembler.h b/rpcs3/Emu/RSX/D3D12/D3D12ProgramDisassembler.h deleted file mode 100644 index 011cef292e..0000000000 --- a/rpcs3/Emu/RSX/D3D12/D3D12ProgramDisassembler.h +++ /dev/null @@ -1,6 +0,0 @@ -#pragma once -#if defined (DX12_SUPPORT) -#include "Emu/RSX/RSXFragmentProgram.h" - -void 
Decompile(RSXFragmentProgram& prog); -#endif \ No newline at end of file diff --git a/rpcs3/Emu/RSX/D3D12/ShaderParam.cpp b/rpcs3/Emu/RSX/D3D12/ShaderParam.cpp new file mode 100644 index 0000000000..a28d9bf21b --- /dev/null +++ b/rpcs3/Emu/RSX/D3D12/ShaderParam.cpp @@ -0,0 +1,6 @@ +#include "stdafx.h" +#if defined(DX12_SUPPORT) +#include "ShaderParam.h" + + +#endif \ No newline at end of file diff --git a/rpcs3/Emu/RSX/D3D12/ShaderParam.h b/rpcs3/Emu/RSX/D3D12/ShaderParam.h new file mode 100644 index 0000000000..5ef5d3e3d3 --- /dev/null +++ b/rpcs3/Emu/RSX/D3D12/ShaderParam.h @@ -0,0 +1,194 @@ +#pragma once +#if defined(DX12_SUPPORT) +#include +#include + +enum ParamFlag +{ + PARAM_IN, + PARAM_OUT, + PARAM_UNIFORM, + PARAM_CONST, + PARAM_NONE, + PARAM_COUNT, +}; + +struct ParamItem +{ + std::string name; + std::string value; + int location; + + ParamItem(const std::string& _name, int _location, const std::string& _value = "") + : name(_name) + , value(_value), + location(_location) + { } +}; + +struct ParamType +{ + const ParamFlag flag; + std::string type; + std::vector items; + + ParamType(const ParamFlag _flag, const std::string& _type) + : flag(_flag) + , type(_type) + { + } + + bool SearchName(const std::string& name) + { + for (u32 i = 0; i params[PARAM_COUNT]; + + ParamType* SearchParam(const ParamFlag &flag, const std::string& type) + { + for (u32 i = 0; iSearchName(name); + } + + std::string AddParam(const ParamFlag flag, std::string type, const std::string& name, const std::string& value) + { + ParamType* t = SearchParam(flag, type); + + if (t) + { + if (!t->SearchName(name)) t->items.emplace_back(name, -1, value); + } + else + { + params[flag].emplace_back(flag, type); + params[flag].back().items.emplace_back(name, -1, value); + } + + return name; + } + + std::string AddParam(const ParamFlag flag, std::string type, const std::string& name, int location = -1) + { + ParamType* t = SearchParam(flag, type); + + if (t) + { + if (!t->SearchName(name)) 
t->items.emplace_back(name, location); + } + else + { + params[flag].emplace_back(flag, type); + params[flag].back().items.emplace_back(name, location); + } + + return name; + } +}; + +class ShaderVar +{ +public: + std::string name; + std::vector swizzles; + + ShaderVar() = default; + ShaderVar(const std::string& var) + { + auto var_blocks = fmt::split(var, { "." }); + + if (var_blocks.size() == 0) + { + assert(0); + } + + name = var_blocks[0]; + + if (var_blocks.size() == 1) + { + swizzles.push_back("xyzw"); + } + else + { + swizzles = std::vector(var_blocks.begin() + 1, var_blocks.end()); + } + } + + size_t get_vector_size() const + { + return swizzles[swizzles.size() - 1].length(); + } + + ShaderVar& symplify() + { + std::unordered_map swizzle; + + static std::unordered_map pos_to_swizzle = + { + { 0, 'x' }, + { 1, 'y' }, + { 2, 'z' }, + { 3, 'w' } + }; + + for (auto &i : pos_to_swizzle) + { + swizzle[i.second] = swizzles[0].length() > i.first ? swizzles[0][i.first] : 0; + } + + for (int i = 1; i < swizzles.size(); ++i) + { + std::unordered_map new_swizzle; + + for (auto &sw : pos_to_swizzle) + { + new_swizzle[sw.second] = swizzle[swizzles[i].length() <= sw.first ? '\0' : swizzles[i][sw.first]]; + } + + swizzle = new_swizzle; + } + + swizzles.clear(); + std::string new_swizzle; + + for (auto &i : pos_to_swizzle) + { + if (swizzle[i.second] != '\0') + new_swizzle += swizzle[i.second]; + } + + swizzles.push_back(new_swizzle); + + return *this; + } + + std::string get() const + { + if (swizzles.size() == 1 && swizzles[0] == "xyzw") + { + return name; + } + + return name + "." 
+ fmt::merge({ swizzles }, "."); + } +}; +#endif \ No newline at end of file diff --git a/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.h b/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.h index f141effdd0..7fcbc97a30 100644 --- a/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.h +++ b/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.h @@ -3,195 +3,7 @@ #include "Emu/RSX/RSXVertexProgram.h" #include #include - -enum ParamFlag -{ - PARAM_IN, - PARAM_OUT, - PARAM_UNIFORM, - PARAM_CONST, - PARAM_NONE, - PARAM_COUNT, -}; - -struct ParamItem -{ - std::string name; - std::string value; - int location; - - ParamItem(const std::string& _name, int _location, const std::string& _value = "") - : name(_name) - , value(_value), - location(_location) - { } -}; - -struct ParamType -{ - const ParamFlag flag; - std::string type; - std::vector items; - - ParamType(const ParamFlag _flag, const std::string& _type) - : flag(_flag) - , type(_type) - { - } - - bool SearchName(const std::string& name) - { - for (u32 i = 0; i params[PARAM_COUNT]; - - ParamType* SearchParam(const ParamFlag &flag, const std::string& type) - { - for (u32 i = 0; iSearchName(name); - } - - std::string AddParam(const ParamFlag flag, std::string type, const std::string& name, const std::string& value) - { - ParamType* t = SearchParam(flag, type); - - if (t) - { - if (!t->SearchName(name)) t->items.emplace_back(name, -1, value); - } - else - { - params[flag].emplace_back(flag, type); - params[flag].back().items.emplace_back(name, -1, value); - } - - return name; - } - - std::string AddParam(const ParamFlag flag, std::string type, const std::string& name, int location = -1) - { - ParamType* t = SearchParam(flag, type); - - if (t) - { - if (!t->SearchName(name)) t->items.emplace_back(name, location); - } - else - { - params[flag].emplace_back(flag, type); - params[flag].back().items.emplace_back(name, location); - } - - return name; - } -}; - -class ShaderVar -{ -public: - std::string name; - std::vector swizzles; - - ShaderVar() 
= default; - ShaderVar(const std::string& var) - { - auto var_blocks = fmt::split(var, { "." }); - - if (var_blocks.size() == 0) - { - assert(0); - } - - name = var_blocks[0]; - - if (var_blocks.size() == 1) - { - swizzles.push_back("xyzw"); - } - else - { - swizzles = std::vector(var_blocks.begin() + 1, var_blocks.end()); - } - } - - size_t get_vector_size() const - { - return swizzles[swizzles.size() - 1].length(); - } - - ShaderVar& symplify() - { - std::unordered_map swizzle; - - static std::unordered_map pos_to_swizzle = - { - { 0, 'x' }, - { 1, 'y' }, - { 2, 'z' }, - { 3, 'w' } - }; - - for (auto &i : pos_to_swizzle) - { - swizzle[i.second] = swizzles[0].length() > i.first ? swizzles[0][i.first] : 0; - } - - for (int i = 1; i < swizzles.size(); ++i) - { - std::unordered_map new_swizzle; - - for (auto &sw : pos_to_swizzle) - { - new_swizzle[sw.second] = swizzle[swizzles[i].length() <= sw.first ? '\0' : swizzles[i][sw.first]]; - } - - swizzle = new_swizzle; - } - - swizzles.clear(); - std::string new_swizzle; - - for (auto &i : pos_to_swizzle) - { - if (swizzle[i.second] != '\0') - new_swizzle += swizzle[i.second]; - } - - swizzles.push_back(new_swizzle); - - return *this; - } - - std::string get() const - { - if (swizzles.size() == 1 && swizzles[0] == "xyzw") - { - return name; - } - - return name + "." 
+ fmt::merge({ swizzles }, "."); - } -}; +#include "ShaderParam.h" struct VertexDecompiler { diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index 4d595b4823..232b4bd521 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -41,7 +41,12 @@ + + + + + @@ -499,8 +504,12 @@ + + + + diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters index 5674643b14..f30d1fdaee 100644 --- a/rpcs3/emucore.vcxproj.filters +++ b/rpcs3/emucore.vcxproj.filters @@ -974,6 +974,15 @@ Emu\GPU\RSX\D3D12 + + Emu\GPU\RSX\D3D12 + + + Emu\GPU\RSX\D3D12 + + + Emu\GPU\RSX\D3D12 + Emu\GPU\RSX\D3D12 @@ -1849,6 +1858,12 @@ Emu\GPU\RSX\D3D12 + + Emu\GPU\RSX\D3D12 + + + Emu\GPU\RSX\D3D12 + Emu\GPU\RSX\D3D12 From bb643070bd8dd9df3f42f4d71958fec4e801349d Mon Sep 17 00:00:00 2001 From: vlj Date: Thu, 14 May 2015 20:27:05 +0200 Subject: [PATCH 044/343] d3d12: Start working on fragment decompiler --- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp | 6 +- .../RSX/D3D12/FragmentProgramDecompiler.cpp | 630 ++++++++++++++++++ .../Emu/RSX/D3D12/FragmentProgramDecompiler.h | 43 ++ rpcs3/emucore.vcxproj | 2 + rpcs3/emucore.vcxproj.filters | 6 + 5 files changed, 685 insertions(+), 2 deletions(-) create mode 100644 rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp create mode 100644 rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.h diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp index 550eb01dc5..1feb6e1964 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp @@ -8,6 +8,7 @@ #include #include #include "VertexProgramDecompiler.h" +#include "FragmentProgramDecompiler.h" #include "Utilities/File.h" #pragma comment (lib, "d3dcompiler.lib") @@ -106,12 +107,13 @@ ID3D12PipelineState *PipelineStateObjectCache::getGraphicPipelineState( if (!m_fp_buf_num) { LOG_WARNING(RSX, "FP not found in buffer!"); - // Decompile(*fragmentShader); + FragmentDecompiler FS(fragmentShader->addr, 
fragmentShader->size, fragmentShader->offset); + const std::string &shader = FS.Decompile(); m_fragment_prog.Compile("", SHADER_TYPE::SHADER_TYPE_FRAGMENT); AddFragmentProgram(m_fragment_prog, *fragmentShader); // TODO: This shouldn't use current dir - //fs::file("./FragmentProgram.txt", o_write | o_create | o_trunc).write(m_fragment_prog.shader.c_str(), m_fragment_prog.shader.size()); + fs::file("./FragmentProgram.txt", o_write | o_create | o_trunc).write(shader.c_str(), shader.size()); } if (!m_vp_buf_num) diff --git a/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp new file mode 100644 index 0000000000..c965b6dda1 --- /dev/null +++ b/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp @@ -0,0 +1,630 @@ +#include "stdafx.h" +#if defined(DX12_SUPPORT) +#include "FragmentProgramDecompiler.h" + +#include "Utilities/Log.h" +#include "Emu/Memory/Memory.h" +#include "Emu/System.h" + +FragmentDecompiler::FragmentDecompiler(u32 addr, u32& size, u32 ctrl) : + m_addr(addr), + m_size(size), + m_const_index(0), + m_location(0), + m_ctrl(ctrl) +{ + m_size = 0; +} + + +void FragmentDecompiler::SetDst(std::string code, bool append_mask) +{ + if (!src0.exec_if_eq && !src0.exec_if_gr && !src0.exec_if_lt) return; + + switch (src1.scale) + { + case 0: break; + case 1: code = "(" + code + " * 2.0)"; break; + case 2: code = "(" + code + " * 4.0)"; break; + case 3: code = "(" + code + " * 8.0)"; break; + case 5: code = "(" + code + " / 2.0)"; break; + case 6: code = "(" + code + " / 4.0)"; break; + case 7: code = "(" + code + " / 8.0)"; break; + + default: + LOG_ERROR(RSX, "Bad scale: %d", fmt::by_value(src1.scale)); + Emu.Pause(); + break; + } + + if (dst.saturate) + { + code = "clamp(" + code + ", 0.0, 1.0)"; + } + + code += (append_mask ? 
"$m" : ""); + + if (dst.no_dest) + { + if (dst.set_cond) + { + AddCode("$ifcond " + m_parr.AddParam(PARAM_NONE, "vec4", "cc" + std::to_string(src0.cond_mod_reg_index)) + "$m = " + code + ";"); + } + else + { + AddCode("$ifcond " + code + ";"); + } + + return; + } + + std::string dest = AddReg(dst.dest_reg, dst.fp16) + "$m"; + + AddCodeCond(Format(dest), code); + //AddCode("$ifcond " + dest + code + (append_mask ? "$m;" : ";")); + + if (dst.set_cond) + { + AddCode(m_parr.AddParam(PARAM_NONE, "vec4", "cc" + std::to_string(src0.cond_mod_reg_index)) + "$m = " + dest + ";"); + } +} + +void FragmentDecompiler::AddCode(const std::string& code) +{ + main.append(m_code_level, '\t') += Format(code) + "\n"; +} + +std::string FragmentDecompiler::GetMask() +{ + std::string ret; + + static const char dst_mask[4] = + { + 'x', 'y', 'z', 'w', + }; + + if (dst.mask_x) ret += dst_mask[0]; + if (dst.mask_y) ret += dst_mask[1]; + if (dst.mask_z) ret += dst_mask[2]; + if (dst.mask_w) ret += dst_mask[3]; + + return ret.empty() || strncmp(ret.c_str(), dst_mask, 4) == 0 ? "" : ("." + ret); +} + +std::string FragmentDecompiler::AddReg(u32 index, int fp16) +{ + return m_parr.AddParam(PARAM_NONE, "vec4", std::string(fp16 ? "h" : "r") + std::to_string(index), "vec4(0.0)"); +} + +bool FragmentDecompiler::HasReg(u32 index, int fp16) +{ + return m_parr.HasParam(PARAM_NONE, "vec4", + std::string(fp16 ? 
"h" : "r") + std::to_string(index)); +} + +std::string FragmentDecompiler::AddCond() +{ + return m_parr.AddParam(PARAM_NONE, "vec4", "cc" + std::to_string(src0.cond_reg_index)); +} + +std::string FragmentDecompiler::AddConst() +{ + std::string name = std::string("fc") + std::to_string(m_size + 4 * 4); + if (m_parr.HasParam(PARAM_UNIFORM, "vec4", name)) + { + return name; + } + + auto data = vm::ptr::make(m_addr + m_size + 4 * sizeof(u32)); + + m_offset = 2 * 4 * sizeof(u32); + u32 x = GetData(data[0]); + u32 y = GetData(data[1]); + u32 z = GetData(data[2]); + u32 w = GetData(data[3]); + return m_parr.AddParam(PARAM_UNIFORM, "vec4", name, + std::string("vec4(") + std::to_string((float&)x) + ", " + std::to_string((float&)y) + + ", " + std::to_string((float&)z) + ", " + std::to_string((float&)w) + ")"); +} + +std::string FragmentDecompiler::AddTex() +{ + return m_parr.AddParam(PARAM_UNIFORM, "sampler2D", std::string("tex") + std::to_string(dst.tex_num)); +} + +std::string FragmentDecompiler::Format(const std::string& code) +{ + const std::pair> repl_list[] = + { + { "$$", []() -> std::string { return "$"; } }, + { "$0", [this]() -> std::string {return GetSRC(src0);} },//std::bind(std::mem_fn(&GLFragmentDecompilerThread::GetSRC), *this, src0) }, + { "$1", [this]() -> std::string {return GetSRC(src1);} },//std::bind(std::mem_fn(&GLFragmentDecompilerThread::GetSRC), this, src1) }, + { "$2", [this]() -> std::string {return GetSRC(src2);} },//std::bind(std::mem_fn(&GLFragmentDecompilerThread::GetSRC), this, src2) }, + { "$t", std::bind(std::mem_fn(&FragmentDecompiler::AddTex), this) }, + { "$m", std::bind(std::mem_fn(&FragmentDecompiler::GetMask), this) }, + { "$ifcond ", [this]() -> std::string + { + const std::string& cond = GetCond(); + if (cond == "true") return ""; + return "if(" + cond + ") "; + } + }, + { "$cond", std::bind(std::mem_fn(&FragmentDecompiler::GetCond), this) }, + { "$c", std::bind(std::mem_fn(&FragmentDecompiler::AddConst), this) } + }; + + return 
fmt::replace_all(code, repl_list); +} + +std::string FragmentDecompiler::GetCond() +{ + if (src0.exec_if_gr && src0.exec_if_lt && src0.exec_if_eq) + { + return "true"; + } + else if (!src0.exec_if_gr && !src0.exec_if_lt && !src0.exec_if_eq) + { + return "false"; + } + + static const char f[4] = { 'x', 'y', 'z', 'w' }; + + std::string swizzle, cond; + swizzle += f[src0.cond_swizzle_x]; + swizzle += f[src0.cond_swizzle_y]; + swizzle += f[src0.cond_swizzle_z]; + swizzle += f[src0.cond_swizzle_w]; + swizzle = swizzle == "xyzw" ? "" : "." + swizzle; + + if (src0.exec_if_gr && src0.exec_if_eq) + { + cond = "greaterThanEqual"; + } + else if (src0.exec_if_lt && src0.exec_if_eq) + { + cond = "lessThanEqual"; + } + else if (src0.exec_if_gr && src0.exec_if_lt) + { + cond = "notEqual"; + } + else if (src0.exec_if_gr) + { + cond = "greaterThan"; + } + else if (src0.exec_if_lt) + { + cond = "lessThan"; + } + else //if(src0.exec_if_eq) + { + cond = "equal"; + } + + return "any(" + cond + "(" + AddCond() + swizzle + ", vec4(0.0)))"; +} + +void FragmentDecompiler::AddCodeCond(const std::string& dst, const std::string& src) +{ + if (src0.exec_if_gr && src0.exec_if_lt && src0.exec_if_eq) + { + AddCode(dst + " = " + src + ";"); + return; + } + + if (!src0.exec_if_gr && !src0.exec_if_lt && !src0.exec_if_eq) + { + AddCode("//" + dst + " = " + src + ";"); + return; + } + + static const char f[4] = { 'x', 'y', 'z', 'w' }; + + std::string swizzle, cond; + swizzle += f[src0.cond_swizzle_x]; + swizzle += f[src0.cond_swizzle_y]; + swizzle += f[src0.cond_swizzle_z]; + swizzle += f[src0.cond_swizzle_w]; + swizzle = swizzle == "xyzw" ? "" : "." 
+ swizzle; + + if (src0.exec_if_gr && src0.exec_if_eq) + { + cond = "greaterThanEqual"; + } + else if (src0.exec_if_lt && src0.exec_if_eq) + { + cond = "lessThanEqual"; + } + else if (src0.exec_if_gr && src0.exec_if_lt) + { + cond = "notEqual"; + } + else if (src0.exec_if_gr) + { + cond = "greaterThan"; + } + else if (src0.exec_if_lt) + { + cond = "lessThan"; + } + else //if(src0.exec_if_eq) + { + cond = "equal"; + } + + cond = cond + "(" + AddCond() + swizzle + ", vec4(0.0))"; + + ShaderVar dst_var(dst); + dst_var.symplify(); + + //const char *c_mask = f; + + if (dst_var.swizzles[0].length() == 1) + { + AddCode("if (" + cond + ".x) " + dst + " = vec4(" + src + ").x;"); + } + else + { + for (int i = 0; i < dst_var.swizzles[0].length(); ++i) + { + AddCode("if (" + cond + "." + f[i] + ") " + dst + "." + f[i] + " = " + src + "." + f[i] + ";"); + } + } +} + +template std::string FragmentDecompiler::GetSRC(T src) +{ + std::string ret; + + switch (src.reg_type) + { + case 0: //tmp + ret += AddReg(src.tmp_reg_index, src.fp16); + break; + + case 1: //input + { + static const std::string reg_table[] = + { + "gl_Position", + "diff_color", "spec_color", + "fogc", + "tc0", "tc1", "tc2", "tc3", "tc4", "tc5", "tc6", "tc7", "tc8", "tc9", + "ssa" + }; + + switch (dst.src_attr_reg_num) + { + case 0x00: ret += reg_table[0]; break; + default: + if (dst.src_attr_reg_num < sizeof(reg_table) / sizeof(reg_table[0])) + { + ret += m_parr.AddParam(PARAM_IN, "vec4", reg_table[dst.src_attr_reg_num]); + } + else + { + LOG_ERROR(RSX, "Bad src reg num: %d", fmt::by_value(dst.src_attr_reg_num)); + ret += m_parr.AddParam(PARAM_IN, "vec4", "unk"); + Emu.Pause(); + } + break; + } + } + break; + + case 2: //const + ret += AddConst(); + break; + + default: + LOG_ERROR(RSX, "Bad src type %d", fmt::by_value(src.reg_type)); + Emu.Pause(); + break; + } + + static const char f[4] = { 'x', 'y', 'z', 'w' }; + + std::string swizzle = ""; + swizzle += f[src.swizzle_x]; + swizzle += f[src.swizzle_y]; + swizzle 
+= f[src.swizzle_z]; + swizzle += f[src.swizzle_w]; + + if (strncmp(swizzle.c_str(), f, 4) != 0) ret += "." + swizzle; + + if (src.abs) ret = "abs(" + ret + ")"; + if (src.neg) ret = "-" + ret; + + return ret; +} + +std::string FragmentDecompiler::BuildCode() +{ + //main += fmt::Format("\tgl_FragColor = %c0;\n", m_ctrl & 0x40 ? 'r' : 'h'); + const std::pair table[] = + { + { "ocol0", m_ctrl & 0x40 ? "r0" : "h0" }, + { "ocol1", m_ctrl & 0x40 ? "r2" : "h4" }, + { "ocol2", m_ctrl & 0x40 ? "r3" : "h6" }, + { "ocol3", m_ctrl & 0x40 ? "r4" : "h8" }, + }; + + for (int i = 0; i < sizeof(table) / sizeof(*table); ++i) + { + if (m_parr.HasParam(PARAM_NONE, "vec4", table[i].second)) + AddCode(m_parr.AddParam(PARAM_OUT, "vec4", table[i].first, i) + " = " + table[i].second + ";"); + } + + if (m_ctrl & 0xe) main += m_ctrl & 0x40 ? "\tgl_FragDepth = r1.z;\n" : "\tgl_FragDepth = h2.z;\n"; + + std::string p; + + for (auto& param : m_parr.params) { +// p += param.Format(); + } + + return std::string("#version 420\n" + "\n" + + p + "\n" + "void main()\n{\n" + main + "}\n"); +} + +std::string FragmentDecompiler::Decompile() +{ + auto data = vm::ptr::make(m_addr); + m_size = 0; + m_location = 0; + m_loop_count = 0; + m_code_level = 1; + + enum + { + FORCE_NONE, + FORCE_SCT, + FORCE_SCB, + }; + + int forced_unit = FORCE_NONE; + + while (true) + { + for (auto finded = std::find(m_end_offsets.begin(), m_end_offsets.end(), m_size); + finded != m_end_offsets.end(); + finded = std::find(m_end_offsets.begin(), m_end_offsets.end(), m_size)) + { + m_end_offsets.erase(finded); + m_code_level--; + AddCode("}"); + m_loop_count--; + } + + for (auto finded = std::find(m_else_offsets.begin(), m_else_offsets.end(), m_size); + finded != m_else_offsets.end(); + finded = std::find(m_else_offsets.begin(), m_else_offsets.end(), m_size)) + { + m_else_offsets.erase(finded); + m_code_level--; + AddCode("}"); + AddCode("else"); + AddCode("{"); + m_code_level++; + } + + dst.HEX = GetData(data[0]); + src0.HEX = 
GetData(data[1]); + src1.HEX = GetData(data[2]); + src2.HEX = GetData(data[3]); + + m_offset = 4 * sizeof(u32); + + const u32 opcode = dst.opcode | (src1.opcode_is_branch << 6); + + auto SCT = [&]() + { + switch (opcode) + { + case RSX_FP_OPCODE_ADD: SetDst("($0 + $1)"); break; + case RSX_FP_OPCODE_DIV: SetDst("($0 / $1)"); break; + case RSX_FP_OPCODE_DIVSQ: SetDst("($0 / sqrt($1))"); break; + case RSX_FP_OPCODE_DP2: SetDst("vec4(dot($0.xy, $1.xy))"); break; + case RSX_FP_OPCODE_DP3: SetDst("vec4(dot($0.xyz, $1.xyz))"); break; + case RSX_FP_OPCODE_DP4: SetDst("vec4(dot($0, $1))"); break; + case RSX_FP_OPCODE_DP2A: SetDst("vec4($0.x * $1.x + $0.y * $1.y + $2.x)"); break; + case RSX_FP_OPCODE_MAD: SetDst("($0 * $1 + $2)"); break; + case RSX_FP_OPCODE_MAX: SetDst("max($0, $1)"); break; + case RSX_FP_OPCODE_MIN: SetDst("min($0, $1)"); break; + case RSX_FP_OPCODE_MOV: SetDst("$0"); break; + case RSX_FP_OPCODE_MUL: SetDst("($0 * $1)"); break; + case RSX_FP_OPCODE_RCP: SetDst("1 / $0"); break; + case RSX_FP_OPCODE_RSQ: SetDst("inversesqrt(abs($0))"); break; + case RSX_FP_OPCODE_SEQ: SetDst("vec4(equal($0, $1))"); break; + case RSX_FP_OPCODE_SFL: SetDst("vec4(0.0)"); break; + case RSX_FP_OPCODE_SGE: SetDst("vec4(greaterThanEqual($0, $1))"); break; + case RSX_FP_OPCODE_SGT: SetDst("vec4(greaterThan($0, $1))"); break; + case RSX_FP_OPCODE_SLE: SetDst("vec4(lessThanEqual($0, $1))"); break; + case RSX_FP_OPCODE_SLT: SetDst("vec4(lessThan($0, $1))"); break; + case RSX_FP_OPCODE_SNE: SetDst("vec4(notEqual($0, $1))"); break; + case RSX_FP_OPCODE_STR: SetDst("vec4(1.0)"); break; + + default: + return false; + } + + return true; + }; + + auto SCB = [&]() + { + switch (opcode) + { + case RSX_FP_OPCODE_ADD: SetDst("($0 + $1)"); break; + case RSX_FP_OPCODE_COS: SetDst("cos($0)"); break; + case RSX_FP_OPCODE_DP2: SetDst("vec4(dot($0.xy, $1.xy))"); break; + case RSX_FP_OPCODE_DP3: SetDst("vec4(dot($0.xyz, $1.xyz))"); break; + case RSX_FP_OPCODE_DP4: SetDst("vec4(dot($0, $1))"); break; + 
case RSX_FP_OPCODE_DP2A: SetDst("vec4($0.x * $1.x + $0.y * $1.y + $2.x)"); break; + case RSX_FP_OPCODE_DST: SetDst("vec4(distance($0, $1))"); break; + case RSX_FP_OPCODE_REFL: LOG_ERROR(RSX, "Unimplemented SCB instruction: REFL"); break; // TODO: Is this in the right category? + case RSX_FP_OPCODE_EX2: SetDst("exp2($0)"); break; + case RSX_FP_OPCODE_FLR: SetDst("floor($0)"); break; + case RSX_FP_OPCODE_FRC: SetDst("fract($0)"); break; + case RSX_FP_OPCODE_LIT: SetDst("vec4(1.0, $0.x, ($0.x > 0.0 ? exp($0.w * log2($0.y)) : 0.0), 1.0)"); break; + case RSX_FP_OPCODE_LIF: SetDst("vec4(1.0, $0.y, ($0.y > 0 ? pow(2.0, $0.w) : 0.0), 1.0)"); break; + case RSX_FP_OPCODE_LRP: LOG_ERROR(RSX, "Unimplemented SCB instruction: LRP"); break; // TODO: Is this in the right category? + case RSX_FP_OPCODE_LG2: SetDst("log2($0)"); break; + case RSX_FP_OPCODE_MAD: SetDst("($0 * $1 + $2)"); break; + case RSX_FP_OPCODE_MAX: SetDst("max($0, $1)"); break; + case RSX_FP_OPCODE_MIN: SetDst("min($0, $1)"); break; + case RSX_FP_OPCODE_MOV: SetDst("$0"); break; + case RSX_FP_OPCODE_MUL: SetDst("($0 * $1)"); break; + case RSX_FP_OPCODE_PK2: SetDst("packSnorm2x16($0)"); break; // TODO: More testing (Sonic The Hedgehog (NPUB-30442/NPEB-00478)) + case RSX_FP_OPCODE_PK4: SetDst("packSnorm4x8($0)"); break; // TODO: More testing (Sonic The Hedgehog (NPUB-30442/NPEB-00478)) + case RSX_FP_OPCODE_PK16: LOG_ERROR(RSX, "Unimplemented SCB instruction: PK16"); break; + case RSX_FP_OPCODE_PKB: LOG_ERROR(RSX, "Unimplemented SCB instruction: PKB"); break; + case RSX_FP_OPCODE_PKG: LOG_ERROR(RSX, "Unimplemented SCB instruction: PKG"); break; + case RSX_FP_OPCODE_SEQ: SetDst("vec4(equal($0, $1))"); break; + case RSX_FP_OPCODE_SFL: SetDst("vec4(0.0)"); break; + case RSX_FP_OPCODE_SGE: SetDst("vec4(greaterThanEqual($0, $1))"); break; + case RSX_FP_OPCODE_SGT: SetDst("vec4(greaterThan($0, $1))"); break; + case RSX_FP_OPCODE_SIN: SetDst("sin($0)"); break; + case RSX_FP_OPCODE_SLE: SetDst("vec4(lessThanEqual($0, 
$1))"); break; + case RSX_FP_OPCODE_SLT: SetDst("vec4(lessThan($0, $1))"); break; + case RSX_FP_OPCODE_SNE: SetDst("vec4(notEqual($0, $1))"); break; + case RSX_FP_OPCODE_STR: SetDst("vec4(1.0)"); break; + + default: + return false; + } + + return true; + }; + + auto TEX_SRB = [&]() + { + switch (opcode) + { + case RSX_FP_OPCODE_DDX: SetDst("dFdx($0)"); break; + case RSX_FP_OPCODE_DDY: SetDst("dFdy($0)"); break; + case RSX_FP_OPCODE_NRM: SetDst("normalize($0)"); break; + case RSX_FP_OPCODE_BEM: LOG_ERROR(RSX, "Unimplemented TEX_SRB instruction: BEM"); break; + case RSX_FP_OPCODE_TEX: SetDst("texture($t, $0.xy)"); break; + case RSX_FP_OPCODE_TEXBEM: SetDst("texture($t, $0.xy, $1.x)"); break; + case RSX_FP_OPCODE_TXP: SetDst("textureProj($t, $0.xyz, $1.x)"); break; //TODO: More testing (Sonic The Hedgehog (NPUB-30442/NPEB-00478) and The Simpsons Arcade Game (NPUB30563)) + case RSX_FP_OPCODE_TXPBEM: SetDst("textureProj($t, $0.xyz, $1.x)"); break; + case RSX_FP_OPCODE_TXD: LOG_ERROR(RSX, "Unimplemented TEX_SRB instruction: TXD"); break; + case RSX_FP_OPCODE_TXB: SetDst("texture($t, $0.xy, $1.x)"); break; + case RSX_FP_OPCODE_TXL: SetDst("textureLod($t, $0.xy, $1.x)"); break; + case RSX_FP_OPCODE_UP2: SetDst("unpackSnorm2x16($0)"); break; // TODO: More testing (Sonic The Hedgehog (NPUB-30442/NPEB-00478)) + case RSX_FP_OPCODE_UP4: SetDst("unpackSnorm4x8($0)"); break; // TODO: More testing (Sonic The Hedgehog (NPUB-30442/NPEB-00478)) + case RSX_FP_OPCODE_UP16: LOG_ERROR(RSX, "Unimplemented TEX_SRB instruction: UP16"); break; + case RSX_FP_OPCODE_UPB: LOG_ERROR(RSX, "Unimplemented TEX_SRB instruction: UPB"); break; + case RSX_FP_OPCODE_UPG: LOG_ERROR(RSX, "Unimplemented TEX_SRB instruction: UPG"); break; + + default: + return false; + } + + return true; + }; + + auto SIP = [&]() + { + switch (opcode) + { + case RSX_FP_OPCODE_BRK: SetDst("break"); break; + case RSX_FP_OPCODE_CAL: LOG_ERROR(RSX, "Unimplemented SIP instruction: CAL"); break; + case RSX_FP_OPCODE_FENCT: 
forced_unit = FORCE_SCT; break; + case RSX_FP_OPCODE_FENCB: forced_unit = FORCE_SCB; break; + case RSX_FP_OPCODE_IFE: + AddCode("if($cond)"); + m_else_offsets.push_back(src1.else_offset << 2); + m_end_offsets.push_back(src2.end_offset << 2); + AddCode("{"); + m_code_level++; + break; + case RSX_FP_OPCODE_LOOP: + if (!src0.exec_if_eq && !src0.exec_if_gr && !src0.exec_if_lt) + { + AddCode(fmt::Format("$ifcond for(int i%u = %u; i%u < %u; i%u += %u) {} //-> %u //LOOP", + m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment, src2.end_offset)); + } + else + { + AddCode(fmt::Format("$ifcond for(int i%u = %u; i%u < %u; i%u += %u) //LOOP", + m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment)); + m_loop_count++; + m_end_offsets.push_back(src2.end_offset << 2); + AddCode("{"); + m_code_level++; + } + break; + case RSX_FP_OPCODE_REP: + if (!src0.exec_if_eq && !src0.exec_if_gr && !src0.exec_if_lt) + { + AddCode(fmt::Format("$ifcond for(int i%u = %u; i%u < %u; i%u += %u) {} //-> %u //REP", + m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment, src2.end_offset)); + } + else + { + AddCode(fmt::Format("if($cond) for(int i%u = %u; i%u < %u; i%u += %u) //REP", + m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment)); + m_loop_count++; + m_end_offsets.push_back(src2.end_offset << 2); + AddCode("{"); + m_code_level++; + } + break; + case RSX_FP_OPCODE_RET: SetDst("return"); break; + + default: + return false; + } + + return true; + }; + + switch (opcode) + { + case RSX_FP_OPCODE_NOP: break; + case RSX_FP_OPCODE_KIL: SetDst("discard", false); break; + + default: + if (forced_unit == FORCE_NONE) + { + if (SIP()) break; + if (SCT()) break; + if (TEX_SRB()) break; + if (SCB()) break; + } + else if (forced_unit == FORCE_SCT) + { + forced_unit = FORCE_NONE; + if (SCT()) break; + } + else if (forced_unit == FORCE_SCB) + { + 
forced_unit = FORCE_NONE; + if (SCB()) break; + } + + LOG_ERROR(RSX, "Unknown/illegal instruction: 0x%x (forced unit %d)", opcode, forced_unit); + break; + } + + m_size += m_offset; + + if (dst.end) break; + + assert(m_offset % sizeof(u32) == 0); + data += m_offset / sizeof(u32); + } + + // flush m_code_level + m_code_level = 1; + std::string m_shader = BuildCode(); + main.clear(); +// m_parr.params.clear(); + return m_shader; +} + +#endif \ No newline at end of file diff --git a/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.h b/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.h new file mode 100644 index 0000000000..c406b3d269 --- /dev/null +++ b/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.h @@ -0,0 +1,43 @@ +#pragma once +#if defined(DX12_SUPPORT) +#include "ShaderParam.h" +#include "Emu/RSX/RSXFragmentProgram.h" + +class FragmentDecompiler +{ + std::string main; + ParamArray m_parr; + u32 m_addr; + u32& m_size; + u32 m_const_index; + u32 m_offset; + u32 m_location; + u32 m_ctrl; + u32 m_loop_count; + int m_code_level; + std::vector m_end_offsets; + std::vector m_else_offsets; + + std::string GetMask(); + + void SetDst(std::string code, bool append_mask = true); + void AddCode(const std::string& code); + std::string AddReg(u32 index, int fp16); + bool HasReg(u32 index, int fp16); + std::string AddCond(); + std::string AddConst(); + std::string AddTex(); + std::string Format(const std::string& code); + + void AddCodeCond(const std::string& dst, const std::string& src); + std::string GetCond(); + template std::string GetSRC(T src); + std::string BuildCode(); + + u32 GetData(const u32 d) const { return d << 16 | d >> 16; } +public: + FragmentDecompiler(u32 addr, u32& size, u32 ctrl); + std::string Decompile(); +}; + +#endif \ No newline at end of file diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index 232b4bd521..5ff058f7d5 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -45,6 +45,7 @@ + @@ -508,6 +509,7 @@ + diff --git 
a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters index f30d1fdaee..cc40c3f295 100644 --- a/rpcs3/emucore.vcxproj.filters +++ b/rpcs3/emucore.vcxproj.filters @@ -980,6 +980,9 @@ Emu\GPU\RSX\D3D12 + + Emu\GPU\RSX\D3D12 + Emu\GPU\RSX\D3D12 @@ -1861,6 +1864,9 @@ Emu\GPU\RSX\D3D12 + + Emu\GPU\RSX\D3D12 + Emu\GPU\RSX\D3D12 From 9cb87552b81d6bde04137bd7327006716b78372c Mon Sep 17 00:00:00 2001 From: vlj Date: Thu, 14 May 2015 20:57:33 +0200 Subject: [PATCH 045/343] d3d12: Fragment program decompiler can decompile basic shader --- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp | 21 ++--- .../RSX/D3D12/FragmentProgramDecompiler.cpp | 90 ++++++++++++++----- .../Emu/RSX/D3D12/FragmentProgramDecompiler.h | 6 ++ 3 files changed, 79 insertions(+), 38 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp index 1feb6e1964..5ce6ac1b4b 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp @@ -109,11 +109,11 @@ ID3D12PipelineState *PipelineStateObjectCache::getGraphicPipelineState( LOG_WARNING(RSX, "FP not found in buffer!"); FragmentDecompiler FS(fragmentShader->addr, fragmentShader->size, fragmentShader->offset); const std::string &shader = FS.Decompile(); - m_fragment_prog.Compile("", SHADER_TYPE::SHADER_TYPE_FRAGMENT); + m_fragment_prog.Compile(shader, SHADER_TYPE::SHADER_TYPE_FRAGMENT); AddFragmentProgram(m_fragment_prog, *fragmentShader); // TODO: This shouldn't use current dir - fs::file("./FragmentProgram.txt", o_write | o_create | o_trunc).write(shader.c_str(), shader.size()); + fs::file("./FragmentProgram.hlsl", o_write | o_create | o_trunc).write(shader.c_str(), shader.size()); } if (!m_vp_buf_num) @@ -125,7 +125,7 @@ ID3D12PipelineState *PipelineStateObjectCache::getGraphicPipelineState( AddVertexProgram(m_vertex_prog, *vertexShader); // TODO: This shouldn't use current dir - fs::file("./VertexProgram.txt", o_write | o_create | 
o_trunc).write(shaderCode.c_str(), shaderCode.size()); + fs::file("./VertexProgram.hlsl", o_write | o_create | o_trunc).write(shaderCode.c_str(), shaderCode.size()); } if (m_fp_buf_num && m_vp_buf_num) @@ -262,18 +262,7 @@ ID3D12PipelineState *PipelineStateObjectCache::getGraphicPipelineState( #define TO_STRING(x) #x void Shader::Compile(const std::string &code, SHADER_TYPE st) -{ - static const char FSstring[] = TO_STRING( - struct pixel { - float4 dst_reg0 : SV_POSITION; - float4 dst_reg1 : TEXCOORD0; - }; - float4 main(pixel In) : SV_TARGET - { - return In.dst_reg1; - }); - - HRESULT hr; +{ HRESULT hr; Microsoft::WRL::ComPtr errorBlob; switch (st) { @@ -283,7 +272,7 @@ void Shader::Compile(const std::string &code, SHADER_TYPE st) LOG_ERROR(RSX, "VS build failed:%s", errorBlob->GetBufferPointer()); break; case SHADER_TYPE::SHADER_TYPE_FRAGMENT: - hr = D3DCompile(FSstring, sizeof(FSstring), "test", nullptr, nullptr, "main", "ps_5_0", 0, 0, &bytecode, errorBlob.GetAddressOf()); + hr = D3DCompile(code.c_str(), code.size(), "test", nullptr, nullptr, "main", "ps_5_0", 0, 0, &bytecode, errorBlob.GetAddressOf()); if (hr != S_OK) LOG_ERROR(RSX, "FS build failed:%s", errorBlob->GetBufferPointer()); break; diff --git a/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp index c965b6dda1..aace4ab9e6 100644 --- a/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp @@ -6,6 +6,14 @@ #include "Emu/Memory/Memory.h" #include "Emu/System.h" +static std::string typeName[] = +{ + "float", + "float2", + "float3", + "float4" +}; + FragmentDecompiler::FragmentDecompiler(u32 addr, u32& size, u32 ctrl) : m_addr(addr), m_size(size), @@ -48,7 +56,7 @@ void FragmentDecompiler::SetDst(std::string code, bool append_mask) { if (dst.set_cond) { - AddCode("$ifcond " + m_parr.AddParam(PARAM_NONE, "vec4", "cc" + std::to_string(src0.cond_mod_reg_index)) + "$m = " + code + ";"); + AddCode("$ifcond 
" + m_parr.AddParam(PARAM_NONE, typeName[3], "cc" + std::to_string(src0.cond_mod_reg_index)) + "$m = " + code + ";"); } else { @@ -65,7 +73,7 @@ void FragmentDecompiler::SetDst(std::string code, bool append_mask) if (dst.set_cond) { - AddCode(m_parr.AddParam(PARAM_NONE, "vec4", "cc" + std::to_string(src0.cond_mod_reg_index)) + "$m = " + dest + ";"); + AddCode(m_parr.AddParam(PARAM_NONE, typeName[3], "cc" + std::to_string(src0.cond_mod_reg_index)) + "$m = " + dest + ";"); } } @@ -93,24 +101,24 @@ std::string FragmentDecompiler::GetMask() std::string FragmentDecompiler::AddReg(u32 index, int fp16) { - return m_parr.AddParam(PARAM_NONE, "vec4", std::string(fp16 ? "h" : "r") + std::to_string(index), "vec4(0.0)"); + return m_parr.AddParam(PARAM_NONE, typeName[3], std::string(fp16 ? "h" : "r") + std::to_string(index), typeName[3] + "(0.0)"); } bool FragmentDecompiler::HasReg(u32 index, int fp16) { - return m_parr.HasParam(PARAM_NONE, "vec4", + return m_parr.HasParam(PARAM_NONE, typeName[3], std::string(fp16 ? 
"h" : "r") + std::to_string(index)); } std::string FragmentDecompiler::AddCond() { - return m_parr.AddParam(PARAM_NONE, "vec4", "cc" + std::to_string(src0.cond_reg_index)); + return m_parr.AddParam(PARAM_NONE, typeName[3], "cc" + std::to_string(src0.cond_reg_index)); } std::string FragmentDecompiler::AddConst() { std::string name = std::string("fc") + std::to_string(m_size + 4 * 4); - if (m_parr.HasParam(PARAM_UNIFORM, "vec4", name)) + if (m_parr.HasParam(PARAM_UNIFORM, typeName[3], name)) { return name; } @@ -122,8 +130,8 @@ std::string FragmentDecompiler::AddConst() u32 y = GetData(data[1]); u32 z = GetData(data[2]); u32 w = GetData(data[3]); - return m_parr.AddParam(PARAM_UNIFORM, "vec4", name, - std::string("vec4(") + std::to_string((float&)x) + ", " + std::to_string((float&)y) + return m_parr.AddParam(PARAM_UNIFORM, typeName[3], name, + std::string(typeName[3] + "(") + std::to_string((float&)x) + ", " + std::to_string((float&)y) + ", " + std::to_string((float&)z) + ", " + std::to_string((float&)w) + ")"); } @@ -201,7 +209,7 @@ std::string FragmentDecompiler::GetCond() cond = "equal"; } - return "any(" + cond + "(" + AddCond() + swizzle + ", vec4(0.0)))"; + return "any(" + cond + "(" + AddCond() + swizzle + ", " + typeName[3] +"(0.0)))"; } void FragmentDecompiler::AddCodeCond(const std::string& dst, const std::string& src) @@ -252,7 +260,7 @@ void FragmentDecompiler::AddCodeCond(const std::string& dst, const std::string& cond = "equal"; } - cond = cond + "(" + AddCond() + swizzle + ", vec4(0.0))"; + cond = cond + "(" + AddCond() + swizzle + ", " + typeName[3] + "(0.0))"; ShaderVar dst_var(dst); dst_var.symplify(); @@ -261,7 +269,7 @@ void FragmentDecompiler::AddCodeCond(const std::string& dst, const std::string& if (dst_var.swizzles[0].length() == 1) { - AddCode("if (" + cond + ".x) " + dst + " = vec4(" + src + ").x;"); + AddCode("if (" + cond + ".x) " + dst + " = " + typeName[3] + "(" + src + ").x;"); } else { @@ -299,12 +307,12 @@ template std::string 
FragmentDecompiler::GetSRC(T src) default: if (dst.src_attr_reg_num < sizeof(reg_table) / sizeof(reg_table[0])) { - ret += m_parr.AddParam(PARAM_IN, "vec4", reg_table[dst.src_attr_reg_num]); + ret += m_parr.AddParam(PARAM_IN, typeName[3], reg_table[dst.src_attr_reg_num]); } else { LOG_ERROR(RSX, "Bad src reg num: %d", fmt::by_value(dst.src_attr_reg_num)); - ret += m_parr.AddParam(PARAM_IN, "vec4", "unk"); + ret += m_parr.AddParam(PARAM_IN, typeName[3], "unk"); Emu.Pause(); } break; @@ -351,22 +359,60 @@ std::string FragmentDecompiler::BuildCode() for (int i = 0; i < sizeof(table) / sizeof(*table); ++i) { - if (m_parr.HasParam(PARAM_NONE, "vec4", table[i].second)) - AddCode(m_parr.AddParam(PARAM_OUT, "vec4", table[i].first, i) + " = " + table[i].second + ";"); + if (m_parr.HasParam(PARAM_NONE, typeName[3], table[i].second)) + AddCode(m_parr.AddParam(PARAM_OUT, typeName[3], table[i].first, i) + " = " + table[i].second + ";"); } if (m_ctrl & 0xe) main += m_ctrl & 0x40 ? "\tgl_FragDepth = r1.z;\n" : "\tgl_FragDepth = h2.z;\n"; - std::string p; + std::stringstream OS; + insertHeader(OS); + insertIntputs(OS); + insertMainStart(OS); + OS << main << std::endl; + insertMainEnd(OS); - for (auto& param : m_parr.params) { -// p += param.Format(); + return OS.str(); +} + +void FragmentDecompiler::insertHeader(std::stringstream & OS) +{ + OS << "// Nothing" << std::endl; +} + +void FragmentDecompiler::insertIntputs(std::stringstream & OS) +{ + OS << "struct PSInput" << std::endl; + OS << "{" << std::endl; + OS << " float4 dst_reg0 : SV_POSITION;" << std::endl; + size_t index = 0; + for (ParamType PT : m_parr.params[PARAM_IN]) + { + for (ParamItem PI : PT.items) + { + OS << " " << PT.type << " " << PI.name << " : TEXCOORD" << index << ";" << std::endl; + index++; + } } + OS << "};" << std::endl; +} - return std::string("#version 420\n" - "\n" - + p + "\n" - "void main()\n{\n" + main + "}\n"); +void FragmentDecompiler::insertMainStart(std::stringstream & OS) +{ + OS << "float4 
main(PSInput In) : SV_TARGET" << std::endl; + OS << "{" << std::endl; + OS << " float4 r0;" << std::endl; + for (ParamType PT : m_parr.params[PARAM_IN]) + { + for (ParamItem PI : PT.items) + OS << " " << PT.type << " " << PI.name << " = In." << PI.name << ";" << std::endl; + } +} + +void FragmentDecompiler::insertMainEnd(std::stringstream & OS) +{ + OS << " return r0;" << std::endl; + OS << "}" << std::endl; } std::string FragmentDecompiler::Decompile() diff --git a/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.h b/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.h index c406b3d269..b24ff72f73 100644 --- a/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.h +++ b/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.h @@ -2,6 +2,7 @@ #if defined(DX12_SUPPORT) #include "ShaderParam.h" #include "Emu/RSX/RSXFragmentProgram.h" +#include class FragmentDecompiler { @@ -35,6 +36,11 @@ class FragmentDecompiler std::string BuildCode(); u32 GetData(const u32 d) const { return d << 16 | d >> 16; } +protected: + virtual void insertHeader(std::stringstream &OS); + virtual void insertIntputs(std::stringstream &OS); + virtual void insertMainStart(std::stringstream &OS); + virtual void insertMainEnd(std::stringstream &OS); public: FragmentDecompiler(u32 addr, u32& size, u32 ctrl); std::string Decompile(); From ee3e5cfe1f50fff1660a56220afd0acb34f59d45 Mon Sep 17 00:00:00 2001 From: vlj Date: Thu, 14 May 2015 21:21:51 +0200 Subject: [PATCH 046/343] d3d12: Support for fragment constant buffer --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 63 +++++++++++-------- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 2 +- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp | 6 +- .../RSX/D3D12/FragmentProgramDecompiler.cpp | 13 ++++ .../Emu/RSX/D3D12/FragmentProgramDecompiler.h | 1 + 5 files changed, 54 insertions(+), 31 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index d1c3128946..b547e917df 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ 
b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -105,7 +105,16 @@ D3D12GSRender::D3D12GSRender() &resDesc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, - IID_PPV_ARGS(&m_constantsBuffer) + IID_PPV_ARGS(&m_constantsVertexBuffer) + )); + + check(m_device->CreateCommittedResource( + &heapProp, + D3D12_HEAP_FLAG_NONE, + &resDesc, + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&m_constantsFragmentBuffer) )); D3D12_DESCRIPTOR_HEAP_DESC descriptorHeapDesc = {}; @@ -139,7 +148,7 @@ D3D12GSRender::D3D12GSRender() descriptorRange[0].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV; // Constants descriptorRange[1].BaseShaderRegister = 1; - descriptorRange[1].NumDescriptors = 1; + descriptorRange[1].NumDescriptors = 2; descriptorRange[1].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV; D3D12_ROOT_PARAMETER RP[2] = {}; RP[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; @@ -176,7 +185,8 @@ D3D12GSRender::~D3D12GSRender() m_backbufferAsRendertarget[1]->Release(); m_constantsBufferDescriptorsHeap->Release(); m_scaleOffsetDescriptorHeap->Release(); - m_constantsBuffer->Release(); + m_constantsVertexBuffer->Release(); + m_constantsFragmentBuffer->Release(); m_scaleOffsetBuffer->Release(); for (unsigned i = 0; i < 32; i++) m_vertexBuffer[i]->Release(); @@ -392,7 +402,7 @@ void D3D12GSRender::setScaleOffset() void D3D12GSRender::FillVertexShaderConstantsBuffer() { void *constantsBufferMap; - check(m_constantsBuffer->Map(0, nullptr, &constantsBufferMap)); + check(m_constantsVertexBuffer->Map(0, nullptr, &constantsBufferMap)); for (const RSXTransformConstant& c : m_transform_constants) { @@ -402,12 +412,12 @@ void D3D12GSRender::FillVertexShaderConstantsBuffer() size_t bufferSizeCandidate = offset + 4 * sizeof(float); m_constantsBufferSize = bufferSizeCandidate > m_constantsBufferSize ? 
bufferSizeCandidate : m_constantsBufferSize; } - m_constantsBuffer->Unmap(0, nullptr); + m_constantsVertexBuffer->Unmap(0, nullptr); // make it multiple of 256 bytes m_constantsBufferSize = (m_constantsBufferSize + 255) & ~255; D3D12_CONSTANT_BUFFER_VIEW_DESC constantBufferViewDesc = {}; - constantBufferViewDesc.BufferLocation = m_constantsBuffer->GetGPUVirtualAddress(); + constantBufferViewDesc.BufferLocation = m_constantsVertexBuffer->GetGPUVirtualAddress(); constantBufferViewDesc.SizeInBytes = (UINT)m_constantsBufferSize; D3D12_CPU_DESCRIPTOR_HANDLE Handle = m_constantsBufferDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); Handle.ptr += m_constantsBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); @@ -416,28 +426,25 @@ void D3D12GSRender::FillVertexShaderConstantsBuffer() void D3D12GSRender::FillPixelShaderConstantsBuffer() { -/* if (!m_cur_fragment_prog) - { - LOG_ERROR(RSX, "InitFragmentData: m_cur_shader_prog == NULL"); - return; - } - + size_t index = 0; + void *constantsBufferMap; + check(m_constantsFragmentBuffer->Map(0, nullptr, &constantsBufferMap)); for (const RSXTransformConstant& c : m_fragment_constants) { u32 id = c.id - m_cur_fragment_prog->offset; - //LOG_WARNING(RSX,"fc%u[0x%x - 0x%x] = (%f, %f, %f, %f)", id, c.id, m_cur_shader_prog->offset, c.x, c.y, c.z, c.w); - - const std::string name = fmt::Format("fc%u", id); - const int l = m_program.GetLocation(name); - checkForGlError("glGetUniformLocation " + name); - - glUniform4f(l, c.x, c.y, c.z, c.w); - checkForGlError("glUniform4f " + name + fmt::Format(" %u [%f %f %f %f]", l, c.x, c.y, c.z, c.w)); + float vector[] = { c.x, c.y, c.z, c.w }; + memcpy(constantsBufferMap, vector, 4 * sizeof(float)); + index++; } + m_constantsFragmentBuffer->Unmap(0, nullptr); - //if (m_fragment_constants.GetCount())*/ - // LOG_NOTICE(HLE, ""); + D3D12_CONSTANT_BUFFER_VIEW_DESC constantBufferViewDesc = {}; + constantBufferViewDesc.BufferLocation = 
m_constantsFragmentBuffer->GetGPUVirtualAddress(); + constantBufferViewDesc.SizeInBytes = (UINT) index * 4 * sizeof(float); + D3D12_CPU_DESCRIPTOR_HANDLE Handle = m_constantsBufferDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); + Handle.ptr += m_constantsBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + m_device->CreateConstantBufferView(&constantBufferViewDesc, Handle); } @@ -534,14 +541,16 @@ void D3D12GSRender::ExecCMD() commandList->SetGraphicsRootDescriptorTable(0, Handle); m_currentScaleOffsetBufferIndex++; + size_t currentBufferIndex = m_constantsBufferIndex; FillVertexShaderConstantsBuffer(); - commandList->SetDescriptorHeaps(1, &m_constantsBufferDescriptorsHeap); - Handle = m_constantsBufferDescriptorsHeap->GetGPUDescriptorHandleForHeapStart(); - Handle.ptr += m_constantsBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - commandList->SetGraphicsRootDescriptorTable(1, Handle); + m_constantsBufferIndex++; + FillPixelShaderConstantsBuffer(); m_constantsBufferIndex++; - FillPixelShaderConstantsBuffer(); + commandList->SetDescriptorHeaps(1, &m_constantsBufferDescriptorsHeap); + Handle = m_constantsBufferDescriptorsHeap->GetGPUDescriptorHandleForHeapStart(); + Handle.ptr += currentBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + commandList->SetGraphicsRootDescriptorTable(1, Handle); } if (!LoadProgram()) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index d400a0d4d9..db01de64b6 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -54,7 +54,7 @@ private: // GLTexture m_gl_vertex_textures[m_textures_count]; ID3D12Resource *m_indexBuffer, *m_vertexBuffer[m_vertex_count]; - ID3D12Resource *m_constantsBuffer; + ID3D12Resource *m_constantsVertexBuffer, *m_constantsFragmentBuffer; ID3D12DescriptorHeap 
*m_constantsBufferDescriptorsHeap; size_t m_constantsBufferSize, m_constantsBufferIndex; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp index 5ce6ac1b4b..6aa6bf9a0d 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp @@ -168,9 +168,9 @@ ID3D12PipelineState *PipelineStateObjectCache::getGraphicPipelineState( } else { - LOG_WARNING(RSX, "Add program :"); - LOG_WARNING(RSX, "*** vp id = %d", m_vertex_prog.Id); - LOG_WARNING(RSX, "*** fp id = %d", m_fragment_prog.Id); +// LOG_WARNING(RSX, "Add program :"); +// LOG_WARNING(RSX, "*** vp id = %d", m_vertex_prog.Id); +// LOG_WARNING(RSX, "*** fp id = %d", m_fragment_prog.Id); D3D12_GRAPHICS_PIPELINE_STATE_DESC graphicPipelineStateDesc = {}; diff --git a/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp index aace4ab9e6..6652b7ec63 100644 --- a/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp @@ -367,6 +367,7 @@ std::string FragmentDecompiler::BuildCode() std::stringstream OS; insertHeader(OS); + insertConstants(OS); insertIntputs(OS); insertMainStart(OS); OS << main << std::endl; @@ -397,6 +398,18 @@ void FragmentDecompiler::insertIntputs(std::stringstream & OS) OS << "};" << std::endl; } +void FragmentDecompiler::insertConstants(std::stringstream & OS) +{ + OS << "cbuffer CONSTANT : register(b2)" << std::endl; + OS << "{" << std::endl; + for (ParamType PT : m_parr.params[PARAM_UNIFORM]) + { + for (ParamItem PI : PT.items) + OS << " " << PT.type << " " << PI.name << ";" << std::endl; + } + OS << "};" << std::endl; +} + void FragmentDecompiler::insertMainStart(std::stringstream & OS) { OS << "float4 main(PSInput In) : SV_TARGET" << std::endl; diff --git a/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.h b/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.h index b24ff72f73..e22f5204f6 100644 --- 
a/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.h +++ b/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.h @@ -39,6 +39,7 @@ class FragmentDecompiler protected: virtual void insertHeader(std::stringstream &OS); virtual void insertIntputs(std::stringstream &OS); + virtual void insertConstants(std::stringstream &OS); virtual void insertMainStart(std::stringstream &OS); virtual void insertMainEnd(std::stringstream &OS); public: From a276391e870b64335a2a5e672fa47edaf1de4244 Mon Sep 17 00:00:00 2001 From: vlj Date: Fri, 15 May 2015 00:18:30 +0200 Subject: [PATCH 047/343] d3d12: Fix linkage between VS and PS --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 11 +++---- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 1 + .../RSX/D3D12/FragmentProgramDecompiler.cpp | 30 +++++++++++-------- .../Emu/RSX/D3D12/VertexProgramDecompiler.cpp | 24 +++++++++------ 4 files changed, 40 insertions(+), 26 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index b547e917df..f19f2d9a2b 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -24,6 +24,7 @@ D3D12GSRender::D3D12GSRender() m_constantsBufferSize = 0; m_constantsBufferIndex = 0; m_currentScaleOffsetBufferIndex = 0; + constantsFragmentSize = 0; // Enable d3d debug layer Microsoft::WRL::ComPtr debugInterface; D3D12GetDebugInterface(IID_PPV_ARGS(&debugInterface)); @@ -432,16 +433,15 @@ void D3D12GSRender::FillPixelShaderConstantsBuffer() for (const RSXTransformConstant& c : m_fragment_constants) { u32 id = c.id - m_cur_fragment_prog->offset; - - float vector[] = { c.x, c.y, c.z, c.w }; - memcpy(constantsBufferMap, vector, 4 * sizeof(float)); - index++; } + float vector[] = { 0.,1.,0.,0. 
}; +// memcpy((char*)constantsBufferMap, vector, 4 * sizeof(float)); + index++; m_constantsFragmentBuffer->Unmap(0, nullptr); D3D12_CONSTANT_BUFFER_VIEW_DESC constantBufferViewDesc = {}; constantBufferViewDesc.BufferLocation = m_constantsFragmentBuffer->GetGPUVirtualAddress(); - constantBufferViewDesc.SizeInBytes = (UINT) index * 4 * sizeof(float); + constantBufferViewDesc.SizeInBytes = (UINT)256; D3D12_CPU_DESCRIPTOR_HANDLE Handle = m_constantsBufferDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); Handle.ptr += m_constantsBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); m_device->CreateConstantBufferView(&constantBufferViewDesc, Handle); @@ -1030,5 +1030,6 @@ void D3D12GSRender::Flip() m_constantsBufferSize = 0; m_constantsBufferIndex = 0; m_currentScaleOffsetBufferIndex = 0; + constantsFragmentSize = 0; } #endif \ No newline at end of file diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index db01de64b6..4835fdf031 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -55,6 +55,7 @@ private: ID3D12Resource *m_indexBuffer, *m_vertexBuffer[m_vertex_count]; ID3D12Resource *m_constantsVertexBuffer, *m_constantsFragmentBuffer; + size_t constantsFragmentSize; ID3D12DescriptorHeap *m_constantsBufferDescriptorsHeap; size_t m_constantsBufferSize, m_constantsBufferIndex; diff --git a/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp index 6652b7ec63..e75c5c6864 100644 --- a/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp @@ -383,18 +383,24 @@ void FragmentDecompiler::insertHeader(std::stringstream & OS) void FragmentDecompiler::insertIntputs(std::stringstream & OS) { - OS << "struct PSInput" << std::endl; + OS << "struct PixelInput" << std::endl; OS << "{" << std::endl; - OS << " float4 dst_reg0 : SV_POSITION;" << std::endl; - size_t 
index = 0; - for (ParamType PT : m_parr.params[PARAM_IN]) - { - for (ParamItem PI : PT.items) - { - OS << " " << PT.type << " " << PI.name << " : TEXCOORD" << index << ";" << std::endl; - index++; - } - } + OS << " float4 Position : SV_POSITION;" << std::endl; + OS << " float4 diff_color : COLOR0;" << std::endl; + OS << " float4 spec_color : COLOR1;" << std::endl; + OS << " float4 dst_reg3 : COLOR2;" << std::endl; + OS << " float4 dst_reg4 : COLOR3;" << std::endl; + OS << " float fogc : FOG;" << std::endl; + OS << " float4 dummy : COLOR4;" << std::endl; + OS << " float4 tc0 : TEXCOORD0;" << std::endl; + OS << " float4 tc1 : TEXCOORD1;" << std::endl; + OS << " float4 tc2 : TEXCOORD2;" << std::endl; + OS << " float4 tc3 : TEXCOORD3;" << std::endl; + OS << " float4 tc4 : TEXCOORD4;" << std::endl; + OS << " float4 tc5 : TEXCOORD5;" << std::endl; + OS << " float4 tc6 : TEXCOORD6;" << std::endl; + OS << " float4 tc7 : TEXCOORD7;" << std::endl; + OS << " float4 tc8 : TEXCOORD8;" << std::endl; OS << "};" << std::endl; } @@ -412,7 +418,7 @@ void FragmentDecompiler::insertConstants(std::stringstream & OS) void FragmentDecompiler::insertMainStart(std::stringstream & OS) { - OS << "float4 main(PSInput In) : SV_TARGET" << std::endl; + OS << "float4 main(PixelInput In) : SV_TARGET" << std::endl; OS << "{" << std::endl; OS << " float4 r0;" << std::endl; for (ParamType PT : m_parr.params[PARAM_IN]) diff --git a/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.cpp index 982f3f3aed..65e1e8b866 100644 --- a/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.cpp @@ -479,15 +479,21 @@ void VertexDecompiler::insertOutputs(std::stringstream & OS, const std::vector

Date: Fri, 15 May 2015 00:21:09 +0200 Subject: [PATCH 048/343] d3d12: Print fps in windows title --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index f19f2d9a2b..8852ae0cb7 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -1031,5 +1031,6 @@ void D3D12GSRender::Flip() m_constantsBufferIndex = 0; m_currentScaleOffsetBufferIndex = 0; constantsFragmentSize = 0; + m_frame->Flip(nullptr); } #endif \ No newline at end of file From 88aea7010018d7ff5e36fa201118e12922ea3cb7 Mon Sep 17 00:00:00 2001 From: vlj Date: Fri, 15 May 2015 00:51:26 +0200 Subject: [PATCH 049/343] d3d12: some fix for fragment program decompiler --- .../RSX/D3D12/FragmentProgramDecompiler.cpp | 100 +++++++++++++----- .../Emu/RSX/D3D12/FragmentProgramDecompiler.h | 1 + 2 files changed, 73 insertions(+), 28 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp index e75c5c6864..aad765f258 100644 --- a/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp @@ -14,6 +14,19 @@ static std::string typeName[] = "float4" }; +enum FUNCTION { + FUNCTION_SATURATE, + FUNCTION_DP2, + FUNCTION_FRACT, +}; + +static std::string functionName[] = +{ + "saturate", + "float4(dot($0.xy, $1.xy), dot($0.xy, $1.xy), dot($0.xy, $1.xy), dot($0.xy, $1.xy))", + "frac($0)", +}; + FragmentDecompiler::FragmentDecompiler(u32 addr, u32& size, u32 ctrl) : m_addr(addr), m_size(size), @@ -47,7 +60,7 @@ void FragmentDecompiler::SetDst(std::string code, bool append_mask) if (dst.saturate) { - code = "clamp(" + code + ", 0.0, 1.0)"; + code = functionName[FUNCTION_SATURATE] + "(" + code + ")"; } code += (append_mask ? 
"$m" : ""); @@ -237,30 +250,30 @@ void FragmentDecompiler::AddCodeCond(const std::string& dst, const std::string& if (src0.exec_if_gr && src0.exec_if_eq) { - cond = "greaterThanEqual"; + cond = ">="; } else if (src0.exec_if_lt && src0.exec_if_eq) { - cond = "lessThanEqual"; + cond = "<="; } else if (src0.exec_if_gr && src0.exec_if_lt) { - cond = "notEqual"; + cond = "!="; } else if (src0.exec_if_gr) { - cond = "greaterThan"; + cond = ">"; } else if (src0.exec_if_lt) { - cond = "lessThan"; + cond = "<"; } else //if(src0.exec_if_eq) { - cond = "equal"; + cond = "=="; } - cond = cond + "(" + AddCond() + swizzle + ", " + typeName[3] + "(0.0))"; + cond = "(" + AddCond() + swizzle + " " + cond + " " + typeName[3] + "(0., 0., 0., 0.))"; ShaderVar dst_var(dst); dst_var.symplify(); @@ -269,7 +282,7 @@ void FragmentDecompiler::AddCodeCond(const std::string& dst, const std::string& if (dst_var.swizzles[0].length() == 1) { - AddCode("if (" + cond + ".x) " + dst + " = " + typeName[3] + "(" + src + ").x;"); + AddCode("if (" + cond + ".x) " + dst + " = " + src + ";"); } else { @@ -349,26 +362,18 @@ template std::string FragmentDecompiler::GetSRC(T src) std::string FragmentDecompiler::BuildCode() { //main += fmt::Format("\tgl_FragColor = %c0;\n", m_ctrl & 0x40 ? 'r' : 'h'); - const std::pair table[] = - { - { "ocol0", m_ctrl & 0x40 ? "r0" : "h0" }, - { "ocol1", m_ctrl & 0x40 ? "r2" : "h4" }, - { "ocol2", m_ctrl & 0x40 ? "r3" : "h6" }, - { "ocol3", m_ctrl & 0x40 ? "r4" : "h8" }, - }; - - for (int i = 0; i < sizeof(table) / sizeof(*table); ++i) - { - if (m_parr.HasParam(PARAM_NONE, typeName[3], table[i].second)) - AddCode(m_parr.AddParam(PARAM_OUT, typeName[3], table[i].first, i) + " = " + table[i].second + ";"); - } if (m_ctrl & 0xe) main += m_ctrl & 0x40 ? 
"\tgl_FragDepth = r1.z;\n" : "\tgl_FragDepth = h2.z;\n"; std::stringstream OS; insertHeader(OS); + OS << std::endl; insertConstants(OS); + OS << std::endl; insertIntputs(OS); + OS << std::endl; + insertOutputs(OS); + OS << std::endl; insertMainStart(OS); OS << main << std::endl; insertMainEnd(OS); @@ -378,7 +383,7 @@ std::string FragmentDecompiler::BuildCode() void FragmentDecompiler::insertHeader(std::stringstream & OS) { - OS << "// Nothing" << std::endl; + OS << "// Header" << std::endl; } void FragmentDecompiler::insertIntputs(std::stringstream & OS) @@ -404,6 +409,26 @@ void FragmentDecompiler::insertIntputs(std::stringstream & OS) OS << "};" << std::endl; } +void FragmentDecompiler::insertOutputs(std::stringstream & OS) +{ + OS << "struct PixelOutput" << std::endl; + OS << "{" << std::endl; + const std::pair table[] = + { + { "ocol0", m_ctrl & 0x40 ? "r0" : "h0" }, + { "ocol1", m_ctrl & 0x40 ? "r2" : "h4" }, + { "ocol2", m_ctrl & 0x40 ? "r3" : "h6" }, + { "ocol3", m_ctrl & 0x40 ? "r4" : "h8" }, + }; + + for (int i = 0; i < sizeof(table) / sizeof(*table); ++i) + { + if (m_parr.HasParam(PARAM_NONE, typeName[3], table[i].second)) + OS << " " << typeName[3] << " " << table[i].first << " : SV_TARGET" << i << ";" << std::endl; + } + OS << "};" << std::endl; +} + void FragmentDecompiler::insertConstants(std::stringstream & OS) { OS << "cbuffer CONSTANT : register(b2)" << std::endl; @@ -418,19 +443,38 @@ void FragmentDecompiler::insertConstants(std::stringstream & OS) void FragmentDecompiler::insertMainStart(std::stringstream & OS) { - OS << "float4 main(PixelInput In) : SV_TARGET" << std::endl; + OS << "PixelOutput main(PixelInput In)" << std::endl; OS << "{" << std::endl; - OS << " float4 r0;" << std::endl; for (ParamType PT : m_parr.params[PARAM_IN]) { for (ParamItem PI : PT.items) OS << " " << PT.type << " " << PI.name << " = In." 
<< PI.name << ";" << std::endl; } + // Declare output + for (ParamType PT : m_parr.params[PARAM_NONE]) + { + for (ParamItem PI : PT.items) + OS << " " << PT.type << " " << PI.name << " = float4(0., 0., 0., 0.);" << std::endl; + } } void FragmentDecompiler::insertMainEnd(std::stringstream & OS) { - OS << " return r0;" << std::endl; + const std::pair table[] = + { + { "ocol0", m_ctrl & 0x40 ? "r0" : "h0" }, + { "ocol1", m_ctrl & 0x40 ? "r2" : "h4" }, + { "ocol2", m_ctrl & 0x40 ? "r3" : "h6" }, + { "ocol3", m_ctrl & 0x40 ? "r4" : "h8" }, + }; + + OS << " PixelOutput Out;" << std::endl; + for (int i = 0; i < sizeof(table) / sizeof(*table); ++i) + { + if (m_parr.HasParam(PARAM_NONE, typeName[3], table[i].second)) + OS << " Out." << table[i].first << " = " << table[i].second << ";" << std::endl; + } + OS << " return Out;" << std::endl; OS << "}" << std::endl; } @@ -491,7 +535,7 @@ std::string FragmentDecompiler::Decompile() case RSX_FP_OPCODE_ADD: SetDst("($0 + $1)"); break; case RSX_FP_OPCODE_DIV: SetDst("($0 / $1)"); break; case RSX_FP_OPCODE_DIVSQ: SetDst("($0 / sqrt($1))"); break; - case RSX_FP_OPCODE_DP2: SetDst("vec4(dot($0.xy, $1.xy))"); break; + case RSX_FP_OPCODE_DP2: SetDst(functionName[FUNCTION_DP2]); break; case RSX_FP_OPCODE_DP3: SetDst("vec4(dot($0.xyz, $1.xyz))"); break; case RSX_FP_OPCODE_DP4: SetDst("vec4(dot($0, $1))"); break; case RSX_FP_OPCODE_DP2A: SetDst("vec4($0.x * $1.x + $0.y * $1.y + $2.x)"); break; @@ -532,7 +576,7 @@ std::string FragmentDecompiler::Decompile() case RSX_FP_OPCODE_REFL: LOG_ERROR(RSX, "Unimplemented SCB instruction: REFL"); break; // TODO: Is this in the right category? case RSX_FP_OPCODE_EX2: SetDst("exp2($0)"); break; case RSX_FP_OPCODE_FLR: SetDst("floor($0)"); break; - case RSX_FP_OPCODE_FRC: SetDst("fract($0)"); break; + case RSX_FP_OPCODE_FRC: SetDst(functionName[FUNCTION_FRACT]); break; case RSX_FP_OPCODE_LIT: SetDst("vec4(1.0, $0.x, ($0.x > 0.0 ? 
exp($0.w * log2($0.y)) : 0.0), 1.0)"); break; case RSX_FP_OPCODE_LIF: SetDst("vec4(1.0, $0.y, ($0.y > 0 ? pow(2.0, $0.w) : 0.0), 1.0)"); break; case RSX_FP_OPCODE_LRP: LOG_ERROR(RSX, "Unimplemented SCB instruction: LRP"); break; // TODO: Is this in the right category? diff --git a/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.h b/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.h index e22f5204f6..48f0791dc9 100644 --- a/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.h +++ b/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.h @@ -39,6 +39,7 @@ class FragmentDecompiler protected: virtual void insertHeader(std::stringstream &OS); virtual void insertIntputs(std::stringstream &OS); + virtual void insertOutputs(std::stringstream &OS); virtual void insertConstants(std::stringstream &OS); virtual void insertMainStart(std::stringstream &OS); virtual void insertMainEnd(std::stringstream &OS); From b6cc7af8cd4b9eaf3441202609b2b1fb6d3ceedf Mon Sep 17 00:00:00 2001 From: vlj Date: Fri, 15 May 2015 00:56:58 +0200 Subject: [PATCH 050/343] d3d12: Support for fragment constants --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 8852ae0cb7..56e92b2567 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -427,24 +427,27 @@ void D3D12GSRender::FillVertexShaderConstantsBuffer() void D3D12GSRender::FillPixelShaderConstantsBuffer() { - size_t index = 0; + size_t offset = 0; void *constantsBufferMap; check(m_constantsFragmentBuffer->Map(0, nullptr, &constantsBufferMap)); for (const RSXTransformConstant& c : m_fragment_constants) { u32 id = c.id - m_cur_fragment_prog->offset; + float vector[] = { c.x, c.y, c.z, c.w }; + memcpy((char*)constantsBufferMap + constantsFragmentSize + offset, vector, 4 * sizeof(float)); + offset += 4 * sizeof(float); } - float vector[] = { 0.,1.,0.,0. 
}; -// memcpy((char*)constantsBufferMap, vector, 4 * sizeof(float)); - index++; m_constantsFragmentBuffer->Unmap(0, nullptr); + // Multiple of 256 + offset = (offset + 255) & ~255; D3D12_CONSTANT_BUFFER_VIEW_DESC constantBufferViewDesc = {}; - constantBufferViewDesc.BufferLocation = m_constantsFragmentBuffer->GetGPUVirtualAddress(); - constantBufferViewDesc.SizeInBytes = (UINT)256; + constantBufferViewDesc.BufferLocation = m_constantsFragmentBuffer->GetGPUVirtualAddress() + constantsFragmentSize; + constantBufferViewDesc.SizeInBytes = (UINT)offset; D3D12_CPU_DESCRIPTOR_HANDLE Handle = m_constantsBufferDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); Handle.ptr += m_constantsBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); m_device->CreateConstantBufferView(&constantBufferViewDesc, Handle); + constantsFragmentSize += offset; } From 98a924874f2f95911d8b7a59c6178236a3ad574d Mon Sep 17 00:00:00 2001 From: vlj Date: Fri, 15 May 2015 01:03:46 +0200 Subject: [PATCH 051/343] d3d12: Inline constants It's slow --- rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp index aad765f258..7dbccdff74 100644 --- a/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp @@ -431,14 +431,21 @@ void FragmentDecompiler::insertOutputs(std::stringstream & OS) void FragmentDecompiler::insertConstants(std::stringstream & OS) { - OS << "cbuffer CONSTANT : register(b2)" << std::endl; + // TODO : Avoid constant recompilation and properly use constant buffer +/* OS << "cbuffer CONSTANT : register(b2)" << std::endl; OS << "{" << std::endl; for (ParamType PT : m_parr.params[PARAM_UNIFORM]) { for (ParamItem PI : PT.items) OS << " " << PT.type << " " << PI.name << ";" << std::endl; } - OS << "};" << std::endl; + OS << 
"};" << std::endl;*/ + + for (ParamType PT : m_parr.params[PARAM_UNIFORM]) + { + for (ParamItem PI : PT.items) + OS << PT.type << " " << PI.name << " = " << PI.value << ";" << std::endl; + } } void FragmentDecompiler::insertMainStart(std::stringstream & OS) From f4465013399f605615e92b406b57d229688ea57c Mon Sep 17 00:00:00 2001 From: vlj Date: Fri, 15 May 2015 01:27:02 +0200 Subject: [PATCH 052/343] d3d12: Try to fix fragment shaders first test --- .../Emu/RSX/D3D12/FragmentProgramDecompiler.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp index 7dbccdff74..4bc39d27bc 100644 --- a/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp @@ -415,10 +415,10 @@ void FragmentDecompiler::insertOutputs(std::stringstream & OS) OS << "{" << std::endl; const std::pair table[] = { - { "ocol0", m_ctrl & 0x40 ? "r0" : "h0" }, - { "ocol1", m_ctrl & 0x40 ? "r2" : "h4" }, - { "ocol2", m_ctrl & 0x40 ? "r3" : "h6" }, - { "ocol3", m_ctrl & 0x40 ? "r4" : "h8" }, + { "ocol0", "r0" }, + { "ocol1", "r2" }, + { "ocol2", "r3" }, + { "ocol3", "r4" }, }; for (int i = 0; i < sizeof(table) / sizeof(*table); ++i) @@ -469,10 +469,10 @@ void FragmentDecompiler::insertMainEnd(std::stringstream & OS) { const std::pair table[] = { - { "ocol0", m_ctrl & 0x40 ? "r0" : "h0" }, - { "ocol1", m_ctrl & 0x40 ? "r2" : "h4" }, - { "ocol2", m_ctrl & 0x40 ? "r3" : "h6" }, - { "ocol3", m_ctrl & 0x40 ? 
"r4" : "h8" }, + { "ocol0", "r0" }, + { "ocol1", "r2" }, + { "ocol2", "r3" }, + { "ocol3", "r4" }, }; OS << " PixelOutput Out;" << std::endl; From 789ed767e23326bd46d925b35464c071e9baeb88 Mon Sep 17 00:00:00 2001 From: vlj Date: Fri, 15 May 2015 01:49:51 +0200 Subject: [PATCH 053/343] d3d12: Move Constant inside main --- rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp index 4bc39d27bc..1cf237caf8 100644 --- a/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp @@ -368,14 +368,16 @@ std::string FragmentDecompiler::BuildCode() std::stringstream OS; insertHeader(OS); OS << std::endl; - insertConstants(OS); + OS << std::endl; insertIntputs(OS); OS << std::endl; insertOutputs(OS); OS << std::endl; insertMainStart(OS); + insertConstants(OS); OS << main << std::endl; + insertMainEnd(OS); return OS.str(); From 5cb0fe63b833bb0061c62a45660c86d79e160b06 Mon Sep 17 00:00:00 2001 From: vlj Date: Fri, 15 May 2015 17:38:56 +0200 Subject: [PATCH 054/343] d3d12: Avoid recompiling fragment shader if constants change --- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp | 27 +-- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h | 215 ++++++++++++--------- 2 files changed, 143 insertions(+), 99 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp index 6aa6bf9a0d..0d3b221c01 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp @@ -13,20 +13,23 @@ #pragma comment (lib, "d3dcompiler.lib") - -size_t getFPBinarySize(void *ptr) +namespace ProgramHashUtil { - const qword *instBuffer = (const qword*)ptr; - size_t instIndex = 0; - while (true) + + size_t getFPBinarySize(void *ptr) { - const qword& inst = instBuffer[instIndex]; - bool end = (inst.word[0] >> 8) & 0x1; - if 
(end) - return (instIndex + 1) * 4; - instIndex++; + const qword *instBuffer = (const qword*)ptr; + size_t instIndex = 0; + while (true) + { + const qword& inst = instBuffer[instIndex]; + bool end = (inst.word[0] >> 8) & 0x1; + if (end) + return (instIndex + 1) * 4 * 4; + instIndex++; + } } -} +}; PipelineStateObjectCache::PipelineStateObjectCache() : m_currentShaderId(0) @@ -79,7 +82,7 @@ void PipelineStateObjectCache::AddVertexProgram(Shader& vp, RSXVertexProgram& rs void PipelineStateObjectCache::AddFragmentProgram(Shader& fp, RSXFragmentProgram& rsx_fp) { - size_t actualFPSize = getFPBinarySize(vm::get_ptr(rsx_fp.addr)); + size_t actualFPSize = ProgramHashUtil::getFPBinarySize(vm::get_ptr(rsx_fp.addr)); void *fpShadowCopy = malloc(actualFPSize); memcpy(fpShadowCopy, vm::get_ptr(rsx_fp.addr), actualFPSize); fp.Id = (u32)m_currentShaderId++; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h index 1cf484fadc..2fc6f00936 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h @@ -40,109 +40,150 @@ public: void Compile(const std::string &code, SHADER_TYPE st); }; -// Based on -// https://github.com/AlexAltea/nucleus/blob/master/nucleus/gpu/rsx_pgraph.cpp -union qword -{ - u64 dword[2]; - u32 word[4]; -}; -struct HashVertexProgram + +namespace ProgramHashUtil { - size_t operator()(const void *program) const + // Based on + // https://github.com/AlexAltea/nucleus/blob/master/nucleus/gpu/rsx_pgraph.cpp + union qword { - // 64-bit Fowler/Noll/Vo FNV-1a hash code - size_t hash = 0xCBF29CE484222325ULL; - const qword *instbuffer = (const qword*)program; - size_t instIndex = 0; - bool end = false; - return 0; - while (true) - { - const qword inst = instbuffer[instIndex]; - bool end = inst.word[0] >> 31; - if (end) - return hash; - hash ^= inst.dword[0]; - hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + (hash << 8) + (hash << 40); - hash ^= inst.dword[1]; - hash += 
(hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + (hash << 8) + (hash << 40); - instIndex++; - } - return 0; - } -}; + u64 dword[2]; + u32 word[4]; + }; -struct HashFragmentProgram -{ - size_t operator()(const void *program) const + struct HashVertexProgram { - // 64-bit Fowler/Noll/Vo FNV-1a hash code - size_t hash = 0xCBF29CE484222325ULL; - const qword *instbuffer = (const qword*)program; - size_t instIndex = 0; - while (true) + size_t operator()(const void *program) const { - const qword& inst = instbuffer[instIndex]; - bool end = (inst.word[0] >> 8) & 0x1; - if (end) - return hash; - hash ^= inst.dword[0]; - hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + (hash << 8) + (hash << 40); - hash ^= inst.dword[1]; - hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + (hash << 8) + (hash << 40); - instIndex++; + // 64-bit Fowler/Noll/Vo FNV-1a hash code + size_t hash = 0xCBF29CE484222325ULL; + const qword *instbuffer = (const qword*)program; + size_t instIndex = 0; + bool end = false; + return 0; + while (true) + { + const qword inst = instbuffer[instIndex]; + bool end = inst.word[0] >> 31; + if (end) + return hash; + hash ^= inst.dword[0]; + hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + (hash << 8) + (hash << 40); + hash ^= inst.dword[1]; + hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + (hash << 8) + (hash << 40); + instIndex++; + } + return 0; } - return 0; - } -}; + }; -struct VertexProgramCompare -{ - bool operator()(const void *binary1, const void *binary2) const + + struct VertexProgramCompare { - const qword *instBuffer1 = (const qword*)binary1; - const qword *instBuffer2 = (const qword*)binary2; - size_t instIndex = 0; - return true; - while (true) + bool operator()(const void *binary1, const void *binary2) const { - const qword& inst1 = instBuffer1[instIndex]; - const qword& inst2 = instBuffer2[instIndex]; - bool end = (inst1.word[0] >> 31) && (inst2.word[0] >> 31); - if (end) - return true; 
- if (inst1.dword[0] != inst2.dword[0] || inst1.dword[1] != inst2.dword[1]) - return false; - instIndex++; + const qword *instBuffer1 = (const qword*)binary1; + const qword *instBuffer2 = (const qword*)binary2; + size_t instIndex = 0; + return true; + while (true) + { + const qword& inst1 = instBuffer1[instIndex]; + const qword& inst2 = instBuffer2[instIndex]; + bool end = (inst1.word[0] >> 31) && (inst2.word[0] >> 31); + if (end) + return true; + if (inst1.dword[0] != inst2.dword[0] || inst1.dword[1] != inst2.dword[1]) + return false; + instIndex++; + } } - } -}; + }; -struct FragmentProgramCompare -{ - bool operator()(const void *binary1, const void *binary2) const + struct FragmentHashUtil { - const qword *instBuffer1 = (const qword*)binary1; - const qword *instBuffer2 = (const qword*)binary2; - size_t instIndex = 0; - while (true) + /** + * RSX fragment program constants are inlined inside shader code. + * This function takes an instruction from a fragment program and + * returns an equivalent instruction where inlined constants + * are masked. 
+ * This allows to hash/compare fragment programs even if their + * inlined constants are modified inbetween + */ + static qword fragmentMaskConstant(const qword &initialQword) { - const qword& inst1 = instBuffer1[instIndex]; - const qword& inst2 = instBuffer2[instIndex]; - bool end = ((inst1.word[0] >> 8) & 0x1) && ((inst2.word[0] >> 8) & 0x1); - if (end) - return true; - if (inst1.dword[0] != inst2.dword[0] || inst1.dword[1] != inst2.dword[1]) - return false; - instIndex++; + qword result = initialQword; + u64 dword0Mask = 0, dword1Mask = 0;; + // Check if there is a constant and mask word if there is + SRC0 s0 = { initialQword.word[1] }; + SRC1 s1 = { initialQword.word[2] }; + SRC2 s2 = { initialQword.word[3] }; + if (s0.reg_type == 2) + result.word[1] = 0; + if (s1.reg_type == 2) + result.word[2] = 0; + if (s2.reg_type == 2) + result.word[3] = 0; + return result; } - } -}; + }; -typedef std::unordered_map binary2VS; -typedef std::unordered_map binary2FS; + struct HashFragmentProgram + { + size_t operator()(const void *program) const + { + // 64-bit Fowler/Noll/Vo FNV-1a hash code + size_t hash = 0xCBF29CE484222325ULL; + const qword *instbuffer = (const qword*)program; + size_t instIndex = 0; + while (true) + { + const qword& inst = instbuffer[instIndex]; + bool end = (inst.word[0] >> 8) & 0x1; + if (end) + return hash; + const qword& maskedInst = FragmentHashUtil::fragmentMaskConstant(inst); + + hash ^= maskedInst.dword[0]; + hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + (hash << 8) + (hash << 40); + hash ^= maskedInst.dword[1]; + hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + (hash << 8) + (hash << 40); + instIndex++; + } + return 0; + } + }; + + struct FragmentProgramCompare + { + bool operator()(const void *binary1, const void *binary2) const + { + const qword *instBuffer1 = (const qword*)binary1; + const qword *instBuffer2 = (const qword*)binary2; + size_t instIndex = 0; + while (true) + { + const qword& inst1 = 
instBuffer1[instIndex]; + const qword& inst2 = instBuffer2[instIndex]; + bool end = ((inst1.word[0] >> 8) & 0x1) && ((inst2.word[0] >> 8) & 0x1); + if (end) + return true; + + const qword& maskedInst1 = FragmentHashUtil::fragmentMaskConstant(inst1); + const qword& maskedInst2 = FragmentHashUtil::fragmentMaskConstant(inst2); + + if (maskedInst1.dword[0] != maskedInst2.dword[0] || maskedInst1.dword[1] != maskedInst2.dword[1]) + return false; + instIndex++; + } + } + }; + +} + +typedef std::unordered_map binary2VS; +typedef std::unordered_map binary2FS; struct PSOKey { From d1ebc47867523c7782e0ef8a480397d90569fd38 Mon Sep 17 00:00:00 2001 From: vlj Date: Fri, 15 May 2015 17:46:51 +0200 Subject: [PATCH 055/343] d3d12: Use actual hw in release mode + honor vsync option --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 56e92b2567..0d86154084 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -26,16 +26,20 @@ D3D12GSRender::D3D12GSRender() m_currentScaleOffsetBufferIndex = 0; constantsFragmentSize = 0; // Enable d3d debug layer +#ifdef DEBUG Microsoft::WRL::ComPtr debugInterface; D3D12GetDebugInterface(IID_PPV_ARGS(&debugInterface)); debugInterface->EnableDebugLayer(); +#endif - // Create adapter Microsoft::WRL::ComPtr dxgiFactory; check(CreateDXGIFactory(IID_PPV_ARGS(&dxgiFactory))); - IDXGIAdapter* warpAdapter; - check(dxgiFactory->EnumWarpAdapter(IID_PPV_ARGS(&warpAdapter))); - check(D3D12CreateDevice(warpAdapter, D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&m_device))); + // Create adapter + IDXGIAdapter* adaptater = nullptr; +#ifdef DEBUG + check(dxgiFactory->EnumWarpAdapter(IID_PPV_ARGS(&adaptater))); +#endif + check(D3D12CreateDevice(adaptater, D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&m_device))); // Queues D3D12_COMMAND_QUEUE_DESC copyQueueDesc = {}, graphicQueueDesc = {}; 
@@ -1015,7 +1019,7 @@ void D3D12GSRender::Flip() } } - check(m_swapChain->Present(1, 0)); + check(m_swapChain->Present(Ini.GSVSyncEnable.GetValue() ? 1 : 0, 0)); // Wait execution is over // TODO: It's suboptimal, we should use 2 command allocator Microsoft::WRL::ComPtr fence; From 2ac3c66c80b7fda6380a60dbb539ea64a0e24d36 Mon Sep 17 00:00:00 2001 From: vlj Date: Fri, 15 May 2015 20:00:48 +0200 Subject: [PATCH 056/343] d3d12: Fix hash/compare/getSize for fragment program It didn't properly take constants into account --- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp | 17 ++++++++-- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h | 38 ++++++++++++++-------- 2 files changed, 40 insertions(+), 15 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp index 0d3b221c01..f968913911 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp @@ -11,6 +11,8 @@ #include "FragmentProgramDecompiler.h" #include "Utilities/File.h" +#include + #pragma comment (lib, "d3dcompiler.lib") namespace ProgramHashUtil @@ -23,10 +25,21 @@ namespace ProgramHashUtil while (true) { const qword& inst = instBuffer[instIndex]; + bool isSRC0Constant = ((inst.word[1] >> 8) & 0x3) == 2; + bool isSRC1Constant = ((inst.word[2] >> 8) & 0x3) == 2; + bool isSRC2Constant = ((inst.word[3] >> 8) & 0x3) == 2; bool end = (inst.word[0] >> 8) & 0x1; - if (end) - return (instIndex + 1) * 4 * 4; + + if (isSRC0Constant || isSRC1Constant || isSRC2Constant) + { + instIndex += 2; + if (end) + return instIndex * 4 * 4; + continue; + } instIndex++; + if (end) + return (instIndex) * 4 * 4; } } }; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h index 2fc6f00936..b3a4d6d8c5 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h @@ -101,8 +101,16 @@ namespace ProgramHashUtil } }; - struct FragmentHashUtil + struct FragmentProgramUtil { 
+ /** + * returns true if the given source Operand is a constant + */ + static bool isConstant(u32 sourceOperand) + { + return ((sourceOperand >> 8) & 0x3) == 2; + } + /** * RSX fragment program constants are inlined inside shader code. * This function takes an instruction from a fragment program and @@ -114,16 +122,11 @@ namespace ProgramHashUtil static qword fragmentMaskConstant(const qword &initialQword) { qword result = initialQword; - u64 dword0Mask = 0, dword1Mask = 0;; - // Check if there is a constant and mask word if there is - SRC0 s0 = { initialQword.word[1] }; - SRC1 s1 = { initialQword.word[2] }; - SRC2 s2 = { initialQword.word[3] }; - if (s0.reg_type == 2) + if (isConstant(initialQword.word[1])) result.word[1] = 0; - if (s1.reg_type == 2) + if (isConstant(initialQword.word[2])) result.word[2] = 0; - if (s2.reg_type == 2) + if (isConstant(initialQword.word[3])) result.word[3] = 0; return result; } @@ -143,13 +146,17 @@ namespace ProgramHashUtil bool end = (inst.word[0] >> 8) & 0x1; if (end) return hash; - const qword& maskedInst = FragmentHashUtil::fragmentMaskConstant(inst); - + const qword& maskedInst = FragmentProgramUtil::fragmentMaskConstant(inst); hash ^= maskedInst.dword[0]; hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + (hash << 8) + (hash << 40); hash ^= maskedInst.dword[1]; hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + (hash << 8) + (hash << 40); instIndex++; + // Skip constants + if (FragmentProgramUtil::isConstant(inst.word[1]) || + FragmentProgramUtil::isConstant(inst.word[2]) || + FragmentProgramUtil::isConstant(inst.word[3])) + instIndex++; } return 0; } @@ -170,12 +177,17 @@ namespace ProgramHashUtil if (end) return true; - const qword& maskedInst1 = FragmentHashUtil::fragmentMaskConstant(inst1); - const qword& maskedInst2 = FragmentHashUtil::fragmentMaskConstant(inst2); + const qword& maskedInst1 = FragmentProgramUtil::fragmentMaskConstant(inst1); + const qword& maskedInst2 = 
FragmentProgramUtil::fragmentMaskConstant(inst2); if (maskedInst1.dword[0] != maskedInst2.dword[0] || maskedInst1.dword[1] != maskedInst2.dword[1]) return false; instIndex++; + // Skip constants + if (FragmentProgramUtil::isConstant(inst1.word[1]) || + FragmentProgramUtil::isConstant(inst1.word[2]) || + FragmentProgramUtil::isConstant(inst1.word[3])) + instIndex++; } } }; From a58974eac83ba26fcc530e1f9dd9279db644a681 Mon Sep 17 00:00:00 2001 From: vlj Date: Fri, 15 May 2015 20:23:29 +0200 Subject: [PATCH 057/343] d3d12: Fix reversed output --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 0d86154084..8cdac6a905 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -368,14 +368,14 @@ void D3D12GSRender::setScaleOffset() float scaleOffsetMat[16] = { 1.0f, 0.0f, 0.0f, 0.0f, - 0.0f, 1.0f, 0.0f, 0.0f, + 0.0f, -1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f }; // Scale - scaleOffsetMat[0] = (float&)methodRegisters[NV4097_SET_VIEWPORT_SCALE + (0x4 * 0)] / (RSXThread::m_width / RSXThread::m_width_scale); - scaleOffsetMat[5] = (float&)methodRegisters[NV4097_SET_VIEWPORT_SCALE + (0x4 * 1)] / (RSXThread::m_height / RSXThread::m_height_scale); + scaleOffsetMat[0] *= (float&)methodRegisters[NV4097_SET_VIEWPORT_SCALE + (0x4 * 0)] / (RSXThread::m_width / RSXThread::m_width_scale); + scaleOffsetMat[5] *= (float&)methodRegisters[NV4097_SET_VIEWPORT_SCALE + (0x4 * 1)] / (RSXThread::m_height / RSXThread::m_height_scale); scaleOffsetMat[10] = (float&)methodRegisters[NV4097_SET_VIEWPORT_SCALE + (0x4 * 2)]; // Offset From ba66992ee3bd06ad8e4cc42822329e9d8a805609 Mon Sep 17 00:00:00 2001 From: vlj Date: Fri, 15 May 2015 21:48:43 +0200 Subject: [PATCH 058/343] d3d12: Use template class for caching --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 6 +- 
rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp | 269 -------------- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h | 336 +++++++----------- .../RSX/D3D12/FragmentProgramDecompiler.cpp | 32 +- rpcs3/Emu/RSX/D3D12/ShaderParam.h | 22 +- .../Emu/RSX/D3D12/VertexProgramDecompiler.cpp | 34 +- 6 files changed, 172 insertions(+), 527 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 8cdac6a905..96a845b4dd 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -471,7 +471,7 @@ bool D3D12GSRender::LoadProgram() return false; } - PipelineProperties prop = {}; + D3D12PipelineProperties prop = {}; /* #define GL_POINTS 0x0000 #define GL_LINES 0x0001 @@ -504,8 +504,8 @@ bool D3D12GSRender::LoadProgram() prop.Topology = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break; } - - m_PSO = m_cachePSO.getGraphicPipelineState(m_device, m_rootSignature, m_cur_vertex_prog, m_cur_fragment_prog, prop, m_IASet); + prop.IASet = m_IASet; + m_PSO = m_cachePSO.getGraphicPipelineState(m_device, m_rootSignature, m_cur_vertex_prog, m_cur_fragment_prog, prop, std::make_pair(m_device, m_rootSignature)); return m_PSO != nullptr; } diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp index f968913911..6506826981 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp @@ -2,279 +2,10 @@ #if defined (DX12_SUPPORT) #include "D3D12PipelineState.h" -#include "Emu/Memory/vm.h" -#include "Utilities/Log.h" -#include #include -#include -#include "VertexProgramDecompiler.h" -#include "FragmentProgramDecompiler.h" -#include "Utilities/File.h" - -#include #pragma comment (lib, "d3dcompiler.lib") -namespace ProgramHashUtil -{ - - size_t getFPBinarySize(void *ptr) - { - const qword *instBuffer = (const qword*)ptr; - size_t instIndex = 0; - while (true) - { - const qword& inst = instBuffer[instIndex]; - bool isSRC0Constant = 
((inst.word[1] >> 8) & 0x3) == 2; - bool isSRC1Constant = ((inst.word[2] >> 8) & 0x3) == 2; - bool isSRC2Constant = ((inst.word[3] >> 8) & 0x3) == 2; - bool end = (inst.word[0] >> 8) & 0x1; - - if (isSRC0Constant || isSRC1Constant || isSRC2Constant) - { - instIndex += 2; - if (end) - return instIndex * 4 * 4; - continue; - } - instIndex++; - if (end) - return (instIndex) * 4 * 4; - } - } -}; - - -PipelineStateObjectCache::PipelineStateObjectCache() : m_currentShaderId(0) -{} - -PipelineStateObjectCache::~PipelineStateObjectCache() -{ - for (auto pair : m_cachePSO) - pair.second->Release(); -} - -bool PipelineStateObjectCache::SearchFp(const RSXFragmentProgram& rsx_fp, Shader& shader) -{ - binary2FS::const_iterator It = m_cacheFS.find(vm::get_ptr(rsx_fp.addr)); - if (It != m_cacheFS.end()) - { - shader = It->second; - return true; - } - return false; -} - -bool PipelineStateObjectCache::SearchVp(const RSXVertexProgram& rsx_vp, Shader& shader) -{ - binary2VS::const_iterator It = m_cacheVS.find((void*)rsx_vp.data.data()); - if (It != m_cacheVS.end()) - { - shader = It->second; - return true; - } - return false; -} - -ID3D12PipelineState *PipelineStateObjectCache::GetProg(const PSOKey &key) const -{ - std::unordered_map::const_iterator It = m_cachePSO.find(key); - if (It == m_cachePSO.end()) - return nullptr; - return It->second; -} - -void PipelineStateObjectCache::AddVertexProgram(Shader& vp, RSXVertexProgram& rsx_vp) -{ - size_t actualVPSize = rsx_vp.data.size() * 4; - void *fpShadowCopy = malloc(actualVPSize); - memcpy(fpShadowCopy, rsx_vp.data.data(), actualVPSize); - vp.Id = (u32)m_currentShaderId++; - m_cacheVS.insert(std::make_pair(fpShadowCopy, vp)); -} - -void PipelineStateObjectCache::AddFragmentProgram(Shader& fp, RSXFragmentProgram& rsx_fp) -{ - size_t actualFPSize = ProgramHashUtil::getFPBinarySize(vm::get_ptr(rsx_fp.addr)); - void *fpShadowCopy = malloc(actualFPSize); - memcpy(fpShadowCopy, vm::get_ptr(rsx_fp.addr), actualFPSize); - fp.Id = 
(u32)m_currentShaderId++; - m_cacheFS.insert(std::make_pair(fpShadowCopy, fp)); -} - -void PipelineStateObjectCache::Add(ID3D12PipelineState *prog, const PSOKey& PSOKey) -{ - m_cachePSO.insert(std::make_pair(PSOKey, prog)); -} - -ID3D12PipelineState *PipelineStateObjectCache::getGraphicPipelineState( - ID3D12Device *device, - ID3D12RootSignature *rootSignature, - RSXVertexProgram *vertexShader, - RSXFragmentProgram *fragmentShader, - const PipelineProperties &pipelineProperties, - const std::vector &IASet) -{ - ID3D12PipelineState *result = nullptr; - Shader m_vertex_prog, m_fragment_prog; - bool m_fp_buf_num = SearchFp(*fragmentShader, m_fragment_prog); - bool m_vp_buf_num = SearchVp(*vertexShader, m_vertex_prog); - - if (!m_fp_buf_num) - { - LOG_WARNING(RSX, "FP not found in buffer!"); - FragmentDecompiler FS(fragmentShader->addr, fragmentShader->size, fragmentShader->offset); - const std::string &shader = FS.Decompile(); - m_fragment_prog.Compile(shader, SHADER_TYPE::SHADER_TYPE_FRAGMENT); - AddFragmentProgram(m_fragment_prog, *fragmentShader); - - // TODO: This shouldn't use current dir - fs::file("./FragmentProgram.hlsl", o_write | o_create | o_trunc).write(shader.c_str(), shader.size()); - } - - if (!m_vp_buf_num) - { - LOG_WARNING(RSX, "VP not found in buffer!"); - VertexDecompiler VS(vertexShader->data); - std::string shaderCode = VS.Decompile(); - m_vertex_prog.Compile(shaderCode, SHADER_TYPE::SHADER_TYPE_VERTEX); - AddVertexProgram(m_vertex_prog, *vertexShader); - - // TODO: This shouldn't use current dir - fs::file("./VertexProgram.hlsl", o_write | o_create | o_trunc).write(shaderCode.c_str(), shaderCode.size()); - } - - if (m_fp_buf_num && m_vp_buf_num) - { - result = GetProg({ m_vertex_prog.Id, m_fragment_prog.Id, pipelineProperties }); - } - - if (result != nullptr) - { - return result; - /* // RSX Debugger: Check if this program was modified and update it - if (Ini.GSLogPrograms.GetValue()) - { - for (auto& program : m_debug_programs) - { - if 
(program.id == m_program.id && program.modified) - { - // TODO: This isn't working perfectly. Is there any better/shorter way to update the program - m_vertex_prog.shader = program.vp_shader; - m_fragment_prog.shader = program.fp_shader; - m_vertex_prog.Wait(); - m_vertex_prog.Compile(); - checkForGlError("m_vertex_prog.Compile"); - m_fragment_prog.Wait(); - m_fragment_prog.Compile(); - checkForGlError("m_fragment_prog.Compile"); - glAttachShader(m_program.id, m_vertex_prog.id); - glAttachShader(m_program.id, m_fragment_prog.id); - glLinkProgram(m_program.id); - checkForGlError("glLinkProgram"); - glDetachShader(m_program.id, m_vertex_prog.id); - glDetachShader(m_program.id, m_fragment_prog.id); - program.vp_id = m_vertex_prog.id; - program.fp_id = m_fragment_prog.id; - program.modified = false; - } - } - } - m_program.Use();*/ - } - else - { -// LOG_WARNING(RSX, "Add program :"); -// LOG_WARNING(RSX, "*** vp id = %d", m_vertex_prog.Id); -// LOG_WARNING(RSX, "*** fp id = %d", m_fragment_prog.Id); - - D3D12_GRAPHICS_PIPELINE_STATE_DESC graphicPipelineStateDesc = {}; - - if (m_vertex_prog.bytecode != nullptr) - { - graphicPipelineStateDesc.VS.BytecodeLength = m_vertex_prog.bytecode->GetBufferSize(); - graphicPipelineStateDesc.VS.pShaderBytecode = m_vertex_prog.bytecode->GetBufferPointer(); - } - if (m_fragment_prog.bytecode != nullptr) - { - graphicPipelineStateDesc.PS.BytecodeLength = m_fragment_prog.bytecode->GetBufferSize(); - graphicPipelineStateDesc.PS.pShaderBytecode = m_fragment_prog.bytecode->GetBufferPointer(); - } - - graphicPipelineStateDesc.pRootSignature = rootSignature; - - // Sensible default value - static D3D12_RASTERIZER_DESC CD3D12_RASTERIZER_DESC = - { - D3D12_FILL_MODE_SOLID, - D3D12_CULL_MODE_NONE, - FALSE, - D3D12_DEFAULT_DEPTH_BIAS, - D3D12_DEFAULT_DEPTH_BIAS_CLAMP, - D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS, - TRUE, - FALSE, - FALSE, - 0, - D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF, - }; - - static D3D12_DEPTH_STENCIL_DESC 
CD3D12_DEPTH_STENCIL_DESC = - { - TRUE, - D3D12_DEPTH_WRITE_MASK_ALL, - D3D12_COMPARISON_FUNC_LESS_EQUAL, - FALSE, - D3D12_DEFAULT_STENCIL_READ_MASK, - D3D12_DEFAULT_STENCIL_WRITE_MASK, - }; - - static D3D12_BLEND_DESC CD3D12_BLEND_DESC = - { - FALSE, - FALSE, - { - FALSE,FALSE, - D3D12_BLEND_ONE, D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD, - D3D12_BLEND_ONE, D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD, - D3D12_LOGIC_OP_NOOP, - D3D12_COLOR_WRITE_ENABLE_ALL, - } - }; - - graphicPipelineStateDesc.BlendState = CD3D12_BLEND_DESC; - graphicPipelineStateDesc.DepthStencilState = CD3D12_DEPTH_STENCIL_DESC; - graphicPipelineStateDesc.RasterizerState = CD3D12_RASTERIZER_DESC; - graphicPipelineStateDesc.PrimitiveTopologyType = pipelineProperties.Topology; - - graphicPipelineStateDesc.NumRenderTargets = 1; - graphicPipelineStateDesc.RTVFormats[0] = DXGI_FORMAT_R8G8B8A8_UNORM; - graphicPipelineStateDesc.DSVFormat = DXGI_FORMAT_D16_UNORM; - - graphicPipelineStateDesc.InputLayout.pInputElementDescs = IASet.data(); - graphicPipelineStateDesc.InputLayout.NumElements = (UINT)IASet.size(); - graphicPipelineStateDesc.SampleDesc.Count = 1; - graphicPipelineStateDesc.SampleMask = UINT_MAX; - graphicPipelineStateDesc.NodeMask = 1; - - device->CreateGraphicsPipelineState(&graphicPipelineStateDesc, IID_PPV_ARGS(&result)); - Add(result, {m_vertex_prog.Id, m_fragment_prog.Id, pipelineProperties }); - - // RSX Debugger - /*if (Ini.GSLogPrograms.GetValue()) - { - RSXDebuggerProgram program; - program.id = m_program.id; - program.vp_id = m_vertex_prog.id; - program.fp_id = m_fragment_prog.id; - program.vp_shader = m_vertex_prog.shader; - program.fp_shader = m_fragment_prog.shader; - m_debug_programs.push_back(program); - }*/ - } - return result; -} - #define TO_STRING(x) #x void Shader::Compile(const std::string &code, SHADER_TYPE st) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h index b3a4d6d8c5..e64ab9fdaa 100644 --- 
a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h @@ -2,28 +2,37 @@ #if defined (DX12_SUPPORT) #include -#include "Emu/RSX/RSXFragmentProgram.h" -#include "Emu/RSX/RSXVertexProgram.h" #include +#include "../Common/ProgramStateCache.h" +#include "VertexProgramDecompiler.h" +#include "FragmentProgramDecompiler.h" +#include "Utilities/File.h" -enum class SHADER_TYPE -{ - SHADER_TYPE_VERTEX, - SHADER_TYPE_FRAGMENT -}; -struct PipelineProperties +struct D3D12PipelineProperties { D3D12_PRIMITIVE_TOPOLOGY_TYPE Topology; + std::vector IASet; + + bool operator==(const D3D12PipelineProperties &in) const + { + return Topology == in.Topology; + } }; /** Storage for a shader -* Embeds the D3DBlob corresponding to +* Embeds the D3DBlob */ -class Shader +struct Shader { public: + enum class SHADER_TYPE + { + SHADER_TYPE_VERTEX, + SHADER_TYPE_FRAGMENT + }; + Shader() : bytecode(nullptr) {} ~Shader() {} @@ -37,222 +46,127 @@ public: // void Decompile(RSXFragmentProgram& prog) /** Compile the decompiled fragment shader into a format we can use with OpenGL. 
*/ - void Compile(const std::string &code, SHADER_TYPE st); + void Compile(const std::string &code, enum class SHADER_TYPE st); }; - - -namespace ProgramHashUtil +struct D3D12Traits { - // Based on - // https://github.com/AlexAltea/nucleus/blob/master/nucleus/gpu/rsx_pgraph.cpp - union qword - { - u64 dword[2]; - u32 word[4]; - }; + typedef Shader VertexProgramData; + typedef Shader FragmentProgramData; + typedef ID3D12PipelineState PipelineData; + typedef D3D12PipelineProperties PipelineProperties; + typedef std::pair ExtraData; - struct HashVertexProgram + static + void RecompileFragmentProgram(RSXFragmentProgram *RSXFP, FragmentProgramData& fragmentProgramData, size_t ID) { - size_t operator()(const void *program) const + FragmentDecompiler FS(RSXFP->addr, RSXFP->size, RSXFP->offset); + const std::string &shader = FS.Decompile(); + fragmentProgramData.Compile(shader, Shader::SHADER_TYPE::SHADER_TYPE_FRAGMENT); + + // TODO: This shouldn't use current dir + fs::file("./FragmentProgram.hlsl", o_write | o_create | o_trunc).write(shader.c_str(), shader.size()); + fragmentProgramData.Id = (u32)ID; + } + + static + void RecompileVertexProgram(RSXVertexProgram *RSXVP, VertexProgramData& vertexProgramData, size_t ID) + { + VertexDecompiler VS(RSXVP->data); + std::string shaderCode = VS.Decompile(); + vertexProgramData.Compile(shaderCode, Shader::SHADER_TYPE::SHADER_TYPE_VERTEX); + + // TODO: This shouldn't use current dir + fs::file("./VertexProgram.hlsl", o_write | o_create | o_trunc).write(shaderCode.c_str(), shaderCode.size()); + vertexProgramData.Id = (u32)ID; + } + + static + PipelineData *BuildProgram(VertexProgramData &vertexProgramData, FragmentProgramData &fragmentProgramData, const PipelineProperties &pipelineProperties, const ExtraData& extraData) + { + ID3D12PipelineState *result; + D3D12_GRAPHICS_PIPELINE_STATE_DESC graphicPipelineStateDesc = {}; + + if (vertexProgramData.bytecode != nullptr) { - // 64-bit Fowler/Noll/Vo FNV-1a hash code - size_t hash = 
0xCBF29CE484222325ULL; - const qword *instbuffer = (const qword*)program; - size_t instIndex = 0; - bool end = false; - return 0; - while (true) + graphicPipelineStateDesc.VS.BytecodeLength = vertexProgramData.bytecode->GetBufferSize(); + graphicPipelineStateDesc.VS.pShaderBytecode = vertexProgramData.bytecode->GetBufferPointer(); + } + if (fragmentProgramData.bytecode != nullptr) + { + graphicPipelineStateDesc.PS.BytecodeLength = fragmentProgramData.bytecode->GetBufferSize(); + graphicPipelineStateDesc.PS.pShaderBytecode = fragmentProgramData.bytecode->GetBufferPointer(); + } + + graphicPipelineStateDesc.pRootSignature = extraData.second; + + // Sensible default value + static D3D12_RASTERIZER_DESC CD3D12_RASTERIZER_DESC = + { + D3D12_FILL_MODE_SOLID, + D3D12_CULL_MODE_NONE, + FALSE, + D3D12_DEFAULT_DEPTH_BIAS, + D3D12_DEFAULT_DEPTH_BIAS_CLAMP, + D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS, + TRUE, + FALSE, + FALSE, + 0, + D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF, + }; + + static D3D12_DEPTH_STENCIL_DESC CD3D12_DEPTH_STENCIL_DESC = + { + TRUE, + D3D12_DEPTH_WRITE_MASK_ALL, + D3D12_COMPARISON_FUNC_LESS_EQUAL, + FALSE, + D3D12_DEFAULT_STENCIL_READ_MASK, + D3D12_DEFAULT_STENCIL_WRITE_MASK, + }; + + static D3D12_BLEND_DESC CD3D12_BLEND_DESC = + { + FALSE, + FALSE, { - const qword inst = instbuffer[instIndex]; - bool end = inst.word[0] >> 31; - if (end) - return hash; - hash ^= inst.dword[0]; - hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + (hash << 8) + (hash << 40); - hash ^= inst.dword[1]; - hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + (hash << 8) + (hash << 40); - instIndex++; + FALSE,FALSE, + D3D12_BLEND_ONE, D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD, + D3D12_BLEND_ONE, D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD, + D3D12_LOGIC_OP_NOOP, + D3D12_COLOR_WRITE_ENABLE_ALL, } - return 0; - } - }; + }; + graphicPipelineStateDesc.BlendState = CD3D12_BLEND_DESC; + graphicPipelineStateDesc.DepthStencilState = CD3D12_DEPTH_STENCIL_DESC; + 
graphicPipelineStateDesc.RasterizerState = CD3D12_RASTERIZER_DESC; + graphicPipelineStateDesc.PrimitiveTopologyType = pipelineProperties.Topology; - struct VertexProgramCompare + graphicPipelineStateDesc.NumRenderTargets = 1; + graphicPipelineStateDesc.RTVFormats[0] = DXGI_FORMAT_R8G8B8A8_UNORM; + graphicPipelineStateDesc.DSVFormat = DXGI_FORMAT_D16_UNORM; + + graphicPipelineStateDesc.InputLayout.pInputElementDescs = pipelineProperties.IASet.data(); + graphicPipelineStateDesc.InputLayout.NumElements = (UINT)pipelineProperties.IASet.size(); + graphicPipelineStateDesc.SampleDesc.Count = 1; + graphicPipelineStateDesc.SampleMask = UINT_MAX; + graphicPipelineStateDesc.NodeMask = 1; + + extraData.first->CreateGraphicsPipelineState(&graphicPipelineStateDesc, IID_PPV_ARGS(&result)); + return result; + } + + static + void DeleteProgram(PipelineData *ptr) { - bool operator()(const void *binary1, const void *binary2) const - { - const qword *instBuffer1 = (const qword*)binary1; - const qword *instBuffer2 = (const qword*)binary2; - size_t instIndex = 0; - return true; - while (true) - { - const qword& inst1 = instBuffer1[instIndex]; - const qword& inst2 = instBuffer2[instIndex]; - bool end = (inst1.word[0] >> 31) && (inst2.word[0] >> 31); - if (end) - return true; - if (inst1.dword[0] != inst2.dword[0] || inst1.dword[1] != inst2.dword[1]) - return false; - instIndex++; - } - } - }; - - struct FragmentProgramUtil - { - /** - * returns true if the given source Operand is a constant - */ - static bool isConstant(u32 sourceOperand) - { - return ((sourceOperand >> 8) & 0x3) == 2; - } - - /** - * RSX fragment program constants are inlined inside shader code. - * This function takes an instruction from a fragment program and - * returns an equivalent instruction where inlined constants - * are masked. 
- * This allows to hash/compare fragment programs even if their - * inlined constants are modified inbetween - */ - static qword fragmentMaskConstant(const qword &initialQword) - { - qword result = initialQword; - if (isConstant(initialQword.word[1])) - result.word[1] = 0; - if (isConstant(initialQword.word[2])) - result.word[2] = 0; - if (isConstant(initialQword.word[3])) - result.word[3] = 0; - return result; - } - }; - - struct HashFragmentProgram - { - size_t operator()(const void *program) const - { - // 64-bit Fowler/Noll/Vo FNV-1a hash code - size_t hash = 0xCBF29CE484222325ULL; - const qword *instbuffer = (const qword*)program; - size_t instIndex = 0; - while (true) - { - const qword& inst = instbuffer[instIndex]; - bool end = (inst.word[0] >> 8) & 0x1; - if (end) - return hash; - const qword& maskedInst = FragmentProgramUtil::fragmentMaskConstant(inst); - hash ^= maskedInst.dword[0]; - hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + (hash << 8) + (hash << 40); - hash ^= maskedInst.dword[1]; - hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + (hash << 8) + (hash << 40); - instIndex++; - // Skip constants - if (FragmentProgramUtil::isConstant(inst.word[1]) || - FragmentProgramUtil::isConstant(inst.word[2]) || - FragmentProgramUtil::isConstant(inst.word[3])) - instIndex++; - } - return 0; - } - }; - - struct FragmentProgramCompare - { - bool operator()(const void *binary1, const void *binary2) const - { - const qword *instBuffer1 = (const qword*)binary1; - const qword *instBuffer2 = (const qword*)binary2; - size_t instIndex = 0; - while (true) - { - const qword& inst1 = instBuffer1[instIndex]; - const qword& inst2 = instBuffer2[instIndex]; - bool end = ((inst1.word[0] >> 8) & 0x1) && ((inst2.word[0] >> 8) & 0x1); - if (end) - return true; - - const qword& maskedInst1 = FragmentProgramUtil::fragmentMaskConstant(inst1); - const qword& maskedInst2 = FragmentProgramUtil::fragmentMaskConstant(inst2); - - if (maskedInst1.dword[0] != 
maskedInst2.dword[0] || maskedInst1.dword[1] != maskedInst2.dword[1]) - return false; - instIndex++; - // Skip constants - if (FragmentProgramUtil::isConstant(inst1.word[1]) || - FragmentProgramUtil::isConstant(inst1.word[2]) || - FragmentProgramUtil::isConstant(inst1.word[3])) - instIndex++; - } - } - }; - -} - -typedef std::unordered_map binary2VS; -typedef std::unordered_map binary2FS; - -struct PSOKey -{ - u32 vpIdx; - u32 fpIdx; - PipelineProperties properties; -}; - -struct PSOKeyHash -{ - size_t operator()(const PSOKey &key) const - { - size_t hashValue = 0; - hashValue ^= std::hash()(key.vpIdx); - return hashValue; + ptr->Release(); } }; -struct PSOKeyCompare +class PipelineStateObjectCache : public ProgramStateCache { - size_t operator()(const PSOKey &key1, const PSOKey &key2) const - { - return (key1.vpIdx == key2.vpIdx) && (key1.fpIdx == key2.fpIdx) && (key1.properties.Topology == key2.properties.Topology); - } -}; - -/** - * Cache for shader blobs and Pipeline state object - * The class is responsible for creating the object so the state only has to call getGraphicPipelineState - */ -class PipelineStateObjectCache -{ -private: - size_t m_currentShaderId; - binary2VS m_cacheVS; - binary2FS m_cacheFS; - - std::unordered_map m_cachePSO; - - bool SearchFp(const RSXFragmentProgram& rsx_fp, Shader& shader); - bool SearchVp(const RSXVertexProgram& rsx_vp, Shader& shader); - ID3D12PipelineState *GetProg(const PSOKey &psoKey) const; - void AddVertexProgram(Shader& vp, RSXVertexProgram& rsx_vp); - void AddFragmentProgram(Shader& fp, RSXFragmentProgram& rsx_fp); - void Add(ID3D12PipelineState *prog, const PSOKey& PSOKey); -public: - PipelineStateObjectCache(); - ~PipelineStateObjectCache(); - // Note: the last param is not taken into account if the PSO is not regenerated - ID3D12PipelineState *getGraphicPipelineState( - ID3D12Device *device, - ID3D12RootSignature *rootSignature, - RSXVertexProgram *vertexShader, - RSXFragmentProgram *fragmentShader, - const 
PipelineProperties &pipelineProperties, - const std::vector &IASet - ); }; diff --git a/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp index 1cf237caf8..8a88a246e4 100644 --- a/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp @@ -69,7 +69,7 @@ void FragmentDecompiler::SetDst(std::string code, bool append_mask) { if (dst.set_cond) { - AddCode("$ifcond " + m_parr.AddParam(PARAM_NONE, typeName[3], "cc" + std::to_string(src0.cond_mod_reg_index)) + "$m = " + code + ";"); + AddCode("$ifcond " + m_parr.AddParam(PF_PARAM_NONE, typeName[3], "cc" + std::to_string(src0.cond_mod_reg_index)) + "$m = " + code + ";"); } else { @@ -86,7 +86,7 @@ void FragmentDecompiler::SetDst(std::string code, bool append_mask) if (dst.set_cond) { - AddCode(m_parr.AddParam(PARAM_NONE, typeName[3], "cc" + std::to_string(src0.cond_mod_reg_index)) + "$m = " + dest + ";"); + AddCode(m_parr.AddParam(PF_PARAM_NONE, typeName[3], "cc" + std::to_string(src0.cond_mod_reg_index)) + "$m = " + dest + ";"); } } @@ -114,24 +114,24 @@ std::string FragmentDecompiler::GetMask() std::string FragmentDecompiler::AddReg(u32 index, int fp16) { - return m_parr.AddParam(PARAM_NONE, typeName[3], std::string(fp16 ? "h" : "r") + std::to_string(index), typeName[3] + "(0.0)"); + return m_parr.AddParam(PF_PARAM_NONE, typeName[3], std::string(fp16 ? "h" : "r") + std::to_string(index), typeName[3] + "(0.0)"); } bool FragmentDecompiler::HasReg(u32 index, int fp16) { - return m_parr.HasParam(PARAM_NONE, typeName[3], + return m_parr.HasParam(PF_PARAM_NONE, typeName[3], std::string(fp16 ? 
"h" : "r") + std::to_string(index)); } std::string FragmentDecompiler::AddCond() { - return m_parr.AddParam(PARAM_NONE, typeName[3], "cc" + std::to_string(src0.cond_reg_index)); + return m_parr.AddParam(PF_PARAM_NONE, typeName[3], "cc" + std::to_string(src0.cond_reg_index)); } std::string FragmentDecompiler::AddConst() { std::string name = std::string("fc") + std::to_string(m_size + 4 * 4); - if (m_parr.HasParam(PARAM_UNIFORM, typeName[3], name)) + if (m_parr.HasParam(PF_PARAM_UNIFORM, typeName[3], name)) { return name; } @@ -143,14 +143,14 @@ std::string FragmentDecompiler::AddConst() u32 y = GetData(data[1]); u32 z = GetData(data[2]); u32 w = GetData(data[3]); - return m_parr.AddParam(PARAM_UNIFORM, typeName[3], name, + return m_parr.AddParam(PF_PARAM_UNIFORM, typeName[3], name, std::string(typeName[3] + "(") + std::to_string((float&)x) + ", " + std::to_string((float&)y) + ", " + std::to_string((float&)z) + ", " + std::to_string((float&)w) + ")"); } std::string FragmentDecompiler::AddTex() { - return m_parr.AddParam(PARAM_UNIFORM, "sampler2D", std::string("tex") + std::to_string(dst.tex_num)); + return m_parr.AddParam(PF_PARAM_UNIFORM, "sampler2D", std::string("tex") + std::to_string(dst.tex_num)); } std::string FragmentDecompiler::Format(const std::string& code) @@ -275,7 +275,7 @@ void FragmentDecompiler::AddCodeCond(const std::string& dst, const std::string& cond = "(" + AddCond() + swizzle + " " + cond + " " + typeName[3] + "(0., 0., 0., 0.))"; - ShaderVar dst_var(dst); + ShaderVariable dst_var(dst); dst_var.symplify(); //const char *c_mask = f; @@ -320,12 +320,12 @@ template std::string FragmentDecompiler::GetSRC(T src) default: if (dst.src_attr_reg_num < sizeof(reg_table) / sizeof(reg_table[0])) { - ret += m_parr.AddParam(PARAM_IN, typeName[3], reg_table[dst.src_attr_reg_num]); + ret += m_parr.AddParam(PF_PARAM_IN, typeName[3], reg_table[dst.src_attr_reg_num]); } else { LOG_ERROR(RSX, "Bad src reg num: %d", fmt::by_value(dst.src_attr_reg_num)); - ret += 
m_parr.AddParam(PARAM_IN, typeName[3], "unk"); + ret += m_parr.AddParam(PF_PARAM_IN, typeName[3], "unk"); Emu.Pause(); } break; @@ -425,7 +425,7 @@ void FragmentDecompiler::insertOutputs(std::stringstream & OS) for (int i = 0; i < sizeof(table) / sizeof(*table); ++i) { - if (m_parr.HasParam(PARAM_NONE, typeName[3], table[i].second)) + if (m_parr.HasParam(PF_PARAM_NONE, typeName[3], table[i].second)) OS << " " << typeName[3] << " " << table[i].first << " : SV_TARGET" << i << ";" << std::endl; } OS << "};" << std::endl; @@ -443,7 +443,7 @@ void FragmentDecompiler::insertConstants(std::stringstream & OS) } OS << "};" << std::endl;*/ - for (ParamType PT : m_parr.params[PARAM_UNIFORM]) + for (ParamType PT : m_parr.params[PF_PARAM_UNIFORM]) { for (ParamItem PI : PT.items) OS << PT.type << " " << PI.name << " = " << PI.value << ";" << std::endl; @@ -454,13 +454,13 @@ void FragmentDecompiler::insertMainStart(std::stringstream & OS) { OS << "PixelOutput main(PixelInput In)" << std::endl; OS << "{" << std::endl; - for (ParamType PT : m_parr.params[PARAM_IN]) + for (ParamType PT : m_parr.params[PF_PARAM_IN]) { for (ParamItem PI : PT.items) OS << " " << PT.type << " " << PI.name << " = In." << PI.name << ";" << std::endl; } // Declare output - for (ParamType PT : m_parr.params[PARAM_NONE]) + for (ParamType PT : m_parr.params[PF_PARAM_NONE]) { for (ParamItem PI : PT.items) OS << " " << PT.type << " " << PI.name << " = float4(0., 0., 0., 0.);" << std::endl; @@ -480,7 +480,7 @@ void FragmentDecompiler::insertMainEnd(std::stringstream & OS) OS << " PixelOutput Out;" << std::endl; for (int i = 0; i < sizeof(table) / sizeof(*table); ++i) { - if (m_parr.HasParam(PARAM_NONE, typeName[3], table[i].second)) + if (m_parr.HasParam(PF_PARAM_NONE, typeName[3], table[i].second)) OS << " Out." 
<< table[i].first << " = " << table[i].second << ";" << std::endl; } OS << " return Out;" << std::endl; diff --git a/rpcs3/Emu/RSX/D3D12/ShaderParam.h b/rpcs3/Emu/RSX/D3D12/ShaderParam.h index 5ef5d3e3d3..c063a9e793 100644 --- a/rpcs3/Emu/RSX/D3D12/ShaderParam.h +++ b/rpcs3/Emu/RSX/D3D12/ShaderParam.h @@ -5,12 +5,12 @@ enum ParamFlag { - PARAM_IN, - PARAM_OUT, - PARAM_UNIFORM, - PARAM_CONST, - PARAM_NONE, - PARAM_COUNT, + PF_PARAM_IN, + PF_PARAM_OUT, + PF_PARAM_UNIFORM, + PF_PARAM_CONST, + PF_PARAM_NONE, + PF_PARAM_COUNT, }; struct ParamItem @@ -51,7 +51,7 @@ struct ParamType struct ParamArray { - std::vector params[PARAM_COUNT]; + std::vector params[PF_PARAM_COUNT]; ParamType* SearchParam(const ParamFlag &flag, const std::string& type) { @@ -105,14 +105,14 @@ struct ParamArray } }; -class ShaderVar +class ShaderVariable { public: std::string name; std::vector swizzles; - ShaderVar() = default; - ShaderVar(const std::string& var) + ShaderVariable() = default; + ShaderVariable(const std::string& var) { auto var_blocks = fmt::split(var, { "." }); @@ -138,7 +138,7 @@ public: return swizzles[swizzles.size() - 1].length(); } - ShaderVar& symplify() + ShaderVariable& symplify() { std::unordered_map swizzle; diff --git a/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.cpp index 65e1e8b866..0aa100682a 100644 --- a/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.cpp @@ -52,13 +52,13 @@ std::string VertexDecompiler::GetDST(bool isSca) switch (isSca ? 0x1f : d3.dst) { case 0x1f: - ret += m_parr.AddParam(PARAM_NONE, typeName[3], std::string("tmp") + std::to_string(isSca ? d3.sca_dst_tmp : d0.dst_tmp)); + ret += m_parr.AddParam(PF_PARAM_NONE, typeName[3], std::string("tmp") + std::to_string(isSca ? 
d3.sca_dst_tmp : d0.dst_tmp)); break; default: if (d3.dst > 15) LOG_ERROR(RSX, fmt::Format("dst index out of range: %u", d3.dst)); - ret += m_parr.AddParam(PARAM_NONE, typeName[3], std::string("dst_reg") + std::to_string(d3.dst), d3.dst == 0 ? typeName[3] + "(0.0f, 0.0f, 0.0f, 1.0f)" : typeName[3] + "(0.0, 0.0, 0.0, 0.0)"); + ret += m_parr.AddParam(PF_PARAM_NONE, typeName[3], std::string("dst_reg") + std::to_string(d3.dst), d3.dst == 0 ? typeName[3] + "(0.0f, 0.0f, 0.0f, 1.0f)" : typeName[3] + "(0.0, 0.0, 0.0, 0.0)"); break; } @@ -82,21 +82,21 @@ std::string VertexDecompiler::GetSRC(const u32 n) switch (src[n].reg_type) { case 1: //temp - ret += m_parr.AddParam(PARAM_NONE, typeName[3], "tmp" + std::to_string(src[n].tmp_src)); + ret += m_parr.AddParam(PF_PARAM_NONE, typeName[3], "tmp" + std::to_string(src[n].tmp_src)); break; case 2: //input if (d1.input_src < (sizeof(reg_table) / sizeof(reg_table[0]))) { - ret += m_parr.AddParam(PARAM_IN, typeName[3], reg_table[d1.input_src], d1.input_src); + ret += m_parr.AddParam(PF_PARAM_IN, typeName[3], reg_table[d1.input_src], d1.input_src); } else { LOG_ERROR(RSX, "Bad input src num: %d", fmt::by_value(d1.input_src)); - ret += m_parr.AddParam(PARAM_IN, typeName[3], "in_unk", d1.input_src); + ret += m_parr.AddParam(PF_PARAM_IN, typeName[3], "in_unk", d1.input_src); } break; case 3: //const - m_parr.AddParam(PARAM_UNIFORM, typeName[3], std::string("vc[468]")); + m_parr.AddParam(PF_PARAM_UNIFORM, typeName[3], std::string("vc[468]")); ret += std::string("vc[") + std::to_string(d1.const_src) + (d3.index_const ? 
" + " + AddAddrReg() : "") + "]"; break; @@ -161,7 +161,7 @@ void VertexDecompiler::SetDST(bool is_sca, std::string value) if (d0.cond_update_enable_0 && d0.cond_update_enable_1) { - dest = m_parr.AddParam(PARAM_NONE, typeName[3], "cc" + std::to_string(d0.cond_reg_sel_1), typeName[3] + "(0.0)") + mask; + dest = m_parr.AddParam(PF_PARAM_NONE, typeName[3], "cc" + std::to_string(d0.cond_reg_sel_1), typeName[3] + "(0.0)") + mask; } else if (d3.dst != 0x1f || (is_sca ? d3.sca_dst_tmp != 0x3f : d0.dst_tmp != 0x3f)) { @@ -197,7 +197,7 @@ std::string VertexDecompiler::GetFunc() std::string VertexDecompiler::GetTex() { - return m_parr.AddParam(PARAM_UNIFORM, "sampler2D", std::string("vtex") + std::to_string(/*?.tex_num*/0)); + return m_parr.AddParam(PF_PARAM_UNIFORM, "sampler2D", std::string("vtex") + std::to_string(/*?.tex_num*/0)); } std::string VertexDecompiler::Format(const std::string& code) @@ -312,7 +312,7 @@ void VertexDecompiler::AddCodeCond(const std::string& dst, const std::string& sr std::string cond = fmt::Format("%s(cc%d%s, vec4(0.0))", cond_string_table[d0.cond], d0.cond_reg_sel_1, swizzle.c_str()); - ShaderVar dst_var(dst); + ShaderVariable dst_var(dst); dst_var.symplify(); //const char *c_mask = f; @@ -340,7 +340,7 @@ std::string VertexDecompiler::AddAddrMask() std::string VertexDecompiler::AddAddrReg() { static const char f[] = { 'x', 'y', 'z', 'w' }; - return m_parr.AddParam(PARAM_NONE, "ivec4", "a" + std::to_string(d0.addr_reg_sel_1), "ivec4(0)") + AddAddrMask(); + return m_parr.AddParam(PF_PARAM_NONE, "ivec4", "a" + std::to_string(d0.addr_reg_sel_1), "ivec4(0)") + AddAddrMask(); } u32 VertexDecompiler::GetAddr() @@ -428,11 +428,11 @@ std::string VertexDecompiler::BuildCode() std::stringstream OS; insertHeader(OS); - insertInputs(OS, m_parr.params[PARAM_IN]); + insertInputs(OS, m_parr.params[PF_PARAM_IN]); OS << std::endl; - insertOutputs(OS, m_parr.params[PARAM_NONE]); + insertOutputs(OS, m_parr.params[PF_PARAM_NONE]); OS << std::endl; - 
insertConstants(OS, m_parr.params[PARAM_UNIFORM]); + insertConstants(OS, m_parr.params[PF_PARAM_UNIFORM]); OS << std::endl; insertMainStart(OS); @@ -541,11 +541,11 @@ void VertexDecompiler::insertMainStart(std::stringstream & OS) // Declare inside main function for (auto &i : reg_table) { - if (m_parr.HasParam(PARAM_NONE, typeName[3], i.src_reg)) + if (m_parr.HasParam(PF_PARAM_NONE, typeName[3], i.src_reg)) OS << " float4 " << i.src_reg << ";" << std::endl; } - for (const ParamType PT : m_parr.params[PARAM_IN]) + for (const ParamType PT : m_parr.params[PF_PARAM_IN]) { for (const ParamItem &PI : PT.items) OS << " " << PT.type << " " << PI.name << " = In." << PI.name << ";" << std::endl; @@ -559,7 +559,7 @@ void VertexDecompiler::insertMainEnd(std::stringstream & OS) // Declare inside main function for (auto &i : reg_table) { - if (m_parr.HasParam(PARAM_NONE, typeName[3], i.src_reg)) + if (m_parr.HasParam(PF_PARAM_NONE, typeName[3], i.src_reg)) OS << " Out." << i.src_reg << " = " << i.src_reg << ";" << std::endl; } // TODO: Find why I need to do this @@ -583,7 +583,7 @@ VertexDecompiler::VertexDecompiler(std::vector& data) : std::string VertexDecompiler::Decompile() { - for (unsigned i = 0; i < PARAM_COUNT; i++) + for (unsigned i = 0; i < PF_PARAM_COUNT; i++) m_parr.params[i].clear(); m_instr_count = 0; From 9d1993cdee4063e9fadf0e7762eafb72c4d78f24 Mon Sep 17 00:00:00 2001 From: vlj Date: Sat, 16 May 2015 01:09:51 +0200 Subject: [PATCH 059/343] d3d12: Fix for d3d12 pipeline cache --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 96a845b4dd..a3591f27ad 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -505,7 +505,7 @@ bool D3D12GSRender::LoadProgram() break; } prop.IASet = m_IASet; - m_PSO = m_cachePSO.getGraphicPipelineState(m_device, m_rootSignature, m_cur_vertex_prog, 
m_cur_fragment_prog, prop, std::make_pair(m_device, m_rootSignature)); + m_PSO = m_cachePSO.getGraphicPipelineState(m_cur_vertex_prog, m_cur_fragment_prog, prop, std::make_pair(m_device, m_rootSignature)); return m_PSO != nullptr; } From 7b6ad026c6da6e0e5b7e666e5e92a609a7880e46 Mon Sep 17 00:00:00 2001 From: vlj Date: Sat, 16 May 2015 18:37:36 +0200 Subject: [PATCH 060/343] d3d12: Add fragment constant offset cache --- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h | 1 + 1 file changed, 1 insertion(+) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h index e64ab9fdaa..3f454dc271 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h @@ -38,6 +38,7 @@ public: u32 Id; Microsoft::WRL::ComPtr bytecode; + std::vector FragmentConstantOffsetCache; /** * Decompile a fragment shader located in the PS3's Memory. This function operates synchronously. From c3e162b442079ea09be07470858e621dbee608bb Mon Sep 17 00:00:00 2001 From: vlj Date: Sat, 16 May 2015 19:10:38 +0200 Subject: [PATCH 061/343] d3d12: Read constants directly from fp --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 56 +++++++++++-------- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h | 9 +++ .../RSX/D3D12/FragmentProgramDecompiler.cpp | 14 ++--- .../Emu/RSX/D3D12/FragmentProgramDecompiler.h | 2 +- 4 files changed, 49 insertions(+), 32 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index a3591f27ad..26993e9e8f 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -431,15 +431,24 @@ void D3D12GSRender::FillVertexShaderConstantsBuffer() void D3D12GSRender::FillPixelShaderConstantsBuffer() { + // Get constant from fragment program + const std::vector &fragmentOffset = m_cachePSO.getFragmentConstantOffsetsCache(m_cur_fragment_prog); + size_t offset = 0; void *constantsBufferMap; check(m_constantsFragmentBuffer->Map(0, nullptr, 
&constantsBufferMap)); - for (const RSXTransformConstant& c : m_fragment_constants) + for (size_t offsetInFP : fragmentOffset) { - u32 id = c.id - m_cur_fragment_prog->offset; - float vector[] = { c.x, c.y, c.z, c.w }; - memcpy((char*)constantsBufferMap + constantsFragmentSize + offset, vector, 4 * sizeof(float)); - offset += 4 * sizeof(float); + auto data = vm::ptr::make(m_cur_fragment_prog->addr + (u32)offsetInFP); + + u32 c0 = (data[0] >> 16 | data[0] << 16); + u32 c1 = (data[1] >> 16 | data[1] << 16); + u32 c2 = (data[2] >> 16 | data[2] << 16); + u32 c3 = (data[3] >> 16 | data[3] << 16); + + u32 vector[] = { c0, c1, c2, c3 }; + memcpy((char*)constantsBufferMap + constantsFragmentSize + offset, vector, 4 * sizeof(u32)); + offset += 4 * sizeof(u32); } m_constantsFragmentBuffer->Unmap(0, nullptr); // Multiple of 256 @@ -540,24 +549,6 @@ void D3D12GSRender::ExecCMD() assert((m_draw_array_first + m_draw_array_count) * item_size <= m_vertexBufferSize[i]); } commandList->IASetVertexBuffers(0, (UINT)vertexBufferViews.size(), vertexBufferViews.data()); - - setScaleOffset(); - commandList->SetDescriptorHeaps(1, &m_scaleOffsetDescriptorHeap); - D3D12_GPU_DESCRIPTOR_HANDLE Handle = m_scaleOffsetDescriptorHeap->GetGPUDescriptorHandleForHeapStart(); - Handle.ptr += m_currentScaleOffsetBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - commandList->SetGraphicsRootDescriptorTable(0, Handle); - m_currentScaleOffsetBufferIndex++; - - size_t currentBufferIndex = m_constantsBufferIndex; - FillVertexShaderConstantsBuffer(); - m_constantsBufferIndex++; - FillPixelShaderConstantsBuffer(); - m_constantsBufferIndex++; - - commandList->SetDescriptorHeaps(1, &m_constantsBufferDescriptorsHeap); - Handle = m_constantsBufferDescriptorsHeap->GetGPUDescriptorHandleForHeapStart(); - Handle.ptr += currentBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - 
commandList->SetGraphicsRootDescriptorTable(1, Handle); } if (!LoadProgram()) @@ -566,6 +557,25 @@ void D3D12GSRender::ExecCMD() Emu.Pause(); return; } + + // Constants + setScaleOffset(); + commandList->SetDescriptorHeaps(1, &m_scaleOffsetDescriptorHeap); + D3D12_GPU_DESCRIPTOR_HANDLE Handle = m_scaleOffsetDescriptorHeap->GetGPUDescriptorHandleForHeapStart(); + Handle.ptr += m_currentScaleOffsetBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + commandList->SetGraphicsRootDescriptorTable(0, Handle); + m_currentScaleOffsetBufferIndex++; + + size_t currentBufferIndex = m_constantsBufferIndex; + FillVertexShaderConstantsBuffer(); + m_constantsBufferIndex++; + FillPixelShaderConstantsBuffer(); + m_constantsBufferIndex++; + + commandList->SetDescriptorHeaps(1, &m_constantsBufferDescriptorsHeap); + Handle = m_constantsBufferDescriptorsHeap->GetGPUDescriptorHandleForHeapStart(); + Handle.ptr += currentBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + commandList->SetGraphicsRootDescriptorTable(1, Handle); commandList->SetPipelineState(m_PSO); InitDrawBuffers(); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h index 3f454dc271..ec97c8006a 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h @@ -65,6 +65,15 @@ struct D3D12Traits const std::string &shader = FS.Decompile(); fragmentProgramData.Compile(shader, Shader::SHADER_TYPE::SHADER_TYPE_FRAGMENT); + for (const ParamType& PT : FS.m_parr.params[PF_PARAM_UNIFORM]) + { + for (const ParamItem PI : PT.items) + { + size_t offset = atoi(PI.name.c_str() + 2); + fragmentProgramData.FragmentConstantOffsetCache.push_back(offset); + } + } + // TODO: This shouldn't use current dir fs::file("./FragmentProgram.hlsl", o_write | o_create | o_trunc).write(shader.c_str(), shader.size()); fragmentProgramData.Id = (u32)ID; diff --git 
a/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp index 8a88a246e4..9d22dbb786 100644 --- a/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp @@ -368,16 +368,14 @@ std::string FragmentDecompiler::BuildCode() std::stringstream OS; insertHeader(OS); OS << std::endl; - + insertConstants(OS); OS << std::endl; insertIntputs(OS); OS << std::endl; insertOutputs(OS); OS << std::endl; insertMainStart(OS); - insertConstants(OS); OS << main << std::endl; - insertMainEnd(OS); return OS.str(); @@ -434,20 +432,20 @@ void FragmentDecompiler::insertOutputs(std::stringstream & OS) void FragmentDecompiler::insertConstants(std::stringstream & OS) { // TODO : Avoid constant recompilation and properly use constant buffer -/* OS << "cbuffer CONSTANT : register(b2)" << std::endl; + OS << "cbuffer CONSTANT : register(b2)" << std::endl; OS << "{" << std::endl; - for (ParamType PT : m_parr.params[PARAM_UNIFORM]) + for (ParamType PT : m_parr.params[PF_PARAM_UNIFORM]) { for (ParamItem PI : PT.items) OS << " " << PT.type << " " << PI.name << ";" << std::endl; } - OS << "};" << std::endl;*/ + OS << "};" << std::endl; - for (ParamType PT : m_parr.params[PF_PARAM_UNIFORM]) +/* for (ParamType PT : m_parr.params[PF_PARAM_UNIFORM]) { for (ParamItem PI : PT.items) OS << PT.type << " " << PI.name << " = " << PI.value << ";" << std::endl; - } + }*/ } void FragmentDecompiler::insertMainStart(std::stringstream & OS) diff --git a/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.h b/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.h index 48f0791dc9..c0642a2051 100644 --- a/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.h +++ b/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.h @@ -7,7 +7,6 @@ class FragmentDecompiler { std::string main; - ParamArray m_parr; u32 m_addr; u32& m_size; u32 m_const_index; @@ -44,6 +43,7 @@ protected: virtual void insertMainStart(std::stringstream &OS); virtual void 
insertMainEnd(std::stringstream &OS); public: + ParamArray m_parr; FragmentDecompiler(u32 addr, u32& size, u32 ctrl); std::string Decompile(); }; From f1ab5abe2e58ce32e3fd26274f8a5a450cefc891 Mon Sep 17 00:00:00 2001 From: vlj Date: Sat, 16 May 2015 19:11:01 +0200 Subject: [PATCH 062/343] d3d12: Cleaning --- rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp | 7 ------- 1 file changed, 7 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp index 9d22dbb786..0eb811a512 100644 --- a/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp @@ -431,7 +431,6 @@ void FragmentDecompiler::insertOutputs(std::stringstream & OS) void FragmentDecompiler::insertConstants(std::stringstream & OS) { - // TODO : Avoid constant recompilation and properly use constant buffer OS << "cbuffer CONSTANT : register(b2)" << std::endl; OS << "{" << std::endl; for (ParamType PT : m_parr.params[PF_PARAM_UNIFORM]) @@ -440,12 +439,6 @@ void FragmentDecompiler::insertConstants(std::stringstream & OS) OS << " " << PT.type << " " << PI.name << ";" << std::endl; } OS << "};" << std::endl; - -/* for (ParamType PT : m_parr.params[PF_PARAM_UNIFORM]) - { - for (ParamItem PI : PT.items) - OS << PT.type << " " << PI.name << " = " << PI.value << ";" << std::endl; - }*/ } void FragmentDecompiler::insertMainStart(std::stringstream & OS) From 16e8d6349e4aafde73f6d2728132cd7457f46865 Mon Sep 17 00:00:00 2001 From: vlj Date: Sun, 17 May 2015 23:16:24 +0200 Subject: [PATCH 063/343] d3d12: Make compilation more robust --- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp | 4 ++-- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h | 19 +++++++++---------- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp index 6506826981..56c8f6f4e9 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp +++ 
b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp @@ -14,12 +14,12 @@ void Shader::Compile(const std::string &code, SHADER_TYPE st) switch (st) { case SHADER_TYPE::SHADER_TYPE_VERTEX: - hr = D3DCompile(code.c_str(), code.size(), "test", nullptr, nullptr, "main", "vs_5_0", 0, 0, &bytecode, errorBlob.GetAddressOf()); + hr = D3DCompile(code.c_str(), code.size(), "VertexProgram.hlsl", nullptr, nullptr, "main", "vs_5_0", 0, 0, &bytecode, errorBlob.GetAddressOf()); if (hr != S_OK) LOG_ERROR(RSX, "VS build failed:%s", errorBlob->GetBufferPointer()); break; case SHADER_TYPE::SHADER_TYPE_FRAGMENT: - hr = D3DCompile(code.c_str(), code.size(), "test", nullptr, nullptr, "main", "ps_5_0", 0, 0, &bytecode, errorBlob.GetAddressOf()); + hr = D3DCompile(code.c_str(), code.size(), "FragmentProgram.hlsl", nullptr, nullptr, "main", "ps_5_0", 0, 0, &bytecode, errorBlob.GetAddressOf()); if (hr != S_OK) LOG_ERROR(RSX, "FS build failed:%s", errorBlob->GetBufferPointer()); break; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h index ec97c8006a..fcd4a66333 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h @@ -97,16 +97,15 @@ struct D3D12Traits ID3D12PipelineState *result; D3D12_GRAPHICS_PIPELINE_STATE_DESC graphicPipelineStateDesc = {}; - if (vertexProgramData.bytecode != nullptr) - { - graphicPipelineStateDesc.VS.BytecodeLength = vertexProgramData.bytecode->GetBufferSize(); - graphicPipelineStateDesc.VS.pShaderBytecode = vertexProgramData.bytecode->GetBufferPointer(); - } - if (fragmentProgramData.bytecode != nullptr) - { - graphicPipelineStateDesc.PS.BytecodeLength = fragmentProgramData.bytecode->GetBufferSize(); - graphicPipelineStateDesc.PS.pShaderBytecode = fragmentProgramData.bytecode->GetBufferPointer(); - } + if (vertexProgramData.bytecode == nullptr) + return nullptr; + graphicPipelineStateDesc.VS.BytecodeLength = vertexProgramData.bytecode->GetBufferSize(); + 
graphicPipelineStateDesc.VS.pShaderBytecode = vertexProgramData.bytecode->GetBufferPointer(); + + if (fragmentProgramData.bytecode == nullptr) + return nullptr; + graphicPipelineStateDesc.PS.BytecodeLength = fragmentProgramData.bytecode->GetBufferSize(); + graphicPipelineStateDesc.PS.pShaderBytecode = fragmentProgramData.bytecode->GetBufferPointer(); graphicPipelineStateDesc.pRootSignature = extraData.second; From 43f4ed49b4f31cb9102bfc4791a217baf251f3e1 Mon Sep 17 00:00:00 2001 From: vlj Date: Sun, 17 May 2015 23:34:06 +0200 Subject: [PATCH 064/343] d3d12: Some fix for VPDecompiler --- .../Emu/RSX/D3D12/VertexProgramDecompiler.cpp | 32 ++++++++++--------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.cpp index 0aa100682a..d3711aba39 100644 --- a/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.cpp @@ -291,12 +291,12 @@ void VertexDecompiler::AddCodeCond(const std::string& dst, const std::string& sr static const char* cond_string_table[(lt | gt | eq) + 1] = { "error", - "lessThan", - "equal", - "lessThanEqual", - "greaterThan", - "notEqual", - "greaterThanEqual", + "<", + "==", + "<=", + ">", + "!=", + ">=", "error" }; @@ -310,7 +310,7 @@ void VertexDecompiler::AddCodeCond(const std::string& dst, const std::string& sr swizzle = swizzle == "xyzw" ? "" : "." 
+ swizzle; - std::string cond = fmt::Format("%s(cc%d%s, vec4(0.0))", cond_string_table[d0.cond], d0.cond_reg_sel_1, swizzle.c_str()); + std::string cond = fmt::Format("(cc%d%s %s float4(0., 0., 0., 0.))", d0.cond_reg_sel_1, swizzle.c_str(), cond_string_table[d0.cond]); ShaderVariable dst_var(dst); dst_var.symplify(); @@ -319,7 +319,7 @@ void VertexDecompiler::AddCodeCond(const std::string& dst, const std::string& sr if (dst_var.swizzles[0].length() == 1) { - AddCode("if (" + cond + ".x) " + dst + " = " + typeName[3] + "(" + src + ").x;"); + AddCode("if (" + cond + ".x) " + dst + " = " + typeName[3] + "(" + src + ".xxxx).x;"); } else { @@ -539,10 +539,10 @@ void VertexDecompiler::insertMainStart(std::stringstream & OS) OS << "{" << std::endl; // Declare inside main function - for (auto &i : reg_table) + for (const ParamType PT : m_parr.params[PF_PARAM_NONE]) { - if (m_parr.HasParam(PF_PARAM_NONE, typeName[3], i.src_reg)) - OS << " float4 " << i.src_reg << ";" << std::endl; + for (const ParamItem &PI : PT.items) + OS << " " << PT.type << " " << PI.name << ";" << std::endl; } for (const ParamType PT : m_parr.params[PF_PARAM_IN]) @@ -550,6 +550,8 @@ void VertexDecompiler::insertMainStart(std::stringstream & OS) for (const ParamItem &PI : PT.items) OS << " " << PT.type << " " << PI.name << " = In." << PI.name << ";" << std::endl; } + + } @@ -696,7 +698,7 @@ std::string VertexDecompiler::Decompile() case RSX_SCA_OPCODE_MOV: SetDSTSca("$s"); break; case RSX_SCA_OPCODE_RCP: SetDSTSca("(1.0 / $s)"); break; case RSX_SCA_OPCODE_RCC: SetDSTSca("clamp(1.0 / $s, 5.42101e-20, 1.884467e19)"); break; - case RSX_SCA_OPCODE_RSQ: SetDSTSca("inversesqrt(abs($s))"); break; + case RSX_SCA_OPCODE_RSQ: SetDSTSca("1.F / sqrt($s)"); break; case RSX_SCA_OPCODE_EXP: SetDSTSca("exp($s)"); break; case RSX_SCA_OPCODE_LOG: SetDSTSca("log($s)"); break; case RSX_SCA_OPCODE_LIT: SetDSTSca(typeName[3] + "(1.0, $s.x, ($s.x > 0.0 ? 
exp($s.w * log2($s.y)) : 0.0), 1.0)"); break; @@ -791,9 +793,9 @@ std::string VertexDecompiler::Decompile() case RSX_VEC_OPCODE_MUL: SetDSTVec("($0 * $1)"); break; case RSX_VEC_OPCODE_ADD: SetDSTVec("($0 + $2)"); break; case RSX_VEC_OPCODE_MAD: SetDSTVec("($0 * $1 + $2)"); break; - case RSX_VEC_OPCODE_DP3: SetDSTVec("vec4(dot($0.xyz, $1.xyz))"); break; - case RSX_VEC_OPCODE_DPH: SetDSTVec("vec4(dot(vec4($0.xyz, 1.0), $1))"); break; - case RSX_VEC_OPCODE_DP4: SetDSTVec(typeName[3] + "(dot($0, $1), dot($0, $1), dot($0, $1), dot($0, $1))"); break; + case RSX_VEC_OPCODE_DP3: SetDSTVec("dot($0.xyz, $1.xyz).xxxx"); break; + case RSX_VEC_OPCODE_DPH: SetDSTVec("dot(float4($0.xyz, 1.0), $1).xxxx"); break; + case RSX_VEC_OPCODE_DP4: SetDSTVec("dot($0, $1).xxxx"); break; case RSX_VEC_OPCODE_DST: SetDSTVec("vec4(distance($0, $1))"); break; case RSX_VEC_OPCODE_MIN: SetDSTVec("min($0, $1)"); break; case RSX_VEC_OPCODE_MAX: SetDSTVec("max($0, $1)"); break; From 263294d2635c8b940da548af97cd871ade9fd0f6 Mon Sep 17 00:00:00 2001 From: vlj Date: Sun, 17 May 2015 23:40:19 +0200 Subject: [PATCH 065/343] d3d12: Some fixes to VP/FP decompiler --- rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp | 8 ++++---- rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.cpp | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp index 0eb811a512..074505494e 100644 --- a/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp @@ -568,9 +568,9 @@ std::string FragmentDecompiler::Decompile() { case RSX_FP_OPCODE_ADD: SetDst("($0 + $1)"); break; case RSX_FP_OPCODE_COS: SetDst("cos($0)"); break; - case RSX_FP_OPCODE_DP2: SetDst("vec4(dot($0.xy, $1.xy))"); break; - case RSX_FP_OPCODE_DP3: SetDst("vec4(dot($0.xyz, $1.xyz))"); break; - case RSX_FP_OPCODE_DP4: SetDst("vec4(dot($0, $1))"); break; + case RSX_FP_OPCODE_DP2: SetDst("dot($0.xy, 
$1.xy).xxxx"); break; + case RSX_FP_OPCODE_DP3: SetDst("dot($0.xyz, $1.xyz).xxxx"); break; + case RSX_FP_OPCODE_DP4: SetDst("dot($0, $1).xxxx"); break; case RSX_FP_OPCODE_DP2A: SetDst("vec4($0.x * $1.x + $0.y * $1.y + $2.x)"); break; case RSX_FP_OPCODE_DST: SetDst("vec4(distance($0, $1))"); break; case RSX_FP_OPCODE_REFL: LOG_ERROR(RSX, "Unimplemented SCB instruction: REFL"); break; // TODO: Is this in the right category? @@ -616,7 +616,7 @@ std::string FragmentDecompiler::Decompile() case RSX_FP_OPCODE_DDY: SetDst("dFdy($0)"); break; case RSX_FP_OPCODE_NRM: SetDst("normalize($0)"); break; case RSX_FP_OPCODE_BEM: LOG_ERROR(RSX, "Unimplemented TEX_SRB instruction: BEM"); break; - case RSX_FP_OPCODE_TEX: SetDst("texture($t, $0.xy)"); break; + case RSX_FP_OPCODE_TEX: SetDst("float4(0., 0., 0., 0.);//texture($t, $0.xy)"); break; case RSX_FP_OPCODE_TEXBEM: SetDst("texture($t, $0.xy, $1.x)"); break; case RSX_FP_OPCODE_TXP: SetDst("textureProj($t, $0.xyz, $1.x)"); break; //TODO: More testing (Sonic The Hedgehog (NPUB-30442/NPEB-00478) and The Simpsons Arcade Game (NPUB30563)) case RSX_FP_OPCODE_TXPBEM: SetDst("textureProj($t, $0.xyz, $1.x)"); break; diff --git a/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.cpp index d3711aba39..e7ea5ca365 100644 --- a/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.cpp @@ -542,7 +542,7 @@ void VertexDecompiler::insertMainStart(std::stringstream & OS) for (const ParamType PT : m_parr.params[PF_PARAM_NONE]) { for (const ParamItem &PI : PT.items) - OS << " " << PT.type << " " << PI.name << ";" << std::endl; + OS << " " << PT.type << " " << PI.name << " = float4(0., 0., 0., 0.);" << std::endl; } for (const ParamType PT : m_parr.params[PF_PARAM_IN]) @@ -802,7 +802,7 @@ std::string VertexDecompiler::Decompile() case RSX_VEC_OPCODE_SLT: SetDSTVec("vec4(lessThan($0, $1))"); break; case RSX_VEC_OPCODE_SGE: SetDSTVec("vec4(greaterThanEqual($0, $1))"); 
break; case RSX_VEC_OPCODE_ARL: AddCode("$ifcond $a = ivec4($0)$am;"); break; - case RSX_VEC_OPCODE_FRC: SetDSTVec("fract($0)"); break; + case RSX_VEC_OPCODE_FRC: SetDSTVec("frac($0)"); break; case RSX_VEC_OPCODE_FLR: SetDSTVec("floor($0)"); break; case RSX_VEC_OPCODE_SEQ: SetDSTVec("vec4(equal($0, $1))"); break; case RSX_VEC_OPCODE_SFL: SetDSTVec("vec4(equal($0, vec4(0.0)))"); break; From 39039514d159dd22f26932056863b7e0d27af95d Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 18 May 2015 17:53:48 +0200 Subject: [PATCH 066/343] d3d12: Start implementing texture upload and usage --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 156 +++++++++++++++++++++++++- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 7 +- 2 files changed, 156 insertions(+), 7 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 26993e9e8f..5b720c8e30 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -25,12 +25,14 @@ D3D12GSRender::D3D12GSRender() m_constantsBufferIndex = 0; m_currentScaleOffsetBufferIndex = 0; constantsFragmentSize = 0; + m_currentStorageOffset = 0; + m_currentTextureIndex = 0; // Enable d3d debug layer -#ifdef DEBUG +//#ifdef DEBUG Microsoft::WRL::ComPtr debugInterface; D3D12GetDebugInterface(IID_PPV_ARGS(&debugInterface)); debugInterface->EnableDebugLayer(); -#endif +//#endif Microsoft::WRL::ComPtr dxgiFactory; check(CreateDXGIFactory(IID_PPV_ARGS(&dxgiFactory))); @@ -146,7 +148,7 @@ D3D12GSRender::D3D12GSRender() // Common root signature - D3D12_DESCRIPTOR_RANGE descriptorRange[2] = {}; + D3D12_DESCRIPTOR_RANGE descriptorRange[4] = {}; // Scale Offset data descriptorRange[0].BaseShaderRegister = 0; descriptorRange[0].NumDescriptors = 1; @@ -155,7 +157,15 @@ D3D12GSRender::D3D12GSRender() descriptorRange[1].BaseShaderRegister = 1; descriptorRange[1].NumDescriptors = 2; descriptorRange[1].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV; - D3D12_ROOT_PARAMETER RP[2] = {}; + // Textures + 
descriptorRange[2].BaseShaderRegister = 0; + descriptorRange[2].NumDescriptors = 1; + descriptorRange[2].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; + // Samplers + descriptorRange[3].BaseShaderRegister = 0; + descriptorRange[3].NumDescriptors = 1; + descriptorRange[3].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER; + D3D12_ROOT_PARAMETER RP[4] = {}; RP[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; RP[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; RP[0].DescriptorTable.pDescriptorRanges = &descriptorRange[0]; @@ -164,10 +174,18 @@ D3D12GSRender::D3D12GSRender() RP[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; RP[1].DescriptorTable.pDescriptorRanges = &descriptorRange[1]; RP[1].DescriptorTable.NumDescriptorRanges = 1; + RP[2].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + RP[2].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + RP[2].DescriptorTable.pDescriptorRanges = &descriptorRange[2]; + RP[2].DescriptorTable.NumDescriptorRanges = 1; + RP[3].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + RP[3].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + RP[3].DescriptorTable.pDescriptorRanges = &descriptorRange[3]; + RP[3].DescriptorTable.NumDescriptorRanges = 1; D3D12_ROOT_SIGNATURE_DESC rootSignatureDesc = {}; rootSignatureDesc.Flags = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT; - rootSignatureDesc.NumParameters = 2; + rootSignatureDesc.NumParameters = 4; rootSignatureDesc.pParameters = RP; Microsoft::WRL::ComPtr rootSignatureBlob; @@ -178,6 +196,25 @@ D3D12GSRender::D3D12GSRender() rootSignatureBlob->GetBufferPointer(), rootSignatureBlob->GetBufferSize(), IID_PPV_ARGS(&m_rootSignature)); + + // Texture + D3D12_HEAP_DESC heapDescription = {}; + heapDescription.SizeInBytes = 256 * 256 * 256; + heapDescription.Properties.Type = D3D12_HEAP_TYPE_UPLOAD; + check(m_device->CreateHeap(&heapDescription, IID_PPV_ARGS(&m_uploadTextureHeap))); + + heapDescription.Properties.Type = D3D12_HEAP_TYPE_DEFAULT; + 
heapDescription.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_NON_RT_DS_TEXTURES; + check(m_device->CreateHeap(&heapDescription, IID_PPV_ARGS(&m_textureStorage))); + + D3D12_DESCRIPTOR_HEAP_DESC textureDescriptorDesc = {}; + textureDescriptorDesc.NumDescriptors = 1000; // For safety + textureDescriptorDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; + textureDescriptorDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; + check(m_device->CreateDescriptorHeap(&textureDescriptorDesc, IID_PPV_ARGS(&m_textureDescriptorsHeap))); + + textureDescriptorDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER; + check(m_device->CreateDescriptorHeap(&textureDescriptorDesc, IID_PPV_ARGS(&m_samplerDescriptorHeap))); } D3D12GSRender::~D3D12GSRender() @@ -197,6 +234,10 @@ D3D12GSRender::~D3D12GSRender() m_vertexBuffer[i]->Release(); if (m_fbo) delete m_fbo; + m_textureDescriptorsHeap->Release(); + m_textureStorage->Release(); + m_uploadTextureHeap->Release(); + m_samplerDescriptorHeap->Release(); m_rootSignature->Release(); m_backBuffer[0]->Release(); m_backBuffer[1]->Release(); @@ -578,6 +619,109 @@ void D3D12GSRender::ExecCMD() commandList->SetGraphicsRootDescriptorTable(1, Handle); commandList->SetPipelineState(m_PSO); + + size_t usedTexture = 0; + + for (u32 i = 0; i < m_textures_count; ++i) + { + if (!m_textures[i].IsEnabled()) continue; + + Microsoft::WRL::ComPtr Texture, vramTexture; + size_t textureSize = m_textures[i].GetWidth() * m_textures[i].GetHeight() * 4; + D3D12_RESOURCE_DESC textureDesc = {}; + textureDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + textureDesc.Width = textureSize; + textureDesc.Height = 1; + textureDesc.DepthOrArraySize = 1; + textureDesc.SampleDesc.Count = 1; + textureDesc.MipLevels = 1; + textureDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + check(m_device->CreatePlacedResource( + m_uploadTextureHeap, + m_currentStorageOffset, + &textureDesc, + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&Texture) + )); + + const u32 texaddr = 
GetAddress(m_textures[i].GetOffset(), m_textures[i].GetLocation()); + auto pixels = vm::get_ptr(texaddr); + void *textureData; + check(Texture->Map(0, nullptr, (void**)&textureData)); + memcpy(textureData, pixels, textureSize); + Texture->Unmap(0, nullptr); + + D3D12_RESOURCE_DESC vramTextureDesc = {}; + vramTextureDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; + vramTextureDesc.Width = m_textures[i].GetWidth(); + vramTextureDesc.Height = m_textures[i].GetHeight(); + vramTextureDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + vramTextureDesc.DepthOrArraySize = 1; + vramTextureDesc.SampleDesc.Count = 1; + vramTextureDesc.MipLevels = 1; + check(m_device->CreatePlacedResource( + m_textureStorage, + m_currentStorageOffset, + &vramTextureDesc, + D3D12_RESOURCE_STATE_COPY_DEST, + nullptr, + IID_PPV_ARGS(&vramTexture) + )); + + m_currentStorageOffset += textureSize; + + D3D12_TEXTURE_COPY_LOCATION dst = {}, src = {}; + dst.pResource = vramTexture.Get(); + dst.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + src.pResource = Texture.Get(); + src.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + src.PlacedFootprint.Footprint.Depth = 1; + src.PlacedFootprint.Footprint.Width = m_textures[i].GetWidth(); + src.PlacedFootprint.Footprint.Height = m_textures[i].GetHeight(); + src.PlacedFootprint.Footprint.RowPitch = m_textures[i].GetWidth() * 4; + src.PlacedFootprint.Footprint.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + + commandList->CopyTextureRegion(&dst, 0, 0, 0, &src, nullptr); + + D3D12_RESOURCE_BARRIER barrier = {}; + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barrier.Transition.pResource = vramTexture.Get(); + barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; + barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_GENERIC_READ; + commandList->ResourceBarrier(1, &barrier); + + D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + srvDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + 
srvDesc.Texture2D.MipLevels = 1; + srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + D3D12_CPU_DESCRIPTOR_HANDLE Handle = m_textureDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); + Handle.ptr += (m_currentTextureIndex + usedTexture) * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + m_device->CreateShaderResourceView(vramTexture.Get(), &srvDesc, Handle); + + // TODO : Correctly define sampler + D3D12_SAMPLER_DESC samplerDesc = {}; + samplerDesc.Filter = D3D12_FILTER_MIN_MAG_MIP_POINT; + Handle = m_samplerDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); + Handle.ptr += (m_currentTextureIndex + usedTexture) * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + m_device->CreateSampler(&samplerDesc, Handle); + + usedTexture++; + } + + Handle = m_textureDescriptorsHeap->GetGPUDescriptorHandleForHeapStart(); + Handle.ptr += m_currentTextureIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + commandList->SetDescriptorHeaps(1, &m_textureDescriptorsHeap); + commandList->SetGraphicsRootDescriptorTable(2, Handle); + + Handle = m_samplerDescriptorHeap->GetGPUDescriptorHandleForHeapStart(); + Handle.ptr += m_currentTextureIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); + commandList->SetDescriptorHeaps(1, &m_samplerDescriptorHeap); + commandList->SetGraphicsRootDescriptorTable(3, Handle); + + m_currentTextureIndex += usedTexture; + InitDrawBuffers(); switch (m_surface_color_target) { @@ -1048,6 +1192,8 @@ void D3D12GSRender::Flip() m_constantsBufferIndex = 0; m_currentScaleOffsetBufferIndex = 0; constantsFragmentSize = 0; + m_currentStorageOffset = 0; + m_currentTextureIndex = 0; m_frame->Flip(nullptr); } #endif \ No newline at end of file diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 4835fdf031..c052af7e1d 100644 --- 
a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -50,8 +50,11 @@ private: ID3D12PipelineState *m_PSO; ID3D12RootSignature *m_rootSignature; - // GLTexture m_gl_textures[m_textures_count]; - // GLTexture m_gl_vertex_textures[m_textures_count]; + ID3D12Heap *m_uploadTextureHeap, *m_textureStorage; + size_t m_currentStorageOffset; + ID3D12DescriptorHeap *m_textureDescriptorsHeap; + ID3D12DescriptorHeap *m_samplerDescriptorHeap; + size_t m_currentTextureIndex; ID3D12Resource *m_indexBuffer, *m_vertexBuffer[m_vertex_count]; ID3D12Resource *m_constantsVertexBuffer, *m_constantsFragmentBuffer; From 782a11b47fcb35a4215e4b6ea4269306fafd1b5b Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 18 May 2015 17:55:09 +0200 Subject: [PATCH 067/343] d3d12: Add dump hlsl file to gitignore list --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 02dc0915c5..361bc51abd 100644 --- a/.gitignore +++ b/.gitignore @@ -44,6 +44,8 @@ /bin/rpcs3.iobj /bin/FragmentProgram.txt /bin/VertexProgram.txt +/bin/FragmentProgram.hlsl +/bin/VertexProgram.hlsl /bin/BreakPoints.dat /bin/textures /bin/*.lib From 3a75e6a357a4a6536bb9cb9f86d6957721a27bc1 Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 18 May 2015 18:07:13 +0200 Subject: [PATCH 068/343] d3d12: Some fixe to textures --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 13 ++++++++----- .../RSX/D3D12/FragmentProgramDecompiler.cpp | 18 ++++++++++++++++-- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 5b720c8e30..0dc87fa18a 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -626,7 +626,7 @@ void D3D12GSRender::ExecCMD() { if (!m_textures[i].IsEnabled()) continue; - Microsoft::WRL::ComPtr Texture, vramTexture; + ID3D12Resource *Texture, *vramTexture; size_t textureSize = m_textures[i].GetWidth() * m_textures[i].GetHeight() * 4; 
D3D12_RESOURCE_DESC textureDesc = {}; textureDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; @@ -672,9 +672,9 @@ void D3D12GSRender::ExecCMD() m_currentStorageOffset += textureSize; D3D12_TEXTURE_COPY_LOCATION dst = {}, src = {}; - dst.pResource = vramTexture.Get(); + dst.pResource = vramTexture; dst.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; - src.pResource = Texture.Get(); + src.pResource = Texture; src.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; src.PlacedFootprint.Footprint.Depth = 1; src.PlacedFootprint.Footprint.Width = m_textures[i].GetWidth(); @@ -686,7 +686,7 @@ void D3D12GSRender::ExecCMD() D3D12_RESOURCE_BARRIER barrier = {}; barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - barrier.Transition.pResource = vramTexture.Get(); + barrier.Transition.pResource = vramTexture; barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_GENERIC_READ; commandList->ResourceBarrier(1, &barrier); @@ -698,11 +698,14 @@ void D3D12GSRender::ExecCMD() srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; D3D12_CPU_DESCRIPTOR_HANDLE Handle = m_textureDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); Handle.ptr += (m_currentTextureIndex + usedTexture) * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - m_device->CreateShaderResourceView(vramTexture.Get(), &srvDesc, Handle); + m_device->CreateShaderResourceView(vramTexture, &srvDesc, Handle); // TODO : Correctly define sampler D3D12_SAMPLER_DESC samplerDesc = {}; samplerDesc.Filter = D3D12_FILTER_MIN_MAG_MIP_POINT; + samplerDesc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_WRAP; + samplerDesc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_WRAP; + samplerDesc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_WRAP; Handle = m_samplerDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); Handle.ptr += (m_currentTextureIndex + usedTexture) * 
m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); m_device->CreateSampler(&samplerDesc, Handle); diff --git a/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp index 074505494e..765d026b98 100644 --- a/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp @@ -435,10 +435,24 @@ void FragmentDecompiler::insertConstants(std::stringstream & OS) OS << "{" << std::endl; for (ParamType PT : m_parr.params[PF_PARAM_UNIFORM]) { + if (PT.type == "sampler2D") + continue; for (ParamItem PI : PT.items) OS << " " << PT.type << " " << PI.name << ";" << std::endl; } - OS << "};" << std::endl; + OS << "};" << std::endl << std::endl; + size_t textureIndex = 0; + for (ParamType PT : m_parr.params[PF_PARAM_UNIFORM]) + { + if (PT.type != "sampler2D") + continue; + for (ParamItem PI : PT.items) + { + OS << "Texture2D " << PI.name << " : register(t" << textureIndex << ");" << std::endl; + OS << "sampler " << PI.name << "sampler : register(s" << textureIndex << ");" << std::endl; + textureIndex++; + } + } } void FragmentDecompiler::insertMainStart(std::stringstream & OS) @@ -616,7 +630,7 @@ std::string FragmentDecompiler::Decompile() case RSX_FP_OPCODE_DDY: SetDst("dFdy($0)"); break; case RSX_FP_OPCODE_NRM: SetDst("normalize($0)"); break; case RSX_FP_OPCODE_BEM: LOG_ERROR(RSX, "Unimplemented TEX_SRB instruction: BEM"); break; - case RSX_FP_OPCODE_TEX: SetDst("float4(0., 0., 0., 0.);//texture($t, $0.xy)"); break; + case RSX_FP_OPCODE_TEX: SetDst("$t.Sample($tsampler, $0.xy)"); break; case RSX_FP_OPCODE_TEXBEM: SetDst("texture($t, $0.xy, $1.x)"); break; case RSX_FP_OPCODE_TXP: SetDst("textureProj($t, $0.xyz, $1.x)"); break; //TODO: More testing (Sonic The Hedgehog (NPUB-30442/NPEB-00478) and The Simpsons Arcade Game (NPUB30563)) case RSX_FP_OPCODE_TXPBEM: SetDst("textureProj($t, $0.xyz, $1.x)"); break; From 1f9c8f757a5f2875508cd52e6cc34cb467b737a8 Mon Sep 
17 00:00:00 2001 From: vlj Date: Mon, 18 May 2015 22:32:04 +0200 Subject: [PATCH 069/343] d3d12: Add a swizzle to texture Looks like shader4componentmapping doesnt work, maybe a bug. --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 12 +++++++++++- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h | 1 + rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp | 2 +- rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.cpp | 2 +- 4 files changed, 14 insertions(+), 3 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 0dc87fa18a..f0d520b9c7 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -649,7 +649,17 @@ void D3D12GSRender::ExecCMD() auto pixels = vm::get_ptr(texaddr); void *textureData; check(Texture->Map(0, nullptr, (void**)&textureData)); + std::vector texdata; + texdata.resize(textureSize); memcpy(textureData, pixels, textureSize); + memcpy(texdata.data(), pixels, textureSize); + for (unsigned i = 0; i < textureSize / 4; i++) + { +// ((char*)textureData)[4 * i] = ((char*)textureData)[4 * i + 1];// *(i % 2); +// ((char*)textureData)[4 * i + 1] = 255 *(i % 2); +// ((char*)textureData)[4 * i + 2] = ((char*)textureData)[4 * i + 1];// *(i % 2); +// ((char*)textureData)[4 * i + 3] = ((char*)textureData)[4 * i + 1];// *(i % 2); + } Texture->Unmap(0, nullptr); D3D12_RESOURCE_DESC vramTextureDesc = {}; @@ -695,7 +705,7 @@ void D3D12GSRender::ExecCMD() srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; srvDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; srvDesc.Texture2D.MipLevels = 1; - srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + srvDesc.Shader4ComponentMapping = D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING(D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0, D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0, D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0, D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0); D3D12_CPU_DESCRIPTOR_HANDLE Handle = 
m_textureDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); Handle.ptr += (m_currentTextureIndex + usedTexture) * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); m_device->CreateShaderResourceView(vramTexture, &srvDesc, Handle); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h index fcd4a66333..f8a89aa5db 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h @@ -67,6 +67,7 @@ struct D3D12Traits for (const ParamType& PT : FS.m_parr.params[PF_PARAM_UNIFORM]) { + if (PT.type == "sampler2D") continue; for (const ParamItem PI : PT.items) { size_t offset = atoi(PI.name.c_str() + 2); diff --git a/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp index 765d026b98..c82f22a6a6 100644 --- a/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp @@ -630,7 +630,7 @@ std::string FragmentDecompiler::Decompile() case RSX_FP_OPCODE_DDY: SetDst("dFdy($0)"); break; case RSX_FP_OPCODE_NRM: SetDst("normalize($0)"); break; case RSX_FP_OPCODE_BEM: LOG_ERROR(RSX, "Unimplemented TEX_SRB instruction: BEM"); break; - case RSX_FP_OPCODE_TEX: SetDst("$t.Sample($tsampler, $0.xy)"); break; + case RSX_FP_OPCODE_TEX: SetDst("$t.Sample($tsampler, $0.xy).yzwx"); break; case RSX_FP_OPCODE_TEXBEM: SetDst("texture($t, $0.xy, $1.x)"); break; case RSX_FP_OPCODE_TXP: SetDst("textureProj($t, $0.xyz, $1.x)"); break; //TODO: More testing (Sonic The Hedgehog (NPUB-30442/NPEB-00478) and The Simpsons Arcade Game (NPUB30563)) case RSX_FP_OPCODE_TXPBEM: SetDst("textureProj($t, $0.xyz, $1.x)"); break; diff --git a/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.cpp index e7ea5ca365..10facd5b5c 100644 --- a/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.cpp @@ -565,7 +565,7 @@ void 
VertexDecompiler::insertMainEnd(std::stringstream & OS) OS << " Out." << i.src_reg << " = " << i.src_reg << ";" << std::endl; } // TODO: Find why I need to do this - OS << " Out.dst_reg0.z *= -1.;" << std::endl; +// OS << " Out.dst_reg0.z *= -1.;" << std::endl; OS << " Out.dst_reg0 = mul(Out.dst_reg0, scaleOffsetMat);" << std::endl; OS << " return Out;" << std::endl; OS << "}" << std::endl; From 4114df50c273546168a823b45989032ec3d193c7 Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 18 May 2015 22:46:31 +0200 Subject: [PATCH 070/343] d3d12: Use correct VS debug macro + cleaning --- rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp index c82f22a6a6..765d026b98 100644 --- a/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp @@ -630,7 +630,7 @@ std::string FragmentDecompiler::Decompile() case RSX_FP_OPCODE_DDY: SetDst("dFdy($0)"); break; case RSX_FP_OPCODE_NRM: SetDst("normalize($0)"); break; case RSX_FP_OPCODE_BEM: LOG_ERROR(RSX, "Unimplemented TEX_SRB instruction: BEM"); break; - case RSX_FP_OPCODE_TEX: SetDst("$t.Sample($tsampler, $0.xy).yzwx"); break; + case RSX_FP_OPCODE_TEX: SetDst("$t.Sample($tsampler, $0.xy)"); break; case RSX_FP_OPCODE_TEXBEM: SetDst("texture($t, $0.xy, $1.x)"); break; case RSX_FP_OPCODE_TXP: SetDst("textureProj($t, $0.xyz, $1.x)"); break; //TODO: More testing (Sonic The Hedgehog (NPUB-30442/NPEB-00478) and The Simpsons Arcade Game (NPUB30563)) case RSX_FP_OPCODE_TXPBEM: SetDst("textureProj($t, $0.xyz, $1.x)"); break; From 5640e81eb554fe19b35e17b42a77b663fe5a9427 Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 18 May 2015 22:46:54 +0200 Subject: [PATCH 071/343] d3d12: Forgot hunk --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git 
a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index f0d520b9c7..c31cf89522 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -28,17 +28,17 @@ D3D12GSRender::D3D12GSRender() m_currentStorageOffset = 0; m_currentTextureIndex = 0; // Enable d3d debug layer -//#ifdef DEBUG +#ifdef _DEBUG Microsoft::WRL::ComPtr debugInterface; D3D12GetDebugInterface(IID_PPV_ARGS(&debugInterface)); debugInterface->EnableDebugLayer(); -//#endif +#endif Microsoft::WRL::ComPtr dxgiFactory; check(CreateDXGIFactory(IID_PPV_ARGS(&dxgiFactory))); // Create adapter IDXGIAdapter* adaptater = nullptr; -#ifdef DEBUG +#ifdef _DEBUG check(dxgiFactory->EnumWarpAdapter(IID_PPV_ARGS(&adaptater))); #endif check(D3D12CreateDevice(adaptater, D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&m_device))); @@ -649,17 +649,7 @@ void D3D12GSRender::ExecCMD() auto pixels = vm::get_ptr(texaddr); void *textureData; check(Texture->Map(0, nullptr, (void**)&textureData)); - std::vector texdata; - texdata.resize(textureSize); memcpy(textureData, pixels, textureSize); - memcpy(texdata.data(), pixels, textureSize); - for (unsigned i = 0; i < textureSize / 4; i++) - { -// ((char*)textureData)[4 * i] = ((char*)textureData)[4 * i + 1];// *(i % 2); -// ((char*)textureData)[4 * i + 1] = 255 *(i % 2); -// ((char*)textureData)[4 * i + 2] = ((char*)textureData)[4 * i + 1];// *(i % 2); -// ((char*)textureData)[4 * i + 3] = ((char*)textureData)[4 * i + 1];// *(i % 2); - } Texture->Unmap(0, nullptr); D3D12_RESOURCE_DESC vramTextureDesc = {}; @@ -705,7 +695,7 @@ void D3D12GSRender::ExecCMD() srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; srvDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; srvDesc.Texture2D.MipLevels = 1; - srvDesc.Shader4ComponentMapping = D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING(D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0, D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0, D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0, 
D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0); + srvDesc.Shader4ComponentMapping = D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING(D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_1, D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_2, D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_3, D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0); D3D12_CPU_DESCRIPTOR_HANDLE Handle = m_textureDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); Handle.ptr += (m_currentTextureIndex + usedTexture) * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); m_device->CreateShaderResourceView(vramTexture, &srvDesc, Handle); From cbd7d3d6147ce5200994059dd828cd7996531b22 Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 18 May 2015 23:02:03 +0200 Subject: [PATCH 072/343] d3d12: Fix depth being offseted by .5 + add depth buffer support --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index c31cf89522..fd3ffe0e31 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -422,7 +422,7 @@ void D3D12GSRender::setScaleOffset() // Offset scaleOffsetMat[3] = (float&)methodRegisters[NV4097_SET_VIEWPORT_OFFSET + (0x4 * 0)] - (RSXThread::m_width / RSXThread::m_width_scale); scaleOffsetMat[7] = (float&)methodRegisters[NV4097_SET_VIEWPORT_OFFSET + (0x4 * 1)] - (RSXThread::m_height / RSXThread::m_height_scale); - scaleOffsetMat[11] = (float&)methodRegisters[NV4097_SET_VIEWPORT_OFFSET + (0x4 * 2)] - 1 / 2.0f; + scaleOffsetMat[11] = (float&)methodRegisters[NV4097_SET_VIEWPORT_OFFSET + (0x4 * 2)]; scaleOffsetMat[3] /= RSXThread::m_width / RSXThread::m_width_scale; scaleOffsetMat[7] /= RSXThread::m_height / RSXThread::m_height_scale; @@ -726,23 +726,24 @@ void D3D12GSRender::ExecCMD() m_currentTextureIndex += usedTexture; InitDrawBuffers(); + switch 
(m_surface_color_target) { case CELL_GCM_SURFACE_TARGET_NONE: break; case CELL_GCM_SURFACE_TARGET_0: - commandList->OMSetRenderTargets(1, &m_fbo->getRTTCPUHandle(0), true, nullptr); + commandList->OMSetRenderTargets(1, &m_fbo->getRTTCPUHandle(0), true, &m_fbo->getDSVCPUHandle()); break; case CELL_GCM_SURFACE_TARGET_1: - commandList->OMSetRenderTargets(1, &m_fbo->getRTTCPUHandle(1), true, nullptr); + commandList->OMSetRenderTargets(1, &m_fbo->getRTTCPUHandle(1), true, &m_fbo->getDSVCPUHandle()); break; case CELL_GCM_SURFACE_TARGET_MRT1: - commandList->OMSetRenderTargets(2, &m_fbo->getRTTCPUHandle(0), true, nullptr); + commandList->OMSetRenderTargets(2, &m_fbo->getRTTCPUHandle(0), true, &m_fbo->getDSVCPUHandle()); break; case CELL_GCM_SURFACE_TARGET_MRT2: - commandList->OMSetRenderTargets(3, &m_fbo->getRTTCPUHandle(0), true, nullptr); + commandList->OMSetRenderTargets(3, &m_fbo->getRTTCPUHandle(0), true, &m_fbo->getDSVCPUHandle()); break; case CELL_GCM_SURFACE_TARGET_MRT3: - commandList->OMSetRenderTargets(4, &m_fbo->getRTTCPUHandle(0), true, nullptr); + commandList->OMSetRenderTargets(4, &m_fbo->getRTTCPUHandle(0), true, &m_fbo->getDSVCPUHandle()); break; default: LOG_ERROR(RSX, "Bad surface color target: %d", m_surface_color_target); From a279aebf315406d3ebae8a5af67408727a8d4272 Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 18 May 2015 23:41:52 +0200 Subject: [PATCH 073/343] d3d12: Use an indexed draw for unindexed quad --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 66 ++++++++++++++++++++++++++- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 3 ++ 2 files changed, 68 insertions(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index fd3ffe0e31..3e5d40ab21 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -106,6 +106,15 @@ D3D12GSRender::D3D12GSRender() )); } + check(m_device->CreateCommittedResource( + &heapProp, + D3D12_HEAP_FLAG_NONE, + &resDesc, + 
D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&m_indexBuffer) + )); + check(m_device->CreateCommittedResource( &heapProp, D3D12_HEAP_FLAG_NONE, @@ -402,6 +411,43 @@ void D3D12GSRender::EnableVertexData(bool indexed_draw) indexBufferView.SizeInBytes = (UINT)m_indexed_array.m_data.size(); indexBufferView.BufferLocation = m_indexBuffer->GetGPUVirtualAddress();*/ } + + switch (m_draw_mode - 1) + { + default: + case 0: + case 1: + case 2: + case 3: + case 4: + case 5: + case 6: + m_forcedIndexBuffer = false; + case 7: + m_forcedIndexBuffer = true; + } + + if (m_forcedIndexBuffer) + { + unsigned short *bufferMap; + check(m_indexBuffer->Map(0, nullptr, (void**)&bufferMap)); + + memcpy(bufferMap, m_indexed_array.m_data.data(), m_indexed_array.m_data.size()); + m_indexBufferCount = 0; + // QUADS + for (unsigned i = 0; i < m_draw_array_count / 4; i++) + { + // First triangle + bufferMap[6 * i] = 4 * i; + bufferMap[6 * i + 1] = 4 * i + 1; + bufferMap[6 * i + 2] = 4 * i + 2; + bufferMap[6 * i + 3] = 4 * i + 1; + bufferMap[6 * i + 4] = 4 * i + 2; + bufferMap[6 * i + 5] = 4 * i + 3; + m_indexBufferCount += 6; + } + m_indexBuffer->Unmap(0, nullptr); + } } void D3D12GSRender::setScaleOffset() @@ -590,6 +636,14 @@ void D3D12GSRender::ExecCMD() assert((m_draw_array_first + m_draw_array_count) * item_size <= m_vertexBufferSize[i]); } commandList->IASetVertexBuffers(0, (UINT)vertexBufferViews.size(), vertexBufferViews.data()); + if (m_forcedIndexBuffer) + { + D3D12_INDEX_BUFFER_VIEW indexBufferView = {}; + indexBufferView.SizeInBytes = (UINT)m_indexBufferCount * sizeof(unsigned short); + indexBufferView.BufferLocation = m_indexBuffer->GetGPUVirtualAddress(); + indexBufferView.Format = DXGI_FORMAT_R16_UINT; + commandList->IASetIndexBuffer(&indexBufferView); + } } if (!LoadProgram()) @@ -777,6 +831,8 @@ void D3D12GSRender::ExecCMD() #define GL_QUAD_STRIP 0x0008 #define GL_POLYGON 0x0009 */ + + bool requireIndexBuffer = false; switch (m_draw_mode - 1) { case 0: @@ -800,12 
+856,20 @@ void D3D12GSRender::ExecCMD() case 6: commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ); break; + case 7: + commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + requireIndexBuffer = true; default: - commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ); + commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); // LOG_ERROR(RSX, "Unsupported primitive type"); break; } + if (m_forcedIndexBuffer) + { + commandList->DrawIndexedInstanced((UINT)m_indexBufferCount, 1, 0, (UINT)m_draw_array_first, 0); + } + if (m_indexed_array.m_count) { /* switch (m_indexed_array.m_type) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index c052af7e1d..2094aef657 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -56,6 +56,9 @@ private: ID3D12DescriptorHeap *m_samplerDescriptorHeap; size_t m_currentTextureIndex; + + bool m_forcedIndexBuffer; + size_t m_indexBufferCount; ID3D12Resource *m_indexBuffer, *m_vertexBuffer[m_vertex_count]; ID3D12Resource *m_constantsVertexBuffer, *m_constantsFragmentBuffer; size_t constantsFragmentSize; From 2985f2d6e3e2e6395bd93d6a348ecfcc6cc63296 Mon Sep 17 00:00:00 2001 From: vlj Date: Tue, 19 May 2015 00:13:21 +0200 Subject: [PATCH 074/343] d3d12: Use naive vertex buffer offset --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 33 ++++++++++++--------------- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 2 +- 2 files changed, 15 insertions(+), 20 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 3e5d40ab21..0f9b2f2a71 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -363,8 +363,9 @@ void D3D12GSRender::ExecCMD(u32 cmd) m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**) &commandList); } -void D3D12GSRender::EnableVertexData(bool indexed_draw) +std::vector 
D3D12GSRender::EnableVertexData(bool indexed_draw) { + std::vector result; m_IASet = getIALayout(m_device, indexed_draw, m_vertex_data); const u32 data_offset = indexed_draw ? 0 : m_draw_array_first; @@ -378,10 +379,17 @@ void D3D12GSRender::EnableVertexData(bool indexed_draw) // TODO: Use default heap and upload data void *bufferMap; check(m_vertexBuffer[i]->Map(0, nullptr, (void**)&bufferMap)); - memcpy((char*)bufferMap + data_offset * item_size, &m_vertex_data[i].data[data_offset * item_size], data_size); + memcpy((char*)bufferMap + m_vertexBufferSize[i] + data_offset * item_size, &m_vertex_data[i].data[data_offset * item_size], data_size); m_vertexBuffer[i]->Unmap(0, nullptr); - size_t newOffset = (data_offset + data_size) * item_size; - m_vertexBufferSize[i] = newOffset > m_vertexBufferSize[i] ? newOffset : m_vertexBufferSize[i]; + + size_t subBufferSize = (data_offset + data_size) * item_size; + + D3D12_VERTEX_BUFFER_VIEW vertexBufferView = {}; + vertexBufferView.BufferLocation = m_vertexBuffer[i]->GetGPUVirtualAddress() + m_vertexBufferSize[i]; + vertexBufferView.SizeInBytes = (UINT)subBufferSize; + vertexBufferView.StrideInBytes = (UINT)item_size; + result.push_back(vertexBufferView); + m_vertexBufferSize[i] += subBufferSize; } if (indexed_draw) @@ -448,6 +456,7 @@ void D3D12GSRender::EnableVertexData(bool indexed_draw) } m_indexBuffer->Unmap(0, nullptr); } + return result; } void D3D12GSRender::setScaleOffset() @@ -620,21 +629,7 @@ void D3D12GSRender::ExecCMD() if (m_indexed_array.m_count || m_draw_array_count) { - EnableVertexData(m_indexed_array.m_count ? 
true : false); - std::vector vertexBufferViews; - for (u32 i = 0; i < m_vertex_count; ++i) - { - if (!m_vertex_data[i].IsEnabled()) continue; - const size_t item_size = m_vertex_data[i].GetTypeSize() * m_vertex_data[i].size; - D3D12_VERTEX_BUFFER_VIEW vertexBufferView = {}; - - vertexBufferView.BufferLocation = m_vertexBuffer[i]->GetGPUVirtualAddress(); - vertexBufferView.SizeInBytes = (UINT)m_vertexBufferSize[i]; - vertexBufferView.StrideInBytes = (UINT)item_size; - vertexBufferViews.push_back(vertexBufferView); - - assert((m_draw_array_first + m_draw_array_count) * item_size <= m_vertexBufferSize[i]); - } + const std::vector &vertexBufferViews = EnableVertexData(m_indexed_array.m_count ? true : false); commandList->IASetVertexBuffers(0, (UINT)vertexBufferViews.size(), vertexBufferViews.data()); if (m_forcedIndexBuffer) { diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 2094aef657..f418b43542 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -94,7 +94,7 @@ private: virtual void Close() override; bool LoadProgram(); - void EnableVertexData(bool indexed_draw = false); + std::vector EnableVertexData(bool indexed_draw = false); void setScaleOffset(); void FillVertexShaderConstantsBuffer(); void FillPixelShaderConstantsBuffer(); From 445c7dd884f957c60bcec4ddfd101bfb9ec835f8 Mon Sep 17 00:00:00 2001 From: vlj Date: Tue, 19 May 2015 00:27:43 +0200 Subject: [PATCH 075/343] d3d12: Use a default value of 1 for .w channel of output in Vertex Program --- rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.cpp index 10facd5b5c..9fc6af2b09 100644 --- a/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.cpp @@ -542,7 +542,7 @@ void VertexDecompiler::insertMainStart(std::stringstream & OS) for 
(const ParamType PT : m_parr.params[PF_PARAM_NONE]) { for (const ParamItem &PI : PT.items) - OS << " " << PT.type << " " << PI.name << " = float4(0., 0., 0., 0.);" << std::endl; + OS << " " << PT.type << " " << PI.name << " = float4(0., 0., 0., 1.);" << std::endl; } for (const ParamType PT : m_parr.params[PF_PARAM_IN]) From edb9a97c172911b35249b4a5df54edc493b2ed96 Mon Sep 17 00:00:00 2001 From: vlj Date: Tue, 19 May 2015 00:57:19 +0200 Subject: [PATCH 076/343] d3d12: Some fix to quad emulation mode --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 0f9b2f2a71..5ea88bdd09 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -431,8 +431,10 @@ std::vector D3D12GSRender::EnableVertexData(bool index case 5: case 6: m_forcedIndexBuffer = false; + break; case 7: m_forcedIndexBuffer = true; + break; } if (m_forcedIndexBuffer) @@ -449,7 +451,7 @@ std::vector D3D12GSRender::EnableVertexData(bool index bufferMap[6 * i] = 4 * i; bufferMap[6 * i + 1] = 4 * i + 1; bufferMap[6 * i + 2] = 4 * i + 2; - bufferMap[6 * i + 3] = 4 * i + 1; + bufferMap[6 * i + 3] = 4 * i; bufferMap[6 * i + 4] = 4 * i + 2; bufferMap[6 * i + 5] = 4 * i + 3; m_indexBufferCount += 6; @@ -861,9 +863,9 @@ void D3D12GSRender::ExecCMD() } if (m_forcedIndexBuffer) - { commandList->DrawIndexedInstanced((UINT)m_indexBufferCount, 1, 0, (UINT)m_draw_array_first, 0); - } + else if (m_draw_array_count) + commandList->DrawInstanced(m_draw_array_count, 1, m_draw_array_first, 0); if (m_indexed_array.m_count) { @@ -889,11 +891,7 @@ void D3D12GSRender::ExecCMD() m_indexed_array.Reset();*/ } - if (m_draw_array_count) - { - //LOG_WARNING(RSX,"glDrawArrays(%d,%d,%d)", m_draw_mode - 1, m_draw_array_first, m_draw_array_count); - commandList->DrawInstanced(m_draw_array_count, 1, m_draw_array_first, 0); - } + 
check(commandList->Close()); m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)&commandList); From c465b6699a368746d7f7ae7568220234ab7f9805 Mon Sep 17 00:00:00 2001 From: vlj Date: Tue, 19 May 2015 18:17:33 +0200 Subject: [PATCH 077/343] d3d12: Use the fragment decompile class --- .../D3D12/D3D12FragmentProgramDecompiler.cpp | 141 ++++ .../D3D12/D3D12FragmentProgramDecompiler.h | 21 + rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h | 6 +- ...r.cpp => D3D12VertexProgramDecompiler.cpp} | 2 +- ...piler.h => D3D12VertexProgramDecompiler.h} | 2 +- .../RSX/D3D12/FragmentProgramDecompiler.cpp | 753 ------------------ .../Emu/RSX/D3D12/FragmentProgramDecompiler.h | 51 -- rpcs3/Emu/RSX/D3D12/ShaderParam.cpp | 6 - rpcs3/Emu/RSX/D3D12/ShaderParam.h | 194 ----- rpcs3/emucore.vcxproj | 10 +- rpcs3/emucore.vcxproj.filters | 14 +- 11 files changed, 175 insertions(+), 1025 deletions(-) create mode 100644 rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp create mode 100644 rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.h rename rpcs3/Emu/RSX/D3D12/{VertexProgramDecompiler.cpp => D3D12VertexProgramDecompiler.cpp} (99%) rename rpcs3/Emu/RSX/D3D12/{VertexProgramDecompiler.h => D3D12VertexProgramDecompiler.h} (98%) delete mode 100644 rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp delete mode 100644 rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.h delete mode 100644 rpcs3/Emu/RSX/D3D12/ShaderParam.cpp delete mode 100644 rpcs3/Emu/RSX/D3D12/ShaderParam.h diff --git a/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp new file mode 100644 index 0000000000..c75b9ddfff --- /dev/null +++ b/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp @@ -0,0 +1,141 @@ +#include "stdafx.h" +#if defined(DX12_SUPPORT) +#include "D3D12FragmentProgramDecompiler.h" + +#include "Utilities/Log.h" +#include "Emu/Memory/Memory.h" +#include "Emu/System.h" + +static std::string typeName[] = +{ + "float", + "float2", + "float3", + 
"float4" +}; + +static std::string functionName[] = +{ + "saturate", + "float4(dot($0.xy, $1.xy), dot($0.xy, $1.xy), dot($0.xy, $1.xy), dot($0.xy, $1.xy))", + "frac($0)", +}; + +D3D12FragmentDecompiler::D3D12FragmentDecompiler(u32 addr, u32& size, u32 ctrl) : + FragmentProgramDecompiler(addr, size, ctrl) +{ + +} + +void D3D12FragmentDecompiler::insertHeader(std::stringstream & OS) +{ + OS << "// Header" << std::endl; +} + +void D3D12FragmentDecompiler::insertIntputs(std::stringstream & OS) +{ + OS << "struct PixelInput" << std::endl; + OS << "{" << std::endl; + OS << " float4 Position : SV_POSITION;" << std::endl; + OS << " float4 diff_color : COLOR0;" << std::endl; + OS << " float4 spec_color : COLOR1;" << std::endl; + OS << " float4 dst_reg3 : COLOR2;" << std::endl; + OS << " float4 dst_reg4 : COLOR3;" << std::endl; + OS << " float fogc : FOG;" << std::endl; + OS << " float4 dummy : COLOR4;" << std::endl; + OS << " float4 tc0 : TEXCOORD0;" << std::endl; + OS << " float4 tc1 : TEXCOORD1;" << std::endl; + OS << " float4 tc2 : TEXCOORD2;" << std::endl; + OS << " float4 tc3 : TEXCOORD3;" << std::endl; + OS << " float4 tc4 : TEXCOORD4;" << std::endl; + OS << " float4 tc5 : TEXCOORD5;" << std::endl; + OS << " float4 tc6 : TEXCOORD6;" << std::endl; + OS << " float4 tc7 : TEXCOORD7;" << std::endl; + OS << " float4 tc8 : TEXCOORD8;" << std::endl; + OS << "};" << std::endl; +} + +void D3D12FragmentDecompiler::insertOutputs(std::stringstream & OS) +{ + OS << "struct PixelOutput" << std::endl; + OS << "{" << std::endl; + const std::pair table[] = + { + { "ocol0", "r0" }, + { "ocol1", "r2" }, + { "ocol2", "r3" }, + { "ocol3", "r4" }, + }; + + for (int i = 0; i < sizeof(table) / sizeof(*table); ++i) + { + if (m_parr.HasParam(PF_PARAM_NONE, typeName[3], table[i].second)) + OS << " " << typeName[3] << " " << table[i].first << " : SV_TARGET" << i << ";" << std::endl; + } + OS << "};" << std::endl; +} + +void D3D12FragmentDecompiler::insertConstants(std::stringstream & OS) +{ + OS 
<< "cbuffer CONSTANT : register(b2)" << std::endl; + OS << "{" << std::endl; + for (ParamType PT : m_parr.params[PF_PARAM_UNIFORM]) + { + if (PT.type == "sampler2D") + continue; + for (ParamItem PI : PT.items) + OS << " " << PT.type << " " << PI.name << ";" << std::endl; + } + OS << "};" << std::endl << std::endl; + size_t textureIndex = 0; + for (ParamType PT : m_parr.params[PF_PARAM_UNIFORM]) + { + if (PT.type != "sampler2D") + continue; + for (ParamItem PI : PT.items) + { + OS << "Texture2D " << PI.name << " : register(t" << textureIndex << ");" << std::endl; + OS << "sampler " << PI.name << "sampler : register(s" << textureIndex << ");" << std::endl; + textureIndex++; + } + } +} + +void D3D12FragmentDecompiler::insertMainStart(std::stringstream & OS) +{ + OS << "PixelOutput main(PixelInput In)" << std::endl; + OS << "{" << std::endl; + for (ParamType PT : m_parr.params[PF_PARAM_IN]) + { + for (ParamItem PI : PT.items) + OS << " " << PT.type << " " << PI.name << " = In." << PI.name << ";" << std::endl; + } + // Declare output + for (ParamType PT : m_parr.params[PF_PARAM_NONE]) + { + for (ParamItem PI : PT.items) + OS << " " << PT.type << " " << PI.name << " = float4(0., 0., 0., 0.);" << std::endl; + } +} + +void D3D12FragmentDecompiler::insertMainEnd(std::stringstream & OS) +{ + const std::pair table[] = + { + { "ocol0", "r0" }, + { "ocol1", "r2" }, + { "ocol2", "r3" }, + { "ocol3", "r4" }, + }; + + OS << " PixelOutput Out;" << std::endl; + for (int i = 0; i < sizeof(table) / sizeof(*table); ++i) + { + if (m_parr.HasParam(PF_PARAM_NONE, typeName[3], table[i].second)) + OS << " Out." 
<< table[i].first << " = " << table[i].second << ";" << std::endl; + } + OS << " return Out;" << std::endl; + OS << "}" << std::endl; +} + +#endif \ No newline at end of file diff --git a/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.h b/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.h new file mode 100644 index 0000000000..94d0985d9e --- /dev/null +++ b/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.h @@ -0,0 +1,21 @@ +#pragma once +#if defined(DX12_SUPPORT) +#include "Emu/RSX/RSXFragmentProgram.h" +#include + +#include "../Common/FragmentProgramDecompiler.h" + +class D3D12FragmentDecompiler : public FragmentProgramDecompiler +{ +protected: + virtual void insertHeader(std::stringstream &OS) override; + virtual void insertIntputs(std::stringstream &OS) override; + virtual void insertOutputs(std::stringstream &OS) override; + virtual void insertConstants(std::stringstream &OS) override; + virtual void insertMainStart(std::stringstream &OS) override; + virtual void insertMainEnd(std::stringstream &OS) override; +public: + D3D12FragmentDecompiler(u32 addr, u32& size, u32 ctrl); +}; + +#endif \ No newline at end of file diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h index f8a89aa5db..60eaad2750 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h @@ -4,8 +4,8 @@ #include #include #include "../Common/ProgramStateCache.h" -#include "VertexProgramDecompiler.h" -#include "FragmentProgramDecompiler.h" +#include "D3D12VertexProgramDecompiler.h" +#include "D3D12FragmentProgramDecompiler.h" #include "Utilities/File.h" @@ -61,7 +61,7 @@ struct D3D12Traits static void RecompileFragmentProgram(RSXFragmentProgram *RSXFP, FragmentProgramData& fragmentProgramData, size_t ID) { - FragmentDecompiler FS(RSXFP->addr, RSXFP->size, RSXFP->offset); + D3D12FragmentDecompiler FS(RSXFP->addr, RSXFP->size, RSXFP->offset); const std::string &shader = FS.Decompile(); 
fragmentProgramData.Compile(shader, Shader::SHADER_TYPE::SHADER_TYPE_FRAGMENT); diff --git a/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp similarity index 99% rename from rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.cpp rename to rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp index 9fc6af2b09..015307e8d6 100644 --- a/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp @@ -1,6 +1,6 @@ #include "stdafx.h" #if defined(DX12_SUPPORT) -#include "VertexProgramDecompiler.h" +#include "D3D12VertexProgramDecompiler.h" #include "Utilities/Log.h" #include "Emu/System.h" diff --git a/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.h b/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.h similarity index 98% rename from rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.h rename to rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.h index 7fcbc97a30..11195bdf41 100644 --- a/rpcs3/Emu/RSX/D3D12/VertexProgramDecompiler.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.h @@ -3,7 +3,7 @@ #include "Emu/RSX/RSXVertexProgram.h" #include #include -#include "ShaderParam.h" +#include "../Common/ShaderParam.h" struct VertexDecompiler { diff --git a/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp deleted file mode 100644 index 765d026b98..0000000000 --- a/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.cpp +++ /dev/null @@ -1,753 +0,0 @@ -#include "stdafx.h" -#if defined(DX12_SUPPORT) -#include "FragmentProgramDecompiler.h" - -#include "Utilities/Log.h" -#include "Emu/Memory/Memory.h" -#include "Emu/System.h" - -static std::string typeName[] = -{ - "float", - "float2", - "float3", - "float4" -}; - -enum FUNCTION { - FUNCTION_SATURATE, - FUNCTION_DP2, - FUNCTION_FRACT, -}; - -static std::string functionName[] = -{ - "saturate", - "float4(dot($0.xy, $1.xy), dot($0.xy, $1.xy), dot($0.xy, $1.xy), dot($0.xy, $1.xy))", - "frac($0)", -}; 
- -FragmentDecompiler::FragmentDecompiler(u32 addr, u32& size, u32 ctrl) : - m_addr(addr), - m_size(size), - m_const_index(0), - m_location(0), - m_ctrl(ctrl) -{ - m_size = 0; -} - - -void FragmentDecompiler::SetDst(std::string code, bool append_mask) -{ - if (!src0.exec_if_eq && !src0.exec_if_gr && !src0.exec_if_lt) return; - - switch (src1.scale) - { - case 0: break; - case 1: code = "(" + code + " * 2.0)"; break; - case 2: code = "(" + code + " * 4.0)"; break; - case 3: code = "(" + code + " * 8.0)"; break; - case 5: code = "(" + code + " / 2.0)"; break; - case 6: code = "(" + code + " / 4.0)"; break; - case 7: code = "(" + code + " / 8.0)"; break; - - default: - LOG_ERROR(RSX, "Bad scale: %d", fmt::by_value(src1.scale)); - Emu.Pause(); - break; - } - - if (dst.saturate) - { - code = functionName[FUNCTION_SATURATE] + "(" + code + ")"; - } - - code += (append_mask ? "$m" : ""); - - if (dst.no_dest) - { - if (dst.set_cond) - { - AddCode("$ifcond " + m_parr.AddParam(PF_PARAM_NONE, typeName[3], "cc" + std::to_string(src0.cond_mod_reg_index)) + "$m = " + code + ";"); - } - else - { - AddCode("$ifcond " + code + ";"); - } - - return; - } - - std::string dest = AddReg(dst.dest_reg, dst.fp16) + "$m"; - - AddCodeCond(Format(dest), code); - //AddCode("$ifcond " + dest + code + (append_mask ? "$m;" : ";")); - - if (dst.set_cond) - { - AddCode(m_parr.AddParam(PF_PARAM_NONE, typeName[3], "cc" + std::to_string(src0.cond_mod_reg_index)) + "$m = " + dest + ";"); - } -} - -void FragmentDecompiler::AddCode(const std::string& code) -{ - main.append(m_code_level, '\t') += Format(code) + "\n"; -} - -std::string FragmentDecompiler::GetMask() -{ - std::string ret; - - static const char dst_mask[4] = - { - 'x', 'y', 'z', 'w', - }; - - if (dst.mask_x) ret += dst_mask[0]; - if (dst.mask_y) ret += dst_mask[1]; - if (dst.mask_z) ret += dst_mask[2]; - if (dst.mask_w) ret += dst_mask[3]; - - return ret.empty() || strncmp(ret.c_str(), dst_mask, 4) == 0 ? "" : ("." 
+ ret); -} - -std::string FragmentDecompiler::AddReg(u32 index, int fp16) -{ - return m_parr.AddParam(PF_PARAM_NONE, typeName[3], std::string(fp16 ? "h" : "r") + std::to_string(index), typeName[3] + "(0.0)"); -} - -bool FragmentDecompiler::HasReg(u32 index, int fp16) -{ - return m_parr.HasParam(PF_PARAM_NONE, typeName[3], - std::string(fp16 ? "h" : "r") + std::to_string(index)); -} - -std::string FragmentDecompiler::AddCond() -{ - return m_parr.AddParam(PF_PARAM_NONE, typeName[3], "cc" + std::to_string(src0.cond_reg_index)); -} - -std::string FragmentDecompiler::AddConst() -{ - std::string name = std::string("fc") + std::to_string(m_size + 4 * 4); - if (m_parr.HasParam(PF_PARAM_UNIFORM, typeName[3], name)) - { - return name; - } - - auto data = vm::ptr::make(m_addr + m_size + 4 * sizeof(u32)); - - m_offset = 2 * 4 * sizeof(u32); - u32 x = GetData(data[0]); - u32 y = GetData(data[1]); - u32 z = GetData(data[2]); - u32 w = GetData(data[3]); - return m_parr.AddParam(PF_PARAM_UNIFORM, typeName[3], name, - std::string(typeName[3] + "(") + std::to_string((float&)x) + ", " + std::to_string((float&)y) - + ", " + std::to_string((float&)z) + ", " + std::to_string((float&)w) + ")"); -} - -std::string FragmentDecompiler::AddTex() -{ - return m_parr.AddParam(PF_PARAM_UNIFORM, "sampler2D", std::string("tex") + std::to_string(dst.tex_num)); -} - -std::string FragmentDecompiler::Format(const std::string& code) -{ - const std::pair> repl_list[] = - { - { "$$", []() -> std::string { return "$"; } }, - { "$0", [this]() -> std::string {return GetSRC(src0);} },//std::bind(std::mem_fn(&GLFragmentDecompilerThread::GetSRC), *this, src0) }, - { "$1", [this]() -> std::string {return GetSRC(src1);} },//std::bind(std::mem_fn(&GLFragmentDecompilerThread::GetSRC), this, src1) }, - { "$2", [this]() -> std::string {return GetSRC(src2);} },//std::bind(std::mem_fn(&GLFragmentDecompilerThread::GetSRC), this, src2) }, - { "$t", std::bind(std::mem_fn(&FragmentDecompiler::AddTex), this) }, - { "$m", 
std::bind(std::mem_fn(&FragmentDecompiler::GetMask), this) }, - { "$ifcond ", [this]() -> std::string - { - const std::string& cond = GetCond(); - if (cond == "true") return ""; - return "if(" + cond + ") "; - } - }, - { "$cond", std::bind(std::mem_fn(&FragmentDecompiler::GetCond), this) }, - { "$c", std::bind(std::mem_fn(&FragmentDecompiler::AddConst), this) } - }; - - return fmt::replace_all(code, repl_list); -} - -std::string FragmentDecompiler::GetCond() -{ - if (src0.exec_if_gr && src0.exec_if_lt && src0.exec_if_eq) - { - return "true"; - } - else if (!src0.exec_if_gr && !src0.exec_if_lt && !src0.exec_if_eq) - { - return "false"; - } - - static const char f[4] = { 'x', 'y', 'z', 'w' }; - - std::string swizzle, cond; - swizzle += f[src0.cond_swizzle_x]; - swizzle += f[src0.cond_swizzle_y]; - swizzle += f[src0.cond_swizzle_z]; - swizzle += f[src0.cond_swizzle_w]; - swizzle = swizzle == "xyzw" ? "" : "." + swizzle; - - if (src0.exec_if_gr && src0.exec_if_eq) - { - cond = "greaterThanEqual"; - } - else if (src0.exec_if_lt && src0.exec_if_eq) - { - cond = "lessThanEqual"; - } - else if (src0.exec_if_gr && src0.exec_if_lt) - { - cond = "notEqual"; - } - else if (src0.exec_if_gr) - { - cond = "greaterThan"; - } - else if (src0.exec_if_lt) - { - cond = "lessThan"; - } - else //if(src0.exec_if_eq) - { - cond = "equal"; - } - - return "any(" + cond + "(" + AddCond() + swizzle + ", " + typeName[3] +"(0.0)))"; -} - -void FragmentDecompiler::AddCodeCond(const std::string& dst, const std::string& src) -{ - if (src0.exec_if_gr && src0.exec_if_lt && src0.exec_if_eq) - { - AddCode(dst + " = " + src + ";"); - return; - } - - if (!src0.exec_if_gr && !src0.exec_if_lt && !src0.exec_if_eq) - { - AddCode("//" + dst + " = " + src + ";"); - return; - } - - static const char f[4] = { 'x', 'y', 'z', 'w' }; - - std::string swizzle, cond; - swizzle += f[src0.cond_swizzle_x]; - swizzle += f[src0.cond_swizzle_y]; - swizzle += f[src0.cond_swizzle_z]; - swizzle += f[src0.cond_swizzle_w]; - 
swizzle = swizzle == "xyzw" ? "" : "." + swizzle; - - if (src0.exec_if_gr && src0.exec_if_eq) - { - cond = ">="; - } - else if (src0.exec_if_lt && src0.exec_if_eq) - { - cond = "<="; - } - else if (src0.exec_if_gr && src0.exec_if_lt) - { - cond = "!="; - } - else if (src0.exec_if_gr) - { - cond = ">"; - } - else if (src0.exec_if_lt) - { - cond = "<"; - } - else //if(src0.exec_if_eq) - { - cond = "=="; - } - - cond = "(" + AddCond() + swizzle + " " + cond + " " + typeName[3] + "(0., 0., 0., 0.))"; - - ShaderVariable dst_var(dst); - dst_var.symplify(); - - //const char *c_mask = f; - - if (dst_var.swizzles[0].length() == 1) - { - AddCode("if (" + cond + ".x) " + dst + " = " + src + ";"); - } - else - { - for (int i = 0; i < dst_var.swizzles[0].length(); ++i) - { - AddCode("if (" + cond + "." + f[i] + ") " + dst + "." + f[i] + " = " + src + "." + f[i] + ";"); - } - } -} - -template std::string FragmentDecompiler::GetSRC(T src) -{ - std::string ret; - - switch (src.reg_type) - { - case 0: //tmp - ret += AddReg(src.tmp_reg_index, src.fp16); - break; - - case 1: //input - { - static const std::string reg_table[] = - { - "gl_Position", - "diff_color", "spec_color", - "fogc", - "tc0", "tc1", "tc2", "tc3", "tc4", "tc5", "tc6", "tc7", "tc8", "tc9", - "ssa" - }; - - switch (dst.src_attr_reg_num) - { - case 0x00: ret += reg_table[0]; break; - default: - if (dst.src_attr_reg_num < sizeof(reg_table) / sizeof(reg_table[0])) - { - ret += m_parr.AddParam(PF_PARAM_IN, typeName[3], reg_table[dst.src_attr_reg_num]); - } - else - { - LOG_ERROR(RSX, "Bad src reg num: %d", fmt::by_value(dst.src_attr_reg_num)); - ret += m_parr.AddParam(PF_PARAM_IN, typeName[3], "unk"); - Emu.Pause(); - } - break; - } - } - break; - - case 2: //const - ret += AddConst(); - break; - - default: - LOG_ERROR(RSX, "Bad src type %d", fmt::by_value(src.reg_type)); - Emu.Pause(); - break; - } - - static const char f[4] = { 'x', 'y', 'z', 'w' }; - - std::string swizzle = ""; - swizzle += f[src.swizzle_x]; - swizzle 
+= f[src.swizzle_y]; - swizzle += f[src.swizzle_z]; - swizzle += f[src.swizzle_w]; - - if (strncmp(swizzle.c_str(), f, 4) != 0) ret += "." + swizzle; - - if (src.abs) ret = "abs(" + ret + ")"; - if (src.neg) ret = "-" + ret; - - return ret; -} - -std::string FragmentDecompiler::BuildCode() -{ - //main += fmt::Format("\tgl_FragColor = %c0;\n", m_ctrl & 0x40 ? 'r' : 'h'); - - if (m_ctrl & 0xe) main += m_ctrl & 0x40 ? "\tgl_FragDepth = r1.z;\n" : "\tgl_FragDepth = h2.z;\n"; - - std::stringstream OS; - insertHeader(OS); - OS << std::endl; - insertConstants(OS); - OS << std::endl; - insertIntputs(OS); - OS << std::endl; - insertOutputs(OS); - OS << std::endl; - insertMainStart(OS); - OS << main << std::endl; - insertMainEnd(OS); - - return OS.str(); -} - -void FragmentDecompiler::insertHeader(std::stringstream & OS) -{ - OS << "// Header" << std::endl; -} - -void FragmentDecompiler::insertIntputs(std::stringstream & OS) -{ - OS << "struct PixelInput" << std::endl; - OS << "{" << std::endl; - OS << " float4 Position : SV_POSITION;" << std::endl; - OS << " float4 diff_color : COLOR0;" << std::endl; - OS << " float4 spec_color : COLOR1;" << std::endl; - OS << " float4 dst_reg3 : COLOR2;" << std::endl; - OS << " float4 dst_reg4 : COLOR3;" << std::endl; - OS << " float fogc : FOG;" << std::endl; - OS << " float4 dummy : COLOR4;" << std::endl; - OS << " float4 tc0 : TEXCOORD0;" << std::endl; - OS << " float4 tc1 : TEXCOORD1;" << std::endl; - OS << " float4 tc2 : TEXCOORD2;" << std::endl; - OS << " float4 tc3 : TEXCOORD3;" << std::endl; - OS << " float4 tc4 : TEXCOORD4;" << std::endl; - OS << " float4 tc5 : TEXCOORD5;" << std::endl; - OS << " float4 tc6 : TEXCOORD6;" << std::endl; - OS << " float4 tc7 : TEXCOORD7;" << std::endl; - OS << " float4 tc8 : TEXCOORD8;" << std::endl; - OS << "};" << std::endl; -} - -void FragmentDecompiler::insertOutputs(std::stringstream & OS) -{ - OS << "struct PixelOutput" << std::endl; - OS << "{" << std::endl; - const std::pair table[] = - { - { 
"ocol0", "r0" }, - { "ocol1", "r2" }, - { "ocol2", "r3" }, - { "ocol3", "r4" }, - }; - - for (int i = 0; i < sizeof(table) / sizeof(*table); ++i) - { - if (m_parr.HasParam(PF_PARAM_NONE, typeName[3], table[i].second)) - OS << " " << typeName[3] << " " << table[i].first << " : SV_TARGET" << i << ";" << std::endl; - } - OS << "};" << std::endl; -} - -void FragmentDecompiler::insertConstants(std::stringstream & OS) -{ - OS << "cbuffer CONSTANT : register(b2)" << std::endl; - OS << "{" << std::endl; - for (ParamType PT : m_parr.params[PF_PARAM_UNIFORM]) - { - if (PT.type == "sampler2D") - continue; - for (ParamItem PI : PT.items) - OS << " " << PT.type << " " << PI.name << ";" << std::endl; - } - OS << "};" << std::endl << std::endl; - size_t textureIndex = 0; - for (ParamType PT : m_parr.params[PF_PARAM_UNIFORM]) - { - if (PT.type != "sampler2D") - continue; - for (ParamItem PI : PT.items) - { - OS << "Texture2D " << PI.name << " : register(t" << textureIndex << ");" << std::endl; - OS << "sampler " << PI.name << "sampler : register(s" << textureIndex << ");" << std::endl; - textureIndex++; - } - } -} - -void FragmentDecompiler::insertMainStart(std::stringstream & OS) -{ - OS << "PixelOutput main(PixelInput In)" << std::endl; - OS << "{" << std::endl; - for (ParamType PT : m_parr.params[PF_PARAM_IN]) - { - for (ParamItem PI : PT.items) - OS << " " << PT.type << " " << PI.name << " = In." 
<< PI.name << ";" << std::endl; - } - // Declare output - for (ParamType PT : m_parr.params[PF_PARAM_NONE]) - { - for (ParamItem PI : PT.items) - OS << " " << PT.type << " " << PI.name << " = float4(0., 0., 0., 0.);" << std::endl; - } -} - -void FragmentDecompiler::insertMainEnd(std::stringstream & OS) -{ - const std::pair table[] = - { - { "ocol0", "r0" }, - { "ocol1", "r2" }, - { "ocol2", "r3" }, - { "ocol3", "r4" }, - }; - - OS << " PixelOutput Out;" << std::endl; - for (int i = 0; i < sizeof(table) / sizeof(*table); ++i) - { - if (m_parr.HasParam(PF_PARAM_NONE, typeName[3], table[i].second)) - OS << " Out." << table[i].first << " = " << table[i].second << ";" << std::endl; - } - OS << " return Out;" << std::endl; - OS << "}" << std::endl; -} - -std::string FragmentDecompiler::Decompile() -{ - auto data = vm::ptr::make(m_addr); - m_size = 0; - m_location = 0; - m_loop_count = 0; - m_code_level = 1; - - enum - { - FORCE_NONE, - FORCE_SCT, - FORCE_SCB, - }; - - int forced_unit = FORCE_NONE; - - while (true) - { - for (auto finded = std::find(m_end_offsets.begin(), m_end_offsets.end(), m_size); - finded != m_end_offsets.end(); - finded = std::find(m_end_offsets.begin(), m_end_offsets.end(), m_size)) - { - m_end_offsets.erase(finded); - m_code_level--; - AddCode("}"); - m_loop_count--; - } - - for (auto finded = std::find(m_else_offsets.begin(), m_else_offsets.end(), m_size); - finded != m_else_offsets.end(); - finded = std::find(m_else_offsets.begin(), m_else_offsets.end(), m_size)) - { - m_else_offsets.erase(finded); - m_code_level--; - AddCode("}"); - AddCode("else"); - AddCode("{"); - m_code_level++; - } - - dst.HEX = GetData(data[0]); - src0.HEX = GetData(data[1]); - src1.HEX = GetData(data[2]); - src2.HEX = GetData(data[3]); - - m_offset = 4 * sizeof(u32); - - const u32 opcode = dst.opcode | (src1.opcode_is_branch << 6); - - auto SCT = [&]() - { - switch (opcode) - { - case RSX_FP_OPCODE_ADD: SetDst("($0 + $1)"); break; - case RSX_FP_OPCODE_DIV: SetDst("($0 / 
$1)"); break; - case RSX_FP_OPCODE_DIVSQ: SetDst("($0 / sqrt($1))"); break; - case RSX_FP_OPCODE_DP2: SetDst(functionName[FUNCTION_DP2]); break; - case RSX_FP_OPCODE_DP3: SetDst("vec4(dot($0.xyz, $1.xyz))"); break; - case RSX_FP_OPCODE_DP4: SetDst("vec4(dot($0, $1))"); break; - case RSX_FP_OPCODE_DP2A: SetDst("vec4($0.x * $1.x + $0.y * $1.y + $2.x)"); break; - case RSX_FP_OPCODE_MAD: SetDst("($0 * $1 + $2)"); break; - case RSX_FP_OPCODE_MAX: SetDst("max($0, $1)"); break; - case RSX_FP_OPCODE_MIN: SetDst("min($0, $1)"); break; - case RSX_FP_OPCODE_MOV: SetDst("$0"); break; - case RSX_FP_OPCODE_MUL: SetDst("($0 * $1)"); break; - case RSX_FP_OPCODE_RCP: SetDst("1 / $0"); break; - case RSX_FP_OPCODE_RSQ: SetDst("inversesqrt(abs($0))"); break; - case RSX_FP_OPCODE_SEQ: SetDst("vec4(equal($0, $1))"); break; - case RSX_FP_OPCODE_SFL: SetDst("vec4(0.0)"); break; - case RSX_FP_OPCODE_SGE: SetDst("vec4(greaterThanEqual($0, $1))"); break; - case RSX_FP_OPCODE_SGT: SetDst("vec4(greaterThan($0, $1))"); break; - case RSX_FP_OPCODE_SLE: SetDst("vec4(lessThanEqual($0, $1))"); break; - case RSX_FP_OPCODE_SLT: SetDst("vec4(lessThan($0, $1))"); break; - case RSX_FP_OPCODE_SNE: SetDst("vec4(notEqual($0, $1))"); break; - case RSX_FP_OPCODE_STR: SetDst("vec4(1.0)"); break; - - default: - return false; - } - - return true; - }; - - auto SCB = [&]() - { - switch (opcode) - { - case RSX_FP_OPCODE_ADD: SetDst("($0 + $1)"); break; - case RSX_FP_OPCODE_COS: SetDst("cos($0)"); break; - case RSX_FP_OPCODE_DP2: SetDst("dot($0.xy, $1.xy).xxxx"); break; - case RSX_FP_OPCODE_DP3: SetDst("dot($0.xyz, $1.xyz).xxxx"); break; - case RSX_FP_OPCODE_DP4: SetDst("dot($0, $1).xxxx"); break; - case RSX_FP_OPCODE_DP2A: SetDst("vec4($0.x * $1.x + $0.y * $1.y + $2.x)"); break; - case RSX_FP_OPCODE_DST: SetDst("vec4(distance($0, $1))"); break; - case RSX_FP_OPCODE_REFL: LOG_ERROR(RSX, "Unimplemented SCB instruction: REFL"); break; // TODO: Is this in the right category? 
- case RSX_FP_OPCODE_EX2: SetDst("exp2($0)"); break; - case RSX_FP_OPCODE_FLR: SetDst("floor($0)"); break; - case RSX_FP_OPCODE_FRC: SetDst(functionName[FUNCTION_FRACT]); break; - case RSX_FP_OPCODE_LIT: SetDst("vec4(1.0, $0.x, ($0.x > 0.0 ? exp($0.w * log2($0.y)) : 0.0), 1.0)"); break; - case RSX_FP_OPCODE_LIF: SetDst("vec4(1.0, $0.y, ($0.y > 0 ? pow(2.0, $0.w) : 0.0), 1.0)"); break; - case RSX_FP_OPCODE_LRP: LOG_ERROR(RSX, "Unimplemented SCB instruction: LRP"); break; // TODO: Is this in the right category? - case RSX_FP_OPCODE_LG2: SetDst("log2($0)"); break; - case RSX_FP_OPCODE_MAD: SetDst("($0 * $1 + $2)"); break; - case RSX_FP_OPCODE_MAX: SetDst("max($0, $1)"); break; - case RSX_FP_OPCODE_MIN: SetDst("min($0, $1)"); break; - case RSX_FP_OPCODE_MOV: SetDst("$0"); break; - case RSX_FP_OPCODE_MUL: SetDst("($0 * $1)"); break; - case RSX_FP_OPCODE_PK2: SetDst("packSnorm2x16($0)"); break; // TODO: More testing (Sonic The Hedgehog (NPUB-30442/NPEB-00478)) - case RSX_FP_OPCODE_PK4: SetDst("packSnorm4x8($0)"); break; // TODO: More testing (Sonic The Hedgehog (NPUB-30442/NPEB-00478)) - case RSX_FP_OPCODE_PK16: LOG_ERROR(RSX, "Unimplemented SCB instruction: PK16"); break; - case RSX_FP_OPCODE_PKB: LOG_ERROR(RSX, "Unimplemented SCB instruction: PKB"); break; - case RSX_FP_OPCODE_PKG: LOG_ERROR(RSX, "Unimplemented SCB instruction: PKG"); break; - case RSX_FP_OPCODE_SEQ: SetDst("vec4(equal($0, $1))"); break; - case RSX_FP_OPCODE_SFL: SetDst("vec4(0.0)"); break; - case RSX_FP_OPCODE_SGE: SetDst("vec4(greaterThanEqual($0, $1))"); break; - case RSX_FP_OPCODE_SGT: SetDst("vec4(greaterThan($0, $1))"); break; - case RSX_FP_OPCODE_SIN: SetDst("sin($0)"); break; - case RSX_FP_OPCODE_SLE: SetDst("vec4(lessThanEqual($0, $1))"); break; - case RSX_FP_OPCODE_SLT: SetDst("vec4(lessThan($0, $1))"); break; - case RSX_FP_OPCODE_SNE: SetDst("vec4(notEqual($0, $1))"); break; - case RSX_FP_OPCODE_STR: SetDst("vec4(1.0)"); break; - - default: - return false; - } - - return true; - }; - - auto 
TEX_SRB = [&]() - { - switch (opcode) - { - case RSX_FP_OPCODE_DDX: SetDst("dFdx($0)"); break; - case RSX_FP_OPCODE_DDY: SetDst("dFdy($0)"); break; - case RSX_FP_OPCODE_NRM: SetDst("normalize($0)"); break; - case RSX_FP_OPCODE_BEM: LOG_ERROR(RSX, "Unimplemented TEX_SRB instruction: BEM"); break; - case RSX_FP_OPCODE_TEX: SetDst("$t.Sample($tsampler, $0.xy)"); break; - case RSX_FP_OPCODE_TEXBEM: SetDst("texture($t, $0.xy, $1.x)"); break; - case RSX_FP_OPCODE_TXP: SetDst("textureProj($t, $0.xyz, $1.x)"); break; //TODO: More testing (Sonic The Hedgehog (NPUB-30442/NPEB-00478) and The Simpsons Arcade Game (NPUB30563)) - case RSX_FP_OPCODE_TXPBEM: SetDst("textureProj($t, $0.xyz, $1.x)"); break; - case RSX_FP_OPCODE_TXD: LOG_ERROR(RSX, "Unimplemented TEX_SRB instruction: TXD"); break; - case RSX_FP_OPCODE_TXB: SetDst("texture($t, $0.xy, $1.x)"); break; - case RSX_FP_OPCODE_TXL: SetDst("textureLod($t, $0.xy, $1.x)"); break; - case RSX_FP_OPCODE_UP2: SetDst("unpackSnorm2x16($0)"); break; // TODO: More testing (Sonic The Hedgehog (NPUB-30442/NPEB-00478)) - case RSX_FP_OPCODE_UP4: SetDst("unpackSnorm4x8($0)"); break; // TODO: More testing (Sonic The Hedgehog (NPUB-30442/NPEB-00478)) - case RSX_FP_OPCODE_UP16: LOG_ERROR(RSX, "Unimplemented TEX_SRB instruction: UP16"); break; - case RSX_FP_OPCODE_UPB: LOG_ERROR(RSX, "Unimplemented TEX_SRB instruction: UPB"); break; - case RSX_FP_OPCODE_UPG: LOG_ERROR(RSX, "Unimplemented TEX_SRB instruction: UPG"); break; - - default: - return false; - } - - return true; - }; - - auto SIP = [&]() - { - switch (opcode) - { - case RSX_FP_OPCODE_BRK: SetDst("break"); break; - case RSX_FP_OPCODE_CAL: LOG_ERROR(RSX, "Unimplemented SIP instruction: CAL"); break; - case RSX_FP_OPCODE_FENCT: forced_unit = FORCE_SCT; break; - case RSX_FP_OPCODE_FENCB: forced_unit = FORCE_SCB; break; - case RSX_FP_OPCODE_IFE: - AddCode("if($cond)"); - m_else_offsets.push_back(src1.else_offset << 2); - m_end_offsets.push_back(src2.end_offset << 2); - AddCode("{"); - 
m_code_level++; - break; - case RSX_FP_OPCODE_LOOP: - if (!src0.exec_if_eq && !src0.exec_if_gr && !src0.exec_if_lt) - { - AddCode(fmt::Format("$ifcond for(int i%u = %u; i%u < %u; i%u += %u) {} //-> %u //LOOP", - m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment, src2.end_offset)); - } - else - { - AddCode(fmt::Format("$ifcond for(int i%u = %u; i%u < %u; i%u += %u) //LOOP", - m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment)); - m_loop_count++; - m_end_offsets.push_back(src2.end_offset << 2); - AddCode("{"); - m_code_level++; - } - break; - case RSX_FP_OPCODE_REP: - if (!src0.exec_if_eq && !src0.exec_if_gr && !src0.exec_if_lt) - { - AddCode(fmt::Format("$ifcond for(int i%u = %u; i%u < %u; i%u += %u) {} //-> %u //REP", - m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment, src2.end_offset)); - } - else - { - AddCode(fmt::Format("if($cond) for(int i%u = %u; i%u < %u; i%u += %u) //REP", - m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment)); - m_loop_count++; - m_end_offsets.push_back(src2.end_offset << 2); - AddCode("{"); - m_code_level++; - } - break; - case RSX_FP_OPCODE_RET: SetDst("return"); break; - - default: - return false; - } - - return true; - }; - - switch (opcode) - { - case RSX_FP_OPCODE_NOP: break; - case RSX_FP_OPCODE_KIL: SetDst("discard", false); break; - - default: - if (forced_unit == FORCE_NONE) - { - if (SIP()) break; - if (SCT()) break; - if (TEX_SRB()) break; - if (SCB()) break; - } - else if (forced_unit == FORCE_SCT) - { - forced_unit = FORCE_NONE; - if (SCT()) break; - } - else if (forced_unit == FORCE_SCB) - { - forced_unit = FORCE_NONE; - if (SCB()) break; - } - - LOG_ERROR(RSX, "Unknown/illegal instruction: 0x%x (forced unit %d)", opcode, forced_unit); - break; - } - - m_size += m_offset; - - if (dst.end) break; - - assert(m_offset % sizeof(u32) == 0); - data += 
m_offset / sizeof(u32); - } - - // flush m_code_level - m_code_level = 1; - std::string m_shader = BuildCode(); - main.clear(); -// m_parr.params.clear(); - return m_shader; -} - -#endif \ No newline at end of file diff --git a/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.h b/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.h deleted file mode 100644 index c0642a2051..0000000000 --- a/rpcs3/Emu/RSX/D3D12/FragmentProgramDecompiler.h +++ /dev/null @@ -1,51 +0,0 @@ -#pragma once -#if defined(DX12_SUPPORT) -#include "ShaderParam.h" -#include "Emu/RSX/RSXFragmentProgram.h" -#include - -class FragmentDecompiler -{ - std::string main; - u32 m_addr; - u32& m_size; - u32 m_const_index; - u32 m_offset; - u32 m_location; - u32 m_ctrl; - u32 m_loop_count; - int m_code_level; - std::vector m_end_offsets; - std::vector m_else_offsets; - - std::string GetMask(); - - void SetDst(std::string code, bool append_mask = true); - void AddCode(const std::string& code); - std::string AddReg(u32 index, int fp16); - bool HasReg(u32 index, int fp16); - std::string AddCond(); - std::string AddConst(); - std::string AddTex(); - std::string Format(const std::string& code); - - void AddCodeCond(const std::string& dst, const std::string& src); - std::string GetCond(); - template std::string GetSRC(T src); - std::string BuildCode(); - - u32 GetData(const u32 d) const { return d << 16 | d >> 16; } -protected: - virtual void insertHeader(std::stringstream &OS); - virtual void insertIntputs(std::stringstream &OS); - virtual void insertOutputs(std::stringstream &OS); - virtual void insertConstants(std::stringstream &OS); - virtual void insertMainStart(std::stringstream &OS); - virtual void insertMainEnd(std::stringstream &OS); -public: - ParamArray m_parr; - FragmentDecompiler(u32 addr, u32& size, u32 ctrl); - std::string Decompile(); -}; - -#endif \ No newline at end of file diff --git a/rpcs3/Emu/RSX/D3D12/ShaderParam.cpp b/rpcs3/Emu/RSX/D3D12/ShaderParam.cpp deleted file mode 100644 index 
a28d9bf21b..0000000000 --- a/rpcs3/Emu/RSX/D3D12/ShaderParam.cpp +++ /dev/null @@ -1,6 +0,0 @@ -#include "stdafx.h" -#if defined(DX12_SUPPORT) -#include "ShaderParam.h" - - -#endif \ No newline at end of file diff --git a/rpcs3/Emu/RSX/D3D12/ShaderParam.h b/rpcs3/Emu/RSX/D3D12/ShaderParam.h deleted file mode 100644 index c063a9e793..0000000000 --- a/rpcs3/Emu/RSX/D3D12/ShaderParam.h +++ /dev/null @@ -1,194 +0,0 @@ -#pragma once -#if defined(DX12_SUPPORT) -#include -#include - -enum ParamFlag -{ - PF_PARAM_IN, - PF_PARAM_OUT, - PF_PARAM_UNIFORM, - PF_PARAM_CONST, - PF_PARAM_NONE, - PF_PARAM_COUNT, -}; - -struct ParamItem -{ - std::string name; - std::string value; - int location; - - ParamItem(const std::string& _name, int _location, const std::string& _value = "") - : name(_name) - , value(_value), - location(_location) - { } -}; - -struct ParamType -{ - const ParamFlag flag; - std::string type; - std::vector items; - - ParamType(const ParamFlag _flag, const std::string& _type) - : flag(_flag) - , type(_type) - { - } - - bool SearchName(const std::string& name) - { - for (u32 i = 0; i params[PF_PARAM_COUNT]; - - ParamType* SearchParam(const ParamFlag &flag, const std::string& type) - { - for (u32 i = 0; iSearchName(name); - } - - std::string AddParam(const ParamFlag flag, std::string type, const std::string& name, const std::string& value) - { - ParamType* t = SearchParam(flag, type); - - if (t) - { - if (!t->SearchName(name)) t->items.emplace_back(name, -1, value); - } - else - { - params[flag].emplace_back(flag, type); - params[flag].back().items.emplace_back(name, -1, value); - } - - return name; - } - - std::string AddParam(const ParamFlag flag, std::string type, const std::string& name, int location = -1) - { - ParamType* t = SearchParam(flag, type); - - if (t) - { - if (!t->SearchName(name)) t->items.emplace_back(name, location); - } - else - { - params[flag].emplace_back(flag, type); - params[flag].back().items.emplace_back(name, location); - } - - return 
name; - } -}; - -class ShaderVariable -{ -public: - std::string name; - std::vector swizzles; - - ShaderVariable() = default; - ShaderVariable(const std::string& var) - { - auto var_blocks = fmt::split(var, { "." }); - - if (var_blocks.size() == 0) - { - assert(0); - } - - name = var_blocks[0]; - - if (var_blocks.size() == 1) - { - swizzles.push_back("xyzw"); - } - else - { - swizzles = std::vector(var_blocks.begin() + 1, var_blocks.end()); - } - } - - size_t get_vector_size() const - { - return swizzles[swizzles.size() - 1].length(); - } - - ShaderVariable& symplify() - { - std::unordered_map swizzle; - - static std::unordered_map pos_to_swizzle = - { - { 0, 'x' }, - { 1, 'y' }, - { 2, 'z' }, - { 3, 'w' } - }; - - for (auto &i : pos_to_swizzle) - { - swizzle[i.second] = swizzles[0].length() > i.first ? swizzles[0][i.first] : 0; - } - - for (int i = 1; i < swizzles.size(); ++i) - { - std::unordered_map new_swizzle; - - for (auto &sw : pos_to_swizzle) - { - new_swizzle[sw.second] = swizzle[swizzles[i].length() <= sw.first ? '\0' : swizzles[i][sw.first]]; - } - - swizzle = new_swizzle; - } - - swizzles.clear(); - std::string new_swizzle; - - for (auto &i : pos_to_swizzle) - { - if (swizzle[i.second] != '\0') - new_swizzle += swizzle[i.second]; - } - - swizzles.push_back(new_swizzle); - - return *this; - } - - std::string get() const - { - if (swizzles.size() == 1 && swizzles[0] == "xyzw") - { - return name; - } - - return name + "." 
+ fmt::merge({ swizzles }, "."); - } -}; -#endif \ No newline at end of file diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index 5ff058f7d5..88d174a108 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -42,12 +42,11 @@ + - - - + @@ -506,12 +505,11 @@ + - - - + diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters index cc40c3f295..21b05ad7e5 100644 --- a/rpcs3/emucore.vcxproj.filters +++ b/rpcs3/emucore.vcxproj.filters @@ -980,13 +980,10 @@ Emu\GPU\RSX\D3D12 - + Emu\GPU\RSX\D3D12 - - Emu\GPU\RSX\D3D12 - - + Emu\GPU\RSX\D3D12 @@ -1864,13 +1861,10 @@ Emu\GPU\RSX\D3D12 - + Emu\GPU\RSX\D3D12 - - Emu\GPU\RSX\D3D12 - - + Emu\GPU\RSX\D3D12 From bf8a48e3bd87008e084cc78592c51a1fd10f1710 Mon Sep 17 00:00:00 2001 From: vlj Date: Tue, 19 May 2015 18:38:59 +0200 Subject: [PATCH 078/343] d3d12: Some work to decompiler --- .../D3D12/D3D12FragmentProgramDecompiler.cpp | 53 ++++++++++++------- .../D3D12/D3D12FragmentProgramDecompiler.h | 3 ++ 2 files changed, 38 insertions(+), 18 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp index c75b9ddfff..38f1c88e8e 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp @@ -6,27 +6,44 @@ #include "Emu/Memory/Memory.h" #include "Emu/System.h" -static std::string typeName[] = -{ - "float", - "float2", - "float3", - "float4" -}; - -static std::string functionName[] = -{ - "saturate", - "float4(dot($0.xy, $1.xy), dot($0.xy, $1.xy), dot($0.xy, $1.xy), dot($0.xy, $1.xy))", - "frac($0)", -}; - D3D12FragmentDecompiler::D3D12FragmentDecompiler(u32 addr, u32& size, u32 ctrl) : FragmentProgramDecompiler(addr, size, ctrl) { } +std::string D3D12FragmentDecompiler::getFloatTypeName(size_t elementCount) +{ + switch (elementCount) + { + default: + abort(); + case 1: + return "float"; + case 2: + return "float2"; + case 3: + return "float3"; + case 
4: + return "float4"; + } +} + +std::string D3D12FragmentDecompiler::getFunction(enum class FUNCTION f) +{ + switch (f) + { + default: + abort(); + case FUNCTION::FUNCTION_SATURATE: + return "saturate"; + case FUNCTION::FUNCTION_DP2: + return "dot($0.xy, $1.xy).xxxx"; + case FUNCTION::FUNCTION_FRACT: + return "frac($0)"; + } +} + void D3D12FragmentDecompiler::insertHeader(std::stringstream & OS) { OS << "// Header" << std::endl; @@ -69,8 +86,8 @@ void D3D12FragmentDecompiler::insertOutputs(std::stringstream & OS) for (int i = 0; i < sizeof(table) / sizeof(*table); ++i) { - if (m_parr.HasParam(PF_PARAM_NONE, typeName[3], table[i].second)) - OS << " " << typeName[3] << " " << table[i].first << " : SV_TARGET" << i << ";" << std::endl; + if (m_parr.HasParam(PF_PARAM_NONE, "float4", table[i].second)) + OS << " " << "float4" << " " << table[i].first << " : SV_TARGET" << i << ";" << std::endl; } OS << "};" << std::endl; } @@ -131,7 +148,7 @@ void D3D12FragmentDecompiler::insertMainEnd(std::stringstream & OS) OS << " PixelOutput Out;" << std::endl; for (int i = 0; i < sizeof(table) / sizeof(*table); ++i) { - if (m_parr.HasParam(PF_PARAM_NONE, typeName[3], table[i].second)) + if (m_parr.HasParam(PF_PARAM_NONE, "float4", table[i].second)) OS << " Out." 
<< table[i].first << " = " << table[i].second << ";" << std::endl; } OS << " return Out;" << std::endl; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.h b/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.h index 94d0985d9e..a32a2654f2 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.h @@ -8,6 +8,9 @@ class D3D12FragmentDecompiler : public FragmentProgramDecompiler { protected: + virtual std::string getFloatTypeName(size_t elementCount) override; + virtual std::string getFunction(enum class FUNCTION) override; + virtual void insertHeader(std::stringstream &OS) override; virtual void insertIntputs(std::stringstream &OS) override; virtual void insertOutputs(std::stringstream &OS) override; From 72c84cb8aa51261e093c848190a17387f5fa4957 Mon Sep 17 00:00:00 2001 From: vlj Date: Tue, 19 May 2015 18:45:50 +0200 Subject: [PATCH 079/343] d3d12: Add some more functions --- .../D3D12/D3D12FragmentProgramDecompiler.cpp | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp index 38f1c88e8e..df7cfb1ea9 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp @@ -39,6 +39,28 @@ std::string D3D12FragmentDecompiler::getFunction(enum class FUNCTION f) return "saturate"; case FUNCTION::FUNCTION_DP2: return "dot($0.xy, $1.xy).xxxx"; + case FUNCTION::FUNCTION_DP2A: + return ""; + case FUNCTION::FUNCTION_DP3: + return "dot($0.xyz, $1.xyz).xxxx"; + case FUNCTION::FUNCTION_DP4: + return "dot($0, $1).xxxx"; + case FUNCTION::FUNCTION_SEQ: + return "($0 == $1).xxxx"; + case FUNCTION::FUNCTION_SFL: + return "float4(0., 0., 0., 0.)"; + case FUNCTION::FUNCTION_SGE: + return "($0 >= $1).xxxx"; + case FUNCTION::FUNCTION_SGT: + return "($0 > $1).xxxx"; + case FUNCTION::FUNCTION_SLE: + return "($0 <= $1).xxxx"; 
+ case FUNCTION::FUNCTION_SLT: + return "($0 < $1).xxxx"; + case FUNCTION::FUNCTION_SNE: + return "($0 != $1).xxxx"; + case FUNCTION::FUNCTION_STR: + return "float4(1., 1., 1., 1.)"; case FUNCTION::FUNCTION_FRACT: return "frac($0)"; } From 07449abfc68a41dfd18fb76324c77018179dd1ef Mon Sep 17 00:00:00 2001 From: vlj Date: Tue, 19 May 2015 18:57:08 +0200 Subject: [PATCH 080/343] d3d12: Add more functions --- rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp index df7cfb1ea9..84149928e2 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp @@ -63,6 +63,12 @@ std::string D3D12FragmentDecompiler::getFunction(enum class FUNCTION f) return "float4(1., 1., 1., 1.)"; case FUNCTION::FUNCTION_FRACT: return "frac($0)"; + case FUNCTION::FUNCTION_TEXTURE_SAMPLE: + return "$t.Sample($tsampler, $0.xy)"; + case FUNCTION::FUNCTION_DFDX: + return "ddx($0)"; + case FUNCTION::FUNCTION_DFDY: + return "ddy($0)"; } } From 05d6c8df602eeb1a6263db85b63b340b02b821f2 Mon Sep 17 00:00:00 2001 From: vlj Date: Tue, 19 May 2015 19:43:16 +0200 Subject: [PATCH 081/343] d3d12: Update --- rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp | 7 +++++-- rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.h | 1 + 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp index 84149928e2..6c84fd518f 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp @@ -35,8 +35,6 @@ std::string D3D12FragmentDecompiler::getFunction(enum class FUNCTION f) { default: abort(); - case FUNCTION::FUNCTION_SATURATE: - return "saturate"; case FUNCTION::FUNCTION_DP2: return "dot($0.xy, 
$1.xy).xxxx"; case FUNCTION::FUNCTION_DP2A: @@ -72,6 +70,11 @@ std::string D3D12FragmentDecompiler::getFunction(enum class FUNCTION f) } } +std::string D3D12FragmentDecompiler::saturate(const std::string & code) +{ + return "saturate(" + code + ")"; +} + void D3D12FragmentDecompiler::insertHeader(std::stringstream & OS) { OS << "// Header" << std::endl; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.h b/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.h index a32a2654f2..69aeb93181 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.h @@ -10,6 +10,7 @@ class D3D12FragmentDecompiler : public FragmentProgramDecompiler protected: virtual std::string getFloatTypeName(size_t elementCount) override; virtual std::string getFunction(enum class FUNCTION) override; + virtual std::string saturate(const std::string &code) override; virtual void insertHeader(std::stringstream &OS) override; virtual void insertIntputs(std::stringstream &OS) override; From 5681781ab03ece85ea49e5779381bfe575ba84ce Mon Sep 17 00:00:00 2001 From: vlj Date: Tue, 19 May 2015 19:43:28 +0200 Subject: [PATCH 082/343] GL: Forgot hunk --- rpcs3/Emu/RSX/CgBinaryProgram.h | 1 + 1 file changed, 1 insertion(+) diff --git a/rpcs3/Emu/RSX/CgBinaryProgram.h b/rpcs3/Emu/RSX/CgBinaryProgram.h index 0e1b4c858d..87bb3cedc5 100644 --- a/rpcs3/Emu/RSX/CgBinaryProgram.h +++ b/rpcs3/Emu/RSX/CgBinaryProgram.h @@ -332,6 +332,7 @@ public: else { + GLParamArray param_array; auto& vprog = GetCgRef(prog.program); m_arb_shader += "\n"; m_arb_shader += fmt::format("# binaryFormatRevision 0x%x\n", (u32)prog.binaryFormatRevision); From d2beafca4de19bd5b6af18ceb4d53ab655b300e4 Mon Sep 17 00:00:00 2001 From: vlj Date: Tue, 19 May 2015 20:21:37 +0200 Subject: [PATCH 083/343] d3d12: Update --- .../D3D12/D3D12FragmentProgramDecompiler.cpp | 33 ++++++++++++------- .../D3D12/D3D12FragmentProgramDecompiler.h | 1 + 2 files changed, 22 insertions(+), 
12 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp index 6c84fd518f..1d8d3cbf8d 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp @@ -43,20 +43,8 @@ std::string D3D12FragmentDecompiler::getFunction(enum class FUNCTION f) return "dot($0.xyz, $1.xyz).xxxx"; case FUNCTION::FUNCTION_DP4: return "dot($0, $1).xxxx"; - case FUNCTION::FUNCTION_SEQ: - return "($0 == $1).xxxx"; case FUNCTION::FUNCTION_SFL: return "float4(0., 0., 0., 0.)"; - case FUNCTION::FUNCTION_SGE: - return "($0 >= $1).xxxx"; - case FUNCTION::FUNCTION_SGT: - return "($0 > $1).xxxx"; - case FUNCTION::FUNCTION_SLE: - return "($0 <= $1).xxxx"; - case FUNCTION::FUNCTION_SLT: - return "($0 < $1).xxxx"; - case FUNCTION::FUNCTION_SNE: - return "($0 != $1).xxxx"; case FUNCTION::FUNCTION_STR: return "float4(1., 1., 1., 1.)"; case FUNCTION::FUNCTION_FRACT: @@ -75,6 +63,27 @@ std::string D3D12FragmentDecompiler::saturate(const std::string & code) return "saturate(" + code + ")"; } +std::string D3D12FragmentDecompiler::compareFunction(COMPARE f, const std::string &Op0, const std::string &Op1) +{ + switch (f) + { + default: + abort(); + case COMPARE::FUNCTION_SEQ: + return "(" + Op0 + " == " + Op1 + ".xxxx"; + case COMPARE::FUNCTION_SGE: + return "(" + Op0 + " >= " + Op1 +").xxxx"; + case COMPARE::FUNCTION_SGT: + return "(" + Op0 + " > " + Op1 + ").xxxx"; + case COMPARE::FUNCTION_SLE: + return "(" + Op0 + " <= " + Op1 + ").xxxx"; + case COMPARE::FUNCTION_SLT: + return "(" + Op0 + " < " + Op1 + ").xxxx"; + case COMPARE::FUNCTION_SNE: + return "(" + Op0 + " != " + Op1 + ").xxxx"; + } +} + void D3D12FragmentDecompiler::insertHeader(std::stringstream & OS) { OS << "// Header" << std::endl; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.h b/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.h index 69aeb93181..70b151a73f 100644 --- 
a/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.h @@ -11,6 +11,7 @@ protected: virtual std::string getFloatTypeName(size_t elementCount) override; virtual std::string getFunction(enum class FUNCTION) override; virtual std::string saturate(const std::string &code) override; + virtual std::string compareFunction(enum class COMPARE, const std::string &, const std::string &) override; virtual void insertHeader(std::stringstream &OS) override; virtual void insertIntputs(std::stringstream &OS) override; From 422a9f1fdc84f4dd4ffc1f1412617545857abe07 Mon Sep 17 00:00:00 2001 From: vlj Date: Wed, 20 May 2015 01:21:27 +0200 Subject: [PATCH 084/343] d3d12: Use VertexProgramDecompiler --- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h | 2 +- .../D3D12/D3D12VertexProgramDecompiler.cpp | 747 +----------------- .../RSX/D3D12/D3D12VertexProgramDecompiler.h | 67 +- 3 files changed, 50 insertions(+), 766 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h index 60eaad2750..e03bd038f8 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h @@ -83,7 +83,7 @@ struct D3D12Traits static void RecompileVertexProgram(RSXVertexProgram *RSXVP, VertexProgramData& vertexProgramData, size_t ID) { - VertexDecompiler VS(RSXVP->data); + D3D12VertexProgramDecompiler VS(RSXVP->data); std::string shaderCode = VS.Decompile(); vertexProgramData.Compile(shaderCode, Shader::SHADER_TYPE::SHADER_TYPE_VERTEX); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp index 015307e8d6..7afbf4bab5 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp @@ -1,448 +1,57 @@ #include "stdafx.h" #if defined(DX12_SUPPORT) #include "D3D12VertexProgramDecompiler.h" - #include "Utilities/Log.h" #include "Emu/System.h" -static 
std::string typeName[] = -{ - "float", - "float2", - "float3", - "float4" -}; -std::string VertexDecompiler::GetMask(bool is_sca) +std::string D3D12VertexProgramDecompiler::getFloatTypeName(size_t elementCount) { - std::string ret; - - if (is_sca) + switch (elementCount) { - if (d3.sca_writemask_x) ret += "x"; - if (d3.sca_writemask_y) ret += "y"; - if (d3.sca_writemask_z) ret += "z"; - if (d3.sca_writemask_w) ret += "w"; - } - else - { - if (d3.vec_writemask_x) ret += "x"; - if (d3.vec_writemask_y) ret += "y"; - if (d3.vec_writemask_z) ret += "z"; - if (d3.vec_writemask_w) ret += "w"; - } - - return ret.empty() || ret == "xyzw" ? "" : ("." + ret); -} - -std::string VertexDecompiler::GetVecMask() -{ - return GetMask(false); -} - -std::string VertexDecompiler::GetScaMask() -{ - return GetMask(true); -} - -std::string VertexDecompiler::GetDST(bool isSca) -{ - std::string ret; - - switch (isSca ? 0x1f : d3.dst) - { - case 0x1f: - ret += m_parr.AddParam(PF_PARAM_NONE, typeName[3], std::string("tmp") + std::to_string(isSca ? d3.sca_dst_tmp : d0.dst_tmp)); - break; - default: - if (d3.dst > 15) - LOG_ERROR(RSX, fmt::Format("dst index out of range: %u", d3.dst)); - ret += m_parr.AddParam(PF_PARAM_NONE, typeName[3], std::string("dst_reg") + std::to_string(d3.dst), d3.dst == 0 ? 
typeName[3] + "(0.0f, 0.0f, 0.0f, 1.0f)" : typeName[3] + "(0.0, 0.0, 0.0, 0.0)"); - break; + abort(); + case 1: + return "float"; + case 2: + return "float2"; + case 3: + return "float3"; + case 4: + return "float4"; } - - return ret; } -std::string VertexDecompiler::GetSRC(const u32 n) +std::string D3D12VertexProgramDecompiler::getFunction(enum class FUNCTION f) { - static const std::string reg_table[] = + switch (f) { - "in_pos", "in_weight", "in_normal", - "in_diff_color", "in_spec_color", - "in_fog", - "in_point_size", "in_7", - "in_tc0", "in_tc1", "in_tc2", "in_tc3", - "in_tc4", "in_tc5", "in_tc6", "in_tc7" - }; - - std::string ret; - - switch (src[n].reg_type) - { - case 1: //temp - ret += m_parr.AddParam(PF_PARAM_NONE, typeName[3], "tmp" + std::to_string(src[n].tmp_src)); - break; - case 2: //input - if (d1.input_src < (sizeof(reg_table) / sizeof(reg_table[0]))) - { - ret += m_parr.AddParam(PF_PARAM_IN, typeName[3], reg_table[d1.input_src], d1.input_src); - } - else - { - LOG_ERROR(RSX, "Bad input src num: %d", fmt::by_value(d1.input_src)); - ret += m_parr.AddParam(PF_PARAM_IN, typeName[3], "in_unk", d1.input_src); - } - break; - case 3: //const - m_parr.AddParam(PF_PARAM_UNIFORM, typeName[3], std::string("vc[468]")); - ret += std::string("vc[") + std::to_string(d1.const_src) + (d3.index_const ? " + " + AddAddrReg() : "") + "]"; - break; - default: - LOG_ERROR(RSX, fmt::Format("Bad src%u reg type: %d", n, fmt::by_value(src[n].reg_type))); - Emu.Pause(); - break; - } - - static const std::string f = "xyzw"; - - std::string swizzle; - - swizzle += f[src[n].swz_x]; - swizzle += f[src[n].swz_y]; - swizzle += f[src[n].swz_z]; - swizzle += f[src[n].swz_w]; - - if (swizzle != f) ret += '.' 
+ swizzle; - - bool abs; - - switch (n) - { - case 0: abs = d0.src0_abs; break; - case 1: abs = d0.src1_abs; break; - case 2: abs = d0.src2_abs; break; - } - - if (abs) ret = "abs(" + ret + ")"; - if (src[n].neg) ret = "-" + ret; - - return ret; -} - -void VertexDecompiler::SetDST(bool is_sca, std::string value) -{ - if (d0.cond == 0) return; - - enum - { - lt = 0x1, - eq = 0x2, - gt = 0x4, - }; - - std::string mask = GetMask(is_sca); - - value += mask; - - if (is_sca && d0.vec_result) - { - //value = "vec4(" + value + ")"; - } - - if (d0.staturate) - { - value = "clamp(" + value + ", 0.0, 1.0)"; - } - - std::string dest; - - if (d0.cond_update_enable_0 && d0.cond_update_enable_1) - { - dest = m_parr.AddParam(PF_PARAM_NONE, typeName[3], "cc" + std::to_string(d0.cond_reg_sel_1), typeName[3] + "(0.0)") + mask; - } - else if (d3.dst != 0x1f || (is_sca ? d3.sca_dst_tmp != 0x3f : d0.dst_tmp != 0x3f)) - { - dest = GetDST(is_sca) + mask; - } - - //std::string code; - //if (d0.cond_test_enable) - // code += "$ifcond "; - //code += dest + value; - //AddCode(code + ";"); - - AddCodeCond(Format(dest), value); -} - -std::string VertexDecompiler::GetFunc() -{ - std::string name = "func$a"; - - for (const auto& func : m_funcs) { - if (func.name.compare(name) == 0) { - return name + "()"; - } - } - - m_funcs.emplace_back(); - FuncInfo &idx = m_funcs.back(); - idx.offset = GetAddr(); - idx.name = name; - - return name + "()"; -} - -std::string VertexDecompiler::GetTex() -{ - return m_parr.AddParam(PF_PARAM_UNIFORM, "sampler2D", std::string("vtex") + std::to_string(/*?.tex_num*/0)); -} - -std::string VertexDecompiler::Format(const std::string& code) -{ - const std::pair> repl_list[] = - { - { "$$", []() -> std::string { return "$"; } }, - { "$0", std::bind(std::mem_fn(&VertexDecompiler::GetSRC), this, 0) }, - { "$1", std::bind(std::mem_fn(&VertexDecompiler::GetSRC), this, 1) }, - { "$2", std::bind(std::mem_fn(&VertexDecompiler::GetSRC), this, 2) }, - { "$s", 
std::bind(std::mem_fn(&VertexDecompiler::GetSRC), this, 2) }, - { "$am", std::bind(std::mem_fn(&VertexDecompiler::AddAddrMask), this) }, - { "$a", std::bind(std::mem_fn(&VertexDecompiler::AddAddrReg), this) }, - - { "$t", std::bind(std::mem_fn(&VertexDecompiler::GetTex), this) }, - - { "$fa", [this]()->std::string { return std::to_string(GetAddr()); } }, - { "$f()", std::bind(std::mem_fn(&VertexDecompiler::GetFunc), this) }, - { "$ifcond ", [this]() -> std::string - { - const std::string& cond = GetCond(); - if (cond == "true") return ""; - return "if(" + cond + ") "; - } - }, - { "$cond", std::bind(std::mem_fn(&VertexDecompiler::GetCond), this) } - }; - - return fmt::replace_all(code, repl_list); -} - -std::string VertexDecompiler::GetCond() -{ - enum - { - lt = 0x1, - eq = 0x2, - gt = 0x4, - }; - - if (d0.cond == 0) return "false"; - if (d0.cond == (lt | gt | eq)) return "true"; - - static const char* cond_string_table[(lt | gt | eq) + 1] = - { - "error", - "lessThan", - "equal", - "lessThanEqual", - "greaterThan", - "notEqual", - "greaterThanEqual", - "error" - }; - - static const char f[4] = { 'x', 'y', 'z', 'w' }; - - std::string swizzle; - swizzle += f[d0.mask_x]; - swizzle += f[d0.mask_y]; - swizzle += f[d0.mask_z]; - swizzle += f[d0.mask_w]; - - swizzle = swizzle == "xyzw" ? "" : "." 
+ swizzle; - - return fmt::Format("any(%s(cc%d%s, vec4(0.0)%s))", cond_string_table[d0.cond], d0.cond_reg_sel_1, swizzle.c_str(), swizzle.c_str()); -} - -void VertexDecompiler::AddCodeCond(const std::string& dst, const std::string& src) -{ - enum - { - lt = 0x1, - eq = 0x2, - gt = 0x4, - }; - - - if (!d0.cond_test_enable || d0.cond == (lt | gt | eq)) - { - AddCode(dst + " = " + src + ";"); - return; - } - - if (d0.cond == 0) - { - AddCode("//" + dst + " = " + src + ";"); - return; - } - - static const char* cond_string_table[(lt | gt | eq) + 1] = - { - "error", - "<", - "==", - "<=", - ">", - "!=", - ">=", - "error" - }; - - static const char f[4] = { 'x', 'y', 'z', 'w' }; - - std::string swizzle; - swizzle += f[d0.mask_x]; - swizzle += f[d0.mask_y]; - swizzle += f[d0.mask_z]; - swizzle += f[d0.mask_w]; - - swizzle = swizzle == "xyzw" ? "" : "." + swizzle; - - std::string cond = fmt::Format("(cc%d%s %s float4(0., 0., 0., 0.))", d0.cond_reg_sel_1, swizzle.c_str(), cond_string_table[d0.cond]); - - ShaderVariable dst_var(dst); - dst_var.symplify(); - - //const char *c_mask = f; - - if (dst_var.swizzles[0].length() == 1) - { - AddCode("if (" + cond + ".x) " + dst + " = " + typeName[3] + "(" + src + ".xxxx).x;"); - } - else - { - for (int i = 0; i < dst_var.swizzles[0].length(); ++i) - { - AddCode("if (" + cond + "." + f[i] + ") " + dst + "." + f[i] + " = " + src + "." 
+ f[i] + ";"); - } + abort(); + case FUNCTION::FUNCTION_DP2: + return "dot($0.xy, $1.xy).xxxx"; + case FUNCTION::FUNCTION_DP2A: + return ""; + case FUNCTION::FUNCTION_DP3: + return "dot($0.xyz, $1.xyz).xxxx"; + case FUNCTION::FUNCTION_DP4: + return "dot($0, $1).xxxx"; + case FUNCTION::FUNCTION_SFL: + return "float4(0., 0., 0., 0.)"; + case FUNCTION::FUNCTION_STR: + return "float4(1., 1., 1., 1.)"; + case FUNCTION::FUNCTION_FRACT: + return "frac($0)"; + case FUNCTION::FUNCTION_TEXTURE_SAMPLE: + return "$t.Sample($tsampler, $0.xy)"; + case FUNCTION::FUNCTION_DFDX: + return "ddx($0)"; + case FUNCTION::FUNCTION_DFDY: + return "ddy($0)"; } } - -std::string VertexDecompiler::AddAddrMask() -{ - static const char f[] = { 'x', 'y', 'z', 'w' }; - return std::string(".") + f[d0.addr_swz]; -} - -std::string VertexDecompiler::AddAddrReg() -{ - static const char f[] = { 'x', 'y', 'z', 'w' }; - return m_parr.AddParam(PF_PARAM_NONE, "ivec4", "a" + std::to_string(d0.addr_reg_sel_1), "ivec4(0)") + AddAddrMask(); -} - -u32 VertexDecompiler::GetAddr() -{ - return (d2.iaddrh << 3) | d3.iaddrl; -} - -void VertexDecompiler::AddCode(const std::string& code) -{ - m_body.push_back(Format(code) + ";"); - m_cur_instr->body.push_back(Format(code)); -} - -void VertexDecompiler::SetDSTVec(const std::string& code) -{ - SetDST(false, code); -} - -void VertexDecompiler::SetDSTSca(const std::string& code) -{ - SetDST(true, code); -} - -std::string VertexDecompiler::BuildFuncBody(const FuncInfo& func) -{ - std::string result; - - for (uint i = func.offset; i= 1); - for (int j = 0; j < m_instructions[i].put_close_scopes; ++j) - { - --lvl; - if (lvl < 1) lvl = 1; - main_body.append(lvl, '\t') += "}\n"; - } - - for (int j = 0; j < m_instructions[i].do_count; ++j) - { - main_body.append(lvl, '\t') += "do\n"; - main_body.append(lvl, '\t') += "{\n"; - lvl++; - } - - for (uint j = 0; j < m_instructions[i].body.size(); ++j) - { - main_body.append(lvl, '\t') += m_instructions[i].body[j] + "\n"; - } - - lvl += 
m_instructions[i].open_scopes; - } - - std::stringstream OS; - insertHeader(OS); - - insertInputs(OS, m_parr.params[PF_PARAM_IN]); - OS << std::endl; - insertOutputs(OS, m_parr.params[PF_PARAM_NONE]); - OS << std::endl; - insertConstants(OS, m_parr.params[PF_PARAM_UNIFORM]); - OS << std::endl; - - insertMainStart(OS); - OS << main_body.c_str() << std::endl; - insertMainEnd(OS); - - return OS.str(); -} - -void VertexDecompiler::insertHeader(std::stringstream &OS) +void D3D12VertexProgramDecompiler::insertHeader(std::stringstream &OS) { OS << "cbuffer SCALE_OFFSET : register(b0)" << std::endl; OS << "{" << std::endl; @@ -450,7 +59,7 @@ void VertexDecompiler::insertHeader(std::stringstream &OS) OS << "};" << std::endl; } -void VertexDecompiler::insertInputs(std::stringstream & OS, const std::vector& inputs) +void D3D12VertexProgramDecompiler::insertInputs(std::stringstream & OS, const std::vector& inputs) { OS << "struct VertexInput" << std::endl; OS << "{" << std::endl; @@ -462,7 +71,7 @@ void VertexDecompiler::insertInputs(std::stringstream & OS, const std::vector & constants) +void D3D12VertexProgramDecompiler::insertConstants(std::stringstream & OS, const std::vector & constants) { OS << "cbuffer CONSTANT_BUFFER : register(b1)" << std::endl; OS << "{" << std::endl; @@ -474,7 +83,7 @@ void VertexDecompiler::insertConstants(std::stringstream & OS, const std::vector OS << "};" << std::endl; } -void VertexDecompiler::insertOutputs(std::stringstream & OS, const std::vector & outputs) +void D3D12VertexProgramDecompiler::insertOutputs(std::stringstream & OS, const std::vector & outputs) { OS << "struct PixelInput" << std::endl; OS << "{" << std::endl; @@ -533,7 +142,7 @@ static const reg_info reg_table[] = { "tc9", true, "dst_reg6", "", false } // In this line, dst_reg6 is correct since dst_reg goes from 0 to 15. 
}; -void VertexDecompiler::insertMainStart(std::stringstream & OS) +void D3D12VertexProgramDecompiler::insertMainStart(std::stringstream & OS) { OS << "PixelInput main(VertexInput In)" << std::endl; OS << "{" << std::endl; @@ -550,296 +159,26 @@ void VertexDecompiler::insertMainStart(std::stringstream & OS) for (const ParamItem &PI : PT.items) OS << " " << PT.type << " " << PI.name << " = In." << PI.name << ";" << std::endl; } - - } -void VertexDecompiler::insertMainEnd(std::stringstream & OS) +void D3D12VertexProgramDecompiler::insertMainEnd(std::stringstream & OS) { OS << " PixelInput Out;" << std::endl; // Declare inside main function for (auto &i : reg_table) { - if (m_parr.HasParam(PF_PARAM_NONE, typeName[3], i.src_reg)) + if (m_parr.HasParam(PF_PARAM_NONE, "float4", i.src_reg)) OS << " Out." << i.src_reg << " = " << i.src_reg << ";" << std::endl; } - // TODO: Find why I need to do this -// OS << " Out.dst_reg0.z *= -1.;" << std::endl; OS << " Out.dst_reg0 = mul(Out.dst_reg0, scaleOffsetMat);" << std::endl; OS << " return Out;" << std::endl; OS << "}" << std::endl; } -VertexDecompiler::VertexDecompiler(std::vector& data) : - m_data(data) +D3D12VertexProgramDecompiler::D3D12VertexProgramDecompiler(std::vector& data) : + VertexProgramDecompiler(data) { - m_funcs.emplace_back(); - m_funcs[0].offset = 0; - m_funcs[0].name = "main"; - m_funcs.emplace_back(); - m_funcs[1].offset = 0; - m_funcs[1].name = "func0"; - //m_cur_func->body = "\tgl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);\n"; -} - -std::string VertexDecompiler::Decompile() -{ - for (unsigned i = 0; i < PF_PARAM_COUNT; i++) - m_parr.params[i].clear(); - m_instr_count = 0; - - for (int i = 0; i < m_max_instr_count; ++i) - { - m_instructions[i].reset(); - } - - bool is_has_BRA = false; - - for (u32 i = 1; m_instr_count < m_max_instr_count; m_instr_count++) - { - m_cur_instr = &m_instructions[m_instr_count]; - - if (is_has_BRA) - { - d3.HEX = m_data[i]; - i += 4; - } - else - { - d1.HEX = m_data[i++]; - - 
switch (d1.sca_opcode) - { - case 0x08: //BRA - LOG_ERROR(RSX, "BRA found. Please report to RPCS3 team."); - is_has_BRA = true; - m_jump_lvls.clear(); - d3.HEX = m_data[++i]; - i += 4; - break; - - case 0x09: //BRI - d2.HEX = m_data[i++]; - d3.HEX = m_data[i]; - i += 2; - m_jump_lvls.emplace(GetAddr()); - break; - - default: - d3.HEX = m_data[++i]; - i += 2; - break; - } - } - - if (d3.end) - { - m_instr_count++; - - if (i < m_data.size()) - { - LOG_ERROR(RSX, "Program end before buffer end."); - } - - break; - } - } - - uint jump_position = 0; - - if (is_has_BRA || !m_jump_lvls.empty()) - { - m_cur_instr = &m_instructions[0]; - AddCode("int jump_position = 0;"); - AddCode("while (true)"); - AddCode("{"); - m_cur_instr->open_scopes++; - - AddCode(fmt::Format("if (jump_position <= %u)", jump_position++)); - AddCode("{"); - m_cur_instr->open_scopes++; - } - - for (u32 i = 0; i < m_instr_count; ++i) - { - m_cur_instr = &m_instructions[i]; - - d0.HEX = m_data[i * 4 + 0]; - d1.HEX = m_data[i * 4 + 1]; - d2.HEX = m_data[i * 4 + 2]; - d3.HEX = m_data[i * 4 + 3]; - - src[0].src0l = d2.src0l; - src[0].src0h = d1.src0h; - src[1].src1 = d2.src1; - src[2].src2l = d3.src2l; - src[2].src2h = d2.src2h; - - if (i && (is_has_BRA || std::find(m_jump_lvls.begin(), m_jump_lvls.end(), i) != m_jump_lvls.end())) - { - m_cur_instr->close_scopes++; - AddCode("}"); - AddCode(""); - - AddCode(fmt::Format("if (jump_position <= %u)", jump_position++)); - AddCode("{"); - m_cur_instr->open_scopes++; - } - - if (!d1.sca_opcode && !d1.vec_opcode) - { - AddCode("//nop"); - } - - switch (d1.sca_opcode) - { - case RSX_SCA_OPCODE_NOP: break; - case RSX_SCA_OPCODE_MOV: SetDSTSca("$s"); break; - case RSX_SCA_OPCODE_RCP: SetDSTSca("(1.0 / $s)"); break; - case RSX_SCA_OPCODE_RCC: SetDSTSca("clamp(1.0 / $s, 5.42101e-20, 1.884467e19)"); break; - case RSX_SCA_OPCODE_RSQ: SetDSTSca("1.F / sqrt($s)"); break; - case RSX_SCA_OPCODE_EXP: SetDSTSca("exp($s)"); break; - case RSX_SCA_OPCODE_LOG: 
SetDSTSca("log($s)"); break; - case RSX_SCA_OPCODE_LIT: SetDSTSca(typeName[3] + "(1.0, $s.x, ($s.x > 0.0 ? exp($s.w * log2($s.y)) : 0.0), 1.0)"); break; - case RSX_SCA_OPCODE_BRA: - { - AddCode("$if ($cond)"); - AddCode("{"); - m_cur_instr->open_scopes++; - AddCode("jump_position = $a$am;"); - AddCode("continue;"); - m_cur_instr->close_scopes++; - AddCode("}"); - } - break; - /* This triggers opengl driver lost connection error code 7 - case RSX_SCA_OPCODE_BRI: // works differently (BRI o[1].x(TR) L0;) - { - uint jump_position; - - if (is_has_BRA) - { - jump_position = GetAddr(); - } - else - { - int addr = GetAddr(); - - jump_position = 0; - for (auto pos : m_jump_lvls) - { - if (addr == pos) - break; - - ++jump_position; - } - } - - AddCode("$ifcond "); - AddCode("{"); - m_cur_instr->open_scopes++; - AddCode(fmt::Format("jump_position = %u;", jump_position)); - AddCode("continue;"); - m_cur_instr->close_scopes++; - AddCode("}"); - } - break; - */ - case RSX_SCA_OPCODE_CAL: - // works same as BRI - AddCode("$ifcond $f(); //CAL"); - break; - case RSX_SCA_OPCODE_CLI: - // works same as BRI - AddCode("$ifcond $f(); //CLI"); - break; - case RSX_SCA_OPCODE_RET: - // works like BRI but shorter (RET o[1].x(TR);) - AddCode("$ifcond return;"); - break; - case RSX_SCA_OPCODE_LG2: SetDSTSca("log2($s)"); break; - case RSX_SCA_OPCODE_EX2: SetDSTSca("exp2($s)"); break; - case RSX_SCA_OPCODE_SIN: SetDSTSca("sin($s)"); break; - case RSX_SCA_OPCODE_COS: SetDSTSca("cos($s)"); break; - case RSX_SCA_OPCODE_BRB: - // works differently (BRB o[1].x !b0, L0;) - LOG_ERROR(RSX, "Unimplemented sca_opcode BRB"); - break; - case RSX_SCA_OPCODE_CLB: break; - // works same as BRB - LOG_ERROR(RSX, "Unimplemented sca_opcode CLB"); - break; - case RSX_SCA_OPCODE_PSH: break; - // works differently (PSH o[1].x A0;) - LOG_ERROR(RSX, "Unimplemented sca_opcode PSH"); - break; - case RSX_SCA_OPCODE_POP: break; - // works differently (POP o[1].x;) - LOG_ERROR(RSX, "Unimplemented sca_opcode POP"); - 
break; - - default: - AddCode(fmt::Format("//Unknown vp sca_opcode 0x%x", fmt::by_value(d1.sca_opcode))); - LOG_ERROR(RSX, "Unknown vp sca_opcode 0x%x", fmt::by_value(d1.sca_opcode)); - Emu.Pause(); - break; - } - - switch (d1.vec_opcode) - { - case RSX_VEC_OPCODE_NOP: break; - case RSX_VEC_OPCODE_MOV: SetDSTVec("$0"); break; - case RSX_VEC_OPCODE_MUL: SetDSTVec("($0 * $1)"); break; - case RSX_VEC_OPCODE_ADD: SetDSTVec("($0 + $2)"); break; - case RSX_VEC_OPCODE_MAD: SetDSTVec("($0 * $1 + $2)"); break; - case RSX_VEC_OPCODE_DP3: SetDSTVec("dot($0.xyz, $1.xyz).xxxx"); break; - case RSX_VEC_OPCODE_DPH: SetDSTVec("dot(float4($0.xyz, 1.0), $1).xxxx"); break; - case RSX_VEC_OPCODE_DP4: SetDSTVec("dot($0, $1).xxxx"); break; - case RSX_VEC_OPCODE_DST: SetDSTVec("vec4(distance($0, $1))"); break; - case RSX_VEC_OPCODE_MIN: SetDSTVec("min($0, $1)"); break; - case RSX_VEC_OPCODE_MAX: SetDSTVec("max($0, $1)"); break; - case RSX_VEC_OPCODE_SLT: SetDSTVec("vec4(lessThan($0, $1))"); break; - case RSX_VEC_OPCODE_SGE: SetDSTVec("vec4(greaterThanEqual($0, $1))"); break; - case RSX_VEC_OPCODE_ARL: AddCode("$ifcond $a = ivec4($0)$am;"); break; - case RSX_VEC_OPCODE_FRC: SetDSTVec("frac($0)"); break; - case RSX_VEC_OPCODE_FLR: SetDSTVec("floor($0)"); break; - case RSX_VEC_OPCODE_SEQ: SetDSTVec("vec4(equal($0, $1))"); break; - case RSX_VEC_OPCODE_SFL: SetDSTVec("vec4(equal($0, vec4(0.0)))"); break; - case RSX_VEC_OPCODE_SGT: SetDSTVec("vec4(greaterThan($0, $1))"); break; - case RSX_VEC_OPCODE_SLE: SetDSTVec("vec4(lessThanEqual($0, $1))"); break; - case RSX_VEC_OPCODE_SNE: SetDSTVec("vec4(notEqual($0, $1))"); break; - case RSX_VEC_OPCODE_STR: SetDSTVec("vec4(equal($0, vec4(1.0)))"); break; - case RSX_VEC_OPCODE_SSG: SetDSTVec("sign($0)"); break; - case RSX_VEC_OPCODE_TXL: SetDSTVec("texture($t, $0.xy)"); break; - - default: - AddCode(fmt::Format("//Unknown vp opcode 0x%x", fmt::by_value(d1.vec_opcode))); - LOG_ERROR(RSX, "Unknown vp opcode 0x%x", fmt::by_value(d1.vec_opcode)); - 
Emu.Pause(); - break; - } - } - - if (is_has_BRA || !m_jump_lvls.empty()) - { - m_cur_instr = &m_instructions[m_instr_count - 1]; - m_cur_instr->close_scopes++; - AddCode("}"); - AddCode("break;"); - m_cur_instr->close_scopes++; - AddCode("}"); - } - - std::string result = BuildCode(); - - m_jump_lvls.clear(); - m_body.clear(); - if (m_funcs.size() > 2) - { - m_funcs.erase(m_funcs.begin() + 2, m_funcs.end()); - } - return result; } #endif \ No newline at end of file diff --git a/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.h b/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.h index 11195bdf41..0c5fcdce52 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.h @@ -1,69 +1,15 @@ #pragma once #if defined(DX12_SUPPORT) -#include "Emu/RSX/RSXVertexProgram.h" #include #include -#include "../Common/ShaderParam.h" +#include "../Common/VertexProgramDecompiler.h" -struct VertexDecompiler +struct D3D12VertexProgramDecompiler : public VertexProgramDecompiler { - struct FuncInfo - { - u32 offset; - std::string name; - }; - - struct Instruction - { - std::vector body; - int open_scopes; - int close_scopes; - int put_close_scopes; - int do_count; - - void reset() - { - body.clear(); - put_close_scopes = open_scopes = close_scopes = do_count = 0; - } - }; - - static const size_t m_max_instr_count = 512; - Instruction m_instructions[m_max_instr_count]; - Instruction* m_cur_instr; - size_t m_instr_count; - - std::set m_jump_lvls; - std::vector m_body; - std::vector m_funcs; - - //wxString main; - - std::vector& m_data; - ParamArray m_parr; - - std::string GetMask(bool is_sca); - std::string GetVecMask(); - std::string GetScaMask(); - std::string GetDST(bool is_sca = false); - std::string GetSRC(const u32 n); - std::string GetFunc(); - std::string GetTex(); - std::string GetCond(); - std::string AddAddrMask(); - std::string AddAddrReg(); - u32 GetAddr(); - std::string Format(const std::string& code); - - void 
AddCodeCond(const std::string& dst, const std::string& src); - void AddCode(const std::string& code); - void SetDST(bool is_sca, std::string value); - void SetDSTVec(const std::string& code); - void SetDSTSca(const std::string& code); - std::string BuildFuncBody(const FuncInfo& func); - std::string BuildCode(); - protected: + virtual std::string getFloatTypeName(size_t elementCount) override; + virtual std::string getFunction(enum class FUNCTION) override; + virtual void insertHeader(std::stringstream &OS); virtual void insertInputs(std::stringstream &OS, const std::vector &inputs); virtual void insertConstants(std::stringstream &OS, const std::vector &constants); @@ -71,7 +17,6 @@ protected: virtual void insertMainStart(std::stringstream &OS); virtual void insertMainEnd(std::stringstream &OS); public: - VertexDecompiler(std::vector& data); - std::string Decompile(); + D3D12VertexProgramDecompiler(std::vector& data); }; #endif \ No newline at end of file From 3f319760806f156105cc31e9f51e05670fa336dd Mon Sep 17 00:00:00 2001 From: vlj Date: Wed, 20 May 2015 01:59:24 +0200 Subject: [PATCH 085/343] d3d12: Update --- .../D3D12/D3D12VertexProgramDecompiler.cpp | 21 +++++++++++++++++++ .../RSX/D3D12/D3D12VertexProgramDecompiler.h | 1 + 2 files changed, 22 insertions(+) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp index 7afbf4bab5..be52bab93a 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp @@ -51,6 +51,27 @@ std::string D3D12VertexProgramDecompiler::getFunction(enum class FUNCTION f) } } +std::string D3D12VertexProgramDecompiler::compareFunction(COMPARE f, const std::string &Op0, const std::string &Op1) +{ + switch (f) + { + default: + abort(); + case COMPARE::FUNCTION_SEQ: + return "(" + Op0 + " == " + Op1 + ".xxxx"; + case COMPARE::FUNCTION_SGE: + return "(" + Op0 + " >= " + Op1 + ").xxxx"; + case COMPARE::FUNCTION_SGT: 
+ return "(" + Op0 + " > " + Op1 + ").xxxx"; + case COMPARE::FUNCTION_SLE: + return "(" + Op0 + " <= " + Op1 + ").xxxx"; + case COMPARE::FUNCTION_SLT: + return "(" + Op0 + " < " + Op1 + ").xxxx"; + case COMPARE::FUNCTION_SNE: + return "(" + Op0 + " != " + Op1 + ").xxxx"; + } +} + void D3D12VertexProgramDecompiler::insertHeader(std::stringstream &OS) { OS << "cbuffer SCALE_OFFSET : register(b0)" << std::endl; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.h b/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.h index 0c5fcdce52..d61642c18b 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.h @@ -9,6 +9,7 @@ struct D3D12VertexProgramDecompiler : public VertexProgramDecompiler protected: virtual std::string getFloatTypeName(size_t elementCount) override; virtual std::string getFunction(enum class FUNCTION) override; + virtual std::string compareFunction(enum class COMPARE, const std::string &, const std::string &) override; virtual void insertHeader(std::stringstream &OS); virtual void insertInputs(std::stringstream &OS, const std::vector &inputs); From 7b25483b676567c4fe710be1d3fb116cee38b765 Mon Sep 17 00:00:00 2001 From: vlj Date: Wed, 20 May 2015 02:15:07 +0200 Subject: [PATCH 086/343] d3d12: depth read doesnt crash anymore --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 5ea88bdd09..567bb79287 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -87,7 +87,7 @@ D3D12GSRender::D3D12GSRender() D3D12_RESOURCE_DESC resDesc = {}; resDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; - resDesc.Width = (UINT)1024 * 1024; + resDesc.Width = (UINT)1024 * 1024 * 16; resDesc.Height = 1; resDesc.DepthOrArraySize = 1; resDesc.SampleDesc.Count = 1; @@ -208,7 +208,7 @@ D3D12GSRender::D3D12GSRender() // Texture 
D3D12_HEAP_DESC heapDescription = {}; - heapDescription.SizeInBytes = 256 * 256 * 256; + heapDescription.SizeInBytes = 256 * 256 * 256 * 16; heapDescription.Properties.Type = D3D12_HEAP_TYPE_UPLOAD; check(m_device->CreateHeap(&heapDescription, IID_PPV_ARGS(&m_uploadTextureHeap))); @@ -676,7 +676,6 @@ void D3D12GSRender::ExecCMD() for (u32 i = 0; i < m_textures_count; ++i) { if (!m_textures[i].IsEnabled()) continue; - ID3D12Resource *Texture, *vramTexture; size_t textureSize = m_textures[i].GetWidth() * m_textures[i].GetHeight() * 4; D3D12_RESOURCE_DESC textureDesc = {}; @@ -721,6 +720,7 @@ void D3D12GSRender::ExecCMD() )); m_currentStorageOffset += textureSize; + m_currentStorageOffset = (m_currentStorageOffset + 65536 - 1) & ~65535; D3D12_TEXTURE_COPY_LOCATION dst = {}, src = {}; dst.pResource = vramTexture; From 2aedd0633f11b22827e99282169b69fe9ca14e5f Mon Sep 17 00:00:00 2001 From: Zangetsu38 Date: Wed, 20 May 2015 11:03:49 +0200 Subject: [PATCH 087/343] Fix crash after launch game one mode release. Fix Temporary. --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 567bb79287..1143321305 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -38,9 +38,7 @@ D3D12GSRender::D3D12GSRender() check(CreateDXGIFactory(IID_PPV_ARGS(&dxgiFactory))); // Create adapter IDXGIAdapter* adaptater = nullptr; -#ifdef _DEBUG check(dxgiFactory->EnumWarpAdapter(IID_PPV_ARGS(&adaptater))); -#endif check(D3D12CreateDevice(adaptater, D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&m_device))); // Queues From f2985f12c145675b969eaedd927bedf74d0587e0 Mon Sep 17 00:00:00 2001 From: vlj Date: Wed, 20 May 2015 23:54:28 +0200 Subject: [PATCH 088/343] d3d12: Use heap for vertex buffer It should consume less memory. 
--- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 72 +++++++++++++++++---------- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 7 ++- 2 files changed, 50 insertions(+), 29 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 1143321305..024405d4b0 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -20,7 +20,7 @@ static void check(HRESULT hr) D3D12GSRender::D3D12GSRender() : GSRender(), m_fbo(nullptr), m_PSO(nullptr) { - memset(m_vertexBufferSize, 0, sizeof(m_vertexBufferSize)); + m_currentVertexBuffersHeapOffset = 0; m_constantsBufferSize = 0; m_constantsBufferIndex = 0; m_currentScaleOffsetBufferIndex = 0; @@ -79,31 +79,27 @@ D3D12GSRender::D3D12GSRender() m_device->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(&m_backbufferAsRendertarget[1])); m_device->CreateRenderTargetView(m_backBuffer[1], &rttDesc, m_backbufferAsRendertarget[1]->GetCPUDescriptorHandleForHeapStart()); - // Create global vertex buffers (1 MB, hopefully big enough...) 
+ // Create heap for vertex buffers + D3D12_HEAP_DESC vertexBufferHeapDesc = {}; + // 16 MB wide + vertexBufferHeapDesc.SizeInBytes = 1024 * 1024 * 16; + vertexBufferHeapDesc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS; + vertexBufferHeapDesc.Properties.Type = D3D12_HEAP_TYPE_UPLOAD; + check(m_device->CreateHeap(&vertexBufferHeapDesc, IID_PPV_ARGS(&m_vertexBuffersHeap))); + + D3D12_HEAP_PROPERTIES heapProp = {}; heapProp.Type = D3D12_HEAP_TYPE_UPLOAD; D3D12_RESOURCE_DESC resDesc = {}; resDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; - resDesc.Width = (UINT)1024 * 1024 * 16; + resDesc.Width = (UINT) 1024 * 1024; resDesc.Height = 1; resDesc.DepthOrArraySize = 1; resDesc.SampleDesc.Count = 1; resDesc.MipLevels = 1; resDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; - for (unsigned i = 0; i < m_vertex_count; i++) - { - check(m_device->CreateCommittedResource( - &heapProp, - D3D12_HEAP_FLAG_NONE, - &resDesc, - D3D12_RESOURCE_STATE_GENERIC_READ, - nullptr, - IID_PPV_ARGS(&m_vertexBuffer[i]) - )); - } - check(m_device->CreateCommittedResource( &heapProp, D3D12_HEAP_FLAG_NONE, @@ -237,10 +233,11 @@ D3D12GSRender::~D3D12GSRender() m_constantsVertexBuffer->Release(); m_constantsFragmentBuffer->Release(); m_scaleOffsetBuffer->Release(); - for (unsigned i = 0; i < 32; i++) - m_vertexBuffer[i]->Release(); + m_vertexBuffersHeap->Release(); if (m_fbo) delete m_fbo; + for (auto tmp : m_inflightVertexBuffers) + tmp->Release(); m_textureDescriptorsHeap->Release(); m_textureStorage->Release(); m_uploadTextureHeap->Release(); @@ -373,21 +370,39 @@ std::vector D3D12GSRender::EnableVertexData(bool index if (!m_vertex_data[i].IsEnabled()) continue; const size_t item_size = m_vertex_data[i].GetTypeSize() * m_vertex_data[i].size; const size_t data_size = m_vertex_data[i].data.size() - data_offset * item_size; - - // TODO: Use default heap and upload data - void *bufferMap; - check(m_vertexBuffer[i]->Map(0, nullptr, (void**)&bufferMap)); - memcpy((char*)bufferMap + m_vertexBufferSize[i] + 
data_offset * item_size, &m_vertex_data[i].data[data_offset * item_size], data_size); - m_vertexBuffer[i]->Unmap(0, nullptr); - size_t subBufferSize = (data_offset + data_size) * item_size; + ID3D12Resource *vertexBuffer; + D3D12_RESOURCE_DESC vertexBufferDesc = {}; + vertexBufferDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + vertexBufferDesc.Width = (UINT)subBufferSize; + vertexBufferDesc.Height = 1; + vertexBufferDesc.DepthOrArraySize = 1; + vertexBufferDesc.SampleDesc.Count = 1; + vertexBufferDesc.MipLevels = 1; + vertexBufferDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + check(m_device->CreatePlacedResource( + m_vertexBuffersHeap, + m_currentVertexBuffersHeapOffset, + &vertexBufferDesc, + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&vertexBuffer) + )); + void *bufferMap; + check(vertexBuffer->Map(0, nullptr, (void**)&bufferMap)); + memcpy((char*)bufferMap + data_offset * item_size, &m_vertex_data[i].data[data_offset * item_size], data_size); + vertexBuffer->Unmap(0, nullptr); + m_inflightVertexBuffers.push_back(vertexBuffer); + D3D12_VERTEX_BUFFER_VIEW vertexBufferView = {}; - vertexBufferView.BufferLocation = m_vertexBuffer[i]->GetGPUVirtualAddress() + m_vertexBufferSize[i]; + vertexBufferView.BufferLocation = vertexBuffer->GetGPUVirtualAddress(); vertexBufferView.SizeInBytes = (UINT)subBufferSize; vertexBufferView.StrideInBytes = (UINT)item_size; result.push_back(vertexBufferView); - m_vertexBufferSize[i] += subBufferSize; + + // 65536 alignment + m_currentVertexBuffersHeapOffset += (subBufferSize + 65536 - 1) & ~65535; } if (indexed_draw) @@ -1246,7 +1261,10 @@ void D3D12GSRender::Flip() for (ID3D12GraphicsCommandList *gfxCommandList : m_inflightCommandList) gfxCommandList->Release(); m_inflightCommandList.clear(); - memset(m_vertexBufferSize, 0, sizeof(m_vertexBufferSize)); + for (ID3D12Resource *vertexBuffer : m_inflightVertexBuffers) + vertexBuffer->Release(); + m_inflightVertexBuffers.clear(); + m_currentVertexBuffersHeapOffset = 
0; m_constantsBufferSize = 0; m_constantsBufferIndex = 0; m_currentScaleOffsetBufferIndex = 0; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index f418b43542..8b9b3f719a 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -43,7 +43,7 @@ void SetGetD3DGSFrameCallback(GetGSFrameCb2 value); class D3D12GSRender : public GSRender { private: - size_t m_vertexBufferSize[32]; + // std::vector m_post_draw_objs; PipelineStateObjectCache m_cachePSO; @@ -58,8 +58,11 @@ private: bool m_forcedIndexBuffer; + size_t m_currentVertexBuffersHeapOffset; + std::vector m_inflightVertexBuffers; + ID3D12Heap *m_vertexBuffersHeap; size_t m_indexBufferCount; - ID3D12Resource *m_indexBuffer, *m_vertexBuffer[m_vertex_count]; + ID3D12Resource *m_indexBuffer; ID3D12Resource *m_constantsVertexBuffer, *m_constantsFragmentBuffer; size_t constantsFragmentSize; ID3D12DescriptorHeap *m_constantsBufferDescriptorsHeap; From eb1b8b748a399087e526776c4355e4d69a3cd08b Mon Sep 17 00:00:00 2001 From: vlj Date: Thu, 21 May 2015 00:06:50 +0200 Subject: [PATCH 089/343] d3d12: Fix for fragment decompiler --- rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp index 1d8d3cbf8d..e0f238677e 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp @@ -70,7 +70,7 @@ std::string D3D12FragmentDecompiler::compareFunction(COMPARE f, const std::strin default: abort(); case COMPARE::FUNCTION_SEQ: - return "(" + Op0 + " == " + Op1 + ".xxxx"; + return "(" + Op0 + " == " + Op1 + ").xxxx"; case COMPARE::FUNCTION_SGE: return "(" + Op0 + " >= " + Op1 +").xxxx"; case COMPARE::FUNCTION_SGT: From a9425fcf2aa0707055a5e88f63b8e0a6992bf44a Mon Sep 17 00:00:00 2001 From: vlj Date: Thu, 21 May 2015 00:33:26 
+0200 Subject: [PATCH 090/343] d3d12: Pass first clear value as clear optimised value to RTTs --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 9 ++++++++- rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp | 17 ++++++++++++++--- rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h | 2 +- 3 files changed, 23 insertions(+), 5 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 024405d4b0..5a5049c996 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -264,8 +264,15 @@ void D3D12GSRender::InitDrawBuffers() m_lastWidth = RSXThread::m_width; m_lastHeight = RSXThread::m_height; m_lastDepth = m_surface_depth_format; + float clearColor[] = + { + m_clear_surface_color_r / 255.0f, + m_clear_surface_color_g / 255.0f, + m_clear_surface_color_b / 255.0f, + m_clear_surface_color_a / 255.0f + }; - m_fbo = new D3D12RenderTargetSets(m_device, (u8)m_lastDepth, m_lastWidth, m_lastHeight); + m_fbo = new D3D12RenderTargetSets(m_device, (u8)m_lastDepth, m_lastWidth, m_lastHeight, clearColor, 1.f); } } diff --git a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp index f991353060..1614c27c5a 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp @@ -9,7 +9,7 @@ #include "Emu/System.h" #include "Emu/RSX/GSRender.h" -D3D12RenderTargetSets::D3D12RenderTargetSets(ID3D12Device *device, u8 surfaceDepthFormat, size_t width, size_t height) +D3D12RenderTargetSets::D3D12RenderTargetSets(ID3D12Device *device, u8 surfaceDepthFormat, size_t width, size_t height, float clearColor[4], float clearDepth) { D3D12_DESCRIPTOR_HEAP_DESC descriptorHeapDesc = {}; descriptorHeapDesc.NumDescriptors = 1; @@ -20,6 +20,9 @@ D3D12RenderTargetSets::D3D12RenderTargetSets(ID3D12Device *device, u8 surfaceDep descriptorHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV; device->CreateDescriptorHeap(&descriptorHeapDesc, 
IID_PPV_ARGS(&m_rttDescriptorHeap)); + D3D12_CLEAR_VALUE clearDepthValue = {}; + clearDepthValue.DepthStencil.Depth = clearDepth; + // Every resource are committed for simplicity, later we could use heap D3D12_HEAP_PROPERTIES heapProp = {}; heapProp.Type = D3D12_HEAP_TYPE_DEFAULT; @@ -37,9 +40,11 @@ D3D12RenderTargetSets::D3D12RenderTargetSets(ID3D12Device *device, u8 surfaceDep break; case CELL_GCM_SURFACE_Z16: resourceDesc.Format = DXGI_FORMAT_R16_TYPELESS; + clearDepthValue.Format = DXGI_FORMAT_D16_UNORM; break; case CELL_GCM_SURFACE_Z24S8: resourceDesc.Format = DXGI_FORMAT_R24G8_TYPELESS; + clearDepthValue.Format = DXGI_FORMAT_D24_UNORM_S8_UINT; break; default: LOG_ERROR(RSX, "Bad depth format! (%d)", surfaceDepthFormat); @@ -51,7 +56,7 @@ D3D12RenderTargetSets::D3D12RenderTargetSets(ID3D12Device *device, u8 surfaceDep D3D12_HEAP_FLAG_NONE, &resourceDesc, D3D12_RESOURCE_STATE_DEPTH_WRITE, - nullptr, // TODO: Assign sensible default clearvalue here + &clearDepthValue, IID_PPV_ARGS(&m_depthStencilTexture) ); D3D12_DEPTH_STENCIL_VIEW_DESC depthStencilViewDesc = {}; @@ -72,6 +77,12 @@ D3D12RenderTargetSets::D3D12RenderTargetSets(ID3D12Device *device, u8 surfaceDep depthStencilViewDesc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2D; device->CreateDepthStencilView(m_depthStencilTexture, &depthStencilViewDesc, m_depthStencilDescriptorHeap->GetCPUDescriptorHandleForHeapStart()); + D3D12_CLEAR_VALUE clearColorValue = {}; + clearColorValue.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + clearColorValue.Color[0] = clearColor[0]; + clearColorValue.Color[1] = clearColor[1]; + clearColorValue.Color[2] = clearColor[2]; + clearColorValue.Color[3] = clearColor[3]; g_RTTIncrement = device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_RTV); D3D12_CPU_DESCRIPTOR_HANDLE Handle = m_rttDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); for (int i = 0; i < 4; ++i) @@ -90,7 +101,7 @@ D3D12RenderTargetSets::D3D12RenderTargetSets(ID3D12Device *device, u8 surfaceDep 
D3D12_HEAP_FLAG_NONE, &resourceDesc, D3D12_RESOURCE_STATE_RENDER_TARGET, - nullptr, // TODO: Assign sensible default clearvalue here + &clearColorValue, IID_PPV_ARGS(&m_rtts[i]) ); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h index d6b292101e..1d39085d39 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h @@ -15,7 +15,7 @@ class D3D12RenderTargetSets ID3D12DescriptorHeap *m_rttDescriptorHeap; ID3D12DescriptorHeap *m_depthStencilDescriptorHeap; public: - D3D12RenderTargetSets(ID3D12Device *device, u8 surfaceDepthFormat, size_t width, size_t height); + D3D12RenderTargetSets(ID3D12Device *device, u8 surfaceDepthFormat, size_t width, size_t height, float clearColor[4], float clearDepth); ~D3D12RenderTargetSets(); /** * Return the base descriptor address for the give surface target. From 9fc50fcce5228ba8dcba653179e01fb00b6e3256 Mon Sep 17 00:00:00 2001 From: raven02 Date: Thu, 21 May 2015 07:14:21 +0800 Subject: [PATCH 091/343] d3d12: Fix for vertex decompiler --- rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp index be52bab93a..39dc87365f 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp @@ -58,7 +58,7 @@ std::string D3D12VertexProgramDecompiler::compareFunction(COMPARE f, const std:: default: abort(); case COMPARE::FUNCTION_SEQ: - return "(" + Op0 + " == " + Op1 + ".xxxx"; + return "(" + Op0 + " == " + Op1 + ").xxxx"; case COMPARE::FUNCTION_SGE: return "(" + Op0 + " >= " + Op1 + ").xxxx"; case COMPARE::FUNCTION_SGT: @@ -202,4 +202,4 @@ D3D12VertexProgramDecompiler::D3D12VertexProgramDecompiler(std::vector& dat { } -#endif \ No newline at end of file +#endif From 31ea9068dedf61c899263a4ebbe26213b4ba48f6 
Mon Sep 17 00:00:00 2001 From: vlj Date: Thu, 21 May 2015 01:23:46 +0200 Subject: [PATCH 092/343] d3d12: Use true default value for vertex decompiler --- rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp index 39dc87365f..7c560789de 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp @@ -172,7 +172,7 @@ void D3D12VertexProgramDecompiler::insertMainStart(std::stringstream & OS) for (const ParamType PT : m_parr.params[PF_PARAM_NONE]) { for (const ParamItem &PI : PT.items) - OS << " " << PT.type << " " << PI.name << " = float4(0., 0., 0., 1.);" << std::endl; + OS << " " << PT.type << " " << PI.name << " = " << PI.value << ";" << std::endl; } for (const ParamType PT : m_parr.params[PF_PARAM_IN]) From b2577833f976e773c575bc9477be1b4d494b60ab Mon Sep 17 00:00:00 2001 From: raven02 Date: Thu, 21 May 2015 07:24:10 +0800 Subject: [PATCH 093/343] d3d12: DP2A --- rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp index 7c560789de..7b81daed63 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp @@ -31,7 +31,7 @@ std::string D3D12VertexProgramDecompiler::getFunction(enum class FUNCTION f) case FUNCTION::FUNCTION_DP2: return "dot($0.xy, $1.xy).xxxx"; case FUNCTION::FUNCTION_DP2A: - return ""; + return "(dot($0.xy, $1.xy) + $2.x).xxxx"; case FUNCTION::FUNCTION_DP3: return "dot($0.xyz, $1.xyz).xxxx"; case FUNCTION::FUNCTION_DP4: @@ -172,7 +172,7 @@ void D3D12VertexProgramDecompiler::insertMainStart(std::stringstream & OS) for (const ParamType PT : m_parr.params[PF_PARAM_NONE]) { for 
(const ParamItem &PI : PT.items) - OS << " " << PT.type << " " << PI.name << " = " << PI.value << ";" << std::endl; + OS << " " << PT.type << " " << PI.name << " = float4(0., 0., 0., 1.);" << std::endl; } for (const ParamType PT : m_parr.params[PF_PARAM_IN]) From b5f91a6e75720110c0177314573dd981185971de Mon Sep 17 00:00:00 2001 From: vlj Date: Thu, 21 May 2015 23:19:07 +0200 Subject: [PATCH 094/343] d3d12: Fix for vertex decompiler --- rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp index 7b81daed63..4b606da907 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp @@ -172,7 +172,12 @@ void D3D12VertexProgramDecompiler::insertMainStart(std::stringstream & OS) for (const ParamType PT : m_parr.params[PF_PARAM_NONE]) { for (const ParamItem &PI : PT.items) - OS << " " << PT.type << " " << PI.name << " = float4(0., 0., 0., 1.);" << std::endl; + { + OS << " " << PT.type << " " << PI.name; + if (!PI.value.empty()) + OS << " = " << PI.value; + OS << ";" << std::endl; + } } for (const ParamType PT : m_parr.params[PF_PARAM_IN]) From 1471afcfe62903673fd05b0b390095295ebb311d Mon Sep 17 00:00:00 2001 From: raven02 Date: Fri, 22 May 2015 08:17:35 +0800 Subject: [PATCH 095/343] D3DGS naming fix --- rpcs3/Gui/D3DGSFrame.cpp | 2 +- rpcs3/Gui/MainFrame.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/rpcs3/Gui/D3DGSFrame.cpp b/rpcs3/Gui/D3DGSFrame.cpp index e8b22fcd73..b0b86c4ce2 100644 --- a/rpcs3/Gui/D3DGSFrame.cpp +++ b/rpcs3/Gui/D3DGSFrame.cpp @@ -6,7 +6,7 @@ #include "Utilities/Timer.h" D3DGSFrame::D3DGSFrame() - : GSFrame(nullptr, "GSFrame[OpenGL]") + : GSFrame(nullptr, "GSFrame[DirectX 12]") , m_frames(0) { canvas = new wxWindow(this, wxID_ANY); diff --git a/rpcs3/Gui/MainFrame.cpp 
b/rpcs3/Gui/MainFrame.cpp index ef725668a4..f583d04ff1 100644 --- a/rpcs3/Gui/MainFrame.cpp +++ b/rpcs3/Gui/MainFrame.cpp @@ -475,7 +475,7 @@ void MainFrame::Config(wxCommandEvent& WXUNUSED(event)) cbox_gs_render->Append("Null"); cbox_gs_render->Append("OpenGL"); #if defined(DX12_SUPPORT) - cbox_gs_render->Append("D3D12"); + cbox_gs_render->Append("DirectX 12"); #endif for(int i = 1; i < WXSIZEOF(ResolutionTable); ++i) From abbd244376eff60dd5de58a9d6e6e1d736675eb2 Mon Sep 17 00:00:00 2001 From: vlj Date: Fri, 22 May 2015 21:05:06 +0200 Subject: [PATCH 096/343] d3d12: Fix fractal not animated in dynamic test 2 --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 30 +++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 5a5049c996..4c39ca7152 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -557,17 +557,35 @@ void D3D12GSRender::FillPixelShaderConstantsBuffer() check(m_constantsFragmentBuffer->Map(0, nullptr, &constantsBufferMap)); for (size_t offsetInFP : fragmentOffset) { - auto data = vm::ptr::make(m_cur_fragment_prog->addr + (u32)offsetInFP); + u32 vector[4]; + // Is it assigned by color register in command buffer ? 
+ if (!m_fragment_constants.empty() && offsetInFP == m_fragment_constants.front().id - m_cur_fragment_prog->offset) + { + const RSXTransformConstant& c = m_fragment_constants.front(); + vector[0] = (u32&)c.x; + vector[1] = (u32&)c.y; + vector[2] = (u32&)c.z; + vector[3] = (u32&)c.w; + } + else + { + auto data = vm::ptr::make(m_cur_fragment_prog->addr + (u32)offsetInFP); - u32 c0 = (data[0] >> 16 | data[0] << 16); - u32 c1 = (data[1] >> 16 | data[1] << 16); - u32 c2 = (data[2] >> 16 | data[2] << 16); - u32 c3 = (data[3] >> 16 | data[3] << 16); + u32 c0 = (data[0] >> 16 | data[0] << 16); + u32 c1 = (data[1] >> 16 | data[1] << 16); + u32 c2 = (data[2] >> 16 | data[2] << 16); + u32 c3 = (data[3] >> 16 | data[3] << 16); + + vector[0] = c0; + vector[1] = c1; + vector[2] = c2; + vector[3] = c3; + } - u32 vector[] = { c0, c1, c2, c3 }; memcpy((char*)constantsBufferMap + constantsFragmentSize + offset, vector, 4 * sizeof(u32)); offset += 4 * sizeof(u32); } + m_constantsFragmentBuffer->Unmap(0, nullptr); // Multiple of 256 offset = (offset + 255) & ~255; From 5872144165c35124c2d7d81fb40cba9657e50ba5 Mon Sep 17 00:00:00 2001 From: vlj Date: Fri, 22 May 2015 21:19:46 +0200 Subject: [PATCH 097/343] d3d12: Move texture code to another file + add the copy command as soon as possible --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 97 +-------------------- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 5 ++ rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 118 ++++++++++++++++++++++++++ rpcs3/Emu/RSX/D3D12/D3D12Texture.h | 1 + rpcs3/emucore.vcxproj | 2 + rpcs3/emucore.vcxproj.filters | 6 ++ 6 files changed, 135 insertions(+), 94 deletions(-) create mode 100644 rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp create mode 100644 rpcs3/Emu/RSX/D3D12/D3D12Texture.h diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 4c39ca7152..3fdf445576 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -50,6 +50,7 @@ 
D3D12GSRender::D3D12GSRender() // Create a global command allocator m_device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&m_commandAllocator)); + m_device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&m_textureUploadCommandAllocator)); m_frame = GetGSFrame(); @@ -708,100 +709,7 @@ void D3D12GSRender::ExecCMD() commandList->SetGraphicsRootDescriptorTable(1, Handle); commandList->SetPipelineState(m_PSO); - - size_t usedTexture = 0; - - for (u32 i = 0; i < m_textures_count; ++i) - { - if (!m_textures[i].IsEnabled()) continue; - ID3D12Resource *Texture, *vramTexture; - size_t textureSize = m_textures[i].GetWidth() * m_textures[i].GetHeight() * 4; - D3D12_RESOURCE_DESC textureDesc = {}; - textureDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; - textureDesc.Width = textureSize; - textureDesc.Height = 1; - textureDesc.DepthOrArraySize = 1; - textureDesc.SampleDesc.Count = 1; - textureDesc.MipLevels = 1; - textureDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; - check(m_device->CreatePlacedResource( - m_uploadTextureHeap, - m_currentStorageOffset, - &textureDesc, - D3D12_RESOURCE_STATE_GENERIC_READ, - nullptr, - IID_PPV_ARGS(&Texture) - )); - - const u32 texaddr = GetAddress(m_textures[i].GetOffset(), m_textures[i].GetLocation()); - auto pixels = vm::get_ptr(texaddr); - void *textureData; - check(Texture->Map(0, nullptr, (void**)&textureData)); - memcpy(textureData, pixels, textureSize); - Texture->Unmap(0, nullptr); - - D3D12_RESOURCE_DESC vramTextureDesc = {}; - vramTextureDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; - vramTextureDesc.Width = m_textures[i].GetWidth(); - vramTextureDesc.Height = m_textures[i].GetHeight(); - vramTextureDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - vramTextureDesc.DepthOrArraySize = 1; - vramTextureDesc.SampleDesc.Count = 1; - vramTextureDesc.MipLevels = 1; - check(m_device->CreatePlacedResource( - m_textureStorage, - m_currentStorageOffset, - &vramTextureDesc, - 
D3D12_RESOURCE_STATE_COPY_DEST, - nullptr, - IID_PPV_ARGS(&vramTexture) - )); - - m_currentStorageOffset += textureSize; - m_currentStorageOffset = (m_currentStorageOffset + 65536 - 1) & ~65535; - - D3D12_TEXTURE_COPY_LOCATION dst = {}, src = {}; - dst.pResource = vramTexture; - dst.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; - src.pResource = Texture; - src.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; - src.PlacedFootprint.Footprint.Depth = 1; - src.PlacedFootprint.Footprint.Width = m_textures[i].GetWidth(); - src.PlacedFootprint.Footprint.Height = m_textures[i].GetHeight(); - src.PlacedFootprint.Footprint.RowPitch = m_textures[i].GetWidth() * 4; - src.PlacedFootprint.Footprint.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - - commandList->CopyTextureRegion(&dst, 0, 0, 0, &src, nullptr); - - D3D12_RESOURCE_BARRIER barrier = {}; - barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - barrier.Transition.pResource = vramTexture; - barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; - barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_GENERIC_READ; - commandList->ResourceBarrier(1, &barrier); - - D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; - srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; - srvDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - srvDesc.Texture2D.MipLevels = 1; - srvDesc.Shader4ComponentMapping = D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING(D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_1, D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_2, D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_3, D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0); - D3D12_CPU_DESCRIPTOR_HANDLE Handle = m_textureDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); - Handle.ptr += (m_currentTextureIndex + usedTexture) * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - m_device->CreateShaderResourceView(vramTexture, &srvDesc, Handle); - - // TODO : Correctly define sampler - D3D12_SAMPLER_DESC samplerDesc 
= {}; - samplerDesc.Filter = D3D12_FILTER_MIN_MAG_MIP_POINT; - samplerDesc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_WRAP; - samplerDesc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_WRAP; - samplerDesc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_WRAP; - Handle = m_samplerDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); - Handle.ptr += (m_currentTextureIndex + usedTexture) * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - m_device->CreateSampler(&samplerDesc, Handle); - - usedTexture++; - } - + size_t usedTexture = UploadTextures(); Handle = m_textureDescriptorsHeap->GetGPUDescriptorHandleForHeapStart(); Handle.ptr += m_currentTextureIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); commandList->SetDescriptorHeaps(1, &m_textureDescriptorsHeap); @@ -1283,6 +1191,7 @@ void D3D12GSRender::Flip() WaitForSingleObject(gfxqueuecompletion, INFINITE); CloseHandle(gfxqueuecompletion); m_commandAllocator->Reset(); + m_textureUploadCommandAllocator->Reset(); for (ID3D12GraphicsCommandList *gfxCommandList : m_inflightCommandList) gfxCommandList->Release(); m_inflightCommandList.clear(); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 8b9b3f719a..aee262ec41 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -50,6 +50,7 @@ private: ID3D12PipelineState *m_PSO; ID3D12RootSignature *m_rootSignature; + ID3D12CommandAllocator *m_textureUploadCommandAllocator; ID3D12Heap *m_uploadTextureHeap, *m_textureStorage; size_t m_currentStorageOffset; ID3D12DescriptorHeap *m_textureDescriptorsHeap; @@ -101,6 +102,10 @@ private: void setScaleOffset(); void FillVertexShaderConstantsBuffer(); void FillPixelShaderConstantsBuffer(); + /** + * returns the number of texture uploaded + */ + size_t UploadTextures(); /*void DisableVertexData(); void WriteBuffers(); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp 
new file mode 100644 index 0000000000..7cfe423087 --- /dev/null +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -0,0 +1,118 @@ +#include "stdafx.h" +#if defined(DX12_SUPPORT) +#include "D3D12GSRender.h" +// For clarity this code deals with texture but belongs to D3D12GSRender class + +static void check(HRESULT hr) +{ + if (hr != 0) + abort(); +} + +size_t D3D12GSRender::UploadTextures() +{ + size_t usedTexture = 0; + + for (u32 i = 0; i < m_textures_count; ++i) + { + if (!m_textures[i].IsEnabled()) continue; + + // Upload at each iteration to take advantage of overlapping transfer + ID3D12GraphicsCommandList *commandList; + check(m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_textureUploadCommandAllocator, nullptr, IID_PPV_ARGS(&commandList))); + + ID3D12Resource *Texture, *vramTexture; + size_t textureSize = m_textures[i].GetWidth() * m_textures[i].GetHeight() * 4; + D3D12_RESOURCE_DESC textureDesc = {}; + textureDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + textureDesc.Width = textureSize; + textureDesc.Height = 1; + textureDesc.DepthOrArraySize = 1; + textureDesc.SampleDesc.Count = 1; + textureDesc.MipLevels = 1; + textureDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + check(m_device->CreatePlacedResource( + m_uploadTextureHeap, + m_currentStorageOffset, + &textureDesc, + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&Texture) + )); + + const u32 texaddr = GetAddress(m_textures[i].GetOffset(), m_textures[i].GetLocation()); + auto pixels = vm::get_ptr(texaddr); + void *textureData; + check(Texture->Map(0, nullptr, (void**)&textureData)); + memcpy(textureData, pixels, textureSize); + Texture->Unmap(0, nullptr); + + D3D12_RESOURCE_DESC vramTextureDesc = {}; + vramTextureDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; + vramTextureDesc.Width = m_textures[i].GetWidth(); + vramTextureDesc.Height = m_textures[i].GetHeight(); + vramTextureDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + vramTextureDesc.DepthOrArraySize = 1; + 
vramTextureDesc.SampleDesc.Count = 1; + vramTextureDesc.MipLevels = 1; + check(m_device->CreatePlacedResource( + m_textureStorage, + m_currentStorageOffset, + &vramTextureDesc, + D3D12_RESOURCE_STATE_COPY_DEST, + nullptr, + IID_PPV_ARGS(&vramTexture) + )); + + m_currentStorageOffset += textureSize; + m_currentStorageOffset = (m_currentStorageOffset + 65536 - 1) & ~65535; + + D3D12_TEXTURE_COPY_LOCATION dst = {}, src = {}; + dst.pResource = vramTexture; + dst.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + src.pResource = Texture; + src.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + src.PlacedFootprint.Footprint.Depth = 1; + src.PlacedFootprint.Footprint.Width = m_textures[i].GetWidth(); + src.PlacedFootprint.Footprint.Height = m_textures[i].GetHeight(); + src.PlacedFootprint.Footprint.RowPitch = m_textures[i].GetWidth() * 4; + src.PlacedFootprint.Footprint.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + + commandList->CopyTextureRegion(&dst, 0, 0, 0, &src, nullptr); + + D3D12_RESOURCE_BARRIER barrier = {}; + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barrier.Transition.pResource = vramTexture; + barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; + barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_GENERIC_READ; + commandList->ResourceBarrier(1, &barrier); + + D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + srvDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + srvDesc.Texture2D.MipLevels = 1; + srvDesc.Shader4ComponentMapping = D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING(D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_1, D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_2, D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_3, D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0); + D3D12_CPU_DESCRIPTOR_HANDLE Handle = m_textureDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); + Handle.ptr += (m_currentTextureIndex + usedTexture) * 
m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + m_device->CreateShaderResourceView(vramTexture, &srvDesc, Handle); + + // TODO : Correctly define sampler + D3D12_SAMPLER_DESC samplerDesc = {}; + samplerDesc.Filter = D3D12_FILTER_MIN_MAG_MIP_POINT; + samplerDesc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_WRAP; + samplerDesc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_WRAP; + samplerDesc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_WRAP; + Handle = m_samplerDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); + Handle.ptr += (m_currentTextureIndex + usedTexture) * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + m_device->CreateSampler(&samplerDesc, Handle); + + commandList->Close(); + m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)&commandList); + + usedTexture++; + } + + return usedTexture; +} + +#endif \ No newline at end of file diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.h b/rpcs3/Emu/RSX/D3D12/D3D12Texture.h new file mode 100644 index 0000000000..6f70f09bee --- /dev/null +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.h @@ -0,0 +1 @@ +#pragma once diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index 88d174a108..8dccd27240 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -46,6 +46,7 @@ + @@ -509,6 +510,7 @@ + diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters index 21b05ad7e5..204f2cc488 100644 --- a/rpcs3/emucore.vcxproj.filters +++ b/rpcs3/emucore.vcxproj.filters @@ -986,6 +986,9 @@ Emu\GPU\RSX\D3D12 + + Emu\GPU\RSX\D3D12 + @@ -1867,5 +1870,8 @@ Emu\GPU\RSX\D3D12 + + Emu\GPU\RSX\D3D12 + \ No newline at end of file From 71b9caf65ad41082181927eaedfef7654790c5ed Mon Sep 17 00:00:00 2001 From: vlj Date: Fri, 22 May 2015 23:10:38 +0200 Subject: [PATCH 098/343] d3d12: Duplicate all resource to do real double buffering --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 372 ++++++++++++++------------ rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 65 +++-- 
rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 22 +- 3 files changed, 250 insertions(+), 209 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 3fdf445576..8742b1e586 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -17,8 +17,8 @@ static void check(HRESULT hr) abort(); } -D3D12GSRender::D3D12GSRender() - : GSRender(), m_fbo(nullptr), m_PSO(nullptr) + +void D3D12GSRender::ResourceStorage::Reset() { m_currentVertexBuffersHeapOffset = 0; m_constantsBufferSize = 0; @@ -27,6 +27,119 @@ D3D12GSRender::D3D12GSRender() constantsFragmentSize = 0; m_currentStorageOffset = 0; m_currentTextureIndex = 0; + + m_commandAllocator->Reset(); + m_textureUploadCommandAllocator->Reset(); + for (ID3D12GraphicsCommandList *gfxCommandList : m_inflightCommandList) + gfxCommandList->Release(); + m_inflightCommandList.clear(); + for (ID3D12Resource *vertexBuffer : m_inflightVertexBuffers) + vertexBuffer->Release(); + m_inflightVertexBuffers.clear(); +} + +void D3D12GSRender::ResourceStorage::Init(ID3D12Device *device) +{ + // Create a global command allocator + device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&m_commandAllocator)); + device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&m_textureUploadCommandAllocator)); + + + // Create heap for vertex buffers + D3D12_HEAP_DESC vertexBufferHeapDesc = {}; + // 16 MB wide + vertexBufferHeapDesc.SizeInBytes = 1024 * 1024 * 16; + vertexBufferHeapDesc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS; + vertexBufferHeapDesc.Properties.Type = D3D12_HEAP_TYPE_UPLOAD; + check(device->CreateHeap(&vertexBufferHeapDesc, IID_PPV_ARGS(&m_vertexBuffersHeap))); + + + D3D12_HEAP_PROPERTIES heapProp = {}; + heapProp.Type = D3D12_HEAP_TYPE_UPLOAD; + + D3D12_RESOURCE_DESC resDesc = {}; + resDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + resDesc.Width = (UINT)1024 * 1024; + resDesc.Height = 1; + 
resDesc.DepthOrArraySize = 1; + resDesc.SampleDesc.Count = 1; + resDesc.MipLevels = 1; + resDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + + check(device->CreateCommittedResource( + &heapProp, + D3D12_HEAP_FLAG_NONE, + &resDesc, + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&m_indexBuffer) + )); + + check(device->CreateCommittedResource( + &heapProp, + D3D12_HEAP_FLAG_NONE, + &resDesc, + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&m_constantsVertexBuffer) + )); + + check(device->CreateCommittedResource( + &heapProp, + D3D12_HEAP_FLAG_NONE, + &resDesc, + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&m_constantsFragmentBuffer) + )); + + D3D12_DESCRIPTOR_HEAP_DESC descriptorHeapDesc = {}; + descriptorHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; + descriptorHeapDesc.NumDescriptors = 1000; // For safety + descriptorHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; + check(device->CreateDescriptorHeap(&descriptorHeapDesc, IID_PPV_ARGS(&m_constantsBufferDescriptorsHeap))); + + // Scale offset buffer + // Separate constant buffer + check(device->CreateCommittedResource( + &heapProp, + D3D12_HEAP_FLAG_NONE, + &resDesc, + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&m_scaleOffsetBuffer) + )); + descriptorHeapDesc = {}; + descriptorHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; + descriptorHeapDesc.NumDescriptors = 1000; // For safety + descriptorHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; + check(device->CreateDescriptorHeap(&descriptorHeapDesc, IID_PPV_ARGS(&m_scaleOffsetDescriptorHeap))); + + // Texture + D3D12_HEAP_DESC heapDescription = {}; + heapDescription.SizeInBytes = 256 * 256 * 256 * 16; + heapDescription.Properties.Type = D3D12_HEAP_TYPE_UPLOAD; + check(device->CreateHeap(&heapDescription, IID_PPV_ARGS(&m_uploadTextureHeap))); + + heapDescription.Properties.Type = D3D12_HEAP_TYPE_DEFAULT; + heapDescription.Flags = 
D3D12_HEAP_FLAG_ALLOW_ONLY_NON_RT_DS_TEXTURES; + check(device->CreateHeap(&heapDescription, IID_PPV_ARGS(&m_textureStorage))); + + D3D12_DESCRIPTOR_HEAP_DESC textureDescriptorDesc = {}; + textureDescriptorDesc.NumDescriptors = 1000; // For safety + textureDescriptorDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; + textureDescriptorDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; + check(device->CreateDescriptorHeap(&textureDescriptorDesc, IID_PPV_ARGS(&m_textureDescriptorsHeap))); + + textureDescriptorDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER; + check(device->CreateDescriptorHeap(&textureDescriptorDesc, IID_PPV_ARGS(&m_samplerDescriptorHeap))); +} + + +D3D12GSRender::D3D12GSRender() + : GSRender(), m_fbo(nullptr), m_PSO(nullptr) +{ + // Enable d3d debug layer #ifdef _DEBUG Microsoft::WRL::ComPtr debugInterface; @@ -38,7 +151,7 @@ D3D12GSRender::D3D12GSRender() check(CreateDXGIFactory(IID_PPV_ARGS(&dxgiFactory))); // Create adapter IDXGIAdapter* adaptater = nullptr; - check(dxgiFactory->EnumWarpAdapter(IID_PPV_ARGS(&adaptater))); +// check(dxgiFactory->EnumWarpAdapter(IID_PPV_ARGS(&adaptater))); check(D3D12CreateDevice(adaptater, D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&m_device))); // Queues @@ -48,10 +161,6 @@ D3D12GSRender::D3D12GSRender() check(m_device->CreateCommandQueue(©QueueDesc, IID_PPV_ARGS(&m_commandQueueCopy))); check(m_device->CreateCommandQueue(&graphicQueueDesc, IID_PPV_ARGS(&m_commandQueueGraphic))); - // Create a global command allocator - m_device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&m_commandAllocator)); - m_device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&m_textureUploadCommandAllocator)); - m_frame = GetGSFrame(); // Create swap chain and put them in a descriptor heap as rendertarget @@ -66,8 +175,8 @@ D3D12GSRender::D3D12GSRender() swapChain.SwapEffect = DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL; check(dxgiFactory->CreateSwapChain(m_commandQueueGraphic, &swapChain, 
(IDXGISwapChain**)&m_swapChain)); - m_swapChain->GetBuffer(0, IID_PPV_ARGS(&m_backBuffer[0])); - m_swapChain->GetBuffer(1, IID_PPV_ARGS(&m_backBuffer[1])); + m_swapChain->GetBuffer(0, IID_PPV_ARGS(&m_perFrameStorage[0].m_backBuffer)); + m_swapChain->GetBuffer(1, IID_PPV_ARGS(&m_perFrameStorage[1].m_backBuffer)); D3D12_DESCRIPTOR_HEAP_DESC heapDesc = {}; heapDesc.NumDescriptors = 1; @@ -75,81 +184,10 @@ D3D12GSRender::D3D12GSRender() D3D12_RENDER_TARGET_VIEW_DESC rttDesc = {}; rttDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; rttDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - m_device->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(&m_backbufferAsRendertarget[0])); - m_device->CreateRenderTargetView(m_backBuffer[0], &rttDesc, m_backbufferAsRendertarget[0]->GetCPUDescriptorHandleForHeapStart()); - m_device->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(&m_backbufferAsRendertarget[1])); - m_device->CreateRenderTargetView(m_backBuffer[1], &rttDesc, m_backbufferAsRendertarget[1]->GetCPUDescriptorHandleForHeapStart()); - - // Create heap for vertex buffers - D3D12_HEAP_DESC vertexBufferHeapDesc = {}; - // 16 MB wide - vertexBufferHeapDesc.SizeInBytes = 1024 * 1024 * 16; - vertexBufferHeapDesc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS; - vertexBufferHeapDesc.Properties.Type = D3D12_HEAP_TYPE_UPLOAD; - check(m_device->CreateHeap(&vertexBufferHeapDesc, IID_PPV_ARGS(&m_vertexBuffersHeap))); - - - D3D12_HEAP_PROPERTIES heapProp = {}; - heapProp.Type = D3D12_HEAP_TYPE_UPLOAD; - - D3D12_RESOURCE_DESC resDesc = {}; - resDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; - resDesc.Width = (UINT) 1024 * 1024; - resDesc.Height = 1; - resDesc.DepthOrArraySize = 1; - resDesc.SampleDesc.Count = 1; - resDesc.MipLevels = 1; - resDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; - - check(m_device->CreateCommittedResource( - &heapProp, - D3D12_HEAP_FLAG_NONE, - &resDesc, - D3D12_RESOURCE_STATE_GENERIC_READ, - nullptr, - IID_PPV_ARGS(&m_indexBuffer) - )); - - 
check(m_device->CreateCommittedResource( - &heapProp, - D3D12_HEAP_FLAG_NONE, - &resDesc, - D3D12_RESOURCE_STATE_GENERIC_READ, - nullptr, - IID_PPV_ARGS(&m_constantsVertexBuffer) - )); - - check(m_device->CreateCommittedResource( - &heapProp, - D3D12_HEAP_FLAG_NONE, - &resDesc, - D3D12_RESOURCE_STATE_GENERIC_READ, - nullptr, - IID_PPV_ARGS(&m_constantsFragmentBuffer) - )); - - D3D12_DESCRIPTOR_HEAP_DESC descriptorHeapDesc = {}; - descriptorHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; - descriptorHeapDesc.NumDescriptors = 1000; // For safety - descriptorHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; - check(m_device->CreateDescriptorHeap(&descriptorHeapDesc, IID_PPV_ARGS(&m_constantsBufferDescriptorsHeap))); - - // Scale offset buffer - // Separate constant buffer - check(m_device->CreateCommittedResource( - &heapProp, - D3D12_HEAP_FLAG_NONE, - &resDesc, - D3D12_RESOURCE_STATE_GENERIC_READ, - nullptr, - IID_PPV_ARGS(&m_scaleOffsetBuffer) - )); - descriptorHeapDesc = {}; - descriptorHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; - descriptorHeapDesc.NumDescriptors = 1000; // For safety - descriptorHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; - check(m_device->CreateDescriptorHeap(&descriptorHeapDesc, IID_PPV_ARGS(&m_scaleOffsetDescriptorHeap))); - + m_device->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(&m_perFrameStorage[0].m_backbufferAsRendertarget)); + m_device->CreateRenderTargetView(m_perFrameStorage[0].m_backBuffer, &rttDesc, m_perFrameStorage[0].m_backbufferAsRendertarget->GetCPUDescriptorHandleForHeapStart()); + m_device->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(&m_perFrameStorage[1].m_backbufferAsRendertarget)); + m_device->CreateRenderTargetView(m_perFrameStorage[1].m_backBuffer, &rttDesc, m_perFrameStorage[1].m_backbufferAsRendertarget->GetCPUDescriptorHandleForHeapStart()); // Common root signature D3D12_DESCRIPTOR_RANGE descriptorRange[4] = {}; @@ -201,30 +239,18 @@ D3D12GSRender::D3D12GSRender() 
rootSignatureBlob->GetBufferSize(), IID_PPV_ARGS(&m_rootSignature)); - // Texture - D3D12_HEAP_DESC heapDescription = {}; - heapDescription.SizeInBytes = 256 * 256 * 256 * 16; - heapDescription.Properties.Type = D3D12_HEAP_TYPE_UPLOAD; - check(m_device->CreateHeap(&heapDescription, IID_PPV_ARGS(&m_uploadTextureHeap))); + m_perFrameStorage[0].Init(m_device); + m_perFrameStorage[0].Reset(); + m_perFrameStorage[1].Init(m_device); + m_perFrameStorage[1].Reset(); - heapDescription.Properties.Type = D3D12_HEAP_TYPE_DEFAULT; - heapDescription.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_NON_RT_DS_TEXTURES; - check(m_device->CreateHeap(&heapDescription, IID_PPV_ARGS(&m_textureStorage))); - - D3D12_DESCRIPTOR_HEAP_DESC textureDescriptorDesc = {}; - textureDescriptorDesc.NumDescriptors = 1000; // For safety - textureDescriptorDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; - textureDescriptorDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; - check(m_device->CreateDescriptorHeap(&textureDescriptorDesc, IID_PPV_ARGS(&m_textureDescriptorsHeap))); - - textureDescriptorDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER; - check(m_device->CreateDescriptorHeap(&textureDescriptorDesc, IID_PPV_ARGS(&m_samplerDescriptorHeap))); + m_currentResourceStorageIndex = m_swapChain->GetCurrentBackBufferIndex(); } D3D12GSRender::~D3D12GSRender() { // NOTE: Should be released only if no command are in flight ! 
- m_commandAllocator->Release(); +/* m_commandAllocator->Release(); m_commandQueueGraphic->Release(); m_commandQueueCopy->Release(); m_backbufferAsRendertarget[0]->Release(); @@ -247,7 +273,17 @@ D3D12GSRender::~D3D12GSRender() m_backBuffer[0]->Release(); m_backBuffer[1]->Release(); m_swapChain->Release(); - m_device->Release(); + m_device->Release();*/ +} + +D3D12GSRender::ResourceStorage &D3D12GSRender::getCurrentResourceStorage() +{ + return m_perFrameStorage[m_currentResourceStorageIndex]; +} + +D3D12GSRender::ResourceStorage &D3D12GSRender::getNonCurrentResourceStorage() +{ + return m_perFrameStorage[1 - m_currentResourceStorageIndex]; } void D3D12GSRender::Close() @@ -301,8 +337,8 @@ void D3D12GSRender::ExecCMD(u32 cmd) InitDrawBuffers(); ID3D12GraphicsCommandList *commandList; - check(m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_commandAllocator, nullptr, IID_PPV_ARGS(&commandList))); - m_inflightCommandList.push_back(commandList); + check(m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, getCurrentResourceStorage().m_commandAllocator, nullptr, IID_PPV_ARGS(&commandList))); + getCurrentResourceStorage().m_inflightCommandList.push_back(commandList); /* if (m_set_color_mask) { @@ -390,8 +426,8 @@ std::vector D3D12GSRender::EnableVertexData(bool index vertexBufferDesc.MipLevels = 1; vertexBufferDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; check(m_device->CreatePlacedResource( - m_vertexBuffersHeap, - m_currentVertexBuffersHeapOffset, + getCurrentResourceStorage().m_vertexBuffersHeap, + getCurrentResourceStorage().m_currentVertexBuffersHeapOffset, &vertexBufferDesc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, @@ -401,7 +437,7 @@ std::vector D3D12GSRender::EnableVertexData(bool index check(vertexBuffer->Map(0, nullptr, (void**)&bufferMap)); memcpy((char*)bufferMap + data_offset * item_size, &m_vertex_data[i].data[data_offset * item_size], data_size); vertexBuffer->Unmap(0, nullptr); - 
m_inflightVertexBuffers.push_back(vertexBuffer); + getCurrentResourceStorage().m_inflightVertexBuffers.push_back(vertexBuffer); D3D12_VERTEX_BUFFER_VIEW vertexBufferView = {}; vertexBufferView.BufferLocation = vertexBuffer->GetGPUVirtualAddress(); @@ -410,7 +446,7 @@ std::vector D3D12GSRender::EnableVertexData(bool index result.push_back(vertexBufferView); // 65536 alignment - m_currentVertexBuffersHeapOffset += (subBufferSize + 65536 - 1) & ~65535; + getCurrentResourceStorage().m_currentVertexBuffersHeapOffset += (subBufferSize + 65536 - 1) & ~65535; } if (indexed_draw) @@ -461,10 +497,10 @@ std::vector D3D12GSRender::EnableVertexData(bool index if (m_forcedIndexBuffer) { unsigned short *bufferMap; - check(m_indexBuffer->Map(0, nullptr, (void**)&bufferMap)); + check(getCurrentResourceStorage().m_indexBuffer->Map(0, nullptr, (void**)&bufferMap)); memcpy(bufferMap, m_indexed_array.m_data.data(), m_indexed_array.m_data.size()); - m_indexBufferCount = 0; + getCurrentResourceStorage().m_indexBufferCount = 0; // QUADS for (unsigned i = 0; i < m_draw_array_count / 4; i++) { @@ -475,9 +511,9 @@ std::vector D3D12GSRender::EnableVertexData(bool index bufferMap[6 * i + 3] = 4 * i; bufferMap[6 * i + 4] = 4 * i + 2; bufferMap[6 * i + 5] = 4 * i + 3; - m_indexBufferCount += 6; + getCurrentResourceStorage().m_indexBufferCount += 6; } - m_indexBuffer->Unmap(0, nullptr); + getCurrentResourceStorage().m_indexBuffer->Unmap(0, nullptr); } return result; } @@ -506,27 +542,27 @@ void D3D12GSRender::setScaleOffset() scaleOffsetMat[7] /= RSXThread::m_height / RSXThread::m_height_scale; void *scaleOffsetMap; - size_t offset = m_currentScaleOffsetBufferIndex * 256; + size_t offset = getCurrentResourceStorage().m_currentScaleOffsetBufferIndex * 256; D3D12_RANGE range = { offset, 1024 * 1024 - offset }; - check(m_scaleOffsetBuffer->Map(0, &range, &scaleOffsetMap)); + check(getCurrentResourceStorage().m_scaleOffsetBuffer->Map(0, &range, &scaleOffsetMap)); memcpy((char*)scaleOffsetMap + 
offset, scaleOffsetMat, 16 * sizeof(float)); - m_scaleOffsetBuffer->Unmap(0, &range); + getCurrentResourceStorage().m_scaleOffsetBuffer->Unmap(0, &range); D3D12_CONSTANT_BUFFER_VIEW_DESC constantBufferViewDesc = {}; - constantBufferViewDesc.BufferLocation = m_scaleOffsetBuffer->GetGPUVirtualAddress() + offset; + constantBufferViewDesc.BufferLocation = getCurrentResourceStorage().m_scaleOffsetBuffer->GetGPUVirtualAddress() + offset; constantBufferViewDesc.SizeInBytes = (UINT)256; - D3D12_CPU_DESCRIPTOR_HANDLE Handle = m_scaleOffsetDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); - Handle.ptr += m_currentScaleOffsetBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + D3D12_CPU_DESCRIPTOR_HANDLE Handle = getCurrentResourceStorage().m_scaleOffsetDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); + Handle.ptr += getCurrentResourceStorage().m_currentScaleOffsetBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); m_device->CreateConstantBufferView(&constantBufferViewDesc, Handle); } void D3D12GSRender::FillVertexShaderConstantsBuffer() { void *constantsBufferMap; - check(m_constantsVertexBuffer->Map(0, nullptr, &constantsBufferMap)); + check(getCurrentResourceStorage().m_constantsVertexBuffer->Map(0, nullptr, &constantsBufferMap)); for (const RSXTransformConstant& c : m_transform_constants) { @@ -534,17 +570,17 @@ void D3D12GSRender::FillVertexShaderConstantsBuffer() float vector[] = { c.x, c.y, c.z, c.w }; memcpy((char*)constantsBufferMap + offset, vector, 4 * sizeof(float)); size_t bufferSizeCandidate = offset + 4 * sizeof(float); - m_constantsBufferSize = bufferSizeCandidate > m_constantsBufferSize ? bufferSizeCandidate : m_constantsBufferSize; + getCurrentResourceStorage().m_constantsBufferSize = bufferSizeCandidate > getCurrentResourceStorage().m_constantsBufferSize ? 
bufferSizeCandidate : getCurrentResourceStorage().m_constantsBufferSize; } - m_constantsVertexBuffer->Unmap(0, nullptr); + getCurrentResourceStorage().m_constantsVertexBuffer->Unmap(0, nullptr); // make it multiple of 256 bytes - m_constantsBufferSize = (m_constantsBufferSize + 255) & ~255; + getCurrentResourceStorage().m_constantsBufferSize = (getCurrentResourceStorage().m_constantsBufferSize + 255) & ~255; D3D12_CONSTANT_BUFFER_VIEW_DESC constantBufferViewDesc = {}; - constantBufferViewDesc.BufferLocation = m_constantsVertexBuffer->GetGPUVirtualAddress(); - constantBufferViewDesc.SizeInBytes = (UINT)m_constantsBufferSize; - D3D12_CPU_DESCRIPTOR_HANDLE Handle = m_constantsBufferDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); - Handle.ptr += m_constantsBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + constantBufferViewDesc.BufferLocation = getCurrentResourceStorage().m_constantsVertexBuffer->GetGPUVirtualAddress(); + constantBufferViewDesc.SizeInBytes = (UINT)getCurrentResourceStorage().m_constantsBufferSize; + D3D12_CPU_DESCRIPTOR_HANDLE Handle = getCurrentResourceStorage().m_constantsBufferDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); + Handle.ptr += getCurrentResourceStorage().m_constantsBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); m_device->CreateConstantBufferView(&constantBufferViewDesc, Handle); } @@ -555,7 +591,7 @@ void D3D12GSRender::FillPixelShaderConstantsBuffer() size_t offset = 0; void *constantsBufferMap; - check(m_constantsFragmentBuffer->Map(0, nullptr, &constantsBufferMap)); + check(getCurrentResourceStorage().m_constantsFragmentBuffer->Map(0, nullptr, &constantsBufferMap)); for (size_t offsetInFP : fragmentOffset) { u32 vector[4]; @@ -583,21 +619,21 @@ void D3D12GSRender::FillPixelShaderConstantsBuffer() vector[3] = c3; } - memcpy((char*)constantsBufferMap + constantsFragmentSize + offset, vector, 4 * sizeof(u32)); + 
memcpy((char*)constantsBufferMap + getCurrentResourceStorage().constantsFragmentSize + offset, vector, 4 * sizeof(u32)); offset += 4 * sizeof(u32); } - m_constantsFragmentBuffer->Unmap(0, nullptr); + getCurrentResourceStorage().m_constantsFragmentBuffer->Unmap(0, nullptr); // Multiple of 256 offset = (offset + 255) & ~255; D3D12_CONSTANT_BUFFER_VIEW_DESC constantBufferViewDesc = {}; - constantBufferViewDesc.BufferLocation = m_constantsFragmentBuffer->GetGPUVirtualAddress() + constantsFragmentSize; + constantBufferViewDesc.BufferLocation = getCurrentResourceStorage().m_constantsFragmentBuffer->GetGPUVirtualAddress() + getCurrentResourceStorage().constantsFragmentSize; constantBufferViewDesc.SizeInBytes = (UINT)offset; - D3D12_CPU_DESCRIPTOR_HANDLE Handle = m_constantsBufferDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); - Handle.ptr += m_constantsBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + D3D12_CPU_DESCRIPTOR_HANDLE Handle = getCurrentResourceStorage().m_constantsBufferDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); + Handle.ptr += getCurrentResourceStorage().m_constantsBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); m_device->CreateConstantBufferView(&constantBufferViewDesc, Handle); - constantsFragmentSize += offset; + getCurrentResourceStorage().constantsFragmentSize += offset; } @@ -658,8 +694,8 @@ bool D3D12GSRender::LoadProgram() void D3D12GSRender::ExecCMD() { ID3D12GraphicsCommandList *commandList; - m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_commandAllocator, nullptr, IID_PPV_ARGS(&commandList)); - m_inflightCommandList.push_back(commandList); + m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, getCurrentResourceStorage().m_commandAllocator, nullptr, IID_PPV_ARGS(&commandList)); + getCurrentResourceStorage().m_inflightCommandList.push_back(commandList); 
commandList->SetGraphicsRootSignature(m_rootSignature); @@ -675,8 +711,8 @@ void D3D12GSRender::ExecCMD() if (m_forcedIndexBuffer) { D3D12_INDEX_BUFFER_VIEW indexBufferView = {}; - indexBufferView.SizeInBytes = (UINT)m_indexBufferCount * sizeof(unsigned short); - indexBufferView.BufferLocation = m_indexBuffer->GetGPUVirtualAddress(); + indexBufferView.SizeInBytes = (UINT)getCurrentResourceStorage().m_indexBufferCount * sizeof(unsigned short); + indexBufferView.BufferLocation = getCurrentResourceStorage().m_indexBuffer->GetGPUVirtualAddress(); indexBufferView.Format = DXGI_FORMAT_R16_UINT; commandList->IASetIndexBuffer(&indexBufferView); } @@ -691,36 +727,36 @@ void D3D12GSRender::ExecCMD() // Constants setScaleOffset(); - commandList->SetDescriptorHeaps(1, &m_scaleOffsetDescriptorHeap); - D3D12_GPU_DESCRIPTOR_HANDLE Handle = m_scaleOffsetDescriptorHeap->GetGPUDescriptorHandleForHeapStart(); - Handle.ptr += m_currentScaleOffsetBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + commandList->SetDescriptorHeaps(1, &getCurrentResourceStorage().m_scaleOffsetDescriptorHeap); + D3D12_GPU_DESCRIPTOR_HANDLE Handle = getCurrentResourceStorage().m_scaleOffsetDescriptorHeap->GetGPUDescriptorHandleForHeapStart(); + Handle.ptr += getCurrentResourceStorage().m_currentScaleOffsetBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); commandList->SetGraphicsRootDescriptorTable(0, Handle); - m_currentScaleOffsetBufferIndex++; + getCurrentResourceStorage().m_currentScaleOffsetBufferIndex++; - size_t currentBufferIndex = m_constantsBufferIndex; + size_t currentBufferIndex = getCurrentResourceStorage().m_constantsBufferIndex; FillVertexShaderConstantsBuffer(); - m_constantsBufferIndex++; + getCurrentResourceStorage().m_constantsBufferIndex++; FillPixelShaderConstantsBuffer(); - m_constantsBufferIndex++; + getCurrentResourceStorage().m_constantsBufferIndex++; - commandList->SetDescriptorHeaps(1, 
&m_constantsBufferDescriptorsHeap); - Handle = m_constantsBufferDescriptorsHeap->GetGPUDescriptorHandleForHeapStart(); + commandList->SetDescriptorHeaps(1, &getCurrentResourceStorage().m_constantsBufferDescriptorsHeap); + Handle = getCurrentResourceStorage().m_constantsBufferDescriptorsHeap->GetGPUDescriptorHandleForHeapStart(); Handle.ptr += currentBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); commandList->SetGraphicsRootDescriptorTable(1, Handle); commandList->SetPipelineState(m_PSO); size_t usedTexture = UploadTextures(); - Handle = m_textureDescriptorsHeap->GetGPUDescriptorHandleForHeapStart(); - Handle.ptr += m_currentTextureIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - commandList->SetDescriptorHeaps(1, &m_textureDescriptorsHeap); + Handle = getCurrentResourceStorage().m_textureDescriptorsHeap->GetGPUDescriptorHandleForHeapStart(); + Handle.ptr += getCurrentResourceStorage().m_currentTextureIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + commandList->SetDescriptorHeaps(1, &getCurrentResourceStorage().m_textureDescriptorsHeap); commandList->SetGraphicsRootDescriptorTable(2, Handle); - Handle = m_samplerDescriptorHeap->GetGPUDescriptorHandleForHeapStart(); - Handle.ptr += m_currentTextureIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); - commandList->SetDescriptorHeaps(1, &m_samplerDescriptorHeap); + Handle = getCurrentResourceStorage().m_samplerDescriptorHeap->GetGPUDescriptorHandleForHeapStart(); + Handle.ptr += getCurrentResourceStorage().m_currentTextureIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); + commandList->SetDescriptorHeaps(1, &getCurrentResourceStorage().m_samplerDescriptorHeap); commandList->SetGraphicsRootDescriptorTable(3, Handle); - m_currentTextureIndex += usedTexture; + getCurrentResourceStorage().m_currentTextureIndex 
+= usedTexture; InitDrawBuffers(); @@ -809,7 +845,7 @@ void D3D12GSRender::ExecCMD() } if (m_forcedIndexBuffer) - commandList->DrawIndexedInstanced((UINT)m_indexBufferCount, 1, 0, (UINT)m_draw_array_first, 0); + commandList->DrawIndexedInstanced((UINT)getCurrentResourceStorage().m_indexBufferCount, 1, 0, (UINT)m_draw_array_first, 0); else if (m_draw_array_count) commandList->DrawInstanced(m_draw_array_count, 1, m_draw_array_first, 0); @@ -1140,8 +1176,8 @@ void D3D12GSRender::ExecCMD() void D3D12GSRender::Flip() { ID3D12GraphicsCommandList *commandList; - m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_commandAllocator, nullptr, IID_PPV_ARGS(&commandList)); - m_inflightCommandList.push_back(commandList); + m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, getCurrentResourceStorage().m_commandAllocator, nullptr, IID_PPV_ARGS(&commandList)); + getCurrentResourceStorage().m_inflightCommandList.push_back(commandList); switch (m_surface_color_target) { @@ -1153,7 +1189,7 @@ void D3D12GSRender::Flip() { D3D12_RESOURCE_BARRIER barriers[2] = {}; barriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - barriers[0].Transition.pResource = m_backBuffer[m_swapChain->GetCurrentBackBufferIndex()]; + barriers[0].Transition.pResource = getCurrentResourceStorage().m_backBuffer; barriers[0].Transition.StateBefore = D3D12_RESOURCE_STATE_PRESENT; barriers[0].Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_DEST; @@ -1166,7 +1202,7 @@ void D3D12GSRender::Flip() D3D12_TEXTURE_COPY_LOCATION src = {}, dst = {}; src.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, dst.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; src.SubresourceIndex = 0, dst.SubresourceIndex = 0; - src.pResource = m_fbo->getRenderTargetTexture(0), dst.pResource = m_backBuffer[m_swapChain->GetCurrentBackBufferIndex()]; + src.pResource = m_fbo->getRenderTargetTexture(0), dst.pResource = getCurrentResourceStorage().m_backBuffer; D3D12_BOX box = { 0, 0, 0, RSXThread::m_width, 
RSXThread::m_height, 1 }; commandList->CopyTextureRegion(&dst, 0, 0, 0, &src, &box); @@ -1190,21 +1226,9 @@ void D3D12GSRender::Flip() m_commandQueueGraphic->Signal(fence.Get(), 1); WaitForSingleObject(gfxqueuecompletion, INFINITE); CloseHandle(gfxqueuecompletion); - m_commandAllocator->Reset(); - m_textureUploadCommandAllocator->Reset(); - for (ID3D12GraphicsCommandList *gfxCommandList : m_inflightCommandList) - gfxCommandList->Release(); - m_inflightCommandList.clear(); - for (ID3D12Resource *vertexBuffer : m_inflightVertexBuffers) - vertexBuffer->Release(); - m_inflightVertexBuffers.clear(); - m_currentVertexBuffersHeapOffset = 0; - m_constantsBufferSize = 0; - m_constantsBufferIndex = 0; - m_currentScaleOffsetBufferIndex = 0; - constantsFragmentSize = 0; - m_currentStorageOffset = 0; - m_currentTextureIndex = 0; + getNonCurrentResourceStorage().Reset(); + m_currentResourceStorageIndex = 1 - m_currentResourceStorageIndex; + m_frame->Flip(nullptr); } #endif \ No newline at end of file diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index aee262ec41..eccf6da7c4 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -50,42 +50,59 @@ private: ID3D12PipelineState *m_PSO; ID3D12RootSignature *m_rootSignature; - ID3D12CommandAllocator *m_textureUploadCommandAllocator; - ID3D12Heap *m_uploadTextureHeap, *m_textureStorage; - size_t m_currentStorageOffset; - ID3D12DescriptorHeap *m_textureDescriptorsHeap; - ID3D12DescriptorHeap *m_samplerDescriptorHeap; - size_t m_currentTextureIndex; + struct ResourceStorage + { + ID3D12CommandAllocator *m_commandAllocator; + std::list m_inflightCommandList; + // Vertex storage + size_t m_currentVertexBuffersHeapOffset; + std::vector m_inflightVertexBuffers; + ID3D12Heap *m_vertexBuffersHeap; + size_t m_indexBufferCount; + ID3D12Resource *m_indexBuffer; + + // Constants storage + ID3D12Resource *m_constantsVertexBuffer, *m_constantsFragmentBuffer; + size_t 
constantsFragmentSize; + ID3D12DescriptorHeap *m_constantsBufferDescriptorsHeap; + size_t m_constantsBufferSize, m_constantsBufferIndex; + ID3D12Resource *m_scaleOffsetBuffer; + ID3D12DescriptorHeap *m_scaleOffsetDescriptorHeap; + size_t m_currentScaleOffsetBufferIndex; + + // Texture storage + ID3D12CommandAllocator *m_textureUploadCommandAllocator; + ID3D12Heap *m_uploadTextureHeap, *m_textureStorage; + size_t m_currentStorageOffset; + ID3D12DescriptorHeap *m_textureDescriptorsHeap; + ID3D12DescriptorHeap *m_samplerDescriptorHeap; + size_t m_currentTextureIndex; + + //BackBuffers + ID3D12Resource* m_backBuffer; + ID3D12DescriptorHeap *m_backbufferAsRendertarget; + + void Reset(); + void Init(ID3D12Device *device); + }; + + ResourceStorage m_perFrameStorage[2]; bool m_forcedIndexBuffer; - size_t m_currentVertexBuffersHeapOffset; - std::vector m_inflightVertexBuffers; - ID3D12Heap *m_vertexBuffersHeap; - size_t m_indexBufferCount; - ID3D12Resource *m_indexBuffer; - ID3D12Resource *m_constantsVertexBuffer, *m_constantsFragmentBuffer; - size_t constantsFragmentSize; - ID3D12DescriptorHeap *m_constantsBufferDescriptorsHeap; - size_t m_constantsBufferSize, m_constantsBufferIndex; - - ID3D12Resource *m_scaleOffsetBuffer; - ID3D12DescriptorHeap *m_scaleOffsetDescriptorHeap; - size_t m_currentScaleOffsetBufferIndex; std::vector m_IASet; D3D12RenderTargetSets *m_fbo; ID3D12Device* m_device; ID3D12CommandQueue *m_commandQueueCopy; ID3D12CommandQueue *m_commandQueueGraphic; - ID3D12CommandAllocator *m_commandAllocator; - std::list m_inflightCommandList; - struct IDXGISwapChain3 *m_swapChain; - ID3D12Resource* m_backBuffer[2]; - ID3D12DescriptorHeap *m_backbufferAsRendertarget[2]; + struct IDXGISwapChain3 *m_swapChain; size_t m_lastWidth, m_lastHeight, m_lastDepth; + size_t m_currentResourceStorageIndex; + ResourceStorage& getCurrentResourceStorage(); + ResourceStorage& getNonCurrentResourceStorage(); public: GSFrameBase2 *m_frame; u32 m_draw_frames; diff --git 
a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index 7cfe423087..17085b1a23 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -19,7 +19,7 @@ size_t D3D12GSRender::UploadTextures() // Upload at each iteration to take advantage of overlapping transfer ID3D12GraphicsCommandList *commandList; - check(m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_textureUploadCommandAllocator, nullptr, IID_PPV_ARGS(&commandList))); + check(m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, getCurrentResourceStorage().m_textureUploadCommandAllocator, nullptr, IID_PPV_ARGS(&commandList))); ID3D12Resource *Texture, *vramTexture; size_t textureSize = m_textures[i].GetWidth() * m_textures[i].GetHeight() * 4; @@ -32,8 +32,8 @@ size_t D3D12GSRender::UploadTextures() textureDesc.MipLevels = 1; textureDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; check(m_device->CreatePlacedResource( - m_uploadTextureHeap, - m_currentStorageOffset, + getCurrentResourceStorage().m_uploadTextureHeap, + getCurrentResourceStorage().m_currentStorageOffset, &textureDesc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, @@ -56,16 +56,16 @@ size_t D3D12GSRender::UploadTextures() vramTextureDesc.SampleDesc.Count = 1; vramTextureDesc.MipLevels = 1; check(m_device->CreatePlacedResource( - m_textureStorage, - m_currentStorageOffset, + getCurrentResourceStorage().m_textureStorage, + getCurrentResourceStorage().m_currentStorageOffset, &vramTextureDesc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&vramTexture) )); - m_currentStorageOffset += textureSize; - m_currentStorageOffset = (m_currentStorageOffset + 65536 - 1) & ~65535; + getCurrentResourceStorage().m_currentStorageOffset += textureSize; + getCurrentResourceStorage().m_currentStorageOffset = (getCurrentResourceStorage().m_currentStorageOffset + 65536 - 1) & ~65535; D3D12_TEXTURE_COPY_LOCATION dst = {}, src = {}; dst.pResource = vramTexture; @@ -92,8 +92,8 @@ 
size_t D3D12GSRender::UploadTextures() srvDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; srvDesc.Texture2D.MipLevels = 1; srvDesc.Shader4ComponentMapping = D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING(D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_1, D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_2, D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_3, D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0); - D3D12_CPU_DESCRIPTOR_HANDLE Handle = m_textureDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); - Handle.ptr += (m_currentTextureIndex + usedTexture) * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + D3D12_CPU_DESCRIPTOR_HANDLE Handle = getCurrentResourceStorage().m_textureDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); + Handle.ptr += (getCurrentResourceStorage().m_currentTextureIndex + usedTexture) * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); m_device->CreateShaderResourceView(vramTexture, &srvDesc, Handle); // TODO : Correctly define sampler @@ -102,8 +102,8 @@ size_t D3D12GSRender::UploadTextures() samplerDesc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_WRAP; samplerDesc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_WRAP; samplerDesc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_WRAP; - Handle = m_samplerDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); - Handle.ptr += (m_currentTextureIndex + usedTexture) * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + Handle = getCurrentResourceStorage().m_samplerDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); + Handle.ptr += (getCurrentResourceStorage().m_currentTextureIndex + usedTexture) * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); m_device->CreateSampler(&samplerDesc, Handle); commandList->Close(); From 727f54dd327947aeab79c2f108a89f4b1973f3a5 Mon Sep 17 00:00:00 2001 From: vlj Date: Fri, 22 May 2015 23:24:49 +0200 Subject: [PATCH 099/343] d3d12: Record 
command while gpu is busy rendering previous frame + cleaning fixes --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 63 +++++++++++++++------------ rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 4 ++ 2 files changed, 40 insertions(+), 27 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 8742b1e586..4b694c9065 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -40,11 +40,11 @@ void D3D12GSRender::ResourceStorage::Reset() void D3D12GSRender::ResourceStorage::Init(ID3D12Device *device) { + m_queueCompletion = 0; // Create a global command allocator device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&m_commandAllocator)); device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&m_textureUploadCommandAllocator)); - // Create heap for vertex buffers D3D12_HEAP_DESC vertexBufferHeapDesc = {}; // 16 MB wide @@ -135,6 +135,27 @@ void D3D12GSRender::ResourceStorage::Init(ID3D12Device *device) check(device->CreateDescriptorHeap(&textureDescriptorDesc, IID_PPV_ARGS(&m_samplerDescriptorHeap))); } +void D3D12GSRender::ResourceStorage::Release() +{ + // NOTE: Should be released only if no command are in flight ! 
+ m_backbufferAsRendertarget->Release(); + m_constantsBufferDescriptorsHeap->Release(); + m_scaleOffsetDescriptorHeap->Release(); + m_constantsVertexBuffer->Release(); + m_constantsFragmentBuffer->Release(); + m_scaleOffsetBuffer->Release(); + m_vertexBuffersHeap->Release(); + m_backBuffer->Release(); + for (auto tmp : m_inflightVertexBuffers) + tmp->Release(); + m_textureDescriptorsHeap->Release(); + m_textureStorage->Release(); + m_uploadTextureHeap->Release(); + m_samplerDescriptorHeap->Release(); + for (auto tmp : m_inflightCommandList) + tmp->Release(); + m_commandAllocator->Release(); +} D3D12GSRender::D3D12GSRender() : GSRender(), m_fbo(nullptr), m_PSO(nullptr) @@ -249,31 +270,15 @@ D3D12GSRender::D3D12GSRender() D3D12GSRender::~D3D12GSRender() { - // NOTE: Should be released only if no command are in flight ! -/* m_commandAllocator->Release(); + m_perFrameStorage[0].Release(); + m_perFrameStorage[1].Release(); m_commandQueueGraphic->Release(); m_commandQueueCopy->Release(); - m_backbufferAsRendertarget[0]->Release(); - m_backbufferAsRendertarget[1]->Release(); - m_constantsBufferDescriptorsHeap->Release(); - m_scaleOffsetDescriptorHeap->Release(); - m_constantsVertexBuffer->Release(); - m_constantsFragmentBuffer->Release(); - m_scaleOffsetBuffer->Release(); - m_vertexBuffersHeap->Release(); if (m_fbo) delete m_fbo; - for (auto tmp : m_inflightVertexBuffers) - tmp->Release(); - m_textureDescriptorsHeap->Release(); - m_textureStorage->Release(); - m_uploadTextureHeap->Release(); - m_samplerDescriptorHeap->Release(); m_rootSignature->Release(); - m_backBuffer[0]->Release(); - m_backBuffer[1]->Release(); m_swapChain->Release(); - m_device->Release();*/ + m_device->Release(); } D3D12GSRender::ResourceStorage &D3D12GSRender::getCurrentResourceStorage() @@ -1217,16 +1222,20 @@ void D3D12GSRender::Flip() } check(m_swapChain->Present(Ini.GSVSyncEnable.GetValue() ? 
1 : 0, 0)); - // Wait execution is over - // TODO: It's suboptimal, we should use 2 command allocator + // Add an event signaling queue completion Microsoft::WRL::ComPtr fence; m_device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&fence)); - HANDLE gfxqueuecompletion = CreateEvent(0, 0, 0, 0); - fence->SetEventOnCompletion(1, gfxqueuecompletion); + getCurrentResourceStorage().m_queueCompletion = CreateEvent(0, 0, 0, 0); + fence->SetEventOnCompletion(1, getCurrentResourceStorage().m_queueCompletion); m_commandQueueGraphic->Signal(fence.Get(), 1); - WaitForSingleObject(gfxqueuecompletion, INFINITE); - CloseHandle(gfxqueuecompletion); - getNonCurrentResourceStorage().Reset(); + + // Wait execution is over + if (getNonCurrentResourceStorage().m_queueCompletion) + { + WaitForSingleObject(getNonCurrentResourceStorage().m_queueCompletion, INFINITE); + CloseHandle(getNonCurrentResourceStorage().m_queueCompletion); + getNonCurrentResourceStorage().Reset(); + } m_currentResourceStorageIndex = 1 - m_currentResourceStorageIndex; m_frame->Flip(nullptr); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index eccf6da7c4..197f482d88 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -83,8 +83,12 @@ private: ID3D12Resource* m_backBuffer; ID3D12DescriptorHeap *m_backbufferAsRendertarget; + // Fence + HANDLE m_queueCompletion; + void Reset(); void Init(ID3D12Device *device); + void Release(); }; ResourceStorage m_perFrameStorage[2]; From 312ff7e8f5bcb078a4809ff5013260fff33a7b07 Mon Sep 17 00:00:00 2001 From: vlj Date: Sat, 23 May 2015 00:04:42 +0200 Subject: [PATCH 100/343] RSX: Fix for default value of temp reg --- rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.cpp | 2 +- rpcs3/Emu/RSX/Common/VertexProgramDecompiler.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.cpp b/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.cpp index 
7a457bb15d..5bf8341ba5 100644 --- a/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.cpp @@ -92,7 +92,7 @@ std::string FragmentProgramDecompiler::GetMask() std::string FragmentProgramDecompiler::AddReg(u32 index, int fp16) { - return m_parr.AddParam(PF_PARAM_NONE, getFloatTypeName(4), std::string(fp16 ? "h" : "r") + std::to_string(index), getFloatTypeName(4) + "(0.0)"); + return m_parr.AddParam(PF_PARAM_NONE, getFloatTypeName(4), std::string(fp16 ? "h" : "r") + std::to_string(index), getFloatTypeName(4) + "(0., 0., 0., 0.)"); } bool FragmentProgramDecompiler::HasReg(u32 index, int fp16) diff --git a/rpcs3/Emu/RSX/Common/VertexProgramDecompiler.cpp b/rpcs3/Emu/RSX/Common/VertexProgramDecompiler.cpp index 3c8792611c..b3a8c0a9c7 100644 --- a/rpcs3/Emu/RSX/Common/VertexProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/Common/VertexProgramDecompiler.cpp @@ -152,7 +152,7 @@ void VertexProgramDecompiler::SetDST(bool is_sca, std::string value) if (d0.cond_update_enable_0 && d0.cond_update_enable_1) { - dest = m_parr.AddParam(PF_PARAM_NONE, getFloatTypeName(4), "cc" + std::to_string(d0.cond_reg_sel_1), getFloatTypeName(4) + "(0.0)") + mask; + dest = m_parr.AddParam(PF_PARAM_NONE, getFloatTypeName(4), "cc" + std::to_string(d0.cond_reg_sel_1), getFloatTypeName(4) + "(0., 0., 0., 0.)") + mask; } else if (d3.dst != 0x1f || (is_sca ? 
d3.sca_dst_tmp != 0x3f : d0.dst_tmp != 0x3f)) { From 4a89432d39eccd8eb3fdf26420fa39f869e1a8c8 Mon Sep 17 00:00:00 2001 From: vlj Date: Sat, 23 May 2015 00:45:37 +0200 Subject: [PATCH 101/343] d3d12: Implement blend decoding --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 82 ++++++++++++++++++++---- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h | 17 +---- 2 files changed, 72 insertions(+), 27 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 4b694c9065..ea56f54f73 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -4,6 +4,9 @@ #include #include +// Some constants are the same between RSX and GL +#include + GetGSFrameCb2 GetGSFrame = nullptr; void SetGetD3DGSFrameCallback(GetGSFrameCb2 value) @@ -589,6 +592,32 @@ void D3D12GSRender::FillVertexShaderConstantsBuffer() m_device->CreateConstantBufferView(&constantBufferViewDesc, Handle); } +static +D3D12_BLEND_OP getBlendOp() +{ + return D3D12_BLEND_OP_ADD; +} + +static +D3D12_BLEND getBlendFactor(u16 glFactor) +{ + switch (glFactor) + { + default: LOG_WARNING(RSX, "Unsupported Blend Op %d", glFactor); + case GL_ZERO: return D3D12_BLEND_ZERO; + case GL_ONE: return D3D12_BLEND_ONE; + case GL_SRC_COLOR: return D3D12_BLEND_SRC_COLOR; + case GL_ONE_MINUS_SRC_COLOR: return D3D12_BLEND_INV_SRC_COLOR; + case GL_DST_COLOR: return D3D12_BLEND_DEST_COLOR; + case GL_ONE_MINUS_DST_COLOR: D3D12_BLEND_INV_DEST_COLOR; + case GL_SRC_ALPHA: return D3D12_BLEND_SRC_ALPHA; + case GL_ONE_MINUS_SRC_ALPHA: return D3D12_BLEND_INV_SRC_ALPHA; + case GL_DST_ALPHA: return D3D12_BLEND_DEST_ALPHA; + case GL_ONE_MINUS_DST_ALPHA: return D3D12_BLEND_INV_DEST_ALPHA; + case GL_SRC_ALPHA_SATURATE: return D3D12_BLEND_SRC_ALPHA_SAT; + } +} + void D3D12GSRender::FillPixelShaderConstantsBuffer() { // Get constant from fragment program @@ -691,7 +720,47 @@ bool D3D12GSRender::LoadProgram() prop.Topology = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break; } + + static 
D3D12_BLEND_DESC CD3D12_BLEND_DESC = + { + FALSE, + FALSE, + { + FALSE,FALSE, + D3D12_BLEND_ONE, D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD, + D3D12_BLEND_ONE, D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD, + D3D12_LOGIC_OP_NOOP, + D3D12_COLOR_WRITE_ENABLE_ALL, + } + }; + prop.Blend = CD3D12_BLEND_DESC; + + if (m_set_blend_equation) + { +// glBlendEquationSeparate(m_blend_equation_rgb, m_blend_equation_alpha); +// checkForGlError("glBlendEquationSeparate"); + } + + if (m_set_blend_sfactor && m_set_blend_dfactor) + { + prop.Blend.RenderTarget[0].BlendEnable = true; + prop.Blend.RenderTarget[0].BlendOp = D3D12_BLEND_OP_ADD; + prop.Blend.RenderTarget[0].SrcBlend = getBlendFactor(m_blend_sfactor_rgb); + prop.Blend.RenderTarget[0].DestBlend = getBlendFactor(m_blend_dfactor_rgb); + prop.Blend.RenderTarget[0].SrcBlendAlpha = getBlendFactor(m_blend_sfactor_alpha); + prop.Blend.RenderTarget[0].DestBlendAlpha = getBlendFactor(m_blend_dfactor_alpha); + prop.Blend.RenderTarget[0].BlendOpAlpha = D3D12_BLEND_OP_ADD; + } + + if (m_set_blend_color) + { +// glBlendColor(m_blend_color_r, m_blend_color_g, m_blend_color_b, m_blend_color_a); +// checkForGlError("glBlendColor"); + } + prop.IASet = m_IASet; + + m_PSO = m_cachePSO.getGraphicPipelineState(m_cur_vertex_prog, m_cur_fragment_prog, prop, std::make_pair(m_device, m_rootSignature)); return m_PSO != nullptr; } @@ -803,19 +872,6 @@ void D3D12GSRender::ExecCMD() }; commandList->RSSetScissorRects(1, &box); - /* - #define GL_POINTS 0x0000 - #define GL_LINES 0x0001 - #define GL_LINE_LOOP 0x0002 - #define GL_LINE_STRIP 0x0003 - #define GL_TRIANGLES 0x0004 - #define GL_TRIANGLE_STRIP 0x0005 - #define GL_TRIANGLE_FAN 0x0006 - #define GL_QUADS 0x0007 - #define GL_QUAD_STRIP 0x0008 - #define GL_POLYGON 0x0009 - */ - bool requireIndexBuffer = false; switch (m_draw_mode - 1) { diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h index e03bd038f8..ea6b9925cf 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h 
+++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h @@ -14,9 +14,11 @@ struct D3D12PipelineProperties { D3D12_PRIMITIVE_TOPOLOGY_TYPE Topology; std::vector IASet; + D3D12_BLEND_DESC Blend; bool operator==(const D3D12PipelineProperties &in) const { + // TODO: blend and IASet equality return Topology == in.Topology; } }; @@ -136,20 +138,7 @@ struct D3D12Traits D3D12_DEFAULT_STENCIL_WRITE_MASK, }; - static D3D12_BLEND_DESC CD3D12_BLEND_DESC = - { - FALSE, - FALSE, - { - FALSE,FALSE, - D3D12_BLEND_ONE, D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD, - D3D12_BLEND_ONE, D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD, - D3D12_LOGIC_OP_NOOP, - D3D12_COLOR_WRITE_ENABLE_ALL, - } - }; - - graphicPipelineStateDesc.BlendState = CD3D12_BLEND_DESC; + graphicPipelineStateDesc.BlendState = pipelineProperties.Blend; graphicPipelineStateDesc.DepthStencilState = CD3D12_DEPTH_STENCIL_DESC; graphicPipelineStateDesc.RasterizerState = CD3D12_RASTERIZER_DESC; graphicPipelineStateDesc.PrimitiveTopologyType = pipelineProperties.Topology; From e6146e4ecbbde1af59ed0e5e60169a6ecf13855b Mon Sep 17 00:00:00 2001 From: vlj Date: Sat, 23 May 2015 01:18:10 +0200 Subject: [PATCH 102/343] d3d12: Fix single channel texture Should now display font properly --- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 41 +++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 7 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index 17085b1a23..d1f7d5ff93 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -16,13 +16,29 @@ size_t D3D12GSRender::UploadTextures() for (u32 i = 0; i < m_textures_count; ++i) { if (!m_textures[i].IsEnabled()) continue; + size_t w = m_textures[i].GetWidth(), h = m_textures[i].GetHeight(); // Upload at each iteration to take advantage of overlapping transfer ID3D12GraphicsCommandList *commandList; check(m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, 
getCurrentResourceStorage().m_textureUploadCommandAllocator, nullptr, IID_PPV_ARGS(&commandList))); + DXGI_FORMAT dxgiFormat; + size_t pixelSize; + int format = m_textures[i].GetFormat() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); + switch (format) + { + default: + dxgiFormat = DXGI_FORMAT_R8G8B8A8_UNORM; + pixelSize = 4; + break; + case CELL_GCM_TEXTURE_B8: + dxgiFormat = DXGI_FORMAT_R8_UNORM; + pixelSize = 1; + break; + } + ID3D12Resource *Texture, *vramTexture; - size_t textureSize = m_textures[i].GetWidth() * m_textures[i].GetHeight() * 4; + size_t textureSize = w * h * 4; D3D12_RESOURCE_DESC textureDesc = {}; textureDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; textureDesc.Width = textureSize; @@ -31,6 +47,7 @@ size_t D3D12GSRender::UploadTextures() textureDesc.SampleDesc.Count = 1; textureDesc.MipLevels = 1; textureDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + check(m_device->CreatePlacedResource( getCurrentResourceStorage().m_uploadTextureHeap, getCurrentResourceStorage().m_currentStorageOffset, @@ -44,14 +61,22 @@ size_t D3D12GSRender::UploadTextures() auto pixels = vm::get_ptr(texaddr); void *textureData; check(Texture->Map(0, nullptr, (void**)&textureData)); - memcpy(textureData, pixels, textureSize); + + // Multiple of 256 + size_t rowPitch = m_textures[i].GetWidth() * pixelSize; + rowPitch = (rowPitch + 255) & ~255; + // Upload with correct rowpitch + for (unsigned row = 0; row < m_textures[i].GetHeight(); row++) + { + memcpy((char*)textureData + row * rowPitch, pixels + row * m_textures[i].m_pitch, m_textures[i].m_pitch); + } Texture->Unmap(0, nullptr); D3D12_RESOURCE_DESC vramTextureDesc = {}; vramTextureDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; vramTextureDesc.Width = m_textures[i].GetWidth(); vramTextureDesc.Height = m_textures[i].GetHeight(); - vramTextureDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + vramTextureDesc.Format = dxgiFormat; vramTextureDesc.DepthOrArraySize = 1; vramTextureDesc.SampleDesc.Count = 1; 
vramTextureDesc.MipLevels = 1; @@ -67,6 +92,8 @@ size_t D3D12GSRender::UploadTextures() getCurrentResourceStorage().m_currentStorageOffset += textureSize; getCurrentResourceStorage().m_currentStorageOffset = (getCurrentResourceStorage().m_currentStorageOffset + 65536 - 1) & ~65535; + + D3D12_TEXTURE_COPY_LOCATION dst = {}, src = {}; dst.pResource = vramTexture; dst.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; @@ -75,8 +102,8 @@ size_t D3D12GSRender::UploadTextures() src.PlacedFootprint.Footprint.Depth = 1; src.PlacedFootprint.Footprint.Width = m_textures[i].GetWidth(); src.PlacedFootprint.Footprint.Height = m_textures[i].GetHeight(); - src.PlacedFootprint.Footprint.RowPitch = m_textures[i].GetWidth() * 4; - src.PlacedFootprint.Footprint.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + src.PlacedFootprint.Footprint.RowPitch = rowPitch; + src.PlacedFootprint.Footprint.Format = dxgiFormat; commandList->CopyTextureRegion(&dst, 0, 0, 0, &src, nullptr); @@ -89,7 +116,7 @@ size_t D3D12GSRender::UploadTextures() D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; - srvDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + srvDesc.Format = dxgiFormat; srvDesc.Texture2D.MipLevels = 1; srvDesc.Shader4ComponentMapping = D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING(D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_1, D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_2, D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_3, D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0); D3D12_CPU_DESCRIPTOR_HANDLE Handle = getCurrentResourceStorage().m_textureDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); @@ -98,7 +125,7 @@ size_t D3D12GSRender::UploadTextures() // TODO : Correctly define sampler D3D12_SAMPLER_DESC samplerDesc = {}; - samplerDesc.Filter = D3D12_FILTER_MIN_MAG_MIP_POINT; + samplerDesc.Filter = D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT; samplerDesc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_WRAP; samplerDesc.AddressV = 
D3D12_TEXTURE_ADDRESS_MODE_WRAP; samplerDesc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_WRAP; From 740354d2c9bdcdfca3ed136e8cc1761bfb996b86 Mon Sep 17 00:00:00 2001 From: vlj Date: Sat, 23 May 2015 01:22:16 +0200 Subject: [PATCH 103/343] d3d12: Fix depth format mismatch --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 16 ++++++++++++++++ rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h | 5 +++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index ea56f54f73..768f5d1ffa 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -758,6 +758,22 @@ bool D3D12GSRender::LoadProgram() // checkForGlError("glBlendColor"); } + switch (m_surface_depth_format) + { + case 0: + break; + case CELL_GCM_SURFACE_Z16: + prop.DepthStencilFormat = DXGI_FORMAT_D16_UNORM; + break; + case CELL_GCM_SURFACE_Z24S8: + prop.DepthStencilFormat = DXGI_FORMAT_D24_UNORM_S8_UINT; + break; + default: + LOG_ERROR(RSX, "Bad depth format! 
(%d)", m_surface_depth_format); + assert(0); + } +; + prop.IASet = m_IASet; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h index ea6b9925cf..77b14f114b 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h @@ -13,13 +13,14 @@ struct D3D12PipelineProperties { D3D12_PRIMITIVE_TOPOLOGY_TYPE Topology; + DXGI_FORMAT DepthStencilFormat; std::vector IASet; D3D12_BLEND_DESC Blend; bool operator==(const D3D12PipelineProperties &in) const { // TODO: blend and IASet equality - return Topology == in.Topology; + return Topology == in.Topology && DepthStencilFormat == in.DepthStencilFormat; } }; @@ -145,7 +146,7 @@ struct D3D12Traits graphicPipelineStateDesc.NumRenderTargets = 1; graphicPipelineStateDesc.RTVFormats[0] = DXGI_FORMAT_R8G8B8A8_UNORM; - graphicPipelineStateDesc.DSVFormat = DXGI_FORMAT_D16_UNORM; + graphicPipelineStateDesc.DSVFormat = pipelineProperties.DepthStencilFormat; graphicPipelineStateDesc.InputLayout.pInputElementDescs = pipelineProperties.IASet.data(); graphicPipelineStateDesc.InputLayout.NumElements = (UINT)pipelineProperties.IASet.size(); From 1076727c7532f7396fc24694c07b99d84fd6bf3f Mon Sep 17 00:00:00 2001 From: vlj Date: Sat, 23 May 2015 18:34:59 +0200 Subject: [PATCH 104/343] d3d12: Use a heap for scale offset and fragment buffer too --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 117 ++++++++++++++++---------- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 9 +- 2 files changed, 78 insertions(+), 48 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 768f5d1ffa..f5757321fd 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -27,7 +27,7 @@ void D3D12GSRender::ResourceStorage::Reset() m_constantsBufferSize = 0; m_constantsBufferIndex = 0; m_currentScaleOffsetBufferIndex = 0; - constantsFragmentSize = 0; + m_constantsBuffersHeapFreeSpace = 0; m_currentStorageOffset = 0; 
m_currentTextureIndex = 0; @@ -36,9 +36,9 @@ void D3D12GSRender::ResourceStorage::Reset() for (ID3D12GraphicsCommandList *gfxCommandList : m_inflightCommandList) gfxCommandList->Release(); m_inflightCommandList.clear(); - for (ID3D12Resource *vertexBuffer : m_inflightVertexBuffers) + for (ID3D12Resource *vertexBuffer : m_inflightResources) vertexBuffer->Release(); - m_inflightVertexBuffers.clear(); + m_inflightResources.clear(); } void D3D12GSRender::ResourceStorage::Init(ID3D12Device *device) @@ -48,13 +48,14 @@ void D3D12GSRender::ResourceStorage::Init(ID3D12Device *device) device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&m_commandAllocator)); device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&m_textureUploadCommandAllocator)); - // Create heap for vertex buffers + // Create heap for vertex and constants buffers D3D12_HEAP_DESC vertexBufferHeapDesc = {}; // 16 MB wide vertexBufferHeapDesc.SizeInBytes = 1024 * 1024 * 16; vertexBufferHeapDesc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS; vertexBufferHeapDesc.Properties.Type = D3D12_HEAP_TYPE_UPLOAD; check(device->CreateHeap(&vertexBufferHeapDesc, IID_PPV_ARGS(&m_vertexBuffersHeap))); + check(device->CreateHeap(&vertexBufferHeapDesc, IID_PPV_ARGS(&m_constantsBuffersHeap))); D3D12_HEAP_PROPERTIES heapProp = {}; @@ -87,31 +88,13 @@ void D3D12GSRender::ResourceStorage::Init(ID3D12Device *device) IID_PPV_ARGS(&m_constantsVertexBuffer) )); - check(device->CreateCommittedResource( - &heapProp, - D3D12_HEAP_FLAG_NONE, - &resDesc, - D3D12_RESOURCE_STATE_GENERIC_READ, - nullptr, - IID_PPV_ARGS(&m_constantsFragmentBuffer) - )); - D3D12_DESCRIPTOR_HEAP_DESC descriptorHeapDesc = {}; descriptorHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; descriptorHeapDesc.NumDescriptors = 1000; // For safety descriptorHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; check(device->CreateDescriptorHeap(&descriptorHeapDesc, IID_PPV_ARGS(&m_constantsBufferDescriptorsHeap))); 
- // Scale offset buffer - // Separate constant buffer - check(device->CreateCommittedResource( - &heapProp, - D3D12_HEAP_FLAG_NONE, - &resDesc, - D3D12_RESOURCE_STATE_GENERIC_READ, - nullptr, - IID_PPV_ARGS(&m_scaleOffsetBuffer) - )); + descriptorHeapDesc = {}; descriptorHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; descriptorHeapDesc.NumDescriptors = 1000; // For safety @@ -145,11 +128,10 @@ void D3D12GSRender::ResourceStorage::Release() m_constantsBufferDescriptorsHeap->Release(); m_scaleOffsetDescriptorHeap->Release(); m_constantsVertexBuffer->Release(); - m_constantsFragmentBuffer->Release(); - m_scaleOffsetBuffer->Release(); + m_constantsBuffersHeap->Release(); m_vertexBuffersHeap->Release(); m_backBuffer->Release(); - for (auto tmp : m_inflightVertexBuffers) + for (auto tmp : m_inflightResources) tmp->Release(); m_textureDescriptorsHeap->Release(); m_textureStorage->Release(); @@ -445,7 +427,7 @@ std::vector D3D12GSRender::EnableVertexData(bool index check(vertexBuffer->Map(0, nullptr, (void**)&bufferMap)); memcpy((char*)bufferMap + data_offset * item_size, &m_vertex_data[i].data[data_offset * item_size], data_size); vertexBuffer->Unmap(0, nullptr); - getCurrentResourceStorage().m_inflightVertexBuffers.push_back(vertexBuffer); + getCurrentResourceStorage().m_inflightResources.push_back(vertexBuffer); D3D12_VERTEX_BUFFER_VIEW vertexBufferView = {}; vertexBufferView.BufferLocation = vertexBuffer->GetGPUVirtualAddress(); @@ -549,22 +531,44 @@ void D3D12GSRender::setScaleOffset() scaleOffsetMat[3] /= RSXThread::m_width / RSXThread::m_width_scale; scaleOffsetMat[7] /= RSXThread::m_height / RSXThread::m_height_scale; + size_t constantBuffersHeapOffset = getCurrentResourceStorage().m_constantsBuffersHeapFreeSpace; + // 65536 alignment + constantBuffersHeapOffset = (constantBuffersHeapOffset + 65536 - 1) & ~65535; + + D3D12_RESOURCE_DESC resDesc = {}; + resDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + resDesc.Width = 256; + resDesc.Height = 1; + 
resDesc.DepthOrArraySize = 1; + resDesc.SampleDesc.Count = 1; + resDesc.MipLevels = 1; + resDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + + // Scale offset buffer + // Separate constant buffer + ID3D12Resource *scaleOffsetBuffer; + check(m_device->CreatePlacedResource( + getCurrentResourceStorage().m_constantsBuffersHeap, + constantBuffersHeapOffset, + &resDesc, + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&scaleOffsetBuffer) + )); + void *scaleOffsetMap; - size_t offset = getCurrentResourceStorage().m_currentScaleOffsetBufferIndex * 256; - D3D12_RANGE range = { - offset, - 1024 * 1024 - offset - }; - check(getCurrentResourceStorage().m_scaleOffsetBuffer->Map(0, &range, &scaleOffsetMap)); - memcpy((char*)scaleOffsetMap + offset, scaleOffsetMat, 16 * sizeof(float)); - getCurrentResourceStorage().m_scaleOffsetBuffer->Unmap(0, &range); + check(scaleOffsetBuffer->Map(0, nullptr, &scaleOffsetMap)); + memcpy((char*)scaleOffsetMap, scaleOffsetMat, 16 * sizeof(float)); + scaleOffsetBuffer->Unmap(0, nullptr); D3D12_CONSTANT_BUFFER_VIEW_DESC constantBufferViewDesc = {}; - constantBufferViewDesc.BufferLocation = getCurrentResourceStorage().m_scaleOffsetBuffer->GetGPUVirtualAddress() + offset; + constantBufferViewDesc.BufferLocation = scaleOffsetBuffer->GetGPUVirtualAddress(); constantBufferViewDesc.SizeInBytes = (UINT)256; D3D12_CPU_DESCRIPTOR_HANDLE Handle = getCurrentResourceStorage().m_scaleOffsetDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); Handle.ptr += getCurrentResourceStorage().m_currentScaleOffsetBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); m_device->CreateConstantBufferView(&constantBufferViewDesc, Handle); + getCurrentResourceStorage().m_constantsBuffersHeapFreeSpace = constantBuffersHeapOffset + 256; + getCurrentResourceStorage().m_inflightResources.push_back(scaleOffsetBuffer); } void D3D12GSRender::FillVertexShaderConstantsBuffer() @@ -622,10 +626,36 @@ void 
D3D12GSRender::FillPixelShaderConstantsBuffer() { // Get constant from fragment program const std::vector &fragmentOffset = m_cachePSO.getFragmentConstantOffsetsCache(m_cur_fragment_prog); + size_t bufferSize = fragmentOffset.size() * 4 * sizeof(float) + 1; + // Multiple of 256 never 0 + bufferSize = (bufferSize + 255) & ~255; + + size_t constantBuffersHeapOffset = getCurrentResourceStorage().m_constantsBuffersHeapFreeSpace; + // 65536 alignment + constantBuffersHeapOffset = (constantBuffersHeapOffset + 65536 - 1) & ~65535; + + D3D12_RESOURCE_DESC resDesc = {}; + resDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + resDesc.Width = (UINT)bufferSize; + resDesc.Height = 1; + resDesc.DepthOrArraySize = 1; + resDesc.SampleDesc.Count = 1; + resDesc.MipLevels = 1; + resDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + + ID3D12Resource *constantsBuffer; + check(m_device->CreatePlacedResource( + getCurrentResourceStorage().m_constantsBuffersHeap, + constantBuffersHeapOffset, + &resDesc, + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&constantsBuffer) + )); size_t offset = 0; void *constantsBufferMap; - check(getCurrentResourceStorage().m_constantsFragmentBuffer->Map(0, nullptr, &constantsBufferMap)); + check(constantsBuffer->Map(0, nullptr, &constantsBufferMap)); for (size_t offsetInFP : fragmentOffset) { u32 vector[4]; @@ -653,21 +683,20 @@ void D3D12GSRender::FillPixelShaderConstantsBuffer() vector[3] = c3; } - memcpy((char*)constantsBufferMap + getCurrentResourceStorage().constantsFragmentSize + offset, vector, 4 * sizeof(u32)); + memcpy((char*)constantsBufferMap + offset, vector, 4 * sizeof(u32)); offset += 4 * sizeof(u32); } - getCurrentResourceStorage().m_constantsFragmentBuffer->Unmap(0, nullptr); - // Multiple of 256 - offset = (offset + 255) & ~255; + constantsBuffer->Unmap(0, nullptr); D3D12_CONSTANT_BUFFER_VIEW_DESC constantBufferViewDesc = {}; - constantBufferViewDesc.BufferLocation = 
getCurrentResourceStorage().m_constantsFragmentBuffer->GetGPUVirtualAddress() + getCurrentResourceStorage().constantsFragmentSize; - constantBufferViewDesc.SizeInBytes = (UINT)offset; + constantBufferViewDesc.BufferLocation = constantsBuffer->GetGPUVirtualAddress(); + constantBufferViewDesc.SizeInBytes = (UINT)bufferSize; D3D12_CPU_DESCRIPTOR_HANDLE Handle = getCurrentResourceStorage().m_constantsBufferDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); Handle.ptr += getCurrentResourceStorage().m_constantsBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); m_device->CreateConstantBufferView(&constantBufferViewDesc, Handle); - getCurrentResourceStorage().constantsFragmentSize += offset; + getCurrentResourceStorage().m_constantsBuffersHeapFreeSpace = constantBuffersHeapOffset + bufferSize; + getCurrentResourceStorage().m_inflightResources.push_back(constantsBuffer); } diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 197f482d88..0770cbb85b 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -55,19 +55,20 @@ private: ID3D12CommandAllocator *m_commandAllocator; std::list m_inflightCommandList; + std::vector m_inflightResources; + // Vertex storage size_t m_currentVertexBuffersHeapOffset; - std::vector m_inflightVertexBuffers; ID3D12Heap *m_vertexBuffersHeap; size_t m_indexBufferCount; ID3D12Resource *m_indexBuffer; // Constants storage - ID3D12Resource *m_constantsVertexBuffer, *m_constantsFragmentBuffer; - size_t constantsFragmentSize; + ID3D12Resource *m_constantsVertexBuffer; + ID3D12Heap *m_constantsBuffersHeap; + size_t m_constantsBuffersHeapFreeSpace; ID3D12DescriptorHeap *m_constantsBufferDescriptorsHeap; size_t m_constantsBufferSize, m_constantsBufferIndex; - ID3D12Resource *m_scaleOffsetBuffer; ID3D12DescriptorHeap *m_scaleOffsetDescriptorHeap; size_t m_currentScaleOffsetBufferIndex; From 75202ac55b5319d7c553e9b791c1a839230ecb5e 
Mon Sep 17 00:00:00 2001 From: vlj Date: Sat, 23 May 2015 18:45:21 +0200 Subject: [PATCH 105/343] d3d12: properly clean textures --- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index d1f7d5ff93..b910093d2e 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -91,7 +91,8 @@ size_t D3D12GSRender::UploadTextures() getCurrentResourceStorage().m_currentStorageOffset += textureSize; getCurrentResourceStorage().m_currentStorageOffset = (getCurrentResourceStorage().m_currentStorageOffset + 65536 - 1) & ~65535; - + getCurrentResourceStorage().m_inflightResources.push_back(Texture); + getCurrentResourceStorage().m_inflightResources.push_back(vramTexture); D3D12_TEXTURE_COPY_LOCATION dst = {}, src = {}; From 986a39fcfb64f568a8f018368e55d6dcd085c735 Mon Sep 17 00:00:00 2001 From: vlj Date: Sat, 23 May 2015 18:50:20 +0200 Subject: [PATCH 106/343] d3d12: clean upload command lists as well --- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index b910093d2e..edb91c9592 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -103,7 +103,7 @@ size_t D3D12GSRender::UploadTextures() src.PlacedFootprint.Footprint.Depth = 1; src.PlacedFootprint.Footprint.Width = m_textures[i].GetWidth(); src.PlacedFootprint.Footprint.Height = m_textures[i].GetHeight(); - src.PlacedFootprint.Footprint.RowPitch = rowPitch; + src.PlacedFootprint.Footprint.RowPitch = (UINT)rowPitch; src.PlacedFootprint.Footprint.Format = dxgiFormat; commandList->CopyTextureRegion(&dst, 0, 0, 0, &src, nullptr); @@ -136,6 +136,7 @@ size_t D3D12GSRender::UploadTextures() commandList->Close(); m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)&commandList); + 
getCurrentResourceStorage().m_inflightCommandList.push_back(commandList); usedTexture++; } From fafcce6d5f49a30260930f812ade3f6c413492c9 Mon Sep 17 00:00:00 2001 From: vlj Date: Sat, 23 May 2015 19:37:48 +0200 Subject: [PATCH 107/343] d3d12: Implement write depth buffer callback --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 87 +++++++++++++++++++ rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 3 +- rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp | 4 + rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h | 1 + 4 files changed, 93 insertions(+), 2 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index f5757321fd..0f532f99c1 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -303,6 +303,7 @@ void D3D12GSRender::InitDrawBuffers() } } + void D3D12GSRender::OnInit() { m_frame->Show(); @@ -983,6 +984,8 @@ void D3D12GSRender::ExecCMD() check(commandList->Close()); m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)&commandList); + WriteDepthBuffer(); + /* if (m_set_color_mask) { glColorMask(m_color_mask_r, m_color_mask_g, m_color_mask_b, m_color_mask_a); @@ -1341,4 +1344,88 @@ void D3D12GSRender::Flip() m_frame->Flip(nullptr); } + + +void D3D12GSRender::WriteDepthBuffer() +{ + if (!m_set_context_dma_z) + return; + + u32 address = GetAddress(m_surface_offset_z, m_context_dma_z - 0xfeed0000); + + auto ptr = vm::get_ptr(address); + + D3D12_HEAP_PROPERTIES heapProp = {}; + heapProp.Type = D3D12_HEAP_TYPE_READBACK; + D3D12_RESOURCE_DESC resdesc = {}; + resdesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + resdesc.Width = RSXThread::m_width * RSXThread::m_height * 4 * 2; // * 2 for safety + resdesc.Height = 1; + resdesc.DepthOrArraySize = 1; + resdesc.SampleDesc.Count = 1; + resdesc.MipLevels = 1; + resdesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + + ID3D12Resource *writeDest; + check( + m_device->CreateCommittedResource( + &heapProp, + D3D12_HEAP_FLAG_NONE, + &resdesc, + 
D3D12_RESOURCE_STATE_COPY_DEST, + nullptr, + IID_PPV_ARGS(&writeDest) + ) + ); + + ID3D12GraphicsCommandList *downloadCommandList; + check( + m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, getCurrentResourceStorage().m_commandAllocator, nullptr, IID_PPV_ARGS(&downloadCommandList)) + ); + + size_t rowPitch = RSXThread::m_width * sizeof(float); + rowPitch = (rowPitch + 255) & ~255; + + D3D12_TEXTURE_COPY_LOCATION dst = {}, src = {}; + dst.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + dst.pResource = m_fbo->getDepthStencilTexture(); + src.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + src.pResource = writeDest; + src.PlacedFootprint.Offset = 0; + src.PlacedFootprint.Footprint.Depth = 1; + src.PlacedFootprint.Footprint.Format = DXGI_FORMAT_R32_FLOAT; + src.PlacedFootprint.Footprint.Height = RSXThread::m_height; + src.PlacedFootprint.Footprint.Width = RSXThread::m_width; + src.PlacedFootprint.Footprint.RowPitch = rowPitch; + downloadCommandList->CopyTextureRegion(&dst, 0, 0, 0, &src, nullptr); + downloadCommandList->Close(); + m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)&downloadCommandList); + + //Wait for result + ID3D12Fence *fence; + check( + m_device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&fence)) + ); + HANDLE handle = CreateEvent(0, FALSE, FALSE, 0); + fence->SetEventOnCompletion(1, handle); + m_commandQueueGraphic->Signal(fence, 1); + WaitForSingleObject(handle, INFINITE); + CloseHandle(handle); + + char *ptrAsChar = (char*)ptr; + float *writeDestPtr; + check(writeDest->Map(0, nullptr, (void**)&writeDestPtr)); + for (unsigned row = 0; row < RSXThread::m_height; row++) + { + for (unsigned i = 0; i < RSXThread::m_width; i++) + { + unsigned char c = writeDestPtr[row * rowPitch / 4 + i] * 255.; + ptrAsChar[row * RSXThread::m_width + i] = c; + } + } + + writeDest->Release(); + fence->Release(); + downloadCommandList->Release(); +} #endif \ No newline at end of file diff --git 
a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 0770cbb85b..903423202b 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -131,7 +131,6 @@ private: /*void DisableVertexData(); void WriteBuffers(); - void WriteDepthBuffer(); void WriteColorBuffers(); void WriteColorBufferA(); void WriteColorBufferB(); @@ -140,7 +139,7 @@ private: void DrawObjects();*/ void InitDrawBuffers(); - + void WriteDepthBuffer(); protected: virtual void OnInit() override; virtual void OnInitThread() override; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp index 1614c27c5a..b25842192c 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp @@ -150,4 +150,8 @@ ID3D12Resource * D3D12RenderTargetSets::getRenderTargetTexture(u8 Id) const { return m_rtts[Id]; } +ID3D12Resource * D3D12RenderTargetSets::getDepthStencilTexture() const +{ + return m_depthStencilTexture; +} #endif \ No newline at end of file diff --git a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h index 1d39085d39..9d9e81a75c 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h @@ -24,5 +24,6 @@ public: D3D12_CPU_DESCRIPTOR_HANDLE getRTTCPUHandle(u8 baseFBO) const; D3D12_CPU_DESCRIPTOR_HANDLE getDSVCPUHandle() const; ID3D12Resource *getRenderTargetTexture(u8 Id) const; + ID3D12Resource *getDepthStencilTexture() const; }; #endif \ No newline at end of file From 203194479273215573ae70bc710811ff72de4dfd Mon Sep 17 00:00:00 2001 From: vlj Date: Sat, 23 May 2015 20:13:04 +0200 Subject: [PATCH 108/343] d3d12: Do transition when reading depth buffer --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 
0f532f99c1..83582a32ce 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -1383,6 +1383,13 @@ void D3D12GSRender::WriteDepthBuffer() m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, getCurrentResourceStorage().m_commandAllocator, nullptr, IID_PPV_ARGS(&downloadCommandList)) ); + D3D12_RESOURCE_BARRIER barrier = {}; + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barrier.Transition.pResource = m_fbo->getDepthStencilTexture(); + barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_DEPTH_WRITE; + barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE; + downloadCommandList->ResourceBarrier(1, &barrier); + size_t rowPitch = RSXThread::m_width * sizeof(float); rowPitch = (rowPitch + 255) & ~255; @@ -1396,8 +1403,13 @@ void D3D12GSRender::WriteDepthBuffer() src.PlacedFootprint.Footprint.Format = DXGI_FORMAT_R32_FLOAT; src.PlacedFootprint.Footprint.Height = RSXThread::m_height; src.PlacedFootprint.Footprint.Width = RSXThread::m_width; - src.PlacedFootprint.Footprint.RowPitch = rowPitch; + src.PlacedFootprint.Footprint.RowPitch = (UINT)rowPitch; downloadCommandList->CopyTextureRegion(&dst, 0, 0, 0, &src, nullptr); + + barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE; + barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_DEPTH_WRITE; + downloadCommandList->ResourceBarrier(1, &barrier); + downloadCommandList->Close(); m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)&downloadCommandList); @@ -1419,7 +1431,7 @@ void D3D12GSRender::WriteDepthBuffer() { for (unsigned i = 0; i < RSXThread::m_width; i++) { - unsigned char c = writeDestPtr[row * rowPitch / 4 + i] * 255.; + unsigned char c = (unsigned char) writeDestPtr[row * rowPitch / 4 + i] * 255.; ptrAsChar[row * RSXThread::m_width + i] = c; } } From c08ac779f4c0e482df0a03c0aa2f923772b175cf Mon Sep 17 00:00:00 2001 From: vlj Date: Sat, 23 May 2015 20:27:15 +0200 Subject: [PATCH 109/343] d3d12: src and dst were 
inverted, fix it now depth buffer is properly read, but it is too little --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 83582a32ce..d2085fc9e2 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -1394,16 +1394,16 @@ void D3D12GSRender::WriteDepthBuffer() rowPitch = (rowPitch + 255) & ~255; D3D12_TEXTURE_COPY_LOCATION dst = {}, src = {}; - dst.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; - dst.pResource = m_fbo->getDepthStencilTexture(); - src.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; - src.pResource = writeDest; - src.PlacedFootprint.Offset = 0; - src.PlacedFootprint.Footprint.Depth = 1; - src.PlacedFootprint.Footprint.Format = DXGI_FORMAT_R32_FLOAT; - src.PlacedFootprint.Footprint.Height = RSXThread::m_height; - src.PlacedFootprint.Footprint.Width = RSXThread::m_width; - src.PlacedFootprint.Footprint.RowPitch = (UINT)rowPitch; + src.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + src.pResource = m_fbo->getDepthStencilTexture(); + dst.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + dst.pResource = writeDest; + dst.PlacedFootprint.Offset = 0; + dst.PlacedFootprint.Footprint.Depth = 1; + dst.PlacedFootprint.Footprint.Format = DXGI_FORMAT_R32_FLOAT; + dst.PlacedFootprint.Footprint.Height = RSXThread::m_height; + dst.PlacedFootprint.Footprint.Width = RSXThread::m_width; + dst.PlacedFootprint.Footprint.RowPitch = (UINT)rowPitch; downloadCommandList->CopyTextureRegion(&dst, 0, 0, 0, &src, nullptr); barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE; @@ -1431,8 +1431,8 @@ void D3D12GSRender::WriteDepthBuffer() { for (unsigned i = 0; i < RSXThread::m_width; i++) { - unsigned char c = (unsigned char) writeDestPtr[row * rowPitch / 4 + i] * 255.; - ptrAsChar[row * RSXThread::m_width + i] = c; + unsigned char c = (unsigned char) 
(writeDestPtr[row * rowPitch / 4 + i] * 255.); + ptrAsChar[(row * RSXThread::m_width + i)] = c; } } From 18e3e74070367605aa743dc41accae5095329326 Mon Sep 17 00:00:00 2001 From: vlj Date: Sat, 23 May 2015 20:28:53 +0200 Subject: [PATCH 110/343] d3d12: Fix depth read size --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index d2085fc9e2..cd0058d5c5 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -1427,12 +1427,16 @@ void D3D12GSRender::WriteDepthBuffer() char *ptrAsChar = (char*)ptr; float *writeDestPtr; check(writeDest->Map(0, nullptr, (void**)&writeDestPtr)); + // TODO : this should be done by the gpu for (unsigned row = 0; row < RSXThread::m_height; row++) { for (unsigned i = 0; i < RSXThread::m_width; i++) { unsigned char c = (unsigned char) (writeDestPtr[row * rowPitch / 4 + i] * 255.); - ptrAsChar[(row * RSXThread::m_width + i)] = c; + ptrAsChar[4 * (row * RSXThread::m_width + i)] = c; + ptrAsChar[4 * (row * RSXThread::m_width + i) + 1] = c; + ptrAsChar[4 * (row * RSXThread::m_width + i) + 2] = c; + ptrAsChar[4 * (row * RSXThread::m_width + i) + 3] = c; } } From 6e8b94a7e179f73ca19497d5d7bf4dfcf865d06f Mon Sep 17 00:00:00 2001 From: vlj Date: Sun, 24 May 2015 00:00:57 +0200 Subject: [PATCH 111/343] Add config option for d3d debug layer and adaptater --- rpcs3/Gui/MainFrame.cpp | 18 +++++++++++++++++- rpcs3/Ini.h | 8 ++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/rpcs3/Gui/MainFrame.cpp b/rpcs3/Gui/MainFrame.cpp index f583d04ff1..edf262159c 100644 --- a/rpcs3/Gui/MainFrame.cpp +++ b/rpcs3/Gui/MainFrame.cpp @@ -365,7 +365,7 @@ void MainFrame::Config(wxCommandEvent& WXUNUSED(event)) wxDialog diag(this, wxID_ANY, "Settings", wxDefaultPosition); static const u32 width = 452; - static const u32 height = 460; + static const u32 height = 520; // Settings 
panels wxNotebook* nb_config = new wxNotebook(&diag, wxID_ANY, wxPoint(6,6), wxSize(width, height)); @@ -399,6 +399,7 @@ void MainFrame::Config(wxCommandEvent& WXUNUSED(event)) // Graphics wxStaticBoxSizer* s_round_gs_render = new wxStaticBoxSizer(wxVERTICAL, p_graphics, _("Render")); + wxStaticBoxSizer* s_round_gs_d3d_adaptater = new wxStaticBoxSizer(wxVERTICAL, p_graphics, _("D3D Adaptater")); wxStaticBoxSizer* s_round_gs_res = new wxStaticBoxSizer(wxVERTICAL, p_graphics, _("Default resolution")); wxStaticBoxSizer* s_round_gs_aspect = new wxStaticBoxSizer(wxVERTICAL, p_graphics, _("Default aspect ratio")); wxStaticBoxSizer* s_round_gs_frame_limit = new wxStaticBoxSizer(wxVERTICAL, p_graphics, _("Frame limit")); @@ -426,6 +427,7 @@ void MainFrame::Config(wxCommandEvent& WXUNUSED(event)) wxComboBox* cbox_cpu_decoder = new wxComboBox(p_core, wxID_ANY); wxComboBox* cbox_spu_decoder = new wxComboBox(p_core, wxID_ANY); wxComboBox* cbox_gs_render = new wxComboBox(p_graphics, wxID_ANY); + wxComboBox* cbox_gs_d3d_adaptater = new wxComboBox(p_graphics, wxID_ANY); wxComboBox* cbox_gs_resolution = new wxComboBox(p_graphics, wxID_ANY); wxComboBox* cbox_gs_aspect = new wxComboBox(p_graphics, wxID_ANY); wxComboBox* cbox_gs_frame_limit = new wxComboBox(p_graphics, wxID_ANY); @@ -447,6 +449,7 @@ void MainFrame::Config(wxCommandEvent& WXUNUSED(event)) wxCheckBox* chbox_gs_dump_color = new wxCheckBox(p_graphics, wxID_ANY, "Write Color Buffers"); wxCheckBox* chbox_gs_read_color = new wxCheckBox(p_graphics, wxID_ANY, "Read Color Buffer"); wxCheckBox* chbox_gs_vsync = new wxCheckBox(p_graphics, wxID_ANY, "VSync"); + wxCheckBox* chbox_gs_debug_output = new wxCheckBox(p_graphics, wxID_ANY, "Debug Output"); wxCheckBox* chbox_gs_3dmonitor = new wxCheckBox(p_graphics, wxID_ANY, "3D Monitor"); wxCheckBox* chbox_audio_dump = new wxCheckBox(p_audio, wxID_ANY, "Dump to file"); wxCheckBox* chbox_audio_conv = new wxCheckBox(p_audio, wxID_ANY, "Convert to 16 bit"); @@ -478,6 +481,12 @@ void 
MainFrame::Config(wxCommandEvent& WXUNUSED(event)) cbox_gs_render->Append("DirectX 12"); #endif + cbox_gs_d3d_adaptater->Append("WARP"); + cbox_gs_d3d_adaptater->Append("default"); + cbox_gs_d3d_adaptater->Append("renderer 0"); + cbox_gs_d3d_adaptater->Append("renderer 1"); + cbox_gs_d3d_adaptater->Append("renderer 2"); + for(int i = 1; i < WXSIZEOF(ResolutionTable); ++i) { cbox_gs_resolution->Append(wxString::Format("%dx%d", ResolutionTable[i].width.value(), ResolutionTable[i].height.value())); @@ -608,6 +617,7 @@ void MainFrame::Config(wxCommandEvent& WXUNUSED(event)) chbox_gs_dump_color ->SetValue(Ini.GSDumpColorBuffers.GetValue()); chbox_gs_read_color ->SetValue(Ini.GSReadColorBuffer.GetValue()); chbox_gs_vsync ->SetValue(Ini.GSVSyncEnable.GetValue()); + chbox_gs_debug_output ->SetValue(Ini.GSDebugOutputEnable.GetValue()); chbox_gs_3dmonitor ->SetValue(Ini.GS3DTV.GetValue()); chbox_audio_dump ->SetValue(Ini.AudioDumpToFile.GetValue()); chbox_audio_conv ->SetValue(Ini.AudioConvertToU16.GetValue()); @@ -630,6 +640,7 @@ void MainFrame::Config(wxCommandEvent& WXUNUSED(event)) cbox_cpu_decoder ->SetSelection(Ini.CPUDecoderMode.GetValue() ? Ini.CPUDecoderMode.GetValue() : 0); cbox_spu_decoder ->SetSelection(Ini.SPUDecoderMode.GetValue() ? 
Ini.SPUDecoderMode.GetValue() : 0); cbox_gs_render ->SetSelection(Ini.GSRenderMode.GetValue()); + cbox_gs_d3d_adaptater->SetSelection(Ini.GSD3DAdaptater.GetValue()); cbox_gs_resolution ->SetSelection(ResolutionIdToNum(Ini.GSResolution.GetValue()) - 1); cbox_gs_aspect ->SetSelection(Ini.GSAspectRatio.GetValue() - 1); cbox_gs_frame_limit ->SetSelection(Ini.GSFrameLimit.GetValue()); @@ -650,6 +661,7 @@ void MainFrame::Config(wxCommandEvent& WXUNUSED(event)) // Rendering s_round_gs_render->Add(cbox_gs_render, wxSizerFlags().Border(wxALL, 5).Expand()); + s_round_gs_d3d_adaptater->Add(cbox_gs_d3d_adaptater, wxSizerFlags().Border(wxALL, 5).Expand()); s_round_gs_res->Add(cbox_gs_resolution, wxSizerFlags().Border(wxALL, 5).Expand()); s_round_gs_aspect->Add(cbox_gs_aspect, wxSizerFlags().Border(wxALL, 5).Expand()); s_round_gs_frame_limit->Add(cbox_gs_frame_limit, wxSizerFlags().Border(wxALL, 5).Expand()); @@ -679,6 +691,7 @@ void MainFrame::Config(wxCommandEvent& WXUNUSED(event)) // Graphics s_subpanel_graphics->Add(s_round_gs_render, wxSizerFlags().Border(wxALL, 5).Expand()); + s_subpanel_graphics->Add(s_round_gs_d3d_adaptater, wxSizerFlags().Border(wxALL, 5).Expand()); s_subpanel_graphics->Add(s_round_gs_res, wxSizerFlags().Border(wxALL, 5).Expand()); s_subpanel_graphics->Add(s_round_gs_aspect, wxSizerFlags().Border(wxALL, 5).Expand()); s_subpanel_graphics->Add(s_round_gs_frame_limit, wxSizerFlags().Border(wxALL, 5).Expand()); @@ -687,6 +700,7 @@ void MainFrame::Config(wxCommandEvent& WXUNUSED(event)) s_subpanel_graphics->Add(chbox_gs_dump_color, wxSizerFlags().Border(wxALL, 5).Expand()); s_subpanel_graphics->Add(chbox_gs_read_color, wxSizerFlags().Border(wxALL, 5).Expand()); s_subpanel_graphics->Add(chbox_gs_vsync, wxSizerFlags().Border(wxALL, 5).Expand()); + s_subpanel_graphics->Add(chbox_gs_debug_output, wxSizerFlags().Border(wxALL, 5).Expand()); s_subpanel_graphics->Add(chbox_gs_3dmonitor, wxSizerFlags().Border(wxALL, 5).Expand()); // Input - Output @@ -748,6 +762,7 @@ 
void MainFrame::Config(wxCommandEvent& WXUNUSED(event)) Ini.HookStFunc.SetValue(chbox_core_hook_stfunc->GetValue()); Ini.LoadLibLv2.SetValue(chbox_core_load_liblv2->GetValue()); Ini.GSRenderMode.SetValue(cbox_gs_render->GetSelection()); + Ini.GSD3DAdaptater.SetValue(cbox_gs_d3d_adaptater->GetSelection()); Ini.GSResolution.SetValue(ResolutionNumToId(cbox_gs_resolution->GetSelection() + 1)); Ini.GSAspectRatio.SetValue(cbox_gs_aspect->GetSelection() + 1); Ini.GSFrameLimit.SetValue(cbox_gs_frame_limit->GetSelection()); @@ -756,6 +771,7 @@ void MainFrame::Config(wxCommandEvent& WXUNUSED(event)) Ini.GSDumpColorBuffers.SetValue(chbox_gs_dump_color->GetValue()); Ini.GSReadColorBuffer.SetValue(chbox_gs_read_color->GetValue()); Ini.GSVSyncEnable.SetValue(chbox_gs_vsync->GetValue()); + Ini.GSDebugOutputEnable.SetValue(chbox_gs_debug_output->GetValue()); Ini.GS3DTV.SetValue(chbox_gs_3dmonitor->GetValue()); Ini.PadHandlerMode.SetValue(cbox_pad_handler->GetSelection()); Ini.KeyboardHandlerMode.SetValue(cbox_keyboard_handler->GetSelection()); diff --git a/rpcs3/Ini.h b/rpcs3/Ini.h index 5e43bc0353..11fa88ea70 100644 --- a/rpcs3/Ini.h +++ b/rpcs3/Ini.h @@ -98,6 +98,7 @@ public: // Graphics IniEntry GSRenderMode; + IniEntry GSD3DAdaptater; IniEntry GSResolution; IniEntry GSAspectRatio; IniEntry GSFrameLimit; @@ -107,6 +108,7 @@ public: IniEntry GSReadColorBuffer; IniEntry GSVSyncEnable; IniEntry GS3DTV; + IniEntry GSDebugOutputEnable; // Audio IniEntry AudioOutMode; @@ -182,6 +184,7 @@ public: // Graphics GSRenderMode.Init("GS_RenderMode", path); + GSD3DAdaptater.Init("GS_D3DAdaptater", path); GSResolution.Init("GS_Resolution", path); GSAspectRatio.Init("GS_AspectRatio", path); GSFrameLimit.Init("GS_FrameLimit", path); @@ -190,6 +193,7 @@ public: GSDumpDepthBuffer.Init("GS_DumpDepthBuffer", path); GSReadColorBuffer.Init("GS_GSReadColorBuffer", path); GSVSyncEnable.Init("GS_VSyncEnable", path); + GSDebugOutputEnable.Init("GS_DebugOutputEnable", path); GS3DTV.Init("GS_3DTV", path); 
// Audio @@ -262,6 +266,7 @@ public: // Graphics GSRenderMode.Load(1); + GSD3DAdaptater.Load(1); GSResolution.Load(4); GSAspectRatio.Load(2); GSFrameLimit.Load(0); @@ -270,6 +275,7 @@ public: GSDumpDepthBuffer.Load(false); GSReadColorBuffer.Load(false); GSVSyncEnable.Load(false); + GSDebugOutputEnable.Load(false); GS3DTV.Load(false); // Audio @@ -342,6 +348,7 @@ public: // Graphics GSRenderMode.Save(); + GSD3DAdaptater.Save(); GSResolution.Save(); GSAspectRatio.Save(); GSFrameLimit.Save(); @@ -350,6 +357,7 @@ public: GSDumpDepthBuffer.Save(); GSReadColorBuffer.Save(); GSVSyncEnable.Save(); + GSDebugOutputEnable.Save(); GS3DTV.Save(); // Audio From cb14697aa0ab54b576c3c7702cbf78b715e5789b Mon Sep 17 00:00:00 2001 From: vlj Date: Sun, 24 May 2015 00:15:06 +0200 Subject: [PATCH 112/343] d3d12: Honor adaptater selection + debug output layer --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 29 +++++++++++++++++++-------- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 1 + rpcs3/Gui/D3DGSFrame.cpp | 7 ++++++- rpcs3/Gui/D3DGSFrame.h | 2 ++ 4 files changed, 30 insertions(+), 9 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index cd0058d5c5..2e642ba06d 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -145,19 +145,29 @@ void D3D12GSRender::ResourceStorage::Release() D3D12GSRender::D3D12GSRender() : GSRender(), m_fbo(nullptr), m_PSO(nullptr) { - - // Enable d3d debug layer -#ifdef _DEBUG - Microsoft::WRL::ComPtr debugInterface; - D3D12GetDebugInterface(IID_PPV_ARGS(&debugInterface)); - debugInterface->EnableDebugLayer(); -#endif + if (Ini.GSDebugOutputEnable.GetValue()) + { + Microsoft::WRL::ComPtr debugInterface; + D3D12GetDebugInterface(IID_PPV_ARGS(&debugInterface)); + debugInterface->EnableDebugLayer(); + } Microsoft::WRL::ComPtr dxgiFactory; check(CreateDXGIFactory(IID_PPV_ARGS(&dxgiFactory))); // Create adapter IDXGIAdapter* adaptater = nullptr; -// 
check(dxgiFactory->EnumWarpAdapter(IID_PPV_ARGS(&adaptater))); + switch (Ini.GSD3DAdaptater.GetValue()) + { + case 0: // WARP + check(dxgiFactory->EnumWarpAdapter(IID_PPV_ARGS(&adaptater))); + break; + case 1: // Default + dxgiFactory->EnumAdapters(0, &adaptater); + break; + default: // Adaptater 0, 1, ... + dxgiFactory->EnumAdapters(Ini.GSD3DAdaptater.GetValue() - 2,&adaptater); + break; + } check(D3D12CreateDevice(adaptater, D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&m_device))); // Queues @@ -168,6 +178,9 @@ D3D12GSRender::D3D12GSRender() check(m_device->CreateCommandQueue(&graphicQueueDesc, IID_PPV_ARGS(&m_commandQueueGraphic))); m_frame = GetGSFrame(); + DXGI_ADAPTER_DESC adaptaterDesc; + adaptater->GetDesc(&adaptaterDesc); + m_frame->SetAdaptaterName(adaptaterDesc.Description); // Create swap chain and put them in a descriptor heap as rendertarget DXGI_SWAP_CHAIN_DESC swapChain = {}; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 903423202b..0dbbe0477e 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -33,6 +33,7 @@ public: virtual void DeleteContext(void* ctx) = 0; virtual void Flip(void* ctx) = 0; virtual HWND getHandle() const = 0; + virtual void SetAdaptaterName(const wchar_t *) = 0; }; typedef GSFrameBase2*(*GetGSFrameCb2)(); diff --git a/rpcs3/Gui/D3DGSFrame.cpp b/rpcs3/Gui/D3DGSFrame.cpp index b0b86c4ce2..48d31e2237 100644 --- a/rpcs3/Gui/D3DGSFrame.cpp +++ b/rpcs3/Gui/D3DGSFrame.cpp @@ -19,6 +19,11 @@ D3DGSFrame::~D3DGSFrame() { } +void D3DGSFrame::SetAdaptaterName(const wchar_t *name) +{ + AdaptaterName = name; +} + void D3DGSFrame::Close() { GSFrame::Close(); @@ -63,7 +68,7 @@ void D3DGSFrame::Flip(void* context) // canvas->SwapBuffers(); m_frames++; - const std::string sub_title = Emu.GetTitle() + (Emu.GetTitleID().length() ? " [" + Emu.GetTitleID() + "] | " : " | "); + const std::string sub_title = Emu.GetTitle() + (Emu.GetTitleID().length() ? 
" [" + Emu.GetTitleID() + "] | " : " | ") + AdaptaterName.ToStdString() + " | "; if (fps_t.GetElapsedTimeInSec() >= 0.5) { diff --git a/rpcs3/Gui/D3DGSFrame.h b/rpcs3/Gui/D3DGSFrame.h index 47de197d0a..fecd729d22 100644 --- a/rpcs3/Gui/D3DGSFrame.h +++ b/rpcs3/Gui/D3DGSFrame.h @@ -9,6 +9,7 @@ struct D3DGSFrame : public GSFrame, public GSFrameBase2 { wxWindow* canvas; u32 m_frames; + wxString AdaptaterName; D3DGSFrame(); ~D3DGSFrame(); @@ -28,6 +29,7 @@ struct D3DGSFrame : public GSFrame, public GSFrameBase2 virtual void SetViewport(int x, int y, u32 w, u32 h) override; virtual HWND getHandle() const override; + virtual void SetAdaptaterName(const wchar_t *) override; private: virtual void OnSize(wxSizeEvent& event); From e88d45b1bf2ee13e0f6f545af4203dcd169f3814 Mon Sep 17 00:00:00 2001 From: vlj Date: Sun, 24 May 2015 00:18:35 +0200 Subject: [PATCH 113/343] d3d12: Fix for Intel HD4600 --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 2e642ba06d..4ead1889df 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -105,6 +105,7 @@ void D3D12GSRender::ResourceStorage::Init(ID3D12Device *device) D3D12_HEAP_DESC heapDescription = {}; heapDescription.SizeInBytes = 256 * 256 * 256 * 16; heapDescription.Properties.Type = D3D12_HEAP_TYPE_UPLOAD; + heapDescription.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS; check(device->CreateHeap(&heapDescription, IID_PPV_ARGS(&m_uploadTextureHeap))); heapDescription.Properties.Type = D3D12_HEAP_TYPE_DEFAULT; From 487fd4dc2a91b9639de12950c920b5c3e1d54fde Mon Sep 17 00:00:00 2001 From: vlj Date: Sun, 24 May 2015 01:41:10 +0200 Subject: [PATCH 114/343] d3d12: Start implementing depth to u8 conversion shader Not used atm --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 57 +++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp 
b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 4ead1889df..aa3d773b79 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -3,6 +3,7 @@ #include "D3D12GSRender.h" #include #include +#include // Some constants are the same between RSX and GL #include @@ -143,6 +144,61 @@ void D3D12GSRender::ResourceStorage::Release() m_commandAllocator->Release(); } +// 32 bits float to U8 unorm CS +#define STRINGIFY(x) #x +const char *shaderCode = STRINGIFY( +Texture2D InputTexture : register(t0); \n +RWTexture2D OutputTexture : register(u0);\n + +[numthreads(1, 1, 1)]\n +void main(uint3 Id : SV_DispatchThreadID)\n +{ \n + OutputTexture[Id.xy] = InputTexture.Load(uint3(Id.xy, 0));\n +} +); + +static void compileF32toU8CS() +{ + ID3DBlob *bytecode; + Microsoft::WRL::ComPtr errorBlob; + HRESULT hr = D3DCompile(shaderCode, strlen(shaderCode), "test", nullptr, nullptr, "main", "cs_5_0", 0, 0, &bytecode, errorBlob.GetAddressOf()); + if (hr != S_OK) + { + const char *tmp = (const char*)errorBlob->GetBufferPointer(); + LOG_ERROR(RSX, tmp); + } + D3D12_DESCRIPTOR_RANGE descriptorRange[2] = {}; + // Textures + descriptorRange[0].BaseShaderRegister = 0; + descriptorRange[0].NumDescriptors = 1; + descriptorRange[0].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; + descriptorRange[1].BaseShaderRegister = 0; + descriptorRange[1].NumDescriptors = 1; + descriptorRange[1].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV; + D3D12_ROOT_PARAMETER RP[2] = {}; + RP[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + RP[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + RP[0].DescriptorTable.pDescriptorRanges = &descriptorRange[0]; + RP[0].DescriptorTable.NumDescriptorRanges = 1; + RP[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + RP[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + RP[1].DescriptorTable.pDescriptorRanges = &descriptorRange[1]; + RP[1].DescriptorTable.NumDescriptorRanges = 1; + + D3D12_ROOT_SIGNATURE_DESC 
rootSignatureDesc = {}; + rootSignatureDesc.NumParameters = 2; + rootSignatureDesc.pParameters = RP; + + ID3DBlob *rootSignatureBlob; + + hr = D3D12SerializeRootSignature(&rootSignatureDesc, D3D_ROOT_SIGNATURE_VERSION_1, &rootSignatureBlob, &errorBlob); + if (hr != S_OK) + { + const char *tmp = (const char*)errorBlob->GetBufferPointer(); + LOG_ERROR(RSX, tmp); + } +} + D3D12GSRender::D3D12GSRender() : GSRender(), m_fbo(nullptr), m_PSO(nullptr) { @@ -265,6 +321,7 @@ D3D12GSRender::D3D12GSRender() m_perFrameStorage[1].Reset(); m_currentResourceStorageIndex = m_swapChain->GetCurrentBackBufferIndex(); + compileF32toU8CS(); } D3D12GSRender::~D3D12GSRender() From 824b9aa628f261fcc7102f7de5ddb65aac600d05 Mon Sep 17 00:00:00 2001 From: vlj Date: Sun, 24 May 2015 01:49:20 +0200 Subject: [PATCH 115/343] d3d12: make depth read optionnal --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 163 +++++++++++++------------- 1 file changed, 82 insertions(+), 81 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index aa3d773b79..266f07c3f1 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -1419,100 +1419,101 @@ void D3D12GSRender::Flip() void D3D12GSRender::WriteDepthBuffer() { - if (!m_set_context_dma_z) - return; + if (!Ini.GSDumpDepthBuffer.GetValue()) + return; + if (!m_set_context_dma_z) + return; - u32 address = GetAddress(m_surface_offset_z, m_context_dma_z - 0xfeed0000); + u32 address = GetAddress(m_surface_offset_z, m_context_dma_z - 0xfeed0000); + auto ptr = vm::get_ptr(address); - auto ptr = vm::get_ptr(address); + D3D12_HEAP_PROPERTIES heapProp = {}; + heapProp.Type = D3D12_HEAP_TYPE_READBACK; + D3D12_RESOURCE_DESC resdesc = {}; + resdesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + resdesc.Width = RSXThread::m_width * RSXThread::m_height * 4 * 2; // * 2 for safety + resdesc.Height = 1; + resdesc.DepthOrArraySize = 1; + resdesc.SampleDesc.Count = 1; + resdesc.MipLevels = 1; + 
resdesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; - D3D12_HEAP_PROPERTIES heapProp = {}; - heapProp.Type = D3D12_HEAP_TYPE_READBACK; - D3D12_RESOURCE_DESC resdesc = {}; - resdesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; - resdesc.Width = RSXThread::m_width * RSXThread::m_height * 4 * 2; // * 2 for safety - resdesc.Height = 1; - resdesc.DepthOrArraySize = 1; - resdesc.SampleDesc.Count = 1; - resdesc.MipLevels = 1; - resdesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + ID3D12Resource *writeDest; + check( + m_device->CreateCommittedResource( + &heapProp, + D3D12_HEAP_FLAG_NONE, + &resdesc, + D3D12_RESOURCE_STATE_COPY_DEST, + nullptr, + IID_PPV_ARGS(&writeDest) + ) + ); - ID3D12Resource *writeDest; - check( - m_device->CreateCommittedResource( - &heapProp, - D3D12_HEAP_FLAG_NONE, - &resdesc, - D3D12_RESOURCE_STATE_COPY_DEST, - nullptr, - IID_PPV_ARGS(&writeDest) - ) - ); + ID3D12GraphicsCommandList *downloadCommandList; + check( + m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, getCurrentResourceStorage().m_commandAllocator, nullptr, IID_PPV_ARGS(&downloadCommandList)) + ); - ID3D12GraphicsCommandList *downloadCommandList; - check( - m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, getCurrentResourceStorage().m_commandAllocator, nullptr, IID_PPV_ARGS(&downloadCommandList)) - ); + D3D12_RESOURCE_BARRIER barrier = {}; + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barrier.Transition.pResource = m_fbo->getDepthStencilTexture(); + barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_DEPTH_WRITE; + barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE; + downloadCommandList->ResourceBarrier(1, &barrier); - D3D12_RESOURCE_BARRIER barrier = {}; - barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - barrier.Transition.pResource = m_fbo->getDepthStencilTexture(); - barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_DEPTH_WRITE; - barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE; - 
downloadCommandList->ResourceBarrier(1, &barrier); + size_t rowPitch = RSXThread::m_width * sizeof(float); + rowPitch = (rowPitch + 255) & ~255; - size_t rowPitch = RSXThread::m_width * sizeof(float); - rowPitch = (rowPitch + 255) & ~255; + D3D12_TEXTURE_COPY_LOCATION dst = {}, src = {}; + src.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + src.pResource = m_fbo->getDepthStencilTexture(); + dst.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + dst.pResource = writeDest; + dst.PlacedFootprint.Offset = 0; + dst.PlacedFootprint.Footprint.Depth = 1; + dst.PlacedFootprint.Footprint.Format = DXGI_FORMAT_R32_FLOAT; + dst.PlacedFootprint.Footprint.Height = RSXThread::m_height; + dst.PlacedFootprint.Footprint.Width = RSXThread::m_width; + dst.PlacedFootprint.Footprint.RowPitch = (UINT)rowPitch; + downloadCommandList->CopyTextureRegion(&dst, 0, 0, 0, &src, nullptr); - D3D12_TEXTURE_COPY_LOCATION dst = {}, src = {}; - src.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; - src.pResource = m_fbo->getDepthStencilTexture(); - dst.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; - dst.pResource = writeDest; - dst.PlacedFootprint.Offset = 0; - dst.PlacedFootprint.Footprint.Depth = 1; - dst.PlacedFootprint.Footprint.Format = DXGI_FORMAT_R32_FLOAT; - dst.PlacedFootprint.Footprint.Height = RSXThread::m_height; - dst.PlacedFootprint.Footprint.Width = RSXThread::m_width; - dst.PlacedFootprint.Footprint.RowPitch = (UINT)rowPitch; - downloadCommandList->CopyTextureRegion(&dst, 0, 0, 0, &src, nullptr); + barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE; + barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_DEPTH_WRITE; + downloadCommandList->ResourceBarrier(1, &barrier); - barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE; - barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_DEPTH_WRITE; - downloadCommandList->ResourceBarrier(1, &barrier); + downloadCommandList->Close(); + m_commandQueueGraphic->ExecuteCommandLists(1, 
(ID3D12CommandList**)&downloadCommandList); - downloadCommandList->Close(); - m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)&downloadCommandList); + //Wait for result + ID3D12Fence *fence; + check( + m_device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&fence)) + ); + HANDLE handle = CreateEvent(0, FALSE, FALSE, 0); + fence->SetEventOnCompletion(1, handle); + m_commandQueueGraphic->Signal(fence, 1); + WaitForSingleObject(handle, INFINITE); + CloseHandle(handle); - //Wait for result - ID3D12Fence *fence; - check( - m_device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&fence)) - ); - HANDLE handle = CreateEvent(0, FALSE, FALSE, 0); - fence->SetEventOnCompletion(1, handle); - m_commandQueueGraphic->Signal(fence, 1); - WaitForSingleObject(handle, INFINITE); - CloseHandle(handle); - - char *ptrAsChar = (char*)ptr; - float *writeDestPtr; - check(writeDest->Map(0, nullptr, (void**)&writeDestPtr)); - // TODO : this should be done by the gpu - for (unsigned row = 0; row < RSXThread::m_height; row++) + char *ptrAsChar = (char*)ptr; + float *writeDestPtr; + check(writeDest->Map(0, nullptr, (void**)&writeDestPtr)); + // TODO : this should be done by the gpu + for (unsigned row = 0; row < RSXThread::m_height; row++) + { + for (unsigned i = 0; i < RSXThread::m_width; i++) { - for (unsigned i = 0; i < RSXThread::m_width; i++) - { - unsigned char c = (unsigned char) (writeDestPtr[row * rowPitch / 4 + i] * 255.); - ptrAsChar[4 * (row * RSXThread::m_width + i)] = c; - ptrAsChar[4 * (row * RSXThread::m_width + i) + 1] = c; - ptrAsChar[4 * (row * RSXThread::m_width + i) + 2] = c; - ptrAsChar[4 * (row * RSXThread::m_width + i) + 3] = c; - } + unsigned char c = (unsigned char) (writeDestPtr[row * rowPitch / 4 + i] * 255.); + ptrAsChar[4 * (row * RSXThread::m_width + i)] = c; + ptrAsChar[4 * (row * RSXThread::m_width + i) + 1] = c; + ptrAsChar[4 * (row * RSXThread::m_width + i) + 2] = c; + ptrAsChar[4 * (row * RSXThread::m_width + i) + 3] = c; } + } - 
writeDest->Release(); - fence->Release(); - downloadCommandList->Release(); + writeDest->Release(); + fence->Release(); + downloadCommandList->Release(); } #endif \ No newline at end of file From 6d61e36f5d33cbcb2140a1f97ca70a8956791415 Mon Sep 17 00:00:00 2001 From: vlj Date: Sun, 24 May 2015 18:36:22 +0200 Subject: [PATCH 116/343] d3d12: Fix for rsx_fp_dynamic_test2 The compare function shouldn't be swizzled. --- .../Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp index e0f238677e..0416b4758a 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp @@ -70,17 +70,17 @@ std::string D3D12FragmentDecompiler::compareFunction(COMPARE f, const std::strin default: abort(); case COMPARE::FUNCTION_SEQ: - return "(" + Op0 + " == " + Op1 + ").xxxx"; + return "(" + Op0 + " == " + Op1 + ")"; case COMPARE::FUNCTION_SGE: - return "(" + Op0 + " >= " + Op1 +").xxxx"; + return "(" + Op0 + " >= " + Op1 +")"; case COMPARE::FUNCTION_SGT: - return "(" + Op0 + " > " + Op1 + ").xxxx"; + return "(" + Op0 + " > " + Op1 + ")"; case COMPARE::FUNCTION_SLE: - return "(" + Op0 + " <= " + Op1 + ").xxxx"; + return "(" + Op0 + " <= " + Op1 + ")"; case COMPARE::FUNCTION_SLT: - return "(" + Op0 + " < " + Op1 + ").xxxx"; + return "(" + Op0 + " < " + Op1 + ")"; case COMPARE::FUNCTION_SNE: - return "(" + Op0 + " != " + Op1 + ").xxxx"; + return "(" + Op0 + " != " + Op1 + ")"; } } From 12fc6e6145ceabf3ad30b07442992d6ce9b61dc4 Mon Sep 17 00:00:00 2001 From: vlj Date: Sun, 24 May 2015 19:10:18 +0200 Subject: [PATCH 117/343] d3d12: Use separate vertex constant buffer for good --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 58 +++++++++++++++++---------- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 4 +- 2 files changed, 38 insertions(+), 24 deletions(-) diff 
--git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 266f07c3f1..176e759292 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -25,7 +25,6 @@ static void check(HRESULT hr) void D3D12GSRender::ResourceStorage::Reset() { m_currentVertexBuffersHeapOffset = 0; - m_constantsBufferSize = 0; m_constantsBufferIndex = 0; m_currentScaleOffsetBufferIndex = 0; m_constantsBuffersHeapFreeSpace = 0; @@ -80,15 +79,6 @@ void D3D12GSRender::ResourceStorage::Init(ID3D12Device *device) IID_PPV_ARGS(&m_indexBuffer) )); - check(device->CreateCommittedResource( - &heapProp, - D3D12_HEAP_FLAG_NONE, - &resDesc, - D3D12_RESOURCE_STATE_GENERIC_READ, - nullptr, - IID_PPV_ARGS(&m_constantsVertexBuffer) - )); - D3D12_DESCRIPTOR_HEAP_DESC descriptorHeapDesc = {}; descriptorHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; descriptorHeapDesc.NumDescriptors = 1000; // For safety @@ -129,7 +119,6 @@ void D3D12GSRender::ResourceStorage::Release() m_backbufferAsRendertarget->Release(); m_constantsBufferDescriptorsHeap->Release(); m_scaleOffsetDescriptorHeap->Release(); - m_constantsVertexBuffer->Release(); m_constantsBuffersHeap->Release(); m_vertexBuffersHeap->Release(); m_backBuffer->Release(); @@ -322,6 +311,8 @@ D3D12GSRender::D3D12GSRender() m_currentResourceStorageIndex = m_swapChain->GetCurrentBackBufferIndex(); compileF32toU8CS(); + + vertexConstantShadowCopy = new float[512 * 4]; } D3D12GSRender::~D3D12GSRender() @@ -335,6 +326,7 @@ D3D12GSRender::~D3D12GSRender() m_rootSignature->Release(); m_swapChain->Release(); m_device->Release(); + delete[] vertexConstantShadowCopy; } D3D12GSRender::ResourceStorage &D3D12GSRender::getCurrentResourceStorage() @@ -645,27 +637,49 @@ void D3D12GSRender::setScaleOffset() void D3D12GSRender::FillVertexShaderConstantsBuffer() { - void *constantsBufferMap; - check(getCurrentResourceStorage().m_constantsVertexBuffer->Map(0, nullptr, &constantsBufferMap)); - for 
(const RSXTransformConstant& c : m_transform_constants) { size_t offset = c.id * 4 * sizeof(float); float vector[] = { c.x, c.y, c.z, c.w }; - memcpy((char*)constantsBufferMap + offset, vector, 4 * sizeof(float)); - size_t bufferSizeCandidate = offset + 4 * sizeof(float); - getCurrentResourceStorage().m_constantsBufferSize = bufferSizeCandidate > getCurrentResourceStorage().m_constantsBufferSize ? bufferSizeCandidate : getCurrentResourceStorage().m_constantsBufferSize; + memcpy((char*)vertexConstantShadowCopy + offset, vector, 4 * sizeof(float)); } - getCurrentResourceStorage().m_constantsVertexBuffer->Unmap(0, nullptr); - // make it multiple of 256 bytes - getCurrentResourceStorage().m_constantsBufferSize = (getCurrentResourceStorage().m_constantsBufferSize + 255) & ~255; + + size_t constantBuffersHeapOffset = getCurrentResourceStorage().m_constantsBuffersHeapFreeSpace; + // 65536 alignment + constantBuffersHeapOffset = (constantBuffersHeapOffset + 65536 - 1) & ~65535; + + D3D12_RESOURCE_DESC resDesc = {}; + resDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + resDesc.Width = 512 * 4 * sizeof(float); + resDesc.Height = 1; + resDesc.DepthOrArraySize = 1; + resDesc.SampleDesc.Count = 1; + resDesc.MipLevels = 1; + resDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + + ID3D12Resource *constantsBuffer; + check(m_device->CreatePlacedResource( + getCurrentResourceStorage().m_constantsBuffersHeap, + constantBuffersHeapOffset, + &resDesc, + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&constantsBuffer) + )); + + void *constantsBufferMap; + check(constantsBuffer->Map(0, nullptr, &constantsBufferMap)); + memcpy(constantsBufferMap, vertexConstantShadowCopy, 512 * 4 * sizeof(float)); + constantsBuffer->Unmap(0, nullptr); D3D12_CONSTANT_BUFFER_VIEW_DESC constantBufferViewDesc = {}; - constantBufferViewDesc.BufferLocation = getCurrentResourceStorage().m_constantsVertexBuffer->GetGPUVirtualAddress(); - constantBufferViewDesc.SizeInBytes = 
(UINT)getCurrentResourceStorage().m_constantsBufferSize; + constantBufferViewDesc.BufferLocation = constantsBuffer->GetGPUVirtualAddress(); + constantBufferViewDesc.SizeInBytes = 512 * 4 * sizeof(float); D3D12_CPU_DESCRIPTOR_HANDLE Handle = getCurrentResourceStorage().m_constantsBufferDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); Handle.ptr += getCurrentResourceStorage().m_constantsBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); m_device->CreateConstantBufferView(&constantBufferViewDesc, Handle); + getCurrentResourceStorage().m_constantsBuffersHeapFreeSpace = constantBuffersHeapOffset + 512 * 4 * sizeof(float); + getCurrentResourceStorage().m_inflightResources.push_back(constantsBuffer); } static diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 0dbbe0477e..00c11413d8 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -65,11 +65,10 @@ private: ID3D12Resource *m_indexBuffer; // Constants storage - ID3D12Resource *m_constantsVertexBuffer; ID3D12Heap *m_constantsBuffersHeap; size_t m_constantsBuffersHeapFreeSpace; ID3D12DescriptorHeap *m_constantsBufferDescriptorsHeap; - size_t m_constantsBufferSize, m_constantsBufferIndex; + size_t m_constantsBufferIndex; ID3D12DescriptorHeap *m_scaleOffsetDescriptorHeap; size_t m_currentScaleOffsetBufferIndex; @@ -113,6 +112,7 @@ public: GSFrameBase2 *m_frame; u32 m_draw_frames; u32 m_skip_frames; + float *vertexConstantShadowCopy; D3D12GSRender(); virtual ~D3D12GSRender(); From bd68d382bd8963af032dcd041b537cc835ddb8ac Mon Sep 17 00:00:00 2001 From: vlj Date: Sun, 24 May 2015 20:19:23 +0200 Subject: [PATCH 118/343] d3d12: Use __mm_stream_si128 to send texture/constants data --- rpcs3/Emu/RSX/D3D12/D3D12.h | 27 +++++++++++++++++++++++++++ rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 13 +++---------- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 2 +- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 10 +--------- 
rpcs3/emucore.vcxproj | 1 + rpcs3/emucore.vcxproj.filters | 3 +++ 6 files changed, 36 insertions(+), 20 deletions(-) create mode 100644 rpcs3/Emu/RSX/D3D12/D3D12.h diff --git a/rpcs3/Emu/RSX/D3D12/D3D12.h b/rpcs3/Emu/RSX/D3D12/D3D12.h new file mode 100644 index 0000000000..5582cc0e41 --- /dev/null +++ b/rpcs3/Emu/RSX/D3D12/D3D12.h @@ -0,0 +1,27 @@ +#pragma once +#if defined(DX12_SUPPORT) + +#include + +inline +void check(HRESULT hr) +{ + if (hr != 0) + abort(); +} + +/** + * Send data to dst pointer without polluting cache. + * Usefull to write to mapped memory from upload heap. + */ +inline +void streamToBuffer(void* dst, void* src, size_t sizeInBytes) +{ + for (unsigned i = 0; i < sizeInBytes / 16; i++) + { + __m128i *srcPtr = (__m128i*) ((char*)src + i * 16); + _mm_stream_si128((__m128i*)((char*)dst + i * 16), *srcPtr); + } +} + +#endif \ No newline at end of file diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 176e759292..9f99cbd4c7 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -15,13 +15,6 @@ void SetGetD3DGSFrameCallback(GetGSFrameCb2 value) GetGSFrame = value; } -static void check(HRESULT hr) -{ - if (hr != 0) - abort(); -} - - void D3D12GSRender::ResourceStorage::Reset() { m_currentVertexBuffersHeapOffset = 0; @@ -622,7 +615,7 @@ void D3D12GSRender::setScaleOffset() void *scaleOffsetMap; check(scaleOffsetBuffer->Map(0, nullptr, &scaleOffsetMap)); - memcpy((char*)scaleOffsetMap, scaleOffsetMat, 16 * sizeof(float)); + streamToBuffer(scaleOffsetMap, scaleOffsetMat, 16 * sizeof(float)); scaleOffsetBuffer->Unmap(0, nullptr); D3D12_CONSTANT_BUFFER_VIEW_DESC constantBufferViewDesc = {}; @@ -669,7 +662,7 @@ void D3D12GSRender::FillVertexShaderConstantsBuffer() void *constantsBufferMap; check(constantsBuffer->Map(0, nullptr, &constantsBufferMap)); - memcpy(constantsBufferMap, vertexConstantShadowCopy, 512 * 4 * sizeof(float)); + streamToBuffer(constantsBufferMap, 
vertexConstantShadowCopy, 512 * 4 * sizeof(float)); constantsBuffer->Unmap(0, nullptr); D3D12_CONSTANT_BUFFER_VIEW_DESC constantBufferViewDesc = {}; @@ -769,7 +762,7 @@ void D3D12GSRender::FillPixelShaderConstantsBuffer() vector[3] = c3; } - memcpy((char*)constantsBufferMap + offset, vector, 4 * sizeof(u32)); + streamToBuffer((char*)constantsBufferMap + offset, vector, 4 * sizeof(u32)); offset += 4 * sizeof(u32); } diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 00c11413d8..0f864646a7 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -1,7 +1,7 @@ #pragma once #if defined(DX12_SUPPORT) -#include +#include "D3D12.h" #include "rpcs3/Ini.h" #include "Utilities/rPlatform.h" // only for rImage #include "Utilities/File.h" diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index edb91c9592..e855ad7554 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -3,12 +3,6 @@ #include "D3D12GSRender.h" // For clarity this code deals with texture but belongs to D3D12GSRender class -static void check(HRESULT hr) -{ - if (hr != 0) - abort(); -} - size_t D3D12GSRender::UploadTextures() { size_t usedTexture = 0; @@ -67,9 +61,7 @@ size_t D3D12GSRender::UploadTextures() rowPitch = (rowPitch + 255) & ~255; // Upload with correct rowpitch for (unsigned row = 0; row < m_textures[i].GetHeight(); row++) - { - memcpy((char*)textureData + row * rowPitch, pixels + row * m_textures[i].m_pitch, m_textures[i].m_pitch); - } + streamToBuffer((char*)textureData + row * rowPitch, (char*)pixels + row * m_textures[i].m_pitch, m_textures[i].m_pitch); Texture->Unmap(0, nullptr); D3D12_RESOURCE_DESC vramTextureDesc = {}; diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index 8dccd27240..db6314a0ce 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -505,6 +505,7 @@ + diff --git a/rpcs3/emucore.vcxproj.filters 
b/rpcs3/emucore.vcxproj.filters index 204f2cc488..fbc8a96a59 100644 --- a/rpcs3/emucore.vcxproj.filters +++ b/rpcs3/emucore.vcxproj.filters @@ -1873,5 +1873,8 @@ Emu\GPU\RSX\D3D12 + + Emu\GPU\RSX\D3D12 + \ No newline at end of file From 5ce026126505821962845ad8b4ea62779a4a5eab Mon Sep 17 00:00:00 2001 From: vlj Date: Sun, 24 May 2015 20:28:26 +0200 Subject: [PATCH 119/343] d3d12: Release index buffer and upload command allocator --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 9f99cbd4c7..01557cdc6a 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -109,6 +109,7 @@ void D3D12GSRender::ResourceStorage::Init(ID3D12Device *device) void D3D12GSRender::ResourceStorage::Release() { // NOTE: Should be released only if no command are in flight ! + m_indexBuffer->Release(); m_backbufferAsRendertarget->Release(); m_constantsBufferDescriptorsHeap->Release(); m_scaleOffsetDescriptorHeap->Release(); @@ -124,6 +125,7 @@ void D3D12GSRender::ResourceStorage::Release() for (auto tmp : m_inflightCommandList) tmp->Release(); m_commandAllocator->Release(); + m_textureUploadCommandAllocator->Release(); } // 32 bits float to U8 unorm CS From 232c97cc823eb9781d1734ccf0684e35e870da29 Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 25 May 2015 00:23:08 +0200 Subject: [PATCH 120/343] d3d12: Add support for indexed draw --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 227 ++++++++++++-------------- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 7 +- 2 files changed, 110 insertions(+), 124 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 01557cdc6a..216fbaff0a 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -17,7 +17,7 @@ void SetGetD3DGSFrameCallback(GetGSFrameCb2 value) void D3D12GSRender::ResourceStorage::Reset() { - 
m_currentVertexBuffersHeapOffset = 0; + m_vertexIndexBuffersHeapFreeSpace = 0; m_constantsBufferIndex = 0; m_currentScaleOffsetBufferIndex = 0; m_constantsBuffersHeapFreeSpace = 0; @@ -44,50 +44,28 @@ void D3D12GSRender::ResourceStorage::Init(ID3D12Device *device) // Create heap for vertex and constants buffers D3D12_HEAP_DESC vertexBufferHeapDesc = {}; // 16 MB wide - vertexBufferHeapDesc.SizeInBytes = 1024 * 1024 * 16; + vertexBufferHeapDesc.SizeInBytes = 1024 * 1024 * 256; vertexBufferHeapDesc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS; vertexBufferHeapDesc.Properties.Type = D3D12_HEAP_TYPE_UPLOAD; - check(device->CreateHeap(&vertexBufferHeapDesc, IID_PPV_ARGS(&m_vertexBuffersHeap))); + check(device->CreateHeap(&vertexBufferHeapDesc, IID_PPV_ARGS(&m_vertexIndexBuffersHeap))); check(device->CreateHeap(&vertexBufferHeapDesc, IID_PPV_ARGS(&m_constantsBuffersHeap))); - - D3D12_HEAP_PROPERTIES heapProp = {}; - heapProp.Type = D3D12_HEAP_TYPE_UPLOAD; - - D3D12_RESOURCE_DESC resDesc = {}; - resDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; - resDesc.Width = (UINT)1024 * 1024; - resDesc.Height = 1; - resDesc.DepthOrArraySize = 1; - resDesc.SampleDesc.Count = 1; - resDesc.MipLevels = 1; - resDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; - - check(device->CreateCommittedResource( - &heapProp, - D3D12_HEAP_FLAG_NONE, - &resDesc, - D3D12_RESOURCE_STATE_GENERIC_READ, - nullptr, - IID_PPV_ARGS(&m_indexBuffer) - )); - D3D12_DESCRIPTOR_HEAP_DESC descriptorHeapDesc = {}; descriptorHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; - descriptorHeapDesc.NumDescriptors = 1000; // For safety + descriptorHeapDesc.NumDescriptors = 10000; // For safety descriptorHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; check(device->CreateDescriptorHeap(&descriptorHeapDesc, IID_PPV_ARGS(&m_constantsBufferDescriptorsHeap))); descriptorHeapDesc = {}; descriptorHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; - descriptorHeapDesc.NumDescriptors = 1000; // For safety + 
descriptorHeapDesc.NumDescriptors = 10000; // For safety descriptorHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; check(device->CreateDescriptorHeap(&descriptorHeapDesc, IID_PPV_ARGS(&m_scaleOffsetDescriptorHeap))); // Texture D3D12_HEAP_DESC heapDescription = {}; - heapDescription.SizeInBytes = 256 * 256 * 256 * 16; + heapDescription.SizeInBytes = 1024 * 1024 * 256; heapDescription.Properties.Type = D3D12_HEAP_TYPE_UPLOAD; heapDescription.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS; check(device->CreateHeap(&heapDescription, IID_PPV_ARGS(&m_uploadTextureHeap))); @@ -97,7 +75,7 @@ void D3D12GSRender::ResourceStorage::Init(ID3D12Device *device) check(device->CreateHeap(&heapDescription, IID_PPV_ARGS(&m_textureStorage))); D3D12_DESCRIPTOR_HEAP_DESC textureDescriptorDesc = {}; - textureDescriptorDesc.NumDescriptors = 1000; // For safety + textureDescriptorDesc.NumDescriptors = 2048; // For safety textureDescriptorDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; textureDescriptorDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; check(device->CreateDescriptorHeap(&textureDescriptorDesc, IID_PPV_ARGS(&m_textureDescriptorsHeap))); @@ -109,12 +87,11 @@ void D3D12GSRender::ResourceStorage::Init(ID3D12Device *device) void D3D12GSRender::ResourceStorage::Release() { // NOTE: Should be released only if no command are in flight ! 
- m_indexBuffer->Release(); m_backbufferAsRendertarget->Release(); m_constantsBufferDescriptorsHeap->Release(); m_scaleOffsetDescriptorHeap->Release(); m_constantsBuffersHeap->Release(); - m_vertexBuffersHeap->Release(); + m_vertexIndexBuffersHeap->Release(); m_backBuffer->Release(); for (auto tmp : m_inflightResources) tmp->Release(); @@ -451,9 +428,9 @@ void D3D12GSRender::ExecCMD(u32 cmd) m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**) &commandList); } -std::vector D3D12GSRender::EnableVertexData(bool indexed_draw) +std::pair, D3D12_INDEX_BUFFER_VIEW> D3D12GSRender::EnableVertexData(bool indexed_draw) { - std::vector result; + std::pair, D3D12_INDEX_BUFFER_VIEW> result; m_IASet = getIALayout(m_device, indexed_draw, m_vertex_data); const u32 data_offset = indexed_draw ? 0 : m_draw_array_first; @@ -464,6 +441,9 @@ std::vector D3D12GSRender::EnableVertexData(bool index const size_t item_size = m_vertex_data[i].GetTypeSize() * m_vertex_data[i].size; const size_t data_size = m_vertex_data[i].data.size() - data_offset * item_size; size_t subBufferSize = (data_offset + data_size) * item_size; + // 65536 alignment + size_t bufferHeapOffset = getCurrentResourceStorage().m_vertexIndexBuffersHeapFreeSpace; + bufferHeapOffset = (bufferHeapOffset + 65536 - 1) & ~65535; ID3D12Resource *vertexBuffer; D3D12_RESOURCE_DESC vertexBufferDesc = {}; @@ -475,8 +455,8 @@ std::vector D3D12GSRender::EnableVertexData(bool index vertexBufferDesc.MipLevels = 1; vertexBufferDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; check(m_device->CreatePlacedResource( - getCurrentResourceStorage().m_vertexBuffersHeap, - getCurrentResourceStorage().m_currentVertexBuffersHeapOffset, + getCurrentResourceStorage().m_vertexIndexBuffersHeap, + bufferHeapOffset, &vertexBufferDesc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, @@ -492,40 +472,11 @@ std::vector D3D12GSRender::EnableVertexData(bool index vertexBufferView.BufferLocation = vertexBuffer->GetGPUVirtualAddress(); 
vertexBufferView.SizeInBytes = (UINT)subBufferSize; vertexBufferView.StrideInBytes = (UINT)item_size; - result.push_back(vertexBufferView); - - // 65536 alignment - getCurrentResourceStorage().m_currentVertexBuffersHeapOffset += (subBufferSize + 65536 - 1) & ~65535; - } - - if (indexed_draw) - { -/* D3D12_RESOURCE_DESC resDesc = {}; - resDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; - resDesc.Width = (UINT)m_indexed_array.m_data.size(); - resDesc.Height = 1; - resDesc.DepthOrArraySize = 1; - resDesc.SampleDesc.Count = 1; - resDesc.MipLevels = 1; - resDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; - check(m_device->CreateCommittedResource( - &heapProp, - D3D12_HEAP_FLAG_NONE, - &resDesc, - D3D12_RESOURCE_STATE_GENERIC_READ, - nullptr, - IID_PPV_ARGS(&m_indexBuffer) - )); - - check(m_indexBuffer->Map(0, nullptr, (void**)&bufferMap)); - memcpy(bufferMap, m_indexed_array.m_data.data(), m_indexed_array.m_data.size()); - m_indexBuffer->Unmap(0, nullptr); - - D3D12_INDEX_BUFFER_VIEW indexBufferView = {}; - indexBufferView.SizeInBytes = (UINT)m_indexed_array.m_data.size(); - indexBufferView.BufferLocation = m_indexBuffer->GetGPUVirtualAddress();*/ + result.first.push_back(vertexBufferView); + getCurrentResourceStorage().m_vertexIndexBuffersHeapFreeSpace = bufferHeapOffset + subBufferSize; } + // Only handle quads now switch (m_draw_mode - 1) { default: @@ -543,26 +494,86 @@ std::vector D3D12GSRender::EnableVertexData(bool index break; } - if (m_forcedIndexBuffer) + if (indexed_draw || m_forcedIndexBuffer) { - unsigned short *bufferMap; - check(getCurrentResourceStorage().m_indexBuffer->Map(0, nullptr, (void**)&bufferMap)); + size_t subBufferSize; + if (indexed_draw && !m_forcedIndexBuffer) + subBufferSize = m_indexed_array.m_data.size(); + else if (indexed_draw && m_forcedIndexBuffer) + subBufferSize = 6 * m_indexed_array.m_data.size() / 4; + else + subBufferSize = 2 * m_draw_array_count * 6 / 4; + // 65536 alignment + size_t bufferHeapOffset = 
getCurrentResourceStorage().m_vertexIndexBuffersHeapFreeSpace; + bufferHeapOffset = (bufferHeapOffset + 65536 - 1) & ~65535; - memcpy(bufferMap, m_indexed_array.m_data.data(), m_indexed_array.m_data.size()); - getCurrentResourceStorage().m_indexBufferCount = 0; - // QUADS - for (unsigned i = 0; i < m_draw_array_count / 4; i++) + + D3D12_RESOURCE_DESC resDesc = {}; + resDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + resDesc.Width = (UINT)subBufferSize; + resDesc.Height = 1; + resDesc.DepthOrArraySize = 1; + resDesc.SampleDesc.Count = 1; + resDesc.MipLevels = 1; + resDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + + ID3D12Resource *indexBuffer; + check(m_device->CreatePlacedResource( + getCurrentResourceStorage().m_vertexIndexBuffersHeap, + D3D12_HEAP_FLAG_NONE, + &resDesc, + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&indexBuffer) + )); + + unsigned short *bufferMap; + check(indexBuffer->Map(0, nullptr, (void**)&bufferMap)); + size_t forcedIndexCount = 0; + if (indexed_draw && !m_forcedIndexBuffer) + memcpy(bufferMap, m_indexed_array.m_data.data(), subBufferSize); + else if (indexed_draw && m_forcedIndexBuffer) { - // First triangle - bufferMap[6 * i] = 4 * i; - bufferMap[6 * i + 1] = 4 * i + 1; - bufferMap[6 * i + 2] = 4 * i + 2; - bufferMap[6 * i + 3] = 4 * i; - bufferMap[6 * i + 4] = 4 * i + 2; - bufferMap[6 * i + 5] = 4 * i + 3; - getCurrentResourceStorage().m_indexBufferCount += 6; + size_t indexcount = m_indexed_array.m_data.size() / 2; + unsigned short *indexList = (unsigned short*)m_indexed_array.m_data.data(); + for (unsigned i = 0; i < indexcount / 4; i++) + { + // First triangle + bufferMap[6 * i] = indexList[4 * i]; + bufferMap[6 * i + 1] = indexList[4 * i + 1]; + bufferMap[6 * i + 2] = indexList[4 * i + 2]; + // Second triangle + bufferMap[6 * i + 3] = indexList[4 * i]; + bufferMap[6 * i + 4] = indexList[4 * i + 2]; + bufferMap[6 * i + 5] = indexList[4 * i + 3]; + forcedIndexCount += 6; + } } - 
getCurrentResourceStorage().m_indexBuffer->Unmap(0, nullptr); + else + { + for (unsigned i = 0; i < m_draw_array_count / 4; i++) + { + // First triangle + bufferMap[6 * i] = 4 * i; + bufferMap[6 * i + 1] = 4 * i + 1; + bufferMap[6 * i + 2] = 4 * i + 2; + // Second triangle + bufferMap[6 * i + 3] = 4 * i; + bufferMap[6 * i + 4] = 4 * i + 2; + bufferMap[6 * i + 5] = 4 * i + 3; + forcedIndexCount += 6; + } + } + indexBuffer->Unmap(0, nullptr); + getCurrentResourceStorage().m_inflightResources.push_back(indexBuffer); + getCurrentResourceStorage().m_vertexIndexBuffersHeapFreeSpace = bufferHeapOffset + subBufferSize; + getCurrentResourceStorage().m_indexBufferCount = forcedIndexCount; + + D3D12_INDEX_BUFFER_VIEW indexBufferView = {}; + indexBufferView.SizeInBytes = (UINT)subBufferSize; + indexBufferView.BufferLocation = indexBuffer->GetGPUVirtualAddress(); + indexBufferView.Format = DXGI_FORMAT_R16_UINT; + result.second = indexBufferView; } return result; } @@ -901,21 +912,15 @@ void D3D12GSRender::ExecCMD() if (m_indexed_array.m_count) { - // LoadVertexData(m_indexed_array.index_min, m_indexed_array.index_max - m_indexed_array.index_min + 1); + LoadVertexData(m_indexed_array.index_min, m_indexed_array.index_max - m_indexed_array.index_min + 1); } if (m_indexed_array.m_count || m_draw_array_count) { - const std::vector &vertexBufferViews = EnableVertexData(m_indexed_array.m_count ? 
true : false); - commandList->IASetVertexBuffers(0, (UINT)vertexBufferViews.size(), vertexBufferViews.data()); - if (m_forcedIndexBuffer) - { - D3D12_INDEX_BUFFER_VIEW indexBufferView = {}; - indexBufferView.SizeInBytes = (UINT)getCurrentResourceStorage().m_indexBufferCount * sizeof(unsigned short); - indexBufferView.BufferLocation = getCurrentResourceStorage().m_indexBuffer->GetGPUVirtualAddress(); - indexBufferView.Format = DXGI_FORMAT_R16_UINT; - commandList->IASetIndexBuffer(&indexBufferView); - } + const std::pair, D3D12_INDEX_BUFFER_VIEW> &vertexIndexBufferViews = EnableVertexData(m_indexed_array.m_count ? true : false); + commandList->IASetVertexBuffers(0, (UINT)vertexIndexBufferViews.first.size(), vertexIndexBufferViews.first.data()); + if (m_forcedIndexBuffer || m_indexed_array.m_count) + commandList->IASetIndexBuffer(&vertexIndexBufferViews.second); } if (!LoadProgram()) @@ -1031,36 +1036,18 @@ void D3D12GSRender::ExecCMD() break; } - if (m_forcedIndexBuffer) + // Indexed quad + if (m_forcedIndexBuffer && m_indexed_array.m_count) + commandList->DrawIndexedInstanced((UINT)getCurrentResourceStorage().m_indexBufferCount, 1, 0, 0, 0); + // Non indexed quad + else if (m_forcedIndexBuffer && !m_indexed_array.m_count) commandList->DrawIndexedInstanced((UINT)getCurrentResourceStorage().m_indexBufferCount, 1, 0, (UINT)m_draw_array_first, 0); + // Indexed triangles + else if (m_indexed_array.m_count) + commandList->DrawIndexedInstanced(m_indexed_array.m_count, 1, 0, (UINT)m_draw_array_first, 0); else if (m_draw_array_count) commandList->DrawInstanced(m_draw_array_count, 1, m_draw_array_first, 0); - if (m_indexed_array.m_count) - { -/* switch (m_indexed_array.m_type) - { - case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_32: - commandList->DrawIndexedInstanced - glDrawElements(m_draw_mode - 1, m_indexed_array.m_count, GL_UNSIGNED_INT, nullptr); - checkForGlError("glDrawElements #4"); - break; - - case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16: - glDrawElements(m_draw_mode - 1, 
m_indexed_array.m_count, GL_UNSIGNED_SHORT, nullptr); - checkForGlError("glDrawElements #2"); - break; - - default: - LOG_ERROR(RSX, "Bad indexed array type (%d)", m_indexed_array.m_type); - break; - } - - DisableVertexData(); - m_indexed_array.Reset();*/ - } - - check(commandList->Close()); m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)&commandList); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 0f864646a7..804aeec80a 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -59,10 +59,9 @@ private: std::vector m_inflightResources; // Vertex storage - size_t m_currentVertexBuffersHeapOffset; - ID3D12Heap *m_vertexBuffersHeap; + size_t m_vertexIndexBuffersHeapFreeSpace; + ID3D12Heap *m_vertexIndexBuffersHeap; size_t m_indexBufferCount; - ID3D12Resource *m_indexBuffer; // Constants storage ID3D12Heap *m_constantsBuffersHeap; @@ -121,7 +120,7 @@ private: virtual void Close() override; bool LoadProgram(); - std::vector EnableVertexData(bool indexed_draw = false); + std::pair, D3D12_INDEX_BUFFER_VIEW> EnableVertexData(bool indexed_draw = false); void setScaleOffset(); void FillVertexShaderConstantsBuffer(); void FillPixelShaderConstantsBuffer(); From b8073b4e75865edaf75207bca09e8dd330e40b70 Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 25 May 2015 00:54:43 +0200 Subject: [PATCH 121/343] d3d12: Try to fix forced quad indexed buffer --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 216fbaff0a..2951a04a57 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -543,9 +543,9 @@ std::pair, D3D12_INDEX_BUFFER_VIEW> D3D12G bufferMap[6 * i + 1] = indexList[4 * i + 1]; bufferMap[6 * i + 2] = indexList[4 * i + 2]; // Second triangle - bufferMap[6 * i + 3] = indexList[4 * i]; - 
bufferMap[6 * i + 4] = indexList[4 * i + 2]; - bufferMap[6 * i + 5] = indexList[4 * i + 3]; + bufferMap[6 * i + 3] = indexList[4 * i + 2]; + bufferMap[6 * i + 4] = indexList[4 * i + 3]; + bufferMap[6 * i + 5] = indexList[4 * i]; forcedIndexCount += 6; } } @@ -911,9 +911,7 @@ void D3D12GSRender::ExecCMD() commandList->SetGraphicsRootSignature(m_rootSignature); if (m_indexed_array.m_count) - { LoadVertexData(m_indexed_array.index_min, m_indexed_array.index_max - m_indexed_array.index_min + 1); - } if (m_indexed_array.m_count || m_draw_array_count) { @@ -1050,7 +1048,7 @@ void D3D12GSRender::ExecCMD() check(commandList->Close()); m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)&commandList); - + m_indexed_array.Reset(); WriteDepthBuffer(); /* if (m_set_color_mask) From 2df1220144849d3cb2c9cbe3200777ccfa95a357 Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 25 May 2015 01:13:42 +0200 Subject: [PATCH 122/343] d3d12: Use GL enum for primitive type --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 67 +++++++++++++-------------- 1 file changed, 31 insertions(+), 36 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 2951a04a57..ddbbcd4aec 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -480,16 +480,18 @@ std::pair, D3D12_INDEX_BUFFER_VIEW> D3D12G switch (m_draw_mode - 1) { default: - case 0: - case 1: - case 2: - case 3: - case 4: - case 5: - case 6: + case GL_POINTS: + case GL_LINES: + case GL_LINE_LOOP: + case GL_LINE_STRIP: + case GL_TRIANGLES: + case GL_TRIANGLE_STRIP: + case GL_TRIANGLE_FAN: + case GL_QUAD_STRIP: + case GL_POLYGON: m_forcedIndexBuffer = false; break; - case 7: + case GL_QUADS: m_forcedIndexBuffer = true; break; } @@ -809,33 +811,24 @@ bool D3D12GSRender::LoadProgram() } D3D12PipelineProperties prop = {}; - /* - #define GL_POINTS 0x0000 - #define GL_LINES 0x0001 - #define GL_LINE_LOOP 0x0002 - #define GL_LINE_STRIP 0x0003 - #define 
GL_TRIANGLES 0x0004 - #define GL_TRIANGLE_STRIP 0x0005 - #define GL_TRIANGLE_FAN 0x0006 - #define GL_QUADS 0x0007 - #define GL_QUAD_STRIP 0x0008 - #define GL_POLYGON 0x0009 - */ switch (m_draw_mode - 1) { - case 0: + case GL_POINTS: prop.Topology = D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT; break; - case 1: - case 2: - case 3: + case GL_LINES: + case GL_LINE_LOOP: + case GL_LINE_STRIP: prop.Topology = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; break; - case 4: - case 5: - case 6: + case GL_TRIANGLES: + case GL_TRIANGLE_STRIP: + case GL_TRIANGLE_FAN: prop.Topology = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break; + case GL_QUADS: + case GL_QUAD_STRIP: + case GL_POLYGON: default: // LOG_ERROR(RSX, "Unsupported primitive type"); prop.Topology = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; @@ -1004,30 +997,32 @@ void D3D12GSRender::ExecCMD() bool requireIndexBuffer = false; switch (m_draw_mode - 1) { - case 0: + case GL_POINTS: commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_POINTLIST); break; - case 1: + case GL_LINES: commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_LINELIST); break; - case 2: + case GL_LINE_LOOP: commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_LINELIST_ADJ); break; - case 3: + case GL_LINE_STRIP: commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_LINESTRIP); break; - case 4: + case GL_TRIANGLES: commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); break; - case 5: + case GL_TRIANGLE_STRIP: commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); break; - case 6: + case GL_TRIANGLE_FAN: commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ); break; - case 7: + case GL_QUADS: commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); requireIndexBuffer = true; + case GL_QUAD_STRIP: + case GL_POLYGON: default: commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); // LOG_ERROR(RSX, "Unsupported primitive type"); @@ -1042,7 +1037,7 @@ void 
D3D12GSRender::ExecCMD() commandList->DrawIndexedInstanced((UINT)getCurrentResourceStorage().m_indexBufferCount, 1, 0, (UINT)m_draw_array_first, 0); // Indexed triangles else if (m_indexed_array.m_count) - commandList->DrawIndexedInstanced(m_indexed_array.m_count, 1, 0, (UINT)m_draw_array_first, 0); + commandList->DrawIndexedInstanced(m_indexed_array.m_data.size() / 2, 1, 0, (UINT)m_draw_array_first, 0); else if (m_draw_array_count) commandList->DrawInstanced(m_draw_array_count, 1, m_draw_array_first, 0); From cd951af664841b2784bd5eaca5fc1d32054876bc Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 25 May 2015 01:41:42 +0200 Subject: [PATCH 123/343] d3d12: Fix to run fw_vertex_attribute.ppu.elf A little dirty, need to be improved. --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index ddbbcd4aec..556951333e 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -574,7 +574,18 @@ std::pair, D3D12_INDEX_BUFFER_VIEW> D3D12G D3D12_INDEX_BUFFER_VIEW indexBufferView = {}; indexBufferView.SizeInBytes = (UINT)subBufferSize; indexBufferView.BufferLocation = indexBuffer->GetGPUVirtualAddress(); - indexBufferView.Format = DXGI_FORMAT_R16_UINT; + switch (m_indexed_array.m_type) + { + case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_32: + indexBufferView.Format = DXGI_FORMAT_R32_UINT; + break; + case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16: + indexBufferView.Format = DXGI_FORMAT_R16_UINT; + break; + } + if (m_forcedIndexBuffer) + indexBufferView.Format = DXGI_FORMAT_R16_UINT; + result.second = indexBufferView; } return result; @@ -1037,7 +1048,7 @@ void D3D12GSRender::ExecCMD() commandList->DrawIndexedInstanced((UINT)getCurrentResourceStorage().m_indexBufferCount, 1, 0, (UINT)m_draw_array_first, 0); // Indexed triangles else if (m_indexed_array.m_count) - 
commandList->DrawIndexedInstanced(m_indexed_array.m_data.size() / 2, 1, 0, (UINT)m_draw_array_first, 0); + commandList->DrawIndexedInstanced(m_indexed_array.m_data.size() / 4, 1, 0, (UINT)m_draw_array_first, 0); else if (m_draw_array_count) commandList->DrawInstanced(m_draw_array_count, 1, m_draw_array_first, 0); From 9814a92a258d6447aca00f2f7114c20d5df07176 Mon Sep 17 00:00:00 2001 From: raven02 Date: Mon, 25 May 2015 16:12:13 +0800 Subject: [PATCH 124/343] Tex: define more sampler parameters --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 3 ++ rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 58 ++++++++++++++++++++++++++-- 2 files changed, 58 insertions(+), 3 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 804aeec80a..86a8808641 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -128,6 +128,9 @@ private: * returns the number of texture uploaded */ size_t UploadTextures(); + size_t GetMaxAniso(size_t aniso); + D3D12_TEXTURE_ADDRESS_MODE GetWrap(size_t wrap); + /*void DisableVertexData(); void WriteBuffers(); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index e855ad7554..a19a89724a 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -3,6 +3,52 @@ #include "D3D12GSRender.h" // For clarity this code deals with texture but belongs to D3D12GSRender class +static D3D12_COMPARISON_FUNC ComparisonFunc[] = +{ + D3D12_COMPARISON_FUNC_NEVER, + D3D12_COMPARISON_FUNC_LESS, + D3D12_COMPARISON_FUNC_EQUAL, + D3D12_COMPARISON_FUNC_LESS_EQUAL, + D3D12_COMPARISON_FUNC_GREATER, + D3D12_COMPARISON_FUNC_NOT_EQUAL, + D3D12_COMPARISON_FUNC_GREATER_EQUAL, + D3D12_COMPARISON_FUNC_ALWAYS +}; + +size_t D3D12GSRender::GetMaxAniso(size_t aniso) +{ + switch (aniso) + { + case CELL_GCM_TEXTURE_MAX_ANISO_1: return 1; + case CELL_GCM_TEXTURE_MAX_ANISO_2: return 2; + case CELL_GCM_TEXTURE_MAX_ANISO_4: return 4; + case 
CELL_GCM_TEXTURE_MAX_ANISO_6: return 6; + case CELL_GCM_TEXTURE_MAX_ANISO_8: return 8; + case CELL_GCM_TEXTURE_MAX_ANISO_10: return 10; + case CELL_GCM_TEXTURE_MAX_ANISO_12: return 12; + case CELL_GCM_TEXTURE_MAX_ANISO_16: return 16; + } + + return 1; +} + +D3D12_TEXTURE_ADDRESS_MODE D3D12GSRender::GetWrap(size_t wrap) +{ + switch (wrap) + { + case CELL_GCM_TEXTURE_WRAP: return D3D12_TEXTURE_ADDRESS_MODE_WRAP; + case CELL_GCM_TEXTURE_MIRROR: return D3D12_TEXTURE_ADDRESS_MODE_MIRROR; + case CELL_GCM_TEXTURE_CLAMP_TO_EDGE: return D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + case CELL_GCM_TEXTURE_BORDER: return D3D12_TEXTURE_ADDRESS_MODE_BORDER; + case CELL_GCM_TEXTURE_CLAMP: return D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + case CELL_GCM_TEXTURE_MIRROR_ONCE_CLAMP_TO_EDGE: return D3D12_TEXTURE_ADDRESS_MODE_MIRROR_ONCE; + case CELL_GCM_TEXTURE_MIRROR_ONCE_BORDER: return D3D12_TEXTURE_ADDRESS_MODE_MIRROR_ONCE; + case CELL_GCM_TEXTURE_MIRROR_ONCE_CLAMP: return D3D12_TEXTURE_ADDRESS_MODE_MIRROR_ONCE; + } + + return D3D12_TEXTURE_ADDRESS_MODE_WRAP; +} + size_t D3D12GSRender::UploadTextures() { size_t usedTexture = 0; @@ -119,9 +165,15 @@ size_t D3D12GSRender::UploadTextures() // TODO : Correctly define sampler D3D12_SAMPLER_DESC samplerDesc = {}; samplerDesc.Filter = D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT; - samplerDesc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_WRAP; - samplerDesc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_WRAP; - samplerDesc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_WRAP; + samplerDesc.AddressU = GetWrap(m_textures[i].GetWrapS()); + samplerDesc.AddressV = GetWrap(m_textures[i].GetWrapT()); + samplerDesc.AddressW = GetWrap(m_textures[i].GetWrapR()); + samplerDesc.ComparisonFunc = ComparisonFunc[m_textures[i].GetZfunc()]; + samplerDesc.MaxAnisotropy = GetMaxAniso(m_textures[i].GetMaxAniso()); + samplerDesc.MipLODBias = m_textures[i].GetBias(); + samplerDesc.BorderColor[4] = m_textures[i].GetBorderColor(); + samplerDesc.MinLOD = m_textures[i].GetMinLOD() >> 8; + samplerDesc.MaxLOD = 
m_textures[i].GetMaxLOD() >> 8; Handle = getCurrentResourceStorage().m_samplerDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); Handle.ptr += (getCurrentResourceStorage().m_currentTextureIndex + usedTexture) * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); m_device->CreateSampler(&samplerDesc, Handle); From 22f413be3a64c61e8d9abdde014463c21378a9ad Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 25 May 2015 17:38:28 +0200 Subject: [PATCH 125/343] d3d12; Live buffer related code and factorise resource_desc for buffers --- rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp | 324 +++++++++++++++++++++++ rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 354 -------------------------- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 3 + 3 files changed, 327 insertions(+), 354 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index a8a1e2deba..c6d07b64ac 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -3,6 +3,8 @@ #include "D3D12Buffer.h" #include "Utilities/Log.h" +#include "D3D12GSRender.h" + const int g_vertexCount = 32; // Where are these type defined ??? @@ -220,4 +222,326 @@ std::vector getIALayout(ID3D12Device *device, bool ind return result; } +static +D3D12_RESOURCE_DESC getBufferResourceDesc(size_t sizeInByte) +{ + D3D12_RESOURCE_DESC BufferDesc = {}; + BufferDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + BufferDesc.Width = (UINT)sizeInByte; + BufferDesc.Height = 1; + BufferDesc.DepthOrArraySize = 1; + BufferDesc.SampleDesc.Count = 1; + BufferDesc.MipLevels = 1; + BufferDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + return BufferDesc; +} + +// D3D12GS member handling buffers + +std::pair, D3D12_INDEX_BUFFER_VIEW> D3D12GSRender::EnableVertexData(bool indexed_draw) +{ + std::pair, D3D12_INDEX_BUFFER_VIEW> result; + m_IASet = getIALayout(m_device, indexed_draw, m_vertex_data); + + const u32 data_offset = indexed_draw ? 
0 : m_draw_array_first; + + for (u32 i = 0; i < m_vertex_count; ++i) + { + if (!m_vertex_data[i].IsEnabled()) continue; + const size_t item_size = m_vertex_data[i].GetTypeSize() * m_vertex_data[i].size; + const size_t data_size = m_vertex_data[i].data.size() - data_offset * item_size; + size_t subBufferSize = (data_offset + data_size) * item_size; + // 65536 alignment + size_t bufferHeapOffset = getCurrentResourceStorage().m_vertexIndexBuffersHeapFreeSpace; + bufferHeapOffset = (bufferHeapOffset + 65536 - 1) & ~65535; + + ID3D12Resource *vertexBuffer; + check(m_device->CreatePlacedResource( + getCurrentResourceStorage().m_vertexIndexBuffersHeap, + bufferHeapOffset, + &getBufferResourceDesc(subBufferSize), + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&vertexBuffer) + )); + void *bufferMap; + check(vertexBuffer->Map(0, nullptr, (void**)&bufferMap)); + memcpy((char*)bufferMap + data_offset * item_size, &m_vertex_data[i].data[data_offset * item_size], data_size); + vertexBuffer->Unmap(0, nullptr); + getCurrentResourceStorage().m_inflightResources.push_back(vertexBuffer); + + D3D12_VERTEX_BUFFER_VIEW vertexBufferView = {}; + vertexBufferView.BufferLocation = vertexBuffer->GetGPUVirtualAddress(); + vertexBufferView.SizeInBytes = (UINT)subBufferSize; + vertexBufferView.StrideInBytes = (UINT)item_size; + result.first.push_back(vertexBufferView); + getCurrentResourceStorage().m_vertexIndexBuffersHeapFreeSpace = bufferHeapOffset + subBufferSize; + } + + // Only handle quads now + switch (m_draw_mode - 1) + { + default: + case GL_POINTS: + case GL_LINES: + case GL_LINE_LOOP: + case GL_LINE_STRIP: + case GL_TRIANGLES: + case GL_TRIANGLE_STRIP: + case GL_TRIANGLE_FAN: + case GL_QUAD_STRIP: + case GL_POLYGON: + m_forcedIndexBuffer = false; + break; + case GL_QUADS: + m_forcedIndexBuffer = true; + break; + } + + if (indexed_draw || m_forcedIndexBuffer) + { + size_t subBufferSize; + if (indexed_draw && !m_forcedIndexBuffer) + subBufferSize = 
m_indexed_array.m_data.size(); + else if (indexed_draw && m_forcedIndexBuffer) + subBufferSize = 6 * m_indexed_array.m_data.size() / 4; + else + subBufferSize = 2 * m_draw_array_count * 6 / 4; + // 65536 alignment + size_t bufferHeapOffset = getCurrentResourceStorage().m_vertexIndexBuffersHeapFreeSpace; + bufferHeapOffset = (bufferHeapOffset + 65536 - 1) & ~65535; + + ID3D12Resource *indexBuffer; + check(m_device->CreatePlacedResource( + getCurrentResourceStorage().m_vertexIndexBuffersHeap, + D3D12_HEAP_FLAG_NONE, + &getBufferResourceDesc(subBufferSize), + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&indexBuffer) + )); + + unsigned short *bufferMap; + check(indexBuffer->Map(0, nullptr, (void**)&bufferMap)); + size_t forcedIndexCount = 0; + if (indexed_draw && !m_forcedIndexBuffer) + memcpy(bufferMap, m_indexed_array.m_data.data(), subBufferSize); + else if (indexed_draw && m_forcedIndexBuffer) + { + size_t indexcount = m_indexed_array.m_data.size() / 2; + unsigned short *indexList = (unsigned short*)m_indexed_array.m_data.data(); + for (unsigned i = 0; i < indexcount / 4; i++) + { + // First triangle + bufferMap[6 * i] = indexList[4 * i]; + bufferMap[6 * i + 1] = indexList[4 * i + 1]; + bufferMap[6 * i + 2] = indexList[4 * i + 2]; + // Second triangle + bufferMap[6 * i + 3] = indexList[4 * i + 2]; + bufferMap[6 * i + 4] = indexList[4 * i + 3]; + bufferMap[6 * i + 5] = indexList[4 * i]; + forcedIndexCount += 6; + } + } + else + { + for (unsigned i = 0; i < m_draw_array_count / 4; i++) + { + // First triangle + bufferMap[6 * i] = 4 * i; + bufferMap[6 * i + 1] = 4 * i + 1; + bufferMap[6 * i + 2] = 4 * i + 2; + // Second triangle + bufferMap[6 * i + 3] = 4 * i; + bufferMap[6 * i + 4] = 4 * i + 2; + bufferMap[6 * i + 5] = 4 * i + 3; + forcedIndexCount += 6; + } + } + indexBuffer->Unmap(0, nullptr); + getCurrentResourceStorage().m_inflightResources.push_back(indexBuffer); + getCurrentResourceStorage().m_vertexIndexBuffersHeapFreeSpace = bufferHeapOffset 
+ subBufferSize; + getCurrentResourceStorage().m_indexBufferCount = forcedIndexCount; + + D3D12_INDEX_BUFFER_VIEW indexBufferView = {}; + indexBufferView.SizeInBytes = (UINT)subBufferSize; + indexBufferView.BufferLocation = indexBuffer->GetGPUVirtualAddress(); + switch (m_indexed_array.m_type) + { + case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_32: + indexBufferView.Format = DXGI_FORMAT_R32_UINT; + break; + case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16: + indexBufferView.Format = DXGI_FORMAT_R16_UINT; + break; + } + if (m_forcedIndexBuffer) + indexBufferView.Format = DXGI_FORMAT_R16_UINT; + + result.second = indexBufferView; + } + return result; +} + +void D3D12GSRender::setScaleOffset() +{ + float scaleOffsetMat[16] = + { + 1.0f, 0.0f, 0.0f, 0.0f, + 0.0f, -1.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 1.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f + }; + + // Scale + scaleOffsetMat[0] *= (float&)methodRegisters[NV4097_SET_VIEWPORT_SCALE + (0x4 * 0)] / (RSXThread::m_width / RSXThread::m_width_scale); + scaleOffsetMat[5] *= (float&)methodRegisters[NV4097_SET_VIEWPORT_SCALE + (0x4 * 1)] / (RSXThread::m_height / RSXThread::m_height_scale); + scaleOffsetMat[10] = (float&)methodRegisters[NV4097_SET_VIEWPORT_SCALE + (0x4 * 2)]; + + // Offset + scaleOffsetMat[3] = (float&)methodRegisters[NV4097_SET_VIEWPORT_OFFSET + (0x4 * 0)] - (RSXThread::m_width / RSXThread::m_width_scale); + scaleOffsetMat[7] = (float&)methodRegisters[NV4097_SET_VIEWPORT_OFFSET + (0x4 * 1)] - (RSXThread::m_height / RSXThread::m_height_scale); + scaleOffsetMat[11] = (float&)methodRegisters[NV4097_SET_VIEWPORT_OFFSET + (0x4 * 2)]; + + scaleOffsetMat[3] /= RSXThread::m_width / RSXThread::m_width_scale; + scaleOffsetMat[7] /= RSXThread::m_height / RSXThread::m_height_scale; + + size_t constantBuffersHeapOffset = getCurrentResourceStorage().m_constantsBuffersHeapFreeSpace; + // 65536 alignment + constantBuffersHeapOffset = (constantBuffersHeapOffset + 65536 - 1) & ~65535; + + // Scale offset buffer + // Separate constant buffer + ID3D12Resource 
*scaleOffsetBuffer; + check(m_device->CreatePlacedResource( + getCurrentResourceStorage().m_constantsBuffersHeap, + constantBuffersHeapOffset, + &getBufferResourceDesc(256), + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&scaleOffsetBuffer) + )); + + void *scaleOffsetMap; + check(scaleOffsetBuffer->Map(0, nullptr, &scaleOffsetMap)); + streamToBuffer(scaleOffsetMap, scaleOffsetMat, 16 * sizeof(float)); + scaleOffsetBuffer->Unmap(0, nullptr); + + D3D12_CONSTANT_BUFFER_VIEW_DESC constantBufferViewDesc = {}; + constantBufferViewDesc.BufferLocation = scaleOffsetBuffer->GetGPUVirtualAddress(); + constantBufferViewDesc.SizeInBytes = (UINT)256; + D3D12_CPU_DESCRIPTOR_HANDLE Handle = getCurrentResourceStorage().m_scaleOffsetDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); + Handle.ptr += getCurrentResourceStorage().m_currentScaleOffsetBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + m_device->CreateConstantBufferView(&constantBufferViewDesc, Handle); + getCurrentResourceStorage().m_constantsBuffersHeapFreeSpace = constantBuffersHeapOffset + 256; + getCurrentResourceStorage().m_inflightResources.push_back(scaleOffsetBuffer); +} + +void D3D12GSRender::FillVertexShaderConstantsBuffer() +{ + for (const RSXTransformConstant& c : m_transform_constants) + { + size_t offset = c.id * 4 * sizeof(float); + float vector[] = { c.x, c.y, c.z, c.w }; + memcpy((char*)vertexConstantShadowCopy + offset, vector, 4 * sizeof(float)); + } + + size_t constantBuffersHeapOffset = getCurrentResourceStorage().m_constantsBuffersHeapFreeSpace; + // 65536 alignment + constantBuffersHeapOffset = (constantBuffersHeapOffset + 65536 - 1) & ~65535; + + ID3D12Resource *constantsBuffer; + check(m_device->CreatePlacedResource( + getCurrentResourceStorage().m_constantsBuffersHeap, + constantBuffersHeapOffset, + &getBufferResourceDesc(512 * 4 * sizeof(float)), + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&constantsBuffer) + 
)); + + void *constantsBufferMap; + check(constantsBuffer->Map(0, nullptr, &constantsBufferMap)); + streamToBuffer(constantsBufferMap, vertexConstantShadowCopy, 512 * 4 * sizeof(float)); + constantsBuffer->Unmap(0, nullptr); + + D3D12_CONSTANT_BUFFER_VIEW_DESC constantBufferViewDesc = {}; + constantBufferViewDesc.BufferLocation = constantsBuffer->GetGPUVirtualAddress(); + constantBufferViewDesc.SizeInBytes = 512 * 4 * sizeof(float); + D3D12_CPU_DESCRIPTOR_HANDLE Handle = getCurrentResourceStorage().m_constantsBufferDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); + Handle.ptr += getCurrentResourceStorage().m_constantsBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + m_device->CreateConstantBufferView(&constantBufferViewDesc, Handle); + getCurrentResourceStorage().m_constantsBuffersHeapFreeSpace = constantBuffersHeapOffset + 512 * 4 * sizeof(float); + getCurrentResourceStorage().m_inflightResources.push_back(constantsBuffer); +} + +void D3D12GSRender::FillPixelShaderConstantsBuffer() +{ + // Get constant from fragment program + const std::vector &fragmentOffset = m_cachePSO.getFragmentConstantOffsetsCache(m_cur_fragment_prog); + size_t bufferSize = fragmentOffset.size() * 4 * sizeof(float) + 1; + // Multiple of 256 never 0 + bufferSize = (bufferSize + 255) & ~255; + + size_t constantBuffersHeapOffset = getCurrentResourceStorage().m_constantsBuffersHeapFreeSpace; + // 65536 alignment + constantBuffersHeapOffset = (constantBuffersHeapOffset + 65536 - 1) & ~65535; + + ID3D12Resource *constantsBuffer; + check(m_device->CreatePlacedResource( + getCurrentResourceStorage().m_constantsBuffersHeap, + constantBuffersHeapOffset, + &getBufferResourceDesc(bufferSize), + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&constantsBuffer) + )); + + size_t offset = 0; + void *constantsBufferMap; + check(constantsBuffer->Map(0, nullptr, &constantsBufferMap)); + for (size_t offsetInFP : fragmentOffset) + { + u32 
vector[4]; + // Is it assigned by color register in command buffer ? + if (!m_fragment_constants.empty() && offsetInFP == m_fragment_constants.front().id - m_cur_fragment_prog->offset) + { + const RSXTransformConstant& c = m_fragment_constants.front(); + vector[0] = (u32&)c.x; + vector[1] = (u32&)c.y; + vector[2] = (u32&)c.z; + vector[3] = (u32&)c.w; + } + else + { + auto data = vm::ptr::make(m_cur_fragment_prog->addr + (u32)offsetInFP); + + u32 c0 = (data[0] >> 16 | data[0] << 16); + u32 c1 = (data[1] >> 16 | data[1] << 16); + u32 c2 = (data[2] >> 16 | data[2] << 16); + u32 c3 = (data[3] >> 16 | data[3] << 16); + + vector[0] = c0; + vector[1] = c1; + vector[2] = c2; + vector[3] = c3; + } + + streamToBuffer((char*)constantsBufferMap + offset, vector, 4 * sizeof(u32)); + offset += 4 * sizeof(u32); + } + + constantsBuffer->Unmap(0, nullptr); + + D3D12_CONSTANT_BUFFER_VIEW_DESC constantBufferViewDesc = {}; + constantBufferViewDesc.BufferLocation = constantsBuffer->GetGPUVirtualAddress(); + constantBufferViewDesc.SizeInBytes = (UINT)bufferSize; + D3D12_CPU_DESCRIPTOR_HANDLE Handle = getCurrentResourceStorage().m_constantsBufferDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); + Handle.ptr += getCurrentResourceStorage().m_constantsBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + m_device->CreateConstantBufferView(&constantBufferViewDesc, Handle); + getCurrentResourceStorage().m_constantsBuffersHeapFreeSpace = constantBuffersHeapOffset + bufferSize; + getCurrentResourceStorage().m_inflightResources.push_back(constantsBuffer); +} + + #endif \ No newline at end of file diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 556951333e..93bb435973 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -5,9 +5,6 @@ #include #include -// Some constants are the same between RSX and GL -#include - GetGSFrameCb2 GetGSFrame = nullptr; void 
SetGetD3DGSFrameCallback(GetGSFrameCb2 value) @@ -428,279 +425,6 @@ void D3D12GSRender::ExecCMD(u32 cmd) m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**) &commandList); } -std::pair, D3D12_INDEX_BUFFER_VIEW> D3D12GSRender::EnableVertexData(bool indexed_draw) -{ - std::pair, D3D12_INDEX_BUFFER_VIEW> result; - m_IASet = getIALayout(m_device, indexed_draw, m_vertex_data); - - const u32 data_offset = indexed_draw ? 0 : m_draw_array_first; - - for (u32 i = 0; i < m_vertex_count; ++i) - { - if (!m_vertex_data[i].IsEnabled()) continue; - const size_t item_size = m_vertex_data[i].GetTypeSize() * m_vertex_data[i].size; - const size_t data_size = m_vertex_data[i].data.size() - data_offset * item_size; - size_t subBufferSize = (data_offset + data_size) * item_size; - // 65536 alignment - size_t bufferHeapOffset = getCurrentResourceStorage().m_vertexIndexBuffersHeapFreeSpace; - bufferHeapOffset = (bufferHeapOffset + 65536 - 1) & ~65535; - - ID3D12Resource *vertexBuffer; - D3D12_RESOURCE_DESC vertexBufferDesc = {}; - vertexBufferDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; - vertexBufferDesc.Width = (UINT)subBufferSize; - vertexBufferDesc.Height = 1; - vertexBufferDesc.DepthOrArraySize = 1; - vertexBufferDesc.SampleDesc.Count = 1; - vertexBufferDesc.MipLevels = 1; - vertexBufferDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; - check(m_device->CreatePlacedResource( - getCurrentResourceStorage().m_vertexIndexBuffersHeap, - bufferHeapOffset, - &vertexBufferDesc, - D3D12_RESOURCE_STATE_GENERIC_READ, - nullptr, - IID_PPV_ARGS(&vertexBuffer) - )); - void *bufferMap; - check(vertexBuffer->Map(0, nullptr, (void**)&bufferMap)); - memcpy((char*)bufferMap + data_offset * item_size, &m_vertex_data[i].data[data_offset * item_size], data_size); - vertexBuffer->Unmap(0, nullptr); - getCurrentResourceStorage().m_inflightResources.push_back(vertexBuffer); - - D3D12_VERTEX_BUFFER_VIEW vertexBufferView = {}; - vertexBufferView.BufferLocation = 
vertexBuffer->GetGPUVirtualAddress(); - vertexBufferView.SizeInBytes = (UINT)subBufferSize; - vertexBufferView.StrideInBytes = (UINT)item_size; - result.first.push_back(vertexBufferView); - getCurrentResourceStorage().m_vertexIndexBuffersHeapFreeSpace = bufferHeapOffset + subBufferSize; - } - - // Only handle quads now - switch (m_draw_mode - 1) - { - default: - case GL_POINTS: - case GL_LINES: - case GL_LINE_LOOP: - case GL_LINE_STRIP: - case GL_TRIANGLES: - case GL_TRIANGLE_STRIP: - case GL_TRIANGLE_FAN: - case GL_QUAD_STRIP: - case GL_POLYGON: - m_forcedIndexBuffer = false; - break; - case GL_QUADS: - m_forcedIndexBuffer = true; - break; - } - - if (indexed_draw || m_forcedIndexBuffer) - { - size_t subBufferSize; - if (indexed_draw && !m_forcedIndexBuffer) - subBufferSize = m_indexed_array.m_data.size(); - else if (indexed_draw && m_forcedIndexBuffer) - subBufferSize = 6 * m_indexed_array.m_data.size() / 4; - else - subBufferSize = 2 * m_draw_array_count * 6 / 4; - // 65536 alignment - size_t bufferHeapOffset = getCurrentResourceStorage().m_vertexIndexBuffersHeapFreeSpace; - bufferHeapOffset = (bufferHeapOffset + 65536 - 1) & ~65535; - - - D3D12_RESOURCE_DESC resDesc = {}; - resDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; - resDesc.Width = (UINT)subBufferSize; - resDesc.Height = 1; - resDesc.DepthOrArraySize = 1; - resDesc.SampleDesc.Count = 1; - resDesc.MipLevels = 1; - resDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; - - ID3D12Resource *indexBuffer; - check(m_device->CreatePlacedResource( - getCurrentResourceStorage().m_vertexIndexBuffersHeap, - D3D12_HEAP_FLAG_NONE, - &resDesc, - D3D12_RESOURCE_STATE_GENERIC_READ, - nullptr, - IID_PPV_ARGS(&indexBuffer) - )); - - unsigned short *bufferMap; - check(indexBuffer->Map(0, nullptr, (void**)&bufferMap)); - size_t forcedIndexCount = 0; - if (indexed_draw && !m_forcedIndexBuffer) - memcpy(bufferMap, m_indexed_array.m_data.data(), subBufferSize); - else if (indexed_draw && m_forcedIndexBuffer) - { - size_t 
indexcount = m_indexed_array.m_data.size() / 2; - unsigned short *indexList = (unsigned short*)m_indexed_array.m_data.data(); - for (unsigned i = 0; i < indexcount / 4; i++) - { - // First triangle - bufferMap[6 * i] = indexList[4 * i]; - bufferMap[6 * i + 1] = indexList[4 * i + 1]; - bufferMap[6 * i + 2] = indexList[4 * i + 2]; - // Second triangle - bufferMap[6 * i + 3] = indexList[4 * i + 2]; - bufferMap[6 * i + 4] = indexList[4 * i + 3]; - bufferMap[6 * i + 5] = indexList[4 * i]; - forcedIndexCount += 6; - } - } - else - { - for (unsigned i = 0; i < m_draw_array_count / 4; i++) - { - // First triangle - bufferMap[6 * i] = 4 * i; - bufferMap[6 * i + 1] = 4 * i + 1; - bufferMap[6 * i + 2] = 4 * i + 2; - // Second triangle - bufferMap[6 * i + 3] = 4 * i; - bufferMap[6 * i + 4] = 4 * i + 2; - bufferMap[6 * i + 5] = 4 * i + 3; - forcedIndexCount += 6; - } - } - indexBuffer->Unmap(0, nullptr); - getCurrentResourceStorage().m_inflightResources.push_back(indexBuffer); - getCurrentResourceStorage().m_vertexIndexBuffersHeapFreeSpace = bufferHeapOffset + subBufferSize; - getCurrentResourceStorage().m_indexBufferCount = forcedIndexCount; - - D3D12_INDEX_BUFFER_VIEW indexBufferView = {}; - indexBufferView.SizeInBytes = (UINT)subBufferSize; - indexBufferView.BufferLocation = indexBuffer->GetGPUVirtualAddress(); - switch (m_indexed_array.m_type) - { - case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_32: - indexBufferView.Format = DXGI_FORMAT_R32_UINT; - break; - case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16: - indexBufferView.Format = DXGI_FORMAT_R16_UINT; - break; - } - if (m_forcedIndexBuffer) - indexBufferView.Format = DXGI_FORMAT_R16_UINT; - - result.second = indexBufferView; - } - return result; -} - -void D3D12GSRender::setScaleOffset() -{ - float scaleOffsetMat[16] = - { - 1.0f, 0.0f, 0.0f, 0.0f, - 0.0f, -1.0f, 0.0f, 0.0f, - 0.0f, 0.0f, 1.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 1.0f - }; - - // Scale - scaleOffsetMat[0] *= (float&)methodRegisters[NV4097_SET_VIEWPORT_SCALE + (0x4 * 0)] / 
(RSXThread::m_width / RSXThread::m_width_scale); - scaleOffsetMat[5] *= (float&)methodRegisters[NV4097_SET_VIEWPORT_SCALE + (0x4 * 1)] / (RSXThread::m_height / RSXThread::m_height_scale); - scaleOffsetMat[10] = (float&)methodRegisters[NV4097_SET_VIEWPORT_SCALE + (0x4 * 2)]; - - // Offset - scaleOffsetMat[3] = (float&)methodRegisters[NV4097_SET_VIEWPORT_OFFSET + (0x4 * 0)] - (RSXThread::m_width / RSXThread::m_width_scale); - scaleOffsetMat[7] = (float&)methodRegisters[NV4097_SET_VIEWPORT_OFFSET + (0x4 * 1)] - (RSXThread::m_height / RSXThread::m_height_scale); - scaleOffsetMat[11] = (float&)methodRegisters[NV4097_SET_VIEWPORT_OFFSET + (0x4 * 2)]; - - scaleOffsetMat[3] /= RSXThread::m_width / RSXThread::m_width_scale; - scaleOffsetMat[7] /= RSXThread::m_height / RSXThread::m_height_scale; - - size_t constantBuffersHeapOffset = getCurrentResourceStorage().m_constantsBuffersHeapFreeSpace; - // 65536 alignment - constantBuffersHeapOffset = (constantBuffersHeapOffset + 65536 - 1) & ~65535; - - D3D12_RESOURCE_DESC resDesc = {}; - resDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; - resDesc.Width = 256; - resDesc.Height = 1; - resDesc.DepthOrArraySize = 1; - resDesc.SampleDesc.Count = 1; - resDesc.MipLevels = 1; - resDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; - - // Scale offset buffer - // Separate constant buffer - ID3D12Resource *scaleOffsetBuffer; - check(m_device->CreatePlacedResource( - getCurrentResourceStorage().m_constantsBuffersHeap, - constantBuffersHeapOffset, - &resDesc, - D3D12_RESOURCE_STATE_GENERIC_READ, - nullptr, - IID_PPV_ARGS(&scaleOffsetBuffer) - )); - - void *scaleOffsetMap; - check(scaleOffsetBuffer->Map(0, nullptr, &scaleOffsetMap)); - streamToBuffer(scaleOffsetMap, scaleOffsetMat, 16 * sizeof(float)); - scaleOffsetBuffer->Unmap(0, nullptr); - - D3D12_CONSTANT_BUFFER_VIEW_DESC constantBufferViewDesc = {}; - constantBufferViewDesc.BufferLocation = scaleOffsetBuffer->GetGPUVirtualAddress(); - constantBufferViewDesc.SizeInBytes = (UINT)256; - 
D3D12_CPU_DESCRIPTOR_HANDLE Handle = getCurrentResourceStorage().m_scaleOffsetDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); - Handle.ptr += getCurrentResourceStorage().m_currentScaleOffsetBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - m_device->CreateConstantBufferView(&constantBufferViewDesc, Handle); - getCurrentResourceStorage().m_constantsBuffersHeapFreeSpace = constantBuffersHeapOffset + 256; - getCurrentResourceStorage().m_inflightResources.push_back(scaleOffsetBuffer); -} - -void D3D12GSRender::FillVertexShaderConstantsBuffer() -{ - for (const RSXTransformConstant& c : m_transform_constants) - { - size_t offset = c.id * 4 * sizeof(float); - float vector[] = { c.x, c.y, c.z, c.w }; - memcpy((char*)vertexConstantShadowCopy + offset, vector, 4 * sizeof(float)); - } - - size_t constantBuffersHeapOffset = getCurrentResourceStorage().m_constantsBuffersHeapFreeSpace; - // 65536 alignment - constantBuffersHeapOffset = (constantBuffersHeapOffset + 65536 - 1) & ~65535; - - D3D12_RESOURCE_DESC resDesc = {}; - resDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; - resDesc.Width = 512 * 4 * sizeof(float); - resDesc.Height = 1; - resDesc.DepthOrArraySize = 1; - resDesc.SampleDesc.Count = 1; - resDesc.MipLevels = 1; - resDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; - - ID3D12Resource *constantsBuffer; - check(m_device->CreatePlacedResource( - getCurrentResourceStorage().m_constantsBuffersHeap, - constantBuffersHeapOffset, - &resDesc, - D3D12_RESOURCE_STATE_GENERIC_READ, - nullptr, - IID_PPV_ARGS(&constantsBuffer) - )); - - void *constantsBufferMap; - check(constantsBuffer->Map(0, nullptr, &constantsBufferMap)); - streamToBuffer(constantsBufferMap, vertexConstantShadowCopy, 512 * 4 * sizeof(float)); - constantsBuffer->Unmap(0, nullptr); - - D3D12_CONSTANT_BUFFER_VIEW_DESC constantBufferViewDesc = {}; - constantBufferViewDesc.BufferLocation = constantsBuffer->GetGPUVirtualAddress(); - 
constantBufferViewDesc.SizeInBytes = 512 * 4 * sizeof(float); - D3D12_CPU_DESCRIPTOR_HANDLE Handle = getCurrentResourceStorage().m_constantsBufferDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); - Handle.ptr += getCurrentResourceStorage().m_constantsBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - m_device->CreateConstantBufferView(&constantBufferViewDesc, Handle); - getCurrentResourceStorage().m_constantsBuffersHeapFreeSpace = constantBuffersHeapOffset + 512 * 4 * sizeof(float); - getCurrentResourceStorage().m_inflightResources.push_back(constantsBuffer); -} - static D3D12_BLEND_OP getBlendOp() { @@ -727,84 +451,6 @@ D3D12_BLEND getBlendFactor(u16 glFactor) } } -void D3D12GSRender::FillPixelShaderConstantsBuffer() -{ - // Get constant from fragment program - const std::vector &fragmentOffset = m_cachePSO.getFragmentConstantOffsetsCache(m_cur_fragment_prog); - size_t bufferSize = fragmentOffset.size() * 4 * sizeof(float) + 1; - // Multiple of 256 never 0 - bufferSize = (bufferSize + 255) & ~255; - - size_t constantBuffersHeapOffset = getCurrentResourceStorage().m_constantsBuffersHeapFreeSpace; - // 65536 alignment - constantBuffersHeapOffset = (constantBuffersHeapOffset + 65536 - 1) & ~65535; - - D3D12_RESOURCE_DESC resDesc = {}; - resDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; - resDesc.Width = (UINT)bufferSize; - resDesc.Height = 1; - resDesc.DepthOrArraySize = 1; - resDesc.SampleDesc.Count = 1; - resDesc.MipLevels = 1; - resDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; - - ID3D12Resource *constantsBuffer; - check(m_device->CreatePlacedResource( - getCurrentResourceStorage().m_constantsBuffersHeap, - constantBuffersHeapOffset, - &resDesc, - D3D12_RESOURCE_STATE_GENERIC_READ, - nullptr, - IID_PPV_ARGS(&constantsBuffer) - )); - - size_t offset = 0; - void *constantsBufferMap; - check(constantsBuffer->Map(0, nullptr, &constantsBufferMap)); - for (size_t offsetInFP : fragmentOffset) - { - u32 vector[4]; - 
// Is it assigned by color register in command buffer ? - if (!m_fragment_constants.empty() && offsetInFP == m_fragment_constants.front().id - m_cur_fragment_prog->offset) - { - const RSXTransformConstant& c = m_fragment_constants.front(); - vector[0] = (u32&)c.x; - vector[1] = (u32&)c.y; - vector[2] = (u32&)c.z; - vector[3] = (u32&)c.w; - } - else - { - auto data = vm::ptr::make(m_cur_fragment_prog->addr + (u32)offsetInFP); - - u32 c0 = (data[0] >> 16 | data[0] << 16); - u32 c1 = (data[1] >> 16 | data[1] << 16); - u32 c2 = (data[2] >> 16 | data[2] << 16); - u32 c3 = (data[3] >> 16 | data[3] << 16); - - vector[0] = c0; - vector[1] = c1; - vector[2] = c2; - vector[3] = c3; - } - - streamToBuffer((char*)constantsBufferMap + offset, vector, 4 * sizeof(u32)); - offset += 4 * sizeof(u32); - } - - constantsBuffer->Unmap(0, nullptr); - - D3D12_CONSTANT_BUFFER_VIEW_DESC constantBufferViewDesc = {}; - constantBufferViewDesc.BufferLocation = constantsBuffer->GetGPUVirtualAddress(); - constantBufferViewDesc.SizeInBytes = (UINT)bufferSize; - D3D12_CPU_DESCRIPTOR_HANDLE Handle = getCurrentResourceStorage().m_constantsBufferDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); - Handle.ptr += getCurrentResourceStorage().m_constantsBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - m_device->CreateConstantBufferView(&constantBufferViewDesc, Handle); - getCurrentResourceStorage().m_constantsBuffersHeapFreeSpace = constantBuffersHeapOffset + bufferSize; - getCurrentResourceStorage().m_inflightResources.push_back(constantsBuffer); -} - - bool D3D12GSRender::LoadProgram() { if (!m_cur_fragment_prog) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 86a8808641..e69e0cef51 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -14,6 +14,9 @@ #include "D3D12PipelineState.h" #include "D3D12Buffer.h" +// Some constants are the same between RSX and GL 
+#include + #pragma comment (lib, "d3d12.lib") #pragma comment (lib, "dxgi.lib") From 82545a7b5625cc576642dc6e21c572a53775f653 Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 25 May 2015 18:02:18 +0200 Subject: [PATCH 126/343] d3d12: Clean indexed draw buffer gen code --- rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp | 90 ++++++++++++++++----------- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 6 +- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 2 +- 3 files changed, 58 insertions(+), 40 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index c6d07b64ac..23306f826d 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -222,6 +222,24 @@ std::vector getIALayout(ID3D12Device *device, bool ind return result; } +template +void expandIndexedQuads(DstType *dst, const SrcType *src, size_t indexCount) +{ + IndexType *typedDst = reinterpret_cast(dst); + const IndexType *typedSrc = reinterpret_cast(src); + for (unsigned i = 0; i < indexCount / 4; i++) + { + // First triangle + typedDst[6 * i] = typedSrc[4 * i]; + typedDst[6 * i + 1] = typedSrc[4 * i + 1]; + typedDst[6 * i + 2] = typedSrc[4 * i + 2]; + // Second triangle + typedDst[6 * i + 3] = typedSrc[4 * i + 2]; + typedDst[6 * i + 4] = typedSrc[4 * i + 3]; + typedDst[6 * i + 5] = typedSrc[4 * i]; + } +} + static D3D12_RESOURCE_DESC getBufferResourceDesc(size_t sizeInByte) { @@ -300,13 +318,28 @@ std::pair, D3D12_INDEX_BUFFER_VIEW> D3D12G if (indexed_draw || m_forcedIndexBuffer) { - size_t subBufferSize; + D3D12_INDEX_BUFFER_VIEW indexBufferView = {}; + size_t indexSize; + switch (m_indexed_array.m_type) + { + default: // If it's not indexed draw, use 16 bits unsigned short + case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16: + indexBufferView.Format = DXGI_FORMAT_R16_UINT; + indexSize = 2; + break; + case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_32: + indexBufferView.Format = DXGI_FORMAT_R32_UINT; + indexSize = 4; + break; + } + if (indexed_draw && !m_forcedIndexBuffer) - 
subBufferSize = m_indexed_array.m_data.size(); + indexCount = m_indexed_array.m_data.size() / indexSize; else if (indexed_draw && m_forcedIndexBuffer) - subBufferSize = 6 * m_indexed_array.m_data.size() / 4; + indexCount = 6 * m_indexed_array.m_data.size() / (4 * indexSize); else - subBufferSize = 2 * m_draw_array_count * 6 / 4; + indexCount = m_draw_array_count * 6 / 4; + size_t subBufferSize = indexCount * indexSize; // 65536 alignment size_t bufferHeapOffset = getCurrentResourceStorage().m_vertexIndexBuffersHeapFreeSpace; bufferHeapOffset = (bufferHeapOffset + 65536 - 1) & ~65535; @@ -321,60 +354,45 @@ std::pair, D3D12_INDEX_BUFFER_VIEW> D3D12G IID_PPV_ARGS(&indexBuffer) )); - unsigned short *bufferMap; + void *bufferMap; check(indexBuffer->Map(0, nullptr, (void**)&bufferMap)); - size_t forcedIndexCount = 0; if (indexed_draw && !m_forcedIndexBuffer) memcpy(bufferMap, m_indexed_array.m_data.data(), subBufferSize); else if (indexed_draw && m_forcedIndexBuffer) { - size_t indexcount = m_indexed_array.m_data.size() / 2; - unsigned short *indexList = (unsigned short*)m_indexed_array.m_data.data(); - for (unsigned i = 0; i < indexcount / 4; i++) + switch (m_indexed_array.m_type) { - // First triangle - bufferMap[6 * i] = indexList[4 * i]; - bufferMap[6 * i + 1] = indexList[4 * i + 1]; - bufferMap[6 * i + 2] = indexList[4 * i + 2]; - // Second triangle - bufferMap[6 * i + 3] = indexList[4 * i + 2]; - bufferMap[6 * i + 4] = indexList[4 * i + 3]; - bufferMap[6 * i + 5] = indexList[4 * i]; - forcedIndexCount += 6; + case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_32: + expandIndexedQuads(bufferMap, m_indexed_array.m_data.data(), m_indexed_array.m_data.size() / 4); + break; + case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16: + expandIndexedQuads(bufferMap, m_indexed_array.m_data.data(), m_indexed_array.m_data.size() / 2); + break; } } else { + unsigned short *typedDst = static_cast(bufferMap); for (unsigned i = 0; i < m_draw_array_count / 4; i++) { // First triangle - bufferMap[6 * i] = 4 * i; 
- bufferMap[6 * i + 1] = 4 * i + 1; - bufferMap[6 * i + 2] = 4 * i + 2; + typedDst[6 * i] = 4 * i; + typedDst[6 * i + 1] = 4 * i + 1; + typedDst[6 * i + 2] = 4 * i + 2; // Second triangle - bufferMap[6 * i + 3] = 4 * i; - bufferMap[6 * i + 4] = 4 * i + 2; - bufferMap[6 * i + 5] = 4 * i + 3; - forcedIndexCount += 6; + typedDst[6 * i + 3] = 4 * i + 2; + typedDst[6 * i + 4] = 4 * i + 3; + typedDst[6 * i + 5] = 4 * i; } } indexBuffer->Unmap(0, nullptr); getCurrentResourceStorage().m_inflightResources.push_back(indexBuffer); getCurrentResourceStorage().m_vertexIndexBuffersHeapFreeSpace = bufferHeapOffset + subBufferSize; - getCurrentResourceStorage().m_indexBufferCount = forcedIndexCount; - D3D12_INDEX_BUFFER_VIEW indexBufferView = {}; + indexBufferView.SizeInBytes = (UINT)subBufferSize; indexBufferView.BufferLocation = indexBuffer->GetGPUVirtualAddress(); - switch (m_indexed_array.m_type) - { - case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_32: - indexBufferView.Format = DXGI_FORMAT_R32_UINT; - break; - case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16: - indexBufferView.Format = DXGI_FORMAT_R16_UINT; - break; - } + if (m_forcedIndexBuffer) indexBufferView.Format = DXGI_FORMAT_R16_UINT; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 93bb435973..5f21527ea5 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -688,13 +688,13 @@ void D3D12GSRender::ExecCMD() // Indexed quad if (m_forcedIndexBuffer && m_indexed_array.m_count) - commandList->DrawIndexedInstanced((UINT)getCurrentResourceStorage().m_indexBufferCount, 1, 0, 0, 0); + commandList->DrawIndexedInstanced((UINT)indexCount, 1, 0, 0, 0); // Non indexed quad else if (m_forcedIndexBuffer && !m_indexed_array.m_count) - commandList->DrawIndexedInstanced((UINT)getCurrentResourceStorage().m_indexBufferCount, 1, 0, (UINT)m_draw_array_first, 0); + commandList->DrawIndexedInstanced((UINT)indexCount, 1, 0, (UINT)m_draw_array_first, 0); // Indexed triangles 
else if (m_indexed_array.m_count) - commandList->DrawIndexedInstanced(m_indexed_array.m_data.size() / 4, 1, 0, (UINT)m_draw_array_first, 0); + commandList->DrawIndexedInstanced((UINT)m_indexed_array.m_data.size() / 4, 1, 0, (UINT)m_draw_array_first, 0); else if (m_draw_array_count) commandList->DrawInstanced(m_draw_array_count, 1, m_draw_array_first, 0); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index e69e0cef51..dcd1399d63 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -64,7 +64,6 @@ private: // Vertex storage size_t m_vertexIndexBuffersHeapFreeSpace; ID3D12Heap *m_vertexIndexBuffersHeap; - size_t m_indexBufferCount; // Constants storage ID3D12Heap *m_constantsBuffersHeap; @@ -97,6 +96,7 @@ private: ResourceStorage m_perFrameStorage[2]; bool m_forcedIndexBuffer; + size_t indexCount; std::vector m_IASet; D3D12RenderTargetSets *m_fbo; From e9fab57a0e1be0c905b226e1a7f72f2f80a7809a Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 25 May 2015 18:47:54 +0200 Subject: [PATCH 127/343] d3d12: Fix indexed quad draw We were not positioning the index buffer correctly in the heap. 
--- rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp | 31 ++++++++++++++++------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index 23306f826d..797775ec5e 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -320,17 +320,23 @@ std::pair, D3D12_INDEX_BUFFER_VIEW> D3D12G { D3D12_INDEX_BUFFER_VIEW indexBufferView = {}; size_t indexSize; - switch (m_indexed_array.m_type) - { - default: // If it's not indexed draw, use 16 bits unsigned short - case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16: + + if (!indexed_draw) indexBufferView.Format = DXGI_FORMAT_R16_UINT; - indexSize = 2; - break; - case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_32: - indexBufferView.Format = DXGI_FORMAT_R32_UINT; - indexSize = 4; - break; + else + { + switch (m_indexed_array.m_type) + { + default: abort(); + case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16: + indexBufferView.Format = DXGI_FORMAT_R16_UINT; + indexSize = 2; + break; + case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_32: + indexBufferView.Format = DXGI_FORMAT_R32_UINT; + indexSize = 4; + break; + } } if (indexed_draw && !m_forcedIndexBuffer) @@ -347,7 +353,7 @@ std::pair, D3D12_INDEX_BUFFER_VIEW> D3D12G ID3D12Resource *indexBuffer; check(m_device->CreatePlacedResource( getCurrentResourceStorage().m_vertexIndexBuffersHeap, - D3D12_HEAP_FLAG_NONE, + bufferHeapOffset, &getBufferResourceDesc(subBufferSize), D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, @@ -393,9 +399,6 @@ std::pair, D3D12_INDEX_BUFFER_VIEW> D3D12G indexBufferView.SizeInBytes = (UINT)subBufferSize; indexBufferView.BufferLocation = indexBuffer->GetGPUVirtualAddress(); - if (m_forcedIndexBuffer) - indexBufferView.Format = DXGI_FORMAT_R16_UINT; - result.second = indexBufferView; } return result; From a6770813b18b890fff0563e827fb0989f4797cc4 Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 25 May 2015 18:50:51 +0200 Subject: [PATCH 128/343] d3d12: Fix var not initialized --- 
rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index 797775ec5e..3a11858e96 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -322,7 +322,10 @@ std::pair, D3D12_INDEX_BUFFER_VIEW> D3D12G size_t indexSize; if (!indexed_draw) + { indexBufferView.Format = DXGI_FORMAT_R16_UINT; + indexSize = 2; + } else { switch (m_indexed_array.m_type) From 8b8385b0a49a10a838b0b978c6f58459c75e1250 Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 25 May 2015 19:14:05 +0200 Subject: [PATCH 129/343] d3d12: Reduce heap size --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 5f21527ea5..40c11e0072 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -41,7 +41,7 @@ void D3D12GSRender::ResourceStorage::Init(ID3D12Device *device) // Create heap for vertex and constants buffers D3D12_HEAP_DESC vertexBufferHeapDesc = {}; // 16 MB wide - vertexBufferHeapDesc.SizeInBytes = 1024 * 1024 * 256; + vertexBufferHeapDesc.SizeInBytes = 1024 * 1024 * 16; vertexBufferHeapDesc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS; vertexBufferHeapDesc.Properties.Type = D3D12_HEAP_TYPE_UPLOAD; check(device->CreateHeap(&vertexBufferHeapDesc, IID_PPV_ARGS(&m_vertexIndexBuffersHeap))); @@ -62,7 +62,7 @@ void D3D12GSRender::ResourceStorage::Init(ID3D12Device *device) // Texture D3D12_HEAP_DESC heapDescription = {}; - heapDescription.SizeInBytes = 1024 * 1024 * 256; + heapDescription.SizeInBytes = 1024 * 1024 * 64; heapDescription.Properties.Type = D3D12_HEAP_TYPE_UPLOAD; heapDescription.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS; check(device->CreateHeap(&heapDescription, IID_PPV_ARGS(&m_uploadTextureHeap))); @@ -72,7 +72,7 @@ void D3D12GSRender::ResourceStorage::Init(ID3D12Device 
*device) check(device->CreateHeap(&heapDescription, IID_PPV_ARGS(&m_textureStorage))); D3D12_DESCRIPTOR_HEAP_DESC textureDescriptorDesc = {}; - textureDescriptorDesc.NumDescriptors = 2048; // For safety + textureDescriptorDesc.NumDescriptors = 1024; // For safety textureDescriptorDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; textureDescriptorDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; check(device->CreateDescriptorHeap(&textureDescriptorDesc, IID_PPV_ARGS(&m_textureDescriptorsHeap))); From 90e0a89e9b9cb67304a939fc906980bbd911a429 Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 25 May 2015 19:49:43 +0200 Subject: [PATCH 130/343] d3d12: Try to use stream function that doesn't pollute cache --- rpcs3/Emu/RSX/D3D12/D3D12.h | 36 +++++++++++++++++++++++++++++ rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp | 2 +- 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12.h b/rpcs3/Emu/RSX/D3D12/D3D12.h index 5582cc0e41..bf93a190cc 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12.h @@ -2,6 +2,7 @@ #if defined(DX12_SUPPORT) #include +#include inline void check(HRESULT hr) @@ -10,6 +11,17 @@ void check(HRESULT hr) abort(); } +/** + * Get next value that is aligned by the corresponding power of 2 + */ +inline +size_t powerOf2Align(size_t unalignedVal, size_t powerOf2) +{ + // check that powerOf2 is power of 2 + assert(!(powerOf2 & (powerOf2 - 1))); + return (unalignedVal + powerOf2 - 1) & ~powerOf2; +} + /** * Send data to dst pointer without polluting cache. * Usefull to write to mapped memory from upload heap. @@ -24,4 +36,28 @@ void streamToBuffer(void* dst, void* src, size_t sizeInBytes) } } +/** +* copy src to dst pointer without polluting cache. +* Usefull to write to mapped memory from upload heap. 
+*/ +inline +void streamBuffer(void* dst, void* src, size_t sizeInBytes) +{ + // Assume 64 bytes cache line + assert(powerOf2Align(sizeInBytes, 64)); + for (unsigned i = 0; i < sizeInBytes / 64; i++) + { + char *line = (char*)src + i * 64; + _mm_prefetch(line, _MM_HINT_NTA); + __m128i *srcPtr = (__m128i*) (line); + _mm_stream_si128((__m128i*)((char*)dst + i * 64), *srcPtr); + srcPtr = (__m128i*) (line + 16); + _mm_stream_si128((__m128i*)((char*)dst + i * 64 + 16), *srcPtr); + srcPtr = (__m128i*) (line + 32); + _mm_stream_si128((__m128i*)((char*)dst + i * 64 + 32), *srcPtr); + srcPtr = (__m128i*) (line + 48); + _mm_stream_si128((__m128i*)((char*)dst + i * 64 + 48), *srcPtr); + } +} + #endif \ No newline at end of file diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index 3a11858e96..9b6a2f2d3f 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -486,7 +486,7 @@ void D3D12GSRender::FillVertexShaderConstantsBuffer() void *constantsBufferMap; check(constantsBuffer->Map(0, nullptr, &constantsBufferMap)); - streamToBuffer(constantsBufferMap, vertexConstantShadowCopy, 512 * 4 * sizeof(float)); + streamBuffer(constantsBufferMap, vertexConstantShadowCopy, 512 * 4 * sizeof(float)); constantsBuffer->Unmap(0, nullptr); D3D12_CONSTANT_BUFFER_VIEW_DESC constantBufferViewDesc = {}; From 2709a3e49af71fda1d1dc09be669b875b3689bc9 Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 25 May 2015 19:56:40 +0200 Subject: [PATCH 131/343] d3d12: Use streaming buffer for index buffer --- rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp | 4 ++-- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index 9b6a2f2d3f..e4ad9e3e22 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -348,7 +348,7 @@ std::pair, D3D12_INDEX_BUFFER_VIEW> D3D12G indexCount = 6 * 
m_indexed_array.m_data.size() / (4 * indexSize); else indexCount = m_draw_array_count * 6 / 4; - size_t subBufferSize = indexCount * indexSize; + size_t subBufferSize = powerOf2Align(indexCount * indexSize, 64); // 65536 alignment size_t bufferHeapOffset = getCurrentResourceStorage().m_vertexIndexBuffersHeapFreeSpace; bufferHeapOffset = (bufferHeapOffset + 65536 - 1) & ~65535; @@ -366,7 +366,7 @@ std::pair, D3D12_INDEX_BUFFER_VIEW> D3D12G void *bufferMap; check(indexBuffer->Map(0, nullptr, (void**)&bufferMap)); if (indexed_draw && !m_forcedIndexBuffer) - memcpy(bufferMap, m_indexed_array.m_data.data(), subBufferSize); + streamBuffer(bufferMap, m_indexed_array.m_data.data(), subBufferSize); else if (indexed_draw && m_forcedIndexBuffer) { switch (m_indexed_array.m_type) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 40c11e0072..d812a05350 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -41,7 +41,7 @@ void D3D12GSRender::ResourceStorage::Init(ID3D12Device *device) // Create heap for vertex and constants buffers D3D12_HEAP_DESC vertexBufferHeapDesc = {}; // 16 MB wide - vertexBufferHeapDesc.SizeInBytes = 1024 * 1024 * 16; + vertexBufferHeapDesc.SizeInBytes = 1024 * 1024 * 128; vertexBufferHeapDesc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS; vertexBufferHeapDesc.Properties.Type = D3D12_HEAP_TYPE_UPLOAD; check(device->CreateHeap(&vertexBufferHeapDesc, IID_PPV_ARGS(&m_vertexIndexBuffersHeap))); From 10cc2c080c8d18ffdbdaa42abb4222ed7dcf3304 Mon Sep 17 00:00:00 2001 From: vlj Date: Tue, 26 May 2015 00:05:57 +0200 Subject: [PATCH 132/343] d3d12: Try to reconstruct packed vertex buffers --- rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp | 235 ++++++++++++---------------- 1 file changed, 103 insertions(+), 132 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index e4ad9e3e22..653ea21454 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ 
b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -86,138 +86,30 @@ DXGI_FORMAT getFormat(u8 type, u8 size) } } -std::vector getIALayout(ID3D12Device *device, bool indexedDraw, const RSXVertexData *vertexData) +struct VertexBufferFormat +{ + std::pair range; + std::vector attributeId; + size_t elementCount; + size_t stride; +}; + +std::vector getIALayout(ID3D12Device *device, const std::vector &vertexBufferFormat, const RSXVertexData *vertexData) { std::vector result; - u32 offset_list[g_vertexCount]; - u32 cur_offset = 0; - for (u32 i = 0; i < g_vertexCount; ++i) + for (size_t inputSlot = 0; inputSlot < vertexBufferFormat.size(); inputSlot++) { - if (!vertexData[i].IsEnabled()) continue; - const size_t item_size = vertexData[i].GetTypeSize() * vertexData[i].size; - offset_list[i] = (u32)item_size; - } - -#if DUMP_VERTEX_DATA - rFile dump("VertexDataArray.dump", rFile::write); -#endif - - size_t inputSlot = 0; - for (u32 i = 0; i < g_vertexCount; ++i) - { - if (!vertexData[i].IsEnabled()) continue; - -#if DUMP_VERTEX_DATA - dump.Write(wxString::Format("VertexData[%d]:\n", i)); - switch (m_vertex_data[i].type) - { - case CELL_GCM_VERTEX_S1: - for (u32 j = 0; j < m_vertex_data[i].data.size(); j += 2) - { - dump.Write(wxString::Format("%d\n", *(u16*)&m_vertex_data[i].data[j])); - if (!(((j + 2) / 2) % m_vertex_data[i].size)) dump.Write("\n"); - } - break; - - case CELL_GCM_VERTEX_F: - for (u32 j = 0; j < m_vertex_data[i].data.size(); j += 4) - { - dump.Write(wxString::Format("%.01f\n", *(float*)&m_vertex_data[i].data[j])); - if (!(((j + 4) / 4) % m_vertex_data[i].size)) dump.Write("\n"); - } - break; - - case CELL_GCM_VERTEX_SF: - for (u32 j = 0; j < m_vertex_data[i].data.size(); j += 2) - { - dump.Write(wxString::Format("%.01f\n", *(float*)&m_vertex_data[i].data[j])); - if (!(((j + 2) / 2) % m_vertex_data[i].size)) dump.Write("\n"); - } - break; - - case CELL_GCM_VERTEX_UB: - for (u32 j = 0; j < m_vertex_data[i].data.size(); ++j) - { - dump.Write(wxString::Format("%d\n", 
m_vertex_data[i].data[j])); - if (!((j + 1) % m_vertex_data[i].size)) dump.Write("\n"); - } - break; - - case CELL_GCM_VERTEX_S32K: - for (u32 j = 0; j < m_vertex_data[i].data.size(); j += 2) - { - dump.Write(wxString::Format("%d\n", *(u16*)&m_vertex_data[i].data[j])); - if (!(((j + 2) / 2) % m_vertex_data[i].size)) dump.Write("\n"); - } - break; - - // case CELL_GCM_VERTEX_CMP: - - case CELL_GCM_VERTEX_UB256: - for (u32 j = 0; j < m_vertex_data[i].data.size(); ++j) - { - dump.Write(wxString::Format("%d\n", m_vertex_data[i].data[j])); - if (!((j + 1) % m_vertex_data[i].size)) dump.Write("\n"); - } - break; - - default: - LOG_ERROR(HLE, "Bad cv type! %d", m_vertex_data[i].type); - return; - } - - dump.Write("\n"); -#endif - - if (vertexData[i].type < 1 || vertexData[i].type > 7) - { - LOG_ERROR(RSX, "GLGSRender::EnableVertexData: Bad vertex data type (%d)!", vertexData[i].type); - } - - D3D12_INPUT_ELEMENT_DESC IAElement = {}; - /* if (!m_vertex_data[i].addr) - { - switch (m_vertex_data[i].type) - { - case CELL_GCM_VERTEX_S32K: - case CELL_GCM_VERTEX_S1: - switch (m_vertex_data[i].size) - { - case 1: glVertexAttrib1s(i, (GLshort&)m_vertex_data[i].data[0]); break; - case 2: glVertexAttrib2sv(i, (GLshort*)&m_vertex_data[i].data[0]); break; - case 3: glVertexAttrib3sv(i, (GLshort*)&m_vertex_data[i].data[0]); break; - case 4: glVertexAttrib4sv(i, (GLshort*)&m_vertex_data[i].data[0]); break; - } - break; - - case CELL_GCM_VERTEX_F: - switch (m_vertex_data[i].size) - { - case 1: glVertexAttrib1f(i, (GLfloat&)m_vertex_data[i].data[0]); break; - case 2: glVertexAttrib2fv(i, (GLfloat*)&m_vertex_data[i].data[0]); break; - case 3: glVertexAttrib3fv(i, (GLfloat*)&m_vertex_data[i].data[0]); break; - case 4: glVertexAttrib4fv(i, (GLfloat*)&m_vertex_data[i].data[0]); break; - } - break; - - case CELL_GCM_VERTEX_CMP: - case CELL_GCM_VERTEX_UB: - glVertexAttrib4ubv(i, (GLubyte*)&m_vertex_data[i].data[0]); - break; - } - - checkForGlError("glVertexAttrib"); - } - else*/ + for (size_t 
attributeId : vertexBufferFormat[inputSlot].attributeId) { + D3D12_INPUT_ELEMENT_DESC IAElement = {}; IAElement.SemanticName = "TEXCOORD"; - IAElement.SemanticIndex = i; + IAElement.SemanticIndex = (UINT)attributeId; IAElement.InputSlot = (UINT)inputSlot; - IAElement.Format = getFormat(vertexData[i].type - 1, vertexData[i].size); - inputSlot++; + IAElement.Format = getFormat(vertexData[attributeId].type - 1, vertexData[attributeId].size); + IAElement.AlignedByteOffset = (UINT)(vertexData[attributeId].addr - vertexBufferFormat[inputSlot].range.first); + result.push_back(IAElement); } - result.push_back(IAElement); } return result; } @@ -256,22 +148,64 @@ D3D12_RESOURCE_DESC getBufferResourceDesc(size_t sizeInByte) // D3D12GS member handling buffers + + +#define MIN2(x, y) ((x) < (y)) ? (x) : (y) +#define MAX2(x, y) ((x) > (y)) ? (x) : (y) + +static +bool overlaps(const std::pair &range1, const std::pair &range2) +{ + return !(range1.second < range2.first || range2.second < range1.first); +} + +static +std::vector FormatVertexData(RSXVertexData *m_vertex_data) +{ + std::vector Result; + for (size_t i = 0; i < 32; ++i) + { + if (!m_vertex_data[i].IsEnabled()) continue; + size_t elementCount = m_vertex_data[i].data.size() / (m_vertex_data[i].size * m_vertex_data[i].GetTypeSize()); + std::pair range = std::make_pair(m_vertex_data[i].addr, m_vertex_data[i].addr + elementCount * m_vertex_data[i].stride); + bool isMerged = false; + for (VertexBufferFormat &vbf : Result) + { + if (overlaps(vbf.range, range)) + { + // Extend buffer if necessary + vbf.range.first = MIN2(vbf.range.first, range.first); + vbf.range.second = MAX2(vbf.range.second, range.second); + vbf.elementCount = MAX2(vbf.elementCount, elementCount); + assert(vbf.stride == m_vertex_data[i].stride); + vbf.attributeId.push_back(i); + isMerged = true; + break; + } + } + if (isMerged) + continue; + VertexBufferFormat newRange = { range, std::vector{ i }, elementCount, m_vertex_data[i].stride }; + 
Result.emplace_back(newRange); + } + return Result; +} + std::pair, D3D12_INDEX_BUFFER_VIEW> D3D12GSRender::EnableVertexData(bool indexed_draw) { std::pair, D3D12_INDEX_BUFFER_VIEW> result; - m_IASet = getIALayout(m_device, indexed_draw, m_vertex_data); + const std::vector &vertexBufferFormat = FormatVertexData(m_vertex_data); + m_IASet = getIALayout(m_device, vertexBufferFormat, m_vertex_data); const u32 data_offset = indexed_draw ? 0 : m_draw_array_first; - for (u32 i = 0; i < m_vertex_count; ++i) + for (size_t buffer = 0; buffer < vertexBufferFormat.size(); buffer++) { - if (!m_vertex_data[i].IsEnabled()) continue; - const size_t item_size = m_vertex_data[i].GetTypeSize() * m_vertex_data[i].size; - const size_t data_size = m_vertex_data[i].data.size() - data_offset * item_size; - size_t subBufferSize = (data_offset + data_size) * item_size; + const VertexBufferFormat &vbf = vertexBufferFormat[buffer]; // 65536 alignment size_t bufferHeapOffset = getCurrentResourceStorage().m_vertexIndexBuffersHeapFreeSpace; bufferHeapOffset = (bufferHeapOffset + 65536 - 1) & ~65535; + size_t subBufferSize = vbf.range.second - vbf.range.first; ID3D12Resource *vertexBuffer; check(m_device->CreatePlacedResource( @@ -284,14 +218,51 @@ std::pair, D3D12_INDEX_BUFFER_VIEW> D3D12G )); void *bufferMap; check(vertexBuffer->Map(0, nullptr, (void**)&bufferMap)); - memcpy((char*)bufferMap + data_offset * item_size, &m_vertex_data[i].data[data_offset * item_size], data_size); + + for (unsigned vertex = 0; vertex < vbf.elementCount; vertex++) + { + for (size_t attributeId : vbf.attributeId) + { + size_t baseOffset = m_vertex_data[attributeId].addr - vbf.range.first; + size_t tsize = m_vertex_data[attributeId].GetTypeSize(); + size_t size = m_vertex_data[attributeId].size; + auto src = vm::get_ptr(m_vertex_data[attributeId].addr + vbf.stride * vertex); + char* dst = (char*)bufferMap + baseOffset + vbf.stride * vertex; + + switch (tsize) + { + case 1: + { + memcpy(dst, src, size); + break; + } + 
+ case 2: + { + const u16* c_src = (const u16*)src; + u16* c_dst = (u16*)dst; + for (u32 j = 0; j < size; ++j) *c_dst++ = re16(*c_src++); + break; + } + + case 4: + { + const u32* c_src = (const u32*)src; + u32* c_dst = (u32*)dst; + for (u32 j = 0; j < size; ++j) *c_dst++ = re32(*c_src++); + break; + } + } + } + } + vertexBuffer->Unmap(0, nullptr); getCurrentResourceStorage().m_inflightResources.push_back(vertexBuffer); D3D12_VERTEX_BUFFER_VIEW vertexBufferView = {}; vertexBufferView.BufferLocation = vertexBuffer->GetGPUVirtualAddress(); vertexBufferView.SizeInBytes = (UINT)subBufferSize; - vertexBufferView.StrideInBytes = (UINT)item_size; + vertexBufferView.StrideInBytes = (UINT)vbf.stride; result.first.push_back(vertexBufferView); getCurrentResourceStorage().m_vertexIndexBuffersHeapFreeSpace = bufferHeapOffset + subBufferSize; } From 8ad1bc197012506230bde7e47bce01a916fd5128 Mon Sep 17 00:00:00 2001 From: vlj Date: Tue, 26 May 2015 00:09:57 +0200 Subject: [PATCH 133/343] d3d12: Fix powerOf2Align --- rpcs3/Emu/RSX/D3D12/D3D12.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12.h b/rpcs3/Emu/RSX/D3D12/D3D12.h index bf93a190cc..346dfb7ea7 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12.h @@ -19,7 +19,7 @@ size_t powerOf2Align(size_t unalignedVal, size_t powerOf2) { // check that powerOf2 is power of 2 assert(!(powerOf2 & (powerOf2 - 1))); - return (unalignedVal + powerOf2 - 1) & ~powerOf2; + return (unalignedVal + powerOf2 - 1) & ~(powerOf2 - 1); } /** From 8bd678afc0442fd0c7b67c5ccad9206ff7a0e4ed Mon Sep 17 00:00:00 2001 From: vlj Date: Tue, 26 May 2015 00:28:20 +0200 Subject: [PATCH 134/343] d3d12: Fix vertex buffer reconstruction with mismatching stride --- rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index 653ea21454..0a19646bf4 100644 --- 
a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -169,15 +169,16 @@ std::vector FormatVertexData(RSXVertexData *m_vertex_data) size_t elementCount = m_vertex_data[i].data.size() / (m_vertex_data[i].size * m_vertex_data[i].GetTypeSize()); std::pair range = std::make_pair(m_vertex_data[i].addr, m_vertex_data[i].addr + elementCount * m_vertex_data[i].stride); bool isMerged = false; + size_t stride = m_vertex_data[i].stride; for (VertexBufferFormat &vbf : Result) { - if (overlaps(vbf.range, range)) + if (overlaps(vbf.range, range) && vbf.stride == stride) { // Extend buffer if necessary vbf.range.first = MIN2(vbf.range.first, range.first); vbf.range.second = MAX2(vbf.range.second, range.second); vbf.elementCount = MAX2(vbf.elementCount, elementCount); - assert(vbf.stride == m_vertex_data[i].stride); + vbf.attributeId.push_back(i); isMerged = true; break; From 233450a7ca9f082f5412b961074f37c6bda1ef88 Mon Sep 17 00:00:00 2001 From: vlj Date: Tue, 26 May 2015 15:48:35 +0200 Subject: [PATCH 135/343] d3d12: Fix stencil reflect test --- rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index 0a19646bf4..66bd5917f6 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -167,9 +167,12 @@ std::vector FormatVertexData(RSXVertexData *m_vertex_data) { if (!m_vertex_data[i].IsEnabled()) continue; size_t elementCount = m_vertex_data[i].data.size() / (m_vertex_data[i].size * m_vertex_data[i].GetTypeSize()); - std::pair range = std::make_pair(m_vertex_data[i].addr, m_vertex_data[i].addr + elementCount * m_vertex_data[i].stride); - bool isMerged = false; + // If there is a single element, stride is 0, use the size of element instead size_t stride = m_vertex_data[i].stride; + size_t elementSize = m_vertex_data[i].GetTypeSize(); + std::pair range = 
std::make_pair(m_vertex_data[i].addr, m_vertex_data[i].addr + elementSize + (elementCount - 1) * stride); + bool isMerged = false; + for (VertexBufferFormat &vbf : Result) { if (overlaps(vbf.range, range) && vbf.stride == stride) @@ -186,7 +189,7 @@ std::vector FormatVertexData(RSXVertexData *m_vertex_data) } if (isMerged) continue; - VertexBufferFormat newRange = { range, std::vector{ i }, elementCount, m_vertex_data[i].stride }; + VertexBufferFormat newRange = { range, std::vector{ i }, elementCount, stride }; Result.emplace_back(newRange); } return Result; From c5a1f8930fa55016cb5bbbdcfab6362932bdcf0b Mon Sep 17 00:00:00 2001 From: vlj Date: Tue, 26 May 2015 15:58:03 +0200 Subject: [PATCH 136/343] d3d12: Another fix for vertex buffer not big enough buffer --- rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index 66bd5917f6..49c1c8aae3 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -170,7 +170,7 @@ std::vector FormatVertexData(RSXVertexData *m_vertex_data) // If there is a single element, stride is 0, use the size of element instead size_t stride = m_vertex_data[i].stride; size_t elementSize = m_vertex_data[i].GetTypeSize(); - std::pair range = std::make_pair(m_vertex_data[i].addr, m_vertex_data[i].addr + elementSize + (elementCount - 1) * stride); + std::pair range = std::make_pair(m_vertex_data[i].addr, m_vertex_data[i].addr + elementSize + elementCount * stride); bool isMerged = false; for (VertexBufferFormat &vbf : Result) From 9c193e02829f63c653c3cc875479f2fc450710c9 Mon Sep 17 00:00:00 2001 From: vlj Date: Tue, 26 May 2015 18:36:05 +0200 Subject: [PATCH 137/343] d3d12: Implement semaphore --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 42 +++++++++++++++++++++++++++ rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 3 ++ 2 files changed, 45 insertions(+) diff --git 
a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index d812a05350..3363b60122 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -1162,4 +1162,46 @@ void D3D12GSRender::WriteDepthBuffer() fence->Release(); downloadCommandList->Release(); } + + +void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) +{ + ID3D12Fence *fence; + check( + m_device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&fence)) + ); + HANDLE handle = CreateEvent(0, FALSE, FALSE, 0); + fence->SetEventOnCompletion(1, handle); + m_commandQueueGraphic->Signal(fence, 1); + + std::thread valueChangerThread([=]() { + WaitForSingleObject(handle, INFINITE); + CloseHandle(handle); + fence->Release(); + vm::write32(m_label_addr + offset, value); + }); + valueChangerThread.detach(); +} + +void D3D12GSRender::semaphorePFIFOAcquire(u32 offset, u32 value) +{ + + ID3D12Fence *fence; + check( + m_device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&fence)) + ); + m_commandQueueGraphic->Wait(fence, 1); + + std::thread valueChangerThread([=]() { + while (true) + { + u32 val = vm::read32(m_label_addr + offset); + if (val == value) break; + } + fence->Signal(1); + fence->Release(); + } + ); + valueChangerThread.join(); +} #endif \ No newline at end of file diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index dcd1399d63..697d3fbda4 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -119,6 +119,9 @@ public: D3D12GSRender(); virtual ~D3D12GSRender(); + virtual void semaphorePGRAPHBackendRelease(u32 offset, u32 value) override; + virtual void semaphorePFIFOAcquire(u32 offset, u32 value) override; + private: virtual void Close() override; From bc25f4d6ad74a3cfd2f68a0fe7b268f3b5cc6200 Mon Sep 17 00:00:00 2001 From: vlj Date: Tue, 26 May 2015 18:57:46 +0200 Subject: [PATCH 138/343] d3d12: Make waiting thread sleeping --- 
rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 3363b60122..65fa5b9fec 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -4,6 +4,8 @@ #include #include #include +#include +#include GetGSFrameCb2 GetGSFrame = nullptr; @@ -1185,7 +1187,6 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) void D3D12GSRender::semaphorePFIFOAcquire(u32 offset, u32 value) { - ID3D12Fence *fence; check( m_device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&fence)) @@ -1197,11 +1198,12 @@ void D3D12GSRender::semaphorePFIFOAcquire(u32 offset, u32 value) { u32 val = vm::read32(m_label_addr + offset); if (val == value) break; + std::this_thread::sleep_for(std::chrono::milliseconds(100)); } fence->Signal(1); fence->Release(); } ); - valueChangerThread.join(); + valueChangerThread.detach(); } #endif \ No newline at end of file From b72d435ce87d5ddd0a97fef7c7fab091b87017b1 Mon Sep 17 00:00:00 2001 From: vlj Date: Tue, 26 May 2015 19:19:04 +0200 Subject: [PATCH 139/343] d3d12: fix stencil reflect again --- rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index 49c1c8aae3..b04733c984 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -227,6 +227,7 @@ std::pair, D3D12_INDEX_BUFFER_VIEW> D3D12G { for (size_t attributeId : vbf.attributeId) { + if (!m_vertex_data[attributeId].addr) continue; size_t baseOffset = m_vertex_data[attributeId].addr - vbf.range.first; size_t tsize = m_vertex_data[attributeId].GetTypeSize(); size_t size = m_vertex_data[attributeId].size; From 1228787355fdd618d4079aceed523f407cb7fe20 Mon Sep 17 00:00:00 2001 From: vlj Date: Tue, 26 May 2015 22:59:08 +0200 Subject: [PATCH 140/343] d3d12: DMA depth buffer 
when semaphore write is asked. --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 189 +++++++++++++------------- 1 file changed, 93 insertions(+), 96 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 65fa5b9fec..a85e5ec1a0 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -1067,107 +1067,79 @@ void D3D12GSRender::Flip() void D3D12GSRender::WriteDepthBuffer() { - if (!Ini.GSDumpDepthBuffer.GetValue()) - return; - if (!m_set_context_dma_z) - return; - - u32 address = GetAddress(m_surface_offset_z, m_context_dma_z - 0xfeed0000); - auto ptr = vm::get_ptr(address); - - D3D12_HEAP_PROPERTIES heapProp = {}; - heapProp.Type = D3D12_HEAP_TYPE_READBACK; - D3D12_RESOURCE_DESC resdesc = {}; - resdesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; - resdesc.Width = RSXThread::m_width * RSXThread::m_height * 4 * 2; // * 2 for safety - resdesc.Height = 1; - resdesc.DepthOrArraySize = 1; - resdesc.SampleDesc.Count = 1; - resdesc.MipLevels = 1; - resdesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; - - ID3D12Resource *writeDest; - check( - m_device->CreateCommittedResource( - &heapProp, - D3D12_HEAP_FLAG_NONE, - &resdesc, - D3D12_RESOURCE_STATE_COPY_DEST, - nullptr, - IID_PPV_ARGS(&writeDest) - ) - ); - - ID3D12GraphicsCommandList *downloadCommandList; - check( - m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, getCurrentResourceStorage().m_commandAllocator, nullptr, IID_PPV_ARGS(&downloadCommandList)) - ); - - D3D12_RESOURCE_BARRIER barrier = {}; - barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - barrier.Transition.pResource = m_fbo->getDepthStencilTexture(); - barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_DEPTH_WRITE; - barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE; - downloadCommandList->ResourceBarrier(1, &barrier); - - size_t rowPitch = RSXThread::m_width * sizeof(float); - rowPitch = (rowPitch + 255) & ~255; - - 
D3D12_TEXTURE_COPY_LOCATION dst = {}, src = {}; - src.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; - src.pResource = m_fbo->getDepthStencilTexture(); - dst.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; - dst.pResource = writeDest; - dst.PlacedFootprint.Offset = 0; - dst.PlacedFootprint.Footprint.Depth = 1; - dst.PlacedFootprint.Footprint.Format = DXGI_FORMAT_R32_FLOAT; - dst.PlacedFootprint.Footprint.Height = RSXThread::m_height; - dst.PlacedFootprint.Footprint.Width = RSXThread::m_width; - dst.PlacedFootprint.Footprint.RowPitch = (UINT)rowPitch; - downloadCommandList->CopyTextureRegion(&dst, 0, 0, 0, &src, nullptr); - - barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE; - barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_DEPTH_WRITE; - downloadCommandList->ResourceBarrier(1, &barrier); - - downloadCommandList->Close(); - m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)&downloadCommandList); - - //Wait for result - ID3D12Fence *fence; - check( - m_device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&fence)) - ); - HANDLE handle = CreateEvent(0, FALSE, FALSE, 0); - fence->SetEventOnCompletion(1, handle); - m_commandQueueGraphic->Signal(fence, 1); - WaitForSingleObject(handle, INFINITE); - CloseHandle(handle); - - char *ptrAsChar = (char*)ptr; - float *writeDestPtr; - check(writeDest->Map(0, nullptr, (void**)&writeDestPtr)); - // TODO : this should be done by the gpu - for (unsigned row = 0; row < RSXThread::m_height; row++) - { - for (unsigned i = 0; i < RSXThread::m_width; i++) - { - unsigned char c = (unsigned char) (writeDestPtr[row * rowPitch / 4 + i] * 255.); - ptrAsChar[4 * (row * RSXThread::m_width + i)] = c; - ptrAsChar[4 * (row * RSXThread::m_width + i) + 1] = c; - ptrAsChar[4 * (row * RSXThread::m_width + i) + 2] = c; - ptrAsChar[4 * (row * RSXThread::m_width + i) + 3] = c; - } - } - - writeDest->Release(); - fence->Release(); - downloadCommandList->Release(); } void 
D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) { + // Add all buffer write + // Cell can't make any assumption about readyness of color/depth buffer + // Except when a semaphore is written by RSX + + +/* if (!Ini.GSDumpDepthBuffer.GetValue()) + return;*/ + ID3D12Resource *writeDest; + ID3D12GraphicsCommandList *downloadCommandList; + size_t rowPitch = RSXThread::m_width * sizeof(float); + rowPitch = (rowPitch + 255) & ~255; + if (m_set_context_dma_z) + { + D3D12_HEAP_PROPERTIES heapProp = {}; + heapProp.Type = D3D12_HEAP_TYPE_READBACK; + D3D12_RESOURCE_DESC resdesc = {}; + resdesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + resdesc.Width = RSXThread::m_width * RSXThread::m_height * 4 * 2; // * 2 for safety + resdesc.Height = 1; + resdesc.DepthOrArraySize = 1; + resdesc.SampleDesc.Count = 1; + resdesc.MipLevels = 1; + resdesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + + check( + m_device->CreateCommittedResource( + &heapProp, + D3D12_HEAP_FLAG_NONE, + &resdesc, + D3D12_RESOURCE_STATE_COPY_DEST, + nullptr, + IID_PPV_ARGS(&writeDest) + ) + ); + + check( + m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, getCurrentResourceStorage().m_commandAllocator, nullptr, IID_PPV_ARGS(&downloadCommandList)) + ); + + D3D12_RESOURCE_BARRIER barrier = {}; + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barrier.Transition.pResource = m_fbo->getDepthStencilTexture(); + barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_DEPTH_WRITE; + barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE; + downloadCommandList->ResourceBarrier(1, &barrier); + + D3D12_TEXTURE_COPY_LOCATION dst = {}, src = {}; + src.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + src.pResource = m_fbo->getDepthStencilTexture(); + dst.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + dst.pResource = writeDest; + dst.PlacedFootprint.Offset = 0; + dst.PlacedFootprint.Footprint.Depth = 1; + dst.PlacedFootprint.Footprint.Format = DXGI_FORMAT_R32_FLOAT; + 
dst.PlacedFootprint.Footprint.Height = RSXThread::m_height; + dst.PlacedFootprint.Footprint.Width = RSXThread::m_width; + dst.PlacedFootprint.Footprint.RowPitch = (UINT)rowPitch; + downloadCommandList->CopyTextureRegion(&dst, 0, 0, 0, &src, nullptr); + + barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE; + barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_DEPTH_WRITE; + downloadCommandList->ResourceBarrier(1, &barrier); + + downloadCommandList->Close(); + m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)&downloadCommandList); + } + + //Wait for result ID3D12Fence *fence; check( m_device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&fence)) @@ -1180,6 +1152,31 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) WaitForSingleObject(handle, INFINITE); CloseHandle(handle); fence->Release(); + + if (m_set_context_dma_z) + { + u32 address = GetAddress(m_surface_offset_z, m_context_dma_z - 0xfeed0000); + auto ptr = vm::get_ptr(address); + char *ptrAsChar = (char*)ptr; + float *writeDestPtr; + check(writeDest->Map(0, nullptr, (void**)&writeDestPtr)); + // TODO : this should be done by the gpu + for (unsigned row = 0; row < RSXThread::m_height; row++) + { + for (unsigned i = 0; i < RSXThread::m_width; i++) + { + unsigned char c = (unsigned char)(writeDestPtr[row * rowPitch / 4 + i] * 255.); + ptrAsChar[4 * (row * RSXThread::m_width + i)] = c; + ptrAsChar[4 * (row * RSXThread::m_width + i) + 1] = c; + ptrAsChar[4 * (row * RSXThread::m_width + i) + 2] = c; + ptrAsChar[4 * (row * RSXThread::m_width + i) + 3] = c; + } + } + writeDest->Release(); + fence->Release(); + downloadCommandList->Release(); + } + vm::write32(m_label_addr + offset, value); }); valueChangerThread.detach(); From 56853de0efbb7f7f4ab73d6663b4303f4e32db9c Mon Sep 17 00:00:00 2001 From: vlj Date: Tue, 26 May 2015 23:04:27 +0200 Subject: [PATCH 141/343] d3d12: Do not bind depth stencil buffer if depth test is disabled --- 
rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index a85e5ec1a0..9cb483101e 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -615,23 +615,24 @@ void D3D12GSRender::ExecCMD() InitDrawBuffers(); + D3D12_CPU_DESCRIPTOR_HANDLE *DepthStencilHandle = m_set_depth_test ? &m_fbo->getDSVCPUHandle() : nullptr; switch (m_surface_color_target) { case CELL_GCM_SURFACE_TARGET_NONE: break; case CELL_GCM_SURFACE_TARGET_0: - commandList->OMSetRenderTargets(1, &m_fbo->getRTTCPUHandle(0), true, &m_fbo->getDSVCPUHandle()); + commandList->OMSetRenderTargets(1, &m_fbo->getRTTCPUHandle(0), true, DepthStencilHandle); break; case CELL_GCM_SURFACE_TARGET_1: - commandList->OMSetRenderTargets(1, &m_fbo->getRTTCPUHandle(1), true, &m_fbo->getDSVCPUHandle()); + commandList->OMSetRenderTargets(1, &m_fbo->getRTTCPUHandle(1), true, DepthStencilHandle); break; case CELL_GCM_SURFACE_TARGET_MRT1: - commandList->OMSetRenderTargets(2, &m_fbo->getRTTCPUHandle(0), true, &m_fbo->getDSVCPUHandle()); + commandList->OMSetRenderTargets(2, &m_fbo->getRTTCPUHandle(0), true, DepthStencilHandle); break; case CELL_GCM_SURFACE_TARGET_MRT2: - commandList->OMSetRenderTargets(3, &m_fbo->getRTTCPUHandle(0), true, &m_fbo->getDSVCPUHandle()); + commandList->OMSetRenderTargets(3, &m_fbo->getRTTCPUHandle(0), true, DepthStencilHandle); break; case CELL_GCM_SURFACE_TARGET_MRT3: - commandList->OMSetRenderTargets(4, &m_fbo->getRTTCPUHandle(0), true, &m_fbo->getDSVCPUHandle()); + commandList->OMSetRenderTargets(4, &m_fbo->getRTTCPUHandle(0), true, DepthStencilHandle); break; default: LOG_ERROR(RSX, "Bad surface color target: %d", m_surface_color_target); From b50ab149d6cb14c5b418e688c3ebdc853276fe81 Mon Sep 17 00:00:00 2001 From: vlj Date: Wed, 27 May 2015 00:06:58 +0200 Subject: [PATCH 142/343] d3d12: Convert depth buffer on gpu --- 
rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 146 +++++++++++++++++++++----- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 3 + 2 files changed, 122 insertions(+), 27 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 9cb483101e..7c26575dbc 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -107,17 +107,22 @@ void D3D12GSRender::ResourceStorage::Release() // 32 bits float to U8 unorm CS #define STRINGIFY(x) #x const char *shaderCode = STRINGIFY( -Texture2D InputTexture : register(t0); \n -RWTexture2D OutputTexture : register(u0);\n + Texture2D InputTexture : register(t0); \n + RWTexture2D OutputTexture : register(u0);\n -[numthreads(1, 1, 1)]\n -void main(uint3 Id : SV_DispatchThreadID)\n + [numthreads(8, 8, 1)]\n + void main(uint3 Id : SV_DispatchThreadID)\n { \n - OutputTexture[Id.xy] = InputTexture.Load(uint3(Id.xy, 0));\n + OutputTexture[Id.xy] = InputTexture.Load(uint3(Id.xy, 0)) * 255.f;\n } ); -static void compileF32toU8CS() +/** + * returns bytecode and root signature of a Compute Shader converting texture from + * one format to another + */ +static +std::pair compileF32toU8CS() { ID3DBlob *bytecode; Microsoft::WRL::ComPtr errorBlob; @@ -135,18 +140,15 @@ static void compileF32toU8CS() descriptorRange[1].BaseShaderRegister = 0; descriptorRange[1].NumDescriptors = 1; descriptorRange[1].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV; + descriptorRange[1].OffsetInDescriptorsFromTableStart = 1; D3D12_ROOT_PARAMETER RP[2] = {}; RP[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; RP[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; RP[0].DescriptorTable.pDescriptorRanges = &descriptorRange[0]; - RP[0].DescriptorTable.NumDescriptorRanges = 1; - RP[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; - RP[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; - RP[1].DescriptorTable.pDescriptorRanges = &descriptorRange[1]; - 
RP[1].DescriptorTable.NumDescriptorRanges = 1; + RP[0].DescriptorTable.NumDescriptorRanges = 2; D3D12_ROOT_SIGNATURE_DESC rootSignatureDesc = {}; - rootSignatureDesc.NumParameters = 2; + rootSignatureDesc.NumParameters = 1; rootSignatureDesc.pParameters = RP; ID3DBlob *rootSignatureBlob; @@ -157,6 +159,8 @@ static void compileF32toU8CS() const char *tmp = (const char*)errorBlob->GetBufferPointer(); LOG_ERROR(RSX, tmp); } + + return std::make_pair(bytecode, rootSignatureBlob); } D3D12GSRender::D3D12GSRender() @@ -281,13 +285,31 @@ D3D12GSRender::D3D12GSRender() m_perFrameStorage[1].Reset(); m_currentResourceStorageIndex = m_swapChain->GetCurrentBackBufferIndex(); - compileF32toU8CS(); - vertexConstantShadowCopy = new float[512 * 4]; + + // Convert shader + auto p = compileF32toU8CS(); + check( + m_device->CreateRootSignature(0, p.second->GetBufferPointer(), p.second->GetBufferSize(), IID_PPV_ARGS(&m_convertRootSignature)) + ); + + D3D12_COMPUTE_PIPELINE_STATE_DESC computePipelineStateDesc = {}; + computePipelineStateDesc.CS.BytecodeLength = p.first->GetBufferSize(); + computePipelineStateDesc.CS.pShaderBytecode = p.first->GetBufferPointer(); + computePipelineStateDesc.pRootSignature = m_convertRootSignature; + + check( + m_device->CreateComputePipelineState(&computePipelineStateDesc, IID_PPV_ARGS(&m_convertPSO)) + ); + + p.first->Release(); + p.second->Release(); } D3D12GSRender::~D3D12GSRender() { + m_convertPSO->Release(); + m_convertRootSignature->Release(); m_perFrameStorage[0].Release(); m_perFrameStorage[1].Release(); m_commandQueueGraphic->Release(); @@ -615,7 +637,7 @@ void D3D12GSRender::ExecCMD() InitDrawBuffers(); - D3D12_CPU_DESCRIPTOR_HANDLE *DepthStencilHandle = m_set_depth_test ? 
&m_fbo->getDSVCPUHandle() : nullptr; + D3D12_CPU_DESCRIPTOR_HANDLE *DepthStencilHandle = &m_fbo->getDSVCPUHandle(); switch (m_surface_color_target) { case CELL_GCM_SURFACE_TARGET_NONE: break; @@ -1080,17 +1102,41 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) /* if (!Ini.GSDumpDepthBuffer.GetValue()) return;*/ - ID3D12Resource *writeDest; + ID3D12Resource *writeDest, *depthConverted; ID3D12GraphicsCommandList *downloadCommandList; - size_t rowPitch = RSXThread::m_width * sizeof(float); + ID3D12DescriptorHeap *descriptorHeap; + size_t rowPitch = RSXThread::m_width; rowPitch = (rowPitch + 255) & ~255; if (m_set_context_dma_z) { D3D12_HEAP_PROPERTIES heapProp = {}; - heapProp.Type = D3D12_HEAP_TYPE_READBACK; + heapProp.Type = D3D12_HEAP_TYPE_DEFAULT; D3D12_RESOURCE_DESC resdesc = {}; + resdesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; + resdesc.Width = RSXThread::m_width; + resdesc.Height = RSXThread::m_height; + resdesc.DepthOrArraySize = 1; + resdesc.SampleDesc.Count = 1; + resdesc.MipLevels = 1; + resdesc.Format = DXGI_FORMAT_R8_UNORM; + resdesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + + check( + m_device->CreateCommittedResource( + &heapProp, + D3D12_HEAP_FLAG_NONE, + &resdesc, + D3D12_RESOURCE_STATE_UNORDERED_ACCESS, + nullptr, + IID_PPV_ARGS(&depthConverted) + ) + ); + + heapProp = {}; + heapProp.Type = D3D12_HEAP_TYPE_READBACK; + resdesc = {}; resdesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; - resdesc.Width = RSXThread::m_width * RSXThread::m_height * 4 * 2; // * 2 for safety + resdesc.Width = RSXThread::m_width * RSXThread::m_height * 2; // * 2 for safety resdesc.Height = 1; resdesc.DepthOrArraySize = 1; resdesc.SampleDesc.Count = 1; @@ -1112,30 +1158,74 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, getCurrentResourceStorage().m_commandAllocator, nullptr, IID_PPV_ARGS(&downloadCommandList)) ); + 
D3D12_DESCRIPTOR_HEAP_DESC descriptorHeapDesc = {}; + descriptorHeapDesc.NumDescriptors = 2; + descriptorHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; + descriptorHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; + check( + m_device->CreateDescriptorHeap(&descriptorHeapDesc, IID_PPV_ARGS(&descriptorHeap)) + ); + D3D12_CPU_DESCRIPTOR_HANDLE Handle = descriptorHeap->GetCPUDescriptorHandleForHeapStart(); + D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.Format = DXGI_FORMAT_R32_FLOAT; + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + srvDesc.Texture2D.MipLevels = 1; + srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + m_device->CreateShaderResourceView(m_fbo->getDepthStencilTexture(), &srvDesc, Handle); + Handle.ptr += m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; + uavDesc.Format = DXGI_FORMAT_R8_UNORM; + uavDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D; + m_device->CreateUnorderedAccessView(depthConverted, nullptr, &uavDesc, Handle); + + + // Convert D3D12_RESOURCE_BARRIER barrier = {}; barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; barrier.Transition.pResource = m_fbo->getDepthStencilTexture(); barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_DEPTH_WRITE; + barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_GENERIC_READ; + downloadCommandList->ResourceBarrier(1, &barrier); + + downloadCommandList->SetPipelineState(m_convertPSO); + downloadCommandList->SetComputeRootSignature(m_convertRootSignature); + downloadCommandList->SetDescriptorHeaps(1, &descriptorHeap); + downloadCommandList->SetComputeRootDescriptorTable(0, descriptorHeap->GetGPUDescriptorHandleForHeapStart()); + downloadCommandList->Dispatch(RSXThread::m_width / 8, RSXThread::m_height / 8, 1); + + barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_GENERIC_READ; + barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_DEPTH_WRITE; + // 
Flush UAV + D3D12_RESOURCE_BARRIER uavbarrier = {}; + uavbarrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV; + uavbarrier.UAV.pResource = depthConverted; + + D3D12_RESOURCE_BARRIER barriers[] = + { + barrier, + uavbarrier, + }; + downloadCommandList->ResourceBarrier(2, barriers); + + // Copy + barrier.Transition.pResource = depthConverted; + barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE; downloadCommandList->ResourceBarrier(1, &barrier); D3D12_TEXTURE_COPY_LOCATION dst = {}, src = {}; src.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; - src.pResource = m_fbo->getDepthStencilTexture(); + src.pResource = depthConverted; dst.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; dst.pResource = writeDest; dst.PlacedFootprint.Offset = 0; dst.PlacedFootprint.Footprint.Depth = 1; - dst.PlacedFootprint.Footprint.Format = DXGI_FORMAT_R32_FLOAT; + dst.PlacedFootprint.Footprint.Format = DXGI_FORMAT_R8_UNORM; dst.PlacedFootprint.Footprint.Height = RSXThread::m_height; dst.PlacedFootprint.Footprint.Width = RSXThread::m_width; dst.PlacedFootprint.Footprint.RowPitch = (UINT)rowPitch; downloadCommandList->CopyTextureRegion(&dst, 0, 0, 0, &src, nullptr); - barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE; - barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_DEPTH_WRITE; - downloadCommandList->ResourceBarrier(1, &barrier); - downloadCommandList->Close(); m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)&downloadCommandList); } @@ -1159,14 +1249,14 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) u32 address = GetAddress(m_surface_offset_z, m_context_dma_z - 0xfeed0000); auto ptr = vm::get_ptr(address); char *ptrAsChar = (char*)ptr; - float *writeDestPtr; + unsigned char *writeDestPtr; check(writeDest->Map(0, nullptr, (void**)&writeDestPtr)); // TODO : this should be done by the gpu for (unsigned row = 0; row < RSXThread::m_height; row++) 
{ for (unsigned i = 0; i < RSXThread::m_width; i++) { - unsigned char c = (unsigned char)(writeDestPtr[row * rowPitch / 4 + i] * 255.); + unsigned char c = writeDestPtr[row * rowPitch + i]; ptrAsChar[4 * (row * RSXThread::m_width + i)] = c; ptrAsChar[4 * (row * RSXThread::m_width + i) + 1] = c; ptrAsChar[4 * (row * RSXThread::m_width + i) + 2] = c; @@ -1174,6 +1264,8 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) } } writeDest->Release(); + depthConverted->Release(); + descriptorHeap->Release(); fence->Release(); downloadCommandList->Release(); } diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 697d3fbda4..1655b40ea1 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -54,6 +54,9 @@ private: ID3D12PipelineState *m_PSO; ID3D12RootSignature *m_rootSignature; + ID3D12PipelineState *m_convertPSO; + ID3D12RootSignature *m_convertRootSignature; + struct ResourceStorage { ID3D12CommandAllocator *m_commandAllocator; From f6182479377d352fce578d26b4f7765cc046c828 Mon Sep 17 00:00:00 2001 From: vlj Date: Wed, 27 May 2015 00:14:22 +0200 Subject: [PATCH 143/343] d3d12: Fix format of depth texture --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 7c26575dbc..ac2c8c6dd6 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -113,7 +113,7 @@ const char *shaderCode = STRINGIFY( [numthreads(8, 8, 1)]\n void main(uint3 Id : SV_DispatchThreadID)\n { \n - OutputTexture[Id.xy] = InputTexture.Load(uint3(Id.xy, 0)) * 255.f;\n + OutputTexture[Id.xy] = InputTexture.Load(uint3(Id.xy, 0));\n } ); @@ -1167,7 +1167,20 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) ); D3D12_CPU_DESCRIPTOR_HANDLE Handle = 
descriptorHeap->GetCPUDescriptorHandleForHeapStart(); D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; - srvDesc.Format = DXGI_FORMAT_R32_FLOAT; + switch (m_surface_depth_format) + { + case 0: + break; + case CELL_GCM_SURFACE_Z16: + srvDesc.Format = DXGI_FORMAT_R16_UNORM; + break; + case CELL_GCM_SURFACE_Z24S8: + srvDesc.Format = DXGI_FORMAT_R24_UNORM_X8_TYPELESS; + break; + default: + LOG_ERROR(RSX, "Bad depth format! (%d)", m_surface_depth_format); + assert(0); + } srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; srvDesc.Texture2D.MipLevels = 1; srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; From 7a431e85f6c054b2624e8601390ca33f0c3ea47f Mon Sep 17 00:00:00 2001 From: vlj Date: Wed, 27 May 2015 00:27:15 +0200 Subject: [PATCH 144/343] d3d12: Use copy queue to transfer z buffer --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 64 ++++++++++++++++++--------- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 1 + 2 files changed, 44 insertions(+), 21 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index ac2c8c6dd6..64f7beb003 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -25,6 +25,7 @@ void D3D12GSRender::ResourceStorage::Reset() m_commandAllocator->Reset(); m_textureUploadCommandAllocator->Reset(); + m_downloadCommandAllocator->Reset(); for (ID3D12GraphicsCommandList *gfxCommandList : m_inflightCommandList) gfxCommandList->Release(); m_inflightCommandList.clear(); @@ -39,6 +40,7 @@ void D3D12GSRender::ResourceStorage::Init(ID3D12Device *device) // Create a global command allocator device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&m_commandAllocator)); device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&m_textureUploadCommandAllocator)); + check(device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_COPY, IID_PPV_ARGS(&m_downloadCommandAllocator))); // Create heap for vertex and constants buffers 
D3D12_HEAP_DESC vertexBufferHeapDesc = {}; @@ -102,6 +104,7 @@ void D3D12GSRender::ResourceStorage::Release() tmp->Release(); m_commandAllocator->Release(); m_textureUploadCommandAllocator->Release(); + m_downloadCommandAllocator->Release(); } // 32 bits float to U8 unorm CS @@ -1102,8 +1105,16 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) /* if (!Ini.GSDumpDepthBuffer.GetValue()) return;*/ + + ID3D12Fence *fence; + check( + m_device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&fence)) + ); + HANDLE handle = CreateEvent(0, FALSE, FALSE, 0); + fence->SetEventOnCompletion(1, handle); + ID3D12Resource *writeDest, *depthConverted; - ID3D12GraphicsCommandList *downloadCommandList; + ID3D12GraphicsCommandList *convertCommandList, *downloadCommandList; ID3D12DescriptorHeap *descriptorHeap; size_t rowPitch = RSXThread::m_width; rowPitch = (rowPitch + 255) & ~255; @@ -1155,7 +1166,7 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) ); check( - m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, getCurrentResourceStorage().m_commandAllocator, nullptr, IID_PPV_ARGS(&downloadCommandList)) + m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, getCurrentResourceStorage().m_commandAllocator, nullptr, IID_PPV_ARGS(&convertCommandList)) ); D3D12_DESCRIPTOR_HEAP_DESC descriptorHeapDesc = {}; @@ -1198,13 +1209,13 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) barrier.Transition.pResource = m_fbo->getDepthStencilTexture(); barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_DEPTH_WRITE; barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_GENERIC_READ; - downloadCommandList->ResourceBarrier(1, &barrier); + convertCommandList->ResourceBarrier(1, &barrier); - downloadCommandList->SetPipelineState(m_convertPSO); - downloadCommandList->SetComputeRootSignature(m_convertRootSignature); - downloadCommandList->SetDescriptorHeaps(1, &descriptorHeap); - 
downloadCommandList->SetComputeRootDescriptorTable(0, descriptorHeap->GetGPUDescriptorHandleForHeapStart()); - downloadCommandList->Dispatch(RSXThread::m_width / 8, RSXThread::m_height / 8, 1); + convertCommandList->SetPipelineState(m_convertPSO); + convertCommandList->SetComputeRootSignature(m_convertRootSignature); + convertCommandList->SetDescriptorHeaps(1, &descriptorHeap); + convertCommandList->SetComputeRootDescriptorTable(0, descriptorHeap->GetGPUDescriptorHandleForHeapStart()); + convertCommandList->Dispatch(RSXThread::m_width / 8, RSXThread::m_height / 8, 1); barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_GENERIC_READ; barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_DEPTH_WRITE; @@ -1218,14 +1229,27 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) barrier, uavbarrier, }; - downloadCommandList->ResourceBarrier(2, barriers); + convertCommandList->ResourceBarrier(2, barriers); - // Copy barrier.Transition.pResource = depthConverted; barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE; - downloadCommandList->ResourceBarrier(1, &barrier); + convertCommandList->ResourceBarrier(1, &barrier); + convertCommandList->Close(); + m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)&convertCommandList); + + ID3D12Fence *convertDownloadFence; + check( + m_device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&convertDownloadFence)) + ); + m_commandQueueGraphic->Signal(convertDownloadFence, 1); + + check( + m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_COPY, getCurrentResourceStorage().m_downloadCommandAllocator, nullptr, IID_PPV_ARGS(&downloadCommandList)) + ); + + // Copy D3D12_TEXTURE_COPY_LOCATION dst = {}, src = {}; src.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; src.pResource = depthConverted; @@ -1240,17 +1264,14 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) 
downloadCommandList->CopyTextureRegion(&dst, 0, 0, 0, &src, nullptr); downloadCommandList->Close(); - m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)&downloadCommandList); + m_commandQueueCopy->Wait(convertDownloadFence, 1); + m_commandQueueCopy->ExecuteCommandLists(1, (ID3D12CommandList**)&downloadCommandList); + //Wait for result + m_commandQueueCopy->Signal(fence, 1); + convertDownloadFence->Release(); } - - //Wait for result - ID3D12Fence *fence; - check( - m_device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&fence)) - ); - HANDLE handle = CreateEvent(0, FALSE, FALSE, 0); - fence->SetEventOnCompletion(1, handle); - m_commandQueueGraphic->Signal(fence, 1); + else + m_commandQueueGraphic->Signal(fence, 1); std::thread valueChangerThread([=]() { WaitForSingleObject(handle, INFINITE); @@ -1281,6 +1302,7 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) descriptorHeap->Release(); fence->Release(); downloadCommandList->Release(); + convertCommandList->Release(); } vm::write32(m_label_addr + offset, value); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 1655b40ea1..9debeec461 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -60,6 +60,7 @@ private: struct ResourceStorage { ID3D12CommandAllocator *m_commandAllocator; + ID3D12CommandAllocator *m_downloadCommandAllocator; std::list m_inflightCommandList; std::vector m_inflightResources; From b81fd132329726caee5d823e1699769de36552b3 Mon Sep 17 00:00:00 2001 From: vlj Date: Wed, 27 May 2015 00:41:58 +0200 Subject: [PATCH 145/343] d3d12: Fix double release --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 64f7beb003..db7b744208 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -1300,7 +1300,6 @@ void 
D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) writeDest->Release(); depthConverted->Release(); descriptorHeap->Release(); - fence->Release(); downloadCommandList->Release(); convertCommandList->Release(); } From c167a1228efeec1f1748fb6b1c2172b9b5271997 Mon Sep 17 00:00:00 2001 From: vlj Date: Thu, 28 May 2015 18:05:58 +0200 Subject: [PATCH 146/343] d3d12: Revert back to a signel resource storage Using two of them doesnt increase perf atm but eats twice the space --- rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp | 52 ++++++------- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 107 +++++++++++--------------- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 12 +-- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 28 +++---- 4 files changed, 89 insertions(+), 110 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index b04733c984..4cb26c1358 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -207,13 +207,13 @@ std::pair, D3D12_INDEX_BUFFER_VIEW> D3D12G { const VertexBufferFormat &vbf = vertexBufferFormat[buffer]; // 65536 alignment - size_t bufferHeapOffset = getCurrentResourceStorage().m_vertexIndexBuffersHeapFreeSpace; + size_t bufferHeapOffset = m_perFrameStorage.m_vertexIndexBuffersHeapFreeSpace; bufferHeapOffset = (bufferHeapOffset + 65536 - 1) & ~65535; size_t subBufferSize = vbf.range.second - vbf.range.first; ID3D12Resource *vertexBuffer; check(m_device->CreatePlacedResource( - getCurrentResourceStorage().m_vertexIndexBuffersHeap, + m_perFrameStorage.m_vertexIndexBuffersHeap, bufferHeapOffset, &getBufferResourceDesc(subBufferSize), D3D12_RESOURCE_STATE_GENERIC_READ, @@ -262,14 +262,14 @@ std::pair, D3D12_INDEX_BUFFER_VIEW> D3D12G } vertexBuffer->Unmap(0, nullptr); - getCurrentResourceStorage().m_inflightResources.push_back(vertexBuffer); + m_perFrameStorage.m_inflightResources.push_back(vertexBuffer); D3D12_VERTEX_BUFFER_VIEW vertexBufferView = {}; vertexBufferView.BufferLocation = 
vertexBuffer->GetGPUVirtualAddress(); vertexBufferView.SizeInBytes = (UINT)subBufferSize; vertexBufferView.StrideInBytes = (UINT)vbf.stride; result.first.push_back(vertexBufferView); - getCurrentResourceStorage().m_vertexIndexBuffersHeapFreeSpace = bufferHeapOffset + subBufferSize; + m_perFrameStorage.m_vertexIndexBuffersHeapFreeSpace = bufferHeapOffset + subBufferSize; } // Only handle quads now @@ -326,12 +326,12 @@ std::pair, D3D12_INDEX_BUFFER_VIEW> D3D12G indexCount = m_draw_array_count * 6 / 4; size_t subBufferSize = powerOf2Align(indexCount * indexSize, 64); // 65536 alignment - size_t bufferHeapOffset = getCurrentResourceStorage().m_vertexIndexBuffersHeapFreeSpace; + size_t bufferHeapOffset = m_perFrameStorage.m_vertexIndexBuffersHeapFreeSpace; bufferHeapOffset = (bufferHeapOffset + 65536 - 1) & ~65535; ID3D12Resource *indexBuffer; check(m_device->CreatePlacedResource( - getCurrentResourceStorage().m_vertexIndexBuffersHeap, + m_perFrameStorage.m_vertexIndexBuffersHeap, bufferHeapOffset, &getBufferResourceDesc(subBufferSize), D3D12_RESOURCE_STATE_GENERIC_READ, @@ -371,8 +371,8 @@ std::pair, D3D12_INDEX_BUFFER_VIEW> D3D12G } } indexBuffer->Unmap(0, nullptr); - getCurrentResourceStorage().m_inflightResources.push_back(indexBuffer); - getCurrentResourceStorage().m_vertexIndexBuffersHeapFreeSpace = bufferHeapOffset + subBufferSize; + m_perFrameStorage.m_inflightResources.push_back(indexBuffer); + m_perFrameStorage.m_vertexIndexBuffersHeapFreeSpace = bufferHeapOffset + subBufferSize; indexBufferView.SizeInBytes = (UINT)subBufferSize; @@ -406,7 +406,7 @@ void D3D12GSRender::setScaleOffset() scaleOffsetMat[3] /= RSXThread::m_width / RSXThread::m_width_scale; scaleOffsetMat[7] /= RSXThread::m_height / RSXThread::m_height_scale; - size_t constantBuffersHeapOffset = getCurrentResourceStorage().m_constantsBuffersHeapFreeSpace; + size_t constantBuffersHeapOffset = m_perFrameStorage.m_constantsBuffersHeapFreeSpace; // 65536 alignment constantBuffersHeapOffset = 
(constantBuffersHeapOffset + 65536 - 1) & ~65535; @@ -414,7 +414,7 @@ void D3D12GSRender::setScaleOffset() // Separate constant buffer ID3D12Resource *scaleOffsetBuffer; check(m_device->CreatePlacedResource( - getCurrentResourceStorage().m_constantsBuffersHeap, + m_perFrameStorage.m_constantsBuffersHeap, constantBuffersHeapOffset, &getBufferResourceDesc(256), D3D12_RESOURCE_STATE_GENERIC_READ, @@ -430,11 +430,11 @@ void D3D12GSRender::setScaleOffset() D3D12_CONSTANT_BUFFER_VIEW_DESC constantBufferViewDesc = {}; constantBufferViewDesc.BufferLocation = scaleOffsetBuffer->GetGPUVirtualAddress(); constantBufferViewDesc.SizeInBytes = (UINT)256; - D3D12_CPU_DESCRIPTOR_HANDLE Handle = getCurrentResourceStorage().m_scaleOffsetDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); - Handle.ptr += getCurrentResourceStorage().m_currentScaleOffsetBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + D3D12_CPU_DESCRIPTOR_HANDLE Handle = m_perFrameStorage.m_scaleOffsetDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); + Handle.ptr += m_perFrameStorage.m_currentScaleOffsetBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); m_device->CreateConstantBufferView(&constantBufferViewDesc, Handle); - getCurrentResourceStorage().m_constantsBuffersHeapFreeSpace = constantBuffersHeapOffset + 256; - getCurrentResourceStorage().m_inflightResources.push_back(scaleOffsetBuffer); + m_perFrameStorage.m_constantsBuffersHeapFreeSpace = constantBuffersHeapOffset + 256; + m_perFrameStorage.m_inflightResources.push_back(scaleOffsetBuffer); } void D3D12GSRender::FillVertexShaderConstantsBuffer() @@ -446,13 +446,13 @@ void D3D12GSRender::FillVertexShaderConstantsBuffer() memcpy((char*)vertexConstantShadowCopy + offset, vector, 4 * sizeof(float)); } - size_t constantBuffersHeapOffset = getCurrentResourceStorage().m_constantsBuffersHeapFreeSpace; + size_t constantBuffersHeapOffset = 
m_perFrameStorage.m_constantsBuffersHeapFreeSpace; // 65536 alignment constantBuffersHeapOffset = (constantBuffersHeapOffset + 65536 - 1) & ~65535; ID3D12Resource *constantsBuffer; check(m_device->CreatePlacedResource( - getCurrentResourceStorage().m_constantsBuffersHeap, + m_perFrameStorage.m_constantsBuffersHeap, constantBuffersHeapOffset, &getBufferResourceDesc(512 * 4 * sizeof(float)), D3D12_RESOURCE_STATE_GENERIC_READ, @@ -468,11 +468,11 @@ void D3D12GSRender::FillVertexShaderConstantsBuffer() D3D12_CONSTANT_BUFFER_VIEW_DESC constantBufferViewDesc = {}; constantBufferViewDesc.BufferLocation = constantsBuffer->GetGPUVirtualAddress(); constantBufferViewDesc.SizeInBytes = 512 * 4 * sizeof(float); - D3D12_CPU_DESCRIPTOR_HANDLE Handle = getCurrentResourceStorage().m_constantsBufferDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); - Handle.ptr += getCurrentResourceStorage().m_constantsBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + D3D12_CPU_DESCRIPTOR_HANDLE Handle = m_perFrameStorage.m_constantsBufferDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); + Handle.ptr += m_perFrameStorage.m_constantsBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); m_device->CreateConstantBufferView(&constantBufferViewDesc, Handle); - getCurrentResourceStorage().m_constantsBuffersHeapFreeSpace = constantBuffersHeapOffset + 512 * 4 * sizeof(float); - getCurrentResourceStorage().m_inflightResources.push_back(constantsBuffer); + m_perFrameStorage.m_constantsBuffersHeapFreeSpace = constantBuffersHeapOffset + 512 * 4 * sizeof(float); + m_perFrameStorage.m_inflightResources.push_back(constantsBuffer); } void D3D12GSRender::FillPixelShaderConstantsBuffer() @@ -483,13 +483,13 @@ void D3D12GSRender::FillPixelShaderConstantsBuffer() // Multiple of 256 never 0 bufferSize = (bufferSize + 255) & ~255; - size_t constantBuffersHeapOffset = 
getCurrentResourceStorage().m_constantsBuffersHeapFreeSpace; + size_t constantBuffersHeapOffset = m_perFrameStorage.m_constantsBuffersHeapFreeSpace; // 65536 alignment constantBuffersHeapOffset = (constantBuffersHeapOffset + 65536 - 1) & ~65535; ID3D12Resource *constantsBuffer; check(m_device->CreatePlacedResource( - getCurrentResourceStorage().m_constantsBuffersHeap, + m_perFrameStorage.m_constantsBuffersHeap, constantBuffersHeapOffset, &getBufferResourceDesc(bufferSize), D3D12_RESOURCE_STATE_GENERIC_READ, @@ -536,11 +536,11 @@ void D3D12GSRender::FillPixelShaderConstantsBuffer() D3D12_CONSTANT_BUFFER_VIEW_DESC constantBufferViewDesc = {}; constantBufferViewDesc.BufferLocation = constantsBuffer->GetGPUVirtualAddress(); constantBufferViewDesc.SizeInBytes = (UINT)bufferSize; - D3D12_CPU_DESCRIPTOR_HANDLE Handle = getCurrentResourceStorage().m_constantsBufferDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); - Handle.ptr += getCurrentResourceStorage().m_constantsBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + D3D12_CPU_DESCRIPTOR_HANDLE Handle = m_perFrameStorage.m_constantsBufferDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); + Handle.ptr += m_perFrameStorage.m_constantsBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); m_device->CreateConstantBufferView(&constantBufferViewDesc, Handle); - getCurrentResourceStorage().m_constantsBuffersHeapFreeSpace = constantBuffersHeapOffset + bufferSize; - getCurrentResourceStorage().m_inflightResources.push_back(constantsBuffer); + m_perFrameStorage.m_constantsBuffersHeapFreeSpace = constantBuffersHeapOffset + bufferSize; + m_perFrameStorage.m_inflightResources.push_back(constantsBuffer); } diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index db7b744208..61df8703b1 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -88,12 +88,11 @@ 
void D3D12GSRender::ResourceStorage::Init(ID3D12Device *device) void D3D12GSRender::ResourceStorage::Release() { // NOTE: Should be released only if no command are in flight ! - m_backbufferAsRendertarget->Release(); + m_constantsBufferDescriptorsHeap->Release(); m_scaleOffsetDescriptorHeap->Release(); m_constantsBuffersHeap->Release(); m_vertexIndexBuffersHeap->Release(); - m_backBuffer->Release(); for (auto tmp : m_inflightResources) tmp->Release(); m_textureDescriptorsHeap->Release(); @@ -218,8 +217,8 @@ D3D12GSRender::D3D12GSRender() swapChain.SwapEffect = DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL; check(dxgiFactory->CreateSwapChain(m_commandQueueGraphic, &swapChain, (IDXGISwapChain**)&m_swapChain)); - m_swapChain->GetBuffer(0, IID_PPV_ARGS(&m_perFrameStorage[0].m_backBuffer)); - m_swapChain->GetBuffer(1, IID_PPV_ARGS(&m_perFrameStorage[1].m_backBuffer)); + m_swapChain->GetBuffer(0, IID_PPV_ARGS(&m_backBuffer[0])); + m_swapChain->GetBuffer(1, IID_PPV_ARGS(&m_backBuffer[1])); D3D12_DESCRIPTOR_HEAP_DESC heapDesc = {}; heapDesc.NumDescriptors = 1; @@ -227,10 +226,10 @@ D3D12GSRender::D3D12GSRender() D3D12_RENDER_TARGET_VIEW_DESC rttDesc = {}; rttDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; rttDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - m_device->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(&m_perFrameStorage[0].m_backbufferAsRendertarget)); - m_device->CreateRenderTargetView(m_perFrameStorage[0].m_backBuffer, &rttDesc, m_perFrameStorage[0].m_backbufferAsRendertarget->GetCPUDescriptorHandleForHeapStart()); - m_device->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(&m_perFrameStorage[1].m_backbufferAsRendertarget)); - m_device->CreateRenderTargetView(m_perFrameStorage[1].m_backBuffer, &rttDesc, m_perFrameStorage[1].m_backbufferAsRendertarget->GetCPUDescriptorHandleForHeapStart()); + m_device->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(&m_backbufferAsRendertarget[0])); + m_device->CreateRenderTargetView(m_backBuffer[0], &rttDesc, 
m_backbufferAsRendertarget[0]->GetCPUDescriptorHandleForHeapStart()); + m_device->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(&m_backbufferAsRendertarget[1])); + m_device->CreateRenderTargetView(m_backBuffer[1], &rttDesc, m_backbufferAsRendertarget[1]->GetCPUDescriptorHandleForHeapStart()); // Common root signature D3D12_DESCRIPTOR_RANGE descriptorRange[4] = {}; @@ -282,12 +281,9 @@ D3D12GSRender::D3D12GSRender() rootSignatureBlob->GetBufferSize(), IID_PPV_ARGS(&m_rootSignature)); - m_perFrameStorage[0].Init(m_device); - m_perFrameStorage[0].Reset(); - m_perFrameStorage[1].Init(m_device); - m_perFrameStorage[1].Reset(); + m_perFrameStorage.Init(m_device); + m_perFrameStorage.Reset(); - m_currentResourceStorageIndex = m_swapChain->GetCurrentBackBufferIndex(); vertexConstantShadowCopy = new float[512 * 4]; // Convert shader @@ -313,10 +309,13 @@ D3D12GSRender::~D3D12GSRender() { m_convertPSO->Release(); m_convertRootSignature->Release(); - m_perFrameStorage[0].Release(); - m_perFrameStorage[1].Release(); + m_perFrameStorage.Release(); m_commandQueueGraphic->Release(); m_commandQueueCopy->Release(); + m_backbufferAsRendertarget[0]->Release(); + m_backBuffer[0]->Release(); + m_backbufferAsRendertarget[1]->Release(); + m_backBuffer[1]->Release(); if (m_fbo) delete m_fbo; m_rootSignature->Release(); @@ -325,16 +324,6 @@ D3D12GSRender::~D3D12GSRender() delete[] vertexConstantShadowCopy; } -D3D12GSRender::ResourceStorage &D3D12GSRender::getCurrentResourceStorage() -{ - return m_perFrameStorage[m_currentResourceStorageIndex]; -} - -D3D12GSRender::ResourceStorage &D3D12GSRender::getNonCurrentResourceStorage() -{ - return m_perFrameStorage[1 - m_currentResourceStorageIndex]; -} - void D3D12GSRender::Close() { Stop(); @@ -387,8 +376,8 @@ void D3D12GSRender::ExecCMD(u32 cmd) InitDrawBuffers(); ID3D12GraphicsCommandList *commandList; - check(m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, getCurrentResourceStorage().m_commandAllocator, nullptr, 
IID_PPV_ARGS(&commandList))); - getCurrentResourceStorage().m_inflightCommandList.push_back(commandList); + check(m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_perFrameStorage.m_commandAllocator, nullptr, IID_PPV_ARGS(&commandList))); + m_perFrameStorage.m_inflightCommandList.push_back(commandList); /* if (m_set_color_mask) { @@ -582,8 +571,8 @@ bool D3D12GSRender::LoadProgram() void D3D12GSRender::ExecCMD() { ID3D12GraphicsCommandList *commandList; - m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, getCurrentResourceStorage().m_commandAllocator, nullptr, IID_PPV_ARGS(&commandList)); - getCurrentResourceStorage().m_inflightCommandList.push_back(commandList); + m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_perFrameStorage.m_commandAllocator, nullptr, IID_PPV_ARGS(&commandList)); + m_perFrameStorage.m_inflightCommandList.push_back(commandList); commandList->SetGraphicsRootSignature(m_rootSignature); @@ -607,36 +596,36 @@ void D3D12GSRender::ExecCMD() // Constants setScaleOffset(); - commandList->SetDescriptorHeaps(1, &getCurrentResourceStorage().m_scaleOffsetDescriptorHeap); - D3D12_GPU_DESCRIPTOR_HANDLE Handle = getCurrentResourceStorage().m_scaleOffsetDescriptorHeap->GetGPUDescriptorHandleForHeapStart(); - Handle.ptr += getCurrentResourceStorage().m_currentScaleOffsetBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + commandList->SetDescriptorHeaps(1, &m_perFrameStorage.m_scaleOffsetDescriptorHeap); + D3D12_GPU_DESCRIPTOR_HANDLE Handle = m_perFrameStorage.m_scaleOffsetDescriptorHeap->GetGPUDescriptorHandleForHeapStart(); + Handle.ptr += m_perFrameStorage.m_currentScaleOffsetBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); commandList->SetGraphicsRootDescriptorTable(0, Handle); - getCurrentResourceStorage().m_currentScaleOffsetBufferIndex++; + m_perFrameStorage.m_currentScaleOffsetBufferIndex++; - size_t 
currentBufferIndex = getCurrentResourceStorage().m_constantsBufferIndex; + size_t currentBufferIndex = m_perFrameStorage.m_constantsBufferIndex; FillVertexShaderConstantsBuffer(); - getCurrentResourceStorage().m_constantsBufferIndex++; + m_perFrameStorage.m_constantsBufferIndex++; FillPixelShaderConstantsBuffer(); - getCurrentResourceStorage().m_constantsBufferIndex++; + m_perFrameStorage.m_constantsBufferIndex++; - commandList->SetDescriptorHeaps(1, &getCurrentResourceStorage().m_constantsBufferDescriptorsHeap); - Handle = getCurrentResourceStorage().m_constantsBufferDescriptorsHeap->GetGPUDescriptorHandleForHeapStart(); + commandList->SetDescriptorHeaps(1, &m_perFrameStorage.m_constantsBufferDescriptorsHeap); + Handle = m_perFrameStorage.m_constantsBufferDescriptorsHeap->GetGPUDescriptorHandleForHeapStart(); Handle.ptr += currentBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); commandList->SetGraphicsRootDescriptorTable(1, Handle); commandList->SetPipelineState(m_PSO); size_t usedTexture = UploadTextures(); - Handle = getCurrentResourceStorage().m_textureDescriptorsHeap->GetGPUDescriptorHandleForHeapStart(); - Handle.ptr += getCurrentResourceStorage().m_currentTextureIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - commandList->SetDescriptorHeaps(1, &getCurrentResourceStorage().m_textureDescriptorsHeap); + Handle = m_perFrameStorage.m_textureDescriptorsHeap->GetGPUDescriptorHandleForHeapStart(); + Handle.ptr += m_perFrameStorage.m_currentTextureIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + commandList->SetDescriptorHeaps(1, &m_perFrameStorage.m_textureDescriptorsHeap); commandList->SetGraphicsRootDescriptorTable(2, Handle); - Handle = getCurrentResourceStorage().m_samplerDescriptorHeap->GetGPUDescriptorHandleForHeapStart(); - Handle.ptr += getCurrentResourceStorage().m_currentTextureIndex * 
m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); - commandList->SetDescriptorHeaps(1, &getCurrentResourceStorage().m_samplerDescriptorHeap); + Handle = m_perFrameStorage.m_samplerDescriptorHeap->GetGPUDescriptorHandleForHeapStart(); + Handle.ptr += m_perFrameStorage.m_currentTextureIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); + commandList->SetDescriptorHeaps(1, &m_perFrameStorage.m_samplerDescriptorHeap); commandList->SetGraphicsRootDescriptorTable(3, Handle); - getCurrentResourceStorage().m_currentTextureIndex += usedTexture; + m_perFrameStorage.m_currentTextureIndex += usedTexture; InitDrawBuffers(); @@ -1030,8 +1019,8 @@ void D3D12GSRender::ExecCMD() void D3D12GSRender::Flip() { ID3D12GraphicsCommandList *commandList; - m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, getCurrentResourceStorage().m_commandAllocator, nullptr, IID_PPV_ARGS(&commandList)); - getCurrentResourceStorage().m_inflightCommandList.push_back(commandList); + m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_perFrameStorage.m_commandAllocator, nullptr, IID_PPV_ARGS(&commandList)); + m_perFrameStorage.m_inflightCommandList.push_back(commandList); switch (m_surface_color_target) { @@ -1043,7 +1032,7 @@ void D3D12GSRender::Flip() { D3D12_RESOURCE_BARRIER barriers[2] = {}; barriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - barriers[0].Transition.pResource = getCurrentResourceStorage().m_backBuffer; + barriers[0].Transition.pResource = m_backBuffer[m_swapChain->GetCurrentBackBufferIndex()]; barriers[0].Transition.StateBefore = D3D12_RESOURCE_STATE_PRESENT; barriers[0].Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_DEST; @@ -1056,7 +1045,7 @@ void D3D12GSRender::Flip() D3D12_TEXTURE_COPY_LOCATION src = {}, dst = {}; src.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, dst.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; src.SubresourceIndex = 0, dst.SubresourceIndex = 0; - 
src.pResource = m_fbo->getRenderTargetTexture(0), dst.pResource = getCurrentResourceStorage().m_backBuffer; + src.pResource = m_fbo->getRenderTargetTexture(0), dst.pResource = m_backBuffer[m_swapChain->GetCurrentBackBufferIndex()]; D3D12_BOX box = { 0, 0, 0, RSXThread::m_width, RSXThread::m_height, 1 }; commandList->CopyTextureRegion(&dst, 0, 0, 0, &src, &box); @@ -1074,18 +1063,12 @@ void D3D12GSRender::Flip() // Add an event signaling queue completion Microsoft::WRL::ComPtr fence; m_device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&fence)); - getCurrentResourceStorage().m_queueCompletion = CreateEvent(0, 0, 0, 0); - fence->SetEventOnCompletion(1, getCurrentResourceStorage().m_queueCompletion); + HANDLE handle = CreateEvent(0, 0, 0, 0); + fence->SetEventOnCompletion(1, handle); m_commandQueueGraphic->Signal(fence.Get(), 1); - - // Wait execution is over - if (getNonCurrentResourceStorage().m_queueCompletion) - { - WaitForSingleObject(getNonCurrentResourceStorage().m_queueCompletion, INFINITE); - CloseHandle(getNonCurrentResourceStorage().m_queueCompletion); - getNonCurrentResourceStorage().Reset(); - } - m_currentResourceStorageIndex = 1 - m_currentResourceStorageIndex; + WaitForSingleObject(handle, INFINITE); + CloseHandle(handle); + m_perFrameStorage.Reset(); m_frame->Flip(nullptr); } @@ -1166,7 +1149,7 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) ); check( - m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, getCurrentResourceStorage().m_commandAllocator, nullptr, IID_PPV_ARGS(&convertCommandList)) + m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_perFrameStorage.m_commandAllocator, nullptr, IID_PPV_ARGS(&convertCommandList)) ); D3D12_DESCRIPTOR_HEAP_DESC descriptorHeapDesc = {}; @@ -1246,7 +1229,7 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) m_commandQueueGraphic->Signal(convertDownloadFence, 1); check( - m_device->CreateCommandList(0, 
D3D12_COMMAND_LIST_TYPE_COPY, getCurrentResourceStorage().m_downloadCommandAllocator, nullptr, IID_PPV_ARGS(&downloadCommandList)) + m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_COPY, m_perFrameStorage.m_downloadCommandAllocator, nullptr, IID_PPV_ARGS(&downloadCommandList)) ); // Copy diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 9debeec461..82ae23fd18 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -85,10 +85,6 @@ private: ID3D12DescriptorHeap *m_samplerDescriptorHeap; size_t m_currentTextureIndex; - //BackBuffers - ID3D12Resource* m_backBuffer; - ID3D12DescriptorHeap *m_backbufferAsRendertarget; - // Fence HANDLE m_queueCompletion; @@ -97,7 +93,7 @@ private: void Release(); }; - ResourceStorage m_perFrameStorage[2]; + ResourceStorage m_perFrameStorage; bool m_forcedIndexBuffer; size_t indexCount; @@ -109,11 +105,11 @@ private: ID3D12CommandQueue *m_commandQueueGraphic; struct IDXGISwapChain3 *m_swapChain; + //BackBuffers + ID3D12Resource* m_backBuffer[2]; + ID3D12DescriptorHeap *m_backbufferAsRendertarget[2]; size_t m_lastWidth, m_lastHeight, m_lastDepth; - size_t m_currentResourceStorageIndex; - ResourceStorage& getCurrentResourceStorage(); - ResourceStorage& getNonCurrentResourceStorage(); public: GSFrameBase2 *m_frame; u32 m_draw_frames; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index a19a89724a..53c21731a5 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -60,7 +60,7 @@ size_t D3D12GSRender::UploadTextures() // Upload at each iteration to take advantage of overlapping transfer ID3D12GraphicsCommandList *commandList; - check(m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, getCurrentResourceStorage().m_textureUploadCommandAllocator, nullptr, IID_PPV_ARGS(&commandList))); + check(m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, 
m_perFrameStorage.m_textureUploadCommandAllocator, nullptr, IID_PPV_ARGS(&commandList))); DXGI_FORMAT dxgiFormat; size_t pixelSize; @@ -89,8 +89,8 @@ size_t D3D12GSRender::UploadTextures() textureDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; check(m_device->CreatePlacedResource( - getCurrentResourceStorage().m_uploadTextureHeap, - getCurrentResourceStorage().m_currentStorageOffset, + m_perFrameStorage.m_uploadTextureHeap, + m_perFrameStorage.m_currentStorageOffset, &textureDesc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, @@ -119,18 +119,18 @@ size_t D3D12GSRender::UploadTextures() vramTextureDesc.SampleDesc.Count = 1; vramTextureDesc.MipLevels = 1; check(m_device->CreatePlacedResource( - getCurrentResourceStorage().m_textureStorage, - getCurrentResourceStorage().m_currentStorageOffset, + m_perFrameStorage.m_textureStorage, + m_perFrameStorage.m_currentStorageOffset, &vramTextureDesc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&vramTexture) )); - getCurrentResourceStorage().m_currentStorageOffset += textureSize; - getCurrentResourceStorage().m_currentStorageOffset = (getCurrentResourceStorage().m_currentStorageOffset + 65536 - 1) & ~65535; - getCurrentResourceStorage().m_inflightResources.push_back(Texture); - getCurrentResourceStorage().m_inflightResources.push_back(vramTexture); + m_perFrameStorage.m_currentStorageOffset += textureSize; + m_perFrameStorage.m_currentStorageOffset = (m_perFrameStorage.m_currentStorageOffset + 65536 - 1) & ~65535; + m_perFrameStorage.m_inflightResources.push_back(Texture); + m_perFrameStorage.m_inflightResources.push_back(vramTexture); D3D12_TEXTURE_COPY_LOCATION dst = {}, src = {}; @@ -158,8 +158,8 @@ size_t D3D12GSRender::UploadTextures() srvDesc.Format = dxgiFormat; srvDesc.Texture2D.MipLevels = 1; srvDesc.Shader4ComponentMapping = D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING(D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_1, D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_2, 
D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_3, D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0); - D3D12_CPU_DESCRIPTOR_HANDLE Handle = getCurrentResourceStorage().m_textureDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); - Handle.ptr += (getCurrentResourceStorage().m_currentTextureIndex + usedTexture) * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + D3D12_CPU_DESCRIPTOR_HANDLE Handle = m_perFrameStorage.m_textureDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); + Handle.ptr += (m_perFrameStorage.m_currentTextureIndex + usedTexture) * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); m_device->CreateShaderResourceView(vramTexture, &srvDesc, Handle); // TODO : Correctly define sampler @@ -174,13 +174,13 @@ size_t D3D12GSRender::UploadTextures() samplerDesc.BorderColor[4] = m_textures[i].GetBorderColor(); samplerDesc.MinLOD = m_textures[i].GetMinLOD() >> 8; samplerDesc.MaxLOD = m_textures[i].GetMaxLOD() >> 8; - Handle = getCurrentResourceStorage().m_samplerDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); - Handle.ptr += (getCurrentResourceStorage().m_currentTextureIndex + usedTexture) * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + Handle = m_perFrameStorage.m_samplerDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); + Handle.ptr += (m_perFrameStorage.m_currentTextureIndex + usedTexture) * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); m_device->CreateSampler(&samplerDesc, Handle); commandList->Close(); m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)&commandList); - getCurrentResourceStorage().m_inflightCommandList.push_back(commandList); + m_perFrameStorage.m_inflightCommandList.push_back(commandList); usedTexture++; } From f77e2acfbe99dbade0181f303f3815a02cbd37ad Mon Sep 17 00:00:00 2001 From: vlj Date: Thu, 28 May 2015 18:36:57 +0200 Subject: [PATCH 147/343] 
d3d12: Start supporting mrt --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 27 +++++++++++++++++++----- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h | 8 ++++--- 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 61df8703b1..b02e48caa5 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -66,7 +66,7 @@ void D3D12GSRender::ResourceStorage::Init(ID3D12Device *device) // Texture D3D12_HEAP_DESC heapDescription = {}; - heapDescription.SizeInBytes = 1024 * 1024 * 64; + heapDescription.SizeInBytes = 1024 * 1024 * 512; heapDescription.Properties.Type = D3D12_HEAP_TYPE_UPLOAD; heapDescription.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS; check(device->CreateHeap(&heapDescription, IID_PPV_ARGS(&m_uploadTextureHeap))); @@ -243,11 +243,11 @@ D3D12GSRender::D3D12GSRender() descriptorRange[1].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV; // Textures descriptorRange[2].BaseShaderRegister = 0; - descriptorRange[2].NumDescriptors = 1; + descriptorRange[2].NumDescriptors = 16; descriptorRange[2].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; // Samplers descriptorRange[3].BaseShaderRegister = 0; - descriptorRange[3].NumDescriptors = 1; + descriptorRange[3].NumDescriptors = 16; descriptorRange[3].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER; D3D12_ROOT_PARAMETER RP[4] = {}; RP[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; @@ -559,11 +559,28 @@ bool D3D12GSRender::LoadProgram() LOG_ERROR(RSX, "Bad depth format! 
(%d)", m_surface_depth_format); assert(0); } -; + + switch (m_surface_color_target) + { + case CELL_GCM_SURFACE_TARGET_0: + case CELL_GCM_SURFACE_TARGET_1: + prop.numMRT = 1; + break; + case CELL_GCM_SURFACE_TARGET_MRT1: + prop.numMRT = 2; + break; + case CELL_GCM_SURFACE_TARGET_MRT2: + prop.numMRT = 3; + break; + case CELL_GCM_SURFACE_TARGET_MRT3: + prop.numMRT = 4; + break; + default: + LOG_ERROR(RSX, "Bad surface color target: %d", m_surface_color_target); + } prop.IASet = m_IASet; - m_PSO = m_cachePSO.getGraphicPipelineState(m_cur_vertex_prog, m_cur_fragment_prog, prop, std::make_pair(m_device, m_rootSignature)); return m_PSO != nullptr; } diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h index 77b14f114b..22375bc3df 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h @@ -16,11 +16,12 @@ struct D3D12PipelineProperties DXGI_FORMAT DepthStencilFormat; std::vector IASet; D3D12_BLEND_DESC Blend; + unsigned numMRT : 3; bool operator==(const D3D12PipelineProperties &in) const { // TODO: blend and IASet equality - return Topology == in.Topology && DepthStencilFormat == in.DepthStencilFormat; + return Topology == in.Topology && DepthStencilFormat == in.DepthStencilFormat && numMRT == in.numMRT; } }; @@ -144,8 +145,9 @@ struct D3D12Traits graphicPipelineStateDesc.RasterizerState = CD3D12_RASTERIZER_DESC; graphicPipelineStateDesc.PrimitiveTopologyType = pipelineProperties.Topology; - graphicPipelineStateDesc.NumRenderTargets = 1; - graphicPipelineStateDesc.RTVFormats[0] = DXGI_FORMAT_R8G8B8A8_UNORM; + graphicPipelineStateDesc.NumRenderTargets = pipelineProperties.numMRT; + for (unsigned i = 0; i < pipelineProperties.numMRT; i++) + graphicPipelineStateDesc.RTVFormats[i] = DXGI_FORMAT_R8G8B8A8_UNORM; graphicPipelineStateDesc.DSVFormat = pipelineProperties.DepthStencilFormat; graphicPipelineStateDesc.InputLayout.pInputElementDescs = pipelineProperties.IASet.data(); From 
79420e52a24da203e02d0013ca6f6de4e4ab61c1 Mon Sep 17 00:00:00 2001 From: vlj Date: Thu, 28 May 2015 18:49:50 +0200 Subject: [PATCH 148/343] d3d12: Disable depth test if not required --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 2 ++ rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index b02e48caa5..00b982c729 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -579,6 +579,8 @@ bool D3D12GSRender::LoadProgram() LOG_ERROR(RSX, "Bad surface color target: %d", m_surface_color_target); } + prop.depthEnabled = m_set_depth_test; + prop.IASet = m_IASet; m_PSO = m_cachePSO.getGraphicPipelineState(m_cur_vertex_prog, m_cur_fragment_prog, prop, std::make_pair(m_device, m_rootSignature)); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h index 22375bc3df..29e551a817 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h @@ -17,11 +17,12 @@ struct D3D12PipelineProperties std::vector IASet; D3D12_BLEND_DESC Blend; unsigned numMRT : 3; + bool depthEnabled : 1; bool operator==(const D3D12PipelineProperties &in) const { // TODO: blend and IASet equality - return Topology == in.Topology && DepthStencilFormat == in.DepthStencilFormat && numMRT == in.numMRT; + return Topology == in.Topology && DepthStencilFormat == in.DepthStencilFormat && numMRT == in.numMRT && depthEnabled == in.depthEnabled; } }; @@ -145,6 +146,8 @@ struct D3D12Traits graphicPipelineStateDesc.RasterizerState = CD3D12_RASTERIZER_DESC; graphicPipelineStateDesc.PrimitiveTopologyType = pipelineProperties.Topology; + graphicPipelineStateDesc.DepthStencilState.DepthEnable = pipelineProperties.depthEnabled; + graphicPipelineStateDesc.NumRenderTargets = pipelineProperties.numMRT; for (unsigned i = 0; i < pipelineProperties.numMRT; i++) 
graphicPipelineStateDesc.RTVFormats[i] = DXGI_FORMAT_R8G8B8A8_UNORM; From 88d05a08cf18959915ea81863a66ad5316bb87d3 Mon Sep 17 00:00:00 2001 From: vlj Date: Thu, 28 May 2015 21:19:36 +0200 Subject: [PATCH 149/343] d3d12: Use a dummy texture to fill unused slots --- rpcs3/Emu/RSX/D3D12/D3D12.h | 29 +++++++++++++++++++++ rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp | 14 +--------- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 37 ++++++++++++++++++++++++++- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 3 +++ 4 files changed, 69 insertions(+), 14 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12.h b/rpcs3/Emu/RSX/D3D12/D3D12.h index 346dfb7ea7..f88ab827a2 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12.h @@ -60,4 +60,33 @@ void streamBuffer(void* dst, void* src, size_t sizeInBytes) } } +inline +D3D12_RESOURCE_DESC getBufferResourceDesc(size_t sizeInByte) +{ + D3D12_RESOURCE_DESC BufferDesc = {}; + BufferDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + BufferDesc.Width = (UINT)sizeInByte; + BufferDesc.Height = 1; + BufferDesc.DepthOrArraySize = 1; + BufferDesc.SampleDesc.Count = 1; + BufferDesc.MipLevels = 1; + BufferDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + return BufferDesc; +} + +inline +D3D12_RESOURCE_DESC getTexture2DResourceDesc(size_t width, size_t height, DXGI_FORMAT dxgiFormat) +{ + D3D12_RESOURCE_DESC result; + result = {}; + result.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; + result.Width = width; + result.Height = height; + result.Format = dxgiFormat; + result.DepthOrArraySize = 1; + result.SampleDesc.Count = 1; + result.MipLevels = 1; + return result; +} + #endif \ No newline at end of file diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index 4cb26c1358..5955aaf556 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -132,19 +132,7 @@ void expandIndexedQuads(DstType *dst, const SrcType *src, size_t indexCount) } } -static -D3D12_RESOURCE_DESC 
getBufferResourceDesc(size_t sizeInByte) -{ - D3D12_RESOURCE_DESC BufferDesc = {}; - BufferDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; - BufferDesc.Width = (UINT)sizeInByte; - BufferDesc.Height = 1; - BufferDesc.DepthOrArraySize = 1; - BufferDesc.SampleDesc.Count = 1; - BufferDesc.MipLevels = 1; - BufferDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; - return BufferDesc; -} + // D3D12GS member handling buffers diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 00b982c729..46d4c0f4b3 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -303,10 +303,23 @@ D3D12GSRender::D3D12GSRender() p.first->Release(); p.second->Release(); + + D3D12_HEAP_PROPERTIES hp = {}; + hp.Type = D3D12_HEAP_TYPE_DEFAULT; + check( + m_device->CreateCommittedResource( + &hp, + D3D12_HEAP_FLAG_NONE, + &getTexture2DResourceDesc(2, 2, DXGI_FORMAT_R8G8B8A8_UNORM), + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&m_dummyTexture)) + ); } D3D12GSRender::~D3D12GSRender() { + m_dummyTexture->Release(); m_convertPSO->Release(); m_convertRootSignature->Release(); m_perFrameStorage.Release(); @@ -634,6 +647,28 @@ void D3D12GSRender::ExecCMD() commandList->SetPipelineState(m_PSO); size_t usedTexture = UploadTextures(); + // Drivers don't like undefined texture descriptors + for (; usedTexture < 16; usedTexture++) + { + D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + srvDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + srvDesc.Texture2D.MipLevels = 1; + srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + D3D12_CPU_DESCRIPTOR_HANDLE Handle = m_perFrameStorage.m_textureDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); + Handle.ptr += (m_perFrameStorage.m_currentTextureIndex + usedTexture) * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + 
m_device->CreateShaderResourceView(m_dummyTexture, &srvDesc, Handle); + + D3D12_SAMPLER_DESC samplerDesc = {}; + samplerDesc.Filter = D3D12_FILTER_MIN_MAG_MIP_POINT; + samplerDesc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_WRAP; + samplerDesc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_WRAP; + samplerDesc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_WRAP; + Handle = m_perFrameStorage.m_samplerDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); + Handle.ptr += (m_perFrameStorage.m_currentTextureIndex + usedTexture) * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + m_device->CreateSampler(&samplerDesc, Handle); + } + Handle = m_perFrameStorage.m_textureDescriptorsHeap->GetGPUDescriptorHandleForHeapStart(); Handle.ptr += m_perFrameStorage.m_currentTextureIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); commandList->SetDescriptorHeaps(1, &m_perFrameStorage.m_textureDescriptorsHeap); @@ -644,7 +679,7 @@ void D3D12GSRender::ExecCMD() commandList->SetDescriptorHeaps(1, &m_perFrameStorage.m_samplerDescriptorHeap); commandList->SetGraphicsRootDescriptorTable(3, Handle); - m_perFrameStorage.m_currentTextureIndex += usedTexture; + m_perFrameStorage.m_currentTextureIndex += 16; InitDrawBuffers(); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 82ae23fd18..8000c812e4 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -104,6 +104,9 @@ private: ID3D12CommandQueue *m_commandQueueCopy; ID3D12CommandQueue *m_commandQueueGraphic; + // Used to fill unused texture slot + ID3D12Resource *m_dummyTexture; + struct IDXGISwapChain3 *m_swapChain; //BackBuffers ID3D12Resource* m_backBuffer[2]; From 52a68875fce53e23a83d1f239464ff31b40155ae Mon Sep 17 00:00:00 2001 From: vlj Date: Thu, 28 May 2015 21:55:22 +0200 Subject: [PATCH 150/343] d3d12: Add some other texture case + factorise resource desc code --- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 
48 +++++++++++++++++----------- 1 file changed, 29 insertions(+), 19 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index 53c21731a5..f5dda8c890 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -67,7 +67,33 @@ size_t D3D12GSRender::UploadTextures() int format = m_textures[i].GetFormat() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); switch (format) { + case CELL_GCM_TEXTURE_A1R5G5B5: + case CELL_GCM_TEXTURE_A4R4G4B4: + case CELL_GCM_TEXTURE_R5G6B5: + case CELL_GCM_TEXTURE_COMPRESSED_DXT1: + case CELL_GCM_TEXTURE_COMPRESSED_DXT23: + case CELL_GCM_TEXTURE_COMPRESSED_DXT45: + case CELL_GCM_TEXTURE_G8B8: + case CELL_GCM_TEXTURE_R6G5B5: + case CELL_GCM_TEXTURE_DEPTH24_D8: + case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: + case CELL_GCM_TEXTURE_DEPTH16: + case CELL_GCM_TEXTURE_DEPTH16_FLOAT: + case CELL_GCM_TEXTURE_X16: + case CELL_GCM_TEXTURE_Y16_X16: + case CELL_GCM_TEXTURE_R5G5B5A1: + case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: + case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: + case CELL_GCM_TEXTURE_X32_FLOAT: + case CELL_GCM_TEXTURE_D1R5G5B5: + case CELL_GCM_TEXTURE_D8R8G8B8: + case CELL_GCM_TEXTURE_Y16_X16_FLOAT: + case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: + case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: default: + LOG_ERROR(RSX, "Unimplemented Texture format"); + break; + case CELL_GCM_TEXTURE_A8R8G8B8: dxgiFormat = DXGI_FORMAT_R8G8B8A8_UNORM; pixelSize = 4; break; @@ -79,19 +105,11 @@ size_t D3D12GSRender::UploadTextures() ID3D12Resource *Texture, *vramTexture; size_t textureSize = w * h * 4; - D3D12_RESOURCE_DESC textureDesc = {}; - textureDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; - textureDesc.Width = textureSize; - textureDesc.Height = 1; - textureDesc.DepthOrArraySize = 1; - textureDesc.SampleDesc.Count = 1; - textureDesc.MipLevels = 1; - textureDesc.Layout = 
D3D12_TEXTURE_LAYOUT_ROW_MAJOR; check(m_device->CreatePlacedResource( m_perFrameStorage.m_uploadTextureHeap, m_perFrameStorage.m_currentStorageOffset, - &textureDesc, + &getBufferResourceDesc(textureSize), D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, IID_PPV_ARGS(&Texture) @@ -107,21 +125,13 @@ size_t D3D12GSRender::UploadTextures() rowPitch = (rowPitch + 255) & ~255; // Upload with correct rowpitch for (unsigned row = 0; row < m_textures[i].GetHeight(); row++) - streamToBuffer((char*)textureData + row * rowPitch, (char*)pixels + row * m_textures[i].m_pitch, m_textures[i].m_pitch); + streamToBuffer((char*)textureData + row * rowPitch, (char*)pixels + row * m_textures[i].m_pitch, m_textures[i].m_pitch); Texture->Unmap(0, nullptr); - D3D12_RESOURCE_DESC vramTextureDesc = {}; - vramTextureDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; - vramTextureDesc.Width = m_textures[i].GetWidth(); - vramTextureDesc.Height = m_textures[i].GetHeight(); - vramTextureDesc.Format = dxgiFormat; - vramTextureDesc.DepthOrArraySize = 1; - vramTextureDesc.SampleDesc.Count = 1; - vramTextureDesc.MipLevels = 1; check(m_device->CreatePlacedResource( m_perFrameStorage.m_textureStorage, m_perFrameStorage.m_currentStorageOffset, - &vramTextureDesc, + &getTexture2DResourceDesc(m_textures[i].GetWidth(), m_textures[i].GetHeight(), dxgiFormat), D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&vramTexture) From b5a7637b49925582f95951d312a1decd01e400f6 Mon Sep 17 00:00:00 2001 From: vlj Date: Thu, 28 May 2015 22:07:54 +0200 Subject: [PATCH 151/343] d3d12: Implement texture swizzling and untiling --- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 72 +++++++++++++++++++++++++++- 1 file changed, 71 insertions(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index f5dda8c890..b426a9197b 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -3,6 +3,38 @@ #include "D3D12GSRender.h" // For clarity this code 
deals with texture but belongs to D3D12GSRender class + +static +u32 LinearToSwizzleAddress(u32 x, u32 y, u32 z, u32 log2_width, u32 log2_height, u32 log2_depth) +{ + u32 offset = 0; + u32 shift_count = 0; + while (log2_width | log2_height | log2_depth) { + if (log2_width) + { + offset |= (x & 0x01) << shift_count; + x >>= 1; + ++shift_count; + --log2_width; + } + if (log2_height) + { + offset |= (y & 0x01) << shift_count; + y >>= 1; + ++shift_count; + --log2_height; + } + if (log2_depth) + { + offset |= (z & 0x01) << shift_count; + z >>= 1; + ++shift_count; + --log2_depth; + } + } + return offset; +} + static D3D12_COMPARISON_FUNC ComparisonFunc[] = { D3D12_COMPARISON_FUNC_NEVER, @@ -65,6 +97,7 @@ size_t D3D12GSRender::UploadTextures() DXGI_FORMAT dxgiFormat; size_t pixelSize; int format = m_textures[i].GetFormat() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); + bool is_swizzled = !(m_textures[i].GetFormat() & CELL_GCM_TEXTURE_LN); switch (format) { case CELL_GCM_TEXTURE_A1R5G5B5: @@ -125,7 +158,26 @@ size_t D3D12GSRender::UploadTextures() rowPitch = (rowPitch + 255) & ~255; // Upload with correct rowpitch for (unsigned row = 0; row < m_textures[i].GetHeight(); row++) + { + if (is_swizzled) + { + u32 *src, *dst; + u32 log2width, log2height; + + src = (u32*)pixels; + dst = (u32*)textureData; + + log2width = (u32)(logf(m_textures[i].GetWidth()) / logf(2.f)); + log2height = (u32)(logf(m_textures[i].GetHeight()) / logf(2.f)); + + for (int j = 0; j < m_textures[i].GetWidth(); j++) + { + dst[(row * rowPitch) + j] = src[LinearToSwizzleAddress(j, i, 0, log2width, log2height, 0)]; + } + } + else streamToBuffer((char*)textureData + row * rowPitch, (char*)pixels + row * m_textures[i].m_pitch, m_textures[i].m_pitch); + } Texture->Unmap(0, nullptr); check(m_device->CreatePlacedResource( @@ -167,7 +219,25 @@ size_t D3D12GSRender::UploadTextures() srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; srvDesc.Format = dxgiFormat; srvDesc.Texture2D.MipLevels = 1; - 
srvDesc.Shader4ComponentMapping = D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING(D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_1, D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_2, D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_3, D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0); + static const int RemapValue[4] = + { + D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_1, + D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_2, + D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_3, + D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0 + }; + if (format != CELL_GCM_TEXTURE_B8 && format != CELL_GCM_TEXTURE_X16 && format != CELL_GCM_TEXTURE_X32_FLOAT) + { + u8 remap_a = m_textures[i].GetRemap() & 0x3; + u8 remap_r = (m_textures[i].GetRemap() >> 2) & 0x3; + u8 remap_g = (m_textures[i].GetRemap() >> 4) & 0x3; + u8 remap_b = (m_textures[i].GetRemap() >> 6) & 0x3; + + srvDesc.Shader4ComponentMapping = D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING(RemapValue[remap_a], RemapValue[remap_r], RemapValue[remap_g], RemapValue[remap_b]); + } + else + srvDesc.Shader4ComponentMapping = D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING(RemapValue[0], RemapValue[1], RemapValue[2], RemapValue[3]); + D3D12_CPU_DESCRIPTOR_HANDLE Handle = m_perFrameStorage.m_textureDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); Handle.ptr += (m_perFrameStorage.m_currentTextureIndex + usedTexture) * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); m_device->CreateShaderResourceView(vramTexture, &srvDesc, Handle); From 7b571aab70b94b32437e6e7b5799e6754edd66f1 Mon Sep 17 00:00:00 2001 From: vlj Date: Thu, 28 May 2015 22:29:51 +0200 Subject: [PATCH 152/343] d3d12: Start supporting dxtc1 format --- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 30 ++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index b426a9197b..ee84533c48 100644 --- 
a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -95,7 +95,7 @@ size_t D3D12GSRender::UploadTextures() check(m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_perFrameStorage.m_textureUploadCommandAllocator, nullptr, IID_PPV_ARGS(&commandList))); DXGI_FORMAT dxgiFormat; - size_t pixelSize; + size_t blockSizeInByte, blockWidthInPixel, blockHeightInPixel; int format = m_textures[i].GetFormat() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); bool is_swizzled = !(m_textures[i].GetFormat() & CELL_GCM_TEXTURE_LN); switch (format) @@ -103,7 +103,6 @@ size_t D3D12GSRender::UploadTextures() case CELL_GCM_TEXTURE_A1R5G5B5: case CELL_GCM_TEXTURE_A4R4G4B4: case CELL_GCM_TEXTURE_R5G6B5: - case CELL_GCM_TEXTURE_COMPRESSED_DXT1: case CELL_GCM_TEXTURE_COMPRESSED_DXT23: case CELL_GCM_TEXTURE_COMPRESSED_DXT45: case CELL_GCM_TEXTURE_G8B8: @@ -128,16 +127,30 @@ size_t D3D12GSRender::UploadTextures() break; case CELL_GCM_TEXTURE_A8R8G8B8: dxgiFormat = DXGI_FORMAT_R8G8B8A8_UNORM; - pixelSize = 4; + blockSizeInByte = 4; + blockWidthInPixel = 1, blockHeightInPixel = 1; + break; + case CELL_GCM_TEXTURE_COMPRESSED_DXT1: + dxgiFormat = DXGI_FORMAT_BC1_UNORM; + blockSizeInByte = 8; + blockWidthInPixel = 4, blockHeightInPixel = 4; break; case CELL_GCM_TEXTURE_B8: dxgiFormat = DXGI_FORMAT_R8_UNORM; - pixelSize = 1; + blockSizeInByte = 1; + blockWidthInPixel = 1, blockHeightInPixel = 1; break; } + size_t heightInBlocks = (m_textures[i].GetHeight() + blockHeightInPixel - 1) / blockHeightInPixel; + size_t widthInBlocks = (m_textures[i].GetWidth() + blockWidthInPixel - 1) / blockWidthInPixel; + // Multiple of 256 + size_t rowPitch = blockSizeInByte * widthInBlocks; + rowPitch = (rowPitch + 255) & ~255; + + ID3D12Resource *Texture, *vramTexture; - size_t textureSize = w * h * 4; + size_t textureSize = rowPitch * heightInBlocks; check(m_device->CreatePlacedResource( m_perFrameStorage.m_uploadTextureHeap, @@ -153,13 +166,10 @@ size_t 
D3D12GSRender::UploadTextures() void *textureData; check(Texture->Map(0, nullptr, (void**)&textureData)); - // Multiple of 256 - size_t rowPitch = m_textures[i].GetWidth() * pixelSize; - rowPitch = (rowPitch + 255) & ~255; // Upload with correct rowpitch - for (unsigned row = 0; row < m_textures[i].GetHeight(); row++) + for (unsigned row = 0; row < heightInBlocks; row++) { - if (is_swizzled) + if (format == CELL_GCM_TEXTURE_A8R8G8B8 && is_swizzled) { u32 *src, *dst; u32 log2width, log2height; From 68a62f897d6b5bbfffc16c2332bee3d18b9b728e Mon Sep 17 00:00:00 2001 From: vlj Date: Thu, 28 May 2015 22:56:41 +0200 Subject: [PATCH 153/343] d3d12: Fix texture loading for fw_fog.ppu.elf Color are wrong though --- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index ee84533c48..33772c3b8a 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -169,6 +169,8 @@ size_t D3D12GSRender::UploadTextures() // Upload with correct rowpitch for (unsigned row = 0; row < heightInBlocks; row++) { + size_t m_texture_pitch = m_textures[i].m_pitch; + if (!m_texture_pitch) m_texture_pitch = rowPitch; if (format == CELL_GCM_TEXTURE_A8R8G8B8 && is_swizzled) { u32 *src, *dst; @@ -186,7 +188,7 @@ size_t D3D12GSRender::UploadTextures() } } else - streamToBuffer((char*)textureData + row * rowPitch, (char*)pixels + row * m_textures[i].m_pitch, m_textures[i].m_pitch); + streamToBuffer((char*)textureData + row * rowPitch, (char*)pixels + row * m_texture_pitch, m_texture_pitch); } Texture->Unmap(0, nullptr); @@ -229,7 +231,7 @@ size_t D3D12GSRender::UploadTextures() srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; srvDesc.Format = dxgiFormat; srvDesc.Texture2D.MipLevels = 1; - static const int RemapValue[4] = + const int RemapValue[4] = { D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_1, 
D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_2, From 3a029efbd3b3347ab0724671c20f032ca539e000 Mon Sep 17 00:00:00 2001 From: vlj Date: Thu, 28 May 2015 23:00:40 +0200 Subject: [PATCH 154/343] d3d12: Add D8R8G8B8 format to texture Should fix human.ppu.elf --- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index 33772c3b8a..bcf28213f3 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -118,13 +118,17 @@ size_t D3D12GSRender::UploadTextures() case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: case CELL_GCM_TEXTURE_X32_FLOAT: case CELL_GCM_TEXTURE_D1R5G5B5: - case CELL_GCM_TEXTURE_D8R8G8B8: case CELL_GCM_TEXTURE_Y16_X16_FLOAT: case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: default: LOG_ERROR(RSX, "Unimplemented Texture format"); break; + case CELL_GCM_TEXTURE_D8R8G8B8: + dxgiFormat = DXGI_FORMAT_R8G8B8A8_UNORM; + blockSizeInByte = 4; + blockWidthInPixel = 1, blockHeightInPixel = 1; + break; case CELL_GCM_TEXTURE_A8R8G8B8: dxgiFormat = DXGI_FORMAT_R8G8B8A8_UNORM; blockSizeInByte = 4; From af69803ee087c3e6e5dd238dc0a28dcfaea2e8f6 Mon Sep 17 00:00:00 2001 From: vlj Date: Thu, 28 May 2015 23:12:16 +0200 Subject: [PATCH 155/343] d3d12: Factorize resource state change --- rpcs3/Emu/RSX/D3D12/D3D12.h | 11 +++++++++++ rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 19 ++++--------------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12.h b/rpcs3/Emu/RSX/D3D12/D3D12.h index f88ab827a2..b87de5a8ea 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12.h @@ -89,4 +89,15 @@ D3D12_RESOURCE_DESC getTexture2DResourceDesc(size_t width, size_t height, DXGI_F return result; } +inline +D3D12_RESOURCE_BARRIER 
getResourceBarrierTransition(ID3D12Resource *res, D3D12_RESOURCE_STATES stateBefore, D3D12_RESOURCE_STATES stateAfter) +{ + D3D12_RESOURCE_BARRIER barrier = {}; + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barrier.Transition.pResource = res; + barrier.Transition.StateBefore = stateBefore; + barrier.Transition.StateAfter = stateAfter; + return barrier; +} + #endif \ No newline at end of file diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 46d4c0f4b3..3f59d1ac28 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -679,7 +679,7 @@ void D3D12GSRender::ExecCMD() commandList->SetDescriptorHeaps(1, &m_perFrameStorage.m_samplerDescriptorHeap); commandList->SetGraphicsRootDescriptorTable(3, Handle); - m_perFrameStorage.m_currentTextureIndex += 16; + m_perFrameStorage.m_currentTextureIndex += usedTexture; InitDrawBuffers(); @@ -1241,12 +1241,7 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) // Convert - D3D12_RESOURCE_BARRIER barrier = {}; - barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - barrier.Transition.pResource = m_fbo->getDepthStencilTexture(); - barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_DEPTH_WRITE; - barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_GENERIC_READ; - convertCommandList->ResourceBarrier(1, &barrier); + convertCommandList->ResourceBarrier(1, &getResourceBarrierTransition(m_fbo->getDepthStencilTexture(), D3D12_RESOURCE_STATE_DEPTH_WRITE, D3D12_RESOURCE_STATE_GENERIC_READ)); convertCommandList->SetPipelineState(m_convertPSO); convertCommandList->SetComputeRootSignature(m_convertRootSignature); @@ -1254,8 +1249,6 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) convertCommandList->SetComputeRootDescriptorTable(0, descriptorHeap->GetGPUDescriptorHandleForHeapStart()); convertCommandList->Dispatch(RSXThread::m_width / 8, RSXThread::m_height / 8, 1); - 
barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_GENERIC_READ; - barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_DEPTH_WRITE; // Flush UAV D3D12_RESOURCE_BARRIER uavbarrier = {}; uavbarrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV; @@ -1263,15 +1256,11 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) D3D12_RESOURCE_BARRIER barriers[] = { - barrier, + getResourceBarrierTransition(m_fbo->getDepthStencilTexture(), D3D12_RESOURCE_STATE_GENERIC_READ, D3D12_RESOURCE_STATE_DEPTH_WRITE), uavbarrier, }; convertCommandList->ResourceBarrier(2, barriers); - - barrier.Transition.pResource = depthConverted; - barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; - barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE; - convertCommandList->ResourceBarrier(1, &barrier); + convertCommandList->ResourceBarrier(1, &getResourceBarrierTransition(depthConverted, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE)); convertCommandList->Close(); m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)&convertCommandList); From 74dab18174f03e17fd1d10ef2db50a30ccf1e85c Mon Sep 17 00:00:00 2001 From: vlj Date: Thu, 28 May 2015 23:15:53 +0200 Subject: [PATCH 156/343] d3d12: Use graphic queue to download depth buffer For now go to the easiest solution and improve later --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 3f59d1ac28..eeffa35748 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -1265,14 +1265,8 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) convertCommandList->Close(); m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)&convertCommandList); - ID3D12Fence *convertDownloadFence; check( - m_device->CreateFence(0, D3D12_FENCE_FLAG_NONE, 
IID_PPV_ARGS(&convertDownloadFence)) - ); - m_commandQueueGraphic->Signal(convertDownloadFence, 1); - - check( - m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_COPY, m_perFrameStorage.m_downloadCommandAllocator, nullptr, IID_PPV_ARGS(&downloadCommandList)) + m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_perFrameStorage.m_commandAllocator, nullptr, IID_PPV_ARGS(&downloadCommandList)) ); // Copy @@ -1290,11 +1284,9 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) downloadCommandList->CopyTextureRegion(&dst, 0, 0, 0, &src, nullptr); downloadCommandList->Close(); - m_commandQueueCopy->Wait(convertDownloadFence, 1); - m_commandQueueCopy->ExecuteCommandLists(1, (ID3D12CommandList**)&downloadCommandList); + m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)&downloadCommandList); //Wait for result - m_commandQueueCopy->Signal(fence, 1); - convertDownloadFence->Release(); + m_commandQueueGraphic->Signal(fence, 1); } else m_commandQueueGraphic->Signal(fence, 1); From 53ac34db358848bc47372f1c7794738e497e99a6 Mon Sep 17 00:00:00 2001 From: vlj Date: Thu, 28 May 2015 23:48:34 +0200 Subject: [PATCH 157/343] d3d12: Fix a warning --- rpcs3/Emu/RSX/D3D12/D3D12.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12.h b/rpcs3/Emu/RSX/D3D12/D3D12.h index b87de5a8ea..622622a56e 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12.h @@ -80,8 +80,8 @@ D3D12_RESOURCE_DESC getTexture2DResourceDesc(size_t width, size_t height, DXGI_F D3D12_RESOURCE_DESC result; result = {}; result.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; - result.Width = width; - result.Height = height; + result.Width = (UINT)width; + result.Height = (UINT)height; result.Format = dxgiFormat; result.DepthOrArraySize = 1; result.SampleDesc.Count = 1; From f12cb9b5196df975b62edb4805dad4dc8be00e65 Mon Sep 17 00:00:00 2001 From: vlj Date: Thu, 28 May 2015 23:57:19 +0200 Subject: [PATCH 
158/343] d3d12: Send RTTs to cell memory when a semaphore is released --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 201 ++++++++++++++++++++++---- 1 file changed, 174 insertions(+), 27 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index eeffa35748..a111efe9a0 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -1132,6 +1132,56 @@ void D3D12GSRender::WriteDepthBuffer() { } +static +ID3D12Resource *writeColorBuffer(ID3D12Device *device, ID3D12Resource *RTT, ID3D12GraphicsCommandList *cmdlist, size_t rowPitch, size_t width, size_t height) +{ + ID3D12Resource *Result; + + D3D12_HEAP_PROPERTIES heapProp = {}; + heapProp.Type = D3D12_HEAP_TYPE_READBACK; + D3D12_RESOURCE_DESC resdesc = getBufferResourceDesc(rowPitch * height); + + + check( + device->CreateCommittedResource( + &heapProp, + D3D12_HEAP_FLAG_NONE, + &resdesc, + D3D12_RESOURCE_STATE_COPY_DEST, + nullptr, + IID_PPV_ARGS(&Result) + ) + ); + + cmdlist->ResourceBarrier(1, &getResourceBarrierTransition(RTT, D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE)); + + D3D12_TEXTURE_COPY_LOCATION dst = {}, src = {}; + src.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + src.pResource = RTT; + dst.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + dst.pResource = Result; + dst.PlacedFootprint.Offset = 0; + dst.PlacedFootprint.Footprint.Depth = 1; + dst.PlacedFootprint.Footprint.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + dst.PlacedFootprint.Footprint.Height = (UINT)height; + dst.PlacedFootprint.Footprint.Width = (UINT)width; + dst.PlacedFootprint.Footprint.RowPitch = (UINT)rowPitch; + cmdlist->CopyTextureRegion(&dst, 0, 0, 0, &src, nullptr); + cmdlist->ResourceBarrier(1, &getResourceBarrierTransition(RTT, D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET)); + return Result; +} + +static +void copyToCellRamAndRelease(void *dstAddress, ID3D12Resource *res, size_t rowPitch, size_t width, 
size_t height) +{ + void *srcBuffer; + check(res->Map(0, nullptr, &srcBuffer)); + for (unsigned row = 0; row < height; row++) + memcpy((char*)dstAddress + row * width, ((char*)srcBuffer) + row * rowPitch, width); + res->Unmap(0, nullptr); + res->Release(); +} + void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) { @@ -1151,22 +1201,18 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) fence->SetEventOnCompletion(1, handle); ID3D12Resource *writeDest, *depthConverted; - ID3D12GraphicsCommandList *convertCommandList, *downloadCommandList; + ID3D12GraphicsCommandList *convertCommandList; ID3D12DescriptorHeap *descriptorHeap; - size_t rowPitch = RSXThread::m_width; - rowPitch = (rowPitch + 255) & ~255; + size_t depthRowPitch = RSXThread::m_width; + depthRowPitch = (depthRowPitch + 255) & ~255; + + bool needTransfer = m_set_context_dma_z || m_set_context_dma_color_a || m_set_context_dma_color_b || m_set_context_dma_color_c || m_set_context_dma_color_d; + if (m_set_context_dma_z) { D3D12_HEAP_PROPERTIES heapProp = {}; heapProp.Type = D3D12_HEAP_TYPE_DEFAULT; - D3D12_RESOURCE_DESC resdesc = {}; - resdesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; - resdesc.Width = RSXThread::m_width; - resdesc.Height = RSXThread::m_height; - resdesc.DepthOrArraySize = 1; - resdesc.SampleDesc.Count = 1; - resdesc.MipLevels = 1; - resdesc.Format = DXGI_FORMAT_R8_UNORM; + D3D12_RESOURCE_DESC resdesc = getTexture2DResourceDesc(RSXThread::m_width, RSXThread::m_height, DXGI_FORMAT_R8_UNORM); resdesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; check( @@ -1182,14 +1228,7 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) heapProp = {}; heapProp.Type = D3D12_HEAP_TYPE_READBACK; - resdesc = {}; - resdesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; - resdesc.Width = RSXThread::m_width * RSXThread::m_height * 2; // * 2 for safety - resdesc.Height = 1; - resdesc.DepthOrArraySize = 1; - resdesc.SampleDesc.Count = 1; - 
resdesc.MipLevels = 1; - resdesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + resdesc = getBufferResourceDesc(depthRowPitch * RSXThread::m_height); check( m_device->CreateCommittedResource( @@ -1239,7 +1278,6 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) uavDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D; m_device->CreateUnorderedAccessView(depthConverted, nullptr, &uavDesc, Handle); - // Convert convertCommandList->ResourceBarrier(1, &getResourceBarrierTransition(m_fbo->getDepthStencilTexture(), D3D12_RESOURCE_STATE_DEPTH_WRITE, D3D12_RESOURCE_STATE_GENERIC_READ)); @@ -1264,11 +1302,18 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) convertCommandList->Close(); m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)&convertCommandList); + } + ID3D12GraphicsCommandList *downloadCommandList; + if (needTransfer) + { check( m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_perFrameStorage.m_commandAllocator, nullptr, IID_PPV_ARGS(&downloadCommandList)) ); + } + if (m_set_context_dma_z) + { // Copy D3D12_TEXTURE_COPY_LOCATION dst = {}, src = {}; src.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; @@ -1280,16 +1325,52 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) dst.PlacedFootprint.Footprint.Format = DXGI_FORMAT_R8_UNORM; dst.PlacedFootprint.Footprint.Height = RSXThread::m_height; dst.PlacedFootprint.Footprint.Width = RSXThread::m_width; - dst.PlacedFootprint.Footprint.RowPitch = (UINT)rowPitch; + dst.PlacedFootprint.Footprint.RowPitch = (UINT)depthRowPitch; downloadCommandList->CopyTextureRegion(&dst, 0, 0, 0, &src, nullptr); + } + size_t colorRowPitch = RSXThread::m_width * 4; + colorRowPitch = (colorRowPitch + 255) & ~255; + ID3D12Resource *rtt0, *rtt1, *rtt2, *rtt3; + switch (m_surface_color_target) + { + case CELL_GCM_SURFACE_TARGET_NONE: + break; + + case CELL_GCM_SURFACE_TARGET_0: + if (m_context_dma_color_a) rtt0 = writeColorBuffer(m_device, 
m_fbo->getRenderTargetTexture(0), downloadCommandList, colorRowPitch, RSXThread::m_width, RSXThread::m_height); + break; + + case CELL_GCM_SURFACE_TARGET_1: + if (m_context_dma_color_b) rtt1 = writeColorBuffer(m_device, m_fbo->getRenderTargetTexture(1), downloadCommandList, colorRowPitch, RSXThread::m_width, RSXThread::m_height); + break; + + case CELL_GCM_SURFACE_TARGET_MRT1: + if (m_context_dma_color_a) rtt0 = writeColorBuffer(m_device, m_fbo->getRenderTargetTexture(0), downloadCommandList, colorRowPitch, RSXThread::m_width, RSXThread::m_height); + if (m_context_dma_color_b) rtt1 = writeColorBuffer(m_device, m_fbo->getRenderTargetTexture(1), downloadCommandList, colorRowPitch, RSXThread::m_width, RSXThread::m_height); + break; + + case CELL_GCM_SURFACE_TARGET_MRT2: + if (m_context_dma_color_a) rtt0 = writeColorBuffer(m_device, m_fbo->getRenderTargetTexture(0), downloadCommandList, colorRowPitch, RSXThread::m_width, RSXThread::m_height); + if (m_context_dma_color_b) rtt1 = writeColorBuffer(m_device, m_fbo->getRenderTargetTexture(1), downloadCommandList, colorRowPitch, RSXThread::m_width, RSXThread::m_height); + if (m_context_dma_color_b) rtt2 = writeColorBuffer(m_device, m_fbo->getRenderTargetTexture(2), downloadCommandList, colorRowPitch, RSXThread::m_width, RSXThread::m_height); + break; + + case CELL_GCM_SURFACE_TARGET_MRT3: + if (m_context_dma_color_a) rtt0 = writeColorBuffer(m_device, m_fbo->getRenderTargetTexture(0), downloadCommandList, colorRowPitch, RSXThread::m_width, RSXThread::m_height); + if (m_context_dma_color_b) rtt1 = writeColorBuffer(m_device, m_fbo->getRenderTargetTexture(1), downloadCommandList, colorRowPitch, RSXThread::m_width, RSXThread::m_height); + if (m_context_dma_color_b) rtt2 = writeColorBuffer(m_device, m_fbo->getRenderTargetTexture(2), downloadCommandList, colorRowPitch, RSXThread::m_width, RSXThread::m_height); + if (m_context_dma_color_b) rtt3 = writeColorBuffer(m_device, m_fbo->getRenderTargetTexture(3), downloadCommandList, 
colorRowPitch, RSXThread::m_width, RSXThread::m_height); + break; + } + if (needTransfer) + { downloadCommandList->Close(); m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)&downloadCommandList); - //Wait for result - m_commandQueueGraphic->Signal(fence, 1); } - else - m_commandQueueGraphic->Signal(fence, 1); + + //Wait for result + m_commandQueueGraphic->Signal(fence, 1); std::thread valueChangerThread([=]() { WaitForSingleObject(handle, INFINITE); @@ -1308,7 +1389,7 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) { for (unsigned i = 0; i < RSXThread::m_width; i++) { - unsigned char c = writeDestPtr[row * rowPitch + i]; + unsigned char c = writeDestPtr[row * depthRowPitch + i]; ptrAsChar[4 * (row * RSXThread::m_width + i)] = c; ptrAsChar[4 * (row * RSXThread::m_width + i) + 1] = c; ptrAsChar[4 * (row * RSXThread::m_width + i) + 2] = c; @@ -1318,10 +1399,76 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) writeDest->Release(); depthConverted->Release(); descriptorHeap->Release(); - downloadCommandList->Release(); convertCommandList->Release(); } + switch (m_surface_color_target) + { + case CELL_GCM_SURFACE_TARGET_NONE: + break; + + case CELL_GCM_SURFACE_TARGET_0: + { + u32 address = GetAddress(m_surface_offset_a, m_context_dma_color_a - 0xfeed0000); + void *dstAddress = vm::get_ptr(address); + copyToCellRamAndRelease(dstAddress, rtt0, colorRowPitch, RSXThread::m_width, RSXThread::m_height); + } + break; + + case CELL_GCM_SURFACE_TARGET_1: + { + u32 address = GetAddress(m_surface_offset_b, m_context_dma_color_b - 0xfeed0000); + void *dstAddress = vm::get_ptr(address); + copyToCellRamAndRelease(dstAddress, rtt1, colorRowPitch, RSXThread::m_width, RSXThread::m_height); + } + break; + + case CELL_GCM_SURFACE_TARGET_MRT1: + { + u32 address = GetAddress(m_surface_offset_a, m_context_dma_color_a - 0xfeed0000); + void *dstAddress = vm::get_ptr(address); + copyToCellRamAndRelease(dstAddress, rtt0, 
colorRowPitch, RSXThread::m_width, RSXThread::m_height); + address = GetAddress(m_surface_offset_b, m_context_dma_color_b - 0xfeed0000); + dstAddress = vm::get_ptr(address); + copyToCellRamAndRelease(dstAddress, rtt1, colorRowPitch, RSXThread::m_width, RSXThread::m_height); + } + break; + + case CELL_GCM_SURFACE_TARGET_MRT2: + { + u32 address = GetAddress(m_surface_offset_a, m_context_dma_color_a - 0xfeed0000); + void *dstAddress = vm::get_ptr(address); + copyToCellRamAndRelease(dstAddress, rtt0, colorRowPitch, RSXThread::m_width, RSXThread::m_height); + address = GetAddress(m_surface_offset_b, m_context_dma_color_b - 0xfeed0000); + dstAddress = vm::get_ptr(address); + copyToCellRamAndRelease(dstAddress, rtt1, colorRowPitch, RSXThread::m_width, RSXThread::m_height); + address = GetAddress(m_surface_offset_c, m_context_dma_color_c - 0xfeed0000); + dstAddress = vm::get_ptr(address); + copyToCellRamAndRelease(dstAddress, rtt2, colorRowPitch, RSXThread::m_width, RSXThread::m_height); + } + break; + + case CELL_GCM_SURFACE_TARGET_MRT3: + { + u32 address = GetAddress(m_surface_offset_a, m_context_dma_color_a - 0xfeed0000); + void *dstAddress = vm::get_ptr(address); + copyToCellRamAndRelease(dstAddress, rtt0, colorRowPitch, RSXThread::m_width, RSXThread::m_height); + address = GetAddress(m_surface_offset_b, m_context_dma_color_b - 0xfeed0000); + dstAddress = vm::get_ptr(address); + copyToCellRamAndRelease(dstAddress, rtt1, colorRowPitch, RSXThread::m_width, RSXThread::m_height); + address = GetAddress(m_surface_offset_c, m_context_dma_color_c - 0xfeed0000); + dstAddress = vm::get_ptr(address); + copyToCellRamAndRelease(dstAddress, rtt2, colorRowPitch, RSXThread::m_width, RSXThread::m_height); + address = GetAddress(m_surface_offset_d, m_context_dma_color_d - 0xfeed0000); + dstAddress = vm::get_ptr(address); + copyToCellRamAndRelease(dstAddress, rtt3, colorRowPitch, RSXThread::m_width, RSXThread::m_height); + } + break; + } + + if (needTransfer) + 
downloadCommandList->Release(); + vm::write32(m_label_addr + offset, value); }); valueChangerThread.detach(); From 906146d8f69a01e4b4ad7233f12fc475aa83424b Mon Sep 17 00:00:00 2001 From: vlj Date: Fri, 29 May 2015 00:45:53 +0200 Subject: [PATCH 159/343] d3d12: Lower memory storage --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index a111efe9a0..71001a70ac 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -66,7 +66,7 @@ void D3D12GSRender::ResourceStorage::Init(ID3D12Device *device) // Texture D3D12_HEAP_DESC heapDescription = {}; - heapDescription.SizeInBytes = 1024 * 1024 * 512; + heapDescription.SizeInBytes = 1024 * 1024 * 64; heapDescription.Properties.Type = D3D12_HEAP_TYPE_UPLOAD; heapDescription.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS; check(device->CreateHeap(&heapDescription, IID_PPV_ARGS(&m_uploadTextureHeap))); From 262d3d0755d91905a6c3126614c304f975c837cd Mon Sep 17 00:00:00 2001 From: vlj Date: Fri, 29 May 2015 00:59:03 +0200 Subject: [PATCH 160/343] d3d12: Fix copyToCellRamAndRelease copy size --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 71001a70ac..9b186a9982 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -1177,7 +1177,7 @@ void copyToCellRamAndRelease(void *dstAddress, ID3D12Resource *res, size_t rowPi void *srcBuffer; check(res->Map(0, nullptr, &srcBuffer)); for (unsigned row = 0; row < height; row++) - memcpy((char*)dstAddress + row * width, ((char*)srcBuffer) + row * rowPitch, width); + memcpy((char*)dstAddress + row * width * 4, (char*)srcBuffer + row * rowPitch, width * 4); res->Unmap(0, nullptr); res->Release(); } From 3e11598941b49c9a193c40ac44ddbce9b7c450f0 Mon 
Sep 17 00:00:00 2001 From: vlj Date: Fri, 29 May 2015 01:09:53 +0200 Subject: [PATCH 161/343] d3d12: Fix increment size for sampler and row pitch for swizzled textures --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 4 ++-- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 9b186a9982..417be2686b 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -76,7 +76,7 @@ void D3D12GSRender::ResourceStorage::Init(ID3D12Device *device) check(device->CreateHeap(&heapDescription, IID_PPV_ARGS(&m_textureStorage))); D3D12_DESCRIPTOR_HEAP_DESC textureDescriptorDesc = {}; - textureDescriptorDesc.NumDescriptors = 1024; // For safety + textureDescriptorDesc.NumDescriptors = 2048; // For safety textureDescriptorDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; textureDescriptorDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; check(device->CreateDescriptorHeap(&textureDescriptorDesc, IID_PPV_ARGS(&m_textureDescriptorsHeap))); @@ -665,7 +665,7 @@ void D3D12GSRender::ExecCMD() samplerDesc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_WRAP; samplerDesc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_WRAP; Handle = m_perFrameStorage.m_samplerDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); - Handle.ptr += (m_perFrameStorage.m_currentTextureIndex + usedTexture) * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + Handle.ptr += (m_perFrameStorage.m_currentTextureIndex + usedTexture) * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); m_device->CreateSampler(&samplerDesc, Handle); } diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index bcf28213f3..f4de5a5bc7 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -188,7 +188,7 @@ size_t D3D12GSRender::UploadTextures() for 
(int j = 0; j < m_textures[i].GetWidth(); j++) { - dst[(row * rowPitch) + j] = src[LinearToSwizzleAddress(j, i, 0, log2width, log2height, 0)]; + dst[(row * rowPitch / 4) + j] = src[LinearToSwizzleAddress(j, i, 0, log2width, log2height, 0)]; } } else @@ -271,7 +271,7 @@ size_t D3D12GSRender::UploadTextures() samplerDesc.MinLOD = m_textures[i].GetMinLOD() >> 8; samplerDesc.MaxLOD = m_textures[i].GetMaxLOD() >> 8; Handle = m_perFrameStorage.m_samplerDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); - Handle.ptr += (m_perFrameStorage.m_currentTextureIndex + usedTexture) * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + Handle.ptr += (m_perFrameStorage.m_currentTextureIndex + usedTexture) * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); m_device->CreateSampler(&samplerDesc, Handle); commandList->Close(); From 595392428d06cb11c08326e903989ce80997b845 Mon Sep 17 00:00:00 2001 From: vlj Date: Fri, 29 May 2015 16:56:46 +0200 Subject: [PATCH 162/343] d3d12: First attempt at read color buffer using dma address change If address of m_context_dma_color_a change, copy the RTT in a texture that can be used for drawing. Currently a bit broken, the rtt is used for others textures too... 
--- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 62 ++++++- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 6 +- rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h | 2 + rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 176 +++++++++++--------- 4 files changed, 156 insertions(+), 90 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 417be2686b..ee438f32fb 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -36,7 +36,6 @@ void D3D12GSRender::ResourceStorage::Reset() void D3D12GSRender::ResourceStorage::Init(ID3D12Device *device) { - m_queueCompletion = 0; // Create a global command allocator device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&m_commandAllocator)); device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&m_textureUploadCommandAllocator)); @@ -345,6 +344,57 @@ void D3D12GSRender::Close() void D3D12GSRender::InitDrawBuffers() { + // FBO location has changed, previous data might be copied + if (m_fbo != nullptr) + { + // TODO : move to texture heap + u32 address_a = GetAddress(m_surface_offset_a, m_context_dma_color_a - 0xfeed0000); + if (m_fbo->m_address_color_a != address_a) + { + LOG_WARNING(RSX, "Copy draw buffer A"); + Microsoft::WRL::ComPtr<ID3D12Resource> Texture; + D3D12_HEAP_PROPERTIES hp = {}; + hp.Type = D3D12_HEAP_TYPE_DEFAULT; + check( + m_device->CreateCommittedResource( + &hp, + D3D12_HEAP_FLAG_NONE, + &getTexture2DResourceDesc(RSXThread::m_width, RSXThread::m_height, DXGI_FORMAT_R8G8B8A8_UNORM), + D3D12_RESOURCE_STATE_COPY_DEST, + nullptr, + IID_PPV_ARGS(&Texture) + ) + ); + + ID3D12GraphicsCommandList *copycmdlist; + check(m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_perFrameStorage.m_commandAllocator, nullptr, IID_PPV_ARGS(&copycmdlist))); + + copycmdlist->ResourceBarrier(1, &getResourceBarrierTransition(m_fbo->getRenderTargetTexture(0), D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE)); + +
D3D12_TEXTURE_COPY_LOCATION dst = {}, src = {}; + dst.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + dst.pResource = Texture.Get(); + src.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + src.pResource = m_fbo->getRenderTargetTexture(0); + + copycmdlist->CopyTextureRegion(&dst, 0, 0, 0, &src, nullptr); + + D3D12_RESOURCE_BARRIER barriers[2] = + { + getResourceBarrierTransition(m_fbo->getRenderTargetTexture(0), D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET), + getResourceBarrierTransition(Texture.Get(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_GENERIC_READ), + }; + copycmdlist->ResourceBarrier(2, barriers); + check(copycmdlist->Close()); + + m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)&copycmdlist); + + m_texturesRTTs[address_a] = Texture; + m_fbo->m_address_color_a = address_a; + } + + } + if (m_fbo == nullptr || RSXThread::m_width != m_lastWidth || RSXThread::m_height != m_lastHeight || m_lastDepth != m_surface_depth_format) { @@ -361,6 +411,11 @@ void D3D12GSRender::InitDrawBuffers() }; m_fbo = new D3D12RenderTargetSets(m_device, (u8)m_lastDepth, m_lastWidth, m_lastHeight, clearColor, 1.f); + m_fbo->m_address_color_a = GetAddress(m_surface_offset_a, m_context_dma_color_a - 0xfeed0000); + m_fbo->m_address_color_b = GetAddress(m_surface_offset_b, m_context_dma_color_b - 0xfeed0000); + m_fbo->m_address_color_c = GetAddress(m_surface_offset_c, m_context_dma_color_c - 0xfeed0000); + m_fbo->m_address_color_d = GetAddress(m_surface_offset_d, m_context_dma_color_d - 0xfeed0000); + m_fbo->m_address_z = GetAddress(m_surface_offset_z, m_context_dma_z - 0xfeed0000); } } @@ -602,6 +657,8 @@ bool D3D12GSRender::LoadProgram() void D3D12GSRender::ExecCMD() { + InitDrawBuffers(); + ID3D12GraphicsCommandList *commandList; m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_perFrameStorage.m_commandAllocator, nullptr, IID_PPV_ARGS(&commandList)); m_perFrameStorage.m_inflightCommandList.push_back(commandList);
@@ -681,8 +738,6 @@ void D3D12GSRender::ExecCMD() m_perFrameStorage.m_currentTextureIndex += usedTexture; - InitDrawBuffers(); - D3D12_CPU_DESCRIPTOR_HANDLE *DepthStencilHandle = &m_fbo->getDSVCPUHandle(); switch (m_surface_color_target) { @@ -1123,6 +1178,7 @@ void D3D12GSRender::Flip() WaitForSingleObject(handle, INFINITE); CloseHandle(handle); m_perFrameStorage.Reset(); + m_texturesRTTs.clear(); m_frame->Flip(nullptr); } diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 8000c812e4..7f7492565b 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -47,7 +47,8 @@ void SetGetD3DGSFrameCallback(GetGSFrameCb2 value); class D3D12GSRender : public GSRender { private: - + // Copy of RTT to be used as texture + std::unordered_map > m_texturesRTTs; // std::vector m_post_draw_objs; PipelineStateObjectCache m_cachePSO; @@ -85,9 +86,6 @@ private: ID3D12DescriptorHeap *m_samplerDescriptorHeap; size_t m_currentTextureIndex; - // Fence - HANDLE m_queueCompletion; - void Reset(); void Init(ID3D12Device *device); void Release(); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h index 9d9e81a75c..0dbd553be2 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h @@ -25,5 +25,7 @@ public: D3D12_CPU_DESCRIPTOR_HANDLE getDSVCPUHandle() const; ID3D12Resource *getRenderTargetTexture(u8 Id) const; ID3D12Resource *getDepthStencilTexture() const; + + u32 m_address_color_a, m_address_color_b, m_address_color_c, m_address_color_d, m_address_z; }; #endif \ No newline at end of file diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index f4de5a5bc7..60e2c3cd4b 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -90,13 +90,13 @@ size_t D3D12GSRender::UploadTextures() if (!m_textures[i].IsEnabled()) continue; size_t w = 
m_textures[i].GetWidth(), h = m_textures[i].GetHeight(); - // Upload at each iteration to take advantage of overlapping transfer - ID3D12GraphicsCommandList *commandList; - check(m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_perFrameStorage.m_textureUploadCommandAllocator, nullptr, IID_PPV_ARGS(&commandList))); + const u32 texaddr = GetAddress(m_textures[i].GetOffset(), m_textures[i].GetLocation()); + u32 address = GetAddress(m_surface_offset_a, m_context_dma_color_a - 0xfeed0000); DXGI_FORMAT dxgiFormat; size_t blockSizeInByte, blockWidthInPixel, blockHeightInPixel; int format = m_textures[i].GetFormat() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); + bool is_swizzled = !(m_textures[i].GetFormat() & CELL_GCM_TEXTURE_LN); switch (format) { @@ -146,90 +146,104 @@ size_t D3D12GSRender::UploadTextures() break; } - size_t heightInBlocks = (m_textures[i].GetHeight() + blockHeightInPixel - 1) / blockHeightInPixel; - size_t widthInBlocks = (m_textures[i].GetWidth() + blockWidthInPixel - 1) / blockWidthInPixel; - // Multiple of 256 - size_t rowPitch = blockSizeInByte * widthInBlocks; - rowPitch = (rowPitch + 255) & ~255; - - - ID3D12Resource *Texture, *vramTexture; - size_t textureSize = rowPitch * heightInBlocks; - - check(m_device->CreatePlacedResource( - m_perFrameStorage.m_uploadTextureHeap, - m_perFrameStorage.m_currentStorageOffset, - &getBufferResourceDesc(textureSize), - D3D12_RESOURCE_STATE_GENERIC_READ, - nullptr, - IID_PPV_ARGS(&Texture) - )); - - const u32 texaddr = GetAddress(m_textures[i].GetOffset(), m_textures[i].GetLocation()); - auto pixels = vm::get_ptr(texaddr); - void *textureData; - check(Texture->Map(0, nullptr, (void**)&textureData)); - - // Upload with correct rowpitch - for (unsigned row = 0; row < heightInBlocks; row++) + ID3D12Resource *vramTexture; + std::unordered_map >::const_iterator It = m_texturesRTTs.find(address); + if (It != m_texturesRTTs.end()) { - size_t m_texture_pitch = m_textures[i].m_pitch; - if 
(!m_texture_pitch) m_texture_pitch = rowPitch; - if (format == CELL_GCM_TEXTURE_A8R8G8B8 && is_swizzled) - { - u32 *src, *dst; - u32 log2width, log2height; - - src = (u32*)pixels; - dst = (u32*)textureData; - - log2width = (u32)(logf(m_textures[i].GetWidth()) / logf(2.f)); - log2height = (u32)(logf(m_textures[i].GetHeight()) / logf(2.f)); - - for (int j = 0; j < m_textures[i].GetWidth(); j++) - { - dst[(row * rowPitch / 4) + j] = src[LinearToSwizzleAddress(j, i, 0, log2width, log2height, 0)]; - } - } - else - streamToBuffer((char*)textureData + row * rowPitch, (char*)pixels + row * m_texture_pitch, m_texture_pitch); + vramTexture = It->second.Get(); } - Texture->Unmap(0, nullptr); + else + { + // Upload at each iteration to take advantage of overlapping transfer + ID3D12GraphicsCommandList *commandList; + check(m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_perFrameStorage.m_textureUploadCommandAllocator, nullptr, IID_PPV_ARGS(&commandList))); - check(m_device->CreatePlacedResource( - m_perFrameStorage.m_textureStorage, - m_perFrameStorage.m_currentStorageOffset, - &getTexture2DResourceDesc(m_textures[i].GetWidth(), m_textures[i].GetHeight(), dxgiFormat), - D3D12_RESOURCE_STATE_COPY_DEST, - nullptr, - IID_PPV_ARGS(&vramTexture) - )); + size_t heightInBlocks = (m_textures[i].GetHeight() + blockHeightInPixel - 1) / blockHeightInPixel; + size_t widthInBlocks = (m_textures[i].GetWidth() + blockWidthInPixel - 1) / blockWidthInPixel; + // Multiple of 256 + size_t rowPitch = blockSizeInByte * widthInBlocks; + rowPitch = (rowPitch + 255) & ~255; - m_perFrameStorage.m_currentStorageOffset += textureSize; - m_perFrameStorage.m_currentStorageOffset = (m_perFrameStorage.m_currentStorageOffset + 65536 - 1) & ~65535; - m_perFrameStorage.m_inflightResources.push_back(Texture); - m_perFrameStorage.m_inflightResources.push_back(vramTexture); + ID3D12Resource *Texture; + size_t textureSize = rowPitch * heightInBlocks; + check(m_device->CreatePlacedResource( + 
m_perFrameStorage.m_uploadTextureHeap, + m_perFrameStorage.m_currentStorageOffset, + &getBufferResourceDesc(textureSize), + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&Texture) + )); - D3D12_TEXTURE_COPY_LOCATION dst = {}, src = {}; - dst.pResource = vramTexture; - dst.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; - src.pResource = Texture; - src.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; - src.PlacedFootprint.Footprint.Depth = 1; - src.PlacedFootprint.Footprint.Width = m_textures[i].GetWidth(); - src.PlacedFootprint.Footprint.Height = m_textures[i].GetHeight(); - src.PlacedFootprint.Footprint.RowPitch = (UINT)rowPitch; - src.PlacedFootprint.Footprint.Format = dxgiFormat; + auto pixels = vm::get_ptr(texaddr); + void *textureData; + check(Texture->Map(0, nullptr, (void**)&textureData)); - commandList->CopyTextureRegion(&dst, 0, 0, 0, &src, nullptr); + // Upload with correct rowpitch + for (unsigned row = 0; row < heightInBlocks; row++) + { + size_t m_texture_pitch = m_textures[i].m_pitch; + if (!m_texture_pitch) m_texture_pitch = rowPitch; + if (format == CELL_GCM_TEXTURE_A8R8G8B8 && is_swizzled) + { + u32 *src, *dst; + u32 log2width, log2height; - D3D12_RESOURCE_BARRIER barrier = {}; - barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - barrier.Transition.pResource = vramTexture; - barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; - barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_GENERIC_READ; - commandList->ResourceBarrier(1, &barrier); + src = (u32*)pixels; + dst = (u32*)textureData; + + log2width = (u32)(logf(m_textures[i].GetWidth()) / logf(2.f)); + log2height = (u32)(logf(m_textures[i].GetHeight()) / logf(2.f)); + + for (int j = 0; j < m_textures[i].GetWidth(); j++) + { + dst[(row * rowPitch / 4) + j] = src[LinearToSwizzleAddress(j, i, 0, log2width, log2height, 0)]; + } + } + else + streamToBuffer((char*)textureData + row * rowPitch, (char*)pixels + row * m_texture_pitch, m_texture_pitch); + } + 
Texture->Unmap(0, nullptr); + + check(m_device->CreatePlacedResource( + m_perFrameStorage.m_textureStorage, + m_perFrameStorage.m_currentStorageOffset, + &getTexture2DResourceDesc(m_textures[i].GetWidth(), m_textures[i].GetHeight(), dxgiFormat), + D3D12_RESOURCE_STATE_COPY_DEST, + nullptr, + IID_PPV_ARGS(&vramTexture) + )); + + m_perFrameStorage.m_currentStorageOffset += textureSize; + m_perFrameStorage.m_currentStorageOffset = (m_perFrameStorage.m_currentStorageOffset + 65536 - 1) & ~65535; + m_perFrameStorage.m_inflightResources.push_back(Texture); + m_perFrameStorage.m_inflightResources.push_back(vramTexture); + + D3D12_TEXTURE_COPY_LOCATION dst = {}, src = {}; + dst.pResource = vramTexture; + dst.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + src.pResource = Texture; + src.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + src.PlacedFootprint.Footprint.Depth = 1; + src.PlacedFootprint.Footprint.Width = m_textures[i].GetWidth(); + src.PlacedFootprint.Footprint.Height = m_textures[i].GetHeight(); + src.PlacedFootprint.Footprint.RowPitch = (UINT)rowPitch; + src.PlacedFootprint.Footprint.Format = dxgiFormat; + + commandList->CopyTextureRegion(&dst, 0, 0, 0, &src, nullptr); + + D3D12_RESOURCE_BARRIER barrier = {}; + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barrier.Transition.pResource = vramTexture; + barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; + barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_GENERIC_READ; + commandList->ResourceBarrier(1, &barrier); + + commandList->Close(); + m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)&commandList); + m_perFrameStorage.m_inflightCommandList.push_back(commandList); + } D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; @@ -274,10 +288,6 @@ size_t D3D12GSRender::UploadTextures() Handle.ptr += (m_perFrameStorage.m_currentTextureIndex + usedTexture) * 
m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); m_device->CreateSampler(&samplerDesc, Handle); - commandList->Close(); - m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)&commandList); - m_perFrameStorage.m_inflightCommandList.push_back(commandList); - usedTexture++; } From cef58da8792be54b37b92d0676ed816448b23cee Mon Sep 17 00:00:00 2001 From: vlj Date: Fri, 29 May 2015 17:03:41 +0200 Subject: [PATCH 163/343] d3d12: Fix non mrt samples --- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index 60e2c3cd4b..2f9ce01ca1 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -91,7 +91,6 @@ size_t D3D12GSRender::UploadTextures() size_t w = m_textures[i].GetWidth(), h = m_textures[i].GetHeight(); const u32 texaddr = GetAddress(m_textures[i].GetOffset(), m_textures[i].GetLocation()); - u32 address = GetAddress(m_surface_offset_a, m_context_dma_color_a - 0xfeed0000); DXGI_FORMAT dxgiFormat; size_t blockSizeInByte, blockWidthInPixel, blockHeightInPixel; @@ -147,7 +146,7 @@ size_t D3D12GSRender::UploadTextures() } ID3D12Resource *vramTexture; - std::unordered_map >::const_iterator It = m_texturesRTTs.find(address); + std::unordered_map >::const_iterator It = m_texturesRTTs.find(texaddr); if (It != m_texturesRTTs.end()) { vramTexture = It->second.Get(); From da328c078ae024e08ad310ef4af5b093c480b8cc Mon Sep 17 00:00:00 2001 From: vlj Date: Fri, 29 May 2015 17:13:18 +0200 Subject: [PATCH 164/343] d3d12: Fix cube_mrt.ppu.elf It should display a (single) blue cube --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index ee438f32fb..a65ae91872 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ 
b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -389,10 +389,9 @@ void D3D12GSRender::InitDrawBuffers() m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)&copycmdlist); - m_texturesRTTs[address_a] = Texture; + m_texturesRTTs[m_fbo->m_address_color_a] = Texture; m_fbo->m_address_color_a = address_a; } - } if (m_fbo == nullptr || RSXThread::m_width != m_lastWidth || RSXThread::m_height != m_lastHeight || m_lastDepth != m_surface_depth_format) From 8e9cdc5fea88be477842a199de87543d396e3ba6 Mon Sep 17 00:00:00 2001 From: vlj Date: Fri, 29 May 2015 17:16:14 +0200 Subject: [PATCH 165/343] d3d12: Fix memleak --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index a65ae91872..17f06391f0 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -318,6 +318,7 @@ D3D12GSRender::D3D12GSRender() D3D12GSRender::~D3D12GSRender() { + m_texturesRTTs.clear(); m_dummyTexture->Release(); m_convertPSO->Release(); m_convertRootSignature->Release(); @@ -391,6 +392,7 @@ void D3D12GSRender::InitDrawBuffers() m_texturesRTTs[m_fbo->m_address_color_a] = Texture; m_fbo->m_address_color_a = address_a; + m_perFrameStorage.m_inflightCommandList.push_back(copycmdlist); } } From f382ba03196a8873dc5cdb6610c5f6bdc3f4582b Mon Sep 17 00:00:00 2001 From: vlj Date: Fri, 29 May 2015 17:45:36 +0200 Subject: [PATCH 166/343] d3d12: Enable multiple mrt read --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 137 +++++++++++++------- rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h | 1 + 2 files changed, 90 insertions(+), 48 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 17f06391f0..d968066f17 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -343,57 +343,97 @@ void D3D12GSRender::Close() m_frame->Hide(); } +static +void copyFBO(ID3D12Device*
device, ID3D12Resource *rtt, ID3D12GraphicsCommandList *cmdList, + std::unordered_map<u32, Microsoft::WRL::ComPtr<ID3D12Resource> > &texturesRTTs, + u32 &currentFBOAddress, u32 newAddress, size_t width, size_t height) +{ + // TODO : move to texture heap + Microsoft::WRL::ComPtr<ID3D12Resource> Texture; + D3D12_HEAP_PROPERTIES hp = {}; + hp.Type = D3D12_HEAP_TYPE_DEFAULT; + check( + device->CreateCommittedResource( + &hp, + D3D12_HEAP_FLAG_NONE, + &getTexture2DResourceDesc(width, height, DXGI_FORMAT_R8G8B8A8_UNORM), + D3D12_RESOURCE_STATE_COPY_DEST, + nullptr, + IID_PPV_ARGS(&Texture) + ) + ); + + cmdList->ResourceBarrier(1, &getResourceBarrierTransition(rtt, D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE)); + + D3D12_TEXTURE_COPY_LOCATION dst = {}, src = {}; + dst.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + dst.pResource = Texture.Get(); + src.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + src.pResource = rtt; + + cmdList->CopyTextureRegion(&dst, 0, 0, 0, &src, nullptr); + + D3D12_RESOURCE_BARRIER barriers[2] = + { + getResourceBarrierTransition(rtt, D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET), + getResourceBarrierTransition(Texture.Get(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_GENERIC_READ), + }; + cmdList->ResourceBarrier(2, barriers); + + texturesRTTs[currentFBOAddress] = Texture; +} + void D3D12GSRender::InitDrawBuffers() { // FBO location has changed, previous data might be copied if (m_fbo != nullptr) { - // TODO : move to texture heap + ID3D12GraphicsCommandList *copycmdlist; + check(m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_perFrameStorage.m_commandAllocator, nullptr, IID_PPV_ARGS(&copycmdlist))); + m_perFrameStorage.m_inflightCommandList.push_back(copycmdlist); + u32 address_a = GetAddress(m_surface_offset_a, m_context_dma_color_a - 0xfeed0000); - if (m_fbo->m_address_color_a != address_a) + u32 address_b = GetAddress(m_surface_offset_b, m_context_dma_color_b - 0xfeed0000); + u32 address_c = GetAddress(m_surface_offset_c,
m_context_dma_color_c - 0xfeed0000); + u32 address_d = GetAddress(m_surface_offset_d, m_context_dma_color_d - 0xfeed0000); + switch (m_fbo->m_target_type) { - LOG_WARNING(RSX, "Copy draw buffer A"); - Microsoft::WRL::ComPtr Texture; - D3D12_HEAP_PROPERTIES hp = {}; - hp.Type = D3D12_HEAP_TYPE_DEFAULT; - check( - m_device->CreateCommittedResource( - &hp, - D3D12_HEAP_FLAG_NONE, - &getTexture2DResourceDesc(RSXThread::m_width, RSXThread::m_height, DXGI_FORMAT_R8G8B8A8_UNORM), - D3D12_RESOURCE_STATE_COPY_DEST, - nullptr, - IID_PPV_ARGS(&Texture) - ) - ); - - ID3D12GraphicsCommandList *copycmdlist; - check(m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_perFrameStorage.m_commandAllocator, nullptr, IID_PPV_ARGS(©cmdlist))); - - copycmdlist->ResourceBarrier(1, &getResourceBarrierTransition(m_fbo->getRenderTargetTexture(0), D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE)); - - D3D12_TEXTURE_COPY_LOCATION dst = {}, src = {}; - dst.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; - dst.pResource = Texture.Get(); - src.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; - src.pResource = m_fbo->getRenderTargetTexture(0); - - copycmdlist->CopyTextureRegion(&dst, 0, 0, 0, &src, nullptr); - - D3D12_RESOURCE_BARRIER barriers[2] = - { - getResourceBarrierTransition(m_fbo->getRenderTargetTexture(0), D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET), - getResourceBarrierTransition(Texture.Get(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_GENERIC_READ), - }; - copycmdlist->ResourceBarrier(2, barriers); - check(copycmdlist->Close()); - - m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)©cmdlist); - - m_texturesRTTs[m_fbo->m_address_color_a] = Texture; - m_fbo->m_address_color_a = address_a; - m_perFrameStorage.m_inflightCommandList.push_back(copycmdlist); + case CELL_GCM_SURFACE_TARGET_0: + if (m_fbo->m_address_color_a != address_a) + copyFBO(m_device, m_fbo->getRenderTargetTexture(0), copycmdlist, 
m_texturesRTTs, m_fbo->m_address_color_a, address_a, RSXThread::m_width, RSXThread::m_height); + break; + case CELL_GCM_SURFACE_TARGET_1: + if (m_fbo->m_address_color_b != address_b) + copyFBO(m_device, m_fbo->getRenderTargetTexture(1), copycmdlist, m_texturesRTTs, m_fbo->m_address_color_b, address_b, RSXThread::m_width, RSXThread::m_height); + break; + case CELL_GCM_SURFACE_TARGET_MRT1: + if (m_fbo->m_address_color_a != address_a) + copyFBO(m_device, m_fbo->getRenderTargetTexture(0), copycmdlist, m_texturesRTTs, m_fbo->m_address_color_a, address_a, RSXThread::m_width, RSXThread::m_height); + if (m_fbo->m_address_color_b != address_b) + copyFBO(m_device, m_fbo->getRenderTargetTexture(1), copycmdlist, m_texturesRTTs, m_fbo->m_address_color_b, address_b, RSXThread::m_width, RSXThread::m_height); + break; + case CELL_GCM_SURFACE_TARGET_MRT2: + if (m_fbo->m_address_color_a != address_a) + copyFBO(m_device, m_fbo->getRenderTargetTexture(0), copycmdlist, m_texturesRTTs, m_fbo->m_address_color_a, address_a, RSXThread::m_width, RSXThread::m_height); + if (m_fbo->m_address_color_b != address_b) + copyFBO(m_device, m_fbo->getRenderTargetTexture(1), copycmdlist, m_texturesRTTs, m_fbo->m_address_color_b, address_b, RSXThread::m_width, RSXThread::m_height); + if (m_fbo->m_address_color_c != address_c) + copyFBO(m_device, m_fbo->getRenderTargetTexture(2), copycmdlist, m_texturesRTTs, m_fbo->m_address_color_c, address_c, RSXThread::m_width, RSXThread::m_height); + break; + case CELL_GCM_SURFACE_TARGET_MRT3: + if (m_fbo->m_address_color_a != address_a) + copyFBO(m_device, m_fbo->getRenderTargetTexture(0), copycmdlist, m_texturesRTTs, m_fbo->m_address_color_a, address_a, RSXThread::m_width, RSXThread::m_height); + if (m_fbo->m_address_color_b != address_b) + copyFBO(m_device, m_fbo->getRenderTargetTexture(1), copycmdlist, m_texturesRTTs, m_fbo->m_address_color_b, address_b, RSXThread::m_width, RSXThread::m_height); + if (m_fbo->m_address_color_c != address_c) + copyFBO(m_device, 
m_fbo->getRenderTargetTexture(2), copycmdlist, m_texturesRTTs, m_fbo->m_address_color_c, address_c, RSXThread::m_width, RSXThread::m_height); + if (m_fbo->m_address_color_d != address_d) + copyFBO(m_device, m_fbo->getRenderTargetTexture(3), copycmdlist, m_texturesRTTs, m_fbo->m_address_color_d, address_d, RSXThread::m_width, RSXThread::m_height); + break; } + + check(copycmdlist->Close()); + m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)&copycmdlist); } if (m_fbo == nullptr || RSXThread::m_width != m_lastWidth || RSXThread::m_height != m_lastHeight || m_lastDepth != m_surface_depth_format) @@ -412,12 +452,13 @@ void D3D12GSRender::InitDrawBuffers() }; m_fbo = new D3D12RenderTargetSets(m_device, (u8)m_lastDepth, m_lastWidth, m_lastHeight, clearColor, 1.f); - m_fbo->m_address_color_a = GetAddress(m_surface_offset_a, m_context_dma_color_a - 0xfeed0000); - m_fbo->m_address_color_b = GetAddress(m_surface_offset_b, m_context_dma_color_b - 0xfeed0000); - m_fbo->m_address_color_c = GetAddress(m_surface_offset_c, m_context_dma_color_c - 0xfeed0000); - m_fbo->m_address_color_d = GetAddress(m_surface_offset_d, m_context_dma_color_d - 0xfeed0000); - m_fbo->m_address_z = GetAddress(m_surface_offset_z, m_context_dma_z - 0xfeed0000); } + m_fbo->m_address_color_a = GetAddress(m_surface_offset_a, m_context_dma_color_a - 0xfeed0000); + m_fbo->m_address_color_b = GetAddress(m_surface_offset_b, m_context_dma_color_b - 0xfeed0000); + m_fbo->m_address_color_c = GetAddress(m_surface_offset_c, m_context_dma_color_c - 0xfeed0000); + m_fbo->m_address_color_d = GetAddress(m_surface_offset_d, m_context_dma_color_d - 0xfeed0000); + m_fbo->m_address_z = GetAddress(m_surface_offset_z, m_context_dma_z - 0xfeed0000); + m_fbo->m_target_type = m_surface_color_target; } diff --git a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h index 0dbd553be2..89e9792904 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h +++ 
b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h @@ -27,5 +27,6 @@ public: ID3D12Resource *getDepthStencilTexture() const; u32 m_address_color_a, m_address_color_b, m_address_color_c, m_address_color_d, m_address_z; + u32 m_target_type; }; #endif \ No newline at end of file From d8f76f5aeecce238943660f99b053dd3643f9ab7 Mon Sep 17 00:00:00 2001 From: raven02 Date: Sat, 30 May 2015 04:43:53 +0800 Subject: [PATCH 167/343] d3d12: Add LogicOp --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 35 ++++++++++++++++++++++++--- rpcs3/Emu/RSX/GCM.h | 17 +++++++++++++ 2 files changed, 48 insertions(+), 4 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index d968066f17..19135d8f31 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -551,14 +551,12 @@ void D3D12GSRender::ExecCMD(u32 cmd) m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**) &commandList); } -static -D3D12_BLEND_OP getBlendOp() +static D3D12_BLEND_OP getBlendOp() { return D3D12_BLEND_OP_ADD; } -static -D3D12_BLEND getBlendFactor(u16 glFactor) +static D3D12_BLEND getBlendFactor(u16 glFactor) { switch (glFactor) { @@ -577,6 +575,29 @@ D3D12_BLEND getBlendFactor(u16 glFactor) } } +static D3D12_LOGIC_OP getLogicOp(u32 op) +{ + switch (op) + { + default: LOG_WARNING(RSX, "Unsupported Logic Op %d", op); + case CELL_GCM_CLEAR: return D3D12_LOGIC_OP_CLEAR; + case CELL_GCM_AND: return D3D12_LOGIC_OP_AND; + case CELL_GCM_AND_REVERSE: return D3D12_LOGIC_OP_AND_REVERSE; + case CELL_GCM_COPY: return D3D12_LOGIC_OP_COPY; + case CELL_GCM_AND_INVERTED: return D3D12_LOGIC_OP_AND_INVERTED; + case CELL_GCM_NOOP: return D3D12_LOGIC_OP_NOOP; + case CELL_GCM_XOR: return D3D12_LOGIC_OP_XOR; + case CELL_GCM_OR: return D3D12_LOGIC_OP_OR; + case CELL_GCM_NOR: return D3D12_LOGIC_OP_NOR; + case CELL_GCM_EQUIV: return D3D12_LOGIC_OP_EQUIV; + case CELL_GCM_INVERT: return D3D12_LOGIC_OP_INVERT; + case CELL_GCM_OR_REVERSE: return 
D3D12_LOGIC_OP_OR_REVERSE; + case CELL_GCM_COPY_INVERTED: return D3D12_LOGIC_OP_COPY_INVERTED; + case CELL_GCM_OR_INVERTED: return D3D12_LOGIC_OP_OR_INVERTED; + case CELL_GCM_NAND: return D3D12_LOGIC_OP_NAND; + } +} + bool D3D12GSRender::LoadProgram() { if (!m_cur_fragment_prog) @@ -649,6 +670,12 @@ bool D3D12GSRender::LoadProgram() prop.Blend.RenderTarget[0].BlendOpAlpha = D3D12_BLEND_OP_ADD; } + if (m_set_logic_op) + { + prop.Blend.RenderTarget[0].LogicOpEnable = true; + prop.Blend.RenderTarget[0].LogicOp = getLogicOp(m_logic_op); + } + if (m_set_blend_color) { // glBlendColor(m_blend_color_r, m_blend_color_g, m_blend_color_b, m_blend_color_a); diff --git a/rpcs3/Emu/RSX/GCM.h b/rpcs3/Emu/RSX/GCM.h index 71aa10ee82..7c694efc37 100644 --- a/rpcs3/Emu/RSX/GCM.h +++ b/rpcs3/Emu/RSX/GCM.h @@ -134,6 +134,23 @@ enum CELL_GCM_TEXTURE_MIRROR_ONCE_CLAMP_TO_EDGE = 6, CELL_GCM_TEXTURE_MIRROR_ONCE_BORDER = 7, CELL_GCM_TEXTURE_MIRROR_ONCE_CLAMP = 8, + + // Logic Op + CELL_GCM_CLEAR = 1, + CELL_GCM_AND = 2, + CELL_GCM_AND_REVERSE = 3, + CELL_GCM_COPY = 4, + CELL_GCM_AND_INVERTED = 5, + CELL_GCM_NOOP = 6, + CELL_GCM_XOR = 7, + CELL_GCM_OR = 8, + CELL_GCM_NOR = 9, + CELL_GCM_EQUIV = 10, + CELL_GCM_INVERT = 11, + CELL_GCM_OR_REVERSE = 12, + CELL_GCM_COPY_INVERTED = 13, + CELL_GCM_OR_INVERTED = 14, + CELL_GCM_NAND = 15, }; // GCM Surface From 90fb4396feb176ebc25c48250d69e315ae8d2565 Mon Sep 17 00:00:00 2001 From: vlj Date: Sat, 30 May 2015 01:22:31 +0200 Subject: [PATCH 168/343] d3d12; Add a texture cache --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 1 + rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 2 ++ rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 12 +++++++++--- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 19135d8f31..581780eaa4 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -1248,6 +1248,7 @@ void D3D12GSRender::Flip() CloseHandle(handle); 
m_perFrameStorage.Reset(); m_texturesRTTs.clear(); + m_texturesCache.clear(); m_frame->Flip(nullptr); } diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 7f7492565b..01b5055dea 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -49,6 +49,8 @@ class D3D12GSRender : public GSRender private: // Copy of RTT to be used as texture std::unordered_map > m_texturesRTTs; + + std::unordered_map m_texturesCache; // std::vector m_post_draw_objs; PipelineStateObjectCache m_cachePSO; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index 2f9ce01ca1..e2dd61a3d4 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -146,10 +146,15 @@ size_t D3D12GSRender::UploadTextures() } ID3D12Resource *vramTexture; - std::unordered_map >::const_iterator It = m_texturesRTTs.find(texaddr); - if (It != m_texturesRTTs.end()) + std::unordered_map >::const_iterator ItRTT = m_texturesRTTs.find(texaddr); + std::unordered_map::const_iterator ItCache = m_texturesCache.find(texaddr); + if (ItRTT != m_texturesRTTs.end()) { - vramTexture = It->second.Get(); + vramTexture = ItRTT->second.Get(); + } + else if (ItCache != m_texturesCache.end()) + { + vramTexture = ItCache->second; } else { @@ -242,6 +247,7 @@ size_t D3D12GSRender::UploadTextures() commandList->Close(); m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)&commandList); m_perFrameStorage.m_inflightCommandList.push_back(commandList); + m_texturesCache[texaddr] = vramTexture; } D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; From 6877e5e3bac19edb518b164cc1b3e6c455c05f93 Mon Sep 17 00:00:00 2001 From: vlj Date: Sat, 30 May 2015 01:58:16 +0200 Subject: [PATCH 169/343] d3d12: Start using heap for depth buffer readback --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 25 +++++++++++++++++++++---- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 10 ++++++++++ 2 files changed, 31 
insertions(+), 4 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 581780eaa4..f81f352395 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -314,10 +314,19 @@ D3D12GSRender::D3D12GSRender() nullptr, IID_PPV_ARGS(&m_dummyTexture)) ); + + D3D12_HEAP_DESC hd = {}; + hd.SizeInBytes = 1024 * 1024 * 128; + hd.Properties.Type = D3D12_HEAP_TYPE_READBACK; + hd.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS; + check(m_device->CreateHeap(&hd, IID_PPV_ARGS(&m_readbackResources.m_heap))); + m_readbackResources.m_putPos = 0; + m_readbackResources.m_getPos = 1024 * 1024 * 128 - 1; } D3D12GSRender::~D3D12GSRender() { + m_readbackResources.m_heap->Release(); m_texturesRTTs.clear(); m_dummyTexture->Release(); m_convertPSO->Release(); @@ -1352,20 +1361,28 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) ) ); + size_t heapOffset = m_readbackResources.m_putPos.load(); + heapOffset = powerOf2Align(heapOffset, 65536); + size_t sizeInByte = depthRowPitch * RSXThread::m_height; + + if (heapOffset + sizeInByte >= 1024 * 1024 * 128) // If it will be stored past heap size + heapOffset = 0; + heapProp = {}; heapProp.Type = D3D12_HEAP_TYPE_READBACK; - resdesc = getBufferResourceDesc(depthRowPitch * RSXThread::m_height); + resdesc = getBufferResourceDesc(sizeInByte); check( - m_device->CreateCommittedResource( - &heapProp, - D3D12_HEAP_FLAG_NONE, + m_device->CreatePlacedResource( + m_readbackResources.m_heap, + heapOffset, &resdesc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&writeDest) ) ); + m_readbackResources.m_putPos.store(heapOffset + sizeInByte); check( m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_perFrameStorage.m_commandAllocator, nullptr, IID_PPV_ARGS(&convertCommandList)) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 01b5055dea..237042263b 100644 --- 
a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -95,6 +95,16 @@ private: ResourceStorage m_perFrameStorage; + + struct ReadbackHeap + { + ID3D12Heap *m_heap; + std::atomic m_putPos, // Start of free space + m_getPos; // End of free space + }; + + ReadbackHeap m_readbackResources; + bool m_forcedIndexBuffer; size_t indexCount; From 046e5228518d6ae2698141cd2f2e9ba4d8935a2a Mon Sep 17 00:00:00 2001 From: raven02 Date: Sat, 30 May 2015 23:55:35 +0800 Subject: [PATCH 170/343] d3d12: Add DXT23 & DXT45 --- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index e2dd61a3d4..e95c12668a 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -102,8 +102,6 @@ size_t D3D12GSRender::UploadTextures() case CELL_GCM_TEXTURE_A1R5G5B5: case CELL_GCM_TEXTURE_A4R4G4B4: case CELL_GCM_TEXTURE_R5G6B5: - case CELL_GCM_TEXTURE_COMPRESSED_DXT23: - case CELL_GCM_TEXTURE_COMPRESSED_DXT45: case CELL_GCM_TEXTURE_G8B8: case CELL_GCM_TEXTURE_R6G5B5: case CELL_GCM_TEXTURE_DEPTH24_D8: @@ -121,7 +119,7 @@ size_t D3D12GSRender::UploadTextures() case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: default: - LOG_ERROR(RSX, "Unimplemented Texture format"); + LOG_ERROR(RSX, "Unimplemented Texture format : %x", format); break; case CELL_GCM_TEXTURE_D8R8G8B8: dxgiFormat = DXGI_FORMAT_R8G8B8A8_UNORM; @@ -138,6 +136,16 @@ size_t D3D12GSRender::UploadTextures() blockSizeInByte = 8; blockWidthInPixel = 4, blockHeightInPixel = 4; break; + case CELL_GCM_TEXTURE_COMPRESSED_DXT23: + dxgiFormat = DXGI_FORMAT_BC2_UNORM; + blockSizeInByte = 16; + blockWidthInPixel = 4, blockHeightInPixel = 4; + break; + case CELL_GCM_TEXTURE_COMPRESSED_DXT45: + dxgiFormat = 
DXGI_FORMAT_BC3_UNORM; + blockSizeInByte = 16; + blockWidthInPixel = 4, blockHeightInPixel = 4; + break; case CELL_GCM_TEXTURE_B8: dxgiFormat = DXGI_FORMAT_R8_UNORM; blockSizeInByte = 1; @@ -299,4 +307,4 @@ size_t D3D12GSRender::UploadTextures() return usedTexture; } -#endif \ No newline at end of file +#endif From 28c125618a0b1bdfe9dbf01e4a1246bb8d5ef4c6 Mon Sep 17 00:00:00 2001 From: raven02 Date: Sun, 31 May 2015 00:36:25 +0800 Subject: [PATCH 171/343] d3d12 : add few more compressed format --- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index e95c12668a..535125da3d 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -116,6 +116,8 @@ size_t D3D12GSRender::UploadTextures() case CELL_GCM_TEXTURE_X32_FLOAT: case CELL_GCM_TEXTURE_D1R5G5B5: case CELL_GCM_TEXTURE_Y16_X16_FLOAT: + case CELL_GCM_TEXTURE_COMPRESSED_HILO8: + case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8: case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: default: @@ -151,6 +153,16 @@ size_t D3D12GSRender::UploadTextures() blockSizeInByte = 1; blockWidthInPixel = 1, blockHeightInPixel = 1; break; + case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: + dxgiFormat = DXGI_FORMAT_G8R8_G8B8_UNORM; + blockSizeInByte = 4; + blockWidthInPixel = 2, blockHeightInPixel = 2; + break; + case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: + dxgiFormat = DXGI_FORMAT_R8G8_B8G8_UNORM; + blockSizeInByte = 4; + blockWidthInPixel = 2, blockHeightInPixel = 2; + break; } ID3D12Resource *vramTexture; From 2636fb457ec63e0e124210d4b23988e884f8c768 Mon Sep 17 00:00:00 2001 From: vlj Date: Sat, 30 May 2015 23:46:52 +0200 Subject: [PATCH 172/343] d3d12: Use placed resource for mrt readback Should be slightly faster (no allocation) --- 
rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 53 +++++++++++++++------------ rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 1 + 2 files changed, 31 insertions(+), 23 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index f81f352395..56c1325eac 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -1267,20 +1267,28 @@ void D3D12GSRender::WriteDepthBuffer() { } -static -ID3D12Resource *writeColorBuffer(ID3D12Device *device, ID3D12Resource *RTT, ID3D12GraphicsCommandList *cmdlist, size_t rowPitch, size_t width, size_t height) +ID3D12Resource * D3D12GSRender::writeColorBuffer(ID3D12Resource * RTT, ID3D12GraphicsCommandList * cmdlist) { ID3D12Resource *Result; + size_t rowPitch = RSXThread::m_width * 4; + rowPitch = (rowPitch + 255) & ~255; D3D12_HEAP_PROPERTIES heapProp = {}; heapProp.Type = D3D12_HEAP_TYPE_READBACK; - D3D12_RESOURCE_DESC resdesc = getBufferResourceDesc(rowPitch * height); + D3D12_RESOURCE_DESC resdesc = getBufferResourceDesc(rowPitch * RSXThread::m_height); + size_t heapOffset = powerOf2Align(m_readbackResources.m_putPos.load(), 65536); + size_t sizeInByte = rowPitch * RSXThread::m_height; + + if (heapOffset + sizeInByte >= 1024 * 1024 * 128) // If it will be stored past heap size + heapOffset = 0; + + resdesc = getBufferResourceDesc(sizeInByte); check( - device->CreateCommittedResource( - &heapProp, - D3D12_HEAP_FLAG_NONE, + m_device->CreatePlacedResource( + m_readbackResources.m_heap, + heapOffset, &resdesc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, @@ -1298,8 +1306,8 @@ ID3D12Resource *writeColorBuffer(ID3D12Device *device, ID3D12Resource *RTT, ID3D dst.PlacedFootprint.Offset = 0; dst.PlacedFootprint.Footprint.Depth = 1; dst.PlacedFootprint.Footprint.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - dst.PlacedFootprint.Footprint.Height = (UINT)height; - dst.PlacedFootprint.Footprint.Width = (UINT)width; + dst.PlacedFootprint.Footprint.Height = (UINT)RSXThread::m_height; + 
dst.PlacedFootprint.Footprint.Width = (UINT)RSXThread::m_width; dst.PlacedFootprint.Footprint.RowPitch = (UINT)rowPitch; cmdlist->CopyTextureRegion(&dst, 0, 0, 0, &src, nullptr); cmdlist->ResourceBarrier(1, &getResourceBarrierTransition(RTT, D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET)); @@ -1368,8 +1376,6 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) if (heapOffset + sizeInByte >= 1024 * 1024 * 128) // If it will be stored past heap size heapOffset = 0; - heapProp = {}; - heapProp.Type = D3D12_HEAP_TYPE_READBACK; resdesc = getBufferResourceDesc(sizeInByte); check( @@ -1472,8 +1478,6 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) downloadCommandList->CopyTextureRegion(&dst, 0, 0, 0, &src, nullptr); } - size_t colorRowPitch = RSXThread::m_width * 4; - colorRowPitch = (colorRowPitch + 255) & ~255; ID3D12Resource *rtt0, *rtt1, *rtt2, *rtt3; switch (m_surface_color_target) { @@ -1481,29 +1485,29 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) break; case CELL_GCM_SURFACE_TARGET_0: - if (m_context_dma_color_a) rtt0 = writeColorBuffer(m_device, m_fbo->getRenderTargetTexture(0), downloadCommandList, colorRowPitch, RSXThread::m_width, RSXThread::m_height); + if (m_context_dma_color_a) rtt0 = writeColorBuffer(m_fbo->getRenderTargetTexture(0), downloadCommandList); break; case CELL_GCM_SURFACE_TARGET_1: - if (m_context_dma_color_b) rtt1 = writeColorBuffer(m_device, m_fbo->getRenderTargetTexture(1), downloadCommandList, colorRowPitch, RSXThread::m_width, RSXThread::m_height); + if (m_context_dma_color_b) rtt1 = writeColorBuffer(m_fbo->getRenderTargetTexture(1), downloadCommandList); break; case CELL_GCM_SURFACE_TARGET_MRT1: - if (m_context_dma_color_a) rtt0 = writeColorBuffer(m_device, m_fbo->getRenderTargetTexture(0), downloadCommandList, colorRowPitch, RSXThread::m_width, RSXThread::m_height); - if (m_context_dma_color_b) rtt1 = writeColorBuffer(m_device, 
m_fbo->getRenderTargetTexture(1), downloadCommandList, colorRowPitch, RSXThread::m_width, RSXThread::m_height); + if (m_context_dma_color_a) rtt0 = writeColorBuffer(m_fbo->getRenderTargetTexture(0), downloadCommandList); + if (m_context_dma_color_b) rtt1 = writeColorBuffer(m_fbo->getRenderTargetTexture(1), downloadCommandList); break; case CELL_GCM_SURFACE_TARGET_MRT2: - if (m_context_dma_color_a) rtt0 = writeColorBuffer(m_device, m_fbo->getRenderTargetTexture(0), downloadCommandList, colorRowPitch, RSXThread::m_width, RSXThread::m_height); - if (m_context_dma_color_b) rtt1 = writeColorBuffer(m_device, m_fbo->getRenderTargetTexture(1), downloadCommandList, colorRowPitch, RSXThread::m_width, RSXThread::m_height); - if (m_context_dma_color_b) rtt2 = writeColorBuffer(m_device, m_fbo->getRenderTargetTexture(2), downloadCommandList, colorRowPitch, RSXThread::m_width, RSXThread::m_height); + if (m_context_dma_color_a) rtt0 = writeColorBuffer(m_fbo->getRenderTargetTexture(0), downloadCommandList); + if (m_context_dma_color_b) rtt1 = writeColorBuffer(m_fbo->getRenderTargetTexture(1), downloadCommandList); + if (m_context_dma_color_b) rtt2 = writeColorBuffer(m_fbo->getRenderTargetTexture(2), downloadCommandList); break; case CELL_GCM_SURFACE_TARGET_MRT3: - if (m_context_dma_color_a) rtt0 = writeColorBuffer(m_device, m_fbo->getRenderTargetTexture(0), downloadCommandList, colorRowPitch, RSXThread::m_width, RSXThread::m_height); - if (m_context_dma_color_b) rtt1 = writeColorBuffer(m_device, m_fbo->getRenderTargetTexture(1), downloadCommandList, colorRowPitch, RSXThread::m_width, RSXThread::m_height); - if (m_context_dma_color_b) rtt2 = writeColorBuffer(m_device, m_fbo->getRenderTargetTexture(2), downloadCommandList, colorRowPitch, RSXThread::m_width, RSXThread::m_height); - if (m_context_dma_color_b) rtt3 = writeColorBuffer(m_device, m_fbo->getRenderTargetTexture(3), downloadCommandList, colorRowPitch, RSXThread::m_width, RSXThread::m_height); + if (m_context_dma_color_a) rtt0 
= writeColorBuffer(m_fbo->getRenderTargetTexture(0), downloadCommandList); + if (m_context_dma_color_b) rtt1 = writeColorBuffer(m_fbo->getRenderTargetTexture(1), downloadCommandList); + if (m_context_dma_color_b) rtt2 = writeColorBuffer(m_fbo->getRenderTargetTexture(2), downloadCommandList); + if (m_context_dma_color_b) rtt3 = writeColorBuffer(m_fbo->getRenderTargetTexture(3), downloadCommandList); break; } if (needTransfer) @@ -1545,6 +1549,9 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) convertCommandList->Release(); } + size_t colorRowPitch = RSXThread::m_width * 4; + colorRowPitch = (colorRowPitch + 255) & ~255; + switch (m_surface_color_target) { case CELL_GCM_SURFACE_TARGET_NONE: diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 237042263b..12d3d891dd 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -136,6 +136,7 @@ public: virtual void semaphorePFIFOAcquire(u32 offset, u32 value) override; private: + ID3D12Resource *writeColorBuffer(ID3D12Resource *RTT, ID3D12GraphicsCommandList *cmdlist); virtual void Close() override; bool LoadProgram(); From 01ac9f02d29ab5e3b2182b3c22266c4bae91b12e Mon Sep 17 00:00:00 2001 From: vlj Date: Sat, 30 May 2015 23:52:35 +0200 Subject: [PATCH 173/343] d3d12: Use placed resource for depth conversion result --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 24 ++++++++++++++++++------ rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 8 ++++++++ 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 56c1325eac..c5d92c3e9d 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -322,10 +322,17 @@ D3D12GSRender::D3D12GSRender() check(m_device->CreateHeap(&hd, IID_PPV_ARGS(&m_readbackResources.m_heap))); m_readbackResources.m_putPos = 0; m_readbackResources.m_getPos = 1024 * 1024 * 128 - 1; + + 
hd.Properties.Type = D3D12_HEAP_TYPE_DEFAULT; + hd.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_NON_RT_DS_TEXTURES; + check(m_device->CreateHeap(&hd, IID_PPV_ARGS(&m_UAVHeap.m_heap))); + m_UAVHeap.m_putPos = 0; + m_UAVHeap.m_getPos = 1024 * 1024 * 128 - 1; } D3D12GSRender::~D3D12GSRender() { + m_UAVHeap.m_heap->Release(); m_readbackResources.m_heap->Release(); m_texturesRTTs.clear(); m_dummyTexture->Release(); @@ -1358,10 +1365,16 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) D3D12_RESOURCE_DESC resdesc = getTexture2DResourceDesc(RSXThread::m_width, RSXThread::m_height, DXGI_FORMAT_R8_UNORM); resdesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + size_t heapOffset = m_readbackResources.m_putPos.load(); + heapOffset = powerOf2Align(heapOffset, 65536); + size_t sizeInByte = RSXThread::m_width * RSXThread::m_height; + if (heapOffset + sizeInByte >= 1024 * 1024 * 128) // If it will be stored past heap size + heapOffset = 0; + check( - m_device->CreateCommittedResource( - &heapProp, - D3D12_HEAP_FLAG_NONE, + m_device->CreatePlacedResource( + m_UAVHeap.m_heap, + heapOffset, &resdesc, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, nullptr, @@ -1369,13 +1382,12 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) ) ); - size_t heapOffset = m_readbackResources.m_putPos.load(); + heapOffset = m_readbackResources.m_putPos.load(); heapOffset = powerOf2Align(heapOffset, 65536); - size_t sizeInByte = depthRowPitch * RSXThread::m_height; + sizeInByte = depthRowPitch * RSXThread::m_height; if (heapOffset + sizeInByte >= 1024 * 1024 * 128) // If it will be stored past heap size heapOffset = 0; - resdesc = getBufferResourceDesc(sizeInByte); check( diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 12d3d891dd..f28bc91bbd 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -95,6 +95,14 @@ private: ResourceStorage m_perFrameStorage; + struct UAVHeap + { + 
ID3D12Heap *m_heap; + std::atomic m_putPos, // Start of free space + m_getPos; // End of free space + }; + + UAVHeap m_UAVHeap; struct ReadbackHeap { From 5a57564b0c60847f0871e8461d239601b5bd322d Mon Sep 17 00:00:00 2001 From: vlj Date: Sat, 30 May 2015 23:55:01 +0200 Subject: [PATCH 174/343] d3d12: Forgot a hunk --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index c5d92c3e9d..983320efb3 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -1381,6 +1381,7 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) IID_PPV_ARGS(&depthConverted) ) ); + m_UAVHeap.m_putPos.store(heapOffset + sizeInByte); heapOffset = m_readbackResources.m_putPos.load(); heapOffset = powerOf2Align(heapOffset, 65536); From 001dc7cf577788335eea82fa204abf1c5d143cd9 Mon Sep 17 00:00:00 2001 From: vlj Date: Sun, 31 May 2015 20:10:39 +0200 Subject: [PATCH 175/343] d3d12: Do not copy fbo, just use new one --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 301 +++++++++++------- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 5 +- rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp | 242 +++++++------- rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h | 43 ++- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 6 +- 5 files changed, 330 insertions(+), 267 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 983320efb3..134a03d67d 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -165,7 +165,7 @@ std::pair compileF32toU8CS() } D3D12GSRender::D3D12GSRender() - : GSRender(), m_fbo(nullptr), m_PSO(nullptr) + : GSRender(), m_PSO(nullptr) { if (Ini.GSDebugOutputEnable.GetValue()) { @@ -328,6 +328,8 @@ D3D12GSRender::D3D12GSRender() check(m_device->CreateHeap(&hd, IID_PPV_ARGS(&m_UAVHeap.m_heap))); m_UAVHeap.m_putPos = 0; m_UAVHeap.m_getPos = 1024 * 1024 * 
128 - 1; + + m_rtts.Init(m_device); } D3D12GSRender::~D3D12GSRender() @@ -345,8 +347,7 @@ D3D12GSRender::~D3D12GSRender() m_backBuffer[0]->Release(); m_backbufferAsRendertarget[1]->Release(); m_backBuffer[1]->Release(); - if (m_fbo) - delete m_fbo; + m_rtts.Release(); m_rootSignature->Release(); m_swapChain->Release(); m_device->Release(); @@ -402,79 +403,152 @@ void copyFBO(ID3D12Device* device, ID3D12Resource *rtt, ID3D12GraphicsCommandLis void D3D12GSRender::InitDrawBuffers() { // FBO location has changed, previous data might be copied - if (m_fbo != nullptr) + u32 address_a = GetAddress(m_surface_offset_a, m_context_dma_color_a - 0xfeed0000); + u32 address_b = GetAddress(m_surface_offset_b, m_context_dma_color_b - 0xfeed0000); + u32 address_c = GetAddress(m_surface_offset_c, m_context_dma_color_c - 0xfeed0000); + u32 address_d = GetAddress(m_surface_offset_d, m_context_dma_color_d - 0xfeed0000); + u32 address_z = GetAddress(m_surface_offset_z, m_context_dma_z - 0xfeed0000); + + ID3D12GraphicsCommandList *copycmdlist; + check(m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_perFrameStorage.m_commandAllocator, nullptr, IID_PPV_ARGS(&copycmdlist))); + m_perFrameStorage.m_inflightCommandList.push_back(copycmdlist); + + // Make previous RTTs sampleable + for (unsigned i = 0; i < 4; i++) { - ID3D12GraphicsCommandList *copycmdlist; - check(m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_perFrameStorage.m_commandAllocator, nullptr, IID_PPV_ARGS(&copycmdlist))); - m_perFrameStorage.m_inflightCommandList.push_back(copycmdlist); - - u32 address_a = GetAddress(m_surface_offset_a, m_context_dma_color_a - 0xfeed0000); - u32 address_b = GetAddress(m_surface_offset_b, m_context_dma_color_b - 0xfeed0000); - u32 address_c = GetAddress(m_surface_offset_c, m_context_dma_color_c - 0xfeed0000); - u32 address_d = GetAddress(m_surface_offset_d, m_context_dma_color_d - 0xfeed0000); - switch (m_fbo->m_target_type) - { - case CELL_GCM_SURFACE_TARGET_0: - if 
(m_fbo->m_address_color_a != address_a) - copyFBO(m_device, m_fbo->getRenderTargetTexture(0), copycmdlist, m_texturesRTTs, m_fbo->m_address_color_a, address_a, RSXThread::m_width, RSXThread::m_height); - break; - case CELL_GCM_SURFACE_TARGET_1: - if (m_fbo->m_address_color_b != address_b) - copyFBO(m_device, m_fbo->getRenderTargetTexture(1), copycmdlist, m_texturesRTTs, m_fbo->m_address_color_b, address_b, RSXThread::m_width, RSXThread::m_height); - break; - case CELL_GCM_SURFACE_TARGET_MRT1: - if (m_fbo->m_address_color_a != address_a) - copyFBO(m_device, m_fbo->getRenderTargetTexture(0), copycmdlist, m_texturesRTTs, m_fbo->m_address_color_a, address_a, RSXThread::m_width, RSXThread::m_height); - if (m_fbo->m_address_color_b != address_b) - copyFBO(m_device, m_fbo->getRenderTargetTexture(1), copycmdlist, m_texturesRTTs, m_fbo->m_address_color_b, address_b, RSXThread::m_width, RSXThread::m_height); - break; - case CELL_GCM_SURFACE_TARGET_MRT2: - if (m_fbo->m_address_color_a != address_a) - copyFBO(m_device, m_fbo->getRenderTargetTexture(0), copycmdlist, m_texturesRTTs, m_fbo->m_address_color_a, address_a, RSXThread::m_width, RSXThread::m_height); - if (m_fbo->m_address_color_b != address_b) - copyFBO(m_device, m_fbo->getRenderTargetTexture(1), copycmdlist, m_texturesRTTs, m_fbo->m_address_color_b, address_b, RSXThread::m_width, RSXThread::m_height); - if (m_fbo->m_address_color_c != address_c) - copyFBO(m_device, m_fbo->getRenderTargetTexture(2), copycmdlist, m_texturesRTTs, m_fbo->m_address_color_c, address_c, RSXThread::m_width, RSXThread::m_height); - break; - case CELL_GCM_SURFACE_TARGET_MRT3: - if (m_fbo->m_address_color_a != address_a) - copyFBO(m_device, m_fbo->getRenderTargetTexture(0), copycmdlist, m_texturesRTTs, m_fbo->m_address_color_a, address_a, RSXThread::m_width, RSXThread::m_height); - if (m_fbo->m_address_color_b != address_b) - copyFBO(m_device, m_fbo->getRenderTargetTexture(1), copycmdlist, m_texturesRTTs, m_fbo->m_address_color_b, address_b, 
RSXThread::m_width, RSXThread::m_height); - if (m_fbo->m_address_color_c != address_c) - copyFBO(m_device, m_fbo->getRenderTargetTexture(2), copycmdlist, m_texturesRTTs, m_fbo->m_address_color_c, address_c, RSXThread::m_width, RSXThread::m_height); - if (m_fbo->m_address_color_d != address_d) - copyFBO(m_device, m_fbo->getRenderTargetTexture(3), copycmdlist, m_texturesRTTs, m_fbo->m_address_color_d, address_d, RSXThread::m_width, RSXThread::m_height); - break; - } - - check(copycmdlist->Close()); - m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)&copycmdlist); + if (m_rtts.m_currentlyBoundRenderTargets[i] == nullptr) + continue; + copycmdlist->ResourceBarrier(1, &getResourceBarrierTransition(m_rtts.m_currentlyBoundRenderTargets[i], D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_GENERIC_READ)); + m_rtts.m_renderTargets[m_rtts.m_currentlyBoundRenderTargetsAddress[i]] = m_rtts.m_currentlyBoundRenderTargets[i]; } - if (m_fbo == nullptr || RSXThread::m_width != m_lastWidth || RSXThread::m_height != m_lastHeight || m_lastDepth != m_surface_depth_format) - { - - LOG_WARNING(RSX, "New FBO (%dx%d)", RSXThread::m_width, RSXThread::m_height); - m_lastWidth = RSXThread::m_width; - m_lastHeight = RSXThread::m_height; - m_lastDepth = m_surface_depth_format; - float clearColor[] = - { - m_clear_surface_color_r / 255.0f, - m_clear_surface_color_g / 255.0f, - m_clear_surface_color_b / 255.0f, - m_clear_surface_color_a / 255.0f - }; + memset(m_rtts.m_currentlyBoundRenderTargetsAddress, 0, 4 * sizeof(u32)); + memset(m_rtts.m_currentlyBoundRenderTargets, 0, 4 * sizeof(ID3D12Resource *)); + m_rtts.m_currentlyBoundDepthStencil = nullptr; + m_rtts.m_currentlyBoundDepthStencilAddress = 0; - m_fbo = new D3D12RenderTargetSets(m_device, (u8)m_lastDepth, m_lastWidth, m_lastHeight, clearColor, 1.f); + + D3D12_CPU_DESCRIPTOR_HANDLE Handle = m_rtts.m_renderTargetsDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); + size_t g_RTTIncrement = 
m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_RTV); + + switch (m_surface_color_target) + { + case CELL_GCM_SURFACE_TARGET_0: + { + ID3D12Resource *rttA = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 0,address_a, RSXThread::m_width, RSXThread::m_height, + m_clear_surface_color_r / 255.0f, m_clear_surface_color_g / 255.0f, m_clear_surface_color_b / 255.0f, m_clear_surface_color_a / 255.0f); + D3D12_RENDER_TARGET_VIEW_DESC rttViewDesc = {}; + rttViewDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; + rttViewDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + m_device->CreateRenderTargetView(rttA, &rttViewDesc, Handle); + break; } - m_fbo->m_address_color_a = GetAddress(m_surface_offset_a, m_context_dma_color_a - 0xfeed0000); - m_fbo->m_address_color_b = GetAddress(m_surface_offset_b, m_context_dma_color_b - 0xfeed0000); - m_fbo->m_address_color_c = GetAddress(m_surface_offset_c, m_context_dma_color_c - 0xfeed0000); - m_fbo->m_address_color_d = GetAddress(m_surface_offset_d, m_context_dma_color_d - 0xfeed0000); - m_fbo->m_address_z = GetAddress(m_surface_offset_z, m_context_dma_z - 0xfeed0000); - m_fbo->m_target_type = m_surface_color_target; + case CELL_GCM_SURFACE_TARGET_1: + { + ID3D12Resource *rttB = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 0, address_b, RSXThread::m_width, RSXThread::m_height, + m_clear_surface_color_r / 255.0f, m_clear_surface_color_g / 255.0f, m_clear_surface_color_b / 255.0f, m_clear_surface_color_a / 255.0f); + D3D12_RENDER_TARGET_VIEW_DESC rttViewDesc = {}; + rttViewDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; + rttViewDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + m_device->CreateRenderTargetView(rttB, &rttViewDesc, Handle); + break; + } + case CELL_GCM_SURFACE_TARGET_MRT1: + { + ID3D12Resource *rttA = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 0, address_a, RSXThread::m_width, RSXThread::m_height, + m_clear_surface_color_r / 255.0f, m_clear_surface_color_g / 255.0f, 
m_clear_surface_color_b / 255.0f, m_clear_surface_color_a / 255.0f); + D3D12_RENDER_TARGET_VIEW_DESC rttViewDesc = {}; + rttViewDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; + rttViewDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + m_device->CreateRenderTargetView(rttA, &rttViewDesc, Handle); + Handle.ptr += g_RTTIncrement; + ID3D12Resource *rttB = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 1, address_b, RSXThread::m_width, RSXThread::m_height, + m_clear_surface_color_r / 255.0f, m_clear_surface_color_g / 255.0f, m_clear_surface_color_b / 255.0f, m_clear_surface_color_a / 255.0f); + rttViewDesc = {}; + rttViewDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; + rttViewDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + m_device->CreateRenderTargetView(rttB, &rttViewDesc, Handle); + } + break; + case CELL_GCM_SURFACE_TARGET_MRT2: + { + ID3D12Resource *rttA = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 0, address_a, RSXThread::m_width, RSXThread::m_height, + m_clear_surface_color_r / 255.0f, m_clear_surface_color_g / 255.0f, m_clear_surface_color_b / 255.0f, m_clear_surface_color_a / 255.0f); + D3D12_RENDER_TARGET_VIEW_DESC rttViewDesc = {}; + rttViewDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; + rttViewDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + m_device->CreateRenderTargetView(rttA, &rttViewDesc, Handle); + Handle.ptr += g_RTTIncrement; + ID3D12Resource *rttB = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 1, address_b, RSXThread::m_width, RSXThread::m_height, + m_clear_surface_color_r / 255.0f, m_clear_surface_color_g / 255.0f, m_clear_surface_color_b / 255.0f, m_clear_surface_color_a / 255.0f); + rttViewDesc = {}; + rttViewDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; + rttViewDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + m_device->CreateRenderTargetView(rttB, &rttViewDesc, Handle); + Handle.ptr += g_RTTIncrement; + ID3D12Resource *rttC = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 2, address_c, 
RSXThread::m_width, RSXThread::m_height, + m_clear_surface_color_r / 255.0f, m_clear_surface_color_g / 255.0f, m_clear_surface_color_b / 255.0f, m_clear_surface_color_a / 255.0f); + rttViewDesc = {}; + rttViewDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; + rttViewDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + m_device->CreateRenderTargetView(rttC, &rttViewDesc, Handle); + break; + } + case CELL_GCM_SURFACE_TARGET_MRT3: + { + ID3D12Resource *rttA = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 0, address_a, RSXThread::m_width, RSXThread::m_height, + m_clear_surface_color_r / 255.0f, m_clear_surface_color_g / 255.0f, m_clear_surface_color_b / 255.0f, m_clear_surface_color_a / 255.0f); + D3D12_RENDER_TARGET_VIEW_DESC rttViewDesc = {}; + rttViewDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; + rttViewDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + m_device->CreateRenderTargetView(rttA, &rttViewDesc, Handle); + Handle.ptr += g_RTTIncrement; + ID3D12Resource *rttB = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 1,address_b, RSXThread::m_width, RSXThread::m_height, + m_clear_surface_color_r / 255.0f, m_clear_surface_color_g / 255.0f, m_clear_surface_color_b / 255.0f, m_clear_surface_color_a / 255.0f); + rttViewDesc = {}; + rttViewDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; + rttViewDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + m_device->CreateRenderTargetView(rttB, &rttViewDesc, Handle); + Handle.ptr += g_RTTIncrement; + ID3D12Resource *rttC = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 2, address_c, RSXThread::m_width, RSXThread::m_height, + m_clear_surface_color_r / 255.0f, m_clear_surface_color_g / 255.0f, m_clear_surface_color_b / 255.0f, m_clear_surface_color_a / 255.0f); + rttViewDesc = {}; + rttViewDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; + rttViewDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + m_device->CreateRenderTargetView(rttC, &rttViewDesc, Handle); + Handle.ptr += g_RTTIncrement; + ID3D12Resource *rttD = 
m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 3, address_d, RSXThread::m_width, RSXThread::m_height, + m_clear_surface_color_r / 255.0f, m_clear_surface_color_g / 255.0f, m_clear_surface_color_b / 255.0f, m_clear_surface_color_a / 255.0f); + rttViewDesc = {}; + rttViewDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; + rttViewDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + m_device->CreateRenderTargetView(rttD, &rttViewDesc, Handle); + break; + } + } + + ID3D12Resource *ds = m_rtts.bindAddressAsDepthStencil(m_device, copycmdlist, address_z, RSXThread::m_width, RSXThread::m_height, m_surface_depth_format, 1., 0); + + D3D12_DEPTH_STENCIL_VIEW_DESC depthStencilViewDesc = {}; + switch (m_surface_depth_format) + { + case 0: + break; + case CELL_GCM_SURFACE_Z16: + depthStencilViewDesc.Format = DXGI_FORMAT_D16_UNORM; + break; + case CELL_GCM_SURFACE_Z24S8: + depthStencilViewDesc.Format = DXGI_FORMAT_D24_UNORM_S8_UINT; + break; + default: + LOG_ERROR(RSX, "Bad depth format! (%d)", m_surface_depth_format); + assert(0); + } + depthStencilViewDesc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2D; + m_device->CreateDepthStencilView(ds, &depthStencilViewDesc, m_rtts.m_depthStencilDescriptorHeap->GetCPUDescriptorHandleForHeapStart()); + + check(copycmdlist->Close()); + m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)©cmdlist); } @@ -519,10 +593,10 @@ void D3D12GSRender::ExecCMD(u32 cmd) // TODO: Merge depth and stencil clear when possible if (m_clear_surface_mask & 0x1) - commandList->ClearDepthStencilView(m_fbo->getDSVCPUHandle(), D3D12_CLEAR_FLAG_DEPTH, m_clear_surface_z / (float)0xffffff, 0, 0, nullptr); + commandList->ClearDepthStencilView(m_rtts.m_depthStencilDescriptorHeap->GetCPUDescriptorHandleForHeapStart(), D3D12_CLEAR_FLAG_DEPTH, m_clear_surface_z / (float)0xffffff, 0, 0, nullptr); if (m_clear_surface_mask & 0x2) - commandList->ClearDepthStencilView(m_fbo->getDSVCPUHandle(), D3D12_CLEAR_FLAG_STENCIL, 0.f, m_clear_surface_s, 0, nullptr); + 
commandList->ClearDepthStencilView(m_rtts.m_depthStencilDescriptorHeap->GetCPUDescriptorHandleForHeapStart(), D3D12_CLEAR_FLAG_STENCIL, 0.f, m_clear_surface_s, 0, nullptr); if (m_clear_surface_mask & 0xF0) { @@ -533,30 +607,37 @@ void D3D12GSRender::ExecCMD(u32 cmd) m_clear_surface_color_b / 255.0f, m_clear_surface_color_a / 255.0f }; + + D3D12_CPU_DESCRIPTOR_HANDLE handle = m_rtts.m_renderTargetsDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); + size_t g_RTTIncrement = m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_RTV); switch (m_surface_color_target) { case CELL_GCM_SURFACE_TARGET_NONE: break; case CELL_GCM_SURFACE_TARGET_0: - commandList->ClearRenderTargetView(m_fbo->getRTTCPUHandle(0), clearColor, 0, nullptr); - break; case CELL_GCM_SURFACE_TARGET_1: - commandList->ClearRenderTargetView(m_fbo->getRTTCPUHandle(1), clearColor, 0, nullptr); + commandList->ClearRenderTargetView(handle, clearColor, 0, nullptr); break; case CELL_GCM_SURFACE_TARGET_MRT1: - commandList->ClearRenderTargetView(m_fbo->getRTTCPUHandle(0), clearColor, 0, nullptr); - commandList->ClearRenderTargetView(m_fbo->getRTTCPUHandle(1), clearColor, 0, nullptr); + commandList->ClearRenderTargetView(handle, clearColor, 0, nullptr); + handle.ptr += g_RTTIncrement; + commandList->ClearRenderTargetView(handle, clearColor, 0, nullptr); break; case CELL_GCM_SURFACE_TARGET_MRT2: - commandList->ClearRenderTargetView(m_fbo->getRTTCPUHandle(0), clearColor, 0, nullptr); - commandList->ClearRenderTargetView(m_fbo->getRTTCPUHandle(1), clearColor, 0, nullptr); - commandList->ClearRenderTargetView(m_fbo->getRTTCPUHandle(2), clearColor, 0, nullptr); + commandList->ClearRenderTargetView(handle, clearColor, 0, nullptr); + handle.ptr += g_RTTIncrement; + commandList->ClearRenderTargetView(handle, clearColor, 0, nullptr); + handle.ptr += g_RTTIncrement; + commandList->ClearRenderTargetView(handle, clearColor, 0, nullptr); break; case CELL_GCM_SURFACE_TARGET_MRT3: - 
commandList->ClearRenderTargetView(m_fbo->getRTTCPUHandle(0), clearColor, 0, nullptr); - commandList->ClearRenderTargetView(m_fbo->getRTTCPUHandle(1), clearColor, 0, nullptr); - commandList->ClearRenderTargetView(m_fbo->getRTTCPUHandle(2), clearColor, 0, nullptr); - commandList->ClearRenderTargetView(m_fbo->getRTTCPUHandle(3), clearColor, 0, nullptr); + commandList->ClearRenderTargetView(handle, clearColor, 0, nullptr); + handle.ptr += g_RTTIncrement; + commandList->ClearRenderTargetView(handle, clearColor, 0, nullptr); + handle.ptr += g_RTTIncrement; + commandList->ClearRenderTargetView(handle, clearColor, 0, nullptr); + handle.ptr += g_RTTIncrement; + commandList->ClearRenderTargetView(handle, clearColor, 0, nullptr); break; default: LOG_ERROR(RSX, "Bad surface color target: %d", m_surface_color_target); @@ -823,28 +904,30 @@ void D3D12GSRender::ExecCMD() m_perFrameStorage.m_currentTextureIndex += usedTexture; - D3D12_CPU_DESCRIPTOR_HANDLE *DepthStencilHandle = &m_fbo->getDSVCPUHandle(); + size_t numRTT; switch (m_surface_color_target) { case CELL_GCM_SURFACE_TARGET_NONE: break; case CELL_GCM_SURFACE_TARGET_0: - commandList->OMSetRenderTargets(1, &m_fbo->getRTTCPUHandle(0), true, DepthStencilHandle); - break; case CELL_GCM_SURFACE_TARGET_1: - commandList->OMSetRenderTargets(1, &m_fbo->getRTTCPUHandle(1), true, DepthStencilHandle); + numRTT = 1; break; case CELL_GCM_SURFACE_TARGET_MRT1: - commandList->OMSetRenderTargets(2, &m_fbo->getRTTCPUHandle(0), true, DepthStencilHandle); + numRTT = 2; break; case CELL_GCM_SURFACE_TARGET_MRT2: - commandList->OMSetRenderTargets(3, &m_fbo->getRTTCPUHandle(0), true, DepthStencilHandle); + numRTT = 3; break; case CELL_GCM_SURFACE_TARGET_MRT3: - commandList->OMSetRenderTargets(4, &m_fbo->getRTTCPUHandle(0), true, DepthStencilHandle); + numRTT = 4; break; default: LOG_ERROR(RSX, "Bad surface color target: %d", m_surface_color_target); } + + D3D12_CPU_DESCRIPTOR_HANDLE *DepthStencilHandle = 
&m_rtts.m_depthStencilDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); + commandList->OMSetRenderTargets(numRTT, &m_rtts.m_renderTargetsDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(), true, DepthStencilHandle); + D3D12_VIEWPORT viewport = { 0.f, @@ -1231,7 +1314,7 @@ void D3D12GSRender::Flip() barriers[0].Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_DEST; barriers[1].Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - barriers[1].Transition.pResource = m_fbo->getRenderTargetTexture(0); + barriers[1].Transition.pResource = m_rtts.m_currentlyBoundRenderTargets[0]; barriers[1].Transition.StateBefore = D3D12_RESOURCE_STATE_RENDER_TARGET; barriers[1].Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE; @@ -1239,7 +1322,7 @@ void D3D12GSRender::Flip() D3D12_TEXTURE_COPY_LOCATION src = {}, dst = {}; src.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, dst.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; src.SubresourceIndex = 0, dst.SubresourceIndex = 0; - src.pResource = m_fbo->getRenderTargetTexture(0), dst.pResource = m_backBuffer[m_swapChain->GetCurrentBackBufferIndex()]; + src.pResource = m_rtts.m_currentlyBoundRenderTargets[0], dst.pResource = m_backBuffer[m_swapChain->GetCurrentBackBufferIndex()]; D3D12_BOX box = { 0, 0, 0, RSXThread::m_width, RSXThread::m_height, 1 }; commandList->CopyTextureRegion(&dst, 0, 0, 0, &src, &box); @@ -1263,8 +1346,8 @@ void D3D12GSRender::Flip() WaitForSingleObject(handle, INFINITE); CloseHandle(handle); m_perFrameStorage.Reset(); - m_texturesRTTs.clear(); m_texturesCache.clear(); + m_texturesRTTs.clear(); m_frame->Flip(nullptr); } @@ -1433,7 +1516,7 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; srvDesc.Texture2D.MipLevels = 1; srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; - m_device->CreateShaderResourceView(m_fbo->getDepthStencilTexture(), &srvDesc, Handle); + 
m_device->CreateShaderResourceView(m_rtts.m_currentlyBoundDepthStencil, &srvDesc, Handle); Handle.ptr += m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; uavDesc.Format = DXGI_FORMAT_R8_UNORM; @@ -1441,7 +1524,7 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) m_device->CreateUnorderedAccessView(depthConverted, nullptr, &uavDesc, Handle); // Convert - convertCommandList->ResourceBarrier(1, &getResourceBarrierTransition(m_fbo->getDepthStencilTexture(), D3D12_RESOURCE_STATE_DEPTH_WRITE, D3D12_RESOURCE_STATE_GENERIC_READ)); + convertCommandList->ResourceBarrier(1, &getResourceBarrierTransition(m_rtts.m_currentlyBoundDepthStencil, D3D12_RESOURCE_STATE_DEPTH_WRITE, D3D12_RESOURCE_STATE_GENERIC_READ)); convertCommandList->SetPipelineState(m_convertPSO); convertCommandList->SetComputeRootSignature(m_convertRootSignature); @@ -1456,7 +1539,7 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) D3D12_RESOURCE_BARRIER barriers[] = { - getResourceBarrierTransition(m_fbo->getDepthStencilTexture(), D3D12_RESOURCE_STATE_GENERIC_READ, D3D12_RESOURCE_STATE_DEPTH_WRITE), + getResourceBarrierTransition(m_rtts.m_currentlyBoundDepthStencil, D3D12_RESOURCE_STATE_GENERIC_READ, D3D12_RESOURCE_STATE_DEPTH_WRITE), uavbarrier, }; convertCommandList->ResourceBarrier(2, barriers); @@ -1498,29 +1581,29 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) break; case CELL_GCM_SURFACE_TARGET_0: - if (m_context_dma_color_a) rtt0 = writeColorBuffer(m_fbo->getRenderTargetTexture(0), downloadCommandList); + if (m_context_dma_color_a) rtt0 = writeColorBuffer(m_rtts.m_currentlyBoundRenderTargets[0], downloadCommandList); break; case CELL_GCM_SURFACE_TARGET_1: - if (m_context_dma_color_b) rtt1 = writeColorBuffer(m_fbo->getRenderTargetTexture(1), downloadCommandList); + if (m_context_dma_color_b) rtt1 = 
writeColorBuffer(m_rtts.m_currentlyBoundRenderTargets[0], downloadCommandList); break; case CELL_GCM_SURFACE_TARGET_MRT1: - if (m_context_dma_color_a) rtt0 = writeColorBuffer(m_fbo->getRenderTargetTexture(0), downloadCommandList); - if (m_context_dma_color_b) rtt1 = writeColorBuffer(m_fbo->getRenderTargetTexture(1), downloadCommandList); + if (m_context_dma_color_a) rtt0 = writeColorBuffer(m_rtts.m_currentlyBoundRenderTargets[0], downloadCommandList); + if (m_context_dma_color_b) rtt1 = writeColorBuffer(m_rtts.m_currentlyBoundRenderTargets[1], downloadCommandList); break; case CELL_GCM_SURFACE_TARGET_MRT2: - if (m_context_dma_color_a) rtt0 = writeColorBuffer(m_fbo->getRenderTargetTexture(0), downloadCommandList); - if (m_context_dma_color_b) rtt1 = writeColorBuffer(m_fbo->getRenderTargetTexture(1), downloadCommandList); - if (m_context_dma_color_b) rtt2 = writeColorBuffer(m_fbo->getRenderTargetTexture(2), downloadCommandList); + if (m_context_dma_color_a) rtt0 = writeColorBuffer(m_rtts.m_currentlyBoundRenderTargets[0], downloadCommandList); + if (m_context_dma_color_b) rtt1 = writeColorBuffer(m_rtts.m_currentlyBoundRenderTargets[1], downloadCommandList); + if (m_context_dma_color_b) rtt2 = writeColorBuffer(m_rtts.m_currentlyBoundRenderTargets[2], downloadCommandList); break; case CELL_GCM_SURFACE_TARGET_MRT3: - if (m_context_dma_color_a) rtt0 = writeColorBuffer(m_fbo->getRenderTargetTexture(0), downloadCommandList); - if (m_context_dma_color_b) rtt1 = writeColorBuffer(m_fbo->getRenderTargetTexture(1), downloadCommandList); - if (m_context_dma_color_b) rtt2 = writeColorBuffer(m_fbo->getRenderTargetTexture(2), downloadCommandList); - if (m_context_dma_color_b) rtt3 = writeColorBuffer(m_fbo->getRenderTargetTexture(3), downloadCommandList); + if (m_context_dma_color_a) rtt0 = writeColorBuffer(m_rtts.m_currentlyBoundRenderTargets[0], downloadCommandList); + if (m_context_dma_color_b) rtt1 = writeColorBuffer(m_rtts.m_currentlyBoundRenderTargets[1], downloadCommandList); 
+ if (m_context_dma_color_b) rtt2 = writeColorBuffer(m_rtts.m_currentlyBoundRenderTargets[2], downloadCommandList); + if (m_context_dma_color_b) rtt3 = writeColorBuffer(m_rtts.m_currentlyBoundRenderTargets[3], downloadCommandList); break; } if (needTransfer) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index f28bc91bbd..6db8df1972 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -48,7 +48,7 @@ class D3D12GSRender : public GSRender { private: // Copy of RTT to be used as texture - std::unordered_map > m_texturesRTTs; + std::unordered_map m_texturesRTTs; std::unordered_map m_texturesCache; // std::vector m_post_draw_objs; @@ -116,8 +116,9 @@ private: bool m_forcedIndexBuffer; size_t indexCount; + RenderTargets m_rtts; + std::vector m_IASet; - D3D12RenderTargetSets *m_fbo; ID3D12Device* m_device; ID3D12CommandQueue *m_commandQueueCopy; ID3D12CommandQueue *m_commandQueueGraphic; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp index b25842192c..2ddc025065 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp @@ -9,7 +9,106 @@ #include "Emu/System.h" #include "Emu/RSX/GSRender.h" -D3D12RenderTargetSets::D3D12RenderTargetSets(ID3D12Device *device, u8 surfaceDepthFormat, size_t width, size_t height, float clearColor[4], float clearDepth) +#include "D3D12.h" + +ID3D12Resource *RenderTargets::bindAddressAsRenderTargets(ID3D12Device *device, ID3D12GraphicsCommandList *cmdList, size_t slot, u32 address, + size_t width, size_t height, float clearColorR, float clearColorG, float clearColorB, float clearColorA) +{ + ID3D12Resource* rtt; + auto It = m_renderTargets.find(address); + // TODO: Check if sizes match + if (It != m_renderTargets.end()) + { + rtt = It->second; + cmdList->ResourceBarrier(1, &getResourceBarrierTransition(rtt, D3D12_RESOURCE_STATE_GENERIC_READ, 
D3D12_RESOURCE_STATE_RENDER_TARGET)); + } + else + { + LOG_WARNING(RSX, "Creating RTT"); + D3D12_CLEAR_VALUE clearColorValue = {}; + clearColorValue.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + clearColorValue.Color[0] = clearColorR; + clearColorValue.Color[1] = clearColorG; + clearColorValue.Color[2] = clearColorB; + clearColorValue.Color[3] = clearColorA; + + D3D12_HEAP_PROPERTIES heapProp = {}; + heapProp.Type = D3D12_HEAP_TYPE_DEFAULT; + + D3D12_RESOURCE_DESC resourceDesc = getTexture2DResourceDesc(width, height, DXGI_FORMAT_R8G8B8A8_UNORM); + resourceDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; + + device->CreateCommittedResource( + &heapProp, + D3D12_HEAP_FLAG_NONE, + &resourceDesc, + D3D12_RESOURCE_STATE_RENDER_TARGET, + &clearColorValue, + IID_PPV_ARGS(&rtt) + ); + m_renderTargets[address] = rtt; + } + m_currentlyBoundRenderTargetsAddress[slot] = address; + m_currentlyBoundRenderTargets[slot] = rtt; + return rtt; +} + +ID3D12Resource * RenderTargets::bindAddressAsDepthStencil(ID3D12Device * device, ID3D12GraphicsCommandList * cmdList, u32 address, size_t width, size_t height, u8 surfaceDepthFormat, float depthClear, u8 stencilClear) +{ + ID3D12Resource* ds; + auto It = m_depthStencil.find(address); + // TODO: Check if sizes and surface depth format match + + if (It != m_depthStencil.end()) + { + ds = It->second; + cmdList->ResourceBarrier(1, &getResourceBarrierTransition(ds, D3D12_RESOURCE_STATE_GENERIC_READ, D3D12_RESOURCE_STATE_DEPTH_WRITE)); + } + else + { + D3D12_CLEAR_VALUE clearDepthValue = {}; + clearDepthValue.DepthStencil.Depth = depthClear; + + D3D12_HEAP_PROPERTIES heapProp = {}; + heapProp.Type = D3D12_HEAP_TYPE_DEFAULT; + + DXGI_FORMAT dxgiFormat; + switch (surfaceDepthFormat) + { + case 0: + break; + case CELL_GCM_SURFACE_Z16: + dxgiFormat = DXGI_FORMAT_R16_TYPELESS; + clearDepthValue.Format = DXGI_FORMAT_D16_UNORM; + break; + case CELL_GCM_SURFACE_Z24S8: + dxgiFormat = DXGI_FORMAT_R24G8_TYPELESS; + clearDepthValue.Format = 
DXGI_FORMAT_D24_UNORM_S8_UINT; + break; + default: + LOG_ERROR(RSX, "Bad depth format! (%d)", surfaceDepthFormat); + assert(0); + } + + D3D12_RESOURCE_DESC resourceDesc = getTexture2DResourceDesc(width, height, dxgiFormat); + resourceDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL; + + device->CreateCommittedResource( + &heapProp, + D3D12_HEAP_FLAG_NONE, + &resourceDesc, + D3D12_RESOURCE_STATE_DEPTH_WRITE, + &clearDepthValue, + IID_PPV_ARGS(&ds) + ); + m_depthStencil[address] = ds; + } + m_currentlyBoundDepthStencil = ds; + m_currentlyBoundDepthStencilAddress = address; + return ds; +} + +void RenderTargets::Init(ID3D12Device *device)//, u8 surfaceDepthFormat, size_t width, size_t height, float clearColor[4], float clearDepth) { D3D12_DESCRIPTOR_HEAP_DESC descriptorHeapDesc = {}; descriptorHeapDesc.NumDescriptors = 1; @@ -18,140 +117,21 @@ D3D12RenderTargetSets::D3D12RenderTargetSets(ID3D12Device *device, u8 surfaceDep descriptorHeapDesc.NumDescriptors = 4; descriptorHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV; - device->CreateDescriptorHeap(&descriptorHeapDesc, IID_PPV_ARGS(&m_rttDescriptorHeap)); + device->CreateDescriptorHeap(&descriptorHeapDesc, IID_PPV_ARGS(&m_renderTargetsDescriptorsHeap)); - D3D12_CLEAR_VALUE clearDepthValue = {}; - clearDepthValue.DepthStencil.Depth = clearDepth; - - // Every resource are committed for simplicity, later we could use heap - D3D12_HEAP_PROPERTIES heapProp = {}; - heapProp.Type = D3D12_HEAP_TYPE_DEFAULT; - D3D12_RESOURCE_DESC resourceDesc = {}; - resourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; - resourceDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL; - resourceDesc.Width = (UINT)width; - resourceDesc.Height = (UINT)height; - resourceDesc.SampleDesc.Count = 1; - resourceDesc.DepthOrArraySize = 1; - - switch (surfaceDepthFormat) - { - case 0: - break; - case CELL_GCM_SURFACE_Z16: - resourceDesc.Format = DXGI_FORMAT_R16_TYPELESS; - clearDepthValue.Format = DXGI_FORMAT_D16_UNORM; - break; - case 
CELL_GCM_SURFACE_Z24S8: - resourceDesc.Format = DXGI_FORMAT_R24G8_TYPELESS; - clearDepthValue.Format = DXGI_FORMAT_D24_UNORM_S8_UINT; - break; - default: - LOG_ERROR(RSX, "Bad depth format! (%d)", surfaceDepthFormat); - assert(0); - } - - device->CreateCommittedResource( - &heapProp, - D3D12_HEAP_FLAG_NONE, - &resourceDesc, - D3D12_RESOURCE_STATE_DEPTH_WRITE, - &clearDepthValue, - IID_PPV_ARGS(&m_depthStencilTexture) - ); - D3D12_DEPTH_STENCIL_VIEW_DESC depthStencilViewDesc = {}; - switch (surfaceDepthFormat) - { - case 0: - break; - case CELL_GCM_SURFACE_Z16: - depthStencilViewDesc.Format = DXGI_FORMAT_D16_UNORM; - break; - case CELL_GCM_SURFACE_Z24S8: - depthStencilViewDesc.Format = DXGI_FORMAT_D24_UNORM_S8_UINT; - break; - default: - LOG_ERROR(RSX, "Bad depth format! (%d)", surfaceDepthFormat); - assert(0); - } - depthStencilViewDesc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2D; - device->CreateDepthStencilView(m_depthStencilTexture, &depthStencilViewDesc, m_depthStencilDescriptorHeap->GetCPUDescriptorHandleForHeapStart()); - - D3D12_CLEAR_VALUE clearColorValue = {}; - clearColorValue.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - clearColorValue.Color[0] = clearColor[0]; - clearColorValue.Color[1] = clearColor[1]; - clearColorValue.Color[2] = clearColor[2]; - clearColorValue.Color[3] = clearColor[3]; - g_RTTIncrement = device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_RTV); - D3D12_CPU_DESCRIPTOR_HANDLE Handle = m_rttDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); - for (int i = 0; i < 4; ++i) - { - D3D12_RESOURCE_DESC resourceDesc = {}; - resourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; - resourceDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; - resourceDesc.Width = (UINT)width; - resourceDesc.Height = (UINT)height; - resourceDesc.DepthOrArraySize = 1; - resourceDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - resourceDesc.SampleDesc.Count = 1; - - device->CreateCommittedResource( - &heapProp, - D3D12_HEAP_FLAG_NONE, - 
&resourceDesc, - D3D12_RESOURCE_STATE_RENDER_TARGET, - &clearColorValue, - IID_PPV_ARGS(&m_rtts[i]) - ); - - D3D12_RENDER_TARGET_VIEW_DESC rttViewDesc = {}; - rttViewDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; - rttViewDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - - device->CreateRenderTargetView(m_rtts[i], &rttViewDesc, Handle); - Handle.ptr += g_RTTIncrement; - } - - /*if (!m_set_surface_clip_horizontal) - { - m_surface_clip_x = 0; - m_surface_clip_w = RSXThread::m_width; - } - - if (!m_set_surface_clip_vertical) - { - m_surface_clip_y = 0; - m_surface_clip_h = RSXThread::m_height; - }*/ + memset(m_currentlyBoundRenderTargetsAddress, 0, 4 * sizeof(u32)); + memset(m_currentlyBoundRenderTargets, 0, 4 * sizeof(ID3D12Resource*)); + m_currentlyBoundDepthStencil = nullptr; + m_currentlyBoundDepthStencilAddress = 0; } -D3D12RenderTargetSets::~D3D12RenderTargetSets() +void RenderTargets::Release() { - for (unsigned i = 0; i < 4; i++) - m_rtts[i]->Release(); - m_rttDescriptorHeap->Release(); - m_depthStencilTexture->Release(); + for (auto tmp : m_renderTargets) + tmp.second->Release(); m_depthStencilDescriptorHeap->Release(); -} - -D3D12_CPU_DESCRIPTOR_HANDLE D3D12RenderTargetSets::getRTTCPUHandle(u8 baseFBO) const -{ - D3D12_CPU_DESCRIPTOR_HANDLE Handle = m_rttDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); - Handle.ptr += baseFBO * g_RTTIncrement; - return Handle; -} - -D3D12_CPU_DESCRIPTOR_HANDLE D3D12RenderTargetSets::getDSVCPUHandle() const -{ - return m_depthStencilDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); -} -ID3D12Resource * D3D12RenderTargetSets::getRenderTargetTexture(u8 Id) const -{ - return m_rtts[Id]; -} -ID3D12Resource * D3D12RenderTargetSets::getDepthStencilTexture() const -{ - return m_depthStencilTexture; + m_renderTargetsDescriptorsHeap->Release(); + for (auto tmp : m_depthStencil) + tmp.second->Release(); } #endif \ No newline at end of file diff --git a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h 
b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h index 89e9792904..493a3d8c21 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h @@ -3,30 +3,29 @@ #if defined(DX12_SUPPORT) #include -/** - * Class that embeds a RenderTargetDescriptor view and eventually a DepthStencil Descriptor View. - * Used to imitate OpenGL FrameBuffer concept. - */ -class D3D12RenderTargetSets +struct RenderTargets { - size_t g_RTTIncrement; - ID3D12Resource *m_depthStencilTexture; - ID3D12Resource *m_rtts[4]; - ID3D12DescriptorHeap *m_rttDescriptorHeap; + std::unordered_map m_renderTargets; + ID3D12Resource *m_currentlyBoundRenderTargets[4]; + u32 m_currentlyBoundRenderTargetsAddress[4]; + std::unordered_map m_depthStencil; + ID3D12Resource *m_currentlyBoundDepthStencil; + u32 m_currentlyBoundDepthStencilAddress; + ID3D12DescriptorHeap *m_renderTargetsDescriptorsHeap; ID3D12DescriptorHeap *m_depthStencilDescriptorHeap; -public: - D3D12RenderTargetSets(ID3D12Device *device, u8 surfaceDepthFormat, size_t width, size_t height, float clearColor[4], float clearDepth); - ~D3D12RenderTargetSets(); - /** - * Return the base descriptor address for the give surface target. - * All rtt's view descriptor are contigous. - */ - D3D12_CPU_DESCRIPTOR_HANDLE getRTTCPUHandle(u8 baseFBO) const; - D3D12_CPU_DESCRIPTOR_HANDLE getDSVCPUHandle() const; - ID3D12Resource *getRenderTargetTexture(u8 Id) const; - ID3D12Resource *getDepthStencilTexture() const; - u32 m_address_color_a, m_address_color_b, m_address_color_c, m_address_color_d, m_address_z; - u32 m_target_type; + /** + * If render target already exists at address, issue state change operation on cmdList. + * Otherwise create one with width, height, clearColor info. + * returns the corresponding render target resource. 
+ */ + ID3D12Resource *bindAddressAsRenderTargets(ID3D12Device *device, ID3D12GraphicsCommandList *cmdList, size_t slot, u32 address, + size_t width, size_t height, float clearColorR, float clearColorG, float clearColorB, float clearColorA); + + ID3D12Resource *bindAddressAsDepthStencil(ID3D12Device *device, ID3D12GraphicsCommandList *cmdList, u32 address, + size_t width, size_t height, u8 surfaceDepthFormat, float depthClear, u8 stencilClear); + + void Init(ID3D12Device *device); + void Release(); }; #endif \ No newline at end of file diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index 535125da3d..879fc06677 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -166,11 +166,11 @@ size_t D3D12GSRender::UploadTextures() } ID3D12Resource *vramTexture; - std::unordered_map >::const_iterator ItRTT = m_texturesRTTs.find(texaddr); + std::unordered_map::const_iterator ItRTT = m_rtts.m_renderTargets.find(texaddr); std::unordered_map::const_iterator ItCache = m_texturesCache.find(texaddr); - if (ItRTT != m_texturesRTTs.end()) + if (ItRTT != m_rtts.m_renderTargets.end()) { - vramTexture = ItRTT->second.Get(); + vramTexture = ItRTT->second; } else if (ItCache != m_texturesCache.end()) { From aac93b906377fcd024ca2a6bdcaef952296b0b8b Mon Sep 17 00:00:00 2001 From: vlj Date: Sun, 31 May 2015 20:24:10 +0200 Subject: [PATCH 176/343] d3d12: Clean code + fix warnings --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 297 +------------------------- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 4 +- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 8 +- 3 files changed, 7 insertions(+), 302 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 134a03d67d..1b36ce99b9 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -926,7 +926,7 @@ void D3D12GSRender::ExecCMD() } D3D12_CPU_DESCRIPTOR_HANDLE *DepthStencilHandle = 
&m_rtts.m_depthStencilDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); - commandList->OMSetRenderTargets(numRTT, &m_rtts.m_renderTargetsDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(), true, DepthStencilHandle); + commandList->OMSetRenderTargets((UINT)numRTT, &m_rtts.m_renderTargetsDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(), true, DepthStencilHandle); D3D12_VIEWPORT viewport = { @@ -996,301 +996,6 @@ void D3D12GSRender::ExecCMD() m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)&commandList); m_indexed_array.Reset(); WriteDepthBuffer(); - -/* if (m_set_color_mask) - { - glColorMask(m_color_mask_r, m_color_mask_g, m_color_mask_b, m_color_mask_a); - checkForGlError("glColorMask"); - } - - if (!m_indexed_array.m_count && !m_draw_array_count) - { - u32 min_vertex_size = ~0; - for (auto &i : m_vertex_data) - { - if (!i.size) - continue; - - u32 vertex_size = i.data.size() / (i.size * i.GetTypeSize()); - - if (min_vertex_size > vertex_size) - min_vertex_size = vertex_size; - } - - m_draw_array_count = min_vertex_size; - m_draw_array_first = 0; - } - - Enable(m_set_depth_test, GL_DEPTH_TEST); - Enable(m_set_alpha_test, GL_ALPHA_TEST); - Enable(m_set_blend || m_set_blend_mrt1 || m_set_blend_mrt2 || m_set_blend_mrt3, GL_BLEND); - Enable(m_set_scissor_horizontal && m_set_scissor_vertical, GL_SCISSOR_TEST); - Enable(m_set_logic_op, GL_LOGIC_OP); - Enable(m_set_cull_face, GL_CULL_FACE); - Enable(m_set_dither, GL_DITHER); - Enable(m_set_stencil_test, GL_STENCIL_TEST); - Enable(m_set_line_smooth, GL_LINE_SMOOTH); - Enable(m_set_poly_smooth, GL_POLYGON_SMOOTH); - Enable(m_set_point_sprite_control, GL_POINT_SPRITE); - Enable(m_set_specular, GL_LIGHTING); - Enable(m_set_poly_offset_fill, GL_POLYGON_OFFSET_FILL); - Enable(m_set_poly_offset_line, GL_POLYGON_OFFSET_LINE); - Enable(m_set_poly_offset_point, GL_POLYGON_OFFSET_POINT); - Enable(m_set_restart_index, GL_PRIMITIVE_RESTART); - Enable(m_set_line_stipple, GL_LINE_STIPPLE); - 
Enable(m_set_polygon_stipple, GL_POLYGON_STIPPLE); - - if (m_set_clip_plane) - { - Enable(m_clip_plane_0, GL_CLIP_PLANE0); - Enable(m_clip_plane_1, GL_CLIP_PLANE1); - Enable(m_clip_plane_2, GL_CLIP_PLANE2); - Enable(m_clip_plane_3, GL_CLIP_PLANE3); - Enable(m_clip_plane_4, GL_CLIP_PLANE4); - Enable(m_clip_plane_5, GL_CLIP_PLANE5); - - checkForGlError("m_set_clip_plane"); - } - - checkForGlError("glEnable"); - - if (m_set_front_polygon_mode) - { - glPolygonMode(GL_FRONT, m_front_polygon_mode); - checkForGlError("glPolygonMode(Front)"); - } - - if (m_set_back_polygon_mode) - { - glPolygonMode(GL_BACK, m_back_polygon_mode); - checkForGlError("glPolygonMode(Back)"); - } - - if (m_set_point_size) - { - glPointSize(m_point_size); - checkForGlError("glPointSize"); - } - - if (m_set_poly_offset_mode) - { - glPolygonOffset(m_poly_offset_scale_factor, m_poly_offset_bias); - checkForGlError("glPolygonOffset"); - } - - if (m_set_logic_op) - { - glLogicOp(m_logic_op); - checkForGlError("glLogicOp"); - } - - if (m_set_scissor_horizontal && m_set_scissor_vertical) - { - glScissor(m_scissor_x, m_scissor_y, m_scissor_w, m_scissor_h); - checkForGlError("glScissor"); - } - - if (m_set_two_sided_stencil_test_enable) - { - if (m_set_stencil_fail && m_set_stencil_zfail && m_set_stencil_zpass) - { - glStencilOpSeparate(GL_FRONT, m_stencil_fail, m_stencil_zfail, m_stencil_zpass); - checkForGlError("glStencilOpSeparate"); - } - - if (m_set_stencil_mask) - { - glStencilMaskSeparate(GL_FRONT, m_stencil_mask); - checkForGlError("glStencilMaskSeparate"); - } - - if (m_set_stencil_func && m_set_stencil_func_ref && m_set_stencil_func_mask) - { - glStencilFuncSeparate(GL_FRONT, m_stencil_func, m_stencil_func_ref, m_stencil_func_mask); - checkForGlError("glStencilFuncSeparate"); - } - - if (m_set_back_stencil_fail && m_set_back_stencil_zfail && m_set_back_stencil_zpass) - { - glStencilOpSeparate(GL_BACK, m_back_stencil_fail, m_back_stencil_zfail, m_back_stencil_zpass); - 
checkForGlError("glStencilOpSeparate(GL_BACK)"); - } - - if (m_set_back_stencil_mask) - { - glStencilMaskSeparate(GL_BACK, m_back_stencil_mask); - checkForGlError("glStencilMaskSeparate(GL_BACK)"); - } - - if (m_set_back_stencil_func && m_set_back_stencil_func_ref && m_set_back_stencil_func_mask) - { - glStencilFuncSeparate(GL_BACK, m_back_stencil_func, m_back_stencil_func_ref, m_back_stencil_func_mask); - checkForGlError("glStencilFuncSeparate(GL_BACK)"); - } - } - else - { - if (m_set_stencil_fail && m_set_stencil_zfail && m_set_stencil_zpass) - { - glStencilOp(m_stencil_fail, m_stencil_zfail, m_stencil_zpass); - checkForGlError("glStencilOp"); - } - - if (m_set_stencil_mask) - { - glStencilMask(m_stencil_mask); - checkForGlError("glStencilMask"); - } - - if (m_set_stencil_func && m_set_stencil_func_ref && m_set_stencil_func_mask) - { - glStencilFunc(m_stencil_func, m_stencil_func_ref, m_stencil_func_mask); - checkForGlError("glStencilFunc"); - } - } - - // TODO: Use other glLightModel functions? - glLightModeli(GL_LIGHT_MODEL_TWO_SIDE, m_set_two_side_light_enable ? 
GL_TRUE : GL_FALSE); - checkForGlError("glLightModeli"); - - if (m_set_shade_mode) - { - glShadeModel(m_shade_mode); - checkForGlError("glShadeModel"); - } - - if (m_set_depth_mask) - { - glDepthMask(m_depth_mask); - checkForGlError("glDepthMask"); - } - - if (m_set_depth_func) - { - glDepthFunc(m_depth_func); - checkForGlError("glDepthFunc"); - } - - if (m_set_depth_bounds && !is_intel_vendor) - { - glDepthBoundsEXT(m_depth_bounds_min, m_depth_bounds_max); - checkForGlError("glDepthBounds"); - } - - if (m_set_clip) - { - glDepthRangef(m_clip_min, m_clip_max); - checkForGlError("glDepthRangef"); - } - - if (m_set_line_width) - { - glLineWidth(m_line_width); - checkForGlError("glLineWidth"); - } - - if (m_set_line_stipple) - { - glLineStipple(m_line_stipple_factor, m_line_stipple_pattern); - checkForGlError("glLineStipple"); - } - - if (m_set_polygon_stipple) - { - glPolygonStipple((const GLubyte*)m_polygon_stipple_pattern); - checkForGlError("glPolygonStipple"); - } - - if (m_set_blend_equation) - { - glBlendEquationSeparate(m_blend_equation_rgb, m_blend_equation_alpha); - checkForGlError("glBlendEquationSeparate"); - } - - if (m_set_blend_sfactor && m_set_blend_dfactor) - { - glBlendFuncSeparate(m_blend_sfactor_rgb, m_blend_dfactor_rgb, m_blend_sfactor_alpha, m_blend_dfactor_alpha); - checkForGlError("glBlendFuncSeparate"); - } - - if (m_set_blend_color) - { - glBlendColor(m_blend_color_r, m_blend_color_g, m_blend_color_b, m_blend_color_a); - checkForGlError("glBlendColor"); - } - - if (m_set_cull_face) - { - glCullFace(m_cull_face); - checkForGlError("glCullFace"); - } - - if (m_set_front_face) - { - glFrontFace(m_front_face); - checkForGlError("glFrontFace"); - } - - if (m_set_alpha_func && m_set_alpha_ref) - { - glAlphaFunc(m_alpha_func, m_alpha_ref); - checkForGlError("glAlphaFunc"); - } - - if (m_set_fog_mode) - { - glFogi(GL_FOG_MODE, m_fog_mode); - checkForGlError("glFogi(GL_FOG_MODE)"); - } - - if (m_set_fog_params) - { - glFogf(GL_FOG_START, 
m_fog_param0); - checkForGlError("glFogf(GL_FOG_START)"); - glFogf(GL_FOG_END, m_fog_param1); - checkForGlError("glFogf(GL_FOG_END)"); - } - - if (m_set_restart_index) - { - glPrimitiveRestartIndex(m_restart_index); - checkForGlError("glPrimitiveRestartIndex"); - } - - if (m_indexed_array.m_count && m_draw_array_count) - { - LOG_WARNING(RSX, "m_indexed_array.m_count && draw_array_count"); - } - - for (u32 i = 0; i < m_textures_count; ++i) - { - if (!m_textures[i].IsEnabled()) continue; - - glActiveTexture(GL_TEXTURE0 + i); - checkForGlError("glActiveTexture"); - m_gl_textures[i].Create(); - m_gl_textures[i].Bind(); - checkForGlError(fmt::Format("m_gl_textures[%d].Bind", i)); - m_program.SetTex(i); - m_gl_textures[i].Init(m_textures[i]); - checkForGlError(fmt::Format("m_gl_textures[%d].Init", i)); - } - - for (u32 i = 0; i < m_textures_count; ++i) - { - if (!m_vertex_textures[i].IsEnabled()) continue; - - glActiveTexture(GL_TEXTURE0 + m_textures_count + i); - checkForGlError("glActiveTexture"); - m_gl_vertex_textures[i].Create(); - m_gl_vertex_textures[i].Bind(); - checkForGlError(fmt::Format("m_gl_vertex_textures[%d].Bind", i)); - m_program.SetVTex(i); - m_gl_vertex_textures[i].Init(m_vertex_textures[i]); - checkForGlError(fmt::Format("m_gl_vertex_textures[%d].Init", i)); - }*/ - -// WriteBuffers(); } void D3D12GSRender::Flip() diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 6db8df1972..051e97fe58 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -98,7 +98,7 @@ private: struct UAVHeap { ID3D12Heap *m_heap; - std::atomic m_putPos, // Start of free space + std::atomic m_putPos, // Start of free space m_getPos; // End of free space }; @@ -107,7 +107,7 @@ private: struct ReadbackHeap { ID3D12Heap *m_heap; - std::atomic m_putPos, // Start of free space + std::atomic m_putPos, // Start of free space m_getPos; // End of free space }; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp 
b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index 879fc06677..a83ec52212 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -304,11 +304,11 @@ size_t D3D12GSRender::UploadTextures() samplerDesc.AddressV = GetWrap(m_textures[i].GetWrapT()); samplerDesc.AddressW = GetWrap(m_textures[i].GetWrapR()); samplerDesc.ComparisonFunc = ComparisonFunc[m_textures[i].GetZfunc()]; - samplerDesc.MaxAnisotropy = GetMaxAniso(m_textures[i].GetMaxAniso()); + samplerDesc.MaxAnisotropy = (UINT)GetMaxAniso(m_textures[i].GetMaxAniso()); samplerDesc.MipLODBias = m_textures[i].GetBias(); - samplerDesc.BorderColor[4] = m_textures[i].GetBorderColor(); - samplerDesc.MinLOD = m_textures[i].GetMinLOD() >> 8; - samplerDesc.MaxLOD = m_textures[i].GetMaxLOD() >> 8; + samplerDesc.BorderColor[4] = (FLOAT)m_textures[i].GetBorderColor(); + samplerDesc.MinLOD = (FLOAT)(m_textures[i].GetMinLOD() >> 8); + samplerDesc.MaxLOD = (FLOAT)(m_textures[i].GetMaxLOD() >> 8); Handle = m_perFrameStorage.m_samplerDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); Handle.ptr += (m_perFrameStorage.m_currentTextureIndex + usedTexture) * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); m_device->CreateSampler(&samplerDesc, Handle); From e454f59f97e51569db3da761301e0ffe85ee600f Mon Sep 17 00:00:00 2001 From: vlj Date: Sun, 31 May 2015 20:30:21 +0200 Subject: [PATCH 177/343] d3d12: Fix depth buffer not being transitionned to generic read --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 192 ------------------ rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp | 154 ++++++++++++++ 2 files changed, 154 insertions(+), 192 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 1b36ce99b9..3a16c68310 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -360,198 +360,6 @@ void D3D12GSRender::Close() m_frame->Hide(); } -static -void copyFBO(ID3D12Device* device, 
ID3D12Resource *rtt, ID3D12GraphicsCommandList *cmdList, - std::unordered_map<u32, Microsoft::WRL::ComPtr<ID3D12Resource> > &texturesRTTs, - u32 &currentFBOAddress, u32 newAddress, size_t width, size_t height) -{ - // TODO : move to texture heap - Microsoft::WRL::ComPtr<ID3D12Resource> Texture; - D3D12_HEAP_PROPERTIES hp = {}; - hp.Type = D3D12_HEAP_TYPE_DEFAULT; - check( - device->CreateCommittedResource( - &hp, - D3D12_HEAP_FLAG_NONE, - &getTexture2DResourceDesc(width, height, DXGI_FORMAT_R8G8B8A8_UNORM), - D3D12_RESOURCE_STATE_COPY_DEST, - nullptr, - IID_PPV_ARGS(&Texture) - ) - ); - - cmdList->ResourceBarrier(1, &getResourceBarrierTransition(rtt, D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE)); - - D3D12_TEXTURE_COPY_LOCATION dst = {}, src = {}; - dst.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; - dst.pResource = Texture.Get(); - src.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; - src.pResource = rtt; - - cmdList->CopyTextureRegion(&dst, 0, 0, 0, &src, nullptr); - - D3D12_RESOURCE_BARRIER barriers[2] = - { - getResourceBarrierTransition(rtt, D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET), - getResourceBarrierTransition(Texture.Get(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_GENERIC_READ), - }; - cmdList->ResourceBarrier(2, barriers); - - texturesRTTs[currentFBOAddress] = Texture; -} - -void D3D12GSRender::InitDrawBuffers() -{ - // FBO location has changed, previous data might be copied - u32 address_a = GetAddress(m_surface_offset_a, m_context_dma_color_a - 0xfeed0000); - u32 address_b = GetAddress(m_surface_offset_b, m_context_dma_color_b - 0xfeed0000); - u32 address_c = GetAddress(m_surface_offset_c, m_context_dma_color_c - 0xfeed0000); - u32 address_d = GetAddress(m_surface_offset_d, m_context_dma_color_d - 0xfeed0000); - u32 address_z = GetAddress(m_surface_offset_z, m_context_dma_z - 0xfeed0000); - - ID3D12GraphicsCommandList *copycmdlist; - check(m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_perFrameStorage.m_commandAllocator, nullptr,
IID_PPV_ARGS(&copycmdlist))); - m_perFrameStorage.m_inflightCommandList.push_back(copycmdlist); - - // Make previous RTTs sampleable - for (unsigned i = 0; i < 4; i++) - { - if (m_rtts.m_currentlyBoundRenderTargets[i] == nullptr) - continue; - copycmdlist->ResourceBarrier(1, &getResourceBarrierTransition(m_rtts.m_currentlyBoundRenderTargets[i], D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_GENERIC_READ)); - m_rtts.m_renderTargets[m_rtts.m_currentlyBoundRenderTargetsAddress[i]] = m_rtts.m_currentlyBoundRenderTargets[i]; - } - - memset(m_rtts.m_currentlyBoundRenderTargetsAddress, 0, 4 * sizeof(u32)); - memset(m_rtts.m_currentlyBoundRenderTargets, 0, 4 * sizeof(ID3D12Resource *)); - m_rtts.m_currentlyBoundDepthStencil = nullptr; - m_rtts.m_currentlyBoundDepthStencilAddress = 0; - - - D3D12_CPU_DESCRIPTOR_HANDLE Handle = m_rtts.m_renderTargetsDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); - size_t g_RTTIncrement = m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_RTV); - - switch (m_surface_color_target) - { - case CELL_GCM_SURFACE_TARGET_0: - { - ID3D12Resource *rttA = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 0,address_a, RSXThread::m_width, RSXThread::m_height, - m_clear_surface_color_r / 255.0f, m_clear_surface_color_g / 255.0f, m_clear_surface_color_b / 255.0f, m_clear_surface_color_a / 255.0f); - D3D12_RENDER_TARGET_VIEW_DESC rttViewDesc = {}; - rttViewDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; - rttViewDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - m_device->CreateRenderTargetView(rttA, &rttViewDesc, Handle); - break; - } - case CELL_GCM_SURFACE_TARGET_1: - { - ID3D12Resource *rttB = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 0, address_b, RSXThread::m_width, RSXThread::m_height, - m_clear_surface_color_r / 255.0f, m_clear_surface_color_g / 255.0f, m_clear_surface_color_b / 255.0f, m_clear_surface_color_a / 255.0f); - D3D12_RENDER_TARGET_VIEW_DESC rttViewDesc = {}; - rttViewDesc.ViewDimension
= D3D12_RTV_DIMENSION_TEXTURE2D; - rttViewDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - m_device->CreateRenderTargetView(rttB, &rttViewDesc, Handle); - break; - } - case CELL_GCM_SURFACE_TARGET_MRT1: - { - ID3D12Resource *rttA = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 0, address_a, RSXThread::m_width, RSXThread::m_height, - m_clear_surface_color_r / 255.0f, m_clear_surface_color_g / 255.0f, m_clear_surface_color_b / 255.0f, m_clear_surface_color_a / 255.0f); - D3D12_RENDER_TARGET_VIEW_DESC rttViewDesc = {}; - rttViewDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; - rttViewDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - m_device->CreateRenderTargetView(rttA, &rttViewDesc, Handle); - Handle.ptr += g_RTTIncrement; - ID3D12Resource *rttB = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 1, address_b, RSXThread::m_width, RSXThread::m_height, - m_clear_surface_color_r / 255.0f, m_clear_surface_color_g / 255.0f, m_clear_surface_color_b / 255.0f, m_clear_surface_color_a / 255.0f); - rttViewDesc = {}; - rttViewDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; - rttViewDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - m_device->CreateRenderTargetView(rttB, &rttViewDesc, Handle); - } - break; - case CELL_GCM_SURFACE_TARGET_MRT2: - { - ID3D12Resource *rttA = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 0, address_a, RSXThread::m_width, RSXThread::m_height, - m_clear_surface_color_r / 255.0f, m_clear_surface_color_g / 255.0f, m_clear_surface_color_b / 255.0f, m_clear_surface_color_a / 255.0f); - D3D12_RENDER_TARGET_VIEW_DESC rttViewDesc = {}; - rttViewDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; - rttViewDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - m_device->CreateRenderTargetView(rttA, &rttViewDesc, Handle); - Handle.ptr += g_RTTIncrement; - ID3D12Resource *rttB = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 1, address_b, RSXThread::m_width, RSXThread::m_height, - m_clear_surface_color_r / 255.0f, m_clear_surface_color_g / 
255.0f, m_clear_surface_color_b / 255.0f, m_clear_surface_color_a / 255.0f); - rttViewDesc = {}; - rttViewDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; - rttViewDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - m_device->CreateRenderTargetView(rttB, &rttViewDesc, Handle); - Handle.ptr += g_RTTIncrement; - ID3D12Resource *rttC = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 2, address_c, RSXThread::m_width, RSXThread::m_height, - m_clear_surface_color_r / 255.0f, m_clear_surface_color_g / 255.0f, m_clear_surface_color_b / 255.0f, m_clear_surface_color_a / 255.0f); - rttViewDesc = {}; - rttViewDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; - rttViewDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - m_device->CreateRenderTargetView(rttC, &rttViewDesc, Handle); - break; - } - case CELL_GCM_SURFACE_TARGET_MRT3: - { - ID3D12Resource *rttA = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 0, address_a, RSXThread::m_width, RSXThread::m_height, - m_clear_surface_color_r / 255.0f, m_clear_surface_color_g / 255.0f, m_clear_surface_color_b / 255.0f, m_clear_surface_color_a / 255.0f); - D3D12_RENDER_TARGET_VIEW_DESC rttViewDesc = {}; - rttViewDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; - rttViewDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - m_device->CreateRenderTargetView(rttA, &rttViewDesc, Handle); - Handle.ptr += g_RTTIncrement; - ID3D12Resource *rttB = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 1,address_b, RSXThread::m_width, RSXThread::m_height, - m_clear_surface_color_r / 255.0f, m_clear_surface_color_g / 255.0f, m_clear_surface_color_b / 255.0f, m_clear_surface_color_a / 255.0f); - rttViewDesc = {}; - rttViewDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; - rttViewDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - m_device->CreateRenderTargetView(rttB, &rttViewDesc, Handle); - Handle.ptr += g_RTTIncrement; - ID3D12Resource *rttC = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 2, address_c, RSXThread::m_width, 
RSXThread::m_height, - m_clear_surface_color_r / 255.0f, m_clear_surface_color_g / 255.0f, m_clear_surface_color_b / 255.0f, m_clear_surface_color_a / 255.0f); - rttViewDesc = {}; - rttViewDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; - rttViewDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - m_device->CreateRenderTargetView(rttC, &rttViewDesc, Handle); - Handle.ptr += g_RTTIncrement; - ID3D12Resource *rttD = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 3, address_d, RSXThread::m_width, RSXThread::m_height, - m_clear_surface_color_r / 255.0f, m_clear_surface_color_g / 255.0f, m_clear_surface_color_b / 255.0f, m_clear_surface_color_a / 255.0f); - rttViewDesc = {}; - rttViewDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; - rttViewDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - m_device->CreateRenderTargetView(rttD, &rttViewDesc, Handle); - break; - } - } - - ID3D12Resource *ds = m_rtts.bindAddressAsDepthStencil(m_device, copycmdlist, address_z, RSXThread::m_width, RSXThread::m_height, m_surface_depth_format, 1., 0); - - D3D12_DEPTH_STENCIL_VIEW_DESC depthStencilViewDesc = {}; - switch (m_surface_depth_format) - { - case 0: - break; - case CELL_GCM_SURFACE_Z16: - depthStencilViewDesc.Format = DXGI_FORMAT_D16_UNORM; - break; - case CELL_GCM_SURFACE_Z24S8: - depthStencilViewDesc.Format = DXGI_FORMAT_D24_UNORM_S8_UINT; - break; - default: - LOG_ERROR(RSX, "Bad depth format! 
(%d)", m_surface_depth_format); - assert(0); - } - depthStencilViewDesc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2D; - m_device->CreateDepthStencilView(ds, &depthStencilViewDesc, m_rtts.m_depthStencilDescriptorHeap->GetCPUDescriptorHandleForHeapStart()); - - check(copycmdlist->Close()); - m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)&copycmdlist); -} - - void D3D12GSRender::OnInit() { m_frame->Show(); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp index 2ddc025065..6ebea36b62 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp @@ -10,6 +10,160 @@ #include "Emu/RSX/GSRender.h" #include "D3D12.h" +#include "D3D12GSRender.h" + +void D3D12GSRender::InitDrawBuffers() +{ + // FBO location has changed, previous data might be copied + u32 address_a = GetAddress(m_surface_offset_a, m_context_dma_color_a - 0xfeed0000); + u32 address_b = GetAddress(m_surface_offset_b, m_context_dma_color_b - 0xfeed0000); + u32 address_c = GetAddress(m_surface_offset_c, m_context_dma_color_c - 0xfeed0000); + u32 address_d = GetAddress(m_surface_offset_d, m_context_dma_color_d - 0xfeed0000); + u32 address_z = GetAddress(m_surface_offset_z, m_context_dma_z - 0xfeed0000); + + ID3D12GraphicsCommandList *copycmdlist; + check(m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_perFrameStorage.m_commandAllocator, nullptr, IID_PPV_ARGS(&copycmdlist))); + m_perFrameStorage.m_inflightCommandList.push_back(copycmdlist); + + // Make previous RTTs sampleable + for (unsigned i = 0; i < 4; i++) + { + if (m_rtts.m_currentlyBoundRenderTargets[i] == nullptr) + continue; + copycmdlist->ResourceBarrier(1, &getResourceBarrierTransition(m_rtts.m_currentlyBoundRenderTargets[i], D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_GENERIC_READ)); + } + // Same for depth buffer + if (m_rtts.m_currentlyBoundDepthStencil != nullptr) + copycmdlist->ResourceBarrier(1,
&getResourceBarrierTransition(m_rtts.m_currentlyBoundDepthStencil, D3D12_RESOURCE_STATE_DEPTH_WRITE, D3D12_RESOURCE_STATE_GENERIC_READ)); + + memset(m_rtts.m_currentlyBoundRenderTargetsAddress, 0, 4 * sizeof(u32)); + memset(m_rtts.m_currentlyBoundRenderTargets, 0, 4 * sizeof(ID3D12Resource *)); + m_rtts.m_currentlyBoundDepthStencil = nullptr; + m_rtts.m_currentlyBoundDepthStencilAddress = 0; + + + D3D12_CPU_DESCRIPTOR_HANDLE Handle = m_rtts.m_renderTargetsDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); + size_t g_RTTIncrement = m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_RTV); + + switch (m_surface_color_target) + { + case CELL_GCM_SURFACE_TARGET_0: + { + ID3D12Resource *rttA = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 0, address_a, RSXThread::m_width, RSXThread::m_height, + m_clear_surface_color_r / 255.0f, m_clear_surface_color_g / 255.0f, m_clear_surface_color_b / 255.0f, m_clear_surface_color_a / 255.0f); + D3D12_RENDER_TARGET_VIEW_DESC rttViewDesc = {}; + rttViewDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; + rttViewDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + m_device->CreateRenderTargetView(rttA, &rttViewDesc, Handle); + break; + } + case CELL_GCM_SURFACE_TARGET_1: + { + ID3D12Resource *rttB = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 0, address_b, RSXThread::m_width, RSXThread::m_height, + m_clear_surface_color_r / 255.0f, m_clear_surface_color_g / 255.0f, m_clear_surface_color_b / 255.0f, m_clear_surface_color_a / 255.0f); + D3D12_RENDER_TARGET_VIEW_DESC rttViewDesc = {}; + rttViewDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; + rttViewDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + m_device->CreateRenderTargetView(rttB, &rttViewDesc, Handle); + break; + } + case CELL_GCM_SURFACE_TARGET_MRT1: + { + ID3D12Resource *rttA = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 0, address_a, RSXThread::m_width, RSXThread::m_height, + m_clear_surface_color_r / 255.0f, 
m_clear_surface_color_g / 255.0f, m_clear_surface_color_b / 255.0f, m_clear_surface_color_a / 255.0f); + D3D12_RENDER_TARGET_VIEW_DESC rttViewDesc = {}; + rttViewDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; + rttViewDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + m_device->CreateRenderTargetView(rttA, &rttViewDesc, Handle); + Handle.ptr += g_RTTIncrement; + ID3D12Resource *rttB = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 1, address_b, RSXThread::m_width, RSXThread::m_height, + m_clear_surface_color_r / 255.0f, m_clear_surface_color_g / 255.0f, m_clear_surface_color_b / 255.0f, m_clear_surface_color_a / 255.0f); + rttViewDesc = {}; + rttViewDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; + rttViewDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + m_device->CreateRenderTargetView(rttB, &rttViewDesc, Handle); + } + break; + case CELL_GCM_SURFACE_TARGET_MRT2: + { + ID3D12Resource *rttA = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 0, address_a, RSXThread::m_width, RSXThread::m_height, + m_clear_surface_color_r / 255.0f, m_clear_surface_color_g / 255.0f, m_clear_surface_color_b / 255.0f, m_clear_surface_color_a / 255.0f); + D3D12_RENDER_TARGET_VIEW_DESC rttViewDesc = {}; + rttViewDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; + rttViewDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + m_device->CreateRenderTargetView(rttA, &rttViewDesc, Handle); + Handle.ptr += g_RTTIncrement; + ID3D12Resource *rttB = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 1, address_b, RSXThread::m_width, RSXThread::m_height, + m_clear_surface_color_r / 255.0f, m_clear_surface_color_g / 255.0f, m_clear_surface_color_b / 255.0f, m_clear_surface_color_a / 255.0f); + rttViewDesc = {}; + rttViewDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; + rttViewDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + m_device->CreateRenderTargetView(rttB, &rttViewDesc, Handle); + Handle.ptr += g_RTTIncrement; + ID3D12Resource *rttC = m_rtts.bindAddressAsRenderTargets(m_device, 
copycmdlist, 2, address_c, RSXThread::m_width, RSXThread::m_height, + m_clear_surface_color_r / 255.0f, m_clear_surface_color_g / 255.0f, m_clear_surface_color_b / 255.0f, m_clear_surface_color_a / 255.0f); + rttViewDesc = {}; + rttViewDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; + rttViewDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + m_device->CreateRenderTargetView(rttC, &rttViewDesc, Handle); + break; + } + case CELL_GCM_SURFACE_TARGET_MRT3: + { + ID3D12Resource *rttA = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 0, address_a, RSXThread::m_width, RSXThread::m_height, + m_clear_surface_color_r / 255.0f, m_clear_surface_color_g / 255.0f, m_clear_surface_color_b / 255.0f, m_clear_surface_color_a / 255.0f); + D3D12_RENDER_TARGET_VIEW_DESC rttViewDesc = {}; + rttViewDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; + rttViewDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + m_device->CreateRenderTargetView(rttA, &rttViewDesc, Handle); + Handle.ptr += g_RTTIncrement; + ID3D12Resource *rttB = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 1, address_b, RSXThread::m_width, RSXThread::m_height, + m_clear_surface_color_r / 255.0f, m_clear_surface_color_g / 255.0f, m_clear_surface_color_b / 255.0f, m_clear_surface_color_a / 255.0f); + rttViewDesc = {}; + rttViewDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; + rttViewDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + m_device->CreateRenderTargetView(rttB, &rttViewDesc, Handle); + Handle.ptr += g_RTTIncrement; + ID3D12Resource *rttC = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 2, address_c, RSXThread::m_width, RSXThread::m_height, + m_clear_surface_color_r / 255.0f, m_clear_surface_color_g / 255.0f, m_clear_surface_color_b / 255.0f, m_clear_surface_color_a / 255.0f); + rttViewDesc = {}; + rttViewDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; + rttViewDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + m_device->CreateRenderTargetView(rttC, &rttViewDesc, Handle); + Handle.ptr += g_RTTIncrement; 
+ ID3D12Resource *rttD = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 3, address_d, RSXThread::m_width, RSXThread::m_height, + m_clear_surface_color_r / 255.0f, m_clear_surface_color_g / 255.0f, m_clear_surface_color_b / 255.0f, m_clear_surface_color_a / 255.0f); + rttViewDesc = {}; + rttViewDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; + rttViewDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + m_device->CreateRenderTargetView(rttD, &rttViewDesc, Handle); + break; + } + } + + ID3D12Resource *ds = m_rtts.bindAddressAsDepthStencil(m_device, copycmdlist, address_z, RSXThread::m_width, RSXThread::m_height, m_surface_depth_format, 1., 0); + + D3D12_DEPTH_STENCIL_VIEW_DESC depthStencilViewDesc = {}; + switch (m_surface_depth_format) + { + case 0: + break; + case CELL_GCM_SURFACE_Z16: + depthStencilViewDesc.Format = DXGI_FORMAT_D16_UNORM; + break; + case CELL_GCM_SURFACE_Z24S8: + depthStencilViewDesc.Format = DXGI_FORMAT_D24_UNORM_S8_UINT; + break; + default: + LOG_ERROR(RSX, "Bad depth format! 
(%d)", m_surface_depth_format); + assert(0); + } + depthStencilViewDesc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2D; + m_device->CreateDepthStencilView(ds, &depthStencilViewDesc, m_rtts.m_depthStencilDescriptorHeap->GetCPUDescriptorHandleForHeapStart()); + + check(copycmdlist->Close()); + m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)&copycmdlist); +} ID3D12Resource *RenderTargets::bindAddressAsRenderTargets(ID3D12Device *device, ID3D12GraphicsCommandList *cmdList, size_t slot, u32 address, size_t width, size_t height, float clearColorR, float clearColorG, float clearColorB, float clearColorA) From fdf24545afe00652ce58e5c3a904033206e72cbd Mon Sep 17 00:00:00 2001 From: vlj Date: Sun, 31 May 2015 20:33:45 +0200 Subject: [PATCH 178/343] d3d12: Raise texture upload heap to make multi texture work The result is not in line with PS3 real output though --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 3a16c68310..76fb34024c 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -65,7 +65,7 @@ void D3D12GSRender::ResourceStorage::Init(ID3D12Device *device) // Texture D3D12_HEAP_DESC heapDescription = {}; - heapDescription.SizeInBytes = 1024 * 1024 * 64; + heapDescription.SizeInBytes = 1024 * 1024 * 256; heapDescription.Properties.Type = D3D12_HEAP_TYPE_UPLOAD; heapDescription.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS; check(device->CreateHeap(&heapDescription, IID_PPV_ARGS(&m_uploadTextureHeap))); From 0be60890e7da5f1eada2497178eb1e40e7c320b5 Mon Sep 17 00:00:00 2001 From: raven02 Date: Sun, 31 May 2015 23:55:47 +0800 Subject: [PATCH 179/343] d3d12: minor fixes --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index
76fb34024c..753c28d3d9 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -1109,14 +1109,14 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) case CELL_GCM_SURFACE_TARGET_MRT2: if (m_context_dma_color_a) rtt0 = writeColorBuffer(m_rtts.m_currentlyBoundRenderTargets[0], downloadCommandList); if (m_context_dma_color_b) rtt1 = writeColorBuffer(m_rtts.m_currentlyBoundRenderTargets[1], downloadCommandList); - if (m_context_dma_color_b) rtt2 = writeColorBuffer(m_rtts.m_currentlyBoundRenderTargets[2], downloadCommandList); + if (m_context_dma_color_c) rtt2 = writeColorBuffer(m_rtts.m_currentlyBoundRenderTargets[2], downloadCommandList); break; case CELL_GCM_SURFACE_TARGET_MRT3: if (m_context_dma_color_a) rtt0 = writeColorBuffer(m_rtts.m_currentlyBoundRenderTargets[0], downloadCommandList); if (m_context_dma_color_b) rtt1 = writeColorBuffer(m_rtts.m_currentlyBoundRenderTargets[1], downloadCommandList); - if (m_context_dma_color_b) rtt2 = writeColorBuffer(m_rtts.m_currentlyBoundRenderTargets[2], downloadCommandList); - if (m_context_dma_color_b) rtt3 = writeColorBuffer(m_rtts.m_currentlyBoundRenderTargets[3], downloadCommandList); + if (m_context_dma_color_c) rtt2 = writeColorBuffer(m_rtts.m_currentlyBoundRenderTargets[2], downloadCommandList); + if (m_context_dma_color_d) rtt3 = writeColorBuffer(m_rtts.m_currentlyBoundRenderTargets[3], downloadCommandList); break; } if (needTransfer) @@ -1254,4 +1254,4 @@ void D3D12GSRender::semaphorePFIFOAcquire(u32 offset, u32 value) ); valueChangerThread.detach(); } -#endif \ No newline at end of file +#endif From 3aafb60629f818b8ebd16ac6e915ad4abfec3549 Mon Sep 17 00:00:00 2001 From: vlj Date: Sun, 31 May 2015 21:52:22 +0200 Subject: [PATCH 180/343] d3d12: Fix offset in scale offset Depth Buffer should be correctly positionned in depth_read test --- rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index 5955aaf556..69de6aadb4 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -388,7 +388,7 @@ void D3D12GSRender::setScaleOffset() // Offset scaleOffsetMat[3] = (float&)methodRegisters[NV4097_SET_VIEWPORT_OFFSET + (0x4 * 0)] - (RSXThread::m_width / RSXThread::m_width_scale); - scaleOffsetMat[7] = (float&)methodRegisters[NV4097_SET_VIEWPORT_OFFSET + (0x4 * 1)] - (RSXThread::m_height / RSXThread::m_height_scale); + scaleOffsetMat[7] = -((float&)methodRegisters[NV4097_SET_VIEWPORT_OFFSET + (0x4 * 1)] - (RSXThread::m_height / RSXThread::m_height_scale)); scaleOffsetMat[11] = (float&)methodRegisters[NV4097_SET_VIEWPORT_OFFSET + (0x4 * 2)]; scaleOffsetMat[3] /= RSXThread::m_width / RSXThread::m_width_scale; From 0b435afd1d2215cca30f19f3855671358075b0eb Mon Sep 17 00:00:00 2001 From: vlj Date: Sun, 31 May 2015 22:37:15 +0200 Subject: [PATCH 181/343] d3d12: Fix some texture format swizzles. 
--- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 97 +++++++++++++++++++++++++--- 1 file changed, 87 insertions(+), 10 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index a83ec52212..fc10f050a8 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -168,9 +168,11 @@ size_t D3D12GSRender::UploadTextures() ID3D12Resource *vramTexture; std::unordered_map::const_iterator ItRTT = m_rtts.m_renderTargets.find(texaddr); std::unordered_map::const_iterator ItCache = m_texturesCache.find(texaddr); + bool isRenderTarget = false; if (ItRTT != m_rtts.m_renderTargets.end()) { vramTexture = ItRTT->second; + isRenderTarget = true; } else if (ItCache != m_texturesCache.end()) { @@ -274,24 +276,99 @@ size_t D3D12GSRender::UploadTextures() srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; srvDesc.Format = dxgiFormat; srvDesc.Texture2D.MipLevels = 1; - const int RemapValue[4] = + + switch (format) { - D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_1, - D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_2, - D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_3, - D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0 - }; - if (format != CELL_GCM_TEXTURE_B8 && format != CELL_GCM_TEXTURE_X16 && format != CELL_GCM_TEXTURE_X32_FLOAT) + case CELL_GCM_TEXTURE_A1R5G5B5: + case CELL_GCM_TEXTURE_A4R4G4B4: + case CELL_GCM_TEXTURE_R5G6B5: + case CELL_GCM_TEXTURE_G8B8: + case CELL_GCM_TEXTURE_R6G5B5: + case CELL_GCM_TEXTURE_DEPTH24_D8: + case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: + case CELL_GCM_TEXTURE_DEPTH16: + case CELL_GCM_TEXTURE_DEPTH16_FLOAT: + case CELL_GCM_TEXTURE_X16: + case CELL_GCM_TEXTURE_Y16_X16: + case CELL_GCM_TEXTURE_R5G5B5A1: + case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: + case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: + case CELL_GCM_TEXTURE_X32_FLOAT: + case CELL_GCM_TEXTURE_D1R5G5B5: + case CELL_GCM_TEXTURE_Y16_X16_FLOAT: + case CELL_GCM_TEXTURE_COMPRESSED_HILO8: + case 
CELL_GCM_TEXTURE_COMPRESSED_HILO_S8: + case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: + case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: + default: + LOG_ERROR(RSX, "Unimplemented Texture format : %x", format); + break; + case CELL_GCM_TEXTURE_D8R8G8B8: { + const int RemapValue[4] = + { + D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_1, + D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_2, + D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_1, + D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0 + }; + u8 remap_a = m_textures[i].GetRemap() & 0x3; u8 remap_r = (m_textures[i].GetRemap() >> 2) & 0x3; u8 remap_g = (m_textures[i].GetRemap() >> 4) & 0x3; u8 remap_b = (m_textures[i].GetRemap() >> 6) & 0x3; - srvDesc.Shader4ComponentMapping = D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING(RemapValue[remap_a], RemapValue[remap_r], RemapValue[remap_g], RemapValue[remap_b]); + srvDesc.Shader4ComponentMapping = D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING( + RemapValue[remap_a], + RemapValue[remap_r], + RemapValue[remap_g], + RemapValue[remap_b]); + break; + } + case CELL_GCM_TEXTURE_A8R8G8B8: + { + const int RemapValue[4] = + { + D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_1, + D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_2, + D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_3, + D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0 + }; + + u8 remap_a = m_textures[i].GetRemap() & 0x3; + u8 remap_r = (m_textures[i].GetRemap() >> 2) & 0x3; + u8 remap_g = (m_textures[i].GetRemap() >> 4) & 0x3; + u8 remap_b = (m_textures[i].GetRemap() >> 6) & 0x3; + if (isRenderTarget) + srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + else + srvDesc.Shader4ComponentMapping = D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING( + RemapValue[remap_a], + RemapValue[remap_r], + RemapValue[remap_g], + RemapValue[remap_b]); + + break; + } + case CELL_GCM_TEXTURE_COMPRESSED_DXT1: + 
case CELL_GCM_TEXTURE_COMPRESSED_DXT23: + case CELL_GCM_TEXTURE_COMPRESSED_DXT45: + srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + break; + case CELL_GCM_TEXTURE_B8: + srvDesc.Shader4ComponentMapping = D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING( + D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_1, + D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_2, + D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_3, + D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0); + break; + case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: + srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + break; + case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: + srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + break; } - else - srvDesc.Shader4ComponentMapping = D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING(RemapValue[0], RemapValue[1], RemapValue[2], RemapValue[3]); D3D12_CPU_DESCRIPTOR_HANDLE Handle = m_perFrameStorage.m_textureDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); Handle.ptr += (m_perFrameStorage.m_currentTextureIndex + usedTexture) * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); From 1c803e725b6edc188853c2dd809e415294bfd952 Mon Sep 17 00:00:00 2001 From: vlj Date: Sun, 31 May 2015 22:58:28 +0200 Subject: [PATCH 182/343] d3d12: Do not overwrite hlsl file --- .gitignore | 3 +-- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h | 10 ++++++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/.gitignore b/.gitignore index 361bc51abd..309737a831 100644 --- a/.gitignore +++ b/.gitignore @@ -44,8 +44,7 @@ /bin/rpcs3.iobj /bin/FragmentProgram.txt /bin/VertexProgram.txt -/bin/FragmentProgram.hlsl -/bin/VertexProgram.hlsl +/bin/*.hlsl /bin/BreakPoints.dat /bin/textures /bin/*.lib diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h index 29e551a817..87756bc625 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h +++ 
b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h @@ -81,8 +81,9 @@ struct D3D12Traits } // TODO: This shouldn't use current dir - fs::file("./FragmentProgram.hlsl", o_write | o_create | o_trunc).write(shader.c_str(), shader.size()); - fragmentProgramData.Id = (u32)ID; + std::string filename = "./FragmentProgram" + std::to_string(ID) + ".hlsl"; + fs::file(filename, o_write | o_create | o_trunc).write(shader.c_str(), shader.size()); + fragmentProgramData.id = (u32)ID; } static @@ -93,8 +94,9 @@ struct D3D12Traits vertexProgramData.Compile(shaderCode, Shader::SHADER_TYPE::SHADER_TYPE_VERTEX); // TODO: This shouldn't use current dir - fs::file("./VertexProgram.hlsl", o_write | o_create | o_trunc).write(shaderCode.c_str(), shaderCode.size()); - vertexProgramData.Id = (u32)ID; + std::string filename = "./VertexProgram" + std::to_string(ID) + ".hlsl"; + fs::file(filename, o_write | o_create | o_trunc).write(shaderCode.c_str(), shaderCode.size()); + vertexProgramData.id = (u32)ID; } static From 043adab2cecd5a657cde729591586117d8a28221 Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 1 Jun 2015 00:02:32 +0200 Subject: [PATCH 183/343] d3d12: Fix fragment constant filling The command buffer does not necessarily set them in order... --- rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index 69de6aadb4..05ef59f54d 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -492,15 +492,22 @@ void D3D12GSRender::FillPixelShaderConstantsBuffer() { u32 vector[4]; // Is it assigned by color register in command buffer ? - if (!m_fragment_constants.empty() && offsetInFP == m_fragment_constants.front().id - m_cur_fragment_prog->offset) + // TODO : we loop every iteration, we might do better... 
+ bool isCommandBufferSetConstant = false; + for (const RSXTransformConstant& c : m_fragment_constants) { - const RSXTransformConstant& c = m_fragment_constants.front(); - vector[0] = (u32&)c.x; - vector[1] = (u32&)c.y; - vector[2] = (u32&)c.z; - vector[3] = (u32&)c.w; + size_t fragmentId = c.id - m_cur_fragment_prog->offset; + if (fragmentId == offsetInFP) + { + isCommandBufferSetConstant = true; + vector[0] = (u32&)c.x; + vector[1] = (u32&)c.y; + vector[2] = (u32&)c.z; + vector[3] = (u32&)c.w; + break; + } } - else + if (!isCommandBufferSetConstant) { auto data = vm::ptr::make(m_cur_fragment_prog->addr + (u32)offsetInFP); From 6d9e542cde94c55b87783421f0bc495b2b9c7388 Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 1 Jun 2015 17:20:22 +0200 Subject: [PATCH 184/343] d3d12: Ignore files in bin/ --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 309737a831..ae718af172 100644 --- a/.gitignore +++ b/.gitignore @@ -31,7 +31,7 @@ *.wav /build - +/bin /libs /ipch /rpcs3/Debug From 25b10c5e3ee35a18f1a76101aaa8527b732d6120 Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 1 Jun 2015 17:44:26 +0200 Subject: [PATCH 185/343] d3d12: Consume less descriptor slot if not required --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 174 ++++++++++++----------- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 5 +- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h | 24 ++-- 3 files changed, 110 insertions(+), 93 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 753c28d3d9..1d5714ea25 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -75,11 +75,12 @@ void D3D12GSRender::ResourceStorage::Init(ID3D12Device *device) check(device->CreateHeap(&heapDescription, IID_PPV_ARGS(&m_textureStorage))); D3D12_DESCRIPTOR_HEAP_DESC textureDescriptorDesc = {}; - textureDescriptorDesc.NumDescriptors = 2048; // For safety + textureDescriptorDesc.NumDescriptors = 10000; // 
For safety textureDescriptorDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; textureDescriptorDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; check(device->CreateDescriptorHeap(&textureDescriptorDesc, IID_PPV_ARGS(&m_textureDescriptorsHeap))); + textureDescriptorDesc.NumDescriptors = 2048; // For safety textureDescriptorDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER; check(device->CreateDescriptorHeap(&textureDescriptorDesc, IID_PPV_ARGS(&m_samplerDescriptorHeap))); } @@ -230,55 +231,58 @@ D3D12GSRender::D3D12GSRender() m_device->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(&m_backbufferAsRendertarget[1])); m_device->CreateRenderTargetView(m_backBuffer[1], &rttDesc, m_backbufferAsRendertarget[1]->GetCPUDescriptorHandleForHeapStart()); - // Common root signature - D3D12_DESCRIPTOR_RANGE descriptorRange[4] = {}; - // Scale Offset data - descriptorRange[0].BaseShaderRegister = 0; - descriptorRange[0].NumDescriptors = 1; - descriptorRange[0].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV; - // Constants - descriptorRange[1].BaseShaderRegister = 1; - descriptorRange[1].NumDescriptors = 2; - descriptorRange[1].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV; - // Textures - descriptorRange[2].BaseShaderRegister = 0; - descriptorRange[2].NumDescriptors = 16; - descriptorRange[2].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; - // Samplers - descriptorRange[3].BaseShaderRegister = 0; - descriptorRange[3].NumDescriptors = 16; - descriptorRange[3].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER; - D3D12_ROOT_PARAMETER RP[4] = {}; - RP[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; - RP[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; - RP[0].DescriptorTable.pDescriptorRanges = &descriptorRange[0]; - RP[0].DescriptorTable.NumDescriptorRanges = 1; - RP[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; - RP[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; - RP[1].DescriptorTable.pDescriptorRanges = &descriptorRange[1]; - 
RP[1].DescriptorTable.NumDescriptorRanges = 1; - RP[2].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; - RP[2].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; - RP[2].DescriptorTable.pDescriptorRanges = &descriptorRange[2]; - RP[2].DescriptorTable.NumDescriptorRanges = 1; - RP[3].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; - RP[3].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; - RP[3].DescriptorTable.pDescriptorRanges = &descriptorRange[3]; - RP[3].DescriptorTable.NumDescriptorRanges = 1; + // Common root signatures + for (unsigned textureCount = 0; textureCount < 17; textureCount++) + { + D3D12_DESCRIPTOR_RANGE descriptorRange[4] = {}; + // Scale Offset data + descriptorRange[0].BaseShaderRegister = 0; + descriptorRange[0].NumDescriptors = 1; + descriptorRange[0].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV; + // Constants + descriptorRange[1].BaseShaderRegister = 1; + descriptorRange[1].NumDescriptors = 2; + descriptorRange[1].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV; + // Textures + descriptorRange[2].BaseShaderRegister = 0; + descriptorRange[2].NumDescriptors = textureCount; + descriptorRange[2].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; + // Samplers + descriptorRange[3].BaseShaderRegister = 0; + descriptorRange[3].NumDescriptors = textureCount; + descriptorRange[3].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER; + D3D12_ROOT_PARAMETER RP[4] = {}; + RP[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + RP[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + RP[0].DescriptorTable.pDescriptorRanges = &descriptorRange[0]; + RP[0].DescriptorTable.NumDescriptorRanges = 1; + RP[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + RP[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + RP[1].DescriptorTable.pDescriptorRanges = &descriptorRange[1]; + RP[1].DescriptorTable.NumDescriptorRanges = 1; + RP[2].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + RP[2].ShaderVisibility = 
D3D12_SHADER_VISIBILITY_ALL; + RP[2].DescriptorTable.pDescriptorRanges = &descriptorRange[2]; + RP[2].DescriptorTable.NumDescriptorRanges = 1; + RP[3].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + RP[3].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + RP[3].DescriptorTable.pDescriptorRanges = &descriptorRange[3]; + RP[3].DescriptorTable.NumDescriptorRanges = 1; - D3D12_ROOT_SIGNATURE_DESC rootSignatureDesc = {}; - rootSignatureDesc.Flags = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT; - rootSignatureDesc.NumParameters = 4; - rootSignatureDesc.pParameters = RP; + D3D12_ROOT_SIGNATURE_DESC rootSignatureDesc = {}; + rootSignatureDesc.Flags = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT; + rootSignatureDesc.NumParameters = (textureCount > 0) ? 4 : 2; + rootSignatureDesc.pParameters = RP; - Microsoft::WRL::ComPtr rootSignatureBlob; - Microsoft::WRL::ComPtr errorBlob; - check(D3D12SerializeRootSignature(&rootSignatureDesc, D3D_ROOT_SIGNATURE_VERSION_1, &rootSignatureBlob, &errorBlob)); + Microsoft::WRL::ComPtr rootSignatureBlob; + Microsoft::WRL::ComPtr errorBlob; + check(D3D12SerializeRootSignature(&rootSignatureDesc, D3D_ROOT_SIGNATURE_VERSION_1, &rootSignatureBlob, &errorBlob)); - m_device->CreateRootSignature(0, - rootSignatureBlob->GetBufferPointer(), - rootSignatureBlob->GetBufferSize(), - IID_PPV_ARGS(&m_rootSignature)); + m_device->CreateRootSignature(0, + rootSignatureBlob->GetBufferPointer(), + rootSignatureBlob->GetBufferSize(), + IID_PPV_ARGS(&m_rootSignatures[textureCount])); + } m_perFrameStorage.Init(m_device); m_perFrameStorage.Reset(); @@ -348,7 +352,8 @@ D3D12GSRender::~D3D12GSRender() m_backbufferAsRendertarget[1]->Release(); m_backBuffer[1]->Release(); m_rtts.Release(); - m_rootSignature->Release(); + for (unsigned i = 0; i < 17; i++) + m_rootSignatures[i]->Release(); m_swapChain->Release(); m_device->Release(); delete[] vertexConstantShadowCopy; @@ -625,7 +630,7 @@ bool D3D12GSRender::LoadProgram() 
prop.IASet = m_IASet; - m_PSO = m_cachePSO.getGraphicPipelineState(m_cur_vertex_prog, m_cur_fragment_prog, prop, std::make_pair(m_device, m_rootSignature)); + m_PSO = m_cachePSO.getGraphicPipelineState(m_cur_vertex_prog, m_cur_fragment_prog, prop, std::make_pair(m_device, m_rootSignatures)); return m_PSO != nullptr; } @@ -637,8 +642,6 @@ void D3D12GSRender::ExecCMD() m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_perFrameStorage.m_commandAllocator, nullptr, IID_PPV_ARGS(&commandList)); m_perFrameStorage.m_inflightCommandList.push_back(commandList); - commandList->SetGraphicsRootSignature(m_rootSignature); - if (m_indexed_array.m_count) LoadVertexData(m_indexed_array.index_min, m_indexed_array.index_max - m_indexed_array.index_min + 1); @@ -657,6 +660,8 @@ void D3D12GSRender::ExecCMD() return; } + commandList->SetGraphicsRootSignature(m_rootSignatures[m_PSO->second]); + // Constants setScaleOffset(); commandList->SetDescriptorHeaps(1, &m_perFrameStorage.m_scaleOffsetDescriptorHeap); @@ -675,43 +680,46 @@ void D3D12GSRender::ExecCMD() Handle = m_perFrameStorage.m_constantsBufferDescriptorsHeap->GetGPUDescriptorHandleForHeapStart(); Handle.ptr += currentBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); commandList->SetGraphicsRootDescriptorTable(1, Handle); - commandList->SetPipelineState(m_PSO); + commandList->SetPipelineState(m_PSO->first); - size_t usedTexture = UploadTextures(); - // Drivers don't like undefined texture descriptors - for (; usedTexture < 16; usedTexture++) + if (m_PSO->second > 0) { - D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; - srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; - srvDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - srvDesc.Texture2D.MipLevels = 1; - srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; - D3D12_CPU_DESCRIPTOR_HANDLE Handle = m_perFrameStorage.m_textureDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); - Handle.ptr += 
(m_perFrameStorage.m_currentTextureIndex + usedTexture) * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - m_device->CreateShaderResourceView(m_dummyTexture, &srvDesc, Handle); + size_t usedTexture = UploadTextures(); + // Drivers don't like undefined texture descriptors + /* for (; usedTexture < 16; usedTexture++) + { + D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + srvDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + srvDesc.Texture2D.MipLevels = 1; + srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + D3D12_CPU_DESCRIPTOR_HANDLE Handle = m_perFrameStorage.m_textureDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); + Handle.ptr += (m_perFrameStorage.m_currentTextureIndex + usedTexture) * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + m_device->CreateShaderResourceView(m_dummyTexture, &srvDesc, Handle); - D3D12_SAMPLER_DESC samplerDesc = {}; - samplerDesc.Filter = D3D12_FILTER_MIN_MAG_MIP_POINT; - samplerDesc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_WRAP; - samplerDesc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_WRAP; - samplerDesc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_WRAP; - Handle = m_perFrameStorage.m_samplerDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); - Handle.ptr += (m_perFrameStorage.m_currentTextureIndex + usedTexture) * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); - m_device->CreateSampler(&samplerDesc, Handle); + D3D12_SAMPLER_DESC samplerDesc = {}; + samplerDesc.Filter = D3D12_FILTER_MIN_MAG_MIP_POINT; + samplerDesc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_WRAP; + samplerDesc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_WRAP; + samplerDesc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_WRAP; + Handle = m_perFrameStorage.m_samplerDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); + Handle.ptr += (usedTexture) * 
m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); + m_device->CreateSampler(&samplerDesc, Handle); + }*/ + + Handle = m_perFrameStorage.m_textureDescriptorsHeap->GetGPUDescriptorHandleForHeapStart(); + Handle.ptr += m_perFrameStorage.m_currentTextureIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + commandList->SetDescriptorHeaps(1, &m_perFrameStorage.m_textureDescriptorsHeap); + commandList->SetGraphicsRootDescriptorTable(2, Handle); + + Handle = m_perFrameStorage.m_samplerDescriptorHeap->GetGPUDescriptorHandleForHeapStart(); + Handle.ptr += m_perFrameStorage.m_currentTextureIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); + commandList->SetDescriptorHeaps(1, &m_perFrameStorage.m_samplerDescriptorHeap); + commandList->SetGraphicsRootDescriptorTable(3, Handle); + + m_perFrameStorage.m_currentTextureIndex += usedTexture; } - Handle = m_perFrameStorage.m_textureDescriptorsHeap->GetGPUDescriptorHandleForHeapStart(); - Handle.ptr += m_perFrameStorage.m_currentTextureIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - commandList->SetDescriptorHeaps(1, &m_perFrameStorage.m_textureDescriptorsHeap); - commandList->SetGraphicsRootDescriptorTable(2, Handle); - - Handle = m_perFrameStorage.m_samplerDescriptorHeap->GetGPUDescriptorHandleForHeapStart(); - Handle.ptr += m_perFrameStorage.m_currentTextureIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); - commandList->SetDescriptorHeaps(1, &m_perFrameStorage.m_samplerDescriptorHeap); - commandList->SetGraphicsRootDescriptorTable(3, Handle); - - m_perFrameStorage.m_currentTextureIndex += usedTexture; - size_t numRTT; switch (m_surface_color_target) { diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 051e97fe58..80e3658444 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ 
b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -54,8 +54,9 @@ private: // std::vector m_post_draw_objs; PipelineStateObjectCache m_cachePSO; - ID3D12PipelineState *m_PSO; - ID3D12RootSignature *m_rootSignature; + std::pair *m_PSO; + // m_rootSignatures[N] is RS with N texture/sample + ID3D12RootSignature *m_rootSignatures[17]; ID3D12PipelineState *m_convertPSO; ID3D12RootSignature *m_convertRootSignature; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h index 87756bc625..c264220385 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h @@ -44,6 +44,7 @@ public: u32 Id; Microsoft::WRL::ComPtr bytecode; std::vector FragmentConstantOffsetCache; + size_t m_textureCount; /** * Decompile a fragment shader located in the PS3's Memory. This function operates synchronously. @@ -59,9 +60,9 @@ struct D3D12Traits { typedef Shader VertexProgramData; typedef Shader FragmentProgramData; - typedef ID3D12PipelineState PipelineData; + typedef std::pair PipelineData; typedef D3D12PipelineProperties PipelineProperties; - typedef std::pair ExtraData; + typedef std::pair ExtraData; static void RecompileFragmentProgram(RSXFragmentProgram *RSXFP, FragmentProgramData& fragmentProgramData, size_t ID) @@ -69,12 +70,16 @@ struct D3D12Traits D3D12FragmentDecompiler FS(RSXFP->addr, RSXFP->size, RSXFP->offset); const std::string &shader = FS.Decompile(); fragmentProgramData.Compile(shader, Shader::SHADER_TYPE::SHADER_TYPE_FRAGMENT); - + fragmentProgramData.m_textureCount = 0; for (const ParamType& PT : FS.m_parr.params[PF_PARAM_UNIFORM]) { - if (PT.type == "sampler2D") continue; for (const ParamItem PI : PT.items) { + if (PT.type == "sampler2D") + { + fragmentProgramData.m_textureCount++; + continue; + } size_t offset = atoi(PI.name.c_str() + 2); fragmentProgramData.FragmentConstantOffsetCache.push_back(offset); } @@ -102,7 +107,8 @@ struct D3D12Traits static PipelineData *BuildProgram(VertexProgramData 
&vertexProgramData, FragmentProgramData &fragmentProgramData, const PipelineProperties &pipelineProperties, const ExtraData& extraData) { - ID3D12PipelineState *result; + + std::pair *result = new std::pair(); D3D12_GRAPHICS_PIPELINE_STATE_DESC graphicPipelineStateDesc = {}; if (vertexProgramData.bytecode == nullptr) @@ -115,7 +121,8 @@ struct D3D12Traits graphicPipelineStateDesc.PS.BytecodeLength = fragmentProgramData.bytecode->GetBufferSize(); graphicPipelineStateDesc.PS.pShaderBytecode = fragmentProgramData.bytecode->GetBufferPointer(); - graphicPipelineStateDesc.pRootSignature = extraData.second; + graphicPipelineStateDesc.pRootSignature = extraData.second[fragmentProgramData.m_textureCount]; + result->second = fragmentProgramData.m_textureCount; // Sensible default value static D3D12_RASTERIZER_DESC CD3D12_RASTERIZER_DESC = @@ -161,14 +168,15 @@ struct D3D12Traits graphicPipelineStateDesc.SampleMask = UINT_MAX; graphicPipelineStateDesc.NodeMask = 1; - extraData.first->CreateGraphicsPipelineState(&graphicPipelineStateDesc, IID_PPV_ARGS(&result)); + extraData.first->CreateGraphicsPipelineState(&graphicPipelineStateDesc, IID_PPV_ARGS(&result->first)); return result; } static void DeleteProgram(PipelineData *ptr) { - ptr->Release(); + ptr->first->Release(); + delete ptr; } }; From d8d72c4327fab6495b020cbc72e3ad0d50cc5da0 Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 1 Jun 2015 18:28:54 +0200 Subject: [PATCH 186/343] d3d12: Implement R5G6B5 texture format Make sonic works, but with wrong color --- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index fc10f050a8..d91984425a 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -101,7 +101,6 @@ size_t D3D12GSRender::UploadTextures() { case CELL_GCM_TEXTURE_A1R5G5B5: case CELL_GCM_TEXTURE_A4R4G4B4: - case CELL_GCM_TEXTURE_R5G6B5: case 
CELL_GCM_TEXTURE_G8B8: case CELL_GCM_TEXTURE_R6G5B5: case CELL_GCM_TEXTURE_DEPTH24_D8: @@ -123,6 +122,11 @@ size_t D3D12GSRender::UploadTextures() default: LOG_ERROR(RSX, "Unimplemented Texture format : %x", format); break; + case CELL_GCM_TEXTURE_R5G6B5: + dxgiFormat = DXGI_FORMAT_B5G6R5_UNORM; + blockSizeInByte = 2; + blockWidthInPixel = 1, blockHeightInPixel = 1; + break; case CELL_GCM_TEXTURE_D8R8G8B8: dxgiFormat = DXGI_FORMAT_R8G8B8A8_UNORM; blockSizeInByte = 4; @@ -281,7 +285,6 @@ size_t D3D12GSRender::UploadTextures() { case CELL_GCM_TEXTURE_A1R5G5B5: case CELL_GCM_TEXTURE_A4R4G4B4: - case CELL_GCM_TEXTURE_R5G6B5: case CELL_GCM_TEXTURE_G8B8: case CELL_GCM_TEXTURE_R6G5B5: case CELL_GCM_TEXTURE_DEPTH24_D8: @@ -303,6 +306,9 @@ size_t D3D12GSRender::UploadTextures() default: LOG_ERROR(RSX, "Unimplemented Texture format : %x", format); break; + case CELL_GCM_TEXTURE_R5G6B5: + srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + break; case CELL_GCM_TEXTURE_D8R8G8B8: { const int RemapValue[4] = From 5ca02a505390b891f4357a415a41131fac22c320 Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 1 Jun 2015 18:55:04 +0200 Subject: [PATCH 187/343] d3d12: Fix R5G6B5 being byte swapped --- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index d91984425a..dccfd04f70 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -231,6 +231,17 @@ size_t D3D12GSRender::UploadTextures() dst[(row * rowPitch / 4) + j] = src[LinearToSwizzleAddress(j, i, 0, log2width, log2height, 0)]; } } + else if (format == CELL_GCM_TEXTURE_R5G6B5) + { + unsigned short *dst = (unsigned short *)textureData, + *src = (unsigned short *)pixels; + + for (int j = 0; j < m_textures[i].GetWidth(); j++) + { + u16 tmp = src[row * m_texture_pitch + j]; + dst[row * rowPitch / 2 + j] = (tmp >> 8) | (tmp << 8); + } + } else 
streamToBuffer((char*)textureData + row * rowPitch, (char*)pixels + row * m_texture_pitch, m_texture_pitch); } From c1abf80b40e3d7b04b6d702ef1228d6e1d94596d Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 1 Jun 2015 19:02:11 +0200 Subject: [PATCH 188/343] d3d12: Fix r5g6b5 only using half texture --- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index dccfd04f70..7f9d37710e 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -238,7 +238,7 @@ size_t D3D12GSRender::UploadTextures() for (int j = 0; j < m_textures[i].GetWidth(); j++) { - u16 tmp = src[row * m_texture_pitch + j]; + u16 tmp = src[row * m_texture_pitch / 2 + j]; dst[row * rowPitch / 2 + j] = (tmp >> 8) | (tmp << 8); } } From e0cff6b0b478fa5644757b5f4f41780c0f4a37ef Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 1 Jun 2015 22:48:30 +0200 Subject: [PATCH 189/343] d3d12: Start using a ring-like buffer for constants --- rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp | 12 ++--- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 70 +++++++++++++++++++++++++++ rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 19 ++++++++ 3 files changed, 94 insertions(+), 7 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index 05ef59f54d..c8919dafb3 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -394,16 +394,15 @@ void D3D12GSRender::setScaleOffset() scaleOffsetMat[3] /= RSXThread::m_width / RSXThread::m_width_scale; scaleOffsetMat[7] /= RSXThread::m_height / RSXThread::m_height_scale; - size_t constantBuffersHeapOffset = m_perFrameStorage.m_constantsBuffersHeapFreeSpace; - // 65536 alignment - constantBuffersHeapOffset = (constantBuffersHeapOffset + 65536 - 1) & ~65535; + assert(m_constantsData.canAlloc(256)); + size_t heapOffset = m_constantsData.alloc(256); // Scale offset buffer // Separate 
constant buffer ID3D12Resource *scaleOffsetBuffer; check(m_device->CreatePlacedResource( - m_perFrameStorage.m_constantsBuffersHeap, - constantBuffersHeapOffset, + m_constantsData.m_heap, + heapOffset, &getBufferResourceDesc(256), D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, @@ -421,8 +420,7 @@ void D3D12GSRender::setScaleOffset() D3D12_CPU_DESCRIPTOR_HANDLE Handle = m_perFrameStorage.m_scaleOffsetDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); Handle.ptr += m_perFrameStorage.m_currentScaleOffsetBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); m_device->CreateConstantBufferView(&constantBufferViewDesc, Handle); - m_perFrameStorage.m_constantsBuffersHeapFreeSpace = constantBuffersHeapOffset + 256; - m_perFrameStorage.m_inflightResources.push_back(scaleOffsetBuffer); + m_constantsData.m_resourceStoredSinceLastSync.push_back(std::make_tuple(heapOffset, 256, scaleOffsetBuffer)); } void D3D12GSRender::FillVertexShaderConstantsBuffer() diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 1d5714ea25..cd49011045 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -14,6 +14,66 @@ void SetGetD3DGSFrameCallback(GetGSFrameCb2 value) GetGSFrame = value; } +void DataHeap::Init(ID3D12Device *device, size_t heapSize, D3D12_HEAP_TYPE type) +{ + m_size = heapSize; + D3D12_HEAP_DESC heapDesc = {}; + heapDesc.SizeInBytes = m_size; + heapDesc.Properties.Type = type; + heapDesc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS; + check(device->CreateHeap(&heapDesc, IID_PPV_ARGS(&m_heap))); + m_putPos = 0; + m_getPos = m_size - 1; +} + + +bool DataHeap::canAlloc(size_t size) +{ + size_t putPos = m_putPos.load(), getPos = m_getPos.load(); + size_t allocSize = powerOf2Align(size, 65536); + if (putPos + allocSize < m_size) + { + // range before get + if (putPos + allocSize < getPos) + return true; + // range after get + if (putPos > getPos) + return true; 
+ return false; + } + else + { + // ..]....[..get.. + if (putPos < getPos) + return false; + // ..get..]...[... + // Actually all resources extending beyond heap space starts at 0 + if (allocSize > getPos) + return false; + return true; + } +} + +size_t DataHeap::alloc(size_t size) +{ + assert(canAlloc(size)); + size_t putPos = m_putPos.load(); + if (putPos + size < m_size) + { + m_putPos += powerOf2Align(size, 65536); + return putPos; + } + else + { + m_putPos.store(powerOf2Align(size, 65536)); + return 0; + } +} + +void DataHeap::Release() +{ +} + void D3D12GSRender::ResourceStorage::Reset() { m_vertexIndexBuffersHeapFreeSpace = 0; @@ -334,10 +394,13 @@ D3D12GSRender::D3D12GSRender() m_UAVHeap.m_getPos = 1024 * 1024 * 128 - 1; m_rtts.Init(m_device); + + m_constantsData.Init(m_device, 1024 * 1024, D3D12_HEAP_TYPE_UPLOAD); } D3D12GSRender::~D3D12GSRender() { + m_constantsData.Release(); m_UAVHeap.m_heap->Release(); m_readbackResources.m_heap->Release(); m_texturesRTTs.clear(); @@ -870,6 +933,13 @@ void D3D12GSRender::Flip() m_texturesCache.clear(); m_texturesRTTs.clear(); + for (auto tmp : m_constantsData.m_resourceStoredSinceLastSync) + { + std::get<2>(tmp)->Release(); + m_constantsData.m_getPos.store(std::get<0>(tmp)); + } + m_constantsData.m_resourceStoredSinceLastSync.clear(); + m_frame->Flip(nullptr); } diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 80e3658444..e21b5f2415 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -44,6 +44,23 @@ typedef GSFrameBase2*(*GetGSFrameCb2)(); void SetGetD3DGSFrameCallback(GetGSFrameCb2 value); +struct DataHeap +{ + ID3D12Heap *m_heap; + size_t m_size; + std::atomic m_putPos, // Start of free space + m_getPos; // End of free space + std::vector > m_resourceStoredSinceLastSync; + + void Init(ID3D12Device *, size_t, D3D12_HEAP_TYPE); + /** + * Does alloc cross get position ? 
+ */ + bool canAlloc(size_t size); + size_t alloc(size_t size); + void Release(); +}; + class D3D12GSRender : public GSRender { private: @@ -96,6 +113,8 @@ private: ResourceStorage m_perFrameStorage; + DataHeap m_constantsData; + struct UAVHeap { ID3D12Heap *m_heap; From 59aca7566e2f47a436afda60ed2d5a030618fdde Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 1 Jun 2015 23:09:58 +0200 Subject: [PATCH 190/343] d3d12: Some fixes to avoid running out of constant spaces --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 15 ++++++++++----- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 2 +- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index cd49011045..84edfde30f 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -29,7 +29,7 @@ void DataHeap::Init(ID3D12Device *device, size_t heapSize, D3D12_HEAP_TYPE type) bool DataHeap::canAlloc(size_t size) { - size_t putPos = m_putPos.load(), getPos = m_getPos.load(); + size_t putPos = m_putPos, getPos = m_getPos; size_t allocSize = powerOf2Align(size, 65536); if (putPos + allocSize < m_size) { @@ -57,7 +57,7 @@ bool DataHeap::canAlloc(size_t size) size_t DataHeap::alloc(size_t size) { assert(canAlloc(size)); - size_t putPos = m_putPos.load(); + size_t putPos = m_putPos; if (putPos + size < m_size) { m_putPos += powerOf2Align(size, 65536); @@ -65,13 +65,18 @@ size_t DataHeap::alloc(size_t size) } else { - m_putPos.store(powerOf2Align(size, 65536)); + m_putPos = powerOf2Align(size, 65536); return 0; } } void DataHeap::Release() { + m_heap->Release(); + for (auto tmp : m_resourceStoredSinceLastSync) + { + std::get<2>(tmp)->Release(); + } } void D3D12GSRender::ResourceStorage::Reset() @@ -395,7 +400,7 @@ D3D12GSRender::D3D12GSRender() m_rtts.Init(m_device); - m_constantsData.Init(m_device, 1024 * 1024, D3D12_HEAP_TYPE_UPLOAD); + m_constantsData.Init(m_device, 1024 * 1024 * 128, D3D12_HEAP_TYPE_UPLOAD); } 
D3D12GSRender::~D3D12GSRender() @@ -936,7 +941,7 @@ void D3D12GSRender::Flip() for (auto tmp : m_constantsData.m_resourceStoredSinceLastSync) { std::get<2>(tmp)->Release(); - m_constantsData.m_getPos.store(std::get<0>(tmp)); + m_constantsData.m_getPos = std::get<0>(tmp); } m_constantsData.m_resourceStoredSinceLastSync.clear(); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index e21b5f2415..d563eb702a 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -48,7 +48,7 @@ struct DataHeap { ID3D12Heap *m_heap; size_t m_size; - std::atomic m_putPos, // Start of free space + size_t m_putPos, // Start of free space m_getPos; // End of free space std::vector > m_resourceStoredSinceLastSync; From f721f2cbaadbcd6f1b0412cba2df9cdf9ff55fa5 Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 1 Jun 2015 23:18:21 +0200 Subject: [PATCH 191/343] d3d12: Use ring like heap for vertex and fragment constants too --- rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp | 24 ++++++++++-------------- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 3 --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 3 +-- 3 files changed, 11 insertions(+), 19 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index c8919dafb3..0bf9889d40 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -432,14 +432,13 @@ void D3D12GSRender::FillVertexShaderConstantsBuffer() memcpy((char*)vertexConstantShadowCopy + offset, vector, 4 * sizeof(float)); } - size_t constantBuffersHeapOffset = m_perFrameStorage.m_constantsBuffersHeapFreeSpace; - // 65536 alignment - constantBuffersHeapOffset = (constantBuffersHeapOffset + 65536 - 1) & ~65535; + assert(m_constantsData.canAlloc(512 * 4 * sizeof(float))); + size_t heapOffset = m_constantsData.alloc(512 * 4 * sizeof(float)); ID3D12Resource *constantsBuffer; check(m_device->CreatePlacedResource( - m_perFrameStorage.m_constantsBuffersHeap, - 
constantBuffersHeapOffset, + m_constantsData.m_heap, + heapOffset, &getBufferResourceDesc(512 * 4 * sizeof(float)), D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, @@ -457,8 +456,7 @@ void D3D12GSRender::FillVertexShaderConstantsBuffer() D3D12_CPU_DESCRIPTOR_HANDLE Handle = m_perFrameStorage.m_constantsBufferDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); Handle.ptr += m_perFrameStorage.m_constantsBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); m_device->CreateConstantBufferView(&constantBufferViewDesc, Handle); - m_perFrameStorage.m_constantsBuffersHeapFreeSpace = constantBuffersHeapOffset + 512 * 4 * sizeof(float); - m_perFrameStorage.m_inflightResources.push_back(constantsBuffer); + m_constantsData.m_resourceStoredSinceLastSync.push_back(std::make_tuple(heapOffset, 512 * 4 * sizeof(float), constantsBuffer)); } void D3D12GSRender::FillPixelShaderConstantsBuffer() @@ -469,14 +467,13 @@ void D3D12GSRender::FillPixelShaderConstantsBuffer() // Multiple of 256 never 0 bufferSize = (bufferSize + 255) & ~255; - size_t constantBuffersHeapOffset = m_perFrameStorage.m_constantsBuffersHeapFreeSpace; - // 65536 alignment - constantBuffersHeapOffset = (constantBuffersHeapOffset + 65536 - 1) & ~65535; + assert(m_constantsData.canAlloc(bufferSize)); + size_t heapOffset = m_constantsData.alloc(bufferSize); ID3D12Resource *constantsBuffer; check(m_device->CreatePlacedResource( - m_perFrameStorage.m_constantsBuffersHeap, - constantBuffersHeapOffset, + m_constantsData.m_heap, + heapOffset, &getBufferResourceDesc(bufferSize), D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, @@ -532,8 +529,7 @@ void D3D12GSRender::FillPixelShaderConstantsBuffer() D3D12_CPU_DESCRIPTOR_HANDLE Handle = m_perFrameStorage.m_constantsBufferDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); Handle.ptr += m_perFrameStorage.m_constantsBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); 
m_device->CreateConstantBufferView(&constantBufferViewDesc, Handle); - m_perFrameStorage.m_constantsBuffersHeapFreeSpace = constantBuffersHeapOffset + bufferSize; - m_perFrameStorage.m_inflightResources.push_back(constantsBuffer); + m_constantsData.m_resourceStoredSinceLastSync.push_back(std::make_tuple(heapOffset, bufferSize, constantsBuffer)); } diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 84edfde30f..4ecec2a8a3 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -84,7 +84,6 @@ void D3D12GSRender::ResourceStorage::Reset() m_vertexIndexBuffersHeapFreeSpace = 0; m_constantsBufferIndex = 0; m_currentScaleOffsetBufferIndex = 0; - m_constantsBuffersHeapFreeSpace = 0; m_currentStorageOffset = 0; m_currentTextureIndex = 0; @@ -113,7 +112,6 @@ void D3D12GSRender::ResourceStorage::Init(ID3D12Device *device) vertexBufferHeapDesc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS; vertexBufferHeapDesc.Properties.Type = D3D12_HEAP_TYPE_UPLOAD; check(device->CreateHeap(&vertexBufferHeapDesc, IID_PPV_ARGS(&m_vertexIndexBuffersHeap))); - check(device->CreateHeap(&vertexBufferHeapDesc, IID_PPV_ARGS(&m_constantsBuffersHeap))); D3D12_DESCRIPTOR_HEAP_DESC descriptorHeapDesc = {}; descriptorHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; @@ -156,7 +154,6 @@ void D3D12GSRender::ResourceStorage::Release() m_constantsBufferDescriptorsHeap->Release(); m_scaleOffsetDescriptorHeap->Release(); - m_constantsBuffersHeap->Release(); m_vertexIndexBuffersHeap->Release(); for (auto tmp : m_inflightResources) tmp->Release(); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index d563eb702a..b6ecd42d1e 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -91,8 +91,6 @@ private: ID3D12Heap *m_vertexIndexBuffersHeap; // Constants storage - ID3D12Heap *m_constantsBuffersHeap; - size_t m_constantsBuffersHeapFreeSpace; 
ID3D12DescriptorHeap *m_constantsBufferDescriptorsHeap; size_t m_constantsBufferIndex; ID3D12DescriptorHeap *m_scaleOffsetDescriptorHeap; @@ -113,6 +111,7 @@ private: ResourceStorage m_perFrameStorage; + // Constants storage DataHeap m_constantsData; struct UAVHeap From 6bb5dd212534404ea64173d597e1321bbaddd74d Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 1 Jun 2015 23:46:04 +0200 Subject: [PATCH 192/343] d3d12: Use ring like buffer for vertex index data --- rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp | 27 ++++++++++++--------------- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 18 ++++++++---------- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 6 ++---- 3 files changed, 22 insertions(+), 29 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index 0bf9889d40..d1e2fa258f 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -194,15 +194,15 @@ std::pair, D3D12_INDEX_BUFFER_VIEW> D3D12G for (size_t buffer = 0; buffer < vertexBufferFormat.size(); buffer++) { const VertexBufferFormat &vbf = vertexBufferFormat[buffer]; - // 65536 alignment - size_t bufferHeapOffset = m_perFrameStorage.m_vertexIndexBuffersHeapFreeSpace; - bufferHeapOffset = (bufferHeapOffset + 65536 - 1) & ~65535; + size_t subBufferSize = vbf.range.second - vbf.range.first; + assert(m_vertexIndexData.canAlloc(subBufferSize)); + size_t heapOffset = m_vertexIndexData.alloc(subBufferSize); ID3D12Resource *vertexBuffer; check(m_device->CreatePlacedResource( - m_perFrameStorage.m_vertexIndexBuffersHeap, - bufferHeapOffset, + m_vertexIndexData.m_heap, + heapOffset, &getBufferResourceDesc(subBufferSize), D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, @@ -250,14 +250,13 @@ std::pair, D3D12_INDEX_BUFFER_VIEW> D3D12G } vertexBuffer->Unmap(0, nullptr); - m_perFrameStorage.m_inflightResources.push_back(vertexBuffer); + m_vertexIndexData.m_resourceStoredSinceLastSync.push_back(std::make_tuple(heapOffset, subBufferSize, vertexBuffer)); 
D3D12_VERTEX_BUFFER_VIEW vertexBufferView = {}; vertexBufferView.BufferLocation = vertexBuffer->GetGPUVirtualAddress(); vertexBufferView.SizeInBytes = (UINT)subBufferSize; vertexBufferView.StrideInBytes = (UINT)vbf.stride; result.first.push_back(vertexBufferView); - m_perFrameStorage.m_vertexIndexBuffersHeapFreeSpace = bufferHeapOffset + subBufferSize; } // Only handle quads now @@ -313,14 +312,14 @@ std::pair, D3D12_INDEX_BUFFER_VIEW> D3D12G else indexCount = m_draw_array_count * 6 / 4; size_t subBufferSize = powerOf2Align(indexCount * indexSize, 64); - // 65536 alignment - size_t bufferHeapOffset = m_perFrameStorage.m_vertexIndexBuffersHeapFreeSpace; - bufferHeapOffset = (bufferHeapOffset + 65536 - 1) & ~65535; + + assert(m_vertexIndexData.canAlloc(subBufferSize)); + size_t heapOffset = m_vertexIndexData.alloc(subBufferSize); ID3D12Resource *indexBuffer; check(m_device->CreatePlacedResource( - m_perFrameStorage.m_vertexIndexBuffersHeap, - bufferHeapOffset, + m_vertexIndexData.m_heap, + heapOffset, &getBufferResourceDesc(subBufferSize), D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, @@ -359,9 +358,7 @@ std::pair, D3D12_INDEX_BUFFER_VIEW> D3D12G } } indexBuffer->Unmap(0, nullptr); - m_perFrameStorage.m_inflightResources.push_back(indexBuffer); - m_perFrameStorage.m_vertexIndexBuffersHeapFreeSpace = bufferHeapOffset + subBufferSize; - + m_vertexIndexData.m_resourceStoredSinceLastSync.push_back(std::make_tuple(heapOffset, subBufferSize, indexBuffer)); indexBufferView.SizeInBytes = (UINT)subBufferSize; indexBufferView.BufferLocation = indexBuffer->GetGPUVirtualAddress(); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 4ecec2a8a3..0b69325707 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -81,7 +81,6 @@ void DataHeap::Release() void D3D12GSRender::ResourceStorage::Reset() { - m_vertexIndexBuffersHeapFreeSpace = 0; m_constantsBufferIndex = 0; m_currentScaleOffsetBufferIndex = 
0; m_currentStorageOffset = 0; @@ -105,14 +104,6 @@ void D3D12GSRender::ResourceStorage::Init(ID3D12Device *device) device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&m_textureUploadCommandAllocator)); check(device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_COPY, IID_PPV_ARGS(&m_downloadCommandAllocator))); - // Create heap for vertex and constants buffers - D3D12_HEAP_DESC vertexBufferHeapDesc = {}; - // 16 MB wide - vertexBufferHeapDesc.SizeInBytes = 1024 * 1024 * 128; - vertexBufferHeapDesc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS; - vertexBufferHeapDesc.Properties.Type = D3D12_HEAP_TYPE_UPLOAD; - check(device->CreateHeap(&vertexBufferHeapDesc, IID_PPV_ARGS(&m_vertexIndexBuffersHeap))); - D3D12_DESCRIPTOR_HEAP_DESC descriptorHeapDesc = {}; descriptorHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; descriptorHeapDesc.NumDescriptors = 10000; // For safety @@ -154,7 +145,6 @@ void D3D12GSRender::ResourceStorage::Release() m_constantsBufferDescriptorsHeap->Release(); m_scaleOffsetDescriptorHeap->Release(); - m_vertexIndexBuffersHeap->Release(); for (auto tmp : m_inflightResources) tmp->Release(); m_textureDescriptorsHeap->Release(); @@ -398,11 +388,13 @@ D3D12GSRender::D3D12GSRender() m_rtts.Init(m_device); m_constantsData.Init(m_device, 1024 * 1024 * 128, D3D12_HEAP_TYPE_UPLOAD); + m_vertexIndexData.Init(m_device, 1024 * 1024 * 128, D3D12_HEAP_TYPE_UPLOAD); } D3D12GSRender::~D3D12GSRender() { m_constantsData.Release(); + m_vertexIndexData.Release(); m_UAVHeap.m_heap->Release(); m_readbackResources.m_heap->Release(); m_texturesRTTs.clear(); @@ -941,6 +933,12 @@ void D3D12GSRender::Flip() m_constantsData.m_getPos = std::get<0>(tmp); } m_constantsData.m_resourceStoredSinceLastSync.clear(); + for (auto tmp : m_vertexIndexData.m_resourceStoredSinceLastSync) + { + std::get<2>(tmp)->Release(); + m_vertexIndexData.m_getPos = std::get<0>(tmp); + } + m_vertexIndexData.m_resourceStoredSinceLastSync.clear(); m_frame->Flip(nullptr); } 
diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index b6ecd42d1e..48daaaa84e 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -86,10 +86,6 @@ private: std::vector m_inflightResources; - // Vertex storage - size_t m_vertexIndexBuffersHeapFreeSpace; - ID3D12Heap *m_vertexIndexBuffersHeap; - // Constants storage ID3D12DescriptorHeap *m_constantsBufferDescriptorsHeap; size_t m_constantsBufferIndex; @@ -113,6 +109,8 @@ private: // Constants storage DataHeap m_constantsData; + // Vertex storage + DataHeap m_vertexIndexData; struct UAVHeap { From 9748007cd3551f1f204e09f1ce5390f3ba521347 Mon Sep 17 00:00:00 2001 From: vlj Date: Tue, 2 Jun 2015 00:11:59 +0200 Subject: [PATCH 193/343] d3d12: Use ring buffer for textures too It looks like the texture size calculation is wrong, it can lead to crash --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 38 ++++++++++++++------------- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 7 ++--- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 20 +++++++------- 3 files changed, 35 insertions(+), 30 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 0b69325707..610eed9931 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -14,13 +14,13 @@ void SetGetD3DGSFrameCallback(GetGSFrameCb2 value) GetGSFrame = value; } -void DataHeap::Init(ID3D12Device *device, size_t heapSize, D3D12_HEAP_TYPE type) +void DataHeap::Init(ID3D12Device *device, size_t heapSize, D3D12_HEAP_TYPE type, D3D12_HEAP_FLAGS flags) { m_size = heapSize; D3D12_HEAP_DESC heapDesc = {}; heapDesc.SizeInBytes = m_size; heapDesc.Properties.Type = type; - heapDesc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS; + heapDesc.Flags = flags; check(device->CreateHeap(&heapDesc, IID_PPV_ARGS(&m_heap))); m_putPos = 0; m_getPos = m_size - 1; @@ -83,7 +83,6 @@ void D3D12GSRender::ResourceStorage::Reset() { m_constantsBufferIndex = 0; 
m_currentScaleOffsetBufferIndex = 0; - m_currentStorageOffset = 0; m_currentTextureIndex = 0; m_commandAllocator->Reset(); @@ -117,17 +116,6 @@ void D3D12GSRender::ResourceStorage::Init(ID3D12Device *device) descriptorHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; check(device->CreateDescriptorHeap(&descriptorHeapDesc, IID_PPV_ARGS(&m_scaleOffsetDescriptorHeap))); - // Texture - D3D12_HEAP_DESC heapDescription = {}; - heapDescription.SizeInBytes = 1024 * 1024 * 256; - heapDescription.Properties.Type = D3D12_HEAP_TYPE_UPLOAD; - heapDescription.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS; - check(device->CreateHeap(&heapDescription, IID_PPV_ARGS(&m_uploadTextureHeap))); - - heapDescription.Properties.Type = D3D12_HEAP_TYPE_DEFAULT; - heapDescription.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_NON_RT_DS_TEXTURES; - check(device->CreateHeap(&heapDescription, IID_PPV_ARGS(&m_textureStorage))); - D3D12_DESCRIPTOR_HEAP_DESC textureDescriptorDesc = {}; textureDescriptorDesc.NumDescriptors = 10000; // For safety textureDescriptorDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; @@ -148,8 +136,6 @@ void D3D12GSRender::ResourceStorage::Release() for (auto tmp : m_inflightResources) tmp->Release(); m_textureDescriptorsHeap->Release(); - m_textureStorage->Release(); - m_uploadTextureHeap->Release(); m_samplerDescriptorHeap->Release(); for (auto tmp : m_inflightCommandList) tmp->Release(); @@ -387,14 +373,18 @@ D3D12GSRender::D3D12GSRender() m_rtts.Init(m_device); - m_constantsData.Init(m_device, 1024 * 1024 * 128, D3D12_HEAP_TYPE_UPLOAD); - m_vertexIndexData.Init(m_device, 1024 * 1024 * 128, D3D12_HEAP_TYPE_UPLOAD); + m_constantsData.Init(m_device, 1024 * 1024 * 128, D3D12_HEAP_TYPE_UPLOAD, D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS); + m_vertexIndexData.Init(m_device, 1024 * 1024 * 128, D3D12_HEAP_TYPE_UPLOAD, D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS); + m_textureUploadData.Init(m_device, 1024 * 1024 * 256, D3D12_HEAP_TYPE_UPLOAD, D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS); + 
m_textureData.Init(m_device, 1024 * 1024 * 256, D3D12_HEAP_TYPE_DEFAULT, D3D12_HEAP_FLAG_ALLOW_ONLY_NON_RT_DS_TEXTURES); } D3D12GSRender::~D3D12GSRender() { m_constantsData.Release(); m_vertexIndexData.Release(); + m_textureUploadData.Release(); + m_textureData.Release(); m_UAVHeap.m_heap->Release(); m_readbackResources.m_heap->Release(); m_texturesRTTs.clear(); @@ -939,6 +929,18 @@ void D3D12GSRender::Flip() m_vertexIndexData.m_getPos = std::get<0>(tmp); } m_vertexIndexData.m_resourceStoredSinceLastSync.clear(); + for (auto tmp : m_textureUploadData.m_resourceStoredSinceLastSync) + { + std::get<2>(tmp)->Release(); + m_textureUploadData.m_getPos = std::get<0>(tmp); + } + m_textureUploadData.m_resourceStoredSinceLastSync.clear(); + for (auto tmp : m_textureData.m_resourceStoredSinceLastSync) + { + std::get<2>(tmp)->Release(); + m_textureData.m_getPos = std::get<0>(tmp); + } + m_textureData.m_resourceStoredSinceLastSync.clear(); m_frame->Flip(nullptr); } diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 48daaaa84e..e4290301c2 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -52,7 +52,7 @@ struct DataHeap m_getPos; // End of free space std::vector > m_resourceStoredSinceLastSync; - void Init(ID3D12Device *, size_t, D3D12_HEAP_TYPE); + void Init(ID3D12Device *, size_t, D3D12_HEAP_TYPE, D3D12_HEAP_FLAGS); /** * Does alloc cross get position ? 
*/ @@ -94,8 +94,6 @@ private: // Texture storage ID3D12CommandAllocator *m_textureUploadCommandAllocator; - ID3D12Heap *m_uploadTextureHeap, *m_textureStorage; - size_t m_currentStorageOffset; ID3D12DescriptorHeap *m_textureDescriptorsHeap; ID3D12DescriptorHeap *m_samplerDescriptorHeap; size_t m_currentTextureIndex; @@ -111,6 +109,9 @@ private: DataHeap m_constantsData; // Vertex storage DataHeap m_vertexIndexData; + // Texture storage + DataHeap m_textureUploadData; + DataHeap m_textureData; struct UAVHeap { diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index 7f9d37710e..8ab549617d 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -196,15 +196,18 @@ size_t D3D12GSRender::UploadTextures() ID3D12Resource *Texture; size_t textureSize = rowPitch * heightInBlocks; + assert(m_textureUploadData.canAlloc(textureSize)); + size_t heapOffset = m_textureUploadData.alloc(textureSize); check(m_device->CreatePlacedResource( - m_perFrameStorage.m_uploadTextureHeap, - m_perFrameStorage.m_currentStorageOffset, + m_textureUploadData.m_heap, + heapOffset, &getBufferResourceDesc(textureSize), D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, IID_PPV_ARGS(&Texture) )); + m_textureUploadData.m_resourceStoredSinceLastSync.push_back(std::make_tuple(heapOffset, textureSize, Texture)); auto pixels = vm::get_ptr(texaddr); void *textureData; @@ -247,19 +250,18 @@ size_t D3D12GSRender::UploadTextures() } Texture->Unmap(0, nullptr); + assert(m_textureData.canAlloc(textureSize)); + size_t heapOffset2 = m_textureData.alloc(textureSize); + check(m_device->CreatePlacedResource( - m_perFrameStorage.m_textureStorage, - m_perFrameStorage.m_currentStorageOffset, + m_textureData.m_heap, + heapOffset2, &getTexture2DResourceDesc(m_textures[i].GetWidth(), m_textures[i].GetHeight(), dxgiFormat), D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&vramTexture) )); - - m_perFrameStorage.m_currentStorageOffset += textureSize; 
- m_perFrameStorage.m_currentStorageOffset = (m_perFrameStorage.m_currentStorageOffset + 65536 - 1) & ~65535; - m_perFrameStorage.m_inflightResources.push_back(Texture); - m_perFrameStorage.m_inflightResources.push_back(vramTexture); + m_textureData.m_resourceStoredSinceLastSync.push_back(std::make_tuple(heapOffset2, textureSize, vramTexture)); D3D12_TEXTURE_COPY_LOCATION dst = {}, src = {}; dst.pResource = vramTexture; From 3a6abe16564ceeb87806709e882e2fa6b0e5add9 Mon Sep 17 00:00:00 2001 From: vlj Date: Tue, 2 Jun 2015 00:15:17 +0200 Subject: [PATCH 194/343] d3d12: fix crash when inferring texture size --- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index 8ab549617d..ae238f1dc2 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -250,8 +250,8 @@ size_t D3D12GSRender::UploadTextures() } Texture->Unmap(0, nullptr); - assert(m_textureData.canAlloc(textureSize)); - size_t heapOffset2 = m_textureData.alloc(textureSize); + assert(m_textureData.canAlloc(textureSize * 2)); + size_t heapOffset2 = m_textureData.alloc(textureSize * 2); check(m_device->CreatePlacedResource( m_textureData.m_heap, @@ -261,7 +261,7 @@ size_t D3D12GSRender::UploadTextures() nullptr, IID_PPV_ARGS(&vramTexture) )); - m_textureData.m_resourceStoredSinceLastSync.push_back(std::make_tuple(heapOffset2, textureSize, vramTexture)); + m_textureData.m_resourceStoredSinceLastSync.push_back(std::make_tuple(heapOffset2, textureSize * 2, vramTexture)); D3D12_TEXTURE_COPY_LOCATION dst = {}, src = {}; dst.pResource = vramTexture; From 878a116c4a434d93c5fca0830c9a6a16b9ed75af Mon Sep 17 00:00:00 2001 From: vlj Date: Tue, 2 Jun 2015 18:35:04 +0200 Subject: [PATCH 195/343] Rebasing didn't went quite right...Fixes --- rpcs3/Emu/RSX/CgBinaryProgram.h | 2 +- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h | 2 +- 2 files changed, 2 
insertions(+), 2 deletions(-) diff --git a/rpcs3/Emu/RSX/CgBinaryProgram.h b/rpcs3/Emu/RSX/CgBinaryProgram.h index 87bb3cedc5..a67354fb3a 100644 --- a/rpcs3/Emu/RSX/CgBinaryProgram.h +++ b/rpcs3/Emu/RSX/CgBinaryProgram.h @@ -332,7 +332,7 @@ public: else { - GLParamArray param_array; + ParamArray param_array; auto& vprog = GetCgRef(prog.program); m_arb_shader += "\n"; m_arb_shader += fmt::format("# binaryFormatRevision 0x%x\n", (u32)prog.binaryFormatRevision); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h index c264220385..854e095038 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h @@ -41,7 +41,7 @@ public: Shader() : bytecode(nullptr) {} ~Shader() {} - u32 Id; + u32 id; Microsoft::WRL::ComPtr bytecode; std::vector FragmentConstantOffsetCache; size_t m_textureCount; From 51d287d9b053a9025e96ab48a39c047381594090 Mon Sep 17 00:00:00 2001 From: vlj Date: Tue, 2 Jun 2015 19:18:16 +0200 Subject: [PATCH 196/343] d3d12: Take alignment into account in streamBuffer And use it for texture upload --- rpcs3/Emu/RSX/D3D12/D3D12.h | 33 ++++++++++++++++------------ rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 2 +- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12.h b/rpcs3/Emu/RSX/D3D12/D3D12.h index 622622a56e..e23cb4f0de 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12.h @@ -31,8 +31,9 @@ void streamToBuffer(void* dst, void* src, size_t sizeInBytes) { for (unsigned i = 0; i < sizeInBytes / 16; i++) { - __m128i *srcPtr = (__m128i*) ((char*)src + i * 16); - _mm_stream_si128((__m128i*)((char*)dst + i * 16), *srcPtr); + + const __m128i &srcPtr = _mm_loadu_si128((__m128i*) ((char*)src + i * 16)); + _mm_stream_si128((__m128i*)((char*)dst + i * 16), srcPtr); } } @@ -44,20 +45,24 @@ inline void streamBuffer(void* dst, void* src, size_t sizeInBytes) { // Assume 64 bytes cache line - assert(powerOf2Align(sizeInBytes, 64)); - for 
(unsigned i = 0; i < sizeInBytes / 64; i++) + unsigned offset = 0; + bool isAligned = !((size_t)src & 15); + for (; (offset + 64) < sizeInBytes; offset += 64) { - char *line = (char*)src + i * 64; - _mm_prefetch(line, _MM_HINT_NTA); - __m128i *srcPtr = (__m128i*) (line); - _mm_stream_si128((__m128i*)((char*)dst + i * 64), *srcPtr); - srcPtr = (__m128i*) (line + 16); - _mm_stream_si128((__m128i*)((char*)dst + i * 64 + 16), *srcPtr); - srcPtr = (__m128i*) (line + 32); - _mm_stream_si128((__m128i*)((char*)dst + i * 64 + 32), *srcPtr); - srcPtr = (__m128i*) (line + 48); - _mm_stream_si128((__m128i*)((char*)dst + i * 64 + 48), *srcPtr); + char *line = (char*)src + offset; + char *dstline = (char*)dst + offset; + // prefetch next line + _mm_prefetch(line + 16, _MM_HINT_NTA); + __m128i srcPtr = isAligned ? _mm_load_si128((__m128i *)line) : _mm_loadu_si128((__m128i *)line); + _mm_stream_si128((__m128i*)dstline, srcPtr); + srcPtr = isAligned ? _mm_load_si128((__m128i *)(line + 16)) : _mm_loadu_si128((__m128i *)(line + 16)); + _mm_stream_si128((__m128i*)(dstline + 16), srcPtr); + srcPtr = isAligned ? _mm_load_si128((__m128i *)(line + 32)) : _mm_loadu_si128((__m128i *)(line + 32)); + _mm_stream_si128((__m128i*)(dstline + 32), srcPtr); + srcPtr = isAligned ? 
_mm_load_si128((__m128i *)(line + 48)) : _mm_loadu_si128((__m128i *)(line + 48)); + _mm_stream_si128((__m128i*)(dstline + 48), srcPtr); } + memcpy((char*)dst + offset, (char*)src + offset, sizeInBytes - offset); } inline diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index ae238f1dc2..e9ba950230 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -246,7 +246,7 @@ size_t D3D12GSRender::UploadTextures() } } else - streamToBuffer((char*)textureData + row * rowPitch, (char*)pixels + row * m_texture_pitch, m_texture_pitch); + streamBuffer((char*)textureData + row * rowPitch, (char*)pixels + row * m_texture_pitch, m_texture_pitch); } Texture->Unmap(0, nullptr); From 461bf12c4f515a5bfd62c4f80a315a6c13462d1b Mon Sep 17 00:00:00 2001 From: vlj Date: Tue, 2 Jun 2015 19:22:32 +0200 Subject: [PATCH 197/343] d3d12: Cleaning --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 610eed9931..1764ab30d3 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -732,27 +732,6 @@ void D3D12GSRender::ExecCMD() if (m_PSO->second > 0) { size_t usedTexture = UploadTextures(); - // Drivers don't like undefined texture descriptors - /* for (; usedTexture < 16; usedTexture++) - { - D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; - srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; - srvDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - srvDesc.Texture2D.MipLevels = 1; - srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; - D3D12_CPU_DESCRIPTOR_HANDLE Handle = m_perFrameStorage.m_textureDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); - Handle.ptr += (m_perFrameStorage.m_currentTextureIndex + usedTexture) * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - 
m_device->CreateShaderResourceView(m_dummyTexture, &srvDesc, Handle); - - D3D12_SAMPLER_DESC samplerDesc = {}; - samplerDesc.Filter = D3D12_FILTER_MIN_MAG_MIP_POINT; - samplerDesc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_WRAP; - samplerDesc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_WRAP; - samplerDesc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_WRAP; - Handle = m_perFrameStorage.m_samplerDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); - Handle.ptr += (usedTexture) * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); - m_device->CreateSampler(&samplerDesc, Handle); - }*/ Handle = m_perFrameStorage.m_textureDescriptorsHeap->GetGPUDescriptorHandleForHeapStart(); Handle.ptr += m_perFrameStorage.m_currentTextureIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); From 7db35996483b992c94e1df307a74bdaedd7547a4 Mon Sep 17 00:00:00 2001 From: vlj Date: Tue, 2 Jun 2015 23:54:24 +0200 Subject: [PATCH 198/343] d3d12: Ping pong between data to avoid gpu stall as much as possible --- rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp | 12 +- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 125 +++++++++++------- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 10 +- rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp | 4 +- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 12 +- 5 files changed, 98 insertions(+), 65 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index d1e2fa258f..f31d82984a 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -414,8 +414,8 @@ void D3D12GSRender::setScaleOffset() D3D12_CONSTANT_BUFFER_VIEW_DESC constantBufferViewDesc = {}; constantBufferViewDesc.BufferLocation = scaleOffsetBuffer->GetGPUVirtualAddress(); constantBufferViewDesc.SizeInBytes = (UINT)256; - D3D12_CPU_DESCRIPTOR_HANDLE Handle = m_perFrameStorage.m_scaleOffsetDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); - Handle.ptr += m_perFrameStorage.m_currentScaleOffsetBufferIndex * 
m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + D3D12_CPU_DESCRIPTOR_HANDLE Handle = getCurrentResourceStorage().m_scaleOffsetDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); + Handle.ptr += getCurrentResourceStorage().m_currentScaleOffsetBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); m_device->CreateConstantBufferView(&constantBufferViewDesc, Handle); m_constantsData.m_resourceStoredSinceLastSync.push_back(std::make_tuple(heapOffset, 256, scaleOffsetBuffer)); } @@ -450,8 +450,8 @@ void D3D12GSRender::FillVertexShaderConstantsBuffer() D3D12_CONSTANT_BUFFER_VIEW_DESC constantBufferViewDesc = {}; constantBufferViewDesc.BufferLocation = constantsBuffer->GetGPUVirtualAddress(); constantBufferViewDesc.SizeInBytes = 512 * 4 * sizeof(float); - D3D12_CPU_DESCRIPTOR_HANDLE Handle = m_perFrameStorage.m_constantsBufferDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); - Handle.ptr += m_perFrameStorage.m_constantsBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + D3D12_CPU_DESCRIPTOR_HANDLE Handle = getCurrentResourceStorage().m_constantsBufferDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); + Handle.ptr += getCurrentResourceStorage().m_constantsBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); m_device->CreateConstantBufferView(&constantBufferViewDesc, Handle); m_constantsData.m_resourceStoredSinceLastSync.push_back(std::make_tuple(heapOffset, 512 * 4 * sizeof(float), constantsBuffer)); } @@ -523,8 +523,8 @@ void D3D12GSRender::FillPixelShaderConstantsBuffer() D3D12_CONSTANT_BUFFER_VIEW_DESC constantBufferViewDesc = {}; constantBufferViewDesc.BufferLocation = constantsBuffer->GetGPUVirtualAddress(); constantBufferViewDesc.SizeInBytes = (UINT)bufferSize; - D3D12_CPU_DESCRIPTOR_HANDLE Handle = 
m_perFrameStorage.m_constantsBufferDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); - Handle.ptr += m_perFrameStorage.m_constantsBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + D3D12_CPU_DESCRIPTOR_HANDLE Handle = getCurrentResourceStorage().m_constantsBufferDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); + Handle.ptr += getCurrentResourceStorage().m_constantsBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); m_device->CreateConstantBufferView(&constantBufferViewDesc, Handle); m_constantsData.m_resourceStoredSinceLastSync.push_back(std::make_tuple(heapOffset, bufferSize, constantsBuffer)); } diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 1764ab30d3..b284e0b6a0 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -98,6 +98,7 @@ void D3D12GSRender::ResourceStorage::Reset() void D3D12GSRender::ResourceStorage::Init(ID3D12Device *device) { + m_frameFinished = 0; // Create a global command allocator device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&m_commandAllocator)); device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&m_textureUploadCommandAllocator)); @@ -322,8 +323,10 @@ D3D12GSRender::D3D12GSRender() IID_PPV_ARGS(&m_rootSignatures[textureCount])); } - m_perFrameStorage.Init(m_device); - m_perFrameStorage.Reset(); + m_perFrameStorage[0].Init(m_device); + m_perFrameStorage[0].Reset(); + m_perFrameStorage[1].Init(m_device); + m_perFrameStorage[1].Reset(); vertexConstantShadowCopy = new float[512 * 4]; @@ -391,7 +394,8 @@ D3D12GSRender::~D3D12GSRender() m_dummyTexture->Release(); m_convertPSO->Release(); m_convertRootSignature->Release(); - m_perFrameStorage.Release(); + m_perFrameStorage[0].Release(); + m_perFrameStorage[1].Release(); m_commandQueueGraphic->Release(); m_commandQueueCopy->Release(); 
m_backbufferAsRendertarget[0]->Release(); @@ -436,8 +440,8 @@ void D3D12GSRender::ExecCMD(u32 cmd) InitDrawBuffers(); ID3D12GraphicsCommandList *commandList; - check(m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_perFrameStorage.m_commandAllocator, nullptr, IID_PPV_ARGS(&commandList))); - m_perFrameStorage.m_inflightCommandList.push_back(commandList); + check(m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, getCurrentResourceStorage().m_commandAllocator, nullptr, IID_PPV_ARGS(&commandList))); + getCurrentResourceStorage().m_inflightCommandList.push_back(commandList); /* if (m_set_color_mask) { @@ -686,8 +690,8 @@ void D3D12GSRender::ExecCMD() InitDrawBuffers(); ID3D12GraphicsCommandList *commandList; - m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_perFrameStorage.m_commandAllocator, nullptr, IID_PPV_ARGS(&commandList)); - m_perFrameStorage.m_inflightCommandList.push_back(commandList); + m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, getCurrentResourceStorage().m_commandAllocator, nullptr, IID_PPV_ARGS(&commandList)); + getCurrentResourceStorage().m_inflightCommandList.push_back(commandList); if (m_indexed_array.m_count) LoadVertexData(m_indexed_array.index_min, m_indexed_array.index_max - m_indexed_array.index_min + 1); @@ -711,20 +715,20 @@ void D3D12GSRender::ExecCMD() // Constants setScaleOffset(); - commandList->SetDescriptorHeaps(1, &m_perFrameStorage.m_scaleOffsetDescriptorHeap); - D3D12_GPU_DESCRIPTOR_HANDLE Handle = m_perFrameStorage.m_scaleOffsetDescriptorHeap->GetGPUDescriptorHandleForHeapStart(); - Handle.ptr += m_perFrameStorage.m_currentScaleOffsetBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + commandList->SetDescriptorHeaps(1, &getCurrentResourceStorage().m_scaleOffsetDescriptorHeap); + D3D12_GPU_DESCRIPTOR_HANDLE Handle = getCurrentResourceStorage().m_scaleOffsetDescriptorHeap->GetGPUDescriptorHandleForHeapStart(); + Handle.ptr += 
getCurrentResourceStorage().m_currentScaleOffsetBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); commandList->SetGraphicsRootDescriptorTable(0, Handle); - m_perFrameStorage.m_currentScaleOffsetBufferIndex++; + getCurrentResourceStorage().m_currentScaleOffsetBufferIndex++; - size_t currentBufferIndex = m_perFrameStorage.m_constantsBufferIndex; + size_t currentBufferIndex = getCurrentResourceStorage().m_constantsBufferIndex; FillVertexShaderConstantsBuffer(); - m_perFrameStorage.m_constantsBufferIndex++; + getCurrentResourceStorage().m_constantsBufferIndex++; FillPixelShaderConstantsBuffer(); - m_perFrameStorage.m_constantsBufferIndex++; + getCurrentResourceStorage().m_constantsBufferIndex++; - commandList->SetDescriptorHeaps(1, &m_perFrameStorage.m_constantsBufferDescriptorsHeap); - Handle = m_perFrameStorage.m_constantsBufferDescriptorsHeap->GetGPUDescriptorHandleForHeapStart(); + commandList->SetDescriptorHeaps(1, &getCurrentResourceStorage().m_constantsBufferDescriptorsHeap); + Handle = getCurrentResourceStorage().m_constantsBufferDescriptorsHeap->GetGPUDescriptorHandleForHeapStart(); Handle.ptr += currentBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); commandList->SetGraphicsRootDescriptorTable(1, Handle); commandList->SetPipelineState(m_PSO->first); @@ -733,17 +737,17 @@ void D3D12GSRender::ExecCMD() { size_t usedTexture = UploadTextures(); - Handle = m_perFrameStorage.m_textureDescriptorsHeap->GetGPUDescriptorHandleForHeapStart(); - Handle.ptr += m_perFrameStorage.m_currentTextureIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - commandList->SetDescriptorHeaps(1, &m_perFrameStorage.m_textureDescriptorsHeap); + Handle = getCurrentResourceStorage().m_textureDescriptorsHeap->GetGPUDescriptorHandleForHeapStart(); + Handle.ptr += getCurrentResourceStorage().m_currentTextureIndex * 
m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + commandList->SetDescriptorHeaps(1, &getCurrentResourceStorage().m_textureDescriptorsHeap); commandList->SetGraphicsRootDescriptorTable(2, Handle); - Handle = m_perFrameStorage.m_samplerDescriptorHeap->GetGPUDescriptorHandleForHeapStart(); - Handle.ptr += m_perFrameStorage.m_currentTextureIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); - commandList->SetDescriptorHeaps(1, &m_perFrameStorage.m_samplerDescriptorHeap); + Handle = getCurrentResourceStorage().m_samplerDescriptorHeap->GetGPUDescriptorHandleForHeapStart(); + Handle.ptr += getCurrentResourceStorage().m_currentTextureIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); + commandList->SetDescriptorHeaps(1, &getCurrentResourceStorage().m_samplerDescriptorHeap); commandList->SetGraphicsRootDescriptorTable(3, Handle); - m_perFrameStorage.m_currentTextureIndex += usedTexture; + getCurrentResourceStorage().m_currentTextureIndex += usedTexture; } size_t numRTT; @@ -843,8 +847,8 @@ void D3D12GSRender::ExecCMD() void D3D12GSRender::Flip() { ID3D12GraphicsCommandList *commandList; - m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_perFrameStorage.m_commandAllocator, nullptr, IID_PPV_ARGS(&commandList)); - m_perFrameStorage.m_inflightCommandList.push_back(commandList); + m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, getCurrentResourceStorage().m_commandAllocator, nullptr, IID_PPV_ARGS(&commandList)); + getCurrentResourceStorage().m_inflightCommandList.push_back(commandList); switch (m_surface_color_target) { @@ -887,43 +891,64 @@ void D3D12GSRender::Flip() // Add an event signaling queue completion Microsoft::WRL::ComPtr fence; m_device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&fence)); - HANDLE handle = CreateEvent(0, 0, 0, 0); - fence->SetEventOnCompletion(1, handle); + getCurrentResourceStorage().m_frameFinished = 
CreateEvent(0, 0, 0, 0); + fence->SetEventOnCompletion(1, getCurrentResourceStorage().m_frameFinished); m_commandQueueGraphic->Signal(fence.Get(), 1); - WaitForSingleObject(handle, INFINITE); - CloseHandle(handle); - m_perFrameStorage.Reset(); + + // Flush + getCurrentResourceStorage().Reset(); m_texturesCache.clear(); m_texturesRTTs.clear(); - for (auto tmp : m_constantsData.m_resourceStoredSinceLastSync) + if (getNonCurrentResourceStorage().m_frameFinished) { - std::get<2>(tmp)->Release(); - m_constantsData.m_getPos = std::get<0>(tmp); + WaitForSingleObject(getNonCurrentResourceStorage().m_frameFinished, INFINITE); + CloseHandle(getNonCurrentResourceStorage().m_frameFinished); + + for (auto tmp : getNonCurrentResourceStorage().m_inUseConstantsBuffers) + { + std::get<2>(tmp)->Release(); + m_constantsData.m_getPos = std::get<0>(tmp); + } + for (auto tmp : getNonCurrentResourceStorage().m_inUseVertexIndexBuffers) + { + std::get<2>(tmp)->Release(); + m_vertexIndexData.m_getPos = std::get<0>(tmp); + } + for (auto tmp : getNonCurrentResourceStorage().m_inUseTextureUploadBuffers) + { + std::get<2>(tmp)->Release(); + m_textureUploadData.m_getPos = std::get<0>(tmp); + } + for (auto tmp : getNonCurrentResourceStorage().m_inUseTexture2D) + { + std::get<2>(tmp)->Release(); + m_textureData.m_getPos = std::get<0>(tmp); + } } + + getNonCurrentResourceStorage().m_inUseConstantsBuffers = m_constantsData.m_resourceStoredSinceLastSync; m_constantsData.m_resourceStoredSinceLastSync.clear(); - for (auto tmp : m_vertexIndexData.m_resourceStoredSinceLastSync) - { - std::get<2>(tmp)->Release(); - m_vertexIndexData.m_getPos = std::get<0>(tmp); - } + getNonCurrentResourceStorage().m_inUseVertexIndexBuffers = m_vertexIndexData.m_resourceStoredSinceLastSync; m_vertexIndexData.m_resourceStoredSinceLastSync.clear(); - for (auto tmp : m_textureUploadData.m_resourceStoredSinceLastSync) - { - std::get<2>(tmp)->Release(); - m_textureUploadData.m_getPos = std::get<0>(tmp); - } + 
getNonCurrentResourceStorage().m_inUseTextureUploadBuffers = m_textureUploadData.m_resourceStoredSinceLastSync; m_textureUploadData.m_resourceStoredSinceLastSync.clear(); - for (auto tmp : m_textureData.m_resourceStoredSinceLastSync) - { - std::get<2>(tmp)->Release(); - m_textureData.m_getPos = std::get<0>(tmp); - } + getNonCurrentResourceStorage().m_inUseTexture2D = m_textureData.m_resourceStoredSinceLastSync; m_textureData.m_resourceStoredSinceLastSync.clear(); m_frame->Flip(nullptr); } +D3D12GSRender::ResourceStorage& D3D12GSRender::getCurrentResourceStorage() +{ + return m_perFrameStorage[m_swapChain->GetCurrentBackBufferIndex()]; +} + +D3D12GSRender::ResourceStorage& D3D12GSRender::getNonCurrentResourceStorage() +{ + return m_perFrameStorage[1 - m_swapChain->GetCurrentBackBufferIndex()]; +} + void D3D12GSRender::WriteDepthBuffer() { @@ -1059,7 +1084,7 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) m_readbackResources.m_putPos.store(heapOffset + sizeInByte); check( - m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_perFrameStorage.m_commandAllocator, nullptr, IID_PPV_ARGS(&convertCommandList)) + m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, getCurrentResourceStorage().m_commandAllocator, nullptr, IID_PPV_ARGS(&convertCommandList)) ); D3D12_DESCRIPTOR_HEAP_DESC descriptorHeapDesc = {}; @@ -1125,7 +1150,7 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) if (needTransfer) { check( - m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_perFrameStorage.m_commandAllocator, nullptr, IID_PPV_ARGS(&downloadCommandList)) + m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, getCurrentResourceStorage().m_commandAllocator, nullptr, IID_PPV_ARGS(&downloadCommandList)) ); } diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index e4290301c2..6cab1f5559 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ 
b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -80,12 +80,18 @@ private: struct ResourceStorage { + HANDLE m_frameFinished; ID3D12CommandAllocator *m_commandAllocator; ID3D12CommandAllocator *m_downloadCommandAllocator; std::list m_inflightCommandList; std::vector m_inflightResources; + std::vector > m_inUseConstantsBuffers; + std::vector > m_inUseVertexIndexBuffers; + std::vector > m_inUseTextureUploadBuffers; + std::vector > m_inUseTexture2D; + // Constants storage ID3D12DescriptorHeap *m_constantsBufferDescriptorsHeap; size_t m_constantsBufferIndex; @@ -103,7 +109,9 @@ private: void Release(); }; - ResourceStorage m_perFrameStorage; + ResourceStorage m_perFrameStorage[2]; + ResourceStorage &getCurrentResourceStorage(); + ResourceStorage &getNonCurrentResourceStorage(); // Constants storage DataHeap m_constantsData; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp index 6ebea36b62..bca0279b52 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp @@ -22,8 +22,8 @@ void D3D12GSRender::InitDrawBuffers() u32 address_z = GetAddress(m_surface_offset_z, m_context_dma_z - 0xfeed0000); ID3D12GraphicsCommandList *copycmdlist; - check(m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_perFrameStorage.m_commandAllocator, nullptr, IID_PPV_ARGS(©cmdlist))); - m_perFrameStorage.m_inflightCommandList.push_back(copycmdlist); + check(m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, getCurrentResourceStorage().m_commandAllocator, nullptr, IID_PPV_ARGS(©cmdlist))); + getCurrentResourceStorage().m_inflightCommandList.push_back(copycmdlist); // Make previous RTTs sampleable for (unsigned i = 0; i < 4; i++) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index e9ba950230..76b93611a7 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -186,7 +186,7 @@ size_t 
D3D12GSRender::UploadTextures() { // Upload at each iteration to take advantage of overlapping transfer ID3D12GraphicsCommandList *commandList; - check(m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_perFrameStorage.m_textureUploadCommandAllocator, nullptr, IID_PPV_ARGS(&commandList))); + check(m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, getCurrentResourceStorage().m_textureUploadCommandAllocator, nullptr, IID_PPV_ARGS(&commandList))); size_t heightInBlocks = (m_textures[i].GetHeight() + blockHeightInPixel - 1) / blockHeightInPixel; size_t widthInBlocks = (m_textures[i].GetWidth() + blockWidthInPixel - 1) / blockWidthInPixel; @@ -285,7 +285,7 @@ size_t D3D12GSRender::UploadTextures() commandList->Close(); m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)&commandList); - m_perFrameStorage.m_inflightCommandList.push_back(commandList); + getCurrentResourceStorage().m_inflightCommandList.push_back(commandList); m_texturesCache[texaddr] = vramTexture; } @@ -389,8 +389,8 @@ size_t D3D12GSRender::UploadTextures() break; } - D3D12_CPU_DESCRIPTOR_HANDLE Handle = m_perFrameStorage.m_textureDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); - Handle.ptr += (m_perFrameStorage.m_currentTextureIndex + usedTexture) * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + D3D12_CPU_DESCRIPTOR_HANDLE Handle = getCurrentResourceStorage().m_textureDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); + Handle.ptr += (getCurrentResourceStorage().m_currentTextureIndex + usedTexture) * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); m_device->CreateShaderResourceView(vramTexture, &srvDesc, Handle); // TODO : Correctly define sampler @@ -405,8 +405,8 @@ size_t D3D12GSRender::UploadTextures() samplerDesc.BorderColor[4] = (FLOAT)m_textures[i].GetBorderColor(); samplerDesc.MinLOD = (FLOAT)(m_textures[i].GetMinLOD() >> 8); samplerDesc.MaxLOD = 
(FLOAT)(m_textures[i].GetMaxLOD() >> 8); - Handle = m_perFrameStorage.m_samplerDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); - Handle.ptr += (m_perFrameStorage.m_currentTextureIndex + usedTexture) * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); + Handle = getCurrentResourceStorage().m_samplerDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); + Handle.ptr += (getCurrentResourceStorage().m_currentTextureIndex + usedTexture) * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); m_device->CreateSampler(&samplerDesc, Handle); usedTexture++; From 9e2dfcaba34df125db2999ba28acc7f6f00d1609 Mon Sep 17 00:00:00 2001 From: vlj Date: Wed, 3 Jun 2015 00:02:18 +0200 Subject: [PATCH 199/343] d3d12: fix memleak --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 31 ++++++++++++++++----------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index b284e0b6a0..8b3a3434ee 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -85,6 +85,15 @@ void D3D12GSRender::ResourceStorage::Reset() m_currentScaleOffsetBufferIndex = 0; m_currentTextureIndex = 0; + for (auto tmp : m_inUseConstantsBuffers) + std::get<2>(tmp)->Release(); + for (auto tmp : m_inUseVertexIndexBuffers) + std::get<2>(tmp)->Release(); + for (auto tmp : m_inUseTextureUploadBuffers) + std::get<2>(tmp)->Release(); + for (auto tmp : m_inUseTexture2D) + std::get<2>(tmp)->Release(); + m_commandAllocator->Reset(); m_textureUploadCommandAllocator->Reset(); m_downloadCommandAllocator->Reset(); @@ -131,6 +140,14 @@ void D3D12GSRender::ResourceStorage::Init(ID3D12Device *device) void D3D12GSRender::ResourceStorage::Release() { // NOTE: Should be released only if no command are in flight ! 
+ for (auto tmp : m_inUseConstantsBuffers) + std::get<2>(tmp)->Release(); + for (auto tmp : m_inUseVertexIndexBuffers) + std::get<2>(tmp)->Release(); + for (auto tmp : m_inUseTextureUploadBuffers) + std::get<2>(tmp)->Release(); + for (auto tmp : m_inUseTexture2D) + std::get<2>(tmp)->Release(); m_constantsBufferDescriptorsHeap->Release(); m_scaleOffsetDescriptorHeap->Release(); @@ -896,7 +913,6 @@ void D3D12GSRender::Flip() m_commandQueueGraphic->Signal(fence.Get(), 1); // Flush - getCurrentResourceStorage().Reset(); m_texturesCache.clear(); m_texturesRTTs.clear(); @@ -906,25 +922,14 @@ void D3D12GSRender::Flip() CloseHandle(getNonCurrentResourceStorage().m_frameFinished); for (auto tmp : getNonCurrentResourceStorage().m_inUseConstantsBuffers) - { - std::get<2>(tmp)->Release(); m_constantsData.m_getPos = std::get<0>(tmp); - } for (auto tmp : getNonCurrentResourceStorage().m_inUseVertexIndexBuffers) - { - std::get<2>(tmp)->Release(); m_vertexIndexData.m_getPos = std::get<0>(tmp); - } for (auto tmp : getNonCurrentResourceStorage().m_inUseTextureUploadBuffers) - { - std::get<2>(tmp)->Release(); m_textureUploadData.m_getPos = std::get<0>(tmp); - } for (auto tmp : getNonCurrentResourceStorage().m_inUseTexture2D) - { - std::get<2>(tmp)->Release(); m_textureData.m_getPos = std::get<0>(tmp); - } + getNonCurrentResourceStorage().Reset(); } getNonCurrentResourceStorage().m_inUseConstantsBuffers = m_constantsData.m_resourceStoredSinceLastSync; From b63c77ca7d974fb7ade5c70be2cc476a462e559e Mon Sep 17 00:00:00 2001 From: vlj Date: Wed, 3 Jun 2015 00:25:51 +0200 Subject: [PATCH 200/343] d3d12: Swap current/non current right after present has been called --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 28 +++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 8b3a3434ee..09f193a984 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ 
b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -908,37 +908,37 @@ void D3D12GSRender::Flip() // Add an event signaling queue completion Microsoft::WRL::ComPtr fence; m_device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&fence)); - getCurrentResourceStorage().m_frameFinished = CreateEvent(0, 0, 0, 0); - fence->SetEventOnCompletion(1, getCurrentResourceStorage().m_frameFinished); + getNonCurrentResourceStorage().m_frameFinished = CreateEvent(0, 0, 0, 0); + fence->SetEventOnCompletion(1, getNonCurrentResourceStorage().m_frameFinished); m_commandQueueGraphic->Signal(fence.Get(), 1); // Flush m_texturesCache.clear(); m_texturesRTTs.clear(); - if (getNonCurrentResourceStorage().m_frameFinished) + if (getCurrentResourceStorage().m_frameFinished) { - WaitForSingleObject(getNonCurrentResourceStorage().m_frameFinished, INFINITE); - CloseHandle(getNonCurrentResourceStorage().m_frameFinished); + WaitForSingleObject(getCurrentResourceStorage().m_frameFinished, INFINITE); + CloseHandle(getCurrentResourceStorage().m_frameFinished); - for (auto tmp : getNonCurrentResourceStorage().m_inUseConstantsBuffers) + for (auto tmp : getCurrentResourceStorage().m_inUseConstantsBuffers) m_constantsData.m_getPos = std::get<0>(tmp); - for (auto tmp : getNonCurrentResourceStorage().m_inUseVertexIndexBuffers) + for (auto tmp : getCurrentResourceStorage().m_inUseVertexIndexBuffers) m_vertexIndexData.m_getPos = std::get<0>(tmp); - for (auto tmp : getNonCurrentResourceStorage().m_inUseTextureUploadBuffers) + for (auto tmp : getCurrentResourceStorage().m_inUseTextureUploadBuffers) m_textureUploadData.m_getPos = std::get<0>(tmp); - for (auto tmp : getNonCurrentResourceStorage().m_inUseTexture2D) + for (auto tmp : getCurrentResourceStorage().m_inUseTexture2D) m_textureData.m_getPos = std::get<0>(tmp); - getNonCurrentResourceStorage().Reset(); + getCurrentResourceStorage().Reset(); } - getNonCurrentResourceStorage().m_inUseConstantsBuffers = m_constantsData.m_resourceStoredSinceLastSync; + 
getCurrentResourceStorage().m_inUseConstantsBuffers = m_constantsData.m_resourceStoredSinceLastSync; m_constantsData.m_resourceStoredSinceLastSync.clear(); - getNonCurrentResourceStorage().m_inUseVertexIndexBuffers = m_vertexIndexData.m_resourceStoredSinceLastSync; + getCurrentResourceStorage().m_inUseVertexIndexBuffers = m_vertexIndexData.m_resourceStoredSinceLastSync; m_vertexIndexData.m_resourceStoredSinceLastSync.clear(); - getNonCurrentResourceStorage().m_inUseTextureUploadBuffers = m_textureUploadData.m_resourceStoredSinceLastSync; + getCurrentResourceStorage().m_inUseTextureUploadBuffers = m_textureUploadData.m_resourceStoredSinceLastSync; m_textureUploadData.m_resourceStoredSinceLastSync.clear(); - getNonCurrentResourceStorage().m_inUseTexture2D = m_textureData.m_resourceStoredSinceLastSync; + getCurrentResourceStorage().m_inUseTexture2D = m_textureData.m_resourceStoredSinceLastSync; m_textureData.m_resourceStoredSinceLastSync.clear(); m_frame->Flip(nullptr); From ac352cd083d803cd7145361dc7475da93a466a92 Mon Sep 17 00:00:00 2001 From: vlj Date: Wed, 3 Jun 2015 18:57:07 +0200 Subject: [PATCH 201/343] d3d12: Use custom loadvertex like call --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 28 ++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 09f193a984..b67d2771a7 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -706,13 +706,35 @@ void D3D12GSRender::ExecCMD() { InitDrawBuffers(); + // Init vertex count + // TODO: Very hackish, clean this + if (m_indexed_array.m_count) + { + for (u32 i = 0; i < m_vertex_count; ++i) + { + if (!m_vertex_data[i].IsEnabled()) continue; + if (!m_vertex_data[i].addr) continue; + + const u32 tsize = m_vertex_data[i].GetTypeSize(); + m_vertex_data[i].data.resize((m_indexed_array.index_min + m_indexed_array.index_max - m_indexed_array.index_min + 1) * tsize * 
m_vertex_data[i].size); + } + } + else + { + for (u32 i = 0; i < m_vertex_count; ++i) + { + if (!m_vertex_data[i].IsEnabled()) continue; + if (!m_vertex_data[i].addr) continue; + + const u32 tsize = m_vertex_data[i].GetTypeSize(); + m_vertex_data[i].data.resize((m_draw_array_first + m_draw_array_count) * tsize * m_vertex_data[i].size); + } + } + ID3D12GraphicsCommandList *commandList; m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, getCurrentResourceStorage().m_commandAllocator, nullptr, IID_PPV_ARGS(&commandList)); getCurrentResourceStorage().m_inflightCommandList.push_back(commandList); - if (m_indexed_array.m_count) - LoadVertexData(m_indexed_array.index_min, m_indexed_array.index_max - m_indexed_array.index_min + 1); - if (m_indexed_array.m_count || m_draw_array_count) { const std::pair, D3D12_INDEX_BUFFER_VIEW> &vertexIndexBufferViews = EnableVertexData(m_indexed_array.m_count ? true : false); From a5fb8c95f4696990529c2fa6ca895272a08371b2 Mon Sep 17 00:00:00 2001 From: vlj Date: Wed, 3 Jun 2015 20:30:20 +0200 Subject: [PATCH 202/343] d3d12: Fix interframe sync --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 19 ++++++++++--------- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 3 ++- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index b67d2771a7..dc0e04ccf5 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -107,7 +107,7 @@ void D3D12GSRender::ResourceStorage::Reset() void D3D12GSRender::ResourceStorage::Init(ID3D12Device *device) { - m_frameFinished = 0; + m_frameFinishedHandle = 0; // Create a global command allocator device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&m_commandAllocator)); device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&m_textureUploadCommandAllocator)); @@ -928,20 +928,21 @@ void D3D12GSRender::Flip() 
check(m_swapChain->Present(Ini.GSVSyncEnable.GetValue() ? 1 : 0, 0)); // Add an event signaling queue completion - Microsoft::WRL::ComPtr fence; - m_device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&fence)); - getNonCurrentResourceStorage().m_frameFinished = CreateEvent(0, 0, 0, 0); - fence->SetEventOnCompletion(1, getNonCurrentResourceStorage().m_frameFinished); - m_commandQueueGraphic->Signal(fence.Get(), 1); + + m_device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&getNonCurrentResourceStorage().m_frameFinishedFence)); + getNonCurrentResourceStorage().m_frameFinishedHandle = CreateEvent(0, 0, 0, 0); + getNonCurrentResourceStorage().m_frameFinishedFence->SetEventOnCompletion(1, getNonCurrentResourceStorage().m_frameFinishedHandle); + m_commandQueueGraphic->Signal(getNonCurrentResourceStorage().m_frameFinishedFence, 1); // Flush m_texturesCache.clear(); m_texturesRTTs.clear(); - if (getCurrentResourceStorage().m_frameFinished) + if (getCurrentResourceStorage().m_frameFinishedHandle) { - WaitForSingleObject(getCurrentResourceStorage().m_frameFinished, INFINITE); - CloseHandle(getCurrentResourceStorage().m_frameFinished); + WaitForSingleObject(getCurrentResourceStorage().m_frameFinishedHandle, INFINITE); + CloseHandle(getCurrentResourceStorage().m_frameFinishedHandle); + getCurrentResourceStorage().m_frameFinishedFence->Release(); for (auto tmp : getCurrentResourceStorage().m_inUseConstantsBuffers) m_constantsData.m_getPos = std::get<0>(tmp); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 6cab1f5559..f0f36a225c 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -80,7 +80,8 @@ private: struct ResourceStorage { - HANDLE m_frameFinished; + ID3D12Fence* m_frameFinishedFence; + HANDLE m_frameFinishedHandle; ID3D12CommandAllocator *m_commandAllocator; ID3D12CommandAllocator *m_downloadCommandAllocator; std::list m_inflightCommandList; From 
a751a06d01dbab0505dd2b243c57ada9e3424659 Mon Sep 17 00:00:00 2001 From: vlj Date: Thu, 4 Jun 2015 01:26:25 +0200 Subject: [PATCH 203/343] d3d12: Try not to overcommit texture memory --- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index 76b93611a7..7323dad7d5 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -250,6 +250,9 @@ size_t D3D12GSRender::UploadTextures() } Texture->Unmap(0, nullptr); + size_t powerOf2Height = log2(heightInBlocks) + 1; + textureSize = rowPitch * powerOf2Height; + assert(m_textureData.canAlloc(textureSize * 2)); size_t heapOffset2 = m_textureData.alloc(textureSize * 2); From d886fd55d3d9712552b894c8cdbeb709dcddec3b Mon Sep 17 00:00:00 2001 From: vlj Date: Thu, 4 Jun 2015 01:32:12 +0200 Subject: [PATCH 204/343] d3d12: Fix for size calculation --- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index 7323dad7d5..1dc4244744 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -251,10 +251,10 @@ size_t D3D12GSRender::UploadTextures() Texture->Unmap(0, nullptr); size_t powerOf2Height = log2(heightInBlocks) + 1; - textureSize = rowPitch * powerOf2Height; + textureSize = rowPitch * (1 << powerOf2Height); - assert(m_textureData.canAlloc(textureSize * 2)); - size_t heapOffset2 = m_textureData.alloc(textureSize * 2); + assert(m_textureData.canAlloc(textureSize)); + size_t heapOffset2 = m_textureData.alloc(textureSize); check(m_device->CreatePlacedResource( m_textureData.m_heap, @@ -264,7 +264,7 @@ size_t D3D12GSRender::UploadTextures() nullptr, IID_PPV_ARGS(&vramTexture) )); - m_textureData.m_resourceStoredSinceLastSync.push_back(std::make_tuple(heapOffset2, textureSize * 2, vramTexture)); + 
m_textureData.m_resourceStoredSinceLastSync.push_back(std::make_tuple(heapOffset2, textureSize, vramTexture)); D3D12_TEXTURE_COPY_LOCATION dst = {}, src = {}; dst.pResource = vramTexture; From 593b9a494d243e8f1f77c8e87be69a81eb21e511 Mon Sep 17 00:00:00 2001 From: vlj Date: Thu, 4 Jun 2015 01:35:01 +0200 Subject: [PATCH 205/343] d3d12: Double texture storage size --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index dc0e04ccf5..025358125d 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -396,7 +396,7 @@ D3D12GSRender::D3D12GSRender() m_constantsData.Init(m_device, 1024 * 1024 * 128, D3D12_HEAP_TYPE_UPLOAD, D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS); m_vertexIndexData.Init(m_device, 1024 * 1024 * 128, D3D12_HEAP_TYPE_UPLOAD, D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS); m_textureUploadData.Init(m_device, 1024 * 1024 * 256, D3D12_HEAP_TYPE_UPLOAD, D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS); - m_textureData.Init(m_device, 1024 * 1024 * 256, D3D12_HEAP_TYPE_DEFAULT, D3D12_HEAP_FLAG_ALLOW_ONLY_NON_RT_DS_TEXTURES); + m_textureData.Init(m_device, 1024 * 1024 * 512, D3D12_HEAP_TYPE_DEFAULT, D3D12_HEAP_FLAG_ALLOW_ONLY_NON_RT_DS_TEXTURES); } D3D12GSRender::~D3D12GSRender() From 76d52b4bb33a19e554ec5c11bf501b8583ee31f8 Mon Sep 17 00:00:00 2001 From: raven02 Date: Thu, 4 Jun 2015 20:51:46 +0800 Subject: [PATCH 206/343] d3d12: Implement A4R4G4B4 texture format with byte swapped Make the guided fate paradox works --- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 53 ++++++++++++++++++---------- 1 file changed, 35 insertions(+), 18 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index 1dc4244744..562188b37d 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -100,7 +100,6 @@ size_t D3D12GSRender::UploadTextures() switch (format) { 
case CELL_GCM_TEXTURE_A1R5G5B5: - case CELL_GCM_TEXTURE_A4R4G4B4: case CELL_GCM_TEXTURE_G8B8: case CELL_GCM_TEXTURE_R6G5B5: case CELL_GCM_TEXTURE_DEPTH24_D8: @@ -122,6 +121,11 @@ size_t D3D12GSRender::UploadTextures() default: LOG_ERROR(RSX, "Unimplemented Texture format : %x", format); break; + case CELL_GCM_TEXTURE_A4R4G4B4: + dxgiFormat = DXGI_FORMAT_B4G4R4A4_UNORM; + blockSizeInByte = 2; + blockWidthInPixel = 1, blockHeightInPixel = 1; + break; case CELL_GCM_TEXTURE_R5G6B5: dxgiFormat = DXGI_FORMAT_B5G6R5_UNORM; blockSizeInByte = 2; @@ -218,35 +222,48 @@ size_t D3D12GSRender::UploadTextures() { size_t m_texture_pitch = m_textures[i].m_pitch; if (!m_texture_pitch) m_texture_pitch = rowPitch; - if (format == CELL_GCM_TEXTURE_A8R8G8B8 && is_swizzled) + switch (format) { - u32 *src, *dst; - u32 log2width, log2height; - - src = (u32*)pixels; - dst = (u32*)textureData; - - log2width = (u32)(logf(m_textures[i].GetWidth()) / logf(2.f)); - log2height = (u32)(logf(m_textures[i].GetHeight()) / logf(2.f)); - - for (int j = 0; j < m_textures[i].GetWidth(); j++) + case CELL_GCM_TEXTURE_A8R8G8B8: + { + if (is_swizzled) { - dst[(row * rowPitch / 4) + j] = src[LinearToSwizzleAddress(j, i, 0, log2width, log2height, 0)]; + u32 *src, *dst; + u32 log2width, log2height; + + src = (u32*)pixels; + dst = (u32*)textureData; + + log2width = (u32)(logf(m_textures[i].GetWidth()) / logf(2.f)); + log2height = (u32)(logf(m_textures[i].GetHeight()) / logf(2.f)); + + for (int j = 0; j < m_textures[i].GetWidth(); j++) + { + dst[(row * rowPitch / 4) + j] = src[LinearToSwizzleAddress(j, i, 0, log2width, log2height, 0)]; + } } + else + streamBuffer((char*)textureData + row * rowPitch, (char*)pixels + row * m_texture_pitch, m_texture_pitch); + break; } - else if (format == CELL_GCM_TEXTURE_R5G6B5) + case CELL_GCM_TEXTURE_A4R4G4B4: + case CELL_GCM_TEXTURE_R5G6B5: { - unsigned short *dst = (unsigned short *)textureData, - *src = (unsigned short *)pixels; + unsigned short *dst = (unsigned short 
*)textureData, *src = (unsigned short *)pixels; for (int j = 0; j < m_textures[i].GetWidth(); j++) { u16 tmp = src[row * m_texture_pitch / 2 + j]; dst[row * rowPitch / 2 + j] = (tmp >> 8) | (tmp << 8); } + break; } - else + default: + { streamBuffer((char*)textureData + row * rowPitch, (char*)pixels + row * m_texture_pitch, m_texture_pitch); + break; + } + } } Texture->Unmap(0, nullptr); @@ -300,7 +317,6 @@ size_t D3D12GSRender::UploadTextures() switch (format) { case CELL_GCM_TEXTURE_A1R5G5B5: - case CELL_GCM_TEXTURE_A4R4G4B4: case CELL_GCM_TEXTURE_G8B8: case CELL_GCM_TEXTURE_R6G5B5: case CELL_GCM_TEXTURE_DEPTH24_D8: @@ -322,6 +338,7 @@ size_t D3D12GSRender::UploadTextures() default: LOG_ERROR(RSX, "Unimplemented Texture format : %x", format); break; + case CELL_GCM_TEXTURE_A4R4G4B4: case CELL_GCM_TEXTURE_R5G6B5: srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; break; From a2b8d3a885ec87d1b07ac919d5a5a9d509439d81 Mon Sep 17 00:00:00 2001 From: vlj Date: Thu, 4 Jun 2015 01:48:32 +0200 Subject: [PATCH 207/343] d3d12: Fix memleak --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 025358125d..fee152295b 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -160,6 +160,8 @@ void D3D12GSRender::ResourceStorage::Release() m_commandAllocator->Release(); m_textureUploadCommandAllocator->Release(); m_downloadCommandAllocator->Release(); + CloseHandle(m_frameFinishedHandle); + m_frameFinishedFence->Release(); } // 32 bits float to U8 unorm CS From 25c09c508890977ab5c7ba54a8c20eb082d0ccb7 Mon Sep 17 00:00:00 2001 From: raven02 Date: Thu, 4 Jun 2015 23:21:49 +0800 Subject: [PATCH 208/343] d3d12: Add Blend Op --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 59 +++++++++++++++++---------- rpcs3/Emu/RSX/GCM.h | 58 +++++++++++++++++++------- 2 files changed, 81 insertions(+), 36 deletions(-) 
diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index fee152295b..21246d4d25 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -531,27 +531,42 @@ void D3D12GSRender::ExecCMD(u32 cmd) m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**) &commandList); } -static D3D12_BLEND_OP getBlendOp() +static D3D12_BLEND_OP getBlendOp(u16 op) { - return D3D12_BLEND_OP_ADD; + switch (op) + { + case CELL_GCM_FUNC_ADD: return D3D12_BLEND_OP_ADD; + case CELL_GCM_FUNC_SUBTRACT: return D3D12_BLEND_OP_SUBTRACT; + case CELL_GCM_FUNC_REVERSE_SUBTRACT: return D3D12_BLEND_OP_REV_SUBTRACT; + case CELL_GCM_MIN: return D3D12_BLEND_OP_MIN; + case CELL_GCM_MAX: return D3D12_BLEND_OP_MAX; + case CELL_GCM_FUNC_ADD_SIGNED: + case CELL_GCM_FUNC_REVERSE_ADD_SIGNED: + case CELL_GCM_FUNC_REVERSE_SUBTRACT_SIGNED: + LOG_WARNING(RSX, "Unsupported Blend Op %d", op); + } } -static D3D12_BLEND getBlendFactor(u16 glFactor) +static D3D12_BLEND getBlendFactor(u16 factor) { - switch (glFactor) + switch (factor) { - default: LOG_WARNING(RSX, "Unsupported Blend Op %d", glFactor); - case GL_ZERO: return D3D12_BLEND_ZERO; - case GL_ONE: return D3D12_BLEND_ONE; - case GL_SRC_COLOR: return D3D12_BLEND_SRC_COLOR; - case GL_ONE_MINUS_SRC_COLOR: return D3D12_BLEND_INV_SRC_COLOR; - case GL_DST_COLOR: return D3D12_BLEND_DEST_COLOR; - case GL_ONE_MINUS_DST_COLOR: D3D12_BLEND_INV_DEST_COLOR; - case GL_SRC_ALPHA: return D3D12_BLEND_SRC_ALPHA; - case GL_ONE_MINUS_SRC_ALPHA: return D3D12_BLEND_INV_SRC_ALPHA; - case GL_DST_ALPHA: return D3D12_BLEND_DEST_ALPHA; - case GL_ONE_MINUS_DST_ALPHA: return D3D12_BLEND_INV_DEST_ALPHA; - case GL_SRC_ALPHA_SATURATE: return D3D12_BLEND_SRC_ALPHA_SAT; + case CELL_GCM_ZERO: return D3D12_BLEND_ZERO; + case CELL_GCM_ONE: return D3D12_BLEND_ONE; + case CELL_GCM_SRC_COLOR: return D3D12_BLEND_SRC_COLOR; + case CELL_GCM_ONE_MINUS_SRC_COLOR: return D3D12_BLEND_INV_SRC_COLOR; + case 
CELL_GCM_SRC_ALPHA: return D3D12_BLEND_SRC_ALPHA; + case CELL_GCM_ONE_MINUS_SRC_ALPHA: return D3D12_BLEND_INV_SRC_ALPHA; + case CELL_GCM_DST_ALPHA: return D3D12_BLEND_DEST_ALPHA; + case CELL_GCM_ONE_MINUS_DST_ALPHA: return D3D12_BLEND_INV_DEST_ALPHA; + case CELL_GCM_DST_COLOR: return D3D12_BLEND_DEST_COLOR; + case CELL_GCM_ONE_MINUS_DST_COLOR: return D3D12_BLEND_INV_DEST_COLOR; + case CELL_GCM_SRC_ALPHA_SATURATE: return D3D12_BLEND_SRC_ALPHA_SAT; + case CELL_GCM_CONSTANT_COLOR: + case CELL_GCM_ONE_MINUS_CONSTANT_COLOR: + case CELL_GCM_CONSTANT_ALPHA: + case CELL_GCM_ONE_MINUS_CONSTANT_ALPHA: + LOG_WARNING(RSX, "Unsupported Blend Factor %d", factor); } } @@ -633,21 +648,23 @@ bool D3D12GSRender::LoadProgram() }; prop.Blend = CD3D12_BLEND_DESC; + if (m_set_blend) + { + prop.Blend.RenderTarget[0].BlendEnable = true; + } + if (m_set_blend_equation) { -// glBlendEquationSeparate(m_blend_equation_rgb, m_blend_equation_alpha); -// checkForGlError("glBlendEquationSeparate"); + prop.Blend.RenderTarget[0].BlendOp = getBlendOp(m_blend_equation_rgb); + prop.Blend.RenderTarget[0].BlendOpAlpha = getBlendOp(m_blend_equation_alpha); } if (m_set_blend_sfactor && m_set_blend_dfactor) { - prop.Blend.RenderTarget[0].BlendEnable = true; - prop.Blend.RenderTarget[0].BlendOp = D3D12_BLEND_OP_ADD; prop.Blend.RenderTarget[0].SrcBlend = getBlendFactor(m_blend_sfactor_rgb); prop.Blend.RenderTarget[0].DestBlend = getBlendFactor(m_blend_dfactor_rgb); prop.Blend.RenderTarget[0].SrcBlendAlpha = getBlendFactor(m_blend_sfactor_alpha); prop.Blend.RenderTarget[0].DestBlendAlpha = getBlendFactor(m_blend_dfactor_alpha); - prop.Blend.RenderTarget[0].BlendOpAlpha = D3D12_BLEND_OP_ADD; } if (m_set_logic_op) diff --git a/rpcs3/Emu/RSX/GCM.h b/rpcs3/Emu/RSX/GCM.h index 7c694efc37..8772f27374 100644 --- a/rpcs3/Emu/RSX/GCM.h +++ b/rpcs3/Emu/RSX/GCM.h @@ -136,21 +136,49 @@ enum CELL_GCM_TEXTURE_MIRROR_ONCE_CLAMP = 8, // Logic Op - CELL_GCM_CLEAR = 1, - CELL_GCM_AND = 2, - CELL_GCM_AND_REVERSE = 3, - 
CELL_GCM_COPY = 4, - CELL_GCM_AND_INVERTED = 5, - CELL_GCM_NOOP = 6, - CELL_GCM_XOR = 7, - CELL_GCM_OR = 8, - CELL_GCM_NOR = 9, - CELL_GCM_EQUIV = 10, - CELL_GCM_INVERT = 11, - CELL_GCM_OR_REVERSE = 12, - CELL_GCM_COPY_INVERTED = 13, - CELL_GCM_OR_INVERTED = 14, - CELL_GCM_NAND = 15, + CELL_GCM_CLEAR = 0x1500, + CELL_GCM_AND = 0x1501, + CELL_GCM_AND_REVERSE = 0x1502, + CELL_GCM_COPY = 0x1503, + CELL_GCM_AND_INVERTED = 0x1504, + CELL_GCM_NOOP = 0x1505, + CELL_GCM_XOR = 0x1506, + CELL_GCM_OR = 0x1507, + CELL_GCM_NOR = 0x1508, + CELL_GCM_EQUIV = 0x1509, + CELL_GCM_INVERT = 0x150A, + CELL_GCM_OR_REVERSE = 0x150B, + CELL_GCM_COPY_INVERTED = 0x150C, + CELL_GCM_OR_INVERTED = 0x150D, + CELL_GCM_NAND = 0x150E, + CELL_GCM_SET = 0x150F, + + // Blend Op + CELL_GCM_FUNC_ADD = 0x8006, + CELL_GCM_MIN = 0x8007, + CELL_GCM_MAX = 0x8008, + CELL_GCM_FUNC_SUBTRACT = 0x800A, + CELL_GCM_FUNC_REVERSE_SUBTRACT = 0x800B, + CELL_GCM_FUNC_REVERSE_SUBTRACT_SIGNED = 0x0000F005, + CELL_GCM_FUNC_ADD_SIGNED = 0x0000F006, + CELL_GCM_FUNC_REVERSE_ADD_SIGNED = 0x0000F007, + + // Blend Factor + CELL_GCM_ZERO = 0, + CELL_GCM_ONE = 1, + CELL_GCM_SRC_COLOR = 0x0300, + CELL_GCM_ONE_MINUS_SRC_COLOR = 0x0301, + CELL_GCM_SRC_ALPHA = 0x0302, + CELL_GCM_ONE_MINUS_SRC_ALPHA = 0x0303, + CELL_GCM_DST_ALPHA = 0x0304, + CELL_GCM_ONE_MINUS_DST_ALPHA = 0x0305, + CELL_GCM_DST_COLOR = 0x0306, + CELL_GCM_ONE_MINUS_DST_COLOR = 0x0307, + CELL_GCM_SRC_ALPHA_SATURATE = 0x0308, + CELL_GCM_CONSTANT_COLOR = 0x8001, + CELL_GCM_ONE_MINUS_CONSTANT_COLOR = 0x8002, + CELL_GCM_CONSTANT_ALPHA = 0x8003, + CELL_GCM_ONE_MINUS_CONSTANT_ALPHA = 0x8004, }; // GCM Surface From f31282623a2d074aa5da0dc39730c14cbce73552 Mon Sep 17 00:00:00 2001 From: vlj Date: Thu, 4 Jun 2015 18:20:18 +0200 Subject: [PATCH 209/343] d3d12: Fix texture unswizzling --- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index 
562188b37d..9fcdcce345 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -239,7 +239,7 @@ size_t D3D12GSRender::UploadTextures() for (int j = 0; j < m_textures[i].GetWidth(); j++) { - dst[(row * rowPitch / 4) + j] = src[LinearToSwizzleAddress(j, i, 0, log2width, log2height, 0)]; + dst[(row * rowPitch / 4) + j] = src[LinearToSwizzleAddress(j, row, 0, log2width, log2height, 0)]; } } else From 5882f9defb4c3302b298124060549ee6ba77466a Mon Sep 17 00:00:00 2001 From: vlj Date: Thu, 4 Jun 2015 18:57:59 +0200 Subject: [PATCH 210/343] d3d12: Do not use texture pitch but compute it ourself Fix sonic cd splash screen --- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index 9fcdcce345..c159101ff8 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -192,11 +192,10 @@ size_t D3D12GSRender::UploadTextures() ID3D12GraphicsCommandList *commandList; check(m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, getCurrentResourceStorage().m_textureUploadCommandAllocator, nullptr, IID_PPV_ARGS(&commandList))); - size_t heightInBlocks = (m_textures[i].GetHeight() + blockHeightInPixel - 1) / blockHeightInPixel; - size_t widthInBlocks = (m_textures[i].GetWidth() + blockWidthInPixel - 1) / blockWidthInPixel; + size_t heightInBlocks = (h + blockHeightInPixel - 1) / blockHeightInPixel; + size_t widthInBlocks = (w + blockWidthInPixel - 1) / blockWidthInPixel; // Multiple of 256 - size_t rowPitch = blockSizeInByte * widthInBlocks; - rowPitch = (rowPitch + 255) & ~255; + size_t rowPitch = powerOf2Align(blockSizeInByte * widthInBlocks, 256); ID3D12Resource *Texture; size_t textureSize = rowPitch * heightInBlocks; @@ -220,7 +219,7 @@ size_t D3D12GSRender::UploadTextures() // Upload with correct rowpitch for (unsigned row = 0; row < heightInBlocks; row++) 
{ - size_t m_texture_pitch = m_textures[i].m_pitch; + size_t m_texture_pitch = powerOf2Align(w * blockSizeInByte, 4); if (!m_texture_pitch) m_texture_pitch = rowPitch; switch (format) { @@ -276,7 +275,7 @@ size_t D3D12GSRender::UploadTextures() check(m_device->CreatePlacedResource( m_textureData.m_heap, heapOffset2, - &getTexture2DResourceDesc(m_textures[i].GetWidth(), m_textures[i].GetHeight(), dxgiFormat), + &getTexture2DResourceDesc(w, h, dxgiFormat), D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&vramTexture) @@ -289,8 +288,8 @@ size_t D3D12GSRender::UploadTextures() src.pResource = Texture; src.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; src.PlacedFootprint.Footprint.Depth = 1; - src.PlacedFootprint.Footprint.Width = m_textures[i].GetWidth(); - src.PlacedFootprint.Footprint.Height = m_textures[i].GetHeight(); + src.PlacedFootprint.Footprint.Width = w; + src.PlacedFootprint.Footprint.Height = h; src.PlacedFootprint.Footprint.RowPitch = (UINT)rowPitch; src.PlacedFootprint.Footprint.Format = dxgiFormat; From caf863be4d2433c55609838cad8f02ee3f39a9b9 Mon Sep 17 00:00:00 2001 From: vlj Date: Thu, 4 Jun 2015 19:03:24 +0200 Subject: [PATCH 211/343] d3d12: Fix crash at exit --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 21246d4d25..d518770e14 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -84,6 +84,8 @@ void D3D12GSRender::ResourceStorage::Reset() m_constantsBufferIndex = 0; m_currentScaleOffsetBufferIndex = 0; m_currentTextureIndex = 0; + m_frameFinishedFence = nullptr; + m_frameFinishedHandle = 0; for (auto tmp : m_inUseConstantsBuffers) std::get<2>(tmp)->Release(); @@ -160,8 +162,10 @@ void D3D12GSRender::ResourceStorage::Release() m_commandAllocator->Release(); m_textureUploadCommandAllocator->Release(); m_downloadCommandAllocator->Release(); - 
CloseHandle(m_frameFinishedHandle); - m_frameFinishedFence->Release(); + if (m_frameFinishedHandle) + CloseHandle(m_frameFinishedHandle); + if (m_frameFinishedFence) + m_frameFinishedFence->Release(); } // 32 bits float to U8 unorm CS From ce857ab1da8e95ffd1323dc2acd074c6619f0c19 Mon Sep 17 00:00:00 2001 From: raven02 Date: Fri, 22 May 2015 08:09:53 +0800 Subject: [PATCH 212/343] RSX: DP2A for fragment decompiler --- rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp index 0416b4758a..43ebdba9b3 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp @@ -38,7 +38,7 @@ std::string D3D12FragmentDecompiler::getFunction(enum class FUNCTION f) case FUNCTION::FUNCTION_DP2: return "dot($0.xy, $1.xy).xxxx"; case FUNCTION::FUNCTION_DP2A: - return ""; + return "(dot($0.xy, $1.xy) + $2.x).xxxx"; case FUNCTION::FUNCTION_DP3: return "dot($0.xyz, $1.xyz).xxxx"; case FUNCTION::FUNCTION_DP4: From cfe058dc82d36ad3077e076927498b3420f04e81 Mon Sep 17 00:00:00 2001 From: vlj Date: Thu, 4 Jun 2015 22:12:49 +0200 Subject: [PATCH 213/343] d3d12: Factorize texture upload code --- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 319 ++++++++++++++++++--------- 1 file changed, 210 insertions(+), 109 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index c159101ff8..c9f93c14bc 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -81,6 +81,215 @@ D3D12_TEXTURE_ADDRESS_MODE D3D12GSRender::GetWrap(size_t wrap) return D3D12_TEXTURE_ADDRESS_MODE_WRAP; } +/** + * Create a texture residing in default heap and generate uploads commands in commandList, + * using a temporary texture buffer. 
+ */ +static +ID3D12Resource *uploadSingleTexture( + const RSXTexture &texture, + ID3D12Device *device, + ID3D12GraphicsCommandList *commandList, + DataHeap &textureBuffersHeap, + DataHeap &textureHeap) +{ + ID3D12Resource *vramTexture; + size_t w = texture.GetWidth(), h = texture.GetHeight(); + DXGI_FORMAT dxgiFormat; + size_t blockSizeInByte, blockWidthInPixel, blockHeightInPixel; + int format = texture.GetFormat() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); + + const u32 texaddr = GetAddress(texture.GetOffset(), texture.GetLocation()); + + bool is_swizzled = !(texture.GetFormat() & CELL_GCM_TEXTURE_LN); + switch (format) + { + case CELL_GCM_TEXTURE_A1R5G5B5: + case CELL_GCM_TEXTURE_G8B8: + case CELL_GCM_TEXTURE_R6G5B5: + case CELL_GCM_TEXTURE_DEPTH24_D8: + case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: + case CELL_GCM_TEXTURE_DEPTH16: + case CELL_GCM_TEXTURE_DEPTH16_FLOAT: + case CELL_GCM_TEXTURE_X16: + case CELL_GCM_TEXTURE_Y16_X16: + case CELL_GCM_TEXTURE_R5G5B5A1: + case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: + case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: + case CELL_GCM_TEXTURE_X32_FLOAT: + case CELL_GCM_TEXTURE_D1R5G5B5: + case CELL_GCM_TEXTURE_Y16_X16_FLOAT: + case CELL_GCM_TEXTURE_COMPRESSED_HILO8: + case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8: + case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: + case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: + default: + LOG_ERROR(RSX, "Unimplemented Texture format : %x", format); + break; + case CELL_GCM_TEXTURE_A4R4G4B4: + dxgiFormat = DXGI_FORMAT_B4G4R4A4_UNORM; + blockSizeInByte = 2; + blockWidthInPixel = 1, blockHeightInPixel = 1; + break; + case CELL_GCM_TEXTURE_R5G6B5: + dxgiFormat = DXGI_FORMAT_B5G6R5_UNORM; + blockSizeInByte = 2; + blockWidthInPixel = 1, blockHeightInPixel = 1; + break; + case CELL_GCM_TEXTURE_D8R8G8B8: + dxgiFormat = DXGI_FORMAT_R8G8B8A8_UNORM; + blockSizeInByte = 4; + blockWidthInPixel = 1, blockHeightInPixel = 1; 
+ break; + case CELL_GCM_TEXTURE_A8R8G8B8: + dxgiFormat = DXGI_FORMAT_R8G8B8A8_UNORM; + blockSizeInByte = 4; + blockWidthInPixel = 1, blockHeightInPixel = 1; + break; + case CELL_GCM_TEXTURE_COMPRESSED_DXT1: + dxgiFormat = DXGI_FORMAT_BC1_UNORM; + blockSizeInByte = 8; + blockWidthInPixel = 4, blockHeightInPixel = 4; + break; + case CELL_GCM_TEXTURE_COMPRESSED_DXT23: + dxgiFormat = DXGI_FORMAT_BC2_UNORM; + blockSizeInByte = 16; + blockWidthInPixel = 4, blockHeightInPixel = 4; + break; + case CELL_GCM_TEXTURE_COMPRESSED_DXT45: + dxgiFormat = DXGI_FORMAT_BC3_UNORM; + blockSizeInByte = 16; + blockWidthInPixel = 4, blockHeightInPixel = 4; + break; + case CELL_GCM_TEXTURE_B8: + dxgiFormat = DXGI_FORMAT_R8_UNORM; + blockSizeInByte = 1; + blockWidthInPixel = 1, blockHeightInPixel = 1; + break; + case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: + dxgiFormat = DXGI_FORMAT_G8R8_G8B8_UNORM; + blockSizeInByte = 4; + blockWidthInPixel = 2, blockHeightInPixel = 2; + break; + case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: + dxgiFormat = DXGI_FORMAT_R8G8_B8G8_UNORM; + blockSizeInByte = 4; + blockWidthInPixel = 2, blockHeightInPixel = 2; + break; + } + + size_t heightInBlocks = (h + blockHeightInPixel - 1) / blockHeightInPixel; + size_t widthInBlocks = (w + blockWidthInPixel - 1) / blockWidthInPixel; + // Multiple of 256 + size_t rowPitch = powerOf2Align(blockSizeInByte * widthInBlocks, 256); + + ID3D12Resource *Texture; + size_t textureSize = rowPitch * heightInBlocks; + assert(textureBuffersHeap.canAlloc(textureSize)); + size_t heapOffset = textureBuffersHeap.alloc(textureSize); + + check(device->CreatePlacedResource( + textureBuffersHeap.m_heap, + heapOffset, + &getBufferResourceDesc(textureSize), + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&Texture) + )); + textureBuffersHeap.m_resourceStoredSinceLastSync.push_back(std::make_tuple(heapOffset, textureSize, Texture)); + + auto pixels = vm::get_ptr(texaddr); + void *textureData; + check(Texture->Map(0, nullptr, 
(void**)&textureData)); + + // Upload with correct rowpitch + for (unsigned row = 0; row < heightInBlocks; row++) + { + size_t m_texture_pitch = powerOf2Align(w * blockSizeInByte, 4); + if (!m_texture_pitch) m_texture_pitch = rowPitch; + switch (format) + { + case CELL_GCM_TEXTURE_A8R8G8B8: + { + if (is_swizzled) + { + u32 *src, *dst; + u32 log2width, log2height; + + src = (u32*)pixels; + dst = (u32*)textureData; + + log2width = (u32)(logf(w) / logf(2.f)); + log2height = (u32)(logf(h) / logf(2.f)); + + for (int j = 0; j < w; j++) + { + dst[(row * rowPitch / 4) + j] = src[LinearToSwizzleAddress(j, row, 0, log2width, log2height, 0)]; + } + } + else + streamBuffer((char*)textureData + row * rowPitch, (char*)pixels + row * m_texture_pitch, m_texture_pitch); + break; + } + case CELL_GCM_TEXTURE_A4R4G4B4: + case CELL_GCM_TEXTURE_R5G6B5: + { + unsigned short *dst = (unsigned short *)textureData, *src = (unsigned short *)pixels; + + for (int j = 0; j < w; j++) + { + u16 tmp = src[row * m_texture_pitch / 2 + j]; + dst[row * rowPitch / 2 + j] = (tmp >> 8) | (tmp << 8); + } + break; + } + default: + { + streamBuffer((char*)textureData + row * rowPitch, (char*)pixels + row * m_texture_pitch, m_texture_pitch); + break; + } + } + } + Texture->Unmap(0, nullptr); + + size_t powerOf2Height = log2(heightInBlocks) + 1; + textureSize = rowPitch * (1 << powerOf2Height); + + assert(textureHeap.canAlloc(textureSize)); + size_t heapOffset2 = textureHeap.alloc(textureSize); + + check(device->CreatePlacedResource( + textureHeap.m_heap, + heapOffset2, + &getTexture2DResourceDesc(w, h, dxgiFormat), + D3D12_RESOURCE_STATE_COPY_DEST, + nullptr, + IID_PPV_ARGS(&vramTexture) + )); + textureHeap.m_resourceStoredSinceLastSync.push_back(std::make_tuple(heapOffset2, textureSize, vramTexture)); + + D3D12_TEXTURE_COPY_LOCATION dst = {}, src = {}; + dst.pResource = vramTexture; + dst.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + src.pResource = Texture; + src.Type = 
D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + src.PlacedFootprint.Footprint.Depth = 1; + src.PlacedFootprint.Footprint.Width = w; + src.PlacedFootprint.Footprint.Height = h; + src.PlacedFootprint.Footprint.RowPitch = (UINT)rowPitch; + src.PlacedFootprint.Footprint.Format = dxgiFormat; + + commandList->CopyTextureRegion(&dst, 0, 0, 0, &src, nullptr); + + D3D12_RESOURCE_BARRIER barrier = {}; + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barrier.Transition.pResource = vramTexture; + barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; + barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_GENERIC_READ; + commandList->ResourceBarrier(1, &barrier); + return vramTexture; +} + size_t D3D12GSRender::UploadTextures() { size_t usedTexture = 0; @@ -192,115 +401,7 @@ size_t D3D12GSRender::UploadTextures() ID3D12GraphicsCommandList *commandList; check(m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, getCurrentResourceStorage().m_textureUploadCommandAllocator, nullptr, IID_PPV_ARGS(&commandList))); - size_t heightInBlocks = (h + blockHeightInPixel - 1) / blockHeightInPixel; - size_t widthInBlocks = (w + blockWidthInPixel - 1) / blockWidthInPixel; - // Multiple of 256 - size_t rowPitch = powerOf2Align(blockSizeInByte * widthInBlocks, 256); - - ID3D12Resource *Texture; - size_t textureSize = rowPitch * heightInBlocks; - assert(m_textureUploadData.canAlloc(textureSize)); - size_t heapOffset = m_textureUploadData.alloc(textureSize); - - check(m_device->CreatePlacedResource( - m_textureUploadData.m_heap, - heapOffset, - &getBufferResourceDesc(textureSize), - D3D12_RESOURCE_STATE_GENERIC_READ, - nullptr, - IID_PPV_ARGS(&Texture) - )); - m_textureUploadData.m_resourceStoredSinceLastSync.push_back(std::make_tuple(heapOffset, textureSize, Texture)); - - auto pixels = vm::get_ptr(texaddr); - void *textureData; - check(Texture->Map(0, nullptr, (void**)&textureData)); - - // Upload with correct rowpitch - for (unsigned row = 0; row < heightInBlocks; 
row++) - { - size_t m_texture_pitch = powerOf2Align(w * blockSizeInByte, 4); - if (!m_texture_pitch) m_texture_pitch = rowPitch; - switch (format) - { - case CELL_GCM_TEXTURE_A8R8G8B8: - { - if (is_swizzled) - { - u32 *src, *dst; - u32 log2width, log2height; - - src = (u32*)pixels; - dst = (u32*)textureData; - - log2width = (u32)(logf(m_textures[i].GetWidth()) / logf(2.f)); - log2height = (u32)(logf(m_textures[i].GetHeight()) / logf(2.f)); - - for (int j = 0; j < m_textures[i].GetWidth(); j++) - { - dst[(row * rowPitch / 4) + j] = src[LinearToSwizzleAddress(j, row, 0, log2width, log2height, 0)]; - } - } - else - streamBuffer((char*)textureData + row * rowPitch, (char*)pixels + row * m_texture_pitch, m_texture_pitch); - break; - } - case CELL_GCM_TEXTURE_A4R4G4B4: - case CELL_GCM_TEXTURE_R5G6B5: - { - unsigned short *dst = (unsigned short *)textureData, *src = (unsigned short *)pixels; - - for (int j = 0; j < m_textures[i].GetWidth(); j++) - { - u16 tmp = src[row * m_texture_pitch / 2 + j]; - dst[row * rowPitch / 2 + j] = (tmp >> 8) | (tmp << 8); - } - break; - } - default: - { - streamBuffer((char*)textureData + row * rowPitch, (char*)pixels + row * m_texture_pitch, m_texture_pitch); - break; - } - } - } - Texture->Unmap(0, nullptr); - - size_t powerOf2Height = log2(heightInBlocks) + 1; - textureSize = rowPitch * (1 << powerOf2Height); - - assert(m_textureData.canAlloc(textureSize)); - size_t heapOffset2 = m_textureData.alloc(textureSize); - - check(m_device->CreatePlacedResource( - m_textureData.m_heap, - heapOffset2, - &getTexture2DResourceDesc(w, h, dxgiFormat), - D3D12_RESOURCE_STATE_COPY_DEST, - nullptr, - IID_PPV_ARGS(&vramTexture) - )); - m_textureData.m_resourceStoredSinceLastSync.push_back(std::make_tuple(heapOffset2, textureSize, vramTexture)); - - D3D12_TEXTURE_COPY_LOCATION dst = {}, src = {}; - dst.pResource = vramTexture; - dst.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; - src.pResource = Texture; - src.Type = 
D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; - src.PlacedFootprint.Footprint.Depth = 1; - src.PlacedFootprint.Footprint.Width = w; - src.PlacedFootprint.Footprint.Height = h; - src.PlacedFootprint.Footprint.RowPitch = (UINT)rowPitch; - src.PlacedFootprint.Footprint.Format = dxgiFormat; - - commandList->CopyTextureRegion(&dst, 0, 0, 0, &src, nullptr); - - D3D12_RESOURCE_BARRIER barrier = {}; - barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - barrier.Transition.pResource = vramTexture; - barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; - barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_GENERIC_READ; - commandList->ResourceBarrier(1, &barrier); + vramTexture = uploadSingleTexture(m_textures[i], m_device, commandList, m_textureUploadData, m_textureData); commandList->Close(); m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)&commandList); From 459ab17d74170f969f1d69a0bce552d4a455eed9 Mon Sep 17 00:00:00 2001 From: vlj Date: Thu, 4 Jun 2015 22:32:46 +0200 Subject: [PATCH 214/343] d3d12: Use openMP to upload texture Does not really increase performance so far --- rpcs3/Emu/RSX/D3D12/D3D12.h | 9 +++++---- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 3 +-- rpcs3/emucore.vcxproj | 1 + rpcs3/rpcs3.vcxproj | 3 +++ 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12.h b/rpcs3/Emu/RSX/D3D12/D3D12.h index e23cb4f0de..71e94fa740 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12.h @@ -29,9 +29,9 @@ size_t powerOf2Align(size_t unalignedVal, size_t powerOf2) inline void streamToBuffer(void* dst, void* src, size_t sizeInBytes) { - for (unsigned i = 0; i < sizeInBytes / 16; i++) +#pragma omp parallel for + for (int i = 0; i < sizeInBytes / 16; i++) { - const __m128i &srcPtr = _mm_loadu_si128((__m128i*) ((char*)src + i * 16)); _mm_stream_si128((__m128i*)((char*)dst + i * 16), srcPtr); } @@ -45,9 +45,10 @@ inline void streamBuffer(void* dst, void* src, size_t sizeInBytes) { // Assume 64 
bytes cache line - unsigned offset = 0; + int offset = 0; bool isAligned = !((size_t)src & 15); - for (; (offset + 64) < sizeInBytes; offset += 64) + #pragma omp parallel for + for (offset = 0; offset < sizeInBytes - 64; offset += 64) { char *line = (char*)src + offset; char *dstline = (char*)dst + offset; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index c9f93c14bc..d5d30cd603 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -222,10 +222,9 @@ ID3D12Resource *uploadSingleTexture( log2width = (u32)(logf(w) / logf(2.f)); log2height = (u32)(logf(h) / logf(2.f)); + #pragma omp parallel for for (int j = 0; j < w; j++) - { dst[(row * rowPitch / 4) + j] = src[LinearToSwizzleAddress(j, row, 0, log2width, log2height, 0)]; - } } else streamBuffer((char*)textureData + row * rowPitch, (char*)pixels + row * m_texture_pitch, m_texture_pitch); diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index db6314a0ce..049f949613 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -758,6 +758,7 @@ true + true true diff --git a/rpcs3/rpcs3.vcxproj b/rpcs3/rpcs3.vcxproj index 3ac2b0fd1a..742c4f112b 100644 --- a/rpcs3/rpcs3.vcxproj +++ b/rpcs3/rpcs3.vcxproj @@ -84,6 +84,7 @@ stdafx_gui.h $(IntDir)$(TargetName)_gui.pch _UNICODE;UNICODE;%(PreprocessorDefinitions);DX12_SUPPORT + true true @@ -114,6 +115,7 @@ Async stdafx_gui.h $(IntDir)$(TargetName)_gui.pch + true true @@ -150,6 +152,7 @@ true stdafx_gui.h $(IntDir)$(TargetName)_gui.pch + true Windows From 694d4e01a0af4ad40131c87d536ad0d586c232ec Mon Sep 17 00:00:00 2001 From: vlj Date: Thu, 4 Jun 2015 22:43:57 +0200 Subject: [PATCH 215/343] d3d12: Use openMP for vertex buffer mapping and factorize function --- rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp | 122 +++++++++++++++------------- 1 file changed, 67 insertions(+), 55 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index 
f31d82984a..a24d2463c3 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -183,6 +183,72 @@ std::vector FormatVertexData(RSXVertexData *m_vertex_data) return Result; } +/** + * Create a new vertex buffer with attributes from vbf using vertexIndexHeap as storage heap. + */ +static +ID3D12Resource *createVertexBuffer(const VertexBufferFormat &vbf, const RSXVertexData *vertexData, ID3D12Device *device, DataHeap &vertexIndexHeap) +{ + size_t subBufferSize = vbf.range.second - vbf.range.first; + assert(vertexIndexHeap.canAlloc(subBufferSize)); + size_t heapOffset = vertexIndexHeap.alloc(subBufferSize); + + ID3D12Resource *vertexBuffer; + check(device->CreatePlacedResource( + vertexIndexHeap.m_heap, + heapOffset, + &getBufferResourceDesc(subBufferSize), + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&vertexBuffer) + )); + void *bufferMap; + check(vertexBuffer->Map(0, nullptr, (void**)&bufferMap)); + + #pragma omp parallel for + for (int vertex = 0; vertex < vbf.elementCount; vertex++) + { + for (size_t attributeId : vbf.attributeId) + { + if (!vertexData[attributeId].addr) continue; + size_t baseOffset = vertexData[attributeId].addr - vbf.range.first; + size_t tsize = vertexData[attributeId].GetTypeSize(); + size_t size = vertexData[attributeId].size; + auto src = vm::get_ptr(vertexData[attributeId].addr + vbf.stride * vertex); + char* dst = (char*)bufferMap + baseOffset + vbf.stride * vertex; + + switch (tsize) + { + case 1: + { + memcpy(dst, src, size); + break; + } + + case 2: + { + const u16* c_src = (const u16*)src; + u16* c_dst = (u16*)dst; + for (u32 j = 0; j < size; ++j) *c_dst++ = re16(*c_src++); + break; + } + + case 4: + { + const u32* c_src = (const u32*)src; + u32* c_dst = (u32*)dst; + for (u32 j = 0; j < size; ++j) *c_dst++ = re32(*c_src++); + break; + } + } + } + } + + vertexBuffer->Unmap(0, nullptr); + vertexIndexHeap.m_resourceStoredSinceLastSync.push_back(std::make_tuple(heapOffset, 
subBufferSize, vertexBuffer)); + return vertexBuffer; +} + std::pair, D3D12_INDEX_BUFFER_VIEW> D3D12GSRender::EnableVertexData(bool indexed_draw) { std::pair, D3D12_INDEX_BUFFER_VIEW> result; @@ -194,63 +260,9 @@ std::pair, D3D12_INDEX_BUFFER_VIEW> D3D12G for (size_t buffer = 0; buffer < vertexBufferFormat.size(); buffer++) { const VertexBufferFormat &vbf = vertexBufferFormat[buffer]; - size_t subBufferSize = vbf.range.second - vbf.range.first; - assert(m_vertexIndexData.canAlloc(subBufferSize)); - size_t heapOffset = m_vertexIndexData.alloc(subBufferSize); - ID3D12Resource *vertexBuffer; - check(m_device->CreatePlacedResource( - m_vertexIndexData.m_heap, - heapOffset, - &getBufferResourceDesc(subBufferSize), - D3D12_RESOURCE_STATE_GENERIC_READ, - nullptr, - IID_PPV_ARGS(&vertexBuffer) - )); - void *bufferMap; - check(vertexBuffer->Map(0, nullptr, (void**)&bufferMap)); - - for (unsigned vertex = 0; vertex < vbf.elementCount; vertex++) - { - for (size_t attributeId : vbf.attributeId) - { - if (!m_vertex_data[attributeId].addr) continue; - size_t baseOffset = m_vertex_data[attributeId].addr - vbf.range.first; - size_t tsize = m_vertex_data[attributeId].GetTypeSize(); - size_t size = m_vertex_data[attributeId].size; - auto src = vm::get_ptr(m_vertex_data[attributeId].addr + vbf.stride * vertex); - char* dst = (char*)bufferMap + baseOffset + vbf.stride * vertex; - - switch (tsize) - { - case 1: - { - memcpy(dst, src, size); - break; - } - - case 2: - { - const u16* c_src = (const u16*)src; - u16* c_dst = (u16*)dst; - for (u32 j = 0; j < size; ++j) *c_dst++ = re16(*c_src++); - break; - } - - case 4: - { - const u32* c_src = (const u32*)src; - u32* c_dst = (u32*)dst; - for (u32 j = 0; j < size; ++j) *c_dst++ = re32(*c_src++); - break; - } - } - } - } - - vertexBuffer->Unmap(0, nullptr); - m_vertexIndexData.m_resourceStoredSinceLastSync.push_back(std::make_tuple(heapOffset, subBufferSize, vertexBuffer)); + ID3D12Resource *vertexBuffer = createVertexBuffer(vbf, 
m_vertex_data, m_device, m_vertexIndexData); D3D12_VERTEX_BUFFER_VIEW vertexBufferView = {}; vertexBufferView.BufferLocation = vertexBuffer->GetGPUVirtualAddress(); From 744b56b12d1822077796695b20cd4edce40a4b2a Mon Sep 17 00:00:00 2001 From: vlj Date: Thu, 4 Jun 2015 23:31:27 +0200 Subject: [PATCH 216/343] gl: Fix texture unswizzling Fix multi texture test --- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 9b9ea7c484..7b9698c7de 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -184,9 +184,9 @@ void GLTexture::Init(RSXTexture& tex) log2width = log(tex.GetWidth()) / log(2); log2height = log(tex.GetHeight()) / log(2); - for (int i = 0; i < tex.GetHeight(); i++) + for (int i = 0; i < tex.GetWidth(); i++) { - for (int j = 0; j < tex.GetWidth(); j++) + for (int j = 0; j < tex.GetHeight(); j++) { dst[(i*tex.GetHeight()) + j] = src[LinearToSwizzleAddress(j, i, 0, log2width, log2height, 0)]; } From ca90c05b0a73fec0f16a9df56d1b5ebe0e2c1a75 Mon Sep 17 00:00:00 2001 From: vlj Date: Thu, 4 Jun 2015 23:45:45 +0200 Subject: [PATCH 217/343] gl : refix --- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 7b9698c7de..3b6f1f43e1 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -184,11 +184,11 @@ void GLTexture::Init(RSXTexture& tex) log2width = log(tex.GetWidth()) / log(2); log2height = log(tex.GetHeight()) / log(2); - for (int i = 0; i < tex.GetWidth(); i++) + for (int i = 0; i < tex.GetHeight(); i++) { - for (int j = 0; j < tex.GetHeight(); j++) + for (int j = 0; j < tex.GetWidth(); j++) { - dst[(i*tex.GetHeight()) + j] = src[LinearToSwizzleAddress(j, i, 0, log2width, log2height, 0)]; + dst[(i*tex.GetWidth()) + j] = src[LinearToSwizzleAddress(j, i, 0, 
log2width, log2height, 0)]; } } } From c3e19f34fae94d8aefb32fc2b22de07ed98ce73c Mon Sep 17 00:00:00 2001 From: vlj Date: Fri, 5 Jun 2015 00:44:27 +0200 Subject: [PATCH 218/343] gl: sampler2d are constants --- rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp b/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp index 50abd60569..5f6696c76e 100644 --- a/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp +++ b/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp @@ -155,6 +155,8 @@ void GLFragmentProgram::Decompile(RSXFragmentProgram& prog) { for (const ParamItem PI : PT.items) { + if (PT.type == "sampler2D") + continue; size_t offset = atoi(PI.name.c_str() + 2); FragmentConstantOffsetCache.push_back(offset); } From 22e67db0f240bf2ffa6d64635d2d4f388ad68066 Mon Sep 17 00:00:00 2001 From: vlj Date: Fri, 5 Jun 2015 20:47:22 +0200 Subject: [PATCH 219/343] d3d12: Add some others texture format --- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 330 +++++++++++++++------------ 1 file changed, 179 insertions(+), 151 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index d5d30cd603..c3ec415954 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -81,6 +81,72 @@ D3D12_TEXTURE_ADDRESS_MODE D3D12GSRender::GetWrap(size_t wrap) return D3D12_TEXTURE_ADDRESS_MODE_WRAP; } +static +DXGI_FORMAT getDXGIFormat(int format) +{ + switch (format) + { + + case CELL_GCM_TEXTURE_Y16_X16_FLOAT: + case CELL_GCM_TEXTURE_COMPRESSED_HILO8: + case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8: + case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: + case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: + default: + LOG_ERROR(RSX, "Unimplemented Texture format : %x", format); + return DXGI_FORMAT(); + case CELL_GCM_TEXTURE_B8: + return DXGI_FORMAT_R8_UNORM; + case CELL_GCM_TEXTURE_A1R5G5B5: + return 
DXGI_FORMAT_B5G5R5A1_UNORM; + case CELL_GCM_TEXTURE_A4R4G4B4: + return DXGI_FORMAT_B4G4R4A4_UNORM; + case CELL_GCM_TEXTURE_R5G6B5: + return DXGI_FORMAT_B5G6R5_UNORM; + case CELL_GCM_TEXTURE_A8R8G8B8: + return DXGI_FORMAT_R8G8B8A8_UNORM; + case CELL_GCM_TEXTURE_COMPRESSED_DXT1: + return DXGI_FORMAT_BC1_UNORM; + case CELL_GCM_TEXTURE_COMPRESSED_DXT23: + return DXGI_FORMAT_BC2_UNORM; + case CELL_GCM_TEXTURE_COMPRESSED_DXT45: + return DXGI_FORMAT_BC3_UNORM; + case CELL_GCM_TEXTURE_G8B8: + return DXGI_FORMAT_G8R8_G8B8_UNORM; + case CELL_GCM_TEXTURE_R6G5B5: + // Not native + return DXGI_FORMAT_R8G8B8A8_UNORM; + case CELL_GCM_TEXTURE_DEPTH24_D8: + return DXGI_FORMAT_R32_UINT; + case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: + return DXGI_FORMAT_R32_FLOAT; + case CELL_GCM_TEXTURE_DEPTH16: + return DXGI_FORMAT_R16_UNORM; + case CELL_GCM_TEXTURE_DEPTH16_FLOAT: + return DXGI_FORMAT_R16_FLOAT; + case CELL_GCM_TEXTURE_X16: + return DXGI_FORMAT_R16_UNORM; + case CELL_GCM_TEXTURE_Y16_X16: + return DXGI_FORMAT_R16G16_UNORM; + case CELL_GCM_TEXTURE_R5G5B5A1: + return DXGI_FORMAT_B5G5R5A1_UNORM; + case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: + return DXGI_FORMAT_R16G16B16A16_FLOAT; + case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: + return DXGI_FORMAT_R32G32B32A32_FLOAT; + case CELL_GCM_TEXTURE_X32_FLOAT: + return DXGI_FORMAT_R32_FLOAT; + case CELL_GCM_TEXTURE_D1R5G5B5: + return DXGI_FORMAT_B5G5R5A1_UNORM; + case CELL_GCM_TEXTURE_D8R8G8B8: + return DXGI_FORMAT_R8G8B8A8_UNORM; + case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: + return DXGI_FORMAT_G8R8_G8B8_UNORM; + case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: + return DXGI_FORMAT_R8G8_B8G8_UNORM; + } +} + /** * Create a texture residing in default heap and generate uploads commands in commandList, * using a temporary texture buffer. 
@@ -95,30 +161,16 @@ ID3D12Resource *uploadSingleTexture( { ID3D12Resource *vramTexture; size_t w = texture.GetWidth(), h = texture.GetHeight(); - DXGI_FORMAT dxgiFormat; + size_t blockSizeInByte, blockWidthInPixel, blockHeightInPixel; int format = texture.GetFormat() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); + DXGI_FORMAT dxgiFormat = getDXGIFormat(format); const u32 texaddr = GetAddress(texture.GetOffset(), texture.GetLocation()); bool is_swizzled = !(texture.GetFormat() & CELL_GCM_TEXTURE_LN); switch (format) { - case CELL_GCM_TEXTURE_A1R5G5B5: - case CELL_GCM_TEXTURE_G8B8: - case CELL_GCM_TEXTURE_R6G5B5: - case CELL_GCM_TEXTURE_DEPTH24_D8: - case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: - case CELL_GCM_TEXTURE_DEPTH16: - case CELL_GCM_TEXTURE_DEPTH16_FLOAT: - case CELL_GCM_TEXTURE_X16: - case CELL_GCM_TEXTURE_Y16_X16: - case CELL_GCM_TEXTURE_R5G5B5A1: - case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: - case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: - case CELL_GCM_TEXTURE_X32_FLOAT: - case CELL_GCM_TEXTURE_D1R5G5B5: - case CELL_GCM_TEXTURE_Y16_X16_FLOAT: case CELL_GCM_TEXTURE_COMPRESSED_HILO8: case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8: case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: @@ -126,53 +178,104 @@ ID3D12Resource *uploadSingleTexture( default: LOG_ERROR(RSX, "Unimplemented Texture format : %x", format); break; + case CELL_GCM_TEXTURE_B8: + blockSizeInByte = 1; + blockWidthInPixel = 1, blockHeightInPixel = 1; + break; + case CELL_GCM_TEXTURE_A1R5G5B5: + blockSizeInByte = 2; + blockHeightInPixel = 1, blockWidthInPixel = 1; + break; case CELL_GCM_TEXTURE_A4R4G4B4: - dxgiFormat = DXGI_FORMAT_B4G4R4A4_UNORM; blockSizeInByte = 2; blockWidthInPixel = 1, blockHeightInPixel = 1; break; case CELL_GCM_TEXTURE_R5G6B5: - dxgiFormat = DXGI_FORMAT_B5G6R5_UNORM; blockSizeInByte = 2; blockWidthInPixel = 1, blockHeightInPixel = 1; break; - case CELL_GCM_TEXTURE_D8R8G8B8: - dxgiFormat = DXGI_FORMAT_R8G8B8A8_UNORM; - blockSizeInByte = 4; 
- blockWidthInPixel = 1, blockHeightInPixel = 1; - break; case CELL_GCM_TEXTURE_A8R8G8B8: - dxgiFormat = DXGI_FORMAT_R8G8B8A8_UNORM; blockSizeInByte = 4; blockWidthInPixel = 1, blockHeightInPixel = 1; break; case CELL_GCM_TEXTURE_COMPRESSED_DXT1: - dxgiFormat = DXGI_FORMAT_BC1_UNORM; blockSizeInByte = 8; blockWidthInPixel = 4, blockHeightInPixel = 4; break; case CELL_GCM_TEXTURE_COMPRESSED_DXT23: - dxgiFormat = DXGI_FORMAT_BC2_UNORM; blockSizeInByte = 16; blockWidthInPixel = 4, blockHeightInPixel = 4; break; case CELL_GCM_TEXTURE_COMPRESSED_DXT45: - dxgiFormat = DXGI_FORMAT_BC3_UNORM; blockSizeInByte = 16; blockWidthInPixel = 4, blockHeightInPixel = 4; break; - case CELL_GCM_TEXTURE_B8: - dxgiFormat = DXGI_FORMAT_R8_UNORM; - blockSizeInByte = 1; + case CELL_GCM_TEXTURE_G8B8: + blockSizeInByte = 2; + blockWidthInPixel = 1, blockHeightInPixel = 1; + break; + case CELL_GCM_TEXTURE_R6G5B5: + // Not native + blockSizeInByte = 4; + blockWidthInPixel = 1, blockHeightInPixel = 1; + break; + case CELL_GCM_TEXTURE_DEPTH24_D8: + blockSizeInByte = 4; + blockWidthInPixel = 1, blockHeightInPixel = 1; + break; + case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: + blockSizeInByte = 4; + blockWidthInPixel = 1, blockHeightInPixel = 1; + break; + case CELL_GCM_TEXTURE_DEPTH16: + blockSizeInByte = 2; + blockWidthInPixel = 1, blockHeightInPixel = 1; + break; + case CELL_GCM_TEXTURE_DEPTH16_FLOAT: + blockSizeInByte = 2; + blockWidthInPixel = 1, blockHeightInPixel = 1; + break; + case CELL_GCM_TEXTURE_X16: + blockSizeInByte = 2; + blockWidthInPixel = 1, blockHeightInPixel = 1; + break; + case CELL_GCM_TEXTURE_Y16_X16: + blockSizeInByte = 4; + blockWidthInPixel = 1, blockHeightInPixel = 1; + break; + case CELL_GCM_TEXTURE_R5G5B5A1: + blockSizeInByte = 2; + blockWidthInPixel = 1, blockHeightInPixel = 1; + break; + case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: + blockSizeInByte = 8; + blockWidthInPixel = 1, blockHeightInPixel = 1; + break; + case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: + 
blockSizeInByte = 16; + blockWidthInPixel = 1, blockHeightInPixel = 1; + break; + case CELL_GCM_TEXTURE_X32_FLOAT: + blockSizeInByte = 4; + blockWidthInPixel = 1, blockHeightInPixel = 1; + break; + case CELL_GCM_TEXTURE_D1R5G5B5: + blockSizeInByte = 2; + blockWidthInPixel = 1, blockHeightInPixel = 1; + break; + case CELL_GCM_TEXTURE_Y16_X16_FLOAT: + blockSizeInByte = 4; + blockWidthInPixel = 1, blockHeightInPixel = 1; + break; + case CELL_GCM_TEXTURE_D8R8G8B8: + blockSizeInByte = 4; blockWidthInPixel = 1, blockHeightInPixel = 1; break; case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: - dxgiFormat = DXGI_FORMAT_G8R8_G8B8_UNORM; blockSizeInByte = 4; blockWidthInPixel = 2, blockHeightInPixel = 2; break; case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: - dxgiFormat = DXGI_FORMAT_R8G8_B8G8_UNORM; blockSizeInByte = 4; blockWidthInPixel = 2, blockHeightInPixel = 2; break; @@ -300,86 +403,9 @@ size_t D3D12GSRender::UploadTextures() const u32 texaddr = GetAddress(m_textures[i].GetOffset(), m_textures[i].GetLocation()); - DXGI_FORMAT dxgiFormat; - size_t blockSizeInByte, blockWidthInPixel, blockHeightInPixel; int format = m_textures[i].GetFormat() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); - + DXGI_FORMAT dxgiFormat = getDXGIFormat(format); bool is_swizzled = !(m_textures[i].GetFormat() & CELL_GCM_TEXTURE_LN); - switch (format) - { - case CELL_GCM_TEXTURE_A1R5G5B5: - case CELL_GCM_TEXTURE_G8B8: - case CELL_GCM_TEXTURE_R6G5B5: - case CELL_GCM_TEXTURE_DEPTH24_D8: - case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: - case CELL_GCM_TEXTURE_DEPTH16: - case CELL_GCM_TEXTURE_DEPTH16_FLOAT: - case CELL_GCM_TEXTURE_X16: - case CELL_GCM_TEXTURE_Y16_X16: - case CELL_GCM_TEXTURE_R5G5B5A1: - case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: - case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: - case CELL_GCM_TEXTURE_X32_FLOAT: - case CELL_GCM_TEXTURE_D1R5G5B5: - case CELL_GCM_TEXTURE_Y16_X16_FLOAT: - case CELL_GCM_TEXTURE_COMPRESSED_HILO8: - case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8: - case ~(CELL_GCM_TEXTURE_LN | 
CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: - case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: - default: - LOG_ERROR(RSX, "Unimplemented Texture format : %x", format); - break; - case CELL_GCM_TEXTURE_A4R4G4B4: - dxgiFormat = DXGI_FORMAT_B4G4R4A4_UNORM; - blockSizeInByte = 2; - blockWidthInPixel = 1, blockHeightInPixel = 1; - break; - case CELL_GCM_TEXTURE_R5G6B5: - dxgiFormat = DXGI_FORMAT_B5G6R5_UNORM; - blockSizeInByte = 2; - blockWidthInPixel = 1, blockHeightInPixel = 1; - break; - case CELL_GCM_TEXTURE_D8R8G8B8: - dxgiFormat = DXGI_FORMAT_R8G8B8A8_UNORM; - blockSizeInByte = 4; - blockWidthInPixel = 1, blockHeightInPixel = 1; - break; - case CELL_GCM_TEXTURE_A8R8G8B8: - dxgiFormat = DXGI_FORMAT_R8G8B8A8_UNORM; - blockSizeInByte = 4; - blockWidthInPixel = 1, blockHeightInPixel = 1; - break; - case CELL_GCM_TEXTURE_COMPRESSED_DXT1: - dxgiFormat = DXGI_FORMAT_BC1_UNORM; - blockSizeInByte = 8; - blockWidthInPixel = 4, blockHeightInPixel = 4; - break; - case CELL_GCM_TEXTURE_COMPRESSED_DXT23: - dxgiFormat = DXGI_FORMAT_BC2_UNORM; - blockSizeInByte = 16; - blockWidthInPixel = 4, blockHeightInPixel = 4; - break; - case CELL_GCM_TEXTURE_COMPRESSED_DXT45: - dxgiFormat = DXGI_FORMAT_BC3_UNORM; - blockSizeInByte = 16; - blockWidthInPixel = 4, blockHeightInPixel = 4; - break; - case CELL_GCM_TEXTURE_B8: - dxgiFormat = DXGI_FORMAT_R8_UNORM; - blockSizeInByte = 1; - blockWidthInPixel = 1, blockHeightInPixel = 1; - break; - case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: - dxgiFormat = DXGI_FORMAT_G8R8_G8B8_UNORM; - blockSizeInByte = 4; - blockWidthInPixel = 2, blockHeightInPixel = 2; - break; - case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: - dxgiFormat = DXGI_FORMAT_R8G8_B8G8_UNORM; - blockSizeInByte = 4; - blockWidthInPixel = 2, blockHeightInPixel = 2; - break; - } ID3D12Resource *vramTexture; std::unordered_map::const_iterator ItRTT = m_rtts.m_renderTargets.find(texaddr); @@ -415,21 +441,6 @@ size_t 
D3D12GSRender::UploadTextures() switch (format) { - case CELL_GCM_TEXTURE_A1R5G5B5: - case CELL_GCM_TEXTURE_G8B8: - case CELL_GCM_TEXTURE_R6G5B5: - case CELL_GCM_TEXTURE_DEPTH24_D8: - case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: - case CELL_GCM_TEXTURE_DEPTH16: - case CELL_GCM_TEXTURE_DEPTH16_FLOAT: - case CELL_GCM_TEXTURE_X16: - case CELL_GCM_TEXTURE_Y16_X16: - case CELL_GCM_TEXTURE_R5G5B5A1: - case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: - case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: - case CELL_GCM_TEXTURE_X32_FLOAT: - case CELL_GCM_TEXTURE_D1R5G5B5: - case CELL_GCM_TEXTURE_Y16_X16_FLOAT: case CELL_GCM_TEXTURE_COMPRESSED_HILO8: case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8: case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: @@ -437,32 +448,18 @@ size_t D3D12GSRender::UploadTextures() default: LOG_ERROR(RSX, "Unimplemented Texture format : %x", format); break; + case CELL_GCM_TEXTURE_B8: + srvDesc.Shader4ComponentMapping = D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING( + D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_1, + D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_2, + D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_3, + D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0); + break; + case CELL_GCM_TEXTURE_A1R5G5B5: case CELL_GCM_TEXTURE_A4R4G4B4: case CELL_GCM_TEXTURE_R5G6B5: srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; break; - case CELL_GCM_TEXTURE_D8R8G8B8: - { - const int RemapValue[4] = - { - D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_1, - D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_2, - D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_1, - D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0 - }; - - u8 remap_a = m_textures[i].GetRemap() & 0x3; - u8 remap_r = (m_textures[i].GetRemap() >> 2) & 0x3; - u8 remap_g = (m_textures[i].GetRemap() >> 4) & 0x3; - u8 remap_b = (m_textures[i].GetRemap() >> 6) & 0x3; - - srvDesc.Shader4ComponentMapping = 
D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING( - RemapValue[remap_a], - RemapValue[remap_r], - RemapValue[remap_g], - RemapValue[remap_b]); - break; - } case CELL_GCM_TEXTURE_A8R8G8B8: { const int RemapValue[4] = @@ -491,14 +488,45 @@ size_t D3D12GSRender::UploadTextures() case CELL_GCM_TEXTURE_COMPRESSED_DXT1: case CELL_GCM_TEXTURE_COMPRESSED_DXT23: case CELL_GCM_TEXTURE_COMPRESSED_DXT45: + case CELL_GCM_TEXTURE_G8B8: + case CELL_GCM_TEXTURE_R6G5B5: + case CELL_GCM_TEXTURE_DEPTH24_D8: + case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: + case CELL_GCM_TEXTURE_DEPTH16: + case CELL_GCM_TEXTURE_DEPTH16_FLOAT: + case CELL_GCM_TEXTURE_X16: + case CELL_GCM_TEXTURE_Y16_X16: + case CELL_GCM_TEXTURE_R5G5B5A1: + case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: + case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: + case CELL_GCM_TEXTURE_X32_FLOAT: + case CELL_GCM_TEXTURE_D1R5G5B5: srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; break; - case CELL_GCM_TEXTURE_B8: - srvDesc.Shader4ComponentMapping = D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING( + case CELL_GCM_TEXTURE_D8R8G8B8: + { + const int RemapValue[4] = + { D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_1, D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_2, - D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_3, - D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0); + D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_1, + D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0 + }; + + u8 remap_a = m_textures[i].GetRemap() & 0x3; + u8 remap_r = (m_textures[i].GetRemap() >> 2) & 0x3; + u8 remap_g = (m_textures[i].GetRemap() >> 4) & 0x3; + u8 remap_b = (m_textures[i].GetRemap() >> 6) & 0x3; + + srvDesc.Shader4ComponentMapping = D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING( + RemapValue[remap_a], + RemapValue[remap_r], + RemapValue[remap_g], + RemapValue[remap_b]); + break; + } + case CELL_GCM_TEXTURE_Y16_X16_FLOAT: + srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; break; case 
CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; From e4435a93080770514299a4a33b71282593f721b7 Mon Sep 17 00:00:00 2001 From: vlj Date: Fri, 5 Jun 2015 20:53:36 +0200 Subject: [PATCH 220/343] DPH disappeared when merging, need to merge commit with the initial one --- rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp | 2 ++ rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp | 2 ++ 2 files changed, 4 insertions(+) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp index 43ebdba9b3..d996e4ab43 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp @@ -43,6 +43,8 @@ std::string D3D12FragmentDecompiler::getFunction(enum class FUNCTION f) return "dot($0.xyz, $1.xyz).xxxx"; case FUNCTION::FUNCTION_DP4: return "dot($0, $1).xxxx"; + case FUNCTION::FUNCTION_DPH: + return "dot(float4($0.xyz, 1.0), $1).xxxx"; case FUNCTION::FUNCTION_SFL: return "float4(0., 0., 0., 0.)"; case FUNCTION::FUNCTION_STR: diff --git a/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp index 4b606da907..bedd8f0b9a 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp @@ -36,6 +36,8 @@ std::string D3D12VertexProgramDecompiler::getFunction(enum class FUNCTION f) return "dot($0.xyz, $1.xyz).xxxx"; case FUNCTION::FUNCTION_DP4: return "dot($0, $1).xxxx"; + case FUNCTION::FUNCTION_DPH: + return "dot(float4($0.xyz, 1.0), $1).xxxx"; case FUNCTION::FUNCTION_SFL: return "float4(0., 0., 0., 0.)"; case FUNCTION::FUNCTION_STR: From 381260a4932af19a962a6f4bbffbd0db36ec828c Mon Sep 17 00:00:00 2001 From: vlj Date: Fri, 5 Jun 2015 21:36:56 +0200 Subject: [PATCH 221/343] d3d12: Check IALayout equality in pipeline state --- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h | 18 
+++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h index 854e095038..003a5335fc 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h @@ -21,7 +21,23 @@ struct D3D12PipelineProperties bool operator==(const D3D12PipelineProperties &in) const { - // TODO: blend and IASet equality + if (IASet.size() != in.IASet.size()) + return false; + for (unsigned i = 0; i < IASet.size(); i++) + { + const D3D12_INPUT_ELEMENT_DESC &a = IASet[i], &b = in.IASet[i]; + if (a.AlignedByteOffset != b.AlignedByteOffset) + return false; + if (a.Format != b.Format) + return false; + if (a.InputSlot != b.InputSlot) + return false; + if (a.InstanceDataStepRate != b.InstanceDataStepRate) + return false; + if (a.SemanticIndex != b.SemanticIndex) + return false; + } + // TODO: blend return Topology == in.Topology && DepthStencilFormat == in.DepthStencilFormat && numMRT == in.numMRT && depthEnabled == in.depthEnabled; } }; From e3b73f9ea0bfca78979a8402dbee37048e5b38cf Mon Sep 17 00:00:00 2001 From: vlj Date: Fri, 5 Jun 2015 21:57:44 +0200 Subject: [PATCH 222/343] d3d12: Fix vertex error in stencil reflect --- rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index a24d2463c3..9194587b54 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -94,7 +94,7 @@ struct VertexBufferFormat size_t stride; }; -std::vector getIALayout(ID3D12Device *device, const std::vector &vertexBufferFormat, const RSXVertexData *vertexData) +std::vector getIALayout(ID3D12Device *device, const std::vector &vertexBufferFormat, const RSXVertexData *m_vertex_data) { std::vector result; @@ -102,12 +102,13 @@ std::vector getIALayout(ID3D12Device *device, const st { for (size_t 
attributeId : vertexBufferFormat[inputSlot].attributeId) { + const RSXVertexData &vertexData = m_vertex_data[attributeId]; D3D12_INPUT_ELEMENT_DESC IAElement = {}; IAElement.SemanticName = "TEXCOORD"; IAElement.SemanticIndex = (UINT)attributeId; IAElement.InputSlot = (UINT)inputSlot; - IAElement.Format = getFormat(vertexData[attributeId].type - 1, vertexData[attributeId].size); - IAElement.AlignedByteOffset = (UINT)(vertexData[attributeId].addr - vertexBufferFormat[inputSlot].range.first); + IAElement.Format = getFormat(vertexData.type - 1, vertexData.size); + IAElement.AlignedByteOffset = (UINT)(vertexData.addr - vertexBufferFormat[inputSlot].range.first); result.push_back(IAElement); } } @@ -148,17 +149,19 @@ bool overlaps(const std::pair &range1, const std::pair FormatVertexData(RSXVertexData *m_vertex_data) +std::vector FormatVertexData(const RSXVertexData *m_vertex_data) { std::vector Result; for (size_t i = 0; i < 32; ++i) { - if (!m_vertex_data[i].IsEnabled()) continue; - size_t elementCount = m_vertex_data[i].data.size() / (m_vertex_data[i].size * m_vertex_data[i].GetTypeSize()); + const RSXVertexData &vertexData = m_vertex_data[i]; + if (!vertexData.IsEnabled()) continue; + + size_t elementCount = vertexData.data.size() / (vertexData.size * vertexData.GetTypeSize()); // If there is a single element, stride is 0, use the size of element instead - size_t stride = m_vertex_data[i].stride; - size_t elementSize = m_vertex_data[i].GetTypeSize(); - std::pair range = std::make_pair(m_vertex_data[i].addr, m_vertex_data[i].addr + elementSize + elementCount * stride); + size_t stride = vertexData.stride; + size_t elementSize = vertexData.GetTypeSize(); + std::pair range = std::make_pair(vertexData.addr, vertexData.addr + elementSize * vertexData.size + (elementCount - 1) * stride - 1); bool isMerged = false; for (VertexBufferFormat &vbf : Result) @@ -189,7 +192,7 @@ std::vector FormatVertexData(RSXVertexData *m_vertex_data) static ID3D12Resource 
*createVertexBuffer(const VertexBufferFormat &vbf, const RSXVertexData *vertexData, ID3D12Device *device, DataHeap &vertexIndexHeap) { - size_t subBufferSize = vbf.range.second - vbf.range.first; + size_t subBufferSize = vbf.range.second - vbf.range.first + 1; assert(vertexIndexHeap.canAlloc(subBufferSize)); size_t heapOffset = vertexIndexHeap.alloc(subBufferSize); @@ -260,7 +263,7 @@ std::pair, D3D12_INDEX_BUFFER_VIEW> D3D12G for (size_t buffer = 0; buffer < vertexBufferFormat.size(); buffer++) { const VertexBufferFormat &vbf = vertexBufferFormat[buffer]; - size_t subBufferSize = vbf.range.second - vbf.range.first; + size_t subBufferSize = vbf.range.second - vbf.range.first + 1; ID3D12Resource *vertexBuffer = createVertexBuffer(vbf, m_vertex_data, m_device, m_vertexIndexData); From 02f15810ee13593bf829843a41d5b97ef7d7ccc3 Mon Sep 17 00:00:00 2001 From: vlj Date: Fri, 5 Jun 2015 22:45:27 +0200 Subject: [PATCH 223/343] d3d12: Implement stencil states Unfortunatly it doesnt solve stencil reflect test atm --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 70 +++++++++++++++++++++++- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h | 20 ++----- 2 files changed, 73 insertions(+), 17 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index d518770e14..19977e39ab 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -597,6 +597,48 @@ static D3D12_LOGIC_OP getLogicOp(u32 op) } } +static D3D12_STENCIL_OP getStencilOp(u32 op) +{ + switch (op) + { + case GL_KEEP: + return D3D12_STENCIL_OP_KEEP; + case GL_ZERO: + return D3D12_STENCIL_OP_ZERO; + case GL_REPLACE: + return D3D12_STENCIL_OP_REPLACE; + case GL_INCR: + return D3D12_STENCIL_OP_INCR; + case GL_DECR: + return D3D12_STENCIL_OP_DECR; + case GL_INVERT: + return D3D12_STENCIL_OP_INVERT; + } +} + +static D3D12_COMPARISON_FUNC getStencilFunc(u32 op) +{ + switch (op) + { + case GL_NEVER: + return D3D12_COMPARISON_FUNC_NEVER; + case GL_LESS: + 
return D3D12_COMPARISON_FUNC_LESS; + case GL_LEQUAL: + return D3D12_COMPARISON_FUNC_LESS_EQUAL; + case GL_GREATER: + return D3D12_COMPARISON_FUNC_GREATER; + case GL_GEQUAL: + return D3D12_COMPARISON_FUNC_GREATER_EQUAL; + case GL_EQUAL: + return D3D12_COMPARISON_FUNC_EQUAL; + case GL_NOTEQUAL: + return D3D12_COMPARISON_FUNC_NOT_EQUAL; + case GL_ALWAYS: + return D3D12_COMPARISON_FUNC_ALWAYS; + } +} + bool D3D12GSRender::LoadProgram() { if (!m_cur_fragment_prog) @@ -717,7 +759,32 @@ bool D3D12GSRender::LoadProgram() LOG_ERROR(RSX, "Bad surface color target: %d", m_surface_color_target); } - prop.depthEnabled = m_set_depth_test; + prop.DepthStencil.DepthEnable = m_set_depth_test; + prop.DepthStencil.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ALL; + prop.DepthStencil.DepthFunc = D3D12_COMPARISON_FUNC_LESS_EQUAL; + prop.DepthStencil.StencilEnable = m_set_stencil_test; + prop.DepthStencil.StencilReadMask = m_stencil_func_mask; + prop.DepthStencil.StencilWriteMask = m_set_stencil_mask; + prop.DepthStencil.FrontFace.StencilPassOp = getStencilOp(m_stencil_zpass); + prop.DepthStencil.FrontFace.StencilDepthFailOp = getStencilOp(m_stencil_zfail); + prop.DepthStencil.FrontFace.StencilFailOp = getStencilOp(m_stencil_fail); + prop.DepthStencil.FrontFace.StencilFunc = getStencilFunc(m_stencil_func); + + if (m_set_two_sided_stencil_test_enable) + { + prop.DepthStencil.BackFace.StencilFailOp = getStencilOp(m_stencil_fail); + prop.DepthStencil.BackFace.StencilFunc = getStencilFunc(m_stencil_func); + prop.DepthStencil.BackFace.StencilPassOp = getStencilOp(m_stencil_zpass); + prop.DepthStencil.BackFace.StencilDepthFailOp = getStencilOp(m_stencil_zfail); + } + else + { + prop.DepthStencil.BackFace.StencilFunc = D3D12_COMPARISON_FUNC_NEVER; + prop.DepthStencil.BackFace.StencilFailOp = D3D12_STENCIL_OP_KEEP; + prop.DepthStencil.BackFace.StencilPassOp = D3D12_STENCIL_OP_KEEP; + prop.DepthStencil.BackFace.StencilDepthFailOp = D3D12_STENCIL_OP_KEEP; + } + prop.IASet = m_IASet; @@ -774,6 +841,7 
@@ void D3D12GSRender::ExecCMD() } commandList->SetGraphicsRootSignature(m_rootSignatures[m_PSO->second]); + commandList->OMSetStencilRef(m_stencil_func_ref); // Constants setScaleOffset(); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h index 003a5335fc..fb774b38f6 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h @@ -17,7 +17,7 @@ struct D3D12PipelineProperties std::vector IASet; D3D12_BLEND_DESC Blend; unsigned numMRT : 3; - bool depthEnabled : 1; + D3D12_DEPTH_STENCIL_DESC DepthStencil; bool operator==(const D3D12PipelineProperties &in) const { @@ -37,8 +37,8 @@ struct D3D12PipelineProperties if (a.SemanticIndex != b.SemanticIndex) return false; } - // TODO: blend - return Topology == in.Topology && DepthStencilFormat == in.DepthStencilFormat && numMRT == in.numMRT && depthEnabled == in.depthEnabled; + // TODO: blend and depth stencil + return Topology == in.Topology && DepthStencilFormat == in.DepthStencilFormat && numMRT == in.numMRT; } }; @@ -156,23 +156,11 @@ struct D3D12Traits D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF, }; - static D3D12_DEPTH_STENCIL_DESC CD3D12_DEPTH_STENCIL_DESC = - { - TRUE, - D3D12_DEPTH_WRITE_MASK_ALL, - D3D12_COMPARISON_FUNC_LESS_EQUAL, - FALSE, - D3D12_DEFAULT_STENCIL_READ_MASK, - D3D12_DEFAULT_STENCIL_WRITE_MASK, - }; - graphicPipelineStateDesc.BlendState = pipelineProperties.Blend; - graphicPipelineStateDesc.DepthStencilState = CD3D12_DEPTH_STENCIL_DESC; + graphicPipelineStateDesc.DepthStencilState = pipelineProperties.DepthStencil; graphicPipelineStateDesc.RasterizerState = CD3D12_RASTERIZER_DESC; graphicPipelineStateDesc.PrimitiveTopologyType = pipelineProperties.Topology; - graphicPipelineStateDesc.DepthStencilState.DepthEnable = pipelineProperties.depthEnabled; - graphicPipelineStateDesc.NumRenderTargets = pipelineProperties.numMRT; for (unsigned i = 0; i < pipelineProperties.numMRT; i++) graphicPipelineStateDesc.RTVFormats[i] = 
DXGI_FORMAT_R8G8B8A8_UNORM; From 64a555cacad391691f0fd6d5c57881e4d67dd0ad Mon Sep 17 00:00:00 2001 From: vlj Date: Fri, 5 Jun 2015 23:18:29 +0200 Subject: [PATCH 224/343] d3d12: Write data to single vertex attrib stencil reflect has some color but it's still broken. --- rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index 9194587b54..9a5ea9c234 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -213,7 +213,11 @@ ID3D12Resource *createVertexBuffer(const VertexBufferFormat &vbf, const RSXVerte { for (size_t attributeId : vbf.attributeId) { - if (!vertexData[attributeId].addr) continue; + if (!vertexData[attributeId].addr) + { + memcpy(bufferMap, vertexData[attributeId].data.data(), vertexData[attributeId].data.size()); + continue; + } size_t baseOffset = vertexData[attributeId].addr - vbf.range.first; size_t tsize = vertexData[attributeId].GetTypeSize(); size_t size = vertexData[attributeId].size; From 5f46b32616b1457bdb58d972cf5b32fae5e7b9a3 Mon Sep 17 00:00:00 2001 From: vlj Date: Fri, 5 Jun 2015 23:27:43 +0200 Subject: [PATCH 225/343] d3d12: Use instancing to pass constant vertex attribute Maybe a bit hackish but it works. --- rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index 9a5ea9c234..8e07aab7b4 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -109,6 +109,8 @@ std::vector getIALayout(ID3D12Device *device, const st IAElement.InputSlot = (UINT)inputSlot; IAElement.Format = getFormat(vertexData.type - 1, vertexData.size); IAElement.AlignedByteOffset = (UINT)(vertexData.addr - vertexBufferFormat[inputSlot].range.first); + IAElement.InputSlotClass = (vertexData.addr > 0) ? 
D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA : D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA; + IAElement.InstanceDataStepRate = (vertexData.addr > 0) ? 0 : 0; result.push_back(IAElement); } } From b2ad49bac80a92037a4d1db050159d9acfca26ef Mon Sep 17 00:00:00 2001 From: vlj Date: Fri, 5 Jun 2015 23:41:29 +0200 Subject: [PATCH 226/343] d3d12: Check blend and depth stencil state when evaluating pso equality --- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h index fb774b38f6..20c9e30be3 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h @@ -37,7 +37,11 @@ struct D3D12PipelineProperties if (a.SemanticIndex != b.SemanticIndex) return false; } - // TODO: blend and depth stencil + + if (memcmp(&DepthStencil, &in.DepthStencil, sizeof(D3D12_DEPTH_STENCIL_DESC))) + return false; + if (memcmp(&Blend, &in.Blend, sizeof(D3D12_BLEND_DESC))) + return false; return Topology == in.Topology && DepthStencilFormat == in.DepthStencilFormat && numMRT == in.numMRT; } }; From d564c2524129d667e37069e49a872278b63a9af6 Mon Sep 17 00:00:00 2001 From: vlj Date: Fri, 5 Jun 2015 23:51:19 +0200 Subject: [PATCH 227/343] d3d12: Use real depth function too --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 19977e39ab..9116b20f6c 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -639,6 +639,29 @@ static D3D12_COMPARISON_FUNC getStencilFunc(u32 op) } } +static D3D12_COMPARISON_FUNC getDepthFunc(u32 op) +{ + switch (op) + { + case GL_NEVER: + return D3D12_COMPARISON_FUNC_NEVER; + case GL_LESS: + return D3D12_COMPARISON_FUNC_LESS; + case GL_LEQUAL: + return D3D12_COMPARISON_FUNC_LESS_EQUAL; + case 
GL_GREATER: + return D3D12_COMPARISON_FUNC_GREATER; + case GL_GEQUAL: + return D3D12_COMPARISON_FUNC_GREATER_EQUAL; + case GL_EQUAL: + return D3D12_COMPARISON_FUNC_EQUAL; + case GL_NOTEQUAL: + return D3D12_COMPARISON_FUNC_NOT_EQUAL; + case GL_ALWAYS: + return D3D12_COMPARISON_FUNC_ALWAYS; + } +} + bool D3D12GSRender::LoadProgram() { if (!m_cur_fragment_prog) @@ -761,7 +784,7 @@ bool D3D12GSRender::LoadProgram() prop.DepthStencil.DepthEnable = m_set_depth_test; prop.DepthStencil.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ALL; - prop.DepthStencil.DepthFunc = D3D12_COMPARISON_FUNC_LESS_EQUAL; + prop.DepthStencil.DepthFunc = getDepthFunc(m_depth_func); prop.DepthStencil.StencilEnable = m_set_stencil_test; prop.DepthStencil.StencilReadMask = m_stencil_func_mask; prop.DepthStencil.StencilWriteMask = m_set_stencil_mask; From c73d19161b293379878ea841d33e2da72b19e6b5 Mon Sep 17 00:00:00 2001 From: vlj Date: Fri, 5 Jun 2015 23:58:25 +0200 Subject: [PATCH 228/343] d3d12: Disable depth/color buffer write if not requested Doesnt really increase performance... 
--- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 170 +++++++++++++------------- 1 file changed, 87 insertions(+), 83 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 9116b20f6c..052e0509ca 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -1162,9 +1162,6 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) // Except when a semaphore is written by RSX -/* if (!Ini.GSDumpDepthBuffer.GetValue()) - return;*/ - ID3D12Fence *fence; check( m_device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&fence)) @@ -1178,9 +1175,10 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) size_t depthRowPitch = RSXThread::m_width; depthRowPitch = (depthRowPitch + 255) & ~255; - bool needTransfer = m_set_context_dma_z || m_set_context_dma_color_a || m_set_context_dma_color_b || m_set_context_dma_color_c || m_set_context_dma_color_d; + bool needTransfer = (m_set_context_dma_z && Ini.GSDumpDepthBuffer.GetValue()) || + ((m_set_context_dma_color_a || m_set_context_dma_color_b || m_set_context_dma_color_c || m_set_context_dma_color_d) && Ini.GSDumpColorBuffers.GetValue()); - if (m_set_context_dma_z) + if (m_set_context_dma_z && Ini.GSDumpDepthBuffer.GetValue()) { D3D12_HEAP_PROPERTIES heapProp = {}; heapProp.Type = D3D12_HEAP_TYPE_DEFAULT; @@ -1296,7 +1294,7 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) ); } - if (m_set_context_dma_z) + if (m_set_context_dma_z && Ini.GSDumpDepthBuffer.GetValue()) { // Copy D3D12_TEXTURE_COPY_LOCATION dst = {}, src = {}; @@ -1314,36 +1312,39 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) } ID3D12Resource *rtt0, *rtt1, *rtt2, *rtt3; - switch (m_surface_color_target) + if (Ini.GSDumpColorBuffers.GetValue()) { - case CELL_GCM_SURFACE_TARGET_NONE: - break; + switch (m_surface_color_target) + { + case CELL_GCM_SURFACE_TARGET_NONE: + break; - case 
CELL_GCM_SURFACE_TARGET_0: - if (m_context_dma_color_a) rtt0 = writeColorBuffer(m_rtts.m_currentlyBoundRenderTargets[0], downloadCommandList); - break; + case CELL_GCM_SURFACE_TARGET_0: + if (m_context_dma_color_a) rtt0 = writeColorBuffer(m_rtts.m_currentlyBoundRenderTargets[0], downloadCommandList); + break; - case CELL_GCM_SURFACE_TARGET_1: - if (m_context_dma_color_b) rtt1 = writeColorBuffer(m_rtts.m_currentlyBoundRenderTargets[0], downloadCommandList); - break; + case CELL_GCM_SURFACE_TARGET_1: + if (m_context_dma_color_b) rtt1 = writeColorBuffer(m_rtts.m_currentlyBoundRenderTargets[0], downloadCommandList); + break; - case CELL_GCM_SURFACE_TARGET_MRT1: - if (m_context_dma_color_a) rtt0 = writeColorBuffer(m_rtts.m_currentlyBoundRenderTargets[0], downloadCommandList); - if (m_context_dma_color_b) rtt1 = writeColorBuffer(m_rtts.m_currentlyBoundRenderTargets[1], downloadCommandList); - break; + case CELL_GCM_SURFACE_TARGET_MRT1: + if (m_context_dma_color_a) rtt0 = writeColorBuffer(m_rtts.m_currentlyBoundRenderTargets[0], downloadCommandList); + if (m_context_dma_color_b) rtt1 = writeColorBuffer(m_rtts.m_currentlyBoundRenderTargets[1], downloadCommandList); + break; - case CELL_GCM_SURFACE_TARGET_MRT2: - if (m_context_dma_color_a) rtt0 = writeColorBuffer(m_rtts.m_currentlyBoundRenderTargets[0], downloadCommandList); - if (m_context_dma_color_b) rtt1 = writeColorBuffer(m_rtts.m_currentlyBoundRenderTargets[1], downloadCommandList); - if (m_context_dma_color_c) rtt2 = writeColorBuffer(m_rtts.m_currentlyBoundRenderTargets[2], downloadCommandList); - break; + case CELL_GCM_SURFACE_TARGET_MRT2: + if (m_context_dma_color_a) rtt0 = writeColorBuffer(m_rtts.m_currentlyBoundRenderTargets[0], downloadCommandList); + if (m_context_dma_color_b) rtt1 = writeColorBuffer(m_rtts.m_currentlyBoundRenderTargets[1], downloadCommandList); + if (m_context_dma_color_c) rtt2 = writeColorBuffer(m_rtts.m_currentlyBoundRenderTargets[2], downloadCommandList); + break; - case 
CELL_GCM_SURFACE_TARGET_MRT3: - if (m_context_dma_color_a) rtt0 = writeColorBuffer(m_rtts.m_currentlyBoundRenderTargets[0], downloadCommandList); - if (m_context_dma_color_b) rtt1 = writeColorBuffer(m_rtts.m_currentlyBoundRenderTargets[1], downloadCommandList); - if (m_context_dma_color_c) rtt2 = writeColorBuffer(m_rtts.m_currentlyBoundRenderTargets[2], downloadCommandList); - if (m_context_dma_color_d) rtt3 = writeColorBuffer(m_rtts.m_currentlyBoundRenderTargets[3], downloadCommandList); - break; + case CELL_GCM_SURFACE_TARGET_MRT3: + if (m_context_dma_color_a) rtt0 = writeColorBuffer(m_rtts.m_currentlyBoundRenderTargets[0], downloadCommandList); + if (m_context_dma_color_b) rtt1 = writeColorBuffer(m_rtts.m_currentlyBoundRenderTargets[1], downloadCommandList); + if (m_context_dma_color_c) rtt2 = writeColorBuffer(m_rtts.m_currentlyBoundRenderTargets[2], downloadCommandList); + if (m_context_dma_color_d) rtt3 = writeColorBuffer(m_rtts.m_currentlyBoundRenderTargets[3], downloadCommandList); + break; + } } if (needTransfer) { @@ -1359,7 +1360,7 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) CloseHandle(handle); fence->Release(); - if (m_set_context_dma_z) + if (m_set_context_dma_z && Ini.GSDumpDepthBuffer.GetValue()) { u32 address = GetAddress(m_surface_offset_z, m_context_dma_z - 0xfeed0000); auto ptr = vm::get_ptr(address); @@ -1387,68 +1388,71 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) size_t colorRowPitch = RSXThread::m_width * 4; colorRowPitch = (colorRowPitch + 255) & ~255; - switch (m_surface_color_target) + if (Ini.GSDumpColorBuffers.GetValue()) { - case CELL_GCM_SURFACE_TARGET_NONE: + switch (m_surface_color_target) + { + case CELL_GCM_SURFACE_TARGET_NONE: + break; + + case CELL_GCM_SURFACE_TARGET_0: + { + u32 address = GetAddress(m_surface_offset_a, m_context_dma_color_a - 0xfeed0000); + void *dstAddress = vm::get_ptr(address); + copyToCellRamAndRelease(dstAddress, rtt0, colorRowPitch, 
RSXThread::m_width, RSXThread::m_height); + } break; - case CELL_GCM_SURFACE_TARGET_0: - { - u32 address = GetAddress(m_surface_offset_a, m_context_dma_color_a - 0xfeed0000); - void *dstAddress = vm::get_ptr(address); - copyToCellRamAndRelease(dstAddress, rtt0, colorRowPitch, RSXThread::m_width, RSXThread::m_height); - } + case CELL_GCM_SURFACE_TARGET_1: + { + u32 address = GetAddress(m_surface_offset_b, m_context_dma_color_b - 0xfeed0000); + void *dstAddress = vm::get_ptr(address); + copyToCellRamAndRelease(dstAddress, rtt1, colorRowPitch, RSXThread::m_width, RSXThread::m_height); + } break; - case CELL_GCM_SURFACE_TARGET_1: - { - u32 address = GetAddress(m_surface_offset_b, m_context_dma_color_b - 0xfeed0000); - void *dstAddress = vm::get_ptr(address); - copyToCellRamAndRelease(dstAddress, rtt1, colorRowPitch, RSXThread::m_width, RSXThread::m_height); - } + case CELL_GCM_SURFACE_TARGET_MRT1: + { + u32 address = GetAddress(m_surface_offset_a, m_context_dma_color_a - 0xfeed0000); + void *dstAddress = vm::get_ptr(address); + copyToCellRamAndRelease(dstAddress, rtt0, colorRowPitch, RSXThread::m_width, RSXThread::m_height); + address = GetAddress(m_surface_offset_b, m_context_dma_color_b - 0xfeed0000); + dstAddress = vm::get_ptr(address); + copyToCellRamAndRelease(dstAddress, rtt1, colorRowPitch, RSXThread::m_width, RSXThread::m_height); + } break; - case CELL_GCM_SURFACE_TARGET_MRT1: - { - u32 address = GetAddress(m_surface_offset_a, m_context_dma_color_a - 0xfeed0000); - void *dstAddress = vm::get_ptr(address); - copyToCellRamAndRelease(dstAddress, rtt0, colorRowPitch, RSXThread::m_width, RSXThread::m_height); - address = GetAddress(m_surface_offset_b, m_context_dma_color_b - 0xfeed0000); - dstAddress = vm::get_ptr(address); - copyToCellRamAndRelease(dstAddress, rtt1, colorRowPitch, RSXThread::m_width, RSXThread::m_height); - } + case CELL_GCM_SURFACE_TARGET_MRT2: + { + u32 address = GetAddress(m_surface_offset_a, m_context_dma_color_a - 0xfeed0000); + void 
*dstAddress = vm::get_ptr(address); + copyToCellRamAndRelease(dstAddress, rtt0, colorRowPitch, RSXThread::m_width, RSXThread::m_height); + address = GetAddress(m_surface_offset_b, m_context_dma_color_b - 0xfeed0000); + dstAddress = vm::get_ptr(address); + copyToCellRamAndRelease(dstAddress, rtt1, colorRowPitch, RSXThread::m_width, RSXThread::m_height); + address = GetAddress(m_surface_offset_c, m_context_dma_color_c - 0xfeed0000); + dstAddress = vm::get_ptr(address); + copyToCellRamAndRelease(dstAddress, rtt2, colorRowPitch, RSXThread::m_width, RSXThread::m_height); + } break; - case CELL_GCM_SURFACE_TARGET_MRT2: - { - u32 address = GetAddress(m_surface_offset_a, m_context_dma_color_a - 0xfeed0000); - void *dstAddress = vm::get_ptr(address); - copyToCellRamAndRelease(dstAddress, rtt0, colorRowPitch, RSXThread::m_width, RSXThread::m_height); - address = GetAddress(m_surface_offset_b, m_context_dma_color_b - 0xfeed0000); - dstAddress = vm::get_ptr(address); - copyToCellRamAndRelease(dstAddress, rtt1, colorRowPitch, RSXThread::m_width, RSXThread::m_height); - address = GetAddress(m_surface_offset_c, m_context_dma_color_c - 0xfeed0000); - dstAddress = vm::get_ptr(address); - copyToCellRamAndRelease(dstAddress, rtt2, colorRowPitch, RSXThread::m_width, RSXThread::m_height); - } - break; - - case CELL_GCM_SURFACE_TARGET_MRT3: - { - u32 address = GetAddress(m_surface_offset_a, m_context_dma_color_a - 0xfeed0000); - void *dstAddress = vm::get_ptr(address); - copyToCellRamAndRelease(dstAddress, rtt0, colorRowPitch, RSXThread::m_width, RSXThread::m_height); - address = GetAddress(m_surface_offset_b, m_context_dma_color_b - 0xfeed0000); - dstAddress = vm::get_ptr(address); - copyToCellRamAndRelease(dstAddress, rtt1, colorRowPitch, RSXThread::m_width, RSXThread::m_height); - address = GetAddress(m_surface_offset_c, m_context_dma_color_c - 0xfeed0000); - dstAddress = vm::get_ptr(address); - copyToCellRamAndRelease(dstAddress, rtt2, colorRowPitch, RSXThread::m_width, 
RSXThread::m_height); - address = GetAddress(m_surface_offset_d, m_context_dma_color_d - 0xfeed0000); - dstAddress = vm::get_ptr(address); - copyToCellRamAndRelease(dstAddress, rtt3, colorRowPitch, RSXThread::m_width, RSXThread::m_height); - } + case CELL_GCM_SURFACE_TARGET_MRT3: + { + u32 address = GetAddress(m_surface_offset_a, m_context_dma_color_a - 0xfeed0000); + void *dstAddress = vm::get_ptr(address); + copyToCellRamAndRelease(dstAddress, rtt0, colorRowPitch, RSXThread::m_width, RSXThread::m_height); + address = GetAddress(m_surface_offset_b, m_context_dma_color_b - 0xfeed0000); + dstAddress = vm::get_ptr(address); + copyToCellRamAndRelease(dstAddress, rtt1, colorRowPitch, RSXThread::m_width, RSXThread::m_height); + address = GetAddress(m_surface_offset_c, m_context_dma_color_c - 0xfeed0000); + dstAddress = vm::get_ptr(address); + copyToCellRamAndRelease(dstAddress, rtt2, colorRowPitch, RSXThread::m_width, RSXThread::m_height); + address = GetAddress(m_surface_offset_d, m_context_dma_color_d - 0xfeed0000); + dstAddress = vm::get_ptr(address); + copyToCellRamAndRelease(dstAddress, rtt3, colorRowPitch, RSXThread::m_width, RSXThread::m_height); + } break; + } } if (needTransfer) From b465992178df642565b83aaaa73dbda4f986585b Mon Sep 17 00:00:00 2001 From: vlj Date: Sat, 6 Jun 2015 02:02:05 +0200 Subject: [PATCH 229/343] d3d12: Fix stencil op Thanks to raven02 for noticing them ! --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 052e0509ca..4fed3862b2 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -783,11 +783,11 @@ bool D3D12GSRender::LoadProgram() } prop.DepthStencil.DepthEnable = m_set_depth_test; - prop.DepthStencil.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ALL; + prop.DepthStencil.DepthWriteMask = m_depth_mask ? 
D3D12_DEPTH_WRITE_MASK_ALL : D3D12_DEPTH_WRITE_MASK_ZERO; prop.DepthStencil.DepthFunc = getDepthFunc(m_depth_func); prop.DepthStencil.StencilEnable = m_set_stencil_test; prop.DepthStencil.StencilReadMask = m_stencil_func_mask; - prop.DepthStencil.StencilWriteMask = m_set_stencil_mask; + prop.DepthStencil.StencilWriteMask = m_stencil_mask; prop.DepthStencil.FrontFace.StencilPassOp = getStencilOp(m_stencil_zpass); prop.DepthStencil.FrontFace.StencilDepthFailOp = getStencilOp(m_stencil_zfail); prop.DepthStencil.FrontFace.StencilFailOp = getStencilOp(m_stencil_fail); @@ -795,10 +795,10 @@ bool D3D12GSRender::LoadProgram() if (m_set_two_sided_stencil_test_enable) { - prop.DepthStencil.BackFace.StencilFailOp = getStencilOp(m_stencil_fail); - prop.DepthStencil.BackFace.StencilFunc = getStencilFunc(m_stencil_func); - prop.DepthStencil.BackFace.StencilPassOp = getStencilOp(m_stencil_zpass); - prop.DepthStencil.BackFace.StencilDepthFailOp = getStencilOp(m_stencil_zfail); + prop.DepthStencil.BackFace.StencilFailOp = getStencilOp(m_back_stencil_fail); + prop.DepthStencil.BackFace.StencilFunc = getStencilFunc(m_back_stencil_func); + prop.DepthStencil.BackFace.StencilPassOp = getStencilOp(m_back_stencil_zpass); + prop.DepthStencil.BackFace.StencilDepthFailOp = getStencilOp(m_back_stencil_zfail); } else { From acb8f82f84b22d03f7f5b10d3754a3f8b7298d5f Mon Sep 17 00:00:00 2001 From: vlj Date: Sat, 6 Jun 2015 02:28:41 +0200 Subject: [PATCH 230/343] d3d12: Add front/back face culling setting --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 46 +++++++++++++++++++++--- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h | 21 +++-------- 2 files changed, 46 insertions(+), 21 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 4fed3862b2..a06b3da8c2 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -802,12 +802,50 @@ bool D3D12GSRender::LoadProgram() } else { - prop.DepthStencil.BackFace.StencilFunc 
= D3D12_COMPARISON_FUNC_NEVER; - prop.DepthStencil.BackFace.StencilFailOp = D3D12_STENCIL_OP_KEEP; - prop.DepthStencil.BackFace.StencilPassOp = D3D12_STENCIL_OP_KEEP; - prop.DepthStencil.BackFace.StencilDepthFailOp = D3D12_STENCIL_OP_KEEP; + prop.DepthStencil.BackFace.StencilPassOp = getStencilOp(m_stencil_zpass); + prop.DepthStencil.BackFace.StencilDepthFailOp = getStencilOp(m_stencil_zfail); + prop.DepthStencil.BackFace.StencilFailOp = getStencilOp(m_stencil_fail); + prop.DepthStencil.BackFace.StencilFunc = getStencilFunc(m_stencil_func); } + // Sensible default value + static D3D12_RASTERIZER_DESC CD3D12_RASTERIZER_DESC = + { + D3D12_FILL_MODE_SOLID, + D3D12_CULL_MODE_NONE, + FALSE, + D3D12_DEFAULT_DEPTH_BIAS, + D3D12_DEFAULT_DEPTH_BIAS_CLAMP, + D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS, + TRUE, + FALSE, + FALSE, + 0, + D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF, + }; + prop.Rasterization = CD3D12_RASTERIZER_DESC; + switch (m_set_cull_face) + { + case GL_FRONT: + prop.Rasterization.CullMode = D3D12_CULL_MODE_FRONT; + break; + case GL_BACK: + prop.Rasterization.CullMode = D3D12_CULL_MODE_BACK; + break; + default: + prop.Rasterization.CullMode = D3D12_CULL_MODE_NONE; + break; + } + + switch (m_front_face) + { + case GL_CW: + prop.Rasterization.FrontCounterClockwise = FALSE; + break; + case GL_CCW: + prop.Rasterization.FrontCounterClockwise = TRUE; + break; + } prop.IASet = m_IASet; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h index 20c9e30be3..8edd38f3a8 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h @@ -18,6 +18,7 @@ struct D3D12PipelineProperties D3D12_BLEND_DESC Blend; unsigned numMRT : 3; D3D12_DEPTH_STENCIL_DESC DepthStencil; + D3D12_RASTERIZER_DESC Rasterization; bool operator==(const D3D12PipelineProperties &in) const { @@ -42,6 +43,8 @@ struct D3D12PipelineProperties return false; if (memcmp(&Blend, &in.Blend, sizeof(D3D12_BLEND_DESC))) return false; + if 
(memcmp(&Rasterization, &in.Rasterization, sizeof(D3D12_RASTERIZER_DESC))) + return false; return Topology == in.Topology && DepthStencilFormat == in.DepthStencilFormat && numMRT == in.numMRT; } }; @@ -144,25 +147,9 @@ struct D3D12Traits graphicPipelineStateDesc.pRootSignature = extraData.second[fragmentProgramData.m_textureCount]; result->second = fragmentProgramData.m_textureCount; - // Sensible default value - static D3D12_RASTERIZER_DESC CD3D12_RASTERIZER_DESC = - { - D3D12_FILL_MODE_SOLID, - D3D12_CULL_MODE_NONE, - FALSE, - D3D12_DEFAULT_DEPTH_BIAS, - D3D12_DEFAULT_DEPTH_BIAS_CLAMP, - D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS, - TRUE, - FALSE, - FALSE, - 0, - D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF, - }; - graphicPipelineStateDesc.BlendState = pipelineProperties.Blend; graphicPipelineStateDesc.DepthStencilState = pipelineProperties.DepthStencil; - graphicPipelineStateDesc.RasterizerState = CD3D12_RASTERIZER_DESC; + graphicPipelineStateDesc.RasterizerState = pipelineProperties.Rasterization; graphicPipelineStateDesc.PrimitiveTopologyType = pipelineProperties.Topology; graphicPipelineStateDesc.NumRenderTargets = pipelineProperties.numMRT; From bdeb08e04523d6142544b523564703395b647bad Mon Sep 17 00:00:00 2001 From: vlj Date: Sat, 6 Jun 2015 17:42:21 +0200 Subject: [PATCH 231/343] d3d12: Add color masking --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 2 ++ rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h | 5 +++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index a06b3da8c2..fd5c63a9f7 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -847,6 +847,8 @@ bool D3D12GSRender::LoadProgram() break; } + prop.SampleMask = m_color_mask_r | (m_color_mask_g << 1) | (m_color_mask_b << 2) | (m_color_mask_a << 3); + prop.IASet = m_IASet; m_PSO = m_cachePSO.getGraphicPipelineState(m_cur_vertex_prog, m_cur_fragment_prog, prop, std::make_pair(m_device, 
m_rootSignatures)); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h index 8edd38f3a8..0b0326b806 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h @@ -19,6 +19,7 @@ struct D3D12PipelineProperties unsigned numMRT : 3; D3D12_DEPTH_STENCIL_DESC DepthStencil; D3D12_RASTERIZER_DESC Rasterization; + UINT SampleMask; bool operator==(const D3D12PipelineProperties &in) const { @@ -45,7 +46,7 @@ struct D3D12PipelineProperties return false; if (memcmp(&Rasterization, &in.Rasterization, sizeof(D3D12_RASTERIZER_DESC))) return false; - return Topology == in.Topology && DepthStencilFormat == in.DepthStencilFormat && numMRT == in.numMRT; + return Topology == in.Topology && DepthStencilFormat == in.DepthStencilFormat && numMRT == in.numMRT && SampleMask == in.SampleMask; } }; @@ -160,7 +161,7 @@ struct D3D12Traits graphicPipelineStateDesc.InputLayout.pInputElementDescs = pipelineProperties.IASet.data(); graphicPipelineStateDesc.InputLayout.NumElements = (UINT)pipelineProperties.IASet.size(); graphicPipelineStateDesc.SampleDesc.Count = 1; - graphicPipelineStateDesc.SampleMask = UINT_MAX; + graphicPipelineStateDesc.SampleMask = pipelineProperties.SampleMask; graphicPipelineStateDesc.NodeMask = 1; extraData.first->CreateGraphicsPipelineState(&graphicPipelineStateDesc, IID_PPV_ARGS(&result->first)); From 27e56b61992fcdf01f75be67bbe5bfb69912b101 Mon Sep 17 00:00:00 2001 From: vlj Date: Sat, 6 Jun 2015 17:58:00 +0200 Subject: [PATCH 232/343] d3d12: Do not set mask if not requested --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index fd5c63a9f7..7f4327174e 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -847,7 +847,10 @@ bool D3D12GSRender::LoadProgram() break; } - prop.SampleMask = m_color_mask_r | 
(m_color_mask_g << 1) | (m_color_mask_b << 2) | (m_color_mask_a << 3); + if (m_set_color_mask) + prop.SampleMask = m_color_mask_r | (m_color_mask_g << 1) | (m_color_mask_b << 2) | (m_color_mask_a << 3); + else + prop.SampleMask = UINT_MAX; prop.IASet = m_IASet; From 8474cd8064c5b8b7fd4740bcc553701cc6115e1b Mon Sep 17 00:00:00 2001 From: vlj Date: Sat, 6 Jun 2015 17:58:10 +0200 Subject: [PATCH 233/343] d3d12: Fix pitch for compressed textures --- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 34 ++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index c3ec415954..80fe70c446 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -169,6 +169,7 @@ ID3D12Resource *uploadSingleTexture( const u32 texaddr = GetAddress(texture.GetOffset(), texture.GetLocation()); bool is_swizzled = !(texture.GetFormat() & CELL_GCM_TEXTURE_LN); + size_t srcPitch; switch (format) { case CELL_GCM_TEXTURE_COMPRESSED_HILO8: @@ -181,103 +182,128 @@ ID3D12Resource *uploadSingleTexture( case CELL_GCM_TEXTURE_B8: blockSizeInByte = 1; blockWidthInPixel = 1, blockHeightInPixel = 1; + srcPitch = w; break; case CELL_GCM_TEXTURE_A1R5G5B5: blockSizeInByte = 2; blockHeightInPixel = 1, blockWidthInPixel = 1; + srcPitch = w * 4; break; case CELL_GCM_TEXTURE_A4R4G4B4: blockSizeInByte = 2; blockWidthInPixel = 1, blockHeightInPixel = 1; + srcPitch = w * 4; break; case CELL_GCM_TEXTURE_R5G6B5: blockSizeInByte = 2; blockWidthInPixel = 1, blockHeightInPixel = 1; + srcPitch = w * 4; break; case CELL_GCM_TEXTURE_A8R8G8B8: blockSizeInByte = 4; blockWidthInPixel = 1, blockHeightInPixel = 1; + srcPitch = w * 4; break; case CELL_GCM_TEXTURE_COMPRESSED_DXT1: blockSizeInByte = 8; blockWidthInPixel = 4, blockHeightInPixel = 4; + srcPitch = w * 2; break; case CELL_GCM_TEXTURE_COMPRESSED_DXT23: blockSizeInByte = 16; blockWidthInPixel = 4, blockHeightInPixel = 4; + srcPitch = w * 
8; break; case CELL_GCM_TEXTURE_COMPRESSED_DXT45: blockSizeInByte = 16; blockWidthInPixel = 4, blockHeightInPixel = 4; + srcPitch = w * 8; break; case CELL_GCM_TEXTURE_G8B8: blockSizeInByte = 2; blockWidthInPixel = 1, blockHeightInPixel = 1; + srcPitch = w * 2; break; case CELL_GCM_TEXTURE_R6G5B5: // Not native blockSizeInByte = 4; blockWidthInPixel = 1, blockHeightInPixel = 1; + srcPitch = w * 4; break; case CELL_GCM_TEXTURE_DEPTH24_D8: blockSizeInByte = 4; blockWidthInPixel = 1, blockHeightInPixel = 1; + srcPitch = w * 4; break; case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: blockSizeInByte = 4; blockWidthInPixel = 1, blockHeightInPixel = 1; + srcPitch = w * 4; break; case CELL_GCM_TEXTURE_DEPTH16: blockSizeInByte = 2; blockWidthInPixel = 1, blockHeightInPixel = 1; + srcPitch = w * 2; break; case CELL_GCM_TEXTURE_DEPTH16_FLOAT: blockSizeInByte = 2; blockWidthInPixel = 1, blockHeightInPixel = 1; + srcPitch = w * 2; break; case CELL_GCM_TEXTURE_X16: blockSizeInByte = 2; blockWidthInPixel = 1, blockHeightInPixel = 1; + srcPitch = w * 2; break; case CELL_GCM_TEXTURE_Y16_X16: blockSizeInByte = 4; blockWidthInPixel = 1, blockHeightInPixel = 1; + srcPitch = w * 4; break; case CELL_GCM_TEXTURE_R5G5B5A1: blockSizeInByte = 2; blockWidthInPixel = 1, blockHeightInPixel = 1; + srcPitch = w * 2; break; case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: blockSizeInByte = 8; blockWidthInPixel = 1, blockHeightInPixel = 1; + srcPitch = w * 8; break; case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: blockSizeInByte = 16; blockWidthInPixel = 1, blockHeightInPixel = 1; + srcPitch = w * 16; break; case CELL_GCM_TEXTURE_X32_FLOAT: blockSizeInByte = 4; blockWidthInPixel = 1, blockHeightInPixel = 1; + srcPitch = w * 4; break; case CELL_GCM_TEXTURE_D1R5G5B5: blockSizeInByte = 2; blockWidthInPixel = 1, blockHeightInPixel = 1; + srcPitch = w * 2; break; case CELL_GCM_TEXTURE_Y16_X16_FLOAT: blockSizeInByte = 4; blockWidthInPixel = 1, blockHeightInPixel = 1; + srcPitch = w * 4; break; case 
CELL_GCM_TEXTURE_D8R8G8B8: blockSizeInByte = 4; blockWidthInPixel = 1, blockHeightInPixel = 1; + srcPitch = w * 4; break; case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: blockSizeInByte = 4; blockWidthInPixel = 2, blockHeightInPixel = 2; + srcPitch = w * 4; break; case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: blockSizeInByte = 4; blockWidthInPixel = 2, blockHeightInPixel = 2; + srcPitch = w * 4; break; } @@ -308,8 +334,6 @@ ID3D12Resource *uploadSingleTexture( // Upload with correct rowpitch for (unsigned row = 0; row < heightInBlocks; row++) { - size_t m_texture_pitch = powerOf2Align(w * blockSizeInByte, 4); - if (!m_texture_pitch) m_texture_pitch = rowPitch; switch (format) { case CELL_GCM_TEXTURE_A8R8G8B8: @@ -330,7 +354,7 @@ ID3D12Resource *uploadSingleTexture( dst[(row * rowPitch / 4) + j] = src[LinearToSwizzleAddress(j, row, 0, log2width, log2height, 0)]; } else - streamBuffer((char*)textureData + row * rowPitch, (char*)pixels + row * m_texture_pitch, m_texture_pitch); + streamBuffer((char*)textureData + row * rowPitch, (char*)pixels + row * srcPitch, srcPitch); break; } case CELL_GCM_TEXTURE_A4R4G4B4: @@ -340,14 +364,14 @@ ID3D12Resource *uploadSingleTexture( for (int j = 0; j < w; j++) { - u16 tmp = src[row * m_texture_pitch / 2 + j]; + u16 tmp = src[row * srcPitch / 2 + j]; dst[row * rowPitch / 2 + j] = (tmp >> 8) | (tmp << 8); } break; } default: { - streamBuffer((char*)textureData + row * rowPitch, (char*)pixels + row * m_texture_pitch, m_texture_pitch); + streamBuffer((char*)textureData + row * rowPitch, (char*)pixels + row * srcPitch, srcPitch); break; } } From e72d098ea16ab789766fb198f1d5198d6d15b843 Mon Sep 17 00:00:00 2001 From: raven02 Date: Sat, 6 Jun 2015 17:43:51 +0800 Subject: [PATCH 234/343] d3d12: use CELL_GCM suffix --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 76 ++++++++++----------------- rpcs3/Emu/RSX/GCM.h | 27 ++++++++++ 2 files changed, 55 insertions(+), 48 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp 
b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 7f4327174e..74b5769dfe 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -601,18 +601,14 @@ static D3D12_STENCIL_OP getStencilOp(u32 op) { switch (op) { - case GL_KEEP: - return D3D12_STENCIL_OP_KEEP; - case GL_ZERO: - return D3D12_STENCIL_OP_ZERO; - case GL_REPLACE: - return D3D12_STENCIL_OP_REPLACE; - case GL_INCR: - return D3D12_STENCIL_OP_INCR; - case GL_DECR: - return D3D12_STENCIL_OP_DECR; - case GL_INVERT: - return D3D12_STENCIL_OP_INVERT; + case CELL_GCM_KEEP: return D3D12_STENCIL_OP_KEEP; + case CELL_GCM_ZERO: return D3D12_STENCIL_OP_ZERO; + case CELL_GCM_REPLACE: return D3D12_STENCIL_OP_REPLACE; + case CELL_GCM_INCR: return D3D12_STENCIL_OP_INCR; + case CELL_GCM_DECR: return D3D12_STENCIL_OP_DECR; + case CELL_GCM_INCR_WRAP: + case CELL_GCM_DECR_WRAP: + LOG_WARNING(RSX, "Unsupported Stencil Op %d", op); } } @@ -620,22 +616,14 @@ static D3D12_COMPARISON_FUNC getStencilFunc(u32 op) { switch (op) { - case GL_NEVER: - return D3D12_COMPARISON_FUNC_NEVER; - case GL_LESS: - return D3D12_COMPARISON_FUNC_LESS; - case GL_LEQUAL: - return D3D12_COMPARISON_FUNC_LESS_EQUAL; - case GL_GREATER: - return D3D12_COMPARISON_FUNC_GREATER; - case GL_GEQUAL: - return D3D12_COMPARISON_FUNC_GREATER_EQUAL; - case GL_EQUAL: - return D3D12_COMPARISON_FUNC_EQUAL; - case GL_NOTEQUAL: - return D3D12_COMPARISON_FUNC_NOT_EQUAL; - case GL_ALWAYS: - return D3D12_COMPARISON_FUNC_ALWAYS; + case CELL_GCM_NEVER: return D3D12_COMPARISON_FUNC_NEVER; + case CELL_GCM_LESS: return D3D12_COMPARISON_FUNC_LESS; + case CELL_GCM_EQUAL: return D3D12_COMPARISON_FUNC_EQUAL; + case CELL_GCM_LEQUAL: return D3D12_COMPARISON_FUNC_LESS_EQUAL; + case CELL_GCM_GREATER: return D3D12_COMPARISON_FUNC_GREATER; + case CELL_GCM_NOTEQUAL: return D3D12_COMPARISON_FUNC_NOT_EQUAL; + case CELL_GCM_GEQUAL: return D3D12_COMPARISON_FUNC_GREATER_EQUAL; + case CELL_GCM_ALWAYS: return D3D12_COMPARISON_FUNC_ALWAYS; } } @@ -643,22 +631,14 
@@ static D3D12_COMPARISON_FUNC getDepthFunc(u32 op) { switch (op) { - case GL_NEVER: - return D3D12_COMPARISON_FUNC_NEVER; - case GL_LESS: - return D3D12_COMPARISON_FUNC_LESS; - case GL_LEQUAL: - return D3D12_COMPARISON_FUNC_LESS_EQUAL; - case GL_GREATER: - return D3D12_COMPARISON_FUNC_GREATER; - case GL_GEQUAL: - return D3D12_COMPARISON_FUNC_GREATER_EQUAL; - case GL_EQUAL: - return D3D12_COMPARISON_FUNC_EQUAL; - case GL_NOTEQUAL: - return D3D12_COMPARISON_FUNC_NOT_EQUAL; - case GL_ALWAYS: - return D3D12_COMPARISON_FUNC_ALWAYS; + case CELL_GCM_NEVER: return D3D12_COMPARISON_FUNC_NEVER; + case CELL_GCM_LESS: return D3D12_COMPARISON_FUNC_LESS; + case CELL_GCM_EQUAL: return D3D12_COMPARISON_FUNC_EQUAL; + case CELL_GCM_LEQUAL: return D3D12_COMPARISON_FUNC_LESS_EQUAL; + case CELL_GCM_GREATER: return D3D12_COMPARISON_FUNC_GREATER; + case CELL_GCM_NOTEQUAL: return D3D12_COMPARISON_FUNC_NOT_EQUAL; + case CELL_GCM_GEQUAL: return D3D12_COMPARISON_FUNC_GREATER_EQUAL; + case CELL_GCM_ALWAYS: return D3D12_COMPARISON_FUNC_ALWAYS; } } @@ -826,10 +806,10 @@ bool D3D12GSRender::LoadProgram() prop.Rasterization = CD3D12_RASTERIZER_DESC; switch (m_set_cull_face) { - case GL_FRONT: + case CELL_GCM_FRONT: prop.Rasterization.CullMode = D3D12_CULL_MODE_FRONT; break; - case GL_BACK: + case CELL_GCM_BACK: prop.Rasterization.CullMode = D3D12_CULL_MODE_BACK; break; default: @@ -839,10 +819,10 @@ bool D3D12GSRender::LoadProgram() switch (m_front_face) { - case GL_CW: + case CELL_GCM_CW: prop.Rasterization.FrontCounterClockwise = FALSE; break; - case GL_CCW: + case CELL_GCM_CCW: prop.Rasterization.FrontCounterClockwise = TRUE; break; } diff --git a/rpcs3/Emu/RSX/GCM.h b/rpcs3/Emu/RSX/GCM.h index 8772f27374..0c9e1cff24 100644 --- a/rpcs3/Emu/RSX/GCM.h +++ b/rpcs3/Emu/RSX/GCM.h @@ -179,6 +179,33 @@ enum CELL_GCM_ONE_MINUS_CONSTANT_COLOR = 0x8002, CELL_GCM_CONSTANT_ALPHA = 0x8003, CELL_GCM_ONE_MINUS_CONSTANT_ALPHA = 0x8004, + + // Stencil/Depth Compare Function + CELL_GCM_NEVER = 0x0200, + 
CELL_GCM_LESS = 0x0201, + CELL_GCM_EQUAL = 0x0202, + CELL_GCM_LEQUAL = 0x0203, + CELL_GCM_GREATER = 0x0204, + CELL_GCM_NOTEQUAL = 0x0205, + CELL_GCM_GEQUAL = 0x0206, + CELL_GCM_ALWAYS = 0x0207, + + // Stencil Op + CELL_GCM_KEEP = 0x1E00, + CELL_GCM_REPLACE = 0x1E01, + CELL_GCM_INCR = 0x1E02, + CELL_GCM_DECR = 0x1E03, + CELL_GCM_INCR_WRAP = 0x8507, + CELL_GCM_DECR_WRAP = 0x8508, + + // Front Face + CELL_GCM_FRONT = 0x0404, + CELL_GCM_BACK = 0x0405, + CELL_GCM_FRONT_AND_BACK = 0x0408, + + // Cull Face + CELL_GCM_CW = 0x0900, + CELL_GCM_CCW = 0x0901, }; // GCM Surface From 4ef66e6901113a9d65bbc2941a3e286d8891a954 Mon Sep 17 00:00:00 2001 From: raven02 Date: Sat, 6 Jun 2015 18:22:33 +0800 Subject: [PATCH 235/343] d3d12: Add blend for MRT --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 49 +++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 74b5769dfe..fcf6e81aaf 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -700,12 +700,37 @@ bool D3D12GSRender::LoadProgram() if (m_set_blend) { prop.Blend.RenderTarget[0].BlendEnable = true; + + if (m_set_blend_mrt1) + prop.Blend.RenderTarget[1].BlendEnable = true; + if (m_set_blend_mrt2) + prop.Blend.RenderTarget[2].BlendEnable = true; + if (m_set_blend_mrt3) + prop.Blend.RenderTarget[3].BlendEnable = true; } if (m_set_blend_equation) { prop.Blend.RenderTarget[0].BlendOp = getBlendOp(m_blend_equation_rgb); prop.Blend.RenderTarget[0].BlendOpAlpha = getBlendOp(m_blend_equation_alpha); + + if (m_set_blend_mrt1) + { + prop.Blend.RenderTarget[1].BlendOp = getBlendOp(m_blend_equation_rgb); + prop.Blend.RenderTarget[1].BlendOpAlpha = getBlendOp(m_blend_equation_alpha); + } + + if (m_set_blend_mrt2) + { + prop.Blend.RenderTarget[2].BlendOp = getBlendOp(m_blend_equation_rgb); + prop.Blend.RenderTarget[2].BlendOpAlpha = getBlendOp(m_blend_equation_alpha); + } + + if (m_set_blend_mrt3) + { + 
prop.Blend.RenderTarget[3].BlendOp = getBlendOp(m_blend_equation_rgb); + prop.Blend.RenderTarget[3].BlendOpAlpha = getBlendOp(m_blend_equation_alpha); + } } if (m_set_blend_sfactor && m_set_blend_dfactor) @@ -714,6 +739,30 @@ bool D3D12GSRender::LoadProgram() prop.Blend.RenderTarget[0].DestBlend = getBlendFactor(m_blend_dfactor_rgb); prop.Blend.RenderTarget[0].SrcBlendAlpha = getBlendFactor(m_blend_sfactor_alpha); prop.Blend.RenderTarget[0].DestBlendAlpha = getBlendFactor(m_blend_dfactor_alpha); + + if (m_set_blend_mrt1) + { + prop.Blend.RenderTarget[1].SrcBlend = getBlendFactor(m_blend_sfactor_rgb); + prop.Blend.RenderTarget[1].DestBlend = getBlendFactor(m_blend_dfactor_rgb); + prop.Blend.RenderTarget[1].SrcBlendAlpha = getBlendFactor(m_blend_sfactor_alpha); + prop.Blend.RenderTarget[1].DestBlendAlpha = getBlendFactor(m_blend_dfactor_alpha); + } + + if (m_set_blend_mrt2) + { + prop.Blend.RenderTarget[2].SrcBlend = getBlendFactor(m_blend_sfactor_rgb); + prop.Blend.RenderTarget[2].DestBlend = getBlendFactor(m_blend_dfactor_rgb); + prop.Blend.RenderTarget[2].SrcBlendAlpha = getBlendFactor(m_blend_sfactor_alpha); + prop.Blend.RenderTarget[2].DestBlendAlpha = getBlendFactor(m_blend_dfactor_alpha); + } + + if (m_set_blend_mrt3) + { + prop.Blend.RenderTarget[3].SrcBlend = getBlendFactor(m_blend_sfactor_rgb); + prop.Blend.RenderTarget[3].DestBlend = getBlendFactor(m_blend_dfactor_rgb); + prop.Blend.RenderTarget[3].SrcBlendAlpha = getBlendFactor(m_blend_sfactor_alpha); + prop.Blend.RenderTarget[3].DestBlendAlpha = getBlendFactor(m_blend_dfactor_alpha); + } } if (m_set_logic_op) From 68d3f6f536ba87b0629597daf4ecd8b23ac4be5e Mon Sep 17 00:00:00 2001 From: raven02 Date: Sat, 6 Jun 2015 18:58:51 +0800 Subject: [PATCH 236/343] d3d12: minor cleanup --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index fcf6e81aaf..e48b1b247d 100644 --- 
a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -1066,7 +1066,6 @@ void D3D12GSRender::ExecCMD() check(commandList->Close()); m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)&commandList); m_indexed_array.Reset(); - WriteDepthBuffer(); } void D3D12GSRender::Flip() @@ -1163,11 +1162,6 @@ D3D12GSRender::ResourceStorage& D3D12GSRender::getNonCurrentResourceStorage() return m_perFrameStorage[1 - m_swapChain->GetCurrentBackBufferIndex()]; } - -void D3D12GSRender::WriteDepthBuffer() -{ -} - ID3D12Resource * D3D12GSRender::writeColorBuffer(ID3D12Resource * RTT, ID3D12GraphicsCommandList * cmdlist) { ID3D12Resource *Result; From 33daa81e6f0d5f0acc27664db1b4733f2578eed9 Mon Sep 17 00:00:00 2001 From: vlj Date: Sat, 6 Jun 2015 18:14:35 +0200 Subject: [PATCH 237/343] d3d12: Fix some src pitch --- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index 80fe70c446..f5b75a22c6 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -187,17 +187,17 @@ ID3D12Resource *uploadSingleTexture( case CELL_GCM_TEXTURE_A1R5G5B5: blockSizeInByte = 2; blockHeightInPixel = 1, blockWidthInPixel = 1; - srcPitch = w * 4; + srcPitch = w * 2; break; case CELL_GCM_TEXTURE_A4R4G4B4: blockSizeInByte = 2; blockWidthInPixel = 1, blockHeightInPixel = 1; - srcPitch = w * 4; + srcPitch = w * 2; break; case CELL_GCM_TEXTURE_R5G6B5: blockSizeInByte = 2; blockWidthInPixel = 1, blockHeightInPixel = 1; - srcPitch = w * 4; + srcPitch = w * 2; break; case CELL_GCM_TEXTURE_A8R8G8B8: blockSizeInByte = 4; @@ -228,7 +228,7 @@ ID3D12Resource *uploadSingleTexture( // Not native blockSizeInByte = 4; blockWidthInPixel = 1, blockHeightInPixel = 1; - srcPitch = w * 4; + srcPitch = w * 2; break; case CELL_GCM_TEXTURE_DEPTH24_D8: blockSizeInByte = 4; From 
b8ba9026573ce425f6e2b4725063a3c30716e76a Mon Sep 17 00:00:00 2001 From: vlj Date: Sat, 6 Jun 2015 22:31:52 +0200 Subject: [PATCH 238/343] d3d12: Fix rtt size (and crash in render_to_texture test) --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 69 ++++++++++--------- rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp | 24 +++---- 2 files changed, 47 insertions(+), 46 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index e48b1b247d..1592521cf2 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -1003,8 +1003,8 @@ void D3D12GSRender::ExecCMD() { 0.f, 0.f, - (float)RSXThread::m_width, - (float)RSXThread::m_height, + (float)m_surface_clip_w, + (float)m_surface_clip_h, -1.f, 1.f }; @@ -1012,7 +1012,7 @@ void D3D12GSRender::ExecCMD() D3D12_RECT box = { 0, 0, - (LONG)RSXThread::m_width, (LONG)RSXThread::m_height, + (LONG)m_surface_clip_w, (LONG)m_surface_clip_h, }; commandList->RSSetScissorRects(1, &box); @@ -1098,7 +1098,7 @@ void D3D12GSRender::Flip() src.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, dst.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; src.SubresourceIndex = 0, dst.SubresourceIndex = 0; src.pResource = m_rtts.m_currentlyBoundRenderTargets[0], dst.pResource = m_backBuffer[m_swapChain->GetCurrentBackBufferIndex()]; - D3D12_BOX box = { 0, 0, 0, RSXThread::m_width, RSXThread::m_height, 1 }; + D3D12_BOX box = { 0, 0, 0, m_surface_clip_w, m_surface_clip_h, 1 }; commandList->CopyTextureRegion(&dst, 0, 0, 0, &src, &box); barriers[0].Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; @@ -1165,15 +1165,16 @@ D3D12GSRender::ResourceStorage& D3D12GSRender::getNonCurrentResourceStorage() ID3D12Resource * D3D12GSRender::writeColorBuffer(ID3D12Resource * RTT, ID3D12GraphicsCommandList * cmdlist) { ID3D12Resource *Result; - size_t rowPitch = RSXThread::m_width * 4; + size_t w = m_surface_clip_w, h = m_surface_clip_h; + size_t rowPitch = w * 4; rowPitch = (rowPitch + 
255) & ~255; D3D12_HEAP_PROPERTIES heapProp = {}; heapProp.Type = D3D12_HEAP_TYPE_READBACK; - D3D12_RESOURCE_DESC resdesc = getBufferResourceDesc(rowPitch * RSXThread::m_height); + D3D12_RESOURCE_DESC resdesc = getBufferResourceDesc(rowPitch * h); size_t heapOffset = powerOf2Align(m_readbackResources.m_putPos.load(), 65536); - size_t sizeInByte = rowPitch * RSXThread::m_height; + size_t sizeInByte = rowPitch * h; if (heapOffset + sizeInByte >= 1024 * 1024 * 128) // If it will be stored past heap size heapOffset = 0; @@ -1201,8 +1202,8 @@ ID3D12Resource * D3D12GSRender::writeColorBuffer(ID3D12Resource * RTT, ID3D12Gra dst.PlacedFootprint.Offset = 0; dst.PlacedFootprint.Footprint.Depth = 1; dst.PlacedFootprint.Footprint.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - dst.PlacedFootprint.Footprint.Height = (UINT)RSXThread::m_height; - dst.PlacedFootprint.Footprint.Width = (UINT)RSXThread::m_width; + dst.PlacedFootprint.Footprint.Height = (UINT)h; + dst.PlacedFootprint.Footprint.Width = (UINT)w; dst.PlacedFootprint.Footprint.RowPitch = (UINT)rowPitch; cmdlist->CopyTextureRegion(&dst, 0, 0, 0, &src, nullptr); cmdlist->ResourceBarrier(1, &getResourceBarrierTransition(RTT, D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET)); @@ -1238,7 +1239,7 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) ID3D12Resource *writeDest, *depthConverted; ID3D12GraphicsCommandList *convertCommandList; ID3D12DescriptorHeap *descriptorHeap; - size_t depthRowPitch = RSXThread::m_width; + size_t depthRowPitch = m_surface_clip_w; depthRowPitch = (depthRowPitch + 255) & ~255; bool needTransfer = (m_set_context_dma_z && Ini.GSDumpDepthBuffer.GetValue()) || @@ -1248,12 +1249,12 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) { D3D12_HEAP_PROPERTIES heapProp = {}; heapProp.Type = D3D12_HEAP_TYPE_DEFAULT; - D3D12_RESOURCE_DESC resdesc = getTexture2DResourceDesc(RSXThread::m_width, RSXThread::m_height, DXGI_FORMAT_R8_UNORM); + 
D3D12_RESOURCE_DESC resdesc = getTexture2DResourceDesc(m_surface_clip_w, m_surface_clip_h, DXGI_FORMAT_R8_UNORM); resdesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; size_t heapOffset = m_readbackResources.m_putPos.load(); heapOffset = powerOf2Align(heapOffset, 65536); - size_t sizeInByte = RSXThread::m_width * RSXThread::m_height; + size_t sizeInByte = m_surface_clip_w * m_surface_clip_h; if (heapOffset + sizeInByte >= 1024 * 1024 * 128) // If it will be stored past heap size heapOffset = 0; @@ -1271,7 +1272,7 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) heapOffset = m_readbackResources.m_putPos.load(); heapOffset = powerOf2Align(heapOffset, 65536); - sizeInByte = depthRowPitch * RSXThread::m_height; + sizeInByte = depthRowPitch * m_surface_clip_h; if (heapOffset + sizeInByte >= 1024 * 1024 * 128) // If it will be stored past heap size heapOffset = 0; @@ -1333,7 +1334,7 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) convertCommandList->SetComputeRootSignature(m_convertRootSignature); convertCommandList->SetDescriptorHeaps(1, &descriptorHeap); convertCommandList->SetComputeRootDescriptorTable(0, descriptorHeap->GetGPUDescriptorHandleForHeapStart()); - convertCommandList->Dispatch(RSXThread::m_width / 8, RSXThread::m_height / 8, 1); + convertCommandList->Dispatch(m_surface_clip_w / 8, m_surface_clip_h / 8, 1); // Flush UAV D3D12_RESOURCE_BARRIER uavbarrier = {}; @@ -1371,8 +1372,8 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) dst.PlacedFootprint.Offset = 0; dst.PlacedFootprint.Footprint.Depth = 1; dst.PlacedFootprint.Footprint.Format = DXGI_FORMAT_R8_UNORM; - dst.PlacedFootprint.Footprint.Height = RSXThread::m_height; - dst.PlacedFootprint.Footprint.Width = RSXThread::m_width; + dst.PlacedFootprint.Footprint.Height = m_surface_clip_h; + dst.PlacedFootprint.Footprint.Width = m_surface_clip_w; dst.PlacedFootprint.Footprint.RowPitch = (UINT)depthRowPitch; 
downloadCommandList->CopyTextureRegion(&dst, 0, 0, 0, &src, nullptr); } @@ -1434,15 +1435,15 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) unsigned char *writeDestPtr; check(writeDest->Map(0, nullptr, (void**)&writeDestPtr)); // TODO : this should be done by the gpu - for (unsigned row = 0; row < RSXThread::m_height; row++) + for (unsigned row = 0; row < m_surface_clip_h; row++) { - for (unsigned i = 0; i < RSXThread::m_width; i++) + for (unsigned i = 0; i < m_surface_clip_w; i++) { unsigned char c = writeDestPtr[row * depthRowPitch + i]; - ptrAsChar[4 * (row * RSXThread::m_width + i)] = c; - ptrAsChar[4 * (row * RSXThread::m_width + i) + 1] = c; - ptrAsChar[4 * (row * RSXThread::m_width + i) + 2] = c; - ptrAsChar[4 * (row * RSXThread::m_width + i) + 3] = c; + ptrAsChar[4 * (row * m_surface_clip_w + i)] = c; + ptrAsChar[4 * (row * m_surface_clip_w + i) + 1] = c; + ptrAsChar[4 * (row * m_surface_clip_w + i) + 2] = c; + ptrAsChar[4 * (row * m_surface_clip_w + i) + 3] = c; } } writeDest->Release(); @@ -1451,7 +1452,7 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) convertCommandList->Release(); } - size_t colorRowPitch = RSXThread::m_width * 4; + size_t colorRowPitch = m_surface_clip_w * 4; colorRowPitch = (colorRowPitch + 255) & ~255; if (Ini.GSDumpColorBuffers.GetValue()) @@ -1465,7 +1466,7 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) { u32 address = GetAddress(m_surface_offset_a, m_context_dma_color_a - 0xfeed0000); void *dstAddress = vm::get_ptr(address); - copyToCellRamAndRelease(dstAddress, rtt0, colorRowPitch, RSXThread::m_width, RSXThread::m_height); + copyToCellRamAndRelease(dstAddress, rtt0, colorRowPitch, m_surface_clip_w, m_surface_clip_h); } break; @@ -1473,7 +1474,7 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) { u32 address = GetAddress(m_surface_offset_b, m_context_dma_color_b - 0xfeed0000); void *dstAddress = vm::get_ptr(address); - 
copyToCellRamAndRelease(dstAddress, rtt1, colorRowPitch, RSXThread::m_width, RSXThread::m_height); + copyToCellRamAndRelease(dstAddress, rtt1, colorRowPitch, m_surface_clip_w, m_surface_clip_h); } break; @@ -1481,10 +1482,10 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) { u32 address = GetAddress(m_surface_offset_a, m_context_dma_color_a - 0xfeed0000); void *dstAddress = vm::get_ptr(address); - copyToCellRamAndRelease(dstAddress, rtt0, colorRowPitch, RSXThread::m_width, RSXThread::m_height); + copyToCellRamAndRelease(dstAddress, rtt0, colorRowPitch, m_surface_clip_w, m_surface_clip_h); address = GetAddress(m_surface_offset_b, m_context_dma_color_b - 0xfeed0000); dstAddress = vm::get_ptr(address); - copyToCellRamAndRelease(dstAddress, rtt1, colorRowPitch, RSXThread::m_width, RSXThread::m_height); + copyToCellRamAndRelease(dstAddress, rtt1, colorRowPitch, m_surface_clip_w, m_surface_clip_h); } break; @@ -1492,13 +1493,13 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) { u32 address = GetAddress(m_surface_offset_a, m_context_dma_color_a - 0xfeed0000); void *dstAddress = vm::get_ptr(address); - copyToCellRamAndRelease(dstAddress, rtt0, colorRowPitch, RSXThread::m_width, RSXThread::m_height); + copyToCellRamAndRelease(dstAddress, rtt0, colorRowPitch, m_surface_clip_w, m_surface_clip_h); address = GetAddress(m_surface_offset_b, m_context_dma_color_b - 0xfeed0000); dstAddress = vm::get_ptr(address); - copyToCellRamAndRelease(dstAddress, rtt1, colorRowPitch, RSXThread::m_width, RSXThread::m_height); + copyToCellRamAndRelease(dstAddress, rtt1, colorRowPitch, m_surface_clip_w, m_surface_clip_h); address = GetAddress(m_surface_offset_c, m_context_dma_color_c - 0xfeed0000); dstAddress = vm::get_ptr(address); - copyToCellRamAndRelease(dstAddress, rtt2, colorRowPitch, RSXThread::m_width, RSXThread::m_height); + copyToCellRamAndRelease(dstAddress, rtt2, colorRowPitch, m_surface_clip_w, m_surface_clip_h); } break; @@ 
-1506,16 +1507,16 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) { u32 address = GetAddress(m_surface_offset_a, m_context_dma_color_a - 0xfeed0000); void *dstAddress = vm::get_ptr(address); - copyToCellRamAndRelease(dstAddress, rtt0, colorRowPitch, RSXThread::m_width, RSXThread::m_height); + copyToCellRamAndRelease(dstAddress, rtt0, colorRowPitch, m_surface_clip_w, m_surface_clip_h); address = GetAddress(m_surface_offset_b, m_context_dma_color_b - 0xfeed0000); dstAddress = vm::get_ptr(address); - copyToCellRamAndRelease(dstAddress, rtt1, colorRowPitch, RSXThread::m_width, RSXThread::m_height); + copyToCellRamAndRelease(dstAddress, rtt1, colorRowPitch, m_surface_clip_w, m_surface_clip_h); address = GetAddress(m_surface_offset_c, m_context_dma_color_c - 0xfeed0000); dstAddress = vm::get_ptr(address); - copyToCellRamAndRelease(dstAddress, rtt2, colorRowPitch, RSXThread::m_width, RSXThread::m_height); + copyToCellRamAndRelease(dstAddress, rtt2, colorRowPitch, m_surface_clip_w, m_surface_clip_h); address = GetAddress(m_surface_offset_d, m_context_dma_color_d - 0xfeed0000); dstAddress = vm::get_ptr(address); - copyToCellRamAndRelease(dstAddress, rtt3, colorRowPitch, RSXThread::m_width, RSXThread::m_height); + copyToCellRamAndRelease(dstAddress, rtt3, colorRowPitch, m_surface_clip_w, m_surface_clip_h); } break; } diff --git a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp index bca0279b52..767fa50b16 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp @@ -49,7 +49,7 @@ void D3D12GSRender::InitDrawBuffers() { case CELL_GCM_SURFACE_TARGET_0: { - ID3D12Resource *rttA = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 0, address_a, RSXThread::m_width, RSXThread::m_height, + ID3D12Resource *rttA = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 0, address_a, m_surface_clip_w, m_surface_clip_h, m_clear_surface_color_r / 255.0f, 
m_clear_surface_color_g / 255.0f, m_clear_surface_color_b / 255.0f, m_clear_surface_color_a / 255.0f); D3D12_RENDER_TARGET_VIEW_DESC rttViewDesc = {}; rttViewDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; @@ -59,7 +59,7 @@ void D3D12GSRender::InitDrawBuffers() } case CELL_GCM_SURFACE_TARGET_1: { - ID3D12Resource *rttB = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 0, address_b, RSXThread::m_width, RSXThread::m_height, + ID3D12Resource *rttB = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 0, address_b, m_surface_clip_w, m_surface_clip_h, m_clear_surface_color_r / 255.0f, m_clear_surface_color_g / 255.0f, m_clear_surface_color_b / 255.0f, m_clear_surface_color_a / 255.0f); D3D12_RENDER_TARGET_VIEW_DESC rttViewDesc = {}; rttViewDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; @@ -69,14 +69,14 @@ void D3D12GSRender::InitDrawBuffers() } case CELL_GCM_SURFACE_TARGET_MRT1: { - ID3D12Resource *rttA = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 0, address_a, RSXThread::m_width, RSXThread::m_height, + ID3D12Resource *rttA = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 0, address_a, m_surface_clip_w, m_surface_clip_h, m_clear_surface_color_r / 255.0f, m_clear_surface_color_g / 255.0f, m_clear_surface_color_b / 255.0f, m_clear_surface_color_a / 255.0f); D3D12_RENDER_TARGET_VIEW_DESC rttViewDesc = {}; rttViewDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; rttViewDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; m_device->CreateRenderTargetView(rttA, &rttViewDesc, Handle); Handle.ptr += g_RTTIncrement; - ID3D12Resource *rttB = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 1, address_b, RSXThread::m_width, RSXThread::m_height, + ID3D12Resource *rttB = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 1, address_b, m_surface_clip_w, m_surface_clip_h, m_clear_surface_color_r / 255.0f, m_clear_surface_color_g / 255.0f, m_clear_surface_color_b / 255.0f, m_clear_surface_color_a / 255.0f); rttViewDesc = {}; 
rttViewDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; @@ -86,21 +86,21 @@ void D3D12GSRender::InitDrawBuffers() break; case CELL_GCM_SURFACE_TARGET_MRT2: { - ID3D12Resource *rttA = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 0, address_a, RSXThread::m_width, RSXThread::m_height, + ID3D12Resource *rttA = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 0, address_a, m_surface_clip_w, m_surface_clip_h, m_clear_surface_color_r / 255.0f, m_clear_surface_color_g / 255.0f, m_clear_surface_color_b / 255.0f, m_clear_surface_color_a / 255.0f); D3D12_RENDER_TARGET_VIEW_DESC rttViewDesc = {}; rttViewDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; rttViewDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; m_device->CreateRenderTargetView(rttA, &rttViewDesc, Handle); Handle.ptr += g_RTTIncrement; - ID3D12Resource *rttB = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 1, address_b, RSXThread::m_width, RSXThread::m_height, + ID3D12Resource *rttB = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 1, address_b, m_surface_clip_w, m_surface_clip_h, m_clear_surface_color_r / 255.0f, m_clear_surface_color_g / 255.0f, m_clear_surface_color_b / 255.0f, m_clear_surface_color_a / 255.0f); rttViewDesc = {}; rttViewDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; rttViewDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; m_device->CreateRenderTargetView(rttB, &rttViewDesc, Handle); Handle.ptr += g_RTTIncrement; - ID3D12Resource *rttC = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 2, address_c, RSXThread::m_width, RSXThread::m_height, + ID3D12Resource *rttC = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 2, address_c, m_surface_clip_w, m_surface_clip_h, m_clear_surface_color_r / 255.0f, m_clear_surface_color_g / 255.0f, m_clear_surface_color_b / 255.0f, m_clear_surface_color_a / 255.0f); rttViewDesc = {}; rttViewDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; @@ -110,28 +110,28 @@ void D3D12GSRender::InitDrawBuffers() } case 
CELL_GCM_SURFACE_TARGET_MRT3: { - ID3D12Resource *rttA = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 0, address_a, RSXThread::m_width, RSXThread::m_height, + ID3D12Resource *rttA = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 0, address_a, m_surface_clip_w, m_surface_clip_h, m_clear_surface_color_r / 255.0f, m_clear_surface_color_g / 255.0f, m_clear_surface_color_b / 255.0f, m_clear_surface_color_a / 255.0f); D3D12_RENDER_TARGET_VIEW_DESC rttViewDesc = {}; rttViewDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; rttViewDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; m_device->CreateRenderTargetView(rttA, &rttViewDesc, Handle); Handle.ptr += g_RTTIncrement; - ID3D12Resource *rttB = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 1, address_b, RSXThread::m_width, RSXThread::m_height, + ID3D12Resource *rttB = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 1, address_b, m_surface_clip_w, m_surface_clip_h, m_clear_surface_color_r / 255.0f, m_clear_surface_color_g / 255.0f, m_clear_surface_color_b / 255.0f, m_clear_surface_color_a / 255.0f); rttViewDesc = {}; rttViewDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; rttViewDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; m_device->CreateRenderTargetView(rttB, &rttViewDesc, Handle); Handle.ptr += g_RTTIncrement; - ID3D12Resource *rttC = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 2, address_c, RSXThread::m_width, RSXThread::m_height, + ID3D12Resource *rttC = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 2, address_c, m_surface_clip_w, m_surface_clip_h, m_clear_surface_color_r / 255.0f, m_clear_surface_color_g / 255.0f, m_clear_surface_color_b / 255.0f, m_clear_surface_color_a / 255.0f); rttViewDesc = {}; rttViewDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; rttViewDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; m_device->CreateRenderTargetView(rttC, &rttViewDesc, Handle); Handle.ptr += g_RTTIncrement; - ID3D12Resource *rttD = 
m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 3, address_d, RSXThread::m_width, RSXThread::m_height, + ID3D12Resource *rttD = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 3, address_d, m_surface_clip_w, m_surface_clip_h, m_clear_surface_color_r / 255.0f, m_clear_surface_color_g / 255.0f, m_clear_surface_color_b / 255.0f, m_clear_surface_color_a / 255.0f); rttViewDesc = {}; rttViewDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; @@ -141,7 +141,7 @@ void D3D12GSRender::InitDrawBuffers() } } - ID3D12Resource *ds = m_rtts.bindAddressAsDepthStencil(m_device, copycmdlist, address_z, RSXThread::m_width, RSXThread::m_height, m_surface_depth_format, 1., 0); + ID3D12Resource *ds = m_rtts.bindAddressAsDepthStencil(m_device, copycmdlist, address_z, m_surface_clip_w, m_surface_clip_h, m_surface_depth_format, 1., 0); D3D12_DEPTH_STENCIL_VIEW_DESC depthStencilViewDesc = {}; switch (m_surface_depth_format) From c2d3c857b654ec01f5ac0b22ab5591195774ee5e Mon Sep 17 00:00:00 2001 From: vlj Date: Sat, 6 Jun 2015 23:01:30 +0200 Subject: [PATCH 239/343] d3d12: Handle w16Z16Y16X16 rtt format It doesn't completely fix the render_to_texture test, but it helps.
--- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 41 +++++++-- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h | 5 +- rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp | 85 +++++++++---------- rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h | 2 +- 4 files changed, 77 insertions(+), 56 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 1592521cf2..950daef552 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -792,6 +792,16 @@ bool D3D12GSRender::LoadProgram() assert(0); } + switch (m_surface_color_format) + { + case CELL_GCM_SURFACE_A8R8G8B8: + prop.RenderTargetsFormat = DXGI_FORMAT_R8G8B8A8_UNORM; + break; + case CELL_GCM_SURFACE_F_W16Z16Y16X16: + prop.RenderTargetsFormat = DXGI_FORMAT_R16G16B16A16_FLOAT; + break; + } + switch (m_surface_color_target) { case CELL_GCM_SURFACE_TARGET_0: @@ -1166,8 +1176,19 @@ ID3D12Resource * D3D12GSRender::writeColorBuffer(ID3D12Resource * RTT, ID3D12Gra { ID3D12Resource *Result; size_t w = m_surface_clip_w, h = m_surface_clip_h; - size_t rowPitch = w * 4; - rowPitch = (rowPitch + 255) & ~255; + DXGI_FORMAT dxgiFormat; + size_t rowPitch; + switch (m_surface_color_format) + { + case CELL_GCM_SURFACE_A8R8G8B8: + dxgiFormat = DXGI_FORMAT_R8G8B8A8_UNORM; + rowPitch = powerOf2Align(w * 4, 256); + break; + case CELL_GCM_SURFACE_F_W16Z16Y16X16: + dxgiFormat = DXGI_FORMAT_R16G16B16A16_FLOAT; + rowPitch = powerOf2Align(w * 8, 256); + break; + } D3D12_HEAP_PROPERTIES heapProp = {}; heapProp.Type = D3D12_HEAP_TYPE_READBACK; @@ -1201,7 +1222,7 @@ ID3D12Resource * D3D12GSRender::writeColorBuffer(ID3D12Resource * RTT, ID3D12Gra dst.pResource = Result; dst.PlacedFootprint.Offset = 0; dst.PlacedFootprint.Footprint.Depth = 1; - dst.PlacedFootprint.Footprint.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + dst.PlacedFootprint.Footprint.Format = dxgiFormat; dst.PlacedFootprint.Footprint.Height = (UINT)h; dst.PlacedFootprint.Footprint.Width = (UINT)w; 
dst.PlacedFootprint.Footprint.RowPitch = (UINT)rowPitch; @@ -1216,7 +1237,7 @@ void copyToCellRamAndRelease(void *dstAddress, ID3D12Resource *res, size_t rowPi void *srcBuffer; check(res->Map(0, nullptr, &srcBuffer)); for (unsigned row = 0; row < height; row++) - memcpy((char*)dstAddress + row * width * 4, (char*)srcBuffer + row * rowPitch, width * 4); + memcpy((char*)dstAddress + row * rowPitch, (char*)srcBuffer + row * rowPitch, rowPitch); res->Unmap(0, nullptr); res->Release(); } @@ -1452,8 +1473,16 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) convertCommandList->Release(); } - size_t colorRowPitch = m_surface_clip_w * 4; - colorRowPitch = (colorRowPitch + 255) & ~255; + size_t colorRowPitch; + switch (m_surface_color_format) + { + case CELL_GCM_SURFACE_A8R8G8B8: + colorRowPitch = powerOf2Align(m_surface_clip_w * 4, 256); + break; + case CELL_GCM_SURFACE_F_W16Z16Y16X16: + colorRowPitch = powerOf2Align(m_surface_clip_w * 8, 256); + break; + } if (Ini.GSDumpColorBuffers.GetValue()) { diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h index 0b0326b806..dc49e0c56b 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h @@ -14,6 +14,7 @@ struct D3D12PipelineProperties { D3D12_PRIMITIVE_TOPOLOGY_TYPE Topology; DXGI_FORMAT DepthStencilFormat; + DXGI_FORMAT RenderTargetsFormat; std::vector IASet; D3D12_BLEND_DESC Blend; unsigned numMRT : 3; @@ -46,7 +47,7 @@ struct D3D12PipelineProperties return false; if (memcmp(&Rasterization, &in.Rasterization, sizeof(D3D12_RASTERIZER_DESC))) return false; - return Topology == in.Topology && DepthStencilFormat == in.DepthStencilFormat && numMRT == in.numMRT && SampleMask == in.SampleMask; + return Topology == in.Topology && DepthStencilFormat == in.DepthStencilFormat && numMRT == in.numMRT && SampleMask == in.SampleMask && RenderTargetsFormat == in.RenderTargetsFormat; } }; @@ -155,7 +156,7 @@ struct D3D12Traits 
graphicPipelineStateDesc.NumRenderTargets = pipelineProperties.numMRT; for (unsigned i = 0; i < pipelineProperties.numMRT; i++) - graphicPipelineStateDesc.RTVFormats[i] = DXGI_FORMAT_R8G8B8A8_UNORM; + graphicPipelineStateDesc.RTVFormats[i] = pipelineProperties.RenderTargetsFormat; graphicPipelineStateDesc.DSVFormat = pipelineProperties.DepthStencilFormat; graphicPipelineStateDesc.InputLayout.pInputElementDescs = pipelineProperties.IASet.data(); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp index 767fa50b16..6bcdfbc56f 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp @@ -45,97 +45,78 @@ void D3D12GSRender::InitDrawBuffers() D3D12_CPU_DESCRIPTOR_HANDLE Handle = m_rtts.m_renderTargetsDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); size_t g_RTTIncrement = m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_RTV); + DXGI_FORMAT dxgiFormat; + switch (m_surface_color_format) + { + case CELL_GCM_SURFACE_A8R8G8B8: + dxgiFormat = DXGI_FORMAT_R8G8B8A8_UNORM; + break; + case CELL_GCM_SURFACE_F_W16Z16Y16X16: + dxgiFormat = DXGI_FORMAT_R16G16B16A16_FLOAT; + break; + } + D3D12_RENDER_TARGET_VIEW_DESC rttViewDesc = {}; + rttViewDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; + rttViewDesc.Format = dxgiFormat; + switch (m_surface_color_target) { case CELL_GCM_SURFACE_TARGET_0: { - ID3D12Resource *rttA = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 0, address_a, m_surface_clip_w, m_surface_clip_h, + ID3D12Resource *rttA = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 0, address_a, m_surface_clip_w, m_surface_clip_h, m_surface_color_format, m_clear_surface_color_r / 255.0f, m_clear_surface_color_g / 255.0f, m_clear_surface_color_b / 255.0f, m_clear_surface_color_a / 255.0f); - D3D12_RENDER_TARGET_VIEW_DESC rttViewDesc = {}; - rttViewDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; - rttViewDesc.Format = 
DXGI_FORMAT_R8G8B8A8_UNORM; m_device->CreateRenderTargetView(rttA, &rttViewDesc, Handle); break; } case CELL_GCM_SURFACE_TARGET_1: { - ID3D12Resource *rttB = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 0, address_b, m_surface_clip_w, m_surface_clip_h, + ID3D12Resource *rttB = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 0, address_b, m_surface_clip_w, m_surface_clip_h, m_surface_color_format, m_clear_surface_color_r / 255.0f, m_clear_surface_color_g / 255.0f, m_clear_surface_color_b / 255.0f, m_clear_surface_color_a / 255.0f); - D3D12_RENDER_TARGET_VIEW_DESC rttViewDesc = {}; - rttViewDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; - rttViewDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; m_device->CreateRenderTargetView(rttB, &rttViewDesc, Handle); break; } case CELL_GCM_SURFACE_TARGET_MRT1: { - ID3D12Resource *rttA = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 0, address_a, m_surface_clip_w, m_surface_clip_h, + ID3D12Resource *rttA = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 0, address_a, m_surface_clip_w, m_surface_clip_h, m_surface_color_format, m_clear_surface_color_r / 255.0f, m_clear_surface_color_g / 255.0f, m_clear_surface_color_b / 255.0f, m_clear_surface_color_a / 255.0f); - D3D12_RENDER_TARGET_VIEW_DESC rttViewDesc = {}; - rttViewDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; - rttViewDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; m_device->CreateRenderTargetView(rttA, &rttViewDesc, Handle); Handle.ptr += g_RTTIncrement; - ID3D12Resource *rttB = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 1, address_b, m_surface_clip_w, m_surface_clip_h, + ID3D12Resource *rttB = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 1, address_b, m_surface_clip_w, m_surface_clip_h, m_surface_color_format, m_clear_surface_color_r / 255.0f, m_clear_surface_color_g / 255.0f, m_clear_surface_color_b / 255.0f, m_clear_surface_color_a / 255.0f); - rttViewDesc = {}; - rttViewDesc.ViewDimension = 
D3D12_RTV_DIMENSION_TEXTURE2D; - rttViewDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; m_device->CreateRenderTargetView(rttB, &rttViewDesc, Handle); } break; case CELL_GCM_SURFACE_TARGET_MRT2: { - ID3D12Resource *rttA = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 0, address_a, m_surface_clip_w, m_surface_clip_h, + ID3D12Resource *rttA = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 0, address_a, m_surface_clip_w, m_surface_clip_h, m_surface_color_format, m_clear_surface_color_r / 255.0f, m_clear_surface_color_g / 255.0f, m_clear_surface_color_b / 255.0f, m_clear_surface_color_a / 255.0f); - D3D12_RENDER_TARGET_VIEW_DESC rttViewDesc = {}; - rttViewDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; - rttViewDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; m_device->CreateRenderTargetView(rttA, &rttViewDesc, Handle); Handle.ptr += g_RTTIncrement; - ID3D12Resource *rttB = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 1, address_b, m_surface_clip_w, m_surface_clip_h, + ID3D12Resource *rttB = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 1, address_b, m_surface_clip_w, m_surface_clip_h, m_surface_color_format, m_clear_surface_color_r / 255.0f, m_clear_surface_color_g / 255.0f, m_clear_surface_color_b / 255.0f, m_clear_surface_color_a / 255.0f); - rttViewDesc = {}; - rttViewDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; - rttViewDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; m_device->CreateRenderTargetView(rttB, &rttViewDesc, Handle); Handle.ptr += g_RTTIncrement; - ID3D12Resource *rttC = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 2, address_c, m_surface_clip_w, m_surface_clip_h, + ID3D12Resource *rttC = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 2, address_c, m_surface_clip_w, m_surface_clip_h, m_surface_color_format, m_clear_surface_color_r / 255.0f, m_clear_surface_color_g / 255.0f, m_clear_surface_color_b / 255.0f, m_clear_surface_color_a / 255.0f); - rttViewDesc = {}; - rttViewDesc.ViewDimension = 
D3D12_RTV_DIMENSION_TEXTURE2D; - rttViewDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; m_device->CreateRenderTargetView(rttC, &rttViewDesc, Handle); break; } case CELL_GCM_SURFACE_TARGET_MRT3: { - ID3D12Resource *rttA = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 0, address_a, m_surface_clip_w, m_surface_clip_h, + ID3D12Resource *rttA = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 0, address_a, m_surface_clip_w, m_surface_clip_h, m_surface_color_format, m_clear_surface_color_r / 255.0f, m_clear_surface_color_g / 255.0f, m_clear_surface_color_b / 255.0f, m_clear_surface_color_a / 255.0f); - D3D12_RENDER_TARGET_VIEW_DESC rttViewDesc = {}; - rttViewDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; - rttViewDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; m_device->CreateRenderTargetView(rttA, &rttViewDesc, Handle); Handle.ptr += g_RTTIncrement; - ID3D12Resource *rttB = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 1, address_b, m_surface_clip_w, m_surface_clip_h, + ID3D12Resource *rttB = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 1, address_b, m_surface_clip_w, m_surface_clip_h, m_surface_color_format, m_clear_surface_color_r / 255.0f, m_clear_surface_color_g / 255.0f, m_clear_surface_color_b / 255.0f, m_clear_surface_color_a / 255.0f); - rttViewDesc = {}; - rttViewDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; - rttViewDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; m_device->CreateRenderTargetView(rttB, &rttViewDesc, Handle); Handle.ptr += g_RTTIncrement; - ID3D12Resource *rttC = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 2, address_c, m_surface_clip_w, m_surface_clip_h, + ID3D12Resource *rttC = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 2, address_c, m_surface_clip_w, m_surface_clip_h, m_surface_color_format, m_clear_surface_color_r / 255.0f, m_clear_surface_color_g / 255.0f, m_clear_surface_color_b / 255.0f, m_clear_surface_color_a / 255.0f); - rttViewDesc = {}; - rttViewDesc.ViewDimension = 
D3D12_RTV_DIMENSION_TEXTURE2D; - rttViewDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; m_device->CreateRenderTargetView(rttC, &rttViewDesc, Handle); Handle.ptr += g_RTTIncrement; - ID3D12Resource *rttD = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 3, address_d, m_surface_clip_w, m_surface_clip_h, + ID3D12Resource *rttD = m_rtts.bindAddressAsRenderTargets(m_device, copycmdlist, 3, address_d, m_surface_clip_w, m_surface_clip_h, m_surface_color_format, m_clear_surface_color_r / 255.0f, m_clear_surface_color_g / 255.0f, m_clear_surface_color_b / 255.0f, m_clear_surface_color_a / 255.0f); - rttViewDesc = {}; - rttViewDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; - rttViewDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; m_device->CreateRenderTargetView(rttD, &rttViewDesc, Handle); break; } @@ -166,7 +147,7 @@ void D3D12GSRender::InitDrawBuffers() } ID3D12Resource *RenderTargets::bindAddressAsRenderTargets(ID3D12Device *device, ID3D12GraphicsCommandList *cmdList, size_t slot, u32 address, - size_t width, size_t height, float clearColorR, float clearColorG, float clearColorB, float clearColorA) + size_t width, size_t height, u8 surfaceColorFormat, float clearColorR, float clearColorG, float clearColorB, float clearColorA) { ID3D12Resource* rtt; auto It = m_renderTargets.find(address); @@ -179,8 +160,18 @@ ID3D12Resource *RenderTargets::bindAddressAsRenderTargets(ID3D12Device *device, else { LOG_WARNING(RSX, "Creating RTT"); + DXGI_FORMAT dxgiFormat; + switch (surfaceColorFormat) + { + case CELL_GCM_SURFACE_A8R8G8B8: + dxgiFormat = DXGI_FORMAT_R8G8B8A8_UNORM; + break; + case CELL_GCM_SURFACE_F_W16Z16Y16X16: + dxgiFormat = DXGI_FORMAT_R16G16B16A16_FLOAT; + break; + } D3D12_CLEAR_VALUE clearColorValue = {}; - clearColorValue.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + clearColorValue.Format = dxgiFormat; clearColorValue.Color[0] = clearColorR; clearColorValue.Color[1] = clearColorG; clearColorValue.Color[2] = clearColorB; @@ -189,7 +180,7 @@ ID3D12Resource 
*RenderTargets::bindAddressAsRenderTargets(ID3D12Device *device, D3D12_HEAP_PROPERTIES heapProp = {}; heapProp.Type = D3D12_HEAP_TYPE_DEFAULT; - D3D12_RESOURCE_DESC resourceDesc = getTexture2DResourceDesc(width, height, DXGI_FORMAT_R8G8B8A8_UNORM); + D3D12_RESOURCE_DESC resourceDesc = getTexture2DResourceDesc(width, height, dxgiFormat); resourceDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; device->CreateCommittedResource( diff --git a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h index 493a3d8c21..719c189669 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h @@ -20,7 +20,7 @@ struct RenderTargets * returns the corresponding render target resource. */ ID3D12Resource *bindAddressAsRenderTargets(ID3D12Device *device, ID3D12GraphicsCommandList *cmdList, size_t slot, u32 address, - size_t width, size_t height, float clearColorR, float clearColorG, float clearColorB, float clearColorA); + size_t width, size_t height, u8 surfaceColorFormat, float clearColorR, float clearColorG, float clearColorB, float clearColorA); ID3D12Resource *bindAddressAsDepthStencil(ID3D12Device *device, ID3D12GraphicsCommandList *cmdList, u32 address, size_t width, size_t height, u8 surfaceDepthFormat, float depthClear, u8 stencilClear); From ff9f348ec2651adc47200d39c15552a1f8eab650 Mon Sep 17 00:00:00 2001 From: vlj Date: Sat, 6 Jun 2015 23:25:00 +0200 Subject: [PATCH 240/343] d3d12: Use finer pitch when downloading rtt --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 34 ++++++++++++++------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 950daef552..237dce0c86 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -1232,12 +1232,12 @@ ID3D12Resource * D3D12GSRender::writeColorBuffer(ID3D12Resource * RTT, ID3D12Gra } static -void 
copyToCellRamAndRelease(void *dstAddress, ID3D12Resource *res, size_t rowPitch, size_t width, size_t height) +void copyToCellRamAndRelease(void *dstAddress, ID3D12Resource *res, size_t dstPitch, size_t srcPitch, size_t width, size_t height) { void *srcBuffer; check(res->Map(0, nullptr, &srcBuffer)); for (unsigned row = 0; row < height; row++) - memcpy((char*)dstAddress + row * rowPitch, (char*)srcBuffer + row * rowPitch, rowPitch); + memcpy((char*)dstAddress + row * dstPitch, (char*)srcBuffer + row * srcPitch, srcPitch); res->Unmap(0, nullptr); res->Release(); } @@ -1473,14 +1473,16 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) convertCommandList->Release(); } - size_t colorRowPitch; + size_t srcPitch, dstPitch; switch (m_surface_color_format) { case CELL_GCM_SURFACE_A8R8G8B8: - colorRowPitch = powerOf2Align(m_surface_clip_w * 4, 256); + srcPitch = powerOf2Align(m_surface_clip_w * 4, 256); + dstPitch = m_surface_clip_w * 4; break; case CELL_GCM_SURFACE_F_W16Z16Y16X16: - colorRowPitch = powerOf2Align(m_surface_clip_w * 8, 256); + srcPitch = powerOf2Align(m_surface_clip_w * 8, 256); + dstPitch = m_surface_clip_w * 8; break; } @@ -1495,7 +1497,7 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) { u32 address = GetAddress(m_surface_offset_a, m_context_dma_color_a - 0xfeed0000); void *dstAddress = vm::get_ptr(address); - copyToCellRamAndRelease(dstAddress, rtt0, colorRowPitch, m_surface_clip_w, m_surface_clip_h); + copyToCellRamAndRelease(dstAddress, rtt0, srcPitch, dstPitch, m_surface_clip_w, m_surface_clip_h); } break; @@ -1503,7 +1505,7 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) { u32 address = GetAddress(m_surface_offset_b, m_context_dma_color_b - 0xfeed0000); void *dstAddress = vm::get_ptr(address); - copyToCellRamAndRelease(dstAddress, rtt1, colorRowPitch, m_surface_clip_w, m_surface_clip_h); + copyToCellRamAndRelease(dstAddress, rtt1, srcPitch, dstPitch, m_surface_clip_w, 
m_surface_clip_h); } break; @@ -1511,10 +1513,10 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) { u32 address = GetAddress(m_surface_offset_a, m_context_dma_color_a - 0xfeed0000); void *dstAddress = vm::get_ptr(address); - copyToCellRamAndRelease(dstAddress, rtt0, colorRowPitch, m_surface_clip_w, m_surface_clip_h); + copyToCellRamAndRelease(dstAddress, rtt0, srcPitch, dstPitch, m_surface_clip_w, m_surface_clip_h); address = GetAddress(m_surface_offset_b, m_context_dma_color_b - 0xfeed0000); dstAddress = vm::get_ptr(address); - copyToCellRamAndRelease(dstAddress, rtt1, colorRowPitch, m_surface_clip_w, m_surface_clip_h); + copyToCellRamAndRelease(dstAddress, rtt1, srcPitch, dstPitch, m_surface_clip_w, m_surface_clip_h); } break; @@ -1522,13 +1524,13 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) { u32 address = GetAddress(m_surface_offset_a, m_context_dma_color_a - 0xfeed0000); void *dstAddress = vm::get_ptr(address); - copyToCellRamAndRelease(dstAddress, rtt0, colorRowPitch, m_surface_clip_w, m_surface_clip_h); + copyToCellRamAndRelease(dstAddress, rtt0, srcPitch, dstPitch, m_surface_clip_w, m_surface_clip_h); address = GetAddress(m_surface_offset_b, m_context_dma_color_b - 0xfeed0000); dstAddress = vm::get_ptr(address); - copyToCellRamAndRelease(dstAddress, rtt1, colorRowPitch, m_surface_clip_w, m_surface_clip_h); + copyToCellRamAndRelease(dstAddress, rtt1, srcPitch, dstPitch, m_surface_clip_w, m_surface_clip_h); address = GetAddress(m_surface_offset_c, m_context_dma_color_c - 0xfeed0000); dstAddress = vm::get_ptr(address); - copyToCellRamAndRelease(dstAddress, rtt2, colorRowPitch, m_surface_clip_w, m_surface_clip_h); + copyToCellRamAndRelease(dstAddress, rtt2, srcPitch, dstPitch, m_surface_clip_w, m_surface_clip_h); } break; @@ -1536,16 +1538,16 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) { u32 address = GetAddress(m_surface_offset_a, m_context_dma_color_a - 0xfeed0000); void 
*dstAddress = vm::get_ptr(address); - copyToCellRamAndRelease(dstAddress, rtt0, colorRowPitch, m_surface_clip_w, m_surface_clip_h); + copyToCellRamAndRelease(dstAddress, rtt0, srcPitch, dstPitch, m_surface_clip_w, m_surface_clip_h); address = GetAddress(m_surface_offset_b, m_context_dma_color_b - 0xfeed0000); dstAddress = vm::get_ptr(address); - copyToCellRamAndRelease(dstAddress, rtt1, colorRowPitch, m_surface_clip_w, m_surface_clip_h); + copyToCellRamAndRelease(dstAddress, rtt1, srcPitch, dstPitch, m_surface_clip_w, m_surface_clip_h); address = GetAddress(m_surface_offset_c, m_context_dma_color_c - 0xfeed0000); dstAddress = vm::get_ptr(address); - copyToCellRamAndRelease(dstAddress, rtt2, colorRowPitch, m_surface_clip_w, m_surface_clip_h); + copyToCellRamAndRelease(dstAddress, rtt2, srcPitch, dstPitch, m_surface_clip_w, m_surface_clip_h); address = GetAddress(m_surface_offset_d, m_context_dma_color_d - 0xfeed0000); dstAddress = vm::get_ptr(address); - copyToCellRamAndRelease(dstAddress, rtt3, colorRowPitch, m_surface_clip_w, m_surface_clip_h); + copyToCellRamAndRelease(dstAddress, rtt3, srcPitch, dstPitch, m_surface_clip_w, m_surface_clip_h); } break; } From e38bf8d51f9648e51ae5be394221f5438e550f57 Mon Sep 17 00:00:00 2001 From: vlj Date: Sat, 6 Jun 2015 23:52:28 +0200 Subject: [PATCH 241/343] d3d12: Fix rgba16float endianness for textures --- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index f5b75a22c6..86d8ae81cd 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -369,6 +369,17 @@ ID3D12Resource *uploadSingleTexture( } break; } + case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: + { + unsigned short *dst = (unsigned short *)textureData, *src = (unsigned short *)pixels; + + for (int j = 0; j < w * 4; j++) + { + uint64_t tmp = src[row * w * 4 + j]; + dst[row * w * 4 + j] = (tmp >> 8) | (tmp << 
8); + } + break; + } default: { streamBuffer((char*)textureData + row * rowPitch, (char*)pixels + row * srcPitch, srcPitch); From 1837f40ed4ca30a50cb1fedd58d87a232c95d9eb Mon Sep 17 00:00:00 2001 From: raven02 Date: Sun, 7 Jun 2015 15:19:48 +0800 Subject: [PATCH 242/343] d3d12: Factorize common use functions among frag and vertex decompiler --- rpcs3/Emu/RSX/D3D12/D3D12CommonDecompiler.cpp | 71 +++++++++++++++++++ rpcs3/Emu/RSX/D3D12/D3D12CommonDecompiler.h | 6 ++ .../D3D12/D3D12FragmentProgramDecompiler.cpp | 62 ++-------------- .../D3D12/D3D12VertexProgramDecompiler.cpp | 61 ++-------------- rpcs3/emucore.vcxproj | 2 + rpcs3/emucore.vcxproj.filters | 6 ++ 6 files changed, 93 insertions(+), 115 deletions(-) create mode 100644 rpcs3/Emu/RSX/D3D12/D3D12CommonDecompiler.cpp create mode 100644 rpcs3/Emu/RSX/D3D12/D3D12CommonDecompiler.h diff --git a/rpcs3/Emu/RSX/D3D12/D3D12CommonDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/D3D12CommonDecompiler.cpp new file mode 100644 index 0000000000..813f6b5f0b --- /dev/null +++ b/rpcs3/Emu/RSX/D3D12/D3D12CommonDecompiler.cpp @@ -0,0 +1,71 @@ +#include "stdafx.h" +#include "D3D12CommonDecompiler.h" + +std::string getFloatTypeNameImp(size_t elementCount) +{ + switch (elementCount) + { + default: + abort(); + case 1: + return "float"; + case 2: + return "float2"; + case 3: + return "float3"; + case 4: + return "float4"; + } +} + +std::string getFunctionImp(FUNCTION f) +{ + switch (f) + { + default: + abort(); + case FUNCTION::FUNCTION_DP2: + return "dot($0.xy, $1.xy).xxxx"; + case FUNCTION::FUNCTION_DP2A: + return "(dot($0.xy, $1.xy) + $2.x).xxxx"; + case FUNCTION::FUNCTION_DP3: + return "dot($0.xyz, $1.xyz).xxxx"; + case FUNCTION::FUNCTION_DP4: + return "dot($0, $1).xxxx"; + case FUNCTION::FUNCTION_DPH: + return "dot(float4($0.xyz, 1.0), $1).xxxx"; + case FUNCTION::FUNCTION_SFL: + return "float4(0., 0., 0., 0.)"; + case FUNCTION::FUNCTION_STR: + return "float4(1., 1., 1., 1.)"; + case FUNCTION::FUNCTION_FRACT: + return "frac($0)"; + case 
FUNCTION::FUNCTION_TEXTURE_SAMPLE: + return "$t.Sample($tsampler, $0.xy)"; + case FUNCTION::FUNCTION_DFDX: + return "ddx($0)"; + case FUNCTION::FUNCTION_DFDY: + return "ddy($0)"; + } +} + +std::string compareFunctionImp(COMPARE f, const std::string &Op0, const std::string &Op1) +{ + switch (f) + { + default: + abort(); + case COMPARE::FUNCTION_SEQ: + return "(" + Op0 + " == " + Op1 + ")"; + case COMPARE::FUNCTION_SGE: + return "(" + Op0 + " >= " + Op1 + ")"; + case COMPARE::FUNCTION_SGT: + return "(" + Op0 + " > " + Op1 + ")"; + case COMPARE::FUNCTION_SLE: + return "(" + Op0 + " <= " + Op1 + ")"; + case COMPARE::FUNCTION_SLT: + return "(" + Op0 + " < " + Op1 + ")"; + case COMPARE::FUNCTION_SNE: + return "(" + Op0 + " != " + Op1 + ")"; + } +} \ No newline at end of file diff --git a/rpcs3/Emu/RSX/D3D12/D3D12CommonDecompiler.h b/rpcs3/Emu/RSX/D3D12/D3D12CommonDecompiler.h new file mode 100644 index 0000000000..fd9cfd9589 --- /dev/null +++ b/rpcs3/Emu/RSX/D3D12/D3D12CommonDecompiler.h @@ -0,0 +1,6 @@ +#pragma once +#include "../Common/ShaderParam.h" + +std::string getFloatTypeNameImp(size_t elementCount); +std::string getFunctionImp(FUNCTION f); +std::string compareFunctionImp(COMPARE f, const std::string &Op0, const std::string &Op1); \ No newline at end of file diff --git a/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp index d996e4ab43..f9837e0185 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp @@ -1,7 +1,7 @@ #include "stdafx.h" #if defined(DX12_SUPPORT) #include "D3D12FragmentProgramDecompiler.h" - +#include "D3D12CommonDecompiler.h" #include "Utilities/Log.h" #include "Emu/Memory/Memory.h" #include "Emu/System.h" @@ -14,50 +14,12 @@ D3D12FragmentDecompiler::D3D12FragmentDecompiler(u32 addr, u32& size, u32 ctrl) std::string D3D12FragmentDecompiler::getFloatTypeName(size_t elementCount) { - switch (elementCount) - { - 
default: - abort(); - case 1: - return "float"; - case 2: - return "float2"; - case 3: - return "float3"; - case 4: - return "float4"; - } + return getFloatTypeNameImp(elementCount); } std::string D3D12FragmentDecompiler::getFunction(enum class FUNCTION f) { - switch (f) - { - default: - abort(); - case FUNCTION::FUNCTION_DP2: - return "dot($0.xy, $1.xy).xxxx"; - case FUNCTION::FUNCTION_DP2A: - return "(dot($0.xy, $1.xy) + $2.x).xxxx"; - case FUNCTION::FUNCTION_DP3: - return "dot($0.xyz, $1.xyz).xxxx"; - case FUNCTION::FUNCTION_DP4: - return "dot($0, $1).xxxx"; - case FUNCTION::FUNCTION_DPH: - return "dot(float4($0.xyz, 1.0), $1).xxxx"; - case FUNCTION::FUNCTION_SFL: - return "float4(0., 0., 0., 0.)"; - case FUNCTION::FUNCTION_STR: - return "float4(1., 1., 1., 1.)"; - case FUNCTION::FUNCTION_FRACT: - return "frac($0)"; - case FUNCTION::FUNCTION_TEXTURE_SAMPLE: - return "$t.Sample($tsampler, $0.xy)"; - case FUNCTION::FUNCTION_DFDX: - return "ddx($0)"; - case FUNCTION::FUNCTION_DFDY: - return "ddy($0)"; - } + return getFunctionImp(f); } std::string D3D12FragmentDecompiler::saturate(const std::string & code) @@ -67,23 +29,7 @@ std::string D3D12FragmentDecompiler::saturate(const std::string & code) std::string D3D12FragmentDecompiler::compareFunction(COMPARE f, const std::string &Op0, const std::string &Op1) { - switch (f) - { - default: - abort(); - case COMPARE::FUNCTION_SEQ: - return "(" + Op0 + " == " + Op1 + ")"; - case COMPARE::FUNCTION_SGE: - return "(" + Op0 + " >= " + Op1 +")"; - case COMPARE::FUNCTION_SGT: - return "(" + Op0 + " > " + Op1 + ")"; - case COMPARE::FUNCTION_SLE: - return "(" + Op0 + " <= " + Op1 + ")"; - case COMPARE::FUNCTION_SLT: - return "(" + Op0 + " < " + Op1 + ")"; - case COMPARE::FUNCTION_SNE: - return "(" + Op0 + " != " + Op1 + ")"; - } + return compareFunctionImp(f, Op0, Op1); } void D3D12FragmentDecompiler::insertHeader(std::stringstream & OS) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp 
b/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp index bedd8f0b9a..6129ca5611 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp @@ -1,77 +1,24 @@ #include "stdafx.h" #if defined(DX12_SUPPORT) #include "D3D12VertexProgramDecompiler.h" +#include "D3D12CommonDecompiler.h" #include "Utilities/Log.h" #include "Emu/System.h" std::string D3D12VertexProgramDecompiler::getFloatTypeName(size_t elementCount) { - switch (elementCount) - { - default: - abort(); - case 1: - return "float"; - case 2: - return "float2"; - case 3: - return "float3"; - case 4: - return "float4"; - } + return getFloatTypeNameImp(elementCount); } std::string D3D12VertexProgramDecompiler::getFunction(enum class FUNCTION f) { - switch (f) - { - default: - abort(); - case FUNCTION::FUNCTION_DP2: - return "dot($0.xy, $1.xy).xxxx"; - case FUNCTION::FUNCTION_DP2A: - return "(dot($0.xy, $1.xy) + $2.x).xxxx"; - case FUNCTION::FUNCTION_DP3: - return "dot($0.xyz, $1.xyz).xxxx"; - case FUNCTION::FUNCTION_DP4: - return "dot($0, $1).xxxx"; - case FUNCTION::FUNCTION_DPH: - return "dot(float4($0.xyz, 1.0), $1).xxxx"; - case FUNCTION::FUNCTION_SFL: - return "float4(0., 0., 0., 0.)"; - case FUNCTION::FUNCTION_STR: - return "float4(1., 1., 1., 1.)"; - case FUNCTION::FUNCTION_FRACT: - return "frac($0)"; - case FUNCTION::FUNCTION_TEXTURE_SAMPLE: - return "$t.Sample($tsampler, $0.xy)"; - case FUNCTION::FUNCTION_DFDX: - return "ddx($0)"; - case FUNCTION::FUNCTION_DFDY: - return "ddy($0)"; - } + return getFunctionImp(f); } std::string D3D12VertexProgramDecompiler::compareFunction(COMPARE f, const std::string &Op0, const std::string &Op1) { - switch (f) - { - default: - abort(); - case COMPARE::FUNCTION_SEQ: - return "(" + Op0 + " == " + Op1 + ").xxxx"; - case COMPARE::FUNCTION_SGE: - return "(" + Op0 + " >= " + Op1 + ").xxxx"; - case COMPARE::FUNCTION_SGT: - return "(" + Op0 + " > " + Op1 + ").xxxx"; - case COMPARE::FUNCTION_SLE: - return "(" + 
Op0 + " <= " + Op1 + ").xxxx"; - case COMPARE::FUNCTION_SLT: - return "(" + Op0 + " < " + Op1 + ").xxxx"; - case COMPARE::FUNCTION_SNE: - return "(" + Op0 + " != " + Op1 + ").xxxx"; - } + return compareFunctionImp(f, Op0, Op1); } void D3D12VertexProgramDecompiler::insertHeader(std::stringstream &OS) diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index 049f949613..2612992e5e 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -48,6 +48,7 @@ + @@ -513,6 +514,7 @@ + diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters index fbc8a96a59..1895f11773 100644 --- a/rpcs3/emucore.vcxproj.filters +++ b/rpcs3/emucore.vcxproj.filters @@ -986,6 +986,9 @@ Emu\GPU\RSX\D3D12 + + Emu\GPU\RSX\D3D12 + Emu\GPU\RSX\D3D12 @@ -1870,6 +1873,9 @@ Emu\GPU\RSX\D3D12 + + Emu\GPU\RSX\D3D12 + Emu\GPU\RSX\D3D12 From 4966ab565afd1871a388f55b15a53601e5bd06b5 Mon Sep 17 00:00:00 2001 From: vlj Date: Sun, 7 Jun 2015 19:12:34 +0200 Subject: [PATCH 243/343] d3d12; Clean resource sooner and fix crash with VS debugger. 
--- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 237dce0c86..2540a6c547 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -95,6 +95,10 @@ void D3D12GSRender::ResourceStorage::Reset() std::get<2>(tmp)->Release(); for (auto tmp : m_inUseTexture2D) std::get<2>(tmp)->Release(); + m_inUseConstantsBuffers.clear(); + m_inUseVertexIndexBuffers.clear(); + m_inUseTextureUploadBuffers.clear(); + m_inUseTexture2D.clear(); m_commandAllocator->Reset(); m_textureUploadCommandAllocator->Reset(); @@ -1132,6 +1136,14 @@ void D3D12GSRender::Flip() // Flush m_texturesCache.clear(); m_texturesRTTs.clear(); + getNonCurrentResourceStorage().m_inUseConstantsBuffers = m_constantsData.m_resourceStoredSinceLastSync; + m_constantsData.m_resourceStoredSinceLastSync.clear(); + getNonCurrentResourceStorage().m_inUseVertexIndexBuffers = m_vertexIndexData.m_resourceStoredSinceLastSync; + m_vertexIndexData.m_resourceStoredSinceLastSync.clear(); + getNonCurrentResourceStorage().m_inUseTextureUploadBuffers = m_textureUploadData.m_resourceStoredSinceLastSync; + m_textureUploadData.m_resourceStoredSinceLastSync.clear(); + getNonCurrentResourceStorage().m_inUseTexture2D = m_textureData.m_resourceStoredSinceLastSync; + m_textureData.m_resourceStoredSinceLastSync.clear(); if (getCurrentResourceStorage().m_frameFinishedHandle) { @@ -1150,15 +1162,6 @@ void D3D12GSRender::Flip() getCurrentResourceStorage().Reset(); } - getCurrentResourceStorage().m_inUseConstantsBuffers = m_constantsData.m_resourceStoredSinceLastSync; - m_constantsData.m_resourceStoredSinceLastSync.clear(); - getCurrentResourceStorage().m_inUseVertexIndexBuffers = m_vertexIndexData.m_resourceStoredSinceLastSync; - m_vertexIndexData.m_resourceStoredSinceLastSync.clear(); - getCurrentResourceStorage().m_inUseTextureUploadBuffers = 
m_textureUploadData.m_resourceStoredSinceLastSync; - m_textureUploadData.m_resourceStoredSinceLastSync.clear(); - getCurrentResourceStorage().m_inUseTexture2D = m_textureData.m_resourceStoredSinceLastSync; - m_textureData.m_resourceStoredSinceLastSync.clear(); - m_frame->Flip(nullptr); } From 10b92d45d185ec465417ef17cbee33ce77753e56 Mon Sep 17 00:00:00 2001 From: vlj Date: Sun, 7 Jun 2015 19:16:58 +0200 Subject: [PATCH 244/343] d3d12: Use RSXThread width height for viewport Thanks to raven02 for finding this, it fixes render to texture test --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 2540a6c547..bca32a7ce4 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -1017,8 +1017,8 @@ void D3D12GSRender::ExecCMD() { 0.f, 0.f, - (float)m_surface_clip_w, - (float)m_surface_clip_h, + (float)RSXThread::m_width, + (float)RSXThread::m_height, -1.f, 1.f }; From 2d0dbf4949554ac7047fcbc66f9d801cef6f6bd3 Mon Sep 17 00:00:00 2001 From: vlj Date: Sun, 7 Jun 2015 20:38:48 +0200 Subject: [PATCH 245/343] d3d12: Async semaphore leads to deadlock, make it sync, but do resource garbage collection async --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 106 +++++++++++++++++--------- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 14 ++++ 2 files changed, 83 insertions(+), 37 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index bca32a7ce4..bf8141e014 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -79,6 +79,44 @@ void DataHeap::Release() } } +GarbageCollectionThread::GarbageCollectionThread() +{ + m_worker = std::thread([this]() { + while (true) + { + std::unique_lock lock(m_mutex); + if (m_queue.empty()) + cv.wait(lock); + m_queue.front()(); + m_queue.pop(); + } + }); + m_worker.detach(); +} + 
+GarbageCollectionThread::~GarbageCollectionThread() +{ +} + +void GarbageCollectionThread::pushWork(std::function&& f) +{ + std::unique_lock lock(m_mutex); + m_queue.push(f); + cv.notify_all(); +} + +void GarbageCollectionThread::waitForCompletion() +{ + pushWork([]() {}); + while (true) + { + std::this_thread::yield(); + std::unique_lock lock(m_mutex); + if (m_queue.empty()) + return; + } +} + void D3D12GSRender::ResourceStorage::Reset() { m_constantsBufferIndex = 0; @@ -1128,40 +1166,45 @@ void D3D12GSRender::Flip() check(m_swapChain->Present(Ini.GSVSyncEnable.GetValue() ? 1 : 0, 0)); // Add an event signaling queue completion - m_device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&getNonCurrentResourceStorage().m_frameFinishedFence)); - getNonCurrentResourceStorage().m_frameFinishedHandle = CreateEvent(0, 0, 0, 0); - getNonCurrentResourceStorage().m_frameFinishedFence->SetEventOnCompletion(1, getNonCurrentResourceStorage().m_frameFinishedHandle); - m_commandQueueGraphic->Signal(getNonCurrentResourceStorage().m_frameFinishedFence, 1); + ResourceStorage &storage = getNonCurrentResourceStorage(); + + m_device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&storage.m_frameFinishedFence)); + storage.m_frameFinishedHandle = CreateEvent(0, 0, 0, 0); + storage.m_frameFinishedFence->SetEventOnCompletion(1, storage.m_frameFinishedHandle); + m_commandQueueGraphic->Signal(storage.m_frameFinishedFence, 1); // Flush m_texturesCache.clear(); m_texturesRTTs.clear(); - getNonCurrentResourceStorage().m_inUseConstantsBuffers = m_constantsData.m_resourceStoredSinceLastSync; + + storage.m_inUseConstantsBuffers = m_constantsData.m_resourceStoredSinceLastSync; m_constantsData.m_resourceStoredSinceLastSync.clear(); - getNonCurrentResourceStorage().m_inUseVertexIndexBuffers = m_vertexIndexData.m_resourceStoredSinceLastSync; + storage.m_inUseVertexIndexBuffers = m_vertexIndexData.m_resourceStoredSinceLastSync; m_vertexIndexData.m_resourceStoredSinceLastSync.clear(); - 
getNonCurrentResourceStorage().m_inUseTextureUploadBuffers = m_textureUploadData.m_resourceStoredSinceLastSync; + storage.m_inUseTextureUploadBuffers = m_textureUploadData.m_resourceStoredSinceLastSync; m_textureUploadData.m_resourceStoredSinceLastSync.clear(); - getNonCurrentResourceStorage().m_inUseTexture2D = m_textureData.m_resourceStoredSinceLastSync; + storage.m_inUseTexture2D = m_textureData.m_resourceStoredSinceLastSync; m_textureData.m_resourceStoredSinceLastSync.clear(); - if (getCurrentResourceStorage().m_frameFinishedHandle) + m_GC.pushWork([&]() { - WaitForSingleObject(getCurrentResourceStorage().m_frameFinishedHandle, INFINITE); - CloseHandle(getCurrentResourceStorage().m_frameFinishedHandle); - getCurrentResourceStorage().m_frameFinishedFence->Release(); + WaitForSingleObject(storage.m_frameFinishedHandle, INFINITE); + CloseHandle(storage.m_frameFinishedHandle); + storage.m_frameFinishedFence->Release(); - for (auto tmp : getCurrentResourceStorage().m_inUseConstantsBuffers) + for (auto tmp : storage.m_inUseConstantsBuffers) m_constantsData.m_getPos = std::get<0>(tmp); - for (auto tmp : getCurrentResourceStorage().m_inUseVertexIndexBuffers) + for (auto tmp : storage.m_inUseVertexIndexBuffers) m_vertexIndexData.m_getPos = std::get<0>(tmp); - for (auto tmp : getCurrentResourceStorage().m_inUseTextureUploadBuffers) + for (auto tmp : storage.m_inUseTextureUploadBuffers) m_textureUploadData.m_getPos = std::get<0>(tmp); - for (auto tmp : getCurrentResourceStorage().m_inUseTexture2D) + for (auto tmp : storage.m_inUseTexture2D) m_textureData.m_getPos = std::get<0>(tmp); - getCurrentResourceStorage().Reset(); - } + storage.Reset(); + }); + while (getCurrentResourceStorage().m_frameFinishedHandle) + std::this_thread::yield(); m_frame->Flip(nullptr); } @@ -1446,7 +1489,7 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) //Wait for result m_commandQueueGraphic->Signal(fence, 1); - std::thread valueChangerThread([=]() { + 
m_GC.pushWork([=]() { WaitForSingleObject(handle, INFINITE); CloseHandle(handle); fence->Release(); @@ -1561,28 +1604,17 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) vm::write32(m_label_addr + offset, value); }); - valueChangerThread.detach(); + + m_GC.waitForCompletion(); } void D3D12GSRender::semaphorePFIFOAcquire(u32 offset, u32 value) { - ID3D12Fence *fence; - check( - m_device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&fence)) - ); - m_commandQueueGraphic->Wait(fence, 1); - - std::thread valueChangerThread([=]() { - while (true) - { - u32 val = vm::read32(m_label_addr + offset); - if (val == value) break; - std::this_thread::sleep_for(std::chrono::milliseconds(100)); - } - fence->Signal(1); - fence->Release(); + while (true) + { + u32 val = vm::read32(m_label_addr + offset); + if (val == value) break; + std::this_thread::yield(); } - ); - valueChangerThread.detach(); } #endif diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index f0f36a225c..2d950b1d67 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -61,9 +61,23 @@ struct DataHeap void Release(); }; +struct GarbageCollectionThread +{ + std::mutex m_mutex; + std::condition_variable cv; + std::queue > m_queue; + std::thread m_worker; + + GarbageCollectionThread(); + ~GarbageCollectionThread(); + void pushWork(std::function&& f); + void waitForCompletion(); +}; + class D3D12GSRender : public GSRender { private: + GarbageCollectionThread m_GC; // Copy of RTT to be used as texture std::unordered_map m_texturesRTTs; From 8669dac5e7b168163198a6305fcdafd93abae610 Mon Sep 17 00:00:00 2001 From: vlj Date: Sun, 7 Jun 2015 22:01:43 +0200 Subject: [PATCH 246/343] d3d12: Implement sampler filters properly Fix menu in the guided fate paradox --- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 51 ++++++++++++++++++++++++++-- 1 file changed, 49 insertions(+), 2 deletions(-) diff --git 
a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index 86d8ae81cd..b492551765 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -147,6 +147,54 @@ DXGI_FORMAT getDXGIFormat(int format) } } +static D3D12_FILTER getSamplerFilter(u32 minFilter, u32 magFilter) +{ + D3D12_FILTER_TYPE min, mag, mip; + switch (minFilter) + { + case 1: //GL_NEAREST + min = D3D12_FILTER_TYPE_POINT; + mip = D3D12_FILTER_TYPE_POINT; + break; + case 2: // GL_LINEAR + min = D3D12_FILTER_TYPE_LINEAR; + mip = D3D12_FILTER_TYPE_POINT; + break; + case 3: //GL_NEAREST_MIPMAP_NEAREST + min = D3D12_FILTER_TYPE_POINT; + mip = D3D12_FILTER_TYPE_POINT; + break; + case 4: // GL_LINEAR_MIPMAP_NEAREST + min = D3D12_FILTER_TYPE_LINEAR; + mip = D3D12_FILTER_TYPE_POINT; + break; + case 5: // GL_NEAREST_MIPMAP_LINEAR + min = D3D12_FILTER_TYPE_POINT; + mip = D3D12_FILTER_TYPE_LINEAR; + break; + case 6: //GL_LINEAR_MIPMAP_LINEAR + min = D3D12_FILTER_TYPE_LINEAR; + mip = D3D12_FILTER_TYPE_LINEAR; + break; + default: + LOG_ERROR(RSX, "Unknow min filter %x", minFilter); + } + + switch (magFilter) + { + case 1: // GL_NEAREST + mag = D3D12_FILTER_TYPE_POINT; + break; + case 2: // GL_LINEAR + mag = D3D12_FILTER_TYPE_LINEAR; + break; + default: + LOG_ERROR(RSX, "Unknow mag filter %x", magFilter); + } + + return D3D12_ENCODE_BASIC_FILTER(min, mag, mip, D3D12_FILTER_REDUCTION_TYPE_STANDARD); +} + /** * Create a texture residing in default heap and generate uploads commands in commandList, * using a temporary texture buffer. 
@@ -575,9 +623,8 @@ size_t D3D12GSRender::UploadTextures() Handle.ptr += (getCurrentResourceStorage().m_currentTextureIndex + usedTexture) * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); m_device->CreateShaderResourceView(vramTexture, &srvDesc, Handle); - // TODO : Correctly define sampler D3D12_SAMPLER_DESC samplerDesc = {}; - samplerDesc.Filter = D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT; + samplerDesc.Filter = getSamplerFilter(m_textures[i].GetMinFilter(), m_textures[i].GetMagFilter()); samplerDesc.AddressU = GetWrap(m_textures[i].GetWrapS()); samplerDesc.AddressV = GetWrap(m_textures[i].GetWrapT()); samplerDesc.AddressW = GetWrap(m_textures[i].GetWrapR()); From e55949dbfa01e8ddabf8af4b2ba82d69d04fed9b Mon Sep 17 00:00:00 2001 From: vlj Date: Sun, 7 Jun 2015 23:43:17 +0200 Subject: [PATCH 247/343] d3d12: Use h0 in fragment decompiler when there is no r0 Partially fix alpha kill test. --- .../D3D12/D3D12FragmentProgramDecompiler.cpp | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp index f9837e0185..ec1032a548 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp @@ -72,10 +72,19 @@ void D3D12FragmentDecompiler::insertOutputs(std::stringstream & OS) { "ocol3", "r4" }, }; + const std::pair table2[] = + { + { "ocol0", "h0" }, + { "ocol1", "h2" }, + { "ocol2", "h3" }, + { "ocol3", "h4" }, + }; for (int i = 0; i < sizeof(table) / sizeof(*table); ++i) { if (m_parr.HasParam(PF_PARAM_NONE, "float4", table[i].second)) OS << " " << "float4" << " " << table[i].first << " : SV_TARGET" << i << ";" << std::endl; + else if (m_parr.HasParam(PF_PARAM_NONE, "float4", table2[i].second)) + OS << " " << "float4" << " " << table2[i].first << " : SV_TARGET" << i << ";" << std::endl; } OS << "};" << std::endl; } @@ -133,11 +142,21 @@ void 
D3D12FragmentDecompiler::insertMainEnd(std::stringstream & OS) { "ocol3", "r4" }, }; + const std::pair table2[] = + { + { "ocol0", "h0" }, + { "ocol1", "h2" }, + { "ocol2", "h3" }, + { "ocol3", "h4" }, + }; + OS << " PixelOutput Out;" << std::endl; for (int i = 0; i < sizeof(table) / sizeof(*table); ++i) { if (m_parr.HasParam(PF_PARAM_NONE, "float4", table[i].second)) OS << " Out." << table[i].first << " = " << table[i].second << ";" << std::endl; + else if (m_parr.HasParam(PF_PARAM_NONE, "float4", table2[i].second)) + OS << " Out." << table2[i].first << " = " << table2[i].second << ";" << std::endl; } OS << " return Out;" << std::endl; OS << "}" << std::endl; From ad55cced13a97c28cdc3c85fa44feb39a98063da Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 8 Jun 2015 03:00:01 +0200 Subject: [PATCH 248/343] d3d12: Fix vertex buffer sometimes incomplete. --- rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index 8e07aab7b4..c6b976b491 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -195,6 +195,9 @@ static ID3D12Resource *createVertexBuffer(const VertexBufferFormat &vbf, const RSXVertexData *vertexData, ID3D12Device *device, DataHeap &vertexIndexHeap) { size_t subBufferSize = vbf.range.second - vbf.range.first + 1; + // Make multiple of stride + if (vbf.stride) + subBufferSize = ((subBufferSize + vbf.stride - 1) / vbf.stride) * vbf.stride; assert(vertexIndexHeap.canAlloc(subBufferSize)); size_t heapOffset = vertexIndexHeap.alloc(subBufferSize); @@ -209,7 +212,7 @@ ID3D12Resource *createVertexBuffer(const VertexBufferFormat &vbf, const RSXVerte )); void *bufferMap; check(vertexBuffer->Map(0, nullptr, (void**)&bufferMap)); - + memset(bufferMap, -1, subBufferSize); #pragma omp parallel for for (int vertex = 0; vertex < vbf.elementCount; vertex++) { @@ -269,7 +272,10 @@ std::pair, 
D3D12_INDEX_BUFFER_VIEW> D3D12G for (size_t buffer = 0; buffer < vertexBufferFormat.size(); buffer++) { const VertexBufferFormat &vbf = vertexBufferFormat[buffer]; + // Make multiple of stride size_t subBufferSize = vbf.range.second - vbf.range.first + 1; + if (vbf.stride) + subBufferSize = ((subBufferSize + vbf.stride - 1) / vbf.stride) * vbf.stride; ID3D12Resource *vertexBuffer = createVertexBuffer(vbf, m_vertex_data, m_device, m_vertexIndexData); From 224503d2dc54f61d05acfcf2a94009a289aebef0 Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 8 Jun 2015 16:52:13 +0200 Subject: [PATCH 249/343] d3d12: Move program related code out of D3D12GSRender and some get* format functions --- rpcs3/Emu/RSX/D3D12/D3D12.h | 181 +++++++++++ rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 362 --------------------- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp | 259 +++++++++++++++ rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 69 +--- 4 files changed, 442 insertions(+), 429 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12.h b/rpcs3/Emu/RSX/D3D12/D3D12.h index 71e94fa740..d1598b7502 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12.h @@ -3,6 +3,7 @@ #include #include +#include "utilities/Log.h" inline void check(HRESULT hr) @@ -106,4 +107,184 @@ D3D12_RESOURCE_BARRIER getResourceBarrierTransition(ID3D12Resource *res, D3D12_R return barrier; } +/** + * Convert GCM blend operator code to D3D12 one + */ +inline D3D12_BLEND_OP getBlendOp(u16 op) +{ + switch (op) + { + case CELL_GCM_FUNC_ADD: return D3D12_BLEND_OP_ADD; + case CELL_GCM_FUNC_SUBTRACT: return D3D12_BLEND_OP_SUBTRACT; + case CELL_GCM_FUNC_REVERSE_SUBTRACT: return D3D12_BLEND_OP_REV_SUBTRACT; + case CELL_GCM_MIN: return D3D12_BLEND_OP_MIN; + case CELL_GCM_MAX: return D3D12_BLEND_OP_MAX; + case CELL_GCM_FUNC_ADD_SIGNED: + case CELL_GCM_FUNC_REVERSE_ADD_SIGNED: + case CELL_GCM_FUNC_REVERSE_SUBTRACT_SIGNED: + LOG_WARNING(RSX, "Unsupported Blend Op %d", op); + return D3D12_BLEND_OP(); + } +} + +/** + * Convert GCM blend 
factor code to D3D12 one + */ +inline D3D12_BLEND getBlendFactor(u16 factor) +{ + switch (factor) + { + case CELL_GCM_ZERO: return D3D12_BLEND_ZERO; + case CELL_GCM_ONE: return D3D12_BLEND_ONE; + case CELL_GCM_SRC_COLOR: return D3D12_BLEND_SRC_COLOR; + case CELL_GCM_ONE_MINUS_SRC_COLOR: return D3D12_BLEND_INV_SRC_COLOR; + case CELL_GCM_SRC_ALPHA: return D3D12_BLEND_SRC_ALPHA; + case CELL_GCM_ONE_MINUS_SRC_ALPHA: return D3D12_BLEND_INV_SRC_ALPHA; + case CELL_GCM_DST_ALPHA: return D3D12_BLEND_DEST_ALPHA; + case CELL_GCM_ONE_MINUS_DST_ALPHA: return D3D12_BLEND_INV_DEST_ALPHA; + case CELL_GCM_DST_COLOR: return D3D12_BLEND_DEST_COLOR; + case CELL_GCM_ONE_MINUS_DST_COLOR: return D3D12_BLEND_INV_DEST_COLOR; + case CELL_GCM_SRC_ALPHA_SATURATE: return D3D12_BLEND_SRC_ALPHA_SAT; + case CELL_GCM_CONSTANT_COLOR: + case CELL_GCM_ONE_MINUS_CONSTANT_COLOR: + case CELL_GCM_CONSTANT_ALPHA: + case CELL_GCM_ONE_MINUS_CONSTANT_ALPHA: + LOG_WARNING(RSX, "Unsupported Blend Factor %d", factor); + return D3D12_BLEND(); + } +} + +/** + * Convert GCM logic op code to D3D12 one + */ +inline D3D12_LOGIC_OP getLogicOp(u32 op) +{ + switch (op) + { + default: + LOG_WARNING(RSX, "Unsupported Logic Op %d", op); + return D3D12_LOGIC_OP(); + case CELL_GCM_CLEAR: return D3D12_LOGIC_OP_CLEAR; + case CELL_GCM_AND: return D3D12_LOGIC_OP_AND; + case CELL_GCM_AND_REVERSE: return D3D12_LOGIC_OP_AND_REVERSE; + case CELL_GCM_COPY: return D3D12_LOGIC_OP_COPY; + case CELL_GCM_AND_INVERTED: return D3D12_LOGIC_OP_AND_INVERTED; + case CELL_GCM_NOOP: return D3D12_LOGIC_OP_NOOP; + case CELL_GCM_XOR: return D3D12_LOGIC_OP_XOR; + case CELL_GCM_OR: return D3D12_LOGIC_OP_OR; + case CELL_GCM_NOR: return D3D12_LOGIC_OP_NOR; + case CELL_GCM_EQUIV: return D3D12_LOGIC_OP_EQUIV; + case CELL_GCM_INVERT: return D3D12_LOGIC_OP_INVERT; + case CELL_GCM_OR_REVERSE: return D3D12_LOGIC_OP_OR_REVERSE; + case CELL_GCM_COPY_INVERTED: return D3D12_LOGIC_OP_COPY_INVERTED; + case CELL_GCM_OR_INVERTED: return D3D12_LOGIC_OP_OR_INVERTED; + 
case CELL_GCM_NAND: return D3D12_LOGIC_OP_NAND; + } +} + +/** + * Convert GCM stencil op code to D3D12 one + */ +inline D3D12_STENCIL_OP getStencilOp(u32 op) +{ + switch (op) + { + case CELL_GCM_KEEP: return D3D12_STENCIL_OP_KEEP; + case CELL_GCM_ZERO: return D3D12_STENCIL_OP_ZERO; + case CELL_GCM_REPLACE: return D3D12_STENCIL_OP_REPLACE; + case CELL_GCM_INCR: return D3D12_STENCIL_OP_INCR; + case CELL_GCM_DECR: return D3D12_STENCIL_OP_DECR; + case CELL_GCM_INCR_WRAP: + case CELL_GCM_DECR_WRAP: + LOG_WARNING(RSX, "Unsupported Stencil Op %d", op); + return D3D12_STENCIL_OP(); + } +} + +/** + * Convert GCM comparison function code to D3D12 one. + */ +inline D3D12_COMPARISON_FUNC getCompareFunc(u32 op) +{ + switch (op) + { + case CELL_GCM_NEVER: return D3D12_COMPARISON_FUNC_NEVER; + case CELL_GCM_LESS: return D3D12_COMPARISON_FUNC_LESS; + case CELL_GCM_EQUAL: return D3D12_COMPARISON_FUNC_EQUAL; + case CELL_GCM_LEQUAL: return D3D12_COMPARISON_FUNC_LESS_EQUAL; + case CELL_GCM_GREATER: return D3D12_COMPARISON_FUNC_GREATER; + case CELL_GCM_NOTEQUAL: return D3D12_COMPARISON_FUNC_NOT_EQUAL; + case CELL_GCM_GEQUAL: return D3D12_COMPARISON_FUNC_GREATER_EQUAL; + case CELL_GCM_ALWAYS: return D3D12_COMPARISON_FUNC_ALWAYS; + } +} + +/** + * Convert GCM texture format to an equivalent one supported by D3D12. + * Destination format may require a byte swap or data conversion. 
+ */ +inline DXGI_FORMAT getTextureDXGIFormat(int format) +{ + switch (format) + { + case CELL_GCM_TEXTURE_Y16_X16_FLOAT: + case CELL_GCM_TEXTURE_COMPRESSED_HILO8: + case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8: + case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: + case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: + default: + LOG_ERROR(RSX, "Unimplemented Texture format : %x", format); + return DXGI_FORMAT(); + case CELL_GCM_TEXTURE_B8: + return DXGI_FORMAT_R8_UNORM; + case CELL_GCM_TEXTURE_A1R5G5B5: + return DXGI_FORMAT_B5G5R5A1_UNORM; + case CELL_GCM_TEXTURE_A4R4G4B4: + return DXGI_FORMAT_B4G4R4A4_UNORM; + case CELL_GCM_TEXTURE_R5G6B5: + return DXGI_FORMAT_B5G6R5_UNORM; + case CELL_GCM_TEXTURE_A8R8G8B8: + return DXGI_FORMAT_R8G8B8A8_UNORM; + case CELL_GCM_TEXTURE_COMPRESSED_DXT1: + return DXGI_FORMAT_BC1_UNORM; + case CELL_GCM_TEXTURE_COMPRESSED_DXT23: + return DXGI_FORMAT_BC2_UNORM; + case CELL_GCM_TEXTURE_COMPRESSED_DXT45: + return DXGI_FORMAT_BC3_UNORM; + case CELL_GCM_TEXTURE_G8B8: + return DXGI_FORMAT_G8R8_G8B8_UNORM; + case CELL_GCM_TEXTURE_R6G5B5: + // Not native + return DXGI_FORMAT_R8G8B8A8_UNORM; + case CELL_GCM_TEXTURE_DEPTH24_D8: + return DXGI_FORMAT_R32_UINT; + case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: + return DXGI_FORMAT_R32_FLOAT; + case CELL_GCM_TEXTURE_DEPTH16: + return DXGI_FORMAT_R16_UNORM; + case CELL_GCM_TEXTURE_DEPTH16_FLOAT: + return DXGI_FORMAT_R16_FLOAT; + case CELL_GCM_TEXTURE_X16: + return DXGI_FORMAT_R16_UNORM; + case CELL_GCM_TEXTURE_Y16_X16: + return DXGI_FORMAT_R16G16_UNORM; + case CELL_GCM_TEXTURE_R5G5B5A1: + return DXGI_FORMAT_B5G5R5A1_UNORM; + case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: + return DXGI_FORMAT_R16G16B16A16_FLOAT; + case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: + return DXGI_FORMAT_R32G32B32A32_FLOAT; + case CELL_GCM_TEXTURE_X32_FLOAT: + return DXGI_FORMAT_R32_FLOAT; + case CELL_GCM_TEXTURE_D1R5G5B5: + return DXGI_FORMAT_B5G5R5A1_UNORM; + 
case CELL_GCM_TEXTURE_D8R8G8B8: + return DXGI_FORMAT_R8G8B8A8_UNORM; + case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: + return DXGI_FORMAT_G8R8_G8B8_UNORM; + case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: + return DXGI_FORMAT_R8G8_B8G8_UNORM; + } +} + #endif \ No newline at end of file diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index bf8141e014..32b4f83d51 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -577,368 +577,6 @@ void D3D12GSRender::ExecCMD(u32 cmd) m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**) &commandList); } -static D3D12_BLEND_OP getBlendOp(u16 op) -{ - switch (op) - { - case CELL_GCM_FUNC_ADD: return D3D12_BLEND_OP_ADD; - case CELL_GCM_FUNC_SUBTRACT: return D3D12_BLEND_OP_SUBTRACT; - case CELL_GCM_FUNC_REVERSE_SUBTRACT: return D3D12_BLEND_OP_REV_SUBTRACT; - case CELL_GCM_MIN: return D3D12_BLEND_OP_MIN; - case CELL_GCM_MAX: return D3D12_BLEND_OP_MAX; - case CELL_GCM_FUNC_ADD_SIGNED: - case CELL_GCM_FUNC_REVERSE_ADD_SIGNED: - case CELL_GCM_FUNC_REVERSE_SUBTRACT_SIGNED: - LOG_WARNING(RSX, "Unsupported Blend Op %d", op); - } -} - -static D3D12_BLEND getBlendFactor(u16 factor) -{ - switch (factor) - { - case CELL_GCM_ZERO: return D3D12_BLEND_ZERO; - case CELL_GCM_ONE: return D3D12_BLEND_ONE; - case CELL_GCM_SRC_COLOR: return D3D12_BLEND_SRC_COLOR; - case CELL_GCM_ONE_MINUS_SRC_COLOR: return D3D12_BLEND_INV_SRC_COLOR; - case CELL_GCM_SRC_ALPHA: return D3D12_BLEND_SRC_ALPHA; - case CELL_GCM_ONE_MINUS_SRC_ALPHA: return D3D12_BLEND_INV_SRC_ALPHA; - case CELL_GCM_DST_ALPHA: return D3D12_BLEND_DEST_ALPHA; - case CELL_GCM_ONE_MINUS_DST_ALPHA: return D3D12_BLEND_INV_DEST_ALPHA; - case CELL_GCM_DST_COLOR: return D3D12_BLEND_DEST_COLOR; - case CELL_GCM_ONE_MINUS_DST_COLOR: return D3D12_BLEND_INV_DEST_COLOR; - case CELL_GCM_SRC_ALPHA_SATURATE: return D3D12_BLEND_SRC_ALPHA_SAT; - case CELL_GCM_CONSTANT_COLOR: - case CELL_GCM_ONE_MINUS_CONSTANT_COLOR: - 
case CELL_GCM_CONSTANT_ALPHA: - case CELL_GCM_ONE_MINUS_CONSTANT_ALPHA: - LOG_WARNING(RSX, "Unsupported Blend Factor %d", factor); - } -} - -static D3D12_LOGIC_OP getLogicOp(u32 op) -{ - switch (op) - { - default: LOG_WARNING(RSX, "Unsupported Logic Op %d", op); - case CELL_GCM_CLEAR: return D3D12_LOGIC_OP_CLEAR; - case CELL_GCM_AND: return D3D12_LOGIC_OP_AND; - case CELL_GCM_AND_REVERSE: return D3D12_LOGIC_OP_AND_REVERSE; - case CELL_GCM_COPY: return D3D12_LOGIC_OP_COPY; - case CELL_GCM_AND_INVERTED: return D3D12_LOGIC_OP_AND_INVERTED; - case CELL_GCM_NOOP: return D3D12_LOGIC_OP_NOOP; - case CELL_GCM_XOR: return D3D12_LOGIC_OP_XOR; - case CELL_GCM_OR: return D3D12_LOGIC_OP_OR; - case CELL_GCM_NOR: return D3D12_LOGIC_OP_NOR; - case CELL_GCM_EQUIV: return D3D12_LOGIC_OP_EQUIV; - case CELL_GCM_INVERT: return D3D12_LOGIC_OP_INVERT; - case CELL_GCM_OR_REVERSE: return D3D12_LOGIC_OP_OR_REVERSE; - case CELL_GCM_COPY_INVERTED: return D3D12_LOGIC_OP_COPY_INVERTED; - case CELL_GCM_OR_INVERTED: return D3D12_LOGIC_OP_OR_INVERTED; - case CELL_GCM_NAND: return D3D12_LOGIC_OP_NAND; - } -} - -static D3D12_STENCIL_OP getStencilOp(u32 op) -{ - switch (op) - { - case CELL_GCM_KEEP: return D3D12_STENCIL_OP_KEEP; - case CELL_GCM_ZERO: return D3D12_STENCIL_OP_ZERO; - case CELL_GCM_REPLACE: return D3D12_STENCIL_OP_REPLACE; - case CELL_GCM_INCR: return D3D12_STENCIL_OP_INCR; - case CELL_GCM_DECR: return D3D12_STENCIL_OP_DECR; - case CELL_GCM_INCR_WRAP: - case CELL_GCM_DECR_WRAP: - LOG_WARNING(RSX, "Unsupported Stencil Op %d", op); - } -} - -static D3D12_COMPARISON_FUNC getStencilFunc(u32 op) -{ - switch (op) - { - case CELL_GCM_NEVER: return D3D12_COMPARISON_FUNC_NEVER; - case CELL_GCM_LESS: return D3D12_COMPARISON_FUNC_LESS; - case CELL_GCM_EQUAL: return D3D12_COMPARISON_FUNC_EQUAL; - case CELL_GCM_LEQUAL: return D3D12_COMPARISON_FUNC_LESS_EQUAL; - case CELL_GCM_GREATER: return D3D12_COMPARISON_FUNC_GREATER; - case CELL_GCM_NOTEQUAL: return D3D12_COMPARISON_FUNC_NOT_EQUAL; - case 
CELL_GCM_GEQUAL: return D3D12_COMPARISON_FUNC_GREATER_EQUAL; - case CELL_GCM_ALWAYS: return D3D12_COMPARISON_FUNC_ALWAYS; - } -} - -static D3D12_COMPARISON_FUNC getDepthFunc(u32 op) -{ - switch (op) - { - case CELL_GCM_NEVER: return D3D12_COMPARISON_FUNC_NEVER; - case CELL_GCM_LESS: return D3D12_COMPARISON_FUNC_LESS; - case CELL_GCM_EQUAL: return D3D12_COMPARISON_FUNC_EQUAL; - case CELL_GCM_LEQUAL: return D3D12_COMPARISON_FUNC_LESS_EQUAL; - case CELL_GCM_GREATER: return D3D12_COMPARISON_FUNC_GREATER; - case CELL_GCM_NOTEQUAL: return D3D12_COMPARISON_FUNC_NOT_EQUAL; - case CELL_GCM_GEQUAL: return D3D12_COMPARISON_FUNC_GREATER_EQUAL; - case CELL_GCM_ALWAYS: return D3D12_COMPARISON_FUNC_ALWAYS; - } -} - -bool D3D12GSRender::LoadProgram() -{ - if (!m_cur_fragment_prog) - { - LOG_WARNING(RSX, "LoadProgram: m_cur_shader_prog == NULL"); - return false; - } - - m_cur_fragment_prog->ctrl = m_shader_ctrl; - - if (!m_cur_vertex_prog) - { - LOG_WARNING(RSX, "LoadProgram: m_cur_vertex_prog == NULL"); - return false; - } - - D3D12PipelineProperties prop = {}; - switch (m_draw_mode - 1) - { - case GL_POINTS: - prop.Topology = D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT; - break; - case GL_LINES: - case GL_LINE_LOOP: - case GL_LINE_STRIP: - prop.Topology = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; - break; - case GL_TRIANGLES: - case GL_TRIANGLE_STRIP: - case GL_TRIANGLE_FAN: - prop.Topology = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; - break; - case GL_QUADS: - case GL_QUAD_STRIP: - case GL_POLYGON: - default: -// LOG_ERROR(RSX, "Unsupported primitive type"); - prop.Topology = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; - break; - } - - static D3D12_BLEND_DESC CD3D12_BLEND_DESC = - { - FALSE, - FALSE, - { - FALSE,FALSE, - D3D12_BLEND_ONE, D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD, - D3D12_BLEND_ONE, D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD, - D3D12_LOGIC_OP_NOOP, - D3D12_COLOR_WRITE_ENABLE_ALL, - } - }; - prop.Blend = CD3D12_BLEND_DESC; - - if (m_set_blend) - { - prop.Blend.RenderTarget[0].BlendEnable = 
true; - - if (m_set_blend_mrt1) - prop.Blend.RenderTarget[1].BlendEnable = true; - if (m_set_blend_mrt2) - prop.Blend.RenderTarget[2].BlendEnable = true; - if (m_set_blend_mrt3) - prop.Blend.RenderTarget[3].BlendEnable = true; - } - - if (m_set_blend_equation) - { - prop.Blend.RenderTarget[0].BlendOp = getBlendOp(m_blend_equation_rgb); - prop.Blend.RenderTarget[0].BlendOpAlpha = getBlendOp(m_blend_equation_alpha); - - if (m_set_blend_mrt1) - { - prop.Blend.RenderTarget[1].BlendOp = getBlendOp(m_blend_equation_rgb); - prop.Blend.RenderTarget[1].BlendOpAlpha = getBlendOp(m_blend_equation_alpha); - } - - if (m_set_blend_mrt2) - { - prop.Blend.RenderTarget[2].BlendOp = getBlendOp(m_blend_equation_rgb); - prop.Blend.RenderTarget[2].BlendOpAlpha = getBlendOp(m_blend_equation_alpha); - } - - if (m_set_blend_mrt3) - { - prop.Blend.RenderTarget[3].BlendOp = getBlendOp(m_blend_equation_rgb); - prop.Blend.RenderTarget[3].BlendOpAlpha = getBlendOp(m_blend_equation_alpha); - } - } - - if (m_set_blend_sfactor && m_set_blend_dfactor) - { - prop.Blend.RenderTarget[0].SrcBlend = getBlendFactor(m_blend_sfactor_rgb); - prop.Blend.RenderTarget[0].DestBlend = getBlendFactor(m_blend_dfactor_rgb); - prop.Blend.RenderTarget[0].SrcBlendAlpha = getBlendFactor(m_blend_sfactor_alpha); - prop.Blend.RenderTarget[0].DestBlendAlpha = getBlendFactor(m_blend_dfactor_alpha); - - if (m_set_blend_mrt1) - { - prop.Blend.RenderTarget[1].SrcBlend = getBlendFactor(m_blend_sfactor_rgb); - prop.Blend.RenderTarget[1].DestBlend = getBlendFactor(m_blend_dfactor_rgb); - prop.Blend.RenderTarget[1].SrcBlendAlpha = getBlendFactor(m_blend_sfactor_alpha); - prop.Blend.RenderTarget[1].DestBlendAlpha = getBlendFactor(m_blend_dfactor_alpha); - } - - if (m_set_blend_mrt2) - { - prop.Blend.RenderTarget[2].SrcBlend = getBlendFactor(m_blend_sfactor_rgb); - prop.Blend.RenderTarget[2].DestBlend = getBlendFactor(m_blend_dfactor_rgb); - prop.Blend.RenderTarget[2].SrcBlendAlpha = getBlendFactor(m_blend_sfactor_alpha); - 
prop.Blend.RenderTarget[2].DestBlendAlpha = getBlendFactor(m_blend_dfactor_alpha); - } - - if (m_set_blend_mrt3) - { - prop.Blend.RenderTarget[3].SrcBlend = getBlendFactor(m_blend_sfactor_rgb); - prop.Blend.RenderTarget[3].DestBlend = getBlendFactor(m_blend_dfactor_rgb); - prop.Blend.RenderTarget[3].SrcBlendAlpha = getBlendFactor(m_blend_sfactor_alpha); - prop.Blend.RenderTarget[3].DestBlendAlpha = getBlendFactor(m_blend_dfactor_alpha); - } - } - - if (m_set_logic_op) - { - prop.Blend.RenderTarget[0].LogicOpEnable = true; - prop.Blend.RenderTarget[0].LogicOp = getLogicOp(m_logic_op); - } - - if (m_set_blend_color) - { -// glBlendColor(m_blend_color_r, m_blend_color_g, m_blend_color_b, m_blend_color_a); -// checkForGlError("glBlendColor"); - } - - switch (m_surface_depth_format) - { - case 0: - break; - case CELL_GCM_SURFACE_Z16: - prop.DepthStencilFormat = DXGI_FORMAT_D16_UNORM; - break; - case CELL_GCM_SURFACE_Z24S8: - prop.DepthStencilFormat = DXGI_FORMAT_D24_UNORM_S8_UINT; - break; - default: - LOG_ERROR(RSX, "Bad depth format! (%d)", m_surface_depth_format); - assert(0); - } - - switch (m_surface_color_format) - { - case CELL_GCM_SURFACE_A8R8G8B8: - prop.RenderTargetsFormat = DXGI_FORMAT_R8G8B8A8_UNORM; - break; - case CELL_GCM_SURFACE_F_W16Z16Y16X16: - prop.RenderTargetsFormat = DXGI_FORMAT_R16G16B16A16_FLOAT; - break; - } - - switch (m_surface_color_target) - { - case CELL_GCM_SURFACE_TARGET_0: - case CELL_GCM_SURFACE_TARGET_1: - prop.numMRT = 1; - break; - case CELL_GCM_SURFACE_TARGET_MRT1: - prop.numMRT = 2; - break; - case CELL_GCM_SURFACE_TARGET_MRT2: - prop.numMRT = 3; - break; - case CELL_GCM_SURFACE_TARGET_MRT3: - prop.numMRT = 4; - break; - default: - LOG_ERROR(RSX, "Bad surface color target: %d", m_surface_color_target); - } - - prop.DepthStencil.DepthEnable = m_set_depth_test; - prop.DepthStencil.DepthWriteMask = m_depth_mask ? 
D3D12_DEPTH_WRITE_MASK_ALL : D3D12_DEPTH_WRITE_MASK_ZERO; - prop.DepthStencil.DepthFunc = getDepthFunc(m_depth_func); - prop.DepthStencil.StencilEnable = m_set_stencil_test; - prop.DepthStencil.StencilReadMask = m_stencil_func_mask; - prop.DepthStencil.StencilWriteMask = m_stencil_mask; - prop.DepthStencil.FrontFace.StencilPassOp = getStencilOp(m_stencil_zpass); - prop.DepthStencil.FrontFace.StencilDepthFailOp = getStencilOp(m_stencil_zfail); - prop.DepthStencil.FrontFace.StencilFailOp = getStencilOp(m_stencil_fail); - prop.DepthStencil.FrontFace.StencilFunc = getStencilFunc(m_stencil_func); - - if (m_set_two_sided_stencil_test_enable) - { - prop.DepthStencil.BackFace.StencilFailOp = getStencilOp(m_back_stencil_fail); - prop.DepthStencil.BackFace.StencilFunc = getStencilFunc(m_back_stencil_func); - prop.DepthStencil.BackFace.StencilPassOp = getStencilOp(m_back_stencil_zpass); - prop.DepthStencil.BackFace.StencilDepthFailOp = getStencilOp(m_back_stencil_zfail); - } - else - { - prop.DepthStencil.BackFace.StencilPassOp = getStencilOp(m_stencil_zpass); - prop.DepthStencil.BackFace.StencilDepthFailOp = getStencilOp(m_stencil_zfail); - prop.DepthStencil.BackFace.StencilFailOp = getStencilOp(m_stencil_fail); - prop.DepthStencil.BackFace.StencilFunc = getStencilFunc(m_stencil_func); - } - - // Sensible default value - static D3D12_RASTERIZER_DESC CD3D12_RASTERIZER_DESC = - { - D3D12_FILL_MODE_SOLID, - D3D12_CULL_MODE_NONE, - FALSE, - D3D12_DEFAULT_DEPTH_BIAS, - D3D12_DEFAULT_DEPTH_BIAS_CLAMP, - D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS, - TRUE, - FALSE, - FALSE, - 0, - D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF, - }; - prop.Rasterization = CD3D12_RASTERIZER_DESC; - switch (m_set_cull_face) - { - case CELL_GCM_FRONT: - prop.Rasterization.CullMode = D3D12_CULL_MODE_FRONT; - break; - case CELL_GCM_BACK: - prop.Rasterization.CullMode = D3D12_CULL_MODE_BACK; - break; - default: - prop.Rasterization.CullMode = D3D12_CULL_MODE_NONE; - break; - } - - switch (m_front_face) - { - case 
CELL_GCM_CW: - prop.Rasterization.FrontCounterClockwise = FALSE; - break; - case CELL_GCM_CCW: - prop.Rasterization.FrontCounterClockwise = TRUE; - break; - } - - if (m_set_color_mask) - prop.SampleMask = m_color_mask_r | (m_color_mask_g << 1) | (m_color_mask_b << 2) | (m_color_mask_a << 3); - else - prop.SampleMask = UINT_MAX; - - prop.IASet = m_IASet; - - m_PSO = m_cachePSO.getGraphicPipelineState(m_cur_vertex_prog, m_cur_fragment_prog, prop, std::make_pair(m_device, m_rootSignatures)); - return m_PSO != nullptr; -} - void D3D12GSRender::ExecCMD() { InitDrawBuffers(); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp index 56c8f6f4e9..f6e2446015 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp @@ -3,6 +3,7 @@ #include "D3D12PipelineState.h" #include +#include "D3D12GSRender.h" #pragma comment (lib, "d3dcompiler.lib") @@ -27,4 +28,262 @@ void Shader::Compile(const std::string &code, SHADER_TYPE st) } + + +bool D3D12GSRender::LoadProgram() +{ + if (!m_cur_fragment_prog) + { + LOG_WARNING(RSX, "LoadProgram: m_cur_shader_prog == NULL"); + return false; + } + + m_cur_fragment_prog->ctrl = m_shader_ctrl; + + if (!m_cur_vertex_prog) + { + LOG_WARNING(RSX, "LoadProgram: m_cur_vertex_prog == NULL"); + return false; + } + + D3D12PipelineProperties prop = {}; + switch (m_draw_mode - 1) + { + case GL_POINTS: + prop.Topology = D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT; + break; + case GL_LINES: + case GL_LINE_LOOP: + case GL_LINE_STRIP: + prop.Topology = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; + break; + case GL_TRIANGLES: + case GL_TRIANGLE_STRIP: + case GL_TRIANGLE_FAN: + prop.Topology = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + break; + case GL_QUADS: + case GL_QUAD_STRIP: + case GL_POLYGON: + default: + // LOG_ERROR(RSX, "Unsupported primitive type"); + prop.Topology = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + break; + } + + static D3D12_BLEND_DESC CD3D12_BLEND_DESC = + { + 
FALSE, + FALSE, + { + FALSE,FALSE, + D3D12_BLEND_ONE, D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD, + D3D12_BLEND_ONE, D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD, + D3D12_LOGIC_OP_NOOP, + D3D12_COLOR_WRITE_ENABLE_ALL, + } + }; + prop.Blend = CD3D12_BLEND_DESC; + + if (m_set_blend) + { + prop.Blend.RenderTarget[0].BlendEnable = true; + + if (m_set_blend_mrt1) + prop.Blend.RenderTarget[1].BlendEnable = true; + if (m_set_blend_mrt2) + prop.Blend.RenderTarget[2].BlendEnable = true; + if (m_set_blend_mrt3) + prop.Blend.RenderTarget[3].BlendEnable = true; + } + + if (m_set_blend_equation) + { + prop.Blend.RenderTarget[0].BlendOp = getBlendOp(m_blend_equation_rgb); + prop.Blend.RenderTarget[0].BlendOpAlpha = getBlendOp(m_blend_equation_alpha); + + if (m_set_blend_mrt1) + { + prop.Blend.RenderTarget[1].BlendOp = getBlendOp(m_blend_equation_rgb); + prop.Blend.RenderTarget[1].BlendOpAlpha = getBlendOp(m_blend_equation_alpha); + } + + if (m_set_blend_mrt2) + { + prop.Blend.RenderTarget[2].BlendOp = getBlendOp(m_blend_equation_rgb); + prop.Blend.RenderTarget[2].BlendOpAlpha = getBlendOp(m_blend_equation_alpha); + } + + if (m_set_blend_mrt3) + { + prop.Blend.RenderTarget[3].BlendOp = getBlendOp(m_blend_equation_rgb); + prop.Blend.RenderTarget[3].BlendOpAlpha = getBlendOp(m_blend_equation_alpha); + } + } + + if (m_set_blend_sfactor && m_set_blend_dfactor) + { + prop.Blend.RenderTarget[0].SrcBlend = getBlendFactor(m_blend_sfactor_rgb); + prop.Blend.RenderTarget[0].DestBlend = getBlendFactor(m_blend_dfactor_rgb); + prop.Blend.RenderTarget[0].SrcBlendAlpha = getBlendFactor(m_blend_sfactor_alpha); + prop.Blend.RenderTarget[0].DestBlendAlpha = getBlendFactor(m_blend_dfactor_alpha); + + if (m_set_blend_mrt1) + { + prop.Blend.RenderTarget[1].SrcBlend = getBlendFactor(m_blend_sfactor_rgb); + prop.Blend.RenderTarget[1].DestBlend = getBlendFactor(m_blend_dfactor_rgb); + prop.Blend.RenderTarget[1].SrcBlendAlpha = getBlendFactor(m_blend_sfactor_alpha); + prop.Blend.RenderTarget[1].DestBlendAlpha = 
getBlendFactor(m_blend_dfactor_alpha); + } + + if (m_set_blend_mrt2) + { + prop.Blend.RenderTarget[2].SrcBlend = getBlendFactor(m_blend_sfactor_rgb); + prop.Blend.RenderTarget[2].DestBlend = getBlendFactor(m_blend_dfactor_rgb); + prop.Blend.RenderTarget[2].SrcBlendAlpha = getBlendFactor(m_blend_sfactor_alpha); + prop.Blend.RenderTarget[2].DestBlendAlpha = getBlendFactor(m_blend_dfactor_alpha); + } + + if (m_set_blend_mrt3) + { + prop.Blend.RenderTarget[3].SrcBlend = getBlendFactor(m_blend_sfactor_rgb); + prop.Blend.RenderTarget[3].DestBlend = getBlendFactor(m_blend_dfactor_rgb); + prop.Blend.RenderTarget[3].SrcBlendAlpha = getBlendFactor(m_blend_sfactor_alpha); + prop.Blend.RenderTarget[3].DestBlendAlpha = getBlendFactor(m_blend_dfactor_alpha); + } + } + + if (m_set_logic_op) + { + prop.Blend.RenderTarget[0].LogicOpEnable = true; + prop.Blend.RenderTarget[0].LogicOp = getLogicOp(m_logic_op); + } + + if (m_set_blend_color) + { + // glBlendColor(m_blend_color_r, m_blend_color_g, m_blend_color_b, m_blend_color_a); + // checkForGlError("glBlendColor"); + } + + switch (m_surface_depth_format) + { + case 0: + break; + case CELL_GCM_SURFACE_Z16: + prop.DepthStencilFormat = DXGI_FORMAT_D16_UNORM; + break; + case CELL_GCM_SURFACE_Z24S8: + prop.DepthStencilFormat = DXGI_FORMAT_D24_UNORM_S8_UINT; + break; + default: + LOG_ERROR(RSX, "Bad depth format! 
(%d)", m_surface_depth_format); + assert(0); + } + + switch (m_surface_color_format) + { + case CELL_GCM_SURFACE_A8R8G8B8: + prop.RenderTargetsFormat = DXGI_FORMAT_R8G8B8A8_UNORM; + break; + case CELL_GCM_SURFACE_F_W16Z16Y16X16: + prop.RenderTargetsFormat = DXGI_FORMAT_R16G16B16A16_FLOAT; + break; + } + + switch (m_surface_color_target) + { + case CELL_GCM_SURFACE_TARGET_0: + case CELL_GCM_SURFACE_TARGET_1: + prop.numMRT = 1; + break; + case CELL_GCM_SURFACE_TARGET_MRT1: + prop.numMRT = 2; + break; + case CELL_GCM_SURFACE_TARGET_MRT2: + prop.numMRT = 3; + break; + case CELL_GCM_SURFACE_TARGET_MRT3: + prop.numMRT = 4; + break; + default: + LOG_ERROR(RSX, "Bad surface color target: %d", m_surface_color_target); + } + + prop.DepthStencil.DepthEnable = m_set_depth_test; + prop.DepthStencil.DepthWriteMask = m_depth_mask ? D3D12_DEPTH_WRITE_MASK_ALL : D3D12_DEPTH_WRITE_MASK_ZERO; + prop.DepthStencil.DepthFunc = getDepthFunc(m_depth_func); + prop.DepthStencil.StencilEnable = m_set_stencil_test; + prop.DepthStencil.StencilReadMask = m_stencil_func_mask; + prop.DepthStencil.StencilWriteMask = m_stencil_mask; + prop.DepthStencil.FrontFace.StencilPassOp = getStencilOp(m_stencil_zpass); + prop.DepthStencil.FrontFace.StencilDepthFailOp = getStencilOp(m_stencil_zfail); + prop.DepthStencil.FrontFace.StencilFailOp = getStencilOp(m_stencil_fail); + prop.DepthStencil.FrontFace.StencilFunc = getStencilFunc(m_stencil_func); + + if (m_set_two_sided_stencil_test_enable) + { + prop.DepthStencil.BackFace.StencilFailOp = getStencilOp(m_back_stencil_fail); + prop.DepthStencil.BackFace.StencilFunc = getStencilFunc(m_back_stencil_func); + prop.DepthStencil.BackFace.StencilPassOp = getStencilOp(m_back_stencil_zpass); + prop.DepthStencil.BackFace.StencilDepthFailOp = getStencilOp(m_back_stencil_zfail); + } + else + { + prop.DepthStencil.BackFace.StencilPassOp = getStencilOp(m_stencil_zpass); + prop.DepthStencil.BackFace.StencilDepthFailOp = getStencilOp(m_stencil_zfail); + 
prop.DepthStencil.BackFace.StencilFailOp = getStencilOp(m_stencil_fail); + prop.DepthStencil.BackFace.StencilFunc = getStencilFunc(m_stencil_func); + } + + // Sensible default value + static D3D12_RASTERIZER_DESC CD3D12_RASTERIZER_DESC = + { + D3D12_FILL_MODE_SOLID, + D3D12_CULL_MODE_NONE, + FALSE, + D3D12_DEFAULT_DEPTH_BIAS, + D3D12_DEFAULT_DEPTH_BIAS_CLAMP, + D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS, + TRUE, + FALSE, + FALSE, + 0, + D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF, + }; + prop.Rasterization = CD3D12_RASTERIZER_DESC; + switch (m_set_cull_face) + { + case CELL_GCM_FRONT: + prop.Rasterization.CullMode = D3D12_CULL_MODE_FRONT; + break; + case CELL_GCM_BACK: + prop.Rasterization.CullMode = D3D12_CULL_MODE_BACK; + break; + default: + prop.Rasterization.CullMode = D3D12_CULL_MODE_NONE; + break; + } + + switch (m_front_face) + { + case CELL_GCM_CW: + prop.Rasterization.FrontCounterClockwise = FALSE; + break; + case CELL_GCM_CCW: + prop.Rasterization.FrontCounterClockwise = TRUE; + break; + } + + if (m_set_color_mask) + prop.SampleMask = m_color_mask_r | (m_color_mask_g << 1) | (m_color_mask_b << 2) | (m_color_mask_a << 3); + else + prop.SampleMask = UINT_MAX; + + prop.IASet = m_IASet; + + m_PSO = m_cachePSO.getGraphicPipelineState(m_cur_vertex_prog, m_cur_fragment_prog, prop, std::make_pair(m_device, m_rootSignatures)); + return m_PSO != nullptr; +} + + #endif \ No newline at end of file diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index b492551765..4f796a4f86 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -81,71 +81,6 @@ D3D12_TEXTURE_ADDRESS_MODE D3D12GSRender::GetWrap(size_t wrap) return D3D12_TEXTURE_ADDRESS_MODE_WRAP; } -static -DXGI_FORMAT getDXGIFormat(int format) -{ - switch (format) - { - - case CELL_GCM_TEXTURE_Y16_X16_FLOAT: - case CELL_GCM_TEXTURE_COMPRESSED_HILO8: - case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8: - case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & 
CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: - case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: - default: - LOG_ERROR(RSX, "Unimplemented Texture format : %x", format); - return DXGI_FORMAT(); - case CELL_GCM_TEXTURE_B8: - return DXGI_FORMAT_R8_UNORM; - case CELL_GCM_TEXTURE_A1R5G5B5: - return DXGI_FORMAT_B5G5R5A1_UNORM; - case CELL_GCM_TEXTURE_A4R4G4B4: - return DXGI_FORMAT_B4G4R4A4_UNORM; - case CELL_GCM_TEXTURE_R5G6B5: - return DXGI_FORMAT_B5G6R5_UNORM; - case CELL_GCM_TEXTURE_A8R8G8B8: - return DXGI_FORMAT_R8G8B8A8_UNORM; - case CELL_GCM_TEXTURE_COMPRESSED_DXT1: - return DXGI_FORMAT_BC1_UNORM; - case CELL_GCM_TEXTURE_COMPRESSED_DXT23: - return DXGI_FORMAT_BC2_UNORM; - case CELL_GCM_TEXTURE_COMPRESSED_DXT45: - return DXGI_FORMAT_BC3_UNORM; - case CELL_GCM_TEXTURE_G8B8: - return DXGI_FORMAT_G8R8_G8B8_UNORM; - case CELL_GCM_TEXTURE_R6G5B5: - // Not native - return DXGI_FORMAT_R8G8B8A8_UNORM; - case CELL_GCM_TEXTURE_DEPTH24_D8: - return DXGI_FORMAT_R32_UINT; - case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: - return DXGI_FORMAT_R32_FLOAT; - case CELL_GCM_TEXTURE_DEPTH16: - return DXGI_FORMAT_R16_UNORM; - case CELL_GCM_TEXTURE_DEPTH16_FLOAT: - return DXGI_FORMAT_R16_FLOAT; - case CELL_GCM_TEXTURE_X16: - return DXGI_FORMAT_R16_UNORM; - case CELL_GCM_TEXTURE_Y16_X16: - return DXGI_FORMAT_R16G16_UNORM; - case CELL_GCM_TEXTURE_R5G5B5A1: - return DXGI_FORMAT_B5G5R5A1_UNORM; - case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: - return DXGI_FORMAT_R16G16B16A16_FLOAT; - case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: - return DXGI_FORMAT_R32G32B32A32_FLOAT; - case CELL_GCM_TEXTURE_X32_FLOAT: - return DXGI_FORMAT_R32_FLOAT; - case CELL_GCM_TEXTURE_D1R5G5B5: - return DXGI_FORMAT_B5G5R5A1_UNORM; - case CELL_GCM_TEXTURE_D8R8G8B8: - return DXGI_FORMAT_R8G8B8A8_UNORM; - case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: - return DXGI_FORMAT_G8R8_G8B8_UNORM; - case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: - return DXGI_FORMAT_R8G8_B8G8_UNORM; - } -} static D3D12_FILTER 
getSamplerFilter(u32 minFilter, u32 magFilter) { @@ -212,7 +147,7 @@ ID3D12Resource *uploadSingleTexture( size_t blockSizeInByte, blockWidthInPixel, blockHeightInPixel; int format = texture.GetFormat() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); - DXGI_FORMAT dxgiFormat = getDXGIFormat(format); + DXGI_FORMAT dxgiFormat = getTextureDXGIFormat(format); const u32 texaddr = GetAddress(texture.GetOffset(), texture.GetLocation()); @@ -487,7 +422,7 @@ size_t D3D12GSRender::UploadTextures() const u32 texaddr = GetAddress(m_textures[i].GetOffset(), m_textures[i].GetLocation()); int format = m_textures[i].GetFormat() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); - DXGI_FORMAT dxgiFormat = getDXGIFormat(format); + DXGI_FORMAT dxgiFormat = getTextureDXGIFormat(format); bool is_swizzled = !(m_textures[i].GetFormat() & CELL_GCM_TEXTURE_LN); ID3D12Resource *vramTexture; From 48e6db3a2f5171cd0a6d1486d4dc123f74a58de5 Mon Sep 17 00:00:00 2001 From: raven02 Date: Mon, 8 Jun 2015 20:08:51 +0800 Subject: [PATCH 250/343] d3d12: use CELL_GCM suffix for filter --- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 40 ++++++++++++++++------------ rpcs3/Emu/RSX/GCM.h | 9 +++++++ 2 files changed, 32 insertions(+), 17 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index 4f796a4f86..f80f74372c 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -35,7 +35,8 @@ u32 LinearToSwizzleAddress(u32 x, u32 y, u32 z, u32 log2_width, u32 log2_height, return offset; } -static D3D12_COMPARISON_FUNC ComparisonFunc[] = +static +D3D12_COMPARISON_FUNC getSamplerCompFunc[] = { D3D12_COMPARISON_FUNC_NEVER, D3D12_COMPARISON_FUNC_LESS, @@ -47,7 +48,8 @@ static D3D12_COMPARISON_FUNC ComparisonFunc[] = D3D12_COMPARISON_FUNC_ALWAYS }; -size_t D3D12GSRender::GetMaxAniso(size_t aniso) +static +size_t getSamplerMaxAniso(size_t aniso) { switch (aniso) { @@ -64,7 +66,8 @@ size_t D3D12GSRender::GetMaxAniso(size_t aniso) return 1; } 
-D3D12_TEXTURE_ADDRESS_MODE D3D12GSRender::GetWrap(size_t wrap) +static +D3D12_TEXTURE_ADDRESS_MODE getSamplerWrap(size_t wrap) { switch (wrap) { @@ -82,45 +85,48 @@ D3D12_TEXTURE_ADDRESS_MODE D3D12GSRender::GetWrap(size_t wrap) } -static D3D12_FILTER getSamplerFilter(u32 minFilter, u32 magFilter) + +static +D3D12_FILTER getSamplerFilter(u32 minFilter, u32 magFilter) { D3D12_FILTER_TYPE min, mag, mip; switch (minFilter) { - case 1: //GL_NEAREST + case CELL_GCM_TEXTURE_NEAREST: min = D3D12_FILTER_TYPE_POINT; mip = D3D12_FILTER_TYPE_POINT; break; - case 2: // GL_LINEAR + case CELL_GCM_TEXTURE_LINEAR: min = D3D12_FILTER_TYPE_LINEAR; mip = D3D12_FILTER_TYPE_POINT; break; - case 3: //GL_NEAREST_MIPMAP_NEAREST + case CELL_GCM_TEXTURE_NEAREST_NEAREST: min = D3D12_FILTER_TYPE_POINT; mip = D3D12_FILTER_TYPE_POINT; break; - case 4: // GL_LINEAR_MIPMAP_NEAREST + case CELL_GCM_TEXTURE_LINEAR_NEAREST: min = D3D12_FILTER_TYPE_LINEAR; mip = D3D12_FILTER_TYPE_POINT; break; - case 5: // GL_NEAREST_MIPMAP_LINEAR + case CELL_GCM_TEXTURE_NEAREST_LINEAR: min = D3D12_FILTER_TYPE_POINT; mip = D3D12_FILTER_TYPE_LINEAR; break; - case 6: //GL_LINEAR_MIPMAP_LINEAR + case CELL_GCM_TEXTURE_LINEAR_LINEAR: min = D3D12_FILTER_TYPE_LINEAR; mip = D3D12_FILTER_TYPE_LINEAR; break; + case CELL_GCM_TEXTURE_CONVOLUTION_MIN: default: LOG_ERROR(RSX, "Unknow min filter %x", minFilter); } switch (magFilter) { - case 1: // GL_NEAREST + case CELL_GCM_TEXTURE_NEAREST: mag = D3D12_FILTER_TYPE_POINT; break; - case 2: // GL_LINEAR + case CELL_GCM_TEXTURE_LINEAR: mag = D3D12_FILTER_TYPE_LINEAR; break; default: @@ -560,11 +566,11 @@ size_t D3D12GSRender::UploadTextures() D3D12_SAMPLER_DESC samplerDesc = {}; samplerDesc.Filter = getSamplerFilter(m_textures[i].GetMinFilter(), m_textures[i].GetMagFilter()); - samplerDesc.AddressU = GetWrap(m_textures[i].GetWrapS()); - samplerDesc.AddressV = GetWrap(m_textures[i].GetWrapT()); - samplerDesc.AddressW = GetWrap(m_textures[i].GetWrapR()); - samplerDesc.ComparisonFunc = 
ComparisonFunc[m_textures[i].GetZfunc()]; - samplerDesc.MaxAnisotropy = (UINT)GetMaxAniso(m_textures[i].GetMaxAniso()); + samplerDesc.AddressU = getSamplerWrap(m_textures[i].GetWrapS()); + samplerDesc.AddressV = getSamplerWrap(m_textures[i].GetWrapT()); + samplerDesc.AddressW = getSamplerWrap(m_textures[i].GetWrapR()); + samplerDesc.ComparisonFunc = getSamplerCompFunc[m_textures[i].GetZfunc()]; + samplerDesc.MaxAnisotropy = (UINT)getSamplerMaxAniso(m_textures[i].GetMaxAniso()); samplerDesc.MipLODBias = m_textures[i].GetBias(); samplerDesc.BorderColor[4] = (FLOAT)m_textures[i].GetBorderColor(); samplerDesc.MinLOD = (FLOAT)(m_textures[i].GetMinLOD() >> 8); diff --git a/rpcs3/Emu/RSX/GCM.h b/rpcs3/Emu/RSX/GCM.h index 0c9e1cff24..72b753a12a 100644 --- a/rpcs3/Emu/RSX/GCM.h +++ b/rpcs3/Emu/RSX/GCM.h @@ -206,6 +206,15 @@ enum // Cull Face CELL_GCM_CW = 0x0900, CELL_GCM_CCW = 0x0901, + + // Texture Filter + CELL_GCM_TEXTURE_NEAREST = 1, + CELL_GCM_TEXTURE_LINEAR = 2, + CELL_GCM_TEXTURE_NEAREST_NEAREST = 3, + CELL_GCM_TEXTURE_LINEAR_NEAREST = 4, + CELL_GCM_TEXTURE_NEAREST_LINEAR = 5, + CELL_GCM_TEXTURE_LINEAR_LINEAR = 6, + CELL_GCM_TEXTURE_CONVOLUTION_MIN = 7, }; // GCM Surface From 8b631d486c294771ad96000efa235e52da163bd1 Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 8 Jun 2015 16:59:11 +0200 Subject: [PATCH 251/343] d3d12: Fix build --- rpcs3/Emu/RSX/D3D12/D3D12.h | 8 ++++++++ rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp | 8 ++++---- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12.h b/rpcs3/Emu/RSX/D3D12/D3D12.h index d1598b7502..4538ca4b35 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12.h @@ -4,6 +4,8 @@ #include #include #include "utilities/Log.h" +#include "Emu/Memory/vm.h" +#include "Emu/RSX/GCM.h" inline void check(HRESULT hr) @@ -119,6 +121,7 @@ inline D3D12_BLEND_OP getBlendOp(u16 op) case CELL_GCM_FUNC_REVERSE_SUBTRACT: return D3D12_BLEND_OP_REV_SUBTRACT; case CELL_GCM_MIN: return D3D12_BLEND_OP_MIN; 
case CELL_GCM_MAX: return D3D12_BLEND_OP_MAX; + default: case CELL_GCM_FUNC_ADD_SIGNED: case CELL_GCM_FUNC_REVERSE_ADD_SIGNED: case CELL_GCM_FUNC_REVERSE_SUBTRACT_SIGNED: @@ -145,6 +148,7 @@ inline D3D12_BLEND getBlendFactor(u16 factor) case CELL_GCM_DST_COLOR: return D3D12_BLEND_DEST_COLOR; case CELL_GCM_ONE_MINUS_DST_COLOR: return D3D12_BLEND_INV_DEST_COLOR; case CELL_GCM_SRC_ALPHA_SATURATE: return D3D12_BLEND_SRC_ALPHA_SAT; + default: case CELL_GCM_CONSTANT_COLOR: case CELL_GCM_ONE_MINUS_CONSTANT_COLOR: case CELL_GCM_CONSTANT_ALPHA: @@ -194,6 +198,7 @@ inline D3D12_STENCIL_OP getStencilOp(u32 op) case CELL_GCM_REPLACE: return D3D12_STENCIL_OP_REPLACE; case CELL_GCM_INCR: return D3D12_STENCIL_OP_INCR; case CELL_GCM_DECR: return D3D12_STENCIL_OP_DECR; + default: case CELL_GCM_INCR_WRAP: case CELL_GCM_DECR_WRAP: LOG_WARNING(RSX, "Unsupported Stencil Op %d", op); @@ -216,6 +221,9 @@ inline D3D12_COMPARISON_FUNC getCompareFunc(u32 op) case CELL_GCM_NOTEQUAL: return D3D12_COMPARISON_FUNC_NOT_EQUAL; case CELL_GCM_GEQUAL: return D3D12_COMPARISON_FUNC_GREATER_EQUAL; case CELL_GCM_ALWAYS: return D3D12_COMPARISON_FUNC_ALWAYS; + default: + LOG_WARNING(RSX, "Unsupported Compare Op %d", op); + return D3D12_COMPARISON_FUNC(); } } diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp index f6e2446015..06e76db2c7 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp @@ -211,19 +211,19 @@ bool D3D12GSRender::LoadProgram() prop.DepthStencil.DepthEnable = m_set_depth_test; prop.DepthStencil.DepthWriteMask = m_depth_mask ? 
D3D12_DEPTH_WRITE_MASK_ALL : D3D12_DEPTH_WRITE_MASK_ZERO; - prop.DepthStencil.DepthFunc = getDepthFunc(m_depth_func); + prop.DepthStencil.DepthFunc = getCompareFunc(m_depth_func); prop.DepthStencil.StencilEnable = m_set_stencil_test; prop.DepthStencil.StencilReadMask = m_stencil_func_mask; prop.DepthStencil.StencilWriteMask = m_stencil_mask; prop.DepthStencil.FrontFace.StencilPassOp = getStencilOp(m_stencil_zpass); prop.DepthStencil.FrontFace.StencilDepthFailOp = getStencilOp(m_stencil_zfail); prop.DepthStencil.FrontFace.StencilFailOp = getStencilOp(m_stencil_fail); - prop.DepthStencil.FrontFace.StencilFunc = getStencilFunc(m_stencil_func); + prop.DepthStencil.FrontFace.StencilFunc = getCompareFunc(m_stencil_func); if (m_set_two_sided_stencil_test_enable) { prop.DepthStencil.BackFace.StencilFailOp = getStencilOp(m_back_stencil_fail); - prop.DepthStencil.BackFace.StencilFunc = getStencilFunc(m_back_stencil_func); + prop.DepthStencil.BackFace.StencilFunc = getCompareFunc(m_back_stencil_func); prop.DepthStencil.BackFace.StencilPassOp = getStencilOp(m_back_stencil_zpass); prop.DepthStencil.BackFace.StencilDepthFailOp = getStencilOp(m_back_stencil_zfail); } @@ -232,7 +232,7 @@ bool D3D12GSRender::LoadProgram() prop.DepthStencil.BackFace.StencilPassOp = getStencilOp(m_stencil_zpass); prop.DepthStencil.BackFace.StencilDepthFailOp = getStencilOp(m_stencil_zfail); prop.DepthStencil.BackFace.StencilFailOp = getStencilOp(m_stencil_fail); - prop.DepthStencil.BackFace.StencilFunc = getStencilFunc(m_stencil_func); + prop.DepthStencil.BackFace.StencilFunc = getCompareFunc(m_stencil_func); } // Sensible default value From 93e20c08538f2048bef99428b0047e8da5dafc12 Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 8 Jun 2015 18:09:10 +0200 Subject: [PATCH 252/343] d3d12: Fix index count --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 
32b4f83d51..2b9e37f752 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -749,7 +749,7 @@ void D3D12GSRender::ExecCMD() commandList->DrawIndexedInstanced((UINT)indexCount, 1, 0, (UINT)m_draw_array_first, 0); // Indexed triangles else if (m_indexed_array.m_count) - commandList->DrawIndexedInstanced((UINT)m_indexed_array.m_data.size() / 4, 1, 0, (UINT)m_draw_array_first, 0); + commandList->DrawIndexedInstanced((UINT)m_indexed_array.m_data.size() / ((m_indexed_array.m_type == CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16) ? 2 : 4), 1, 0, 0, 0); else if (m_draw_array_count) commandList->DrawInstanced(m_draw_array_count, 1, m_draw_array_first, 0); From a86e44deb4a80302f1ae5820307157a143ae76eb Mon Sep 17 00:00:00 2001 From: raven02 Date: Mon, 8 Jun 2015 06:41:56 +0800 Subject: [PATCH 253/343] RSX: Fix NV3089 Make convert_swizzle.elf works --- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 30 ---- rpcs3/Emu/RSX/RSXThread.cpp | 250 ++++++++++++++++++++++++++++---- rpcs3/Emu/RSX/RSXThread.h | 4 + 3 files changed, 222 insertions(+), 62 deletions(-) diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 3b6f1f43e1..c19e47b5b6 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -2165,33 +2165,3 @@ void GLGSRender::semaphorePFIFOAcquire(u32 offset, u32 value) { } - -u32 LinearToSwizzleAddress(u32 x, u32 y, u32 z, u32 log2_width, u32 log2_height, u32 log2_depth) -{ - u32 offset = 0; - u32 shift_count = 0; - while (log2_width | log2_height | log2_depth){ - if (log2_width) - { - offset |= (x & 0x01) << shift_count; - x >>= 1; - ++shift_count; - --log2_width; - } - if (log2_height) - { - offset |= (y & 0x01) << shift_count; - y >>= 1; - ++shift_count; - --log2_height; - } - if (log2_depth) - { - offset |= (z & 0x01) << shift_count; - z >>= 1; - ++shift_count; - --log2_depth; - } - } - return offset; -} diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index 
d3cd7129a4..e89762f72c 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -23,6 +23,39 @@ extern u64 get_system_time(); u32 methodRegisters[0xffff]; +u32 LinearToSwizzleAddress(u32 x, u32 y, u32 z, u32 log2_width, u32 log2_height, u32 log2_depth) +{ + u32 offset = 0; + u32 shift_count = 0; + while (log2_width | log2_height | log2_depth) + { + if (log2_width) + { + offset |= (x & 0x01) << shift_count; + x >>= 1; + ++shift_count; + --log2_width; + } + + if (log2_height) + { + offset |= (y & 0x01) << shift_count; + y >>= 1; + ++shift_count; + --log2_height; + } + + if (log2_depth) + { + offset |= (z & 0x01) << shift_count; + z >>= 1; + ++shift_count; + --log2_depth; + } + } + return offset; +} + u32 GetAddress(u32 offset, u32 location) { u32 res = 0; @@ -2045,6 +2078,20 @@ void RSXThread::DoCmd(const u32 fcmd, const u32 cmd, const u32 args_addr, const break; } + case NV3062_SET_PITCH: + { + if (count == 1) + { + m_color_format_src_pitch = ARGS(0); + m_color_format_dst_pitch = ARGS(0) >> 16; + } + else + { + LOG_ERROR(RSX, "NV3062_SET_PITCH: unknown arg count (%d)", count); + } + break; + } + // NV309E case NV309E_SET_CONTEXT_DMA_IMAGE: { @@ -2075,6 +2122,19 @@ void RSXThread::DoCmd(const u32 fcmd, const u32 cmd, const u32 args_addr, const break; } + case NV309E_SET_OFFSET: + { + if (count == 1) + { + m_swizzle_offset = ARGS(0); + } + else + { + LOG_ERROR(RSX, "NV309E_SET_OFFSET: unknown arg count (%d)", count); + } + break; + } + // NV308A case NV308A_POINT: { @@ -2162,39 +2222,98 @@ void RSXThread::DoCmd(const u32 fcmd, const u32 cmd, const u32 args_addr, const case NV3089_IMAGE_IN_SIZE: { - const u16 width = ARGS(0); - const u16 height = ARGS(0) >> 16; - const u16 pitch = ARGS(1); + if (count == 1) + { + m_img_in_size = ARGS(0); + + } + else + { + LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: unknown arg count (%d)", count); + } + break; + } + + case NV3089_IMAGE_IN_FORMAT: + { + if (count == 1) + { + m_img_in_format = ARGS(0); + + } + else + { + 
LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: unknown arg count (%d)", count); + } + break; + } + + case NV3089_IMAGE_IN_OFFSET: + { + if (count == 1) + { + m_src_offset = ARGS(0); + + } + else + { + LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: unknown arg count (%d)", count); + } + break; + } + + case NV3089_IMAGE_IN: + { + const u16 width = m_img_in_size; + const u16 height = m_img_in_size >> 16; + const u16 pitch = m_img_in_format; + const u8 origin = m_img_in_format >> 16; + const u8 inter = m_img_in_format >> 24; - const u8 origin = ARGS(1) >> 16; if (origin != 2 /* CELL_GCM_TRANSFER_ORIGIN_CORNER */) { LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: unknown origin (%d)", origin); } - const u8 inter = ARGS(1) >> 24; if (inter != 0 /* CELL_GCM_TRANSFER_INTERPOLATOR_ZOH */ && inter != 1 /* CELL_GCM_TRANSFER_INTERPOLATOR_FOH */) { LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: unknown inter (%d)", inter); } - const u32 offset = ARGS(2); + const u32 src_offset = m_src_offset; + const u32 src_dma = m_context_dma_img_src; - const u16 u = ARGS(3); // inX (currently ignored) - const u16 v = ARGS(3) >> 16; // inY (currently ignored) + u32 dst_offset; + u32 dst_dma = 0; - u8* pixels_src = vm::get_ptr(GetAddress(offset, m_context_dma_img_src - 0xfeed0000)); - u8* pixels_dst = vm::get_ptr(GetAddress(m_dst_offset, m_context_dma_img_dst - 0xfeed0000)); - - if (m_context_surface == CELL_GCM_CONTEXT_SWIZZLE2D) - { - LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: Swizzle2D not implemented"); - } - else if (m_context_surface != CELL_GCM_CONTEXT_SURFACE2D) + switch (m_context_surface) { + case CELL_GCM_CONTEXT_SURFACE2D: + dst_dma = m_context_dma_img_dst; + dst_offset = m_dst_offset; + break; + + case CELL_GCM_CONTEXT_SWIZZLE2D: + dst_dma = m_context_dma_img_src; + dst_offset = m_swizzle_offset; + break; + + default: LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: unknown m_context_surface (0x%x)", m_context_surface); + break; } + if (!dst_dma) + break; + + LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: src = 0x%x, dst = 0x%x", src_offset, 
dst_offset); + + const u16 u = ARGS(0); // inX (currently ignored) + const u16 v = ARGS(0) >> 16; // inY (currently ignored) + + u8* pixels_src = vm::get_ptr(GetAddress(src_offset, src_dma)); + u8* pixels_dst = vm::get_ptr(GetAddress(dst_offset, dst_dma)); + if (m_color_format != 4 /* CELL_GCM_TRANSFER_SURFACE_FORMAT_R5G6B5 */ && m_color_format != 10 /* CELL_GCM_TRANSFER_SURFACE_FORMAT_A8R8G8B8 */) { LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: unknown m_color_format (%d)", m_color_format); @@ -2206,11 +2325,42 @@ void RSXThread::DoCmd(const u32 fcmd, const u32 cmd, const u32 args_addr, const const s32 out_w = (s32)(u64(width) * (1 << 20) / m_color_conv_dsdx); const s32 out_h = (s32)(u64(height) * (1 << 20) / m_color_conv_dtdy); + if (m_context_surface == CELL_GCM_CONTEXT_SWIZZLE2D) + { + u8* linear_pixels = pixels_src; + u8* swizzled_pixels = new u8[in_bpp * width * height]; + + int sw_width = 1 << (int)log2(width); + int sw_height = 1 << (int)log2(height); + + for (int y = 0; y < sw_height; y++) + { + for (int x = 0; x < sw_width; x++) + { + switch (in_bpp) + { + case 1: + swizzled_pixels[LinearToSwizzleAddress(x, y, 0, sw_width, sw_height, 0)] = linear_pixels[y * sw_height + x]; + break; + case 2: + ((u16*)swizzled_pixels)[LinearToSwizzleAddress(x, y, 0, sw_width, sw_height, 0)] = ((u16*)linear_pixels)[y * sw_height + x]; + break; + case 4: + ((u32*)swizzled_pixels)[LinearToSwizzleAddress(x, y, 0, sw_width, sw_height, 0)] = ((u32*)linear_pixels)[y * sw_height + x]; + break; + } + + } + } + + pixels_src = swizzled_pixels; + } + LOG_WARNING(RSX, "NV3089_IMAGE_IN_SIZE: w=%d, h=%d, pitch=%d, offset=0x%x, inX=%f, inY=%f, scaleX=%f, scaleY=%f", - width, height, pitch, offset, double(u) / 16, double(v) / 16, double(1 << 20) / (m_color_conv_dsdx), double(1 << 20) / (m_color_conv_dtdy)); + width, height, pitch, src_offset, double(u) / 16, double(v) / 16, double(1 << 20) / (m_color_conv_dsdx), double(1 << 20) / (m_color_conv_dtdy)); std::unique_ptr temp; - + if (in_bpp != 
out_bpp && width != out_w && height != out_h) { // resize/convert if necessary @@ -2220,7 +2370,8 @@ void RSXThread::DoCmd(const u32 fcmd, const u32 cmd, const u32 args_addr, const AVPixelFormat in_format = m_color_format == 4 ? AV_PIX_FMT_RGB565BE : AV_PIX_FMT_ARGB; // ??? AVPixelFormat out_format = m_color_conv_fmt == 7 ? AV_PIX_FMT_RGB565BE : AV_PIX_FMT_ARGB; // ??? - std::unique_ptr sws(sws_getContext(width, height, in_format, out_w, out_h, out_format, inter ? SWS_FAST_BILINEAR : SWS_POINT, NULL, NULL, NULL), sws_freeContext); + std::unique_ptr sws(sws_getContext(width, height, in_format, out_w, out_h, out_format, + inter ? SWS_FAST_BILINEAR : SWS_POINT, NULL, NULL, NULL), sws_freeContext); int in_line = in_bpp * width; u8* out_ptr = temp.get(); @@ -2272,6 +2423,11 @@ void RSXThread::DoCmd(const u32 fcmd, const u32 cmd, const u32 args_addr, const memcpy(pixels_dst, pixels_src, out_w * out_h * out_bpp); } + if (m_context_surface == CELL_GCM_CONTEXT_SWIZZLE2D) + { + delete[] pixels_src; + } + break; } @@ -2308,6 +2464,49 @@ void RSXThread::DoCmd(const u32 fcmd, const u32 cmd, const u32 args_addr, const break; } + case NV3089_SET_COLOR_FORMAT: + m_color_conv_fmt = ARGS(0); + if (m_color_conv_fmt != 3 /* CELL_GCM_TRANSFER_SCALE_FORMAT_A8R8G8B8 */ && m_color_conv_fmt != 7 /* CELL_GCM_TRANSFER_SCALE_FORMAT_R5G6B5 */) + { + LOG_ERROR(RSX, "NV3089_SET_COLOR_FORMAT: unknown format (%d)", m_color_conv_fmt); + } + break; + + case NV3089_SET_OPERATION: + m_color_conv_op = ARGS(0); + if (m_color_conv_op != 3 /* CELL_GCM_TRANSFER_OPERATION_SRCCOPY */) + { + LOG_ERROR(RSX, "NV3089_SET_OPERATION: unknown color conv op (%d)", m_color_conv_op); + } + break; + case NV3089_CLIP_POINT: + m_color_conv_clip_x = ARGS(0); + m_color_conv_clip_y = ARGS(0) >> 16; + break; + + case NV3089_CLIP_SIZE: + m_color_conv_clip_w = ARGS(0); + m_color_conv_clip_h = ARGS(0) >> 16; + break; + + case NV3089_IMAGE_OUT_POINT: + m_color_conv_out_x = ARGS(0); + m_color_conv_out_y = ARGS(0) >> 16; + break; 
+ + case NV3089_IMAGE_OUT_SIZE: + m_color_conv_out_w = ARGS(0); + m_color_conv_out_h = ARGS(0) >> 16; + break; + + case NV3089_DS_DX: + m_color_conv_dsdx = ARGS(0); + break; + + case NV3089_DT_DY: + m_color_conv_dtdy = ARGS(0); + break; + case GCM_SET_USER_COMMAND: { const u32 cause = ARGS(0); @@ -2353,7 +2552,6 @@ void RSXThread::DoCmd(const u32 fcmd, const u32 cmd, const u32 args_addr, const case NV3062_SET_OBJECT: case NV3062_SET_CONTEXT_DMA_NOTIFIES: case NV3062_SET_CONTEXT_DMA_IMAGE_SOURCE: - case NV3062_SET_PITCH: case NV3062_SET_OFFSET_SOURCE: { LOG_WARNING(RSX, "Unused NV3062 method 0x%x detected!", cmd); @@ -2381,7 +2579,6 @@ void RSXThread::DoCmd(const u32 fcmd, const u32 cmd, const u32 args_addr, const case NV309E_SET_OBJECT: case NV309E_SET_CONTEXT_DMA_NOTIFIES: - case NV309E_SET_OFFSET: { LOG_WARNING(RSX, "Unused NV309E method 0x%x detected!", cmd); break; @@ -2393,17 +2590,6 @@ void RSXThread::DoCmd(const u32 fcmd, const u32 cmd, const u32 args_addr, const case NV3089_SET_CONTEXT_ROP: case NV3089_SET_CONTEXT_BETA1: case NV3089_SET_CONTEXT_BETA4: - case NV3089_SET_COLOR_FORMAT: - case NV3089_SET_OPERATION: - case NV3089_CLIP_POINT: - case NV3089_CLIP_SIZE: - case NV3089_IMAGE_OUT_POINT: - case NV3089_IMAGE_OUT_SIZE: - case NV3089_DS_DX: - case NV3089_DT_DY: - case NV3089_IMAGE_IN_FORMAT: - case NV3089_IMAGE_IN_OFFSET: - case NV3089_IMAGE_IN: { LOG_WARNING(RSX, "Unused NV3089 methods 0x%x detected!", cmd); break; diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index ded727a469..6dcf387b38 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -19,6 +19,7 @@ enum Method extern u32 methodRegisters[0xffff]; u32 GetAddress(u32 offset, u32 location); +u32 LinearToSwizzleAddress(u32 x, u32 y, u32 z, u32 log2_width, u32 log2_height, u32 log2_depth); struct RSXVertexData { @@ -407,6 +408,9 @@ public: u32 m_context_dma_buffer_in_src; u32 m_context_dma_buffer_in_dst; u32 m_dst_offset; + u32 m_src_offset; + u32 m_img_in_size; 
+ u32 m_img_in_format; // Swizzle2D? u16 m_swizzle_format; From 724159c8b4159bfb430fe5559d00766b993cb4eb Mon Sep 17 00:00:00 2001 From: raven02 Date: Tue, 9 Jun 2015 00:51:41 +0800 Subject: [PATCH 254/343] d3d12: warning log fix --- rpcs3/Emu/RSX/D3D12/D3D12.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12.h b/rpcs3/Emu/RSX/D3D12/D3D12.h index 4538ca4b35..25be93edc4 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12.h @@ -222,7 +222,7 @@ inline D3D12_COMPARISON_FUNC getCompareFunc(u32 op) case CELL_GCM_GEQUAL: return D3D12_COMPARISON_FUNC_GREATER_EQUAL; case CELL_GCM_ALWAYS: return D3D12_COMPARISON_FUNC_ALWAYS; default: - LOG_WARNING(RSX, "Unsupported Compare Op %d", op); + LOG_WARNING(RSX, "Unsupported Compare Function %d", op); return D3D12_COMPARISON_FUNC(); } } @@ -295,4 +295,4 @@ inline DXGI_FORMAT getTextureDXGIFormat(int format) } } -#endif \ No newline at end of file +#endif From dc1a57e71c8d2ecf9135a787a6b61a3b59f2180a Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 8 Jun 2015 19:33:41 +0200 Subject: [PATCH 255/343] d3d12: Fix color masking Wasn't using the correct PSO state variable --- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp | 12 +++++++++--- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h | 5 ++--- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp index 06e76db2c7..a02f5f9c76 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp @@ -275,9 +275,15 @@ bool D3D12GSRender::LoadProgram() } if (m_set_color_mask) - prop.SampleMask = m_color_mask_r | (m_color_mask_g << 1) | (m_color_mask_b << 2) | (m_color_mask_a << 3); - else - prop.SampleMask = UINT_MAX; + { + UINT8 mask = 0; + mask |= m_color_mask_r ? D3D12_COLOR_WRITE_ENABLE_RED : 0; + mask |= m_color_mask_g ? D3D12_COLOR_WRITE_ENABLE_GREEN : 0; + mask |= m_color_mask_b ? 
D3D12_COLOR_WRITE_ENABLE_BLUE : 0; + mask |= m_color_mask_a ? D3D12_COLOR_WRITE_ENABLE_ALPHA : 0; + for (unsigned i = 0; i < prop.numMRT; i++) + prop.Blend.RenderTarget[i].RenderTargetWriteMask = mask; + } prop.IASet = m_IASet; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h index dc49e0c56b..655ba0622f 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h @@ -20,7 +20,6 @@ struct D3D12PipelineProperties unsigned numMRT : 3; D3D12_DEPTH_STENCIL_DESC DepthStencil; D3D12_RASTERIZER_DESC Rasterization; - UINT SampleMask; bool operator==(const D3D12PipelineProperties &in) const { @@ -47,7 +46,7 @@ struct D3D12PipelineProperties return false; if (memcmp(&Rasterization, &in.Rasterization, sizeof(D3D12_RASTERIZER_DESC))) return false; - return Topology == in.Topology && DepthStencilFormat == in.DepthStencilFormat && numMRT == in.numMRT && SampleMask == in.SampleMask && RenderTargetsFormat == in.RenderTargetsFormat; + return Topology == in.Topology && DepthStencilFormat == in.DepthStencilFormat && numMRT == in.numMRT && RenderTargetsFormat == in.RenderTargetsFormat; } }; @@ -162,7 +161,7 @@ struct D3D12Traits graphicPipelineStateDesc.InputLayout.pInputElementDescs = pipelineProperties.IASet.data(); graphicPipelineStateDesc.InputLayout.NumElements = (UINT)pipelineProperties.IASet.size(); graphicPipelineStateDesc.SampleDesc.Count = 1; - graphicPipelineStateDesc.SampleMask = pipelineProperties.SampleMask; + graphicPipelineStateDesc.SampleMask = UINT_MAX; graphicPipelineStateDesc.NodeMask = 1; extraData.first->CreateGraphicsPipelineState(&graphicPipelineStateDesc, IID_PPV_ARGS(&result->first)); From 70b537c8c285e255d5c1533e8fe12c5229b88cdf Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 8 Jun 2015 19:59:19 +0200 Subject: [PATCH 256/343] d3d12: Implement discard Should make alpha test kill test almost working as it should. 
--- rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp | 1 + rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp | 7 ++++++- rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp | 1 + 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index c6b976b491..569e93716a 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -436,6 +436,7 @@ void D3D12GSRender::setScaleOffset() void *scaleOffsetMap; check(scaleOffsetBuffer->Map(0, nullptr, &scaleOffsetMap)); streamToBuffer(scaleOffsetMap, scaleOffsetMat, 16 * sizeof(float)); + streamToBuffer((char*)scaleOffsetMap + 16 * sizeof(float), &m_alpha_ref, sizeof(float)); scaleOffsetBuffer->Unmap(0, nullptr); D3D12_CONSTANT_BUFFER_VIEW_DESC constantBufferViewDesc = {}; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp index ec1032a548..cc51b4450e 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp @@ -34,7 +34,11 @@ std::string D3D12FragmentDecompiler::compareFunction(COMPARE f, const std::strin void D3D12FragmentDecompiler::insertHeader(std::stringstream & OS) { - OS << "// Header" << std::endl; + OS << "cbuffer SCALE_OFFSET : register(b0)" << std::endl; + OS << "{" << std::endl; + OS << " float4x4 scaleOffsetMat;" << std::endl; + OS << " float alphaRef;" << std::endl; + OS << "};" << std::endl; } void D3D12FragmentDecompiler::insertIntputs(std::stringstream & OS) @@ -158,6 +162,7 @@ void D3D12FragmentDecompiler::insertMainEnd(std::stringstream & OS) else if (m_parr.HasParam(PF_PARAM_NONE, "float4", table2[i].second)) OS << " Out." 
<< table2[i].first << " = " << table2[i].second << ";" << std::endl; } + OS << " if (Out.ocol0.a <= alphaRef) discard;" << std::endl; OS << " return Out;" << std::endl; OS << "}" << std::endl; } diff --git a/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp index 6129ca5611..50bf886b32 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp @@ -26,6 +26,7 @@ void D3D12VertexProgramDecompiler::insertHeader(std::stringstream &OS) OS << "cbuffer SCALE_OFFSET : register(b0)" << std::endl; OS << "{" << std::endl; OS << " float4x4 scaleOffsetMat;" << std::endl; + OS << " float alphaRef;" << std::endl; OS << "};" << std::endl; } From cfde5698c77f4a8bfc4a18e2e8edf83b372cabed Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 8 Jun 2015 20:18:47 +0200 Subject: [PATCH 257/343] d3d12: Fix swizzling for D8R8G8B8 Fix human.ppu.elf demo --- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index f80f74372c..aae0bbb9cc 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -533,8 +533,8 @@ size_t D3D12GSRender::UploadTextures() { D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_1, D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_2, - D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_1, - D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0 + D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_3, + D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_1 }; u8 remap_a = m_textures[i].GetRemap() & 0x3; From 24c23dc5f65bfcebdbbf8b2e430e097b3a44647b Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 8 Jun 2015 22:05:17 +0200 Subject: [PATCH 258/343] d3d12: Use dummy texture to fill empty texture slot vertex attribute test has a shader that reads a texture, but rpcs3 doesn't provide the texture ; this makes 
WARP crash. --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 2b9e37f752..bca1301bbd 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -652,6 +652,28 @@ void D3D12GSRender::ExecCMD() { size_t usedTexture = UploadTextures(); + // Fill empty slots + for (; usedTexture < m_PSO->second; usedTexture++) + { + D3D12_CPU_DESCRIPTOR_HANDLE Handle = getCurrentResourceStorage().m_textureDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); + Handle.ptr += (getCurrentResourceStorage().m_currentTextureIndex + usedTexture) * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + srvDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + srvDesc.Texture2D.MipLevels = 1; + srvDesc.Shader4ComponentMapping = D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING(D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_1, D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_1, D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_1, D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_1); + m_device->CreateShaderResourceView(m_dummyTexture, &srvDesc, Handle); + + D3D12_SAMPLER_DESC samplerDesc = {}; + samplerDesc.Filter = D3D12_FILTER_MIN_MAG_MIP_POINT; + samplerDesc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_WRAP; + samplerDesc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_WRAP; + samplerDesc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_WRAP; + Handle = getCurrentResourceStorage().m_samplerDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); + Handle.ptr += (getCurrentResourceStorage().m_currentTextureIndex + usedTexture) * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); + m_device->CreateSampler(&samplerDesc, Handle); + } + Handle = 
getCurrentResourceStorage().m_textureDescriptorsHeap->GetGPUDescriptorHandleForHeapStart(); Handle.ptr += getCurrentResourceStorage().m_currentTextureIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); commandList->SetDescriptorHeaps(1, &getCurrentResourceStorage().m_textureDescriptorsHeap); From cf27d4c2ca79ba58e7522a6efe1f24cc4cf4bc3e Mon Sep 17 00:00:00 2001 From: raven02 Date: Tue, 9 Jun 2015 09:35:31 +0800 Subject: [PATCH 259/343] d3d12: Fix pitch in COMPRESSED_DXT23/DXT45 Make pm_zcull.ppu.elf render correctly. --- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index aae0bbb9cc..6e2383fb75 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -201,12 +201,12 @@ ID3D12Resource *uploadSingleTexture( case CELL_GCM_TEXTURE_COMPRESSED_DXT23: blockSizeInByte = 16; blockWidthInPixel = 4, blockHeightInPixel = 4; - srcPitch = w * 8; + srcPitch = w * 4; break; case CELL_GCM_TEXTURE_COMPRESSED_DXT45: blockSizeInByte = 16; blockWidthInPixel = 4, blockHeightInPixel = 4; - srcPitch = w * 8; + srcPitch = w * 4; break; case CELL_GCM_TEXTURE_G8B8: blockSizeInByte = 2; From da923f8a3db9e102ffb028257571c640ac598ff0 Mon Sep 17 00:00:00 2001 From: raven02 Date: Tue, 9 Jun 2015 20:11:06 +0800 Subject: [PATCH 260/343] d3d12: declare bufferSize for allocation --- rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index 569e93716a..777e9faa99 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -457,14 +457,16 @@ void D3D12GSRender::FillVertexShaderConstantsBuffer() memcpy((char*)vertexConstantShadowCopy + offset, vector, 4 * sizeof(float)); } - assert(m_constantsData.canAlloc(512 * 4 *
sizeof(float))); - size_t heapOffset = m_constantsData.alloc(512 * 4 * sizeof(float)); + size_t bufferSize = 512 * 4 * sizeof(float); + + assert(m_constantsData.canAlloc(bufferSize)); + size_t heapOffset = m_constantsData.alloc(bufferSize); ID3D12Resource *constantsBuffer; check(m_device->CreatePlacedResource( m_constantsData.m_heap, heapOffset, - &getBufferResourceDesc(512 * 4 * sizeof(float)), + &getBufferResourceDesc(bufferSize), D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, IID_PPV_ARGS(&constantsBuffer) @@ -472,16 +474,16 @@ void D3D12GSRender::FillVertexShaderConstantsBuffer() void *constantsBufferMap; check(constantsBuffer->Map(0, nullptr, &constantsBufferMap)); - streamBuffer(constantsBufferMap, vertexConstantShadowCopy, 512 * 4 * sizeof(float)); + streamBuffer(constantsBufferMap, vertexConstantShadowCopy, bufferSize); constantsBuffer->Unmap(0, nullptr); D3D12_CONSTANT_BUFFER_VIEW_DESC constantBufferViewDesc = {}; constantBufferViewDesc.BufferLocation = constantsBuffer->GetGPUVirtualAddress(); - constantBufferViewDesc.SizeInBytes = 512 * 4 * sizeof(float); + constantBufferViewDesc.SizeInBytes = (UINT)bufferSize; D3D12_CPU_DESCRIPTOR_HANDLE Handle = getCurrentResourceStorage().m_constantsBufferDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); Handle.ptr += getCurrentResourceStorage().m_constantsBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); m_device->CreateConstantBufferView(&constantBufferViewDesc, Handle); - m_constantsData.m_resourceStoredSinceLastSync.push_back(std::make_tuple(heapOffset, 512 * 4 * sizeof(float), constantsBuffer)); + m_constantsData.m_resourceStoredSinceLastSync.push_back(std::make_tuple(heapOffset, bufferSize, constantsBuffer)); } void D3D12GSRender::FillPixelShaderConstantsBuffer() @@ -558,4 +560,4 @@ void D3D12GSRender::FillPixelShaderConstantsBuffer() } -#endif \ No newline at end of file +#endif From 482a371bb09aa4b5d8aae139d6a5e8eba429fcaf Mon Sep 17 00:00:00 2001 From: raven02 
Date: Wed, 10 Jun 2015 23:46:40 +0800 Subject: [PATCH 261/343] d3d12: set max_depth_value based on depth format --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index bca1301bbd..a611bf9dcc 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -522,7 +522,10 @@ void D3D12GSRender::ExecCMD(u32 cmd) // TODO: Merge depth and stencil clear when possible if (m_clear_surface_mask & 0x1) - commandList->ClearDepthStencilView(m_rtts.m_depthStencilDescriptorHeap->GetCPUDescriptorHandleForHeapStart(), D3D12_CLEAR_FLAG_DEPTH, m_clear_surface_z / (float)0xffffff, 0, 0, nullptr); + { + u32 max_depth_value = m_surface_depth_format == CELL_GCM_SURFACE_Z16 ? 0x0000ffff : 0x00ffffff; + commandList->ClearDepthStencilView(m_rtts.m_depthStencilDescriptorHeap->GetCPUDescriptorHandleForHeapStart(), D3D12_CLEAR_FLAG_DEPTH, m_clear_surface_z / (float)max_depth_value, 0, 0, nullptr); + } if (m_clear_surface_mask & 0x2) commandList->ClearDepthStencilView(m_rtts.m_depthStencilDescriptorHeap->GetCPUDescriptorHandleForHeapStart(), D3D12_CLEAR_FLAG_STENCIL, 0.f, m_clear_surface_s, 0, nullptr); @@ -807,9 +810,12 @@ void D3D12GSRender::Flip() commandList->ResourceBarrier(2, barriers); D3D12_TEXTURE_COPY_LOCATION src = {}, dst = {}; - src.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, dst.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; - src.SubresourceIndex = 0, dst.SubresourceIndex = 0; - src.pResource = m_rtts.m_currentlyBoundRenderTargets[0], dst.pResource = m_backBuffer[m_swapChain->GetCurrentBackBufferIndex()]; + src.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + src.SubresourceIndex = 0; + src.pResource = m_rtts.m_currentlyBoundRenderTargets[0], + dst.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + dst.SubresourceIndex = 0; + dst.pResource = 
m_backBuffer[m_swapChain->GetCurrentBackBufferIndex()]; D3D12_BOX box = { 0, 0, 0, m_surface_clip_w, m_surface_clip_h, 1 }; commandList->CopyTextureRegion(&dst, 0, 0, 0, &src, &box); From 2f5448259210c4c4cdb9f499bcf57aa3bf51537b Mon Sep 17 00:00:00 2001 From: vlj Date: Thu, 11 Jun 2015 19:26:52 +0200 Subject: [PATCH 262/343] d3d12: Make depth test optional and use (0,0,0,0) when texture unit is disabled --- rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp | 4 +++- rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp | 3 ++- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 6 +++++- rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp | 1 + 4 files changed, 11 insertions(+), 3 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index 777e9faa99..aee9dcb749 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -436,7 +436,9 @@ void D3D12GSRender::setScaleOffset() void *scaleOffsetMap; check(scaleOffsetBuffer->Map(0, nullptr, &scaleOffsetMap)); streamToBuffer(scaleOffsetMap, scaleOffsetMat, 16 * sizeof(float)); - streamToBuffer((char*)scaleOffsetMap + 16 * sizeof(float), &m_alpha_ref, sizeof(float)); + int isAlphaTested = m_set_alpha_test; + streamToBuffer((char*)scaleOffsetMap + 16 * sizeof(float), &isAlphaTested, sizeof(int)); + streamToBuffer((char*)scaleOffsetMap + 17 * sizeof(float), &m_alpha_ref, sizeof(float)); scaleOffsetBuffer->Unmap(0, nullptr); D3D12_CONSTANT_BUFFER_VIEW_DESC constantBufferViewDesc = {}; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp index cc51b4450e..8916a9738b 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp @@ -37,6 +37,7 @@ void D3D12FragmentDecompiler::insertHeader(std::stringstream & OS) OS << "cbuffer SCALE_OFFSET : register(b0)" << std::endl; OS << "{" << std::endl; OS << " float4x4 scaleOffsetMat;" <<
std::endl; + OS << " int isAlphaTested;" << std::endl; OS << " float alphaRef;" << std::endl; OS << "};" << std::endl; } @@ -162,7 +163,7 @@ void D3D12FragmentDecompiler::insertMainEnd(std::stringstream & OS) else if (m_parr.HasParam(PF_PARAM_NONE, "float4", table2[i].second)) OS << " Out." << table2[i].first << " = " << table2[i].second << ";" << std::endl; } - OS << " if (Out.ocol0.a <= alphaRef) discard;" << std::endl; + OS << " if (isAlphaTested && Out.ocol0.a <= alphaRef) discard;" << std::endl; OS << " return Out;" << std::endl; OS << "}" << std::endl; } diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index a611bf9dcc..1ab8d52163 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -664,7 +664,11 @@ void D3D12GSRender::ExecCMD() srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; srvDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; srvDesc.Texture2D.MipLevels = 1; - srvDesc.Shader4ComponentMapping = D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING(D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_1, D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_1, D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_1, D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_1); + srvDesc.Shader4ComponentMapping = D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING( + D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0, + D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0, + D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0, + D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0); m_device->CreateShaderResourceView(m_dummyTexture, &srvDesc, Handle); D3D12_SAMPLER_DESC samplerDesc = {}; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp index 50bf886b32..11d7ddf342 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp @@ -26,6 +26,7 @@ void D3D12VertexProgramDecompiler::insertHeader(std::stringstream &OS) OS << "cbuffer SCALE_OFFSET : 
register(b0)" << std::endl; OS << "{" << std::endl; OS << " float4x4 scaleOffsetMat;" << std::endl; + OS << " int isAlphaTested;" << std::endl; OS << " float alphaRef;" << std::endl; OS << "};" << std::endl; } From d2c13bc4c10ebfd11d0f4e5bf6d2d95df4fdb1df Mon Sep 17 00:00:00 2001 From: vlj Date: Thu, 11 Jun 2015 20:42:38 +0200 Subject: [PATCH 263/343] d3d12: Use a committed resource for constant buffer Since we rarely use more than 1k of constant data we waste space due to alignment requirement with heap so use a committed resource instead. --- rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp | 67 +++++---------- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 67 +-------------- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 114 ++++++++++++++++++++++++-- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 4 +- 4 files changed, 129 insertions(+), 123 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index aee9dcb749..8dc28f8a86 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -192,7 +192,7 @@ std::vector FormatVertexData(const RSXVertexData *m_vertex_d * Create a new vertex buffer with attributes from vbf using vertexIndexHeap as storage heap.
*/ static -ID3D12Resource *createVertexBuffer(const VertexBufferFormat &vbf, const RSXVertexData *vertexData, ID3D12Device *device, DataHeap &vertexIndexHeap) +ID3D12Resource *createVertexBuffer(const VertexBufferFormat &vbf, const RSXVertexData *vertexData, ID3D12Device *device, DataHeap &vertexIndexHeap) { size_t subBufferSize = vbf.range.second - vbf.range.first + 1; // Make multiple of stride @@ -423,31 +423,23 @@ void D3D12GSRender::setScaleOffset() // Scale offset buffer // Separate constant buffer - ID3D12Resource *scaleOffsetBuffer; - check(m_device->CreatePlacedResource( - m_constantsData.m_heap, - heapOffset, - &getBufferResourceDesc(256), - D3D12_RESOURCE_STATE_GENERIC_READ, - nullptr, - IID_PPV_ARGS(&scaleOffsetBuffer) - )); + D3D12_RANGE range = { heapOffset, heapOffset + 256 }; void *scaleOffsetMap; - check(scaleOffsetBuffer->Map(0, nullptr, &scaleOffsetMap)); - streamToBuffer(scaleOffsetMap, scaleOffsetMat, 16 * sizeof(float)); + check(m_constantsData.m_heap->Map(0, &range, &scaleOffsetMap)); + streamToBuffer((char*)scaleOffsetMap + heapOffset, scaleOffsetMat, 16 * sizeof(float)); int isAlphaTested = m_set_alpha_test; - streamToBuffer((char*)scaleOffsetMap + 16 * sizeof(float), &isAlphaTested, sizeof(int)); - streamToBuffer((char*)scaleOffsetMap + 17 * sizeof(float), &m_alpha_ref, sizeof(float)); - scaleOffsetBuffer->Unmap(0, nullptr); + streamToBuffer((char*)scaleOffsetMap + heapOffset + 16 * sizeof(float), &isAlphaTested, sizeof(int)); + streamToBuffer((char*)scaleOffsetMap + heapOffset + 17 * sizeof(float), &m_alpha_ref, sizeof(float)); + m_constantsData.m_heap->Unmap(0, &range); D3D12_CONSTANT_BUFFER_VIEW_DESC constantBufferViewDesc = {}; - constantBufferViewDesc.BufferLocation = scaleOffsetBuffer->GetGPUVirtualAddress(); + constantBufferViewDesc.BufferLocation = m_constantsData.m_heap->GetGPUVirtualAddress() + heapOffset; constantBufferViewDesc.SizeInBytes = (UINT)256; D3D12_CPU_DESCRIPTOR_HANDLE Handle = 
getCurrentResourceStorage().m_scaleOffsetDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); Handle.ptr += getCurrentResourceStorage().m_currentScaleOffsetBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); m_device->CreateConstantBufferView(&constantBufferViewDesc, Handle); - m_constantsData.m_resourceStoredSinceLastSync.push_back(std::make_tuple(heapOffset, 256, scaleOffsetBuffer)); +// m_constantsData.m_resourceStoredSinceLastSync.push_back(std::make_tuple(heapOffset, 256, scaleOffsetBuffer)); } void D3D12GSRender::FillVertexShaderConstantsBuffer() @@ -464,28 +456,20 @@ void D3D12GSRender::FillVertexShaderConstantsBuffer() assert(m_constantsData.canAlloc(bufferSize)); size_t heapOffset = m_constantsData.alloc(bufferSize); - ID3D12Resource *constantsBuffer; - check(m_device->CreatePlacedResource( - m_constantsData.m_heap, - heapOffset, - &getBufferResourceDesc(bufferSize), - D3D12_RESOURCE_STATE_GENERIC_READ, - nullptr, - IID_PPV_ARGS(&constantsBuffer) - )); + D3D12_RANGE range = { heapOffset, heapOffset + bufferSize }; void *constantsBufferMap; - check(constantsBuffer->Map(0, nullptr, &constantsBufferMap)); - streamBuffer(constantsBufferMap, vertexConstantShadowCopy, bufferSize); - constantsBuffer->Unmap(0, nullptr); + check(m_constantsData.m_heap->Map(0, &range, &constantsBufferMap)); + streamBuffer((char*)constantsBufferMap + heapOffset, vertexConstantShadowCopy, bufferSize); + m_constantsData.m_heap->Unmap(0, &range); D3D12_CONSTANT_BUFFER_VIEW_DESC constantBufferViewDesc = {}; - constantBufferViewDesc.BufferLocation = constantsBuffer->GetGPUVirtualAddress(); + constantBufferViewDesc.BufferLocation = m_constantsData.m_heap->GetGPUVirtualAddress() + heapOffset; constantBufferViewDesc.SizeInBytes = (UINT)bufferSize; D3D12_CPU_DESCRIPTOR_HANDLE Handle = getCurrentResourceStorage().m_constantsBufferDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); Handle.ptr += getCurrentResourceStorage().m_constantsBufferIndex 
* m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); m_device->CreateConstantBufferView(&constantBufferViewDesc, Handle); - m_constantsData.m_resourceStoredSinceLastSync.push_back(std::make_tuple(heapOffset, bufferSize, constantsBuffer)); +// m_constantsData.m_resourceStoredSinceLastSync.push_back(std::make_tuple(heapOffset, bufferSize, constantsBuffer)); } void D3D12GSRender::FillPixelShaderConstantsBuffer() @@ -499,19 +483,11 @@ void D3D12GSRender::FillPixelShaderConstantsBuffer() assert(m_constantsData.canAlloc(bufferSize)); size_t heapOffset = m_constantsData.alloc(bufferSize); - ID3D12Resource *constantsBuffer; - check(m_device->CreatePlacedResource( - m_constantsData.m_heap, - heapOffset, - &getBufferResourceDesc(bufferSize), - D3D12_RESOURCE_STATE_GENERIC_READ, - nullptr, - IID_PPV_ARGS(&constantsBuffer) - )); + D3D12_RANGE range = { heapOffset, heapOffset + bufferSize }; size_t offset = 0; void *constantsBufferMap; - check(constantsBuffer->Map(0, nullptr, &constantsBufferMap)); + check(m_constantsData.m_heap->Map(0, &range, &constantsBufferMap)); for (size_t offsetInFP : fragmentOffset) { u32 vector[4]; @@ -546,19 +522,18 @@ void D3D12GSRender::FillPixelShaderConstantsBuffer() vector[3] = c3; } - streamToBuffer((char*)constantsBufferMap + offset, vector, 4 * sizeof(u32)); + streamToBuffer((char*)constantsBufferMap + heapOffset + offset, vector, 4 * sizeof(u32)); offset += 4 * sizeof(u32); } - - constantsBuffer->Unmap(0, nullptr); + m_constantsData.m_heap->Unmap(0, &range); D3D12_CONSTANT_BUFFER_VIEW_DESC constantBufferViewDesc = {}; - constantBufferViewDesc.BufferLocation = constantsBuffer->GetGPUVirtualAddress(); + constantBufferViewDesc.BufferLocation = m_constantsData.m_heap->GetGPUVirtualAddress() + heapOffset; constantBufferViewDesc.SizeInBytes = (UINT)bufferSize; D3D12_CPU_DESCRIPTOR_HANDLE Handle = getCurrentResourceStorage().m_constantsBufferDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); Handle.ptr += 
getCurrentResourceStorage().m_constantsBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); m_device->CreateConstantBufferView(&constantBufferViewDesc, Handle); - m_constantsData.m_resourceStoredSinceLastSync.push_back(std::make_tuple(heapOffset, bufferSize, constantsBuffer)); +// m_constantsData.m_resourceStoredSinceLastSync.push_back(std::make_tuple(heapOffset, bufferSize, constantsBuffer)); } diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 1ab8d52163..944ac0c24f 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -14,71 +14,6 @@ void SetGetD3DGSFrameCallback(GetGSFrameCb2 value) GetGSFrame = value; } -void DataHeap::Init(ID3D12Device *device, size_t heapSize, D3D12_HEAP_TYPE type, D3D12_HEAP_FLAGS flags) -{ - m_size = heapSize; - D3D12_HEAP_DESC heapDesc = {}; - heapDesc.SizeInBytes = m_size; - heapDesc.Properties.Type = type; - heapDesc.Flags = flags; - check(device->CreateHeap(&heapDesc, IID_PPV_ARGS(&m_heap))); - m_putPos = 0; - m_getPos = m_size - 1; -} - - -bool DataHeap::canAlloc(size_t size) -{ - size_t putPos = m_putPos, getPos = m_getPos; - size_t allocSize = powerOf2Align(size, 65536); - if (putPos + allocSize < m_size) - { - // range before get - if (putPos + allocSize < getPos) - return true; - // range after get - if (putPos > getPos) - return true; - return false; - } - else - { - // ..]....[..get.. - if (putPos < getPos) - return false; - // ..get..]...[... 
- // Actually all resources extending beyond heap space starts at 0 - if (allocSize > getPos) - return false; - return true; - } -} - -size_t DataHeap::alloc(size_t size) -{ - assert(canAlloc(size)); - size_t putPos = m_putPos; - if (putPos + size < m_size) - { - m_putPos += powerOf2Align(size, 65536); - return putPos; - } - else - { - m_putPos = powerOf2Align(size, 65536); - return 0; - } -} - -void DataHeap::Release() -{ - m_heap->Release(); - for (auto tmp : m_resourceStoredSinceLastSync) - { - std::get<2>(tmp)->Release(); - } -} - GarbageCollectionThread::GarbageCollectionThread() { m_worker = std::thread([this]() { @@ -441,7 +376,7 @@ D3D12GSRender::D3D12GSRender() m_rtts.Init(m_device); - m_constantsData.Init(m_device, 1024 * 1024 * 128, D3D12_HEAP_TYPE_UPLOAD, D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS); + m_constantsData.Init(m_device, 1024 * 1024 * 128, D3D12_HEAP_TYPE_UPLOAD, D3D12_HEAP_FLAG_NONE); m_vertexIndexData.Init(m_device, 1024 * 1024 * 128, D3D12_HEAP_TYPE_UPLOAD, D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS); m_textureUploadData.Init(m_device, 1024 * 1024 * 256, D3D12_HEAP_TYPE_UPLOAD, D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS); m_textureData.Init(m_device, 1024 * 1024 * 512, D3D12_HEAP_TYPE_DEFAULT, D3D12_HEAP_FLAG_ALLOW_ONLY_NON_RT_DS_TEXTURES); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 2d950b1d67..d77f6e9def 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -43,22 +43,118 @@ typedef GSFrameBase2*(*GetGSFrameCb2)(); void SetGetD3DGSFrameCallback(GetGSFrameCb2 value); +template +struct InitHeap +{ + static T* Init(ID3D12Device *device, size_t heapSize, D3D12_HEAP_TYPE type, D3D12_HEAP_FLAGS flags); +}; +template<> +struct InitHeap +{ + static ID3D12Heap* Init(ID3D12Device *device, size_t heapSize, D3D12_HEAP_TYPE type, D3D12_HEAP_FLAGS flags) + { + ID3D12Heap *result; + D3D12_HEAP_DESC heapDesc = {}; + heapDesc.SizeInBytes = heapSize; + heapDesc.Properties.Type = type; + 
heapDesc.Flags = flags; + check(device->CreateHeap(&heapDesc, IID_PPV_ARGS(&result))); + return result; + } +}; + +template<> +struct InitHeap +{ + static ID3D12Resource* Init(ID3D12Device *device, size_t heapSize, D3D12_HEAP_TYPE type, D3D12_HEAP_FLAGS flags) + { + ID3D12Resource *result; + D3D12_HEAP_PROPERTIES heapProperties = {}; + heapProperties.Type = type; + check(device->CreateCommittedResource(&heapProperties, + flags, + &getBufferResourceDesc(heapSize), + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&result)) + ); + + return result; + } +}; + +template struct DataHeap { - ID3D12Heap *m_heap; + T *m_heap; size_t m_size; size_t m_putPos, // Start of free space m_getPos; // End of free space std::vector > m_resourceStoredSinceLastSync; - void Init(ID3D12Device *, size_t, D3D12_HEAP_TYPE, D3D12_HEAP_FLAGS); + void Init(ID3D12Device *device, size_t heapSize, D3D12_HEAP_TYPE type, D3D12_HEAP_FLAGS flags) + { + m_size = heapSize; + m_heap = InitHeap::Init(device, heapSize, type, flags); + m_putPos = 0; + m_getPos = m_size - 1; + } + /** * Does alloc cross get position ? */ - bool canAlloc(size_t size); - size_t alloc(size_t size); - void Release(); + bool canAlloc(size_t size) + { + size_t putPos = m_putPos, getPos = m_getPos; + size_t allocSize = powerOf2Align(size, Alignment); + if (putPos + allocSize < m_size) + { + // range before get + if (putPos + allocSize < getPos) + return true; + // range after get + if (putPos > getPos) + return true; + return false; + } + else + { + // ..]....[..get.. + if (putPos < getPos) + return false; + // ..get..]...[... 
+ // Actually all resources extending beyond heap space starts at 0 + if (allocSize > getPos) + return false; + return true; + } + } + + size_t alloc(size_t size) + { + assert(canAlloc(size)); + size_t putPos = m_putPos; + if (putPos + size < m_size) + { + m_putPos += powerOf2Align(size, Alignment); + return putPos; + } + else + { + m_putPos = powerOf2Align(size, Alignment); + return 0; + } + } + + void Release() + { + m_heap->Release(); + for (auto tmp : m_resourceStoredSinceLastSync) + { + std::get<2>(tmp)->Release(); + } + } }; struct GarbageCollectionThread @@ -129,12 +225,12 @@ private: ResourceStorage &getNonCurrentResourceStorage(); // Constants storage - DataHeap m_constantsData; + DataHeap m_constantsData; // Vertex storage - DataHeap m_vertexIndexData; + DataHeap m_vertexIndexData; // Texture storage - DataHeap m_textureUploadData; - DataHeap m_textureData; + DataHeap m_textureUploadData; + DataHeap m_textureData; struct UAVHeap { diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index 6e2383fb75..31e2fe8016 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -145,8 +145,8 @@ ID3D12Resource *uploadSingleTexture( const RSXTexture &texture, ID3D12Device *device, ID3D12GraphicsCommandList *commandList, - DataHeap &textureBuffersHeap, - DataHeap &textureHeap) + DataHeap &textureBuffersHeap, + DataHeap &textureHeap) { ID3D12Resource *vramTexture; size_t w = texture.GetWidth(), h = texture.GetHeight(); From c53828787a07cd40fe99792cdd78e81f1c94d17f Mon Sep 17 00:00:00 2001 From: vlj Date: Thu, 11 Jun 2015 21:01:24 +0200 Subject: [PATCH 264/343] d3d12: Emit an error if waiting for too long for semaphore --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 944ac0c24f..488586d3c5 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ 
b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -1215,10 +1215,15 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) void D3D12GSRender::semaphorePFIFOAcquire(u32 offset, u32 value) { + const std::chrono::time_point enterWait = std::chrono::system_clock::now(); while (true) { u32 val = vm::read32(m_label_addr + offset); if (val == value) break; + std::chrono::time_point waitPoint = std::chrono::system_clock::now(); + int elapsedTime = std::chrono::duration_cast(waitPoint - enterWait).count(); + if (elapsedTime > 0) + LOG_ERROR(RSX, "Has wait for more than a second for semaphore acquire"); std::this_thread::yield(); } } From 75a52219e03936b6bb23b8d631fa504f553c8496 Mon Sep 17 00:00:00 2001 From: vlj Date: Thu, 11 Jun 2015 21:08:00 +0200 Subject: [PATCH 265/343] d3d12: Rewind constant buffers get value --- rpcs3/Emu/RSX/D3D12/D3D12.h | 3 +++ rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp | 6 +++--- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 16 ++++++++-------- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12.h b/rpcs3/Emu/RSX/D3D12/D3D12.h index 25be93edc4..0bfe9a901b 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12.h @@ -7,6 +7,9 @@ #include "Emu/Memory/vm.h" #include "Emu/RSX/GCM.h" + +#define SAFE_RELEASE(x) if (x) x->Release(); + inline void check(HRESULT hr) { diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index 8dc28f8a86..c683aca841 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -439,7 +439,7 @@ void D3D12GSRender::setScaleOffset() D3D12_CPU_DESCRIPTOR_HANDLE Handle = getCurrentResourceStorage().m_scaleOffsetDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); Handle.ptr += getCurrentResourceStorage().m_currentScaleOffsetBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); m_device->CreateConstantBufferView(&constantBufferViewDesc, Handle); -// 
m_constantsData.m_resourceStoredSinceLastSync.push_back(std::make_tuple(heapOffset, 256, scaleOffsetBuffer)); + m_constantsData.m_resourceStoredSinceLastSync.push_back(std::make_tuple(heapOffset, 256, nullptr)); } void D3D12GSRender::FillVertexShaderConstantsBuffer() @@ -469,7 +469,7 @@ void D3D12GSRender::FillVertexShaderConstantsBuffer() D3D12_CPU_DESCRIPTOR_HANDLE Handle = getCurrentResourceStorage().m_constantsBufferDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); Handle.ptr += getCurrentResourceStorage().m_constantsBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); m_device->CreateConstantBufferView(&constantBufferViewDesc, Handle); -// m_constantsData.m_resourceStoredSinceLastSync.push_back(std::make_tuple(heapOffset, bufferSize, constantsBuffer)); + m_constantsData.m_resourceStoredSinceLastSync.push_back(std::make_tuple(heapOffset, bufferSize, nullptr)); } void D3D12GSRender::FillPixelShaderConstantsBuffer() @@ -533,7 +533,7 @@ void D3D12GSRender::FillPixelShaderConstantsBuffer() D3D12_CPU_DESCRIPTOR_HANDLE Handle = getCurrentResourceStorage().m_constantsBufferDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); Handle.ptr += getCurrentResourceStorage().m_constantsBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); m_device->CreateConstantBufferView(&constantBufferViewDesc, Handle); -// m_constantsData.m_resourceStoredSinceLastSync.push_back(std::make_tuple(heapOffset, bufferSize, constantsBuffer)); + m_constantsData.m_resourceStoredSinceLastSync.push_back(std::make_tuple(heapOffset, bufferSize, nullptr)); } diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 488586d3c5..fdad3fecae 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -61,13 +61,13 @@ void D3D12GSRender::ResourceStorage::Reset() m_frameFinishedHandle = 0; for (auto tmp : m_inUseConstantsBuffers) - 
std::get<2>(tmp)->Release(); + SAFE_RELEASE(std::get<2>(tmp)); for (auto tmp : m_inUseVertexIndexBuffers) - std::get<2>(tmp)->Release(); + SAFE_RELEASE(std::get<2>(tmp)); for (auto tmp : m_inUseTextureUploadBuffers) - std::get<2>(tmp)->Release(); + SAFE_RELEASE(std::get<2>(tmp)); for (auto tmp : m_inUseTexture2D) - std::get<2>(tmp)->Release(); + SAFE_RELEASE(std::get<2>(tmp)); m_inUseConstantsBuffers.clear(); m_inUseVertexIndexBuffers.clear(); m_inUseTextureUploadBuffers.clear(); @@ -120,13 +120,13 @@ void D3D12GSRender::ResourceStorage::Release() { // NOTE: Should be released only if no command are in flight ! for (auto tmp : m_inUseConstantsBuffers) - std::get<2>(tmp)->Release(); + SAFE_RELEASE(std::get<2>(tmp)); for (auto tmp : m_inUseVertexIndexBuffers) - std::get<2>(tmp)->Release(); + SAFE_RELEASE(std::get<2>(tmp)); for (auto tmp : m_inUseTextureUploadBuffers) - std::get<2>(tmp)->Release(); + SAFE_RELEASE(std::get<2>(tmp)); for (auto tmp : m_inUseTexture2D) - std::get<2>(tmp)->Release(); + SAFE_RELEASE(std::get<2>(tmp)); m_constantsBufferDescriptorsHeap->Release(); m_scaleOffsetDescriptorHeap->Release(); From 17e169e65217806fc36764b3c392ea0ac73f0425 Mon Sep 17 00:00:00 2001 From: vlj Date: Thu, 11 Jun 2015 21:15:39 +0200 Subject: [PATCH 266/343] d3d12: Fix crash at exit + tweak heap sizes to make dice test work --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 4 ++-- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index fdad3fecae..9d6a14a69b 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -376,8 +376,8 @@ D3D12GSRender::D3D12GSRender() m_rtts.Init(m_device); - m_constantsData.Init(m_device, 1024 * 1024 * 128, D3D12_HEAP_TYPE_UPLOAD, D3D12_HEAP_FLAG_NONE); - m_vertexIndexData.Init(m_device, 1024 * 1024 * 128, D3D12_HEAP_TYPE_UPLOAD, D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS); +
m_constantsData.Init(m_device, 1024 * 1024 * 64, D3D12_HEAP_TYPE_UPLOAD, D3D12_HEAP_FLAG_NONE); + m_vertexIndexData.Init(m_device, 1024 * 1024 * 384, D3D12_HEAP_TYPE_UPLOAD, D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS); m_textureUploadData.Init(m_device, 1024 * 1024 * 256, D3D12_HEAP_TYPE_UPLOAD, D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS); m_textureData.Init(m_device, 1024 * 1024 * 512, D3D12_HEAP_TYPE_DEFAULT, D3D12_HEAP_FLAG_ALLOW_ONLY_NON_RT_DS_TEXTURES); } diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index d77f6e9def..d4b0931bc2 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -152,7 +152,7 @@ struct DataHeap m_heap->Release(); for (auto tmp : m_resourceStoredSinceLastSync) { - std::get<2>(tmp)->Release(); + SAFE_RELEASE(std::get<2>(tmp)); } } }; From 41577b5018e579c288f1713a8a3dd306e83e3a3b Mon Sep 17 00:00:00 2001 From: vlj Date: Sat, 13 Jun 2015 18:20:08 +0200 Subject: [PATCH 267/343] d3d12: Fix some warnings --- rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp | 7 +++++-- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 2 +- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 20 +++++++++++++------- 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index c683aca841..d37597156a 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -83,6 +83,9 @@ DXGI_FORMAT getFormat(u8 type, u8 size) return typeX3[type]; case 4: return typeX4[type]; + default: + LOG_ERROR(RSX, "Wrong size for vertex attrib : %d", size); + return DXGI_FORMAT(); } } @@ -223,10 +226,10 @@ ID3D12Resource *createVertexBuffer(const VertexBufferFormat &vbf, const RSXVerte memcpy(bufferMap, vertexData[attributeId].data.data(), vertexData[attributeId].data.size()); continue; } - size_t baseOffset = vertexData[attributeId].addr - vbf.range.first; + size_t baseOffset = (size_t)vertexData[attributeId].addr - vbf.range.first; size_t tsize = 
vertexData[attributeId].GetTypeSize(); size_t size = vertexData[attributeId].size; - auto src = vm::get_ptr(vertexData[attributeId].addr + vbf.stride * vertex); + auto src = vm::get_ptr(vertexData[attributeId].addr + (int)vbf.stride * vertex); char* dst = (char*)bufferMap + baseOffset + vbf.stride * vertex; switch (tsize) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 9d6a14a69b..c56ca13f53 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -1221,7 +1221,7 @@ void D3D12GSRender::semaphorePFIFOAcquire(u32 offset, u32 value) u32 val = vm::read32(m_label_addr + offset); if (val == value) break; std::chrono::time_point waitPoint = std::chrono::system_clock::now(); - int elapsedTime = std::chrono::duration_cast(waitPoint - enterWait).count(); + long long elapsedTime = std::chrono::duration_cast(waitPoint - enterWait).count(); if (elapsedTime > 0) LOG_ERROR(RSX, "Has wait for more than a second for semaphore acquire"); std::this_thread::yield(); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index 31e2fe8016..71c57ba920 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -335,8 +335,8 @@ ID3D12Resource *uploadSingleTexture( src = (u32*)pixels; dst = (u32*)textureData; - log2width = (u32)(logf(w) / logf(2.f)); - log2height = (u32)(logf(h) / logf(2.f)); + log2width = (u32)(logf((float)w) / logf(2.f)); + log2height = (u32)(logf((float)h) / logf(2.f)); #pragma omp parallel for for (int j = 0; j < w; j++) @@ -364,8 +364,14 @@ ID3D12Resource *uploadSingleTexture( for (int j = 0; j < w * 4; j++) { - uint64_t tmp = src[row * w * 4 + j]; + unsigned short tmp = src[row * w * 4 + j]; dst[row * w * 4 + j] = (tmp >> 8) | (tmp << 8); + tmp = src[row * w * 4 + j + 1]; + dst[row * w * 4 + j + 1] = (tmp >> 8) | (tmp << 8); + tmp = src[row * w * 4 + j + 2]; + dst[row * w * 4 + j + 2] = (tmp >> 8) | (tmp << 
8); + tmp = src[row * w * 4 + j + 3]; + dst[row * w * 4 + j + 3] = (tmp >> 8) | (tmp << 8); } break; } @@ -378,8 +384,8 @@ ID3D12Resource *uploadSingleTexture( } Texture->Unmap(0, nullptr); - size_t powerOf2Height = log2(heightInBlocks) + 1; - textureSize = rowPitch * (1 << powerOf2Height); + size_t powerOf2Height = (size_t)log2f((float)heightInBlocks) + 1; + textureSize = rowPitch * (1i64 << powerOf2Height); assert(textureHeap.canAlloc(textureSize)); size_t heapOffset2 = textureHeap.alloc(textureSize); @@ -400,8 +406,8 @@ ID3D12Resource *uploadSingleTexture( src.pResource = Texture; src.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; src.PlacedFootprint.Footprint.Depth = 1; - src.PlacedFootprint.Footprint.Width = w; - src.PlacedFootprint.Footprint.Height = h; + src.PlacedFootprint.Footprint.Width = (UINT)w; + src.PlacedFootprint.Footprint.Height = (UINT)h; src.PlacedFootprint.Footprint.RowPitch = (UINT)rowPitch; src.PlacedFootprint.Footprint.Format = dxgiFormat; From e36c4f75e06dc7e295f65f28fcda9261322277a1 Mon Sep 17 00:00:00 2001 From: vlj Date: Sat, 13 Jun 2015 19:43:46 +0200 Subject: [PATCH 268/343] d3d12; Fix swizzle for B8 texture format Fix menu in guided fate paradox for WARP --- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index 71c57ba920..b7bd290ffd 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -441,7 +441,7 @@ size_t D3D12GSRender::UploadTextures() std::unordered_map::const_iterator ItRTT = m_rtts.m_renderTargets.find(texaddr); std::unordered_map::const_iterator ItCache = m_texturesCache.find(texaddr); bool isRenderTarget = false; - if (ItRTT != m_rtts.m_renderTargets.end()) +/* if (ItRTT != m_rtts.m_renderTargets.end()) { vramTexture = ItRTT->second; isRenderTarget = true; @@ -450,7 +450,7 @@ size_t D3D12GSRender::UploadTextures() { vramTexture = 
ItCache->second; } - else + else*/ { // Upload at each iteration to take advantage of overlapping transfer ID3D12GraphicsCommandList *commandList; @@ -480,9 +480,9 @@ size_t D3D12GSRender::UploadTextures() break; case CELL_GCM_TEXTURE_B8: srvDesc.Shader4ComponentMapping = D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING( - D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_1, - D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_2, - D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_3, + D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0, + D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0, + D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0, D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0); break; case CELL_GCM_TEXTURE_A1R5G5B5: @@ -565,7 +565,6 @@ size_t D3D12GSRender::UploadTextures() srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; break; } - D3D12_CPU_DESCRIPTOR_HANDLE Handle = getCurrentResourceStorage().m_textureDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); Handle.ptr += (getCurrentResourceStorage().m_currentTextureIndex + usedTexture) * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); m_device->CreateShaderResourceView(vramTexture, &srvDesc, Handle); From 00c975b4696891332f0463c2cc5d60ae3b2a6fc6 Mon Sep 17 00:00:00 2001 From: raven02 Date: Sun, 14 Jun 2015 04:45:15 +0800 Subject: [PATCH 269/343] d3d12: use gcm buffer width and height for copy texture region It fixes crash in Voodoo Chronicles and Terraria --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index c56ca13f53..c0a6c887fc 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -663,10 +663,13 @@ void D3D12GSRender::ExecCMD() 1.f }; commandList->RSSetViewports(1, &viewport); + D3D12_RECT box = { - 0, 0, - 
(LONG)m_surface_clip_w, (LONG)m_surface_clip_h, + 0, + 0, + (LONG)RSXThread::m_width, + (LONG)RSXThread::m_height, }; commandList->RSSetScissorRects(1, &box); @@ -755,7 +758,7 @@ void D3D12GSRender::Flip() dst.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; dst.SubresourceIndex = 0; dst.pResource = m_backBuffer[m_swapChain->GetCurrentBackBufferIndex()]; - D3D12_BOX box = { 0, 0, 0, m_surface_clip_w, m_surface_clip_h, 1 }; + D3D12_BOX box = { 0, 0, 0, RSXThread::m_width, RSXThread::m_height, 1 }; commandList->CopyTextureRegion(&dst, 0, 0, 0, &src, &box); barriers[0].Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; From 006d9893049aaec58d82bb897213c293af49eb9d Mon Sep 17 00:00:00 2001 From: vlj Date: Sun, 14 Jun 2015 00:01:44 +0200 Subject: [PATCH 270/343] d3d12: Uncomment code that shouldn't have been commented out --- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index b7bd290ffd..5c2d630c99 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -441,7 +441,7 @@ size_t D3D12GSRender::UploadTextures() std::unordered_map::const_iterator ItRTT = m_rtts.m_renderTargets.find(texaddr); std::unordered_map::const_iterator ItCache = m_texturesCache.find(texaddr); bool isRenderTarget = false; -/* if (ItRTT != m_rtts.m_renderTargets.end()) + if (ItRTT != m_rtts.m_renderTargets.end()) { vramTexture = ItRTT->second; isRenderTarget = true; @@ -450,7 +450,7 @@ size_t D3D12GSRender::UploadTextures() { vramTexture = ItCache->second; } - else*/ + else { // Upload at each iteration to take advantage of overlapping transfer ID3D12GraphicsCommandList *commandList; From f55bb7165c7891abd78871235d96544dd4a048bd Mon Sep 17 00:00:00 2001 From: vlj Date: Sun, 14 Jun 2015 01:07:15 +0200 Subject: [PATCH 271/343] d3d12: Load dll at runtime --- rpcs3/Emu/RSX/D3D12/D3D12.h | 1 + 
rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 29 +++++++++++++++++++++++---- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 3 --- 3 files changed, 26 insertions(+), 7 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12.h b/rpcs3/Emu/RSX/D3D12/D3D12.h index 0bfe9a901b..c0b061c875 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12.h @@ -7,6 +7,7 @@ #include "Emu/Memory/vm.h" #include "Emu/RSX/GCM.h" +#pragma comment (lib, "dxgi.lib") #define SAFE_RELEASE(x) if (x) x->Release(); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index c0a6c887fc..3e65358503 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -7,6 +7,25 @@ #include #include +PFN_D3D12_CREATE_DEVICE wrapD3D12CreateDevice; +PFN_D3D12_GET_DEBUG_INTERFACE wrapD3D12GetDebugInterface; +PFN_D3D12_SERIALIZE_ROOT_SIGNATURE wrapD3D12SerializeRootSignature; + +static HMODULE D3D12Module; + +static void loadD3D12FunctionPointers() +{ + D3D12Module = LoadLibrary(L"d3d12.dll"); + wrapD3D12CreateDevice = (PFN_D3D12_CREATE_DEVICE)GetProcAddress(D3D12Module, "D3D12CreateDevice"); + wrapD3D12GetDebugInterface = (PFN_D3D12_GET_DEBUG_INTERFACE)GetProcAddress(D3D12Module, "D3D12GetDebugInterface"); + wrapD3D12SerializeRootSignature = (PFN_D3D12_SERIALIZE_ROOT_SIGNATURE)GetProcAddress(D3D12Module, "D3D12SerializeRootSignature"); +} + +static void unloadD3D12FunctionPointers() +{ + FreeLibrary(D3D12Module); +} + GetGSFrameCb2 GetGSFrame = nullptr; void SetGetD3DGSFrameCallback(GetGSFrameCb2 value) @@ -194,7 +213,7 @@ std::pair compileF32toU8CS() ID3DBlob *rootSignatureBlob; - hr = D3D12SerializeRootSignature(&rootSignatureDesc, D3D_ROOT_SIGNATURE_VERSION_1, &rootSignatureBlob, &errorBlob); + hr = wrapD3D12SerializeRootSignature(&rootSignatureDesc, D3D_ROOT_SIGNATURE_VERSION_1, &rootSignatureBlob, &errorBlob); if (hr != S_OK) { const char *tmp = (const char*)errorBlob->GetBufferPointer(); @@ -207,10 +226,11 @@ std::pair 
compileF32toU8CS() D3D12GSRender::D3D12GSRender() : GSRender(), m_PSO(nullptr) { + loadD3D12FunctionPointers(); if (Ini.GSDebugOutputEnable.GetValue()) { Microsoft::WRL::ComPtr debugInterface; - D3D12GetDebugInterface(IID_PPV_ARGS(&debugInterface)); + wrapD3D12GetDebugInterface(IID_PPV_ARGS(&debugInterface)); debugInterface->EnableDebugLayer(); } @@ -230,7 +250,7 @@ D3D12GSRender::D3D12GSRender() dxgiFactory->EnumAdapters(Ini.GSD3DAdaptater.GetValue() - 2,&adaptater); break; } - check(D3D12CreateDevice(adaptater, D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&m_device))); + check(wrapD3D12CreateDevice(adaptater, D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&m_device))); // Queues D3D12_COMMAND_QUEUE_DESC copyQueueDesc = {}, graphicQueueDesc = {}; @@ -315,7 +335,7 @@ D3D12GSRender::D3D12GSRender() Microsoft::WRL::ComPtr rootSignatureBlob; Microsoft::WRL::ComPtr errorBlob; - check(D3D12SerializeRootSignature(&rootSignatureDesc, D3D_ROOT_SIGNATURE_VERSION_1, &rootSignatureBlob, &errorBlob)); + check(wrapD3D12SerializeRootSignature(&rootSignatureDesc, D3D_ROOT_SIGNATURE_VERSION_1, &rootSignatureBlob, &errorBlob)); m_device->CreateRootSignature(0, rootSignatureBlob->GetBufferPointer(), @@ -408,6 +428,7 @@ D3D12GSRender::~D3D12GSRender() m_swapChain->Release(); m_device->Release(); delete[] vertexConstantShadowCopy; + unloadD3D12FunctionPointers(); } void D3D12GSRender::Close() diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index d4b0931bc2..c19e56f8fd 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -17,9 +17,6 @@ // Some constants are the same between RSX and GL #include -#pragma comment (lib, "d3d12.lib") -#pragma comment (lib, "dxgi.lib") - class GSFrameBase2 { public: From 224bae383ca0aee94fa40f53d946fef0629491d9 Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 15 Jun 2015 18:35:36 +0200 Subject: [PATCH 272/343] d3d12: Add some code that will scale final render target --- 
rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 191 ++++++++++++++++++++++++-- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 13 ++ 2 files changed, 194 insertions(+), 10 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 3e65358503..5532ec0eca 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -166,16 +166,6 @@ void D3D12GSRender::ResourceStorage::Release() // 32 bits float to U8 unorm CS #define STRINGIFY(x) #x -const char *shaderCode = STRINGIFY( - Texture2D InputTexture : register(t0); \n - RWTexture2D OutputTexture : register(u0);\n - - [numthreads(8, 8, 1)]\n - void main(uint3 Id : SV_DispatchThreadID)\n -{ \n - OutputTexture[Id.xy] = InputTexture.Load(uint3(Id.xy, 0));\n -} -); /** * returns bytecode and root signature of a Compute Shader converting texture from @@ -184,6 +174,17 @@ const char *shaderCode = STRINGIFY( static std::pair compileF32toU8CS() { + const char *shaderCode = STRINGIFY( + Texture2D InputTexture : register(t0); \n + RWTexture2D OutputTexture : register(u0);\n + + [numthreads(8, 8, 1)]\n + void main(uint3 Id : SV_DispatchThreadID)\n + { \n + OutputTexture[Id.xy] = InputTexture.Load(uint3(Id.xy, 0));\n + } + ); + ID3DBlob *bytecode; Microsoft::WRL::ComPtr errorBlob; HRESULT hr = D3DCompile(shaderCode, strlen(shaderCode), "test", nullptr, nullptr, "main", "cs_5_0", 0, 0, &bytecode, errorBlob.GetAddressOf()); @@ -223,6 +224,173 @@ std::pair compileF32toU8CS() return std::make_pair(bytecode, rootSignatureBlob); } +void D3D12GSRender::Shader::Init(ID3D12Device *device) +{ + const char *fsCode = STRINGIFY( + Texture2D InputTexture : register(t0); \n + sampler bilinearSampler : register(s0); \n + + struct PixelInput \n + { \n + float4 Pos : SV_POSITION; \n + float2 TexCoords : TEXCOORDS0; \n + }; \n + + float4 main(PixelInput In) : SV_TARGET \n + { \n + return InputTexture.Sample(bilinearSampler, In.TexCoords); \n + } + ); + + Microsoft::WRL::ComPtr 
fsBytecode; + Microsoft::WRL::ComPtr errorBlob; + HRESULT hr = D3DCompile(fsCode, strlen(fsCode), "test", nullptr, nullptr, "main", "ps_5_0", 0, 0, &fsBytecode, errorBlob.GetAddressOf()); + if (hr != S_OK) + { + const char *tmp = (const char*)errorBlob->GetBufferPointer(); + LOG_ERROR(RSX, tmp); + } + + const char *vsCode = STRINGIFY( + struct VertexInput \n + { \n + float2 Pos : POSITION; \n + float2 TexCoords : TEXCOORDS0; \n + }; \n + + struct PixelInput \n + { \n + float4 Pos : SV_POSITION; \n + float2 TexCoords : TEXCOORDS0; \n + }; \n + + PixelInput main(VertexInput In) \n + { \n + PixelInput Out; \n + Out.Pos = float4(In.Pos, 0., 1.); \n + Out.TexCoords = In.TexCoords; \n + return Out; \n + } + ); + + Microsoft::WRL::ComPtr vsBytecode; + hr = D3DCompile(vsCode, strlen(vsCode), "test", nullptr, nullptr, "main", "vs_5_0", 0, 0, &vsBytecode, errorBlob.GetAddressOf()); + if (hr != S_OK) + { + const char *tmp = (const char*)errorBlob->GetBufferPointer(); + LOG_ERROR(RSX, tmp); + } + + D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = {}; + psoDesc.PS.BytecodeLength = fsBytecode->GetBufferSize(); + psoDesc.PS.pShaderBytecode = fsBytecode->GetBufferPointer(); + psoDesc.VS.BytecodeLength = vsBytecode->GetBufferSize(); + psoDesc.VS.pShaderBytecode = vsBytecode->GetBufferPointer(); + psoDesc.NumRenderTargets = 1; + psoDesc.RTVFormats[0] = DXGI_FORMAT_R8G8B8A8_UNORM; + psoDesc.SampleDesc.Count = 1; + psoDesc.SampleMask = UINT_MAX; + + D3D12_INPUT_ELEMENT_DESC IADesc[2] = {}; + IADesc[0].SemanticName = "POSITION"; + IADesc[0].Format = DXGI_FORMAT_R32G32_FLOAT; + IADesc[1].SemanticName = "TEXCOORDS"; + IADesc[1].Format = DXGI_FORMAT_R32G32_FLOAT; + IADesc[1].AlignedByteOffset = 2 * sizeof(float); + + psoDesc.InputLayout.NumElements = 2; + psoDesc.InputLayout.pInputElementDescs = IADesc; + + psoDesc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE; + psoDesc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID; + + D3D12_DESCRIPTOR_RANGE descriptorRange[2] = {}; + // Textures + 
descriptorRange[0].BaseShaderRegister = 0; + descriptorRange[0].NumDescriptors = 1; + descriptorRange[0].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; + descriptorRange[1].BaseShaderRegister = 0; + descriptorRange[1].NumDescriptors = 1; + descriptorRange[1].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER; + D3D12_ROOT_PARAMETER RP[2] = {}; + RP[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + RP[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + RP[0].DescriptorTable.pDescriptorRanges = &descriptorRange[0]; + RP[0].DescriptorTable.NumDescriptorRanges = 1; + RP[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + RP[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + RP[1].DescriptorTable.pDescriptorRanges = &descriptorRange[1]; + RP[1].DescriptorTable.NumDescriptorRanges = 1; + + D3D12_ROOT_SIGNATURE_DESC rootSignatureDesc = {}; + rootSignatureDesc.Flags = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT; + rootSignatureDesc.NumParameters = 2; + rootSignatureDesc.pParameters = RP; + + Microsoft::WRL::ComPtr rootSignatureBlob; + + hr = wrapD3D12SerializeRootSignature(&rootSignatureDesc, D3D_ROOT_SIGNATURE_VERSION_1, &rootSignatureBlob, &errorBlob); + if (hr != S_OK) + { + const char *tmp = (const char*)errorBlob->GetBufferPointer(); + LOG_ERROR(RSX, tmp); + } + + hr = device->CreateRootSignature(0, rootSignatureBlob->GetBufferPointer(), rootSignatureBlob->GetBufferSize(), IID_PPV_ARGS(&m_rootSignature)); + + psoDesc.pRootSignature = m_rootSignature; + psoDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + + check(device->CreateGraphicsPipelineState(&psoDesc, IID_PPV_ARGS(&m_PSO))); + + + float quadVertex[16] = { + -1., -1., 0., 0., + -1., 1., 0., 1., + 1., 1., 1., 1., + 1., -1., 1., 0., + }; + + D3D12_HEAP_PROPERTIES heapProp = {}; + heapProp.Type = D3D12_HEAP_TYPE_UPLOAD; + check( + device->CreateCommittedResource( + &heapProp, + D3D12_HEAP_FLAG_NONE, + &getBufferResourceDesc(16 * sizeof(float)), + 
D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&m_vertexBuffer) + )); + + void *tmp; + m_vertexBuffer->Map(0, nullptr, &tmp); + memcpy(tmp, quadVertex, 16 * sizeof(float)); + m_vertexBuffer->Unmap(0, nullptr); + + D3D12_DESCRIPTOR_HEAP_DESC heapDesc = {}; + heapDesc.NumDescriptors = 1; + heapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; + heapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; + + check( + device->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(&m_textureDescriptorHeap)) + ); + heapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER; + check( + device->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(&m_samplerDescriptorHeap)) + ); +} + +void D3D12GSRender::Shader::Release() +{ + m_PSO->Release(); + m_rootSignature->Release(); + m_vertexBuffer->Release(); + m_textureDescriptorHeap->Release(); + m_samplerDescriptorHeap->Release(); +} + D3D12GSRender::D3D12GSRender() : GSRender(), m_PSO(nullptr) { @@ -368,6 +536,8 @@ D3D12GSRender::D3D12GSRender() p.first->Release(); p.second->Release(); + m_outputScalingPass.Init(m_device); + D3D12_HEAP_PROPERTIES hp = {}; hp.Type = D3D12_HEAP_TYPE_DEFAULT; check( @@ -426,6 +596,7 @@ D3D12GSRender::~D3D12GSRender() for (unsigned i = 0; i < 17; i++) m_rootSignatures[i]->Release(); m_swapChain->Release(); + m_outputScalingPass.Release(); m_device->Release(); delete[] vertexConstantShadowCopy; unloadD3D12FunctionPointers(); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index c19e56f8fd..891aec7355 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -182,6 +182,19 @@ private: // m_rootSignatures[N] is RS with N texture/sample ID3D12RootSignature *m_rootSignatures[17]; + struct Shader + { + ID3D12PipelineState *m_PSO; + ID3D12RootSignature *m_rootSignature; + ID3D12Resource *m_vertexBuffer; + ID3D12DescriptorHeap *m_textureDescriptorHeap; + ID3D12DescriptorHeap *m_samplerDescriptorHeap; + void Init(ID3D12Device *device); + void 
Release(); + }; + + Shader m_outputScalingPass; + ID3D12PipelineState *m_convertPSO; ID3D12RootSignature *m_convertRootSignature; From 281f8be76f9627e3405f2205d6e3d7c99c2a3e02 Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 15 Jun 2015 19:09:29 +0200 Subject: [PATCH 273/343] d3d12: Enable scaling pass The output is black and white, need to find out why. --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 76 +++++++++++++++++++++------ 1 file changed, 59 insertions(+), 17 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 5532ec0eca..d56554c72b 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -340,15 +340,16 @@ void D3D12GSRender::Shader::Init(ID3D12Device *device) psoDesc.pRootSignature = m_rootSignature; psoDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + psoDesc.BlendState.RenderTarget[0].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL; check(device->CreateGraphicsPipelineState(&psoDesc, IID_PPV_ARGS(&m_PSO))); float quadVertex[16] = { - -1., -1., 0., 0., - -1., 1., 0., 1., - 1., 1., 1., 1., - 1., -1., 1., 0., + -1., -1., 0., 1., + -1., 1., 0., 0., + 1., -1., 1., 1., + 1., 1., 1., 0., }; D3D12_HEAP_PROPERTIES heapProp = {}; @@ -935,27 +936,68 @@ void D3D12GSRender::Flip() barriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; barriers[0].Transition.pResource = m_backBuffer[m_swapChain->GetCurrentBackBufferIndex()]; barriers[0].Transition.StateBefore = D3D12_RESOURCE_STATE_PRESENT; - barriers[0].Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_DEST; + barriers[0].Transition.StateAfter = D3D12_RESOURCE_STATE_RENDER_TARGET; barriers[1].Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; barriers[1].Transition.pResource = m_rtts.m_currentlyBoundRenderTargets[0]; barriers[1].Transition.StateBefore = D3D12_RESOURCE_STATE_RENDER_TARGET; - barriers[1].Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE; + barriers[1].Transition.StateAfter = 
D3D12_RESOURCE_STATE_GENERIC_READ; commandList->ResourceBarrier(2, barriers); - D3D12_TEXTURE_COPY_LOCATION src = {}, dst = {}; - src.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; - src.SubresourceIndex = 0; - src.pResource = m_rtts.m_currentlyBoundRenderTargets[0], - dst.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; - dst.SubresourceIndex = 0; - dst.pResource = m_backBuffer[m_swapChain->GetCurrentBackBufferIndex()]; - D3D12_BOX box = { 0, 0, 0, RSXThread::m_width, RSXThread::m_height, 1 }; - commandList->CopyTextureRegion(&dst, 0, 0, 0, &src, &box); - barriers[0].Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; + D3D12_VIEWPORT viewport = + { + 0.f, + 0.f, + (float)RSXThread::m_width, + (float)RSXThread::m_height, + 0.f, + 1.f + }; + commandList->RSSetViewports(1, &viewport); + + D3D12_RECT box = + { + 0, + 0, + (LONG)RSXThread::m_width, + (LONG)RSXThread::m_height, + }; + commandList->RSSetScissorRects(1, &box); + commandList->SetGraphicsRootSignature(m_outputScalingPass.m_rootSignature); + commandList->SetPipelineState(m_outputScalingPass.m_PSO); + D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + // FIXME: Not always true + srvDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + srvDesc.Texture2D.MipLevels = 1; + srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + m_device->CreateShaderResourceView(m_rtts.m_currentlyBoundRenderTargets[0], &srvDesc, m_outputScalingPass.m_textureDescriptorHeap->GetCPUDescriptorHandleForHeapStart()); + D3D12_SAMPLER_DESC samplerDesc = {}; + samplerDesc.Filter = D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT; + samplerDesc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_WRAP; + samplerDesc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_WRAP; + samplerDesc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_WRAP; + m_device->CreateSampler(&samplerDesc, m_outputScalingPass.m_samplerDescriptorHeap->GetCPUDescriptorHandleForHeapStart()); + commandList->SetDescriptorHeaps(1, 
&m_outputScalingPass.m_textureDescriptorHeap); + commandList->SetGraphicsRootDescriptorTable(0, m_outputScalingPass.m_textureDescriptorHeap->GetGPUDescriptorHandleForHeapStart()); + commandList->SetDescriptorHeaps(1, &m_outputScalingPass.m_samplerDescriptorHeap); + commandList->SetGraphicsRootDescriptorTable(1, m_outputScalingPass.m_samplerDescriptorHeap->GetGPUDescriptorHandleForHeapStart()); + + D3D12_CPU_DESCRIPTOR_HANDLE Handle = m_backbufferAsRendertarget[m_swapChain->GetCurrentBackBufferIndex()]->GetCPUDescriptorHandleForHeapStart(); + commandList->OMSetRenderTargets(1, &Handle, true, nullptr); + D3D12_VERTEX_BUFFER_VIEW vbv = {}; + vbv.BufferLocation = m_outputScalingPass.m_vertexBuffer->GetGPUVirtualAddress(); + vbv.StrideInBytes = 4 * sizeof(float); + vbv.SizeInBytes = 16 * sizeof(float); + commandList->IASetVertexBuffers(0, 1, &vbv); + commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); + + commandList->DrawInstanced(4, 1, 0, 0); + + barriers[0].Transition.StateBefore = D3D12_RESOURCE_STATE_RENDER_TARGET; barriers[0].Transition.StateAfter = D3D12_RESOURCE_STATE_PRESENT; - barriers[1].Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE; + barriers[1].Transition.StateBefore = D3D12_RESOURCE_STATE_GENERIC_READ; barriers[1].Transition.StateAfter = D3D12_RESOURCE_STATE_RENDER_TARGET; commandList->ResourceBarrier(2, barriers); commandList->Close(); From 9fdb6f0dad8b61124fde1cb230a02503151ac415 Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 15 Jun 2015 19:13:39 +0200 Subject: [PATCH 274/343] d3d12: Fix color of scaling output --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index d56554c72b..16b9db8990 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -227,7 +227,7 @@ std::pair compileF32toU8CS() void D3D12GSRender::Shader::Init(ID3D12Device *device) { 
const char *fsCode = STRINGIFY( - Texture2D InputTexture : register(t0); \n + Texture2D InputTexture : register(t0); \n sampler bilinearSampler : register(s0); \n struct PixelInput \n From 2310ba137ff45e164f3524fc16cb7a4fa6dc489d Mon Sep 17 00:00:00 2001 From: vlj Date: Tue, 16 Jun 2015 15:33:00 +0200 Subject: [PATCH 275/343] d3d12: Fix crash with rescaling pass --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 30 +++++++++++++++++++-------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 16b9db8990..3d7350e655 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -370,7 +370,7 @@ void D3D12GSRender::Shader::Init(ID3D12Device *device) m_vertexBuffer->Unmap(0, nullptr); D3D12_DESCRIPTOR_HEAP_DESC heapDesc = {}; - heapDesc.NumDescriptors = 1; + heapDesc.NumDescriptors = 2; heapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; heapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; @@ -966,26 +966,38 @@ void D3D12GSRender::Flip() commandList->RSSetScissorRects(1, &box); commandList->SetGraphicsRootSignature(m_outputScalingPass.m_rootSignature); commandList->SetPipelineState(m_outputScalingPass.m_PSO); + D3D12_CPU_DESCRIPTOR_HANDLE CPUHandle; + CPUHandle = m_outputScalingPass.m_textureDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); + CPUHandle.ptr += m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) * m_swapChain->GetCurrentBackBufferIndex(); D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; // FIXME: Not always true srvDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; srvDesc.Texture2D.MipLevels = 1; srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; - m_device->CreateShaderResourceView(m_rtts.m_currentlyBoundRenderTargets[0], &srvDesc, m_outputScalingPass.m_textureDescriptorHeap->GetCPUDescriptorHandleForHeapStart()); + 
m_device->CreateShaderResourceView(m_rtts.m_currentlyBoundRenderTargets[0], &srvDesc, CPUHandle); + D3D12_SAMPLER_DESC samplerDesc = {}; samplerDesc.Filter = D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT; samplerDesc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_WRAP; samplerDesc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_WRAP; samplerDesc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_WRAP; - m_device->CreateSampler(&samplerDesc, m_outputScalingPass.m_samplerDescriptorHeap->GetCPUDescriptorHandleForHeapStart()); - commandList->SetDescriptorHeaps(1, &m_outputScalingPass.m_textureDescriptorHeap); - commandList->SetGraphicsRootDescriptorTable(0, m_outputScalingPass.m_textureDescriptorHeap->GetGPUDescriptorHandleForHeapStart()); - commandList->SetDescriptorHeaps(1, &m_outputScalingPass.m_samplerDescriptorHeap); - commandList->SetGraphicsRootDescriptorTable(1, m_outputScalingPass.m_samplerDescriptorHeap->GetGPUDescriptorHandleForHeapStart()); + CPUHandle = m_outputScalingPass.m_samplerDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); + CPUHandle.ptr += m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER) * m_swapChain->GetCurrentBackBufferIndex(); + m_device->CreateSampler(&samplerDesc, CPUHandle); - D3D12_CPU_DESCRIPTOR_HANDLE Handle = m_backbufferAsRendertarget[m_swapChain->GetCurrentBackBufferIndex()]->GetCPUDescriptorHandleForHeapStart(); - commandList->OMSetRenderTargets(1, &Handle, true, nullptr); + D3D12_GPU_DESCRIPTOR_HANDLE GPUHandle; + GPUHandle = m_outputScalingPass.m_textureDescriptorHeap->GetGPUDescriptorHandleForHeapStart(); + GPUHandle.ptr += m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) * m_swapChain->GetCurrentBackBufferIndex(); + commandList->SetDescriptorHeaps(1, &m_outputScalingPass.m_textureDescriptorHeap); + commandList->SetGraphicsRootDescriptorTable(0, GPUHandle); + GPUHandle = m_outputScalingPass.m_samplerDescriptorHeap->GetGPUDescriptorHandleForHeapStart(); + GPUHandle.ptr += 
m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER) * m_swapChain->GetCurrentBackBufferIndex(); + commandList->SetDescriptorHeaps(1, &m_outputScalingPass.m_samplerDescriptorHeap); + commandList->SetGraphicsRootDescriptorTable(1, GPUHandle); + + CPUHandle = m_backbufferAsRendertarget[m_swapChain->GetCurrentBackBufferIndex()]->GetCPUDescriptorHandleForHeapStart(); + commandList->OMSetRenderTargets(1, &CPUHandle, true, nullptr); D3D12_VERTEX_BUFFER_VIEW vbv = {}; vbv.BufferLocation = m_outputScalingPass.m_vertexBuffer->GetGPUVirtualAddress(); vbv.StrideInBytes = 4 * sizeof(float); From 09ccd7e436de01c9c14d517d0fc8c9edf8284761 Mon Sep 17 00:00:00 2001 From: vlj Date: Tue, 16 Jun 2015 15:48:04 +0200 Subject: [PATCH 276/343] d3d12: Fix crash with W16Z16Y16X16 texture format --- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index 5c2d630c99..e87cd0eaa7 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -366,12 +366,6 @@ ID3D12Resource *uploadSingleTexture( { unsigned short tmp = src[row * w * 4 + j]; dst[row * w * 4 + j] = (tmp >> 8) | (tmp << 8); - tmp = src[row * w * 4 + j + 1]; - dst[row * w * 4 + j + 1] = (tmp >> 8) | (tmp << 8); - tmp = src[row * w * 4 + j + 2]; - dst[row * w * 4 + j + 2] = (tmp >> 8) | (tmp << 8); - tmp = src[row * w * 4 + j + 3]; - dst[row * w * 4 + j + 3] = (tmp >> 8) | (tmp << 8); } break; } From f59bc86ac5bf965c69dc46549e171277791c2016 Mon Sep 17 00:00:00 2001 From: vlj Date: Tue, 16 Jun 2015 15:55:04 +0200 Subject: [PATCH 277/343] d3d12: Fix scaling for terraria/Voodoo chronicles It break render_to_target test but it looks like an issue with scale/offset buffer rather than viewport/scissor --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp 
b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 3d7350e655..731b9f69ab 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -850,8 +850,8 @@ void D3D12GSRender::ExecCMD() { 0.f, 0.f, - (float)RSXThread::m_width, - (float)RSXThread::m_height, + (float)m_surface_clip_w, + (float)m_surface_clip_h, -1.f, 1.f }; @@ -861,8 +861,8 @@ void D3D12GSRender::ExecCMD() { 0, 0, - (LONG)RSXThread::m_width, - (LONG)RSXThread::m_height, + (LONG)m_surface_clip_w, + (LONG)m_surface_clip_h, }; commandList->RSSetScissorRects(1, &box); From f2d39d0e82ff319449c1613e9290fab01188e304 Mon Sep 17 00:00:00 2001 From: vlj Date: Tue, 16 Jun 2015 16:16:38 +0200 Subject: [PATCH 278/343] d3d12: Use fixed width/height scale and use surface_clip info for scaleOffset matrix Thanks to raven02 for the patch. --- rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index d37597156a..16555d5425 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -409,17 +409,17 @@ void D3D12GSRender::setScaleOffset() }; // Scale - scaleOffsetMat[0] *= (float&)methodRegisters[NV4097_SET_VIEWPORT_SCALE + (0x4 * 0)] / (RSXThread::m_width / RSXThread::m_width_scale); - scaleOffsetMat[5] *= (float&)methodRegisters[NV4097_SET_VIEWPORT_SCALE + (0x4 * 1)] / (RSXThread::m_height / RSXThread::m_height_scale); + scaleOffsetMat[0] *= (float&)methodRegisters[NV4097_SET_VIEWPORT_SCALE + (0x4 * 0)] / (m_surface_clip_w / 2.f); + scaleOffsetMat[5] *= (float&)methodRegisters[NV4097_SET_VIEWPORT_SCALE + (0x4 * 1)] / (m_surface_clip_h / 2.f); scaleOffsetMat[10] = (float&)methodRegisters[NV4097_SET_VIEWPORT_SCALE + (0x4 * 2)]; // Offset - scaleOffsetMat[3] = (float&)methodRegisters[NV4097_SET_VIEWPORT_OFFSET + (0x4 * 0)] - (RSXThread::m_width / RSXThread::m_width_scale); - scaleOffsetMat[7] = 
-((float&)methodRegisters[NV4097_SET_VIEWPORT_OFFSET + (0x4 * 1)] - (RSXThread::m_height / RSXThread::m_height_scale)); + scaleOffsetMat[3] = (float&)methodRegisters[NV4097_SET_VIEWPORT_OFFSET + (0x4 * 0)] - (m_surface_clip_w / 2.f); + scaleOffsetMat[7] = -((float&)methodRegisters[NV4097_SET_VIEWPORT_OFFSET + (0x4 * 1)] - (m_surface_clip_h / 2.f)); scaleOffsetMat[11] = (float&)methodRegisters[NV4097_SET_VIEWPORT_OFFSET + (0x4 * 2)]; - scaleOffsetMat[3] /= RSXThread::m_width / RSXThread::m_width_scale; - scaleOffsetMat[7] /= RSXThread::m_height / RSXThread::m_height_scale; + scaleOffsetMat[3] /= m_surface_clip_w / 2.f; + scaleOffsetMat[7] /= m_surface_clip_h / 2.f; assert(m_constantsData.canAlloc(256)); size_t heapOffset = m_constantsData.alloc(256); From aa66ddcd8608591a7b7b231bbac4a739a15fd014 Mon Sep 17 00:00:00 2001 From: vlj Date: Tue, 16 Jun 2015 16:59:46 +0200 Subject: [PATCH 279/343] d3d12: Add some code documentation + rename some functions --- rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp | 2 +- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 6 +-- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 45 +++++++++++++------ rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp | 2 +- 4 files changed, 36 insertions(+), 19 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index 16555d5425..eaf26a827d 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -264,7 +264,7 @@ ID3D12Resource *createVertexBuffer(const VertexBufferFormat &vbf, const RSXVerte return vertexBuffer; } -std::pair, D3D12_INDEX_BUFFER_VIEW> D3D12GSRender::EnableVertexData(bool indexed_draw) +std::pair, D3D12_INDEX_BUFFER_VIEW> D3D12GSRender::UploadVertexBuffers(bool indexed_draw) { std::pair, D3D12_INDEX_BUFFER_VIEW> result; const std::vector &vertexBufferFormat = FormatVertexData(m_vertex_data); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 731b9f69ab..902b45de94 100644 --- 
a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -630,7 +630,7 @@ void D3D12GSRender::ExecCMD(u32 cmd) { assert(cmd == NV4097_CLEAR_SURFACE); - InitDrawBuffers(); + PrepareRenderTargets(); ID3D12GraphicsCommandList *commandList; check(m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, getCurrentResourceStorage().m_commandAllocator, nullptr, IID_PPV_ARGS(&commandList))); @@ -710,7 +710,7 @@ void D3D12GSRender::ExecCMD(u32 cmd) void D3D12GSRender::ExecCMD() { - InitDrawBuffers(); + PrepareRenderTargets(); // Init vertex count // TODO: Very hackish, clean this @@ -743,7 +743,7 @@ void D3D12GSRender::ExecCMD() if (m_indexed_array.m_count || m_draw_array_count) { - const std::pair, D3D12_INDEX_BUFFER_VIEW> &vertexIndexBufferViews = EnableVertexData(m_indexed_array.m_count ? true : false); + const std::pair, D3D12_INDEX_BUFFER_VIEW> &vertexIndexBufferViews = UploadVertexBuffers(m_indexed_array.m_count ? true : false); commandList->IASetVertexBuffers(0, (UINT)vertexIndexBufferViews.first.size(), vertexIndexBufferViews.first.data()); if (m_forcedIndexBuffer || m_indexed_array.m_count) commandList->IASetIndexBuffer(&vertexIndexBufferViews.second); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 891aec7355..7c4ac7ee2f 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -81,6 +81,14 @@ struct InitHeap } }; + +/** + * Wrapper around a ID3D12Resource or a ID3D12Heap. + * Acts as a ring buffer : hold a get and put pointers, + * put pointer is used as storage space offset + * and get is used as beginning of in use data space. + * This wrapper checks that put pointer doesn't cross get one. + */ template struct DataHeap { @@ -154,6 +162,12 @@ struct DataHeap } }; + +/** + * Wrapper for a worker thread that executes lambda functions + * in the order they were submitted during its lifetime. 
+ * Used mostly to release data that are not needed anymore. + */ struct GarbageCollectionThread { std::mutex m_mutex; @@ -192,12 +206,25 @@ private: void Init(ID3D12Device *device); void Release(); }; - + + /** + * Stores data related to the scaling pass that turns internal + * render targets into presented buffers. + */ Shader m_outputScalingPass; + /** + * Data used when depth buffer is converted to uchar textures. + */ ID3D12PipelineState *m_convertPSO; ID3D12RootSignature *m_convertRootSignature; + + /** + * Stores data that are "ping ponged" between frame. + * For instance command allocator : maintains 2 command allocators and + * swap between them when frame is flipped. + */ struct ResourceStorage { ID3D12Fence* m_frameFinishedFence; @@ -296,29 +323,19 @@ private: virtual void Close() override; bool LoadProgram(); - std::pair, D3D12_INDEX_BUFFER_VIEW> EnableVertexData(bool indexed_draw = false); + std::pair, D3D12_INDEX_BUFFER_VIEW> UploadVertexBuffers(bool indexed_draw = false); void setScaleOffset(); void FillVertexShaderConstantsBuffer(); void FillPixelShaderConstantsBuffer(); /** + * Upload textures to Data heap if necessary and create necessary descriptor in the per frame storage struct. 
* returns the number of texture uploaded */ size_t UploadTextures(); size_t GetMaxAniso(size_t aniso); D3D12_TEXTURE_ADDRESS_MODE GetWrap(size_t wrap); - /*void DisableVertexData(); - - void WriteBuffers(); - void WriteColorBuffers(); - void WriteColorBufferA(); - void WriteColorBufferB(); - void WriteColorBufferC(); - void WriteColorBufferD(); - - void DrawObjects();*/ - void InitDrawBuffers(); - void WriteDepthBuffer(); + void PrepareRenderTargets(); protected: virtual void OnInit() override; virtual void OnInitThread() override; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp index 6bcdfbc56f..5a8d776239 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp @@ -12,7 +12,7 @@ #include "D3D12.h" #include "D3D12GSRender.h" -void D3D12GSRender::InitDrawBuffers() +void D3D12GSRender::PrepareRenderTargets() { // FBO location has changed, previous data might be copied u32 address_a = GetAddress(m_surface_offset_a, m_context_dma_color_a - 0xfeed0000); From dca9ae6ab5d63e6a7bae7dd5ba9730550788a384 Mon Sep 17 00:00:00 2001 From: vlj Date: Tue, 16 Jun 2015 18:15:01 +0200 Subject: [PATCH 280/343] d3d12: Factorize cleaning function for heaps --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 27 ++++++++++----------------- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 18 ++++++++++++++++++ 2 files changed, 28 insertions(+), 17 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 902b45de94..70d97164e9 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -1031,29 +1031,22 @@ void D3D12GSRender::Flip() m_texturesCache.clear(); m_texturesRTTs.clear(); - storage.m_inUseConstantsBuffers = m_constantsData.m_resourceStoredSinceLastSync; - m_constantsData.m_resourceStoredSinceLastSync.clear(); - storage.m_inUseVertexIndexBuffers = m_vertexIndexData.m_resourceStoredSinceLastSync; - 
m_vertexIndexData.m_resourceStoredSinceLastSync.clear(); - storage.m_inUseTextureUploadBuffers = m_textureUploadData.m_resourceStoredSinceLastSync; - m_textureUploadData.m_resourceStoredSinceLastSync.clear(); - storage.m_inUseTexture2D = m_textureData.m_resourceStoredSinceLastSync; - m_textureData.m_resourceStoredSinceLastSync.clear(); + std::vector > cleaningFunction = + { + m_constantsData.getCleaningFunction(), + m_vertexIndexData.getCleaningFunction(), + m_textureUploadData.getCleaningFunction(), + m_textureData.getCleaningFunction() + }; - m_GC.pushWork([&]() + m_GC.pushWork([&, cleaningFunction]() { WaitForSingleObject(storage.m_frameFinishedHandle, INFINITE); CloseHandle(storage.m_frameFinishedHandle); storage.m_frameFinishedFence->Release(); - for (auto tmp : storage.m_inUseConstantsBuffers) - m_constantsData.m_getPos = std::get<0>(tmp); - for (auto tmp : storage.m_inUseVertexIndexBuffers) - m_vertexIndexData.m_getPos = std::get<0>(tmp); - for (auto tmp : storage.m_inUseTextureUploadBuffers) - m_textureUploadData.m_getPos = std::get<0>(tmp); - for (auto tmp : storage.m_inUseTexture2D) - m_textureData.m_getPos = std::get<0>(tmp); + for (unsigned i = 0; i < 4; i++) + cleaningFunction[i](); storage.Reset(); }); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 7c4ac7ee2f..214ae9eb1e 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -160,6 +160,24 @@ struct DataHeap SAFE_RELEASE(std::get<2>(tmp)); } } + + /** + * Get a function that cleans heaps. + * It's caller responsability to ensure data are not used when executed. 
+ */ + std::function getCleaningFunction() + { + size_t& getPointer = m_getPos; + auto duplicatem_resourceStoredSinceLastSync = m_resourceStoredSinceLastSync; + m_resourceStoredSinceLastSync.clear(); + return [=, &getPointer]() { + for (auto tmp : duplicatem_resourceStoredSinceLastSync) + { + SAFE_RELEASE(std::get<2>(tmp)); + getPointer = std::get<0>(tmp); + } + }; + } }; From d23cf861f1ee5e53dfc13c1c99c0de4d365d14f3 Mon Sep 17 00:00:00 2001 From: vlj Date: Tue, 16 Jun 2015 18:34:56 +0200 Subject: [PATCH 281/343] d3d12: Factorize ring buffer like code to depth/color buffer migration --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 48 ++++++++++----------------- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 20 ++--------- 2 files changed, 20 insertions(+), 48 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 70d97164e9..17b92dab94 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -551,19 +551,8 @@ D3D12GSRender::D3D12GSRender() IID_PPV_ARGS(&m_dummyTexture)) ); - D3D12_HEAP_DESC hd = {}; - hd.SizeInBytes = 1024 * 1024 * 128; - hd.Properties.Type = D3D12_HEAP_TYPE_READBACK; - hd.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS; - check(m_device->CreateHeap(&hd, IID_PPV_ARGS(&m_readbackResources.m_heap))); - m_readbackResources.m_putPos = 0; - m_readbackResources.m_getPos = 1024 * 1024 * 128 - 1; - - hd.Properties.Type = D3D12_HEAP_TYPE_DEFAULT; - hd.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_NON_RT_DS_TEXTURES; - check(m_device->CreateHeap(&hd, IID_PPV_ARGS(&m_UAVHeap.m_heap))); - m_UAVHeap.m_putPos = 0; - m_UAVHeap.m_getPos = 1024 * 1024 * 128 - 1; + m_readbackResources.Init(m_device, 1024 * 1024 * 128, D3D12_HEAP_TYPE_READBACK, D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS); + m_UAVHeap.Init(m_device, 1024 * 1024 * 128, D3D12_HEAP_TYPE_DEFAULT, D3D12_HEAP_FLAG_ALLOW_ONLY_NON_RT_DS_TEXTURES); m_rtts.Init(m_device); @@ -1087,14 +1076,11 @@ ID3D12Resource * 
D3D12GSRender::writeColorBuffer(ID3D12Resource * RTT, ID3D12Gra heapProp.Type = D3D12_HEAP_TYPE_READBACK; D3D12_RESOURCE_DESC resdesc = getBufferResourceDesc(rowPitch * h); - size_t heapOffset = powerOf2Align(m_readbackResources.m_putPos.load(), 65536); size_t sizeInByte = rowPitch * h; - - if (heapOffset + sizeInByte >= 1024 * 1024 * 128) // If it will be stored past heap size - heapOffset = 0; + assert(m_readbackResources.canAlloc(sizeInByte)); + size_t heapOffset = m_readbackResources.alloc(sizeInByte); resdesc = getBufferResourceDesc(sizeInByte); - check( m_device->CreatePlacedResource( m_readbackResources.m_heap, @@ -1105,6 +1091,7 @@ ID3D12Resource * D3D12GSRender::writeColorBuffer(ID3D12Resource * RTT, ID3D12Gra IID_PPV_ARGS(&Result) ) ); + m_readbackResources.m_resourceStoredSinceLastSync.push_back(std::make_tuple(heapOffset, sizeInByte, Result)); cmdlist->ResourceBarrier(1, &getResourceBarrierTransition(RTT, D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE)); @@ -1166,11 +1153,9 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) D3D12_RESOURCE_DESC resdesc = getTexture2DResourceDesc(m_surface_clip_w, m_surface_clip_h, DXGI_FORMAT_R8_UNORM); resdesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; - size_t heapOffset = m_readbackResources.m_putPos.load(); - heapOffset = powerOf2Align(heapOffset, 65536); - size_t sizeInByte = m_surface_clip_w * m_surface_clip_h; - if (heapOffset + sizeInByte >= 1024 * 1024 * 128) // If it will be stored past heap size - heapOffset = 0; + size_t sizeInByte = m_surface_clip_w * m_surface_clip_h * 2; + assert(m_UAVHeap.canAlloc(sizeInByte)); + size_t heapOffset = m_UAVHeap.alloc(sizeInByte); check( m_device->CreatePlacedResource( @@ -1182,16 +1167,13 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) IID_PPV_ARGS(&depthConverted) ) ); - m_UAVHeap.m_putPos.store(heapOffset + sizeInByte); + 
m_UAVHeap.m_resourceStoredSinceLastSync.push_back(std::make_tuple(heapOffset, sizeInByte, depthConverted)); - heapOffset = m_readbackResources.m_putPos.load(); - heapOffset = powerOf2Align(heapOffset, 65536); sizeInByte = depthRowPitch * m_surface_clip_h; + assert(m_readbackResources.canAlloc(sizeInByte)); + heapOffset = m_readbackResources.alloc(sizeInByte); - if (heapOffset + sizeInByte >= 1024 * 1024 * 128) // If it will be stored past heap size - heapOffset = 0; resdesc = getBufferResourceDesc(sizeInByte); - check( m_device->CreatePlacedResource( m_readbackResources.m_heap, @@ -1202,7 +1184,7 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) IID_PPV_ARGS(&writeDest) ) ); - m_readbackResources.m_putPos.store(heapOffset + sizeInByte); + m_readbackResources.m_resourceStoredSinceLastSync.push_back(std::make_tuple(heapOffset, sizeInByte, writeDest)); check( m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, getCurrentResourceStorage().m_commandAllocator, nullptr, IID_PPV_ARGS(&convertCommandList)) @@ -1336,6 +1318,10 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) //Wait for result m_commandQueueGraphic->Signal(fence, 1); + + auto depthUAVCleaning = m_UAVHeap.getCleaningFunction(); + auto readbackCleaning = m_readbackResources.getCleaningFunction(); + m_GC.pushWork([=]() { WaitForSingleObject(handle, INFINITE); CloseHandle(handle); @@ -1364,6 +1350,7 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) depthConverted->Release(); descriptorHeap->Release(); convertCommandList->Release(); + depthUAVCleaning(); } size_t srcPitch, dstPitch; @@ -1448,6 +1435,7 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) if (needTransfer) downloadCommandList->Release(); + readbackCleaning(); vm::write32(m_label_addr + offset, value); }); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 214ae9eb1e..44b1a6fcfb 100644 --- 
a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -286,24 +286,8 @@ private: // Texture storage DataHeap m_textureUploadData; DataHeap m_textureData; - - struct UAVHeap - { - ID3D12Heap *m_heap; - std::atomic m_putPos, // Start of free space - m_getPos; // End of free space - }; - - UAVHeap m_UAVHeap; - - struct ReadbackHeap - { - ID3D12Heap *m_heap; - std::atomic m_putPos, // Start of free space - m_getPos; // End of free space - }; - - ReadbackHeap m_readbackResources; + DataHeap m_UAVHeap; + DataHeap m_readbackResources; bool m_forcedIndexBuffer; size_t indexCount; From 178d0e0e85be08f2b1a9ef792bfc7bc243b47fe9 Mon Sep 17 00:00:00 2001 From: vlj Date: Tue, 16 Jun 2015 21:38:08 +0200 Subject: [PATCH 282/343] d3d12: Try implement mipmap The mipmap level below 0 are currently wrong, find out why --- rpcs3/Emu/RSX/D3D12/D3D12.h | 4 +- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 4 +- rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp | 4 +- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 153 +++++++++++------- 4 files changed, 103 insertions(+), 62 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12.h b/rpcs3/Emu/RSX/D3D12/D3D12.h index c0b061c875..069fff3c4b 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12.h @@ -88,7 +88,7 @@ D3D12_RESOURCE_DESC getBufferResourceDesc(size_t sizeInByte) } inline -D3D12_RESOURCE_DESC getTexture2DResourceDesc(size_t width, size_t height, DXGI_FORMAT dxgiFormat) +D3D12_RESOURCE_DESC getTexture2DResourceDesc(size_t width, size_t height, DXGI_FORMAT dxgiFormat, size_t mipmapLevels) { D3D12_RESOURCE_DESC result; result = {}; @@ -98,7 +98,7 @@ D3D12_RESOURCE_DESC getTexture2DResourceDesc(size_t width, size_t height, DXGI_F result.Format = dxgiFormat; result.DepthOrArraySize = 1; result.SampleDesc.Count = 1; - result.MipLevels = 1; + result.MipLevels = mipmapLevels; return result; } diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 17b92dab94..28eb929c4b 
100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -545,7 +545,7 @@ D3D12GSRender::D3D12GSRender() m_device->CreateCommittedResource( &hp, D3D12_HEAP_FLAG_NONE, - &getTexture2DResourceDesc(2, 2, DXGI_FORMAT_R8G8B8A8_UNORM), + &getTexture2DResourceDesc(2, 2, DXGI_FORMAT_R8G8B8A8_UNORM, 1), D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, IID_PPV_ARGS(&m_dummyTexture)) @@ -1150,7 +1150,7 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) { D3D12_HEAP_PROPERTIES heapProp = {}; heapProp.Type = D3D12_HEAP_TYPE_DEFAULT; - D3D12_RESOURCE_DESC resdesc = getTexture2DResourceDesc(m_surface_clip_w, m_surface_clip_h, DXGI_FORMAT_R8_UNORM); + D3D12_RESOURCE_DESC resdesc = getTexture2DResourceDesc(m_surface_clip_w, m_surface_clip_h, DXGI_FORMAT_R8_UNORM, 1); resdesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; size_t sizeInByte = m_surface_clip_w * m_surface_clip_h * 2; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp index 5a8d776239..7faab5c99b 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp @@ -180,7 +180,7 @@ ID3D12Resource *RenderTargets::bindAddressAsRenderTargets(ID3D12Device *device, D3D12_HEAP_PROPERTIES heapProp = {}; heapProp.Type = D3D12_HEAP_TYPE_DEFAULT; - D3D12_RESOURCE_DESC resourceDesc = getTexture2DResourceDesc(width, height, dxgiFormat); + D3D12_RESOURCE_DESC resourceDesc = getTexture2DResourceDesc(width, height, dxgiFormat, 1); resourceDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; device->CreateCommittedResource( @@ -235,7 +235,7 @@ ID3D12Resource * RenderTargets::bindAddressAsDepthStencil(ID3D12Device * device, assert(0); } - D3D12_RESOURCE_DESC resourceDesc = getTexture2DResourceDesc(width, height, dxgiFormat); + D3D12_RESOURCE_DESC resourceDesc = getTexture2DResourceDesc(width, height, dxgiFormat, 1); resourceDesc.Flags = 
D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL; device->CreateCommittedResource( diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index e87cd0eaa7..4b036bf418 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -136,6 +136,14 @@ D3D12_FILTER getSamplerFilter(u32 minFilter, u32 magFilter) return D3D12_ENCODE_BASIC_FILTER(min, mag, mip, D3D12_FILTER_REDUCTION_TYPE_STANDARD); } +struct MipmapLevelInfo +{ + size_t offset; + size_t width; + size_t height; + size_t rowPitch; +}; + /** * Create a texture residing in default heap and generate uploads commands in commandList, * using a temporary texture buffer. @@ -302,7 +310,7 @@ ID3D12Resource *uploadSingleTexture( size_t rowPitch = powerOf2Align(blockSizeInByte * widthInBlocks, 256); ID3D12Resource *Texture; - size_t textureSize = rowPitch * heightInBlocks; + size_t textureSize = rowPitch * heightInBlocks * 4; // * 3 for mipmap levels assert(textureBuffersHeap.canAlloc(textureSize)); size_t heapOffset = textureBuffersHeap.alloc(textureSize); @@ -321,60 +329,85 @@ ID3D12Resource *uploadSingleTexture( check(Texture->Map(0, nullptr, (void**)&textureData)); // Upload with correct rowpitch - for (unsigned row = 0; row < heightInBlocks; row++) + std::vector mipinfos; + size_t offsetInDst = 0, offsetInSrc = 0; + size_t currentHeight = heightInBlocks, currentWidth = widthInBlocks; + + unsigned tmp = texture.GetMipmap(); + if (tmp > 1) + printf("here"); + for (unsigned mipLevel = 0; mipLevel < texture.GetMipmap(); mipLevel++) { - switch (format) + MipmapLevelInfo currentMipmapLevelInfo = {}; + currentMipmapLevelInfo.offset = offsetInDst; + currentMipmapLevelInfo.height = currentHeight; + currentMipmapLevelInfo.width = currentWidth; + + for (unsigned row = 0; row < currentHeight; row++) { - case CELL_GCM_TEXTURE_A8R8G8B8: - { - if (is_swizzled) + switch (format) { - u32 *src, *dst; - u32 log2width, log2height; + case CELL_GCM_TEXTURE_A8R8G8B8: + { + 
currentMipmapLevelInfo.rowPitch = powerOf2Align(currentWidth * blockSizeInByte, 256); + if (is_swizzled) + { + u32 *src, *dst; + u32 log2width, log2height; - src = (u32*)pixels; - dst = (u32*)textureData; + src = (u32*)pixels + offsetInSrc; + dst = (u32*)textureData + offsetInDst; - log2width = (u32)(logf((float)w) / logf(2.f)); - log2height = (u32)(logf((float)h) / logf(2.f)); + log2width = (u32)(logf((float)currentWidth) / logf(2.f)); + log2height = (u32)(logf((float)currentHeight) / logf(2.f)); + +#pragma omp parallel for + for (int j = 0; j < w; j++) + dst[(row * currentMipmapLevelInfo.rowPitch / 4) + j] = src[LinearToSwizzleAddress(j, row, 0, log2width, log2height, 0)]; + } + else + memcpy((char*)textureData + offsetInDst + row * currentMipmapLevelInfo.rowPitch, (char*)pixels + offsetInSrc + row * currentWidth * blockSizeInByte, currentWidth * blockSizeInByte); + break; + } + case CELL_GCM_TEXTURE_A4R4G4B4: + case CELL_GCM_TEXTURE_R5G6B5: + { + currentMipmapLevelInfo.rowPitch = rowPitch; + unsigned short *dst = (unsigned short *)textureData, *src = (unsigned short *)pixels; - #pragma omp parallel for for (int j = 0; j < w; j++) - dst[(row * rowPitch / 4) + j] = src[LinearToSwizzleAddress(j, row, 0, log2width, log2height, 0)]; + { + u16 tmp = src[offsetInSrc / 2 + row * srcPitch / 2 + j]; + dst[offsetInDst / 2 + row * rowPitch / 2 + j] = (tmp >> 8) | (tmp << 8); + } + break; } - else - streamBuffer((char*)textureData + row * rowPitch, (char*)pixels + row * srcPitch, srcPitch); - break; - } - case CELL_GCM_TEXTURE_A4R4G4B4: - case CELL_GCM_TEXTURE_R5G6B5: - { - unsigned short *dst = (unsigned short *)textureData, *src = (unsigned short *)pixels; - - for (int j = 0; j < w; j++) + case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: { - u16 tmp = src[row * srcPitch / 2 + j]; - dst[row * rowPitch / 2 + j] = (tmp >> 8) | (tmp << 8); - } - break; - } - case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: - { - unsigned short *dst = (unsigned short *)textureData, *src = (unsigned 
short *)pixels; + currentMipmapLevelInfo.rowPitch = rowPitch; + unsigned short *dst = (unsigned short *)textureData, *src = (unsigned short *)pixels; - for (int j = 0; j < w * 4; j++) - { - unsigned short tmp = src[row * w * 4 + j]; - dst[row * w * 4 + j] = (tmp >> 8) | (tmp << 8); + for (int j = 0; j < w * 4; j++) + { + unsigned short tmp = src[offsetInSrc / 2 + row * w * 4 + j]; + dst[offsetInDst / 2 + row * w * 4 + j] = (tmp >> 8) | (tmp << 8); + } + break; + } + default: + { + currentMipmapLevelInfo.rowPitch = rowPitch; + streamBuffer((char*)textureData + offsetInDst + row * rowPitch, (char*)pixels + offsetInSrc + row * srcPitch, srcPitch); + break; + } } - break; - } - default: - { - streamBuffer((char*)textureData + row * rowPitch, (char*)pixels + row * srcPitch, srcPitch); - break; - } } + offsetInDst += currentHeight * currentMipmapLevelInfo.rowPitch; + offsetInDst = powerOf2Align(offsetInDst, 256); + offsetInSrc += currentHeight * currentWidth * blockSizeInByte; + mipinfos.push_back(currentMipmapLevelInfo); + currentHeight /= 2; + currentWidth /= 2; } Texture->Unmap(0, nullptr); @@ -387,25 +420,33 @@ ID3D12Resource *uploadSingleTexture( check(device->CreatePlacedResource( textureHeap.m_heap, heapOffset2, - &getTexture2DResourceDesc(w, h, dxgiFormat), + &getTexture2DResourceDesc(w, h, dxgiFormat, texture.GetMipmap()), D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&vramTexture) )); textureHeap.m_resourceStoredSinceLastSync.push_back(std::make_tuple(heapOffset2, textureSize, vramTexture)); - D3D12_TEXTURE_COPY_LOCATION dst = {}, src = {}; - dst.pResource = vramTexture; - dst.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; - src.pResource = Texture; - src.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; - src.PlacedFootprint.Footprint.Depth = 1; - src.PlacedFootprint.Footprint.Width = (UINT)w; - src.PlacedFootprint.Footprint.Height = (UINT)h; - src.PlacedFootprint.Footprint.RowPitch = (UINT)rowPitch; - src.PlacedFootprint.Footprint.Format = 
dxgiFormat; - commandList->CopyTextureRegion(&dst, 0, 0, 0, &src, nullptr); + size_t miplevel = 0; + for (const MipmapLevelInfo mli : mipinfos) + { + D3D12_TEXTURE_COPY_LOCATION dst = {}, src = {}; + dst.pResource = vramTexture; + dst.SubresourceIndex = miplevel; + dst.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + src.PlacedFootprint.Offset = mli.offset; + src.pResource = Texture; + src.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + src.PlacedFootprint.Footprint.Depth = 1; + src.PlacedFootprint.Footprint.Width = (UINT)mli.width; + src.PlacedFootprint.Footprint.Height = (UINT)mli.height; + src.PlacedFootprint.Footprint.RowPitch = (UINT)mli.rowPitch; + src.PlacedFootprint.Footprint.Format = dxgiFormat; + + commandList->CopyTextureRegion(&dst, 0, 0, 0, &src, nullptr); + miplevel++; + } D3D12_RESOURCE_BARRIER barrier = {}; barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; @@ -461,7 +502,7 @@ size_t D3D12GSRender::UploadTextures() D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; srvDesc.Format = dxgiFormat; - srvDesc.Texture2D.MipLevels = 1; + srvDesc.Texture2D.MipLevels = m_textures[i].GetMipmap(); switch (format) { From eda3c9084e2a3656fd71c5a33bca42f44253891c Mon Sep 17 00:00:00 2001 From: vlj Date: Tue, 16 Jun 2015 21:41:27 +0200 Subject: [PATCH 283/343] d3d12: Fix mipmap data --- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index 4b036bf418..4ace1f5a9f 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -334,8 +334,6 @@ ID3D12Resource *uploadSingleTexture( size_t currentHeight = heightInBlocks, currentWidth = widthInBlocks; unsigned tmp = texture.GetMipmap(); - if (tmp > 1) - printf("here"); for (unsigned mipLevel = 0; mipLevel < texture.GetMipmap(); mipLevel++) { MipmapLevelInfo currentMipmapLevelInfo = {}; @@ -366,7 
+364,7 @@ ID3D12Resource *uploadSingleTexture( dst[(row * currentMipmapLevelInfo.rowPitch / 4) + j] = src[LinearToSwizzleAddress(j, row, 0, log2width, log2height, 0)]; } else - memcpy((char*)textureData + offsetInDst + row * currentMipmapLevelInfo.rowPitch, (char*)pixels + offsetInSrc + row * currentWidth * blockSizeInByte, currentWidth * blockSizeInByte); + memcpy((char*)textureData + offsetInDst + row * currentMipmapLevelInfo.rowPitch, (char*)pixels + offsetInSrc + row * widthInBlocks * blockSizeInByte, currentWidth * blockSizeInByte); break; } case CELL_GCM_TEXTURE_A4R4G4B4: @@ -404,7 +402,7 @@ ID3D12Resource *uploadSingleTexture( } offsetInDst += currentHeight * currentMipmapLevelInfo.rowPitch; offsetInDst = powerOf2Align(offsetInDst, 256); - offsetInSrc += currentHeight * currentWidth * blockSizeInByte; + offsetInSrc += currentHeight * widthInBlocks * blockSizeInByte; mipinfos.push_back(currentMipmapLevelInfo); currentHeight /= 2; currentWidth /= 2; From dbcddcf5e23c9e434b7be80f929156214379db30 Mon Sep 17 00:00:00 2001 From: vlj Date: Wed, 17 Jun 2015 00:06:58 +0200 Subject: [PATCH 284/343] d3d12: Clean up texture upload code Should be easier to read code (and spot bugs). Fix crash with mipmap and DXTCn texture format. --- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 300 ++++++++++++++++++++------- 1 file changed, 220 insertions(+), 80 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index 4ace1f5a9f..35fbf8e245 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -144,6 +144,190 @@ struct MipmapLevelInfo size_t rowPitch; }; +#define MAX2(a, b) ((a) > (b)) ? 
(a) : (b) + +/** + * Write data, assume src pixels are packed but not mipmaplevel + */ +static std::vector +writeTexelsGeneric(const char *src, char *dst, size_t widthInBlock, size_t heightInBlock, size_t blockSize, size_t mipmapCount) +{ + std::vector Result; + size_t offsetInDst = 0, offsetInSrc = 0; + size_t currentHeight = heightInBlock, currentWidth = widthInBlock; + for (unsigned mipLevel = 0; mipLevel < mipmapCount; mipLevel++) + { + size_t rowPitch = powerOf2Align(currentWidth * blockSize, 256); + + MipmapLevelInfo currentMipmapLevelInfo = {}; + currentMipmapLevelInfo.offset = offsetInDst; + currentMipmapLevelInfo.height = currentHeight; + currentMipmapLevelInfo.width = currentWidth; + currentMipmapLevelInfo.rowPitch = rowPitch; + Result.push_back(currentMipmapLevelInfo); + + for (unsigned row = 0; row < currentHeight; row++) + memcpy((char*)dst + offsetInDst + row * rowPitch, (char*)src + offsetInSrc + row * widthInBlock * blockSize, currentWidth * blockSize); + + offsetInDst += currentHeight * rowPitch; + offsetInSrc += currentHeight * widthInBlock * blockSize; + currentHeight = MAX2(currentHeight / 2, 1); + currentWidth = MAX2(currentWidth / 2, 1); + } + return Result; +} + +/** +* Write data, assume src pixels are swizzled and but not mipmaplevel +*/ +static std::vector +writeTexelsSwizzled(const char *src, char *dst, size_t widthInBlock, size_t heightInBlock, size_t blockSize, size_t mipmapCount) +{ + std::vector Result; + size_t offsetInDst = 0, offsetInSrc = 0; + size_t currentHeight = heightInBlock, currentWidth = widthInBlock; + for (unsigned mipLevel = 0; mipLevel < mipmapCount; mipLevel++) + { + size_t rowPitch = powerOf2Align(currentWidth * blockSize, 256); + + MipmapLevelInfo currentMipmapLevelInfo = {}; + currentMipmapLevelInfo.offset = offsetInDst; + currentMipmapLevelInfo.height = currentHeight; + currentMipmapLevelInfo.width = currentWidth; + currentMipmapLevelInfo.rowPitch = rowPitch; + Result.push_back(currentMipmapLevelInfo); + + u32 
*castedSrc, *castedDst; + u32 log2width, log2height; + + castedSrc = (u32*)src + offsetInSrc; + castedDst = (u32*)dst + offsetInDst; + + log2width = (u32)(logf((float)currentWidth) / logf(2.f)); + log2height = (u32)(logf((float)currentHeight) / logf(2.f)); + +#pragma omp parallel for + for (unsigned row = 0; row < currentHeight; row++) + for (int j = 0; j < currentWidth; j++) + castedDst[(row * rowPitch / 4) + j] = castedSrc[LinearToSwizzleAddress(j, row, 0, log2width, log2height, 0)]; + + offsetInDst += currentHeight * rowPitch; + offsetInSrc += currentHeight * widthInBlock * blockSize; + currentHeight = MAX2(currentHeight / 2, 1); + currentWidth = MAX2(currentWidth / 2, 1); + } + return Result; +} + + +/** +* Write data, assume compressed (DXTCn) format +*/ +static std::vector +writeCompressedTexel(const char *src, char *dst, size_t widthInBlock, size_t blockWidth, size_t heightInBlock, size_t blockHeight, size_t blockSize, size_t mipmapCount) +{ + std::vector Result; + size_t offsetInDst = 0, offsetInSrc = 0; + size_t currentHeight = heightInBlock, currentWidth = widthInBlock; + for (unsigned mipLevel = 0; mipLevel < mipmapCount; mipLevel++) + { + size_t rowPitch = powerOf2Align(currentWidth * blockSize, 256); + + MipmapLevelInfo currentMipmapLevelInfo = {}; + currentMipmapLevelInfo.offset = offsetInDst; + currentMipmapLevelInfo.height = currentHeight * blockHeight; + currentMipmapLevelInfo.width = currentWidth * blockWidth; + currentMipmapLevelInfo.rowPitch = rowPitch; + Result.push_back(currentMipmapLevelInfo); + + for (unsigned row = 0; row < currentHeight; row++) + memcpy((char*)dst + offsetInDst + row * rowPitch, (char*)src + offsetInSrc + row * currentWidth * blockSize, currentWidth * blockSize); + + offsetInDst += currentHeight * rowPitch; + offsetInDst = powerOf2Align(offsetInDst, 512); + offsetInSrc += currentHeight * currentWidth * blockSize; + currentHeight = MAX2(currentHeight / 2, 1); + currentWidth = MAX2(currentWidth / 2, 1); + } + return Result; 
+} + +/** +* Write 16 bytes pixel textures, assume src pixels are packed but not mipmaplevel +*/ +static std::vector +write16bTexelsGeneric(const char *src, char *dst, size_t widthInBlock, size_t heightInBlock, size_t blockSize, size_t mipmapCount) +{ + std::vector Result; + size_t offsetInDst = 0, offsetInSrc = 0; + size_t currentHeight = heightInBlock, currentWidth = widthInBlock; + size_t srcPitch = widthInBlock * blockSize; + for (unsigned mipLevel = 0; mipLevel < mipmapCount; mipLevel++) + { + size_t rowPitch = powerOf2Align(currentWidth * blockSize, 256); + + MipmapLevelInfo currentMipmapLevelInfo = {}; + currentMipmapLevelInfo.offset = offsetInDst; + currentMipmapLevelInfo.height = currentHeight; + currentMipmapLevelInfo.width = currentWidth; + currentMipmapLevelInfo.rowPitch = rowPitch; + Result.push_back(currentMipmapLevelInfo); + + unsigned short *castedDst = (unsigned short *)dst, *castedSrc = (unsigned short *)src; + + for (unsigned row = 0; row < heightInBlock; row++) + for (int j = 0; j < currentWidth; j++) + { + u16 tmp = castedSrc[offsetInSrc / 2 + row * srcPitch / 2 + j]; + castedDst[offsetInDst / 2 + row * rowPitch / 2 + j] = (tmp >> 8) | (tmp << 8); + } + + offsetInDst += currentHeight * rowPitch; + offsetInSrc += currentHeight * widthInBlock * blockSize; + currentHeight = MAX2(currentHeight / 2, 1); + currentWidth = MAX2(currentWidth / 2, 1); + } + return Result; +} + +/** +* Write 16 bytes pixel textures, assume src pixels are packed but not mipmaplevel +*/ +static std::vector +write16bX4TexelsGeneric(const char *src, char *dst, size_t widthInBlock, size_t heightInBlock, size_t blockSize, size_t mipmapCount) +{ + std::vector Result; + size_t offsetInDst = 0, offsetInSrc = 0; + size_t currentHeight = heightInBlock, currentWidth = widthInBlock; + size_t srcPitch = widthInBlock * blockSize; + for (unsigned mipLevel = 0; mipLevel < mipmapCount; mipLevel++) + { + size_t rowPitch = powerOf2Align(currentWidth * blockSize, 256); + + MipmapLevelInfo 
currentMipmapLevelInfo = {}; + currentMipmapLevelInfo.offset = offsetInDst; + currentMipmapLevelInfo.height = currentHeight; + currentMipmapLevelInfo.width = currentWidth; + currentMipmapLevelInfo.rowPitch = rowPitch; + Result.push_back(currentMipmapLevelInfo); + + unsigned short *castedDst = (unsigned short *)dst, *castedSrc = (unsigned short *)src; + + for (unsigned row = 0; row < heightInBlock; row++) + for (int j = 0; j < currentWidth * 4; j++) + { + u16 tmp = castedSrc[offsetInSrc / 2 + row * srcPitch / 2 + j]; + castedDst[offsetInDst / 2 + row * rowPitch / 2 + j] = (tmp >> 8) | (tmp << 8); + } + + offsetInDst += currentHeight * rowPitch; + offsetInSrc += currentHeight * widthInBlock * blockSize; + currentHeight = MAX2(currentHeight / 2, 1); + currentWidth = MAX2(currentWidth / 2, 1); + } + return Result; +} + /** * Create a texture residing in default heap and generate uploads commands in commandList, * using a temporary texture buffer. @@ -310,7 +494,7 @@ ID3D12Resource *uploadSingleTexture( size_t rowPitch = powerOf2Align(blockSizeInByte * widthInBlocks, 256); ID3D12Resource *Texture; - size_t textureSize = rowPitch * heightInBlocks * 4; // * 3 for mipmap levels + size_t textureSize = rowPitch * heightInBlocks * 4; // * 4 for mipmap levels assert(textureBuffersHeap.canAlloc(textureSize)); size_t heapOffset = textureBuffersHeap.alloc(textureSize); @@ -327,85 +511,41 @@ ID3D12Resource *uploadSingleTexture( auto pixels = vm::get_ptr(texaddr); void *textureData; check(Texture->Map(0, nullptr, (void**)&textureData)); + std::vector mipInfos; - // Upload with correct rowpitch - std::vector mipinfos; - size_t offsetInDst = 0, offsetInSrc = 0; - size_t currentHeight = heightInBlocks, currentWidth = widthInBlocks; - - unsigned tmp = texture.GetMipmap(); - for (unsigned mipLevel = 0; mipLevel < texture.GetMipmap(); mipLevel++) + switch (format) { - MipmapLevelInfo currentMipmapLevelInfo = {}; - currentMipmapLevelInfo.offset = offsetInDst; - 
currentMipmapLevelInfo.height = currentHeight; - currentMipmapLevelInfo.width = currentWidth; - - for (unsigned row = 0; row < currentHeight; row++) - { - switch (format) - { - case CELL_GCM_TEXTURE_A8R8G8B8: - { - currentMipmapLevelInfo.rowPitch = powerOf2Align(currentWidth * blockSizeInByte, 256); - if (is_swizzled) - { - u32 *src, *dst; - u32 log2width, log2height; - - src = (u32*)pixels + offsetInSrc; - dst = (u32*)textureData + offsetInDst; - - log2width = (u32)(logf((float)currentWidth) / logf(2.f)); - log2height = (u32)(logf((float)currentHeight) / logf(2.f)); - -#pragma omp parallel for - for (int j = 0; j < w; j++) - dst[(row * currentMipmapLevelInfo.rowPitch / 4) + j] = src[LinearToSwizzleAddress(j, row, 0, log2width, log2height, 0)]; - } - else - memcpy((char*)textureData + offsetInDst + row * currentMipmapLevelInfo.rowPitch, (char*)pixels + offsetInSrc + row * widthInBlocks * blockSizeInByte, currentWidth * blockSizeInByte); - break; - } - case CELL_GCM_TEXTURE_A4R4G4B4: - case CELL_GCM_TEXTURE_R5G6B5: - { - currentMipmapLevelInfo.rowPitch = rowPitch; - unsigned short *dst = (unsigned short *)textureData, *src = (unsigned short *)pixels; - - for (int j = 0; j < w; j++) - { - u16 tmp = src[offsetInSrc / 2 + row * srcPitch / 2 + j]; - dst[offsetInDst / 2 + row * rowPitch / 2 + j] = (tmp >> 8) | (tmp << 8); - } - break; - } - case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: - { - currentMipmapLevelInfo.rowPitch = rowPitch; - unsigned short *dst = (unsigned short *)textureData, *src = (unsigned short *)pixels; - - for (int j = 0; j < w * 4; j++) - { - unsigned short tmp = src[offsetInSrc / 2 + row * w * 4 + j]; - dst[offsetInDst / 2 + row * w * 4 + j] = (tmp >> 8) | (tmp << 8); - } - break; - } - default: - { - currentMipmapLevelInfo.rowPitch = rowPitch; - streamBuffer((char*)textureData + offsetInDst + row * rowPitch, (char*)pixels + offsetInSrc + row * srcPitch, srcPitch); - break; - } - } - } - offsetInDst += currentHeight * currentMipmapLevelInfo.rowPitch; 
- offsetInDst = powerOf2Align(offsetInDst, 256); - offsetInSrc += currentHeight * widthInBlocks * blockSizeInByte; - mipinfos.push_back(currentMipmapLevelInfo); - currentHeight /= 2; - currentWidth /= 2; + case CELL_GCM_TEXTURE_A8R8G8B8: + { + if (is_swizzled) + mipInfos = writeTexelsSwizzled((char*)pixels, (char*)textureData, w, h, 4, texture.GetMipmap()); + else + mipInfos = writeTexelsGeneric((char*)pixels, (char*)textureData, w, h, 4, texture.GetMipmap()); + break; + } + case CELL_GCM_TEXTURE_A4R4G4B4: + case CELL_GCM_TEXTURE_R5G6B5: + { + mipInfos = write16bTexelsGeneric((char*)pixels, (char*)textureData, w, h, 2, texture.GetMipmap()); + break; + } + case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: + { + mipInfos = write16bX4TexelsGeneric((char*)pixels, (char*)textureData, w, h, 8, texture.GetMipmap()); + break; + } + case CELL_GCM_TEXTURE_COMPRESSED_DXT1: + case CELL_GCM_TEXTURE_COMPRESSED_DXT23: + case CELL_GCM_TEXTURE_COMPRESSED_DXT45: + { + mipInfos = writeCompressedTexel((char*)pixels, (char*)textureData, widthInBlocks, blockWidthInPixel, heightInBlocks, blockHeightInPixel, blockSizeInByte, texture.GetMipmap()); + break; + } + default: + { + mipInfos = writeTexelsGeneric((char*)pixels, (char*)textureData, w, h, blockSizeInByte, texture.GetMipmap()); + break; + } } Texture->Unmap(0, nullptr); @@ -427,11 +567,11 @@ ID3D12Resource *uploadSingleTexture( size_t miplevel = 0; - for (const MipmapLevelInfo mli : mipinfos) + for (const MipmapLevelInfo mli : mipInfos) { D3D12_TEXTURE_COPY_LOCATION dst = {}, src = {}; dst.pResource = vramTexture; - dst.SubresourceIndex = miplevel; + dst.SubresourceIndex = (UINT)miplevel; dst.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; src.PlacedFootprint.Offset = mli.offset; src.pResource = Texture; From 628acbf0b4ad7fec854893c2661d49ff1b70778f Mon Sep 17 00:00:00 2001 From: vlj Date: Wed, 17 Jun 2015 00:26:49 +0200 Subject: [PATCH 285/343] d3d12: Do not reserve a lot more than necessary. 
--- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index 35fbf8e245..c0e8b4609d 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -494,7 +494,7 @@ ID3D12Resource *uploadSingleTexture( size_t rowPitch = powerOf2Align(blockSizeInByte * widthInBlocks, 256); ID3D12Resource *Texture; - size_t textureSize = rowPitch * heightInBlocks * 4; // * 4 for mipmap levels + size_t textureSize = rowPitch * heightInBlocks * 2; // * 4 for mipmap levels assert(textureBuffersHeap.canAlloc(textureSize)); size_t heapOffset = textureBuffersHeap.alloc(textureSize); From ad3e50f90fb28cc8ce497a7f19b691fa895a30d9 Mon Sep 17 00:00:00 2001 From: vlj Date: Thu, 18 Jun 2015 15:27:26 +0200 Subject: [PATCH 286/343] d3d12: Do not guess texture size but use actual value --- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index c0e8b4609d..9916dfd74e 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -549,8 +549,8 @@ ID3D12Resource *uploadSingleTexture( } Texture->Unmap(0, nullptr); - size_t powerOf2Height = (size_t)log2f((float)heightInBlocks) + 1; - textureSize = rowPitch * (1i64 << powerOf2Height); + D3D12_RESOURCE_DESC texturedesc = getTexture2DResourceDesc(w, h, dxgiFormat, texture.GetMipmap()); + textureSize = device->GetResourceAllocationInfo(0, 1, &texturedesc).SizeInBytes; assert(textureHeap.canAlloc(textureSize)); size_t heapOffset2 = textureHeap.alloc(textureSize); @@ -558,7 +558,7 @@ ID3D12Resource *uploadSingleTexture( check(device->CreatePlacedResource( textureHeap.m_heap, heapOffset2, - &getTexture2DResourceDesc(w, h, dxgiFormat, texture.GetMipmap()), + &texturedesc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&vramTexture) 
From 3f495689c075bddd32636c88862bf986f3ffa060 Mon Sep 17 00:00:00 2001 From: vlj Date: Thu, 18 Jun 2015 22:29:59 +0200 Subject: [PATCH 287/343] d3d12: Ignore texture with 0 width/height Fix crash in voodoo chronicles. --- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index 9916dfd74e..ae258a319f 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -603,6 +603,7 @@ size_t D3D12GSRender::UploadTextures() { if (!m_textures[i].IsEnabled()) continue; size_t w = m_textures[i].GetWidth(), h = m_textures[i].GetHeight(); + if (!w || !h) continue; const u32 texaddr = GetAddress(m_textures[i].GetOffset(), m_textures[i].GetLocation()); From 03a84cb20861454e3df575dccee0d6753ccbf50b Mon Sep 17 00:00:00 2001 From: vlj Date: Sat, 20 Jun 2015 19:01:22 +0200 Subject: [PATCH 288/343] d3d12: Measure time spent uploading texture and vertex --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 15 +++++++++++++++ rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 8 ++++++++ 2 files changed, 23 insertions(+) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 28eb929c4b..ca3564072a 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -730,6 +730,8 @@ void D3D12GSRender::ExecCMD() m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, getCurrentResourceStorage().m_commandAllocator, nullptr, IID_PPV_ARGS(&commandList)); getCurrentResourceStorage().m_inflightCommandList.push_back(commandList); + + std::chrono::time_point startVertexTime = std::chrono::system_clock::now(); if (m_indexed_array.m_count || m_draw_array_count) { const std::pair, D3D12_INDEX_BUFFER_VIEW> &vertexIndexBufferViews = UploadVertexBuffers(m_indexed_array.m_count ? 
true : false); @@ -737,6 +739,8 @@ void D3D12GSRender::ExecCMD() if (m_forcedIndexBuffer || m_indexed_array.m_count) commandList->IASetIndexBuffer(&vertexIndexBufferViews.second); } + std::chrono::time_point endVertexTime = std::chrono::system_clock::now(); + m_timers.m_vertexUploadDuration += std::chrono::duration_cast(endVertexTime - startVertexTime).count(); if (!LoadProgram()) { @@ -770,6 +774,7 @@ void D3D12GSRender::ExecCMD() if (m_PSO->second > 0) { + std::chrono::time_point startTextureTime = std::chrono::system_clock::now(); size_t usedTexture = UploadTextures(); // Fill empty slots @@ -809,6 +814,8 @@ void D3D12GSRender::ExecCMD() commandList->SetGraphicsRootDescriptorTable(3, Handle); getCurrentResourceStorage().m_currentTextureIndex += usedTexture; + std::chrono::time_point endTextureTime = std::chrono::system_clock::now(); + m_timers.m_textureUploadDuration += std::chrono::duration_cast(endTextureTime - startTextureTime).count(); } size_t numRTT; @@ -1042,6 +1049,14 @@ void D3D12GSRender::Flip() while (getCurrentResourceStorage().m_frameFinishedHandle) std::this_thread::yield(); m_frame->Flip(nullptr); + + ResetTimer(); +} + +void D3D12GSRender::ResetTimer() +{ + m_timers.m_textureUploadDuration = 0; + m_timers.m_vertexUploadDuration = 0; } D3D12GSRender::ResourceStorage& D3D12GSRender::getCurrentResourceStorage() diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 44b1a6fcfb..eccf800f9e 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -214,6 +214,14 @@ private: // m_rootSignatures[N] is RS with N texture/sample ID3D12RootSignature *m_rootSignatures[17]; + struct + { + size_t m_vertexUploadDuration; + size_t m_textureUploadDuration; + } m_timers; + + void ResetTimer(); + struct Shader { ID3D12PipelineState *m_PSO; From 8cc9642b96ff5adc430c8e7ae7ceec71e7f309d9 Mon Sep 17 00:00:00 2001 From: vlj Date: Sun, 21 Jun 2015 00:51:38 +0200 Subject: [PATCH 289/343] Completly 
unclean way to track texture modification between frames --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 17 +++++++++++++++++ rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 1 + rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 5 +++++ 3 files changed, 23 insertions(+) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index ca3564072a..fe42fc79ba 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -392,9 +392,26 @@ void D3D12GSRender::Shader::Release() m_samplerDescriptorHeap->Release(); } +extern std::function gfxHandler; + D3D12GSRender::D3D12GSRender() : GSRender(), m_PSO(nullptr) { + + gfxHandler = [this](u32 addr) { + LOG_ERROR(RSX, "CATCH SEGFAULT %x", addr); + for (auto tmp : texaddrs) + { + if (addr - tmp.first < tmp.second) + { + LOG_ERROR(RSX, "Modified %x range, starting again", tmp.first); + vm::page_protect(tmp.first, tmp.second, 0, vm::page_writable, 0); + return true; + } + } + + return false; + }; loadD3D12FunctionPointers(); if (Ini.GSDebugOutputEnable.GetValue()) { diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index eccf800f9e..2fa3efa42b 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -202,6 +202,7 @@ struct GarbageCollectionThread class D3D12GSRender : public GSRender { private: + std::vector > texaddrs; // Address, size GarbageCollectionThread m_GC; // Copy of RTT to be used as texture std::unordered_map m_texturesRTTs; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index ae258a319f..3f51529d77 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -636,6 +636,11 @@ size_t D3D12GSRender::UploadTextures() m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)&commandList); getCurrentResourceStorage().m_inflightCommandList.push_back(commandList); m_texturesCache[texaddr] = vramTexture; + + size_t s = 
powerOf2Align(w * h * 4, 4096); + LOG_ERROR(RSX, "PROTECTING %x of size %d", powerOf2Align(texaddr, 4096), s); + texaddrs.push_back(std::make_pair(texaddr & ~0xfff, s)); + vm::page_protect(texaddr & ~0xfff, s, 0, 0, vm::page_writable); } D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; From c6a5e905bc1a62e1ace617b78dcd967b15c8173c Mon Sep 17 00:00:00 2001 From: vlj Date: Sun, 21 Jun 2015 18:13:28 +0200 Subject: [PATCH 290/343] d3d12: enable texture caching Bring a little more perf in arkedo 2 --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 37 +++++++++++++++++++-------- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 9 ++++++- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 26 +++++++++---------- 3 files changed, 47 insertions(+), 25 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index fe42fc79ba..bb30cba3db 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -399,18 +399,29 @@ D3D12GSRender::D3D12GSRender() { gfxHandler = [this](u32 addr) { - LOG_ERROR(RSX, "CATCH SEGFAULT %x", addr); - for (auto tmp : texaddrs) + bool handled = false; + auto It = m_protectedTextures.begin(), E = m_protectedTextures.end(); + for (; It != E;) { - if (addr - tmp.first < tmp.second) + auto currentIt = It; + ++It; + auto protectedTexture = *currentIt; + u32 protectedRangeStart = std::get<1>(protectedTexture), protectedRangeSize = std::get<2>(protectedTexture); + if (addr - protectedRangeStart < protectedRangeSize) { - LOG_ERROR(RSX, "Modified %x range, starting again", tmp.first); - vm::page_protect(tmp.first, tmp.second, 0, vm::page_writable, 0); - return true; + std::lock_guard lock(mut); + u32 texadrr = std::get<0>(protectedTexture); + LOG_WARNING(RSX, "Modified %x, starting again", texadrr); + ID3D12Resource *texToErase = m_texturesCache[texadrr]; + m_texturesCache.erase(texadrr); + m_Textoclean.push_back(texToErase); + + vm::page_protect(protectedRangeStart, protectedRangeSize, 0, vm::page_writable, 0); 
+ m_protectedTextures.erase(currentIt); + handled = true; } } - - return false; + return handled; }; loadD3D12FunctionPointers(); if (Ini.GSDebugOutputEnable.GetValue()) @@ -1041,7 +1052,6 @@ void D3D12GSRender::Flip() m_commandQueueGraphic->Signal(storage.m_frameFinishedFence, 1); // Flush - m_texturesCache.clear(); m_texturesRTTs.clear(); std::vector > cleaningFunction = @@ -1052,7 +1062,11 @@ void D3D12GSRender::Flip() m_textureData.getCleaningFunction() }; - m_GC.pushWork([&, cleaningFunction]() + std::lock_guard lock(mut); + std::vector textoclean = m_Textoclean; + m_Textoclean.clear(); + + m_GC.pushWork([&, cleaningFunction, textoclean]() { WaitForSingleObject(storage.m_frameFinishedHandle, INFINITE); CloseHandle(storage.m_frameFinishedHandle); @@ -1061,6 +1075,9 @@ void D3D12GSRender::Flip() for (unsigned i = 0; i < 4; i++) cleaningFunction[i](); storage.Reset(); + + for (auto tmp : textoclean) + tmp->Release(); }); while (getCurrentResourceStorage().m_frameFinishedHandle) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 2fa3efa42b..5eb4058930 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -202,7 +202,14 @@ struct GarbageCollectionThread class D3D12GSRender : public GSRender { private: - std::vector > texaddrs; // Address, size + /** + * Mutex protecting m_texturesCache and m_Textoclean access + * Memory protection fault catch can be generated by any thread and + * modifies these two members. 
+ */ + std::mutex mut; + std::list > m_protectedTextures; // Texaddress, start of protected range, size of protected range + std::vector m_Textoclean; GarbageCollectionThread m_GC; // Copy of RTT to be used as texture std::unordered_map m_texturesRTTs; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index 3f51529d77..876f0faa86 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -337,8 +337,7 @@ ID3D12Resource *uploadSingleTexture( const RSXTexture &texture, ID3D12Device *device, ID3D12GraphicsCommandList *commandList, - DataHeap &textureBuffersHeap, - DataHeap &textureHeap) + DataHeap &textureBuffersHeap) { ID3D12Resource *vramTexture; size_t w = texture.GetWidth(), h = texture.GetHeight(); @@ -552,19 +551,17 @@ ID3D12Resource *uploadSingleTexture( D3D12_RESOURCE_DESC texturedesc = getTexture2DResourceDesc(w, h, dxgiFormat, texture.GetMipmap()); textureSize = device->GetResourceAllocationInfo(0, 1, &texturedesc).SizeInBytes; - assert(textureHeap.canAlloc(textureSize)); - size_t heapOffset2 = textureHeap.alloc(textureSize); + D3D12_HEAP_PROPERTIES heapProp = {}; + heapProp.Type = D3D12_HEAP_TYPE_DEFAULT; - check(device->CreatePlacedResource( - textureHeap.m_heap, - heapOffset2, + check(device->CreateCommittedResource( + &heapProp, + D3D12_HEAP_FLAG_NONE, &texturedesc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&vramTexture) )); - textureHeap.m_resourceStoredSinceLastSync.push_back(std::make_tuple(heapOffset2, textureSize, vramTexture)); - size_t miplevel = 0; for (const MipmapLevelInfo mli : mipInfos) @@ -597,6 +594,7 @@ ID3D12Resource *uploadSingleTexture( size_t D3D12GSRender::UploadTextures() { + std::lock_guard lock(mut); size_t usedTexture = 0; for (u32 i = 0; i < m_textures_count; ++i) @@ -630,17 +628,17 @@ size_t D3D12GSRender::UploadTextures() ID3D12GraphicsCommandList *commandList; check(m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, 
getCurrentResourceStorage().m_textureUploadCommandAllocator, nullptr, IID_PPV_ARGS(&commandList))); - vramTexture = uploadSingleTexture(m_textures[i], m_device, commandList, m_textureUploadData, m_textureData); + vramTexture = uploadSingleTexture(m_textures[i], m_device, commandList, m_textureUploadData); commandList->Close(); m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)&commandList); getCurrentResourceStorage().m_inflightCommandList.push_back(commandList); m_texturesCache[texaddr] = vramTexture; - size_t s = powerOf2Align(w * h * 4, 4096); - LOG_ERROR(RSX, "PROTECTING %x of size %d", powerOf2Align(texaddr, 4096), s); - texaddrs.push_back(std::make_pair(texaddr & ~0xfff, s)); - vm::page_protect(texaddr & ~0xfff, s, 0, 0, vm::page_writable); + u32 s = align(w * h * 4, 4096); + LOG_WARNING(RSX, "PROTECTING %x of size %d", align(texaddr, 4096), s); + m_protectedTextures.push_back(std::make_tuple(texaddr, align(texaddr, 4096), s)); + vm::page_protect(align(texaddr, 4096), s, 0, 0, vm::page_writable); } D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; From d29b82566ef99ddc152d4ba390ee2d7f197d948c Mon Sep 17 00:00:00 2001 From: vlj Date: Sun, 21 Jun 2015 18:38:23 +0200 Subject: [PATCH 291/343] d3d12: Fix memleak --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 10 +++++++--- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 3 ++- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index bb30cba3db..33fbdf8b71 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -414,7 +414,7 @@ D3D12GSRender::D3D12GSRender() LOG_WARNING(RSX, "Modified %x, starting again", texadrr); ID3D12Resource *texToErase = m_texturesCache[texadrr]; m_texturesCache.erase(texadrr); - m_Textoclean.push_back(texToErase); + m_texToClean.push_back(texToErase); vm::page_protect(protectedRangeStart, protectedRangeSize, 0, vm::page_writable, 0); 
m_protectedTextures.erase(currentIt); @@ -613,6 +613,10 @@ D3D12GSRender::~D3D12GSRender() m_rtts.Release(); for (unsigned i = 0; i < 17; i++) m_rootSignatures[i]->Release(); + for (auto tmp : m_texToClean) + tmp->Release(); + for (auto tmp : m_texturesCache) + tmp.second->Release(); m_swapChain->Release(); m_outputScalingPass.Release(); m_device->Release(); @@ -1063,8 +1067,8 @@ void D3D12GSRender::Flip() }; std::lock_guard lock(mut); - std::vector textoclean = m_Textoclean; - m_Textoclean.clear(); + std::vector textoclean = m_texToClean; + m_texToClean.clear(); m_GC.pushWork([&, cleaningFunction, textoclean]() { diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 5eb4058930..7934a566e3 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -209,7 +209,8 @@ private: */ std::mutex mut; std::list > m_protectedTextures; // Texaddress, start of protected range, size of protected range - std::vector m_Textoclean; + std::vector m_texToClean; + GarbageCollectionThread m_GC; // Copy of RTT to be used as texture std::unordered_map m_texturesRTTs; From 294d649012f6c17e7e4f4fbcb4e7d2befc91e6f3 Mon Sep 17 00:00:00 2001 From: vlj Date: Sun, 21 Jun 2015 18:41:51 +0200 Subject: [PATCH 292/343] d3d12: Fix a type warning --- rpcs3/Emu/RSX/D3D12/D3D12.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12.h b/rpcs3/Emu/RSX/D3D12/D3D12.h index 069fff3c4b..506d84b578 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12.h @@ -98,7 +98,7 @@ D3D12_RESOURCE_DESC getTexture2DResourceDesc(size_t width, size_t height, DXGI_F result.Format = dxgiFormat; result.DepthOrArraySize = 1; result.SampleDesc.Count = 1; - result.MipLevels = mipmapLevels; + result.MipLevels = (UINT16)mipmapLevels; return result; } From 4ee66a2680e6805f2c55c15994af9f5d105a6299 Mon Sep 17 00:00:00 2001 From: vlj Date: Sun, 21 Jun 2015 23:45:06 +0200 Subject: [PATCH 293/343] d3d12: 
Implement intraframe vertex caching --- rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp | 25 ++++++++++++++++++++++++- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 1 + rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 4 ++++ 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index eaf26a827d..ce1f31d0cd 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -264,6 +264,17 @@ ID3D12Resource *createVertexBuffer(const VertexBufferFormat &vbf, const RSXVerte return vertexBuffer; } +static bool +isContained(const std::vector > &ranges, const std::pair &range) +{ + for (auto &r : ranges) + { + if (r == range) + return true; + } + return false; +} + std::pair, D3D12_INDEX_BUFFER_VIEW> D3D12GSRender::UploadVertexBuffers(bool indexed_draw) { std::pair, D3D12_INDEX_BUFFER_VIEW> result; @@ -280,7 +291,19 @@ std::pair, D3D12_INDEX_BUFFER_VIEW> D3D12G if (vbf.stride) subBufferSize = ((subBufferSize + vbf.stride - 1) / vbf.stride) * vbf.stride; - ID3D12Resource *vertexBuffer = createVertexBuffer(vbf, m_vertex_data, m_device, m_vertexIndexData); + u64 key = vbf.range.first; + key = key << 32; + key = key | vbf.range.second; + auto It = m_vertexCache.find(key); + + ID3D12Resource *vertexBuffer; + if (It != m_vertexCache.end()) + vertexBuffer = It->second; + else + { + vertexBuffer = createVertexBuffer(vbf, m_vertex_data, m_device, m_vertexIndexData); + m_vertexCache[key] = vertexBuffer; + } D3D12_VERTEX_BUFFER_VIEW vertexBufferView = {}; vertexBufferView.BufferLocation = vertexBuffer->GetGPUVirtualAddress(); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 33fbdf8b71..23aa68cbea 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -1057,6 +1057,7 @@ void D3D12GSRender::Flip() // Flush m_texturesRTTs.clear(); + m_vertexCache.clear(); std::vector > cleaningFunction = { diff --git 
a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 7934a566e3..bccb6e6042 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -218,6 +218,10 @@ private: std::unordered_map m_texturesCache; // std::vector m_post_draw_objs; + // TODO: Use a tree structure to parse more efficiently + // Key is begin << 32 | end + std::unordered_map m_vertexCache; + PipelineStateObjectCache m_cachePSO; std::pair *m_PSO; // m_rootSignatures[N] is RS with N texture/sample From 5102241ac2db0552f356c540e6b8526371c30028 Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 22 Jun 2015 00:17:40 +0200 Subject: [PATCH 294/343] d3d12: Compute texture size in host mem --- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 80 ++++++++++++++++++++++++++-- 1 file changed, 76 insertions(+), 4 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index 876f0faa86..60660d8d4b 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -592,6 +592,78 @@ ID3D12Resource *uploadSingleTexture( return vramTexture; } +/** + * Get number of bytes occupied by texture in RSX mem + */ +static +size_t getTextureSize(const RSXTexture &texture) +{ + size_t w = texture.GetWidth(), h = texture.GetHeight(); + + int format = texture.GetFormat() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); + // TODO: Take mipmaps into account + switch (format) + { + case CELL_GCM_TEXTURE_COMPRESSED_HILO8: + case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8: + case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: + case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: + default: + LOG_ERROR(RSX, "Unimplemented Texture format : %x", format); + break; + case CELL_GCM_TEXTURE_B8: + return w * h; + case CELL_GCM_TEXTURE_A1R5G5B5: + return w * h * 2; + case CELL_GCM_TEXTURE_A4R4G4B4: + return w * h * 4; + case CELL_GCM_TEXTURE_R5G6B5: + 
return w * h * 2; + case CELL_GCM_TEXTURE_A8R8G8B8: + return w * h * 4; + case CELL_GCM_TEXTURE_COMPRESSED_DXT1: + return w * h / 6; + case CELL_GCM_TEXTURE_COMPRESSED_DXT23: + return w * h / 4; + case CELL_GCM_TEXTURE_COMPRESSED_DXT45: + return w * h / 4; + case CELL_GCM_TEXTURE_G8B8: + return w * h * 2; + case CELL_GCM_TEXTURE_R6G5B5: + return w * h * 2; + case CELL_GCM_TEXTURE_DEPTH24_D8: + return w * h * 4; + case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: + return w * h * 4; + case CELL_GCM_TEXTURE_DEPTH16: + return w * h * 2; + case CELL_GCM_TEXTURE_DEPTH16_FLOAT: + return w * h * 2; + case CELL_GCM_TEXTURE_X16: + return w * h * 2; + case CELL_GCM_TEXTURE_Y16_X16: + return w * h * 4; + case CELL_GCM_TEXTURE_R5G5B5A1: + return w * h * 2; + case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: + return w * h * 8; + case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: + return w * h * 16; + case CELL_GCM_TEXTURE_X32_FLOAT: + return w * h * 4; + case CELL_GCM_TEXTURE_D1R5G5B5: + return w * h * 2; + case CELL_GCM_TEXTURE_Y16_X16_FLOAT: + return w * h * 4; + case CELL_GCM_TEXTURE_D8R8G8B8: + return w * h * 4; + case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: + return w * h * 4; + case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: + return w * h * 4; + } +} + size_t D3D12GSRender::UploadTextures() { std::lock_guard lock(mut); @@ -630,15 +702,15 @@ size_t D3D12GSRender::UploadTextures() vramTexture = uploadSingleTexture(m_textures[i], m_device, commandList, m_textureUploadData); - commandList->Close(); + check(commandList->Close()); m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)&commandList); getCurrentResourceStorage().m_inflightCommandList.push_back(commandList); m_texturesCache[texaddr] = vramTexture; - u32 s = align(w * h * 4, 4096); + u32 s = align(getTextureSize(m_textures[i]), 4096); LOG_WARNING(RSX, "PROTECTING %x of size %d", align(texaddr, 4096), s); - m_protectedTextures.push_back(std::make_tuple(texaddr, align(texaddr, 4096), s)); - vm::page_protect(align(texaddr, 
4096), s, 0, 0, vm::page_writable); +// m_protectedTextures.push_back(std::make_tuple(texaddr, align(texaddr, 4096), s)); +// vm::page_protect(align(texaddr, 4096), s, 0, 0, vm::page_writable); } D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; From d88d078f4af44a1bd77f19ae8870e1ff03004913 Mon Sep 17 00:00:00 2001 From: vlj Date: Thu, 25 Jun 2015 17:07:18 +0200 Subject: [PATCH 295/343] d3d12: Fix left over commented code --- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index 60660d8d4b..aa50654274 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -709,8 +709,8 @@ size_t D3D12GSRender::UploadTextures() u32 s = align(getTextureSize(m_textures[i]), 4096); LOG_WARNING(RSX, "PROTECTING %x of size %d", align(texaddr, 4096), s); -// m_protectedTextures.push_back(std::make_tuple(texaddr, align(texaddr, 4096), s)); -// vm::page_protect(align(texaddr, 4096), s, 0, 0, vm::page_writable); + m_protectedTextures.push_back(std::make_tuple(texaddr, align(texaddr, 4096), s)); + vm::page_protect(align(texaddr, 4096), s, 0, 0, vm::page_writable); } D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; From 8f31211557b653f591e46609d688a4e0353e4a52 Mon Sep 17 00:00:00 2001 From: vlj Date: Thu, 25 Jun 2015 18:19:14 +0200 Subject: [PATCH 296/343] d3d12: Avoid copying 8k of constant data per draw call --- rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp | 14 +++++++++++--- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 4 +--- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 3 ++- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index ce1f31d0cd..7ea9c3e6fc 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -473,8 +473,7 @@ void D3D12GSRender::FillVertexShaderConstantsBuffer() for (const RSXTransformConstant& c : 
m_transform_constants) { size_t offset = c.id * 4 * sizeof(float); - float vector[] = { c.x, c.y, c.z, c.w }; - memcpy((char*)vertexConstantShadowCopy + offset, vector, 4 * sizeof(float)); + m_vertexConstants[offset] = c; } size_t bufferSize = 512 * 4 * sizeof(float); @@ -486,7 +485,16 @@ void D3D12GSRender::FillVertexShaderConstantsBuffer() void *constantsBufferMap; check(m_constantsData.m_heap->Map(0, &range, &constantsBufferMap)); - streamBuffer((char*)constantsBufferMap + heapOffset, vertexConstantShadowCopy, bufferSize); + for (auto vertexConstants : m_vertexConstants) + { + float data[4] = { + vertexConstants.second.x, + vertexConstants.second.y, + vertexConstants.second.z, + vertexConstants.second.w + }; + memcpy((char*)constantsBufferMap + heapOffset + vertexConstants.first, data, 4 * sizeof(float)); + } m_constantsData.m_heap->Unmap(0, &range); D3D12_CONSTANT_BUFFER_VIEW_DESC constantBufferViewDesc = {}; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 23aa68cbea..a9908145a8 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -545,8 +545,6 @@ D3D12GSRender::D3D12GSRender() m_perFrameStorage[1].Init(m_device); m_perFrameStorage[1].Reset(); - vertexConstantShadowCopy = new float[512 * 4]; - // Convert shader auto p = compileF32toU8CS(); check( @@ -620,7 +618,6 @@ D3D12GSRender::~D3D12GSRender() m_swapChain->Release(); m_outputScalingPass.Release(); m_device->Release(); - delete[] vertexConstantShadowCopy; unloadD3D12FunctionPointers(); } @@ -1058,6 +1055,7 @@ void D3D12GSRender::Flip() // Flush m_texturesRTTs.clear(); m_vertexCache.clear(); + m_vertexConstants.clear(); std::vector > cleaningFunction = { diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index bccb6e6042..f265fc4eac 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -333,7 +333,8 @@ public: GSFrameBase2 *m_frame; u32 
m_draw_frames; u32 m_skip_frames; - float *vertexConstantShadowCopy; + + std::unordered_map m_vertexConstants; D3D12GSRender(); virtual ~D3D12GSRender(); From 865445e29b1f8f75951a9f38aa7cfbe81ee8c5a5 Mon Sep 17 00:00:00 2001 From: vlj Date: Thu, 25 Jun 2015 18:29:08 +0200 Subject: [PATCH 297/343] d3d12: Remove m_textureData heap since it wasn't used Free 512 mb --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 3 --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 1 - 2 files changed, 4 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index a9908145a8..5dd9f22bb2 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -585,7 +585,6 @@ D3D12GSRender::D3D12GSRender() m_constantsData.Init(m_device, 1024 * 1024 * 64, D3D12_HEAP_TYPE_UPLOAD, D3D12_HEAP_FLAG_NONE); m_vertexIndexData.Init(m_device, 1024 * 1024 * 384, D3D12_HEAP_TYPE_UPLOAD, D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS); m_textureUploadData.Init(m_device, 1024 * 1024 * 256, D3D12_HEAP_TYPE_UPLOAD, D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS); - m_textureData.Init(m_device, 1024 * 1024 * 512, D3D12_HEAP_TYPE_DEFAULT, D3D12_HEAP_FLAG_ALLOW_ONLY_NON_RT_DS_TEXTURES); } D3D12GSRender::~D3D12GSRender() @@ -593,7 +592,6 @@ D3D12GSRender::~D3D12GSRender() m_constantsData.Release(); m_vertexIndexData.Release(); m_textureUploadData.Release(); - m_textureData.Release(); m_UAVHeap.m_heap->Release(); m_readbackResources.m_heap->Release(); m_texturesRTTs.clear(); @@ -1062,7 +1060,6 @@ void D3D12GSRender::Flip() m_constantsData.getCleaningFunction(), m_vertexIndexData.getCleaningFunction(), m_textureUploadData.getCleaningFunction(), - m_textureData.getCleaningFunction() }; std::lock_guard lock(mut); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index f265fc4eac..658e1d66fb 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -306,7 +306,6 @@ private: DataHeap m_vertexIndexData; // 
Texture storage DataHeap m_textureUploadData; - DataHeap m_textureData; DataHeap m_UAVHeap; DataHeap m_readbackResources; From 2c802735bda1ffc912c827a262c04906db3189f8 Mon Sep 17 00:00:00 2001 From: vlj Date: Thu, 25 Jun 2015 18:34:47 +0200 Subject: [PATCH 298/343] d3d12: Fix crash + use ref instead of copying in some for loops --- rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp | 2 +- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 30 +++++++++++++-------------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index 7ea9c3e6fc..f708d68a0c 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -485,7 +485,7 @@ void D3D12GSRender::FillVertexShaderConstantsBuffer() void *constantsBufferMap; check(m_constantsData.m_heap->Map(0, &range, &constantsBufferMap)); - for (auto vertexConstants : m_vertexConstants) + for (const auto &vertexConstants : m_vertexConstants) { float data[4] = { vertexConstants.second.x, diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 5dd9f22bb2..3e20059f98 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -79,13 +79,13 @@ void D3D12GSRender::ResourceStorage::Reset() m_frameFinishedFence = nullptr; m_frameFinishedHandle = 0; - for (auto tmp : m_inUseConstantsBuffers) + for (auto &tmp : m_inUseConstantsBuffers) SAFE_RELEASE(std::get<2>(tmp)); - for (auto tmp : m_inUseVertexIndexBuffers) + for (auto &tmp : m_inUseVertexIndexBuffers) SAFE_RELEASE(std::get<2>(tmp)); - for (auto tmp : m_inUseTextureUploadBuffers) + for (auto &tmp : m_inUseTextureUploadBuffers) SAFE_RELEASE(std::get<2>(tmp)); - for (auto tmp : m_inUseTexture2D) + for (auto &tmp : m_inUseTexture2D) SAFE_RELEASE(std::get<2>(tmp)); m_inUseConstantsBuffers.clear(); m_inUseVertexIndexBuffers.clear(); @@ -138,22 +138,22 @@ void D3D12GSRender::ResourceStorage::Init(ID3D12Device *device) void 
D3D12GSRender::ResourceStorage::Release() { // NOTE: Should be released only if no command are in flight ! - for (auto tmp : m_inUseConstantsBuffers) + for (auto &tmp : m_inUseConstantsBuffers) SAFE_RELEASE(std::get<2>(tmp)); - for (auto tmp : m_inUseVertexIndexBuffers) + for (auto &tmp : m_inUseVertexIndexBuffers) SAFE_RELEASE(std::get<2>(tmp)); - for (auto tmp : m_inUseTextureUploadBuffers) + for (auto &tmp : m_inUseTextureUploadBuffers) SAFE_RELEASE(std::get<2>(tmp)); - for (auto tmp : m_inUseTexture2D) + for (auto &tmp : m_inUseTexture2D) SAFE_RELEASE(std::get<2>(tmp)); m_constantsBufferDescriptorsHeap->Release(); m_scaleOffsetDescriptorHeap->Release(); - for (auto tmp : m_inflightResources) + for (auto &tmp : m_inflightResources) tmp->Release(); m_textureDescriptorsHeap->Release(); m_samplerDescriptorHeap->Release(); - for (auto tmp : m_inflightCommandList) + for (auto &tmp : m_inflightCommandList) tmp->Release(); m_commandAllocator->Release(); m_textureUploadCommandAllocator->Release(); @@ -546,7 +546,7 @@ D3D12GSRender::D3D12GSRender() m_perFrameStorage[1].Reset(); // Convert shader - auto p = compileF32toU8CS(); + const auto &p = compileF32toU8CS(); check( m_device->CreateRootSignature(0, p.second->GetBufferPointer(), p.second->GetBufferSize(), IID_PPV_ARGS(&m_convertRootSignature)) ); @@ -609,9 +609,9 @@ D3D12GSRender::~D3D12GSRender() m_rtts.Release(); for (unsigned i = 0; i < 17; i++) m_rootSignatures[i]->Release(); - for (auto tmp : m_texToClean) + for (auto &tmp : m_texToClean) tmp->Release(); - for (auto tmp : m_texturesCache) + for (auto &tmp : m_texturesCache) tmp.second->Release(); m_swapChain->Release(); m_outputScalingPass.Release(); @@ -1072,8 +1072,8 @@ void D3D12GSRender::Flip() CloseHandle(storage.m_frameFinishedHandle); storage.m_frameFinishedFence->Release(); - for (unsigned i = 0; i < 4; i++) - cleaningFunction[i](); + for (auto &cleanFunc : cleaningFunction) + cleanFunc(); storage.Reset(); for (auto tmp : textoclean) From 
73aeda1507017bc5c46b709e4d25e9cd6f26d254 Mon Sep 17 00:00:00 2001 From: vlj Date: Thu, 25 Jun 2015 18:41:08 +0200 Subject: [PATCH 299/343] d3d12: Use stream to buffer to upload vertex constants --- rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index f708d68a0c..b081d75e06 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -493,7 +493,7 @@ void D3D12GSRender::FillVertexShaderConstantsBuffer() vertexConstants.second.z, vertexConstants.second.w }; - memcpy((char*)constantsBufferMap + heapOffset + vertexConstants.first, data, 4 * sizeof(float)); + streamToBuffer((char*)constantsBufferMap + heapOffset + vertexConstants.first, data, 4 * sizeof(float)); } m_constantsData.m_heap->Unmap(0, &range); From 141c7ef340bf0a8877137bbd88d834b6d754b0d6 Mon Sep 17 00:00:00 2001 From: vlj Date: Thu, 25 Jun 2015 21:12:25 +0200 Subject: [PATCH 300/343] d3d12: Do not cache non buffer vertex attribute Fix dice test --- rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index b081d75e06..cfa3d36ffc 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -297,7 +297,8 @@ std::pair, D3D12_INDEX_BUFFER_VIEW> D3D12G auto It = m_vertexCache.find(key); ID3D12Resource *vertexBuffer; - if (It != m_vertexCache.end()) + if (vbf.range.first != 0 && // Attribute is stored in a buffer, not inline in command buffer + It != m_vertexCache.end()) vertexBuffer = It->second; else { From 5e33d5535d9a990e3aab6e1d140fef4a9f517bc8 Mon Sep 17 00:00:00 2001 From: vlj Date: Thu, 25 Jun 2015 21:41:41 +0200 Subject: [PATCH 301/343] d3d12: Add a TODO notice at the beginning --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff 
--git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 658e1d66fb..f8af34128d 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -17,6 +17,32 @@ // Some constants are the same between RSX and GL #include + +/** + * TODO: If you want to improve this backend, a small list of things that are unimplemented atm : + * - Vertex texture + * It requires adding the reading command in D3D12FragmentProgramDecompiler, + * generates corresponding root signatures and descriptor heap binding, and ensure that code in + * D3D12Textures.cpp doesn't contain pixel shader specific code. + * - MSAA + * There is no support in the gl backend for MSAA textures atm so it needs to be implemented from scratch. + * - Depth buffer read + * The depth buffer can be currently properly read, but for some reasons it needs a conversion from depth16/24 + * format to rgba8 format, which doesn't make sense since the PS3 doesn't make such conversion implicitly afaik. + * - Improve caching of vertex buffers and texture + * Vertex buffers are cached by range. This works but in some rare situation it may be wrong, for instance if 2 + * draw call use the same buffer, but the first one doesn't use all the attribute ; then the second one will use + * the cached version and not have updated attributes. Same for texture, if format/size does change, the caching + * system is ignoring it. + * - Fix vertex buffer in The Guided Paradox + * The vertex info in the guided paradox are wrong, leading to missing character parts ingame (like leg or torso). + * It's because some vertex position are incorrect. + * - Improve sync between cell and RSX + * A lot of optimisation can be gained from using Cell and RSX latency. Cell can't read RSX generated data without + * synchronisation. We currently only cover semaphore sync, but there are more (like implicit sync at flip) that + * are not currently correctly signaled which leads to deadlock. 
+ */ + class GSFrameBase2 { public: From 1c7bff4d36ce3c8fb5ba23f4bd9ec44adc54d1e6 Mon Sep 17 00:00:00 2001 From: vlj Date: Thu, 25 Jun 2015 21:46:36 +0200 Subject: [PATCH 302/343] d3d12: Use align instead of powerOf2Align --- rpcs3/Emu/RSX/D3D12/D3D12.h | 11 ----------- rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp | 2 +- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 8 ++++---- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 6 +++--- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 18 +++++++++--------- 5 files changed, 17 insertions(+), 28 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12.h b/rpcs3/Emu/RSX/D3D12/D3D12.h index 506d84b578..1ef175ea82 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12.h @@ -18,17 +18,6 @@ void check(HRESULT hr) abort(); } -/** - * Get next value that is aligned by the corresponding power of 2 - */ -inline -size_t powerOf2Align(size_t unalignedVal, size_t powerOf2) -{ - // check that powerOf2 is power of 2 - assert(!(powerOf2 & (powerOf2 - 1))); - return (unalignedVal + powerOf2 - 1) & ~(powerOf2 - 1); -} - /** * Send data to dst pointer without polluting cache. * Usefull to write to mapped memory from upload heap. 
diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index cfa3d36ffc..e78b22029c 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -365,7 +365,7 @@ std::pair, D3D12_INDEX_BUFFER_VIEW> D3D12G indexCount = 6 * m_indexed_array.m_data.size() / (4 * indexSize); else indexCount = m_draw_array_count * 6 / 4; - size_t subBufferSize = powerOf2Align(indexCount * indexSize, 64); + size_t subBufferSize = align(indexCount * indexSize, 64); assert(m_vertexIndexData.canAlloc(subBufferSize)); size_t heapOffset = m_vertexIndexData.alloc(subBufferSize); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 3e20059f98..7addfb7818 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -1113,11 +1113,11 @@ ID3D12Resource * D3D12GSRender::writeColorBuffer(ID3D12Resource * RTT, ID3D12Gra { case CELL_GCM_SURFACE_A8R8G8B8: dxgiFormat = DXGI_FORMAT_R8G8B8A8_UNORM; - rowPitch = powerOf2Align(w * 4, 256); + rowPitch = align(w * 4, 256); break; case CELL_GCM_SURFACE_F_W16Z16Y16X16: dxgiFormat = DXGI_FORMAT_R16G16B16A16_FLOAT; - rowPitch = powerOf2Align(w * 8, 256); + rowPitch = align(w * 8, 256); break; } @@ -1406,11 +1406,11 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) switch (m_surface_color_format) { case CELL_GCM_SURFACE_A8R8G8B8: - srcPitch = powerOf2Align(m_surface_clip_w * 4, 256); + srcPitch = align(m_surface_clip_w * 4, 256); dstPitch = m_surface_clip_w * 4; break; case CELL_GCM_SURFACE_F_W16Z16Y16X16: - srcPitch = powerOf2Align(m_surface_clip_w * 8, 256); + srcPitch = align(m_surface_clip_w * 8, 256); dstPitch = m_surface_clip_w * 8; break; } diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index f8af34128d..0734311d54 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -138,7 +138,7 @@ struct DataHeap bool 
canAlloc(size_t size) { size_t putPos = m_putPos, getPos = m_getPos; - size_t allocSize = powerOf2Align(size, Alignment); + size_t allocSize = align(size, Alignment); if (putPos + allocSize < m_size) { // range before get @@ -168,12 +168,12 @@ struct DataHeap size_t putPos = m_putPos; if (putPos + size < m_size) { - m_putPos += powerOf2Align(size, Alignment); + m_putPos += align(size, Alignment); return putPos; } else { - m_putPos = powerOf2Align(size, Alignment); + m_putPos = align(size, Alignment); return 0; } } diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index aa50654274..a974c15659 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -157,7 +157,7 @@ writeTexelsGeneric(const char *src, char *dst, size_t widthInBlock, size_t heigh size_t currentHeight = heightInBlock, currentWidth = widthInBlock; for (unsigned mipLevel = 0; mipLevel < mipmapCount; mipLevel++) { - size_t rowPitch = powerOf2Align(currentWidth * blockSize, 256); + size_t rowPitch = align(currentWidth * blockSize, 256); MipmapLevelInfo currentMipmapLevelInfo = {}; currentMipmapLevelInfo.offset = offsetInDst; @@ -188,7 +188,7 @@ writeTexelsSwizzled(const char *src, char *dst, size_t widthInBlock, size_t heig size_t currentHeight = heightInBlock, currentWidth = widthInBlock; for (unsigned mipLevel = 0; mipLevel < mipmapCount; mipLevel++) { - size_t rowPitch = powerOf2Align(currentWidth * blockSize, 256); + size_t rowPitch = align(currentWidth * blockSize, 256); MipmapLevelInfo currentMipmapLevelInfo = {}; currentMipmapLevelInfo.offset = offsetInDst; @@ -231,7 +231,7 @@ writeCompressedTexel(const char *src, char *dst, size_t widthInBlock, size_t blo size_t currentHeight = heightInBlock, currentWidth = widthInBlock; for (unsigned mipLevel = 0; mipLevel < mipmapCount; mipLevel++) { - size_t rowPitch = powerOf2Align(currentWidth * blockSize, 256); + size_t rowPitch = align(currentWidth * blockSize, 256); 
MipmapLevelInfo currentMipmapLevelInfo = {}; currentMipmapLevelInfo.offset = offsetInDst; @@ -244,7 +244,7 @@ writeCompressedTexel(const char *src, char *dst, size_t widthInBlock, size_t blo memcpy((char*)dst + offsetInDst + row * rowPitch, (char*)src + offsetInSrc + row * currentWidth * blockSize, currentWidth * blockSize); offsetInDst += currentHeight * rowPitch; - offsetInDst = powerOf2Align(offsetInDst, 512); + offsetInDst = align(offsetInDst, 512); offsetInSrc += currentHeight * currentWidth * blockSize; currentHeight = MAX2(currentHeight / 2, 1); currentWidth = MAX2(currentWidth / 2, 1); @@ -264,7 +264,7 @@ write16bTexelsGeneric(const char *src, char *dst, size_t widthInBlock, size_t he size_t srcPitch = widthInBlock * blockSize; for (unsigned mipLevel = 0; mipLevel < mipmapCount; mipLevel++) { - size_t rowPitch = powerOf2Align(currentWidth * blockSize, 256); + size_t rowPitch = align(currentWidth * blockSize, 256); MipmapLevelInfo currentMipmapLevelInfo = {}; currentMipmapLevelInfo.offset = offsetInDst; @@ -302,7 +302,7 @@ write16bX4TexelsGeneric(const char *src, char *dst, size_t widthInBlock, size_t size_t srcPitch = widthInBlock * blockSize; for (unsigned mipLevel = 0; mipLevel < mipmapCount; mipLevel++) { - size_t rowPitch = powerOf2Align(currentWidth * blockSize, 256); + size_t rowPitch = align(currentWidth * blockSize, 256); MipmapLevelInfo currentMipmapLevelInfo = {}; currentMipmapLevelInfo.offset = offsetInDst; @@ -490,7 +490,7 @@ ID3D12Resource *uploadSingleTexture( size_t heightInBlocks = (h + blockHeightInPixel - 1) / blockHeightInPixel; size_t widthInBlocks = (w + blockWidthInPixel - 1) / blockWidthInPixel; // Multiple of 256 - size_t rowPitch = powerOf2Align(blockSizeInByte * widthInBlocks, 256); + size_t rowPitch = align(blockSizeInByte * widthInBlocks, 256); ID3D12Resource *Texture; size_t textureSize = rowPitch * heightInBlocks * 2; // * 4 for mipmap levels @@ -610,7 +610,7 @@ size_t getTextureSize(const RSXTexture &texture) case 
~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: default: LOG_ERROR(RSX, "Unimplemented Texture format : %x", format); - break; + return 0; case CELL_GCM_TEXTURE_B8: return w * h; case CELL_GCM_TEXTURE_A1R5G5B5: @@ -707,7 +707,7 @@ size_t D3D12GSRender::UploadTextures() getCurrentResourceStorage().m_inflightCommandList.push_back(commandList); m_texturesCache[texaddr] = vramTexture; - u32 s = align(getTextureSize(m_textures[i]), 4096); + u32 s = (u32)align(getTextureSize(m_textures[i]), 4096); LOG_WARNING(RSX, "PROTECTING %x of size %d", align(texaddr, 4096), s); m_protectedTextures.push_back(std::make_tuple(texaddr, align(texaddr, 4096), s)); vm::page_protect(align(texaddr, 4096), s, 0, 0, vm::page_writable); From d4b83bcf6f9803527284d68691d201a0d55a583f Mon Sep 17 00:00:00 2001 From: vlj Date: Thu, 25 Jun 2015 22:04:44 +0200 Subject: [PATCH 303/343] d3d12: Try to factorise CPU/GPU descriptor handle gen I'm still not satisfied with it but I didn't find another way to make it compact and readable. 
--- rpcs3/Emu/RSX/D3D12/D3D12.h | 16 ++++++ rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 73 +++++++++++++++------------ rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 4 ++ 3 files changed, 61 insertions(+), 32 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12.h b/rpcs3/Emu/RSX/D3D12/D3D12.h index 1ef175ea82..a139297b1c 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12.h @@ -288,4 +288,20 @@ inline DXGI_FORMAT getTextureDXGIFormat(int format) } } +inline +D3D12_CPU_DESCRIPTOR_HANDLE getCPUDescriptorHandle(ID3D12DescriptorHeap *descriptors, size_t offset) +{ + D3D12_CPU_DESCRIPTOR_HANDLE result = descriptors->GetCPUDescriptorHandleForHeapStart(); + result.ptr += offset; + return result; +} + +inline +D3D12_GPU_DESCRIPTOR_HANDLE getGPUDescriptorHandle(ID3D12DescriptorHeap *descriptors, size_t offset) +{ + D3D12_GPU_DESCRIPTOR_HANDLE result = descriptors->GetGPUDescriptorHandleForHeapStart(); + result.ptr += offset; + return result; +} + #endif diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 7addfb7818..d468971490 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -456,6 +456,11 @@ D3D12GSRender::D3D12GSRender() check(m_device->CreateCommandQueue(©QueueDesc, IID_PPV_ARGS(&m_commandQueueCopy))); check(m_device->CreateCommandQueue(&graphicQueueDesc, IID_PPV_ARGS(&m_commandQueueGraphic))); + g_descriptorStrideSRVCBVUAV = m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + size_t g_descriptorStrideDSV = m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_DSV); + size_t g_descriptorStrideRTV = m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_RTV); + size_t g_descriptorStrideSamplers = m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); + m_frame = GetGSFrame(); DXGI_ADAPTER_DESC adaptaterDesc; adaptater->GetDesc(&adaptaterDesc); @@ -692,28 +697,26 @@ void 
D3D12GSRender::ExecCMD(u32 cmd) case CELL_GCM_SURFACE_TARGET_0: case CELL_GCM_SURFACE_TARGET_1: - commandList->ClearRenderTargetView(handle, clearColor, 0, nullptr); + commandList->ClearRenderTargetView(getCPUDescriptorHandle(m_rtts.m_renderTargetsDescriptorsHeap, 0), clearColor, 0, nullptr); break; case CELL_GCM_SURFACE_TARGET_MRT1: - commandList->ClearRenderTargetView(handle, clearColor, 0, nullptr); - handle.ptr += g_RTTIncrement; - commandList->ClearRenderTargetView(handle, clearColor, 0, nullptr); + commandList->ClearRenderTargetView(getCPUDescriptorHandle(m_rtts.m_renderTargetsDescriptorsHeap, 0), clearColor, 0, nullptr); + commandList->ClearRenderTargetView(getCPUDescriptorHandle(m_rtts.m_renderTargetsDescriptorsHeap, g_descriptorStrideRTV), clearColor, 0, nullptr); break; case CELL_GCM_SURFACE_TARGET_MRT2: - commandList->ClearRenderTargetView(handle, clearColor, 0, nullptr); + commandList->ClearRenderTargetView(getCPUDescriptorHandle(m_rtts.m_renderTargetsDescriptorsHeap, 0), clearColor, 0, nullptr); + commandList->ClearRenderTargetView(getCPUDescriptorHandle(m_rtts.m_renderTargetsDescriptorsHeap, g_descriptorStrideRTV), clearColor, 0, nullptr); handle.ptr += g_RTTIncrement; - commandList->ClearRenderTargetView(handle, clearColor, 0, nullptr); - handle.ptr += g_RTTIncrement; - commandList->ClearRenderTargetView(handle, clearColor, 0, nullptr); + commandList->ClearRenderTargetView(getCPUDescriptorHandle(m_rtts.m_renderTargetsDescriptorsHeap, 2 * g_descriptorStrideRTV), clearColor, 0, nullptr); break; case CELL_GCM_SURFACE_TARGET_MRT3: - commandList->ClearRenderTargetView(handle, clearColor, 0, nullptr); + commandList->ClearRenderTargetView(getCPUDescriptorHandle(m_rtts.m_renderTargetsDescriptorsHeap, 0), clearColor, 0, nullptr); handle.ptr += g_RTTIncrement; - commandList->ClearRenderTargetView(handle, clearColor, 0, nullptr); + commandList->ClearRenderTargetView(getCPUDescriptorHandle(m_rtts.m_renderTargetsDescriptorsHeap, g_descriptorStrideRTV), 
clearColor, 0, nullptr); handle.ptr += g_RTTIncrement; - commandList->ClearRenderTargetView(handle, clearColor, 0, nullptr); + commandList->ClearRenderTargetView(getCPUDescriptorHandle(m_rtts.m_renderTargetsDescriptorsHeap, 2 * g_descriptorStrideRTV), clearColor, 0, nullptr); handle.ptr += g_RTTIncrement; - commandList->ClearRenderTargetView(handle, clearColor, 0, nullptr); + commandList->ClearRenderTargetView(getCPUDescriptorHandle(m_rtts.m_renderTargetsDescriptorsHeap, 3 * g_descriptorStrideRTV), clearColor, 0, nullptr); break; default: LOG_ERROR(RSX, "Bad surface color target: %d", m_surface_color_target); @@ -782,9 +785,10 @@ void D3D12GSRender::ExecCMD() // Constants setScaleOffset(); commandList->SetDescriptorHeaps(1, &getCurrentResourceStorage().m_scaleOffsetDescriptorHeap); - D3D12_GPU_DESCRIPTOR_HANDLE Handle = getCurrentResourceStorage().m_scaleOffsetDescriptorHeap->GetGPUDescriptorHandleForHeapStart(); - Handle.ptr += getCurrentResourceStorage().m_currentScaleOffsetBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - commandList->SetGraphicsRootDescriptorTable(0, Handle); + commandList->SetGraphicsRootDescriptorTable(0, + getGPUDescriptorHandle(getCurrentResourceStorage().m_scaleOffsetDescriptorHeap, + getCurrentResourceStorage().m_currentScaleOffsetBufferIndex * g_descriptorStrideSRVCBVUAV) + ); getCurrentResourceStorage().m_currentScaleOffsetBufferIndex++; size_t currentBufferIndex = getCurrentResourceStorage().m_constantsBufferIndex; @@ -794,9 +798,10 @@ void D3D12GSRender::ExecCMD() getCurrentResourceStorage().m_constantsBufferIndex++; commandList->SetDescriptorHeaps(1, &getCurrentResourceStorage().m_constantsBufferDescriptorsHeap); - Handle = getCurrentResourceStorage().m_constantsBufferDescriptorsHeap->GetGPUDescriptorHandleForHeapStart(); - Handle.ptr += currentBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - 
commandList->SetGraphicsRootDescriptorTable(1, Handle); + commandList->SetGraphicsRootDescriptorTable(1, + getGPUDescriptorHandle(getCurrentResourceStorage().m_constantsBufferDescriptorsHeap, + currentBufferIndex * g_descriptorStrideSRVCBVUAV) + ); commandList->SetPipelineState(m_PSO->first); if (m_PSO->second > 0) @@ -807,8 +812,6 @@ void D3D12GSRender::ExecCMD() // Fill empty slots for (; usedTexture < m_PSO->second; usedTexture++) { - D3D12_CPU_DESCRIPTOR_HANDLE Handle = getCurrentResourceStorage().m_textureDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); - Handle.ptr += (getCurrentResourceStorage().m_currentTextureIndex + usedTexture) * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; srvDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; @@ -818,27 +821,33 @@ void D3D12GSRender::ExecCMD() D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0, D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0, D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0); - m_device->CreateShaderResourceView(m_dummyTexture, &srvDesc, Handle); + m_device->CreateShaderResourceView(m_dummyTexture, &srvDesc, + getCPUDescriptorHandle(getCurrentResourceStorage().m_textureDescriptorsHeap, + (getCurrentResourceStorage().m_currentTextureIndex + usedTexture) * g_descriptorStrideSRVCBVUAV) + ); D3D12_SAMPLER_DESC samplerDesc = {}; samplerDesc.Filter = D3D12_FILTER_MIN_MAG_MIP_POINT; samplerDesc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_WRAP; samplerDesc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_WRAP; samplerDesc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_WRAP; - Handle = getCurrentResourceStorage().m_samplerDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); - Handle.ptr += (getCurrentResourceStorage().m_currentTextureIndex + usedTexture) * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); - m_device->CreateSampler(&samplerDesc, Handle); + 
m_device->CreateSampler(&samplerDesc, + getCPUDescriptorHandle(getCurrentResourceStorage().m_samplerDescriptorHeap, + (getCurrentResourceStorage().m_currentTextureIndex + usedTexture) * g_descriptorStrideSamplers) + ); } - Handle = getCurrentResourceStorage().m_textureDescriptorsHeap->GetGPUDescriptorHandleForHeapStart(); - Handle.ptr += getCurrentResourceStorage().m_currentTextureIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); commandList->SetDescriptorHeaps(1, &getCurrentResourceStorage().m_textureDescriptorsHeap); - commandList->SetGraphicsRootDescriptorTable(2, Handle); + commandList->SetGraphicsRootDescriptorTable(2, + getGPUDescriptorHandle(getCurrentResourceStorage().m_textureDescriptorsHeap, + getCurrentResourceStorage().m_currentTextureIndex * g_descriptorStrideSRVCBVUAV) + ); - Handle = getCurrentResourceStorage().m_samplerDescriptorHeap->GetGPUDescriptorHandleForHeapStart(); - Handle.ptr += getCurrentResourceStorage().m_currentTextureIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); commandList->SetDescriptorHeaps(1, &getCurrentResourceStorage().m_samplerDescriptorHeap); - commandList->SetGraphicsRootDescriptorTable(3, Handle); + commandList->SetGraphicsRootDescriptorTable(3, + getGPUDescriptorHandle(getCurrentResourceStorage().m_samplerDescriptorHeap, + getCurrentResourceStorage().m_currentTextureIndex * g_descriptorStrideSamplers) + ); getCurrentResourceStorage().m_currentTextureIndex += usedTexture; std::chrono::time_point endTextureTime = std::chrono::system_clock::now(); @@ -866,8 +875,8 @@ void D3D12GSRender::ExecCMD() LOG_ERROR(RSX, "Bad surface color target: %d", m_surface_color_target); } - D3D12_CPU_DESCRIPTOR_HANDLE *DepthStencilHandle = &m_rtts.m_depthStencilDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); - commandList->OMSetRenderTargets((UINT)numRTT, &m_rtts.m_renderTargetsDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(), true, DepthStencilHandle); 
+ commandList->OMSetRenderTargets((UINT)numRTT, &m_rtts.m_renderTargetsDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(), true, + &getCPUDescriptorHandle(m_rtts.m_depthStencilDescriptorHeap, 0)); D3D12_VIEWPORT viewport = { diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 0734311d54..a170b53d51 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -342,6 +342,10 @@ private: std::vector m_IASet; ID3D12Device* m_device; + size_t g_descriptorStrideSRVCBVUAV; + size_t g_descriptorStrideDSV; + size_t g_descriptorStrideRTV; + size_t g_descriptorStrideSamplers; ID3D12CommandQueue *m_commandQueueCopy; ID3D12CommandQueue *m_commandQueueGraphic; From 3472f75ae025e46238035784a7d6c277b46745f6 Mon Sep 17 00:00:00 2001 From: vlj Date: Thu, 25 Jun 2015 22:46:32 +0200 Subject: [PATCH 304/343] d3d12: Fix uninitialized variables --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index d468971490..3c8291974c 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -457,9 +457,9 @@ D3D12GSRender::D3D12GSRender() check(m_device->CreateCommandQueue(&graphicQueueDesc, IID_PPV_ARGS(&m_commandQueueGraphic))); g_descriptorStrideSRVCBVUAV = m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - size_t g_descriptorStrideDSV = m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_DSV); - size_t g_descriptorStrideRTV = m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_RTV); - size_t g_descriptorStrideSamplers = m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); + g_descriptorStrideDSV = m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_DSV); + g_descriptorStrideRTV = 
m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_RTV); + g_descriptorStrideSamplers = m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); m_frame = GetGSFrame(); DXGI_ADAPTER_DESC adaptaterDesc; @@ -1044,7 +1044,7 @@ void D3D12GSRender::Flip() barriers[1].Transition.StateBefore = D3D12_RESOURCE_STATE_GENERIC_READ; barriers[1].Transition.StateAfter = D3D12_RESOURCE_STATE_RENDER_TARGET; commandList->ResourceBarrier(2, barriers); - commandList->Close(); + check(commandList->Close()); m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)&commandList); } } @@ -1303,7 +1303,7 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) convertCommandList->ResourceBarrier(2, barriers); convertCommandList->ResourceBarrier(1, &getResourceBarrierTransition(depthConverted, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE)); - convertCommandList->Close(); + check(convertCommandList->Close()); m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)&convertCommandList); } @@ -1369,7 +1369,7 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) } if (needTransfer) { - downloadCommandList->Close(); + check(downloadCommandList->Close()); m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)&downloadCommandList); } From fd269f3adc6e3bb83068773bd0f11e9a73402da0 Mon Sep 17 00:00:00 2001 From: vlj Date: Fri, 26 Jun 2015 18:31:51 +0200 Subject: [PATCH 305/343] d3d12: Fix alloc function It may generate wrong result in very rare circumstance, although I never experienced it. 
--- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index a170b53d51..b03e2f6161 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -137,26 +137,25 @@ struct DataHeap */ bool canAlloc(size_t size) { - size_t putPos = m_putPos, getPos = m_getPos; size_t allocSize = align(size, Alignment); - if (putPos + allocSize < m_size) + if (m_putPos + allocSize < m_size) { // range before get - if (putPos + allocSize < getPos) + if (m_putPos + allocSize < m_getPos) return true; // range after get - if (putPos > getPos) + if (m_putPos > m_getPos) return true; return false; } else { // ..]....[..get.. - if (putPos < getPos) + if (m_putPos < m_getPos) return false; // ..get..]...[... // Actually all resources extending beyond heap space starts at 0 - if (allocSize > getPos) + if (allocSize > m_getPos) return false; return true; } @@ -165,15 +164,16 @@ struct DataHeap size_t alloc(size_t size) { assert(canAlloc(size)); - size_t putPos = m_putPos; - if (putPos + size < m_size) + size_t allocSize = align(size, Alignment); + if (m_putPos + allocSize < m_size) { - m_putPos += align(size, Alignment); - return putPos; + size_t oldPutPos = m_putPos; + m_putPos += allocSize; + return oldPutPos; } else { - m_putPos = align(size, Alignment); + m_putPos = allocSize; return 0; } } From de55d64781641e0087e035d2faa5306b06aa4156 Mon Sep 17 00:00:00 2001 From: vlj Date: Fri, 26 Jun 2015 18:33:42 +0200 Subject: [PATCH 306/343] d3d12: Make canAlloc function const --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index b03e2f6161..e2f1640fd2 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -135,7 +135,7 @@ struct DataHeap /** * Does 
alloc cross get position ? */ - bool canAlloc(size_t size) + bool canAlloc(size_t size) const { size_t allocSize = align(size, Alignment); if (m_putPos + allocSize < m_size) From 16fa3697db093de5fd9fc9db3a3410d0d2d091b7 Mon Sep 17 00:00:00 2001 From: vlj Date: Fri, 26 Jun 2015 20:24:31 +0200 Subject: [PATCH 307/343] d3d12: Use atomic for data heap --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index e2f1640fd2..ef62bd1ade 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -120,8 +120,8 @@ struct DataHeap { T *m_heap; size_t m_size; - size_t m_putPos, // Start of free space - m_getPos; // End of free space + size_t m_putPos; // Start of free space + std::atomic m_getPos; // End of free space std::vector > m_resourceStoredSinceLastSync; void Init(ID3D12Device *device, size_t heapSize, D3D12_HEAP_TYPE type, D3D12_HEAP_FLAGS flags) @@ -138,6 +138,7 @@ struct DataHeap bool canAlloc(size_t size) const { size_t allocSize = align(size, Alignment); + size_t currentGetPos = m_getPos.load(); if (m_putPos + allocSize < m_size) { // range before get @@ -193,14 +194,14 @@ struct DataHeap */ std::function getCleaningFunction() { - size_t& getPointer = m_getPos; + std::atomic& getPointer = m_getPos; auto duplicatem_resourceStoredSinceLastSync = m_resourceStoredSinceLastSync; m_resourceStoredSinceLastSync.clear(); return [=, &getPointer]() { for (auto tmp : duplicatem_resourceStoredSinceLastSync) { SAFE_RELEASE(std::get<2>(tmp)); - getPointer = std::get<0>(tmp); + getPointer.exchange(std::get<0>(tmp)); } }; } From fc65f181a72544871c64e29c43525d641981fcf9 Mon Sep 17 00:00:00 2001 From: vlj Date: Fri, 26 Jun 2015 20:56:37 +0200 Subject: [PATCH 308/343] d3d12: Fix a potential crash in GC thread --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 3c8291974c..37fa295a65 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -39,7 +39,7 @@ GarbageCollectionThread::GarbageCollectionThread() while (true) { std::unique_lock lock(m_mutex); - if (m_queue.empty()) + while (m_queue.empty()) cv.wait(lock); m_queue.front()(); m_queue.pop(); From 8cf6255d5d5df23a1c2dca12cc752472c2e54af4 Mon Sep 17 00:00:00 2001 From: vlj Date: Fri, 26 Jun 2015 21:08:35 +0200 Subject: [PATCH 309/343] d3d12: Fix unneeeded vectors --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 27 --------------------------- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 7 ------- 2 files changed, 34 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 37fa295a65..b40fd1bd5b 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -79,28 +79,12 @@ void D3D12GSRender::ResourceStorage::Reset() m_frameFinishedFence = nullptr; m_frameFinishedHandle = 0; - for (auto &tmp : m_inUseConstantsBuffers) - SAFE_RELEASE(std::get<2>(tmp)); - for (auto &tmp : m_inUseVertexIndexBuffers) - SAFE_RELEASE(std::get<2>(tmp)); - for (auto &tmp : m_inUseTextureUploadBuffers) - SAFE_RELEASE(std::get<2>(tmp)); - for (auto &tmp : m_inUseTexture2D) - SAFE_RELEASE(std::get<2>(tmp)); - m_inUseConstantsBuffers.clear(); - m_inUseVertexIndexBuffers.clear(); - m_inUseTextureUploadBuffers.clear(); - m_inUseTexture2D.clear(); - m_commandAllocator->Reset(); m_textureUploadCommandAllocator->Reset(); m_downloadCommandAllocator->Reset(); for (ID3D12GraphicsCommandList *gfxCommandList : m_inflightCommandList) gfxCommandList->Release(); m_inflightCommandList.clear(); - for (ID3D12Resource *vertexBuffer : m_inflightResources) - vertexBuffer->Release(); - m_inflightResources.clear(); } void D3D12GSRender::ResourceStorage::Init(ID3D12Device *device) @@ -138,19 +122,8 @@ void 
D3D12GSRender::ResourceStorage::Init(ID3D12Device *device) void D3D12GSRender::ResourceStorage::Release() { // NOTE: Should be released only if no command are in flight ! - for (auto &tmp : m_inUseConstantsBuffers) - SAFE_RELEASE(std::get<2>(tmp)); - for (auto &tmp : m_inUseVertexIndexBuffers) - SAFE_RELEASE(std::get<2>(tmp)); - for (auto &tmp : m_inUseTextureUploadBuffers) - SAFE_RELEASE(std::get<2>(tmp)); - for (auto &tmp : m_inUseTexture2D) - SAFE_RELEASE(std::get<2>(tmp)); - m_constantsBufferDescriptorsHeap->Release(); m_scaleOffsetDescriptorHeap->Release(); - for (auto &tmp : m_inflightResources) - tmp->Release(); m_textureDescriptorsHeap->Release(); m_samplerDescriptorHeap->Release(); for (auto &tmp : m_inflightCommandList) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index ef62bd1ade..7080399a00 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -299,13 +299,6 @@ private: ID3D12CommandAllocator *m_downloadCommandAllocator; std::list m_inflightCommandList; - std::vector m_inflightResources; - - std::vector > m_inUseConstantsBuffers; - std::vector > m_inUseVertexIndexBuffers; - std::vector > m_inUseTextureUploadBuffers; - std::vector > m_inUseTexture2D; - // Constants storage ID3D12DescriptorHeap *m_constantsBufferDescriptorsHeap; size_t m_constantsBufferIndex; From a519aa835055c0bd26a68b799a364ff619fe0e51 Mon Sep 17 00:00:00 2001 From: vlj Date: Fri, 26 Jun 2015 21:33:01 +0200 Subject: [PATCH 310/343] d3d12: Add a (ugly) fix because of some race condition somewhere --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index b40fd1bd5b..ba31947759 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -1066,6 +1066,10 @@ void D3D12GSRender::Flip() std::this_thread::yield(); m_frame->Flip(nullptr); + // FIXME: Without 
this call Voodoo Chronicles + Warp trigger an error because + // index/vertex resources are released before being used. + m_GC.waitForCompletion(); + ResetTimer(); } From 40a3b5c918dd852f30d46a2c2ed2cca3eefc30ea Mon Sep 17 00:00:00 2001 From: vlj Date: Fri, 26 Jun 2015 22:00:49 +0200 Subject: [PATCH 311/343] d3d12: Mark semaphore location as volatile --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index ba31947759..8ac0a0d2a0 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -1483,7 +1483,7 @@ void D3D12GSRender::semaphorePFIFOAcquire(u32 offset, u32 value) const std::chrono::time_point enterWait = std::chrono::system_clock::now(); while (true) { - u32 val = vm::read32(m_label_addr + offset); + volatile u32 val = vm::read32(m_label_addr + offset); if (val == value) break; std::chrono::time_point waitPoint = std::chrono::system_clock::now(); long long elapsedTime = std::chrono::duration_cast(waitPoint - enterWait).count(); From 6cb00e681b720f6dbc3bda72368abf80af72b722 Mon Sep 17 00:00:00 2001 From: vlj Date: Sat, 27 Jun 2015 00:22:04 +0200 Subject: [PATCH 312/343] d3d12: support swizzled CELL_GCM_TEXTURE_A4R4G4B4 Fix terraria menu --- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 50 ++++++++++++++++++++++++++-- 1 file changed, 48 insertions(+), 2 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index a974c15659..f7c2e15f9b 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -252,6 +252,49 @@ writeCompressedTexel(const char *src, char *dst, size_t widthInBlock, size_t blo return Result; } + +/** +* Write 16 bytes pixel textures, assume src pixels are swizzled and but not mipmaplevel +*/ +static std::vector +write16bTexelsSwizzled(const char *src, char *dst, size_t widthInBlock, size_t 
heightInBlock, size_t blockSize, size_t mipmapCount) +{ + std::vector Result; + size_t offsetInDst = 0, offsetInSrc = 0; + size_t currentHeight = heightInBlock, currentWidth = widthInBlock; + for (unsigned mipLevel = 0; mipLevel < mipmapCount; mipLevel++) + { + size_t rowPitch = align(currentWidth * blockSize, 256); + + MipmapLevelInfo currentMipmapLevelInfo = {}; + currentMipmapLevelInfo.offset = offsetInDst; + currentMipmapLevelInfo.height = currentHeight; + currentMipmapLevelInfo.width = currentWidth; + currentMipmapLevelInfo.rowPitch = rowPitch; + Result.push_back(currentMipmapLevelInfo); + + u16 *castedSrc, *castedDst; + u16 log2width, log2height; + + castedSrc = (u16*)src + offsetInSrc; + castedDst = (u16*)dst + offsetInDst; + + log2width = (u32)(logf((float)currentWidth) / logf(2.f)); + log2height = (u32)(logf((float)currentHeight) / logf(2.f)); + +#pragma omp parallel for + for (unsigned row = 0; row < currentHeight; row++) + for (int j = 0; j < currentWidth; j++) + castedDst[(row * rowPitch / 2) + j] = castedSrc[LinearToSwizzleAddress(j, row, 0, log2width, log2height, 0)]; + + offsetInDst += currentHeight * rowPitch; + offsetInSrc += currentHeight * widthInBlock * blockSize; + currentHeight = MAX2(currentHeight / 2, 1); + currentWidth = MAX2(currentWidth / 2, 1); + } + return Result; +} + /** * Write 16 bytes pixel textures, assume src pixels are packed but not mipmaplevel */ @@ -525,7 +568,10 @@ ID3D12Resource *uploadSingleTexture( case CELL_GCM_TEXTURE_A4R4G4B4: case CELL_GCM_TEXTURE_R5G6B5: { - mipInfos = write16bTexelsGeneric((char*)pixels, (char*)textureData, w, h, 2, texture.GetMipmap()); + if (is_swizzled) + mipInfos = write16bTexelsSwizzled((char*)pixels, (char*)textureData, w, h, 2, texture.GetMipmap()); + else + mipInfos = write16bTexelsGeneric((char*)pixels, (char*)textureData, w, h, 2, texture.GetMipmap()); break; } case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: @@ -616,7 +662,7 @@ size_t getTextureSize(const RSXTexture &texture) case 
CELL_GCM_TEXTURE_A1R5G5B5: return w * h * 2; case CELL_GCM_TEXTURE_A4R4G4B4: - return w * h * 4; + return w * h * 2; case CELL_GCM_TEXTURE_R5G6B5: return w * h * 2; case CELL_GCM_TEXTURE_A8R8G8B8: From 45b7da6666eaf80780dcd4fcc69eac9b0d7417b1 Mon Sep 17 00:00:00 2001 From: vlj Date: Sat, 27 Jun 2015 02:09:26 +0200 Subject: [PATCH 313/343] d3d12: Mipmap offset is 512byte aligned Fix retro city rampage crash at startup --- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index f7c2e15f9b..3dfa45a82f 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -170,6 +170,7 @@ writeTexelsGeneric(const char *src, char *dst, size_t widthInBlock, size_t heigh memcpy((char*)dst + offsetInDst + row * rowPitch, (char*)src + offsetInSrc + row * widthInBlock * blockSize, currentWidth * blockSize); offsetInDst += currentHeight * rowPitch; + offsetInDst = align(offsetInDst, 512); offsetInSrc += currentHeight * widthInBlock * blockSize; currentHeight = MAX2(currentHeight / 2, 1); currentWidth = MAX2(currentWidth / 2, 1); From 3cc397446630154754ffec4e49bd5b20db4dcf16 Mon Sep 17 00:00:00 2001 From: vlj Date: Sat, 27 Jun 2015 19:06:22 +0200 Subject: [PATCH 314/343] d3d12: Fix m_ctrl not being properly passed to fragement decompiler Fix Retro City Rampage --- .../D3D12/D3D12FragmentProgramDecompiler.cpp | 35 +++++-------------- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h | 2 +- 2 files changed, 9 insertions(+), 28 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp index 8916a9738b..a99747f81d 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp @@ -71,25 +71,16 @@ void D3D12FragmentDecompiler::insertOutputs(std::stringstream & OS) OS << "{" << std::endl; const std::pair table[] = 
{ - { "ocol0", "r0" }, - { "ocol1", "r2" }, - { "ocol2", "r3" }, - { "ocol3", "r4" }, + { "ocol0", m_ctrl & 0x40 ? "r0" : "h0" }, + { "ocol1", m_ctrl & 0x40 ? "r2" : "h4" }, + { "ocol2", m_ctrl & 0x40 ? "r3" : "h6" }, + { "ocol3", m_ctrl & 0x40 ? "r4" : "h8" }, }; - const std::pair table2[] = - { - { "ocol0", "h0" }, - { "ocol1", "h2" }, - { "ocol2", "h3" }, - { "ocol3", "h4" }, - }; for (int i = 0; i < sizeof(table) / sizeof(*table); ++i) { if (m_parr.HasParam(PF_PARAM_NONE, "float4", table[i].second)) OS << " " << "float4" << " " << table[i].first << " : SV_TARGET" << i << ";" << std::endl; - else if (m_parr.HasParam(PF_PARAM_NONE, "float4", table2[i].second)) - OS << " " << "float4" << " " << table2[i].first << " : SV_TARGET" << i << ";" << std::endl; } OS << "};" << std::endl; } @@ -141,18 +132,10 @@ void D3D12FragmentDecompiler::insertMainEnd(std::stringstream & OS) { const std::pair table[] = { - { "ocol0", "r0" }, - { "ocol1", "r2" }, - { "ocol2", "r3" }, - { "ocol3", "r4" }, - }; - - const std::pair table2[] = - { - { "ocol0", "h0" }, - { "ocol1", "h2" }, - { "ocol2", "h3" }, - { "ocol3", "h4" }, + { "ocol0", m_ctrl & 0x40 ? "r0" : "h0" }, + { "ocol1", m_ctrl & 0x40 ? "r2" : "h4" }, + { "ocol2", m_ctrl & 0x40 ? "r3" : "h6" }, + { "ocol3", m_ctrl & 0x40 ? "r4" : "h8" }, }; OS << " PixelOutput Out;" << std::endl; @@ -160,8 +143,6 @@ void D3D12FragmentDecompiler::insertMainEnd(std::stringstream & OS) { if (m_parr.HasParam(PF_PARAM_NONE, "float4", table[i].second)) OS << " Out." << table[i].first << " = " << table[i].second << ";" << std::endl; - else if (m_parr.HasParam(PF_PARAM_NONE, "float4", table2[i].second)) - OS << " Out." 
<< table2[i].first << " = " << table2[i].second << ";" << std::endl; } OS << " if (isAlphaTested && Out.ocol0.a <= alphaRef) discard;" << std::endl; OS << " return Out;" << std::endl; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h index 655ba0622f..4fe0e4734e 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h @@ -91,7 +91,7 @@ struct D3D12Traits static void RecompileFragmentProgram(RSXFragmentProgram *RSXFP, FragmentProgramData& fragmentProgramData, size_t ID) { - D3D12FragmentDecompiler FS(RSXFP->addr, RSXFP->size, RSXFP->offset); + D3D12FragmentDecompiler FS(RSXFP->addr, RSXFP->size, RSXFP->ctrl); const std::string &shader = FS.Decompile(); fragmentProgramData.Compile(shader, Shader::SHADER_TYPE::SHADER_TYPE_FRAGMENT); fragmentProgramData.m_textureCount = 0; From cb0ebad210479232411de60818ca70b044e105a8 Mon Sep 17 00:00:00 2001 From: vlj Date: Sat, 27 Jun 2015 23:21:11 +0200 Subject: [PATCH 315/343] d3d12: Fix binding of tex/sampler --- rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp index a99747f81d..13a97cda55 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp @@ -97,13 +97,14 @@ void D3D12FragmentDecompiler::insertConstants(std::stringstream & OS) OS << " " << PT.type << " " << PI.name << ";" << std::endl; } OS << "};" << std::endl << std::endl; - size_t textureIndex = 0; + for (ParamType PT : m_parr.params[PF_PARAM_UNIFORM]) { if (PT.type != "sampler2D") continue; for (ParamItem PI : PT.items) { + size_t textureIndex = atoi(PI.name.data() + 3); OS << "Texture2D " << PI.name << " : register(t" << textureIndex << ");" << std::endl; OS << "sampler " << PI.name << "sampler : register(s" << textureIndex << 
");" << std::endl; textureIndex++; From 2cd035d53037088d7c1cb29cfa91ffbf5551d500 Mon Sep 17 00:00:00 2001 From: vlj Date: Sun, 28 Jun 2015 00:24:23 +0200 Subject: [PATCH 316/343] d3d12: Fix A1R5G5B5 endianness Fix color in Retro city rampage --- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index 3dfa45a82f..4cf68fb251 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -566,6 +566,7 @@ ID3D12Resource *uploadSingleTexture( mipInfos = writeTexelsGeneric((char*)pixels, (char*)textureData, w, h, 4, texture.GetMipmap()); break; } + case CELL_GCM_TEXTURE_A1R5G5B5: case CELL_GCM_TEXTURE_A4R4G4B4: case CELL_GCM_TEXTURE_R5G6B5: { From 428d66598d5827b49477c419922b06fea6ff4165 Mon Sep 17 00:00:00 2001 From: vlj Date: Sun, 28 Jun 2015 17:38:52 +0200 Subject: [PATCH 317/343] d3d12: Move util shader creation in another file --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 237 +----------------------- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 1 + rpcs3/Emu/RSX/D3D12/D3D12Utils.cpp | 248 ++++++++++++++++++++++++++ rpcs3/emucore.vcxproj | 1 + rpcs3/emucore.vcxproj.filters | 3 + 5 files changed, 254 insertions(+), 236 deletions(-) create mode 100644 rpcs3/Emu/RSX/D3D12/D3D12Utils.cpp diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 8ac0a0d2a0..dc0443f9ef 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -137,224 +137,6 @@ void D3D12GSRender::ResourceStorage::Release() m_frameFinishedFence->Release(); } -// 32 bits float to U8 unorm CS -#define STRINGIFY(x) #x - -/** - * returns bytecode and root signature of a Compute Shader converting texture from - * one format to another - */ -static -std::pair compileF32toU8CS() -{ - const char *shaderCode = STRINGIFY( - Texture2D InputTexture : register(t0); \n - RWTexture2D OutputTexture : 
register(u0);\n - - [numthreads(8, 8, 1)]\n - void main(uint3 Id : SV_DispatchThreadID)\n - { \n - OutputTexture[Id.xy] = InputTexture.Load(uint3(Id.xy, 0));\n - } - ); - - ID3DBlob *bytecode; - Microsoft::WRL::ComPtr errorBlob; - HRESULT hr = D3DCompile(shaderCode, strlen(shaderCode), "test", nullptr, nullptr, "main", "cs_5_0", 0, 0, &bytecode, errorBlob.GetAddressOf()); - if (hr != S_OK) - { - const char *tmp = (const char*)errorBlob->GetBufferPointer(); - LOG_ERROR(RSX, tmp); - } - D3D12_DESCRIPTOR_RANGE descriptorRange[2] = {}; - // Textures - descriptorRange[0].BaseShaderRegister = 0; - descriptorRange[0].NumDescriptors = 1; - descriptorRange[0].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; - descriptorRange[1].BaseShaderRegister = 0; - descriptorRange[1].NumDescriptors = 1; - descriptorRange[1].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV; - descriptorRange[1].OffsetInDescriptorsFromTableStart = 1; - D3D12_ROOT_PARAMETER RP[2] = {}; - RP[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; - RP[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; - RP[0].DescriptorTable.pDescriptorRanges = &descriptorRange[0]; - RP[0].DescriptorTable.NumDescriptorRanges = 2; - - D3D12_ROOT_SIGNATURE_DESC rootSignatureDesc = {}; - rootSignatureDesc.NumParameters = 1; - rootSignatureDesc.pParameters = RP; - - ID3DBlob *rootSignatureBlob; - - hr = wrapD3D12SerializeRootSignature(&rootSignatureDesc, D3D_ROOT_SIGNATURE_VERSION_1, &rootSignatureBlob, &errorBlob); - if (hr != S_OK) - { - const char *tmp = (const char*)errorBlob->GetBufferPointer(); - LOG_ERROR(RSX, tmp); - } - - return std::make_pair(bytecode, rootSignatureBlob); -} - -void D3D12GSRender::Shader::Init(ID3D12Device *device) -{ - const char *fsCode = STRINGIFY( - Texture2D InputTexture : register(t0); \n - sampler bilinearSampler : register(s0); \n - - struct PixelInput \n - { \n - float4 Pos : SV_POSITION; \n - float2 TexCoords : TEXCOORDS0; \n - }; \n - - float4 main(PixelInput In) : SV_TARGET \n - { \n - 
return InputTexture.Sample(bilinearSampler, In.TexCoords); \n - } - ); - - Microsoft::WRL::ComPtr fsBytecode; - Microsoft::WRL::ComPtr errorBlob; - HRESULT hr = D3DCompile(fsCode, strlen(fsCode), "test", nullptr, nullptr, "main", "ps_5_0", 0, 0, &fsBytecode, errorBlob.GetAddressOf()); - if (hr != S_OK) - { - const char *tmp = (const char*)errorBlob->GetBufferPointer(); - LOG_ERROR(RSX, tmp); - } - - const char *vsCode = STRINGIFY( - struct VertexInput \n - { \n - float2 Pos : POSITION; \n - float2 TexCoords : TEXCOORDS0; \n - }; \n - - struct PixelInput \n - { \n - float4 Pos : SV_POSITION; \n - float2 TexCoords : TEXCOORDS0; \n - }; \n - - PixelInput main(VertexInput In) \n - { \n - PixelInput Out; \n - Out.Pos = float4(In.Pos, 0., 1.); \n - Out.TexCoords = In.TexCoords; \n - return Out; \n - } - ); - - Microsoft::WRL::ComPtr vsBytecode; - hr = D3DCompile(vsCode, strlen(vsCode), "test", nullptr, nullptr, "main", "vs_5_0", 0, 0, &vsBytecode, errorBlob.GetAddressOf()); - if (hr != S_OK) - { - const char *tmp = (const char*)errorBlob->GetBufferPointer(); - LOG_ERROR(RSX, tmp); - } - - D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = {}; - psoDesc.PS.BytecodeLength = fsBytecode->GetBufferSize(); - psoDesc.PS.pShaderBytecode = fsBytecode->GetBufferPointer(); - psoDesc.VS.BytecodeLength = vsBytecode->GetBufferSize(); - psoDesc.VS.pShaderBytecode = vsBytecode->GetBufferPointer(); - psoDesc.NumRenderTargets = 1; - psoDesc.RTVFormats[0] = DXGI_FORMAT_R8G8B8A8_UNORM; - psoDesc.SampleDesc.Count = 1; - psoDesc.SampleMask = UINT_MAX; - - D3D12_INPUT_ELEMENT_DESC IADesc[2] = {}; - IADesc[0].SemanticName = "POSITION"; - IADesc[0].Format = DXGI_FORMAT_R32G32_FLOAT; - IADesc[1].SemanticName = "TEXCOORDS"; - IADesc[1].Format = DXGI_FORMAT_R32G32_FLOAT; - IADesc[1].AlignedByteOffset = 2 * sizeof(float); - - psoDesc.InputLayout.NumElements = 2; - psoDesc.InputLayout.pInputElementDescs = IADesc; - - psoDesc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE; - 
psoDesc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID; - - D3D12_DESCRIPTOR_RANGE descriptorRange[2] = {}; - // Textures - descriptorRange[0].BaseShaderRegister = 0; - descriptorRange[0].NumDescriptors = 1; - descriptorRange[0].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; - descriptorRange[1].BaseShaderRegister = 0; - descriptorRange[1].NumDescriptors = 1; - descriptorRange[1].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER; - D3D12_ROOT_PARAMETER RP[2] = {}; - RP[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; - RP[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; - RP[0].DescriptorTable.pDescriptorRanges = &descriptorRange[0]; - RP[0].DescriptorTable.NumDescriptorRanges = 1; - RP[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; - RP[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; - RP[1].DescriptorTable.pDescriptorRanges = &descriptorRange[1]; - RP[1].DescriptorTable.NumDescriptorRanges = 1; - - D3D12_ROOT_SIGNATURE_DESC rootSignatureDesc = {}; - rootSignatureDesc.Flags = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT; - rootSignatureDesc.NumParameters = 2; - rootSignatureDesc.pParameters = RP; - - Microsoft::WRL::ComPtr rootSignatureBlob; - - hr = wrapD3D12SerializeRootSignature(&rootSignatureDesc, D3D_ROOT_SIGNATURE_VERSION_1, &rootSignatureBlob, &errorBlob); - if (hr != S_OK) - { - const char *tmp = (const char*)errorBlob->GetBufferPointer(); - LOG_ERROR(RSX, tmp); - } - - hr = device->CreateRootSignature(0, rootSignatureBlob->GetBufferPointer(), rootSignatureBlob->GetBufferSize(), IID_PPV_ARGS(&m_rootSignature)); - - psoDesc.pRootSignature = m_rootSignature; - psoDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; - psoDesc.BlendState.RenderTarget[0].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL; - - check(device->CreateGraphicsPipelineState(&psoDesc, IID_PPV_ARGS(&m_PSO))); - - - float quadVertex[16] = { - -1., -1., 0., 1., - -1., 1., 0., 0., - 1., -1., 1., 1., - 1., 1., 1., 0., - }; - 
- D3D12_HEAP_PROPERTIES heapProp = {}; - heapProp.Type = D3D12_HEAP_TYPE_UPLOAD; - check( - device->CreateCommittedResource( - &heapProp, - D3D12_HEAP_FLAG_NONE, - &getBufferResourceDesc(16 * sizeof(float)), - D3D12_RESOURCE_STATE_GENERIC_READ, - nullptr, - IID_PPV_ARGS(&m_vertexBuffer) - )); - - void *tmp; - m_vertexBuffer->Map(0, nullptr, &tmp); - memcpy(tmp, quadVertex, 16 * sizeof(float)); - m_vertexBuffer->Unmap(0, nullptr); - - D3D12_DESCRIPTOR_HEAP_DESC heapDesc = {}; - heapDesc.NumDescriptors = 2; - heapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; - heapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; - - check( - device->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(&m_textureDescriptorHeap)) - ); - heapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER; - check( - device->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(&m_samplerDescriptorHeap)) - ); -} void D3D12GSRender::Shader::Release() { @@ -523,24 +305,7 @@ D3D12GSRender::D3D12GSRender() m_perFrameStorage[1].Init(m_device); m_perFrameStorage[1].Reset(); - // Convert shader - const auto &p = compileF32toU8CS(); - check( - m_device->CreateRootSignature(0, p.second->GetBufferPointer(), p.second->GetBufferSize(), IID_PPV_ARGS(&m_convertRootSignature)) - ); - - D3D12_COMPUTE_PIPELINE_STATE_DESC computePipelineStateDesc = {}; - computePipelineStateDesc.CS.BytecodeLength = p.first->GetBufferSize(); - computePipelineStateDesc.CS.pShaderBytecode = p.first->GetBufferPointer(); - computePipelineStateDesc.pRootSignature = m_convertRootSignature; - - check( - m_device->CreateComputePipelineState(&computePipelineStateDesc, IID_PPV_ARGS(&m_convertPSO)) - ); - - p.first->Release(); - p.second->Release(); - + initConvertShader(); m_outputScalingPass.Init(m_device); D3D12_HEAP_PROPERTIES hp = {}; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 7080399a00..4235426c0f 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -284,6 
+284,7 @@ private: */ ID3D12PipelineState *m_convertPSO; ID3D12RootSignature *m_convertRootSignature; + void initConvertShader(); /** diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Utils.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Utils.cpp new file mode 100644 index 0000000000..1e51d30ab1 --- /dev/null +++ b/rpcs3/Emu/RSX/D3D12/D3D12Utils.cpp @@ -0,0 +1,248 @@ +/** +* Contains utility shaders +*/ +#include "stdafx.h" +#if defined(DX12_SUPPORT) +#include "D3D12GSRender.h" +#include +#define STRINGIFY(x) #x + +extern PFN_D3D12_SERIALIZE_ROOT_SIGNATURE wrapD3D12SerializeRootSignature; + + /** + * returns bytecode and root signature of a Compute Shader converting texture from + * one format to another + */ +static +std::pair compileF32toU8CS() +{ + const char *shaderCode = STRINGIFY( + Texture2D InputTexture : register(t0); \n + RWTexture2D OutputTexture : register(u0);\n + + [numthreads(8, 8, 1)]\n + void main(uint3 Id : SV_DispatchThreadID)\n + { \n + OutputTexture[Id.xy] = InputTexture.Load(uint3(Id.xy, 0));\n + } + ); + + ID3DBlob *bytecode; + Microsoft::WRL::ComPtr errorBlob; + HRESULT hr = D3DCompile(shaderCode, strlen(shaderCode), "test", nullptr, nullptr, "main", "cs_5_0", 0, 0, &bytecode, errorBlob.GetAddressOf()); + if (hr != S_OK) + { + const char *tmp = (const char*)errorBlob->GetBufferPointer(); + LOG_ERROR(RSX, tmp); + } + D3D12_DESCRIPTOR_RANGE descriptorRange[2] = {}; + // Textures + descriptorRange[0].BaseShaderRegister = 0; + descriptorRange[0].NumDescriptors = 1; + descriptorRange[0].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; + descriptorRange[1].BaseShaderRegister = 0; + descriptorRange[1].NumDescriptors = 1; + descriptorRange[1].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV; + descriptorRange[1].OffsetInDescriptorsFromTableStart = 1; + D3D12_ROOT_PARAMETER RP[2] = {}; + RP[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + RP[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + RP[0].DescriptorTable.pDescriptorRanges = &descriptorRange[0]; + 
RP[0].DescriptorTable.NumDescriptorRanges = 2; + + D3D12_ROOT_SIGNATURE_DESC rootSignatureDesc = {}; + rootSignatureDesc.NumParameters = 1; + rootSignatureDesc.pParameters = RP; + + ID3DBlob *rootSignatureBlob; + + hr = wrapD3D12SerializeRootSignature(&rootSignatureDesc, D3D_ROOT_SIGNATURE_VERSION_1, &rootSignatureBlob, &errorBlob); + if (hr != S_OK) + { + const char *tmp = (const char*)errorBlob->GetBufferPointer(); + LOG_ERROR(RSX, tmp); + } + + return std::make_pair(bytecode, rootSignatureBlob); +} + + +void D3D12GSRender::Shader::Init(ID3D12Device *device) +{ + const char *fsCode = STRINGIFY( + Texture2D InputTexture : register(t0); \n + sampler bilinearSampler : register(s0); \n + + struct PixelInput \n + { \n + float4 Pos : SV_POSITION; \n + float2 TexCoords : TEXCOORDS0; \n + }; \n + + float4 main(PixelInput In) : SV_TARGET \n + { \n + return InputTexture.Sample(bilinearSampler, In.TexCoords); \n + } + ); + + Microsoft::WRL::ComPtr fsBytecode; + Microsoft::WRL::ComPtr errorBlob; + HRESULT hr = D3DCompile(fsCode, strlen(fsCode), "test", nullptr, nullptr, "main", "ps_5_0", 0, 0, &fsBytecode, errorBlob.GetAddressOf()); + if (hr != S_OK) + { + const char *tmp = (const char*)errorBlob->GetBufferPointer(); + LOG_ERROR(RSX, tmp); + } + + const char *vsCode = STRINGIFY( + struct VertexInput \n + { \n + float2 Pos : POSITION; \n + float2 TexCoords : TEXCOORDS0; \n + }; \n + + struct PixelInput \n + { \n + float4 Pos : SV_POSITION; \n + float2 TexCoords : TEXCOORDS0; \n + }; \n + + PixelInput main(VertexInput In) \n + { \n + PixelInput Out; \n + Out.Pos = float4(In.Pos, 0., 1.); \n + Out.TexCoords = In.TexCoords; \n + return Out; \n + } + ); + + Microsoft::WRL::ComPtr vsBytecode; + hr = D3DCompile(vsCode, strlen(vsCode), "test", nullptr, nullptr, "main", "vs_5_0", 0, 0, &vsBytecode, errorBlob.GetAddressOf()); + if (hr != S_OK) + { + const char *tmp = (const char*)errorBlob->GetBufferPointer(); + LOG_ERROR(RSX, tmp); + } + + D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = 
{}; + psoDesc.PS.BytecodeLength = fsBytecode->GetBufferSize(); + psoDesc.PS.pShaderBytecode = fsBytecode->GetBufferPointer(); + psoDesc.VS.BytecodeLength = vsBytecode->GetBufferSize(); + psoDesc.VS.pShaderBytecode = vsBytecode->GetBufferPointer(); + psoDesc.NumRenderTargets = 1; + psoDesc.RTVFormats[0] = DXGI_FORMAT_R8G8B8A8_UNORM; + psoDesc.SampleDesc.Count = 1; + psoDesc.SampleMask = UINT_MAX; + + D3D12_INPUT_ELEMENT_DESC IADesc[2] = {}; + IADesc[0].SemanticName = "POSITION"; + IADesc[0].Format = DXGI_FORMAT_R32G32_FLOAT; + IADesc[1].SemanticName = "TEXCOORDS"; + IADesc[1].Format = DXGI_FORMAT_R32G32_FLOAT; + IADesc[1].AlignedByteOffset = 2 * sizeof(float); + + psoDesc.InputLayout.NumElements = 2; + psoDesc.InputLayout.pInputElementDescs = IADesc; + + psoDesc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE; + psoDesc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID; + + D3D12_DESCRIPTOR_RANGE descriptorRange[2] = {}; + // Textures + descriptorRange[0].BaseShaderRegister = 0; + descriptorRange[0].NumDescriptors = 1; + descriptorRange[0].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; + descriptorRange[1].BaseShaderRegister = 0; + descriptorRange[1].NumDescriptors = 1; + descriptorRange[1].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER; + D3D12_ROOT_PARAMETER RP[2] = {}; + RP[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + RP[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + RP[0].DescriptorTable.pDescriptorRanges = &descriptorRange[0]; + RP[0].DescriptorTable.NumDescriptorRanges = 1; + RP[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + RP[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + RP[1].DescriptorTable.pDescriptorRanges = &descriptorRange[1]; + RP[1].DescriptorTable.NumDescriptorRanges = 1; + + D3D12_ROOT_SIGNATURE_DESC rootSignatureDesc = {}; + rootSignatureDesc.Flags = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT; + rootSignatureDesc.NumParameters = 2; + rootSignatureDesc.pParameters = RP; + + 
Microsoft::WRL::ComPtr rootSignatureBlob; + + hr = wrapD3D12SerializeRootSignature(&rootSignatureDesc, D3D_ROOT_SIGNATURE_VERSION_1, &rootSignatureBlob, &errorBlob); + if (hr != S_OK) + { + const char *tmp = (const char*)errorBlob->GetBufferPointer(); + LOG_ERROR(RSX, tmp); + } + + hr = device->CreateRootSignature(0, rootSignatureBlob->GetBufferPointer(), rootSignatureBlob->GetBufferSize(), IID_PPV_ARGS(&m_rootSignature)); + + psoDesc.pRootSignature = m_rootSignature; + psoDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + psoDesc.BlendState.RenderTarget[0].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL; + + check(device->CreateGraphicsPipelineState(&psoDesc, IID_PPV_ARGS(&m_PSO))); + + + float quadVertex[16] = { + -1., -1., 0., 1., + -1., 1., 0., 0., + 1., -1., 1., 1., + 1., 1., 1., 0., + }; + + D3D12_HEAP_PROPERTIES heapProp = {}; + heapProp.Type = D3D12_HEAP_TYPE_UPLOAD; + check( + device->CreateCommittedResource( + &heapProp, + D3D12_HEAP_FLAG_NONE, + &getBufferResourceDesc(16 * sizeof(float)), + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&m_vertexBuffer) + )); + + void *tmp; + m_vertexBuffer->Map(0, nullptr, &tmp); + memcpy(tmp, quadVertex, 16 * sizeof(float)); + m_vertexBuffer->Unmap(0, nullptr); + + D3D12_DESCRIPTOR_HEAP_DESC heapDesc = {}; + heapDesc.NumDescriptors = 2; + heapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; + heapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; + + check( + device->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(&m_textureDescriptorHeap)) + ); + heapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER; + check( + device->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(&m_samplerDescriptorHeap)) + ); +} + +void D3D12GSRender::initConvertShader() +{ + const auto &p = compileF32toU8CS(); + check( + m_device->CreateRootSignature(0, p.second->GetBufferPointer(), p.second->GetBufferSize(), IID_PPV_ARGS(&m_convertRootSignature)) + ); + + D3D12_COMPUTE_PIPELINE_STATE_DESC 
computePipelineStateDesc = {}; + computePipelineStateDesc.CS.BytecodeLength = p.first->GetBufferSize(); + computePipelineStateDesc.CS.pShaderBytecode = p.first->GetBufferPointer(); + computePipelineStateDesc.pRootSignature = m_convertRootSignature; + + check( + m_device->CreateComputePipelineState(&computePipelineStateDesc, IID_PPV_ARGS(&m_convertPSO)) + ); + + p.first->Release(); + p.second->Release(); +} +#endif \ No newline at end of file diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index 2612992e5e..ca795a3025 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -47,6 +47,7 @@ + diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters index 1895f11773..ee8cda35ba 100644 --- a/rpcs3/emucore.vcxproj.filters +++ b/rpcs3/emucore.vcxproj.filters @@ -992,6 +992,9 @@ Emu\GPU\RSX\D3D12 + + Emu\GPU\RSX\D3D12 + From 6f0c74cf767ccba5d0a45c476822171186c54c8b Mon Sep 17 00:00:00 2001 From: vlj Date: Sun, 28 Jun 2015 17:51:29 +0200 Subject: [PATCH 318/343] d3d12: Fix crash with write color/depth buffer enabled --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 61 ++++++++++++++++----------- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 1 + 2 files changed, 38 insertions(+), 24 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index dc0443f9ef..71b4402388 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -149,34 +149,40 @@ void D3D12GSRender::Shader::Release() extern std::function gfxHandler; +bool D3D12GSRender::invalidateTexture(u32 addr) +{ + bool handled = false; + auto It = m_protectedTextures.begin(), E = m_protectedTextures.end(); + for (; It != E;) + { + auto currentIt = It; + ++It; + auto protectedTexture = *currentIt; + u32 protectedRangeStart = std::get<1>(protectedTexture), protectedRangeSize = std::get<2>(protectedTexture); + if (addr - protectedRangeStart < protectedRangeSize) + { + std::lock_guard lock(mut); + u32 texadrr = 
std::get<0>(protectedTexture); + ID3D12Resource *texToErase = m_texturesCache[texadrr]; + m_texturesCache.erase(texadrr); + m_texToClean.push_back(texToErase); + + vm::page_protect(protectedRangeStart, protectedRangeSize, 0, vm::page_writable, 0); + m_protectedTextures.erase(currentIt); + handled = true; + } + } + return handled; +} + D3D12GSRender::D3D12GSRender() : GSRender(), m_PSO(nullptr) { - gfxHandler = [this](u32 addr) { - bool handled = false; - auto It = m_protectedTextures.begin(), E = m_protectedTextures.end(); - for (; It != E;) - { - auto currentIt = It; - ++It; - auto protectedTexture = *currentIt; - u32 protectedRangeStart = std::get<1>(protectedTexture), protectedRangeSize = std::get<2>(protectedTexture); - if (addr - protectedRangeStart < protectedRangeSize) - { - std::lock_guard lock(mut); - u32 texadrr = std::get<0>(protectedTexture); - LOG_WARNING(RSX, "Modified %x, starting again", texadrr); - ID3D12Resource *texToErase = m_texturesCache[texadrr]; - m_texturesCache.erase(texadrr); - m_texToClean.push_back(texToErase); - - vm::page_protect(protectedRangeStart, protectedRangeSize, 0, vm::page_writable, 0); - m_protectedTextures.erase(currentIt); - handled = true; - } - } - return handled; + bool result = invalidateTexture(addr); + if (result) + LOG_WARNING(RSX, "Reporting Cell writing to %x", addr); + return result; }; loadD3D12FunctionPointers(); if (Ini.GSDebugOutputEnable.GetValue()) @@ -1072,6 +1078,8 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) dst.PlacedFootprint.Footprint.Width = m_surface_clip_w; dst.PlacedFootprint.Footprint.RowPitch = (UINT)depthRowPitch; downloadCommandList->CopyTextureRegion(&dst, 0, 0, 0, &src, nullptr); + + invalidateTexture(GetAddress(m_surface_offset_z, m_context_dma_z - 0xfeed0000)); } ID3D12Resource *rtt0, *rtt1, *rtt2, *rtt3; @@ -1108,6 +1116,11 @@ void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) if (m_context_dma_color_d) rtt3 = 
writeColorBuffer(m_rtts.m_currentlyBoundRenderTargets[3], downloadCommandList); break; } + + invalidateTexture(GetAddress(m_surface_offset_a, m_context_dma_color_a - 0xfeed0000)); + invalidateTexture(GetAddress(m_surface_offset_b, m_context_dma_color_b - 0xfeed0000)); + invalidateTexture(GetAddress(m_surface_offset_c, m_context_dma_color_c - 0xfeed0000)); + invalidateTexture(GetAddress(m_surface_offset_d, m_context_dma_color_d - 0xfeed0000)); } if (needTransfer) { diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 4235426c0f..76c09dff3d 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -237,6 +237,7 @@ private: std::mutex mut; std::list > m_protectedTextures; // Texaddress, start of protected range, size of protected range std::vector m_texToClean; + bool invalidateTexture(u32 addr); GarbageCollectionThread m_GC; // Copy of RTT to be used as texture From bf394d4f56ecaa22d44b5eccf440662bfe26d9a9 Mon Sep 17 00:00:00 2001 From: vlj Date: Sun, 28 Jun 2015 22:35:13 +0200 Subject: [PATCH 319/343] d3d12: Support triangle fan Fix missing character members in the guided fate paradox --- rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp | 49 ++++++++++++++++++++------- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 5 +-- 2 files changed, 40 insertions(+), 14 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index e78b22029c..40ce69b5f6 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -313,7 +313,7 @@ std::pair, D3D12_INDEX_BUFFER_VIEW> D3D12G result.first.push_back(vertexBufferView); } - // Only handle quads now + // Only handle quads and triangle fan now switch (m_draw_mode - 1) { default: @@ -323,11 +323,11 @@ std::pair, D3D12_INDEX_BUFFER_VIEW> D3D12G case GL_LINE_STRIP: case GL_TRIANGLES: case GL_TRIANGLE_STRIP: - case GL_TRIANGLE_FAN: case GL_QUAD_STRIP: case GL_POLYGON: m_forcedIndexBuffer = false; break; + case 
GL_TRIANGLE_FAN: case GL_QUADS: m_forcedIndexBuffer = true; break; @@ -364,7 +364,17 @@ std::pair, D3D12_INDEX_BUFFER_VIEW> D3D12G else if (indexed_draw && m_forcedIndexBuffer) indexCount = 6 * m_indexed_array.m_data.size() / (4 * indexSize); else - indexCount = m_draw_array_count * 6 / 4; + { + switch (m_draw_mode - 1) + { + case GL_TRIANGLE_FAN: + indexCount = (m_draw_array_count - 2) * 3; + break; + case GL_QUADS: + indexCount = m_draw_array_count * 6 / 4; + break; + } + } size_t subBufferSize = align(indexCount * indexSize, 64); assert(m_vertexIndexData.canAlloc(subBufferSize)); @@ -386,6 +396,7 @@ std::pair, D3D12_INDEX_BUFFER_VIEW> D3D12G streamBuffer(bufferMap, m_indexed_array.m_data.data(), subBufferSize); else if (indexed_draw && m_forcedIndexBuffer) { + // Only quads supported now switch (m_indexed_array.m_type) { case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_32: @@ -399,17 +410,31 @@ std::pair, D3D12_INDEX_BUFFER_VIEW> D3D12G else { unsigned short *typedDst = static_cast(bufferMap); - for (unsigned i = 0; i < m_draw_array_count / 4; i++) + switch (m_draw_mode - 1) { - // First triangle - typedDst[6 * i] = 4 * i; - typedDst[6 * i + 1] = 4 * i + 1; - typedDst[6 * i + 2] = 4 * i + 2; - // Second triangle - typedDst[6 * i + 3] = 4 * i + 2; - typedDst[6 * i + 4] = 4 * i + 3; - typedDst[6 * i + 5] = 4 * i; + case GL_TRIANGLE_FAN: + for (unsigned i = 0; i < (m_draw_array_count - 2); i++) + { + typedDst[3 * i] = 0; + typedDst[3 * i + 1] = i + 2 - 1; + typedDst[3 * i + 2] = i + 2; + } + break; + case GL_QUADS: + for (unsigned i = 0; i < m_draw_array_count / 4; i++) + { + // First triangle + typedDst[6 * i] = 4 * i; + typedDst[6 * i + 1] = 4 * i + 1; + typedDst[6 * i + 2] = 4 * i + 2; + // Second triangle + typedDst[6 * i + 3] = 4 * i + 2; + typedDst[6 * i + 4] = 4 * i + 3; + typedDst[6 * i + 5] = 4 * i; + } + break; } + } indexBuffer->Unmap(0, nullptr); m_vertexIndexData.m_resourceStoredSinceLastSync.push_back(std::make_tuple(heapOffset, subBufferSize, indexBuffer)); diff 
--git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 71b4402388..644b3cab6a 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -664,7 +664,8 @@ void D3D12GSRender::ExecCMD() commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); break; case GL_TRIANGLE_FAN: - commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ); + commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + requireIndexBuffer = true; break; case GL_QUADS: commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); @@ -680,7 +681,7 @@ void D3D12GSRender::ExecCMD() // Indexed quad if (m_forcedIndexBuffer && m_indexed_array.m_count) commandList->DrawIndexedInstanced((UINT)indexCount, 1, 0, 0, 0); - // Non indexed quad + // Non indexed quad/triangle fan else if (m_forcedIndexBuffer && !m_indexed_array.m_count) commandList->DrawIndexedInstanced((UINT)indexCount, 1, 0, (UINT)m_draw_array_first, 0); // Indexed triangles From 91809c09d45859342edb62387a98d1e404df40c8 Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 29 Jun 2015 00:43:25 +0200 Subject: [PATCH 320/343] d3d12: use memcpy instead of streamToBuffer for alpha settings streamToBuffer doesn't work well for data < 128 bits --- rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index 40ce69b5f6..c302ba8e0f 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -481,8 +481,8 @@ void D3D12GSRender::setScaleOffset() check(m_constantsData.m_heap->Map(0, &range, &scaleOffsetMap)); streamToBuffer((char*)scaleOffsetMap + heapOffset, scaleOffsetMat, 16 * sizeof(float)); int isAlphaTested = m_set_alpha_test; - streamToBuffer((char*)scaleOffsetMap + heapOffset + 16 * sizeof(float), &isAlphaTested, sizeof(int)); - 
streamToBuffer((char*)scaleOffsetMap + heapOffset + 17 * sizeof(float), &m_alpha_ref, sizeof(float)); + memcpy((char*)scaleOffsetMap + heapOffset + 16 * sizeof(float), &isAlphaTested, sizeof(int)); + memcpy((char*)scaleOffsetMap + heapOffset + 17 * sizeof(float), &m_alpha_ref, sizeof(float)); m_constantsData.m_heap->Unmap(0, &range); D3D12_CONSTANT_BUFFER_VIEW_DESC constantBufferViewDesc = {}; From 1f3fbe91e269f17414d67a69f38f4e4511733885 Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 29 Jun 2015 01:28:54 +0200 Subject: [PATCH 321/343] d3d12: Don't call GetAddress if context_dma is not set --- rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp index 7faab5c99b..f131d0b102 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp @@ -15,11 +15,11 @@ void D3D12GSRender::PrepareRenderTargets() { // FBO location has changed, previous data might be copied - u32 address_a = GetAddress(m_surface_offset_a, m_context_dma_color_a - 0xfeed0000); - u32 address_b = GetAddress(m_surface_offset_b, m_context_dma_color_b - 0xfeed0000); - u32 address_c = GetAddress(m_surface_offset_c, m_context_dma_color_c - 0xfeed0000); - u32 address_d = GetAddress(m_surface_offset_d, m_context_dma_color_d - 0xfeed0000); - u32 address_z = GetAddress(m_surface_offset_z, m_context_dma_z - 0xfeed0000); + u32 address_a = m_set_context_dma_color_a ? GetAddress(m_surface_offset_a, m_context_dma_color_a - 0xfeed0000) : 0; + u32 address_b = m_set_context_dma_color_b ? GetAddress(m_surface_offset_b, m_context_dma_color_b - 0xfeed0000) : 0; + u32 address_c = m_set_context_dma_color_c ? GetAddress(m_surface_offset_c, m_context_dma_color_c - 0xfeed0000) : 0; + u32 address_d = m_set_context_dma_color_d ? 
GetAddress(m_surface_offset_d, m_context_dma_color_d - 0xfeed0000) : 0; + u32 address_z = m_set_context_dma_z ? GetAddress(m_surface_offset_z, m_context_dma_z - 0xfeed0000) : 0; ID3D12GraphicsCommandList *copycmdlist; check(m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, getCurrentResourceStorage().m_commandAllocator, nullptr, IID_PPV_ARGS(©cmdlist))); From 8801abb93a5d72e70449d077bf0ae4b48e6d8ee8 Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 29 Jun 2015 19:20:18 +0200 Subject: [PATCH 322/343] d3d12: Refactor index management Separates it from vertex management and move all the logic (forced indexing, index count calculation...) outside of GSRender. --- rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp | 241 ++++++++++++++------------ rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 34 ++-- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 11 +- 3 files changed, 151 insertions(+), 135 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index c302ba8e0f..db8e577d13 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -275,9 +275,9 @@ isContained(const std::vector > &ranges, const std::pair, D3D12_INDEX_BUFFER_VIEW> D3D12GSRender::UploadVertexBuffers(bool indexed_draw) +std::vector D3D12GSRender::UploadVertexBuffers(bool indexed_draw) { - std::pair, D3D12_INDEX_BUFFER_VIEW> result; + std::vector result; const std::vector &vertexBufferFormat = FormatVertexData(m_vertex_data); m_IASet = getIALayout(m_device, vertexBufferFormat, m_vertex_data); @@ -310,10 +310,17 @@ std::pair, D3D12_INDEX_BUFFER_VIEW> D3D12G vertexBufferView.BufferLocation = vertexBuffer->GetGPUVirtualAddress(); vertexBufferView.SizeInBytes = (UINT)subBufferSize; vertexBufferView.StrideInBytes = (UINT)vbf.stride; - result.first.push_back(vertexBufferView); + result.push_back(vertexBufferView); } + return result; +} + +D3D12_INDEX_BUFFER_VIEW D3D12GSRender::uploadIndexBuffers(bool indexed_draw) +{ + D3D12_INDEX_BUFFER_VIEW 
indexBufferView = {}; // Only handle quads and triangle fan now + bool forcedIndexBuffer = false; switch (m_draw_mode - 1) { default: @@ -325,126 +332,140 @@ std::pair, D3D12_INDEX_BUFFER_VIEW> D3D12G case GL_TRIANGLE_STRIP: case GL_QUAD_STRIP: case GL_POLYGON: - m_forcedIndexBuffer = false; + forcedIndexBuffer = false; break; case GL_TRIANGLE_FAN: case GL_QUADS: - m_forcedIndexBuffer = true; + forcedIndexBuffer = true; break; } - if (indexed_draw || m_forcedIndexBuffer) + // No need for index buffer + if (!indexed_draw && !forcedIndexBuffer) { - D3D12_INDEX_BUFFER_VIEW indexBufferView = {}; - size_t indexSize; + m_renderingInfo.m_indexed = false; + m_renderingInfo.m_count = m_draw_array_count; + m_renderingInfo.m_baseVertex = m_draw_array_first; + return indexBufferView; + } - if (!indexed_draw) + m_renderingInfo.m_indexed = true; + + // Index type + size_t indexSize; + if (!indexed_draw) + { + indexBufferView.Format = DXGI_FORMAT_R16_UINT; + indexSize = 2; + } + else + { + switch (m_indexed_array.m_type) { + default: abort(); + case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16: indexBufferView.Format = DXGI_FORMAT_R16_UINT; indexSize = 2; + break; + case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_32: + indexBufferView.Format = DXGI_FORMAT_R32_UINT; + indexSize = 4; + break; } - else - { - switch (m_indexed_array.m_type) - { - default: abort(); - case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16: - indexBufferView.Format = DXGI_FORMAT_R16_UINT; - indexSize = 2; - break; - case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_32: - indexBufferView.Format = DXGI_FORMAT_R32_UINT; - indexSize = 4; - break; - } - } - - if (indexed_draw && !m_forcedIndexBuffer) - indexCount = m_indexed_array.m_data.size() / indexSize; - else if (indexed_draw && m_forcedIndexBuffer) - indexCount = 6 * m_indexed_array.m_data.size() / (4 * indexSize); - else - { - switch (m_draw_mode - 1) - { - case GL_TRIANGLE_FAN: - indexCount = (m_draw_array_count - 2) * 3; - break; - case GL_QUADS: - indexCount = m_draw_array_count * 6 / 4; - break; - } 
- } - size_t subBufferSize = align(indexCount * indexSize, 64); - - assert(m_vertexIndexData.canAlloc(subBufferSize)); - size_t heapOffset = m_vertexIndexData.alloc(subBufferSize); - - ID3D12Resource *indexBuffer; - check(m_device->CreatePlacedResource( - m_vertexIndexData.m_heap, - heapOffset, - &getBufferResourceDesc(subBufferSize), - D3D12_RESOURCE_STATE_GENERIC_READ, - nullptr, - IID_PPV_ARGS(&indexBuffer) - )); - - void *bufferMap; - check(indexBuffer->Map(0, nullptr, (void**)&bufferMap)); - if (indexed_draw && !m_forcedIndexBuffer) - streamBuffer(bufferMap, m_indexed_array.m_data.data(), subBufferSize); - else if (indexed_draw && m_forcedIndexBuffer) - { - // Only quads supported now - switch (m_indexed_array.m_type) - { - case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_32: - expandIndexedQuads(bufferMap, m_indexed_array.m_data.data(), m_indexed_array.m_data.size() / 4); - break; - case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16: - expandIndexedQuads(bufferMap, m_indexed_array.m_data.data(), m_indexed_array.m_data.size() / 2); - break; - } - } - else - { - unsigned short *typedDst = static_cast(bufferMap); - switch (m_draw_mode - 1) - { - case GL_TRIANGLE_FAN: - for (unsigned i = 0; i < (m_draw_array_count - 2); i++) - { - typedDst[3 * i] = 0; - typedDst[3 * i + 1] = i + 2 - 1; - typedDst[3 * i + 2] = i + 2; - } - break; - case GL_QUADS: - for (unsigned i = 0; i < m_draw_array_count / 4; i++) - { - // First triangle - typedDst[6 * i] = 4 * i; - typedDst[6 * i + 1] = 4 * i + 1; - typedDst[6 * i + 2] = 4 * i + 2; - // Second triangle - typedDst[6 * i + 3] = 4 * i + 2; - typedDst[6 * i + 4] = 4 * i + 3; - typedDst[6 * i + 5] = 4 * i; - } - break; - } - - } - indexBuffer->Unmap(0, nullptr); - m_vertexIndexData.m_resourceStoredSinceLastSync.push_back(std::make_tuple(heapOffset, subBufferSize, indexBuffer)); - - indexBufferView.SizeInBytes = (UINT)subBufferSize; - indexBufferView.BufferLocation = indexBuffer->GetGPUVirtualAddress(); - - result.second = indexBufferView; } - return 
result; + + // Index count + if (indexed_draw && !forcedIndexBuffer) + m_renderingInfo.m_count = m_indexed_array.m_data.size() / indexSize; + else if (indexed_draw && forcedIndexBuffer) + m_renderingInfo.m_count = 6 * m_indexed_array.m_data.size() / (4 * indexSize); + else + { + switch (m_draw_mode - 1) + { + case GL_TRIANGLE_FAN: + m_renderingInfo.m_count = (m_draw_array_count - 2) * 3; + break; + case GL_QUADS: + m_renderingInfo.m_count = m_draw_array_count * 6 / 4; + break; + } + } + + // Base vertex + if (!indexed_draw && forcedIndexBuffer) + m_renderingInfo.m_baseVertex = m_draw_array_first; + else + m_renderingInfo.m_baseVertex = 0; + + // Alloc + size_t subBufferSize = align(m_renderingInfo.m_count * indexSize, 64); + + assert(m_vertexIndexData.canAlloc(subBufferSize)); + size_t heapOffset = m_vertexIndexData.alloc(subBufferSize); + + ID3D12Resource *indexBuffer; + check(m_device->CreatePlacedResource( + m_vertexIndexData.m_heap, + heapOffset, + &getBufferResourceDesc(subBufferSize), + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&indexBuffer) + )); + + void *bufferMap; + check(indexBuffer->Map(0, nullptr, (void**)&bufferMap)); + if (indexed_draw && !forcedIndexBuffer) + streamBuffer(bufferMap, m_indexed_array.m_data.data(), subBufferSize); + else if (indexed_draw && forcedIndexBuffer) + { + // Only quads supported now + switch (m_indexed_array.m_type) + { + case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_32: + expandIndexedQuads(bufferMap, m_indexed_array.m_data.data(), m_indexed_array.m_data.size() / 4); + break; + case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16: + expandIndexedQuads(bufferMap, m_indexed_array.m_data.data(), m_indexed_array.m_data.size() / 2); + break; + } + } + else + { + unsigned short *typedDst = static_cast(bufferMap); + switch (m_draw_mode - 1) + { + case GL_TRIANGLE_FAN: + for (unsigned i = 0; i < (m_draw_array_count - 2); i++) + { + typedDst[3 * i] = 0; + typedDst[3 * i + 1] = i + 2 - 1; + typedDst[3 * i + 2] = i + 2; + } + break; + case 
GL_QUADS: + for (unsigned i = 0; i < m_draw_array_count / 4; i++) + { + // First triangle + typedDst[6 * i] = 4 * i; + typedDst[6 * i + 1] = 4 * i + 1; + typedDst[6 * i + 2] = 4 * i + 2; + // Second triangle + typedDst[6 * i + 3] = 4 * i + 2; + typedDst[6 * i + 4] = 4 * i + 3; + typedDst[6 * i + 5] = 4 * i; + } + break; + } + + } + indexBuffer->Unmap(0, nullptr); + m_vertexIndexData.m_resourceStoredSinceLastSync.push_back(std::make_tuple(heapOffset, subBufferSize, indexBuffer)); + + indexBufferView.SizeInBytes = (UINT)subBufferSize; + indexBufferView.BufferLocation = indexBuffer->GetGPUVirtualAddress(); + return indexBufferView; } void D3D12GSRender::setScaleOffset() diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 644b3cab6a..92b282bbe4 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -508,10 +508,11 @@ void D3D12GSRender::ExecCMD() std::chrono::time_point startVertexTime = std::chrono::system_clock::now(); if (m_indexed_array.m_count || m_draw_array_count) { - const std::pair, D3D12_INDEX_BUFFER_VIEW> &vertexIndexBufferViews = UploadVertexBuffers(m_indexed_array.m_count ? true : false); - commandList->IASetVertexBuffers(0, (UINT)vertexIndexBufferViews.first.size(), vertexIndexBufferViews.first.data()); - if (m_forcedIndexBuffer || m_indexed_array.m_count) - commandList->IASetIndexBuffer(&vertexIndexBufferViews.second); + const std::vector &vertexBufferViews = UploadVertexBuffers(m_indexed_array.m_count ? true : false); + const D3D12_INDEX_BUFFER_VIEW &indexBufferView = uploadIndexBuffers(m_indexed_array.m_count ? 
true : false); + commandList->IASetVertexBuffers(0, (UINT)vertexBufferViews.size(), vertexBufferViews.data()); + if (m_renderingInfo.m_indexed) + commandList->IASetIndexBuffer(&indexBufferView); } std::chrono::time_point endVertexTime = std::chrono::system_clock::now(); m_timers.m_vertexUploadDuration += std::chrono::duration_cast(endVertexTime - startVertexTime).count(); @@ -635,14 +636,13 @@ void D3D12GSRender::ExecCMD() D3D12_RECT box = { - 0, + 0, 0, (LONG)m_surface_clip_w, (LONG)m_surface_clip_h, }; commandList->RSSetScissorRects(1, &box); - bool requireIndexBuffer = false; switch (m_draw_mode - 1) { case GL_POINTS: @@ -664,31 +664,21 @@ void D3D12GSRender::ExecCMD() commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); break; case GL_TRIANGLE_FAN: - commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); - requireIndexBuffer = true; - break; case GL_QUADS: commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); - requireIndexBuffer = true; + break; case GL_QUAD_STRIP: case GL_POLYGON: default: commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); -// LOG_ERROR(RSX, "Unsupported primitive type"); + LOG_ERROR(RSX, "Unsupported primitive type"); break; } - // Indexed quad - if (m_forcedIndexBuffer && m_indexed_array.m_count) - commandList->DrawIndexedInstanced((UINT)indexCount, 1, 0, 0, 0); - // Non indexed quad/triangle fan - else if (m_forcedIndexBuffer && !m_indexed_array.m_count) - commandList->DrawIndexedInstanced((UINT)indexCount, 1, 0, (UINT)m_draw_array_first, 0); - // Indexed triangles - else if (m_indexed_array.m_count) - commandList->DrawIndexedInstanced((UINT)m_indexed_array.m_data.size() / ((m_indexed_array.m_type == CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16) ? 
2 : 4), 1, 0, 0, 0); - else if (m_draw_array_count) - commandList->DrawInstanced(m_draw_array_count, 1, m_draw_array_first, 0); + if (m_renderingInfo.m_indexed) + commandList->DrawIndexedInstanced((UINT)m_renderingInfo.m_count, 1, 0, m_renderingInfo.m_baseVertex, 0); + else + commandList->DrawInstanced((UINT)m_renderingInfo.m_count, 1, m_renderingInfo.m_baseVertex, 0); check(commandList->Close()); m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)&commandList); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 76c09dff3d..5d5dcbd60b 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -331,8 +331,12 @@ private: DataHeap m_UAVHeap; DataHeap m_readbackResources; - bool m_forcedIndexBuffer; - size_t indexCount; + struct + { + bool m_indexed; + size_t m_count; + size_t m_baseVertex; + } m_renderingInfo; RenderTargets m_rtts; @@ -372,7 +376,8 @@ private: virtual void Close() override; bool LoadProgram(); - std::pair, D3D12_INDEX_BUFFER_VIEW> UploadVertexBuffers(bool indexed_draw = false); + std::vector UploadVertexBuffers(bool indexed_draw = false); + D3D12_INDEX_BUFFER_VIEW uploadIndexBuffers(bool indexed_draw = false); void setScaleOffset(); void FillVertexShaderConstantsBuffer(); void FillPixelShaderConstantsBuffer(); From 612d169b780cc3b597771b674ed8bf9f57eb68e4 Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 29 Jun 2015 19:31:11 +0200 Subject: [PATCH 323/343] d3d12: Add some comments --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 5d5dcbd60b..62532cc876 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -376,8 +376,22 @@ private: virtual void Close() override; bool LoadProgram(); + + /** + * Create as little vertex buffer as possible to hold all vertex info (in upload heap), + * create 
corresponding IA layout that can be used for load program and + * returns a vector of vertex buffer view that can be passed to IASetVertexBufferView(). + */ std::vector UploadVertexBuffers(bool indexed_draw = false); + + /** + * Create index buffer for indexed rendering and non native primitive format if nedded, and + * update m_renderingInfo member accordingly. If m_renderingInfo::m_indexed is true, + * returns an index buffer view that can be passed to a command list. + */ D3D12_INDEX_BUFFER_VIEW uploadIndexBuffers(bool indexed_draw = false); + + void setScaleOffset(); void FillVertexShaderConstantsBuffer(); void FillPixelShaderConstantsBuffer(); From ff219c6035b14e41c4a60b67455c850b780637e6 Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 29 Jun 2015 19:40:22 +0200 Subject: [PATCH 324/343] d3d12: Factorise sampler desc creation in a separate function --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 2 -- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 34 ++++++++++++++++------------ 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 62532cc876..ef84d059f8 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -400,8 +400,6 @@ private: * returns the number of texture uploaded */ size_t UploadTextures(); - size_t GetMaxAniso(size_t aniso); - D3D12_TEXTURE_ADDRESS_MODE GetWrap(size_t wrap); void PrepareRenderTargets(); protected: diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index 4cf68fb251..53f2952b79 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -80,12 +80,9 @@ D3D12_TEXTURE_ADDRESS_MODE getSamplerWrap(size_t wrap) case CELL_GCM_TEXTURE_MIRROR_ONCE_BORDER: return D3D12_TEXTURE_ADDRESS_MODE_MIRROR_ONCE; case CELL_GCM_TEXTURE_MIRROR_ONCE_CLAMP: return D3D12_TEXTURE_ADDRESS_MODE_MIRROR_ONCE; } - return D3D12_TEXTURE_ADDRESS_MODE_WRAP; } - - static D3D12_FILTER 
getSamplerFilter(u32 minFilter, u32 magFilter) { @@ -136,6 +133,23 @@ D3D12_FILTER getSamplerFilter(u32 minFilter, u32 magFilter) return D3D12_ENCODE_BASIC_FILTER(min, mag, mip, D3D12_FILTER_REDUCTION_TYPE_STANDARD); } +static +D3D12_SAMPLER_DESC getSamplerDesc(const RSXTexture &texture) +{ + D3D12_SAMPLER_DESC samplerDesc = {}; + samplerDesc.Filter = getSamplerFilter(texture.GetMinFilter(), texture.GetMagFilter()); + samplerDesc.AddressU = getSamplerWrap(texture.GetWrapS()); + samplerDesc.AddressV = getSamplerWrap(texture.GetWrapT()); + samplerDesc.AddressW = getSamplerWrap(texture.GetWrapR()); + samplerDesc.ComparisonFunc = getSamplerCompFunc[texture.GetZfunc()]; + samplerDesc.MaxAnisotropy = (UINT)getSamplerMaxAniso(texture.GetMaxAniso()); + samplerDesc.MipLODBias = texture.GetBias(); + samplerDesc.BorderColor[4] = (FLOAT)texture.GetBorderColor(); + samplerDesc.MinLOD = (FLOAT)(texture.GetMinLOD() >> 8); + samplerDesc.MaxLOD = (FLOAT)(texture.GetMaxLOD() >> 8); + return samplerDesc; +} + struct MipmapLevelInfo { size_t offset; @@ -866,20 +880,10 @@ size_t D3D12GSRender::UploadTextures() Handle.ptr += (getCurrentResourceStorage().m_currentTextureIndex + usedTexture) * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); m_device->CreateShaderResourceView(vramTexture, &srvDesc, Handle); - D3D12_SAMPLER_DESC samplerDesc = {}; - samplerDesc.Filter = getSamplerFilter(m_textures[i].GetMinFilter(), m_textures[i].GetMagFilter()); - samplerDesc.AddressU = getSamplerWrap(m_textures[i].GetWrapS()); - samplerDesc.AddressV = getSamplerWrap(m_textures[i].GetWrapT()); - samplerDesc.AddressW = getSamplerWrap(m_textures[i].GetWrapR()); - samplerDesc.ComparisonFunc = getSamplerCompFunc[m_textures[i].GetZfunc()]; - samplerDesc.MaxAnisotropy = (UINT)getSamplerMaxAniso(m_textures[i].GetMaxAniso()); - samplerDesc.MipLODBias = m_textures[i].GetBias(); - samplerDesc.BorderColor[4] = (FLOAT)m_textures[i].GetBorderColor(); - samplerDesc.MinLOD = 
(FLOAT)(m_textures[i].GetMinLOD() >> 8); - samplerDesc.MaxLOD = (FLOAT)(m_textures[i].GetMaxLOD() >> 8); + Handle = getCurrentResourceStorage().m_samplerDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); Handle.ptr += (getCurrentResourceStorage().m_currentTextureIndex + usedTexture) * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); - m_device->CreateSampler(&samplerDesc, Handle); + m_device->CreateSampler(&getSamplerDesc(m_textures[i]), Handle); usedTexture++; } From 0e6cd8cd0ef89ee7f874659e6020d5768232654e Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 29 Jun 2015 19:40:31 +0200 Subject: [PATCH 325/343] d3d12: Fix warnings --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 92b282bbe4..82f0ac82c9 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -676,9 +676,9 @@ void D3D12GSRender::ExecCMD() } if (m_renderingInfo.m_indexed) - commandList->DrawIndexedInstanced((UINT)m_renderingInfo.m_count, 1, 0, m_renderingInfo.m_baseVertex, 0); + commandList->DrawIndexedInstanced((UINT)m_renderingInfo.m_count, 1, 0, (UINT)m_renderingInfo.m_baseVertex, 0); else - commandList->DrawInstanced((UINT)m_renderingInfo.m_count, 1, m_renderingInfo.m_baseVertex, 0); + commandList->DrawInstanced((UINT)m_renderingInfo.m_count, 1, (UINT)m_renderingInfo.m_baseVertex, 0); check(commandList->Close()); m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)&commandList); From 725b0c606d6f25db1c8e4b4efd25e1fdb7e2c9ed Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 29 Jun 2015 19:58:00 +0200 Subject: [PATCH 326/343] d3d12: Add some doc --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index ef84d059f8..b31581515e 100644 --- 
a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -333,9 +333,9 @@ private: struct { - bool m_indexed; - size_t m_count; - size_t m_baseVertex; + bool m_indexed; /* Date: Mon, 29 Jun 2015 20:36:09 +0200 Subject: [PATCH 327/343] d3d12: Support targetless flip Fix PS3Doom --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 232 +++++++++++++++++--------- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 2 + 2 files changed, 155 insertions(+), 79 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 82f0ac82c9..2f1f40afb8 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -89,6 +89,7 @@ void D3D12GSRender::ResourceStorage::Reset() void D3D12GSRender::ResourceStorage::Init(ID3D12Device *device) { + m_RAMFramebuffer = nullptr; m_frameFinishedHandle = 0; // Create a global command allocator device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&m_commandAllocator)); @@ -685,105 +686,175 @@ void D3D12GSRender::ExecCMD() m_indexed_array.Reset(); } -void D3D12GSRender::Flip() +static bool +isFlipSurfaceInLocalMemory(u32 surfaceColorTarget) { - ID3D12GraphicsCommandList *commandList; - m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, getCurrentResourceStorage().m_commandAllocator, nullptr, IID_PPV_ARGS(&commandList)); - getCurrentResourceStorage().m_inflightCommandList.push_back(commandList); - - switch (m_surface_color_target) + switch (surfaceColorTarget) { case CELL_GCM_SURFACE_TARGET_0: case CELL_GCM_SURFACE_TARGET_1: case CELL_GCM_SURFACE_TARGET_MRT1: case CELL_GCM_SURFACE_TARGET_MRT2: case CELL_GCM_SURFACE_TARGET_MRT3: + return true; + case CELL_GCM_SURFACE_TARGET_NONE: + default: + return false; + } +} + +void D3D12GSRender::Flip() +{ + ID3D12GraphicsCommandList *commandList; + m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, getCurrentResourceStorage().m_commandAllocator, nullptr, 
IID_PPV_ARGS(&commandList)); + getCurrentResourceStorage().m_inflightCommandList.push_back(commandList); + + ID3D12Resource *resourceToFlip; + float viewport_w, viewport_h; + + if (!isFlipSurfaceInLocalMemory(m_surface_color_target)) { - D3D12_RESOURCE_BARRIER barriers[2] = {}; - barriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - barriers[0].Transition.pResource = m_backBuffer[m_swapChain->GetCurrentBackBufferIndex()]; - barriers[0].Transition.StateBefore = D3D12_RESOURCE_STATE_PRESENT; - barriers[0].Transition.StateAfter = D3D12_RESOURCE_STATE_RENDER_TARGET; + ResourceStorage &storage = getCurrentResourceStorage(); + assert(storage.m_RAMFramebuffer == nullptr); - barriers[1].Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - barriers[1].Transition.pResource = m_rtts.m_currentlyBoundRenderTargets[0]; - barriers[1].Transition.StateBefore = D3D12_RESOURCE_STATE_RENDER_TARGET; - barriers[1].Transition.StateAfter = D3D12_RESOURCE_STATE_GENERIC_READ; + D3D12_HEAP_PROPERTIES heapProp = {}; + heapProp.Type = D3D12_HEAP_TYPE_DEFAULT; - commandList->ResourceBarrier(2, barriers); + size_t w = 0, h = 0, rowPitch = 0; - D3D12_VIEWPORT viewport = + ID3D12Resource *stagingTexture; + if (m_read_buffer) { - 0.f, - 0.f, - (float)RSXThread::m_width, - (float)RSXThread::m_height, - 0.f, - 1.f - }; - commandList->RSSetViewports(1, &viewport); + CellGcmDisplayInfo* buffers = vm::get_ptr(m_gcm_buffers_addr); + u32 addr = GetAddress(buffers[m_gcm_current_buffer].offset, CELL_GCM_LOCATION_LOCAL); + w = buffers[m_gcm_current_buffer].width; + h = buffers[m_gcm_current_buffer].height; + u8 *src_buffer = vm::get_ptr(addr); - D3D12_RECT box = - { - 0, - 0, - (LONG)RSXThread::m_width, - (LONG)RSXThread::m_height, - }; - commandList->RSSetScissorRects(1, &box); - commandList->SetGraphicsRootSignature(m_outputScalingPass.m_rootSignature); - commandList->SetPipelineState(m_outputScalingPass.m_PSO); - D3D12_CPU_DESCRIPTOR_HANDLE CPUHandle; - CPUHandle = 
m_outputScalingPass.m_textureDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); - CPUHandle.ptr += m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) * m_swapChain->GetCurrentBackBufferIndex(); - D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; - // FIXME: Not always true - srvDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; - srvDesc.Texture2D.MipLevels = 1; - srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; - m_device->CreateShaderResourceView(m_rtts.m_currentlyBoundRenderTargets[0], &srvDesc, CPUHandle); + rowPitch = align(w * 4, 256); + size_t textureSize = rowPitch * h; // * 4 for mipmap levels + assert(m_textureUploadData.canAlloc(textureSize)); + size_t heapOffset = m_textureUploadData.alloc(textureSize); - D3D12_SAMPLER_DESC samplerDesc = {}; - samplerDesc.Filter = D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT; - samplerDesc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_WRAP; - samplerDesc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_WRAP; - samplerDesc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_WRAP; - CPUHandle = m_outputScalingPass.m_samplerDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); - CPUHandle.ptr += m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER) * m_swapChain->GetCurrentBackBufferIndex(); - m_device->CreateSampler(&samplerDesc, CPUHandle); + check(m_device->CreatePlacedResource( + m_textureUploadData.m_heap, + heapOffset, + &getBufferResourceDesc(textureSize), + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&stagingTexture) + )); + m_textureUploadData.m_resourceStoredSinceLastSync.push_back(std::make_tuple(heapOffset, textureSize, stagingTexture)); - D3D12_GPU_DESCRIPTOR_HANDLE GPUHandle; - GPUHandle = m_outputScalingPass.m_textureDescriptorHeap->GetGPUDescriptorHandleForHeapStart(); - GPUHandle.ptr += m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) * 
m_swapChain->GetCurrentBackBufferIndex(); - commandList->SetDescriptorHeaps(1, &m_outputScalingPass.m_textureDescriptorHeap); - commandList->SetGraphicsRootDescriptorTable(0, GPUHandle); - GPUHandle = m_outputScalingPass.m_samplerDescriptorHeap->GetGPUDescriptorHandleForHeapStart(); - GPUHandle.ptr += m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER) * m_swapChain->GetCurrentBackBufferIndex(); - commandList->SetDescriptorHeaps(1, &m_outputScalingPass.m_samplerDescriptorHeap); - commandList->SetGraphicsRootDescriptorTable(1, GPUHandle); + void *dstBuffer; + check(stagingTexture->Map(0, nullptr, &dstBuffer)); + for (unsigned row = 0; row < h; row++) + memcpy((char*)dstBuffer + row * rowPitch, (char*)src_buffer + row * w * 4, w * 4); + stagingTexture->Unmap(0, nullptr); + } - CPUHandle = m_backbufferAsRendertarget[m_swapChain->GetCurrentBackBufferIndex()]->GetCPUDescriptorHandleForHeapStart(); - commandList->OMSetRenderTargets(1, &CPUHandle, true, nullptr); - D3D12_VERTEX_BUFFER_VIEW vbv = {}; - vbv.BufferLocation = m_outputScalingPass.m_vertexBuffer->GetGPUVirtualAddress(); - vbv.StrideInBytes = 4 * sizeof(float); - vbv.SizeInBytes = 16 * sizeof(float); - commandList->IASetVertexBuffers(0, 1, &vbv); - commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); + check( + m_device->CreateCommittedResource( + &heapProp, + D3D12_HEAP_FLAG_NONE, + &getTexture2DResourceDesc(w, h, DXGI_FORMAT_R8G8B8A8_UNORM, 1), + D3D12_RESOURCE_STATE_COPY_DEST, + nullptr, + IID_PPV_ARGS(&storage.m_RAMFramebuffer) + ) + ); + D3D12_TEXTURE_COPY_LOCATION src = {}, dst = {}; + dst.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + dst.pResource = storage.m_RAMFramebuffer; + src.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + src.pResource = stagingTexture; + src.PlacedFootprint.Footprint.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + src.PlacedFootprint.Footprint.Width = (UINT)w; + src.PlacedFootprint.Footprint.Height = (UINT)h; + 
src.PlacedFootprint.Footprint.Depth = (UINT)1; + src.PlacedFootprint.Footprint.RowPitch = rowPitch; + commandList->CopyTextureRegion(&dst, 0, 0, 0, &src, nullptr); - commandList->DrawInstanced(4, 1, 0, 0); - - barriers[0].Transition.StateBefore = D3D12_RESOURCE_STATE_RENDER_TARGET; - barriers[0].Transition.StateAfter = D3D12_RESOURCE_STATE_PRESENT; - barriers[1].Transition.StateBefore = D3D12_RESOURCE_STATE_GENERIC_READ; - barriers[1].Transition.StateAfter = D3D12_RESOURCE_STATE_RENDER_TARGET; - commandList->ResourceBarrier(2, barriers); - check(commandList->Close()); - m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)&commandList); + commandList->ResourceBarrier(1, &getResourceBarrierTransition(storage.m_RAMFramebuffer, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_GENERIC_READ)); + resourceToFlip = storage.m_RAMFramebuffer; + viewport_w = (float)w, viewport_h = (float)h; } + else + { + commandList->ResourceBarrier(1, &getResourceBarrierTransition(m_rtts.m_currentlyBoundRenderTargets[0], D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_GENERIC_READ)); + resourceToFlip = m_rtts.m_currentlyBoundRenderTargets[0]; } + commandList->ResourceBarrier(1, &getResourceBarrierTransition(m_backBuffer[m_swapChain->GetCurrentBackBufferIndex()], D3D12_RESOURCE_STATE_PRESENT, D3D12_RESOURCE_STATE_RENDER_TARGET)); + + D3D12_VIEWPORT viewport = + { + 0.f, + 0.f, + (float)m_backBuffer[m_swapChain->GetCurrentBackBufferIndex()]->GetDesc().Width, + (float)m_backBuffer[m_swapChain->GetCurrentBackBufferIndex()]->GetDesc().Height, + 0.f, + 1.f + }; + commandList->RSSetViewports(1, &viewport); + + D3D12_RECT box = + { + 0, + 0, + (LONG)m_backBuffer[m_swapChain->GetCurrentBackBufferIndex()]->GetDesc().Width, + (LONG)m_backBuffer[m_swapChain->GetCurrentBackBufferIndex()]->GetDesc().Height, + }; + commandList->RSSetScissorRects(1, &box); + commandList->SetGraphicsRootSignature(m_outputScalingPass.m_rootSignature); + 
commandList->SetPipelineState(m_outputScalingPass.m_PSO); + D3D12_CPU_DESCRIPTOR_HANDLE CPUHandle; + CPUHandle = m_outputScalingPass.m_textureDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); + CPUHandle.ptr += m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) * m_swapChain->GetCurrentBackBufferIndex(); + D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + // FIXME: Not always true + srvDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + srvDesc.Texture2D.MipLevels = 1; + srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + m_device->CreateShaderResourceView(resourceToFlip, &srvDesc, CPUHandle); + + D3D12_SAMPLER_DESC samplerDesc = {}; + samplerDesc.Filter = D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT; + samplerDesc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_WRAP; + samplerDesc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_WRAP; + samplerDesc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_WRAP; + CPUHandle = m_outputScalingPass.m_samplerDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); + CPUHandle.ptr += m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER) * m_swapChain->GetCurrentBackBufferIndex(); + m_device->CreateSampler(&samplerDesc, CPUHandle); + + D3D12_GPU_DESCRIPTOR_HANDLE GPUHandle; + GPUHandle = m_outputScalingPass.m_textureDescriptorHeap->GetGPUDescriptorHandleForHeapStart(); + GPUHandle.ptr += m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) * m_swapChain->GetCurrentBackBufferIndex(); + commandList->SetDescriptorHeaps(1, &m_outputScalingPass.m_textureDescriptorHeap); + commandList->SetGraphicsRootDescriptorTable(0, GPUHandle); + GPUHandle = m_outputScalingPass.m_samplerDescriptorHeap->GetGPUDescriptorHandleForHeapStart(); + GPUHandle.ptr += m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER) * m_swapChain->GetCurrentBackBufferIndex(); + commandList->SetDescriptorHeaps(1, 
&m_outputScalingPass.m_samplerDescriptorHeap); + commandList->SetGraphicsRootDescriptorTable(1, GPUHandle); + + CPUHandle = m_backbufferAsRendertarget[m_swapChain->GetCurrentBackBufferIndex()]->GetCPUDescriptorHandleForHeapStart(); + commandList->OMSetRenderTargets(1, &CPUHandle, true, nullptr); + D3D12_VERTEX_BUFFER_VIEW vbv = {}; + vbv.BufferLocation = m_outputScalingPass.m_vertexBuffer->GetGPUVirtualAddress(); + vbv.StrideInBytes = 4 * sizeof(float); + vbv.SizeInBytes = 16 * sizeof(float); + commandList->IASetVertexBuffers(0, 1, &vbv); + commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); + + commandList->DrawInstanced(4, 1, 0, 0); + + commandList->ResourceBarrier(1, &getResourceBarrierTransition(m_backBuffer[m_swapChain->GetCurrentBackBufferIndex()], D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_PRESENT)); + if (isFlipSurfaceInLocalMemory(m_surface_color_target)) + commandList->ResourceBarrier(1, &getResourceBarrierTransition(m_rtts.m_currentlyBoundRenderTargets[0], D3D12_RESOURCE_STATE_GENERIC_READ, D3D12_RESOURCE_STATE_RENDER_TARGET)); + check(commandList->Close()); + m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)&commandList); + check(m_swapChain->Present(Ini.GSVSyncEnable.GetValue() ? 
1 : 0, 0)); // Add an event signaling queue completion @@ -822,6 +893,9 @@ void D3D12GSRender::Flip() for (auto tmp : textoclean) tmp->Release(); + + SAFE_RELEASE(storage.m_RAMFramebuffer); + storage.m_RAMFramebuffer = nullptr; }); while (getCurrentResourceStorage().m_frameFinishedHandle) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index b31581515e..d3012c58ab 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -313,6 +313,8 @@ private: ID3D12DescriptorHeap *m_samplerDescriptorHeap; size_t m_currentTextureIndex; + ID3D12Resource *m_RAMFramebuffer; + void Reset(); void Init(ID3D12Device *device); void Release(); From af181395fcfa61ac0b4fddd09cfdf345ce8f2733 Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 29 Jun 2015 20:36:31 +0200 Subject: [PATCH 328/343] d3d12: Fix warning --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 2f1f40afb8..38688e9b2d 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -772,7 +772,7 @@ void D3D12GSRender::Flip() src.PlacedFootprint.Footprint.Width = (UINT)w; src.PlacedFootprint.Footprint.Height = (UINT)h; src.PlacedFootprint.Footprint.Depth = (UINT)1; - src.PlacedFootprint.Footprint.RowPitch = rowPitch; + src.PlacedFootprint.Footprint.RowPitch = (UINT)rowPitch; commandList->CopyTextureRegion(&dst, 0, 0, 0, &src, nullptr); commandList->ResourceBarrier(1, &getResourceBarrierTransition(storage.m_RAMFramebuffer, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_GENERIC_READ)); From b839b8689558528de9648cf76a71fde41e53886a Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 29 Jun 2015 23:23:53 +0200 Subject: [PATCH 329/343] d3d12: Fix color for target_none --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git 
a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 38688e9b2d..1495660299 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -816,7 +816,15 @@ void D3D12GSRender::Flip() srvDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; srvDesc.Texture2D.MipLevels = 1; - srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + if (isFlipSurfaceInLocalMemory(m_surface_color_target)) + srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + else + srvDesc.Shader4ComponentMapping = D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING( + D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_1, + D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_2, + D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_3, + D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0, + ); m_device->CreateShaderResourceView(resourceToFlip, &srvDesc, CPUHandle); D3D12_SAMPLER_DESC samplerDesc = {}; From d2edeafffe15818913689d5910213d055043111c Mon Sep 17 00:00:00 2001 From: vlj Date: Mon, 29 Jun 2015 23:26:33 +0200 Subject: [PATCH 330/343] d3d12: Remove extra , --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 1495660299..43d4b1436d 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -823,7 +823,7 @@ void D3D12GSRender::Flip() D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_1, D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_2, D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_3, - D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0, + D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0 ); m_device->CreateShaderResourceView(resourceToFlip, &srvDesc, CPUHandle); From 38a809b48315f8f708e95ede95f6fa1a70788640 Mon Sep 17 00:00:00 2001 From: vlj Date: 
Wed, 1 Jul 2015 22:52:32 +0200 Subject: [PATCH 331/343] d3d12: Use another sampler heap when using more than 2048 samplers --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 17 +++++++++++------ rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 4 +++- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 10 ++++++++-- 3 files changed, 22 insertions(+), 9 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 43d4b1436d..4ed9c4135a 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -78,6 +78,8 @@ void D3D12GSRender::ResourceStorage::Reset() m_currentTextureIndex = 0; m_frameFinishedFence = nullptr; m_frameFinishedHandle = 0; + m_currentSamplerIndex = 0; + m_samplerDescriptorHeapIndex = 0; m_commandAllocator->Reset(); m_textureUploadCommandAllocator->Reset(); @@ -117,7 +119,8 @@ void D3D12GSRender::ResourceStorage::Init(ID3D12Device *device) textureDescriptorDesc.NumDescriptors = 2048; // For safety textureDescriptorDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER; - check(device->CreateDescriptorHeap(&textureDescriptorDesc, IID_PPV_ARGS(&m_samplerDescriptorHeap))); + check(device->CreateDescriptorHeap(&textureDescriptorDesc, IID_PPV_ARGS(&m_samplerDescriptorHeap[0]))); + check(device->CreateDescriptorHeap(&textureDescriptorDesc, IID_PPV_ARGS(&m_samplerDescriptorHeap[1]))); } void D3D12GSRender::ResourceStorage::Release() @@ -126,7 +129,8 @@ void D3D12GSRender::ResourceStorage::Release() m_constantsBufferDescriptorsHeap->Release(); m_scaleOffsetDescriptorHeap->Release(); m_textureDescriptorsHeap->Release(); - m_samplerDescriptorHeap->Release(); + m_samplerDescriptorHeap[0]->Release(); + m_samplerDescriptorHeap[1]->Release(); for (auto &tmp : m_inflightCommandList) tmp->Release(); m_commandAllocator->Release(); @@ -578,8 +582,8 @@ void D3D12GSRender::ExecCMD() samplerDesc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_WRAP; samplerDesc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_WRAP; 
m_device->CreateSampler(&samplerDesc, - getCPUDescriptorHandle(getCurrentResourceStorage().m_samplerDescriptorHeap, - (getCurrentResourceStorage().m_currentTextureIndex + usedTexture) * g_descriptorStrideSamplers) + getCPUDescriptorHandle(getCurrentResourceStorage().m_samplerDescriptorHeap[getCurrentResourceStorage().m_samplerDescriptorHeapIndex], + (getCurrentResourceStorage().m_currentSamplerIndex + usedTexture) * g_descriptorStrideSamplers) ); } @@ -589,13 +593,14 @@ void D3D12GSRender::ExecCMD() getCurrentResourceStorage().m_currentTextureIndex * g_descriptorStrideSRVCBVUAV) ); - commandList->SetDescriptorHeaps(1, &getCurrentResourceStorage().m_samplerDescriptorHeap); + commandList->SetDescriptorHeaps(1, &getCurrentResourceStorage().m_samplerDescriptorHeap[getCurrentResourceStorage().m_samplerDescriptorHeapIndex]); commandList->SetGraphicsRootDescriptorTable(3, - getGPUDescriptorHandle(getCurrentResourceStorage().m_samplerDescriptorHeap, + getGPUDescriptorHandle(getCurrentResourceStorage().m_samplerDescriptorHeap[getCurrentResourceStorage().m_samplerDescriptorHeapIndex], getCurrentResourceStorage().m_currentTextureIndex * g_descriptorStrideSamplers) ); getCurrentResourceStorage().m_currentTextureIndex += usedTexture; + getCurrentResourceStorage().m_currentSamplerIndex += usedTexture; std::chrono::time_point endTextureTime = std::chrono::system_clock::now(); m_timers.m_textureUploadDuration += std::chrono::duration_cast(endTextureTime - startTextureTime).count(); } diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index d3012c58ab..33615fd472 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -310,7 +310,9 @@ private: // Texture storage ID3D12CommandAllocator *m_textureUploadCommandAllocator; ID3D12DescriptorHeap *m_textureDescriptorsHeap; - ID3D12DescriptorHeap *m_samplerDescriptorHeap; + ID3D12DescriptorHeap *m_samplerDescriptorHeap[2]; + size_t m_samplerDescriptorHeapIndex; + 
size_t m_currentSamplerIndex; size_t m_currentTextureIndex; ID3D12Resource *m_RAMFramebuffer; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index 53f2952b79..b52a0d174a 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -876,13 +876,19 @@ size_t D3D12GSRender::UploadTextures() srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; break; } + D3D12_CPU_DESCRIPTOR_HANDLE Handle = getCurrentResourceStorage().m_textureDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); Handle.ptr += (getCurrentResourceStorage().m_currentTextureIndex + usedTexture) * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); m_device->CreateShaderResourceView(vramTexture, &srvDesc, Handle); + if (getCurrentResourceStorage().m_currentSamplerIndex + 16 > 2048) + { + getCurrentResourceStorage().m_samplerDescriptorHeapIndex = 1; + getCurrentResourceStorage().m_currentSamplerIndex = 0; + } - Handle = getCurrentResourceStorage().m_samplerDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); - Handle.ptr += (getCurrentResourceStorage().m_currentTextureIndex + usedTexture) * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); + Handle = getCurrentResourceStorage().m_samplerDescriptorHeap[getCurrentResourceStorage().m_samplerDescriptorHeapIndex]->GetCPUDescriptorHandleForHeapStart(); + Handle.ptr += (getCurrentResourceStorage().m_currentSamplerIndex + usedTexture) * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); m_device->CreateSampler(&getSamplerDesc(m_textures[i]), Handle); usedTexture++; From 37cc5e5c117b27138a55ff16695dbd081c3e8a7d Mon Sep 17 00:00:00 2001 From: vlj Date: Sat, 4 Jul 2015 16:15:47 +0200 Subject: [PATCH 332/343] d3d12: Fix D3D12GSRender member name --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 4 ++-- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 4 ++-- 2 files changed, 4 insertions(+), 4 
deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 4ed9c4135a..41a030a685 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -396,7 +396,7 @@ void D3D12GSRender::OnReset() { } -void D3D12GSRender::ExecCMD(u32 cmd) +void D3D12GSRender::Clear(u32 cmd) { assert(cmd == NV4097_CLEAR_SURFACE); @@ -476,7 +476,7 @@ void D3D12GSRender::ExecCMD(u32 cmd) m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**) &commandList); } -void D3D12GSRender::ExecCMD() +void D3D12GSRender::Draw() { PrepareRenderTargets(); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 33615fd472..2f21777ce2 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -411,8 +411,8 @@ protected: virtual void OnInitThread() override; virtual void OnExitThread() override; virtual void OnReset() override; - virtual void ExecCMD(u32 cmd) override; - virtual void ExecCMD() override; + virtual void Clear(u32 cmd) override; + virtual void Draw() override; virtual void Flip() override; }; From 6fcd0e04218810529f3d971b8ebfc111d9951b6c Mon Sep 17 00:00:00 2001 From: vlj Date: Sat, 4 Jul 2015 16:17:06 +0200 Subject: [PATCH 333/343] d3d12: Add semaphorePGRAPHTextureRead --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 4 ++++ rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 1 + 2 files changed, 5 insertions(+) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 41a030a685..a1714cf0b5 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -1006,6 +1006,10 @@ void copyToCellRamAndRelease(void *dstAddress, ID3D12Resource *res, size_t dstPi res->Release(); } +void D3D12GSRender::semaphorePGRAPHTextureReadRelease(u32 offset, u32 value) +{ + semaphorePGRAPHBackendRelease(offset, value); +} void D3D12GSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value) 
{ diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 2f21777ce2..243f5a4a70 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -372,6 +372,7 @@ public: D3D12GSRender(); virtual ~D3D12GSRender(); + virtual void semaphorePGRAPHTextureReadRelease(u32 offset, u32 value) override; virtual void semaphorePGRAPHBackendRelease(u32 offset, u32 value) override; virtual void semaphorePFIFOAcquire(u32 offset, u32 value) override; From 9b10895c3823ca62f8118a05efa21226ef66b8b0 Mon Sep 17 00:00:00 2001 From: vlj Date: Thu, 30 Jul 2015 00:13:04 +0200 Subject: [PATCH 334/343] d3d12: Fix build with latest master --- rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp | 4 ++-- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 11 +++++++++-- rpcs3/emucore.vcxproj | 3 ++- rpcs3/rpcs3.vcxproj | 1 + 4 files changed, 14 insertions(+), 5 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index db8e577d13..bfa2364f89 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -244,7 +244,7 @@ ID3D12Resource *createVertexBuffer(const VertexBufferFormat &vbf, const RSXVerte { const u16* c_src = (const u16*)src; u16* c_dst = (u16*)dst; - for (u32 j = 0; j < size; ++j) *c_dst++ = re16(*c_src++); + for (u32 j = 0; j < size; ++j) *c_dst++ = _byteswap_ushort(*c_src++); break; } @@ -252,7 +252,7 @@ ID3D12Resource *createVertexBuffer(const VertexBufferFormat &vbf, const RSXVerte { const u32* c_src = (const u32*)src; u32* c_dst = (u32*)dst; - for (u32 j = 0; j < size; ++j) *c_dst++ = re32(*c_src++); + for (u32 j = 0; j < size; ++j) *c_dst++ = _byteswap_ulong(*c_src++); break; } } diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index a1714cf0b5..d205d70e6b 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -375,8 +375,15 @@ D3D12GSRender::~D3D12GSRender() void 
D3D12GSRender::Close() { - Stop(); - m_frame->Hide(); + if (joinable()) + { + join(); + } + + if (m_frame->IsShown()) + { + m_frame->Hide(); + } } void D3D12GSRender::OnInit() diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index ca795a3025..724426cb19 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -645,6 +645,7 @@ {C4A10229-4712-4BD2-B63E-50D93C67A038} emucore + 10.0.10240.0 @@ -837,4 +838,4 @@ - + \ No newline at end of file diff --git a/rpcs3/rpcs3.vcxproj b/rpcs3/rpcs3.vcxproj index 742c4f112b..e8aadd436f 100644 --- a/rpcs3/rpcs3.vcxproj +++ b/rpcs3/rpcs3.vcxproj @@ -18,6 +18,7 @@ {70CD65B0-91D6-4FAE-9A7B-4AF55D0D1B12} Win32Proj rpcs3 + 10.0.10240.0 From 6a408301d78e7efd17c5dd33e75d61da6a7b49f5 Mon Sep 17 00:00:00 2001 From: vlj Date: Thu, 30 Jul 2015 00:17:30 +0200 Subject: [PATCH 335/343] d3d12: Another fix --- rpcs3/Emu/RSX/Common/ProgramStateCache.h | 1 + 1 file changed, 1 insertion(+) diff --git a/rpcs3/Emu/RSX/Common/ProgramStateCache.h b/rpcs3/Emu/RSX/Common/ProgramStateCache.h index 5466b2d87f..6a732e7bd7 100644 --- a/rpcs3/Emu/RSX/Common/ProgramStateCache.h +++ b/rpcs3/Emu/RSX/Common/ProgramStateCache.h @@ -2,6 +2,7 @@ #include "Emu/RSX/RSXFragmentProgram.h" #include "Emu/RSX/RSXVertexProgram.h" +#include "Utilities/Log.h" enum class SHADER_TYPE From 11980346c9a74f23223115e6d49492bcc02abe6c Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Sat, 1 Aug 2015 22:51:59 +0200 Subject: [PATCH 336/343] Clean an useless added line --- rpcs3/Emu/RSX/CgBinaryProgram.h | 1 - 1 file changed, 1 deletion(-) diff --git a/rpcs3/Emu/RSX/CgBinaryProgram.h b/rpcs3/Emu/RSX/CgBinaryProgram.h index a67354fb3a..0e1b4c858d 100644 --- a/rpcs3/Emu/RSX/CgBinaryProgram.h +++ b/rpcs3/Emu/RSX/CgBinaryProgram.h @@ -332,7 +332,6 @@ public: else { - ParamArray param_array; auto& vprog = GetCgRef(prog.program); m_arb_shader += "\n"; m_arb_shader += fmt::format("# binaryFormatRevision 0x%x\n", (u32)prog.binaryFormatRevision); From 
3d486a8ba9a309a5f90d875abcb7b0167f54c847 Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Sun, 2 Aug 2015 00:04:33 +0200 Subject: [PATCH 337/343] d3d12: Reset gfxHandler in dtor --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index d205d70e6b..24d279e0c2 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -343,6 +343,7 @@ D3D12GSRender::D3D12GSRender() D3D12GSRender::~D3D12GSRender() { + gfxHandler = [this](u32) { return false; }; m_constantsData.Release(); m_vertexIndexData.Release(); m_textureUploadData.Release(); From 3fe90d9e8dab94ccfce22ccb98eea46ad7bf7c2c Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Sat, 1 Aug 2015 23:51:42 +0200 Subject: [PATCH 338/343] Add DX12 variant of debug/release with and without llvm --- rpcs3.sln | 226 +++++++++++++++++++++++++++++++++++++++ rpcs3/emucore.vcxproj | 176 ++++++++++++++++++++++++++++++ rpcs3/rpcs3.vcxproj | 114 ++++++++++++++++++++ rpcs3/rpcs3.vcxproj.user | 16 +++ 4 files changed, 532 insertions(+) diff --git a/rpcs3.sln b/rpcs3.sln index f685b141d7..00c151cc46 100644 --- a/rpcs3.sln +++ b/rpcs3.sln @@ -183,15 +183,23 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "copy_setup_h", "rpcs3\copy_ EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug - DX12|x64 = Debug - DX12|x64 + Debug - LLVM DX12|x64 = Debug - LLVM DX12|x64 Debug - LLVM|x64 = Debug - LLVM|x64 Debug - MemLeak|x64 = Debug - MemLeak|x64 Debug|x64 = Debug|x64 DLL Debug|x64 = DLL Debug|x64 DLL Release|x64 = DLL Release|x64 + Release - DX12|x64 = Release - DX12|x64 + Release - LLVM DX12|x64 = Release - LLVM DX12|x64 Release - LLVM|x64 = Release - LLVM|x64 Release|x64 = Release|x64 EndGlobalSection GlobalSection(ProjectConfigurationPlatforms) = postSolution + {70CD65B0-91D6-4FAE-9A7B-4AF55D0D1B12}.Debug - DX12|x64.ActiveCfg = Debug - DX12|x64 + 
{70CD65B0-91D6-4FAE-9A7B-4AF55D0D1B12}.Debug - DX12|x64.Build.0 = Debug - DX12|x64 + {70CD65B0-91D6-4FAE-9A7B-4AF55D0D1B12}.Debug - LLVM DX12|x64.ActiveCfg = Debug - DX12|x64 + {70CD65B0-91D6-4FAE-9A7B-4AF55D0D1B12}.Debug - LLVM DX12|x64.Build.0 = Debug - DX12|x64 {70CD65B0-91D6-4FAE-9A7B-4AF55D0D1B12}.Debug - LLVM|x64.ActiveCfg = Debug|x64 {70CD65B0-91D6-4FAE-9A7B-4AF55D0D1B12}.Debug - LLVM|x64.Build.0 = Debug|x64 {70CD65B0-91D6-4FAE-9A7B-4AF55D0D1B12}.Debug - MemLeak|x64.ActiveCfg = Debug - MemLeak|x64 @@ -202,10 +210,18 @@ Global {70CD65B0-91D6-4FAE-9A7B-4AF55D0D1B12}.DLL Debug|x64.Build.0 = Debug|x64 {70CD65B0-91D6-4FAE-9A7B-4AF55D0D1B12}.DLL Release|x64.ActiveCfg = Release|x64 {70CD65B0-91D6-4FAE-9A7B-4AF55D0D1B12}.DLL Release|x64.Build.0 = Release|x64 + {70CD65B0-91D6-4FAE-9A7B-4AF55D0D1B12}.Release - DX12|x64.ActiveCfg = Release - DX12|x64 + {70CD65B0-91D6-4FAE-9A7B-4AF55D0D1B12}.Release - DX12|x64.Build.0 = Release - DX12|x64 + {70CD65B0-91D6-4FAE-9A7B-4AF55D0D1B12}.Release - LLVM DX12|x64.ActiveCfg = Release - DX12|x64 + {70CD65B0-91D6-4FAE-9A7B-4AF55D0D1B12}.Release - LLVM DX12|x64.Build.0 = Release - DX12|x64 {70CD65B0-91D6-4FAE-9A7B-4AF55D0D1B12}.Release - LLVM|x64.ActiveCfg = Release|x64 {70CD65B0-91D6-4FAE-9A7B-4AF55D0D1B12}.Release - LLVM|x64.Build.0 = Release|x64 {70CD65B0-91D6-4FAE-9A7B-4AF55D0D1B12}.Release|x64.ActiveCfg = Release|x64 {70CD65B0-91D6-4FAE-9A7B-4AF55D0D1B12}.Release|x64.Build.0 = Release|x64 + {24C45343-FD20-5C92-81C1-35A2AE841E79}.Debug - DX12|x64.ActiveCfg = Debug|x64 + {24C45343-FD20-5C92-81C1-35A2AE841E79}.Debug - DX12|x64.Build.0 = Debug|x64 + {24C45343-FD20-5C92-81C1-35A2AE841E79}.Debug - LLVM DX12|x64.ActiveCfg = Debug|x64 + {24C45343-FD20-5C92-81C1-35A2AE841E79}.Debug - LLVM DX12|x64.Build.0 = Debug|x64 {24C45343-FD20-5C92-81C1-35A2AE841E79}.Debug - LLVM|x64.ActiveCfg = Debug|x64 {24C45343-FD20-5C92-81C1-35A2AE841E79}.Debug - LLVM|x64.Build.0 = Debug|x64 {24C45343-FD20-5C92-81C1-35A2AE841E79}.Debug - MemLeak|x64.ActiveCfg = 
Debug|x64 @@ -216,10 +232,18 @@ Global {24C45343-FD20-5C92-81C1-35A2AE841E79}.DLL Debug|x64.Build.0 = DLL Debug|x64 {24C45343-FD20-5C92-81C1-35A2AE841E79}.DLL Release|x64.ActiveCfg = DLL Release|x64 {24C45343-FD20-5C92-81C1-35A2AE841E79}.DLL Release|x64.Build.0 = DLL Release|x64 + {24C45343-FD20-5C92-81C1-35A2AE841E79}.Release - DX12|x64.ActiveCfg = Release|x64 + {24C45343-FD20-5C92-81C1-35A2AE841E79}.Release - DX12|x64.Build.0 = Release|x64 + {24C45343-FD20-5C92-81C1-35A2AE841E79}.Release - LLVM DX12|x64.ActiveCfg = Release|x64 + {24C45343-FD20-5C92-81C1-35A2AE841E79}.Release - LLVM DX12|x64.Build.0 = Release|x64 {24C45343-FD20-5C92-81C1-35A2AE841E79}.Release - LLVM|x64.ActiveCfg = Release|x64 {24C45343-FD20-5C92-81C1-35A2AE841E79}.Release - LLVM|x64.Build.0 = Release|x64 {24C45343-FD20-5C92-81C1-35A2AE841E79}.Release|x64.ActiveCfg = Release|x64 {24C45343-FD20-5C92-81C1-35A2AE841E79}.Release|x64.Build.0 = Release|x64 + {A16D3832-0F42-57CE-8F48-50E06649ADE8}.Debug - DX12|x64.ActiveCfg = Debug|x64 + {A16D3832-0F42-57CE-8F48-50E06649ADE8}.Debug - DX12|x64.Build.0 = Debug|x64 + {A16D3832-0F42-57CE-8F48-50E06649ADE8}.Debug - LLVM DX12|x64.ActiveCfg = Debug|x64 + {A16D3832-0F42-57CE-8F48-50E06649ADE8}.Debug - LLVM DX12|x64.Build.0 = Debug|x64 {A16D3832-0F42-57CE-8F48-50E06649ADE8}.Debug - LLVM|x64.ActiveCfg = Debug|x64 {A16D3832-0F42-57CE-8F48-50E06649ADE8}.Debug - LLVM|x64.Build.0 = Debug|x64 {A16D3832-0F42-57CE-8F48-50E06649ADE8}.Debug - MemLeak|x64.ActiveCfg = Debug|x64 @@ -230,10 +254,18 @@ Global {A16D3832-0F42-57CE-8F48-50E06649ADE8}.DLL Debug|x64.Build.0 = DLL Debug|x64 {A16D3832-0F42-57CE-8F48-50E06649ADE8}.DLL Release|x64.ActiveCfg = DLL Release|x64 {A16D3832-0F42-57CE-8F48-50E06649ADE8}.DLL Release|x64.Build.0 = DLL Release|x64 + {A16D3832-0F42-57CE-8F48-50E06649ADE8}.Release - DX12|x64.ActiveCfg = Release|x64 + {A16D3832-0F42-57CE-8F48-50E06649ADE8}.Release - DX12|x64.Build.0 = Release|x64 + {A16D3832-0F42-57CE-8F48-50E06649ADE8}.Release - LLVM 
DX12|x64.ActiveCfg = Release|x64 + {A16D3832-0F42-57CE-8F48-50E06649ADE8}.Release - LLVM DX12|x64.Build.0 = Release|x64 {A16D3832-0F42-57CE-8F48-50E06649ADE8}.Release - LLVM|x64.ActiveCfg = Release|x64 {A16D3832-0F42-57CE-8F48-50E06649ADE8}.Release - LLVM|x64.Build.0 = Release|x64 {A16D3832-0F42-57CE-8F48-50E06649ADE8}.Release|x64.ActiveCfg = Release|x64 {A16D3832-0F42-57CE-8F48-50E06649ADE8}.Release|x64.Build.0 = Release|x64 + {3FCC50C2-81E9-5DB2-B8D8-2129427568B1}.Debug - DX12|x64.ActiveCfg = Debug|x64 + {3FCC50C2-81E9-5DB2-B8D8-2129427568B1}.Debug - DX12|x64.Build.0 = Debug|x64 + {3FCC50C2-81E9-5DB2-B8D8-2129427568B1}.Debug - LLVM DX12|x64.ActiveCfg = Debug|x64 + {3FCC50C2-81E9-5DB2-B8D8-2129427568B1}.Debug - LLVM DX12|x64.Build.0 = Debug|x64 {3FCC50C2-81E9-5DB2-B8D8-2129427568B1}.Debug - LLVM|x64.ActiveCfg = Debug|x64 {3FCC50C2-81E9-5DB2-B8D8-2129427568B1}.Debug - LLVM|x64.Build.0 = Debug|x64 {3FCC50C2-81E9-5DB2-B8D8-2129427568B1}.Debug - MemLeak|x64.ActiveCfg = Debug|x64 @@ -244,10 +276,18 @@ Global {3FCC50C2-81E9-5DB2-B8D8-2129427568B1}.DLL Debug|x64.Build.0 = DLL Debug|x64 {3FCC50C2-81E9-5DB2-B8D8-2129427568B1}.DLL Release|x64.ActiveCfg = DLL Release|x64 {3FCC50C2-81E9-5DB2-B8D8-2129427568B1}.DLL Release|x64.Build.0 = DLL Release|x64 + {3FCC50C2-81E9-5DB2-B8D8-2129427568B1}.Release - DX12|x64.ActiveCfg = Release|x64 + {3FCC50C2-81E9-5DB2-B8D8-2129427568B1}.Release - DX12|x64.Build.0 = Release|x64 + {3FCC50C2-81E9-5DB2-B8D8-2129427568B1}.Release - LLVM DX12|x64.ActiveCfg = Release|x64 + {3FCC50C2-81E9-5DB2-B8D8-2129427568B1}.Release - LLVM DX12|x64.Build.0 = Release|x64 {3FCC50C2-81E9-5DB2-B8D8-2129427568B1}.Release - LLVM|x64.ActiveCfg = Release|x64 {3FCC50C2-81E9-5DB2-B8D8-2129427568B1}.Release - LLVM|x64.Build.0 = Release|x64 {3FCC50C2-81E9-5DB2-B8D8-2129427568B1}.Release|x64.ActiveCfg = Release|x64 {3FCC50C2-81E9-5DB2-B8D8-2129427568B1}.Release|x64.Build.0 = Release|x64 + {6744DAD8-9C70-574A-BFF2-9F8DDDB24A75}.Debug - DX12|x64.ActiveCfg = Debug|x64 + 
{6744DAD8-9C70-574A-BFF2-9F8DDDB24A75}.Debug - DX12|x64.Build.0 = Debug|x64 + {6744DAD8-9C70-574A-BFF2-9F8DDDB24A75}.Debug - LLVM DX12|x64.ActiveCfg = Debug|x64 + {6744DAD8-9C70-574A-BFF2-9F8DDDB24A75}.Debug - LLVM DX12|x64.Build.0 = Debug|x64 {6744DAD8-9C70-574A-BFF2-9F8DDDB24A75}.Debug - LLVM|x64.ActiveCfg = Debug|x64 {6744DAD8-9C70-574A-BFF2-9F8DDDB24A75}.Debug - LLVM|x64.Build.0 = Debug|x64 {6744DAD8-9C70-574A-BFF2-9F8DDDB24A75}.Debug - MemLeak|x64.ActiveCfg = Debug|x64 @@ -258,10 +298,18 @@ Global {6744DAD8-9C70-574A-BFF2-9F8DDDB24A75}.DLL Debug|x64.Build.0 = DLL Debug|x64 {6744DAD8-9C70-574A-BFF2-9F8DDDB24A75}.DLL Release|x64.ActiveCfg = DLL Release|x64 {6744DAD8-9C70-574A-BFF2-9F8DDDB24A75}.DLL Release|x64.Build.0 = DLL Release|x64 + {6744DAD8-9C70-574A-BFF2-9F8DDDB24A75}.Release - DX12|x64.ActiveCfg = Release|x64 + {6744DAD8-9C70-574A-BFF2-9F8DDDB24A75}.Release - DX12|x64.Build.0 = Release|x64 + {6744DAD8-9C70-574A-BFF2-9F8DDDB24A75}.Release - LLVM DX12|x64.ActiveCfg = Release|x64 + {6744DAD8-9C70-574A-BFF2-9F8DDDB24A75}.Release - LLVM DX12|x64.Build.0 = Release|x64 {6744DAD8-9C70-574A-BFF2-9F8DDDB24A75}.Release - LLVM|x64.ActiveCfg = Release|x64 {6744DAD8-9C70-574A-BFF2-9F8DDDB24A75}.Release - LLVM|x64.Build.0 = Release|x64 {6744DAD8-9C70-574A-BFF2-9F8DDDB24A75}.Release|x64.ActiveCfg = Release|x64 {6744DAD8-9C70-574A-BFF2-9F8DDDB24A75}.Release|x64.Build.0 = Release|x64 + {97FDAB45-9C58-5BC5-A2F4-EE42739EBC63}.Debug - DX12|x64.ActiveCfg = Debug|x64 + {97FDAB45-9C58-5BC5-A2F4-EE42739EBC63}.Debug - DX12|x64.Build.0 = Debug|x64 + {97FDAB45-9C58-5BC5-A2F4-EE42739EBC63}.Debug - LLVM DX12|x64.ActiveCfg = Debug|x64 + {97FDAB45-9C58-5BC5-A2F4-EE42739EBC63}.Debug - LLVM DX12|x64.Build.0 = Debug|x64 {97FDAB45-9C58-5BC5-A2F4-EE42739EBC63}.Debug - LLVM|x64.ActiveCfg = Debug|x64 {97FDAB45-9C58-5BC5-A2F4-EE42739EBC63}.Debug - LLVM|x64.Build.0 = Debug|x64 {97FDAB45-9C58-5BC5-A2F4-EE42739EBC63}.Debug - MemLeak|x64.ActiveCfg = Debug|x64 @@ -272,10 +320,18 @@ Global 
{97FDAB45-9C58-5BC5-A2F4-EE42739EBC63}.DLL Debug|x64.Build.0 = DLL Debug|x64 {97FDAB45-9C58-5BC5-A2F4-EE42739EBC63}.DLL Release|x64.ActiveCfg = DLL Release|x64 {97FDAB45-9C58-5BC5-A2F4-EE42739EBC63}.DLL Release|x64.Build.0 = DLL Release|x64 + {97FDAB45-9C58-5BC5-A2F4-EE42739EBC63}.Release - DX12|x64.ActiveCfg = Release|x64 + {97FDAB45-9C58-5BC5-A2F4-EE42739EBC63}.Release - DX12|x64.Build.0 = Release|x64 + {97FDAB45-9C58-5BC5-A2F4-EE42739EBC63}.Release - LLVM DX12|x64.ActiveCfg = Release|x64 + {97FDAB45-9C58-5BC5-A2F4-EE42739EBC63}.Release - LLVM DX12|x64.Build.0 = Release|x64 {97FDAB45-9C58-5BC5-A2F4-EE42739EBC63}.Release - LLVM|x64.ActiveCfg = Release|x64 {97FDAB45-9C58-5BC5-A2F4-EE42739EBC63}.Release - LLVM|x64.Build.0 = Release|x64 {97FDAB45-9C58-5BC5-A2F4-EE42739EBC63}.Release|x64.ActiveCfg = Release|x64 {97FDAB45-9C58-5BC5-A2F4-EE42739EBC63}.Release|x64.Build.0 = Release|x64 + {DA8B15EF-6750-5928-BC0E-C748213CF9B2}.Debug - DX12|x64.ActiveCfg = Debug|x64 + {DA8B15EF-6750-5928-BC0E-C748213CF9B2}.Debug - DX12|x64.Build.0 = Debug|x64 + {DA8B15EF-6750-5928-BC0E-C748213CF9B2}.Debug - LLVM DX12|x64.ActiveCfg = Debug|x64 + {DA8B15EF-6750-5928-BC0E-C748213CF9B2}.Debug - LLVM DX12|x64.Build.0 = Debug|x64 {DA8B15EF-6750-5928-BC0E-C748213CF9B2}.Debug - LLVM|x64.ActiveCfg = Debug|x64 {DA8B15EF-6750-5928-BC0E-C748213CF9B2}.Debug - LLVM|x64.Build.0 = Debug|x64 {DA8B15EF-6750-5928-BC0E-C748213CF9B2}.Debug - MemLeak|x64.ActiveCfg = Debug|x64 @@ -286,10 +342,18 @@ Global {DA8B15EF-6750-5928-BC0E-C748213CF9B2}.DLL Debug|x64.Build.0 = DLL Debug|x64 {DA8B15EF-6750-5928-BC0E-C748213CF9B2}.DLL Release|x64.ActiveCfg = DLL Release|x64 {DA8B15EF-6750-5928-BC0E-C748213CF9B2}.DLL Release|x64.Build.0 = DLL Release|x64 + {DA8B15EF-6750-5928-BC0E-C748213CF9B2}.Release - DX12|x64.ActiveCfg = Release|x64 + {DA8B15EF-6750-5928-BC0E-C748213CF9B2}.Release - DX12|x64.Build.0 = Release|x64 + {DA8B15EF-6750-5928-BC0E-C748213CF9B2}.Release - LLVM DX12|x64.ActiveCfg = Release|x64 + 
{DA8B15EF-6750-5928-BC0E-C748213CF9B2}.Release - LLVM DX12|x64.Build.0 = Release|x64 {DA8B15EF-6750-5928-BC0E-C748213CF9B2}.Release - LLVM|x64.ActiveCfg = Release|x64 {DA8B15EF-6750-5928-BC0E-C748213CF9B2}.Release - LLVM|x64.Build.0 = Release|x64 {DA8B15EF-6750-5928-BC0E-C748213CF9B2}.Release|x64.ActiveCfg = Release|x64 {DA8B15EF-6750-5928-BC0E-C748213CF9B2}.Release|x64.Build.0 = Release|x64 + {33CC42F9-7756-5587-863C-8D4461B7C5DD}.Debug - DX12|x64.ActiveCfg = Debug|x64 + {33CC42F9-7756-5587-863C-8D4461B7C5DD}.Debug - DX12|x64.Build.0 = Debug|x64 + {33CC42F9-7756-5587-863C-8D4461B7C5DD}.Debug - LLVM DX12|x64.ActiveCfg = Debug|x64 + {33CC42F9-7756-5587-863C-8D4461B7C5DD}.Debug - LLVM DX12|x64.Build.0 = Debug|x64 {33CC42F9-7756-5587-863C-8D4461B7C5DD}.Debug - LLVM|x64.ActiveCfg = Debug|x64 {33CC42F9-7756-5587-863C-8D4461B7C5DD}.Debug - LLVM|x64.Build.0 = Debug|x64 {33CC42F9-7756-5587-863C-8D4461B7C5DD}.Debug - MemLeak|x64.ActiveCfg = Debug|x64 @@ -301,10 +365,18 @@ Global {33CC42F9-7756-5587-863C-8D4461B7C5DD}.DLL Debug|x64.Build.0 = DLL Debug|x64 {33CC42F9-7756-5587-863C-8D4461B7C5DD}.DLL Release|x64.ActiveCfg = DLL Release|x64 {33CC42F9-7756-5587-863C-8D4461B7C5DD}.DLL Release|x64.Build.0 = DLL Release|x64 + {33CC42F9-7756-5587-863C-8D4461B7C5DD}.Release - DX12|x64.ActiveCfg = Release|x64 + {33CC42F9-7756-5587-863C-8D4461B7C5DD}.Release - DX12|x64.Build.0 = Release|x64 + {33CC42F9-7756-5587-863C-8D4461B7C5DD}.Release - LLVM DX12|x64.ActiveCfg = Release|x64 + {33CC42F9-7756-5587-863C-8D4461B7C5DD}.Release - LLVM DX12|x64.Build.0 = Release|x64 {33CC42F9-7756-5587-863C-8D4461B7C5DD}.Release - LLVM|x64.ActiveCfg = Release|x64 {33CC42F9-7756-5587-863C-8D4461B7C5DD}.Release - LLVM|x64.Build.0 = Release|x64 {33CC42F9-7756-5587-863C-8D4461B7C5DD}.Release|x64.ActiveCfg = Release|x64 {33CC42F9-7756-5587-863C-8D4461B7C5DD}.Release|x64.Build.0 = Release|x64 + {8BD8F8D9-4275-5B42-A8F4-F1DB2970A550}.Debug - DX12|x64.ActiveCfg = Debug|x64 + 
{8BD8F8D9-4275-5B42-A8F4-F1DB2970A550}.Debug - DX12|x64.Build.0 = Debug|x64 + {8BD8F8D9-4275-5B42-A8F4-F1DB2970A550}.Debug - LLVM DX12|x64.ActiveCfg = Debug|x64 + {8BD8F8D9-4275-5B42-A8F4-F1DB2970A550}.Debug - LLVM DX12|x64.Build.0 = Debug|x64 {8BD8F8D9-4275-5B42-A8F4-F1DB2970A550}.Debug - LLVM|x64.ActiveCfg = Debug|x64 {8BD8F8D9-4275-5B42-A8F4-F1DB2970A550}.Debug - LLVM|x64.Build.0 = Debug|x64 {8BD8F8D9-4275-5B42-A8F4-F1DB2970A550}.Debug - MemLeak|x64.ActiveCfg = Debug|x64 @@ -315,10 +387,18 @@ Global {8BD8F8D9-4275-5B42-A8F4-F1DB2970A550}.DLL Debug|x64.Build.0 = DLL Debug|x64 {8BD8F8D9-4275-5B42-A8F4-F1DB2970A550}.DLL Release|x64.ActiveCfg = DLL Release|x64 {8BD8F8D9-4275-5B42-A8F4-F1DB2970A550}.DLL Release|x64.Build.0 = DLL Release|x64 + {8BD8F8D9-4275-5B42-A8F4-F1DB2970A550}.Release - DX12|x64.ActiveCfg = Release|x64 + {8BD8F8D9-4275-5B42-A8F4-F1DB2970A550}.Release - DX12|x64.Build.0 = Release|x64 + {8BD8F8D9-4275-5B42-A8F4-F1DB2970A550}.Release - LLVM DX12|x64.ActiveCfg = Release|x64 + {8BD8F8D9-4275-5B42-A8F4-F1DB2970A550}.Release - LLVM DX12|x64.Build.0 = Release|x64 {8BD8F8D9-4275-5B42-A8F4-F1DB2970A550}.Release - LLVM|x64.ActiveCfg = Release|x64 {8BD8F8D9-4275-5B42-A8F4-F1DB2970A550}.Release - LLVM|x64.Build.0 = Release|x64 {8BD8F8D9-4275-5B42-A8F4-F1DB2970A550}.Release|x64.ActiveCfg = Release|x64 {8BD8F8D9-4275-5B42-A8F4-F1DB2970A550}.Release|x64.Build.0 = Release|x64 + {69F2EDE4-7D21-5738-9BC0-F66F61C9AE00}.Debug - DX12|x64.ActiveCfg = Debug|x64 + {69F2EDE4-7D21-5738-9BC0-F66F61C9AE00}.Debug - DX12|x64.Build.0 = Debug|x64 + {69F2EDE4-7D21-5738-9BC0-F66F61C9AE00}.Debug - LLVM DX12|x64.ActiveCfg = Debug|x64 + {69F2EDE4-7D21-5738-9BC0-F66F61C9AE00}.Debug - LLVM DX12|x64.Build.0 = Debug|x64 {69F2EDE4-7D21-5738-9BC0-F66F61C9AE00}.Debug - LLVM|x64.ActiveCfg = Debug|x64 {69F2EDE4-7D21-5738-9BC0-F66F61C9AE00}.Debug - LLVM|x64.Build.0 = Debug|x64 {69F2EDE4-7D21-5738-9BC0-F66F61C9AE00}.Debug - MemLeak|x64.ActiveCfg = Debug|x64 @@ -329,10 +409,18 @@ Global 
{69F2EDE4-7D21-5738-9BC0-F66F61C9AE00}.DLL Debug|x64.Build.0 = DLL Debug|x64 {69F2EDE4-7D21-5738-9BC0-F66F61C9AE00}.DLL Release|x64.ActiveCfg = DLL Release|x64 {69F2EDE4-7D21-5738-9BC0-F66F61C9AE00}.DLL Release|x64.Build.0 = DLL Release|x64 + {69F2EDE4-7D21-5738-9BC0-F66F61C9AE00}.Release - DX12|x64.ActiveCfg = Release|x64 + {69F2EDE4-7D21-5738-9BC0-F66F61C9AE00}.Release - DX12|x64.Build.0 = Release|x64 + {69F2EDE4-7D21-5738-9BC0-F66F61C9AE00}.Release - LLVM DX12|x64.ActiveCfg = Release|x64 + {69F2EDE4-7D21-5738-9BC0-F66F61C9AE00}.Release - LLVM DX12|x64.Build.0 = Release|x64 {69F2EDE4-7D21-5738-9BC0-F66F61C9AE00}.Release - LLVM|x64.ActiveCfg = Release|x64 {69F2EDE4-7D21-5738-9BC0-F66F61C9AE00}.Release - LLVM|x64.Build.0 = Release|x64 {69F2EDE4-7D21-5738-9BC0-F66F61C9AE00}.Release|x64.ActiveCfg = Release|x64 {69F2EDE4-7D21-5738-9BC0-F66F61C9AE00}.Release|x64.Build.0 = Release|x64 + {E21129E0-7C08-5936-9D8C-0D60B5319BA7}.Debug - DX12|x64.ActiveCfg = Debug|x64 + {E21129E0-7C08-5936-9D8C-0D60B5319BA7}.Debug - DX12|x64.Build.0 = Debug|x64 + {E21129E0-7C08-5936-9D8C-0D60B5319BA7}.Debug - LLVM DX12|x64.ActiveCfg = Debug|x64 + {E21129E0-7C08-5936-9D8C-0D60B5319BA7}.Debug - LLVM DX12|x64.Build.0 = Debug|x64 {E21129E0-7C08-5936-9D8C-0D60B5319BA7}.Debug - LLVM|x64.ActiveCfg = Debug|x64 {E21129E0-7C08-5936-9D8C-0D60B5319BA7}.Debug - LLVM|x64.Build.0 = Debug|x64 {E21129E0-7C08-5936-9D8C-0D60B5319BA7}.Debug - MemLeak|x64.ActiveCfg = Debug|x64 @@ -343,10 +431,18 @@ Global {E21129E0-7C08-5936-9D8C-0D60B5319BA7}.DLL Debug|x64.Build.0 = DLL Debug|x64 {E21129E0-7C08-5936-9D8C-0D60B5319BA7}.DLL Release|x64.ActiveCfg = DLL Release|x64 {E21129E0-7C08-5936-9D8C-0D60B5319BA7}.DLL Release|x64.Build.0 = DLL Release|x64 + {E21129E0-7C08-5936-9D8C-0D60B5319BA7}.Release - DX12|x64.ActiveCfg = Release|x64 + {E21129E0-7C08-5936-9D8C-0D60B5319BA7}.Release - DX12|x64.Build.0 = Release|x64 + {E21129E0-7C08-5936-9D8C-0D60B5319BA7}.Release - LLVM DX12|x64.ActiveCfg = Release|x64 + 
{E21129E0-7C08-5936-9D8C-0D60B5319BA7}.Release - LLVM DX12|x64.Build.0 = Release|x64 {E21129E0-7C08-5936-9D8C-0D60B5319BA7}.Release - LLVM|x64.ActiveCfg = Release|x64 {E21129E0-7C08-5936-9D8C-0D60B5319BA7}.Release - LLVM|x64.Build.0 = Release|x64 {E21129E0-7C08-5936-9D8C-0D60B5319BA7}.Release|x64.ActiveCfg = Release|x64 {E21129E0-7C08-5936-9D8C-0D60B5319BA7}.Release|x64.Build.0 = Release|x64 + {7FB0902D-8579-5DCE-B883-DAF66A885005}.Debug - DX12|x64.ActiveCfg = Debug|x64 + {7FB0902D-8579-5DCE-B883-DAF66A885005}.Debug - DX12|x64.Build.0 = Debug|x64 + {7FB0902D-8579-5DCE-B883-DAF66A885005}.Debug - LLVM DX12|x64.ActiveCfg = Debug|x64 + {7FB0902D-8579-5DCE-B883-DAF66A885005}.Debug - LLVM DX12|x64.Build.0 = Debug|x64 {7FB0902D-8579-5DCE-B883-DAF66A885005}.Debug - LLVM|x64.ActiveCfg = Debug|x64 {7FB0902D-8579-5DCE-B883-DAF66A885005}.Debug - LLVM|x64.Build.0 = Debug|x64 {7FB0902D-8579-5DCE-B883-DAF66A885005}.Debug - MemLeak|x64.ActiveCfg = Debug|x64 @@ -357,10 +453,18 @@ Global {7FB0902D-8579-5DCE-B883-DAF66A885005}.DLL Debug|x64.Build.0 = DLL Debug|x64 {7FB0902D-8579-5DCE-B883-DAF66A885005}.DLL Release|x64.ActiveCfg = DLL Release|x64 {7FB0902D-8579-5DCE-B883-DAF66A885005}.DLL Release|x64.Build.0 = DLL Release|x64 + {7FB0902D-8579-5DCE-B883-DAF66A885005}.Release - DX12|x64.ActiveCfg = Release|x64 + {7FB0902D-8579-5DCE-B883-DAF66A885005}.Release - DX12|x64.Build.0 = Release|x64 + {7FB0902D-8579-5DCE-B883-DAF66A885005}.Release - LLVM DX12|x64.ActiveCfg = Release|x64 + {7FB0902D-8579-5DCE-B883-DAF66A885005}.Release - LLVM DX12|x64.Build.0 = Release|x64 {7FB0902D-8579-5DCE-B883-DAF66A885005}.Release - LLVM|x64.ActiveCfg = Release|x64 {7FB0902D-8579-5DCE-B883-DAF66A885005}.Release - LLVM|x64.Build.0 = Release|x64 {7FB0902D-8579-5DCE-B883-DAF66A885005}.Release|x64.ActiveCfg = Release|x64 {7FB0902D-8579-5DCE-B883-DAF66A885005}.Release|x64.Build.0 = Release|x64 + {A1A8355B-0988-528E-9CC2-B971D6266669}.Debug - DX12|x64.ActiveCfg = Debug|x64 + 
{A1A8355B-0988-528E-9CC2-B971D6266669}.Debug - DX12|x64.Build.0 = Debug|x64 + {A1A8355B-0988-528E-9CC2-B971D6266669}.Debug - LLVM DX12|x64.ActiveCfg = Debug|x64 + {A1A8355B-0988-528E-9CC2-B971D6266669}.Debug - LLVM DX12|x64.Build.0 = Debug|x64 {A1A8355B-0988-528E-9CC2-B971D6266669}.Debug - LLVM|x64.ActiveCfg = Debug|x64 {A1A8355B-0988-528E-9CC2-B971D6266669}.Debug - LLVM|x64.Build.0 = Debug|x64 {A1A8355B-0988-528E-9CC2-B971D6266669}.Debug - MemLeak|x64.ActiveCfg = Debug|x64 @@ -371,10 +475,18 @@ Global {A1A8355B-0988-528E-9CC2-B971D6266669}.DLL Debug|x64.Build.0 = DLL Debug|x64 {A1A8355B-0988-528E-9CC2-B971D6266669}.DLL Release|x64.ActiveCfg = DLL Release|x64 {A1A8355B-0988-528E-9CC2-B971D6266669}.DLL Release|x64.Build.0 = DLL Release|x64 + {A1A8355B-0988-528E-9CC2-B971D6266669}.Release - DX12|x64.ActiveCfg = Release|x64 + {A1A8355B-0988-528E-9CC2-B971D6266669}.Release - DX12|x64.Build.0 = Release|x64 + {A1A8355B-0988-528E-9CC2-B971D6266669}.Release - LLVM DX12|x64.ActiveCfg = Release|x64 + {A1A8355B-0988-528E-9CC2-B971D6266669}.Release - LLVM DX12|x64.Build.0 = Release|x64 {A1A8355B-0988-528E-9CC2-B971D6266669}.Release - LLVM|x64.ActiveCfg = Release|x64 {A1A8355B-0988-528E-9CC2-B971D6266669}.Release - LLVM|x64.Build.0 = Release|x64 {A1A8355B-0988-528E-9CC2-B971D6266669}.Release|x64.ActiveCfg = Release|x64 {A1A8355B-0988-528E-9CC2-B971D6266669}.Release|x64.Build.0 = Release|x64 + {6053CC38-CDEE-584C-8BC8-4B000D800FC7}.Debug - DX12|x64.ActiveCfg = Debug|x64 + {6053CC38-CDEE-584C-8BC8-4B000D800FC7}.Debug - DX12|x64.Build.0 = Debug|x64 + {6053CC38-CDEE-584C-8BC8-4B000D800FC7}.Debug - LLVM DX12|x64.ActiveCfg = Debug|x64 + {6053CC38-CDEE-584C-8BC8-4B000D800FC7}.Debug - LLVM DX12|x64.Build.0 = Debug|x64 {6053CC38-CDEE-584C-8BC8-4B000D800FC7}.Debug - LLVM|x64.ActiveCfg = Debug|x64 {6053CC38-CDEE-584C-8BC8-4B000D800FC7}.Debug - LLVM|x64.Build.0 = Debug|x64 {6053CC38-CDEE-584C-8BC8-4B000D800FC7}.Debug - MemLeak|x64.ActiveCfg = Debug|x64 @@ -385,10 +497,18 @@ Global 
{6053CC38-CDEE-584C-8BC8-4B000D800FC7}.DLL Debug|x64.Build.0 = DLL Debug|x64 {6053CC38-CDEE-584C-8BC8-4B000D800FC7}.DLL Release|x64.ActiveCfg = DLL Release|x64 {6053CC38-CDEE-584C-8BC8-4B000D800FC7}.DLL Release|x64.Build.0 = DLL Release|x64 + {6053CC38-CDEE-584C-8BC8-4B000D800FC7}.Release - DX12|x64.ActiveCfg = Release|x64 + {6053CC38-CDEE-584C-8BC8-4B000D800FC7}.Release - DX12|x64.Build.0 = Release|x64 + {6053CC38-CDEE-584C-8BC8-4B000D800FC7}.Release - LLVM DX12|x64.ActiveCfg = Release|x64 + {6053CC38-CDEE-584C-8BC8-4B000D800FC7}.Release - LLVM DX12|x64.Build.0 = Release|x64 {6053CC38-CDEE-584C-8BC8-4B000D800FC7}.Release - LLVM|x64.ActiveCfg = Release|x64 {6053CC38-CDEE-584C-8BC8-4B000D800FC7}.Release - LLVM|x64.Build.0 = Release|x64 {6053CC38-CDEE-584C-8BC8-4B000D800FC7}.Release|x64.ActiveCfg = Release|x64 {6053CC38-CDEE-584C-8BC8-4B000D800FC7}.Release|x64.Build.0 = Release|x64 + {8ACC122A-CA6A-5AA6-9C97-9CDD2E533DB0}.Debug - DX12|x64.ActiveCfg = Debug|x64 + {8ACC122A-CA6A-5AA6-9C97-9CDD2E533DB0}.Debug - DX12|x64.Build.0 = Debug|x64 + {8ACC122A-CA6A-5AA6-9C97-9CDD2E533DB0}.Debug - LLVM DX12|x64.ActiveCfg = Debug|x64 + {8ACC122A-CA6A-5AA6-9C97-9CDD2E533DB0}.Debug - LLVM DX12|x64.Build.0 = Debug|x64 {8ACC122A-CA6A-5AA6-9C97-9CDD2E533DB0}.Debug - LLVM|x64.ActiveCfg = Debug|x64 {8ACC122A-CA6A-5AA6-9C97-9CDD2E533DB0}.Debug - LLVM|x64.Build.0 = Debug|x64 {8ACC122A-CA6A-5AA6-9C97-9CDD2E533DB0}.Debug - MemLeak|x64.ActiveCfg = Debug|x64 @@ -399,10 +519,18 @@ Global {8ACC122A-CA6A-5AA6-9C97-9CDD2E533DB0}.DLL Debug|x64.Build.0 = DLL Debug|x64 {8ACC122A-CA6A-5AA6-9C97-9CDD2E533DB0}.DLL Release|x64.ActiveCfg = DLL Release|x64 {8ACC122A-CA6A-5AA6-9C97-9CDD2E533DB0}.DLL Release|x64.Build.0 = DLL Release|x64 + {8ACC122A-CA6A-5AA6-9C97-9CDD2E533DB0}.Release - DX12|x64.ActiveCfg = Release|x64 + {8ACC122A-CA6A-5AA6-9C97-9CDD2E533DB0}.Release - DX12|x64.Build.0 = Release|x64 + {8ACC122A-CA6A-5AA6-9C97-9CDD2E533DB0}.Release - LLVM DX12|x64.ActiveCfg = Release|x64 + 
{8ACC122A-CA6A-5AA6-9C97-9CDD2E533DB0}.Release - LLVM DX12|x64.Build.0 = Release|x64 {8ACC122A-CA6A-5AA6-9C97-9CDD2E533DB0}.Release - LLVM|x64.ActiveCfg = Release|x64 {8ACC122A-CA6A-5AA6-9C97-9CDD2E533DB0}.Release - LLVM|x64.Build.0 = Release|x64 {8ACC122A-CA6A-5AA6-9C97-9CDD2E533DB0}.Release|x64.ActiveCfg = Release|x64 {8ACC122A-CA6A-5AA6-9C97-9CDD2E533DB0}.Release|x64.Build.0 = Release|x64 + {56A4B526-BB81-5D01-AAA9-16D23BBB169D}.Debug - DX12|x64.ActiveCfg = Debug|x64 + {56A4B526-BB81-5D01-AAA9-16D23BBB169D}.Debug - DX12|x64.Build.0 = Debug|x64 + {56A4B526-BB81-5D01-AAA9-16D23BBB169D}.Debug - LLVM DX12|x64.ActiveCfg = Debug|x64 + {56A4B526-BB81-5D01-AAA9-16D23BBB169D}.Debug - LLVM DX12|x64.Build.0 = Debug|x64 {56A4B526-BB81-5D01-AAA9-16D23BBB169D}.Debug - LLVM|x64.ActiveCfg = Debug|x64 {56A4B526-BB81-5D01-AAA9-16D23BBB169D}.Debug - LLVM|x64.Build.0 = Debug|x64 {56A4B526-BB81-5D01-AAA9-16D23BBB169D}.Debug - MemLeak|x64.ActiveCfg = Debug|x64 @@ -413,10 +541,18 @@ Global {56A4B526-BB81-5D01-AAA9-16D23BBB169D}.DLL Debug|x64.Build.0 = DLL Debug|x64 {56A4B526-BB81-5D01-AAA9-16D23BBB169D}.DLL Release|x64.ActiveCfg = DLL Release|x64 {56A4B526-BB81-5D01-AAA9-16D23BBB169D}.DLL Release|x64.Build.0 = DLL Release|x64 + {56A4B526-BB81-5D01-AAA9-16D23BBB169D}.Release - DX12|x64.ActiveCfg = Release|x64 + {56A4B526-BB81-5D01-AAA9-16D23BBB169D}.Release - DX12|x64.Build.0 = Release|x64 + {56A4B526-BB81-5D01-AAA9-16D23BBB169D}.Release - LLVM DX12|x64.ActiveCfg = Release|x64 + {56A4B526-BB81-5D01-AAA9-16D23BBB169D}.Release - LLVM DX12|x64.Build.0 = Release|x64 {56A4B526-BB81-5D01-AAA9-16D23BBB169D}.Release - LLVM|x64.ActiveCfg = Release|x64 {56A4B526-BB81-5D01-AAA9-16D23BBB169D}.Release - LLVM|x64.Build.0 = Release|x64 {56A4B526-BB81-5D01-AAA9-16D23BBB169D}.Release|x64.ActiveCfg = Release|x64 {56A4B526-BB81-5D01-AAA9-16D23BBB169D}.Release|x64.Build.0 = Release|x64 + {75596CE6-5AE7-55C9-B890-C07B0A657A83}.Debug - DX12|x64.ActiveCfg = Debug|x64 + 
{75596CE6-5AE7-55C9-B890-C07B0A657A83}.Debug - DX12|x64.Build.0 = Debug|x64 + {75596CE6-5AE7-55C9-B890-C07B0A657A83}.Debug - LLVM DX12|x64.ActiveCfg = Debug|x64 + {75596CE6-5AE7-55C9-B890-C07B0A657A83}.Debug - LLVM DX12|x64.Build.0 = Debug|x64 {75596CE6-5AE7-55C9-B890-C07B0A657A83}.Debug - LLVM|x64.ActiveCfg = Debug|x64 {75596CE6-5AE7-55C9-B890-C07B0A657A83}.Debug - LLVM|x64.Build.0 = Debug|x64 {75596CE6-5AE7-55C9-B890-C07B0A657A83}.Debug - MemLeak|x64.ActiveCfg = Debug|x64 @@ -427,10 +563,18 @@ Global {75596CE6-5AE7-55C9-B890-C07B0A657A83}.DLL Debug|x64.Build.0 = DLL Debug|x64 {75596CE6-5AE7-55C9-B890-C07B0A657A83}.DLL Release|x64.ActiveCfg = DLL Release|x64 {75596CE6-5AE7-55C9-B890-C07B0A657A83}.DLL Release|x64.Build.0 = DLL Release|x64 + {75596CE6-5AE7-55C9-B890-C07B0A657A83}.Release - DX12|x64.ActiveCfg = Release|x64 + {75596CE6-5AE7-55C9-B890-C07B0A657A83}.Release - DX12|x64.Build.0 = Release|x64 + {75596CE6-5AE7-55C9-B890-C07B0A657A83}.Release - LLVM DX12|x64.ActiveCfg = Release|x64 + {75596CE6-5AE7-55C9-B890-C07B0A657A83}.Release - LLVM DX12|x64.Build.0 = Release|x64 {75596CE6-5AE7-55C9-B890-C07B0A657A83}.Release - LLVM|x64.ActiveCfg = Release|x64 {75596CE6-5AE7-55C9-B890-C07B0A657A83}.Release - LLVM|x64.Build.0 = Release|x64 {75596CE6-5AE7-55C9-B890-C07B0A657A83}.Release|x64.ActiveCfg = Release|x64 {75596CE6-5AE7-55C9-B890-C07B0A657A83}.Release|x64.Build.0 = Release|x64 + {8B867186-A0B5-5479-B824-E176EDD27C40}.Debug - DX12|x64.ActiveCfg = Debug|x64 + {8B867186-A0B5-5479-B824-E176EDD27C40}.Debug - DX12|x64.Build.0 = Debug|x64 + {8B867186-A0B5-5479-B824-E176EDD27C40}.Debug - LLVM DX12|x64.ActiveCfg = Debug|x64 + {8B867186-A0B5-5479-B824-E176EDD27C40}.Debug - LLVM DX12|x64.Build.0 = Debug|x64 {8B867186-A0B5-5479-B824-E176EDD27C40}.Debug - LLVM|x64.ActiveCfg = Debug|x64 {8B867186-A0B5-5479-B824-E176EDD27C40}.Debug - LLVM|x64.Build.0 = Debug|x64 {8B867186-A0B5-5479-B824-E176EDD27C40}.Debug - MemLeak|x64.ActiveCfg = Debug|x64 @@ -441,10 +585,18 @@ Global 
{8B867186-A0B5-5479-B824-E176EDD27C40}.DLL Debug|x64.Build.0 = DLL Debug|x64 {8B867186-A0B5-5479-B824-E176EDD27C40}.DLL Release|x64.ActiveCfg = DLL Release|x64 {8B867186-A0B5-5479-B824-E176EDD27C40}.DLL Release|x64.Build.0 = DLL Release|x64 + {8B867186-A0B5-5479-B824-E176EDD27C40}.Release - DX12|x64.ActiveCfg = Release|x64 + {8B867186-A0B5-5479-B824-E176EDD27C40}.Release - DX12|x64.Build.0 = Release|x64 + {8B867186-A0B5-5479-B824-E176EDD27C40}.Release - LLVM DX12|x64.ActiveCfg = Release|x64 + {8B867186-A0B5-5479-B824-E176EDD27C40}.Release - LLVM DX12|x64.Build.0 = Release|x64 {8B867186-A0B5-5479-B824-E176EDD27C40}.Release - LLVM|x64.ActiveCfg = Release|x64 {8B867186-A0B5-5479-B824-E176EDD27C40}.Release - LLVM|x64.Build.0 = Release|x64 {8B867186-A0B5-5479-B824-E176EDD27C40}.Release|x64.ActiveCfg = Release|x64 {8B867186-A0B5-5479-B824-E176EDD27C40}.Release|x64.Build.0 = Release|x64 + {3E6DCA27-5FA3-53EC-BBD6-2D42294B7AE6}.Debug - DX12|x64.ActiveCfg = Debug|x64 + {3E6DCA27-5FA3-53EC-BBD6-2D42294B7AE6}.Debug - DX12|x64.Build.0 = Debug|x64 + {3E6DCA27-5FA3-53EC-BBD6-2D42294B7AE6}.Debug - LLVM DX12|x64.ActiveCfg = Debug|x64 + {3E6DCA27-5FA3-53EC-BBD6-2D42294B7AE6}.Debug - LLVM DX12|x64.Build.0 = Debug|x64 {3E6DCA27-5FA3-53EC-BBD6-2D42294B7AE6}.Debug - LLVM|x64.ActiveCfg = Debug|x64 {3E6DCA27-5FA3-53EC-BBD6-2D42294B7AE6}.Debug - LLVM|x64.Build.0 = Debug|x64 {3E6DCA27-5FA3-53EC-BBD6-2D42294B7AE6}.Debug - MemLeak|x64.ActiveCfg = Debug|x64 @@ -455,10 +607,18 @@ Global {3E6DCA27-5FA3-53EC-BBD6-2D42294B7AE6}.DLL Debug|x64.Build.0 = DLL Debug|x64 {3E6DCA27-5FA3-53EC-BBD6-2D42294B7AE6}.DLL Release|x64.ActiveCfg = DLL Release|x64 {3E6DCA27-5FA3-53EC-BBD6-2D42294B7AE6}.DLL Release|x64.Build.0 = DLL Release|x64 + {3E6DCA27-5FA3-53EC-BBD6-2D42294B7AE6}.Release - DX12|x64.ActiveCfg = Release|x64 + {3E6DCA27-5FA3-53EC-BBD6-2D42294B7AE6}.Release - DX12|x64.Build.0 = Release|x64 + {3E6DCA27-5FA3-53EC-BBD6-2D42294B7AE6}.Release - LLVM DX12|x64.ActiveCfg = Release|x64 + 
{3E6DCA27-5FA3-53EC-BBD6-2D42294B7AE6}.Release - LLVM DX12|x64.Build.0 = Release|x64 {3E6DCA27-5FA3-53EC-BBD6-2D42294B7AE6}.Release - LLVM|x64.ActiveCfg = Release|x64 {3E6DCA27-5FA3-53EC-BBD6-2D42294B7AE6}.Release - LLVM|x64.Build.0 = Release|x64 {3E6DCA27-5FA3-53EC-BBD6-2D42294B7AE6}.Release|x64.ActiveCfg = Release|x64 {3E6DCA27-5FA3-53EC-BBD6-2D42294B7AE6}.Release|x64.Build.0 = Release|x64 + {09F2F96A-1CC6-5E43-AF1D-956EC2A4888D}.Debug - DX12|x64.ActiveCfg = Debug|x64 + {09F2F96A-1CC6-5E43-AF1D-956EC2A4888D}.Debug - DX12|x64.Build.0 = Debug|x64 + {09F2F96A-1CC6-5E43-AF1D-956EC2A4888D}.Debug - LLVM DX12|x64.ActiveCfg = Debug|x64 + {09F2F96A-1CC6-5E43-AF1D-956EC2A4888D}.Debug - LLVM DX12|x64.Build.0 = Debug|x64 {09F2F96A-1CC6-5E43-AF1D-956EC2A4888D}.Debug - LLVM|x64.ActiveCfg = Debug|x64 {09F2F96A-1CC6-5E43-AF1D-956EC2A4888D}.Debug - LLVM|x64.Build.0 = Debug|x64 {09F2F96A-1CC6-5E43-AF1D-956EC2A4888D}.Debug - MemLeak|x64.ActiveCfg = Debug|x64 @@ -469,10 +629,18 @@ Global {09F2F96A-1CC6-5E43-AF1D-956EC2A4888D}.DLL Debug|x64.Build.0 = DLL Debug|x64 {09F2F96A-1CC6-5E43-AF1D-956EC2A4888D}.DLL Release|x64.ActiveCfg = DLL Release|x64 {09F2F96A-1CC6-5E43-AF1D-956EC2A4888D}.DLL Release|x64.Build.0 = DLL Release|x64 + {09F2F96A-1CC6-5E43-AF1D-956EC2A4888D}.Release - DX12|x64.ActiveCfg = Release|x64 + {09F2F96A-1CC6-5E43-AF1D-956EC2A4888D}.Release - DX12|x64.Build.0 = Release|x64 + {09F2F96A-1CC6-5E43-AF1D-956EC2A4888D}.Release - LLVM DX12|x64.ActiveCfg = Release|x64 + {09F2F96A-1CC6-5E43-AF1D-956EC2A4888D}.Release - LLVM DX12|x64.Build.0 = Release|x64 {09F2F96A-1CC6-5E43-AF1D-956EC2A4888D}.Release - LLVM|x64.ActiveCfg = Release|x64 {09F2F96A-1CC6-5E43-AF1D-956EC2A4888D}.Release - LLVM|x64.Build.0 = Release|x64 {09F2F96A-1CC6-5E43-AF1D-956EC2A4888D}.Release|x64.ActiveCfg = Release|x64 {09F2F96A-1CC6-5E43-AF1D-956EC2A4888D}.Release|x64.Build.0 = Release|x64 + {87B42A9C-3F5C-53D7-9017-2B1CAE39457D}.Debug - DX12|x64.ActiveCfg = Debug|x64 + 
{87B42A9C-3F5C-53D7-9017-2B1CAE39457D}.Debug - DX12|x64.Build.0 = Debug|x64 + {87B42A9C-3F5C-53D7-9017-2B1CAE39457D}.Debug - LLVM DX12|x64.ActiveCfg = Debug|x64 + {87B42A9C-3F5C-53D7-9017-2B1CAE39457D}.Debug - LLVM DX12|x64.Build.0 = Debug|x64 {87B42A9C-3F5C-53D7-9017-2B1CAE39457D}.Debug - LLVM|x64.ActiveCfg = Debug|x64 {87B42A9C-3F5C-53D7-9017-2B1CAE39457D}.Debug - LLVM|x64.Build.0 = Debug|x64 {87B42A9C-3F5C-53D7-9017-2B1CAE39457D}.Debug - MemLeak|x64.ActiveCfg = Debug|x64 @@ -483,10 +651,18 @@ Global {87B42A9C-3F5C-53D7-9017-2B1CAE39457D}.DLL Debug|x64.Build.0 = DLL Debug|x64 {87B42A9C-3F5C-53D7-9017-2B1CAE39457D}.DLL Release|x64.ActiveCfg = DLL Release|x64 {87B42A9C-3F5C-53D7-9017-2B1CAE39457D}.DLL Release|x64.Build.0 = DLL Release|x64 + {87B42A9C-3F5C-53D7-9017-2B1CAE39457D}.Release - DX12|x64.ActiveCfg = Release|x64 + {87B42A9C-3F5C-53D7-9017-2B1CAE39457D}.Release - DX12|x64.Build.0 = Release|x64 + {87B42A9C-3F5C-53D7-9017-2B1CAE39457D}.Release - LLVM DX12|x64.ActiveCfg = Release|x64 + {87B42A9C-3F5C-53D7-9017-2B1CAE39457D}.Release - LLVM DX12|x64.Build.0 = Release|x64 {87B42A9C-3F5C-53D7-9017-2B1CAE39457D}.Release - LLVM|x64.ActiveCfg = Release|x64 {87B42A9C-3F5C-53D7-9017-2B1CAE39457D}.Release - LLVM|x64.Build.0 = Release|x64 {87B42A9C-3F5C-53D7-9017-2B1CAE39457D}.Release|x64.ActiveCfg = Release|x64 {87B42A9C-3F5C-53D7-9017-2B1CAE39457D}.Release|x64.Build.0 = Release|x64 + {23E1C437-A951-5943-8639-A17F3CF2E606}.Debug - DX12|x64.ActiveCfg = Debug|x64 + {23E1C437-A951-5943-8639-A17F3CF2E606}.Debug - DX12|x64.Build.0 = Debug|x64 + {23E1C437-A951-5943-8639-A17F3CF2E606}.Debug - LLVM DX12|x64.ActiveCfg = Debug|x64 + {23E1C437-A951-5943-8639-A17F3CF2E606}.Debug - LLVM DX12|x64.Build.0 = Debug|x64 {23E1C437-A951-5943-8639-A17F3CF2E606}.Debug - LLVM|x64.ActiveCfg = Debug|x64 {23E1C437-A951-5943-8639-A17F3CF2E606}.Debug - LLVM|x64.Build.0 = Debug|x64 {23E1C437-A951-5943-8639-A17F3CF2E606}.Debug - MemLeak|x64.ActiveCfg = Debug|x64 @@ -497,10 +673,18 @@ Global 
{23E1C437-A951-5943-8639-A17F3CF2E606}.DLL Debug|x64.Build.0 = DLL Debug|x64 {23E1C437-A951-5943-8639-A17F3CF2E606}.DLL Release|x64.ActiveCfg = DLL Release|x64 {23E1C437-A951-5943-8639-A17F3CF2E606}.DLL Release|x64.Build.0 = DLL Release|x64 + {23E1C437-A951-5943-8639-A17F3CF2E606}.Release - DX12|x64.ActiveCfg = Release|x64 + {23E1C437-A951-5943-8639-A17F3CF2E606}.Release - DX12|x64.Build.0 = Release|x64 + {23E1C437-A951-5943-8639-A17F3CF2E606}.Release - LLVM DX12|x64.ActiveCfg = Release|x64 + {23E1C437-A951-5943-8639-A17F3CF2E606}.Release - LLVM DX12|x64.Build.0 = Release|x64 {23E1C437-A951-5943-8639-A17F3CF2E606}.Release - LLVM|x64.ActiveCfg = Release|x64 {23E1C437-A951-5943-8639-A17F3CF2E606}.Release - LLVM|x64.Build.0 = Release|x64 {23E1C437-A951-5943-8639-A17F3CF2E606}.Release|x64.ActiveCfg = Release|x64 {23E1C437-A951-5943-8639-A17F3CF2E606}.Release|x64.Build.0 = Release|x64 + {74827EBD-93DC-5110-BA95-3F2AB029B6B0}.Debug - DX12|x64.ActiveCfg = Debug|x64 + {74827EBD-93DC-5110-BA95-3F2AB029B6B0}.Debug - DX12|x64.Build.0 = Debug|x64 + {74827EBD-93DC-5110-BA95-3F2AB029B6B0}.Debug - LLVM DX12|x64.ActiveCfg = Debug|x64 + {74827EBD-93DC-5110-BA95-3F2AB029B6B0}.Debug - LLVM DX12|x64.Build.0 = Debug|x64 {74827EBD-93DC-5110-BA95-3F2AB029B6B0}.Debug - LLVM|x64.ActiveCfg = Debug|x64 {74827EBD-93DC-5110-BA95-3F2AB029B6B0}.Debug - LLVM|x64.Build.0 = Debug|x64 {74827EBD-93DC-5110-BA95-3F2AB029B6B0}.Debug - MemLeak|x64.ActiveCfg = Debug|x64 @@ -511,10 +695,18 @@ Global {74827EBD-93DC-5110-BA95-3F2AB029B6B0}.DLL Debug|x64.Build.0 = DLL Debug|x64 {74827EBD-93DC-5110-BA95-3F2AB029B6B0}.DLL Release|x64.ActiveCfg = DLL Release|x64 {74827EBD-93DC-5110-BA95-3F2AB029B6B0}.DLL Release|x64.Build.0 = DLL Release|x64 + {74827EBD-93DC-5110-BA95-3F2AB029B6B0}.Release - DX12|x64.ActiveCfg = Release|x64 + {74827EBD-93DC-5110-BA95-3F2AB029B6B0}.Release - DX12|x64.Build.0 = Release|x64 + {74827EBD-93DC-5110-BA95-3F2AB029B6B0}.Release - LLVM DX12|x64.ActiveCfg = Release|x64 + 
{74827EBD-93DC-5110-BA95-3F2AB029B6B0}.Release - LLVM DX12|x64.Build.0 = Release|x64 {74827EBD-93DC-5110-BA95-3F2AB029B6B0}.Release - LLVM|x64.ActiveCfg = Release|x64 {74827EBD-93DC-5110-BA95-3F2AB029B6B0}.Release - LLVM|x64.Build.0 = Release|x64 {74827EBD-93DC-5110-BA95-3F2AB029B6B0}.Release|x64.ActiveCfg = Release|x64 {74827EBD-93DC-5110-BA95-3F2AB029B6B0}.Release|x64.Build.0 = Release|x64 + {AC40FF01-426E-4838-A317-66354CEFAE88}.Debug - DX12|x64.ActiveCfg = Debug|x64 + {AC40FF01-426E-4838-A317-66354CEFAE88}.Debug - DX12|x64.Build.0 = Debug|x64 + {AC40FF01-426E-4838-A317-66354CEFAE88}.Debug - LLVM DX12|x64.ActiveCfg = Debug|x64 + {AC40FF01-426E-4838-A317-66354CEFAE88}.Debug - LLVM DX12|x64.Build.0 = Debug|x64 {AC40FF01-426E-4838-A317-66354CEFAE88}.Debug - LLVM|x64.ActiveCfg = Debug|x64 {AC40FF01-426E-4838-A317-66354CEFAE88}.Debug - LLVM|x64.Build.0 = Debug|x64 {AC40FF01-426E-4838-A317-66354CEFAE88}.Debug - MemLeak|x64.ActiveCfg = Debug|x64 @@ -525,10 +717,18 @@ Global {AC40FF01-426E-4838-A317-66354CEFAE88}.DLL Debug|x64.Build.0 = Debug|x64 {AC40FF01-426E-4838-A317-66354CEFAE88}.DLL Release|x64.ActiveCfg = Release|x64 {AC40FF01-426E-4838-A317-66354CEFAE88}.DLL Release|x64.Build.0 = Release|x64 + {AC40FF01-426E-4838-A317-66354CEFAE88}.Release - DX12|x64.ActiveCfg = Release|x64 + {AC40FF01-426E-4838-A317-66354CEFAE88}.Release - DX12|x64.Build.0 = Release|x64 + {AC40FF01-426E-4838-A317-66354CEFAE88}.Release - LLVM DX12|x64.ActiveCfg = Release|x64 + {AC40FF01-426E-4838-A317-66354CEFAE88}.Release - LLVM DX12|x64.Build.0 = Release|x64 {AC40FF01-426E-4838-A317-66354CEFAE88}.Release - LLVM|x64.ActiveCfg = Release|x64 {AC40FF01-426E-4838-A317-66354CEFAE88}.Release - LLVM|x64.Build.0 = Release|x64 {AC40FF01-426E-4838-A317-66354CEFAE88}.Release|x64.ActiveCfg = Release|x64 {AC40FF01-426E-4838-A317-66354CEFAE88}.Release|x64.Build.0 = Release|x64 + {C4A10229-4712-4BD2-B63E-50D93C67A038}.Debug - DX12|x64.ActiveCfg = Debug - DX12|x64 + {C4A10229-4712-4BD2-B63E-50D93C67A038}.Debug 
- DX12|x64.Build.0 = Debug - DX12|x64 + {C4A10229-4712-4BD2-B63E-50D93C67A038}.Debug - LLVM DX12|x64.ActiveCfg = Debug - LLVM DX12|x64 + {C4A10229-4712-4BD2-B63E-50D93C67A038}.Debug - LLVM DX12|x64.Build.0 = Debug - LLVM DX12|x64 {C4A10229-4712-4BD2-B63E-50D93C67A038}.Debug - LLVM|x64.ActiveCfg = Debug - LLVM|x64 {C4A10229-4712-4BD2-B63E-50D93C67A038}.Debug - LLVM|x64.Build.0 = Debug - LLVM|x64 {C4A10229-4712-4BD2-B63E-50D93C67A038}.Debug - MemLeak|x64.ActiveCfg = Debug - MemLeak|x64 @@ -539,10 +739,17 @@ Global {C4A10229-4712-4BD2-B63E-50D93C67A038}.DLL Debug|x64.Build.0 = Debug|x64 {C4A10229-4712-4BD2-B63E-50D93C67A038}.DLL Release|x64.ActiveCfg = Release|x64 {C4A10229-4712-4BD2-B63E-50D93C67A038}.DLL Release|x64.Build.0 = Release|x64 + {C4A10229-4712-4BD2-B63E-50D93C67A038}.Release - DX12|x64.ActiveCfg = Release - DX12|x64 + {C4A10229-4712-4BD2-B63E-50D93C67A038}.Release - DX12|x64.Build.0 = Release - DX12|x64 + {C4A10229-4712-4BD2-B63E-50D93C67A038}.Release - LLVM DX12|x64.ActiveCfg = Release - LLVM DX12|x64 + {C4A10229-4712-4BD2-B63E-50D93C67A038}.Release - LLVM DX12|x64.Build.0 = Release - LLVM DX12|x64 {C4A10229-4712-4BD2-B63E-50D93C67A038}.Release - LLVM|x64.ActiveCfg = Release - LLVM|x64 {C4A10229-4712-4BD2-B63E-50D93C67A038}.Release - LLVM|x64.Build.0 = Release - LLVM|x64 {C4A10229-4712-4BD2-B63E-50D93C67A038}.Release|x64.ActiveCfg = Release|x64 {C4A10229-4712-4BD2-B63E-50D93C67A038}.Release|x64.Build.0 = Release|x64 + {8BC303AB-25BE-4276-8E57-73F171B2D672}.Debug - DX12|x64.ActiveCfg = Debug|x64 + {8BC303AB-25BE-4276-8E57-73F171B2D672}.Debug - LLVM DX12|x64.ActiveCfg = Debug|x64 + {8BC303AB-25BE-4276-8E57-73F171B2D672}.Debug - LLVM DX12|x64.Build.0 = Debug|x64 {8BC303AB-25BE-4276-8E57-73F171B2D672}.Debug - LLVM|x64.ActiveCfg = Debug|x64 {8BC303AB-25BE-4276-8E57-73F171B2D672}.Debug - LLVM|x64.Build.0 = Debug|x64 {8BC303AB-25BE-4276-8E57-73F171B2D672}.Debug - MemLeak|x64.ActiveCfg = Debug|x64 @@ -551,9 +758,16 @@ Global 
{8BC303AB-25BE-4276-8E57-73F171B2D672}.DLL Debug|x64.Build.0 = Debug|x64 {8BC303AB-25BE-4276-8E57-73F171B2D672}.DLL Release|x64.ActiveCfg = Release|x64 {8BC303AB-25BE-4276-8E57-73F171B2D672}.DLL Release|x64.Build.0 = Release|x64 + {8BC303AB-25BE-4276-8E57-73F171B2D672}.Release - DX12|x64.ActiveCfg = Release|x64 + {8BC303AB-25BE-4276-8E57-73F171B2D672}.Release - LLVM DX12|x64.ActiveCfg = Release|x64 + {8BC303AB-25BE-4276-8E57-73F171B2D672}.Release - LLVM DX12|x64.Build.0 = Release|x64 {8BC303AB-25BE-4276-8E57-73F171B2D672}.Release - LLVM|x64.ActiveCfg = Release|x64 {8BC303AB-25BE-4276-8E57-73F171B2D672}.Release - LLVM|x64.Build.0 = Release|x64 {8BC303AB-25BE-4276-8E57-73F171B2D672}.Release|x64.ActiveCfg = Release|x64 + {01F4CE10-2CFB-41A8-B41F-E54337868A1D}.Debug - DX12|x64.ActiveCfg = Debug|x64 + {01F4CE10-2CFB-41A8-B41F-E54337868A1D}.Debug - DX12|x64.Build.0 = Debug|x64 + {01F4CE10-2CFB-41A8-B41F-E54337868A1D}.Debug - LLVM DX12|x64.ActiveCfg = Debug|x64 + {01F4CE10-2CFB-41A8-B41F-E54337868A1D}.Debug - LLVM DX12|x64.Build.0 = Debug|x64 {01F4CE10-2CFB-41A8-B41F-E54337868A1D}.Debug - LLVM|x64.ActiveCfg = Debug|x64 {01F4CE10-2CFB-41A8-B41F-E54337868A1D}.Debug - LLVM|x64.Build.0 = Debug|x64 {01F4CE10-2CFB-41A8-B41F-E54337868A1D}.Debug - MemLeak|x64.ActiveCfg = Debug|x64 @@ -564,10 +778,18 @@ Global {01F4CE10-2CFB-41A8-B41F-E54337868A1D}.DLL Debug|x64.Build.0 = DLL Debug|x64 {01F4CE10-2CFB-41A8-B41F-E54337868A1D}.DLL Release|x64.ActiveCfg = DLL Release|x64 {01F4CE10-2CFB-41A8-B41F-E54337868A1D}.DLL Release|x64.Build.0 = DLL Release|x64 + {01F4CE10-2CFB-41A8-B41F-E54337868A1D}.Release - DX12|x64.ActiveCfg = Release|x64 + {01F4CE10-2CFB-41A8-B41F-E54337868A1D}.Release - DX12|x64.Build.0 = Release|x64 + {01F4CE10-2CFB-41A8-B41F-E54337868A1D}.Release - LLVM DX12|x64.ActiveCfg = Release|x64 + {01F4CE10-2CFB-41A8-B41F-E54337868A1D}.Release - LLVM DX12|x64.Build.0 = Release|x64 {01F4CE10-2CFB-41A8-B41F-E54337868A1D}.Release - LLVM|x64.ActiveCfg = Release|x64 
{01F4CE10-2CFB-41A8-B41F-E54337868A1D}.Release - LLVM|x64.Build.0 = Release|x64 {01F4CE10-2CFB-41A8-B41F-E54337868A1D}.Release|x64.ActiveCfg = Release|x64 {01F4CE10-2CFB-41A8-B41F-E54337868A1D}.Release|x64.Build.0 = Release|x64 + {00D36322-6188-4A66-B514-3B3F183E998D}.Debug - DX12|x64.ActiveCfg = Debug|x64 + {00D36322-6188-4A66-B514-3B3F183E998D}.Debug - DX12|x64.Build.0 = Debug|x64 + {00D36322-6188-4A66-B514-3B3F183E998D}.Debug - LLVM DX12|x64.ActiveCfg = Debug|x64 + {00D36322-6188-4A66-B514-3B3F183E998D}.Debug - LLVM DX12|x64.Build.0 = Debug|x64 {00D36322-6188-4A66-B514-3B3F183E998D}.Debug - LLVM|x64.ActiveCfg = Debug|x64 {00D36322-6188-4A66-B514-3B3F183E998D}.Debug - LLVM|x64.Build.0 = Debug|x64 {00D36322-6188-4A66-B514-3B3F183E998D}.Debug - MemLeak|x64.ActiveCfg = Debug|x64 @@ -578,6 +800,10 @@ Global {00D36322-6188-4A66-B514-3B3F183E998D}.DLL Debug|x64.Build.0 = DLL Debug|x64 {00D36322-6188-4A66-B514-3B3F183E998D}.DLL Release|x64.ActiveCfg = DLL Release|x64 {00D36322-6188-4A66-B514-3B3F183E998D}.DLL Release|x64.Build.0 = DLL Release|x64 + {00D36322-6188-4A66-B514-3B3F183E998D}.Release - DX12|x64.ActiveCfg = Release|x64 + {00D36322-6188-4A66-B514-3B3F183E998D}.Release - DX12|x64.Build.0 = Release|x64 + {00D36322-6188-4A66-B514-3B3F183E998D}.Release - LLVM DX12|x64.ActiveCfg = Release|x64 + {00D36322-6188-4A66-B514-3B3F183E998D}.Release - LLVM DX12|x64.Build.0 = Release|x64 {00D36322-6188-4A66-B514-3B3F183E998D}.Release - LLVM|x64.ActiveCfg = Release|x64 {00D36322-6188-4A66-B514-3B3F183E998D}.Release - LLVM|x64.Build.0 = Release|x64 {00D36322-6188-4A66-B514-3B3F183E998D}.Release|x64.ActiveCfg = Release|x64 diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index 724426cb19..dd0661e1cb 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -1,6 +1,14 @@  + + Debug - DX12 + x64 + + + Debug - LLVM DX12 + x64 + Debug - LLVM x64 @@ -13,6 +21,14 @@ Debug x64 + + Release - DX12 + x64 + + + Release - LLVM DX12 + x64 + Release - LLVM x64 @@ -70,9 
+86,13 @@ NotUsing NotUsing + NotUsing NotUsing + NotUsing NotUsing + NotUsing NotUsing + NotUsing @@ -157,8 +177,10 @@ true + true true true + true @@ -333,15 +355,21 @@ true + true true true + true Create Create + Create Create + Create Create + Create Create + Create @@ -654,12 +682,24 @@ v140 Unicode + + StaticLibrary + true + v140 + Unicode + StaticLibrary true v140 Unicode + + StaticLibrary + true + v140 + Unicode + StaticLibrary true @@ -673,6 +713,13 @@ false Unicode + + StaticLibrary + false + v140 + false + Unicode + StaticLibrary false @@ -680,6 +727,13 @@ false Unicode + + StaticLibrary + false + v140 + false + Unicode + v140 @@ -689,18 +743,30 @@ + + + + + + + + + + + + .\;..\;..\minidx9\Include;..\asmjit\src\asmjit;..\wxWidgets\include\msvc;..\wxWidgets\include;.\OpenAL\include;..\ffmpeg\WindowsInclude;..\ffmpeg\Windows\x86_64\Include;$(VC_IncludePath);$(WindowsSDK_IncludePath);..\llvm\include;..\llvm_build\include;$(UniversalCRT_IncludePath) @@ -708,12 +774,24 @@ $(UniversalCRT_LibraryPath_x64);$(LibraryPath) $(ExcludePath) + + .\;..\;..\minidx9\Include;..\asmjit\src\asmjit;..\wxWidgets\include\msvc;..\wxWidgets\include;.\OpenAL\include;..\ffmpeg\WindowsInclude;..\ffmpeg\Windows\x86_64\Include;$(VC_IncludePath);$(WindowsSDK_IncludePath);..\llvm\include;..\llvm_build\include;$(UniversalCRT_IncludePath) + $(Platform)\$(Configuration)\emucore\ + $(UniversalCRT_LibraryPath_x64);$(LibraryPath) + $(ExcludePath) + .\;..\;..\minidx9\Include;..\asmjit\src\asmjit;..\wxWidgets\include\msvc;..\wxWidgets\include;.\OpenAL\include;..\ffmpeg\WindowsInclude;..\ffmpeg\Windows\x86_64\Include;$(VC_IncludePath);$(WindowsSDK_IncludePath);..\llvm\include;..\llvm_build\include;$(UniversalCRT_IncludePath) $(Platform)\$(Configuration)\emucore\ $(LibraryPath) $(ExcludePath) + + 
.\;..\;..\minidx9\Include;..\asmjit\src\asmjit;..\wxWidgets\include\msvc;..\wxWidgets\include;.\OpenAL\include;..\ffmpeg\WindowsInclude;..\ffmpeg\Windows\x86_64\Include;$(VC_IncludePath);$(WindowsSDK_IncludePath);..\llvm\include;..\llvm_build\include;$(UniversalCRT_IncludePath) + $(Platform)\$(Configuration)\emucore\ + $(LibraryPath) + $(ExcludePath) + .\;..\;..\minidx9\Include;..\asmjit\src\asmjit;..\wxWidgets\include\msvc;..\wxWidgets\include;.\OpenAL\include;..\ffmpeg\WindowsInclude;..\ffmpeg\Windows\x86_64\Include;$(VC_IncludePath);$(WindowsSDK_IncludePath);..\llvm\include;..\llvm_build\include;$(UniversalCRT_IncludePath) $(Platform)\$(Configuration)\emucore\ @@ -726,13 +804,43 @@ $(UniversalCRT_LibraryPath_x64);$(LibraryPath) $(ExcludePath) + + .\;..\;..\minidx9\Include;..\asmjit\src\asmjit;..\wxWidgets\include\msvc;..\wxWidgets\include;.\OpenAL\include;..\ffmpeg\WindowsInclude;..\ffmpeg\Windows\x86_64\Include;$(VC_IncludePath);$(WindowsSDK_IncludePath);..\llvm\include;..\llvm_build\include;$(UniversalCRT_IncludePath) + $(Platform)\$(Configuration)\emucore\ + $(UniversalCRT_LibraryPath_x64);$(LibraryPath) + $(ExcludePath) + .\;..\;..\minidx9\Include;..\asmjit\src\asmjit;..\wxWidgets\include\msvc;..\wxWidgets\include;.\OpenAL\include;..\ffmpeg\WindowsInclude;..\ffmpeg\Windows\x86_64\Include;$(VC_IncludePath);$(WindowsSDK_IncludePath);..\llvm\include;..\llvm_build\include;$(UniversalCRT_IncludePath) $(Platform)\$(Configuration)\emucore\ $(UniversalCRT_LibraryPath_x64);$(LibraryPath) $(ExcludePath) + + .\;..\;..\minidx9\Include;..\asmjit\src\asmjit;..\wxWidgets\include\msvc;..\wxWidgets\include;.\OpenAL\include;..\ffmpeg\WindowsInclude;..\ffmpeg\Windows\x86_64\Include;$(VC_IncludePath);$(WindowsSDK_IncludePath);..\llvm\include;..\llvm_build\include;$(UniversalCRT_IncludePath) + $(Platform)\$(Configuration)\emucore\ + $(UniversalCRT_LibraryPath_x64);$(LibraryPath) + $(ExcludePath) + + + Level3 + Disabled + false + Use + _UNICODE;UNICODE;%(PreprocessorDefinitions) 
+ stdafx.h + Async + true + + + false + + + true + + + Level3 Disabled @@ -772,6 +880,28 @@ LLVMMCJIT.lib;LLVMRuntimeDyld.lib;LLVMVectorize.lib;LLVMX86CodeGen.lib;LLVMX86Disassembler.lib;LLVMExecutionEngine.lib;LLVMAsmPrinter.lib;LLVMSelectionDAG.lib;LLVMCodeGen.lib;LLVMScalarOpts.lib;LLVMInstCombine.lib;LLVMTransformUtils.lib;LLVMipa.lib;LLVMAnalysis.lib;LLVMTarget.lib;LLVMX86Desc.lib;LLVMX86AsmPrinter.lib;LLVMObject.lib;LLVMMCParser.lib;LLVMBitReader.lib;LLVMCore.lib;LLVMX86Utils.lib;LLVMMC.lib;LLVMX86Info.lib;LLVMSupport.lib;LLVMMCDisassembler.lib + + + Level3 + Disabled + false + Use + _UNICODE;UNICODE;LLVM_AVAILABLE;%(PreprocessorDefinitions);DX12_SUPPORT + stdafx.h + Async + true + + + true + + + true + + + ..\llvm_build\Debug\lib + LLVMMCJIT.lib;LLVMRuntimeDyld.lib;LLVMVectorize.lib;LLVMX86CodeGen.lib;LLVMX86Disassembler.lib;LLVMExecutionEngine.lib;LLVMAsmPrinter.lib;LLVMSelectionDAG.lib;LLVMCodeGen.lib;LLVMScalarOpts.lib;LLVMInstCombine.lib;LLVMTransformUtils.lib;LLVMipa.lib;LLVMAnalysis.lib;LLVMTarget.lib;LLVMX86Desc.lib;LLVMX86AsmPrinter.lib;LLVMObject.lib;LLVMMCParser.lib;LLVMBitReader.lib;LLVMCore.lib;LLVMX86Utils.lib;LLVMMC.lib;LLVMX86Info.lib;LLVMSupport.lib;LLVMMCDisassembler.lib + + Level2 @@ -790,6 +920,27 @@ + + Level3 + MaxSpeed + true + true + false + Use + stdafx.h + Async + true + + + _UNICODE;UNICODE;%(PreprocessorDefinitions) + + + true + true + true + + + Level3 MaxSpeed @@ -811,6 +962,31 @@ + + Level3 + MaxSpeed + true + true + false + Use + stdafx.h + Async + LLVM_AVAILABLE;%(PreprocessorDefinitions) + true + + + + + true + true + true + + + ..\llvm_build\Release\lib + 
LLVMMCJIT.lib;LLVMRuntimeDyld.lib;LLVMVectorize.lib;LLVMX86CodeGen.lib;LLVMX86Disassembler.lib;LLVMExecutionEngine.lib;LLVMAsmPrinter.lib;LLVMSelectionDAG.lib;LLVMCodeGen.lib;LLVMScalarOpts.lib;LLVMInstCombine.lib;LLVMTransformUtils.lib;LLVMipa.lib;LLVMAnalysis.lib;LLVMTarget.lib;LLVMX86Desc.lib;LLVMX86AsmPrinter.lib;LLVMObject.lib;LLVMMCParser.lib;LLVMBitReader.lib;LLVMCore.lib;LLVMX86Utils.lib;LLVMMC.lib;LLVMX86Info.lib;LLVMSupport.lib;LLVMMCDisassembler.lib + + + Level3 MaxSpeed diff --git a/rpcs3/rpcs3.vcxproj b/rpcs3/rpcs3.vcxproj index e8aadd436f..87088a93e6 100644 --- a/rpcs3/rpcs3.vcxproj +++ b/rpcs3/rpcs3.vcxproj @@ -1,6 +1,10 @@  + + Debug - DX12 + x64 + Debug - MemLeak x64 @@ -9,6 +13,10 @@ Debug x64 + + Release - DX12 + x64 + Release x64 @@ -27,6 +35,12 @@ Unicode v140 + + Application + true + Unicode + v140 + Application true @@ -41,18 +55,32 @@ v140 false + + Application + false + true + Unicode + v140 + false + + + + + + + .\;..\wxWidgets\include;..\SDL-1.3.0-5538\include;..\SDL_image-1.2.10;..\pthreads-2.8.0;..\;..\ffmpeg\WindowsInclude;..\ffmpeg\Windows\x86_64\Include;.\OpenAL\include;$(IncludePath);..\asmjit\src\asmjit;$(UniversalCRT_IncludePath) @@ -60,6 +88,12 @@ ..\libs\$(Configuration)\;$(UniversalCRT_LibraryPath_x64);$(LibraryPath) $(ProjectName)-dbg + + .\;..\wxWidgets\include;..\SDL-1.3.0-5538\include;..\SDL_image-1.2.10;..\pthreads-2.8.0;..\;..\ffmpeg\WindowsInclude;..\ffmpeg\Windows\x86_64\Include;.\OpenAL\include;$(IncludePath);..\asmjit\src\asmjit;$(UniversalCRT_IncludePath) + $(SolutionDir)bin\ + ..\libs\$(Configuration)\;$(UniversalCRT_LibraryPath_x64);$(LibraryPath) + $(ProjectName)-dbg + .\;..\wxWidgets\include;..\SDL-1.3.0-5538\include;..\SDL_image-1.2.10;..\pthreads-2.8.0;..\;..\ffmpeg\WindowsInclude;..\ffmpeg\Windows\x86_64\Include;.\OpenAL\include;$(UniversalCRT_IncludePath);$(IncludePath) $(SolutionDir)bin\ @@ -74,7 +108,46 @@ false false + + false + 
.\;..\wxWidgets\include;..\SDL-1.3.0-5538\include;..\SDL_image-1.2.10;..\pthreads-2.8.0;..\;..\ffmpeg\WindowsInclude;..\ffmpeg\Windows\x86_64\Include;.\OpenAL\include;$(IncludePath);..\asmjit\src\asmjit;$(UniversalCRT_IncludePath) + $(SolutionDir)bin\ + ..\libs\$(Configuration)\;$(UniversalCRT_LibraryPath_x64);$(LibraryPath) + false + false + + + Level3 + Disabled + ProgramDatabase + Use + ..\wxWidgets\include\msvc + Async + stdafx_gui.h + $(IntDir)$(TargetName)_gui.pch + _UNICODE;UNICODE;%(PreprocessorDefinitions) + true + + + true + wxmsw31ud_adv.lib;wxbase31ud.lib;wxmsw31ud_core.lib;wxmsw31ud_aui.lib;wxtiffd.lib;wxjpegd.lib;wxpngd.lib;wxzlibd.lib;odbc32.lib;odbccp32.lib;comctl32.lib;ws2_32.lib;shlwapi.lib;winmm.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;rpcrt4.lib;avcodec.lib;avformat.lib;avutil.lib;swresample.lib;swscale.lib;libOpenAL32.dll.a;asmjit.lib;%(AdditionalDependencies) + %(IgnoreSpecificDefaultLibraries) + true + ..\wxWidgets\lib\vc_x64_lib;..\ffmpeg\Windows\x86_64\lib;..\OpenAL\Win64 + 0x200000000 + true + false + + + "$(SolutionDir)\Utilities\git-version-gen.cmd" + Updating git-version.h + + + false + + + Level3 Disabled @@ -137,6 +210,45 @@ + + Level3 + Full + true + true + ..\wxWidgets\include\msvc + MultiThreadedDLL + WIN32;NDEBUG;_WINDOWS;%(PreprocessorDefinitions) + false + Use + Speed + Async + false + true + stdafx_gui.h + $(IntDir)$(TargetName)_gui.pch + true + + + Windows + true + true + true + wxmsw31u_adv.lib;wxbase31u.lib;wxmsw31u_core.lib;wxmsw31u_aui.lib;odbc32.lib;odbccp32.lib;comctl32.lib;ws2_32.lib;shlwapi.lib;winmm.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;rpcrt4.lib;wxtiff.lib;wxjpeg.lib;wxpng.lib;wxzlib.lib;wxregexu.lib;wxexpat.lib;wsock32.lib;wininet.lib;avcodec.lib;avformat.lib;avutil.lib;swresample.lib;swscale.lib;libOpenAL32.dll.a;asmjit.lib;%(AdditionalDependencies) + + 
+ %(IgnoreSpecificDefaultLibraries) + true + ..\wxWidgets\lib\vc_x64_lib;..\ffmpeg\Windows\x86_64\lib;..\OpenAL\Win64 + 0x200000000 + true + false + + + "$(SolutionDir)\Utilities\git-version-gen.cmd" + Updating git-version.h + + + Level3 Full @@ -206,7 +318,9 @@ Create Create + Create Create + Create diff --git a/rpcs3/rpcs3.vcxproj.user b/rpcs3/rpcs3.vcxproj.user index 130ca03334..96886c83a5 100644 --- a/rpcs3/rpcs3.vcxproj.user +++ b/rpcs3/rpcs3.vcxproj.user @@ -4,6 +4,10 @@ $(SolutionDir)bin\ WindowsLocalDebugger + + $(SolutionDir)bin\ + WindowsLocalDebugger + $(SolutionDir)bin\ WindowsLocalDebugger @@ -13,10 +17,18 @@ WindowsLocalDebugger 1> stdout.log 2> stderr.log + + $(SolutionDir)bin\ + WindowsLocalDebugger + $(SolutionDir)bin\ WindowsLocalDebugger + + $(SolutionDir)bin\ + WindowsLocalDebugger + $(SolutionDir)bin\ WindowsLocalDebugger @@ -25,6 +37,10 @@ $(SolutionDir)bin\ WindowsLocalDebugger + + $(SolutionDir)bin\ + WindowsLocalDebugger + false From f0a5641cf06c70e59aaf5a50826150558fbf31b0 Mon Sep 17 00:00:00 2001 From: Zangetsu38 Date: Sun, 2 Aug 2015 19:30:31 +0200 Subject: [PATCH 339/343] Fix file not found --- rpcs3/rpcs3.vcxproj | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rpcs3/rpcs3.vcxproj b/rpcs3/rpcs3.vcxproj index 87088a93e6..9c9004b703 100644 --- a/rpcs3/rpcs3.vcxproj +++ b/rpcs3/rpcs3.vcxproj @@ -91,7 +91,7 @@ .\;..\wxWidgets\include;..\SDL-1.3.0-5538\include;..\SDL_image-1.2.10;..\pthreads-2.8.0;..\;..\ffmpeg\WindowsInclude;..\ffmpeg\Windows\x86_64\Include;.\OpenAL\include;$(IncludePath);..\asmjit\src\asmjit;$(UniversalCRT_IncludePath) $(SolutionDir)bin\ - ..\libs\$(Configuration)\;$(UniversalCRT_LibraryPath_x64);$(LibraryPath) + ..\libs\Debug\;$(UniversalCRT_LibraryPath_x64);$(LibraryPath) $(ProjectName)-dbg @@ -112,7 +112,7 @@ false 
.\;..\wxWidgets\include;..\SDL-1.3.0-5538\include;..\SDL_image-1.2.10;..\pthreads-2.8.0;..\;..\ffmpeg\WindowsInclude;..\ffmpeg\Windows\x86_64\Include;.\OpenAL\include;$(IncludePath);..\asmjit\src\asmjit;$(UniversalCRT_IncludePath) $(SolutionDir)bin\ - ..\libs\$(Configuration)\;$(UniversalCRT_LibraryPath_x64);$(LibraryPath) + ..\libs\Release\;$(UniversalCRT_LibraryPath_x64);$(LibraryPath) false false From 7843b23ee1d7105ade13d60f255e620405b9c815 Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Sun, 2 Aug 2015 22:16:57 +0200 Subject: [PATCH 340/343] Move DX12_SUPPORT before ($preprocessor defenitions) --- rpcs3/emucore.vcxproj | 8 ++++---- rpcs3/rpcs3.vcxproj | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index dd0661e1cb..be3f74be27 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -846,7 +846,7 @@ Disabled false Use - _UNICODE;UNICODE;%(PreprocessorDefinitions);DX12_SUPPORT + _UNICODE;UNICODE;DX12_SUPPORT;%(PreprocessorDefinitions) stdafx.h Async true @@ -886,7 +886,7 @@ Disabled false Use - _UNICODE;UNICODE;LLVM_AVAILABLE;%(PreprocessorDefinitions);DX12_SUPPORT + _UNICODE;UNICODE;LLVM_AVAILABLE;DX12_SUPPORT;%(PreprocessorDefinitions) stdafx.h Async true @@ -953,7 +953,7 @@ true - _UNICODE;UNICODE;%(PreprocessorDefinitions);DX12_SUPPORT + _UNICODE;UNICODE;DX12_SUPPORT;%(PreprocessorDefinitions) true @@ -996,7 +996,7 @@ Use stdafx.h Async - LLVM_AVAILABLE;%(PreprocessorDefinitions);DX12_SUPPORT + LLVM_AVAILABLE;DX12_SUPPORT;%(PreprocessorDefinitions) true diff --git a/rpcs3/rpcs3.vcxproj b/rpcs3/rpcs3.vcxproj index 9c9004b703..ac894cd55a 100644 --- a/rpcs3/rpcs3.vcxproj +++ b/rpcs3/rpcs3.vcxproj @@ -157,7 +157,7 @@ Async stdafx_gui.h $(IntDir)$(TargetName)_gui.pch - _UNICODE;UNICODE;%(PreprocessorDefinitions);DX12_SUPPORT + _UNICODE;UNICODE;DX12_SUPPORT;%(PreprocessorDefinitions) true @@ -256,7 +256,7 @@ true ..\wxWidgets\include\msvc MultiThreadedDLL - 
WIN32;NDEBUG;_WINDOWS;%(PreprocessorDefinitions);DX12_SUPPORT + WIN32;NDEBUG;_WINDOWS;DX12_SUPPORT;%(PreprocessorDefinitions) false Use Speed From cf1c86bb2ff32f6a1566a263d381fccafddbf1f0 Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Tue, 11 Aug 2015 00:37:47 +0200 Subject: [PATCH 341/343] d3d12: Signal thread termination request + use a producer/consumer pattern closer to other ones in rpcs3. --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 33 +++++++++++++++++++++++---- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 2 ++ 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 24d279e0c2..81328c58bd 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -35,21 +35,46 @@ void SetGetD3DGSFrameCallback(GetGSFrameCb2 value) GarbageCollectionThread::GarbageCollectionThread() { + m_isThreadAlive = true; + m_askForTermination = false; m_worker = std::thread([this]() { - while (true) + while (m_isThreadAlive) { std::unique_lock lock(m_mutex); - while (m_queue.empty()) + while (!m_askForTermination) + { + CHECK_EMU_STATUS; + + if (!lock) + { + lock.lock(); + continue; + } + + if (!m_queue.empty()) + { + auto func = std::move(m_queue.front()); + + m_queue.pop(); + + if (lock) lock.unlock(); + + func(); + + continue; + } cv.wait(lock); - m_queue.front()(); - m_queue.pop(); + } } + m_isThreadAlive = false; }); m_worker.detach(); } GarbageCollectionThread::~GarbageCollectionThread() { + m_askForTermination = true; + while (m_isThreadAlive); } void GarbageCollectionThread::pushWork(std::function&& f) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 243f5a4a70..266ed1b7f6 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -215,6 +215,8 @@ struct DataHeap */ struct GarbageCollectionThread { + bool m_isThreadAlive; + bool m_askForTermination; std::mutex m_mutex; 
std::condition_variable cv; std::queue > m_queue; From 9cb733906762fa91d5d00f467a26c52e63ba71c2 Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Tue, 11 Aug 2015 22:04:15 +0200 Subject: [PATCH 342/343] d3d12: Do not detach garbage collection thread Thanks Neko for the tips. --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 62 +++++++++++++-------------- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 3 +- 2 files changed, 31 insertions(+), 34 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 81328c58bd..05367e9f94 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -35,53 +35,51 @@ void SetGetD3DGSFrameCallback(GetGSFrameCb2 value) GarbageCollectionThread::GarbageCollectionThread() { - m_isThreadAlive = true; m_askForTermination = false; m_worker = std::thread([this]() { - while (m_isThreadAlive) + std::unique_lock lock(m_mutex); + while (!m_askForTermination) { - std::unique_lock lock(m_mutex); - while (!m_askForTermination) + if (!lock) { - CHECK_EMU_STATUS; - - if (!lock) - { - lock.lock(); - continue; - } - - if (!m_queue.empty()) - { - auto func = std::move(m_queue.front()); - - m_queue.pop(); - - if (lock) lock.unlock(); - - func(); - - continue; - } - cv.wait(lock); + lock.lock(); + continue; } + + if (!m_queue.empty()) + { + auto func = std::move(m_queue.front()); + + m_queue.pop(); + + if (lock) lock.unlock(); + + func(); + + continue; + } + cv.wait(lock); } - m_isThreadAlive = false; }); - m_worker.detach(); } GarbageCollectionThread::~GarbageCollectionThread() { - m_askForTermination = true; - while (m_isThreadAlive); + { + std::unique_lock lock(m_mutex); + m_askForTermination = true; + cv.notify_one(); + } + m_worker.join(); } void GarbageCollectionThread::pushWork(std::function&& f) { - std::unique_lock lock(m_mutex); - m_queue.push(f); - cv.notify_all(); + { + std::unique_lock lock(m_mutex); + m_queue.push(f); + } + cv.notify_one(); } void 
GarbageCollectionThread::waitForCompletion() diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 266ed1b7f6..87cdbabe62 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -215,8 +215,7 @@ struct DataHeap */ struct GarbageCollectionThread { - bool m_isThreadAlive; - bool m_askForTermination; + std::atomic m_askForTermination; std::mutex m_mutex; std::condition_variable cv; std::queue > m_queue; From abee3539b8bb2e31677b0f45f48bf4d082d430ac Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Thu, 13 Aug 2015 23:50:47 +0200 Subject: [PATCH 343/343] d3d12: Fix non dx12 build config --- rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 4 ++-- rpcs3/emucore.vcxproj | 2 ++ rpcs3/rpcs3.vcxproj | 12 ++++++------ 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index b52a0d174a..2038ec5b52 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -222,7 +222,7 @@ writeTexelsSwizzled(const char *src, char *dst, size_t widthInBlock, size_t heig log2height = (u32)(logf((float)currentHeight) / logf(2.f)); #pragma omp parallel for - for (unsigned row = 0; row < currentHeight; row++) + for (int row = 0; row < currentHeight; row++) for (int j = 0; j < currentWidth; j++) castedDst[(row * rowPitch / 4) + j] = castedSrc[LinearToSwizzleAddress(j, row, 0, log2width, log2height, 0)]; @@ -298,7 +298,7 @@ write16bTexelsSwizzled(const char *src, char *dst, size_t widthInBlock, size_t h log2height = (u32)(logf((float)currentHeight) / logf(2.f)); #pragma omp parallel for - for (unsigned row = 0; row < currentHeight; row++) + for (int row = 0; row < currentHeight; row++) for (int j = 0; j < currentWidth; j++) castedDst[(row * rowPitch / 2) + j] = castedSrc[LinearToSwizzleAddress(j, row, 0, log2width, log2height, 0)]; diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index 
be3f74be27..b519ddc057 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -954,6 +954,7 @@ _UNICODE;UNICODE;DX12_SUPPORT;%(PreprocessorDefinitions) + true true @@ -1000,6 +1001,7 @@ true + true true diff --git a/rpcs3/rpcs3.vcxproj b/rpcs3/rpcs3.vcxproj index ac894cd55a..fc95cbed81 100644 --- a/rpcs3/rpcs3.vcxproj +++ b/rpcs3/rpcs3.vcxproj @@ -131,10 +131,10 @@ true - wxmsw31ud_adv.lib;wxbase31ud.lib;wxmsw31ud_core.lib;wxmsw31ud_aui.lib;wxtiffd.lib;wxjpegd.lib;wxpngd.lib;wxzlibd.lib;odbc32.lib;odbccp32.lib;comctl32.lib;ws2_32.lib;shlwapi.lib;winmm.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;rpcrt4.lib;avcodec.lib;avformat.lib;avutil.lib;swresample.lib;swscale.lib;libOpenAL32.dll.a;asmjit.lib;%(AdditionalDependencies) + wxmsw31ud_adv.lib;wxbase31ud.lib;wxmsw31ud_core.lib;wxmsw31ud_aui.lib;wxtiffd.lib;wxjpegd.lib;wxpngd.lib;wxzlibd.lib;odbc32.lib;odbccp32.lib;comctl32.lib;ws2_32.lib;shlwapi.lib;winmm.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;rpcrt4.lib;avcodec.lib;avformat.lib;avutil.lib;swresample.lib;swscale.lib;OpenAL32.lib;asmjit.lib;%(AdditionalDependencies) %(IgnoreSpecificDefaultLibraries) true - ..\wxWidgets\lib\vc_x64_lib;..\ffmpeg\Windows\x86_64\lib;..\OpenAL\Win64 + ..\wxWidgets\lib\vc_x64_lib;..\ffmpeg\Windows\x86_64\lib;..\OpenAL\libs\Win64 0x200000000 true false @@ -162,10 +162,10 @@ true - wxmsw31ud_adv.lib;wxbase31ud.lib;wxmsw31ud_core.lib;wxmsw31ud_aui.lib;wxtiffd.lib;wxjpegd.lib;wxpngd.lib;wxzlibd.lib;odbc32.lib;odbccp32.lib;comctl32.lib;ws2_32.lib;shlwapi.lib;winmm.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;rpcrt4.lib;avcodec.lib;avformat.lib;avutil.lib;swresample.lib;swscale.lib;libOpenAL32.dll.a;asmjit.lib;%(AdditionalDependencies) + 
wxmsw31ud_adv.lib;wxbase31ud.lib;wxmsw31ud_core.lib;wxmsw31ud_aui.lib;wxtiffd.lib;wxjpegd.lib;wxpngd.lib;wxzlibd.lib;odbc32.lib;odbccp32.lib;comctl32.lib;ws2_32.lib;shlwapi.lib;winmm.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;rpcrt4.lib;avcodec.lib;avformat.lib;avutil.lib;swresample.lib;swscale.lib;OpenAL32.lib;asmjit.lib;%(AdditionalDependencies) %(IgnoreSpecificDefaultLibraries) true - ..\wxWidgets\lib\vc_x64_lib;..\ffmpeg\Windows\x86_64\lib;..\OpenAL\Win64 + ..\wxWidgets\lib\vc_x64_lib;..\ffmpeg\Windows\x86_64\lib;..\OpenAL\libs\Win64 0x200000000 true false @@ -233,12 +233,12 @@ true true true - wxmsw31u_adv.lib;wxbase31u.lib;wxmsw31u_core.lib;wxmsw31u_aui.lib;odbc32.lib;odbccp32.lib;comctl32.lib;ws2_32.lib;shlwapi.lib;winmm.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;rpcrt4.lib;wxtiff.lib;wxjpeg.lib;wxpng.lib;wxzlib.lib;wxregexu.lib;wxexpat.lib;wsock32.lib;wininet.lib;avcodec.lib;avformat.lib;avutil.lib;swresample.lib;swscale.lib;libOpenAL32.dll.a;asmjit.lib;%(AdditionalDependencies) + wxmsw31u_adv.lib;wxbase31u.lib;wxmsw31u_core.lib;wxmsw31u_aui.lib;odbc32.lib;odbccp32.lib;comctl32.lib;ws2_32.lib;shlwapi.lib;winmm.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;rpcrt4.lib;wxtiff.lib;wxjpeg.lib;wxpng.lib;wxzlib.lib;wxregexu.lib;wxexpat.lib;wsock32.lib;wininet.lib;avcodec.lib;avformat.lib;avutil.lib;swresample.lib;swscale.lib;OpenAL32.lib;asmjit.lib;%(AdditionalDependencies) %(IgnoreSpecificDefaultLibraries) true - ..\wxWidgets\lib\vc_x64_lib;..\ffmpeg\Windows\x86_64\lib;..\OpenAL\Win64 + ..\wxWidgets\lib\vc_x64_lib;..\ffmpeg\Windows\x86_64\lib;..\OpenAL\libs\Win64 0x200000000 true false