diff --git a/Utilities/StrFmt.h b/Utilities/StrFmt.h index efaf02f66f..d40a0b722c 100644 --- a/Utilities/StrFmt.h +++ b/Utilities/StrFmt.h @@ -233,12 +233,14 @@ namespace fmt for (std::size_t buf_size = fixed_buf.size();;) { +#ifndef _MSC_VER #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wformat-security" +#endif const std::size_t len = std::snprintf(buf_addr, buf_size, fmt, do_unveil(args)...); - +#ifndef _MSC_VER #pragma GCC diagnostic pop - +#endif if (len > INT_MAX) { throw std::runtime_error("std::snprintf() failed"); diff --git a/rpcs3/D3D12GSRender.vcxproj b/rpcs3/D3D12GSRender.vcxproj index 689b7500d3..6451d9c69b 100644 --- a/rpcs3/D3D12GSRender.vcxproj +++ b/rpcs3/D3D12GSRender.vcxproj @@ -78,14 +78,14 @@ - - + + + - @@ -93,8 +93,10 @@ + + diff --git a/rpcs3/D3D12GSRender.vcxproj.filters b/rpcs3/D3D12GSRender.vcxproj.filters index 1e3c1260be..8a80059961 100644 --- a/rpcs3/D3D12GSRender.vcxproj.filters +++ b/rpcs3/D3D12GSRender.vcxproj.filters @@ -8,12 +8,6 @@ - - Source Files - - - Source Files - Source Files @@ -29,9 +23,6 @@ Source Files - - Source Files - Source Files @@ -41,6 +32,15 @@ Source Files + + Source Files + + + Source Files + + + Source Files + @@ -76,5 +76,11 @@ Source Files + + Source Files + + + Source Files + \ No newline at end of file diff --git a/rpcs3/Emu/RSX/Common/BufferUtils.cpp b/rpcs3/Emu/RSX/Common/BufferUtils.cpp index 53f68dfa72..5d391a7b46 100644 --- a/rpcs3/Emu/RSX/Common/BufferUtils.cpp +++ b/rpcs3/Emu/RSX/Common/BufferUtils.cpp @@ -5,57 +5,7 @@ #define MIN2(x, y) ((x) < (y)) ? (x) : (y) #define MAX2(x, y) ((x) > (y)) ? (x) : (y) - -inline -bool overlaps(const std::pair &range1, const std::pair &range2) -{ - return !(range1.second < range2.first || range2.second < range1.first); -} - -std::vector FormatVertexData(const rsx::data_array_format_info *vertex_array_desc, const std::vector *vertex_data, size_t *vertex_data_size, size_t base_offset) -{ - std::vector Result; - for (size_t i = 0; i < rsx::limits::vertex_count; ++i) - { - const rsx::data_array_format_info &vertexData = vertex_array_desc[i]; - if (!vertexData.size) continue; - - u32 addrRegVal = rsx::method_registers[NV4097_SET_VERTEX_DATA_ARRAY_OFFSET + i]; - u32 addr = rsx::get_address(addrRegVal & 0x7fffffff, addrRegVal >> 31); - size_t elementCount = ((vertexData.array) ? vertex_data_size[i] : vertex_data[i].size()) / (vertexData.size * rsx::get_vertex_type_size(vertexData.type)); - - // If there is a single element, stride is 0, use the size of element instead - size_t stride = vertexData.stride; - size_t elementSize = rsx::get_vertex_type_size(vertexData.type); - size_t start = addr + base_offset; - size_t end = start + elementSize * vertexData.size + (elementCount - 1) * stride - 1; - std::pair range = std::make_pair(start, end); - assert(start < end); - bool isMerged = false; - - for (VertexBufferFormat &vbf : Result) - { - if (overlaps(vbf.range, range) && vbf.stride == stride) - { - // Extend buffer if necessary - vbf.range.first = MIN2(vbf.range.first, range.first); - vbf.range.second = MAX2(vbf.range.second, range.second); - vbf.elementCount = MAX2(vbf.elementCount, elementCount); - - vbf.attributeId.push_back(i); - isMerged = true; - break; - } - } - if (isMerged) - continue; - VertexBufferFormat newRange = { range, std::vector{ i }, elementCount, stride }; - Result.emplace_back(newRange); - } - return Result; -} - -void write_vertex_array_data_to_buffer(void *buffer, u32 first, u32 count, size_t index, const rsx::data_array_format_info &vertex_array_desc) +void write_vertex_array_data_to_buffer(void *buffer, u32 first, u32 count, size_t index, const rsx::data_array_format_info &vertex_array_desc) noexcept { assert(vertex_array_desc.array); @@ -109,8 +59,10 @@ void write_vertex_array_data_to_buffer(void *buffer, u32 first, u32 count, size_ } } +namespace +{ template -void uploadAsIt(char *dst, u32 address, size_t indexCount, bool is_primitive_restart_enabled, u32 &min_index, u32 &max_index) +void uploadAsIt(char *dst, u32 address, size_t indexCount, bool is_primitive_restart_enabled, u32 &min_index, u32 &max_index) noexcept { for (u32 i = 0; i < indexCount; ++i) { @@ -124,7 +76,7 @@ void uploadAsIt(char *dst, u32 address, size_t indexCount, bool is_primitive_res } template -void expandIndexedTriangleFan(char *dst, u32 address, size_t indexCount, bool is_primitive_restart_enabled, u32 &min_index, u32 &max_index) +void expandIndexedTriangleFan(char *dst, u32 address, size_t indexCount, bool is_primitive_restart_enabled, u32 &min_index, u32 &max_index) noexcept { for (unsigned i = 0; i < indexCount - 2; i++) { @@ -154,7 +106,7 @@ void expandIndexedTriangleFan(char *dst, u32 address, size_t indexCount, bool is } template -void expandIndexedQuads(char *dst, u32 address, size_t indexCount, bool is_primitive_restart_enabled, u32 &min_index, u32 &max_index) +void expandIndexedQuads(char *dst, u32 address, size_t indexCount, bool is_primitive_restart_enabled, u32 &min_index, u32 &max_index) noexcept { for (unsigned i = 0; i < indexCount / 4; i++) { @@ -193,9 +145,10 @@ void expandIndexedQuads(char *dst, u32 address, size_t indexCount, bool is_primi } } } +} // Only handle quads and triangle fan now -bool isNativePrimitiveMode(unsigned m_draw_mode) +bool is_primitive_native(unsigned m_draw_mode) noexcept { switch (m_draw_mode) { @@ -215,10 +168,10 @@ bool isNativePrimitiveMode(unsigned m_draw_mode) } } -size_t getIndexCount(unsigned m_draw_mode, unsigned initial_index_count) +size_t get_index_count(unsigned m_draw_mode, unsigned initial_index_count) noexcept { // Index count - if (isNativePrimitiveMode(m_draw_mode)) + if (is_primitive_native(m_draw_mode)) return initial_index_count; switch (m_draw_mode) @@ -232,7 +185,17 @@ size_t getIndexCount(unsigned m_draw_mode, unsigned initial_index_count) } } -void write_index_array_for_non_indexed_non_native_primitive_to_buffer(char* dst, unsigned draw_mode, unsigned first, unsigned count) +size_t get_index_type_size(u32 type) noexcept +{ + switch (type) + { + case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16: return 2; + case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_32: return 4; + default: return 0; + } +} + +void write_index_array_for_non_indexed_non_native_primitive_to_buffer(char* dst, unsigned draw_mode, unsigned first, unsigned count) noexcept { unsigned short *typedDst = (unsigned short *)(dst); switch (draw_mode) @@ -261,7 +224,7 @@ void write_index_array_for_non_indexed_non_native_primitive_to_buffer(char* dst, } } -void write_index_array_data_to_buffer(char* dst, unsigned m_draw_mode, unsigned first, unsigned count, unsigned &min_index, unsigned &max_index) +void write_index_array_data_to_buffer(char* dst, unsigned m_draw_mode, unsigned first, unsigned count, unsigned &min_index, unsigned &max_index) noexcept { u32 address = rsx::get_address(rsx::method_registers[NV4097_SET_INDEX_ARRAY_ADDRESS], rsx::method_registers[NV4097_SET_INDEX_ARRAY_DMA] & 0xf); u32 type = rsx::method_registers[NV4097_SET_INDEX_ARRAY_DMA] >> 4; diff --git a/rpcs3/Emu/RSX/Common/BufferUtils.h b/rpcs3/Emu/RSX/Common/BufferUtils.h index 0294256b57..106825b3ad 100644 --- a/rpcs3/Emu/RSX/Common/BufferUtils.h +++ b/rpcs3/Emu/RSX/Common/BufferUtils.h @@ -12,36 +12,34 @@ struct VertexBufferFormat size_t stride; }; - -/* - * Detect buffer containing interleaved vertex attribute. - * This minimizes memory upload size. - */ -std::vector FormatVertexData(const rsx::data_array_format_info *vertex_array_desc, const std::vector *vertex_data, size_t *vertex_data_size, size_t base_offset); - -/* +/** * Write count vertex attributes from index array buffer starting at first, using vertex_array_desc */ -void write_vertex_array_data_to_buffer(void *buffer, u32 first, u32 count, size_t index, const rsx::data_array_format_info &vertex_array_desc); +void write_vertex_array_data_to_buffer(void *buffer, u32 first, u32 count, size_t index, const rsx::data_array_format_info &vertex_array_desc) noexcept; /* * If primitive mode is not supported and need to be emulated (using an index buffer) returns false. */ -bool isNativePrimitiveMode(unsigned m_draw_mode); +bool is_primitive_native(unsigned m_draw_mode) noexcept; -/* +/** * Returns a fixed index count for emulated primitive, otherwise returns initial_index_count */ -size_t getIndexCount(unsigned m_draw_mode, unsigned initial_index_count); +size_t get_index_count(unsigned m_draw_mode, unsigned initial_index_count) noexcept; -/* +/** + * Returns index type size in byte + */ +size_t get_index_type_size(u32 type) noexcept; + +/** * Write count indexes starting at first to dst buffer. * Returns min/max index found during the process. * The function expands index buffer for non native primitive type. */ -void write_index_array_data_to_buffer(char* dst, unsigned m_draw_mode, unsigned first, unsigned count, unsigned &min_index, unsigned &max_index); +void write_index_array_data_to_buffer(char* dst, unsigned m_draw_mode, unsigned first, unsigned count, unsigned &min_index, unsigned &max_index) noexcept; -/* -* Write index data needed to emulate non indexed non native primitive mode. -*/ -void write_index_array_for_non_indexed_non_native_primitive_to_buffer(char* dst, unsigned m_draw_mode, unsigned first, unsigned count); \ No newline at end of file +/** + * Write index data needed to emulate non indexed non native primitive mode. + */ +void write_index_array_for_non_indexed_non_native_primitive_to_buffer(char* dst, unsigned m_draw_mode, unsigned first, unsigned count) noexcept; \ No newline at end of file diff --git a/rpcs3/Emu/RSX/Common/TextureUtils.cpp b/rpcs3/Emu/RSX/Common/TextureUtils.cpp index 8d8edb1b14..f901b7aea2 100644 --- a/rpcs3/Emu/RSX/Common/TextureUtils.cpp +++ b/rpcs3/Emu/RSX/Common/TextureUtils.cpp @@ -6,11 +6,12 @@ #define MAX2(a, b) ((a) > (b)) ? (a) : (b) - +namespace +{ /** * Write data, assume src pixels are packed but not mipmaplevel */ -inline std::vector +std::vector writeTexelsGeneric(const char *src, char *dst, size_t widthInBlock, size_t heightInBlock, size_t blockSize, size_t mipmapCount) { std::vector Result; @@ -42,7 +43,7 @@ writeTexelsGeneric(const char *src, char *dst, size_t widthInBlock, size_t heigh /** * Write data, assume src pixels are swizzled and but not mipmaplevel */ -inline std::vector +std::vector writeTexelsSwizzled(const char *src, char *dst, size_t widthInBlock, size_t heightInBlock, size_t blockSize, size_t mipmapCount) { std::vector Result; @@ -84,7 +85,7 @@ writeTexelsSwizzled(const char *src, char *dst, size_t widthInBlock, size_t heig /** * Write data, assume compressed (DXTCn) format */ -inline std::vector +std::vector writeCompressedTexel(const char *src, char *dst, size_t widthInBlock, size_t blockWidth, size_t heightInBlock, size_t blockHeight, size_t blockSize, size_t mipmapCount) { std::vector Result; @@ -117,7 +118,7 @@ writeCompressedTexel(const char *src, char *dst, size_t widthInBlock, size_t blo /** * Write 16 bytes pixel textures, assume src pixels are swizzled and but not mipmaplevel */ -inline std::vector +std::vector write16bTexelsSwizzled(const char *src, char *dst, size_t widthInBlock, size_t heightInBlock, size_t blockSize, size_t mipmapCount) { std::vector Result; @@ -158,7 +159,7 @@ write16bTexelsSwizzled(const char *src, char *dst, size_t widthInBlock, size_t h /** * Write 16 bytes pixel textures, assume src pixels are packed but not mipmaplevel */ -inline std::vector +std::vector write16bTexelsGeneric(const char *src, char *dst, size_t widthInBlock, size_t heightInBlock, size_t blockSize, size_t mipmapCount) { std::vector Result; @@ -196,7 +197,7 @@ write16bTexelsGeneric(const char *src, char *dst, size_t widthInBlock, size_t he /** * Write 16 bytes pixel textures, assume src pixels are packed but not mipmaplevel */ -inline std::vector +std::vector write16bX4TexelsGeneric(const char *src, char *dst, size_t widthInBlock, size_t heightInBlock, size_t blockSize, size_t mipmapCount) { std::vector Result; @@ -231,255 +232,117 @@ write16bX4TexelsGeneric(const char *src, char *dst, size_t widthInBlock, size_t return Result; } - -size_t getPlacedTextureStorageSpace(const rsx::texture &texture, size_t rowPitchAlignement) +/** + * A texture is stored as an array of blocks, where a block is a pixel for standard texture + * but is a structure containing several pixels for compressed format + */ +size_t get_texture_block_size(u32 format) noexcept { - size_t w = texture.width(), h = texture.height(); - - size_t blockSizeInByte, blockWidthInPixel, blockHeightInPixel; - int format = texture.format() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); - switch (format) { + case CELL_GCM_TEXTURE_B8: return 1; + case CELL_GCM_TEXTURE_A1R5G5B5: + case CELL_GCM_TEXTURE_A4R4G4B4: + case CELL_GCM_TEXTURE_R5G6B5: return 2; + case CELL_GCM_TEXTURE_A8R8G8B8: return 4; + case CELL_GCM_TEXTURE_COMPRESSED_DXT1: return 8; + case CELL_GCM_TEXTURE_COMPRESSED_DXT23: return 16; + case CELL_GCM_TEXTURE_COMPRESSED_DXT45: return 16; + case CELL_GCM_TEXTURE_G8B8: return 2; + case CELL_GCM_TEXTURE_R6G5B5: + case CELL_GCM_TEXTURE_DEPTH24_D8: + case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: return 4; + case CELL_GCM_TEXTURE_DEPTH16: + case CELL_GCM_TEXTURE_DEPTH16_FLOAT: + case CELL_GCM_TEXTURE_X16: return 2; + case CELL_GCM_TEXTURE_Y16_X16: return 4; + case CELL_GCM_TEXTURE_R5G5B5A1: return 2; + case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: return 8; + case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: return 16; + case CELL_GCM_TEXTURE_X32_FLOAT: return 4; + case CELL_GCM_TEXTURE_D1R5G5B5: return 2; + case CELL_GCM_TEXTURE_Y16_X16_FLOAT: + case CELL_GCM_TEXTURE_D8R8G8B8: + case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: + case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: return 4; case CELL_GCM_TEXTURE_COMPRESSED_HILO8: case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8: case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: default: LOG_ERROR(RSX, "Unimplemented Texture format : %x", format); - break; - case CELL_GCM_TEXTURE_B8: - blockSizeInByte = 1; - blockWidthInPixel = 1, blockHeightInPixel = 1; - break; - case CELL_GCM_TEXTURE_A1R5G5B5: - blockSizeInByte = 2; - blockHeightInPixel = 1, blockWidthInPixel = 1; - break; - case CELL_GCM_TEXTURE_A4R4G4B4: - blockSizeInByte = 2; - blockWidthInPixel = 1, blockHeightInPixel = 1; - break; - case CELL_GCM_TEXTURE_R5G6B5: - blockSizeInByte = 2; - blockWidthInPixel = 1, blockHeightInPixel = 1; - break; - case CELL_GCM_TEXTURE_A8R8G8B8: - blockSizeInByte = 4; - blockWidthInPixel = 1, blockHeightInPixel = 1; - break; - case CELL_GCM_TEXTURE_COMPRESSED_DXT1: - blockSizeInByte = 8; - blockWidthInPixel = 4, blockHeightInPixel = 4; - break; - case CELL_GCM_TEXTURE_COMPRESSED_DXT23: - blockSizeInByte = 16; - blockWidthInPixel = 4, blockHeightInPixel = 4; - break; - case CELL_GCM_TEXTURE_COMPRESSED_DXT45: - blockSizeInByte = 16; - blockWidthInPixel = 4, blockHeightInPixel = 4; - break; - case CELL_GCM_TEXTURE_G8B8: - blockSizeInByte = 2; - blockWidthInPixel = 1, blockHeightInPixel = 1; - break; - case CELL_GCM_TEXTURE_R6G5B5: - // Not native - blockSizeInByte = 4; - blockWidthInPixel = 1, blockHeightInPixel = 1; - break; - case CELL_GCM_TEXTURE_DEPTH24_D8: - blockSizeInByte = 4; - blockWidthInPixel = 1, blockHeightInPixel = 1; - break; - case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: - blockSizeInByte = 4; - blockWidthInPixel = 1, blockHeightInPixel = 1; - break; - case CELL_GCM_TEXTURE_DEPTH16: - blockSizeInByte = 2; - blockWidthInPixel = 1, blockHeightInPixel = 1; - break; - case CELL_GCM_TEXTURE_DEPTH16_FLOAT: - blockSizeInByte = 2; - blockWidthInPixel = 1, blockHeightInPixel = 1; - break; - case CELL_GCM_TEXTURE_X16: - blockSizeInByte = 2; - blockWidthInPixel = 1, blockHeightInPixel = 1; - break; - case CELL_GCM_TEXTURE_Y16_X16: - blockSizeInByte = 4; - blockWidthInPixel = 1, blockHeightInPixel = 1; - break; - case CELL_GCM_TEXTURE_R5G5B5A1: - blockSizeInByte = 2; - blockWidthInPixel = 1, blockHeightInPixel = 1; - break; - case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: - blockSizeInByte = 8; - blockWidthInPixel = 1, blockHeightInPixel = 1; - break; - case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: - blockSizeInByte = 16; - blockWidthInPixel = 1, blockHeightInPixel = 1; - break; - case CELL_GCM_TEXTURE_X32_FLOAT: - blockSizeInByte = 4; - blockWidthInPixel = 1, blockHeightInPixel = 1; - break; - case CELL_GCM_TEXTURE_D1R5G5B5: - blockSizeInByte = 2; - blockWidthInPixel = 1, blockHeightInPixel = 1; - break; - case CELL_GCM_TEXTURE_Y16_X16_FLOAT: - blockSizeInByte = 4; - blockWidthInPixel = 1, blockHeightInPixel = 1; - break; - case CELL_GCM_TEXTURE_D8R8G8B8: - blockSizeInByte = 4; - blockWidthInPixel = 1, blockHeightInPixel = 1; - break; - case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: - blockSizeInByte = 4; - blockWidthInPixel = 2, blockHeightInPixel = 2; - break; - case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: - blockSizeInByte = 4; - blockWidthInPixel = 2, blockHeightInPixel = 2; - break; + return 0; } +} - size_t heightInBlocks = (h + blockHeightInPixel - 1) / blockHeightInPixel; - size_t widthInBlocks = (w + blockWidthInPixel - 1) / blockWidthInPixel; +size_t get_texture_block_edge(u32 format) noexcept +{ + switch (format) + { + case CELL_GCM_TEXTURE_B8: + case CELL_GCM_TEXTURE_A1R5G5B5: + case CELL_GCM_TEXTURE_A4R4G4B4: + case CELL_GCM_TEXTURE_R5G6B5: + case CELL_GCM_TEXTURE_A8R8G8B8: return 1; + case CELL_GCM_TEXTURE_COMPRESSED_DXT1: + case CELL_GCM_TEXTURE_COMPRESSED_DXT23: + case CELL_GCM_TEXTURE_COMPRESSED_DXT45: return 4; + case CELL_GCM_TEXTURE_G8B8: + case CELL_GCM_TEXTURE_R6G5B5: + case CELL_GCM_TEXTURE_DEPTH24_D8: + case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: + case CELL_GCM_TEXTURE_DEPTH16: + case CELL_GCM_TEXTURE_DEPTH16_FLOAT: + case CELL_GCM_TEXTURE_X16: + case CELL_GCM_TEXTURE_Y16_X16: + case CELL_GCM_TEXTURE_R5G5B5A1: + case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: + case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: + case CELL_GCM_TEXTURE_X32_FLOAT: + case CELL_GCM_TEXTURE_D1R5G5B5: + case CELL_GCM_TEXTURE_Y16_X16_FLOAT: + case CELL_GCM_TEXTURE_D8R8G8B8: return 1; + case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: + case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: return 2; + case CELL_GCM_TEXTURE_COMPRESSED_HILO8: + case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8: + case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: + case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: + default: + LOG_ERROR(RSX, "Unimplemented Texture format : %x", format); + return 0; + } +} +} + + +size_t get_placed_texture_storage_size(const rsx::texture &texture, size_t rowPitchAlignement) noexcept +{ + size_t w = texture.width(), h = texture.height(); + + int format = texture.format() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); + size_t blockEdge = get_texture_block_edge(format); + size_t blockSizeInByte = get_texture_block_size(format); + + size_t heightInBlocks = (h + blockEdge - 1) / blockEdge; + size_t widthInBlocks = (w + blockEdge - 1) / blockEdge; size_t rowPitch = align(blockSizeInByte * widthInBlocks, rowPitchAlignement); return rowPitch * heightInBlocks * 2; // * 2 for mipmap levels } -std::vector uploadPlacedTexture(const rsx::texture &texture, size_t rowPitchAlignement, void* textureData) +std::vector upload_placed_texture(const rsx::texture &texture, size_t rowPitchAlignement, void* textureData) noexcept { size_t w = texture.width(), h = texture.height(); int format = texture.format() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); - size_t blockSizeInByte, blockWidthInPixel, blockHeightInPixel; - switch (format) - { - case CELL_GCM_TEXTURE_COMPRESSED_HILO8: - case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8: - case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: - case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: - default: - LOG_ERROR(RSX, "Unimplemented Texture format : %x", format); - break; - case CELL_GCM_TEXTURE_B8: - blockSizeInByte = 1; - blockWidthInPixel = 1, blockHeightInPixel = 1; - break; - case CELL_GCM_TEXTURE_A1R5G5B5: - blockSizeInByte = 2; - blockHeightInPixel = 1, blockWidthInPixel = 1; - break; - case CELL_GCM_TEXTURE_A4R4G4B4: - blockSizeInByte = 2; - blockWidthInPixel = 1, blockHeightInPixel = 1; - break; - case CELL_GCM_TEXTURE_R5G6B5: - blockSizeInByte = 2; - blockWidthInPixel = 1, blockHeightInPixel = 1; - break; - case CELL_GCM_TEXTURE_A8R8G8B8: - blockSizeInByte = 4; - blockWidthInPixel = 1, blockHeightInPixel = 1; - break; - case CELL_GCM_TEXTURE_COMPRESSED_DXT1: - blockSizeInByte = 8; - blockWidthInPixel = 4, blockHeightInPixel = 4; - break; - case CELL_GCM_TEXTURE_COMPRESSED_DXT23: - blockSizeInByte = 16; - blockWidthInPixel = 4, blockHeightInPixel = 4; - break; - case CELL_GCM_TEXTURE_COMPRESSED_DXT45: - blockSizeInByte = 16; - blockWidthInPixel = 4, blockHeightInPixel = 4; - break; - case CELL_GCM_TEXTURE_G8B8: - blockSizeInByte = 2; - blockWidthInPixel = 1, blockHeightInPixel = 1; - break; - case CELL_GCM_TEXTURE_R6G5B5: - // Not native - blockSizeInByte = 4; - blockWidthInPixel = 1, blockHeightInPixel = 1; - break; - case CELL_GCM_TEXTURE_DEPTH24_D8: - blockSizeInByte = 4; - blockWidthInPixel = 1, blockHeightInPixel = 1; - break; - case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: - blockSizeInByte = 4; - blockWidthInPixel = 1, blockHeightInPixel = 1; - break; - case CELL_GCM_TEXTURE_DEPTH16: - blockSizeInByte = 2; - blockWidthInPixel = 1, blockHeightInPixel = 1; - break; - case CELL_GCM_TEXTURE_DEPTH16_FLOAT: - blockSizeInByte = 2; - blockWidthInPixel = 1, blockHeightInPixel = 1; - break; - case CELL_GCM_TEXTURE_X16: - blockSizeInByte = 2; - blockWidthInPixel = 1, blockHeightInPixel = 1; - break; - case CELL_GCM_TEXTURE_Y16_X16: - blockSizeInByte = 4; - blockWidthInPixel = 1, blockHeightInPixel = 1; - break; - case CELL_GCM_TEXTURE_R5G5B5A1: - blockSizeInByte = 2; - blockWidthInPixel = 1, blockHeightInPixel = 1; - break; - case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: - blockSizeInByte = 8; - blockWidthInPixel = 1, blockHeightInPixel = 1; - break; - case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: - blockSizeInByte = 16; - blockWidthInPixel = 1, blockHeightInPixel = 1; - break; - case CELL_GCM_TEXTURE_X32_FLOAT: - blockSizeInByte = 4; - blockWidthInPixel = 1, blockHeightInPixel = 1; - break; - case CELL_GCM_TEXTURE_D1R5G5B5: - blockSizeInByte = 2; - blockWidthInPixel = 1, blockHeightInPixel = 1; - break; - case CELL_GCM_TEXTURE_Y16_X16_FLOAT: - blockSizeInByte = 4; - blockWidthInPixel = 1, blockHeightInPixel = 1; - break; - case CELL_GCM_TEXTURE_D8R8G8B8: - blockSizeInByte = 4; - blockWidthInPixel = 1, blockHeightInPixel = 1; - break; - case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: - blockSizeInByte = 4; - blockWidthInPixel = 2, blockHeightInPixel = 2; - break; - case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: - blockSizeInByte = 4; - blockWidthInPixel = 2, blockHeightInPixel = 2; - break; - } + size_t blockSizeInByte = get_texture_block_size(format); + size_t blockEdge = get_texture_block_edge(format); - size_t heightInBlocks = (h + blockHeightInPixel - 1) / blockHeightInPixel; - size_t widthInBlocks = (w + blockWidthInPixel - 1) / blockWidthInPixel; + size_t heightInBlocks = (h + blockEdge - 1) / blockEdge; + size_t widthInBlocks = (w + blockEdge - 1) / blockEdge; std::vector mipInfos; @@ -505,8 +368,76 @@ std::vector uploadPlacedTexture(const rsx::texture &texture, si case CELL_GCM_TEXTURE_COMPRESSED_DXT1: case CELL_GCM_TEXTURE_COMPRESSED_DXT23: case CELL_GCM_TEXTURE_COMPRESSED_DXT45: - return writeCompressedTexel((char*)pixels, (char*)textureData, widthInBlocks, blockWidthInPixel, heightInBlocks, blockHeightInPixel, blockSizeInByte, texture.mipmap()); + return writeCompressedTexel((char*)pixels, (char*)textureData, widthInBlocks, blockEdge, heightInBlocks, blockEdge, blockSizeInByte, texture.mipmap()); default: return writeTexelsGeneric((char*)pixels, (char*)textureData, w, h, blockSizeInByte, texture.mipmap()); } } + +size_t get_texture_size(const rsx::texture &texture) noexcept +{ + size_t w = texture.width(), h = texture.height(); + + int format = texture.format() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); + // TODO: Take mipmaps into account + switch (format) + { + case CELL_GCM_TEXTURE_COMPRESSED_HILO8: + case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8: + case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: + case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: + default: + LOG_ERROR(RSX, "Unimplemented Texture format : %x", format); + return 0; + case CELL_GCM_TEXTURE_B8: + return w * h; + case CELL_GCM_TEXTURE_A1R5G5B5: + return w * h * 2; + case CELL_GCM_TEXTURE_A4R4G4B4: + return w * h * 2; + case CELL_GCM_TEXTURE_R5G6B5: + return w * h * 2; + case CELL_GCM_TEXTURE_A8R8G8B8: + return w * h * 4; + case CELL_GCM_TEXTURE_COMPRESSED_DXT1: + return w * h / 6; + case CELL_GCM_TEXTURE_COMPRESSED_DXT23: + return w * h / 4; + case CELL_GCM_TEXTURE_COMPRESSED_DXT45: + return w * h / 4; + case CELL_GCM_TEXTURE_G8B8: + return w * h * 2; + case CELL_GCM_TEXTURE_R6G5B5: + return w * h * 2; + case CELL_GCM_TEXTURE_DEPTH24_D8: + return w * h * 4; + case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: + return w * h * 4; + case CELL_GCM_TEXTURE_DEPTH16: + return w * h * 2; + case CELL_GCM_TEXTURE_DEPTH16_FLOAT: + return w * h * 2; + case CELL_GCM_TEXTURE_X16: + return w * h * 2; + case CELL_GCM_TEXTURE_Y16_X16: + return w * h * 4; + case CELL_GCM_TEXTURE_R5G5B5A1: + return w * h * 2; + case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: + return w * h * 8; + case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: + return w * h * 16; + case CELL_GCM_TEXTURE_X32_FLOAT: + return w * h * 4; + case CELL_GCM_TEXTURE_D1R5G5B5: + return w * h * 2; + case CELL_GCM_TEXTURE_Y16_X16_FLOAT: + return w * h * 4; + case CELL_GCM_TEXTURE_D8R8G8B8: + return w * h * 4; + case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: + return w * h * 4; + case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: + return w * h * 4; + } +} \ No newline at end of file diff --git a/rpcs3/Emu/RSX/Common/TextureUtils.h b/rpcs3/Emu/RSX/Common/TextureUtils.h index 4ed2981bfd..a4fc2240a1 100644 --- a/rpcs3/Emu/RSX/Common/TextureUtils.h +++ b/rpcs3/Emu/RSX/Common/TextureUtils.h @@ -14,11 +14,16 @@ struct MipmapLevelInfo * Get size to store texture in a linear fashion. * Storage is assumed to use a rowPitchAlignement boundary for every row of texture. */ -size_t getPlacedTextureStorageSpace(const rsx::texture &texture, size_t rowPitchAlignement); +size_t get_placed_texture_storage_size(const rsx::texture &texture, size_t rowPitchAlignement) noexcept; /** * Write texture data to textureData. * Data are not packed, they are stored per rows using rowPitchAlignement. * Similarly, offset for every mipmaplevel is aligned to rowPitchAlignement boundary. */ -std::vector uploadPlacedTexture(const rsx::texture &texture, size_t rowPitchAlignement, void* textureData); \ No newline at end of file +std::vector upload_placed_texture(const rsx::texture &texture, size_t rowPitchAlignement, void* textureData) noexcept; + +/** +* Get number of bytes occupied by texture in RSX mem +*/ +size_t get_texture_size(const rsx::texture &texture) noexcept; \ No newline at end of file diff --git a/rpcs3/Emu/RSX/D3D12/D3D12.h b/rpcs3/Emu/RSX/D3D12/D3D12.h deleted file mode 100644 index 833f3648a0..0000000000 --- a/rpcs3/Emu/RSX/D3D12/D3D12.h +++ /dev/null @@ -1,281 +0,0 @@ -#pragma once - -#include -#include -#include -#include "Utilities/Log.h" -#include "Emu/Memory/vm.h" -#include "Emu/RSX/GCM.h" - -using namespace Microsoft::WRL; - -#define SAFE_RELEASE(x) if (x) x->Release(); - -// From DX12 D3D11On12 Sample (MIT Licensed) -inline void ThrowIfFailed(HRESULT hr) -{ - if (FAILED(hr)) - { - throw; - } -} - -/** - * Send data to dst pointer without polluting cache. - * Usefull to write to mapped memory from upload heap. - */ -inline -void streamToBuffer(void* dst, void* src, size_t sizeInBytes) -{ -#pragma omp parallel for - for (int i = 0; i < sizeInBytes / 16; i++) - { - const __m128i &srcPtr = _mm_loadu_si128((__m128i*) ((char*)src + i * 16)); - _mm_stream_si128((__m128i*)((char*)dst + i * 16), srcPtr); - } -} - -/** -* copy src to dst pointer without polluting cache. -* Usefull to write to mapped memory from upload heap. -*/ -inline -void streamBuffer(void* dst, void* src, size_t sizeInBytes) -{ - // Assume 64 bytes cache line - int offset = 0; - bool isAligned = !((size_t)src & 15); - #pragma omp parallel for - for (offset = 0; offset < sizeInBytes - 64; offset += 64) - { - char *line = (char*)src + offset; - char *dstline = (char*)dst + offset; - // prefetch next line - _mm_prefetch(line + 16, _MM_HINT_NTA); - __m128i srcPtr = isAligned ? _mm_load_si128((__m128i *)line) : _mm_loadu_si128((__m128i *)line); - _mm_stream_si128((__m128i*)dstline, srcPtr); - srcPtr = isAligned ? _mm_load_si128((__m128i *)(line + 16)) : _mm_loadu_si128((__m128i *)(line + 16)); - _mm_stream_si128((__m128i*)(dstline + 16), srcPtr); - srcPtr = isAligned ? _mm_load_si128((__m128i *)(line + 32)) : _mm_loadu_si128((__m128i *)(line + 32)); - _mm_stream_si128((__m128i*)(dstline + 32), srcPtr); - srcPtr = isAligned ? _mm_load_si128((__m128i *)(line + 48)) : _mm_loadu_si128((__m128i *)(line + 48)); - _mm_stream_si128((__m128i*)(dstline + 48), srcPtr); - } - memcpy((char*)dst + offset, (char*)src + offset, sizeInBytes - offset); -} - -/** - * Convert GCM blend operator code to D3D12 one - */ -inline D3D12_BLEND_OP getBlendOp(u16 op) -{ - switch (op) - { - case CELL_GCM_FUNC_ADD: return D3D12_BLEND_OP_ADD; - case CELL_GCM_FUNC_SUBTRACT: return D3D12_BLEND_OP_SUBTRACT; - case CELL_GCM_FUNC_REVERSE_SUBTRACT: return D3D12_BLEND_OP_REV_SUBTRACT; - case CELL_GCM_MIN: return D3D12_BLEND_OP_MIN; - case CELL_GCM_MAX: return D3D12_BLEND_OP_MAX; - default: - case CELL_GCM_FUNC_ADD_SIGNED: - case CELL_GCM_FUNC_REVERSE_ADD_SIGNED: - case CELL_GCM_FUNC_REVERSE_SUBTRACT_SIGNED: - LOG_WARNING(RSX, "Unsupported Blend Op %d", op); - return D3D12_BLEND_OP(); - } -} - -/** - * Convert GCM blend factor code to D3D12 one - */ -inline D3D12_BLEND getBlendFactor(u16 factor) -{ - switch (factor) - { - case CELL_GCM_ZERO: return D3D12_BLEND_ZERO; - case CELL_GCM_ONE: return D3D12_BLEND_ONE; - case CELL_GCM_SRC_COLOR: return D3D12_BLEND_SRC_COLOR; - case CELL_GCM_ONE_MINUS_SRC_COLOR: return D3D12_BLEND_INV_SRC_COLOR; - case CELL_GCM_SRC_ALPHA: return D3D12_BLEND_SRC_ALPHA; - case CELL_GCM_ONE_MINUS_SRC_ALPHA: return D3D12_BLEND_INV_SRC_ALPHA; - case CELL_GCM_DST_ALPHA: return D3D12_BLEND_DEST_ALPHA; - case CELL_GCM_ONE_MINUS_DST_ALPHA: return D3D12_BLEND_INV_DEST_ALPHA; - case CELL_GCM_DST_COLOR: return D3D12_BLEND_DEST_COLOR; - case CELL_GCM_ONE_MINUS_DST_COLOR: return D3D12_BLEND_INV_DEST_COLOR; - case CELL_GCM_SRC_ALPHA_SATURATE: return D3D12_BLEND_SRC_ALPHA_SAT; - default: - case CELL_GCM_CONSTANT_COLOR: - case CELL_GCM_ONE_MINUS_CONSTANT_COLOR: - case CELL_GCM_CONSTANT_ALPHA: - case CELL_GCM_ONE_MINUS_CONSTANT_ALPHA: - LOG_WARNING(RSX, "Unsupported Blend Factor %d", factor); - return D3D12_BLEND(); - } -} - - -/** -* Convert GCM blend factor code to D3D12 one for alpha component -*/ -inline D3D12_BLEND getBlendFactorAlpha(u16 factor) -{ - switch (factor) - { - case CELL_GCM_ZERO: return D3D12_BLEND_ZERO; - case CELL_GCM_ONE: return D3D12_BLEND_ONE; - case CELL_GCM_SRC_COLOR: return D3D12_BLEND_SRC_ALPHA; - case CELL_GCM_ONE_MINUS_SRC_COLOR: return D3D12_BLEND_INV_SRC_ALPHA; - case CELL_GCM_SRC_ALPHA: return D3D12_BLEND_SRC_ALPHA; - case CELL_GCM_ONE_MINUS_SRC_ALPHA: return D3D12_BLEND_INV_SRC_ALPHA; - case CELL_GCM_DST_ALPHA: return D3D12_BLEND_DEST_ALPHA; - case CELL_GCM_ONE_MINUS_DST_ALPHA: return D3D12_BLEND_INV_DEST_ALPHA; - case CELL_GCM_DST_COLOR: return D3D12_BLEND_DEST_ALPHA; - case CELL_GCM_ONE_MINUS_DST_COLOR: return D3D12_BLEND_INV_DEST_COLOR; - case CELL_GCM_SRC_ALPHA_SATURATE: return D3D12_BLEND_INV_DEST_ALPHA; - default: - case CELL_GCM_CONSTANT_COLOR: - case CELL_GCM_ONE_MINUS_CONSTANT_COLOR: - case CELL_GCM_CONSTANT_ALPHA: - case CELL_GCM_ONE_MINUS_CONSTANT_ALPHA: - LOG_WARNING(RSX, "Unsupported Blend Factor %d", factor); - return D3D12_BLEND(); - } -} - -/** - * Convert GCM logic op code to D3D12 one - */ -inline D3D12_LOGIC_OP getLogicOp(u32 op) -{ - switch (op) - { - default: - LOG_WARNING(RSX, "Unsupported Logic Op %d", op); - return D3D12_LOGIC_OP(); - case CELL_GCM_CLEAR: return D3D12_LOGIC_OP_CLEAR; - case CELL_GCM_AND: return D3D12_LOGIC_OP_AND; - case CELL_GCM_AND_REVERSE: return D3D12_LOGIC_OP_AND_REVERSE; - case CELL_GCM_COPY: return D3D12_LOGIC_OP_COPY; - case CELL_GCM_AND_INVERTED: return D3D12_LOGIC_OP_AND_INVERTED; - case CELL_GCM_NOOP: return D3D12_LOGIC_OP_NOOP; - case CELL_GCM_XOR: return D3D12_LOGIC_OP_XOR; - case CELL_GCM_OR: return D3D12_LOGIC_OP_OR; - case CELL_GCM_NOR: return D3D12_LOGIC_OP_NOR; - case CELL_GCM_EQUIV: return D3D12_LOGIC_OP_EQUIV; - case CELL_GCM_INVERT: return D3D12_LOGIC_OP_INVERT; - case CELL_GCM_OR_REVERSE: return D3D12_LOGIC_OP_OR_REVERSE; - case CELL_GCM_COPY_INVERTED: return D3D12_LOGIC_OP_COPY_INVERTED; - case CELL_GCM_OR_INVERTED: return D3D12_LOGIC_OP_OR_INVERTED; - case CELL_GCM_NAND: return D3D12_LOGIC_OP_NAND; - } -} - -/** - * Convert GCM stencil op code to D3D12 one - */ -inline D3D12_STENCIL_OP getStencilOp(u32 op) -{ - switch (op) - { - case CELL_GCM_KEEP: return D3D12_STENCIL_OP_KEEP; - case CELL_GCM_ZERO: return D3D12_STENCIL_OP_ZERO; - case CELL_GCM_REPLACE: return D3D12_STENCIL_OP_REPLACE; - case CELL_GCM_INCR: return D3D12_STENCIL_OP_INCR; - case CELL_GCM_DECR: return D3D12_STENCIL_OP_DECR; - default: - case CELL_GCM_INCR_WRAP: - case CELL_GCM_DECR_WRAP: - LOG_WARNING(RSX, "Unsupported Stencil Op %d", op); - return D3D12_STENCIL_OP(); - } -} - -/** - * Convert GCM comparison function code to D3D12 one. - */ -inline D3D12_COMPARISON_FUNC getCompareFunc(u32 op) -{ - switch (op) - { - case CELL_GCM_ZERO: - case CELL_GCM_NEVER: return D3D12_COMPARISON_FUNC_NEVER; - case CELL_GCM_LESS: return D3D12_COMPARISON_FUNC_LESS; - case CELL_GCM_EQUAL: return D3D12_COMPARISON_FUNC_EQUAL; - case CELL_GCM_LEQUAL: return D3D12_COMPARISON_FUNC_LESS_EQUAL; - case CELL_GCM_GREATER: return D3D12_COMPARISON_FUNC_GREATER; - case CELL_GCM_NOTEQUAL: return D3D12_COMPARISON_FUNC_NOT_EQUAL; - case CELL_GCM_GEQUAL: return D3D12_COMPARISON_FUNC_GREATER_EQUAL; - case CELL_GCM_ALWAYS: return D3D12_COMPARISON_FUNC_ALWAYS; - default: - LOG_WARNING(RSX, "Unsupported Compare Function %d", op); - return D3D12_COMPARISON_FUNC(); - } -} - -/** - * Convert GCM texture format to an equivalent one supported by D3D12. - * Destination format may require a byte swap or data conversion. - */ -inline DXGI_FORMAT getTextureDXGIFormat(int format) -{ - switch (format) - { - case CELL_GCM_TEXTURE_Y16_X16_FLOAT: - case CELL_GCM_TEXTURE_COMPRESSED_HILO8: - case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8: - case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: - case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: - default: - LOG_ERROR(RSX, "Unimplemented Texture format : %x", format); - return DXGI_FORMAT(); - case CELL_GCM_TEXTURE_B8: - return DXGI_FORMAT_R8_UNORM; - case CELL_GCM_TEXTURE_A1R5G5B5: - return DXGI_FORMAT_B5G5R5A1_UNORM; - case CELL_GCM_TEXTURE_A4R4G4B4: - return DXGI_FORMAT_B4G4R4A4_UNORM; - case CELL_GCM_TEXTURE_R5G6B5: - return DXGI_FORMAT_B5G6R5_UNORM; - case CELL_GCM_TEXTURE_A8R8G8B8: - return DXGI_FORMAT_R8G8B8A8_UNORM; - case CELL_GCM_TEXTURE_COMPRESSED_DXT1: - return DXGI_FORMAT_BC1_UNORM; - case CELL_GCM_TEXTURE_COMPRESSED_DXT23: - return DXGI_FORMAT_BC2_UNORM; - case CELL_GCM_TEXTURE_COMPRESSED_DXT45: - return DXGI_FORMAT_BC3_UNORM; - case CELL_GCM_TEXTURE_G8B8: - return DXGI_FORMAT_G8R8_G8B8_UNORM; - case CELL_GCM_TEXTURE_R6G5B5: - // Not native - return DXGI_FORMAT_R8G8B8A8_UNORM; - case CELL_GCM_TEXTURE_DEPTH24_D8: - return DXGI_FORMAT_R32_UINT; - case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: - return DXGI_FORMAT_R32_FLOAT; - case CELL_GCM_TEXTURE_DEPTH16: - return DXGI_FORMAT_R16_UNORM; - case CELL_GCM_TEXTURE_DEPTH16_FLOAT: - return DXGI_FORMAT_R16_FLOAT; - case CELL_GCM_TEXTURE_X16: - return DXGI_FORMAT_R16_UNORM; - case CELL_GCM_TEXTURE_Y16_X16: - return DXGI_FORMAT_R16G16_UNORM; - case CELL_GCM_TEXTURE_R5G5B5A1: - return DXGI_FORMAT_B5G5R5A1_UNORM; - case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: - return DXGI_FORMAT_R16G16B16A16_FLOAT; - case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: - return DXGI_FORMAT_R32G32B32A32_FLOAT; - case CELL_GCM_TEXTURE_X32_FLOAT: - return DXGI_FORMAT_R32_FLOAT; - case CELL_GCM_TEXTURE_D1R5G5B5: - return DXGI_FORMAT_B5G5R5A1_UNORM; - case CELL_GCM_TEXTURE_D8R8G8B8: - return DXGI_FORMAT_R8G8B8A8_UNORM; - case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: - return DXGI_FORMAT_G8R8_G8B8_UNORM; - case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: - return DXGI_FORMAT_R8G8_B8G8_UNORM; - } -} diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index 78c66bc379..1313df4f1f 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -1,115 +1,31 @@ #include "stdafx_d3d12.h" #ifdef _WIN32 -#include "D3D12Buffer.h" #include "Utilities/Log.h" #include "D3D12GSRender.h" #include "d3dx12.h" #include "../Common/BufferUtils.h" +#include "D3D12Formats.h" -const int g_vertexCount = 32; - -// Where are these type defined ??? -static -DXGI_FORMAT getFormat(u8 type, u8 size) +namespace { - /*static const u32 gl_types[] = - { - GL_SHORT, - GL_FLOAT, - GL_HALF_FLOAT, - GL_UNSIGNED_BYTE, - GL_SHORT, - GL_FLOAT, // Needs conversion - GL_UNSIGNED_BYTE, - }; - - static const bool gl_normalized[] = - { - GL_TRUE, - GL_FALSE, - GL_FALSE, - GL_TRUE, - GL_FALSE, - GL_TRUE, - GL_FALSE, - };*/ - static const DXGI_FORMAT typeX1[] = - { - DXGI_FORMAT_R16_SNORM, - DXGI_FORMAT_R32_FLOAT, - DXGI_FORMAT_R16_FLOAT, - DXGI_FORMAT_R8_UNORM, - DXGI_FORMAT_R16_SINT, - DXGI_FORMAT_R32_FLOAT, - DXGI_FORMAT_R8_UINT - }; - static const DXGI_FORMAT typeX2[] = - { - DXGI_FORMAT_R16G16_SNORM, - DXGI_FORMAT_R32G32_FLOAT, - DXGI_FORMAT_R16G16_FLOAT, - DXGI_FORMAT_R8G8_UNORM, - DXGI_FORMAT_R16G16_SINT, - DXGI_FORMAT_R32G32_FLOAT, - DXGI_FORMAT_R8G8_UINT - }; - static const DXGI_FORMAT typeX3[] = - { - DXGI_FORMAT_R16G16B16A16_SNORM, - DXGI_FORMAT_R32G32B32_FLOAT, - DXGI_FORMAT_R16G16B16A16_FLOAT, - DXGI_FORMAT_R8G8B8A8_UNORM, - DXGI_FORMAT_R16G16B16A16_SINT, - DXGI_FORMAT_R32G32B32_FLOAT, - DXGI_FORMAT_R8G8B8A8_UINT - }; - static const DXGI_FORMAT typeX4[] = - { - DXGI_FORMAT_R16G16B16A16_SNORM, - DXGI_FORMAT_R32G32B32A32_FLOAT, - DXGI_FORMAT_R16G16B16A16_FLOAT, - DXGI_FORMAT_R8G8B8A8_UNORM, - DXGI_FORMAT_R16G16B16A16_SINT, - DXGI_FORMAT_R32G32B32A32_FLOAT, - DXGI_FORMAT_R8G8B8A8_UINT - }; - - switch (size) - { - case 1: - return typeX1[type]; - case 2: - return typeX2[type]; - case 3: - return typeX3[type]; - case 4: - return typeX4[type]; - default: - LOG_ERROR(RSX, "Wrong size for vertex attrib : %d", size); - return DXGI_FORMAT(); - } -} - -// D3D12GS member handling buffers - - /** * */ -static -D3D12_GPU_VIRTUAL_ADDRESS createVertexBuffer(const rsx::data_array_format_info &vertex_array_desc, const std::vector &vertex_data, ID3D12Device *device, DataHeap &vertexIndexHeap) +D3D12_GPU_VIRTUAL_ADDRESS createVertexBuffer(const rsx::data_array_format_info &vertex_array_desc, const std::vector &vertex_data, ID3D12Device *device, data_heap &vertex_index_heap) { - size_t subBufferSize = vertex_data.size(); - assert(vertexIndexHeap.canAlloc(subBufferSize)); - size_t heapOffset = vertexIndexHeap.alloc(subBufferSize); + size_t buffer_size = vertex_data.size(); + assert(vertex_index_heap.can_alloc(buffer_size)); + size_t heap_offset = vertex_index_heap.alloc(buffer_size); void *buffer; - ThrowIfFailed(vertexIndexHeap.m_heap->Map(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize), (void**)&buffer)); - void *bufferMap = (char*)buffer + heapOffset; + ThrowIfFailed(vertex_index_heap.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size), (void**)&buffer)); + void *bufferMap = (char*)buffer + heap_offset; memcpy(bufferMap, vertex_data.data(), vertex_data.size()); - vertexIndexHeap.m_heap->Unmap(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize)); - return vertexIndexHeap.m_heap->GetGPUVirtualAddress() + heapOffset; + vertex_index_heap.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); + return vertex_index_heap.m_heap->GetGPUVirtualAddress() + heap_offset; +} + } void D3D12GSRender::load_vertex_data(u32 first, u32 count) @@ -122,7 +38,7 @@ void D3D12GSRender::upload_vertex_attributes(const std::vectorMap(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize), (void**)&buffer)); - void *bufferMap = (char*)buffer + heapOffset; + ThrowIfFailed(m_vertexIndexData.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size), (void**)&buffer)); + void *mapped_buffer = (char*)buffer + heap_offset; for (const auto &range : vertex_ranges) { - write_vertex_array_data_to_buffer(bufferMap, range.first, range.second, index, info); - bufferMap = (char*)bufferMap + range.second * element_size; + write_vertex_array_data_to_buffer(mapped_buffer, range.first, range.second, index, info); + mapped_buffer = (char*)mapped_buffer + range.second * element_size; } - m_vertexIndexData.m_heap->Unmap(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize)); + m_vertexIndexData.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); - D3D12_VERTEX_BUFFER_VIEW vertexBufferView = {}; - vertexBufferView.BufferLocation = m_vertexIndexData.m_heap->GetGPUVirtualAddress() + heapOffset; - vertexBufferView.SizeInBytes = (UINT)subBufferSize; - vertexBufferView.StrideInBytes = (UINT)element_size; - m_vertex_buffer_views.push_back(vertexBufferView); + D3D12_VERTEX_BUFFER_VIEW vertex_buffer_view = + { + m_vertexIndexData.m_heap->GetGPUVirtualAddress() + heap_offset, + (UINT)buffer_size, + (UINT)element_size + }; + m_vertex_buffer_views.push_back(vertex_buffer_view); - m_timers.m_bufferUploadSize += subBufferSize; + m_timers.m_bufferUploadSize += buffer_size; D3D12_INPUT_ELEMENT_DESC IAElement = {}; IAElement.SemanticName = "TEXCOORD"; IAElement.SemanticIndex = (UINT)index; - IAElement.InputSlot = (UINT)inputSlot++; - IAElement.Format = getFormat(info.type - 1, info.size); + IAElement.InputSlot = (UINT)input_slot++; + IAElement.Format = get_vertex_attribute_format(info.type, info.size); IAElement.AlignedByteOffset = 0; IAElement.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA; IAElement.InstanceDataStepRate = 0; @@ -188,27 +106,28 @@ void D3D12GSRender::upload_vertex_attributes(const std::vectorMap(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize), (void**)&buffer)); - void *bufferMap = (char*)buffer + heapOffset; - memcpy(bufferMap, data.data(), data.size()); - m_vertexIndexData.m_heap->Unmap(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize)); + ThrowIfFailed(m_vertexIndexData.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size), (void**)&buffer)); + void *mapped_buffer = (char*)buffer + heap_offset; + memcpy(mapped_buffer, data.data(), data.size()); + m_vertexIndexData.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); - D3D12_VERTEX_BUFFER_VIEW vertexBufferView = {}; - vertexBufferView.BufferLocation = m_vertexIndexData.m_heap->GetGPUVirtualAddress() + heapOffset; - vertexBufferView.SizeInBytes = (UINT)subBufferSize; - vertexBufferView.StrideInBytes = (UINT)element_size; - m_vertex_buffer_views.push_back(vertexBufferView); + D3D12_VERTEX_BUFFER_VIEW vertex_buffer_view = { + m_vertexIndexData.m_heap->GetGPUVirtualAddress() + heap_offset, + (UINT)buffer_size, + (UINT)element_size + }; + m_vertex_buffer_views.push_back(vertex_buffer_view); D3D12_INPUT_ELEMENT_DESC IAElement = {}; IAElement.SemanticName = "TEXCOORD"; IAElement.SemanticIndex = (UINT)index; - IAElement.InputSlot = (UINT)inputSlot++; - IAElement.Format = getFormat(info.type - 1, info.size); + IAElement.InputSlot = (UINT)input_slot++; + IAElement.Format = get_vertex_attribute_format(info.type, info.size); IAElement.AlignedByteOffset = 0; IAElement.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA; IAElement.InstanceDataStepRate = 1; @@ -221,9 +140,9 @@ void D3D12GSRender::load_vertex_index_data(u32 first, u32 count) m_renderingInfo.m_indexed = true; } -void D3D12GSRender::setScaleOffset(size_t descriptorIndex) +void D3D12GSRender::upload_and_bind_scale_offset_matrix(size_t descriptorIndex) { - float scaleOffsetMat[16] = + float scale_offset_matrix[16] = { 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, -1.0f, 0.0f, 0.0f, @@ -235,52 +154,53 @@ void D3D12GSRender::setScaleOffset(size_t descriptorIndex) int clip_h = rsx::method_registers[NV4097_SET_SURFACE_CLIP_VERTICAL] >> 16; // Scale - scaleOffsetMat[0] *= (float&)rsx::method_registers[NV4097_SET_VIEWPORT_SCALE] / (clip_w / 2.f); - scaleOffsetMat[5] *= (float&)rsx::method_registers[NV4097_SET_VIEWPORT_SCALE + 1] / (clip_h / 2.f); - scaleOffsetMat[10] = (float&)rsx::method_registers[NV4097_SET_VIEWPORT_SCALE + 2]; + scale_offset_matrix[0] *= (float&)rsx::method_registers[NV4097_SET_VIEWPORT_SCALE] / (clip_w / 2.f); + scale_offset_matrix[5] *= (float&)rsx::method_registers[NV4097_SET_VIEWPORT_SCALE + 1] / (clip_h / 2.f); + scale_offset_matrix[10] = (float&)rsx::method_registers[NV4097_SET_VIEWPORT_SCALE + 2]; // Offset - scaleOffsetMat[3] = (float&)rsx::method_registers[NV4097_SET_VIEWPORT_OFFSET] - (clip_w / 2.f); - scaleOffsetMat[7] = -((float&)rsx::method_registers[NV4097_SET_VIEWPORT_OFFSET + 1] - (clip_h / 2.f)); - scaleOffsetMat[11] = (float&)rsx::method_registers[NV4097_SET_VIEWPORT_OFFSET + 2]; + scale_offset_matrix[3] = (float&)rsx::method_registers[NV4097_SET_VIEWPORT_OFFSET] - (clip_w / 2.f); + scale_offset_matrix[7] = -((float&)rsx::method_registers[NV4097_SET_VIEWPORT_OFFSET + 1] - (clip_h / 2.f)); + scale_offset_matrix[11] = (float&)rsx::method_registers[NV4097_SET_VIEWPORT_OFFSET + 2]; - scaleOffsetMat[3] /= clip_w / 2.f; - scaleOffsetMat[7] /= clip_h / 2.f; + scale_offset_matrix[3] /= clip_w / 2.f; + scale_offset_matrix[7] /= clip_h / 2.f; - assert(m_constantsData.canAlloc(256)); - size_t heapOffset = m_constantsData.alloc(256); + assert(m_constantsData.can_alloc(256)); + size_t heap_offset = m_constantsData.alloc(256); // Scale offset buffer // Separate constant buffer - void *scaleOffsetMap; - ThrowIfFailed(m_constantsData.m_heap->Map(0, &CD3DX12_RANGE(heapOffset, heapOffset + 256), &scaleOffsetMap)); - streamToBuffer((char*)scaleOffsetMap + heapOffset, scaleOffsetMat, 16 * sizeof(float)); - int isAlphaTested = !!(rsx::method_registers[NV4097_SET_ALPHA_TEST_ENABLE]); + void *mapped_buffer; + ThrowIfFailed(m_constantsData.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + 256), &mapped_buffer)); + streamToBuffer((char*)mapped_buffer + heap_offset, scale_offset_matrix, 16 * sizeof(float)); + int is_alpha_tested = !!(rsx::method_registers[NV4097_SET_ALPHA_TEST_ENABLE]); float alpha_ref = (float&)rsx::method_registers[NV4097_SET_ALPHA_REF]; - memcpy((char*)scaleOffsetMap + heapOffset + 16 * sizeof(float), &isAlphaTested, sizeof(int)); - memcpy((char*)scaleOffsetMap + heapOffset + 17 * sizeof(float), &alpha_ref, sizeof(float)); - m_constantsData.m_heap->Unmap(0, &CD3DX12_RANGE(heapOffset, heapOffset + 256)); + memcpy((char*)mapped_buffer + heap_offset + 16 * sizeof(float), &is_alpha_tested, sizeof(int)); + memcpy((char*)mapped_buffer + heap_offset + 17 * sizeof(float), &alpha_ref, sizeof(float)); + m_constantsData.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + 256)); - D3D12_CONSTANT_BUFFER_VIEW_DESC constantBufferViewDesc = {}; - constantBufferViewDesc.BufferLocation = m_constantsData.m_heap->GetGPUVirtualAddress() + heapOffset; - constantBufferViewDesc.SizeInBytes = (UINT)256; - m_device->CreateConstantBufferView(&constantBufferViewDesc, - CD3DX12_CPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_descriptorsHeap->GetCPUDescriptorHandleForHeapStart()) + D3D12_CONSTANT_BUFFER_VIEW_DESC constant_buffer_view_desc = { + m_constantsData.m_heap->GetGPUVirtualAddress() + heap_offset, + 256 + }; + m_device->CreateConstantBufferView(&constant_buffer_view_desc, + CD3DX12_CPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().descriptors_heap->GetCPUDescriptorHandleForHeapStart()) .Offset((INT)descriptorIndex, g_descriptorStrideSRVCBVUAV)); } -void D3D12GSRender::FillVertexShaderConstantsBuffer(size_t descriptorIndex) +void D3D12GSRender::upload_and_bind_vertex_shader_constants(size_t descriptor_index) { for (const auto &entry : transform_constants) local_transform_constants[entry.first] = entry.second; - size_t bufferSize = 512 * 4 * sizeof(float); + size_t buffer_size = 512 * 4 * sizeof(float); - assert(m_constantsData.canAlloc(bufferSize)); - size_t heapOffset = m_constantsData.alloc(bufferSize); + assert(m_constantsData.can_alloc(buffer_size)); + size_t heap_offset = m_constantsData.alloc(buffer_size); - void *constantsBufferMap; - ThrowIfFailed(m_constantsData.m_heap->Map(0, &CD3DX12_RANGE(heapOffset, heapOffset + bufferSize), &constantsBufferMap)); + void *mapped_buffer; + ThrowIfFailed(m_constantsData.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size), &mapped_buffer)); for (const auto &entry : local_transform_constants) { float data[4] = { @@ -289,118 +209,101 @@ void D3D12GSRender::FillVertexShaderConstantsBuffer(size_t descriptorIndex) entry.second.z, entry.second.w }; - streamToBuffer((char*)constantsBufferMap + heapOffset + entry.first * 4 * sizeof(float), data, 4 * sizeof(float)); + streamToBuffer((char*)mapped_buffer + heap_offset + entry.first * 4 * sizeof(float), data, 4 * sizeof(float)); } - m_constantsData.m_heap->Unmap(0, &CD3DX12_RANGE(heapOffset, heapOffset + bufferSize)); + m_constantsData.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); - D3D12_CONSTANT_BUFFER_VIEW_DESC constantBufferViewDesc = {}; - constantBufferViewDesc.BufferLocation = m_constantsData.m_heap->GetGPUVirtualAddress() + heapOffset; - constantBufferViewDesc.SizeInBytes = (UINT)bufferSize; - m_device->CreateConstantBufferView(&constantBufferViewDesc, - CD3DX12_CPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_descriptorsHeap->GetCPUDescriptorHandleForHeapStart()) - .Offset((INT)descriptorIndex, g_descriptorStrideSRVCBVUAV)); + D3D12_CONSTANT_BUFFER_VIEW_DESC constant_buffer_view_desc = { + m_constantsData.m_heap->GetGPUVirtualAddress() + heap_offset, + (UINT)buffer_size + }; + m_device->CreateConstantBufferView(&constant_buffer_view_desc, + CD3DX12_CPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().descriptors_heap->GetCPUDescriptorHandleForHeapStart()) + .Offset((INT)descriptor_index, g_descriptorStrideSRVCBVUAV)); } -void D3D12GSRender::FillPixelShaderConstantsBuffer(size_t descriptorIndex) +void D3D12GSRender::upload_and_bind_fragment_shader_constants(size_t descriptor_index) { // Get constant from fragment program - const std::vector &fragmentOffset = m_cachePSO.getFragmentConstantOffsetsCache(&fragment_program); - size_t bufferSize = fragmentOffset.size() * 4 * sizeof(float) + 1; + const std::vector &fragment_constant_offsets = m_cachePSO.getFragmentConstantOffsetsCache(&fragment_program); + size_t buffer_size = fragment_constant_offsets.size() * 4 * sizeof(float) + 1; // Multiple of 256 never 0 - bufferSize = (bufferSize + 255) & ~255; + buffer_size = (buffer_size + 255) & ~255; - assert(m_constantsData.canAlloc(bufferSize)); - size_t heapOffset = m_constantsData.alloc(bufferSize); + assert(m_constantsData.can_alloc(buffer_size)); + size_t heap_offset = m_constantsData.alloc(buffer_size); size_t offset = 0; - void *constantsBufferMap; - ThrowIfFailed(m_constantsData.m_heap->Map(0, &CD3DX12_RANGE(heapOffset, heapOffset + bufferSize), &constantsBufferMap)); - for (size_t offsetInFP : fragmentOffset) + void *mapped_buffer; + ThrowIfFailed(m_constantsData.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size), &mapped_buffer)); + for (size_t offset_in_fragment_program : fragment_constant_offsets) { u32 vector[4]; - // Is it assigned by color register in command buffer ? - // TODO : we loop every iteration, we might do better... - bool isCommandBufferSetConstant = false; -/* for (const auto& entry : fragment_constants) - { - size_t fragmentId = entry.first - fragment_program.offset; - if (fragmentId == offsetInFP) - { - isCommandBufferSetConstant = true; - vector[0] = (u32&)entry.second.x; - vector[1] = (u32&)entry.second.y; - vector[2] = (u32&)entry.second.z; - vector[3] = (u32&)entry.second.w; - break; - } - }*/ - if (!isCommandBufferSetConstant) - { - auto data = vm::ps3::ptr::make(fragment_program.addr + (u32)offsetInFP); + auto data = vm::ps3::ptr::make(fragment_program.addr + (u32)offset_in_fragment_program); - u32 c0 = (data[0] >> 16 | data[0] << 16); - u32 c1 = (data[1] >> 16 | data[1] << 16); - u32 c2 = (data[2] >> 16 | data[2] << 16); - u32 c3 = (data[3] >> 16 | data[3] << 16); + u32 c0 = (data[0] >> 16 | data[0] << 16); + u32 c1 = (data[1] >> 16 | data[1] << 16); + u32 c2 = (data[2] >> 16 | data[2] << 16); + u32 c3 = (data[3] >> 16 | data[3] << 16); - vector[0] = c0; - vector[1] = c1; - vector[2] = c2; - vector[3] = c3; - } + vector[0] = c0; + vector[1] = c1; + vector[2] = c2; + vector[3] = c3; - streamToBuffer((char*)constantsBufferMap + heapOffset + offset, vector, 4 * sizeof(u32)); + streamToBuffer((char*)mapped_buffer + heap_offset + offset, vector, 4 * sizeof(u32)); offset += 4 * sizeof(u32); } - m_constantsData.m_heap->Unmap(0, &CD3DX12_RANGE(heapOffset, heapOffset + bufferSize)); + m_constantsData.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); - D3D12_CONSTANT_BUFFER_VIEW_DESC constantBufferViewDesc = {}; - constantBufferViewDesc.BufferLocation = m_constantsData.m_heap->GetGPUVirtualAddress() + heapOffset; - constantBufferViewDesc.SizeInBytes = (UINT)bufferSize; - m_device->CreateConstantBufferView(&constantBufferViewDesc, - CD3DX12_CPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_descriptorsHeap->GetCPUDescriptorHandleForHeapStart()) - .Offset((INT)descriptorIndex, g_descriptorStrideSRVCBVUAV)); + D3D12_CONSTANT_BUFFER_VIEW_DESC constant_buffer_view_desc = { + m_constantsData.m_heap->GetGPUVirtualAddress() + heap_offset, + (UINT)buffer_size + }; + m_device->CreateConstantBufferView(&constant_buffer_view_desc, + CD3DX12_CPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().descriptors_heap->GetCPUDescriptorHandleForHeapStart()) + .Offset((INT)descriptor_index, g_descriptorStrideSRVCBVUAV)); } -void D3D12GSRender::upload_vertex_index_data(ID3D12GraphicsCommandList *cmdlist) +void D3D12GSRender::upload_and_set_vertex_index_data(ID3D12GraphicsCommandList *command_list) { // Index count m_renderingInfo.m_count = 0; for (const auto &pair : m_first_count_pairs) - m_renderingInfo.m_count += getIndexCount(draw_mode, pair.second); + m_renderingInfo.m_count += get_index_count(draw_mode, pair.second); if (!m_renderingInfo.m_indexed) { // Non indexed upload_vertex_attributes(m_first_count_pairs); - cmdlist->IASetVertexBuffers(0, (UINT)m_vertex_buffer_views.size(), m_vertex_buffer_views.data()); - if (isNativePrimitiveMode(draw_mode)) + command_list->IASetVertexBuffers(0, (UINT)m_vertex_buffer_views.size(), m_vertex_buffer_views.data()); + if (is_primitive_native(draw_mode)) return; // Handle non native primitive // Alloc - size_t subBufferSize = align(m_renderingInfo.m_count * sizeof(u16), 64); - assert(m_vertexIndexData.canAlloc(subBufferSize)); - size_t heapOffset = m_vertexIndexData.alloc(subBufferSize); + size_t buffer_size = align(m_renderingInfo.m_count * sizeof(u16), 64); + assert(m_vertexIndexData.can_alloc(buffer_size)); + size_t heap_offset = m_vertexIndexData.alloc(buffer_size); void *buffer; - ThrowIfFailed(m_vertexIndexData.m_heap->Map(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize), (void**)&buffer)); - void *bufferMap = (char*)buffer + heapOffset; + ThrowIfFailed(m_vertexIndexData.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size), (void**)&buffer)); + void *mapped_buffer = (char*)buffer + heap_offset; size_t first = 0; for (const auto &pair : m_first_count_pairs) { - size_t element_count = getIndexCount(draw_mode, pair.second); - write_index_array_for_non_indexed_non_native_primitive_to_buffer((char*)bufferMap, draw_mode, first, pair.second); - bufferMap = (char*)bufferMap + element_count * sizeof(u16); + size_t element_count = get_index_count(draw_mode, pair.second); + write_index_array_for_non_indexed_non_native_primitive_to_buffer((char*)mapped_buffer, draw_mode, (u32)first, (u32)pair.second); + mapped_buffer = (char*)mapped_buffer + element_count * sizeof(u16); first += pair.second; } - m_vertexIndexData.m_heap->Unmap(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize)); - D3D12_INDEX_BUFFER_VIEW indexBufferView = { - m_vertexIndexData.m_heap->GetGPUVirtualAddress() + heapOffset, - (UINT)subBufferSize, + m_vertexIndexData.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); + D3D12_INDEX_BUFFER_VIEW index_buffer_view = { + m_vertexIndexData.m_heap->GetGPUVirtualAddress() + heap_offset, + (UINT)buffer_size, DXGI_FORMAT_R16_UINT }; - cmdlist->IASetIndexBuffer(&indexBufferView); + command_list->IASetIndexBuffer(&index_buffer_view); m_renderingInfo.m_indexed = true; } else @@ -408,35 +311,35 @@ void D3D12GSRender::upload_vertex_index_data(ID3D12GraphicsCommandList *cmdlist) u32 indexed_type = rsx::method_registers[NV4097_SET_INDEX_ARRAY_DMA] >> 4; // Index type - size_t indexSize = (indexed_type == CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16) ? 2 : 4; + size_t index_size = get_index_type_size(indexed_type); // Alloc - size_t subBufferSize = align(m_renderingInfo.m_count * indexSize, 64); - assert(m_vertexIndexData.canAlloc(subBufferSize)); - size_t heapOffset = m_vertexIndexData.alloc(subBufferSize); + size_t buffer_size = align(m_renderingInfo.m_count * index_size, 64); + assert(m_vertexIndexData.can_alloc(buffer_size)); + size_t heap_offset = m_vertexIndexData.alloc(buffer_size); void *buffer; - ThrowIfFailed(m_vertexIndexData.m_heap->Map(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize), (void**)&buffer)); - void *bufferMap = (char*)buffer + heapOffset; + ThrowIfFailed(m_vertexIndexData.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size), (void**)&buffer)); + void *mapped_buffer = (char*)buffer + heap_offset; u32 min_index = (u32)-1, max_index = 0; for (const auto &pair : m_first_count_pairs) { - size_t element_count = getIndexCount(draw_mode, pair.second); - write_index_array_data_to_buffer((char*)bufferMap, draw_mode, pair.first, pair.second, min_index, max_index); - bufferMap = (char*)bufferMap + element_count * indexSize; + size_t element_count = get_index_count(draw_mode, pair.second); + write_index_array_data_to_buffer((char*)mapped_buffer, draw_mode, pair.first, pair.second, min_index, max_index); + mapped_buffer = (char*)mapped_buffer + element_count * index_size; } - m_vertexIndexData.m_heap->Unmap(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize)); - D3D12_INDEX_BUFFER_VIEW indexBufferView = { - m_vertexIndexData.m_heap->GetGPUVirtualAddress() + heapOffset, - (UINT)subBufferSize, - (indexed_type == CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16) ? DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R32_UINT + m_vertexIndexData.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); + D3D12_INDEX_BUFFER_VIEW index_buffer_view = { + m_vertexIndexData.m_heap->GetGPUVirtualAddress() + heap_offset, + (UINT)buffer_size, + get_index_type(indexed_type) }; - m_timers.m_bufferUploadSize += subBufferSize; - cmdlist->IASetIndexBuffer(&indexBufferView); + m_timers.m_bufferUploadSize += buffer_size; + command_list->IASetIndexBuffer(&index_buffer_view); m_renderingInfo.m_indexed = true; upload_vertex_attributes({ std::make_pair(0, max_index + 1) }); - cmdlist->IASetVertexBuffers(0, (UINT)m_vertex_buffer_views.size(), m_vertex_buffer_views.data()); + command_list->IASetVertexBuffers(0, (UINT)m_vertex_buffer_views.size(), m_vertex_buffer_views.data()); } } diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.h b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.h deleted file mode 100644 index 79287d1f8c..0000000000 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.h +++ /dev/null @@ -1,7 +0,0 @@ -#pragma once - -#include -#include "Emu/Memory/vm.h" -#include "Emu/RSX/RSXThread.h" - -std::vector getIALayout(ID3D12Device *device, bool indexedDraw, const rsx::data_array_format_info *vertexData); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12CommonDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/D3D12CommonDecompiler.cpp index fb8931d58a..e454a75738 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12CommonDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12CommonDecompiler.cpp @@ -71,4 +71,3 @@ std::string compareFunctionImp(COMPARE f, const std::string &Op0, const std::str } } #endif - diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Formats.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Formats.cpp new file mode 100644 index 0000000000..9cd3bf11b9 --- /dev/null +++ b/rpcs3/Emu/RSX/D3D12/D3D12Formats.cpp @@ -0,0 +1,468 @@ +#include "stdafx_d3d12.h" +#ifdef _WIN32 +#include "D3D12Formats.h" +#include "D3D12Utils.h" +#include "Emu/RSX/GCM.h" + + +D3D12_BLEND_OP get_blend_op(u16 op) noexcept +{ + switch (op) + { + case CELL_GCM_FUNC_ADD: return D3D12_BLEND_OP_ADD; + case CELL_GCM_FUNC_SUBTRACT: return D3D12_BLEND_OP_SUBTRACT; + case CELL_GCM_FUNC_REVERSE_SUBTRACT: return D3D12_BLEND_OP_REV_SUBTRACT; + case CELL_GCM_MIN: return D3D12_BLEND_OP_MIN; + case CELL_GCM_MAX: return D3D12_BLEND_OP_MAX; + case CELL_GCM_FUNC_ADD_SIGNED: + case CELL_GCM_FUNC_REVERSE_ADD_SIGNED: + case CELL_GCM_FUNC_REVERSE_SUBTRACT_SIGNED: + unreachable("Unsupported blend op"); + } + unreachable("Wrong blend op"); +} + +D3D12_BLEND get_blend_factor(u16 factor) noexcept +{ + switch (factor) + { + case CELL_GCM_ZERO: return D3D12_BLEND_ZERO; + case CELL_GCM_ONE: return D3D12_BLEND_ONE; + case CELL_GCM_SRC_COLOR: return D3D12_BLEND_SRC_COLOR; + case CELL_GCM_ONE_MINUS_SRC_COLOR: return D3D12_BLEND_INV_SRC_COLOR; + case CELL_GCM_SRC_ALPHA: return D3D12_BLEND_SRC_ALPHA; + case CELL_GCM_ONE_MINUS_SRC_ALPHA: return D3D12_BLEND_INV_SRC_ALPHA; + case CELL_GCM_DST_ALPHA: return D3D12_BLEND_DEST_ALPHA; + case CELL_GCM_ONE_MINUS_DST_ALPHA: return D3D12_BLEND_INV_DEST_ALPHA; + case CELL_GCM_DST_COLOR: return D3D12_BLEND_DEST_COLOR; + case CELL_GCM_ONE_MINUS_DST_COLOR: return D3D12_BLEND_INV_DEST_COLOR; + case CELL_GCM_SRC_ALPHA_SATURATE: return D3D12_BLEND_SRC_ALPHA_SAT; + case CELL_GCM_CONSTANT_COLOR: + case CELL_GCM_ONE_MINUS_CONSTANT_COLOR: + case CELL_GCM_CONSTANT_ALPHA: + case CELL_GCM_ONE_MINUS_CONSTANT_ALPHA: + unreachable("Unsupported blend color factor"); + } + unreachable("Wrong blend color factor"); +} + +D3D12_BLEND get_blend_factor_alpha(u16 factor) noexcept +{ + switch (factor) + { + case CELL_GCM_ZERO: return D3D12_BLEND_ZERO; + case CELL_GCM_ONE: return D3D12_BLEND_ONE; + case CELL_GCM_SRC_COLOR: return D3D12_BLEND_SRC_ALPHA; + case CELL_GCM_ONE_MINUS_SRC_COLOR: return D3D12_BLEND_INV_SRC_ALPHA; + case CELL_GCM_SRC_ALPHA: return D3D12_BLEND_SRC_ALPHA; + case CELL_GCM_ONE_MINUS_SRC_ALPHA: return D3D12_BLEND_INV_SRC_ALPHA; + case CELL_GCM_DST_ALPHA: return D3D12_BLEND_DEST_ALPHA; + case CELL_GCM_ONE_MINUS_DST_ALPHA: return D3D12_BLEND_INV_DEST_ALPHA; + case CELL_GCM_DST_COLOR: return D3D12_BLEND_DEST_ALPHA; + case CELL_GCM_ONE_MINUS_DST_COLOR: return D3D12_BLEND_INV_DEST_COLOR; + case CELL_GCM_SRC_ALPHA_SATURATE: return D3D12_BLEND_INV_DEST_ALPHA; + case CELL_GCM_CONSTANT_COLOR: + case CELL_GCM_ONE_MINUS_CONSTANT_COLOR: + case CELL_GCM_CONSTANT_ALPHA: + case CELL_GCM_ONE_MINUS_CONSTANT_ALPHA: + unreachable("Unsupported blend alpha factor"); + } + unreachable("Wrong blend alpha factor"); +} + +/** +* Convert GCM logic op code to D3D12 one +*/ +D3D12_LOGIC_OP get_logic_op(u32 op) noexcept +{ + switch (op) + { + case CELL_GCM_CLEAR: return D3D12_LOGIC_OP_CLEAR; + case CELL_GCM_AND: return D3D12_LOGIC_OP_AND; + case CELL_GCM_AND_REVERSE: return D3D12_LOGIC_OP_AND_REVERSE; + case CELL_GCM_COPY: return D3D12_LOGIC_OP_COPY; + case CELL_GCM_AND_INVERTED: return D3D12_LOGIC_OP_AND_INVERTED; + case CELL_GCM_NOOP: return D3D12_LOGIC_OP_NOOP; + case CELL_GCM_XOR: return D3D12_LOGIC_OP_XOR; + case CELL_GCM_OR: return D3D12_LOGIC_OP_OR; + case CELL_GCM_NOR: return D3D12_LOGIC_OP_NOR; + case CELL_GCM_EQUIV: return D3D12_LOGIC_OP_EQUIV; + case CELL_GCM_INVERT: return D3D12_LOGIC_OP_INVERT; + case CELL_GCM_OR_REVERSE: return D3D12_LOGIC_OP_OR_REVERSE; + case CELL_GCM_COPY_INVERTED: return D3D12_LOGIC_OP_COPY_INVERTED; + case CELL_GCM_OR_INVERTED: return D3D12_LOGIC_OP_OR_INVERTED; + case CELL_GCM_NAND: return D3D12_LOGIC_OP_NAND; + } + unreachable("Wrong logic op"); +} + +/** +* Convert GCM stencil op code to D3D12 one +*/ +D3D12_STENCIL_OP get_stencil_op(u32 op) noexcept +{ + switch (op) + { + case CELL_GCM_KEEP: return D3D12_STENCIL_OP_KEEP; + case CELL_GCM_ZERO: return D3D12_STENCIL_OP_ZERO; + case CELL_GCM_REPLACE: return D3D12_STENCIL_OP_REPLACE; + case CELL_GCM_INCR: return D3D12_STENCIL_OP_INCR; + case CELL_GCM_DECR: return D3D12_STENCIL_OP_DECR; + case CELL_GCM_INCR_WRAP: + case CELL_GCM_DECR_WRAP: + unreachable("Unsupported Stencil Op %d"); + } + unreachable("Wrong Stencil Op %d"); +} + +D3D12_COMPARISON_FUNC get_compare_func(u32 op) noexcept +{ + switch (op) + { + case CELL_GCM_ZERO: + case CELL_GCM_NEVER: return D3D12_COMPARISON_FUNC_NEVER; + case CELL_GCM_LESS: return D3D12_COMPARISON_FUNC_LESS; + case CELL_GCM_EQUAL: return D3D12_COMPARISON_FUNC_EQUAL; + case CELL_GCM_LEQUAL: return D3D12_COMPARISON_FUNC_LESS_EQUAL; + case CELL_GCM_GREATER: return D3D12_COMPARISON_FUNC_GREATER; + case CELL_GCM_NOTEQUAL: return D3D12_COMPARISON_FUNC_NOT_EQUAL; + case CELL_GCM_GEQUAL: return D3D12_COMPARISON_FUNC_GREATER_EQUAL; + case CELL_GCM_ALWAYS: return D3D12_COMPARISON_FUNC_ALWAYS; + } + unreachable("Wrong compare function"); +} + +DXGI_FORMAT get_texture_format(int format) noexcept +{ + switch (format) + { + case CELL_GCM_TEXTURE_B8: + return DXGI_FORMAT_R8_UNORM; + case CELL_GCM_TEXTURE_A1R5G5B5: + return DXGI_FORMAT_B5G5R5A1_UNORM; + case CELL_GCM_TEXTURE_A4R4G4B4: + return DXGI_FORMAT_B4G4R4A4_UNORM; + case CELL_GCM_TEXTURE_R5G6B5: + return DXGI_FORMAT_B5G6R5_UNORM; + case CELL_GCM_TEXTURE_A8R8G8B8: + return DXGI_FORMAT_R8G8B8A8_UNORM; + case CELL_GCM_TEXTURE_COMPRESSED_DXT1: + return DXGI_FORMAT_BC1_UNORM; + case CELL_GCM_TEXTURE_COMPRESSED_DXT23: + return DXGI_FORMAT_BC2_UNORM; + case CELL_GCM_TEXTURE_COMPRESSED_DXT45: + return DXGI_FORMAT_BC3_UNORM; + case CELL_GCM_TEXTURE_G8B8: + return DXGI_FORMAT_G8R8_G8B8_UNORM; + case CELL_GCM_TEXTURE_R6G5B5: + // Not native + return DXGI_FORMAT_R8G8B8A8_UNORM; + case CELL_GCM_TEXTURE_DEPTH24_D8: + return DXGI_FORMAT_R32_UINT; + case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: + return DXGI_FORMAT_R32_FLOAT; + case CELL_GCM_TEXTURE_DEPTH16: + return DXGI_FORMAT_R16_UNORM; + case CELL_GCM_TEXTURE_DEPTH16_FLOAT: + return DXGI_FORMAT_R16_FLOAT; + case CELL_GCM_TEXTURE_X16: + return DXGI_FORMAT_R16_UNORM; + case CELL_GCM_TEXTURE_Y16_X16: + return DXGI_FORMAT_R16G16_UNORM; + case CELL_GCM_TEXTURE_R5G5B5A1: + return DXGI_FORMAT_B5G5R5A1_UNORM; + case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: + return DXGI_FORMAT_R16G16B16A16_FLOAT; + case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: + return DXGI_FORMAT_R32G32B32A32_FLOAT; + case CELL_GCM_TEXTURE_X32_FLOAT: + return DXGI_FORMAT_R32_FLOAT; + case CELL_GCM_TEXTURE_D1R5G5B5: + return DXGI_FORMAT_B5G5R5A1_UNORM; + case CELL_GCM_TEXTURE_D8R8G8B8: + return DXGI_FORMAT_R8G8B8A8_UNORM; + case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: + return DXGI_FORMAT_G8R8_G8B8_UNORM; + case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: + return DXGI_FORMAT_R8G8_B8G8_UNORM; + case CELL_GCM_TEXTURE_Y16_X16_FLOAT: + case CELL_GCM_TEXTURE_COMPRESSED_HILO8: + case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8: + case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: + case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: + unreachable( "Unimplemented Texture format"); + } + unreachable("Wrong Texture format"); +} + +UINT get_texture_max_aniso(u8 aniso) noexcept +{ + switch (aniso) + { + case CELL_GCM_TEXTURE_MAX_ANISO_1: return 1; + case CELL_GCM_TEXTURE_MAX_ANISO_2: return 2; + case CELL_GCM_TEXTURE_MAX_ANISO_4: return 4; + case CELL_GCM_TEXTURE_MAX_ANISO_6: return 6; + case CELL_GCM_TEXTURE_MAX_ANISO_8: return 8; + case CELL_GCM_TEXTURE_MAX_ANISO_10: return 10; + case CELL_GCM_TEXTURE_MAX_ANISO_12: return 12; + case CELL_GCM_TEXTURE_MAX_ANISO_16: return 16; + } + unreachable("Wrong Texture max aniso"); +} + +D3D12_TEXTURE_ADDRESS_MODE get_texture_wrap_mode(u8 wrap) noexcept +{ + switch (wrap) + { + case CELL_GCM_TEXTURE_WRAP: return D3D12_TEXTURE_ADDRESS_MODE_WRAP; + case CELL_GCM_TEXTURE_MIRROR: return D3D12_TEXTURE_ADDRESS_MODE_MIRROR; + case CELL_GCM_TEXTURE_CLAMP_TO_EDGE: return D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + case CELL_GCM_TEXTURE_BORDER: return D3D12_TEXTURE_ADDRESS_MODE_BORDER; + case CELL_GCM_TEXTURE_CLAMP: return D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + case CELL_GCM_TEXTURE_MIRROR_ONCE_CLAMP_TO_EDGE: return D3D12_TEXTURE_ADDRESS_MODE_MIRROR_ONCE; + case CELL_GCM_TEXTURE_MIRROR_ONCE_BORDER: return D3D12_TEXTURE_ADDRESS_MODE_MIRROR_ONCE; + case CELL_GCM_TEXTURE_MIRROR_ONCE_CLAMP: return D3D12_TEXTURE_ADDRESS_MODE_MIRROR_ONCE; + } + unreachable("Wrong texture wrap mode"); +} + +namespace +{ + void get_min_filter(u8 min_filter, D3D12_FILTER_TYPE &min, D3D12_FILTER_TYPE &mip) noexcept + { + switch (min_filter) + { + case CELL_GCM_TEXTURE_NEAREST: + min = D3D12_FILTER_TYPE_POINT; + mip = D3D12_FILTER_TYPE_POINT; + return;; + case CELL_GCM_TEXTURE_LINEAR: + min = D3D12_FILTER_TYPE_LINEAR; + mip = D3D12_FILTER_TYPE_POINT; + return; + case CELL_GCM_TEXTURE_NEAREST_NEAREST: + min = D3D12_FILTER_TYPE_POINT; + mip = D3D12_FILTER_TYPE_POINT; + return; + case CELL_GCM_TEXTURE_LINEAR_NEAREST: + min = D3D12_FILTER_TYPE_LINEAR; + mip = D3D12_FILTER_TYPE_POINT; + return; + case CELL_GCM_TEXTURE_NEAREST_LINEAR: + min = D3D12_FILTER_TYPE_POINT; + mip = D3D12_FILTER_TYPE_LINEAR; + return; + case CELL_GCM_TEXTURE_LINEAR_LINEAR: + min = D3D12_FILTER_TYPE_LINEAR; + mip = D3D12_FILTER_TYPE_LINEAR; + return; + case CELL_GCM_TEXTURE_CONVOLUTION_MIN: + unreachable("Unsupported min filter"); + } + unreachable("Wrong min filter"); + } + + D3D12_FILTER_TYPE get_mag_filter(u8 mag_filter) noexcept + { + switch (mag_filter) + { + case CELL_GCM_TEXTURE_NEAREST: return D3D12_FILTER_TYPE_POINT; + case CELL_GCM_TEXTURE_LINEAR: return D3D12_FILTER_TYPE_LINEAR; + } + unreachable("Wrong mag filter"); + } +} + +D3D12_FILTER get_texture_filter(u8 min_filter, u8 mag_filter) noexcept +{ + D3D12_FILTER_TYPE min, mip; + get_min_filter(min_filter, min, mip); + D3D12_FILTER_TYPE mag = get_mag_filter(mag_filter); + return D3D12_ENCODE_BASIC_FILTER(min, mag, mip, D3D12_FILTER_REDUCTION_TYPE_STANDARD); +} + +D3D12_PRIMITIVE_TOPOLOGY get_primitive_topology(u8 draw_mode) noexcept +{ + switch (draw_mode) + { + case CELL_GCM_PRIMITIVE_POINTS: return D3D_PRIMITIVE_TOPOLOGY_POINTLIST; + case CELL_GCM_PRIMITIVE_LINES: return D3D_PRIMITIVE_TOPOLOGY_LINELIST; + case CELL_GCM_PRIMITIVE_LINE_LOOP: return D3D_PRIMITIVE_TOPOLOGY_LINELIST_ADJ; + case CELL_GCM_PRIMITIVE_LINE_STRIP: return D3D_PRIMITIVE_TOPOLOGY_LINESTRIP; + case CELL_GCM_PRIMITIVE_TRIANGLES: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; + case CELL_GCM_PRIMITIVE_TRIANGLE_STRIP: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; + // Emulated + case CELL_GCM_PRIMITIVE_TRIANGLE_FAN: + case CELL_GCM_PRIMITIVE_QUADS: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; + + case CELL_GCM_PRIMITIVE_QUAD_STRIP: + case CELL_GCM_PRIMITIVE_POLYGON: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; + } + unreachable("Wrong draw mode"); +} +D3D12_PRIMITIVE_TOPOLOGY_TYPE get_primitive_topology_type(u8 draw_mode) noexcept +{ + switch (draw_mode) + { + case CELL_GCM_PRIMITIVE_POINTS: return D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT; + case CELL_GCM_PRIMITIVE_LINES: + case CELL_GCM_PRIMITIVE_LINE_LOOP: + case CELL_GCM_PRIMITIVE_LINE_STRIP: return D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; + case CELL_GCM_PRIMITIVE_TRIANGLES: + case CELL_GCM_PRIMITIVE_TRIANGLE_STRIP: + case CELL_GCM_PRIMITIVE_TRIANGLE_FAN: return D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + case CELL_GCM_PRIMITIVE_QUADS: + // unsupported + case CELL_GCM_PRIMITIVE_QUAD_STRIP: + case CELL_GCM_PRIMITIVE_POLYGON: return D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + } + unreachable("Wrong draw mode"); +} + +DXGI_FORMAT get_color_surface_format(u8 format) noexcept +{ + switch (format) + { + case CELL_GCM_SURFACE_A8R8G8B8: return DXGI_FORMAT_R8G8B8A8_UNORM; + case CELL_GCM_SURFACE_F_W16Z16Y16X16: return DXGI_FORMAT_R16G16B16A16_FLOAT; + } + unreachable("Wrong color surface format"); +} + +DXGI_FORMAT get_depth_stencil_surface_format(u8 format) noexcept +{ + switch (format) + { + case CELL_GCM_SURFACE_Z16: return DXGI_FORMAT_D16_UNORM; + case CELL_GCM_SURFACE_Z24S8: return DXGI_FORMAT_D24_UNORM_S8_UINT; + } + unreachable("Wrong depth stencil surface format"); +} + +DXGI_FORMAT get_depth_stencil_surface_clear_format(u8 format) noexcept +{ + switch (format) + { + case CELL_GCM_SURFACE_Z16: return DXGI_FORMAT_D16_UNORM; + case CELL_GCM_SURFACE_Z24S8: return DXGI_FORMAT_D24_UNORM_S8_UINT; + } + unreachable("Wrong depth stencil surface format"); +} + +DXGI_FORMAT get_depth_typeless_surface_format(u8 format) noexcept +{ + switch (format) + { + case CELL_GCM_SURFACE_Z16: return DXGI_FORMAT_R16_TYPELESS; + case CELL_GCM_SURFACE_Z24S8: return DXGI_FORMAT_R24G8_TYPELESS; + } + unreachable("Wrong depth stencil surface format"); +} + +BOOL get_front_face_ccw(u32 set_front_face_value) noexcept +{ + switch (set_front_face_value) + { + case CELL_GCM_CW: return FALSE; + default: // Disgaea 3 pass some garbage value at startup, this is needed to survive. + case CELL_GCM_CCW: return TRUE; + } + unreachable("Wrong front face value"); +} + +DXGI_FORMAT get_index_type(u8 index_type) noexcept +{ + switch (index_type) + { + case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16: return DXGI_FORMAT_R16_UINT; + case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_32: return DXGI_FORMAT_R32_UINT; + } + unreachable("Wrong index type"); +} + +DXGI_FORMAT get_vertex_attribute_format(u8 type, u8 size) noexcept +{ + switch (type) + { + case CELL_GCM_VERTEX_S1: + { + switch (size) + { + case 1: return DXGI_FORMAT_R16_SNORM; + case 2: return DXGI_FORMAT_R16G16_SNORM; + case 3: return DXGI_FORMAT_R16G16B16A16_SNORM; // No 3 channel type + case 4: return DXGI_FORMAT_R16G16B16A16_SNORM; + } + unreachable("Wrong type size"); + } + case CELL_GCM_VERTEX_F: + { + switch (size) + { + case 1: return DXGI_FORMAT_R32_FLOAT; + case 2: return DXGI_FORMAT_R32G32_FLOAT; + case 3: return DXGI_FORMAT_R32G32B32_FLOAT; + case 4: return DXGI_FORMAT_R32G32B32A32_FLOAT; + } + unreachable("Wrong type size"); + } + case CELL_GCM_VERTEX_SF: + { + switch (size) + { + case 1: return DXGI_FORMAT_R16_FLOAT; + case 2: return DXGI_FORMAT_R16G16_FLOAT; + case 3: return DXGI_FORMAT_R16G16B16A16_FLOAT; // No 3 channel type + case 4: return DXGI_FORMAT_R16G16B16A16_FLOAT; + } + unreachable("Wrong type size"); + } + case CELL_GCM_VERTEX_UB: + { + switch (size) + { + case 1: return DXGI_FORMAT_R8_UNORM; + case 2: return DXGI_FORMAT_R8G8_UNORM; + case 3: return DXGI_FORMAT_R8G8B8A8_UNORM; // No 3 channel type + case 4: return DXGI_FORMAT_R8G8B8A8_UNORM; + } + unreachable("Wrong type size"); + } + case CELL_GCM_VERTEX_S32K: + { + switch (size) + { + case 1: return DXGI_FORMAT_R16_SINT; + case 2: return DXGI_FORMAT_R16G16_SINT; + case 3: return DXGI_FORMAT_R16G16B16A16_SINT; // No 3 channel type + case 4: return DXGI_FORMAT_R16G16B16A16_SINT; + } + unreachable("Wrong type size"); + } + case CELL_GCM_VERTEX_CMP: + { + switch (size) + { + case 1: return DXGI_FORMAT_R32_FLOAT; + case 2: return DXGI_FORMAT_R32G32_FLOAT; + case 3: return DXGI_FORMAT_R32G32B32_FLOAT; + case 4: return DXGI_FORMAT_R32G32B32A32_FLOAT; + } + unreachable("Wrong type size"); + } + case CELL_GCM_VERTEX_UB256: + { + switch (size) + { + case 1: return DXGI_FORMAT_R8_UINT; + case 2: return DXGI_FORMAT_R8G8_UINT; + case 3: return DXGI_FORMAT_R8G8B8A8_UINT; // No 3 channel type + case 4: return DXGI_FORMAT_R8G8B8A8_UINT; + } + unreachable("Wrong type size"); + } + } + unreachable("Wrong type"); +} +#endif diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Formats.h b/rpcs3/Emu/RSX/D3D12/D3D12Formats.h new file mode 100644 index 0000000000..37fd9f6aee --- /dev/null +++ b/rpcs3/Emu/RSX/D3D12/D3D12Formats.h @@ -0,0 +1,99 @@ +#pragma once + +#include + +/** + * Convert GCM blend operator code to D3D12 one + */ +D3D12_BLEND_OP get_blend_op(u16 op) noexcept; + +/** + * Convert GCM blend factor code to D3D12 one + */ +D3D12_BLEND get_blend_factor(u16 factor) noexcept; + +/** + * Convert GCM blend factor code to D3D12 one for alpha component + */ +D3D12_BLEND get_blend_factor_alpha(u16 factor) noexcept; + +/** +* Convert GCM logic op code to D3D12 one +*/ +D3D12_LOGIC_OP get_logic_op(u32 op) noexcept; + +/** + * Convert GCM stencil op code to D3D12 one + */ +D3D12_STENCIL_OP get_stencil_op(u32 op) noexcept; + +/** + * Convert GCM comparison function code to D3D12 one. + */ +D3D12_COMPARISON_FUNC get_compare_func(u32 op) noexcept; + +/** + * Convert GCM texture format to an equivalent one supported by D3D12. + * Destination format may require a byte swap or data conversion. + */ +DXGI_FORMAT get_texture_format(int format) noexcept; + +/** + * Convert texture aniso value to UINT. + */ +UINT get_texture_max_aniso(u8 aniso) noexcept; + +/** + * Convert texture wrap mode to D3D12_TEXTURE_ADDRESS_MODE + */ +D3D12_TEXTURE_ADDRESS_MODE get_texture_wrap_mode(u8 wrap) noexcept; + +/** + * Convert minify and magnify filter to D3D12_FILTER + */ +D3D12_FILTER get_texture_filter(u8 min_filter, u8 mag_filter) noexcept; + +/** + * Convert draw mode to D3D12_PRIMITIVE_TOPOLOGY + */ +D3D12_PRIMITIVE_TOPOLOGY get_primitive_topology(u8 draw_mode) noexcept; + +/** +* Convert draw mode to D3D12_PRIMITIVE_TOPOLOGY_TYPE +*/ +D3D12_PRIMITIVE_TOPOLOGY_TYPE get_primitive_topology_type(u8 draw_mode) noexcept; + +/** + * Convert color surface format to DXGI_FORMAT + */ +DXGI_FORMAT get_color_surface_format(u8 format) noexcept; + +/** + * Convert depth stencil surface format to DXGI_FORMAT + */ +DXGI_FORMAT get_depth_stencil_surface_format(u8 format) noexcept; + +/** + *Convert depth stencil surface format to DXGI_FORMAT suited for clear value + */ +DXGI_FORMAT get_depth_stencil_surface_clear_format(u8 format) noexcept; + +/** + * Convert depth surface format to DXGI_FORMAT using typeless for stencil + */ +DXGI_FORMAT get_depth_typeless_surface_format(u8 format) noexcept; + +/** + * Convert front face value to bool value telling wheter front face is counterclockwise or not + */ +BOOL get_front_face_ccw(u32 set_front_face_value) noexcept; + +/** + * Convert index type to DXGI_FORMAT + */ +DXGI_FORMAT get_index_type(u8 index_type) noexcept; + +/** + * Convert vertex attribute format and size to DXGI_FORMAT + */ +DXGI_FORMAT get_vertex_attribute_format(u8 type, u8 size) noexcept; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp index c26f86c2ff..4ad6251a59 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp @@ -62,6 +62,7 @@ void D3D12FragmentDecompiler::insertIntputs(std::stringstream & OS) OS << " float4 tc6 : TEXCOORD6;" << std::endl; OS << " float4 tc7 : TEXCOORD7;" << std::endl; OS << " float4 tc8 : TEXCOORD8;" << std::endl; + OS << " float4 tc9 : TEXCOORD9;" << std::endl; OS << "};" << std::endl; } diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index f9c7cabd9a..a8bfbaf469 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -9,6 +9,7 @@ #include "d3dx12.h" #include #include "Emu/state.h" +#include "D3D12Formats.h" #pragma comment(lib, "d2d1") #pragma comment(lib, "DXGI") #pragma comment(lib, "Dwrite") @@ -18,87 +19,46 @@ PFN_D3D12_GET_DEBUG_INTERFACE wrapD3D12GetDebugInterface; PFN_D3D12_SERIALIZE_ROOT_SIGNATURE wrapD3D12SerializeRootSignature; PFN_D3D11ON12_CREATE_DEVICE wrapD3D11On12CreateDevice; -static HMODULE D3D12Module; -static HMODULE D3D11Module; +namespace +{ +HMODULE D3D12Module; +HMODULE D3D11Module; -static void loadD3D12FunctionPointers() +void loadD3D12FunctionPointers() { D3D12Module = LoadLibrary(L"d3d12.dll"); + if (!D3D12Module) + unreachable("Failed to load d3d12.dll"); wrapD3D12CreateDevice = (PFN_D3D12_CREATE_DEVICE)GetProcAddress(D3D12Module, "D3D12CreateDevice"); wrapD3D12GetDebugInterface = (PFN_D3D12_GET_DEBUG_INTERFACE)GetProcAddress(D3D12Module, "D3D12GetDebugInterface"); wrapD3D12SerializeRootSignature = (PFN_D3D12_SERIALIZE_ROOT_SIGNATURE)GetProcAddress(D3D12Module, "D3D12SerializeRootSignature"); D3D11Module = LoadLibrary(L"d3d11.dll"); + if (!D3D11Module) + unreachable("Failed to load d3d11.dll"); wrapD3D11On12CreateDevice = (PFN_D3D11ON12_CREATE_DEVICE)GetProcAddress(D3D11Module, "D3D11On12CreateDevice"); } -static void unloadD3D12FunctionPointers() +void unloadD3D12FunctionPointers() { FreeLibrary(D3D12Module); FreeLibrary(D3D11Module); } -void D3D12GSRender::ResourceStorage::Reset() +/** + * Wait until command queue has completed all task. + */ +void wait_for_command_queue(ID3D12Device *device, ID3D12CommandQueue *command_queue) { - m_descriptorsHeapIndex = 0; - m_currentSamplerIndex = 0; - m_samplerDescriptorHeapIndex = 0; - - ThrowIfFailed(m_commandAllocator->Reset()); - setNewCommandList(); - - m_singleFrameLifetimeResources.clear(); + ComPtr fence; + ThrowIfFailed(device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(fence.GetAddressOf()))); + HANDLE handle = CreateEventEx(nullptr, FALSE, FALSE, EVENT_ALL_ACCESS); + fence->SetEventOnCompletion(1, handle); + command_queue->Signal(fence.Get(), 1); + WaitForSingleObjectEx(handle, INFINITE, FALSE); + CloseHandle(handle); } - -void D3D12GSRender::ResourceStorage::setNewCommandList() -{ - ThrowIfFailed(m_commandList->Reset(m_commandAllocator.Get(), nullptr)); } -void D3D12GSRender::ResourceStorage::Init(ID3D12Device *device) -{ - m_inUse = false; - m_device = device; - m_RAMFramebuffer = nullptr; - // Create a global command allocator - ThrowIfFailed(device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(m_commandAllocator.GetAddressOf()))); - - ThrowIfFailed(m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_commandAllocator.Get(), nullptr, IID_PPV_ARGS(m_commandList.GetAddressOf()))); - ThrowIfFailed(m_commandList->Close()); - - D3D12_DESCRIPTOR_HEAP_DESC descriptorHeapDesc = { D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 10000, D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE }; - ThrowIfFailed(device->CreateDescriptorHeap(&descriptorHeapDesc, IID_PPV_ARGS(&m_descriptorsHeap))); - - D3D12_DESCRIPTOR_HEAP_DESC samplerHeapDesc = { D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER , 2048, D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE }; - ThrowIfFailed(device->CreateDescriptorHeap(&samplerHeapDesc, IID_PPV_ARGS(&m_samplerDescriptorHeap[0]))); - ThrowIfFailed(device->CreateDescriptorHeap(&samplerHeapDesc, IID_PPV_ARGS(&m_samplerDescriptorHeap[1]))); - - m_frameFinishedHandle = CreateEventEx(nullptr, FALSE, FALSE, EVENT_ALL_ACCESS); - m_fenceValue = 0; - ThrowIfFailed(device->CreateFence(m_fenceValue++, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(m_frameFinishedFence.GetAddressOf()))); -} - -void D3D12GSRender::ResourceStorage::WaitAndClean() -{ - if (m_inUse) - WaitForSingleObjectEx(m_frameFinishedHandle, INFINITE, FALSE); - else - ThrowIfFailed(m_commandList->Close()); - - Reset(); - - m_dirtyTextures.clear(); - - m_RAMFramebuffer = nullptr; -} - -void D3D12GSRender::ResourceStorage::Release() -{ - m_dirtyTextures.clear(); - // NOTE: Should be released only after gfx pipeline last command has been finished. - CloseHandle(m_frameFinishedHandle); -} - - void D3D12GSRender::Shader::Release() { m_PSO->Release(); @@ -113,7 +73,7 @@ extern std::function gfxHandler; bool D3D12GSRender::invalidateAddress(u32 addr) { bool result = false; - result |= m_textureCache.invalidateAddress(addr); + result |= m_textureCache.invalidate_address(addr); return result; } @@ -148,18 +108,16 @@ D3D12GSRender::D3D12GSRender() debugInterface->EnableDebugLayer(); } - Microsoft::WRL::ComPtr dxgiFactory; - ThrowIfFailed(CreateDXGIFactory(IID_PPV_ARGS(&dxgiFactory))); + Microsoft::WRL::ComPtr dxgi_factory; + ThrowIfFailed(CreateDXGIFactory(IID_PPV_ARGS(&dxgi_factory))); // Create adapter ComPtr adaptater = nullptr; - ThrowIfFailed(dxgiFactory->EnumAdapters(rpcs3::state.config.rsx.d3d12.adaptater.value(), adaptater.GetAddressOf())); + ThrowIfFailed(dxgi_factory->EnumAdapters(rpcs3::state.config.rsx.d3d12.adaptater.value(), adaptater.GetAddressOf())); ThrowIfFailed(wrapD3D12CreateDevice(adaptater.Get(), D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&m_device))); // Queues - D3D12_COMMAND_QUEUE_DESC copyQueueDesc = {}, graphicQueueDesc = {}; - copyQueueDesc.Type = D3D12_COMMAND_LIST_TYPE_COPY; - graphicQueueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT; - ThrowIfFailed(m_device->CreateCommandQueue(&graphicQueueDesc, IID_PPV_ARGS(m_commandQueueGraphic.GetAddressOf()))); + D3D12_COMMAND_QUEUE_DESC graphic_queue_desc = { D3D12_COMMAND_LIST_TYPE_DIRECT }; + ThrowIfFailed(m_device->CreateCommandQueue(&graphic_queue_desc, IID_PPV_ARGS(m_commandQueueGraphic.GetAddressOf()))); g_descriptorStrideSRVCBVUAV = m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); g_descriptorStrideDSV = m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_DSV); @@ -167,31 +125,31 @@ D3D12GSRender::D3D12GSRender() g_descriptorStrideSamplers = m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); // Create swap chain and put them in a descriptor heap as rendertarget - DXGI_SWAP_CHAIN_DESC swapChain = {}; - swapChain.BufferCount = 2; - swapChain.Windowed = true; - swapChain.OutputWindow = (HWND)m_frame->handle(); - swapChain.BufferDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - swapChain.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; - swapChain.SampleDesc.Count = 1; - swapChain.Flags = DXGI_SWAP_CHAIN_FLAG_ALLOW_MODE_SWITCH; - swapChain.SwapEffect = DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL; + DXGI_SWAP_CHAIN_DESC swap_chain = {}; + swap_chain.BufferCount = 2; + swap_chain.Windowed = true; + swap_chain.OutputWindow = (HWND)m_frame->handle(); + swap_chain.BufferDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + swap_chain.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; + swap_chain.SampleDesc.Count = 1; + swap_chain.Flags = DXGI_SWAP_CHAIN_FLAG_ALLOW_MODE_SWITCH; + swap_chain.SwapEffect = DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL; - ThrowIfFailed(dxgiFactory->CreateSwapChain(m_commandQueueGraphic.Get(), &swapChain, (IDXGISwapChain**)m_swapChain.GetAddressOf())); + ThrowIfFailed(dxgi_factory->CreateSwapChain(m_commandQueueGraphic.Get(), &swap_chain, (IDXGISwapChain**)m_swapChain.GetAddressOf())); m_swapChain->GetBuffer(0, IID_PPV_ARGS(&m_backBuffer[0])); m_swapChain->GetBuffer(1, IID_PPV_ARGS(&m_backBuffer[1])); - D3D12_DESCRIPTOR_HEAP_DESC heapDesc = { D3D12_DESCRIPTOR_HEAP_TYPE_RTV, 1}; - D3D12_RENDER_TARGET_VIEW_DESC rttDesc = {}; - rttDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; - rttDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - m_device->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(&m_backbufferAsRendertarget[0])); - m_device->CreateRenderTargetView(m_backBuffer[0].Get(), &rttDesc, m_backbufferAsRendertarget[0]->GetCPUDescriptorHandleForHeapStart()); - m_device->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(&m_backbufferAsRendertarget[1])); - m_device->CreateRenderTargetView(m_backBuffer[1].Get(), &rttDesc, m_backbufferAsRendertarget[1]->GetCPUDescriptorHandleForHeapStart()); + D3D12_DESCRIPTOR_HEAP_DESC render_target_descriptor_heap_desc = { D3D12_DESCRIPTOR_HEAP_TYPE_RTV, 1}; + D3D12_RENDER_TARGET_VIEW_DESC renter_target_view_desc = {}; + renter_target_view_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; + renter_target_view_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + m_device->CreateDescriptorHeap(&render_target_descriptor_heap_desc, IID_PPV_ARGS(&m_backbufferAsRendertarget[0])); + m_device->CreateRenderTargetView(m_backBuffer[0].Get(), &renter_target_view_desc, m_backbufferAsRendertarget[0]->GetCPUDescriptorHandleForHeapStart()); + m_device->CreateDescriptorHeap(&render_target_descriptor_heap_desc, IID_PPV_ARGS(&m_backbufferAsRendertarget[1])); + m_device->CreateRenderTargetView(m_backBuffer[1].Get(), &renter_target_view_desc, m_backbufferAsRendertarget[1]->GetCPUDescriptorHandleForHeapStart()); // Common root signatures - for (unsigned textureCount = 0; textureCount < 17; textureCount++) + for (unsigned texture_count = 0; texture_count < 17; texture_count++) { CD3DX12_DESCRIPTOR_RANGE descriptorRange[] = { @@ -200,30 +158,30 @@ D3D12GSRender::D3D12GSRender() // Constants CD3DX12_DESCRIPTOR_RANGE(D3D12_DESCRIPTOR_RANGE_TYPE_CBV, 2, 1), // Textures - CD3DX12_DESCRIPTOR_RANGE(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, textureCount, 0), + CD3DX12_DESCRIPTOR_RANGE(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, texture_count, 0), // Samplers - CD3DX12_DESCRIPTOR_RANGE(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, textureCount, 0), + CD3DX12_DESCRIPTOR_RANGE(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, texture_count, 0), }; CD3DX12_ROOT_PARAMETER RP[2]; - RP[0].InitAsDescriptorTable((textureCount > 0) ? 3 : 2, &descriptorRange[0]); + RP[0].InitAsDescriptorTable((texture_count > 0) ? 3 : 2, &descriptorRange[0]); RP[1].InitAsDescriptorTable(1, &descriptorRange[3]); Microsoft::WRL::ComPtr rootSignatureBlob; Microsoft::WRL::ComPtr errorBlob; ThrowIfFailed(wrapD3D12SerializeRootSignature( - &CD3DX12_ROOT_SIGNATURE_DESC((textureCount > 0) ? 2 : 1, RP, 0, 0, D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), + &CD3DX12_ROOT_SIGNATURE_DESC((texture_count > 0) ? 2 : 1, RP, 0, 0, D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), D3D_ROOT_SIGNATURE_VERSION_1, &rootSignatureBlob, &errorBlob)); m_device->CreateRootSignature(0, rootSignatureBlob->GetBufferPointer(), rootSignatureBlob->GetBufferSize(), - IID_PPV_ARGS(m_rootSignatures[textureCount].GetAddressOf())); + IID_PPV_ARGS(m_rootSignatures[texture_count].GetAddressOf())); } - m_perFrameStorage[0].Init(m_device.Get()); - m_perFrameStorage[0].Reset(); - m_perFrameStorage[1].Init(m_device.Get()); - m_perFrameStorage[1].Reset(); + m_perFrameStorage[0].init(m_device.Get()); + m_perFrameStorage[0].reset(); + m_perFrameStorage[1].init(m_device.Get()); + m_perFrameStorage[1].reset(); initConvertShader(); m_outputScalingPass.Init(m_device.Get(), m_commandQueueGraphic.Get()); @@ -238,51 +196,41 @@ D3D12GSRender::D3D12GSRender() IID_PPV_ARGS(&m_dummyTexture)) ); - m_readbackResources.Init(m_device.Get(), 1024 * 1024 * 128, D3D12_HEAP_TYPE_READBACK, D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS); - m_UAVHeap.Init(m_device.Get(), 1024 * 1024 * 128, D3D12_HEAP_TYPE_DEFAULT, D3D12_HEAP_FLAG_ALLOW_ONLY_NON_RT_DS_TEXTURES); + m_readbackResources.init(m_device.Get(), 1024 * 1024 * 128, D3D12_HEAP_TYPE_READBACK, D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS); + m_UAVHeap.init(m_device.Get(), 1024 * 1024 * 128, D3D12_HEAP_TYPE_DEFAULT, D3D12_HEAP_FLAG_ALLOW_ONLY_NON_RT_DS_TEXTURES); m_rtts.Init(m_device.Get()); - m_constantsData.Init(m_device.Get(), 1024 * 1024 * 64, D3D12_HEAP_TYPE_UPLOAD, D3D12_HEAP_FLAG_NONE); - m_vertexIndexData.Init(m_device.Get(), 1024 * 1024 * 384, D3D12_HEAP_TYPE_UPLOAD, D3D12_HEAP_FLAG_NONE); - m_textureUploadData.Init(m_device.Get(), 1024 * 1024 * 512, D3D12_HEAP_TYPE_UPLOAD, D3D12_HEAP_FLAG_NONE); + m_constantsData.init(m_device.Get(), 1024 * 1024 * 64, D3D12_HEAP_TYPE_UPLOAD, D3D12_HEAP_FLAG_NONE); + m_vertexIndexData.init(m_device.Get(), 1024 * 1024 * 384, D3D12_HEAP_TYPE_UPLOAD, D3D12_HEAP_FLAG_NONE); + m_textureUploadData.init(m_device.Get(), 1024 * 1024 * 512, D3D12_HEAP_TYPE_UPLOAD, D3D12_HEAP_FLAG_NONE); if (rpcs3::config.rsx.d3d12.overlay.value()) - InitD2DStructures(); + init_d2d_structures(); } D3D12GSRender::~D3D12GSRender() { - // wait until queue has completed - ComPtr fence; - ThrowIfFailed(m_device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(fence.GetAddressOf()))); - HANDLE handle = CreateEventEx(nullptr, FALSE, FALSE, EVENT_ALL_ACCESS); - fence->SetEventOnCompletion(1, handle); + wait_for_command_queue(m_device.Get(), m_commandQueueGraphic.Get()); - m_commandQueueGraphic->Signal(fence.Get(), 1); - WaitForSingleObjectEx(handle, INFINITE, FALSE); - CloseHandle(handle); - - { - m_textureCache.unprotedAll(); - } + m_textureCache.unprotect_all(); gfxHandler = [this](u32) { return false; }; - m_constantsData.Release(); - m_vertexIndexData.Release(); - m_textureUploadData.Release(); + m_constantsData.release(); + m_vertexIndexData.release(); + m_textureUploadData.release(); m_UAVHeap.m_heap->Release(); m_readbackResources.m_heap->Release(); m_texturesRTTs.clear(); m_dummyTexture->Release(); m_convertPSO->Release(); m_convertRootSignature->Release(); - m_perFrameStorage[0].Release(); - m_perFrameStorage[1].Release(); + m_perFrameStorage[0].release(); + m_perFrameStorage[1].release(); m_rtts.Release(); m_outputScalingPass.Release(); - ReleaseD2DStructures(); + release_d2d_structures(); } void D3D12GSRender::onexit_thread() @@ -297,10 +245,10 @@ bool D3D12GSRender::domethod(u32 cmd, u32 arg) clear_surface(arg); return true; case NV4097_TEXTURE_READ_SEMAPHORE_RELEASE: - semaphore_PGRAPH_texture_read_release(); + copy_render_target_to_dma_location(); return false; //call rsx::thread method implementation case NV4097_BACK_END_WRITE_SEMAPHORE_RELEASE: - semaphore_PGRAPH_backend_release(); + copy_render_target_to_dma_location(); return false; //call rsx::thread method implementation default: @@ -308,15 +256,66 @@ bool D3D12GSRender::domethod(u32 cmd, u32 arg) } } +namespace +{ + UINT get_num_rtt(u8 color_target) noexcept + { + switch (color_target) + { + case CELL_GCM_SURFACE_TARGET_NONE: return 0; + case CELL_GCM_SURFACE_TARGET_0: + case CELL_GCM_SURFACE_TARGET_1: return 1; + case CELL_GCM_SURFACE_TARGET_MRT1: return 2; + case CELL_GCM_SURFACE_TARGET_MRT2: return 3; + case CELL_GCM_SURFACE_TARGET_MRT3: return 4; + } + unreachable("Wrong color target"); + } + + std::vector get_rtt_indexes(u8 color_target) noexcept + { + switch (color_target) + { + case CELL_GCM_SURFACE_TARGET_NONE: return{}; + case CELL_GCM_SURFACE_TARGET_0: return{ 0 }; + case CELL_GCM_SURFACE_TARGET_1: return{ 1 }; + case CELL_GCM_SURFACE_TARGET_MRT1: return{ 0, 1 }; + case CELL_GCM_SURFACE_TARGET_MRT2: return{ 0, 1, 2 }; + case CELL_GCM_SURFACE_TARGET_MRT3: return{ 0, 1, 2, 3 }; + } + unreachable("Wrong color target"); + } + + std::array get_clear_color(u32 clear_color) noexcept + { + u8 clear_a = clear_color >> 24; + u8 clear_r = clear_color >> 16; + u8 clear_g = clear_color >> 8; + u8 clear_b = clear_color; + return + { + clear_r / 255.0f, + clear_g / 255.0f, + clear_b / 255.0f, + clear_a / 255.0f + }; + } + + u8 get_clear_stencil(u32 register_value) noexcept + { + return register_value & 0xff; + } +} + void D3D12GSRender::clear_surface(u32 arg) { - std::chrono::time_point startDuration = std::chrono::system_clock::now(); + std::chrono::time_point start_duration = std::chrono::system_clock::now(); - std::chrono::time_point rttDurationStart = std::chrono::system_clock::now(); - PrepareRenderTargets(getCurrentResourceStorage().m_commandList.Get()); + std::chrono::time_point rtt_duration_start = std::chrono::system_clock::now(); + prepare_render_targets(getCurrentResourceStorage().command_list.Get()); - std::chrono::time_point rttDurationEnd = std::chrono::system_clock::now(); - m_timers.m_rttDuration += std::chrono::duration_cast(rttDurationEnd - rttDurationStart).count(); + std::chrono::time_point rtt_duration_end = std::chrono::system_clock::now(); + m_timers.m_rttDuration += std::chrono::duration_cast(rtt_duration_end - rtt_duration_start).count(); /* if (m_set_color_mask) { @@ -335,202 +334,115 @@ void D3D12GSRender::clear_surface(u32 arg) { u32 clear_depth = rsx::method_registers[NV4097_SET_ZSTENCIL_CLEAR_VALUE] >> 8; u32 max_depth_value = m_surface.depth_format == CELL_GCM_SURFACE_Z16 ? 0x0000ffff : 0x00ffffff; - getCurrentResourceStorage().m_commandList->ClearDepthStencilView(m_rtts.m_depthStencilDescriptorHeap->GetCPUDescriptorHandleForHeapStart(), D3D12_CLEAR_FLAG_DEPTH, clear_depth / (float)max_depth_value, 0, 0, nullptr); + getCurrentResourceStorage().command_list->ClearDepthStencilView(m_rtts.m_depthStencilDescriptorHeap->GetCPUDescriptorHandleForHeapStart(), D3D12_CLEAR_FLAG_DEPTH, clear_depth / (float)max_depth_value, 0, 0, nullptr); } if (arg & 0x2) - { - u8 clear_stencil = rsx::method_registers[NV4097_SET_ZSTENCIL_CLEAR_VALUE] & 0xff; - getCurrentResourceStorage().m_commandList->ClearDepthStencilView(m_rtts.m_depthStencilDescriptorHeap->GetCPUDescriptorHandleForHeapStart(), D3D12_CLEAR_FLAG_STENCIL, 0.f, clear_stencil, 0, nullptr); - } + getCurrentResourceStorage().command_list->ClearDepthStencilView(m_rtts.m_depthStencilDescriptorHeap->GetCPUDescriptorHandleForHeapStart(), D3D12_CLEAR_FLAG_STENCIL, 0.f, + get_clear_stencil(rsx::method_registers[NV4097_SET_ZSTENCIL_CLEAR_VALUE]), 0, nullptr); if (arg & 0xF0) { - u32 clear_color = rsx::method_registers[NV4097_SET_COLOR_CLEAR_VALUE]; - u8 clear_a = clear_color >> 24; - u8 clear_r = clear_color >> 16; - u8 clear_g = clear_color >> 8; - u8 clear_b = clear_color; - float clearColor[] = - { - clear_r / 255.0f, - clear_g / 255.0f, - clear_b / 255.0f, - clear_a / 255.0f - }; - - size_t g_RTTIncrement = m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_RTV); - switch (u32 color_target = rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET]) - { - case CELL_GCM_SURFACE_TARGET_NONE: break; - - case CELL_GCM_SURFACE_TARGET_0: - case CELL_GCM_SURFACE_TARGET_1: - getCurrentResourceStorage().m_commandList->ClearRenderTargetView(CD3DX12_CPU_DESCRIPTOR_HANDLE(m_rtts.m_renderTargetsDescriptorsHeap->GetCPUDescriptorHandleForHeapStart()), clearColor, 0, nullptr); - break; - case CELL_GCM_SURFACE_TARGET_MRT1: - getCurrentResourceStorage().m_commandList->ClearRenderTargetView(CD3DX12_CPU_DESCRIPTOR_HANDLE(m_rtts.m_renderTargetsDescriptorsHeap->GetCPUDescriptorHandleForHeapStart()), clearColor, 0, nullptr); - getCurrentResourceStorage().m_commandList->ClearRenderTargetView(CD3DX12_CPU_DESCRIPTOR_HANDLE(m_rtts.m_renderTargetsDescriptorsHeap->GetCPUDescriptorHandleForHeapStart()).Offset(1, g_descriptorStrideRTV), clearColor, 0, nullptr); - break; - case CELL_GCM_SURFACE_TARGET_MRT2: - getCurrentResourceStorage().m_commandList->ClearRenderTargetView(CD3DX12_CPU_DESCRIPTOR_HANDLE(m_rtts.m_renderTargetsDescriptorsHeap->GetCPUDescriptorHandleForHeapStart()), clearColor, 0, nullptr); - getCurrentResourceStorage().m_commandList->ClearRenderTargetView(CD3DX12_CPU_DESCRIPTOR_HANDLE(m_rtts.m_renderTargetsDescriptorsHeap->GetCPUDescriptorHandleForHeapStart()).Offset(1, g_descriptorStrideRTV), clearColor, 0, nullptr); - getCurrentResourceStorage().m_commandList->ClearRenderTargetView(CD3DX12_CPU_DESCRIPTOR_HANDLE(m_rtts.m_renderTargetsDescriptorsHeap->GetCPUDescriptorHandleForHeapStart()).Offset(2, g_descriptorStrideRTV), clearColor, 0, nullptr); - break; - case CELL_GCM_SURFACE_TARGET_MRT3: - getCurrentResourceStorage().m_commandList->ClearRenderTargetView(CD3DX12_CPU_DESCRIPTOR_HANDLE(m_rtts.m_renderTargetsDescriptorsHeap->GetCPUDescriptorHandleForHeapStart()), clearColor, 0, nullptr); - getCurrentResourceStorage().m_commandList->ClearRenderTargetView(CD3DX12_CPU_DESCRIPTOR_HANDLE(m_rtts.m_renderTargetsDescriptorsHeap->GetCPUDescriptorHandleForHeapStart()).Offset(1, g_descriptorStrideRTV), clearColor, 0, nullptr); - getCurrentResourceStorage().m_commandList->ClearRenderTargetView(CD3DX12_CPU_DESCRIPTOR_HANDLE(m_rtts.m_renderTargetsDescriptorsHeap->GetCPUDescriptorHandleForHeapStart()).Offset(2, g_descriptorStrideRTV), clearColor, 0, nullptr); - getCurrentResourceStorage().m_commandList->ClearRenderTargetView(CD3DX12_CPU_DESCRIPTOR_HANDLE(m_rtts.m_renderTargetsDescriptorsHeap->GetCPUDescriptorHandleForHeapStart()).Offset(3, g_descriptorStrideRTV), clearColor, 0, nullptr); - break; - default: - LOG_ERROR(RSX, "Bad surface color target: %d", color_target); - } + for (u8 i : get_rtt_indexes(rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET])) + getCurrentResourceStorage().command_list->ClearRenderTargetView(CD3DX12_CPU_DESCRIPTOR_HANDLE(m_rtts.m_renderTargetsDescriptorsHeap->GetCPUDescriptorHandleForHeapStart()).Offset(i, g_descriptorStrideRTV), + get_clear_color(rsx::method_registers[NV4097_SET_COLOR_CLEAR_VALUE]).data(), 0, nullptr); } - std::chrono::time_point endDuration = std::chrono::system_clock::now(); - m_timers.m_drawCallDuration += std::chrono::duration_cast(endDuration - startDuration).count(); + std::chrono::time_point end_duration = std::chrono::system_clock::now(); + m_timers.m_drawCallDuration += std::chrono::duration_cast(end_duration - start_duration).count(); m_timers.m_drawCallCount++; if (rpcs3::config.rsx.d3d12.debug_output.value()) { - ThrowIfFailed(getCurrentResourceStorage().m_commandList->Close()); - m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)getCurrentResourceStorage().m_commandList.GetAddressOf()); - getCurrentResourceStorage().setNewCommandList(); + ThrowIfFailed(getCurrentResourceStorage().command_list->Close()); + m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)getCurrentResourceStorage().command_list.GetAddressOf()); + getCurrentResourceStorage().set_new_command_list(); } } void D3D12GSRender::end() { - std::chrono::time_point startDuration = std::chrono::system_clock::now(); + std::chrono::time_point start_duration = std::chrono::system_clock::now(); - std::chrono::time_point rttDurationStart = std::chrono::system_clock::now(); - PrepareRenderTargets(getCurrentResourceStorage().m_commandList.Get()); + std::chrono::time_point rtt_duration_start = std::chrono::system_clock::now(); + prepare_render_targets(getCurrentResourceStorage().command_list.Get()); - std::chrono::time_point rttDurationEnd = std::chrono::system_clock::now(); - m_timers.m_rttDuration += std::chrono::duration_cast(rttDurationEnd - rttDurationStart).count(); + std::chrono::time_point rtt_duration_end = std::chrono::system_clock::now(); + m_timers.m_rttDuration += std::chrono::duration_cast(rtt_duration_end - rtt_duration_start).count(); - std::chrono::time_point vertexIndexDurationStart = std::chrono::system_clock::now(); + std::chrono::time_point vertex_index_duration_start = std::chrono::system_clock::now(); if (!vertex_index_array.empty() || vertex_draw_count) - upload_vertex_index_data(getCurrentResourceStorage().m_commandList.Get()); + upload_and_set_vertex_index_data(getCurrentResourceStorage().command_list.Get()); - std::chrono::time_point vertexIndexDurationEnd = std::chrono::system_clock::now(); - m_timers.m_vertexIndexDuration += std::chrono::duration_cast(vertexIndexDurationEnd - vertexIndexDurationStart).count(); + std::chrono::time_point vertex_index_duration_end = std::chrono::system_clock::now(); + m_timers.m_vertexIndexDuration += std::chrono::duration_cast(vertex_index_duration_end - vertex_index_duration_start).count(); - std::chrono::time_point programLoadStart = std::chrono::system_clock::now(); - if (!LoadProgram()) + std::chrono::time_point program_load_start = std::chrono::system_clock::now(); + if (!load_program()) { LOG_ERROR(RSX, "LoadProgram failed."); Emu.Pause(); return; } - std::chrono::time_point programLoadEnd = std::chrono::system_clock::now(); - m_timers.m_programLoadDuration += std::chrono::duration_cast(programLoadEnd - programLoadStart).count(); + std::chrono::time_point program_load_end = std::chrono::system_clock::now(); + m_timers.m_programLoadDuration += std::chrono::duration_cast(program_load_end - program_load_start).count(); - getCurrentResourceStorage().m_commandList->SetGraphicsRootSignature(m_rootSignatures[std::get<2>(*m_PSO)].Get()); - getCurrentResourceStorage().m_commandList->OMSetStencilRef(rsx::method_registers[NV4097_SET_STENCIL_FUNC_REF]); + getCurrentResourceStorage().command_list->SetGraphicsRootSignature(m_rootSignatures[std::get<2>(*m_PSO)].Get()); + getCurrentResourceStorage().command_list->OMSetStencilRef(rsx::method_registers[NV4097_SET_STENCIL_FUNC_REF]); - std::chrono::time_point constantsDurationStart = std::chrono::system_clock::now(); + std::chrono::time_point constants_duration_start = std::chrono::system_clock::now(); - size_t currentDescriptorIndex = getCurrentResourceStorage().m_descriptorsHeapIndex; + size_t currentDescriptorIndex = getCurrentResourceStorage().descriptors_heap_index; // Constants - setScaleOffset(currentDescriptorIndex); - FillVertexShaderConstantsBuffer(currentDescriptorIndex + 1); - FillPixelShaderConstantsBuffer(currentDescriptorIndex + 2); + upload_and_bind_scale_offset_matrix(currentDescriptorIndex); + upload_and_bind_vertex_shader_constants(currentDescriptorIndex + 1); + upload_and_bind_fragment_shader_constants(currentDescriptorIndex + 2); - std::chrono::time_point constantsDurationEnd = std::chrono::system_clock::now(); - m_timers.m_constantsDuration += std::chrono::duration_cast(constantsDurationEnd - constantsDurationStart).count(); + std::chrono::time_point constants_duration_end = std::chrono::system_clock::now(); + m_timers.m_constantsDuration += std::chrono::duration_cast(constants_duration_end - constants_duration_start).count(); - getCurrentResourceStorage().m_commandList->SetPipelineState(std::get<0>(*m_PSO)); + getCurrentResourceStorage().command_list->SetPipelineState(std::get<0>(*m_PSO)); - std::chrono::time_point textureDurationStart = std::chrono::system_clock::now(); + std::chrono::time_point texture_duration_start = std::chrono::system_clock::now(); if (std::get<2>(*m_PSO) > 0) { - size_t usedTexture = UploadTextures(getCurrentResourceStorage().m_commandList.Get(), currentDescriptorIndex + 3); - - // Fill empty slots - for (; usedTexture < std::get<2>(*m_PSO); usedTexture++) - { - D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; - srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; - srvDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - srvDesc.Texture2D.MipLevels = 1; - srvDesc.Shader4ComponentMapping = D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING( - D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0, - D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0, - D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0, - D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0); - m_device->CreateShaderResourceView(m_dummyTexture, &srvDesc, - CD3DX12_CPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_descriptorsHeap->GetCPUDescriptorHandleForHeapStart()) - .Offset((INT)currentDescriptorIndex + 3 + (INT)usedTexture, g_descriptorStrideSRVCBVUAV) - ); - - D3D12_SAMPLER_DESC samplerDesc = {}; - samplerDesc.Filter = D3D12_FILTER_MIN_MAG_MIP_POINT; - samplerDesc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_WRAP; - samplerDesc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_WRAP; - samplerDesc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_WRAP; - m_device->CreateSampler(&samplerDesc, - CD3DX12_CPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_samplerDescriptorHeap[getCurrentResourceStorage().m_samplerDescriptorHeapIndex]->GetCPUDescriptorHandleForHeapStart()) - .Offset((INT)getCurrentResourceStorage().m_currentSamplerIndex + (INT)usedTexture, g_descriptorStrideSamplers) - ); - } + upload_and_bind_textures(getCurrentResourceStorage().command_list.Get(), currentDescriptorIndex + 3, std::get<2>(*m_PSO) > 0); ID3D12DescriptorHeap *descriptors[] = { - getCurrentResourceStorage().m_descriptorsHeap.Get(), - getCurrentResourceStorage().m_samplerDescriptorHeap[getCurrentResourceStorage().m_samplerDescriptorHeapIndex].Get(), + getCurrentResourceStorage().descriptors_heap.Get(), + getCurrentResourceStorage().sampler_descriptor_heap[getCurrentResourceStorage().sampler_descriptors_heap_index].Get(), }; - getCurrentResourceStorage().m_commandList->SetDescriptorHeaps(2, descriptors); + getCurrentResourceStorage().command_list->SetDescriptorHeaps(2, descriptors); - getCurrentResourceStorage().m_commandList->SetGraphicsRootDescriptorTable(0, - CD3DX12_GPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_descriptorsHeap->GetGPUDescriptorHandleForHeapStart()) + getCurrentResourceStorage().command_list->SetGraphicsRootDescriptorTable(0, + CD3DX12_GPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().descriptors_heap->GetGPUDescriptorHandleForHeapStart()) .Offset((INT)currentDescriptorIndex, g_descriptorStrideSRVCBVUAV) ); - getCurrentResourceStorage().m_commandList->SetGraphicsRootDescriptorTable(1, - CD3DX12_GPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_samplerDescriptorHeap[getCurrentResourceStorage().m_samplerDescriptorHeapIndex]->GetGPUDescriptorHandleForHeapStart()) - .Offset((INT)getCurrentResourceStorage().m_currentSamplerIndex, g_descriptorStrideSamplers) + getCurrentResourceStorage().command_list->SetGraphicsRootDescriptorTable(1, + CD3DX12_GPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().sampler_descriptor_heap[getCurrentResourceStorage().sampler_descriptors_heap_index]->GetGPUDescriptorHandleForHeapStart()) + .Offset((INT)getCurrentResourceStorage().current_sampler_index, g_descriptorStrideSamplers) ); - getCurrentResourceStorage().m_currentSamplerIndex += usedTexture; - getCurrentResourceStorage().m_descriptorsHeapIndex += usedTexture + 3; + getCurrentResourceStorage().current_sampler_index += std::get<2>(*m_PSO); + getCurrentResourceStorage().descriptors_heap_index += std::get<2>(*m_PSO) + 3; } else { - getCurrentResourceStorage().m_commandList->SetDescriptorHeaps(1, getCurrentResourceStorage().m_descriptorsHeap.GetAddressOf()); - getCurrentResourceStorage().m_commandList->SetGraphicsRootDescriptorTable(0, - CD3DX12_GPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_descriptorsHeap->GetGPUDescriptorHandleForHeapStart()) + getCurrentResourceStorage().command_list->SetDescriptorHeaps(1, getCurrentResourceStorage().descriptors_heap.GetAddressOf()); + getCurrentResourceStorage().command_list->SetGraphicsRootDescriptorTable(0, + CD3DX12_GPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().descriptors_heap->GetGPUDescriptorHandleForHeapStart()) .Offset((INT)currentDescriptorIndex, g_descriptorStrideSRVCBVUAV) ); - getCurrentResourceStorage().m_descriptorsHeapIndex += 3; + getCurrentResourceStorage().descriptors_heap_index += 3; } - std::chrono::time_point textureDurationEnd = std::chrono::system_clock::now(); - m_timers.m_textureDuration += std::chrono::duration_cast(textureDurationEnd - textureDurationStart).count(); + std::chrono::time_point texture_duration_end = std::chrono::system_clock::now(); + m_timers.m_textureDuration += std::chrono::duration_cast(texture_duration_end - texture_duration_start).count(); - size_t numRTT; - switch (u32 color_target = rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET]) - { - case CELL_GCM_SURFACE_TARGET_NONE: break; - case CELL_GCM_SURFACE_TARGET_0: - case CELL_GCM_SURFACE_TARGET_1: - numRTT = 1; - break; - case CELL_GCM_SURFACE_TARGET_MRT1: - numRTT = 2; - break; - case CELL_GCM_SURFACE_TARGET_MRT2: - numRTT = 3; - break; - case CELL_GCM_SURFACE_TARGET_MRT3: - numRTT = 4; - break; - default: - LOG_ERROR(RSX, "Bad surface color target: %d", color_target); - } - - getCurrentResourceStorage().m_commandList->OMSetRenderTargets((UINT)numRTT, &m_rtts.m_renderTargetsDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(), true, + size_t num_rtt = get_num_rtt(rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET]); + getCurrentResourceStorage().command_list->OMSetRenderTargets((UINT)num_rtt, &m_rtts.m_renderTargetsDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(), true, &CD3DX12_CPU_DESCRIPTOR_HANDLE(m_rtts.m_depthStencilDescriptorHeap->GetCPUDescriptorHandleForHeapStart())); int clip_w = rsx::method_registers[NV4097_SET_SURFACE_CLIP_HORIZONTAL] >> 16; @@ -545,7 +457,7 @@ void D3D12GSRender::end() -1.f, 1.f }; - getCurrentResourceStorage().m_commandList->RSSetViewports(1, &viewport); + getCurrentResourceStorage().command_list->RSSetViewports(1, &viewport); D3D12_RECT box = { @@ -554,65 +466,36 @@ void D3D12GSRender::end() (LONG)clip_w, (LONG)clip_h, }; - getCurrentResourceStorage().m_commandList->RSSetScissorRects(1, &box); + getCurrentResourceStorage().command_list->RSSetScissorRects(1, &box); - switch (draw_mode) - { - case CELL_GCM_PRIMITIVE_POINTS: - getCurrentResourceStorage().m_commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_POINTLIST); - break; - case CELL_GCM_PRIMITIVE_LINES: - getCurrentResourceStorage().m_commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_LINELIST); - break; - case CELL_GCM_PRIMITIVE_LINE_LOOP: - getCurrentResourceStorage().m_commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_LINELIST_ADJ); - break; - case CELL_GCM_PRIMITIVE_LINE_STRIP: - getCurrentResourceStorage().m_commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_LINESTRIP); - break; - case CELL_GCM_PRIMITIVE_TRIANGLES: - getCurrentResourceStorage().m_commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); - break; - case CELL_GCM_PRIMITIVE_TRIANGLE_STRIP: - getCurrentResourceStorage().m_commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); - break; - case CELL_GCM_PRIMITIVE_TRIANGLE_FAN: - case CELL_GCM_PRIMITIVE_QUADS: - getCurrentResourceStorage().m_commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); - break; - case CELL_GCM_PRIMITIVE_QUAD_STRIP: - case CELL_GCM_PRIMITIVE_POLYGON: - default: - getCurrentResourceStorage().m_commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); - LOG_ERROR(RSX, "Unsupported primitive type"); - break; - } + getCurrentResourceStorage().command_list->IASetPrimitiveTopology(get_primitive_topology(draw_mode)); if (m_renderingInfo.m_indexed) - getCurrentResourceStorage().m_commandList->DrawIndexedInstanced((UINT)m_renderingInfo.m_count, 1, 0, 0, 0); + getCurrentResourceStorage().command_list->DrawIndexedInstanced((UINT)m_renderingInfo.m_count, 1, 0, 0, 0); else - getCurrentResourceStorage().m_commandList->DrawInstanced((UINT)m_renderingInfo.m_count, 1, 0, 0); + getCurrentResourceStorage().command_list->DrawInstanced((UINT)m_renderingInfo.m_count, 1, 0, 0); vertex_index_array.clear(); - std::chrono::time_point endDuration = std::chrono::system_clock::now(); - m_timers.m_drawCallDuration += std::chrono::duration_cast(endDuration - startDuration).count(); + std::chrono::time_point end_duration = std::chrono::system_clock::now(); + m_timers.m_drawCallDuration += std::chrono::duration_cast(end_duration - start_duration).count(); m_timers.m_drawCallCount++; if (rpcs3::config.rsx.d3d12.debug_output.value()) { - ThrowIfFailed(getCurrentResourceStorage().m_commandList->Close()); - m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)getCurrentResourceStorage().m_commandList.GetAddressOf()); - getCurrentResourceStorage().setNewCommandList(); + ThrowIfFailed(getCurrentResourceStorage().command_list->Close()); + m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)getCurrentResourceStorage().command_list.GetAddressOf()); + getCurrentResourceStorage().set_new_command_list(); } m_first_count_pairs.clear(); m_renderingInfo.m_indexed = false; thread::end(); } -static bool -isFlipSurfaceInLocalMemory(u32 surfaceColorTarget) +namespace { - switch (surfaceColorTarget) +bool is_flip_surface_in_global_memory(u32 color_target) +{ + switch (color_target) { case CELL_GCM_SURFACE_TARGET_0: case CELL_GCM_SURFACE_TARGET_1: @@ -621,44 +504,45 @@ isFlipSurfaceInLocalMemory(u32 surfaceColorTarget) case CELL_GCM_SURFACE_TARGET_MRT3: return true; case CELL_GCM_SURFACE_TARGET_NONE: - default: return false; } + unreachable("Wrong color target"); +} } void D3D12GSRender::flip(int buffer) { - ID3D12Resource *resourceToFlip; + ID3D12Resource *resource_to_flip; float viewport_w, viewport_h; - if (!isFlipSurfaceInLocalMemory(rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET])) + if (!is_flip_surface_in_global_memory(rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET])) { - ResourceStorage &storage = getCurrentResourceStorage(); - assert(storage.m_RAMFramebuffer == nullptr); + resource_storage &storage = getCurrentResourceStorage(); + assert(storage.ram_framebuffer == nullptr); - size_t w = 0, h = 0, rowPitch = 0; + size_t w = 0, h = 0, row_pitch = 0; size_t offset = 0; if (false) { - CellGcmDisplayInfo* buffers;// = vm::ps3::_ptr(m_gcm_buffers_addr); + CellGcmDisplayInfo* buffers = nullptr;// = vm::ps3::_ptr(m_gcm_buffers_addr); u32 addr = rsx::get_address(gcm_buffers[gcm_current_buffer].offset, CELL_GCM_LOCATION_LOCAL); w = gcm_buffers[gcm_current_buffer].width; h = gcm_buffers[gcm_current_buffer].height; u8 *src_buffer = vm::ps3::_ptr(addr); - rowPitch = align(w * 4, 256); - size_t textureSize = rowPitch * h; // * 4 for mipmap levels - assert(m_textureUploadData.canAlloc(textureSize)); - size_t heapOffset = m_textureUploadData.alloc(textureSize); + row_pitch = align(w * 4, 256); + size_t texture_size = row_pitch * h; // * 4 for mipmap levels + assert(m_textureUploadData.can_alloc(texture_size)); + size_t heap_offset = m_textureUploadData.alloc(texture_size); void *buffer; - ThrowIfFailed(m_textureUploadData.m_heap->Map(0, &CD3DX12_RANGE(heapOffset, heapOffset + textureSize), &buffer)); - void *dstBuffer = (char*)buffer + heapOffset; + ThrowIfFailed(m_textureUploadData.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + texture_size), &buffer)); + void *mapped_buffer = (char*)buffer + heap_offset; for (unsigned row = 0; row < h; row++) - memcpy((char*)dstBuffer + row * rowPitch, (char*)src_buffer + row * w * 4, w * 4); - m_textureUploadData.m_heap->Unmap(0, &CD3DX12_RANGE(heapOffset, heapOffset + textureSize)); - offset = heapOffset; + memcpy((char*)mapped_buffer + row * row_pitch, (char*)src_buffer + row * w * 4, w * 4); + m_textureUploadData.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + texture_size)); + offset = heap_offset; } ThrowIfFailed( @@ -668,24 +552,24 @@ void D3D12GSRender::flip(int buffer) &CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R8G8B8A8_UNORM, (UINT)w, (UINT)h, 1, 1), D3D12_RESOURCE_STATE_COPY_DEST, nullptr, - IID_PPV_ARGS(storage.m_RAMFramebuffer.GetAddressOf()) + IID_PPV_ARGS(storage.ram_framebuffer.GetAddressOf()) ) ); - getCurrentResourceStorage().m_commandList->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(storage.m_RAMFramebuffer.Get(), 0), 0, 0, 0, - &CD3DX12_TEXTURE_COPY_LOCATION(m_textureUploadData.m_heap, { offset, { DXGI_FORMAT_R8G8B8A8_UNORM, (UINT)w, (UINT)h, 1, (UINT)rowPitch} }), nullptr); + getCurrentResourceStorage().command_list->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(storage.ram_framebuffer.Get(), 0), 0, 0, 0, + &CD3DX12_TEXTURE_COPY_LOCATION(m_textureUploadData.m_heap, { offset, { DXGI_FORMAT_R8G8B8A8_UNORM, (UINT)w, (UINT)h, 1, (UINT)row_pitch } }), nullptr); - getCurrentResourceStorage().m_commandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(storage.m_RAMFramebuffer.Get(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_GENERIC_READ)); - resourceToFlip = storage.m_RAMFramebuffer.Get(); + getCurrentResourceStorage().command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(storage.ram_framebuffer.Get(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_GENERIC_READ)); + resource_to_flip = storage.ram_framebuffer.Get(); viewport_w = (float)w, viewport_h = (float)h; } else { if (m_rtts.m_currentlyBoundRenderTargets[0] != nullptr) - getCurrentResourceStorage().m_commandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_rtts.m_currentlyBoundRenderTargets[0], D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_GENERIC_READ)); - resourceToFlip = m_rtts.m_currentlyBoundRenderTargets[0]; + getCurrentResourceStorage().command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_rtts.m_currentlyBoundRenderTargets[0], D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_GENERIC_READ)); + resource_to_flip = m_rtts.m_currentlyBoundRenderTargets[0]; } - getCurrentResourceStorage().m_commandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_backBuffer[m_swapChain->GetCurrentBackBufferIndex()].Get(), D3D12_RESOURCE_STATE_PRESENT, D3D12_RESOURCE_STATE_RENDER_TARGET)); + getCurrentResourceStorage().command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_backBuffer[m_swapChain->GetCurrentBackBufferIndex()].Get(), D3D12_RESOURCE_STATE_PRESENT, D3D12_RESOURCE_STATE_RENDER_TARGET)); D3D12_VIEWPORT viewport = { @@ -696,7 +580,7 @@ void D3D12GSRender::flip(int buffer) 0.f, 1.f }; - getCurrentResourceStorage().m_commandList->RSSetViewports(1, &viewport); + getCurrentResourceStorage().command_list->RSSetViewports(1, &viewport); D3D12_RECT box = { @@ -705,113 +589,113 @@ void D3D12GSRender::flip(int buffer) (LONG)m_backBuffer[m_swapChain->GetCurrentBackBufferIndex()]->GetDesc().Width, (LONG)m_backBuffer[m_swapChain->GetCurrentBackBufferIndex()]->GetDesc().Height, }; - getCurrentResourceStorage().m_commandList->RSSetScissorRects(1, &box); - getCurrentResourceStorage().m_commandList->SetGraphicsRootSignature(m_outputScalingPass.m_rootSignature); - getCurrentResourceStorage().m_commandList->SetPipelineState(m_outputScalingPass.m_PSO); + getCurrentResourceStorage().command_list->RSSetScissorRects(1, &box); + getCurrentResourceStorage().command_list->SetGraphicsRootSignature(m_outputScalingPass.m_rootSignature); + getCurrentResourceStorage().command_list->SetPipelineState(m_outputScalingPass.m_PSO); - D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + D3D12_SHADER_RESOURCE_VIEW_DESC shader_resource_view_desc = {}; // FIXME: Not always true - srvDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; - srvDesc.Texture2D.MipLevels = 1; - if (isFlipSurfaceInLocalMemory(rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET])) - srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + shader_resource_view_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + shader_resource_view_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + shader_resource_view_desc.Texture2D.MipLevels = 1; + if (is_flip_surface_in_global_memory(rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET])) + shader_resource_view_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; else - srvDesc.Shader4ComponentMapping = D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING( + shader_resource_view_desc.Shader4ComponentMapping = D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING( D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_1, D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_2, D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_3, D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0 ); - m_device->CreateShaderResourceView(resourceToFlip, &srvDesc, + m_device->CreateShaderResourceView(resource_to_flip, &shader_resource_view_desc, CD3DX12_CPU_DESCRIPTOR_HANDLE(m_outputScalingPass.m_textureDescriptorHeap->GetCPUDescriptorHandleForHeapStart()).Offset(m_swapChain->GetCurrentBackBufferIndex(), g_descriptorStrideSRVCBVUAV)); - D3D12_SAMPLER_DESC samplerDesc = {}; - samplerDesc.Filter = D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT; - samplerDesc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_WRAP; - samplerDesc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_WRAP; - samplerDesc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_WRAP; - m_device->CreateSampler(&samplerDesc, + D3D12_SAMPLER_DESC sampler_desc = {}; + sampler_desc.Filter = D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT; + sampler_desc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_WRAP; + sampler_desc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_WRAP; + sampler_desc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_WRAP; + m_device->CreateSampler(&sampler_desc, CD3DX12_CPU_DESCRIPTOR_HANDLE(m_outputScalingPass.m_samplerDescriptorHeap->GetCPUDescriptorHandleForHeapStart()).Offset(m_swapChain->GetCurrentBackBufferIndex(), g_descriptorStrideSamplers)); - getCurrentResourceStorage().m_commandList->SetDescriptorHeaps(1, &m_outputScalingPass.m_textureDescriptorHeap); - getCurrentResourceStorage().m_commandList->SetGraphicsRootDescriptorTable(0, + getCurrentResourceStorage().command_list->SetDescriptorHeaps(1, &m_outputScalingPass.m_textureDescriptorHeap); + getCurrentResourceStorage().command_list->SetGraphicsRootDescriptorTable(0, CD3DX12_GPU_DESCRIPTOR_HANDLE(m_outputScalingPass.m_textureDescriptorHeap->GetGPUDescriptorHandleForHeapStart()).Offset(m_swapChain->GetCurrentBackBufferIndex(), g_descriptorStrideSRVCBVUAV)); - getCurrentResourceStorage().m_commandList->SetDescriptorHeaps(1, &m_outputScalingPass.m_samplerDescriptorHeap); - getCurrentResourceStorage().m_commandList->SetGraphicsRootDescriptorTable(1, + getCurrentResourceStorage().command_list->SetDescriptorHeaps(1, &m_outputScalingPass.m_samplerDescriptorHeap); + getCurrentResourceStorage().command_list->SetGraphicsRootDescriptorTable(1, CD3DX12_GPU_DESCRIPTOR_HANDLE(m_outputScalingPass.m_samplerDescriptorHeap->GetGPUDescriptorHandleForHeapStart()).Offset(m_swapChain->GetCurrentBackBufferIndex(), g_descriptorStrideSamplers)); - getCurrentResourceStorage().m_commandList->OMSetRenderTargets(1, + getCurrentResourceStorage().command_list->OMSetRenderTargets(1, &CD3DX12_CPU_DESCRIPTOR_HANDLE(m_backbufferAsRendertarget[m_swapChain->GetCurrentBackBufferIndex()]->GetCPUDescriptorHandleForHeapStart()), true, nullptr); - D3D12_VERTEX_BUFFER_VIEW vbv = {}; - vbv.BufferLocation = m_outputScalingPass.m_vertexBuffer->GetGPUVirtualAddress(); - vbv.StrideInBytes = 4 * sizeof(float); - vbv.SizeInBytes = 16 * sizeof(float); - getCurrentResourceStorage().m_commandList->IASetVertexBuffers(0, 1, &vbv); - getCurrentResourceStorage().m_commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); + D3D12_VERTEX_BUFFER_VIEW vertex_buffer_view = {}; + vertex_buffer_view.BufferLocation = m_outputScalingPass.m_vertexBuffer->GetGPUVirtualAddress(); + vertex_buffer_view.StrideInBytes = 4 * sizeof(float); + vertex_buffer_view.SizeInBytes = 16 * sizeof(float); + getCurrentResourceStorage().command_list->IASetVertexBuffers(0, 1, &vertex_buffer_view); + getCurrentResourceStorage().command_list->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); if (m_rtts.m_currentlyBoundRenderTargets[0] != nullptr) - getCurrentResourceStorage().m_commandList->DrawInstanced(4, 1, 0, 0); + getCurrentResourceStorage().command_list->DrawInstanced(4, 1, 0, 0); if (!rpcs3::config.rsx.d3d12.overlay.value()) - getCurrentResourceStorage().m_commandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_backBuffer[m_swapChain->GetCurrentBackBufferIndex()].Get(), D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_PRESENT)); - if (isFlipSurfaceInLocalMemory(rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET]) && m_rtts.m_currentlyBoundRenderTargets[0] != nullptr) - getCurrentResourceStorage().m_commandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_rtts.m_currentlyBoundRenderTargets[0], D3D12_RESOURCE_STATE_GENERIC_READ, D3D12_RESOURCE_STATE_RENDER_TARGET)); - ThrowIfFailed(getCurrentResourceStorage().m_commandList->Close()); - m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)getCurrentResourceStorage().m_commandList.GetAddressOf()); + getCurrentResourceStorage().command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_backBuffer[m_swapChain->GetCurrentBackBufferIndex()].Get(), D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_PRESENT)); + if (is_flip_surface_in_global_memory(rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET]) && m_rtts.m_currentlyBoundRenderTargets[0] != nullptr) + getCurrentResourceStorage().command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_rtts.m_currentlyBoundRenderTargets[0], D3D12_RESOURCE_STATE_GENERIC_READ, D3D12_RESOURCE_STATE_RENDER_TARGET)); + ThrowIfFailed(getCurrentResourceStorage().command_list->Close()); + m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)getCurrentResourceStorage().command_list.GetAddressOf()); if(rpcs3::config.rsx.d3d12.overlay.value()) - renderOverlay(); + render_overlay(); - ResetTimer(); + reset_timer(); - std::chrono::time_point flipStart = std::chrono::system_clock::now(); + std::chrono::time_point flip_start = std::chrono::system_clock::now(); ThrowIfFailed(m_swapChain->Present(rpcs3::state.config.rsx.vsync.value() ? 1 : 0, 0)); // Add an event signaling queue completion - ResourceStorage &storage = getNonCurrentResourceStorage(); + resource_storage &storage = getNonCurrentResourceStorage(); - m_commandQueueGraphic->Signal(storage.m_frameFinishedFence.Get(), storage.m_fenceValue); - storage.m_frameFinishedFence->SetEventOnCompletion(storage.m_fenceValue, storage.m_frameFinishedHandle); - storage.m_fenceValue++; + m_commandQueueGraphic->Signal(storage.frame_finished_fence.Get(), storage.fence_value); + storage.frame_finished_fence->SetEventOnCompletion(storage.fence_value, storage.frame_finished_handle); + storage.fence_value++; - storage.m_inUse = true; + storage.in_use = true; // Get the put pos - 1. This way after cleaning we can set the get ptr to // this value, allowing heap to proceed even if we cleant before allocating // a new value (that's the reason of the -1) - storage.m_getPosConstantsHeap = m_constantsData.getCurrentPutPosMinusOne(); - storage.m_getPosVertexIndexHeap = m_vertexIndexData.getCurrentPutPosMinusOne(); - storage.m_getPosTextureUploadHeap = m_textureUploadData.getCurrentPutPosMinusOne(); - storage.m_getPosReadbackHeap = m_readbackResources.getCurrentPutPosMinusOne(); - storage.m_getPosUAVHeap = m_UAVHeap.getCurrentPutPosMinusOne(); + storage.constants_heap_get_pos = m_constantsData.get_current_put_pos_minus_one(); + storage.vertex_index_heap_get_pos = m_vertexIndexData.get_current_put_pos_minus_one(); + storage.texture_upload_heap_get_pos = m_textureUploadData.get_current_put_pos_minus_one(); + storage.readback_heap_get_pos = m_readbackResources.get_current_put_pos_minus_one(); + storage.uav_heap_get_pos = m_UAVHeap.get_current_put_pos_minus_one(); // Flush local_transform_constants.clear(); m_texturesRTTs.clear(); // Now get ready for next frame - ResourceStorage &newStorage = getCurrentResourceStorage(); + resource_storage &new_storage = getCurrentResourceStorage(); - newStorage.WaitAndClean(); - if (newStorage.m_inUse) + new_storage.wait_and_clean(); + if (new_storage.in_use) { - m_constantsData.m_getPos = newStorage.m_getPosConstantsHeap; - m_vertexIndexData.m_getPos = newStorage.m_getPosVertexIndexHeap; - m_textureUploadData.m_getPos = newStorage.m_getPosTextureUploadHeap; - m_readbackResources.m_getPos = newStorage.m_getPosReadbackHeap; - m_UAVHeap.m_getPos = newStorage.m_getPosUAVHeap; + m_constantsData.m_get_pos = new_storage.constants_heap_get_pos; + m_vertexIndexData.m_get_pos = new_storage.vertex_index_heap_get_pos; + m_textureUploadData.m_get_pos = new_storage.texture_upload_heap_get_pos; + m_readbackResources.m_get_pos = new_storage.readback_heap_get_pos; + m_UAVHeap.m_get_pos = new_storage.uav_heap_get_pos; } m_frame->flip(nullptr); - std::chrono::time_point flipEnd = std::chrono::system_clock::now(); - m_timers.m_flipDuration += std::chrono::duration_cast(flipEnd - flipStart).count(); + std::chrono::time_point flip_end = std::chrono::system_clock::now(); + m_timers.m_flipDuration += std::chrono::duration_cast(flip_end - flip_start).count(); } -void D3D12GSRender::ResetTimer() +void D3D12GSRender::reset_timer() { m_timers.m_drawCallCount = 0; m_timers.m_drawCallDuration = 0; @@ -824,77 +708,84 @@ void D3D12GSRender::ResetTimer() m_timers.m_flipDuration = 0; } -D3D12GSRender::ResourceStorage& D3D12GSRender::getCurrentResourceStorage() +resource_storage& D3D12GSRender::getCurrentResourceStorage() { return m_perFrameStorage[m_swapChain->GetCurrentBackBufferIndex()]; } -D3D12GSRender::ResourceStorage& D3D12GSRender::getNonCurrentResourceStorage() +resource_storage& D3D12GSRender::getNonCurrentResourceStorage() { return m_perFrameStorage[1 - m_swapChain->GetCurrentBackBufferIndex()]; } -ID3D12Resource * D3D12GSRender::writeColorBuffer(ID3D12Resource * RTT, ID3D12GraphicsCommandList * cmdlist) +namespace +{ +/** + * Create a write back buffer resource and populate command_list with copy command to fill it + * with color_surface data. + */ + ComPtr create_readback_buffer_and_download( + ID3D12Device *device, + ID3D12GraphicsCommandList * command_list, + data_heap &readback_heap, + ID3D12Resource * color_surface, + int color_surface_format + ) { int clip_w = rsx::method_registers[NV4097_SET_SURFACE_CLIP_HORIZONTAL] >> 16; int clip_h = rsx::method_registers[NV4097_SET_SURFACE_CLIP_VERTICAL] >> 16; - ID3D12Resource *Result; - size_t w = clip_w, h = clip_h; - DXGI_FORMAT dxgiFormat; - size_t rowPitch; - switch (m_surface.color_format) + + DXGI_FORMAT dxgi_format = get_color_surface_format(color_surface_format); + size_t row_pitch; + switch (color_surface_format) { case CELL_GCM_SURFACE_A8R8G8B8: - dxgiFormat = DXGI_FORMAT_R8G8B8A8_UNORM; - rowPitch = align(w * 4, 256); + row_pitch = align(clip_w * 4, 256); break; case CELL_GCM_SURFACE_F_W16Z16Y16X16: - dxgiFormat = DXGI_FORMAT_R16G16B16A16_FLOAT; - rowPitch = align(w * 8, 256); + row_pitch = align(clip_w * 8, 256); break; } - size_t sizeInByte = rowPitch * h; - assert(m_readbackResources.canAlloc(sizeInByte)); - size_t heapOffset = m_readbackResources.alloc(sizeInByte); - + size_t buffer_size = row_pitch * clip_h; + assert(readback_heap.can_alloc(buffer_size)); + size_t heapOffset = readback_heap.alloc(buffer_size); + ComPtr Result; ThrowIfFailed( - m_device->CreatePlacedResource( - m_readbackResources.m_heap, + device->CreatePlacedResource( + readback_heap.m_heap, heapOffset, - &CD3DX12_RESOURCE_DESC::Buffer(rowPitch * h), + &CD3DX12_RESOURCE_DESC::Buffer(row_pitch * clip_h), D3D12_RESOURCE_STATE_COPY_DEST, nullptr, - IID_PPV_ARGS(&Result) + IID_PPV_ARGS(Result.GetAddressOf()) ) ); - getCurrentResourceStorage().m_singleFrameLifetimeResources.push_back(Result); - cmdlist->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(RTT, D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE)); + command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(color_surface, D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE)); - cmdlist->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(Result, { 0, {dxgiFormat, (UINT)h, (UINT)w, 1, (UINT)rowPitch } }), 0, 0, 0, - &CD3DX12_TEXTURE_COPY_LOCATION(RTT, 0), nullptr); - cmdlist->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(RTT, D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET)); + command_list->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(Result.Get(), { 0, { dxgi_format, (UINT)clip_w, (UINT)clip_h, 1, (UINT)row_pitch } }), 0, 0, 0, + &CD3DX12_TEXTURE_COPY_LOCATION(color_surface, 0), nullptr); + command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(color_surface, D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET)); return Result; } -static -void copyToCellRamAndRelease(void *dstAddress, ID3D12Resource *res, size_t dstPitch, size_t srcPitch, size_t width, size_t height) +void copy_readback_buffer_to_dest(void *dest, ID3D12Resource *res, size_t dst_pitch, size_t src_pitch, size_t height) { - void *srcBuffer; - ThrowIfFailed(res->Map(0, nullptr, &srcBuffer)); + void *mapped_buffer; + ThrowIfFailed(res->Map(0, nullptr, &mapped_buffer)); for (unsigned row = 0; row < height; row++) - memcpy((char*)dstAddress + row * dstPitch, (char*)srcBuffer + row * srcPitch, srcPitch); + { + u32 *casted_dest = (u32*)((char*)dest + row * dst_pitch); + u32 *casted_src = (u32*)((char*)mapped_buffer + row * src_pitch); + for (unsigned col = 0; col < src_pitch / 4; col++) + *casted_dest++ = _byteswap_ulong(*casted_src++); + } res->Unmap(0, nullptr); - res->Release(); +} } -void D3D12GSRender::semaphore_PGRAPH_texture_read_release() -{ - semaphore_PGRAPH_backend_release(); -} - -void D3D12GSRender::semaphore_PGRAPH_backend_release() +void D3D12GSRender::copy_render_target_to_dma_location() { // Add all buffer write // Cell can't make any assumption about readyness of color/depth buffer @@ -902,180 +793,133 @@ void D3D12GSRender::semaphore_PGRAPH_backend_release() int clip_w = rsx::method_registers[NV4097_SET_SURFACE_CLIP_HORIZONTAL] >> 16; int clip_h = rsx::method_registers[NV4097_SET_SURFACE_CLIP_VERTICAL] >> 16; - ComPtr fence; - ThrowIfFailed( - m_device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(fence.GetAddressOf())) - ); - HANDLE handle = CreateEvent(0, FALSE, FALSE, 0); - fence->SetEventOnCompletion(1, handle); + ComPtr depth_buffer_write_dest, depth_format_conversion_buffer; + ComPtr descriptor_heap; + size_t depth_row_pitch = align(clip_w, 256); - ComPtr writeDest, depthConverted; - ComPtr descriptorHeap; - size_t depthRowPitch = clip_w; - depthRowPitch = (depthRowPitch + 255) & ~255; - - u32 m_context_dma_color_a = rsx::method_registers[NV4097_SET_CONTEXT_DMA_COLOR_A]; - u32 m_context_dma_color_b = rsx::method_registers[NV4097_SET_CONTEXT_DMA_COLOR_B]; - u32 m_context_dma_color_c = rsx::method_registers[NV4097_SET_CONTEXT_DMA_COLOR_C]; - u32 m_context_dma_color_d = rsx::method_registers[NV4097_SET_CONTEXT_DMA_COLOR_D]; + u32 context_dma_color[] = + { + rsx::method_registers[NV4097_SET_CONTEXT_DMA_COLOR_A], + rsx::method_registers[NV4097_SET_CONTEXT_DMA_COLOR_B], + rsx::method_registers[NV4097_SET_CONTEXT_DMA_COLOR_C], + rsx::method_registers[NV4097_SET_CONTEXT_DMA_COLOR_D], + }; u32 m_context_dma_z = rsx::method_registers[NV4097_SET_CONTEXT_DMA_ZETA]; - bool needTransfer = (m_context_dma_z && rpcs3::state.config.rsx.opengl.write_depth_buffer) || - ((m_context_dma_color_a || m_context_dma_color_b || m_context_dma_color_c || m_context_dma_color_d) && rpcs3::state.config.rsx.opengl.write_color_buffers); + bool need_transfer = false; if (m_context_dma_z && rpcs3::state.config.rsx.opengl.write_depth_buffer) { - size_t sizeInByte = clip_w * clip_h * 2; - assert(m_UAVHeap.canAlloc(sizeInByte)); - size_t heapOffset = m_UAVHeap.alloc(sizeInByte); + size_t uav_size = clip_w * clip_h * 2; + assert(m_UAVHeap.can_alloc(uav_size)); + size_t heap_offset = m_UAVHeap.alloc(uav_size); ThrowIfFailed( m_device->CreatePlacedResource( m_UAVHeap.m_heap, - heapOffset, + heap_offset, &CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R8_UNORM, clip_w, clip_h, 1, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, nullptr, - IID_PPV_ARGS(depthConverted.GetAddressOf()) + IID_PPV_ARGS(depth_format_conversion_buffer.GetAddressOf()) ) ); - sizeInByte = depthRowPitch * clip_h; - assert(m_readbackResources.canAlloc(sizeInByte)); - heapOffset = m_readbackResources.alloc(sizeInByte); + size_t buffer_size = depth_row_pitch * clip_h; + assert(m_readbackResources.can_alloc(buffer_size)); + heap_offset = m_readbackResources.alloc(buffer_size); ThrowIfFailed( m_device->CreatePlacedResource( m_readbackResources.m_heap, - heapOffset, - &CD3DX12_RESOURCE_DESC::Buffer(sizeInByte), + heap_offset, + &CD3DX12_RESOURCE_DESC::Buffer(buffer_size), D3D12_RESOURCE_STATE_COPY_DEST, nullptr, - IID_PPV_ARGS(writeDest.GetAddressOf()) + IID_PPV_ARGS(depth_buffer_write_dest.GetAddressOf()) ) ); - D3D12_DESCRIPTOR_HEAP_DESC descriptorHeapDesc = { D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV , 2, D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE }; + D3D12_DESCRIPTOR_HEAP_DESC descriptor_heap_desc = { D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV , 2, D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE }; ThrowIfFailed( - m_device->CreateDescriptorHeap(&descriptorHeapDesc, IID_PPV_ARGS(descriptorHeap.GetAddressOf())) + m_device->CreateDescriptorHeap(&descriptor_heap_desc, IID_PPV_ARGS(descriptor_heap.GetAddressOf())) ); - D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; - switch (m_surface.depth_format) - { - case 0: - break; - case CELL_GCM_SURFACE_Z16: - srvDesc.Format = DXGI_FORMAT_R16_UNORM; - break; - case CELL_GCM_SURFACE_Z24S8: - srvDesc.Format = DXGI_FORMAT_R24_UNORM_X8_TYPELESS; - break; - default: - LOG_ERROR(RSX, "Bad depth format! (%d)", m_surface.depth_format); - assert(0); - } - srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; - srvDesc.Texture2D.MipLevels = 1; - srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; - m_device->CreateShaderResourceView(m_rtts.m_currentlyBoundDepthStencil, &srvDesc, - CD3DX12_CPU_DESCRIPTOR_HANDLE(descriptorHeap->GetCPUDescriptorHandleForHeapStart())); - D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; - uavDesc.Format = DXGI_FORMAT_R8_UNORM; - uavDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D; - m_device->CreateUnorderedAccessView(depthConverted.Get(), nullptr, &uavDesc, - CD3DX12_CPU_DESCRIPTOR_HANDLE(descriptorHeap->GetCPUDescriptorHandleForHeapStart()).Offset(1, g_descriptorStrideSRVCBVUAV)); + D3D12_SHADER_RESOURCE_VIEW_DESC shader_resource_view_desc = {}; + m_surface.depth_format = get_depth_typeless_surface_format(m_surface.depth_format); + shader_resource_view_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + shader_resource_view_desc.Texture2D.MipLevels = 1; + shader_resource_view_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + m_device->CreateShaderResourceView(m_rtts.m_currentlyBoundDepthStencil, &shader_resource_view_desc, + CD3DX12_CPU_DESCRIPTOR_HANDLE(descriptor_heap->GetCPUDescriptorHandleForHeapStart())); + D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc = {}; + uav_desc.Format = DXGI_FORMAT_R8_UNORM; + uav_desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D; + m_device->CreateUnorderedAccessView(depth_format_conversion_buffer.Get(), nullptr, &uav_desc, + CD3DX12_CPU_DESCRIPTOR_HANDLE(descriptor_heap->GetCPUDescriptorHandleForHeapStart()).Offset(1, g_descriptorStrideSRVCBVUAV)); // Convert - getCurrentResourceStorage().m_commandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_rtts.m_currentlyBoundDepthStencil, D3D12_RESOURCE_STATE_DEPTH_WRITE, D3D12_RESOURCE_STATE_GENERIC_READ)); + getCurrentResourceStorage().command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_rtts.m_currentlyBoundDepthStencil, D3D12_RESOURCE_STATE_DEPTH_WRITE, D3D12_RESOURCE_STATE_GENERIC_READ)); - getCurrentResourceStorage().m_commandList->SetPipelineState(m_convertPSO); - getCurrentResourceStorage().m_commandList->SetComputeRootSignature(m_convertRootSignature); - getCurrentResourceStorage().m_commandList->SetDescriptorHeaps(1, descriptorHeap.GetAddressOf()); - getCurrentResourceStorage().m_commandList->SetComputeRootDescriptorTable(0, descriptorHeap->GetGPUDescriptorHandleForHeapStart()); - getCurrentResourceStorage().m_commandList->Dispatch(clip_w / 8, clip_h / 8, 1); + getCurrentResourceStorage().command_list->SetPipelineState(m_convertPSO); + getCurrentResourceStorage().command_list->SetComputeRootSignature(m_convertRootSignature); + getCurrentResourceStorage().command_list->SetDescriptorHeaps(1, descriptor_heap.GetAddressOf()); + getCurrentResourceStorage().command_list->SetComputeRootDescriptorTable(0, descriptor_heap->GetGPUDescriptorHandleForHeapStart()); + getCurrentResourceStorage().command_list->Dispatch(clip_w / 8, clip_h / 8, 1); D3D12_RESOURCE_BARRIER barriers[] = { CD3DX12_RESOURCE_BARRIER::Transition(m_rtts.m_currentlyBoundDepthStencil, D3D12_RESOURCE_STATE_GENERIC_READ, D3D12_RESOURCE_STATE_DEPTH_WRITE), - CD3DX12_RESOURCE_BARRIER::UAV(depthConverted.Get()), + CD3DX12_RESOURCE_BARRIER::UAV(depth_format_conversion_buffer.Get()), }; - getCurrentResourceStorage().m_commandList->ResourceBarrier(2, barriers); - getCurrentResourceStorage().m_commandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(depthConverted.Get(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE)); - getCurrentResourceStorage().m_commandList->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(writeDest.Get(), { 0, { DXGI_FORMAT_R8_UNORM, (UINT)clip_w, (UINT)clip_h, 1, (UINT)depthRowPitch } }), 0, 0, 0, - &CD3DX12_TEXTURE_COPY_LOCATION(depthConverted.Get(), 0), nullptr); + getCurrentResourceStorage().command_list->ResourceBarrier(2, barriers); + getCurrentResourceStorage().command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(depth_format_conversion_buffer.Get(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE)); + getCurrentResourceStorage().command_list->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(depth_buffer_write_dest.Get(), { 0, { DXGI_FORMAT_R8_UNORM, (UINT)clip_w, (UINT)clip_h, 1, (UINT)depth_row_pitch } }), 0, 0, 0, + &CD3DX12_TEXTURE_COPY_LOCATION(depth_buffer_write_dest.Get(), 0), nullptr); invalidateAddress(rsx::get_address(rsx::method_registers[NV4097_SET_SURFACE_ZETA_OFFSET], m_context_dma_z - 0xfeed0000)); + + need_transfer = true; } - ID3D12Resource *rtt0 = nullptr, *rtt1 = nullptr, *rtt2 = nullptr, *rtt3 = nullptr; + ComPtr readback_buffers[4]; if (rpcs3::state.config.rsx.opengl.write_color_buffers) { - switch (rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET]) + for (u8 i : get_rtt_indexes(rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET])) { - case CELL_GCM_SURFACE_TARGET_NONE: - break; - - case CELL_GCM_SURFACE_TARGET_0: - if (m_context_dma_color_a) rtt0 = writeColorBuffer(m_rtts.m_currentlyBoundRenderTargets[0], getCurrentResourceStorage().m_commandList.Get()); - break; - - case CELL_GCM_SURFACE_TARGET_1: - if (m_context_dma_color_b) rtt1 = writeColorBuffer(m_rtts.m_currentlyBoundRenderTargets[0], getCurrentResourceStorage().m_commandList.Get()); - break; - - case CELL_GCM_SURFACE_TARGET_MRT1: - if (m_context_dma_color_a) rtt0 = writeColorBuffer(m_rtts.m_currentlyBoundRenderTargets[0], getCurrentResourceStorage().m_commandList.Get()); - if (m_context_dma_color_b) rtt1 = writeColorBuffer(m_rtts.m_currentlyBoundRenderTargets[1], getCurrentResourceStorage().m_commandList.Get()); - break; - - case CELL_GCM_SURFACE_TARGET_MRT2: - if (m_context_dma_color_a) rtt0 = writeColorBuffer(m_rtts.m_currentlyBoundRenderTargets[0], getCurrentResourceStorage().m_commandList.Get()); - if (m_context_dma_color_b) rtt1 = writeColorBuffer(m_rtts.m_currentlyBoundRenderTargets[1], getCurrentResourceStorage().m_commandList.Get()); - if (m_context_dma_color_c) rtt2 = writeColorBuffer(m_rtts.m_currentlyBoundRenderTargets[2], getCurrentResourceStorage().m_commandList.Get()); - break; - - case CELL_GCM_SURFACE_TARGET_MRT3: - if (m_context_dma_color_a) rtt0 = writeColorBuffer(m_rtts.m_currentlyBoundRenderTargets[0], getCurrentResourceStorage().m_commandList.Get()); - if (m_context_dma_color_b) rtt1 = writeColorBuffer(m_rtts.m_currentlyBoundRenderTargets[1], getCurrentResourceStorage().m_commandList.Get()); - if (m_context_dma_color_c) rtt2 = writeColorBuffer(m_rtts.m_currentlyBoundRenderTargets[2], getCurrentResourceStorage().m_commandList.Get()); - if (m_context_dma_color_d) rtt3 = writeColorBuffer(m_rtts.m_currentlyBoundRenderTargets[3], getCurrentResourceStorage().m_commandList.Get()); - break; + if (!context_dma_color[i]) + continue; + readback_buffers[i] = create_readback_buffer_and_download(m_device.Get(), getCurrentResourceStorage().command_list.Get(), m_readbackResources, m_rtts.m_currentlyBoundRenderTargets[0], m_surface.color_format); + invalidateAddress(rsx::get_address(rsx::method_registers[NV4097_SET_SURFACE_COLOR_AOFFSET], context_dma_color[i] - 0xfeed0000)); + need_transfer = true; } - - if (m_context_dma_color_a) invalidateAddress(rsx::get_address(rsx::method_registers[NV4097_SET_SURFACE_COLOR_AOFFSET], m_context_dma_color_a - 0xfeed0000)); - if (m_context_dma_color_b) invalidateAddress(rsx::get_address(rsx::method_registers[NV4097_SET_SURFACE_COLOR_BOFFSET], m_context_dma_color_b - 0xfeed0000)); - if (m_context_dma_color_c) invalidateAddress(rsx::get_address(rsx::method_registers[NV4097_SET_SURFACE_COLOR_COFFSET], m_context_dma_color_c - 0xfeed0000)); - if (m_context_dma_color_d) invalidateAddress(rsx::get_address(rsx::method_registers[NV4097_SET_SURFACE_COLOR_DOFFSET], m_context_dma_color_d - 0xfeed0000)); } - if (needTransfer) + if (need_transfer) { - ThrowIfFailed(getCurrentResourceStorage().m_commandList->Close()); - m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)getCurrentResourceStorage().m_commandList.GetAddressOf()); - getCurrentResourceStorage().setNewCommandList(); + ThrowIfFailed(getCurrentResourceStorage().command_list->Close()); + m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)getCurrentResourceStorage().command_list.GetAddressOf()); + getCurrentResourceStorage().set_new_command_list(); } //Wait for result - m_commandQueueGraphic->Signal(fence.Get(), 1); - WaitForSingleObject(handle, INFINITE); - CloseHandle(handle); + wait_for_command_queue(m_device.Get(), m_commandQueueGraphic.Get()); if (m_context_dma_z && rpcs3::state.config.rsx.opengl.write_depth_buffer) { u32 address = rsx::get_address(rsx::method_registers[NV4097_SET_SURFACE_ZETA_OFFSET], m_context_dma_z - 0xfeed0000); auto ptr = vm::base(address); - char *ptrAsChar = (char*)ptr; - unsigned char *writeDestPtr; - ThrowIfFailed(writeDest->Map(0, nullptr, (void**)&writeDestPtr)); + char *depth_buffer = (char*)ptr; + unsigned char *mapped_buffer; + ThrowIfFailed(depth_buffer_write_dest->Map(0, nullptr, (void**)&mapped_buffer)); for (unsigned row = 0; row < (unsigned)clip_h; row++) { for (unsigned i = 0; i < (unsigned)clip_w; i++) { - unsigned char c = writeDestPtr[row * depthRowPitch + i]; - ptrAsChar[4 * (row * clip_w + i)] = c; - ptrAsChar[4 * (row * clip_w + i) + 1] = c; - ptrAsChar[4 * (row * clip_w + i) + 2] = c; - ptrAsChar[4 * (row * clip_w + i) + 3] = c; + unsigned char c = mapped_buffer[row * depth_row_pitch + i]; + depth_buffer[4 * (row * clip_w + i)] = c; + depth_buffer[4 * (row * clip_w + i) + 1] = c; + depth_buffer[4 * (row * clip_w + i) + 2] = c; + depth_buffer[4 * (row * clip_w + i) + 3] = c; } } } @@ -1095,52 +939,19 @@ void D3D12GSRender::semaphore_PGRAPH_backend_release() if (rpcs3::state.config.rsx.opengl.write_color_buffers) { - switch (rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET]) + void *dest_buffer[] = { - case CELL_GCM_SURFACE_TARGET_NONE: - break; - case CELL_GCM_SURFACE_TARGET_0: + vm::base(rsx::get_address(rsx::method_registers[NV4097_SET_SURFACE_COLOR_AOFFSET], context_dma_color[0] - 0xfeed0000)), + vm::base(rsx::get_address(rsx::method_registers[NV4097_SET_SURFACE_COLOR_AOFFSET], context_dma_color[1] - 0xfeed0000)), + vm::base(rsx::get_address(rsx::method_registers[NV4097_SET_SURFACE_COLOR_AOFFSET], context_dma_color[2] - 0xfeed0000)), + vm::base(rsx::get_address(rsx::method_registers[NV4097_SET_SURFACE_COLOR_AOFFSET], context_dma_color[3] - 0xfeed0000)) + }; + + for (u8 i : get_rtt_indexes(rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET])) { - u32 address = rsx::get_address(rsx::method_registers[NV4097_SET_SURFACE_COLOR_AOFFSET], m_context_dma_color_a - 0xfeed0000); - copyToCellRamAndRelease(vm::base(address), rtt0, srcPitch, dstPitch, clip_w, clip_h); - } - break; - case CELL_GCM_SURFACE_TARGET_1: - { - u32 address = rsx::get_address(rsx::method_registers[NV4097_SET_SURFACE_COLOR_BOFFSET], m_context_dma_color_b - 0xfeed0000); - copyToCellRamAndRelease(vm::base(address), rtt1, srcPitch, dstPitch, clip_w, clip_h); - } - break; - case CELL_GCM_SURFACE_TARGET_MRT1: - { - u32 address = rsx::get_address(rsx::method_registers[NV4097_SET_SURFACE_COLOR_AOFFSET], m_context_dma_color_a - 0xfeed0000); - copyToCellRamAndRelease(vm::base(address), rtt0, srcPitch, dstPitch, clip_w, clip_h); - address = rsx::get_address(rsx::method_registers[NV4097_SET_SURFACE_COLOR_BOFFSET], m_context_dma_color_b - 0xfeed0000); - copyToCellRamAndRelease(vm::base(address), rtt1, srcPitch, dstPitch, clip_w, clip_h); - } - break; - case CELL_GCM_SURFACE_TARGET_MRT2: - { - u32 address = rsx::get_address(rsx::method_registers[NV4097_SET_SURFACE_COLOR_AOFFSET], m_context_dma_color_a - 0xfeed0000); - copyToCellRamAndRelease(vm::base(address), rtt0, srcPitch, dstPitch, clip_w, clip_h); - address = rsx::get_address(rsx::method_registers[NV4097_SET_SURFACE_COLOR_BOFFSET], m_context_dma_color_b - 0xfeed0000); - copyToCellRamAndRelease(vm::base(address), rtt1, srcPitch, dstPitch, clip_w, clip_h); - address = rsx::get_address(rsx::method_registers[NV4097_SET_SURFACE_COLOR_COFFSET], m_context_dma_color_c - 0xfeed0000); - copyToCellRamAndRelease(vm::base(address), rtt2, srcPitch, dstPitch, clip_w, clip_h); - } - break; - case CELL_GCM_SURFACE_TARGET_MRT3: - { - u32 address = rsx::get_address(rsx::method_registers[NV4097_SET_SURFACE_COLOR_AOFFSET], m_context_dma_color_a - 0xfeed0000); - copyToCellRamAndRelease(vm::base(address), rtt0, srcPitch, dstPitch, clip_w, clip_h); - address = rsx::get_address(rsx::method_registers[NV4097_SET_SURFACE_COLOR_BOFFSET], m_context_dma_color_b - 0xfeed0000); - copyToCellRamAndRelease(vm::base(address), rtt1, srcPitch, dstPitch, clip_w, clip_h); - address = rsx::get_address(rsx::method_registers[NV4097_SET_SURFACE_COLOR_COFFSET], m_context_dma_color_c - 0xfeed0000); - copyToCellRamAndRelease(vm::base(address), rtt2, srcPitch, dstPitch, clip_w, clip_h); - address = rsx::get_address(rsx::method_registers[NV4097_SET_SURFACE_COLOR_DOFFSET], m_context_dma_color_d - 0xfeed0000); - copyToCellRamAndRelease(vm::base(address), rtt3, srcPitch, dstPitch, clip_w, clip_h); - } - break; + if (!context_dma_color[i]) + continue; + copy_readback_buffer_to_dest(dest_buffer[i], readback_buffers[i].Get(), srcPitch, dstPitch, clip_h); } } } diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index e8dd4f6d14..a8cf6a0ec8 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -1,6 +1,6 @@ #pragma once -#include "D3D12.h" +#include "D3D12Utils.h" #include "Utilities/rPlatform.h" // only for rImage #include "Utilities/File.h" #include "Utilities/Log.h" @@ -10,8 +10,8 @@ #include "D3D12RenderTargetSets.h" #include "D3D12PipelineState.h" -#include "D3D12Buffer.h" #include "d3dx12.h" +#include "D3D12MemoryHelpers.h" /** @@ -36,244 +36,6 @@ * are not currently correctly signaled which leads to deadlock. */ -template -struct InitHeap -{ - static T* Init(ID3D12Device *device, size_t heapSize, D3D12_HEAP_TYPE type, D3D12_HEAP_FLAGS flags); -}; - -template<> -struct InitHeap -{ - static ID3D12Heap* Init(ID3D12Device *device, size_t heapSize, D3D12_HEAP_TYPE type, D3D12_HEAP_FLAGS flags) - { - ID3D12Heap *result; - D3D12_HEAP_DESC heapDesc = {}; - heapDesc.SizeInBytes = heapSize; - heapDesc.Properties.Type = type; - heapDesc.Flags = flags; - ThrowIfFailed(device->CreateHeap(&heapDesc, IID_PPV_ARGS(&result))); - return result; - } -}; - -template<> -struct InitHeap -{ - static ID3D12Resource* Init(ID3D12Device *device, size_t heapSize, D3D12_HEAP_TYPE type, D3D12_HEAP_FLAGS flags) - { - ID3D12Resource *result; - D3D12_HEAP_PROPERTIES heapProperties = {}; - heapProperties.Type = type; - ThrowIfFailed(device->CreateCommittedResource(&heapProperties, - flags, - &CD3DX12_RESOURCE_DESC::Buffer(heapSize), - D3D12_RESOURCE_STATE_GENERIC_READ, - nullptr, - IID_PPV_ARGS(&result)) - ); - - return result; - } -}; - - -/** - * Wrapper around a ID3D12Resource or a ID3D12Heap. - * Acts as a ring buffer : hold a get and put pointers, - * put pointer is used as storage space offset - * and get is used as beginning of in use data space. - * This wrapper checks that put pointer doesn't cross get one. - */ -template -struct DataHeap -{ - T *m_heap; - size_t m_size; - size_t m_putPos; // Start of free space - size_t m_getPos; // End of free space - - void Init(ID3D12Device *device, size_t heapSize, D3D12_HEAP_TYPE type, D3D12_HEAP_FLAGS flags) - { - m_size = heapSize; - m_heap = InitHeap::Init(device, heapSize, type, flags); - m_putPos = 0; - m_getPos = heapSize - 1; - } - - /** - * Does alloc cross get position ? - */ - bool canAlloc(size_t size) const - { - size_t allocSize = align(size, Alignment); - size_t currentGetPos = m_getPos; - if (m_putPos + allocSize < m_size) - { - // range before get - if (m_putPos + allocSize < m_getPos) - return true; - // range after get - if (m_putPos > m_getPos) - return true; - return false; - } - else - { - // ..]....[..get.. - if (m_putPos < m_getPos) - return false; - // ..get..]...[... - // Actually all resources extending beyond heap space starts at 0 - if (allocSize > m_getPos) - return false; - return true; - } - } - - size_t alloc(size_t size) - { - assert(canAlloc(size)); - size_t allocSize = align(size, Alignment); - if (m_putPos + allocSize < m_size) - { - size_t oldPutPos = m_putPos; - m_putPos += allocSize; - return oldPutPos; - } - else - { - m_putPos = allocSize; - return 0; - } - } - - void Release() - { - m_heap->Release(); - } - - /** - * return current putpos - 1 - */ - size_t getCurrentPutPosMinusOne() const - { - return (m_putPos - 1 > 0) ? m_putPos - 1 : m_size - 1; - } -}; - -struct TextureEntry -{ - int m_format; - size_t m_width; - size_t m_height; - size_t m_mipmap; - bool m_isDirty; - - TextureEntry() : m_format(0), m_width(0), m_height(0), m_isDirty(true) - {} - - TextureEntry(int f, size_t w, size_t h, size_t m) : m_format(f), m_width(w), m_height(h), m_isDirty(false) - {} - - bool operator==(const TextureEntry &other) - { - return (m_format == other.m_format && m_width == other.m_width && m_height == other.m_height); - } -}; - -/** - * Manages cache of data (texture/vertex/index) - */ -struct DataCache -{ -private: - /** - * Mutex protecting m_dataCache access - * Memory protection fault catch can be generated by any thread and - * modifies it. - */ - std::mutex mut; - - std::unordered_map> > m_dataCache; // Storage - std::list > m_protectedRange; // address, start of protected range, size of protected range -public: - void storeAndProtectData(u64 key, u32 start, size_t size, int format, size_t w, size_t h, size_t m, ComPtr data) - { - std::lock_guard lock(mut); - m_dataCache[key] = std::make_pair(TextureEntry(format, w, h, m), data); - protectData(key, start, size); - } - - /** - * Make memory from start to start + size write protected. - * Associate key to this range so that when a write is detected, data at key is marked dirty. - */ - void protectData(u64 key, u32 start, size_t size) - { - /// align start to 4096 byte - u32 protected_range_start = align(start, 4096); - u32 protected_range_size = (u32)align(size, 4096); - m_protectedRange.push_back(std::make_tuple(key, protected_range_start, protected_range_size)); - vm::page_protect(protected_range_start, protected_range_size, 0, 0, vm::page_writable); - } - - /// remove all data containing addr from cache, unprotect them. Returns false if no data is modified. - bool invalidateAddress(u32 addr) - { - bool handled = false; - auto It = m_protectedRange.begin(), E = m_protectedRange.end(); - for (; It != E;) - { - auto currentIt = It; - ++It; - auto protectedTexture = *currentIt; - u32 protectedRangeStart = std::get<1>(protectedTexture), protectedRangeSize = std::get<2>(protectedTexture); - if (addr >= protectedRangeStart && addr <= protectedRangeSize + protectedRangeStart) - { - std::lock_guard lock(mut); - u64 texadrr = std::get<0>(protectedTexture); - m_dataCache[texadrr].first.m_isDirty = true; - - vm::page_protect(protectedRangeStart, protectedRangeSize, 0, vm::page_writable, 0); - m_protectedRange.erase(currentIt); - handled = true; - } - } - return handled; - } - - std::pair > *findDataIfAvailable(u64 key) - { - std::lock_guard lock(mut); - auto It = m_dataCache.find(key); - if (It == m_dataCache.end()) - return nullptr; - return &It->second; - } - - void unprotedAll() - { - std::lock_guard lock(mut); - for (auto &protectedTexture : m_protectedRange) - { - u32 protectedRangeStart = std::get<1>(protectedTexture), protectedRangeSize = std::get<2>(protectedTexture); - vm::page_protect(protectedRangeStart, protectedRangeSize, 0, vm::page_writable, 0); - } - } - - /** - * Remove data stored at key, and returns a ComPtr owning it. - * The caller is responsible for releasing the ComPtr. - */ - ComPtr removeFromCache(u64 key) - { - auto result = m_dataCache[key].second; - m_dataCache.erase(key); - return result; - } -}; - /** * Structure used to load/unload D3D12 lib. */ @@ -299,7 +61,7 @@ private: ComPtr m_rootSignatures[17]; // TODO: Use a tree structure to parse more efficiently - DataCache m_textureCache; + data_cache m_textureCache; bool invalidateAddress(u32 addr); // Copy of RTT to be used as texture @@ -325,7 +87,7 @@ private: size_t m_flipDuration; } m_timers; - void ResetTimer(); + void reset_timer(); struct Shader { @@ -351,67 +113,18 @@ private: ID3D12RootSignature *m_convertRootSignature; void initConvertShader(); - - /** - * Stores data that are "ping ponged" between frame. - * For instance command allocator : maintains 2 command allocators and - * swap between them when frame is flipped. - */ - struct ResourceStorage - { - bool m_inUse; // False until command list has been populated at least once - ComPtr m_frameFinishedFence; - UINT64 m_fenceValue; - HANDLE m_frameFinishedHandle; - - // Pointer to device, not owned by ResourceStorage - ID3D12Device *m_device; - ComPtr m_commandAllocator; - ComPtr m_commandList; - - // Descriptor heap - ComPtr m_descriptorsHeap; - size_t m_descriptorsHeapIndex; - - // Sampler heap - ComPtr m_samplerDescriptorHeap[2]; - size_t m_samplerDescriptorHeapIndex; - size_t m_currentSamplerIndex; - - ComPtr m_RAMFramebuffer; - - // List of resources that can be freed after frame is flipped - std::vector > m_singleFrameLifetimeResources; - - - /// Texture that were invalidated - std::list > m_dirtyTextures; - - size_t m_getPosConstantsHeap; - size_t m_getPosVertexIndexHeap; - size_t m_getPosTextureUploadHeap; - size_t m_getPosReadbackHeap; - size_t m_getPosUAVHeap; - - void Reset(); - void Init(ID3D12Device *device); - void setNewCommandList(); - void WaitAndClean(); - void Release(); - }; - - ResourceStorage m_perFrameStorage[2]; - ResourceStorage &getCurrentResourceStorage(); - ResourceStorage &getNonCurrentResourceStorage(); + resource_storage m_perFrameStorage[2]; + resource_storage &getCurrentResourceStorage(); + resource_storage &getNonCurrentResourceStorage(); // Constants storage - DataHeap m_constantsData; + data_heap m_constantsData; // Vertex storage - DataHeap m_vertexIndexData; + data_heap m_vertexIndexData; // Texture storage - DataHeap m_textureUploadData; - DataHeap m_UAVHeap; - DataHeap m_readbackResources; + data_heap m_textureUploadData; + data_heap m_UAVHeap; + data_heap m_readbackResources; struct { @@ -432,8 +145,6 @@ private: // Used to fill unused texture slot ID3D12Resource *m_dummyTexture; - size_t m_lastWidth, m_lastHeight, m_lastDepth; - // Store previous fbo addresses to detect RTT config changes. u32 m_previous_address_a; u32 m_previous_address_b; @@ -441,27 +152,19 @@ private: u32 m_previous_address_d; u32 m_previous_address_z; public: - u32 m_draw_frames; - u32 m_skip_frames; - D3D12GSRender(); virtual ~D3D12GSRender(); - - void semaphore_PGRAPH_texture_read_release(); - void semaphore_PGRAPH_backend_release(); - private: - void InitD2DStructures(); - void ReleaseD2DStructures(); - ID3D12Resource *writeColorBuffer(ID3D12Resource *RTT, ID3D12GraphicsCommandList *cmdlist); + void init_d2d_structures(); + void release_d2d_structures(); - bool LoadProgram(); + bool load_program(); /** * Create vertex and index buffers (if needed) and set them to cmdlist. * Non native primitive type are emulated by index buffers expansion. */ - void upload_vertex_index_data(ID3D12GraphicsCommandList *cmdlist); + void upload_and_set_vertex_index_data(ID3D12GraphicsCommandList *command_list); std::vector > m_first_count_pairs; /** @@ -471,31 +174,37 @@ private: */ void upload_vertex_attributes(const std::vector > &vertex_ranges); - void setScaleOffset(size_t descriptorIndex); - void FillVertexShaderConstantsBuffer(size_t descriptorIndex); - void FillPixelShaderConstantsBuffer(size_t descriptorIndex); + void upload_and_bind_scale_offset_matrix(size_t descriptor_index); + void upload_and_bind_vertex_shader_constants(size_t descriptor_index); + void upload_and_bind_fragment_shader_constants(size_t descriptorIndex); /** * Fetch all textures recorded in the state in the render target cache and in the texture cache. * If a texture is not cached, populate cmdlist with uploads command. * Create necessary resource view/sampler descriptors in the per frame storage struct. - * returns the number of texture uploaded. + * If the count of enabled texture is below texture_count, fills with dummy texture and sampler. */ - size_t UploadTextures(ID3D12GraphicsCommandList *cmdlist, size_t descriptorIndex); + void upload_and_bind_textures(ID3D12GraphicsCommandList *command_list, size_t descriptor_index, size_t texture_count); /** * Creates render target if necessary. * Populate cmdlist with render target state change (from RTT to generic read for previous rtt, * from generic to rtt for rtt in cache). */ - void PrepareRenderTargets(ID3D12GraphicsCommandList *cmdlist); + void prepare_render_targets(ID3D12GraphicsCommandList *command_list); /** * Render D2D overlay if enabled on top of the backbuffer. */ - void renderOverlay(); + void render_overlay(); void clear_surface(u32 arg); + /** + * Copy currently bound current target to the dma location affecting them. + * NOTE: We should also copy previously bound rtts. + */ + void copy_render_target_to_dma_location(); + protected: virtual void onexit_thread() override; virtual bool domethod(u32 cmd, u32 arg) override; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12MemoryHelpers.cpp b/rpcs3/Emu/RSX/D3D12/D3D12MemoryHelpers.cpp new file mode 100644 index 0000000000..66f38b41d2 --- /dev/null +++ b/rpcs3/Emu/RSX/D3D12/D3D12MemoryHelpers.cpp @@ -0,0 +1,131 @@ +#include "stdafx_d3d12.h" +#ifdef _WIN32 +#include "D3D12MemoryHelpers.h" + + +void data_cache::store_and_protect_data(u64 key, u32 start, size_t size, int format, size_t w, size_t h, size_t m, ComPtr data) noexcept +{ + std::lock_guard lock(m_mut); + m_address_to_data[key] = std::make_pair(texture_entry(format, w, h, m), data); + protect_data(key, start, size); +} + +void data_cache::protect_data(u64 key, u32 start, size_t size) noexcept +{ + /// align start to 4096 byte + u32 protected_range_start = align(start, 4096); + u32 protected_range_size = (u32)align(size, 4096); + m_protected_ranges.push_back(std::make_tuple(key, protected_range_start, protected_range_size)); + vm::page_protect(protected_range_start, protected_range_size, 0, 0, vm::page_writable); +} + +bool data_cache::invalidate_address(u32 addr) noexcept +{ + bool handled = false; + auto It = m_protected_ranges.begin(), E = m_protected_ranges.end(); + for (; It != E;) + { + auto currentIt = It; + ++It; + auto protectedTexture = *currentIt; + u32 protectedRangeStart = std::get<1>(protectedTexture), protectedRangeSize = std::get<2>(protectedTexture); + if (addr >= protectedRangeStart && addr <= protectedRangeSize + protectedRangeStart) + { + std::lock_guard lock(m_mut); + u64 texadrr = std::get<0>(protectedTexture); + m_address_to_data[texadrr].first.m_is_dirty = true; + + vm::page_protect(protectedRangeStart, protectedRangeSize, 0, vm::page_writable, 0); + m_protected_ranges.erase(currentIt); + handled = true; + } + } + return handled; +} + +std::pair > *data_cache::find_data_if_available(u64 key) noexcept +{ + std::lock_guard lock(m_mut); + auto It = m_address_to_data.find(key); + if (It == m_address_to_data.end()) + return nullptr; + return &It->second; +} + +void data_cache::unprotect_all() noexcept +{ + std::lock_guard lock(m_mut); + for (auto &protectedTexture : m_protected_ranges) + { + u32 protectedRangeStart = std::get<1>(protectedTexture), protectedRangeSize = std::get<2>(protectedTexture); + vm::page_protect(protectedRangeStart, protectedRangeSize, 0, vm::page_writable, 0); + } +} + +ComPtr data_cache::remove_from_cache(u64 key) noexcept +{ + auto result = m_address_to_data[key].second; + m_address_to_data.erase(key); + return result; +} + +void resource_storage::reset() +{ + descriptors_heap_index = 0; + current_sampler_index = 0; + sampler_descriptors_heap_index = 0; + + ThrowIfFailed(command_allocator->Reset()); + set_new_command_list(); +} + +void resource_storage::set_new_command_list() +{ + ThrowIfFailed(command_list->Reset(command_allocator.Get(), nullptr)); +} + +void resource_storage::init(ID3D12Device *device) +{ + in_use = false; + m_device = device; + ram_framebuffer = nullptr; + // Create a global command allocator + ThrowIfFailed(device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(command_allocator.GetAddressOf()))); + + ThrowIfFailed(m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, command_allocator.Get(), nullptr, IID_PPV_ARGS(command_list.GetAddressOf()))); + ThrowIfFailed(command_list->Close()); + + D3D12_DESCRIPTOR_HEAP_DESC descriptor_heap_desc = { D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 10000, D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE }; + ThrowIfFailed(device->CreateDescriptorHeap(&descriptor_heap_desc, IID_PPV_ARGS(&descriptors_heap))); + + D3D12_DESCRIPTOR_HEAP_DESC sampler_heap_desc = { D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER , 2048, D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE }; + ThrowIfFailed(device->CreateDescriptorHeap(&sampler_heap_desc, IID_PPV_ARGS(&sampler_descriptor_heap[0]))); + ThrowIfFailed(device->CreateDescriptorHeap(&sampler_heap_desc, IID_PPV_ARGS(&sampler_descriptor_heap[1]))); + + frame_finished_handle = CreateEventEx(nullptr, FALSE, FALSE, EVENT_ALL_ACCESS); + fence_value = 0; + ThrowIfFailed(device->CreateFence(fence_value++, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(frame_finished_fence.GetAddressOf()))); +} + +void resource_storage::wait_and_clean() +{ + if (in_use) + WaitForSingleObjectEx(frame_finished_handle, INFINITE, FALSE); + else + ThrowIfFailed(command_list->Close()); + + reset(); + + dirty_textures.clear(); + + ram_framebuffer = nullptr; +} + +void resource_storage::release() +{ + dirty_textures.clear(); + // NOTE: Should be released only after gfx pipeline last command has been finished. + CloseHandle(frame_finished_handle); +} + +#endif diff --git a/rpcs3/Emu/RSX/D3D12/D3D12MemoryHelpers.h b/rpcs3/Emu/RSX/D3D12/D3D12MemoryHelpers.h new file mode 100644 index 0000000000..7c0781b92f --- /dev/null +++ b/rpcs3/Emu/RSX/D3D12/D3D12MemoryHelpers.h @@ -0,0 +1,238 @@ +#pragma once +#include "D3D12Utils.h" +#include "d3dx12.h" + + +template +struct init_heap +{ + static T* init(ID3D12Device *device, size_t heapSize, D3D12_HEAP_TYPE type, D3D12_HEAP_FLAGS flags); +}; + +template<> +struct init_heap +{ + static ID3D12Heap* init(ID3D12Device *device, size_t heap_size, D3D12_HEAP_TYPE type, D3D12_HEAP_FLAGS flags) + { + ID3D12Heap *result; + D3D12_HEAP_DESC heap_desc = {}; + heap_desc.SizeInBytes = heap_size; + heap_desc.Properties.Type = type; + heap_desc.Flags = flags; + ThrowIfFailed(device->CreateHeap(&heap_desc, IID_PPV_ARGS(&result))); + return result; + } +}; + +template<> +struct init_heap +{ + static ID3D12Resource* init(ID3D12Device *device, size_t heap_size, D3D12_HEAP_TYPE type, D3D12_HEAP_FLAGS flags) + { + ID3D12Resource *result; + D3D12_HEAP_PROPERTIES heap_properties = {}; + heap_properties.Type = type; + ThrowIfFailed(device->CreateCommittedResource(&heap_properties, + flags, + &CD3DX12_RESOURCE_DESC::Buffer(heap_size), + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&result)) + ); + + return result; + } +}; + + +/** +* Wrapper around a ID3D12Resource or a ID3D12Heap. +* Acts as a ring buffer : hold a get and put pointers, +* put pointer is used as storage space offset +* and get is used as beginning of in use data space. +* This wrapper checks that put pointer doesn't cross get one. +*/ +template +struct data_heap +{ + T *m_heap; + size_t m_size; + size_t m_put_pos; // Start of free space + size_t m_get_pos; // End of free space + + void init(ID3D12Device *device, size_t heap_size, D3D12_HEAP_TYPE type, D3D12_HEAP_FLAGS flags) + { + m_size = heap_size; + m_heap = init_heap::init(device, heap_size, type, flags); + m_put_pos = 0; + m_get_pos = heap_size - 1; + } + + /** + * Does alloc cross get position ? + */ + bool can_alloc(size_t size) const noexcept + { + size_t alloc_size = align(size, alignment); + if (m_put_pos + alloc_size < m_size) + { + // range before get + if (m_put_pos + alloc_size < m_get_pos) + return true; + // range after get + if (m_put_pos > m_get_pos) + return true; + return false; + } + else + { + // ..]....[..get.. + if (m_put_pos < m_get_pos) + return false; + // ..get..]...[... + // Actually all resources extending beyond heap space starts at 0 + if (alloc_size > m_get_pos) + return false; + return true; + } + } + + size_t alloc(size_t size) noexcept + { + assert(can_alloc(size)); + size_t alloc_size = align(size, alignment); + if (m_put_pos + alloc_size < m_size) + { + size_t old_put_pos = m_put_pos; + m_put_pos += alloc_size; + return old_put_pos; + } + else + { + m_put_pos = alloc_size; + return 0; + } + } + + void release() noexcept + { + m_heap->Release(); + } + + /** + * return current putpos - 1 + */ + size_t get_current_put_pos_minus_one() const noexcept + { + return (m_put_pos - 1 > 0) ? m_put_pos - 1 : m_size - 1; + } +}; + +struct texture_entry +{ + int m_format; + size_t m_width; + size_t m_height; + size_t m_mipmap; + bool m_is_dirty; + + texture_entry() : m_format(0), m_width(0), m_height(0), m_is_dirty(true) + {} + + texture_entry(int f, size_t w, size_t h, size_t m) : m_format(f), m_width(w), m_height(h), m_is_dirty(false) + {} + + bool operator==(const texture_entry &other) + { + return (m_format == other.m_format && m_width == other.m_width && m_height == other.m_height); + } +}; + +/** +* Manages cache of data (texture/vertex/index) +*/ +struct data_cache +{ +private: + /** + * Mutex protecting m_dataCache access + * Memory protection fault catch can be generated by any thread and + * modifies it. + */ + std::mutex m_mut; + + std::unordered_map> > m_address_to_data; // Storage + std::list > m_protected_ranges; // address, start of protected range, size of protected range +public: + void store_and_protect_data(u64 key, u32 start, size_t size, int format, size_t w, size_t h, size_t m, ComPtr data) noexcept; + + /** + * Make memory from start to start + size write protected. + * Associate key to this range so that when a write is detected, data at key is marked dirty. + */ + void protect_data(u64 key, u32 start, size_t size) noexcept; + + /** + * Remove all data containing addr from cache, unprotect them. Returns false if no data is modified. + */ + bool invalidate_address(u32 addr) noexcept; + + std::pair > *find_data_if_available(u64 key) noexcept; + + void unprotect_all() noexcept; + + /** + * Remove data stored at key, and returns a ComPtr owning it. + * The caller is responsible for releasing the ComPtr. + */ + ComPtr remove_from_cache(u64 key) noexcept; +}; + +/** +* Stores data that are "ping ponged" between frame. +* For instance command allocator : maintains 2 command allocators and +* swap between them when frame is flipped. +*/ +struct resource_storage +{ + bool in_use; // False until command list has been populated at least once + ComPtr frame_finished_fence; + UINT64 fence_value; + HANDLE frame_finished_handle; + + // Pointer to device, not owned by ResourceStorage + ID3D12Device *m_device; + ComPtr command_allocator; + ComPtr command_list; + + // Descriptor heap + ComPtr descriptors_heap; + size_t descriptors_heap_index; + + // Sampler heap + ComPtr sampler_descriptor_heap[2]; + size_t sampler_descriptors_heap_index; + size_t current_sampler_index; + + ComPtr ram_framebuffer; + + /// Texture that were invalidated + std::list > dirty_textures; + + /** + * Start position in heaps of resources used for this frame. + * This means newer resources shouldn't allocate memory crossing this position + * until the frame rendering is over. + */ + size_t constants_heap_get_pos; + size_t vertex_index_heap_get_pos; + size_t texture_upload_heap_get_pos; + size_t readback_heap_get_pos; + size_t uav_heap_get_pos; + + void reset(); + void init(ID3D12Device *device); + void set_new_command_list(); + void wait_and_clean(); + void release(); +}; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Overlay.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Overlay.cpp index 3500e258f1..58c3600eea 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Overlay.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Overlay.cpp @@ -6,23 +6,59 @@ #include #include + +namespace +{ // D2D -ComPtr d3d11Device; -ComPtr m_d3d11DeviceContext; -ComPtr m_d3d11On12Device; -ComPtr m_d3d12Device; -ComPtr m_dWriteFactory; -ComPtr m_d2dFactory; -ComPtr m_d2dDevice; -ComPtr m_d2dDeviceContext; -ComPtr m_wrappedBackBuffers[2]; -ComPtr m_d2dRenderTargets[2]; -ComPtr m_textFormat; -ComPtr m_textBrush; +ComPtr g_d3d11_device; +ComPtr g_d3d11_device_context; +ComPtr g_d3d11on12_device; +ComPtr g_d3d12_device; +ComPtr g_dwrite_factory; +ComPtr g_d2d_factory; +ComPtr g_d2d_device; +ComPtr g_d2d_device_context; +ComPtr g_wrapped_backbuffers[2]; +ComPtr g_d2d_render_targets[2]; +ComPtr g_text_format; +ComPtr g_text_brush; + +void draw_strings(const D2D1_SIZE_F &rtSize, size_t backbuffer_id, const std::vector &strings) noexcept +{ + // Acquire our wrapped render target resource for the current back buffer. + g_d3d11on12_device->AcquireWrappedResources(g_wrapped_backbuffers[backbuffer_id ].GetAddressOf(), 1); + + // Render text directly to the back buffer. + g_d2d_device_context->SetTarget(g_d2d_render_targets[backbuffer_id].Get()); + g_d2d_device_context->BeginDraw(); + g_d2d_device_context->SetTransform(D2D1::Matrix3x2F::Identity()); + float xpos = 0.f; + for (const std::wstring &str : strings) + { + g_d2d_device_context->DrawTextW( + str.c_str(), + (UINT32)str.size(), + g_text_format.Get(), + &D2D1::RectF(0, xpos, rtSize.width, rtSize.height), + g_text_brush.Get() + ); + xpos += 14.f; + } + g_d2d_device_context->EndDraw(); + + // Release our wrapped render target resource. Releasing + // transitions the back buffer resource to the state specified + // as the OutState when the wrapped resource was created. + g_d3d11on12_device->ReleaseWrappedResources(g_wrapped_backbuffers[backbuffer_id].GetAddressOf(), 1); + + // Flush to submit the 11 command list to the shared command queue. + g_d3d11_device_context->Flush(); +} +} extern PFN_D3D11ON12_CREATE_DEVICE wrapD3D11On12CreateDevice; -void D3D12GSRender::InitD2DStructures() +void D3D12GSRender::init_d2d_structures() { wrapD3D11On12CreateDevice( m_device.Get(), @@ -32,25 +68,25 @@ void D3D12GSRender::InitD2DStructures() reinterpret_cast(m_commandQueueGraphic.GetAddressOf()), 1, 0, - &d3d11Device, - &m_d3d11DeviceContext, + &g_d3d11_device, + &g_d3d11_device_context, nullptr ); - d3d11Device.As(&m_d3d11On12Device); + g_d3d11_device.As(&g_d3d11on12_device); D2D1_DEVICE_CONTEXT_OPTIONS deviceOptions = D2D1_DEVICE_CONTEXT_OPTIONS_NONE; D2D1_FACTORY_OPTIONS d2dFactoryOptions = {}; - D2D1CreateFactory(D2D1_FACTORY_TYPE_SINGLE_THREADED, __uuidof(ID2D1Factory3), &d2dFactoryOptions, &m_d2dFactory); + D2D1CreateFactory(D2D1_FACTORY_TYPE_SINGLE_THREADED, __uuidof(ID2D1Factory3), &d2dFactoryOptions, &g_d2d_factory); Microsoft::WRL::ComPtr dxgiDevice; - m_d3d11On12Device.As(&dxgiDevice); - m_d2dFactory->CreateDevice(dxgiDevice.Get(), &m_d2dDevice); - m_d2dDevice->CreateDeviceContext(deviceOptions, &m_d2dDeviceContext); - DWriteCreateFactory(DWRITE_FACTORY_TYPE_SHARED, __uuidof(IDWriteFactory), &m_dWriteFactory); + g_d3d11on12_device.As(&dxgiDevice); + g_d2d_factory->CreateDevice(dxgiDevice.Get(), &g_d2d_device); + g_d2d_device->CreateDeviceContext(deviceOptions, &g_d2d_device_context); + DWriteCreateFactory(DWRITE_FACTORY_TYPE_SHARED, __uuidof(IDWriteFactory), &g_dwrite_factory); float dpiX; float dpiY; - m_d2dFactory->GetDesktopDpi(&dpiX, &dpiY); + g_d2d_factory->GetDesktopDpi(&dpiX, &dpiY); D2D1_BITMAP_PROPERTIES1 bitmapProperties = D2D1::BitmapProperties1( D2D1_BITMAP_OPTIONS_TARGET | D2D1_BITMAP_OPTIONS_CANNOT_DRAW, D2D1::PixelFormat(DXGI_FORMAT_UNKNOWN, D2D1_ALPHA_MODE_PREMULTIPLIED), @@ -61,26 +97,26 @@ void D3D12GSRender::InitD2DStructures() for (unsigned i = 0; i < 2; i++) { D3D11_RESOURCE_FLAGS d3d11Flags = { D3D11_BIND_RENDER_TARGET }; - m_d3d11On12Device->CreateWrappedResource( + g_d3d11on12_device->CreateWrappedResource( m_backBuffer[i].Get(), &d3d11Flags, D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_PRESENT, - IID_PPV_ARGS(&m_wrappedBackBuffers[i]) + IID_PPV_ARGS(&g_wrapped_backbuffers[i]) ); // Create a render target for D2D to draw directly to this back buffer. Microsoft::WRL::ComPtr surface; - m_wrappedBackBuffers[i].As(&surface); - m_d2dDeviceContext->CreateBitmapFromDxgiSurface( + g_wrapped_backbuffers[i].As(&surface); + g_d2d_device_context->CreateBitmapFromDxgiSurface( surface.Get(), &bitmapProperties, - &m_d2dRenderTargets[i] + &g_d2d_render_targets[i] ); } - m_d2dDeviceContext->CreateSolidColorBrush(D2D1::ColorF(D2D1::ColorF::DarkGreen), &m_textBrush); - m_dWriteFactory->CreateTextFormat( + g_d2d_device_context->CreateSolidColorBrush(D2D1::ColorF(D2D1::ColorF::DarkGreen), &g_text_brush); + g_dwrite_factory->CreateTextFormat( L"Verdana", NULL, DWRITE_FONT_WEIGHT_BOLD, @@ -88,33 +124,33 @@ void D3D12GSRender::InitD2DStructures() DWRITE_FONT_STRETCH_NORMAL, 14, L"en-us", - &m_textFormat + &g_text_format ); - m_textFormat->SetTextAlignment(DWRITE_TEXT_ALIGNMENT_LEADING); - m_textFormat->SetParagraphAlignment(DWRITE_PARAGRAPH_ALIGNMENT_NEAR); + g_text_format->SetTextAlignment(DWRITE_TEXT_ALIGNMENT_LEADING); + g_text_format->SetParagraphAlignment(DWRITE_PARAGRAPH_ALIGNMENT_NEAR); } -void D3D12GSRender::ReleaseD2DStructures() +void D3D12GSRender::release_d2d_structures() { - d3d11Device.Reset(); - m_d3d11DeviceContext.Reset(); - m_d3d11On12Device.Reset(); - m_d3d12Device.Reset(); - m_dWriteFactory.Reset(); - m_d2dFactory.Reset(); - m_d2dDevice.Reset(); - m_d2dDeviceContext.Reset(); - m_wrappedBackBuffers[0].Reset(); - m_d2dRenderTargets[0].Reset(); - m_wrappedBackBuffers[1].Reset(); - m_d2dRenderTargets[1].Reset(); - m_textFormat.Reset(); - m_textBrush.Reset(); + g_d3d11_device.Reset(); + g_d3d11_device_context.Reset(); + g_d3d11on12_device.Reset(); + g_d3d12_device.Reset(); + g_dwrite_factory.Reset(); + g_d2d_factory.Reset(); + g_d2d_device.Reset(); + g_d2d_device_context.Reset(); + g_wrapped_backbuffers[0].Reset(); + g_d2d_render_targets[0].Reset(); + g_wrapped_backbuffers[1].Reset(); + g_d2d_render_targets[1].Reset(); + g_text_format.Reset(); + g_text_brush.Reset(); } -void D3D12GSRender::renderOverlay() +void D3D12GSRender::render_overlay() { - D2D1_SIZE_F rtSize = m_d2dRenderTargets[m_swapChain->GetCurrentBackBufferIndex()]->GetSize(); + D2D1_SIZE_F rtSize = g_d2d_render_targets[m_swapChain->GetCurrentBackBufferIndex()]->GetSize(); std::wstring duration = L"Draw duration : " + std::to_wstring(m_timers.m_drawCallDuration) + L" us"; float vtxIdxPercent = (float)m_timers.m_vertexIndexDuration / (float)m_timers.m_drawCallDuration; std::wstring vertexIndexDuration = L"Vtx/Idx upload : " + std::to_wstring(m_timers.m_vertexIndexDuration) + L" us (" + std::to_wstring(100.f * vtxIdxPercent) + L" %)"; @@ -130,85 +166,17 @@ void D3D12GSRender::renderOverlay() std::wstring flipDuration = L"Flip : " + std::to_wstring(m_timers.m_flipDuration) + L" us"; std::wstring count = L"Draw count : " + std::to_wstring(m_timers.m_drawCallCount); - - // Acquire our wrapped render target resource for the current back buffer. - m_d3d11On12Device->AcquireWrappedResources(m_wrappedBackBuffers[m_swapChain->GetCurrentBackBufferIndex()].GetAddressOf(), 1); - - // Render text directly to the back buffer. - m_d2dDeviceContext->SetTarget(m_d2dRenderTargets[m_swapChain->GetCurrentBackBufferIndex()].Get()); - m_d2dDeviceContext->BeginDraw(); - m_d2dDeviceContext->SetTransform(D2D1::Matrix3x2F::Identity()); - m_d2dDeviceContext->DrawTextW( - duration.c_str(), - (UINT32)duration.size(), - m_textFormat.Get(), - &D2D1::RectF(0, 0, rtSize.width, rtSize.height), - m_textBrush.Get() - ); - m_d2dDeviceContext->DrawTextW( - count.c_str(), - (UINT32)count.size(), - m_textFormat.Get(), - &D2D1::RectF(0, 14, rtSize.width, rtSize.height), - m_textBrush.Get() - ); - m_d2dDeviceContext->DrawTextW( - rttDuration.c_str(), - (UINT32)rttDuration.size(), - m_textFormat.Get(), - &D2D1::RectF(0, 28, rtSize.width, rtSize.height), - m_textBrush.Get() - ); - m_d2dDeviceContext->DrawTextW( - vertexIndexDuration.c_str(), - (UINT32)vertexIndexDuration.size(), - m_textFormat.Get(), - &D2D1::RectF(0, 42, rtSize.width, rtSize.height), - m_textBrush.Get() - ); - m_d2dDeviceContext->DrawTextW( - size.c_str(), - (UINT32)size.size(), - m_textFormat.Get(), - &D2D1::RectF(0, 56, rtSize.width, rtSize.height), - m_textBrush.Get() - ); - m_d2dDeviceContext->DrawTextW( - programDuration.c_str(), - (UINT32)programDuration.size(), - m_textFormat.Get(), - &D2D1::RectF(0, 70, rtSize.width, rtSize.height), - m_textBrush.Get() - ); - m_d2dDeviceContext->DrawTextW( - constantDuration.c_str(), - (UINT32)constantDuration.size(), - m_textFormat.Get(), - &D2D1::RectF(0, 86, rtSize.width, rtSize.height), - m_textBrush.Get() - ); - m_d2dDeviceContext->DrawTextW( - texDuration.c_str(), - (UINT32)texDuration.size(), - m_textFormat.Get(), - &D2D1::RectF(0, 98, rtSize.width, rtSize.height), - m_textBrush.Get() - ); - m_d2dDeviceContext->DrawTextW( - flipDuration.c_str(), - (UINT32)flipDuration.size(), - m_textFormat.Get(), - &D2D1::RectF(0, 112, rtSize.width, rtSize.height), - m_textBrush.Get() - ); - m_d2dDeviceContext->EndDraw(); - - // Release our wrapped render target resource. Releasing - // transitions the back buffer resource to the state specified - // as the OutState when the wrapped resource was created. - m_d3d11On12Device->ReleaseWrappedResources(m_wrappedBackBuffers[m_swapChain->GetCurrentBackBufferIndex()].GetAddressOf(), 1); - - // Flush to submit the 11 command list to the shared command queue. - m_d3d11DeviceContext->Flush(); + draw_strings(rtSize, m_swapChain->GetCurrentBackBufferIndex(), + { + duration, + count, + rttDuration, + vertexIndexDuration, + size, + programDuration, + constantDuration, + texDuration, + flipDuration + }); } #endif diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp index ca03098028..2ce50bc6aa 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp @@ -4,6 +4,7 @@ #include "D3D12PipelineState.h" #include "D3D12GSRender.h" #include "Emu/state.h" +#include "D3D12Formats.h" #pragma comment (lib, "d3dcompiler.lib") @@ -33,7 +34,7 @@ void Shader::Compile(const std::string &code, SHADER_TYPE st) } } -bool D3D12GSRender::LoadProgram() +bool D3D12GSRender::load_program() { RSXVertexProgram vertex_program; u32 transform_program_start = rsx::method_registers[NV4097_SET_TRANSFORM_PROGRAM_START]; @@ -57,29 +58,7 @@ bool D3D12GSRender::LoadProgram() fragment_program.ctrl = rsx::method_registers[NV4097_SET_SHADER_CONTROL]; D3D12PipelineProperties prop = {}; - switch (draw_mode) - { - case CELL_GCM_PRIMITIVE_POINTS: - prop.Topology = D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT; - break; - case CELL_GCM_PRIMITIVE_LINES: - case CELL_GCM_PRIMITIVE_LINE_LOOP: - case CELL_GCM_PRIMITIVE_LINE_STRIP: - prop.Topology = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; - break; - case CELL_GCM_PRIMITIVE_TRIANGLES: - case CELL_GCM_PRIMITIVE_TRIANGLE_STRIP: - case CELL_GCM_PRIMITIVE_TRIANGLE_FAN: - prop.Topology = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; - break; - case CELL_GCM_PRIMITIVE_QUADS: - case CELL_GCM_PRIMITIVE_QUAD_STRIP: - case CELL_GCM_PRIMITIVE_POLYGON: - default: - // LOG_ERROR(RSX, "Unsupported primitive type"); - prop.Topology = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; - break; - } + prop.Topology = get_primitive_topology_type(draw_mode); static D3D12_BLEND_DESC CD3D12_BLEND_DESC = { @@ -106,61 +85,61 @@ bool D3D12GSRender::LoadProgram() if (rsx::method_registers[NV4097_SET_BLEND_ENABLE_MRT] & 0x8) prop.Blend.RenderTarget[3].BlendEnable = true; - prop.Blend.RenderTarget[0].BlendOp = getBlendOp(rsx::method_registers[NV4097_SET_BLEND_EQUATION] & 0xFFFF); - prop.Blend.RenderTarget[0].BlendOpAlpha = getBlendOp(rsx::method_registers[NV4097_SET_BLEND_EQUATION] >> 16); + prop.Blend.RenderTarget[0].BlendOp = get_blend_op(rsx::method_registers[NV4097_SET_BLEND_EQUATION] & 0xFFFF); + prop.Blend.RenderTarget[0].BlendOpAlpha = get_blend_op(rsx::method_registers[NV4097_SET_BLEND_EQUATION] >> 16); if (rsx::method_registers[NV4097_SET_BLEND_ENABLE_MRT] & 0x2) { - prop.Blend.RenderTarget[1].BlendOp = getBlendOp(rsx::method_registers[NV4097_SET_BLEND_EQUATION] & 0xFFFF); - prop.Blend.RenderTarget[1].BlendOpAlpha = getBlendOp(rsx::method_registers[NV4097_SET_BLEND_EQUATION] >> 16); + prop.Blend.RenderTarget[1].BlendOp = get_blend_op(rsx::method_registers[NV4097_SET_BLEND_EQUATION] & 0xFFFF); + prop.Blend.RenderTarget[1].BlendOpAlpha = get_blend_op(rsx::method_registers[NV4097_SET_BLEND_EQUATION] >> 16); } if (rsx::method_registers[NV4097_SET_BLEND_ENABLE_MRT] & 0x4) { - prop.Blend.RenderTarget[2].BlendOp = getBlendOp(rsx::method_registers[NV4097_SET_BLEND_EQUATION] & 0xFFFF); - prop.Blend.RenderTarget[2].BlendOpAlpha = getBlendOp(rsx::method_registers[NV4097_SET_BLEND_EQUATION] >> 16); + prop.Blend.RenderTarget[2].BlendOp = get_blend_op(rsx::method_registers[NV4097_SET_BLEND_EQUATION] & 0xFFFF); + prop.Blend.RenderTarget[2].BlendOpAlpha = get_blend_op(rsx::method_registers[NV4097_SET_BLEND_EQUATION] >> 16); } if (rsx::method_registers[NV4097_SET_BLEND_ENABLE_MRT] & 0x8) { - prop.Blend.RenderTarget[3].BlendOp = getBlendOp(rsx::method_registers[NV4097_SET_BLEND_EQUATION] & 0xFFFF); - prop.Blend.RenderTarget[3].BlendOpAlpha = getBlendOp(rsx::method_registers[NV4097_SET_BLEND_EQUATION] >> 16); + prop.Blend.RenderTarget[3].BlendOp = get_blend_op(rsx::method_registers[NV4097_SET_BLEND_EQUATION] & 0xFFFF); + prop.Blend.RenderTarget[3].BlendOpAlpha = get_blend_op(rsx::method_registers[NV4097_SET_BLEND_EQUATION] >> 16); } - prop.Blend.RenderTarget[0].SrcBlend = getBlendFactor(rsx::method_registers[NV4097_SET_BLEND_FUNC_SFACTOR] & 0xFFFF); - prop.Blend.RenderTarget[0].DestBlend = getBlendFactor(rsx::method_registers[NV4097_SET_BLEND_FUNC_DFACTOR] & 0xFFFF); - prop.Blend.RenderTarget[0].SrcBlendAlpha = getBlendFactorAlpha(rsx::method_registers[NV4097_SET_BLEND_FUNC_SFACTOR] >> 16); - prop.Blend.RenderTarget[0].DestBlendAlpha = getBlendFactorAlpha(rsx::method_registers[NV4097_SET_BLEND_FUNC_DFACTOR] >> 16); + prop.Blend.RenderTarget[0].SrcBlend = get_blend_factor(rsx::method_registers[NV4097_SET_BLEND_FUNC_SFACTOR] & 0xFFFF); + prop.Blend.RenderTarget[0].DestBlend = get_blend_factor(rsx::method_registers[NV4097_SET_BLEND_FUNC_DFACTOR] & 0xFFFF); + prop.Blend.RenderTarget[0].SrcBlendAlpha = get_blend_factor_alpha(rsx::method_registers[NV4097_SET_BLEND_FUNC_SFACTOR] >> 16); + prop.Blend.RenderTarget[0].DestBlendAlpha = get_blend_factor_alpha(rsx::method_registers[NV4097_SET_BLEND_FUNC_DFACTOR] >> 16); if (rsx::method_registers[NV4097_SET_BLEND_ENABLE_MRT] & 0x2) { - prop.Blend.RenderTarget[1].SrcBlend = getBlendFactor(rsx::method_registers[NV4097_SET_BLEND_FUNC_SFACTOR] & 0xFFFF); - prop.Blend.RenderTarget[1].DestBlend = getBlendFactor(rsx::method_registers[NV4097_SET_BLEND_FUNC_DFACTOR] & 0xFFFF); - prop.Blend.RenderTarget[1].SrcBlendAlpha = getBlendFactorAlpha(rsx::method_registers[NV4097_SET_BLEND_FUNC_SFACTOR] >> 16); - prop.Blend.RenderTarget[1].DestBlendAlpha = getBlendFactorAlpha(rsx::method_registers[NV4097_SET_BLEND_FUNC_DFACTOR] >> 16); + prop.Blend.RenderTarget[1].SrcBlend = get_blend_factor(rsx::method_registers[NV4097_SET_BLEND_FUNC_SFACTOR] & 0xFFFF); + prop.Blend.RenderTarget[1].DestBlend = get_blend_factor(rsx::method_registers[NV4097_SET_BLEND_FUNC_DFACTOR] & 0xFFFF); + prop.Blend.RenderTarget[1].SrcBlendAlpha = get_blend_factor_alpha(rsx::method_registers[NV4097_SET_BLEND_FUNC_SFACTOR] >> 16); + prop.Blend.RenderTarget[1].DestBlendAlpha = get_blend_factor_alpha(rsx::method_registers[NV4097_SET_BLEND_FUNC_DFACTOR] >> 16); } if (rsx::method_registers[NV4097_SET_BLEND_ENABLE_MRT] & 0x4) { - prop.Blend.RenderTarget[2].SrcBlend = getBlendFactor(rsx::method_registers[NV4097_SET_BLEND_FUNC_SFACTOR] & 0xFFFF); - prop.Blend.RenderTarget[2].DestBlend = getBlendFactor(rsx::method_registers[NV4097_SET_BLEND_FUNC_DFACTOR] & 0xFFFF); - prop.Blend.RenderTarget[2].SrcBlendAlpha = getBlendFactorAlpha(rsx::method_registers[NV4097_SET_BLEND_FUNC_SFACTOR] >> 16); - prop.Blend.RenderTarget[2].DestBlendAlpha = getBlendFactorAlpha(rsx::method_registers[NV4097_SET_BLEND_FUNC_DFACTOR] >> 16); + prop.Blend.RenderTarget[2].SrcBlend = get_blend_factor(rsx::method_registers[NV4097_SET_BLEND_FUNC_SFACTOR] & 0xFFFF); + prop.Blend.RenderTarget[2].DestBlend = get_blend_factor(rsx::method_registers[NV4097_SET_BLEND_FUNC_DFACTOR] & 0xFFFF); + prop.Blend.RenderTarget[2].SrcBlendAlpha = get_blend_factor_alpha(rsx::method_registers[NV4097_SET_BLEND_FUNC_SFACTOR] >> 16); + prop.Blend.RenderTarget[2].DestBlendAlpha = get_blend_factor_alpha(rsx::method_registers[NV4097_SET_BLEND_FUNC_DFACTOR] >> 16); } if (rsx::method_registers[NV4097_SET_BLEND_ENABLE_MRT] & 0x8) { - prop.Blend.RenderTarget[3].SrcBlend = getBlendFactor(rsx::method_registers[NV4097_SET_BLEND_FUNC_SFACTOR] & 0xFFFF); - prop.Blend.RenderTarget[3].DestBlend = getBlendFactor(rsx::method_registers[NV4097_SET_BLEND_FUNC_DFACTOR] & 0xFFFF); - prop.Blend.RenderTarget[3].SrcBlendAlpha = getBlendFactorAlpha(rsx::method_registers[NV4097_SET_BLEND_FUNC_SFACTOR] >> 16); - prop.Blend.RenderTarget[3].DestBlendAlpha = getBlendFactorAlpha(rsx::method_registers[NV4097_SET_BLEND_FUNC_DFACTOR] >> 16); + prop.Blend.RenderTarget[3].SrcBlend = get_blend_factor(rsx::method_registers[NV4097_SET_BLEND_FUNC_SFACTOR] & 0xFFFF); + prop.Blend.RenderTarget[3].DestBlend = get_blend_factor(rsx::method_registers[NV4097_SET_BLEND_FUNC_DFACTOR] & 0xFFFF); + prop.Blend.RenderTarget[3].SrcBlendAlpha = get_blend_factor_alpha(rsx::method_registers[NV4097_SET_BLEND_FUNC_SFACTOR] >> 16); + prop.Blend.RenderTarget[3].DestBlendAlpha = get_blend_factor_alpha(rsx::method_registers[NV4097_SET_BLEND_FUNC_DFACTOR] >> 16); } } if (rsx::method_registers[NV4097_SET_LOGIC_OP_ENABLE]) { prop.Blend.RenderTarget[0].LogicOpEnable = true; - prop.Blend.RenderTarget[0].LogicOp = getLogicOp(rsx::method_registers[NV4097_SET_LOGIC_OP]); + prop.Blend.RenderTarget[0].LogicOp = get_logic_op(rsx::method_registers[NV4097_SET_LOGIC_OP]); } // if (m_set_blend_color) @@ -168,31 +147,8 @@ bool D3D12GSRender::LoadProgram() // glBlendColor(m_blend_color_r, m_blend_color_g, m_blend_color_b, m_blend_color_a); // checkForGlError("glBlendColor"); } - - switch (m_surface.depth_format) - { - case 0: - break; - case CELL_GCM_SURFACE_Z16: - prop.DepthStencilFormat = DXGI_FORMAT_D16_UNORM; - break; - case CELL_GCM_SURFACE_Z24S8: - prop.DepthStencilFormat = DXGI_FORMAT_D24_UNORM_S8_UINT; - break; - default: - LOG_ERROR(RSX, "Bad depth format! (%d)", m_surface.depth_format); - assert(0); - } - - switch (m_surface.color_format) - { - case CELL_GCM_SURFACE_A8R8G8B8: - prop.RenderTargetsFormat = DXGI_FORMAT_R8G8B8A8_UNORM; - break; - case CELL_GCM_SURFACE_F_W16Z16Y16X16: - prop.RenderTargetsFormat = DXGI_FORMAT_R16G16B16A16_FLOAT; - break; - } + prop.DepthStencilFormat = get_depth_stencil_surface_format(m_surface.depth_format); + prop.RenderTargetsFormat = get_color_surface_format(m_surface.color_format); switch (u32 color_target = rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET]) { @@ -210,33 +166,33 @@ bool D3D12GSRender::LoadProgram() prop.numMRT = 4; break; default: - LOG_ERROR(RSX, "Bad surface color target: %d", color_target); + break; } prop.DepthStencil.DepthEnable = !!(rsx::method_registers[NV4097_SET_DEPTH_TEST_ENABLE]); prop.DepthStencil.DepthWriteMask = !!(rsx::method_registers[NV4097_SET_DEPTH_MASK]) ? D3D12_DEPTH_WRITE_MASK_ALL : D3D12_DEPTH_WRITE_MASK_ZERO; - prop.DepthStencil.DepthFunc = getCompareFunc(rsx::method_registers[NV4097_SET_DEPTH_FUNC]); + prop.DepthStencil.DepthFunc = get_compare_func(rsx::method_registers[NV4097_SET_DEPTH_FUNC]); prop.DepthStencil.StencilEnable = !!(rsx::method_registers[NV4097_SET_STENCIL_TEST_ENABLE]); prop.DepthStencil.StencilReadMask = rsx::method_registers[NV4097_SET_STENCIL_FUNC_MASK]; prop.DepthStencil.StencilWriteMask = rsx::method_registers[NV4097_SET_STENCIL_MASK]; - prop.DepthStencil.FrontFace.StencilPassOp = getStencilOp(rsx::method_registers[NV4097_SET_STENCIL_OP_ZPASS]); - prop.DepthStencil.FrontFace.StencilDepthFailOp = getStencilOp(rsx::method_registers[NV4097_SET_STENCIL_OP_ZFAIL]); - prop.DepthStencil.FrontFace.StencilFailOp = getStencilOp(rsx::method_registers[NV4097_SET_STENCIL_OP_FAIL]); - prop.DepthStencil.FrontFace.StencilFunc = getCompareFunc(rsx::method_registers[NV4097_SET_STENCIL_FUNC]); + prop.DepthStencil.FrontFace.StencilPassOp = get_stencil_op(rsx::method_registers[NV4097_SET_STENCIL_OP_ZPASS]); + prop.DepthStencil.FrontFace.StencilDepthFailOp = get_stencil_op(rsx::method_registers[NV4097_SET_STENCIL_OP_ZFAIL]); + prop.DepthStencil.FrontFace.StencilFailOp = get_stencil_op(rsx::method_registers[NV4097_SET_STENCIL_OP_FAIL]); + prop.DepthStencil.FrontFace.StencilFunc = get_compare_func(rsx::method_registers[NV4097_SET_STENCIL_FUNC]); if (rsx::method_registers[NV4097_SET_TWO_SIDED_STENCIL_TEST_ENABLE]) { - prop.DepthStencil.BackFace.StencilFailOp = getStencilOp(rsx::method_registers[NV4097_SET_BACK_STENCIL_OP_FAIL]); - prop.DepthStencil.BackFace.StencilFunc = getCompareFunc(rsx::method_registers[NV4097_SET_BACK_STENCIL_FUNC]); - prop.DepthStencil.BackFace.StencilPassOp = getStencilOp(rsx::method_registers[NV4097_SET_BACK_STENCIL_OP_ZPASS]); - prop.DepthStencil.BackFace.StencilDepthFailOp = getStencilOp(rsx::method_registers[NV4097_SET_BACK_STENCIL_OP_ZFAIL]); + prop.DepthStencil.BackFace.StencilFailOp = get_stencil_op(rsx::method_registers[NV4097_SET_BACK_STENCIL_OP_FAIL]); + prop.DepthStencil.BackFace.StencilFunc = get_compare_func(rsx::method_registers[NV4097_SET_BACK_STENCIL_FUNC]); + prop.DepthStencil.BackFace.StencilPassOp = get_stencil_op(rsx::method_registers[NV4097_SET_BACK_STENCIL_OP_ZPASS]); + prop.DepthStencil.BackFace.StencilDepthFailOp = get_stencil_op(rsx::method_registers[NV4097_SET_BACK_STENCIL_OP_ZFAIL]); } else { - prop.DepthStencil.BackFace.StencilPassOp = getStencilOp(rsx::method_registers[NV4097_SET_STENCIL_OP_ZPASS]); - prop.DepthStencil.BackFace.StencilDepthFailOp = getStencilOp(rsx::method_registers[NV4097_SET_STENCIL_OP_ZFAIL]); - prop.DepthStencil.BackFace.StencilFailOp = getStencilOp(rsx::method_registers[NV4097_SET_STENCIL_OP_FAIL]); - prop.DepthStencil.BackFace.StencilFunc = getCompareFunc(rsx::method_registers[NV4097_SET_STENCIL_FUNC]); + prop.DepthStencil.BackFace.StencilPassOp = get_stencil_op(rsx::method_registers[NV4097_SET_STENCIL_OP_ZPASS]); + prop.DepthStencil.BackFace.StencilDepthFailOp = get_stencil_op(rsx::method_registers[NV4097_SET_STENCIL_OP_ZFAIL]); + prop.DepthStencil.BackFace.StencilFailOp = get_stencil_op(rsx::method_registers[NV4097_SET_STENCIL_OP_FAIL]); + prop.DepthStencil.BackFace.StencilFunc = get_compare_func(rsx::method_registers[NV4097_SET_STENCIL_FUNC]); } // Sensible default value @@ -273,15 +229,7 @@ bool D3D12GSRender::LoadProgram() else prop.Rasterization.CullMode = D3D12_CULL_MODE_NONE; - switch (rsx::method_registers[NV4097_SET_FRONT_FACE]) - { - case CELL_GCM_CW: - prop.Rasterization.FrontCounterClockwise = FALSE; - break; - case CELL_GCM_CCW: - prop.Rasterization.FrontCounterClockwise = TRUE; - break; - } + prop.Rasterization.FrontCounterClockwise = get_front_face_ccw(rsx::method_registers[NV4097_SET_FRONT_FACE]); UINT8 mask = 0; mask |= (rsx::method_registers[NV4097_SET_COLOR_MASK] >> 16) & 0xFF ? D3D12_COLOR_WRITE_ENABLE_RED : 0; @@ -299,4 +247,4 @@ bool D3D12GSRender::LoadProgram() m_PSO = m_cachePSO.getGraphicPipelineState(&vertex_program, &fragment_program, prop, std::make_pair(m_device.Get(), m_rootSignatures)); return m_PSO != nullptr; } -#endif \ No newline at end of file +#endif diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h index 721f33d9d7..26d4267fdb 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h @@ -1,6 +1,6 @@ #pragma once -#include "D3D12.h" +#include "D3D12Utils.h" #include "../Common/ProgramStateCache.h" #include "D3D12VertexProgramDecompiler.h" #include "D3D12FragmentProgramDecompiler.h" diff --git a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp index 44a97ef872..4e9345c9ef 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp @@ -11,8 +11,9 @@ #include "D3D12.h" #include "D3D12GSRender.h" +#include "D3D12Formats.h" -void D3D12GSRender::PrepareRenderTargets(ID3D12GraphicsCommandList *copycmdlist) +void D3D12GSRender::prepare_render_targets(ID3D12GraphicsCommandList *copycmdlist) { u32 surface_format = rsx::method_registers[NV4097_SET_SURFACE_FORMAT]; @@ -78,16 +79,7 @@ void D3D12GSRender::PrepareRenderTargets(ID3D12GraphicsCommandList *copycmdlist) D3D12_CPU_DESCRIPTOR_HANDLE Handle = m_rtts.m_renderTargetsDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); size_t g_RTTIncrement = m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_RTV); - DXGI_FORMAT dxgiFormat; - switch (m_surface.color_format) - { - case CELL_GCM_SURFACE_A8R8G8B8: - dxgiFormat = DXGI_FORMAT_R8G8B8A8_UNORM; - break; - case CELL_GCM_SURFACE_F_W16Z16Y16X16: - dxgiFormat = DXGI_FORMAT_R16G16B16A16_FLOAT; - break; - } + DXGI_FORMAT dxgiFormat = get_color_surface_format(m_surface.color_format); D3D12_RENDER_TARGET_VIEW_DESC rttViewDesc = {}; rttViewDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; rttViewDesc.Format = dxgiFormat; @@ -168,23 +160,13 @@ void D3D12GSRender::PrepareRenderTargets(ID3D12GraphicsCommandList *copycmdlist) } } - ID3D12Resource *ds = m_rtts.bindAddressAsDepthStencil(m_device.Get(), copycmdlist, address_z, clip_width, clip_height, m_surface.depth_format, 1., 0); + ComPtr oldDS; + ID3D12Resource *ds = m_rtts.bindAddressAsDepthStencil(m_device.Get(), copycmdlist, address_z, clip_width, clip_height, m_surface.depth_format, 1., 0, oldDS); + if (oldDS) + getCurrentResourceStorage().dirty_textures.push_back(oldDS); D3D12_DEPTH_STENCIL_VIEW_DESC depthStencilViewDesc = {}; - switch (m_surface.depth_format) - { - case 0: - break; - case CELL_GCM_SURFACE_Z16: - depthStencilViewDesc.Format = DXGI_FORMAT_D16_UNORM; - break; - case CELL_GCM_SURFACE_Z24S8: - depthStencilViewDesc.Format = DXGI_FORMAT_D24_UNORM_S8_UINT; - break; - default: - LOG_ERROR(RSX, "Bad depth format! (%d)", m_surface.depth_format); - assert(0); - } + depthStencilViewDesc.Format = get_depth_stencil_surface_format(m_surface.depth_format); depthStencilViewDesc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2D; m_device->CreateDepthStencilView(ds, &depthStencilViewDesc, m_rtts.m_depthStencilDescriptorHeap->GetCPUDescriptorHandleForHeapStart()); } @@ -203,16 +185,7 @@ ID3D12Resource *RenderTargets::bindAddressAsRenderTargets(ID3D12Device *device, else { LOG_WARNING(RSX, "Creating RTT"); - DXGI_FORMAT dxgiFormat; - switch (surfaceColorFormat) - { - case CELL_GCM_SURFACE_A8R8G8B8: - dxgiFormat = DXGI_FORMAT_R8G8B8A8_UNORM; - break; - case CELL_GCM_SURFACE_F_W16Z16Y16X16: - dxgiFormat = DXGI_FORMAT_R16G16B16A16_FLOAT; - break; - } + DXGI_FORMAT dxgiFormat = get_color_surface_format(surfaceColorFormat); D3D12_CLEAR_VALUE clearColorValue = {}; clearColorValue.Format = dxgiFormat; clearColorValue.Color[0] = clearColor[0]; @@ -235,56 +208,51 @@ ID3D12Resource *RenderTargets::bindAddressAsRenderTargets(ID3D12Device *device, return rtt; } -ID3D12Resource * RenderTargets::bindAddressAsDepthStencil(ID3D12Device * device, ID3D12GraphicsCommandList * cmdList, u32 address, size_t width, size_t height, u8 surfaceDepthFormat, float depthClear, u8 stencilClear) +ID3D12Resource * RenderTargets::bindAddressAsDepthStencil(ID3D12Device * device, ID3D12GraphicsCommandList * cmdList, u32 address, size_t width, size_t height, u8 surfaceDepthFormat, float depthClear, u8 stencilClear, ComPtr &dirtyDS) { - ID3D12Resource* ds; + auto It = m_depthStencil.find(address); - // TODO: Check if sizes and surface depth format match + m_currentlyBoundDepthStencilAddress = address; + + // TODO: Check if surface depth format match if (It != m_depthStencil.end()) { - ds = It->second; - cmdList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(ds, D3D12_RESOURCE_STATE_GENERIC_READ, D3D12_RESOURCE_STATE_DEPTH_WRITE)); - } - else - { - D3D12_CLEAR_VALUE clearDepthValue = {}; - clearDepthValue.DepthStencil.Depth = depthClear; - - D3D12_HEAP_PROPERTIES heapProp = {}; - heapProp.Type = D3D12_HEAP_TYPE_DEFAULT; - - DXGI_FORMAT dxgiFormat; - switch (surfaceDepthFormat) + ComPtr ds = It->second; + if (ds->GetDesc().Width == width && ds->GetDesc().Height == height) { - case 0: - break; - case CELL_GCM_SURFACE_Z16: - dxgiFormat = DXGI_FORMAT_R16_TYPELESS; - clearDepthValue.Format = DXGI_FORMAT_D16_UNORM; - break; - case CELL_GCM_SURFACE_Z24S8: - dxgiFormat = DXGI_FORMAT_R24G8_TYPELESS; - clearDepthValue.Format = DXGI_FORMAT_D24_UNORM_S8_UINT; - break; - default: - LOG_ERROR(RSX, "Bad depth format! (%d)", surfaceDepthFormat); - assert(0); + // set the resource as depth write + cmdList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(ds.Get(), D3D12_RESOURCE_STATE_GENERIC_READ, D3D12_RESOURCE_STATE_DEPTH_WRITE)); + m_currentlyBoundDepthStencil = ds.Get(); + return ds.Get(); } + // If size doesn't match, remove ds from cache + m_depthStencil.erase(address); + dirtyDS = ds; + } - device->CreateCommittedResource( + D3D12_CLEAR_VALUE clearDepthValue = {}; + clearDepthValue.DepthStencil.Depth = depthClear; + + D3D12_HEAP_PROPERTIES heapProp = {}; + heapProp.Type = D3D12_HEAP_TYPE_DEFAULT; + + DXGI_FORMAT dxgiFormat = get_depth_typeless_surface_format(surfaceDepthFormat); + clearDepthValue.Format = get_depth_stencil_surface_clear_format(surfaceDepthFormat); + + ComPtr newds; + device->CreateCommittedResource( &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), D3D12_HEAP_FLAG_NONE, &CD3DX12_RESOURCE_DESC::Tex2D(dxgiFormat, (UINT)width, (UINT)height, 1, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL), D3D12_RESOURCE_STATE_DEPTH_WRITE, &clearDepthValue, - IID_PPV_ARGS(&ds) + IID_PPV_ARGS(newds.GetAddressOf()) ); - m_depthStencil[address] = ds; - } - m_currentlyBoundDepthStencil = ds; - m_currentlyBoundDepthStencilAddress = address; - return ds; + m_depthStencil[address] = newds; + m_currentlyBoundDepthStencil = newds.Get(); + + return newds.Get(); } void RenderTargets::Init(ID3D12Device *device)//, u8 surfaceDepthFormat, size_t width, size_t height, float clearColor[4], float clearDepth) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h index 0cf1a58d45..0392ed8d69 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h @@ -4,10 +4,10 @@ struct RenderTargets { - std::unordered_map m_renderTargets; + std::unordered_map m_renderTargets; ID3D12Resource *m_currentlyBoundRenderTargets[4]; u32 m_currentlyBoundRenderTargetsAddress[4]; - std::unordered_map m_depthStencil; + std::unordered_map > m_depthStencil; ID3D12Resource *m_currentlyBoundDepthStencil; u32 m_currentlyBoundDepthStencilAddress; ID3D12DescriptorHeap *m_renderTargetsDescriptorsHeap; @@ -22,7 +22,7 @@ struct RenderTargets size_t width, size_t height, u8 surfaceColorFormat, const std::array &clearColor); ID3D12Resource *bindAddressAsDepthStencil(ID3D12Device *device, ID3D12GraphicsCommandList *cmdList, u32 address, - size_t width, size_t height, u8 surfaceDepthFormat, float depthClear, u8 stencilClear); + size_t width, size_t height, u8 surfaceDepthFormat, float depthClear, u8 stencilClear, ComPtr &dirtyDS); void Init(ID3D12Device *device); void Release(); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index 78b6df4c7b..484b28e4e8 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -4,9 +4,11 @@ #include "d3dx12.h" #include "../Common/TextureUtils.h" // For clarity this code deals with texture but belongs to D3D12GSRender class +#include "D3D12Formats.h" -static -D3D12_COMPARISON_FUNC getSamplerCompFunc[] = +namespace +{ +D3D12_COMPARISON_FUNC get_sampler_compare_func[] = { D3D12_COMPARISON_FUNC_NEVER, D3D12_COMPARISON_FUNC_LESS, @@ -18,103 +20,20 @@ D3D12_COMPARISON_FUNC getSamplerCompFunc[] = D3D12_COMPARISON_FUNC_ALWAYS }; -static -size_t getSamplerMaxAniso(size_t aniso) -{ - switch (aniso) - { - case CELL_GCM_TEXTURE_MAX_ANISO_1: return 1; - case CELL_GCM_TEXTURE_MAX_ANISO_2: return 2; - case CELL_GCM_TEXTURE_MAX_ANISO_4: return 4; - case CELL_GCM_TEXTURE_MAX_ANISO_6: return 6; - case CELL_GCM_TEXTURE_MAX_ANISO_8: return 8; - case CELL_GCM_TEXTURE_MAX_ANISO_10: return 10; - case CELL_GCM_TEXTURE_MAX_ANISO_12: return 12; - case CELL_GCM_TEXTURE_MAX_ANISO_16: return 16; - } - - return 1; -} - -static -D3D12_TEXTURE_ADDRESS_MODE getSamplerWrap(size_t wrap) -{ - switch (wrap) - { - case CELL_GCM_TEXTURE_WRAP: return D3D12_TEXTURE_ADDRESS_MODE_WRAP; - case CELL_GCM_TEXTURE_MIRROR: return D3D12_TEXTURE_ADDRESS_MODE_MIRROR; - case CELL_GCM_TEXTURE_CLAMP_TO_EDGE: return D3D12_TEXTURE_ADDRESS_MODE_CLAMP; - case CELL_GCM_TEXTURE_BORDER: return D3D12_TEXTURE_ADDRESS_MODE_BORDER; - case CELL_GCM_TEXTURE_CLAMP: return D3D12_TEXTURE_ADDRESS_MODE_CLAMP; - case CELL_GCM_TEXTURE_MIRROR_ONCE_CLAMP_TO_EDGE: return D3D12_TEXTURE_ADDRESS_MODE_MIRROR_ONCE; - case CELL_GCM_TEXTURE_MIRROR_ONCE_BORDER: return D3D12_TEXTURE_ADDRESS_MODE_MIRROR_ONCE; - case CELL_GCM_TEXTURE_MIRROR_ONCE_CLAMP: return D3D12_TEXTURE_ADDRESS_MODE_MIRROR_ONCE; - } - return D3D12_TEXTURE_ADDRESS_MODE_WRAP; -} - -static -D3D12_FILTER getSamplerFilter(u32 minFilter, u32 magFilter) -{ - D3D12_FILTER_TYPE min, mag, mip; - switch (minFilter) - { - case CELL_GCM_TEXTURE_NEAREST: - min = D3D12_FILTER_TYPE_POINT; - mip = D3D12_FILTER_TYPE_POINT; - break; - case CELL_GCM_TEXTURE_LINEAR: - min = D3D12_FILTER_TYPE_LINEAR; - mip = D3D12_FILTER_TYPE_POINT; - break; - case CELL_GCM_TEXTURE_NEAREST_NEAREST: - min = D3D12_FILTER_TYPE_POINT; - mip = D3D12_FILTER_TYPE_POINT; - break; - case CELL_GCM_TEXTURE_LINEAR_NEAREST: - min = D3D12_FILTER_TYPE_LINEAR; - mip = D3D12_FILTER_TYPE_POINT; - break; - case CELL_GCM_TEXTURE_NEAREST_LINEAR: - min = D3D12_FILTER_TYPE_POINT; - mip = D3D12_FILTER_TYPE_LINEAR; - break; - case CELL_GCM_TEXTURE_LINEAR_LINEAR: - min = D3D12_FILTER_TYPE_LINEAR; - mip = D3D12_FILTER_TYPE_LINEAR; - break; - case CELL_GCM_TEXTURE_CONVOLUTION_MIN: - default: - LOG_ERROR(RSX, "Unknow min filter %x", minFilter); - } - - switch (magFilter) - { - case CELL_GCM_TEXTURE_NEAREST: - mag = D3D12_FILTER_TYPE_POINT; - break; - case CELL_GCM_TEXTURE_LINEAR: - mag = D3D12_FILTER_TYPE_LINEAR; - break; - default: - LOG_ERROR(RSX, "Unknow mag filter %x", magFilter); - } - - return D3D12_ENCODE_BASIC_FILTER(min, mag, mip, D3D12_FILTER_REDUCTION_TYPE_STANDARD); -} - -static -D3D12_SAMPLER_DESC getSamplerDesc(const rsx::texture &texture) +D3D12_SAMPLER_DESC get_sampler_desc(const rsx::texture &texture) noexcept { D3D12_SAMPLER_DESC samplerDesc = {}; - samplerDesc.Filter = getSamplerFilter(texture.min_filter(), texture.mag_filter()); - samplerDesc.AddressU = getSamplerWrap(texture.wrap_s()); - samplerDesc.AddressV = getSamplerWrap(texture.wrap_t()); - samplerDesc.AddressW = getSamplerWrap(texture.wrap_r()); - samplerDesc.ComparisonFunc = getSamplerCompFunc[texture.zfunc()]; - samplerDesc.MaxAnisotropy = (UINT)getSamplerMaxAniso(texture.max_aniso()); + samplerDesc.Filter = get_texture_filter(texture.min_filter(), texture.mag_filter()); + samplerDesc.AddressU = get_texture_wrap_mode(texture.wrap_s()); + samplerDesc.AddressV = get_texture_wrap_mode(texture.wrap_t()); + samplerDesc.AddressW = get_texture_wrap_mode(texture.wrap_r()); + samplerDesc.ComparisonFunc = get_sampler_compare_func[texture.zfunc()]; + samplerDesc.MaxAnisotropy = get_texture_max_aniso(texture.max_aniso()); samplerDesc.MipLODBias = texture.bias(); - samplerDesc.BorderColor[4] = (FLOAT)texture.border_color(); + samplerDesc.BorderColor[0] = (FLOAT)texture.border_color(); + samplerDesc.BorderColor[1] = (FLOAT)texture.border_color(); + samplerDesc.BorderColor[2] = (FLOAT)texture.border_color(); + samplerDesc.BorderColor[3] = (FLOAT)texture.border_color(); samplerDesc.MinLOD = (FLOAT)(texture.min_lod() >> 8); samplerDesc.MaxLOD = (FLOAT)(texture.max_lod() >> 8); return samplerDesc; @@ -125,168 +44,89 @@ D3D12_SAMPLER_DESC getSamplerDesc(const rsx::texture &texture) * Create a texture residing in default heap and generate uploads commands in commandList, * using a temporary texture buffer. */ -static -ComPtr uploadSingleTexture( +ComPtr upload_single_texture( const rsx::texture &texture, ID3D12Device *device, - ID3D12GraphicsCommandList *commandList, - DataHeap &textureBuffersHeap) + ID3D12GraphicsCommandList *command_list, + data_heap &texture_buffer_heap) { - ComPtr vramTexture; size_t w = texture.width(), h = texture.height(); int format = texture.format() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); - DXGI_FORMAT dxgiFormat = getTextureDXGIFormat(format); + DXGI_FORMAT dxgi_format = get_texture_format(format); - size_t textureSize = getPlacedTextureStorageSpace(texture, 256); - assert(textureBuffersHeap.canAlloc(textureSize)); - size_t heapOffset = textureBuffersHeap.alloc(textureSize); + size_t buffer_size = get_placed_texture_storage_size(texture, 256); + assert(texture_buffer_heap.can_alloc(buffer_size)); + size_t heap_offset = texture_buffer_heap.alloc(buffer_size); void *buffer; - ThrowIfFailed(textureBuffersHeap.m_heap->Map(0, &CD3DX12_RANGE(heapOffset, heapOffset + textureSize), &buffer)); - void *textureData = (char*)buffer + heapOffset; - std::vector mipInfos = uploadPlacedTexture(texture, 256, textureData); - textureBuffersHeap.m_heap->Unmap(0, &CD3DX12_RANGE(heapOffset, heapOffset + textureSize)); - - D3D12_RESOURCE_DESC texturedesc = CD3DX12_RESOURCE_DESC::Tex2D(dxgiFormat, (UINT)w, (UINT)h, 1, texture.mipmap()); - textureSize = device->GetResourceAllocationInfo(0, 1, &texturedesc).SizeInBytes; + ThrowIfFailed(texture_buffer_heap.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size), &buffer)); + void *mapped_buffer = (char*)buffer + heap_offset; + std::vector mipInfos = upload_placed_texture(texture, 256, mapped_buffer); + texture_buffer_heap.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); + ComPtr result; ThrowIfFailed(device->CreateCommittedResource( &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), D3D12_HEAP_FLAG_NONE, - &texturedesc, + &CD3DX12_RESOURCE_DESC::Tex2D(dxgi_format, (UINT)w, (UINT)h, 1, texture.mipmap()), D3D12_RESOURCE_STATE_COPY_DEST, nullptr, - IID_PPV_ARGS(vramTexture.GetAddressOf()) + IID_PPV_ARGS(result.GetAddressOf()) )); - size_t miplevel = 0; + size_t mip_level = 0; for (const MipmapLevelInfo mli : mipInfos) { - commandList->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(vramTexture.Get(), (UINT)miplevel), 0, 0, 0, - &CD3DX12_TEXTURE_COPY_LOCATION(textureBuffersHeap.m_heap, { heapOffset + mli.offset, { dxgiFormat, (UINT)mli.width, (UINT)mli.height, 1, (UINT)mli.rowPitch } }), nullptr); - miplevel++; + command_list->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(result.Get(), (UINT)mip_level), 0, 0, 0, + &CD3DX12_TEXTURE_COPY_LOCATION(texture_buffer_heap.m_heap, { heap_offset + mli.offset, { dxgi_format, (UINT)mli.width, (UINT)mli.height, 1, (UINT)mli.rowPitch } }), nullptr); + mip_level++; } - commandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(vramTexture.Get(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_GENERIC_READ)); - return vramTexture; + command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(result.Get(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_GENERIC_READ)); + return result; } - - /** * */ -static -void updateExistingTexture( +void update_existing_texture( const rsx::texture &texture, - ID3D12GraphicsCommandList *commandList, - DataHeap &textureBuffersHeap, - ID3D12Resource *existingTexture) + ID3D12GraphicsCommandList *command_list, + data_heap &texture_buffer_heap, + ID3D12Resource *existing_texture) { size_t w = texture.width(), h = texture.height(); int format = texture.format() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); - DXGI_FORMAT dxgiFormat = getTextureDXGIFormat(format); + DXGI_FORMAT dxgi_format = get_texture_format(format); - size_t textureSize = getPlacedTextureStorageSpace(texture, 256); - assert(textureBuffersHeap.canAlloc(textureSize)); - size_t heapOffset = textureBuffersHeap.alloc(textureSize); + size_t buffer_size = get_placed_texture_storage_size(texture, 256); + assert(texture_buffer_heap.can_alloc(buffer_size)); + size_t heap_offset = texture_buffer_heap.alloc(buffer_size); void *buffer; - ThrowIfFailed(textureBuffersHeap.m_heap->Map(0, &CD3DX12_RANGE(heapOffset, heapOffset + textureSize), &buffer)); - void *textureData = (char*)buffer + heapOffset; - std::vector mipInfos = uploadPlacedTexture(texture, 256, textureData); - textureBuffersHeap.m_heap->Unmap(0, &CD3DX12_RANGE(heapOffset, heapOffset + textureSize)); + ThrowIfFailed(texture_buffer_heap.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size), &buffer)); + void *mapped_buffer = (char*)buffer + heap_offset; + std::vector mipInfos = upload_placed_texture(texture, 256, mapped_buffer); + texture_buffer_heap.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); - commandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(existingTexture, D3D12_RESOURCE_STATE_GENERIC_READ, D3D12_RESOURCE_STATE_COPY_DEST)); + command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(existing_texture, D3D12_RESOURCE_STATE_GENERIC_READ, D3D12_RESOURCE_STATE_COPY_DEST)); size_t miplevel = 0; for (const MipmapLevelInfo mli : mipInfos) { - commandList->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(existingTexture, (UINT)miplevel), 0, 0, 0, - &CD3DX12_TEXTURE_COPY_LOCATION(textureBuffersHeap.m_heap, { heapOffset + mli.offset,{ dxgiFormat, (UINT)mli.width, (UINT)mli.height, 1, (UINT)mli.rowPitch } }), nullptr); + command_list->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(existing_texture, (UINT)miplevel), 0, 0, 0, + &CD3DX12_TEXTURE_COPY_LOCATION(texture_buffer_heap.m_heap, { heap_offset + mli.offset,{ dxgi_format, (UINT)mli.width, (UINT)mli.height, 1, (UINT)mli.rowPitch } }), nullptr); miplevel++; } - commandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(existingTexture, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_GENERIC_READ)); + command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(existing_texture, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_GENERIC_READ)); +} } - -/** - * Get number of bytes occupied by texture in RSX mem - */ -static -size_t getTextureSize(const rsx::texture &texture) +void D3D12GSRender::upload_and_bind_textures(ID3D12GraphicsCommandList *command_list, size_t descriptor_index, size_t texture_count) { - size_t w = texture.width(), h = texture.height(); - - int format = texture.format() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); - // TODO: Take mipmaps into account - switch (format) - { - case CELL_GCM_TEXTURE_COMPRESSED_HILO8: - case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8: - case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: - case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: - default: - LOG_ERROR(RSX, "Unimplemented Texture format : %x", format); - return 0; - case CELL_GCM_TEXTURE_B8: - return w * h; - case CELL_GCM_TEXTURE_A1R5G5B5: - return w * h * 2; - case CELL_GCM_TEXTURE_A4R4G4B4: - return w * h * 2; - case CELL_GCM_TEXTURE_R5G6B5: - return w * h * 2; - case CELL_GCM_TEXTURE_A8R8G8B8: - return w * h * 4; - case CELL_GCM_TEXTURE_COMPRESSED_DXT1: - return w * h / 6; - case CELL_GCM_TEXTURE_COMPRESSED_DXT23: - return w * h / 4; - case CELL_GCM_TEXTURE_COMPRESSED_DXT45: - return w * h / 4; - case CELL_GCM_TEXTURE_G8B8: - return w * h * 2; - case CELL_GCM_TEXTURE_R6G5B5: - return w * h * 2; - case CELL_GCM_TEXTURE_DEPTH24_D8: - return w * h * 4; - case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: - return w * h * 4; - case CELL_GCM_TEXTURE_DEPTH16: - return w * h * 2; - case CELL_GCM_TEXTURE_DEPTH16_FLOAT: - return w * h * 2; - case CELL_GCM_TEXTURE_X16: - return w * h * 2; - case CELL_GCM_TEXTURE_Y16_X16: - return w * h * 4; - case CELL_GCM_TEXTURE_R5G5B5A1: - return w * h * 2; - case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: - return w * h * 8; - case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: - return w * h * 16; - case CELL_GCM_TEXTURE_X32_FLOAT: - return w * h * 4; - case CELL_GCM_TEXTURE_D1R5G5B5: - return w * h * 2; - case CELL_GCM_TEXTURE_Y16_X16_FLOAT: - return w * h * 4; - case CELL_GCM_TEXTURE_D8R8G8B8: - return w * h * 4; - case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: - return w * h * 4; - case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: - return w * h * 4; - } -} - -size_t D3D12GSRender::UploadTextures(ID3D12GraphicsCommandList *cmdlist, size_t descriptorIndex) -{ - size_t usedTexture = 0; + size_t used_texture = 0; for (u32 i = 0; i < rsx::limits::textures_count; ++i) { @@ -297,40 +137,39 @@ size_t D3D12GSRender::UploadTextures(ID3D12GraphicsCommandList *cmdlist, size_t const u32 texaddr = rsx::get_address(textures[i].offset(), textures[i].location()); int format = textures[i].format() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); - DXGI_FORMAT dxgiFormat = getTextureDXGIFormat(format); bool is_swizzled = !(textures[i].format() & CELL_GCM_TEXTURE_LN); - ID3D12Resource *vramTexture; + ID3D12Resource *vram_texture; std::unordered_map::const_iterator ItRTT = m_rtts.m_renderTargets.find(texaddr); - std::pair > *cachedTex = m_textureCache.findDataIfAvailable(texaddr); + std::pair > *cached_texture = m_textureCache.find_data_if_available(texaddr); bool isRenderTarget = false; if (ItRTT != m_rtts.m_renderTargets.end()) { - vramTexture = ItRTT->second; + vram_texture = ItRTT->second; isRenderTarget = true; } - else if (cachedTex != nullptr && (cachedTex->first == TextureEntry(format, w, h, textures[i].mipmap()))) + else if (cached_texture != nullptr && (cached_texture->first == texture_entry(format, w, h, textures[i].mipmap()))) { - if (cachedTex->first.m_isDirty) + if (cached_texture->first.m_is_dirty) { - updateExistingTexture(textures[i], cmdlist, m_textureUploadData, cachedTex->second.Get()); - m_textureCache.protectData(texaddr, texaddr, getTextureSize(textures[i])); + update_existing_texture(textures[i], command_list, m_textureUploadData, cached_texture->second.Get()); + m_textureCache.protect_data(texaddr, texaddr, get_texture_size(textures[i])); } - vramTexture = cachedTex->second.Get(); + vram_texture = cached_texture->second.Get(); } else { - if (cachedTex != nullptr) - getCurrentResourceStorage().m_dirtyTextures.push_back(m_textureCache.removeFromCache(texaddr)); - ComPtr tex = uploadSingleTexture(textures[i], m_device.Get(), cmdlist, m_textureUploadData); - vramTexture = tex.Get(); - m_textureCache.storeAndProtectData(texaddr, texaddr, getTextureSize(textures[i]), format, w, h, textures[i].mipmap(), tex); + if (cached_texture != nullptr) + getCurrentResourceStorage().dirty_textures.push_back(m_textureCache.remove_from_cache(texaddr)); + ComPtr tex = upload_single_texture(textures[i], m_device.Get(), command_list, m_textureUploadData); + vram_texture = tex.Get(); + m_textureCache.store_and_protect_data(texaddr, texaddr, get_texture_size(textures[i]), format, w, h, textures[i].mipmap(), tex); } - D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; - srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; - srvDesc.Format = dxgiFormat; - srvDesc.Texture2D.MipLevels = textures[i].mipmap(); + D3D12_SHADER_RESOURCE_VIEW_DESC shared_resource_view_desc = {}; + shared_resource_view_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + shared_resource_view_desc.Format = get_texture_format(format); + shared_resource_view_desc.Texture2D.MipLevels = textures[i].mipmap(); switch (format) { @@ -342,7 +181,7 @@ size_t D3D12GSRender::UploadTextures(ID3D12GraphicsCommandList *cmdlist, size_t LOG_ERROR(RSX, "Unimplemented Texture format : %x", format); break; case CELL_GCM_TEXTURE_B8: - srvDesc.Shader4ComponentMapping = D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING( + shared_resource_view_desc.Shader4ComponentMapping = D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING( D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0, D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0, D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0, @@ -351,7 +190,7 @@ size_t D3D12GSRender::UploadTextures(ID3D12GraphicsCommandList *cmdlist, size_t case CELL_GCM_TEXTURE_A1R5G5B5: case CELL_GCM_TEXTURE_A4R4G4B4: case CELL_GCM_TEXTURE_R5G6B5: - srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + shared_resource_view_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; break; case CELL_GCM_TEXTURE_A8R8G8B8: { @@ -373,7 +212,7 @@ size_t D3D12GSRender::UploadTextures(ID3D12GraphicsCommandList *cmdlist, size_t D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_2 }; - srvDesc.Shader4ComponentMapping = D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING( + shared_resource_view_desc.Shader4ComponentMapping = D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING( RemapValue[remap_r], RemapValue[remap_g], RemapValue[remap_b], @@ -391,7 +230,7 @@ size_t D3D12GSRender::UploadTextures(ID3D12GraphicsCommandList *cmdlist, size_t D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_3 }; - srvDesc.Shader4ComponentMapping = D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING( + shared_resource_view_desc.Shader4ComponentMapping = D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING( RemapValue[remap_r], RemapValue[remap_g], RemapValue[remap_b], @@ -416,7 +255,7 @@ size_t D3D12GSRender::UploadTextures(ID3D12GraphicsCommandList *cmdlist, size_t case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: case CELL_GCM_TEXTURE_X32_FLOAT: case CELL_GCM_TEXTURE_D1R5G5B5: - srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + shared_resource_view_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; break; case CELL_GCM_TEXTURE_D8R8G8B8: { @@ -433,7 +272,7 @@ size_t D3D12GSRender::UploadTextures(ID3D12GraphicsCommandList *cmdlist, size_t u8 remap_g = (textures[i].remap() >> 4) & 0x3; u8 remap_b = (textures[i].remap() >> 6) & 0x3; - srvDesc.Shader4ComponentMapping = D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING( + shared_resource_view_desc.Shader4ComponentMapping = D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING( RemapValue[remap_a], RemapValue[remap_r], RemapValue[remap_g], @@ -441,32 +280,58 @@ size_t D3D12GSRender::UploadTextures(ID3D12GraphicsCommandList *cmdlist, size_t break; } case CELL_GCM_TEXTURE_Y16_X16_FLOAT: - srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + shared_resource_view_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; break; case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: - srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + shared_resource_view_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; break; case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: - srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + shared_resource_view_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; break; } - m_device->CreateShaderResourceView(vramTexture, &srvDesc, - CD3DX12_CPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_descriptorsHeap->GetCPUDescriptorHandleForHeapStart()) - .Offset((UINT)descriptorIndex + (UINT)usedTexture, g_descriptorStrideSRVCBVUAV)); + m_device->CreateShaderResourceView(vram_texture, &shared_resource_view_desc, + CD3DX12_CPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().descriptors_heap->GetCPUDescriptorHandleForHeapStart()) + .Offset((UINT)descriptor_index + (UINT)used_texture, g_descriptorStrideSRVCBVUAV)); - if (getCurrentResourceStorage().m_currentSamplerIndex + 16 > 2048) + if (getCurrentResourceStorage().current_sampler_index + 16 > 2048) { - getCurrentResourceStorage().m_samplerDescriptorHeapIndex = 1; - getCurrentResourceStorage().m_currentSamplerIndex = 0; + getCurrentResourceStorage().sampler_descriptors_heap_index = 1; + getCurrentResourceStorage().current_sampler_index = 0; } - m_device->CreateSampler(&getSamplerDesc(textures[i]), - CD3DX12_CPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_samplerDescriptorHeap[getCurrentResourceStorage().m_samplerDescriptorHeapIndex]->GetCPUDescriptorHandleForHeapStart()) - .Offset((UINT)getCurrentResourceStorage().m_currentSamplerIndex + (UINT)usedTexture, g_descriptorStrideSamplers)); + m_device->CreateSampler(&get_sampler_desc(textures[i]), + CD3DX12_CPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().sampler_descriptor_heap[getCurrentResourceStorage().sampler_descriptors_heap_index]->GetCPUDescriptorHandleForHeapStart()) + .Offset((UINT)getCurrentResourceStorage().current_sampler_index + (UINT)used_texture, g_descriptorStrideSamplers)); - usedTexture++; + used_texture++; } - return usedTexture; + // Now fill remaining texture slots with dummy texture/sampler + for (; used_texture < texture_count; used_texture++) + { + D3D12_SHADER_RESOURCE_VIEW_DESC shader_resource_view_desc = {}; + shader_resource_view_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + shader_resource_view_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + shader_resource_view_desc.Texture2D.MipLevels = 1; + shader_resource_view_desc.Shader4ComponentMapping = D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING( + D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0, + D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0, + D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0, + D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0); + m_device->CreateShaderResourceView(m_dummyTexture, &shader_resource_view_desc, + CD3DX12_CPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().descriptors_heap->GetCPUDescriptorHandleForHeapStart()) + .Offset((INT)descriptor_index + (INT)used_texture, g_descriptorStrideSRVCBVUAV) + ); + + D3D12_SAMPLER_DESC sampler_desc = {}; + sampler_desc.Filter = D3D12_FILTER_MIN_MAG_MIP_POINT; + sampler_desc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_WRAP; + sampler_desc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_WRAP; + sampler_desc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_WRAP; + m_device->CreateSampler(&sampler_desc, + CD3DX12_CPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().sampler_descriptor_heap[getCurrentResourceStorage().sampler_descriptors_heap_index]->GetCPUDescriptorHandleForHeapStart()) + .Offset((INT)getCurrentResourceStorage().current_sampler_index + (INT)used_texture, g_descriptorStrideSamplers) + ); + } } #endif diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.h b/rpcs3/Emu/RSX/D3D12/D3D12Texture.h deleted file mode 100644 index 6f70f09bee..0000000000 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.h +++ /dev/null @@ -1 +0,0 @@ -#pragma once diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Utils.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Utils.cpp index 582bcfcc3d..5733d47970 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Utils.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Utils.cpp @@ -262,4 +262,13 @@ void D3D12GSRender::initConvertShader() p.first->Release(); p.second->Release(); } + +void unreachable_internal(const char *msg, const char *file, unsigned line) +{ + abort(); + #ifdef LLVM_BUILTIN_UNREACHABLE + LLVM_BUILTIN_UNREACHABLE; + #endif +} + #endif diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Utils.h b/rpcs3/Emu/RSX/D3D12/D3D12Utils.h new file mode 100644 index 0000000000..a82186e9f3 --- /dev/null +++ b/rpcs3/Emu/RSX/D3D12/D3D12Utils.h @@ -0,0 +1,118 @@ +#pragma once + +#include +#include +#include +#include "Utilities/Log.h" +#include "Emu/Memory/vm.h" +#include "Emu/RSX/GCM.h" + + +// From llvm Compiler.h + +// Need to be set by define +#ifndef __has_builtin +#define __has_builtin(x) 0 +#endif + +/// \macro LLVM_GNUC_PREREQ +/// \brief Extend the default __GNUC_PREREQ even if glibc's features.h isn't +/// available. +#ifndef LLVM_GNUC_PREREQ +# if defined(__GNUC__) && defined(__GNUC_MINOR__) && defined(__GNUC_PATCHLEVEL__) +#define LLVM_GNUC_PREREQ(maj, min, patch) \ + ((__GNUC__ << 20) + (__GNUC_MINOR__ << 10) + __GNUC_PATCHLEVEL__ >= \ + ((maj) << 20) + ((min) << 10) + (patch)) +# elif defined(__GNUC__) && defined(__GNUC_MINOR__) +#define LLVM_GNUC_PREREQ(maj, min, patch) \ + ((__GNUC__ << 20) + (__GNUC_MINOR__ << 10) >= ((maj) << 20) + ((min) << 10)) +#else +#define LLVM_GNUC_PREREQ(maj, min, patch) 0 +#endif +#endif + +#ifdef __GNUC__ +#define LLVM_ATTRIBUTE_NORETURN __attribute__((noreturn)) +#elif defined(_MSC_VER) +#define LLVM_ATTRIBUTE_NORETURN __declspec(noreturn) +#else +#define LLVM_ATTRIBUTE_NORETURN +#endif + +#if __has_builtin(__builtin_unreachable) || LLVM_GNUC_PREREQ(4, 5, 0) +# define LLVM_BUILTIN_UNREACHABLE __builtin_unreachable() +#elif defined(_MSC_VER) +# define LLVM_BUILTIN_UNREACHABLE __assume(false) +#endif + +LLVM_ATTRIBUTE_NORETURN void unreachable_internal(const char *msg = nullptr, const char *file = nullptr, unsigned line = 0); + +/// Marks that the current location is not supposed to be reachable. +/// In !NDEBUG builds, prints the message and location info to stderr. +/// In NDEBUG builds, becomes an optimizer hint that the current location +/// is not supposed to be reachable. On compilers that don't support +/// such hints, prints a reduced message instead. +/// +/// Use this instead of assert(0). It conveys intent more clearly and +/// allows compilers to omit some unnecessary code. +#ifndef NDEBUG +#define unreachable(msg) \ + unreachable_internal(msg, __FILE__, __LINE__) +#elif defined(LLVM_BUILTIN_UNREACHABLE) +#define unreachable(msg) LLVM_BUILTIN_UNREACHABLE +#else +#define unreachable(msg) unreachable_internal() +#endif + +using namespace Microsoft::WRL; + +// From DX12 D3D11On12 Sample (MIT Licensed) +inline void ThrowIfFailed(HRESULT hr) +{ + if (FAILED(hr)) + { + throw; + } +} + +/** + * Send data to dst pointer without polluting cache. + * Usefull to write to mapped memory from upload heap. + */ +inline +void streamToBuffer(void* dst, void* src, size_t sizeInBytes) +{ + for (int i = 0; i < sizeInBytes / 16; i++) + { + const __m128i &srcPtr = _mm_loadu_si128((__m128i*) ((char*)src + i * 16)); + _mm_stream_si128((__m128i*)((char*)dst + i * 16), srcPtr); + } +} + +/** +* copy src to dst pointer without polluting cache. +* Usefull to write to mapped memory from upload heap. +*/ +inline +void streamBuffer(void* dst, void* src, size_t sizeInBytes) +{ + // Assume 64 bytes cache line + int offset = 0; + bool isAligned = !((size_t)src & 15); + for (offset = 0; offset < sizeInBytes - 64; offset += 64) + { + char *line = (char*)src + offset; + char *dstline = (char*)dst + offset; + // prefetch next line + _mm_prefetch(line + 16, _MM_HINT_NTA); + __m128i srcPtr = isAligned ? _mm_load_si128((__m128i *)line) : _mm_loadu_si128((__m128i *)line); + _mm_stream_si128((__m128i*)dstline, srcPtr); + srcPtr = isAligned ? _mm_load_si128((__m128i *)(line + 16)) : _mm_loadu_si128((__m128i *)(line + 16)); + _mm_stream_si128((__m128i*)(dstline + 16), srcPtr); + srcPtr = isAligned ? _mm_load_si128((__m128i *)(line + 32)) : _mm_loadu_si128((__m128i *)(line + 32)); + _mm_stream_si128((__m128i*)(dstline + 32), srcPtr); + srcPtr = isAligned ? _mm_load_si128((__m128i *)(line + 48)) : _mm_loadu_si128((__m128i *)(line + 48)); + _mm_stream_si128((__m128i*)(dstline + 48), srcPtr); + } + memcpy((char*)dst + offset, (char*)src + offset, sizeInBytes - offset); +} diff --git a/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp index 0c2c986baa..6954d1ed99 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp @@ -84,6 +84,7 @@ void D3D12VertexProgramDecompiler::insertOutputs(std::stringstream & OS, const s OS << " float4 dst_reg13 : TEXCOORD6;" << std::endl; OS << " float4 dst_reg14 : TEXCOORD7;" << std::endl; OS << " float4 dst_reg15 : TEXCOORD8;" << std::endl; + OS << " float4 dst_reg16 : TEXCOORD9;" << std::endl; OS << "};" << std::endl; } diff --git a/rpcs3/stdafx_d3d12.h b/rpcs3/stdafx_d3d12.h index 9fb2716b0d..f912db7cd2 100644 --- a/rpcs3/stdafx_d3d12.h +++ b/rpcs3/stdafx_d3d12.h @@ -1,3 +1,9 @@ #pragma once #include "stdafx.h" +#ifdef _WIN32 +#include +#include "Emu\RSX\D3D12\D3D12Utils.h" +#include "Emu\RSX\D3D12\D3D12Formats.h" +#include "Emu\RSX\D3D12\D3D12GSRender.h" +#endif