diff --git a/.reuse/dep5 b/.reuse/dep5 index 43586f77f..060924813 100644 --- a/.reuse/dep5 +++ b/.reuse/dep5 @@ -58,3 +58,7 @@ License: MIT Files: externals/tracy/* Copyright: 2017-2024 Bartosz Taudul License: BSD-3-Clause + +Files: src/imgui/renderer/fonts/NotoSansJP-Regular.ttf +Copyright: 2012 Google Inc. All Rights Reserved. +License: OFL-1.1 diff --git a/CMakeLists.txt b/CMakeLists.txt index 70af9ef43..c03cc3bcc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -359,8 +359,9 @@ set(COMMON src/common/logging/backend.cpp src/common/config.h src/common/cstring.h src/common/debug.h - src/common/disassembler.cpp - src/common/disassembler.h + src/common/decoder.cpp + src/common/decoder.h + src/common/elf_info.h src/common/endian.h src/common/enum.h src/common/io_file.cpp @@ -378,6 +379,8 @@ set(COMMON src/common/logging/backend.cpp src/common/polyfill_thread.h src/common/rdtsc.cpp src/common/rdtsc.h + src/common/signal_context.h + src/common/signal_context.cpp src/common/singleton.h src/common/slot_vector.h src/common/string_util.cpp @@ -475,6 +478,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h src/shader_recompiler/params.h src/shader_recompiler/runtime_info.h src/shader_recompiler/specialization.h + src/shader_recompiler/backend/bindings.h src/shader_recompiler/backend/spirv/emit_spirv.cpp src/shader_recompiler/backend/spirv/emit_spirv.h src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp @@ -580,6 +584,8 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp src/video_core/renderer_vulkan/vk_master_semaphore.h src/video_core/renderer_vulkan/vk_pipeline_cache.cpp src/video_core/renderer_vulkan/vk_pipeline_cache.h + src/video_core/renderer_vulkan/vk_pipeline_common.cpp + src/video_core/renderer_vulkan/vk_pipeline_common.h src/video_core/renderer_vulkan/vk_platform.cpp src/video_core/renderer_vulkan/vk_platform.h src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -810,6 +816,11 @@ add_subdirectory(${HOST_SHADERS_INCLUDE}) 
add_dependencies(shadps4 host_shaders) target_include_directories(shadps4 PRIVATE ${HOST_SHADERS_INCLUDE}) +# ImGui resources +add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/src/imgui/renderer) +add_dependencies(shadps4 ImGui_Resources) +target_include_directories(shadps4 PRIVATE ${IMGUI_RESOURCES_INCLUDE}) + if (ENABLE_QT_GUI) set_target_properties(shadps4 PROPERTIES # WIN32_EXECUTABLE ON diff --git a/LICENSES/OFL-1.1.txt b/LICENSES/OFL-1.1.txt new file mode 100644 index 000000000..6fe84ee21 --- /dev/null +++ b/LICENSES/OFL-1.1.txt @@ -0,0 +1,43 @@ +SIL OPEN FONT LICENSE + +Version 1.1 - 26 February 2007 + +PREAMBLE + +The goals of the Open Font License (OFL) are to stimulate worldwide development of collaborative font projects, to support the font creation efforts of academic and linguistic communities, and to provide a free and open framework in which fonts may be shared and improved in partnership with others. + +The OFL allows the licensed fonts to be used, studied, modified and redistributed freely as long as they are not sold by themselves. The fonts, including any derivative works, can be bundled, embedded, redistributed and/or sold with any software provided that any reserved names are not used by derivative works. The fonts and derivatives, however, cannot be released under any other type of license. The requirement for fonts to remain under this license does not apply to any document created using the fonts or their derivatives. + +DEFINITIONS + +"Font Software" refers to the set of files released by the Copyright Holder(s) under this license and clearly marked as such. This may include source files, build scripts and documentation. + +"Reserved Font Name" refers to any names specified as such after the copyright statement(s). + +"Original Version" refers to the collection of Font Software components as distributed by the Copyright Holder(s). 
+ +"Modified Version" refers to any derivative made by adding to, deleting, or substituting — in part or in whole — any of the components of the Original Version, by changing formats or by porting the Font Software to a new environment. + +"Author" refers to any designer, engineer, programmer, technical writer or other person who contributed to the Font Software. + +PERMISSION & CONDITIONS + +Permission is hereby granted, free of charge, to any person obtaining a copy of the Font Software, to use, study, copy, merge, embed, modify, redistribute, and sell modified and unmodified copies of the Font Software, subject to the following conditions: + +1) Neither the Font Software nor any of its individual components, in Original or Modified Versions, may be sold by itself. + +2) Original or Modified Versions of the Font Software may be bundled, redistributed and/or sold with any software, provided that each copy contains the above copyright notice and this license. These can be included either as stand-alone text files, human-readable headers or in the appropriate machine-readable metadata fields within text or binary files as long as those fields can be easily viewed by the user. + +3) No Modified Version of the Font Software may use the Reserved Font Name(s) unless explicit written permission is granted by the corresponding Copyright Holder. This restriction only applies to the primary font name as presented to the users. + +4) The name(s) of the Copyright Holder(s) or the Author(s) of the Font Software shall not be used to promote, endorse or advertise any Modified Version, except to acknowledge the contribution(s) of the Copyright Holder(s) and the Author(s) or with their explicit written permission. + +5) The Font Software, modified or unmodified, in part or in whole, must be distributed entirely under this license, and must not be distributed under any other license. 
The requirement for fonts to remain under this license does not apply to any document created using the Font Software. + +TERMINATION + +This license becomes null and void if any of the above conditions are not met. + +DISCLAIMER + +THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF THE USE OR INABILITY TO USE THE FONT SOFTWARE OR FROM OTHER DEALINGS IN THE FONT SOFTWARE. diff --git a/README.md b/README.md index 1be14c4fa..aab6ded3a 100644 --- a/README.md +++ b/README.md @@ -44,6 +44,8 @@ To discuss shadPS4 development, suggest ideas or to ask for help, join our [**Di To get the latest news, go to our [**X (Twitter)**](https://x.com/shadps4) or our [**website**](https://shadps4.net/). 
+For those who'd like to donate to the project, we now have a [Kofi page!](https://ko-fi.com/shadps4) + # Status > [!IMPORTANT] diff --git a/src/common/cstring.h b/src/common/cstring.h index 1b47bdbf0..fb29443ee 100644 --- a/src/common/cstring.h +++ b/src/common/cstring.h @@ -9,6 +9,9 @@ namespace Common { +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wtautological-undefined-compare" + /** * @brief A null-terminated string with a fixed maximum length * This class is not meant to be used as a general-purpose string class @@ -29,20 +32,27 @@ public: explicit CString(const CString& other) requires(M <= N) { + if (this == nullptr) { + return; + } std::ranges::copy(other.begin(), other.end(), data); } void FromString(const std::basic_string_view& str) { + if (this == nullptr) { + return; + } size_t p = str.copy(data, N - 1); data[p] = '\0'; } void Zero() { + if (this == nullptr) { + return; + } std::ranges::fill(data, 0); } -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wtautological-undefined-compare" explicit(false) operator std::basic_string_view() const { if (this == nullptr) { return {}; @@ -70,21 +80,32 @@ public: } return std::basic_string_view{data}; } -#pragma clang diagnostic pop char* begin() { + if (this == nullptr) { + return nullptr; + } return data; } const char* begin() const { + if (this == nullptr) { + return nullptr; + } return data; } char* end() { + if (this == nullptr) { + return nullptr; + } return data + N; } const char* end() const { + if (this == nullptr) { + return nullptr; + } return data + N; } @@ -127,7 +148,10 @@ public: } }; }; + static_assert(sizeof(CString<13>) == sizeof(char[13])); // Ensure size still matches a simple array static_assert(std::weakly_incrementable::Iterator>); +#pragma clang diagnostic pop + } // namespace Common \ No newline at end of file diff --git a/src/common/disassembler.cpp b/src/common/decoder.cpp similarity index 65% rename from src/common/disassembler.cpp rename to 
src/common/decoder.cpp index 2d1264a4e..249907419 100644 --- a/src/common/disassembler.cpp +++ b/src/common/decoder.cpp @@ -2,18 +2,18 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include -#include "common/disassembler.h" +#include "common/decoder.h" namespace Common { -Disassembler::Disassembler() { +DecoderImpl::DecoderImpl() { ZydisDecoderInit(&m_decoder, ZYDIS_MACHINE_MODE_LONG_64, ZYDIS_STACK_WIDTH_64); ZydisFormatterInit(&m_formatter, ZYDIS_FORMATTER_STYLE_INTEL); } -Disassembler::~Disassembler() = default; +DecoderImpl::~DecoderImpl() = default; -void Disassembler::printInstruction(void* code, u64 address) { +void DecoderImpl::printInstruction(void* code, u64 address) { ZydisDecodedInstruction instruction; ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT_VISIBLE]; ZyanStatus status = @@ -25,8 +25,8 @@ void Disassembler::printInstruction(void* code, u64 address) { } } -void Disassembler::printInst(ZydisDecodedInstruction& inst, ZydisDecodedOperand* operands, - u64 address) { +void DecoderImpl::printInst(ZydisDecodedInstruction& inst, ZydisDecodedOperand* operands, + u64 address) { const int bufLen = 256; char szBuffer[bufLen]; ZydisFormatterFormatInstruction(&m_formatter, &inst, operands, inst.operand_count_visible, @@ -34,4 +34,9 @@ void Disassembler::printInst(ZydisDecodedInstruction& inst, ZydisDecodedOperand* fmt::print("instruction: {}\n", szBuffer); } +ZyanStatus DecoderImpl::decodeInstruction(ZydisDecodedInstruction& inst, + ZydisDecodedOperand* operands, void* data, u64 size) { + return ZydisDecoderDecodeFull(&m_decoder, data, size, &inst, operands); +} + } // namespace Common diff --git a/src/common/disassembler.h b/src/common/decoder.h similarity index 60% rename from src/common/disassembler.h rename to src/common/decoder.h index b81f9e31b..1f2219596 100644 --- a/src/common/disassembler.h +++ b/src/common/decoder.h @@ -4,21 +4,26 @@ #pragma once #include +#include "common/singleton.h" #include "common/types.h" namespace Common { -class 
Disassembler { +class DecoderImpl { public: - Disassembler(); - ~Disassembler(); + DecoderImpl(); + ~DecoderImpl(); void printInst(ZydisDecodedInstruction& inst, ZydisDecodedOperand* operands, u64 address); void printInstruction(void* code, u64 address); + ZyanStatus decodeInstruction(ZydisDecodedInstruction& inst, ZydisDecodedOperand* operands, + void* data, u64 size = 15); private: ZydisDecoder m_decoder; ZydisFormatter m_formatter; }; +using Decoder = Common::Singleton; + } // namespace Common diff --git a/src/common/elf_info.h b/src/common/elf_info.h new file mode 100644 index 000000000..5a2c914e0 --- /dev/null +++ b/src/common/elf_info.h @@ -0,0 +1,72 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include + +#include "assert.h" +#include "singleton.h" +#include "types.h" + +namespace Core { +class Emulator; +} + +namespace Common { + +class ElfInfo { + friend class Core::Emulator; + + bool initialized = false; + + std::string game_serial{}; + std::string title{}; + std::string app_ver{}; + u32 firmware_ver = 0; + u32 raw_firmware_ver = 0; + +public: + static constexpr u32 FW_15 = 0x1500000; + static constexpr u32 FW_16 = 0x1600000; + static constexpr u32 FW_17 = 0x1700000; + static constexpr u32 FW_20 = 0x2000000; + static constexpr u32 FW_25 = 0x2500000; + static constexpr u32 FW_30 = 0x3000000; + static constexpr u32 FW_40 = 0x4000000; + static constexpr u32 FW_45 = 0x4500000; + static constexpr u32 FW_50 = 0x5000000; + static constexpr u32 FW_80 = 0x8000000; + + static ElfInfo& Instance() { + return *Singleton::Instance(); + } + + [[nodiscard]] std::string_view GameSerial() const { + ASSERT(initialized); + return Instance().game_serial; + } + + [[nodiscard]] std::string_view Title() const { + ASSERT(initialized); + return title; + } + + [[nodiscard]] std::string_view AppVer() const { + ASSERT(initialized); + return app_ver; + } + + [[nodiscard]] u32 
FirmwareVer() const { + ASSERT(initialized); + return firmware_ver; + } + + [[nodiscard]] u32 RawFirmwareVer() const { + ASSERT(initialized); + return raw_firmware_ver; + } +}; + +} // namespace Common diff --git a/src/common/signal_context.cpp b/src/common/signal_context.cpp new file mode 100644 index 000000000..112160bc8 --- /dev/null +++ b/src/common/signal_context.cpp @@ -0,0 +1,92 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "common/arch.h" +#include "common/assert.h" +#include "common/signal_context.h" + +#ifdef _WIN32 +#include +#else +#include +#endif + +namespace Common { + +void* GetXmmPointer(void* ctx, u8 index) { +#if defined(_WIN32) +#define CASE(index) \ + case index: \ + return (void*)(&((EXCEPTION_POINTERS*)ctx)->ContextRecord->Xmm##index.Low) +#elif defined(__APPLE__) +#define CASE(index) \ + case index: \ + return (void*)(&((ucontext_t*)ctx)->uc_mcontext->__fs.__fpu_xmm##index); +#else +#define CASE(index) \ + case index: \ + return (void*)(&((ucontext_t*)ctx)->uc_mcontext.fpregs->_xmm[index].element[0]) +#endif + switch (index) { + CASE(0); + CASE(1); + CASE(2); + CASE(3); + CASE(4); + CASE(5); + CASE(6); + CASE(7); + CASE(8); + CASE(9); + CASE(10); + CASE(11); + CASE(12); + CASE(13); + CASE(14); + CASE(15); + default: { + UNREACHABLE_MSG("Invalid XMM register index: {}", index); + return nullptr; + } + } +#undef CASE +} + +void* GetRip(void* ctx) { +#if defined(_WIN32) + return (void*)((EXCEPTION_POINTERS*)ctx)->ContextRecord->Rip; +#elif defined(__APPLE__) + return (void*)((ucontext_t*)ctx)->uc_mcontext->__ss.__rip; +#else + return (void*)((ucontext_t*)ctx)->uc_mcontext.gregs[REG_RIP]; +#endif +} + +void IncrementRip(void* ctx, u64 length) { +#if defined(_WIN32) + ((EXCEPTION_POINTERS*)ctx)->ContextRecord->Rip += length; +#elif defined(__APPLE__) + ((ucontext_t*)ctx)->uc_mcontext->__ss.__rip += length; +#else + ((ucontext_t*)ctx)->uc_mcontext.gregs[REG_RIP] += 
length; +#endif +} + +bool IsWriteError(void* ctx) { +#if defined(_WIN32) + return ((EXCEPTION_POINTERS*)ctx)->ExceptionRecord->ExceptionInformation[0] == 1; +#elif defined(__APPLE__) +#if defined(ARCH_X86_64) + return ((ucontext_t*)ctx)->uc_mcontext->__es.__err & 0x2; +#elif defined(ARCH_ARM64) + return ((ucontext_t*)ctx)->uc_mcontext->__es.__esr & 0x40; +#endif +#else +#if defined(ARCH_X86_64) + return ((ucontext_t*)ctx)->uc_mcontext.gregs[REG_ERR] & 0x2; +#else +#error "Unsupported architecture" +#endif +#endif +} +} // namespace Common \ No newline at end of file diff --git a/src/common/signal_context.h b/src/common/signal_context.h new file mode 100644 index 000000000..b09da64f2 --- /dev/null +++ b/src/common/signal_context.h @@ -0,0 +1,18 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "common/types.h" + +namespace Common { + +void* GetXmmPointer(void* ctx, u8 index); + +void* GetRip(void* ctx); + +void IncrementRip(void* ctx, u64 length); + +bool IsWriteError(void* ctx); + +} // namespace Common \ No newline at end of file diff --git a/src/common/thread.cpp b/src/common/thread.cpp index d1b225472..46df68c38 100644 --- a/src/common/thread.cpp +++ b/src/common/thread.cpp @@ -3,10 +3,12 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include +#include #include "common/error.h" #include "common/logging/log.h" #include "common/thread.h" +#include "ntapi.h" #ifdef __APPLE__ #include #include @@ -102,6 +104,16 @@ void SetCurrentThreadPriority(ThreadPriority new_priority) { SetThreadPriority(handle, windows_priority); } +static void AccurateSleep(std::chrono::nanoseconds duration) { + LARGE_INTEGER interval{ + .QuadPart = -1 * (duration.count() / 100u), + }; + HANDLE timer = ::CreateWaitableTimer(NULL, TRUE, NULL); + SetWaitableTimer(timer, &interval, 0, NULL, NULL, 0); + WaitForSingleObject(timer, INFINITE); + ::CloseHandle(timer); +} + #else void 
SetCurrentThreadPriority(ThreadPriority new_priority) { @@ -122,6 +134,10 @@ void SetCurrentThreadPriority(ThreadPriority new_priority) { pthread_setschedparam(this_thread, scheduling_type, &params); } +static void AccurateSleep(std::chrono::nanoseconds duration) { + std::this_thread::sleep_for(duration); +} + #endif #ifdef _MSC_VER @@ -164,4 +180,24 @@ void SetCurrentThreadName(const char*) { #endif +AccurateTimer::AccurateTimer(std::chrono::nanoseconds target_interval) + : target_interval(target_interval) {} + +// Sleeps off any positive wait budget, records the start time and charges the time spent here. +void AccurateTimer::Start() { + auto begin_sleep = std::chrono::high_resolution_clock::now(); + if (total_wait.count() > 0) { + AccurateSleep(total_wait); + } + start_time = std::chrono::high_resolution_clock::now(); + total_wait -= std::chrono::duration_cast<std::chrono::nanoseconds>(start_time - begin_sleep); +} + +// Adds the unused part of target_interval (negative when the section overran) to the budget. +void AccurateTimer::End() { + auto now = std::chrono::high_resolution_clock::now(); + total_wait += + target_interval - std::chrono::duration_cast<std::chrono::nanoseconds>(now - start_time); +} + } // namespace Common diff --git a/src/common/thread.h b/src/common/thread.h index 3ee60c72f..fd962f8e5 100644 --- a/src/common/thread.h +++ b/src/common/thread.h @@ -23,4 +23,18 @@ void SetCurrentThreadPriority(ThreadPriority new_priority); void SetCurrentThreadName(const char* name); +class AccurateTimer { + std::chrono::nanoseconds target_interval{}; + std::chrono::nanoseconds total_wait{}; + + std::chrono::high_resolution_clock::time_point start_time; + +public: + explicit AccurateTimer(std::chrono::nanoseconds target_interval); + + void Start(); + + void End(); +}; + } // namespace Common diff --git a/src/common/version.h b/src/common/version.h index 80de187b0..12fd17041 100644 --- a/src/common/version.h +++ b/src/common/version.h @@ -8,7 +8,7 @@ namespace Common { -constexpr char VERSION[] = "0.2.1 WIP"; +constexpr char VERSION[] = "0.3.1 WIP"; constexpr bool isRelease = false; } // namespace Common diff --git a/src/core/cpu_patches.cpp b/src/core/cpu_patches.cpp index 1b159d32b..24438b6b5 100644 
--- a/src/core/cpu_patches.cpp +++ b/src/core/cpu_patches.cpp @@ -7,8 +7,12 @@ #include #include #include +#include #include "common/alignment.h" +#include "common/arch.h" #include "common/assert.h" +#include "common/decoder.h" +#include "common/signal_context.h" #include "common/types.h" #include "core/signals.h" #include "core/tls.h" @@ -26,6 +30,16 @@ using namespace Xbyak::util; +#define MAYBE_AVX(OPCODE, ...) \ + [&] { \ + Cpu cpu; \ + if (cpu.has(Cpu::tAVX)) { \ + c.v##OPCODE(__VA_ARGS__); \ + } else { \ + c.OPCODE(__VA_ARGS__); \ + } \ + }() + namespace Core { static Xbyak::Reg ZydisToXbyakRegister(const ZydisRegister reg) { @@ -586,6 +600,114 @@ static void GenerateTcbAccess(const ZydisDecodedOperand* operands, Xbyak::CodeGe #endif // __APPLE__ +static bool FilterNoSSE4a(const ZydisDecodedOperand*) { + Cpu cpu; + return !cpu.has(Cpu::tSSE4a); +} + +static void GenerateEXTRQ(const ZydisDecodedOperand* operands, Xbyak::CodeGenerator& c) { + bool immediateForm = operands[1].type == ZYDIS_OPERAND_TYPE_IMMEDIATE && + operands[2].type == ZYDIS_OPERAND_TYPE_IMMEDIATE; + + ASSERT_MSG(operands[0].type == ZYDIS_OPERAND_TYPE_REGISTER, "operand 0 must be a register"); + + const auto dst = ZydisToXbyakRegisterOperand(operands[0]); + + ASSERT_MSG(dst.isXMM(), "operand 0 must be an XMM register"); + + Xbyak::Xmm xmm_dst = *reinterpret_cast<const Xbyak::Xmm*>(&dst); + + if (immediateForm) { + u8 length = operands[1].imm.value.u & 0x3F; + u8 index = operands[2].imm.value.u & 0x3F; + if (length == 0) { + length = 64; + } + + LOG_DEBUG(Core, "Patching immediate form EXTRQ, length: {}, index: {}", length, index); + + const Xbyak::Reg64 scratch1 = rax; + const Xbyak::Reg64 scratch2 = rcx; + + // Set rsp to before red zone and save scratch registers + c.lea(rsp, ptr[rsp - 128]); + c.pushfq(); + c.push(scratch1); + c.push(scratch2); + + u64 mask = length == 64 ? ~0ULL : (1ULL << length) - 1; // 1ULL << 64 would be UB + + // Get lower qword from xmm register + MAYBE_AVX(movq, scratch1, xmm_dst); + + if (index != 0) { + c.shr(scratch1, index); + } 
+ + // We need to move mask to a register because we can't use all the possible + // immediate values with `and reg, imm32` + c.mov(scratch2, mask); + c.and_(scratch1, scratch2); + + // Writeback to xmm register, extrq instruction says top 64-bits are undefined so we don't + // care to preserve them + MAYBE_AVX(movq, xmm_dst, scratch1); + + c.pop(scratch2); + c.pop(scratch1); + c.popfq(); + c.lea(rsp, ptr[rsp + 128]); + } else { + ASSERT_MSG(operands[0].type == ZYDIS_OPERAND_TYPE_REGISTER && + operands[1].type == ZYDIS_OPERAND_TYPE_REGISTER && + operands[0].reg.value >= ZYDIS_REGISTER_XMM0 && + operands[0].reg.value <= ZYDIS_REGISTER_XMM15 && + operands[1].reg.value >= ZYDIS_REGISTER_XMM0 && + operands[1].reg.value <= ZYDIS_REGISTER_XMM15, + "Unexpected operand types for EXTRQ instruction"); + + const auto src = ZydisToXbyakRegisterOperand(operands[1]); + + ASSERT_MSG(src.isXMM(), "operand 1 must be an XMM register"); + + Xbyak::Xmm xmm_src = *reinterpret_cast<const Xbyak::Xmm*>(&src); + + const Xbyak::Reg64 scratch1 = rax; + const Xbyak::Reg64 scratch2 = rcx; + const Xbyak::Reg64 mask = rdx; + + c.lea(rsp, ptr[rsp - 128]); + c.pushfq(); + c.push(scratch1); + c.push(scratch2); + c.push(mask); + + // mask = ~0 >> ((64 - length) % 64); length is the low 6 bits of src, 0 selects all 64 bits + MAYBE_AVX(movq, scratch1, xmm_src); + c.mov(scratch2, scratch1); + c.and_(scratch2, 0x3F); + c.neg(scratch2); // shr only uses cl % 64, which is now (64 - length) % 64 + c.mov(mask, ~0ULL); + c.shr(mask, cl); + + // Get the shift amount and store it in scratch2 + c.shr(scratch1, 8); + c.and_(scratch1, 0x3F); + c.mov(scratch2, scratch1); // cl now contains the shift amount + + MAYBE_AVX(movq, scratch1, xmm_dst); + c.shr(scratch1, cl); + c.and_(scratch1, mask); + MAYBE_AVX(movq, xmm_dst, scratch1); + + c.pop(mask); + c.pop(scratch2); + c.pop(scratch1); + c.popfq(); + c.lea(rsp, ptr[rsp + 128]); + } +} + using PatchFilter = bool (*)(const ZydisDecodedOperand*); using InstructionGenerator = void (*)(const ZydisDecodedOperand*, Xbyak::CodeGenerator&); struct PatchInfo { @@ 
-607,6 +729,8 @@ static const std::unordered_map Patches = { {ZYDIS_MNEMONIC_MOV, {FilterTcbAccess, GenerateTcbAccess, false}}, #endif + {ZYDIS_MNEMONIC_EXTRQ, {FilterNoSSE4a, GenerateEXTRQ, true}}, + #ifdef __APPLE__ // Patches for instruction sets not supported by Rosetta 2. // BMI1 @@ -622,7 +746,6 @@ static const std::unordered_map Patches = { }; static std::once_flag init_flag; -static ZydisDecoder instr_decoder; struct PatchModule { /// Mutex controlling access to module code regions. @@ -663,22 +786,31 @@ static PatchModule* GetModule(const void* ptr) { static std::pair TryPatch(u8* code, PatchModule* module) { ZydisDecodedInstruction instruction; ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT]; - const auto status = - ZydisDecoderDecodeFull(&instr_decoder, code, module->end - code, &instruction, operands); + const auto status = Common::Decoder::Instance()->decodeInstruction(instruction, operands, code, + module->end - code); if (!ZYAN_SUCCESS(status)) { return std::make_pair(false, 1); } if (Patches.contains(instruction.mnemonic)) { const auto& patch_info = Patches.at(instruction.mnemonic); + bool needs_trampoline = patch_info.trampoline; if (patch_info.filter(operands)) { auto& patch_gen = module->patch_gen; + if (needs_trampoline && instruction.length < 5) { + // Trampoline is needed but instruction is too short to patch. + // Return false and length to fall back to the illegal instruction handler, + // or to signal to AOT compilation that this instruction should be skipped and + // handled at runtime. + return std::make_pair(false, instruction.length); + } + // Reset state and move to current code position. 
patch_gen.reset(); patch_gen.setSize(code - patch_gen.getCode()); - if (patch_info.trampoline) { + if (needs_trampoline) { auto& trampoline_gen = module->trampoline_gen; const auto trampoline_ptr = trampoline_gen.getCurr(); @@ -714,6 +846,86 @@ static std::pair TryPatch(u8* code, PatchModule* module) { return std::make_pair(false, instruction.length); } +#if defined(ARCH_X86_64) + +/// Emulates the instruction at code_address by editing the saved register state in ctx. +/// Returns true when the instruction was handled and RIP was advanced past it. +static bool TryExecuteIllegalInstruction(void* ctx, void* code_address) { + ZydisDecodedInstruction instruction; + ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT]; + const auto status = + Common::Decoder::Instance()->decodeInstruction(instruction, operands, code_address); + if (!ZYAN_SUCCESS(status)) { + // Do not inspect `instruction` or `operands` when decoding failed. + LOG_ERROR(Core, "Failed to decode illegal instruction at code address: {}", + fmt::ptr(code_address)); + return false; + } + + switch (instruction.mnemonic) { + case ZYDIS_MNEMONIC_EXTRQ: { + bool immediateForm = operands[1].type == ZYDIS_OPERAND_TYPE_IMMEDIATE && + operands[2].type == ZYDIS_OPERAND_TYPE_IMMEDIATE; + if (immediateForm) { + LOG_ERROR(Core, "EXTRQ immediate form should have been patched at code address: {}", + fmt::ptr(code_address)); + return false; + } else { + ASSERT_MSG(operands[0].type == ZYDIS_OPERAND_TYPE_REGISTER && + operands[1].type == ZYDIS_OPERAND_TYPE_REGISTER && + operands[0].reg.value >= ZYDIS_REGISTER_XMM0 && + operands[0].reg.value <= ZYDIS_REGISTER_XMM15 && + operands[1].reg.value >= ZYDIS_REGISTER_XMM0 && + operands[1].reg.value <= ZYDIS_REGISTER_XMM15, + "Unexpected operand types for EXTRQ instruction"); + + const auto dstIndex = operands[0].reg.value - ZYDIS_REGISTER_XMM0; + const auto srcIndex = operands[1].reg.value - ZYDIS_REGISTER_XMM0; + + const auto dst = Common::GetXmmPointer(ctx, dstIndex); + const auto src = Common::GetXmmPointer(ctx, srcIndex); + + u64 lowQWordSrc; + memcpy(&lowQWordSrc, src, sizeof(lowQWordSrc)); + + u64 lowQWordDst; + memcpy(&lowQWordDst, dst, sizeof(lowQWordDst)); + + const u64 length = lowQWordSrc & 0x3F; + const u64 mask = length == 0 ? ~0ULL : (1ULL << length) - 1; // length 0 means 64 bits + + u64 shift = (lowQWordSrc >> 8) & 0x3F; + + lowQWordDst >>= shift; + lowQWordDst &= mask; + + memcpy(dst, &lowQWordDst, sizeof(lowQWordDst)); 
+ + Common::IncrementRip(ctx, instruction.length); + + return true; + } + break; + } + default: { + LOG_ERROR(Core, "Unhandled illegal instruction at code address {}: {}", + fmt::ptr(code_address), ZydisMnemonicGetString(instruction.mnemonic)); + return false; + } + } + + UNREACHABLE(); +} +#elif defined(ARCH_ARM64) +// These functions shouldn't be needed for ARM as it will use a JIT so there's no need to patch +// instructions. +static bool TryExecuteIllegalInstruction(void*, void*) { + return false; +} +#else +#error "Unsupported architecture" +#endif + static bool TryPatchJit(void* code_address) { auto* code = static_cast<u8*>(code_address); auto* module = GetModule(code); @@ -746,17 +958,19 @@ static void TryPatchAot(void* code_address, u64 code_size) { } } -static bool PatchesAccessViolationHandler(void* code_address, void* fault_address, bool is_write) { - return TryPatchJit(code_address); +static bool PatchesAccessViolationHandler(void* context, void* /* fault_address */) { + return TryPatchJit(Common::GetRip(context)); } -static bool PatchesIllegalInstructionHandler(void* code_address) { - return TryPatchJit(code_address); +static bool PatchesIllegalInstructionHandler(void* context) { + void* code_address = Common::GetRip(context); + if (!TryPatchJit(code_address)) { + return TryExecuteIllegalInstruction(context, code_address); + } + return true; } static void PatchesInit() { - ZydisDecoderInit(&instr_decoder, ZYDIS_MACHINE_MODE_LONG_64, ZYDIS_STACK_WIDTH_64); - if (!Patches.empty()) { auto* signals = Signals::Instance(); // Should be called last. 
diff --git a/src/core/libraries/app_content/app_content.cpp b/src/core/libraries/app_content/app_content.cpp index 81ce044fa..754343eef 100644 --- a/src/core/libraries/app_content/app_content.cpp +++ b/src/core/libraries/app_content/app_content.cpp @@ -4,6 +4,7 @@ #include #include "app_content.h" +#include "common/assert.h" #include "common/io_file.h" #include "common/logging/log.h" #include "common/path_util.h" @@ -246,7 +247,11 @@ int PS4_SYSV_ABI sceAppContentInitialize(const OrbisAppContentInitParam* initPar auto* param_sfo = Common::Singleton::Instance(); const auto addons_dir = Common::FS::GetUserPath(Common::FS::PathType::AddonsDir); - title_id = *param_sfo->GetString("TITLE_ID"); + if (const auto value = param_sfo->GetString("TITLE_ID"); value.has_value()) { + title_id = *value; + } else { + UNREACHABLE_MSG("Failed to get TITLE_ID"); + } auto addon_path = addons_dir / title_id; if (std::filesystem::exists(addon_path)) { for (const auto& entry : std::filesystem::directory_iterator(addon_path)) { diff --git a/src/core/libraries/avplayer/avplayer.h b/src/core/libraries/avplayer/avplayer.h index 360f06b65..98e932070 100644 --- a/src/core/libraries/avplayer/avplayer.h +++ b/src/core/libraries/avplayer/avplayer.h @@ -161,7 +161,20 @@ struct SceAvPlayerFileReplacement { SceAvPlayerSizeFile size; }; -typedef void PS4_SYSV_ABI (*SceAvPlayerEventCallback)(void* p, s32 event, s32 src_id, void* data); +enum SceAvPlayerEvents { + SCE_AVPLAYER_STATE_STOP = 0x01, + SCE_AVPLAYER_STATE_READY = 0x02, + SCE_AVPLAYER_STATE_PLAY = 0x03, + SCE_AVPLAYER_STATE_PAUSE = 0x04, + SCE_AVPLAYER_STATE_BUFFERING = 0x05, + SCE_AVPLAYER_TIMED_TEXT_DELIVERY = 0x10, + SCE_AVPLAYER_WARNING_ID = 0x20, + SCE_AVPLAYER_ENCRYPTION = 0x30, + SCE_AVPLAYER_DRM_ERROR = 0x40 +}; + +typedef void PS4_SYSV_ABI (*SceAvPlayerEventCallback)(void* p, SceAvPlayerEvents event, s32 src_id, + void* data); struct SceAvPlayerEventReplacement { void* object_ptr; @@ -275,18 +288,6 @@ enum SceAvPlayerAvSyncMode { 
typedef int PS4_SYSV_ABI (*SceAvPlayerLogCallback)(void* p, const char* format, va_list args); -enum SceAvPlayerEvents { - SCE_AVPLAYER_STATE_STOP = 0x01, - SCE_AVPLAYER_STATE_READY = 0x02, - SCE_AVPLAYER_STATE_PLAY = 0x03, - SCE_AVPLAYER_STATE_PAUSE = 0x04, - SCE_AVPLAYER_STATE_BUFFERING = 0x05, - SCE_AVPLAYER_TIMED_TEXT_DELIVERY = 0x10, - SCE_AVPLAYER_WARNING_ID = 0x20, - SCE_AVPLAYER_ENCRYPTION = 0x30, - SCE_AVPLAYER_DRM_ERROR = 0x40 -}; - void RegisterlibSceAvPlayer(Core::Loader::SymbolsResolver* sym); } // namespace Libraries::AvPlayer diff --git a/src/core/libraries/avplayer/avplayer_state.cpp b/src/core/libraries/avplayer/avplayer_state.cpp index 654e04836..c4d666fce 100644 --- a/src/core/libraries/avplayer/avplayer_state.cpp +++ b/src/core/libraries/avplayer/avplayer_state.cpp @@ -5,10 +5,11 @@ #include "avplayer_source.h" #include "avplayer_state.h" +#include "common/singleton.h" #include "common/thread.h" - #include "core/libraries/error_codes.h" #include "core/libraries/kernel/time_management.h" +#include "core/linker.h" #include @@ -16,8 +17,8 @@ namespace Libraries::AvPlayer { using namespace Kernel; -void PS4_SYSV_ABI AvPlayerState::AutoPlayEventCallback(void* opaque, s32 event_id, s32 source_id, - void* event_data) { +void PS4_SYSV_ABI AvPlayerState::AutoPlayEventCallback(void* opaque, SceAvPlayerEvents event_id, + s32 source_id, void* event_data) { auto const self = reinterpret_cast(opaque); if (event_id == SCE_AVPLAYER_STATE_READY) { @@ -90,7 +91,8 @@ void PS4_SYSV_ABI AvPlayerState::AutoPlayEventCallback(void* opaque, s32 event_i const auto callback = self->m_event_replacement.event_callback; const auto ptr = self->m_event_replacement.object_ptr; if (callback != nullptr) { - callback(ptr, event_id, 0, event_data); + auto* linker = Common::Singleton::Instance(); + linker->ExecuteGuest(callback, ptr, event_id, 0, event_data); } } @@ -365,7 +367,8 @@ void AvPlayerState::EmitEvent(SceAvPlayerEvents event_id, void* event_data) { const auto callback = 
m_init_data.event_replacement.event_callback; if (callback) { const auto ptr = m_init_data.event_replacement.object_ptr; - callback(ptr, event_id, 0, event_data); + auto* linker = Common::Singleton::Instance(); + linker->ExecuteGuest(callback, ptr, event_id, 0, event_data); } } diff --git a/src/core/libraries/avplayer/avplayer_state.h b/src/core/libraries/avplayer/avplayer_state.h index 7a15eaf8c..f50d1bc1f 100644 --- a/src/core/libraries/avplayer/avplayer_state.h +++ b/src/core/libraries/avplayer/avplayer_state.h @@ -39,8 +39,8 @@ public: private: // Event Replacement - static void PS4_SYSV_ABI AutoPlayEventCallback(void* handle, s32 event_id, s32 source_id, - void* event_data); + static void PS4_SYSV_ABI AutoPlayEventCallback(void* handle, SceAvPlayerEvents event_id, + s32 source_id, void* event_data); void OnWarning(u32 id) override; void OnError() override; diff --git a/src/core/libraries/kernel/event_flag/event_flag.cpp b/src/core/libraries/kernel/event_flag/event_flag.cpp index 4d3925127..c85aa0d90 100644 --- a/src/core/libraries/kernel/event_flag/event_flag.cpp +++ b/src/core/libraries/kernel/event_flag/event_flag.cpp @@ -137,7 +137,7 @@ int PS4_SYSV_ABI sceKernelPollEventFlag(OrbisKernelEventFlag ef, u64 bitPattern, auto result = ef->Poll(bitPattern, wait, clear, pResultPat); - if (result != ORBIS_OK) { + if (result != ORBIS_OK && result != ORBIS_KERNEL_ERROR_EBUSY) { LOG_ERROR(Kernel_Event, "returned {}", result); } diff --git a/src/core/libraries/kernel/file_system.cpp b/src/core/libraries/kernel/file_system.cpp index cb8e0aac2..45ebb4be8 100644 --- a/src/core/libraries/kernel/file_system.cpp +++ b/src/core/libraries/kernel/file_system.cpp @@ -89,6 +89,8 @@ int PS4_SYSV_ABI sceKernelOpen(const char* path, int flags, u16 mode) { } // RW, then scekernelWrite is called and savedata is written just fine now. 
e = file->f.Open(file->m_host_name, Common::FS::FileAccessMode::ReadWrite); + } else if (write) { + e = file->f.Open(file->m_host_name, Common::FS::FileAccessMode::Write); } else { UNREACHABLE(); } diff --git a/src/core/libraries/kernel/libkernel.cpp b/src/core/libraries/kernel/libkernel.cpp index 41ca726ba..65d3dde14 100644 --- a/src/core/libraries/kernel/libkernel.cpp +++ b/src/core/libraries/kernel/libkernel.cpp @@ -8,6 +8,7 @@ #include "common/assert.h" #include "common/debug.h" +#include "common/elf_info.h" #include "common/logging/log.h" #include "common/polyfill_thread.h" #include "common/singleton.h" @@ -243,8 +244,7 @@ int PS4_SYSV_ABI sceKernelConvertUtcToLocaltime(time_t time, time_t* local_time, } int PS4_SYSV_ABI sceKernelGetCompiledSdkVersion(int* ver) { - auto* param_sfo = Common::Singleton::Instance(); - int version = param_sfo->GetInteger("SYSTEM_VER").value_or(0x4700000); + int version = Common::ElfInfo::Instance().RawFirmwareVer(); LOG_INFO(Kernel, "returned system version = {:#x}", version); *ver = version; return (version > 0) ? 
ORBIS_OK : ORBIS_KERNEL_ERROR_EINVAL; diff --git a/src/core/libraries/kernel/memory_management.cpp b/src/core/libraries/kernel/memory_management.cpp index af3542912..7853a77a4 100644 --- a/src/core/libraries/kernel/memory_management.cpp +++ b/src/core/libraries/kernel/memory_management.cpp @@ -228,8 +228,7 @@ int PS4_SYSV_ABI sceKernelMProtect(const void* addr, size_t size, int prot) { int PS4_SYSV_ABI sceKernelMTypeProtect(const void* addr, size_t size, int mtype, int prot) { Core::MemoryManager* memory_manager = Core::Memory::Instance(); Core::MemoryProt protection_flags = static_cast(prot); - return memory_manager->MTypeProtect(std::bit_cast(addr), size, - static_cast(mtype), protection_flags); + return memory_manager->Protect(std::bit_cast(addr), size, protection_flags); } int PS4_SYSV_ABI sceKernelDirectMemoryQuery(u64 offset, int flags, OrbisQueryInfo* query_info, diff --git a/src/core/libraries/kernel/thread_management.cpp b/src/core/libraries/kernel/thread_management.cpp index 2a44f853b..b7a8f1533 100644 --- a/src/core/libraries/kernel/thread_management.cpp +++ b/src/core/libraries/kernel/thread_management.cpp @@ -6,13 +6,11 @@ #include #include "common/alignment.h" -#include "common/arch.h" #include "common/assert.h" #include "common/error.h" #include "common/logging/log.h" #include "common/singleton.h" #include "common/thread.h" -#include "core/cpu_patches.h" #include "core/libraries/error_codes.h" #include "core/libraries/kernel/libkernel.h" #include "core/libraries/kernel/thread_management.h" @@ -991,16 +989,12 @@ static void cleanup_thread(void* arg) { static void* run_thread(void* arg) { auto* thread = static_cast(arg); Common::SetCurrentThreadName(thread->name.c_str()); -#ifdef ARCH_X86_64 - Core::InitializeThreadPatchStack(); -#endif auto* linker = Common::Singleton::Instance(); - linker->InitTlsForThread(false); void* ret = nullptr; g_pthread_self = thread; pthread_cleanup_push(cleanup_thread, thread); thread->is_started = true; - ret = 
thread->entry(thread->arg); + ret = linker->ExecuteGuest(thread->entry, thread->arg); pthread_cleanup_pop(1); return ret; } diff --git a/src/core/libraries/kernel/time_management.cpp b/src/core/libraries/kernel/time_management.cpp index 7a6ba4f62..5e5e0ef27 100644 --- a/src/core/libraries/kernel/time_management.cpp +++ b/src/core/libraries/kernel/time_management.cpp @@ -147,13 +147,20 @@ int PS4_SYSV_ABI sceKernelGettimeofday(OrbisKernelTimeval* tp) { } #ifdef _WIN64 - auto now = std::chrono::system_clock::now(); - auto duration = now.time_since_epoch(); - auto seconds = std::chrono::duration_cast(duration); - auto microsecs = std::chrono::duration_cast(duration - seconds); + FILETIME filetime; + GetSystemTimeAsFileTime(&filetime); - tp->tv_sec = seconds.count(); - tp->tv_usec = microsecs.count(); + constexpr u64 UNIX_TIME_START = 0x295E9648864000; + constexpr u64 TICKS_PER_SECOND = 1000000; + + u64 ticks = filetime.dwHighDateTime; + ticks <<= 32; + ticks |= filetime.dwLowDateTime; + ticks /= 10; + ticks -= UNIX_TIME_START; + + tp->tv_sec = ticks / TICKS_PER_SECOND; + tp->tv_usec = ticks % TICKS_PER_SECOND; #else timeval tv; gettimeofday(&tv, nullptr); diff --git a/src/core/libraries/network/net_ctl_obj.cpp b/src/core/libraries/network/net_ctl_obj.cpp index 935a700c0..8193c684e 100644 --- a/src/core/libraries/network/net_ctl_obj.cpp +++ b/src/core/libraries/network/net_ctl_obj.cpp @@ -1,6 +1,8 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include "common/singleton.h" +#include "core/linker.h" #include "net_ctl_codes.h" #include "net_ctl_obj.h" @@ -57,18 +59,22 @@ s32 Libraries::NetCtl::NetCtlInternal::registerNpToolkitCallback( void Libraries::NetCtl::NetCtlInternal::checkCallback() { std::unique_lock lock{m_mutex}; + auto* linker = Common::Singleton::Instance(); for (auto& callback : callbacks) { if (callback.func != nullptr) { - callback.func(ORBIS_NET_CTL_EVENT_TYPE_DISCONNECTED, 
callback.arg); + linker->ExecuteGuest(callback.func, ORBIS_NET_CTL_EVENT_TYPE_DISCONNECTED, + callback.arg); } } } void Libraries::NetCtl::NetCtlInternal::checkNpToolkitCallback() { std::unique_lock lock{m_mutex}; + auto* linker = Common::Singleton::Instance(); for (auto& callback : nptoolCallbacks) { if (callback.func != nullptr) { - callback.func(ORBIS_NET_CTL_EVENT_TYPE_DISCONNECTED, callback.arg); + linker->ExecuteGuest(callback.func, ORBIS_NET_CTL_EVENT_TYPE_DISCONNECTED, + callback.arg); } } } diff --git a/src/core/libraries/save_data/dialog/savedatadialog.cpp b/src/core/libraries/save_data/dialog/savedatadialog.cpp index a647d80f9..0ad7d7dc0 100644 --- a/src/core/libraries/save_data/dialog/savedatadialog.cpp +++ b/src/core/libraries/save_data/dialog/savedatadialog.cpp @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include "common/elf_info.h" #include "common/logging/log.h" #include "core/libraries/libs.h" #include "core/libraries/system/commondialog.h" diff --git a/src/core/libraries/save_data/dialog/savedatadialog_ui.cpp b/src/core/libraries/save_data/dialog/savedatadialog_ui.cpp index 8d8cdff45..793b4dd38 100644 --- a/src/core/libraries/save_data/dialog/savedatadialog_ui.cpp +++ b/src/core/libraries/save_data/dialog/savedatadialog_ui.cpp @@ -5,7 +5,9 @@ #include #include +#include "common/elf_info.h" #include "common/singleton.h" +#include "common/string_util.h" #include "core/file_sys/fs.h" #include "core/libraries/save_data/save_instance.h" #include "imgui/imgui_std.h" @@ -13,6 +15,7 @@ using namespace ImGui; using namespace Libraries::CommonDialog; +using Common::ElfInfo; constexpr u32 OrbisSaveDataBlockSize = 32768; // 32 KiB @@ -46,11 +49,13 @@ void SaveDialogResult::CopyTo(OrbisSaveDataDialogResult& result) const { result.mode = this->mode; result.result = this->result; result.buttonId = this->button_id; - if (result.dirName != nullptr) { - 
result.dirName->data.FromString(this->dir_name); - } - if (result.param != nullptr && this->param.GetString(SaveParams::MAINTITLE).has_value()) { - result.param->FromSFO(this->param); + if (mode == SaveDataDialogMode::LIST || ElfInfo::Instance().FirmwareVer() >= ElfInfo::FW_45) { + if (result.dirName != nullptr) { + result.dirName->data.FromString(this->dir_name); + } + if (result.param != nullptr && this->param.GetString(SaveParams::MAINTITLE).has_value()) { + result.param->FromSFO(this->param); + } } result.userData = this->user_data; } @@ -63,8 +68,7 @@ SaveDialogState::SaveDialogState(const OrbisSaveDataDialogParam& param) { this->enable_back = {param.optionParam->back == OptionBack::ENABLE}; } - static std::string game_serial{*Common::Singleton::Instance()->GetString("CONTENT_ID"), 7, - 9}; + const auto& game_serial = Common::ElfInfo::Instance().GameSerial(); const auto item = param.items; this->user_id = item->userId; @@ -115,9 +119,9 @@ SaveDialogState::SaveDialogState(const OrbisSaveDataDialogParam& param) { .dir_name = std::string{dir_name}, .icon = icon, - .title = std::string{*param_sfo.GetString(SaveParams::MAINTITLE)}, - .subtitle = std::string{*param_sfo.GetString(SaveParams::SUBTITLE)}, - .details = std::string{*param_sfo.GetString(SaveParams::DETAIL)}, + .title = std::string{param_sfo.GetString(SaveParams::MAINTITLE).value_or("Unknown")}, + .subtitle = std::string{param_sfo.GetString(SaveParams::SUBTITLE).value_or("")}, + .details = std::string{param_sfo.GetString(SaveParams::DETAIL).value_or("")}, .date = date_str, .size = size_str, .last_write = param_sfo.GetLastWrite(), @@ -126,12 +130,12 @@ SaveDialogState::SaveDialogState(const OrbisSaveDataDialogParam& param) { }); } - if (type == DialogType::SAVE) { + if (type == DialogType::SAVE && item->newItem != nullptr) { RefCountedTexture icon; std::string title{"New Save"}; const auto new_item = item->newItem; - if (new_item != nullptr && new_item->iconBuf && new_item->iconSize) { + if 
(new_item->iconBuf && new_item->iconSize) { auto buf = (u8*)new_item->iconBuf; icon = RefCountedTexture::DecodePngTexture({buf, buf + new_item->iconSize}); } else { @@ -140,7 +144,7 @@ SaveDialogState::SaveDialogState(const OrbisSaveDataDialogParam& param) { icon = RefCountedTexture::DecodePngFile(src_icon); } } - if (new_item != nullptr && new_item->title != nullptr) { + if (new_item->title != nullptr) { title = std::string{new_item->title}; } @@ -199,6 +203,7 @@ SaveDialogState::SystemState::SystemState(const SaveDialogState& state, auto& sys = *param.sysMsgParam; switch (sys.msgType) { case SystemMessageType::NODATA: { + return_cancel = true; this->msg = "There is no saved data"; } break; case SystemMessageType::CONFIRM: @@ -211,6 +216,7 @@ SaveDialogState::SystemState::SystemState(const SaveDialogState& state, M("Do you want to overwrite the existing saved data?", "##UNKNOWN##", "##UNKNOWN##"); break; case SystemMessageType::NOSPACE: + return_cancel = true; M(fmt::format( "There is not enough space to save the data. To continue {} free space is required.", SpaceSizeToString(sys.value * OrbisSaveDataBlockSize)), @@ -222,12 +228,15 @@ SaveDialogState::SystemState::SystemState(const SaveDialogState& state, M("Saving...", "Loading...", "Deleting..."); break; case SystemMessageType::FILE_CORRUPTED: + return_cancel = true; this->msg = "The saved data is corrupted."; break; case SystemMessageType::FINISHED: + return_cancel = true; M("Saved successfully.", "Loading complete.", "Deletion complete."); break; case SystemMessageType::NOSPACE_CONTINUABLE: + return_cancel = true; M(fmt::format("There is not enough space to save the data. 
{} free space is required.", SpaceSizeToString(sys.value * OrbisSaveDataBlockSize)), "##UNKNOWN##", "##UNKNOWN##"); @@ -279,29 +288,36 @@ SaveDialogState::ErrorCodeState::ErrorCodeState(const OrbisSaveDataDialogParam& } SaveDialogState::ProgressBarState::ProgressBarState(const SaveDialogState& state, const OrbisSaveDataDialogParam& param) { + static auto fw_ver = ElfInfo::Instance().FirmwareVer(); + this->progress = 0; auto& bar = *param.progressBarParam; - switch (bar.sysMsgType) { - case ProgressSystemMessageType::INVALID: - this->msg = bar.msg != nullptr ? std::string{bar.msg} : std::string{}; - break; - case ProgressSystemMessageType::PROGRESS: - switch (state.type) { - case DialogType::SAVE: - this->msg = "Saving..."; + + if (bar.msg != nullptr) { + this->msg = std::string{bar.msg}; + } else { + switch (bar.sysMsgType) { + case ProgressSystemMessageType::INVALID: + this->msg = "INVALID"; break; - case DialogType::LOAD: - this->msg = "Loading..."; + case ProgressSystemMessageType::PROGRESS: + switch (state.type) { + case DialogType::SAVE: + this->msg = "Saving..."; + break; + case DialogType::LOAD: + this->msg = "Loading..."; + break; + case DialogType::DELETE: + this->msg = "Deleting..."; + break; + } break; - case DialogType::DELETE: - this->msg = "Deleting..."; + case ProgressSystemMessageType::RESTORE: + this->msg = "Restoring saved data..."; break; } - break; - case ProgressSystemMessageType::RESTORE: - this->msg = "Restoring saved data..."; - break; } } @@ -378,7 +394,7 @@ void SaveDialogUi::Draw() { }; } else { window_size = ImVec2{ - std::min(io.DisplaySize.x, 500.0f), + std::min(io.DisplaySize.x, 600.0f), std::min(io.DisplaySize.y, 300.0f), }; } @@ -446,7 +462,7 @@ void SaveDialogUi::Draw() { } void SaveDialogUi::DrawItem(int _id, const SaveDialogState::Item& item, bool clickable) { - constexpr auto text_spacing = 1.2f; + constexpr auto text_spacing = 0.95f; auto& ctx = *GetCurrentContext(); auto& window = *ctx.CurrentWindow; @@ -495,18 +511,20 @@ void 
SaveDialogUi::DrawItem(int _id, const SaveDialogState::Item& item, bool cli if (!item.title.empty()) { const char* begin = &item.title.front(); const char* end = &item.title.back() + 1; - SetWindowFontScale(2.0f); + SetWindowFontScale(1.5f); RenderText(pos + ImVec2{pos_x, pos_y}, begin, end, false); - if (item.is_corrupted) { - float width = CalcTextSize(begin, end).x + 10.0f; - PushStyleColor(ImGuiCol_Text, 0xFF0000FF); - RenderText(pos + ImVec2{pos_x + width, pos_y}, "- Corrupted", nullptr, false); - PopStyleColor(); - } pos_y += ctx.FontSize * text_spacing; } + SetWindowFontScale(1.1f); - SetWindowFontScale(1.3f); + if (item.is_corrupted) { + pos_y -= ctx.FontSize * text_spacing * 0.3f; + const auto bright = (int)std::abs(std::sin(ctx.Time) * 0.15f * 255.0f); + PushStyleColor(ImGuiCol_Text, IM_COL32(bright + 216, bright, bright, 0xFF)); + RenderText(pos + ImVec2{pos_x, pos_y}, "Corrupted", nullptr, false); + PopStyleColor(); + pos_y += ctx.FontSize * text_spacing * 0.8f; + } if (state->style == ItemStyle::TITLE_SUBTITLE_DATESIZE) { if (!item.subtitle.empty()) { @@ -620,7 +638,7 @@ void SaveDialogUi::DrawList() { SetCursorPosX(GetContentRegionAvail().x - button_size.x); if (Button(back, button_size)) { result->dir_name.clear(); - Finish(ButtonId::INVALID); + Finish(ButtonId::INVALID, Result::USER_CANCELED); } if (IsKeyPressed(ImGuiKey_GamepadFaceRight)) { SetItemCurrentNavFocus(); @@ -636,6 +654,8 @@ void SaveDialogUi::DrawUser() { if (!state->save_list.empty()) { DrawItem(0, state->save_list.front(), false); + } else if (state->new_item) { + DrawItem(0, *state->new_item, false); } auto has_btn = btn_type != ButtonType::NONE; @@ -660,7 +680,7 @@ void SaveDialogUi::DrawUser() { if (has_btn) { int count = 1; - if (btn_type == ButtonType::YESNO || btn_type == ButtonType::ONCANCEL) { + if (btn_type == ButtonType::YESNO || btn_type == ButtonType::OKCANCEL) { ++count; } @@ -676,19 +696,28 @@ void SaveDialogUi::DrawUser() { } SameLine(); if (Button("No", BUTTON_SIZE)) { 
- Finish(ButtonId::NO); + if (ElfInfo::Instance().FirmwareVer() < ElfInfo::FW_45) { + Finish(ButtonId::INVALID, Result::USER_CANCELED); + } else { + Finish(ButtonId::NO); + } } if (first_render || IsKeyPressed(ImGuiKey_GamepadFaceRight)) { SetItemCurrentNavFocus(); } } else { if (Button("OK", BUTTON_SIZE)) { - Finish(ButtonId::OK); + if (btn_type == ButtonType::OK && + ElfInfo::Instance().FirmwareVer() < ElfInfo::FW_45) { + Finish(ButtonId::INVALID, Result::USER_CANCELED); + } else { + Finish(ButtonId::OK); + } } if (first_render) { SetItemCurrentNavFocus(); } - if (btn_type == ButtonType::ONCANCEL) { + if (btn_type == ButtonType::OKCANCEL) { SameLine(); if (Button("Cancel", BUTTON_SIZE)) { Finish(ButtonId::INVALID, Result::USER_CANCELED); @@ -707,6 +736,8 @@ void SaveDialogUi::DrawSystemMessage() { if (!state->save_list.empty()) { DrawItem(0, state->save_list.front(), false); + } else if (state->new_item) { + DrawItem(0, *state->new_item, false); } const auto ws = GetWindowSize(); @@ -730,12 +761,20 @@ void SaveDialogUi::DrawSystemMessage() { }); BeginGroup(); if (Button(sys_state.show_no ? 
"Yes" : "OK", BUTTON_SIZE)) { - Finish(ButtonId::YES); + if (sys_state.return_cancel && ElfInfo::Instance().FirmwareVer() < ElfInfo::FW_45) { + Finish(ButtonId::INVALID, Result::USER_CANCELED); + } else { + Finish(ButtonId::YES); + } } SameLine(); if (sys_state.show_no) { if (Button("No", BUTTON_SIZE)) { - Finish(ButtonId::NO); + if (ElfInfo::Instance().FirmwareVer() < ElfInfo::FW_45) { + Finish(ButtonId::INVALID, Result::USER_CANCELED); + } else { + Finish(ButtonId::NO); + } } } else if (sys_state.show_cancel) { if (Button("Cancel", BUTTON_SIZE)) { @@ -753,6 +792,8 @@ void SaveDialogUi::DrawErrorCode() { if (!state->save_list.empty()) { DrawItem(0, state->save_list.front(), false); + } else if (state->new_item) { + DrawItem(0, *state->new_item, false); } const auto ws = GetWindowSize(); @@ -768,7 +809,11 @@ void SaveDialogUi::DrawErrorCode() { ws.y - FOOTER_HEIGHT + 5.0f, }); if (Button("OK", BUTTON_SIZE)) { - Finish(ButtonId::OK); + if (ElfInfo::Instance().FirmwareVer() < ElfInfo::FW_45) { + Finish(ButtonId::INVALID, Result::USER_CANCELED); + } else { + Finish(ButtonId::OK); + } } if (first_render) { SetItemCurrentNavFocus(); @@ -782,6 +827,8 @@ void SaveDialogUi::DrawProgressBar() { if (!state->save_list.empty()) { DrawItem(0, state->save_list.front(), false); + } else if (state->new_item) { + DrawItem(0, *state->new_item, false); } const auto& msg = bar_state.msg; diff --git a/src/core/libraries/save_data/dialog/savedatadialog_ui.h b/src/core/libraries/save_data/dialog/savedatadialog_ui.h index 8b9a68e13..3f414470f 100644 --- a/src/core/libraries/save_data/dialog/savedatadialog_ui.h +++ b/src/core/libraries/save_data/dialog/savedatadialog_ui.h @@ -48,7 +48,7 @@ enum class ButtonType : u32 { OK = 0, YESNO = 1, NONE = 2, - ONCANCEL = 3, + OKCANCEL = 3, }; enum class UserMessageType : u32 { @@ -222,6 +222,8 @@ public: bool show_no{}; // Yes instead of OK bool show_cancel{}; + bool return_cancel{}; + SystemState(const SaveDialogState& state, const 
OrbisSaveDataDialogParam& param); }; struct ErrorCodeState { diff --git a/src/core/libraries/save_data/save_backup.cpp b/src/core/libraries/save_data/save_backup.cpp index 93af373a8..8f7e0d69a 100644 --- a/src/core/libraries/save_data/save_backup.cpp +++ b/src/core/libraries/save_data/save_backup.cpp @@ -18,6 +18,7 @@ constexpr std::string_view sce_sys = "sce_sys"; // system folder inside save constexpr std::string_view backup_dir = "sce_backup"; // backup folder constexpr std::string_view backup_dir_tmp = "sce_backup_tmp"; // in-progress backup folder +constexpr std::string_view backup_dir_old = "sce_backup_old"; // previous backup folder namespace fs = std::filesystem; @@ -26,6 +27,8 @@ namespace Libraries::SaveData::Backup { static std::jthread g_backup_thread; static std::counting_semaphore g_backup_thread_semaphore{0}; +static std::mutex g_backup_running_mutex; + static std::mutex g_backup_queue_mutex; static std::deque g_backup_queue; static std::deque g_result_queue; @@ -34,59 +37,89 @@ static std::atomic_int g_backup_progress = 0; static std::atomic g_backup_status = WorkerStatus::NotStarted; static void backup(const std::filesystem::path& dir_name) { + std::unique_lock lk{g_backup_running_mutex}; if (!fs::exists(dir_name)) { return; } + + const auto backup_dir = dir_name / ::backup_dir; + const auto backup_dir_tmp = dir_name / ::backup_dir_tmp; + const auto backup_dir_old = dir_name / ::backup_dir_old; + + fs::remove_all(backup_dir_tmp); + fs::remove_all(backup_dir_old); + std::vector backup_files; for (const auto& entry : fs::directory_iterator(dir_name)) { const auto filename = entry.path().filename(); - if (filename != backup_dir && filename != backup_dir_tmp) { + if (filename != ::backup_dir) { backup_files.push_back(entry.path()); } } - const auto backup_dir = dir_name / ::backup_dir; - const auto backup_dir_tmp = dir_name / ::backup_dir_tmp; g_backup_progress = 0; int total_count = static_cast(backup_files.size()); int current_count = 0; - 
fs::remove_all(backup_dir_tmp); fs::create_directory(backup_dir_tmp); for (const auto& file : backup_files) { fs::copy(file, backup_dir_tmp / file.filename(), fs::copy_options::recursive); current_count++; g_backup_progress = current_count * 100 / total_count; } - bool has_existing = fs::exists(backup_dir); - if (has_existing) { - fs::rename(backup_dir, dir_name / "sce_backup_old"); + bool has_existing_backup = fs::exists(backup_dir); + if (has_existing_backup) { + fs::rename(backup_dir, backup_dir_old); } fs::rename(backup_dir_tmp, backup_dir); - if (has_existing) { - fs::remove_all(dir_name / "sce_backup_old"); + if (has_existing_backup) { + fs::remove_all(backup_dir_old); } } static void BackupThreadBody() { Common::SetCurrentThreadName("SaveData_BackupThread"); - while (true) { + while (g_backup_status != WorkerStatus::Stopping) { g_backup_status = WorkerStatus::Waiting; - g_backup_thread_semaphore.acquire(); + + bool wait; BackupRequest req; { std::scoped_lock lk{g_backup_queue_mutex}; - req = g_backup_queue.front(); + wait = g_backup_queue.empty(); + if (!wait) { + req = g_backup_queue.front(); + } + } + if (wait) { + g_backup_thread_semaphore.acquire(); + { + std::scoped_lock lk{g_backup_queue_mutex}; + if (g_backup_queue.empty()) { + continue; + } + req = g_backup_queue.front(); + } } if (req.save_path.empty()) { break; } g_backup_status = WorkerStatus::Running; + LOG_INFO(Lib_SaveData, "Backing up the following directory: {}", req.save_path.string()); - backup(req.save_path); + try { + backup(req.save_path); + } catch (const std::filesystem::filesystem_error& err) { + LOG_ERROR(Lib_SaveData, "Failed to backup {}: {}", req.save_path.string(), err.what()); + } LOG_DEBUG(Lib_SaveData, "Backing up the following directory: {} finished", req.save_path.string()); + { + std::scoped_lock lk{g_backup_queue_mutex}; + g_backup_queue.front().done = true; + } + std::this_thread::sleep_for(std::chrono::seconds(10)); // Don't backup too often { std::scoped_lock 
lk{g_backup_queue_mutex}; g_backup_queue.pop_front(); @@ -104,8 +137,8 @@ void StartThread() { return; } LOG_DEBUG(Lib_SaveData, "Starting backup thread"); - g_backup_thread = std::jthread{BackupThreadBody}; g_backup_status = WorkerStatus::Waiting; + g_backup_thread = std::jthread{BackupThreadBody}; } void StopThread() { @@ -132,6 +165,12 @@ bool NewRequest(OrbisUserServiceUserId user_id, std::string_view title_id, } { std::scoped_lock lk{g_backup_queue_mutex}; + for (const auto& it : g_backup_queue) { + if (it.dir_name == dir_name) { + LOG_TRACE(Lib_SaveData, "Backup request to {} ignored. Already queued", dir_name); + return false; + } + } g_backup_queue.push_back(BackupRequest{ .user_id = user_id, .title_id = std::string{title_id}, @@ -146,6 +185,7 @@ bool NewRequest(OrbisUserServiceUserId user_id, std::string_view title_id, bool Restore(const std::filesystem::path& save_path) { LOG_INFO(Lib_SaveData, "Restoring backup for {}", save_path.string()); + std::unique_lock lk{g_backup_running_mutex}; if (!fs::exists(save_path) || !fs::exists(save_path / backup_dir)) { return false; } @@ -170,8 +210,9 @@ WorkerStatus GetWorkerStatus() { bool IsBackupExecutingFor(const std::filesystem::path& save_path) { std::scoped_lock lk{g_backup_queue_mutex}; - return std::ranges::find(g_backup_queue, save_path, - [](const auto& v) { return v.save_path; }) != g_backup_queue.end(); + const auto& it = + std::ranges::find(g_backup_queue, save_path, [](const auto& v) { return v.save_path; }); + return it != g_backup_queue.end() && !it->done; } std::filesystem::path MakeBackupPath(const std::filesystem::path& save_path) { diff --git a/src/core/libraries/save_data/save_backup.h b/src/core/libraries/save_data/save_backup.h index f0aef3696..e49c69f60 100644 --- a/src/core/libraries/save_data/save_backup.h +++ b/src/core/libraries/save_data/save_backup.h @@ -4,6 +4,7 @@ #pragma once #include +#include #include "common/types.h" @@ -27,6 +28,8 @@ enum class OrbisSaveDataEventType : u32 { }; 
struct BackupRequest { + bool done{}; + OrbisUserServiceUserId user_id{}; std::string title_id{}; std::string dir_name{}; diff --git a/src/core/libraries/save_data/savedata.cpp b/src/core/libraries/save_data/savedata.cpp index 839ec335b..d62c1b9a1 100644 --- a/src/core/libraries/save_data/savedata.cpp +++ b/src/core/libraries/save_data/savedata.cpp @@ -9,10 +9,10 @@ #include "common/assert.h" #include "common/cstring.h" +#include "common/elf_info.h" #include "common/enum.h" #include "common/logging/log.h" #include "common/path_util.h" -#include "common/singleton.h" #include "common/string_util.h" #include "core/file_format/psf.h" #include "core/file_sys/fs.h" @@ -28,11 +28,13 @@ namespace fs = std::filesystem; namespace chrono = std::chrono; using Common::CString; +using Common::ElfInfo; namespace Libraries::SaveData { enum class Error : u32 { OK = 0, + USER_SERVICE_NOT_INITIALIZED = 0x80960002, PARAMETER = 0x809F0000, NOT_INITIALIZED = 0x809F0001, OUT_OF_MEMORY = 0x809F0002, @@ -191,7 +193,9 @@ struct OrbisSaveDataMemorySetup2 { OrbisUserServiceUserId userId; size_t memorySize; size_t iconMemorySize; + // +4.5 const OrbisSaveDataParam* initParam; + // +4.5 const OrbisSaveDataIcon* initIcon; std::array _reserved; }; @@ -241,6 +245,7 @@ struct OrbisSaveDataMountResult { OrbisSaveDataMountPoint mount_point; OrbisSaveDataBlocks required_blocks; u32 _unused; + // +4.5 OrbisSaveDataMountStatus mount_status; std::array _reserved; s32 : 32; @@ -278,8 +283,11 @@ struct OrbisSaveDataDirNameSearchResult { int : 32; OrbisSaveDataDirName* dirNames; u32 dirNamesNum; + // +1.7 u32 setNum; + // +1.7 OrbisSaveDataParam* params; + // +2.5 OrbisSaveDataSearchInfo* infos; std::array _reserved; int : 32; @@ -303,12 +311,13 @@ struct OrbisSaveDataEvent { static bool g_initialized = false; static std::string g_game_serial; +static u32 g_fw_ver; static std::array, 16> g_mount_slots; static void initialize() { g_initialized = true; - static auto* param_sfo = Common::Singleton::Instance(); 
- g_game_serial = std::string(*param_sfo->GetString("CONTENT_ID"), 7, 9); + g_game_serial = ElfInfo::Instance().GameSerial(); + g_fw_ver = ElfInfo::Instance().FirmwareVer(); } // game_00other | game*other @@ -337,6 +346,16 @@ static bool match(std::string_view str, std::string_view pattern) { return str_it == str.end() && pat_it == pattern.end(); } +static Error setNotInitializedError() { + if (g_fw_ver < ElfInfo::FW_20) { + return Error::INTERNAL; + } + if (g_fw_ver < ElfInfo::FW_25) { + return Error::USER_SERVICE_NOT_INITIALIZED; + } + return Error::NOT_INITIALIZED; +} + static Error saveDataMount(const OrbisSaveDataMount2* mount_info, OrbisSaveDataMountResult* mount_result) { @@ -352,7 +371,7 @@ static Error saveDataMount(const OrbisSaveDataMount2* mount_info, { const auto save_path = SaveInstance::MakeDirSavePath(mount_info->userId, g_game_serial, mount_info->dirName->data); - if (Backup::IsBackupExecutingFor(save_path)) { + if (Backup::IsBackupExecutingFor(save_path) && g_fw_ver) { return Error::BACKUP_BUSY; } } @@ -361,11 +380,14 @@ static Error saveDataMount(const OrbisSaveDataMount2* mount_info, const bool is_ro = True(mount_mode & OrbisSaveDataMountMode::RDONLY); const bool create = True(mount_mode & OrbisSaveDataMountMode::CREATE); - const bool create_if_not_exist = True(mount_mode & OrbisSaveDataMountMode::CREATE2); + const bool create_if_not_exist = + True(mount_mode & OrbisSaveDataMountMode::CREATE2) && g_fw_ver >= ElfInfo::FW_45; ASSERT(!create || !create_if_not_exist); // Can't have both const bool copy_icon = True(mount_mode & OrbisSaveDataMountMode::COPY_ICON); - const bool ignore_corrupt = True(mount_mode & OrbisSaveDataMountMode::DESTRUCT_OFF); + + const bool ignore_corrupt = + True(mount_mode & OrbisSaveDataMountMode::DESTRUCT_OFF) || g_fw_ver < ElfInfo::FW_16; const std::string_view dir_name{mount_info->dirName->data}; @@ -437,9 +459,11 @@ static Error saveDataMount(const OrbisSaveDataMount2* mount_info, 
mount_result->mount_point.data.FromString(save_instance.GetMountPoint()); - mount_result->mount_status = create_if_not_exist && to_be_created - ? OrbisSaveDataMountStatus::CREATED - : OrbisSaveDataMountStatus::NOTHING; + if (g_fw_ver >= ElfInfo::FW_45) { + mount_result->mount_status = create_if_not_exist && to_be_created + ? OrbisSaveDataMountStatus::CREATED + : OrbisSaveDataMountStatus::NOTHING; + } g_mount_slots[slot_num].emplace(std::move(save_instance)); @@ -449,7 +473,7 @@ static Error saveDataMount(const OrbisSaveDataMount2* mount_info, static Error Umount(const OrbisSaveDataMountPoint* mountPoint, bool call_backup = false) { if (!g_initialized) { LOG_INFO(Lib_SaveData, "called without initialize"); - return Error::NOT_INITIALIZED; + return setNotInitializedError(); } if (mountPoint == nullptr) { LOG_INFO(Lib_SaveData, "called with invalid parameter"); @@ -479,9 +503,9 @@ static Error Umount(const OrbisSaveDataMountPoint* mountPoint, bool call_backup void OrbisSaveDataParam::FromSFO(const PSF& sfo) { memset(this, 0, sizeof(OrbisSaveDataParam)); - title.FromString(*sfo.GetString(SaveParams::MAINTITLE)); - subTitle.FromString(*sfo.GetString(SaveParams::SUBTITLE)); - detail.FromString(*sfo.GetString(SaveParams::DETAIL)); + title.FromString(sfo.GetString(SaveParams::MAINTITLE).value_or("Unknown")); + subTitle.FromString(sfo.GetString(SaveParams::SUBTITLE).value_or("")); + detail.FromString(sfo.GetString(SaveParams::DETAIL).value_or("")); userParam = sfo.GetInteger(SaveParams::SAVEDATA_LIST_PARAM).value_or(0); const auto time_since_epoch = sfo.GetLastWrite().time_since_epoch(); mtime = chrono::duration_cast(time_since_epoch).count(); @@ -502,7 +526,7 @@ int PS4_SYSV_ABI sceSaveDataAbort() { Error PS4_SYSV_ABI sceSaveDataBackup(const OrbisSaveDataBackup* backup) { if (!g_initialized) { LOG_INFO(Lib_SaveData, "called without initialize"); - return Error::NOT_INITIALIZED; + return setNotInitializedError(); } if (backup == nullptr || backup->dirName == nullptr) { 
LOG_INFO(Lib_SaveData, "called with invalid parameter"); @@ -551,12 +575,13 @@ int PS4_SYSV_ABI sceSaveDataChangeInternal() { Error PS4_SYSV_ABI sceSaveDataCheckBackupData(const OrbisSaveDataCheckBackupData* check) { if (!g_initialized) { LOG_INFO(Lib_SaveData, "called without initialize"); - return Error::NOT_INITIALIZED; + return setNotInitializedError(); } if (check == nullptr || check->dirName == nullptr) { LOG_INFO(Lib_SaveData, "called with invalid parameter"); return Error::PARAMETER; } + LOG_DEBUG(Lib_SaveData, "called with titleId={}", check->titleId->data.to_view()); const std::string_view title{check->titleId != nullptr ? std::string_view{check->titleId->data} : std::string_view{g_game_serial}}; @@ -636,7 +661,7 @@ int PS4_SYSV_ABI sceSaveDataCheckSaveDataVersionLatest() { Error PS4_SYSV_ABI sceSaveDataClearProgress() { if (!g_initialized) { LOG_INFO(Lib_SaveData, "called without initialize"); - return Error::NOT_INITIALIZED; + return setNotInitializedError(); } LOG_DEBUG(Lib_SaveData, "called"); Backup::ClearProgress(); @@ -691,7 +716,7 @@ int PS4_SYSV_ABI sceSaveDataDebugTarget() { Error PS4_SYSV_ABI sceSaveDataDelete(const OrbisSaveDataDelete* del) { if (!g_initialized) { LOG_INFO(Lib_SaveData, "called without initialize"); - return Error::NOT_INITIALIZED; + return setNotInitializedError(); } if (del == nullptr) { LOG_INFO(Lib_SaveData, "called with invalid parameter"); @@ -743,7 +768,7 @@ Error PS4_SYSV_ABI sceSaveDataDirNameSearch(const OrbisSaveDataDirNameSearchCond OrbisSaveDataDirNameSearchResult* result) { if (!g_initialized) { LOG_INFO(Lib_SaveData, "called without initialize"); - return Error::NOT_INITIALIZED; + return setNotInitializedError(); } if (cond == nullptr || result == nullptr || cond->key > OrbisSaveDataSortKey::FREE_BLOCKS || cond->order > OrbisSaveDataSortOrder::DESCENT) { @@ -758,7 +783,9 @@ Error PS4_SYSV_ABI sceSaveDataDirNameSearch(const OrbisSaveDataDirNameSearchCond if (!fs::exists(save_path)) { result->hitNum = 0; - 
result->setNum = 0; + if (g_fw_ver >= ElfInfo::FW_17) { + result->setNum = 0; + } return Error::OK; } @@ -775,9 +802,11 @@ Error PS4_SYSV_ABI sceSaveDataDirNameSearch(const OrbisSaveDataDirNameSearchCond if (cond->dirName != nullptr) { // Filter names const auto pat = Common::ToLower(std::string_view{cond->dirName->data}); - std::erase_if(dir_list, [&](const std::string& dir_name) { - return !match(Common::ToLower(dir_name), pat); - }); + if (!pat.empty()) { + std::erase_if(dir_list, [&](const std::string& dir_name) { + return !match(Common::ToLower(dir_name), pat); + }); + } } std::unordered_map map_dir_sfo; @@ -826,21 +855,25 @@ Error PS4_SYSV_ABI sceSaveDataDirNameSearch(const OrbisSaveDataDirNameSearchCond std::ranges::reverse(dir_list); } - result->hitNum = dir_list.size(); size_t max_count = std::min(static_cast(result->dirNamesNum), dir_list.size()); - result->setNum = max_count; + if (g_fw_ver >= ElfInfo::FW_17) { + result->hitNum = dir_list.size(); + result->setNum = max_count; + } else { + result->hitNum = max_count; + } for (size_t i = 0; i < max_count; i++) { auto& name_data = result->dirNames[i].data; name_data.FromString(dir_list[i]); - if (result->params != nullptr) { + if (g_fw_ver >= ElfInfo::FW_17 && result->params != nullptr) { auto& sfo = map_dir_sfo.at(dir_list[i]); auto& param_data = result->params[i]; param_data.FromSFO(sfo); } - if (result->infos != nullptr) { + if (g_fw_ver >= ElfInfo::FW_25 && result->infos != nullptr) { auto& info = result->infos[i]; info.blocks = map_max_blocks.at(dir_list[i]); info.freeBlocks = map_free_size.at(dir_list[i]); @@ -914,7 +947,7 @@ Error PS4_SYSV_ABI sceSaveDataGetEventResult(const OrbisSaveDataEventParam*, OrbisSaveDataEvent* event) { if (!g_initialized) { LOG_INFO(Lib_SaveData, "called without initialize"); - return Error::NOT_INITIALIZED; + return setNotInitializedError(); } if (event == nullptr) { LOG_INFO(Lib_SaveData, "called with invalid parameter"); @@ -950,7 +983,7 @@ Error PS4_SYSV_ABI 
sceSaveDataGetMountInfo(const OrbisSaveDataMountPoint* mountP OrbisSaveDataMountInfo* info) { if (!g_initialized) { LOG_INFO(Lib_SaveData, "called without initialize"); - return Error::NOT_INITIALIZED; + return setNotInitializedError(); } if (mountPoint == nullptr || info == nullptr) { LOG_INFO(Lib_SaveData, "called with invalid parameter"); @@ -975,7 +1008,7 @@ Error PS4_SYSV_ABI sceSaveDataGetParam(const OrbisSaveDataMountPoint* mountPoint size_t paramBufSize, size_t* gotSize) { if (!g_initialized) { LOG_INFO(Lib_SaveData, "called without initialize"); - return Error::NOT_INITIALIZED; + return setNotInitializedError(); } if (paramType > OrbisSaveDataParamType::MTIME || paramBuf == nullptr) { LOG_INFO(Lib_SaveData, "called with invalid parameter"); @@ -1019,7 +1052,7 @@ Error PS4_SYSV_ABI sceSaveDataGetParam(const OrbisSaveDataMountPoint* mountPoint } else { UNREACHABLE(); } - const size_t s = param_sfo->GetString(key)->copy(param, paramBufSize - 1); + const size_t s = param_sfo->GetString(key).value_or("").copy(param, paramBufSize - 1); param[s] = '\0'; // null terminate if (gotSize != nullptr) { *gotSize = s + 1; @@ -1050,7 +1083,7 @@ Error PS4_SYSV_ABI sceSaveDataGetParam(const OrbisSaveDataMountPoint* mountPoint Error PS4_SYSV_ABI sceSaveDataGetProgress(float* progress) { if (!g_initialized) { LOG_INFO(Lib_SaveData, "called without initialize"); - return Error::NOT_INITIALIZED; + return setNotInitializedError(); } if (progress == nullptr) { LOG_INFO(Lib_SaveData, "called with invalid parameter"); @@ -1084,7 +1117,7 @@ Error PS4_SYSV_ABI sceSaveDataGetSaveDataMemory(const OrbisUserServiceUserId use Error PS4_SYSV_ABI sceSaveDataGetSaveDataMemory2(OrbisSaveDataMemoryGet2* getParam) { if (!g_initialized) { LOG_INFO(Lib_SaveData, "called without initialize"); - return Error::NOT_INITIALIZED; + return setNotInitializedError(); } if (getParam == nullptr) { LOG_INFO(Lib_SaveData, "called with invalid parameter"); @@ -1180,7 +1213,7 @@ Error PS4_SYSV_ABI 
sceSaveDataLoadIcon(const OrbisSaveDataMountPoint* mountPoint OrbisSaveDataIcon* icon) { if (!g_initialized) { LOG_INFO(Lib_SaveData, "called without initialize"); - return Error::NOT_INITIALIZED; + return setNotInitializedError(); } if (mountPoint == nullptr || icon == nullptr || icon->buf == nullptr) { LOG_INFO(Lib_SaveData, "called with invalid parameter"); @@ -1209,7 +1242,7 @@ Error PS4_SYSV_ABI sceSaveDataMount(const OrbisSaveDataMount* mount, OrbisSaveDataMountResult* mount_result) { if (!g_initialized) { LOG_INFO(Lib_SaveData, "called without initialize"); - return Error::NOT_INITIALIZED; + return setNotInitializedError(); } - if (mount == nullptr && mount->dirName != nullptr) { + if (mount == nullptr || mount->dirName == nullptr) { LOG_INFO(Lib_SaveData, "called with invalid parameter"); @@ -1230,7 +1263,7 @@ Error PS4_SYSV_ABI sceSaveDataMount2(const OrbisSaveDataMount2* mount, OrbisSaveDataMountResult* mount_result) { if (!g_initialized) { LOG_INFO(Lib_SaveData, "called without initialize"); - return Error::NOT_INITIALIZED; + return setNotInitializedError(); } - if (mount == nullptr && mount->dirName != nullptr) { + if (mount == nullptr || mount->dirName == nullptr) { LOG_INFO(Lib_SaveData, "called with invalid parameter"); @@ -1274,7 +1307,7 @@ int PS4_SYSV_ABI sceSaveDataRegisterEventCallback() { Error PS4_SYSV_ABI sceSaveDataRestoreBackupData(const OrbisSaveDataRestoreBackupData* restore) { if (!g_initialized) { LOG_INFO(Lib_SaveData, "called without initialize"); - return Error::NOT_INITIALIZED; + return setNotInitializedError(); } if (restore == nullptr || restore->dirName == nullptr) { LOG_INFO(Lib_SaveData, "called with invalid parameter"); @@ -1325,7 +1358,7 @@ Error PS4_SYSV_ABI sceSaveDataSaveIcon(const OrbisSaveDataMountPoint* mountPoint const OrbisSaveDataIcon* icon) { if (!g_initialized) { LOG_INFO(Lib_SaveData, "called without initialize"); - return Error::NOT_INITIALIZED; + return setNotInitializedError(); } if (mountPoint == nullptr || icon == nullptr || icon->buf == nullptr) { LOG_INFO(Lib_SaveData, "called with invalid parameter"); @@ -1373,7 
+1406,7 @@ Error PS4_SYSV_ABI sceSaveDataSetParam(const OrbisSaveDataMountPoint* mountPoint size_t paramBufSize) { if (!g_initialized) { LOG_INFO(Lib_SaveData, "called without initialize"); - return Error::NOT_INITIALIZED; + return setNotInitializedError(); } if (paramType > OrbisSaveDataParamType::USER_PARAM || mountPoint == nullptr || paramBuf == nullptr) { @@ -1438,13 +1471,15 @@ Error PS4_SYSV_ABI sceSaveDataSetSaveDataMemory(OrbisUserServiceUserId userId, v OrbisSaveDataMemorySet2 setParam{}; setParam.userId = userId; setParam.data = &data; + setParam.param = nullptr; + setParam.icon = nullptr; return sceSaveDataSetSaveDataMemory2(&setParam); } Error PS4_SYSV_ABI sceSaveDataSetSaveDataMemory2(const OrbisSaveDataMemorySet2* setParam) { if (!g_initialized) { LOG_INFO(Lib_SaveData, "called without initialize"); - return Error::NOT_INITIALIZED; + return setNotInitializedError(); } if (setParam == nullptr) { LOG_INFO(Lib_SaveData, "called with invalid parameter"); @@ -1477,17 +1512,35 @@ Error PS4_SYSV_ABI sceSaveDataSetSaveDataMemory2(const OrbisSaveDataMemorySet2* return Error::OK; } -int PS4_SYSV_ABI sceSaveDataSetupSaveDataMemory(/*u32 userId, size_t memorySize, - OrbisSaveDataParam* param*/) { - LOG_ERROR(Lib_SaveData, "(STUBBED) called"); - return ORBIS_OK; +Error PS4_SYSV_ABI sceSaveDataSetupSaveDataMemory(OrbisUserServiceUserId userId, size_t memorySize, + OrbisSaveDataParam* param) { + LOG_DEBUG(Lib_SaveData, "called: userId = {}, memorySize = {}", userId, memorySize); + OrbisSaveDataMemorySetup2 setupParam{}; + setupParam.userId = userId; + setupParam.memorySize = memorySize; + setupParam.initParam = nullptr; + setupParam.initIcon = nullptr; + OrbisSaveDataMemorySetupResult result{}; + const auto res = sceSaveDataSetupSaveDataMemory2(&setupParam, &result); + if (res != Error::OK) { + return res; + } + if (param != nullptr) { + OrbisSaveDataMemorySet2 setParam{}; + setParam.userId = userId; + setParam.data = nullptr; + setParam.param = param; + 
setParam.icon = nullptr; + sceSaveDataSetSaveDataMemory2(&setParam); + } + return Error::OK; } Error PS4_SYSV_ABI sceSaveDataSetupSaveDataMemory2(const OrbisSaveDataMemorySetup2* setupParam, OrbisSaveDataMemorySetupResult* result) { if (!g_initialized) { LOG_INFO(Lib_SaveData, "called without initialize"); - return Error::NOT_INITIALIZED; + return setNotInitializedError(); } if (setupParam == nullptr) { LOG_INFO(Lib_SaveData, "called with invalid parameter"); @@ -1507,20 +1560,20 @@ Error PS4_SYSV_ABI sceSaveDataSetupSaveDataMemory2(const OrbisSaveDataMemorySetu try { size_t existed_size = SaveMemory::CreateSaveMemory(setupParam->memorySize); if (existed_size == 0) { // Just created - if (setupParam->initParam != nullptr) { + if (g_fw_ver >= ElfInfo::FW_45 && setupParam->initParam != nullptr) { auto& sfo = SaveMemory::GetParamSFO(); setupParam->initParam->ToSFO(sfo); } SaveMemory::SaveSFO(); auto init_icon = setupParam->initIcon; - if (init_icon != nullptr) { + if (g_fw_ver >= ElfInfo::FW_45 && init_icon != nullptr) { SaveMemory::SetIcon(init_icon->buf, init_icon->bufSize); } else { SaveMemory::SetIcon(nullptr, 0); } } - if (result != nullptr) { + if (g_fw_ver >= ElfInfo::FW_45 && result != nullptr) { result->existedMemorySize = existed_size; } } catch (const fs::filesystem_error& e) { @@ -1556,7 +1609,7 @@ int PS4_SYSV_ABI sceSaveDataSyncCloudList() { Error PS4_SYSV_ABI sceSaveDataSyncSaveDataMemory(OrbisSaveDataMemorySync* syncParam) { if (!g_initialized) { LOG_INFO(Lib_SaveData, "called without initialize"); - return Error::NOT_INITIALIZED; + return setNotInitializedError(); } if (syncParam == nullptr) { LOG_INFO(Lib_SaveData, "called with invalid parameter"); @@ -1577,11 +1630,15 @@ Error PS4_SYSV_ABI sceSaveDataSyncSaveDataMemory(OrbisSaveDataMemorySync* syncPa Error PS4_SYSV_ABI sceSaveDataTerminate() { LOG_DEBUG(Lib_SaveData, "called"); if (!g_initialized) { - return Error::NOT_INITIALIZED; + return setNotInitializedError(); } - for (const auto& instance : 
g_mount_slots) { + for (auto& instance : g_mount_slots) { if (instance.has_value()) { - return Error::BUSY; + if (g_fw_ver >= ElfInfo::FW_40) { + return Error::BUSY; + } + instance->Umount(); + instance.reset(); } } g_initialized = false; diff --git a/src/core/libraries/save_data/savedata.h b/src/core/libraries/save_data/savedata.h index 5e6a8ad4c..13b3dd59e 100644 --- a/src/core/libraries/save_data/savedata.h +++ b/src/core/libraries/save_data/savedata.h @@ -165,8 +165,8 @@ int PS4_SYSV_ABI sceSaveDataSetSaveDataLibraryUser(); Error PS4_SYSV_ABI sceSaveDataSetSaveDataMemory(OrbisUserServiceUserId userId, void* buf, size_t bufSize, int64_t offset); Error PS4_SYSV_ABI sceSaveDataSetSaveDataMemory2(const OrbisSaveDataMemorySet2* setParam); -int PS4_SYSV_ABI sceSaveDataSetupSaveDataMemory(/*u32 userId, size_t memorySize, - OrbisSaveDataParam* param*/); +Error PS4_SYSV_ABI sceSaveDataSetupSaveDataMemory(OrbisUserServiceUserId userId, size_t memorySize, + OrbisSaveDataParam* param); Error PS4_SYSV_ABI sceSaveDataSetupSaveDataMemory2(const OrbisSaveDataMemorySetup2* setupParam, OrbisSaveDataMemorySetupResult* result); int PS4_SYSV_ABI sceSaveDataShutdownStart(); diff --git a/src/core/libraries/system/systemservice.cpp b/src/core/libraries/system/systemservice.cpp index d99ec7c7c..8002e2bfc 100644 --- a/src/core/libraries/system/systemservice.cpp +++ b/src/core/libraries/system/systemservice.cpp @@ -1717,7 +1717,7 @@ int PS4_SYSV_ABI sceSystemServiceGetAppType() { s32 PS4_SYSV_ABI sceSystemServiceGetDisplaySafeAreaInfo(OrbisSystemServiceDisplaySafeAreaInfo* info) { - LOG_INFO(Lib_SystemService, "called"); + LOG_DEBUG(Lib_SystemService, "called"); if (info == nullptr) { LOG_ERROR(Lib_SystemService, "OrbisSystemServiceDisplaySafeAreaInfo is null"); return ORBIS_SYSTEM_SERVICE_ERROR_PARAMETER; diff --git a/src/core/libraries/videoout/driver.cpp b/src/core/libraries/videoout/driver.cpp index f04fb505d..fa7577907 100644 --- a/src/core/libraries/videoout/driver.cpp +++ 
b/src/core/libraries/videoout/driver.cpp @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include #include #include "common/assert.h" @@ -160,9 +161,7 @@ int VideoOutDriver::UnregisterBuffers(VideoOutPort* port, s32 attributeIndex) { return ORBIS_OK; } -std::chrono::microseconds VideoOutDriver::Flip(const Request& req) { - const auto start = std::chrono::high_resolution_clock::now(); - +void VideoOutDriver::Flip(const Request& req) { // Whatever the game is rendering show splash if it is active if (!renderer->ShowSplash(req.frame)) { // Present the frame. @@ -198,9 +197,6 @@ std::chrono::microseconds VideoOutDriver::Flip(const Request& req) { port->buffer_labels[req.index] = 0; port->SignalVoLabel(); } - - const auto end = std::chrono::high_resolution_clock::now(); - return std::chrono::duration_cast(end - start); } void VideoOutDriver::DrawBlankFrame() { @@ -267,6 +263,8 @@ void VideoOutDriver::PresentThread(std::stop_token token) { Common::SetCurrentThreadName("PresentThread"); Common::SetCurrentThreadRealtime(vblank_period); + Common::AccurateTimer timer{vblank_period}; + const auto receive_request = [this] -> Request { std::scoped_lock lk{mutex}; if (!requests.empty()) { @@ -279,20 +277,18 @@ void VideoOutDriver::PresentThread(std::stop_token token) { auto delay = std::chrono::microseconds{0}; while (!token.stop_requested()) { - // Sleep for most of the vblank duration. - std::this_thread::sleep_for(vblank_period - delay); + timer.Start(); // Check if it's time to take a request. 
auto& vblank_status = main_port.vblank_status; if (vblank_status.count % (main_port.flip_rate + 1) == 0) { const auto request = receive_request(); if (!request) { - delay = std::chrono::microseconds{0}; if (!main_port.is_open) { DrawBlankFrame(); } } else { - delay = Flip(request); + Flip(request); FRAME_END; } } @@ -313,6 +309,8 @@ void VideoOutDriver::PresentThread(std::stop_token token) { Kernel::SceKernelEvent::Filter::VideoOut, nullptr); } } + + timer.End(); } } diff --git a/src/core/libraries/videoout/driver.h b/src/core/libraries/videoout/driver.h index 141294bfd..2e478b9ee 100644 --- a/src/core/libraries/videoout/driver.h +++ b/src/core/libraries/videoout/driver.h @@ -101,7 +101,7 @@ private: } }; - std::chrono::microseconds Flip(const Request& req); + void Flip(const Request& req); void DrawBlankFrame(); // Used when there is no flip request to keep ImGui up to date void SubmitFlipInternal(VideoOutPort* port, s32 index, s64 flip_arg, bool is_eop = false); void PresentThread(std::stop_token token); diff --git a/src/core/linker.cpp b/src/core/linker.cpp index e8aab673d..4e4fa28d2 100644 --- a/src/core/linker.cpp +++ b/src/core/linker.cpp @@ -90,11 +90,8 @@ void Linker::Execute() { // Init primary thread. 
Common::SetCurrentThreadName("GAME_MainThread"); -#ifdef ARCH_X86_64 - InitializeThreadPatchStack(); -#endif Libraries::Kernel::pthreadInitSelfMainThread(); - InitTlsForThread(true); + EnsureThreadInitialized(true); // Start shared library modules for (auto& m : m_modules) { @@ -335,6 +332,17 @@ void* Linker::TlsGetAddr(u64 module_index, u64 offset) { return addr + offset; } +thread_local std::once_flag init_tls_flag; + +void Linker::EnsureThreadInitialized(bool is_primary) { + std::call_once(init_tls_flag, [this, is_primary] { +#ifdef ARCH_X86_64 + InitializeThreadPatchStack(); +#endif + InitTlsForThread(is_primary); + }); +} + void Linker::InitTlsForThread(bool is_primary) { static constexpr size_t TcbSize = 0x40; static constexpr size_t TlsAllocAlign = 0x20; diff --git a/src/core/linker.h b/src/core/linker.h index ed1fe400c..18454f602 100644 --- a/src/core/linker.h +++ b/src/core/linker.h @@ -98,7 +98,6 @@ public: } void* TlsGetAddr(u64 module_index, u64 offset); - void InitTlsForThread(bool is_primary = false); s32 LoadModule(const std::filesystem::path& elf_name, bool is_dynamic = false); Module* FindByAddress(VAddr address); @@ -109,8 +108,17 @@ public: void Execute(); void DebugDump(); + template + ReturnType ExecuteGuest(PS4_SYSV_ABI ReturnType (*func)(FuncArgs...), CallArgs&&... args) { + // Make sure TLS is initialized for the thread before entering guest. 
+ EnsureThreadInitialized(); + return func(std::forward(args)...); + } + private: const Module* FindExportedModule(const ModuleInfo& m, const LibraryInfo& l); + void EnsureThreadInitialized(bool is_primary = false); + void InitTlsForThread(bool is_primary); MemoryManager* memory; std::mutex mutex; diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 44f96a001..ebda00357 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -348,63 +348,6 @@ int MemoryManager::Protect(VAddr addr, size_t size, MemoryProt prot) { return ORBIS_OK; } -int MemoryManager::MTypeProtect(VAddr addr, size_t size, VMAType mtype, MemoryProt prot) { - std::scoped_lock lk{mutex}; - - // Find the virtual memory area that contains the specified address range. - auto it = FindVMA(addr); - if (it == vma_map.end() || !it->second.Contains(addr, size)) { - LOG_ERROR(Core, "Address range not mapped"); - return ORBIS_KERNEL_ERROR_EINVAL; - } - - VirtualMemoryArea& vma = it->second; - - if (vma.type == VMAType::Free) { - LOG_ERROR(Core, "Cannot change protection on free memory region"); - return ORBIS_KERNEL_ERROR_EINVAL; - } - - // Validate protection flags - constexpr static MemoryProt valid_flags = MemoryProt::NoAccess | MemoryProt::CpuRead | - MemoryProt::CpuReadWrite | MemoryProt::GpuRead | - MemoryProt::GpuWrite | MemoryProt::GpuReadWrite; - - MemoryProt invalid_flags = prot & ~valid_flags; - if (u32(invalid_flags) != 0 && u32(invalid_flags) != u32(MemoryProt::NoAccess)) { - LOG_ERROR(Core, "Invalid protection flags: prot = {:#x}, invalid flags = {:#x}", u32(prot), - u32(invalid_flags)); - return ORBIS_KERNEL_ERROR_EINVAL; - } - - // Change type and protection - vma.type = mtype; - vma.prot = prot; - - // Set permissions - Core::MemoryPermission perms{}; - - if (True(prot & MemoryProt::CpuRead)) { - perms |= Core::MemoryPermission::Read; - } - if (True(prot & MemoryProt::CpuReadWrite)) { - perms |= Core::MemoryPermission::ReadWrite; - } - if (True(prot & MemoryProt::GpuRead)) { - 
perms |= Core::MemoryPermission::Read; - } - if (True(prot & MemoryProt::GpuWrite)) { - perms |= Core::MemoryPermission::Write; - } - if (True(prot & MemoryProt::GpuReadWrite)) { - perms |= Core::MemoryPermission::ReadWrite; - } - - impl.Protect(addr, size, perms); - - return ORBIS_OK; -} - int MemoryManager::VirtualQuery(VAddr addr, int flags, ::Libraries::Kernel::OrbisVirtualQueryInfo* info) { std::scoped_lock lk{mutex}; diff --git a/src/core/memory.h b/src/core/memory.h index d0935ffb7..73ffab503 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -166,8 +166,6 @@ public: int Protect(VAddr addr, size_t size, MemoryProt prot); - int MTypeProtect(VAddr addr, size_t size, VMAType mtype, MemoryProt prot); - int VirtualQuery(VAddr addr, int flags, ::Libraries::Kernel::OrbisVirtualQueryInfo* info); int DirectMemoryQuery(PAddr addr, bool find_next, diff --git a/src/core/module.cpp b/src/core/module.cpp index ce2f9d3ab..e62c57785 100644 --- a/src/core/module.cpp +++ b/src/core/module.cpp @@ -9,6 +9,7 @@ #include "common/string_util.h" #include "core/aerolib/aerolib.h" #include "core/cpu_patches.h" +#include "core/linker.h" #include "core/loader/dwarf.h" #include "core/memory.h" #include "core/module.h" @@ -69,8 +70,9 @@ Module::~Module() = default; s32 Module::Start(size_t args, const void* argp, void* param) { LOG_INFO(Core_Linker, "Module started : {}", name); + auto* linker = Common::Singleton::Instance(); const VAddr addr = dynamic_info.init_virtual_addr + GetBaseAddress(); - return reinterpret_cast(addr)(args, argp, param); + return linker->ExecuteGuest(reinterpret_cast(addr), args, argp, param); } void Module::LoadModuleToMemory(u32& max_tls_index) { diff --git a/src/core/signals.cpp b/src/core/signals.cpp index a16c150e9..87f56c85a 100644 --- a/src/core/signals.cpp +++ b/src/core/signals.cpp @@ -3,6 +3,8 @@ #include "common/arch.h" #include "common/assert.h" +#include "common/decoder.h" +#include "common/signal_context.h" #include "core/signals.h" #ifdef 
_WIN32 @@ -10,7 +12,6 @@ #else #include #ifdef ARCH_X86_64 -#include #include #endif #endif @@ -22,17 +23,14 @@ namespace Core { static LONG WINAPI SignalHandler(EXCEPTION_POINTERS* pExp) noexcept { const auto* signals = Signals::Instance(); - auto* code_address = reinterpret_cast(pExp->ContextRecord->Rip); - bool handled = false; switch (pExp->ExceptionRecord->ExceptionCode) { case EXCEPTION_ACCESS_VIOLATION: handled = signals->DispatchAccessViolation( - code_address, reinterpret_cast(pExp->ExceptionRecord->ExceptionInformation[1]), - pExp->ExceptionRecord->ExceptionInformation[0] == 1); + pExp, reinterpret_cast(pExp->ExceptionRecord->ExceptionInformation[1])); break; case EXCEPTION_ILLEGAL_INSTRUCTION: - handled = signals->DispatchIllegalInstruction(code_address); + handled = signals->DispatchIllegalInstruction(pExp); break; default: break; @@ -43,37 +41,14 @@ static LONG WINAPI SignalHandler(EXCEPTION_POINTERS* pExp) noexcept { #else -#ifdef __APPLE__ -#if defined(ARCH_X86_64) -#define CODE_ADDRESS(ctx) reinterpret_cast((ctx)->uc_mcontext->__ss.__rip) -#define IS_WRITE_ERROR(ctx) ((ctx)->uc_mcontext->__es.__err & 0x2) -#elif defined(ARCH_ARM64) -#define CODE_ADDRESS(ctx) reinterpret_cast((ctx)->uc_mcontext->__ss.__pc) -#define IS_WRITE_ERROR(ctx) ((ctx)->uc_mcontext->__es.__esr & 0x40) -#endif -#else -#if defined(ARCH_X86_64) -#define CODE_ADDRESS(ctx) reinterpret_cast((ctx)->uc_mcontext.gregs[REG_RIP]) -#define IS_WRITE_ERROR(ctx) ((ctx)->uc_mcontext.gregs[REG_ERR] & 0x2) -#endif -#endif - -#ifndef IS_WRITE_ERROR -#error "Missing IS_WRITE_ERROR() implementation for target OS and CPU architecture." 
-#endif - static std::string DisassembleInstruction(void* code_address) { char buffer[256] = ""; #ifdef ARCH_X86_64 - ZydisDecoder decoder; - ZydisDecoderInit(&decoder, ZYDIS_MACHINE_MODE_LONG_64, ZYDIS_STACK_WIDTH_64); - ZydisDecodedInstruction instruction; ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT]; - static constexpr u64 max_length = 0x20; const auto status = - ZydisDecoderDecodeFull(&decoder, code_address, max_length, &instruction, operands); + Common::Decoder::Instance()->decodeInstruction(instruction, operands, code_address); if (ZYAN_SUCCESS(status)) { ZydisFormatter formatter; ZydisFormatterInit(&formatter, ZYDIS_FORMATTER_STYLE_INTEL); @@ -87,23 +62,23 @@ static std::string DisassembleInstruction(void* code_address) { } static void SignalHandler(int sig, siginfo_t* info, void* raw_context) { - const auto* ctx = static_cast(raw_context); const auto* signals = Signals::Instance(); - auto* code_address = CODE_ADDRESS(ctx); + auto* code_address = Common::GetRip(raw_context); switch (sig) { case SIGSEGV: - case SIGBUS: - if (const bool is_write = IS_WRITE_ERROR(ctx); - !signals->DispatchAccessViolation(code_address, info->si_addr, is_write)) { + case SIGBUS: { + const bool is_write = Common::IsWriteError(raw_context); + if (!signals->DispatchAccessViolation(raw_context, info->si_addr)) { UNREACHABLE_MSG("Unhandled access violation at code address {}: {} address {}", fmt::ptr(code_address), is_write ? 
"Write to" : "Read from", fmt::ptr(info->si_addr)); } break; + } case SIGILL: - if (!signals->DispatchIllegalInstruction(code_address)) { + if (!signals->DispatchIllegalInstruction(raw_context)) { UNREACHABLE_MSG("Unhandled illegal instruction at code address {}: {}", fmt::ptr(code_address), DisassembleInstruction(code_address)); } @@ -150,19 +125,18 @@ SignalDispatch::~SignalDispatch() { #endif } -bool SignalDispatch::DispatchAccessViolation(void* code_address, void* fault_address, - bool is_write) const { +bool SignalDispatch::DispatchAccessViolation(void* context, void* fault_address) const { for (const auto& [handler, _] : access_violation_handlers) { - if (handler(code_address, fault_address, is_write)) { + if (handler(context, fault_address)) { return true; } } return false; } -bool SignalDispatch::DispatchIllegalInstruction(void* code_address) const { +bool SignalDispatch::DispatchIllegalInstruction(void* context) const { for (const auto& [handler, _] : illegal_instruction_handlers) { - if (handler(code_address)) { + if (handler(context)) { return true; } } diff --git a/src/core/signals.h b/src/core/signals.h index bb018a937..6ee525e10 100644 --- a/src/core/signals.h +++ b/src/core/signals.h @@ -8,8 +8,8 @@ namespace Core { -using AccessViolationHandler = bool (*)(void* code_address, void* fault_address, bool is_write); -using IllegalInstructionHandler = bool (*)(void* code_address); +using AccessViolationHandler = bool (*)(void* context, void* fault_address); +using IllegalInstructionHandler = bool (*)(void* context); /// Receives OS signals and dispatches to the appropriate handlers. class SignalDispatch { @@ -28,10 +28,10 @@ public: } /// Dispatches an access violation signal, returning whether it was successfully handled. 
- bool DispatchAccessViolation(void* code_address, void* fault_address, bool is_write) const; + bool DispatchAccessViolation(void* context, void* fault_address) const; /// Dispatches an illegal instruction signal, returning whether it was successfully handled. - bool DispatchIllegalInstruction(void* code_address) const; + bool DispatchIllegalInstruction(void* context) const; private: template diff --git a/src/emulator.cpp b/src/emulator.cpp index 581d0da83..4a2e38ff8 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -11,6 +11,7 @@ #include "common/memory_patcher.h" #endif #include "common/assert.h" +#include "common/elf_info.h" #include "common/ntapi.h" #include "common/path_util.h" #include "common/polyfill_thread.h" @@ -91,10 +92,14 @@ void Emulator::Run(const std::filesystem::path& file) { // Certain games may use /hostapp as well such as CUSA001100 mnt->Mount(file.parent_path(), "/hostapp"); + auto& game_info = Common::ElfInfo::Instance(); + // Loading param.sfo file if exists std::string id; std::string title; std::string app_version; + u32 fw_version = 0x4700000; // same fallback as a param.sfo lacking SYSTEM_VER; read below even if no param.sfo exists + std::filesystem::path sce_sys_folder = file.parent_path() / "sce_sys"; if (std::filesystem::is_directory(sce_sys_folder)) { for (const auto& entry : std::filesystem::directory_iterator(sce_sys_folder)) { @@ -102,7 +107,9 @@ void Emulator::Run(const std::filesystem::path& file) { auto* param_sfo = Common::Singleton::Instance(); const bool success = param_sfo->Open(sce_sys_folder / "param.sfo"); ASSERT_MSG(success, "Failed to open param.sfo"); - id = std::string(*param_sfo->GetString("CONTENT_ID"), 7, 9); + const auto content_id = param_sfo->GetString("CONTENT_ID"); + ASSERT_MSG(content_id.has_value(), "Failed to get CONTENT_ID"); + id = std::string(*content_id, 7, 9); Libraries::NpTrophy::game_serial = id; const auto trophyDir = Common::FS::GetUserPath(Common::FS::PathType::MetaDataDir) / id / "TrophyFiles"; @@ -115,10 +122,10 @@ void Emulator::Run(const std::filesystem::path& file) { #ifdef ENABLE_QT_GUI 
MemoryPatcher::g_game_serial = id; #endif - title = *param_sfo->GetString("TITLE"); + title = param_sfo->GetString("TITLE").value_or("Unknown title"); LOG_INFO(Loader, "Game id: {} Title: {}", id, title); - u32 fw_version = param_sfo->GetInteger("SYSTEM_VER").value_or(0x4700000); - app_version = *param_sfo->GetString("APP_VER"); + fw_version = param_sfo->GetInteger("SYSTEM_VER").value_or(0x4700000); + app_version = param_sfo->GetString("APP_VER").value_or("Unknown version"); LOG_INFO(Loader, "Fw: {:#x} App Version: {}", fw_version, app_version); } else if (entry.path().filename() == "playgo-chunk.dat") { auto* playgo = Common::Singleton::Instance(); @@ -139,6 +146,13 @@ void Emulator::Run(const std::filesystem::path& file) { } } + game_info.initialized = true; + game_info.game_serial = id; + game_info.title = title; + game_info.app_ver = app_version; + game_info.firmware_ver = fw_version & 0xFFF00000; + game_info.raw_firmware_ver = fw_version; + std::string game_title = fmt::format("{} - {} <{}>", id, title, app_version); std::string window_title = ""; if (Common::isRelease) { diff --git a/src/imgui/layer/video_info.cpp b/src/imgui/layer/video_info.cpp index bf30f8701..55cfaf895 100644 --- a/src/imgui/layer/video_info.cpp +++ b/src/imgui/layer/video_info.cpp @@ -2,16 +2,121 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include + +#include "common/config.h" +#include "common/types.h" +#include "imgui_internal.h" #include "video_info.h" -void ImGui::Layers::VideoInfo::Draw() { - const ImGuiIO& io = GetIO(); +using namespace ImGui; - m_show = IsKeyPressed(ImGuiKey_F10, false) ^ m_show; +struct FrameInfo { + u32 num; + float delta; +}; - if (m_show) { - if (Begin("Video Info", 0, ImGuiWindowFlags_NoNav)) { - Text("Frame time: %.3f ms (%.1f FPS)", 1000.0f / io.Framerate, io.Framerate); +static bool show = false; +static bool show_advanced = false; + +static u32 current_frame = 0; +constexpr float TARGET_FPS = 60.0f; +constexpr u32 FRAME_BUFFER_SIZE = 1024; +constexpr 
float BAR_WIDTH_MULT = 1.4f; +constexpr float BAR_HEIGHT_MULT = 1.25f; +constexpr float FRAME_GRAPH_PADDING_Y = 3.0f; +static std::array frame_list; +static float frame_graph_height = 50.0f; + +static void DrawSimple() { + const auto io = GetIO(); + Text("Frame time: %.3f ms (%.1f FPS)", 1000.0f / io.Framerate, io.Framerate); +} + +static void DrawAdvanced() { + const auto& ctx = *GetCurrentContext(); + const auto& io = ctx.IO; + const auto& window = *ctx.CurrentWindow; + auto& draw_list = *window.DrawList; + + Text("Frame time: %.3f ms (%.1f FPS)", io.DeltaTime * 1000.0f, io.Framerate); + + SeparatorText("Frame graph"); + const float full_width = GetContentRegionAvail().x; + { // Frame graph - inspired by + // https://asawicki.info/news_1758_an_idea_for_visualization_of_frame_times + auto pos = GetCursorScreenPos(); + const ImVec2 size{full_width, frame_graph_height + FRAME_GRAPH_PADDING_Y * 2.0f}; + ItemSize(size); + if (!ItemAdd({pos, pos + size}, GetID("FrameGraph"))) { + return; + } + + float target_dt = 1.0f / (TARGET_FPS * (float)Config::vblankDiv()); + float cur_pos_x = pos.x + full_width; + pos.y += FRAME_GRAPH_PADDING_Y; + const float final_pos_y = pos.y + frame_graph_height; + + draw_list.AddRectFilled({pos.x, pos.y - FRAME_GRAPH_PADDING_Y}, + {pos.x + full_width, final_pos_y + FRAME_GRAPH_PADDING_Y}, + IM_COL32(0x33, 0x33, 0x33, 0xFF)); + draw_list.PushClipRect({pos.x, pos.y}, {pos.x + full_width, final_pos_y}, true); + for (u32 i = 0; i < FRAME_BUFFER_SIZE; ++i) { + const auto& frame_info = frame_list[(current_frame - i) % FRAME_BUFFER_SIZE]; + const float dt_factor = target_dt / frame_info.delta; + + const float width = std::ceil(BAR_WIDTH_MULT / dt_factor); + const float height = + std::min(std::log2(BAR_HEIGHT_MULT / dt_factor) / 3.0f, 1.0f) * frame_graph_height; + + ImU32 color; + if (dt_factor >= 0.95f) { // BLUE + color = IM_COL32(0x33, 0x33, 0xFF, 0xFF); + } else if (dt_factor >= 0.5f) { // GREEN <> YELLOW + float t = 1.0f - (dt_factor - 0.5f) * 
2.0f; + int r = (int)(0xFF * t); + color = IM_COL32(r, 0xFF, 0, 0xFF); + } else { // YELLOW <> RED + float t = dt_factor * 2.0f; + int g = (int)(0xFF * t); + color = IM_COL32(0xFF, g, 0, 0xFF); + } + draw_list.AddRectFilled({cur_pos_x - width, final_pos_y - height}, + {cur_pos_x, final_pos_y}, color); + cur_pos_x -= width; + if (cur_pos_x < width) { + break; + } + } + draw_list.PopClipRect(); + } +} + +void Layers::VideoInfo::Draw() { + const auto io = GetIO(); + + const FrameInfo frame_info{ + .num = ++current_frame, + .delta = io.DeltaTime, + }; + frame_list[current_frame % FRAME_BUFFER_SIZE] = frame_info; + + if (IsKeyPressed(ImGuiKey_F10, false)) { + const bool changed_ctrl = io.KeyCtrl != show_advanced; + show_advanced = io.KeyCtrl; + show = changed_ctrl || !show; + } + + if (show) { + if (show_advanced) { + if (Begin("Video debug info", &show, 0)) { + DrawAdvanced(); + } + } else { + if (Begin("Video Info", nullptr, + ImGuiWindowFlags_NoNav | ImGuiWindowFlags_NoDecoration | + ImGuiWindowFlags_AlwaysAutoResize)) { + DrawSimple(); + } } End(); } diff --git a/src/imgui/layer/video_info.h b/src/imgui/layer/video_info.h index 8eec972a8..8a8af554e 100644 --- a/src/imgui/layer/video_info.h +++ b/src/imgui/layer/video_info.h @@ -11,7 +11,6 @@ class RendererVulkan; namespace ImGui::Layers { class VideoInfo : public Layer { - bool m_show = false; ::Vulkan::RendererVulkan* renderer{}; public: diff --git a/src/imgui/renderer/CMakeLists.txt b/src/imgui/renderer/CMakeLists.txt new file mode 100644 index 000000000..b5f51ef62 --- /dev/null +++ b/src/imgui/renderer/CMakeLists.txt @@ -0,0 +1,32 @@ +# SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +# SPDX-License-Identifier: GPL-2.0-or-later + +project(ImGui_Resources) + +add_executable(Dear_ImGui_FontEmbed ${CMAKE_SOURCE_DIR}/externals/dear_imgui/misc/fonts/binary_to_compressed_c.cpp) + +set(FONT_LIST + NotoSansJP-Regular.ttf +) + +set(OutputList "") +FOREACH (FONT_FILE ${FONT_LIST}) + string(REGEX REPLACE "-" 
"_" fontname ${FONT_FILE}) + string(TOLOWER ${fontname} fontname) + string(REGEX REPLACE ".ttf" "" fontname_cpp ${fontname}) + set(fontname_cpp "imgui_font_${fontname_cpp}") + + MESSAGE(STATUS "Embedding font ${FONT_FILE}") + set(OUTPUT "generated_fonts/imgui_fonts/${fontname}") + add_custom_command( + OUTPUT "${OUTPUT}.g.cpp" + COMMAND ${CMAKE_COMMAND} -E make_directory "generated_fonts/imgui_fonts" + COMMAND $ -nostatic "${CMAKE_CURRENT_SOURCE_DIR}/fonts/${FONT_FILE}" ${fontname_cpp} > "${OUTPUT}.g.cpp" + DEPENDS Dear_ImGui_FontEmbed "fonts/${FONT_FILE}" + USES_TERMINAL + ) + list(APPEND OutputList "${OUTPUT}.g.cpp") +ENDFOREACH () + +add_library(ImGui_Resources STATIC ${OutputList}) +set(IMGUI_RESOURCES_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/generated_fonts PARENT_SCOPE) diff --git a/src/imgui/renderer/fonts/NotoSansJP-Regular.ttf b/src/imgui/renderer/fonts/NotoSansJP-Regular.ttf new file mode 100644 index 000000000..b2dad730d Binary files /dev/null and b/src/imgui/renderer/fonts/NotoSansJP-Regular.ttf differ diff --git a/src/imgui/renderer/imgui_core.cpp b/src/imgui/renderer/imgui_core.cpp index d52536f68..b972d99d0 100644 --- a/src/imgui/renderer/imgui_core.cpp +++ b/src/imgui/renderer/imgui_core.cpp @@ -3,6 +3,7 @@ #include #include + #include "common/config.h" #include "common/path_util.h" #include "imgui/imgui_layer.h" @@ -14,6 +15,8 @@ #include "texture_manager.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" +#include "imgui_fonts/notosansjp_regular.ttf.g.cpp" + static void CheckVkResult(const vk::Result err) { LOG_ERROR(ImGui, "Vulkan error {}", vk::to_string(err)); } @@ -50,6 +53,22 @@ void Initialize(const ::Vulkan::Instance& instance, const Frontend::WindowSDL& w io.DisplaySize = ImVec2((float)window.getWidth(), (float)window.getHeight()); io.IniFilename = SDL_strdup(config_path.string().c_str()); io.LogFilename = SDL_strdup(log_path.string().c_str()); + + ImFontGlyphRangesBuilder rb{}; + rb.AddRanges(io.Fonts->GetGlyphRangesDefault()); + 
rb.AddRanges(io.Fonts->GetGlyphRangesGreek()); + rb.AddRanges(io.Fonts->GetGlyphRangesKorean()); + rb.AddRanges(io.Fonts->GetGlyphRangesJapanese()); + rb.AddRanges(io.Fonts->GetGlyphRangesCyrillic()); + ImVector ranges{}; + rb.BuildRanges(&ranges); + ImFontConfig font_cfg{}; + font_cfg.OversampleH = 2; + font_cfg.OversampleV = 1; + io.Fonts->AddFontFromMemoryCompressedTTF(imgui_font_notosansjp_regular_compressed_data, + imgui_font_notosansjp_regular_compressed_size, 16.0f, + &font_cfg, ranges.Data); + StyleColorsDark(); Sdl::Init(window.GetSdlWindow()); diff --git a/src/imgui/renderer/imgui_impl_sdl3.cpp b/src/imgui/renderer/imgui_impl_sdl3.cpp index 2a7d801e4..bb194bff7 100644 --- a/src/imgui/renderer/imgui_impl_sdl3.cpp +++ b/src/imgui/renderer/imgui_impl_sdl3.cpp @@ -703,8 +703,8 @@ static void UpdateGamepads() { const int thumb_dead_zone = 8000; // SDL_gamepad.h suggests using this value. UpdateGamepadButton(bd, io, ImGuiKey_GamepadStart, SDL_GAMEPAD_BUTTON_START); UpdateGamepadButton(bd, io, ImGuiKey_GamepadBack, SDL_GAMEPAD_BUTTON_BACK); - UpdateGamepadButton(bd, io, ImGuiKey_GamepadFaceLeft, - SDL_GAMEPAD_BUTTON_WEST); // Xbox X, PS Square + /*UpdateGamepadButton(bd, io, ImGuiKey_GamepadFaceLeft, + SDL_GAMEPAD_BUTTON_WEST); // Xbox X, PS Square*/ // Disable to avoid menu toggle UpdateGamepadButton(bd, io, ImGuiKey_GamepadFaceRight, SDL_GAMEPAD_BUTTON_EAST); // Xbox B, PS Circle UpdateGamepadButton(bd, io, ImGuiKey_GamepadFaceUp, diff --git a/src/qt_gui/game_info.h b/src/qt_gui/game_info.h index 2d08bc085..a4bcd20ee 100644 --- a/src/qt_gui/game_info.h +++ b/src/qt_gui/game_info.h @@ -32,16 +32,31 @@ public: QString iconpath = QString::fromStdString(game.icon_path); game.icon = QImage(iconpath); game.pic_path = game.path + "/sce_sys/pic1.png"; - game.name = *psf.GetString("TITLE"); - game.serial = *psf.GetString("TITLE_ID"); - game.region = - GameListUtils::GetRegion(psf.GetString("CONTENT_ID")->at(0)).toStdString(); - u32 fw_int = 
*psf.GetInteger("SYSTEM_VER"); - QString fw = QString::number(fw_int, 16); - QString fw_ = fw.length() > 7 ? QString::number(fw_int, 16).left(3).insert(2, '.') - : fw.left(3).insert(1, '.'); - game.fw = (fw_int == 0) ? "0.00" : fw_.toStdString(); - game.version = *psf.GetString("APP_VER"); + if (const auto title = psf.GetString("TITLE"); title.has_value()) { + game.name = *title; + } + if (const auto title_id = psf.GetString("TITLE_ID"); title_id.has_value()) { + game.serial = *title_id; + } + if (const auto content_id = psf.GetString("CONTENT_ID"); + content_id.has_value() && !content_id->empty()) { + game.region = GameListUtils::GetRegion(content_id->at(0)).toStdString(); + } + if (const auto fw_int_opt = psf.GetInteger("SYSTEM_VER"); fw_int_opt.has_value()) { + auto fw_int = *fw_int_opt; + if (fw_int == 0) { + game.fw = "0.00"; + } else { + QString fw = QString::number(fw_int, 16); + QString fw_ = fw.length() > 7 + ? QString::number(fw_int, 16).left(3).insert(2, '.') + : fw.left(3).insert(1, '.'); + game.fw = fw_.toStdString(); + } + } + if (auto app_ver = psf.GetString("APP_VER"); app_ver.has_value()) { + game.version = *app_ver; + } } return game; } diff --git a/src/qt_gui/gui_context_menus.h b/src/qt_gui/gui_context_menus.h index bd3961dd9..a2f7f28ff 100644 --- a/src/qt_gui/gui_context_menus.h +++ b/src/qt_gui/gui_context_menus.h @@ -96,25 +96,37 @@ public: QTableWidgetItem* valueItem; switch (entry.param_fmt) { case PSFEntryFmt::Binary: { - - const auto bin = *psf.GetBinary(entry.key); - std::string text; - text.reserve(bin.size() * 2); - for (const auto& c : bin) { - static constexpr char hex[] = "0123456789ABCDEF"; - text.push_back(hex[c >> 4 & 0xF]); - text.push_back(hex[c & 0xF]); + const auto bin = psf.GetBinary(entry.key); + if (!bin.has_value()) { + valueItem = new QTableWidgetItem(QString("Unknown")); + } else { + std::string text; + text.reserve(bin->size() * 2); + for (const auto& c : *bin) { + static constexpr char hex[] = "0123456789ABCDEF"; + 
text.push_back(hex[c >> 4 & 0xF]); + text.push_back(hex[c & 0xF]); + } + valueItem = new QTableWidgetItem(QString::fromStdString(text)); } - valueItem = new QTableWidgetItem(QString::fromStdString(text)); } break; case PSFEntryFmt::Text: { - auto text = *psf.GetString(entry.key); - valueItem = new QTableWidgetItem(QString::fromStdString(std::string{text})); + auto text = psf.GetString(entry.key); + if (!text.has_value()) { + valueItem = new QTableWidgetItem(QString("Unknown")); + } else { + valueItem = + new QTableWidgetItem(QString::fromStdString(std::string{*text})); + } } break; case PSFEntryFmt::Integer: { - auto integer = *psf.GetInteger(entry.key); - valueItem = - new QTableWidgetItem(QString("0x") + QString::number(integer, 16)); + auto integer = psf.GetInteger(entry.key); + if (!integer.has_value()) { + valueItem = new QTableWidgetItem(QString("Unknown")); + } else { + valueItem = + new QTableWidgetItem(QString("0x") + QString::number(*integer, 16)); + } } break; } diff --git a/src/qt_gui/main_window.cpp b/src/qt_gui/main_window.cpp index 206cb0c2b..535e470f8 100644 --- a/src/qt_gui/main_window.cpp +++ b/src/qt_gui/main_window.cpp @@ -653,9 +653,19 @@ void MainWindow::InstallDragDropPkg(std::filesystem::path file, int pkgNum, int QMessageBox msgBox; msgBox.setWindowTitle(tr("PKG Extraction")); - psf.Open(pkg.sfo); + if (!psf.Open(pkg.sfo)) { + QMessageBox::critical(this, tr("PKG ERROR"), + "Could not read SFO. 
Check log for details"); + return; + } - std::string content_id{*psf.GetString("CONTENT_ID")}; + std::string content_id; + if (auto value = psf.GetString("CONTENT_ID"); value.has_value()) { + content_id = std::string{*value}; + } else { + QMessageBox::critical(this, tr("PKG ERROR"), "PSF file there is no CONTENT_ID"); + return; + } std::string entitlement_label = Common::SplitString(content_id, '-')[2]; auto addon_extract_path = Common::FS::GetUserPath(Common::FS::PathType::AddonsDir) / @@ -664,11 +674,21 @@ void MainWindow::InstallDragDropPkg(std::filesystem::path file, int pkgNum, int auto category = psf.GetString("CATEGORY"); if (pkgType.contains("PATCH")) { - QString pkg_app_version = - QString::fromStdString(std::string{*psf.GetString("APP_VER")}); + QString pkg_app_version; + if (auto app_ver = psf.GetString("APP_VER"); app_ver.has_value()) { + pkg_app_version = QString::fromStdString(std::string{*app_ver}); + } else { + QMessageBox::critical(this, tr("PKG ERROR"), "PSF file there is no APP_VER"); + return; + } psf.Open(extract_path / "sce_sys" / "param.sfo"); - QString game_app_version = - QString::fromStdString(std::string{*psf.GetString("APP_VER")}); + QString game_app_version; + if (auto app_ver = psf.GetString("APP_VER"); app_ver.has_value()) { + game_app_version = QString::fromStdString(std::string{*app_ver}); + } else { + QMessageBox::critical(this, tr("PKG ERROR"), "PSF file there is no APP_VER"); + return; + } double appD = game_app_version.toDouble(); double pkgD = pkg_app_version.toDouble(); if (pkgD == appD) { diff --git a/src/qt_gui/pkg_viewer.cpp b/src/qt_gui/pkg_viewer.cpp index d41d37dbe..8f20f6929 100644 --- a/src/qt_gui/pkg_viewer.cpp +++ b/src/qt_gui/pkg_viewer.cpp @@ -110,12 +110,16 @@ void PKGViewer::ProcessPKGInfo() { #endif package.Open(path); psf.Open(package.sfo); - QString title_name = QString::fromStdString(std::string{*psf.GetString("TITLE")}); - QString title_id = QString::fromStdString(std::string{*psf.GetString("TITLE_ID")}); - 
QString app_type = game_list_util.GetAppType(*psf.GetInteger("APP_TYPE")); - QString app_version = QString::fromStdString(std::string{*psf.GetString("APP_VER")}); - QString title_category = QString::fromStdString(std::string{*psf.GetString("CATEGORY")}); - QString pkg_size = game_list_util.FormatSize(package.GetPkgHeader().pkg_size); + QString title_name = + QString::fromStdString(std::string{psf.GetString("TITLE").value_or("Unknown")}); + QString title_id = + QString::fromStdString(std::string{psf.GetString("TITLE_ID").value_or("Unknown")}); + QString app_type = GameListUtils::GetAppType(psf.GetInteger("APP_TYPE").value_or(0)); + QString app_version = + QString::fromStdString(std::string{psf.GetString("APP_VER").value_or("Unknown")}); + QString title_category = + QString::fromStdString(std::string{psf.GetString("CATEGORY").value_or("Unknown")}); + QString pkg_size = GameListUtils::FormatSize(package.GetPkgHeader().pkg_size); pkg_content_flag = package.GetPkgHeader().pkg_content_flags; QString flagss = ""; for (const auto& flag : package.flagNames) { @@ -126,11 +130,17 @@ void PKGViewer::ProcessPKGInfo() { } } - u32 fw_int = *psf.GetInteger("SYSTEM_VER"); - QString fw = QString::number(fw_int, 16); - QString fw_ = fw.length() > 7 ? QString::number(fw_int, 16).left(3).insert(2, '.') + QString fw_ = "Unknown"; + if (const auto fw_int_opt = psf.GetInteger("SYSTEM_VER"); fw_int_opt.has_value()) { + const u32 fw_int = *fw_int_opt; + if (fw_int == 0) { + fw_ = "0.00"; + } else { + QString fw = QString::number(fw_int, 16); + fw_ = fw.length() > 7 ? QString::number(fw_int, 16).left(3).insert(2, '.') : fw.left(3).insert(1, '.'); - fw_ = (fw_int == 0) ? 
"0.00" : fw_; + } + } char region = package.GetPkgHeader().pkg_content_id[0]; QString pkg_info = ""; if (title_category == "gd" && !flagss.contains("PATCH")) { diff --git a/src/shader_recompiler/backend/bindings.h b/src/shader_recompiler/backend/bindings.h index 1b53c74eb..510b0c0ec 100644 --- a/src/shader_recompiler/backend/bindings.h +++ b/src/shader_recompiler/backend/bindings.h @@ -9,10 +9,10 @@ namespace Shader::Backend { struct Bindings { u32 unified{}; - u32 uniform_buffer{}; - u32 storage_buffer{}; - u32 texture{}; - u32 image{}; + u32 buffer{}; + u32 user_data{}; + + auto operator<=>(const Bindings&) const = default; }; } // namespace Shader::Backend diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index e671a37eb..8aa292b1c 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -265,7 +265,7 @@ void PatchPhiNodes(const IR::Program& program, EmitContext& ctx) { } // Anonymous namespace std::vector EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_info, - const IR::Program& program, u32& binding) { + const IR::Program& program, Bindings& binding) { EmitContext ctx{profile, runtime_info, program.info, binding}; const Id main{DefineMain(ctx, program)}; DefineEntryPoint(program, ctx, main); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index aada0ff67..5b8da4496 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -4,12 +4,13 @@ #pragma once #include +#include "shader_recompiler/backend/bindings.h" #include "shader_recompiler/ir/program.h" #include "shader_recompiler/profile.h" namespace Shader::Backend::SPIRV { [[nodiscard]] std::vector EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_info, - const IR::Program& program, u32& binding); + const IR::Program& program, 
Bindings& binding); } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 5fed9b4db..92279c5fb 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -86,7 +86,14 @@ Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) { } // Anonymous namespace Id EmitGetUserData(EmitContext& ctx, IR::ScalarReg reg) { - return ctx.ConstU32(ctx.info.user_data[static_cast(reg)]); + const u32 index = ctx.binding.user_data + ctx.info.ud_mask.Index(reg); + const u32 half = PushData::UdRegsIndex + (index >> 2); + const Id ud_ptr{ctx.OpAccessChain(ctx.TypePointer(spv::StorageClass::PushConstant, ctx.U32[1]), + ctx.push_data_block, ctx.ConstU32(half), + ctx.ConstU32(index & 3))}; + const Id ud_reg{ctx.OpLoad(ctx.U32[1], ud_ptr)}; + ctx.Name(ud_reg, fmt::format("ud_{}", u32(reg))); + return ud_reg; } void EmitGetThreadBitScalarReg(EmitContext& ctx) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 530f381d7..50d9cc8cb 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -157,17 +157,20 @@ Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, const ImageOperands operands; operands.AddOffset(ctx, offset); operands.Add(spv::ImageOperandsMask::Lod, lod); - return ctx.OpBitcast( - ctx.F32[4], ctx.OpImageFetch(result_type, image, coords, operands.mask, operands.operands)); + const Id texel = + texture.is_storage + ? 
ctx.OpImageRead(result_type, image, coords, operands.mask, operands.operands) + : ctx.OpImageFetch(result_type, image, coords, operands.mask, operands.operands); + return ctx.OpBitcast(ctx.F32[4], texel); } -Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod, bool skip_mips) { +Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod, bool has_mips) { const auto& texture = ctx.images[handle & 0xFFFF]; const Id image = ctx.OpLoad(texture.image_type, texture.id); const auto type = ctx.info.images[handle & 0xFFFF].type; const Id zero = ctx.u32_zero_value; - const auto mips{[&] { return skip_mips ? zero : ctx.OpImageQueryLevels(ctx.U32[1], image); }}; - const bool uses_lod{type != AmdGpu::ImageType::Color2DMsaa}; + const auto mips{[&] { return has_mips ? ctx.OpImageQueryLevels(ctx.U32[1], image) : zero; }}; + const bool uses_lod{type != AmdGpu::ImageType::Color2DMsaa && !texture.is_storage}; const auto query{[&](Id type) { return uses_lod ? ctx.OpImageQuerySizeLod(type, image, lod) : ctx.OpImageQuerySize(type, image); @@ -178,6 +181,7 @@ Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod case AmdGpu::ImageType::Color1DArray: case AmdGpu::ImageType::Color2D: case AmdGpu::ImageType::Cube: + case AmdGpu::ImageType::Color2DMsaa: return ctx.OpCompositeConstruct(ctx.U32[4], query(ctx.U32[2]), zero, mips()); case AmdGpu::ImageType::Color2DArray: case AmdGpu::ImageType::Color3D: diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 2f1f7aa75..dd780622f 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -42,7 +42,7 @@ void Name(EmitContext& ctx, Id object, std::string_view format_str, Args&&... 
ar } // Anonymous namespace EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_info_, - const Info& info_, u32& binding_) + const Info& info_, Bindings& binding_) : Sirit::Module(profile_.supported_spirv), info{info_}, runtime_info{runtime_info_}, profile{profile_}, stage{info.stage}, binding{binding_} { AddCapability(spv::Capability::Shader); @@ -173,7 +173,7 @@ EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat f } void EmitContext::DefineBufferOffsets() { - for (auto& buffer : buffers) { + for (BufferDefinition& buffer : buffers) { const u32 binding = buffer.binding; const u32 half = PushData::BufOffsetIndex + (binding >> 4); const u32 comp = (binding & 0xf) >> 2; @@ -182,9 +182,11 @@ void EmitContext::DefineBufferOffsets() { push_data_block, ConstU32(half), ConstU32(comp))}; const Id value{OpLoad(U32[1], ptr)}; buffer.offset = OpBitFieldUExtract(U32[1], value, ConstU32(offset), ConstU32(8U)); + Name(buffer.offset, fmt::format("buf{}_off", binding)); buffer.offset_dwords = OpShiftRightLogical(U32[1], buffer.offset, ConstU32(2U)); + Name(buffer.offset_dwords, fmt::format("buf{}_dword_off", binding)); } - for (auto& tex_buffer : texture_buffers) { + for (TextureBufferDefinition& tex_buffer : texture_buffers) { const u32 binding = tex_buffer.binding; const u32 half = PushData::BufOffsetIndex + (binding >> 4); const u32 comp = (binding & 0xf) >> 2; @@ -192,7 +194,8 @@ void EmitContext::DefineBufferOffsets() { const Id ptr{OpAccessChain(TypePointer(spv::StorageClass::PushConstant, U32[1]), push_data_block, ConstU32(half), ConstU32(comp))}; const Id value{OpLoad(U32[1], ptr)}; - tex_buffer.coord_offset = OpBitFieldUExtract(U32[1], value, ConstU32(offset), ConstU32(8U)); + tex_buffer.coord_offset = OpBitFieldUExtract(U32[1], value, ConstU32(offset), ConstU32(6U)); + Name(tex_buffer.coord_offset, fmt::format("texbuf{}_off", binding)); } } @@ -330,18 +333,25 @@ void EmitContext::DefineOutputs() { void 
EmitContext::DefinePushDataBlock() { // Create push constants block for instance steps rates - const Id struct_type{Name(TypeStruct(U32[1], U32[1], U32[4], U32[4], U32[4]), "AuxData")}; + const Id struct_type{Name( + TypeStruct(U32[1], U32[1], U32[4], U32[4], U32[4], U32[4], U32[4], U32[4]), "AuxData")}; Decorate(struct_type, spv::Decoration::Block); MemberName(struct_type, 0, "sr0"); MemberName(struct_type, 1, "sr1"); MemberName(struct_type, 2, "buf_offsets0"); MemberName(struct_type, 3, "buf_offsets1"); - MemberName(struct_type, 4, "buf_offsets2"); + MemberName(struct_type, 4, "ud_regs0"); + MemberName(struct_type, 5, "ud_regs1"); + MemberName(struct_type, 6, "ud_regs2"); + MemberName(struct_type, 7, "ud_regs3"); MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U); MemberDecorate(struct_type, 1, spv::Decoration::Offset, 4U); MemberDecorate(struct_type, 2, spv::Decoration::Offset, 8U); MemberDecorate(struct_type, 3, spv::Decoration::Offset, 24U); MemberDecorate(struct_type, 4, spv::Decoration::Offset, 40U); + MemberDecorate(struct_type, 5, spv::Decoration::Offset, 56U); + MemberDecorate(struct_type, 6, spv::Decoration::Offset, 72U); + MemberDecorate(struct_type, 7, spv::Decoration::Offset, 88U); push_data_block = DefineVar(struct_type, spv::StorageClass::PushConstant); Name(push_data_block, "push_data"); interfaces.push_back(push_data_block); @@ -379,7 +389,7 @@ void EmitContext::DefineBuffers() { const Id struct_pointer_type{TypePointer(storage_class, struct_type)}; const Id pointer_type = TypePointer(storage_class, data_type); const Id id{AddGlobalVariable(struct_pointer_type, storage_class)}; - Decorate(id, spv::Decoration::Binding, binding); + Decorate(id, spv::Decoration::Binding, binding.unified++); Decorate(id, spv::Decoration::DescriptorSet, 0U); if (is_storage && !desc.is_written) { Decorate(id, spv::Decoration::NonWritable); @@ -388,7 +398,7 @@ void EmitContext::DefineBuffers() { buffers.push_back({ .id = id, - .binding = binding++, + .binding = 
binding.buffer++, .data_types = data_types, .pointer_type = pointer_type, }); @@ -406,12 +416,12 @@ void EmitContext::DefineTextureBuffers() { sampled, spv::ImageFormat::Unknown)}; const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)}; const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)}; - Decorate(id, spv::Decoration::Binding, binding); + Decorate(id, spv::Decoration::Binding, binding.unified++); Decorate(id, spv::Decoration::DescriptorSet, 0U); Name(id, fmt::format("{}_{}", desc.is_written ? "imgbuf" : "texbuf", desc.sgpr_base)); texture_buffers.push_back({ .id = id, - .binding = binding++, + .binding = binding.buffer++, .image_type = image_type, .result_type = sampled_type[4], .is_integer = is_integer, @@ -507,10 +517,13 @@ Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) { return ctx.TypeImage(sampled_type, spv::Dim::Dim2D, false, false, false, sampled, format); case AmdGpu::ImageType::Color2DArray: return ctx.TypeImage(sampled_type, spv::Dim::Dim2D, false, true, false, sampled, format); + case AmdGpu::ImageType::Color2DMsaa: + return ctx.TypeImage(sampled_type, spv::Dim::Dim2D, false, false, true, sampled, format); case AmdGpu::ImageType::Color3D: return ctx.TypeImage(sampled_type, spv::Dim::Dim3D, false, false, false, sampled, format); case AmdGpu::ImageType::Cube: - return ctx.TypeImage(sampled_type, spv::Dim::Cube, false, false, false, sampled, format); + return ctx.TypeImage(sampled_type, spv::Dim::Cube, false, desc.is_array, false, sampled, + format); default: break; } @@ -524,7 +537,7 @@ void EmitContext::DefineImagesAndSamplers() { const Id image_type{ImageType(*this, image_desc, sampled_type)}; const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)}; const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)}; - Decorate(id, spv::Decoration::Binding, binding); + Decorate(id, spv::Decoration::Binding, binding.unified++); 
Decorate(id, spv::Decoration::DescriptorSet, 0U); Name(id, fmt::format("{}_{}{}_{:02x}", stage, "img", image_desc.sgpr_base, image_desc.dword_offset)); @@ -534,9 +547,9 @@ void EmitContext::DefineImagesAndSamplers() { .sampled_type = image_desc.is_storage ? sampled_type : TypeSampledImage(image_type), .pointer_type = pointer_type, .image_type = image_type, + .is_storage = image_desc.is_storage, }); interfaces.push_back(id); - ++binding; } if (std::ranges::any_of(info.images, &ImageResource::is_atomic)) { image_u32 = TypePointer(spv::StorageClass::Image, U32[1]); @@ -548,13 +561,12 @@ void EmitContext::DefineImagesAndSamplers() { sampler_pointer_type = TypePointer(spv::StorageClass::UniformConstant, sampler_type); for (const auto& samp_desc : info.samplers) { const Id id{AddGlobalVariable(sampler_pointer_type, spv::StorageClass::UniformConstant)}; - Decorate(id, spv::Decoration::Binding, binding); + Decorate(id, spv::Decoration::Binding, binding.unified++); Decorate(id, spv::Decoration::DescriptorSet, 0U); Name(id, fmt::format("{}_{}{}_{:02x}", stage, "samp", samp_desc.sgpr_base, samp_desc.dword_offset)); samplers.push_back(id); interfaces.push_back(id); - ++binding; } } diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index 0908b7f82..9029866b0 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -6,6 +6,7 @@ #include #include +#include "shader_recompiler/backend/bindings.h" #include "shader_recompiler/info.h" #include "shader_recompiler/ir/program.h" #include "shader_recompiler/profile.h" @@ -37,7 +38,7 @@ struct VectorIds { class EmitContext final : public Sirit::Module { public: explicit EmitContext(const Profile& profile, const RuntimeInfo& runtime_info, const Info& info, - u32& binding); + Bindings& binding); ~EmitContext(); Id Def(const IR::Value& value); @@ -200,6 +201,7 @@ public: Id sampled_type; 
Id pointer_type; Id image_type; + bool is_storage = false; }; struct BufferDefinition { @@ -216,11 +218,11 @@ public: u32 binding; Id image_type; Id result_type; - bool is_integer; - bool is_storage; + bool is_integer = false; + bool is_storage = false; }; - u32& binding; + Bindings& binding; boost::container::small_vector buffers; boost::container::small_vector texture_buffers; boost::container::small_vector images; diff --git a/src/shader_recompiler/frontend/control_flow_graph.cpp b/src/shader_recompiler/frontend/control_flow_graph.cpp index 276bd9db0..9d481d32c 100644 --- a/src/shader_recompiler/frontend/control_flow_graph.cpp +++ b/src/shader_recompiler/frontend/control_flow_graph.cpp @@ -23,7 +23,6 @@ struct Compare { static IR::Condition MakeCondition(const GcnInst& inst) { if (inst.IsCmpx()) { - ASSERT(inst.opcode == Opcode::V_CMPX_NE_U32); return IR::Condition::Execnz; } @@ -99,7 +98,7 @@ void CFG::EmitDivergenceLabels() { // with SAVEEXEC to mask the threads that didn't pass the condition // of initial branch. (inst.opcode == Opcode::S_ANDN2_B64 && inst.dst[0].field == OperandField::ExecLo) || - inst.opcode == Opcode::V_CMPX_NE_U32; + inst.IsCmpx(); }; const auto is_close_scope = [](const GcnInst& inst) { // Closing an EXEC scope can be either a branch instruction @@ -109,7 +108,7 @@ void CFG::EmitDivergenceLabels() { // Sometimes compiler might insert instructions between the SAVEEXEC and the branch. // Those instructions need to be wrapped in the condition as well so allow branch // as end scope instruction. 
- inst.opcode == Opcode::S_CBRANCH_EXECZ || + inst.opcode == Opcode::S_CBRANCH_EXECZ || inst.opcode == Opcode::S_ENDPGM || (inst.opcode == Opcode::S_ANDN2_B64 && inst.dst[0].field == OperandField::ExecLo); }; @@ -127,7 +126,8 @@ void CFG::EmitDivergenceLabels() { s32 curr_begin = -1; for (size_t index = GetIndex(start); index < end_index; index++) { const auto& inst = inst_list[index]; - if (is_close_scope(inst) && curr_begin != -1) { + const bool is_close = is_close_scope(inst); + if ((is_close || index == end_index - 1) && curr_begin != -1) { // If there are no instructions inside scope don't do anything. if (index - curr_begin == 1) { curr_begin = -1; @@ -138,8 +138,16 @@ void CFG::EmitDivergenceLabels() { const auto& save_inst = inst_list[curr_begin]; const Label label = index_to_pc[curr_begin] + save_inst.length; AddLabel(label); - // Add a label to the close scope instruction as well. - AddLabel(index_to_pc[index]); + // Add a label to the close scope instruction. + // There are 3 cases where we need to close a scope. + // * Close scope instruction inside the block + // * Close scope instruction at the end of the block (cbranch or endpgm) + // * Normal instruction at the end of the block + // For the last case we must NOT add a label as that would cause + // the instruction to be separated into its own basic block. + if (is_close) { + AddLabel(index_to_pc[index]); + } // Reset scope begin. curr_begin = -1; } @@ -194,7 +202,7 @@ void CFG::LinkBlocks() { const auto end_inst{block.end_inst}; // Handle divergence block inserted here. 
if (end_inst.opcode == Opcode::S_AND_SAVEEXEC_B64 || - end_inst.opcode == Opcode::S_ANDN2_B64 || end_inst.opcode == Opcode::V_CMPX_NE_U32) { + end_inst.opcode == Opcode::S_ANDN2_B64 || end_inst.IsCmpx()) { // Blocks are stored ordered by address in the set auto next_it = std::next(it); auto* target_block = &(*next_it); diff --git a/src/shader_recompiler/frontend/decode.cpp b/src/shader_recompiler/frontend/decode.cpp index 26a2c1a6c..6020f93bb 100644 --- a/src/shader_recompiler/frontend/decode.cpp +++ b/src/shader_recompiler/frontend/decode.cpp @@ -1032,6 +1032,7 @@ void GcnDecodeContext::decodeInstructionMIMG(uint64_t hexInstruction) { m_instruction.control.mimg = *reinterpret_cast(&hexInstruction); m_instruction.control.mimg.mod = getMimgModifier(m_instruction.opcode); + ASSERT(m_instruction.control.mimg.r128 == 0); } void GcnDecodeContext::decodeInstructionDS(uint64_t hexInstruction) { diff --git a/src/shader_recompiler/frontend/translate/export.cpp b/src/shader_recompiler/frontend/translate/export.cpp index 18e830f7b..7d901822d 100644 --- a/src/shader_recompiler/frontend/translate/export.cpp +++ b/src/shader_recompiler/frontend/translate/export.cpp @@ -71,6 +71,9 @@ void Translator::EmitExport(const GcnInst& inst) { ir.SetAttribute(attrib, comp, swizzle(i)); } } + if (IR::IsMrt(attrib)) { + info.mrt_mask |= 1u << u8(attrib); + } } } // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/translate/scalar_alu.cpp b/src/shader_recompiler/frontend/translate/scalar_alu.cpp index 0c9efdc48..1e572a97f 100644 --- a/src/shader_recompiler/frontend/translate/scalar_alu.cpp +++ b/src/shader_recompiler/frontend/translate/scalar_alu.cpp @@ -281,6 +281,12 @@ void Translator::S_AND_B64(NegateMode negate, const GcnInst& inst) { return ir.GetExec(); case OperandField::ScalarGPR: return ir.GetThreadBitScalarReg(IR::ScalarReg(operand.code)); + case OperandField::ConstZero: + return ir.Imm1(false); + case OperandField::SignedConstIntNeg: + ASSERT_MSG(-s32(operand.code) 
+ SignedConstIntNegMin - 1 == -1, + "SignedConstIntNeg must be -1"); + return ir.Imm1(true); default: UNREACHABLE(); } @@ -506,6 +512,8 @@ void Translator::S_NOT_B64(const GcnInst& inst) { return ir.GetExec(); case OperandField::ScalarGPR: return ir.GetThreadBitScalarReg(IR::ScalarReg(operand.code)); + case OperandField::ConstZero: + return ir.Imm1(false); default: UNREACHABLE(); } @@ -520,6 +528,9 @@ void Translator::S_NOT_B64(const GcnInst& inst) { case OperandField::ScalarGPR: ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[0].code), result); break; + case OperandField::ExecLo: + ir.SetExec(result); + break; default: UNREACHABLE(); } diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 07295f5b3..cfef5858a 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -174,7 +174,7 @@ T Translator::GetSrc(const InstOperand& operand) { value = ir.IAbs(value); } if (operand.input_modifier.neg) { - UNREACHABLE(); + value = ir.INeg(value); } } return value; @@ -281,12 +281,15 @@ template IR::F64 Translator::GetSrc64(const InstOperand&); void Translator::SetDst(const InstOperand& operand, const IR::U32F32& value) { IR::U32F32 result = value; - if (operand.output_modifier.multiplier != 0.f) { - result = ir.FPMul(result, ir.Imm32(operand.output_modifier.multiplier)); - } - if (operand.output_modifier.clamp) { - result = ir.FPSaturate(value); + if (value.Type() == IR::Type::F32) { + if (operand.output_modifier.multiplier != 0.f) { + result = ir.FPMul(result, ir.Imm32(operand.output_modifier.multiplier)); + } + if (operand.output_modifier.clamp) { + result = ir.FPSaturate(value); + } } + switch (operand.field) { case OperandField::ScalarGPR: return ir.SetScalarReg(IR::ScalarReg(operand.code), result); diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 
efa27cbd7..7559b8533 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -155,6 +155,7 @@ public: void V_SUBREV_I32(const GcnInst& inst); void V_ADDC_U32(const GcnInst& inst); void V_LDEXP_F32(const GcnInst& inst); + void V_CVT_PKNORM_U16_F32(const GcnInst& inst); void V_CVT_PKRTZ_F16_F32(const GcnInst& inst); // VOP1 @@ -216,6 +217,7 @@ public: void V_MED3_I32(const GcnInst& inst); void V_SAD(const GcnInst& inst); void V_SAD_U32(const GcnInst& inst); + void V_CVT_PK_U8_F32(const GcnInst& inst); void V_LSHL_B64(const GcnInst& inst); void V_MUL_F64(const GcnInst& inst); void V_MAX_F64(const GcnInst& inst); diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index 2d85d9bfb..f497e2606 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -89,6 +89,8 @@ void Translator::EmitVectorAlu(const GcnInst& inst) { return V_ADDC_U32(inst); case Opcode::V_LDEXP_F32: return V_LDEXP_F32(inst); + case Opcode::V_CVT_PKNORM_U16_F32: + return V_CVT_PKNORM_U16_F32(inst); case Opcode::V_CVT_PKRTZ_F16_F32: return V_CVT_PKRTZ_F16_F32(inst); @@ -244,6 +246,8 @@ void Translator::EmitVectorAlu(const GcnInst& inst) { // V_CMPX_{OP8}_I32 case Opcode::V_CMPX_LT_I32: return V_CMP_U32(ConditionOp::LT, true, true, inst); + case Opcode::V_CMPX_EQ_I32: + return V_CMP_U32(ConditionOp::EQ, true, true, inst); case Opcode::V_CMPX_GT_I32: return V_CMP_U32(ConditionOp::GT, true, true, inst); case Opcode::V_CMPX_LG_I32: @@ -335,6 +339,8 @@ void Translator::EmitVectorAlu(const GcnInst& inst) { return V_MED3_I32(inst); case Opcode::V_SAD_U32: return V_SAD_U32(inst); + case Opcode::V_CVT_PK_U8_F32: + return V_CVT_PK_U8_F32(inst); case Opcode::V_LSHL_B64: return V_LSHL_B64(inst); case Opcode::V_MUL_F64: @@ -359,14 +365,13 @@ void Translator::EmitVectorAlu(const GcnInst& inst) { // 
VOP2 void Translator::V_CNDMASK_B32(const GcnInst& inst) { - const IR::VectorReg dst_reg{inst.dst[0].code}; const IR::ScalarReg flag_reg{inst.src[2].code}; const IR::U1 flag = inst.src[2].field == OperandField::ScalarGPR ? ir.GetThreadBitScalarReg(flag_reg) : ir.GetVcc(); const IR::Value result = ir.Select(flag, GetSrc(inst.src[1]), GetSrc(inst.src[0])); - ir.SetVectorReg(dst_reg, IR::U32F32{result}); + SetDst(inst.dst[0], IR::U32F32{result}); } void Translator::V_ADD_F32(const GcnInst& inst) { @@ -460,23 +465,19 @@ void Translator::V_LSHL_B32(const GcnInst& inst) { void Translator::V_LSHLREV_B32(const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{GetSrc(inst.src[1])}; - const IR::VectorReg dst_reg{inst.dst[0].code}; - ir.SetVectorReg(dst_reg, ir.ShiftLeftLogical(src1, ir.BitwiseAnd(src0, ir.Imm32(0x1F)))); + SetDst(inst.dst[0], ir.ShiftLeftLogical(src1, ir.BitwiseAnd(src0, ir.Imm32(0x1F)))); } void Translator::V_AND_B32(const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{ir.GetVectorReg(IR::VectorReg(inst.src[1].code))}; - const IR::VectorReg dst_reg{inst.dst[0].code}; - ir.SetVectorReg(dst_reg, ir.BitwiseAnd(src0, src1)); + SetDst(inst.dst[0], ir.BitwiseAnd(src0, src1)); } void Translator::V_OR_B32(bool is_xor, const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{ir.GetVectorReg(IR::VectorReg(inst.src[1].code))}; - const IR::VectorReg dst_reg{inst.dst[0].code}; - ir.SetVectorReg(dst_reg, - is_xor ? ir.BitwiseXor(src0, src1) : IR::U32(ir.BitwiseOr(src0, src1))); + SetDst(inst.dst[0], is_xor ? 
ir.BitwiseXor(src0, src1) : IR::U32(ir.BitwiseOr(src0, src1))); } void Translator::V_BFM_B32(const GcnInst& inst) { @@ -535,8 +536,7 @@ void Translator::V_MBCNT_U32_B32(bool is_low, const GcnInst& inst) { void Translator::V_ADD_I32(const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{ir.GetVectorReg(IR::VectorReg(inst.src[1].code))}; - const IR::VectorReg dst_reg{inst.dst[0].code}; - ir.SetVectorReg(dst_reg, ir.IAdd(src0, src1)); + SetDst(inst.dst[0], ir.IAdd(src0, src1)); // TODO: Carry } @@ -573,8 +573,7 @@ void Translator::V_ADDC_U32(const GcnInst& inst) { const IR::U32 scarry = IR::U32{ir.Select(carry, ir.Imm32(1), ir.Imm32(0))}; const IR::U32 result = ir.IAdd(ir.IAdd(src0, src1), scarry); - const IR::VectorReg dst_reg{inst.dst[0].code}; - ir.SetVectorReg(dst_reg, result); + SetDst(inst.dst[0], result); const IR::U1 less_src0 = ir.ILessThan(result, src0, false); const IR::U1 less_src1 = ir.ILessThan(result, src1, false); @@ -588,11 +587,19 @@ void Translator::V_LDEXP_F32(const GcnInst& inst) { SetDst(inst.dst[0], ir.FPLdexp(src0, src1)); } -void Translator::V_CVT_PKRTZ_F16_F32(const GcnInst& inst) { +void Translator::V_CVT_PKNORM_U16_F32(const GcnInst& inst) { + const IR::F32 src0{GetSrc(inst.src[0])}; + const IR::F32 src1{GetSrc(inst.src[1])}; + const IR::U32 dst0 = ir.ConvertFToU(32, ir.FPMul(src0, ir.Imm32(65535.f))); + const IR::U32 dst1 = ir.ConvertFToU(32, ir.FPMul(src1, ir.Imm32(65535.f))); const IR::VectorReg dst_reg{inst.dst[0].code}; + ir.SetVectorReg(dst_reg, ir.BitFieldInsert(dst0, dst1, ir.Imm32(16), ir.Imm32(16))); +} + +void Translator::V_CVT_PKRTZ_F16_F32(const GcnInst& inst) { const IR::Value vec_f32 = ir.CompositeConstruct(GetSrc(inst.src[0]), GetSrc(inst.src[1])); - ir.SetVectorReg(dst_reg, ir.PackHalf2x16(vec_f32)); + SetDst(inst.dst[0], ir.PackHalf2x16(vec_f32)); } // VOP1 @@ -603,14 +610,12 @@ void Translator::V_MOV(const GcnInst& inst) { void Translator::V_CVT_F32_I32(const GcnInst& inst) { const IR::U32 
src0{GetSrc(inst.src[0])}; - const IR::VectorReg dst_reg{inst.dst[0].code}; - ir.SetVectorReg(dst_reg, ir.ConvertSToF(32, 32, src0)); + SetDst(inst.dst[0], ir.ConvertSToF(32, 32, src0)); } void Translator::V_CVT_F32_U32(const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; - const IR::VectorReg dst_reg{inst.dst[0].code}; - ir.SetVectorReg(dst_reg, ir.ConvertUToF(32, 32, src0)); + SetDst(inst.dst[0], ir.ConvertUToF(32, 32, src0)); } void Translator::V_CVT_U32_F32(const GcnInst& inst) { @@ -642,12 +647,11 @@ void Translator::V_CVT_FLR_I32_F32(const GcnInst& inst) { void Translator::V_CVT_OFF_F32_I4(const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; - const IR::VectorReg dst_reg{inst.dst[0].code}; ASSERT(src0.IsImmediate()); static constexpr std::array IntToFloat = { 0.0f, 0.0625f, 0.1250f, 0.1875f, 0.2500f, 0.3125f, 0.3750f, 0.4375f, -0.5000f, -0.4375f, -0.3750f, -0.3125f, -0.2500f, -0.1875f, -0.1250f, -0.0625f}; - ir.SetVectorReg(dst_reg, ir.Imm32(IntToFloat[src0.U32() & 0xF])); + SetDst(inst.dst[0], ir.Imm32(IntToFloat[src0.U32() & 0xF])); } void Translator::V_CVT_F32_UBYTE(u32 index, const GcnInst& inst) { @@ -658,8 +662,7 @@ void Translator::V_CVT_F32_UBYTE(u32 index, const GcnInst& inst) { void Translator::V_FRACT_F32(const GcnInst& inst) { const IR::F32 src0{GetSrc(inst.src[0])}; - const IR::VectorReg dst_reg{inst.dst[0].code}; - ir.SetVectorReg(dst_reg, ir.Fract(src0)); + SetDst(inst.dst[0], ir.Fract(src0)); } void Translator::V_TRUNC_F32(const GcnInst& inst) { @@ -679,8 +682,7 @@ void Translator::V_RNDNE_F32(const GcnInst& inst) { void Translator::V_FLOOR_F32(const GcnInst& inst) { const IR::F32 src0{GetSrc(inst.src[0])}; - const IR::VectorReg dst_reg{inst.dst[0].code}; - ir.SetVectorReg(dst_reg, ir.FPFloor(src0)); + SetDst(inst.dst[0], ir.FPFloor(src0)); } void Translator::V_EXP_F32(const GcnInst& inst) { @@ -1043,10 +1045,25 @@ void Translator::V_SAD_U32(const GcnInst& inst) { SetDst(inst.dst[0], ir.IAdd(result, src2)); } +void 
Translator::V_CVT_PK_U8_F32(const GcnInst& inst) { + const IR::F32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{GetSrc(inst.src[1])}; + const IR::U32 src2{GetSrc(inst.src[2])}; + + const IR::U32 value_uint = ir.ConvertFToU(32, src0); + const IR::U32 offset = ir.ShiftLeftLogical(src1, ir.Imm32(3)); + SetDst(inst.dst[0], ir.BitFieldInsert(src2, value_uint, offset, ir.Imm32(8))); +} + void Translator::V_LSHL_B64(const GcnInst& inst) { const IR::U64 src0{GetSrc64(inst.src[0])}; const IR::U64 src1{GetSrc64(inst.src[1])}; const IR::VectorReg dst_reg{inst.dst[0].code}; + if (src0.IsImmediate() && src0.U64() == -1) { + ir.SetVectorReg(dst_reg, ir.Imm32(0xFFFFFFFF)); + ir.SetVectorReg(dst_reg + 1, ir.Imm32(0xFFFFFFFF)); + return; + } ASSERT_MSG(src0.IsImmediate() && src0.U64() == 0 && src1.IsImmediate() && src1.U64() == 0, "V_LSHL_B64 with non-zero src0 or src1 is not supported"); ir.SetVectorReg(dst_reg, ir.Imm32(0)); diff --git a/src/shader_recompiler/frontend/translate/vector_interpolation.cpp b/src/shader_recompiler/frontend/translate/vector_interpolation.cpp index 8617370ac..431cb2f04 100644 --- a/src/shader_recompiler/frontend/translate/vector_interpolation.cpp +++ b/src/shader_recompiler/frontend/translate/vector_interpolation.cpp @@ -22,17 +22,15 @@ void Translator::EmitVectorInterpolation(const GcnInst& inst) { // VINTRP void Translator::V_INTERP_P2_F32(const GcnInst& inst) { - const IR::VectorReg dst_reg{inst.dst[0].code}; auto& attr = runtime_info.fs_info.inputs.at(inst.control.vintrp.attr); const IR::Attribute attrib{IR::Attribute::Param0 + attr.param_index}; - ir.SetVectorReg(dst_reg, ir.GetAttribute(attrib, inst.control.vintrp.chan)); + SetDst(inst.dst[0], ir.GetAttribute(attrib, inst.control.vintrp.chan)); } void Translator::V_INTERP_MOV_F32(const GcnInst& inst) { - const IR::VectorReg dst_reg{inst.dst[0].code}; auto& attr = runtime_info.fs_info.inputs.at(inst.control.vintrp.attr); const IR::Attribute attrib{IR::Attribute::Param0 + attr.param_index}; - 
ir.SetVectorReg(dst_reg, ir.GetAttribute(attrib, inst.control.vintrp.chan)); + SetDst(inst.dst[0], ir.GetAttribute(attrib, inst.control.vintrp.chan)); } } // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index 212d7fdc5..7ecc2e762 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -546,6 +546,7 @@ void Translator::IMAGE_SAMPLE(const GcnInst& inst) { info.has_offset.Assign(flags.test(MimgModifier::Offset)); info.explicit_lod.Assign(explicit_lod); info.has_derivatives.Assign(has_derivatives); + info.is_array.Assign(mimg.da); // Issue IR instruction, leaving unknown fields blank to patch later. const IR::Value texel = [&]() -> IR::Value { @@ -630,6 +631,7 @@ void Translator::IMAGE_GATHER(const GcnInst& inst) { info.has_offset.Assign(flags.test(MimgModifier::Offset)); // info.explicit_lod.Assign(explicit_lod); info.gather_comp.Assign(std::bit_width(mimg.dmask) - 1); + info.is_array.Assign(mimg.da); // Issue IR instruction, leaving unknown fields blank to patch later. 
const IR::Value texel = [&]() -> IR::Value { diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h index d8282bf49..739214ec9 100644 --- a/src/shader_recompiler/info.h +++ b/src/shader_recompiler/info.h @@ -7,6 +7,7 @@ #include #include "common/assert.h" #include "common/types.h" +#include "shader_recompiler/backend/bindings.h" #include "shader_recompiler/ir/attribute.h" #include "shader_recompiler/ir/reg.h" #include "shader_recompiler/ir/type.h" @@ -64,9 +65,10 @@ struct ImageResource { u32 dword_offset; AmdGpu::ImageType type; AmdGpu::NumberFormat nfmt; - bool is_storage; - bool is_depth; + bool is_storage{}; + bool is_depth{}; bool is_atomic{}; + bool is_array{}; constexpr AmdGpu::Image GetSharp(const Info& info) const noexcept; }; @@ -84,17 +86,21 @@ struct SamplerResource { using SamplerResourceList = boost::container::small_vector; struct PushData { - static constexpr size_t BufOffsetIndex = 2; + static constexpr u32 BufOffsetIndex = 2; + static constexpr u32 UdRegsIndex = 4; u32 step0; u32 step1; - std::array buf_offsets; + std::array buf_offsets; + std::array ud_regs; void AddOffset(u32 binding, u32 offset) { ASSERT(offset < 256 && binding < buf_offsets.size()); buf_offsets[binding] = offset; } }; +static_assert(sizeof(PushData) <= 128, + "PushData size is greater than minimum size guaranteed by Vulkan spec"); /** * Contains general information generated by the shader recompiler for an input program. 
@@ -144,6 +150,24 @@ struct Info { AttributeFlags loads{}; AttributeFlags stores{}; + struct UserDataMask { + void Set(IR::ScalarReg reg) noexcept { + mask |= 1 << static_cast(reg); + } + + u32 Index(IR::ScalarReg reg) const noexcept { + const u32 reg_mask = (1 << static_cast(reg)) - 1; + return std::popcount(mask & reg_mask); + } + + u32 NumRegs() const noexcept { + return std::popcount(mask); + } + + u32 mask; + }; + UserDataMask ud_mask{}; + s8 vertex_offset_sgpr = -1; s8 instance_offset_sgpr = -1; @@ -171,6 +195,7 @@ struct Info { bool uses_fp64{}; bool uses_step_rates{}; bool translation_failed{}; // indicates that shader has unsupported instructions + u8 mrt_mask{0u}; explicit Info(Stage stage_, ShaderParams params) : stage{stage_}, pgm_hash{params.hash}, pgm_base{params.Base()}, @@ -188,11 +213,23 @@ struct Info { return data; } - size_t NumBindings() const noexcept { - return buffers.size() + texture_buffers.size() + images.size() + samplers.size(); + void PushUd(Backend::Bindings& bnd, PushData& push) const { + u32 mask = ud_mask.mask; + while (mask) { + const u32 index = std::countr_zero(mask); + ASSERT(bnd.user_data < NumUserDataRegs && index < NumUserDataRegs); + mask &= ~(1U << index); + push.ud_regs[bnd.user_data++] = user_data[index]; + } } - [[nodiscard]] std::pair GetDrawOffsets() const noexcept { + void AddBindings(Backend::Bindings& bnd) const { + bnd.buffer += buffers.size() + texture_buffers.size(); + bnd.unified += bnd.buffer + images.size() + samplers.size(); + bnd.user_data += ud_mask.NumRegs(); + } + + [[nodiscard]] std::pair GetDrawOffsets() const { u32 vertex_offset = 0; u32 instance_offset = 0; if (vertex_offset_sgpr != -1) { diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index ce809514d..a7edb6d9c 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -1079,6 +1079,10 @@ U32 IREmitter::IAbs(const U32& value) { } U32U64 
IREmitter::ShiftLeftLogical(const U32U64& base, const U32& shift) { + if (shift.IsImmediate() && shift.U32() == 0) { + return base; + } + switch (base.Type()) { case Type::U32: return Inst(Opcode::ShiftLeftLogical32, base, shift); @@ -1090,6 +1094,10 @@ U32U64 IREmitter::ShiftLeftLogical(const U32U64& base, const U32& shift) { } U32U64 IREmitter::ShiftRightLogical(const U32U64& base, const U32& shift) { + if (shift.IsImmediate() && shift.U32() == 0) { + return base; + } + switch (base.Type()) { case Type::U32: return Inst(Opcode::ShiftRightLogical32, base, shift); @@ -1101,6 +1109,10 @@ U32U64 IREmitter::ShiftRightLogical(const U32U64& base, const U32& shift) { } U32U64 IREmitter::ShiftRightArithmetic(const U32U64& base, const U32& shift) { + if (shift.IsImmediate() && shift.U32() == 0) { + return base; + } + switch (base.Type()) { case Type::U32: return Inst(Opcode::ShiftRightArithmetic32, base, shift); diff --git a/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp b/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp index 87a069338..775aed5b3 100644 --- a/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp @@ -278,6 +278,12 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { case IR::Opcode::FPCmpClass32: FoldCmpClass(inst); return; + case IR::Opcode::ShiftLeftLogical32: + FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return static_cast(a << b); }); + return; + case IR::Opcode::ShiftRightLogical32: + FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return static_cast(a >> b); }); + return; case IR::Opcode::ShiftRightArithmetic32: FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return static_cast(a >> b); }); return; @@ -347,7 +353,6 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { return; case IR::Opcode::INotEqual: FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a != b; }); - FoldBooleanConvert(inst); return; case 
IR::Opcode::BitwiseAnd32: FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a & b; }); diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index f1fc14d02..db0d75f0c 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -200,9 +200,10 @@ public: u32 Add(const ImageResource& desc) { const u32 index{Add(image_resources, desc, [&desc](const auto& existing) { return desc.sgpr_base == existing.sgpr_base && - desc.dword_offset == existing.dword_offset && desc.type == existing.type && - desc.is_storage == existing.is_storage; + desc.dword_offset == existing.dword_offset; })}; + auto& image = image_resources[index]; + image.is_storage |= desc.is_storage; return index; } @@ -441,18 +442,29 @@ void PatchTextureBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info, } IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value& t, - const IR::Value& z, bool is_storage) { + const IR::Value& z, bool is_storage, bool is_array) { // When cubemap is written with imageStore it is treated like 2DArray. if (is_storage) { return ir.CompositeConstruct(s, t, z); } + + ASSERT(s.Type() == IR::Type::F32); // in case of fetched image need to adjust the code below + // We need to fix x and y coordinate, // because the s and t coordinate will be scaled and plus 1.5 by v_madak_f32. // We already force the scale value to be 1.0 when handling v_cubema_f32, // here we subtract 1.5 to recover the original value. 
const IR::Value x = ir.FPSub(IR::F32{s}, ir.Imm32(1.5f)); const IR::Value y = ir.FPSub(IR::F32{t}, ir.Imm32(1.5f)); - return ir.CompositeConstruct(x, y, z); + if (is_array) { + const IR::U32 array_index = ir.ConvertFToU(32, IR::F32{z}); + const IR::U32 face_id = ir.BitwiseAnd(array_index, ir.Imm32(7u)); + const IR::U32 slice_id = ir.ShiftRightLogical(array_index, ir.Imm32(3u)); + return ir.CompositeConstruct(x, y, ir.ConvertIToF(32, 32, false, face_id), + ir.ConvertIToF(32, 32, false, slice_id)); + } else { + return ir.CompositeConstruct(x, y, z); + } } void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) { @@ -481,14 +493,16 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip } ASSERT(image.GetType() != AmdGpu::ImageType::Invalid); const bool is_storage = IsImageStorageInstruction(inst); + const auto type = image.IsPartialCubemap() ? AmdGpu::ImageType::Color2DArray : image.GetType(); u32 image_binding = descriptors.Add(ImageResource{ .sgpr_base = tsharp.sgpr_base, .dword_offset = tsharp.dword_offset, - .type = image.GetType(), + .type = type, .nfmt = static_cast(image.GetNumberFmt()), .is_storage = is_storage, .is_depth = bool(inst_info.is_depth), .is_atomic = IsImageAtomicInstruction(inst), + .is_array = bool(inst_info.is_array), }); // Read sampler sharp. 
This doesn't exist for IMAGE_LOAD/IMAGE_STORE instructions @@ -545,7 +559,8 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip case AmdGpu::ImageType::Color3D: // x, y, z return {ir.CompositeConstruct(body->Arg(0), body->Arg(1), body->Arg(2)), body->Arg(3)}; case AmdGpu::ImageType::Cube: // x, y, face - return {PatchCubeCoord(ir, body->Arg(0), body->Arg(1), body->Arg(2), is_storage), + return {PatchCubeCoord(ir, body->Arg(0), body->Arg(1), body->Arg(2), is_storage, + inst_info.is_array), body->Arg(3)}; default: UNREACHABLE_MSG("Unknown image type {}", image.GetType()); @@ -584,7 +599,8 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip } } if (inst_info.has_derivatives) { - ASSERT_MSG(image.GetType() == AmdGpu::ImageType::Color2D, + ASSERT_MSG(image.GetType() == AmdGpu::ImageType::Color2D || + image.GetType() == AmdGpu::ImageType::Color2DArray, "User derivatives only supported for 2D images"); } if (inst_info.has_lod_clamp) { diff --git a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp index 7251473d1..e995852d5 100644 --- a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp +++ b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp @@ -8,14 +8,15 @@ namespace Shader::Optimization { void Visit(Info& info, IR::Inst& inst) { switch (inst.GetOpcode()) { case IR::Opcode::GetAttribute: - case IR::Opcode::GetAttributeU32: { + case IR::Opcode::GetAttributeU32: info.loads.Set(inst.Arg(0).Attribute(), inst.Arg(1).U32()); break; - } - case IR::Opcode::SetAttribute: { + case IR::Opcode::SetAttribute: info.stores.Set(inst.Arg(0).Attribute(), inst.Arg(2).U32()); break; - } + case IR::Opcode::GetUserData: + info.ud_mask.Set(inst.Arg(0).ScalarReg()); + break; case IR::Opcode::LoadSharedU32: case IR::Opcode::LoadSharedU64: case IR::Opcode::WriteSharedU32: diff --git 
a/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp index 54dce0355..df73c1bc8 100644 --- a/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp @@ -80,10 +80,10 @@ struct DefTable { } const IR::Value& Def(IR::Block* block, ThreadBitScalar variable) { - return block->ssa_sreg_values[RegIndex(variable.sgpr)]; + return block->ssa_sbit_values[RegIndex(variable.sgpr)]; } void SetDef(IR::Block* block, ThreadBitScalar variable, const IR::Value& value) { - block->ssa_sreg_values[RegIndex(variable.sgpr)] = value; + block->ssa_sbit_values[RegIndex(variable.sgpr)] = value; } const IR::Value& Def(IR::Block* block, SccFlagTag) { diff --git a/src/shader_recompiler/ir/reg.h b/src/shader_recompiler/ir/reg.h index fba04f33e..4783d08e5 100644 --- a/src/shader_recompiler/ir/reg.h +++ b/src/shader_recompiler/ir/reg.h @@ -59,6 +59,7 @@ union TextureInstInfo { BitField<5, 1, u32> has_offset; BitField<6, 2, u32> gather_comp; BitField<8, 1, u32> has_derivatives; + BitField<9, 1, u32> is_array; }; union BufferInstInfo { diff --git a/src/shader_recompiler/specialization.h b/src/shader_recompiler/specialization.h index bbcafdb86..0a3a696bc 100644 --- a/src/shader_recompiler/specialization.h +++ b/src/shader_recompiler/specialization.h @@ -6,6 +6,7 @@ #include #include "common/types.h" +#include "shader_recompiler/backend/bindings.h" #include "shader_recompiler/info.h" namespace Shader { @@ -45,11 +46,11 @@ struct StageSpecialization { boost::container::small_vector buffers; boost::container::small_vector tex_buffers; boost::container::small_vector images; - u32 start_binding{}; + Backend::Bindings start{}; explicit StageSpecialization(const Shader::Info& info_, RuntimeInfo runtime_info_, - u32 start_binding_) - : info{&info_}, runtime_info{runtime_info_}, start_binding{start_binding_} { + Backend::Bindings start_) + : info{&info_}, runtime_info{runtime_info_}, start{start_} { 
u32 binding{}; ForEachSharp(binding, buffers, info->buffers, [](auto& spec, const auto& desc, AmdGpu::Buffer sharp) { @@ -62,7 +63,8 @@ struct StageSpecialization { }); ForEachSharp(binding, images, info->images, [](auto& spec, const auto& desc, AmdGpu::Image sharp) { - spec.type = sharp.GetType(); + spec.type = sharp.IsPartialCubemap() ? AmdGpu::ImageType::Color2DArray + : sharp.GetType(); spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt()); }); } @@ -81,7 +83,7 @@ struct StageSpecialization { } bool operator==(const StageSpecialization& other) const { - if (start_binding != other.start_binding) { + if (start != other.start) { return false; } if (runtime_info != other.runtime_info) { diff --git a/src/video_core/amdgpu/resource.h b/src/video_core/amdgpu/resource.h index 41dbe801d..fc572a04b 100644 --- a/src/video_core/amdgpu/resource.h +++ b/src/video_core/amdgpu/resource.h @@ -238,10 +238,15 @@ struct Image { return pitch + 1; } - u32 NumLayers() const { + u32 NumLayers(bool is_array) const { u32 slices = GetType() == ImageType::Color3D ? 
1 : depth + 1; if (GetType() == ImageType::Cube) { - slices *= 6; + if (is_array) { + slices = last_array + 1; + ASSERT(slices % 6 == 0); + } else { + slices = 6; + } } if (pow2pad) { slices = std::bit_ceil(slices); @@ -282,6 +287,11 @@ struct Image { bool IsTiled() const { return GetTilingMode() != TilingMode::Display_Linear; } + + bool IsPartialCubemap() const { + const auto viewed_slice = last_array - base_array + 1; + return GetType() == ImageType::Cube && viewed_slice < 6; + } }; static_assert(sizeof(Image) == 32); // 256bits diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 4530f690e..caffee6ba 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -17,7 +17,7 @@ namespace VideoCore { static constexpr size_t NumVertexBuffers = 32; static constexpr size_t GdsBufferSize = 64_KB; static constexpr size_t StagingBufferSize = 1_GB; -static constexpr size_t UboStreamBufferSize = 128_MB; +static constexpr size_t UboStreamBufferSize = 64_MB; BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, const AmdGpu::Liverpool* liverpool_, TextureCache& texture_cache_, @@ -581,15 +581,26 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, return false; } Image& image = texture_cache.GetImage(image_id); + if (False(image.flags & ImageFlagBits::GpuModified)) { + return false; + } + ASSERT_MSG(device_addr == image.info.guest_address, + "Texel buffer aliases image subresources {:x} : {:x}", device_addr, + image.info.guest_address); boost::container::small_vector copies; u32 offset = buffer.Offset(image.cpu_addr); const u32 num_layers = image.info.resources.layers; + const u32 max_offset = offset + size; for (u32 m = 0; m < image.info.resources.levels; m++) { const u32 width = std::max(image.info.size.width >> m, 1u); const u32 height = std::max(image.info.size.height >> m, 1u); const u32 depth = 
image.info.props.is_volume ? std::max(image.info.size.depth >> m, 1u) : 1u; const auto& [mip_size, mip_pitch, mip_height, mip_ofs] = image.info.mips_layout[m]; + offset += mip_ofs * num_layers; + if (offset + (mip_size * num_layers) > max_offset) { + break; + } copies.push_back({ .bufferOffset = offset, .bufferRowLength = static_cast(mip_pitch), @@ -603,11 +614,10 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, .imageOffset = {0, 0, 0}, .imageExtent = {width, height, depth}, }); - offset += mip_ofs * num_layers; } if (!copies.empty()) { scheduler.EndRendering(); - image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits::eTransferRead); + image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {}); const auto cmdbuf = scheduler.CommandBuffer(); cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.buffer, copies); diff --git a/src/video_core/page_manager.cpp b/src/video_core/page_manager.cpp index 23905e83b..fb09e70f2 100644 --- a/src/video_core/page_manager.cpp +++ b/src/video_core/page_manager.cpp @@ -6,6 +6,7 @@ #include "common/alignment.h" #include "common/assert.h" #include "common/error.h" +#include "common/signal_context.h" #include "core/signals.h" #include "video_core/page_manager.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" @@ -152,8 +153,9 @@ struct PageManager::Impl { #endif } - static bool GuestFaultSignalHandler(void* code_address, void* fault_address, bool is_write) { + static bool GuestFaultSignalHandler(void* context, void* fault_address) { const auto addr = reinterpret_cast(fault_address); + const bool is_write = Common::IsWriteError(context); if (is_write && owned_ranges.find(addr) != owned_ranges.end()) { const VAddr addr_aligned = Common::AlignDown(addr, PAGESIZE); rasterizer->InvalidateMemory(addr_aligned, PAGESIZE); diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp 
b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index 37bd7ebc4..c4b779fad 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -199,8 +199,17 @@ vk::SamplerAddressMode ClampMode(AmdGpu::ClampMode mode) { return vk::SamplerAddressMode::eMirroredRepeat; case AmdGpu::ClampMode::ClampLastTexel: return vk::SamplerAddressMode::eClampToEdge; + case AmdGpu::ClampMode::MirrorOnceHalfBorder: + case AmdGpu::ClampMode::MirrorOnceBorder: + LOG_WARNING(Render_Vulkan, "Unimplemented clamp mode {}, using closest equivalent.", + static_cast(mode)); + [[fallthrough]]; case AmdGpu::ClampMode::MirrorOnceLastTexel: return vk::SamplerAddressMode::eMirrorClampToEdge; + case AmdGpu::ClampMode::ClampHalfBorder: + LOG_WARNING(Render_Vulkan, "Unimplemented clamp mode {}, using closest equivalent.", + static_cast(mode)); + [[fallthrough]]; case AmdGpu::ClampMode::ClampBorder: return vk::SamplerAddressMode::eClampToBorder; default: diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.h b/src/video_core/renderer_vulkan/liverpool_to_vk.h index 8432d2141..f5d10d48f 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.h +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.h @@ -4,6 +4,7 @@ #pragma once #include +#include "common/assert.h" #include "video_core/amdgpu/liverpool.h" #include "video_core/amdgpu/pixel_format.h" #include "video_core/amdgpu/resource.h" @@ -55,4 +56,13 @@ vk::SampleCountFlagBits NumSamples(u32 num_samples, vk::SampleCountFlags support void EmitQuadToTriangleListIndices(u8* out_indices, u32 num_vertices); +static inline vk::Format PromoteFormatToDepth(vk::Format fmt) { + if (fmt == vk::Format::eR32Sfloat) { + return vk::Format::eD32Sfloat; + } else if (fmt == vk::Format::eR16Unorm) { + return vk::Format::eD16Unorm; + } + UNREACHABLE(); +} + } // namespace Vulkan::LiverpoolToVK diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp 
b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index d019ff034..d7954bf79 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -202,7 +202,8 @@ Frame* RendererVulkan::PrepareFrameInternal(VideoCore::Image& image, bool is_eop scheduler.EndRendering(); const auto cmdbuf = scheduler.CommandBuffer(); - image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits::eTransferRead, cmdbuf); + image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {}, + cmdbuf); const std::array pre_barrier{ vk::ImageMemoryBarrier{ @@ -228,7 +229,7 @@ Frame* RendererVulkan::PrepareFrameInternal(VideoCore::Image& image, bool is_eop // Post-processing (Anti-aliasing, FSR etc) goes here. For now just blit to the frame image. cmdbuf.blitImage( - image.image, image.layout, frame->image, vk::ImageLayout::eTransferDstOptimal, + image.image, image.last_state.layout, frame->image, vk::ImageLayout::eTransferDstOptimal, MakeImageBlit(image.info.size.width, image.info.size.height, frame->width, frame->height), vk::Filter::eLinear); @@ -269,6 +270,9 @@ void RendererVulkan::Present(Frame* frame) { auto& scheduler = present_scheduler; const auto cmdbuf = scheduler.CommandBuffer(); + + ImGui::Core::Render(cmdbuf, frame); + { auto* profiler_ctx = instance.GetProfilerContext(); TracyVkNamedZoneC(profiler_ctx, renderer_gpu_zone, cmdbuf, "Host frame", @@ -326,8 +330,6 @@ void RendererVulkan::Present(Frame* frame) { }, }; - ImGui::Core::Render(cmdbuf, frame); - cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eColorAttachmentOutput, vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion, {}, {}, pre_barriers); diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index d9296b501..3558bf785 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ 
b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include + #include "common/alignment.h" #include "video_core/buffer_cache/buffer_cache.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h" @@ -15,7 +16,7 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler DescriptorHeap& desc_heap_, vk::PipelineCache pipeline_cache, u64 compute_key_, const Shader::Info& info_, vk::ShaderModule module) - : instance{instance_}, scheduler{scheduler_}, desc_heap{desc_heap_}, compute_key{compute_key_}, + : Pipeline{instance_, scheduler_, desc_heap_, pipeline_cache}, compute_key{compute_key_}, info{&info_} { const vk::PipelineShaderStageCreateInfo shader_ci = { .stage = vk::ShaderStageFlagBits::eCompute, @@ -108,12 +109,14 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache, // Bind resource buffers and textures. boost::container::static_vector buffer_views; boost::container::static_vector buffer_infos; - boost::container::static_vector image_infos; boost::container::small_vector set_writes; boost::container::small_vector buffer_barriers; Shader::PushData push_data{}; - u32 binding{}; + Shader::Backend::Bindings binding{}; + image_infos.clear(); + + info->PushUd(binding, push_data); for (const auto& desc : info->buffers) { bool is_storage = true; if (desc.is_gds_buffer) { @@ -145,21 +148,20 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache, buffer_cache.ObtainBuffer(address, size, desc.is_written); const u32 offset_aligned = Common::AlignDown(offset, alignment); const u32 adjust = offset - offset_aligned; - if (adjust != 0) { - ASSERT(adjust % 4 == 0); - push_data.AddOffset(binding, adjust); - } + ASSERT(adjust % 4 == 0); + push_data.AddOffset(binding.buffer, adjust); buffer_infos.emplace_back(vk_buffer->Handle(), offset_aligned, size + adjust); } set_writes.push_back({ .dstSet = VK_NULL_HANDLE, - .dstBinding = binding++, + 
.dstBinding = binding.unified++, .dstArrayElement = 0, .descriptorCount = 1, .descriptorType = is_storage ? vk::DescriptorType::eStorageBuffer : vk::DescriptorType::eUniformBuffer, .pBufferInfo = &buffer_infos.back(), }); + ++binding.buffer; } for (const auto& desc : info->texture_buffers) { @@ -186,10 +188,8 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache, "Texel buffer stride must match format stride"); const u32 offset_aligned = Common::AlignDown(offset, alignment); const u32 adjust = offset - offset_aligned; - if (adjust != 0) { - ASSERT(adjust % fmt_stride == 0); - push_data.AddOffset(binding, adjust / fmt_stride); - } + ASSERT(adjust % fmt_stride == 0); + push_data.AddOffset(binding.buffer, adjust / fmt_stride); buffer_view = vk_buffer->View(offset_aligned, size + adjust, desc.is_written, vsharp.GetDataFmt(), vsharp.GetNumberFmt()); if (auto barrier = @@ -199,49 +199,23 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache, buffer_barriers.emplace_back(*barrier); } if (desc.is_written) { - texture_cache.MarkWritten(address, size); + texture_cache.InvalidateMemoryFromGPU(address, size); } } set_writes.push_back({ .dstSet = VK_NULL_HANDLE, - .dstBinding = binding++, + .dstBinding = binding.unified++, .dstArrayElement = 0, .descriptorCount = 1, .descriptorType = desc.is_written ? 
vk::DescriptorType::eStorageTexelBuffer : vk::DescriptorType::eUniformTexelBuffer, .pTexelBufferView = &buffer_view, }); + ++binding.buffer; } - for (const auto& image_desc : info->images) { - const auto tsharp = image_desc.GetSharp(*info); - if (tsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid) { - VideoCore::ImageInfo image_info{tsharp, image_desc.is_depth}; - VideoCore::ImageViewInfo view_info{tsharp, image_desc.is_storage}; - const auto& image_view = texture_cache.FindTexture(image_info, view_info); - const auto& image = texture_cache.GetImage(image_view.image_id); - image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view, image.layout); - } else if (instance.IsNullDescriptorSupported()) { - image_infos.emplace_back(VK_NULL_HANDLE, VK_NULL_HANDLE, vk::ImageLayout::eGeneral); - } else { - auto& null_image = texture_cache.GetImageView(VideoCore::NULL_IMAGE_VIEW_ID); - image_infos.emplace_back(VK_NULL_HANDLE, *null_image.image_view, - vk::ImageLayout::eGeneral); - } - set_writes.push_back({ - .dstSet = VK_NULL_HANDLE, - .dstBinding = binding++, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = image_desc.is_storage ? 
vk::DescriptorType::eStorageImage - : vk::DescriptorType::eSampledImage, - .pImageInfo = &image_infos.back(), - }); + BindTextures(texture_cache, *info, binding, set_writes); - if (texture_cache.IsMeta(tsharp.Address())) { - LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a CS shader (texture)"); - } - } for (const auto& sampler : info->samplers) { const auto ssharp = sampler.GetSharp(*info); if (ssharp.force_degamma) { @@ -251,7 +225,7 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache, image_infos.emplace_back(vk_sampler, VK_NULL_HANDLE, vk::ImageLayout::eGeneral); set_writes.push_back({ .dstSet = VK_NULL_HANDLE, - .dstBinding = binding++, + .dstBinding = binding.unified++, .dstArrayElement = 0, .descriptorCount = 1, .descriptorType = vk::DescriptorType::eSampler, diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 8a6213a29..f1bc7285a 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -3,9 +3,8 @@ #pragma once -#include -#include "shader_recompiler/info.h" #include "video_core/renderer_vulkan/vk_common.h" +#include "video_core/renderer_vulkan/vk_pipeline_common.h" namespace VideoCore { class BufferCache; @@ -18,27 +17,17 @@ class Instance; class Scheduler; class DescriptorHeap; -class ComputePipeline { +class ComputePipeline : public Pipeline { public: - explicit ComputePipeline(const Instance& instance, Scheduler& scheduler, - DescriptorHeap& desc_heap, vk::PipelineCache pipeline_cache, - u64 compute_key, const Shader::Info& info, vk::ShaderModule module); + ComputePipeline(const Instance& instance, Scheduler& scheduler, DescriptorHeap& desc_heap, + vk::PipelineCache pipeline_cache, u64 compute_key, const Shader::Info& info, + vk::ShaderModule module); ~ComputePipeline(); - [[nodiscard]] vk::Pipeline Handle() const noexcept { - return *pipeline; - } - bool 
BindResources(VideoCore::BufferCache& buffer_cache, VideoCore::TextureCache& texture_cache) const; private: - const Instance& instance; - Scheduler& scheduler; - DescriptorHeap& desc_heap; - vk::UniquePipeline pipeline; - vk::UniquePipelineLayout pipeline_layout; - vk::UniqueDescriptorSetLayout desc_layout; u64 compute_key; const Shader::Info* info; bool uses_push_descriptors{}; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index dc311a7c6..8edf2f50c 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -21,7 +21,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul vk::PipelineCache pipeline_cache, std::span infos, std::span modules) - : instance{instance_}, scheduler{scheduler_}, desc_heap{desc_heap_}, key{key_} { + : Pipeline{instance_, scheduler_, desc_heap_, pipeline_cache}, key{key_} { const vk::Device device = instance.GetDevice(); std::ranges::copy(infos, stages.begin()); BuildDescSetLayout(); @@ -41,8 +41,8 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul }; pipeline_layout = instance.GetDevice().createPipelineLayoutUnique(layout_info); - boost::container::static_vector bindings; - boost::container::static_vector attributes; + boost::container::static_vector vertex_bindings; + boost::container::static_vector vertex_attributes; const auto& vs_info = stages[u32(Shader::Stage::Vertex)]; for (const auto& input : vs_info->vs_inputs) { if (input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate0 || @@ -52,13 +52,13 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul } const auto buffer = vs_info->ReadUd(input.sgpr_base, input.dword_offset); - attributes.push_back({ + vertex_attributes.push_back({ .location = input.binding, .binding = input.binding, .format = 
LiverpoolToVK::SurfaceFormat(buffer.GetDataFmt(), buffer.GetNumberFmt()), .offset = 0, }); - bindings.push_back({ + vertex_bindings.push_back({ .binding = input.binding, .stride = buffer.GetStride(), .inputRate = input.instance_step_rate == Shader::Info::VsInput::None @@ -68,10 +68,10 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul } const vk::PipelineVertexInputStateCreateInfo vertex_input_info = { - .vertexBindingDescriptionCount = static_cast(bindings.size()), - .pVertexBindingDescriptions = bindings.data(), - .vertexAttributeDescriptionCount = static_cast(attributes.size()), - .pVertexAttributeDescriptions = attributes.data(), + .vertexBindingDescriptionCount = static_cast(vertex_bindings.size()), + .pVertexBindingDescriptions = vertex_bindings.data(), + .vertexAttributeDescriptionCount = static_cast(vertex_attributes.size()), + .pVertexAttributeDescriptions = vertex_attributes.data(), }; if (key.prim_type == Liverpool::PrimitiveType::RectList && !IsEmbeddedVs()) { @@ -83,8 +83,9 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul .topology = LiverpoolToVK::PrimitiveType(key.prim_type), .primitiveRestartEnable = key.enable_primitive_restart != 0, }; - ASSERT_MSG(!key.enable_primitive_restart || key.primitive_restart_index == 0xFFFF, - "Primitive restart index other than 0xFFFF is not supported yet"); + ASSERT_MSG(!key.enable_primitive_restart || key.primitive_restart_index == 0xFFFF || + key.primitive_restart_index == 0xFFFFFFFF, + "Primitive restart index other than -1 is not supported yet"); const vk::PipelineRasterizationStateCreateInfo raster_state = { .depthClampEnable = false, @@ -291,8 +292,9 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul GraphicsPipeline::~GraphicsPipeline() = default; void GraphicsPipeline::BuildDescSetLayout() { - u32 binding{}; boost::container::small_vector bindings; + u32 binding{}; + for (const auto* stage : stages) { if 
(!stage) { continue; @@ -352,11 +354,12 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs, // Bind resource buffers and textures. boost::container::static_vector buffer_views; boost::container::static_vector buffer_infos; - boost::container::static_vector image_infos; boost::container::small_vector set_writes; boost::container::small_vector buffer_barriers; Shader::PushData push_data{}; - u32 binding{}; + Shader::Backend::Bindings binding{}; + + image_infos.clear(); for (const auto* stage : stages) { if (!stage) { @@ -366,6 +369,7 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs, push_data.step0 = regs.vgt_instance_step_rate_0; push_data.step1 = regs.vgt_instance_step_rate_1; } + stage->PushUd(binding, push_data); for (const auto& buffer : stage->buffers) { const auto vsharp = buffer.GetSharp(*stage); const bool is_storage = buffer.IsStorage(vsharp); @@ -381,10 +385,8 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs, buffer_cache.ObtainBuffer(address, size, buffer.is_written); const u32 offset_aligned = Common::AlignDown(offset, alignment); const u32 adjust = offset - offset_aligned; - if (adjust != 0) { - ASSERT(adjust % 4 == 0); - push_data.AddOffset(binding, adjust); - } + ASSERT(adjust % 4 == 0); + push_data.AddOffset(binding.buffer, adjust); buffer_infos.emplace_back(vk_buffer->Handle(), offset_aligned, size + adjust); } else if (instance.IsNullDescriptorSupported()) { buffer_infos.emplace_back(VK_NULL_HANDLE, 0, VK_WHOLE_SIZE); @@ -394,13 +396,14 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs, } set_writes.push_back({ .dstSet = VK_NULL_HANDLE, - .dstBinding = binding++, + .dstBinding = binding.unified++, .dstArrayElement = 0, .descriptorCount = 1, .descriptorType = is_storage ? 
vk::DescriptorType::eStorageBuffer : vk::DescriptorType::eUniformBuffer, .pBufferInfo = &buffer_infos.back(), }); + ++binding.buffer; } for (const auto& desc : stage->texture_buffers) { @@ -417,10 +420,8 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs, "Texel buffer stride must match format stride"); const u32 offset_aligned = Common::AlignDown(offset, alignment); const u32 adjust = offset - offset_aligned; - if (adjust != 0) { - ASSERT(adjust % fmt_stride == 0); - push_data.AddOffset(binding, adjust / fmt_stride); - } + ASSERT(adjust % fmt_stride == 0); + push_data.AddOffset(binding.buffer, adjust / fmt_stride); buffer_view = vk_buffer->View(offset_aligned, size + adjust, desc.is_written, vsharp.GetDataFmt(), vsharp.GetNumberFmt()); const auto dst_access = desc.is_written ? vk::AccessFlagBits2::eShaderWrite @@ -430,58 +431,30 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs, buffer_barriers.emplace_back(*barrier); } if (desc.is_written) { - texture_cache.MarkWritten(address, size); + texture_cache.InvalidateMemoryFromGPU(address, size); } } set_writes.push_back({ .dstSet = VK_NULL_HANDLE, - .dstBinding = binding++, + .dstBinding = binding.unified++, .dstArrayElement = 0, .descriptorCount = 1, .descriptorType = desc.is_written ? 
vk::DescriptorType::eStorageTexelBuffer : vk::DescriptorType::eUniformTexelBuffer, .pTexelBufferView = &buffer_view, }); + ++binding.buffer; } - boost::container::static_vector tsharps; - for (const auto& image_desc : stage->images) { - const auto tsharp = image_desc.GetSharp(*stage); - if (tsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid) { - tsharps.emplace_back(tsharp); - VideoCore::ImageInfo image_info{tsharp, image_desc.is_depth}; - VideoCore::ImageViewInfo view_info{tsharp, image_desc.is_storage}; - const auto& image_view = texture_cache.FindTexture(image_info, view_info); - const auto& image = texture_cache.GetImage(image_view.image_id); - image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view, image.layout); - } else if (instance.IsNullDescriptorSupported()) { - image_infos.emplace_back(VK_NULL_HANDLE, VK_NULL_HANDLE, vk::ImageLayout::eGeneral); - } else { - auto& null_image = texture_cache.GetImageView(VideoCore::NULL_IMAGE_VIEW_ID); - image_infos.emplace_back(VK_NULL_HANDLE, *null_image.image_view, - vk::ImageLayout::eGeneral); - } - set_writes.push_back({ - .dstSet = VK_NULL_HANDLE, - .dstBinding = binding++, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = image_desc.is_storage ? 
vk::DescriptorType::eStorageImage - : vk::DescriptorType::eSampledImage, - .pImageInfo = &image_infos.back(), - }); + BindTextures(texture_cache, *stage, binding, set_writes); - if (texture_cache.IsMeta(tsharp.Address())) { - LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a PS shader (texture)"); - } - } for (const auto& sampler : stage->samplers) { auto ssharp = sampler.GetSharp(*stage); if (ssharp.force_degamma) { LOG_WARNING(Render_Vulkan, "Texture requires gamma correction"); } if (sampler.disable_aniso) { - const auto& tsharp = tsharps[sampler.associated_image]; + const auto& tsharp = stage->images[sampler.associated_image].GetSharp(*stage); if (tsharp.base_level == 0 && tsharp.last_level == 0) { ssharp.max_aniso.Assign(AmdGpu::AnisoRatio::One); } @@ -490,7 +463,7 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs, image_infos.emplace_back(vk_sampler, VK_NULL_HANDLE, vk::ImageLayout::eGeneral); set_writes.push_back({ .dstSet = VK_NULL_HANDLE, - .dstBinding = binding++, + .dstBinding = binding.unified++, .dstArrayElement = 0, .descriptorCount = 1, .descriptorType = vk::DescriptorType::eSampler, diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index c8a08b4f2..74817656a 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -5,7 +5,7 @@ #include "common/types.h" #include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/vk_common.h" -#include "video_core/renderer_vulkan/vk_compute_pipeline.h" +#include "video_core/renderer_vulkan/vk_pipeline_common.h" namespace VideoCore { class BufferCache; @@ -33,6 +33,7 @@ struct GraphicsPipelineKey { Liverpool::DepthControl depth_stencil; u32 depth_bias_enable; u32 num_samples; + u32 mrt_mask; Liverpool::StencilControl stencil; Liverpool::PrimitiveType prim_type; u32 enable_primitive_restart; @@ -50,26 +51,17 @@ struct 
GraphicsPipelineKey { } }; -class GraphicsPipeline { +class GraphicsPipeline : public Pipeline { public: - explicit GraphicsPipeline(const Instance& instance, Scheduler& scheduler, - DescriptorHeap& desc_heap, const GraphicsPipelineKey& key, - vk::PipelineCache pipeline_cache, - std::span stages, - std::span modules); + GraphicsPipeline(const Instance& instance, Scheduler& scheduler, DescriptorHeap& desc_heap, + const GraphicsPipelineKey& key, vk::PipelineCache pipeline_cache, + std::span stages, + std::span modules); ~GraphicsPipeline(); void BindResources(const Liverpool::Regs& regs, VideoCore::BufferCache& buffer_cache, VideoCore::TextureCache& texture_cache) const; - vk::Pipeline Handle() const noexcept { - return *pipeline; - } - - vk::PipelineLayout GetLayout() const { - return *pipeline_layout; - } - const Shader::Info& GetStage(Shader::Stage stage) const noexcept { return *stages[u32(stage)]; } @@ -83,6 +75,10 @@ public: return key.write_masks; } + auto GetMrtMask() const { + return key.mrt_mask; + } + bool IsDepthEnabled() const { return key.depth_stencil.depth_enable.Value(); } @@ -91,12 +87,6 @@ private: void BuildDescSetLayout(); private: - const Instance& instance; - Scheduler& scheduler; - DescriptorHeap& desc_heap; - vk::UniquePipeline pipeline; - vk::UniquePipelineLayout pipeline_layout; - vk::UniqueDescriptorSetLayout desc_layout; std::array stages{}; GraphicsPipelineKey key; bool uses_push_descriptors{}; diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 0bc73e14f..52143907c 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -282,6 +282,7 @@ bool Instance::CreateDevice() { vk::PhysicalDeviceFeatures2{ .features{ .robustBufferAccess = features.robustBufferAccess, + .imageCubeArray = features.imageCubeArray, .independentBlend = features.independentBlend, .geometryShader = features.geometryShader, .logicOp = 
features.logicOp, @@ -309,6 +310,7 @@ bool Instance::CreateDevice() { .separateDepthStencilLayouts = vk12_features.separateDepthStencilLayouts, .hostQueryReset = vk12_features.hostQueryReset, .timelineSemaphore = vk12_features.timelineSemaphore, + .samplerMirrorClampToEdge = vk12_features.samplerMirrorClampToEdge, }, vk::PhysicalDeviceMaintenance4FeaturesKHR{ .maintenance4 = true, diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 7f6079a5c..7a094f66d 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -234,18 +234,20 @@ bool PipelineCache::RefreshGraphicsKey() { key.front_face = regs.polygon_control.front_face; key.num_samples = regs.aa_config.NumSamples(); - const auto skip_cb_binding = + const bool skip_cb_binding = regs.color_control.mode == AmdGpu::Liverpool::ColorControl::OperationMode::Disable; // `RenderingInfo` is assumed to be initialized with a contiguous array of valid color - // attachments. This might be not a case as HW color buffers can be bound in an arbitrary order. - // We need to do some arrays compaction at this stage + // attachments. This might be not a case as HW color buffers can be bound in an arbitrary + // order. We need to do some arrays compaction at this stage key.color_formats.fill(vk::Format::eUndefined); key.blend_controls.fill({}); key.write_masks.fill({}); key.mrt_swizzles.fill(Liverpool::ColorBuffer::SwapMode::Standard); - int remapped_cb{}; - for (auto cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) { + + // First pass of bindings check to identify formats and swizzles and pass them to the shader + // recompiler. 
+ for (auto cb = 0u, remapped_cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) { auto const& col_buf = regs.color_buffers[cb]; if (skip_cb_binding || !col_buf || !regs.color_target_mask.GetMask(cb)) { continue; @@ -258,16 +260,11 @@ bool PipelineCache::RefreshGraphicsKey() { if (base_format == key.color_formats[remapped_cb]) { key.mrt_swizzles[remapped_cb] = col_buf.info.comp_swap.Value(); } - key.blend_controls[remapped_cb] = regs.blend_control[cb]; - key.blend_controls[remapped_cb].enable.Assign(key.blend_controls[remapped_cb].enable && - !col_buf.info.blend_bypass); - key.write_masks[remapped_cb] = vk::ColorComponentFlags{regs.color_target_mask.GetMask(cb)}; - key.cb_shader_mask.SetMask(remapped_cb, regs.color_shader_mask.GetMask(cb)); ++remapped_cb; } - u32 binding{}; + Shader::Backend::Bindings binding{}; for (u32 i = 0; i < MaxShaderStages; i++) { if (!regs.stage_enable.IsStageEnabled(i)) { key.stage_hashes[i] = 0; @@ -309,11 +306,33 @@ bool PipelineCache::RefreshGraphicsKey() { std::tie(infos[i], modules[i], key.stage_hashes[i]) = GetProgram(stage, params, binding); } + + const auto* fs_info = infos[u32(Shader::Stage::Fragment)]; + key.mrt_mask = fs_info ? 
fs_info->mrt_mask : 0u; + + // Second pass to fill the remaining CB pipeline key data + for (auto cb = 0u, remapped_cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) { + auto const& col_buf = regs.color_buffers[cb]; + if (skip_cb_binding || !col_buf || !regs.color_target_mask.GetMask(cb) || + (key.mrt_mask & (1u << cb)) == 0) { + key.color_formats[cb] = vk::Format::eUndefined; + key.mrt_swizzles[cb] = Liverpool::ColorBuffer::SwapMode::Standard; + continue; + } + + key.blend_controls[remapped_cb] = regs.blend_control[cb]; + key.blend_controls[remapped_cb].enable.Assign(key.blend_controls[remapped_cb].enable && + !col_buf.info.blend_bypass); + key.write_masks[remapped_cb] = vk::ColorComponentFlags{regs.color_target_mask.GetMask(cb)}; + key.cb_shader_mask.SetMask(remapped_cb, regs.color_shader_mask.GetMask(cb)); + + ++remapped_cb; + } return true; } bool PipelineCache::RefreshComputeKey() { - u32 binding{}; + Shader::Backend::Bindings binding{}; const auto* cs_pgm = &liverpool->regs.cs_program; const auto cs_params = Liverpool::GetParams(*cs_pgm); if (ShouldSkipShader(cs_params.hash, "compute")) { @@ -327,7 +346,7 @@ bool PipelineCache::RefreshComputeKey() { vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info, const Shader::RuntimeInfo& runtime_info, std::span code, size_t perm_idx, - u32& binding) { + Shader::Backend::Bindings& binding) { LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x} {}", info.stage, info.pgm_hash, perm_idx != 0 ? 
"(permutation)" : ""); if (Config::dumpShaders()) { @@ -347,14 +366,14 @@ vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info, } std::tuple PipelineCache::GetProgram( - Shader::Stage stage, Shader::ShaderParams params, u32& binding) { + Shader::Stage stage, Shader::ShaderParams params, Shader::Backend::Bindings& binding) { const auto runtime_info = BuildRuntimeInfo(stage); auto [it_pgm, new_program] = program_cache.try_emplace(params.hash); if (new_program) { Program* program = program_pool.Create(stage, params); - u32 start_binding = binding; + auto start = binding; const auto module = CompileModule(program->info, runtime_info, params.code, 0, binding); - const auto spec = Shader::StageSpecialization(program->info, runtime_info, start_binding); + const auto spec = Shader::StageSpecialization(program->info, runtime_info, start); program->AddPermut(module, std::move(spec)); it_pgm.value() = program; return std::make_tuple(&program->info, module, HashCombine(params.hash, 0)); @@ -372,7 +391,7 @@ std::tuple PipelineCache::GetProgram module = CompileModule(new_info, runtime_info, params.code, perm_idx, binding); program->AddPermut(module, std::move(spec)); } else { - binding += info.NumBindings(); + info.AddBindings(binding); module = it->module; perm_idx = std::distance(program->modules.begin(), it); } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 7f0064fb8..7e44bbf09 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -49,9 +49,8 @@ public: const ComputePipeline* GetComputePipeline(); - std::tuple GetProgram(Shader::Stage stage, - Shader::ShaderParams params, - u32& binding); + std::tuple GetProgram( + Shader::Stage stage, Shader::ShaderParams params, Shader::Backend::Bindings& binding); private: bool RefreshGraphicsKey(); @@ -60,7 +59,8 @@ private: void DumpShader(std::span code, u64 hash, Shader::Stage 
stage, size_t perm_idx, std::string_view ext); vk::ShaderModule CompileModule(Shader::Info& info, const Shader::RuntimeInfo& runtime_info, - std::span code, size_t perm_idx, u32& binding); + std::span code, size_t perm_idx, + Shader::Backend::Bindings& binding); Shader::RuntimeInfo BuildRuntimeInfo(Shader::Stage stage); private: diff --git a/src/video_core/renderer_vulkan/vk_pipeline_common.cpp b/src/video_core/renderer_vulkan/vk_pipeline_common.cpp new file mode 100644 index 000000000..61e564150 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_pipeline_common.cpp @@ -0,0 +1,81 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include + +#include "shader_recompiler/info.h" +#include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_pipeline_common.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/texture_cache/texture_cache.h" + +namespace Vulkan { + +boost::container::static_vector Pipeline::image_infos; + +Pipeline::Pipeline(const Instance& instance_, Scheduler& scheduler_, DescriptorHeap& desc_heap_, + vk::PipelineCache pipeline_cache) + : instance{instance_}, scheduler{scheduler_}, desc_heap{desc_heap_} {} + +Pipeline::~Pipeline() = default; + +void Pipeline::BindTextures(VideoCore::TextureCache& texture_cache, const Shader::Info& stage, + Shader::Backend::Bindings& binding, + DescriptorWrites& set_writes) const { + + using ImageBindingInfo = std::tuple; + boost::container::static_vector image_bindings; + + for (const auto& image_desc : stage.images) { + const auto tsharp = image_desc.GetSharp(stage); + if (tsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid) { + VideoCore::ImageInfo image_info{tsharp, image_desc}; + const auto image_id = texture_cache.FindImage(image_info); + auto& image = texture_cache.GetImage(image_id); + image.flags |= VideoCore::ImageFlagBits::Bound; + image_bindings.emplace_back(image_id, tsharp, 
image_desc); + } else { + image_bindings.emplace_back(VideoCore::ImageId{}, tsharp, image_desc); + } + + if (texture_cache.IsMeta(tsharp.Address())) { + LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a PS shader (texture)"); + } + } + + // Second pass to re-bind images that were updated after binding + for (auto [image_id, tsharp, desc] : image_bindings) { + if (!image_id) { + if (instance.IsNullDescriptorSupported()) { + image_infos.emplace_back(VK_NULL_HANDLE, VK_NULL_HANDLE, vk::ImageLayout::eGeneral); + } else { + auto& null_image = texture_cache.GetImageView(VideoCore::NULL_IMAGE_VIEW_ID); + image_infos.emplace_back(VK_NULL_HANDLE, *null_image.image_view, + vk::ImageLayout::eGeneral); + } + } else { + auto& image = texture_cache.GetImage(image_id); + if (True(image.flags & VideoCore::ImageFlagBits::NeedsRebind)) { + image_id = texture_cache.FindImage(image.info); + } + VideoCore::ImageViewInfo view_info{tsharp, desc}; + auto& image_view = texture_cache.FindTexture(image_id, view_info); + image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view, + texture_cache.GetImage(image_id).last_state.layout); + image.flags &= + ~(VideoCore::ImageFlagBits::NeedsRebind | VideoCore::ImageFlagBits::Bound); + } + + set_writes.push_back({ + .dstSet = VK_NULL_HANDLE, + .dstBinding = binding.unified++, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = desc.is_storage ? 
vk::DescriptorType::eStorageImage + : vk::DescriptorType::eSampledImage, + .pImageInfo = &image_infos.back(), + }); + } +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_common.h b/src/video_core/renderer_vulkan/vk_pipeline_common.h new file mode 100644 index 000000000..ab99e7b33 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_pipeline_common.h @@ -0,0 +1,49 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "shader_recompiler/backend/bindings.h" +#include "shader_recompiler/info.h" +#include "video_core/renderer_vulkan/vk_common.h" + +namespace VideoCore { +class BufferCache; +class TextureCache; +} // namespace VideoCore + +namespace Vulkan { + +class Instance; +class Scheduler; +class DescriptorHeap; + +class Pipeline { +public: + Pipeline(const Instance& instance, Scheduler& scheduler, DescriptorHeap& desc_heap, + vk::PipelineCache pipeline_cache); + virtual ~Pipeline(); + + vk::Pipeline Handle() const noexcept { + return *pipeline; + } + + vk::PipelineLayout GetLayout() const noexcept { + return *pipeline_layout; + } + + using DescriptorWrites = boost::container::small_vector; + void BindTextures(VideoCore::TextureCache& texture_cache, const Shader::Info& stage, + Shader::Backend::Bindings& binding, DescriptorWrites& set_writes) const; + +protected: + const Instance& instance; + Scheduler& scheduler; + DescriptorHeap& desc_heap; + vk::UniquePipeline pipeline; + vk::UniquePipelineLayout pipeline_layout; + vk::UniqueDescriptorSetLayout desc_layout; + static boost::container::static_vector image_infos; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_platform.cpp b/src/video_core/renderer_vulkan/vk_platform.cpp index feadda96c..6abd00aaa 100644 --- a/src/video_core/renderer_vulkan/vk_platform.cpp +++ b/src/video_core/renderer_vulkan/vk_platform.cpp @@ -44,7 +44,6 @@ static VKAPI_ATTR VkBool32 VKAPI_CALL 
DebugUtilsCallback( case 0xc81ad50e: case 0xb7c39078: case 0x32868fde: // vkCreateBufferView(): pCreateInfo->range does not equal VK_WHOLE_SIZE - case 0x92d66fc1: // `pMultisampleState is NULL` for depth only passes (confirmed VL error) return VK_FALSE; default: break; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 23f60da13..eac272726 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -62,7 +62,7 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) { buffer_cache.BindVertexBuffers(vs_info); const u32 num_indices = buffer_cache.BindIndexBuffer(is_indexed, index_offset); - BeginRendering(); + BeginRendering(*pipeline); UpdateDynamicState(*pipeline); const auto [vertex_offset, instance_offset] = vs_info.GetDrawOffsets(); @@ -102,7 +102,7 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr address, u32 offset, u32 si buffer_cache.BindVertexBuffers(vs_info); const u32 num_indices = buffer_cache.BindIndexBuffer(is_indexed, 0); - BeginRendering(); + BeginRendering(*pipeline); UpdateDynamicState(*pipeline); const auto [buffer, base] = buffer_cache.ObtainBuffer(address, size, true); @@ -179,7 +179,7 @@ void Rasterizer::Finish() { scheduler.Finish(); } -void Rasterizer::BeginRendering() { +void Rasterizer::BeginRendering(const GraphicsPipeline& pipeline) { const auto& regs = liverpool->regs; RenderState state; @@ -199,6 +199,13 @@ void Rasterizer::BeginRendering() { continue; } + // Skip stale color buffers if shader doesn't output to them. Otherwise it will perform + // an unnecessary transition and may result in state conflict if the resource is already + // bound for reading. 
+ if ((pipeline.GetMrtMask() & (1 << col_buf_id)) == 0) { + continue; + } + const auto& hint = liverpool->last_cb_extent[col_buf_id]; VideoCore::ImageInfo image_info{col_buf, hint}; VideoCore::ImageViewInfo view_info{col_buf, false /*!!image.info.usage.vo_buffer*/}; @@ -240,7 +247,7 @@ void Rasterizer::BeginRendering() { state.depth_image = image.image; state.depth_attachment = { .imageView = *image_view.image_view, - .imageLayout = image.layout, + .imageLayout = image.last_state.layout, .loadOp = is_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad, .storeOp = is_clear ? vk::AttachmentStoreOp::eNone : vk::AttachmentStoreOp::eStore, .clearValue = vk::ClearValue{.depthStencil = {.depth = regs.depth_clear, diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 5aa90c5cc..bd05c8faf 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -52,7 +52,7 @@ public: void Finish(); private: - void BeginRendering(); + void BeginRendering(const GraphicsPipeline& pipeline); void UpdateDynamicState(const GraphicsPipeline& pipeline); void UpdateViewportScissorState(); diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index b99dfdbb4..08b5014ec 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -59,58 +59,6 @@ void Scheduler::EndRendering() { } is_rendering = false; current_cmdbuf.endRendering(); - - boost::container::static_vector barriers; - for (size_t i = 0; i < render_state.num_color_attachments; ++i) { - barriers.push_back(vk::ImageMemoryBarrier{ - .srcAccessMask = vk::AccessFlagBits::eColorAttachmentWrite, - .dstAccessMask = vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite, - .oldLayout = vk::ImageLayout::eColorAttachmentOptimal, - .newLayout = vk::ImageLayout::eColorAttachmentOptimal, - 
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = render_state.color_images[i], - .subresourceRange = - { - .aspectMask = vk::ImageAspectFlagBits::eColor, - .baseMipLevel = 0, - .levelCount = VK_REMAINING_MIP_LEVELS, - .baseArrayLayer = 0, - .layerCount = VK_REMAINING_ARRAY_LAYERS, - }, - }); - } - if (render_state.has_depth || render_state.has_stencil) { - barriers.push_back(vk::ImageMemoryBarrier{ - .srcAccessMask = vk::AccessFlagBits::eDepthStencilAttachmentWrite, - .dstAccessMask = vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite, - .oldLayout = render_state.depth_attachment.imageLayout, - .newLayout = render_state.depth_attachment.imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = render_state.depth_image, - .subresourceRange = - { - .aspectMask = vk::ImageAspectFlagBits::eDepth | - (render_state.has_stencil ? vk::ImageAspectFlagBits::eStencil - : vk::ImageAspectFlagBits::eNone), - .baseMipLevel = 0, - .levelCount = VK_REMAINING_MIP_LEVELS, - .baseArrayLayer = 0, - .layerCount = VK_REMAINING_ARRAY_LAYERS, - }, - }); - } - - if (!barriers.empty()) { - const auto src_stages = - vk::PipelineStageFlagBits::eColorAttachmentOutput | - (render_state.has_depth ? 
vk::PipelineStageFlagBits::eLateFragmentTests | - vk::PipelineStageFlagBits::eEarlyFragmentTests - : vk::PipelineStageFlagBits::eNone); - current_cmdbuf.pipelineBarrier(src_stages, vk::PipelineStageFlagBits::eFragmentShader, - vk::DependencyFlagBits::eByRegion, {}, {}, barriers); - } } void Scheduler::Flush(SubmitInfo& info) { diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp index 9e8c38f0d..4ce6e1eea 100644 --- a/src/video_core/texture_cache/image.cpp +++ b/src/video_core/texture_cache/image.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #define VULKAN_HPP_NO_EXCEPTIONS +#include #include "common/assert.h" #include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/vk_instance.h" @@ -124,7 +125,7 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, // the texture cache should re-create the resource with the usage requested vk::ImageCreateFlags flags{vk::ImageCreateFlagBits::eMutableFormat | vk::ImageCreateFlagBits::eExtendedUsage}; - if (info.props.is_cube) { + if (info.props.is_cube || (info.type == vk::ImageType::e2D && info.resources.layers >= 6)) { flags |= vk::ImageCreateFlagBits::eCubeCompatible; } else if (info.props.is_volume) { flags |= vk::ImageCreateFlagBits::e2DArrayCompatible; @@ -179,52 +180,132 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, info.guest_size_bytes); } -void Image::Transit(vk::ImageLayout dst_layout, vk::Flags dst_mask, - vk::CommandBuffer cmdbuf) { - if (dst_layout == layout && dst_mask == access_mask) { - return; +boost::container::small_vector Image::GetBarriers( + vk::ImageLayout dst_layout, vk::Flags dst_mask, + vk::PipelineStageFlags2 dst_stage, std::optional subres_range) { + const bool needs_partial_transition = + subres_range && + (subres_range->base != SubresourceBase{} || subres_range->extent != info.resources); + const bool partially_transited = 
!subresource_states.empty(); + + boost::container::small_vector barriers{}; + if (needs_partial_transition || partially_transited) { + if (!partially_transited) { + subresource_states.resize(info.resources.levels * info.resources.layers); + std::fill(subresource_states.begin(), subresource_states.end(), last_state); + } + + // In case of partial transition, we need to change the specified subresources only. + // Otherwise all subresources need to be set to the same state so we can use a full + // resource transition for the next time. + const auto mips = + needs_partial_transition + ? std::ranges::views::iota(subres_range->base.level, + subres_range->base.level + subres_range->extent.levels) + : std::views::iota(0u, info.resources.levels); + const auto layers = + needs_partial_transition + ? std::ranges::views::iota(subres_range->base.layer, + subres_range->base.layer + subres_range->extent.layers) + : std::views::iota(0u, info.resources.layers); + + for (u32 mip : mips) { + for (u32 layer : layers) { + // NOTE: these loops may produce a lot of small barriers. + // If this becomes a problem, we can optimize it by merging adjacent barriers. 
+ const auto subres_idx = mip * info.resources.layers + layer; + ASSERT(subres_idx < subresource_states.size()); + auto& state = subresource_states[subres_idx]; + + if (state.layout != dst_layout || state.access_mask != dst_mask) { + barriers.emplace_back(vk::ImageMemoryBarrier2{ + .srcStageMask = state.pl_stage, + .srcAccessMask = state.access_mask, + .dstStageMask = dst_stage, + .dstAccessMask = dst_mask, + .oldLayout = state.layout, + .newLayout = dst_layout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange{ + .aspectMask = aspect_mask, + .baseMipLevel = mip, + .levelCount = 1, + .baseArrayLayer = layer, + .layerCount = 1, + }, + }); + state.layout = dst_layout; + state.access_mask = dst_mask; + state.pl_stage = dst_stage; + } + } + } + + if (!needs_partial_transition) { + subresource_states.clear(); + } + } else { // Full resource transition + if (last_state.layout == dst_layout && last_state.access_mask == dst_mask) { + return {}; + } + + barriers.emplace_back(vk::ImageMemoryBarrier2{ + .srcStageMask = last_state.pl_stage, + .srcAccessMask = last_state.access_mask, + .dstStageMask = dst_stage, + .dstAccessMask = dst_mask, + .oldLayout = last_state.layout, + .newLayout = dst_layout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange{ + .aspectMask = aspect_mask, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }); } - const vk::ImageMemoryBarrier barrier = { - .srcAccessMask = access_mask, - .dstAccessMask = dst_mask, - .oldLayout = layout, - .newLayout = dst_layout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = image, - .subresourceRange{ - .aspectMask = aspect_mask, - .baseMipLevel = 0, - .levelCount = VK_REMAINING_MIP_LEVELS, - 
.baseArrayLayer = 0, - .layerCount = VK_REMAINING_ARRAY_LAYERS, - }, - }; + last_state.layout = dst_layout; + last_state.access_mask = dst_mask; + last_state.pl_stage = dst_stage; + return barriers; +} + +void Image::Transit(vk::ImageLayout dst_layout, vk::Flags dst_mask, + std::optional range, vk::CommandBuffer cmdbuf /*= {}*/) { // Adjust pipieline stage - const vk::PipelineStageFlags dst_pl_stage = - (dst_mask == vk::AccessFlagBits::eTransferRead || - dst_mask == vk::AccessFlagBits::eTransferWrite) - ? vk::PipelineStageFlagBits::eTransfer - : vk::PipelineStageFlagBits::eAllGraphics | vk::PipelineStageFlagBits::eComputeShader; + const vk::PipelineStageFlags2 dst_pl_stage = + (dst_mask == vk::AccessFlagBits2::eTransferRead || + dst_mask == vk::AccessFlagBits2::eTransferWrite) + ? vk::PipelineStageFlagBits2::eTransfer + : vk::PipelineStageFlagBits2::eAllGraphics | vk::PipelineStageFlagBits2::eComputeShader; + + const auto barriers = GetBarriers(dst_layout, dst_mask, dst_pl_stage, range); + if (barriers.empty()) { + return; + } if (!cmdbuf) { // When using external cmdbuf you are responsible for ending rp. scheduler->EndRendering(); cmdbuf = scheduler->CommandBuffer(); } - cmdbuf.pipelineBarrier(pl_stage, dst_pl_stage, vk::DependencyFlagBits::eByRegion, {}, {}, - barrier); - - layout = dst_layout; - access_mask = dst_mask; - pl_stage = dst_pl_stage; + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .imageMemoryBarrierCount = static_cast(barriers.size()), + .pImageMemoryBarriers = barriers.data(), + }); } void Image::Upload(vk::Buffer buffer, u64 offset) { scheduler->EndRendering(); - Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite); + Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {}); // Copy to the image. 
const auto aspect = aspect_mask & vk::ImageAspectFlagBits::eStencil @@ -248,12 +329,12 @@ void Image::Upload(vk::Buffer buffer, u64 offset) { cmdbuf.copyBufferToImage(buffer, image, vk::ImageLayout::eTransferDstOptimal, image_copy); Transit(vk::ImageLayout::eGeneral, - vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead); + vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eTransferRead, {}); } void Image::CopyImage(const Image& image) { scheduler->EndRendering(); - Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite); + Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {}); auto cmdbuf = scheduler->CommandBuffer(); @@ -279,15 +360,16 @@ void Image::CopyImage(const Image& image) { .extent = {mip_w, mip_h, mip_d}, }); } - cmdbuf.copyImage(image.image, image.layout, this->image, this->layout, image_copy); + cmdbuf.copyImage(image.image, image.last_state.layout, this->image, this->last_state.layout, + image_copy); Transit(vk::ImageLayout::eGeneral, - vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead); + vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eTransferRead, {}); } void Image::CopyMip(const Image& image, u32 mip) { scheduler->EndRendering(); - Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite); + Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {}); auto cmdbuf = scheduler->CommandBuffer(); @@ -313,10 +395,11 @@ void Image::CopyMip(const Image& image, u32 mip) { }, .extent = {mip_w, mip_h, mip_d}, }; - cmdbuf.copyImage(image.image, image.layout, this->image, this->layout, image_copy); + cmdbuf.copyImage(image.image, image.last_state.layout, this->image, this->last_state.layout, + image_copy); Transit(vk::ImageLayout::eGeneral, - vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead); + vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eTransferRead, {}); } 
Image::~Image() = default; diff --git a/src/video_core/texture_cache/image.h b/src/video_core/texture_cache/image.h index 1bbb975ba..01e6fe8f3 100644 --- a/src/video_core/texture_cache/image.h +++ b/src/video_core/texture_cache/image.h @@ -5,13 +5,9 @@ #include "common/enum.h" #include "common/types.h" -#include "core/libraries/videoout/buffer.h" -#include "video_core/amdgpu/liverpool.h" -#include "video_core/amdgpu/resource.h" #include "video_core/renderer_vulkan/vk_common.h" #include "video_core/texture_cache/image_info.h" #include "video_core/texture_cache/image_view.h" -#include "video_core/texture_cache/types.h" #include @@ -26,12 +22,16 @@ VK_DEFINE_HANDLE(VmaAllocator) namespace VideoCore { enum ImageFlagBits : u32 { - CpuModified = 1 << 2, ///< Contents have been modified from the CPU + CpuDirty = 1 << 1, ///< Contents have been modified from the CPU + GpuDirty = 1 << 2, ///< Contents have been modified from the GPU (valid data in buffer cache) + Dirty = CpuDirty | GpuDirty, GpuModified = 1 << 3, ///< Contents have been modified from the GPU Tracked = 1 << 4, ///< Writes and reads are being hooked from the CPU Registered = 1 << 6, ///< True when the image is registered Picked = 1 << 7, ///< Temporary flag to mark the image as picked MetaRegistered = 1 << 8, ///< True when metadata for this surface is known and registered + Bound = 1 << 9, ///< True when the image is bound to a descriptor set + NeedsRebind = 1 << 10, ///< True when the image needs to be rebound }; DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) @@ -91,8 +91,11 @@ struct Image { return image_view_ids[std::distance(image_view_infos.begin(), it)]; } - void Transit(vk::ImageLayout dst_layout, vk::Flags dst_mask, - vk::CommandBuffer cmdbuf = {}); + boost::container::small_vector GetBarriers( + vk::ImageLayout dst_layout, vk::Flags dst_mask, + vk::PipelineStageFlags2 dst_stage, std::optional subres_range); + void Transit(vk::ImageLayout dst_layout, vk::Flags dst_mask, + std::optional range, 
vk::CommandBuffer cmdbuf = {}); void Upload(vk::Buffer buffer, u64 offset); void CopyImage(const Image& image); @@ -103,7 +106,7 @@ struct Image { ImageInfo info; UniqueImage image; vk::ImageAspectFlags aspect_mask = vk::ImageAspectFlagBits::eColor; - ImageFlagBits flags = ImageFlagBits::CpuModified; + ImageFlagBits flags = ImageFlagBits::Dirty; VAddr cpu_addr = 0; VAddr cpu_addr_end = 0; std::vector image_view_infos; @@ -111,10 +114,14 @@ struct Image { // Resource state tracking vk::ImageUsageFlags usage; - vk::Flags pl_stage = vk::PipelineStageFlagBits::eAllCommands; - vk::Flags access_mask = vk::AccessFlagBits::eNone; - vk::ImageLayout layout = vk::ImageLayout::eUndefined; - boost::container::small_vector mip_hashes; + struct State { + vk::Flags pl_stage = vk::PipelineStageFlagBits2::eAllCommands; + vk::Flags access_mask = vk::AccessFlagBits2::eNone; + vk::ImageLayout layout = vk::ImageLayout::eUndefined; + }; + State last_state{}; + std::vector subresource_states{}; + boost::container::small_vector mip_hashes{}; u64 tick_accessed_last{0}; }; diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp index 011e19db8..521e4118f 100644 --- a/src/video_core/texture_cache/image_info.cpp +++ b/src/video_core/texture_cache/image_info.cpp @@ -200,18 +200,12 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slice mips_layout.emplace_back(depth_slice_sz, pitch, 0); } -ImageInfo::ImageInfo(const AmdGpu::Image& image, bool force_depth /*= false*/) noexcept { +ImageInfo::ImageInfo(const AmdGpu::Image& image, const Shader::ImageResource& desc) noexcept { tiling_mode = image.GetTilingMode(); pixel_format = LiverpoolToVK::SurfaceFormat(image.GetDataFmt(), image.GetNumberFmt()); // Override format if image is forced to be a depth target - if (force_depth) { - if (pixel_format == vk::Format::eR32Sfloat || pixel_format == vk::Format::eR8Unorm) { - pixel_format = vk::Format::eD32SfloatS8Uint; - } else if 
(pixel_format == vk::Format::eR16Unorm) { - pixel_format = vk::Format::eD16UnormS8Uint; - } else { - UNREACHABLE(); - } + if (desc.is_depth) { + pixel_format = LiverpoolToVK::PromoteFormatToDepth(pixel_format); } type = ConvertImageType(image.GetType()); props.is_tiled = image.IsTiled(); @@ -224,7 +218,7 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image, bool force_depth /*= false*/) n size.depth = props.is_volume ? image.depth + 1 : 1; pitch = image.Pitch(); resources.levels = image.NumLevels(); - resources.layers = image.NumLayers(); + resources.layers = image.NumLayers(desc.is_array); num_bits = NumBits(image.GetDataFmt()); usage.texture = true; diff --git a/src/video_core/texture_cache/image_info.h b/src/video_core/texture_cache/image_info.h index ba8985b8f..2ae2547f7 100644 --- a/src/video_core/texture_cache/image_info.h +++ b/src/video_core/texture_cache/image_info.h @@ -5,6 +5,7 @@ #include "common/types.h" #include "core/libraries/videoout/buffer.h" +#include "shader_recompiler/info.h" #include "video_core/amdgpu/liverpool.h" #include "video_core/texture_cache/types.h" @@ -19,7 +20,7 @@ struct ImageInfo { const AmdGpu::Liverpool::CbDbExtent& hint = {}) noexcept; ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slices, VAddr htile_address, const AmdGpu::Liverpool::CbDbExtent& hint = {}) noexcept; - ImageInfo(const AmdGpu::Image& image, bool force_depth = false) noexcept; + ImageInfo(const AmdGpu::Image& image, const Shader::ImageResource& desc) noexcept; bool IsTiled() const { return tiling_mode != AmdGpu::TilingMode::Display_Linear; diff --git a/src/video_core/texture_cache/image_view.cpp b/src/video_core/texture_cache/image_view.cpp index f94c1a37b..2aad1afb6 100644 --- a/src/video_core/texture_cache/image_view.cpp +++ b/src/video_core/texture_cache/image_view.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "common/logging/log.h" +#include "shader_recompiler/info.h" #include "video_core/amdgpu/resource.h" #include 
"video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/vk_instance.h" @@ -66,19 +67,40 @@ vk::Format TrySwizzleFormat(vk::Format format, u32 dst_sel) { return format; } -ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, bool is_storage_) noexcept - : is_storage{is_storage_} { - type = ConvertImageViewType(image.GetType()); +ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, const Shader::ImageResource& desc) noexcept + : is_storage{desc.is_storage} { const auto dfmt = image.GetDataFmt(); auto nfmt = image.GetNumberFmt(); if (is_storage && nfmt == AmdGpu::NumberFormat::Srgb) { nfmt = AmdGpu::NumberFormat::Unorm; } format = Vulkan::LiverpoolToVK::SurfaceFormat(dfmt, nfmt); + if (desc.is_depth) { + format = Vulkan::LiverpoolToVK::PromoteFormatToDepth(format); + } range.base.level = image.base_level; range.base.layer = image.base_array; - range.extent.levels = image.last_level + 1; - range.extent.layers = image.last_array + 1; + range.extent.levels = image.last_level - image.base_level + 1; + range.extent.layers = image.last_array - image.base_array + 1; + type = ConvertImageViewType(image.GetType()); + + // Adjust view type for partial cubemaps and arrays + if (image.IsPartialCubemap()) { + type = vk::ImageViewType::e2DArray; + } + if (type == vk::ImageViewType::eCube) { + if (desc.is_array) { + type = vk::ImageViewType::eCubeArray; + } else { + // Some games try to bind an array of cubemaps while shader reads only a single one. + range.extent.layers = std::min(range.extent.layers, 6u); + } + } + if (type == vk::ImageViewType::e3D && range.extent.layers > 1) { + // Some games pass incorrect layer count for 3D textures so we need to fix it up. 
+ range.extent.layers = 1; + } + if (!is_storage) { mapping.r = ConvertComponentSwizzle(image.dst_sel_x); mapping.g = ConvertComponentSwizzle(image.dst_sel_y); @@ -103,7 +125,7 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Liverpool::ColorBuffer& col_buffer, const auto base_format = Vulkan::LiverpoolToVK::SurfaceFormat(col_buffer.info.format, col_buffer.NumFormat()); range.base.layer = col_buffer.view.slice_start; - range.extent.layers = col_buffer.NumSlices(); + range.extent.layers = col_buffer.NumSlices() - range.base.layer; format = Vulkan::LiverpoolToVK::AdjustColorBufferFormat( base_format, col_buffer.info.comp_swap.Value(), is_vo_surface); } @@ -115,7 +137,7 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Liverpool::DepthBuffer& depth_buffer, depth_buffer.stencil_info.format); is_storage = ctl.depth_write_enable; range.base.layer = view.slice_start; - range.extent.layers = view.NumSlices(); + range.extent.layers = view.NumSlices() - range.base.layer; } ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info_, Image& image, @@ -147,9 +169,9 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info .subresourceRange{ .aspectMask = aspect, .baseMipLevel = info.range.base.level, - .levelCount = info.range.extent.levels - info.range.base.level, + .levelCount = info.range.extent.levels, .baseArrayLayer = info.range.base.layer, - .layerCount = info.range.extent.layers - info.range.base.layer, + .layerCount = info.range.extent.layers, }, }; image_view = instance.GetDevice().createImageViewUnique(image_view_ci); diff --git a/src/video_core/texture_cache/image_view.h b/src/video_core/texture_cache/image_view.h index 7d53590dd..ba8d2c72b 100644 --- a/src/video_core/texture_cache/image_view.h +++ b/src/video_core/texture_cache/image_view.h @@ -3,6 +3,7 @@ #pragma once +#include "shader_recompiler/info.h" #include "video_core/amdgpu/liverpool.h" #include "video_core/amdgpu/resource.h" #include 
"video_core/renderer_vulkan/vk_common.h" @@ -17,7 +18,7 @@ namespace VideoCore { struct ImageViewInfo { ImageViewInfo() = default; - ImageViewInfo(const AmdGpu::Image& image, bool is_storage) noexcept; + ImageViewInfo(const AmdGpu::Image& image, const Shader::ImageResource& desc) noexcept; ImageViewInfo(const AmdGpu::Liverpool::ColorBuffer& col_buffer, bool is_vo_surface) noexcept; ImageViewInfo(const AmdGpu::Liverpool::DepthBuffer& depth_buffer, AmdGpu::Liverpool::DepthView view, AmdGpu::Liverpool::DepthControl ctl); diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 8621e95f5..4813a3c57 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -47,24 +47,23 @@ void TextureCache::InvalidateMemory(VAddr address, size_t size) { std::scoped_lock lock{mutex}; ForEachImageInRegion(address, size, [&](ImageId image_id, Image& image) { // Ensure image is reuploaded when accessed again. - image.flags |= ImageFlagBits::CpuModified; + image.flags |= ImageFlagBits::CpuDirty; // Untrack image, so the range is unprotected and the guest can write freely. UntrackImage(image_id); }); } -void TextureCache::MarkWritten(VAddr address, size_t max_size) { - static constexpr FindFlags find_flags = - FindFlags::NoCreate | FindFlags::RelaxDim | FindFlags::RelaxFmt | FindFlags::RelaxSize; - ImageInfo info{}; - info.guest_address = address; - info.guest_size_bytes = max_size; - const ImageId image_id = FindImage(info, find_flags); - if (!image_id) { - return; - } - // Ensure image is copied when accessed again. - slot_images[image_id].flags |= ImageFlagBits::CpuModified; +void TextureCache::InvalidateMemoryFromGPU(VAddr address, size_t max_size) { + std::scoped_lock lock{mutex}; + ForEachImageInRegion(address, max_size, [&](ImageId image_id, Image& image) { + // Only consider images that match base address. 
+ // TODO: Maybe also consider subresources + if (image.info.guest_address != address) { + return; + } + // Ensure image is reuploaded when accessed again. + image.flags |= ImageFlagBits::GpuDirty; + }); } void TextureCache::UnmapMemory(VAddr cpu_addr, size_t size) { @@ -87,8 +86,7 @@ ImageId TextureCache::ResolveDepthOverlap(const ImageInfo& requested_info, Image auto new_image_id = slot_images.insert(instance, scheduler, requested_info); RegisterImage(new_image_id); - // auto& new_image = slot_images[new_image_id]; - // TODO: need to run a helper for depth copy here + // TODO: perform a depth copy here FreeImage(cache_image_id); return new_image_id; @@ -98,7 +96,11 @@ ImageId TextureCache::ResolveDepthOverlap(const ImageInfo& requested_info, Image !requested_info.usage.depth_target && (requested_info.usage.texture || requested_info.usage.storage); if (cache_info.usage.depth_target && should_bind_as_texture) { - return cache_image_id; + if (cache_info.resources == requested_info.resources) { + return cache_image_id; + } else { + UNREACHABLE(); + } } return {}; @@ -154,7 +156,7 @@ ImageId TextureCache::ResolveOverlap(const ImageInfo& image_info, ImageId cache_ if (tex_cache_image.info.IsMipOf(image_info)) { tex_cache_image.Transit(vk::ImageLayout::eTransferSrcOptimal, - vk::AccessFlagBits::eTransferRead); + vk::AccessFlagBits2::eTransferRead, {}); const auto num_mips_to_copy = tex_cache_image.info.resources.levels; ASSERT(num_mips_to_copy == 1); @@ -176,13 +178,17 @@ ImageId TextureCache::ExpandImage(const ImageInfo& info, ImageId image_id) { auto& src_image = slot_images[image_id]; auto& new_image = slot_images[new_image_id]; - src_image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits::eTransferRead); + src_image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {}); new_image.CopyImage(src_image); + if (True(src_image.flags & ImageFlagBits::Bound)) { + src_image.flags |= ImageFlagBits::NeedsRebind; + } + 
FreeImage(image_id); TrackImage(new_image_id); - new_image.flags &= ~ImageFlagBits::CpuModified; + new_image.flags &= ~ImageFlagBits::Dirty; return new_image_id; } @@ -255,21 +261,21 @@ ImageView& TextureCache::RegisterImageView(ImageId image_id, const ImageViewInfo return slot_image_views[view_id]; } -ImageView& TextureCache::FindTexture(const ImageInfo& info, const ImageViewInfo& view_info) { - const ImageId image_id = FindImage(info); +ImageView& TextureCache::FindTexture(ImageId image_id, const ImageViewInfo& view_info) { Image& image = slot_images[image_id]; UpdateImage(image_id); auto& usage = image.info.usage; if (view_info.is_storage) { image.Transit(vk::ImageLayout::eGeneral, - vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite); + vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eShaderWrite, + view_info.range); usage.storage = true; } else { const auto new_layout = image.info.IsDepthStencil() ? vk::ImageLayout::eDepthStencilReadOnlyOptimal : vk::ImageLayout::eShaderReadOnlyOptimal; - image.Transit(new_layout, vk::AccessFlagBits::eShaderRead); + image.Transit(new_layout, vk::AccessFlagBits2::eShaderRead, view_info.range); usage.texture = true; } @@ -284,8 +290,9 @@ ImageView& TextureCache::FindRenderTarget(const ImageInfo& image_info, UpdateImage(image_id); image.Transit(vk::ImageLayout::eColorAttachmentOptimal, - vk::AccessFlagBits::eColorAttachmentWrite | - vk::AccessFlagBits::eColorAttachmentRead); + vk::AccessFlagBits2::eColorAttachmentWrite | + vk::AccessFlagBits2::eColorAttachmentRead, + view_info.range); // Register meta data for this color buffer if (!(image.flags & ImageFlagBits::MetaRegistered)) { @@ -317,7 +324,7 @@ ImageView& TextureCache::FindDepthTarget(const ImageInfo& image_info, const ImageId image_id = FindImage(image_info); Image& image = slot_images[image_id]; image.flags |= ImageFlagBits::GpuModified; - image.flags &= ~ImageFlagBits::CpuModified; + image.flags &= ~ImageFlagBits::Dirty; image.aspect_mask = 
vk::ImageAspectFlagBits::eDepth; const bool has_stencil = image_info.usage.stencil; @@ -330,8 +337,10 @@ ImageView& TextureCache::FindDepthTarget(const ImageInfo& image_info, : vk::ImageLayout::eDepthAttachmentOptimal : has_stencil ? vk::ImageLayout::eDepthStencilReadOnlyOptimal : vk::ImageLayout::eDepthReadOnlyOptimal; - image.Transit(new_layout, vk::AccessFlagBits::eDepthStencilAttachmentWrite | - vk::AccessFlagBits::eDepthStencilAttachmentRead); + image.Transit(new_layout, + vk::AccessFlagBits2::eDepthStencilAttachmentWrite | + vk::AccessFlagBits2::eDepthStencilAttachmentRead, + view_info.range); // Register meta data for this depth buffer if (!(image.flags & ImageFlagBits::MetaRegistered)) { @@ -352,11 +361,9 @@ ImageView& TextureCache::FindDepthTarget(const ImageInfo& image_info, } void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_scheduler /*= nullptr*/) { - if (False(image.flags & ImageFlagBits::CpuModified)) { + if (False(image.flags & ImageFlagBits::Dirty)) { return; } - // Mark image as validated. - image.flags &= ~ImageFlagBits::CpuModified; const auto& num_layers = image.info.resources.layers; const auto& num_mips = image.info.resources.levels; @@ -370,9 +377,10 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule image.info.props.is_volume ? std::max(image.info.size.depth >> m, 1u) : 1u; const auto& [mip_size, mip_pitch, mip_height, mip_ofs] = image.info.mips_layout[m]; - // Protect GPU modified resources from accidental reuploads. - if (True(image.flags & ImageFlagBits::GpuModified) && - !buffer_cache.IsRegionGpuModified(image.info.guest_address + mip_ofs, mip_size)) { + // Protect GPU modified resources from accidental CPU reuploads. 
+ const bool is_gpu_modified = True(image.flags & ImageFlagBits::GpuModified); + const bool is_gpu_dirty = True(image.flags & ImageFlagBits::GpuDirty); + if (is_gpu_modified && !is_gpu_dirty) { const u8* addr = std::bit_cast(image.info.guest_address); const u64 hash = XXH3_64bits(addr + mip_ofs, mip_size); if (image.mip_hashes[m] == hash) { @@ -404,7 +412,8 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule sched_ptr->EndRendering(); const auto cmdbuf = sched_ptr->CommandBuffer(); - image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite, cmdbuf); + image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {}, + cmdbuf); const VAddr image_addr = image.info.guest_address; const size_t image_size = image.info.guest_size_bytes; @@ -427,6 +436,7 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule } cmdbuf.copyBufferToImage(buffer, image.image, vk::ImageLayout::eTransferDstOptimal, image_copy); + image.flags &= ~ImageFlagBits::Dirty; } vk::Sampler TextureCache::GetSampler(const AmdGpu::Sampler& sampler) { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 329128a3c..3bbfd952c 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -51,7 +51,7 @@ public: void InvalidateMemory(VAddr address, size_t size); /// Marks an image as dirty if it exists at the provided address. - void MarkWritten(VAddr address, size_t max_size); + void InvalidateMemoryFromGPU(VAddr address, size_t max_size); /// Evicts any images that overlap the unmapped range. void UnmapMemory(VAddr cpu_addr, size_t size); @@ -59,9 +59,8 @@ public: /// Retrieves the image handle of the image with the provided attributes. 
[[nodiscard]] ImageId FindImage(const ImageInfo& info, FindFlags flags = {}); - /// Retrieves an image view with the properties of the specified image descriptor. - [[nodiscard]] ImageView& FindTexture(const ImageInfo& image_info, - const ImageViewInfo& view_info); + /// Retrieves an image view with the properties of the specified image id. + [[nodiscard]] ImageView& FindTexture(ImageId image_id, const ImageViewInfo& view_info); /// Retrieves the render target with specified properties [[nodiscard]] ImageView& FindRenderTarget(const ImageInfo& image_info,