diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index e46401cb7..1a2a6eff6 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -43,10 +43,10 @@ jobs: mv ${{github.workspace}}/build/shadps4 upload cp $(arch -x86_64 /usr/local/bin/brew --prefix)/opt/molten-vk/lib/libMoltenVK.dylib upload install_name_tool -add_rpath "@loader_path" upload/shadps4 - tar cf shadps4-macos.tar.gz -C upload . + tar cf shadps4-macos-sdl.tar.gz -C upload . - name: Upload executable uses: actions/upload-artifact@v4 with: - name: shadps4-macos - path: shadps4-macos.tar.gz + name: shadps4-macos-sdl + path: shadps4-macos-sdl.tar.gz diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 46dc13a89..499124863 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -29,6 +29,6 @@ jobs: - name: Upload executable uses: actions/upload-artifact@v4 with: - name: shadps4-win64 + name: shadps4-win64-sdl path: | ${{github.workspace}}/build/Release/shadPS4.exe diff --git a/.gitmodules b/.gitmodules index 3a9d8f42f..60fb5fbbf 100644 --- a/.gitmodules +++ b/.gitmodules @@ -61,3 +61,6 @@ [submodule "externals/date"] path = externals/date url = https://github.com/HowardHinnant/date.git +[submodule "externals/ffmpeg-core"] + path = externals/ffmpeg-core + url = https://github.com/shadps4-emu/ext-ffmpeg-core diff --git a/.reuse/dep5 b/.reuse/dep5 index a80001f84..0140c0c02 100644 --- a/.reuse/dep5 +++ b/.reuse/dep5 @@ -15,6 +15,7 @@ Files: CMakeSettings.json documents/Screenshots/Undertale.png documents/Screenshots/We are DOOMED.png scripts/ps4_names.txt + src/images/about_icon.png src/images/controller_icon.png src/images/exit_icon.png src/images/file_icon.png diff --git a/CMakeLists.txt b/CMakeLists.txt index bc30f6c98..6c36462ff 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -67,11 +67,13 @@ configure_file("${CMAKE_CURRENT_SOURCE_DIR}/src/common/scm_rev.cpp.in" "${CMAKE_ find_package(Boost 1.84.0 CONFIG) find_package(cryptopp 8.9.0 MODULE) +find_package(FFmpeg 5.1.2 MODULE) find_package(fmt 10.2.1 CONFIG) find_package(glslang 14.2.0 CONFIG) find_package(magic_enum 0.9.6 CONFIG) +find_package(RenderDoc 1.6.0 MODULE) find_package(SDL3 3.1.2 CONFIG) -find_package(toml11 3.8.1 CONFIG) +find_package(toml11 4.2.0 CONFIG) find_package(tsl-robin-map 1.3.0 CONFIG) find_package(VulkanHeaders 1.3.289 CONFIG) find_package(VulkanMemoryAllocator 3.1.0 CONFIG) @@ -79,7 +81,6 @@ find_package(xbyak 7.07 CONFIG) find_package(xxHash 0.8.2 MODULE) find_package(zlib-ng 2.2.0 MODULE) find_package(Zydis 4.1.0 CONFIG) -find_package(RenderDoc MODULE) if (APPLE) find_package(date 3.0.1 CONFIG) @@ -96,6 +97,15 @@ if(HAVE_SEM_TIMEDWAIT OR WIN32) add_compile_options(-DHAVE_SEM_TIMEDWAIT) endif() +if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang") + # libc++ requires -fexperimental-library to enable std::jthread and std::stop_token support. + include(CheckCXXSymbolExists) + check_cxx_symbol_exists(_LIBCPP_VERSION version LIBCPP) + if(LIBCPP) + add_compile_options(-fexperimental-library) + endif() +endif() + add_subdirectory(externals) include_directories(src) @@ -107,7 +117,7 @@ if(ENABLE_QT_GUI) set(CMAKE_AUTOUIC ON) endif() -set(AUDIO_CORE src/audio_core/sdl_audio.cpp +set(AUDIO_CORE src/audio_core/sdl_audio.cpp src/audio_core/sdl_audio.h ) @@ -117,6 +127,8 @@ set(AUDIO_LIB src/core/libraries/audio/audioin.cpp src/core/libraries/audio/audioout.h src/core/libraries/ajm/ajm.cpp src/core/libraries/ajm/ajm.h + src/core/libraries/ngs2/ngs2.cpp + src/core/libraries/ngs2/ngs2.h ) set(GNM_LIB src/core/libraries/gnmdriver/gnmdriver.cpp @@ -181,11 +193,27 @@ set(SYSTEM_LIBS src/core/libraries/system/commondialog.cpp src/core/libraries/app_content/app_content.h src/core/libraries/rtc/rtc.cpp src/core/libraries/rtc/rtc.h + src/core/libraries/rtc/rtc_error.h src/core/libraries/disc_map/disc_map.cpp src/core/libraries/disc_map/disc_map.h src/core/libraries/disc_map/disc_map_codes.h + src/core/libraries/avplayer/avplayer_common.cpp + src/core/libraries/avplayer/avplayer_common.h + src/core/libraries/avplayer/avplayer_file_streamer.cpp + src/core/libraries/avplayer/avplayer_file_streamer.h + src/core/libraries/avplayer/avplayer_impl.cpp + src/core/libraries/avplayer/avplayer_impl.h + src/core/libraries/avplayer/avplayer_source.cpp + src/core/libraries/avplayer/avplayer_source.h + src/core/libraries/avplayer/avplayer_state.cpp + src/core/libraries/avplayer/avplayer_state.h src/core/libraries/avplayer/avplayer.cpp src/core/libraries/avplayer/avplayer.h + src/core/libraries/ngs2/ngs2.cpp + src/core/libraries/ngs2/ngs2.h + src/core/libraries/ngs2/ngs2_error.h + src/core/libraries/ngs2/ngs2_impl.cpp + src/core/libraries/ngs2/ngs2_impl.h ) set(VIDEOOUT_LIB src/core/libraries/videoout/buffer.h @@ -421,6 +449,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h src/shader_recompiler/ir/passes/dead_code_elimination_pass.cpp src/shader_recompiler/ir/passes/identity_removal_pass.cpp src/shader_recompiler/ir/passes/ir_passes.h + src/shader_recompiler/ir/passes/lower_shared_mem_to_registers.cpp src/shader_recompiler/ir/passes/resource_tracking_pass.cpp src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp @@ -528,34 +557,36 @@ set(EMULATOR src/emulator.cpp if(ENABLE_QT_GUI) qt_add_resources(RESOURCE_FILES src/shadps4.qrc) -set(QT_GUI - src/qt_gui/main_window_ui.h - src/qt_gui/main_window.cpp - src/qt_gui/main_window.h - src/qt_gui/gui_context_menus.h - src/qt_gui/game_list_utils.h - src/qt_gui/game_info.cpp - src/qt_gui/game_info.h - src/qt_gui/game_list_frame.cpp - src/qt_gui/game_list_frame.h - src/qt_gui/game_grid_frame.cpp - src/qt_gui/game_grid_frame.h - src/qt_gui/game_install_dialog.cpp - src/qt_gui/game_install_dialog.h - src/qt_gui/pkg_viewer.cpp - src/qt_gui/pkg_viewer.h - src/qt_gui/trophy_viewer.cpp - src/qt_gui/trophy_viewer.h - src/qt_gui/elf_viewer.cpp - src/qt_gui/elf_viewer.h - src/qt_gui/main_window_themes.cpp - src/qt_gui/main_window_themes.h - src/qt_gui/settings_dialog.cpp - src/qt_gui/settings_dialog.h - src/qt_gui/settings_dialog.ui - src/qt_gui/main.cpp - ${EMULATOR} - ${RESOURCE_FILES} +set(QT_GUI src/qt_gui/about_dialog.cpp + src/qt_gui/about_dialog.h + src/qt_gui/about_dialog.ui + src/qt_gui/main_window_ui.h + src/qt_gui/main_window.cpp + src/qt_gui/main_window.h + src/qt_gui/gui_context_menus.h + src/qt_gui/game_list_utils.h + src/qt_gui/game_info.cpp + src/qt_gui/game_info.h + src/qt_gui/game_list_frame.cpp + src/qt_gui/game_list_frame.h + src/qt_gui/game_grid_frame.cpp + src/qt_gui/game_grid_frame.h + src/qt_gui/game_install_dialog.cpp + src/qt_gui/game_install_dialog.h + src/qt_gui/pkg_viewer.cpp + src/qt_gui/pkg_viewer.h + src/qt_gui/trophy_viewer.cpp + src/qt_gui/trophy_viewer.h + src/qt_gui/elf_viewer.cpp + src/qt_gui/elf_viewer.h + src/qt_gui/main_window_themes.cpp + src/qt_gui/main_window_themes.h + src/qt_gui/settings_dialog.cpp + src/qt_gui/settings_dialog.h + src/qt_gui/settings_dialog.ui + src/qt_gui/main.cpp + ${EMULATOR} + ${RESOURCE_FILES} ) endif() @@ -594,7 +625,7 @@ endif() create_target_directory_groups(shadps4) -target_link_libraries(shadps4 PRIVATE magic_enum::magic_enum fmt::fmt toml11::toml11 tsl::robin_map xbyak::xbyak Tracy::TracyClient RenderDoc::API) +target_link_libraries(shadps4 PRIVATE magic_enum::magic_enum fmt::fmt toml11::toml11 tsl::robin_map xbyak::xbyak Tracy::TracyClient RenderDoc::API FFmpeg::ffmpeg) target_link_libraries(shadps4 PRIVATE Boost::headers GPUOpen::VulkanMemoryAllocator sirit Vulkan::Headers xxHash::xxhash Zydis::Zydis glslang::SPIRV glslang::glslang SDL3::SDL3) if (APPLE) @@ -624,23 +655,40 @@ if (ENABLE_QT_GUI) endif() if (WIN32) - target_link_libraries(shadps4 PRIVATE mincore winpthreads clang_rt.builtins-x86_64.lib) - add_definitions(-D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_DEPRECATE -D_SCL_SECURE_NO_WARNINGS) + target_link_libraries(shadps4 PRIVATE mincore winpthreads) + + if (MSVC) + # MSVC likes putting opinions on what people can use, disable: + add_definitions(-D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_DEPRECATE -D_SCL_SECURE_NO_WARNINGS) + endif() + add_definitions(-DNOMINMAX -DWIN32_LEAN_AND_MEAN) + if (MSVC) # Needed for conflicts with time.h of windows.h add_definitions(-D_TIMESPEC_DEFINED) endif() + # Target Windows 10 RS5 add_definitions(-DNTDDI_VERSION=0x0A000006 -D_WIN32_WINNT=0x0A00 -DWINVER=0x0A00) - # Increase stack commit area - target_link_options(shadps4 PRIVATE /STACK:0x200000,0x200000) + + if (MSVC) + target_link_libraries(shadps4 PRIVATE clang_rt.builtins-x86_64.lib) + endif() + # Disable ASLR so we can reserve the user area if (MSVC) target_link_options(shadps4 PRIVATE /DYNAMICBASE:NO) else() target_link_options(shadps4 PRIVATE -Wl,--disable-dynamicbase) endif() + + # Increase stack commit area (Needed, otherwise there are crashes) + if (MSVC) + target_link_options(shadps4 PRIVATE /STACK:0x200000,0x200000) + else() + target_link_options(shadps4 PRIVATE -Wl,--stack,2097152) + endif() endif() if (WIN32) diff --git a/cmake/FindFFmpeg.cmake b/cmake/FindFFmpeg.cmake new file mode 100644 index 000000000..9c45844b7 --- /dev/null +++ b/cmake/FindFFmpeg.cmake @@ -0,0 +1,23 @@ +# SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +# SPDX-License-Identifier: GPL-2.0-or-later + +find_package(PkgConfig QUIET) +pkg_check_modules(FFMPEG QUIET IMPORTED_TARGET libavcodec libavfilter libavformat libavutil libswresample libswscale) + +find_file(FFMPEG_VERSION_FILE libavutil/ffversion.h HINTS "${FFMPEG_libavutil_INCLUDEDIR}") +if (FFMPEG_VERSION_FILE) + file(STRINGS "${FFMPEG_VERSION_FILE}" FFMPEG_VERSION_LINE REGEX "FFMPEG_VERSION") + string(REGEX MATCH "[0-9.]+" FFMPEG_VERSION "${FFMPEG_VERSION_LINE}") + unset(FFMPEG_VERSION_LINE) + unset(FFMPEG_VERSION_FILE) +endif() + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(FFmpeg + REQUIRED_VARS FFMPEG_LINK_LIBRARIES + VERSION_VAR FFMPEG_VERSION +) + +if (FFmpeg_FOUND AND NOT TARGET FFmpeg::ffmpeg) + add_library(FFmpeg::ffmpeg ALIAS PkgConfig::FFMPEG) +endif() diff --git a/cmake/FindRenderDoc.cmake b/cmake/FindRenderDoc.cmake new file mode 100644 index 000000000..e4cf8a6df --- /dev/null +++ b/cmake/FindRenderDoc.cmake @@ -0,0 +1,25 @@ +# SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +# SPDX-License-Identifier: GPL-2.0-or-later + +find_path(RENDERDOC_INCLUDE_DIR renderdoc_app.h) + +if (RENDERDOC_INCLUDE_DIR AND EXISTS "${RENDERDOC_INCLUDE_DIR}/renderdoc_app.h") + file(STRINGS "${RENDERDOC_INCLUDE_DIR}/renderdoc_app.h" RENDERDOC_VERSION_LINE REGEX "typedef struct RENDERDOC_API") + string(REGEX REPLACE ".*typedef struct RENDERDOC_API_([0-9]+)_([0-9]+)_([0-9]+).*" "\\1.\\2.\\3" RENDERDOC_VERSION "${RENDERDOC_VERSION_LINE}") + unset(RENDERDOC_VERSION_LINE) +endif() + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(RenderDoc + REQUIRED_VARS RENDERDOC_INCLUDE_DIR + VERSION_VAR RENDERDOC_VERSION +) + +if (RenderDoc_FOUND AND NOT TARGET RenderDoc::API) + add_library(RenderDoc::API INTERFACE IMPORTED) + set_target_properties(RenderDoc::API PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${RENDERDOC_INCLUDE_DIR}" + ) +endif() + +mark_as_advanced(RENDERDOC_INCLUDE_DIR) diff --git a/documents/building-windows.md b/documents/building-windows.md index e00ed90d9..684e5fe95 100644 --- a/documents/building-windows.md +++ b/documents/building-windows.md @@ -5,21 +5,77 @@ SPDX-License-Identifier: GPL-2.0-or-later # Build shadPS4 for Windows -## Download Visual Studio Community 2022 +This tutorial reads as if you have none of the prerequisites already installed. If you do, just ignore the steps regarding installation. +If you are building to contribute to the project, please omit `--depth 1` from the git invokations. -Download link: [**Visual Studio 2022**](https://visualstudio.microsoft.com/vs/) +Note: **ARM64 is not supported!** As of writing, it will not build nor run. The instructions with respect to ARM64 are for developers only. -## Requirements +## Option 1: Visual Studio 2022 -### From Visual Studio Community +### (Prerequisite) Download the Community edition from [**Visual Studio 2022**](https://visualstudio.microsoft.com/vs/) -- Desktop development with C++ +Once you are within the installer: +1. Select `Desktop development with C++` +2. Go to "Individual Components" tab +3. Make sure `C++ Clang Compiler for Windows`, `MSBuild support for LLVM` and `C++ CMake Tools for Windows` are selected +4. Continue the installation -### From individual components tab install +### (Prerequisite) Download [**Qt**](https://doc.qt.io/qt-6/get-and-install-qt.html) -- C++ Clang Compiler for Windows (17.0.3) -- MSBuild support for LLVM (Clang-cl) toolset +Beware, this requires you to create a Qt account. If you do not want to do this, please follow the MSYS2/MinGW compilation method instead. -- ## Compiling +1. Select Qt for Visual Studio plugin +2. Select `msvc2019_64` option or similar. If you are on Windows on ARM / Qualcomm Snapdragon Elite X, select `msvc2019_arm64` -- Open Visual Studio Community and select the **x64-Clang-Release**, **x64-Clang-Debug** or **x64-Clang-RelWithDebInfo**. It should compile just fine. +Go through the installation normally. If you do not know what components to select, just select the newest Qt version it gives you. +If you know what you are doing, you may unselect individual components that eat up too much disk space. + +Once you are finished, you will have to configure Qt within Visual Studio: +1. Tools -> Options -> Qt -> Versions +2. Add a new Qt version and navigate it to the correct folder. Should look like so: `C:\Qt\6.7.1\msvc2019_64` +3. Enable the default checkmark on the new version you just created. + +### (Prerequisite) Download [**Git for Windows**](https://git-scm.com/download/win) + +Go through the Git for Windows installation as normal + +### Compiling with Visual Studio GUI + +1. Open Git for Windows, navigate to a place where you want to store the shadPS4 source code folder +2. Run `git clone --depth 1 --recursive https://github.com/shadps4-emu/shadPS4` +3. Open up Visual Studio, select `Open a local folder` and select the folder with the shadPS4 source code. The folder should contain `CMakeLists.txt` +4. Build -> Build All + +## Option 2: MSYS2/MinGW + +### (Prerequisite) Download [**MSYS2**](https://www.msys2.org/) + +Go through the MSYS2 installation as normal + +If you are building to distribute, please omit `-DCMAKE_CXX_FLAGS="-O2 -march=native"` within the build configuration step. + +Normal x86-based computers, follow: +1. Open "MSYS2 MINGW64" from your new applications +2. Run `pacman -Syu`, let it complete; +3. Run `pacman -S --needed git mingw-w64-x86_64-binutils mingw-w64-x86_64-clang mingw-w64-x86_64-cmake mingw-w64-x86_64-ninja mingw-w64-x86_64-qt6-base` +4. Run `git clone --depth 1 --recursive https://github.com/shadps4-emu/shadPS4` +5. Run `cd shadPS4` +6. Run `cmake -S . -B build -DCMAKE_CXX_COMPILER="clang++.exe" -DCMAKE_C_COMPILER="clang.exe" -DCMAKE_CXX_FLAGS="-O2 -march=native"` +7. Run `cmake --build build` +8. To run the finished product, run `./build/shadPS4.exe` + +ARM64-based computers, follow: +1. Open "MSYS2 CLANGARM64" from your new applications +2. Run `pacman -Syu`, let it complete; +3. Run `pacman -S --needed git mingw-w64-clang-aarch64-binutils mingw-w64-clang-aarch64-clang mingw-w64-clang-aarch64-cmake mingw-w64-clang-aarch64-ninja mingw-w64-clang-aarch64-qt6-base` +4. Run `git clone --depth 1 --recursive https://github.com/shadps4-emu/shadPS4` +5. Run `cd shadPS4` +6. Run `cmake -S . -B build -DCMAKE_CXX_COMPILER="clang++.exe" -DCMAKE_C_COMPILER="clang.exe" -DCMAKE_CXX_FLAGS="-O2 -march=native"` +7. Run `cmake --build build` +8. To run the finished product, run `./build/shadPS4.exe` + +## Note on MSYS2 builds + +These builds may not be easily copyable to people who do not also have a MSYS2 installation. +If you want to distribute these builds, you need to copy over the correct DLLs into a distribution folder. +In order to run them, you must be within the MSYS2 shell environment. \ No newline at end of file diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index 9ebdd8783..0b19034d6 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -47,6 +47,12 @@ else() endif() endif() +if (NOT TARGET FFmpeg::ffmpeg) + set(ARCHITECTURE "x86_64") + add_subdirectory(ffmpeg-core) + add_library(FFmpeg::ffmpeg ALIAS ffmpeg) +endif() + # Zlib-Ng if (NOT TARGET zlib-ng::zlib) set(ZLIB_ENABLE_TESTS OFF) diff --git a/externals/ext-boost b/externals/ext-boost index 147b2de77..a04136add 160000 --- a/externals/ext-boost +++ b/externals/ext-boost @@ -1 +1 @@ -Subproject commit 147b2de7734f5dc3b9aeb1f4135ae15fcd44b9d7 +Subproject commit a04136add1e469f46d8ae8d3e8307779240a5c53 diff --git a/externals/ffmpeg-core b/externals/ffmpeg-core new file mode 160000 index 000000000..e30b7d7fe --- /dev/null +++ b/externals/ffmpeg-core @@ -0,0 +1 @@ +Subproject commit e30b7d7fe228bfb3f6e41ce1040b44a15eb7d5e0 diff --git a/externals/glslang b/externals/glslang index 7c4d91e78..d59c84d38 160000 --- a/externals/glslang +++ b/externals/glslang @@ -1 +1 @@ -Subproject commit 7c4d91e7819a1d27213aa3499953d54ae1a00e8f +Subproject commit d59c84d388c805022e2bddea08aa41cbe7e43e55 diff --git a/externals/toml11 b/externals/toml11 index fcb1d3d7e..cc0bee4fd 160000 --- a/externals/toml11 +++ b/externals/toml11 @@ -1 +1 @@ -Subproject commit fcb1d3d7e5885edfadbbe9572991dc4b3248af58 +Subproject commit cc0bee4fd46ea1f5db147d63ea545208cc9e8405 diff --git a/externals/vma b/externals/vma index 871913da6..e1bdbca9b 160000 --- a/externals/vma +++ b/externals/vma @@ -1 +1 @@ -Subproject commit 871913da6a4b132b567d7b65c509600363c0041e +Subproject commit e1bdbca9baf4d682fb6066b380f4aa4a7bdbb58a diff --git a/externals/vulkan-headers b/externals/vulkan-headers index 595c8d479..d205aff40 160000 --- a/externals/vulkan-headers +++ b/externals/vulkan-headers @@ -1 +1 @@ -Subproject commit 595c8d4794410a4e64b98dc58d27c0310d7ea2fd +Subproject commit d205aff40b4e15d4c568523ee6a26f85138126d9 diff --git a/externals/xxhash b/externals/xxhash index ee65ff988..dbea33e47 160000 --- a/externals/xxhash +++ b/externals/xxhash @@ -1 +1 @@ -Subproject commit ee65ff988bab34a184c700e2fbe1e1c5bc27485d +Subproject commit dbea33e47e7c0fe0b7c8592cd931c7430c1f130d diff --git a/externals/zydis b/externals/zydis index 16c6a369c..bd73bc03b 160000 --- a/externals/zydis +++ b/externals/zydis @@ -1 +1 @@ -Subproject commit 16c6a369c193981e9cf314126589eaa8763f92c3 +Subproject commit bd73bc03b0aacaa89c9c203b9b43cd08f1b1843b diff --git a/src/audio_core/sdl_audio.cpp b/src/audio_core/sdl_audio.cpp index 141d338e8..f544c52f9 100644 --- a/src/audio_core/sdl_audio.cpp +++ b/src/audio_core/sdl_audio.cpp @@ -1,19 +1,23 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include "sdl_audio.h" + +#include "common/assert.h" +#include "core/libraries/error_codes.h" + #include #include #include -#include "common/assert.h" -#include "core/libraries/error_codes.h" -#include "sdl_audio.h" + +#include // std::unique_lock namespace Audio { int SDLAudio::AudioOutOpen(int type, u32 samples_num, u32 freq, Libraries::AudioOut::OrbisAudioOutParamFormat format) { using Libraries::AudioOut::OrbisAudioOutParamFormat; - std::scoped_lock lock{m_mutex}; + std::unique_lock lock{m_mutex}; for (int id = 0; id < portsOut.size(); id++) { auto& port = portsOut[id]; if (!port.isOpen) { @@ -88,7 +92,7 @@ int SDLAudio::AudioOutOpen(int type, u32 samples_num, u32 freq, } s32 SDLAudio::AudioOutOutput(s32 handle, const void* ptr) { - std::scoped_lock lock{m_mutex}; + std::shared_lock lock{m_mutex}; auto& port = portsOut[handle - 1]; if (!port.isOpen) { return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT; @@ -100,7 +104,7 @@ s32 SDLAudio::AudioOutOutput(s32 handle, const void* ptr) { int result = SDL_PutAudioStreamData(port.stream, ptr, port.samples_num * port.sample_size * port.channels_num); // TODO find a correct value 8192 is estimated - while (SDL_GetAudioStreamAvailable(port.stream) > 8192) { + while (SDL_GetAudioStreamAvailable(port.stream) > 65536) { SDL_Delay(0); } @@ -109,7 +113,7 @@ s32 SDLAudio::AudioOutOutput(s32 handle, const void* ptr) { bool SDLAudio::AudioOutSetVolume(s32 handle, s32 bitflag, s32* volume) { using Libraries::AudioOut::OrbisAudioOutParamFormat; - std::scoped_lock lock{m_mutex}; + std::shared_lock lock{m_mutex}; auto& port = portsOut[handle - 1]; if (!port.isOpen) { return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT; @@ -147,7 +151,7 @@ bool SDLAudio::AudioOutSetVolume(s32 handle, s32 bitflag, s32* volume) { } bool SDLAudio::AudioOutGetStatus(s32 handle, int* type, int* channels_num) { - std::scoped_lock lock{m_mutex}; + std::shared_lock lock{m_mutex}; auto& port = portsOut[handle - 1]; *type = port.type; *channels_num = port.channels_num; diff --git a/src/audio_core/sdl_audio.h b/src/audio_core/sdl_audio.h index d20c44559..7844bd61b 100644 --- a/src/audio_core/sdl_audio.h +++ b/src/audio_core/sdl_audio.h @@ -3,7 +3,7 @@ #pragma once -#include +#include #include #include "core/libraries/audio/audioout.h" @@ -32,7 +32,7 @@ private: int volume[8] = {}; SDL_AudioStream* stream = nullptr; }; - std::mutex m_mutex; + std::shared_mutex m_mutex; std::array portsOut; // main up to 8 ports , BGM 1 port , voice up to 4 ports , // personal up to 4 ports , padspk up to 5 ports , aux 1 port }; diff --git a/src/common/config.cpp b/src/common/config.cpp index ebdd9c320..24db6b039 100644 --- a/src/common/config.cpp +++ b/src/common/config.cpp @@ -16,6 +16,7 @@ static u32 screenHeight = 720; static s32 gpuId = -1; // Vulkan physical device index. Set to negative for auto select static std::string logFilter; static std::string logType = "async"; +static std::string userName = "shadPS4"; static bool isDebugDump = false; static bool isLibc = true; static bool isShowSplash = false; @@ -25,7 +26,9 @@ static bool shouldDumpPM4 = false; static u32 vblankDivider = 1; static bool vkValidation = false; static bool vkValidationSync = false; +static bool vkValidationGpu = false; static bool rdocEnable = false; +static bool rdocMarkersEnable = false; // Gui std::string settings_install_dir = ""; u32 main_window_geometry_x = 400; @@ -78,6 +81,10 @@ std::string getLogType() { return logType; } +std::string getUserName() { + return userName; +} + bool debugDump() { return isDebugDump; } @@ -102,6 +109,10 @@ bool isRdocEnabled() { return rdocEnable; } +bool isMarkersEnabled() { + return rdocMarkersEnable; +} + u32 vblankDiv() { return vblankDivider; } @@ -114,6 +125,14 @@ bool vkValidationSyncEnabled() { return vkValidationSync; } +bool vkValidationGpuEnabled() { + return vkValidationGpu; +} + +void setGpuId(s32 selectedGpuId) { + gpuId = selectedGpuId; +} + void setScreenWidth(u32 width) { screenWidth = width; } @@ -178,6 +197,10 @@ void setLogFilter(std::string type) { logFilter = type; } +void setUserName(std::string type) { + userName = type; +} + void setMainWindowGeometry(u32 x, u32 y, u32 w, u32 h) { main_window_geometry_x = x; main_window_geometry_y = y; @@ -299,6 +322,7 @@ void load(const std::filesystem::path& path) { isFullscreen = toml::find_or(general, "Fullscreen", false); logFilter = toml::find_or(general, "logFilter", ""); logType = toml::find_or(general, "logType", "sync"); + userName = toml::find_or(general, "userName", "shadPS4"); isShowSplash = toml::find_or(general, "showSplash", true); } @@ -319,7 +343,9 @@ void load(const std::filesystem::path& path) { gpuId = toml::find_or(vk, "gpuId", -1); vkValidation = toml::find_or(vk, "validation", false); vkValidationSync = toml::find_or(vk, "validation_sync", false); + vkValidationGpu = toml::find_or(vk, "validation_gpu", true); rdocEnable = toml::find_or(vk, "rdocEnable", false); + rdocMarkersEnable = toml::find_or(vk, "rdocMarkersEnable", false); } if (data.contains("Debug")) { @@ -384,6 +410,7 @@ void save(const std::filesystem::path& path) { data["General"]["Fullscreen"] = isFullscreen; data["General"]["logFilter"] = logFilter; data["General"]["logType"] = logType; + data["General"]["userName"] = userName; data["General"]["showSplash"] = isShowSplash; data["GPU"]["screenWidth"] = screenWidth; data["GPU"]["screenHeight"] = screenHeight; @@ -394,7 +421,9 @@ void save(const std::filesystem::path& path) { data["Vulkan"]["gpuId"] = gpuId; data["Vulkan"]["validation"] = vkValidation; data["Vulkan"]["validation_sync"] = vkValidationSync; + data["Vulkan"]["validation_gpu"] = vkValidationGpu; data["Vulkan"]["rdocEnable"] = rdocEnable; + data["Vulkan"]["rdocMarkersEnable"] = rdocMarkersEnable; data["Debug"]["DebugDump"] = isDebugDump; data["LLE"]["libc"] = isLibc; data["GUI"]["theme"] = mw_themes; @@ -428,6 +457,7 @@ void setDefaultValues() { screenHeight = 720; logFilter = ""; logType = "async"; + userName = "shadPS4"; isDebugDump = false; isShowSplash = false; isNullGpu = false; @@ -437,6 +467,7 @@ void setDefaultValues() { vkValidation = false; rdocEnable = false; m_language = 1; + gpuId = -1; } } // namespace Config diff --git a/src/common/config.h b/src/common/config.h index ad0aad221..3006f2e2a 100644 --- a/src/common/config.h +++ b/src/common/config.h @@ -15,6 +15,7 @@ bool isNeoMode(); bool isFullscreenMode(); std::string getLogFilter(); std::string getLogType(); +std::string getUserName(); u32 getScreenWidth(); u32 getScreenHeight(); @@ -27,6 +28,7 @@ bool nullGpu(); bool dumpShaders(); bool dumpPM4(); bool isRdocEnabled(); +bool isMarkersEnabled(); u32 vblankDiv(); void setDebugDump(bool enable); @@ -35,11 +37,13 @@ void setNullGpu(bool enable); void setDumpShaders(bool enable); void setDumpPM4(bool enable); void setVblankDiv(u32 value); +void setGpuId(s32 selectedGpuId); void setScreenWidth(u32 width); void setScreenHeight(u32 height); void setFullscreenMode(bool enable); void setLanguage(u32 language); void setNeoMode(bool enable); +void setUserName(std::string type); void setLogType(std::string type); void setLogFilter(std::string type); @@ -50,6 +54,7 @@ void setRdocEnabled(bool enable); bool vkValidationEnabled(); bool vkValidationSyncEnabled(); +bool vkValidationGpuEnabled(); // Gui void setMainWindowGeometry(u32 x, u32 y, u32 w, u32 h); diff --git a/src/common/logging/filter.cpp b/src/common/logging/filter.cpp index a514652d4..ab3468ca0 100644 --- a/src/common/logging/filter.cpp +++ b/src/common/logging/filter.cpp @@ -106,12 +106,13 @@ bool ParseFilterRule(Filter& instance, Iterator begin, Iterator end) { SUB(Lib, DiscMap) \ SUB(Lib, Png) \ SUB(Lib, PlayGo) \ + SUB(Lib, Random) \ SUB(Lib, Usbd) \ SUB(Lib, Ajm) \ SUB(Lib, ErrorDialog) \ SUB(Lib, ImeDialog) \ SUB(Lib, AvPlayer) \ - SUB(Lib, Random) \ + SUB(Lib, Ngs2) \ CLS(Frontend) \ CLS(Render) \ SUB(Render, Vulkan) \ diff --git a/src/common/logging/types.h b/src/common/logging/types.h index dccb838a5..dd2376ea6 100644 --- a/src/common/logging/types.h +++ b/src/common/logging/types.h @@ -79,6 +79,7 @@ enum class Class : u8 { Lib_ErrorDialog, ///< The LibSceErrorDialog implementation. Lib_ImeDialog, ///< The LibSceImeDialog implementation. Lib_AvPlayer, ///< The LibSceAvPlayer implementation. + Lib_Ngs2, ///< The LibSceNgs2 implementation. Frontend, ///< Emulator UI Render, ///< Video Core Render_Vulkan, ///< Vulkan backend diff --git a/src/common/version.h b/src/common/version.h index 92fd18fb2..80de187b0 100644 --- a/src/common/version.h +++ b/src/common/version.h @@ -8,7 +8,7 @@ namespace Common { -constexpr char VERSION[] = "0.1.1 WIP"; +constexpr char VERSION[] = "0.2.1 WIP"; constexpr bool isRelease = false; } // namespace Common diff --git a/src/core/address_space.cpp b/src/core/address_space.cpp index c3e0d77aa..77d021a6e 100644 --- a/src/core/address_space.cpp +++ b/src/core/address_space.cpp @@ -98,7 +98,7 @@ struct AddressSpace::Impl { backing_handle = CreateFileMapping2(INVALID_HANDLE_VALUE, nullptr, FILE_MAP_WRITE | FILE_MAP_READ, PAGE_READWRITE, SEC_COMMIT, BackingSize, nullptr, nullptr, 0); - ASSERT(backing_handle); + ASSERT_MSG(backing_handle, "{}", Common::GetLastErrorMsg()); // Allocate a virtual memory for the backing file map as placeholder backing_base = static_cast(VirtualAlloc2(process, nullptr, BackingSize, MEM_RESERVE | MEM_RESERVE_PLACEHOLDER, @@ -106,7 +106,7 @@ struct AddressSpace::Impl { // Map backing placeholder. This will commit the pages void* const ret = MapViewOfFile3(backing_handle, process, backing_base, 0, BackingSize, MEM_REPLACE_PLACEHOLDER, PAGE_READWRITE, nullptr, 0); - ASSERT(ret == backing_base); + ASSERT_MSG(ret == backing_base, "{}", Common::GetLastErrorMsg()); } ~Impl() { @@ -454,8 +454,28 @@ void* AddressSpace::MapFile(VAddr virtual_addr, size_t size, size_t offset, u32 #endif } -void AddressSpace::Unmap(VAddr virtual_addr, size_t size, bool has_backing) { - return impl->Unmap(virtual_addr, size, has_backing); +void AddressSpace::Unmap(VAddr virtual_addr, size_t size, VAddr start_in_vma, VAddr end_in_vma, + PAddr phys_base, bool is_exec, bool has_backing) { +#ifdef _WIN32 + // There does not appear to be comparable support for partial unmapping on Windows. + // Unfortunately, a least one title was found to require this. The workaround is to unmap + // the entire allocation and remap the portions outside of the requested unmapping range. + impl->Unmap(virtual_addr, size, has_backing); + + // TODO: Determine if any titles require partial unmapping support for flexible allocations. + ASSERT_MSG(has_backing || (start_in_vma == 0 && end_in_vma == size), + "Partial unmapping of flexible allocations is not supported"); + + if (start_in_vma != 0) { + Map(virtual_addr, start_in_vma, 0, phys_base, is_exec); + } + + if (end_in_vma != size) { + Map(virtual_addr + end_in_vma, size - end_in_vma, 0, phys_base + end_in_vma, is_exec); + } +#else + impl->Unmap(virtual_addr + start_in_vma, end_in_vma - start_in_vma, has_backing); +#endif } void AddressSpace::Protect(VAddr virtual_addr, size_t size, MemoryPermission perms) { diff --git a/src/core/address_space.h b/src/core/address_space.h index 29f74f568..53041bccb 100644 --- a/src/core/address_space.h +++ b/src/core/address_space.h @@ -91,7 +91,8 @@ public: void* MapFile(VAddr virtual_addr, size_t size, size_t offset, u32 prot, uintptr_t fd); /// Unmaps specified virtual memory area. - void Unmap(VAddr virtual_addr, size_t size, bool has_backing); + void Unmap(VAddr virtual_addr, size_t size, VAddr start_in_vma, VAddr end_in_vma, + PAddr phys_base, bool is_exec, bool has_backing); void Protect(VAddr virtual_addr, size_t size, MemoryPermission perms); diff --git a/src/core/file_format/pkg.cpp b/src/core/file_format/pkg.cpp index 6d5fb0d4a..336d81019 100644 --- a/src/core/file_format/pkg.cpp +++ b/src/core/file_format/pkg.cpp @@ -350,7 +350,7 @@ bool PKG::Extract(const std::filesystem::path& filepath, const std::filesystem:: return true; } -void PKG::ExtractFiles(const int& index) { +void PKG::ExtractFiles(const int index) { int inode_number = fsTable[index].inode; int inode_type = fsTable[index].type; std::string inode_name = fsTable[index].name; diff --git a/src/core/file_format/pkg.h b/src/core/file_format/pkg.h index 3fef6c1c4..b6b09a191 100644 --- a/src/core/file_format/pkg.h +++ b/src/core/file_format/pkg.h @@ -104,7 +104,7 @@ public: ~PKG(); bool Open(const std::filesystem::path& filepath); - void ExtractFiles(const int& index); + void ExtractFiles(const int index); bool Extract(const std::filesystem::path& filepath, const std::filesystem::path& extract, std::string& failreason); diff --git a/src/core/file_format/playgo_chunk.cpp b/src/core/file_format/playgo_chunk.cpp index 43d8a4ded..a430b1ac1 100644 --- a/src/core/file_format/playgo_chunk.cpp +++ b/src/core/file_format/playgo_chunk.cpp @@ -1,16 +1,75 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#include "common/io_file.h" - #include "playgo_chunk.h" -bool PlaygoChunk::Open(const std::filesystem::path& filepath) { +bool PlaygoFile::Open(const std::filesystem::path& filepath) { Common::FS::IOFile file(filepath, Common::FS::FileAccessMode::Read); - if (!file.IsOpen()) { - return false; + if (file.IsOpen()) { + file.Read(playgoHeader); + if (LoadChunks(file)) { + return true; + } } - file.Read(playgoHeader); + return false; +} - return true; +bool PlaygoFile::LoadChunks(const Common::FS::IOFile& file) { + if (file.IsOpen()) { + if (playgoHeader.magic == PLAYGO_MAGIC) { + bool ret = true; + + std::string chunk_attrs_data, chunk_mchunks_data, chunk_labels_data, mchunk_attrs_data; + ret = ret && load_chunk_data(file, playgoHeader.chunk_attrs, chunk_attrs_data); + ret = ret && load_chunk_data(file, playgoHeader.chunk_mchunks, chunk_mchunks_data); + ret = ret && load_chunk_data(file, playgoHeader.chunk_labels, chunk_labels_data); + ret = ret && load_chunk_data(file, playgoHeader.mchunk_attrs, mchunk_attrs_data); + + if (ret) { + chunks.resize(playgoHeader.chunk_count); + + auto chunk_attrs = + reinterpret_cast(&chunk_attrs_data[0]); + auto chunk_mchunks = reinterpret_cast(&chunk_mchunks_data[0]); + auto chunk_labels = reinterpret_cast(&chunk_labels_data[0]); + auto mchunk_attrs = + reinterpret_cast(&mchunk_attrs_data[0]); + + for (u16 i = 0; i < playgoHeader.chunk_count; i++) { + chunks[i].req_locus = chunk_attrs[i].req_locus; + chunks[i].language_mask = chunk_attrs[i].language_mask; + chunks[i].label_name = std::string(chunk_labels + chunk_attrs[i].label_offset); + + u64 total_size = 0; + u16 mchunk_count = chunk_attrs[i].mchunk_count; + if (mchunk_count != 0) { + auto mchunks = reinterpret_cast( + ((u8*)chunk_mchunks + chunk_attrs[i].mchunks_offset)); + for (u16 j = 0; j < mchunk_count; j++) { + u16 mchunk_id = mchunks[j]; + total_size += mchunk_attrs[mchunk_id].size.size; + } + } + chunks[i].total_size = total_size; + } + } + + return ret; + } + } + return false; +} + +bool PlaygoFile::load_chunk_data(const Common::FS::IOFile& file, const chunk_t& chunk, + std::string& data) { + if (file.IsOpen()) { + if (file.Seek(chunk.offset)) { + data.resize(chunk.length); + if (data.size() == chunk.length) { + file.ReadRaw(&data[0], chunk.length); + return true; + } + } + } + return false; } \ No newline at end of file diff --git a/src/core/file_format/playgo_chunk.h b/src/core/file_format/playgo_chunk.h index d17d24bf9..439e27d05 100644 --- a/src/core/file_format/playgo_chunk.h +++ b/src/core/file_format/playgo_chunk.h @@ -3,29 +3,129 @@ #pragma once #include -#include "common/types.h" +#include +#include +#include "common/io_file.h" +#include "core/libraries/playgo/playgo_types.h" + +constexpr u32 PLAYGO_MAGIC = 0x6F676C70; + +struct chunk_t { + u32 offset; + u32 length; +} __attribute__((packed)); struct PlaygoHeader { u32 magic; u16 version_major; u16 version_minor; - u16 image_count; + u16 image_count; // [0;1] + u16 chunk_count; // [0;1000] + u16 mchunk_count; // [0;8000] + u16 scenario_count; // [0;32] + + u32 file_size; + u16 default_scenario_id; + u16 attrib; + u32 sdk_version; + u16 disc_count; // [0;2] (if equals to 0 then disc count = 1) + u16 layer_bmp; + + u8 reserved[32]; + char content_id[128]; + + chunk_t chunk_attrs; // [0;32000] + chunk_t chunk_mchunks; + chunk_t chunk_labels; // [0;16000] + chunk_t mchunk_attrs; // [0;12800] + chunk_t scenario_attrs; // [0;1024] + chunk_t scenario_chunks; + chunk_t scenario_labels; + chunk_t inner_mchunk_attrs; // [0;12800] +} __attribute__((packed)); + +struct playgo_scenario_attr_entry_t { + u8 _type; + u8 _unk[19]; + u16 initial_chunk_count; u16 chunk_count; + u32 chunks_offset; //<-scenario_chunks + u32 label_offset; //<-scenario_labels +} __attribute__((packed)); + +struct image_disc_layer_no_t { + u8 layer_no : 2; + u8 disc_no : 2; + u8 image_no : 4; +} __attribute__((packed)); + +struct playgo_chunk_attr_entry_t { + u8 flag; + image_disc_layer_no_t image_disc_layer_no; + u8 req_locus; + u8 unk[11]; u16 mchunk_count; - u16 scenario_count; - // TODO fill the rest + u64 language_mask; + u32 mchunks_offset; //<-chunk_mchunks + u32 label_offset; //<-chunk_labels +} __attribute__((packed)); + +struct playgo_chunk_loc_t { + u64 offset : 48; + u64 _align1 : 8; + u64 image_no : 4; + u64 _align2 : 4; +} __attribute__((packed)); + +struct playgo_chunk_size_t { + u64 size : 48; + u64 _align : 16; +} __attribute__((packed)); + +struct playgo_mchunk_attr_entry_t { + playgo_chunk_loc_t loc; + playgo_chunk_size_t size; +} __attribute__((packed)); + +struct PlaygoChunk { + u64 req_locus; + u64 language_mask; + u64 total_size; + std::string label_name; }; -class PlaygoChunk { + +class PlaygoFile { public: - PlaygoChunk() = default; - ~PlaygoChunk() = default; + bool initialized; + OrbisPlayGoHandle handle; + OrbisPlayGoChunkId id; + OrbisPlayGoLocus locus; + OrbisPlayGoInstallSpeed speed; + s64 speed_tick; + OrbisPlayGoEta eta; + OrbisPlayGoLanguageMask langMask; + std::vector chunks; + +public: + PlaygoFile() + : initialized(false), handle(0), id(0), locus(0), speed(ORBIS_PLAYGO_INSTALL_SPEED_TRICKLE), + speed_tick(0), eta(0), langMask(0), playgoHeader{0} {} + ~PlaygoFile() = default; bool Open(const std::filesystem::path& filepath); - PlaygoHeader GetPlaygoHeader() { + bool LoadChunks(const Common::FS::IOFile& file); + PlaygoHeader& GetPlaygoHeader() { return playgoHeader; } + std::mutex& GetSpeedMutex() { + return speed_mutex; + } + +private: + bool load_chunk_data(const Common::FS::IOFile& file, const chunk_t& chunk, std::string& data); private: PlaygoHeader playgoHeader; -}; \ No newline at end of file + std::mutex speed_mutex; +}; diff --git a/src/core/file_format/trp.cpp b/src/core/file_format/trp.cpp index b4d4c95e2..f122709e4 100644 --- a/src/core/file_format/trp.cpp +++ b/src/core/file_format/trp.cpp @@ -6,7 +6,7 @@ TRP::TRP() = default; TRP::~TRP() = default; -void TRP::GetNPcommID(std::filesystem::path trophyPath, int index) { +void TRP::GetNPcommID(const std::filesystem::path& trophyPath, int index) { std::filesystem::path trpPath = trophyPath / "sce_sys/npbind.dat"; Common::FS::IOFile npbindFile(trpPath, Common::FS::FileAccessMode::Read); if (!npbindFile.IsOpen()) { @@ -27,7 +27,7 @@ static void removePadding(std::vector& vec) { } } -bool TRP::Extract(std::filesystem::path trophyPath) { +bool TRP::Extract(const std::filesystem::path& trophyPath) { std::string title = trophyPath.filename().string(); std::filesystem::path gameSysDir = trophyPath / "sce_sys/trophy/"; if (!std::filesystem::exists(gameSysDir)) { diff --git a/src/core/file_format/trp.h b/src/core/file_format/trp.h index 6d1f13bd9..56f490026 100644 --- a/src/core/file_format/trp.h +++ b/src/core/file_format/trp.h @@ -33,8 +33,8 @@ class TRP { public: TRP(); ~TRP(); - bool Extract(std::filesystem::path trophyPath); - void GetNPcommID(std::filesystem::path trophyPath, int index); + bool Extract(const std::filesystem::path& trophyPath); + void GetNPcommID(const std::filesystem::path& trophyPath, int index); private: Crypto crypto; diff --git a/src/core/file_sys/fs.cpp b/src/core/file_sys/fs.cpp index 2bcff191c..40d8212bb 100644 --- a/src/core/file_sys/fs.cpp +++ b/src/core/file_sys/fs.cpp @@ -25,9 +25,9 @@ void MntPoints::UnmountAll() { m_mnt_pairs.clear(); } -std::filesystem::path MntPoints::GetHostPath(const std::string& guest_directory) { +std::filesystem::path MntPoints::GetHostPath(std::string_view guest_directory) { // Evil games like Turok2 pass double slashes e.g /app0//game.kpf - auto corrected_path = guest_directory; + std::string corrected_path(guest_directory); size_t pos = corrected_path.find("//"); while (pos != std::string::npos) { corrected_path.replace(pos, 2, "/"); @@ -54,6 +54,7 @@ std::filesystem::path MntPoints::GetHostPath(const std::string& guest_directory) // If the path does not exist attempt to verify this. // Retrieve parent path until we find one that exists. + std::scoped_lock lk{m_mutex}; path_parts.clear(); auto current_path = host_path; while (!std::filesystem::exists(current_path)) { diff --git a/src/core/file_sys/fs.h b/src/core/file_sys/fs.h index d636f8bff..b0fb63242 100644 --- a/src/core/file_sys/fs.h +++ b/src/core/file_sys/fs.h @@ -31,7 +31,7 @@ public: void Unmount(const std::filesystem::path& host_folder, const std::string& guest_folder); void UnmountAll(); - std::filesystem::path GetHostPath(const std::string& guest_directory); + std::filesystem::path GetHostPath(std::string_view guest_directory); const MntPair* GetMount(const std::string& guest_path) { const auto it = std::ranges::find_if( diff --git a/src/core/libraries/audio/audioout.cpp b/src/core/libraries/audio/audioout.cpp index eac3845f3..cb676afc1 100644 --- a/src/core/libraries/audio/audioout.cpp +++ b/src/core/libraries/audio/audioout.cpp @@ -175,7 +175,6 @@ int PS4_SYSV_ABI sceAudioOutGetLastOutputTime() { } int PS4_SYSV_ABI sceAudioOutGetPortState(s32 handle, OrbisAudioOutPortState* state) { - int type = 0; int channels_num = 0; @@ -235,11 +234,11 @@ int PS4_SYSV_ABI sceAudioOutGetSystemState() { } int PS4_SYSV_ABI sceAudioOutInit() { + LOG_TRACE(Lib_AudioOut, "called"); if (audio != nullptr) { return ORBIS_AUDIO_OUT_ERROR_ALREADY_INIT; } audio = std::make_unique(); - LOG_INFO(Lib_AudioOut, "called"); return ORBIS_OK; } diff --git a/src/core/libraries/avplayer/avplayer.cpp b/src/core/libraries/avplayer/avplayer.cpp index dd9f42b26..bd1f6b503 100644 --- a/src/core/libraries/avplayer/avplayer.cpp +++ b/src/core/libraries/avplayer/avplayer.cpp @@ -1,21 +1,34 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -// Generated By moduleGenerator #include "avplayer.h" + +#include "avplayer_impl.h" #include "common/logging/log.h" #include "core/libraries/error_codes.h" +#include "core/libraries/kernel/thread_management.h" #include "core/libraries/libs.h" namespace Libraries::AvPlayer { -int PS4_SYSV_ABI sceAvPlayerAddSource() { - LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); - return ORBIS_OK; +using namespace Kernel; + +s32 PS4_SYSV_ABI sceAvPlayerAddSource(SceAvPlayerHandle handle, const char* filename) { + LOG_TRACE(Lib_AvPlayer, "filename = {}", filename); + if (handle == nullptr) { + return ORBIS_AVPLAYER_ERROR_INVALID_PARAMS; + } + const auto res = handle->AddSource(filename); + LOG_TRACE(Lib_AvPlayer, "returning {}", res); + return res; } -int PS4_SYSV_ABI sceAvPlayerAddSourceEx() { +s32 PS4_SYSV_ABI sceAvPlayerAddSourceEx(SceAvPlayerHandle handle, SceAvPlayerUriType uriType, + SceAvPlayerSourceDetails* sourceDetails) { LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); + if (handle == nullptr) { + return ORBIS_AVPLAYER_ERROR_INVALID_PARAMS; + } return ORBIS_OK; } @@ -24,122 +37,244 @@ int PS4_SYSV_ABI sceAvPlayerChangeStream() { return ORBIS_OK; } -int PS4_SYSV_ABI sceAvPlayerClose() { +s32 PS4_SYSV_ABI sceAvPlayerClose(SceAvPlayerHandle handle) { + LOG_TRACE(Lib_AvPlayer, "called"); + if (handle == nullptr) { + return ORBIS_AVPLAYER_ERROR_INVALID_PARAMS; + } + delete handle; + return ORBIS_OK; +} + +u64 PS4_SYSV_ABI sceAvPlayerCurrentTime(SceAvPlayerHandle handle) { + LOG_TRACE(Lib_AvPlayer, "called"); + if (handle == nullptr) { + return ORBIS_AVPLAYER_ERROR_INVALID_PARAMS; + } + const auto res = handle->CurrentTime(); + LOG_TRACE(Lib_AvPlayer, "returning {}", res); + return res; +} + +s32 PS4_SYSV_ABI sceAvPlayerDisableStream(SceAvPlayerHandle handle, u32 stream_id) { + LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); + if (handle == nullptr) { + return ORBIS_AVPLAYER_ERROR_INVALID_PARAMS; + } + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceAvPlayerEnableStream(SceAvPlayerHandle handle, u32 stream_id) { + LOG_TRACE(Lib_AvPlayer, "stream_id = {}", stream_id); + if (handle == nullptr) { + return ORBIS_AVPLAYER_ERROR_INVALID_PARAMS; + } + const auto res = handle->EnableStream(stream_id); + LOG_TRACE(Lib_AvPlayer, "returning {}", res); + return res; +} + +bool PS4_SYSV_ABI sceAvPlayerGetAudioData(SceAvPlayerHandle handle, SceAvPlayerFrameInfo* p_info) { + LOG_TRACE(Lib_AvPlayer, "called"); + if (handle == nullptr || p_info == nullptr) { + return ORBIS_AVPLAYER_ERROR_INVALID_PARAMS; + } + const auto res = handle->GetAudioData(*p_info); + LOG_TRACE(Lib_AvPlayer, "returning {}", res); + return res; +} + +s32 PS4_SYSV_ABI sceAvPlayerGetStreamInfo(SceAvPlayerHandle handle, u32 stream_id, + SceAvPlayerStreamInfo* p_info) { + LOG_TRACE(Lib_AvPlayer, "stream_id = {}", stream_id); + if (handle == nullptr || p_info == nullptr) { + return ORBIS_AVPLAYER_ERROR_INVALID_PARAMS; + } + const auto res = handle->GetStreamInfo(stream_id, *p_info); + LOG_TRACE(Lib_AvPlayer, "returning {}", res); + return res; +} + +bool PS4_SYSV_ABI sceAvPlayerGetVideoData(SceAvPlayerHandle handle, + SceAvPlayerFrameInfo* video_info) { + LOG_TRACE(Lib_AvPlayer, "called"); + if (handle == nullptr || video_info == nullptr) { + return ORBIS_AVPLAYER_ERROR_INVALID_PARAMS; + } + const auto res = handle->GetVideoData(*video_info); + LOG_TRACE(Lib_AvPlayer, "returning {}", res); + return res; +} + +bool PS4_SYSV_ABI sceAvPlayerGetVideoDataEx(SceAvPlayerHandle handle, + SceAvPlayerFrameInfoEx* video_info) { + LOG_TRACE(Lib_AvPlayer, "called"); + if (handle == nullptr || video_info == nullptr) { + return ORBIS_AVPLAYER_ERROR_INVALID_PARAMS; + } + const auto res = handle->GetVideoData(*video_info); + LOG_TRACE(Lib_AvPlayer, "returning {}", res); + return res; +} + +SceAvPlayerHandle PS4_SYSV_ABI sceAvPlayerInit(SceAvPlayerInitData* data) { + LOG_TRACE(Lib_AvPlayer, "called"); + if (data == nullptr) { + return nullptr; + } + + if (data->memory_replacement.allocate == nullptr || + data->memory_replacement.allocate_texture == nullptr || + data->memory_replacement.deallocate == nullptr || + data->memory_replacement.deallocate_texture == nullptr) { + LOG_ERROR(Lib_AvPlayer, "All allocators are required for AVPlayer Initialisation."); + return nullptr; + } + + return new AvPlayer(*data); +} + +s32 PS4_SYSV_ABI sceAvPlayerInitEx(const SceAvPlayerInitDataEx* p_data, + SceAvPlayerHandle* p_player) { + LOG_TRACE(Lib_AvPlayer, "called"); + if (p_data == nullptr || p_player == nullptr) { + return ORBIS_AVPLAYER_ERROR_INVALID_PARAMS; + } + + if (p_data->memory_replacement.allocate == nullptr || + p_data->memory_replacement.allocate_texture == nullptr || + p_data->memory_replacement.deallocate == nullptr || + p_data->memory_replacement.deallocate_texture == nullptr) { + LOG_ERROR(Lib_AvPlayer, "All allocators are required for AVPlayer Initialisation."); + return ORBIS_AVPLAYER_ERROR_INVALID_PARAMS; + } + + SceAvPlayerInitData data = {}; + data.memory_replacement = p_data->memory_replacement; + data.file_replacement = p_data->file_replacement; + data.event_replacement = p_data->event_replacement; + data.default_language = p_data->default_language; + data.num_output_video_framebuffers = p_data->num_output_video_framebuffers; + data.auto_start = p_data->auto_start; + + *p_player = new AvPlayer(data); + return ORBIS_OK; +} + +bool PS4_SYSV_ABI sceAvPlayerIsActive(SceAvPlayerHandle handle) { + LOG_TRACE(Lib_AvPlayer, "called"); + if (handle == nullptr) { + LOG_TRACE(Lib_AvPlayer, "returning ORBIS_AVPLAYER_ERROR_INVALID_PARAMS"); + return ORBIS_AVPLAYER_ERROR_INVALID_PARAMS; + } + const auto res = handle->IsActive(); + LOG_TRACE(Lib_AvPlayer, "returning {}", res); + return res; +} + +s32 PS4_SYSV_ABI sceAvPlayerJumpToTime(SceAvPlayerHandle handle, uint64_t time) { + LOG_ERROR(Lib_AvPlayer, "(STUBBED) called, time (msec) = {}", time); + if (handle == nullptr) { + return ORBIS_AVPLAYER_ERROR_INVALID_PARAMS; + } + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceAvPlayerPause(SceAvPlayerHandle handle) { + LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); + if (handle == nullptr) { + return ORBIS_AVPLAYER_ERROR_INVALID_PARAMS; + } + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceAvPlayerPostInit(SceAvPlayerHandle handle, SceAvPlayerPostInitData* data) { + LOG_TRACE(Lib_AvPlayer, "called"); + if (handle == nullptr || data == nullptr) { + return ORBIS_AVPLAYER_ERROR_INVALID_PARAMS; + } + const auto res = handle->PostInit(*data); + LOG_TRACE(Lib_AvPlayer, "returning {}", res); + return res; +} + +s32 PS4_SYSV_ABI sceAvPlayerPrintf(const char* format, ...) { LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); return ORBIS_OK; } -int PS4_SYSV_ABI sceAvPlayerCurrentTime() { +s32 PS4_SYSV_ABI sceAvPlayerResume(SceAvPlayerHandle handle) { + LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); + if (handle == nullptr) { + return ORBIS_AVPLAYER_ERROR_INVALID_PARAMS; + } + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceAvPlayerSetAvSyncMode(SceAvPlayerHandle handle, + SceAvPlayerAvSyncMode sync_mode) { + LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); + if (handle == nullptr) { + return ORBIS_AVPLAYER_ERROR_INVALID_PARAMS; + } + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceAvPlayerSetLogCallback(SceAvPlayerLogCallback log_cb, void* user_data) { LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); return ORBIS_OK; } -int PS4_SYSV_ABI sceAvPlayerDisableStream() { - LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); +s32 PS4_SYSV_ABI sceAvPlayerSetLooping(SceAvPlayerHandle handle, bool loop_flag) { + LOG_TRACE(Lib_AvPlayer, "called, looping = {}", loop_flag); + if (handle == nullptr) { + return ORBIS_AVPLAYER_ERROR_INVALID_PARAMS; + } + if (!handle->SetLooping(loop_flag)) { + return ORBIS_AVPLAYER_ERROR_OPERATION_FAILED; + } return ORBIS_OK; } -int PS4_SYSV_ABI sceAvPlayerEnableStream() { +s32 PS4_SYSV_ABI sceAvPlayerSetTrickSpeed(SceAvPlayerHandle handle, s32 trick_speed) { LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); + if (handle == nullptr) { + return ORBIS_AVPLAYER_ERROR_INVALID_PARAMS; + } return ORBIS_OK; } -int PS4_SYSV_ABI sceAvPlayerGetAudioData() { - LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); - return ORBIS_OK; +s32 PS4_SYSV_ABI sceAvPlayerStart(SceAvPlayerHandle handle) { + LOG_TRACE(Lib_AvPlayer, "called"); + if (handle == nullptr) { + return ORBIS_AVPLAYER_ERROR_INVALID_PARAMS; + } + const auto res = handle->Start(); + LOG_TRACE(Lib_AvPlayer, "returning {}", res); + return res; } -int PS4_SYSV_ABI sceAvPlayerGetStreamInfo() { - LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); - return ORBIS_OK; +s32 PS4_SYSV_ABI sceAvPlayerStop(SceAvPlayerHandle handle) { + LOG_TRACE(Lib_AvPlayer, "called"); + if (handle == nullptr) { + return ORBIS_AVPLAYER_ERROR_INVALID_PARAMS; + } + const auto res = handle->Stop(); + LOG_TRACE(Lib_AvPlayer, "returning {}", res); + return res; } -int PS4_SYSV_ABI sceAvPlayerGetVideoData() { - LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); - return ORBIS_OK; +s32 PS4_SYSV_ABI sceAvPlayerStreamCount(SceAvPlayerHandle handle) { + LOG_TRACE(Lib_AvPlayer, "called"); + if (handle == nullptr) { + return ORBIS_AVPLAYER_ERROR_INVALID_PARAMS; + } + const auto res = handle->GetStreamCount(); + LOG_TRACE(Lib_AvPlayer, "returning {}", res); + return res; } -int PS4_SYSV_ABI sceAvPlayerGetVideoDataEx() { - LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); - return ORBIS_OK; -} - -int PS4_SYSV_ABI sceAvPlayerInit() { - LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); - return ORBIS_OK; -} - -int PS4_SYSV_ABI sceAvPlayerInitEx() { - LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); - return ORBIS_OK; -} - -int PS4_SYSV_ABI sceAvPlayerIsActive() { - LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); - return ORBIS_OK; -} - -int PS4_SYSV_ABI sceAvPlayerJumpToTime() { - LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); - return ORBIS_OK; -} - -int PS4_SYSV_ABI sceAvPlayerPause() { - LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); - return ORBIS_OK; -} - -int PS4_SYSV_ABI sceAvPlayerPostInit() { - LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); - return ORBIS_OK; -} - -int PS4_SYSV_ABI sceAvPlayerPrintf() { - LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); - return ORBIS_OK; -} - -int PS4_SYSV_ABI sceAvPlayerResume() { - LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); - return ORBIS_OK; -} - -int PS4_SYSV_ABI sceAvPlayerSetAvSyncMode() { - LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); - return ORBIS_OK; -} - -int PS4_SYSV_ABI sceAvPlayerSetLogCallback() { - LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); - return ORBIS_OK; -} - -int PS4_SYSV_ABI sceAvPlayerSetLooping() { - LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); - return ORBIS_OK; -} - -int PS4_SYSV_ABI sceAvPlayerSetTrickSpeed() { - LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); - return ORBIS_OK; -} - -int PS4_SYSV_ABI sceAvPlayerStart() { - LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); - return ORBIS_OK; -} - -int PS4_SYSV_ABI sceAvPlayerStop() { - LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); - return ORBIS_OK; -} - -int PS4_SYSV_ABI sceAvPlayerStreamCount() { - LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); - return ORBIS_OK; -} - -int PS4_SYSV_ABI sceAvPlayerVprintf() { +s32 PS4_SYSV_ABI sceAvPlayerVprintf(const char* format, va_list args) { LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); return ORBIS_OK; } diff --git a/src/core/libraries/avplayer/avplayer.h b/src/core/libraries/avplayer/avplayer.h index 39a619ee1..360f06b65 100644 --- a/src/core/libraries/avplayer/avplayer.h +++ b/src/core/libraries/avplayer/avplayer.h @@ -5,39 +5,288 @@ #include "common/types.h" +#include // va_list +#include // size_t + namespace Core::Loader { class SymbolsResolver; } namespace Libraries::AvPlayer { -int PS4_SYSV_ABI sceAvPlayerAddSource(); -int PS4_SYSV_ABI sceAvPlayerAddSourceEx(); -int PS4_SYSV_ABI sceAvPlayerChangeStream(); -int PS4_SYSV_ABI sceAvPlayerClose(); -int PS4_SYSV_ABI sceAvPlayerCurrentTime(); -int PS4_SYSV_ABI sceAvPlayerDisableStream(); -int PS4_SYSV_ABI sceAvPlayerEnableStream(); -int PS4_SYSV_ABI sceAvPlayerGetAudioData(); -int PS4_SYSV_ABI sceAvPlayerGetStreamInfo(); -int PS4_SYSV_ABI sceAvPlayerGetVideoData(); -int PS4_SYSV_ABI sceAvPlayerGetVideoDataEx(); -int PS4_SYSV_ABI sceAvPlayerInit(); -int PS4_SYSV_ABI sceAvPlayerInitEx(); -int PS4_SYSV_ABI sceAvPlayerIsActive(); -int PS4_SYSV_ABI sceAvPlayerJumpToTime(); -int PS4_SYSV_ABI sceAvPlayerPause(); -int PS4_SYSV_ABI sceAvPlayerPostInit(); -int PS4_SYSV_ABI sceAvPlayerPrintf(); -int PS4_SYSV_ABI sceAvPlayerResume(); -int PS4_SYSV_ABI sceAvPlayerSetAvSyncMode(); -int PS4_SYSV_ABI sceAvPlayerSetLogCallback(); -int PS4_SYSV_ABI sceAvPlayerSetLooping(); -int PS4_SYSV_ABI sceAvPlayerSetTrickSpeed(); -int PS4_SYSV_ABI sceAvPlayerStart(); -int PS4_SYSV_ABI sceAvPlayerStop(); -int PS4_SYSV_ABI sceAvPlayerStreamCount(); -int PS4_SYSV_ABI sceAvPlayerVprintf(); +class AvPlayer; + +using SceAvPlayerHandle = AvPlayer*; + +enum SceAvPlayerUriType { SCE_AVPLAYER_URI_TYPE_SOURCE = 0 }; + +struct SceAvPlayerUri { + const char* name; + u32 length; +}; + +enum SceAvPlayerSourceType { + SCE_AVPLAYER_SOURCE_TYPE_UNKNOWN = 0, + SCE_AVPLAYER_SOURCE_TYPE_FILE_MP4 = 1, + SCE_AVPLAYER_SOURCE_TYPE_HLS = 8 +}; + +struct SceAvPlayerSourceDetails { + SceAvPlayerUri uri; + u8 reserved1[64]; + SceAvPlayerSourceType source_type; + u8 reserved2[44]; +}; + +struct SceAvPlayerAudio { + u16 channel_count; + u8 reserved1[2]; + u32 sample_rate; + u32 size; + u8 language_code[4]; +}; + +struct SceAvPlayerVideo { + u32 width; + u32 height; + f32 aspect_ratio; + u8 language_code[4]; +}; + +struct SceAvPlayerTextPosition { + u16 top; + u16 left; + u16 bottom; + u16 right; +}; + +struct SceAvPlayerTimedText { + u8 language_code[4]; + u16 text_size; + u16 font_size; + SceAvPlayerTextPosition position; +}; + +union SceAvPlayerStreamDetails { + u8 reserved[16]; + SceAvPlayerAudio audio; + SceAvPlayerVideo video; + SceAvPlayerTimedText subs; +}; + +struct SceAvPlayerFrameInfo { + u8* pData; + u8 reserved[4]; + u64 timestamp; + SceAvPlayerStreamDetails details; +}; + +struct SceAvPlayerStreamInfo { + u32 type; + u8 reserved[4]; + SceAvPlayerStreamDetails details; + u64 duration; + u64 start_time; +}; + +struct SceAvPlayerAudioEx { + u16 channel_count; + u8 reserved[2]; + u32 sample_rate; + u32 size; + u8 language_code[4]; + u8 reserved1[64]; +}; + +struct SceAvPlayerVideoEx { + u32 width; + u32 height; + f32 aspect_ratio; + u8 language_code[4]; + u32 framerate; + u32 crop_left_offset; + u32 crop_right_offset; + u32 crop_top_offset; + u32 crop_bottom_offset; + u32 pitch; + u8 luma_bit_depth; + u8 chroma_bit_depth; + bool video_full_range_flag; + u8 reserved1[37]; +}; + +struct SceAvPlayerTimedTextEx { + u8 language_code[4]; + u8 reserved[12]; + u8 reserved1[64]; +}; + +union SceAvPlayerStreamDetailsEx { + SceAvPlayerAudioEx audio; + SceAvPlayerVideoEx video; + SceAvPlayerTimedTextEx subs; + u8 reserved1[80]; +}; + +struct SceAvPlayerFrameInfoEx { + void* pData; + u8 reserved[4]; + u64 timestamp; + SceAvPlayerStreamDetailsEx details; +}; + +typedef void* PS4_SYSV_ABI (*SceAvPlayerAllocate)(void* p, u32 align, u32 size); +typedef void PS4_SYSV_ABI (*SceAvPlayerDeallocate)(void* p, void* mem); +typedef void* PS4_SYSV_ABI (*SceAvPlayerAllocateTexture)(void* p, u32 align, u32 size); +typedef void PS4_SYSV_ABI (*SceAvPlayerDeallocateTexture)(void* p, void* mem); + +struct SceAvPlayerMemAllocator { + void* object_ptr; + SceAvPlayerAllocate allocate; + SceAvPlayerDeallocate deallocate; + SceAvPlayerAllocateTexture allocate_texture; + SceAvPlayerDeallocateTexture deallocate_texture; +}; + +typedef s32 PS4_SYSV_ABI (*SceAvPlayerOpenFile)(void* p, const char* name); +typedef s32 PS4_SYSV_ABI (*SceAvPlayerCloseFile)(void* p); +typedef s32 PS4_SYSV_ABI (*SceAvPlayerReadOffsetFile)(void* p, u8* buf, u64 pos, u32 len); +typedef u64 PS4_SYSV_ABI (*SceAvPlayerSizeFile)(void* p); + +struct SceAvPlayerFileReplacement { + void* object_ptr; + SceAvPlayerOpenFile open; + SceAvPlayerCloseFile close; + SceAvPlayerReadOffsetFile readOffset; + SceAvPlayerSizeFile size; +}; + +typedef void PS4_SYSV_ABI (*SceAvPlayerEventCallback)(void* p, s32 event, s32 src_id, void* data); + +struct SceAvPlayerEventReplacement { + void* object_ptr; + SceAvPlayerEventCallback event_callback; +}; + +enum SceAvPlayerDebuglevels { + SCE_AVPLAYER_DBG_NONE, + SCE_AVPLAYER_DBG_INFO, + SCE_AVPLAYER_DBG_WARNINGS, + SCE_AVPLAYER_DBG_ALL +}; + +struct SceAvPlayerInitData { + SceAvPlayerMemAllocator memory_replacement; + SceAvPlayerFileReplacement file_replacement; + SceAvPlayerEventReplacement event_replacement; + SceAvPlayerDebuglevels debug_level; + u32 base_priority; + s32 num_output_video_framebuffers; + bool auto_start; + u8 reserved[3]; + const char* default_language; +}; + +struct SceAvPlayerInitDataEx { + size_t this_size; + SceAvPlayerMemAllocator memory_replacement; + SceAvPlayerFileReplacement file_replacement; + SceAvPlayerEventReplacement event_replacement; + const char* default_language; + SceAvPlayerDebuglevels debug_level; + u32 audio_decoder_priority; + u32 audio_decoder_affinity; + u32 video_decoder_priority; + u32 video_decoder_affinity; + u32 demuxer_priority; + u32 demuxer_affinity; + u32 controller_priority; + u32 controller_affinity; + u32 http_streaming_priority; + u32 http_streaming_affinity; + u32 file_streaming_priority; + u32 file_streaming_affinity; + s32 num_output_video_framebuffers; + bool auto_start; + u8 reserved[3]; +}; + +enum SceAvPlayerStreamType { + SCE_AVPLAYER_VIDEO, + SCE_AVPLAYER_AUDIO, + SCE_AVPLAYER_TIMEDTEXT, + SCE_AVPLAYER_UNKNOWN +}; + +enum SceAvPlayerVideoDecoderType { + SCE_AVPLAYER_VIDEO_DECODER_TYPE_DEFAULT = 0, + SCE_AVPLAYER_VIDEO_DECODER_TYPE_RESERVED1, + SCE_AVPLAYER_VIDEO_DECODER_TYPE_SOFTWARE, + SCE_AVPLAYER_VIDEO_DECODER_TYPE_SOFTWARE2 +}; + +enum SceAvPlayerAudioDecoderType { + SCE_AVPLAYER_AUDIO_DECODER_TYPE_DEFAULT = 0, + SCE_AVPLAYER_AUDIO_DECODER_TYPE_RESERVED1, + SCE_AVPLAYER_AUDIO_DECODER_TYPE_RESERVED2 +}; + +struct SceAvPlayerDecoderInit { + union { + SceAvPlayerVideoDecoderType video_type; + SceAvPlayerAudioDecoderType audio_type; + u8 reserved[4]; + } decoderType; + union { + struct { + s32 cpu_affinity_mask; + s32 cpu_thread_priority; + u8 decode_pipeline_depth; + u8 compute_pipe_id; + u8 compute_queue_id; + u8 enable_interlaced; + u8 reserved[16]; + } avcSw2; + struct { + u8 audio_channel_order; + u8 reserved[27]; + } aac; + u8 reserved[28]; + } decoderParams; +}; + +struct SceAvPlayerHTTPCtx { + u32 http_context_id; + u32 ssl_context_id; +}; + +struct SceAvPlayerPostInitData { + u32 demux_video_buffer_size; + SceAvPlayerDecoderInit video_decoder_init; + SceAvPlayerDecoderInit audio_decoder_init; + SceAvPlayerHTTPCtx http_context; + u8 reserved[56]; +}; + +enum SceAvPlayerAvSyncMode { + SCE_AVPLAYER_AV_SYNC_MODE_DEFAULT = 0, + SCE_AVPLAYER_AV_SYNC_MODE_NONE +}; + +typedef int PS4_SYSV_ABI (*SceAvPlayerLogCallback)(void* p, const char* format, va_list args); + +enum SceAvPlayerEvents { + SCE_AVPLAYER_STATE_STOP = 0x01, + SCE_AVPLAYER_STATE_READY = 0x02, + SCE_AVPLAYER_STATE_PLAY = 0x03, + SCE_AVPLAYER_STATE_PAUSE = 0x04, + SCE_AVPLAYER_STATE_BUFFERING = 0x05, + SCE_AVPLAYER_TIMED_TEXT_DELIVERY = 0x10, + SCE_AVPLAYER_WARNING_ID = 0x20, + SCE_AVPLAYER_ENCRYPTION = 0x30, + SCE_AVPLAYER_DRM_ERROR = 0x40 +}; void RegisterlibSceAvPlayer(Core::Loader::SymbolsResolver* sym); -} // namespace Libraries::AvPlayer \ No newline at end of file + +} // namespace Libraries::AvPlayer diff --git a/src/core/libraries/avplayer/avplayer_common.cpp b/src/core/libraries/avplayer/avplayer_common.cpp new file mode 100644 index 000000000..306603e29 --- /dev/null +++ b/src/core/libraries/avplayer/avplayer_common.cpp @@ -0,0 +1,61 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "avplayer.h" +#include "avplayer_common.h" + +#include // std::equal +#include // std::tolower +#include // std::string_view + +namespace Libraries::AvPlayer { + +using namespace Kernel; + +static bool ichar_equals(char a, char b) { + return std::tolower(static_cast(a)) == + std::tolower(static_cast(b)); +} + +static bool iequals(std::string_view l, std::string_view r) { + return std::ranges::equal(l, r, ichar_equals); +} + +SceAvPlayerSourceType GetSourceType(std::string_view path) { + if (path.empty()) { + return SCE_AVPLAYER_SOURCE_TYPE_UNKNOWN; + } + + std::string_view name = path; + if (path.find("://") != std::string_view::npos) { + // This path is a URI. Strip HTTP parameters from it. + // schema://server.domain/path/file.ext/and/beyond?param=value#paragraph -> + // -> schema://server.domain/path/to/file.ext/and/beyond + name = path.substr(0, path.find_first_of("?#")); + if (name.empty()) { + return SCE_AVPLAYER_SOURCE_TYPE_UNKNOWN; + } + } + + // schema://server.domain/path/to/file.ext/and/beyond -> .ext/and/beyond + auto ext = name.substr(name.rfind('.')); + if (ext.empty()) { + return SCE_AVPLAYER_SOURCE_TYPE_UNKNOWN; + } + + // .ext/and/beyond -> .ext + ext = ext.substr(0, ext.find('/')); + + if (iequals(ext, ".mp4") || iequals(ext, ".m4v") || iequals(ext, ".m3d") || + iequals(ext, ".m4a") || iequals(ext, ".mov")) { + return SCE_AVPLAYER_SOURCE_TYPE_FILE_MP4; + } + + if (iequals(ext, ".m3u8")) { + return SCE_AVPLAYER_SOURCE_TYPE_HLS; + } + + return SCE_AVPLAYER_SOURCE_TYPE_UNKNOWN; +} + +} // namespace Libraries::AvPlayer diff --git a/src/core/libraries/avplayer/avplayer_common.h b/src/core/libraries/avplayer/avplayer_common.h new file mode 100644 index 000000000..a53696ecf --- /dev/null +++ b/src/core/libraries/avplayer/avplayer_common.h @@ -0,0 +1,91 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "avplayer.h" + +#include "common/assert.h" +#include "common/logging/log.h" +#include "core/libraries/kernel/thread_management.h" + +#include +#include +#include + +#define AVPLAYER_IS_ERROR(x) ((x) < 0) + +namespace Libraries::AvPlayer { + +enum class AvState { + Initial, + AddingSource, + Ready, + Play, + Stop, + EndOfFile, + Pause, + C0x08, + Jump, + TrickMode, + C0x0B, + Buffering, + Starting, + Error, +}; + +enum class AvEventType { + ChangeFlowState = 21, + WarningId = 22, + RevertState = 30, + AddSource = 40, + Error = 255, +}; + +union AvPlayerEventData { + u32 num_frames; // 20 + AvState state; // AvEventType::ChangeFlowState + s32 error; // AvEventType::WarningId + u32 attempt; // AvEventType::AddSource +}; + +struct AvPlayerEvent { + AvEventType event; + AvPlayerEventData payload; +}; + +template +class AvPlayerQueue { +public: + size_t Size() { + return m_queue.size(); + } + + void Push(T&& value) { + std::lock_guard guard(m_mutex); + m_queue.emplace(std::forward(value)); + } + + std::optional Pop() { + if (Size() == 0) { + return std::nullopt; + } + std::lock_guard guard(m_mutex); + auto result = std::move(m_queue.front()); + m_queue.pop(); + return result; + } + + void Clear() { + std::lock_guard guard(m_mutex); + m_queue = {}; + } + +private: + std::mutex m_mutex{}; + std::queue m_queue{}; +}; + +SceAvPlayerSourceType GetSourceType(std::string_view path); + +} // namespace Libraries::AvPlayer diff --git a/src/core/libraries/avplayer/avplayer_data_streamer.h b/src/core/libraries/avplayer/avplayer_data_streamer.h new file mode 100644 index 000000000..04097bb4d --- /dev/null +++ b/src/core/libraries/avplayer/avplayer_data_streamer.h @@ -0,0 +1,20 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "avplayer.h" + +#include "common/types.h" + +struct AVIOContext; + +namespace Libraries::AvPlayer { + +class IDataStreamer { +public: + virtual ~IDataStreamer() = default; + virtual AVIOContext* GetContext() = 0; +}; + +} // namespace Libraries::AvPlayer diff --git a/src/core/libraries/avplayer/avplayer_file_streamer.cpp b/src/core/libraries/avplayer/avplayer_file_streamer.cpp new file mode 100644 index 000000000..dc1386a47 --- /dev/null +++ b/src/core/libraries/avplayer/avplayer_file_streamer.cpp @@ -0,0 +1,86 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "avplayer_file_streamer.h" + +#include "avplayer_common.h" + +#include + +extern "C" { +#include +#include +} + +#include // std::max, std::min + +#define AVPLAYER_AVIO_BUFFER_SIZE 4096 + +namespace Libraries::AvPlayer { + +AvPlayerFileStreamer::AvPlayerFileStreamer(const SceAvPlayerFileReplacement& file_replacement, + std::string_view path) + : m_file_replacement(file_replacement) { + const auto ptr = m_file_replacement.object_ptr; + m_fd = m_file_replacement.open(ptr, path.data()); + ASSERT(m_fd >= 0); + m_file_size = m_file_replacement.size(ptr); + // avio_buffer is deallocated in `avio_context_free` + const auto avio_buffer = reinterpret_cast(av_malloc(AVPLAYER_AVIO_BUFFER_SIZE)); + m_avio_context = + avio_alloc_context(avio_buffer, AVPLAYER_AVIO_BUFFER_SIZE, 0, this, + &AvPlayerFileStreamer::ReadPacket, nullptr, &AvPlayerFileStreamer::Seek); +} + +AvPlayerFileStreamer::~AvPlayerFileStreamer() { + if (m_avio_context != nullptr) { + avio_context_free(&m_avio_context); + } + if (m_file_replacement.close != nullptr && m_fd >= 0) { + const auto close = m_file_replacement.close; + const auto ptr = m_file_replacement.object_ptr; + close(ptr); + } +} + +s32 AvPlayerFileStreamer::ReadPacket(void* opaque, u8* buffer, s32 size) { + const auto self = reinterpret_cast(opaque); + if (self->m_position >= self->m_file_size) { + return AVERROR_EOF; + } + if (self->m_position + size > self->m_file_size) { + size = self->m_file_size - self->m_position; + } + const auto read_offset = self->m_file_replacement.readOffset; + const auto ptr = self->m_file_replacement.object_ptr; + const auto bytes_read = read_offset(ptr, buffer, self->m_position, size); + if (bytes_read == 0 && size != 0) { + return AVERROR_EOF; + } + self->m_position += bytes_read; + return bytes_read; +} + +s64 AvPlayerFileStreamer::Seek(void* opaque, s64 offset, int whence) { + const auto self = reinterpret_cast(opaque); + if (whence & AVSEEK_SIZE) { + return self->m_file_size; + } + + if (whence == SEEK_CUR) { + self->m_position = + std::min(u64(std::max(s64(0), s64(self->m_position) + offset)), self->m_file_size); + return self->m_position; + } else if (whence == SEEK_SET) { + self->m_position = std::min(u64(std::max(s64(0), offset)), self->m_file_size); + return self->m_position; + } else if (whence == SEEK_END) { + self->m_position = + std::min(u64(std::max(s64(0), s64(self->m_file_size) + offset)), self->m_file_size); + return self->m_position; + } + + return -1; +} + +} // namespace Libraries::AvPlayer diff --git a/src/core/libraries/avplayer/avplayer_file_streamer.h b/src/core/libraries/avplayer/avplayer_file_streamer.h new file mode 100644 index 000000000..658ce8c1e --- /dev/null +++ b/src/core/libraries/avplayer/avplayer_file_streamer.h @@ -0,0 +1,37 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "avplayer.h" +#include "avplayer_data_streamer.h" + +#include +#include + +struct AVIOContext; + +namespace Libraries::AvPlayer { + +class AvPlayerFileStreamer : public IDataStreamer { +public: + AvPlayerFileStreamer(const SceAvPlayerFileReplacement& file_replacement, std::string_view path); + ~AvPlayerFileStreamer(); + + AVIOContext* GetContext() override { + return m_avio_context; + } + +private: + static s32 ReadPacket(void* opaque, u8* buffer, s32 size); + static s64 Seek(void* opaque, s64 buffer, int whence); + + SceAvPlayerFileReplacement m_file_replacement; + + int m_fd = -1; + u64 m_position{}; + u64 m_file_size{}; + AVIOContext* m_avio_context{}; +}; + +} // namespace Libraries::AvPlayer diff --git a/src/core/libraries/avplayer/avplayer_impl.cpp b/src/core/libraries/avplayer/avplayer_impl.cpp new file mode 100644 index 000000000..cdfff8277 --- /dev/null +++ b/src/core/libraries/avplayer/avplayer_impl.cpp @@ -0,0 +1,200 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "avplayer_common.h" +#include "avplayer_file_streamer.h" +#include "avplayer_impl.h" + +#include "common/logging/log.h" +#include "core/libraries/error_codes.h" +#include "core/libraries/kernel/libkernel.h" + +using namespace Libraries::Kernel; + +namespace Libraries::AvPlayer { + +void* PS4_SYSV_ABI AvPlayer::Allocate(void* handle, u32 alignment, u32 size) { + const auto* const self = reinterpret_cast(handle); + const auto allocate = self->m_init_data_original.memory_replacement.allocate; + const auto ptr = self->m_init_data_original.memory_replacement.object_ptr; + return allocate(ptr, alignment, size); +} + +void PS4_SYSV_ABI AvPlayer::Deallocate(void* handle, void* memory) { + const auto* const self = reinterpret_cast(handle); + const auto deallocate = self->m_init_data_original.memory_replacement.deallocate; + const auto ptr = self->m_init_data_original.memory_replacement.object_ptr; + return deallocate(ptr, memory); +} + +void* PS4_SYSV_ABI AvPlayer::AllocateTexture(void* handle, u32 alignment, u32 size) { + const auto* const self = reinterpret_cast(handle); + const auto allocate = self->m_init_data_original.memory_replacement.allocate_texture; + const auto ptr = self->m_init_data_original.memory_replacement.object_ptr; + return allocate(ptr, alignment, size); +} + +void PS4_SYSV_ABI AvPlayer::DeallocateTexture(void* handle, void* memory) { + const auto* const self = reinterpret_cast(handle); + const auto deallocate = self->m_init_data_original.memory_replacement.deallocate_texture; + const auto ptr = self->m_init_data_original.memory_replacement.object_ptr; + return deallocate(ptr, memory); +} + +int PS4_SYSV_ABI AvPlayer::OpenFile(void* handle, const char* filename) { + auto const self = reinterpret_cast(handle); + std::lock_guard guard(self->m_file_io_mutex); + + const auto open = self->m_init_data_original.file_replacement.open; + const auto ptr = self->m_init_data_original.file_replacement.object_ptr; + return open(ptr, filename); +} + +int PS4_SYSV_ABI AvPlayer::CloseFile(void* handle) { + auto const self = reinterpret_cast(handle); + std::lock_guard guard(self->m_file_io_mutex); + + const auto close = self->m_init_data_original.file_replacement.close; + const auto ptr = self->m_init_data_original.file_replacement.object_ptr; + return close(ptr); +} + +int PS4_SYSV_ABI AvPlayer::ReadOffsetFile(void* handle, u8* buffer, u64 position, u32 length) { + auto const self = reinterpret_cast(handle); + std::lock_guard guard(self->m_file_io_mutex); + + const auto read_offset = self->m_init_data_original.file_replacement.readOffset; + const auto ptr = self->m_init_data_original.file_replacement.object_ptr; + return read_offset(ptr, buffer, position, length); +} + +u64 PS4_SYSV_ABI AvPlayer::SizeFile(void* handle) { + auto const self = reinterpret_cast(handle); + std::lock_guard guard(self->m_file_io_mutex); + + const auto size = self->m_init_data_original.file_replacement.size; + const auto ptr = self->m_init_data_original.file_replacement.object_ptr; + return size(ptr); +} + +SceAvPlayerInitData AvPlayer::StubInitData(const SceAvPlayerInitData& data) { + SceAvPlayerInitData result = data; + result.memory_replacement.object_ptr = this; + result.memory_replacement.allocate = &AvPlayer::Allocate; + result.memory_replacement.deallocate = &AvPlayer::Deallocate; + result.memory_replacement.allocate_texture = &AvPlayer::AllocateTexture; + result.memory_replacement.deallocate_texture = &AvPlayer::DeallocateTexture; + if (data.file_replacement.open == nullptr || data.file_replacement.close == nullptr || + data.file_replacement.readOffset == nullptr || data.file_replacement.size == nullptr) { + result.file_replacement = {}; + } else { + result.file_replacement.object_ptr = this; + result.file_replacement.open = &AvPlayer::OpenFile; + result.file_replacement.close = &AvPlayer::CloseFile; + result.file_replacement.readOffset = &AvPlayer::ReadOffsetFile; + result.file_replacement.size = &AvPlayer::SizeFile; + } + return result; +} + +AvPlayer::AvPlayer(const SceAvPlayerInitData& data) + : m_init_data(StubInitData(data)), m_init_data_original(data), + m_state(std::make_unique(m_init_data)) {} + +s32 AvPlayer::PostInit(const SceAvPlayerPostInitData& data) { + m_post_init_data = data; + return ORBIS_OK; +} + +s32 AvPlayer::AddSource(std::string_view path) { + if (path.empty()) { + return ORBIS_AVPLAYER_ERROR_INVALID_PARAMS; + } + if (AVPLAYER_IS_ERROR(m_state->AddSource(path, GetSourceType(path)))) { + return ORBIS_AVPLAYER_ERROR_OPERATION_FAILED; + } + return ORBIS_OK; +} + +s32 AvPlayer::GetStreamCount() { + if (m_state == nullptr) { + return ORBIS_AVPLAYER_ERROR_OPERATION_FAILED; + } + const auto res = m_state->GetStreamCount(); + if (AVPLAYER_IS_ERROR(res)) { + return ORBIS_AVPLAYER_ERROR_OPERATION_FAILED; + } + return res; +} + +s32 AvPlayer::GetStreamInfo(u32 stream_index, SceAvPlayerStreamInfo& info) { + if (AVPLAYER_IS_ERROR(m_state->GetStreamInfo(stream_index, info))) { + return ORBIS_AVPLAYER_ERROR_OPERATION_FAILED; + } + return ORBIS_OK; +} + +s32 AvPlayer::EnableStream(u32 stream_index) { + if (m_state == nullptr) { + return ORBIS_AVPLAYER_ERROR_OPERATION_FAILED; + } + if (!m_state->EnableStream(stream_index)) { + return ORBIS_AVPLAYER_ERROR_OPERATION_FAILED; + } + return ORBIS_OK; +} + +s32 AvPlayer::Start() { + return m_state->Start(); +} + +bool AvPlayer::GetVideoData(SceAvPlayerFrameInfo& video_info) { + if (m_state == nullptr) { + return false; + } + return m_state->GetVideoData(video_info); +} + +bool AvPlayer::GetVideoData(SceAvPlayerFrameInfoEx& video_info) { + if (m_state == nullptr) { + return false; + } + return m_state->GetVideoData(video_info); +} + +bool AvPlayer::GetAudioData(SceAvPlayerFrameInfo& audio_info) { + if (m_state == nullptr) { + return false; + } + return m_state->GetAudioData(audio_info); +} + +bool AvPlayer::IsActive() { + if (m_state == nullptr) { + return false; + } + return m_state->IsActive(); +} + +u64 AvPlayer::CurrentTime() { + if (m_state == nullptr) { + return 0; + } + return m_state->CurrentTime(); +} + +s32 AvPlayer::Stop() { + if (m_state == nullptr || !m_state->Stop()) { + return ORBIS_AVPLAYER_ERROR_OPERATION_FAILED; + } + return ORBIS_OK; +} + +bool AvPlayer::SetLooping(bool is_looping) { + if (m_state == nullptr) { + return false; + } + return m_state->SetLooping(is_looping); +} + +} // namespace Libraries::AvPlayer diff --git a/src/core/libraries/avplayer/avplayer_impl.h b/src/core/libraries/avplayer/avplayer_impl.h new file mode 100644 index 000000000..09989d399 --- /dev/null +++ b/src/core/libraries/avplayer/avplayer_impl.h @@ -0,0 +1,68 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "avplayer.h" +#include "avplayer_data_streamer.h" +#include "avplayer_state.h" + +#include "core/libraries/kernel/thread_management.h" + +#include + +extern "C" { +#include +#include +} + +#include +#include + +namespace Libraries::AvPlayer { + +class AvPlayer { +public: + AvPlayer(const SceAvPlayerInitData& data); + + s32 PostInit(const SceAvPlayerPostInitData& data); + s32 AddSource(std::string_view filename); + s32 GetStreamCount(); + s32 GetStreamInfo(u32 stream_index, SceAvPlayerStreamInfo& info); + s32 EnableStream(u32 stream_index); + s32 Start(); + bool GetAudioData(SceAvPlayerFrameInfo& audio_info); + bool GetVideoData(SceAvPlayerFrameInfo& video_info); + bool GetVideoData(SceAvPlayerFrameInfoEx& video_info); + bool IsActive(); + u64 CurrentTime(); + s32 Stop(); + bool SetLooping(bool is_looping); + +private: + using ScePthreadMutex = Kernel::ScePthreadMutex; + + // Memory Replacement + static void* PS4_SYSV_ABI Allocate(void* handle, u32 alignment, u32 size); + static void PS4_SYSV_ABI Deallocate(void* handle, void* memory); + static void* PS4_SYSV_ABI AllocateTexture(void* handle, u32 alignment, u32 size); + static void PS4_SYSV_ABI DeallocateTexture(void* handle, void* memory); + + // File Replacement + static int PS4_SYSV_ABI OpenFile(void* handle, const char* filename); + static int PS4_SYSV_ABI CloseFile(void* handle); + static int PS4_SYSV_ABI ReadOffsetFile(void* handle, u8* buffer, u64 position, u32 length); + static u64 PS4_SYSV_ABI SizeFile(void* handle); + + SceAvPlayerInitData StubInitData(const SceAvPlayerInitData& data); + + SceAvPlayerInitData m_init_data{}; + SceAvPlayerInitData m_init_data_original{}; + SceAvPlayerPostInitData m_post_init_data{}; + std::mutex m_file_io_mutex{}; + + std::atomic_bool m_has_source{}; + std::unique_ptr m_state{}; +}; + +} // namespace Libraries::AvPlayer diff --git a/src/core/libraries/avplayer/avplayer_source.cpp b/src/core/libraries/avplayer/avplayer_source.cpp new file mode 100644 index 000000000..776d389f0 --- /dev/null +++ b/src/core/libraries/avplayer/avplayer_source.cpp @@ -0,0 +1,730 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "avplayer_source.h" + +#include "avplayer_file_streamer.h" + +#include "common/singleton.h" +#include "core/file_sys/fs.h" +#include "core/libraries/kernel/time_management.h" + +#include + +extern "C" { +#include +#include +#include +#include +#include +} + +namespace Libraries::AvPlayer { + +using namespace Kernel; + +AvPlayerSource::AvPlayerSource(AvPlayerStateCallback& state, std::string_view path, + const SceAvPlayerInitData& init_data, + SceAvPlayerSourceType source_type) + : m_state(state), m_memory_replacement(init_data.memory_replacement), + m_num_output_video_framebuffers( + std::min(std::max(2, init_data.num_output_video_framebuffers), 16)) { + AVFormatContext* context = avformat_alloc_context(); + if (init_data.file_replacement.open != nullptr) { + m_up_data_streamer = + std::make_unique(init_data.file_replacement, path); + context->pb = m_up_data_streamer->GetContext(); + ASSERT(!AVPLAYER_IS_ERROR(avformat_open_input(&context, nullptr, nullptr, nullptr))); + } else { + const auto mnt = Common::Singleton::Instance(); + const auto filepath = mnt->GetHostPath(path); + ASSERT(!AVPLAYER_IS_ERROR( + avformat_open_input(&context, filepath.string().c_str(), nullptr, nullptr))); + } + m_avformat_context = AVFormatContextPtr(context, &ReleaseAVFormatContext); +} + +AvPlayerSource::~AvPlayerSource() { + Stop(); +} + +bool AvPlayerSource::FindStreamInfo() { + if (m_avformat_context == nullptr) { + LOG_ERROR(Lib_AvPlayer, "Could not find stream info. NULL context."); + return false; + } + if (m_avformat_context->nb_streams > 0) { + return true; + } + return avformat_find_stream_info(m_avformat_context.get(), nullptr) == 0; +} + +s32 AvPlayerSource::GetStreamCount() { + if (m_avformat_context == nullptr) { + LOG_ERROR(Lib_AvPlayer, "Could not get stream count. NULL context."); + return -1; + } + LOG_INFO(Lib_AvPlayer, "Stream Count: {}", m_avformat_context->nb_streams); + return m_avformat_context->nb_streams; +} + +static s32 CodecTypeToStreamType(AVMediaType codec_type) { + switch (codec_type) { + case AVMediaType::AVMEDIA_TYPE_VIDEO: + return SCE_AVPLAYER_VIDEO; + case AVMediaType::AVMEDIA_TYPE_AUDIO: + return SCE_AVPLAYER_AUDIO; + case AVMediaType::AVMEDIA_TYPE_SUBTITLE: + return SCE_AVPLAYER_TIMEDTEXT; + default: + LOG_ERROR(Lib_AvPlayer, "Unexpected AVMediaType {}", magic_enum::enum_name(codec_type)); + return -1; + } +} + +static f32 AVRationalToF32(const AVRational& rational) { + return f32(rational.num) / rational.den; +} + +s32 AvPlayerSource::GetStreamInfo(u32 stream_index, SceAvPlayerStreamInfo& info) { + info = {}; + if (m_avformat_context == nullptr || stream_index >= m_avformat_context->nb_streams) { + LOG_ERROR(Lib_AvPlayer, "Could not get stream {} info.", stream_index); + return -1; + } + const auto p_stream = m_avformat_context->streams[stream_index]; + if (p_stream == nullptr || p_stream->codecpar == nullptr) { + LOG_ERROR(Lib_AvPlayer, "Could not get stream {} info. NULL stream.", stream_index); + return -1; + } + info.type = CodecTypeToStreamType(p_stream->codecpar->codec_type); + info.start_time = p_stream->start_time; + info.duration = p_stream->duration; + const auto p_lang_node = av_dict_get(p_stream->metadata, "language", nullptr, 0); + if (p_lang_node != nullptr) { + LOG_INFO(Lib_AvPlayer, "Stream {} language = {}", stream_index, p_lang_node->value); + } else { + LOG_WARNING(Lib_AvPlayer, "Stream {} language is unknown", stream_index); + } + switch (info.type) { + case SCE_AVPLAYER_VIDEO: + LOG_INFO(Lib_AvPlayer, "Stream {} is a video stream.", stream_index); + info.details.video.aspect_ratio = + f32(p_stream->codecpar->width) / p_stream->codecpar->height; + info.details.video.width = p_stream->codecpar->width; + info.details.video.height = p_stream->codecpar->height; + if (p_lang_node != nullptr) { + std::memcpy(info.details.video.language_code, p_lang_node->value, + std::min(strlen(p_lang_node->value), size_t(3))); + } + break; + case SCE_AVPLAYER_AUDIO: + LOG_INFO(Lib_AvPlayer, "Stream {} is an audio stream.", stream_index); + info.details.audio.channel_count = p_stream->codecpar->ch_layout.nb_channels; + info.details.audio.sample_rate = p_stream->codecpar->sample_rate; + info.details.audio.size = 0; // sceAvPlayerGetStreamInfo() is expected to set this to 0 + if (p_lang_node != nullptr) { + std::memcpy(info.details.audio.language_code, p_lang_node->value, + std::min(strlen(p_lang_node->value), size_t(3))); + } + break; + case SCE_AVPLAYER_TIMEDTEXT: + LOG_WARNING(Lib_AvPlayer, "Stream {} is a timedtext stream.", stream_index); + info.details.subs.font_size = 12; + info.details.subs.text_size = 12; + if (p_lang_node != nullptr) { + std::memcpy(info.details.subs.language_code, p_lang_node->value, + std::min(strlen(p_lang_node->value), size_t(3))); + } + break; + default: + LOG_ERROR(Lib_AvPlayer, "Stream {} type is unknown: {}.", stream_index, info.type); + return -1; + } + return 0; +} + +bool AvPlayerSource::EnableStream(u32 stream_index) { + if (m_avformat_context == nullptr || stream_index >= m_avformat_context->nb_streams) { + return false; + } + const auto stream = m_avformat_context->streams[stream_index]; + const auto decoder = avcodec_find_decoder(stream->codecpar->codec_id); + if (decoder == nullptr) { + return false; + } + switch (stream->codecpar->codec_type) { + case AVMediaType::AVMEDIA_TYPE_VIDEO: { + m_video_stream_index = stream_index; + m_video_codec_context = + AVCodecContextPtr(avcodec_alloc_context3(decoder), &ReleaseAVCodecContext); + if (avcodec_parameters_to_context(m_video_codec_context.get(), stream->codecpar) < 0) { + LOG_ERROR(Lib_AvPlayer, "Could not copy stream {} avcodec parameters to context.", + stream_index); + return false; + } + if (avcodec_open2(m_video_codec_context.get(), decoder, nullptr) < 0) { + LOG_ERROR(Lib_AvPlayer, "Could not open avcodec for video stream {}.", stream_index); + return false; + } + const auto width = m_video_codec_context->width; + const auto size = (width * m_video_codec_context->height * 3) / 2; + for (u64 index = 0; index < m_num_output_video_framebuffers; ++index) { + m_video_buffers.Push(FrameBuffer(m_memory_replacement, 0x100, size)); + } + LOG_INFO(Lib_AvPlayer, "Video stream {} enabled", stream_index); + break; + } + case AVMediaType::AVMEDIA_TYPE_AUDIO: { + m_audio_stream_index = stream_index; + m_audio_codec_context = + AVCodecContextPtr(avcodec_alloc_context3(decoder), &ReleaseAVCodecContext); + if (avcodec_parameters_to_context(m_audio_codec_context.get(), stream->codecpar) < 0) { + LOG_ERROR(Lib_AvPlayer, "Could not copy stream {} avcodec parameters to context.", + stream_index); + return false; + } + if (avcodec_open2(m_audio_codec_context.get(), decoder, nullptr) < 0) { + LOG_ERROR(Lib_AvPlayer, "Could not open avcodec for audio stream {}.", stream_index); + return false; + } + const auto num_channels = m_audio_codec_context->ch_layout.nb_channels; + const auto align = num_channels * sizeof(u16); + const auto size = num_channels * sizeof(u16) * 1024; + for (u64 index = 0; index < 4; ++index) { + m_audio_buffers.Push(FrameBuffer(m_memory_replacement, 0x100, size)); + } + LOG_INFO(Lib_AvPlayer, "Audio stream {} enabled", stream_index); + break; + } + default: + LOG_WARNING(Lib_AvPlayer, "Unknown stream type {} for stream {}", + magic_enum::enum_name(stream->codecpar->codec_type), stream_index); + break; + } + return true; +} + +void AvPlayerSource::SetLooping(bool is_looping) { + m_is_looping = is_looping; +} + +std::optional AvPlayerSource::HasFrames(u32 num_frames) { + return m_video_packets.Size() > num_frames || m_is_eof; +} + +s32 AvPlayerSource::Start() { + std::unique_lock lock(m_state_mutex); + + if (m_audio_codec_context == nullptr && m_video_codec_context == nullptr) { + LOG_ERROR(Lib_AvPlayer, "Could not start playback. NULL context."); + return -1; + } + m_demuxer_thread = std::jthread([this](std::stop_token stop) { this->DemuxerThread(stop); }); + m_video_decoder_thread = + std::jthread([this](std::stop_token stop) { this->VideoDecoderThread(stop); }); + m_audio_decoder_thread = + std::jthread([this](std::stop_token stop) { this->AudioDecoderThread(stop); }); + m_start_time = std::chrono::high_resolution_clock::now(); + return 0; +} + +bool AvPlayerSource::Stop() { + std::unique_lock lock(m_state_mutex); + + if (!HasRunningThreads()) { + LOG_WARNING(Lib_AvPlayer, "Could not stop playback: already stopped."); + return false; + } + + m_video_decoder_thread.request_stop(); + m_audio_decoder_thread.request_stop(); + m_demuxer_thread.request_stop(); + if (m_demuxer_thread.joinable()) { + m_demuxer_thread.join(); + } + if (m_video_decoder_thread.joinable()) { + m_video_decoder_thread.join(); + } + if (m_audio_decoder_thread.joinable()) { + m_audio_decoder_thread.join(); + } + if (m_current_audio_frame.has_value()) { + m_audio_buffers.Push(std::move(m_current_audio_frame.value())); + m_current_audio_frame.reset(); + } + if (m_current_video_frame.has_value()) { + m_video_buffers.Push(std::move(m_current_video_frame.value())); + m_current_video_frame.reset(); + } + m_stop_cv.Notify(); + + m_audio_packets.Clear(); + m_video_packets.Clear(); + m_audio_frames.Clear(); + m_video_frames.Clear(); + return true; +} + +bool AvPlayerSource::GetVideoData(SceAvPlayerFrameInfo& video_info) { + if (!IsActive()) { + return false; + } + + SceAvPlayerFrameInfoEx info{}; + if (!GetVideoData(info)) { + return false; + } + video_info = {}; + video_info.timestamp = u64(info.timestamp); + video_info.pData = reinterpret_cast(info.pData); + video_info.details.video.aspect_ratio = info.details.video.aspect_ratio; + video_info.details.video.width = info.details.video.width; + video_info.details.video.height = info.details.video.height; + return true; +} + +static void CopyNV12Data(u8* dst, const AVFrame& src) { + std::memcpy(dst, src.data[0], src.width * src.height); + std::memcpy(dst + src.width * src.height, src.data[1], (src.width * src.height) / 2); +} + +bool AvPlayerSource::GetVideoData(SceAvPlayerFrameInfoEx& video_info) { + if (!IsActive()) { + return false; + } + + m_video_frames_cv.Wait([this] { return m_video_frames.Size() != 0 || m_is_eof; }); + + auto frame = m_video_frames.Pop(); + if (!frame.has_value()) { + LOG_WARNING(Lib_AvPlayer, "Could get video frame. EOF reached."); + return false; + } + + { + using namespace std::chrono; + auto elapsed_time = + duration_cast(high_resolution_clock::now() - m_start_time).count(); + if (elapsed_time < frame->info.timestamp) { + if (m_stop_cv.WaitFor(milliseconds(frame->info.timestamp - elapsed_time), + [&] { return elapsed_time >= frame->info.timestamp; })) { + return false; + } + } + } + + // return the buffer to the queue + if (m_current_video_frame.has_value()) { + m_video_buffers.Push(std::move(m_current_video_frame.value())); + m_video_buffers_cv.Notify(); + } + m_current_video_frame = std::move(frame->buffer); + video_info = frame->info; + return true; +} + +bool AvPlayerSource::GetAudioData(SceAvPlayerFrameInfo& audio_info) { + if (!IsActive()) { + return false; + } + + m_audio_frames_cv.Wait([this] { return m_audio_frames.Size() != 0 || m_is_eof; }); + + auto frame = m_audio_frames.Pop(); + if (!frame.has_value()) { + LOG_WARNING(Lib_AvPlayer, "Could get audio frame. EOF reached."); + return false; + } + + { + using namespace std::chrono; + auto elapsed_time = + duration_cast(high_resolution_clock::now() - m_start_time).count(); + if (elapsed_time < frame->info.timestamp) { + if (m_stop_cv.WaitFor(milliseconds(frame->info.timestamp - elapsed_time), + [&] { return elapsed_time >= frame->info.timestamp; })) { + return false; + } + } + } + + // return the buffer to the queue + if (m_current_audio_frame.has_value()) { + m_audio_buffers.Push(std::move(m_current_audio_frame.value())); + m_audio_buffers_cv.Notify(); + } + m_current_audio_frame = std::move(frame->buffer); + + audio_info = {}; + audio_info.timestamp = frame->info.timestamp; + audio_info.pData = reinterpret_cast(frame->info.pData); + audio_info.details.audio.size = frame->info.details.audio.size; + audio_info.details.audio.channel_count = frame->info.details.audio.channel_count; + return true; +} + +u64 AvPlayerSource::CurrentTime() { + using namespace std::chrono; + return duration_cast(high_resolution_clock::now() - m_start_time).count(); +} + +bool AvPlayerSource::IsActive() { + return !m_is_eof || m_audio_packets.Size() != 0 || m_video_packets.Size() != 0 || + m_video_frames.Size() != 0 || m_audio_frames.Size() != 0; +} + +void AvPlayerSource::ReleaseAVPacket(AVPacket* packet) { + if (packet != nullptr) { + av_packet_free(&packet); + } +} + +void AvPlayerSource::ReleaseAVFrame(AVFrame* frame) { + if (frame != nullptr) { + av_frame_free(&frame); + } +} + +void AvPlayerSource::ReleaseAVCodecContext(AVCodecContext* context) { + if (context != nullptr) { + avcodec_free_context(&context); + } +} + +void AvPlayerSource::ReleaseSWRContext(SwrContext* context) { + if (context != nullptr) { + swr_free(&context); + } +} + +void AvPlayerSource::ReleaseSWSContext(SwsContext* context) { + if (context != nullptr) { + sws_freeContext(context); + } +} + +void AvPlayerSource::ReleaseAVFormatContext(AVFormatContext* context) { + if (context != nullptr) { + avformat_close_input(&context); + } +} + +void AvPlayerSource::DemuxerThread(std::stop_token stop) { + using namespace std::chrono; + if (!m_audio_stream_index.has_value() && !m_video_stream_index.has_value()) { + LOG_WARNING(Lib_AvPlayer, "Could not start DEMUXER thread. No streams enabled."); + return; + } + LOG_INFO(Lib_AvPlayer, "Demuxer Thread started"); + + while (!stop.stop_requested()) { + if (m_video_packets.Size() > 30 && m_audio_packets.Size() > 8) { + std::this_thread::sleep_for(milliseconds(5)); + continue; + } + AVPacketPtr up_packet(av_packet_alloc(), &ReleaseAVPacket); + const auto res = av_read_frame(m_avformat_context.get(), up_packet.get()); + if (res < 0) { + if (res == AVERROR_EOF) { + if (m_is_looping) { + LOG_INFO(Lib_AvPlayer, "EOF reached in demuxer. Looping the source..."); + avio_seek(m_avformat_context->pb, 0, SEEK_SET); + if (m_video_stream_index.has_value()) { + const auto index = m_video_stream_index.value(); + const auto stream = m_avformat_context->streams[index]; + avformat_seek_file(m_avformat_context.get(), index, 0, 0, stream->duration, + 0); + } + if (m_audio_stream_index.has_value()) { + const auto index = m_audio_stream_index.value(); + const auto stream = m_avformat_context->streams[index]; + avformat_seek_file(m_avformat_context.get(), index, 0, 0, stream->duration, + 0); + } + continue; + } else { + LOG_INFO(Lib_AvPlayer, "EOF reached in demuxer. Exiting."); + break; + } + } else { + LOG_ERROR(Lib_AvPlayer, "Could not read AV frame: error = {}", res); + m_state.OnError(); + return; + } + break; + } + if (up_packet->stream_index == m_video_stream_index) { + m_video_packets.Push(std::move(up_packet)); + m_video_packets_cv.Notify(); + } else if (up_packet->stream_index == m_audio_stream_index) { + m_audio_packets.Push(std::move(up_packet)); + m_audio_packets_cv.Notify(); + } + } + + m_is_eof = true; + + m_video_packets_cv.Notify(); + m_audio_packets_cv.Notify(); + m_video_frames_cv.Notify(); + m_audio_frames_cv.Notify(); + + if (m_video_decoder_thread.joinable()) { + m_video_decoder_thread.join(); + } + if (m_audio_decoder_thread.joinable()) { + m_audio_decoder_thread.join(); + } + m_state.OnEOF(); + + LOG_INFO(Lib_AvPlayer, "Demuxer Thread exited normaly"); +} + +AvPlayerSource::AVFramePtr AvPlayerSource::ConvertVideoFrame(const AVFrame& frame) { + auto nv12_frame = AVFramePtr{av_frame_alloc(), &ReleaseAVFrame}; + nv12_frame->pts = frame.pts; + nv12_frame->pkt_dts = frame.pkt_dts < 0 ? 0 : frame.pkt_dts; + nv12_frame->format = AV_PIX_FMT_NV12; + nv12_frame->width = frame.width; + nv12_frame->height = frame.height; + nv12_frame->sample_aspect_ratio = frame.sample_aspect_ratio; + + av_frame_get_buffer(nv12_frame.get(), 0); + + if (m_sws_context == nullptr) { + m_sws_context = + SWSContextPtr(sws_getContext(frame.width, frame.height, AVPixelFormat(frame.format), + frame.width, frame.height, AV_PIX_FMT_NV12, + SWS_FAST_BILINEAR, nullptr, nullptr, nullptr), + &ReleaseSWSContext); + } + const auto res = sws_scale(m_sws_context.get(), frame.data, frame.linesize, 0, frame.height, + nv12_frame->data, nv12_frame->linesize); + if (res < 0) { + LOG_ERROR(Lib_AvPlayer, "Could not convert to NV12: {}", av_err2str(res)); + return AVFramePtr{nullptr, &ReleaseAVFrame}; + } + return nv12_frame; +} + +Frame AvPlayerSource::PrepareVideoFrame(FrameBuffer buffer, const AVFrame& frame) { + ASSERT(frame.format == AV_PIX_FMT_NV12); + + auto p_buffer = buffer.GetBuffer(); + CopyNV12Data(p_buffer, frame); + + const auto pkt_dts = u64(frame.pkt_dts) * 1000; + const auto stream = m_avformat_context->streams[m_video_stream_index.value()]; + const auto time_base = stream->time_base; + const auto den = time_base.den; + const auto num = time_base.num; + const auto timestamp = (num != 0 && den > 1) ? (pkt_dts * num) / den : pkt_dts; + + return Frame{ + .buffer = std::move(buffer), + .info = + { + .pData = p_buffer, + .timestamp = timestamp, + .details = + { + .video = + { + .width = u32(frame.width), + .height = u32(frame.height), + .aspect_ratio = AVRationalToF32(frame.sample_aspect_ratio), + .pitch = u32(frame.linesize[0]), + .luma_bit_depth = 8, + .chroma_bit_depth = 8, + }, + }, + }, + }; +} + +void AvPlayerSource::VideoDecoderThread(std::stop_token stop) { + using namespace std::chrono; + LOG_INFO(Lib_AvPlayer, "Video Decoder Thread started"); + while ((!m_is_eof || m_video_packets.Size() != 0) && !stop.stop_requested()) { + if (!m_video_packets_cv.Wait(stop, + [this] { return m_video_packets.Size() != 0 || m_is_eof; })) { + continue; + } + const auto packet = m_video_packets.Pop(); + if (!packet.has_value()) { + continue; + } + + auto res = avcodec_send_packet(m_video_codec_context.get(), packet->get()); + if (res < 0 && res != AVERROR(EAGAIN)) { + m_state.OnError(); + LOG_ERROR(Lib_AvPlayer, "Could not send packet to the video codec. Error = {}", + av_err2str(res)); + return; + } + while (res >= 0) { + if (!m_video_buffers_cv.Wait(stop, [this] { return m_video_buffers.Size() != 0; })) { + break; + } + if (m_video_buffers.Size() == 0) { + continue; + } + auto up_frame = AVFramePtr(av_frame_alloc(), &ReleaseAVFrame); + res = avcodec_receive_frame(m_video_codec_context.get(), up_frame.get()); + if (res < 0) { + if (res == AVERROR_EOF) { + LOG_INFO(Lib_AvPlayer, "EOF reached in video decoder"); + return; + } else if (res != AVERROR(EAGAIN)) { + LOG_ERROR(Lib_AvPlayer, + "Could not receive frame from the video codec. Error = {}", + av_err2str(res)); + m_state.OnError(); + return; + } + } else { + auto buffer = m_video_buffers.Pop(); + if (!buffer.has_value()) { + // Video buffers queue was cleared. This means that player was stopped. + break; + } + if (up_frame->format != AV_PIX_FMT_NV12) { + const auto nv12_frame = ConvertVideoFrame(*up_frame); + m_video_frames.Push(PrepareVideoFrame(std::move(buffer.value()), *nv12_frame)); + } else { + m_video_frames.Push(PrepareVideoFrame(std::move(buffer.value()), *up_frame)); + } + m_video_frames_cv.Notify(); + } + } + } + + LOG_INFO(Lib_AvPlayer, "Video Decoder Thread exited normaly"); +} + +AvPlayerSource::AVFramePtr AvPlayerSource::ConvertAudioFrame(const AVFrame& frame) { + auto pcm16_frame = AVFramePtr{av_frame_alloc(), &ReleaseAVFrame}; + pcm16_frame->pts = frame.pts; + pcm16_frame->pkt_dts = frame.pkt_dts < 0 ? 0 : frame.pkt_dts; + pcm16_frame->format = AV_SAMPLE_FMT_S16; + pcm16_frame->ch_layout = frame.ch_layout; + pcm16_frame->sample_rate = frame.sample_rate; + + if (m_swr_context == nullptr) { + SwrContext* swr_context = nullptr; + AVChannelLayout in_ch_layout = frame.ch_layout; + AVChannelLayout out_ch_layout = frame.ch_layout; + swr_alloc_set_opts2(&swr_context, &out_ch_layout, AV_SAMPLE_FMT_S16, frame.sample_rate, + &in_ch_layout, AVSampleFormat(frame.format), frame.sample_rate, 0, + nullptr); + m_swr_context = SWRContextPtr(swr_context, &ReleaseSWRContext); + swr_init(m_swr_context.get()); + } + const auto res = swr_convert_frame(m_swr_context.get(), pcm16_frame.get(), &frame); + if (res < 0) { + LOG_ERROR(Lib_AvPlayer, "Could not convert to NV12: {}", av_err2str(res)); + return AVFramePtr{nullptr, &ReleaseAVFrame}; + } + return pcm16_frame; +} + +Frame AvPlayerSource::PrepareAudioFrame(FrameBuffer buffer, const AVFrame& frame) { + ASSERT(frame.format == AV_SAMPLE_FMT_S16); + ASSERT(frame.nb_samples <= 1024); + + auto p_buffer = buffer.GetBuffer(); + const auto size = frame.ch_layout.nb_channels * frame.nb_samples * sizeof(u16); + std::memcpy(p_buffer, frame.data[0], size); + + const auto pkt_dts = u64(frame.pkt_dts) * 1000; + const auto stream = m_avformat_context->streams[m_audio_stream_index.value()]; + const auto time_base = stream->time_base; + const auto den = time_base.den; + const auto num = time_base.num; + const auto timestamp = (num != 0 && den > 1) ? (pkt_dts * num) / den : pkt_dts; + + return Frame{ + .buffer = std::move(buffer), + .info = + { + .pData = p_buffer, + .timestamp = timestamp, + .details = + { + .audio = + { + .channel_count = u16(frame.ch_layout.nb_channels), + .size = u32(size), + }, + }, + }, + }; +} + +void AvPlayerSource::AudioDecoderThread(std::stop_token stop) { + using namespace std::chrono; + LOG_INFO(Lib_AvPlayer, "Audio Decoder Thread started"); + while ((!m_is_eof || m_audio_packets.Size() != 0) && !stop.stop_requested()) { + if (!m_audio_packets_cv.Wait(stop, + [this] { return m_audio_packets.Size() != 0 || m_is_eof; })) { + continue; + } + const auto packet = m_audio_packets.Pop(); + if (!packet.has_value()) { + continue; + } + auto res = avcodec_send_packet(m_audio_codec_context.get(), packet->get()); + if (res < 0 && res != AVERROR(EAGAIN)) { + m_state.OnError(); + LOG_ERROR(Lib_AvPlayer, "Could not send packet to the audio codec. Error = {}", + av_err2str(res)); + return; + } + while (res >= 0) { + if (!m_audio_buffers_cv.Wait(stop, [this] { return m_audio_buffers.Size() != 0; })) { + break; + } + if (m_audio_buffers.Size() == 0) { + continue; + } + + auto up_frame = AVFramePtr(av_frame_alloc(), &ReleaseAVFrame); + res = avcodec_receive_frame(m_audio_codec_context.get(), up_frame.get()); + if (res < 0) { + if (res == AVERROR_EOF) { + LOG_INFO(Lib_AvPlayer, "EOF reached in audio decoder"); + return; + } else if (res != AVERROR(EAGAIN)) { + m_state.OnError(); + LOG_ERROR(Lib_AvPlayer, + "Could not receive frame from the audio codec. Error = {}", + av_err2str(res)); + return; + } + } else { + auto buffer = m_audio_buffers.Pop(); + if (!buffer.has_value()) { + // Audio buffers queue was cleared. This means that player was stopped. + break; + } + if (up_frame->format != AV_SAMPLE_FMT_S16) { + const auto pcm16_frame = ConvertAudioFrame(*up_frame); + m_audio_frames.Push(PrepareAudioFrame(std::move(buffer.value()), *pcm16_frame)); + } else { + m_audio_frames.Push(PrepareAudioFrame(std::move(buffer.value()), *up_frame)); + } + m_audio_frames_cv.Notify(); + } + } + } + + LOG_INFO(Lib_AvPlayer, "Audio Decoder Thread exited normaly"); +} + +bool AvPlayerSource::HasRunningThreads() const { + return m_demuxer_thread.joinable() || m_video_decoder_thread.joinable() || + m_audio_decoder_thread.joinable(); +} + +} // namespace Libraries::AvPlayer diff --git a/src/core/libraries/avplayer/avplayer_source.h b/src/core/libraries/avplayer/avplayer_source.h new file mode 100644 index 000000000..7144e7ee4 --- /dev/null +++ b/src/core/libraries/avplayer/avplayer_source.h @@ -0,0 +1,219 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "avplayer.h" +#include "avplayer_common.h" +#include "avplayer_data_streamer.h" + +#include "common/polyfill_thread.h" +#include "common/types.h" +#include "core/libraries/kernel/thread_management.h" + +#include +#include +#include +#include +#include +#include + +struct AVCodecContext; +struct AVFormatContext; +struct AVFrame; +struct AVIOContext; +struct AVPacket; +struct SwrContext; +struct SwsContext; + +namespace Libraries::AvPlayer { + +class AvPlayerStateCallback { +public: + virtual ~AvPlayerStateCallback() = default; + + virtual void OnWarning(u32 id) = 0; + virtual void OnError() = 0; + virtual void OnEOF() = 0; +}; + +class FrameBuffer { +public: + FrameBuffer(const SceAvPlayerMemAllocator& memory_replacement, u32 align, u32 size) noexcept + : m_memory_replacement(memory_replacement), + m_data(Allocate(memory_replacement, align, size)) { + ASSERT_MSG(m_data, "Could not allocated frame buffer."); + } + + ~FrameBuffer() { + if (m_data != nullptr) { + Deallocate(m_memory_replacement, m_data); + m_data = {}; + } + } + + FrameBuffer(const FrameBuffer&) noexcept = delete; + FrameBuffer& operator=(const FrameBuffer&) noexcept = delete; + + FrameBuffer(FrameBuffer&& r) noexcept + : m_memory_replacement(r.m_memory_replacement), m_data(r.m_data) { + r.m_data = nullptr; + }; + + FrameBuffer& operator=(FrameBuffer&& r) noexcept { + std::swap(m_data, r.m_data); + return *this; + } + + u8* GetBuffer() const noexcept { + return m_data; + } + +private: + static u8* Allocate(const SceAvPlayerMemAllocator& memory_replacement, u32 align, u32 size) { + return reinterpret_cast( + memory_replacement.allocate(memory_replacement.object_ptr, align, size)); + } + + static void Deallocate(const SceAvPlayerMemAllocator& memory_replacement, void* ptr) { + memory_replacement.deallocate(memory_replacement.object_ptr, ptr); + } + + const SceAvPlayerMemAllocator& m_memory_replacement; + u8* m_data = nullptr; +}; + +struct Frame { + FrameBuffer buffer; + SceAvPlayerFrameInfoEx info; +}; + +class EventCV { +public: + template + void Wait(Pred pred) { + std::unique_lock lock(m_mutex); + m_cv.wait(lock, std::move(pred)); + } + + template + bool Wait(std::stop_token stop, Pred pred) { + std::unique_lock lock(m_mutex); + return m_cv.wait(lock, std::move(stop), std::move(pred)); + } + + template + bool WaitFor(std::chrono::duration timeout, Pred pred) { + std::unique_lock lock(m_mutex); + return m_cv.wait_for(lock, timeout, std::move(pred)); + } + + void Notify() { + std::unique_lock lock(m_mutex); + m_cv.notify_all(); + } + +private: + std::mutex m_mutex{}; + std::condition_variable_any m_cv{}; +}; + +class AvPlayerSource { +public: + AvPlayerSource(AvPlayerStateCallback& state, std::string_view path, + const SceAvPlayerInitData& init_data, SceAvPlayerSourceType source_type); + ~AvPlayerSource(); + + bool FindStreamInfo(); + s32 GetStreamCount(); + s32 GetStreamInfo(u32 stream_index, SceAvPlayerStreamInfo& info); + bool EnableStream(u32 stream_index); + void SetLooping(bool is_looping); + std::optional HasFrames(u32 num_frames); + s32 Start(); + bool Stop(); + bool GetAudioData(SceAvPlayerFrameInfo& audio_info); + bool GetVideoData(SceAvPlayerFrameInfo& video_info); + bool GetVideoData(SceAvPlayerFrameInfoEx& video_info); + u64 CurrentTime(); + bool IsActive(); + +private: + using ScePthread = Kernel::ScePthread; + + static void ReleaseAVPacket(AVPacket* packet); + static void ReleaseAVFrame(AVFrame* frame); + static void ReleaseAVCodecContext(AVCodecContext* context); + static void ReleaseSWRContext(SwrContext* context); + static void ReleaseSWSContext(SwsContext* context); + static void ReleaseAVFormatContext(AVFormatContext* context); + + using AVPacketPtr = std::unique_ptr; + using AVFramePtr = std::unique_ptr; + using AVCodecContextPtr = std::unique_ptr; + using SWRContextPtr = std::unique_ptr; + using SWSContextPtr = std::unique_ptr; + using AVFormatContextPtr = std::unique_ptr; + + void DemuxerThread(std::stop_token stop); + void VideoDecoderThread(std::stop_token stop); + void AudioDecoderThread(std::stop_token stop); + + bool HasRunningThreads() const; + + AVFramePtr ConvertAudioFrame(const AVFrame& frame); + AVFramePtr ConvertVideoFrame(const AVFrame& frame); + + Frame PrepareAudioFrame(FrameBuffer buffer, const AVFrame& frame); + Frame PrepareVideoFrame(FrameBuffer buffer, const AVFrame& frame); + + AvPlayerStateCallback& m_state; + + SceAvPlayerMemAllocator m_memory_replacement{}; + u32 m_num_output_video_framebuffers{}; + + std::atomic_bool m_is_looping = false; + std::atomic_bool m_is_eof = false; + + std::unique_ptr m_up_data_streamer; + + AvPlayerQueue m_audio_buffers; + AvPlayerQueue m_video_buffers; + + AvPlayerQueue m_audio_packets; + AvPlayerQueue m_video_packets; + + AvPlayerQueue m_audio_frames; + AvPlayerQueue m_video_frames; + + std::optional m_current_video_frame; + std::optional m_current_audio_frame; + + std::optional m_video_stream_index{}; + std::optional m_audio_stream_index{}; + + EventCV m_audio_packets_cv{}; + EventCV m_audio_frames_cv{}; + EventCV m_audio_buffers_cv{}; + + EventCV m_video_packets_cv{}; + EventCV m_video_frames_cv{}; + EventCV m_video_buffers_cv{}; + + EventCV m_stop_cv{}; + + std::mutex m_state_mutex{}; + std::jthread m_demuxer_thread{}; + std::jthread m_video_decoder_thread{}; + std::jthread m_audio_decoder_thread{}; + + AVFormatContextPtr m_avformat_context{nullptr, &ReleaseAVFormatContext}; + AVCodecContextPtr m_video_codec_context{nullptr, &ReleaseAVCodecContext}; + AVCodecContextPtr m_audio_codec_context{nullptr, &ReleaseAVCodecContext}; + SWRContextPtr m_swr_context{nullptr, &ReleaseSWRContext}; + SWSContextPtr m_sws_context{nullptr, &ReleaseSWSContext}; + + std::chrono::high_resolution_clock::time_point m_start_time{}; +}; + +} // namespace Libraries::AvPlayer diff --git a/src/core/libraries/avplayer/avplayer_state.cpp b/src/core/libraries/avplayer/avplayer_state.cpp new file mode 100644 index 000000000..884cd9408 --- /dev/null +++ b/src/core/libraries/avplayer/avplayer_state.cpp @@ -0,0 +1,493 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "avplayer_file_streamer.h" +#include "avplayer_source.h" +#include "avplayer_state.h" + +#include "core/libraries/error_codes.h" +#include "core/libraries/kernel/time_management.h" + +#include + +namespace Libraries::AvPlayer { + +using namespace Kernel; + +void PS4_SYSV_ABI AvPlayerState::AutoPlayEventCallback(void* opaque, s32 event_id, s32 source_id, + void* event_data) { + auto const self = reinterpret_cast(opaque); + + if (event_id == SCE_AVPLAYER_STATE_READY) { + s32 video_stream_index = -1; + s32 audio_stream_index = -1; + s32 timedtext_stream_index = -1; + const s32 stream_count = self->GetStreamCount(); + if (AVPLAYER_IS_ERROR(stream_count)) { + return; + } + if (stream_count == 0) { + self->Stop(); + return; + } + for (u32 stream_index = 0; stream_index < stream_count; ++stream_index) { + SceAvPlayerStreamInfo info{}; + self->GetStreamInfo(stream_index, info); + + const std::string_view default_language( + reinterpret_cast(self->m_default_language)); + switch (info.type) { + case SCE_AVPLAYER_VIDEO: + if (video_stream_index == -1) { + video_stream_index = stream_index; + } + if (!default_language.empty() && + default_language == reinterpret_cast(info.details.video.language_code)) { + video_stream_index = stream_index; + } + break; + case SCE_AVPLAYER_AUDIO: + if (audio_stream_index == -1) { + audio_stream_index = stream_index; + } + if (!default_language.empty() && + default_language == reinterpret_cast(info.details.video.language_code)) { + audio_stream_index = stream_index; + } + break; + case SCE_AVPLAYER_TIMEDTEXT: + if (default_language.empty()) { + timedtext_stream_index = stream_index; + break; + } + if (default_language == reinterpret_cast(info.details.video.language_code)) { + timedtext_stream_index = stream_index; + } + break; + } + } + + if (video_stream_index != -1) { + self->EnableStream(video_stream_index); + } + if (audio_stream_index != -1) { + self->EnableStream(audio_stream_index); + } + if (timedtext_stream_index != -1) { + self->EnableStream(timedtext_stream_index); + } + self->Start(); + return; + } + + // Pass other events to the game + const auto callback = self->m_event_replacement.event_callback; + const auto ptr = self->m_event_replacement.object_ptr; + if (callback != nullptr) { + callback(ptr, event_id, 0, event_data); + } +} + +// Called inside GAME thread +AvPlayerState::AvPlayerState(const SceAvPlayerInitData& init_data) + : m_init_data(init_data), m_event_replacement(init_data.event_replacement) { + if (m_event_replacement.event_callback == nullptr || init_data.auto_start) { + m_auto_start = true; + m_init_data.event_replacement.event_callback = &AvPlayerState::AutoPlayEventCallback; + m_init_data.event_replacement.object_ptr = this; + } + if (init_data.default_language != nullptr) { + std::memcpy(m_default_language, init_data.default_language, sizeof(m_default_language)); + } + SetState(AvState::Initial); + StartControllerThread(); +} + +AvPlayerState::~AvPlayerState() { + { + std::unique_lock lock(m_source_mutex); + m_up_source.reset(); + } + if (m_controller_thread.joinable()) { + m_controller_thread.request_stop(); + m_controller_thread.join(); + } + m_event_queue.Clear(); +} + +// Called inside GAME thread +s32 AvPlayerState::AddSource(std::string_view path, SceAvPlayerSourceType source_type) { + if (path.empty()) { + LOG_ERROR(Lib_AvPlayer, "File path is empty."); + return -1; + } + + { + std::unique_lock lock(m_source_mutex); + if (m_up_source != nullptr) { + LOG_ERROR(Lib_AvPlayer, "Only one source is supported."); + return -1; + } + + m_up_source = std::make_unique(*this, path, m_init_data, source_type); + } + AddSourceEvent(); + return 0; +} + +// Called inside GAME thread +s32 AvPlayerState::GetStreamCount() { + std::shared_lock lock(m_source_mutex); + if (m_up_source == nullptr) { + LOG_ERROR(Lib_AvPlayer, "Could not get stream count. No source."); + return -1; + } + return m_up_source->GetStreamCount(); +} + +// Called inside GAME thread +s32 AvPlayerState::GetStreamInfo(u32 stream_index, SceAvPlayerStreamInfo& info) { + std::shared_lock lock(m_source_mutex); + if (m_up_source == nullptr) { + LOG_ERROR(Lib_AvPlayer, "Could not get stream {} info. No source.", stream_index); + return -1; + } + return m_up_source->GetStreamInfo(stream_index, info); +} + +// Called inside GAME thread +s32 AvPlayerState::Start() { + std::shared_lock lock(m_source_mutex); + if (m_up_source == nullptr || m_up_source->Start() < 0) { + LOG_ERROR(Lib_AvPlayer, "Could not start playback."); + return -1; + } + SetState(AvState::Play); + OnPlaybackStateChanged(AvState::Play); + return 0; +} + +void AvPlayerState::AvControllerThread(std::stop_token stop) { + using std::chrono::milliseconds; + + while (!stop.stop_requested()) { + if (m_event_queue.Size() != 0) { + ProcessEvent(); + continue; + } + std::this_thread::sleep_for(milliseconds(5)); + UpdateBufferingState(); + } +} + +// Called inside GAME thread +void AvPlayerState::AddSourceEvent() { + SetState(AvState::AddingSource); + m_event_queue.Push(AvPlayerEvent{ + .event = AvEventType::AddSource, + }); +} + +void AvPlayerState::WarningEvent(s32 id) { + m_event_queue.Push(AvPlayerEvent{ + .event = AvEventType::WarningId, + .payload = + { + .error = id, + }, + }); +} + +// Called inside GAME thread +void AvPlayerState::StartControllerThread() { + m_controller_thread = + std::jthread([this](std::stop_token stop) { this->AvControllerThread(stop); }); +} + +// Called inside GAME thread +bool AvPlayerState::EnableStream(u32 stream_index) { + std::shared_lock lock(m_source_mutex); + if (m_up_source == nullptr) { + return false; + } + return m_up_source->EnableStream(stream_index); +} + +// Called inside GAME thread +bool AvPlayerState::Stop() { + std::shared_lock lock(m_source_mutex); + if (m_up_source == nullptr || m_current_state == AvState::Stop) { + return false; + } + if (!SetState(AvState::Stop)) { + return false; + } + OnPlaybackStateChanged(AvState::Stop); + return m_up_source->Stop(); +} + +bool AvPlayerState::GetVideoData(SceAvPlayerFrameInfo& video_info) { + std::shared_lock lock(m_source_mutex); + if (m_up_source == nullptr) { + return false; + } + return m_up_source->GetVideoData(video_info); +} + +bool AvPlayerState::GetVideoData(SceAvPlayerFrameInfoEx& video_info) { + std::shared_lock lock(m_source_mutex); + if (m_up_source == nullptr) { + return false; + } + return m_up_source->GetVideoData(video_info); +} + +bool AvPlayerState::GetAudioData(SceAvPlayerFrameInfo& audio_info) { + std::shared_lock lock(m_source_mutex); + if (m_up_source == nullptr) { + return false; + } + return m_up_source->GetAudioData(audio_info); +} + +bool AvPlayerState::IsActive() { + std::shared_lock lock(m_source_mutex); + if (m_up_source == nullptr) { + return false; + } + return m_current_state != AvState::Stop && m_current_state != AvState::Error && + m_current_state != AvState::EndOfFile && m_up_source->IsActive(); +} + +u64 AvPlayerState::CurrentTime() { + std::shared_lock lock(m_source_mutex); + if (m_up_source == nullptr) { + LOG_ERROR(Lib_AvPlayer, "Could not get current time. No source."); + return 0; + } + return m_up_source->CurrentTime(); +} + +bool AvPlayerState::SetLooping(bool is_looping) { + std::shared_lock lock(m_source_mutex); + if (m_up_source == nullptr) { + LOG_ERROR(Lib_AvPlayer, "Could not set loop flag. No source."); + return false; + } + m_up_source->SetLooping(is_looping); + return true; +} + +// May be called from different threads +void AvPlayerState::OnWarning(u32 id) { + // Forward to CONTROLLER thread + WarningEvent(id); +} + +void AvPlayerState::OnError() { + SetState(AvState::Error); + OnPlaybackStateChanged(AvState::Error); +} + +void AvPlayerState::OnEOF() { + SetState(AvState::EndOfFile); +} + +// Called inside CONTROLLER thread +void AvPlayerState::OnPlaybackStateChanged(AvState state) { + switch (state) { + case AvState::Ready: { + EmitEvent(SCE_AVPLAYER_STATE_READY); + break; + } + case AvState::Play: { + EmitEvent(SCE_AVPLAYER_STATE_PLAY); + break; + } + case AvState::Stop: { + EmitEvent(SCE_AVPLAYER_STATE_STOP); + break; + } + case AvState::Pause: { + EmitEvent(SCE_AVPLAYER_STATE_PAUSE); + break; + } + case AvState::Buffering: { + EmitEvent(SCE_AVPLAYER_STATE_BUFFERING); + break; + } + default: + break; + } +} + +// Called inside CONTROLLER and GAME threads +bool AvPlayerState::SetState(AvState state) { + std::lock_guard guard(m_state_machine_mutex); + + if (!IsStateTransitionValid(state)) { + LOG_ERROR(Lib_AvPlayer, "Invalid state transition: {} -> {}", + magic_enum::enum_name(m_current_state.load()), magic_enum::enum_name(state)); + return false; + } + m_previous_state.store(m_current_state); + m_current_state.store(state); + return true; +} + +// Called inside CONTROLLER thread +std::optional AvPlayerState::OnBufferingCheckEvent(u32 num_frames) { + std::shared_lock lock(m_source_mutex); + if (!m_up_source) { + return std::nullopt; + } + return m_up_source->HasFrames(num_frames); +} + +// Called inside CONTROLLER thread +void AvPlayerState::EmitEvent(SceAvPlayerEvents event_id, void* event_data) { + LOG_INFO(Lib_AvPlayer, "Sending event to the game: id = {}", magic_enum::enum_name(event_id)); + const auto callback = m_init_data.event_replacement.event_callback; + if (callback) { + const auto ptr = m_init_data.event_replacement.object_ptr; + callback(ptr, event_id, 0, event_data); + } +} + +// Called inside CONTROLLER thread +void AvPlayerState::ProcessEvent() { + if (m_current_state == AvState::Jump) { + return; + } + + std::lock_guard guard(m_event_handler_mutex); + + auto event = m_event_queue.Pop(); + if (!event.has_value()) { + return; + } + switch (event->event) { + case AvEventType::WarningId: { + OnWarning(event->payload.error); + break; + } + case AvEventType::RevertState: { + SetState(m_previous_state.load()); + break; + } + case AvEventType::AddSource: { + std::shared_lock lock(m_source_mutex); + if (m_up_source->FindStreamInfo()) { + SetState(AvState::Ready); + OnPlaybackStateChanged(AvState::Ready); + } else { + OnWarning(ORBIS_AVPLAYER_ERROR_NOT_SUPPORTED); + SetState(AvState::Error); + } + break; + } + case AvEventType::Error: { + OnWarning(event->payload.error); + SetState(AvState::Error); + break; + } + default: + break; + } +} + +// Called inside CONTROLLER thread +void AvPlayerState::UpdateBufferingState() { + if (m_current_state == AvState::Buffering) { + const auto has_frames = OnBufferingCheckEvent(10); + if (!has_frames.has_value()) { + return; + } + if (has_frames.value()) { + const auto state = + m_previous_state >= AvState::C0x0B ? m_previous_state.load() : AvState::Play; + SetState(state); + OnPlaybackStateChanged(state); + } + } else if (m_current_state == AvState::Play) { + const auto has_frames = OnBufferingCheckEvent(0); + if (!has_frames.has_value()) { + return; + } + if (!has_frames.value()) { + SetState(AvState::Buffering); + OnPlaybackStateChanged(AvState::Buffering); + } + } +} + +bool AvPlayerState::IsStateTransitionValid(AvState state) { + switch (state) { + case AvState::Play: { + switch (m_current_state.load()) { + case AvState::Stop: + case AvState::EndOfFile: + // case AvState::C0x08: + case AvState::Error: + return false; + default: + return true; + } + } + case AvState::Pause: { + switch (m_current_state.load()) { + case AvState::Stop: + case AvState::EndOfFile: + // case AvState::C0x08: + case AvState::Starting: + case AvState::Error: + return false; + default: + return true; + } + } + case AvState::Jump: { + switch (m_current_state.load()) { + case AvState::Stop: + case AvState::EndOfFile: + // case AvState::C0x08: + case AvState::TrickMode: + case AvState::Starting: + case AvState::Error: + return false; + default: + return true; + } + } + case AvState::TrickMode: { + switch (m_current_state.load()) { + case AvState::Stop: + case AvState::EndOfFile: + // case AvState::C0x08: + case AvState::Jump: + case AvState::Starting: + case AvState::Error: + return false; + default: + return true; + } + } + case AvState::Buffering: { + switch (m_current_state.load()) { + case AvState::Stop: + case AvState::EndOfFile: + case AvState::Pause: + // case AvState::C0x08: + case AvState::Starting: + case AvState::Error: + return false; + default: + return true; + } + } + default: + return true; + } +} + +} // namespace Libraries::AvPlayer diff --git a/src/core/libraries/avplayer/avplayer_state.h b/src/core/libraries/avplayer/avplayer_state.h new file mode 100644 index 000000000..ff80b6cea --- /dev/null +++ b/src/core/libraries/avplayer/avplayer_state.h @@ -0,0 +1,88 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "avplayer.h" +#include "avplayer_data_streamer.h" +#include "avplayer_source.h" + +#include "common/polyfill_thread.h" +#include "core/libraries/kernel/thread_management.h" + +#include +#include +#include + +namespace Libraries::AvPlayer { + +class Stream; +class AvDecoder; + +class AvPlayerState : public AvPlayerStateCallback { +public: + AvPlayerState(const SceAvPlayerInitData& init_data); + ~AvPlayerState(); + + s32 AddSource(std::string_view filename, SceAvPlayerSourceType source_type); + s32 GetStreamCount(); + s32 GetStreamInfo(u32 stream_index, SceAvPlayerStreamInfo& info); + bool EnableStream(u32 stream_index); + s32 Start(); + bool Stop(); + bool GetAudioData(SceAvPlayerFrameInfo& audio_info); + bool GetVideoData(SceAvPlayerFrameInfo& video_info); + bool GetVideoData(SceAvPlayerFrameInfoEx& video_info); + bool IsActive(); + u64 CurrentTime(); + bool SetLooping(bool is_looping); + +private: + using ScePthreadMutex = Kernel::ScePthreadMutex; + using ScePthread = Kernel::ScePthread; + + // Event Replacement + static void PS4_SYSV_ABI AutoPlayEventCallback(void* handle, s32 event_id, s32 source_id, + void* event_data); + + void OnWarning(u32 id) override; + void OnError() override; + void OnEOF() override; + + void OnPlaybackStateChanged(AvState state); + std::optional OnBufferingCheckEvent(u32 num_frames); + + void EmitEvent(SceAvPlayerEvents event_id, void* event_data = nullptr); + bool SetState(AvState state); + + void AvControllerThread(std::stop_token stop); + + void AddSourceEvent(); + void WarningEvent(s32 id); + + void StartControllerThread(); + void ProcessEvent(); + void UpdateBufferingState(); + bool IsStateTransitionValid(AvState state); + + std::unique_ptr m_up_source; + + SceAvPlayerInitData m_init_data{}; + SceAvPlayerEventReplacement m_event_replacement{}; + bool m_auto_start{}; + u8 m_default_language[4]{}; + + std::atomic m_current_state; + std::atomic m_previous_state; + u32 m_thread_priority; + u32 m_thread_affinity; + std::atomic_uint32_t m_some_event_result{}; + + std::shared_mutex m_source_mutex{}; + std::mutex m_state_machine_mutex{}; + std::mutex m_event_handler_mutex{}; + std::jthread m_controller_thread{}; + AvPlayerQueue m_event_queue{}; +}; + +} // namespace Libraries::AvPlayer diff --git a/src/core/libraries/error_codes.h b/src/core/libraries/error_codes.h index 5eabaaf67..123edcee8 100644 --- a/src/core/libraries/error_codes.h +++ b/src/core/libraries/error_codes.h @@ -457,5 +457,18 @@ constexpr int ORBIS_NP_TROPHY_ERROR_HANDLE_EXCEEDS_MAX = 0x80551624; constexpr int ORBIS_NP_TROPHY_ERROR_CONTEXT_ALREADY_EXISTS = 0x80551613; constexpr int ORBIS_NP_TROPHY_ERROR_CONTEXT_EXCEEDS_MAX = 0x80551622; +// AvPlayer library +constexpr int ORBIS_AVPLAYER_ERROR_INVALID_PARAMS = 0x806A0001; +constexpr int ORBIS_AVPLAYER_ERROR_OPERATION_FAILED = 0x806A0002; +constexpr int ORBIS_AVPLAYER_ERROR_NO_MEMORY = 0x806A0003; +constexpr int ORBIS_AVPLAYER_ERROR_NOT_SUPPORTED = 0x806A0004; +constexpr int ORBIS_AVPLAYER_ERROR_WAR_FILE_NONINTERLEAVED = 0x806A00A0; +constexpr int ORBIS_AVPLAYER_ERROR_WAR_LOOPING_BACK = 0x806A00A1; +constexpr int ORBIS_AVPLAYER_ERROR_WAR_JUMP_COMPLETE = 0x806A00A3; +constexpr int ORBIS_AVPLAYER_ERROR_INFO_MARLIN_ENCRY = 0x806A00B0; +constexpr int ORBIS_AVPLAYER_ERROR_INFO_PLAYREADY_ENCRY = 0x806A00B4; +constexpr int ORBIS_AVPLAYER_ERROR_INFO_AES_ENCRY = 0x806A00B5; +constexpr int ORBIS_AVPLAYER_ERROR_INFO_OTHER_ENCRY = 0x806A00BF; + // AppContent library -constexpr int ORBIS_APP_CONTENT_ERROR_PARAMETER = 0x80D90002; +constexpr int ORBIS_APP_CONTENT_ERROR_PARAMETER = 0x80D90002; \ No newline at end of file diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index 650252f95..c2ee6d592 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -956,9 +956,9 @@ int PS4_SYSV_ABI sceGnmGetGpuBlockStatus() { return ORBIS_OK; } -int PS4_SYSV_ABI sceGnmGetGpuCoreClockFrequency() { - LOG_DEBUG(Lib_GnmDriver, "(STUBBED) called"); - return ORBIS_OK; +u32 PS4_SYSV_ABI sceGnmGetGpuCoreClockFrequency() { + LOG_TRACE(Lib_GnmDriver, "called"); + return Config::isNeoMode() ? 911'000'000 : 800'000'000; } int PS4_SYSV_ABI sceGnmGetGpuInfoStatus() { @@ -1706,8 +1706,18 @@ int PS4_SYSV_ABI sceGnmSetupMipStatsReport() { return ORBIS_OK; } -int PS4_SYSV_ABI sceGnmSetVgtControl() { - LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); +s32 PS4_SYSV_ABI sceGnmSetVgtControl(u32* cmdbuf, u32 size, u32 prim_group_sz_minus_one, + u32 partial_vs_wave_mode, u32 wd_switch_only_on_eop_mode) { + LOG_TRACE(Lib_GnmDriver, "called"); + + if (!cmdbuf || size != 3 || (prim_group_sz_minus_one >= 0x100) || + ((wd_switch_only_on_eop_mode | partial_vs_wave_mode) >= 2)) { + return -1; + } + + const u32 reg_value = + ((partial_vs_wave_mode & 1) << 0x10) | (prim_group_sz_minus_one & 0xffffu); + PM4CmdSetData::SetContextReg(cmdbuf, 0x2aau, reg_value); // IA_MULTI_VGT_PARAM return ORBIS_OK; } diff --git a/src/core/libraries/gnmdriver/gnmdriver.h b/src/core/libraries/gnmdriver/gnmdriver.h index 8100b1164..84872297e 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.h +++ b/src/core/libraries/gnmdriver/gnmdriver.h @@ -85,7 +85,7 @@ int PS4_SYSV_ABI sceGnmGetDebugTimestamp(); int PS4_SYSV_ABI sceGnmGetEqEventType(); int PS4_SYSV_ABI sceGnmGetEqTimeStamp(); int PS4_SYSV_ABI sceGnmGetGpuBlockStatus(); -int PS4_SYSV_ABI sceGnmGetGpuCoreClockFrequency(); +u32 PS4_SYSV_ABI sceGnmGetGpuCoreClockFrequency(); int PS4_SYSV_ABI sceGnmGetGpuInfoStatus(); int PS4_SYSV_ABI sceGnmGetLastWaitedAddress(); int PS4_SYSV_ABI sceGnmGetNumTcaUnits(); @@ -161,7 +161,8 @@ int PS4_SYSV_ABI sceGnmSetResourceUserData(); int PS4_SYSV_ABI sceGnmSetSpiEnableSqCounters(); int PS4_SYSV_ABI sceGnmSetSpiEnableSqCountersForUnitInstance(); int PS4_SYSV_ABI sceGnmSetupMipStatsReport(); -int PS4_SYSV_ABI sceGnmSetVgtControl(); +s32 PS4_SYSV_ABI sceGnmSetVgtControl(u32* cmdbuf, u32 size, u32 prim_group_sz_minus_one, + u32 partial_vs_wave_mode, u32 wd_switch_only_on_eop_mode); s32 PS4_SYSV_ABI sceGnmSetVsShader(u32* cmdbuf, u32 size, const u32* vs_regs, u32 shader_modifier); int PS4_SYSV_ABI sceGnmSetWaveLimitMultiplier(); int PS4_SYSV_ABI sceGnmSetWaveLimitMultipliers(); diff --git a/src/core/libraries/kernel/event_queues.cpp b/src/core/libraries/kernel/event_queues.cpp index bb3d8ba70..540c20c43 100644 --- a/src/core/libraries/kernel/event_queues.cpp +++ b/src/core/libraries/kernel/event_queues.cpp @@ -208,4 +208,7 @@ int PS4_SYSV_ABI sceKernelDeleteUserEvent(SceKernelEqueue eq, int id) { return ORBIS_OK; } +s16 PS4_SYSV_ABI sceKernelGetEventFilter(const SceKernelEvent* ev) { + return ev->filter; +} } // namespace Libraries::Kernel diff --git a/src/core/libraries/kernel/event_queues.h b/src/core/libraries/kernel/event_queues.h index 0f9c42a9d..d400ff187 100644 --- a/src/core/libraries/kernel/event_queues.h +++ b/src/core/libraries/kernel/event_queues.h @@ -21,5 +21,6 @@ int PS4_SYSV_ABI sceKernelDeleteUserEvent(SceKernelEqueue eq, int id); int PS4_SYSV_ABI sceKernelAddUserEvent(SceKernelEqueue eq, int id); int PS4_SYSV_ABI sceKernelAddUserEventEdge(SceKernelEqueue eq, int id); s32 PS4_SYSV_ABI sceKernelAddHRTimerEvent(SceKernelEqueue eq, int id, timespec* ts, void* udata); +s16 PS4_SYSV_ABI sceKernelGetEventFilter(const SceKernelEvent* ev); } // namespace Libraries::Kernel diff --git a/src/core/libraries/kernel/file_system.cpp b/src/core/libraries/kernel/file_system.cpp index f83863479..990b11d69 100644 --- a/src/core/libraries/kernel/file_system.cpp +++ b/src/core/libraries/kernel/file_system.cpp @@ -112,6 +112,15 @@ int PS4_SYSV_ABI posix_open(const char* path, int flags, /* SceKernelMode*/ u16 return result; } +int PS4_SYSV_ABI open(const char* filename, const char* mode) { + LOG_INFO(Kernel_Fs, "open redirect to sceKernelOpen"); + int result = sceKernelOpen(filename, ORBIS_KERNEL_O_RDWR, 0); + if (result < 0) { + return -1; + } + return result; +} + int PS4_SYSV_ABI sceKernelClose(int d) { if (d < 3) { // d probably hold an error code return ORBIS_KERNEL_ERROR_EPERM; @@ -498,6 +507,7 @@ void fileSystemSymbolsRegister(Core::Loader::SymbolsResolver* sym) { std::srand(std::time(nullptr)); LIB_FUNCTION("1G3lF1Gg1k8", "libkernel", 1, "libkernel", 1, 1, sceKernelOpen); LIB_FUNCTION("wuCroIGjt2g", "libScePosix", 1, "libkernel", 1, 1, posix_open); + LIB_FUNCTION("wuCroIGjt2g", "libkernel", 1, "libkernel", 1, 1, open); LIB_FUNCTION("UK2Tl2DWUns", "libkernel", 1, "libkernel", 1, 1, sceKernelClose); LIB_FUNCTION("bY-PO6JhzhQ", "libkernel", 1, "libkernel", 1, 1, posix_close); LIB_FUNCTION("bY-PO6JhzhQ", "libScePosix", 1, "libkernel", 1, 1, posix_close); diff --git a/src/core/libraries/kernel/libkernel.cpp b/src/core/libraries/kernel/libkernel.cpp index e2625819b..2634e25c8 100644 --- a/src/core/libraries/kernel/libkernel.cpp +++ b/src/core/libraries/kernel/libkernel.cpp @@ -125,6 +125,37 @@ int ErrnoToSceKernelError(int e) { return res > SCE_KERNEL_ERROR_ESTOP ? SCE_KERNEL_ERROR_UNKNOWN : res; } +void SetPosixErrno(int e) { + // Some error numbers are different between supported OSes or the PS4 + switch (e) { + case EPERM: + g_posix_errno = POSIX_EPERM; + break; + case EAGAIN: + g_posix_errno = POSIX_EAGAIN; + break; + case ENOMEM: + g_posix_errno = POSIX_ENOMEM; + break; + case EINVAL: + g_posix_errno = POSIX_EINVAL; + break; + case ENOSPC: + g_posix_errno = POSIX_ENOSPC; + break; + case ERANGE: + g_posix_errno = POSIX_ERANGE; + break; + case EDEADLK: + g_posix_errno = POSIX_EDEADLK; + break; + case ETIMEDOUT: + g_posix_errno = POSIX_ETIMEDOUT; + break; + default: + g_posix_errno = e; + } +} int PS4_SYSV_ABI sceKernelMmap(void* addr, u64 len, int prot, int flags, int fd, size_t offset, void** res) { LOG_INFO(Kernel_Vmm, "called addr = {}, len = {}, prot = {}, flags = {}, fd = {}, offset = {}", @@ -360,7 +391,6 @@ int PS4_SYSV_ABI posix_connect() { } int PS4_SYSV_ABI _sigprocmask() { - LOG_DEBUG(Lib_Kernel, "STUBBED"); return ORBIS_OK; } @@ -424,6 +454,7 @@ void LibKernel_Register(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("F6e0kwo4cnk", "libkernel", 1, "libkernel", 1, 1, sceKernelTriggerUserEvent); LIB_FUNCTION("LJDwdSNTnDg", "libkernel", 1, "libkernel", 1, 1, sceKernelDeleteUserEvent); LIB_FUNCTION("mJ7aghmgvfc", "libkernel", 1, "libkernel", 1, 1, sceKernelGetEventId); + LIB_FUNCTION("23CPPI1tyBY", "libkernel", 1, "libkernel", 1, 1, sceKernelGetEventFilter); // misc LIB_FUNCTION("WslcK1FQcGI", "libkernel", 1, "libkernel", 1, 1, sceKernelIsNeoMode); diff --git a/src/core/libraries/kernel/libkernel.h b/src/core/libraries/kernel/libkernel.h index 5b22dea4b..5b7f1e722 100644 --- a/src/core/libraries/kernel/libkernel.h +++ b/src/core/libraries/kernel/libkernel.h @@ -14,6 +14,7 @@ namespace Libraries::Kernel { void ErrSceToPosix(int result); int ErrnoToSceKernelError(int e); +void SetPosixErrno(int e); struct OrbisTimesec { time_t t; diff --git a/src/core/libraries/kernel/memory_management.cpp b/src/core/libraries/kernel/memory_management.cpp index 94762c4a0..826d47973 100644 --- a/src/core/libraries/kernel/memory_management.cpp +++ b/src/core/libraries/kernel/memory_management.cpp @@ -74,13 +74,22 @@ s32 PS4_SYSV_ABI sceKernelAvailableDirectMemorySize(u64 searchStart, u64 searchE size_t* sizeOut) { LOG_WARNING(Kernel_Vmm, "called searchStart = {:#x}, searchEnd = {:#x}, alignment = {:#x}", searchStart, searchEnd, alignment); + + if (searchEnd <= searchStart) { + return ORBIS_KERNEL_ERROR_EINVAL; + } + if (searchEnd > SCE_KERNEL_MAIN_DMEM_SIZE) { + return ORBIS_KERNEL_ERROR_EINVAL; + } + auto* memory = Core::Memory::Instance(); PAddr physAddr; - s32 size = memory->DirectQueryAvailable(searchStart, searchEnd, alignment, &physAddr, sizeOut); + s32 result = + memory->DirectQueryAvailable(searchStart, searchEnd, alignment, &physAddr, sizeOut); *physAddrOut = static_cast(physAddr); - return size; + return result; } s32 PS4_SYSV_ABI sceKernelVirtualQuery(const void* addr, int flags, OrbisVirtualQueryInfo* info, @@ -212,9 +221,9 @@ s32 PS4_SYSV_ABI sceKernelAvailableFlexibleMemorySize(size_t* out_size) { return ORBIS_OK; } -void PS4_SYSV_ABI _sceKernelRtldSetApplicationHeapAPI(void* func) { +void PS4_SYSV_ABI _sceKernelRtldSetApplicationHeapAPI(void* func[]) { auto* linker = Common::Singleton::Instance(); - linker->SetHeapApiFunc(func); + linker->SetHeapAPI(func); } int PS4_SYSV_ABI sceKernelGetDirectMemoryType(u64 addr, int* directMemoryTypeOut, diff --git a/src/core/libraries/kernel/memory_management.h b/src/core/libraries/kernel/memory_management.h index 6735ead71..378449cc5 100644 --- a/src/core/libraries/kernel/memory_management.h +++ b/src/core/libraries/kernel/memory_management.h @@ -98,7 +98,7 @@ int PS4_SYSV_ABI sceKernelQueryMemoryProtection(void* addr, void** start, void** int PS4_SYSV_ABI sceKernelDirectMemoryQuery(u64 offset, int flags, OrbisQueryInfo* query_info, size_t infoSize); s32 PS4_SYSV_ABI sceKernelAvailableFlexibleMemorySize(size_t* sizeOut); -void PS4_SYSV_ABI _sceKernelRtldSetApplicationHeapAPI(void* func); +void PS4_SYSV_ABI _sceKernelRtldSetApplicationHeapAPI(void* func[]); int PS4_SYSV_ABI sceKernelGetDirectMemoryType(u64 addr, int* directMemoryTypeOut, void** directMemoryStartOut, void** directMemoryEndOut); diff --git a/src/core/libraries/kernel/thread_management.cpp b/src/core/libraries/kernel/thread_management.cpp index 85e2d0e66..689532693 100644 --- a/src/core/libraries/kernel/thread_management.cpp +++ b/src/core/libraries/kernel/thread_management.cpp @@ -11,6 +11,7 @@ #include "common/singleton.h" #include "common/thread.h" #include "core/libraries/error_codes.h" +#include "core/libraries/kernel/libkernel.h" #include "core/libraries/kernel/thread_management.h" #include "core/libraries/kernel/threads/threads.h" #include "core/libraries/libs.h" @@ -421,13 +422,21 @@ ScePthreadMutex* createMutex(ScePthreadMutex* addr) { return addr; } -int PS4_SYSV_ABI scePthreadMutexInit(ScePthreadMutex* mutex, const ScePthreadMutexattr* attr, +int PS4_SYSV_ABI scePthreadMutexInit(ScePthreadMutex* mutex, const ScePthreadMutexattr* mutex_attr, const char* name) { + const ScePthreadMutexattr* attr; + if (mutex == nullptr) { return SCE_KERNEL_ERROR_EINVAL; } - if (attr == nullptr) { + if (mutex_attr == nullptr) { attr = g_pthread_cxt->getDefaultMutexattr(); + } else { + if (*mutex_attr == nullptr) { + attr = g_pthread_cxt->getDefaultMutexattr(); + } else { + attr = mutex_attr; + } } *mutex = new PthreadMutexInternal{}; @@ -1086,6 +1095,19 @@ int PS4_SYSV_ABI scePthreadAttrGetstack(ScePthreadAttr* attr, void** addr, size_ return SCE_KERNEL_ERROR_EINVAL; } +int PS4_SYSV_ABI scePthreadAttrSetstack(ScePthreadAttr* attr, void* addr, size_t size) { + if (attr == nullptr || *attr == nullptr || addr == nullptr || size < 0x4000) { + return ORBIS_KERNEL_ERROR_EINVAL; + } + int result = pthread_attr_setstack(&(*attr)->pth_attr, addr, size); + LOG_INFO(Kernel_Pthread, "scePthreadAttrSetstack: result = {}", result); + + if (result == 0) { + return ORBIS_OK; + } + return ORBIS_KERNEL_ERROR_EINVAL; +} + int PS4_SYSV_ABI scePthreadJoin(ScePthread thread, void** res) { int result = pthread_join(thread->pth, res); LOG_INFO(Kernel_Pthread, "scePthreadJoin result = {}", result); @@ -1353,15 +1375,27 @@ int PS4_SYSV_ABI posix_pthread_detach(ScePthread thread) { } int PS4_SYSV_ABI posix_sem_init(sem_t* sem, int pshared, unsigned int value) { - return sem_init(sem, pshared, value); + int result = sem_init(sem, pshared, value); + if (result == -1) { + SetPosixErrno(errno); + } + return result; } int PS4_SYSV_ABI posix_sem_wait(sem_t* sem) { - return sem_wait(sem); + int result = sem_wait(sem); + if (result == -1) { + SetPosixErrno(errno); + } + return result; } int PS4_SYSV_ABI posix_sem_trywait(sem_t* sem) { - return sem_trywait(sem); + int result = sem_trywait(sem); + if (result == -1) { + SetPosixErrno(errno); + } + return result; } #ifndef HAVE_SEM_TIMEDWAIT @@ -1395,19 +1429,35 @@ int sem_timedwait(sem_t* sem, const struct timespec* abstime) { #endif int PS4_SYSV_ABI posix_sem_timedwait(sem_t* sem, const timespec* t) { - return sem_timedwait(sem, t); + int result = sem_timedwait(sem, t); + if (result == -1) { + SetPosixErrno(errno); + } + return result; } int PS4_SYSV_ABI posix_sem_post(sem_t* sem) { - return sem_post(sem); + int result = sem_post(sem); + if (result == -1) { + SetPosixErrno(errno); + } + return result; } int PS4_SYSV_ABI posix_sem_destroy(sem_t* sem) { - return sem_destroy(sem); + int result = sem_destroy(sem); + if (result == -1) { + SetPosixErrno(errno); + } + return result; } int PS4_SYSV_ABI posix_sem_getvalue(sem_t* sem, int* sval) { - return sem_getvalue(sem, sval); + int result = sem_getvalue(sem, sval); + if (result == -1) { + SetPosixErrno(errno); + } + return result; } int PS4_SYSV_ABI posix_pthread_attr_getstacksize(const pthread_attr_t* attr, size_t* size) { @@ -1542,6 +1592,7 @@ void pthreadSymbolsRegister(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("B5GmVDKwpn0", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_yield); LIB_FUNCTION("-quPa4SEJUw", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrGetstack); + LIB_FUNCTION("Bvn74vj6oLo", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrSetstack); LIB_FUNCTION("Ru36fiTtJzA", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrGetstackaddr); LIB_FUNCTION("-fA+7ZlGDQs", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrGetstacksize); LIB_FUNCTION("14bOACANTBo", "libkernel", 1, "libkernel", 1, 1, scePthreadOnce); diff --git a/src/core/libraries/kernel/threads/semaphore.cpp b/src/core/libraries/kernel/threads/semaphore.cpp index 370dba445..5304dc57b 100644 --- a/src/core/libraries/kernel/threads/semaphore.cpp +++ b/src/core/libraries/kernel/threads/semaphore.cpp @@ -9,7 +9,6 @@ #include "common/assert.h" #include "common/logging/log.h" #include "core/libraries/error_codes.h" -#include "core/libraries/kernel/thread_management.h" #include "core/libraries/libs.h" namespace Libraries::Kernel { @@ -82,7 +81,6 @@ public: public: struct WaitingThread : public ListBaseHook { - std::string name; std::condition_variable cv; u32 priority; s32 need_count; @@ -90,7 +88,6 @@ public: bool was_cancled{}; explicit WaitingThread(s32 need_count, bool is_fifo) : need_count{need_count} { - name = scePthreadSelf()->name; if (is_fifo) { return; } @@ -174,10 +171,16 @@ s32 PS4_SYSV_ABI sceKernelCreateSema(OrbisKernelSema* sem, const char* pName, u3 } s32 PS4_SYSV_ABI sceKernelWaitSema(OrbisKernelSema sem, s32 needCount, u32* pTimeout) { + if (!sem) { + return ORBIS_KERNEL_ERROR_ESRCH; + } return sem->Wait(true, needCount, pTimeout); } s32 PS4_SYSV_ABI sceKernelSignalSema(OrbisKernelSema sem, s32 signalCount) { + if (!sem) { + return ORBIS_KERNEL_ERROR_ESRCH; + } if (!sem->Signal(signalCount)) { return ORBIS_KERNEL_ERROR_EINVAL; } @@ -185,10 +188,16 @@ s32 PS4_SYSV_ABI sceKernelSignalSema(OrbisKernelSema sem, s32 signalCount) { } s32 PS4_SYSV_ABI sceKernelPollSema(OrbisKernelSema sem, s32 needCount) { + if (!sem) { + return ORBIS_KERNEL_ERROR_ESRCH; + } return sem->Wait(false, needCount, nullptr); } int PS4_SYSV_ABI sceKernelCancelSema(OrbisKernelSema sem, s32 setCount, s32* pNumWaitThreads) { + if (!sem) { + return ORBIS_KERNEL_ERROR_ESRCH; + } return sem->Cancel(setCount, pNumWaitThreads); } diff --git a/src/core/libraries/kernel/time_management.cpp b/src/core/libraries/kernel/time_management.cpp index c4854937b..214f039b4 100644 --- a/src/core/libraries/kernel/time_management.cpp +++ b/src/core/libraries/kernel/time_management.cpp @@ -143,6 +143,7 @@ int PS4_SYSV_ABI sceKernelGettimeofday(OrbisKernelTimeval* tp) { return ORBIS_KERNEL_ERROR_EFAULT; } +#ifdef _WIN64 auto now = std::chrono::system_clock::now(); auto duration = now.time_since_epoch(); auto seconds = std::chrono::duration_cast(duration); @@ -150,6 +151,12 @@ int PS4_SYSV_ABI sceKernelGettimeofday(OrbisKernelTimeval* tp) { tp->tv_sec = seconds.count(); tp->tv_usec = microsecs.count(); +#else + timeval tv; + gettimeofday(&tv, nullptr); + tp->tv_sec = tv.tv_sec; + tp->tv_usec = tv.tv_usec; +#endif return ORBIS_OK; } diff --git a/src/core/libraries/network/net.cpp b/src/core/libraries/network/net.cpp index 958f9264e..2c03dde3e 100644 --- a/src/core/libraries/network/net.cpp +++ b/src/core/libraries/network/net.cpp @@ -10,7 +10,7 @@ #include #endif -#include +#include "common/assert.h" #include "common/logging/log.h" #include "core/libraries/error_codes.h" #include "core/libraries/libs.h" diff --git a/src/core/libraries/ngs2/ngs2.cpp b/src/core/libraries/ngs2/ngs2.cpp new file mode 100644 index 000000000..b66c9b15b --- /dev/null +++ b/src/core/libraries/ngs2/ngs2.cpp @@ -0,0 +1,419 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "ngs2.h" +#include "ngs2_error.h" +#include "ngs2_impl.h" + +#include "common/logging/log.h" +#include "core/libraries/error_codes.h" +#include "core/libraries/libs.h" + +namespace Libraries::Ngs2 { + +int PS4_SYSV_ABI sceNgs2CalcWaveformBlock() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2CustomRackGetModuleInfo() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2FftInit() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2FftProcess() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2FftQuerySize() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2GeomApply() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2GeomCalcListener() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2GeomResetListenerParam() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2GeomResetSourceParam() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2GetWaveformFrameInfo() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2JobSchedulerResetOption() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2ModuleArrayEnumItems() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2ModuleEnumConfigs() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2ModuleQueueEnumItems() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2PanGetVolumeMatrix() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2PanInit() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2ParseWaveformData() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2ParseWaveformFile() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2ParseWaveformUser() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2RackCreate() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2RackCreateWithAllocator() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2RackDestroy() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2RackGetInfo() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2RackGetUserData() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2RackGetVoiceHandle() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2RackLock() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2RackQueryBufferSize() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2RackQueryInfo() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2RackRunCommands() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2RackSetUserData() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2RackUnlock() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2ReportRegisterHandler() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2ReportUnregisterHandler() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2SystemCreate() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2SystemCreateWithAllocator() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2SystemDestroy() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2SystemEnumHandles() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2SystemEnumRackHandles() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2SystemGetInfo() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2SystemGetUserData() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2SystemLock() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2SystemQueryBufferSize() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2SystemQueryInfo() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2SystemRender() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2SystemResetOption() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2SystemRunCommands() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2SystemSetGrainSamples() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2SystemSetLoudThreshold() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2SystemSetSampleRate() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2SystemSetUserData() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2SystemUnlock() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2StreamCreate() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2StreamCreateWithAllocator() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2StreamDestroy() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2StreamQueryBufferSize() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2StreamQueryInfo() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2StreamResetOption() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2StreamRunCommands() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2VoiceControl() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2VoiceGetMatrixInfo() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2VoiceGetOwner() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2VoiceGetPortInfo() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2VoiceGetState() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2VoiceGetStateFlags() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2VoiceQueryInfo() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceNgs2VoiceRunCommands() { + LOG_ERROR(Lib_Ngs2, "(STUBBED) called"); + return ORBIS_OK; +} + +void RegisterlibSceNgs2(Core::Loader::SymbolsResolver* sym) { + LIB_FUNCTION("3pCNbVM11UA", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2CalcWaveformBlock); + LIB_FUNCTION("6qN1zaEZuN0", "libSceNgs2", 1, "libSceNgs2", 1, 1, + sceNgs2CustomRackGetModuleInfo); + LIB_FUNCTION("Kg1MA5j7KFk", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2FftInit); + LIB_FUNCTION("D8eCqBxSojA", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2FftProcess); + LIB_FUNCTION("-YNfTO6KOMY", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2FftQuerySize); + LIB_FUNCTION("eF8yRCC6W64", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2GeomApply); + LIB_FUNCTION("1WsleK-MTkE", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2GeomCalcListener); + LIB_FUNCTION("7Lcfo8SmpsU", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2GeomResetListenerParam); + LIB_FUNCTION("0lbbayqDNoE", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2GeomResetSourceParam); + LIB_FUNCTION("ekGJmmoc8j4", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2GetWaveformFrameInfo); + LIB_FUNCTION("BcoPfWfpvVI", "libSceNgs2", 1, "libSceNgs2", 1, 1, + sceNgs2JobSchedulerResetOption); + LIB_FUNCTION("EEemGEQCjO8", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2ModuleArrayEnumItems); + LIB_FUNCTION("TaoNtmMKkXQ", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2ModuleEnumConfigs); + LIB_FUNCTION("ve6bZi+1sYQ", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2ModuleQueueEnumItems); + LIB_FUNCTION("gbMKV+8Enuo", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2PanGetVolumeMatrix); + LIB_FUNCTION("xa8oL9dmXkM", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2PanInit); + LIB_FUNCTION("hyVLT2VlOYk", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2ParseWaveformData); + LIB_FUNCTION("iprCTXPVWMI", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2ParseWaveformFile); + LIB_FUNCTION("t9T0QM17Kvo", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2ParseWaveformUser); + LIB_FUNCTION("cLV4aiT9JpA", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2RackCreate); + LIB_FUNCTION("U546k6orxQo", "libSceNgs2", 1, "libSceNgs2", 1, 1, + sceNgs2RackCreateWithAllocator); + LIB_FUNCTION("lCqD7oycmIM", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2RackDestroy); + LIB_FUNCTION("M4LYATRhRUE", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2RackGetInfo); + LIB_FUNCTION("Mn4XNDg03XY", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2RackGetUserData); + LIB_FUNCTION("MwmHz8pAdAo", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2RackGetVoiceHandle); + LIB_FUNCTION("MzTa7VLjogY", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2RackLock); + LIB_FUNCTION("0eFLVCfWVds", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2RackQueryBufferSize); + LIB_FUNCTION("TZqb8E-j3dY", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2RackQueryInfo); + LIB_FUNCTION("MI2VmBx2RbM", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2RackRunCommands); + LIB_FUNCTION("JNTMIaBIbV4", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2RackSetUserData); + LIB_FUNCTION("++YZ7P9e87U", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2RackUnlock); + LIB_FUNCTION("uBIN24Tv2MI", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2ReportRegisterHandler); + LIB_FUNCTION("nPzb7Ly-VjE", "libSceNgs2", 1, "libSceNgs2", 1, 1, + sceNgs2ReportUnregisterHandler); + LIB_FUNCTION("koBbCMvOKWw", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2SystemCreate); + LIB_FUNCTION("mPYgU4oYpuY", "libSceNgs2", 1, "libSceNgs2", 1, 1, + sceNgs2SystemCreateWithAllocator); + LIB_FUNCTION("u-WrYDaJA3k", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2SystemDestroy); + LIB_FUNCTION("vubFP0T6MP0", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2SystemEnumHandles); + LIB_FUNCTION("U-+7HsswcIs", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2SystemEnumRackHandles); + LIB_FUNCTION("vU7TQ62pItw", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2SystemGetInfo); + LIB_FUNCTION("4lFaRxd-aLs", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2SystemGetUserData); + LIB_FUNCTION("gThZqM5PYlQ", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2SystemLock); + LIB_FUNCTION("pgFAiLR5qT4", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2SystemQueryBufferSize); + LIB_FUNCTION("3oIK7y7O4k0", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2SystemQueryInfo) + LIB_FUNCTION("i0VnXM-C9fc", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2SystemRender); + LIB_FUNCTION("AQkj7C0f3PY", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2SystemResetOption); + LIB_FUNCTION("gXiormHoZZ4", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2SystemRunCommands); + LIB_FUNCTION("l4Q2dWEH6UM", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2SystemSetGrainSamples); + LIB_FUNCTION("Wdlx0ZFTV9s", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2SystemSetLoudThreshold); + LIB_FUNCTION("-tbc2SxQD60", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2SystemSetSampleRate); + LIB_FUNCTION("GZB2v0XnG0k", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2SystemSetUserData); + LIB_FUNCTION("JXRC5n0RQls", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2SystemUnlock); + LIB_FUNCTION("sU2St3agdjg", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2StreamCreate); + LIB_FUNCTION("I+RLwaauggA", "libSceNgs2", 1, "libSceNgs2", 1, 1, + sceNgs2StreamCreateWithAllocator); + LIB_FUNCTION("bfoMXnTRtwE", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2StreamDestroy); + LIB_FUNCTION("dxulc33msHM", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2StreamQueryBufferSize); + LIB_FUNCTION("rfw6ufRsmow", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2StreamQueryInfo); + LIB_FUNCTION("q+2W8YdK0F8", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2StreamResetOption); + LIB_FUNCTION("qQHCi9pjDps", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2StreamRunCommands); + LIB_FUNCTION("uu94irFOGpA", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2VoiceControl); + LIB_FUNCTION("jjBVvPN9964", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2VoiceGetMatrixInfo); + LIB_FUNCTION("W-Z8wWMBnhk", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2VoiceGetOwner); + LIB_FUNCTION("WCayTgob7-o", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2VoiceGetPortInfo); + LIB_FUNCTION("-TOuuAQ-buE", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2VoiceGetState); + LIB_FUNCTION("rEh728kXk3w", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2VoiceGetStateFlags); + LIB_FUNCTION("9eic4AmjGVI", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2VoiceQueryInfo); + LIB_FUNCTION("AbYvTOZ8Pts", "libSceNgs2", 1, "libSceNgs2", 1, 1, sceNgs2VoiceRunCommands); +}; + +} // namespace Libraries::Ngs2 \ No newline at end of file diff --git a/src/core/libraries/ngs2/ngs2.h b/src/core/libraries/ngs2/ngs2.h new file mode 100644 index 000000000..0df83f565 --- /dev/null +++ b/src/core/libraries/ngs2/ngs2.h @@ -0,0 +1,72 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "common/types.h" + +#include +#include + +namespace Core::Loader { +class SymbolsResolver; +} + +namespace Libraries::Ngs2 { + +class Ngs2; + +using SceNgs2Handle = Ngs2*; + +enum SceNgs2HandleType { SCE_NGS2_HANDLE_TYPE_SYSTEM = 0 }; + +struct Ngs2Handle { + void* selfPointer; + void* dataPointer; + std::atomic* atomicPtr; + u32 handleType; + u32 flags_unk; + + u32 uid; + u16 maxGrainSamples; + u16 minGrainSamples; + u16 currentGrainSamples; + u16 numGrainSamples; + u16 unknown2; + u32 sampleRate; + u32 unknown3; + + void* flushMutex; + u32 flushMutexInitialized; + void* processMutex; + u32 processMutexInitialized; + + // Linked list pointers for system list + Ngs2Handle* prev; + Ngs2Handle* next; +}; + +struct SystemOptions { + char padding[6]; + s32 maxGrainSamples; + s32 numGrainSamples; + s32 sampleRate; +}; + +struct SystemState { + // TODO +}; + +struct StackBuffer { + void** top; + void* base; + void* curr; + size_t usedSize; + size_t totalSize; + size_t alignment; + char isVerifyEnabled; + char padding[7]; +}; + +void RegisterlibSceNgs2(Core::Loader::SymbolsResolver* sym); +} // namespace Libraries::Ngs2 \ No newline at end of file diff --git a/src/core/libraries/ngs2/ngs2_error.h b/src/core/libraries/ngs2/ngs2_error.h new file mode 100644 index 000000000..254ae26e6 --- /dev/null +++ b/src/core/libraries/ngs2/ngs2_error.h @@ -0,0 +1,56 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +constexpr int ORBIS_NGS2_ERROR_INVALID_PARAMETERS = 0x804A0001; +constexpr int ORBIS_NGS2_ERROR_INVALID_MAXIMUM_GRAIN_SAMPLES = 0x804A0050; +constexpr int ORBIS_NGS2_ERROR_INVALID_GRAIN_SAMPLES = 0x804A0051; +constexpr int ORBIS_NGS2_ERROR_INVALID_CHANNELS = 0x804A0052; +constexpr int ORBIS_NGS2_ERROR_INVALD_ADDRESS = 0x804A0053; +constexpr int ORBIS_NGS2_ERROR_INVALD_SIZE = 0x804A0054; +constexpr int ORBIS_NGS2_ERROR_INVALID_OPTION_SIZE = 0x804A0081; +constexpr int ORBIS_NGS2_ERROR_INVALID_RACK_OPTION_MAX_MATRICES = 0x804A0100; +constexpr int ORBIS_NGS2_ERROR_INVALID_RACK_OPTION_MAX_PORTS = 0x804A0101; +constexpr int ORBIS_NGS2_ERROR_INVALID_RACK_OPTION_MAX_INPUT_DELAY_BLOCKS = 0x804A0102; +constexpr int ORBIS_NGS2_ERROR_INVALID_MATRIX_LEVELS = 0x804A0150; +constexpr int ORBIS_NGS2_ERROR_SAMPLER_WAVEFORM_TERMINATED = 0x804A0151; +constexpr int ORBIS_NGS2_ERROR_INVALID_ENVELOPE_POINTS = 0x804A0152; +constexpr int ORBIS_NGS2_ERROR_INVALID_MATRIX_LEVEL_ADDRESS = 0x804A0153; +constexpr int ORBIS_NGS2_ERROR_INVALID_SAMPLER_WAVEFORM_BLOCK_ADDRESS = 0x804A0154; +constexpr int ORBIS_NGS2_ERROR_INVALID_ENVELOPE_POINT_ADDRESS = 0x804A0155; +constexpr int ORBIS_NGS2_ERROR_INVALID_HANDLE = 0x804A0200; +constexpr int ORBIS_NGS2_ERROR_INVALID_SAMPLE_RATE = 0x804A0201; +constexpr int ORBIS_NGS2_ERROR_INVALID_REPORT_HANDLE = 0x804A0204; +constexpr int ORBIS_NGS2_ERROR_INVALID_BUFFER_INFO = 0x804A0206; +constexpr int ORBIS_NGS2_ERROR_INVALID_BUFFER_ADDRESS = 0x804A0207; +constexpr int ORBIS_NGS2_ERROR_INVALID_BUFFER_ALIGNMENT = 0x804A0208; +constexpr int ORBIS_NGS2_ERROR_INVALID_BUFFER_SIZE = 0x804A0209; +constexpr int ORBIS_NGS2_ERROR_INVALID_BUFFER_ALLOCATOR = 0x804A020A; +constexpr int ORBIS_NGS2_ERROR_BUFFER_VERIFY_FAILED = 0x804A020B; +constexpr int ORBIS_NGS2_ERROR_MODULE_PLAYER_DATA_EMPTY = 0x804A020C; +constexpr int ORBIS_NGS2_ERROR_INVALID_SYSTEM_HANDLE = 0x804A0230; +constexpr int ORBIS_NGS2_ERROR_INVALID_RACK_ID = 0x804A0260; +constexpr int ORBIS_NGS2_ERROR_INVALID_RACK_HANDLE = 0x804A0261; +constexpr int ORBIS_NGS2_ERROR_INVALID_VOICE_HANDLE = 0x804A0300; +constexpr int ORBIS_NGS2_ERROR_INVALID_VOICE_INDEX = 0x804A0302; +constexpr int ORBIS_NGS2_ERROR_INVALID_VOICE_EVENT = 0x804A0303; +constexpr int ORBIS_NGS2_ERROR_INVALID_VOICE_PORT_INDEX = 0x804A0304; +constexpr int ORBIS_NGS2_ERROR_INVALID_VOICE_INPUT_OR_RACK_OCCUPIED = 0x804A0305; +constexpr int ORBIS_NGS2_ERROR_INVALID_CONTROL_ID = 0x804A0308; +constexpr int ORBIS_NGS2_ERROR_INVALID_VOICE_CONTROL_PARAMETER = 0x804A0309; +constexpr int ORBIS_NGS2_ERROR_INVALID_PARAMETER_SIZE = 0x804A030A; +constexpr int ORBIS_NGS2_ERROR_DETECTED_CIRCULAR_VOICE_CONTROL = 0x804A030B; +constexpr int ORBIS_NGS2_ERROR_INVALID_SAMPLER_WAVEFORM_DATA = 0x804A0400; +constexpr int ORBIS_NGS2_ERROR_INVALID_SAMPLER_WAVEFORM_FORMAT = 0x804A0401; +constexpr int ORBIS_NGS2_ERROR_INVALID_SAMPLER_WAVEFORM_TYPE_NO_ATRAC9_DECODERS = 0x804A0402; +constexpr int ORBIS_NGS2_ERROR_INVALID_SAMPLER_ATRAC9_CONFIG_DATA = 0x804A0403; +constexpr int ORBIS_NGS2_ERROR_INVALID_SAMPLER_WAVEFORM_SAMPLE_RATE = 0x804A0404; +constexpr int ORBIS_NGS2_ERROR_INVALID_SAMPLER_WAVEFORM_FRAME = 0x804A0405; +constexpr int ORBIS_NGS2_ERROR_INVALID_SAMPLER_WAVEFORM_ADDRESS = 0x804A0406; +constexpr int ORBIS_NGS2_ERROR_INVALID_ENVELOPE_CURVE = 0x804A0500; +constexpr int ORBIS_NGS2_ERROR_INVALID_FILTER_INDEX = 0x804A0600; +constexpr int ORBIS_NGS2_ERROR_INVALID_FILTER_TYPE = 0x804A0601; +constexpr int ORBIS_NGS2_ERROR_INVALID_FILTER_LOCATION = 0x804A0602; +constexpr int ORBIS_NGS2_ERROR_INVALID_LFE_CUT_OFF_FREQUENCY = 0x804A0603; +constexpr int ORBIS_NGS2_ERROR_INVALID_MATRIX_INDEX_OR_TYPE = 0x804A0700; \ No newline at end of file diff --git a/src/core/libraries/ngs2/ngs2_impl.cpp b/src/core/libraries/ngs2/ngs2_impl.cpp new file mode 100644 index 000000000..50c62f5e4 --- /dev/null +++ b/src/core/libraries/ngs2/ngs2_impl.cpp @@ -0,0 +1,164 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "ngs2_error.h" +#include "ngs2_impl.h" + +#include "common/logging/log.h" +#include "core/libraries/error_codes.h" +#include "core/libraries/kernel/libkernel.h" + +using namespace Libraries::Kernel; + +namespace Libraries::Ngs2 { + +s32 Ngs2::ReportInvalid(Ngs2Handle* handle, u32 handle_type) const { + uintptr_t hAddress = reinterpret_cast(handle); + switch (handle_type) { + case 1: + LOG_ERROR(Lib_Ngs2, "Invalid system handle {}", hAddress); + return ORBIS_NGS2_ERROR_INVALID_SYSTEM_HANDLE; + case 2: + LOG_ERROR(Lib_Ngs2, "Invalid rack handle {}", hAddress); + return ORBIS_NGS2_ERROR_INVALID_RACK_HANDLE; + case 4: + LOG_ERROR(Lib_Ngs2, "Invalid voice handle {}", hAddress); + return ORBIS_NGS2_ERROR_INVALID_VOICE_HANDLE; + case 8: + LOG_ERROR(Lib_Ngs2, "Invalid report handle {}", hAddress); + return ORBIS_NGS2_ERROR_INVALID_REPORT_HANDLE; + default: + LOG_ERROR(Lib_Ngs2, "Invalid handle {}", hAddress); + return ORBIS_NGS2_ERROR_INVALID_HANDLE; + } +} + +s32 Ngs2::HandleSetup(Ngs2Handle* handle, void* data, std::atomic* atomic, u32 type, + u32 flags) { + handle->dataPointer = data; + handle->atomicPtr = atomic; + handle->handleType = type; + handle->flags_unk = flags; + return ORBIS_OK; +} + +s32 Ngs2::HandleCleanup(Ngs2Handle* handle, u32 hType, void* dataOut) { + if (handle && handle->selfPointer == handle) { + std::atomic* tmp_atomic = handle->atomicPtr; + if (tmp_atomic && handle->handleType == hType) { + while (tmp_atomic->load() != 0) { + u32 expected = 1; + if (tmp_atomic->compare_exchange_strong(expected, 0)) { + if (dataOut) { + dataOut = handle->dataPointer; + } + // sceNgs2MemoryClear(handle, 32); + return ORBIS_OK; + } + tmp_atomic = handle->atomicPtr; + } + } + } + return this->ReportInvalid(handle, hType); +} + +s32 Ngs2::HandleEnter(Ngs2Handle* handle, u32 hType, Ngs2Handle* handleOut) { + if (!handle) { + return this->ReportInvalid(handle, 0); + } + + if (handle->selfPointer != handle || !handle->atomicPtr || !handle->dataPointer || + (~hType & handle->handleType)) { + return this->ReportInvalid(handle, handle->handleType); + } + + std::atomic* atomic = handle->atomicPtr; + while (true) { + u32 i = atomic->load(); + if (i == 0) { + return this->ReportInvalid(handle, handle->handleType); + } + if (atomic->compare_exchange_strong(i, i + 1)) { + break; + } + } + + if (handleOut) { + handleOut = handle; + } + return ORBIS_OK; +} + +s32 Ngs2::HandleLeave(Ngs2Handle* handle) { + std::atomic* tmp_atomic; + u32 i; + do { + tmp_atomic = handle->atomicPtr; + i = tmp_atomic->load(); + } while (!tmp_atomic->compare_exchange_strong(i, i - 1)); + return ORBIS_OK; +} + +s32 Ngs2::StackBufferOpen(StackBuffer* buf, void* base_addr, size_t size, void** stackTop, + bool verify) { + buf->top = stackTop; + buf->base = base_addr; + buf->curr = base_addr; + buf->usedSize = 0; + buf->totalSize = size; + buf->alignment = 8; + buf->isVerifyEnabled = verify; + + if (stackTop) { + *stackTop = nullptr; + } + + return ORBIS_OK; +} + +s32 Ngs2::StackBufferClose(StackBuffer* buf, size_t* usedSize) { + if (usedSize) { + *usedSize = buf->usedSize + buf->alignment; + } + + return ORBIS_OK; +} + +s32 Ngs2::SystemSetupCore(StackBuffer* buf, SystemOptions* options, Ngs2Handle** sysOut) { + u32 maxGrainSamples = 512; + u32 numGrainSamples = 256; + u32 sampleRate = 48000; + + if (options) { + maxGrainSamples = options->maxGrainSamples; + numGrainSamples = options->numGrainSamples; + sampleRate = options->sampleRate; + } + + // Validate maxGrainSamples + if (maxGrainSamples < 64 || maxGrainSamples > 1024 || (maxGrainSamples & 0x3F) != 0) { + LOG_ERROR(Lib_Ngs2, "Invalid system option (maxGrainSamples={},x64)", maxGrainSamples); + return ORBIS_NGS2_ERROR_INVALID_MAXIMUM_GRAIN_SAMPLES; + } + + // Validate numGrainSamples + if (numGrainSamples < 64 || numGrainSamples > 1024 || (numGrainSamples & 0x3F) != 0) { + LOG_ERROR(Lib_Ngs2, "Invalid system option (numGrainSamples={},x64)", numGrainSamples); + return ORBIS_NGS2_ERROR_INVALID_GRAIN_SAMPLES; + } + + // Validate sampleRate + if (sampleRate != 11025 && sampleRate != 12000 && sampleRate != 22050 && sampleRate != 24000 && + sampleRate != 44100 && sampleRate != 48000 && sampleRate != 88200 && sampleRate != 96000) { + LOG_ERROR(Lib_Ngs2, "Invalid system option(sampleRate={}:44.1/48kHz series)", sampleRate); + return ORBIS_NGS2_ERROR_INVALID_SAMPLE_RATE; + } + + int result = ORBIS_OK; + + // TODO + + return result; // Success +} + +} // namespace Libraries::Ngs2 diff --git a/src/core/libraries/ngs2/ngs2_impl.h b/src/core/libraries/ngs2/ngs2_impl.h new file mode 100644 index 000000000..fea87c51c --- /dev/null +++ b/src/core/libraries/ngs2/ngs2_impl.h @@ -0,0 +1,25 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "ngs2.h" + +namespace Libraries::Ngs2 { + +class Ngs2 { +public: + s32 ReportInvalid(Ngs2Handle* handle, u32 handle_type) const; + s32 HandleSetup(Ngs2Handle* handle, void* data, std::atomic* atomic, u32 type, u32 flags); + s32 HandleCleanup(Ngs2Handle* handle, u32 hType, void* dataOut); + s32 HandleEnter(Ngs2Handle* handle, u32 hType, Ngs2Handle* handleOut); + s32 HandleLeave(Ngs2Handle* handle); + s32 StackBufferOpen(StackBuffer* buf, void* base_addr, size_t size, void** stackTop, + bool verify); + s32 StackBufferClose(StackBuffer* buf, size_t* usedSize); + s32 SystemSetupCore(StackBuffer* buf, SystemOptions* options, Ngs2Handle** sysOut); + +private: +}; + +} // namespace Libraries::Ngs2 diff --git a/src/core/libraries/np_manager/np_manager.cpp b/src/core/libraries/np_manager/np_manager.cpp index 33308abc1..c657fbf60 100644 --- a/src/core/libraries/np_manager/np_manager.cpp +++ b/src/core/libraries/np_manager/np_manager.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later // Generated By moduleGenerator +#include "common/config.h" #include "common/logging/log.h" #include "core/libraries/error_codes.h" #include "core/libraries/libs.h" @@ -974,8 +975,11 @@ int PS4_SYSV_ABI sceNpGetGamePresenceStatusA() { return ORBIS_OK; } -int PS4_SYSV_ABI sceNpGetNpId() { - LOG_ERROR(Lib_NpManager, "(STUBBED) called"); +int PS4_SYSV_ABI sceNpGetNpId(OrbisUserServiceUserId userId, OrbisNpId* npId) { + LOG_ERROR(Lib_NpManager, "(DUMMY) called"); + + std::string name = Config::getUserName(); + strcpy(npId->handle.data, name.c_str()); return ORBIS_OK; } diff --git a/src/core/libraries/np_manager/np_manager.h b/src/core/libraries/np_manager/np_manager.h index 5b11355ad..5955a40b4 100644 --- a/src/core/libraries/np_manager/np_manager.h +++ b/src/core/libraries/np_manager/np_manager.h @@ -11,6 +11,22 @@ class SymbolsResolver; namespace Libraries::NpManager { +constexpr int ORBIS_NP_ONLINEID_MAX_LENGTH = 16; + +typedef int OrbisUserServiceUserId; + +struct OrbisNpOnlineId { + char data[ORBIS_NP_ONLINEID_MAX_LENGTH]; + char term; + char dummy[3]; +}; + +struct OrbisNpId { + OrbisNpOnlineId handle; + u8 opt[8]; + u8 reserved[8]; +}; + int PS4_SYSV_ABI Func_EF4378573542A508(); int PS4_SYSV_ABI _sceNpIpcCreateMemoryFromKernel(); int PS4_SYSV_ABI _sceNpIpcCreateMemoryFromPool(); @@ -204,7 +220,7 @@ int PS4_SYSV_ABI sceNpGetAccountLanguage2(); int PS4_SYSV_ABI sceNpGetAccountLanguageA(); int PS4_SYSV_ABI sceNpGetGamePresenceStatus(); int PS4_SYSV_ABI sceNpGetGamePresenceStatusA(); -int PS4_SYSV_ABI sceNpGetNpId(); +int PS4_SYSV_ABI sceNpGetNpId(OrbisUserServiceUserId userId, OrbisNpId* npId); int PS4_SYSV_ABI sceNpGetNpReachabilityState(); int PS4_SYSV_ABI sceNpGetOnlineId(); int PS4_SYSV_ABI sceNpGetParentalControlInfo(); diff --git a/src/core/libraries/pad/pad.cpp b/src/core/libraries/pad/pad.cpp index d39935503..305b20bd6 100644 --- a/src/core/libraries/pad/pad.cpp +++ b/src/core/libraries/pad/pad.cpp @@ -105,7 +105,7 @@ int PS4_SYSV_ABI scePadGetControllerInformation(s32 handle, OrbisPadControllerIn pInfo->stickInfo.deadZoneRight = 2; pInfo->connectionType = ORBIS_PAD_PORT_TYPE_STANDARD; pInfo->connectedCount = 1; - pInfo->connected = 1; + pInfo->connected = true; pInfo->deviceClass = ORBIS_PAD_DEVICE_CLASS_STANDARD; return SCE_OK; } @@ -125,9 +125,16 @@ int PS4_SYSV_ABI scePadGetDeviceInfo() { return ORBIS_OK; } -int PS4_SYSV_ABI scePadGetExtControllerInformation() { - LOG_ERROR(Lib_Pad, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI scePadGetExtControllerInformation(s32 handle, + OrbisPadExtendedControllerInformation* pInfo) { + LOG_INFO(Lib_Pad, "called handle = {}", handle); + + pInfo->padType1 = 0; + pInfo->padType2 = 0; + pInfo->capability = 0; + + auto res = scePadGetControllerInformation(handle, &pInfo->base); + return res; } int PS4_SYSV_ABI scePadGetExtensionUnitInfo() { @@ -237,7 +244,7 @@ int PS4_SYSV_ABI scePadOpen(s32 userId, s32 type, s32 index, const OrbisPadOpenP int PS4_SYSV_ABI scePadOpenExt() { LOG_ERROR(Lib_Pad, "(STUBBED) called"); - return ORBIS_OK; + return 1; // dummy } int PS4_SYSV_ABI scePadOpenExt2() { @@ -419,8 +426,20 @@ int PS4_SYSV_ABI scePadSetForceIntercepted() { } int PS4_SYSV_ABI scePadSetLightBar(s32 handle, const OrbisPadLightBarParam* pParam) { - LOG_ERROR(Lib_Pad, "(STUBBED) called"); - return ORBIS_OK; + if (pParam != nullptr) { + LOG_INFO(Lib_Pad, "scePadSetLightBar called handle = {} rgb = {} {} {}", handle, pParam->r, + pParam->g, pParam->b); + + if (pParam->r < 0xD && pParam->g < 0xD && pParam->b < 0xD) { + LOG_INFO(Lib_Pad, "Invalid lightbar setting"); + return ORBIS_PAD_ERROR_INVALID_LIGHTBAR_SETTING; + } + + auto* controller = Common::Singleton::Instance(); + controller->SetLightBarRGB(pParam->r, pParam->g, pParam->b); + return ORBIS_OK; + } + return ORBIS_PAD_ERROR_INVALID_ARG; } int PS4_SYSV_ABI scePadSetLightBarBaseBrightness() { @@ -479,8 +498,14 @@ int PS4_SYSV_ABI scePadSetUserColor() { } int PS4_SYSV_ABI scePadSetVibration(s32 handle, const OrbisPadVibrationParam* pParam) { - LOG_DEBUG(Lib_Pad, "(STUBBED) called"); - return ORBIS_OK; + if (pParam != nullptr) { + LOG_INFO(Lib_Pad, "scePadSetVibration called handle = {} data = {} , {}", handle, + pParam->smallMotor, pParam->largeMotor); + auto* controller = Common::Singleton::Instance(); + controller->SetVibration(pParam->smallMotor, pParam->largeMotor); + return ORBIS_OK; + } + return ORBIS_PAD_ERROR_INVALID_ARG; } int PS4_SYSV_ABI scePadSetVibrationForce() { diff --git a/src/core/libraries/pad/pad.h b/src/core/libraries/pad/pad.h index 4f8542906..3e9c14a17 100644 --- a/src/core/libraries/pad/pad.h +++ b/src/core/libraries/pad/pad.h @@ -212,6 +212,19 @@ struct OrbisPadControllerInformation { u8 reserve[8]; }; +struct OrbisPadExtendedControllerInformation { + OrbisPadControllerInformation base; + u16 padType1; + u16 padType2; + u8 capability; + + union { + u8 quantityOfSelectorSwitch; + int maxPhysicalWheelAngle; + u8 data[8]; + }; +}; + struct OrbisPadOpenParam { u8 reserve[8]; }; @@ -248,7 +261,8 @@ int PS4_SYSV_ABI scePadGetControllerInformation(s32 handle, OrbisPadControllerIn int PS4_SYSV_ABI scePadGetDataInternal(); int PS4_SYSV_ABI scePadGetDeviceId(); int PS4_SYSV_ABI scePadGetDeviceInfo(); -int PS4_SYSV_ABI scePadGetExtControllerInformation(); +int PS4_SYSV_ABI scePadGetExtControllerInformation(s32 handle, + OrbisPadExtendedControllerInformation* pInfo); int PS4_SYSV_ABI scePadGetExtensionUnitInfo(); int PS4_SYSV_ABI scePadGetFeatureReport(); int PS4_SYSV_ABI scePadGetHandle(s32 userId, s32 type, s32 index); diff --git a/src/core/libraries/playgo/playgo.cpp b/src/core/libraries/playgo/playgo.cpp index af98253f4..66422dc28 100644 --- a/src/core/libraries/playgo/playgo.cpp +++ b/src/core/libraries/playgo/playgo.cpp @@ -6,6 +6,7 @@ #include "common/singleton.h" #include "core/libraries/error_codes.h" #include "core/libraries/libs.h" +#include "core/libraries/system/systemservice.h" #include "playgo.h" namespace Libraries::PlayGo { @@ -20,42 +21,129 @@ s32 PS4_SYSV_ABI sceDbgPlayGoSnapshot() { return ORBIS_OK; } -s32 PS4_SYSV_ABI scePlayGoClose() { - LOG_ERROR(Lib_PlayGo, "(STUBBED)called"); +s32 PS4_SYSV_ABI scePlayGoClose(OrbisPlayGoHandle handle) { + LOG_INFO(Lib_PlayGo, "called"); + + auto* playgo = Common::Singleton::Instance(); + + if (handle != 1) + return ORBIS_PLAYGO_ERROR_BAD_HANDLE; + if (!playgo->initialized) + return ORBIS_PLAYGO_ERROR_NOT_INITIALIZED; return ORBIS_OK; } -s32 PS4_SYSV_ABI scePlayGoGetChunkId() { - LOG_ERROR(Lib_PlayGo, "(STUBBED)called"); +s32 PS4_SYSV_ABI scePlayGoGetChunkId(OrbisPlayGoHandle handle, OrbisPlayGoChunkId* outChunkIdList, + u32 numberOfEntries, u32* outEntries) { + LOG_INFO(Lib_PlayGo, "called"); + auto* playgo = Common::Singleton::Instance(); + + if (handle != 1) + return ORBIS_PLAYGO_ERROR_BAD_HANDLE; + if (outEntries == nullptr) + return ORBIS_PLAYGO_ERROR_BAD_POINTER; + if (outChunkIdList != nullptr && numberOfEntries == 0) + return ORBIS_PLAYGO_ERROR_BAD_SIZE; + + if (playgo->GetPlaygoHeader().file_size == 0) { + *outEntries = 0; + } else { + if (outChunkIdList == nullptr) { + *outEntries = playgo->chunks.size(); + } else { + if (numberOfEntries > playgo->chunks.size()) { + numberOfEntries = playgo->chunks.size(); + } + + if (numberOfEntries != 0) { + for (u32 i = 0; i < numberOfEntries; i++) { + outChunkIdList[i] = i; + } + *outEntries = numberOfEntries; + } + } + } return ORBIS_OK; } -s32 PS4_SYSV_ABI scePlayGoGetEta() { - LOG_ERROR(Lib_PlayGo, "(STUBBED)called"); +s32 PS4_SYSV_ABI scePlayGoGetEta(OrbisPlayGoHandle handle, const OrbisPlayGoChunkId* chunkIds, + u32 numberOfEntries, OrbisPlayGoEta* outEta) { + LOG_INFO(Lib_PlayGo, "called"); + if (handle != 1) + return ORBIS_PLAYGO_ERROR_BAD_HANDLE; + if (chunkIds == nullptr || outEta == nullptr) + return ORBIS_PLAYGO_ERROR_BAD_POINTER; + if (numberOfEntries == 0) + return ORBIS_PLAYGO_ERROR_BAD_SIZE; + + *outEta = 0; // all is loaded return ORBIS_OK; } -s32 PS4_SYSV_ABI scePlayGoGetInstallSpeed() { - LOG_ERROR(Lib_PlayGo, "(STUBBED)called"); +s32 PS4_SYSV_ABI scePlayGoGetInstallSpeed(OrbisPlayGoHandle handle, + OrbisPlayGoInstallSpeed* outSpeed) { + LOG_INFO(Lib_PlayGo, "called"); + + auto* playgo = Common::Singleton::Instance(); + + if (handle != 1) + return ORBIS_PLAYGO_ERROR_BAD_HANDLE; + if (outSpeed == nullptr) + return ORBIS_PLAYGO_ERROR_BAD_POINTER; + if (!playgo->initialized) + return ORBIS_PLAYGO_ERROR_NOT_INITIALIZED; + + std::scoped_lock lk{playgo->GetSpeedMutex()}; + + if (playgo->speed == 0) { + using namespace std::chrono; + if ((duration_cast(steady_clock::now().time_since_epoch()).count() - + playgo->speed_tick) > 30 * 1000) { // 30sec + playgo->speed = ORBIS_PLAYGO_INSTALL_SPEED_TRICKLE; + } + } + *outSpeed = playgo->speed; + return ORBIS_OK; } s32 PS4_SYSV_ABI scePlayGoGetLanguageMask(OrbisPlayGoHandle handle, - OrbisPlayGoLanguageMask* languageMask) { - LOG_ERROR(Lib_PlayGo, "(STUBBED)called"); - *languageMask = 1; // En, todo; + OrbisPlayGoLanguageMask* outLanguageMask) { + LOG_INFO(Lib_PlayGo, "called"); + + auto* playgo = Common::Singleton::Instance(); + + if (handle != 1) + return ORBIS_PLAYGO_ERROR_BAD_HANDLE; + if (outLanguageMask == nullptr) + return ORBIS_PLAYGO_ERROR_BAD_POINTER; + if (!playgo->initialized) + return ORBIS_PLAYGO_ERROR_NOT_INITIALIZED; + + *outLanguageMask = playgo->langMask; return ORBIS_OK; } s32 PS4_SYSV_ABI scePlayGoGetLocus(OrbisPlayGoHandle handle, const OrbisPlayGoChunkId* chunkIds, uint32_t numberOfEntries, OrbisPlayGoLocus* outLoci) { - LOG_ERROR(Lib_PlayGo, "(STUBBED)called handle = {}, chunkIds = {}, numberOfEntries = {}", - handle, *chunkIds, numberOfEntries); + LOG_INFO(Lib_PlayGo, "called handle = {}, chunkIds = {}, numberOfEntries = {}", handle, + *chunkIds, numberOfEntries); - auto* playgo = Common::Singleton::Instance(); + auto* playgo = Common::Singleton::Instance(); + + if (handle != 1) + return ORBIS_PLAYGO_ERROR_BAD_HANDLE; + if (chunkIds == nullptr || outLoci == nullptr) + return ORBIS_PLAYGO_ERROR_BAD_POINTER; + if (numberOfEntries == 0) + return ORBIS_PLAYGO_ERROR_BAD_SIZE; + if (!playgo->initialized) + return ORBIS_PLAYGO_ERROR_NOT_INITIALIZED; + if (playgo->GetPlaygoHeader().file_size == 0) + return ORBIS_PLAYGO_ERROR_NOT_SUPPORT_PLAYGO; for (uint32_t i = 0; i < numberOfEntries; i++) { - if (chunkIds[i] <= playgo->GetPlaygoHeader().mchunk_count) { + if (chunkIds[i] <= playgo->chunks.size()) { outLoci[i] = OrbisPlayGoLocusValue::ORBIS_PLAYGO_LOCUS_LOCAL_FAST; } else { outLoci[i] = ORBIS_PLAYGO_LOCUS_NOT_DOWNLOADED; @@ -67,64 +155,202 @@ s32 PS4_SYSV_ABI scePlayGoGetLocus(OrbisPlayGoHandle handle, const OrbisPlayGoCh s32 PS4_SYSV_ABI scePlayGoGetProgress(OrbisPlayGoHandle handle, const OrbisPlayGoChunkId* chunkIds, uint32_t numberOfEntries, OrbisPlayGoProgress* outProgress) { - LOG_ERROR(Lib_PlayGo, "(STUBBED)called handle = {}, chunkIds = {}, numberOfEntries = {}", - handle, *chunkIds, numberOfEntries); - outProgress->progressSize = 0x10000; // todo? - outProgress->totalSize = 0x10000; + LOG_INFO(Lib_PlayGo, "called handle = {}, chunkIds = {}, numberOfEntries = {}", handle, + *chunkIds, numberOfEntries); + + auto* playgo = Common::Singleton::Instance(); + + if (handle != 1) + return ORBIS_PLAYGO_ERROR_BAD_HANDLE; + if (chunkIds == nullptr || outProgress == nullptr) + return ORBIS_PLAYGO_ERROR_BAD_POINTER; + if (numberOfEntries == 0) + return ORBIS_PLAYGO_ERROR_BAD_SIZE; + if (!playgo->initialized) + return ORBIS_PLAYGO_ERROR_NOT_INITIALIZED; + if (playgo->GetPlaygoHeader().file_size == 0) + return ORBIS_PLAYGO_ERROR_BAD_CHUNK_ID; + + outProgress->progressSize = 0; + outProgress->totalSize = 0; + + u64 total_size = 0; + for (u32 i = 0; i < numberOfEntries; i++) { + u32 chunk_id = chunkIds[i]; + if (chunk_id < playgo->chunks.size()) { + total_size += playgo->chunks[chunk_id].total_size; + } else { + return ORBIS_PLAYGO_ERROR_BAD_CHUNK_ID; + } + } + + outProgress->progressSize = total_size; + outProgress->totalSize = total_size; + return ORBIS_OK; } s32 PS4_SYSV_ABI scePlayGoGetToDoList(OrbisPlayGoHandle handle, OrbisPlayGoToDo* outTodoList, u32 numberOfEntries, u32* outEntries) { - LOG_ERROR(Lib_PlayGo, "(STUBBED)called handle = {} numberOfEntries = {}", handle, - numberOfEntries); + LOG_INFO(Lib_PlayGo, "called handle = {} numberOfEntries = {}", handle, numberOfEntries); + + auto* playgo = Common::Singleton::Instance(); + if (handle != 1) return ORBIS_PLAYGO_ERROR_BAD_HANDLE; - if (outTodoList == nullptr) + if (outTodoList == nullptr || outEntries == nullptr) return ORBIS_PLAYGO_ERROR_BAD_POINTER; + if (numberOfEntries == 0) + return ORBIS_PLAYGO_ERROR_BAD_SIZE; + if (!playgo->initialized) + return ORBIS_PLAYGO_ERROR_NOT_INITIALIZED; *outEntries = 0; // nothing to do return ORBIS_OK; } +int scePlayGoConvertLanguage(int systemLang) { + if (systemLang >= 0 && systemLang < 48) { + return (1 << (64 - systemLang - 1)); + } else { + return 0; + } +} + s32 PS4_SYSV_ABI scePlayGoInitialize(OrbisPlayGoInitParams* param) { + LOG_INFO(Lib_PlayGo, "called, bufSize = {}", param->bufSize); if (param->bufAddr == nullptr) return ORBIS_PLAYGO_ERROR_BAD_POINTER; if (param->bufSize < 0x200000) return ORBIS_PLAYGO_ERROR_BAD_SIZE; - LOG_INFO(Lib_PlayGo, "(STUBBED)called, bufSize = {}", param->bufSize); + + auto* playgo = Common::Singleton::Instance(); + + if (!playgo->initialized) { + using namespace SystemService; + // get system lang + int systemLang = 0; + sceSystemServiceParamGetInt(ORBIS_SYSTEM_SERVICE_PARAM_ID_LANG, &systemLang); + playgo->langMask = scePlayGoConvertLanguage(systemLang); + playgo->initialized = true; + } else { + return ORBIS_PLAYGO_ERROR_ALREADY_INITIALIZED; + } return ORBIS_OK; } s32 PS4_SYSV_ABI scePlayGoOpen(OrbisPlayGoHandle* outHandle, const void* param) { - *outHandle = 1; - LOG_INFO(Lib_PlayGo, "(STUBBED)called"); + LOG_INFO(Lib_PlayGo, "called"); + + auto* playgo = Common::Singleton::Instance(); + + if (outHandle == nullptr) + return ORBIS_PLAYGO_ERROR_BAD_POINTER; + if (param) + return ORBIS_PLAYGO_ERROR_INVALID_ARGUMENT; + if (!playgo->initialized) + return ORBIS_PLAYGO_ERROR_NOT_INITIALIZED; + if (playgo->GetPlaygoHeader().file_size == 0) + return ORBIS_PLAYGO_ERROR_NOT_SUPPORT_PLAYGO; + + playgo->handle = *outHandle = 1; return ORBIS_OK; } -s32 PS4_SYSV_ABI scePlayGoPrefetch() { - LOG_ERROR(Lib_PlayGo, "(STUBBED)called"); +s32 PS4_SYSV_ABI scePlayGoPrefetch(OrbisPlayGoHandle handle, const OrbisPlayGoChunkId* chunkIds, + u32 numberOfEntries, OrbisPlayGoLocus minimumLocus) { + LOG_INFO(Lib_PlayGo, "called"); + + auto* playgo = Common::Singleton::Instance(); + + if (handle != 1) + return ORBIS_PLAYGO_ERROR_BAD_HANDLE; + if (chunkIds == nullptr) + return ORBIS_PLAYGO_ERROR_BAD_POINTER; + if (numberOfEntries == 0) + return ORBIS_PLAYGO_ERROR_BAD_SIZE; + if (!playgo->initialized) + return ORBIS_PLAYGO_ERROR_NOT_INITIALIZED; + + switch (minimumLocus) { + case ORBIS_PLAYGO_LOCUS_NOT_DOWNLOADED: + case ORBIS_PLAYGO_LOCUS_LOCAL_SLOW: + case ORBIS_PLAYGO_LOCUS_LOCAL_FAST: + break; + default: + return ORBIS_PLAYGO_ERROR_BAD_LOCUS; + } return ORBIS_OK; } -s32 PS4_SYSV_ABI scePlayGoSetInstallSpeed() { - LOG_ERROR(Lib_PlayGo, "(STUBBED)called"); +s32 PS4_SYSV_ABI scePlayGoSetInstallSpeed(OrbisPlayGoHandle handle, OrbisPlayGoInstallSpeed speed) { + LOG_INFO(Lib_PlayGo, "called"); + + auto* playgo = Common::Singleton::Instance(); + + if (handle != 1) + return ORBIS_PLAYGO_ERROR_BAD_HANDLE; + if (!playgo->initialized) + return ORBIS_PLAYGO_ERROR_NOT_INITIALIZED; + + switch (speed) { + case ORBIS_PLAYGO_INSTALL_SPEED_SUSPENDED: + case ORBIS_PLAYGO_INSTALL_SPEED_TRICKLE: + case ORBIS_PLAYGO_INSTALL_SPEED_FULL: + break; + default: + return ORBIS_PLAYGO_ERROR_INVALID_ARGUMENT; + } + + std::scoped_lock lk{playgo->GetSpeedMutex()}; + + using namespace std::chrono; + playgo->speed = speed; + playgo->speed_tick = + duration_cast(steady_clock::now().time_since_epoch()).count(); + return ORBIS_OK; } s32 PS4_SYSV_ABI scePlayGoSetLanguageMask(OrbisPlayGoHandle handle, OrbisPlayGoLanguageMask languageMask) { - LOG_ERROR(Lib_PlayGo, "(STUBBED)called"); + LOG_INFO(Lib_PlayGo, "called"); + auto* playgo = Common::Singleton::Instance(); + + if (handle != 1) + return ORBIS_PLAYGO_ERROR_BAD_HANDLE; + if (!playgo->initialized) + return ORBIS_PLAYGO_ERROR_NOT_INITIALIZED; + + playgo->langMask = languageMask; return ORBIS_OK; } s32 PS4_SYSV_ABI scePlayGoSetToDoList(OrbisPlayGoHandle handle, const OrbisPlayGoToDo* todoList, uint32_t numberOfEntries) { - LOG_ERROR(Lib_PlayGo, "(STUBBED)called"); + LOG_INFO(Lib_PlayGo, "called"); + + auto* playgo = Common::Singleton::Instance(); + + if (handle != 1) + return ORBIS_PLAYGO_ERROR_BAD_HANDLE; + if (todoList == nullptr) + return ORBIS_PLAYGO_ERROR_BAD_POINTER; + if (numberOfEntries == 0) + return ORBIS_PLAYGO_ERROR_BAD_SIZE; + if (!playgo->initialized) + return ORBIS_PLAYGO_ERROR_NOT_INITIALIZED; return ORBIS_OK; } s32 PS4_SYSV_ABI scePlayGoTerminate() { - LOG_ERROR(Lib_PlayGo, "(STUBBED)called"); + LOG_INFO(Lib_PlayGo, "called"); + + auto* playgo = Common::Singleton::Instance(); + if (playgo->initialized) { + playgo->initialized = false; + } else { + return ORBIS_PLAYGO_ERROR_NOT_INITIALIZED; + } return ORBIS_OK; } diff --git a/src/core/libraries/playgo/playgo.h b/src/core/libraries/playgo/playgo.h index a6b8c91d3..f5ae1baa6 100644 --- a/src/core/libraries/playgo/playgo.h +++ b/src/core/libraries/playgo/playgo.h @@ -14,10 +14,13 @@ constexpr int shadMagic = 0x53484144; s32 PS4_SYSV_ABI sceDbgPlayGoRequestNextChunk(); s32 PS4_SYSV_ABI sceDbgPlayGoSnapshot(); -s32 PS4_SYSV_ABI scePlayGoClose(); -s32 PS4_SYSV_ABI scePlayGoGetChunkId(); -s32 PS4_SYSV_ABI scePlayGoGetEta(); -s32 PS4_SYSV_ABI scePlayGoGetInstallSpeed(); +s32 PS4_SYSV_ABI scePlayGoClose(OrbisPlayGoHandle handle); +s32 PS4_SYSV_ABI scePlayGoGetChunkId(OrbisPlayGoHandle handle, OrbisPlayGoChunkId* outChunkIdList, + u32 numberOfEntries, u32* outEntries); +s32 PS4_SYSV_ABI scePlayGoGetEta(OrbisPlayGoHandle handle, const OrbisPlayGoChunkId* chunkIds, + u32 numberOfEntries, OrbisPlayGoEta* outEta); +s32 PS4_SYSV_ABI scePlayGoGetInstallSpeed(OrbisPlayGoHandle handle, + OrbisPlayGoInstallSpeed* outSpeed); s32 PS4_SYSV_ABI scePlayGoGetLanguageMask(OrbisPlayGoHandle handle, OrbisPlayGoLanguageMask* outLanguageMask); s32 PS4_SYSV_ABI scePlayGoGetLocus(OrbisPlayGoHandle handle, const OrbisPlayGoChunkId* chunkIds, @@ -28,8 +31,9 @@ s32 PS4_SYSV_ABI scePlayGoGetToDoList(OrbisPlayGoHandle handle, OrbisPlayGoToDo* u32 numberOfEntries, u32* outEntries); s32 PS4_SYSV_ABI scePlayGoInitialize(OrbisPlayGoInitParams* param); s32 PS4_SYSV_ABI scePlayGoOpen(OrbisPlayGoHandle* outHandle, const void* param); -s32 PS4_SYSV_ABI scePlayGoPrefetch(); -s32 PS4_SYSV_ABI scePlayGoSetInstallSpeed(); +s32 PS4_SYSV_ABI scePlayGoPrefetch(OrbisPlayGoHandle handle, const OrbisPlayGoChunkId* chunkIds, + u32 numberOfEntries, OrbisPlayGoLocus minimumLocus); +s32 PS4_SYSV_ABI scePlayGoSetInstallSpeed(OrbisPlayGoHandle handle, OrbisPlayGoInstallSpeed speed); s32 PS4_SYSV_ABI scePlayGoSetLanguageMask(OrbisPlayGoHandle handle, OrbisPlayGoLanguageMask languageMask); s32 PS4_SYSV_ABI scePlayGoSetToDoList(OrbisPlayGoHandle handle, const OrbisPlayGoToDo* todoList, diff --git a/src/core/libraries/rtc/rtc.cpp b/src/core/libraries/rtc/rtc.cpp index 82e6db67e..f6faa5382 100644 --- a/src/core/libraries/rtc/rtc.cpp +++ b/src/core/libraries/rtc/rtc.cpp @@ -7,6 +7,7 @@ #include "core/libraries/error_codes.h" #include "core/libraries/libs.h" #include "rtc.h" +#include "rtc_error.h" namespace Libraries::Rtc { @@ -123,8 +124,7 @@ int PS4_SYSV_ABI sceRtcGetTick() { } int PS4_SYSV_ABI sceRtcGetTickResolution() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; + return 1000000; } int PS4_SYSV_ABI sceRtcGetTime_t() { diff --git a/src/core/libraries/rtc/rtc_error.h b/src/core/libraries/rtc/rtc_error.h new file mode 100644 index 000000000..04eecbbdf --- /dev/null +++ b/src/core/libraries/rtc/rtc_error.h @@ -0,0 +1,17 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +constexpr int ORBIS_RTC_ERROR_INVALID_PARAMETER = 0x80010602; +constexpr int ORBIS_RTC_ERROR_INVALID_TICK_PARAMETER = 0x80010603; +constexpr int ORBIS_RTC_ERROR_INVALID_DATE_PARAMETER = 0x80010604; +constexpr int ORBIS_RTC_ERROR_NOT_IMPLEMENTED = 0x80010605; +constexpr int ORBIS_RTC_ERROR_INVALID_TIMEZONE_FORMAT = 0x80010607; +constexpr int ORBIS_RTC_ERROR_INVALID_YEARS_PARAMETER = 0x80010621; +constexpr int ORBIS_RTC_ERROR_INVALID_MONTHS_PARAMETER = 0x80010622; +constexpr int ORBIS_RTC_ERROR_INVALID_DAYS_PARAMETER = 0x80010623; +constexpr int ORBIS_RTC_ERROR_INVALID_HOURS_PARAMETER = 0x80010624; +constexpr int ORBIS_RTC_ERROR_INVALID_MINUTES_PARAMETER = 0x80010625; +constexpr int ORBIS_RTC_ERROR_INVALID_SECONDS_PARAMETER = 0x80010626; +constexpr int ORBIS_RTC_ERROR_INVALID_MILLISECONDS_PARAMETER = 0x80010627; \ No newline at end of file diff --git a/src/core/libraries/save_data/savedata.cpp b/src/core/libraries/save_data/savedata.cpp index 64237994e..20496d76d 100644 --- a/src/core/libraries/save_data/savedata.cpp +++ b/src/core/libraries/save_data/savedata.cpp @@ -792,8 +792,8 @@ int PS4_SYSV_ABI sceSaveDataTransferringMount() { } s32 PS4_SYSV_ABI sceSaveDataUmount(const OrbisSaveDataMountPoint* mountPoint) { - LOG_INFO(Lib_SaveData, "mountPoint = {}", std::string(mountPoint->data)); - if (std::string(mountPoint->data).empty()) { + LOG_INFO(Lib_SaveData, "mountPoint = {}", mountPoint->data); + if (std::string_view(mountPoint->data).empty()) { return ORBIS_SAVE_DATA_ERROR_NOT_MOUNTED; } const auto& mount_dir = Common::FS::GetUserPath(Common::FS::PathType::SaveDataDir) / diff --git a/src/core/libraries/videoout/driver.cpp b/src/core/libraries/videoout/driver.cpp index 97b1816e5..25de48a4d 100644 --- a/src/core/libraries/videoout/driver.cpp +++ b/src/core/libraries/videoout/driver.cpp @@ -9,6 +9,7 @@ #include "core/libraries/error_codes.h" #include "core/libraries/kernel/time_management.h" #include "core/libraries/videoout/driver.h" +#include "core/platform.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" extern std::unique_ptr renderer; @@ -173,14 +174,19 @@ std::chrono::microseconds VideoOutDriver::Flip(const Request& req) { // Update flip status. auto* port = req.port; - auto& flip_status = port->flip_status; - flip_status.count++; - flip_status.processTime = Libraries::Kernel::sceKernelGetProcessTime(); - flip_status.tsc = Libraries::Kernel::sceKernelReadTsc(); - flip_status.submitTsc = Libraries::Kernel::sceKernelReadTsc(); - flip_status.flipArg = req.flip_arg; - flip_status.currentBuffer = req.index; - flip_status.flipPendingNum = static_cast(requests.size()); + { + std::unique_lock lock{port->port_mutex}; + auto& flip_status = port->flip_status; + flip_status.count++; + flip_status.processTime = Libraries::Kernel::sceKernelGetProcessTime(); + flip_status.tsc = Libraries::Kernel::sceKernelReadTsc(); + flip_status.flipArg = req.flip_arg; + flip_status.currentBuffer = req.index; + if (req.eop) { + --flip_status.gcQueueNum; + } + --flip_status.flipPendingNum; + } // Trigger flip events for the port. for (auto& event : port->flip_events) { @@ -202,34 +208,54 @@ std::chrono::microseconds VideoOutDriver::Flip(const Request& req) { bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg, bool is_eop /*= false*/) { + { + std::unique_lock lock{port->port_mutex}; + if (index != -1 && port->flip_status.flipPendingNum >= port->NumRegisteredBuffers()) { + LOG_ERROR(Lib_VideoOut, "Flip queue is full"); + return false; + } + + if (is_eop) { + ++port->flip_status.gcQueueNum; + } + ++port->flip_status.flipPendingNum; // integral GPU and CPU pending flips counter + port->flip_status.submitTsc = Libraries::Kernel::sceKernelReadTsc(); + } + + if (!is_eop) { + // Before processing the flip we need to ask GPU thread to flush command list as at this + // point VO surface is ready to be presented, and we will need have an actual state of + // Vulkan image at the time of frame presentation. + liverpool->SendCommand([=, this]() { + renderer->FlushDraw(); + SubmitFlipInternal(port, index, flip_arg, is_eop); + }); + } else { + SubmitFlipInternal(port, index, flip_arg, is_eop); + } + + return true; +} + +void VideoOutDriver::SubmitFlipInternal(VideoOutPort* port, s32 index, s64 flip_arg, + bool is_eop /*= false*/) { Vulkan::Frame* frame; if (index == -1) { - frame = renderer->PrepareBlankFrame(); + frame = renderer->PrepareBlankFrame(is_eop); } else { const auto& buffer = port->buffer_slots[index]; const auto& group = port->groups[buffer.group_index]; frame = renderer->PrepareFrame(group, buffer.address_left, is_eop); } - if (index != -1 && requests.size() >= port->NumRegisteredBuffers()) { - LOG_ERROR(Lib_VideoOut, "Flip queue is full"); - return false; - } - std::scoped_lock lock{mutex}; requests.push({ .frame = frame, .port = port, .index = index, .flip_arg = flip_arg, - .submit_tsc = Libraries::Kernel::sceKernelReadTsc(), .eop = is_eop, }); - - port->flip_status.flipPendingNum = static_cast(requests.size()); - port->flip_status.gcQueueNum = 0; - - return true; } void VideoOutDriver::PresentThread(std::stop_token token) { diff --git a/src/core/libraries/videoout/driver.h b/src/core/libraries/videoout/driver.h index 104056ded..bee800602 100644 --- a/src/core/libraries/videoout/driver.h +++ b/src/core/libraries/videoout/driver.h @@ -29,6 +29,7 @@ struct VideoOutPort { std::vector flip_events; std::vector vblank_events; std::mutex vo_mutex; + std::mutex port_mutex; std::condition_variable vo_cv; std::condition_variable vblank_cv; int flip_rate = 0; @@ -93,7 +94,6 @@ private: VideoOutPort* port; s32 index; s64 flip_arg; - u64 submit_tsc; bool eop; operator bool() const noexcept { @@ -102,6 +102,7 @@ private: }; std::chrono::microseconds Flip(const Request& req); + void SubmitFlipInternal(VideoOutPort* port, s32 index, s64 flip_arg, bool is_eop = false); void PresentThread(std::stop_token token); std::mutex mutex; diff --git a/src/core/libraries/videoout/video_out.cpp b/src/core/libraries/videoout/video_out.cpp index 15e14662a..d13062cd4 100644 --- a/src/core/libraries/videoout/video_out.cpp +++ b/src/core/libraries/videoout/video_out.cpp @@ -113,7 +113,9 @@ s32 PS4_SYSV_ABI sceVideoOutSetFlipRate(s32 handle, s32 rate) { s32 PS4_SYSV_ABI sceVideoOutIsFlipPending(s32 handle) { LOG_INFO(Lib_VideoOut, "called"); - s32 pending = driver->GetPort(handle)->flip_status.flipPendingNum; + auto* port = driver->GetPort(handle); + std::unique_lock lock{port->port_mutex}; + s32 pending = port->flip_status.flipPendingNum; return pending; } @@ -149,6 +151,28 @@ s32 PS4_SYSV_ABI sceVideoOutSubmitFlip(s32 handle, s32 bufferIndex, s32 flipMode return ORBIS_OK; } +int PS4_SYSV_ABI sceVideoOutGetEventId(const Kernel::SceKernelEvent* ev) { + if (ev == nullptr) { + return SCE_VIDEO_OUT_ERROR_INVALID_ADDRESS; + } + if (ev->filter != Kernel::SceKernelEvent::Filter::VideoOut) { + return ORBIS_VIDEO_OUT_ERROR_INVALID_EVENT_QUEUE; + } + return ev->ident; +} + +int PS4_SYSV_ABI sceVideoOutGetEventData(const Kernel::SceKernelEvent* ev, int64_t* data) { + if (ev == nullptr || data == nullptr) { + return SCE_VIDEO_OUT_ERROR_INVALID_ADDRESS; + } + if (ev->filter != Kernel::SceKernelEvent::Filter::VideoOut) { + return ORBIS_VIDEO_OUT_ERROR_INVALID_EVENT_QUEUE; + } + + *data = ev->data; + return ORBIS_OK; +} + s32 PS4_SYSV_ABI sceVideoOutGetFlipStatus(s32 handle, FlipStatus* status) { if (!status) { LOG_ERROR(Lib_VideoOut, "Flip status is null"); @@ -161,6 +185,7 @@ s32 PS4_SYSV_ABI sceVideoOutGetFlipStatus(s32 handle, FlipStatus* status) { return ORBIS_VIDEO_OUT_ERROR_INVALID_HANDLE; } + std::unique_lock lock{port->port_mutex}; *status = port->flip_status; LOG_INFO(Lib_VideoOut, @@ -197,7 +222,6 @@ s32 PS4_SYSV_ABI sceVideoOutGetResolutionStatus(s32 handle, SceVideoOutResolutio s32 PS4_SYSV_ABI sceVideoOutOpen(SceUserServiceUserId userId, s32 busType, s32 index, const void* param) { LOG_INFO(Lib_VideoOut, "called"); - ASSERT(userId == UserService::ORBIS_USER_SERVICE_USER_ID_SYSTEM || userId == 0); ASSERT(busType == SCE_VIDEO_OUT_BUS_TYPE_MAIN); if (index != 0) { @@ -300,6 +324,9 @@ void RegisterLib(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("kGVLc3htQE8", "libSceVideoOut", 1, "libSceVideoOut", 0, 0, sceVideoOutGetDeviceCapabilityInfo); LIB_FUNCTION("j6RaAUlaLv0", "libSceVideoOut", 1, "libSceVideoOut", 0, 0, sceVideoOutWaitVblank); + LIB_FUNCTION("U2JJtSqNKZI", "libSceVideoOut", 1, "libSceVideoOut", 0, 0, sceVideoOutGetEventId); + LIB_FUNCTION("rWUTcKdkUzQ", "libSceVideoOut", 1, "libSceVideoOut", 0, 0, + sceVideoOutGetEventData); // openOrbis appears to have libSceVideoOut_v1 module libSceVideoOut_v1.1 LIB_FUNCTION("Up36PTk687E", "libSceVideoOut", 1, "libSceVideoOut", 1, 1, sceVideoOutOpen); diff --git a/src/core/libraries/videoout/video_out.h b/src/core/libraries/videoout/video_out.h index b4423efdf..63cd8fede 100644 --- a/src/core/libraries/videoout/video_out.h +++ b/src/core/libraries/videoout/video_out.h @@ -104,6 +104,8 @@ s32 PS4_SYSV_ABI sceVideoOutGetResolutionStatus(s32 handle, SceVideoOutResolutio s32 PS4_SYSV_ABI sceVideoOutOpen(SceUserServiceUserId userId, s32 busType, s32 index, const void* param); s32 PS4_SYSV_ABI sceVideoOutClose(s32 handle); +int PS4_SYSV_ABI sceVideoOutGetEventId(const Kernel::SceKernelEvent* ev); +int PS4_SYSV_ABI sceVideoOutGetEventData(const Kernel::SceKernelEvent* ev, int64_t* data); // Internal system functions void sceVideoOutGetBufferLabelAddress(s32 handle, uintptr_t* label_addr); diff --git a/src/core/linker.cpp b/src/core/linker.cpp index e4cbe5739..d4a15825b 100644 --- a/src/core/linker.cpp +++ b/src/core/linker.cpp @@ -305,7 +305,8 @@ void* Linker::TlsGetAddr(u64 module_index, u64 offset) { // Module was just loaded by above code. Allocate TLS block for it. Module* module = m_modules[module_index - 1].get(); const u32 init_image_size = module->tls.init_image_size; - u8* dest = reinterpret_cast(heap_api_func(module->tls.image_size)); + // TODO: Determine if Windows will crash from this + u8* dest = reinterpret_cast(heap_api->heap_malloc(module->tls.image_size)); const u8* src = reinterpret_cast(module->tls.image_virtual_addr); std::memcpy(dest, src, init_image_size); std::memset(dest + init_image_size, 0, module->tls.image_size - init_image_size); @@ -335,10 +336,23 @@ void Linker::InitTlsForThread(bool is_primary) { &addr_out, tls_aligned, 3, 0, "SceKernelPrimaryTcbTls"); ASSERT_MSG(ret == 0, "Unable to allocate TLS+TCB for the primary thread"); } else { - if (heap_api_func) { - addr_out = heap_api_func(total_tls_size); + if (heap_api) { +#ifndef WIN32 + addr_out = heap_api->heap_malloc(total_tls_size); } else { addr_out = std::malloc(total_tls_size); +#else + // TODO: Windows tls malloc replacement, refer to rtld_tls_block_malloc + LOG_ERROR(Core_Linker, "TLS user malloc called, using std::malloc"); + addr_out = std::malloc(total_tls_size); + if (!addr_out) { + auto pth_id = pthread_self(); + auto handle = pthread_gethandle(pth_id); + ASSERT_MSG(addr_out, + "Cannot allocate TLS block defined for handle=%x, index=%d size=%d", + handle, pth_id, total_tls_size); + } +#endif } } diff --git a/src/core/linker.h b/src/core/linker.h index aee8c8fd3..ed1fe400c 100644 --- a/src/core/linker.h +++ b/src/core/linker.h @@ -46,7 +46,21 @@ struct EntryParams { const char* argv[3]; }; -using HeapApiFunc = PS4_SYSV_ABI void* (*)(size_t); +struct HeapAPI { + PS4_SYSV_ABI void* (*heap_malloc)(size_t); + PS4_SYSV_ABI void (*heap_free)(void*); + PS4_SYSV_ABI void* (*heap_calloc)(size_t, size_t); + PS4_SYSV_ABI void* (*heap_realloc)(void*, size_t); + PS4_SYSV_ABI void* (*heap_memalign)(size_t, size_t); + PS4_SYSV_ABI int (*heap_posix_memalign)(void**, size_t, size_t); + // NOTE: Fields below may be inaccurate + PS4_SYSV_ABI int (*heap_reallocalign)(void); + PS4_SYSV_ABI void (*heap_malloc_stats)(void); + PS4_SYSV_ABI int (*heap_malloc_stats_fast)(void); + PS4_SYSV_ABI size_t (*heap_malloc_usable_size)(void*); +}; + +using AppHeapAPI = HeapAPI*; class Linker { public: @@ -75,8 +89,8 @@ public: } } - void SetHeapApiFunc(void* func) { - heap_api_func = *reinterpret_cast(func); + void SetHeapAPI(void* func[]) { + heap_api = reinterpret_cast(func); } void AdvanceGenerationCounter() noexcept { @@ -104,7 +118,7 @@ private: size_t static_tls_size{}; u32 max_tls_index{}; u32 num_static_modules{}; - HeapApiFunc heap_api_func{}; + AppHeapAPI heap_api{}; std::vector> m_modules; Loader::SymbolsResolver m_hle_symbols{}; }; diff --git a/src/core/memory.cpp b/src/core/memory.cpp index dc5ded41d..6d0d581f9 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -54,7 +54,7 @@ PAddr MemoryManager::Allocate(PAddr search_start, PAddr search_end, size_t size, free_addr = alignment > 0 ? Common::AlignUp(free_addr, alignment) : free_addr; // Add the allocated region to the list and commit its pages. - auto& area = CarveDmemArea(free_addr, size); + auto& area = CarveDmemArea(free_addr, size)->second; area.memory_type = memory_type; area.is_free = false; return free_addr; @@ -63,9 +63,8 @@ PAddr MemoryManager::Allocate(PAddr search_start, PAddr search_end, size_t size, void MemoryManager::Free(PAddr phys_addr, size_t size) { std::scoped_lock lk{mutex}; - const auto dmem_area = FindDmemArea(phys_addr); - ASSERT(dmem_area != dmem_map.end() && dmem_area->second.base == phys_addr && - dmem_area->second.size == size); + auto dmem_area = CarveDmemArea(phys_addr, size); + ASSERT(dmem_area != dmem_map.end() && dmem_area->second.size >= size); // Release any dmem mappings that reference this physical block. std::vector> remove_list; @@ -74,10 +73,11 @@ void MemoryManager::Free(PAddr phys_addr, size_t size) { continue; } if (mapping.phys_base <= phys_addr && phys_addr < mapping.phys_base + mapping.size) { - LOG_INFO(Kernel_Vmm, "Unmaping direct mapping {:#x} with size {:#x}", addr, - mapping.size); + auto vma_segment_start_addr = phys_addr - mapping.phys_base + addr; + LOG_INFO(Kernel_Vmm, "Unmaping direct mapping {:#x} with size {:#x}", + vma_segment_start_addr, size); // Unmaping might erase from vma_map. We can't do it here. - remove_list.emplace_back(addr, mapping.size); + remove_list.emplace_back(vma_segment_start_addr, size); } } for (const auto& [addr, size] : remove_list) { @@ -104,8 +104,6 @@ int MemoryManager::Reserve(void** out_addr, VAddr virtual_addr, size_t size, Mem const auto& vma = FindVMA(mapped_addr)->second; // If the VMA is mapped, unmap the region first. if (vma.IsMapped()) { - ASSERT_MSG(vma.base == mapped_addr && vma.size == size, - "Region must match when reserving a mapped region"); UnmapMemory(mapped_addr, size); } const size_t remaining_size = vma.base + vma.size - mapped_addr; @@ -169,6 +167,7 @@ int MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, size_t size, M new_vma.prot = prot; new_vma.name = name; new_vma.type = type; + new_vma.is_exec = is_exec; if (type == VMAType::Direct) { new_vma.phys_base = phys_addr; @@ -216,10 +215,16 @@ void MemoryManager::UnmapMemory(VAddr virtual_addr, size_t size) { std::scoped_lock lk{mutex}; const auto it = FindVMA(virtual_addr); - ASSERT_MSG(it->second.Contains(virtual_addr, size), + const auto& vma_base = it->second; + ASSERT_MSG(vma_base.Contains(virtual_addr, size), "Existing mapping does not contain requested unmap range"); - const auto type = it->second.type; + const auto vma_base_addr = vma_base.base; + const auto vma_base_size = vma_base.size; + const auto phys_base = vma_base.phys_base; + const bool is_exec = vma_base.is_exec; + const auto start_in_vma = virtual_addr - vma_base_addr; + const auto type = vma_base.type; const bool has_backing = type == VMAType::Direct || type == VMAType::File; if (type == VMAType::Direct) { rasterizer->UnmapMemory(virtual_addr, size); @@ -239,7 +244,8 @@ void MemoryManager::UnmapMemory(VAddr virtual_addr, size_t size) { MergeAdjacent(vma_map, new_it); // Unmap the memory region. - impl.Unmap(virtual_addr, size, has_backing); + impl.Unmap(vma_base_addr, vma_base_size, start_in_vma, start_in_vma + size, phys_base, is_exec, + has_backing); TRACK_FREE(virtual_addr, "VMEM"); } @@ -267,10 +273,10 @@ int MemoryManager::VirtualQuery(VAddr addr, int flags, std::scoped_lock lk{mutex}; auto it = FindVMA(addr); - if (!it->second.IsMapped() && flags == 1) { + if (it->second.type == VMAType::Free && flags == 1) { it++; } - if (!it->second.IsMapped()) { + if (it->second.type == VMAType::Free) { LOG_WARNING(Kernel_Vmm, "VirtualQuery on free memory region"); return ORBIS_KERNEL_ERROR_EACCES; } @@ -397,13 +403,12 @@ MemoryManager::VMAHandle MemoryManager::CarveVMA(VAddr virtual_addr, size_t size return vma_handle; } -DirectMemoryArea& MemoryManager::CarveDmemArea(PAddr addr, size_t size) { +MemoryManager::DMemHandle MemoryManager::CarveDmemArea(PAddr addr, size_t size) { auto dmem_handle = FindDmemArea(addr); ASSERT_MSG(dmem_handle != dmem_map.end(), "Physical address not in dmem_map"); const DirectMemoryArea& area = dmem_handle->second; - ASSERT_MSG(area.is_free && area.base <= addr, - "Adding an allocation to already allocated region"); + ASSERT_MSG(area.base <= addr, "Adding an allocation to already allocated region"); const PAddr start_in_area = addr - area.base; const PAddr end_in_vma = start_in_area + size; @@ -418,7 +423,7 @@ DirectMemoryArea& MemoryManager::CarveDmemArea(PAddr addr, size_t size) { dmem_handle = Split(dmem_handle, start_in_area); } - return dmem_handle->second; + return dmem_handle; } MemoryManager::VMAHandle MemoryManager::Split(VMAHandle vma_handle, size_t offset_in_vma) { diff --git a/src/core/memory.h b/src/core/memory.h index 6d0a977fc..d58269678 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -84,6 +84,7 @@ struct VirtualMemoryArea { bool disallow_merge = false; std::string name = ""; uintptr_t fd = 0; + bool is_exec = false; bool Contains(VAddr addr, size_t size) const { return addr >= base && (addr + size) <= (base + this->size); @@ -205,7 +206,7 @@ private: VMAHandle CarveVMA(VAddr virtual_addr, size_t size); - DirectMemoryArea& CarveDmemArea(PAddr addr, size_t size); + DMemHandle CarveDmemArea(PAddr addr, size_t size); VMAHandle Split(VMAHandle vma_handle, size_t offset_in_vma); diff --git a/src/emulator.cpp b/src/emulator.cpp index 034b8706b..37e227ddf 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -10,6 +10,7 @@ #include "common/ntapi.h" #include "common/path_util.h" #include "common/polyfill_thread.h" +#include "common/scm_rev.h" #include "common/singleton.h" #include "common/version.h" #include "core/file_format/playgo_chunk.h" @@ -21,21 +22,18 @@ #include "core/libraries/libc/libc.h" #include "core/libraries/libc_internal/libc_internal.h" #include "core/libraries/libs.h" +#include "core/libraries/ngs2/ngs2.h" #include "core/libraries/rtc/rtc.h" #include "core/libraries/videoout/video_out.h" #include "core/linker.h" #include "core/memory.h" #include "emulator.h" -#include "src/common/scm_rev.h" #include "video_core/renderdoc.h" Frontend::WindowSDL* g_window = nullptr; namespace Core { -static constexpr s32 WindowWidth = 1280; -static constexpr s32 WindowHeight = 720; - Emulator::Emulator() { // Read configuration file. const auto config_dir = Common::FS::GetUserPath(Common::FS::PathType::UserDir); @@ -90,8 +88,11 @@ void Emulator::Run(const std::filesystem::path& file) { app_version = param_sfo->GetString("APP_VER"); LOG_INFO(Loader, "Fw: {:#x} App Version: {}", fw_version, app_version); } else if (entry.path().filename() == "playgo-chunk.dat") { - auto* playgo = Common::Singleton::Instance(); - playgo->Open(sce_sys_folder.string() + "/playgo-chunk.dat"); + auto* playgo = Common::Singleton::Instance(); + auto filepath = sce_sys_folder / "playgo-chunk.dat"; + if (!playgo->Open(filepath)) { + LOG_ERROR(Loader, "PlayGo: unable to open file"); + } } else if (entry.path().filename() == "pic0.png" || entry.path().filename() == "pic1.png") { auto* splash = Common::Singleton::Instance(); @@ -113,8 +114,8 @@ void Emulator::Run(const std::filesystem::path& file) { window_title = fmt::format("shadPS4 v{} {} | {}", Common::VERSION, Common::g_scm_desc, game_title); } - window = - std::make_unique(WindowWidth, WindowHeight, controller, window_title); + window = std::make_unique( + Config::getScreenWidth(), Config::getScreenHeight(), controller, window_title); g_window = window.get(); @@ -184,7 +185,7 @@ void Emulator::Run(const std::filesystem::path& file) { void Emulator::LoadSystemModules(const std::filesystem::path& file) { constexpr std::array ModulesToLoad{ - {{"libSceNgs2.sprx", nullptr}, + {{"libSceNgs2.sprx", &Libraries::Ngs2::RegisterlibSceNgs2}, {"libSceFiber.sprx", nullptr}, {"libSceUlt.sprx", nullptr}, {"libSceJson.sprx", nullptr}, diff --git a/src/images/about_icon.png b/src/images/about_icon.png new file mode 100644 index 000000000..bbb517982 Binary files /dev/null and b/src/images/about_icon.png differ diff --git a/src/input/controller.cpp b/src/input/controller.cpp index 247e08ce8..4a3db1633 100644 --- a/src/input/controller.cpp +++ b/src/input/controller.cpp @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include #include "core/libraries/kernel/time_management.h" #include "core/libraries/pad/pad.h" #include "input/controller.h" @@ -117,4 +118,29 @@ void GameController::Axis(int id, Input::Axis axis, int value) { AddState(state); } +void GameController::SetLightBarRGB(u8 r, u8 g, u8 b) { + if (m_sdl_gamepad != nullptr) { + SDL_SetGamepadLED(m_sdl_gamepad, r, g, b); + } +} + +bool GameController::SetVibration(u8 smallMotor, u8 largeMotor) { + if (m_sdl_gamepad != nullptr) { + return SDL_RumbleGamepad(m_sdl_gamepad, (smallMotor / 255.0f) * 0xFFFF, + (largeMotor / 255.0f) * 0xFFFF, -1) == 0; + } + return true; +} + +void GameController::TryOpenSDLController() { + if (m_sdl_gamepad == nullptr || !SDL_GamepadConnected(m_sdl_gamepad)) { + int gamepad_count; + SDL_JoystickID* gamepads = SDL_GetGamepads(&gamepad_count); + m_sdl_gamepad = gamepad_count > 0 ? SDL_OpenGamepad(gamepads[0]) : nullptr; + SDL_free(gamepads); + } + + SetLightBarRGB(0, 0, 255); +} + } // namespace Input diff --git a/src/input/controller.h b/src/input/controller.h index a16f7dd06..ef0991568 100644 --- a/src/input/controller.h +++ b/src/input/controller.h @@ -6,6 +6,8 @@ #include #include "common/types.h" +struct SDL_Gamepad; + namespace Input { enum class Axis { @@ -43,6 +45,9 @@ public: void CheckButton(int id, u32 button, bool isPressed); void AddState(const State& state); void Axis(int id, Input::Axis axis, int value); + void SetLightBarRGB(u8 r, u8 g, u8 b); + bool SetVibration(u8 smallMotor, u8 largeMotor); + void TryOpenSDLController(); private: struct StateInternal { @@ -57,6 +62,8 @@ private: u32 m_first_state = 0; std::array m_states; std::array m_private; + + SDL_Gamepad* m_sdl_gamepad = nullptr; }; } // namespace Input diff --git a/src/qt_gui/about_dialog.cpp b/src/qt_gui/about_dialog.cpp new file mode 100644 index 000000000..a932d65a0 --- /dev/null +++ b/src/qt_gui/about_dialog.cpp @@ -0,0 +1,13 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "about_dialog.h" +#include "ui_about_dialog.h" + +AboutDialog::AboutDialog(QWidget* parent) : QDialog(parent), ui(new Ui::AboutDialog) { + ui->setupUi(this); +} + +AboutDialog::~AboutDialog() { + delete ui; +} diff --git a/src/qt_gui/about_dialog.h b/src/qt_gui/about_dialog.h new file mode 100644 index 000000000..8c802221b --- /dev/null +++ b/src/qt_gui/about_dialog.h @@ -0,0 +1,21 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include + +namespace Ui { +class AboutDialog; +} + +class AboutDialog : public QDialog { + Q_OBJECT + +public: + explicit AboutDialog(QWidget* parent = nullptr); + ~AboutDialog(); + +private: + Ui::AboutDialog* ui; +}; \ No newline at end of file diff --git a/src/qt_gui/about_dialog.ui b/src/qt_gui/about_dialog.ui new file mode 100644 index 000000000..2b60476b8 --- /dev/null +++ b/src/qt_gui/about_dialog.ui @@ -0,0 +1,110 @@ + + + + AboutDialog + + + + 0 + 0 + 780 + 320 + + + + About shadPS4 + + + + :/images/shadps4.ico:/images/shadps4.ico + + + + + 10 + 30 + 271 + 261 + + + + QFrame::Shape::NoFrame + + + + + + :/images/shadps4.ico + + + true + + + true + + + + + + 310 + 40 + 171 + 41 + + + + + 24 + true + + + + shadPS4 + + + + + + 310 + 90 + 451 + 101 + + + + + 14 + + + + shadPS4 is an experimental open-source emulator for the PlayStation 4. + + + true + + + + + + 310 + 180 + 451 + 101 + + + + + 14 + + + + This software should not be used to play games you have not legally obtained. + + + true + + + + + diff --git a/src/qt_gui/gui_context_menus.h b/src/qt_gui/gui_context_menus.h index 146d5c34d..2f4b884cf 100644 --- a/src/qt_gui/gui_context_menus.h +++ b/src/qt_gui/gui_context_menus.h @@ -3,6 +3,7 @@ #pragma once +#include #include #include #include @@ -53,6 +54,18 @@ public: menu.addAction(&openSfoViewer); menu.addAction(&openTrophyViewer); + // "Copy" submenu. + QMenu* copyMenu = new QMenu("Copy info", widget); + QAction* copyName = new QAction("Copy Name", widget); + QAction* copySerial = new QAction("Copy Serial", widget); + QAction* copyNameAll = new QAction("Copy All", widget); + + copyMenu->addAction(copyName); + copyMenu->addAction(copySerial); + copyMenu->addAction(copyNameAll); + + menu.addMenu(copyMenu); + // Show menu. auto selected = menu.exec(global_pos); if (!selected) { @@ -191,6 +204,27 @@ public: } } } + + // Handle the "Copy" actions + if (selected == copyName) { + QClipboard* clipboard = QGuiApplication::clipboard(); + clipboard->setText(QString::fromStdString(m_games[itemID].name)); + } + + if (selected == copySerial) { + QClipboard* clipboard = QGuiApplication::clipboard(); + clipboard->setText(QString::fromStdString(m_games[itemID].serial)); + } + + if (selected == copyNameAll) { + QClipboard* clipboard = QGuiApplication::clipboard(); + QString combinedText = QString("Name:%1 | Serial:%2 | Version:%3 | Size:%4") + .arg(QString::fromStdString(m_games[itemID].name)) + .arg(QString::fromStdString(m_games[itemID].serial)) + .arg(QString::fromStdString(m_games[itemID].version)) + .arg(QString::fromStdString(m_games[itemID].size)); + clipboard->setText(combinedText); + } } int GetRowIndex(QTreeWidget* treeWidget, QTreeWidgetItem* item) { diff --git a/src/qt_gui/main_window.cpp b/src/qt_gui/main_window.cpp index f96afcc19..d776593bd 100644 --- a/src/qt_gui/main_window.cpp +++ b/src/qt_gui/main_window.cpp @@ -9,6 +9,7 @@ #include #include +#include "about_dialog.h" #include "common/io_file.h" #include "common/version.h" #include "core/file_format/pkg.h" @@ -17,6 +18,7 @@ #include "main_window.h" #include "sdl_window_manager.h" #include "settings_dialog.h" +#include "video_core/renderer_vulkan/vk_instance.h" MainWindow::MainWindow(QWidget* parent) : QMainWindow(parent), ui(new Ui::MainWindow) { ui->setupUi(this); @@ -40,6 +42,7 @@ bool MainWindow::Init() { CreateConnects(); SetLastUsedTheme(); SetLastIconSizeBullet(); + GetPhysicalDevices(); // show ui setMinimumSize(350, minimumSizeHint().height()); setWindowTitle(QString::fromStdString("shadPS4 v" + std::string(Common::VERSION))); @@ -159,11 +162,25 @@ void MainWindow::LoadGameLists() { } } +void MainWindow::GetPhysicalDevices() { + Vulkan::Instance instance(false, false); + auto physical_devices = instance.GetPhysicalDevices(); + for (const vk::PhysicalDevice physical_device : physical_devices) { + auto prop = physical_device.getProperties(); + QString name = QString::fromUtf8(prop.deviceName, -1); + if (prop.apiVersion < Vulkan::TargetVulkanApiVersion) { + name += " * Unsupported Vulkan Version"; + } + m_physical_devices.push_back(name); + } +} + void MainWindow::CreateConnects() { connect(this, &MainWindow::WindowResized, this, &MainWindow::HandleResize); connect(ui->mw_searchbar, &QLineEdit::textChanged, this, &MainWindow::SearchGameTable); connect(ui->exitAct, &QAction::triggered, this, &QWidget::close); connect(ui->refreshGameListAct, &QAction::triggered, this, &MainWindow::RefreshGameTable); + connect(ui->showGameListAct, &QAction::triggered, this, &MainWindow::ShowGameList); connect(this, &MainWindow::ExtractionFinished, this, &MainWindow::RefreshGameTable); connect(ui->sizeSlider, &QSlider::valueChanged, this, [this](int value) { @@ -187,14 +204,23 @@ void MainWindow::CreateConnects() { connect(m_game_list_frame.get(), &QTableWidget::cellDoubleClicked, this, &MainWindow::StartGame); - connect(ui->stopButton, &QPushButton::clicked, this, &MainWindow::StopGame); - connect(ui->settingsButton, &QPushButton::clicked, this, [this]() { - auto settingsDialog = new SettingsDialog(this); + connect(ui->configureAct, &QAction::triggered, this, [this]() { + auto settingsDialog = new SettingsDialog(m_physical_devices, this); settingsDialog->exec(); }); + connect(ui->settingsButton, &QPushButton::clicked, this, [this]() { + auto settingsDialog = new SettingsDialog(m_physical_devices, this); + settingsDialog->exec(); + }); + + connect(ui->aboutAct, &QAction::triggered, this, [this]() { + auto aboutDialog = new AboutDialog(this); + aboutDialog->exec(); + }); + connect(ui->setIconSizeTinyAct, &QAction::triggered, this, [this]() { if (isTableList) { m_game_list_frame->icon_size = @@ -317,6 +343,7 @@ void MainWindow::CreateConnects() { // Package install. connect(ui->bootInstallPkgAct, &QAction::triggered, this, &MainWindow::InstallPkg); + connect(ui->bootGameAct, &QAction::triggered, this, &MainWindow::BootGame); connect(ui->gameInstallPathAct, &QAction::triggered, this, &MainWindow::InstallDirectory); // elf viewer @@ -426,6 +453,15 @@ void MainWindow::SearchGameTable(const QString& text) { } } +void MainWindow::ShowGameList() { + if (ui->showGameListAct->isChecked()) { + RefreshGameTable(); + } else { + m_game_grid_frame->clearContents(); + m_game_list_frame->clearContents(); + } +}; + void MainWindow::RefreshGameTable() { // m_game_info->m_games.clear(); m_game_info->GetGameInfo(this); @@ -477,6 +513,27 @@ void MainWindow::InstallPkg() { } } +void MainWindow::BootGame() { + QFileDialog dialog; + dialog.setFileMode(QFileDialog::ExistingFile); + dialog.setNameFilter(tr("ELF files (*.bin *.elf *.oelf)")); + if (dialog.exec()) { + QStringList fileNames = dialog.selectedFiles(); + int nFiles = fileNames.size(); + + if (nFiles > 1) { + QMessageBox::critical(nullptr, "Game Boot", QString("Only one file can be selected!")); + } else { + std::filesystem::path path(fileNames[0].toStdString()); +#ifdef _WIN64 + path = std::filesystem::path(fileNames[0].toStdWString()); +#endif + Core::Emulator emulator; + emulator.Run(path); + } + } +} + void MainWindow::InstallDragDropPkg(std::filesystem::path file, int pkgNum, int nPkg) { if (Loader::DetectFileType(file) == Loader::FileTypes::Pkg) { pkg = PKG(); @@ -662,7 +719,9 @@ QIcon MainWindow::RecolorIcon(const QIcon& icon, bool isWhite) { void MainWindow::SetUiIcons(bool isWhite) { ui->bootInstallPkgAct->setIcon(RecolorIcon(ui->bootInstallPkgAct->icon(), isWhite)); + ui->bootGameAct->setIcon(RecolorIcon(ui->bootGameAct->icon(), isWhite)); ui->exitAct->setIcon(RecolorIcon(ui->exitAct->icon(), isWhite)); + ui->aboutAct->setIcon(RecolorIcon(ui->aboutAct->icon(), isWhite)); ui->setlistModeListAct->setIcon(RecolorIcon(ui->setlistModeListAct->icon(), isWhite)); ui->setlistModeGridAct->setIcon(RecolorIcon(ui->setlistModeGridAct->icon(), isWhite)); ui->gameInstallPathAct->setIcon(RecolorIcon(ui->gameInstallPathAct->icon(), isWhite)); @@ -676,6 +735,8 @@ void MainWindow::SetUiIcons(bool isWhite) { ui->refreshGameListAct->setIcon(RecolorIcon(ui->refreshGameListAct->icon(), isWhite)); ui->menuGame_List_Mode->setIcon(RecolorIcon(ui->menuGame_List_Mode->icon(), isWhite)); ui->pkgViewerAct->setIcon(RecolorIcon(ui->pkgViewerAct->icon(), isWhite)); + ui->configureAct->setIcon(RecolorIcon(ui->configureAct->icon(), isWhite)); + ui->addElfFolderAct->setIcon(RecolorIcon(ui->addElfFolderAct->icon(), isWhite)); } void MainWindow::resizeEvent(QResizeEvent* event) { diff --git a/src/qt_gui/main_window.h b/src/qt_gui/main_window.h index 51c67efb6..279313876 100644 --- a/src/qt_gui/main_window.h +++ b/src/qt_gui/main_window.h @@ -46,6 +46,7 @@ private Q_SLOTS: void ConfigureGuiFromSettings(); void SaveWindowState() const; void SearchGameTable(const QString& text); + void ShowGameList(); void RefreshGameTable(); void HandleResize(QResizeEvent* event); @@ -55,12 +56,14 @@ private: void CreateActions(); void CreateRecentGameActions(); void CreateDockWindows(); + void GetPhysicalDevices(); void LoadGameLists(); void CreateConnects(); void SetLastUsedTheme(); void SetLastIconSizeBullet(); void SetUiIcons(bool isWhite); void InstallPkg(); + void BootGame(); void AddRecentFiles(QString filePath); QIcon RecolorIcon(const QIcon& icon, bool isWhite); bool isIconBlack = false; @@ -80,6 +83,8 @@ private: QScopedPointer m_elf_viewer; // Status Bar. QScopedPointer statusBar; + // Available GPU devices + std::vector m_physical_devices; PSF psf; diff --git a/src/qt_gui/main_window_ui.h b/src/qt_gui/main_window_ui.h index 06e5cf7fb..7d0c58dd2 100644 --- a/src/qt_gui/main_window_ui.h +++ b/src/qt_gui/main_window_ui.h @@ -30,6 +30,7 @@ QT_BEGIN_NAMESPACE class Ui_MainWindow { public: QAction* bootInstallPkgAct; + QAction* bootGameAct; QAction* addElfFolderAct; QAction* exitAct; QAction* showGameListAct; @@ -44,6 +45,8 @@ public: QAction* gameInstallPathAct; QAction* dumpGameListAct; QAction* pkgViewerAct; + QAction* aboutAct; + QAction* configureAct; QAction* setThemeDark; QAction* setThemeLight; QAction* setThemeGreen; @@ -69,6 +72,7 @@ public: QMenu* menuSettings; QMenu* menuUtils; QMenu* menuThemes; + QMenu* menuAbout; QToolBar* toolBar; void setupUi(QMainWindow* MainWindow) { @@ -92,8 +96,12 @@ public: bootInstallPkgAct = new QAction(MainWindow); bootInstallPkgAct->setObjectName("bootInstallPkgAct"); bootInstallPkgAct->setIcon(QIcon(":images/file_icon.png")); + bootGameAct = new QAction(MainWindow); + bootGameAct->setObjectName("bootGameAct"); + bootGameAct->setIcon(QIcon(":images/play_icon.png")); addElfFolderAct = new QAction(MainWindow); addElfFolderAct->setObjectName("addElfFolderAct"); + addElfFolderAct->setIcon(QIcon(":images/folder_icon.png")); exitAct = new QAction(MainWindow); exitAct->setObjectName("exitAct"); exitAct->setIcon(QIcon(":images/exit_icon.png")); @@ -136,6 +144,12 @@ public: pkgViewerAct->setObjectName("pkgViewer"); pkgViewerAct->setObjectName("pkgViewer"); pkgViewerAct->setIcon(QIcon(":images/file_icon.png")); + aboutAct = new QAction(MainWindow); + aboutAct->setObjectName("aboutAct"); + aboutAct->setIcon(QIcon(":images/about_icon.png")); + configureAct = new QAction(MainWindow); + configureAct->setObjectName("configureAct"); + configureAct->setIcon(QIcon(":images/settings_icon.png")); setThemeDark = new QAction(MainWindow); setThemeDark->setObjectName("setThemeDark"); setThemeDark->setCheckable(true); @@ -242,6 +256,8 @@ public: menuThemes = new QMenu(menuView); menuThemes->setObjectName("menuThemes"); menuThemes->setIcon(QIcon(":images/themes_icon.png")); + menuAbout = new QMenu(menuBar); + menuAbout->setObjectName("menuAbout"); MainWindow->setMenuBar(menuBar); toolBar = new QToolBar(MainWindow); toolBar->setObjectName("toolBar"); @@ -250,7 +266,9 @@ public: menuBar->addAction(menuFile->menuAction()); menuBar->addAction(menuView->menuAction()); menuBar->addAction(menuSettings->menuAction()); + menuBar->addAction(menuAbout->menuAction()); menuFile->addAction(bootInstallPkgAct); + menuFile->addAction(bootGameAct); menuFile->addAction(addElfFolderAct); menuFile->addSeparator(); menuFile->addAction(menuRecent->menuAction()); @@ -274,10 +292,12 @@ public: menuGame_List_Mode->addAction(setlistModeListAct); menuGame_List_Mode->addAction(setlistModeGridAct); menuGame_List_Mode->addAction(setlistElfAct); + menuSettings->addAction(configureAct); menuSettings->addAction(gameInstallPathAct); menuSettings->addAction(menuUtils->menuAction()); menuUtils->addAction(dumpGameListAct); menuUtils->addAction(pkgViewerAct); + menuAbout->addAction(aboutAct); retranslateUi(MainWindow); @@ -290,6 +310,9 @@ public: QCoreApplication::translate("MainWindow", "Open/Add Elf Folder", nullptr)); bootInstallPkgAct->setText( QCoreApplication::translate("MainWindow", "Install Packages (PKG)", nullptr)); + bootGameAct->setText(QCoreApplication::translate("MainWindow", "Boot Game", nullptr)); + aboutAct->setText(QCoreApplication::translate("MainWindow", "About shadPS4", nullptr)); + configureAct->setText(QCoreApplication::translate("MainWindow", "Configure...", nullptr)); #if QT_CONFIG(tooltip) bootInstallPkgAct->setToolTip(QCoreApplication::translate( "MainWindow", "Install application from a .pkg file", nullptr)); @@ -332,6 +355,7 @@ public: menuSettings->setTitle(QCoreApplication::translate("MainWindow", "Settings", nullptr)); menuUtils->setTitle(QCoreApplication::translate("MainWindow", "Utils", nullptr)); menuThemes->setTitle(QCoreApplication::translate("MainWindow", "Themes", nullptr)); + menuAbout->setTitle(QCoreApplication::translate("MainWindow", "About", nullptr)); setThemeDark->setText(QCoreApplication::translate("MainWindow", "Dark", nullptr)); setThemeLight->setText(QCoreApplication::translate("MainWindow", "Light", nullptr)); setThemeGreen->setText(QCoreApplication::translate("MainWindow", "Green", nullptr)); diff --git a/src/qt_gui/settings_dialog.cpp b/src/qt_gui/settings_dialog.cpp index 722abe7e0..aa2c73384 100644 --- a/src/qt_gui/settings_dialog.cpp +++ b/src/qt_gui/settings_dialog.cpp @@ -1,16 +1,64 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include #include "settings_dialog.h" #include "ui_settings_dialog.h" -SettingsDialog::SettingsDialog(QWidget* parent) : QDialog(parent), ui(new Ui::SettingsDialog) { +QStringList languageNames = {"Arabic", + "Czech", + "Danish", + "Dutch", + "English (United Kingdom)", + "English (United States)", + "Finnish", + "French (Canada)", + "French (France)", + "German", + "Greek", + "Hungarian", + "Indonesian", + "Italian", + "Japanese", + "Korean", + "Norwegian", + "Polish", + "Portuguese (Brazil)", + "Portuguese (Portugal)", + "Romanian", + "Russian", + "Simplified Chinese", + "Spanish (Latin America)", + "Spanish (Spain)", + "Swedish", + "Thai", + "Traditional Chinese", + "Turkish", + "Vietnamese"}; + +const QVector languageIndexes = {21, 23, 14, 6, 18, 1, 12, 22, 2, 4, 25, 24, 29, 5, 0, + 9, 15, 16, 17, 7, 26, 8, 11, 20, 3, 13, 27, 10, 19, 28}; + +SettingsDialog::SettingsDialog(std::span physical_devices, QWidget* parent) + : QDialog(parent), ui(new Ui::SettingsDialog) { ui->setupUi(this); ui->tabWidgetSettings->setUsesScrollButtons(false); const auto config_dir = Common::FS::GetUserPath(Common::FS::PathType::UserDir); ui->buttonBox->button(QDialogButtonBox::StandardButton::Close)->setFocus(); + // Add list of available GPUs + ui->graphicsAdapterBox->addItem("Auto Select"); // -1, auto selection + for (const auto& device : physical_devices) { + ui->graphicsAdapterBox->addItem(device); + } + + ui->consoleLanguageComboBox->addItems(languageNames); + + QCompleter* completer = new QCompleter(languageNames, this); + completer->setCaseSensitivity(Qt::CaseInsensitive); + ui->consoleLanguageComboBox->setCompleter(completer); + LoadValuesFromConfig(); connect(ui->buttonBox, &QDialogButtonBox::rejected, this, &QWidget::close); @@ -32,15 +80,41 @@ SettingsDialog::SettingsDialog(QWidget* parent) : QDialog(parent), ui(new Ui::Se ui->buttonBox->button(QDialogButtonBox::StandardButton::Close)->setFocus(); }); - // EMULATOR TAB + // GENERAL TAB { - connect(ui->consoleLanguageComboBox, &QComboBox::currentIndexChanged, this, - [](int index) { Config::setLanguage(index); }); + connect(ui->userNameLineEdit, &QLineEdit::textChanged, this, + [](const QString& text) { Config::setUserName(text.toStdString()); }); + + connect(ui->consoleLanguageComboBox, QOverload::of(&QComboBox::currentIndexChanged), + this, [this](int index) { + if (index >= 0 && index < languageIndexes.size()) { + int languageCode = languageIndexes[index]; + Config::setLanguage(languageCode); + } + }); + + connect(ui->fullscreenCheckBox, &QCheckBox::stateChanged, this, + [](int val) { Config::setFullscreenMode(val); }); + + connect(ui->showSplashCheckBox, &QCheckBox::stateChanged, this, + [](int val) { Config::setShowSplash(val); }); + + connect(ui->ps4proCheckBox, &QCheckBox::stateChanged, this, + [](int val) { Config::setNeoMode(val); }); + + connect(ui->logTypeComboBox, &QComboBox::currentTextChanged, this, + [](const QString& text) { Config::setLogType(text.toStdString()); }); + + connect(ui->logFilterLineEdit, &QLineEdit::textChanged, this, + [](const QString& text) { Config::setLogFilter(text.toStdString()); }); } // GPU TAB { - // TODO: Implement graphics device changing + // First options is auto selection -1, so gpuId on the GUI will always have to subtract 1 + // when setting and add 1 when getting to select the correct gpu in Qt + connect(ui->graphicsAdapterBox, &QComboBox::currentIndexChanged, this, + [](int index) { Config::setGpuId(index - 1); }); connect(ui->widthSpinBox, &QSpinBox::valueChanged, this, [](int val) { Config::setScreenWidth(val); }); @@ -61,24 +135,6 @@ SettingsDialog::SettingsDialog(QWidget* parent) : QDialog(parent), ui(new Ui::Se [](int val) { Config::setDumpPM4(val); }); } - // GENERAL TAB - { - connect(ui->fullscreenCheckBox, &QCheckBox::stateChanged, this, - [](int val) { Config::setFullscreenMode(val); }); - - connect(ui->showSplashCheckBox, &QCheckBox::stateChanged, this, - [](int val) { Config::setShowSplash(val); }); - - connect(ui->ps4proCheckBox, &QCheckBox::stateChanged, this, - [](int val) { Config::setNeoMode(val); }); - - connect(ui->logTypeComboBox, &QComboBox::currentTextChanged, this, - [](const QString& text) { Config::setLogType(text.toStdString()); }); - - connect(ui->logFilterLineEdit, &QLineEdit::textChanged, this, - [](const QString& text) { Config::setLogFilter(text.toStdString()); }); - } - // DEBUG TAB { connect(ui->debugDump, &QCheckBox::stateChanged, this, @@ -96,8 +152,12 @@ SettingsDialog::SettingsDialog(QWidget* parent) : QDialog(parent), ui(new Ui::Se } void SettingsDialog::LoadValuesFromConfig() { - ui->consoleLanguageComboBox->setCurrentIndex(Config::GetLanguage()); - + ui->consoleLanguageComboBox->setCurrentIndex( + std::distance( + languageIndexes.begin(), + std::find(languageIndexes.begin(), languageIndexes.end(), Config::GetLanguage())) % + languageIndexes.size()); + ui->graphicsAdapterBox->setCurrentIndex(Config::getGpuId() + 1); ui->widthSpinBox->setValue(Config::getScreenWidth()); ui->heightSpinBox->setValue(Config::getScreenHeight()); ui->vblankSpinBox->setValue(Config::vblankDiv()); @@ -110,6 +170,7 @@ void SettingsDialog::LoadValuesFromConfig() { ui->ps4proCheckBox->setChecked(Config::isNeoMode()); ui->logTypeComboBox->setCurrentText(QString::fromStdString(Config::getLogType())); ui->logFilterLineEdit->setText(QString::fromStdString(Config::getLogFilter())); + ui->userNameLineEdit->setText(QString::fromStdString(Config::getUserName())); ui->debugDump->setChecked(Config::debugDump()); ui->vkValidationCheckBox->setChecked(Config::vkValidationEnabled()); @@ -121,4 +182,4 @@ int SettingsDialog::exec() { return QDialog::exec(); } -SettingsDialog::~SettingsDialog() {} \ No newline at end of file +SettingsDialog::~SettingsDialog() {} diff --git a/src/qt_gui/settings_dialog.h b/src/qt_gui/settings_dialog.h index 2bffa795c..7d8701093 100644 --- a/src/qt_gui/settings_dialog.h +++ b/src/qt_gui/settings_dialog.h @@ -3,6 +3,7 @@ #pragma once +#include #include #include @@ -16,7 +17,7 @@ class SettingsDialog; class SettingsDialog : public QDialog { Q_OBJECT public: - explicit SettingsDialog(QWidget* parent = nullptr); + explicit SettingsDialog(std::span physical_devices, QWidget* parent = nullptr); ~SettingsDialog(); int exec() override; diff --git a/src/qt_gui/settings_dialog.ui b/src/qt_gui/settings_dialog.ui index 4893bd613..dbb15206e 100644 --- a/src/qt_gui/settings_dialog.ui +++ b/src/qt_gui/settings_dialog.ui @@ -1,7 +1,6 @@ - SettingsDialog @@ -12,8 +11,8 @@ 0 0 - 1024 - 768 + 854 + 480 @@ -22,6 +21,12 @@ 0 + + + 11 + false + + Settings @@ -46,8 +51,8 @@ 0 0 - 1006 - 720 + 832 + 418 @@ -59,262 +64,240 @@ 0 - + - Emulator + General - + - + - + - + - Console Language + System - + - + + + 6 + + + 0 + - - Japanese - - - - - English (United States) - - - - - French (France) - - - - - Spanish (Spain) - - - - - German - - - - - Italian - - - - - Dutch - - - - - Portuguese (Portugal) - - - - - Russian - - - - - Korean - - - - - Traditional Chinese - - - - - Simplified Chinese - - - - - Finnish - - - - - Swedish - - - - - Danish - - - - - Norwegian - - - - - Polish - - - - - Portuguese (Brazil) - - - - - English (United Kingdom) - - - - - Turkish - - - - - Spanish (Latin America) - - - - - Arabic - - - - - French (Canada) - - - - - Czech - - - - - Hungarian - - - - - Greek - - - - - Romanian - - - - - Thai - - - - - Vietnamese - - - - - Indonesian - + + + + + Username + + + + + + + + + + + + + + + Console Language + + + + + + + + + + + Qt::Orientation::Vertical + + + QSizePolicy::Policy::MinimumExpanding + + + + 0 + 0 + + + + + + + + - - - - 0 - - - 0 - - - 0 - - - 0 - + + + Emulator + + + + + + Enable Fullscreen + + + + + + + Show Splash + + + + + + + Is PS4 Pro + + + + + + + Qt::Orientation::Vertical + + + + 20 + 40 + + + + - - - - - 0 - - - 0 - - - 0 - - - 0 - - - - - - - - Qt::Orientation::Vertical - - - QSizePolicy::Policy::MinimumExpanding - - - - 0 - 0 - - - - - - - - - - 12 - - - 12 - + + + + + Logger + + + + + + + 0 + + + 0 + + + 0 + + + 0 + + + + + Log Type + + + + + + + async + + + + + sync + + + + + + + + + + + + + + 6 + + + 0 + + + + + + + Log Filter + + + + + + + + + + + + + + + + Qt::Orientation::Vertical + + + + 20 + 40 + + + + + + + - - - Qt::Orientation::Vertical - - - QSizePolicy::Policy::MinimumExpanding - - - - 0 - 0 - - - + + + + 0 + + + 0 + + + 0 + + + 0 + + + @@ -322,7 +305,7 @@ GPU - + @@ -601,208 +584,6 @@ - - - - Qt::Orientation::Vertical - - - QSizePolicy::Policy::MinimumExpanding - - - - 0 - 0 - - - - - - - - - General - - - - - - - - - - Emulator Settings - - - - - - Enable Fullscreen - - - - - - - Show Splash - - - - - - - Is PS4 Pro - - - - - - - Qt::Orientation::Vertical - - - QSizePolicy::Policy::MinimumExpanding - - - - 0 - 0 - - - - - - - - - - - - - - - Logger Settings - - - - - - - 0 - - - 0 - - - 0 - - - 0 - - - - - Log Type - - - - - - - async - - - - - sync - - - - - - - - - - - - - - 6 - - - 0 - - - - - - - Log Filter - - - - - - - - - - - - - - - - - - - - - - - Additional Settings - - - - - - Qt::Orientation::Vertical - - - QSizePolicy::Policy::MinimumExpanding - - - - 0 - 0 - - - - - - - - - - - - - - - Qt::Orientation::Vertical - - - QSizePolicy::Policy::MinimumExpanding - - - - 0 - 0 - - - - diff --git a/src/sdl_window.cpp b/src/sdl_window.cpp index a46cee313..ac0aface1 100644 --- a/src/sdl_window.cpp +++ b/src/sdl_window.cpp @@ -44,6 +44,9 @@ WindowSDL::WindowSDL(s32 width_, s32 height_, Input::GameController* controller_ SDL_SetWindowFullscreen(window, Config::isFullscreenMode()); + SDL_InitSubSystem(SDL_INIT_GAMEPAD); + controller->TryOpenSDLController(); + #if defined(SDL_PLATFORM_WIN32) window_info.type = WindowSystemType::Windows; window_info.render_surface = SDL_GetPointerProperty(SDL_GetWindowProperties(window), @@ -93,6 +96,11 @@ void WindowSDL::waitEvent() { case SDL_EVENT_KEY_UP: onKeyPress(&event); break; + case SDL_EVENT_GAMEPAD_BUTTON_DOWN: + case SDL_EVENT_GAMEPAD_BUTTON_UP: + case SDL_EVENT_GAMEPAD_AXIS_MOTION: + onGamepadEvent(&event); + break; case SDL_EVENT_QUIT: is_open = false; break; @@ -282,4 +290,71 @@ void WindowSDL::onKeyPress(const SDL_Event* event) { } } +void WindowSDL::onGamepadEvent(const SDL_Event* event) { + using Libraries::Pad::OrbisPadButtonDataOffset; + + u32 button = 0; + Input::Axis axis = Input::Axis::AxisMax; + switch (event->type) { + case SDL_EVENT_GAMEPAD_BUTTON_DOWN: + case SDL_EVENT_GAMEPAD_BUTTON_UP: + button = sdlGamepadToOrbisButton(event->gbutton.button); + if (button != 0) { + controller->CheckButton(0, button, event->type == SDL_EVENT_GAMEPAD_BUTTON_DOWN); + } + break; + case SDL_EVENT_GAMEPAD_AXIS_MOTION: + axis = event->gaxis.axis == SDL_GAMEPAD_AXIS_LEFTX ? Input::Axis::LeftX + : event->gaxis.axis == SDL_GAMEPAD_AXIS_LEFTY ? Input::Axis::LeftY + : event->gaxis.axis == SDL_GAMEPAD_AXIS_RIGHTX ? Input::Axis::RightX + : event->gaxis.axis == SDL_GAMEPAD_AXIS_RIGHTY ? Input::Axis::RightY + : event->gaxis.axis == SDL_GAMEPAD_AXIS_LEFT_TRIGGER ? Input::Axis::TriggerLeft + : event->gaxis.axis == SDL_GAMEPAD_AXIS_RIGHT_TRIGGER ? Input::Axis::TriggerRight + : Input::Axis::AxisMax; + if (axis != Input::Axis::AxisMax) { + controller->Axis(0, axis, Input::GetAxis(-0x8000, 0x8000, event->gaxis.value)); + } + break; + } +} + +int WindowSDL::sdlGamepadToOrbisButton(u8 button) { + using Libraries::Pad::OrbisPadButtonDataOffset; + + switch (button) { + case SDL_GAMEPAD_BUTTON_DPAD_DOWN: + return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_DOWN; + case SDL_GAMEPAD_BUTTON_DPAD_UP: + return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_UP; + case SDL_GAMEPAD_BUTTON_DPAD_LEFT: + return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_LEFT; + case SDL_GAMEPAD_BUTTON_DPAD_RIGHT: + return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_RIGHT; + case SDL_GAMEPAD_BUTTON_SOUTH: + return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_CROSS; + case SDL_GAMEPAD_BUTTON_NORTH: + return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_TRIANGLE; + case SDL_GAMEPAD_BUTTON_WEST: + return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_SQUARE; + case SDL_GAMEPAD_BUTTON_EAST: + return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_CIRCLE; + case SDL_GAMEPAD_BUTTON_START: + return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_OPTIONS; + case SDL_GAMEPAD_BUTTON_TOUCHPAD: + return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_TOUCH_PAD; + case SDL_GAMEPAD_BUTTON_BACK: + return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_TOUCH_PAD; + case SDL_GAMEPAD_BUTTON_LEFT_SHOULDER: + return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_L1; + case SDL_GAMEPAD_BUTTON_RIGHT_SHOULDER: + return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_R1; + case SDL_GAMEPAD_BUTTON_LEFT_STICK: + return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_L3; + case SDL_GAMEPAD_BUTTON_RIGHT_STICK: + return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_R3; + default: + return 0; + } +} + } // namespace Frontend diff --git a/src/sdl_window.h b/src/sdl_window.h index 02d011285..cf6c37116 100644 --- a/src/sdl_window.h +++ b/src/sdl_window.h @@ -7,6 +7,7 @@ #include "common/types.h" struct SDL_Window; +struct SDL_Gamepad; union SDL_Event; namespace Input { @@ -66,6 +67,9 @@ public: private: void onResize(); void onKeyPress(const SDL_Event* event); + void onGamepadEvent(const SDL_Event* event); + + int sdlGamepadToOrbisButton(u8 button); private: s32 width; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp index e0bc4b77d..37e91d3b1 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp @@ -12,6 +12,25 @@ std::pair AtomicArgs(EmitContext& ctx) { return {scope, semantics}; } +Id SharedAtomicU32(EmitContext& ctx, Id offset, Id value, + Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) { + const Id shift_id{ctx.ConstU32(2U)}; + const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; + const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)}; + const auto [scope, semantics]{AtomicArgs(ctx)}; + return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value); +} + +Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value, + Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) { + auto& buffer = ctx.buffers[handle]; + address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset); + const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u)); + const Id ptr = ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index); + const auto [scope, semantics]{AtomicArgs(ctx)}; + return (ctx.*atomic_func)(ctx.U32[1], ptr, scope, semantics, value); +} + Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value, Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) { const auto& texture = ctx.images[handle & 0xFFFF]; @@ -21,6 +40,72 @@ Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id va } } // Anonymous namespace +Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicIAdd); +} + +Id EmitSharedAtomicUMax32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicUMax); +} + +Id EmitSharedAtomicSMax32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicSMax); +} + +Id EmitSharedAtomicUMin32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicUMin); +} + +Id EmitSharedAtomicSMin32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicSMin); +} + +Id EmitBufferAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { + return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicIAdd); +} + +Id EmitBufferAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { + return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicSMin); +} + +Id EmitBufferAtomicUMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { + return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicUMin); +} + +Id EmitBufferAtomicSMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { + return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicSMax); +} + +Id EmitBufferAtomicUMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { + return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicUMax); +} + +Id EmitBufferAtomicInc32(EmitContext&, IR::Inst*, u32, Id, Id) { + // TODO + UNREACHABLE_MSG("Unsupported BUFFER_ATOMIC opcode: ", IR::Opcode::BufferAtomicInc32); +} + +Id EmitBufferAtomicDec32(EmitContext&, IR::Inst*, u32, Id, Id) { + // TODO + UNREACHABLE_MSG("Unsupported BUFFER_ATOMIC opcode: ", IR::Opcode::BufferAtomicDec32); +} + +Id EmitBufferAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { + return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicAnd); +} + +Id EmitBufferAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { + return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicOr); +} + +Id EmitBufferAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { + return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicXor); +} + +Id EmitBufferAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { + return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicExchange); +} + Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value) { return ImageAtomicU32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicIAdd); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index e85272e96..0b02f3a37 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -128,11 +128,7 @@ Id EmitReadConst(EmitContext& ctx) { Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index) { auto& buffer = ctx.buffers[handle]; - if (!Sirit::ValidId(buffer.offset)) { - buffer.offset = ctx.GetBufferOffset(buffer.global_binding); - } - const Id offset_dwords{ctx.OpShiftRightLogical(ctx.U32[1], buffer.offset, ctx.ConstU32(2U))}; - index = ctx.OpIAdd(ctx.U32[1], index, offset_dwords); + index = ctx.OpIAdd(ctx.U32[1], index, buffer.offset_dwords); const Id ptr{ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index)}; return ctx.OpLoad(buffer.data_types->Get(1), ptr); } @@ -218,6 +214,10 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) { } void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 element) { + if (attr == IR::Attribute::Position1) { + LOG_WARNING(Render_Vulkan, "Ignoring pos1 export"); + return; + } const Id pointer{OutputAttrPointer(ctx, attr, element)}; ctx.OpStore(pointer, ctx.OpBitcast(ctx.F32[1], value)); } @@ -229,9 +229,6 @@ Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) { template static Id EmitLoadBufferF32xN(EmitContext& ctx, u32 handle, Id address) { auto& buffer = ctx.buffers[handle]; - if (!Sirit::ValidId(buffer.offset)) { - buffer.offset = ctx.GetBufferOffset(buffer.global_binding); - } address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset); const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u)); if constexpr (N == 1) { @@ -326,7 +323,7 @@ static Id ComponentOffset(EmitContext& ctx, Id address, u32 stride, u32 bit_offs static Id GetBufferFormatValue(EmitContext& ctx, u32 handle, Id address, u32 comp) { auto& buffer = ctx.buffers[handle]; - const auto format = buffer.buffer.GetDataFmt(); + const auto format = buffer.dfmt; switch (format) { case AmdGpu::DataFormat::FormatInvalid: return ctx.f32_zero_value; @@ -351,7 +348,7 @@ static Id GetBufferFormatValue(EmitContext& ctx, u32 handle, Id address, u32 com // uint index = address / 4; Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u)); - const u32 stride = buffer.buffer.GetStride(); + const u32 stride = buffer.stride; if (stride > 4) { const u32 index_offset = u32(AmdGpu::ComponentOffset(format, comp) / 32); if (index_offset > 0) { @@ -363,7 +360,7 @@ static Id GetBufferFormatValue(EmitContext& ctx, u32 handle, Id address, u32 com const u32 bit_offset = AmdGpu::ComponentOffset(format, comp) % 32; const u32 bit_width = AmdGpu::ComponentBits(format, comp); - const auto num_format = buffer.buffer.GetNumberFmt(); + const auto num_format = buffer.nfmt; if (num_format == AmdGpu::NumberFormat::Float) { if (bit_width == 32) { return ctx.OpLoad(ctx.F32[1], ptr); @@ -386,19 +383,12 @@ static Id GetBufferFormatValue(EmitContext& ctx, u32 handle, Id address, u32 com if (is_signed) { value = ctx.OpBitFieldSExtract(ctx.S32[1], value, comp_offset, ctx.ConstU32(bit_width)); - value = ctx.OpConvertSToF(ctx.F32[1], value); } else { value = ctx.OpBitFieldUExtract(ctx.U32[1], value, comp_offset, ctx.ConstU32(bit_width)); - value = ctx.OpConvertUToF(ctx.F32[1], value); - } - } else { - if (is_signed) { - value = ctx.OpConvertSToF(ctx.F32[1], value); - } else { - value = ctx.OpConvertUToF(ctx.F32[1], value); } } + value = ctx.OpBitcast(ctx.F32[1], value); return ConvertValue(ctx, value, num_format, bit_width); } break; @@ -411,9 +401,6 @@ static Id GetBufferFormatValue(EmitContext& ctx, u32 handle, Id address, u32 com template static Id EmitLoadBufferFormatF32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) { auto& buffer = ctx.buffers[handle]; - if (!Sirit::ValidId(buffer.offset)) { - buffer.offset = ctx.GetBufferOffset(buffer.global_binding); - } address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset); if constexpr (N == 1) { return GetBufferFormatValue(ctx, handle, address, 0); @@ -445,9 +432,6 @@ Id EmitLoadBufferFormatF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id ad template static void EmitStoreBufferF32xN(EmitContext& ctx, u32 handle, Id address, Id value) { auto& buffer = ctx.buffers[handle]; - if (!Sirit::ValidId(buffer.offset)) { - buffer.offset = ctx.GetBufferOffset(buffer.global_binding); - } address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset); const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u)); if constexpr (N == 1) { @@ -483,4 +467,97 @@ void EmitStoreBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address EmitStoreBufferF32xN<1>(ctx, handle, address, value); } +static Id ConvertF32ToFormat(EmitContext& ctx, Id value, AmdGpu::NumberFormat format, + u32 bit_width) { + switch (format) { + case AmdGpu::NumberFormat::Unorm: + return ctx.OpConvertFToU( + ctx.U32[1], ctx.OpFMul(ctx.F32[1], value, ctx.ConstF32(float(UXBitsMax(bit_width))))); + case AmdGpu::NumberFormat::Uint: + return ctx.OpBitcast(ctx.U32[1], value); + case AmdGpu::NumberFormat::Float: + return value; + default: + UNREACHABLE_MSG("Unsupported number fromat for conversion: {}", + magic_enum::enum_name(format)); + } +} + +template +static void EmitStoreBufferFormatF32xN(EmitContext& ctx, u32 handle, Id address, Id value) { + auto& buffer = ctx.buffers[handle]; + const auto format = buffer.dfmt; + const auto num_format = buffer.nfmt; + + switch (format) { + case AmdGpu::DataFormat::FormatInvalid: + return; + case AmdGpu::DataFormat::Format8_8_8_8: + case AmdGpu::DataFormat::Format16: + case AmdGpu::DataFormat::Format32: + case AmdGpu::DataFormat::Format32_32: + case AmdGpu::DataFormat::Format32_32_32_32: { + ASSERT(N == AmdGpu::NumComponents(format)); + + address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset); + const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u)); + const Id ptr = ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index); + + Id packed_value{}; + for (u32 i = 0; i < N; i++) { + const u32 bit_width = AmdGpu::ComponentBits(format, i); + const u32 bit_offset = AmdGpu::ComponentOffset(format, i) % 32; + + const Id comp{ConvertF32ToFormat( + ctx, N == 1 ? value : ctx.OpCompositeExtract(ctx.F32[1], value, i), num_format, + bit_width)}; + + if (bit_width == 32) { + if constexpr (N == 1) { + ctx.OpStore(ptr, comp); + } else { + const Id index_i = ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(i)); + const Id ptr = ctx.OpAccessChain(buffer.pointer_type, buffer.id, + ctx.u32_zero_value, index_i); + ctx.OpStore(ptr, comp); + } + } else { + if (i == 0) { + packed_value = comp; + } else { + packed_value = + ctx.OpBitFieldInsert(ctx.U32[1], packed_value, comp, + ctx.ConstU32(bit_offset), ctx.ConstU32(bit_width)); + } + + if (i == N - 1) { + ctx.OpStore(ptr, packed_value); + } + } + } + } break; + default: + UNREACHABLE_MSG("Invalid format for conversion: {}", magic_enum::enum_name(format)); + } +} + +void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { + EmitStoreBufferFormatF32xN<1>(ctx, handle, address, value); +} + +void EmitStoreBufferFormatF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, + Id value) { + EmitStoreBufferFormatF32xN<2>(ctx, handle, address, value); +} + +void EmitStoreBufferFormatF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, + Id value) { + EmitStoreBufferFormatF32xN<3>(ctx, handle, address, value); +} + +void EmitStoreBufferFormatF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, + Id value) { + EmitStoreBufferFormatF32xN<4>(ctx, handle, address, value); +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 030d39485..5526e5411 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -17,86 +17,133 @@ struct ImageOperands { operands.push_back(value); } + void AddOffset(EmitContext& ctx, const IR::Value& offset, + bool can_use_runtime_offsets = false) { + if (offset.IsEmpty()) { + return; + } + if (offset.IsImmediate()) { + const s32 operand = offset.U32(); + Add(spv::ImageOperandsMask::ConstOffset, ctx.ConstS32(operand)); + return; + } + IR::Inst* const inst{offset.InstRecursive()}; + if (inst->AreAllArgsImmediates()) { + switch (inst->GetOpcode()) { + case IR::Opcode::CompositeConstructU32x2: + Add(spv::ImageOperandsMask::ConstOffset, + ctx.ConstS32(static_cast(inst->Arg(0).U32()), + static_cast(inst->Arg(1).U32()))); + return; + case IR::Opcode::CompositeConstructU32x3: + Add(spv::ImageOperandsMask::ConstOffset, + ctx.ConstS32(static_cast(inst->Arg(0).U32()), + static_cast(inst->Arg(1).U32()), + static_cast(inst->Arg(2).U32()))); + return; + default: + break; + } + } + if (can_use_runtime_offsets) { + Add(spv::ImageOperandsMask::Offset, ctx.Def(offset)); + } else { + LOG_WARNING(Render_Vulkan, + "Runtime offset provided to unsupported image sample instruction"); + } + } + spv::ImageOperandsMask mask{}; boost::container::static_vector operands; }; -Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc, - Id offset) { +Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias, + const IR::Value& offset) { const auto& texture = ctx.images[handle & 0xFFFF]; const Id image = ctx.OpLoad(texture.image_type, texture.id); const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]); const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler); ImageOperands operands; - operands.Add(spv::ImageOperandsMask::Offset, offset); + operands.Add(spv::ImageOperandsMask::Bias, bias); + operands.AddOffset(ctx, offset); return ctx.OpImageSampleImplicitLod(ctx.F32[4], sampled_image, coords, operands.mask, operands.operands); } -Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc, - Id offset) { +Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod, + const IR::Value& offset) { const auto& texture = ctx.images[handle & 0xFFFF]; const Id image = ctx.OpLoad(texture.image_type, texture.id); const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]); const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler); - return ctx.OpImageSampleExplicitLod(ctx.F32[4], sampled_image, coords, - spv::ImageOperandsMask::Lod, ctx.ConstF32(0.f)); + ImageOperands operands; + operands.Add(spv::ImageOperandsMask::Lod, lod); + operands.AddOffset(ctx, offset); + return ctx.OpImageSampleExplicitLod(ctx.F32[4], sampled_image, coords, operands.mask, + operands.operands); } Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref, - Id bias_lc, const IR::Value& offset) { + Id bias, const IR::Value& offset) { const auto& texture = ctx.images[handle & 0xFFFF]; const Id image = ctx.OpLoad(texture.image_type, texture.id); const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]); const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler); - return ctx.OpImageSampleDrefImplicitLod(ctx.F32[1], sampled_image, coords, dref); + ImageOperands operands; + operands.Add(spv::ImageOperandsMask::Bias, bias); + operands.AddOffset(ctx, offset); + return ctx.OpImageSampleDrefImplicitLod(ctx.F32[1], sampled_image, coords, dref, operands.mask, + operands.operands); } Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref, - Id bias_lc, Id offset) { + Id lod, const IR::Value& offset) { const auto& texture = ctx.images[handle & 0xFFFF]; const Id image = ctx.OpLoad(texture.image_type, texture.id); const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]); const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler); - return ctx.OpImageSampleDrefExplicitLod(ctx.F32[1], sampled_image, coords, dref, - spv::ImageOperandsMask::Lod, ctx.ConstF32(0.f)); + ImageOperands operands; + operands.AddOffset(ctx, offset); + operands.Add(spv::ImageOperandsMask::Lod, lod); + return ctx.OpImageSampleDrefExplicitLod(ctx.F32[1], sampled_image, coords, dref, operands.mask, + operands.operands); } -Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, Id offset2) { +Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, + const IR::Value& offset) { const auto& texture = ctx.images[handle & 0xFFFF]; const Id image = ctx.OpLoad(texture.image_type, texture.id); const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]); const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler); const u32 comp = inst->Flags().gather_comp.Value(); ImageOperands operands; - operands.Add(spv::ImageOperandsMask::Offset, offset); + operands.AddOffset(ctx, offset); return ctx.OpImageGather(ctx.F32[4], sampled_image, coords, ctx.ConstU32(comp), operands.mask, operands.operands); } -Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, - Id offset2, Id dref) { +Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, + const IR::Value& offset, Id dref) { const auto& texture = ctx.images[handle & 0xFFFF]; const Id image = ctx.OpLoad(texture.image_type, texture.id); const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]); const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler); ImageOperands operands; - operands.Add(spv::ImageOperandsMask::Offset, offset); + operands.AddOffset(ctx, offset); return ctx.OpImageDrefGather(ctx.F32[4], sampled_image, coords, dref, operands.mask, operands.operands); } -Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, Id lod, - Id ms) { +Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, const IR::Value& offset, + Id lod, Id ms) { const auto& texture = ctx.images[handle & 0xFFFF]; const Id image = ctx.OpLoad(texture.image_type, texture.id); const Id result_type = texture.data_types->Get(4); - if (Sirit::ValidId(lod)) { - return ctx.OpBitcast(ctx.F32[4], ctx.OpImageFetch(result_type, image, coords, - spv::ImageOperandsMask::Lod, lod)); - } else { - return ctx.OpBitcast(ctx.F32[4], ctx.OpImageFetch(result_type, image, coords)); - } + ImageOperands operands; + operands.AddOffset(ctx, offset); + operands.Add(spv::ImageOperandsMask::Lod, lod); + return ctx.OpBitcast( + ctx.F32[4], ctx.OpImageFetch(result_type, image, coords, operands.mask, operands.operands)); } Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod, bool skip_mips) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 51899eb4d..bc39bc0f3 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -76,7 +76,22 @@ void EmitStoreBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address void EmitStoreBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); void EmitStoreBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); void EmitStoreBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +void EmitStoreBufferFormatF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +void EmitStoreBufferFormatF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +void EmitStoreBufferFormatF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); void EmitStoreBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +Id EmitBufferAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +Id EmitBufferAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +Id EmitBufferAtomicUMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +Id EmitBufferAtomicSMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +Id EmitBufferAtomicUMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +Id EmitBufferAtomicInc32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +Id EmitBufferAtomicDec32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +Id EmitBufferAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +Id EmitBufferAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +Id EmitBufferAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +Id EmitBufferAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp); Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp); void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp); @@ -93,18 +108,17 @@ Id EmitUndefU8(EmitContext& ctx); Id EmitUndefU16(EmitContext& ctx); Id EmitUndefU32(EmitContext& ctx); Id EmitUndefU64(EmitContext& ctx); -Id EmitLoadSharedU8(EmitContext& ctx, Id offset); -Id EmitLoadSharedS8(EmitContext& ctx, Id offset); -Id EmitLoadSharedU16(EmitContext& ctx, Id offset); -Id EmitLoadSharedS16(EmitContext& ctx, Id offset); Id EmitLoadSharedU32(EmitContext& ctx, Id offset); Id EmitLoadSharedU64(EmitContext& ctx, Id offset); Id EmitLoadSharedU128(EmitContext& ctx, Id offset); -void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value); -void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value); void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value); void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value); void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value); +Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id offset, Id value); +Id EmitSharedAtomicUMax32(EmitContext& ctx, Id offset, Id value); +Id EmitSharedAtomicSMax32(EmitContext& ctx, Id offset, Id value); +Id EmitSharedAtomicUMin32(EmitContext& ctx, Id offset, Id value); +Id EmitSharedAtomicSMin32(EmitContext& ctx, Id offset, Id value); Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2); Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3); Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); @@ -357,19 +371,20 @@ Id EmitConvertF64U64(EmitContext& ctx, Id value); Id EmitConvertU16U32(EmitContext& ctx, Id value); Id EmitConvertU32U16(EmitContext& ctx, Id value); -Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc, - Id offset); -Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc, - Id offset); +Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias, + const IR::Value& offset); +Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod, + const IR::Value& offset); Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref, - Id bias_lc, const IR::Value& offset); + Id bias, const IR::Value& offset); Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref, - Id bias_lc, Id offset); -Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, Id offset2); -Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, - Id offset2, Id dref); -Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, Id lod, - Id ms); + Id lod, const IR::Value& offset); +Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, + const IR::Value& offset); +Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, + const IR::Value& offset, Id dref); +Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, const IR::Value& offset, + Id lod, Id ms); Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod, bool skip_mips); Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords); Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp index 1582d9dd2..57ea476f1 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp @@ -5,99 +5,25 @@ #include "shader_recompiler/backend/spirv/spirv_emit_context.h" namespace Shader::Backend::SPIRV { -namespace { -Id Pointer(EmitContext& ctx, Id pointer_type, Id array, Id offset, u32 shift) { - const Id shift_id{ctx.ConstU32(shift)}; - const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; - return ctx.OpAccessChain(pointer_type, array, ctx.u32_zero_value, index); -} -Id Word(EmitContext& ctx, Id offset) { +Id EmitLoadSharedU32(EmitContext& ctx, Id offset) { const Id shift_id{ctx.ConstU32(2U)}; const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)}; return ctx.OpLoad(ctx.U32[1], pointer); } -std::pair ExtractArgs(EmitContext& ctx, Id offset, u32 mask, u32 count) { - const Id shift{ctx.OpShiftLeftLogical(ctx.U32[1], offset, ctx.ConstU32(3U))}; - const Id bit{ctx.OpBitwiseAnd(ctx.U32[1], shift, ctx.ConstU32(mask))}; - const Id count_id{ctx.ConstU32(count)}; - return {bit, count_id}; -} -} // Anonymous namespace - -Id EmitLoadSharedU8(EmitContext& ctx, Id offset) { - if (ctx.profile.support_explicit_workgroup_layout) { - const Id pointer{ - ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)}; - return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer)); - } else { - const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)}; - return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count); - } -} - -Id EmitLoadSharedS8(EmitContext& ctx, Id offset) { - if (ctx.profile.support_explicit_workgroup_layout) { - const Id pointer{ - ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)}; - return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer)); - } else { - const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)}; - return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count); - } -} - -Id EmitLoadSharedU16(EmitContext& ctx, Id offset) { - if (ctx.profile.support_explicit_workgroup_layout) { - const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)}; - return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer)); - } else { - const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)}; - return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count); - } -} - -Id EmitLoadSharedS16(EmitContext& ctx, Id offset) { - if (ctx.profile.support_explicit_workgroup_layout) { - const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)}; - return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer)); - } else { - const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)}; - return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count); - } -} - -Id EmitLoadSharedU32(EmitContext& ctx, Id offset) { - if (ctx.profile.support_explicit_workgroup_layout) { - const Id pointer{Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2)}; - return ctx.OpLoad(ctx.U32[1], pointer); - } else { - return Word(ctx, offset); - } -} - Id EmitLoadSharedU64(EmitContext& ctx, Id offset) { - if (ctx.profile.support_explicit_workgroup_layout) { - const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)}; - return ctx.OpLoad(ctx.U32[2], pointer); - } else { - const Id shift_id{ctx.ConstU32(2U)}; - const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; - const Id next_index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.ConstU32(1U))}; - const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, base_index)}; - const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_index)}; - return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer), - ctx.OpLoad(ctx.U32[1], rhs_pointer)); - } + const Id shift_id{ctx.ConstU32(2U)}; + const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; + const Id next_index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.ConstU32(1U))}; + const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, base_index)}; + const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_index)}; + return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer), + ctx.OpLoad(ctx.U32[1], rhs_pointer)); } Id EmitLoadSharedU128(EmitContext& ctx, Id offset) { - if (ctx.profile.support_explicit_workgroup_layout) { - const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)}; - return ctx.OpLoad(ctx.U32[4], pointer); - } const Id shift_id{ctx.ConstU32(2U)}; const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; std::array values{}; @@ -109,35 +35,14 @@ Id EmitLoadSharedU128(EmitContext& ctx, Id offset) { return ctx.OpCompositeConstruct(ctx.U32[4], values); } -void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value) { - const Id pointer{ - ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)}; - ctx.OpStore(pointer, ctx.OpUConvert(ctx.U8, value)); -} - -void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value) { - const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)}; - ctx.OpStore(pointer, ctx.OpUConvert(ctx.U16, value)); -} - void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value) { - Id pointer{}; - if (ctx.profile.support_explicit_workgroup_layout) { - pointer = Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2); - } else { - const Id shift{ctx.ConstU32(2U)}; - const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)}; - pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset); - } + const Id shift{ctx.ConstU32(2U)}; + const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)}; + const Id pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset); ctx.OpStore(pointer, value); } void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) { - if (ctx.profile.support_explicit_workgroup_layout) { - const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)}; - ctx.OpStore(pointer, value); - return; - } const Id shift{ctx.ConstU32(2U)}; const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)}; const Id next_offset{ctx.OpIAdd(ctx.U32[1], word_offset, ctx.ConstU32(1U))}; @@ -148,11 +53,6 @@ void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) { } void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value) { - if (ctx.profile.support_explicit_workgroup_layout) { - const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)}; - ctx.OpStore(pointer, value); - return; - } const Id shift{ctx.ConstU32(2U)}; const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)}; for (u32 i = 0; i < 4; ++i) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp index 891e41df7..3ed89692b 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp @@ -6,7 +6,9 @@ namespace Shader::Backend::SPIRV { -void EmitPrologue(EmitContext& ctx) {} +void EmitPrologue(EmitContext& ctx) { + ctx.DefineBufferOffsets(); +} void EmitEpilogue(EmitContext& ctx) {} diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 61b55437d..d61e108f6 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -165,14 +165,18 @@ EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat f throw InvalidArgument("Invalid attribute type {}", fmt); } -Id EmitContext::GetBufferOffset(u32 binding) { - const u32 half = Shader::PushData::BufOffsetIndex + (binding >> 4); - const u32 comp = (binding & 0xf) >> 2; - const u32 offset = (binding & 0x3) << 3; - const Id ptr{OpAccessChain(TypePointer(spv::StorageClass::PushConstant, U32[1]), - push_data_block, ConstU32(half), ConstU32(comp))}; - const Id value{OpLoad(U32[1], ptr)}; - return OpBitFieldUExtract(U32[1], value, ConstU32(offset), ConstU32(8U)); +void EmitContext::DefineBufferOffsets() { + for (auto& buffer : buffers) { + const u32 binding = buffer.binding; + const u32 half = Shader::PushData::BufOffsetIndex + (binding >> 4); + const u32 comp = (binding & 0xf) >> 2; + const u32 offset = (binding & 0x3) << 3; + const Id ptr{OpAccessChain(TypePointer(spv::StorageClass::PushConstant, U32[1]), + push_data_block, ConstU32(half), ConstU32(comp))}; + const Id value{OpLoad(U32[1], ptr)}; + buffer.offset = OpBitFieldUExtract(U32[1], value, ConstU32(offset), ConstU32(8U)); + buffer.offset_dwords = OpShiftRightLogical(U32[1], buffer.offset, ConstU32(2U)); + } } Id MakeDefaultValue(EmitContext& ctx, u32 default_value) { @@ -327,7 +331,9 @@ void EmitContext::DefineBuffers() { for (u32 i = 0; const auto& buffer : info.buffers) { const auto* data_types = True(buffer.used_types & IR::Type::F32) ? &F32 : &U32; const Id data_type = (*data_types)[1]; - const Id record_array_type{TypeArray(data_type, ConstU32(buffer.length))}; + const Id record_array_type{buffer.is_storage + ? TypeRuntimeArray(data_type) + : TypeArray(data_type, ConstU32(buffer.length))}; const Id struct_type{TypeStruct(record_array_type)}; if (std::ranges::find(type_ids, record_array_type.value, &Id::value) == type_ids.end()) { Decorate(record_array_type, spv::Decoration::ArrayStride, 4); @@ -354,10 +360,12 @@ void EmitContext::DefineBuffers() { buffers.push_back({ .id = id, - .global_binding = binding++, + .binding = binding++, .data_types = data_types, .pointer_type = pointer_type, - .buffer = buffer.GetVsharp(info), + .dfmt = buffer.dfmt, + .nfmt = buffer.nfmt, + .stride = buffer.GetVsharp(info).GetStride(), }); interfaces.push_back(id); i++; @@ -389,6 +397,10 @@ spv::ImageFormat GetFormat(const AmdGpu::Image& image) { image.GetNumberFmt() == AmdGpu::NumberFormat::Float) { return spv::ImageFormat::R16f; } + if (image.GetDataFmt() == AmdGpu::DataFormat::Format16 && + image.GetNumberFmt() == AmdGpu::NumberFormat::Uint) { + return spv::ImageFormat::R16ui; + } if (image.GetDataFmt() == AmdGpu::DataFormat::Format16_16 && image.GetNumberFmt() == AmdGpu::NumberFormat::Float) { return spv::ImageFormat::Rg16f; @@ -401,6 +413,10 @@ spv::ImageFormat GetFormat(const AmdGpu::Image& image) { image.GetNumberFmt() == AmdGpu::NumberFormat::Float) { return spv::ImageFormat::Rgba16f; } + if (image.GetDataFmt() == AmdGpu::DataFormat::Format16_16_16_16 && + image.GetNumberFmt() == AmdGpu::NumberFormat::Unorm) { + return spv::ImageFormat::Rgba16; + } if (image.GetDataFmt() == AmdGpu::DataFormat::Format8 && image.GetNumberFmt() == AmdGpu::NumberFormat::Unorm) { return spv::ImageFormat::R8; @@ -507,43 +523,9 @@ void EmitContext::DefineSharedMemory() { if (info.shared_memory_size == 0) { info.shared_memory_size = DefaultSharedMemSize; } - const auto make{[&](Id element_type, u32 element_size) { - const u32 num_elements{Common::DivCeil(info.shared_memory_size, element_size)}; - const Id array_type{TypeArray(element_type, ConstU32(num_elements))}; - Decorate(array_type, spv::Decoration::ArrayStride, element_size); - - const Id struct_type{TypeStruct(array_type)}; - MemberDecorate(struct_type, 0U, spv::Decoration::Offset, 0U); - Decorate(struct_type, spv::Decoration::Block); - - const Id pointer{TypePointer(spv::StorageClass::Workgroup, struct_type)}; - const Id element_pointer{TypePointer(spv::StorageClass::Workgroup, element_type)}; - const Id variable{AddGlobalVariable(pointer, spv::StorageClass::Workgroup)}; - Decorate(variable, spv::Decoration::Aliased); - interfaces.push_back(variable); - - return std::make_tuple(variable, element_pointer, pointer); - }}; - if (profile.support_explicit_workgroup_layout) { - AddExtension("SPV_KHR_workgroup_memory_explicit_layout"); - AddCapability(spv::Capability::WorkgroupMemoryExplicitLayoutKHR); - if (info.uses_shared_u8) { - AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout8BitAccessKHR); - std::tie(shared_memory_u8, shared_u8, std::ignore) = make(U8, 1); - } - if (info.uses_shared_u16) { - AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout16BitAccessKHR); - std::tie(shared_memory_u16, shared_u16, std::ignore) = make(U16, 2); - } - std::tie(shared_memory_u32, shared_u32, shared_memory_u32_type) = make(U32[1], 4); - std::tie(shared_memory_u32x2, shared_u32x2, std::ignore) = make(U32[2], 8); - std::tie(shared_memory_u32x4, shared_u32x4, std::ignore) = make(U32[4], 16); - return; - } const u32 num_elements{Common::DivCeil(info.shared_memory_size, 4U)}; const Id type{TypeArray(U32[1], ConstU32(num_elements))}; shared_memory_u32_type = TypePointer(spv::StorageClass::Workgroup, type); - shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]); shared_memory_u32 = AddGlobalVariable(shared_memory_u32_type, spv::StorageClass::Workgroup); interfaces.push_back(shared_memory_u32); diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index 0d090eb31..768b591f5 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -40,7 +40,7 @@ public: ~EmitContext(); Id Def(const IR::Value& value); - Id GetBufferOffset(u32 binding); + void DefineBufferOffsets(); [[nodiscard]] Id DefineInput(Id type, u32 location) { const Id input_id{DefineVar(type, spv::StorageClass::Input)}; @@ -203,10 +203,13 @@ public: struct BufferDefinition { Id id; Id offset; - u32 global_binding; + Id offset_dwords; + u32 binding; const VectorIds* data_types; Id pointer_type; - AmdGpu::Buffer buffer; + AmdGpu::DataFormat dfmt; + AmdGpu::NumberFormat nfmt; + u32 stride; }; u32& binding; diff --git a/src/shader_recompiler/frontend/control_flow_graph.cpp b/src/shader_recompiler/frontend/control_flow_graph.cpp index 2925c05db..3faf8665b 100644 --- a/src/shader_recompiler/frontend/control_flow_graph.cpp +++ b/src/shader_recompiler/frontend/control_flow_graph.cpp @@ -35,15 +35,23 @@ static IR::Condition MakeCondition(Opcode opcode) { return IR::Condition::Execz; case Opcode::S_CBRANCH_EXECNZ: return IR::Condition::Execnz; + case Opcode::S_AND_SAVEEXEC_B64: + case Opcode::S_ANDN2_B64: + case Opcode::V_CMPX_NE_U32: + return IR::Condition::Execnz; default: return IR::Condition::True; } } +static constexpr size_t LabelReserveSize = 32; + CFG::CFG(Common::ObjectPool& block_pool_, std::span inst_list_) : block_pool{block_pool_}, inst_list{inst_list_} { index_to_pc.resize(inst_list.size() + 1); + labels.reserve(LabelReserveSize); EmitLabels(); + EmitDivergenceLabels(); EmitBlocks(); LinkBlocks(); } @@ -51,14 +59,7 @@ CFG::CFG(Common::ObjectPool& block_pool_, std::span inst_l void CFG::EmitLabels() { // Always set a label at entry point. u32 pc = 0; - labels.push_back(pc); - - const auto add_label = [this](u32 address) { - const auto it = std::ranges::find(labels, address); - if (it == labels.end()) { - labels.push_back(address); - } - }; + AddLabel(pc); // Iterate instruction list and add labels to branch targets. for (u32 i = 0; i < inst_list.size(); i++) { @@ -66,15 +67,15 @@ void CFG::EmitLabels() { const GcnInst inst = inst_list[i]; if (inst.IsUnconditionalBranch()) { const u32 target = inst.BranchTarget(pc); - add_label(target); + AddLabel(target); } else if (inst.IsConditionalBranch()) { const u32 true_label = inst.BranchTarget(pc); const u32 false_label = pc + inst.length; - add_label(true_label); - add_label(false_label); + AddLabel(true_label); + AddLabel(false_label); } else if (inst.opcode == Opcode::S_ENDPGM) { const u32 next_label = pc + inst.length; - add_label(next_label); + AddLabel(next_label); } pc += inst.length; } @@ -84,16 +85,70 @@ void CFG::EmitLabels() { std::ranges::sort(labels); } -void CFG::EmitBlocks() { - const auto get_index = [this](Label label) -> size_t { - if (label == 0) { - return 0ULL; - } - const auto it_index = std::ranges::lower_bound(index_to_pc, label); - ASSERT(it_index != index_to_pc.end() || label > index_to_pc.back()); - return std::distance(index_to_pc.begin(), it_index); +void CFG::EmitDivergenceLabels() { + const auto is_open_scope = [](const GcnInst& inst) { + // An open scope instruction is an instruction that modifies EXEC + // but also saves the previous value to restore later. This indicates + // we are entering a scope. + return inst.opcode == Opcode::S_AND_SAVEEXEC_B64 || + // While this instruction does not save EXEC it is often used paired + // with SAVEEXEC to mask the threads that didn't pass the condition + // of initial branch. + inst.opcode == Opcode::S_ANDN2_B64 || inst.opcode == Opcode::V_CMPX_NE_U32; + }; + const auto is_close_scope = [](const GcnInst& inst) { + // Closing an EXEC scope can be either a branch instruction + // (typical case when S_AND_SAVEEXEC_B64 is right before a branch) + // or by a move instruction to EXEC that restores the backup. + return (inst.opcode == Opcode::S_MOV_B64 && inst.dst[0].field == OperandField::ExecLo) || + // Sometimes compiler might insert instructions between the SAVEEXEC and the branch. + // Those instructions need to be wrapped in the condition as well so allow branch + // as end scope instruction. + inst.opcode == Opcode::S_CBRANCH_EXECZ || inst.opcode == Opcode::S_ANDN2_B64; }; + // Since we will be adding new labels, avoid iterating those as well. + const size_t end_size = labels.size(); + for (u32 l = 0; l < end_size; l++) { + const Label start = labels[l]; + // Stop if we reached end of existing labels. + if (l == end_size - 1) { + break; + } + const Label end = labels[l + 1]; + const size_t end_index = GetIndex(end); + + s32 curr_begin = -1; + for (size_t index = GetIndex(start); index < end_index; index++) { + const auto& inst = inst_list[index]; + if (is_close_scope(inst) && curr_begin != -1) { + // If there are no instructions inside scope don't do anything. + if (index - curr_begin == 1) { + curr_begin = -1; + continue; + } + // Add a label to the instruction right after the open scope call. + // It is the start of a new basic block. + const auto& save_inst = inst_list[curr_begin]; + const Label label = index_to_pc[curr_begin] + save_inst.length; + AddLabel(label); + // Add a label to the close scope instruction as well. + AddLabel(index_to_pc[index]); + // Reset scope begin. + curr_begin = -1; + } + // Mark a potential start of an exec scope. + if (is_open_scope(inst)) { + curr_begin = index; + } + } + } + + // Sort labels to make sure block insertion is correct. + std::ranges::sort(labels); +} + +void CFG::EmitBlocks() { for (auto it = labels.begin(); it != labels.end(); it++) { const Label start = *it; const auto next_it = std::next(it); @@ -102,8 +157,10 @@ void CFG::EmitBlocks() { // Last label is special. return; } + // The end label is the start instruction of next block. + // The end instruction of this block is the previous one. const Label end = *next_it; - const size_t end_index = get_index(end) - 1; + const size_t end_index = GetIndex(end) - 1; const auto& end_inst = inst_list[end_index]; // Insert block between the labels using the last instruction @@ -111,7 +168,7 @@ void CFG::EmitBlocks() { Block* block = block_pool.Create(); block->begin = start; block->end = end; - block->begin_index = get_index(start); + block->begin_index = GetIndex(start); block->end_index = end_index; block->end_inst = end_inst; block->cond = MakeCondition(end_inst.opcode); @@ -126,8 +183,26 @@ void CFG::LinkBlocks() { return &*it; }; - for (auto& block : blocks) { + for (auto it = blocks.begin(); it != blocks.end(); it++) { + auto& block = *it; const auto end_inst{block.end_inst}; + // Handle divergence block inserted here. + if (end_inst.opcode == Opcode::S_AND_SAVEEXEC_B64 || + end_inst.opcode == Opcode::S_ANDN2_B64 || end_inst.opcode == Opcode::V_CMPX_NE_U32) { + // Blocks are stored ordered by address in the set + auto next_it = std::next(it); + auto* target_block = &(*next_it); + ++target_block->num_predecessors; + block.branch_true = target_block; + + auto merge_it = std::next(next_it); + auto* merge_block = &(*merge_it); + ++merge_block->num_predecessors; + block.branch_false = merge_block; + block.end_class = EndClass::Branch; + continue; + } + // If the block doesn't end with a branch we simply // need to link with the next block. if (!end_inst.IsTerminateInstruction()) { diff --git a/src/shader_recompiler/frontend/control_flow_graph.h b/src/shader_recompiler/frontend/control_flow_graph.h index ebe614ee5..d98d4b05d 100644 --- a/src/shader_recompiler/frontend/control_flow_graph.h +++ b/src/shader_recompiler/frontend/control_flow_graph.h @@ -3,11 +3,13 @@ #pragma once +#include #include #include #include #include +#include "common/assert.h" #include "common/object_pool.h" #include "common/types.h" #include "shader_recompiler/frontend/instruction.h" @@ -55,9 +57,26 @@ public: private: void EmitLabels(); + void EmitDivergenceLabels(); void EmitBlocks(); void LinkBlocks(); + void AddLabel(Label address) { + const auto it = std::ranges::find(labels, address); + if (it == labels.end()) { + labels.push_back(address); + } + }; + + size_t GetIndex(Label label) { + if (label == 0) { + return 0ULL; + } + const auto it_index = std::ranges::lower_bound(index_to_pc, label); + ASSERT(it_index != index_to_pc.end() || label > index_to_pc.back()); + return std::distance(index_to_pc.begin(), it_index); + }; + public: Common::ObjectPool& block_pool; std::span inst_list; diff --git a/src/shader_recompiler/frontend/translate/data_share.cpp b/src/shader_recompiler/frontend/translate/data_share.cpp index 532e024e2..b7b5aa138 100644 --- a/src/shader_recompiler/frontend/translate/data_share.cpp +++ b/src/shader_recompiler/frontend/translate/data_share.cpp @@ -25,6 +25,18 @@ void Translator::EmitDataShare(const GcnInst& inst) { return DS_WRITE(32, false, true, inst); case Opcode::DS_WRITE2_B64: return DS_WRITE(64, false, true, inst); + case Opcode::DS_ADD_U32: + return DS_ADD_U32(inst, false); + case Opcode::DS_MIN_U32: + return DS_MIN_U32(inst, false); + case Opcode::DS_MAX_U32: + return DS_MAX_U32(inst, false); + case Opcode::DS_ADD_RTN_U32: + return DS_ADD_U32(inst, true); + case Opcode::DS_MIN_RTN_U32: + return DS_MIN_U32(inst, true); + case Opcode::DS_MAX_RTN_U32: + return DS_MAX_U32(inst, true); default: LogMissingOpcode(inst); } @@ -110,6 +122,42 @@ void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, const GcnI } } +void Translator::DS_ADD_U32(const GcnInst& inst, bool rtn) { + const IR::U32 addr{GetSrc(inst.src[0])}; + const IR::U32 data{GetSrc(inst.src[1])}; + const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0)); + const IR::U32 addr_offset = ir.IAdd(addr, offset); + IR::VectorReg dst_reg{inst.dst[0].code}; + const IR::Value original_val = ir.SharedAtomicIAdd(addr_offset, data); + if (rtn) { + SetDst(inst.dst[0], IR::U32{original_val}); + } +} + +void Translator::DS_MIN_U32(const GcnInst& inst, bool rtn) { + const IR::U32 addr{GetSrc(inst.src[0])}; + const IR::U32 data{GetSrc(inst.src[1])}; + const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0)); + const IR::U32 addr_offset = ir.IAdd(addr, offset); + IR::VectorReg dst_reg{inst.dst[0].code}; + const IR::Value original_val = ir.SharedAtomicIMin(addr_offset, data, false); + if (rtn) { + SetDst(inst.dst[0], IR::U32{original_val}); + } +} + +void Translator::DS_MAX_U32(const GcnInst& inst, bool rtn) { + const IR::U32 addr{GetSrc(inst.src[0])}; + const IR::U32 data{GetSrc(inst.src[1])}; + const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0)); + const IR::U32 addr_offset = ir.IAdd(addr, offset); + IR::VectorReg dst_reg{inst.dst[0].code}; + const IR::Value original_val = ir.SharedAtomicIMax(addr_offset, data, false); + if (rtn) { + SetDst(inst.dst[0], IR::U32{original_val}); + } +} + void Translator::S_BARRIER() { ir.Barrier(); } diff --git a/src/shader_recompiler/frontend/translate/scalar_alu.cpp b/src/shader_recompiler/frontend/translate/scalar_alu.cpp index 795b148d6..812d93bae 100644 --- a/src/shader_recompiler/frontend/translate/scalar_alu.cpp +++ b/src/shader_recompiler/frontend/translate/scalar_alu.cpp @@ -29,6 +29,8 @@ void Translator::EmitScalarAlu(const GcnInst& inst) { return S_CMP(ConditionOp::LG, true, inst); case Opcode::S_CMP_GT_I32: return S_CMP(ConditionOp::GT, true, inst); + case Opcode::S_CMP_LE_I32: + return S_CMP(ConditionOp::LE, true, inst); case Opcode::S_CMP_GE_I32: return S_CMP(ConditionOp::GE, true, inst); case Opcode::S_CMP_EQ_I32: diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index b295c1bef..4070560ae 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -64,110 +64,205 @@ void Translator::EmitPrologue() { ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 1)); ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 2)); - ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 0)); - ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 1)); - ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 2)); + if (info.tgid_enable[0]) { + ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 0)); + } + if (info.tgid_enable[1]) { + ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 1)); + } + if (info.tgid_enable[2]) { + ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 2)); + } break; default: throw NotImplementedException("Unknown shader stage"); } } -template <> -IR::U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) { - IR::U32F32 value{}; +template +T Translator::GetSrc(const InstOperand& operand) { + constexpr bool is_float = std::is_same_v; - const bool is_float = operand.type == ScalarType::Float32 || force_flt; + const auto get_imm = [&](auto value) -> T { + if constexpr (is_float) { + return ir.Imm32(std::bit_cast(value)); + } else { + return ir.Imm32(std::bit_cast(value)); + } + }; + + T value{}; switch (operand.field) { case OperandField::ScalarGPR: - if (is_float) { - value = ir.GetScalarReg(IR::ScalarReg(operand.code)); - } else { - value = ir.GetScalarReg(IR::ScalarReg(operand.code)); - } + value = ir.GetScalarReg(IR::ScalarReg(operand.code)); break; case OperandField::VectorGPR: - if (is_float) { - value = ir.GetVectorReg(IR::VectorReg(operand.code)); - } else { - value = ir.GetVectorReg(IR::VectorReg(operand.code)); - } + value = ir.GetVectorReg(IR::VectorReg(operand.code)); break; case OperandField::ConstZero: - if (is_float) { - value = ir.Imm32(0.f); - } else { - value = ir.Imm32(0U); - } + value = get_imm(0U); break; case OperandField::SignedConstIntPos: - ASSERT(!force_flt); - value = ir.Imm32(operand.code - SignedConstIntPosMin + 1); + value = get_imm(operand.code - SignedConstIntPosMin + 1); break; case OperandField::SignedConstIntNeg: - ASSERT(!force_flt); - value = ir.Imm32(-s32(operand.code) + SignedConstIntNegMin - 1); + value = get_imm(-s32(operand.code) + SignedConstIntNegMin - 1); break; case OperandField::LiteralConst: - if (is_float) { - value = ir.Imm32(std::bit_cast(operand.code)); - } else { - value = ir.Imm32(operand.code); - } + value = get_imm(operand.code); break; case OperandField::ConstFloatPos_1_0: - if (is_float) { - value = ir.Imm32(1.f); - } else { - value = ir.Imm32(std::bit_cast(1.f)); - } + value = get_imm(1.f); break; case OperandField::ConstFloatPos_0_5: - value = ir.Imm32(0.5f); + value = get_imm(0.5f); break; case OperandField::ConstFloatPos_2_0: - value = ir.Imm32(2.0f); + value = get_imm(2.0f); break; case OperandField::ConstFloatPos_4_0: - value = ir.Imm32(4.0f); + value = get_imm(4.0f); break; case OperandField::ConstFloatNeg_0_5: - value = ir.Imm32(-0.5f); + value = get_imm(-0.5f); break; case OperandField::ConstFloatNeg_1_0: - if (is_float) { - value = ir.Imm32(-1.0f); - } else { - value = ir.Imm32(std::bit_cast(-1.0f)); - } + value = get_imm(-1.0f); break; case OperandField::ConstFloatNeg_2_0: - value = ir.Imm32(-2.0f); + value = get_imm(-2.0f); break; case OperandField::ConstFloatNeg_4_0: - value = ir.Imm32(-4.0f); + value = get_imm(-4.0f); break; case OperandField::VccLo: - if (force_flt) { + if constexpr (is_float) { value = ir.BitCast(ir.GetVccLo()); } else { value = ir.GetVccLo(); } break; case OperandField::VccHi: - if (force_flt) { + if constexpr (is_float) { value = ir.BitCast(ir.GetVccHi()); } else { value = ir.GetVccHi(); } break; case OperandField::M0: - return m0_value; + if constexpr (is_float) { + UNREACHABLE(); + } else { + return m0_value; + } default: UNREACHABLE(); } - if (is_float) { + if constexpr (is_float) { + if (operand.input_modifier.abs) { + value = ir.FPAbs(value); + } + if (operand.input_modifier.neg) { + value = ir.FPNeg(value); + } + } else { + if (operand.input_modifier.abs) { + LOG_WARNING(Render_Vulkan, "Input abs modifier on integer instruction"); + } + if (operand.input_modifier.neg) { + UNREACHABLE(); + } + } + return value; +} + +template IR::U32 Translator::GetSrc(const InstOperand&); +template IR::F32 Translator::GetSrc(const InstOperand&); + +template +T Translator::GetSrc64(const InstOperand& operand) { + constexpr bool is_float = std::is_same_v; + + const auto get_imm = [&](auto value) -> T { + if constexpr (is_float) { + return ir.Imm64(std::bit_cast(value)); + } else { + return ir.Imm64(std::bit_cast(value)); + } + }; + + T value{}; + switch (operand.field) { + case OperandField::ScalarGPR: { + const auto value_lo = ir.GetScalarReg(IR::ScalarReg(operand.code)); + const auto value_hi = ir.GetScalarReg(IR::ScalarReg(operand.code + 1)); + if constexpr (is_float) { + UNREACHABLE(); + } else { + value = ir.PackUint2x32(ir.CompositeConstruct(value_lo, value_hi)); + } + break; + } + case OperandField::VectorGPR: { + const auto value_lo = ir.GetVectorReg(IR::VectorReg(operand.code)); + const auto value_hi = ir.GetVectorReg(IR::VectorReg(operand.code + 1)); + if constexpr (is_float) { + UNREACHABLE(); + } else { + value = ir.PackUint2x32(ir.CompositeConstruct(value_lo, value_hi)); + } + break; + } + case OperandField::ConstZero: + value = get_imm(0ULL); + break; + case OperandField::SignedConstIntPos: + value = get_imm(s64(operand.code) - SignedConstIntPosMin + 1); + break; + case OperandField::SignedConstIntNeg: + value = get_imm(-s64(operand.code) + SignedConstIntNegMin - 1); + break; + case OperandField::LiteralConst: + value = get_imm(u64(operand.code)); + break; + case OperandField::ConstFloatPos_1_0: + value = get_imm(1.0); + break; + case OperandField::ConstFloatPos_0_5: + value = get_imm(0.5); + break; + case OperandField::ConstFloatPos_2_0: + value = get_imm(2.0); + break; + case OperandField::ConstFloatPos_4_0: + value = get_imm(4.0); + break; + case OperandField::ConstFloatNeg_0_5: + value = get_imm(-0.5); + break; + case OperandField::ConstFloatNeg_1_0: + value = get_imm(-1.0); + break; + case OperandField::ConstFloatNeg_2_0: + value = get_imm(-2.0); + break; + case OperandField::ConstFloatNeg_4_0: + value = get_imm(-4.0); + break; + case OperandField::VccLo: + if constexpr (is_float) { + UNREACHABLE(); + } else { + value = ir.PackUint2x32(ir.CompositeConstruct(ir.GetVccLo(), ir.GetVccHi())); + } + break; + case OperandField::VccHi: + default: + UNREACHABLE(); + } + + if constexpr (is_float) { if (operand.input_modifier.abs) { value = ir.FPAbs(value); } @@ -178,148 +273,8 @@ IR::U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) { return value; } -template <> -IR::U32 Translator::GetSrc(const InstOperand& operand, bool force_flt) { - return GetSrc(operand, force_flt); -} - -template <> -IR::F32 Translator::GetSrc(const InstOperand& operand, bool) { - return GetSrc(operand, true); -} - -template <> -IR::U64F64 Translator::GetSrc64(const InstOperand& operand, bool force_flt) { - IR::Value value_hi{}; - IR::Value value_lo{}; - - bool immediate = false; - const bool is_float = operand.type == ScalarType::Float64 || force_flt; - switch (operand.field) { - case OperandField::ScalarGPR: - if (is_float) { - value_lo = ir.GetScalarReg(IR::ScalarReg(operand.code)); - value_hi = ir.GetScalarReg(IR::ScalarReg(operand.code + 1)); - } else if (operand.type == ScalarType::Uint64 || operand.type == ScalarType::Sint64) { - value_lo = ir.GetScalarReg(IR::ScalarReg(operand.code)); - value_hi = ir.GetScalarReg(IR::ScalarReg(operand.code + 1)); - } else { - UNREACHABLE(); - } - break; - case OperandField::VectorGPR: - if (is_float) { - value_lo = ir.GetVectorReg(IR::VectorReg(operand.code)); - value_hi = ir.GetVectorReg(IR::VectorReg(operand.code + 1)); - } else if (operand.type == ScalarType::Uint64 || operand.type == ScalarType::Sint64) { - value_lo = ir.GetVectorReg(IR::VectorReg(operand.code)); - value_hi = ir.GetVectorReg(IR::VectorReg(operand.code + 1)); - } else { - UNREACHABLE(); - } - break; - case OperandField::ConstZero: - immediate = true; - if (force_flt) { - value_lo = ir.Imm64(0.0); - } else { - value_lo = ir.Imm64(u64(0U)); - } - break; - case OperandField::SignedConstIntPos: - ASSERT(!force_flt); - immediate = true; - value_lo = ir.Imm64(s64(operand.code) - SignedConstIntPosMin + 1); - break; - case OperandField::SignedConstIntNeg: - ASSERT(!force_flt); - immediate = true; - value_lo = ir.Imm64(-s64(operand.code) + SignedConstIntNegMin - 1); - break; - case OperandField::LiteralConst: - immediate = true; - if (force_flt) { - UNREACHABLE(); // There is a literal double? - } else { - value_lo = ir.Imm64(u64(operand.code)); - } - break; - case OperandField::ConstFloatPos_1_0: - immediate = true; - if (force_flt) { - value_lo = ir.Imm64(1.0); - } else { - value_lo = ir.Imm64(std::bit_cast(f64(1.0))); - } - break; - case OperandField::ConstFloatPos_0_5: - immediate = true; - value_lo = ir.Imm64(0.5); - break; - case OperandField::ConstFloatPos_2_0: - immediate = true; - value_lo = ir.Imm64(2.0); - break; - case OperandField::ConstFloatPos_4_0: - immediate = true; - value_lo = ir.Imm64(4.0); - break; - case OperandField::ConstFloatNeg_0_5: - immediate = true; - value_lo = ir.Imm64(-0.5); - break; - case OperandField::ConstFloatNeg_1_0: - immediate = true; - value_lo = ir.Imm64(-1.0); - break; - case OperandField::ConstFloatNeg_2_0: - immediate = true; - value_lo = ir.Imm64(-2.0); - break; - case OperandField::ConstFloatNeg_4_0: - immediate = true; - value_lo = ir.Imm64(-4.0); - break; - case OperandField::VccLo: { - value_lo = ir.GetVccLo(); - value_hi = ir.GetVccHi(); - } break; - case OperandField::VccHi: - UNREACHABLE(); - default: - UNREACHABLE(); - } - - IR::Value value; - - if (immediate) { - value = value_lo; - } else if (is_float) { - throw NotImplementedException("required OpPackDouble2x32 implementation"); - } else { - IR::Value packed = ir.CompositeConstruct(value_lo, value_hi); - value = ir.PackUint2x32(packed); - } - - if (is_float) { - if (operand.input_modifier.abs) { - value = ir.FPAbs(IR::F32F64(value)); - } - if (operand.input_modifier.neg) { - value = ir.FPNeg(IR::F32F64(value)); - } - } - return IR::U64F64(value); -} - -template <> -IR::U64 Translator::GetSrc64(const InstOperand& operand, bool force_flt) { - return GetSrc64(operand, force_flt); -} -template <> -IR::F64 Translator::GetSrc64(const InstOperand& operand, bool) { - return GetSrc64(operand, true); -} +template IR::U64 Translator::GetSrc64(const InstOperand&); +template IR::F64 Translator::GetSrc64(const InstOperand&); void Translator::SetDst(const InstOperand& operand, const IR::U32F32& value) { IR::U32F32 result = value; diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index fe4457d27..009acabdf 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -186,7 +186,8 @@ public: // Vector Memory void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, bool is_format, const GcnInst& inst); - void BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst); + void BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, bool is_format, const GcnInst& inst); + void BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst); // Vector interpolation void V_INTERP_P2_F32(const GcnInst& inst); @@ -196,6 +197,9 @@ public: void DS_SWIZZLE_B32(const GcnInst& inst); void DS_READ(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst); void DS_WRITE(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst); + void DS_ADD_U32(const GcnInst& inst, bool rtn); + void DS_MIN_U32(const GcnInst& inst, bool rtn); + void DS_MAX_U32(const GcnInst& inst, bool rtn); void V_READFIRSTLANE_B32(const GcnInst& inst); void V_READLANE_B32(const GcnInst& inst); void V_WRITELANE_B32(const GcnInst& inst); @@ -211,10 +215,10 @@ public: void IMAGE_ATOMIC(AtomicOp op, const GcnInst& inst); private: - template - [[nodiscard]] T GetSrc(const InstOperand& operand, bool flt_zero = false); - template - [[nodiscard]] T GetSrc64(const InstOperand& operand, bool flt_zero = false); + template + [[nodiscard]] T GetSrc(const InstOperand& operand); + template + [[nodiscard]] T GetSrc64(const InstOperand& operand); void SetDst(const InstOperand& operand, const IR::U32F32& value); void SetDst64(const InstOperand& operand, const IR::U64F64& value_raw); diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index 89428c44f..1bbc3c162 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "shader_recompiler/frontend/translate/translate.h" -#include "shader_recompiler/profile.h" namespace Shader::Gcn { @@ -312,7 +311,7 @@ void Translator::EmitVectorAlu(const GcnInst& inst) { } void Translator::V_MOV(const GcnInst& inst) { - SetDst(inst.dst[0], GetSrc(inst.src[0])); + SetDst(inst.dst[0], GetSrc(inst.src[0])); } void Translator::V_SAD(const GcnInst& inst) { @@ -321,14 +320,14 @@ void Translator::V_SAD(const GcnInst& inst) { } void Translator::V_MAC_F32(const GcnInst& inst) { - SetDst(inst.dst[0], ir.FPFma(GetSrc(inst.src[0], true), GetSrc(inst.src[1], true), - GetSrc(inst.dst[0], true))); + SetDst(inst.dst[0], ir.FPFma(GetSrc(inst.src[0]), GetSrc(inst.src[1]), + GetSrc(inst.dst[0]))); } void Translator::V_CVT_PKRTZ_F16_F32(const GcnInst& inst) { const IR::VectorReg dst_reg{inst.dst[0].code}; const IR::Value vec_f32 = - ir.CompositeConstruct(GetSrc(inst.src[0], true), GetSrc(inst.src[1], true)); + ir.CompositeConstruct(GetSrc(inst.src[0]), GetSrc(inst.src[1])); ir.SetVectorReg(dst_reg, ir.PackHalf2x16(vec_f32)); } @@ -339,13 +338,13 @@ void Translator::V_CVT_F32_F16(const GcnInst& inst) { } void Translator::V_CVT_F16_F32(const GcnInst& inst) { - const IR::F32 src0 = GetSrc(inst.src[0], true); + const IR::F32 src0 = GetSrc(inst.src[0]); const IR::F16 src0fp16 = ir.FPConvert(16, src0); SetDst(inst.dst[0], ir.UConvert(32, ir.BitCast(src0fp16))); } void Translator::V_MUL_F32(const GcnInst& inst) { - SetDst(inst.dst[0], ir.FPMul(GetSrc(inst.src[0], true), GetSrc(inst.src[1], true))); + SetDst(inst.dst[0], ir.FPMul(GetSrc(inst.src[0]), GetSrc(inst.src[1]))); } void Translator::V_CNDMASK_B32(const GcnInst& inst) { @@ -354,24 +353,8 @@ void Translator::V_CNDMASK_B32(const GcnInst& inst) { const IR::U1 flag = inst.src[2].field == OperandField::ScalarGPR ? ir.GetThreadBitScalarReg(flag_reg) : ir.GetVcc(); - - // We can treat the instruction as integer most of the time, but when a source is - // a floating point constant we will force the other as float for better readability - // The other operand is also higly likely to be float as well. - const auto is_float_const = [](OperandField field) { - return field >= OperandField::ConstFloatPos_0_5 && field <= OperandField::ConstFloatNeg_4_0; - }; - const bool has_flt_source = - is_float_const(inst.src[0].field) || is_float_const(inst.src[1].field); - IR::U32F32 src0 = GetSrc(inst.src[0], has_flt_source); - IR::U32F32 src1 = GetSrc(inst.src[1], has_flt_source); - if (src0.Type() == IR::Type::F32 && src1.Type() == IR::Type::U32) { - src1 = ir.BitCast(src1); - } - if (src1.Type() == IR::Type::F32 && src0.Type() == IR::Type::U32) { - src0 = ir.BitCast(src0); - } - const IR::Value result = ir.Select(flag, src1, src0); + const IR::Value result = + ir.Select(flag, GetSrc(inst.src[1]), GetSrc(inst.src[0])); ir.SetVectorReg(dst_reg, IR::U32F32{result}); } @@ -448,21 +431,21 @@ void Translator::V_CVT_F32_U32(const GcnInst& inst) { } void Translator::V_MAD_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; - const IR::F32 src1{GetSrc(inst.src[1], true)}; - const IR::F32 src2{GetSrc(inst.src[2], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; + const IR::F32 src1{GetSrc(inst.src[1])}; + const IR::F32 src2{GetSrc(inst.src[2])}; SetDst(inst.dst[0], ir.FPFma(src0, src1, src2)); } void Translator::V_FRACT_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; const IR::VectorReg dst_reg{inst.dst[0].code}; ir.SetVectorReg(dst_reg, ir.Fract(src0)); } void Translator::V_ADD_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; - const IR::F32 src1{GetSrc(inst.src[1], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; + const IR::F32 src1{GetSrc(inst.src[1])}; SetDst(inst.dst[0], ir.FPAdd(src0, src1)); } @@ -476,9 +459,9 @@ void Translator::V_CVT_OFF_F32_I4(const GcnInst& inst) { } void Translator::V_MED3_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; - const IR::F32 src1{GetSrc(inst.src[1], true)}; - const IR::F32 src2{GetSrc(inst.src[2], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; + const IR::F32 src1{GetSrc(inst.src[1])}; + const IR::F32 src2{GetSrc(inst.src[2])}; const IR::F32 mmx = ir.FPMin(ir.FPMax(src0, src1), src2); SetDst(inst.dst[0], ir.FPMax(ir.FPMin(src0, src1), mmx)); } @@ -492,32 +475,32 @@ void Translator::V_MED3_I32(const GcnInst& inst) { } void Translator::V_FLOOR_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; const IR::VectorReg dst_reg{inst.dst[0].code}; ir.SetVectorReg(dst_reg, ir.FPFloor(src0)); } void Translator::V_SUB_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; - const IR::F32 src1{GetSrc(inst.src[1], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; + const IR::F32 src1{GetSrc(inst.src[1])}; SetDst(inst.dst[0], ir.FPSub(src0, src1)); } void Translator::V_RCP_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; SetDst(inst.dst[0], ir.FPRecip(src0)); } void Translator::V_FMA_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; - const IR::F32 src1{GetSrc(inst.src[1], true)}; - const IR::F32 src2{GetSrc(inst.src[2], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; + const IR::F32 src1{GetSrc(inst.src[1])}; + const IR::F32 src2{GetSrc(inst.src[2])}; SetDst(inst.dst[0], ir.FPFma(src0, src1, src2)); } void Translator::V_CMP_F32(ConditionOp op, bool set_exec, const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; - const IR::F32 src1{GetSrc(inst.src[1], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; + const IR::F32 src1{GetSrc(inst.src[1])}; const IR::U1 result = [&] { switch (op) { case ConditionOp::F: @@ -557,8 +540,8 @@ void Translator::V_CMP_F32(ConditionOp op, bool set_exec, const GcnInst& inst) { } void Translator::V_MAX_F32(const GcnInst& inst, bool is_legacy) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; - const IR::F32 src1{GetSrc(inst.src[1], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; + const IR::F32 src1{GetSrc(inst.src[1])}; SetDst(inst.dst[0], ir.FPMax(src0, src1, is_legacy)); } @@ -569,40 +552,40 @@ void Translator::V_MAX_U32(bool is_signed, const GcnInst& inst) { } void Translator::V_RSQ_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; SetDst(inst.dst[0], ir.FPRecipSqrt(src0)); } void Translator::V_SIN_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; SetDst(inst.dst[0], ir.FPSin(src0)); } void Translator::V_LOG_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; SetDst(inst.dst[0], ir.FPLog2(src0)); } void Translator::V_EXP_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; SetDst(inst.dst[0], ir.FPExp2(src0)); } void Translator::V_SQRT_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; SetDst(inst.dst[0], ir.FPSqrt(src0)); } void Translator::V_MIN_F32(const GcnInst& inst, bool is_legacy) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; - const IR::F32 src1{GetSrc(inst.src[1], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; + const IR::F32 src1{GetSrc(inst.src[1])}; SetDst(inst.dst[0], ir.FPMin(src0, src1, is_legacy)); } void Translator::V_MIN3_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; - const IR::F32 src1{GetSrc(inst.src[1], true)}; - const IR::F32 src2{GetSrc(inst.src[2], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; + const IR::F32 src1{GetSrc(inst.src[1])}; + const IR::F32 src2{GetSrc(inst.src[2])}; SetDst(inst.dst[0], ir.FPMin(src0, ir.FPMin(src1, src2))); } @@ -614,9 +597,9 @@ void Translator::V_MIN3_I32(const GcnInst& inst) { } void Translator::V_MADMK_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; - const IR::F32 src1{GetSrc(inst.src[1], true)}; - const IR::F32 k{GetSrc(inst.src[2], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; + const IR::F32 src1{GetSrc(inst.src[1])}; + const IR::F32 k{GetSrc(inst.src[2])}; SetDst(inst.dst[0], ir.FPFma(src0, k, src1)); } @@ -625,25 +608,25 @@ void Translator::V_CUBEMA_F32(const GcnInst& inst) { } void Translator::V_CUBESC_F32(const GcnInst& inst) { - SetDst(inst.dst[0], GetSrc(inst.src[0], true)); + SetDst(inst.dst[0], GetSrc(inst.src[0])); } void Translator::V_CUBETC_F32(const GcnInst& inst) { - SetDst(inst.dst[0], GetSrc(inst.src[1], true)); + SetDst(inst.dst[0], GetSrc(inst.src[1])); } void Translator::V_CUBEID_F32(const GcnInst& inst) { - SetDst(inst.dst[0], GetSrc(inst.src[2], true)); + SetDst(inst.dst[0], GetSrc(inst.src[2])); } void Translator::V_CVT_U32_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; SetDst(inst.dst[0], ir.ConvertFToU(32, src0)); } void Translator::V_SUBREV_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; - const IR::F32 src1{GetSrc(inst.src[1], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; + const IR::F32 src1{GetSrc(inst.src[1])}; SetDst(inst.dst[0], ir.FPSub(src1, src0)); } @@ -727,9 +710,17 @@ void Translator::V_SAD_U32(const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{GetSrc(inst.src[1])}; const IR::U32 src2{GetSrc(inst.src[2])}; - const IR::U32 max{ir.IMax(src0, src1, false)}; - const IR::U32 min{ir.IMin(src0, src1, false)}; - SetDst(inst.dst[0], ir.IAdd(ir.ISub(max, min), src2)); + IR::U32 result; + if (src0.IsImmediate() && src0.U32() == 0U) { + result = src1; + } else if (src1.IsImmediate() && src1.U32() == 0U) { + result = src0; + } else { + const IR::U32 max{ir.IMax(src0, src1, false)}; + const IR::U32 min{ir.IMin(src0, src1, false)}; + result = ir.ISub(max, min); + } + SetDst(inst.dst[0], ir.IAdd(result, src2)); } void Translator::V_BFE_U32(bool is_signed, const GcnInst& inst) { @@ -783,7 +774,7 @@ void Translator::V_MAD_U32_U24(const GcnInst& inst) { } void Translator::V_RNDNE_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; SetDst(inst.dst[0], ir.FPRoundEven(src0)); } @@ -794,14 +785,14 @@ void Translator::V_BCNT_U32_B32(const GcnInst& inst) { } void Translator::V_COS_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; SetDst(inst.dst[0], ir.FPCos(src0)); } void Translator::V_MAX3_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; - const IR::F32 src1{GetSrc(inst.src[1], true)}; - const IR::F32 src2{GetSrc(inst.src[2], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; + const IR::F32 src1{GetSrc(inst.src[1])}; + const IR::F32 src2{GetSrc(inst.src[2])}; SetDst(inst.dst[0], ir.FPMax(src0, ir.FPMax(src1, src2))); } @@ -813,7 +804,7 @@ void Translator::V_MAX3_U32(const GcnInst& inst) { } void Translator::V_CVT_I32_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; SetDst(inst.dst[0], ir.ConvertFToS(32, src0)); } @@ -830,12 +821,12 @@ void Translator::V_MUL_LO_U32(const GcnInst& inst) { } void Translator::V_TRUNC_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; SetDst(inst.dst[0], ir.FPTrunc(src0)); } void Translator::V_CEIL_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; SetDst(inst.dst[0], ir.FPCeil(src0)); } @@ -899,18 +890,18 @@ void Translator::V_BFREV_B32(const GcnInst& inst) { } void Translator::V_LDEXP_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{GetSrc(inst.src[1])}; SetDst(inst.dst[0], ir.FPLdexp(src0, src1)); } void Translator::V_CVT_FLR_I32_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; SetDst(inst.dst[0], ir.ConvertFToI(32, true, ir.FPFloor(src0))); } void Translator::V_CMP_CLASS_F32(const GcnInst& inst) { - const IR::F32F64 src0{GetSrc(inst.src[0])}; + const IR::F32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{GetSrc(inst.src[1])}; IR::U1 value; if (src1.IsImmediate()) { diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index 3c6dfbda4..08674fa2d 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -53,6 +53,7 @@ void Translator::EmitVectorMemory(const GcnInst& inst) { case Opcode::IMAGE_GET_RESINFO: return IMAGE_GET_RESINFO(inst); + // Buffer load operations case Opcode::TBUFFER_LOAD_FORMAT_X: return BUFFER_LOAD_FORMAT(1, true, true, inst); case Opcode::TBUFFER_LOAD_FORMAT_XY: @@ -61,6 +62,7 @@ void Translator::EmitVectorMemory(const GcnInst& inst) { return BUFFER_LOAD_FORMAT(3, true, true, inst); case Opcode::TBUFFER_LOAD_FORMAT_XYZW: return BUFFER_LOAD_FORMAT(4, true, true, inst); + case Opcode::BUFFER_LOAD_FORMAT_X: return BUFFER_LOAD_FORMAT(1, false, true, inst); case Opcode::BUFFER_LOAD_FORMAT_XY: @@ -69,6 +71,7 @@ void Translator::EmitVectorMemory(const GcnInst& inst) { return BUFFER_LOAD_FORMAT(3, false, true, inst); case Opcode::BUFFER_LOAD_FORMAT_XYZW: return BUFFER_LOAD_FORMAT(4, false, true, inst); + case Opcode::BUFFER_LOAD_DWORD: return BUFFER_LOAD_FORMAT(1, false, false, inst); case Opcode::BUFFER_LOAD_DWORDX2: @@ -77,16 +80,34 @@ void Translator::EmitVectorMemory(const GcnInst& inst) { return BUFFER_LOAD_FORMAT(3, false, false, inst); case Opcode::BUFFER_LOAD_DWORDX4: return BUFFER_LOAD_FORMAT(4, false, false, inst); + + // Buffer store operations case Opcode::BUFFER_STORE_FORMAT_X: - case Opcode::BUFFER_STORE_DWORD: - return BUFFER_STORE_FORMAT(1, false, inst); - case Opcode::BUFFER_STORE_DWORDX2: - return BUFFER_STORE_FORMAT(2, false, inst); - case Opcode::BUFFER_STORE_DWORDX3: - return BUFFER_STORE_FORMAT(3, false, inst); + return BUFFER_STORE_FORMAT(1, false, true, inst); + case Opcode::BUFFER_STORE_FORMAT_XY: + return BUFFER_STORE_FORMAT(2, false, true, inst); + case Opcode::BUFFER_STORE_FORMAT_XYZ: + return BUFFER_STORE_FORMAT(3, false, true, inst); case Opcode::BUFFER_STORE_FORMAT_XYZW: + return BUFFER_STORE_FORMAT(4, false, true, inst); + + case Opcode::TBUFFER_STORE_FORMAT_X: + return BUFFER_STORE_FORMAT(1, true, true, inst); + case Opcode::TBUFFER_STORE_FORMAT_XYZ: + return BUFFER_STORE_FORMAT(3, true, true, inst); + + case Opcode::BUFFER_STORE_DWORD: + return BUFFER_STORE_FORMAT(1, false, false, inst); + case Opcode::BUFFER_STORE_DWORDX2: + return BUFFER_STORE_FORMAT(2, false, false, inst); + case Opcode::BUFFER_STORE_DWORDX3: + return BUFFER_STORE_FORMAT(3, false, false, inst); case Opcode::BUFFER_STORE_DWORDX4: - return BUFFER_STORE_FORMAT(4, false, inst); + return BUFFER_STORE_FORMAT(4, false, false, inst); + + // Buffer atomic operations + case Opcode::BUFFER_ATOMIC_ADD: + return BUFFER_ATOMIC(AtomicOp::Add, inst); default: LogMissingOpcode(inst); } @@ -135,8 +156,8 @@ void Translator::IMAGE_SAMPLE(const GcnInst& inst) { // Load first address components as denoted in 8.2.4 VGPR Usage Sea Islands Series Instruction // Set Architecture - const IR::Value offset = - flags.test(MimgModifier::Offset) ? ir.GetVectorReg(addr_reg++) : IR::Value{}; + const IR::U32 offset = + flags.test(MimgModifier::Offset) ? ir.GetVectorReg(addr_reg++) : IR::U32{}; const IR::F32 bias = flags.test(MimgModifier::LodBias) ? ir.GetVectorReg(addr_reg++) : IR::F32{}; const IR::F32 dref = @@ -168,18 +189,17 @@ void Translator::IMAGE_SAMPLE(const GcnInst& inst) { // Issue IR instruction, leaving unknown fields blank to patch later. const IR::Value texel = [&]() -> IR::Value { - const IR::F32 lod = flags.test(MimgModifier::Level0) ? ir.Imm32(0.f) : IR::F32{}; if (!flags.test(MimgModifier::Pcf)) { if (explicit_lod) { - return ir.ImageSampleExplicitLod(handle, body, lod, offset, info); + return ir.ImageSampleExplicitLod(handle, body, offset, info); } else { - return ir.ImageSampleImplicitLod(handle, body, bias, offset, {}, info); + return ir.ImageSampleImplicitLod(handle, body, bias, offset, info); } } if (explicit_lod) { - return ir.ImageSampleDrefExplicitLod(handle, body, dref, lod, offset, info); + return ir.ImageSampleDrefExplicitLod(handle, body, dref, offset, info); } - return ir.ImageSampleDrefImplicitLod(handle, body, dref, bias, offset, {}, info); + return ir.ImageSampleDrefImplicitLod(handle, body, dref, bias, offset, info); }(); for (u32 i = 0; i < 4; i++) { @@ -251,10 +271,10 @@ void Translator::IMAGE_GATHER(const GcnInst& inst) { const IR::Value texel = [&]() -> IR::Value { const IR::F32 lod = flags.test(MimgModifier::Level0) ? ir.Imm32(0.f) : IR::F32{}; if (!flags.test(MimgModifier::Pcf)) { - return ir.ImageGather(handle, body, offset, {}, info); + return ir.ImageGather(handle, body, offset, info); } ASSERT(mimg.dmask & 1); // should be always 1st (R) component - return ir.ImageGatherDref(handle, body, offset, {}, dref, info); + return ir.ImageGatherDref(handle, body, offset, dref, info); }(); // For gather4 instructions dmask selects which component to read and must have @@ -360,7 +380,8 @@ void Translator::BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, bool is_forma } } -void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst) { +void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, bool is_format, + const GcnInst& inst) { const auto& mtbuf = inst.control.mtbuf; const IR::VectorReg vaddr{inst.src[0].code}; const IR::ScalarReg sharp{inst.src[2].code * 4}; @@ -411,7 +432,65 @@ void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, const GcnIns const IR::Value handle = ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1), ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3)); - ir.StoreBuffer(num_dwords, handle, address, value, info); + if (is_format) { + ir.StoreBufferFormat(num_dwords, handle, address, value, info); + } else { + ir.StoreBuffer(num_dwords, handle, address, value, info); + } +} + +// TODO: U64 +void Translator::BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst) { + const auto& mubuf = inst.control.mubuf; + const IR::VectorReg vaddr{inst.src[0].code}; + const IR::VectorReg vdata{inst.src[1].code}; + const IR::ScalarReg srsrc{inst.src[2].code * 4}; + const IR::U32 soffset{GetSrc(inst.src[3])}; + ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported"); + + IR::BufferInstInfo info{}; + info.index_enable.Assign(mubuf.idxen); + info.inst_offset.Assign(mubuf.offset); + info.offset_enable.Assign(mubuf.offen); + + IR::Value vdata_val = ir.GetVectorReg(vdata); + const IR::U32 address = ir.GetVectorReg(vaddr); + const IR::Value handle = + ir.CompositeConstruct(ir.GetScalarReg(srsrc), ir.GetScalarReg(srsrc + 1), + ir.GetScalarReg(srsrc + 2), ir.GetScalarReg(srsrc + 3)); + + const IR::Value original_val = [&] { + switch (op) { + case AtomicOp::Swap: + return ir.BufferAtomicExchange(handle, address, vdata_val, info); + case AtomicOp::Add: + return ir.BufferAtomicIAdd(handle, address, vdata_val, info); + case AtomicOp::Smin: + return ir.BufferAtomicIMin(handle, address, vdata_val, true, info); + case AtomicOp::Umin: + return ir.BufferAtomicIMin(handle, address, vdata_val, false, info); + case AtomicOp::Smax: + return ir.BufferAtomicIMax(handle, address, vdata_val, true, info); + case AtomicOp::Umax: + return ir.BufferAtomicIMax(handle, address, vdata_val, false, info); + case AtomicOp::And: + return ir.BufferAtomicAnd(handle, address, vdata_val, info); + case AtomicOp::Or: + return ir.BufferAtomicOr(handle, address, vdata_val, info); + case AtomicOp::Xor: + return ir.BufferAtomicXor(handle, address, vdata_val, info); + case AtomicOp::Inc: + return ir.BufferAtomicInc(handle, address, vdata_val, info); + case AtomicOp::Dec: + return ir.BufferAtomicDec(handle, address, vdata_val, info); + default: + UNREACHABLE(); + } + }(); + + if (mubuf.glc) { + ir.SetVectorReg(vdata, IR::U32{original_val}); + } } void Translator::IMAGE_GET_LOD(const GcnInst& inst) { diff --git a/src/shader_recompiler/ir/attribute.h b/src/shader_recompiler/ir/attribute.h index 3f95ff7ac..2c67411ff 100644 --- a/src/shader_recompiler/ir/attribute.h +++ b/src/shader_recompiler/ir/attribute.h @@ -105,7 +105,7 @@ struct fmt::formatter { constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); } - auto format(const Shader::IR::Attribute& attribute, format_context& ctx) const { + auto format(const Shader::IR::Attribute attribute, format_context& ctx) const { return fmt::format_to(ctx.out(), "{}", Shader::IR::NameOf(attribute)); } }; diff --git a/src/shader_recompiler/ir/condition.h b/src/shader_recompiler/ir/condition.h index 4b60be674..da986c48d 100644 --- a/src/shader_recompiler/ir/condition.h +++ b/src/shader_recompiler/ir/condition.h @@ -44,7 +44,7 @@ constexpr std::string_view NameOf(Condition condition) { template <> struct fmt::formatter : formatter { - auto format(const Shader::IR::Condition& cond, format_context& ctx) const { + auto format(const Shader::IR::Condition cond, format_context& ctx) const { return formatter::format(NameOf(cond), ctx); } }; diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index 03404aca0..3ae068072 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -16,18 +16,6 @@ namespace { UNREACHABLE_MSG("Invalid type = {}, functionName = {}, line = {}", u32(type), functionName, lineNumber); } - -Value MakeLodClampPair(IREmitter& ir, const F32& bias_lod, const F32& lod_clamp) { - if (!bias_lod.IsEmpty() && !lod_clamp.IsEmpty()) { - return ir.CompositeConstruct(bias_lod, lod_clamp); - } else if (!bias_lod.IsEmpty()) { - return bias_lod; - } else if (!lod_clamp.IsEmpty()) { - return lod_clamp; - } else { - return Value{}; - } -} } // Anonymous namespace U1 IREmitter::Imm1(bool value) const { @@ -271,10 +259,6 @@ void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, u32 comp Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset) { switch (bit_size) { - case 8: - return Inst(is_signed ? Opcode::LoadSharedS8 : Opcode::LoadSharedU8, offset); - case 16: - return Inst(is_signed ? Opcode::LoadSharedS16 : Opcode::LoadSharedU16, offset); case 32: return Inst(Opcode::LoadSharedU32, offset); case 64: @@ -288,12 +272,6 @@ Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset) { void IREmitter::WriteShared(int bit_size, const Value& value, const U32& offset) { switch (bit_size) { - case 8: - Inst(Opcode::WriteSharedU8, offset, value); - break; - case 16: - Inst(Opcode::WriteSharedU16, offset, value); - break; case 32: Inst(Opcode::WriteSharedU32, offset, value); break; @@ -308,6 +286,25 @@ void IREmitter::WriteShared(int bit_size, const Value& value, const U32& offset) } } +U32F32 IREmitter::SharedAtomicIAdd(const U32& address, const U32F32& data) { + switch (data.Type()) { + case Type::U32: + return Inst(Opcode::SharedAtomicIAdd32, address, data); + default: + ThrowInvalidType(data.Type()); + } +} + +U32 IREmitter::SharedAtomicIMin(const U32& address, const U32& data, bool is_signed) { + return is_signed ? Inst(Opcode::SharedAtomicSMin32, address, data) + : Inst(Opcode::SharedAtomicUMin32, address, data); +} + +U32 IREmitter::SharedAtomicIMax(const U32& address, const U32& data, bool is_signed) { + return is_signed ? Inst(Opcode::SharedAtomicSMax32, address, data) + : Inst(Opcode::SharedAtomicUMax32, address, data); +} + U32 IREmitter::ReadConst(const Value& base, const U32& offset) { return Inst(Opcode::ReadConst, base, offset); } @@ -369,6 +366,73 @@ void IREmitter::StoreBuffer(int num_dwords, const Value& handle, const Value& ad } } +Value IREmitter::BufferAtomicIAdd(const Value& handle, const Value& address, const Value& value, + BufferInstInfo info) { + return Inst(Opcode::BufferAtomicIAdd32, Flags{info}, handle, address, value); +} + +Value IREmitter::BufferAtomicIMin(const Value& handle, const Value& address, const Value& value, + bool is_signed, BufferInstInfo info) { + return is_signed ? Inst(Opcode::BufferAtomicSMin32, Flags{info}, handle, address, value) + : Inst(Opcode::BufferAtomicUMin32, Flags{info}, handle, address, value); +} + +Value IREmitter::BufferAtomicIMax(const Value& handle, const Value& address, const Value& value, + bool is_signed, BufferInstInfo info) { + return is_signed ? Inst(Opcode::BufferAtomicSMax32, Flags{info}, handle, address, value) + : Inst(Opcode::BufferAtomicUMax32, Flags{info}, handle, address, value); +} + +Value IREmitter::BufferAtomicInc(const Value& handle, const Value& address, const Value& value, + BufferInstInfo info) { + return Inst(Opcode::BufferAtomicInc32, Flags{info}, handle, address, value); +} + +Value IREmitter::BufferAtomicDec(const Value& handle, const Value& address, const Value& value, + BufferInstInfo info) { + return Inst(Opcode::BufferAtomicDec32, Flags{info}, handle, address, value); +} + +Value IREmitter::BufferAtomicAnd(const Value& handle, const Value& address, const Value& value, + BufferInstInfo info) { + return Inst(Opcode::BufferAtomicAnd32, Flags{info}, handle, address, value); +} + +Value IREmitter::BufferAtomicOr(const Value& handle, const Value& address, const Value& value, + BufferInstInfo info) { + return Inst(Opcode::BufferAtomicOr32, Flags{info}, handle, address, value); +} + +Value IREmitter::BufferAtomicXor(const Value& handle, const Value& address, const Value& value, + BufferInstInfo info) { + return Inst(Opcode::BufferAtomicXor32, Flags{info}, handle, address, value); +} + +Value IREmitter::BufferAtomicExchange(const Value& handle, const Value& address, const Value& value, + BufferInstInfo info) { + return Inst(Opcode::BufferAtomicExchange32, Flags{info}, handle, address, value); +} + +void IREmitter::StoreBufferFormat(int num_dwords, const Value& handle, const Value& address, + const Value& data, BufferInstInfo info) { + switch (num_dwords) { + case 1: + Inst(Opcode::StoreBufferFormatF32, Flags{info}, handle, address, data); + break; + case 2: + Inst(Opcode::StoreBufferFormatF32x2, Flags{info}, handle, address, data); + break; + case 3: + Inst(Opcode::StoreBufferFormatF32x3, Flags{info}, handle, address, data); + break; + case 4: + Inst(Opcode::StoreBufferFormatF32x4, Flags{info}, handle, address, data); + break; + default: + UNREACHABLE_MSG("Invalid number of dwords {}", num_dwords); + } +} + U32 IREmitter::LaneId() { return Inst(Opcode::LaneId); } @@ -1386,41 +1450,37 @@ Value IREmitter::ImageAtomicExchange(const Value& handle, const Value& coords, c return Inst(Opcode::ImageAtomicExchange32, Flags{info}, handle, coords, value); } -Value IREmitter::ImageSampleImplicitLod(const Value& handle, const Value& coords, const F32& bias, - const Value& offset, const F32& lod_clamp, +Value IREmitter::ImageSampleImplicitLod(const Value& handle, const Value& body, const F32& bias, + const U32& offset, TextureInstInfo info) { + return Inst(Opcode::ImageSampleImplicitLod, Flags{info}, handle, body, bias, offset); +} + +Value IREmitter::ImageSampleExplicitLod(const Value& handle, const Value& body, const U32& offset, TextureInstInfo info) { - const Value bias_lc{MakeLodClampPair(*this, bias, lod_clamp)}; - return Inst(Opcode::ImageSampleImplicitLod, Flags{info}, handle, coords, bias_lc, offset); + return Inst(Opcode::ImageSampleExplicitLod, Flags{info}, handle, body, IR::F32{}, offset); } -Value IREmitter::ImageSampleExplicitLod(const Value& handle, const Value& coords, const F32& lod, - const Value& offset, TextureInstInfo info) { - return Inst(Opcode::ImageSampleExplicitLod, Flags{info}, handle, coords, lod, offset); -} - -F32 IREmitter::ImageSampleDrefImplicitLod(const Value& handle, const Value& coords, const F32& dref, - const F32& bias, const Value& offset, - const F32& lod_clamp, TextureInstInfo info) { - const Value bias_lc{MakeLodClampPair(*this, bias, lod_clamp)}; - return Inst(Opcode::ImageSampleDrefImplicitLod, Flags{info}, handle, coords, dref, bias_lc, +F32 IREmitter::ImageSampleDrefImplicitLod(const Value& handle, const Value& body, const F32& dref, + const F32& bias, const U32& offset, + TextureInstInfo info) { + return Inst(Opcode::ImageSampleDrefImplicitLod, Flags{info}, handle, body, dref, bias, offset); } -F32 IREmitter::ImageSampleDrefExplicitLod(const Value& handle, const Value& coords, const F32& dref, - const F32& lod, const Value& offset, - TextureInstInfo info) { - return Inst(Opcode::ImageSampleDrefExplicitLod, Flags{info}, handle, coords, dref, lod, +F32 IREmitter::ImageSampleDrefExplicitLod(const Value& handle, const Value& body, const F32& dref, + const U32& offset, TextureInstInfo info) { + return Inst(Opcode::ImageSampleDrefExplicitLod, Flags{info}, handle, body, dref, IR::F32{}, offset); } Value IREmitter::ImageGather(const Value& handle, const Value& coords, const Value& offset, - const Value& offset2, TextureInstInfo info) { - return Inst(Opcode::ImageGather, Flags{info}, handle, coords, offset, offset2); + TextureInstInfo info) { + return Inst(Opcode::ImageGather, Flags{info}, handle, coords, offset); } Value IREmitter::ImageGatherDref(const Value& handle, const Value& coords, const Value& offset, - const Value& offset2, const F32& dref, TextureInstInfo info) { - return Inst(Opcode::ImageGatherDref, Flags{info}, handle, coords, offset, offset2, dref); + const F32& dref, TextureInstInfo info) { + return Inst(Opcode::ImageGatherDref, Flags{info}, handle, coords, offset, dref); } Value IREmitter::ImageFetch(const Value& handle, const Value& coords, const Value& offset, diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index a65e46136..be7f25153 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -84,6 +84,10 @@ public: [[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset); void WriteShared(int bit_size, const Value& value, const U32& offset); + [[nodiscard]] U32F32 SharedAtomicIAdd(const U32& address, const U32F32& data); + [[nodiscard]] U32 SharedAtomicIMin(const U32& address, const U32& data, bool is_signed); + [[nodiscard]] U32 SharedAtomicIMax(const U32& address, const U32& data, bool is_signed); + [[nodiscard]] U32 ReadConst(const Value& base, const U32& offset); [[nodiscard]] F32 ReadConstBuffer(const Value& handle, const U32& index); @@ -93,6 +97,27 @@ public: BufferInstInfo info); void StoreBuffer(int num_dwords, const Value& handle, const Value& address, const Value& data, BufferInstInfo info); + void StoreBufferFormat(int num_dwords, const Value& handle, const Value& address, + const Value& data, BufferInstInfo info); + + [[nodiscard]] Value BufferAtomicIAdd(const Value& handle, const Value& address, + const Value& value, BufferInstInfo info); + [[nodiscard]] Value BufferAtomicIMin(const Value& handle, const Value& address, + const Value& value, bool is_signed, BufferInstInfo info); + [[nodiscard]] Value BufferAtomicIMax(const Value& handle, const Value& address, + const Value& value, bool is_signed, BufferInstInfo info); + [[nodiscard]] Value BufferAtomicInc(const Value& handle, const Value& address, + const Value& value, BufferInstInfo info); + [[nodiscard]] Value BufferAtomicDec(const Value& handle, const Value& address, + const Value& value, BufferInstInfo info); + [[nodiscard]] Value BufferAtomicAnd(const Value& handle, const Value& address, + const Value& value, BufferInstInfo info); + [[nodiscard]] Value BufferAtomicOr(const Value& handle, const Value& address, + const Value& value, BufferInstInfo info); + [[nodiscard]] Value BufferAtomicXor(const Value& handle, const Value& address, + const Value& value, BufferInstInfo info); + [[nodiscard]] Value BufferAtomicExchange(const Value& handle, const Value& address, + const Value& value, BufferInstInfo info); [[nodiscard]] U32 LaneId(); [[nodiscard]] U32 WarpId(); @@ -241,31 +266,32 @@ public: [[nodiscard]] Value ImageAtomicExchange(const Value& handle, const Value& coords, const Value& value, TextureInstInfo info); - [[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& coords, - const F32& bias, const Value& offset, - const F32& lod_clamp, TextureInstInfo info); - [[nodiscard]] Value ImageSampleExplicitLod(const Value& handle, const Value& coords, - const F32& lod, const Value& offset, + [[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& body, + const F32& bias, const U32& offset, TextureInstInfo info); - [[nodiscard]] F32 ImageSampleDrefImplicitLod(const Value& handle, const Value& coords, + + [[nodiscard]] Value ImageSampleExplicitLod(const Value& handle, const Value& body, + const U32& offset, TextureInstInfo info); + + [[nodiscard]] F32 ImageSampleDrefImplicitLod(const Value& handle, const Value& body, const F32& dref, const F32& bias, - const Value& offset, const F32& lod_clamp, + const U32& offset, TextureInstInfo info); + + [[nodiscard]] F32 ImageSampleDrefExplicitLod(const Value& handle, const Value& body, + const F32& dref, const U32& offset, TextureInstInfo info); - [[nodiscard]] F32 ImageSampleDrefExplicitLod(const Value& handle, const Value& coords, - const F32& dref, const F32& lod, - const Value& offset, TextureInstInfo info); - [[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod, - const IR::U1& skip_mips); - [[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod, - const IR::U1& skip_mips, TextureInstInfo info); + + [[nodiscard]] Value ImageQueryDimension(const Value& handle, const U32& lod, + const U1& skip_mips); + [[nodiscard]] Value ImageQueryDimension(const Value& handle, const U32& lod, + const U1& skip_mips, TextureInstInfo info); [[nodiscard]] Value ImageQueryLod(const Value& handle, const Value& coords, TextureInstInfo info); [[nodiscard]] Value ImageGather(const Value& handle, const Value& coords, const Value& offset, - const Value& offset2, TextureInstInfo info); + TextureInstInfo info); [[nodiscard]] Value ImageGatherDref(const Value& handle, const Value& coords, - const Value& offset, const Value& offset2, const F32& dref, - TextureInstInfo info); + const Value& offset, const F32& dref, TextureInstInfo info); [[nodiscard]] Value ImageFetch(const Value& handle, const Value& coords, const Value& offset, const U32& lod, const U32& multisampling, TextureInstInfo info); [[nodiscard]] Value ImageGradient(const Value& handle, const Value& coords, diff --git a/src/shader_recompiler/ir/microinstruction.cpp b/src/shader_recompiler/ir/microinstruction.cpp index aa03e3d6e..e35be8a7f 100644 --- a/src/shader_recompiler/ir/microinstruction.cpp +++ b/src/shader_recompiler/ir/microinstruction.cpp @@ -55,12 +55,30 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::StoreBufferF32x2: case Opcode::StoreBufferF32x3: case Opcode::StoreBufferF32x4: + case Opcode::StoreBufferFormatF32: + case Opcode::StoreBufferFormatF32x2: + case Opcode::StoreBufferFormatF32x3: + case Opcode::StoreBufferFormatF32x4: case Opcode::StoreBufferU32: + case Opcode::BufferAtomicIAdd32: + case Opcode::BufferAtomicSMin32: + case Opcode::BufferAtomicUMin32: + case Opcode::BufferAtomicSMax32: + case Opcode::BufferAtomicUMax32: + case Opcode::BufferAtomicInc32: + case Opcode::BufferAtomicDec32: + case Opcode::BufferAtomicAnd32: + case Opcode::BufferAtomicOr32: + case Opcode::BufferAtomicXor32: + case Opcode::BufferAtomicExchange32: case Opcode::WriteSharedU128: case Opcode::WriteSharedU64: case Opcode::WriteSharedU32: - case Opcode::WriteSharedU16: - case Opcode::WriteSharedU8: + case Opcode::SharedAtomicIAdd32: + case Opcode::SharedAtomicSMin32: + case Opcode::SharedAtomicUMin32: + case Opcode::SharedAtomicSMax32: + case Opcode::SharedAtomicUMax32: case Opcode::ImageWrite: case Opcode::ImageAtomicIAdd32: case Opcode::ImageAtomicSMin32: diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index aa2fd3f82..e9ecd4350 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -26,19 +26,20 @@ OPCODE(WorkgroupMemoryBarrier, Void, OPCODE(DeviceMemoryBarrier, Void, ) // Shared memory operations -OPCODE(LoadSharedU8, U32, U32, ) -OPCODE(LoadSharedS8, U32, U32, ) -OPCODE(LoadSharedU16, U32, U32, ) -OPCODE(LoadSharedS16, U32, U32, ) OPCODE(LoadSharedU32, U32, U32, ) OPCODE(LoadSharedU64, U32x2, U32, ) OPCODE(LoadSharedU128, U32x4, U32, ) -OPCODE(WriteSharedU8, Void, U32, U32, ) -OPCODE(WriteSharedU16, Void, U32, U32, ) OPCODE(WriteSharedU32, Void, U32, U32, ) OPCODE(WriteSharedU64, Void, U32, U32x2, ) OPCODE(WriteSharedU128, Void, U32, U32x4, ) +// Shared atomic operations +OPCODE(SharedAtomicIAdd32, U32, U32, U32, ) +OPCODE(SharedAtomicSMin32, U32, U32, U32, ) +OPCODE(SharedAtomicUMin32, U32, U32, U32, ) +OPCODE(SharedAtomicSMax32, U32, U32, U32, ) +OPCODE(SharedAtomicUMax32, U32, U32, U32, ) + // Context getters/setters OPCODE(GetUserData, U32, ScalarReg, ) OPCODE(GetThreadBitScalarReg, U1, ScalarReg, ) @@ -88,8 +89,25 @@ OPCODE(StoreBufferF32, Void, Opaq OPCODE(StoreBufferF32x2, Void, Opaque, Opaque, F32x2, ) OPCODE(StoreBufferF32x3, Void, Opaque, Opaque, F32x3, ) OPCODE(StoreBufferF32x4, Void, Opaque, Opaque, F32x4, ) +OPCODE(StoreBufferFormatF32, Void, Opaque, Opaque, F32, ) +OPCODE(StoreBufferFormatF32x2, Void, Opaque, Opaque, F32x2, ) +OPCODE(StoreBufferFormatF32x3, Void, Opaque, Opaque, F32x3, ) +OPCODE(StoreBufferFormatF32x4, Void, Opaque, Opaque, F32x4, ) OPCODE(StoreBufferU32, Void, Opaque, Opaque, U32, ) +// Buffer atomic operations +OPCODE(BufferAtomicIAdd32, U32, Opaque, Opaque, U32 ) +OPCODE(BufferAtomicSMin32, U32, Opaque, Opaque, U32 ) +OPCODE(BufferAtomicUMin32, U32, Opaque, Opaque, U32 ) +OPCODE(BufferAtomicSMax32, U32, Opaque, Opaque, U32 ) +OPCODE(BufferAtomicUMax32, U32, Opaque, Opaque, U32 ) +OPCODE(BufferAtomicInc32, U32, Opaque, Opaque, U32, ) +OPCODE(BufferAtomicDec32, U32, Opaque, Opaque, U32, ) +OPCODE(BufferAtomicAnd32, U32, Opaque, Opaque, U32, ) +OPCODE(BufferAtomicOr32, U32, Opaque, Opaque, U32, ) +OPCODE(BufferAtomicXor32, U32, Opaque, Opaque, U32, ) +OPCODE(BufferAtomicExchange32, U32, Opaque, Opaque, U32, ) + // Vector utility OPCODE(CompositeConstructU32x2, U32x2, U32, U32, ) OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, ) @@ -298,12 +316,12 @@ OPCODE(ConvertU16U32, U16, U32, OPCODE(ConvertU32U16, U32, U16, ) // Image operations -OPCODE(ImageSampleImplicitLod, F32x4, Opaque, Opaque, Opaque, Opaque, ) -OPCODE(ImageSampleExplicitLod, F32x4, Opaque, Opaque, Opaque, Opaque, ) -OPCODE(ImageSampleDrefImplicitLod, F32, Opaque, Opaque, F32, Opaque, Opaque, ) -OPCODE(ImageSampleDrefExplicitLod, F32, Opaque, Opaque, F32, Opaque, Opaque, ) -OPCODE(ImageGather, F32x4, Opaque, Opaque, Opaque, Opaque, ) -OPCODE(ImageGatherDref, F32x4, Opaque, Opaque, Opaque, Opaque, F32, ) +OPCODE(ImageSampleImplicitLod, F32x4, Opaque, Opaque, F32, Opaque, ) +OPCODE(ImageSampleExplicitLod, F32x4, Opaque, Opaque, U32, Opaque, ) +OPCODE(ImageSampleDrefImplicitLod, F32, Opaque, Opaque, Opaque, F32, Opaque, ) +OPCODE(ImageSampleDrefExplicitLod, F32, Opaque, Opaque, Opaque, U32, Opaque, ) +OPCODE(ImageGather, F32x4, Opaque, Opaque, Opaque, ) +OPCODE(ImageGatherDref, F32x4, Opaque, Opaque, Opaque, F32, ) OPCODE(ImageFetch, F32x4, Opaque, Opaque, Opaque, U32, Opaque, ) OPCODE(ImageQueryDimensions, U32x4, Opaque, U32, U1, ) OPCODE(ImageQueryLod, F32x4, Opaque, Opaque, ) diff --git a/src/shader_recompiler/ir/passes/ir_passes.h b/src/shader_recompiler/ir/passes/ir_passes.h index bf2ba4d66..7e2b962b5 100644 --- a/src/shader_recompiler/ir/passes/ir_passes.h +++ b/src/shader_recompiler/ir/passes/ir_passes.h @@ -14,5 +14,6 @@ void DeadCodeEliminationPass(IR::Program& program); void ConstantPropagationPass(IR::BlockList& program); void ResourceTrackingPass(IR::Program& program); void CollectShaderInfoPass(IR::Program& program); +void LowerSharedMemToRegisters(IR::Program& program); } // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir/passes/lower_shared_mem_to_registers.cpp b/src/shader_recompiler/ir/passes/lower_shared_mem_to_registers.cpp new file mode 100644 index 000000000..a87cf31b1 --- /dev/null +++ b/src/shader_recompiler/ir/passes/lower_shared_mem_to_registers.cpp @@ -0,0 +1,39 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include "shader_recompiler/ir/program.h" + +namespace Shader::Optimization { + +void LowerSharedMemToRegisters(IR::Program& program) { + boost::container::small_vector ds_writes; + Info& info{program.info}; + for (IR::Block* const block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + const auto opcode = inst.GetOpcode(); + if (opcode == IR::Opcode::WriteSharedU32 || opcode == IR::Opcode::WriteSharedU64) { + ds_writes.emplace_back(&inst); + continue; + } + if (opcode == IR::Opcode::LoadSharedU32 || opcode == IR::Opcode::LoadSharedU64) { + // Search for write instruction with same offset + const IR::Inst* prod = inst.Arg(0).InstRecursive(); + const auto it = std::ranges::find_if(ds_writes, [&](const IR::Inst* write) { + const IR::Inst* write_prod = write->Arg(0).InstRecursive(); + return write_prod->Arg(1).U32() == prod->Arg(1).U32() && + write_prod->Arg(0) == prod->Arg(0); + }); + ASSERT(it != ds_writes.end()); + // Replace data read with value written. + inst.ReplaceUsesWith((*it)->Arg(1)); + } + } + } + // We should have eliminated everything. Invalidate data write instructions. + for (const auto inst : ds_writes) { + inst->Invalidate(); + } +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index 97438f80a..20a66ad0c 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -20,6 +20,42 @@ struct SharpLocation { auto operator<=>(const SharpLocation&) const = default; }; +bool IsBufferAtomic(const IR::Inst& inst) { + switch (inst.GetOpcode()) { + case IR::Opcode::BufferAtomicIAdd32: + case IR::Opcode::BufferAtomicSMin32: + case IR::Opcode::BufferAtomicUMin32: + case IR::Opcode::BufferAtomicSMax32: + case IR::Opcode::BufferAtomicUMax32: + case IR::Opcode::BufferAtomicInc32: + case IR::Opcode::BufferAtomicDec32: + case IR::Opcode::BufferAtomicAnd32: + case IR::Opcode::BufferAtomicOr32: + case IR::Opcode::BufferAtomicXor32: + case IR::Opcode::BufferAtomicExchange32: + return true; + default: + return false; + } +} + +bool IsBufferStore(const IR::Inst& inst) { + switch (inst.GetOpcode()) { + case IR::Opcode::StoreBufferF32: + case IR::Opcode::StoreBufferF32x2: + case IR::Opcode::StoreBufferF32x3: + case IR::Opcode::StoreBufferF32x4: + case IR::Opcode::StoreBufferFormatF32: + case IR::Opcode::StoreBufferFormatF32x2: + case IR::Opcode::StoreBufferFormatF32x3: + case IR::Opcode::StoreBufferFormatF32x4: + case IR::Opcode::StoreBufferU32: + return true; + default: + return IsBufferAtomic(inst); + } +} + bool IsBufferInstruction(const IR::Inst& inst) { switch (inst.GetOpcode()) { case IR::Opcode::LoadBufferF32: @@ -33,14 +69,9 @@ bool IsBufferInstruction(const IR::Inst& inst) { case IR::Opcode::LoadBufferU32: case IR::Opcode::ReadConstBuffer: case IR::Opcode::ReadConstBufferU32: - case IR::Opcode::StoreBufferF32: - case IR::Opcode::StoreBufferF32x2: - case IR::Opcode::StoreBufferF32x3: - case IR::Opcode::StoreBufferF32x4: - case IR::Opcode::StoreBufferU32: return true; default: - return false; + return IsBufferStore(inst); } } @@ -73,6 +104,10 @@ IR::Type BufferDataType(const IR::Inst& inst, AmdGpu::NumberFormat num_format) { case IR::Opcode::LoadBufferFormatF32x2: case IR::Opcode::LoadBufferFormatF32x3: case IR::Opcode::LoadBufferFormatF32x4: + case IR::Opcode::StoreBufferFormatF32: + case IR::Opcode::StoreBufferFormatF32x2: + case IR::Opcode::StoreBufferFormatF32x3: + case IR::Opcode::StoreBufferFormatF32x4: switch (num_format) { case AmdGpu::NumberFormat::Unorm: case AmdGpu::NumberFormat::Snorm: @@ -100,25 +135,13 @@ IR::Type BufferDataType(const IR::Inst& inst, AmdGpu::NumberFormat num_format) { case IR::Opcode::LoadBufferU32: case IR::Opcode::ReadConstBufferU32: case IR::Opcode::StoreBufferU32: + case IR::Opcode::BufferAtomicIAdd32: return IR::Type::U32; default: UNREACHABLE(); } } -bool IsBufferStore(const IR::Inst& inst) { - switch (inst.GetOpcode()) { - case IR::Opcode::StoreBufferF32: - case IR::Opcode::StoreBufferF32x2: - case IR::Opcode::StoreBufferF32x3: - case IR::Opcode::StoreBufferF32x4: - case IR::Opcode::StoreBufferU32: - return true; - default: - return false; - } -} - bool IsImageInstruction(const IR::Inst& inst) { switch (inst.GetOpcode()) { case IR::Opcode::ImageSampleExplicitLod: @@ -171,6 +194,22 @@ bool IsImageStorageInstruction(const IR::Inst& inst) { } } +u32 ImageOffsetArgumentPosition(const IR::Inst& inst) { + switch (inst.GetOpcode()) { + case IR::Opcode::ImageGather: + case IR::Opcode::ImageGatherDref: + return 2; + case IR::Opcode::ImageSampleExplicitLod: + case IR::Opcode::ImageSampleImplicitLod: + return 3; + case IR::Opcode::ImageSampleDrefExplicitLod: + case IR::Opcode::ImageSampleDrefImplicitLod: + return 4; + default: + UNREACHABLE(); + } +} + class Descriptors { public: explicit Descriptors(Info& info_) @@ -376,9 +415,11 @@ s32 TryHandleInlineCbuf(IR::Inst& inst, Info& info, Descriptors& descriptors, return -1; } // We have found this pattern. Build the sharp. - std::array buffer; + std::array buffer; buffer[0] = info.pgm_base + p0->Arg(0).U32() + p0->Arg(1).U32(); - buffer[1] = handle->Arg(2).U32() | handle->Arg(3).U64() << 32; + buffer[1] = 0; + buffer[2] = handle->Arg(2).U32(); + buffer[3] = handle->Arg(3).U32(); cbuf = std::bit_cast(buffer); // Assign a binding to this sharp. return descriptors.Add(BufferResource{ @@ -492,6 +533,13 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip const auto tsharp = TrackSharp(tsharp_handle); const auto image = info.ReadUd(tsharp.sgpr_base, tsharp.dword_offset); const auto inst_info = inst.Flags(); + if (!image.Valid()) { + LOG_ERROR(Render_Vulkan, "Shader compiled with unbound image!"); + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + inst.ReplaceUsesWith( + ir.CompositeConstruct(ir.Imm32(0.f), ir.Imm32(0.f), ir.Imm32(0.f), ir.Imm32(0.f))); + return; + } ASSERT(image.GetType() != AmdGpu::ImageType::Invalid); u32 image_binding = descriptors.Add(ImageResource{ .sgpr_base = tsharp.sgpr_base, @@ -565,25 +613,43 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip if (inst_info.has_offset) { // The offsets are six-bit signed integers: X=[5:0], Y=[13:8], and Z=[21:16]. - const bool is_gather = inst.GetOpcode() == IR::Opcode::ImageGather || - inst.GetOpcode() == IR::Opcode::ImageGatherDref; - const u32 arg_pos = is_gather ? 2 : (inst_info.is_depth ? 4 : 3); + const u32 arg_pos = ImageOffsetArgumentPosition(inst); const IR::Value arg = inst.Arg(arg_pos); ASSERT_MSG(arg.Type() == IR::Type::U32, "Unexpected offset type"); - const auto sign_ext = [&](u32 value) { return ir.Imm32(s32(value << 24) >> 24); }; - union { - u32 raw; - BitField<0, 6, u32> x; - BitField<8, 6, u32> y; - BitField<16, 6, u32> z; - } offset{arg.U32()}; - const IR::Value value = ir.CompositeConstruct(sign_ext(offset.x), sign_ext(offset.y)); - inst.SetArg(arg_pos, value); + + const auto read = [&](u32 offset) -> auto { + return ir.BitFieldExtract(IR::U32{arg}, ir.Imm32(offset), ir.Imm32(6), true); + }; + + switch (image.GetType()) { + case AmdGpu::ImageType::Color1D: + case AmdGpu::ImageType::Color1DArray: + inst.SetArg(arg_pos, read(0)); + break; + case AmdGpu::ImageType::Color2D: + case AmdGpu::ImageType::Color2DArray: + inst.SetArg(arg_pos, ir.CompositeConstruct(read(0), read(8))); + break; + case AmdGpu::ImageType::Color3D: + inst.SetArg(arg_pos, ir.CompositeConstruct(read(0), read(8), read(16))); + break; + default: + UNREACHABLE(); + } } if (inst_info.has_lod_clamp) { - // Final argument contains lod_clamp - const u32 arg_pos = inst_info.is_depth ? 5 : 4; + const u32 arg_pos = [&]() -> u32 { + switch (inst.GetOpcode()) { + case IR::Opcode::ImageSampleImplicitLod: + return 2; + case IR::Opcode::ImageSampleDrefImplicitLod: + return 3; + default: + break; + } + return inst_info.is_depth ? 5 : 4; + }(); inst.SetArg(arg_pos, arg); } if (inst_info.explicit_lod) { @@ -591,7 +657,8 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip inst.GetOpcode() == IR::Opcode::ImageSampleExplicitLod || inst.GetOpcode() == IR::Opcode::ImageSampleDrefExplicitLod); const u32 pos = inst.GetOpcode() == IR::Opcode::ImageSampleExplicitLod ? 2 : 3; - inst.SetArg(pos, arg); + const IR::Value value = inst_info.force_level0 ? ir.Imm32(0.f) : arg; + inst.SetArg(pos, value); } } diff --git a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp index 7100b3844..52087a653 100644 --- a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp +++ b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp @@ -16,18 +16,6 @@ void Visit(Info& info, IR::Inst& inst) { info.stores.Set(inst.Arg(0).Attribute(), inst.Arg(2).U32()); break; } - case IR::Opcode::LoadSharedS8: - case IR::Opcode::LoadSharedU8: - case IR::Opcode::WriteSharedU8: - info.uses_shared_u8 = true; - info.uses_shared = true; - break; - case IR::Opcode::LoadSharedS16: - case IR::Opcode::LoadSharedU16: - case IR::Opcode::WriteSharedU16: - info.uses_shared_u16 = true; - info.uses_shared = true; - break; case IR::Opcode::LoadSharedU32: case IR::Opcode::LoadSharedU64: case IR::Opcode::WriteSharedU32: diff --git a/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp index 805914924..eef73a659 100644 --- a/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp @@ -129,19 +129,19 @@ IR::Opcode UndefOpcode(IR::VectorReg) noexcept { return IR::Opcode::UndefU32; } -IR::Opcode UndefOpcode(const VccLoTag&) noexcept { +IR::Opcode UndefOpcode(const VccLoTag) noexcept { return IR::Opcode::UndefU32; } -IR::Opcode UndefOpcode(const SccLoTag&) noexcept { +IR::Opcode UndefOpcode(const SccLoTag) noexcept { return IR::Opcode::UndefU32; } -IR::Opcode UndefOpcode(const VccHiTag&) noexcept { +IR::Opcode UndefOpcode(const VccHiTag) noexcept { return IR::Opcode::UndefU32; } -IR::Opcode UndefOpcode(const FlagTag&) noexcept { +IR::Opcode UndefOpcode(const FlagTag) noexcept { return IR::Opcode::UndefU1; } diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp index 69eec50f3..0f9fd6d41 100644 --- a/src/shader_recompiler/recompiler.cpp +++ b/src/shader_recompiler/recompiler.cpp @@ -58,6 +58,9 @@ IR::Program TranslateProgram(Common::ObjectPool& inst_pool, Shader::Optimization::SsaRewritePass(program.post_order_blocks); Shader::Optimization::ResourceTrackingPass(program); Shader::Optimization::ConstantPropagationPass(program.post_order_blocks); + if (program.info.stage != Stage::Compute) { + Shader::Optimization::LowerSharedMemToRegisters(program); + } Shader::Optimization::IdentityRemovalPass(program.blocks); Shader::Optimization::DeadCodeEliminationPass(program); Shader::Optimization::CollectShaderInfoPass(program); diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 4ab71c3b7..b1eb6aea7 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -116,7 +116,7 @@ struct PushData { std::array buf_offsets; void AddOffset(u32 binding, u32 offset) { - ASSERT(offset < 64 && binding < 32); + ASSERT(offset < 256 && binding < buf_offsets.size()); buf_offsets[binding] = offset; } }; @@ -180,6 +180,7 @@ struct Info { SamplerResourceList samplers; std::array workgroup_size{}; + std::array tgid_enable; u32 num_user_data; u32 num_input_vgprs; @@ -195,8 +196,6 @@ struct Info { bool has_image_query{}; bool uses_group_quad{}; bool uses_shared{}; - bool uses_shared_u8{}; - bool uses_shared_u16{}; bool uses_fp16{}; bool uses_step_rates{}; bool translation_failed{}; // indicates that shader has unsupported instructions @@ -228,7 +227,7 @@ struct fmt::formatter { constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); } - auto format(const Shader::Stage& stage, format_context& ctx) const { + auto format(const Shader::Stage stage, format_context& ctx) const { constexpr static std::array names = {"fs", "vs", "gs", "es", "hs", "ls", "cs"}; return fmt::format_to(ctx.out(), "{}", names[static_cast(stage)]); } diff --git a/src/shadps4.qrc b/src/shadps4.qrc index cdbae7861..c22b837bd 100644 --- a/src/shadps4.qrc +++ b/src/shadps4.qrc @@ -1,6 +1,7 @@ images/shadps4.ico + images/about_icon.png images/play_icon.png images/pause_icon.png images/stop_icon.png diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index af1963eec..dce2d4b42 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -35,7 +35,7 @@ void Liverpool::Process(std::stop_token stoken) { { std::unique_lock lk{submit_mutex}; Common::CondvarWait(submit_cv, lk, stoken, - [this] { return num_submits != 0 || submit_done; }); + [this] { return num_commands || num_submits || submit_done; }); } if (stoken.stop_requested()) { break; @@ -45,7 +45,23 @@ void Liverpool::Process(std::stop_token stoken) { int qid = -1; - while (num_submits) { + while (num_submits || num_commands) { + + // Process incoming commands with high priority + while (num_commands) { + + Common::UniqueFunction callback{}; + { + std::unique_lock lk{submit_mutex}; + callback = std::move(command_queue.back()); + command_queue.pop(); + } + + callback(); + + --num_commands; + } + qid = (qid + 1) % NumTotalQueues; auto& queue = mapped_queues[qid]; @@ -180,6 +196,17 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spanSignal(Platform::InterruptId::GfxFlip); break; } + case PM4CmdNop::PayloadType::DebugMarkerPush: { + const auto marker_sz = nop->header.count.Value() * 2; + const std::string_view label{reinterpret_cast(&nop->data_block[1]), + marker_sz}; + rasterizer->ScopeMarkerBegin(label); + break; + } + case PM4CmdNop::PayloadType::DebugMarkerPop: { + rasterizer->ScopeMarkerEnd(); + break; + } default: break; } @@ -208,7 +235,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span dcb, std::spantype3.count; - if (nop_offset == 0x0e || nop_offset == 0x0d) { + if (nop_offset == 0x0e || nop_offset == 0x0d || nop_offset == 0x0b) { ASSERT_MSG(payload[nop_offset] == 0xc0001000, "NOP hint is missing in CB setup sequence"); last_cb_extent[col_buf_id].raw = payload[nop_offset + 1]; @@ -295,8 +322,9 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spanindex_count; regs.draw_initiator = draw_index->draw_initiator; if (rasterizer) { - rasterizer->ScopeMarkerBegin( - fmt::format("dcb:{}:DrawIndex2", reinterpret_cast(dcb.data()))); + const auto cmd_address = reinterpret_cast(header); + rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:DrawIndex2", cmd_address)); + rasterizer->Breadcrumb(u64(cmd_address)); rasterizer->Draw(true); rasterizer->ScopeMarkerEnd(); } @@ -308,8 +336,9 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spanindex_count; regs.draw_initiator = draw_index_off->draw_initiator; if (rasterizer) { - rasterizer->ScopeMarkerBegin(fmt::format( - "dcb:{}:DrawIndexOffset2", reinterpret_cast(dcb.data()))); + const auto cmd_address = reinterpret_cast(header); + rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:DrawIndexOffset2", cmd_address)); + rasterizer->Breadcrumb(u64(cmd_address)); rasterizer->Draw(true, draw_index_off->index_offset); rasterizer->ScopeMarkerEnd(); } @@ -320,8 +349,9 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spanindex_count; regs.draw_initiator = draw_index->draw_initiator; if (rasterizer) { - rasterizer->ScopeMarkerBegin( - fmt::format("dcb:{}:DrawIndexAuto", reinterpret_cast(dcb.data()))); + const auto cmd_address = reinterpret_cast(header); + rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:DrawIndexAuto", cmd_address)); + rasterizer->Breadcrumb(u64(cmd_address)); rasterizer->Draw(false); rasterizer->ScopeMarkerEnd(); } @@ -334,8 +364,9 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spandim_z; regs.cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator; if (rasterizer && (regs.cs_program.dispatch_initiator & 1)) { - rasterizer->ScopeMarkerBegin( - fmt::format("dcb:{}:Dispatch", reinterpret_cast(dcb.data()))); + const auto cmd_address = reinterpret_cast(header); + rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:Dispatch", cmd_address)); + rasterizer->Breadcrumb(u64(cmd_address)); rasterizer->DispatchDirect(); rasterizer->ScopeMarkerEnd(); } @@ -393,7 +424,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); - ASSERT(wait_reg_mem->engine.Value() == PM4CmdWaitRegMem::Engine::Me); + // ASSERT(wait_reg_mem->engine.Value() == PM4CmdWaitRegMem::Engine::Me); // Optimization: VO label waits are special because the emulator // will write to the label when presentation is finished. So if // there are no other submits to yield to we can sleep the thread @@ -486,8 +517,9 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, int vqid) { regs.cs_program.dim_z = dispatch_direct->dim_z; regs.cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator; if (rasterizer && (regs.cs_program.dispatch_initiator & 1)) { - rasterizer->ScopeMarkerBegin(fmt::format( - "acb[{}]:{}:Dispatch", vqid, reinterpret_cast(acb.data()))); + const auto cmd_address = reinterpret_cast(header); + rasterizer->ScopeMarkerBegin(fmt::format("acb[{}]:{}:Dispatch", vqid, cmd_address)); + rasterizer->Breadcrumb(u64(cmd_address)); rasterizer->DispatchDirect(); rasterizer->ScopeMarkerEnd(); } diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 3ebd9a971..896927df6 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -11,10 +11,12 @@ #include #include #include + #include "common/assert.h" #include "common/bit_field.h" #include "common/polyfill_thread.h" #include "common/types.h" +#include "common/unique_function.h" #include "video_core/amdgpu/pixel_format.h" #include "video_core/amdgpu/resource.h" @@ -128,6 +130,7 @@ struct Liverpool { BitField<0, 6, u64> num_vgprs; BitField<6, 4, u64> num_sgprs; BitField<33, 5, u64> num_user_regs; + BitField<39, 3, u64> tgid_enable; BitField<47, 9, u64> lds_dwords; } settings; INSERT_PADDING_WORDS(1); @@ -146,6 +149,10 @@ struct Liverpool { return settings.lds_dwords.Value() * 128 * 4; } + bool IsTgidEnabled(u32 i) const noexcept { + return (settings.tgid_enable.Value() >> i) & 1; + } + std::span Code() const { const u32* code = Address(); BinaryInfo bininfo; @@ -445,7 +452,7 @@ struct Liverpool { BitField<11, 1, u32> enable_polygon_offset_front; BitField<12, 1, u32> enable_polygon_offset_back; BitField<13, 1, u32> enable_polygon_offset_para; - BitField<13, 1, u32> enable_window_offset; + BitField<16, 1, u32> enable_window_offset; BitField<19, 1, ProvokingVtxLast> provoking_vtx_last; PolygonMode PolyMode() const { @@ -766,7 +773,8 @@ struct Liverpool { } TilingMode GetTilingMode() const { - return attrib.tile_mode_index; + return info.linear_general ? TilingMode::Display_Linear + : attrib.tile_mode_index.Value(); } bool IsTiled() const { @@ -866,6 +874,33 @@ struct Liverpool { } }; + union ShaderStageEnable { + u32 raw; + BitField<0, 2, u32> ls_en; + BitField<2, 1, u32> hs_en; + BitField<3, 2, u32> es_en; + BitField<5, 1, u32> gs_en; + BitField<6, 1, u32> vs_en; + + bool IsStageEnabled(u32 stage) { + switch (stage) { + case 0: + case 1: + return true; + case 2: + return gs_en.Value(); + case 3: + return es_en.Value(); + case 4: + return hs_en.Value(); + case 5: + return ls_en.Value(); + default: + UNREACHABLE(); + } + } + }; + union Regs { struct { INSERT_PADDING_WORDS(0x2C08); @@ -902,7 +937,9 @@ struct Liverpool { INSERT_PADDING_WORDS(0xA094 - 0xA08E - 2); std::array viewport_scissors; std::array viewport_depths; - INSERT_PADDING_WORDS(0xA105 - 0xA0D4); + INSERT_PADDING_WORDS(0xA103 - 0xA0D4); + u32 primitive_restart_index; + INSERT_PADDING_WORDS(1); BlendConstants blend_constants; INSERT_PADDING_WORDS(0xA10B - 0xA105 - 4); StencilControl stencil_control; @@ -941,10 +978,14 @@ struct Liverpool { IndexBufferType index_buffer_type; INSERT_PADDING_WORDS(0xA2A1 - 0xA29E - 2); u32 enable_primitive_id; - INSERT_PADDING_WORDS(0xA2A8 - 0xA2A1 - 1); + INSERT_PADDING_WORDS(3); + u32 enable_primitive_restart; + INSERT_PADDING_WORDS(0xA2A8 - 0xA2A5 - 1); u32 vgt_instance_step_rate_0; u32 vgt_instance_step_rate_1; - INSERT_PADDING_WORDS(0xA2DF - 0xA2A9 - 1); + INSERT_PADDING_WORDS(0xA2D5 - 0xA2A9 - 1); + ShaderStageEnable stage_enable; + INSERT_PADDING_WORDS(9); PolygonOffset poly_offset; INSERT_PADDING_WORDS(0xA2F8 - 0xA2DF - 5); AaConfig aa_config; @@ -1024,6 +1065,13 @@ public: rasterizer = rasterizer_; } + void SendCommand(Common::UniqueFunction&& func) { + std::scoped_lock lk{submit_mutex}; + command_queue.emplace(std::move(func)); + ++num_commands; + submit_cv.notify_one(); + } + private: struct Task { struct promise_type { @@ -1092,9 +1140,11 @@ private: Libraries::VideoOut::VideoOutPort* vo_port{}; std::jthread process_thread{}; std::atomic num_submits{}; + std::atomic num_commands{}; std::atomic submit_done{}; std::mutex submit_mutex; std::condition_variable_any submit_cv; + std::queue> command_queue{}; }; static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08); @@ -1117,6 +1167,7 @@ static_assert(GFX6_3D_REG_INDEX(depth_buffer.depth_slice) == 0xA017); static_assert(GFX6_3D_REG_INDEX(color_target_mask) == 0xA08E); static_assert(GFX6_3D_REG_INDEX(color_shader_mask) == 0xA08F); static_assert(GFX6_3D_REG_INDEX(viewport_scissors) == 0xA094); +static_assert(GFX6_3D_REG_INDEX(primitive_restart_index) == 0xA103); static_assert(GFX6_3D_REG_INDEX(stencil_control) == 0xA10B); static_assert(GFX6_3D_REG_INDEX(viewports) == 0xA10F); static_assert(GFX6_3D_REG_INDEX(clip_user_data) == 0xA16F); @@ -1137,8 +1188,10 @@ static_assert(GFX6_3D_REG_INDEX(vs_output_control) == 0xA207); static_assert(GFX6_3D_REG_INDEX(index_size) == 0xA29D); static_assert(GFX6_3D_REG_INDEX(index_buffer_type) == 0xA29F); static_assert(GFX6_3D_REG_INDEX(enable_primitive_id) == 0xA2A1); +static_assert(GFX6_3D_REG_INDEX(enable_primitive_restart) == 0xA2A5); static_assert(GFX6_3D_REG_INDEX(vgt_instance_step_rate_0) == 0xA2A8); static_assert(GFX6_3D_REG_INDEX(vgt_instance_step_rate_1) == 0xA2A9); +static_assert(GFX6_3D_REG_INDEX(stage_enable) == 0xA2D5); static_assert(GFX6_3D_REG_INDEX(poly_offset) == 0xA2DF); static_assert(GFX6_3D_REG_INDEX(aa_config) == 0xA2F8); static_assert(GFX6_3D_REG_INDEX(color_buffers[0].base_address) == 0xA318); diff --git a/src/video_core/amdgpu/pm4_cmds.h b/src/video_core/amdgpu/pm4_cmds.h index e5f618cc2..5ab233fdc 100644 --- a/src/video_core/amdgpu/pm4_cmds.h +++ b/src/video_core/amdgpu/pm4_cmds.h @@ -282,6 +282,13 @@ enum class InterruptSelect : u32 { IrqUndocumented = 3, }; +static u64 GetGpuClock64() { + auto now = std::chrono::high_resolution_clock::now(); + auto duration = now.time_since_epoch(); + auto ticks = std::chrono::duration_cast(duration).count(); + return static_cast(ticks); +} + struct PM4CmdEventWriteEop { PM4Type3Header header; union { @@ -325,6 +332,10 @@ struct PM4CmdEventWriteEop { *Address() = DataQWord(); break; } + case DataSelect::GpuClock64: { + *Address() = GetGpuClock64(); + break; + } case DataSelect::PerfCounter: { *Address() = Common::FencedRDTSC(); break; @@ -652,13 +663,6 @@ struct PM4CmdReleaseMem { return data_lo | u64(data_hi) << 32; } - uint64_t GetGpuClock64() const { - auto now = std::chrono::high_resolution_clock::now(); - auto duration = now.time_since_epoch(); - auto ticks = std::chrono::duration_cast(duration).count(); - return static_cast(ticks); - } - void SignalFence(Platform::InterruptId irq_id) const { switch (data_sel.Value()) { case DataSelect::Data32Low: { diff --git a/src/video_core/amdgpu/pm4_opcodes.h b/src/video_core/amdgpu/pm4_opcodes.h index 8922c4ea3..fba0cbb9f 100644 --- a/src/video_core/amdgpu/pm4_opcodes.h +++ b/src/video_core/amdgpu/pm4_opcodes.h @@ -41,6 +41,7 @@ enum class PM4ItOpcode : u32 { CondIndirectBuffer = 0x3F, CopyData = 0x40, CommandProcessorDma = 0x41, + PfpSyncMe = 0x42, SurfaceSync = 0x43, CondWrite = 0x45, EventWrite = 0x46, diff --git a/src/video_core/buffer_cache/buffer.cpp b/src/video_core/buffer_cache/buffer.cpp index e9498b352..d112864d5 100644 --- a/src/video_core/buffer_cache/buffer.cpp +++ b/src/video_core/buffer_cache/buffer.cpp @@ -106,10 +106,8 @@ Buffer::Buffer(const Vulkan::Instance& instance_, MemoryUsage usage_, VAddr cpu_ VmaAllocationInfo alloc_info{}; buffer.Create(buffer_ci, usage, &alloc_info); - if (instance->HasDebuggingToolAttached()) { - const auto device = instance->GetDevice(); - Vulkan::SetObjectName(device, Handle(), "Buffer {:#x} {} KiB", cpu_addr, size_bytes / 1024); - } + const auto device = instance->GetDevice(); + Vulkan::SetObjectName(device, Handle(), "Buffer {:#x}:{:#x}", cpu_addr, size_bytes); // Map it if it is host visible. VkMemoryPropertyFlags property_flags{}; @@ -152,10 +150,8 @@ StreamBuffer::StreamBuffer(const Vulkan::Instance& instance, Vulkan::Scheduler& ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE); ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE); const auto device = instance.GetDevice(); - if (instance.HasDebuggingToolAttached()) { - Vulkan::SetObjectName(device, Handle(), "StreamBuffer({}): {} KiB", BufferTypeName(usage), - size_bytes / 1024); - } + Vulkan::SetObjectName(device, Handle(), "StreamBuffer({}):{:#x}", BufferTypeName(usage), + size_bytes); } std::pair StreamBuffer::Map(u64 size, u64 alignment) { diff --git a/src/video_core/buffer_cache/buffer.h b/src/video_core/buffer_cache/buffer.h index e0d9da08f..d373fbffb 100644 --- a/src/video_core/buffer_cache/buffer.h +++ b/src/video_core/buffer_cache/buffer.h @@ -146,6 +146,10 @@ public: return offset; } + u64 GetFreeSize() const { + return size_bytes - offset - mapped_size; + } + private: struct Watch { u64 tick{}; diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 7ab0d8171..02d6b2ce4 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -13,7 +13,7 @@ namespace VideoCore { -static constexpr size_t StagingBufferSize = 256_MB; +static constexpr size_t StagingBufferSize = 512_MB; static constexpr size_t UboStreamBufferSize = 64_MB; BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, @@ -87,6 +87,15 @@ void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si } bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) { + boost::container::small_vector attributes; + boost::container::small_vector bindings; + SCOPE_EXIT { + if (instance.IsVertexInputDynamicState()) { + const auto cmdbuf = scheduler.CommandBuffer(); + cmdbuf.setVertexInputEXT(bindings, attributes); + } + }; + if (vs_info.vs_inputs.empty()) { return false; } @@ -122,6 +131,21 @@ bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) { } guest_buffers.emplace_back(buffer); ranges.emplace_back(buffer.base_address, buffer.base_address + buffer.GetSize()); + attributes.push_back({ + .location = input.binding, + .binding = input.binding, + .format = + Vulkan::LiverpoolToVK::SurfaceFormat(buffer.GetDataFmt(), buffer.GetNumberFmt()), + .offset = 0, + }); + bindings.push_back({ + .binding = input.binding, + .stride = buffer.GetStride(), + .inputRate = input.instance_step_rate == Shader::Info::VsInput::None + ? vk::VertexInputRate::eVertex + : vk::VertexInputRate::eInstance, + .divisor = 1, + }); } std::ranges::sort(ranges, [](const BufferRange& lhv, const BufferRange& rhv) { @@ -224,6 +248,19 @@ std::pair BufferCache::ObtainBuffer(VAddr device_addr, u32 size, b return {&buffer, buffer.Offset(device_addr)}; } +std::pair BufferCache::ObtainTempBuffer(VAddr gpu_addr, u32 size) { + const u64 page = gpu_addr >> CACHING_PAGEBITS; + const BufferId buffer_id = page_table[page]; + if (buffer_id) { + const Buffer& buffer = slot_buffers[buffer_id]; + if (buffer.IsInBounds(gpu_addr, size)) { + return {&buffer, buffer.Offset(gpu_addr)}; + } + } + const u32 offset = staging_buffer.Copy(gpu_addr, size, 16); + return {&staging_buffer, offset}; +} + bool BufferCache::IsRegionRegistered(VAddr addr, size_t size) { const VAddr end_addr = addr + size; const u64 page_end = Common::DivCeil(end_addr, CACHING_PAGESIZE); @@ -248,6 +285,10 @@ bool BufferCache::IsRegionCpuModified(VAddr addr, size_t size) { return memory_tracker.IsRegionCpuModified(addr, size); } +bool BufferCache::IsRegionGpuModified(VAddr addr, size_t size) { + return memory_tracker.IsRegionGpuModified(addr, size); +} + BufferId BufferCache::FindBuffer(VAddr device_addr, u32 size) { if (device_addr == 0) { return NULL_BUFFER_ID; diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 0dee87cf5..2bcc4f0e8 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -3,7 +3,6 @@ #pragma once -#include #include #include #include @@ -69,12 +68,18 @@ public: /// Obtains a buffer for the specified region. [[nodiscard]] std::pair ObtainBuffer(VAddr gpu_addr, u32 size, bool is_written); + /// Obtains a temporary buffer for usage in texture cache. + [[nodiscard]] std::pair ObtainTempBuffer(VAddr gpu_addr, u32 size); + /// Return true when a region is registered on the cache [[nodiscard]] bool IsRegionRegistered(VAddr addr, size_t size); /// Return true when a CPU region is modified from the CPU [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size); + /// Return true when a CPU region is modified from the GPU + [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); + private: template void ForEachBufferInRange(VAddr device_addr, u64 size, Func&& func) { diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index 01526265a..e86d0652f 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -81,6 +81,8 @@ vk::PrimitiveTopology PrimitiveType(Liverpool::PrimitiveType type) { return vk::PrimitiveTopology::eTriangleListWithAdjacency; case Liverpool::PrimitiveType::AdjTriangleStrip: return vk::PrimitiveTopology::eTriangleStripWithAdjacency; + case Liverpool::PrimitiveType::PatchPrimitive: + return vk::PrimitiveTopology::ePatchList; case Liverpool::PrimitiveType::QuadList: // Needs to generate index buffer on the fly. return vk::PrimitiveTopology::eTriangleList; @@ -327,6 +329,7 @@ std::span GetAllFormats() { vk::Format::eR16G16Sint, vk::Format::eR16G16Snorm, vk::Format::eR16Sfloat, + vk::Format::eR16Uint, vk::Format::eR16Unorm, vk::Format::eR32G32B32A32Sfloat, vk::Format::eR32G32B32A32Sint, @@ -339,6 +342,7 @@ std::span GetAllFormats() { vk::Format::eR32Sint, vk::Format::eR32Uint, vk::Format::eBc6HUfloatBlock, + vk::Format::eBc6HSfloatBlock, vk::Format::eR16G16Unorm, vk::Format::eR16G16B16A16Sscaled, vk::Format::eR16G16Sscaled, @@ -540,6 +544,9 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu if (data_format == AmdGpu::DataFormat::FormatBc6 && num_format == AmdGpu::NumberFormat::Unorm) { return vk::Format::eBc6HUfloatBlock; } + if (data_format == AmdGpu::DataFormat::FormatBc6 && num_format == AmdGpu::NumberFormat::Snorm) { + return vk::Format::eBc6HSfloatBlock; + } if (data_format == AmdGpu::DataFormat::Format8_8_8_8 && num_format == AmdGpu::NumberFormat::Sint) { return vk::Format::eR8G8B8A8Sint; diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index 8178c88de..eab9d527c 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -47,21 +47,22 @@ public: Frame* PrepareFrame(const Libraries::VideoOut::BufferAttributeGroup& attribute, VAddr cpu_address, bool is_eop) { const auto info = VideoCore::ImageInfo{attribute, cpu_address}; - const auto image_id = texture_cache.FindImage(info, false); + const auto image_id = texture_cache.FindImage(info); + texture_cache.UpdateImage(image_id, is_eop ? nullptr : &flip_scheduler); auto& image = texture_cache.GetImage(image_id); return PrepareFrameInternal(image, is_eop); } - Frame* PrepareBlankFrame() { + Frame* PrepareBlankFrame(bool is_eop) { auto& image = texture_cache.GetImage(VideoCore::NULL_IMAGE_ID); - return PrepareFrameInternal(image, true); + return PrepareFrameInternal(image, is_eop); } VideoCore::Image& RegisterVideoOutSurface( const Libraries::VideoOut::BufferAttributeGroup& attribute, VAddr cpu_address) { vo_buffers_addr.emplace_back(cpu_address); const auto info = VideoCore::ImageInfo{attribute, cpu_address}; - const auto image_id = texture_cache.FindImage(info, false); + const auto image_id = texture_cache.FindImage(info); return texture_cache.GetImage(image_id); } @@ -75,6 +76,11 @@ public: void Present(Frame* frame); void RecreateFrame(Frame* frame, u32 width, u32 height); + void FlushDraw() { + SubmitInfo info{}; + draw_scheduler.Flush(info); + } + private: Frame* PrepareFrameInternal(VideoCore::Image& image, bool is_eop = true); Frame* GetRenderFrame(); diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 21710a76a..81cf9c02a 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -12,18 +12,19 @@ namespace Vulkan { ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler_, - vk::PipelineCache pipeline_cache, const Shader::Info* info_, - u64 compute_key_, vk::ShaderModule module) - : instance{instance_}, scheduler{scheduler_}, compute_key{compute_key_}, info{*info_} { + vk::PipelineCache pipeline_cache, u64 compute_key_, + const Program* program) + : instance{instance_}, scheduler{scheduler_}, compute_key{compute_key_}, + info{&program->pgm.info} { const vk::PipelineShaderStageCreateInfo shader_ci = { .stage = vk::ShaderStageFlagBits::eCompute, - .module = module, + .module = program->module, .pName = "main", }; u32 binding{}; boost::container::small_vector bindings; - for (const auto& buffer : info.buffers) { + for (const auto& buffer : info->buffers) { bindings.push_back({ .binding = binding++, .descriptorType = buffer.is_storage ? vk::DescriptorType::eStorageBuffer @@ -32,7 +33,7 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler .stageFlags = vk::ShaderStageFlagBits::eCompute, }); } - for (const auto& image : info.images) { + for (const auto& image : info->images) { bindings.push_back({ .binding = binding++, .descriptorType = image.is_storage ? vk::DescriptorType::eStorageImage @@ -41,7 +42,7 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler .stageFlags = vk::ShaderStageFlagBits::eCompute, }); } - for (const auto& sampler : info.samplers) { + for (const auto& sampler : info->samplers) { bindings.push_back({ .binding = binding++, .descriptorType = vk::DescriptorType::eSampler, @@ -96,8 +97,8 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache, Shader::PushData push_data{}; u32 binding{}; - for (u32 i = 0; const auto& buffer : info.buffers) { - const auto vsharp = buffer.GetVsharp(info); + for (const auto& buffer : info->buffers) { + const auto vsharp = buffer.GetVsharp(*info); const VAddr address = vsharp.base_address; // Most of the time when a metadata is updated with a shader it gets cleared. It means we // can skip the whole dispatch and update the tracked state instead. Also, it is not @@ -115,7 +116,7 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache, } const u32 size = vsharp.GetSize(); if (buffer.is_written) { - texture_cache.InvalidateMemory(address, size); + texture_cache.InvalidateMemory(address, size, true); } const u32 alignment = buffer.is_storage ? instance.StorageMinAlignment() : instance.UniformMinAlignment(); @@ -137,12 +138,11 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache, : vk::DescriptorType::eUniformBuffer, .pBufferInfo = &buffer_infos.back(), }); - i++; } - for (const auto& image_desc : info.images) { + for (const auto& image_desc : info->images) { const auto tsharp = - info.ReadUd(image_desc.sgpr_base, image_desc.dword_offset); + info->ReadUd(image_desc.sgpr_base, image_desc.dword_offset); VideoCore::ImageInfo image_info{tsharp}; VideoCore::ImageViewInfo view_info{tsharp, image_desc.is_storage}; const auto& image_view = texture_cache.FindTexture(image_info, view_info); @@ -162,8 +162,8 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache, LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a CS shader (texture)"); } } - for (const auto& sampler : info.samplers) { - const auto ssharp = sampler.GetSsharp(info); + for (const auto& sampler : info->samplers) { + const auto ssharp = sampler.GetSsharp(*info); const auto vk_sampler = texture_cache.GetSampler(ssharp); image_infos.emplace_back(vk_sampler, VK_NULL_HANDLE, vk::ImageLayout::eGeneral); set_writes.push_back({ diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 16de5635f..5da9dc7e7 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -3,6 +3,7 @@ #pragma once +#include "shader_recompiler/ir/program.h" #include "shader_recompiler/runtime_info.h" #include "video_core/renderer_vulkan/vk_common.h" @@ -16,11 +17,18 @@ namespace Vulkan { class Instance; class Scheduler; +struct Program { + Shader::IR::Program pgm; + std::vector spv; + vk::ShaderModule module; + u32 end_binding; +}; + class ComputePipeline { public: explicit ComputePipeline(const Instance& instance, Scheduler& scheduler, - vk::PipelineCache pipeline_cache, const Shader::Info* info, - u64 compute_key, vk::ShaderModule module); + vk::PipelineCache pipeline_cache, u64 compute_key, + const Program* program); ~ComputePipeline(); [[nodiscard]] vk::Pipeline Handle() const noexcept { @@ -37,7 +45,7 @@ private: vk::UniquePipelineLayout pipeline_layout; vk::UniqueDescriptorSetLayout desc_layout; u64 compute_key; - Shader::Info info{}; + const Shader::Info* info; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 5d87a1caf..887a6d871 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -19,15 +19,14 @@ namespace Vulkan { GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& scheduler_, const GraphicsPipelineKey& key_, vk::PipelineCache pipeline_cache, - std::span infos, - std::array modules) + std::span programs) : instance{instance_}, scheduler{scheduler_}, key{key_} { const vk::Device device = instance.GetDevice(); for (u32 i = 0; i < MaxShaderStages; i++) { - if (!infos[i]) { + if (!programs[i]) { continue; } - stages[i] = *infos[i]; + stages[i] = &programs[i]->pgm.info; } BuildDescSetLayout(); @@ -49,14 +48,14 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul boost::container::static_vector bindings; boost::container::static_vector attributes; const auto& vs_info = stages[u32(Shader::Stage::Vertex)]; - for (const auto& input : vs_info.vs_inputs) { + for (const auto& input : vs_info->vs_inputs) { if (input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate0 || input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate1) { // Skip attribute binding as the data will be pulled by shader continue; } - const auto buffer = vs_info.ReadUd(input.sgpr_base, input.dword_offset); + const auto buffer = vs_info->ReadUd(input.sgpr_base, input.dword_offset); attributes.push_back({ .location = input.binding, .binding = input.binding, @@ -86,8 +85,10 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul const vk::PipelineInputAssemblyStateCreateInfo input_assembly = { .topology = LiverpoolToVK::PrimitiveType(key.prim_type), - .primitiveRestartEnable = false, + .primitiveRestartEnable = key.enable_primitive_restart != 0, }; + ASSERT_MSG(!key.enable_primitive_restart || key.primitive_restart_index == 0xFFFF, + "Primitive restart index other than 0xFFFF is not supported yet"); const vk::PipelineRasterizationStateCreateInfo raster_state = { .depthClampEnable = false, @@ -145,6 +146,9 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul dynamic_states.push_back(vk::DynamicState::eColorWriteEnableEXT); dynamic_states.push_back(vk::DynamicState::eColorWriteMaskEXT); } + if (instance.IsVertexInputDynamicState()) { + dynamic_states.push_back(vk::DynamicState::eVertexInputEXT); + } const vk::PipelineDynamicStateCreateInfo dynamic_info = { .dynamicStateCount = static_cast(dynamic_states.size()), @@ -167,10 +171,18 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul .reference = key.stencil_ref_front.stencil_test_val, }, .back{ - .failOp = LiverpoolToVK::StencilOp(key.stencil.stencil_fail_back), - .passOp = LiverpoolToVK::StencilOp(key.stencil.stencil_zpass_back), - .depthFailOp = LiverpoolToVK::StencilOp(key.stencil.stencil_zfail_back), - .compareOp = LiverpoolToVK::CompareOp(key.depth.stencil_bf_func), + .failOp = LiverpoolToVK::StencilOp(key.depth.backface_enable + ? key.stencil.stencil_fail_back.Value() + : key.stencil.stencil_fail_front.Value()), + .passOp = LiverpoolToVK::StencilOp(key.depth.backface_enable + ? key.stencil.stencil_zpass_back.Value() + : key.stencil.stencil_zpass_front.Value()), + .depthFailOp = LiverpoolToVK::StencilOp(key.depth.backface_enable + ? key.stencil.stencil_zfail_back.Value() + : key.stencil.stencil_zfail_front.Value()), + .compareOp = LiverpoolToVK::CompareOp(key.depth.backface_enable + ? key.depth.stencil_bf_func.Value() + : key.depth.stencil_ref_func.Value()), .compareMask = key.stencil_ref_back.stencil_mask, .writeMask = key.stencil_ref_back.stencil_write_mask, .reference = key.stencil_ref_back.stencil_test_val, @@ -179,21 +191,21 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul .maxDepthBounds = key.depth_bounds_max, }; - u32 shader_count{}; auto stage = u32(Shader::Stage::Vertex); - std::array shader_stages; - shader_stages[shader_count++] = vk::PipelineShaderStageCreateInfo{ + boost::container::static_vector + shader_stages; + shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{ .stage = vk::ShaderStageFlagBits::eVertex, - .module = modules[stage], + .module = programs[stage]->module, .pName = "main", - }; + }); stage = u32(Shader::Stage::Fragment); - if (modules[stage]) { - shader_stages[shader_count++] = vk::PipelineShaderStageCreateInfo{ + if (programs[stage]) { + shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{ .stage = vk::ShaderStageFlagBits::eFragment, - .module = modules[stage], + .module = programs[stage]->module, .pName = "main", - }; + }); } const auto it = std::ranges::find(key.color_formats, vk::Format::eUndefined); @@ -202,7 +214,8 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul .colorAttachmentCount = num_color_formats, .pColorAttachmentFormats = key.color_formats.data(), .depthAttachmentFormat = key.depth_format, - .stencilAttachmentFormat = vk::Format::eUndefined, + .stencilAttachmentFormat = + key.depth.stencil_enable ? key.depth_format : vk::Format::eUndefined, }; std::array attachments; @@ -266,7 +279,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul const vk::GraphicsPipelineCreateInfo pipeline_info = { .pNext = &pipeline_rendering_ci, - .stageCount = shader_count, + .stageCount = static_cast(shader_stages.size()), .pStages = shader_stages.data(), .pVertexInputState = &vertex_input_info, .pInputAssemblyState = &input_assembly, @@ -292,8 +305,11 @@ GraphicsPipeline::~GraphicsPipeline() = default; void GraphicsPipeline::BuildDescSetLayout() { u32 binding{}; boost::container::small_vector bindings; - for (const auto& stage : stages) { - for (const auto& buffer : stage.buffers) { + for (const auto* stage : stages) { + if (!stage) { + continue; + } + for (const auto& buffer : stage->buffers) { bindings.push_back({ .binding = binding++, .descriptorType = buffer.is_storage ? vk::DescriptorType::eStorageBuffer @@ -302,7 +318,7 @@ void GraphicsPipeline::BuildDescSetLayout() { .stageFlags = vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment, }); } - for (const auto& image : stage.images) { + for (const auto& image : stage->images) { bindings.push_back({ .binding = binding++, .descriptorType = image.is_storage ? vk::DescriptorType::eStorageImage @@ -311,7 +327,7 @@ void GraphicsPipeline::BuildDescSetLayout() { .stageFlags = vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment, }); } - for (const auto& sampler : stage.samplers) { + for (const auto& sampler : stage->samplers) { bindings.push_back({ .binding = binding++, .descriptorType = vk::DescriptorType::eSampler, @@ -338,13 +354,16 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs, Shader::PushData push_data{}; u32 binding{}; - for (const auto& stage : stages) { - if (stage.uses_step_rates) { + for (const auto* stage : stages) { + if (!stage) { + continue; + } + if (stage->uses_step_rates) { push_data.step0 = regs.vgt_instance_step_rate_0; push_data.step1 = regs.vgt_instance_step_rate_1; } - for (const auto& buffer : stage.buffers) { - const auto vsharp = buffer.GetVsharp(stage); + for (const auto& buffer : stage->buffers) { + const auto vsharp = buffer.GetVsharp(*stage); if (vsharp) { const VAddr address = vsharp.base_address; if (texture_cache.IsMeta(address)) { @@ -377,9 +396,9 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs, } boost::container::static_vector tsharps; - for (const auto& image_desc : stage.images) { + for (const auto& image_desc : stage->images) { const auto& tsharp = tsharps.emplace_back( - stage.ReadUd(image_desc.sgpr_base, image_desc.dword_offset)); + stage->ReadUd(image_desc.sgpr_base, image_desc.dword_offset)); VideoCore::ImageInfo image_info{tsharp}; VideoCore::ImageViewInfo view_info{tsharp, image_desc.is_storage}; const auto& image_view = texture_cache.FindTexture(image_info, view_info); @@ -399,8 +418,8 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs, LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a PS shader (texture)"); } } - for (const auto& sampler : stage.samplers) { - auto ssharp = sampler.GetSsharp(stage); + for (const auto& sampler : stage->samplers) { + auto ssharp = sampler.GetSsharp(*stage); if (sampler.disable_aniso) { const auto& tsharp = tsharps[sampler.associated_image]; if (tsharp.base_level == 0 && tsharp.last_level == 0) { diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index f818d980b..f7ea32d95 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -3,9 +3,9 @@ #include #include "common/types.h" -#include "shader_recompiler/runtime_info.h" #include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/vk_common.h" +#include "video_core/renderer_vulkan/vk_compute_pipeline.h" namespace VideoCore { class BufferCache; @@ -39,6 +39,8 @@ struct GraphicsPipelineKey { Liverpool::StencilRefMask stencil_ref_front; Liverpool::StencilRefMask stencil_ref_back; Liverpool::PrimitiveType prim_type; + u32 enable_primitive_restart; + u32 primitive_restart_index; Liverpool::PolygonMode polygon_mode; Liverpool::CullMode cull_mode; Liverpool::FrontFace front_face; @@ -56,8 +58,7 @@ class GraphicsPipeline { public: explicit GraphicsPipeline(const Instance& instance, Scheduler& scheduler, const GraphicsPipelineKey& key, vk::PipelineCache pipeline_cache, - std::span infos, - std::array modules); + std::span programs); ~GraphicsPipeline(); void BindResources(const Liverpool::Regs& regs, VideoCore::BufferCache& buffer_cache, @@ -72,7 +73,7 @@ public: } const Shader::Info& GetStage(Shader::Stage stage) const noexcept { - return stages[u32(stage)]; + return *stages[u32(stage)]; } bool IsEmbeddedVs() const noexcept { @@ -97,7 +98,7 @@ private: vk::UniquePipeline pipeline; vk::UniquePipelineLayout pipeline_layout; vk::UniqueDescriptorSetLayout desc_layout; - std::array stages{}; + std::array stages{}; GraphicsPipelineKey key; }; diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 2d396daf0..66da030f1 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -8,6 +8,7 @@ #include #include "common/assert.h" +#include "common/config.h" #include "sdl_window.h" #include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/vk_instance.h" @@ -163,7 +164,8 @@ bool Instance::CreateDevice() { vk::PhysicalDeviceColorWriteEnableFeaturesEXT, vk::PhysicalDeviceVulkan12Features, vk::PhysicalDeviceVulkan13Features, vk::PhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR, - vk::PhysicalDeviceDepthClipControlFeaturesEXT>(); + vk::PhysicalDeviceDepthClipControlFeaturesEXT, vk::PhysicalDeviceRobustness2FeaturesEXT, + vk::PhysicalDevicePortabilitySubsetFeaturesKHR>(); const vk::StructureChain properties_chain = physical_device.getProperties2< vk::PhysicalDeviceProperties2, vk::PhysicalDevicePortabilitySubsetPropertiesKHR, vk::PhysicalDeviceExternalMemoryHostPropertiesEXT, vk::PhysicalDeviceVulkan11Properties>(); @@ -197,10 +199,12 @@ bool Instance::CreateDevice() { external_memory_host = add_extension(VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME); custom_border_color = add_extension(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME); add_extension(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME); - add_extension(VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME); + const bool depth_clip_control = add_extension(VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME); add_extension(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME); workgroup_memory_explicit_layout = add_extension(VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME); + vertex_input_dynamic_state = add_extension(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME); + // The next two extensions are required to be available together in order to support write masks color_write_en = add_extension(VK_EXT_COLOR_WRITE_ENABLE_EXTENSION_NAME); color_write_en &= add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME); @@ -210,9 +214,21 @@ bool Instance::CreateDevice() { // These extensions are promoted by Vulkan 1.3, but for greater compatibility we use Vulkan 1.2 // with extensions. tooling_info = add_extension(VK_EXT_TOOLING_INFO_EXTENSION_NAME); - add_extension(VK_KHR_MAINTENANCE_4_EXTENSION_NAME); + const bool maintenance4 = add_extension(VK_KHR_MAINTENANCE_4_EXTENSION_NAME); add_extension(VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME); add_extension(VK_EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION_EXTENSION_NAME); + const bool has_sync2 = add_extension(VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME); + + if (has_sync2) { + has_nv_checkpoints = Config::isMarkersEnabled() + ? add_extension(VK_NV_DEVICE_DIAGNOSTIC_CHECKPOINTS_EXTENSION_NAME) + : false; + } + +#ifdef __APPLE__ + // Required by Vulkan spec if supported. + add_extension(VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME); +#endif const auto family_properties = physical_device.getQueueFamilyProperties(); if (family_properties.empty()) { @@ -308,21 +324,51 @@ bool Instance::CreateDevice() { vk::PhysicalDeviceRobustness2FeaturesEXT{ .nullDescriptor = true, }, + vk::PhysicalDeviceSynchronization2Features{ + .synchronization2 = true, + }, + vk::PhysicalDeviceVertexInputDynamicStateFeaturesEXT{ + .vertexInputDynamicState = true, + }, +#ifdef __APPLE__ + feature_chain.get(), +#endif }; + if (!maintenance4) { + device_chain.unlink(); + } + if (!custom_border_color) { + device_chain.unlink(); + } if (!color_write_en) { device_chain.unlink(); device_chain.unlink(); } - if (!robustness) { + if (!depth_clip_control) { + device_chain.unlink(); + } + if (!workgroup_memory_explicit_layout) { + device_chain.unlink(); + } + if (robustness) { + device_chain.get().nullDescriptor = + feature_chain.get().nullDescriptor; + } else { device_chain.unlink(); } + if (!vertex_input_dynamic_state) { + device_chain.unlink(); + } try { device = physical_device.createDeviceUnique(device_chain.get()); } catch (vk::ExtensionNotPresentError& err) { LOG_CRITICAL(Render_Vulkan, "Some required extensions are not available {}", err.what()); return false; + } catch (vk::FeatureNotPresentError& err) { + LOG_CRITICAL(Render_Vulkan, "Some required features are not available {}", err.what()); + return false; } VULKAN_HPP_DEFAULT_DISPATCHER.init(*device); diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index a8c0dcf45..4cb4741a5 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -88,6 +88,10 @@ public: return profiler_context; } + bool HasNvCheckpoints() const { + return has_nv_checkpoints; + } + /// Returns true when a known debugging tool is attached. bool HasDebuggingToolAttached() const { return has_renderdoc || has_nsight_graphics; @@ -128,6 +132,11 @@ public: return color_write_en; } + /// Returns true when VK_EXT_vertex_input_dynamic_state is supported. + bool IsVertexInputDynamicState() const { + return vertex_input_dynamic_state; + } + /// Returns the vendor ID of the physical device u32 GetVendorID() const { return properties.vendorID; @@ -253,12 +262,14 @@ private: bool external_memory_host{}; bool workgroup_memory_explicit_layout{}; bool color_write_en{}; + bool vertex_input_dynamic_state{}; u64 min_imported_host_pointer_alignment{}; u32 subgroup_size{}; bool tooling_info{}; bool debug_utils_supported{}; bool has_nsight_graphics{}; bool has_renderdoc{}; + bool has_nv_checkpoints{}; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 8d27d252c..617a78125 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -20,6 +20,10 @@ namespace Vulkan { using Shader::VsOutput; +[[nodiscard]] inline u64 HashCombine(const u64 seed, const u64 hash) { + return seed ^ (hash + 0x9e3779b9 + (seed << 6) + (seed >> 2)); +} + void BuildVsOutputs(Shader::Info& info, const AmdGpu::Liverpool::VsOutputControl& ctl) { const auto add_output = [&](VsOutput x, VsOutput y, VsOutput z, VsOutput w) { if (x != VsOutput::None || y != VsOutput::None || z != VsOutput::None || @@ -93,6 +97,8 @@ Shader::Info MakeShaderInfo(Shader::Stage stage, std::span user_d info.num_user_data = cs_pgm.settings.num_user_regs; info.workgroup_size = {cs_pgm.num_thread_x.full, cs_pgm.num_thread_y.full, cs_pgm.num_thread_z.full}; + info.tgid_enable = {cs_pgm.IsTgidEnabled(0), cs_pgm.IsTgidEnabled(1), + cs_pgm.IsTgidEnabled(2)}; info.shared_memory_size = cs_pgm.SharedMemSize(); break; } @@ -115,6 +121,10 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_, } const GraphicsPipeline* PipelineCache::GetGraphicsPipeline() { + // Tessellation is unsupported so skip the draw to avoid locking up the driver. + if (liverpool->regs.primitive_type == Liverpool::PrimitiveType::PatchPrimitive) { + return nullptr; + } RefreshGraphicsKey(); const auto [it, is_new] = graphics_pipelines.try_emplace(graphics_key); if (is_new) { @@ -161,6 +171,8 @@ void PipelineCache::RefreshGraphicsKey() { key.stencil_ref_front = regs.stencil_ref_front; key.stencil_ref_back = regs.stencil_ref_back; key.prim_type = regs.primitive_type; + key.enable_primitive_restart = regs.enable_primitive_restart & 1; + key.primitive_restart_index = regs.primitive_restart_index; key.polygon_mode = regs.polygon_control.PolyMode(); key.cull_mode = regs.polygon_control.CullingMode(); key.clip_space = regs.clipper_control.clip_space; @@ -203,12 +215,20 @@ void PipelineCache::RefreshGraphicsKey() { } for (u32 i = 0; i < MaxShaderStages; i++) { + if (!regs.stage_enable.IsStageEnabled(i)) { + key.stage_hashes[i] = 0; + continue; + } auto* pgm = regs.ProgramForStage(i); if (!pgm || !pgm->Address()) { key.stage_hashes[i] = 0; continue; } const auto* bininfo = Liverpool::GetBinaryInfo(*pgm); + if (!bininfo->Valid()) { + key.stage_hashes[i] = 0; + continue; + } key.stage_hashes[i] = bininfo->shader_hash; } } @@ -230,23 +250,14 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline() { } u32 binding{}; - std::array programs; - std::array infos{}; - for (u32 i = 0; i < MaxShaderStages; i++) { if (!graphics_key.stage_hashes[i]) { - stages[i] = VK_NULL_HANDLE; + programs[i] = nullptr; continue; } auto* pgm = regs.ProgramForStage(i); const auto code = pgm->Code(); - const auto it = module_map.find(graphics_key.stage_hashes[i]); - if (it != module_map.end()) { - stages[i] = *it->second; - continue; - } - // Dump shader code if requested. const auto stage = Shader::Stage{i}; const u64 hash = graphics_key.stage_hashes[i]; @@ -257,39 +268,56 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline() { block_pool.ReleaseContents(); inst_pool.ReleaseContents(); - if (stage != Shader::Stage::Compute && stage != Shader::Stage::Fragment && - stage != Shader::Stage::Vertex) { + if (stage != Shader::Stage::Fragment && stage != Shader::Stage::Vertex) { LOG_ERROR(Render_Vulkan, "Unsupported shader stage {}. PL creation skipped.", stage); return {}; } + const u64 lookup_hash = HashCombine(hash, binding); + auto it = program_cache.find(lookup_hash); + if (it != program_cache.end()) { + const Program* program = it.value().get(); + ASSERT(program->pgm.info.stage == stage); + programs[i] = program; + binding = program->end_binding; + continue; + } + // Recompile shader to IR. try { + auto program = std::make_unique(); + block_pool.ReleaseContents(); + inst_pool.ReleaseContents(); + LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x}", stage, hash); Shader::Info info = MakeShaderInfo(stage, pgm->user_data, regs); info.pgm_base = pgm->Address(); info.pgm_hash = hash; - programs[i] = + program->pgm = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info), profile); // Compile IR to SPIR-V - auto spv_code = Shader::Backend::SPIRV::EmitSPIRV(profile, programs[i], binding); + program->spv = Shader::Backend::SPIRV::EmitSPIRV(profile, program->pgm, binding); if (Config::dumpShaders()) { - DumpShader(spv_code, hash, stage, "spv"); + DumpShader(program->spv, hash, stage, "spv"); } - stages[i] = CompileSPV(spv_code, instance.GetDevice()); - infos[i] = &programs[i].info; + + // Compile module and set name to hash in renderdoc + program->end_binding = binding; + program->module = CompileSPV(program->spv, instance.GetDevice()); + const auto name = fmt::format("{}_{:#x}", stage, hash); + Vulkan::SetObjectName(instance.GetDevice(), program->module, name); + + // Cache program + const auto [it, _] = program_cache.emplace(lookup_hash, std::move(program)); + programs[i] = it.value().get(); } catch (const Shader::Exception& e) { UNREACHABLE_MSG("{}", e.what()); } - - // Set module name to hash in renderdoc - const auto name = fmt::format("{}_{:#x}", stage, hash); - Vulkan::SetObjectName(instance.GetDevice(), stages[i], name); } return std::make_unique(instance, scheduler, graphics_key, *pipeline_cache, - infos, stages); + programs); } std::unique_ptr PipelineCache::CreateComputePipeline() { @@ -306,25 +334,31 @@ std::unique_ptr PipelineCache::CreateComputePipeline() { // Recompile shader to IR. try { + auto program = std::make_unique(); LOG_INFO(Render_Vulkan, "Compiling cs shader {:#x}", compute_key); Shader::Info info = MakeShaderInfo(Shader::Stage::Compute, cs_pgm.user_data, liverpool->regs); info.pgm_base = cs_pgm.Address(); - auto program = + info.pgm_hash = compute_key; + program->pgm = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info), profile); // Compile IR to SPIR-V u32 binding{}; - const auto spv_code = Shader::Backend::SPIRV::EmitSPIRV(profile, program, binding); + program->spv = Shader::Backend::SPIRV::EmitSPIRV(profile, program->pgm, binding); if (Config::dumpShaders()) { - DumpShader(spv_code, compute_key, Shader::Stage::Compute, "spv"); + DumpShader(program->spv, compute_key, Shader::Stage::Compute, "spv"); } - const auto module = CompileSPV(spv_code, instance.GetDevice()); - // Set module name to hash in renderdoc + + // Compile module and set name to hash in renderdoc + program->module = CompileSPV(program->spv, instance.GetDevice()); const auto name = fmt::format("cs_{:#x}", compute_key); - Vulkan::SetObjectName(instance.GetDevice(), module, name); - return std::make_unique(instance, scheduler, *pipeline_cache, - &program.info, compute_key, module); + Vulkan::SetObjectName(instance.GetDevice(), program->module, name); + + // Cache program + const auto [it, _] = program_cache.emplace(compute_key, std::move(program)); + return std::make_unique(instance, scheduler, *pipeline_cache, compute_key, + it.value().get()); } catch (const Shader::Exception& e) { UNREACHABLE_MSG("{}", e.what()); return nullptr; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index d41723ec8..8f3b806cb 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -5,6 +5,7 @@ #include #include "shader_recompiler/ir/basic_block.h" +#include "shader_recompiler/ir/program.h" #include "shader_recompiler/profile.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" @@ -43,10 +44,10 @@ private: AmdGpu::Liverpool* liverpool; vk::UniquePipelineCache pipeline_cache; vk::UniquePipelineLayout pipeline_layout; - tsl::robin_map module_map; - std::array stages{}; + tsl::robin_map> program_cache; tsl::robin_map> compute_pipelines; tsl::robin_map> graphics_pipelines; + std::array programs{}; Shader::Profile profile{}; GraphicsPipelineKey graphics_key{}; u64 compute_key{}; diff --git a/src/video_core/renderer_vulkan/vk_platform.cpp b/src/video_core/renderer_vulkan/vk_platform.cpp index 0915514b8..c73a8139d 100644 --- a/src/video_core/renderer_vulkan/vk_platform.cpp +++ b/src/video_core/renderer_vulkan/vk_platform.cpp @@ -157,6 +157,10 @@ std::vector GetInstanceExtensions(Frontend::WindowSystemType window break; } +#ifdef __APPLE__ + extensions.push_back(VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME); +#endif + if (window_type != Frontend::WindowSystemType::Headless) { extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME); } @@ -221,12 +225,61 @@ vk::UniqueInstance CreateInstance(vk::DynamicLoader& dl, Frontend::WindowSystemT vk::Bool32 enable_sync = enable_validation && Config::vkValidationSyncEnabled() ? vk::True : vk::False; - vk::LayerSettingEXT layer_set = { - .pLayerName = VALIDATION_LAYER_NAME, - .pSettingName = "validate_sync", - .type = vk::LayerSettingTypeEXT::eBool32, - .valueCount = 1, - .pValues = &enable_sync, + vk::Bool32 enable_gpuav = + enable_validation && Config::vkValidationSyncEnabled() ? vk::True : vk::False; + const char* gpuav_mode = enable_validation && Config::vkValidationGpuEnabled() + ? "GPU_BASED_GPU_ASSISTED" + : "GPU_BASED_NONE"; + const std::array layer_setings = { + vk::LayerSettingEXT{ + .pLayerName = VALIDATION_LAYER_NAME, + .pSettingName = "validate_sync", + .type = vk::LayerSettingTypeEXT::eBool32, + .valueCount = 1, + .pValues = &enable_sync, + }, + vk::LayerSettingEXT{ + .pLayerName = VALIDATION_LAYER_NAME, + .pSettingName = "sync_queue_submit", + .type = vk::LayerSettingTypeEXT::eBool32, + .valueCount = 1, + .pValues = &enable_sync, + }, + vk::LayerSettingEXT{ + .pLayerName = VALIDATION_LAYER_NAME, + .pSettingName = "validate_gpu_based", + .type = vk::LayerSettingTypeEXT::eString, + .valueCount = 1, + .pValues = &gpuav_mode, + }, + vk::LayerSettingEXT{ + .pLayerName = VALIDATION_LAYER_NAME, + .pSettingName = "gpuav_reserve_binding_slot", + .type = vk::LayerSettingTypeEXT::eBool32, + .valueCount = 1, + .pValues = &enable_gpuav, + }, + vk::LayerSettingEXT{ + .pLayerName = VALIDATION_LAYER_NAME, + .pSettingName = "gpuav_descriptor_checks", + .type = vk::LayerSettingTypeEXT::eBool32, + .valueCount = 1, + .pValues = &enable_gpuav, + }, + vk::LayerSettingEXT{ + .pLayerName = VALIDATION_LAYER_NAME, + .pSettingName = "gpuav_validate_indirect_buffer", + .type = vk::LayerSettingTypeEXT::eBool32, + .valueCount = 1, + .pValues = &enable_gpuav, + }, + vk::LayerSettingEXT{ + .pLayerName = VALIDATION_LAYER_NAME, + .pSettingName = "gpuav_buffer_copies", + .type = vk::LayerSettingTypeEXT::eBool32, + .valueCount = 1, + .pValues = &enable_gpuav, + }, }; vk::StructureChain instance_ci_chain = { @@ -236,10 +289,13 @@ vk::UniqueInstance CreateInstance(vk::DynamicLoader& dl, Frontend::WindowSystemT .ppEnabledLayerNames = layers.data(), .enabledExtensionCount = static_cast(extensions.size()), .ppEnabledExtensionNames = extensions.data(), +#ifdef __APPLE__ + .flags = vk::InstanceCreateFlagBits::eEnumeratePortabilityKHR, +#endif }, vk::LayerSettingsCreateInfoEXT{ - .settingCount = 1, - .pSettings = &layer_set, + .settingCount = layer_setings.size(), + .pSettings = layer_setings.data(), }, }; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 51de09f7a..9ec8fe218 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -152,7 +152,8 @@ void Rasterizer::BeginRendering() { .stencil = regs.stencil_clear}}, }; texture_cache.TouchMeta(htile_address, false); - state.num_depth_attachments++; + state.has_depth = true; + state.has_stencil = regs.depth_control.stencil_enable; } scheduler.BeginRendering(state); } @@ -230,16 +231,42 @@ void Rasterizer::UpdateDepthStencilState() { cmdbuf.setDepthBoundsTestEnable(depth.depth_bounds_enable); } -void Rasterizer::ScopeMarkerBegin(const std::string& str) { +void Rasterizer::ScopeMarkerBegin(const std::string_view& str) { + if (!Config::isMarkersEnabled()) { + return; + } + const auto cmdbuf = scheduler.CommandBuffer(); cmdbuf.beginDebugUtilsLabelEXT(vk::DebugUtilsLabelEXT{ - .pLabelName = str.c_str(), + .pLabelName = str.data(), }); } void Rasterizer::ScopeMarkerEnd() { + if (!Config::isMarkersEnabled()) { + return; + } + const auto cmdbuf = scheduler.CommandBuffer(); cmdbuf.endDebugUtilsLabelEXT(); } +void Rasterizer::ScopedMarkerInsert(const std::string_view& str) { + if (!Config::isMarkersEnabled()) { + return; + } + + const auto cmdbuf = scheduler.CommandBuffer(); + cmdbuf.insertDebugUtilsLabelEXT(vk::DebugUtilsLabelEXT{ + .pLabelName = str.data(), + }); +} + +void Rasterizer::Breadcrumb(u64 id) { + if (!instance.HasNvCheckpoints()) { + return; + } + scheduler.CommandBuffer().setCheckpointNV(id); +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 685ba6e00..a151ebc27 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -35,8 +35,10 @@ public: void DispatchDirect(); - void ScopeMarkerBegin(const std::string& str); + void ScopeMarkerBegin(const std::string_view& str); void ScopeMarkerEnd(); + void ScopedMarkerInsert(const std::string_view& str); + void Breadcrumb(u64 id); void InvalidateMemory(VAddr addr, u64 size); void MapMemory(VAddr addr, u64 size); diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index fb64285f1..10ee6ea62 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -38,8 +38,8 @@ void Scheduler::BeginRendering(const RenderState& new_state) { .layerCount = 1, .colorAttachmentCount = render_state.num_color_attachments, .pColorAttachments = render_state.color_attachments.data(), - .pDepthAttachment = - render_state.num_depth_attachments ? &render_state.depth_attachment : nullptr, + .pDepthAttachment = render_state.has_depth ? &render_state.depth_attachment : nullptr, + .pStencilAttachment = render_state.has_stencil ? &render_state.depth_attachment : nullptr, }; current_cmdbuf.beginRendering(rendering_info); @@ -50,6 +50,8 @@ void Scheduler::EndRendering() { return; } is_rendering = false; + current_cmdbuf.endRendering(); + boost::container::static_vector barriers; for (size_t i = 0; i < render_state.num_color_attachments; ++i) { barriers.push_back(vk::ImageMemoryBarrier{ @@ -70,10 +72,35 @@ void Scheduler::EndRendering() { }, }); } - current_cmdbuf.endRendering(); + if (render_state.has_depth) { + barriers.push_back(vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eDepthStencilAttachmentWrite, + .dstAccessMask = vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite, + .oldLayout = render_state.depth_attachment.imageLayout, + .newLayout = render_state.depth_attachment.imageLayout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = render_state.depth_image, + .subresourceRange = + { + .aspectMask = vk::ImageAspectFlagBits::eDepth | + (render_state.has_stencil ? vk::ImageAspectFlagBits::eStencil + : vk::ImageAspectFlagBits::eNone), + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }); + } + if (!barriers.empty()) { - current_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eColorAttachmentOutput, - vk::PipelineStageFlagBits::eFragmentShader, + const auto src_stages = + vk::PipelineStageFlagBits::eColorAttachmentOutput | + (render_state.has_depth ? vk::PipelineStageFlagBits::eLateFragmentTests | + vk::PipelineStageFlagBits::eEarlyFragmentTests + : vk::PipelineStageFlagBits::eNone); + current_cmdbuf.pipelineBarrier(src_stages, vk::PipelineStageFlagBits::eFragmentShader, vk::DependencyFlagBits::eByRegion, {}, {}, barriers); } } @@ -158,6 +185,13 @@ void Scheduler::SubmitExecution(SubmitInfo& info) { try { instance.GetGraphicsQueue().submit(submit_info, info.fence); } catch (vk::DeviceLostError& err) { + if (instance.HasNvCheckpoints()) { + const auto checkpoint_data = instance.GetGraphicsQueue().getCheckpointData2NV(); + for (const auto& cp : checkpoint_data) { + LOG_CRITICAL(Render_Vulkan, "{}: {:#x}", vk::to_string(cp.stage), + reinterpret_cast(cp.pCheckpointMarker)); + } + } UNREACHABLE_MSG("Device lost during submit: {}", err.what()); } diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index b82d558ca..1140bfbc2 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -20,7 +20,8 @@ struct RenderState { vk::RenderingAttachmentInfo depth_attachment{}; vk::Image depth_image{}; u32 num_color_attachments{}; - u32 num_depth_attachments{}; + bool has_depth{}; + bool has_stencil{}; u32 width = std::numeric_limits::max(); u32 height = std::numeric_limits::max(); diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index 20c99e302..dcc19bf3b 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -37,6 +37,16 @@ void Swapchain::Create(u32 width_, u32 height_, vk::SurfaceKHR surface_) { instance.GetPresentQueueFamilyIndex(), }; + const auto modes = instance.GetPhysicalDevice().getSurfacePresentModesKHR(surface); + const auto find_mode = [&modes](vk::PresentModeKHR requested) { + const auto it = + std::find_if(modes.begin(), modes.end(), + [&requested](vk::PresentModeKHR mode) { return mode == requested; }); + + return it != modes.end(); + }; + const bool has_mailbox = find_mode(vk::PresentModeKHR::eMailbox); + const bool exclusive = queue_family_indices[0] == queue_family_indices[1]; const u32 queue_family_indices_count = exclusive ? 1u : 2u; const vk::SharingMode sharing_mode = @@ -55,7 +65,7 @@ void Swapchain::Create(u32 width_, u32 height_, vk::SurfaceKHR surface_) { .pQueueFamilyIndices = queue_family_indices.data(), .preTransform = transform, .compositeAlpha = composite_alpha, - .presentMode = vk::PresentModeKHR::eMailbox, + .presentMode = has_mailbox ? vk::PresentModeKHR::eMailbox : vk::PresentModeKHR::eImmediate, .clipped = true, .oldSwapchain = nullptr, }; @@ -83,6 +93,7 @@ bool Swapchain::AcquireNextImage() { case vk::Result::eSuboptimalKHR: case vk::Result::eErrorSurfaceLostKHR: case vk::Result::eErrorOutOfDateKHR: + case vk::Result::eErrorUnknown: needs_recreation = true; break; default: diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp index f7aef8471..bae4b89d4 100644 --- a/src/video_core/texture_cache/image.cpp +++ b/src/video_core/texture_cache/image.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "common/assert.h" +#include "common/config.h" #include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -116,6 +117,7 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, : instance{&instance_}, scheduler{&scheduler_}, info{info_}, image{instance->GetDevice(), instance->GetAllocator()}, cpu_addr{info.guest_address}, cpu_addr_end{cpu_addr + info.guest_size_bytes} { + mip_hashes.resize(info.resources.levels); ASSERT(info.pixel_format != vk::Format::eUndefined); // Here we force `eExtendedUsage` as don't know all image usage cases beforehand. In normal case // the texture cache should re-create the resource with the usage requested @@ -154,6 +156,9 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, }; image.Create(image_ci); + + Vulkan::SetObjectName(instance->GetDevice(), (vk::Image)image, "Image {:#x}:{:#x}", + info.guest_address, info.guest_size_bytes); } void Image::Transit(vk::ImageLayout dst_layout, vk::Flags dst_mask, diff --git a/src/video_core/texture_cache/image.h b/src/video_core/texture_cache/image.h index b18f1002b..5a888346f 100644 --- a/src/video_core/texture_cache/image.h +++ b/src/video_core/texture_cache/image.h @@ -111,6 +111,7 @@ struct Image { vk::Flags pl_stage = vk::PipelineStageFlagBits::eAllCommands; vk::Flags access_mask = vk::AccessFlagBits::eNone; vk::ImageLayout layout = vk::ImageLayout::eUndefined; + boost::container::small_vector mip_hashes; }; } // namespace VideoCore diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp index 94917be0a..17b78a6d5 100644 --- a/src/video_core/texture_cache/image_info.cpp +++ b/src/video_core/texture_cache/image_info.cpp @@ -189,6 +189,8 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slice resources.layers = num_slices; meta_info.htile_addr = buffer.z_info.tile_surface_en ? htile_address : 0; usage.depth_target = true; + usage.stencil = + buffer.stencil_info.format != AmdGpu::Liverpool::DepthBuffer::StencilFormat::Invalid; guest_address = buffer.Address(); const auto depth_slice_sz = buffer.GetDepthSliceSize(); @@ -260,7 +262,7 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image) noexcept { case AmdGpu::TilingMode::Display_MacroTiled: case AmdGpu::TilingMode::Texture_MacroTiled: case AmdGpu::TilingMode::Depth_MacroTiled: { - // ASSERT(!props.is_cube && !props.is_block); + ASSERT(!props.is_block); ASSERT(num_samples == 1); std::tie(mip_info.pitch, mip_info.size) = ImageSizeMacroTiled(mip_w, mip_h, bpp, num_samples, image.tiling_index); diff --git a/src/video_core/texture_cache/image_view.cpp b/src/video_core/texture_cache/image_view.cpp index ef6163c4e..cbf77f2d8 100644 --- a/src/video_core/texture_cache/image_view.cpp +++ b/src/video_core/texture_cache/image_view.cpp @@ -92,6 +92,8 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Liverpool::ColorBuffer& col_buffer, bool is_vo_surface) noexcept { const auto base_format = Vulkan::LiverpoolToVK::SurfaceFormat(col_buffer.info.format, col_buffer.NumFormat()); + range.base.layer = col_buffer.view.slice_start; + range.extent.layers = col_buffer.NumSlices(); format = Vulkan::LiverpoolToVK::AdjustColorBufferFormat( base_format, col_buffer.info.comp_swap.Value(), is_vo_surface); } diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 53596f8e3..6bc893b09 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -3,6 +3,7 @@ #include #include "common/assert.h" +#include "video_core/buffer_cache/buffer_cache.h" #include "video_core/page_manager.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -11,13 +12,11 @@ namespace VideoCore { -static constexpr u64 StreamBufferSize = 512_MB; static constexpr u64 PageShift = 12; TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, BufferCache& buffer_cache_, PageManager& tracker_) : instance{instance_}, scheduler{scheduler_}, buffer_cache{buffer_cache_}, tracker{tracker_}, - staging{instance, scheduler, MemoryUsage::Upload, StreamBufferSize}, tile_manager{instance, scheduler} { ImageInfo info; info.pixel_format = vk::Format::eR8G8B8A8Unorm; @@ -31,9 +30,12 @@ TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& TextureCache::~TextureCache() = default; -void TextureCache::InvalidateMemory(VAddr address, size_t size) { +void TextureCache::InvalidateMemory(VAddr address, size_t size, bool from_compute) { std::unique_lock lock{mutex}; ForEachImageInRegion(address, size, [&](ImageId image_id, Image& image) { + if (from_compute && !image.Overlaps(address, size)) { + return; + } // Ensure image is reuploaded when accessed again. image.flags |= ImageFlagBits::CpuModified; // Untrack image, so the range is unprotected and the guest can write freely. @@ -57,7 +59,7 @@ void TextureCache::UnmapMemory(VAddr cpu_addr, size_t size) { } } -ImageId TextureCache::FindImage(const ImageInfo& info, bool refresh_on_create) { +ImageId TextureCache::FindImage(const ImageInfo& info) { if (info.guest_address == 0) [[unlikely]] { return NULL_IMAGE_VIEW_ID; } @@ -87,12 +89,6 @@ ImageId TextureCache::FindImage(const ImageInfo& info, bool refresh_on_create) { image_id = image_ids[image_ids.size() > 1 ? 1 : 0]; } - Image& image = slot_images[image_id]; - if (True(image.flags & ImageFlagBits::CpuModified) && refresh_on_create) { - RefreshImage(image); - TrackImage(image, image_id); - } - return image_id; } @@ -119,6 +115,7 @@ ImageView& TextureCache::RegisterImageView(ImageId image_id, const ImageViewInfo ImageView& TextureCache::FindTexture(const ImageInfo& info, const ImageViewInfo& view_info) { const ImageId image_id = FindImage(info); + UpdateImage(image_id); Image& image = slot_images[image_id]; auto& usage = image.info.usage; @@ -165,7 +162,8 @@ ImageView& TextureCache::FindRenderTarget(const ImageInfo& image_info, const ImageViewInfo& view_info) { const ImageId image_id = FindImage(image_info); Image& image = slot_images[image_id]; - image.flags &= ~ImageFlagBits::CpuModified; + image.flags |= ImageFlagBits::GpuModified; + UpdateImage(image_id); image.Transit(vk::ImageLayout::eColorAttachmentOptimal, vk::AccessFlagBits::eColorAttachmentWrite | @@ -198,8 +196,9 @@ ImageView& TextureCache::FindRenderTarget(const ImageInfo& image_info, ImageView& TextureCache::FindDepthTarget(const ImageInfo& image_info, const ImageViewInfo& view_info) { - const ImageId image_id = FindImage(image_info, false); + const ImageId image_id = FindImage(image_info); Image& image = slot_images[image_id]; + image.flags |= ImageFlagBits::GpuModified; image.flags &= ~ImageFlagBits::CpuModified; const auto new_layout = view_info.is_storage ? vk::ImageLayout::eDepthStencilAttachmentOptimal @@ -224,26 +223,10 @@ ImageView& TextureCache::FindDepthTarget(const ImageInfo& image_info, return RegisterImageView(image_id, view_info); } -void TextureCache::RefreshImage(Image& image) { +void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_scheduler /*= nullptr*/) { // Mark image as validated. image.flags &= ~ImageFlagBits::CpuModified; - scheduler.EndRendering(); - - const auto cmdbuf = scheduler.CommandBuffer(); - image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite); - - vk::Buffer buffer{staging.Handle()}; - u32 offset{0}; - - auto upload_buffer = tile_manager.TryDetile(image); - if (upload_buffer) { - buffer = *upload_buffer; - } else { - // Upload data to the staging buffer. - offset = staging.Copy(image.info.guest_address, image.info.guest_size_bytes, 16); - } - const auto& num_layers = image.info.resources.layers; const auto& num_mips = image.info.resources.levels; ASSERT(num_mips == image.info.mips_layout.size()); @@ -254,12 +237,23 @@ void TextureCache::RefreshImage(Image& image) { const u32 height = std::max(image.info.size.height >> m, 1u); const u32 depth = image.info.props.is_volume ? std::max(image.info.size.depth >> m, 1u) : 1u; - const auto& [_, mip_pitch, mip_height, mip_ofs] = image.info.mips_layout[m]; + const auto& [mip_size, mip_pitch, mip_height, mip_ofs] = image.info.mips_layout[m]; + + // Protect GPU modified resources from accidental reuploads. + if (True(image.flags & ImageFlagBits::GpuModified) && + !buffer_cache.IsRegionGpuModified(image.info.guest_address + mip_ofs, mip_size)) { + const u8* addr = std::bit_cast(image.info.guest_address); + const u64 hash = XXH3_64bits(addr + mip_ofs, mip_size); + if (image.mip_hashes[m] == hash) { + continue; + } + image.mip_hashes[m] = hash; + } image_copy.push_back({ - .bufferOffset = offset + mip_ofs * num_layers, - .bufferRowLength = static_cast(mip_pitch), - .bufferImageHeight = static_cast(mip_height), + .bufferOffset = mip_ofs * num_layers, + .bufferRowLength = static_cast(mip_pitch), + .bufferImageHeight = static_cast(mip_height), .imageSubresource{ .aspectMask = vk::ImageAspectFlagBits::eColor, .mipLevel = m, @@ -271,6 +265,32 @@ void TextureCache::RefreshImage(Image& image) { }); } + if (image_copy.empty()) { + return; + } + + auto* sched_ptr = custom_scheduler ? custom_scheduler : &scheduler; + sched_ptr->EndRendering(); + + const auto cmdbuf = sched_ptr->CommandBuffer(); + image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite, cmdbuf); + + const VAddr image_addr = image.info.guest_address; + const size_t image_size = image.info.guest_size_bytes; + vk::Buffer buffer{}; + u32 offset{}; + if (auto upload_buffer = tile_manager.TryDetile(image); upload_buffer) { + buffer = *upload_buffer; + } else { + const auto [vk_buffer, buf_offset] = buffer_cache.ObtainTempBuffer(image_addr, image_size); + buffer = vk_buffer->Handle(); + offset = buf_offset; + } + + for (auto& copy : image_copy) { + copy.bufferOffset += offset; + } + cmdbuf.copyBufferToImage(buffer, image.image, vk::ImageLayout::eTransferDstOptimal, image_copy); } diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 17a09898d..137b60141 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -38,13 +38,13 @@ public: ~TextureCache(); /// Invalidates any image in the logical page range. - void InvalidateMemory(VAddr address, size_t size); + void InvalidateMemory(VAddr address, size_t size, bool from_compute = false); /// Evicts any images that overlap the unmapped range. void UnmapMemory(VAddr cpu_addr, size_t size); /// Retrieves the image handle of the image with the provided attributes. - [[nodiscard]] ImageId FindImage(const ImageInfo& info, bool refresh_on_create = true); + [[nodiscard]] ImageId FindImage(const ImageInfo& info); /// Retrieves an image view with the properties of the specified image descriptor. [[nodiscard]] ImageView& FindTexture(const ImageInfo& image_info, @@ -58,8 +58,18 @@ public: [[nodiscard]] ImageView& FindDepthTarget(const ImageInfo& image_info, const ImageViewInfo& view_info); + /// Updates image contents if it was modified by CPU. + void UpdateImage(ImageId image_id, Vulkan::Scheduler* custom_scheduler = nullptr) { + Image& image = slot_images[image_id]; + if (False(image.flags & ImageFlagBits::CpuModified)) { + return; + } + RefreshImage(image, custom_scheduler); + TrackImage(image, image_id); + } + /// Reuploads image contents. - void RefreshImage(Image& image); + void RefreshImage(Image& image, Vulkan::Scheduler* custom_scheduler = nullptr); /// Retrieves the sampler that matches the provided S# descriptor. [[nodiscard]] vk::Sampler GetSampler(const AmdGpu::Sampler& sampler); @@ -170,7 +180,6 @@ private: Vulkan::Scheduler& scheduler; BufferCache& buffer_cache; PageManager& tracker; - StreamBuffer staging; TileManager tile_manager; Common::SlotVector slot_images; Common::SlotVector slot_image_views; diff --git a/src/video_core/texture_cache/tile_manager.cpp b/src/video_core/texture_cache/tile_manager.cpp index d3a7d7960..6bb104a66 100644 --- a/src/video_core/texture_cache/tile_manager.cpp +++ b/src/video_core/texture_cache/tile_manager.cpp @@ -5,7 +5,6 @@ #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_shader_util.h" #include "video_core/texture_cache/image_view.h" -#include "video_core/texture_cache/texture_cache.h" #include "video_core/texture_cache/tile_manager.h" #include "video_core/host_shaders/detile_m32x1_comp.h" @@ -187,6 +186,7 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) { case vk::Format::eR32Sfloat: case vk::Format::eR32Uint: case vk::Format::eR16G16Sfloat: + case vk::Format::eR16G16Unorm: return vk::Format::eR32Uint; case vk::Format::eBc1RgbaSrgbBlock: case vk::Format::eBc1RgbaUnormBlock: @@ -194,6 +194,8 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) { case vk::Format::eR32G32Sfloat: case vk::Format::eR32G32Uint: case vk::Format::eR16G16B16A16Unorm: + case vk::Format::eR16G16B16A16Uint: + case vk::Format::eR16G16B16A16Sfloat: return vk::Format::eR32G32Uint; case vk::Format::eBc2SrgbBlock: case vk::Format::eBc2UnormBlock: @@ -247,11 +249,11 @@ struct DetilerParams { u32 sizes[14]; }; -static constexpr size_t StreamBufferSize = 128_MB; +static constexpr size_t StreamBufferSize = 1_GB; TileManager::TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler) : instance{instance}, scheduler{scheduler}, - stream_buffer{instance, scheduler, MemoryUsage::Stream, StreamBufferSize} { + stream_buffer{instance, scheduler, MemoryUsage::Upload, StreamBufferSize} { static const std::array detiler_shaders{ HostShaders::DETILE_M8X1_COMP, HostShaders::DETILE_M8X2_COMP, HostShaders::DETILE_M32X1_COMP, HostShaders::DETILE_M32X2_COMP, @@ -397,7 +399,7 @@ std::optional TileManager::TryDetile(Image& image) { const u32 image_size = image.info.guest_size_bytes; const auto [in_buffer, in_offset] = [&] -> std::pair { // Use stream buffer for smaller textures. - if (image_size <= StreamBufferSize) { + if (image_size <= stream_buffer.GetFreeSize()) { u32 offset = stream_buffer.Copy(image.info.guest_address, image_size); return {stream_buffer.Handle(), offset}; }