Merge branch 'master' into spurs0optimize

Elad 2024-11-03 09:48:29 +02:00 committed by GitHub
commit d09bc6a0dc
98 changed files with 1756 additions and 730 deletions


@@ -12,8 +12,8 @@ git submodule -q update --init $(awk '/path/ && !/llvm/ { print $3 }' .gitmodule
mkdir build && cd build || exit 1
export CC=clang
export CXX=clang++
export CC="${CLANG_BINARY}"
export CXX="${CLANGXX_BINARY}"
cmake .. \
-DCMAKE_INSTALL_PREFIX=/usr \
@@ -27,8 +27,8 @@ cmake .. \
-DUSE_SYSTEM_FFMPEG=OFF \
-DUSE_DISCORD_RPC=ON \
-DOpenGL_GL_PREFERENCE=LEGACY \
-DLLVM_DIR=/opt/llvm/lib/cmake/llvm \
-DSTATIC_LINK_LLVM=ON \
-DBUILD_LLVM=OFF \
-G Ninja
ninja; build_status=$?;


@@ -42,6 +42,7 @@ cmake .. \
-DCMAKE_RANLIB="$RANLIB" \
-DUSE_SYSTEM_CURL=ON \
-DUSE_SDL=ON \
-DUSE_SYSTEM_SDL=ON \
-DUSE_SYSTEM_FFMPEG=OFF \
-DUSE_DISCORD_RPC=ON \
-DOpenGL_GL_PREFERENCE=LEGACY \
@@ -61,5 +62,5 @@ shellcheck .ci/*.sh
} && SHOULD_DEPLOY="true" || SHOULD_DEPLOY="false"
if [ "$build_status" -eq 0 ] && [ "$SHOULD_DEPLOY" = "true" ]; then
.ci/deploy-linux-legacy.sh "x86_64"
.ci/deploy-linux.sh "x86_64"
fi


@@ -3,18 +3,18 @@
# shellcheck disable=SC2086
brew_arm64_install_packages() {
for pkg in "$@"; do
echo "Fetching bottle for $pkg..."
bottle_path="$("$BREW_ARM64_PATH/bin/brew" --cache --bottle-tag=arm64_sonoma "$pkg")"
echo "Fetching bottle for $pkg (arm64)..."
bottle_path="$("$BREW_ARM64_PATH/bin/brew" --cache --bottle-tag=arm64_ventura "$pkg")"
if [ ! -f "$bottle_path" ]; then
if ! "$BREW_ARM64_PATH/bin/brew" fetch --force --bottle-tag=arm64_sonoma "$pkg"; then
if ! "$BREW_ARM64_PATH/bin/brew" fetch --force --verbose --debug --bottle-tag=arm64_ventura "$pkg"; then
echo "Failed to fetch bottle for $pkg"
return 1
fi
bottle_path="$("$BREW_ARM64_PATH/bin/brew" --cache --bottle-tag=arm64_sonoma "$pkg")"
bottle_path="$("$BREW_ARM64_PATH/bin/brew" --cache --bottle-tag=arm64_ventura "$pkg")"
fi
echo "Installing $pkg..."
"$BREW_ARM64_PATH/bin/brew" install --ignore-dependencies "$bottle_path" || true
echo "Installing $pkg (arm64)..."
"$BREW_ARM64_PATH/bin/brew" install --force --force-bottle --ignore-dependencies "$bottle_path" || true
done
}
@@ -23,9 +23,14 @@ export HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK=1
export HOMEBREW_NO_INSTALL_CLEANUP=1
/usr/local/bin/brew update
sudo rm -rf /usr/local/Cellar/curl /usr/local/opt/curl
/usr/local/bin/brew install -f --overwrite curl
/usr/local/bin/brew uninstall -f --ignore-dependencies ffmpeg
/usr/local/bin/brew install -f --build-from-source ffmpeg@5 || true
/usr/local/bin/brew install -f --overwrite python || true
/usr/local/bin/brew link --overwrite python || true
/usr/local/bin/brew install -f --overwrite nasm ninja p7zip ccache pipenv #create-dmg
/usr/local/bin/brew link -f curl || true
/usr/local/bin/brew install llvm@$LLVM_COMPILER_VER glew cmake sdl2 vulkan-headers coreutils
/usr/local/bin/brew link -f llvm@$LLVM_COMPILER_VER ffmpeg@5 || true
@@ -34,12 +39,17 @@ sudo mkdir -p "$BREW_ARM64_PATH"
sudo chmod 777 "$BREW_ARM64_PATH"
curl -L https://github.com/Homebrew/brew/tarball/master | tar xz --strip 1 -C "$BREW_ARM64_PATH"
"$BREW_ARM64_PATH/bin/brew" update
brew_arm64_install_packages 0mq aom aribb24 ca-certificates cjson curl dav1d ffmpeg@5 fontconfig freetype freetype2 gettext glew gmp gnutls lame libbluray libidn2 libnettle libogg libpng librist libsodium libsoxr libtasn libtasn1 libunistring libvmaf libvorbis libvpx libx11 libxau libxcb libxdmcp llvm@$LLVM_COMPILER_VER mbedtls molten-vk nettle opencore-amr openjpeg openssl opus p11-kit pkg-config pkgconfig pzstd rav1e sdl2 snappy speex srt svt-av1 theora vulkan-headers webp x264 x265 xz z3 zeromq zmq zstd
#"$BREW_ARM64_PATH/bin/brew" update
# libvorbis requires Homebrew-installed curl, but the arm64 curl can't run on the x64 host, and we also need the aarch64 libs, so we swap in the x64 binary
brew_arm64_install_packages curl
mv /opt/homebrew1/opt/curl/bin/curl /opt/homebrew1/opt/curl/bin/curl.bak
ln -s /usr/local/opt/curl/bin/curl /opt/homebrew1/opt/curl/bin/curl
brew_arm64_install_packages 0mq aom aribb24 ca-certificates cjson dav1d ffmpeg@5 fontconfig freetype freetype2 gettext glew gmp gnutls lame libbluray libidn2 libnettle libogg libpng librist libsodium libsoxr libtasn libtasn1 libunistring libvmaf libvorbis libvpx libx11 libxau libxcb libxdmcp llvm@$LLVM_COMPILER_VER mbedtls molten-vk nettle opencore-amr openjpeg openssl opus p11-kit pkg-config pkgconfig pzstd rav1e sdl2 snappy speex srt svt-av1 theora vulkan-headers webp x264 x265 xz z3 zeromq zmq zstd
"$BREW_ARM64_PATH/bin/brew" link -f ffmpeg@5
# moltenvk based on commit for 1.2.10 release
wget https://raw.githubusercontent.com/Homebrew/homebrew-core/0d9f25fbd1658e975d00bd0e8cccd20a0c2cb74b/Formula/m/molten-vk.rb
# moltenvk based on commit for 1.2.11 release
wget https://raw.githubusercontent.com/Homebrew/homebrew-core/6bfc8950c696d1f952425e8af2a6248603dc0df9/Formula/m/molten-vk.rb
/usr/local/bin/brew install -f --overwrite ./molten-vk.rb
export CXX=clang++
export CC=clang
@@ -125,6 +135,7 @@ export MACOSX_DEPLOYMENT_TARGET=13.0
-DLLVM_TARGET_ARCH=arm64 \
-DCMAKE_OSX_ARCHITECTURES=arm64 \
-DCMAKE_IGNORE_PATH="$BREW_X64_PATH/lib" \
-DCMAKE_IGNORE_PREFIX_PATH=/usr/local/opt \
-DCMAKE_SYSTEM_PROCESSOR=arm64 \
-DCMAKE_TOOLCHAIN_FILE=buildfiles/cmake/TCDarwinARM64.cmake \
-DCMAKE_CXX_FLAGS="-D__MAC_OS_X_VERSION_MIN_REQUIRED=130000" \


@@ -9,15 +9,15 @@ brew install -f --overwrite nasm ninja p7zip ccache pipenv #create-dmg
#/usr/sbin/softwareupdate --install-rosetta --agree-to-license
arch -x86_64 /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
arch -x86_64 /usr/local/bin/brew update
arch -x86_64 /usr/local/bin/brew install -f --overwrite python@3.12 || arch -x86_64 /usr/local/bin/brew link --overwrite python@3.12
arch -x86_64 /usr/local/bin/brew install -f --overwrite python || arch -x86_64 /usr/local/bin/brew link --overwrite python
arch -x86_64 /usr/local/bin/brew uninstall -f --ignore-dependencies ffmpeg
arch -x86_64 /usr/local/bin/brew install -f --build-from-source ffmpeg@5
arch -x86_64 /usr/local/bin/brew reinstall -f --build-from-source gnutls freetype
arch -x86_64 /usr/local/bin/brew install llvm@$LLVM_COMPILER_VER glew cmake sdl2 vulkan-headers coreutils
arch -x86_64 /usr/local/bin/brew link -f llvm@$LLVM_COMPILER_VER ffmpeg@5
# moltenvk based on commit for 1.2.10 release
wget https://raw.githubusercontent.com/Homebrew/homebrew-core/0d9f25fbd1658e975d00bd0e8cccd20a0c2cb74b/Formula/m/molten-vk.rb
# moltenvk based on commit for 1.2.11 release
wget https://raw.githubusercontent.com/Homebrew/homebrew-core/6bfc8950c696d1f952425e8af2a6248603dc0df9/Formula/m/molten-vk.rb
arch -x86_64 /usr/local/bin/brew install -f --overwrite ./molten-vk.rb
export CXX=clang++
export CC=clang


@@ -1,48 +0,0 @@
#!/bin/sh -ex
cd build || exit 1
if [ "$DEPLOY_APPIMAGE" = "true" ]; then
DESTDIR=AppDir ninja install
curl -fsSLo /usr/bin/linuxdeploy https://github.com/linuxdeploy/linuxdeploy/releases/download/continuous/linuxdeploy-x86_64.AppImage
chmod +x /usr/bin/linuxdeploy
curl -fsSLo /usr/bin/linuxdeploy-plugin-qt https://github.com/linuxdeploy/linuxdeploy-plugin-qt/releases/download/continuous/linuxdeploy-plugin-qt-x86_64.AppImage
chmod +x /usr/bin/linuxdeploy-plugin-qt
curl -fsSLo linuxdeploy-plugin-checkrt.sh https://github.com/linuxdeploy/linuxdeploy-plugin-checkrt/releases/download/continuous/linuxdeploy-plugin-checkrt-x86_64.sh
chmod +x ./linuxdeploy-plugin-checkrt.sh
export EXTRA_PLATFORM_PLUGINS="libqwayland-egl.so;libqwayland-generic.so"
export EXTRA_QT_PLUGINS="svg;wayland-decoration-client;wayland-graphics-integration-client;wayland-shell-integration"
APPIMAGE_EXTRACT_AND_RUN=1 linuxdeploy --appdir AppDir --plugin qt
# Remove libwayland-client because it has platform-dependent exports and breaks other OSes
rm -f ./AppDir/usr/lib/libwayland-client.so*
# Remove libvulkan because it causes issues with gamescope
rm -f ./AppDir/usr/lib/libvulkan.so*
# Remove git directory containing local commit history file
rm -rf ./AppDir/usr/share/rpcs3/git
./linuxdeploy-plugin-checkrt.sh --appdir AppDir
linuxdeploy --appimage-extract
./squashfs-root/plugins/linuxdeploy-plugin-appimage/usr/bin/appimagetool AppDir -g
COMM_TAG=$(awk '/version{.*}/ { printf("%d.%d.%d", $5, $6, $7) }' ../rpcs3/rpcs3_version.cpp)
COMM_COUNT="$(git rev-list --count HEAD)"
COMM_HASH="$(git rev-parse --short=8 HEAD)"
RPCS3_APPIMAGE="rpcs3-v${COMM_TAG}-${COMM_COUNT}-${COMM_HASH}_linux64.AppImage"
mv ./RPCS3*.AppImage "$RPCS3_APPIMAGE"
# If we're building using a CI, let's copy over the AppImage artifact
if [ -n "$BUILD_ARTIFACTSTAGINGDIRECTORY" ]; then
cp "$RPCS3_APPIMAGE" "$ARTDIR"
fi
FILESIZE=$(stat -c %s ./rpcs3*.AppImage)
SHA256SUM=$(sha256sum ./rpcs3*.AppImage | awk '{ print $1 }')
echo "${SHA256SUM};${FILESIZE}B" > "$RELEASE_MESSAGE"
fi


@@ -17,7 +17,7 @@ if [ "$DEPLOY_APPIMAGE" = "true" ]; then
export EXTRA_PLATFORM_PLUGINS="libqwayland-egl.so;libqwayland-generic.so"
export EXTRA_QT_PLUGINS="svg;wayland-decoration-client;wayland-graphics-integration-client;wayland-shell-integration;waylandcompositor"
APPIMAGE_EXTRACT_AND_RUN=1 linuxdeploy --appdir AppDir --plugin qt
APPIMAGE_EXTRACT_AND_RUN=1 linuxdeploy --appdir AppDir --plugin qt --plugin checkrt
# Remove libwayland-client because it has platform-dependent exports and breaks other OSes
rm -f ./AppDir/usr/lib/libwayland-client.so*
@@ -28,8 +28,6 @@ if [ "$DEPLOY_APPIMAGE" = "true" ]; then
# Remove git directory containing local commit history file
rm -rf ./AppDir/usr/share/rpcs3/git
./linuxdeploy-plugin-checkrt.sh --appdir AppDir
linuxdeploy --appimage-extract
./squashfs-root/plugins/linuxdeploy-plugin-appimage/usr/bin/appimagetool AppDir -g


@@ -132,7 +132,7 @@ linux_aarch64_task:
matrix:
- name: Cirrus Linux AArch64 Clang
arm_container:
image: 'docker.io/kd117/rpcs3-ci-aarch64:latest'
image: 'docker.io/rpcs3/rpcs3-ci-focal-aarch64:1.0'
cpu: 8
memory: 8G
clang_script:


@@ -4,7 +4,7 @@ include(ExternalProject)
ExternalProject_Add(moltenvk
GIT_REPOSITORY https://github.com/KhronosGroup/MoltenVK.git
GIT_TAG edbdcf0
GIT_TAG 81541f6
BUILD_IN_SOURCE 1
SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/MoltenVK
CONFIGURE_COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/MoltenVK/fetchDependencies" --macos


@@ -5,8 +5,8 @@ if(USE_SYSTEM_OPENAL)
target_include_directories(3rdparty_openal INTERFACE ${OPENAL_INCLUDE_DIR})
target_link_libraries(3rdparty_openal INTERFACE ${OPENAL_LIBRARY})
else()
option(ALSOFT_UTILS "Build utility programs" OFF)
option(ALSOFT_EXAMPLES "Build example programs" OFF)
option(ALSOFT_UTILS "Build utility programs" OFF)
option(ALSOFT_EXAMPLES "Build example programs" OFF)
add_subdirectory(openal-soft EXCLUDE_FROM_ALL)
add_library(3rdparty_openal INTERFACE)
target_link_libraries(3rdparty_openal INTERFACE OpenAL::OpenAL)


@@ -17,7 +17,10 @@ if(WITH_LLVM)
option(LLVM_CCACHE_BUILD OFF)
set(LLVM_ENABLE_WARNINGS OFF CACHE BOOL "Enable compiler warnings.")
if(WIN32 AND COMPILER_X86)
# For Windows x86 (not Windows AArch64).
# The check on MSVC is needed because COMPILER_X86, COMPILER_ARM etc. are not set/supported when the MSVC compiler is used.
# Furthermore, the MSVC compiler is not available/supported on Windows AArch64
if(WIN32 AND (COMPILER_X86 OR MSVC))
set(LLVM_USE_INTEL_JITEVENTS ON)
endif()
@@ -70,12 +73,18 @@ if(WITH_LLVM)
set(LLVM_TARGETS_TO_BUILD "X86" CACHE STRING "Semicolon-separated list of targets to build, or \"all\".")
endif()
endif()
if((WIN32 AND BUILD_LLVM) OR (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND COMPILER_X86))
# For Windows x86 (not Windows AArch64) only when BUILD_LLVM is enabled and
# for Linux x86 (not Linux AArch64) even if BUILD_LLVM is disabled (precompiled llvm used)
if(LLVM_USE_INTEL_JITEVENTS OR (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND COMPILER_X86))
list (APPEND LLVM_ADDITIONAL_LIBS IntelJITEvents)
endif()
# For Linux even if BUILD_LLVM is disabled (precompiled llvm used)
if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
list (APPEND LLVM_ADDITIONAL_LIBS PerfJITEvents)
endif()
llvm_map_components_to_libnames(LLVM_LIBS
${LLVM_TARGETS_TO_BUILD}
${LLVM_ADDITIONAL_LIBS}


@@ -7,25 +7,40 @@ Other instructions may be found [here](https://wiki.rpcs3.net/index.php?title=Bu
### Windows 10 or later
* [CMake 3.28.0+](https://www.cmake.org/download/) (add to PATH)
* [Python 3.6+](https://www.python.org/downloads/) (add to PATH)
* [Qt 6.7.3](https://www.qt.io/download-qt-installer)
* [Visual Studio 2022](https://visualstudio.microsoft.com/thank-you-downloading-visual-studio/?sku=Community) (or at least Visual Studio 2019 16.11.xx+ as C++20 is not included in previous versions)
* [Vulkan SDK 1.3.268.0](https://vulkan.lunarg.com/sdk/home) (See "Install the SDK" [here](https://vulkan.lunarg.com/doc/sdk/latest/windows/getting_started.html)) for now future SDKs don't work. You need precisely 1.3.268.0.
The following tools are required to build RPCS3 on Windows 10 or later:
- [Visual Studio 2022](https://visualstudio.microsoft.com/thank-you-downloading-visual-studio/?sku=Community) (or at least Visual Studio 2019 16.11.xx+ as C++20 is not included in previous versions)
- **Optional** - [CMake 3.28.0+](https://www.cmake.org/download/) (add to PATH)
**Either add the** `QTDIR` **environment variable, e.g.** `<QtInstallFolder>\6.7.3\msvc2019_64\` **, or use the [Visual Studio Qt Plugin](https://marketplace.visualstudio.com/items?itemName=TheQtCompany.QtVisualStudioTools2019)**
**NOTES:**
- **Visual Studio 2022** integrates **CMake 3.29+** and supports both the `sln` solution (`.sln`, `.vcxproj`) and the `CMake` solution (`CMakeLists.txt`, `CMakePresets.json`).
See sections [Building with Visual Studio sln solution](#building-with-visual-studio-sln-solution) and [Building with Visual Studio CMake solution](#building-with-visual-studio-cmake-solution)
on how to build the project with **Visual Studio**.
- Install and use the standalone **CMake** tool only if you prefer it. See section [Building with standalone CMake tool](#building-with-standalone-cmake-tool) on how to build the project
with the standalone **CMake** tool.
**NOTE: If you have issues with the Qt plugin, you may want to uninstall the Qt Plugin and install the [Legacy Qt Plugin](https://marketplace.visualstudio.com/items?itemName=TheQtCompany.LEGACYQtVisualStudioTools2019) instead.**
- [Python 3.6+](https://www.python.org/downloads/) (add to PATH)
- [Qt 6.7.3](https://www.qt.io/download-qt-installer). In case you can't download from the official installer, you can use [Another Qt installer](https://github.com/miurahr/aqtinstall); in that case you will need to manually add the "qtmultimedia" module when installing Qt (see the sketch after this list)
- [Vulkan SDK 1.3.268.0](https://vulkan.lunarg.com/sdk/home) (see "Install the SDK" [here](https://vulkan.lunarg.com/doc/sdk/latest/windows/getting_started.html)); for now, newer SDKs don't work, you need precisely 1.3.268.0.
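For reference, a minimal aqtinstall invocation might look like this (illustrative only; the version and compiler arch here are assumptions, match them to your setup):

```
pip install aqtinstall
aqt install-qt windows desktop 6.7.3 win64_msvc2019_64 -m qtmultimedia
```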
The `sln` solution, available only in **Visual Studio**, is the preferred building solution. It easily allows building the **RPCS3** application in `Release` and `Debug` mode.
In order to build **RPCS3** with the `sln` solution (with **Visual Studio**), **Qt** libs need to be detected. To detect the libs:
- add and set the `QTDIR` environment variable, e.g. `<QtInstallFolder>\6.7.3\msvc2019_64\`
- or use the [Visual Studio Qt Plugin](https://marketplace.visualstudio.com/items?itemName=TheQtCompany.QtVisualStudioTools2019)
**NOTE:** If you have issues with the **Visual Studio Qt Plugin**, you may want to uninstall it and install the [Legacy Qt Plugin](https://marketplace.visualstudio.com/items?itemName=TheQtCompany.LEGACYQtVisualStudioTools2019) instead.
In order to build **RPCS3** with the `CMake` solution (with both **Visual Studio** and the standalone **CMake** tool):
- add and set the `CMAKE_PREFIX_PATH` environment variable to the **Qt** libs path, e.g. `<QtInstallFolder>\6.7.3\msvc2019_64\`
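For example, a sketch of handing the Qt path to CMake at configure time instead of via the environment (the `msvc` preset comes from `CMakePresets.json`; the install folder is an assumption):

```
cmake --preset msvc -DCMAKE_PREFIX_PATH=C:\Qt\6.7.3\msvc2019_64
```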
### Linux
These are the essentials tools to build RPCS3 on Linux. Some of them can be installed through your favorite package manager.
* Clang 17+ or GCC 13+
* [CMake 3.28.0+](https://www.cmake.org/download/)
* [Qt 6.7.3](https://www.qt.io/download-qt-installer)
* [Vulkan SDK 1.3.268.0](https://vulkan.lunarg.com/sdk/home) (See "Install the SDK" [here](https://vulkan.lunarg.com/doc/sdk/latest/linux/getting_started.html)) for now future SDKs don't work. You need precisely 1.3.268.0.
* [SDL2](https://github.com/libsdl-org/SDL/releases) (for the FAudio backend)
These are the essential tools to build RPCS3 on Linux. Some of them can be installed through your favorite package manager:
- Clang 17+ or GCC 13+
- [CMake 3.28.0+](https://www.cmake.org/download/)
- [Qt 6.7.3](https://www.qt.io/download-qt-installer)
- [Vulkan SDK 1.3.268.0](https://vulkan.lunarg.com/sdk/home) (See "Install the SDK" [here](https://vulkan.lunarg.com/doc/sdk/latest/linux/getting_started.html)); for now, newer SDKs don't work, you need precisely 1.3.268.0.
- [SDL2](https://github.com/libsdl-org/SDL/releases) (for the FAudio backend)
**If you have an NVIDIA GPU, you may need to install the libglvnd package.**
@@ -38,6 +53,7 @@ These are the essentials tools to build RPCS3 on Linux. Some of them can be inst
sudo apt-get install build-essential libasound2-dev libpulse-dev libopenal-dev libglew-dev zlib1g-dev libedit-dev libvulkan-dev libudev-dev git libevdev-dev libsdl2-2.0 libsdl2-dev libjack-dev libsndio-dev
Ubuntu is usually horrendously out of date, and some packages need to be downloaded by hand. This part covers Qt, GCC, Vulkan, and CMake.
##### Qt PPA
Ubuntu usually does not have a new enough Qt package to suit rpcs3's needs. There is currently no PPA available to work around this.
@@ -65,6 +81,7 @@ sudo apt install vulkan-sdk
```
##### CMake
```
. /etc/os-release
wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | sudo apt-key add -
@@ -95,26 +112,64 @@ git submodule update --init
### Windows
#### Configuring the Qt plugin (if used)
#### Building with Visual Studio sln solution
1) Go to `Extensions->Qt VS Tools->Qt Versions`.
2) Add the path to your Qt installation with compiler e.g. `<QtInstallFolder>\6.7.3\msvc2019_64`, version will fill in automatically.
3) Go to `Extensions->Qt VS Tools->Options->Legacy Project Format`. (Only available in the legacy Qt plugin)
4) Set `Build: Run pre-build setup` to `true`. (Only available in the legacy Qt plugin)
Start **Visual Studio**, click on `Open a project or solution` and select the `rpcs3.sln` file inside the RPCS3 root folder
#### Building the projects
##### Configuring the Qt Plugin (if used)
Open `rpcs3.sln`. The recommended build configuration is `Release`. (On older revisions: `Release - LLVM`)
1) go to `Extensions->Qt VS Tools->Qt Versions`
2) add the path to your Qt installation with compiler e.g. `<QtInstallFolder>\6.7.3\msvc2019_64`, version will fill in automatically
3) go to `Extensions->Qt VS Tools->Options->Legacy Project Format`. (Only available in the **Legacy Qt Plugin**)
4) set `Build: Run pre-build setup` to `true`. (Only available in the **Legacy Qt Plugin**)
You may want to download the precompiled [LLVM libs](https://github.com/RPCS3/llvm-mirror/releases/download/custom-build-win-16.0.1/llvmlibs_mt.7z) and extract them to `3rdparty\llvm\`, as well as download and extract the [additional libs](https://github.com/RPCS3/glslang/releases/latest/download/glslanglibs_mt.7z) to `lib\%CONFIGURATION%-x64\` to speed up compilation time (unoptimised/debug libs are currently not available precompiled).
##### Building the projects
If you're not using the precompiled libs, build the following projects in *__BUILD_BEFORE* folder by right-clicking on a project > *Build*.:
* glslang
* **Either** llvm_build **or** llvm_build_clang_cl
**NOTE:** The recommended build configuration is `Release`. (On older revisions: `Release - LLVM`)
Afterwards:
You may want to download the precompiled [LLVM libs](https://github.com/RPCS3/llvm-mirror/releases/download/custom-build-win-16.0.1/llvmlibs_mt.7z) and extract them to `3rdparty\llvm\`,
as well as download and extract the [additional libs](https://github.com/RPCS3/glslang/releases/latest/download/glslanglibs_mt.7z) to `lib\%CONFIGURATION%-x64\` to speed up compilation
time (unoptimised/debug libs are currently not available precompiled).
`Build > Build Solution`
If you're not using the precompiled libs, those dependency libs need to be compiled first. From the `Solution Explorer` panel:
1) expand `__BUILD_BEFORE`
2) from the `Solution Configurations` drop-down menu, select `Release` (select `Debug` if you want to build in `Debug` mode)
3) one after another, right-click on the following projects and then click on `Build` to build the selected lib:
- `glslang`
- either `llvm_build`
- or `llvm_build_clang_cl` (if you installed **clang** on VS)
In order to build the **RPCS3** application:
1) from the `Solution Configurations` drop-down menu, select `Release` (select `Debug` if you want to build in `Debug` mode)
**NOTE:** In case you previously compiled the dependency libs under `__BUILD_BEFORE`, you also have to select the same build configuration (e.g. `Release`, if you compiled the dependency libs in `Release` mode)
2) click on `Build` menu and then on `Build Solution`
3) once the build is completed, the **RPCS3** application will be available under the `<rpcs3_root>\bin` folder
#### Building with Visual Studio CMake solution
Start **Visual Studio**, click on `Open a local folder` and select the RPCS3 root folder
Once the project is open on VS, from the `Solution Explorer` panel:
1) right-click on `rpcs3` and then click on `Switch to CMake Targets View`
2) from the `Configuration` drop-down menu, select `msvc-release` (select `msvc-debug` if you want to build in `Debug` mode)
3) right-click on `CMakeLists.txt Project` and then click on `Configure Cache`
4) once the cache is created, the `rpcs3 project` will be available
5) right-click on `rpcs3 Project` and then click on `Build All`, or click on `Build` menu and then on `Build All`
6) once the build is completed, the **RPCS3** application will be available under the `<rpcs3_root>\build-msvc\bin` folder
#### Building with standalone CMake tool
If you prefer to install and use the standalone **CMake** tool:
1) move to the RPCS3 root folder
2) execute the following commands to create the cache and to build the application (for the build, use `--preset msvc-debug` if you want to build in `Debug` mode), respectively:
```
cmake --preset msvc
cmake --build --preset msvc-release
```
3) once the build is completed, the **RPCS3** application will be available under the `<rpcs3_root>\build-msvc\bin` folder
### Linux
@@ -122,7 +177,7 @@ While still in the project root:
1) `cd .. && mkdir --parents rpcs3_build && cd rpcs3_build`
2) `cmake ../rpcs3/ && make` or `CXX=g++-13 CC=gcc-13 cmake ../rpcs3/ && make` to force these compilers
3) Run RPCS3 with `./bin/rpcs3`
3) run RPCS3 with `./bin/rpcs3`
If compiling for ARM, pass the flag `-DUSE_NATIVE_INSTRUCTIONS=OFF` to the cmake command. This resolves some Neon errors when compiling our SIMD headers.
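For example, a minimal sketch of that configure step (same build directory layout as above):

```
cmake ../rpcs3/ -DUSE_NATIVE_INSTRUCTIONS=OFF && make
```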


@@ -25,7 +25,7 @@ option(USE_SYSTEM_ZLIB "Prefer system ZLIB instead of the builtin one" ON)
option(USE_VULKAN "Vulkan render backend" ON)
option(USE_PRECOMPILED_HEADERS "Use precompiled headers" OFF)
option(USE_SDL "Enables SDL input handler" OFF)
option(USE_SYSTEM_SDL "Prefer system SDL instead of the builtin one" OFF)
option(USE_SYSTEM_SDL "Prefer system SDL instead of the builtin one" ON)
option(USE_SYSTEM_FFMPEG "Prefer system ffmpeg instead of the prebuilt one" OFF)
option(USE_SYSTEM_OPENAL "Prefer system OpenAL instead of the prebuilt one" ON)
option(USE_SYSTEM_CURL "Prefer system Curl instead of the prebuilt one" ON)


@@ -7,12 +7,12 @@
"binaryDir": "build-gcc",
"cacheVariables": {
"CMAKE_BUILD_TYPE": "Debug",
"USE_NATIVE_INSTRUCTIONS": "ON",
"USE_PRECOMPILED_HEADERS": "ON",
"USE_FAUDIO": "OFF",
"USE_SYSTEM_CURL": "OFF",
"USE_SYSTEM_ZLIB": "OFF",
"USE_SYSTEM_LIBPNG": "OFF",
"USE_NATIVE_INSTRUCTIONS": "ON",
"USE_PRECOMPILED_HEADERS": "ON",
"BUILD_LLVM": "OFF",
"STATIC_LINK_LLVM": "ON"
}
@@ -23,13 +23,13 @@
"binaryDir": "build-clang64",
"cacheVariables": {
"CMAKE_BUILD_TYPE": "Debug",
"USE_NATIVE_INSTRUCTIONS": "ON",
"USE_PRECOMPILED_HEADERS": "ON",
"USE_FAUDIO": "OFF",
"USE_SYSTEM_CURL": "OFF",
"USE_SYSTEM_ZLIB": "OFF",
"USE_SYSTEM_LIBPNG": "OFF",
"LLVM_ENABLE_LIBCXX": "ON",
"USE_NATIVE_INSTRUCTIONS": "ON",
"USE_PRECOMPILED_HEADERS": "ON",
"BUILD_LLVM": "OFF",
"STATIC_LINK_LLVM": "ON"
},
@@ -48,11 +48,14 @@
"strategy": "external"
},
"cacheVariables": {
"CMAKE_CONFIGURATION_TYPES": "Debug;Release",
"CMAKE_INSTALL_PREFIX": "${sourceDir}/out/install/${presetName}",
"USE_FAUDIO": "OFF",
"USE_PRECOMPILED_HEADERS": "ON",
"USE_SYSTEM_ZLIB": "OFF",
"USE_NATIVE_INSTRUCTIONS": "ON",
"USE_PRECOMPILED_HEADERS": "ON",
"USE_FAUDIO": "OFF",
"USE_SYSTEM_CURL": "OFF",
"USE_SYSTEM_ZLIB": "OFF",
"USE_SYSTEM_OPENAL": "OFF",
"BUILD_LLVM": "ON",
"STATIC_LINK_LLVM": "ON"
},
@@ -64,5 +67,17 @@
}
}
}
],
"buildPresets": [
{
"name": "msvc-debug",
"configurePreset": "msvc",
"configuration": "Debug"
},
{
"name": "msvc-release",
"configurePreset": "msvc",
"configuration": "Release"
}
]
}


@@ -2412,6 +2412,13 @@ fs::file fs::make_gather(std::vector<fs::file> files)
return result;
}
std::string fs::generate_neighboring_path(std::string_view source, [[maybe_unused]] u64 seed)
{
// Seed is currently not used
return fmt::format(u8"%s/%s.%s.tmp", get_parent_dir(source), source.substr(source.find_last_of(fs::delim) + 1), fmt::base57(utils::get_unique_tsc()));
}
bool fs::pending_file::open(std::string_view path)
{
file.close();
@@ -2430,7 +2437,7 @@ bool fs::pending_file::open(std::string_view path)
do
{
m_path = fmt::format(u8"%s/%s.%s.tmp", get_parent_dir(path), path.substr(path.find_last_of(fs::delim) + 1), fmt::base57(utils::get_unique_tsc()));
m_path = fs::generate_neighboring_path(path, 0);
if (file.open(m_path, fs::create + fs::write + fs::read + fs::excl))
{
@@ -2475,7 +2482,6 @@ bool fs::pending_file::commit(bool overwrite)
{
file.sync();
}
#endif
#ifdef _WIN32
@@ -2486,16 +2492,130 @@ bool fs::pending_file::commit(bool overwrite)
disp.DeleteFileW = false;
ensure(SetFileInformationByHandle(file.get_handle(), FileDispositionInfo, &disp, sizeof(disp)));
}
std::vector<std::wstring> hardlink_paths;
const auto ws1 = to_wchar(m_path);
const HANDLE file_handle = !overwrite ? INVALID_HANDLE_VALUE
: CreateFileW(ws1.get(), GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, nullptr, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, nullptr);
while (file_handle != INVALID_HANDLE_VALUE)
{
// Get file ID (used to check for hardlinks)
BY_HANDLE_FILE_INFORMATION file_info;
if (!GetFileInformationByHandle(file_handle, &file_info) || file_info.nNumberOfLinks == 1)
{
CloseHandle(file_handle);
break;
}
// Buffer for holding link name
std::wstring link_name_buffer(MAX_PATH, wchar_t{});
DWORD buffer_size{};
HANDLE find_handle = INVALID_HANDLE_VALUE;
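// Enumerate the file's link names, growing the buffer and retrying while FindFirstFileNameW reports ERROR_MORE_DATA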
while (true)
{
buffer_size = static_cast<DWORD>(link_name_buffer.size() - 1);
find_handle = FindFirstFileNameW(ws1.get(), 0, &buffer_size, link_name_buffer.data());
if (find_handle != INVALID_HANDLE_VALUE || GetLastError() != ERROR_MORE_DATA)
{
break;
}
link_name_buffer.resize(buffer_size + 1);
}
if (find_handle != INVALID_HANDLE_VALUE)
{
const std::wstring_view ws1_sv = ws1.get();
while (true)
{
if (link_name_buffer.c_str() != ws1_sv)
{
// Note: link_name_buffer is a buffer which may contain zeroes so truncate it
hardlink_paths.push_back(link_name_buffer.c_str());
}
buffer_size = static_cast<DWORD>(link_name_buffer.size() - 1);
if (!FindNextFileNameW(find_handle, &buffer_size, link_name_buffer.data()))
{
if (GetLastError() != ERROR_MORE_DATA)
{
break;
}
link_name_buffer.resize(buffer_size + 1);
}
}
}
// Clean up
FindClose(find_handle);
CloseHandle(file_handle);
break;
}
if (!hardlink_paths.empty())
{
// REPLACEFILE_WRITE_THROUGH is not supported
file.sync();
}
#endif
file.close();
#ifdef _WIN32
const auto ws1 = to_wchar(m_path);
const auto ws2 = to_wchar(m_dest);
const auto wdest = to_wchar(m_dest);
if (MoveFileExW(ws1.get(), ws2.get(), overwrite ? MOVEFILE_REPLACE_EXISTING | MOVEFILE_WRITE_THROUGH : MOVEFILE_WRITE_THROUGH))
bool ok = false;
if (hardlink_paths.empty())
{
ok = MoveFileExW(ws1.get(), wdest.get(), overwrite ? MOVEFILE_REPLACE_EXISTING | MOVEFILE_WRITE_THROUGH : MOVEFILE_WRITE_THROUGH);
}
else
{
ok = ReplaceFileW(ws1.get(), wdest.get(), nullptr, 0, nullptr, nullptr);
}
if (ok)
{
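// Re-point each remaining hardlink name at the freshly renamed destination through a temporary link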
for (const std::wstring& link_name : hardlink_paths)
{
std::unique_ptr<wchar_t[]> write_temp_path;
do
{
write_temp_path = to_wchar(fs::generate_neighboring_path(m_dest, 0));
// Generate a temporary hard link
if (CreateHardLinkW(write_temp_path.get(), wdest.get(), nullptr))
{
if (MoveFileExW(write_temp_path.get(), link_name.data(), MOVEFILE_REPLACE_EXISTING))
{
// Success
write_temp_path.reset();
break;
}
break;
}
}
while (fs::g_tls_error == fs::error::exist); // Only retry if failed due to existing file
if (write_temp_path)
{
// Failure
g_tls_error = to_error(GetLastError());
return false;
}
}
// Disable the destructor
m_path.clear();
return true;
@@ -2557,6 +2677,17 @@ void fmt_class_string<fs::seek_mode>::format(std::string& out, u64 arg)
template<>
void fmt_class_string<fs::error>::format(std::string& out, u64 arg)
{
if (arg == static_cast<u64>(fs::error::unknown))
{
// Note: may not be the correct error code because it only prints the last error that occurred
#ifdef _WIN32
fmt::append(out, "Unknown error [errno=%d]", GetLastError());
#else
fmt::append(out, "Unknown error [errno=%d]", errno);
#endif
return;
}
format_enum(out, arg, [](auto arg)
{
switch (arg)


@@ -601,6 +601,8 @@ namespace fs
// Temporary directory
const std::string& get_temp_dir();
std::string generate_neighboring_path(std::string_view source, u64 seed);
// Unique pending file creation destined to be renamed to the destination file
struct pending_file
{


@@ -38,13 +38,13 @@ jobs:
displayName: ccache
- bash: |
docker pull --quiet rpcs3/rpcs3-ci-focal:1.7.1
docker pull --quiet rpcs3/rpcs3-ci-focal:1.9
docker run \
-v $(pwd):/rpcs3 \
--env-file .ci/docker.env \
-v $CCACHE_DIR:/root/.ccache \
-v $BUILD_ARTIFACTSTAGINGDIRECTORY:/root/artifacts \
rpcs3/rpcs3-ci-focal:1.7.1 \
rpcs3/rpcs3-ci-focal:1.9 \
/rpcs3/.ci/build-linux.sh
displayName: Docker setup and build


@@ -132,29 +132,29 @@ if(APPLE)
endif()
qt_finalize_target(rpcs3)
add_custom_command(TARGET rpcs3 POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy
${CMAKE_CURRENT_SOURCE_DIR}/rpcs3.icns $<TARGET_FILE_DIR:rpcs3>/../Resources/rpcs3.icns
COMMAND ${CMAKE_COMMAND} -E copy_directory
${CMAKE_SOURCE_DIR}/bin/Icons $<TARGET_FILE_DIR:rpcs3>/../Resources/Icons
COMMAND ${CMAKE_COMMAND} -E copy_directory
${CMAKE_SOURCE_DIR}/bin/GuiConfigs $<TARGET_FILE_DIR:rpcs3>/../Resources/GuiConfigs
COMMAND ${CMAKE_COMMAND} -E copy_directory
${CMAKE_SOURCE_DIR}/bin/git $<TARGET_FILE_DIR:rpcs3>/../Resources/git
COMMAND "${MACDEPLOYQT_EXECUTABLE}" "${PROJECT_BINARY_DIR}/bin/rpcs3.app" "${QT_DEPLOY_FLAGS}")
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/rpcs3.icns $<TARGET_FILE_DIR:rpcs3>/../Resources/rpcs3.icns
COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_SOURCE_DIR}/bin/Icons $<TARGET_FILE_DIR:rpcs3>/../Resources/Icons
COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_SOURCE_DIR}/bin/GuiConfigs $<TARGET_FILE_DIR:rpcs3>/../Resources/GuiConfigs
COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_SOURCE_DIR}/bin/git $<TARGET_FILE_DIR:rpcs3>/../Resources/git
COMMAND "${MACDEPLOYQT_EXECUTABLE}" "${PROJECT_BINARY_DIR}/bin/rpcs3.app" "${QT_DEPLOY_FLAGS}")
elseif(UNIX)
add_custom_command(TARGET rpcs3 POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy_directory
${CMAKE_SOURCE_DIR}/bin/Icons $<TARGET_FILE_DIR:rpcs3>/Icons)
add_custom_command(TARGET rpcs3 POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy_directory
${CMAKE_SOURCE_DIR}/bin/GuiConfigs $<TARGET_FILE_DIR:rpcs3>/GuiConfigs)
add_custom_command(TARGET rpcs3 POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy_directory
${CMAKE_SOURCE_DIR}/bin/git $<TARGET_FILE_DIR:rpcs3>/git)
COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_SOURCE_DIR}/bin/Icons $<TARGET_FILE_DIR:rpcs3>/Icons
COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_SOURCE_DIR}/bin/GuiConfigs $<TARGET_FILE_DIR:rpcs3>/GuiConfigs
COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_SOURCE_DIR}/bin/git $<TARGET_FILE_DIR:rpcs3>/git)
elseif(WIN32)
if(MSVC)
add_custom_command(TARGET rpcs3 POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy ${PROJECT_BINARY_DIR}/3rdparty/OpenAL/openal-soft/$<CONFIG>/OpenAL32.dll $<TARGET_FILE_DIR:rpcs3>)
endif()
add_custom_command(TARGET rpcs3 POST_BUILD
COMMAND "${CMAKE_COMMAND}" -E copy_directory "${CMAKE_SOURCE_DIR}/bin" "$<TARGET_FILE_DIR:rpcs3>"
COMMAND "${WINDEPLOYQT_EXECUTABLE}" --no-compiler-runtime --no-opengl-sw --no-patchqt --no-translations --no-quick --no-system-d3d-compiler --no-quick-import --plugindir "$<IF:$<CXX_COMPILER_ID:MSVC>,$<TARGET_FILE_DIR:rpcs3>/plugins,$<TARGET_FILE_DIR:rpcs3>/share/qt6/plugins>" --verbose 0 "$<TARGET_FILE:rpcs3>")
COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_SOURCE_DIR}/bin/Icons $<TARGET_FILE_DIR:rpcs3>/Icons
COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_SOURCE_DIR}/bin/GuiConfigs $<TARGET_FILE_DIR:rpcs3>/GuiConfigs
COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_SOURCE_DIR}/bin/git $<TARGET_FILE_DIR:rpcs3>/git
COMMAND "${WINDEPLOYQT_EXECUTABLE}" --no-compiler-runtime --no-opengl-sw --no-patchqt
--no-translations --no-quick --no-system-d3d-compiler --no-quick-import
--plugindir "$<IF:$<CXX_COMPILER_ID:MSVC>,$<TARGET_FILE_DIR:rpcs3>/plugins,$<TARGET_FILE_DIR:rpcs3>/share/qt6/plugins>"
--verbose 0 "$<TARGET_FILE:rpcs3>")
endif()
# Unix installation


@@ -478,6 +478,7 @@ target_sources(rpcs3_emu PRIVATE
RSX/gcm_printing.cpp
RSX/GL/GLCommonDecompiler.cpp
RSX/GL/GLCompute.cpp
RSX/GL/GLDMA.cpp
RSX/GL/GLDraw.cpp
RSX/GL/GLFragmentProgram.cpp
RSX/GL/GLGSRender.cpp
@@ -503,6 +504,7 @@ target_sources(rpcs3_emu PRIVATE
RSX/GL/OpenGL.cpp
RSX/GL/upscalers/fsr1/fsr_pass.cpp
RSX/GSRender.cpp
RSX/Host/RSXDMAWriter.cpp
RSX/Null/NullGSRender.cpp
RSX/NV47/FW/draw_call.cpp
RSX/NV47/FW/reg_context.cpp


@@ -472,7 +472,7 @@ error_code cellSyncQueuePush(ppu_thread& ppu, vm::ptr<CellSyncQueue> queue, vm::
u32 position;
while (!queue->ctrl.atomic_op([&](auto& ctrl)
while (!queue->ctrl.atomic_op([&](CellSyncQueue::ctrl_t& ctrl)
{
return CellSyncQueue::try_push_begin(ctrl, depth, &position);
}))
@@ -509,7 +509,7 @@ error_code cellSyncQueueTryPush(vm::ptr<CellSyncQueue> queue, vm::cptr<void> buf
u32 position;
while (!queue->ctrl.atomic_op([&](auto& ctrl)
while (!queue->ctrl.atomic_op([&](CellSyncQueue::ctrl_t& ctrl)
{
return CellSyncQueue::try_push_begin(ctrl, depth, &position);
}))
@@ -543,7 +543,7 @@ error_code cellSyncQueuePop(ppu_thread& ppu, vm::ptr<CellSyncQueue> queue, vm::p
u32 position;
while (!queue->ctrl.atomic_op([&](auto& ctrl)
while (!queue->ctrl.atomic_op([&](CellSyncQueue::ctrl_t& ctrl)
{
return CellSyncQueue::try_pop_begin(ctrl, depth, &position);
}))
@@ -580,7 +580,7 @@ error_code cellSyncQueueTryPop(vm::ptr<CellSyncQueue> queue, vm::ptr<void> buffe
u32 position;
while (!queue->ctrl.atomic_op([&](auto& ctrl)
while (!queue->ctrl.atomic_op([&](CellSyncQueue::ctrl_t& ctrl)
{
return CellSyncQueue::try_pop_begin(ctrl, depth, &position);
}))
@@ -614,7 +614,7 @@ error_code cellSyncQueuePeek(ppu_thread& ppu, vm::ptr<CellSyncQueue> queue, vm::
u32 position;
while (!queue->ctrl.atomic_op([&](auto& ctrl)
while (!queue->ctrl.atomic_op([&](CellSyncQueue::ctrl_t& ctrl)
{
return CellSyncQueue::try_peek_begin(ctrl, depth, &position);
}))
@@ -651,7 +651,7 @@ error_code cellSyncQueueTryPeek(vm::ptr<CellSyncQueue> queue, vm::ptr<void> buff
u32 position;
while (!queue->ctrl.atomic_op([&](auto& ctrl)
while (!queue->ctrl.atomic_op([&](CellSyncQueue::ctrl_t& ctrl)
{
return CellSyncQueue::try_peek_begin(ctrl, depth, &position);
}))


@@ -7,20 +7,30 @@
inline void try_start(spu_thread& spu)
{
reader_lock lock(spu.run_ctrl_mtx);
bool notify = false;
if (spu.status_npc.fetch_op([](spu_thread::status_npc_sync_var& value)
if (~spu.status_npc.load().status & SPU_STATUS_RUNNING)
{
if (value.status & SPU_STATUS_RUNNING)
reader_lock lock(spu.run_ctrl_mtx);
if (spu.status_npc.fetch_op([](spu_thread::status_npc_sync_var& value)
{
return false;
}
if (value.status & SPU_STATUS_RUNNING)
{
return false;
}
value.status = SPU_STATUS_RUNNING | (value.status & SPU_STATUS_IS_ISOLATED);
return true;
}).second)
value.status = SPU_STATUS_RUNNING | (value.status & SPU_STATUS_IS_ISOLATED);
return true;
}).second)
{
spu.state -= cpu_flag::stop;
notify = true;
}
}
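// Notify after run_ctrl_mtx has been released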
if (notify)
{
spu.state -= cpu_flag::stop;
spu.state.notify_one();
}
};


@@ -543,9 +543,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
template <typename T = u8>
llvm::Value* _ptr(llvm::Value* base, llvm::Value* offset)
{
const auto off = m_ir->CreateGEP(get_type<u8>(), base, offset);
const auto ptr = m_ir->CreateBitCast(off, get_type<T*>());
return ptr;
return m_ir->CreateGEP(get_type<u8>(), base, offset);
}
template <typename T, typename... Args>


@@ -1678,7 +1678,6 @@ void spu_thread::cpu_init()
spurs_average_task_duration = 0;
spurs_waited = false;
spurs_entered_wait = false;
spurs_read_events = false;
int_ctrl[0].clear();
int_ctrl[1].clear();
@@ -2699,7 +2698,7 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8*
bool ok = false;
std::tie(old, ok) = bits->fetch_op([&](auto& v)
std::tie(old, ok) = bits->fetch_op([&](u128& v)
{
if (v & wmask)
{
@@ -2797,7 +2796,7 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8*
res += 127;
// Release bits and notify
bits->atomic_op([&](auto& v)
bits->atomic_op([&](u128& v)
{
v &= ~wmask;
});
@@ -4807,7 +4806,7 @@ bool spu_thread::process_mfc_cmd()
getllar_spin_count = 0;
getllar_busy_waiting_switch = umax;
u64 ntime;
u64 ntime = 0;
rsx::reservation_lock rsx_lock(addr, 128);
for (u64 i = 0; i != umax; [&]()
@@ -4913,21 +4912,14 @@ bool spu_thread::process_mfc_cmd()
// Avoid logging useless commands if there is no reservation
const bool dump = g_cfg.core.mfc_debug && raddr;
const bool is_spurs_task_wait = pc == 0x11e4;
const bool is_spurs_task_wait = pc == 0x11e4 && spurs_addr != 0u - 0x80u;
do
if (!is_spurs_task_wait || spurs_addr != raddr || spurs_waited)
{
//
}
else if ((_ref<u8>(0x100 + 0x73) & (1u << index)) == 0 && (static_cast<u8>(rdata[0x73]) & (1u << index)) != 0)
{
if (!is_spurs_task_wait)
{
break;
}
if (spurs_addr != raddr || g_cfg.core.max_spurs_threads == g_cfg.core.max_spurs_threads.def || spurs_waited || spurs_read_events)
{
spurs_read_events = false;
break;
}
// Wait for other threads to complete their tasks (temporarily)
u32 max_run = group->max_run;
@@ -4973,14 +4965,25 @@ bool spu_thread::process_mfc_cmd()
spurs_waited = true;
spurs_entered_wait = true;
// Wait the duration of 4 tasks
const u64 spurs_wait_time = std::clamp<u64>(spurs_average_task_duration / spurs_task_count_to_calculate * 4, 3000, 100'000);
// Wait the duration of one and a half tasks
const u64 spurs_wait_time = std::clamp<u64>(spurs_average_task_duration / spurs_task_count_to_calculate * 3 / 2, 10'000, 100'000);
spurs_wait_duration_last = spurs_wait_time;
if (spurs_last_task_timestamp)
{
const u64 avg_entry = spurs_average_task_duration / spurs_task_count_to_calculate;
spurs_average_task_duration -= avg_entry;
spurs_average_task_duration += std::min<u64>(45'000, current - spurs_last_task_timestamp);
spu_log.trace("duration: %d, avg=%d", current - spurs_last_task_timestamp, spurs_average_task_duration / spurs_task_count_to_calculate);
spurs_last_task_timestamp = 0;
}
while (true)
{
if (is_stopped())
if (is_stopped() || current - before >= spurs_wait_time)
{
// Timed-out
group->spurs_running++;
break;
}
@@ -5008,20 +5011,12 @@ bool spu_thread::process_mfc_cmd()
}
current = get_system_time();
if (current - before >= spurs_wait_time)
{
// Timed-out
group->spurs_running++;
break;
}
}
state += cpu_flag::temp;
static_cast<void>(test_stopped());
}
}
while (false);
if (do_putllc(ch_mfc_cmd))
{
@@ -5029,19 +5024,50 @@ bool spu_thread::process_mfc_cmd()
if (is_spurs_task_wait)
{
const u64 current = get_system_time();
const bool is_idle = (_ref<u8>(0x100 + 0x73) & (1u << index)) != 0;
const bool was_idle = (static_cast<u8>(rdata[0x73]) & (1u << index)) != 0;
if (spurs_last_task_timestamp)
if (!was_idle && is_idle)
{
const u64 avg_entry = spurs_average_task_duration / spurs_task_count_to_calculate;
spurs_average_task_duration -= spurs_waited && !is_stopped() ? spurs_wait_duration_last + avg_entry : avg_entry;
spurs_average_task_duration += std::min<u64>(45'000, current - spurs_last_task_timestamp);
}
const u32 prev_running = group->spurs_running.fetch_op([](u32& x)
{
if (x)
{
x--;
return true;
}
spurs_last_task_timestamp = current;
spurs_read_events = false;
spurs_waited = false;
spurs_entered_wait = false;
return false;
}).first;
if (prev_running)
{
spurs_entered_wait = true;
}
if (prev_running == group->max_run && prev_running < group->max_num)
{
group->spurs_running.notify_one();
}
}
else if (was_idle && !is_idle)
{
// Cleanup
const u64 current = get_system_time();
if (spurs_last_task_timestamp)
{
const u64 avg_entry = spurs_average_task_duration / spurs_task_count_to_calculate;
spurs_average_task_duration -= avg_entry;
spurs_average_task_duration += std::min<u64>(45'000, current - spurs_last_task_timestamp);
spu_log.trace("duration: %d, avg=%d", current - spurs_last_task_timestamp, spurs_average_task_duration / spurs_task_count_to_calculate);
spurs_last_task_timestamp = 0;
}
spurs_last_task_timestamp = current;
spurs_waited = false;
spurs_entered_wait = false;
}
}
}
else
@@ -5560,6 +5586,8 @@ s64 spu_thread::get_ch_value(u32 ch)
thread_ctrl::wait_on(state, old);
}
fmt::throw_exception("Unreachable"); // Fix unannotated fallthrough warning
}
case MFC_RdTagStat:
@@ -5642,53 +5670,11 @@ s64 spu_thread::get_ch_value(u32 ch)
auto events = get_events(mask1, false, true);
const bool is_spurs_task_wait = pc == 0x11a8 && spurs_addr == raddr;
if (events.count)
{
if (is_spurs_task_wait)
{
spurs_read_events = true;
}
return events.events & mask1;
}
if (is_spurs_task_wait)
{
spurs_read_events = true;
if (g_cfg.core.max_spurs_threads != g_cfg.core.max_spurs_threads.def && !spurs_entered_wait)
{
const u32 prev_running = group->spurs_running.fetch_op([](u32& x)
{
if (x)
{
x--;
return true;
}
return false;
}).first;
if (prev_running)
{
spurs_entered_wait = true;
}
if (prev_running == group->max_run && prev_running < group->max_num)
{
group->spurs_running.notify_one();
if (group->spurs_running == prev_running - 1)
{
// Try to let another thread slip in and take over execution
thread_ctrl::wait_for(300);
}
}
}
}
spu_function_logger logger(*this, "MFC Events read");
lv2_obj::prepare_for_sleep(*this);


@@ -188,10 +188,10 @@ struct spu_channel_op_state
struct alignas(16) spu_channel
{
// Low 32 bits contain value
atomic_t<u64> data;
atomic_t<u64> data{};
// Pending value to be inserted when it is possible in pop() or pop_wait()
atomic_t<u64> jostling_value;
atomic_t<u64> jostling_value{};
public:
static constexpr u32 off_wait = 32;
@@ -667,11 +667,11 @@ public:
u8* reserv_base_addr = vm::g_reservations;
// General-Purpose Registers
std::array<v128, 128> gpr;
SPU_FPSCR fpscr;
std::array<v128, 128> gpr{};
SPU_FPSCR fpscr{};
// MFC command data
spu_mfc_cmd ch_mfc_cmd;
spu_mfc_cmd ch_mfc_cmd{};
// MFC command queue
spu_mfc_cmd mfc_queue[16]{};
@@ -683,9 +683,9 @@ public:
u64 mfc_last_timestamp = 0;
// MFC proxy command data
spu_mfc_cmd mfc_prxy_cmd;
spu_mfc_cmd mfc_prxy_cmd{};
shared_mutex mfc_prxy_mtx;
atomic_t<u32> mfc_prxy_mask;
atomic_t<u32> mfc_prxy_mask = 0;
// Tracks writes to MFC proxy command data
union
@@ -707,11 +707,11 @@ public:
// Range Lock pointer
atomic_t<u64, 64>* range_lock{};
u32 srr0;
u32 ch_tag_upd;
u32 ch_tag_mask;
u32 srr0 = 0;
u32 ch_tag_upd = 0;
u32 ch_tag_mask = 0;
spu_channel ch_tag_stat;
u32 ch_stall_mask;
u32 ch_stall_mask = 0;
spu_channel ch_stall_stat;
spu_channel ch_atomic_stat;
@@ -736,14 +736,14 @@ public:
};
atomic_t<ch_events_t> ch_events;
bool interrupts_enabled;
bool interrupts_enabled = false;
u64 ch_dec_start_timestamp; // timestamp of writing decrementer value
u32 ch_dec_value; // written decrementer value
u64 ch_dec_start_timestamp = 0; // timestamp of writing decrementer value
u32 ch_dec_value = 0; // written decrementer value
bool is_dec_frozen = false;
std::pair<u32, u32> read_dec() const; // Read decrementer
atomic_t<u32> run_ctrl; // SPU Run Control register (only provided to get latest data written)
atomic_t<u32> run_ctrl = 0; // SPU Run Control register (only provided to get latest data written)
shared_mutex run_ctrl_mtx;
struct alignas(8) status_npc_sync_var
@@ -752,10 +752,10 @@ public:
u32 npc; // SPU Next Program Counter register
};
atomic_t<status_npc_sync_var> status_npc;
std::array<spu_int_ctrl_t, 3> int_ctrl; // SPU Class 0, 1, 2 Interrupt Management
atomic_t<status_npc_sync_var> status_npc{};
std::array<spu_int_ctrl_t, 3> int_ctrl{}; // SPU Class 0, 1, 2 Interrupt Management
std::array<std::pair<u32, std::shared_ptr<lv2_event_queue>>, 32> spuq; // Event Queue Keys for SPU Thread
std::array<std::pair<u32, std::shared_ptr<lv2_event_queue>>, 32> spuq{}; // Event Queue Keys for SPU Thread
std::shared_ptr<lv2_event_queue> spup[64]; // SPU Ports
spu_channel exit_status{}; // Threaded SPU exit status (not a channel, but the interface fits)
atomic_t<u32> last_exit_status; // Value to be written in exit_status after checking group termination
@@ -769,7 +769,6 @@ public:
u32 spurs_addr = 0;
bool spurs_waited = false;
bool spurs_entered_wait = false;
bool spurs_read_events = false;
u64 spurs_wait_duration_last = 0;
u64 spurs_average_task_duration = 0;
u64 spurs_last_task_timestamp = 0;


@@ -1852,7 +1852,7 @@ void lv2_obj::schedule_all(u64 current_time)
target->start_time = 0;
if ((target->state.fetch_op(FN(x += cpu_flag::signal, x -= cpu_flag::suspend, x-= remove_yield, void())) & (cpu_flag::wait + cpu_flag::signal)) != cpu_flag::wait)
if ((target->state.fetch_op(AOFN(x += cpu_flag::signal, x -= cpu_flag::suspend, x-= remove_yield, void())) & (cpu_flag::wait + cpu_flag::signal)) != cpu_flag::wait)
{
continue;
}


@@ -119,8 +119,13 @@ std::shared_ptr<lv2_event_queue> lv2_event_queue::find(u64 ipc_key)
extern void resume_spu_thread_group_from_waiting(spu_thread& spu);
CellError lv2_event_queue::send(lv2_event event)
CellError lv2_event_queue::send(lv2_event event, bool* notified_thread, lv2_event_port* port)
{
if (notified_thread)
{
*notified_thread = false;
}
std::lock_guard lock(mutex);
if (!exists)
@@ -162,6 +167,15 @@ CellError lv2_event_queue::send(lv2_event event)
std::tie(ppu.gpr[4], ppu.gpr[5], ppu.gpr[6], ppu.gpr[7]) = event;
awake(&ppu);
if (port && ppu.prio.load().prio < ensure(cpu_thread::get_current<ppu_thread>())->prio.load().prio)
{
// Block event port disconnection for the duration of event sending
// PPU -> lower prio PPU is the only case that can cause thread blocking
port->is_busy++;
ensure(notified_thread);
*notified_thread = true;
}
}
else
{
@@ -709,7 +723,10 @@ error_code sys_event_port_disconnect(ppu_thread& ppu, u32 eport_id)
return CELL_ENOTCONN;
}
// TODO: return CELL_EBUSY if necessary (can't detect the condition)
if (port->is_busy)
{
return CELL_EBUSY;
}
port->queue.reset();
@@ -718,20 +735,32 @@ error_code sys_event_port_disconnect(ppu_thread& ppu, u32 eport_id)
error_code sys_event_port_send(u32 eport_id, u64 data1, u64 data2, u64 data3)
{
if (auto cpu = get_current_cpu_thread())
const auto cpu = cpu_thread::get_current();
const auto ppu = cpu ? cpu->try_get<ppu_thread>() : nullptr;
if (cpu)
{
cpu->state += cpu_flag::wait;
}
sys_event.trace("sys_event_port_send(eport_id=0x%x, data1=0x%llx, data2=0x%llx, data3=0x%llx)", eport_id, data1, data2, data3);
bool notified_thread = false;
const auto port = idm::check<lv2_obj, lv2_event_port>(eport_id, [&, notify = lv2_obj::notify_all_t()](lv2_event_port& port) -> CellError
{
if (ppu && ppu->loaded_from_savestate)
{
port.is_busy++;
notified_thread = true;
return {};
}
if (lv2_obj::check(port.queue))
{
const u64 source = port.name ? port.name : (u64{process_getpid() + 0u} << 32) | u64{eport_id};
return port.queue->send(source, data1, data2, data3);
return port.queue->send(source, data1, data2, data3, &notified_thread, ppu && port.queue->type == SYS_PPU_QUEUE ? &port : nullptr);
}
return CELL_ENOTCONN;
@@ -742,6 +771,19 @@ error_code sys_event_port_send(u32 eport_id, u64 data1, u64 data2, u64 data3)
return CELL_ESRCH;
}
if (ppu && notified_thread)
{
// Wait to be requeued
if (ppu->test_stopped())
{
// Wait again on savestate load
ppu->state += cpu_flag::again;
}
port->is_busy--;
return CELL_OK;
}
if (port.ret)
{
if (port.ret == CELL_EAGAIN)


@@ -79,6 +79,8 @@ struct sys_event_t
// Source, data1, data2, data3
using lv2_event = std::tuple<u64, u64, u64, u64>;
struct lv2_event_port;
struct lv2_event_queue final : public lv2_obj
{
static const u32 id_base = 0x8d000000;
@@ -103,11 +105,11 @@ struct lv2_event_queue final : public lv2_obj
static void save_ptr(utils::serial&, lv2_event_queue*);
static std::shared_ptr<lv2_event_queue> load_ptr(utils::serial& ar, std::shared_ptr<lv2_event_queue>& queue, std::string_view msg = {});
CellError send(lv2_event event);
CellError send(lv2_event event, bool* notified_thread = nullptr, lv2_event_port* port = nullptr);
CellError send(u64 source, u64 d1, u64 d2, u64 d3)
CellError send(u64 source, u64 d1, u64 d2, u64 d3, bool* notified_thread = nullptr, lv2_event_port* port = nullptr)
{
return send(std::make_tuple(source, d1, d2, d3));
return send(std::make_tuple(source, d1, d2, d3), notified_thread, port);
}
// Get event queue by its global key
@@ -121,6 +123,7 @@ struct lv2_event_port final : lv2_obj
const s32 type; // Port type, either IPC or local
const u64 name; // Event source (generated from id and process id if not set)
atomic_t<usz> is_busy = 0; // Counts threads waiting on event sending
std::shared_ptr<lv2_event_queue> queue; // Event queue this port is connected to
lv2_event_port(s32 type, u64 name)


@@ -357,16 +357,14 @@ error_code sys_event_flag_set(cpu_thread& cpu, u32 id, u64 bitptn)
}
}
// Process all waiters in single atomic op
const u32 count = flag->pattern.atomic_op([&](u64& value)
{
value |= bitptn;
u32 count = 0;
u32 count = 0;
if (!flag->sq)
{
return count;
}
// Process all waiters in single atomic op
for (u64 pattern = flag->pattern, to_write = pattern, dependant_mask = 0;; to_write = pattern, dependant_mask = 0)
{
count = 0;
to_write |= bitptn;
dependant_mask = 0;
for (auto ppu = +flag->sq; ppu; ppu = ppu->next_cpu)
{
@@ -405,10 +403,20 @@ error_code sys_event_flag_set(cpu_thread& cpu, u32 id, u64 bitptn)
const u64 pattern = ppu.gpr[4];
const u64 mode = ppu.gpr[5];
if (lv2_event_flag::check_pattern(value, pattern, mode, &ppu.gpr[6]))
// If it's OR mode, the set bits must have woken up the thread, therefore there is no
// dependency on the old value
const u64 dependant_mask_or = ((mode & 0xf) == SYS_EVENT_FLAG_WAIT_OR || (bitptn & pattern & to_write) == pattern ? 0 : pattern);
if (lv2_event_flag::check_pattern(to_write, pattern, mode, &ppu.gpr[6]))
{
dependant_mask |= dependant_mask_or;
ppu.gpr[3] = CELL_OK;
count++;
if (!to_write)
{
break;
}
}
else
{
@@ -416,8 +424,29 @@ error_code sys_event_flag_set(cpu_thread& cpu, u32 id, u64 bitptn)
}
}
return count;
});
dependant_mask &= ~bitptn;
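// Commit the new pattern only if no dependent bits changed concurrently; otherwise retry with the observed value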
auto [new_val, ok] = flag->pattern.fetch_op([&](u64& x)
{
if ((x ^ pattern) & dependant_mask)
{
return false;
}
x |= bitptn;
// Clear the bit-wise difference
x &= ~((pattern | bitptn) & ~to_write);
return true;
});
if (ok)
{
break;
}
pattern = new_val;
}
if (!count)
{


@@ -142,7 +142,7 @@ error_code _sys_lwmutex_lock(ppu_thread& ppu, u32 lwmutex_id, u64 timeout)
const auto mutex = idm::get<lv2_obj, lv2_lwmutex>(lwmutex_id, [&, notify = lv2_obj::notify_all_t()](lv2_lwmutex& mutex)
{
if (s32 signal = mutex.lv2_control.fetch_op([](auto& data)
if (s32 signal = mutex.lv2_control.fetch_op([](lv2_lwmutex::control_data_t& data)
{
if (data.signaled)
{
@@ -297,7 +297,7 @@ error_code _sys_lwmutex_trylock(ppu_thread& ppu, u32 lwmutex_id)
const auto mutex = idm::check<lv2_obj, lv2_lwmutex>(lwmutex_id, [&](lv2_lwmutex& mutex)
{
auto [_, ok] = mutex.lv2_control.fetch_op([](auto& data)
auto [_, ok] = mutex.lv2_control.fetch_op([](lv2_lwmutex::control_data_t& data)
{
if (data.signaled & 1)
{


@@ -93,7 +93,7 @@ std::shared_ptr<vm::block_t> reserve_map(u32 alloc_size, u32 align)
// Todo: fix order of error checks
error_code sys_memory_allocate(cpu_thread& cpu, u32 size, u64 flags, vm::ptr<u32> alloc_addr)
error_code sys_memory_allocate(cpu_thread& cpu, u64 size, u64 flags, vm::ptr<u32> alloc_addr)
{
cpu.state += cpu_flag::wait;
@@ -129,9 +129,9 @@ error_code sys_memory_allocate(cpu_thread& cpu, u32 size, u64 flags, vm::ptr<u32
return {CELL_ENOMEM, dct.size - dct.used};
}
if (const auto area = reserve_map(size, align))
if (const auto area = reserve_map(static_cast<u32>(size), align))
{
if (const u32 addr = area->alloc(size, nullptr, align))
if (const u32 addr = area->alloc(static_cast<u32>(size), nullptr, align))
{
ensure(!g_fxo->get<sys_memory_address_table>().addrs[addr >> 16].exchange(&dct));
@@ -139,7 +139,7 @@ error_code sys_memory_allocate(cpu_thread& cpu, u32 size, u64 flags, vm::ptr<u32
{
sys_memory.notice("sys_memory_allocate(): Allocated 0x%x address (size=0x%x)", addr, size);
vm::lock_sudo(addr, size);
vm::lock_sudo(addr, static_cast<u32>(size));
cpu.check_state();
*alloc_addr = addr;
return CELL_OK;
@@ -155,7 +155,7 @@ error_code sys_memory_allocate(cpu_thread& cpu, u32 size, u64 flags, vm::ptr<u32
return CELL_ENOMEM;
}
error_code sys_memory_allocate_from_container(cpu_thread& cpu, u32 size, u32 cid, u64 flags, vm::ptr<u32> alloc_addr)
error_code sys_memory_allocate_from_container(cpu_thread& cpu, u64 size, u32 cid, u64 flags, vm::ptr<u32> alloc_addr)
{
cpu.state += cpu_flag::wait;
@@ -203,15 +203,15 @@ error_code sys_memory_allocate_from_container(cpu_thread& cpu, u32 size, u32 cid
return {ct.ret, ct->size - ct->used};
}
if (const auto area = reserve_map(size, align))
if (const auto area = reserve_map(static_cast<u32>(size), align))
{
if (const u32 addr = area->alloc(size))
if (const u32 addr = area->alloc(static_cast<u32>(size)))
{
ensure(!g_fxo->get<sys_memory_address_table>().addrs[addr >> 16].exchange(ct.ptr.get()));
if (alloc_addr)
{
vm::lock_sudo(addr, size);
vm::lock_sudo(addr, static_cast<u32>(size));
cpu.check_state();
*alloc_addr = addr;
return CELL_OK;
@@ -320,7 +320,7 @@ error_code sys_memory_get_user_memory_stat(cpu_thread& cpu, vm::ptr<sys_memory_u
return CELL_OK;
}
error_code sys_memory_container_create(cpu_thread& cpu, vm::ptr<u32> cid, u32 size)
error_code sys_memory_container_create(cpu_thread& cpu, vm::ptr<u32> cid, u64 size)
{
cpu.state += cpu_flag::wait;
@@ -345,7 +345,7 @@ error_code sys_memory_container_create(cpu_thread& cpu, vm::ptr<u32> cid, u32 si
}
// Create the memory container
if (const u32 id = idm::make<lv2_memory_container>(size, true))
if (const u32 id = idm::make<lv2_memory_container>(static_cast<u32>(size), true))
{
cpu.check_state();
*cid = id;


@@ -128,13 +128,13 @@ struct sys_memory_user_memory_stat_t
};
// SysCalls
error_code sys_memory_allocate(cpu_thread& cpu, u32 size, u64 flags, vm::ptr<u32> alloc_addr);
error_code sys_memory_allocate_from_container(cpu_thread& cpu, u32 size, u32 cid, u64 flags, vm::ptr<u32> alloc_addr);
error_code sys_memory_allocate(cpu_thread& cpu, u64 size, u64 flags, vm::ptr<u32> alloc_addr);
error_code sys_memory_allocate_from_container(cpu_thread& cpu, u64 size, u32 cid, u64 flags, vm::ptr<u32> alloc_addr);
error_code sys_memory_free(cpu_thread& cpu, u32 start_addr);
error_code sys_memory_get_page_attribute(cpu_thread& cpu, u32 addr, vm::ptr<sys_page_attr_t> attr);
error_code sys_memory_get_user_memory_size(cpu_thread& cpu, vm::ptr<sys_memory_info_t> mem_info);
error_code sys_memory_get_user_memory_stat(cpu_thread& cpu, vm::ptr<sys_memory_user_memory_stat_t> mem_stat);
error_code sys_memory_container_create(cpu_thread& cpu, vm::ptr<u32> cid, u32 size);
error_code sys_memory_container_create(cpu_thread& cpu, vm::ptr<u32> cid, u64 size);
error_code sys_memory_container_destroy(cpu_thread& cpu, u32 cid);
error_code sys_memory_container_get_size(cpu_thread& cpu, vm::ptr<sys_memory_info_t> mem_info, u32 cid);
error_code sys_memory_container_destroy_parent_with_childs(cpu_thread& cpu, u32 cid, u32 must_0, vm::ptr<u32> mc_child);


@@ -562,20 +562,48 @@ error_code sys_spu_thread_initialize(ppu_thread& ppu, vm::ptr<u32> thread, u32 g
sys_spu.warning("sys_spu_thread_initialize(thread=*0x%x, group=0x%x, spu_num=%d, img=*0x%x, attr=*0x%x, arg=*0x%x)", thread, group_id, spu_num, img, attr, arg);
const u32 option = attr->option;
if (attr->name_len > 0x80 || option & ~(SYS_SPU_THREAD_OPTION_DEC_SYNC_TB_ENABLE | SYS_SPU_THREAD_OPTION_ASYNC_INTR_ENABLE))
if (spu_num >= std::size(decltype(lv2_spu_group::threads_map){}))
{
return CELL_EINVAL;
}
sys_spu_image image;
if (!attr)
{
return CELL_EFAULT;
}
switch (img->type)
const sys_spu_thread_attribute attr_data = *attr;
if (attr_data.name_len > 0x80)
{
return CELL_EINVAL;
}
if (!arg)
{
return CELL_EFAULT;
}
const sys_spu_thread_argument args = *arg;
const u32 option = attr_data.option;
if (option & ~(SYS_SPU_THREAD_OPTION_DEC_SYNC_TB_ENABLE | SYS_SPU_THREAD_OPTION_ASYNC_INTR_ENABLE))
{
return CELL_EINVAL;
}
if (!img)
{
return CELL_EFAULT;
}
sys_spu_image image = *img;
switch (image.type)
{
case SYS_SPU_IMAGE_TYPE_KERNEL:
{
const auto handle = idm::get<lv2_obj, lv2_spu_image>(img->entry_point);
const auto handle = idm::get<lv2_obj, lv2_spu_image>(image.entry_point);
if (!handle)
{
@ -591,12 +619,11 @@ error_code sys_spu_thread_initialize(ppu_thread& ppu, vm::ptr<u32> thread, u32 g
}
case SYS_SPU_IMAGE_TYPE_USER:
{
if (img->entry_point > 0x3fffc || img->nsegs <= 0 || img->nsegs > 0x20)
if (image.entry_point > 0x3fffc || image.nsegs <= 0 || image.nsegs > 0x20)
{
return CELL_EINVAL;
}
image = *img;
break;
}
default: return CELL_EINVAL;
@ -672,7 +699,7 @@ error_code sys_spu_thread_initialize(ppu_thread& ppu, vm::ptr<u32> thread, u32 g
}
// Read thread name
const std::string thread_name(attr->name.get_ptr(), std::max<u32>(attr->name_len, 1) - 1);
const std::string thread_name(attr_data.name.get_ptr(), std::max<u32>(attr_data.name_len, 1) - 1);
const auto group = idm::get<lv2_spu_group>(group_id);
@ -681,11 +708,6 @@ error_code sys_spu_thread_initialize(ppu_thread& ppu, vm::ptr<u32> thread, u32 g
return CELL_ESRCH;
}
if (spu_num >= group->threads_map.size())
{
return CELL_EINVAL;
}
std::unique_lock lock(group->mutex);
if (auto state = +group->run_state; state != SPU_THREAD_GROUP_STATUS_NOT_INITIALIZED)
@ -725,7 +747,7 @@ error_code sys_spu_thread_initialize(ppu_thread& ppu, vm::ptr<u32> thread, u32 g
ensure(vm::get(vm::spu)->falloc(spu->vm_offset(), SPU_LS_SIZE, &spu->shm, static_cast<u64>(vm::page_size_64k) | static_cast<u64>(vm::alloc_hidden)));
spu->map_ls(*spu->shm, spu->ls);
group->args[inited] = {arg->arg1, arg->arg2, arg->arg3, arg->arg4};
group->args[inited] = {args.arg1, args.arg2, args.arg3, args.arg4};
group->imgs[inited].first = image.entry_point;
group->imgs[inited].second = std::move(spu_segs);
@ -800,12 +822,14 @@ error_code sys_spu_thread_group_create(ppu_thread& ppu, vm::ptr<u32> id, u32 num
const s32 min_prio = g_ps3_process_info.has_root_perm() ? 0 : 16;
if (attr->nsize > 0x80 || !num)
const sys_spu_thread_group_attribute attr_data = *attr;
if (attr_data.nsize > 0x80 || !num)
{
return CELL_EINVAL;
}
const s32 type = attr->type;
const s32 type = attr_data.type;
bool use_scheduler = true;
bool use_memct = !!(type & SYS_SPU_THREAD_GROUP_TYPE_MEMORY_FROM_CONTAINER);
@ -902,7 +926,7 @@ error_code sys_spu_thread_group_create(ppu_thread& ppu, vm::ptr<u32> id, u32 num
if (use_memct && mem_size)
{
const auto sct = idm::get<lv2_memory_container>(attr->ct);
const auto sct = idm::get<lv2_memory_container>(attr_data.ct);
if (!sct)
{
@ -936,7 +960,7 @@ error_code sys_spu_thread_group_create(ppu_thread& ppu, vm::ptr<u32> id, u32 num
return CELL_EBUSY;
}
const auto group = idm::make_ptr<lv2_spu_group>(std::string(attr->name.get_ptr(), std::max<u32>(attr->nsize, 1) - 1), num, prio, type, ct, use_scheduler, mem_size);
const auto group = idm::make_ptr<lv2_spu_group>(std::string(attr_data.name.get_ptr(), std::max<u32>(attr_data.nsize, 1) - 1), num, prio, type, ct, use_scheduler, mem_size);
if (!group)
{
@ -1807,6 +1831,11 @@ error_code sys_spu_thread_write_snr(ppu_thread& ppu, u32 id, u32 number, u32 val
sys_spu.trace("sys_spu_thread_write_snr(id=0x%x, number=%d, value=0x%x)", id, number, value);
if (number > 1)
{
return CELL_EINVAL;
}
const auto [thread, group] = lv2_spu_group::get_thread(id);
if (!thread) [[unlikely]]
@ -1814,11 +1843,6 @@ error_code sys_spu_thread_write_snr(ppu_thread& ppu, u32 id, u32 number, u32 val
return CELL_ESRCH;
}
if (number > 1)
{
return CELL_EINVAL;
}
thread->push_snr(number, value);
return CELL_OK;
@ -1895,21 +1919,19 @@ error_code sys_spu_thread_group_disconnect_event(ppu_thread& ppu, u32 id, u32 et
if (!ep)
{
sys_spu.error("sys_spu_thread_group_disconnect_event(): unknown event type (%d)", et);
return CELL_EINVAL;
return CELL_OK;
}
// No error checking is performed
std::lock_guard lock(group->mutex);
if (!lv2_obj::check(*ep))
{
return CELL_EINVAL;
}
ep->reset();
return CELL_OK;
}
error_code sys_spu_thread_connect_event(ppu_thread& ppu, u32 id, u32 eq, u32 et, u8 spup)
error_code sys_spu_thread_connect_event(ppu_thread& ppu, u32 id, u32 eq, u32 et, u32 spup)
{
ppu.state += cpu_flag::wait;
@ -1943,7 +1965,7 @@ error_code sys_spu_thread_connect_event(ppu_thread& ppu, u32 id, u32 eq, u32 et,
return CELL_OK;
}
error_code sys_spu_thread_disconnect_event(ppu_thread& ppu, u32 id, u32 et, u8 spup)
error_code sys_spu_thread_disconnect_event(ppu_thread& ppu, u32 id, u32 et, u32 spup)
{
ppu.state += cpu_flag::wait;
@ -2068,6 +2090,11 @@ error_code sys_spu_thread_group_connect_event_all_threads(ppu_thread& ppu, u32 i
sys_spu.warning("sys_spu_thread_group_connect_event_all_threads(id=0x%x, eq=0x%x, req=0x%llx, spup=*0x%x)", id, eq, req, spup);
if (!req)
{
return CELL_EINVAL;
}
const auto group = idm::get<lv2_spu_group>(id);
const auto queue = idm::get<lv2_obj, lv2_event_queue>(eq);
@ -2076,11 +2103,6 @@ error_code sys_spu_thread_group_connect_event_all_threads(ppu_thread& ppu, u32 i
return CELL_ESRCH;
}
if (!req)
{
return CELL_EINVAL;
}
std::unique_lock lock(group->mutex);
if (auto state = +group->run_state;
@ -2144,12 +2166,17 @@ error_code sys_spu_thread_group_connect_event_all_threads(ppu_thread& ppu, u32 i
return CELL_OK;
}
error_code sys_spu_thread_group_disconnect_event_all_threads(ppu_thread& ppu, u32 id, u8 spup)
error_code sys_spu_thread_group_disconnect_event_all_threads(ppu_thread& ppu, u32 id, u32 spup)
{
ppu.state += cpu_flag::wait;
sys_spu.warning("sys_spu_thread_group_disconnect_event_all_threads(id=0x%x, spup=%d)", id, spup);
if (spup > 63)
{
return CELL_EINVAL;
}
const auto group = idm::get<lv2_spu_group>(id);
if (!group)
@ -2157,11 +2184,6 @@ error_code sys_spu_thread_group_disconnect_event_all_threads(ppu_thread& ppu, u3
return CELL_ESRCH;
}
if (spup > 63)
{
return CELL_EINVAL;
}
std::lock_guard lock(group->mutex);
for (auto& t : group->threads)


@ -372,7 +372,7 @@ error_code sys_spu_thread_group_get_priority(ppu_thread&, u32 id, vm::ptr<s32> p
error_code sys_spu_thread_group_connect_event(ppu_thread&, u32 id, u32 eq, u32 et);
error_code sys_spu_thread_group_disconnect_event(ppu_thread&, u32 id, u32 et);
error_code sys_spu_thread_group_connect_event_all_threads(ppu_thread&, u32 id, u32 eq_id, u64 req, vm::ptr<u8> spup);
error_code sys_spu_thread_group_disconnect_event_all_threads(ppu_thread&, u32 id, u8 spup);
error_code sys_spu_thread_group_disconnect_event_all_threads(ppu_thread&, u32 id, u32 spup);
error_code sys_spu_thread_group_set_cooperative_victims(ppu_thread&, u32 id, u32 threads_mask);
error_code sys_spu_thread_group_syscall_253(ppu_thread& ppu, u32 id, vm::ptr<sys_spu_thread_group_syscall_253_info> info);
error_code sys_spu_thread_group_log(ppu_thread&, s32 command, vm::ptr<s32> stat);
@ -382,8 +382,8 @@ error_code sys_spu_thread_write_spu_mb(ppu_thread&, u32 id, u32 value);
error_code sys_spu_thread_set_spu_cfg(ppu_thread&, u32 id, u64 value);
error_code sys_spu_thread_get_spu_cfg(ppu_thread&, u32 id, vm::ptr<u64> value);
error_code sys_spu_thread_write_snr(ppu_thread&, u32 id, u32 number, u32 value);
error_code sys_spu_thread_connect_event(ppu_thread&, u32 id, u32 eq, u32 et, u8 spup);
error_code sys_spu_thread_disconnect_event(ppu_thread&, u32 id, u32 et, u8 spup);
error_code sys_spu_thread_connect_event(ppu_thread&, u32 id, u32 eq, u32 et, u32 spup);
error_code sys_spu_thread_disconnect_event(ppu_thread&, u32 id, u32 et, u32 spup);
error_code sys_spu_thread_bind_queue(ppu_thread&, u32 id, u32 spuq, u32 spuq_num);
error_code sys_spu_thread_unbind_queue(ppu_thread&, u32 id, u32 spuq_num);
error_code sys_spu_thread_get_exit_status(ppu_thread&, u32 id, vm::ptr<s32> status);
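Note on the sys_spu_thread_initialize/sys_spu_thread_group_create rework above: the guest structures (*attr, *arg) are now copied into locals (attr_data, args) before validation, so every later use reads the snapshot instead of re-dereferencing guest memory that another thread could mutate between check and use. A minimal sketch of the idea, with hypothetical names:

#include <cstdint>

struct spu_attr_t { uint32_t name_len; uint32_t option; };

// Snapshot a guest-visible struct once, then validate and use only the copy.
bool init_from_guest(const spu_attr_t* attr)
{
    if (!attr)
        return false;              // the EFAULT path in the hunks above
    const spu_attr_t snap = *attr; // single read of guest memory
    if (snap.name_len > 0x80)
        return false;              // EINVAL, checked on the copy
    // ... all later uses read 'snap'; '*attr' is never dereferenced again
    return true;
}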


@ -156,7 +156,7 @@ error_code sys_ss_random_number_generator(u64 pkg_id, vm::ptr<void> buf, u64 siz
error_code sys_ss_access_control_engine(u64 pkg_id, u64 a2, u64 a3)
{
sys_ss.todo("sys_ss_access_control_engine(pkg_id=0x%llx, a2=0x%llx, a3=0x%llx)", pkg_id, a2, a3);
sys_ss.success("sys_ss_access_control_engine(pkg_id=0x%llx, a2=0x%llx, a3=0x%llx)", pkg_id, a2, a3);
const u64 authid = g_ps3_process_info.self_info.valid ?
g_ps3_process_info.self_info.prog_id_hdr.program_authority_id : 0;
@ -167,7 +167,7 @@ error_code sys_ss_access_control_engine(u64 pkg_id, u64 a2, u64 a3)
{
if (!g_ps3_process_info.debug_or_root())
{
return CELL_ENOSYS;
return not_an_error(CELL_ENOSYS);
}
if (!a2)


@ -7,6 +7,7 @@
#include "Emu/Cell/timers.hpp"
#include "util/asm.hpp"
#include "util/sysinfo.hpp"
static u64 timebase_offset;
static u64 systemtime_offset;
@ -146,6 +147,18 @@ u64 convert_to_timebased_time(u64 time)
u64 get_timebased_time()
{
if (u64 freq = utils::get_tsc_freq())
{
const u64 tsc = utils::get_tsc();
#if _MSC_VER
const u64 result = static_cast<u64>(u128_from_mul(tsc, g_timebase_freq) / freq) * g_cfg.core.clocks_scale / 100u;
#else
const u64 result = (tsc / freq * g_timebase_freq + tsc % freq * g_timebase_freq / freq) * g_cfg.core.clocks_scale / 100u;
#endif
return result - timebase_offset;
}
while (true)
{
#ifdef _WIN32
@ -155,7 +168,11 @@ u64 get_timebased_time()
const u64 time = count.QuadPart;
const u64 freq = s_time_aux_info.perf_freq;
#if _MSC_VER
const u64 result = static_cast<u64>(u128_from_mul(time * g_cfg.core.clocks_scale, g_timebase_freq) / freq / 100u);
#else
const u64 result = (time / freq * g_timebase_freq + time % freq * g_timebase_freq / freq) * g_cfg.core.clocks_scale / 100u;
#endif
#else
struct timespec ts;
ensure(::clock_gettime(CLOCK_MONOTONIC, &ts) == 0);
@ -190,6 +207,18 @@ void initialize_timebased_time(u64 timebased_init, bool reset)
// Returns some relative time in microseconds, don't change this fact
u64 get_system_time()
{
if (u64 freq = utils::get_tsc_freq())
{
const u64 tsc = utils::get_tsc();
#if _MSC_VER
const u64 result = static_cast<u64>(u128_from_mul(tsc, 1000000ull) / freq);
#else
const u64 result = (tsc / freq * 1000000ull + tsc % freq * 1000000ull / freq);
#endif
return result;
}
while (true)
{
#ifdef _WIN32
@ -199,7 +228,11 @@ u64 get_system_time()
const u64 time = count.QuadPart;
const u64 freq = s_time_aux_info.perf_freq;
#if _MSC_VER
const u64 result = static_cast<u64>(u128_from_mul(time, 1000000ull) / freq);
#else
const u64 result = time / freq * 1000000ull + (time % freq) * 1000000ull / freq;
#endif
#else
struct timespec ts;
ensure(::clock_gettime(CLOCK_MONOTONIC, &ts) == 0);
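The TSC fast paths added above convert a counter running at freq Hz to another rate without overflowing 64 bits: MSVC builds use a 128-bit multiply (u128_from_mul), everything else decomposes the conversion as t / f * F + t % f * F / f. A worked sketch of the non-MSVC form (standalone, hypothetical helper name):

#include <cstdint>

// Convert 'ticks' at 'freq' Hz to microseconds. Splitting into whole
// seconds plus remainder keeps every intermediate product inside u64 as
// long as (ticks % freq) * 1'000'000 fits, i.e. freq below ~18.4 THz.
uint64_t ticks_to_us(uint64_t ticks, uint64_t freq)
{
    const uint64_t whole = ticks / freq * 1'000'000;        // full seconds
    const uint64_t frac  = ticks % freq * 1'000'000 / freq; // sub-second part
    return whole + frac;
}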


@ -48,13 +48,18 @@ sys_vm_t::sys_vm_t(utils::serial& ar)
g_fxo->get<sys_vm_global_t>().total_vsize += size;
}
error_code sys_vm_memory_map(ppu_thread& ppu, u32 vsize, u32 psize, u32 cid, u64 flag, u64 policy, vm::ptr<u32> addr)
error_code sys_vm_memory_map(ppu_thread& ppu, u64 vsize, u64 psize, u32 cid, u64 flag, u64 policy, vm::ptr<u32> addr)
{
ppu.state += cpu_flag::wait;
sys_vm.warning("sys_vm_memory_map(vsize=0x%x, psize=0x%x, cid=0x%x, flags=0x%x, policy=0x%x, addr=*0x%x)", vsize, psize, cid, flag, policy, addr);
if (!vsize || !psize || vsize % 0x2000000 || vsize > 0x10000000 || psize > 0x10000000 || policy != SYS_VM_POLICY_AUTO_RECOMMENDED)
if (!vsize || !psize || vsize % 0x200'0000 || vsize > 0x1000'0000 || psize % 0x1'0000 || policy != SYS_VM_POLICY_AUTO_RECOMMENDED)
{
return CELL_EINVAL;
}
if (ppu.gpr[11] == 300 && psize < 0x10'0000)
{
return CELL_EINVAL;
}
@ -68,16 +73,16 @@ error_code sys_vm_memory_map(ppu_thread& ppu, u32 vsize, u32 psize, u32 cid, u64
return CELL_ESRCH;
}
if (!g_fxo->get<sys_vm_global_t>().total_vsize.fetch_op([vsize](u32& size)
if (!g_fxo->get<sys_vm_global_t>().total_vsize.fetch_op([vsize, has_root = g_ps3_process_info.has_root_perm()](u32& size)
{
// A single process can hold up to 256MB of virtual memory, even on DECR
// VSH can hold more
if ((g_ps3_process_info.has_root_perm() ? 0x1E000000 : 0x10000000) - size < vsize)
if ((has_root ? 0x1E000000 : 0x10000000) - size < vsize)
{
return false;
}
size += vsize;
size += static_cast<u32>(vsize);
return true;
}).second)
{
@ -86,7 +91,7 @@ error_code sys_vm_memory_map(ppu_thread& ppu, u32 vsize, u32 psize, u32 cid, u64
if (!ct->take(psize))
{
g_fxo->get<sys_vm_global_t>().total_vsize -= vsize;
g_fxo->get<sys_vm_global_t>().total_vsize -= static_cast<u32>(vsize);
return CELL_ENOMEM;
}
@ -96,10 +101,10 @@ error_code sys_vm_memory_map(ppu_thread& ppu, u32 vsize, u32 psize, u32 cid, u64
sys_vm.warning("sys_vm_memory_map(): Found VM 0x%x area (vsize=0x%x)", addr, vsize);
// Alloc all memory (shall not fail)
ensure(area->alloc(vsize));
vm::lock_sudo(area->addr, vsize);
ensure(area->alloc(static_cast<u32>(vsize)));
vm::lock_sudo(area->addr, static_cast<u32>(vsize));
idm::make<sys_vm_t>(area->addr, vsize, ct, psize);
idm::make<sys_vm_t>(area->addr, static_cast<u32>(vsize), ct, static_cast<u32>(psize));
// Write a pointer for the allocated memory
ppu.check_state();
@ -108,11 +113,11 @@ error_code sys_vm_memory_map(ppu_thread& ppu, u32 vsize, u32 psize, u32 cid, u64
}
ct->free(psize);
g_fxo->get<sys_vm_global_t>().total_vsize -= vsize;
g_fxo->get<sys_vm_global_t>().total_vsize -= static_cast<u32>(vsize);
return CELL_ENOMEM;
}
error_code sys_vm_memory_map_different(ppu_thread& ppu, u32 vsize, u32 psize, u32 cid, u64 flag, u64 policy, vm::ptr<u32> addr)
error_code sys_vm_memory_map_different(ppu_thread& ppu, u64 vsize, u64 psize, u32 cid, u64 flag, u64 policy, vm::ptr<u32> addr)
{
ppu.state += cpu_flag::wait;
@ -153,7 +158,7 @@ error_code sys_vm_unmap(ppu_thread& ppu, u32 addr)
return CELL_OK;
}
error_code sys_vm_append_memory(ppu_thread& ppu, u32 addr, u32 size)
error_code sys_vm_append_memory(ppu_thread& ppu, u32 addr, u64 size)
{
ppu.state += cpu_flag::wait;
@ -176,7 +181,7 @@ error_code sys_vm_append_memory(ppu_thread& ppu, u32 addr, u32 size)
return CELL_ENOMEM;
}
vmo.psize += size;
vmo.psize += static_cast<u32>(size);
return {};
});
@ -193,7 +198,7 @@ error_code sys_vm_append_memory(ppu_thread& ppu, u32 addr, u32 size)
return CELL_OK;
}
error_code sys_vm_return_memory(ppu_thread& ppu, u32 addr, u32 size)
error_code sys_vm_return_memory(ppu_thread& ppu, u32 addr, u64 size)
{
ppu.state += cpu_flag::wait;
@ -213,12 +218,12 @@ error_code sys_vm_return_memory(ppu_thread& ppu, u32 addr, u32 size)
auto [_, ok] = vmo.psize.fetch_op([&](u32& value)
{
if (value < 0x100000ull + size)
if (value <= size || value - size < 0x100000ull)
{
return false;
}
value -= size;
value -= static_cast<u32>(size);
return true;
});
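The rewritten guard above is the overflow-safe form of the old check: with size widened to u64, value < 0x100000ull + size could wrap on the addition, while value <= size || value - size < 0x100000ull never performs an operation that can overflow. In isolation (illustrative helper name):

#include <cstdint>

// True when at least 0x100000 bytes would remain after taking 'size'
// out of 'value'. 'value <= size' guards the subtraction, so nothing
// here can wrap even with a guest-controlled 64-bit 'size'.
bool keeps_minimum(uint32_t value, uint64_t size)
{
    return !(value <= size || value - size < 0x100000ull);
}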


@ -58,11 +58,11 @@ struct sys_vm_t
class ppu_thread;
// SysCalls
error_code sys_vm_memory_map(ppu_thread& ppu, u32 vsize, u32 psize, u32 cid, u64 flag, u64 policy, vm::ptr<u32> addr);
error_code sys_vm_memory_map_different(ppu_thread& ppu, u32 vsize, u32 psize, u32 cid, u64 flag, u64 policy, vm::ptr<u32> addr);
error_code sys_vm_memory_map(ppu_thread& ppu, u64 vsize, u64 psize, u32 cid, u64 flag, u64 policy, vm::ptr<u32> addr);
error_code sys_vm_memory_map_different(ppu_thread& ppu, u64 vsize, u64 psize, u32 cid, u64 flag, u64 policy, vm::ptr<u32> addr);
error_code sys_vm_unmap(ppu_thread& ppu, u32 addr);
error_code sys_vm_append_memory(ppu_thread& ppu, u32 addr, u32 size);
error_code sys_vm_return_memory(ppu_thread& ppu, u32 addr, u32 size);
error_code sys_vm_append_memory(ppu_thread& ppu, u32 addr, u64 size);
error_code sys_vm_return_memory(ppu_thread& ppu, u32 addr, u64 size);
error_code sys_vm_lock(ppu_thread& ppu, u32 addr, u32 size);
error_code sys_vm_unlock(ppu_thread& ppu, u32 addr, u32 size);
error_code sys_vm_touch(ppu_thread& ppu, u32 addr, u32 size);


@ -350,7 +350,6 @@ void PadHandlerBase::get_motion_sensors(const std::string& pad_id, const motion_
// Get the current motion values
std::shared_ptr<Pad> pad = std::make_shared<Pad>(m_type, 0, 0, 0, 0);
pad->m_sensors.resize(preview_values.size(), AnalogSensor(0, 0, 0, 0, 0));
pad_ensemble binding{pad, device, nullptr};
get_extended_info(binding);
@ -505,18 +504,18 @@ bool PadHandlerBase::bindPadToDevice(std::shared_ptr<Pad> pad)
pad->m_buttons.emplace_back(CELL_PAD_BTN_OFFSET_PRESS_PIGGYBACK, mapping[button::skateboard_tilt_right], CELL_PAD_CTRL_PRESS_R1);
}
pad->m_sticks.emplace_back(CELL_PAD_BTN_OFFSET_ANALOG_LEFT_X, mapping[button::ls_left], mapping[button::ls_right]);
pad->m_sticks.emplace_back(CELL_PAD_BTN_OFFSET_ANALOG_LEFT_Y, mapping[button::ls_down], mapping[button::ls_up]);
pad->m_sticks.emplace_back(CELL_PAD_BTN_OFFSET_ANALOG_RIGHT_X, mapping[button::rs_left], mapping[button::rs_right]);
pad->m_sticks.emplace_back(CELL_PAD_BTN_OFFSET_ANALOG_RIGHT_Y, mapping[button::rs_down], mapping[button::rs_up]);
pad->m_sticks[0] = AnalogStick(CELL_PAD_BTN_OFFSET_ANALOG_LEFT_X, mapping[button::ls_left], mapping[button::ls_right]);
pad->m_sticks[1] = AnalogStick(CELL_PAD_BTN_OFFSET_ANALOG_LEFT_Y, mapping[button::ls_down], mapping[button::ls_up]);
pad->m_sticks[2] = AnalogStick(CELL_PAD_BTN_OFFSET_ANALOG_RIGHT_X, mapping[button::rs_left], mapping[button::rs_right]);
pad->m_sticks[3] = AnalogStick(CELL_PAD_BTN_OFFSET_ANALOG_RIGHT_Y, mapping[button::rs_down], mapping[button::rs_up]);
pad->m_sensors.emplace_back(CELL_PAD_BTN_OFFSET_SENSOR_X, 0, 0, 0, DEFAULT_MOTION_X);
pad->m_sensors.emplace_back(CELL_PAD_BTN_OFFSET_SENSOR_Y, 0, 0, 0, DEFAULT_MOTION_Y);
pad->m_sensors.emplace_back(CELL_PAD_BTN_OFFSET_SENSOR_Z, 0, 0, 0, DEFAULT_MOTION_Z);
pad->m_sensors.emplace_back(CELL_PAD_BTN_OFFSET_SENSOR_G, 0, 0, 0, DEFAULT_MOTION_G);
pad->m_sensors[0] = AnalogSensor(CELL_PAD_BTN_OFFSET_SENSOR_X, 0, 0, 0, DEFAULT_MOTION_X);
pad->m_sensors[1] = AnalogSensor(CELL_PAD_BTN_OFFSET_SENSOR_Y, 0, 0, 0, DEFAULT_MOTION_Y);
pad->m_sensors[2] = AnalogSensor(CELL_PAD_BTN_OFFSET_SENSOR_Z, 0, 0, 0, DEFAULT_MOTION_Z);
pad->m_sensors[3] = AnalogSensor(CELL_PAD_BTN_OFFSET_SENSOR_G, 0, 0, 0, DEFAULT_MOTION_G);
pad->m_vibrateMotors.emplace_back(true, 0);
pad->m_vibrateMotors.emplace_back(false, 0);
pad->m_vibrateMotors[0] = VibrateMotor(true, 0);
pad->m_vibrateMotors[1] = VibrateMotor(false, 0);
m_bindings.emplace_back(pad, pad_device, nullptr);
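The pad refactor above swaps growable vectors for fixed std::array members, replacing emplace_back with indexed assignment; that is why the header hunk that follows adds default constructors to AnalogStick and VibrateMotor. A minimal sketch of the trade-off (hypothetical types):

#include <array>
#include <cstdint>

struct stick_t
{
    uint32_t offset = 0;
    stick_t() = default; // required: std::array default-constructs elements
    explicit stick_t(uint32_t o) : offset(o) {}
};

struct pad_t
{
    std::array<stick_t, 4> sticks{}; // fixed capacity, no heap allocation
};

void bind(pad_t& pad)
{
    pad.sticks[0] = stick_t(0x40); // indexed assignment, not push_back
}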


@ -417,6 +417,7 @@ struct AnalogStick
std::map<u32, u16> m_pressed_keys_min{}; // only used in keyboard_pad_handler
std::map<u32, u16> m_pressed_keys_max{}; // only used in keyboard_pad_handler
AnalogStick() {}
AnalogStick(u32 offset, std::set<u32> key_codes_min, std::set<u32> key_codes_max)
: m_offset(offset)
, m_key_codes_min(std::move(key_codes_min))
@ -447,6 +448,7 @@ struct VibrateMotor
bool m_is_large_motor = false;
u8 m_value = 0;
VibrateMotor() {}
VibrateMotor(bool is_large_motor, u8 value)
: m_is_large_motor(is_large_motor)
, m_value(value)
@ -489,9 +491,9 @@ struct Pad
u8 m_battery_level{0};
std::vector<Button> m_buttons;
std::vector<AnalogStick> m_sticks;
std::vector<AnalogSensor> m_sensors;
std::vector<VibrateMotor> m_vibrateMotors;
std::array<AnalogStick, 4> m_sticks{};
std::array<AnalogSensor, 4> m_sensors{};
std::array<VibrateMotor, 2> m_vibrateMotors{};
// These hold bits for their respective buttons
u16 m_digital_1{0};


@ -945,7 +945,7 @@ namespace vm
return true;
}
static u32 _page_unmap(u32 addr, u32 max_size, u64 bflags, utils::shm* shm)
static u32 _page_unmap(u32 addr, u32 max_size, u64 bflags, utils::shm* shm, std::vector<std::pair<u64, u64>>& unmap_events)
{
perf_meter<"PAGE_UNm"_u64> perf0;
@ -1009,7 +1009,7 @@ namespace vm
// the RSX might try to call VirtualProtect on memory that is already unmapped
if (auto rsxthr = g_fxo->try_get<rsx::thread>())
{
rsxthr->on_notify_memory_unmapped(addr, size);
rsxthr->on_notify_pre_memory_unmapped(addr, size, unmap_events);
}
// Deregister PPU related data
@ -1309,7 +1309,7 @@ namespace vm
}
}
bool block_t::unmap()
bool block_t::unmap(std::vector<std::pair<u64, u64>>* unmapped)
{
auto& m_map = (m.*block_map)();
@ -1320,7 +1320,10 @@ namespace vm
{
const auto next = std::next(it);
const auto size = it->second.first;
_page_unmap(it->first, size, this->flags, it->second.second.get());
std::vector<std::pair<u64, u64>> event_data;
ensure(size == _page_unmap(it->first, size, this->flags, it->second.second.get(), unmapped ? *unmapped : event_data));
it = next;
}
@ -1480,6 +1483,22 @@ namespace vm
{
auto& m_map = (m.*block_map)();
{
struct notify_t
{
std::vector<std::pair<u64, u64>> event_data;
~notify_t() noexcept
{
if (auto rsxthr = g_fxo->try_get<rsx::thread>())
{
for (const auto [event_data1, event_data2] : event_data)
{
rsxthr->on_notify_post_memory_unmapped(event_data1, event_data2);
}
}
}
} unmap_notification;
vm::writer_lock lock;
const auto found = m_map.find(addr - (flags & stack_guarded ? 0x1000 : 0));
@ -1505,7 +1524,7 @@ namespace vm
}
// Unmap "real" memory pages
ensure(size == _page_unmap(addr, size, this->flags, found->second.second.get()));
ensure(size == _page_unmap(addr, size, this->flags, found->second.second.get(), unmap_notification.event_data));
// Clear stack guards
if (flags & stack_guarded)
@ -1815,9 +1834,9 @@ namespace vm
}
}
bool _unmap_block(const std::shared_ptr<block_t>& block)
bool _unmap_block(const std::shared_ptr<block_t>& block, std::vector<std::pair<u64, u64>>* unmapped = nullptr)
{
return block->unmap();
return block->unmap(unmapped);
}
static bool _test_map(u32 addr, u32 size)
@ -1964,6 +1983,22 @@ namespace vm
std::pair<std::shared_ptr<block_t>, bool> result{};
struct notify_t
{
std::vector<std::pair<u64, u64>> unmap_data;
~notify_t() noexcept
{
if (auto rsxthr = g_fxo->try_get<rsx::thread>())
{
for (const auto [event_data1, event_data2] : unmap_data)
{
rsxthr->on_notify_post_memory_unmapped(event_data1, event_data2);
}
}
}
} unmap_notifications;
vm::writer_lock lock;
for (auto it = g_locations.begin() + memory_location_max; it != g_locations.end(); it++)
@ -1993,7 +2028,7 @@ namespace vm
result.first = std::move(*it);
g_locations.erase(it);
ensure(_unmap_block(result.first));
ensure(_unmap_block(result.first, &unmap_notifications.unmap_data));
result.second = true;
return result;
}
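Pattern note: both unmap paths above gather (event, bitmask) pairs into a vector while vm::writer_lock is held, and deliver them from the destructor of a notify_t declared before the lock, so the notifications fire only after the lock is released. A generic sketch of that scope-exit ordering, with hypothetical names:

#include <cstdint>
#include <utility>
#include <vector>

// Declared *before* the lock and destroyed *after* it: C++ destroys
// locals in reverse order, so the events are sent once the lock is gone.
struct deferred_notify_t
{
    void (*send)(uint64_t, uint64_t) = nullptr;
    std::vector<std::pair<uint64_t, uint64_t>> events;

    ~deferred_notify_t() noexcept
    {
        for (const auto& [a, b] : events)
            if (send)
                send(a, b);
    }
};

void unmap_with_deferred_events(void (*send)(uint64_t, uint64_t))
{
    deferred_notify_t notify{send, {}};
    // std::lock_guard lock(m);          // acquired after 'notify'
    notify.events.emplace_back(1, 0xff); // gathered while locked
}   // the lock would release first, then ~deferred_notify_t() sends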


@ -133,8 +133,8 @@ namespace vm
bool try_alloc(u32 addr, u64 bflags, u32 size, std::shared_ptr<utils::shm>&&) const;
// Unmap block
bool unmap();
friend bool _unmap_block(const std::shared_ptr<block_t>&);
bool unmap(std::vector<std::pair<u64, u64>>* unmapped = nullptr);
friend bool _unmap_block(const std::shared_ptr<block_t>&, std::vector<std::pair<u64, u64>>* unmapped);
public:
block_t(u32 addr, u32 size, u64 flags);


@ -16,7 +16,7 @@ namespace rsx
{
if (enabled) [[unlikely]]
{
last = rsx::uclock();
last = get_system_time();
}
}
@ -28,7 +28,7 @@ namespace rsx
}
auto old = last;
last = rsx::uclock();
last = get_system_time();
return static_cast<s64>(last - old);
}
};


@ -4,20 +4,3 @@
#include <util/sysinfo.hpp>
#include "Emu/Cell/timers.hpp"
namespace rsx
{
static inline u64 uclock()
{
static const ullong s_tsc_scaled_freq = (utils::get_tsc_freq() / 1000000);
if (s_tsc_scaled_freq)
{
return utils::get_tsc() / s_tsc_scaled_freq;
}
else
{
return get_system_time();
}
}
}


@ -300,7 +300,7 @@ namespace gl
m_src = fmt::replace_all(m_src, syntax_replace);
param_buffer.create(gl::buffer::target::uniform, 32, nullptr, gl::buffer::memory_type::local, GL_DYNAMIC_COPY);
param_buffer.create(gl::buffer::target::uniform, 32, nullptr, gl::buffer::memory_type::local, gl::buffer::usage::dynamic_update);
}
~cs_deswizzle_3d()

rpcs3/Emu/RSX/GL/GLDMA.cpp (new file, 126 lines)

@ -0,0 +1,126 @@
#include "stdafx.h"
#include "GLDMA.h"
#include "Emu/Memory/vm.h"
namespace gl
{
static constexpr u32 s_dma_block_size = 0x10000;
static constexpr u32 s_dma_block_mask = ~(s_dma_block_size - 1);
std::unordered_map<u32, std::unique_ptr<dma_block>> g_dma_pool;
void dma_block::allocate(u32 base_address, u32 block_size)
{
// Since this is a userptr block, we don't need to move data around on resize. Just "claim" a different chunk and move on.
if (m_data)
{
m_data->remove();
}
void* userptr = vm::get_super_ptr(base_address);
m_data = std::make_unique<gl::buffer>();
m_data->create(buffer::target::array, block_size, userptr, buffer::memory_type::userptr, 0);
m_base_address = base_address;
// Some drivers may reject userptr input for whatever reason. Check that the state is still valid.
gl::check_state();
}
void* dma_block::map(const utils::address_range& range) const
{
ensure(range.inside(this->range()));
return vm::get_super_ptr(range.start);
}
void dma_block::resize(u32 new_length)
{
if (new_length <= length())
{
return;
}
allocate(m_base_address, new_length);
}
void dma_block::set_parent(const dma_block* other)
{
ensure(this->range().inside(other->range()));
ensure(other != this);
m_parent = other;
if (m_data)
{
m_data->remove();
m_data.reset();
}
}
bool dma_block::can_map(const utils::address_range& range) const
{
if (m_parent)
{
return m_parent->can_map(range);
}
return range.inside(this->range());
}
void clear_dma_resources()
{
g_dma_pool.clear();
}
utils::address_range to_dma_block_range(u32 start, u32 length)
{
const auto start_block_address = start & s_dma_block_mask;
const auto end_block_address = (start + length + s_dma_block_size - 1) & s_dma_block_mask;
return utils::address_range::start_end(start_block_address, end_block_address);
}
const dma_block& get_block(u32 start, u32 length)
{
const auto block_range = to_dma_block_range(start, length);
auto& block = g_dma_pool[block_range.start];
if (!block)
{
block = std::make_unique<dma_block>();
block->allocate(block_range.start, block_range.length());
return *block;
}
const auto range = utils::address_range::start_length(start, length);
if (block->can_map(range)) [[ likely ]]
{
return *block;
}
const auto owner = block->head();
const auto new_length = (block_range.end + 1) - owner->base_addr();
const auto search_end = (block_range.end + 1);
// 1. Resize to new length
ensure((new_length & ~s_dma_block_mask) == 0);
auto new_owner = std::make_unique<dma_block>();
new_owner->allocate(owner->base_addr(), new_length);
// 2. Acquire all the extras
for (u32 id = owner->base_addr() + s_dma_block_size;
id < search_end;
id += s_dma_block_size)
{
ensure((id % s_dma_block_size) == 0);
g_dma_pool[id]->set_parent(new_owner.get());
}
block = std::move(new_owner);
return *block;
}
dma_mapping_handle map_dma(u32 guest_address, u32 length)
{
auto& block = get_block(guest_address, length);
return { guest_address - block.base_addr(), block.get() };
}
}
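The pool above keys blocks by their 64 KiB-aligned base address; to_dma_block_range rounds any request outward to block granularity before the lookup. The rounding in isolation (same constants, hypothetical names):

#include <cstdint>

constexpr uint32_t block_size = 0x10000;          // 64 KiB, as above
constexpr uint32_t block_mask = ~(block_size - 1);

struct block_range_t { uint32_t start, end; };

// Round [start, start + length) outward to whole DMA blocks.
constexpr block_range_t to_block_range(uint32_t start, uint32_t length)
{
    const uint32_t lo = start & block_mask;
    const uint32_t hi = (start + length + block_size - 1) & block_mask;
    return {lo, hi};
}

static_assert(to_block_range(0x12345, 0x10).start == 0x10000);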

rpcs3/Emu/RSX/GL/GLDMA.h (new file, 41 lines)

@ -0,0 +1,41 @@
#pragma once
#include <util/types.hpp>
#include "Utilities/address_range.h"
#include "glutils/buffer_object.h"
// TODO: Unify the DMA implementation across backends as part of RSX restructuring.
namespace gl
{
using dma_mapping_handle = std::pair<u32, gl::buffer*>;
dma_mapping_handle map_dma(u32 guest_addr, u32 length);
void clear_dma_resources();
// GL does not currently support mixed block types...
class dma_block
{
public:
dma_block() = default;
void allocate(u32 base_address, u32 block_size);
void resize(u32 new_length);
void* map(const utils::address_range& range) const;
void set_parent(const dma_block* other);
const dma_block* head() const { return m_parent ? m_parent : this; }
bool can_map(const utils::address_range& range) const;
u32 base_addr() const { return m_base_address; }
u32 length() const { return m_data ? static_cast<u32>(m_data->size()) : 0; }
bool empty() const { return length() == 0; }
buffer* get() const { return m_data.get(); }
utils::address_range range() const { return utils::address_range::start_length(m_base_address, length()); }
protected:
u32 m_base_address = 0;
const dma_block* m_parent = nullptr;
std::unique_ptr<gl::buffer> m_data;
};
}


@ -3,9 +3,11 @@
#include "../Overlays/Shaders/shader_loading_dialog_native.h"
#include "GLGSRender.h"
#include "GLCompute.h"
#include "GLDMA.h"
#include "Emu/Memory/vm_locking.h"
#include "Emu/RSX/rsx_methods.h"
#include "Emu/RSX/Host/RSXDMAWriter.h"
#include "Emu/RSX/NV47/HW/context_accessors.define.h"
[[noreturn]] extern void report_fatal_error(std::string_view _text, bool is_html = false, bool include_help_text = true);
@ -180,6 +182,20 @@ void GLGSRender::on_init_thread()
backend_config.supports_normalized_barycentrics = false;
}
if (gl_caps.AMD_pinned_memory && g_cfg.video.host_label_synchronization)
{
backend_config.supports_host_gpu_labels = true;
m_host_gpu_context_data = std::make_unique<gl::buffer>();
m_host_gpu_context_data->create(gl::buffer::target::array, 4096, nullptr, gl::buffer::memory_type::host_visible,
gl::buffer::usage::host_read | gl::buffer::usage::host_write | gl::buffer::usage::persistent_map);
auto host_context_ptr = reinterpret_cast<rsx::host_gpu_context_t*>(m_host_gpu_context_data->map(0, 4096, gl::buffer::access::persistent_rw));
m_host_dma_ctrl = std::make_unique<rsx::RSXDMAWriter>(host_context_ptr);
m_enqueued_host_write_buffer = std::make_unique<gl::scratch_ring_buffer>();
m_enqueued_host_write_buffer->create(gl::buffer::target::array, 64 * 0x100000, gl::buffer::usage::dynamic_update);
}
// Use industry standard resource alignment values as defaults
m_uniform_buffer_offset_align = 256;
m_min_texbuffer_alignment = 256;
@ -397,6 +413,7 @@ void GLGSRender::on_exit()
// TODO: Move these
gl::destroy_compute_tasks();
gl::destroy_overlay_passes();
gl::clear_dma_resources();
gl::destroy_global_texture_resources();
@ -407,6 +424,10 @@ void GLGSRender::on_exit()
m_prog_buffer.clear();
m_rtts.destroy();
m_host_dma_ctrl.reset();
m_host_gpu_context_data.reset();
m_enqueued_host_write_buffer.reset();
for (auto &fbo : m_framebuffer_cache)
{
fbo.remove();
@ -1193,7 +1214,7 @@ void GLGSRender::notify_tile_unbound(u32 tile)
if (false)
{
u32 addr = rsx::get_address(tiles[tile].offset, tiles[tile].location);
on_notify_memory_unmapped(addr, tiles[tile].size);
on_notify_pre_memory_unmapped(addr, tiles[tile].size, *std::make_unique<std::vector<std::pair<u64, u64>>>());
m_rtts.invalidate_surface_address(addr, false);
}
@ -1203,6 +1224,66 @@ void GLGSRender::notify_tile_unbound(u32 tile)
}
}
bool GLGSRender::release_GCM_label(u32 address, u32 args)
{
if (!backend_config.supports_host_gpu_labels)
{
return false;
}
auto host_ctx = ensure(m_host_dma_ctrl->host_ctx());
if (host_ctx->texture_loads_completed())
{
// We're about to poll waiting for GPU state, ensure the context is still valid.
gl::check_state();
// All texture loads already seen by the host GPU
// Wait for all previously submitted labels to be flushed
m_host_dma_ctrl->drain_label_queue();
return false;
}
const auto mapping = gl::map_dma(address, 4);
const auto write_data = std::bit_cast<u32, be_t<u32>>(args);
const auto release_event_id = host_ctx->on_label_acquire();
// We don't have async texture loads yet, so just release both the label and the commands-complete event
u64 write_buf[2] = { write_data, release_event_id };
const auto host_read_offset = m_enqueued_host_write_buffer->alloc(16, 16);
m_enqueued_host_write_buffer->get().sub_data(host_read_offset, 16, write_buf);
// Now write to DMA and then to host context
m_enqueued_host_write_buffer->get().copy_to(mapping.second, host_read_offset, mapping.first, 4);
m_enqueued_host_write_buffer->get().copy_to(m_host_gpu_context_data.get(), host_read_offset + 8, ::offset32(&rsx::host_gpu_context_t::commands_complete_event), 8);
m_enqueued_host_write_buffer->push_barrier(host_read_offset, 16);
host_ctx->on_label_release();
return true;
}
void GLGSRender::enqueue_host_context_write(u32 offset, u32 size, const void* data)
{
ensure(size <= 8);
const u32 host_read_offset = m_enqueued_host_write_buffer->alloc(8, 16);
m_enqueued_host_write_buffer->get().sub_data(host_read_offset, size, data);
m_enqueued_host_write_buffer->get().copy_to(m_host_gpu_context_data.get(), host_read_offset, offset, size);
m_enqueued_host_write_buffer->push_barrier(host_read_offset, 16);
}
void GLGSRender::on_guest_texture_read()
{
if (!backend_config.supports_host_gpu_labels)
{
return;
}
// Tag the read as being in progress
u64 event_id = m_host_dma_ctrl->host_ctx()->inc_counter();
m_host_dma_ctrl->host_ctx()->texture_load_request_event = event_id;
enqueue_host_context_write(::offset32(&rsx::host_gpu_context_t::texture_load_complete_event), 8, &event_id);
}
void GLGSRender::begin_occlusion_query(rsx::reports::occlusion_query_info* query)
{
query->result = 0;


@ -128,7 +128,7 @@ class GLGSRender : public GSRender, public ::rsx::reports::ZCULL_control
GLProgramBuffer m_prog_buffer;
//buffer
// Draw Buffers
gl::fbo* m_draw_fbo = nullptr;
std::list<gl::framebuffer_holder> m_framebuffer_cache;
std::unique_ptr<gl::texture> m_flip_tex_color[2];
@ -137,7 +137,7 @@ class GLGSRender : public GSRender, public ::rsx::reports::ZCULL_control
std::unique_ptr<gl::upscaler> m_upscaler;
output_scaling_mode m_output_scaling = output_scaling_mode::bilinear;
//vaos are mandatory for core profile
// VAOs are mandatory for core profile
gl::vao m_vao;
shared_mutex m_sampler_mutex;
@ -150,6 +150,10 @@ class GLGSRender : public GSRender, public ::rsx::reports::ZCULL_control
// Occlusion query type, can be SAMPLES_PASSED or ANY_SAMPLES_PASSED
GLenum m_occlusion_type = GL_ANY_SAMPLES_PASSED;
// Host context for GPU-driven work
std::unique_ptr<gl::buffer> m_host_gpu_context_data;
std::unique_ptr<gl::scratch_ring_buffer> m_enqueued_host_write_buffer;
public:
u64 get_cycles() final;
@ -193,6 +197,11 @@ public:
void get_occlusion_query_result(rsx::reports::occlusion_query_info* query) override;
void discard_occlusion_query(rsx::reports::occlusion_query_info* query) override;
// DMA
bool release_GCM_label(u32 address, u32 data) override;
void enqueue_host_context_write(u32 offset, u32 size, const void* data);
void on_guest_texture_read();
// GRAPH backend
void patch_transform_constants(rsx::context* ctx, u32 index, u32 count) override;


@ -3,6 +3,7 @@
#include "GLCompute.h"
#include "GLRenderTargets.h"
#include "GLOverlays.h"
#include "GLGSRender.h"
#include "glutils/blitter.h"
#include "glutils/ring_buffer.h"
@ -285,7 +286,7 @@ namespace gl
if (!(*dst) || max_mem > static_cast<u64>(dst->size()))
{
if (*dst) dst->remove();
dst->create(buffer::target::ssbo, max_mem, nullptr, buffer::memory_type::local, GL_STATIC_COPY);
dst->create(buffer::target::ssbo, max_mem, nullptr, buffer::memory_type::local, 0);
}
if (auto as_vi = dynamic_cast<const gl::viewable_image*>(src);
@ -400,7 +401,7 @@ namespace gl
return;
}
scratch_mem.create(buffer::target::pixel_pack, max_mem, nullptr, buffer::memory_type::local, GL_STATIC_COPY);
scratch_mem.create(buffer::target::pixel_pack, max_mem, nullptr, buffer::memory_type::local, 0);
glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
src->copy_to(&scratch_mem, in_offset, 0, mem_info->image_size_in_bytes);
@ -835,6 +836,10 @@ namespace gl
const GLenum gl_format = std::get<0>(format_type);
const GLenum gl_type = std::get<1>(format_type);
fill_texture(cmd, dst, gcm_format, subresources_layout, is_swizzled, gl_format, gl_type, data_upload_buf);
// Notify the renderer of the upload
auto renderer = static_cast<GLGSRender*>(rsx::get_current_renderer());
renderer->on_guest_texture_read();
}
u32 get_format_texel_width(GLenum format)


@ -59,7 +59,7 @@ namespace gl
pbo.remove();
}
pbo.create(buffer::target::pixel_pack, buffer_size, nullptr, buffer::memory_type::host_visible, GL_STREAM_READ);
pbo.create(buffer::target::pixel_pack, buffer_size, nullptr, buffer::memory_type::host_visible, buffer::usage::host_read);
glBindBuffer(GL_PIXEL_PACK_BUFFER, GL_NONE);
}


@ -3,38 +3,35 @@
namespace gl
{
void buffer::allocate(GLsizeiptr size, const void* data_, memory_type type, GLenum usage)
void buffer::allocate(GLsizeiptr size, const void* data_, memory_type type, GLuint usage_flags)
{
m_memory_type = type;
if (const auto& caps = get_driver_caps();
caps.ARB_buffer_storage_supported)
type != memory_type::userptr && caps.ARB_buffer_storage_supported)
{
GLenum flags = 0;
if (type == memory_type::host_visible)
if (usage_flags & usage::host_write)
{
switch (usage)
{
case GL_STREAM_DRAW:
case GL_STATIC_DRAW:
case GL_DYNAMIC_DRAW:
flags |= GL_MAP_WRITE_BIT;
break;
case GL_STREAM_READ:
case GL_STATIC_READ:
case GL_DYNAMIC_READ:
flags |= GL_MAP_READ_BIT;
break;
default:
fmt::throw_exception("Unsupported buffer usage 0x%x", usage);
}
flags |= GL_MAP_WRITE_BIT;
}
else
if (usage_flags & usage::host_read)
{
// Local memory hints
if (usage == GL_DYNAMIC_COPY)
{
flags |= GL_DYNAMIC_STORAGE_BIT;
}
flags |= GL_MAP_READ_BIT;
}
if (usage_flags & usage::persistent_map)
{
flags |= GL_MAP_PERSISTENT_BIT;
}
if (usage_flags & usage::dynamic_update)
{
flags |= GL_DYNAMIC_STORAGE_BIT;
}
ensure((flags & (GL_MAP_PERSISTENT_BIT | GL_DYNAMIC_STORAGE_BIT)) != (GL_MAP_PERSISTENT_BIT | GL_DYNAMIC_STORAGE_BIT),
"Mutually exclusive usage flags set!");
ensure(type == memory_type::local || flags != 0, "Host-visible memory must have usage flags set!");
if ((flags & GL_MAP_READ_BIT) && !caps.vendor_AMD)
{
@ -51,10 +48,8 @@ namespace gl
}
else
{
data(size, data_, usage);
data(size, data_, GL_STREAM_COPY);
}
m_memory_type = type;
}
buffer::~buffer()
@ -89,18 +84,18 @@ namespace gl
save_binding_state save(current_target(), *this);
}
void buffer::create(GLsizeiptr size, const void* data_, memory_type type, GLenum usage)
void buffer::create(GLsizeiptr size, const void* data_, memory_type type, GLuint usage_bits)
{
create();
allocate(size, data_, type, usage);
allocate(size, data_, type, usage_bits);
}
void buffer::create(target target_, GLsizeiptr size, const void* data_, memory_type type, GLenum usage)
void buffer::create(target target_, GLsizeiptr size, const void* data_, memory_type type, GLuint usage_bits)
{
m_target = target_;
create();
allocate(size, data_, type, usage);
allocate(size, data_, type, usage_bits);
}
void buffer::remove()
@ -117,11 +112,19 @@ namespace gl
{
ensure(m_memory_type != memory_type::local);
DSA_CALL2(NamedBufferData, m_id, size, data_, usage);
m_size = size;
if (m_memory_type == memory_type::userptr)
{
glBindBuffer(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, m_id);
glBufferData(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, size, data_, usage);
return;
}
DSA_CALL2(NamedBufferData, m_id, size, data_, usage);
}
void buffer::sub_data(GLsizeiptr offset, GLsizeiptr length, GLvoid* data)
void buffer::sub_data(GLsizeiptr offset, GLsizeiptr length, const GLvoid* data)
{
ensure(m_memory_type == memory_type::local);
DSA_CALL2(NamedBufferSubData, m_id, offset, length, data);


@ -22,20 +22,30 @@ namespace gl
{
read = GL_MAP_READ_BIT,
write = GL_MAP_WRITE_BIT,
read_write = GL_MAP_READ_BIT | GL_MAP_WRITE_BIT
rw = GL_MAP_READ_BIT | GL_MAP_WRITE_BIT,
persistent_rw = GL_MAP_READ_BIT | GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT
};
enum class memory_type
{
undefined = 0,
local = 1,
host_visible = 2
host_visible = 2,
userptr = 4
};
enum usage
{
host_write = (1 << 0),
host_read = (1 << 1),
persistent_map = (1 << 2),
dynamic_update = (1 << 3),
};
class save_binding_state
{
GLint m_last_binding;
GLenum m_target;
GLint m_last_binding = GL_ZERO;
GLenum m_target = GL_NONE;
public:
save_binding_state(target target_, const buffer& new_state) : save_binding_state(target_)
@ -64,6 +74,11 @@ namespace gl
~save_binding_state()
{
if (!m_target)
{
return;
}
glBindBuffer(m_target, m_last_binding);
}
};
@ -77,7 +92,7 @@ namespace gl
// Metadata
mutable std::pair<u32, u32> m_bound_range{};
void allocate(GLsizeiptr size, const void* data_, memory_type type, GLenum usage);
void allocate(GLsizeiptr size, const void* data_, memory_type type, GLuint usage_bits);
public:
buffer() = default;
@ -88,8 +103,8 @@ namespace gl
void recreate(GLsizeiptr size, const void* data = nullptr);
void create();
void create(GLsizeiptr size, const void* data_ = nullptr, memory_type type = memory_type::local, GLenum usage = GL_STREAM_DRAW);
void create(target target_, GLsizeiptr size, const void* data_ = nullptr, memory_type type = memory_type::local, GLenum usage = GL_STREAM_DRAW);
void create(GLsizeiptr size, const void* data_ = nullptr, memory_type type = memory_type::local, GLuint usage_bits = 0);
void create(target target_, GLsizeiptr size, const void* data_ = nullptr, memory_type type = memory_type::local, GLuint usage_bits = 0);
void remove();
@ -97,7 +112,7 @@ namespace gl
void bind() const { bind(current_target()); }
void data(GLsizeiptr size, const void* data_ = nullptr, GLenum usage = GL_STREAM_DRAW);
void sub_data(GLsizeiptr offset, GLsizeiptr length, GLvoid* data);
void sub_data(GLsizeiptr offset, GLsizeiptr length, const GLvoid* data);
GLubyte* map(GLsizeiptr offset, GLsizeiptr length, access access_);
void unmap();
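The header change above replaces the old GLenum usage hints with an explicit usage bitfield that allocate() translates into glBufferStorage flags. A minimal sketch of that translation, written header-free with the numeric bit values from the GL spec:

#include <cstdint>

enum usage_bits : uint32_t
{
    host_write     = 1 << 0,
    host_read      = 1 << 1,
    persistent_map = 1 << 2,
    dynamic_update = 1 << 3,
};

// Map engine-level usage bits onto GL buffer-storage flags.
uint32_t to_gl_storage_flags(uint32_t usage)
{
    uint32_t flags = 0;
    if (usage & host_write)     flags |= 0x0002; // GL_MAP_WRITE_BIT
    if (usage & host_read)      flags |= 0x0001; // GL_MAP_READ_BIT
    if (usage & persistent_map) flags |= 0x0040; // GL_MAP_PERSISTENT_BIT
    if (usage & dynamic_update) flags |= 0x0100; // GL_DYNAMIC_STORAGE_BIT
    return flags;
}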


@ -33,7 +33,7 @@ namespace gl
void capabilities::initialize()
{
int find_count = 16;
int find_count = 17;
int ext_count = 0;
glGetIntegerv(GL_NUM_EXTENSIONS, &ext_count);
@ -164,6 +164,13 @@ namespace gl
find_count--;
continue;
}
if (check(ext_name, "GL_AMD_pinned_memory"))
{
AMD_pinned_memory = true;
find_count--;
continue;
}
}
// Set GLSL version


@ -25,6 +25,7 @@ namespace gl
bool EXT_dsa_supported = false;
bool EXT_depth_bounds_test = false;
bool AMD_pinned_memory = false;
bool ARB_dsa_supported = false;
bool ARB_bindless_texture_supported = false;
bool ARB_buffer_storage_supported = false;


@ -79,4 +79,12 @@ namespace gl
{
glInsertEventMarkerEXT(static_cast<GLsizei>(strlen(label)), label);
}
// Checks if GL state is still valid
static inline void check_state()
{
// GL_OUT_OF_MEMORY invalidates the OpenGL context and is actually the GL version of DEVICE_LOST.
// This spec workaround allows it to be abused by ISVs to indicate a broken GL context.
ensure(glGetError() != GL_OUT_OF_MEMORY);
}
}


@ -242,14 +242,14 @@ namespace gl
}
}
void scratch_ring_buffer::create(buffer::target target_, u64 size)
void scratch_ring_buffer::create(buffer::target target_, u64 size, u32 usage_flags)
{
if (m_storage)
{
remove();
}
m_storage.create(target_, size, nullptr, gl::buffer::memory_type::local, GL_STATIC_COPY);
m_storage.create(target_, size, nullptr, gl::buffer::memory_type::local, usage_flags);
}
void scratch_ring_buffer::remove()


@ -103,7 +103,7 @@ namespace gl
scratch_ring_buffer(const scratch_ring_buffer&) = delete;
~scratch_ring_buffer();
void create(buffer::target _target, u64 size);
void create(buffer::target _target, u64 size, u32 usage_flags = 0);
void remove();
u32 alloc(u32 size, u32 alignment);


@ -80,7 +80,7 @@ namespace gl
if (!m_ubo)
{
ensure(compiled);
m_ubo.create(gl::buffer::target::uniform, push_buffer_size, nullptr, gl::buffer::memory_type::local, GL_DYNAMIC_COPY);
m_ubo.create(gl::buffer::target::uniform, push_buffer_size, nullptr, gl::buffer::memory_type::local, gl::buffer::usage::dynamic_update);
// Statically bind the image sources
m_program.uniforms["InputTexture"] = GL_TEMP_IMAGE_SLOT(0);


@ -0,0 +1,67 @@
#include "stdafx.h"
#include "RSXDMAWriter.h"
#include "Utilities//Thread.h"
#include <util/asm.hpp>
namespace rsx
{
void RSXDMAWriter::update()
{
if (m_dispatch_handlers.empty())
{
m_job_queue.clear();
return;
}
while (!m_job_queue.empty())
{
const auto job = m_job_queue.front();
if (const auto dispatch = m_dispatch_handlers.find(job.dispatch_class);
dispatch == m_dispatch_handlers.end() || dispatch->second.handler(m_host_context_ptr, &job))
{
// No handler registered, or callback consumed the job
m_job_queue.pop_front();
continue;
}
// Dispatcher found and rejected the job. Stop, we'll try again later.
break;
}
}
void RSXDMAWriter::register_handler(host_dispatch_handler_t handler)
{
m_dispatch_handlers[handler.dispatch_class] = handler;
}
void RSXDMAWriter::deregister_handler(int dispatch_class)
{
m_dispatch_handlers.erase(dispatch_class);
}
void RSXDMAWriter::enqueue(const host_gpu_write_op_t& request)
{
m_job_queue.push_back(request);
}
void RSXDMAWriter::drain_label_queue()
{
if (!m_host_context_ptr)
{
return;
}
// FIXME: This is a busy wait, consider yield to improve responsiveness on weak devices.
while (!m_host_context_ptr->in_flight_commands_completed())
{
utils::pause();
if (thread_ctrl::state() == thread_state::aborting)
{
break;
}
}
}
}
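Usage note for the dispatcher above: update() walks the queue in order and stops at the first job whose handler returns false, so a per-class handler can apply back-pressure without reordering writes. A hypothetical registration (the dispatch_class value is an assumption; the types come from RSXDMAWriter.h below):

#include "Emu/RSX/Host/RSXDMAWriter.h" // path as added by this commit

void install_label_handler(rsx::RSXDMAWriter& dma)
{
    rsx::host_dispatch_handler_t h;
    h.dispatch_class = 1; // assumed class id, for illustration only
    h.handler = [](const volatile rsx::host_gpu_context_t* ctx,
                   const rsx::host_gpu_write_op_t* /*job*/) -> bool
    {
        // true = job consumed and popped; false = stop here and retry
        // later, keeping everything behind the stalled job in order.
        return ctx->in_flight_commands_completed();
    };
    dma.register_handler(h);
}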


@ -0,0 +1,115 @@
#pragma once
#include <util/types.hpp>
#include <unordered_map>
#include <functional>
#include <deque>
namespace rsx
{
struct host_gpu_context_t
{
u64 magic = 0xCAFEBABE;
u64 event_counter = 0;
u64 texture_load_request_event = 0;
u64 texture_load_complete_event = 0;
u64 last_label_acquire_event = 0;
u64 last_label_release2_event = 0;
u64 commands_complete_event = 0;
inline u64 inc_counter() volatile
{
// Workaround for volatile increment warning. GPU can see this value directly, but currently we do not modify it on the device.
event_counter = event_counter + 1;
return event_counter;
}
inline bool in_flight_commands_completed() const volatile
{
return last_label_release2_event <= commands_complete_event;
}
inline bool texture_loads_completed() const volatile
{
// Return true if all texture load requests are done.
return texture_load_complete_event == texture_load_request_event;
}
inline bool has_unflushed_texture_loads() const volatile
{
return texture_load_request_event > last_label_release2_event;
}
inline u64 on_texture_load_acquire() volatile
{
texture_load_request_event = inc_counter();
return texture_load_request_event;
}
inline void on_texture_load_release() volatile
{
// Normally released by the host device, but implemented nonetheless for software fallback
texture_load_complete_event = texture_load_request_event;
}
inline u64 on_label_acquire() volatile
{
last_label_acquire_event = inc_counter();
return last_label_acquire_event;
}
inline void on_label_release() volatile
{
last_label_release2_event = last_label_acquire_event;
}
inline bool needs_label_release() const volatile
{
return last_label_acquire_event > last_label_release2_event;
}
};
struct host_gpu_write_op_t
{
int dispatch_class = 0;
void* userdata = nullptr;
};
struct host_dispatch_handler_t
{
int dispatch_class = 0;
std::function<bool(const volatile host_gpu_context_t*, const host_gpu_write_op_t*)> handler;
};
class RSXDMAWriter
{
public:
RSXDMAWriter(void* mem)
: m_host_context_ptr(new (mem)host_gpu_context_t)
{}
RSXDMAWriter(host_gpu_context_t* pctx)
: m_host_context_ptr(pctx)
{}
void update();
void register_handler(host_dispatch_handler_t handler);
void deregister_handler(int dispatch_class);
void enqueue(const host_gpu_write_op_t& request);
void drain_label_queue();
volatile host_gpu_context_t* host_ctx() const
{
return m_host_context_ptr;
}
private:
std::unordered_map<int, host_dispatch_handler_t> m_dispatch_handlers;
volatile host_gpu_context_t* m_host_context_ptr = nullptr;
std::deque<host_gpu_write_op_t> m_job_queue;
};
}
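The context above is a set of monotonically increasing event ids shared with the GPU: acquire bumps the counter and records the id, release publishes it, and the GPU writes commands_complete_event back when it catches up, so released <= completed means the queue is drained. A host-only sketch of the pairing (hypothetical type, no GPU writer):

#include <cstdint>

struct event_tracker_t
{
    uint64_t counter = 0;
    uint64_t acquired = 0;
    uint64_t released = 0;
    uint64_t completed = 0; // written back by the GPU in the real design

    uint64_t acquire()   { return acquired = ++counter; }
    void release()       { released = acquired; }
    bool drained() const { return released <= completed; }
};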


@ -44,7 +44,7 @@ namespace rsx
RSX(ctx)->flush_fifo();
}
u64 start = rsx::uclock();
u64 start = get_system_time();
u64 last_check_val = start;
while (sema != arg)
@ -57,7 +57,7 @@ namespace rsx
if (const auto tdr = static_cast<u64>(g_cfg.video.driver_recovery_timeout))
{
const u64 current = rsx::uclock();
const u64 current = get_system_time();
if (current - last_check_val > 20'000)
{
@ -81,7 +81,7 @@ namespace rsx
}
RSX(ctx)->fifo_wake_delay();
RSX(ctx)->performance_counters.idle_time += (rsx::uclock() - start);
RSX(ctx)->performance_counters.idle_time += (get_system_time() - start);
}
void semaphore_release(context* ctx, u32 /*reg*/, u32 arg)


@ -25,11 +25,11 @@ namespace rsx
{
if (m_last_update_timestamp_us == 0)
{
m_last_update_timestamp_us = rsx::uclock();
m_last_update_timestamp_us = get_system_time();
}
else
{
const auto now = rsx::uclock();
const auto now = get_system_time();
m_current_frame_duration_us += (now - m_last_update_timestamp_us);
m_last_update_timestamp_us = now;
}


@ -13,7 +13,7 @@ namespace rsx
return duration;
}
return rsx::uclock() + duration;
return get_system_time() + duration;
}
template <typename T>
@ -168,7 +168,7 @@ namespace rsx
void message::update_queue(std::deque<message_item>& vis_set, std::deque<message_item>& ready_set, message_pin_location origin)
{
const u64 cur_time = rsx::uclock();
const u64 cur_time = get_system_time();
for (auto it = vis_set.begin(); it != vis_set.end();)
{


@ -499,7 +499,7 @@ namespace rsx
}
if (auto rsxthr = rsx::get_current_renderer(); rsxthr &&
(min_refresh_duration_us + rsxthr->last_host_flip_timestamp) < rsx::uclock())
(min_refresh_duration_us + rsxthr->last_host_flip_timestamp) < get_system_time())
{
rsxthr->async_flip_requested |= rsx::thread::flip_request::native_ui;
}


@ -173,10 +173,10 @@ namespace rsx
break;
}
start_time = rsx::uclock();
start_time = get_system_time();
}
auto now = rsx::uclock();
auto now = get_system_time();
if (now - start_time >= 50u)
{
if (m_thread->is_stopped())
@ -186,7 +186,7 @@ namespace rsx
m_thread->cpu_wait({});
const auto then = std::exchange(now, rsx::uclock());
const auto then = std::exchange(now, get_system_time());
start_time = now;
m_thread->performance_counters.idle_time += now - then;
}
@ -623,7 +623,7 @@ namespace rsx
{
if (performance_counters.state == FIFO::state::running)
{
performance_counters.FIFO_idle_timestamp = rsx::uclock();
performance_counters.FIFO_idle_timestamp = get_system_time();
performance_counters.state = FIFO::state::nop;
}
@ -633,7 +633,7 @@ namespace rsx
{
if (performance_counters.state == FIFO::state::running)
{
performance_counters.FIFO_idle_timestamp = rsx::uclock();
performance_counters.FIFO_idle_timestamp = get_system_time();
performance_counters.state = FIFO::state::empty;
}
else
@ -668,7 +668,7 @@ namespace rsx
//Jump to self. Often preceded by NOP
if (performance_counters.state == FIFO::state::running)
{
performance_counters.FIFO_idle_timestamp = rsx::uclock();
performance_counters.FIFO_idle_timestamp = get_system_time();
sync_point_request.release(true);
}
@ -749,7 +749,7 @@ namespace rsx
}
// Update performance counters with time spent in idle mode
performance_counters.idle_time += (rsx::uclock() - performance_counters.FIFO_idle_timestamp);
performance_counters.idle_time += (get_system_time() - performance_counters.FIFO_idle_timestamp);
}
do


@ -1,10 +1,6 @@
#include "stdafx.h"
#include "RSXThread.h"
#include "Emu/Cell/PPUCallback.h"
#include "Emu/Cell/SPUThread.h"
#include "Emu/Cell/timers.hpp"
#include "Capture/rsx_capture.h"
#include "Common/BufferUtils.h"
#include "Common/buffer_stream.hpp"
@ -13,9 +9,17 @@
#include "Common/time.hpp"
#include "Core/RSXReservationLock.hpp"
#include "Core/RSXEngLock.hpp"
#include "Host/RSXDMAWriter.h"
#include "NV47/HW/context.h"
#include "Program/GLSLCommon.h"
#include "rsx_methods.h"
#include "gcm_printing.h"
#include "RSXDisAsm.h"
#include "Emu/Cell/PPUCallback.h"
#include "Emu/Cell/SPUThread.h"
#include "Emu/Cell/timers.hpp"
#include "Emu/Cell/lv2/sys_event.h"
#include "Emu/Cell/lv2/sys_time.h"
#include "Emu/Cell/Modules/cellGcmSys.h"
@ -23,11 +27,10 @@
#include "Overlays/overlay_perf_metrics.h"
#include "Overlays/overlay_debug_overlay.h"
#include "Overlays/overlay_message.h"
#include "Program/GLSLCommon.h"
#include "Utilities/date_time.h"
#include "Utilities/StrUtil.h"
#include "Crypto/unzip.h"
#include "NV47/HW/context.h"
#include "util/asm.hpp"
@ -1021,7 +1024,7 @@ namespace rsx
fifo_ctrl = std::make_unique<::rsx::FIFO::FIFO_control>(this);
fifo_ctrl->set_get(ctrl->get);
last_guest_flip_timestamp = rsx::uclock() - 1000000;
last_guest_flip_timestamp = get_system_time() - 1000000;
vblank_count = 0;
@ -1101,7 +1104,7 @@ namespace rsx
if (Emu.IsPaused())
{
// Save the difference before pause
start_time = rsx::uclock() - start_time;
start_time = get_system_time() - start_time;
while (Emu.IsPaused() && !is_stopped())
{
@ -1109,7 +1112,7 @@ namespace rsx
}
// Restore difference
start_time = rsx::uclock() - start_time;
start_time = get_system_time() - start_time;
}
}
})));
@ -1162,6 +1165,11 @@ namespace rsx
// Update other sub-units
zcull_ctrl->update(this);
if (m_host_dma_ctrl)
{
m_host_dma_ctrl->update();
}
}
// Execute FIFO queue
@ -3049,7 +3057,7 @@ namespace rsx
}
}
last_host_flip_timestamp = rsx::uclock();
last_host_flip_timestamp = get_system_time();
}
void thread::check_zcull_status(bool framebuffer_swap)
@ -3291,7 +3299,7 @@ namespace rsx
{
bool kill_itself = g_cfg.core.rsx_fifo_accuracy == rsx_fifo_mode::as_ps3;
const u64 current_time = rsx::uclock();
const u64 current_time = get_system_time();
if (recovered_fifo_cmds_history.size() == 20u)
{
@ -3373,7 +3381,7 @@ namespace rsx
// Some cases do not need full delay
remaining = utils::aligned_div(remaining, div);
const u64 until = rsx::uclock() + remaining;
const u64 until = get_system_time() + remaining;
while (true)
{
@ -3404,7 +3412,7 @@ namespace rsx
busy_wait(100);
}
const u64 current = rsx::uclock();
const u64 current = get_system_time();
if (current >= until)
{
@ -3500,58 +3508,71 @@ namespace rsx
}
}
void thread::on_notify_memory_unmapped(u32 address, u32 size)
void thread::on_notify_pre_memory_unmapped(u32 address, u32 size, std::vector<std::pair<u64, u64>>& event_data)
{
if (rsx_thread_running && address < rsx::constants::local_mem_base)
{
if (!isHLE)
// Each bit represents io entry to be unmapped
u64 unmap_status[512 / 64]{};
for (u32 ea = address >> 20, end = ea + (size >> 20); ea < end; ea++)
{
// Each bit represents io entry to be unmapped
u64 unmap_status[512 / 64]{};
for (u32 ea = address >> 20, end = ea + (size >> 20); ea < end; ea++)
{
const u32 io = utils::rol32(iomap_table.io[ea], 32 - 20);
if (io + 1)
{
unmap_status[io / 64] |= 1ull << (io & 63);
iomap_table.ea[io].release(-1);
iomap_table.io[ea].release(-1);
}
}
for (u32 i = 0; i < std::size(unmap_status); i++)
{
// TODO: Check order when sending multiple events
if (u64 to_unmap = unmap_status[i])
{
// Each 64 entries are grouped by a bit
const u64 io_event = SYS_RSX_EVENT_UNMAPPED_BASE << i;
send_event(0, io_event, to_unmap);
}
}
}
else
{
// TODO: Fix this
u32 ea = address >> 20, io = iomap_table.io[ea];
const u32 io = utils::rol32(iomap_table.io[ea], 32 - 20);
if (io + 1)
{
io >>= 20;
auto& cfg = g_fxo->get<gcm_config>();
std::lock_guard lock(cfg.gcmio_mutex);
for (const u32 end = ea + (size >> 20); ea < end;)
{
cfg.offsetTable.ioAddress[ea++] = 0xFFFF;
cfg.offsetTable.eaAddress[io++] = 0xFFFF;
}
unmap_status[io / 64] |= 1ull << (io & 63);
iomap_table.io[ea].release(-1);
iomap_table.ea[io].release(-1);
}
}
auto& cfg = g_fxo->get<gcm_config>();
std::unique_lock<shared_mutex> hle_lock;
for (u32 i = 0; i < std::size(unmap_status); i++)
{
// TODO: Check order when sending multiple events
if (u64 to_unmap = unmap_status[i])
{
if (isHLE)
{
if (!hle_lock)
{
hle_lock = std::unique_lock{cfg.gcmio_mutex};
}
int bit = 0;
while (to_unmap)
{
bit = (std::countr_zero<u64>(utils::rol64(to_unmap, 0 - bit)) + bit);
to_unmap &= ~(1ull << bit);
constexpr u16 null_entry = 0xFFFF;
const u32 ea = std::exchange(cfg.offsetTable.eaAddress[(i * 64 + bit)], null_entry);
if (ea < (rsx::constants::local_mem_base >> 20))
{
cfg.offsetTable.eaAddress[ea] = null_entry;
}
}
continue;
}
// Each 64 entries are grouped by a bit
const u64 io_event = SYS_RSX_EVENT_UNMAPPED_BASE << i;
event_data.emplace_back(io_event, to_unmap);
}
}
if (hle_lock)
{
hle_lock.unlock();
}
// Pause RSX thread momentarily to handle unmapping
eng_lock elock(this);
@ -3581,6 +3602,14 @@ namespace rsx
}
}
void thread::on_notify_post_memory_unmapped(u64 event_data1, u64 event_data2)
{
if (!isHLE)
{
send_event(0, event_data1, event_data2);
}
}
// NOTE: m_mtx_task lock must be acquired before calling this method
void thread::handle_invalidated_memory_range()
{
@ -3646,7 +3675,7 @@ namespace rsx
//Average load over around 30 frames
if (!performance_counters.last_update_timestamp || performance_counters.sampled_frames > 30)
{
const auto timestamp = rsx::uclock();
const auto timestamp = get_system_time();
const auto idle = performance_counters.idle_time.load();
const auto elapsed = timestamp - performance_counters.last_update_timestamp;
@ -3930,7 +3959,7 @@ namespace rsx
flip(m_queued_flip);
last_guest_flip_timestamp = rsx::uclock() - 1000000;
last_guest_flip_timestamp = get_system_time() - 1000000;
flip_status = CELL_GCM_DISPLAY_FLIP_STATUS_DONE;
m_queued_flip.in_progress = false;
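Implementation note on the reworked unmap path above: io entries are first gathered into unmap_status, one bit per entry, and the HLE branch then visits each set bit via std::countr_zero. The same traversal in isolation (C++20 <bit>, hypothetical helper):

#include <bit>
#include <cstdint>

// Visit every set bit of 'mask', lowest first.
template <typename F>
void for_each_set_bit(uint64_t mask, F&& visit)
{
    while (mask)
    {
        visit(std::countr_zero(mask)); // bit index, e.g. entry i * 64 + bit
        mask &= mask - 1;              // clear the lowest set bit
    }
}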


@ -42,6 +42,8 @@ extern rsx::frame_capture_data frame_capture;
namespace rsx
{
class RSXDMAWriter;
struct context;
namespace overlays
@ -212,6 +214,9 @@ namespace rsx
// Context
context* m_ctx = nullptr;
// Host DMA
std::unique_ptr<RSXDMAWriter> m_host_dma_ctrl;
public:
atomic_t<u64> new_get_put = u64{umax};
u32 restore_point = 0;
@ -494,11 +499,18 @@ namespace rsx
*/
void on_notify_memory_mapped(u32 address_base, u32 size);
/**
* Notify that a section of memory is to be unmapped
* Any data held in the defined range is discarded
* Sets optional unmap event data
*/
void on_notify_pre_memory_unmapped(u32 address_base, u32 size, std::vector<std::pair<u64, u64>>& event_data);
/**
* Notify that a section of memory has been unmapped
* Any data held in the defined range is discarded
*/
void on_notify_memory_unmapped(u32 address_base, u32 size);
void on_notify_post_memory_unmapped(u64 event_data1, u64 event_data2);
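Taken together, the pre/post pair splits unmapping into two phases: the pre hook collects event payloads while the range is still mapped, and the post hook fires them once the unmap is done. A minimal caller-side sketch (the surrounding memory-manager code is assumed, not shown in this commit):

std::vector<std::pair<u64, u64>> event_data;
rsx->on_notify_pre_memory_unmapped(addr, size, event_data); // phase 1: gather SYS_RSX_EVENT_UNMAPPED_BASE payloads
// ... the memory manager unmaps the range here ...
for (const auto& [data1, data2] : event_data)
{
    rsx->on_notify_post_memory_unmapped(data1, data2); // phase 2: send_event for each queued payload
}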
/**
* Notify to check internal state during semaphore wait

View file

@ -542,7 +542,7 @@ namespace rsx
}
}
if (m_tsc = rsx::uclock(); m_tsc < m_next_tsc)
if (m_tsc = get_system_time(); m_tsc < m_next_tsc)
{
return;
}

View file

@ -15,6 +15,7 @@
#include "vkutils/scratch.h"
#include "Emu/RSX/rsx_methods.h"
#include "Emu/RSX/Host/RSXDMAWriter.h"
#include "Emu/RSX/NV47/HW/context_accessors.define.h"
#include "Emu/Memory/vm_locking.h"
@ -867,8 +868,7 @@ VKGSRender::VKGSRender(utils::serial* ar) noexcept : GSRender(ar)
VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0,
VMM_ALLOCATION_POOL_SYSTEM);
m_host_data_ptr = new (m_host_object_data->map(0, 0x100000)) vk::host_data_t();
ensure(m_host_data_ptr->magic == 0xCAFEBABE);
m_host_dma_ctrl = std::make_unique<rsx::RSXDMAWriter>(m_host_object_data->map(0, 0x10000));
}
else
{
@ -1257,7 +1257,7 @@ void VKGSRender::notify_tile_unbound(u32 tile)
if (false)
{
u32 addr = rsx::get_address(tiles[tile].offset, tiles[tile].location);
on_notify_memory_unmapped(addr, tiles[tile].size);
on_notify_pre_memory_unmapped(addr, tiles[tile].size, *std::make_unique<std::vector<std::pair<u64, u64>>>());
m_rtts.invalidate_surface_address(addr, false);
}
@ -1784,6 +1784,11 @@ void VKGSRender::flush_command_queue(bool hard_sync, bool do_not_switch)
m_current_command_buffer->begin();
}
std::pair<volatile vk::host_data_t*, VkBuffer> VKGSRender::map_host_object_data() const
{
return { m_host_dma_ctrl->host_ctx(), m_host_object_data->value };
}
bool VKGSRender::release_GCM_label(u32 address, u32 args)
{
if (!backend_config.supports_host_gpu_labels)
@ -1791,25 +1796,13 @@ bool VKGSRender::release_GCM_label(u32 address, u32 args)
return false;
}
auto drain_label_queue = [this]()
{
while (m_host_data_ptr->last_label_release_event > m_host_data_ptr->commands_complete_event)
{
utils::pause();
auto host_ctx = ensure(m_host_dma_ctrl->host_ctx());
if (thread_ctrl::state() == thread_state::aborting)
{
break;
}
}
};
ensure(m_host_data_ptr);
if (m_host_data_ptr->texture_load_complete_event == m_host_data_ptr->texture_load_request_event)
if (host_ctx->texture_loads_completed())
{
// All texture loads already seen by the host GPU
// Wait for all previously submitted labels to be flushed
drain_label_queue();
m_host_dma_ctrl->drain_label_queue();
return false;
}
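The removed lambda documents the waiting behavior the new helper inherits; a sketch of what RSXDMAWriter::drain_label_queue() presumably does (member names are guesses, see Emu/RSX/Host/RSXDMAWriter.h for the real definition):

void rsx::RSXDMAWriter::drain_label_queue()
{
    // Spin until the host GPU has consumed every submitted label
    while (m_host_ctx->last_label_acquire_event > m_host_ctx->commands_complete_event)
    {
        utils::pause();

        if (thread_ctrl::state() == thread_state::aborting)
        {
            break;
        }
    }
}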
@ -1821,13 +1814,13 @@ bool VKGSRender::release_GCM_label(u32 address, u32 args)
// NVIDIA GPUs can disappoint when DMA blocks straddle VirtualAlloc boundaries.
// Take the L and try the fallback.
rsx_log.warning("Host label update at 0x%x was not possible.", address);
drain_label_queue();
m_host_dma_ctrl->drain_label_queue();
return false;
}
m_host_data_ptr->last_label_release_event = m_host_data_ptr->inc_counter();
const auto release_event_id = host_ctx->on_label_acquire();
if (m_host_data_ptr->texture_load_request_event > m_host_data_ptr->last_label_submit_event)
if (host_ctx->has_unflushed_texture_loads())
{
if (vk::is_renderpass_open(*m_current_command_buffer))
{
@ -1842,17 +1835,31 @@ bool VKGSRender::release_GCM_label(u32 address, u32 args)
auto cmd = m_secondary_cb_list.next();
cmd->begin();
vkCmdUpdateBuffer(*cmd, mapping.second->value, mapping.first, 4, &write_data);
vkCmdUpdateBuffer(*cmd, m_host_object_data->value, ::offset32(&vk::host_data_t::commands_complete_event), 8, const_cast<u64*>(&m_host_data_ptr->last_label_release_event));
vkCmdUpdateBuffer(*cmd, m_host_object_data->value, ::offset32(&vk::host_data_t::commands_complete_event), 8, &release_event_id);
cmd->end();
vk::queue_submit_t submit_info = { m_device->get_graphics_queue(), nullptr };
cmd->submit(submit_info);
m_host_data_ptr->last_label_submit_event = m_host_data_ptr->last_label_release_event;
host_ctx->on_label_release();
}
return true;
}
void VKGSRender::on_guest_texture_read(const vk::command_buffer& cmd)
{
if (!backend_config.supports_host_gpu_labels)
{
return;
}
// Queue a sync update on the CB doing the load
auto host_ctx = ensure(m_host_dma_ctrl->host_ctx());
const auto event_id = host_ctx->on_texture_load_acquire();
vkCmdUpdateBuffer(cmd, m_host_object_data->value, ::offset32(&vk::host_data_t::texture_load_complete_event), sizeof(u64), &event_id);
}
void VKGSRender::sync_hint(rsx::FIFO::interrupt_hint hint, rsx::reports::sync_hint_payload_t payload)
{
rsx::thread::sync_hint(hint, payload);
@ -1885,7 +1892,7 @@ void VKGSRender::sync_hint(rsx::FIFO::interrupt_hint hint, rsx::reports::sync_hi
// OK, cell will be accessing the results, probably.
// Try to avoid flush spam, it is more costly to flush the CB than it is to just upload the vertex data
// This is supposed to be an optimization afterall.
const auto now = rsx::uclock();
const auto now = get_system_time();
if ((now - m_last_cond_render_eval_hint) > 50)
{
// Schedule a sync on the next loop iteration
@ -2516,15 +2523,15 @@ void VKGSRender::close_and_submit_command_buffer(vk::fence* pFence, VkSemaphore
m_current_command_buffer->flags &= ~vk::command_buffer::cb_has_open_query;
}
if (m_host_data_ptr && m_host_data_ptr->last_label_release_event > m_host_data_ptr->last_label_submit_event)
if (m_host_dma_ctrl && m_host_dma_ctrl->host_ctx()->needs_label_release())
{
vkCmdUpdateBuffer(*m_current_command_buffer,
m_host_object_data->value,
::offset32(&vk::host_data_t::commands_complete_event),
sizeof(u64),
const_cast<u64*>(&m_host_data_ptr->last_label_release_event));
const_cast<u64*>(&m_host_dma_ctrl->host_ctx()->last_label_acquire_event));
m_host_data_ptr->last_label_submit_event = m_host_data_ptr->last_label_release_event;
m_host_dma_ctrl->host_ctx()->on_label_release();
}
m_current_command_buffer->end();

View file

@ -1,6 +1,4 @@
#pragma once
#include "Emu/RSX/GSRender.h"
#include "Emu/Cell/timers.hpp"
#include "upscalers/upscaling.h"
@ -19,15 +17,23 @@
#include "VKFramebuffer.h"
#include "VKShaderInterpreter.h"
#include "VKQueryPool.h"
#include "../GCM.h"
#include "util/asm.hpp"
#include "Emu/RSX/GCM.h"
#include "Emu/RSX/GSRender.h"
#include "Emu/RSX/Host/RSXDMAWriter.h"
#include <thread>
#include <optional>
using namespace vk::vmm_allocation_pool_; // clang workaround.
using namespace vk::upscaling_flags_; // ditto
namespace vk
{
using host_data_t = rsx::host_gpu_context_t;
}
class VKGSRender : public GSRender, public ::rsx::reports::ZCULL_control
{
private:
@ -118,7 +124,6 @@ private:
vk::command_buffer_chain<VK_MAX_ASYNC_CB_COUNT> m_primary_cb_list;
vk::command_buffer_chunk* m_current_command_buffer = nullptr;
volatile vk::host_data_t* m_host_data_ptr = nullptr;
std::unique_ptr<vk::buffer> m_host_object_data;
vk::descriptor_pool m_descriptor_pool;
@ -274,7 +279,8 @@ public:
void end_conditional_rendering() override;
// Host sync object
inline std::pair<volatile vk::host_data_t*, VkBuffer> map_host_object_data() { return { m_host_data_ptr, m_host_object_data->value }; }
std::pair<volatile vk::host_data_t*, VkBuffer> map_host_object_data() const;
void on_guest_texture_read(const vk::command_buffer& cmd);
// GRAPH backend
void patch_transform_constants(rsx::context* ctx, u32 index, u32 count) override;

View file

@ -6,10 +6,12 @@
#include "Emu/RSX/Common/simple_array.hpp"
#include "Emu/RSX/rsx_utils.h"
#include "Emu/RSX/rsx_cache.h"
#include "Utilities/mutex.h"
#include "util/asm.hpp"
#include <optional>
#include <thread>
// Initial heap allocation values. The heaps are growable and will automatically increase in size to accommodate demands
#define VK_ATTRIB_RING_BUFFER_SIZE_M 64

View file

@ -1240,15 +1240,9 @@ namespace vk
dst_image->queue_release(cmd2, cmd.get_queue_family(), dst_image->current_layout);
}
if (auto rsxthr = rsx::get_current_renderer();
rsxthr->get_backend_config().supports_host_gpu_labels)
if (auto rsxthr = static_cast<VKGSRender*>(rsx::get_current_renderer()))
{
// Queue a sync update on the CB doing the load
auto [host_data, host_buffer] = static_cast<VKGSRender*>(rsxthr)->map_host_object_data();
ensure(host_data);
const auto event_id = host_data->inc_counter();
host_data->texture_load_request_event = event_id;
vkCmdUpdateBuffer(cmd2, host_buffer, ::offset32(&vk::host_data_t::texture_load_complete_event), sizeof(u64), &event_id);
rsxthr->on_guest_texture_read(cmd2);
}
}

View file

@ -18,25 +18,6 @@ namespace vk
gpu = 1
};
struct host_data_t // Pick a better name
{
u64 magic = 0xCAFEBABE;
u64 event_counter = 0;
u64 texture_load_request_event = 0;
u64 texture_load_complete_event = 0;
u64 last_label_release_event = 0;
u64 last_label_submit_event = 0;
u64 commands_complete_event = 0;
u64 last_label_request_timestamp = 0;
inline u64 inc_counter() volatile
{
// Workaround for volatile increment warning. GPU can see this value directly, but currently we do not modify it on the device.
event_counter = event_counter + 1;
return event_counter;
}
};
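For orientation: the comparisons this struct used to back now live behind predicates on rsx::host_gpu_context_t. A plausible mapping, assuming the new struct keeps equivalent counters (sketch only; see Emu/RSX/Host/RSXDMAWriter.h):

bool texture_loads_completed() const { return texture_load_complete_event == texture_load_request_event; }
bool has_unflushed_texture_loads() const { return texture_load_request_event > last_label_submit_event; }
bool needs_label_release() const { return last_label_acquire_event > last_label_submit_event; }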
struct fence
{
atomic_t<bool> flushed = false;

View file

@ -52,6 +52,7 @@
</ItemGroup>
<ItemGroup>
<ClInclude Include="Emu\RSX\GL\GLCompute.h" />
<ClInclude Include="Emu\RSX\GL\GLDMA.h" />
<ClInclude Include="Emu\RSX\GL\GLOverlays.h" />
<ClInclude Include="Emu\RSX\GL\GLPipelineCompiler.h" />
<ClInclude Include="Emu\RSX\GL\GLCommonDecompiler.h" />
@ -88,6 +89,7 @@
<ItemGroup>
<ClCompile Include="Emu\RSX\GL\GLCommonDecompiler.cpp" />
<ClCompile Include="Emu\RSX\GL\GLCompute.cpp" />
<ClCompile Include="Emu\RSX\GL\GLDMA.cpp" />
<ClCompile Include="Emu\RSX\GL\GLDraw.cpp" />
<ClCompile Include="Emu\RSX\GL\GLFragmentProgram.cpp" />
<ClCompile Include="Emu\RSX\GL\GLGSRender.cpp" />

View file

@ -47,6 +47,7 @@
<ClCompile Include="Emu\RSX\GL\upscalers\fsr1\fsr_pass.cpp">
<Filter>upscalers\fsr1</Filter>
</ClCompile>
<ClCompile Include="Emu\RSX\GL\GLDMA.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="Emu\RSX\GL\GLTexture.h" />
@ -118,6 +119,7 @@
<ClInclude Include="Emu\RSX\GL\upscalers\fsr_pass.h">
<Filter>upscalers</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\GL\GLDMA.h" />
</ItemGroup>
<ItemGroup>
<Filter Include="glutils">

View file

@ -1219,7 +1219,7 @@ void evdev_joystick_handler::apply_input_events(const std::shared_ptr<Pad>& pad)
s32 stick_val[4]{};
// Translate any corresponding keycodes to our two sticks. (ignoring thresholds for now)
for (int i = 0; i < static_cast<int>(pad->m_sticks.size()); i++)
for (usz i = 0; i < pad->m_sticks.size(); i++)
{
bool pressed{}; // unused
u16 val_min{};
@ -1424,23 +1424,23 @@ bool evdev_joystick_handler::bindPadToDevice(std::shared_ptr<Pad> pad)
m_dev->axis_right[2] = find_buttons(cfg->rs_up);
m_dev->axis_right[3] = find_buttons(cfg->rs_down);
pad->m_sticks.emplace_back(CELL_PAD_BTN_OFFSET_ANALOG_LEFT_X, m_dev->axis_left[1], m_dev->axis_left[0]);
pad->m_sticks.emplace_back(CELL_PAD_BTN_OFFSET_ANALOG_LEFT_Y, m_dev->axis_left[3], m_dev->axis_left[2]);
pad->m_sticks.emplace_back(CELL_PAD_BTN_OFFSET_ANALOG_RIGHT_X, m_dev->axis_right[1], m_dev->axis_right[0]);
pad->m_sticks.emplace_back(CELL_PAD_BTN_OFFSET_ANALOG_RIGHT_Y, m_dev->axis_right[3], m_dev->axis_right[2]);
pad->m_sticks[0] = AnalogStick(CELL_PAD_BTN_OFFSET_ANALOG_LEFT_X, m_dev->axis_left[1], m_dev->axis_left[0]);
pad->m_sticks[1] = AnalogStick(CELL_PAD_BTN_OFFSET_ANALOG_LEFT_Y, m_dev->axis_left[3], m_dev->axis_left[2]);
pad->m_sticks[2] = AnalogStick(CELL_PAD_BTN_OFFSET_ANALOG_RIGHT_X, m_dev->axis_right[1], m_dev->axis_right[0]);
pad->m_sticks[3] = AnalogStick(CELL_PAD_BTN_OFFSET_ANALOG_RIGHT_Y, m_dev->axis_right[3], m_dev->axis_right[2]);
m_dev->axis_motion[0] = find_motion_button(cfg->motion_sensor_x);
m_dev->axis_motion[1] = find_motion_button(cfg->motion_sensor_y);
m_dev->axis_motion[2] = find_motion_button(cfg->motion_sensor_z);
m_dev->axis_motion[3] = find_motion_button(cfg->motion_sensor_g);
pad->m_sensors.emplace_back(CELL_PAD_BTN_OFFSET_SENSOR_X, m_dev->axis_motion[0].code, m_dev->axis_motion[0].mirrored, m_dev->axis_motion[0].shift, DEFAULT_MOTION_X);
pad->m_sensors.emplace_back(CELL_PAD_BTN_OFFSET_SENSOR_Y, m_dev->axis_motion[1].code, m_dev->axis_motion[1].mirrored, m_dev->axis_motion[1].shift, DEFAULT_MOTION_Y);
pad->m_sensors.emplace_back(CELL_PAD_BTN_OFFSET_SENSOR_Z, m_dev->axis_motion[2].code, m_dev->axis_motion[2].mirrored, m_dev->axis_motion[2].shift, DEFAULT_MOTION_Z);
pad->m_sensors.emplace_back(CELL_PAD_BTN_OFFSET_SENSOR_G, m_dev->axis_motion[3].code, m_dev->axis_motion[3].mirrored, m_dev->axis_motion[3].shift, DEFAULT_MOTION_G);
pad->m_sensors[0] = AnalogSensor(CELL_PAD_BTN_OFFSET_SENSOR_X, m_dev->axis_motion[0].code, m_dev->axis_motion[0].mirrored, m_dev->axis_motion[0].shift, DEFAULT_MOTION_X);
pad->m_sensors[1] = AnalogSensor(CELL_PAD_BTN_OFFSET_SENSOR_Y, m_dev->axis_motion[1].code, m_dev->axis_motion[1].mirrored, m_dev->axis_motion[1].shift, DEFAULT_MOTION_Y);
pad->m_sensors[2] = AnalogSensor(CELL_PAD_BTN_OFFSET_SENSOR_Z, m_dev->axis_motion[2].code, m_dev->axis_motion[2].mirrored, m_dev->axis_motion[2].shift, DEFAULT_MOTION_Z);
pad->m_sensors[3] = AnalogSensor(CELL_PAD_BTN_OFFSET_SENSOR_G, m_dev->axis_motion[3].code, m_dev->axis_motion[3].mirrored, m_dev->axis_motion[3].shift, DEFAULT_MOTION_G);
pad->m_vibrateMotors.emplace_back(true, 0);
pad->m_vibrateMotors.emplace_back(false, 0);
pad->m_vibrateMotors[0] = VibrateMotor(true, 0);
pad->m_vibrateMotors[1] = VibrateMotor(false, 0);
if (std::shared_ptr<EvdevDevice> evdev_device = add_device(player_config->device, false))
{

View file

@ -314,11 +314,6 @@ void keyboard_pad_handler::release_all_keys()
for (usz i = 0; i < pad.m_sticks.size(); i++)
{
if (i >= max_sticks)
{
input_log.fatal("Too many sticks (%d vs %d)", pad.m_sticks.size(), max_sticks);
break;
}
m_stick_min[i] = 0;
m_stick_max[i] = 128;
m_stick_val[i] = 128;
@ -1067,18 +1062,18 @@ bool keyboard_pad_handler::bindPadToDevice(std::shared_ptr<Pad> pad)
pad->m_buttons.emplace_back(CELL_PAD_BTN_OFFSET_PRESS_PIGGYBACK, find_keys(cfg->tilt_right), CELL_PAD_CTRL_PRESS_R1);
}
pad->m_sticks.emplace_back(CELL_PAD_BTN_OFFSET_ANALOG_LEFT_X, find_keys(cfg->ls_left), find_keys(cfg->ls_right));
pad->m_sticks.emplace_back(CELL_PAD_BTN_OFFSET_ANALOG_LEFT_Y, find_keys(cfg->ls_up), find_keys(cfg->ls_down));
pad->m_sticks.emplace_back(CELL_PAD_BTN_OFFSET_ANALOG_RIGHT_X, find_keys(cfg->rs_left), find_keys(cfg->rs_right));
pad->m_sticks.emplace_back(CELL_PAD_BTN_OFFSET_ANALOG_RIGHT_Y, find_keys(cfg->rs_up), find_keys(cfg->rs_down));
pad->m_sticks[0] = AnalogStick(CELL_PAD_BTN_OFFSET_ANALOG_LEFT_X, find_keys(cfg->ls_left), find_keys(cfg->ls_right));
pad->m_sticks[1] = AnalogStick(CELL_PAD_BTN_OFFSET_ANALOG_LEFT_Y, find_keys(cfg->ls_up), find_keys(cfg->ls_down));
pad->m_sticks[2] = AnalogStick(CELL_PAD_BTN_OFFSET_ANALOG_RIGHT_X, find_keys(cfg->rs_left), find_keys(cfg->rs_right));
pad->m_sticks[3] = AnalogStick(CELL_PAD_BTN_OFFSET_ANALOG_RIGHT_Y, find_keys(cfg->rs_up), find_keys(cfg->rs_down));
pad->m_sensors.emplace_back(CELL_PAD_BTN_OFFSET_SENSOR_X, 0, 0, 0, DEFAULT_MOTION_X);
pad->m_sensors.emplace_back(CELL_PAD_BTN_OFFSET_SENSOR_Y, 0, 0, 0, DEFAULT_MOTION_Y);
pad->m_sensors.emplace_back(CELL_PAD_BTN_OFFSET_SENSOR_Z, 0, 0, 0, DEFAULT_MOTION_Z);
pad->m_sensors.emplace_back(CELL_PAD_BTN_OFFSET_SENSOR_G, 0, 0, 0, DEFAULT_MOTION_G);
pad->m_sensors[0] = AnalogSensor(CELL_PAD_BTN_OFFSET_SENSOR_X, 0, 0, 0, DEFAULT_MOTION_X);
pad->m_sensors[1] = AnalogSensor(CELL_PAD_BTN_OFFSET_SENSOR_Y, 0, 0, 0, DEFAULT_MOTION_Y);
pad->m_sensors[2] = AnalogSensor(CELL_PAD_BTN_OFFSET_SENSOR_Z, 0, 0, 0, DEFAULT_MOTION_Z);
pad->m_sensors[3] = AnalogSensor(CELL_PAD_BTN_OFFSET_SENSOR_G, 0, 0, 0, DEFAULT_MOTION_G);
pad->m_vibrateMotors.emplace_back(true, 0);
pad->m_vibrateMotors.emplace_back(false, 0);
pad->m_vibrateMotors[0] = VibrateMotor(true, 0);
pad->m_vibrateMotors[1] = VibrateMotor(false, 0);
m_bindings.emplace_back(pad, nullptr, nullptr);
m_pads_internal.push_back(*pad);
@ -1258,7 +1253,7 @@ void keyboard_pad_handler::process()
// Normalize and apply pad squircling
// Copy sticks first. We don't want to modify the raw internal values
std::vector<AnalogStick> squircled_sticks = pad_internal.m_sticks;
std::array<AnalogStick, 4> squircled_sticks = pad_internal.m_sticks;
// Apply squircling
if (cfg->lpadsquircling != 0)
@ -1278,6 +1273,6 @@ void keyboard_pad_handler::process()
}
pad->m_buttons = pad_internal.m_buttons;
pad->m_sticks = std::move(squircled_sticks);
pad->m_sticks = squircled_sticks; // Don't use std::move here. We assign values locklessly, so std::move can lead to segfaults.
}
}

View file

@ -212,11 +212,8 @@ void pad_thread::SetRumble(const u32 pad, u8 large_motor, bool small_motor)
if (pad >= m_pads.size())
return;
if (m_pads[pad]->m_vibrateMotors.size() >= 2)
{
m_pads[pad]->m_vibrateMotors[0].m_value = large_motor;
m_pads[pad]->m_vibrateMotors[1].m_value = small_motor ? 255 : 0;
}
m_pads[pad]->m_vibrateMotors[0].m_value = large_motor;
m_pads[pad]->m_vibrateMotors[1].m_value = small_motor ? 255 : 0;
}
void pad_thread::SetIntercepted(bool intercepted)

View file

@ -104,6 +104,7 @@
<ClCompile Include="Emu\perf_monitor.cpp" />
<ClCompile Include="Emu\RSX\Common\texture_cache.cpp" />
<ClCompile Include="Emu\RSX\Core\RSXContext.cpp" />
<ClCompile Include="Emu\RSX\Host\RSXDMAWriter.cpp" />
<ClCompile Include="Emu\RSX\NV47\FW\draw_call.cpp" />
<ClCompile Include="Emu\RSX\NV47\FW\reg_context.cpp" />
<ClCompile Include="Emu\RSX\NV47\HW\common.cpp" />
@ -617,6 +618,7 @@
<ClInclude Include="Emu\RSX\Core\RSXDisplay.h" />
<ClInclude Include="Emu\RSX\Core\RSXReservationLock.hpp" />
<ClInclude Include="Emu\RSX\Core\RSXVertexTypes.h" />
<ClInclude Include="Emu\RSX\Host\RSXDMAWriter.h" />
<ClInclude Include="Emu\RSX\NV47\FW\draw_call.hpp" />
<ClInclude Include="Emu\RSX\NV47\FW\draw_call.inc.h" />
<ClInclude Include="Emu\RSX\NV47\FW\GRAPH_backend.h" />

View file

@ -1300,6 +1300,9 @@
<ClCompile Include="Emu\RSX\gcm_enums.cpp">
<Filter>Emu\GPU\RSX\NV47\FW</Filter>
</ClCompile>
<ClCompile Include="Emu\RSX\Host\RSXDMAWriter.cpp">
<Filter>Emu\GPU\RSX\Host Mini-Driver</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="Crypto\aes.h">
@ -2620,6 +2623,9 @@
<ClInclude Include="Emu\RSX\color_utils.h">
<Filter>Emu\GPU\RSX\Utils</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\Host\RSXDMAWriter.h">
<Filter>Emu\GPU\RSX\Host Mini-Driver</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<None Include="Emu\RSX\Program\GLSLSnippets\GPUDeswizzle.glsl">

View file

@ -252,11 +252,12 @@ LOG_CHANNEL(q_debug, "QDEBUG");
struct fatal_error_listener final : logs::listener
{
public:
~fatal_error_listener() override = default;
void log(u64 /*stamp*/, const logs::message& msg, const std::string& prefix, const std::string& text) override
{
if (msg <= logs::level::fatal)
if (msg == logs::level::fatal || (msg == logs::level::always && m_log_always))
{
std::string _msg = "RPCS3: ";
@ -276,10 +277,17 @@ struct fatal_error_listener final : logs::listener
_msg += '\n';
// If launched from CMD
utils::attach_console(utils::console_stream::std_err, false);
utils::attach_console(msg == logs::level::fatal ? utils::console_stream::std_err : utils::console_stream::std_out, false);
// Output to error stream as is
utils::output_stderr(_msg);
if (msg == logs::level::fatal)
{
utils::output_stderr(_msg);
}
else
{
std::cout << _msg;
}
#ifdef _WIN32
if (IsDebuggerPresent())
@ -295,6 +303,14 @@ struct fatal_error_listener final : logs::listener
}
}
}
void log_always(bool enabled)
{
m_log_always = enabled;
}
private:
bool m_log_always = false;
};
// Arguments that force a headless application (need to be checked in create_application)
@ -494,6 +510,7 @@ int main(int argc, char** argv)
}
const std::string lock_name = fs::get_cache_dir() + "RPCS3.buf";
const std::string log_name = fs::get_cache_dir() + "RPCS3.log";
static fs::file instance_lock;
@ -512,19 +529,19 @@ int main(int argc, char** argv)
{
if (fs::exists(lock_name))
{
report_fatal_error("Another instance of RPCS3 is running.\nClose it or kill its process, if necessary.");
report_fatal_error(fmt::format("Another instance of RPCS3 is running.\nClose it or kill its process, if necessary.\n'%s' still exists.", lock_name));
}
report_fatal_error("Cannot create RPCS3.log (access denied)."
report_fatal_error(fmt::format("Cannot create '%s' or '%s' (access denied).\n"
#ifdef _WIN32
"\nNote that RPCS3 cannot be installed in Program Files or similar directories with limited permissions."
"Note that RPCS3 cannot be installed in Program Files or similar directories with limited permissions."
#else
"\nPlease, check RPCS3 permissions in '~/.config/rpcs3'."
"Please, check RPCS3 permissions."
#endif
);
, log_name, lock_name));
}
report_fatal_error(fmt::format("Cannot create RPCS3.log (error %s)", fs::g_tls_error));
report_fatal_error(fmt::format("Cannot create'%s' or '%s' (error=%s)", log_name, lock_name, fs::g_tls_error));
}
#ifdef _WIN32
@ -552,9 +569,6 @@ int main(int argc, char** argv)
ensure(thread_ctrl::is_main(), "Not main thread");
// Initialize TSC freq (in case it isn't)
static_cast<void>(utils::get_tsc_freq());
// Initialize thread pool finalizer (on first use)
static_cast<void>(named_thread("", [](int) {}));
@ -568,10 +582,10 @@ int main(int argc, char** argv)
}
// Limit log size to ~25% of free space
log_file = logs::make_file_listener(fs::get_cache_dir() + "RPCS3.log", stats.avail_free / 4);
log_file = logs::make_file_listener(log_name, stats.avail_free / 4);
}
static std::unique_ptr<logs::listener> fatal_listener = std::make_unique<fatal_error_listener>();
static std::unique_ptr<fatal_error_listener> fatal_listener = std::make_unique<fatal_error_listener>();
logs::listener::add(fatal_listener.get());
{
@ -999,6 +1013,10 @@ int main(int argc, char** argv)
return 0;
}
// Enable console output of "always" log messages.
// Do this after parsing any Qt cli args that might open a window.
fatal_listener->log_always(true);
// Log unique ID
gui::utils::log_uuid();

View file

@ -28,7 +28,7 @@ namespace rpcs3
// Currently accessible by Windows and Linux build scripts, see implementations when doing MACOSX
const utils::version& get_version()
{
static constexpr utils::version version{ 0, 0, 33, utils::version_type::alpha, 1, RPCS3_GIT_VERSION };
static constexpr utils::version version{ 0, 0, 34, utils::version_type::alpha, 1, RPCS3_GIT_VERSION };
return version;
}

View file

@ -441,9 +441,9 @@ void emu_settings::EnhanceCheckBox(QCheckBox* checkbox, emu_settings_type type)
m_broken_types.insert(type);
}
connect(checkbox, &QCheckBox::stateChanged, this, [type, this](int val)
connect(checkbox, &QCheckBox::checkStateChanged, this, [type, this](Qt::CheckState val)
{
const std::string str = val != 0 ? "true" : "false";
const std::string str = val != Qt::Unchecked ? "true" : "false";
SetSetting(type, str);
});
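This is one instance of a pattern applied throughout the commit: QCheckBox::stateChanged(int) is replaced by checkStateChanged(Qt::CheckState), which Qt introduced in 6.7. The generic shape (illustrative):

connect(checkbox, &QCheckBox::checkStateChanged, this, [](Qt::CheckState state)
{
    const bool checked = state != Qt::Unchecked; // was: val != 0
    // ... apply `checked` to the setting ...
});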

View file

@ -14,6 +14,30 @@ game_list::game_list() : QTableWidget(), game_list_base()
};
}
void game_list::sync_header_actions(QList<QAction*>& actions, std::function<bool(int)> get_visibility)
{
ensure(get_visibility);
bool is_dirty = false;
for (int col = 0; col < actions.count(); ++col)
{
const bool is_hidden = !get_visibility(col);
actions[col]->setChecked(!is_hidden);
if (isColumnHidden(col) != is_hidden)
{
setColumnHidden(col, is_hidden);
is_dirty = true;
}
}
if (is_dirty)
{
fix_narrow_columns();
}
}
void game_list::create_header_actions(QList<QAction*>& actions, std::function<bool(int)> get_visibility, std::function<void(int, bool)> set_visibility)
{
ensure(get_visibility);
@ -48,6 +72,7 @@ void game_list::create_header_actions(QList<QAction*>& actions, std::function<bo
return;
}
}
setColumnHidden(col, !checked); // Negate: the menu action means "show" while the API sets "hidden".
set_visibility(col, checked);
@ -56,11 +81,9 @@ void game_list::create_header_actions(QList<QAction*>& actions, std::function<bo
fix_narrow_columns();
}
});
const bool vis = get_visibility(col);
actions[col]->setChecked(vis);
setColumnHidden(col, !vis);
}
sync_header_actions(actions, get_visibility);
}
void game_list::clear_list()

View file

@ -24,6 +24,7 @@ class game_list : public QTableWidget, public game_list_base
public:
game_list();
void sync_header_actions(QList<QAction*>& actions, std::function<bool(int)> get_visibility);
void create_header_actions(QList<QAction*>& actions, std::function<bool(int)> get_visibility, std::function<void(int, bool)> set_visibility);
void clear_list() override; // Use this instead of clearContents

View file

@ -230,12 +230,7 @@ void game_list_frame::LoadSettings()
m_show_custom_icons = m_gui_settings->GetValue(gui::gl_custom_icon).toBool();
m_play_hover_movies = m_gui_settings->GetValue(gui::gl_hover_gifs).toBool();
for (int col = 0; col < m_columnActs.count(); ++col)
{
const bool vis = m_gui_settings->GetGamelistColVisibility(static_cast<gui::game_list_columns>(col));
m_columnActs[col]->setChecked(vis);
m_game_list->setColumnHidden(col, !vis);
}
m_game_list->sync_header_actions(m_columnActs, [this](int col) { return m_gui_settings->GetGamelistColVisibility(static_cast<gui::game_list_columns>(col)); });
}
game_list_frame::~game_list_frame()
@ -915,6 +910,7 @@ void game_list_frame::OnRefreshFinished()
if (!std::exchange(m_initial_refresh_done, true))
{
m_game_list->restore_layout(m_gui_settings->GetValue(gui::gl_state).toByteArray());
m_game_list->sync_header_actions(m_columnActs, [this](int col) { return m_gui_settings->GetGamelistColVisibility(static_cast<gui::game_list_columns>(col)); });
}
// Emit signal and remove slots

View file

@ -160,9 +160,17 @@ bool gui_application::Init()
if (m_gui_settings->GetValue(gui::ib_show_welcome).toBool())
{
welcome_dialog* welcome = new welcome_dialog(m_gui_settings, false);
bool use_dark_theme = false;
connect(welcome, &QDialog::accepted, this, [&]()
{
use_dark_theme = welcome->does_user_want_dark_theme();
});
welcome->exec();
if (welcome->does_user_want_dark_theme())
if (use_dark_theme)
{
m_gui_settings->SetValue(gui::m_currentStylesheet, "Darker Style by TheMitoSan");
}

View file

@ -99,7 +99,7 @@ pad_motion_settings_dialog::pad_motion_settings_dialog(QDialog* parent, std::sha
m_shifts[i]->setRange(config->shift.min, config->shift.max);
m_shifts[i]->setValue(config->shift.get());
connect(m_mirrors[i], &QCheckBox::stateChanged, this, [this, i](int state)
connect(m_mirrors[i], &QCheckBox::checkStateChanged, this, [this, i](Qt::CheckState state)
{
std::lock_guard lock(m_config_mutex);
m_config_entries[i]->mirrored.set(state != Qt::Unchecked);

View file

@ -93,7 +93,7 @@ patch_manager_dialog::patch_manager_dialog(std::shared_ptr<gui_settings> gui_set
connect(ui->patch_tree, &QTreeWidget::currentItemChanged, this, &patch_manager_dialog::handle_item_selected);
connect(ui->patch_tree, &QTreeWidget::itemChanged, this, &patch_manager_dialog::handle_item_changed);
connect(ui->patch_tree, &QTreeWidget::customContextMenuRequested, this, &patch_manager_dialog::handle_custom_context_menu_requested);
connect(ui->cb_owned_games_only, &QCheckBox::stateChanged, this, &patch_manager_dialog::handle_show_owned_games_only);
connect(ui->cb_owned_games_only, &QCheckBox::checkStateChanged, this, &patch_manager_dialog::handle_show_owned_games_only);
connect(ui->configurable_selector, QOverload<int>::of(&QComboBox::currentIndexChanged), this, [this](int index)
{
if (index >= 0)
@ -1087,7 +1087,7 @@ void patch_manager_dialog::dropEvent(QDropEvent* event)
}
}
void patch_manager_dialog::handle_show_owned_games_only(int state)
void patch_manager_dialog::handle_show_owned_games_only(Qt::CheckState state)
{
m_show_owned_games_only = state == Qt::CheckState::Checked;
m_gui_settings->SetValue(gui::pm_show_owned, m_show_owned_games_only);

View file

@ -50,7 +50,7 @@ private Q_SLOTS:
void handle_item_changed(QTreeWidgetItem* item, int column);
void handle_config_value_changed(double value);
void handle_custom_context_menu_requested(const QPoint& pos);
void handle_show_owned_games_only(int state);
void handle_show_owned_games_only(Qt::CheckState state);
private:
void refresh(bool restore_layout = false);

View file

@ -1433,7 +1433,7 @@ settings_dialog::settings_dialog(std::shared_ptr<gui_settings> gui_settings, std
if (game)
ui->gb_DiskCacheClearing->setDisabled(true);
else
connect(ui->enableCacheClearing, &QCheckBox::stateChanged, ui->maximumCacheSize, &QSlider::setEnabled);
connect(ui->enableCacheClearing, &QCheckBox::checkStateChanged, ui->maximumCacheSize, &QSlider::setEnabled);
// Date Time Edit Box
m_emu_settings->EnhanceDateTimeEdit(ui->console_time_edit, emu_settings_type::ConsoleTimeOffset, tr("dd MMM yyyy HH:mm"), true, true, 15000);
@ -1580,7 +1580,7 @@ settings_dialog::settings_dialog(std::shared_ptr<gui_settings> gui_settings, std
ui->mfcDelayCommand->setChecked(m_emu_settings->GetSetting(emu_settings_type::MFCCommandsShuffling) == "1");
SubscribeTooltip(ui->mfcDelayCommand, tooltips.settings.mfc_delay_command);
connect(ui->mfcDelayCommand, &QCheckBox::stateChanged, [&](int val)
connect(ui->mfcDelayCommand, &QCheckBox::checkStateChanged, [&](Qt::CheckState val)
{
const std::string str = val != Qt::Unchecked ? "1" : "0";
m_emu_settings->SetSetting(emu_settings_type::MFCCommandsShuffling, str);

View file

@ -33,6 +33,7 @@
#include <QWheelEvent>
#include <QGuiApplication>
#include <QScreen>
#include <QTimeZone>
LOG_CHANNEL(gui_log, "GUI");
@ -522,6 +523,13 @@ void trophy_manager_dialog::RepaintUI(bool restore_layout)
//m_trophy_table->horizontalHeader()->resizeSections(QHeaderView::ResizeMode::ResizeToContents);
}
if (restore_layout)
{
// Make sure the actions and the headers are synced
m_game_table->sync_header_actions(m_game_column_acts, [this](int col) { return m_gui_settings->GetTrophyGamelistColVisibility(static_cast<gui::trophy_game_list_columns>(col)); });
m_trophy_table->sync_header_actions(m_trophy_column_acts, [this](int col) { return m_gui_settings->GetTrophylistColVisibility(static_cast<gui::trophy_list_columns>(col)); });
}
ApplyFilter();
// Show dialog and then paint gui in order to adjust headers correctly
@ -543,6 +551,10 @@ void trophy_manager_dialog::HandleRepaintUiRequest()
m_game_table->horizontalHeader()->restoreState(game_table_state);
m_trophy_table->horizontalHeader()->restoreState(trophy_table_state);
// Make sure the actions and the headers are synced
m_game_table->sync_header_actions(m_game_column_acts, [this](int col) { return m_gui_settings->GetTrophyGamelistColVisibility(static_cast<gui::trophy_game_list_columns>(col)); });
m_trophy_table->sync_header_actions(m_trophy_column_acts, [this](int col) { return m_gui_settings->GetTrophylistColVisibility(static_cast<gui::trophy_list_columns>(col)); });
resize(window_size);
}
@ -1331,7 +1343,7 @@ QDateTime trophy_manager_dialog::TickToDateTime(u64 tick)
const QDateTime datetime(
QDate(rtc_date.year, rtc_date.month, rtc_date.day),
QTime(rtc_date.hour, rtc_date.minute, rtc_date.second, rtc_date.microsecond / 1000),
Qt::TimeSpec::UTC);
QTimeZone::UTC);
return datetime.toLocalTime();
}

View file

@ -410,10 +410,12 @@ namespace utils
return static_cast<T>(value * u64{numerator} / u64{denominator});
}
#if is_u128_emulated
if constexpr (sizeof(T) <= sizeof(u128) / 2)
{
return static_cast<T>(value * u128{numerator} / u64{denominator});
return static_cast<T>(u128_from_mul(value, numerator) / u64{denominator});
}
#endif
return static_cast<T>(value / denominator * numerator + (value % denominator) * numerator / denominator);
}
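The final fallback avoids the wide multiply by splitting the value: since value == (value / d) * d + (value % d), it follows that value * n / d == (value / d) * n + (value % d) * n / d, which stays within 64 bits as long as the remainder-times-numerator product does.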
@ -464,3 +466,7 @@ namespace utils
} // namespace utils
using utils::busy_wait;
#ifdef _MSC_VER
using utils::operator/;
#endif

View file

@ -434,7 +434,7 @@ static u32 cond_alloc(uptr iptr, u32 tls_slot = -1)
});
// Set lowest clear bit
const u64 bits = s_cond_bits[level3].fetch_op(FN(x |= x + 1, void()));
const u64 bits = s_cond_bits[level3].fetch_op(AOFN(x |= x + 1, void()));
// Find lowest clear bit (before it was set in fetch_op)
const u32 id = level3 * 64 + std::countr_one(bits);
@ -503,9 +503,9 @@ static void cond_free(u32 cond_id, u32 tls_slot = -1)
// Release the semaphore tree in the reverse order
s_cond_bits[cond_id / 64] &= ~(1ull << (cond_id % 64));
s_cond_sem3[level2].atomic_op(FN(x -= u128{1} << (level3 * 7)));
s_cond_sem2[level1].atomic_op(FN(x -= u128{1} << (level2 * 11)));
s_cond_sem1.atomic_op(FN(x -= u128{1} << (level1 * 14)));
s_cond_sem3[level2].atomic_op(AOFN(x -= u128{1} << (level3 * 7)));
s_cond_sem2[level1].atomic_op(AOFN(x -= u128{1} << (level2 * 11)));
s_cond_sem1.atomic_op(AOFN(x -= u128{1} << (level1 * 14)));
}
static cond_handle* cond_id_lock(u32 cond_id, uptr iptr = 0)
@ -674,19 +674,28 @@ u64 utils::get_unique_tsc()
{
const u64 stamp0 = utils::get_tsc();
return s_min_tsc.atomic_op([&](u64& tsc)
if (!s_min_tsc.fetch_op([=](u64& tsc)
{
if (stamp0 <= s_min_tsc)
if (stamp0 <= tsc)
{
// Add 1 if new stamp is too old
return ++tsc;
return false;
}
else
{
// Update last tsc with new stamp otherwise
return ((tsc = stamp0));
tsc = stamp0;
return true;
}
});
}).second)
{
// Add 1 if new stamp is too old
// Avoid doing it inside the atomic operation because reaching this point means there is already heavy contention
// So break the race (at least on x86)
return s_min_tsc.add_fetch(1);
}
return stamp0;
}
atomic_t<u16>* root_info::slot_alloc(uptr ptr) noexcept

View file

@ -1233,6 +1233,7 @@ public:
// Atomic operation; returns old value, or pair of old value and return value (cancel op if evaluates to false)
template <typename F, typename RT = std::invoke_result_t<F, T&>>
requires (!std::is_invocable_v<F, const T> && !std::is_invocable_v<F, volatile T>)
std::conditional_t<std::is_void_v<RT>, type, std::pair<type, RT>> fetch_op(F func)
{
type _new, old = atomic_storage<type>::load(m_data);
@ -1264,6 +1265,7 @@ public:
// Atomic operation; returns function result value, function is the lambda
template <typename F, typename RT = std::invoke_result_t<F, T&>>
requires (!std::is_invocable_v<F, const T> && !std::is_invocable_v<F, volatile T>)
RT atomic_op(F func)
{
type _new, old = atomic_storage<type>::load(m_data);
@ -1798,3 +1800,31 @@ struct std::common_type<T, atomic_t<T2, Align2>> : std::common_type<std::common_
#pragma GCC diagnostic pop
#pragma GCC diagnostic pop
#endif
namespace utils
{
template <typename F>
struct aofn_helper
{
F f;
aofn_helper(F&& f) noexcept
: f(std::forward<F>(f))
{
}
template <typename Arg> requires (std::is_same_v<std::remove_reference_t<Arg>, std::remove_cvref_t<Arg>> && !std::is_rvalue_reference_v<Arg>)
auto operator()(Arg& arg) const noexcept
{
return f(std::forward<Arg&>(arg));
}
};
template <typename F>
aofn_helper(F&& f) -> aofn_helper<F>;
}
// Shorter lambda for non-cv qualified L-values
// For use with atomic operations
#define AOFN(...) \
::utils::aofn_helper([&](auto& x) { return (__VA_ARGS__); })
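Usage mirrors the call sites converted in sync.cpp above:

// Set the lowest clear bit, returning the previous value
const u64 bits = s_cond_bits[level3].fetch_op(AOFN(x |= x + 1, void()));
// Release one slot in the semaphore tree
s_cond_sem1.atomic_op(AOFN(x -= u128{1} << (level1 * 14)));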

View file

@ -451,47 +451,47 @@ void logs::message::broadcast(const char* fmt, const fmt_type_info* sup, ...) co
logs::file_writer::file_writer(const std::string& name, u64 max_size)
: m_max_size(max_size)
{
if (!name.empty() && max_size)
if (name.empty() || !max_size)
{
// Initialize ringbuffer
m_fptr = std::make_unique<uchar[]>(s_log_size);
return;
}
// Actual log file (allowed to fail)
if (!m_fout.open(name, fs::rewrite))
{
fprintf(stderr, "Log file open failed: %s (error %d)\n", name.c_str(), errno);
}
// Initialize ringbuffer
m_fptr = std::make_unique<uchar[]>(s_log_size);
// Compressed log, make it inaccessible (foolproof)
if (m_fout2.open(name + ".gz", fs::rewrite + fs::unread))
{
// Actual log file (allowed to fail)
if (!m_fout.open(name, fs::rewrite))
{
fprintf(stderr, "Log file open failed: %s (error %d)\n", name.c_str(), errno);
}
// Compressed log, make it inaccessible (foolproof)
if (m_fout2.open(name + ".gz", fs::rewrite + fs::unread))
{
#ifndef _MSC_VER
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wold-style-cast"
#endif
if (deflateInit2(&m_zs, 9, Z_DEFLATED, 16 + 15, 9, Z_DEFAULT_STRATEGY) != Z_OK)
if (deflateInit2(&m_zs, 9, Z_DEFLATED, 16 + 15, 9, Z_DEFAULT_STRATEGY) != Z_OK)
#ifndef _MSC_VER
#pragma GCC diagnostic pop
#endif
m_fout2.close();
}
if (!m_fout2)
{
fprintf(stderr, "Log file open failed: %s.gz (error %d)\n", name.c_str(), errno);
m_fout2.close();
}
}
if (!m_fout2)
{
fprintf(stderr, "Log file open failed: %s.gz (error %d)\n", name.c_str(), errno);
}
#ifdef _WIN32
// Autodelete compressed log file
FILE_DISPOSITION_INFO disp;
disp.DeleteFileW = true;
SetFileInformationByHandle(m_fout2.get_handle(), FileDispositionInfo, &disp, sizeof(disp));
// Autodelete compressed log file
FILE_DISPOSITION_INFO disp{};
disp.DeleteFileW = true;
SetFileInformationByHandle(m_fout2.get_handle(), FileDispositionInfo, &disp, sizeof(disp));
#endif
}
else
{
return;
}
m_writer = std::thread([this]()
{

View file

@ -22,6 +22,8 @@
#endif
#endif
#include <thread>
#include "util/asm.hpp"
#include "util/fence.hpp"
@ -734,12 +736,32 @@ bool utils::get_low_power_mode()
#endif
}
static constexpr ullong round_tsc(ullong val)
static constexpr ullong round_tsc(ullong val, ullong known_error)
{
return utils::rounded_div(val, 1'000'000) * 1'000'000;
if (known_error >= 500'000)
{
// Do not accept large errors
return 0;
}
ullong by = 1000;
known_error /= 1000;
while (known_error && by < 100'000)
{
by *= 10;
known_error /= 10;
}
return utils::rounded_div(val, by) * by;
}
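A worked example of the new rounding: for a measured 3'999'997'000 Hz with known_error = 30'000 Hz, known_error / 1000 == 30, so the loop scales `by` from 1'000 up to 100'000; rounded_div then snaps the result to the nearest 100 kHz, yielding exactly 4'000'000'000 Hz. Errors of 500 kHz or more reject the measurement entirely (return 0).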
ullong utils::get_tsc_freq()
namespace utils
{
u64 s_tsc_freq = 0;
}
static const bool s_tsc_freq_evaluated = []() -> bool
{
static const ullong cal_tsc = []() -> ullong
{
@ -749,7 +771,7 @@ ullong utils::get_tsc_freq()
return r;
#endif
if (!has_invariant_tsc())
if (!utils::has_invariant_tsc())
return 0;
#ifdef _WIN32
@ -758,64 +780,109 @@ ullong utils::get_tsc_freq()
return 0;
if (freq.QuadPart <= 9'999'999)
return round_tsc(freq.QuadPart * 1024);
return 0;
const ullong timer_freq = freq.QuadPart;
#else
const ullong timer_freq = 1'000'000'000;
constexpr ullong timer_freq = 1'000'000'000;
#endif
// Calibrate TSC
constexpr int samples = 40;
ullong rdtsc_data[samples];
ullong timer_data[samples];
[[maybe_unused]] ullong error_data[samples];
constexpr u64 retry_count = 1024;
// Narrow thread affinity to a single core
const u64 old_aff = thread_ctrl::get_thread_affinity_mask();
thread_ctrl::set_thread_affinity_mask(old_aff & (0 - old_aff));
// First entry is for the onset measurement, last is for the end measurement
constexpr usz sample_count = 2;
std::array<u64, sample_count> rdtsc_data{};
std::array<u64, sample_count> rdtsc_diff{};
std::array<u64, sample_count> timer_data{};
#ifndef _WIN32
#ifdef _WIN32
LARGE_INTEGER ctr0;
QueryPerformanceCounter(&ctr0);
const ullong time_base = ctr0.QuadPart;
#else
struct timespec ts0;
clock_gettime(CLOCK_MONOTONIC, &ts0);
ullong sec_base = ts0.tv_sec;
const ullong sec_base = ts0.tv_sec;
#endif
for (int i = 0; i < samples; i++)
constexpr usz sleep_time_ms = 40;
for (usz sample = 0; sample < sample_count; sample++)
{
for (usz i = 0; i < retry_count; i++)
{
const u64 rdtsc_read = (utils::lfence(), utils::get_tsc());
#ifdef _WIN32
Sleep(1);
error_data[i] = (utils::lfence(), utils::get_tsc());
LARGE_INTEGER ctr;
QueryPerformanceCounter(&ctr);
rdtsc_data[i] = (utils::lfence(), utils::get_tsc());
timer_data[i] = ctr.QuadPart;
LARGE_INTEGER ctr;
QueryPerformanceCounter(&ctr);
#else
usleep(200);
error_data[i] = (utils::lfence(), utils::get_tsc());
struct timespec ts;
clock_gettime(CLOCK_MONOTONIC, &ts);
rdtsc_data[i] = (utils::lfence(), utils::get_tsc());
timer_data[i] = ts.tv_nsec + (ts.tv_sec - sec_base) * 1'000'000'000;
struct timespec ts;
clock_gettime(CLOCK_MONOTONIC, &ts);
#endif
const u64 rdtsc_read2 = (utils::lfence(), utils::get_tsc());
#ifdef _WIN32
const u64 timer_read = ctr.QuadPart - time_base;
#else
const u64 timer_read = ts.tv_nsec + (ts.tv_sec - sec_base) * 1'000'000'000;
#endif
if (i == 0 || (rdtsc_read2 >= rdtsc_read && rdtsc_read2 - rdtsc_read < rdtsc_diff[sample]))
{
rdtsc_data[sample] = rdtsc_read; // Note: rdtsc_read2 would also be valid here, given the accuracy assumption below
timer_data[sample] = timer_read;
rdtsc_diff[sample] = rdtsc_read2 >= rdtsc_read ? rdtsc_read2 - rdtsc_read : u64{umax};
}
// 80 results in an error range of 4000 hertz (0.00025% of 4GHz CPU, quite acceptable)
// Error of 2.5 seconds per month
if (rdtsc_read2 - rdtsc_read < 80 && rdtsc_read2 >= rdtsc_read)
{
break;
}
// 8 yields seem to reduce thread contention significantly, improving accuracy
// Even 3 seem to do the job, but yield more just in case
if (i % 128 == 64)
{
std::this_thread::yield();
}
// Yield 50% more often during the last sample: the extra elapsed time there improves accuracy further
if (sample == sample_count - 1 && i % 256 == 128)
{
std::this_thread::yield();
}
}
if (sample < sample_count - 1)
{
// Sleep between first and last sample
#ifdef _WIN32
Sleep(sleep_time_ms);
#else
usleep(sleep_time_ms * 1000);
#endif
}
}
// Restore main thread affinity
thread_ctrl::set_thread_affinity_mask(old_aff);
// Compute average TSC
ullong acc = 0;
for (int i = 0; i < samples - 1; i++)
if (timer_data[1] == timer_data[0])
{
acc += (rdtsc_data[i + 1] - rdtsc_data[i]) * timer_freq / (timer_data[i + 1] - timer_data[i]);
// Division by zero
return 0;
}
const u128 data = u128_from_mul(rdtsc_data[1] - rdtsc_data[0], timer_freq);
const u64 res = utils::udiv128(static_cast<u64>(data >> 64), static_cast<u64>(data), (timer_data[1] - timer_data[0]));
// Rounding
return round_tsc(acc / (samples - 1));
return round_tsc(res, utils::mul_saturate<u64>(utils::add_saturate<u64>(rdtsc_diff[0], rdtsc_diff[1]), utils::aligned_div(timer_freq, timer_data[1] - timer_data[0])));
}();
return cal_tsc;
}
atomic_storage<u64>::release(utils::s_tsc_freq, cal_tsc);
return true;
}();
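Net effect: the calibration computes freq = (rdtsc_data[1] - rdtsc_data[0]) * timer_freq / (timer_data[1] - timer_data[0]) across the ~40 ms window, and folds the per-sample read uncertainty into an approximate Hz error bound, (rdtsc_diff[0] + rdtsc_diff[1]) * timer_freq / elapsed, which round_tsc() uses to pick its rounding granularity.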
u64 utils::get_total_memory()
{

View file

@ -73,8 +73,6 @@ namespace utils
bool get_low_power_mode();
ullong get_tsc_freq();
u64 get_total_memory();
u32 get_thread_count();
@ -89,4 +87,11 @@ namespace utils
u64 _get_main_tid();
inline const u64 main_tid = _get_main_tid();
extern u64 s_tsc_freq;
inline ullong get_tsc_freq()
{
return s_tsc_freq;
}
}

View file

@ -566,6 +566,22 @@ struct s128 : u128
};
#endif
// Optimization for u64*u64=u128
constexpr u128 u128_from_mul(u64 a, u64 b)
{
#ifdef _MSC_VER
if (!std::is_constant_evaluated())
{
u64 hi;
u128 result = _umul128(a, b, &hi);
result.hi = hi;
return result;
}
#endif
return u128{a} * b;
}
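The TSC calibration above is its first consumer; the general pattern (variable names here are illustrative):

// Widen the multiply to 128 bits, then do a 128-by-64 divide
const u128 product = u128_from_mul(tsc_delta, timer_freq);
const u64 freq = utils::udiv128(static_cast<u64>(product >> 64), // high half
                                static_cast<u64>(product),       // low half
                                timer_delta);                     // 64-bit divisor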
template <>
struct get_int_impl<16>
{