diff --git a/.gitmodules b/.gitmodules index 3216038e4c..0ac7381b73 100644 --- a/.gitmodules +++ b/.gitmodules @@ -7,8 +7,8 @@ ignore = dirty [submodule "llvm"] path = llvm - url = https://github.com/llvm-mirror/llvm - branch = release_40 + url = https://github.com/RPCS3/llvm + branch = release_60 [submodule "GSL"] path = 3rdparty/GSL url = https://github.com/Microsoft/GSL.git @@ -35,6 +35,7 @@ path = 3rdparty/hidapi url = https://github.com/RPCS3/hidapi branch = master + ignore = dirty [submodule "3rdparty/Optional"] path = 3rdparty/Optional url = https://github.com/akrzemi1/Optional.git diff --git a/.travis.yml b/.travis.yml index f214250f13..cf1e8550ca 100644 --- a/.travis.yml +++ b/.travis.yml @@ -26,9 +26,6 @@ git: submodules: false before_install: - - if [ "$CC" = "clang" ]; then - export CXX="clang++-4.0" CC="clang-4.0"; - fi; - if [ "$TRAVIS_OS_NAME" = "linux" ] && [ "$CXX" = "g++" ]; then export CXX="g++-5" CC="gcc-5" CXXFLAGS="-Wno-format-security"; fi; @@ -65,7 +62,7 @@ before_script: - cmake .. 
-DCMAKE_INSTALL_PREFIX=/usr -G Ninja; - ninja - # AppImage generation - - if [ -n "$UPLOAD_URL" ] && [ "$TRAVIS_BRANCH" = "master" ] && [ "$CC" = "clang-4.0" ] && [ "$TRAVIS_PULL_REQUEST" = false ]; then + - if [ -n "$UPLOAD_URL" ] && [ "$TRAVIS_BRANCH" = "master" ] && [ "$CC" = "clang" ] && [ "$TRAVIS_PULL_REQUEST" = false ]; then export LD_LIBRARY_PATH=~/Qt/5.10.0/gcc_64/lib; DESTDIR=appdir ninja install ; find appdir/ ; find ../bin ; @@ -80,6 +77,7 @@ before_script: cp ~/Qt/5.10.0/gcc_64/plugins/xcbglintegrations/* ./appdir/usr/plugins/xcbglintegrations/ ; cp ~/Qt/5.10.0/gcc_64/plugins/imageformats/* ./appdir/usr/plugins/imageformats/ ; cp ~/Qt/5.10.0/gcc_64/plugins/platforms/* ./appdir/usr/plugins/platforms/ ; + rm ./appdir/usr/lib/libfreetype.so.6 ; export PATH=${TRAVIS_BUILD_DIR}/build/squashfs-root/usr/bin/:${PATH} ; ./squashfs-root/usr/bin/appimagetool ${TRAVIS_BUILD_DIR}/build/appdir ; find ./appdir -executable -type f -exec ldd {} \; | grep " => /usr" | cut -d " " -f 2-3 | sort | uniq ; @@ -111,7 +109,8 @@ addons: - libc6-dev - llvm-4.0 - llvm-4.0-dev - - clang-4.0 + # Clang 5.0 is now bundled in travis, so we no longer need the ppa version. 
+ #- clang-4.0 - libedit-dev - g++-5 - gcc-5 diff --git a/Utilities/File.cpp b/Utilities/File.cpp index f9c1cf7425..d7f96e75dc 100644 --- a/Utilities/File.cpp +++ b/Utilities/File.cpp @@ -1034,7 +1034,7 @@ fs::file::file(const std::string& path, bs_t mode) (fmt::throw_exception("Invalid whence (0x%x)" HERE, whence), 0); const auto result = ::lseek(m_fd, offset, mode); - + if (result == -1) { g_tls_error = to_error(errno); @@ -1258,7 +1258,12 @@ bool fs::dir::open(const std::string& path) } struct ::stat file_info; - verify("dir::read" HERE), ::fstatat(::dirfd(m_dd), found->d_name, &file_info, 0) == 0; + + if (::fstatat(::dirfd(m_dd), found->d_name, &file_info, 0) != 0) + { + //failed metadata (broken symlink?), ignore and skip to next file + return read(info); + } info.name = found->d_name; info.is_directory = S_ISDIR(file_info.st_mode); @@ -1358,7 +1363,7 @@ std::string fs::get_data_dir(const std::string& prefix, const std::string& locat continue; } - + buf.push_back(c); } diff --git a/Utilities/bin_patch.cpp b/Utilities/bin_patch.cpp index afdfb5e875..f8fe60bdb5 100644 --- a/Utilities/bin_patch.cpp +++ b/Utilities/bin_patch.cpp @@ -32,13 +32,23 @@ void patch_engine::append(const std::string& patch) { if (fs::file f{patch}) { - auto root = YAML::Load(f.to_string()); + YAML::Node root; + + try + { + root = YAML::Load(f.to_string()); + } + catch (const std::exception& e) + { + LOG_FATAL(GENERAL, "Failed to load patch file %s\n%s thrown: %s", patch, typeid(e).name(), e.what()); + return; + } for (auto pair : root) { auto& name = pair.first.Scalar(); auto& data = m_map[name]; - + for (auto patch : pair.second) { u64 type64 = 0; @@ -91,7 +101,7 @@ void patch_engine::append(const std::string& patch) break; } } - + data.emplace_back(info); } } diff --git a/Utilities/sysinfo.cpp b/Utilities/sysinfo.cpp index 421d7655c4..8ec13aaf6d 100644 --- a/Utilities/sysinfo.cpp +++ b/Utilities/sysinfo.cpp @@ -39,6 +39,12 @@ bool utils::has_512() return g_value; } +bool 
utils::has_xop() +{ + static const bool g_value = has_avx() && get_cpuid(0x80000001, 0)[2] & 0x800; + return g_value; +} + std::string utils::get_system_info() { std::string result; @@ -92,6 +98,11 @@ std::string utils::get_system_info() { result += '+'; } + + if (has_xop()) + { + result += 'x'; + } } if (has_rtm()) diff --git a/Utilities/sysinfo.h b/Utilities/sysinfo.h index 1ef041078c..4151acb15a 100644 --- a/Utilities/sysinfo.h +++ b/Utilities/sysinfo.h @@ -26,6 +26,8 @@ namespace utils bool has_512(); + bool has_xop(); + inline bool transaction_enter() { while (true) diff --git a/appveyor.yml b/appveyor.yml index c75759ea54..d26daf6141 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,79 +1,100 @@ -#---------------------------------# -# general configuration # -#---------------------------------# -# version format -version: '0.0.4-{build}' +version: '{build}' -#---------------------------------# -# environment configuration # -#---------------------------------# - -# Build worker image (VM template) image: Visual Studio 2015 -# clone directory -clone_folder: c:\projects\rpcs3 - -# environment variables environment: - QTDIR: C:\Qt\5.9\msvc2015_64 + QTDIR: C:\Qt\5.10.0\msvc2015_64 + LLVMLIBS: https://drive.google.com/uc?export=download&id=0B8A6NaxhQAGRY2k3Q2Yya05lcm8 + VULKAN: https://drive.google.com/uc?export=download&id=1A2eOMmCO714i0U7J0qI4aEMKnuWl8l_R + COMPATDB: https://rpcs3.net/compatibility?api=v1&export -# build cache to preserve files/folders between builds cache: - - llvmlibs.7z -> appveyor.yml - - vulkan.7z -> appveyor.yml +- llvmlibs.7z -> appveyor.yml +- vulkan.7z -> appveyor.yml +- compat_database.dat -# scripts that run after cloning repository install: - - git submodule update --init 3rdparty/cereal 3rdparty/ffmpeg 3rdparty/GSL 3rdparty/hidapi 3rdparty/libpng 3rdparty/Optional 3rdparty/pugixml 3rdparty/zlib asmjit Utilities/yaml-cpp Vulkan/glslang Vulkan/Vulkan-LoaderAndValidationLayers +- ps: | # set env vars for versioning + $commDate = 
$env:APPVEYOR_REPO_COMMIT_TIMESTAMP.Substring(0,10) + $commSha = $env:APPVEYOR_REPO_COMMIT.Substring(0,8) + $commTag = $(git describe --tags $(git rev-list --tags --max-count=1)) + + $env:AVVER = "{0}-{1}" -f $commTag.TrimStart("v"), $env:APPVEYOR_BUILD_NUMBER + $env:BUILD = "rpcs3-{0}-{1}-{2}_win64.7z" -f $commTag, $commDate, $commSha -#---------------------------------# -# build configuration # -#---------------------------------# +- ps: | # used for experimental build warnings for pr builds + $env:BRANCH = "{0}/{1}/#{2}" -f $env:APPVEYOR_REPO_NAME, ` + $env:APPVEYOR_REPO_BRANCH, $env:APPVEYOR_PULL_REQUEST_NUMBER + $env:BRANCH = $env:BRANCH -replace "/#$" + +- ps: | # misc global settings + $env:PATH += $env:QTDIR + [net.servicepointmanager]::securityprotocol = "tls12, tls11, tls" + +- ps: | # update and init submodules + git submodule -q update --init ` + 3rdparty/cereal ` + 3rdparty/ffmpeg ` + 3rdparty/GSL ` + 3rdparty/hidapi ` + 3rdparty/libpng ` + 3rdparty/Optional ` + 3rdparty/pugixml ` + 3rdparty/zlib ` + asmjit ` + Utilities/yaml-cpp ` + Vulkan/glslang ` + Vulkan/Vulkan-LoaderAndValidationLayers -# build platform, i.e. x86, x64, Any CPU. This setting is optional. platform: x64 -# build Configuration, i.e. Debug, Release, etc. 
configuration: Release - LLVM build: - parallel: true # enable MSBuild parallel builds - project: rpcs3.sln # path to Visual Studio solution or project - # MSBuild verbosity level + parallel: true + project: rpcs3.sln verbosity: normal -# scripts to run before build before_build: - - set BRANCH=%APPVEYOR_REPO_NAME%/%APPVEYOR_REPO_BRANCH%/#%APPVEYOR_PULL_REQUEST_NUMBER% - - if "%APPVEYOR_PULL_REQUEST_NUMBER%"=="" (set BRANCH=%APPVEYOR_REPO_NAME%/%APPVEYOR_REPO_BRANCH%) - - set PATH=%PATH%;%QTDIR% - - if not exist llvmlibs.7z appveyor DownloadFile "https://drive.google.com/uc?export=download&id=0B8A6NaxhQAGRY2k3Q2Yya05lcm8" -FileName llvmlibs.7z - - 7z x llvmlibs.7z -aos -o%APPVEYOR_BUILD_FOLDER% > null - - if not exist vulkan.7z appveyor DownloadFile "https://drive.google.com/uc?export=download&id=1A2eOMmCO714i0U7J0qI4aEMKnuWl8l_R" -FileName vulkan.7z - - 7z x vulkan.7z -aos -o"%APPVEYOR_BUILD_FOLDER%\lib\%CONFIGURATION%-%PLATFORM%" > null +- ps: | # fetch precompiled build dependencies + if (!(test-path llvmlibs.7z)) { irm $env:LLVMLIBS -outfile llvmlibs.7z } + if (!(test-path vulkan.7z)) { irm $env:VULKAN -outfile vulkan.7z } + 7z x llvmlibs.7z -aos -o"." 
| out-null + 7z x vulkan.7z -aos -o".\lib\$env:CONFIGURATION-$env:PLATFORM" | out-null -# scripts to run *after* solution is built and *before* automatic packaging occurs (web apps, NuGet packages, Azure Cloud Services) -before_package: - - rm %APPVEYOR_BUILD_FOLDER%\bin\rpcs3.exp - - rm %APPVEYOR_BUILD_FOLDER%\bin\rpcs3.lib - - rm %APPVEYOR_BUILD_FOLDER%\bin\rpcs3.pdb - - set COMMIT_DATE=%APPVEYOR_REPO_COMMIT_TIMESTAMP:~0,10% - - set COMMIT_SHA=%APPVEYOR_REPO_COMMIT:~0,8% +after_build: +- ps: | # remove unnecessary files + rm .\bin\rpcs3.exp, .\bin\rpcs3.lib, .\bin\rpcs3.pdb -#---------------------------------# -# tests configuration # -#---------------------------------# +- ps: | # prepare compatibility database for packaging + $db = irm $env:COMPATDB -erroraction silentlycontinue + if ($db -and $db.return_code -eq 0) { + $db | convertto-json -compress | out-file compat_database.dat -encoding utf8 + } + copy-item compat_database.dat .\bin\GuiConfigs\compat_database.dat + +- ps: | # package artifacts + 7z a -m0=LZMA2 -mx9 $env:BUILD .\bin\* + 7z a -m0=LZMA2 -mx9 openssl_win64.7z C:\OpenSSL-Win64\bin\libeay32.dll ` + C:\OpenSSL-Win64\bin\ssleay32.dll + +- ps: | # generate sha256 hashes + (Get-FileHash $env:BUILD -Algorithm SHA256).Hash | Out-File -encoding ASCII "$($env:BUILD).sha256" + (Get-FileHash openssl_win64.7z -Algorithm SHA256).Hash | Out-File -encoding ASCII "openssl_win64.7z.sha256" -# to disable automatic tests test: off -#---------------------------------# -# artifacts configuration # -#---------------------------------# - -# pushing entire folder as a zip archive artifacts: - - path: bin - name: 'rpcs3-v0.0.4-$(COMMIT_DATE)-$(COMMIT_SHA)_win64' +- path: $(BUILD) + name: rpcs3 +- path: $(BUILD).sha256 + name: rpcs3 sha256 hash +- path: openssl_win64.7z + name: openssl +- path: openssl_win64.7z.sha256 + name: openssl sha256 hash + +on_finish: +- ps: | # update appveyor build version, done last to prevent webhook breakage + update-appveyorbuild -version 
$env:AVVER diff --git a/rpcs3/CMakeLists.txt b/rpcs3/CMakeLists.txt index 3e0b8d0ef2..0fc8c8cc90 100644 --- a/rpcs3/CMakeLists.txt +++ b/rpcs3/CMakeLists.txt @@ -457,6 +457,13 @@ endif() cotire(rpcs3) +if (UNIX) +# Copy icons to executable directory +add_custom_command(TARGET rpcs3 POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_directory + ${CMAKE_SOURCE_DIR}/bin/Icons $/Icons) +endif() + # Unix installation if(UNIX AND NOT APPLE) # Install the binary diff --git a/rpcs3/Emu/CPU/CPUTranslator.cpp b/rpcs3/Emu/CPU/CPUTranslator.cpp index 4f3ad59106..6ef7c521b6 100644 --- a/rpcs3/Emu/CPU/CPUTranslator.cpp +++ b/rpcs3/Emu/CPU/CPUTranslator.cpp @@ -1 +1,13 @@ +#ifdef LLVM_AVAILABLE + #include "CPUTranslator.h" + +cpu_translator::cpu_translator(llvm::LLVMContext& context, llvm::Module* module, bool is_be) + : m_context(context) + , m_module(module) + , m_is_be(is_be) +{ + +} + +#endif \ No newline at end of file diff --git a/rpcs3/Emu/CPU/CPUTranslator.h b/rpcs3/Emu/CPU/CPUTranslator.h index 6f70f09bee..a6290ccc6e 100644 --- a/rpcs3/Emu/CPU/CPUTranslator.h +++ b/rpcs3/Emu/CPU/CPUTranslator.h @@ -1 +1,750 @@ #pragma once + +#ifdef LLVM_AVAILABLE + +#include "restore_new.h" +#ifdef _MSC_VER +#pragma warning(push, 0) +#endif +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Module.h" +#ifdef _MSC_VER +#pragma warning(pop) +#endif +#include "define_new_memleakdetect.h" + +#include "../Utilities/types.h" +#include "../Utilities/StrFmt.h" +#include "../Utilities/BEType.h" +#include "../Utilities/BitField.h" + +#include +#include +#include +#include +#include +#include + +template +struct llvm_value_t +{ + static_assert(std::is_same::value, "llvm_value_t<> error: unknown type"); + + using type = void; + static constexpr uint esize = 0; + static constexpr bool is_int = false; + static constexpr bool is_sint = false; + static constexpr bool is_uint = false; + static constexpr bool is_float = false; + static constexpr uint is_vector = false; + 
static constexpr uint is_pointer = false; + + static llvm::Type* get_type(llvm::LLVMContext& context) + { + return llvm::Type::getVoidTy(context); + } + + llvm::Value* eval(llvm::IRBuilder<>* ir) const + { + return value; + } + + llvm::Value* value; + + // llvm_value_t() = default; + + // llvm_value_t(llvm::Value* value) + // : value(value) + // { + // } +}; + +template <> +struct llvm_value_t : llvm_value_t +{ + using type = bool; + using base = llvm_value_t; + using base::base; + + static constexpr uint esize = 1; + static constexpr uint is_int = true; + + static llvm::Type* get_type(llvm::LLVMContext& context) + { + return llvm::Type::getInt1Ty(context); + } +}; + +template <> +struct llvm_value_t : llvm_value_t +{ + using type = char; + using base = llvm_value_t; + using base::base; + + static constexpr uint esize = 8; + static constexpr bool is_int = true; + + static llvm::Type* get_type(llvm::LLVMContext& context) + { + return llvm::Type::getInt8Ty(context); + } +}; + +template <> +struct llvm_value_t : llvm_value_t +{ + using type = s8; + using base = llvm_value_t; + using base::base; + + static constexpr bool is_sint = true; +}; + +template <> +struct llvm_value_t : llvm_value_t +{ + using type = u8; + using base = llvm_value_t; + using base::base; + + static constexpr bool is_uint = true; +}; + +template <> +struct llvm_value_t : llvm_value_t +{ + using type = s16; + using base = llvm_value_t; + using base::base; + + static constexpr uint esize = 16; + + static llvm::Type* get_type(llvm::LLVMContext& context) + { + return llvm::Type::getInt16Ty(context); + } +}; + +template <> +struct llvm_value_t : llvm_value_t +{ + using type = u16; + using base = llvm_value_t; + using base::base; + + static constexpr bool is_sint = false; + static constexpr bool is_uint = true; +}; + +template <> +struct llvm_value_t : llvm_value_t +{ + using type = s32; + using base = llvm_value_t; + using base::base; + + static constexpr uint esize = 32; + + static llvm::Type* 
get_type(llvm::LLVMContext& context) + { + return llvm::Type::getInt32Ty(context); + } +}; + +template <> +struct llvm_value_t : llvm_value_t +{ + using type = u32; + using base = llvm_value_t; + using base::base; + + static constexpr bool is_sint = false; + static constexpr bool is_uint = true; +}; + +template <> +struct llvm_value_t : llvm_value_t +{ + using type = s64; + using base = llvm_value_t; + using base::base; + + static constexpr uint esize = 64; + + static llvm::Type* get_type(llvm::LLVMContext& context) + { + return llvm::Type::getInt64Ty(context); + } +}; + +template <> +struct llvm_value_t : llvm_value_t +{ + using type = u64; + using base = llvm_value_t; + using base::base; + + static constexpr bool is_sint = false; + static constexpr bool is_uint = true; +}; + +template <> +struct llvm_value_t : llvm_value_t +{ + using type = s128; + using base = llvm_value_t; + using base::base; + + static constexpr uint esize = 128; + + static llvm::Type* get_type(llvm::LLVMContext& context) + { + return llvm::Type::getIntNTy(context, 128); + } +}; + +template <> +struct llvm_value_t : llvm_value_t +{ + using type = u128; + using base = llvm_value_t; + using base::base; + + static constexpr bool is_sint = false; + static constexpr bool is_uint = true; +}; + +template <> +struct llvm_value_t : llvm_value_t +{ + using type = f32; + using base = llvm_value_t; + using base::base; + + static constexpr uint esize = 32; + static constexpr bool is_float = true; + + static llvm::Type* get_type(llvm::LLVMContext& context) + { + return llvm::Type::getFloatTy(context); + } +}; + +template <> +struct llvm_value_t : llvm_value_t +{ + using type = f64; + using base = llvm_value_t; + using base::base; + + static constexpr uint esize = 64; + static constexpr bool is_float = true; + + static llvm::Type* get_type(llvm::LLVMContext& context) + { + return llvm::Type::getDoubleTy(context); + } +}; + +template +struct llvm_value_t : llvm_value_t +{ + static_assert(!std::is_void::value, 
"llvm_value_t<> error: invalid pointer to void type"); + + using type = T*; + using base = llvm_value_t; + using base::base; + + static constexpr uint is_pointer = llvm_value_t::is_pointer + 1; + + static llvm::Type* get_type(llvm::LLVMContext& context) + { + return llvm_value_t::get_type(context)->getPointerTo(); + } +}; + +template +struct llvm_value_t : llvm_value_t +{ + static_assert(!llvm_value_t::is_vector, "llvm_value_t<> error: invalid multidimensional vector"); + static_assert(!llvm_value_t::is_pointer, "llvm_value_t<>: vector of pointers is not allowed"); + + using type = T[N]; + using base = llvm_value_t; + using base::base; + + static constexpr uint is_vector = N; + static constexpr uint is_pointer = 0; + + static llvm::Type* get_type(llvm::LLVMContext& context) + { + return llvm::VectorType::get(llvm_value_t::get_type(context), N); + } +}; + +template +struct llvm_add_t +{ + using type = T; + + A1 a1; + A2 a2; + + static_assert(llvm_value_t::is_sint || llvm_value_t::is_uint || llvm_value_t::is_float, "llvm_add_t<>: invalid type"); + + llvm::Value* eval(llvm::IRBuilder<>* ir) const + { + const auto v1 = a1.eval(ir); + const auto v2 = a2.eval(ir); + + if (llvm_value_t::is_int) + { + return ir->CreateAdd(v1, v2); + } + + if (llvm_value_t::is_float) + { + return ir->CreateFAdd(v1, v2); + } + } +}; + +template ().eval(0)), typename = std::enable_if_t::value>> +inline llvm_add_t operator +(T1 a1, T2 a2) +{ + return {a1, a2}; +} + +template +struct llvm_sub_t +{ + using type = T; + + A1 a1; + A2 a2; + + static_assert(llvm_value_t::is_sint || llvm_value_t::is_uint || llvm_value_t::is_float, "llvm_sub_t<>: invalid type"); + + llvm::Value* eval(llvm::IRBuilder<>* ir) const + { + const auto v1 = a1.eval(ir); + const auto v2 = a2.eval(ir); + + if (llvm_value_t::is_int) + { + return ir->CreateSub(v1, v2); + } + + if (llvm_value_t::is_float) + { + return ir->CreateFSub(v1, v2); + } + } +}; + +template ().eval(0)), typename = std::enable_if_t::value>> +inline 
llvm_sub_t operator -(T1 a1, T2 a2) +{ + return {a1, a2}; +} + +template +struct llvm_mul_t +{ + using type = T; + + A1 a1; + A2 a2; + + static_assert(llvm_value_t::is_sint || llvm_value_t::is_uint || llvm_value_t::is_float, "llvm_mul_t<>: invalid type"); + + llvm::Value* eval(llvm::IRBuilder<>* ir) const + { + const auto v1 = a1.eval(ir); + const auto v2 = a2.eval(ir); + + if (llvm_value_t::is_int) + { + return ir->CreateMul(v1, v2); + } + + if (llvm_value_t::is_float) + { + return ir->CreateFMul(v1, v2); + } + } +}; + +template ().eval(0)), typename = std::enable_if_t::value>> +inline llvm_mul_t operator *(T1 a1, T2 a2) +{ + return {a1, a2}; +} + +template +struct llvm_div_t +{ + using type = T; + + A1 a1; + A2 a2; + + static_assert(llvm_value_t::is_sint || llvm_value_t::is_uint || llvm_value_t::is_float, "llvm_div_t<>: invalid type"); + + llvm::Value* eval(llvm::IRBuilder<>* ir) const + { + const auto v1 = a1.eval(ir); + const auto v2 = a2.eval(ir); + + if (llvm_value_t::is_sint) + { + return ir->CreateSDiv(v1, v2); + } + + if (llvm_value_t::is_uint) + { + return ir->CreateUDiv(v1, v2); + } + + if (llvm_value_t::is_float) + { + return ir->CreateFDiv(v1, v2); + } + } +}; + +template ().eval(0)), typename = std::enable_if_t::value>> +inline llvm_div_t operator /(T1 a1, T2 a2) +{ + return {a1, a2}; +} + +template +struct llvm_neg_t +{ + using type = T; + + A1 a1; + + static_assert(llvm_value_t::is_sint || llvm_value_t::is_uint || llvm_value_t::is_float, "llvm_neg_t<>: invalid type"); + + llvm::Value* eval(llvm::IRBuilder<>* ir) const + { + const auto v1 = a1.eval(ir); + + if (llvm_value_t::is_int) + { + return ir->CreateNeg(v1); + } + + if (llvm_value_t::is_float) + { + return ir->CreateFNeg(v1); + } + } +}; + +template ().eval(0)), typename = std::enable_if_t::esize>> +inline llvm_neg_t operator -(T1 a1) +{ + return {a1}; +} + +// Constant int helper +struct llvm_int_t +{ + u64 value; + + u64 eval(llvm::IRBuilder<>*) const + { + return value; + } +}; + +template 
+struct llvm_shl_t +{ + using type = T; + + A1 a1; + A2 a2; + + static_assert(llvm_value_t::is_sint || llvm_value_t::is_uint, "llvm_shl_t<>: invalid type"); + + llvm::Value* eval(llvm::IRBuilder<>* ir) const + { + const auto v1 = a1.eval(ir); + const auto v2 = a2.eval(ir); + + if (llvm_value_t::is_sint) + { + return ir->CreateShl(v1, v2); + } + + if (llvm_value_t::is_uint) + { + return ir->CreateShl(v1, v2); + } + } +}; + +template ().eval(0)), typename = std::enable_if_t::value>> +inline llvm_shl_t operator <<(T1 a1, T2 a2) +{ + return {a1, a2}; +} + +template ().eval(0)), typename = std::enable_if_t::is_int>> +inline llvm_shl_t operator <<(T1 a1, u64 a2) +{ + return {a1, llvm_int_t{a2}}; +} + +template +struct llvm_shr_t +{ + using type = T; + + A1 a1; + A2 a2; + + static_assert(llvm_value_t::is_sint || llvm_value_t::is_uint, "llvm_shr_t<>: invalid type"); + + llvm::Value* eval(llvm::IRBuilder<>* ir) const + { + const auto v1 = a1.eval(ir); + const auto v2 = a2.eval(ir); + + if (llvm_value_t::is_sint) + { + return ir->CreateAShr(v1, v2); + } + + if (llvm_value_t::is_uint) + { + return ir->CreateLShr(v1, v2); + } + } +}; + +template ().eval(0)), typename = std::enable_if_t::value>> +inline llvm_shr_t operator >>(T1 a1, T2 a2) +{ + return {a1, a2}; +} + +template ().eval(0)), typename = std::enable_if_t::is_int>> +inline llvm_shr_t operator >>(T1 a1, u64 a2) +{ + return {a1, llvm_int_t{a2}}; +} + +template +struct llvm_and_t +{ + using type = T; + + A1 a1; + A2 a2; + + static_assert(llvm_value_t::is_int, "llvm_and_t<>: invalid type"); + + llvm::Value* eval(llvm::IRBuilder<>* ir) const + { + const auto v1 = a1.eval(ir); + const auto v2 = a2.eval(ir); + + if (llvm_value_t::is_int) + { + return ir->CreateAnd(v1, v2); + } + } +}; + +template ().eval(0)), typename = std::enable_if_t::value>> +inline llvm_and_t operator &(T1 a1, T2 a2) +{ + return {a1, a2}; +} + +template ().eval(0)), typename = std::enable_if_t::is_int>> +inline llvm_and_t operator &(T1 a1, u64 a2) +{ + 
return {a1, llvm_int_t{a2}}; +} + +template +struct llvm_or_t +{ + using type = T; + + A1 a1; + A2 a2; + + static_assert(llvm_value_t::is_int, "llvm_or_t<>: invalid type"); + + llvm::Value* eval(llvm::IRBuilder<>* ir) const + { + const auto v1 = a1.eval(ir); + const auto v2 = a2.eval(ir); + + if (llvm_value_t::is_int) + { + return ir->CreateOr(v1, v2); + } + } +}; + +template ().eval(0)), typename = std::enable_if_t::value>> +inline llvm_or_t operator |(T1 a1, T2 a2) +{ + return {a1, a2}; +} + +template ().eval(0)), typename = std::enable_if_t::is_int>> +inline llvm_or_t operator |(T1 a1, u64 a2) +{ + return {a1, llvm_int_t{a2}}; +} + +template +struct llvm_xor_t +{ + using type = T; + + A1 a1; + A2 a2; + + static_assert(llvm_value_t::is_int, "llvm_xor_t<>: invalid type"); + + llvm::Value* eval(llvm::IRBuilder<>* ir) const + { + const auto v1 = a1.eval(ir); + const auto v2 = a2.eval(ir); + + if (llvm_value_t::is_int) + { + return ir->CreateXor(v1, v2); + } + } +}; + +template ().eval(0)), typename = std::enable_if_t::value>> +inline llvm_xor_t operator ^(T1 a1, T2 a2) +{ + return {a1, a2}; +} + +template ().eval(0)), typename = std::enable_if_t::is_int>> +inline llvm_xor_t operator ^(T1 a1, u64 a2) +{ + return {a1, llvm_int_t{a2}}; +} + +template +struct llvm_not_t +{ + using type = T; + + A1 a1; + + static_assert(llvm_value_t::is_int, "llvm_not_t<>: invalid type"); + + llvm::Value* eval(llvm::IRBuilder<>* ir) const + { + const auto v1 = a1.eval(ir); + + if (llvm_value_t::is_int) + { + return ir->CreateNot(v1); + } + } +}; + +template ().eval(0)), typename = std::enable_if_t::is_int>> +inline llvm_not_t operator ~(T1 a1) +{ + return {a1}; +} + +class cpu_translator +{ +protected: + cpu_translator(llvm::LLVMContext& context, llvm::Module* module, bool is_be); + + // LLVM context + llvm::LLVMContext& m_context; + + // Module to which all generated code is output to + llvm::Module* const m_module; + + // Endianness, affects vector element numbering (TODO) + const bool 
m_is_be; + + // IR builder + llvm::IRBuilder<>* m_ir; + +public: + // Convert a C++ type to an LLVM type (TODO: remove) + template + llvm::Type* GetType() + { + return llvm_value_t::get_type(m_context); + } + + template + llvm::Type* get_type() + { + return llvm_value_t::get_type(m_context); + } + + template + using value_t = llvm_value_t; + + template + auto eval(T expr) + { + value_t result; + result.value = expr.eval(m_ir); + return result; + } + + // Get unsigned addition carry into the sign bit (s = a + b) + template + static inline auto ucarry(T a, T b, T s) + { + return ((a ^ b) & ~s) | (a & b); + } + + // Get signed addition overflow into the sign bit (s = a + b) + template + static inline auto scarry(T a, T b, T s) + { + return (b ^ s) & ~(a ^ b); + } + + // Get signed subtraction overflow into the sign bit (d = a - b) + template + static inline auto sborrow(T a, T b, T d) + { + return (a ^ b) & (a ^ d); + } + + // Bitwise select (c ? a : b) + template + static inline auto merge(T c, T a, T b) + { + return (a & c) | (b & ~c); + } + + // Average: (a + b + 1) >> 1 + template + static inline auto avg(T a, T b) + { + return (a >> 1) + (b >> 1) + ((a | b) & 1); + } +}; + +#endif \ No newline at end of file diff --git a/rpcs3/Emu/Cell/MFC.cpp b/rpcs3/Emu/Cell/MFC.cpp index bd1c601349..71c138d47e 100644 --- a/rpcs3/Emu/Cell/MFC.cpp +++ b/rpcs3/Emu/Cell/MFC.cpp @@ -302,7 +302,7 @@ void mfc_thread::cpu_task() spu.ch_tag_stat.push(spu, completed); no_updates = 0; } - else if (completed && spu.ch_tag_mask == completed && spu.ch_tag_upd.compare_and_swap_test(2, 0)) + else if (spu.ch_tag_mask == completed && spu.ch_tag_upd.compare_and_swap_test(2, 0)) { spu.ch_tag_stat.push(spu, completed); no_updates = 0; diff --git a/rpcs3/Emu/Cell/Modules/cellKb.cpp b/rpcs3/Emu/Cell/Modules/cellKb.cpp index 7bf6a67b14..0c80f816ad 100644 --- a/rpcs3/Emu/Cell/Modules/cellKb.cpp +++ b/rpcs3/Emu/Cell/Modules/cellKb.cpp @@ -100,7 +100,7 @@ u16 cellKbCnvRawCode(u32 arrange, u32 mkey, u32 
led, u16 rawcode) ((led&(CELL_KB_LED_CAPS_LOCK)) ? 0 : 0x20) : ((led&(CELL_KB_LED_CAPS_LOCK)) ? 0x20 : 0); return rawcode + 0x5D; - } + } if (rawcode >= 0x1E && rawcode <= 0x26) return rawcode + 0x13; // '1' - '9' if (rawcode == 0x27) return 0x30; // '0' if (rawcode == 0x28) return 0x0A; // '\n' @@ -136,7 +136,7 @@ error_code cellKbGetInfo(vm::ptr info) { info->status[i] = current_info.status[i]; } - + return CELL_OK; } @@ -165,7 +165,7 @@ error_code cellKbRead(u32 port_no, vm::ptr data) } current_data.len = 0; - + return CELL_OK; } @@ -180,7 +180,7 @@ error_code cellKbSetCodeType(u32 port_no, u32 type) if (port_no >= handler->GetKeyboards().size()) return CELL_KB_ERROR_INVALID_PARAMETER; - + KbConfig& current_config = handler->GetConfig(port_no); current_config.code_type = type; return CELL_OK; @@ -203,7 +203,7 @@ error_code cellKbSetReadMode(u32 port_no, u32 rmode) if (port_no >= handler->GetKeyboards().size()) return CELL_KB_ERROR_INVALID_PARAMETER; - + KbConfig& current_config = handler->GetConfig(port_no); current_config.read_mode = rmode; diff --git a/rpcs3/Emu/Cell/Modules/cellMsgDialog.cpp b/rpcs3/Emu/Cell/Modules/cellMsgDialog.cpp index b5cb274c18..f66d805354 100644 --- a/rpcs3/Emu/Cell/Modules/cellMsgDialog.cpp +++ b/rpcs3/Emu/Cell/Modules/cellMsgDialog.cpp @@ -315,6 +315,11 @@ s32 cellMsgDialogProgressBarSetMsg(u32 progressBarIndex, vm::cptr msgStrin { cellSysutil.warning("cellMsgDialogProgressBarSetMsg(progressBarIndex=%d, msgString=%s)", progressBarIndex, msgString); + if (!msgString) + { + return CELL_MSGDIALOG_ERROR_PARAM; + } + if (auto rsxthr = fxm::get()) { if (auto dlg2 = rsxthr->shell_get_current_dialog()) @@ -331,7 +336,7 @@ s32 cellMsgDialogProgressBarSetMsg(u32 progressBarIndex, vm::cptr msgStrin return CELL_MSGDIALOG_ERROR_DIALOG_NOT_OPENED; } - if (progressBarIndex >= dlg->type.progress_bar_count || !msgString) + if (progressBarIndex >= dlg->type.progress_bar_count) { return CELL_MSGDIALOG_ERROR_PARAM; } diff --git 
a/rpcs3/Emu/Cell/Modules/cellPhotoExport.cpp b/rpcs3/Emu/Cell/Modules/cellPhotoExport.cpp index e8bd573c69..0b2024abd8 100644 --- a/rpcs3/Emu/Cell/Modules/cellPhotoExport.cpp +++ b/rpcs3/Emu/Cell/Modules/cellPhotoExport.cpp @@ -1,5 +1,6 @@ #include "stdafx.h" #include "Emu/Cell/PPUModule.h" +#include "cellSysutil.h" namespace vm { using namespace ps3; } @@ -20,6 +21,16 @@ enum CELL_PHOTO_EXPORT_UTIL_ERROR_INITIALIZE = 0x8002c20a, }; +struct CellPhotoExportSetParam +{ + vm::bptr photo_title; + vm::bptr game_title; + vm::bptr game_comment; + vm::bptr reserved; +}; + +using CellPhotoExportUtilFinishCallback = void(s32 result, vm::ptr userdata); + s32 cellPhotoInitialize() { UNIMPLEMENTED_FUNC(cellPhotoExport); @@ -38,39 +49,81 @@ s32 cellPhotoRegistFromFile() return CELL_OK; } -s32 cellPhotoExportInitialize() +error_code cellPhotoExportInitialize(u32 version, u32 container, vm::ptr funcFinish, vm::ptr userdata) { - UNIMPLEMENTED_FUNC(cellPhotoExport); + cellPhotoExport.todo("cellPhotoExportInitialize(version=0x%x, container=0x%x, funcFinish=*0x%x, userdata=*0x%x)", version, container, funcFinish, userdata); + + sysutil_register_cb([=](ppu_thread& ppu) -> s32 + { + funcFinish(ppu, CELL_OK, userdata); + return CELL_OK; + }); + return CELL_OK; } -s32 cellPhotoExportInitialize2() +error_code cellPhotoExportInitialize2(u32 version, vm::ptr funcFinish, vm::ptr userdata) { - UNIMPLEMENTED_FUNC(cellPhotoExport); + cellPhotoExport.todo("cellPhotoExportInitialize2(version=0x%x, funcFinish=*0x%x, userdata=*0x%x)", version, funcFinish, userdata); + + sysutil_register_cb([=](ppu_thread& ppu) -> s32 + { + funcFinish(ppu, CELL_OK, userdata); + return CELL_OK; + }); + return CELL_OK; } -s32 cellPhotoExportFinalize() +error_code cellPhotoExportFinalize(vm::ptr funcFinish, vm::ptr userdata) { - UNIMPLEMENTED_FUNC(cellPhotoExport); + cellPhotoExport.todo("cellPhotoExportFinalize(funcFinish=*0x%x, userdata=*0x%x)", funcFinish, userdata); + + sysutil_register_cb([=](ppu_thread& ppu) -> 
s32 + { + funcFinish(ppu, CELL_OK, userdata); + return CELL_OK; + }); + return CELL_OK; } -s32 cellPhotoExportFromFile() +error_code cellPhotoExportFromFile(vm::cptr srcHddDir, vm::cptr srcHddFile, vm::ptr param, vm::ptr funcFinish, vm::ptr userdata) { - UNIMPLEMENTED_FUNC(cellPhotoExport); + cellPhotoExport.todo("cellPhotoExportFromFile(srcHddDir=%s, srcHddFile=%s, param=*0x%x, funcFinish=*0x%x, userdata=*0x%x)", srcHddDir, srcHddFile, param, funcFinish, userdata); + + sysutil_register_cb([=](ppu_thread& ppu) -> s32 + { + funcFinish(ppu, CELL_OK, userdata); + return CELL_OK; + }); + return CELL_OK; } -s32 cellPhotoExportFromFileWithCopy() +error_code cellPhotoExportFromFileWithCopy(vm::cptr srcHddDir, vm::cptr srcHddFile, vm::ptr param, vm::ptr funcFinish, vm::ptr userdata) { - UNIMPLEMENTED_FUNC(cellPhotoExport); + cellPhotoExport.todo("cellPhotoExportFromFileWithCopy(srcHddDir=%s, srcHddFile=%s, param=*0x%x, funcFinish=*0x%x, userdata=*0x%x)", srcHddDir, srcHddFile, param, funcFinish, userdata); + + sysutil_register_cb([=](ppu_thread& ppu) -> s32 + { + funcFinish(ppu, CELL_OK, userdata); + return CELL_OK; + }); + return CELL_OK; } -s32 cellPhotoExportProgress() +error_code cellPhotoExportProgress(vm::ptr funcFinish, vm::ptr userdata) { - UNIMPLEMENTED_FUNC(cellPhotoExport); + cellPhotoExport.todo("cellPhotoExportProgress(funcFinish=*0x%x, userdata=*0x%x)", funcFinish, userdata); + + sysutil_register_cb([=](ppu_thread& ppu) -> s32 + { + funcFinish(ppu, 0xFFFF, userdata); // 0-0xFFFF where 0xFFFF = 100% + return CELL_OK; + }); + return CELL_OK; } diff --git a/rpcs3/Emu/Cell/Modules/cellRec.cpp b/rpcs3/Emu/Cell/Modules/cellRec.cpp index 4e8c34917f..1525594613 100644 --- a/rpcs3/Emu/Cell/Modules/cellRec.cpp +++ b/rpcs3/Emu/Cell/Modules/cellRec.cpp @@ -1,10 +1,22 @@ #include "stdafx.h" #include "Emu/Cell/PPUModule.h" +#include "Emu/IdManager.h" +#include "cellSysutil.h" namespace vm { using namespace ps3; } logs::channel cellRec("cellRec"); +enum +{ + 
CELL_REC_STATUS_UNLOAD = 0, + CELL_REC_STATUS_OPEN = 1, + CELL_REC_STATUS_START = 2, + CELL_REC_STATUS_STOP = 3, + CELL_REC_STATUS_CLOSE = 4, + CELL_REC_STATUS_ERR = 10 +}; + struct CellRecSpursParam { vm::bptr pSpurs; @@ -46,15 +58,40 @@ struct CellRecParam using CellRecCallback = void(s32 recStatus, s32 recError, vm::ptr userdata); -s32 cellRecOpen(vm::cptr pDirName, vm::cptr pFileName, vm::cptr pParam, u32 container, vm::ptr cb, vm::ptr cbUserData) +struct rec_t +{ + vm::ptr cb; + vm::ptr cbUserData; +}; + +error_code cellRecOpen(vm::cptr pDirName, vm::cptr pFileName, vm::cptr pParam, u32 container, vm::ptr cb, vm::ptr cbUserData) { cellRec.todo("cellRecOpen(pDirName=%s, pFileName=%s, pParam=*0x%x, container=0x%x, cb=*0x%x, cbUserData=*0x%x)", pDirName, pFileName, pParam, container, cb, cbUserData); + + const auto rec = fxm::make_always(); + rec->cb = cb; + rec->cbUserData = cbUserData; + + sysutil_register_cb([=](ppu_thread& ppu) -> s32 + { + cb(ppu, CELL_REC_STATUS_OPEN, CELL_OK, cbUserData); + return CELL_OK; + }); + return CELL_OK; } -s32 cellRecClose(s32 isDiscard) +error_code cellRecClose(s32 isDiscard) { cellRec.todo("cellRecClose(isDiscard=0x%x)", isDiscard); + + sysutil_register_cb([=](ppu_thread& ppu) -> s32 + { + const auto rec = fxm::get_always(); + rec->cb(ppu, CELL_REC_STATUS_CLOSE, CELL_OK, rec->cbUserData); + return CELL_OK; + }); + return CELL_OK; } @@ -63,15 +100,31 @@ void cellRecGetInfo(s32 info, vm::ptr pValue) cellRec.todo("cellRecGetInfo(info=0x%x, pValue=*0x%x)", info, pValue); } -s32 cellRecStop() +error_code cellRecStop() { cellRec.todo("cellRecStop()"); + + sysutil_register_cb([=](ppu_thread& ppu) -> s32 + { + const auto rec = fxm::get_always(); + rec->cb(ppu, CELL_REC_STATUS_STOP, CELL_OK, rec->cbUserData); + return CELL_OK; + }); + return CELL_OK; } -s32 cellRecStart() +error_code cellRecStart() { cellRec.todo("cellRecStart()"); + + sysutil_register_cb([=](ppu_thread& ppu) -> s32 + { + const auto rec = fxm::get_always(); + 
rec->cb(ppu, CELL_REC_STATUS_START, CELL_OK, rec->cbUserData); + return CELL_OK; + }); + return CELL_OK; } @@ -81,7 +134,7 @@ u32 cellRecQueryMemSize(vm::cptr pParam) return 1 * 1024 * 1024; // dummy memory size } -s32 cellRecSetInfo(s32 setInfo, u64 value) +error_code cellRecSetInfo(s32 setInfo, u64 value) { cellRec.todo("cellRecSetInfo(setInfo=0x%x, value=0x%x)", setInfo, value); return CELL_OK; diff --git a/rpcs3/Emu/Cell/Modules/cellVideoExport.cpp b/rpcs3/Emu/Cell/Modules/cellVideoExport.cpp index a089a404c4..4a9ee93b4b 100644 --- a/rpcs3/Emu/Cell/Modules/cellVideoExport.cpp +++ b/rpcs3/Emu/Cell/Modules/cellVideoExport.cpp @@ -1,36 +1,99 @@ #include "stdafx.h" #include "Emu/Cell/PPUModule.h" +#include "cellSysutil.h" + +namespace vm { using namespace ps3; } + logs::channel cellVideoExport("cellVideoExport"); -s32 cellVideoExportProgress() +struct CellVideoExportSetParam { - fmt::throw_exception("Unimplemented" HERE); + vm::bptr title; + vm::bptr game_title; + vm::bptr game_comment; + be_t editable; + vm::bptr reserved2; +}; + +using CellVideoExportUtilFinishCallback = void(s32 result, vm::ptr userdata); + +error_code cellVideoExportProgress(vm::ptr funcFinish, vm::ptr userdata) +{ + cellVideoExport.todo("cellVideoExportProgress(funcFinish=*0x%x, userdata=*0x%x)", funcFinish, userdata); + + sysutil_register_cb([=](ppu_thread& ppu) -> s32 + { + funcFinish(ppu, 0xFFFF, userdata); // 0-0xFFFF where 0xFFFF = 100% + return CELL_OK; + }); + + return CELL_OK; } -s32 cellVideoExportInitialize2() +error_code cellVideoExportInitialize2(u32 version, vm::ptr funcFinish, vm::ptr userdata) { - fmt::throw_exception("Unimplemented" HERE); + cellVideoExport.todo("cellVideoExportInitialize2(version=0x%x, funcFinish=*0x%x, userdata=*0x%x)", version, funcFinish, userdata); + + sysutil_register_cb([=](ppu_thread& ppu) -> s32 + { + funcFinish(ppu, CELL_OK, userdata); + return CELL_OK; + }); + + return CELL_OK; } -s32 cellVideoExportInitialize() +error_code 
cellVideoExportInitialize(u32 version, u32 container, vm::ptr funcFinish, vm::ptr userdata) { - fmt::throw_exception("Unimplemented" HERE); + cellVideoExport.todo("cellVideoExportInitialize(version=0x%x, container=0x%x, funcFinish=*0x%x, userdata=*0x%x)", version, container, funcFinish, userdata); + + sysutil_register_cb([=](ppu_thread& ppu) -> s32 + { + funcFinish(ppu, CELL_OK, userdata); + return CELL_OK; + }); + + return CELL_OK; } -s32 cellVideoExportFromFileWithCopy() +error_code cellVideoExportFromFileWithCopy(vm::cptr srcHddDir, vm::cptr srcHddFile, vm::ptr param, vm::ptr funcFinish, vm::ptr userdata) { - fmt::throw_exception("Unimplemented" HERE); + cellVideoExport.todo("cellVideoExportFromFileWithCopy(srcHddDir=%s, srcHddFile=%s, param=*0x%x, funcFinish=*0x%x, userdata=*0x%x)", srcHddDir, srcHddFile, param, funcFinish, userdata); + + sysutil_register_cb([=](ppu_thread& ppu) -> s32 + { + funcFinish(ppu, CELL_OK, userdata); + return CELL_OK; + }); + + return CELL_OK; } -s32 cellVideoExportFromFile() +error_code cellVideoExportFromFile(vm::cptr srcHddDir, vm::cptr srcHddFile, vm::ptr param, vm::ptr funcFinish, vm::ptr userdata) { - fmt::throw_exception("Unimplemented" HERE); + cellVideoExport.todo("cellVideoExportFromFile(srcHddDir=%s, srcHddFile=%s, param=*0x%x, funcFinish=*0x%x, userdata=*0x%x)", srcHddDir, srcHddFile, param, funcFinish, userdata); + + sysutil_register_cb([=](ppu_thread& ppu) -> s32 + { + funcFinish(ppu, CELL_OK, userdata); + return CELL_OK; + }); + + return CELL_OK; } -s32 cellVideoExportFinalize() +error_code cellVideoExportFinalize(vm::ptr funcFinish, vm::ptr userdata) { - fmt::throw_exception("Unimplemented" HERE); + cellVideoExport.todo("cellVideoExportFinalize(funcFinish=*0x%x, userdata=*0x%x)", funcFinish, userdata); + + sysutil_register_cb([=](ppu_thread& ppu) -> s32 + { + funcFinish(ppu, CELL_OK, userdata); + return CELL_OK; + }); + + return CELL_OK; } diff --git a/rpcs3/Emu/Cell/Modules/cellVideoUpload.cpp 
b/rpcs3/Emu/Cell/Modules/cellVideoUpload.cpp index 80dd8a0b62..54de45608e 100644 --- a/rpcs3/Emu/Cell/Modules/cellVideoUpload.cpp +++ b/rpcs3/Emu/Cell/Modules/cellVideoUpload.cpp @@ -2,13 +2,24 @@ #include "Emu/Cell/PPUModule.h" #include "cellVideoUpload.h" +#include "cellSysutil.h" logs::channel cellVideoUpload("cellVideoUpload"); -s32 cellVideoUploadInitialize(vm::cptr pParam, vm::ptr cb, vm::ptr userdata) +error_code cellVideoUploadInitialize(vm::cptr pParam, vm::ptr cb, vm::ptr userdata) { cellVideoUpload.todo("cellVideoUploadInitialize(pParam=*0x%x, cb=*0x%x, userdata=*0x%x)", pParam, cb, userdata); + sysutil_register_cb([=](ppu_thread& ppu) -> s32 + { + vm::var pResultURL(128); + + cb(ppu, CELL_VIDEO_UPLOAD_STATUS_INITIALIZED, CELL_OK, pResultURL, userdata); + cb(ppu, CELL_VIDEO_UPLOAD_STATUS_FINALIZED, CELL_OK, pResultURL, userdata); + + return CELL_OK; + }); + return CELL_OK; } diff --git a/rpcs3/Emu/Cell/Modules/cellVideoUpload.h b/rpcs3/Emu/Cell/Modules/cellVideoUpload.h index 9abc051b9f..f57eb912b3 100644 --- a/rpcs3/Emu/Cell/Modules/cellVideoUpload.h +++ b/rpcs3/Emu/Cell/Modules/cellVideoUpload.h @@ -1,7 +1,5 @@ #pragma once -namespace vm { using namespace ps3; } - struct CellVideoUploadOption { be_t type; @@ -11,27 +9,27 @@ struct CellVideoUploadOption struct CellVideoUploadParam { be_t siteID; - vm::bcptr pFilePath; + vm::ps3::bcptr pFilePath; union { struct { - vm::bcptr pClientId; - vm::bcptr pDeveloperKey; - vm::bcptr pTitle_UTF8; - vm::bcptr pDescription_UTF8; - vm::bcptr pKeyword_1_UTF8; - vm::bcptr pKeyword_2_UTF8; - vm::bcptr pKeyword_3_UTF8; + vm::ps3::bcptr pClientId; + vm::ps3::bcptr pDeveloperKey; + vm::ps3::bcptr pTitle_UTF8; + vm::ps3::bcptr pDescription_UTF8; + vm::ps3::bcptr pKeyword_1_UTF8; + vm::ps3::bcptr pKeyword_2_UTF8; + vm::ps3::bcptr pKeyword_3_UTF8; u8 isPrivate; u8 rating; } youtube; } u; be_t numOfOption; - vm::bptr pOption; + vm::ps3::bptr pOption; }; -typedef void(CellVideoUploadCallback)(s32 status, s32 errorCode, vm::cptr 
pResultURL, vm::ptr userdata); +using CellVideoUploadCallback = void(s32 status, s32 errorCode, vm::ps3::cptr pResultURL, vm::ps3::ptr userdata); enum { @@ -59,3 +57,9 @@ enum CELL_VIDEO_UPLOAD_ERROR_FILE_OPEN = 0x8002d023, CELL_VIDEO_UPLOAD_ERROR_INVALID_STATE = 0x8002d024 }; + +enum +{ + CELL_VIDEO_UPLOAD_STATUS_INITIALIZED = 1, + CELL_VIDEO_UPLOAD_STATUS_FINALIZED = 2 +}; diff --git a/rpcs3/Emu/Cell/Modules/cellWebBrowser.cpp b/rpcs3/Emu/Cell/Modules/cellWebBrowser.cpp index b338fe5450..e8e5f8013f 100644 --- a/rpcs3/Emu/Cell/Modules/cellWebBrowser.cpp +++ b/rpcs3/Emu/Cell/Modules/cellWebBrowser.cpp @@ -2,9 +2,16 @@ #include "Emu/Cell/PPUModule.h" #include "cellWebBrowser.h" +#include "Emu/IdManager.h" extern logs::channel cellSysutil; +struct browser_t +{ + vm::ptr system_cb; + vm::ptr userData; +}; + s32 cellWebBrowserActivate() { fmt::throw_exception("Unimplemented" HERE); @@ -15,9 +22,10 @@ s32 cellWebBrowserConfig() fmt::throw_exception("Unimplemented" HERE); } -s32 cellWebBrowserConfig2() +error_code cellWebBrowserConfig2(vm::cptr config, u32 version) { - fmt::throw_exception("Unimplemented" HERE); + cellSysutil.todo("cellWebBrowserConfig2(config=*0x%x, version=%d)", config, version); + return CELL_OK; } s32 cellWebBrowserConfigGetHeapSize() @@ -45,9 +53,10 @@ s32 cellWebBrowserConfigSetErrorHook2() fmt::throw_exception("Unimplemented" HERE); } -s32 cellWebBrowserConfigSetFullScreen2() +error_code cellWebBrowserConfigSetFullScreen2(vm::cptr config, u32 full) { - fmt::throw_exception("Unimplemented" HERE); + cellSysutil.todo("cellWebBrowserConfigSetFullScreen2(config=*0x%x, full=%d)", config, full); + return CELL_OK; } s32 cellWebBrowserConfigSetFullVersion2() @@ -60,9 +69,10 @@ s32 cellWebBrowserConfigSetFunction() fmt::throw_exception("Unimplemented" HERE); } -s32 cellWebBrowserConfigSetFunction2() +error_code cellWebBrowserConfigSetFunction2(vm::ptr config, u32 funcset) { - fmt::throw_exception("Unimplemented" HERE); + 
cellSysutil.todo("cellWebBrowserConfigSetFunction2(config=*0x%x, funcset=0x%x)", config, funcset); + return CELL_OK; } s32 cellWebBrowserConfigSetHeapSize() @@ -70,9 +80,10 @@ s32 cellWebBrowserConfigSetHeapSize() fmt::throw_exception("Unimplemented" HERE); } -s32 cellWebBrowserConfigSetHeapSize2() +error_code cellWebBrowserConfigSetHeapSize2(vm::ptr config, u32 size) { - fmt::throw_exception("Unimplemented" HERE); + cellSysutil.todo("cellWebBrowserConfigSetHeapSize2(config=*0x%x, size=0x%x)", config, size); + return CELL_OK; } s32 cellWebBrowserConfigSetMimeSet() @@ -80,9 +91,10 @@ s32 cellWebBrowserConfigSetMimeSet() fmt::throw_exception("Unimplemented" HERE); } -s32 cellWebBrowserConfigSetNotifyHook2() +error_code cellWebBrowserConfigSetNotifyHook2(vm::cptr config, vm::ptr cb, vm::ptr userdata) { - fmt::throw_exception("Unimplemented" HERE); + cellSysutil.todo("cellWebBrowserConfigSetNotifyHook2(config=*0x%x, cb=*0x%x, userdata=*0x%x)", config, cb, userdata); + return CELL_OK; } s32 cellWebBrowserConfigSetRequestHook2() @@ -95,14 +107,16 @@ s32 cellWebBrowserConfigSetStatusHook2() fmt::throw_exception("Unimplemented" HERE); } -s32 cellWebBrowserConfigSetTabCount2() +error_code cellWebBrowserConfigSetTabCount2(vm::cptr config, u32 tab_count) { - fmt::throw_exception("Unimplemented" HERE); + cellSysutil.todo("cellWebBrowserConfigSetTabCount2(config=*0x%x, tab_count=%d)", config, tab_count); + return CELL_OK; } -s32 cellWebBrowserConfigSetUnknownMIMETypeHook2() +error_code cellWebBrowserConfigSetUnknownMIMETypeHook2(vm::cptr config, vm::ptr cb, vm::ptr userdata) { - fmt::throw_exception("Unimplemented" HERE); + cellSysutil.todo("cellWebBrowserConfigSetUnknownMIMETypeHook2(config=*0x%x, cb=*0x%x, userdata=*0x%x)", config, cb, userdata); + return CELL_OK; } s32 cellWebBrowserConfigSetVersion() @@ -110,9 +124,10 @@ s32 cellWebBrowserConfigSetVersion() fmt::throw_exception("Unimplemented" HERE); } -s32 cellWebBrowserConfigSetViewCondition2() +error_code 
cellWebBrowserConfigSetViewCondition2(vm::ptr config, u32 cond) { - fmt::throw_exception("Unimplemented" HERE); + cellSysutil.todo("cellWebBrowserConfigSetViewCondition2(config=*0x%x, cond=0x%x)", config, cond); + return CELL_OK; } s32 cellWebBrowserConfigSetViewRect2() @@ -180,7 +195,7 @@ s32 cellWebBrowserEstimate() fmt::throw_exception("Unimplemented" HERE); } -s32 cellWebBrowserEstimate2(vm::cptr config, vm::ptr memSize) +error_code cellWebBrowserEstimate2(vm::cptr config, vm::ptr memSize) { cellSysutil.warning("cellWebBrowserEstimate2(config=*0x%x, memSize=*0x%x)", config, memSize); @@ -190,15 +205,26 @@ s32 cellWebBrowserEstimate2(vm::cptr config, vm::ptr return CELL_OK; } -s32 cellWebBrowserGetUsrdataOnGameExit(vm::ptr ptr) +error_code cellWebBrowserGetUsrdataOnGameExit(vm::ptr ptr) { - cellSysutil.todo("cellWebBrowserGetUsrdataOnGameExit(ptr=*0x%x", ptr); + cellSysutil.todo("cellWebBrowserGetUsrdataOnGameExit(ptr=*0x%x)", ptr); return CELL_OK; } -s32 cellWebBrowserInitialize() +error_code cellWebBrowserInitialize(vm::ptr system_cb, u32 container) { - fmt::throw_exception("Unimplemented" HERE); + cellSysutil.todo("cellWebBrowserInitialize(system_cb=*0x%x, container=0x%x)", system_cb, container); + + const auto browser = fxm::make_always(); + browser->system_cb = system_cb; + + sysutil_register_cb([=](ppu_thread& ppu) -> s32 + { + system_cb(ppu, CELL_SYSUTIL_WEBBROWSER_INITIALIZING_FINISHED, browser->userData); + return CELL_OK; + }); + + return CELL_OK; } s32 cellWebBrowserNavigate2() @@ -216,9 +242,17 @@ s32 cellWebBrowserSetSystemCallbackUsrdata() fmt::throw_exception("Unimplemented" HERE); } -s32 cellWebBrowserShutdown() +void cellWebBrowserShutdown() { - fmt::throw_exception("Unimplemented" HERE); + cellSysutil.todo("cellWebBrowserShutdown()"); + + sysutil_register_cb([=](ppu_thread& ppu) -> s32 + { + const auto browser = fxm::get_always(); + + browser->system_cb(ppu, CELL_SYSUTIL_WEBBROWSER_SHUTDOWN_FINISHED, browser->userData); + return CELL_OK; + }); 
} s32 cellWebBrowserUpdatePointerDisplayPos2() diff --git a/rpcs3/Emu/Cell/Modules/cellWebBrowser.h b/rpcs3/Emu/Cell/Modules/cellWebBrowser.h index 0b6254b05c..050156dbaf 100644 --- a/rpcs3/Emu/Cell/Modules/cellWebBrowser.h +++ b/rpcs3/Emu/Cell/Modules/cellWebBrowser.h @@ -2,6 +2,17 @@ #include "cellSysutil.h" +//events +enum CellWebBrowserEvent : s32 +{ + CELL_SYSUTIL_WEBBROWSER_INITIALIZING_FINISHED = 1, + CELL_SYSUTIL_WEBBROWSER_SHUTDOWN_FINISHED = 4, + CELL_SYSUTIL_WEBBROWSER_LOADING_FINISHED = 5, + CELL_SYSUTIL_WEBBROWSER_UNLOADING_FINISHED = 7, + CELL_SYSUTIL_WEBBROWSER_RELEASED = 9, + CELL_SYSUTIL_WEBBROWSER_GRABBED = 11, +}; + using CellWebBrowserCallback = void(s32 cb_type, vm::ptr client_session, vm::ptr usrdata); using CellWebComponentCallback = void(s32 web_browser_id, s32 cb_type, vm::ptr client_session, vm::ptr usrdata); using CellWebBrowserSystemCallback = void(s32 cb_type, vm::ptr usrdata); diff --git a/rpcs3/Emu/Cell/Modules/sceNpMatchingInt.cpp b/rpcs3/Emu/Cell/Modules/sceNpMatchingInt.cpp new file mode 100644 index 0000000000..03a88d057e --- /dev/null +++ b/rpcs3/Emu/Cell/Modules/sceNpMatchingInt.cpp @@ -0,0 +1,41 @@ +#include "stdafx.h" +#include "Emu/System.h" +#include "Emu/Cell/PPUModule.h" + +namespace vm { using namespace ps3; } + +logs::channel sceNpMatchingInt("sceNpMatchingInt"); + +s32 sceNpMatchingGetRoomMemberList() +{ + UNIMPLEMENTED_FUNC(sceNpMatchingInt); + return CELL_OK; +} + +// Parameter "unknown" added to distinguish this function +// from the one in sceNp.cpp which has the same name +s32 sceNpMatchingJoinRoomGUI(vm::ptr unknown) +{ + UNIMPLEMENTED_FUNC(sceNpMatchingInt); + return CELL_OK; +} + +s32 sceNpMatchingGetRoomListGUI() +{ + UNIMPLEMENTED_FUNC(sceNpMatchingInt); + return CELL_OK; +} + +s32 sceNpMatchingSendRoomMessage() +{ + UNIMPLEMENTED_FUNC(sceNpMatchingInt); + return CELL_OK; +} + +DECLARE(ppu_module_manager::sceNpMatchingInt)("sceNpMatchingInt", []() +{ + REG_FUNC(sceNpMatchingInt, 
sceNpMatchingGetRoomMemberList); + REG_FUNC(sceNpMatchingInt, sceNpMatchingJoinRoomGUI); + REG_FUNC(sceNpMatchingInt, sceNpMatchingGetRoomListGUI); + REG_FUNC(sceNpMatchingInt, sceNpMatchingSendRoomMessage); +}); diff --git a/rpcs3/Emu/Cell/PPUModule.cpp b/rpcs3/Emu/Cell/PPUModule.cpp index b5f438e7d2..f041e6cb66 100644 --- a/rpcs3/Emu/Cell/PPUModule.cpp +++ b/rpcs3/Emu/Cell/PPUModule.cpp @@ -244,6 +244,7 @@ static void ppu_initialize_modules(const std::shared_ptr& link &ppu_module_manager::sceNp2, &ppu_module_manager::sceNpClans, &ppu_module_manager::sceNpCommerce2, + &ppu_module_manager::sceNpMatchingInt, &ppu_module_manager::sceNpSns, &ppu_module_manager::sceNpTrophy, &ppu_module_manager::sceNpTus, diff --git a/rpcs3/Emu/Cell/PPUModule.h b/rpcs3/Emu/Cell/PPUModule.h index ca11e49150..b55c4c648d 100644 --- a/rpcs3/Emu/Cell/PPUModule.h +++ b/rpcs3/Emu/Cell/PPUModule.h @@ -255,6 +255,7 @@ public: static const ppu_static_module sceNp2; static const ppu_static_module sceNpClans; static const ppu_static_module sceNpCommerce2; + static const ppu_static_module sceNpMatchingInt; static const ppu_static_module sceNpSns; static const ppu_static_module sceNpTrophy; static const ppu_static_module sceNpTus; diff --git a/rpcs3/Emu/Cell/PPUTranslator.cpp b/rpcs3/Emu/Cell/PPUTranslator.cpp index e3f535c1f3..10b957f1f2 100644 --- a/rpcs3/Emu/Cell/PPUTranslator.cpp +++ b/rpcs3/Emu/Cell/PPUTranslator.cpp @@ -12,9 +12,7 @@ using namespace llvm; const ppu_decoder s_ppu_decoder; PPUTranslator::PPUTranslator(LLVMContext& context, Module* module, const ppu_module& info) - : m_context(context) - , m_module(module) - , m_is_be(false) + : cpu_translator(context, module, false) , m_info(info) , m_pure_attr(AttributeSet::get(m_context, AttributeSet::FunctionIndex, {Attribute::NoUnwind, Attribute::ReadNone})) { @@ -564,136 +562,156 @@ void PPUTranslator::MTVSCR(ppu_opcode_t op) void PPUTranslator::VADDCUW(ppu_opcode_t op) { - const auto ab = GetVrs(VrType::vi32, op.va, op.vb); - SetVr(op.vd, 
ZExt(m_ir->CreateICmpULT(m_ir->CreateAdd(ab[0], ab[1]), ab[0]), GetType())); + const auto a = get_vr(op.va); + const auto b = get_vr(op.vb); + set_vr(op.vd, eval(ucarry(a, b, eval(a + b)) >> 31)); } void PPUTranslator::VADDFP(ppu_opcode_t op) { - const auto ab = GetVrs(VrType::vf, op.va, op.vb); - SetVr(op.vd, m_ir->CreateFAdd(ab[0], ab[1])); + const auto a = get_vr(op.va); + const auto b = get_vr(op.vb); + set_vr(op.vd, eval(a + b)); } void PPUTranslator::VADDSBS(ppu_opcode_t op) { - const auto ab = SExt(GetVrs(VrType::vi8, op.va, op.vb)); - const auto result = m_ir->CreateAdd(ab[0], ab[1]); - const auto saturated = SaturateSigned(result, -0x80, 0x7f); - SetVr(op.vd, saturated.first); - SetSat(IsNotZero(saturated.second)); + const auto a = get_vr(op.va); + const auto b = get_vr(op.vb); + const auto s = eval(a + b); + const auto z = eval((a >> 7) ^ 0x7f); + const auto x = eval(scarry(a, b, s) >> 7); + set_vr(op.vd, eval(merge(x, z, s))); + SetSat(IsNotZero(x.value)); } void PPUTranslator::VADDSHS(ppu_opcode_t op) { - const auto ab = SExt(GetVrs(VrType::vi16, op.va, op.vb)); - const auto result = m_ir->CreateAdd(ab[0], ab[1]); - const auto saturated = SaturateSigned(result, -0x8000, 0x7fff); - SetVr(op.vd, saturated.first); - SetSat(IsNotZero(saturated.second)); + const auto a = get_vr(op.va); + const auto b = get_vr(op.vb); + const auto s = eval(a + b); + const auto z = eval((a >> 15) ^ 0x7fff); + const auto x = eval(scarry(a, b, s) >> 15); + set_vr(op.vd, eval(merge(x, z, s))); + SetSat(IsNotZero(x.value)); } void PPUTranslator::VADDSWS(ppu_opcode_t op) { - const auto ab = SExt(GetVrs(VrType::vi32, op.va, op.vb)); - const auto result = m_ir->CreateAdd(ab[0], ab[1]); - const auto saturated = SaturateSigned(result, -0x80000000ll, 0x7fffffff); - SetVr(op.vd, saturated.first); - SetSat(IsNotZero(saturated.second)); + const auto a = get_vr(op.va); + const auto b = get_vr(op.vb); + const auto s = eval(a + b); + const auto z = eval((a >> 31) ^ 0x7fffffff); + const auto x 
= eval(scarry(a, b, s) >> 31); + set_vr(op.vd, eval(merge(x, z, s))); + SetSat(IsNotZero(x.value)); } void PPUTranslator::VADDUBM(ppu_opcode_t op) { - const auto ab = GetVrs(VrType::vi8, op.va, op.vb); - SetVr(op.vd, m_ir->CreateAdd(ab[0], ab[1])); + const auto a = get_vr(op.va); + const auto b = get_vr(op.vb); + set_vr(op.vd, eval(a + b)); } void PPUTranslator::VADDUBS(ppu_opcode_t op) { - const auto ab = ZExt(GetVrs(VrType::vi8, op.va, op.vb)); - const auto result = m_ir->CreateAdd(ab[0], ab[1]); - const auto saturated = Saturate(result, ICmpInst::ICMP_UGT, m_ir->getInt16(0xff)); - SetVr(op.vd, saturated.first); - SetSat(IsNotZero(saturated.second)); + const auto a = get_vr(op.va); + const auto b = get_vr(op.vb); + const auto s = eval(a + b); + const auto x = eval(ucarry(a, b, s) >> 7); + set_vr(op.vd, eval(s | x)); + SetSat(IsNotZero(x.value)); } void PPUTranslator::VADDUHM(ppu_opcode_t op) { - const auto ab = GetVrs(VrType::vi16, op.va, op.vb); - SetVr(op.vd, m_ir->CreateAdd(ab[0], ab[1])); + const auto a = get_vr(op.va); + const auto b = get_vr(op.vb); + set_vr(op.vd, eval(a + b)); } void PPUTranslator::VADDUHS(ppu_opcode_t op) { - const auto ab = ZExt(GetVrs(VrType::vi16, op.va, op.vb)); - const auto result = m_ir->CreateAdd(ab[0], ab[1]); - const auto saturated = Saturate(result, ICmpInst::ICMP_UGT, m_ir->getInt32(0xffff)); - SetVr(op.vd, saturated.first); - SetSat(IsNotZero(saturated.second)); + const auto a = get_vr(op.va); + const auto b = get_vr(op.vb); + const auto s = eval(a + b); + const auto x = eval(ucarry(a, b, s) >> 15); + set_vr(op.vd, eval(s | x)); + SetSat(IsNotZero(x.value)); } void PPUTranslator::VADDUWM(ppu_opcode_t op) { - const auto ab = GetVrs(VrType::vi32, op.va, op.vb); - SetVr(op.vd, m_ir->CreateAdd(ab[0], ab[1])); + const auto a = get_vr(op.va); + const auto b = get_vr(op.vb); + set_vr(op.vd, eval(a + b)); } void PPUTranslator::VADDUWS(ppu_opcode_t op) { - const auto ab = ZExt(GetVrs(VrType::vi32, op.va, op.vb)); - const auto result = 
m_ir->CreateAdd(ab[0], ab[1]); - const auto saturated = Saturate(result, ICmpInst::ICMP_UGT, m_ir->getInt64(0xffffffff)); - SetVr(op.vd, saturated.first); - SetSat(IsNotZero(saturated.second)); + const auto a = get_vr(op.va); + const auto b = get_vr(op.vb); + const auto s = eval(a + b); + const auto x = eval(ucarry(a, b, s) >> 31); + set_vr(op.vd, eval(s | x)); + SetSat(IsNotZero(x.value)); } void PPUTranslator::VAND(ppu_opcode_t op) { - const auto ab = GetVrs(VrType::vi32, op.va, op.vb); - SetVr(op.vd, m_ir->CreateAnd(ab[0], ab[1])); + const auto a = get_vr(op.va); + const auto b = get_vr(op.vb); + set_vr(op.vd, eval(a & b)); } void PPUTranslator::VANDC(ppu_opcode_t op) { - const auto ab = GetVrs(VrType::vi32, op.va, op.vb); - SetVr(op.vd, m_ir->CreateAnd(ab[0], m_ir->CreateNot(ab[1]))); + const auto a = get_vr(op.va); + const auto b = get_vr(op.vb); + set_vr(op.vd, eval(a & ~b)); } -#define AVG_OP(a, b) m_ir->CreateLShr(m_ir->CreateSub(a, m_ir->CreateNot(b)), 1) /* (a + b + 1) >> 1 */ - void PPUTranslator::VAVGSB(ppu_opcode_t op) { - const auto ab = SExt(GetVrs(VrType::vi8, op.va, op.vb)); - SetVr(op.vd, AVG_OP(ab[0], ab[1])); + const auto a = get_vr(op.va); + const auto b = get_vr(op.vb); + set_vr(op.vd, eval(avg(a, b))); } void PPUTranslator::VAVGSH(ppu_opcode_t op) { - const auto ab = SExt(GetVrs(VrType::vi16, op.va, op.vb)); - SetVr(op.vd, AVG_OP(ab[0], ab[1])); + const auto a = get_vr(op.va); + const auto b = get_vr(op.vb); + set_vr(op.vd, eval(avg(a, b))); } void PPUTranslator::VAVGSW(ppu_opcode_t op) { - const auto ab = SExt(GetVrs(VrType::vi32, op.va, op.vb)); - SetVr(op.vd, AVG_OP(ab[0], ab[1])); + const auto a = get_vr(op.va); + const auto b = get_vr(op.vb); + set_vr(op.vd, eval(avg(a, b))); } void PPUTranslator::VAVGUB(ppu_opcode_t op) { - const auto ab = ZExt(GetVrs(VrType::vi8, op.va, op.vb)); - SetVr(op.vd, AVG_OP(ab[0], ab[1])); + const auto a = get_vr(op.va); + const auto b = get_vr(op.vb); + set_vr(op.vd, eval(avg(a, b))); } void 
PPUTranslator::VAVGUH(ppu_opcode_t op) { - const auto ab = ZExt(GetVrs(VrType::vi16, op.va, op.vb)); - SetVr(op.vd, AVG_OP(ab[0], ab[1])); + const auto a = get_vr(op.va); + const auto b = get_vr(op.vb); + set_vr(op.vd, eval(avg(a, b))); } void PPUTranslator::VAVGUW(ppu_opcode_t op) { - const auto ab = ZExt(GetVrs(VrType::vi32, op.va, op.vb)); - SetVr(op.vd, AVG_OP(ab[0], ab[1])); + const auto a = get_vr(op.va); + const auto b = get_vr(op.vb); + set_vr(op.vd, eval(avg(a, b))); } void PPUTranslator::VCFSX(ppu_opcode_t op) @@ -1134,14 +1152,16 @@ void PPUTranslator::VNMSUBFP(ppu_opcode_t op) void PPUTranslator::VNOR(ppu_opcode_t op) { - const auto ab = GetVrs(VrType::vi32, op.va, op.vb); - SetVr(op.vd, m_ir->CreateNot(m_ir->CreateOr(ab[0], ab[1]))); + const auto a = get_vr(op.va); + const auto b = get_vr(op.vb); + set_vr(op.vd, eval(~(a | b))); } void PPUTranslator::VOR(ppu_opcode_t op) { - const auto ab = GetVrs(VrType::vi32, op.va, op.vb); - SetVr(op.vd, m_ir->CreateOr(ab[0], ab[1])); + const auto a = get_vr(op.va); + const auto b = get_vr(op.vb); + set_vr(op.vd, eval(a | b)); } void PPUTranslator::VPERM(ppu_opcode_t op) @@ -1424,86 +1444,100 @@ void PPUTranslator::VSRW(ppu_opcode_t op) void PPUTranslator::VSUBCUW(ppu_opcode_t op) { - const auto ab = GetVrs(VrType::vi32, op.va, op.vb); - SetVr(op.vd, ZExt(m_ir->CreateICmpUGE(ab[0], ab[1]), GetType())); + const auto a = get_vr(op.va); + const auto b = get_vr(op.vb); + set_vr(op.vd, eval(~ucarry(b, eval(a - b), a) >> 31)); } void PPUTranslator::VSUBFP(ppu_opcode_t op) { - const auto ab = GetVrs(VrType::vf, op.va, op.vb); - SetVr(op.vd, m_ir->CreateFSub(ab[0], ab[1])); + const auto a = get_vr(op.va); + const auto b = get_vr(op.vb); + set_vr(op.vd, eval(a - b)); } void PPUTranslator::VSUBSBS(ppu_opcode_t op) { - const auto ab = SExt(GetVrs(VrType::vi8, op.va, op.vb)); - const auto result = m_ir->CreateSub(ab[0], ab[1]); - const auto saturated = SaturateSigned(result, -0x80, 0x7f); - SetVr(op.vd, saturated.first); - 
SetSat(IsNotZero(saturated.second)); + const auto a = get_vr(op.va); + const auto b = get_vr(op.vb); + const auto d = eval(a - b); + const auto z = eval((a >> 7) ^ 0x7f); + const auto x = eval(sborrow(a, b, d) >> 7); + set_vr(op.vd, eval(merge(x, z, d))); + SetSat(IsNotZero(x.value)); } void PPUTranslator::VSUBSHS(ppu_opcode_t op) { - const auto ab = SExt(GetVrs(VrType::vi16, op.va, op.vb)); - const auto result = m_ir->CreateSub(ab[0], ab[1]); - const auto saturated = SaturateSigned(result, -0x8000, 0x7fff); - SetVr(op.vd, saturated.first); - SetSat(IsNotZero(saturated.second)); + const auto a = get_vr(op.va); + const auto b = get_vr(op.vb); + const auto d = eval(a - b); + const auto z = eval((a >> 15) ^ 0x7fff); + const auto x = eval(sborrow(a, b, d) >> 15); + set_vr(op.vd, eval(merge(x, z, d))); + SetSat(IsNotZero(x.value)); } void PPUTranslator::VSUBSWS(ppu_opcode_t op) { - const auto ab = SExt(GetVrs(VrType::vi32, op.va, op.vb)); - const auto result = m_ir->CreateSub(ab[0], ab[1]); - const auto saturated = SaturateSigned(result, -0x80000000ll, 0x7fffffff); - SetVr(op.vd, saturated.first); - SetSat(IsNotZero(saturated.second)); + const auto a = get_vr(op.va); + const auto b = get_vr(op.vb); + const auto d = eval(a - b); + const auto z = eval((a >> 31) ^ 0x7fffffff); + const auto x = eval(sborrow(a, b, d) >> 31); + set_vr(op.vd, eval(merge(x, z, d))); + SetSat(IsNotZero(x.value)); } void PPUTranslator::VSUBUBM(ppu_opcode_t op) { - const auto ab = GetVrs(VrType::vi8, op.va, op.vb); - SetVr(op.vd, m_ir->CreateSub(ab[0], ab[1])); + const auto a = get_vr(op.va); + const auto b = get_vr(op.vb); + set_vr(op.vd, eval(a - b)); } void PPUTranslator::VSUBUBS(ppu_opcode_t op) { - const auto ab = ZExt(GetVrs(VrType::vi8, op.va, op.vb)); - const auto result = m_ir->CreateSub(ab[0], ab[1]); - const auto saturated = Saturate(result, ICmpInst::ICMP_SLT, m_ir->getInt16(0)); - SetVr(op.vd, saturated.first); - SetSat(IsNotZero(saturated.second)); + const auto a = get_vr(op.va); + 
const auto b = get_vr(op.vb); + const auto d = eval(a - b); + const auto x = eval(ucarry(b, d, a) >> 7); + set_vr(op.vd, eval(d & ~x)); + SetSat(IsNotZero(x.value)); } void PPUTranslator::VSUBUHM(ppu_opcode_t op) { - const auto ab = GetVrs(VrType::vi16, op.va, op.vb); - SetVr(op.vd, m_ir->CreateSub(ab[0], ab[1])); + const auto a = get_vr(op.va); + const auto b = get_vr(op.vb); + set_vr(op.vd, eval(a - b)); } void PPUTranslator::VSUBUHS(ppu_opcode_t op) { - const auto ab = ZExt(GetVrs(VrType::vi16, op.va, op.vb)); - const auto result = m_ir->CreateSub(ab[0], ab[1]); - const auto saturated = Saturate(result, ICmpInst::ICMP_SLT, m_ir->getInt32(0)); - SetVr(op.vd, saturated.first); - SetSat(IsNotZero(saturated.second)); + const auto a = get_vr(op.va); + const auto b = get_vr(op.vb); + const auto d = eval(a - b); + const auto x = eval(ucarry(b, d, a) >> 15); + set_vr(op.vd, eval(d & ~x)); + SetSat(IsNotZero(x.value)); } void PPUTranslator::VSUBUWM(ppu_opcode_t op) { - const auto ab = GetVrs(VrType::vi32, op.va, op.vb); - SetVr(op.vd, m_ir->CreateSub(ab[0], ab[1])); + const auto a = get_vr(op.va); + const auto b = get_vr(op.vb); + set_vr(op.vd, eval(a - b)); } void PPUTranslator::VSUBUWS(ppu_opcode_t op) { - const auto ab = ZExt(GetVrs(VrType::vi32, op.va, op.vb)); - const auto result = m_ir->CreateSub(ab[0], ab[1]); - const auto saturated = Saturate(result, ICmpInst::ICMP_SLT, m_ir->getInt64(0)); - SetVr(op.vd, saturated.first); - SetSat(IsNotZero(saturated.second)); + const auto a = get_vr(op.va); + const auto b = get_vr(op.vb); + const auto d = eval(a - b); + const auto x = eval(ucarry(b, d, a) >> 31); + set_vr(op.vd, eval(d & ~x)); + SetSat(IsNotZero(x.value)); } void PPUTranslator::VSUMSWS(ppu_opcode_t op) diff --git a/rpcs3/Emu/Cell/PPUTranslator.h b/rpcs3/Emu/Cell/PPUTranslator.h index cefabf306e..d94f8fde9c 100644 --- a/rpcs3/Emu/Cell/PPUTranslator.h +++ b/rpcs3/Emu/Cell/PPUTranslator.h @@ -2,117 +2,12 @@ #ifdef LLVM_AVAILABLE -#include -#include -#include 
-#include -#include -#include - +#include "../rpcs3/Emu/CPU/CPUTranslator.h" #include "../rpcs3/Emu/Cell/PPUOpcodes.h" #include "../rpcs3/Emu/Cell/PPUAnalyser.h" -#include "restore_new.h" -#ifdef _MSC_VER -#pragma warning(push, 0) -#endif -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/Module.h" -#ifdef _MSC_VER -#pragma warning(pop) -#endif -#include "define_new_memleakdetect.h" - -#include "../Utilities/types.h" -#include "../Utilities/StrFmt.h" -#include "../Utilities/BEType.h" - -template -struct TypeGen +class PPUTranslator final : public cpu_translator { - static_assert(!sizeof(T), "GetType<>() error: unknown type"); -}; - -template -struct TypeGen::value>> -{ - static llvm::Type* get(llvm::LLVMContext& context) { return llvm::Type::getVoidTy(context); } -}; - -template -struct TypeGen::value || std::is_same::value || std::is_same::value>> -{ - static llvm::Type* get(llvm::LLVMContext& context) { return llvm::Type::getInt64Ty(context); } -}; - -template -struct TypeGen::value || std::is_same::value>> -{ - static llvm::Type* get(llvm::LLVMContext& context) { return llvm::Type::getInt32Ty(context); } -}; - -template -struct TypeGen::value || std::is_same::value>> -{ - static llvm::Type* get(llvm::LLVMContext& context) { return llvm::Type::getInt16Ty(context); } -}; - -template -struct TypeGen::value || std::is_same::value || std::is_same::value>> -{ - static llvm::Type* get(llvm::LLVMContext& context) { return llvm::Type::getInt8Ty(context); } -}; - -template<> -struct TypeGen -{ - static llvm::Type* get(llvm::LLVMContext& context) { return llvm::Type::getFloatTy(context); } -}; - -template<> -struct TypeGen -{ - static llvm::Type* get(llvm::LLVMContext& context) { return llvm::Type::getDoubleTy(context); } -}; - -template<> -struct TypeGen -{ - static llvm::Type* get(llvm::LLVMContext& context) { return llvm::Type::getInt1Ty(context); } -}; - -template<> -struct TypeGen -{ - static llvm::Type* get(llvm::LLVMContext& 
context) { return llvm::Type::getIntNTy(context, 128); } -}; - -// Pointer type -template -struct TypeGen -{ - static llvm::Type* get(llvm::LLVMContext& context) { return TypeGen::get(context)->getPointerTo(); } -}; - -// Vector type -template -struct TypeGen -{ - static llvm::Type* get(llvm::LLVMContext& context) { return llvm::VectorType::get(TypeGen::get(context), N); } -}; - -class PPUTranslator final //: public CPUTranslator -{ - // LLVM context - llvm::LLVMContext& m_context; - - // Module to which all generated code is output to - llvm::Module* const m_module; - - // Endianness, affects vector element numbering (TODO) - const bool m_is_be; - // PPU Module const ppu_module& m_info; @@ -122,9 +17,6 @@ class PPUTranslator final //: public CPUTranslator // Attributes for function calls which are "pure" and may be optimized away if their results are unused const llvm::AttributeSet m_pure_attr; - // IR builder - llvm::IRBuilder<>* m_ir; - // LLVM function llvm::Function* m_function; @@ -191,6 +83,20 @@ class PPUTranslator final //: public CPUTranslator #undef DEF_VALUE public: + template + value_t get_vr(u32 vr) + { + value_t result; + result.value = m_ir->CreateBitCast(GetVr(vr, VrType::vi32), value_t::get_type(m_context)); + return result; + } + + template + void set_vr(u32 vr, value_t v) + { + return SetVr(vr, v.value); + } + // Get current instruction address llvm::Value* GetAddr(u64 _add = 0); @@ -382,19 +288,6 @@ public: // Write to memory void WriteMemory(llvm::Value* addr, llvm::Value* value, bool is_be = true, u32 align = 1); - // Convert a C++ type to an LLVM type - template - llvm::Type* GetType() - { - return TypeGen::get(m_context); - } - - template - llvm::PointerType* GetPtrType() - { - return TypeGen::get(m_context)->getPointerTo(); - } - // Get an undefined value with specified type template llvm::Value* GetUndef() diff --git a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp index 193ce2096f..3400adf07c 100644 --- 
a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp @@ -157,6 +157,14 @@ void spu_recompiler::compile(spu_function_t& f) // Start compilation m_pos = f.addr; + if (utils::has_avx()) + { + compiler.vzeroupper(); + //compiler.pxor(asmjit::x86::xmm0, asmjit::x86::xmm0); + //compiler.vptest(asmjit::x86::ymm0, asmjit::x86::ymm0); + //compiler.jnz(end_label); + } + for (const u32 op : f.data) { // Bind label if initialized @@ -615,6 +623,16 @@ void spu_recompiler::ROT(spu_opcode_t op) return; } + if (utils::has_xop()) + { + const XmmLink& va = XmmGet(op.ra, XmmType::Int); + const XmmLink& vb = XmmGet(op.rb, XmmType::Int); + const XmmLink& vt = XmmAlloc(); + c->vprotd(vt, va, vb); + c->movdqa(SPU_OFF_128(gpr, op.rt), vt); + return; + } + auto body = [](u32* t, const u32* a, const s32* b) noexcept { for (u32 i = 0; i < 4; i++) @@ -654,6 +672,22 @@ void spu_recompiler::ROTM(spu_opcode_t op) return; } + if (utils::has_xop()) + { + const XmmLink& va = XmmGet(op.ra, XmmType::Int); + const XmmLink& vb = XmmGet(op.rb, XmmType::Int); + const XmmLink& vt = XmmAlloc(); + c->psubd(vb, XmmConst(_mm_set1_epi32(1))); + c->pandn(vb, XmmConst(_mm_set1_epi32(0x3f))); + c->pxor(vt, vt); + c->psubd(vt, vb); + c->pcmpgtd(vb, XmmConst(_mm_set1_epi32(31))); + c->vpshld(vt, va, vt); + c->vpandn(vt, vb, vt); + c->movdqa(SPU_OFF_128(gpr, op.rt), vt); + return; + } + auto body = [](u32* t, const u32* a, const u32* b) noexcept { for (u32 i = 0; i < 4; i++) @@ -694,6 +728,21 @@ void spu_recompiler::ROTMA(spu_opcode_t op) return; } + if (utils::has_xop()) + { + const XmmLink& va = XmmGet(op.ra, XmmType::Int); + const XmmLink& vb = XmmGet(op.rb, XmmType::Int); + const XmmLink& vt = XmmAlloc(); + c->psubd(vb, XmmConst(_mm_set1_epi32(1))); + c->pandn(vb, XmmConst(_mm_set1_epi32(0x3f))); + c->pxor(vt, vt); + c->pminud(vb, XmmConst(_mm_set1_epi32(31))); + c->psubd(vt, vb); + c->vpshad(vt, va, vt); + c->movdqa(SPU_OFF_128(gpr, op.rt), vt); + return; + } + auto body = 
[](s32* t, const s32* a, const u32* b) noexcept { for (u32 i = 0; i < 4; i++) @@ -733,6 +782,19 @@ void spu_recompiler::SHL(spu_opcode_t op) return; } + if (utils::has_xop()) + { + const XmmLink& va = XmmGet(op.ra, XmmType::Int); + const XmmLink& vb = XmmGet(op.rb, XmmType::Int); + const XmmLink& vt = XmmAlloc(); + c->pand(vb, XmmConst(_mm_set1_epi32(0x3f))); + c->vpcmpgtd(vt, vb, XmmConst(_mm_set1_epi32(31))); + c->vpshld(vb, va, vb); + c->pandn(vt, vb); + c->movdqa(SPU_OFF_128(gpr, op.rt), vt); + return; + } + auto body = [](u32* t, const u32* a, const u32* b) noexcept { for (u32 i = 0; i < 4; i++) @@ -766,13 +828,24 @@ void spu_recompiler::ROTH(spu_opcode_t op) //nf const XmmLink& vb = XmmGet(op.rb, XmmType::Int); const XmmLink& vt = XmmAlloc(); const XmmLink& v4 = XmmAlloc(); - c->movdqa(v4, XmmConst(_mm_set1_epi16(0xf))); - c->pand(vb, v4); - c->vpsllvw(vt, va, vb); - c->psubw(vb, XmmConst(_mm_set1_epi16(1))); - c->pandn(vb, v4); - c->vpsrlvw(va, va, vb); - c->por(vt, va); + c->vmovdqa(v4, XmmConst(_mm_set_epi32(0x0d0c0d0c, 0x09080908, 0x05040504, 0x01000100))); + c->vpshufb(vt, va, v4); // duplicate low word + c->vpsrld(va, va, 16); + c->vpshufb(va, va, v4); + c->vpsrld(v4, vb, 16); + c->vprolvd(va, va, v4); + c->vprolvd(vb, vt, vb); + c->vpblendw(vt, vb, va, 0xaa); + c->vmovdqa(SPU_OFF_128(gpr, op.rt), vt); + return; + } + + if (utils::has_xop()) + { + const XmmLink& va = XmmGet(op.ra, XmmType::Int); + const XmmLink& vb = XmmGet(op.rb, XmmType::Int); + const XmmLink& vt = XmmAlloc(); + c->vprotw(vt, va, vb); c->movdqa(SPU_OFF_128(gpr, op.rt), vt); return; } @@ -816,6 +889,42 @@ void spu_recompiler::ROTHM(spu_opcode_t op) return; } + if (utils::has_avx2()) + { + const XmmLink& va = XmmGet(op.ra, XmmType::Int); + const XmmLink& vb = XmmGet(op.rb, XmmType::Int); + const XmmLink& vt = XmmAlloc(); + const XmmLink& v4 = XmmAlloc(); + const XmmLink& v5 = XmmAlloc(); + c->psubw(vb, XmmConst(_mm_set1_epi16(1))); + c->pandn(vb, XmmConst(_mm_set1_epi16(0x1f))); + 
c->movdqa(vt, XmmConst(_mm_set1_epi32(0xffff0000))); // mask: select high words + c->vpsrld(v4, vb, 16); + c->vpsubusw(v5, vb, vt); // clear high words (using saturation sub for throughput) + c->vpandn(vb, vt, va); // clear high words + c->vpsrlvd(va, va, v4); + c->vpsrlvd(vb, vb, v5); + c->vpblendw(vt, vb, va, 0xaa); // can use vpblendvb with 0xffff0000 mask (vt) + c->movdqa(SPU_OFF_128(gpr, op.rt), vt); + return; + } + + if (utils::has_xop()) + { + const XmmLink& va = XmmGet(op.ra, XmmType::Int); + const XmmLink& vb = XmmGet(op.rb, XmmType::Int); + const XmmLink& vt = XmmAlloc(); + c->psubw(vb, XmmConst(_mm_set1_epi16(1))); + c->pandn(vb, XmmConst(_mm_set1_epi16(0x1f))); + c->pxor(vt, vt); + c->psubw(vt, vb); + c->pcmpgtw(vb, XmmConst(_mm_set1_epi16(15))); + c->vpshlw(vt, va, vt); + c->vpandn(vt, vb, vt); + c->movdqa(SPU_OFF_128(gpr, op.rt), vt); + return; + } + auto body = [](u16* t, const u16* a, const u16* b) noexcept { for (u32 i = 0; i < 8; i++) @@ -856,6 +965,43 @@ void spu_recompiler::ROTMAH(spu_opcode_t op) return; } + if (utils::has_avx2()) + { + const XmmLink& va = XmmGet(op.ra, XmmType::Int); + const XmmLink& vb = XmmGet(op.rb, XmmType::Int); + const XmmLink& vt = XmmAlloc(); + const XmmLink& v4 = XmmAlloc(); + const XmmLink& v5 = XmmAlloc(); + c->psubw(vb, XmmConst(_mm_set1_epi16(1))); + c->movdqa(vt, XmmConst(_mm_set1_epi16(0x1f))); + c->vpandn(v4, vb, vt); + c->vpand(v5, vb, vt); + c->movdqa(vt, XmmConst(_mm_set1_epi32(0x2f))); + c->vpsrld(v4, v4, 16); + c->vpsubusw(v5, vt, v5); // clear high word and add 16 to low word + c->vpslld(vb, va, 16); + c->vpsravd(va, va, v4); + c->vpsravd(vb, vb, v5); + c->vpblendw(vt, vb, va, 0xaa); + c->movdqa(SPU_OFF_128(gpr, op.rt), vt); + return; + } + + if (utils::has_xop()) + { + const XmmLink& va = XmmGet(op.ra, XmmType::Int); + const XmmLink& vb = XmmGet(op.rb, XmmType::Int); + const XmmLink& vt = XmmAlloc(); + c->psubw(vb, XmmConst(_mm_set1_epi16(1))); + c->pandn(vb, XmmConst(_mm_set1_epi16(0x1f))); + 
c->pxor(vt, vt); + c->pminuw(vb, XmmConst(_mm_set1_epi16(15))); + c->psubw(vt, vb); + c->vpshaw(vt, va, vt); + c->movdqa(SPU_OFF_128(gpr, op.rt), vt); + return; + } + auto body = [](s16* t, const s16* a, const u16* b) noexcept { for (u32 i = 0; i < 8; i++) @@ -895,6 +1041,38 @@ void spu_recompiler::SHLH(spu_opcode_t op) return; } + if (utils::has_avx2()) + { + const XmmLink& va = XmmGet(op.ra, XmmType::Int); + const XmmLink& vb = XmmGet(op.rb, XmmType::Int); + const XmmLink& vt = XmmAlloc(); + const XmmLink& v4 = XmmAlloc(); + const XmmLink& v5 = XmmAlloc(); + c->pand(vb, XmmConst(_mm_set1_epi16(0x1f))); + c->movdqa(vt, XmmConst(_mm_set1_epi32(0xffff0000))); // mask: select high words + c->vpsrld(v4, vb, 16); + c->vpsubusw(v5, vb, vt); // clear high words (using saturation sub for throughput) + c->vpand(vb, vt, va); // clear low words + c->vpsllvd(va, va, v5); + c->vpsllvd(vb, vb, v4); + c->vpblendw(vt, vb, va, 0x55); + c->movdqa(SPU_OFF_128(gpr, op.rt), vt); + return; + } + + if (utils::has_xop()) + { + const XmmLink& va = XmmGet(op.ra, XmmType::Int); + const XmmLink& vb = XmmGet(op.rb, XmmType::Int); + const XmmLink& vt = XmmAlloc(); + c->pand(vb, XmmConst(_mm_set1_epi16(0x1f))); + c->vpcmpgtw(vt, vb, XmmConst(_mm_set1_epi16(15))); + c->vpshlw(vb, va, vb); + c->pandn(vt, vb); + c->movdqa(SPU_OFF_128(gpr, op.rt), vt); + return; + } + auto body = [](u16* t, const u16* a, const u16* b) noexcept { for (u32 i = 0; i < 8; i++) @@ -933,6 +1111,14 @@ void spu_recompiler::ROTI(spu_opcode_t op) return; } + if (utils::has_xop()) + { + const XmmLink& va = XmmGet(op.ra, XmmType::Int); + c->vprotd(va, va, s); + c->movdqa(SPU_OFF_128(gpr, op.rt), va); + return; + } + const XmmLink& va = XmmGet(op.ra, XmmType::Int); const XmmLink& v1 = XmmAlloc(); c->movdqa(v1, va); @@ -1530,50 +1716,57 @@ void spu_recompiler::CDX(spu_opcode_t op) void spu_recompiler::ROTQBI(spu_opcode_t op) { - c->mov(*qw0, SPU_OFF_64(gpr, op.ra, &v128::_u64, 0)); - c->mov(*qw1, SPU_OFF_64(gpr, op.ra, 
&v128::_u64, 1)); - c->mov(*qw2, *qw0); - c->mov(*addr, SPU_OFF_32(gpr, op.rb, &v128::_u32, 3)); - c->and_(*addr, 7); - c->shld(*qw0, *qw1, *addr); - c->shld(*qw1, *qw2, *addr); - c->mov(SPU_OFF_64(gpr, op.rt, &v128::_u64, 0), *qw0); - c->mov(SPU_OFF_64(gpr, op.rt, &v128::_u64, 1), *qw1); - c->unuse(*addr); - c->unuse(*qw0); - c->unuse(*qw1); - c->unuse(*qw2); + const XmmLink& va = XmmGet(op.ra, XmmType::Int); + const XmmLink& vb = XmmGet(op.rb, XmmType::Int); + const XmmLink& vt = XmmAlloc(); + const XmmLink& v4 = XmmAlloc(); + c->psrldq(vb, 12); + c->pand(vb, XmmConst(_mm_set_epi64x(0, 7))); + c->movdqa(v4, XmmConst(_mm_set_epi64x(0, 64))); + c->pshufd(vt, va, 0x4e); + c->psubq(v4, vb); + c->psllq(va, vb); + c->psrlq(vt, v4); + c->por(vt, va); + c->movdqa(SPU_OFF_128(gpr, op.rt), vt); } void spu_recompiler::ROTQMBI(spu_opcode_t op) { - c->mov(*qw0, SPU_OFF_64(gpr, op.ra, &v128::_u64, 0)); - c->mov(*qw1, SPU_OFF_64(gpr, op.ra, &v128::_u64, 1)); - c->mov(*addr, SPU_OFF_32(gpr, op.rb, &v128::_u32, 3)); - c->neg(*addr); - c->and_(*addr, 7); - c->shrd(*qw0, *qw1, *addr); - c->shr(*qw1, *addr); - c->mov(SPU_OFF_64(gpr, op.rt, &v128::_u64, 0), *qw0); - c->mov(SPU_OFF_64(gpr, op.rt, &v128::_u64, 1), *qw1); - c->unuse(*addr); - c->unuse(*qw0); - c->unuse(*qw1); + const XmmLink& va = XmmGet(op.ra, XmmType::Int); + const XmmLink& vb = XmmAlloc(); + const XmmLink& vt = XmmGet(op.rb, XmmType::Int); + const XmmLink& v4 = XmmAlloc(); + c->psrldq(vt, 12); + c->pxor(vb, vb); + c->psubq(vb, vt); + c->pand(vb, XmmConst(_mm_set_epi64x(0, 7))); + c->movdqa(v4, XmmConst(_mm_set_epi64x(0, 64))); + c->movdqa(vt, va); + c->psrldq(vt, 8); + c->psubq(v4, vb); + c->psrlq(va, vb); + c->psllq(vt, v4); + c->por(vt, va); + c->movdqa(SPU_OFF_128(gpr, op.rt), vt); } void spu_recompiler::SHLQBI(spu_opcode_t op) { - c->mov(*qw0, SPU_OFF_64(gpr, op.ra, &v128::_u64, 0)); - c->mov(*qw1, SPU_OFF_64(gpr, op.ra, &v128::_u64, 1)); - c->mov(*addr, SPU_OFF_32(gpr, op.rb, &v128::_u32, 3)); - c->and_(*addr, 
7); - c->shld(*qw1, *qw0, *addr); - c->shl(*qw0, *addr); - c->mov(SPU_OFF_64(gpr, op.rt, &v128::_u64, 0), *qw0); - c->mov(SPU_OFF_64(gpr, op.rt, &v128::_u64, 1), *qw1); - c->unuse(*addr); - c->unuse(*qw0); - c->unuse(*qw1); + const XmmLink& va = XmmGet(op.ra, XmmType::Int); + const XmmLink& vb = XmmGet(op.rb, XmmType::Int); + const XmmLink& vt = XmmAlloc(); + const XmmLink& v4 = XmmAlloc(); + c->psrldq(vb, 12); + c->pand(vb, XmmConst(_mm_set_epi64x(0, 7))); + c->movdqa(v4, XmmConst(_mm_set_epi64x(0, 64))); + c->movdqa(vt, va); + c->pslldq(vt, 8); + c->psubq(v4, vb); + c->psllq(va, vb); + c->psrlq(vt, v4); + c->por(vt, va); + c->movdqa(SPU_OFF_128(gpr, op.rt), vt); } void spu_recompiler::ROTQBY(spu_opcode_t op) @@ -1674,16 +1867,14 @@ void spu_recompiler::SHLQBY(spu_opcode_t op) void spu_recompiler::ORX(spu_opcode_t op) { - c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 0)); - c->or_(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 1)); - c->or_(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 2)); - c->or_(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3)); - c->mov(SPU_OFF_32(gpr, op.rt, &v128::_u32, 3), *addr); - c->xor_(*addr, *addr); - c->mov(SPU_OFF_32(gpr, op.rt, &v128::_u32, 0), *addr); - c->mov(SPU_OFF_32(gpr, op.rt, &v128::_u32, 1), *addr); - c->mov(SPU_OFF_32(gpr, op.rt, &v128::_u32, 2), *addr); - c->unuse(*addr); + const XmmLink& va = XmmGet(op.ra, XmmType::Int); + const XmmLink& v1 = XmmAlloc(); + c->pshufd(v1, va, 0xb1); + c->por(va, v1); + c->pshufd(v1, va, 0x4e); + c->por(va, v1); + c->pslldq(va, 12); + c->movdqa(SPU_OFF_128(gpr, op.rt), va); } void spu_recompiler::CBD(spu_opcode_t op) @@ -1790,40 +1981,37 @@ void spu_recompiler::CDD(spu_opcode_t op) void spu_recompiler::ROTQBII(spu_opcode_t op) { - c->mov(*qw0, SPU_OFF_64(gpr, op.ra, &v128::_u64, 0)); - c->mov(*qw1, SPU_OFF_64(gpr, op.ra, &v128::_u64, 1)); - c->mov(*qw2, *qw0); - c->shld(*qw0, *qw1, op.i7 & 0x7); - c->shld(*qw1, *qw2, op.i7 & 0x7); - c->mov(SPU_OFF_64(gpr, op.rt, &v128::_u64, 0), *qw0); - 
c->mov(SPU_OFF_64(gpr, op.rt, &v128::_u64, 1), *qw1); - c->unuse(*qw0); - c->unuse(*qw1); - c->unuse(*qw2); + const XmmLink& va = XmmGet(op.ra, XmmType::Int); + const XmmLink& vt = XmmAlloc(); + c->pshufd(vt, va, 0x4e); // swap 64-bit parts + c->psllq(va, (op.i7 & 0x7)); + c->psrlq(vt, 64 - (op.i7 & 0x7)); + c->por(vt, va); + c->movdqa(SPU_OFF_128(gpr, op.rt), vt); } void spu_recompiler::ROTQMBII(spu_opcode_t op) { - c->mov(*qw0, SPU_OFF_64(gpr, op.ra, &v128::_u64, 0)); - c->mov(*qw1, SPU_OFF_64(gpr, op.ra, &v128::_u64, 1)); - c->shrd(*qw0, *qw1, 0-op.i7 & 0x7); - c->shr(*qw1, 0-op.i7 & 0x7); - c->mov(SPU_OFF_64(gpr, op.rt, &v128::_u64, 0), *qw0); - c->mov(SPU_OFF_64(gpr, op.rt, &v128::_u64, 1), *qw1); - c->unuse(*qw0); - c->unuse(*qw1); + const XmmLink& va = XmmGet(op.ra, XmmType::Int); + const XmmLink& vt = XmmAlloc(); + c->movdqa(vt, va); + c->psrldq(vt, 8); + c->psrlq(va, ((0 - op.i7) & 0x7)); + c->psllq(vt, 64 - ((0 - op.i7) & 0x7)); + c->por(vt, va); + c->movdqa(SPU_OFF_128(gpr, op.rt), vt); } void spu_recompiler::SHLQBII(spu_opcode_t op) { - c->mov(*qw0, SPU_OFF_64(gpr, op.ra, &v128::_u64, 0)); - c->mov(*qw1, SPU_OFF_64(gpr, op.ra, &v128::_u64, 1)); - c->shld(*qw1, *qw0, op.i7 & 0x7); - c->shl(*qw0, op.i7 & 0x7); - c->mov(SPU_OFF_64(gpr, op.rt, &v128::_u64, 0), *qw0); - c->mov(SPU_OFF_64(gpr, op.rt, &v128::_u64, 1), *qw1); - c->unuse(*qw0); - c->unuse(*qw1); + const XmmLink& va = XmmGet(op.ra, XmmType::Int); + const XmmLink& vt = XmmAlloc(); + c->movdqa(vt, va); + c->pslldq(vt, 8); + c->psllq(va, (op.i7 & 0x7)); + c->psrlq(vt, 64 - (op.i7 & 0x7)); + c->por(vt, va); + c->movdqa(SPU_OFF_128(gpr, op.rt), vt); } void spu_recompiler::ROTQBYI(spu_opcode_t op) @@ -3290,6 +3478,13 @@ void spu_recompiler::SELB(spu_opcode_t op) return; } + if (utils::has_xop()) + { + c->vpcmov(vc, vb, SPU_OFF_128(gpr, op.ra), vc); + c->movdqa(SPU_OFF_128(gpr, op.rt4), vc); + return; + } + c->pand(vb, vc); c->pandn(vc, SPU_OFF_128(gpr, op.ra)); c->por(vb, vc); @@ -3414,6 +3609,10 @@ 
void spu_recompiler::SHUFB(spu_opcode_t op) { c->vpternlogd(vc, va, vb, 0xca /* A?B:C */); } + else if (utils::has_xop()) + { + c->vpcmov(vc, va, vb, vc); + } else { c->pand(va, vc); diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index eb98662899..ae16a1960e 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -1767,6 +1767,17 @@ bool SPUThread::stop_and_signal(u32 code) return true; } + case 0x100: + { + if (ch_out_mbox.get_count()) + { + fmt::throw_exception("STOP code 0x100: Out_MBox is not empty" HERE); + } + + _mm_mfence(); + return true; + } + case 0x101: { /* ===== sys_spu_thread_group_exit ===== */ diff --git a/rpcs3/Emu/Cell/lv2/sys_spu.cpp b/rpcs3/Emu/Cell/lv2/sys_spu.cpp index 0bd4e042b4..7bf7c9f41e 100644 --- a/rpcs3/Emu/Cell/lv2/sys_spu.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_spu.cpp @@ -278,16 +278,18 @@ error_code sys_spu_thread_get_exit_status(u32 id, vm::ptr status) const auto thread = idm::get(id); - if (!thread) + if (UNLIKELY(!thread)) { return CELL_ESRCH; } - // TODO: check CELL_ESTAT condition + if (thread->status & SPU_STATUS_STOPPED_BY_STOP) + { + *status = thread->ch_out_mbox.get_value(); + return CELL_OK; + } - *status = thread->ch_out_mbox.pop(*thread); - - return CELL_OK; + return CELL_ESTAT; } error_code sys_spu_thread_group_create(vm::ptr id, u32 num, s32 prio, vm::ptr attr) diff --git a/rpcs3/Emu/Io/KeyboardHandler.h b/rpcs3/Emu/Io/KeyboardHandler.h index 11e2a80c07..d8528982c1 100644 --- a/rpcs3/Emu/Io/KeyboardHandler.h +++ b/rpcs3/Emu/Io/KeyboardHandler.h @@ -248,6 +248,7 @@ struct KbButton struct Keyboard { + bool m_key_repeat; // for future use KbData m_data; KbConfig m_config; std::vector m_buttons; @@ -255,6 +256,7 @@ struct Keyboard Keyboard() : m_data() , m_config() + , m_key_repeat(false) { } }; @@ -274,19 +276,20 @@ public: { for(Keyboard& keyboard : m_keyboards) { + KbData& data = keyboard.m_data; + KbConfig& config = keyboard.m_config; + + // TODO: handle read modes + 
for(KbButton& button : keyboard.m_buttons) { if(button.m_keyCode != code) continue; - KbData& data = keyboard.m_data; - KbConfig& config = keyboard.m_config; - if (pressed) { // Meta Keys - if (code == 308 || code == 307 || code == 306 || - code == 393 || code == 396 || code == 394) + if (code == 308 || code == 307 || code == 306 || code == 393 || code == 396 || code == 394) { data.mkey |= button.m_outKeyCode; } @@ -310,17 +313,17 @@ public: data.len++; } } - - if (!pressed) + else { // Meta Keys - if (code == 308 || code == 307 || code == 306 || - code == 393 || code == 396 || code == 394) + if (code == 308 || code == 307 || code == 306 || code == 393 || code == 396 || code == 394) { data.mkey &= ~button.m_outKeyCode; } + // Needed to indicate key releases. Without this you have to tap another key before using the same key again + data.keycode[0] = CELL_KEYC_NO_EVENT; + data.len = 1; } - } } } diff --git a/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.cpp b/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.cpp index 1d00c820be..b461283efa 100644 --- a/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.cpp @@ -40,15 +40,48 @@ void FragmentProgramDecompiler::SetDst(std::string code, bool append_mask) { if (dst.exp_tex) { - //If dst.exp_tex really is _bx2 postfix, we need to unpack dynamic range + //Expand [0,1] to [-1, 1]. 
Confirmed by Castlevania: LOS AddCode("//exp tex flag is set"); code = "((" + code + "- 0.5) * 2.)"; } if (dst.saturate) + { code = saturate(code); - else - code = ClampValue(code, dst.prec); + } + else if (dst.prec) + { + switch (dst.opcode) + { + case RSX_FP_OPCODE_NRM: + case RSX_FP_OPCODE_MAX: + case RSX_FP_OPCODE_MIN: + case RSX_FP_OPCODE_COS: + case RSX_FP_OPCODE_SIN: + case RSX_FP_OPCODE_REFL: + case RSX_FP_OPCODE_EX2: + case RSX_FP_OPCODE_FRC: + case RSX_FP_OPCODE_LIT: + case RSX_FP_OPCODE_LIF: + case RSX_FP_OPCODE_LRP: + case RSX_FP_OPCODE_LG2: + break; + case RSX_FP_OPCODE_MOV: + //NOTE: Sometimes varying inputs from VS are out of range so do not exempt any input types, unless fp16 (Naruto UNS) + if (dst.fp16 && src0.fp16 && src0.reg_type == RSX_FP_REGISTER_TYPE_TEMP) + break; + default: + { + //fp16 precsion flag on f32 register; ignore + if (dst.prec == 1 && !dst.fp16) + break; + + //clamp value to allowed range + code = ClampValue(code, dst.prec); + break; + } + } + } } code += (append_mask ? 
"$m" : ""); @@ -195,12 +228,12 @@ std::string FragmentProgramDecompiler::AddX2d() //Failure to catch causes infinite values since theres alot of rcp(0) std::string FragmentProgramDecompiler::NotZero(const std::string& code) { - return "(max(abs(" + code + "), 0.000001) * sign(" + code + "))"; + return "(max(abs(" + code + "), 0.0000000001) * sign(" + code + "))"; } std::string FragmentProgramDecompiler::NotZeroPositive(const std::string& code) { - return "max(abs(" + code + "), 0.000001)"; + return "max(abs(" + code + "), 0.0000000001)"; } std::string FragmentProgramDecompiler::ClampValue(const std::string& code, u32 precision) @@ -385,7 +418,10 @@ template std::string FragmentProgramDecompiler::GetSRC(T src) auto ® = temp_registers[src.tmp_reg_index]; if (reg.requires_gather(xy_read, zw_read)) + { + properties.has_gather_op = true; AddCode(reg.gather_r()); + } } } @@ -407,7 +443,10 @@ template std::string FragmentProgramDecompiler::GetSRC(T src) switch (dst.src_attr_reg_num) { - case 0x00: ret += reg_table[0]; break; + case 0x00: + ret += reg_table[0]; + properties.has_wpos_input = true; + break; default: if (dst.src_attr_reg_num < sizeof(reg_table) / sizeof(reg_table[0])) { @@ -478,24 +517,28 @@ std::string FragmentProgramDecompiler::BuildCode() //Insert global function definitions insertGlobalFunctions(OS); - std::string float2 = getFloatTypeName(2); - std::string float4 = getFloatTypeName(4); + //Declare register gather/merge if needed + if (properties.has_gather_op) + { + std::string float2 = getFloatTypeName(2); + std::string float4 = getFloatTypeName(4); - OS << float4 << " gather(" << float4 << " _h0, " << float4 << " _h1)\n"; - OS << "{\n"; - OS << " float x = uintBitsToFloat(packHalf2x16(_h0.xy));\n"; - OS << " float y = uintBitsToFloat(packHalf2x16(_h0.zw));\n"; - OS << " float z = uintBitsToFloat(packHalf2x16(_h1.xy));\n"; - OS << " float w = uintBitsToFloat(packHalf2x16(_h1.zw));\n"; - OS << " return " << float4 << "(x, y, z, w);\n"; - OS << "}\n\n"; + 
OS << float4 << " gather(" << float4 << " _h0, " << float4 << " _h1)\n"; + OS << "{\n"; + OS << " float x = uintBitsToFloat(packHalf2x16(_h0.xy));\n"; + OS << " float y = uintBitsToFloat(packHalf2x16(_h0.zw));\n"; + OS << " float z = uintBitsToFloat(packHalf2x16(_h1.xy));\n"; + OS << " float w = uintBitsToFloat(packHalf2x16(_h1.zw));\n"; + OS << " return " << float4 << "(x, y, z, w);\n"; + OS << "}\n\n"; - OS << float2 << " gather(" << float4 << " _h)\n"; - OS << "{\n"; - OS << " float x = uintBitsToFloat(packHalf2x16(_h.xy));\n"; - OS << " float y = uintBitsToFloat(packHalf2x16(_h.zw));\n"; - OS << " return " << float2 << "(x, y);\n"; - OS << "}\n\n"; + OS << float2 << " gather(" << float4 << " _h)\n"; + OS << "{\n"; + OS << " float x = uintBitsToFloat(packHalf2x16(_h.xy));\n"; + OS << " float y = uintBitsToFloat(packHalf2x16(_h.zw));\n"; + OS << " return " << float2 << "(x, y);\n"; + OS << "}\n\n"; + } insertMainStart(OS); OS << main << std::endl; @@ -558,7 +601,10 @@ bool FragmentProgramDecompiler::handle_scb(u32 opcode) case RSX_FP_OPCODE_EX2: SetDst("exp2($0.xxxx)"); return true; case RSX_FP_OPCODE_FLR: SetDst("floor($0)"); return true; case RSX_FP_OPCODE_FRC: SetDst(getFunction(FUNCTION::FUNCTION_FRACT)); return true; - case RSX_FP_OPCODE_LIT: SetDst("lit_legacy($0)"); return true; + case RSX_FP_OPCODE_LIT: + SetDst("lit_legacy($0)"); + properties.has_lit_op = true; + return true; case RSX_FP_OPCODE_LIF: SetDst(getFloatTypeName(4) + "(1.0, $0.y, ($0.y > 0 ? 
pow(2.0, $0.w) : 0.0), 1.0)"); return true; case RSX_FP_OPCODE_LRP: SetDst(getFloatTypeName(4) + "($2 * (1 - $0) + $1 * $0)"); return true; case RSX_FP_OPCODE_LG2: SetDst("log2(" + NotZeroPositive("$0.x") + ").xxxx"); return true; diff --git a/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.h b/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.h index d88cdd70e2..9eb1fe82a1 100644 --- a/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.h +++ b/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.h @@ -219,7 +219,16 @@ protected: /** insert end of main function (return value, output copy...) */ virtual void insertMainEnd(std::stringstream &OS) = 0; + public: + struct + { + bool has_lit_op = false; + bool has_gather_op = false; + bool has_wpos_input = false; + } + properties; + ParamArray m_parr; FragmentProgramDecompiler(const RSXFragmentProgram &prog, u32& size); FragmentProgramDecompiler(const FragmentProgramDecompiler&) = delete; diff --git a/rpcs3/Emu/RSX/Common/GLSLCommon.h b/rpcs3/Emu/RSX/Common/GLSLCommon.h index 58464933ad..39642cec71 100644 --- a/rpcs3/Emu/RSX/Common/GLSLCommon.h +++ b/rpcs3/Emu/RSX/Common/GLSLCommon.h @@ -281,31 +281,18 @@ namespace glsl OS << " if (desc.attribute_size == 0)\n"; OS << " {\n"; OS << " //default values\n"; - OS << " switch (location)\n"; - OS << " {\n"; - OS << " case 0:\n"; - OS << " //position\n"; - OS << " return vec4(0., 0., 0., 1.);\n"; - OS << " case 1:\n"; - OS << " case 2:\n"; - OS << " //weight, normals\n"; - OS << " return vec4(0.);\n"; - OS << " case 3:\n"; - OS << " //diffuse\n"; - OS << " return vec4(1.);\n"; - OS << " case 4:\n"; - OS << " //specular\n"; - OS << " return vec4(0.);\n"; - OS << " case 5:\n"; - OS << " //fog\n"; - OS << " return vec4(0.);\n"; - OS << " case 6:\n"; - OS << " //point size\n"; - OS << " return vec4(1.);\n"; - OS << " default:\n"; - OS << " //mostly just texture coordinates\n"; - OS << " return vec4(0.);\n"; - OS << " }\n"; + OS << " const vec4 defaults[] = \n"; + OS << " { vec4(0., 0., 0., 1.), 
//position\n"; + OS << " vec4(0.), vec4(0.), //weight, normals\n"; + OS << " vec4(1.), //diffuse\n"; + OS << " vec4(0.), vec4(0.), //specular, fog\n"; + OS << " vec4(1.), //point size\n"; + OS << " vec4(0.), //in_7\n"; + OS << " //in_tc registers\n"; + OS << " vec4(0.), vec4(0.), vec4(0.), vec4(0.),\n"; + OS << " vec4(0.), vec4(0.), vec4(0.), vec4(0.)\n"; + OS << " };\n"; + OS << " return defaults[location];\n"; OS << " }\n\n"; OS << " int vertex_id = " << vertex_id_name << " - int(vertex_base_index);\n"; OS << " if (desc.frequency == 0)\n"; @@ -326,84 +313,95 @@ namespace glsl OS << "}\n\n"; } - static void insert_glsl_legacy_function(std::ostream& OS, glsl::program_domain domain) + static void insert_glsl_legacy_function(std::ostream& OS, glsl::program_domain domain, bool require_lit_emulation, bool require_depth_conversion = false, bool require_wpos = false) { - OS << "vec4 lit_legacy(vec4 val)"; - OS << "{\n"; - OS << " vec4 clamped_val = val;\n"; - OS << " clamped_val.x = max(val.x, 0.);\n"; - OS << " clamped_val.y = max(val.y, 0.);\n"; - OS << " vec4 result;\n"; - OS << " result.x = 1.;\n"; - OS << " result.w = 1.;\n"; - OS << " result.y = clamped_val.x;\n"; - OS << " result.z = clamped_val.x > 0. ? 
exp(clamped_val.w * log(max(clamped_val.y, 0.000001))) : 0.;\n"; - OS << " return result;\n"; - OS << "}\n\n"; - - OS << "vec4 apply_zclip_xform(vec4 pos, float near_plane, float far_plane)\n"; - OS << "{\n"; - OS << " float d = pos.z / pos.w;\n"; - OS << " if (d < 0.f && d >= near_plane)\n"; - OS << " d = 0.f;\n"; //force clamp negative values - OS << " else if (d > 1.f && d <= far_plane)\n"; - OS << " d = min(1., 0.99 + (0.01 * (pos.z - near_plane) / (far_plane - near_plane)));\n"; - OS << " else\n"; - OS << " return pos; //d = (0.99 * d);\n"; //range compression for normal values is disabled until a solution to ops comparing z is found - OS << "\n"; - OS << " pos.z = d * pos.w;\n"; - OS << " return pos;\n"; - OS << "}\n\n"; + if (require_lit_emulation) + { + OS << "vec4 lit_legacy(vec4 val)"; + OS << "{\n"; + OS << " vec4 clamped_val = val;\n"; + OS << " clamped_val.x = max(val.x, 0.);\n"; + OS << " clamped_val.y = max(val.y, 0.);\n"; + OS << " vec4 result;\n"; + OS << " result.x = 1.;\n"; + OS << " result.w = 1.;\n"; + OS << " result.y = clamped_val.x;\n"; + OS << " result.z = clamped_val.x > 0. ? 
exp(clamped_val.w * log(max(clamped_val.y, 0.0000000001))) : 0.;\n"; + OS << " return result;\n"; + OS << "}\n\n"; + } if (domain == glsl::program_domain::glsl_vertex_program) + { + OS << "vec4 apply_zclip_xform(vec4 pos, float near_plane, float far_plane)\n"; + OS << "{\n"; + OS << " float d = pos.z / pos.w;\n"; + OS << " if (d < 0.f && d >= near_plane)\n"; + OS << " d = 0.f;\n"; //force clamp negative values + OS << " else if (d > 1.f && d <= far_plane)\n"; + OS << " d = min(1., 0.99 + (0.01 * (pos.z - near_plane) / (far_plane - near_plane)));\n"; + OS << " else\n"; + OS << " return pos; //d = (0.99 * d);\n"; //range compression for normal values is disabled until a solution to ops comparing z is found + OS << "\n"; + OS << " pos.z = d * pos.w;\n"; + OS << " return pos;\n"; + OS << "}\n\n"; + return; + } program_common::insert_compare_op(OS); - //NOTE: Memory layout is fetched as byteswapped BGRA [GBAR] (GOW collection, DS2, DeS) - //The A component (Z) is useless (should contain stencil8 or just 1) - OS << "vec4 decodeLinearDepth(float depth_value)\n"; - OS << "{\n"; - OS << " uint value = uint(depth_value * 16777215);\n"; - OS << " uint b = (value & 0xff);\n"; - OS << " uint g = (value >> 8) & 0xff;\n"; - OS << " uint r = (value >> 16) & 0xff;\n"; - OS << " return vec4(float(g)/255., float(b)/255., 1., float(r)/255.);\n"; - OS << "}\n\n"; + if (require_depth_conversion) + { + //NOTE: Memory layout is fetched as byteswapped BGRA [GBAR] (GOW collection, DS2, DeS) + //The A component (Z) is useless (should contain stencil8 or just 1) + OS << "vec4 decodeLinearDepth(float depth_value)\n"; + OS << "{\n"; + OS << " uint value = uint(depth_value * 16777215);\n"; + OS << " uint b = (value & 0xff);\n"; + OS << " uint g = (value >> 8) & 0xff;\n"; + OS << " uint r = (value >> 16) & 0xff;\n"; + OS << " return vec4(float(g)/255., float(b)/255., 1., float(r)/255.);\n"; + OS << "}\n\n"; - OS << "float read_value(vec4 src, uint remap_index)\n"; - OS << "{\n"; - OS << " switch 
(remap_index)\n"; - OS << " {\n"; - OS << " case 0: return src.a;\n"; - OS << " case 1: return src.r;\n"; - OS << " case 2: return src.g;\n"; - OS << " case 3: return src.b;\n"; - OS << " }\n"; - OS << "}\n\n"; + OS << "float read_value(vec4 src, uint remap_index)\n"; + OS << "{\n"; + OS << " switch (remap_index)\n"; + OS << " {\n"; + OS << " case 0: return src.a;\n"; + OS << " case 1: return src.r;\n"; + OS << " case 2: return src.g;\n"; + OS << " case 3: return src.b;\n"; + OS << " }\n"; + OS << "}\n\n"; - OS << "vec4 texture2DReconstruct(sampler2D tex, vec2 coord, float remap)\n"; - OS << "{\n"; - OS << " vec4 result = decodeLinearDepth(texture(tex, coord.xy).r);\n"; - OS << " uint remap_vector = floatBitsToUint(remap) & 0xFF;\n"; - OS << " if (remap_vector == 0xE4) return result;\n\n"; - OS << " vec4 tmp;\n"; - OS << " uint remap_a = remap_vector & 0x3;\n"; - OS << " uint remap_r = (remap_vector >> 2) & 0x3;\n"; - OS << " uint remap_g = (remap_vector >> 4) & 0x3;\n"; - OS << " uint remap_b = (remap_vector >> 6) & 0x3;\n"; - OS << " tmp.a = read_value(result, remap_a);\n"; - OS << " tmp.r = read_value(result, remap_r);\n"; - OS << " tmp.g = read_value(result, remap_g);\n"; - OS << " tmp.b = read_value(result, remap_b);\n"; - OS << " return tmp;\n"; - OS << "}\n\n"; + OS << "vec4 texture2DReconstruct(sampler2D tex, vec2 coord, float remap)\n"; + OS << "{\n"; + OS << " vec4 result = decodeLinearDepth(texture(tex, coord.xy).r);\n"; + OS << " uint remap_vector = floatBitsToUint(remap) & 0xFF;\n"; + OS << " if (remap_vector == 0xE4) return result;\n\n"; + OS << " vec4 tmp;\n"; + OS << " uint remap_a = remap_vector & 0x3;\n"; + OS << " uint remap_r = (remap_vector >> 2) & 0x3;\n"; + OS << " uint remap_g = (remap_vector >> 4) & 0x3;\n"; + OS << " uint remap_b = (remap_vector >> 6) & 0x3;\n"; + OS << " tmp.a = read_value(result, remap_a);\n"; + OS << " tmp.r = read_value(result, remap_r);\n"; + OS << " tmp.g = read_value(result, remap_g);\n"; + OS << " tmp.b = 
read_value(result, remap_b);\n"; + OS << " return tmp;\n"; + OS << "}\n\n"; + } - OS << "vec4 get_wpos()\n"; - OS << "{\n"; - OS << " float abs_scale = abs(wpos_scale);\n"; - OS << " return (gl_FragCoord * vec4(abs_scale, wpos_scale, 1., 1.)) + vec4(0., wpos_bias, 0., 0.);\n"; - OS << "}\n\n"; + if (require_wpos) + { + OS << "vec4 get_wpos()\n"; + OS << "{\n"; + OS << " float abs_scale = abs(wpos_scale);\n"; + OS << " return (gl_FragCoord * vec4(abs_scale, wpos_scale, 1., 1.)) + vec4(0., wpos_bias, 0., 0.);\n"; + OS << "}\n\n"; + } } static void insert_fog_declaration(std::ostream& OS) diff --git a/rpcs3/Emu/RSX/Common/VertexProgramDecompiler.cpp b/rpcs3/Emu/RSX/Common/VertexProgramDecompiler.cpp index 81f5750f8c..b048a29501 100644 --- a/rpcs3/Emu/RSX/Common/VertexProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/Common/VertexProgramDecompiler.cpp @@ -376,7 +376,7 @@ void VertexProgramDecompiler::SetDSTSca(const std::string& code) std::string VertexProgramDecompiler::NotZeroPositive(const std::string& code) { - return "max(" + code + ", 0.000001)"; + return "max(" + code + ", 0.0000000001)"; } std::string VertexProgramDecompiler::BuildFuncBody(const FuncInfo& func) @@ -638,7 +638,10 @@ std::string VertexProgramDecompiler::Decompile() case RSX_SCA_OPCODE_RSQ: SetDSTSca("1. 
/ sqrt(" + NotZeroPositive("$s.x") +").xxxx"); break; case RSX_SCA_OPCODE_EXP: SetDSTSca("exp($s)"); break; case RSX_SCA_OPCODE_LOG: SetDSTSca("log($s)"); break; - case RSX_SCA_OPCODE_LIT: SetDSTSca("lit_legacy($s)"); break; + case RSX_SCA_OPCODE_LIT: + SetDSTSca("lit_legacy($s)"); + properties.has_lit_op = true; + break; case RSX_SCA_OPCODE_BRA: { AddCode("$if ($cond) //BRA"); diff --git a/rpcs3/Emu/RSX/Common/VertexProgramDecompiler.h b/rpcs3/Emu/RSX/Common/VertexProgramDecompiler.h index 14a5b31ee4..b02cac3e6e 100644 --- a/rpcs3/Emu/RSX/Common/VertexProgramDecompiler.h +++ b/rpcs3/Emu/RSX/Common/VertexProgramDecompiler.h @@ -126,7 +126,14 @@ protected: /** insert end of main function (return value, output copy...) */ virtual void insertMainEnd(std::stringstream &OS) = 0; + public: + struct + { + bool has_lit_op = false; + } + properties; + VertexProgramDecompiler(const RSXVertexProgram& prog); std::string Decompile(); }; \ No newline at end of file diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h index 50af451884..2749a6592f 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache.h +++ b/rpcs3/Emu/RSX/Common/texture_cache.h @@ -274,6 +274,9 @@ namespace rsx //Set when a hw blit engine incompatibility is detected bool blit_engine_incompatibility_warning_raised = false; + //Set when a shader read-only texture data suddenly becomes contested, usually by fbo memory + bool read_only_tex_invalidate = false; + //Memory usage const s32 m_max_zombie_objects = 64; //Limit on how many texture objects to keep around for reuse after they are invalidated std::atomic m_unreleased_texture_objects = { 0 }; //Number of invalidated objects not yet freed from memory @@ -289,7 +292,7 @@ namespace rsx const std::vector& subresource_layout, rsx::texture_dimension_extended type, bool swizzled, const std::pair, std::array>& remap_vector) = 0; virtual void enforce_surface_creation_type(section_storage_type& section, u32 gcm_format, texture_create_flags 
expected) = 0; virtual void set_up_remap_vector(section_storage_type& section, const std::pair, std::array>& remap_vector) = 0; - virtual void insert_texture_barrier() = 0; + virtual void insert_texture_barrier(commandbuffer_type&, image_storage_type* tex) = 0; virtual image_view_type generate_cubemap_from_images(commandbuffer_type&, u32 gcm_format, u16 size, const std::array& sources) = 0; constexpr u32 get_block_size() const { return 0x1000000; } @@ -782,6 +785,7 @@ namespace rsx { //This space was being used for other purposes other than framebuffer storage //Delete used resources before attaching it to framebuffer memory + read_only_tex_invalidate = true; free_texture_section(region); m_texture_memory_in_use -= region.get_section_size(); } @@ -1116,7 +1120,7 @@ namespace rsx } template - sampled_image_descriptor process_framebuffer_resource(render_target_type texptr, u32 texaddr, u32 gcm_format, surface_store_type& m_rtts, + sampled_image_descriptor process_framebuffer_resource(commandbuffer_type& cmd, render_target_type texptr, u32 texaddr, u32 gcm_format, surface_store_type& m_rtts, u16 tex_width, u16 tex_height, rsx::texture_dimension_extended extended_dimension, bool is_depth) { const u32 format = gcm_format & ~(CELL_GCM_TEXTURE_UN | CELL_GCM_TEXTURE_LN); @@ -1240,7 +1244,7 @@ namespace rsx else { //issue a texture barrier to ensure previous writes are visible - insert_texture_barrier(); + insert_texture_barrier(cmd, texptr); break; } } @@ -1258,7 +1262,7 @@ namespace rsx else { //issue a texture barrier to ensure previous writes are visible - insert_texture_barrier(); + insert_texture_barrier(cmd, texptr); } } } @@ -1320,7 +1324,7 @@ namespace rsx { if (test_framebuffer(texaddr + texptr->raster_address_offset)) { - return process_framebuffer_resource(texptr, texaddr, tex.format(), m_rtts, tex_width, tex_height, extended_dimension, false); + return process_framebuffer_resource(cmd, texptr, texaddr, tex.format(), m_rtts, tex_width, tex_height, 
extended_dimension, false); } else { @@ -1333,7 +1337,7 @@ namespace rsx { if (test_framebuffer(texaddr + texptr->raster_address_offset)) { - return process_framebuffer_resource(texptr, texaddr, tex.format(), m_rtts, tex_width, tex_height, extended_dimension, true); + return process_framebuffer_resource(cmd, texptr, texaddr, tex.format(), m_rtts, tex_width, tex_height, extended_dimension, true); } else { @@ -1391,7 +1395,7 @@ namespace rsx if (rsc.is_bound) { LOG_WARNING(RSX, "Sampling from a currently bound render target @ 0x%x", texaddr); - insert_texture_barrier(); + insert_texture_barrier(cmd, rsc.surface); } return{ rsc.surface->get_view(), texture_upload_context::framebuffer_storage, rsc.is_depth_surface, @@ -1875,6 +1879,20 @@ namespace rsx return m_texture_memory_in_use; } + /** + * The read only texture invalidate flag is set if a read only texture is trampled by framebuffer memory + * If set, all cached read only textures are considered invalid and should be re-fetched from the texture cache + */ + virtual void clear_ro_tex_invalidate_intr() + { + read_only_tex_invalidate = false; + } + + virtual bool get_ro_tex_invalidate_intr() const + { + return read_only_tex_invalidate; + } + void tag_framebuffer(u32 texaddr) { if (!g_cfg.video.strict_rendering_mode) diff --git a/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp b/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp index 3002f1726e..d7142c56ad 100644 --- a/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp +++ b/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp @@ -194,7 +194,7 @@ namespace void GLFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS) { - glsl::insert_glsl_legacy_function(OS, glsl::glsl_fragment_program); + glsl::insert_glsl_legacy_function(OS, glsl::glsl_fragment_program, properties.has_lit_op, m_prog.redirected_textures != 0, properties.has_wpos_input); } void GLFragmentDecompilerThread::insertMainStart(std::stringstream & OS) @@ -248,8 +248,11 @@ void GLFragmentDecompilerThread::insertMainStart(std::stringstream 
& OS) } } - OS << " vec4 ssa = gl_FrontFacing ? vec4(1.) : vec4(-1.);\n"; - OS << " vec4 wpos = get_wpos();\n"; + if (m_parr.HasParam(PF_PARAM_IN, "vec4", "ssa")) + OS << " vec4 ssa = gl_FrontFacing ? vec4(1.) : vec4(-1.);\n"; + + if (properties.has_wpos_input) + OS << " vec4 wpos = get_wpos();\n"; for (const ParamType& PT : m_parr.params[PF_PARAM_UNIFORM]) { diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 7d593570fc..e4c0407395 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -176,21 +176,6 @@ void GLGSRender::begin() init_buffers(rsx::framebuffer_creation_context::context_draw); } -namespace -{ - GLenum get_gl_target_for_texture(const rsx::texture_dimension_extended type) - { - switch (type) - { - case rsx::texture_dimension_extended::texture_dimension_1d: return GL_TEXTURE_1D; - case rsx::texture_dimension_extended::texture_dimension_2d: return GL_TEXTURE_2D; - case rsx::texture_dimension_extended::texture_dimension_cubemap: return GL_TEXTURE_CUBE_MAP; - case rsx::texture_dimension_extended::texture_dimension_3d: return GL_TEXTURE_3D; - } - fmt::throw_exception("Unknown texture target" HERE); - } -} - void GLGSRender::end() { std::chrono::time_point state_check_start = steady_clock::now(); @@ -321,7 +306,7 @@ void GLGSRender::end() if (tex.enabled()) { - GLenum target = get_gl_target_for_texture(sampler_state->image_type); + GLenum target = gl::get_target(sampler_state->image_type); if (sampler_state->image_handle) { glBindTexture(target, sampler_state->image_handle); diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp index 700d2c8ee1..7f741498b4 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp @@ -31,7 +31,8 @@ color_format rsx::internals::surface_color_format_to_gl(rsx::surface_color_forma return{ ::gl::texture::type::f32, ::gl::texture::format::rgba, true, 4, 4 }; case rsx::surface_color_format::b8: - 
return{ ::gl::texture::type::ubyte, ::gl::texture::format::r, false, 1, 1 }; + return{ ::gl::texture::type::ubyte, ::gl::texture::format::r, false, 1, 1, + { ::gl::texture::channel::one, ::gl::texture::channel::r, ::gl::texture::channel::r, ::gl::texture::channel::r } }; case rsx::surface_color_format::g8b8: return{ ::gl::texture::type::ubyte, ::gl::texture::format::rg, false, 2, 1 }; @@ -188,10 +189,14 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk //NOTE: Its is possible that some renders are done on a swizzled context. Pitch is meaningless in that case //Seen in Nier (color) and GT HD concept (z buffer) - //Restriction is that the RTT is always a square region for that dimensions are powers of 2 + //Restriction is that the dimensions are powers of 2. Also, dimensions are passed via log2w and log2h entries const auto required_zeta_pitch = std::max((u32)(depth_format == rsx::surface_depth_format::z16 ? clip_horizontal * 2 : clip_horizontal * 4), 64u); const auto required_color_pitch = std::max((u32)rsx::utility::get_packed_pitch(surface_format, clip_horizontal), 64u); const bool stencil_test_enabled = depth_format == rsx::surface_depth_format::z24s8 && rsx::method_registers.stencil_test_enabled(); + const auto lg2w = rsx::method_registers.surface_log2_width(); + const auto lg2h = rsx::method_registers.surface_log2_height(); + const auto clipw_log2 = (u32)floor(log2(clip_horizontal)); + const auto cliph_log2 = (u32)floor(log2(clip_vertical)); if (depth_address) { @@ -210,8 +215,21 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk if (depth_address && zeta_pitch < required_zeta_pitch) { - if (zeta_pitch < 64 || clip_vertical != clip_horizontal) + if (lg2w < clipw_log2 || lg2h < cliph_log2) + { + //Cannot fit depth_address = 0; + + if (lg2w > 0 || lg2h > 0) + { + //Something was actually declared for the swizzle context dimensions + LOG_ERROR(RSX, "Invalid swizzled context depth surface dims, 
LG2W=%d, LG2H=%d, clip_w=%d, clip_h=%d", lg2w, lg2h, clip_horizontal, clip_vertical); + } + } + else + { + LOG_TRACE(RSX, "Swizzled context depth surface, LG2W=%d, LG2H=%d, clip_w=%d, clip_h=%d", lg2w, lg2h, clip_horizontal, clip_vertical); + } } } @@ -219,8 +237,20 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk { if (pitchs[index] < required_color_pitch) { - if (pitchs[index] < 64 || clip_vertical != clip_horizontal) + if (lg2w < clipw_log2 || lg2h < cliph_log2) + { surface_addresses[index] = 0; + + if (lg2w > 0 || lg2h > 0) + { + //Something was actually declared for the swizzle context dimensions + LOG_ERROR(RSX, "Invalid swizzled context color surface dims, LG2W=%d, LG2H=%d, clip_w=%d, clip_h=%d", lg2w, lg2h, clip_horizontal, clip_vertical); + } + } + else + { + LOG_TRACE(RSX, "Swizzled context color surface, LG2W=%d, LG2H=%d, clip_w=%d, clip_h=%d", lg2w, lg2h, clip_horizontal, clip_vertical); + } } if (surface_addresses[index] == depth_address) @@ -364,6 +394,8 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk break; } + m_gl_texture_cache.clear_ro_tex_invalidate_intr(); + //Mark buffer regions as NO_ACCESS on Cell visible side if (g_cfg.video.write_color_buffers) { @@ -393,6 +425,12 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk depth_format_gl.format, depth_format_gl.type, true); } } + + if (m_gl_texture_cache.get_ro_tex_invalidate_intr()) + { + //Invalidate cached sampler state + m_samplers_dirty.store(true); + } } std::array, 4> GLGSRender::copy_render_targets_to_memory() diff --git a/rpcs3/Emu/RSX/GL/GLTexture.cpp b/rpcs3/Emu/RSX/GL/GLTexture.cpp index 41d6d2c6f3..f711325888 100644 --- a/rpcs3/Emu/RSX/GL/GLTexture.cpp +++ b/rpcs3/Emu/RSX/GL/GLTexture.cpp @@ -8,6 +8,18 @@ namespace gl { + GLenum get_target(rsx::texture_dimension_extended type) + { + switch (type) + { + case rsx::texture_dimension_extended::texture_dimension_1d: return GL_TEXTURE_1D; 
+ case rsx::texture_dimension_extended::texture_dimension_2d: return GL_TEXTURE_2D; + case rsx::texture_dimension_extended::texture_dimension_cubemap: return GL_TEXTURE_CUBE_MAP; + case rsx::texture_dimension_extended::texture_dimension_3d: return GL_TEXTURE_3D; + } + fmt::throw_exception("Unknown texture target" HERE); + } + GLenum get_sized_internal_format(u32 texture_format) { switch (texture_format) diff --git a/rpcs3/Emu/RSX/GL/GLTexture.h b/rpcs3/Emu/RSX/GL/GLTexture.h index 7e49de3a93..494886ad73 100644 --- a/rpcs3/Emu/RSX/GL/GLTexture.h +++ b/rpcs3/Emu/RSX/GL/GLTexture.h @@ -10,6 +10,7 @@ namespace rsx namespace gl { + GLenum get_target(rsx::texture_dimension_extended type); GLenum get_sized_internal_format(u32 gcm_format); std::tuple get_format_type(u32 texture_format); GLenum wrap_mode(rsx::texture_wrap_mode wrap); diff --git a/rpcs3/Emu/RSX/GL/GLTextureCache.h b/rpcs3/Emu/RSX/GL/GLTextureCache.h index 75fb7056d8..263ef35adb 100644 --- a/rpcs3/Emu/RSX/GL/GLTextureCache.h +++ b/rpcs3/Emu/RSX/GL/GLTextureCache.h @@ -765,8 +765,9 @@ namespace gl break; } - glBindTexture(GL_TEXTURE_2D, vram_texture); - apply_component_mapping_flags(GL_TEXTURE_2D, gcm_format, flags); + auto target = gl::get_target(type); + glBindTexture(target, vram_texture); + apply_component_mapping_flags(target, gcm_format, flags); auto& cached = create_texture(vram_texture, rsx_address, rsx_size, width, height, depth, mipmaps); cached.set_dirty(false); @@ -836,7 +837,7 @@ namespace gl section.set_sampler_status(rsx::texture_sampler_status::status_ready); } - void insert_texture_barrier() override + void insert_texture_barrier(void*&, gl::texture*) override { auto &caps = gl::get_driver_caps(); diff --git a/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp b/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp index a1acc4c8f7..d8e4f28c04 100644 --- a/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp +++ b/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp @@ -149,7 +149,7 @@ void GLVertexDecompilerThread::insertOutputs(std::stringstream & OS, 
const std:: void GLVertexDecompilerThread::insertMainStart(std::stringstream & OS) { - insert_glsl_legacy_function(OS, glsl::glsl_vertex_program); + insert_glsl_legacy_function(OS, glsl::glsl_vertex_program, properties.has_lit_op); glsl::insert_vertex_input_fetch(OS, glsl::glsl_rules_opengl4, gl::get_driver_caps().vendor_INTEL==false); std::string parameters = ""; diff --git a/rpcs3/Emu/RSX/GL/OpenGL.cpp b/rpcs3/Emu/RSX/GL/OpenGL.cpp index d1e99f2ff0..65828dbb39 100644 --- a/rpcs3/Emu/RSX/GL/OpenGL.cpp +++ b/rpcs3/Emu/RSX/GL/OpenGL.cpp @@ -34,6 +34,9 @@ void gl::init() #ifdef __unix__ glewExperimental = true; glewInit(); +#ifndef __APPLE__ + glxewInit(); +#endif #endif } @@ -41,5 +44,19 @@ void gl::set_swapinterval(int interval) { #ifdef _WIN32 wglSwapIntervalEXT(interval); +#elif !defined(__APPLE__) + if (glXSwapIntervalEXT) + { + if (auto window = glXGetCurrentDrawable()) + { + glXSwapIntervalEXT(glXGetCurrentDisplay(), window, interval); + return; + } + } + + //No existing drawable or missing swap extension, EGL? + LOG_ERROR(RSX, "Failed to set swap interval"); +#else + LOG_UNIMPLEMENTED(RSX, "Swap control not implemented for this platform. 
Vsync options not available."); #endif -} \ No newline at end of file +} diff --git a/rpcs3/Emu/RSX/GL/OpenGL.h b/rpcs3/Emu/RSX/GL/OpenGL.h index 5db08ebfaa..487745b837 100644 --- a/rpcs3/Emu/RSX/GL/OpenGL.h +++ b/rpcs3/Emu/RSX/GL/OpenGL.h @@ -22,6 +22,7 @@ typedef BOOL (WINAPI* PFNWGLSWAPINTERVALEXTPROC) (int interval); #include #else +#include #include #include #include diff --git a/rpcs3/Emu/RSX/VK/VKCommonDecompiler.cpp b/rpcs3/Emu/RSX/VK/VKCommonDecompiler.cpp index 3510b1acca..ecdb5b99cf 100644 --- a/rpcs3/Emu/RSX/VK/VKCommonDecompiler.cpp +++ b/rpcs3/Emu/RSX/VK/VKCommonDecompiler.cpp @@ -144,6 +144,10 @@ namespace vk glslang::TProgram program; glslang::TShader shader_object(lang); + + shader_object.setEnvInput(glslang::EShSourceGlsl, lang, glslang::EShClientVulkan, 100); + shader_object.setEnvClient(glslang::EShClientVulkan, 100); + shader_object.setEnvTarget(glslang::EshTargetSpv, 0x00001000); bool success = false; const char *shader_text = shader.data(); @@ -154,11 +158,13 @@ namespace vk if (shader_object.parse(&g_default_config, 400, EProfile::ECoreProfile, false, true, msg)) { program.addShader(&shader_object); - success = program.link(EShMsgVulkanRules); + success = program.link(msg); if (success) { - glslang::TIntermediate* bytes = program.getIntermediate(lang); - glslang::GlslangToSpv(*bytes, spv); + glslang::SpvOptions options; + options.disableOptimizer = false; + options.optimizeSize = true; + glslang::GlslangToSpv(*program.getIntermediate(lang), spv, &options); } } else diff --git a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp index 97035a6b02..551ff533b9 100644 --- a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp @@ -197,7 +197,7 @@ namespace vk void VKFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS) { - glsl::insert_glsl_legacy_function(OS, glsl::glsl_fragment_program); + glsl::insert_glsl_legacy_function(OS, glsl::glsl_fragment_program, 
properties.has_lit_op, m_prog.redirected_textures != 0, properties.has_wpos_input); } void VKFragmentDecompilerThread::insertMainStart(std::stringstream & OS) @@ -251,8 +251,11 @@ void VKFragmentDecompilerThread::insertMainStart(std::stringstream & OS) } } - OS << " vec4 ssa = gl_FrontFacing ? vec4(1.) : vec4(-1.);\n"; - OS << " vec4 wpos = get_wpos();\n"; + if (m_parr.HasParam(PF_PARAM_IN, "vec4", "ssa")) + OS << " vec4 ssa = gl_FrontFacing ? vec4(1.) : vec4(-1.);\n"; + + if (properties.has_wpos_input) + OS << " vec4 wpos = get_wpos();\n"; bool two_sided_enabled = m_prog.front_back_color_enabled && (m_prog.back_color_diffuse_output || m_prog.back_color_specular_output); diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index ec144a6823..beb0fac695 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -84,7 +84,10 @@ namespace vk } case rsx::surface_color_format::b8: - return std::make_pair(VK_FORMAT_R8_UNORM, vk::default_component_map()); + { + VkComponentMapping no_alpha = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ONE }; + return std::make_pair(VK_FORMAT_R8_UNORM, no_alpha); + } case rsx::surface_color_format::g8b8: return std::make_pair(VK_FORMAT_R8G8_UNORM, vk::default_component_map()); @@ -360,23 +363,14 @@ namespace subpass.pColorAttachments = number_of_color_surface > 0 ? attachment_references.data() : nullptr; subpass.pDepthStencilAttachment = depth_format != VK_FORMAT_UNDEFINED ? 
&attachment_references.back() : nullptr; - VkSubpassDependency dependency = {}; - dependency.srcSubpass = VK_SUBPASS_EXTERNAL; - dependency.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; - dependency.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; - dependency.dstSubpass = 0; - dependency.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; - dependency.dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; - dependency.dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT; - VkRenderPassCreateInfo rp_info = {}; rp_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; rp_info.attachmentCount = static_cast(attachments.size()); rp_info.pAttachments = attachments.data(); rp_info.subpassCount = 1; rp_info.pSubpasses = &subpass; - rp_info.pDependencies = &dependency; - rp_info.dependencyCount = 1; + rp_info.pDependencies = nullptr; + rp_info.dependencyCount = 0; VkRenderPass result; CHECK_RESULT(vkCreateRenderPass(dev, &rp_info, NULL, &result)); @@ -1143,8 +1137,22 @@ void VKGSRender::end() if (replace) { - fs_sampler_handles[i] = std::make_unique(*m_device, wrap_s, wrap_t, wrap_r, false, lod_bias, af_level, min_lod, max_lod, - min_filter, mag_filter, mip_mode, border_color, compare_enabled, depth_compare_mode); + for (auto &sampler : m_current_frame->samplers_to_clean) + { + if (sampler->matches(wrap_s, wrap_t, wrap_r, false, lod_bias, af_level, min_lod, max_lod, + min_filter, mag_filter, mip_mode, border_color, compare_enabled, depth_compare_mode)) + { + fs_sampler_handles[i] = std::move(sampler); + replace = false; + break; + } + } + + if (replace) + { + fs_sampler_handles[i] = std::make_unique(*m_device, wrap_s, wrap_t, wrap_r, false, lod_bias, af_level, min_lod, max_lod, + min_filter, mag_filter, mip_mode, border_color, compare_enabled, depth_compare_mode); + } } } else @@ -1188,6 +1196,7 @@ void VKGSRender::end() if (replace) { + //This is unlikely, 
there is no need to check the dirty pool vs_sampler_handles[i] = std::make_unique( *m_device, VK_SAMPLER_ADDRESS_MODE_REPEAT, VK_SAMPLER_ADDRESS_MODE_REPEAT, VK_SAMPLER_ADDRESS_MODE_REPEAT, @@ -2478,10 +2487,14 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) //NOTE: Its is possible that some renders are done on a swizzled context. Pitch is meaningless in that case //Seen in Nier (color) and GT HD concept (z buffer) - //Restriction is that the RTT is always a square region for that dimensions are powers of 2 + //Restriction is that the dimensions are powers of 2. Also, dimensions are passed via log2w and log2h entries const auto required_zeta_pitch = std::max((u32)(depth_fmt == rsx::surface_depth_format::z16 ? clip_width * 2 : clip_width * 4), 64u); const auto required_color_pitch = std::max((u32)rsx::utility::get_packed_pitch(color_fmt, clip_width), 64u); const bool stencil_test_enabled = depth_fmt == rsx::surface_depth_format::z24s8 && rsx::method_registers.stencil_test_enabled(); + const auto lg2w = rsx::method_registers.surface_log2_width(); + const auto lg2h = rsx::method_registers.surface_log2_height(); + const auto clipw_log2 = (u32)floor(log2(clip_width)); + const auto cliph_log2 = (u32)floor(log2(clip_height)); if (zeta_address) { @@ -2500,8 +2513,21 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) if (zeta_address && zeta_pitch < required_zeta_pitch) { - if (zeta_pitch < 64 || clip_width != clip_height) + if (lg2w < clipw_log2 || lg2h < cliph_log2) + { + //Cannot fit zeta_address = 0; + + if (lg2w > 0 || lg2h > 0) + { + //Something was actually declared for the swizzle context dimensions + LOG_ERROR(RSX, "Invalid swizzled context depth surface dims, LG2W=%d, LG2H=%d, clip_w=%d, clip_h=%d", lg2w, lg2h, clip_width, clip_height); + } + } + else + { + LOG_TRACE(RSX, "Swizzled context depth surface, LG2W=%d, LG2H=%d, clip_w=%d, clip_h=%d", lg2w, lg2h, clip_width, clip_height); + } } } @@ -2509,8 +2535,20 @@ 
void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) { if (surface_pitchs[index] < required_color_pitch) { - if (surface_pitchs[index] < 64 || clip_width != clip_height) + if (lg2w < clipw_log2 || lg2h < cliph_log2) + { surface_addresses[index] = 0; + + if (lg2w > 0 || lg2h > 0) + { + //Something was actually declared for the swizzle context dimensions + LOG_ERROR(RSX, "Invalid swizzled context color surface dims, LG2W=%d, LG2H=%d, clip_w=%d, clip_h=%d", lg2w, lg2h, clip_width, clip_height); + } + } + else + { + LOG_TRACE(RSX, "Swizzled context color surface, LG2W=%d, LG2H=%d, clip_w=%d, clip_h=%d", lg2w, lg2h, clip_width, clip_height); + } } if (surface_addresses[index] == zeta_address) @@ -2990,7 +3028,7 @@ void VKGSRender::flip(int buffer) VkImageLayout target_layout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; VkImageSubresourceRange range = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 }; - if (aspect_ratio.x) + if (aspect_ratio.x || aspect_ratio.y) { VkClearColorValue clear_black {}; vk::change_image_layout(*m_current_command_buffer, target_image, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, range); @@ -3194,4 +3232,4 @@ void VKGSRender::shell_do_cleanup() { //TODO: Guard this m_overlay_cleanup_requests.push_back(0); -} \ No newline at end of file +} diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.cpp b/rpcs3/Emu/RSX/VK/VKHelpers.cpp index 0d8dad1ca2..d7044f3daf 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.cpp +++ b/rpcs3/Emu/RSX/VK/VKHelpers.cpp @@ -388,6 +388,45 @@ namespace vk image->current_layout = new_layout; } + void insert_texture_barrier(VkCommandBuffer cmd, VkImage image, VkImageLayout layout, VkImageSubresourceRange range) + { + VkImageMemoryBarrier barrier = {}; + barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + barrier.newLayout = layout; + barrier.oldLayout = layout; + barrier.image = image; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + 
barrier.subresourceRange = range; + barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + + VkPipelineStageFlags src_stage; + if (range.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT) + { + barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + src_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + } + else + { + barrier.srcAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + src_stage = VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + } + + vkCmdPipelineBarrier(cmd, src_stage, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &barrier); + } + + void insert_texture_barrier(VkCommandBuffer cmd, vk::image *image) + { + if (image->info.usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) + { + insert_texture_barrier(cmd, image->value, image->current_layout, { VK_IMAGE_ASPECT_DEPTH_BIT, 0, 1, 0, 1 }); + } + else + { + insert_texture_barrier(cmd, image->value, image->current_layout, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 }); + } + } + void enter_uninterruptible() { g_cb_no_interrupt_flag = true; diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h index f0400d2188..f69ff3decf 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.h +++ b/rpcs3/Emu/RSX/VK/VKHelpers.h @@ -90,6 +90,10 @@ namespace vk std::pair get_compatible_surface_format(rsx::surface_color_format color_format); size_t get_render_pass_location(VkFormat color_surface_format, VkFormat depth_stencil_format, u8 color_surface_count); + //Texture barrier applies to a texture to ensure writes to it are finished before any reads are attempted to avoid RAW hazards + void insert_texture_barrier(VkCommandBuffer cmd, VkImage image, VkImageLayout layout, VkImageSubresourceRange range); + void insert_texture_barrier(VkCommandBuffer cmd, vk::image *image); + void enter_uninterruptible(); void leave_uninterruptible(); bool is_uninterruptible(); diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h index 
2917e0fffb..ca9f2f6562 100644 --- a/rpcs3/Emu/RSX/VK/VKTextureCache.h +++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h @@ -831,8 +831,10 @@ namespace vk section.set_sampler_status(rsx::texture_sampler_status::status_ready); } - void insert_texture_barrier() override - {} + void insert_texture_barrier(vk::command_buffer& cmd, vk::image* tex) override + { + vk::insert_texture_barrier(cmd, tex); + } public: diff --git a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp index bfa54106bb..df032ac84a 100644 --- a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp @@ -185,7 +185,7 @@ void VKVertexDecompilerThread::insertOutputs(std::stringstream & OS, const std:: void VKVertexDecompilerThread::insertMainStart(std::stringstream & OS) { - glsl::insert_glsl_legacy_function(OS, glsl::glsl_vertex_program); + glsl::insert_glsl_legacy_function(OS, glsl::glsl_vertex_program, properties.has_lit_op); glsl::insert_vertex_input_fetch(OS, glsl::glsl_rules_rpirv); std::string parameters = ""; diff --git a/rpcs3/Emu/RSX/overlay_controls.h b/rpcs3/Emu/RSX/overlay_controls.h index 2046c0ae81..8fd4eede57 100644 --- a/rpcs3/Emu/RSX/overlay_controls.h +++ b/rpcs3/Emu/RSX/overlay_controls.h @@ -472,6 +472,7 @@ namespace rsx //Resource was not found in config dir, try and grab from relative path (linux) info = std::make_unique(("Icons/ui/" + res).c_str()); #ifndef _WIN32 + // Check for Icons in ../share/rpcs3 for AppImages and /usr/bin/ if (info->data == nullptr) { char result[ PATH_MAX ]; @@ -483,6 +484,11 @@ namespace rsx std::string executablePath = dirname(result); info = std::make_unique((executablePath + "/../share/rpcs3/Icons/ui/" + res).c_str()); + // Check if the icons are in the same directory as the executable (local builds) + if (info->data == nullptr) + { + info = std::make_unique((executablePath + "/Icons/ui/" + res).c_str()); + } } #endif if (info->data != nullptr) @@ -1331,6 +1337,8 @@ namespace rsx s16 m_selected_entry = -1; u16 
m_elements_count = 0; + bool m_cancel_only = false; + public: list_view(u16 width, u16 height) { @@ -1461,6 +1469,17 @@ namespace rsx return m_items[m_selected_entry]->text; } + void set_cancel_only(bool cancel_only) + { + if (cancel_only) + m_cancel_btn->set_pos(x + 30, y + h + 20); + else + m_cancel_btn->set_pos(x + 180, y + h + 20); + + m_cancel_only = cancel_only; + is_compiled = false; + } + void translate(s16 _x, s16 _y) override { layout_container::translate(_x, _y); @@ -1478,9 +1497,11 @@ namespace rsx compiled.add(m_highlight_box->get_compiled()); compiled.add(m_scroll_indicator_top->get_compiled()); compiled.add(m_scroll_indicator_bottom->get_compiled()); - compiled.add(m_accept_btn->get_compiled()); compiled.add(m_cancel_btn->get_compiled()); + if (!m_cancel_only) + compiled.add(m_accept_btn->get_compiled()); + compiled_resources = compiled; } diff --git a/rpcs3/Emu/RSX/overlays.h b/rpcs3/Emu/RSX/overlays.h index 8539186d0e..45c89ba92a 100644 --- a/rpcs3/Emu/RSX/overlays.h +++ b/rpcs3/Emu/RSX/overlays.h @@ -254,6 +254,7 @@ namespace rsx std::unique_ptr m_list; std::unique_ptr