mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-04-20 19:44:46 +00:00
Merge branch 'shadps4-emu:main' into allocate-fixes
This commit is contained in:
commit
6bd4c6b02f
105 changed files with 21471 additions and 19128 deletions
82
.github/workflows/build.yml
vendored
82
.github/workflows/build.yml
vendored
|
@ -14,14 +14,14 @@ env:
|
|||
|
||||
jobs:
|
||||
reuse:
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: ubuntu-24.04
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: fsfe/reuse-action@v5
|
||||
|
||||
clang-format:
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: ubuntu-24.04
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
@ -39,7 +39,7 @@ jobs:
|
|||
run: ./.ci/clang-format.sh
|
||||
|
||||
get-info:
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: ubuntu-24.04
|
||||
outputs:
|
||||
date: ${{ steps.vars.outputs.date }}
|
||||
shorthash: ${{ steps.vars.outputs.shorthash }}
|
||||
|
@ -57,7 +57,7 @@ jobs:
|
|||
echo "fullhash=$(git rev-parse HEAD)" >> $GITHUB_OUTPUT
|
||||
|
||||
windows-sdl:
|
||||
runs-on: windows-latest
|
||||
runs-on: windows-2025
|
||||
needs: get-info
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
@ -101,7 +101,7 @@ jobs:
|
|||
path: ${{github.workspace}}/build/shadPS4.exe
|
||||
|
||||
windows-qt:
|
||||
runs-on: windows-latest
|
||||
runs-on: windows-2025
|
||||
needs: get-info
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
@ -376,6 +376,78 @@ jobs:
|
|||
name: shadps4-linux-qt-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.shorthash }}
|
||||
path: Shadps4-qt.AppImage
|
||||
|
||||
linux-sdl-gcc:
|
||||
runs-on: ubuntu-24.04
|
||||
needs: get-info
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: recursive
|
||||
|
||||
- name: Install dependencies
|
||||
run: sudo apt-get update && sudo apt install -y libx11-dev libxext-dev libwayland-dev libdecor-0-dev libxkbcommon-dev libglfw3-dev libgles2-mesa-dev libfuse2 gcc-14 build-essential libasound2-dev libpulse-dev libopenal-dev libudev-dev
|
||||
|
||||
- name: Cache CMake Configuration
|
||||
uses: actions/cache@v4
|
||||
env:
|
||||
cache-name: ${{ runner.os }}-sdl-cache-cmake-configuration
|
||||
with:
|
||||
path: |
|
||||
${{github.workspace}}/build
|
||||
key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }}
|
||||
restore-keys: |
|
||||
${{ env.cache-name }}-
|
||||
|
||||
- name: Cache CMake Build
|
||||
uses: hendrikmuhs/ccache-action@v1.2.14
|
||||
env:
|
||||
cache-name: ${{ runner.os }}-sdl-cache-cmake-build
|
||||
with:
|
||||
append-timestamp: false
|
||||
key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }}
|
||||
|
||||
- name: Configure CMake
|
||||
run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=gcc-14 -DCMAKE_CXX_COMPILER=g++-14 -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
|
||||
|
||||
- name: Build
|
||||
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $(nproc)
|
||||
|
||||
linux-qt-gcc:
|
||||
runs-on: ubuntu-24.04
|
||||
needs: get-info
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: recursive
|
||||
|
||||
- name: Install dependencies
|
||||
run: sudo apt-get update && sudo apt install -y libx11-dev libxext-dev libwayland-dev libdecor-0-dev libxkbcommon-dev libglfw3-dev libgles2-mesa-dev libfuse2 gcc-14 build-essential qt6-base-dev qt6-tools-dev qt6-multimedia-dev libasound2-dev libpulse-dev libopenal-dev libudev-dev
|
||||
|
||||
- name: Cache CMake Configuration
|
||||
uses: actions/cache@v4
|
||||
env:
|
||||
cache-name: ${{ runner.os }}-qt-cache-cmake-configuration
|
||||
with:
|
||||
path: |
|
||||
${{github.workspace}}/build
|
||||
key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }}
|
||||
restore-keys: |
|
||||
${{ env.cache-name }}-
|
||||
|
||||
- name: Cache CMake Build
|
||||
uses: hendrikmuhs/ccache-action@v1.2.14
|
||||
env:
|
||||
cache-name: ${{ runner.os }}-qt-cache-cmake-build
|
||||
with:
|
||||
append-timestamp: false
|
||||
key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }}
|
||||
|
||||
- name: Configure CMake
|
||||
run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=gcc-14 -DCMAKE_CXX_COMPILER=g++-14 -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
|
||||
|
||||
- name: Build
|
||||
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $(nproc)
|
||||
|
||||
pre-release:
|
||||
if: github.ref == 'refs/heads/main' && github.repository == 'shadps4-emu/shadPS4' && github.event_name == 'push'
|
||||
needs: [get-info, windows-sdl, windows-qt, macos-sdl, macos-qt, linux-sdl, linux-qt]
|
||||
|
|
|
@ -336,6 +336,8 @@ set(SYSTEM_LIBS src/core/libraries/system/commondialog.cpp
|
|||
src/core/libraries/share_play/shareplay.h
|
||||
src/core/libraries/razor_cpu/razor_cpu.cpp
|
||||
src/core/libraries/razor_cpu/razor_cpu.h
|
||||
src/core/libraries/mouse/mouse.cpp
|
||||
src/core/libraries/mouse/mouse.h
|
||||
)
|
||||
|
||||
set(VIDEOOUT_LIB src/core/libraries/videoout/buffer.h
|
||||
|
@ -413,7 +415,9 @@ set(VDEC_LIB src/core/libraries/videodec/videodec2_impl.cpp
|
|||
src/core/libraries/videodec/videodec_impl.h
|
||||
)
|
||||
|
||||
set(NP_LIBS src/core/libraries/np_manager/np_manager.cpp
|
||||
set(NP_LIBS src/core/libraries/np_common/np_common.cpp
|
||||
src/core/libraries/np_common/np_common.h
|
||||
src/core/libraries/np_manager/np_manager.cpp
|
||||
src/core/libraries/np_manager/np_manager.h
|
||||
src/core/libraries/np_score/np_score.cpp
|
||||
src/core/libraries/np_score/np_score.h
|
||||
|
@ -1040,7 +1044,6 @@ install(TARGETS shadps4 BUNDLE DESTINATION .)
|
|||
|
||||
if (ENABLE_QT_GUI AND CMAKE_SYSTEM_NAME STREQUAL "Linux")
|
||||
install(FILES "dist/net.shadps4.shadPS4.desktop" DESTINATION "share/applications")
|
||||
install(FILES "dist/net.shadps4.shadPS4.releases.xml" DESTINATION "share/metainfo/releases")
|
||||
install(FILES "dist/net.shadps4.shadPS4.metainfo.xml" DESTINATION "share/metainfo")
|
||||
install(FILES ".github/shadps4.png" DESTINATION "share/icons/hicolor/512x512/apps" RENAME "net.shadps4.shadPS4.png")
|
||||
install(FILES "src/images/net.shadps4.shadPS4.svg" DESTINATION "share/icons/hicolor/scalable/apps")
|
||||
|
|
|
@ -11,7 +11,6 @@ path = [
|
|||
"dist/net.shadps4.shadPS4.desktop",
|
||||
"dist/net.shadps4.shadPS4_metadata.pot",
|
||||
"dist/net.shadps4.shadPS4.metainfo.xml",
|
||||
"dist/net.shadps4.shadPS4.releases.xml",
|
||||
"documents/changelog.md",
|
||||
"documents/Quickstart/2.png",
|
||||
"documents/Screenshots/*",
|
||||
|
|
27
dist/net.shadps4.shadPS4.metainfo.xml
vendored
27
dist/net.shadps4.shadPS4.metainfo.xml
vendored
|
@ -36,9 +36,30 @@
|
|||
<categories>
|
||||
<category translate="no">Game</category>
|
||||
</categories>
|
||||
<releases type="external" url="https://cdn.jsdelivr.net/gh/fpiesche/flatpak-builds/apps/net.shadps4.shadPS4/net.shadps4.shadPS4.releases.xml">
|
||||
<release version="v.0.4.0" date="2024-11-03">
|
||||
<description></description>
|
||||
<releases>
|
||||
<release version="0.5.0" date="2024-12-25">
|
||||
<url>https://github.com/shadps4-emu/shadPS4/releases/tag/v.0.5.0</url>
|
||||
</release>
|
||||
<release version="0.4.0" date="2024-10-31">
|
||||
<url>https://github.com/shadps4-emu/shadPS4/releases/tag/v.0.4.0</url>
|
||||
</release>
|
||||
<release version="0.3.0" date="2024-09-23">
|
||||
<url>https://github.com/shadps4-emu/shadPS4/releases/tag/v.0.3.0</url>
|
||||
</release>
|
||||
<release version="0.2.0" date="2024-08-15">
|
||||
<url>https://github.com/shadps4-emu/shadPS4/releases/tag/v.0.2.0</url>
|
||||
</release>
|
||||
<release version="0.1.0" date="2024-07-01">
|
||||
<url>https://github.com/shadps4-emu/shadPS4/releases/tag/0.1.0</url>
|
||||
</release>
|
||||
<release version="0.0.3" date="2024-03-23">
|
||||
<url>https://github.com/shadps4-emu/shadPS4/releases/tag/v0.0.3</url>
|
||||
</release>
|
||||
<release version="0.0.2" date="2023-10-21">
|
||||
<url>https://github.com/shadps4-emu/shadPS4/releases/tag/v0.0.2</url>
|
||||
</release>
|
||||
<release version="0.0.1" date="2024-09-29">
|
||||
<url>https://github.com/shadps4-emu/shadPS4/releases/tag/v0.0.1</url>
|
||||
</release>
|
||||
</releases>
|
||||
<content_rating type="oars-1.1"/>
|
||||
|
|
23
dist/net.shadps4.shadPS4.releases.xml
vendored
23
dist/net.shadps4.shadPS4.releases.xml
vendored
|
@ -1,23 +0,0 @@
|
|||
<releases>
|
||||
<release version="0.4.0" date="2024-10-31">
|
||||
<url>https://github.com/shadps4-emu/shadPS4/releases/tag/v.0.4.0</url>
|
||||
</release>
|
||||
<release version="0.3.0" date="2024-09-23">
|
||||
<url>https://github.com/shadps4-emu/shadPS4/releases/tag/v.0.3.0</url>
|
||||
</release>
|
||||
<release version="0.2.0" date="2024-08-15">
|
||||
<url>https://github.com/shadps4-emu/shadPS4/releases/tag/v.0.2.0</url>
|
||||
</release>
|
||||
<release version="0.1.0" date="2024-07-01">
|
||||
<url>https://github.com/shadps4-emu/shadPS4/releases/tag/0.1.0</url>
|
||||
</release>
|
||||
<release version="0.0.3" date="2024-03-23">
|
||||
<url>https://github.com/shadps4-emu/shadPS4/releases/tag/v0.0.3</url>
|
||||
</release>
|
||||
<release version="0.0.2" date="2023-10-21">
|
||||
<url>https://github.com/shadps4-emu/shadPS4/releases/tag/v0.0.2</url>
|
||||
</release>
|
||||
<release version="0.0.1" date="2024-09-29">
|
||||
<url>https://github.com/shadps4-emu/shadPS4/releases/tag/v0.0.1</url>
|
||||
</release>
|
||||
</releases>
|
2
externals/CMakeLists.txt
vendored
2
externals/CMakeLists.txt
vendored
|
@ -213,9 +213,7 @@ endif()
|
|||
|
||||
# Discord RPC
|
||||
if (ENABLE_DISCORD_RPC)
|
||||
set(BUILD_EXAMPLES OFF)
|
||||
add_subdirectory(discord-rpc)
|
||||
target_include_directories(discord-rpc INTERFACE discord-rpc/include)
|
||||
endif()
|
||||
|
||||
# GCN Headers
|
||||
|
|
2
externals/discord-rpc
vendored
2
externals/discord-rpc
vendored
|
@ -1 +1 @@
|
|||
Subproject commit 4ec218155d73bcb8022f8f7ca72305d801f84beb
|
||||
Subproject commit 51b09d426a4a1bcfa6ee6d4894e57d669f4a2e65
|
2
externals/sirit
vendored
2
externals/sirit
vendored
|
@ -1 +1 @@
|
|||
Subproject commit 1e74f4ef8d2a0e3221a4de51977663f342b53c35
|
||||
Subproject commit 26ad5a9d0fe13260b0d7d6c64419d01a196b2e32
|
|
@ -33,6 +33,7 @@ namespace Config {
|
|||
|
||||
static bool isNeo = false;
|
||||
static bool isFullscreen = false;
|
||||
static std::string fullscreenMode = "borderless";
|
||||
static bool playBGM = false;
|
||||
static bool isTrophyPopupDisabled = false;
|
||||
static int BGMvolume = 50;
|
||||
|
@ -47,6 +48,7 @@ static std::string updateChannel;
|
|||
static std::string backButtonBehavior = "left";
|
||||
static bool useSpecialPad = false;
|
||||
static int specialPadClass = 1;
|
||||
static bool isMotionControlsEnabled = true;
|
||||
static bool isDebugDump = false;
|
||||
static bool isShaderDebug = false;
|
||||
static bool isShowSplash = false;
|
||||
|
@ -104,10 +106,14 @@ bool isNeoModeConsole() {
|
|||
return isNeo;
|
||||
}
|
||||
|
||||
bool isFullscreenMode() {
|
||||
bool getIsFullscreen() {
|
||||
return isFullscreen;
|
||||
}
|
||||
|
||||
std::string getFullscreenMode() {
|
||||
return fullscreenMode;
|
||||
}
|
||||
|
||||
bool getisTrophyPopupDisabled() {
|
||||
return isTrophyPopupDisabled;
|
||||
}
|
||||
|
@ -172,6 +178,10 @@ int getSpecialPadClass() {
|
|||
return specialPadClass;
|
||||
}
|
||||
|
||||
bool getIsMotionControlsEnabled() {
|
||||
return isMotionControlsEnabled;
|
||||
}
|
||||
|
||||
bool debugDump() {
|
||||
return isDebugDump;
|
||||
}
|
||||
|
@ -304,10 +314,14 @@ void setVblankDiv(u32 value) {
|
|||
vblankDivider = value;
|
||||
}
|
||||
|
||||
void setFullscreenMode(bool enable) {
|
||||
void setIsFullscreen(bool enable) {
|
||||
isFullscreen = enable;
|
||||
}
|
||||
|
||||
void setFullscreenMode(std::string mode) {
|
||||
fullscreenMode = mode;
|
||||
}
|
||||
|
||||
void setisTrophyPopupDisabled(bool disable) {
|
||||
isTrophyPopupDisabled = disable;
|
||||
}
|
||||
|
@ -368,6 +382,10 @@ void setSpecialPadClass(int type) {
|
|||
specialPadClass = type;
|
||||
}
|
||||
|
||||
void setIsMotionControlsEnabled(bool use) {
|
||||
isMotionControlsEnabled = use;
|
||||
}
|
||||
|
||||
void setSeparateUpdateEnabled(bool use) {
|
||||
separateupdatefolder = use;
|
||||
}
|
||||
|
@ -566,6 +584,7 @@ void load(const std::filesystem::path& path) {
|
|||
|
||||
isNeo = toml::find_or<bool>(general, "isPS4Pro", false);
|
||||
isFullscreen = toml::find_or<bool>(general, "Fullscreen", false);
|
||||
fullscreenMode = toml::find_or<std::string>(general, "FullscreenMode", "borderless");
|
||||
playBGM = toml::find_or<bool>(general, "playBGM", false);
|
||||
isTrophyPopupDisabled = toml::find_or<bool>(general, "isTrophyPopupDisabled", false);
|
||||
BGMvolume = toml::find_or<int>(general, "BGMvolume", 50);
|
||||
|
@ -594,6 +613,7 @@ void load(const std::filesystem::path& path) {
|
|||
backButtonBehavior = toml::find_or<std::string>(input, "backButtonBehavior", "left");
|
||||
useSpecialPad = toml::find_or<bool>(input, "useSpecialPad", false);
|
||||
specialPadClass = toml::find_or<int>(input, "specialPadClass", 1);
|
||||
isMotionControlsEnabled = toml::find_or<bool>(input, "isMotionControlsEnabled", true);
|
||||
}
|
||||
|
||||
if (data.contains("GPU")) {
|
||||
|
@ -691,6 +711,7 @@ void save(const std::filesystem::path& path) {
|
|||
|
||||
data["General"]["isPS4Pro"] = isNeo;
|
||||
data["General"]["Fullscreen"] = isFullscreen;
|
||||
data["General"]["FullscreenMode"] = fullscreenMode;
|
||||
data["General"]["isTrophyPopupDisabled"] = isTrophyPopupDisabled;
|
||||
data["General"]["playBGM"] = playBGM;
|
||||
data["General"]["BGMvolume"] = BGMvolume;
|
||||
|
@ -709,6 +730,7 @@ void save(const std::filesystem::path& path) {
|
|||
data["Input"]["backButtonBehavior"] = backButtonBehavior;
|
||||
data["Input"]["useSpecialPad"] = useSpecialPad;
|
||||
data["Input"]["specialPadClass"] = specialPadClass;
|
||||
data["Input"]["isMotionControlsEnabled"] = isMotionControlsEnabled;
|
||||
data["GPU"]["screenWidth"] = screenWidth;
|
||||
data["GPU"]["screenHeight"] = screenHeight;
|
||||
data["GPU"]["nullGpu"] = isNullGpu;
|
||||
|
|
|
@ -17,9 +17,9 @@ void saveMainWindow(const std::filesystem::path& path);
|
|||
|
||||
std::string getTrophyKey();
|
||||
void setTrophyKey(std::string key);
|
||||
|
||||
bool getIsFullscreen();
|
||||
std::string getFullscreenMode();
|
||||
bool isNeoModeConsole();
|
||||
bool isFullscreenMode();
|
||||
bool getPlayBGM();
|
||||
int getBGMvolume();
|
||||
bool getisTrophyPopupDisabled();
|
||||
|
@ -38,6 +38,7 @@ int getCursorHideTimeout();
|
|||
std::string getBackButtonBehavior();
|
||||
bool getUseSpecialPad();
|
||||
int getSpecialPadClass();
|
||||
bool getIsMotionControlsEnabled();
|
||||
|
||||
u32 getScreenWidth();
|
||||
u32 getScreenHeight();
|
||||
|
@ -65,7 +66,8 @@ void setVblankDiv(u32 value);
|
|||
void setGpuId(s32 selectedGpuId);
|
||||
void setScreenWidth(u32 width);
|
||||
void setScreenHeight(u32 height);
|
||||
void setFullscreenMode(bool enable);
|
||||
void setIsFullscreen(bool enable);
|
||||
void setFullscreenMode(std::string mode);
|
||||
void setisTrophyPopupDisabled(bool disable);
|
||||
void setPlayBGM(bool enable);
|
||||
void setBGMvolume(int volume);
|
||||
|
@ -84,6 +86,7 @@ void setCursorHideTimeout(int newcursorHideTimeout);
|
|||
void setBackButtonBehavior(const std::string& type);
|
||||
void setUseSpecialPad(bool use);
|
||||
void setSpecialPadClass(int type);
|
||||
void setIsMotionControlsEnabled(bool use);
|
||||
|
||||
void setLogType(const std::string& type);
|
||||
void setLogFilter(const std::string& type);
|
||||
|
@ -139,4 +142,4 @@ void setDefaultValues();
|
|||
|
||||
// settings
|
||||
u32 GetLanguage();
|
||||
}; // namespace Config
|
||||
}; // namespace Config
|
||||
|
|
|
@ -111,7 +111,7 @@ public:
|
|||
return raw_firmware_ver;
|
||||
}
|
||||
|
||||
[[nodiscard]] const PSFAttributes& PSFAttributes() const {
|
||||
[[nodiscard]] const PSFAttributes& GetPSFAttributes() const {
|
||||
ASSERT(initialized);
|
||||
return psf_attributes;
|
||||
}
|
||||
|
|
|
@ -98,6 +98,7 @@ bool ParseFilterRule(Filter& instance, Iterator begin, Iterator end) {
|
|||
SUB(Lib, Ssl) \
|
||||
SUB(Lib, SysModule) \
|
||||
SUB(Lib, Move) \
|
||||
SUB(Lib, NpCommon) \
|
||||
SUB(Lib, NpManager) \
|
||||
SUB(Lib, NpScore) \
|
||||
SUB(Lib, NpTrophy) \
|
||||
|
@ -126,6 +127,7 @@ bool ParseFilterRule(Filter& instance, Iterator begin, Iterator end) {
|
|||
SUB(Lib, Vdec2) \
|
||||
SUB(Lib, Videodec) \
|
||||
SUB(Lib, RazorCpu) \
|
||||
SUB(Lib, Mouse) \
|
||||
CLS(Frontend) \
|
||||
CLS(Render) \
|
||||
SUB(Render, Vulkan) \
|
||||
|
|
|
@ -65,6 +65,7 @@ enum class Class : u8 {
|
|||
Lib_Ssl, ///< The LibSceSsl implementation.
|
||||
Lib_Http, ///< The LibSceHttp implementation.
|
||||
Lib_SysModule, ///< The LibSceSysModule implementation
|
||||
Lib_NpCommon, ///< The LibSceNpCommon implementation
|
||||
Lib_NpManager, ///< The LibSceNpManager implementation
|
||||
Lib_NpScore, ///< The LibSceNpScore implementation
|
||||
Lib_NpTrophy, ///< The LibSceNpTrophy implementation
|
||||
|
@ -93,6 +94,7 @@ enum class Class : u8 {
|
|||
Lib_Vdec2, ///< The LibSceVideodec2 implementation.
|
||||
Lib_Videodec, ///< The LibSceVideodec implementation.
|
||||
Lib_RazorCpu, ///< The LibRazorCpu implementation.
|
||||
Lib_Mouse, ///< The LibSceMouse implementation
|
||||
Frontend, ///< Emulator UI
|
||||
Render, ///< Video Core
|
||||
Render_Vulkan, ///< Vulkan backend
|
||||
|
|
|
@ -40,7 +40,8 @@ void MntPoints::UnmountAll() {
|
|||
m_mnt_pairs.clear();
|
||||
}
|
||||
|
||||
std::filesystem::path MntPoints::GetHostPath(std::string_view path, bool* is_read_only) {
|
||||
std::filesystem::path MntPoints::GetHostPath(std::string_view path, bool* is_read_only,
|
||||
bool force_base_path) {
|
||||
// Evil games like Turok2 pass double slashes e.g /app0//game.kpf
|
||||
std::string corrected_path(path);
|
||||
size_t pos = corrected_path.find("//");
|
||||
|
@ -72,7 +73,7 @@ std::filesystem::path MntPoints::GetHostPath(std::string_view path, bool* is_rea
|
|||
patch_path /= rel_path;
|
||||
|
||||
if ((corrected_path.starts_with("/app0") || corrected_path.starts_with("/hostapp")) &&
|
||||
std::filesystem::exists(patch_path)) {
|
||||
!force_base_path && std::filesystem::exists(patch_path)) {
|
||||
return patch_path;
|
||||
}
|
||||
|
||||
|
@ -132,8 +133,10 @@ std::filesystem::path MntPoints::GetHostPath(std::string_view path, bool* is_rea
|
|||
return std::optional<std::filesystem::path>(current_path);
|
||||
};
|
||||
|
||||
if (const auto path = search(patch_path)) {
|
||||
return *path;
|
||||
if (!force_base_path) {
|
||||
if (const auto path = search(patch_path)) {
|
||||
return *path;
|
||||
}
|
||||
}
|
||||
if (const auto path = search(host_path)) {
|
||||
return *path;
|
||||
|
@ -144,6 +147,39 @@ std::filesystem::path MntPoints::GetHostPath(std::string_view path, bool* is_rea
|
|||
return host_path;
|
||||
}
|
||||
|
||||
// TODO: Does not handle mount points inside mount points.
|
||||
void MntPoints::IterateDirectory(std::string_view guest_directory,
|
||||
const IterateDirectoryCallback& callback) {
|
||||
const auto base_path = GetHostPath(guest_directory, nullptr, true);
|
||||
const auto patch_path = GetHostPath(guest_directory, nullptr, false);
|
||||
// Only need to consider patch path if it exists and does not resolve to the same as base.
|
||||
const auto apply_patch = base_path != patch_path && std::filesystem::exists(patch_path);
|
||||
|
||||
// Pass 1: Any files that existed in the base directory, using patch directory if needed.
|
||||
if (std::filesystem::exists(base_path)) {
|
||||
for (const auto& entry : std::filesystem::directory_iterator(base_path)) {
|
||||
if (apply_patch) {
|
||||
const auto patch_entry_path = patch_path / entry.path().filename();
|
||||
if (std::filesystem::exists(patch_entry_path)) {
|
||||
callback(patch_entry_path, !std::filesystem::is_directory(patch_entry_path));
|
||||
continue;
|
||||
}
|
||||
}
|
||||
callback(entry.path(), !entry.is_directory());
|
||||
}
|
||||
}
|
||||
|
||||
// Pass 2: Any files that exist only in the patch directory.
|
||||
if (apply_patch) {
|
||||
for (const auto& entry : std::filesystem::directory_iterator(patch_path)) {
|
||||
const auto base_entry_path = base_path / entry.path().filename();
|
||||
if (!std::filesystem::exists(base_entry_path)) {
|
||||
callback(entry.path(), !entry.is_directory());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int HandleTable::CreateHandle() {
|
||||
std::scoped_lock lock{m_mutex};
|
||||
|
||||
|
|
|
@ -36,7 +36,11 @@ public:
|
|||
void UnmountAll();
|
||||
|
||||
std::filesystem::path GetHostPath(std::string_view guest_directory,
|
||||
bool* is_read_only = nullptr);
|
||||
bool* is_read_only = nullptr, bool force_base_path = false);
|
||||
using IterateDirectoryCallback =
|
||||
std::function<void(const std::filesystem::path& host_path, bool is_file)>;
|
||||
void IterateDirectory(std::string_view guest_directory,
|
||||
const IterateDirectoryCallback& callback);
|
||||
|
||||
const MntPair* GetMountFromHostPath(const std::string& host_path) {
|
||||
std::scoped_lock lock{m_mutex};
|
||||
|
|
|
@ -46,17 +46,6 @@ static std::map<std::string, FactoryDevice> available_device = {
|
|||
|
||||
namespace Libraries::Kernel {
|
||||
|
||||
auto GetDirectoryEntries(const std::filesystem::path& path) {
|
||||
std::vector<Core::FileSys::DirEntry> files;
|
||||
for (const auto& entry : std::filesystem::directory_iterator(path)) {
|
||||
auto& dir_entry = files.emplace_back();
|
||||
dir_entry.name = entry.path().filename().string();
|
||||
dir_entry.isFile = !std::filesystem::is_directory(entry.path().string());
|
||||
}
|
||||
|
||||
return files;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceKernelOpen(const char* raw_path, int flags, u16 mode) {
|
||||
LOG_INFO(Kernel_Fs, "path = {} flags = {:#x} mode = {}", raw_path, flags, mode);
|
||||
auto* h = Common::Singleton<Core::FileSys::HandleTable>::Instance();
|
||||
|
@ -115,7 +104,12 @@ int PS4_SYSV_ABI sceKernelOpen(const char* raw_path, int flags, u16 mode) {
|
|||
if (create) {
|
||||
return handle; // dir already exists
|
||||
} else {
|
||||
file->dirents = GetDirectoryEntries(file->m_host_name);
|
||||
mnt->IterateDirectory(file->m_guest_name,
|
||||
[&file](const auto& ent_path, const auto ent_is_file) {
|
||||
auto& dir_entry = file->dirents.emplace_back();
|
||||
dir_entry.name = ent_path.filename().string();
|
||||
dir_entry.isFile = ent_is_file;
|
||||
});
|
||||
file->dirents_index = 0;
|
||||
}
|
||||
}
|
||||
|
@ -695,66 +689,12 @@ static int GetDents(int fd, char* buf, int nbytes, s64* basep) {
|
|||
return sizeof(OrbisKernelDirent);
|
||||
}
|
||||
|
||||
static int HandleSeparateUpdateDents(int fd, char* buf, int nbytes, s64* basep) {
|
||||
int dir_entries = 0;
|
||||
|
||||
auto* h = Common::Singleton<Core::FileSys::HandleTable>::Instance();
|
||||
auto* mnt = Common::Singleton<Core::FileSys::MntPoints>::Instance();
|
||||
auto* file = h->GetFile(fd);
|
||||
auto update_dir_name = std::string{fmt::UTF(file->m_host_name.u8string()).data};
|
||||
auto mount = mnt->GetMountFromHostPath(update_dir_name);
|
||||
auto suffix = std::string{fmt::UTF(mount->host_path.u8string()).data};
|
||||
|
||||
size_t pos = update_dir_name.find("-UPDATE");
|
||||
if (pos != std::string::npos) {
|
||||
update_dir_name.erase(pos, 7);
|
||||
auto guest_name = mount->mount + "/" + update_dir_name.substr(suffix.size() + 1);
|
||||
int descriptor;
|
||||
|
||||
auto existent_folder = h->GetFile(update_dir_name);
|
||||
if (!existent_folder) {
|
||||
u32 handle = h->CreateHandle();
|
||||
auto* new_file = h->GetFile(handle);
|
||||
new_file->type = Core::FileSys::FileType::Directory;
|
||||
new_file->m_guest_name = guest_name;
|
||||
new_file->m_host_name = update_dir_name;
|
||||
if (!std::filesystem::is_directory(new_file->m_host_name)) {
|
||||
h->DeleteHandle(handle);
|
||||
return dir_entries;
|
||||
} else {
|
||||
new_file->dirents = GetDirectoryEntries(new_file->m_host_name);
|
||||
new_file->dirents_index = 0;
|
||||
}
|
||||
new_file->is_opened = true;
|
||||
descriptor = h->GetFileDescriptor(new_file);
|
||||
} else {
|
||||
descriptor = h->GetFileDescriptor(existent_folder);
|
||||
}
|
||||
|
||||
dir_entries = GetDents(descriptor, buf, nbytes, basep);
|
||||
if (dir_entries == ORBIS_OK && existent_folder) {
|
||||
existent_folder->dirents_index = 0;
|
||||
file->dirents_index = 0;
|
||||
}
|
||||
}
|
||||
|
||||
return dir_entries;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceKernelGetdents(int fd, char* buf, int nbytes) {
|
||||
int a = GetDents(fd, buf, nbytes, nullptr);
|
||||
if (a == ORBIS_OK) {
|
||||
return HandleSeparateUpdateDents(fd, buf, nbytes, nullptr);
|
||||
}
|
||||
return a;
|
||||
return GetDents(fd, buf, nbytes, nullptr);
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceKernelGetdirentries(int fd, char* buf, int nbytes, s64* basep) {
|
||||
int a = GetDents(fd, buf, nbytes, basep);
|
||||
if (a == ORBIS_OK) {
|
||||
return HandleSeparateUpdateDents(fd, buf, nbytes, basep);
|
||||
}
|
||||
return a;
|
||||
return GetDents(fd, buf, nbytes, basep);
|
||||
}
|
||||
|
||||
s64 PS4_SYSV_ABI sceKernelPwrite(int d, void* buf, size_t nbytes, s64 offset) {
|
||||
|
|
|
@ -15,7 +15,7 @@ namespace Libraries::Kernel {
|
|||
int PS4_SYSV_ABI sceKernelIsNeoMode() {
|
||||
LOG_DEBUG(Kernel_Sce, "called");
|
||||
return Config::isNeoModeConsole() &&
|
||||
Common::ElfInfo::Instance().PSFAttributes().support_neo_mode;
|
||||
Common::ElfInfo::Instance().GetPSFAttributes().support_neo_mode;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceKernelGetCompiledSdkVersion(int* ver) {
|
||||
|
|
|
@ -18,11 +18,13 @@
|
|||
#include "core/libraries/libc_internal/libc_internal.h"
|
||||
#include "core/libraries/libpng/pngdec.h"
|
||||
#include "core/libraries/libs.h"
|
||||
#include "core/libraries/mouse/mouse.h"
|
||||
#include "core/libraries/move/move.h"
|
||||
#include "core/libraries/network/http.h"
|
||||
#include "core/libraries/network/net.h"
|
||||
#include "core/libraries/network/netctl.h"
|
||||
#include "core/libraries/network/ssl.h"
|
||||
#include "core/libraries/np_common/np_common.h"
|
||||
#include "core/libraries/np_manager/np_manager.h"
|
||||
#include "core/libraries/np_score/np_score.h"
|
||||
#include "core/libraries/np_trophy/np_trophy.h"
|
||||
|
@ -71,6 +73,7 @@ void InitHLELibs(Core::Loader::SymbolsResolver* sym) {
|
|||
Libraries::SysModule::RegisterlibSceSysmodule(sym);
|
||||
Libraries::Posix::Registerlibsceposix(sym);
|
||||
Libraries::AudioIn::RegisterlibSceAudioIn(sym);
|
||||
Libraries::NpCommon::RegisterlibSceNpCommon(sym);
|
||||
Libraries::NpManager::RegisterlibSceNpManager(sym);
|
||||
Libraries::NpScore::RegisterlibSceNpScore(sym);
|
||||
Libraries::NpTrophy::RegisterlibSceNpTrophy(sym);
|
||||
|
@ -97,6 +100,7 @@ void InitHLELibs(Core::Loader::SymbolsResolver* sym) {
|
|||
Libraries::Move::RegisterlibSceMove(sym);
|
||||
Libraries::Fiber::RegisterlibSceFiber(sym);
|
||||
Libraries::JpegEnc::RegisterlibSceJpegEnc(sym);
|
||||
Libraries::Mouse::RegisterlibSceMouse(sym);
|
||||
}
|
||||
|
||||
} // namespace Libraries
|
||||
|
|
99
src/core/libraries/mouse/mouse.cpp
Normal file
99
src/core/libraries/mouse/mouse.cpp
Normal file
|
@ -0,0 +1,99 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
// Generated By moduleGenerator
|
||||
#include "common/logging/log.h"
|
||||
#include "core/libraries/error_codes.h"
|
||||
#include "core/libraries/libs.h"
|
||||
#include "mouse.h"
|
||||
|
||||
namespace Libraries::Mouse {
|
||||
|
||||
int PS4_SYSV_ABI sceMouseClose() {
|
||||
LOG_ERROR(Lib_Mouse, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceMouseConnectPort() {
|
||||
LOG_ERROR(Lib_Mouse, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceMouseDebugGetDeviceId() {
|
||||
LOG_ERROR(Lib_Mouse, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceMouseDeviceOpen() {
|
||||
LOG_ERROR(Lib_Mouse, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceMouseDisconnectDevice() {
|
||||
LOG_ERROR(Lib_Mouse, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceMouseDisconnectPort() {
|
||||
LOG_ERROR(Lib_Mouse, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceMouseGetDeviceInfo() {
|
||||
LOG_ERROR(Lib_Mouse, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceMouseInit() {
|
||||
LOG_ERROR(Lib_Mouse, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceMouseMbusInit() {
|
||||
LOG_ERROR(Lib_Mouse, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceMouseOpen() {
|
||||
LOG_ERROR(Lib_Mouse, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceMouseRead() {
|
||||
LOG_DEBUG(Lib_Mouse, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceMouseSetHandType() {
|
||||
LOG_ERROR(Lib_Mouse, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceMouseSetPointerSpeed() {
|
||||
LOG_ERROR(Lib_Mouse, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceMouseSetProcessPrivilege() {
|
||||
LOG_ERROR(Lib_Mouse, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
void RegisterlibSceMouse(Core::Loader::SymbolsResolver* sym) {
|
||||
LIB_FUNCTION("cAnT0Rw-IwU", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseClose);
|
||||
LIB_FUNCTION("Ymyy1HSSJLQ", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseConnectPort);
|
||||
LIB_FUNCTION("BRXOoXQtb+k", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseDebugGetDeviceId);
|
||||
LIB_FUNCTION("WiGKINCZWkc", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseDeviceOpen);
|
||||
LIB_FUNCTION("eDQTFHbgeTU", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseDisconnectDevice);
|
||||
LIB_FUNCTION("jJP1vYMEPd4", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseDisconnectPort);
|
||||
LIB_FUNCTION("QA9Qupz3Zjw", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseGetDeviceInfo);
|
||||
LIB_FUNCTION("Qs0wWulgl7U", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseInit);
|
||||
LIB_FUNCTION("1FeceR5YhAo", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseMbusInit);
|
||||
LIB_FUNCTION("RaqxZIf6DvE", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseOpen);
|
||||
LIB_FUNCTION("x8qnXqh-tiM", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseRead);
|
||||
LIB_FUNCTION("crkFfp-cmFo", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseSetHandType);
|
||||
LIB_FUNCTION("ghLUU2Z5Lcg", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseSetPointerSpeed);
|
||||
LIB_FUNCTION("6aANndpS0Wo", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseSetProcessPrivilege);
|
||||
};
|
||||
|
||||
} // namespace Libraries::Mouse
|
29
src/core/libraries/mouse/mouse.h
Normal file
29
src/core/libraries/mouse/mouse.h
Normal file
|
@ -0,0 +1,29 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
#include "common/types.h"
|
||||
|
||||
namespace Core::Loader {
|
||||
class SymbolsResolver;
|
||||
}
|
||||
|
||||
// Declarations of the libSceMouse entry points exposed by the emulator.
// Every entry point is declared with the PS4_SYSV_ABI calling-convention marker, takes no
// arguments and returns an int status code.
// NOTE(review): the empty parameter lists suggest these are placeholder signatures — confirm
// against the implementations in mouse.cpp before relying on them.
namespace Libraries::Mouse {
|
||||
|
||||
int PS4_SYSV_ABI sceMouseClose();
|
||||
int PS4_SYSV_ABI sceMouseConnectPort();
|
||||
int PS4_SYSV_ABI sceMouseDebugGetDeviceId();
|
||||
int PS4_SYSV_ABI sceMouseDeviceOpen();
|
||||
int PS4_SYSV_ABI sceMouseDisconnectDevice();
|
||||
int PS4_SYSV_ABI sceMouseDisconnectPort();
|
||||
int PS4_SYSV_ABI sceMouseGetDeviceInfo();
|
||||
int PS4_SYSV_ABI sceMouseInit();
|
||||
int PS4_SYSV_ABI sceMouseMbusInit();
|
||||
int PS4_SYSV_ABI sceMouseOpen();
|
||||
int PS4_SYSV_ABI sceMouseRead();
|
||||
int PS4_SYSV_ABI sceMouseSetHandType();
|
||||
int PS4_SYSV_ABI sceMouseSetPointerSpeed();
|
||||
int PS4_SYSV_ABI sceMouseSetProcessPrivilege();
|
||||
|
||||
// Hooks all of the entry points above into the given symbol resolver.
void RegisterlibSceMouse(Core::Loader::SymbolsResolver* sym);
|
||||
} // namespace Libraries::Mouse
|
7915
src/core/libraries/np_common/np_common.cpp
Normal file
7915
src/core/libraries/np_common/np_common.cpp
Normal file
File diff suppressed because it is too large
Load diff
1245
src/core/libraries/np_common/np_common.h
Normal file
1245
src/core/libraries/np_common/np_common.h
Normal file
File diff suppressed because it is too large
Load diff
9
src/core/libraries/np_common/np_common_error.h
Normal file
9
src/core/libraries/np_common/np_common_error.h
Normal file
|
@ -0,0 +1,9 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "core/libraries/error_codes.h"
|
||||
|
||||
constexpr int ORBIS_NP_ERROR_INVALID_ARGUMENT = 0x80550003;
|
||||
constexpr int ORBIS_NP_UTIL_ERROR_NOT_MATCH = 0x80550609;
|
|
@ -1,7 +1,6 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "common/config.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "core/libraries/error_codes.h"
|
||||
#include "core/libraries/libs.h"
|
||||
|
|
|
@ -174,10 +174,11 @@ int MemoryManager::PoolReserve(void** out_addr, VAddr virtual_addr, size_t size,
|
|||
|
||||
// Fixed mapping means the virtual address must exactly match the provided one.
|
||||
if (True(flags & MemoryMapFlags::Fixed)) {
|
||||
const auto& vma = FindVMA(mapped_addr)->second;
|
||||
auto& vma = FindVMA(mapped_addr)->second;
|
||||
// If the VMA is mapped, unmap the region first.
|
||||
if (vma.IsMapped()) {
|
||||
UnmapMemoryImpl(mapped_addr, size);
|
||||
vma = FindVMA(mapped_addr)->second;
|
||||
}
|
||||
const size_t remaining_size = vma.base + vma.size - mapped_addr;
|
||||
ASSERT_MSG(vma.type == VMAType::Free && remaining_size >= size);
|
||||
|
@ -211,10 +212,11 @@ int MemoryManager::Reserve(void** out_addr, VAddr virtual_addr, size_t size, Mem
|
|||
|
||||
// Fixed mapping means the virtual address must exactly match the provided one.
|
||||
if (True(flags & MemoryMapFlags::Fixed)) {
|
||||
const auto& vma = FindVMA(mapped_addr)->second;
|
||||
auto& vma = FindVMA(mapped_addr)->second;
|
||||
// If the VMA is mapped, unmap the region first.
|
||||
if (vma.IsMapped()) {
|
||||
UnmapMemoryImpl(mapped_addr, size);
|
||||
vma = FindVMA(mapped_addr)->second;
|
||||
}
|
||||
const size_t remaining_size = vma.base + vma.size - mapped_addr;
|
||||
ASSERT_MSG(vma.type == VMAType::Free && remaining_size >= size);
|
||||
|
@ -396,14 +398,18 @@ s32 MemoryManager::UnmapMemoryImpl(VAddr virtual_addr, size_t size) {
|
|||
ASSERT_MSG(vma_base.Contains(virtual_addr, size),
|
||||
"Existing mapping does not contain requested unmap range");
|
||||
|
||||
const auto type = vma_base.type;
|
||||
if (type == VMAType::Free) {
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
const auto vma_base_addr = vma_base.base;
|
||||
const auto vma_base_size = vma_base.size;
|
||||
const auto phys_base = vma_base.phys_base;
|
||||
const bool is_exec = vma_base.is_exec;
|
||||
const auto start_in_vma = virtual_addr - vma_base_addr;
|
||||
const auto type = vma_base.type;
|
||||
const bool has_backing = type == VMAType::Direct || type == VMAType::File;
|
||||
if (type == VMAType::Direct) {
|
||||
if (type == VMAType::Direct || type == VMAType::Pooled) {
|
||||
rasterizer->UnmapMemory(virtual_addr, size);
|
||||
}
|
||||
if (type == VMAType::Flexible) {
|
||||
|
@ -421,10 +427,12 @@ s32 MemoryManager::UnmapMemoryImpl(VAddr virtual_addr, size_t size) {
|
|||
MergeAdjacent(vma_map, new_it);
|
||||
bool readonly_file = vma.prot == MemoryProt::CpuRead && type == VMAType::File;
|
||||
|
||||
// Unmap the memory region.
|
||||
impl.Unmap(vma_base_addr, vma_base_size, start_in_vma, start_in_vma + size, phys_base, is_exec,
|
||||
has_backing, readonly_file);
|
||||
TRACK_FREE(virtual_addr, "VMEM");
|
||||
if (type != VMAType::Reserved && type != VMAType::PoolReserved) {
|
||||
// Unmap the memory region.
|
||||
impl.Unmap(vma_base_addr, vma_base_size, start_in_vma, start_in_vma + size, phys_base,
|
||||
is_exec, has_backing, readonly_file);
|
||||
TRACK_FREE(virtual_addr, "VMEM");
|
||||
}
|
||||
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
|
|
@ -217,41 +217,15 @@ void Emulator::Run(const std::filesystem::path& file) {
|
|||
linker->LoadModule(eboot_path);
|
||||
|
||||
// check if we have system modules to load
|
||||
LoadSystemModules(eboot_path, game_info.game_serial);
|
||||
LoadSystemModules(game_info.game_serial);
|
||||
|
||||
// Load all prx from game's sce_module folder
|
||||
std::vector<std::filesystem::path> modules_to_load;
|
||||
std::filesystem::path game_module_folder = file.parent_path() / "sce_module";
|
||||
if (std::filesystem::is_directory(game_module_folder)) {
|
||||
for (const auto& entry : std::filesystem::directory_iterator(game_module_folder)) {
|
||||
if (entry.is_regular_file()) {
|
||||
modules_to_load.push_back(entry.path());
|
||||
}
|
||||
mnt->IterateDirectory("/app0/sce_module", [this](const auto& path, const auto is_file) {
|
||||
if (is_file) {
|
||||
LOG_INFO(Loader, "Loading {}", fmt::UTF(path.u8string()));
|
||||
linker->LoadModule(path);
|
||||
}
|
||||
}
|
||||
|
||||
// Load all prx from separate update's sce_module folder
|
||||
std::filesystem::path game_patch_folder = game_folder;
|
||||
game_patch_folder += "-UPDATE";
|
||||
std::filesystem::path update_module_folder = game_patch_folder / "sce_module";
|
||||
if (std::filesystem::is_directory(update_module_folder)) {
|
||||
for (const auto& entry : std::filesystem::directory_iterator(update_module_folder)) {
|
||||
auto it = std::find_if(modules_to_load.begin(), modules_to_load.end(),
|
||||
[&entry](const std::filesystem::path& p) {
|
||||
return p.filename() == entry.path().filename();
|
||||
});
|
||||
if (it != modules_to_load.end()) {
|
||||
*it = entry.path();
|
||||
} else {
|
||||
modules_to_load.push_back(entry.path());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto& module_path : modules_to_load) {
|
||||
LOG_INFO(Loader, "Loading {}", fmt::UTF(module_path.u8string()));
|
||||
linker->LoadModule(module_path);
|
||||
}
|
||||
});
|
||||
|
||||
#ifdef ENABLE_DISCORD_RPC
|
||||
// Discord RPC
|
||||
|
@ -278,7 +252,7 @@ void Emulator::Run(const std::filesystem::path& file) {
|
|||
std::exit(0);
|
||||
}
|
||||
|
||||
void Emulator::LoadSystemModules(const std::filesystem::path& file, std::string game_serial) {
|
||||
void Emulator::LoadSystemModules(const std::string& game_serial) {
|
||||
constexpr std::array<SysModules, 11> ModulesToLoad{
|
||||
{{"libSceNgs2.sprx", &Libraries::Ngs2::RegisterlibSceNgs2},
|
||||
{"libSceUlt.sprx", nullptr},
|
||||
|
|
|
@ -29,7 +29,7 @@ public:
|
|||
void UpdatePlayTime(const std::string& serial);
|
||||
|
||||
private:
|
||||
void LoadSystemModules(const std::filesystem::path& file, std::string game_serial);
|
||||
void LoadSystemModules(const std::string& game_serial);
|
||||
|
||||
Core::MemoryManager* memory;
|
||||
Input::GameController* controller;
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <SDL3/SDL.h>
|
||||
#include "common/config.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "core/libraries/kernel/time.h"
|
||||
#include "core/libraries/pad/pad.h"
|
||||
|
@ -189,11 +190,6 @@ void GameController::CalculateOrientation(Libraries::Pad::OrbisFVector3& acceler
|
|||
gz += Kp * ez + Ki * eInt[2];
|
||||
|
||||
//// Integrate rate of change of quaternion
|
||||
// float pa = q2, pb = q3, pc = q4;
|
||||
// q1 += (-q2 * gx - q3 * gy - q4 * gz) * (0.5f * deltaTime);
|
||||
// q2 += (pa * gx + pb * gz - pc * gy) * (0.5f * deltaTime);
|
||||
// q3 += (pb * gy - pa * gz + pc * gx) * (0.5f * deltaTime);
|
||||
// q4 += (pc * gz + pa * gy - pb * gx) * (0.5f * deltaTime);
|
||||
q1 += (-q2 * gx - q3 * gy - q4 * gz) * (0.5f * deltaTime);
|
||||
q2 += (q1 * gx + q3 * gz - q4 * gy) * (0.5f * deltaTime);
|
||||
q3 += (q1 * gy - q2 * gz + q4 * gx) * (0.5f * deltaTime);
|
||||
|
@ -247,18 +243,21 @@ void GameController::TryOpenSDLController() {
|
|||
int gamepad_count;
|
||||
SDL_JoystickID* gamepads = SDL_GetGamepads(&gamepad_count);
|
||||
m_sdl_gamepad = gamepad_count > 0 ? SDL_OpenGamepad(gamepads[0]) : nullptr;
|
||||
if (SDL_SetGamepadSensorEnabled(m_sdl_gamepad, SDL_SENSOR_GYRO, true)) {
|
||||
gyro_poll_rate = SDL_GetGamepadSensorDataRate(m_sdl_gamepad, SDL_SENSOR_GYRO);
|
||||
LOG_INFO(Input, "Gyro initialized, poll rate: {}", gyro_poll_rate);
|
||||
} else {
|
||||
LOG_ERROR(Input, "Failed to initialize gyro controls for gamepad");
|
||||
}
|
||||
if (SDL_SetGamepadSensorEnabled(m_sdl_gamepad, SDL_SENSOR_ACCEL, true)) {
|
||||
accel_poll_rate = SDL_GetGamepadSensorDataRate(m_sdl_gamepad, SDL_SENSOR_ACCEL);
|
||||
LOG_INFO(Input, "Accel initialized, poll rate: {}", accel_poll_rate);
|
||||
} else {
|
||||
LOG_ERROR(Input, "Failed to initialize accel controls for gamepad");
|
||||
if (Config::getIsMotionControlsEnabled()) {
|
||||
if (SDL_SetGamepadSensorEnabled(m_sdl_gamepad, SDL_SENSOR_GYRO, true)) {
|
||||
gyro_poll_rate = SDL_GetGamepadSensorDataRate(m_sdl_gamepad, SDL_SENSOR_GYRO);
|
||||
LOG_INFO(Input, "Gyro initialized, poll rate: {}", gyro_poll_rate);
|
||||
} else {
|
||||
LOG_ERROR(Input, "Failed to initialize gyro controls for gamepad");
|
||||
}
|
||||
if (SDL_SetGamepadSensorEnabled(m_sdl_gamepad, SDL_SENSOR_ACCEL, true)) {
|
||||
accel_poll_rate = SDL_GetGamepadSensorDataRate(m_sdl_gamepad, SDL_SENSOR_ACCEL);
|
||||
LOG_INFO(Input, "Accel initialized, poll rate: {}", accel_poll_rate);
|
||||
} else {
|
||||
LOG_ERROR(Input, "Failed to initialize accel controls for gamepad");
|
||||
}
|
||||
}
|
||||
|
||||
SDL_free(gamepads);
|
||||
|
||||
SetLightBarRGB(0, 0, 255);
|
||||
|
|
|
@ -86,7 +86,7 @@ int main(int argc, char* argv[]) {
|
|||
exit(1);
|
||||
}
|
||||
// Set fullscreen mode without saving it to config file
|
||||
Config::setFullscreenMode(is_fullscreen);
|
||||
Config::setIsFullscreen(is_fullscreen);
|
||||
}},
|
||||
{"--fullscreen", [&](int& i) { arg_map["-f"](i); }},
|
||||
{"--add-game-folder",
|
||||
|
|
|
@ -283,7 +283,7 @@ public:
|
|||
#ifdef Q_OS_WIN
|
||||
if (createShortcutWin(linkPath, ebootPath, icoPath, exePath)) {
|
||||
#else
|
||||
if (createShortcutLinux(linkPath, ebootPath, iconPath)) {
|
||||
if (createShortcutLinux(linkPath, m_games[itemID].name, ebootPath, iconPath)) {
|
||||
#endif
|
||||
QMessageBox::information(
|
||||
nullptr, tr("Shortcut creation"),
|
||||
|
@ -301,7 +301,7 @@ public:
|
|||
#ifdef Q_OS_WIN
|
||||
if (createShortcutWin(linkPath, ebootPath, iconPath, exePath)) {
|
||||
#else
|
||||
if (createShortcutLinux(linkPath, ebootPath, iconPath)) {
|
||||
if (createShortcutLinux(linkPath, m_games[itemID].name, ebootPath, iconPath)) {
|
||||
#endif
|
||||
QMessageBox::information(
|
||||
nullptr, tr("Shortcut creation"),
|
||||
|
@ -510,8 +510,8 @@ private:
|
|||
return SUCCEEDED(hres);
|
||||
}
|
||||
#else
|
||||
bool createShortcutLinux(const QString& linkPath, const QString& targetPath,
|
||||
const QString& iconPath) {
|
||||
bool createShortcutLinux(const QString& linkPath, const std::string& name,
|
||||
const QString& targetPath, const QString& iconPath) {
|
||||
QFile shortcutFile(linkPath);
|
||||
if (!shortcutFile.open(QIODevice::WriteOnly | QIODevice::Text)) {
|
||||
QMessageBox::critical(nullptr, "Error",
|
||||
|
@ -522,7 +522,7 @@ private:
|
|||
QTextStream out(&shortcutFile);
|
||||
out << "[Desktop Entry]\n";
|
||||
out << "Version=1.0\n";
|
||||
out << "Name=" << QFileInfo(linkPath).baseName() << "\n";
|
||||
out << "Name=" << QString::fromStdString(name) << "\n";
|
||||
out << "Exec=" << QCoreApplication::applicationFilePath() << " \"" << targetPath << "\"\n";
|
||||
out << "Icon=" << iconPath << "\n";
|
||||
out << "Terminal=false\n";
|
||||
|
|
|
@ -97,7 +97,7 @@ int main(int argc, char* argv[]) {
|
|||
exit(1);
|
||||
}
|
||||
// Set fullscreen mode without saving it to config file
|
||||
Config::setFullscreenMode(is_fullscreen);
|
||||
Config::setIsFullscreen(is_fullscreen);
|
||||
}},
|
||||
{"--fullscreen", [&](int& i) { arg_map["-f"](i); }},
|
||||
{"--add-game-folder",
|
||||
|
@ -190,4 +190,4 @@ int main(int argc, char* argv[]) {
|
|||
// Show the main window and run the Qt application
|
||||
m_main_window->show();
|
||||
return a.exec();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -47,6 +47,9 @@ PKGViewer::PKGViewer(std::shared_ptr<GameInfoClass> game_info_get, QWidget* pare
|
|||
|
||||
connect(treeWidget, &QTreeWidget::customContextMenuRequested, this,
|
||||
[=, this](const QPoint& pos) {
|
||||
if (treeWidget->selectedItems().isEmpty()) {
|
||||
return;
|
||||
}
|
||||
m_gui_context_menus.RequestGameMenuPKGViewer(pos, m_full_pkg_list, treeWidget,
|
||||
InstallDragDropPkg);
|
||||
});
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
#ifdef ENABLE_UPDATER
|
||||
#include "check_update.h"
|
||||
#endif
|
||||
#include <QDesktopServices>
|
||||
#include <toml.hpp>
|
||||
#include "background_music_player.h"
|
||||
#include "common/logging/backend.h"
|
||||
|
@ -203,6 +204,16 @@ SettingsDialog::SettingsDialog(std::span<const QString> physical_devices,
|
|||
});
|
||||
}
|
||||
|
||||
// DEBUG TAB
|
||||
{
|
||||
connect(ui->OpenLogLocationButton, &QPushButton::clicked, this, []() {
|
||||
QString userPath;
|
||||
Common::FS::PathToQString(userPath,
|
||||
Common::FS::GetUserPath(Common::FS::PathType::UserDir));
|
||||
QDesktopServices::openUrl(QUrl::fromLocalFile(userPath + "/log"));
|
||||
});
|
||||
}
|
||||
|
||||
// Descriptions
|
||||
{
|
||||
// General
|
||||
|
@ -300,6 +311,8 @@ void SettingsDialog::LoadValuesFromConfig() {
|
|||
ui->discordRPCCheckbox->setChecked(
|
||||
toml::find_or<bool>(data, "General", "enableDiscordRPC", true));
|
||||
ui->fullscreenCheckBox->setChecked(toml::find_or<bool>(data, "General", "Fullscreen", false));
|
||||
ui->fullscreenModeComboBox->setCurrentText(QString::fromStdString(
|
||||
toml::find_or<std::string>(data, "General", "FullscreenMode", "Borderless")));
|
||||
ui->separateUpdatesCheckBox->setChecked(
|
||||
toml::find_or<bool>(data, "General", "separateUpdateEnabled", false));
|
||||
ui->showSplashCheckBox->setChecked(toml::find_or<bool>(data, "General", "showSplash", false));
|
||||
|
@ -339,6 +352,8 @@ void SettingsDialog::LoadValuesFromConfig() {
|
|||
toml::find_or<std::string>(data, "Input", "backButtonBehavior", "left"));
|
||||
int index = ui->backButtonBehaviorComboBox->findData(backButtonBehavior);
|
||||
ui->backButtonBehaviorComboBox->setCurrentIndex(index != -1 ? index : 0);
|
||||
ui->motionControlsCheckBox->setChecked(
|
||||
toml::find_or<bool>(data, "Input", "isMotionControlsEnabled", true));
|
||||
|
||||
ui->removeFolderButton->setEnabled(!ui->gameFoldersListWidget->selectedItems().isEmpty());
|
||||
ResetInstallFolders();
|
||||
|
@ -532,7 +547,9 @@ void SettingsDialog::UpdateSettings() {
|
|||
|
||||
const QVector<std::string> TouchPadIndex = {"left", "center", "right", "none"};
|
||||
Config::setBackButtonBehavior(TouchPadIndex[ui->backButtonBehaviorComboBox->currentIndex()]);
|
||||
Config::setFullscreenMode(ui->fullscreenCheckBox->isChecked());
|
||||
Config::setIsFullscreen(ui->fullscreenCheckBox->isChecked());
|
||||
Config::setFullscreenMode(ui->fullscreenModeComboBox->currentText().toStdString());
|
||||
Config::setIsMotionControlsEnabled(ui->motionControlsCheckBox->isChecked());
|
||||
Config::setisTrophyPopupDisabled(ui->disableTrophycheckBox->isChecked());
|
||||
Config::setPlayBGM(ui->playBGMCheckBox->isChecked());
|
||||
Config::setLogType(ui->logTypeComboBox->currentText().toStdString());
|
||||
|
|
|
@ -12,7 +12,7 @@
|
|||
<x>0</x>
|
||||
<y>0</y>
|
||||
<width>970</width>
|
||||
<height>670</height>
|
||||
<height>750</height>
|
||||
</rect>
|
||||
</property>
|
||||
<property name="sizePolicy">
|
||||
|
@ -133,6 +133,35 @@
|
|||
<string>Enable Fullscreen</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QGroupBox" name="fullscreenModeGroupBox">
|
||||
<property name="title">
|
||||
<string>Fullscreen Mode</string>
|
||||
</property>
|
||||
<layout class="QVBoxLayout" name="fullscreenModeLayout">
|
||||
<item>
|
||||
<widget class="QComboBox" name="fullscreenModeComboBox">
|
||||
<property name="sizePolicy">
|
||||
<sizepolicy hsizetype="Preferred" vsizetype="Fixed">
|
||||
<horstretch>0</horstretch>
|
||||
<verstretch>0</verstretch>
|
||||
</sizepolicy>
|
||||
</property>
|
||||
<item>
|
||||
<property name="text">
|
||||
<string>Borderless</string>
|
||||
</property>
|
||||
</item>
|
||||
<item>
|
||||
<property name="text">
|
||||
<string>True</string>
|
||||
</property>
|
||||
</item>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QCheckBox" name="separateUpdatesCheckBox">
|
||||
|
@ -536,6 +565,9 @@
|
|||
<property name="leftMargin">
|
||||
<number>0</number>
|
||||
</property>
|
||||
<property name="bottomMargin">
|
||||
<number>80</number>
|
||||
</property>
|
||||
<item>
|
||||
<layout class="QHBoxLayout" name="hLayoutTrophy">
|
||||
<item>
|
||||
|
@ -566,6 +598,12 @@
|
|||
<height>0</height>
|
||||
</size>
|
||||
</property>
|
||||
<property name="font">
|
||||
<font>
|
||||
<pointsize>10</pointsize>
|
||||
<bold>false</bold>
|
||||
</font>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
|
@ -815,6 +853,13 @@
|
|||
</layout>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QCheckBox" name="motionControlsCheckBox">
|
||||
<property name="text">
|
||||
<string>Enable Motion Controls</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QWidget" name="controllerWidgetSpacer" native="true">
|
||||
<property name="enabled">
|
||||
|
@ -1349,6 +1394,13 @@
|
|||
</item>
|
||||
</layout>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QPushButton" name="OpenLogLocationButton">
|
||||
<property name="text">
|
||||
<string>Open Log Location</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</widget>
|
||||
</item>
|
||||
|
|
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
1419
src/qt_gui/translations/sv.ts
Normal file
1419
src/qt_gui/translations/sv.ts
Normal file
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
@ -93,7 +93,23 @@ WindowSDL::WindowSDL(s32 width_, s32 height_, Input::GameController* controller_
|
|||
}
|
||||
|
||||
SDL_SetWindowMinimumSize(window, 640, 360);
|
||||
SDL_SetWindowFullscreen(window, Config::isFullscreenMode());
|
||||
|
||||
bool error = false;
|
||||
const SDL_DisplayID displayIndex = SDL_GetDisplayForWindow(window);
|
||||
if (displayIndex < 0) {
|
||||
LOG_ERROR(Frontend, "Error getting display index: {}", SDL_GetError());
|
||||
error = true;
|
||||
}
|
||||
const SDL_DisplayMode* displayMode;
|
||||
if ((displayMode = SDL_GetCurrentDisplayMode(displayIndex)) == 0) {
|
||||
LOG_ERROR(Frontend, "Error getting display mode: {}", SDL_GetError());
|
||||
error = true;
|
||||
}
|
||||
if (!error) {
|
||||
SDL_SetWindowFullscreenMode(window,
|
||||
Config::getFullscreenMode() == "True" ? displayMode : NULL);
|
||||
}
|
||||
SDL_SetWindowFullscreen(window, Config::getIsFullscreen());
|
||||
|
||||
SDL_InitSubSystem(SDL_INIT_GAMEPAD);
|
||||
controller->TryOpenSDLController();
|
||||
|
|
|
@ -172,20 +172,18 @@ Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod
|
|||
const auto& texture = ctx.images[handle & 0xFFFF];
|
||||
const Id image = ctx.OpLoad(texture.image_type, texture.id);
|
||||
const auto sharp = ctx.info.images[handle & 0xFFFF].GetSharp(ctx.info);
|
||||
const auto type = sharp.GetBoundType();
|
||||
const Id zero = ctx.u32_zero_value;
|
||||
const auto mips{[&] { return has_mips ? ctx.OpImageQueryLevels(ctx.U32[1], image) : zero; }};
|
||||
const bool uses_lod{type != AmdGpu::ImageType::Color2DMsaa && !texture.is_storage};
|
||||
const bool uses_lod{texture.view_type != AmdGpu::ImageType::Color2DMsaa && !texture.is_storage};
|
||||
const auto query{[&](Id type) {
|
||||
return uses_lod ? ctx.OpImageQuerySizeLod(type, image, lod)
|
||||
: ctx.OpImageQuerySize(type, image);
|
||||
}};
|
||||
switch (type) {
|
||||
switch (texture.view_type) {
|
||||
case AmdGpu::ImageType::Color1D:
|
||||
return ctx.OpCompositeConstruct(ctx.U32[4], query(ctx.U32[1]), zero, zero, mips());
|
||||
case AmdGpu::ImageType::Color1DArray:
|
||||
case AmdGpu::ImageType::Color2D:
|
||||
case AmdGpu::ImageType::Cube:
|
||||
case AmdGpu::ImageType::Color2DMsaa:
|
||||
return ctx.OpCompositeConstruct(ctx.U32[4], query(ctx.U32[2]), zero, mips());
|
||||
case AmdGpu::ImageType::Color2DArray:
|
||||
|
@ -257,4 +255,20 @@ void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id
|
|||
ctx.OpImageWrite(image, coords, texel, operands.mask, operands.operands);
|
||||
}
|
||||
|
||||
/// Emits the AMD cube-face-coordinate operation for `cube_coords`, typed as ctx.F32[2].
/// Only valid when the profile reports native cube calculation support; reaching this emitter
/// without it is treated as unreachable.
Id EmitCubeFaceCoord(EmitContext& ctx, IR::Inst* inst, Id cube_coords) {
    if (!ctx.profile.supports_native_cube_calc) {
        UNREACHABLE_MSG("SPIR-V Instruction");
    }
    return ctx.OpCubeFaceCoordAMD(ctx.F32[2], cube_coords);
}
|
||||
|
||||
/// Emits the AMD cube-face-index operation for `cube_coords`, typed as ctx.F32[1].
/// Only valid when the profile reports native cube calculation support; reaching this emitter
/// without it is treated as unreachable.
Id EmitCubeFaceIndex(EmitContext& ctx, IR::Inst* inst, Id cube_coords) {
    if (!ctx.profile.supports_native_cube_calc) {
        UNREACHABLE_MSG("SPIR-V Instruction");
    }
    return ctx.OpCubeFaceIndexAMD(ctx.F32[1], cube_coords);
}
|
||||
|
||||
} // namespace Shader::Backend::SPIRV
|
||||
|
|
|
@ -439,6 +439,8 @@ Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
|
|||
Id EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
|
||||
Id EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
|
||||
Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
|
||||
Id EmitCubeFaceCoord(EmitContext& ctx, IR::Inst* inst, Id cube_coords);
|
||||
Id EmitCubeFaceIndex(EmitContext& ctx, IR::Inst* inst, Id cube_coords);
|
||||
Id EmitLaneId(EmitContext& ctx);
|
||||
Id EmitWarpId(EmitContext& ctx);
|
||||
Id EmitQuadShuffle(EmitContext& ctx, Id value, Id index);
|
||||
|
|
|
@ -773,8 +773,8 @@ spv::ImageFormat GetFormat(const AmdGpu::Image& image) {
|
|||
Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) {
|
||||
const auto image = desc.GetSharp(ctx.info);
|
||||
const auto format = desc.is_atomic ? GetFormat(image) : spv::ImageFormat::Unknown;
|
||||
const auto type = image.GetBoundType();
|
||||
const u32 sampled = desc.IsStorage(image) ? 2 : 1;
|
||||
const auto type = image.GetViewType(desc.is_array);
|
||||
const u32 sampled = desc.is_written ? 2 : 1;
|
||||
switch (type) {
|
||||
case AmdGpu::ImageType::Color1D:
|
||||
return ctx.TypeImage(sampled_type, spv::Dim::Dim1D, false, false, false, sampled, format);
|
||||
|
@ -788,9 +788,6 @@ Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) {
|
|||
return ctx.TypeImage(sampled_type, spv::Dim::Dim2D, false, false, true, sampled, format);
|
||||
case AmdGpu::ImageType::Color3D:
|
||||
return ctx.TypeImage(sampled_type, spv::Dim::Dim3D, false, false, false, sampled, format);
|
||||
case AmdGpu::ImageType::Cube:
|
||||
return ctx.TypeImage(sampled_type, spv::Dim::Cube, false, desc.is_array, false, sampled,
|
||||
format);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -802,7 +799,7 @@ void EmitContext::DefineImagesAndSamplers() {
|
|||
const auto sharp = image_desc.GetSharp(info);
|
||||
const auto nfmt = sharp.GetNumberFmt();
|
||||
const bool is_integer = AmdGpu::IsInteger(nfmt);
|
||||
const bool is_storage = image_desc.IsStorage(sharp);
|
||||
const bool is_storage = image_desc.is_written;
|
||||
const VectorIds& data_types = GetAttributeType(*this, nfmt);
|
||||
const Id sampled_type = data_types[1];
|
||||
const Id image_type{ImageType(*this, image_desc, sampled_type)};
|
||||
|
@ -817,6 +814,7 @@ void EmitContext::DefineImagesAndSamplers() {
|
|||
.sampled_type = is_storage ? sampled_type : TypeSampledImage(image_type),
|
||||
.pointer_type = pointer_type,
|
||||
.image_type = image_type,
|
||||
.view_type = sharp.GetViewType(image_desc.is_array),
|
||||
.is_integer = is_integer,
|
||||
.is_storage = is_storage,
|
||||
});
|
||||
|
|
|
@ -222,6 +222,7 @@ public:
|
|||
Id sampled_type;
|
||||
Id pointer_type;
|
||||
Id image_type;
|
||||
AmdGpu::ImageType view_type;
|
||||
bool is_integer = false;
|
||||
bool is_storage = false;
|
||||
};
|
||||
|
|
|
@ -47,13 +47,26 @@ static IR::Condition MakeCondition(const GcnInst& inst) {
|
|||
}
|
||||
}
|
||||
|
||||
static bool IgnoresExecMask(Opcode opcode) {
|
||||
switch (opcode) {
|
||||
case Opcode::V_WRITELANE_B32:
|
||||
static bool IgnoresExecMask(const GcnInst& inst) {
|
||||
// EXEC mask does not affect scalar instructions or branches.
|
||||
switch (inst.category) {
|
||||
case InstCategory::ScalarALU:
|
||||
case InstCategory::ScalarMemory:
|
||||
case InstCategory::FlowControl:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
break;
|
||||
}
|
||||
// Read/Write Lane instructions are not affected either.
|
||||
switch (inst.opcode) {
|
||||
case Opcode::V_READLANE_B32:
|
||||
case Opcode::V_WRITELANE_B32:
|
||||
case Opcode::V_READFIRSTLANE_B32:
|
||||
return true;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static constexpr size_t LabelReserveSize = 32;
|
||||
|
@ -147,8 +160,7 @@ void CFG::EmitDivergenceLabels() {
|
|||
// If all instructions in the scope ignore exec masking, we shouldn't insert a
|
||||
// scope.
|
||||
const auto start = inst_list.begin() + curr_begin + 1;
|
||||
if (!std::ranges::all_of(start, inst_list.begin() + index, IgnoresExecMask,
|
||||
&GcnInst::opcode)) {
|
||||
if (!std::ranges::all_of(start, inst_list.begin() + index, IgnoresExecMask)) {
|
||||
// Add a label to the instruction right after the open scope call.
|
||||
// It is the start of a new basic block.
|
||||
const auto& save_inst = inst_list[curr_begin];
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "shader_recompiler/frontend/translate/translate.h"
|
||||
#include "shader_recompiler/ir/reinterpret.h"
|
||||
#include "shader_recompiler/runtime_info.h"
|
||||
|
||||
namespace Shader::Gcn {
|
||||
|
@ -31,14 +32,16 @@ void Translator::EmitExport(const GcnInst& inst) {
|
|||
return;
|
||||
}
|
||||
const u32 index = u32(attrib) - u32(IR::Attribute::RenderTarget0);
|
||||
const auto [r, g, b, a] = runtime_info.fs_info.color_buffers[index].swizzle;
|
||||
const auto col_buf = runtime_info.fs_info.color_buffers[index];
|
||||
const auto converted = IR::ApplyWriteNumberConversion(ir, value, col_buf.num_conversion);
|
||||
const auto [r, g, b, a] = col_buf.swizzle;
|
||||
const std::array swizzle_array = {r, g, b, a};
|
||||
const auto swizzled_comp = swizzle_array[comp];
|
||||
if (u32(swizzled_comp) < u32(AmdGpu::CompSwizzle::Red)) {
|
||||
ir.SetAttribute(attrib, value, comp);
|
||||
ir.SetAttribute(attrib, converted, comp);
|
||||
return;
|
||||
}
|
||||
ir.SetAttribute(attrib, value, u32(swizzled_comp) - u32(AmdGpu::CompSwizzle::Red));
|
||||
ir.SetAttribute(attrib, converted, u32(swizzled_comp) - u32(AmdGpu::CompSwizzle::Red));
|
||||
};
|
||||
|
||||
const auto unpack = [&](u32 idx) {
|
||||
|
|
|
@ -301,6 +301,9 @@ private:
|
|||
IR::U32 VMovRelSHelper(u32 src_vgprno, const IR::U32 m0);
|
||||
void VMovRelDHelper(u32 dst_vgprno, const IR::U32 src_val, const IR::U32 m0);
|
||||
|
||||
IR::F32 SelectCubeResult(const IR::F32& x, const IR::F32& y, const IR::F32& z,
|
||||
const IR::F32& x_res, const IR::F32& y_res, const IR::F32& z_res);
|
||||
|
||||
void LogMissingOpcode(const GcnInst& inst);
|
||||
|
||||
private:
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
|
||||
#include "shader_recompiler/frontend/opcodes.h"
|
||||
#include "shader_recompiler/frontend/translate/translate.h"
|
||||
#include "shader_recompiler/profile.h"
|
||||
|
||||
namespace Shader::Gcn {
|
||||
|
||||
|
@ -904,7 +905,7 @@ void Translator::V_CMP_F32(ConditionOp op, bool set_exec, const GcnInst& inst) {
|
|||
case ConditionOp::GE:
|
||||
return ir.FPGreaterThanEqual(src0, src1);
|
||||
case ConditionOp::U:
|
||||
return ir.LogicalNot(ir.LogicalAnd(ir.FPIsNan(src0), ir.FPIsNan(src1)));
|
||||
return ir.LogicalOr(ir.FPIsNan(src0), ir.FPIsNan(src1));
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
@ -1042,20 +1043,92 @@ void Translator::V_MAD_U32_U24(const GcnInst& inst) {
|
|||
V_MAD_I32_I24(inst, false);
|
||||
}
|
||||
|
||||
/// Picks one of three per-axis results according to which of x, y, z has the largest
/// magnitude. z wins when |z| >= |x| and |z| >= |y|; otherwise y wins when |y| >= |x|;
/// otherwise x wins.
IR::F32 Translator::SelectCubeResult(const IR::F32& x, const IR::F32& y, const IR::F32& z,
                                     const IR::F32& x_res, const IR::F32& y_res,
                                     const IR::F32& z_res) {
    const auto mag_x = ir.FPAbs(x);
    const auto mag_y = ir.FPAbs(y);
    const auto mag_z = ir.FPAbs(z);

    const auto z_is_major = ir.LogicalAnd(ir.FPGreaterThanEqual(mag_z, mag_x),
                                          ir.FPGreaterThanEqual(mag_z, mag_y));
    const auto y_beats_x = ir.FPGreaterThanEqual(mag_y, mag_x);

    // Resolve the x-vs-y choice first, then let the z test override it.
    const auto xy_choice = ir.Select(y_beats_x, y_res, x_res);
    return IR::F32{ir.Select(z_is_major, z_res, xy_choice)};
}
|
||||
|
||||
void Translator::V_CUBEID_F32(const GcnInst& inst) {
|
||||
SetDst(inst.dst[0], GetSrc<IR::F32>(inst.src[2]));
|
||||
const auto x = GetSrc<IR::F32>(inst.src[0]);
|
||||
const auto y = GetSrc<IR::F32>(inst.src[1]);
|
||||
const auto z = GetSrc<IR::F32>(inst.src[2]);
|
||||
|
||||
IR::F32 result;
|
||||
if (profile.supports_native_cube_calc) {
|
||||
result = ir.CubeFaceIndex(ir.CompositeConstruct(x, y, z));
|
||||
} else {
|
||||
const auto x_neg_cond{ir.FPLessThan(x, ir.Imm32(0.f))};
|
||||
const auto y_neg_cond{ir.FPLessThan(y, ir.Imm32(0.f))};
|
||||
const auto z_neg_cond{ir.FPLessThan(z, ir.Imm32(0.f))};
|
||||
const IR::F32 x_face{ir.Select(x_neg_cond, ir.Imm32(5.f), ir.Imm32(4.f))};
|
||||
const IR::F32 y_face{ir.Select(y_neg_cond, ir.Imm32(3.f), ir.Imm32(2.f))};
|
||||
const IR::F32 z_face{ir.Select(z_neg_cond, ir.Imm32(1.f), ir.Imm32(0.f))};
|
||||
|
||||
result = SelectCubeResult(x, y, z, x_face, y_face, z_face);
|
||||
}
|
||||
SetDst(inst.dst[0], result);
|
||||
}
|
||||
|
||||
void Translator::V_CUBESC_F32(const GcnInst& inst) {
|
||||
SetDst(inst.dst[0], GetSrc<IR::F32>(inst.src[0]));
|
||||
const auto x = GetSrc<IR::F32>(inst.src[0]);
|
||||
const auto y = GetSrc<IR::F32>(inst.src[1]);
|
||||
const auto z = GetSrc<IR::F32>(inst.src[2]);
|
||||
|
||||
IR::F32 result;
|
||||
if (profile.supports_native_cube_calc) {
|
||||
const auto coords{ir.CubeFaceCoord(ir.CompositeConstruct(x, y, z))};
|
||||
result = IR::F32{ir.CompositeExtract(coords, 0)};
|
||||
} else {
|
||||
const auto x_neg_cond{ir.FPLessThan(x, ir.Imm32(0.f))};
|
||||
const auto z_neg_cond{ir.FPLessThan(z, ir.Imm32(0.f))};
|
||||
const IR::F32 x_sc{ir.Select(x_neg_cond, ir.FPNeg(x), x)};
|
||||
const IR::F32 z_sc{ir.Select(z_neg_cond, z, ir.FPNeg(z))};
|
||||
|
||||
result = SelectCubeResult(x, y, z, x_sc, x, z_sc);
|
||||
}
|
||||
SetDst(inst.dst[0], result);
|
||||
}
|
||||
|
||||
void Translator::V_CUBETC_F32(const GcnInst& inst) {
|
||||
SetDst(inst.dst[0], GetSrc<IR::F32>(inst.src[1]));
|
||||
const auto x = GetSrc<IR::F32>(inst.src[0]);
|
||||
const auto y = GetSrc<IR::F32>(inst.src[1]);
|
||||
const auto z = GetSrc<IR::F32>(inst.src[2]);
|
||||
|
||||
IR::F32 result;
|
||||
if (profile.supports_native_cube_calc) {
|
||||
const auto coords{ir.CubeFaceCoord(ir.CompositeConstruct(x, y, z))};
|
||||
result = IR::F32{ir.CompositeExtract(coords, 1)};
|
||||
} else {
|
||||
const auto y_neg_cond{ir.FPLessThan(y, ir.Imm32(0.f))};
|
||||
const IR::F32 x_z_sc{ir.FPNeg(y)};
|
||||
const IR::F32 y_sc{ir.Select(y_neg_cond, ir.FPNeg(z), z)};
|
||||
|
||||
result = SelectCubeResult(x, y, z, x_z_sc, y_sc, x_z_sc);
|
||||
}
|
||||
SetDst(inst.dst[0], result);
|
||||
}
|
||||
|
||||
void Translator::V_CUBEMA_F32(const GcnInst& inst) {
|
||||
SetDst(inst.dst[0], ir.Imm32(1.f));
|
||||
const auto x = GetSrc<IR::F32>(inst.src[0]);
|
||||
const auto y = GetSrc<IR::F32>(inst.src[1]);
|
||||
const auto z = GetSrc<IR::F32>(inst.src[2]);
|
||||
|
||||
const auto two{ir.Imm32(4.f)};
|
||||
const IR::F32 x_major_axis{ir.FPMul(x, two)};
|
||||
const IR::F32 y_major_axis{ir.FPMul(y, two)};
|
||||
const IR::F32 z_major_axis{ir.FPMul(z, two)};
|
||||
|
||||
const auto result{SelectCubeResult(x, y, z, x_major_axis, y_major_axis, z_major_axis)};
|
||||
SetDst(inst.dst[0], result);
|
||||
}
|
||||
|
||||
void Translator::V_BFE_U32(bool is_signed, const GcnInst& inst) {
|
||||
|
|
|
@ -418,6 +418,7 @@ void Translator::IMAGE_LOAD(bool has_mip, const GcnInst& inst) {
|
|||
|
||||
IR::TextureInstInfo info{};
|
||||
info.has_lod.Assign(has_mip);
|
||||
info.is_array.Assign(mimg.da);
|
||||
const IR::Value texel = ir.ImageRead(handle, body, {}, {}, info);
|
||||
|
||||
for (u32 i = 0; i < 4; i++) {
|
||||
|
@ -442,6 +443,7 @@ void Translator::IMAGE_STORE(bool has_mip, const GcnInst& inst) {
|
|||
|
||||
IR::TextureInstInfo info{};
|
||||
info.has_lod.Assign(has_mip);
|
||||
info.is_array.Assign(mimg.da);
|
||||
|
||||
boost::container::static_vector<IR::F32, 4> comps;
|
||||
for (u32 i = 0; i < 4; i++) {
|
||||
|
@ -456,13 +458,18 @@ void Translator::IMAGE_STORE(bool has_mip, const GcnInst& inst) {
|
|||
}
|
||||
|
||||
void Translator::IMAGE_GET_RESINFO(const GcnInst& inst) {
|
||||
const auto& mimg = inst.control.mimg;
|
||||
IR::VectorReg dst_reg{inst.dst[0].code};
|
||||
const IR::ScalarReg tsharp_reg{inst.src[2].code * 4};
|
||||
const auto flags = ImageResFlags(inst.control.mimg.dmask);
|
||||
const bool has_mips = flags.test(ImageResComponent::MipCount);
|
||||
const IR::U32 lod = ir.GetVectorReg(IR::VectorReg(inst.src[0].code));
|
||||
const IR::Value tsharp = ir.GetScalarReg(tsharp_reg);
|
||||
const IR::Value size = ir.ImageQueryDimension(tsharp, lod, ir.Imm1(has_mips));
|
||||
|
||||
IR::TextureInstInfo info{};
|
||||
info.is_array.Assign(mimg.da);
|
||||
|
||||
const IR::Value size = ir.ImageQueryDimension(tsharp, lod, ir.Imm1(has_mips), info);
|
||||
|
||||
if (flags.test(ImageResComponent::Width)) {
|
||||
ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(size, 0)});
|
||||
|
@ -484,6 +491,9 @@ void Translator::IMAGE_ATOMIC(AtomicOp op, const GcnInst& inst) {
|
|||
IR::VectorReg addr_reg{inst.src[0].code};
|
||||
const IR::ScalarReg tsharp_reg{inst.src[2].code * 4};
|
||||
|
||||
IR::TextureInstInfo info{};
|
||||
info.is_array.Assign(mimg.da);
|
||||
|
||||
const IR::Value value = ir.GetVectorReg(val_reg);
|
||||
const IR::Value handle = ir.GetScalarReg(tsharp_reg);
|
||||
const IR::Value body =
|
||||
|
@ -494,25 +504,25 @@ void Translator::IMAGE_ATOMIC(AtomicOp op, const GcnInst& inst) {
|
|||
case AtomicOp::Swap:
|
||||
return ir.ImageAtomicExchange(handle, body, value, {});
|
||||
case AtomicOp::Add:
|
||||
return ir.ImageAtomicIAdd(handle, body, value, {});
|
||||
return ir.ImageAtomicIAdd(handle, body, value, info);
|
||||
case AtomicOp::Smin:
|
||||
return ir.ImageAtomicIMin(handle, body, value, true, {});
|
||||
return ir.ImageAtomicIMin(handle, body, value, true, info);
|
||||
case AtomicOp::Umin:
|
||||
return ir.ImageAtomicUMin(handle, body, value, {});
|
||||
return ir.ImageAtomicUMin(handle, body, value, info);
|
||||
case AtomicOp::Smax:
|
||||
return ir.ImageAtomicIMax(handle, body, value, true, {});
|
||||
return ir.ImageAtomicIMax(handle, body, value, true, info);
|
||||
case AtomicOp::Umax:
|
||||
return ir.ImageAtomicUMax(handle, body, value, {});
|
||||
return ir.ImageAtomicUMax(handle, body, value, info);
|
||||
case AtomicOp::And:
|
||||
return ir.ImageAtomicAnd(handle, body, value, {});
|
||||
return ir.ImageAtomicAnd(handle, body, value, info);
|
||||
case AtomicOp::Or:
|
||||
return ir.ImageAtomicOr(handle, body, value, {});
|
||||
return ir.ImageAtomicOr(handle, body, value, info);
|
||||
case AtomicOp::Xor:
|
||||
return ir.ImageAtomicXor(handle, body, value, {});
|
||||
return ir.ImageAtomicXor(handle, body, value, info);
|
||||
case AtomicOp::Inc:
|
||||
return ir.ImageAtomicInc(handle, body, value, {});
|
||||
return ir.ImageAtomicInc(handle, body, value, info);
|
||||
case AtomicOp::Dec:
|
||||
return ir.ImageAtomicDec(handle, body, value, {});
|
||||
return ir.ImageAtomicDec(handle, body, value, info);
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
@ -643,11 +653,14 @@ void Translator::IMAGE_GET_LOD(const GcnInst& inst) {
|
|||
IR::VectorReg addr_reg{inst.src[0].code};
|
||||
const IR::ScalarReg tsharp_reg{inst.src[2].code * 4};
|
||||
|
||||
IR::TextureInstInfo info{};
|
||||
info.is_array.Assign(mimg.da);
|
||||
|
||||
const IR::Value handle = ir.GetScalarReg(tsharp_reg);
|
||||
const IR::Value body = ir.CompositeConstruct(
|
||||
ir.GetVectorReg<IR::F32>(addr_reg), ir.GetVectorReg<IR::F32>(addr_reg + 1),
|
||||
ir.GetVectorReg<IR::F32>(addr_reg + 2), ir.GetVectorReg<IR::F32>(addr_reg + 3));
|
||||
const IR::Value lod = ir.ImageQueryLod(handle, body, {});
|
||||
const IR::Value lod = ir.ImageQueryLod(handle, body, info);
|
||||
ir.SetVectorReg(dst_reg++, IR::F32{ir.CompositeExtract(lod, 0)});
|
||||
ir.SetVectorReg(dst_reg++, IR::F32{ir.CompositeExtract(lod, 1)});
|
||||
}
|
||||
|
|
|
@ -70,14 +70,8 @@ struct ImageResource {
|
|||
bool is_depth{};
|
||||
bool is_atomic{};
|
||||
bool is_array{};
|
||||
bool is_read{};
|
||||
bool is_written{};
|
||||
|
||||
[[nodiscard]] bool IsStorage(const AmdGpu::Image& image) const noexcept {
|
||||
// Need cube as storage when used with ImageRead.
|
||||
return is_written || (is_read && image.GetBoundType() == AmdGpu::ImageType::Cube);
|
||||
}
|
||||
|
||||
[[nodiscard]] constexpr AmdGpu::Image GetSharp(const Info& info) const noexcept;
|
||||
};
|
||||
using ImageResourceList = boost::container::small_vector<ImageResource, 16>;
|
||||
|
|
|
@ -1732,11 +1732,6 @@ Value IREmitter::ImageGatherDref(const Value& handle, const Value& coords, const
|
|||
return Inst(Opcode::ImageGatherDref, Flags{info}, handle, coords, offset, dref);
|
||||
}
|
||||
|
||||
Value IREmitter::ImageQueryDimension(const Value& handle, const IR::U32& lod,
|
||||
const IR::U1& skip_mips) {
|
||||
return Inst(Opcode::ImageQueryDimensions, handle, lod, skip_mips);
|
||||
}
|
||||
|
||||
Value IREmitter::ImageQueryDimension(const Value& handle, const IR::U32& lod,
|
||||
const IR::U1& skip_mips, TextureInstInfo info) {
|
||||
return Inst(Opcode::ImageQueryDimensions, Flags{info}, handle, lod, skip_mips);
|
||||
|
@ -1763,6 +1758,14 @@ void IREmitter::ImageWrite(const Value& handle, const Value& coords, const U32&
|
|||
Inst(Opcode::ImageWrite, Flags{info}, handle, coords, lod, multisampling, color);
|
||||
}
|
||||
|
||||
/// Emits a CubeFaceCoord instruction for the given cube direction vector and returns its
/// result value (declared as F32x2 from an F32x3 operand in the opcode table).
[[nodiscard]] Value IREmitter::CubeFaceCoord(const Value& cube_coords) {
    const Value face_coord = Inst(Opcode::CubeFaceCoord, cube_coords);
    return face_coord;
}
|
||||
|
||||
/// Emits a CubeFaceIndex instruction for the given cube direction vector and returns its
/// F32 result.
[[nodiscard]] F32 IREmitter::CubeFaceIndex(const Value& cube_coords) {
    const F32 face_index = Inst<F32>(Opcode::CubeFaceIndex, cube_coords);
    return face_index;
}
|
||||
|
||||
// Debug print maps to SPIRV's NonSemantic DebugPrintf instruction
|
||||
// Renderdoc will hook in its own implementation of the SPIRV instruction
|
||||
// Renderdoc accepts format specifiers, e.g. %u, listed here:
|
||||
|
|
|
@ -324,8 +324,6 @@ public:
|
|||
const F32& dref, const F32& lod,
|
||||
const Value& offset, TextureInstInfo info);
|
||||
|
||||
[[nodiscard]] Value ImageQueryDimension(const Value& handle, const U32& lod,
|
||||
const U1& skip_mips);
|
||||
[[nodiscard]] Value ImageQueryDimension(const Value& handle, const U32& lod,
|
||||
const U1& skip_mips, TextureInstInfo info);
|
||||
|
||||
|
@ -344,6 +342,9 @@ public:
|
|||
void ImageWrite(const Value& handle, const Value& coords, const U32& lod,
|
||||
const U32& multisampling, const Value& color, TextureInstInfo info);
|
||||
|
||||
[[nodiscard]] Value CubeFaceCoord(const Value& cube_coords);
|
||||
[[nodiscard]] F32 CubeFaceIndex(const Value& cube_coords);
|
||||
|
||||
void EmitVertex();
|
||||
void EmitPrimitive();
|
||||
|
||||
|
|
|
@ -374,6 +374,10 @@ OPCODE(ImageAtomicOr32, U32, Opaq
|
|||
OPCODE(ImageAtomicXor32, U32, Opaque, Opaque, U32, )
|
||||
OPCODE(ImageAtomicExchange32, U32, Opaque, Opaque, U32, )
|
||||
|
||||
// Cube operations - optional, usable if profile.supports_native_cube_calc
|
||||
OPCODE(CubeFaceCoord, F32x2, F32x3, )
|
||||
OPCODE(CubeFaceIndex, F32, F32x3, )
|
||||
|
||||
// Warp operations
|
||||
OPCODE(LaneId, U32, )
|
||||
OPCODE(WarpId, U32, )
|
||||
|
|
|
@ -161,10 +161,9 @@ public:
|
|||
|
||||
u32 Add(const ImageResource& desc) {
|
||||
const u32 index{Add(image_resources, desc, [&desc](const auto& existing) {
|
||||
return desc.sharp_idx == existing.sharp_idx;
|
||||
return desc.sharp_idx == existing.sharp_idx && desc.is_array == existing.is_array;
|
||||
})};
|
||||
auto& image = image_resources[index];
|
||||
image.is_read |= desc.is_read;
|
||||
image.is_written |= desc.is_written;
|
||||
return index;
|
||||
}
|
||||
|
@ -301,8 +300,7 @@ s32 TryHandleInlineCbuf(IR::Inst& inst, Info& info, Descriptors& descriptors,
|
|||
});
|
||||
}
|
||||
|
||||
void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
||||
Descriptors& descriptors) {
|
||||
void PatchBufferSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
|
||||
s32 binding{};
|
||||
AmdGpu::Buffer buffer;
|
||||
if (binding = TryHandleInlineCbuf(inst, info, descriptors, buffer); binding == -1) {
|
||||
|
@ -317,19 +315,189 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
|||
});
|
||||
}
|
||||
|
||||
// Update buffer descriptor format.
|
||||
const auto inst_info = inst.Flags<IR::BufferInstInfo>();
|
||||
|
||||
// Replace handle with binding index in buffer resource list.
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
inst.SetArg(0, ir.Imm32(binding));
|
||||
}
|
||||
|
||||
/// Registers the texture buffer referenced by a buffer-format instruction and rewrites the
/// instruction's handle argument (arg 0) to the resulting binding index.
/// @param block        Block containing the instruction; used to position the IR emitter.
/// @param inst         Instruction being patched.
/// @param info         Shader info used to track the sharp.
/// @param descriptors  Descriptor collection the texture buffer resource is added to.
void PatchTextureBufferSharp(IR::Block& block, IR::Inst& inst, Info& info,
                             Descriptors& descriptors) {
    // Follow the handle argument back to the instruction the sharp is tracked from.
    const IR::Inst* const handle_inst = inst.Arg(0).InstRecursive();
    const IR::Inst* const sharp_source = handle_inst->Arg(0).InstRecursive();
    const auto sharp_idx = TrackSharp(sharp_source, info);

    // Only the format store opcode marks the resource as written.
    const bool is_store = inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32;
    const s32 binding = descriptors.Add(TextureBufferResource{
        .sharp_idx = sharp_idx,
        .is_written = is_store,
    });

    // Replace handle with binding index in texture buffer resource list.
    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
    inst.SetArg(0, ir.Imm32(binding));
}
|
||||
|
||||
/// Resolves the image (and, for raw sample instructions, the sampler) referenced by an image
/// instruction, registers the resources in `descriptors`, and rewrites the instruction's
/// handle argument (arg 0) to the binding index. For raw samples the sampler binding is packed
/// into the upper 16 bits of the handle.
/// FMask images are special-cased: the instruction's uses are replaced with constants and no
/// image binding is created.
/// @param block        Block containing the instruction; used to position the IR emitter.
/// @param inst         Image instruction being patched.
/// @param info         Shader info used to track and read sharps.
/// @param descriptors  Descriptor collection that receives the image/sampler/FMask resources.
void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
    // The search stops at the first instruction that can be the origin of an image handle.
    const auto is_sharp_source = [](const IR::Inst* candidate) -> std::optional<const IR::Inst*> {
        switch (candidate->GetOpcode()) {
        case IR::Opcode::CompositeConstructU32x2: // IMAGE_SAMPLE (image+sampler)
        case IR::Opcode::ReadConst:               // IMAGE_LOAD (image only)
        case IR::Opcode::GetUserData:
            return candidate;
        default:
            return std::nullopt;
        }
    };
    const auto search = IR::BreadthFirstSearch(&inst, is_sharp_source);
    ASSERT_MSG(search, "Unable to find image sharp source");
    const IR::Inst* producer = search.value();

    // A U32x2 composite carries the image handle in element 0 (and the sampler in element 1).
    const IR::Inst* tsharp_handle = producer;
    if (producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2) {
        tsharp_handle = producer->Arg(0).InstRecursive();
    }

    // Read image sharp.
    const auto tsharp = TrackSharp(tsharp_handle, info);
    const auto inst_info = inst.Flags<IR::TextureInstInfo>();
    auto image = info.ReadUdSharp<AmdGpu::Image>(tsharp);
    if (!image.Valid()) {
        LOG_ERROR(Render_Vulkan, "Shader compiled with unbound image!");
        image = AmdGpu::Image::Null();
    }
    ASSERT(image.GetType() != AmdGpu::ImageType::Invalid);
    const bool is_written = inst.GetOpcode() == IR::Opcode::ImageWrite;

    // Patch image instruction if image is FMask.
    if (image.IsFmask()) {
        ASSERT_MSG(!is_written, "FMask storage instructions are not supported");

        IR::IREmitter fmask_ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
        switch (inst.GetOpcode()) {
        case IR::Opcode::ImageRead:
        case IR::Opcode::ImageSampleRaw: {
            // Reads and samples are replaced with a fixed pair of bit patterns.
            IR::F32 fmask_lo = fmask_ir.BitCast<IR::F32>(fmask_ir.Imm32(0x76543210));
            IR::F32 fmask_hi = fmask_ir.BitCast<IR::F32>(fmask_ir.Imm32(0xfedcba98));
            inst.ReplaceUsesWith(fmask_ir.CompositeConstruct(fmask_lo, fmask_hi));
            return;
        }
        case IR::Opcode::ImageQueryLod:
            inst.ReplaceUsesWith(fmask_ir.Imm32(1));
            return;
        case IR::Opcode::ImageQueryDimensions: {
            // NOTE(review): the y extent is also taken from image.width - confirm intended.
            const auto extent_x = fmask_ir.Imm32(static_cast<u32>(image.width)); // x
            const auto extent_y = fmask_ir.Imm32(static_cast<u32>(image.width)); // y
            IR::Value dims = fmask_ir.CompositeConstruct(extent_x, extent_y, fmask_ir.Imm32(1),
                                                         fmask_ir.Imm32(1)); // depth, mip
            inst.ReplaceUsesWith(dims);

            // Track FMask resource to do specialization.
            descriptors.Add(FMaskResource{
                .sharp_idx = tsharp,
            });
            return;
        }
        default:
            UNREACHABLE_MSG("Can't patch fmask instruction {}", inst.GetOpcode());
        }
    }

    u32 image_binding = descriptors.Add(ImageResource{
        .sharp_idx = tsharp,
        .is_depth = bool(inst_info.is_depth),
        .is_atomic = IsImageAtomicInstruction(inst),
        .is_array = bool(inst_info.is_array),
        .is_written = is_written,
    });

    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};

    // Everything except a raw sample only needs the image binding as its handle.
    if (inst.GetOpcode() != IR::Opcode::ImageSampleRaw) {
        // Patch image handle.
        inst.SetArg(0, ir.Imm32(image_binding));
        return;
    }

    // Read sampler sharp.
    const auto [sampler_binding, sampler] = [&] -> std::pair<u32, AmdGpu::Sampler> {
        ASSERT(producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2);
        const IR::Value& sampler_handle = producer->Arg(1);
        // Inline sampler resource.
        if (sampler_handle.IsImmediate()) {
            LOG_WARNING(Render_Vulkan, "Inline sampler detected");
            const auto inline_sampler = AmdGpu::Sampler{.raw0 = sampler_handle.U32()};
            const auto inline_binding = descriptors.Add(SamplerResource{
                .sharp_idx = std::numeric_limits<u32>::max(),
                .inline_sampler = inline_sampler,
            });
            return {inline_binding, inline_sampler};
        }
        // Normal sampler resource.
        const auto ssharp_handle = sampler_handle.InstRecursive();
        const auto& [ssharp_ud, disable_aniso] = TryDisableAnisoLod0(ssharp_handle);
        const auto ssharp = TrackSharp(ssharp_ud, info);
        const auto tracked_binding = descriptors.Add(SamplerResource{
            .sharp_idx = ssharp,
            .associated_image = image_binding,
            .disable_aniso = disable_aniso,
        });
        return {tracked_binding, info.ReadUdSharp<AmdGpu::Sampler>(ssharp)};
    }();

    // Patch image and sampler handle.
    inst.SetArg(0, ir.Imm32(image_binding | sampler_binding << 16));
}
|
||||
|
||||
/// Patches a data-ring (GDS) access: ensures a GDS buffer binding exists, resolves the GDS
/// address at compile time, and rewrites arg 0 to the address divided by four and arg 1 to the
/// binding index.
/// @param block        Block containing the instruction; used to position the IR emitter.
/// @param inst         Instruction being patched.
/// @param info         Shader info; its user data is read when the offset is not immediate.
/// @param descriptors  Descriptor collection that receives the GDS buffer resource.
void PatchDataRingAccess(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
    // Insert gds binding in the shader if it doesn't exist already.
    // The buffer is used for append/consume counters.
    constexpr static AmdGpu::Buffer GdsSharp{.base_address = 1};
    const u32 binding = descriptors.Add(BufferResource{
        .used_types = IR::Type::U32,
        .inline_cbuf = GdsSharp,
        .is_gds_buffer = true,
        .is_written = true,
    });

    const auto is_user_data = [](const IR::Inst* candidate) -> std::optional<const IR::Inst*> {
        return candidate->GetOpcode() == IR::Opcode::GetUserData
                   ? std::optional<const IR::Inst*>{candidate}
                   : std::nullopt;
    };

    // Attempt to deduce the GDS address of counter at compile time.
    u32 gds_addr{};
    if (const IR::Value& gds_offset = inst.Arg(0); gds_offset.IsImmediate()) {
        // Nothing to do, offset is known.
        gds_addr = gds_offset.U32() & 0xFFFF;
    } else {
        const auto user_data_inst = IR::BreadthFirstSearch(&inst, is_user_data);
        ASSERT_MSG(user_data_inst, "Unable to track M0 source");

        // M0 must be set by some user data register.
        const IR::Inst* offset_producer = gds_offset.InstRecursive();
        const u32 ud_reg = u32(user_data_inst.value()->Arg(0).ScalarReg());
        // The upper half of the user data dword is used as the base value.
        u32 m0_val = info.user_data[ud_reg] >> 16;
        if (offset_producer->GetOpcode() == IR::Opcode::IAdd32) {
            m0_val += offset_producer->Arg(1).U32();
        }
        gds_addr = m0_val & 0xFFFF;
    }

    // Patch instruction.
    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
    inst.SetArg(0, ir.Imm32(gds_addr >> 2));
    inst.SetArg(1, ir.Imm32(binding));
}
|
||||
|
||||
void PatchBufferArgs(IR::Block& block, IR::Inst& inst, Info& info) {
|
||||
const auto handle = inst.Arg(0);
|
||||
const auto buffer_res = info.buffers[handle.U32()];
|
||||
const auto buffer = buffer_res.GetSharp(info);
|
||||
|
||||
ASSERT(!buffer.add_tid_enable);
|
||||
|
||||
// Address of constant buffer reads can be calculated at IR emittion time.
|
||||
// Address of constant buffer reads can be calculated at IR emission time.
|
||||
if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer) {
|
||||
return;
|
||||
}
|
||||
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
const auto inst_info = inst.Flags<IR::BufferInstInfo>();
|
||||
|
||||
const IR::U32 index_stride = ir.Imm32(buffer.index_stride);
|
||||
const IR::U32 element_size = ir.Imm32(buffer.element_size);
|
||||
|
||||
|
@ -366,82 +534,38 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
|||
inst.SetArg(1, address);
|
||||
}
|
||||
|
||||
void PatchTextureBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
||||
Descriptors& descriptors) {
|
||||
const IR::Inst* handle = inst.Arg(0).InstRecursive();
|
||||
const IR::Inst* producer = handle->Arg(0).InstRecursive();
|
||||
const auto sharp = TrackSharp(producer, info);
|
||||
const auto buffer = info.ReadUdSharp<AmdGpu::Buffer>(sharp);
|
||||
const s32 binding = descriptors.Add(TextureBufferResource{
|
||||
.sharp_idx = sharp,
|
||||
.is_written = inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32,
|
||||
});
|
||||
void PatchTextureBufferArgs(IR::Block& block, IR::Inst& inst, Info& info) {
|
||||
const auto handle = inst.Arg(0);
|
||||
const auto buffer_res = info.texture_buffers[handle.U32()];
|
||||
const auto buffer = buffer_res.GetSharp(info);
|
||||
|
||||
// Replace handle with binding index in texture buffer resource list.
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
inst.SetArg(0, ir.Imm32(binding));
|
||||
ASSERT(!buffer.swizzle_enable && !buffer.add_tid_enable);
|
||||
}
|
||||
|
||||
IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value& t,
|
||||
const IR::Value& z, bool is_written, bool is_array) {
|
||||
// When cubemap is written with imageStore it is treated like 2DArray.
|
||||
if (is_written) {
|
||||
return ir.CompositeConstruct(s, t, z);
|
||||
}
|
||||
|
||||
ASSERT(s.Type() == IR::Type::F32); // in case of fetched image need to adjust the code below
|
||||
|
||||
// We need to fix x and y coordinate,
|
||||
// because the s and t coordinate will be scaled and plus 1.5 by v_madak_f32.
|
||||
// We already force the scale value to be 1.0 when handling v_cubema_f32,
|
||||
// here we subtract 1.5 to recover the original value.
|
||||
const IR::Value x = ir.FPSub(IR::F32{s}, ir.Imm32(1.5f));
|
||||
const IR::Value y = ir.FPSub(IR::F32{t}, ir.Imm32(1.5f));
|
||||
if (is_array) {
|
||||
const IR::U32 array_index = ir.ConvertFToU(32, IR::F32{z});
|
||||
const IR::U32 face_id = ir.BitwiseAnd(array_index, ir.Imm32(7u));
|
||||
const IR::U32 slice_id = ir.ShiftRightLogical(array_index, ir.Imm32(3u));
|
||||
return ir.CompositeConstruct(x, y, ir.ConvertIToF(32, 32, false, face_id),
|
||||
ir.ConvertIToF(32, 32, false, slice_id));
|
||||
} else {
|
||||
return ir.CompositeConstruct(x, y, z);
|
||||
}
|
||||
}
|
||||
|
||||
void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
||||
Descriptors& descriptors, const IR::Inst* producer,
|
||||
const u32 image_binding, const AmdGpu::Image& image) {
|
||||
// Read sampler sharp. This doesn't exist for IMAGE_LOAD/IMAGE_STORE instructions
|
||||
const auto [sampler_binding, sampler] = [&] -> std::pair<u32, AmdGpu::Sampler> {
|
||||
ASSERT(producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2);
|
||||
const IR::Value& handle = producer->Arg(1);
|
||||
// Inline sampler resource.
|
||||
if (handle.IsImmediate()) {
|
||||
LOG_WARNING(Render_Vulkan, "Inline sampler detected");
|
||||
const auto inline_sampler = AmdGpu::Sampler{.raw0 = handle.U32()};
|
||||
const auto binding = descriptors.Add(SamplerResource{
|
||||
.sharp_idx = std::numeric_limits<u32>::max(),
|
||||
.inline_sampler = inline_sampler,
|
||||
});
|
||||
return {binding, inline_sampler};
|
||||
}
|
||||
// Normal sampler resource.
|
||||
const auto ssharp_handle = handle.InstRecursive();
|
||||
const auto& [ssharp_ud, disable_aniso] = TryDisableAnisoLod0(ssharp_handle);
|
||||
const auto ssharp = TrackSharp(ssharp_ud, info);
|
||||
const auto binding = descriptors.Add(SamplerResource{
|
||||
.sharp_idx = ssharp,
|
||||
.associated_image = image_binding,
|
||||
.disable_aniso = disable_aniso,
|
||||
});
|
||||
return {binding, info.ReadUdSharp<AmdGpu::Sampler>(ssharp)};
|
||||
}();
|
||||
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
|
||||
if (inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32) {
|
||||
const auto swizzled = ApplySwizzle(ir, inst.Arg(2), buffer.DstSelect());
|
||||
const auto converted =
|
||||
ApplyWriteNumberConversionVec4(ir, swizzled, buffer.GetNumberConversion());
|
||||
inst.SetArg(2, converted);
|
||||
} else if (inst.GetOpcode() == IR::Opcode::LoadBufferFormatF32) {
|
||||
const auto inst_info = inst.Flags<IR::BufferInstInfo>();
|
||||
const auto texel = ir.LoadBufferFormat(inst.Arg(0), inst.Arg(1), inst_info);
|
||||
const auto swizzled = ApplySwizzle(ir, texel, buffer.DstSelect());
|
||||
const auto converted =
|
||||
ApplyReadNumberConversionVec4(ir, swizzled, buffer.GetNumberConversion());
|
||||
inst.ReplaceUsesWith(converted);
|
||||
}
|
||||
}
|
||||
|
||||
void PatchImageSampleArgs(IR::Block& block, IR::Inst& inst, Info& info,
|
||||
const ImageResource& image_res, const AmdGpu::Image& image) {
|
||||
const auto handle = inst.Arg(0);
|
||||
const auto sampler_res = info.samplers[(handle.U32() >> 16) & 0xFFFF];
|
||||
auto sampler = sampler_res.GetSharp(info);
|
||||
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
const auto inst_info = inst.Flags<IR::TextureInstInfo>();
|
||||
const IR::U32 handle = ir.Imm32(image_binding | sampler_binding << 16);
|
||||
const auto view_type = image.GetViewType(image_res.is_array);
|
||||
|
||||
IR::Inst* body1 = inst.Arg(1).InstRecursive();
|
||||
IR::Inst* body2 = inst.Arg(2).InstRecursive();
|
||||
|
@ -488,7 +612,7 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
|||
return ir.BitFieldExtract(IR::U32{arg}, ir.Imm32(off), ir.Imm32(6), true);
|
||||
};
|
||||
|
||||
switch (image.GetType()) {
|
||||
switch (view_type) {
|
||||
case AmdGpu::ImageType::Color1D:
|
||||
case AmdGpu::ImageType::Color1DArray:
|
||||
return read(0);
|
||||
|
@ -497,7 +621,6 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
|||
case AmdGpu::ImageType::Color2DMsaa:
|
||||
return ir.CompositeConstruct(read(0), read(8));
|
||||
case AmdGpu::ImageType::Color3D:
|
||||
case AmdGpu::ImageType::Cube:
|
||||
return ir.CompositeConstruct(read(0), read(8), read(16));
|
||||
default:
|
||||
UNREACHABLE();
|
||||
|
@ -509,7 +632,7 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
|||
if (!inst_info.has_derivatives) {
|
||||
return {};
|
||||
}
|
||||
switch (image.GetType()) {
|
||||
switch (view_type) {
|
||||
case AmdGpu::ImageType::Color1D:
|
||||
case AmdGpu::ImageType::Color1DArray:
|
||||
// du/dx, du/dy
|
||||
|
@ -523,7 +646,6 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
|||
return {ir.CompositeConstruct(get_addr_reg(addr_reg - 4), get_addr_reg(addr_reg - 3)),
|
||||
ir.CompositeConstruct(get_addr_reg(addr_reg - 2), get_addr_reg(addr_reg - 1))};
|
||||
case AmdGpu::ImageType::Color3D:
|
||||
case AmdGpu::ImageType::Cube:
|
||||
// (du/dx, dv/dx, dw/dx), (du/dy, dv/dy, dw/dy)
|
||||
addr_reg = addr_reg + 6;
|
||||
return {ir.CompositeConstruct(get_addr_reg(addr_reg - 6), get_addr_reg(addr_reg - 5),
|
||||
|
@ -539,7 +661,7 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
|||
// Query dimensions of image if needed for normalization.
|
||||
// We can't use the image sharp because it could be bound to a different image later.
|
||||
const auto dimensions =
|
||||
unnormalized ? ir.ImageQueryDimension(ir.Imm32(image_binding), ir.Imm32(0u), ir.Imm1(false))
|
||||
unnormalized ? ir.ImageQueryDimension(handle, ir.Imm32(0u), ir.Imm1(false), inst_info)
|
||||
: IR::Value{};
|
||||
const auto get_coord = [&](u32 coord_idx, u32 dim_idx) -> IR::Value {
|
||||
const auto coord = get_addr_reg(coord_idx);
|
||||
|
@ -554,7 +676,7 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
|||
|
||||
// Now we can load body components as noted in Table 8.9 Image Opcodes with Sampler
|
||||
const IR::Value coords = [&] -> IR::Value {
|
||||
switch (image.GetType()) {
|
||||
switch (view_type) {
|
||||
case AmdGpu::ImageType::Color1D: // x
|
||||
addr_reg = addr_reg + 1;
|
||||
return get_coord(addr_reg - 1, 0);
|
||||
|
@ -573,10 +695,6 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
|||
addr_reg = addr_reg + 3;
|
||||
return ir.CompositeConstruct(get_coord(addr_reg - 3, 0), get_coord(addr_reg - 2, 1),
|
||||
get_coord(addr_reg - 1, 2));
|
||||
case AmdGpu::ImageType::Cube: // x, y, face
|
||||
addr_reg = addr_reg + 3;
|
||||
return PatchCubeCoord(ir, get_coord(addr_reg - 3, 0), get_coord(addr_reg - 2, 1),
|
||||
get_addr_reg(addr_reg - 1), false, inst_info.is_array);
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
@ -589,7 +707,7 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
|||
: IR::F32{};
|
||||
const IR::F32 lod_clamp = inst_info.has_lod_clamp ? get_addr_reg(addr_reg++) : IR::F32{};
|
||||
|
||||
auto new_inst = [&] -> IR::Value {
|
||||
auto texel = [&] -> IR::Value {
|
||||
if (inst_info.is_gather) {
|
||||
if (inst_info.is_depth) {
|
||||
return ir.ImageGatherDref(handle, coords, offset, dref, inst_info);
|
||||
|
@ -611,98 +729,35 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
|||
}
|
||||
return ir.ImageSampleImplicitLod(handle, coords, bias, offset, inst_info);
|
||||
}();
|
||||
inst.ReplaceUsesWithAndRemove(new_inst);
|
||||
|
||||
const auto converted = ApplyReadNumberConversionVec4(ir, texel, image.GetNumberConversion());
|
||||
inst.ReplaceUsesWith(converted);
|
||||
}
|
||||
|
||||
void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
|
||||
const auto pred = [](const IR::Inst* inst) -> std::optional<const IR::Inst*> {
|
||||
const auto opcode = inst->GetOpcode();
|
||||
if (opcode == IR::Opcode::CompositeConstructU32x2 || // IMAGE_SAMPLE (image+sampler)
|
||||
opcode == IR::Opcode::ReadConst || // IMAGE_LOAD (image only)
|
||||
opcode == IR::Opcode::GetUserData) {
|
||||
return inst;
|
||||
}
|
||||
return std::nullopt;
|
||||
};
|
||||
const auto result = IR::BreadthFirstSearch(&inst, pred);
|
||||
ASSERT_MSG(result, "Unable to find image sharp source");
|
||||
const IR::Inst* producer = result.value();
|
||||
const bool has_sampler = producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2;
|
||||
const auto tsharp_handle = has_sampler ? producer->Arg(0).InstRecursive() : producer;
|
||||
|
||||
// Read image sharp.
|
||||
const auto tsharp = TrackSharp(tsharp_handle, info);
|
||||
const auto inst_info = inst.Flags<IR::TextureInstInfo>();
|
||||
auto image = info.ReadUdSharp<AmdGpu::Image>(tsharp);
|
||||
if (!image.Valid()) {
|
||||
LOG_ERROR(Render_Vulkan, "Shader compiled with unbound image!");
|
||||
image = AmdGpu::Image::Null();
|
||||
}
|
||||
ASSERT(image.GetType() != AmdGpu::ImageType::Invalid);
|
||||
const bool is_read = inst.GetOpcode() == IR::Opcode::ImageRead;
|
||||
const bool is_written = inst.GetOpcode() == IR::Opcode::ImageWrite;
|
||||
|
||||
// Patch image instruction if image is FMask.
|
||||
if (image.IsFmask()) {
|
||||
ASSERT_MSG(!is_written, "FMask storage instructions are not supported");
|
||||
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
switch (inst.GetOpcode()) {
|
||||
case IR::Opcode::ImageRead:
|
||||
case IR::Opcode::ImageSampleRaw: {
|
||||
IR::F32 fmaskx = ir.BitCast<IR::F32>(ir.Imm32(0x76543210));
|
||||
IR::F32 fmasky = ir.BitCast<IR::F32>(ir.Imm32(0xfedcba98));
|
||||
inst.ReplaceUsesWith(ir.CompositeConstruct(fmaskx, fmasky));
|
||||
return;
|
||||
}
|
||||
case IR::Opcode::ImageQueryLod:
|
||||
inst.ReplaceUsesWith(ir.Imm32(1));
|
||||
return;
|
||||
case IR::Opcode::ImageQueryDimensions: {
|
||||
IR::Value dims = ir.CompositeConstruct(ir.Imm32(static_cast<u32>(image.width)), // x
|
||||
ir.Imm32(static_cast<u32>(image.width)), // y
|
||||
ir.Imm32(1), ir.Imm32(1)); // depth, mip
|
||||
inst.ReplaceUsesWith(dims);
|
||||
|
||||
// Track FMask resource to do specialization.
|
||||
descriptors.Add(FMaskResource{
|
||||
.sharp_idx = tsharp,
|
||||
});
|
||||
return;
|
||||
}
|
||||
default:
|
||||
UNREACHABLE_MSG("Can't patch fmask instruction {}", inst.GetOpcode());
|
||||
}
|
||||
}
|
||||
|
||||
u32 image_binding = descriptors.Add(ImageResource{
|
||||
.sharp_idx = tsharp,
|
||||
.is_depth = bool(inst_info.is_depth),
|
||||
.is_atomic = IsImageAtomicInstruction(inst),
|
||||
.is_array = bool(inst_info.is_array),
|
||||
.is_read = is_read,
|
||||
.is_written = is_written,
|
||||
});
|
||||
|
||||
// Sample instructions must be resolved into a new instruction using address register data.
|
||||
if (inst.GetOpcode() == IR::Opcode::ImageSampleRaw) {
|
||||
PatchImageSampleInstruction(block, inst, info, descriptors, producer, image_binding, image);
|
||||
return;
|
||||
}
|
||||
|
||||
// Patch image handle
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
inst.SetArg(0, ir.Imm32(image_binding));
|
||||
|
||||
// No need to patch coordinates if we are just querying.
|
||||
void PatchImageArgs(IR::Block& block, IR::Inst& inst, Info& info) {
|
||||
// Nothing to patch for dimension query.
|
||||
if (inst.GetOpcode() == IR::Opcode::ImageQueryDimensions) {
|
||||
return;
|
||||
}
|
||||
|
||||
const auto handle = inst.Arg(0);
|
||||
const auto image_res = info.images[handle.U32() & 0xFFFF];
|
||||
auto image = image_res.GetSharp(info);
|
||||
|
||||
// Sample instructions must be handled separately using address register data.
|
||||
if (inst.GetOpcode() == IR::Opcode::ImageSampleRaw) {
|
||||
PatchImageSampleArgs(block, inst, info, image_res, image);
|
||||
return;
|
||||
}
|
||||
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
const auto inst_info = inst.Flags<IR::TextureInstInfo>();
|
||||
const auto view_type = image.GetViewType(image_res.is_array);
|
||||
|
||||
// Now that we know the image type, adjust texture coordinate vector.
|
||||
IR::Inst* body = inst.Arg(1).InstRecursive();
|
||||
const auto [coords, arg] = [&] -> std::pair<IR::Value, IR::Value> {
|
||||
switch (image.GetType()) {
|
||||
switch (view_type) {
|
||||
case AmdGpu::ImageType::Color1D: // x, [lod]
|
||||
return {body->Arg(0), body->Arg(1)};
|
||||
case AmdGpu::ImageType::Color1DArray: // x, slice, [lod]
|
||||
|
@ -718,153 +773,74 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
|
|||
[[fallthrough]];
|
||||
case AmdGpu::ImageType::Color3D: // x, y, z, [lod]
|
||||
return {ir.CompositeConstruct(body->Arg(0), body->Arg(1), body->Arg(2)), body->Arg(3)};
|
||||
case AmdGpu::ImageType::Cube: // x, y, face, [lod]
|
||||
return {PatchCubeCoord(ir, body->Arg(0), body->Arg(1), body->Arg(2), is_written,
|
||||
inst_info.is_array),
|
||||
body->Arg(3)};
|
||||
default:
|
||||
UNREACHABLE_MSG("Unknown image type {}", image.GetType());
|
||||
UNREACHABLE_MSG("Unknown image type {}", view_type);
|
||||
}
|
||||
}();
|
||||
inst.SetArg(1, coords);
|
||||
|
||||
if (inst_info.has_lod) {
|
||||
ASSERT(inst.GetOpcode() == IR::Opcode::ImageRead ||
|
||||
inst.GetOpcode() == IR::Opcode::ImageWrite);
|
||||
ASSERT(image.GetType() != AmdGpu::ImageType::Color2DMsaa &&
|
||||
image.GetType() != AmdGpu::ImageType::Color2DMsaaArray);
|
||||
inst.SetArg(2, arg);
|
||||
} else if ((image.GetType() == AmdGpu::ImageType::Color2DMsaa ||
|
||||
image.GetType() == AmdGpu::ImageType::Color2DMsaaArray) &&
|
||||
(inst.GetOpcode() == IR::Opcode::ImageRead ||
|
||||
inst.GetOpcode() == IR::Opcode::ImageWrite)) {
|
||||
inst.SetArg(3, arg);
|
||||
}
|
||||
}
|
||||
const auto has_ms = view_type == AmdGpu::ImageType::Color2DMsaa ||
|
||||
view_type == AmdGpu::ImageType::Color2DMsaaArray;
|
||||
ASSERT(!inst_info.has_lod || !has_ms);
|
||||
const auto lod = inst_info.has_lod ? IR::U32{arg} : IR::U32{};
|
||||
const auto ms = has_ms ? IR::U32{arg} : IR::U32{};
|
||||
|
||||
/// Applies the destination swizzle of the bound texture buffer to formatted buffer accesses.
///
/// The binding index is read from argument 0 of `inst` and used to look up the resource in
/// `info.texture_buffers`. Nothing is changed when the resolved sharp is not valid.
/// - StoreBufferFormatF32: argument 2 is replaced by its swizzled form.
/// - LoadBufferFormatF32: a new formatted load is emitted, swizzled, and every use of `inst`
///   is redirected to the swizzled value.
/// Any other opcode is left untouched.
void PatchTextureBufferInterpretation(IR::Block& block, IR::Inst& inst, Info& info) {
    const auto slot = inst.Arg(0).U32();
    const auto resource = info.texture_buffers[slot];
    const auto sharp = resource.GetSharp(info);
    if (!sharp.Valid()) {
        // Don't need to swizzle invalid buffer.
        return;
    }

    // New instructions are emitted at the position of the instruction being patched.
    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
    switch (inst.GetOpcode()) {
    case IR::Opcode::StoreBufferFormatF32: {
        const auto swizzled_value = ApplySwizzle(ir, inst.Arg(2), sharp.DstSelect());
        inst.SetArg(2, swizzled_value);
        break;
    }
    case IR::Opcode::LoadBufferFormatF32: {
        const auto flags = inst.Flags<IR::BufferInstInfo>();
        const auto loaded = ir.LoadBufferFormat(inst.Arg(0), inst.Arg(1), flags);
        const auto swizzled_value = ApplySwizzle(ir, loaded, sharp.DstSelect());
        inst.ReplaceUsesWith(swizzled_value);
        break;
    }
    default:
        break;
    }
}
|
||||
|
||||
/// Applies the destination swizzle of a storage image to image reads and writes.
///
/// Argument 0 of `inst` holds the image handle; it is masked with 0xFFFF before indexing
/// `info.images`. The instruction is left alone unless the sharp is valid and the resource
/// reports itself as storage for that sharp.
/// - ImageWrite: argument 4 is replaced by its swizzled form.
/// - ImageRead: a new read is emitted (forwarding lod/ms only when present), swizzled, and
///   every use of `inst` is redirected to the swizzled value.
void PatchImageInterpretation(IR::Block& block, IR::Inst& inst, Info& info) {
    const auto handle = inst.Arg(0).U32();
    const auto resource = info.images[handle & 0xFFFF];
    const auto sharp = resource.GetSharp(info);
    if (!(sharp.Valid() && resource.IsStorage(sharp))) {
        // Don't need to swizzle invalid or non-storage image.
        return;
    }

    // New instructions are emitted at the position of the instruction being patched.
    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
    switch (inst.GetOpcode()) {
    case IR::Opcode::ImageWrite: {
        const auto swizzled_texel = ApplySwizzle(ir, inst.Arg(4), sharp.DstSelect());
        inst.SetArg(4, swizzled_texel);
        break;
    }
    case IR::Opcode::ImageRead: {
        const auto flags = inst.Flags<IR::TextureInstInfo>();
        const auto lod_arg = inst.Arg(2);
        const auto ms_arg = inst.Arg(3);
        // Empty arguments are forwarded as default-constructed U32 values.
        const IR::U32 lod = lod_arg.IsEmpty() ? IR::U32{} : IR::U32{lod_arg};
        const IR::U32 ms = ms_arg.IsEmpty() ? IR::U32{} : IR::U32{ms_arg};
        const auto texel = ir.ImageRead(inst.Arg(0), inst.Arg(1), lod, ms, flags);
        const auto swizzled_texel = ApplySwizzle(ir, texel, sharp.DstSelect());
        inst.ReplaceUsesWith(swizzled_texel);
        break;
    }
    default:
        break;
    }
}
|
||||
|
||||
void PatchDataRingInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
||||
Descriptors& descriptors) {
|
||||
// Insert gds binding in the shader if it doesn't exist already.
|
||||
// The buffer is used for append/consume counters.
|
||||
constexpr static AmdGpu::Buffer GdsSharp{.base_address = 1};
|
||||
const u32 binding = descriptors.Add(BufferResource{
|
||||
.used_types = IR::Type::U32,
|
||||
.inline_cbuf = GdsSharp,
|
||||
.is_gds_buffer = true,
|
||||
.is_written = true,
|
||||
});
|
||||
|
||||
const auto pred = [](const IR::Inst* inst) -> std::optional<const IR::Inst*> {
|
||||
if (inst->GetOpcode() == IR::Opcode::GetUserData) {
|
||||
return inst;
|
||||
const auto is_storage = image_res.is_written;
|
||||
if (inst.GetOpcode() == IR::Opcode::ImageRead) {
|
||||
auto texel = ir.ImageRead(handle, coords, lod, ms, inst_info);
|
||||
if (is_storage) {
|
||||
// Storage image requires shader swizzle.
|
||||
texel = ApplySwizzle(ir, texel, image.DstSelect());
|
||||
}
|
||||
return std::nullopt;
|
||||
};
|
||||
const auto converted =
|
||||
ApplyReadNumberConversionVec4(ir, texel, image.GetNumberConversion());
|
||||
inst.ReplaceUsesWith(converted);
|
||||
} else {
|
||||
inst.SetArg(1, coords);
|
||||
if (inst.GetOpcode() == IR::Opcode::ImageWrite) {
|
||||
inst.SetArg(2, lod);
|
||||
inst.SetArg(3, ms);
|
||||
|
||||
// Attempt to deduce the GDS address of counter at compile time.
|
||||
const u32 gds_addr = [&] {
|
||||
const IR::Value& gds_offset = inst.Arg(0);
|
||||
if (gds_offset.IsImmediate()) {
|
||||
// Nothing to do, offset is known.
|
||||
return gds_offset.U32() & 0xFFFF;
|
||||
auto texel = inst.Arg(4);
|
||||
if (is_storage) {
|
||||
// Storage image requires shader swizzle.
|
||||
texel = ApplySwizzle(ir, texel, image.DstSelect());
|
||||
}
|
||||
const auto converted =
|
||||
ApplyWriteNumberConversionVec4(ir, texel, image.GetNumberConversion());
|
||||
inst.SetArg(4, converted);
|
||||
}
|
||||
const auto result = IR::BreadthFirstSearch(&inst, pred);
|
||||
ASSERT_MSG(result, "Unable to track M0 source");
|
||||
|
||||
// M0 must be set by some user data register.
|
||||
const IR::Inst* prod = gds_offset.InstRecursive();
|
||||
const u32 ud_reg = u32(result.value()->Arg(0).ScalarReg());
|
||||
u32 m0_val = info.user_data[ud_reg] >> 16;
|
||||
if (prod->GetOpcode() == IR::Opcode::IAdd32) {
|
||||
m0_val += prod->Arg(1).U32();
|
||||
}
|
||||
return m0_val & 0xFFFF;
|
||||
}();
|
||||
|
||||
// Patch instruction.
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
inst.SetArg(0, ir.Imm32(gds_addr >> 2));
|
||||
inst.SetArg(1, ir.Imm32(binding));
|
||||
}
|
||||
}
|
||||
|
||||
void ResourceTrackingPass(IR::Program& program) {
|
||||
// Iterate resource instructions and patch them after finding the sharp.
|
||||
auto& info = program.info;
|
||||
|
||||
// Pass 1: Track resource sharps
|
||||
Descriptors descriptors{info};
|
||||
for (IR::Block* const block : program.blocks) {
|
||||
for (IR::Inst& inst : block->Instructions()) {
|
||||
if (IsBufferInstruction(inst)) {
|
||||
PatchBufferInstruction(*block, inst, info, descriptors);
|
||||
continue;
|
||||
}
|
||||
if (IsTextureBufferInstruction(inst)) {
|
||||
PatchTextureBufferInstruction(*block, inst, info, descriptors);
|
||||
continue;
|
||||
}
|
||||
if (IsImageInstruction(inst)) {
|
||||
PatchImageInstruction(*block, inst, info, descriptors);
|
||||
continue;
|
||||
}
|
||||
if (IsDataRingInstruction(inst)) {
|
||||
PatchDataRingInstruction(*block, inst, info, descriptors);
|
||||
PatchBufferSharp(*block, inst, info, descriptors);
|
||||
} else if (IsTextureBufferInstruction(inst)) {
|
||||
PatchTextureBufferSharp(*block, inst, info, descriptors);
|
||||
} else if (IsImageInstruction(inst)) {
|
||||
PatchImageSharp(*block, inst, info, descriptors);
|
||||
} else if (IsDataRingInstruction(inst)) {
|
||||
PatchDataRingAccess(*block, inst, info, descriptors);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Second pass to reinterpret format read/write where needed, since we now know
|
||||
// the bindings and their properties.
|
||||
|
||||
// Pass 2: Patch instruction args
|
||||
for (IR::Block* const block : program.blocks) {
|
||||
for (IR::Inst& inst : block->Instructions()) {
|
||||
if (IsTextureBufferInstruction(inst)) {
|
||||
PatchTextureBufferInterpretation(*block, inst, info);
|
||||
continue;
|
||||
}
|
||||
if (IsImageInstruction(inst)) {
|
||||
PatchImageInterpretation(*block, inst, info);
|
||||
if (IsBufferInstruction(inst)) {
|
||||
PatchBufferArgs(*block, inst, info);
|
||||
} else if (IsTextureBufferInstruction(inst)) {
|
||||
PatchTextureBufferArgs(*block, inst, info);
|
||||
} else if (IsImageInstruction(inst)) {
|
||||
PatchImageArgs(*block, inst, info);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
|
||||
namespace Shader::Optimization {
|
||||
|
||||
void Visit(Info& info, IR::Inst& inst) {
|
||||
void Visit(Info& info, const IR::Inst& inst) {
|
||||
switch (inst.GetOpcode()) {
|
||||
case IR::Opcode::GetAttribute:
|
||||
case IR::Opcode::GetAttributeU32:
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
#pragma once
|
||||
|
||||
#include "shader_recompiler/ir/ir_emitter.h"
|
||||
#include "video_core/amdgpu/resource.h"
|
||||
#include "video_core/amdgpu/types.h"
|
||||
|
||||
namespace Shader::IR {
|
||||
|
||||
|
@ -21,4 +21,66 @@ inline Value ApplySwizzle(IREmitter& ir, const Value& vector, const AmdGpu::Comp
|
|||
return swizzled;
|
||||
}
|
||||
|
||||
/// Applies a number conversion in the read direction.
|
||||
inline F32 ApplyReadNumberConversion(IREmitter& ir, const F32& value,
|
||||
const AmdGpu::NumberConversion& conversion) {
|
||||
switch (conversion) {
|
||||
case AmdGpu::NumberConversion::None:
|
||||
return value;
|
||||
case AmdGpu::NumberConversion::UintToUscaled:
|
||||
return ir.ConvertUToF(32, 32, ir.BitCast<U32>(value));
|
||||
case AmdGpu::NumberConversion::SintToSscaled:
|
||||
return ir.ConvertSToF(32, 32, ir.BitCast<U32>(value));
|
||||
case AmdGpu::NumberConversion::UnormToUbnorm:
|
||||
// Convert 0...1 to -1...1
|
||||
return ir.FPSub(ir.FPMul(value, ir.Imm32(2.f)), ir.Imm32(1.f));
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
/// Applies a read-direction number conversion to each of the four components of `value`.
///
/// When the conversion is None the input is returned as-is and nothing is emitted. Otherwise
/// components 0..3 are extracted, converted one after another with ApplyReadNumberConversion,
/// and recombined into a new 4-component composite.
inline Value ApplyReadNumberConversionVec4(IREmitter& ir, const Value& value,
                                           const AmdGpu::NumberConversion& conversion) {
    if (conversion == AmdGpu::NumberConversion::None) {
        return value;
    }

    const auto convert_component = [&](const u32 index) {
        return ApplyReadNumberConversion(ir, F32{ir.CompositeExtract(value, index)}, conversion);
    };
    // Kept as separate statements so the components are emitted in x, y, z, w order.
    const auto x = convert_component(0);
    const auto y = convert_component(1);
    const auto z = convert_component(2);
    const auto w = convert_component(3);
    return ir.CompositeConstruct(x, y, z, w);
}
|
||||
|
||||
/// Applies a number conversion in the write direction.
|
||||
inline F32 ApplyWriteNumberConversion(IREmitter& ir, const F32& value,
|
||||
const AmdGpu::NumberConversion& conversion) {
|
||||
switch (conversion) {
|
||||
case AmdGpu::NumberConversion::None:
|
||||
return value;
|
||||
case AmdGpu::NumberConversion::UintToUscaled:
|
||||
// Need to return float type to maintain IR semantics.
|
||||
return ir.BitCast<F32>(U32{ir.ConvertFToU(32, value)});
|
||||
case AmdGpu::NumberConversion::SintToSscaled:
|
||||
// Need to return float type to maintain IR semantics.
|
||||
return ir.BitCast<F32>(U32{ir.ConvertFToS(32, value)});
|
||||
case AmdGpu::NumberConversion::UnormToUbnorm:
|
||||
// Convert -1...1 to 0...1
|
||||
return ir.FPDiv(ir.FPAdd(value, ir.Imm32(1.f)), ir.Imm32(2.f));
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
/// Applies a write-direction number conversion to each of the four components of `value`.
///
/// When the conversion is None the input is returned as-is and nothing is emitted. Otherwise
/// components 0..3 are extracted, converted one after another with ApplyWriteNumberConversion,
/// and recombined into a new 4-component composite.
inline Value ApplyWriteNumberConversionVec4(IREmitter& ir, const Value& value,
                                            const AmdGpu::NumberConversion& conversion) {
    if (conversion == AmdGpu::NumberConversion::None) {
        return value;
    }

    const auto convert_component = [&](const u32 index) {
        return ApplyWriteNumberConversion(ir, F32{ir.CompositeExtract(value, index)}, conversion);
    };
    // Kept as separate statements so the components are emitted in x, y, z, w order.
    const auto x = convert_component(0);
    const auto y = convert_component(1);
    const auto z = convert_component(2);
    const auto w = convert_component(3);
    return ir.CompositeConstruct(x, y, z, w);
}
|
||||
|
||||
} // namespace Shader::IR
|
||||
|
|
|
@ -24,6 +24,7 @@ struct Profile {
|
|||
bool support_explicit_workgroup_layout{};
|
||||
bool support_legacy_vertex_attributes{};
|
||||
bool supports_image_load_store_lod{};
|
||||
bool supports_native_cube_calc{};
|
||||
bool has_broken_spirv_clamp{};
|
||||
bool lower_left_origin_mode{};
|
||||
bool needs_manual_interpolation{};
|
||||
|
|
|
@ -180,6 +180,7 @@ struct FragmentRuntimeInfo {
|
|||
std::array<PsInput, 32> inputs;
|
||||
struct PsColorBuffer {
|
||||
AmdGpu::NumberFormat num_format;
|
||||
AmdGpu::NumberConversion num_conversion;
|
||||
AmdGpu::CompMapping swizzle;
|
||||
|
||||
auto operator<=>(const PsColorBuffer&) const noexcept = default;
|
||||
|
|
|
@ -32,6 +32,7 @@ struct BufferSpecialization {
|
|||
struct TextureBufferSpecialization {
|
||||
bool is_integer = false;
|
||||
AmdGpu::CompMapping dst_select{};
|
||||
AmdGpu::NumberConversion num_conversion{};
|
||||
|
||||
auto operator<=>(const TextureBufferSpecialization&) const = default;
|
||||
};
|
||||
|
@ -41,6 +42,7 @@ struct ImageSpecialization {
|
|||
bool is_integer = false;
|
||||
bool is_storage = false;
|
||||
AmdGpu::CompMapping dst_select{};
|
||||
AmdGpu::NumberConversion num_conversion{};
|
||||
|
||||
auto operator<=>(const ImageSpecialization&) const = default;
|
||||
};
|
||||
|
@ -107,15 +109,17 @@ struct StageSpecialization {
|
|||
[](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
|
||||
spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt());
|
||||
spec.dst_select = sharp.DstSelect();
|
||||
spec.num_conversion = sharp.GetNumberConversion();
|
||||
});
|
||||
ForEachSharp(binding, images, info->images,
|
||||
[](auto& spec, const auto& desc, AmdGpu::Image sharp) {
|
||||
spec.type = sharp.GetBoundType();
|
||||
spec.type = sharp.GetViewType(desc.is_array);
|
||||
spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt());
|
||||
spec.is_storage = desc.IsStorage(sharp);
|
||||
spec.is_storage = desc.is_written;
|
||||
if (spec.is_storage) {
|
||||
spec.dst_select = sharp.DstSelect();
|
||||
}
|
||||
spec.num_conversion = sharp.GetNumberConversion();
|
||||
});
|
||||
ForEachSharp(binding, fmasks, info->fmasks,
|
||||
[](auto& spec, const auto& desc, AmdGpu::Image sharp) {
|
||||
|
|
|
@ -20,9 +20,9 @@
|
|||
#include "common/types.h"
|
||||
#include "common/unique_function.h"
|
||||
#include "shader_recompiler/params.h"
|
||||
#include "types.h"
|
||||
#include "video_core/amdgpu/pixel_format.h"
|
||||
#include "video_core/amdgpu/resource.h"
|
||||
#include "video_core/amdgpu/types.h"
|
||||
|
||||
namespace Vulkan {
|
||||
class Rasterizer;
|
||||
|
@ -899,7 +899,12 @@ struct Liverpool {
|
|||
// There is a small difference between T# and CB number types, account for it.
|
||||
return RemapNumberFormat(info.number_type == NumberFormat::SnormNz
|
||||
? NumberFormat::Srgb
|
||||
: info.number_type.Value());
|
||||
: info.number_type.Value(),
|
||||
info.format);
|
||||
}
|
||||
|
||||
/// Returns the number conversion that MapNumberConversion assigns to this target's
/// `info.number_type`.
[[nodiscard]] NumberConversion GetNumberConversion() const {
    const auto conversion = MapNumberConversion(info.number_type);
    return conversion;
}
|
||||
|
||||
[[nodiscard]] CompMapping Swizzle() const {
|
||||
|
@ -938,7 +943,7 @@ struct Liverpool {
|
|||
const auto swap_idx = static_cast<u32>(info.comp_swap.Value());
|
||||
const auto components_idx = NumComponents(info.format) - 1;
|
||||
const auto mrt_swizzle = mrt_swizzles[swap_idx][components_idx];
|
||||
return RemapComponents(info.format, mrt_swizzle);
|
||||
return RemapSwizzle(info.format, mrt_swizzle);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -100,7 +100,7 @@ std::string_view NameOf(NumberFormat fmt) {
|
|||
return "Srgb";
|
||||
case NumberFormat::Ubnorm:
|
||||
return "Ubnorm";
|
||||
case NumberFormat::UbnromNz:
|
||||
case NumberFormat::UbnormNz:
|
||||
return "UbnormNz";
|
||||
case NumberFormat::Ubint:
|
||||
return "Ubint";
|
||||
|
|
|
@ -11,96 +11,6 @@
|
|||
|
||||
namespace AmdGpu {
|
||||
|
||||
/// Source selector for one destination component of a swizzle.
/// No enumerators are declared for the values 2 and 3.
enum class CompSwizzle : u32 {
    Zero = 0,  // constant 0
    One = 1,   // constant 1
    Red = 4,   // source component 0
    Green = 5, // source component 1
    Blue = 6,  // source component 2
    Alpha = 7, // source component 3
};
|
||||
|
||||
struct CompMapping {
|
||||
CompSwizzle r : 3;
|
||||
CompSwizzle g : 3;
|
||||
CompSwizzle b : 3;
|
||||
CompSwizzle a : 3;
|
||||
|
||||
auto operator<=>(const CompMapping& other) const = default;
|
||||
|
||||
template <typename T>
|
||||
[[nodiscard]] std::array<T, 4> Apply(const std::array<T, 4>& data) const {
|
||||
return {
|
||||
ApplySingle(data, r),
|
||||
ApplySingle(data, g),
|
||||
ApplySingle(data, b),
|
||||
ApplySingle(data, a),
|
||||
};
|
||||
}
|
||||
|
||||
private:
|
||||
template <typename T>
|
||||
T ApplySingle(const std::array<T, 4>& data, const CompSwizzle swizzle) const {
|
||||
switch (swizzle) {
|
||||
case CompSwizzle::Zero:
|
||||
return T(0);
|
||||
case CompSwizzle::One:
|
||||
return T(1);
|
||||
case CompSwizzle::Red:
|
||||
return data[0];
|
||||
case CompSwizzle::Green:
|
||||
return data[1];
|
||||
case CompSwizzle::Blue:
|
||||
return data[2];
|
||||
case CompSwizzle::Alpha:
|
||||
return data[3];
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/// Replaces three packed data formats with their component-reversed counterparts:
/// 11_11_10 -> 10_11_11, 10_10_10_2 -> 2_10_10_10 and 5_5_5_1 -> 1_5_5_5.
/// Every other format is returned unchanged.
inline DataFormat RemapDataFormat(const DataFormat format) {
    if (format == DataFormat::Format11_11_10) {
        return DataFormat::Format10_11_11;
    }
    if (format == DataFormat::Format10_10_10_2) {
        return DataFormat::Format2_10_10_10;
    }
    if (format == DataFormat::Format5_5_5_1) {
        return DataFormat::Format1_5_5_5;
    }
    return format;
}
|
||||
|
||||
/// Identity mapping: the number format is passed through without modification.
inline NumberFormat RemapNumberFormat(const NumberFormat format) {
    const NumberFormat remapped = format;
    return remapped;
}
|
||||
|
||||
/// Reorders a swizzle to match the data formats that RemapDataFormat substitutes.
///
/// - 11_11_10: r and b are exchanged; g and a are kept.
/// - 10_10_10_2 and 5_5_5_1: the channel order is fully reversed (r<->a, g<->b).
/// - Any other format: the mapping is returned unchanged.
inline CompMapping RemapComponents(const DataFormat format, const CompMapping components) {
    if (format == DataFormat::Format11_11_10) {
        return CompMapping{
            .r = components.b,
            .g = components.g,
            .b = components.r,
            .a = components.a,
        };
    }
    if (format == DataFormat::Format10_10_10_2 || format == DataFormat::Format5_5_5_1) {
        return CompMapping{
            .r = components.a,
            .g = components.b,
            .b = components.g,
            .a = components.r,
        };
    }
    return components;
}
|
||||
|
||||
// Table 8.5 Buffer Resource Descriptor [Sea Islands Series Instruction Set Architecture]
|
||||
struct Buffer {
|
||||
u64 base_address : 44;
|
||||
|
@ -140,17 +50,21 @@ struct Buffer {
|
|||
.b = CompSwizzle(dst_sel_z),
|
||||
.a = CompSwizzle(dst_sel_w),
|
||||
};
|
||||
return RemapComponents(DataFormat(data_format), dst_sel);
|
||||
return RemapSwizzle(DataFormat(data_format), dst_sel);
|
||||
}
|
||||
|
||||
NumberFormat GetNumberFmt() const noexcept {
|
||||
return RemapNumberFormat(NumberFormat(num_format));
|
||||
return RemapNumberFormat(NumberFormat(num_format), DataFormat(data_format));
|
||||
}
|
||||
|
||||
/// Returns the descriptor's `data_format` field after it has passed through RemapDataFormat.
DataFormat GetDataFmt() const noexcept {
    const auto raw_format = static_cast<DataFormat>(data_format);
    return RemapDataFormat(raw_format);
}
|
||||
|
||||
/// Returns the number conversion that MapNumberConversion assigns to the descriptor's raw
/// `num_format` field.
NumberConversion GetNumberConversion() const noexcept {
    const auto raw_format = static_cast<NumberFormat>(num_format);
    return MapNumberConversion(raw_format);
}
|
||||
|
||||
/// Returns the descriptor's `stride` field.
u32 GetStride() const noexcept {
    const u32 value = stride;
    return value;
}
|
||||
|
@ -305,22 +219,22 @@ struct Image {
|
|||
.b = CompSwizzle(dst_sel_z),
|
||||
.a = CompSwizzle(dst_sel_w),
|
||||
};
|
||||
return RemapComponents(DataFormat(data_format), dst_sel);
|
||||
return RemapSwizzle(DataFormat(data_format), dst_sel);
|
||||
}
|
||||
|
||||
/// Returns the descriptor's `pitch` field plus one.
u32 Pitch() const {
    return static_cast<u32>(pitch + 1);
}
|
||||
|
||||
u32 NumLayers(bool is_array) const {
|
||||
u32 slices = GetType() == ImageType::Color3D ? 1 : depth + 1;
|
||||
if (GetType() == ImageType::Cube) {
|
||||
if (is_array) {
|
||||
slices = last_array + 1;
|
||||
ASSERT(slices % 6 == 0);
|
||||
} else {
|
||||
slices = 6;
|
||||
}
|
||||
[[nodiscard]] u32 NumLayers() const noexcept {
|
||||
// Depth is the number of layers for Array images.
|
||||
u32 slices = depth + 1;
|
||||
if (GetType() == ImageType::Color3D) {
|
||||
// Depth is the actual texture depth for 3D images.
|
||||
slices = 1;
|
||||
} else if (IsCube()) {
|
||||
// Depth is the number of full cubes for Cube images.
|
||||
slices *= 6;
|
||||
}
|
||||
if (pow2pad) {
|
||||
slices = std::bit_ceil(slices);
|
||||
|
@ -342,8 +256,12 @@ struct Image {
|
|||
return 1;
|
||||
}
|
||||
|
||||
bool IsCube() const noexcept {
|
||||
return static_cast<ImageType>(type) == ImageType::Cube;
|
||||
}
|
||||
|
||||
ImageType GetType() const noexcept {
|
||||
return static_cast<ImageType>(type);
|
||||
return IsCube() ? ImageType::Color2DArray : static_cast<ImageType>(type);
|
||||
}
|
||||
|
||||
DataFormat GetDataFmt() const noexcept {
|
||||
|
@ -351,7 +269,11 @@ struct Image {
|
|||
}
|
||||
|
||||
NumberFormat GetNumberFmt() const noexcept {
|
||||
return RemapNumberFormat(NumberFormat(num_format));
|
||||
return RemapNumberFormat(NumberFormat(num_format), DataFormat(data_format));
|
||||
}
|
||||
|
||||
/// Returns the number conversion that MapNumberConversion assigns to the descriptor's raw
/// `num_format` field.
NumberConversion GetNumberConversion() const noexcept {
    const auto raw_format = static_cast<NumberFormat>(num_format);
    return MapNumberConversion(raw_format);
}
|
||||
|
||||
TilingMode GetTilingMode() const {
|
||||
|
@ -371,13 +293,48 @@ struct Image {
|
|||
GetDataFmt() <= DataFormat::FormatFmask64_8;
|
||||
}
|
||||
|
||||
bool IsPartialCubemap() const {
|
||||
const auto viewed_slice = last_array - base_array + 1;
|
||||
return GetType() == ImageType::Cube && viewed_slice < 6;
|
||||
/// Determines the image type a view of this descriptor should use.
///
/// Cube descriptors always keep the type reported by GetType(), regardless of `is_array`.
/// For the remaining types, an array type (1D, 2D or 2D MSAA) is demoted to its non-array
/// counterpart when the instruction does not address the image as an array. Everything else
/// is returned as reported by GetType().
[[nodiscard]] ImageType GetViewType(const bool is_array) const noexcept {
    const auto declared_type = GetType();
    // Cube needs to remain array type regardless of instruction array specifier.
    if (IsCube() || is_array) {
        return declared_type;
    }
    switch (declared_type) {
    case ImageType::Color1DArray:
        return ImageType::Color1D;
    case ImageType::Color2DArray:
        return ImageType::Color2D;
    case ImageType::Color2DMsaaArray:
        return ImageType::Color2DMsaa;
    default:
        return declared_type;
    }
}
|
||||
|
||||
ImageType GetBoundType() const noexcept {
|
||||
return IsPartialCubemap() ? ImageType::Color2DArray : GetType();
|
||||
/// Returns the number of mip levels covered by a view of this descriptor.
///
/// MSAA view types always yield 1. Otherwise the result is the distance from `base_level` to
/// `last_level + 1`, with the upper bound capped at NumLevels(); if that bound does not lie
/// above `base_level`, 1 is returned.
[[nodiscard]] u32 NumViewLevels(const bool is_array) const noexcept {
    const auto view_type = GetViewType(is_array);
    if (view_type == ImageType::Color2DMsaa || view_type == ImageType::Color2DMsaaArray) {
        return 1;
    }
    // Constrain to actual number of available levels.
    const auto level_limit = std::min<u32>(last_level + 1, NumLevels());
    if (level_limit > base_level) {
        return level_limit - base_level;
    }
    return 1;
}
|
||||
|
||||
/// Returns the number of array layers covered by a view of this descriptor.
///
/// The non-array view types (1D, 2D, 2D MSAA, 3D) always yield 1. Otherwise the result is
/// the distance from `base_array` to `last_array + 1`, with the upper bound capped at
/// NumLayers(); if that bound does not lie above `base_array`, 1 is returned.
[[nodiscard]] u32 NumViewLayers(const bool is_array) const noexcept {
    const auto view_type = GetViewType(is_array);
    const bool single_layer =
        view_type == ImageType::Color1D || view_type == ImageType::Color2D ||
        view_type == ImageType::Color2DMsaa || view_type == ImageType::Color3D;
    if (single_layer) {
        return 1;
    }
    // Constrain to actual number of available layers.
    const auto layer_limit = std::min<u32>(last_array + 1, NumLayers());
    if (layer_limit > base_array) {
        return layer_limit - base_array;
    }
    return 1;
}
|
||||
};
|
||||
static_assert(sizeof(Image) == 32); // 256bits
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
|
||||
#include <string_view>
|
||||
#include <fmt/format.h>
|
||||
#include "common/assert.h"
|
||||
#include "common/types.h"
|
||||
|
||||
namespace AmdGpu {
|
||||
|
@ -177,11 +178,138 @@ enum class NumberFormat : u32 {
|
|||
Float = 7,
|
||||
Srgb = 9,
|
||||
Ubnorm = 10,
|
||||
UbnromNz = 11,
|
||||
UbnormNz = 11,
|
||||
Ubint = 12,
|
||||
Ubscaled = 13,
|
||||
};
|
||||
|
||||
/// Source selector for one destination component of a swizzle.
/// No enumerators are declared for the values 2 and 3.
enum class CompSwizzle : u32 {
    Zero = 0,  // constant 0
    One = 1,   // constant 1
    Red = 4,   // source component 0
    Green = 5, // source component 1
    Blue = 6,  // source component 2
    Alpha = 7, // source component 3
};
|
||||
|
||||
/// Conversion a shader has to apply by hand for number formats that RemapNumberFormat
/// replaces with another format (see MapNumberConversion for the mapping).
enum class NumberConversion : u32 {
    None = 0,          // no conversion
    UintToUscaled = 1, // selected for NumberFormat::Uscaled
    SintToSscaled = 2, // selected for NumberFormat::Sscaled
    UnormToUbnorm = 3, // selected for NumberFormat::Ubnorm
};
|
||||
|
||||
struct CompMapping {
|
||||
CompSwizzle r : 3;
|
||||
CompSwizzle g : 3;
|
||||
CompSwizzle b : 3;
|
||||
CompSwizzle a : 3;
|
||||
|
||||
auto operator<=>(const CompMapping& other) const = default;
|
||||
|
||||
template <typename T>
|
||||
[[nodiscard]] std::array<T, 4> Apply(const std::array<T, 4>& data) const {
|
||||
return {
|
||||
ApplySingle(data, r),
|
||||
ApplySingle(data, g),
|
||||
ApplySingle(data, b),
|
||||
ApplySingle(data, a),
|
||||
};
|
||||
}
|
||||
|
||||
private:
|
||||
template <typename T>
|
||||
T ApplySingle(const std::array<T, 4>& data, const CompSwizzle swizzle) const {
|
||||
switch (swizzle) {
|
||||
case CompSwizzle::Zero:
|
||||
return T(0);
|
||||
case CompSwizzle::One:
|
||||
return T(1);
|
||||
case CompSwizzle::Red:
|
||||
return data[0];
|
||||
case CompSwizzle::Green:
|
||||
return data[1];
|
||||
case CompSwizzle::Blue:
|
||||
return data[2];
|
||||
case CompSwizzle::Alpha:
|
||||
return data[3];
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/// Replaces three packed data formats with their component-reversed counterparts:
/// 11_11_10 -> 10_11_11, 10_10_10_2 -> 2_10_10_10 and 5_5_5_1 -> 1_5_5_5.
/// Every other format is returned unchanged.
inline DataFormat RemapDataFormat(const DataFormat format) {
    if (format == DataFormat::Format11_11_10) {
        return DataFormat::Format10_11_11;
    }
    if (format == DataFormat::Format10_10_10_2) {
        return DataFormat::Format2_10_10_10;
    }
    if (format == DataFormat::Format5_5_5_1) {
        return DataFormat::Format1_5_5_5;
    }
    return format;
}
|
||||
|
||||
/// Substitutes a number format with the one that is actually used for the resource.
///
/// - Uscaled -> Uint, Sscaled -> Sint, Ubnorm -> Unorm (MapNumberConversion reports the
///   matching shader-side conversion for these three).
/// - Float combined with the 8-bit data format -> Uint (see the comment in the body).
/// - Anything else is returned unchanged.
inline NumberFormat RemapNumberFormat(const NumberFormat format, const DataFormat data_format) {
    if (format == NumberFormat::Uscaled) {
        return NumberFormat::Uint;
    }
    if (format == NumberFormat::Sscaled) {
        return NumberFormat::Sint;
    }
    if (format == NumberFormat::Ubnorm) {
        return NumberFormat::Unorm;
    }
    if (format == NumberFormat::Float && data_format == DataFormat::Format8) {
        // Games may ask for 8-bit float when they want to access the stencil component
        // of a depth-stencil image. Change to unsigned int to match the stencil format.
        // This is also the closest approximation to pass the bits through unconverted.
        return NumberFormat::Uint;
    }
    return format;
}
|
||||
|
||||
/// Reorders a swizzle to match the data formats that RemapDataFormat substitutes.
///
/// - 11_11_10: r and b are exchanged; g and a are kept.
/// - 10_10_10_2 and 5_5_5_1: the channel order is fully reversed (r<->a, g<->b).
/// - Any other format: the swizzle is returned unchanged.
inline CompMapping RemapSwizzle(const DataFormat format, const CompMapping swizzle) {
    if (format == DataFormat::Format11_11_10) {
        return CompMapping{
            .r = swizzle.b,
            .g = swizzle.g,
            .b = swizzle.r,
            .a = swizzle.a,
        };
    }
    if (format == DataFormat::Format10_10_10_2 || format == DataFormat::Format5_5_5_1) {
        return CompMapping{
            .r = swizzle.a,
            .g = swizzle.b,
            .b = swizzle.g,
            .a = swizzle.r,
        };
    }
    return swizzle;
}
|
||||
|
||||
/// Selects the shader-side conversion for a raw (not remapped) number format.
///
/// Uscaled, Sscaled and Ubnorm map to UintToUscaled, SintToSscaled and UnormToUbnorm
/// respectively; every other format maps to None.
inline NumberConversion MapNumberConversion(const NumberFormat format) {
    if (format == NumberFormat::Uscaled) {
        return NumberConversion::UintToUscaled;
    }
    if (format == NumberFormat::Sscaled) {
        return NumberConversion::SintToSscaled;
    }
    if (format == NumberFormat::Ubnorm) {
        return NumberConversion::UnormToUbnorm;
    }
    return NumberConversion::None;
}
|
||||
|
||||
} // namespace AmdGpu
|
||||
|
||||
template <>
|
||||
|
|
|
@ -119,19 +119,23 @@ public:
|
|||
return buffer;
|
||||
}
|
||||
|
||||
std::optional<vk::BufferMemoryBarrier2> GetBarrier(vk::AccessFlagBits2 dst_acess_mask,
|
||||
vk::PipelineStageFlagBits2 dst_stage) {
|
||||
std::optional<vk::BufferMemoryBarrier2> GetBarrier(
|
||||
vk::Flags<vk::AccessFlagBits2> dst_acess_mask, vk::PipelineStageFlagBits2 dst_stage,
|
||||
u32 offset = 0) {
|
||||
if (dst_acess_mask == access_mask && stage == dst_stage) {
|
||||
return {};
|
||||
}
|
||||
|
||||
DEBUG_ASSERT(offset < size_bytes);
|
||||
|
||||
auto barrier = vk::BufferMemoryBarrier2{
|
||||
.srcStageMask = stage,
|
||||
.srcAccessMask = access_mask,
|
||||
.dstStageMask = dst_stage,
|
||||
.dstAccessMask = dst_acess_mask,
|
||||
.buffer = buffer.buffer,
|
||||
.size = size_bytes,
|
||||
.offset = offset,
|
||||
.size = size_bytes - offset,
|
||||
};
|
||||
access_mask = dst_acess_mask;
|
||||
stage = dst_stage;
|
||||
|
@ -150,8 +154,10 @@ public:
|
|||
Vulkan::Scheduler* scheduler;
|
||||
MemoryUsage usage;
|
||||
UniqueBuffer buffer;
|
||||
vk::AccessFlagBits2 access_mask{vk::AccessFlagBits2::eNone};
|
||||
vk::PipelineStageFlagBits2 stage{vk::PipelineStageFlagBits2::eNone};
|
||||
vk::Flags<vk::AccessFlagBits2> access_mask{
|
||||
vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite |
|
||||
vk::AccessFlagBits2::eTransferRead | vk::AccessFlagBits2::eTransferWrite};
|
||||
vk::PipelineStageFlagBits2 stage{vk::PipelineStageFlagBits2::eAllCommands};
|
||||
};
|
||||
|
||||
class StreamBuffer : public Buffer {
|
||||
|
|
|
@ -10,13 +10,13 @@
|
|||
#include "video_core/amdgpu/liverpool.h"
|
||||
#include "video_core/buffer_cache/buffer_cache.h"
|
||||
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
|
||||
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/texture_cache/texture_cache.h"
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
static constexpr size_t NumVertexBuffers = 32;
|
||||
static constexpr size_t GdsBufferSize = 64_KB;
|
||||
static constexpr size_t StagingBufferSize = 1_GB;
|
||||
static constexpr size_t UboStreamBufferSize = 64_MB;
|
||||
|
@ -34,21 +34,10 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s
|
|||
|
||||
// Ensure the first slot is used for the null buffer
|
||||
const auto null_id =
|
||||
slot_buffers.insert(instance, scheduler, MemoryUsage::DeviceLocal, 0, ReadFlags, 1);
|
||||
slot_buffers.insert(instance, scheduler, MemoryUsage::DeviceLocal, 0, ReadFlags, 16);
|
||||
ASSERT(null_id.index == 0);
|
||||
const vk::Buffer& null_buffer = slot_buffers[null_id].buffer;
|
||||
Vulkan::SetObjectName(instance.GetDevice(), null_buffer, "Null Buffer");
|
||||
|
||||
const vk::BufferViewCreateInfo null_view_ci = {
|
||||
.buffer = null_buffer,
|
||||
.format = vk::Format::eR8Unorm,
|
||||
.offset = 0,
|
||||
.range = VK_WHOLE_SIZE,
|
||||
};
|
||||
const auto [null_view_result, null_view] = instance.GetDevice().createBufferView(null_view_ci);
|
||||
ASSERT_MSG(null_view_result == vk::Result::eSuccess, "Failed to create null buffer view.");
|
||||
null_buffer_view = null_view;
|
||||
Vulkan::SetObjectName(instance.GetDevice(), null_buffer_view, "Null Buffer View");
|
||||
}
|
||||
|
||||
BufferCache::~BufferCache() = default;
|
||||
|
@ -100,35 +89,22 @@ void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si
|
|||
}
|
||||
}
|
||||
|
||||
bool BufferCache::BindVertexBuffers(
|
||||
const Shader::Info& vs_info, const std::optional<Shader::Gcn::FetchShaderData>& fetch_shader) {
|
||||
boost::container::small_vector<vk::VertexInputAttributeDescription2EXT, 16> attributes;
|
||||
boost::container::small_vector<vk::VertexInputBindingDescription2EXT, 16> bindings;
|
||||
SCOPE_EXIT {
|
||||
if (instance.IsVertexInputDynamicState()) {
|
||||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
cmdbuf.setVertexInputEXT(bindings, attributes);
|
||||
} else if (bindings.empty()) {
|
||||
// Required to call bindVertexBuffers2EXT at least once in the current command buffer
|
||||
// with non-null strides without a non-dynamic stride pipeline in between. Thus even
|
||||
// when nothing is bound we still need to make a dummy call. Non-null strides in turn
|
||||
// requires a count greater than 0.
|
||||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
const std::array null_buffers = {GetBuffer(NULL_BUFFER_ID).buffer.buffer};
|
||||
constexpr std::array null_offsets = {static_cast<vk::DeviceSize>(0)};
|
||||
cmdbuf.bindVertexBuffers2EXT(0, null_buffers, null_offsets, null_offsets, null_offsets);
|
||||
}
|
||||
};
|
||||
void BufferCache::BindVertexBuffers(const Vulkan::GraphicsPipeline& pipeline) {
|
||||
Vulkan::VertexInputs<vk::VertexInputAttributeDescription2EXT> attributes;
|
||||
Vulkan::VertexInputs<vk::VertexInputBindingDescription2EXT> bindings;
|
||||
Vulkan::VertexInputs<AmdGpu::Buffer> guest_buffers;
|
||||
pipeline.GetVertexInputs(attributes, bindings, guest_buffers);
|
||||
|
||||
if (!fetch_shader || fetch_shader->attributes.empty()) {
|
||||
return false;
|
||||
if (instance.IsVertexInputDynamicState()) {
|
||||
// Update current vertex inputs.
|
||||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
cmdbuf.setVertexInputEXT(bindings, attributes);
|
||||
}
|
||||
|
||||
std::array<vk::Buffer, NumVertexBuffers> host_buffers;
|
||||
std::array<vk::DeviceSize, NumVertexBuffers> host_offsets;
|
||||
std::array<vk::DeviceSize, NumVertexBuffers> host_sizes;
|
||||
std::array<vk::DeviceSize, NumVertexBuffers> host_strides;
|
||||
boost::container::static_vector<AmdGpu::Buffer, NumVertexBuffers> guest_buffers;
|
||||
if (bindings.empty()) {
|
||||
// If there are no bindings, there is nothing further to do.
|
||||
return;
|
||||
}
|
||||
|
||||
struct BufferRange {
|
||||
VAddr base_address;
|
||||
|
@ -136,61 +112,37 @@ bool BufferCache::BindVertexBuffers(
|
|||
vk::Buffer vk_buffer;
|
||||
u64 offset;
|
||||
|
||||
size_t GetSize() const {
|
||||
[[nodiscard]] size_t GetSize() const {
|
||||
return end_address - base_address;
|
||||
}
|
||||
};
|
||||
|
||||
// Calculate buffers memory overlaps
|
||||
bool has_step_rate = false;
|
||||
boost::container::static_vector<BufferRange, NumVertexBuffers> ranges{};
|
||||
for (const auto& attrib : fetch_shader->attributes) {
|
||||
if (attrib.UsesStepRates()) {
|
||||
has_step_rate = true;
|
||||
continue;
|
||||
// Build list of ranges covering the requested buffers
|
||||
Vulkan::VertexInputs<BufferRange> ranges{};
|
||||
for (const auto& buffer : guest_buffers) {
|
||||
if (buffer.GetSize() > 0) {
|
||||
ranges.emplace_back(buffer.base_address, buffer.base_address + buffer.GetSize());
|
||||
}
|
||||
}
|
||||
|
||||
const auto& buffer = attrib.GetSharp(vs_info);
|
||||
if (buffer.GetSize() == 0) {
|
||||
continue;
|
||||
}
|
||||
guest_buffers.emplace_back(buffer);
|
||||
ranges.emplace_back(buffer.base_address, buffer.base_address + buffer.GetSize());
|
||||
attributes.push_back({
|
||||
.location = attrib.semantic,
|
||||
.binding = attrib.semantic,
|
||||
.format =
|
||||
Vulkan::LiverpoolToVK::SurfaceFormat(buffer.GetDataFmt(), buffer.GetNumberFmt()),
|
||||
.offset = 0,
|
||||
// Merge connecting ranges together
|
||||
Vulkan::VertexInputs<BufferRange> ranges_merged{};
|
||||
if (!ranges.empty()) {
|
||||
std::ranges::sort(ranges, [](const BufferRange& lhv, const BufferRange& rhv) {
|
||||
return lhv.base_address < rhv.base_address;
|
||||
});
|
||||
bindings.push_back({
|
||||
.binding = attrib.semantic,
|
||||
.stride = buffer.GetStride(),
|
||||
.inputRate = attrib.GetStepRate() == Shader::Gcn::VertexAttribute::InstanceIdType::None
|
||||
? vk::VertexInputRate::eVertex
|
||||
: vk::VertexInputRate::eInstance,
|
||||
.divisor = 1,
|
||||
});
|
||||
}
|
||||
if (ranges.empty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
std::ranges::sort(ranges, [](const BufferRange& lhv, const BufferRange& rhv) {
|
||||
return lhv.base_address < rhv.base_address;
|
||||
});
|
||||
|
||||
boost::container::static_vector<BufferRange, NumVertexBuffers> ranges_merged{ranges[0]};
|
||||
for (auto range : ranges) {
|
||||
auto& prev_range = ranges_merged.back();
|
||||
if (prev_range.end_address < range.base_address) {
|
||||
ranges_merged.emplace_back(range);
|
||||
} else {
|
||||
prev_range.end_address = std::max(prev_range.end_address, range.end_address);
|
||||
ranges_merged.emplace_back(ranges[0]);
|
||||
for (auto range : ranges) {
|
||||
auto& prev_range = ranges_merged.back();
|
||||
if (prev_range.end_address < range.base_address) {
|
||||
ranges_merged.emplace_back(range);
|
||||
} else {
|
||||
prev_range.end_address = std::max(prev_range.end_address, range.end_address);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Map buffers
|
||||
// Map buffers for merged ranges
|
||||
for (auto& range : ranges_merged) {
|
||||
const auto [buffer, offset] = ObtainBuffer(range.base_address, range.GetSize(), false);
|
||||
range.vk_buffer = buffer->buffer;
|
||||
|
@ -198,32 +150,39 @@ bool BufferCache::BindVertexBuffers(
|
|||
}
|
||||
|
||||
// Bind vertex buffers
|
||||
const size_t num_buffers = guest_buffers.size();
|
||||
for (u32 i = 0; i < num_buffers; ++i) {
|
||||
const auto& buffer = guest_buffers[i];
|
||||
const auto host_buffer = std::ranges::find_if(ranges_merged, [&](const BufferRange& range) {
|
||||
return (buffer.base_address >= range.base_address &&
|
||||
buffer.base_address < range.end_address);
|
||||
});
|
||||
ASSERT(host_buffer != ranges_merged.cend());
|
||||
|
||||
host_buffers[i] = host_buffer->vk_buffer;
|
||||
host_offsets[i] = host_buffer->offset + buffer.base_address - host_buffer->base_address;
|
||||
host_sizes[i] = buffer.GetSize();
|
||||
host_strides[i] = buffer.GetStride();
|
||||
}
|
||||
|
||||
if (num_buffers > 0) {
|
||||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
if (instance.IsVertexInputDynamicState()) {
|
||||
cmdbuf.bindVertexBuffers(0, num_buffers, host_buffers.data(), host_offsets.data());
|
||||
Vulkan::VertexInputs<vk::Buffer> host_buffers;
|
||||
Vulkan::VertexInputs<vk::DeviceSize> host_offsets;
|
||||
Vulkan::VertexInputs<vk::DeviceSize> host_sizes;
|
||||
Vulkan::VertexInputs<vk::DeviceSize> host_strides;
|
||||
const auto null_buffer =
|
||||
instance.IsNullDescriptorSupported() ? VK_NULL_HANDLE : GetBuffer(NULL_BUFFER_ID).Handle();
|
||||
for (const auto& buffer : guest_buffers) {
|
||||
if (buffer.GetSize() > 0) {
|
||||
const auto host_buffer_info =
|
||||
std::ranges::find_if(ranges_merged, [&](const BufferRange& range) {
|
||||
return buffer.base_address >= range.base_address &&
|
||||
buffer.base_address < range.end_address;
|
||||
});
|
||||
ASSERT(host_buffer_info != ranges_merged.cend());
|
||||
host_buffers.emplace_back(host_buffer_info->vk_buffer);
|
||||
host_offsets.push_back(host_buffer_info->offset + buffer.base_address -
|
||||
host_buffer_info->base_address);
|
||||
} else {
|
||||
cmdbuf.bindVertexBuffers2EXT(0, num_buffers, host_buffers.data(), host_offsets.data(),
|
||||
host_sizes.data(), host_strides.data());
|
||||
host_buffers.emplace_back(null_buffer);
|
||||
host_offsets.push_back(0);
|
||||
}
|
||||
host_sizes.push_back(buffer.GetSize());
|
||||
host_strides.push_back(buffer.GetStride());
|
||||
}
|
||||
|
||||
return has_step_rate;
|
||||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
const auto num_buffers = guest_buffers.size();
|
||||
if (instance.IsVertexInputDynamicState()) {
|
||||
cmdbuf.bindVertexBuffers(0, num_buffers, host_buffers.data(), host_offsets.data());
|
||||
} else {
|
||||
cmdbuf.bindVertexBuffers2EXT(0, num_buffers, host_buffers.data(), host_offsets.data(),
|
||||
host_sizes.data(), host_strides.data());
|
||||
}
|
||||
}
|
||||
|
||||
void BufferCache::BindIndexBuffer(u32 index_offset) {
|
||||
|
@ -479,43 +438,36 @@ void BufferCache::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id,
|
|||
};
|
||||
scheduler.EndRendering();
|
||||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
const std::array pre_barriers = {
|
||||
vk::BufferMemoryBarrier2{
|
||||
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
|
||||
.srcAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
|
||||
.dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
|
||||
.dstAccessMask = vk::AccessFlagBits2::eTransferRead,
|
||||
.buffer = overlap.Handle(),
|
||||
.offset = 0,
|
||||
.size = overlap.SizeBytes(),
|
||||
},
|
||||
};
|
||||
const std::array post_barriers = {
|
||||
vk::BufferMemoryBarrier2{
|
||||
.srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
|
||||
.srcAccessMask = vk::AccessFlagBits2::eTransferRead,
|
||||
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
|
||||
.dstAccessMask = vk::AccessFlagBits2::eMemoryWrite,
|
||||
.buffer = overlap.Handle(),
|
||||
.offset = 0,
|
||||
.size = overlap.SizeBytes(),
|
||||
},
|
||||
vk::BufferMemoryBarrier2{
|
||||
.srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
|
||||
.srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
|
||||
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
|
||||
.dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
|
||||
.buffer = new_buffer.Handle(),
|
||||
.offset = dst_base_offset,
|
||||
.size = overlap.SizeBytes(),
|
||||
},
|
||||
};
|
||||
|
||||
boost::container::static_vector<vk::BufferMemoryBarrier2, 2> pre_barriers{};
|
||||
if (auto src_barrier = overlap.GetBarrier(vk::AccessFlagBits2::eTransferRead,
|
||||
vk::PipelineStageFlagBits2::eTransfer)) {
|
||||
pre_barriers.push_back(*src_barrier);
|
||||
}
|
||||
if (auto dst_barrier =
|
||||
new_buffer.GetBarrier(vk::AccessFlagBits2::eTransferWrite,
|
||||
vk::PipelineStageFlagBits2::eTransfer, dst_base_offset)) {
|
||||
pre_barriers.push_back(*dst_barrier);
|
||||
}
|
||||
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
|
||||
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
|
||||
.bufferMemoryBarrierCount = 1,
|
||||
.bufferMemoryBarrierCount = static_cast<u32>(pre_barriers.size()),
|
||||
.pBufferMemoryBarriers = pre_barriers.data(),
|
||||
});
|
||||
|
||||
cmdbuf.copyBuffer(overlap.Handle(), new_buffer.Handle(), copy);
|
||||
|
||||
boost::container::static_vector<vk::BufferMemoryBarrier2, 2> post_barriers{};
|
||||
if (auto src_barrier =
|
||||
overlap.GetBarrier(vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
|
||||
vk::PipelineStageFlagBits2::eAllCommands)) {
|
||||
post_barriers.push_back(*src_barrier);
|
||||
}
|
||||
if (auto dst_barrier = new_buffer.GetBarrier(
|
||||
vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
|
||||
vk::PipelineStageFlagBits2::eAllCommands, dst_base_offset)) {
|
||||
post_barriers.push_back(*dst_barrier);
|
||||
}
|
||||
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
|
||||
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
|
||||
.bufferMemoryBarrierCount = static_cast<u32>(post_barriers.size()),
|
||||
|
@ -626,7 +578,8 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size,
|
|||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
const vk::BufferMemoryBarrier2 pre_barrier = {
|
||||
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
|
||||
.srcAccessMask = vk::AccessFlagBits2::eMemoryRead,
|
||||
.srcAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite |
|
||||
vk::AccessFlagBits2::eTransferRead | vk::AccessFlagBits2::eTransferWrite,
|
||||
.dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
|
||||
.dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
|
||||
.buffer = buffer.Handle(),
|
||||
|
|
|
@ -5,8 +5,6 @@
|
|||
|
||||
#include <shared_mutex>
|
||||
#include <boost/container/small_vector.hpp>
|
||||
#include <boost/icl/interval_map.hpp>
|
||||
#include <tsl/robin_map.h>
|
||||
#include "common/div_ceil.h"
|
||||
#include "common/slot_vector.h"
|
||||
#include "common/types.h"
|
||||
|
@ -26,6 +24,10 @@ struct FetchShaderData;
|
|||
struct Info;
|
||||
} // namespace Shader
|
||||
|
||||
// Forward declaration so this header can name Vulkan::GraphicsPipeline without including its header.
namespace Vulkan {
|
||||
class GraphicsPipeline;
|
||||
} // namespace Vulkan
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
using BufferId = Common::SlotId;
|
||||
|
@ -71,16 +73,11 @@ public:
|
|||
return slot_buffers[id];
|
||||
}
|
||||
|
||||
[[nodiscard]] vk::BufferView& NullBufferView() {
|
||||
return null_buffer_view;
|
||||
}
|
||||
|
||||
/// Invalidates any buffer in the logical page range.
|
||||
void InvalidateMemory(VAddr device_addr, u64 size);
|
||||
|
||||
/// Binds host vertex buffers for the current draw.
|
||||
bool BindVertexBuffers(const Shader::Info& vs_info,
|
||||
const std::optional<Shader::Gcn::FetchShaderData>& fetch_shader);
|
||||
void BindVertexBuffers(const Vulkan::GraphicsPipeline& pipeline);
|
||||
|
||||
/// Bind host index buffer for the current draw.
|
||||
void BindIndexBuffer(u32 index_offset);
|
||||
|
@ -160,7 +157,6 @@ private:
|
|||
std::shared_mutex mutex;
|
||||
Common::SlotVector<Buffer> slot_buffers;
|
||||
RangeSet gpu_modified_ranges;
|
||||
vk::BufferView null_buffer_view;
|
||||
MemoryTracker memory_tracker;
|
||||
PageTable page_table;
|
||||
};
|
||||
|
|
|
@ -447,7 +447,7 @@ static constexpr vk::FormatFeatureFlags2 GetNumberFormatFeatureFlags(
|
|||
case AmdGpu::NumberFormat::Srgb:
|
||||
return ImageRead | Mrt;
|
||||
case AmdGpu::NumberFormat::Ubnorm:
|
||||
case AmdGpu::NumberFormat::UbnromNz:
|
||||
case AmdGpu::NumberFormat::UbnormNz:
|
||||
case AmdGpu::NumberFormat::Ubint:
|
||||
case AmdGpu::NumberFormat::Ubscaled:
|
||||
return ImageRead;
|
||||
|
@ -468,6 +468,7 @@ static constexpr SurfaceFormatInfo CreateSurfaceFormatInfo(const AmdGpu::DataFor
|
|||
}
|
||||
|
||||
std::span<const SurfaceFormatInfo> SurfaceFormats() {
|
||||
// Uscaled, Sscaled, and Ubnorm formats are automatically remapped and handled in shader.
|
||||
static constexpr std::array formats{
|
||||
// Invalid
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::FormatInvalid, AmdGpu::NumberFormat::Unorm,
|
||||
|
@ -490,7 +491,7 @@ std::span<const SurfaceFormatInfo> SurfaceFormats() {
|
|||
vk::Format::eUndefined),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::FormatInvalid, AmdGpu::NumberFormat::Ubnorm,
|
||||
vk::Format::eUndefined),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::FormatInvalid, AmdGpu::NumberFormat::UbnromNz,
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::FormatInvalid, AmdGpu::NumberFormat::UbnormNz,
|
||||
vk::Format::eUndefined),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::FormatInvalid, AmdGpu::NumberFormat::Ubint,
|
||||
vk::Format::eUndefined),
|
||||
|
@ -501,10 +502,6 @@ std::span<const SurfaceFormatInfo> SurfaceFormats() {
|
|||
vk::Format::eR8Unorm),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8, AmdGpu::NumberFormat::Snorm,
|
||||
vk::Format::eR8Snorm),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8, AmdGpu::NumberFormat::Uscaled,
|
||||
vk::Format::eR8Uscaled),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8, AmdGpu::NumberFormat::Sscaled,
|
||||
vk::Format::eR8Sscaled),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8, AmdGpu::NumberFormat::Uint,
|
||||
vk::Format::eR8Uint),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8, AmdGpu::NumberFormat::Sint,
|
||||
|
@ -516,10 +513,6 @@ std::span<const SurfaceFormatInfo> SurfaceFormats() {
|
|||
vk::Format::eR16Unorm),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16, AmdGpu::NumberFormat::Snorm,
|
||||
vk::Format::eR16Snorm),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16, AmdGpu::NumberFormat::Uscaled,
|
||||
vk::Format::eR16Uscaled),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16, AmdGpu::NumberFormat::Sscaled,
|
||||
vk::Format::eR16Sscaled),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16, AmdGpu::NumberFormat::Uint,
|
||||
vk::Format::eR16Uint),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16, AmdGpu::NumberFormat::Sint,
|
||||
|
@ -531,10 +524,6 @@ std::span<const SurfaceFormatInfo> SurfaceFormats() {
|
|||
vk::Format::eR8G8Unorm),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8, AmdGpu::NumberFormat::Snorm,
|
||||
vk::Format::eR8G8Snorm),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8, AmdGpu::NumberFormat::Uscaled,
|
||||
vk::Format::eR8G8Uscaled),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8, AmdGpu::NumberFormat::Sscaled,
|
||||
vk::Format::eR8G8Sscaled),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8, AmdGpu::NumberFormat::Uint,
|
||||
vk::Format::eR8G8Uint),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8, AmdGpu::NumberFormat::Sint,
|
||||
|
@ -553,10 +542,6 @@ std::span<const SurfaceFormatInfo> SurfaceFormats() {
|
|||
vk::Format::eR16G16Unorm),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16, AmdGpu::NumberFormat::Snorm,
|
||||
vk::Format::eR16G16Snorm),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16, AmdGpu::NumberFormat::Uscaled,
|
||||
vk::Format::eR16G16Uscaled),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16, AmdGpu::NumberFormat::Sscaled,
|
||||
vk::Format::eR16G16Sscaled),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16, AmdGpu::NumberFormat::Uint,
|
||||
vk::Format::eR16G16Uint),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16, AmdGpu::NumberFormat::Sint,
|
||||
|
@ -573,10 +558,6 @@ std::span<const SurfaceFormatInfo> SurfaceFormats() {
|
|||
vk::Format::eA2B10G10R10UnormPack32),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format2_10_10_10, AmdGpu::NumberFormat::Snorm,
|
||||
vk::Format::eA2B10G10R10SnormPack32),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format2_10_10_10, AmdGpu::NumberFormat::Uscaled,
|
||||
vk::Format::eA2B10G10R10UscaledPack32),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format2_10_10_10, AmdGpu::NumberFormat::Sscaled,
|
||||
vk::Format::eA2B10G10R10SscaledPack32),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format2_10_10_10, AmdGpu::NumberFormat::Uint,
|
||||
vk::Format::eA2B10G10R10UintPack32),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format2_10_10_10, AmdGpu::NumberFormat::Sint,
|
||||
|
@ -586,10 +567,6 @@ std::span<const SurfaceFormatInfo> SurfaceFormats() {
|
|||
vk::Format::eR8G8B8A8Unorm),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8_8_8, AmdGpu::NumberFormat::Snorm,
|
||||
vk::Format::eR8G8B8A8Snorm),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8_8_8, AmdGpu::NumberFormat::Uscaled,
|
||||
vk::Format::eR8G8B8A8Uscaled),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8_8_8, AmdGpu::NumberFormat::Sscaled,
|
||||
vk::Format::eR8G8B8A8Sscaled),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8_8_8, AmdGpu::NumberFormat::Uint,
|
||||
vk::Format::eR8G8B8A8Uint),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8_8_8, AmdGpu::NumberFormat::Sint,
|
||||
|
@ -608,10 +585,6 @@ std::span<const SurfaceFormatInfo> SurfaceFormats() {
|
|||
vk::Format::eR16G16B16A16Unorm),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16_16_16, AmdGpu::NumberFormat::Snorm,
|
||||
vk::Format::eR16G16B16A16Snorm),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16_16_16,
|
||||
AmdGpu::NumberFormat::Uscaled, vk::Format::eR16G16B16A16Uscaled),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16_16_16,
|
||||
AmdGpu::NumberFormat::Sscaled, vk::Format::eR16G16B16A16Sscaled),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16_16_16, AmdGpu::NumberFormat::Uint,
|
||||
vk::Format::eR16G16B16A16Uint),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16_16_16, AmdGpu::NumberFormat::Sint,
|
||||
|
|
|
@ -18,6 +18,7 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler
|
|||
: Pipeline{instance_, scheduler_, desc_heap_, pipeline_cache, true}, compute_key{compute_key_} {
|
||||
auto& info = stages[int(Shader::LogicalStage::Compute)];
|
||||
info = &info_;
|
||||
const auto debug_str = GetDebugString();
|
||||
|
||||
const vk::PipelineShaderStageCreateInfo shader_ci = {
|
||||
.stage = vk::ShaderStageFlagBits::eCompute,
|
||||
|
@ -58,9 +59,8 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler
|
|||
for (const auto& image : info->images) {
|
||||
bindings.push_back({
|
||||
.binding = binding++,
|
||||
.descriptorType = image.IsStorage(image.GetSharp(*info))
|
||||
? vk::DescriptorType::eStorageImage
|
||||
: vk::DescriptorType::eSampledImage,
|
||||
.descriptorType = image.is_written ? vk::DescriptorType::eStorageImage
|
||||
: vk::DescriptorType::eSampledImage,
|
||||
.descriptorCount = 1,
|
||||
.stageFlags = vk::ShaderStageFlagBits::eCompute,
|
||||
});
|
||||
|
@ -89,8 +89,9 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler
|
|||
.bindingCount = static_cast<u32>(bindings.size()),
|
||||
.pBindings = bindings.data(),
|
||||
};
|
||||
const auto device = instance.GetDevice();
|
||||
auto [descriptor_set_result, descriptor_set] =
|
||||
instance.GetDevice().createDescriptorSetLayoutUnique(desc_layout_ci);
|
||||
device.createDescriptorSetLayoutUnique(desc_layout_ci);
|
||||
ASSERT_MSG(descriptor_set_result == vk::Result::eSuccess,
|
||||
"Failed to create compute descriptor set layout: {}",
|
||||
vk::to_string(descriptor_set_result));
|
||||
|
@ -107,6 +108,7 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler
|
|||
ASSERT_MSG(layout_result == vk::Result::eSuccess,
|
||||
"Failed to create compute pipeline layout: {}", vk::to_string(layout_result));
|
||||
pipeline_layout = std::move(layout);
|
||||
SetObjectName(device, *pipeline_layout, "Compute PipelineLayout {}", debug_str);
|
||||
|
||||
const vk::ComputePipelineCreateInfo compute_pipeline_ci = {
|
||||
.stage = shader_ci,
|
||||
|
@ -117,6 +119,7 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler
|
|||
ASSERT_MSG(pipeline_result == vk::Result::eSuccess, "Failed to create compute pipeline: {}",
|
||||
vk::to_string(pipeline_result));
|
||||
pipeline = std::move(pipe);
|
||||
SetObjectName(device, *pipeline, "Compute Pipeline {}", debug_str);
|
||||
}
|
||||
|
||||
ComputePipeline::~ComputePipeline() = default;
|
||||
|
|
|
@ -8,7 +8,6 @@
|
|||
|
||||
#include "common/assert.h"
|
||||
#include "common/io_file.h"
|
||||
#include "common/scope_exit.h"
|
||||
#include "shader_recompiler/backend/spirv/emit_spirv_quad_rect.h"
|
||||
#include "shader_recompiler/frontend/fetch_shader.h"
|
||||
#include "shader_recompiler/runtime_info.h"
|
||||
|
@ -16,6 +15,7 @@
|
|||
#include "video_core/buffer_cache/buffer_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_shader_util.h"
|
||||
#include "video_core/texture_cache/texture_cache.h"
|
||||
|
@ -36,6 +36,7 @@ GraphicsPipeline::GraphicsPipeline(
|
|||
const vk::Device device = instance.GetDevice();
|
||||
std::ranges::copy(infos, stages.begin());
|
||||
BuildDescSetLayout();
|
||||
const auto debug_str = GetDebugString();
|
||||
|
||||
const vk::PushConstantRange push_constants = {
|
||||
.stageFlags = gp_stage_flags,
|
||||
|
@ -54,36 +55,13 @@ GraphicsPipeline::GraphicsPipeline(
|
|||
ASSERT_MSG(layout_result == vk::Result::eSuccess,
|
||||
"Failed to create graphics pipeline layout: {}", vk::to_string(layout_result));
|
||||
pipeline_layout = std::move(layout);
|
||||
SetObjectName(device, *pipeline_layout, "Graphics PipelineLayout {}", debug_str);
|
||||
|
||||
boost::container::static_vector<vk::VertexInputBindingDescription, 32> vertex_bindings;
|
||||
boost::container::static_vector<vk::VertexInputAttributeDescription, 32> vertex_attributes;
|
||||
if (fetch_shader && !instance.IsVertexInputDynamicState()) {
|
||||
const auto& vs_info = GetStage(Shader::LogicalStage::Vertex);
|
||||
for (const auto& attrib : fetch_shader->attributes) {
|
||||
if (attrib.UsesStepRates()) {
|
||||
// Skip attribute binding as the data will be pulled by shader
|
||||
continue;
|
||||
}
|
||||
|
||||
const auto buffer = attrib.GetSharp(vs_info);
|
||||
if (buffer.GetSize() == 0) {
|
||||
continue;
|
||||
}
|
||||
vertex_attributes.push_back({
|
||||
.location = attrib.semantic,
|
||||
.binding = attrib.semantic,
|
||||
.format = LiverpoolToVK::SurfaceFormat(buffer.GetDataFmt(), buffer.GetNumberFmt()),
|
||||
.offset = 0,
|
||||
});
|
||||
vertex_bindings.push_back({
|
||||
.binding = attrib.semantic,
|
||||
.stride = buffer.GetStride(),
|
||||
.inputRate =
|
||||
attrib.GetStepRate() == Shader::Gcn::VertexAttribute::InstanceIdType::None
|
||||
? vk::VertexInputRate::eVertex
|
||||
: vk::VertexInputRate::eInstance,
|
||||
});
|
||||
}
|
||||
VertexInputs<vk::VertexInputAttributeDescription> vertex_attributes;
|
||||
VertexInputs<vk::VertexInputBindingDescription> vertex_bindings;
|
||||
VertexInputs<AmdGpu::Buffer> guest_buffers;
|
||||
if (!instance.IsVertexInputDynamicState()) {
|
||||
GetVertexInputs(vertex_attributes, vertex_bindings, guest_buffers);
|
||||
}
|
||||
|
||||
const vk::PipelineVertexInputStateCreateInfo vertex_input_info = {
|
||||
|
@ -159,7 +137,7 @@ GraphicsPipeline::GraphicsPipeline(
|
|||
}
|
||||
if (instance.IsVertexInputDynamicState()) {
|
||||
dynamic_states.push_back(vk::DynamicState::eVertexInputEXT);
|
||||
} else {
|
||||
} else if (!vertex_bindings.empty()) {
|
||||
dynamic_states.push_back(vk::DynamicState::eVertexInputBindingStrideEXT);
|
||||
}
|
||||
|
||||
|
@ -322,10 +300,56 @@ GraphicsPipeline::GraphicsPipeline(
|
|||
ASSERT_MSG(pipeline_result == vk::Result::eSuccess, "Failed to create graphics pipeline: {}",
|
||||
vk::to_string(pipeline_result));
|
||||
pipeline = std::move(pipe);
|
||||
SetObjectName(device, *pipeline, "Graphics Pipeline {}", debug_str);
|
||||
}
|
||||
|
||||
GraphicsPipeline::~GraphicsPipeline() = default;
|
||||
|
||||
/// Builds the vertex input description lists for this pipeline from its fetch shader.
///
/// For every fetch shader attribute that does not use step rates, one entry is appended to
/// each of the three output lists, so the lists stay index-aligned with each other:
///  - attributes:    location/binding/format/offset description of the attribute.
///  - bindings:      binding/stride/input-rate description of the buffer binding.
///  - guest_buffers: the buffer returned by attrib.GetSharp() for the vertex stage.
///
/// The output lists are only appended to, never cleared. Nothing is appended when there is no
/// fetch shader or it has no attributes. Buffers are appended regardless of their size; no
/// GetSize() check is made here.
///
/// Attribute and Binding are the Vulkan description structs to emit; Binding may be
/// vk::VertexInputBindingDescription2EXT, which additionally gets its divisor set.
template <typename Attribute, typename Binding>
|
||||
void GraphicsPipeline::GetVertexInputs(VertexInputs<Attribute>& attributes,
|
||||
VertexInputs<Binding>& bindings,
|
||||
VertexInputs<AmdGpu::Buffer>& guest_buffers) const {
|
||||
// No fetch shader (or an empty one) means there are no vertex inputs to describe.
if (!fetch_shader || fetch_shader->attributes.empty()) {
|
||||
return;
|
||||
}
|
||||
const auto& vs_info = GetStage(Shader::LogicalStage::Vertex);
|
||||
for (const auto& attrib : fetch_shader->attributes) {
|
||||
if (attrib.UsesStepRates()) {
|
||||
// Skip attribute binding as the data will be pulled by shader.
|
||||
continue;
|
||||
}
|
||||
|
||||
const auto& buffer = attrib.GetSharp(vs_info);
|
||||
// The attribute semantic is used both as the shader location and as the binding index.
attributes.push_back(Attribute{
|
||||
.location = attrib.semantic,
|
||||
.binding = attrib.semantic,
|
||||
.format = LiverpoolToVK::SurfaceFormat(buffer.GetDataFmt(), buffer.GetNumberFmt()),
|
||||
.offset = 0,
|
||||
});
|
||||
// Per-vertex input rate when the attribute has no instance id type, per-instance otherwise.
bindings.push_back(Binding{
|
||||
.binding = attrib.semantic,
|
||||
.stride = buffer.GetStride(),
|
||||
.inputRate = attrib.GetStepRate() == Shader::Gcn::VertexAttribute::InstanceIdType::None
|
||||
? vk::VertexInputRate::eVertex
|
||||
: vk::VertexInputRate::eInstance,
|
||||
});
|
||||
// The divisor is only assigned for the 2EXT binding description; the branch is
// discarded at compile time for other Binding types.
if constexpr (std::is_same_v<Binding, vk::VertexInputBindingDescription2EXT>) {
|
||||
bindings.back().divisor = 1;
|
||||
}
|
||||
guest_buffers.emplace_back(buffer);
|
||||
}
|
||||
}
|
||||
|
||||
// Explicitly instantiate GetVertexInputs for the two attribute/binding type pairs that are
// requested by callers: the plain Vulkan descriptions (used by the GraphicsPipeline
// constructor) and the 2EXT descriptions (used by BufferCache::BindVertexBuffers).
|
||||
template void GraphicsPipeline::GetVertexInputs(
|
||||
VertexInputs<vk::VertexInputAttributeDescription>& attributes,
|
||||
VertexInputs<vk::VertexInputBindingDescription>& bindings,
|
||||
VertexInputs<AmdGpu::Buffer>& guest_buffers) const;
|
||||
template void GraphicsPipeline::GetVertexInputs(
|
||||
VertexInputs<vk::VertexInputAttributeDescription2EXT>& attributes,
|
||||
VertexInputs<vk::VertexInputBindingDescription2EXT>& bindings,
|
||||
VertexInputs<AmdGpu::Buffer>& guest_buffers) const;
|
||||
|
||||
void GraphicsPipeline::BuildDescSetLayout() {
|
||||
boost::container::small_vector<vk::DescriptorSetLayoutBinding, 32> bindings;
|
||||
u32 binding{};
|
||||
|
@ -364,9 +388,8 @@ void GraphicsPipeline::BuildDescSetLayout() {
|
|||
for (const auto& image : stage->images) {
|
||||
bindings.push_back({
|
||||
.binding = binding++,
|
||||
.descriptorType = image.IsStorage(image.GetSharp(*stage))
|
||||
? vk::DescriptorType::eStorageImage
|
||||
: vk::DescriptorType::eSampledImage,
|
||||
.descriptorType = image.is_written ? vk::DescriptorType::eStorageImage
|
||||
: vk::DescriptorType::eSampledImage,
|
||||
.descriptorCount = 1,
|
||||
.stageFlags = gp_stage_flags,
|
||||
});
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <boost/container/static_vector.hpp>
|
||||
#include <xxhash.h>
|
||||
|
||||
#include "common/types.h"
|
||||
|
@ -27,11 +28,15 @@ class DescriptorHeap;
|
|||
|
||||
using Liverpool = AmdGpu::Liverpool;
|
||||
|
||||
/// List of per-vertex-input items of type T, stored in a boost static_vector whose capacity
/// is MaxVertexBufferCount.
template <typename T>
|
||||
using VertexInputs = boost::container::static_vector<T, MaxVertexBufferCount>;
|
||||
|
||||
struct GraphicsPipelineKey {
|
||||
std::array<size_t, MaxShaderStages> stage_hashes;
|
||||
u32 num_color_attachments;
|
||||
std::array<vk::Format, Liverpool::NumColorBuffers> color_formats;
|
||||
std::array<AmdGpu::NumberFormat, Liverpool::NumColorBuffers> color_num_formats;
|
||||
std::array<AmdGpu::NumberConversion, Liverpool::NumColorBuffers> color_num_conversions;
|
||||
std::array<AmdGpu::CompMapping, Liverpool::NumColorBuffers> color_swizzles;
|
||||
vk::Format depth_format;
|
||||
vk::Format stencil_format;
|
||||
|
@ -99,6 +104,11 @@ public:
|
|||
key.prim_type == AmdGpu::PrimitiveType::QuadList;
|
||||
}
|
||||
|
||||
/// Gets the attributes, bindings and guest buffers for vertex inputs.
/// Results are appended to the three provided lists; Attribute and Binding select which
/// Vulkan description structs are produced.
|
||||
template <typename Attribute, typename Binding>
|
||||
void GetVertexInputs(VertexInputs<Attribute>& attributes, VertexInputs<Binding>& bindings,
|
||||
VertexInputs<AmdGpu::Buffer>& guest_buffers) const;
|
||||
|
||||
private:
|
||||
void BuildDescSetLayout();
|
||||
|
||||
|
|
|
@ -271,6 +271,7 @@ bool Instance::CreateDevice() {
|
|||
maintenance5 = add_extension(VK_KHR_MAINTENANCE_5_EXTENSION_NAME);
|
||||
legacy_vertex_attributes = add_extension(VK_EXT_LEGACY_VERTEX_ATTRIBUTES_EXTENSION_NAME);
|
||||
image_load_store_lod = add_extension(VK_AMD_SHADER_IMAGE_LOAD_STORE_LOD_EXTENSION_NAME);
|
||||
amd_gcn_shader = add_extension(VK_AMD_GCN_SHADER_EXTENSION_NAME);
|
||||
|
||||
// These extensions are promoted by Vulkan 1.3, but for greater compatibility we use Vulkan 1.2
|
||||
// with extensions.
|
||||
|
|
|
@ -159,6 +159,11 @@ public:
|
|||
return image_load_store_lod;
|
||||
}
|
||||
|
||||
/// Returns true when VK_AMD_gcn_shader is supported.
/// Reads the amd_gcn_shader flag, which Instance::CreateDevice() assigns from
/// add_extension(VK_AMD_GCN_SHADER_EXTENSION_NAME).
|
||||
bool IsAmdGcnShaderSupported() const {
|
||||
return amd_gcn_shader;
|
||||
}
|
||||
|
||||
/// Returns true when geometry shaders are supported by the device
|
||||
bool IsGeometryStageSupported() const {
|
||||
return features.geometryShader;
|
||||
|
@ -334,6 +339,7 @@ private:
|
|||
bool list_restart{};
|
||||
bool legacy_vertex_attributes{};
|
||||
bool image_load_store_lod{};
|
||||
bool amd_gcn_shader{};
|
||||
u64 min_imported_host_pointer_alignment{};
|
||||
u32 subgroup_size{};
|
||||
bool tooling_info{};
|
||||
|
|
|
@ -168,6 +168,7 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS
|
|||
for (u32 i = 0; i < Shader::MaxColorBuffers; i++) {
|
||||
info.fs_info.color_buffers[i] = {
|
||||
.num_format = graphics_key.color_num_formats[i],
|
||||
.num_conversion = graphics_key.color_num_conversions[i],
|
||||
.swizzle = graphics_key.color_swizzles[i],
|
||||
};
|
||||
}
|
||||
|
@ -203,6 +204,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
|
|||
.support_explicit_workgroup_layout = true,
|
||||
.support_legacy_vertex_attributes = instance_.IsLegacyVertexAttributesSupported(),
|
||||
.supports_image_load_store_lod = instance_.IsImageLoadStoreLodSupported(),
|
||||
.supports_native_cube_calc = instance_.IsAmdGcnShaderSupported(),
|
||||
.needs_manual_interpolation = instance.IsFragmentShaderBarycentricSupported() &&
|
||||
instance.GetDriverID() == vk::DriverId::eNvidiaProprietary,
|
||||
.needs_lds_barriers = instance.GetDriverID() == vk::DriverId::eNvidiaProprietary ||
|
||||
|
@ -302,6 +304,7 @@ bool PipelineCache::RefreshGraphicsKey() {
|
|||
key.num_color_attachments = 0;
|
||||
key.color_formats.fill(vk::Format::eUndefined);
|
||||
key.color_num_formats.fill(AmdGpu::NumberFormat::Unorm);
|
||||
key.color_num_conversions.fill(AmdGpu::NumberConversion::None);
|
||||
key.blend_controls.fill({});
|
||||
key.write_masks.fill({});
|
||||
key.color_swizzles.fill({});
|
||||
|
@ -330,6 +333,7 @@ bool PipelineCache::RefreshGraphicsKey() {
|
|||
key.color_formats[remapped_cb] =
|
||||
LiverpoolToVK::SurfaceFormat(col_buf.GetDataFmt(), col_buf.GetNumberFmt());
|
||||
key.color_num_formats[remapped_cb] = col_buf.GetNumberFmt();
|
||||
key.color_num_conversions[remapped_cb] = col_buf.GetNumberConversion();
|
||||
key.color_swizzles[remapped_cb] = col_buf.Swizzle();
|
||||
}
|
||||
|
||||
|
@ -416,17 +420,17 @@ bool PipelineCache::RefreshGraphicsKey() {
|
|||
}
|
||||
}
|
||||
|
||||
const auto vs_info = infos[static_cast<u32>(Shader::LogicalStage::Vertex)];
|
||||
const auto* vs_info = infos[static_cast<u32>(Shader::LogicalStage::Vertex)];
|
||||
if (vs_info && fetch_shader && !instance.IsVertexInputDynamicState()) {
|
||||
// Without vertex input dynamic state, the pipeline needs to specialize on format.
|
||||
// Stride will still be handled outside the pipeline using dynamic state.
|
||||
u32 vertex_binding = 0;
|
||||
for (const auto& attrib : fetch_shader->attributes) {
|
||||
if (attrib.UsesStepRates()) {
|
||||
// Skip attribute binding as the data will be pulled by shader.
|
||||
continue;
|
||||
}
|
||||
const auto& buffer = attrib.GetSharp(*vs_info);
|
||||
if (buffer.GetSize() == 0) {
|
||||
continue;
|
||||
}
|
||||
ASSERT(vertex_binding < MaxVertexBufferCount);
|
||||
key.vertex_buffer_formats[vertex_binding++] =
|
||||
Vulkan::LiverpoolToVK::SurfaceFormat(buffer.GetDataFmt(), buffer.GetNumberFmt());
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
#include "shader_recompiler/info.h"
|
||||
#include "video_core/buffer_cache/buffer_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_pipeline_common.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/texture_cache/texture_cache.h"
|
||||
|
@ -55,4 +56,19 @@ void Pipeline::BindResources(DescriptorWrites& set_writes, const BufferBarriers&
|
|||
cmdbuf.bindDescriptorSets(bind_point, *pipeline_layout, 0, desc_set, {});
|
||||
}
|
||||
|
||||
/// Builds a human-readable description of this pipeline for debugging.
/// Walks `stages` in order and, for every non-null entry, takes the name returned by
/// PipelineCache::GetShaderName(stage, pgm_hash); the names are joined with ',' (no spaces).
/// Returns an empty string when no stage is set.
std::string Pipeline::GetDebugString() const {
|
||||
std::string stage_desc;
|
||||
for (const auto& stage : stages) {
|
||||
// Null entries are skipped.
if (stage) {
|
||||
const auto shader_name = PipelineCache::GetShaderName(stage->stage, stage->pgm_hash);
|
||||
// The first name is stored as-is; later names are appended after a comma separator.
if (stage_desc.empty()) {
|
||||
stage_desc = shader_name;
|
||||
} else {
|
||||
stage_desc = fmt::format("{},{}", stage_desc, shader_name);
|
||||
}
|
||||
}
|
||||
}
|
||||
return stage_desc;
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
|
|
@ -61,6 +61,8 @@ public:
|
|||
const Shader::PushData& push_data) const;
|
||||
|
||||
protected:
|
||||
[[nodiscard]] std::string GetDebugString() const;
|
||||
|
||||
const Instance& instance;
|
||||
Scheduler& scheduler;
|
||||
DescriptorHeap& desc_heap;
|
||||
|
|
|
@ -248,9 +248,7 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
|
|||
return;
|
||||
}
|
||||
|
||||
const auto& vs_info = pipeline->GetStage(Shader::LogicalStage::Vertex);
|
||||
const auto& fetch_shader = pipeline->GetFetchShader();
|
||||
buffer_cache.BindVertexBuffers(vs_info, fetch_shader);
|
||||
buffer_cache.BindVertexBuffers(*pipeline);
|
||||
if (is_indexed) {
|
||||
buffer_cache.BindIndexBuffer(index_offset);
|
||||
}
|
||||
|
@ -258,6 +256,8 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
|
|||
BeginRendering(*pipeline, state);
|
||||
UpdateDynamicState(*pipeline);
|
||||
|
||||
const auto& vs_info = pipeline->GetStage(Shader::LogicalStage::Vertex);
|
||||
const auto& fetch_shader = pipeline->GetFetchShader();
|
||||
const auto [vertex_offset, instance_offset] = GetDrawOffsets(regs, vs_info, fetch_shader);
|
||||
|
||||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
|
@ -292,9 +292,7 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3
|
|||
return;
|
||||
}
|
||||
|
||||
const auto& vs_info = pipeline->GetStage(Shader::LogicalStage::Vertex);
|
||||
const auto& fetch_shader = pipeline->GetFetchShader();
|
||||
buffer_cache.BindVertexBuffers(vs_info, fetch_shader);
|
||||
buffer_cache.BindVertexBuffers(*pipeline);
|
||||
if (is_indexed) {
|
||||
buffer_cache.BindIndexBuffer(0);
|
||||
}
|
||||
|
@ -537,6 +535,7 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
|
|||
}
|
||||
|
||||
// Second pass to re-bind buffers that were updated after binding
|
||||
auto& null_buffer = buffer_cache.GetBuffer(VideoCore::NULL_BUFFER_ID);
|
||||
for (u32 i = 0; i < buffer_bindings.size(); i++) {
|
||||
const auto& [buffer_id, vsharp] = buffer_bindings[i];
|
||||
const auto& desc = stage.buffers[i];
|
||||
|
@ -548,7 +547,6 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
|
|||
} else if (instance.IsNullDescriptorSupported()) {
|
||||
buffer_infos.emplace_back(VK_NULL_HANDLE, 0, VK_WHOLE_SIZE);
|
||||
} else {
|
||||
auto& null_buffer = buffer_cache.GetBuffer(VideoCore::NULL_BUFFER_ID);
|
||||
buffer_infos.emplace_back(null_buffer.Handle(), 0, VK_WHOLE_SIZE);
|
||||
}
|
||||
} else {
|
||||
|
@ -582,17 +580,19 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
|
|||
++binding.buffer;
|
||||
}
|
||||
|
||||
const auto null_buffer_view =
|
||||
instance.IsNullDescriptorSupported() ? VK_NULL_HANDLE : buffer_cache.NullBufferView();
|
||||
for (u32 i = 0; i < texbuffer_bindings.size(); i++) {
|
||||
const auto& [buffer_id, vsharp] = texbuffer_bindings[i];
|
||||
const auto& desc = stage.texture_buffers[i];
|
||||
vk::BufferView& buffer_view = buffer_views.emplace_back(null_buffer_view);
|
||||
// Fallback format for null buffer view; never used in valid buffer case.
|
||||
const auto data_fmt = vsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid
|
||||
? vsharp.GetDataFmt()
|
||||
: AmdGpu::DataFormat::Format8;
|
||||
const u32 fmt_stride = AmdGpu::NumBits(data_fmt) >> 3;
|
||||
vk::BufferView buffer_view;
|
||||
if (buffer_id) {
|
||||
const u32 alignment = instance.TexelBufferMinAlignment();
|
||||
const auto [vk_buffer, offset] = buffer_cache.ObtainBuffer(
|
||||
vsharp.base_address, vsharp.GetSize(), desc.is_written, true, buffer_id);
|
||||
const u32 fmt_stride = AmdGpu::NumBits(vsharp.GetDataFmt()) >> 3;
|
||||
const u32 buf_stride = vsharp.GetStride();
|
||||
ASSERT_MSG(buf_stride % fmt_stride == 0,
|
||||
"Texel buffer stride must match format stride");
|
||||
|
@ -600,9 +600,8 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
|
|||
const u32 adjust = offset - offset_aligned;
|
||||
ASSERT(adjust % fmt_stride == 0);
|
||||
push_data.AddTexelOffset(binding.buffer, buf_stride / fmt_stride, adjust / fmt_stride);
|
||||
buffer_view =
|
||||
vk_buffer->View(offset_aligned, vsharp.GetSize() + adjust, desc.is_written,
|
||||
vsharp.GetDataFmt(), vsharp.GetNumberFmt());
|
||||
buffer_view = vk_buffer->View(offset_aligned, vsharp.GetSize() + adjust,
|
||||
desc.is_written, data_fmt, vsharp.GetNumberFmt());
|
||||
if (auto barrier =
|
||||
vk_buffer->GetBarrier(desc.is_written ? vk::AccessFlagBits2::eShaderWrite
|
||||
: vk::AccessFlagBits2::eShaderRead,
|
||||
|
@ -612,6 +611,11 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
|
|||
if (desc.is_written) {
|
||||
texture_cache.InvalidateMemoryFromGPU(vsharp.base_address, vsharp.GetSize());
|
||||
}
|
||||
} else if (instance.IsNullDescriptorSupported()) {
|
||||
buffer_view = VK_NULL_HANDLE;
|
||||
} else {
|
||||
buffer_view =
|
||||
null_buffer.View(0, fmt_stride, desc.is_written, data_fmt, vsharp.GetNumberFmt());
|
||||
}
|
||||
|
||||
set_writes.push_back({
|
||||
|
@ -621,7 +625,7 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
|
|||
.descriptorCount = 1,
|
||||
.descriptorType = desc.is_written ? vk::DescriptorType::eStorageTexelBuffer
|
||||
: vk::DescriptorType::eUniformTexelBuffer,
|
||||
.pTexelBufferView = &buffer_view,
|
||||
.pTexelBufferView = &buffer_views.emplace_back(buffer_view),
|
||||
});
|
||||
++binding.buffer;
|
||||
}
|
||||
|
@ -655,7 +659,7 @@ void Rasterizer::BindTextures(const Shader::Info& stage, Shader::Backend::Bindin
|
|||
if (image->binding.is_bound) {
|
||||
// The image is already bound. In case if it is about to be used as storage we need
|
||||
// to force general layout on it.
|
||||
image->binding.force_general |= image_desc.IsStorage(tsharp);
|
||||
image->binding.force_general |= image_desc.is_written;
|
||||
}
|
||||
if (image->binding.is_target) {
|
||||
// The image is already bound as target. Since we read and output to it need to force
|
||||
|
|
|
@ -153,7 +153,8 @@ vk::DescriptorSet DescriptorHeap::Commit(vk::DescriptorSetLayout set_layout) {
|
|||
}
|
||||
|
||||
// The pool has run out. Record current tick and place it in pending list.
|
||||
ASSERT_MSG(result == vk::Result::eErrorOutOfPoolMemory,
|
||||
ASSERT_MSG(result == vk::Result::eErrorOutOfPoolMemory ||
|
||||
result == vk::Result::eErrorFragmentedPool,
|
||||
"Unexpected error during descriptor set allocation {}", vk::to_string(result));
|
||||
pending_pools.emplace_back(curr_pool, master_semaphore->CurrentTick());
|
||||
if (const auto [pool, tick] = pending_pools.front(); master_semaphore->IsFree(tick)) {
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Reference in a new issue