Merge branch 'shadps4-emu:main' into v_max_legacy_f32

This commit is contained in:
Stolas 2024-07-08 10:19:19 +10:00 committed by GitHub
commit 6a4b11eae6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
35 changed files with 429 additions and 118 deletions

3
.gitmodules vendored
View file

@ -58,6 +58,3 @@
[submodule "externals/tracy"]
path = externals/tracy
url = https://github.com/shadps4-emu/tracy.git
[submodule "externals/hwinfo"]
path = externals/hwinfo
url = https://github.com/lfreist/hwinfo.git

View file

@ -525,7 +525,7 @@ endif()
create_target_directory_groups(shadps4)
target_link_libraries(shadps4 PRIVATE magic_enum::magic_enum fmt::fmt toml11::toml11 tsl::robin_map xbyak::xbyak Tracy::TracyClient hwinfo::HWinfo)
target_link_libraries(shadps4 PRIVATE magic_enum::magic_enum fmt::fmt toml11::toml11 tsl::robin_map xbyak::xbyak Tracy::TracyClient)
target_link_libraries(shadps4 PRIVATE Boost::headers GPUOpen::VulkanMemoryAllocator sirit Vulkan::Headers xxHash::xxhash Zydis::Zydis glslang::SPIRV glslang::glslang SDL3::SDL3)
if (NOT ENABLE_QT_GUI)

View file

@ -140,6 +140,3 @@ option(TRACY_ON_DEMAND "" ON)
option(TRACY_NO_FRAME_IMAGE "" ON)
option(TRACY_FIBERS "" ON) # For AmdGpu frontend profiling
add_subdirectory(tracy)
# hwinfo
add_subdirectory(hwinfo)

2
externals/fmt vendored

@ -1 +1 @@
Subproject commit bbf44cc000531dc7737d5321ccfa9f2f11b20127
Subproject commit a96259701e1d16e2f8c8299c668e863ec0ac0119

2
externals/glslang vendored

@ -1 +1 @@
Subproject commit 33c7e30860928bab3819c3abae8297b109a02f73
Subproject commit 5939e32b87487fa9c72ab336ebfcc5ae26d9ab6d

1
externals/hwinfo vendored

@ -1 +0,0 @@
Subproject commit 81ea6332fd4839890b1904f9668865145450f8da

@ -1 +1 @@
Subproject commit dd6a39d0ba1852cf06907e0f0573a2a10d23c2ad
Subproject commit ec1adc5763f684e0f32888b157bfc0b86021b8e9

2
externals/sdl3 vendored

@ -1 +1 @@
Subproject commit b72c22340e9a3c680011e245c28492bf60f5be66
Subproject commit f9a06c20ed85fb1d6754fc2280d6183382217910

2
externals/vma vendored

@ -1 +1 @@
Subproject commit feb11e172715011ef2a7b3b6c7c8737337b34181
Subproject commit 257138b8f5686ae84491a3df9f90a77d5660c3bd

2
externals/zlib-ng vendored

@ -1 +1 @@
Subproject commit 80514c17b384df68fbe83cca69ece0521b85f708
Subproject commit d54e3769be0c522015b784eca2af258b1c026107

View file

@ -18,7 +18,7 @@ static std::string logFilter;
static std::string logType = "sync";
static bool isDebugDump = false;
static bool isLibc = true;
static bool isShowSplash = true;
static bool isShowSplash = false;
static bool isNullGpu = false;
static bool shouldDumpShaders = false;
static bool shouldDumpPM4 = false;

View file

@ -3,6 +3,7 @@
#include "common/assert.h"
#include "common/config.h"
#include "common/debug.h"
#include "common/logging/log.h"
#include "common/path_util.h"
#include "common/slot_vector.h"
@ -264,6 +265,7 @@ static_assert(CtxInitSequence400.size() == 0x61);
// If `submitDone` has been issued, we need to block submissions until the GPU is idle
static u32 submission_lock{};
std::condition_variable cv_lock{};
static std::mutex m_submission{};
static u64 frames_submitted{}; // frame counter
static bool send_init_packet{true}; // initialize HW state before first game's submit in a frame
@ -277,6 +279,18 @@ struct AscQueueInfo {
static Common::SlotVector<AscQueueInfo> asc_queues{};
static constexpr VAddr tessellation_factors_ring_addr = 0xFF0000000ULL;
// Interrupt callback that clears the submission lock and wakes every thread waiting on
// `cv_lock`. The interrupt id passed in is not inspected.
static void ResetSubmissionLock(Platform::InterruptId irq) {
    std::scoped_lock guard{m_submission};
    submission_lock = 0;
    // Notify while still holding the mutex, exactly as before.
    cv_lock.notify_all();
}
// Blocks the calling thread until `submission_lock` has been reset to zero.
static void WaitGpuIdle() {
    HLE_TRACE;
    std::unique_lock guard{m_submission};
    // Re-test the flag after every wake-up so spurious wake-ups are harmless.
    while (submission_lock != 0) {
        cv_lock.wait(guard);
    }
}
static void DumpCommandList(std::span<const u32> cmd_list, const std::string& postfix) {
using namespace Common::FS;
const auto dump_dir = GetUserPath(PathType::PM4Dir);
@ -465,14 +479,9 @@ void PS4_SYSV_ABI sceGnmDingDong(u32 gnm_vqid, u32 next_offs_dw) {
return;
}
std::unique_lock lock{m_submission};
if (submission_lock != 0) {
liverpool->WaitGpuIdle();
WaitGpuIdle();
// Suspend logic goes here
submission_lock = 0;
}
/* Suspend logic goes here */
auto vqid = gnm_vqid - 1;
auto& asc_queue = asc_queues[{vqid}];
@ -863,9 +872,9 @@ int PS4_SYSV_ABI sceGnmEndWorkload() {
return ORBIS_OK;
}
int PS4_SYSV_ABI sceGnmFindResourcesPublic() {
LOG_ERROR(Lib_GnmDriver, "(STUBBED) called");
return ORBIS_OK;
s32 PS4_SYSV_ABI sceGnmFindResourcesPublic() {
LOG_TRACE(Lib_GnmDriver, "called");
return ORBIS_GNM_ERROR_FAILURE; // not available in retail FW
}
void PS4_SYSV_ABI sceGnmFlushGarlic() {
@ -1321,7 +1330,7 @@ s32 PS4_SYSV_ABI sceGnmSetEmbeddedPsShader(u32* cmdbuf, u32 size, u32 shader_id,
if (shader_id > 1) {
LOG_ERROR(Lib_GnmDriver, "Unknown shader id {}", shader_id);
return 0x8eee00ff;
return ORBIS_GNM_ERROR_FAILURE;
}
// clang-format off
@ -1391,7 +1400,7 @@ s32 PS4_SYSV_ABI sceGnmSetEmbeddedVsShader(u32* cmdbuf, u32 size, u32 shader_id,
if (shader_id != 0) {
LOG_ERROR(Lib_GnmDriver, "Unknown shader id {}", shader_id);
return 0x8eee00ff;
return ORBIS_GNM_ERROR_FAILURE;
}
// A fullscreen triangle with one uv set
@ -1930,13 +1939,9 @@ s32 PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, const u32* dcb_gpu_addrs[
}
}
if (submission_lock != 0) {
liverpool->WaitGpuIdle();
WaitGpuIdle();
// Suspend logic goes here
submission_lock = 0;
}
/* Suspend logic goes here */
if (send_init_packet) {
if (sdk_version <= 0x1ffffffu) {
@ -1990,7 +1995,6 @@ int PS4_SYSV_ABI sceGnmSubmitDone() {
if (!liverpool->IsGpuIdle()) {
submission_lock = true;
}
liverpool->NotifySubmitDone();
send_init_packet = true;
++frames_submitted;
return ORBIS_OK;
@ -2471,6 +2475,9 @@ void RegisterlibSceGnmDriver(Core::Loader::SymbolsResolver* sym) {
sdk_version = 0;
}
Platform::IrqC::Instance()->Register(Platform::InterruptId::GpuIdle, ResetSubmissionLock,
nullptr);
LIB_FUNCTION("b0xyllnVY-I", "libSceGnmDriver", 1, "libSceGnmDriver", 1, 1, sceGnmAddEqEvent);
LIB_FUNCTION("b08AgtPlHPg", "libSceGnmDriver", 1, "libSceGnmDriver", 1, 1,
sceGnmAreSubmitsAllowed);

View file

@ -75,7 +75,7 @@ int PS4_SYSV_ABI sceGnmDriverInternalVirtualQuery();
int PS4_SYSV_ABI sceGnmDriverTraceInProgress();
int PS4_SYSV_ABI sceGnmDriverTriggerCapture();
int PS4_SYSV_ABI sceGnmEndWorkload();
int PS4_SYSV_ABI sceGnmFindResourcesPublic();
s32 PS4_SYSV_ABI sceGnmFindResourcesPublic();
void PS4_SYSV_ABI sceGnmFlushGarlic();
int PS4_SYSV_ABI sceGnmGetCoredumpAddress();
int PS4_SYSV_ABI sceGnmGetCoredumpMode();

View file

@ -840,6 +840,31 @@ int PS4_SYSV_ABI posix_pthread_mutexattr_setprotocol(ScePthreadMutexattr* attr,
return result;
}
// Tries to lock `mutex`, giving up once `usec` microseconds have elapsed.
//
// `mutex` is first passed through createMutex(); a null result yields
// SCE_KERNEL_ERROR_EINVAL. `usec` is treated as a timeout relative to "now".
//
// Returns SCE_OK on success, otherwise the SCE_KERNEL_ERROR_* code matching the host
// pthread error (ETIMEDOUT, EINTR, EAGAIN), or SCE_KERNEL_ERROR_EINVAL for anything else.
int PS4_SYSV_ABI scePthreadMutexTimedlock(ScePthreadMutex* mutex, u64 usec) {
    mutex = createMutex(mutex);
    if (mutex == nullptr) {
        return SCE_KERNEL_ERROR_EINVAL;
    }

    // pthread_mutex_timedlock() takes an ABSOLUTE CLOCK_REALTIME deadline, not a duration.
    // Passing the raw duration would describe a moment shortly after the epoch, so every
    // contended lock would time out immediately. Build the deadline from the current time.
    timespec deadline{};
    if (clock_gettime(CLOCK_REALTIME, &deadline) != 0) {
        return SCE_KERNEL_ERROR_EINVAL;
    }

    constexpr u64 usec_per_sec = 1000000;
    constexpr long nsec_per_sec = 1000000000L;
    deadline.tv_sec += static_cast<time_t>(usec / usec_per_sec);
    deadline.tv_nsec += static_cast<long>((usec % usec_per_sec) * 1000);
    // Both addends are below one second, so a single carry keeps tv_nsec in range.
    if (deadline.tv_nsec >= nsec_per_sec) {
        deadline.tv_sec += 1;
        deadline.tv_nsec -= nsec_per_sec;
    }

    const int result = pthread_mutex_timedlock(&(*mutex)->pth_mutex, &deadline);

    switch (result) {
    case 0:
        return SCE_OK;
    case ETIMEDOUT:
        return SCE_KERNEL_ERROR_ETIMEDOUT;
    case EINTR:
        return SCE_KERNEL_ERROR_EINTR;
    case EAGAIN:
        return SCE_KERNEL_ERROR_EAGAIN;
    default:
        return SCE_KERNEL_ERROR_EINVAL;
    }
}
static int pthread_copy_attributes(ScePthreadAttr* dst, const ScePthreadAttr* src) {
if (dst == nullptr || *dst == nullptr || src == nullptr || *src == nullptr) {
return SCE_KERNEL_ERROR_EINVAL;
@ -1362,6 +1387,8 @@ void pthreadSymbolsRegister(Core::Loader::SymbolsResolver* sym) {
LIB_FUNCTION("9UK1vLZQft4", "libkernel", 1, "libkernel", 1, 1, scePthreadMutexLock);
LIB_FUNCTION("tn3VlD0hG60", "libkernel", 1, "libkernel", 1, 1, scePthreadMutexUnlock);
LIB_FUNCTION("upoVrzMHFeE", "libkernel", 1, "libkernel", 1, 1, scePthreadMutexTrylock);
LIB_FUNCTION("IafI2PxcPnQ", "libkernel", 1, "libkernel", 1, 1, scePthreadMutexTimedlock);
// cond calls
LIB_FUNCTION("2Tb92quprl0", "libkernel", 1, "libkernel", 1, 1, scePthreadCondInit);
LIB_FUNCTION("m5-2bsNfv7s", "libkernel", 1, "libkernel", 1, 1, scePthreadCondattrInit);

View file

@ -243,6 +243,7 @@ int MemoryManager::VirtualQuery(VAddr addr, int flags,
if (vma.type == VMAType::Direct) {
const auto dmem_it = FindDmemArea(vma.phys_base);
ASSERT(dmem_it != dmem_map.end());
info->offset = vma.phys_base;
info->memory_type = dmem_it->second.memory_type;
}

View file

@ -26,6 +26,7 @@ enum class InterruptId : u32 {
Compute6RelMem = 6u,
GfxEop = 7u,
GfxFlip = 8u,
GpuIdle = 9u,
};
using IrqHandler = std::function<void(InterruptId)>;

View file

@ -22,7 +22,6 @@
#include "core/linker.h"
#include "core/memory.h"
#include "emulator.h"
#include "hwinfo/hwinfo.h"
Frontend::WindowSDL* g_window = nullptr;
@ -43,7 +42,6 @@ Emulator::Emulator()
Common::Log::Initialize();
Common::Log::Start();
LOG_INFO(Loader, "Starting shadps4 emulator v{} ", Common::VERSION);
PrintSystemInfo();
}
Emulator::~Emulator() {
@ -181,17 +179,4 @@ void Emulator::LoadSystemModules(const std::filesystem::path& file) {
}
}
void Emulator::PrintSystemInfo() {
auto cpus = hwinfo::getAllCPUs();
for (const auto& cpu : cpus) {
LOG_INFO(Loader, "CPU #{} {}", cpu.id(), cpu.modelName());
}
hwinfo::OS os;
LOG_INFO(Loader, "{}", os.name());
auto gpus = hwinfo::getAllGPUs();
for (auto& gpu : gpus) {
LOG_INFO(Loader, "GPU #{} {}", gpu.id(), gpu.name());
}
}
} // namespace Core

View file

@ -29,7 +29,6 @@ public:
private:
void LoadSystemModules(const std::filesystem::path& file);
void PrintSystemInfo();
Core::MemoryManager* memory;
Input::GameController* controller = Common::Singleton<Input::GameController>::Instance();

View file

@ -3,15 +3,31 @@
#pragma once
#include <QCoreApplication>
#include <QDesktopServices>
#include <QFile>
#include <QHeaderView>
#include <QImage>
#include <QMenu>
#include <QMessageBox>
#include <QPixmap>
#include <QStandardPaths>
#include <QTableWidget>
#include <QTextStream>
#include <QTreeWidget>
#include <QTreeWidgetItem>
#include "game_info.h"
#include "trophy_viewer.h"
#ifdef Q_OS_WIN
#include <ShlObj.h>
#include <Windows.h>
#include <objbase.h>
#include <shlguid.h>
#include <shobjidl.h>
#endif
class GuiContextMenus : public QObject {
Q_OBJECT
public:
@ -27,13 +43,16 @@ public:
// Setup menu.
QMenu menu(widget);
QAction createShortcut("Create Shortcut", widget);
QAction openFolder("Open Game Folder", widget);
QAction openSfoViewer("SFO Viewer", widget);
QAction openTrophyViewer("Trophy Viewer", widget);
menu.addAction(&createShortcut);
menu.addAction(&openFolder);
menu.addAction(&openSfoViewer);
menu.addAction(&openTrophyViewer);
// Show menu.
auto selected = menu.exec(global_pos);
if (!selected) {
@ -105,6 +124,69 @@ public:
connect(widget->parent(), &QWidget::destroyed, trophyViewer,
[widget, trophyViewer]() { trophyViewer->deleteLater(); });
}
if (selected == &createShortcut) {
QString targetPath = QString::fromStdString(m_games[itemID].path);
QString ebootPath = targetPath + "/eboot.bin";
// Get the full path to the icon
QString iconPath = QString::fromStdString(m_games[itemID].icon_path);
QFileInfo iconFileInfo(iconPath);
QString icoPath = iconFileInfo.absolutePath() + "/" + iconFileInfo.baseName() + ".ico";
// Path to shortcut/link
QString linkPath;
// Path to the shadps4.exe executable
QString exePath;
#ifdef Q_OS_WIN
linkPath = QStandardPaths::writableLocation(QStandardPaths::DesktopLocation) + "/" +
QString::fromStdString(m_games[itemID].name) + ".lnk";
exePath = QCoreApplication::applicationFilePath().replace("\\", "/");
#else
linkPath = QStandardPaths::writableLocation(QStandardPaths::DesktopLocation) + "/" +
QString::fromStdString(m_games[itemID].name) + ".desktop";
#endif
// Convert the icon to .ico if necessary
if (iconFileInfo.suffix().toLower() == "png") {
// Convert icon from PNG to ICO
if (convertPngToIco(iconPath, icoPath)) {
#ifdef Q_OS_WIN
if (createShortcutWin(linkPath, ebootPath, icoPath, exePath)) {
#else
if (createShortcutLinux(linkPath, ebootPath, iconPath)) {
#endif
QMessageBox::information(
nullptr, "Shortcut Creation",
QString("Shortcut created successfully:\n %1").arg(linkPath));
} else {
QMessageBox::critical(
nullptr, "Error",
QString("Error creating shortcut:\n %1").arg(linkPath));
}
} else {
QMessageBox::critical(nullptr, "Error", "Failed to convert icon.");
}
} else {
// If the icon is already in ICO format, we just create the shortcut
#ifdef Q_OS_WIN
if (createShortcutWin(linkPath, ebootPath, iconPath, exePath)) {
#else
if (createShortcutLinux(linkPath, ebootPath, iconPath)) {
#endif
QMessageBox::information(
nullptr, "Shortcut Creation",
QString("Shortcut created successfully:\n %1").arg(linkPath));
} else {
QMessageBox::critical(nullptr, "Error",
QString("Error creating shortcut:\n %1").arg(linkPath));
}
}
}
}
int GetRowIndex(QTreeWidget* treeWidget, QTreeWidgetItem* item) {
@ -155,4 +237,88 @@ public:
InstallDragDropPkg(path, 1, 1);
}
}
private:
bool convertPngToIco(const QString& pngFilePath, const QString& icoFilePath) {
// Load the PNG image
QImage image(pngFilePath);
if (image.isNull()) {
return false;
}
// Scale the image to the default icon size (256x256 pixels)
QImage scaledImage =
image.scaled(QSize(256, 256), Qt::KeepAspectRatio, Qt::SmoothTransformation);
// Convert the image to QPixmap
QPixmap pixmap = QPixmap::fromImage(scaledImage);
// Save the pixmap as an ICO file
if (pixmap.save(icoFilePath, "ICO")) {
return true;
} else {
return false;
}
}
#ifdef Q_OS_WIN
bool createShortcutWin(const QString& linkPath, const QString& targetPath,
const QString& iconPath, const QString& exePath) {
CoInitializeEx(nullptr, COINIT_APARTMENTTHREADED);
// Create the ShellLink object
IShellLink* pShellLink = nullptr;
HRESULT hres = CoCreateInstance(CLSID_ShellLink, nullptr, CLSCTX_INPROC_SERVER,
IID_IShellLink, (LPVOID*)&pShellLink);
if (SUCCEEDED(hres)) {
// Defines the path to the program executable
pShellLink->SetPath((LPCWSTR)exePath.utf16());
// Sets the home directory ("Start in")
pShellLink->SetWorkingDirectory((LPCWSTR)QFileInfo(exePath).absolutePath().utf16());
// Set arguments, eboot.bin file location
QString arguments = QString("\"%1\"").arg(targetPath);
pShellLink->SetArguments((LPCWSTR)arguments.utf16());
// Set the icon for the shortcut
pShellLink->SetIconLocation((LPCWSTR)iconPath.utf16(), 0);
// Save the shortcut
IPersistFile* pPersistFile = nullptr;
hres = pShellLink->QueryInterface(IID_IPersistFile, (LPVOID*)&pPersistFile);
if (SUCCEEDED(hres)) {
hres = pPersistFile->Save((LPCWSTR)linkPath.utf16(), TRUE);
pPersistFile->Release();
}
pShellLink->Release();
}
CoUninitialize();
return SUCCEEDED(hres);
}
#else
// Writes a freedesktop ".desktop" launcher to `linkPath` that starts this application
// with `targetPath` as its argument and `iconPath` as its icon. The entry name is the
// base name of `linkPath`.
// Shows an error dialog and returns false when the file cannot be opened for writing;
// returns true once the entry has been written.
bool createShortcutLinux(const QString& linkPath, const QString& targetPath,
                         const QString& iconPath) {
    QFile shortcutFile(linkPath);
    if (!shortcutFile.open(QIODevice::WriteOnly | QIODevice::Text)) {
        QMessageBox::critical(nullptr, "Error",
                              QString("Error creating shortcut:\n %1").arg(linkPath));
        return false;
    }

    QTextStream out(&shortcutFile);
    out << "[Desktop Entry]\n";
    out << "Version=1.0\n";
    out << "Name=" << QFileInfo(linkPath).baseName() << "\n";
    // Spaces separate arguments in an Exec line, so the executable path is quoted just
    // like the game path; otherwise an install directory containing a space breaks the
    // launcher. NOTE(review): embedded quotes/backslashes in either path are not escaped.
    out << "Exec=\"" << QCoreApplication::applicationFilePath() << "\" \"" << targetPath
        << "\"\n";
    out << "Icon=" << iconPath << "\n";
    out << "Terminal=false\n";
    out << "Type=Application\n";

    shortcutFile.close();
    return true;
}
#endif
};

View file

@ -8,10 +8,16 @@
#include "qt_gui/game_install_dialog.h"
#include "qt_gui/main_window.h"
#include <emulator.h>
#include <fmt/core.h>
// Qt message handler that discards every message it is given; all three arguments are
// ignored.
void customMessageHandler(QtMsgType /*type*/, const QMessageLogContext& /*context*/,
                          const QString& /*message*/) {
    // Intentionally empty.
}
int main(int argc, char* argv[]) {
QApplication a(argc, argv);
// Load configurations and initialize Qt application
const auto config_dir = Common::FS::GetUserPath(Common::FS::PathType::UserDir);
Config::load(config_dir / "config.toml");
QString gameDataPath = qApp->applicationDirPath() + "/game_data/";
@ -23,14 +29,25 @@ int main(int argc, char* argv[]) {
#endif
std::filesystem::create_directory(path);
// Check if the game install directory is set
if (Config::getGameInstallDir() == "") {
GameInstallDialog dlg;
dlg.exec();
}
qInstallMessageHandler(customMessageHandler); // ignore qt logs.
// Ignore Qt logs
qInstallMessageHandler(customMessageHandler);
// Initialize the main window
MainWindow* m_main_window = new MainWindow(nullptr);
m_main_window->Init();
// Check for command line arguments
if (argc > 1) {
Core::Emulator emulator;
emulator.Run(argv[1]);
}
// Run the Qt application
return a.exec();
}
}

View file

@ -130,7 +130,7 @@ void WindowSDL::onKeyPress(const SDL_Event* event) {
case SDLK_RETURN:
button = OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_OPTIONS;
break;
case SDLK_a:
case SDLK_A:
axis = Input::Axis::LeftX;
if (event->type == SDL_EVENT_KEY_DOWN) {
axisvalue += -127;
@ -139,7 +139,7 @@ void WindowSDL::onKeyPress(const SDL_Event* event) {
}
ax = Input::GetAxis(-0x80, 0x80, axisvalue);
break;
case SDLK_d:
case SDLK_D:
axis = Input::Axis::LeftX;
if (event->type == SDL_EVENT_KEY_DOWN) {
axisvalue += 127;
@ -148,7 +148,7 @@ void WindowSDL::onKeyPress(const SDL_Event* event) {
}
ax = Input::GetAxis(-0x80, 0x80, axisvalue);
break;
case SDLK_w:
case SDLK_W:
axis = Input::Axis::LeftY;
if (event->type == SDL_EVENT_KEY_DOWN) {
axisvalue += -127;
@ -157,7 +157,7 @@ void WindowSDL::onKeyPress(const SDL_Event* event) {
}
ax = Input::GetAxis(-0x80, 0x80, axisvalue);
break;
case SDLK_s:
case SDLK_S:
axis = Input::Axis::LeftY;
if (event->type == SDL_EVENT_KEY_DOWN) {
axisvalue += 127;
@ -166,7 +166,7 @@ void WindowSDL::onKeyPress(const SDL_Event* event) {
}
ax = Input::GetAxis(-0x80, 0x80, axisvalue);
break;
case SDLK_j:
case SDLK_J:
axis = Input::Axis::RightX;
if (event->type == SDL_EVENT_KEY_DOWN) {
axisvalue += -127;
@ -175,7 +175,7 @@ void WindowSDL::onKeyPress(const SDL_Event* event) {
}
ax = Input::GetAxis(-0x80, 0x80, axisvalue);
break;
case SDLK_l:
case SDLK_L:
axis = Input::Axis::RightX;
if (event->type == SDL_EVENT_KEY_DOWN) {
axisvalue += 127;
@ -184,7 +184,7 @@ void WindowSDL::onKeyPress(const SDL_Event* event) {
}
ax = Input::GetAxis(-0x80, 0x80, axisvalue);
break;
case SDLK_i:
case SDLK_I:
axis = Input::Axis::RightY;
if (event->type == SDL_EVENT_KEY_DOWN) {
axisvalue += -127;
@ -193,7 +193,7 @@ void WindowSDL::onKeyPress(const SDL_Event* event) {
}
ax = Input::GetAxis(-0x80, 0x80, axisvalue);
break;
case SDLK_k:
case SDLK_K:
axis = Input::Axis::RightY;
if (event->type == SDL_EVENT_KEY_DOWN) {
axisvalue += 127;
@ -202,19 +202,19 @@ void WindowSDL::onKeyPress(const SDL_Event* event) {
}
ax = Input::GetAxis(-0x80, 0x80, axisvalue);
break;
case SDLK_x:
case SDLK_X:
button = OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_L3;
break;
case SDLK_m:
case SDLK_M:
button = OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_R3;
break;
case SDLK_q:
case SDLK_Q:
button = OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_L1;
break;
case SDLK_u:
case SDLK_U:
button = OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_R1;
break;
case SDLK_e:
case SDLK_E:
button = OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_L2;
axis = Input::Axis::TriggerLeft;
if (event->type == SDL_EVENT_KEY_DOWN) {
@ -224,7 +224,7 @@ void WindowSDL::onKeyPress(const SDL_Event* event) {
}
ax = Input::GetAxis(0, 0x80, axisvalue);
break;
case SDLK_o:
case SDLK_O:
button = OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_R2;
axis = Input::Axis::TriggerRight;
if (event->type == SDL_EVENT_KEY_DOWN) {

View file

@ -135,15 +135,33 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp) {
if (IR::IsParam(attr)) {
const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
const auto& param{ctx.input_params.at(index)};
if (!ValidId(param.id)) {
// Attribute is disabled or varying component is not written
return ctx.ConstF32(comp == 3 ? 1.0f : 0.0f);
}
if (param.num_components > 1) {
const Id pointer{ctx.OpAccessChain(param.pointer_type, param.id, ctx.ConstU32(comp))};
return ctx.OpLoad(param.component_type, pointer);
if (param.buffer_handle < 0) {
if (!ValidId(param.id)) {
// Attribute is disabled or varying component is not written
return ctx.ConstF32(comp == 3 ? 1.0f : 0.0f);
}
if (param.num_components > 1) {
const Id pointer{
ctx.OpAccessChain(param.pointer_type, param.id, ctx.ConstU32(comp))};
return ctx.OpLoad(param.component_type, pointer);
} else {
return ctx.OpLoad(param.component_type, param.id);
}
} else {
return ctx.OpLoad(param.component_type, param.id);
const auto rate_idx = param.id.value == 0 ? ctx.u32_zero_value : ctx.u32_one_value;
const auto step_rate = ctx.OpLoad(
ctx.U32[1],
ctx.OpAccessChain(ctx.TypePointer(spv::StorageClass::PushConstant, ctx.U32[1]),
ctx.instance_step_rates, rate_idx));
const auto offset = ctx.OpIAdd(
ctx.U32[1],
ctx.OpIMul(
ctx.U32[1],
ctx.OpUDiv(ctx.U32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id), step_rate),
ctx.ConstU32(param.num_components)),
ctx.ConstU32(comp));
return EmitReadConstBuffer(ctx, param.buffer_handle, offset);
}
}
switch (attr) {

View file

@ -171,17 +171,47 @@ Id MakeDefaultValue(EmitContext& ctx, u32 default_value) {
void EmitContext::DefineInputs(const Info& info) {
switch (stage) {
case Stage::Vertex:
case Stage::Vertex: {
vertex_index = DefineVariable(U32[1], spv::BuiltIn::VertexIndex, spv::StorageClass::Input);
base_vertex = DefineVariable(U32[1], spv::BuiltIn::BaseVertex, spv::StorageClass::Input);
instance_id = DefineVariable(U32[1], spv::BuiltIn::InstanceIndex, spv::StorageClass::Input);
// Create the push constant block that holds the instance step rates
const Id struct_type{Name(TypeStruct(U32[1], U32[1]), "instance_step_rates")};
Decorate(struct_type, spv::Decoration::Block);
MemberName(struct_type, 0, "sr0");
MemberName(struct_type, 1, "sr1");
MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U);
MemberDecorate(struct_type, 1, spv::Decoration::Offset, 4U);
instance_step_rates = DefineVar(struct_type, spv::StorageClass::PushConstant);
Name(instance_step_rates, "step_rates");
interfaces.push_back(instance_step_rates);
for (const auto& input : info.vs_inputs) {
const Id type{GetAttributeType(*this, input.fmt)};
const Id id{DefineInput(type, input.binding)};
Name(id, fmt::format("vs_in_attr{}", input.binding));
input_params[input.binding] = GetAttributeInfo(input.fmt, id);
interfaces.push_back(id);
if (input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate0 ||
input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate1) {
const u32 rate_idx =
input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate0 ? 0
: 1;
// Note that we pass index rather than Id
input_params[input.binding] = {
rate_idx, input_u32, U32[1], input.num_components, input.instance_data_buf,
};
} else {
Id id{DefineInput(type, input.binding)};
if (input.instance_step_rate == Info::VsInput::InstanceIdType::Plain) {
Name(id, fmt::format("vs_instance_attr{}", input.binding));
} else {
Name(id, fmt::format("vs_in_attr{}", input.binding));
}
input_params[input.binding] = GetAttributeInfo(input.fmt, id);
interfaces.push_back(id);
}
}
break;
}
case Stage::Fragment:
if (info.uses_group_quad) {
subgroup_local_invocation_id = DefineVariable(
@ -276,7 +306,10 @@ void EmitContext::DefineBuffers(const Info& info) {
if (std::ranges::find(type_ids, record_array_type.value, &Id::value) == type_ids.end()) {
Decorate(record_array_type, spv::Decoration::ArrayStride, 4);
const auto name =
fmt::format("{}_cbuf_block_{}{}", stage, 'f', sizeof(float) * CHAR_BIT);
buffer.is_instance_data
? fmt::format("{}_instance_data{}_{}{}", stage, i, 'f',
sizeof(float) * CHAR_BIT)
: fmt::format("{}_cbuf_block_{}{}", stage, 'f', sizeof(float) * CHAR_BIT);
Name(struct_type, name);
Decorate(struct_type, spv::Decoration::Block);
MemberName(struct_type, 0, "data");
@ -317,6 +350,14 @@ spv::ImageFormat GetFormat(const AmdGpu::Image& image) {
image.GetNumberFmt() == AmdGpu::NumberFormat::Float) {
return spv::ImageFormat::Rg32f;
}
if (image.GetDataFmt() == AmdGpu::DataFormat::Format32_32 &&
image.GetNumberFmt() == AmdGpu::NumberFormat::Uint) {
return spv::ImageFormat::Rg32ui;
}
if (image.GetDataFmt() == AmdGpu::DataFormat::Format32_32_32_32 &&
image.GetNumberFmt() == AmdGpu::NumberFormat::Uint) {
return spv::ImageFormat::Rgba32ui;
}
if (image.GetDataFmt() == AmdGpu::DataFormat::Format16 &&
image.GetNumberFmt() == AmdGpu::NumberFormat::Float) {
return spv::ImageFormat::R16f;

View file

@ -165,6 +165,8 @@ public:
Id output_position{};
Id vertex_index{};
Id instance_id{};
Id instance_step_rates{};
Id base_vertex{};
Id frag_coord{};
Id front_facing{};
@ -214,6 +216,7 @@ public:
Id pointer_type;
Id component_type;
u32 num_components;
s32 buffer_handle{-1};
};
std::array<SpirvAttribute, 32> input_params{};
std::array<SpirvAttribute, 32> output_params{};

View file

@ -235,9 +235,22 @@ void Translator::EmitFetch(const GcnInst& inst) {
ir.SetVectorReg(dst_reg++, comp);
}
if (attrib.instance_data == 2 || attrib.instance_data == 3) {
LOG_WARNING(Render_Recompiler, "Unsupported instance step rate = {}",
attrib.instance_data);
// In case of programmable step rates we need to fallback to instance data pulling in
// shader, so VBs should be bound as regular data buffers
s32 instance_buf_handle = -1;
const auto step_rate = static_cast<Info::VsInput::InstanceIdType>(attrib.instance_data);
if (step_rate == Info::VsInput::OverStepRate0 ||
step_rate == Info::VsInput::OverStepRate1) {
info.buffers.push_back({
.sgpr_base = attrib.sgpr_base,
.dword_offset = attrib.dword_offset,
.stride = buffer.GetStride(),
.num_records = buffer.num_records,
.used_types = IR::Type::F32,
.is_storage = true, // we may not fit into UBO with large meshes
.is_instance_data = true,
});
instance_buf_handle = s32(info.buffers.size() - 1);
}
const u32 num_components = AmdGpu::NumComponents(buffer.GetDataFmt());
@ -247,7 +260,8 @@ void Translator::EmitFetch(const GcnInst& inst) {
.num_components = std::min<u16>(attrib.num_elements, num_components),
.sgpr_base = attrib.sgpr_base,
.dword_offset = attrib.dword_offset,
.instance_step_rate = static_cast<Info::VsInput::InstanceIdType>(attrib.instance_data),
.instance_step_rate = step_rate,
.instance_data_buf = instance_buf_handle,
});
}
}

View file

@ -165,13 +165,14 @@ void Translator::IMAGE_GATHER(const GcnInst& inst) {
if (!flags.test(MimgModifier::Pcf)) {
return ir.ImageGather(handle, body, offset, {}, info);
}
ASSERT(mimg.dmask & 1); // should be always 1st (R) component
return ir.ImageGatherDref(handle, body, offset, {}, dref, info);
}();
// For gather4 instructions dmask selects which component to read and must have
// only one bit set to 1
ASSERT_MSG(std::popcount(mimg.dmask) == 1, "Unexpected bits in gather dmask");
for (u32 i = 0; i < 4; i++) {
if (((mimg.dmask >> i) & 1) == 0) {
continue;
}
const IR::F32 value = IR::F32{ir.CompositeExtract(texel, i)};
ir.SetVectorReg(dest_reg++, value);
}

View file

@ -77,7 +77,8 @@ struct BufferResource {
u32 num_records;
IR::Type used_types;
AmdGpu::Buffer inline_cbuf;
bool is_storage;
bool is_storage{false};
bool is_instance_data{false};
constexpr AmdGpu::Buffer GetVsharp(const Info& info) const noexcept;
};
@ -116,6 +117,7 @@ struct Info {
u8 sgpr_base;
u8 dword_offset;
InstanceIdType instance_step_rate;
s32 instance_data_buf;
};
boost::container::static_vector<VsInput, 32> vs_inputs{};

View file

@ -66,21 +66,10 @@ void Liverpool::Process(std::stop_token stoken) {
}
}
if (submit_done) {
std::scoped_lock lk{submit_mutex};
submit_cv.notify_all();
submit_done = false;
}
Platform::IrqC::Instance()->Signal(Platform::InterruptId::GpuIdle);
}
}
void Liverpool::WaitGpuIdle() {
RENDERER_TRACE;
std::unique_lock lk{submit_mutex};
submit_cv.wait(lk, [this] { return num_submits == 0; });
}
Liverpool::Task Liverpool::ProcessCeUpdate(std::span<const u32> ccb) {
TracyFiberEnter(ccb_task_name);

View file

@ -887,7 +887,10 @@ struct Liverpool {
IndexBufferType index_buffer_type;
INSERT_PADDING_WORDS(0xA2A1 - 0xA29E - 2);
u32 enable_primitive_id;
INSERT_PADDING_WORDS(0xA2DF - 0xA2A1 - 1);
INSERT_PADDING_WORDS(0xA2A8 - 0xA2A1 - 1);
u32 vgt_instance_step_rate_0;
u32 vgt_instance_step_rate_1;
INSERT_PADDING_WORDS(0xA2DF - 0xA2A9 - 1);
PolygonOffset poly_offset;
INSERT_PADDING_WORDS(0xA2F8 - 0xA2DF - 5);
AaConfig aa_config;
@ -937,18 +940,10 @@ public:
void SubmitGfx(std::span<const u32> dcb, std::span<const u32> ccb);
void SubmitAsc(u32 vqid, std::span<const u32> acb);
void WaitGpuIdle();
bool IsGpuIdle() const {
return num_submits == 0;
}
void NotifySubmitDone() {
std::scoped_lock lk{submit_mutex};
submit_done = true;
submit_cv.notify_all();
}
void BindRasterizer(Vulkan::Rasterizer* rasterizer_) {
rasterizer = rasterizer_;
}
@ -1017,7 +1012,6 @@ private:
u32 num_submits{};
std::mutex submit_mutex;
std::condition_variable_any submit_cv;
std::atomic<bool> submit_done{};
};
static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08);
@ -1055,6 +1049,8 @@ static_assert(GFX6_3D_REG_INDEX(vs_output_control) == 0xA207);
static_assert(GFX6_3D_REG_INDEX(index_size) == 0xA29D);
static_assert(GFX6_3D_REG_INDEX(index_buffer_type) == 0xA29F);
static_assert(GFX6_3D_REG_INDEX(enable_primitive_id) == 0xA2A1);
static_assert(GFX6_3D_REG_INDEX(vgt_instance_step_rate_0) == 0xA2A8);
static_assert(GFX6_3D_REG_INDEX(vgt_instance_step_rate_1) == 0xA2A9);
static_assert(GFX6_3D_REG_INDEX(poly_offset) == 0xA2DF);
static_assert(GFX6_3D_REG_INDEX(aa_config) == 0xA2F8);
static_assert(GFX6_3D_REG_INDEX(color_buffers[0].base_address) == 0xA318);

View file

@ -222,6 +222,8 @@ vk::CompareOp DepthCompare(AmdGpu::DepthCompare comp) {
return vk::CompareOp::eGreaterOrEqual;
case AmdGpu::DepthCompare::Always:
return vk::CompareOp::eAlways;
default:
UNREACHABLE();
}
}
@ -321,6 +323,9 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
if (data_format == AmdGpu::DataFormat::FormatBc4 && num_format == AmdGpu::NumberFormat::Unorm) {
return vk::Format::eBc4UnormBlock;
}
if (data_format == AmdGpu::DataFormat::FormatBc5 && num_format == AmdGpu::NumberFormat::Unorm) {
return vk::Format::eBc5UnormBlock;
}
if (data_format == AmdGpu::DataFormat::Format16_16_16_16 &&
num_format == AmdGpu::NumberFormat::Sint) {
return vk::Format::eR16G16B16A16Sint;
@ -366,6 +371,9 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
if (data_format == AmdGpu::DataFormat::Format8_8 && num_format == AmdGpu::NumberFormat::Unorm) {
return vk::Format::eR8G8Unorm;
}
if (data_format == AmdGpu::DataFormat::Format8_8 && num_format == AmdGpu::NumberFormat::Snorm) {
return vk::Format::eR8G8Snorm;
}
if (data_format == AmdGpu::DataFormat::FormatBc7 && num_format == AmdGpu::NumberFormat::Unorm) {
return vk::Format::eBc7UnormBlock;
}
@ -429,6 +437,10 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
if (data_format == AmdGpu::DataFormat::Format16 && num_format == AmdGpu::NumberFormat::Unorm) {
return vk::Format::eR16Unorm;
}
if (data_format == AmdGpu::DataFormat::Format16_16_16_16 &&
num_format == AmdGpu::NumberFormat::Unorm) {
return vk::Format::eR16G16B16A16Unorm;
}
UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format));
}

View file

@ -30,12 +30,19 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
stages[i] = *infos[i];
}
BuildDescSetLayout();
const vk::PushConstantRange push_constants = {
.stageFlags = vk::ShaderStageFlagBits::eVertex,
.offset = 0,
.size = 2 * sizeof(u32),
};
const vk::DescriptorSetLayout set_layout = *desc_layout;
const vk::PipelineLayoutCreateInfo layout_info = {
.setLayoutCount = 1U,
.pSetLayouts = &set_layout,
.pushConstantRangeCount = 0,
.pPushConstantRanges = nullptr,
.pushConstantRangeCount = 1,
.pPushConstantRanges = &push_constants,
};
pipeline_layout = instance.GetDevice().createPipelineLayoutUnique(layout_info);
@ -43,6 +50,12 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
boost::container::static_vector<vk::VertexInputAttributeDescription, 32> attributes;
const auto& vs_info = stages[0];
for (const auto& input : vs_info.vs_inputs) {
if (input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate0 ||
input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate1) {
// Skip attribute binding as the data will be pulled by shader
continue;
}
const auto buffer = vs_info.ReadUd<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
attributes.push_back({
.location = input.binding,
@ -420,6 +433,11 @@ void GraphicsPipeline::BindVertexBuffers(StreamBuffer& staging) const {
// Calculate buffers memory overlaps
boost::container::static_vector<BufferRange, MaxVertexBufferCount> ranges{};
for (const auto& input : vs_info.vs_inputs) {
if (input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate0 ||
input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate1) {
continue;
}
const auto& buffer = vs_info.ReadUd<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
if (buffer.GetSize() == 0) {
continue;

View file

@ -67,20 +67,24 @@ public:
void BindResources(Core::MemoryManager* memory, StreamBuffer& staging,
VideoCore::TextureCache& texture_cache) const;
[[nodiscard]] vk::Pipeline Handle() const noexcept {
vk::Pipeline Handle() const noexcept {
return *pipeline;
}
[[nodiscard]] bool IsEmbeddedVs() const noexcept {
vk::PipelineLayout GetLayout() const {
return *pipeline_layout;
}
bool IsEmbeddedVs() const noexcept {
static constexpr size_t EmbeddedVsHash = 0x9b2da5cf47f8c29f;
return key.stage_hashes[0] == EmbeddedVsHash;
}
[[nodiscard]] auto GetWriteMasks() const {
auto GetWriteMasks() const {
return key.write_masks;
}
[[nodiscard]] bool IsDepthEnabled() const {
bool IsDepthEnabled() const {
return key.depth.depth_enable.Value();
}

View file

@ -183,7 +183,7 @@ void PipelineCache::RefreshGraphicsKey() {
int remapped_cb{};
for (auto cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) {
auto const& col_buf = regs.color_buffers[cb];
if (!col_buf || skip_cb_binding) {
if (skip_cb_binding || !col_buf || !regs.color_target_mask.GetMask(cb)) {
continue;
}
const auto base_format =

View file

@ -54,6 +54,13 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
UpdateDynamicState(*pipeline);
cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle());
const u32 step_rates[] = {
regs.vgt_instance_step_rate_0,
regs.vgt_instance_step_rate_1,
};
cmdbuf.pushConstants(pipeline->GetLayout(), vk::ShaderStageFlagBits::eVertex, 0u,
sizeof(step_rates), &step_rates);
if (is_indexed) {
cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, 0, 0);
} else {
@ -99,6 +106,12 @@ void Rasterizer::BeginRendering() {
continue;
}
// If the color buffer is still bound but rendering to it is disabled by the target mask,
// we need to prevent the render area from being affected by unbound render target extents.
if (!regs.color_target_mask.GetMask(col_buf_id)) {
continue;
}
const auto& hint = liverpool->last_cb_extent[col_buf_id];
const auto& image_view = texture_cache.RenderTarget(col_buf, hint);
const auto& image = texture_cache.GetImage(image_view.image_id);

View file

@ -189,10 +189,14 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) {
case vk::Format::eR32Uint:
return vk::Format::eR32Uint;
case vk::Format::eBc1RgbaUnormBlock:
case vk::Format::eBc4UnormBlock:
case vk::Format::eR32G32Sfloat:
return vk::Format::eR32G32Uint;
case vk::Format::eBc2SrgbBlock:
case vk::Format::eBc2UnormBlock:
case vk::Format::eBc3SrgbBlock:
case vk::Format::eBc3UnormBlock:
case vk::Format::eBc5UnormBlock:
case vk::Format::eBc7SrgbBlock:
case vk::Format::eBc7UnormBlock:
return vk::Format::eR32G32B32A32Uint;