Merge branch 'main' into present2

This commit is contained in:
psucien 2024-07-28 15:28:24 +02:00 committed by GitHub
commit 831148965b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
36 changed files with 770 additions and 183 deletions

View file

@ -309,6 +309,8 @@ set(CORE src/core/aerolib/stubs.cpp
src/core/file_format/pkg_type.h
src/core/file_format/psf.cpp
src/core/file_format/psf.h
src/core/file_format/playgo_chunk.cpp
src/core/file_format/playgo_chunk.h
src/core/file_format/trp.cpp
src/core/file_format/trp.h
src/core/file_format/splash.h

View file

@ -230,7 +230,7 @@ void load(const std::filesystem::path& path) {
auto general = generalResult.unwrap();
isNeo = toml::find_or<toml::boolean>(general, "isPS4Pro", false);
isFullscreen = toml::find_or<toml::boolean>(general, "Fullscreen", true);
isFullscreen = toml::find_or<toml::boolean>(general, "Fullscreen", false);
logFilter = toml::find_or<toml::string>(general, "logFilter", "");
logType = toml::find_or<toml::string>(general, "logType", "sync");
isShowSplash = toml::find_or<toml::boolean>(general, "showSplash", true);

View file

@ -72,8 +72,9 @@ static auto UserPaths = [] {
create_path(PathType::GameDataDir, user_dir / GAMEDATA_DIR);
create_path(PathType::TempDataDir, user_dir / TEMPDATA_DIR);
create_path(PathType::SysModuleDir, user_dir / SYSMODULES_DIR);
create_path(PathType::DownloadDir, user_dir / DOWNLOAD_DIR);
create_path(PathType::CapturesDir, user_dir / CAPTURES_DIR);
return paths;
}();

View file

@ -18,6 +18,7 @@ enum class PathType {
TempDataDir, // Where game temp data is stored.
GameDataDir, // Where game data is stored.
SysModuleDir, // Where system modules are stored.
DownloadDir, // Where downloads/temp files are stored.
CapturesDir, // Where rdoc captures are stored.
};
@ -32,6 +33,7 @@ constexpr auto SAVEDATA_DIR = "savedata";
constexpr auto GAMEDATA_DIR = "data";
constexpr auto TEMPDATA_DIR = "temp";
constexpr auto SYSMODULES_DIR = "sys_modules";
constexpr auto DOWNLOAD_DIR = "download";
constexpr auto CAPTURES_DIR = "captures";
// Filenames

View file

@ -285,20 +285,24 @@ static void GenerateTcbAccess(const ZydisDecodedOperand* operands, Xbyak::CodeGe
const auto slot = GetTcbKey();
#if defined(_WIN32)
// The following logic is based on the wine implementation of TlsGetValue
// https://github.com/wine-mirror/wine/blob/a27b9551/dlls/kernelbase/thread.c#L719
// The following logic is based on the Kernel32.dll asm of TlsGetValue
static constexpr u32 TlsSlotsOffset = 0x1480;
static constexpr u32 TlsExpansionSlotsOffset = 0x1780;
static constexpr u32 TlsMinimumAvailable = 64;
const u32 teb_offset = slot < TlsMinimumAvailable ? TlsSlotsOffset : TlsExpansionSlotsOffset;
const u32 tls_index = slot < TlsMinimumAvailable ? slot : slot - TlsMinimumAvailable;
// Load the pointer to the table of TLS slots.
c.putSeg(gs);
c.mov(dst, ptr[reinterpret_cast<void*>(teb_offset)]);
// Load the pointer to our buffer.
c.mov(dst, qword[dst + tls_index * sizeof(LPVOID)]);
if (slot < TlsMinimumAvailable) {
// Load the pointer to TLS slots.
c.mov(dst, ptr[reinterpret_cast<void*>(TlsSlotsOffset + slot * sizeof(LPVOID))]);
} else {
const u32 tls_index = slot - TlsMinimumAvailable;
// Load the pointer to the table of TLS expansion slots.
c.mov(dst, ptr[reinterpret_cast<void*>(TlsExpansionSlotsOffset)]);
// Load the pointer to our buffer.
c.mov(dst, qword[dst + tls_index * sizeof(LPVOID)]);
}
#elif defined(__APPLE__)
// The following logic is based on the Darwin implementation of _os_tsd_get_direct, used by
// pthread_getspecific https://github.com/apple/darwin-xnu/blob/main/libsyscall/os/tsd.h#L89-L96

View file

@ -0,0 +1,16 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/io_file.h"
#include "playgo_chunk.h"
bool PlaygoChunk::Open(const std::filesystem::path& filepath) {
Common::FS::IOFile file(filepath, Common::FS::FileAccessMode::Read);
if (!file.IsOpen()) {
return false;
}
file.Read(playgoHeader);
return true;
}

View file

@ -0,0 +1,31 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <filesystem>
#include "common/types.h"
struct PlaygoHeader {
u32 magic;
u16 version_major;
u16 version_minor;
u16 image_count;
u16 chunk_count;
u16 mchunk_count;
u16 scenario_count;
// TODO fill the rest
};
class PlaygoChunk {
public:
PlaygoChunk() = default;
~PlaygoChunk() = default;
bool Open(const std::filesystem::path& filepath);
PlaygoHeader GetPlaygoHeader() {
return playgoHeader;
}
private:
PlaygoHeader playgoHeader;
};

View file

@ -26,23 +26,27 @@ void MntPoints::UnmountAll() {
}
std::filesystem::path MntPoints::GetHostPath(const std::string& guest_directory) {
const MntPair* mount = GetMount(guest_directory);
// Evil games like Turok2 pass double slashes e.g /app0//game.kpf
auto corrected_path = guest_directory;
size_t pos = corrected_path.find("//");
while (pos != std::string::npos) {
corrected_path.replace(pos, 2, "/");
pos = corrected_path.find("//", pos + 1);
}
const MntPair* mount = GetMount(corrected_path);
if (!mount) {
return guest_directory;
return "";
}
// Nothing to do if getting the mount itself.
if (guest_directory == mount->mount) {
if (corrected_path == mount->mount) {
return mount->host_path;
}
// Remove device (e.g /app0) from path to retrieve relative path.
u32 pos = mount->mount.size() + 1;
// Evil games like Turok2 pass double slashes e.g /app0//game.kpf
if (guest_directory[pos] == '/') {
pos++;
}
const auto rel_path = std::string_view(guest_directory).substr(pos);
pos = mount->mount.size() + 1;
const auto rel_path = std::string_view(corrected_path).substr(pos);
const auto host_path = mount->host_path / rel_path;
if (!NeedsCaseInsensiveSearch) {
return host_path;

View file

@ -198,13 +198,9 @@ int PS4_SYSV_ABI sceAppContentTemporaryDataMount() {
int PS4_SYSV_ABI sceAppContentTemporaryDataMount2(OrbisAppContentTemporaryDataOption option,
OrbisAppContentMountPoint* mountPoint) {
if (std::string_view(mountPoint->data).empty()) // causing issues with save_data.
if (mountPoint == nullptr)
return ORBIS_APP_CONTENT_ERROR_PARAMETER;
auto* param_sfo = Common::Singleton<PSF>::Instance();
std::string id(param_sfo->GetString("CONTENT_ID"), 7, 9);
const auto& mount_dir = Common::FS::GetUserPath(Common::FS::PathType::TempDataDir) / id;
auto* mnt = Common::Singleton<Core::FileSys::MntPoints>::Instance();
mnt->Mount(mount_dir, mountPoint->data);
strncpy(mountPoint->data, "/temp0", 16);
LOG_INFO(Lib_AppContent, "sceAppContentTemporaryDataMount2: option = {}, mountPoint = {}",
option, mountPoint->data);
return ORBIS_OK;

View file

@ -53,6 +53,9 @@ int PS4_SYSV_ABI sceKernelOpen(const char* path, int flags, u16 mode) {
if (std::string_view{path} == "/dev/stdout") {
return 2002;
}
if (std::string_view{path} == "/dev/urandom") {
return 2003;
}
u32 handle = h->CreateHandle();
auto* file = h->GetFile(handle);
if (directory) {
@ -113,6 +116,9 @@ int PS4_SYSV_ABI sceKernelClose(int d) {
if (d < 3) { // d probably hold an error code
return ORBIS_KERNEL_ERROR_EPERM;
}
if (d == 2003) { // dev/urandom case
return SCE_OK;
}
auto* h = Common::Singleton<Core::FileSys::HandleTable>::Instance();
auto* file = h->GetFile(d);
if (file == nullptr) {
@ -223,6 +229,13 @@ s64 PS4_SYSV_ABI posix_lseek(int d, s64 offset, int whence) {
}
s64 PS4_SYSV_ABI sceKernelRead(int d, void* buf, size_t nbytes) {
if (d == 2003) // dev urandom case
{
auto rbuf = static_cast<char*>(buf);
for (size_t i = 0; i < nbytes; i++)
rbuf[i] = std::rand() & 0xFF;
return nbytes;
}
auto* h = Common::Singleton<Core::FileSys::HandleTable>::Instance();
auto* file = h->GetFile(d);
if (file == nullptr) {
@ -460,6 +473,7 @@ s64 PS4_SYSV_ABI sceKernelPwrite(int d, void* buf, size_t nbytes, s64 offset) {
}
void fileSystemSymbolsRegister(Core::Loader::SymbolsResolver* sym) {
std::srand(std::time(nullptr));
LIB_FUNCTION("1G3lF1Gg1k8", "libkernel", 1, "libkernel", 1, 1, sceKernelOpen);
LIB_FUNCTION("wuCroIGjt2g", "libScePosix", 1, "libkernel", 1, 1, posix_open);
LIB_FUNCTION("UK2Tl2DWUns", "libkernel", 1, "libkernel", 1, 1, sceKernelClose);

View file

@ -405,6 +405,9 @@ void LibKernel_Register(Core::Loader::SymbolsResolver* sym) {
LIB_FUNCTION("VOx8NGmHXTs", "libkernel", 1, "libkernel", 1, 1, sceKernelGetCpumode);
LIB_FUNCTION("Xjoosiw+XPI", "libkernel", 1, "libkernel", 1, 1, sceKernelUuidCreate);
LIB_FUNCTION("2SKEx6bSq-4", "libkernel", 1, "libkernel", 1, 1, sceKernelBatchMap);
LIB_FUNCTION("kBJzF8x4SyE", "libkernel", 1, "libkernel", 1, 1, sceKernelBatchMap2);
// equeue
LIB_FUNCTION("D0OdFMjp46I", "libkernel", 1, "libkernel", 1, 1, sceKernelCreateEqueue);
LIB_FUNCTION("jpFjmgAC5AE", "libkernel", 1, "libkernel", 1, 1, sceKernelDeleteEqueue);

View file

@ -3,6 +3,7 @@
#include <bit>
#include "common/alignment.h"
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/singleton.h"
#include "core/libraries/error_codes.h"
@ -225,4 +226,52 @@ int PS4_SYSV_ABI sceKernelGetDirectMemoryType(u64 addr, int* directMemoryTypeOut
directMemoryEndOut);
}
s32 PS4_SYSV_ABI sceKernelBatchMap(OrbisKernelBatchMapEntry* entries, int numEntries,
int* numEntriesOut) {
return sceKernelBatchMap2(entries, numEntries, numEntriesOut,
MemoryFlags::SCE_KERNEL_MAP_FIXED); // 0x10, 0x410?
}
int PS4_SYSV_ABI sceKernelMunmap(void* addr, size_t len);
s32 PS4_SYSV_ABI sceKernelBatchMap2(OrbisKernelBatchMapEntry* entries, int numEntries,
int* numEntriesOut, int flags) {
int processed = 0;
int result = 0;
for (int i = 0; i < numEntries; i++) {
if (entries == nullptr || entries[i].length == 0 || entries[i].operation > 4) {
result = ORBIS_KERNEL_ERROR_EINVAL;
break; // break and assign a value to numEntriesOut.
}
if (entries[i].operation == MemoryOpTypes::ORBIS_KERNEL_MAP_OP_MAP_DIRECT) {
result = sceKernelMapNamedDirectMemory(&entries[i].start, entries[i].length,
entries[i].protection, flags,
static_cast<s64>(entries[i].offset), 0, "");
LOG_INFO(
Kernel_Vmm,
"BatchMap: entry = {}, operation = {}, len = {:#x}, offset = {:#x}, type = {}, "
"result = {}",
i, entries[i].operation, entries[i].length, entries[i].offset, (u8)entries[i].type,
result);
if (result == 0)
processed++;
} else if (entries[i].operation == MemoryOpTypes::ORBIS_KERNEL_MAP_OP_UNMAP) {
result = sceKernelMunmap(entries[i].start, entries[i].length);
LOG_INFO(Kernel_Vmm, "BatchMap: entry = {}, operation = {}, len = {:#x}, result = {}",
i, entries[i].operation, entries[i].length, result);
if (result == 0)
processed++;
} else {
UNREACHABLE_MSG("called: Unimplemented Operation = {}", entries[i].operation);
}
}
if (numEntriesOut != NULL) { // can be zero. do not return an error code.
*numEntriesOut = processed;
}
return result;
}
} // namespace Libraries::Kernel

View file

@ -6,7 +6,7 @@
#include "common/bit_field.h"
#include "common/types.h"
constexpr u64 SCE_KERNEL_MAIN_DMEM_SIZE = 5376_MB; // ~ 6GB
constexpr u64 SCE_KERNEL_MAIN_DMEM_SIZE = 6_GB; // ~ 6GB
namespace Libraries::Kernel {
@ -31,6 +31,14 @@ enum MemoryProtection : u32 {
SCE_KERNEL_PROT_GPU_RW = 0x30 // Permit reads/writes from the GPU
};
enum MemoryOpTypes : u32 {
ORBIS_KERNEL_MAP_OP_MAP_DIRECT = 0,
ORBIS_KERNEL_MAP_OP_UNMAP = 1,
ORBIS_KERNEL_MAP_OP_PROTECT = 2,
ORBIS_KERNEL_MAP_OP_MAP_FLEXIBLE = 3,
ORBIS_KERNEL_MAP_OP_TYPE_PROTECT = 4
};
struct OrbisQueryInfo {
uintptr_t start;
uintptr_t end;
@ -53,6 +61,16 @@ struct OrbisVirtualQueryInfo {
std::array<char, 32> name;
};
struct OrbisKernelBatchMapEntry {
void* start;
off_t offset;
size_t length;
char protection;
char type;
short reserved;
int operation;
};
u64 PS4_SYSV_ABI sceKernelGetDirectMemorySize();
int PS4_SYSV_ABI sceKernelAllocateDirectMemory(s64 searchStart, s64 searchEnd, u64 len,
u64 alignment, int memoryType, s64* physAddrOut);
@ -85,4 +103,9 @@ int PS4_SYSV_ABI sceKernelGetDirectMemoryType(u64 addr, int* directMemoryTypeOut
void** directMemoryStartOut,
void** directMemoryEndOut);
s32 PS4_SYSV_ABI sceKernelBatchMap(OrbisKernelBatchMapEntry* entries, int numEntries,
int* numEntriesOut);
s32 PS4_SYSV_ABI sceKernelBatchMap2(OrbisKernelBatchMapEntry* entries, int numEntries,
int* numEntriesOut, int flags);
} // namespace Libraries::Kernel

View file

@ -785,7 +785,22 @@ int PS4_SYSV_ABI posix_pthread_mutex_destroy(ScePthreadMutex* mutex) {
int PS4_SYSV_ABI posix_pthread_cond_wait(ScePthreadCond* cond, ScePthreadMutex* mutex) {
int result = scePthreadCondWait(cond, mutex);
if (result < 0) {
UNREACHABLE();
int rt = result > SCE_KERNEL_ERROR_UNKNOWN && result <= SCE_KERNEL_ERROR_ESTOP
? result + -SCE_KERNEL_ERROR_UNKNOWN
: POSIX_EOTHER;
return rt;
}
return result;
}
int PS4_SYSV_ABI posix_pthread_cond_timedwait(ScePthreadCond* cond, ScePthreadMutex* mutex,
u64 usec) {
int result = scePthreadCondTimedwait(cond, mutex, usec);
if (result < 0) {
int rt = result > SCE_KERNEL_ERROR_UNKNOWN && result <= SCE_KERNEL_ERROR_ESTOP
? result + -SCE_KERNEL_ERROR_UNKNOWN
: POSIX_EOTHER;
return rt;
}
return result;
}
@ -1321,10 +1336,23 @@ int PS4_SYSV_ABI posix_sem_wait(sem_t* sem) {
return sem_wait(sem);
}
int PS4_SYSV_ABI posix_sem_timedwait(sem_t* sem, const timespec* t) {
#ifndef __APPLE__
return sem_timedwait(sem, t);
#else
LOG_ERROR(Kernel_Pthread, "Apple doesn't support sem_timedwait yet");
return 0; // unsupported for apple yet
#endif
}
int PS4_SYSV_ABI posix_sem_post(sem_t* sem) {
return sem_post(sem);
}
int PS4_SYSV_ABI posix_sem_destroy(sem_t* sem) {
return sem_destroy(sem);
}
int PS4_SYSV_ABI posix_sem_getvalue(sem_t* sem, int* sval) {
return sem_getvalue(sem, sval);
}
@ -1350,6 +1378,11 @@ int PS4_SYSV_ABI scePthreadOnce(int* once_control, void (*init_routine)(void)) {
UNREACHABLE();
}
[[noreturn]] void PS4_SYSV_ABI posix_pthread_exit(void* value_ptr) {
pthread_exit(value_ptr);
UNREACHABLE();
}
int PS4_SYSV_ABI scePthreadGetthreadid() {
return (int)(size_t)g_pthread_self;
}
@ -1383,6 +1416,26 @@ int PS4_SYSV_ABI posix_pthread_condattr_setclock(ScePthreadCondattr* attr, clock
return SCE_OK;
}
int PS4_SYSV_ABI posix_pthread_getschedparam(ScePthread thread, int* policy,
SceKernelSchedParam* param) {
return scePthreadGetschedparam(thread, policy, param);
}
int PS4_SYSV_ABI posix_pthread_setschedparam(ScePthread thread, int policy,
const SceKernelSchedParam* param) {
return scePthreadSetschedparam(thread, policy, param);
}
int PS4_SYSV_ABI posix_pthread_attr_getschedpolicy(const ScePthreadAttr* attr, int* policy) {
return scePthreadAttrGetschedpolicy(attr, policy);
}
int PS4_SYSV_ABI scePthreadRename(ScePthread thread, const char* name) {
thread->name = name;
LOG_INFO(Kernel_Pthread, "scePthreadRename: name = {}", thread->name);
return SCE_OK;
}
void pthreadSymbolsRegister(Core::Loader::SymbolsResolver* sym) {
LIB_FUNCTION("lZzFeSxPl08", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_setcancelstate);
LIB_FUNCTION("0TyVk4MSLt0", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_cond_init);
@ -1401,11 +1454,13 @@ void pthreadSymbolsRegister(Core::Loader::SymbolsResolver* sym) {
LIB_FUNCTION("4qGrR6eoP9Y", "libkernel", 1, "libkernel", 1, 1, scePthreadDetach);
LIB_FUNCTION("3PtV6p3QNX4", "libkernel", 1, "libkernel", 1, 1, scePthreadEqual);
LIB_FUNCTION("3kg7rT0NQIs", "libkernel", 1, "libkernel", 1, 1, scePthreadExit);
LIB_FUNCTION("FJrT5LuUBAU", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_exit);
LIB_FUNCTION("7Xl257M4VNI", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_equal);
LIB_FUNCTION("h9CcP3J0oVM", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_join);
LIB_FUNCTION("EI-5-jlq2dE", "libkernel", 1, "libkernel", 1, 1, scePthreadGetthreadid);
LIB_FUNCTION("1tKyG7RlMJo", "libkernel", 1, "libkernel", 1, 1, scePthreadGetprio);
LIB_FUNCTION("W0Hpm2X0uPE", "libkernel", 1, "libkernel", 1, 1, scePthreadSetprio);
LIB_FUNCTION("GBUY7ywdULE", "libkernel", 1, "libkernel", 1, 1, scePthreadRename);
LIB_FUNCTION("aI+OeCz8xrQ", "libkernel", 1, "libkernel", 1, 1, scePthreadSelf);
LIB_FUNCTION("EotR8a3ASf4", "libkernel", 1, "libkernel", 1, 1, posix_pthread_self);
@ -1462,6 +1517,7 @@ void pthreadSymbolsRegister(Core::Loader::SymbolsResolver* sym) {
LIB_FUNCTION("ltCfaGr2JGE", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_destroy);
LIB_FUNCTION("Op8TBGY5KHg", "libkernel", 1, "libkernel", 1, 1, posix_pthread_cond_wait);
LIB_FUNCTION("Op8TBGY5KHg", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_cond_wait);
LIB_FUNCTION("27bAgiJmOh0", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_cond_timedwait);
LIB_FUNCTION("mkx2fVhNMsg", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_cond_broadcast);
LIB_FUNCTION("dQHWEsJtoE4", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutexattr_init);
LIB_FUNCTION("mDmgMOGVUqg", "libScePosix", 1, "libkernel", 1, 1,
@ -1476,6 +1532,8 @@ void pthreadSymbolsRegister(Core::Loader::SymbolsResolver* sym) {
LIB_FUNCTION("EjllaAqAPZo", "libScePosix", 1, "libkernel", 1, 1,
posix_pthread_condattr_setclock);
LIB_FUNCTION("Z4QosVuAsA0", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_once);
LIB_FUNCTION("RtLRV-pBTTY", "libScePosix", 1, "libkernel", 1, 1,
posix_pthread_attr_getschedpolicy);
// openorbis weird functions
LIB_FUNCTION("7H0iTOciTLo", "libkernel", 1, "libkernel", 1, 1, posix_pthread_mutex_lock);
@ -1490,9 +1548,13 @@ void pthreadSymbolsRegister(Core::Loader::SymbolsResolver* sym) {
LIB_FUNCTION("+U1R4WtXvoc", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_detach);
LIB_FUNCTION("CBNtXOoef-E", "libScePosix", 1, "libkernel", 1, 1, posix_sched_get_priority_max);
LIB_FUNCTION("m0iS6jNsXds", "libScePosix", 1, "libkernel", 1, 1, posix_sched_get_priority_min);
LIB_FUNCTION("FIs3-UQT9sg", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_getschedparam);
LIB_FUNCTION("Xs9hdiD7sAA", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_setschedparam);
LIB_FUNCTION("pDuPEf3m4fI", "libScePosix", 1, "libkernel", 1, 1, posix_sem_init);
LIB_FUNCTION("YCV5dGGBcCo", "libScePosix", 1, "libkernel", 1, 1, posix_sem_wait);
LIB_FUNCTION("w5IHyvahg-o", "libScePosix", 1, "libkernel", 1, 1, posix_sem_timedwait);
LIB_FUNCTION("IKP8typ0QUk", "libScePosix", 1, "libkernel", 1, 1, posix_sem_post);
LIB_FUNCTION("cDW233RAwWo", "libScePosix", 1, "libkernel", 1, 1, posix_sem_destroy);
LIB_FUNCTION("Bq+LRV-N6Hk", "libScePosix", 1, "libkernel", 1, 1, posix_sem_getvalue);
// libs
RwlockSymbolsRegister(sym);

View file

@ -41,7 +41,6 @@ public:
AddWaiter(waiter);
// Perform the wait.
std::exchange(lk, std::unique_lock{waiter.mutex});
return waiter.Wait(lk, timeout);
}
@ -59,10 +58,9 @@ public:
it++;
continue;
}
std::scoped_lock lk2{waiter.mutex};
it = wait_list.erase(it);
token_count -= waiter.need_count;
waiter.cv.notify_one();
it = wait_list.erase(it);
}
return true;
@ -84,7 +82,6 @@ public:
public:
struct WaitingThread : public ListBaseHook {
std::mutex mutex;
std::string name;
std::condition_variable cv;
u32 priority;

View file

@ -1,6 +1,7 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <core/file_format/playgo_chunk.h>
#include "common/logging/log.h"
#include "common/singleton.h"
#include "core/libraries/error_codes.h"
@ -8,8 +9,6 @@
#include "playgo.h"
namespace Libraries::PlayGo {
// this lib is used to play as the game is being installed.
// can be skipped by just returning and assigning the correct values.
s32 PS4_SYSV_ABI sceDbgPlayGoRequestNextChunk() {
LOG_ERROR(Lib_PlayGo, "(STUBBED)called");
@ -52,9 +51,16 @@ s32 PS4_SYSV_ABI scePlayGoGetLocus(OrbisPlayGoHandle handle, const OrbisPlayGoCh
uint32_t numberOfEntries, OrbisPlayGoLocus* outLoci) {
LOG_ERROR(Lib_PlayGo, "(STUBBED)called handle = {}, chunkIds = {}, numberOfEntries = {}",
handle, *chunkIds, numberOfEntries);
// assign all now so that scePlayGoGetLocus is not called again for every single entry
std::fill(outLoci, outLoci + numberOfEntries,
OrbisPlayGoLocusValue::ORBIS_PLAYGO_LOCUS_LOCAL_FAST);
auto* playgo = Common::Singleton<PlaygoChunk>::Instance();
for (uint32_t i = 0; i < numberOfEntries; i++) {
if (chunkIds[i] <= playgo->GetPlaygoHeader().mchunk_count) {
outLoci[i] = OrbisPlayGoLocusValue::ORBIS_PLAYGO_LOCUS_LOCAL_FAST;
} else {
return ORBIS_PLAYGO_ERROR_BAD_CHUNK_ID;
}
}
return ORBIS_OK;
}
@ -70,7 +76,7 @@ s32 PS4_SYSV_ABI scePlayGoGetProgress(OrbisPlayGoHandle handle, const OrbisPlayG
s32 PS4_SYSV_ABI scePlayGoGetToDoList(OrbisPlayGoHandle handle, OrbisPlayGoToDo* outTodoList,
u32 numberOfEntries, u32* outEntries) {
LOG_ERROR(Lib_PlayGo, "(STUBBED)called");
if (handle != shadMagic)
if (handle != 1)
return ORBIS_PLAYGO_ERROR_BAD_HANDLE;
if (outTodoList == nullptr)
return ORBIS_PLAYGO_ERROR_BAD_POINTER;
@ -88,7 +94,7 @@ s32 PS4_SYSV_ABI scePlayGoInitialize(OrbisPlayGoInitParams* param) {
}
s32 PS4_SYSV_ABI scePlayGoOpen(OrbisPlayGoHandle* outHandle, const void* param) {
*outHandle = shadMagic;
*outHandle = 1;
LOG_INFO(Lib_PlayGo, "(STUBBED)called");
return ORBIS_OK;
}
@ -141,4 +147,4 @@ void RegisterlibScePlayGo(Core::Loader::SymbolsResolver* sym) {
LIB_FUNCTION("MPe0EeBGM-E", "libScePlayGo", 1, "libScePlayGo", 1, 0, scePlayGoTerminate);
};
} // namespace Libraries::PlayGo
} // namespace Libraries::PlayGo

View file

@ -186,21 +186,23 @@ int PS4_SYSV_ABI sceSaveDataDirNameSearch(const OrbisSaveDataDirNameSearchCond*
if (!mount_dir.empty() && std::filesystem::exists(mount_dir)) {
if (cond->dirName == nullptr) { // look for all dirs if no dir is provided.
for (int i = 0; const auto& entry : std::filesystem::directory_iterator(mount_dir)) {
if (std::filesystem::is_directory(entry.path())) {
if (std::filesystem::is_directory(entry.path()) &&
entry.path().filename().string() != "sdmemory") {
// sceSaveDataDirNameSearch does not search for dataMemory1/2 dirs.
i++;
result->dirNamesNum = 0; // why is it 1024? is it max?
// copy dir name to be used by sceSaveDataMount in read mode.
strncpy(result->dirNames[i].data, entry.path().filename().string().c_str(), 32);
result->hitNum = i + 1;
result->dirNamesNum = i + 1; // to confirm
result->setNum = i + 1; // to confirm
result->dirNamesNum = i + 1;
result->setNum = i + 1;
}
}
} else { // Need a game to test.
strncpy(result->dirNames[0].data, cond->dirName->data, 32);
result->hitNum = 1;
result->dirNamesNum = 1; // to confirm
result->setNum = 1; // to confirm
result->dirNamesNum = 1;
result->setNum = 1;
}
} else {
result->hitNum = 0;
@ -321,7 +323,7 @@ int PS4_SYSV_ABI sceSaveDataGetSaveDataCount() {
int PS4_SYSV_ABI sceSaveDataGetSaveDataMemory(const u32 userId, void* buf, const size_t bufSize,
const int64_t offset) {
const auto& mount_dir = Common::FS::GetUserPath(Common::FS::PathType::SaveDataDir) /
std::to_string(userId) / game_serial / "save_mem1.sav";
std::to_string(userId) / game_serial / "sdmemory/save_mem1.sav";
Common::FS::IOFile file(mount_dir, Common::FS::FileAccessMode::Read);
if (!file.IsOpen()) {
@ -336,7 +338,7 @@ int PS4_SYSV_ABI sceSaveDataGetSaveDataMemory(const u32 userId, void* buf, const
int PS4_SYSV_ABI sceSaveDataGetSaveDataMemory2(OrbisSaveDataMemoryGet2* getParam) {
const auto& mount_dir = Common::FS::GetUserPath(Common::FS::PathType::SaveDataDir) /
std::to_string(getParam->userId) / game_serial;
std::to_string(getParam->userId) / game_serial / "sdmemory";
if (getParam == nullptr)
return ORBIS_SAVE_DATA_ERROR_PARAMETER;
if (getParam->data != nullptr) {
@ -604,7 +606,7 @@ int PS4_SYSV_ABI sceSaveDataSetSaveDataMemory(const u32 userId, const void* buf,
const size_t bufSize, const int64_t offset) {
LOG_INFO(Lib_SaveData, "called");
const auto& mount_dir = Common::FS::GetUserPath(Common::FS::PathType::SaveDataDir) /
std::to_string(userId) / game_serial / "save_mem1.sav";
std::to_string(userId) / game_serial / "sdmemory/save_mem1.sav";
Common::FS::IOFile file(mount_dir, Common::FS::FileAccessMode::Write);
file.Seek(offset);
@ -616,7 +618,7 @@ int PS4_SYSV_ABI sceSaveDataSetSaveDataMemory(const u32 userId, const void* buf,
int PS4_SYSV_ABI sceSaveDataSetSaveDataMemory2(const OrbisSaveDataMemorySet2* setParam) {
LOG_INFO(Lib_SaveData, "called: dataNum = {}, slotId= {}", setParam->dataNum, setParam->slotId);
const auto& mount_dir = Common::FS::GetUserPath(Common::FS::PathType::SaveDataDir) /
std::to_string(setParam->userId) / game_serial;
std::to_string(setParam->userId) / game_serial / "sdmemory";
if (setParam->data != nullptr) {
Common::FS::IOFile file(mount_dir / "save_mem2.sav", Common::FS::FileAccessMode::Write);
if (!file.IsOpen())
@ -644,7 +646,7 @@ int PS4_SYSV_ABI sceSaveDataSetupSaveDataMemory(u32 userId, size_t memorySize,
LOG_INFO(Lib_SaveData, "called:userId = {}, memorySize = {}", userId, memorySize);
const auto& mount_dir = Common::FS::GetUserPath(Common::FS::PathType::SaveDataDir) /
std::to_string(userId) / game_serial;
std::to_string(userId) / game_serial / "sdmemory";
if (std::filesystem::exists(mount_dir)) {
return ORBIS_SAVE_DATA_ERROR_EXISTS;
@ -663,7 +665,7 @@ int PS4_SYSV_ABI sceSaveDataSetupSaveDataMemory2(const OrbisSaveDataMemorySetup2
LOG_INFO(Lib_SaveData, "called");
// if (setupParam->option == 1) { // check this later.
const auto& mount_dir = Common::FS::GetUserPath(Common::FS::PathType::SaveDataDir) /
std::to_string(setupParam->userId) / game_serial;
std::to_string(setupParam->userId) / game_serial / "sdmemory";
if (std::filesystem::exists(mount_dir) &&
std::filesystem::exists(mount_dir / "save_mem2.sav")) {
Common::FS::IOFile file(mount_dir / "save_mem2.sav", Common::FS::FileAccessMode::Read);

View file

@ -320,11 +320,15 @@ void Linker::InitTlsForThread(bool is_primary) {
static constexpr size_t TlsAllocAlign = 0x20;
const size_t total_tls_size = Common::AlignUp(static_tls_size, TlsAllocAlign) + TcbSize;
// If sceKernelMapNamedFlexibleMemory is being called from libkernel and addr = 0
// it automatically places mappings in system reserved area instead of managed.
static constexpr VAddr KernelAllocBase = 0x880000000ULL;
// The kernel module has a few different paths for TLS allocation.
// For SDK < 1.7 it allocates both main and secondary thread blocks using libc mspace/malloc.
// In games compiled with newer SDK, the main thread gets mapped from flexible memory,
// with addr = 0, so system managed area. Here we will only implement the latter.
void* addr_out{};
void* addr_out{reinterpret_cast<void*>(KernelAllocBase)};
if (is_primary) {
const size_t tls_aligned = Common::AlignUp(total_tls_size, 16_KB);
const int ret = Libraries::Kernel::sceKernelMapNamedFlexibleMemory(

View file

@ -4,7 +4,6 @@
#include "common/alignment.h"
#include "common/assert.h"
#include "common/debug.h"
#include "common/scope_exit.h"
#include "core/libraries/error_codes.h"
#include "core/libraries/kernel/memory_management.h"
#include "core/memory.h"
@ -55,7 +54,7 @@ PAddr MemoryManager::Allocate(PAddr search_start, PAddr search_end, size_t size,
free_addr = alignment > 0 ? Common::AlignUp(free_addr, alignment) : free_addr;
// Add the allocated region to the list and commit its pages.
auto& area = AddDmemAllocation(free_addr, size);
auto& area = CarveDmemArea(free_addr, size);
area.memory_type = memory_type;
area.is_free = false;
return free_addr;
@ -100,29 +99,32 @@ int MemoryManager::Reserve(void** out_addr, VAddr virtual_addr, size_t size, Mem
alignment = alignment > 0 ? alignment : 16_KB;
VAddr mapped_addr = alignment > 0 ? Common::AlignUp(virtual_addr, alignment) : virtual_addr;
// Fixed mapping means the virtual address must exactly match the provided one.
if (True(flags & MemoryMapFlags::Fixed)) {
const auto& vma = FindVMA(mapped_addr)->second;
// If the VMA is mapped, unmap the region first.
if (vma.IsMapped()) {
ASSERT_MSG(vma.base == mapped_addr && vma.size == size,
"Region must match when reserving a mapped region");
UnmapMemory(mapped_addr, size);
}
const size_t remaining_size = vma.base + vma.size - mapped_addr;
ASSERT_MSG(vma.type == VMAType::Free && remaining_size >= size);
}
// Find the first free area starting with provided virtual address.
if (False(flags & MemoryMapFlags::Fixed)) {
auto it = FindVMA(mapped_addr);
// If the VMA is free and contains the requested mapping we are done.
if (it->second.type == VMAType::Free && it->second.Contains(virtual_addr, size)) {
mapped_addr = virtual_addr;
} else {
// Search for the first free VMA that fits our mapping.
while (it->second.type != VMAType::Free || it->second.size < size) {
it++;
}
ASSERT(it != vma_map.end());
const auto& vma = it->second;
mapped_addr = alignment > 0 ? Common::AlignUp(vma.base, alignment) : vma.base;
}
mapped_addr = SearchFree(mapped_addr, size, alignment);
}
// Add virtual memory area
auto& new_vma = AddMapping(mapped_addr, size);
const auto new_vma_handle = CarveVMA(mapped_addr, size);
auto& new_vma = new_vma_handle->second;
new_vma.disallow_merge = True(flags & MemoryMapFlags::NoCoalesce);
new_vma.prot = MemoryProt::NoAccess;
new_vma.name = "";
new_vma.type = VMAType::Reserved;
MergeAdjacent(vma_map, new_vma_handle);
*out_addr = std::bit_cast<void*>(mapped_addr);
return ORBIS_OK;
@ -132,6 +134,9 @@ int MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, size_t size, M
MemoryMapFlags flags, VMAType type, std::string_view name,
bool is_exec, PAddr phys_addr, u64 alignment) {
std::scoped_lock lk{mutex};
// Certain games perform flexible mappings on loop to determine
// the available flexible memory size. Questionable but we need to handle this.
if (type == VMAType::Flexible && flexible_usage + size > total_flexible_size) {
return SCE_KERNEL_ERROR_ENOMEM;
}
@ -140,91 +145,63 @@ int MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, size_t size, M
// flag so we will take the branch that searches for free (or reserved) mappings.
virtual_addr = (virtual_addr == 0) ? impl.SystemManagedVirtualBase() : virtual_addr;
alignment = alignment > 0 ? alignment : 16_KB;
VAddr mapped_addr = alignment > 0 ? Common::AlignUp(virtual_addr, alignment) : virtual_addr;
SCOPE_EXIT {
auto& new_vma = AddMapping(mapped_addr, size);
new_vma.disallow_merge = True(flags & MemoryMapFlags::NoCoalesce);
new_vma.prot = prot;
new_vma.name = name;
new_vma.type = type;
if (type == VMAType::Direct) {
new_vma.phys_base = phys_addr;
MapVulkanMemory(mapped_addr, size);
}
if (type == VMAType::Flexible) {
flexible_usage += size;
}
};
// Fixed mapping means the virtual address must exactly match the provided one.
if (True(flags & MemoryMapFlags::Fixed) && True(flags & MemoryMapFlags::NoOverwrite)) {
if (True(flags & MemoryMapFlags::Fixed)) {
// This should return SCE_KERNEL_ERROR_ENOMEM but shouldn't normally happen.
const auto& vma = FindVMA(mapped_addr)->second;
const size_t remaining_size = vma.base + vma.size - mapped_addr;
ASSERT_MSG(vma.type == VMAType::Free && remaining_size >= size);
ASSERT_MSG(!vma.IsMapped() && remaining_size >= size);
}
// Find the first free area starting with provided virtual address.
if (False(flags & MemoryMapFlags::Fixed)) {
auto it = FindVMA(mapped_addr);
// If the VMA is free and contains the requested mapping we are done.
if (it->second.type == VMAType::Free && it->second.Contains(virtual_addr, size)) {
mapped_addr = virtual_addr;
} else {
// Search for the first free VMA that fits our mapping.
while (it->second.type != VMAType::Free || it->second.size < size) {
it++;
}
ASSERT(it != vma_map.end());
const auto& vma = it->second;
mapped_addr = alignment > 0 ? Common::AlignUp(vma.base, alignment) : vma.base;
}
mapped_addr = SearchFree(mapped_addr, size, alignment);
}
// Perform the mapping.
*out_addr = impl.Map(mapped_addr, size, alignment, phys_addr, is_exec);
TRACK_ALLOC(*out_addr, size, "VMEM");
auto& new_vma = CarveVMA(mapped_addr, size)->second;
new_vma.disallow_merge = True(flags & MemoryMapFlags::NoCoalesce);
new_vma.prot = prot;
new_vma.name = name;
new_vma.type = type;
if (type == VMAType::Direct) {
new_vma.phys_base = phys_addr;
MapVulkanMemory(mapped_addr, size);
}
if (type == VMAType::Flexible) {
flexible_usage += size;
}
return ORBIS_OK;
}
int MemoryManager::MapFile(void** out_addr, VAddr virtual_addr, size_t size, MemoryProt prot,
MemoryMapFlags flags, uintptr_t fd, size_t offset) {
if (virtual_addr == 0) {
virtual_addr = impl.SystemManagedVirtualBase();
} else {
LOG_INFO(Kernel_Vmm, "Virtual addr {:#x} with size {:#x}", virtual_addr, size);
}
VAddr mapped_addr = 0;
VAddr mapped_addr = (virtual_addr == 0) ? impl.SystemManagedVirtualBase() : virtual_addr;
const size_t size_aligned = Common::AlignUp(size, 16_KB);
// Find first free area to map the file.
if (False(flags & MemoryMapFlags::Fixed)) {
auto it = FindVMA(virtual_addr);
while (it->second.type != VMAType::Free || it->second.size < size_aligned) {
it++;
}
ASSERT(it != vma_map.end());
mapped_addr = it->second.base;
mapped_addr = SearchFree(mapped_addr, size_aligned);
}
if (True(flags & MemoryMapFlags::Fixed)) {
const auto& vma = FindVMA(virtual_addr)->second;
const size_t remaining_size = vma.base + vma.size - virtual_addr;
ASSERT_MSG((vma.type == VMAType::Free || vma.type == VMAType::Reserved) &&
remaining_size >= size);
mapped_addr = virtual_addr;
ASSERT_MSG(!vma.IsMapped() && remaining_size >= size);
}
// Map the file.
impl.MapFile(mapped_addr, size, offset, std::bit_cast<u32>(prot), fd);
// Add virtual memory area
auto& new_vma = AddMapping(mapped_addr, size_aligned);
auto& new_vma = CarveVMA(mapped_addr, size_aligned)->second;
new_vma.disallow_merge = True(flags & MemoryMapFlags::NoCoalesce);
new_vma.prot = prot;
new_vma.name = "File";
@ -238,10 +215,9 @@ int MemoryManager::MapFile(void** out_addr, VAddr virtual_addr, size_t size, Mem
void MemoryManager::UnmapMemory(VAddr virtual_addr, size_t size) {
std::scoped_lock lk{mutex};
// TODO: Partial unmaps are technically supported by the guest.
const auto it = vma_map.find(virtual_addr);
ASSERT_MSG(it != vma_map.end() && it->first == virtual_addr,
"Attempting to unmap partially mapped range");
const auto it = FindVMA(virtual_addr);
ASSERT_MSG(it->second.Contains(virtual_addr, size),
"Existing mapping does not contain requested unmap range");
const auto type = it->second.type;
const bool has_backing = type == VMAType::Direct || type == VMAType::File;
@ -253,11 +229,13 @@ void MemoryManager::UnmapMemory(VAddr virtual_addr, size_t size) {
}
// Mark region as free and attempt to coalesce it with neighbours.
auto& vma = it->second;
const auto new_it = CarveVMA(virtual_addr, size);
auto& vma = new_it->second;
vma.type = VMAType::Free;
vma.prot = MemoryProt::NoAccess;
vma.phys_base = 0;
MergeAdjacent(vma_map, it);
vma.disallow_merge = false;
MergeAdjacent(vma_map, new_it);
// Unmap the memory region.
impl.Unmap(virtual_addr, size, has_backing);
@ -288,10 +266,10 @@ int MemoryManager::VirtualQuery(VAddr addr, int flags,
std::scoped_lock lk{mutex};
auto it = FindVMA(addr);
if (it->second.type == VMAType::Free && flags == 1) {
if (!it->second.IsMapped() && flags == 1) {
it++;
}
if (it->second.type == VMAType::Free) {
if (!it->second.IsMapped()) {
LOG_WARNING(Kernel_Vmm, "VirtualQuery on free memory region");
return ORBIS_KERNEL_ERROR_EACCES;
}
@ -360,14 +338,38 @@ std::pair<vk::Buffer, size_t> MemoryManager::GetVulkanBuffer(VAddr addr) {
return std::make_pair(*it->second.buffer, addr - it->first);
}
VirtualMemoryArea& MemoryManager::AddMapping(VAddr virtual_addr, size_t size) {
VAddr MemoryManager::SearchFree(VAddr virtual_addr, size_t size, u32 alignment) {
auto it = FindVMA(virtual_addr);
// If the VMA is free and contains the requested mapping we are done.
if (it->second.IsFree() && it->second.Contains(virtual_addr, size)) {
return virtual_addr;
}
// Search for the first free VMA that fits our mapping.
const auto is_suitable = [&] {
if (!it->second.IsFree()) {
return false;
}
const auto& vma = it->second;
virtual_addr = Common::AlignUp(vma.base, alignment);
// Sometimes the alignment itself might be larger than the VMA.
if (virtual_addr > vma.base + vma.size) {
return false;
}
const size_t remaining_size = vma.base + vma.size - virtual_addr;
return remaining_size >= size;
};
while (!is_suitable()) {
it++;
}
return virtual_addr;
}
MemoryManager::VMAHandle MemoryManager::CarveVMA(VAddr virtual_addr, size_t size) {
auto vma_handle = FindVMA(virtual_addr);
ASSERT_MSG(vma_handle != vma_map.end(), "Virtual address not in vm_map");
const VirtualMemoryArea& vma = vma_handle->second;
ASSERT_MSG((vma.type == VMAType::Free || vma.type == VMAType::Reserved) &&
vma.base <= virtual_addr,
"Adding a mapping to already mapped region");
ASSERT_MSG(vma.base <= virtual_addr, "Adding a mapping to already mapped region");
const VAddr start_in_vma = virtual_addr - vma.base;
const VAddr end_in_vma = start_in_vma + size;
@ -382,10 +384,10 @@ VirtualMemoryArea& MemoryManager::AddMapping(VAddr virtual_addr, size_t size) {
vma_handle = Split(vma_handle, start_in_vma);
}
return vma_handle->second;
return vma_handle;
}
DirectMemoryArea& MemoryManager::AddDmemAllocation(PAddr addr, size_t size) {
DirectMemoryArea& MemoryManager::CarveDmemArea(PAddr addr, size_t size) {
auto dmem_handle = FindDmemArea(addr);
ASSERT_MSG(dmem_handle != dmem_map.end(), "Physical address not in dmem_map");

View file

@ -89,7 +89,15 @@ struct VirtualMemoryArea {
uintptr_t fd = 0;
bool Contains(VAddr addr, size_t size) const {
return addr >= base && (addr + size) < (base + this->size);
return addr >= base && (addr + size) <= (base + this->size);
}
bool IsFree() const noexcept {
return type == VMAType::Free;
}
bool IsMapped() const noexcept {
return type != VMAType::Free && type != VMAType::Reserved;
}
bool CanMergeWith(const VirtualMemoryArea& next) const {
@ -198,9 +206,11 @@ private:
return iter;
}
VirtualMemoryArea& AddMapping(VAddr virtual_addr, size_t size);
VAddr SearchFree(VAddr virtual_addr, size_t size, u32 alignment = 0);
DirectMemoryArea& AddDmemAllocation(PAddr addr, size_t size);
VMAHandle CarveVMA(VAddr virtual_addr, size_t size);
DirectMemoryArea& CarveDmemArea(PAddr addr, size_t size);
VMAHandle Split(VMAHandle vma_handle, size_t offset_in_vma);

View file

@ -1,10 +1,18 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <fmt/core.h>
#include <core/file_format/playgo_chunk.h>
#include <core/file_format/psf.h>
#include <core/file_format/splash.h>
#include <core/libraries/disc_map/disc_map.h>
#include <core/libraries/libc/libc.h>
#include <core/libraries/libc_internal/libc_internal.h>
#include <core/libraries/rtc/rtc.h>
#include <core/libraries/videoout/video_out.h>
#include "common/config.h"
#include "common/debug.h"
#include "common/logging/backend.h"
#include "common/logging/log.h"
#include "common/ntapi.h"
#include "common/path_util.h"
#include "common/polyfill_thread.h"
@ -24,6 +32,8 @@
#include "emulator.h"
#include "video_core/renderdoc.h"
#include <fmt/core.h>
Frontend::WindowSDL* g_window = nullptr;
namespace Core {
@ -50,8 +60,6 @@ Emulator::Emulator() {
memory = Core::Memory::Instance();
controller = Common::Singleton<Input::GameController>::Instance();
linker = Common::Singleton<Core::Linker>::Instance();
window = std::make_unique<Frontend::WindowSDL>(WindowWidth, WindowHeight, controller);
g_window = window.get();
// Load renderdoc module.
VideoCore::LoadRenderDoc();
@ -69,6 +77,8 @@ void Emulator::Run(const std::filesystem::path& file) {
// Loading param.sfo file if exists
std::string id;
std::string title;
std::string app_version;
std::filesystem::path sce_sys_folder = file.parent_path() / "sce_sys";
if (std::filesystem::is_directory(sce_sys_folder)) {
for (const auto& entry : std::filesystem::directory_iterator(sce_sys_folder)) {
@ -76,11 +86,14 @@ void Emulator::Run(const std::filesystem::path& file) {
auto* param_sfo = Common::Singleton<PSF>::Instance();
param_sfo->open(sce_sys_folder.string() + "/param.sfo", {});
id = std::string(param_sfo->GetString("CONTENT_ID"), 7, 9);
std::string title(param_sfo->GetString("TITLE"));
title = param_sfo->GetString("TITLE");
LOG_INFO(Loader, "Game id: {} Title: {}", id, title);
u32 fw_version = param_sfo->GetInteger("SYSTEM_VER");
std::string app_version = param_sfo->GetString("APP_VER");
app_version = param_sfo->GetString("APP_VER");
LOG_INFO(Loader, "Fw: {:#x} App Version: {}", fw_version, app_version);
} else if (entry.path().filename() == "playgo-chunk.dat") {
auto* playgo = Common::Singleton<PlaygoChunk>::Instance();
playgo->Open(sce_sys_folder.string() + "/playgo-chunk.dat");
} else if (entry.path().filename() == "pic0.png" ||
entry.path().filename() == "pic1.png") {
auto* splash = Common::Singleton<Splash>::Instance();
@ -93,6 +106,12 @@ void Emulator::Run(const std::filesystem::path& file) {
}
}
}
std::string game_title = fmt::format("{} - {} <{}>", id, title, app_version);
window =
std::make_unique<Frontend::WindowSDL>(WindowWidth, WindowHeight, controller, game_title);
g_window = window.get();
const auto& mount_data_dir = Common::FS::GetUserPath(Common::FS::PathType::GameDataDir) / id;
if (!std::filesystem::exists(mount_data_dir)) {
@ -105,6 +124,13 @@ void Emulator::Run(const std::filesystem::path& file) {
}
mnt->Mount(mount_temp_dir, "/temp0"); // called in app_content ==> stat/mkdir
const auto& mount_download_dir =
Common::FS::GetUserPath(Common::FS::PathType::DownloadDir) / id;
if (!std::filesystem::exists(mount_download_dir)) {
std::filesystem::create_directory(mount_download_dir);
}
mnt->Mount(mount_download_dir, "/download0");
const auto& mount_captures_dir = Common::FS::GetUserPath(Common::FS::PathType::CapturesDir);
if (!std::filesystem::exists(mount_captures_dir)) {
std::filesystem::create_directory(mount_captures_dir);
@ -151,10 +177,12 @@ void Emulator::Run(const std::filesystem::path& file) {
}
void Emulator::LoadSystemModules(const std::filesystem::path& file) {
constexpr std::array<SysModules, 8> ModulesToLoad{
constexpr std::array<SysModules, 10> ModulesToLoad{
{{"libSceNgs2.sprx", nullptr},
{"libSceFiber.sprx", nullptr},
{"libSceUlt.sprx", nullptr},
{"libSceJson.sprx", nullptr},
{"libSceJson2.sprx", nullptr},
{"libSceLibcInternal.sprx", &Libraries::LibcInternal::RegisterlibSceLibcInternal},
{"libSceDiscMap.sprx", &Libraries::DiscMap::RegisterlibSceDiscMap},
{"libSceRtc.sprx", &Libraries::Rtc::RegisterlibSceRtc},

View file

@ -19,14 +19,15 @@
namespace Frontend {
WindowSDL::WindowSDL(s32 width_, s32 height_, Input::GameController* controller_)
WindowSDL::WindowSDL(s32 width_, s32 height_, Input::GameController* controller_,
std::string_view game_title)
: width{width_}, height{height_}, controller{controller_} {
if (SDL_Init(SDL_INIT_VIDEO) < 0) {
UNREACHABLE_MSG("Failed to initialize SDL video subsystem: {}", SDL_GetError());
}
SDL_InitSubSystem(SDL_INIT_AUDIO);
const std::string title = "shadPS4 v" + std::string(Common::VERSION);
const std::string title = fmt::format("shadPS4 v{} | {}", Common::VERSION, game_title);
SDL_PropertiesID props = SDL_CreateProperties();
SDL_SetStringProperty(props, SDL_PROP_WINDOW_CREATE_TITLE_STRING, title.c_str());
SDL_SetNumberProperty(props, SDL_PROP_WINDOW_CREATE_X_NUMBER, SDL_WINDOWPOS_CENTERED);

View file

@ -3,6 +3,7 @@
#pragma once
#include <string>
#include "common/types.h"
struct SDL_Window;
@ -40,7 +41,8 @@ struct WindowSystemInfo {
class WindowSDL {
public:
explicit WindowSDL(s32 width, s32 height, Input::GameController* controller);
explicit WindowSDL(s32 width, s32 height, Input::GameController* controller,
std::string_view game_title);
~WindowSDL();
s32 getWidth() const {

View file

@ -258,6 +258,7 @@ Id EmitISub64(EmitContext& ctx, Id a, Id b);
Id EmitSMulExt(EmitContext& ctx, Id a, Id b);
Id EmitUMulExt(EmitContext& ctx, Id a, Id b);
Id EmitIMul32(EmitContext& ctx, Id a, Id b);
Id EmitIMul64(EmitContext& ctx, Id a, Id b);
Id EmitSDiv32(EmitContext& ctx, Id a, Id b);
Id EmitUDiv32(EmitContext& ctx, Id a, Id b);
Id EmitINeg32(EmitContext& ctx, Id value);
@ -271,6 +272,7 @@ Id EmitShiftRightArithmetic32(EmitContext& ctx, Id base, Id shift);
Id EmitShiftRightArithmetic64(EmitContext& ctx, Id base, Id shift);
Id EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
Id EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
Id EmitBitwiseOr64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
Id EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
Id EmitBitFieldInsert(EmitContext& ctx, Id base, Id insert, Id offset, Id count);
Id EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count);
@ -286,8 +288,10 @@ Id EmitSMax32(EmitContext& ctx, Id a, Id b);
Id EmitUMax32(EmitContext& ctx, Id a, Id b);
Id EmitSClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max);
Id EmitUClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max);
Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs);
Id EmitULessThan(EmitContext& ctx, Id lhs, Id rhs);
Id EmitSLessThan32(EmitContext& ctx, Id lhs, Id rhs);
Id EmitSLessThan64(EmitContext& ctx, Id lhs, Id rhs);
Id EmitULessThan32(EmitContext& ctx, Id lhs, Id rhs);
Id EmitULessThan64(EmitContext& ctx, Id lhs, Id rhs);
Id EmitIEqual(EmitContext& ctx, Id lhs, Id rhs);
Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs);
Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs);

View file

@ -84,6 +84,10 @@ Id EmitIMul32(EmitContext& ctx, Id a, Id b) {
return ctx.OpIMul(ctx.U32[1], a, b);
}
Id EmitIMul64(EmitContext& ctx, Id a, Id b) {
return ctx.OpIMul(ctx.U64, a, b);
}
Id EmitSDiv32(EmitContext& ctx, Id a, Id b) {
return ctx.OpSDiv(ctx.U32[1], a, b);
}
@ -142,6 +146,13 @@ Id EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
return result;
}
Id EmitBitwiseOr64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
const Id result{ctx.OpBitwiseOr(ctx.U64, a, b)};
SetZeroFlag(ctx, inst, result);
SetSignFlag(ctx, inst, result);
return result;
}
Id EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
const Id result{ctx.OpBitwiseXor(ctx.U32[1], a, b)};
SetZeroFlag(ctx, inst, result);
@ -231,11 +242,19 @@ Id EmitUClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max) {
return result;
}
Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs) {
Id EmitSLessThan32(EmitContext& ctx, Id lhs, Id rhs) {
return ctx.OpSLessThan(ctx.U1[1], lhs, rhs);
}
Id EmitULessThan(EmitContext& ctx, Id lhs, Id rhs) {
Id EmitSLessThan64(EmitContext& ctx, Id lhs, Id rhs) {
return ctx.OpSLessThan(ctx.U1[1], lhs, rhs);
}
Id EmitULessThan32(EmitContext& ctx, Id lhs, Id rhs) {
return ctx.OpULessThan(ctx.U1[1], lhs, rhs);
}
Id EmitULessThan64(EmitContext& ctx, Id lhs, Id rhs) {
return ctx.OpULessThan(ctx.U1[1], lhs, rhs);
}

View file

@ -388,6 +388,10 @@ spv::ImageFormat GetFormat(const AmdGpu::Image& image) {
image.GetNumberFmt() == AmdGpu::NumberFormat::Unorm) {
return spv::ImageFormat::Rgba8;
}
if (image.GetDataFmt() == AmdGpu::DataFormat::Format8_8_8_8 &&
image.GetNumberFmt() == AmdGpu::NumberFormat::Uint) {
return spv::ImageFormat::Rgba8ui;
}
UNREACHABLE();
}

View file

@ -2392,10 +2392,10 @@ enum class OperandField : u32 {
ConstFloatPos_4_0,
ConstFloatNeg_4_0,
VccZ = 251,
ExecZ,
Scc,
LdsDirect,
LiteralConst,
ExecZ = 252,
Scc = 253,
LdsDirect = 254,
LiteralConst = 255,
VectorGPR,
Undefined = 0xFFFFFFFF,

View file

@ -76,21 +76,21 @@ void Translator::EmitPrologue() {
}
}
template <>
IR::U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) {
// Input modifiers work on float values.
force_flt |= operand.input_modifier.abs | operand.input_modifier.neg;
IR::U32F32 value{};
const bool is_float = operand.type == ScalarType::Float32 || force_flt;
switch (operand.field) {
case OperandField::ScalarGPR:
if (operand.type == ScalarType::Float32 || force_flt) {
if (is_float) {
value = ir.GetScalarReg<IR::F32>(IR::ScalarReg(operand.code));
} else {
value = ir.GetScalarReg<IR::U32>(IR::ScalarReg(operand.code));
}
break;
case OperandField::VectorGPR:
if (operand.type == ScalarType::Float32 || force_flt) {
if (is_float) {
value = ir.GetVectorReg<IR::F32>(IR::VectorReg(operand.code));
} else {
value = ir.GetVectorReg<IR::U32>(IR::VectorReg(operand.code));
@ -164,15 +164,160 @@ IR::U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) {
UNREACHABLE();
}
if (operand.input_modifier.abs) {
value = ir.FPAbs(value);
}
if (operand.input_modifier.neg) {
value = ir.FPNeg(value);
if (is_float) {
if (operand.input_modifier.abs) {
value = ir.FPAbs(value);
}
if (operand.input_modifier.neg) {
value = ir.FPNeg(value);
}
}
return value;
}
template <>
IR::U32 Translator::GetSrc(const InstOperand& operand, bool force_flt) {
return GetSrc<IR::U32F32>(operand, force_flt);
}
template <>
IR::F32 Translator::GetSrc(const InstOperand& operand, bool) {
return GetSrc<IR::U32F32>(operand, true);
}
template <>
IR::U64F64 Translator::GetSrc64(const InstOperand& operand, bool force_flt) {
IR::Value value_hi{};
IR::Value value_lo{};
bool immediate = false;
const bool is_float = operand.type == ScalarType::Float64 || force_flt;
switch (operand.field) {
case OperandField::ScalarGPR:
if (is_float) {
value_lo = ir.GetScalarReg<IR::F32>(IR::ScalarReg(operand.code));
value_hi = ir.GetScalarReg<IR::F32>(IR::ScalarReg(operand.code + 1));
} else if (operand.type == ScalarType::Uint64 || operand.type == ScalarType::Sint64) {
value_lo = ir.GetScalarReg<IR::U32>(IR::ScalarReg(operand.code));
value_hi = ir.GetScalarReg<IR::U32>(IR::ScalarReg(operand.code + 1));
} else {
UNREACHABLE();
}
break;
case OperandField::VectorGPR:
if (is_float) {
value_lo = ir.GetVectorReg<IR::F32>(IR::VectorReg(operand.code));
value_hi = ir.GetVectorReg<IR::F32>(IR::VectorReg(operand.code + 1));
} else if (operand.type == ScalarType::Uint64 || operand.type == ScalarType::Sint64) {
value_lo = ir.GetVectorReg<IR::U32>(IR::VectorReg(operand.code));
value_hi = ir.GetVectorReg<IR::U32>(IR::VectorReg(operand.code + 1));
} else {
UNREACHABLE();
}
break;
case OperandField::ConstZero:
immediate = true;
if (force_flt) {
value_lo = ir.Imm64(0.0);
} else {
value_lo = ir.Imm64(u64(0U));
}
break;
case OperandField::SignedConstIntPos:
ASSERT(!force_flt);
immediate = true;
value_lo = ir.Imm64(s64(operand.code) - SignedConstIntPosMin + 1);
break;
case OperandField::SignedConstIntNeg:
ASSERT(!force_flt);
immediate = true;
value_lo = ir.Imm64(-s64(operand.code) + SignedConstIntNegMin - 1);
break;
case OperandField::LiteralConst:
immediate = true;
if (force_flt) {
UNREACHABLE(); // There is a literal double?
} else {
value_lo = ir.Imm64(u64(operand.code));
}
break;
case OperandField::ConstFloatPos_1_0:
immediate = true;
if (force_flt) {
value_lo = ir.Imm64(1.0);
} else {
value_lo = ir.Imm64(std::bit_cast<u64>(f64(1.0)));
}
break;
case OperandField::ConstFloatPos_0_5:
immediate = true;
value_lo = ir.Imm64(0.5);
break;
case OperandField::ConstFloatPos_2_0:
immediate = true;
value_lo = ir.Imm64(2.0);
break;
case OperandField::ConstFloatPos_4_0:
immediate = true;
value_lo = ir.Imm64(4.0);
break;
case OperandField::ConstFloatNeg_0_5:
immediate = true;
value_lo = ir.Imm64(-0.5);
break;
case OperandField::ConstFloatNeg_1_0:
immediate = true;
value_lo = ir.Imm64(-1.0);
break;
case OperandField::ConstFloatNeg_2_0:
immediate = true;
value_lo = ir.Imm64(-2.0);
break;
case OperandField::ConstFloatNeg_4_0:
immediate = true;
value_lo = ir.Imm64(-4.0);
break;
case OperandField::VccLo: {
value_lo = ir.GetVccLo();
value_hi = ir.GetVccHi();
} break;
case OperandField::VccHi:
UNREACHABLE();
default:
UNREACHABLE();
}
IR::Value value;
if (immediate) {
value = value_lo;
} else if (is_float) {
throw NotImplementedException("required OpPackDouble2x32 implementation");
} else {
IR::Value packed = ir.CompositeConstruct(value_lo, value_hi);
value = ir.PackUint2x32(packed);
}
if (is_float) {
if (operand.input_modifier.abs) {
value = ir.FPAbs(IR::F32F64(value));
}
if (operand.input_modifier.neg) {
value = ir.FPNeg(IR::F32F64(value));
}
}
return IR::U64F64(value);
}
template <>
IR::U64 Translator::GetSrc64(const InstOperand& operand, bool force_flt) {
return GetSrc64<IR::U64F64>(operand, force_flt);
}
template <>
IR::F64 Translator::GetSrc64(const InstOperand& operand, bool) {
return GetSrc64<IR::U64F64>(operand, true);
}
void Translator::SetDst(const InstOperand& operand, const IR::U32F32& value) {
IR::U32F32 result = value;
if (operand.output_modifier.multiplier != 0.f) {
@ -197,6 +342,43 @@ void Translator::SetDst(const InstOperand& operand, const IR::U32F32& value) {
}
}
void Translator::SetDst64(const InstOperand& operand, const IR::U64F64& value_raw) {
IR::U64F64 value_untyped = value_raw;
const bool is_float = value_raw.Type() == IR::Type::F64 || value_raw.Type() == IR::Type::F32;
if (is_float) {
if (operand.output_modifier.multiplier != 0.f) {
value_untyped =
ir.FPMul(value_untyped, ir.Imm64(f64(operand.output_modifier.multiplier)));
}
if (operand.output_modifier.clamp) {
value_untyped = ir.FPSaturate(value_raw);
}
}
const IR::U64 value =
is_float ? ir.BitCast<IR::U64>(IR::F64{value_untyped}) : IR::U64{value_untyped};
const IR::Value unpacked{ir.UnpackUint2x32(value)};
const IR::U32 lo{ir.CompositeExtract(unpacked, 0U)};
const IR::U32 hi{ir.CompositeExtract(unpacked, 1U)};
switch (operand.field) {
case OperandField::ScalarGPR:
ir.SetScalarReg(IR::ScalarReg(operand.code + 1), hi);
return ir.SetScalarReg(IR::ScalarReg(operand.code), lo);
case OperandField::VectorGPR:
ir.SetVectorReg(IR::VectorReg(operand.code + 1), hi);
return ir.SetVectorReg(IR::VectorReg(operand.code), lo);
case OperandField::VccLo:
UNREACHABLE();
case OperandField::VccHi:
UNREACHABLE();
case OperandField::M0:
break;
default:
UNREACHABLE();
}
}
void Translator::EmitFetch(const GcnInst& inst) {
// Read the pointer to the fetch shader assembly.
const u32 sgpr_base = inst.src[0].code;
@ -320,6 +502,9 @@ void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_l
case Opcode::V_ADD_I32:
translator.V_ADD_I32(inst);
break;
case Opcode::V_ADDC_U32:
translator.V_ADDC_U32(inst);
break;
case Opcode::V_CVT_F32_I32:
translator.V_CVT_F32_I32(inst);
break;
@ -470,6 +655,9 @@ void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_l
case Opcode::IMAGE_LOAD:
translator.IMAGE_LOAD(false, inst);
break;
case Opcode::V_MAD_U64_U32:
translator.V_MAD_U64_U32(inst);
break;
case Opcode::V_CMP_GE_I32:
translator.V_CMP_U32(ConditionOp::GE, true, false, inst);
break;
@ -612,6 +800,9 @@ void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_l
case Opcode::BUFFER_STORE_DWORD:
translator.BUFFER_STORE_FORMAT(1, false, inst);
break;
case Opcode::BUFFER_STORE_DWORDX2:
translator.BUFFER_STORE_FORMAT(2, false, inst);
break;
case Opcode::BUFFER_STORE_DWORDX3:
translator.BUFFER_STORE_FORMAT(3, false, inst);
break;

View file

@ -100,6 +100,7 @@ public:
void V_AND_B32(const GcnInst& inst);
void V_LSHLREV_B32(const GcnInst& inst);
void V_ADD_I32(const GcnInst& inst);
void V_ADDC_U32(const GcnInst& inst);
void V_CVT_F32_I32(const GcnInst& inst);
void V_CVT_F32_U32(const GcnInst& inst);
void V_MAD_F32(const GcnInst& inst);
@ -129,6 +130,7 @@ public:
void V_CVT_U32_F32(const GcnInst& inst);
void V_SUBREV_F32(const GcnInst& inst);
void V_SUBREV_I32(const GcnInst& inst);
void V_MAD_U64_U32(const GcnInst& inst);
void V_CMP_U32(ConditionOp op, bool is_signed, bool set_exec, const GcnInst& inst);
void V_LSHRREV_B32(const GcnInst& inst);
void V_MUL_HI_U32(bool is_signed, const GcnInst& inst);
@ -186,8 +188,12 @@ public:
void EXP(const GcnInst& inst);
private:
IR::U32F32 GetSrc(const InstOperand& operand, bool flt_zero = false);
template <typename T = IR::U32F32>
[[nodiscard]] T GetSrc(const InstOperand& operand, bool flt_zero = false);
template <typename T = IR::U64F64>
[[nodiscard]] T GetSrc64(const InstOperand& operand, bool flt_zero = false);
void SetDst(const InstOperand& operand, const IR::U32F32& value);
void SetDst64(const InstOperand& operand, const IR::U64F64& value_raw);
private:
IR::IREmitter ir;

View file

@ -67,7 +67,8 @@ void Translator::V_OR_B32(bool is_xor, const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])};
const IR::U32 src1{ir.GetVectorReg(IR::VectorReg(inst.src[1].code))};
const IR::VectorReg dst_reg{inst.dst[0].code};
ir.SetVectorReg(dst_reg, is_xor ? ir.BitwiseXor(src0, src1) : ir.BitwiseOr(src0, src1));
ir.SetVectorReg(dst_reg,
is_xor ? ir.BitwiseXor(src0, src1) : IR::U32(ir.BitwiseOr(src0, src1)));
}
void Translator::V_AND_B32(const GcnInst& inst) {
@ -92,6 +93,30 @@ void Translator::V_ADD_I32(const GcnInst& inst) {
// TODO: Carry
}
void Translator::V_ADDC_U32(const GcnInst& inst) {
const auto src0 = GetSrc<IR::U32>(inst.src[0]);
const auto src1 = GetSrc<IR::U32>(inst.src[1]);
IR::U32 scarry;
if (inst.src_count == 3) { // VOP3
IR::U1 thread_bit{ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[2].code))};
scarry = IR::U32{ir.Select(thread_bit, ir.Imm32(1), ir.Imm32(0))};
} else { // VOP2
scarry = ir.GetVccLo();
}
const IR::U32 result = ir.IAdd(ir.IAdd(src0, src1), scarry);
const IR::VectorReg dst_reg{inst.dst[0].code};
ir.SetVectorReg(dst_reg, result);
const IR::U1 less_src0 = ir.ILessThan(result, src0, false);
const IR::U1 less_src1 = ir.ILessThan(result, src1, false);
const IR::U1 did_overflow = ir.LogicalOr(less_src0, less_src1);
ir.SetVcc(did_overflow);
}
void Translator::V_CVT_F32_I32(const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])};
const IR::VectorReg dst_reg{inst.dst[0].code};
@ -294,6 +319,23 @@ void Translator::V_SUBREV_I32(const GcnInst& inst) {
// TODO: Carry-out
}
void Translator::V_MAD_U64_U32(const GcnInst& inst) {
const auto src0 = GetSrc<IR::U32>(inst.src[0]);
const auto src1 = GetSrc<IR::U32>(inst.src[1]);
const auto src2 = GetSrc64<IR::U64>(inst.src[2]);
const IR::U64 mul_result = ir.UConvert(64, ir.IMul(src0, src1));
const IR::U64 sum_result = ir.IAdd(mul_result, src2);
SetDst64(inst.dst[0], sum_result);
const IR::U1 less_src0 = ir.ILessThan(sum_result, mul_result, false);
const IR::U1 less_src1 = ir.ILessThan(sum_result, src2, false);
const IR::U1 did_overflow = ir.LogicalOr(less_src0, less_src1);
ir.SetVcc(did_overflow);
}
void Translator::V_CMP_U32(ConditionOp op, bool is_signed, bool set_exec, const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])};
const IR::U32 src1{GetSrc(inst.src[1])};

View file

@ -964,8 +964,18 @@ IR::Value IREmitter::IMulExt(const U32& a, const U32& b, bool is_signed) {
return Inst(is_signed ? Opcode::SMulExt : Opcode::UMulExt, a, b);
}
U32 IREmitter::IMul(const U32& a, const U32& b) {
return Inst<U32>(Opcode::IMul32, a, b);
U32U64 IREmitter::IMul(const U32U64& a, const U32U64& b) {
if (a.Type() != b.Type()) {
UNREACHABLE_MSG("Mismatching types {} and {}", a.Type(), b.Type());
}
switch (a.Type()) {
case Type::U32:
return Inst<U32>(Opcode::IMul32, a, b);
case Type::U64:
return Inst<U64>(Opcode::IMul64, a, b);
default:
ThrowInvalidType(a.Type());
}
}
U32 IREmitter::IDiv(const U32& a, const U32& b, bool is_signed) {
@ -1024,8 +1034,18 @@ U32 IREmitter::BitwiseAnd(const U32& a, const U32& b) {
return Inst<U32>(Opcode::BitwiseAnd32, a, b);
}
U32 IREmitter::BitwiseOr(const U32& a, const U32& b) {
return Inst<U32>(Opcode::BitwiseOr32, a, b);
U32U64 IREmitter::BitwiseOr(const U32U64& a, const U32U64& b) {
if (a.Type() != b.Type()) {
UNREACHABLE_MSG("Mismatching types {} and {}", a.Type(), b.Type());
}
switch (a.Type()) {
case Type::U32:
return Inst<U32>(Opcode::BitwiseOr32, a, b);
case Type::U64:
return Inst<U64>(Opcode::BitwiseOr64, a, b);
default:
ThrowInvalidType(a.Type());
}
}
U32 IREmitter::BitwiseXor(const U32& a, const U32& b) {
@ -1095,8 +1115,18 @@ U32 IREmitter::UClamp(const U32& value, const U32& min, const U32& max) {
return Inst<U32>(Opcode::UClamp32, value, min, max);
}
U1 IREmitter::ILessThan(const U32& lhs, const U32& rhs, bool is_signed) {
return Inst<U1>(is_signed ? Opcode::SLessThan : Opcode::ULessThan, lhs, rhs);
U1 IREmitter::ILessThan(const U32U64& lhs, const U32U64& rhs, bool is_signed) {
if (lhs.Type() != rhs.Type()) {
UNREACHABLE_MSG("Mismatching types {} and {}", lhs.Type(), rhs.Type());
}
switch (lhs.Type()) {
case Type::U32:
return Inst<U1>(is_signed ? Opcode::SLessThan32 : Opcode::ULessThan32, lhs, rhs);
case Type::U64:
return Inst<U1>(is_signed ? Opcode::SLessThan64 : Opcode::ULessThan64, lhs, rhs);
default:
ThrowInvalidType(lhs.Type());
}
}
U1 IREmitter::IEqual(const U32U64& lhs, const U32U64& rhs) {
@ -1155,8 +1185,9 @@ U32U64 IREmitter::ConvertFToS(size_t bitsize, const F32F64& value) {
ThrowInvalidType(value.Type());
}
default:
UNREACHABLE_MSG("Invalid destination bitsize {}", bitsize);
break;
}
throw NotImplementedException("Invalid destination bitsize {}", bitsize);
}
U32U64 IREmitter::ConvertFToU(size_t bitsize, const F32F64& value) {
@ -1183,13 +1214,17 @@ F32F64 IREmitter::ConvertSToF(size_t dest_bitsize, size_t src_bitsize, const Val
switch (src_bitsize) {
case 32:
return Inst<F32>(Opcode::ConvertF32S32, value);
default:
break;
}
break;
case 64:
switch (src_bitsize) {
case 32:
return Inst<F64>(Opcode::ConvertF64S32, value);
default:
break;
}
default:
break;
}
UNREACHABLE_MSG("Invalid bit size combination dst={} src={}", dest_bitsize, src_bitsize);
@ -1203,13 +1238,17 @@ F32F64 IREmitter::ConvertUToF(size_t dest_bitsize, size_t src_bitsize, const Val
return Inst<F32>(Opcode::ConvertF32U16, value);
case 32:
return Inst<F32>(Opcode::ConvertF32U32, value);
default:
break;
}
break;
case 64:
switch (src_bitsize) {
case 32:
return Inst<F64>(Opcode::ConvertF64U32, value);
default:
break;
}
default:
break;
}
UNREACHABLE_MSG("Invalid bit size combination dst={} src={}", dest_bitsize, src_bitsize);
@ -1227,7 +1266,11 @@ U16U32U64 IREmitter::UConvert(size_t result_bitsize, const U16U32U64& value) {
switch (value.Type()) {
case Type::U32:
return Inst<U16>(Opcode::ConvertU16U32, value);
default:
break;
}
default:
break;
}
throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize);
}
@ -1238,13 +1281,17 @@ F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value) {
switch (value.Type()) {
case Type::F32:
return Inst<F16>(Opcode::ConvertF16F32, value);
default:
break;
}
break;
case 32:
switch (value.Type()) {
case Type::F16:
return Inst<F32>(Opcode::ConvertF32F16, value);
default:
break;
}
default:
break;
}
throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize);

View file

@ -159,7 +159,7 @@ public:
[[nodiscard]] Value IAddCary(const U32& a, const U32& b);
[[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b);
[[nodiscard]] Value IMulExt(const U32& a, const U32& b, bool is_signed = false);
[[nodiscard]] U32 IMul(const U32& a, const U32& b);
[[nodiscard]] U32U64 IMul(const U32U64& a, const U32U64& b);
[[nodiscard]] U32 IDiv(const U32& a, const U32& b, bool is_signed = false);
[[nodiscard]] U32U64 INeg(const U32U64& value);
[[nodiscard]] U32 IAbs(const U32& value);
@ -167,7 +167,7 @@ public:
[[nodiscard]] U32U64 ShiftRightLogical(const U32U64& base, const U32& shift);
[[nodiscard]] U32U64 ShiftRightArithmetic(const U32U64& base, const U32& shift);
[[nodiscard]] U32 BitwiseAnd(const U32& a, const U32& b);
[[nodiscard]] U32 BitwiseOr(const U32& a, const U32& b);
[[nodiscard]] U32U64 BitwiseOr(const U32U64& a, const U32U64& b);
[[nodiscard]] U32 BitwiseXor(const U32& a, const U32& b);
[[nodiscard]] U32 BitFieldInsert(const U32& base, const U32& insert, const U32& offset,
const U32& count);
@ -188,7 +188,7 @@ public:
[[nodiscard]] U32 SClamp(const U32& value, const U32& min, const U32& max);
[[nodiscard]] U32 UClamp(const U32& value, const U32& min, const U32& max);
[[nodiscard]] U1 ILessThan(const U32& lhs, const U32& rhs, bool is_signed);
[[nodiscard]] U1 ILessThan(const U32U64& lhs, const U32U64& rhs, bool is_signed);
[[nodiscard]] U1 IEqual(const U32U64& lhs, const U32U64& rhs);
[[nodiscard]] U1 ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed);
[[nodiscard]] U1 IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed);

View file

@ -227,6 +227,7 @@ OPCODE(IAddCary32, U32x2, U32,
OPCODE(ISub32, U32, U32, U32, )
OPCODE(ISub64, U64, U64, U64, )
OPCODE(IMul32, U32, U32, U32, )
OPCODE(IMul64, U64, U64, U64, )
OPCODE(SMulExt, U32x2, U32, U32, )
OPCODE(UMulExt, U32x2, U32, U32, )
OPCODE(SDiv32, U32, U32, U32, )
@ -242,6 +243,7 @@ OPCODE(ShiftRightArithmetic32, U32, U32,
OPCODE(ShiftRightArithmetic64, U64, U64, U32, )
OPCODE(BitwiseAnd32, U32, U32, U32, )
OPCODE(BitwiseOr32, U32, U32, U32, )
OPCODE(BitwiseOr64, U64, U64, U64, )
OPCODE(BitwiseXor32, U32, U32, U32, )
OPCODE(BitFieldInsert, U32, U32, U32, U32, U32, )
OPCODE(BitFieldSExtract, U32, U32, U32, U32, )
@ -258,8 +260,10 @@ OPCODE(SMax32, U32, U32,
OPCODE(UMax32, U32, U32, U32, )
OPCODE(SClamp32, U32, U32, U32, U32, )
OPCODE(UClamp32, U32, U32, U32, U32, )
OPCODE(SLessThan, U1, U32, U32, )
OPCODE(ULessThan, U1, U32, U32, )
OPCODE(SLessThan32, U1, U32, U32, )
OPCODE(SLessThan64, U1, U64, U64, )
OPCODE(ULessThan32, U1, U32, U32, )
OPCODE(ULessThan64, U1, U64, U64, )
OPCODE(IEqual, U1, U32, U32, )
OPCODE(SLessThanEqual, U1, U32, U32, )
OPCODE(ULessThanEqual, U1, U32, U32, )

View file

@ -21,6 +21,8 @@ template <typename T>
return value.F32();
} else if constexpr (std::is_same_v<T, u64>) {
return value.U64();
} else if constexpr (std::is_same_v<T, s64>) {
return static_cast<s64>(value.U64());
}
}
@ -281,12 +283,18 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
return FoldLogicalOr(inst);
case IR::Opcode::LogicalNot:
return FoldLogicalNot(inst);
case IR::Opcode::SLessThan:
case IR::Opcode::SLessThan32:
FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a < b; });
return;
case IR::Opcode::ULessThan:
case IR::Opcode::SLessThan64:
FoldWhenAllImmediates(inst, [](s64 a, s64 b) { return a < b; });
return;
case IR::Opcode::ULessThan32:
FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a < b; });
return;
case IR::Opcode::ULessThan64:
FoldWhenAllImmediates(inst, [](u64 a, u64 b) { return a < b; });
return;
case IR::Opcode::SLessThanEqual:
FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a <= b; });
return;

View file

@ -348,13 +348,15 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
case IR::Opcode::GetThreadBitScalarReg:
case IR::Opcode::GetScalarRegister: {
const IR::ScalarReg reg{inst.Arg(0).ScalarReg()};
inst.ReplaceUsesWith(
pass.ReadVariable(reg, block, opcode == IR::Opcode::GetThreadBitScalarReg));
const bool thread_bit = opcode == IR::Opcode::GetThreadBitScalarReg;
const IR::Value value = pass.ReadVariable(reg, block, thread_bit);
inst.ReplaceUsesWith(value);
break;
}
case IR::Opcode::GetVectorRegister: {
const IR::VectorReg reg{inst.Arg(0).VectorReg()};
inst.ReplaceUsesWith(pass.ReadVariable(reg, block));
const IR::Value value = pass.ReadVariable(reg, block);
inst.ReplaceUsesWith(value);
break;
}
case IR::Opcode::GetGotoVariable:

View file

@ -220,6 +220,7 @@ using F16 = TypedValue<Type::F16>;
using F32 = TypedValue<Type::F32>;
using F64 = TypedValue<Type::F64>;
using U32F32 = TypedValue<Type::U32 | Type::F32>;
using U64F64 = TypedValue<Type::U64 | Type::F64>;
using U32U64 = TypedValue<Type::U32 | Type::U64>;
using U16U32U64 = TypedValue<Type::U16 | Type::U32 | Type::U64>;
using F32F64 = TypedValue<Type::F32 | Type::F64>;