Merge branch 'sdl-basic-gamepad' of https://github.com/counter185/shadPS4 into sdl-basic-gamepad

counter185 2024-08-13 10:26:52 +02:00
commit 285fb8fafd
58 changed files with 889 additions and 525 deletions

externals/ext-boost vendored

@ -1 +1 @@
Subproject commit 147b2de7734f5dc3b9aeb1f4135ae15fcd44b9d7
Subproject commit a04136add1e469f46d8ae8d3e8307779240a5c53


@ -25,7 +25,9 @@ static bool shouldDumpPM4 = false;
static u32 vblankDivider = 1;
static bool vkValidation = false;
static bool vkValidationSync = false;
static bool vkValidationGpu = false;
static bool rdocEnable = false;
static bool rdocMarkersEnable = false;
// Gui
std::string settings_install_dir = "";
u32 main_window_geometry_x = 400;
@ -102,6 +104,10 @@ bool isRdocEnabled() {
return rdocEnable;
}
bool isMarkersEnabled() {
return rdocMarkersEnable;
}
u32 vblankDiv() {
return vblankDivider;
}
@ -114,6 +120,10 @@ bool vkValidationSyncEnabled() {
return vkValidationSync;
}
bool vkValidationGpuEnabled() {
return vkValidationGpu;
}
void setScreenWidth(u32 width) {
screenWidth = width;
}
@ -319,7 +329,9 @@ void load(const std::filesystem::path& path) {
gpuId = toml::find_or<int>(vk, "gpuId", -1);
vkValidation = toml::find_or<bool>(vk, "validation", false);
vkValidationSync = toml::find_or<bool>(vk, "validation_sync", false);
vkValidationGpu = toml::find_or<bool>(vk, "validation_gpu", true);
rdocEnable = toml::find_or<bool>(vk, "rdocEnable", false);
rdocMarkersEnable = toml::find_or<bool>(vk, "rdocMarkersEnable", false);
}
if (data.contains("Debug")) {
@ -394,7 +406,9 @@ void save(const std::filesystem::path& path) {
data["Vulkan"]["gpuId"] = gpuId;
data["Vulkan"]["validation"] = vkValidation;
data["Vulkan"]["validation_sync"] = vkValidationSync;
data["Vulkan"]["validation_gpu"] = vkValidationGpu;
data["Vulkan"]["rdocEnable"] = rdocEnable;
data["Vulkan"]["rdocMarkersEnable"] = rdocMarkersEnable;
data["Debug"]["DebugDump"] = isDebugDump;
data["LLE"]["libc"] = isLibc;
data["GUI"]["theme"] = mw_themes;


@ -27,6 +27,7 @@ bool nullGpu();
bool dumpShaders();
bool dumpPM4();
bool isRdocEnabled();
bool isMarkersEnabled();
u32 vblankDiv();
void setDebugDump(bool enable);
@ -50,6 +51,7 @@ void setRdocEnabled(bool enable);
bool vkValidationEnabled();
bool vkValidationSyncEnabled();
bool vkValidationGpuEnabled();
// Gui
void setMainWindowGeometry(u32 x, u32 y, u32 w, u32 h);


@ -454,8 +454,28 @@ void* AddressSpace::MapFile(VAddr virtual_addr, size_t size, size_t offset, u32
#endif
}
void AddressSpace::Unmap(VAddr virtual_addr, size_t size, bool has_backing) {
return impl->Unmap(virtual_addr, size, has_backing);
void AddressSpace::Unmap(VAddr virtual_addr, size_t size, VAddr start_in_vma, VAddr end_in_vma,
PAddr phys_base, bool is_exec, bool has_backing) {
#ifdef _WIN32
// There does not appear to be comparable support for partial unmapping on Windows.
// Unfortunately, at least one title was found to require this. The workaround is to unmap
// the entire allocation and remap the portions outside of the requested unmapping range.
impl->Unmap(virtual_addr, size, has_backing);
// TODO: Determine if any titles require partial unmapping support for flexible allocations.
ASSERT_MSG(has_backing || (start_in_vma == 0 && end_in_vma == size),
"Partial unmapping of flexible allocations is not supported");
if (start_in_vma != 0) {
Map(virtual_addr, start_in_vma, 0, phys_base, is_exec);
}
if (end_in_vma != size) {
Map(virtual_addr + end_in_vma, size - end_in_vma, 0, phys_base + end_in_vma, is_exec);
}
#else
impl->Unmap(virtual_addr + start_in_vma, end_in_vma - start_in_vma, has_backing);
#endif
}
void AddressSpace::Protect(VAddr virtual_addr, size_t size, MemoryPermission perms) {
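
A minimal sketch of the remap arithmetic in the Windows branch above, assuming the allocation spans [virtual_addr, virtual_addr + size) and the hole [start_in_vma, end_in_vma) is given as offsets into it (helper names invented for illustration):

#include <cstddef>
#include <cstdint>
#include <vector>

struct Remap {
    std::uintptr_t va;
    std::size_t size;
    std::size_t phys_off;
};

// After unmapping the whole allocation, these are the pieces outside the
// requested hole that must be mapped back.
std::vector<Remap> PiecesToRemap(std::uintptr_t va, std::size_t size,
                                 std::size_t start_in_vma, std::size_t end_in_vma) {
    std::vector<Remap> pieces;
    if (start_in_vma != 0) {
        pieces.push_back({va, start_in_vma, 0}); // head: [va, va + start_in_vma)
    }
    if (end_in_vma != size) {
        pieces.push_back({va + end_in_vma, size - end_in_vma, end_in_vma}); // tail
    }
    return pieces;
}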


@ -91,7 +91,8 @@ public:
void* MapFile(VAddr virtual_addr, size_t size, size_t offset, u32 prot, uintptr_t fd);
/// Unmaps specified virtual memory area.
void Unmap(VAddr virtual_addr, size_t size, bool has_backing);
void Unmap(VAddr virtual_addr, size_t size, VAddr start_in_vma, VAddr end_in_vma,
PAddr phys_base, bool is_exec, bool has_backing);
void Protect(VAddr virtual_addr, size_t size, MemoryPermission perms);


@ -54,6 +54,7 @@ std::filesystem::path MntPoints::GetHostPath(const std::string& guest_directory)
// If the path does not exist attempt to verify this.
// Retrieve parent path until we find one that exists.
std::scoped_lock lk{m_mutex};
path_parts.clear();
auto current_path = host_path;
while (!std::filesystem::exists(current_path)) {
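
For orientation, the loop this hunk touches walks up the host path until an existing ancestor is found; judging by the path_parts vector being cleared above, the missing components are collected (deepest first) for a later case-insensitive re-match. A reduced sketch of the walk only, assuming some ancestor of the path exists:

#include <filesystem>
#include <vector>

std::filesystem::path FindExistingAncestor(std::filesystem::path p,
                                           std::vector<std::filesystem::path>& missing) {
    while (!std::filesystem::exists(p)) {
        missing.push_back(p.filename()); // collected deepest-first
        p = p.parent_path();
    }
    return p;
}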


@ -956,9 +956,9 @@ int PS4_SYSV_ABI sceGnmGetGpuBlockStatus() {
return ORBIS_OK;
}
int PS4_SYSV_ABI sceGnmGetGpuCoreClockFrequency() {
LOG_DEBUG(Lib_GnmDriver, "(STUBBED) called");
return ORBIS_OK;
u32 PS4_SYSV_ABI sceGnmGetGpuCoreClockFrequency() {
LOG_TRACE(Lib_GnmDriver, "called");
return Config::isNeoMode() ? 911'000'000 : 800'000'000;
}
int PS4_SYSV_ABI sceGnmGetGpuInfoStatus() {
@ -1706,8 +1706,18 @@ int PS4_SYSV_ABI sceGnmSetupMipStatsReport() {
return ORBIS_OK;
}
int PS4_SYSV_ABI sceGnmSetVgtControl() {
LOG_ERROR(Lib_GnmDriver, "(STUBBED) called");
s32 PS4_SYSV_ABI sceGnmSetVgtControl(u32* cmdbuf, u32 size, u32 prim_group_sz_minus_one,
u32 partial_vs_wave_mode, u32 wd_switch_only_on_eop_mode) {
LOG_TRACE(Lib_GnmDriver, "called");
if (!cmdbuf || size != 3 || (prim_group_sz_minus_one >= 0x100) ||
((wd_switch_only_on_eop_mode | partial_vs_wave_mode) >= 2)) {
return -1;
}
const u32 reg_value =
((partial_vs_wave_mode & 1) << 0x10) | (prim_group_sz_minus_one & 0xffffu);
PM4CmdSetData::SetContextReg(cmdbuf, 0x2aau, reg_value); // IA_MULTI_VGT_PARAM
return ORBIS_OK;
}
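
The size != 3 check above corresponds to a single PM4 SET_CONTEXT_REG packet: one header dword, one register-offset dword, one value dword. A sketch of the value packing, with field names taken from public GCN register listings rather than from this diff:

#include <cstdint>

// IA_MULTI_VGT_PARAM (context register 0x2aa), per public GCN documentation:
//   bits [15:0] PRIMGROUP_SIZE     - primitive group size minus one
//   bit  [16]   PARTIAL_VS_WAVE_ON
std::uint32_t PackIaMultiVgtParam(std::uint32_t prim_group_sz_minus_one,
                                  std::uint32_t partial_vs_wave_mode) {
    return ((partial_vs_wave_mode & 1u) << 16) | (prim_group_sz_minus_one & 0xffffu);
}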


@ -85,7 +85,7 @@ int PS4_SYSV_ABI sceGnmGetDebugTimestamp();
int PS4_SYSV_ABI sceGnmGetEqEventType();
int PS4_SYSV_ABI sceGnmGetEqTimeStamp();
int PS4_SYSV_ABI sceGnmGetGpuBlockStatus();
int PS4_SYSV_ABI sceGnmGetGpuCoreClockFrequency();
u32 PS4_SYSV_ABI sceGnmGetGpuCoreClockFrequency();
int PS4_SYSV_ABI sceGnmGetGpuInfoStatus();
int PS4_SYSV_ABI sceGnmGetLastWaitedAddress();
int PS4_SYSV_ABI sceGnmGetNumTcaUnits();
@ -161,7 +161,8 @@ int PS4_SYSV_ABI sceGnmSetResourceUserData();
int PS4_SYSV_ABI sceGnmSetSpiEnableSqCounters();
int PS4_SYSV_ABI sceGnmSetSpiEnableSqCountersForUnitInstance();
int PS4_SYSV_ABI sceGnmSetupMipStatsReport();
int PS4_SYSV_ABI sceGnmSetVgtControl();
s32 PS4_SYSV_ABI sceGnmSetVgtControl(u32* cmdbuf, u32 size, u32 prim_group_sz_minus_one,
u32 partial_vs_wave_mode, u32 wd_switch_only_on_eop_mode);
s32 PS4_SYSV_ABI sceGnmSetVsShader(u32* cmdbuf, u32 size, const u32* vs_regs, u32 shader_modifier);
int PS4_SYSV_ABI sceGnmSetWaveLimitMultiplier();
int PS4_SYSV_ABI sceGnmSetWaveLimitMultipliers();


@ -212,9 +212,9 @@ s32 PS4_SYSV_ABI sceKernelAvailableFlexibleMemorySize(size_t* out_size) {
return ORBIS_OK;
}
void PS4_SYSV_ABI _sceKernelRtldSetApplicationHeapAPI(void* func) {
void PS4_SYSV_ABI _sceKernelRtldSetApplicationHeapAPI(void* func[]) {
auto* linker = Common::Singleton<Core::Linker>::Instance();
linker->SetHeapApiFunc(func);
linker->SetHeapAPI(func);
}
int PS4_SYSV_ABI sceKernelGetDirectMemoryType(u64 addr, int* directMemoryTypeOut,


@ -98,7 +98,7 @@ int PS4_SYSV_ABI sceKernelQueryMemoryProtection(void* addr, void** start, void**
int PS4_SYSV_ABI sceKernelDirectMemoryQuery(u64 offset, int flags, OrbisQueryInfo* query_info,
size_t infoSize);
s32 PS4_SYSV_ABI sceKernelAvailableFlexibleMemorySize(size_t* sizeOut);
void PS4_SYSV_ABI _sceKernelRtldSetApplicationHeapAPI(void* func);
void PS4_SYSV_ABI _sceKernelRtldSetApplicationHeapAPI(void* func[]);
int PS4_SYSV_ABI sceKernelGetDirectMemoryType(u64 addr, int* directMemoryTypeOut,
void** directMemoryStartOut,
void** directMemoryEndOut);


@ -421,13 +421,21 @@ ScePthreadMutex* createMutex(ScePthreadMutex* addr) {
return addr;
}
int PS4_SYSV_ABI scePthreadMutexInit(ScePthreadMutex* mutex, const ScePthreadMutexattr* attr,
int PS4_SYSV_ABI scePthreadMutexInit(ScePthreadMutex* mutex, const ScePthreadMutexattr* mutex_attr,
const char* name) {
const ScePthreadMutexattr* attr;
if (mutex == nullptr) {
return SCE_KERNEL_ERROR_EINVAL;
}
if (attr == nullptr) {
if (mutex_attr == nullptr) {
attr = g_pthread_cxt->getDefaultMutexattr();
} else {
if (*mutex_attr == nullptr) {
attr = g_pthread_cxt->getDefaultMutexattr();
} else {
attr = mutex_attr;
}
}
*mutex = new PthreadMutexInternal{};
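
The nested branches reduce to a single condition: the caller's attribute is used only when both the pointer and the handle it points at are non-null; otherwise the default applies. As a standalone sketch (ScePthreadMutexattr is itself a pointer-like handle, which is why the double null test is meaningful):

template <typename Attr>
const Attr* ResolveMutexAttr(const Attr* user_attr, const Attr* default_attr) {
    return (user_attr != nullptr && *user_attr != nullptr) ? user_attr : default_attr;
}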


@ -9,7 +9,6 @@
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/libraries/error_codes.h"
#include "core/libraries/kernel/thread_management.h"
#include "core/libraries/libs.h"
namespace Libraries::Kernel {
@ -82,7 +81,6 @@ public:
public:
struct WaitingThread : public ListBaseHook {
std::string name;
std::condition_variable cv;
u32 priority;
s32 need_count;
@ -90,7 +88,6 @@ public:
bool was_cancled{};
explicit WaitingThread(s32 need_count, bool is_fifo) : need_count{need_count} {
name = scePthreadSelf()->name;
if (is_fifo) {
return;
}
@ -174,10 +171,16 @@ s32 PS4_SYSV_ABI sceKernelCreateSema(OrbisKernelSema* sem, const char* pName, u3
}
s32 PS4_SYSV_ABI sceKernelWaitSema(OrbisKernelSema sem, s32 needCount, u32* pTimeout) {
if (!sem) {
return ORBIS_KERNEL_ERROR_ESRCH;
}
return sem->Wait(true, needCount, pTimeout);
}
s32 PS4_SYSV_ABI sceKernelSignalSema(OrbisKernelSema sem, s32 signalCount) {
if (!sem) {
return ORBIS_KERNEL_ERROR_ESRCH;
}
if (!sem->Signal(signalCount)) {
return ORBIS_KERNEL_ERROR_EINVAL;
}
@ -185,10 +188,16 @@ s32 PS4_SYSV_ABI sceKernelSignalSema(OrbisKernelSema sem, s32 signalCount) {
}
s32 PS4_SYSV_ABI sceKernelPollSema(OrbisKernelSema sem, s32 needCount) {
if (!sem) {
return ORBIS_KERNEL_ERROR_ESRCH;
}
return sem->Wait(false, needCount, nullptr);
}
int PS4_SYSV_ABI sceKernelCancelSema(OrbisKernelSema sem, s32 setCount, s32* pNumWaitThreads) {
if (!sem) {
return ORBIS_KERNEL_ERROR_ESRCH;
}
return sem->Cancel(setCount, pNumWaitThreads);
}


@ -305,7 +305,8 @@ void* Linker::TlsGetAddr(u64 module_index, u64 offset) {
// Module was just loaded by above code. Allocate TLS block for it.
Module* module = m_modules[module_index - 1].get();
const u32 init_image_size = module->tls.init_image_size;
u8* dest = reinterpret_cast<u8*>(heap_api_func(module->tls.image_size));
// TODO: Determine if Windows will crash from this
u8* dest = reinterpret_cast<u8*>(heap_api->heap_malloc(module->tls.image_size));
const u8* src = reinterpret_cast<const u8*>(module->tls.image_virtual_addr);
std::memcpy(dest, src, init_image_size);
std::memset(dest + init_image_size, 0, module->tls.image_size - init_image_size);
@ -335,10 +336,23 @@ void Linker::InitTlsForThread(bool is_primary) {
&addr_out, tls_aligned, 3, 0, "SceKernelPrimaryTcbTls");
ASSERT_MSG(ret == 0, "Unable to allocate TLS+TCB for the primary thread");
} else {
if (heap_api_func) {
addr_out = heap_api_func(total_tls_size);
if (heap_api) {
#ifndef WIN32
addr_out = heap_api->heap_malloc(total_tls_size);
} else {
addr_out = std::malloc(total_tls_size);
#else
// TODO: Windows tls malloc replacement, refer to rtld_tls_block_malloc
LOG_ERROR(Core_Linker, "TLS user malloc called, using std::malloc");
addr_out = std::malloc(total_tls_size);
if (!addr_out) {
auto pth_id = pthread_self();
auto handle = pthread_gethandle(pth_id);
ASSERT_MSG(addr_out,
"Cannot allocate TLS block defined for handle=%x, index=%d size=%d",
handle, pth_id, total_tls_size);
}
#endif
}
}


@ -46,7 +46,21 @@ struct EntryParams {
const char* argv[3];
};
using HeapApiFunc = PS4_SYSV_ABI void* (*)(size_t);
struct HeapAPI {
PS4_SYSV_ABI void* (*heap_malloc)(size_t);
PS4_SYSV_ABI void (*heap_free)(void*);
PS4_SYSV_ABI void* (*heap_calloc)(size_t, size_t);
PS4_SYSV_ABI void* (*heap_realloc)(void*, size_t);
PS4_SYSV_ABI void* (*heap_memalign)(size_t, size_t);
PS4_SYSV_ABI int (*heap_posix_memalign)(void**, size_t, size_t);
// NOTE: Fields below may be inaccurate
PS4_SYSV_ABI int (*heap_reallocalign)(void);
PS4_SYSV_ABI void (*heap_malloc_stats)(void);
PS4_SYSV_ABI int (*heap_malloc_stats_fast)(void);
PS4_SYSV_ABI size_t (*heap_malloc_usable_size)(void*);
};
using AppHeapAPI = HeapAPI*;
class Linker {
public:
@ -75,8 +89,8 @@ public:
}
}
void SetHeapApiFunc(void* func) {
heap_api_func = *reinterpret_cast<HeapApiFunc*>(func);
void SetHeapAPI(void* func[]) {
heap_api = reinterpret_cast<AppHeapAPI>(func);
}
void AdvanceGenerationCounter() noexcept {
@ -104,7 +118,7 @@ private:
size_t static_tls_size{};
u32 max_tls_index{};
u32 num_static_modules{};
HeapApiFunc heap_api_func{};
AppHeapAPI heap_api{};
std::vector<std::unique_ptr<Module>> m_modules;
Loader::SymbolsResolver m_hle_symbols{};
};
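
Conceptually, the guest now hands the linker a whole table of allocator entry points instead of a single malloc-style function, and SetHeapAPI reinterprets the incoming void*[] as that table in declaration order. A hedged sketch of the call shape, with invented host stand-ins (the real entries are guest functions using the PS4 SysV calling convention, attribute omitted here):

#include <cstddef>
#include <cstdlib>

static void* StubMalloc(std::size_t n) { return std::malloc(n); }
static void StubFree(void* p) { std::free(p); }

template <typename LinkerT>
void InstallHeapTable(LinkerT* linker) {
    // table[0] becomes heap_malloc, table[1] heap_free, and so on,
    // matching the field order of HeapAPI above.
    void* table[] = {reinterpret_cast<void*>(&StubMalloc),
                     reinterpret_cast<void*>(&StubFree)};
    linker->SetHeapAPI(table);
}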


@ -54,7 +54,7 @@ PAddr MemoryManager::Allocate(PAddr search_start, PAddr search_end, size_t size,
free_addr = alignment > 0 ? Common::AlignUp(free_addr, alignment) : free_addr;
// Add the allocated region to the list and commit its pages.
auto& area = CarveDmemArea(free_addr, size);
auto& area = CarveDmemArea(free_addr, size)->second;
area.memory_type = memory_type;
area.is_free = false;
return free_addr;
@ -63,9 +63,8 @@ PAddr MemoryManager::Allocate(PAddr search_start, PAddr search_end, size_t size,
void MemoryManager::Free(PAddr phys_addr, size_t size) {
std::scoped_lock lk{mutex};
const auto dmem_area = FindDmemArea(phys_addr);
ASSERT(dmem_area != dmem_map.end() && dmem_area->second.base == phys_addr &&
dmem_area->second.size == size);
auto dmem_area = CarveDmemArea(phys_addr, size);
ASSERT(dmem_area != dmem_map.end() && dmem_area->second.size >= size);
// Release any dmem mappings that reference this physical block.
std::vector<std::pair<VAddr, u64>> remove_list;
@ -74,10 +73,11 @@ void MemoryManager::Free(PAddr phys_addr, size_t size) {
continue;
}
if (mapping.phys_base <= phys_addr && phys_addr < mapping.phys_base + mapping.size) {
LOG_INFO(Kernel_Vmm, "Unmaping direct mapping {:#x} with size {:#x}", addr,
mapping.size);
auto vma_segment_start_addr = phys_addr - mapping.phys_base + addr;
LOG_INFO(Kernel_Vmm, "Unmaping direct mapping {:#x} with size {:#x}",
vma_segment_start_addr, size);
// Unmapping might erase from vma_map. We can't do it here.
remove_list.emplace_back(addr, mapping.size);
remove_list.emplace_back(vma_segment_start_addr, size);
}
}
for (const auto& [addr, size] : remove_list) {
@ -104,8 +104,6 @@ int MemoryManager::Reserve(void** out_addr, VAddr virtual_addr, size_t size, Mem
const auto& vma = FindVMA(mapped_addr)->second;
// If the VMA is mapped, unmap the region first.
if (vma.IsMapped()) {
ASSERT_MSG(vma.base == mapped_addr && vma.size == size,
"Region must match when reserving a mapped region");
UnmapMemory(mapped_addr, size);
}
const size_t remaining_size = vma.base + vma.size - mapped_addr;
@ -169,6 +167,7 @@ int MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, size_t size, M
new_vma.prot = prot;
new_vma.name = name;
new_vma.type = type;
new_vma.is_exec = is_exec;
if (type == VMAType::Direct) {
new_vma.phys_base = phys_addr;
@ -216,10 +215,16 @@ void MemoryManager::UnmapMemory(VAddr virtual_addr, size_t size) {
std::scoped_lock lk{mutex};
const auto it = FindVMA(virtual_addr);
ASSERT_MSG(it->second.Contains(virtual_addr, size),
const auto& vma_base = it->second;
ASSERT_MSG(vma_base.Contains(virtual_addr, size),
"Existing mapping does not contain requested unmap range");
const auto type = it->second.type;
const auto vma_base_addr = vma_base.base;
const auto vma_base_size = vma_base.size;
const auto phys_base = vma_base.phys_base;
const bool is_exec = vma_base.is_exec;
const auto start_in_vma = virtual_addr - vma_base_addr;
const auto type = vma_base.type;
const bool has_backing = type == VMAType::Direct || type == VMAType::File;
if (type == VMAType::Direct) {
rasterizer->UnmapMemory(virtual_addr, size);
@ -239,7 +244,8 @@ void MemoryManager::UnmapMemory(VAddr virtual_addr, size_t size) {
MergeAdjacent(vma_map, new_it);
// Unmap the memory region.
impl.Unmap(virtual_addr, size, has_backing);
impl.Unmap(vma_base_addr, vma_base_size, start_in_vma, start_in_vma + size, phys_base, is_exec,
has_backing);
TRACK_FREE(virtual_addr, "VMEM");
}
@ -397,13 +403,12 @@ MemoryManager::VMAHandle MemoryManager::CarveVMA(VAddr virtual_addr, size_t size
return vma_handle;
}
DirectMemoryArea& MemoryManager::CarveDmemArea(PAddr addr, size_t size) {
MemoryManager::DMemHandle MemoryManager::CarveDmemArea(PAddr addr, size_t size) {
auto dmem_handle = FindDmemArea(addr);
ASSERT_MSG(dmem_handle != dmem_map.end(), "Physical address not in dmem_map");
const DirectMemoryArea& area = dmem_handle->second;
ASSERT_MSG(area.is_free && area.base <= addr,
"Adding an allocation to already allocated region");
ASSERT_MSG(area.base <= addr, "Adding an allocation to already allocated region");
const PAddr start_in_area = addr - area.base;
const PAddr end_in_vma = start_in_area + size;
@ -418,7 +423,7 @@ DirectMemoryArea& MemoryManager::CarveDmemArea(PAddr addr, size_t size) {
dmem_handle = Split(dmem_handle, start_in_area);
}
return dmem_handle->second;
return dmem_handle;
}
MemoryManager::VMAHandle MemoryManager::Split(VMAHandle vma_handle, size_t offset_in_vma) {
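
CarveDmemArea now returns the map iterator, which is what lets Free() above release a sub-range of an allocation instead of demanding an exact base/size match. The carving is plain interval surgery; a self-contained sketch assuming an ordered map of [base, base + size) areas keyed by base:

#include <cstdint>
#include <map>

struct Area {
    std::uint64_t base;
    std::uint64_t size;
};
using DmemMap = std::map<std::uint64_t, Area>;

// Cut the area at `offset` bytes in; return the iterator of the right half.
DmemMap::iterator SplitArea(DmemMap& map, DmemMap::iterator it, std::uint64_t offset) {
    Area right{it->second.base + offset, it->second.size - offset};
    it->second.size = offset;
    return map.emplace(right.base, right).first;
}

// Carve [addr, addr + len) out of the containing area and return its iterator:
// split off the tail beyond the range first, then the head before it.
DmemMap::iterator Carve(DmemMap& map, DmemMap::iterator it, std::uint64_t addr,
                        std::uint64_t len) {
    const std::uint64_t start = addr - it->second.base;
    if (start + len < it->second.size) {
        SplitArea(map, it, start + len);
    }
    if (start != 0) {
        it = SplitArea(map, it, start);
    }
    return it;
}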


@ -84,6 +84,7 @@ struct VirtualMemoryArea {
bool disallow_merge = false;
std::string name = "";
uintptr_t fd = 0;
bool is_exec = false;
bool Contains(VAddr addr, size_t size) const {
return addr >= base && (addr + size) <= (base + this->size);
@ -205,7 +206,7 @@ private:
VMAHandle CarveVMA(VAddr virtual_addr, size_t size);
DirectMemoryArea& CarveDmemArea(PAddr addr, size_t size);
DMemHandle CarveDmemArea(PAddr addr, size_t size);
VMAHandle Split(VMAHandle vma_handle, size_t offset_in_vma);


@ -128,11 +128,7 @@ Id EmitReadConst(EmitContext& ctx) {
Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index) {
auto& buffer = ctx.buffers[handle];
if (!Sirit::ValidId(buffer.offset)) {
buffer.offset = ctx.GetBufferOffset(buffer.global_binding);
}
const Id offset_dwords{ctx.OpShiftRightLogical(ctx.U32[1], buffer.offset, ctx.ConstU32(2U))};
index = ctx.OpIAdd(ctx.U32[1], index, offset_dwords);
index = ctx.OpIAdd(ctx.U32[1], index, buffer.offset_dwords);
const Id ptr{ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index)};
return ctx.OpLoad(buffer.data_types->Get(1), ptr);
}
@ -229,9 +225,6 @@ Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
template <u32 N>
static Id EmitLoadBufferF32xN(EmitContext& ctx, u32 handle, Id address) {
auto& buffer = ctx.buffers[handle];
if (!Sirit::ValidId(buffer.offset)) {
buffer.offset = ctx.GetBufferOffset(buffer.global_binding);
}
address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
if constexpr (N == 1) {
@ -386,19 +379,12 @@ static Id GetBufferFormatValue(EmitContext& ctx, u32 handle, Id address, u32 com
if (is_signed) {
value = ctx.OpBitFieldSExtract(ctx.S32[1], value, comp_offset,
ctx.ConstU32(bit_width));
value = ctx.OpConvertSToF(ctx.F32[1], value);
} else {
value = ctx.OpBitFieldUExtract(ctx.U32[1], value, comp_offset,
ctx.ConstU32(bit_width));
value = ctx.OpConvertUToF(ctx.F32[1], value);
}
} else {
if (is_signed) {
value = ctx.OpConvertSToF(ctx.F32[1], value);
} else {
value = ctx.OpConvertUToF(ctx.F32[1], value);
}
}
value = ctx.OpBitcast(ctx.F32[1], value);
return ConvertValue(ctx, value, num_format, bit_width);
}
break;
@ -411,9 +397,6 @@ static Id GetBufferFormatValue(EmitContext& ctx, u32 handle, Id address, u32 com
template <u32 N>
static Id EmitLoadBufferFormatF32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
auto& buffer = ctx.buffers[handle];
if (!Sirit::ValidId(buffer.offset)) {
buffer.offset = ctx.GetBufferOffset(buffer.global_binding);
}
address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
if constexpr (N == 1) {
return GetBufferFormatValue(ctx, handle, address, 0);
@ -445,9 +428,6 @@ Id EmitLoadBufferFormatF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id ad
template <u32 N>
static void EmitStoreBufferF32xN(EmitContext& ctx, u32 handle, Id address, Id value) {
auto& buffer = ctx.buffers[handle];
if (!Sirit::ValidId(buffer.offset)) {
buffer.offset = ctx.GetBufferOffset(buffer.global_binding);
}
address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
if constexpr (N == 1) {


@ -21,45 +21,72 @@ struct ImageOperands {
boost::container::static_vector<Id, 4> operands;
};
Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc,
Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias,
Id offset) {
const auto& texture = ctx.images[handle & 0xFFFF];
const Id image = ctx.OpLoad(texture.image_type, texture.id);
const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]);
const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler);
ImageOperands operands;
operands.Add(spv::ImageOperandsMask::Offset, offset);
if (Sirit::ValidId(bias)) {
operands.Add(spv::ImageOperandsMask::Bias, bias);
}
if (Sirit::ValidId(offset)) {
operands.Add(spv::ImageOperandsMask::Offset, offset);
}
return ctx.OpImageSampleImplicitLod(ctx.F32[4], sampled_image, coords, operands.mask,
operands.operands);
}
Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc,
Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod,
Id offset) {
const auto& texture = ctx.images[handle & 0xFFFF];
const Id image = ctx.OpLoad(texture.image_type, texture.id);
const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]);
const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler);
return ctx.OpImageSampleExplicitLod(ctx.F32[4], sampled_image, coords,
spv::ImageOperandsMask::Lod, ctx.ConstF32(0.f));
ImageOperands operands;
if (Sirit::ValidId(lod)) {
operands.Add(spv::ImageOperandsMask::Lod, lod);
}
if (Sirit::ValidId(offset)) {
operands.Add(spv::ImageOperandsMask::Offset, offset);
}
return ctx.OpImageSampleExplicitLod(ctx.F32[4], sampled_image, coords, operands.mask,
operands.operands);
}
Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref,
Id bias_lc, const IR::Value& offset) {
Id bias, Id offset) {
const auto& texture = ctx.images[handle & 0xFFFF];
const Id image = ctx.OpLoad(texture.image_type, texture.id);
const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]);
const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler);
return ctx.OpImageSampleDrefImplicitLod(ctx.F32[1], sampled_image, coords, dref);
ImageOperands operands;
if (Sirit::ValidId(bias)) {
operands.Add(spv::ImageOperandsMask::Bias, bias);
}
if (Sirit::ValidId(offset)) {
operands.Add(spv::ImageOperandsMask::Offset, offset);
}
return ctx.OpImageSampleDrefImplicitLod(ctx.F32[1], sampled_image, coords, dref, operands.mask,
operands.operands);
}
Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref,
Id bias_lc, Id offset) {
Id lod, Id offset) {
const auto& texture = ctx.images[handle & 0xFFFF];
const Id image = ctx.OpLoad(texture.image_type, texture.id);
const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]);
const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler);
return ctx.OpImageSampleDrefExplicitLod(ctx.F32[1], sampled_image, coords, dref,
spv::ImageOperandsMask::Lod, ctx.ConstF32(0.f));
ImageOperands operands;
if (Sirit::ValidId(lod)) {
operands.Add(spv::ImageOperandsMask::Lod, lod);
}
if (Sirit::ValidId(offset)) {
operands.Add(spv::ImageOperandsMask::Offset, offset);
}
return ctx.OpImageSampleDrefExplicitLod(ctx.F32[1], sampled_image, coords, dref, operands.mask,
operands.operands);
}
Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, Id offset2) {
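
All four samplers above now share one pattern: SPIR-V image operands are a bitmask followed by extra operands that must appear in mask-bit order, so each optional Id is appended only when it is valid. A reduced sketch of the helper being used (the include and the Id alias are stand-ins for the sirit types assumed here; Bias < Lod < Offset in the SPIR-V enum, so callers add them in that order):

#include <boost/container/static_vector.hpp>
#include <spirv/unified1/spirv.hpp> // assuming the scoped-enum (hpp11) variant

using Id = unsigned; // stand-in for Sirit::Id

struct ImageOperandsSketch {
    void Add(spv::ImageOperandsMask bit, Id value) {
        mask = static_cast<spv::ImageOperandsMask>(static_cast<unsigned>(mask) |
                                                   static_cast<unsigned>(bit));
        operands.push_back(value);
    }
    spv::ImageOperandsMask mask{};
    boost::container::static_vector<Id, 4> operands;
};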


@ -357,14 +357,14 @@ Id EmitConvertF64U64(EmitContext& ctx, Id value);
Id EmitConvertU16U32(EmitContext& ctx, Id value);
Id EmitConvertU32U16(EmitContext& ctx, Id value);
Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc,
Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias,
Id offset);
Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc,
Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod,
Id offset);
Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref,
Id bias_lc, const IR::Value& offset);
Id bias, Id offset);
Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref,
Id bias_lc, Id offset);
Id lod, Id offset);
Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, Id offset2);
Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset,
Id offset2, Id dref);


@ -6,7 +6,9 @@
namespace Shader::Backend::SPIRV {
void EmitPrologue(EmitContext& ctx) {}
void EmitPrologue(EmitContext& ctx) {
ctx.DefineBufferOffsets();
}
void EmitEpilogue(EmitContext& ctx) {}


@ -165,14 +165,18 @@ EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat f
throw InvalidArgument("Invalid attribute type {}", fmt);
}
Id EmitContext::GetBufferOffset(u32 binding) {
const u32 half = Shader::PushData::BufOffsetIndex + (binding >> 4);
const u32 comp = (binding & 0xf) >> 2;
const u32 offset = (binding & 0x3) << 3;
const Id ptr{OpAccessChain(TypePointer(spv::StorageClass::PushConstant, U32[1]),
push_data_block, ConstU32(half), ConstU32(comp))};
const Id value{OpLoad(U32[1], ptr)};
return OpBitFieldUExtract(U32[1], value, ConstU32(offset), ConstU32(8U));
void EmitContext::DefineBufferOffsets() {
for (auto& buffer : buffers) {
const u32 binding = buffer.binding;
const u32 half = Shader::PushData::BufOffsetIndex + (binding >> 4);
const u32 comp = (binding & 0xf) >> 2;
const u32 offset = (binding & 0x3) << 3;
const Id ptr{OpAccessChain(TypePointer(spv::StorageClass::PushConstant, U32[1]),
push_data_block, ConstU32(half), ConstU32(comp))};
const Id value{OpLoad(U32[1], ptr)};
buffer.offset = OpBitFieldUExtract(U32[1], value, ConstU32(offset), ConstU32(8U));
buffer.offset_dwords = OpShiftRightLogical(U32[1], buffer.offset, ConstU32(2U));
}
}
Id MakeDefaultValue(EmitContext& ctx, u32 default_value) {
@ -327,7 +331,9 @@ void EmitContext::DefineBuffers() {
for (u32 i = 0; const auto& buffer : info.buffers) {
const auto* data_types = True(buffer.used_types & IR::Type::F32) ? &F32 : &U32;
const Id data_type = (*data_types)[1];
const Id record_array_type{TypeArray(data_type, ConstU32(buffer.length))};
const Id record_array_type{buffer.is_storage
? TypeRuntimeArray(data_type)
: TypeArray(data_type, ConstU32(buffer.length))};
const Id struct_type{TypeStruct(record_array_type)};
if (std::ranges::find(type_ids, record_array_type.value, &Id::value) == type_ids.end()) {
Decorate(record_array_type, spv::Decoration::ArrayStride, 4);
@ -354,7 +360,7 @@ void EmitContext::DefineBuffers() {
buffers.push_back({
.id = id,
.global_binding = binding++,
.binding = binding++,
.data_types = data_types,
.pointer_type = pointer_type,
.buffer = buffer.GetVsharp(info),
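
DefineBufferOffsets above pulls each buffer's base offset out of the push-constant block, where offsets are packed one byte per binding, four per u32, sixteen per uvec4. The index arithmetic, decoded under that packing assumption with a worked value:

#include <cstdint>

// For binding b (BufOffsetIndex is the first uvec4 used for offsets):
//   half   = BufOffsetIndex + (b >> 4)  -> which uvec4 of the push block
//   comp   = (b & 0xf) >> 2             -> which u32 component of that uvec4
//   offset = (b & 0x3) << 3             -> bit position of the byte inside it
// e.g. b = 9: half = BufOffsetIndex + 0, comp = 2, bit offset = 8
//      (byte 1 of component 2 of the first offset uvec4).
std::uint32_t DecodeBufferOffset(const std::uint32_t* push_words,
                                 std::uint32_t buf_offset_index, std::uint32_t b) {
    const std::uint32_t word =
        push_words[(buf_offset_index + (b >> 4)) * 4 + ((b & 0xf) >> 2)];
    return (word >> ((b & 0x3) << 3)) & 0xff;
}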


@ -40,7 +40,7 @@ public:
~EmitContext();
Id Def(const IR::Value& value);
Id GetBufferOffset(u32 binding);
void DefineBufferOffsets();
[[nodiscard]] Id DefineInput(Id type, u32 location) {
const Id input_id{DefineVar(type, spv::StorageClass::Input)};
@ -203,7 +203,8 @@ public:
struct BufferDefinition {
Id id;
Id offset;
u32 global_binding;
Id offset_dwords;
u32 binding;
const VectorIds* data_types;
Id pointer_type;
AmdGpu::Buffer buffer;


@ -73,101 +73,190 @@ void Translator::EmitPrologue() {
}
}
template <>
IR::U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) {
IR::U32F32 value{};
template <typename T>
T Translator::GetSrc(const InstOperand& operand) {
constexpr bool is_float = std::is_same_v<T, IR::F32>;
const bool is_float = operand.type == ScalarType::Float32 || force_flt;
const auto get_imm = [&](auto value) -> T {
if constexpr (is_float) {
return ir.Imm32(std::bit_cast<float>(value));
} else {
return ir.Imm32(std::bit_cast<u32>(value));
}
};
T value{};
switch (operand.field) {
case OperandField::ScalarGPR:
if (is_float) {
value = ir.GetScalarReg<IR::F32>(IR::ScalarReg(operand.code));
} else {
value = ir.GetScalarReg<IR::U32>(IR::ScalarReg(operand.code));
}
value = ir.GetScalarReg<T>(IR::ScalarReg(operand.code));
break;
case OperandField::VectorGPR:
if (is_float) {
value = ir.GetVectorReg<IR::F32>(IR::VectorReg(operand.code));
} else {
value = ir.GetVectorReg<IR::U32>(IR::VectorReg(operand.code));
}
value = ir.GetVectorReg<T>(IR::VectorReg(operand.code));
break;
case OperandField::ConstZero:
if (is_float) {
value = ir.Imm32(0.f);
} else {
value = ir.Imm32(0U);
}
value = get_imm(0U);
break;
case OperandField::SignedConstIntPos:
ASSERT(!force_flt);
value = ir.Imm32(operand.code - SignedConstIntPosMin + 1);
value = get_imm(operand.code - SignedConstIntPosMin + 1);
break;
case OperandField::SignedConstIntNeg:
ASSERT(!force_flt);
value = ir.Imm32(-s32(operand.code) + SignedConstIntNegMin - 1);
value = get_imm(-s32(operand.code) + SignedConstIntNegMin - 1);
break;
case OperandField::LiteralConst:
if (is_float) {
value = ir.Imm32(std::bit_cast<float>(operand.code));
} else {
value = ir.Imm32(operand.code);
}
value = get_imm(operand.code);
break;
case OperandField::ConstFloatPos_1_0:
if (is_float) {
value = ir.Imm32(1.f);
} else {
value = ir.Imm32(std::bit_cast<u32>(1.f));
}
value = get_imm(1.f);
break;
case OperandField::ConstFloatPos_0_5:
value = ir.Imm32(0.5f);
value = get_imm(0.5f);
break;
case OperandField::ConstFloatPos_2_0:
value = ir.Imm32(2.0f);
value = get_imm(2.0f);
break;
case OperandField::ConstFloatPos_4_0:
value = ir.Imm32(4.0f);
value = get_imm(4.0f);
break;
case OperandField::ConstFloatNeg_0_5:
value = ir.Imm32(-0.5f);
value = get_imm(-0.5f);
break;
case OperandField::ConstFloatNeg_1_0:
if (is_float) {
value = ir.Imm32(-1.0f);
} else {
value = ir.Imm32(std::bit_cast<u32>(-1.0f));
}
value = get_imm(-1.0f);
break;
case OperandField::ConstFloatNeg_2_0:
value = ir.Imm32(-2.0f);
value = get_imm(-2.0f);
break;
case OperandField::ConstFloatNeg_4_0:
value = ir.Imm32(-4.0f);
value = get_imm(-4.0f);
break;
case OperandField::VccLo:
if (force_flt) {
if constexpr (is_float) {
value = ir.BitCast<IR::F32>(ir.GetVccLo());
} else {
value = ir.GetVccLo();
}
break;
case OperandField::VccHi:
if (force_flt) {
if constexpr (is_float) {
value = ir.BitCast<IR::F32>(ir.GetVccHi());
} else {
value = ir.GetVccHi();
}
break;
case OperandField::M0:
return m0_value;
if constexpr (is_float) {
UNREACHABLE();
} else {
return m0_value;
}
default:
UNREACHABLE();
}
if (is_float) {
if constexpr (is_float) {
if (operand.input_modifier.abs) {
value = ir.FPAbs(value);
}
if (operand.input_modifier.neg) {
value = ir.FPNeg(value);
}
} else {
if (operand.input_modifier.abs) {
UNREACHABLE();
}
if (operand.input_modifier.neg) {
UNREACHABLE();
}
}
return value;
}
template IR::U32 Translator::GetSrc<IR::U32>(const InstOperand&);
template IR::F32 Translator::GetSrc<IR::F32>(const InstOperand&);
template <typename T>
T Translator::GetSrc64(const InstOperand& operand) {
constexpr bool is_float = std::is_same_v<T, IR::F64>;
const auto get_imm = [&](auto value) -> T {
if constexpr (is_float) {
return ir.Imm64(std::bit_cast<double>(value));
} else {
return ir.Imm64(std::bit_cast<u64>(value));
}
};
T value{};
switch (operand.field) {
case OperandField::ScalarGPR: {
const auto value_lo = ir.GetScalarReg(IR::ScalarReg(operand.code));
const auto value_hi = ir.GetScalarReg(IR::ScalarReg(operand.code + 1));
if constexpr (is_float) {
UNREACHABLE();
} else {
value = ir.PackUint2x32(ir.CompositeConstruct(value_lo, value_hi));
}
break;
}
case OperandField::VectorGPR: {
const auto value_lo = ir.GetVectorReg(IR::VectorReg(operand.code));
const auto value_hi = ir.GetVectorReg(IR::VectorReg(operand.code + 1));
if constexpr (is_float) {
UNREACHABLE();
} else {
value = ir.PackUint2x32(ir.CompositeConstruct(value_lo, value_hi));
}
break;
}
case OperandField::ConstZero:
value = get_imm(0ULL);
break;
case OperandField::SignedConstIntPos:
value = get_imm(s64(operand.code) - SignedConstIntPosMin + 1);
break;
case OperandField::SignedConstIntNeg:
value = get_imm(-s64(operand.code) + SignedConstIntNegMin - 1);
break;
case OperandField::LiteralConst:
value = get_imm(u64(operand.code));
break;
case OperandField::ConstFloatPos_1_0:
value = get_imm(1.0);
break;
case OperandField::ConstFloatPos_0_5:
value = get_imm(0.5);
break;
case OperandField::ConstFloatPos_2_0:
value = get_imm(2.0);
break;
case OperandField::ConstFloatPos_4_0:
value = get_imm(4.0);
break;
case OperandField::ConstFloatNeg_0_5:
value = get_imm(-0.5);
break;
case OperandField::ConstFloatNeg_1_0:
value = get_imm(-1.0);
break;
case OperandField::ConstFloatNeg_2_0:
value = get_imm(-2.0);
break;
case OperandField::ConstFloatNeg_4_0:
value = get_imm(-4.0);
break;
case OperandField::VccLo:
if constexpr (is_float) {
UNREACHABLE();
} else {
value = ir.PackUint2x32(ir.CompositeConstruct(ir.GetVccLo(), ir.GetVccHi()));
}
break;
case OperandField::VccHi:
default:
UNREACHABLE();
}
if constexpr (is_float) {
if (operand.input_modifier.abs) {
value = ir.FPAbs(value);
}
@ -178,148 +267,8 @@ IR::U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) {
return value;
}
template <>
IR::U32 Translator::GetSrc(const InstOperand& operand, bool force_flt) {
return GetSrc<IR::U32F32>(operand, force_flt);
}
template <>
IR::F32 Translator::GetSrc(const InstOperand& operand, bool) {
return GetSrc<IR::U32F32>(operand, true);
}
template <>
IR::U64F64 Translator::GetSrc64(const InstOperand& operand, bool force_flt) {
IR::Value value_hi{};
IR::Value value_lo{};
bool immediate = false;
const bool is_float = operand.type == ScalarType::Float64 || force_flt;
switch (operand.field) {
case OperandField::ScalarGPR:
if (is_float) {
value_lo = ir.GetScalarReg<IR::F32>(IR::ScalarReg(operand.code));
value_hi = ir.GetScalarReg<IR::F32>(IR::ScalarReg(operand.code + 1));
} else if (operand.type == ScalarType::Uint64 || operand.type == ScalarType::Sint64) {
value_lo = ir.GetScalarReg<IR::U32>(IR::ScalarReg(operand.code));
value_hi = ir.GetScalarReg<IR::U32>(IR::ScalarReg(operand.code + 1));
} else {
UNREACHABLE();
}
break;
case OperandField::VectorGPR:
if (is_float) {
value_lo = ir.GetVectorReg<IR::F32>(IR::VectorReg(operand.code));
value_hi = ir.GetVectorReg<IR::F32>(IR::VectorReg(operand.code + 1));
} else if (operand.type == ScalarType::Uint64 || operand.type == ScalarType::Sint64) {
value_lo = ir.GetVectorReg<IR::U32>(IR::VectorReg(operand.code));
value_hi = ir.GetVectorReg<IR::U32>(IR::VectorReg(operand.code + 1));
} else {
UNREACHABLE();
}
break;
case OperandField::ConstZero:
immediate = true;
if (force_flt) {
value_lo = ir.Imm64(0.0);
} else {
value_lo = ir.Imm64(u64(0U));
}
break;
case OperandField::SignedConstIntPos:
ASSERT(!force_flt);
immediate = true;
value_lo = ir.Imm64(s64(operand.code) - SignedConstIntPosMin + 1);
break;
case OperandField::SignedConstIntNeg:
ASSERT(!force_flt);
immediate = true;
value_lo = ir.Imm64(-s64(operand.code) + SignedConstIntNegMin - 1);
break;
case OperandField::LiteralConst:
immediate = true;
if (force_flt) {
UNREACHABLE(); // There is a literal double?
} else {
value_lo = ir.Imm64(u64(operand.code));
}
break;
case OperandField::ConstFloatPos_1_0:
immediate = true;
if (force_flt) {
value_lo = ir.Imm64(1.0);
} else {
value_lo = ir.Imm64(std::bit_cast<u64>(f64(1.0)));
}
break;
case OperandField::ConstFloatPos_0_5:
immediate = true;
value_lo = ir.Imm64(0.5);
break;
case OperandField::ConstFloatPos_2_0:
immediate = true;
value_lo = ir.Imm64(2.0);
break;
case OperandField::ConstFloatPos_4_0:
immediate = true;
value_lo = ir.Imm64(4.0);
break;
case OperandField::ConstFloatNeg_0_5:
immediate = true;
value_lo = ir.Imm64(-0.5);
break;
case OperandField::ConstFloatNeg_1_0:
immediate = true;
value_lo = ir.Imm64(-1.0);
break;
case OperandField::ConstFloatNeg_2_0:
immediate = true;
value_lo = ir.Imm64(-2.0);
break;
case OperandField::ConstFloatNeg_4_0:
immediate = true;
value_lo = ir.Imm64(-4.0);
break;
case OperandField::VccLo: {
value_lo = ir.GetVccLo();
value_hi = ir.GetVccHi();
} break;
case OperandField::VccHi:
UNREACHABLE();
default:
UNREACHABLE();
}
IR::Value value;
if (immediate) {
value = value_lo;
} else if (is_float) {
throw NotImplementedException("required OpPackDouble2x32 implementation");
} else {
IR::Value packed = ir.CompositeConstruct(value_lo, value_hi);
value = ir.PackUint2x32(packed);
}
if (is_float) {
if (operand.input_modifier.abs) {
value = ir.FPAbs(IR::F32F64(value));
}
if (operand.input_modifier.neg) {
value = ir.FPNeg(IR::F32F64(value));
}
}
return IR::U64F64(value);
}
template <>
IR::U64 Translator::GetSrc64(const InstOperand& operand, bool force_flt) {
return GetSrc64<IR::U64F64>(operand, force_flt);
}
template <>
IR::F64 Translator::GetSrc64(const InstOperand& operand, bool) {
return GetSrc64<IR::U64F64>(operand, true);
}
template IR::U64 Translator::GetSrc64<IR::U64>(const InstOperand&);
template IR::F64 Translator::GetSrc64<IR::F64>(const InstOperand&);
void Translator::SetDst(const InstOperand& operand, const IR::U32F32& value) {
IR::U32F32 result = value;
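
The net effect of this refactor: the operand's IR type is chosen by the caller through the template parameter instead of a runtime force_flt flag, so float-only handling (float immediates, abs/neg input modifiers) is compiled in or out per instantiation. Typical call sites, as in the vector-ALU hunks below (sketch, inside a Translator member):

const IR::F32 a{GetSrc<IR::F32>(inst.src[0])}; // float view: modifiers applied
const IR::U32 b{GetSrc<IR::U32>(inst.src[1])}; // integer view: modifiers hit UNREACHABLE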


@ -211,10 +211,10 @@ public:
void IMAGE_ATOMIC(AtomicOp op, const GcnInst& inst);
private:
template <typename T = IR::U32F32>
[[nodiscard]] T GetSrc(const InstOperand& operand, bool flt_zero = false);
template <typename T = IR::U64F64>
[[nodiscard]] T GetSrc64(const InstOperand& operand, bool flt_zero = false);
template <typename T = IR::U32>
[[nodiscard]] T GetSrc(const InstOperand& operand);
template <typename T = IR::U64>
[[nodiscard]] T GetSrc64(const InstOperand& operand);
void SetDst(const InstOperand& operand, const IR::U32F32& value);
void SetDst64(const InstOperand& operand, const IR::U64F64& value_raw);


@ -2,7 +2,6 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/frontend/translate/translate.h"
#include "shader_recompiler/profile.h"
namespace Shader::Gcn {
@ -312,7 +311,7 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
}
void Translator::V_MOV(const GcnInst& inst) {
SetDst(inst.dst[0], GetSrc(inst.src[0]));
SetDst(inst.dst[0], GetSrc<IR::F32>(inst.src[0]));
}
void Translator::V_SAD(const GcnInst& inst) {
@ -321,14 +320,14 @@ void Translator::V_SAD(const GcnInst& inst) {
}
void Translator::V_MAC_F32(const GcnInst& inst) {
SetDst(inst.dst[0], ir.FPFma(GetSrc(inst.src[0], true), GetSrc(inst.src[1], true),
GetSrc(inst.dst[0], true)));
SetDst(inst.dst[0], ir.FPFma(GetSrc<IR::F32>(inst.src[0]), GetSrc<IR::F32>(inst.src[1]),
GetSrc<IR::F32>(inst.dst[0])));
}
void Translator::V_CVT_PKRTZ_F16_F32(const GcnInst& inst) {
const IR::VectorReg dst_reg{inst.dst[0].code};
const IR::Value vec_f32 =
ir.CompositeConstruct(GetSrc(inst.src[0], true), GetSrc(inst.src[1], true));
ir.CompositeConstruct(GetSrc<IR::F32>(inst.src[0]), GetSrc<IR::F32>(inst.src[1]));
ir.SetVectorReg(dst_reg, ir.PackHalf2x16(vec_f32));
}
@ -339,13 +338,13 @@ void Translator::V_CVT_F32_F16(const GcnInst& inst) {
}
void Translator::V_CVT_F16_F32(const GcnInst& inst) {
const IR::F32 src0 = GetSrc(inst.src[0], true);
const IR::F32 src0 = GetSrc<IR::F32>(inst.src[0]);
const IR::F16 src0fp16 = ir.FPConvert(16, src0);
SetDst(inst.dst[0], ir.UConvert(32, ir.BitCast<IR::U16>(src0fp16)));
}
void Translator::V_MUL_F32(const GcnInst& inst) {
SetDst(inst.dst[0], ir.FPMul(GetSrc(inst.src[0], true), GetSrc(inst.src[1], true)));
SetDst(inst.dst[0], ir.FPMul(GetSrc<IR::F32>(inst.src[0]), GetSrc<IR::F32>(inst.src[1])));
}
void Translator::V_CNDMASK_B32(const GcnInst& inst) {
@ -354,24 +353,8 @@ void Translator::V_CNDMASK_B32(const GcnInst& inst) {
const IR::U1 flag = inst.src[2].field == OperandField::ScalarGPR
? ir.GetThreadBitScalarReg(flag_reg)
: ir.GetVcc();
// We can treat the instruction as integer most of the time, but when a source is
// a floating point constant we will force the other as float for better readability
// The other operand is also highly likely to be float as well.
const auto is_float_const = [](OperandField field) {
return field >= OperandField::ConstFloatPos_0_5 && field <= OperandField::ConstFloatNeg_4_0;
};
const bool has_flt_source =
is_float_const(inst.src[0].field) || is_float_const(inst.src[1].field);
IR::U32F32 src0 = GetSrc(inst.src[0], has_flt_source);
IR::U32F32 src1 = GetSrc(inst.src[1], has_flt_source);
if (src0.Type() == IR::Type::F32 && src1.Type() == IR::Type::U32) {
src1 = ir.BitCast<IR::F32, IR::U32>(src1);
}
if (src1.Type() == IR::Type::F32 && src0.Type() == IR::Type::U32) {
src0 = ir.BitCast<IR::F32, IR::U32>(src0);
}
const IR::Value result = ir.Select(flag, src1, src0);
const IR::Value result =
ir.Select(flag, GetSrc<IR::F32>(inst.src[1]), GetSrc<IR::F32>(inst.src[0]));
ir.SetVectorReg(dst_reg, IR::U32F32{result});
}
@ -448,21 +431,21 @@ void Translator::V_CVT_F32_U32(const GcnInst& inst) {
}
void Translator::V_MAD_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
const IR::F32 src1{GetSrc(inst.src[1], true)};
const IR::F32 src2{GetSrc(inst.src[2], true)};
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
const IR::F32 src2{GetSrc<IR::F32>(inst.src[2])};
SetDst(inst.dst[0], ir.FPFma(src0, src1, src2));
}
void Translator::V_FRACT_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
const IR::VectorReg dst_reg{inst.dst[0].code};
ir.SetVectorReg(dst_reg, ir.Fract(src0));
}
void Translator::V_ADD_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
const IR::F32 src1{GetSrc(inst.src[1], true)};
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
SetDst(inst.dst[0], ir.FPAdd(src0, src1));
}
@ -476,9 +459,9 @@ void Translator::V_CVT_OFF_F32_I4(const GcnInst& inst) {
}
void Translator::V_MED3_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
const IR::F32 src1{GetSrc(inst.src[1], true)};
const IR::F32 src2{GetSrc(inst.src[2], true)};
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
const IR::F32 src2{GetSrc<IR::F32>(inst.src[2])};
const IR::F32 mmx = ir.FPMin(ir.FPMax(src0, src1), src2);
SetDst(inst.dst[0], ir.FPMax(ir.FPMin(src0, src1), mmx));
}
@ -492,32 +475,32 @@ void Translator::V_MED3_I32(const GcnInst& inst) {
}
void Translator::V_FLOOR_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
const IR::VectorReg dst_reg{inst.dst[0].code};
ir.SetVectorReg(dst_reg, ir.FPFloor(src0));
}
void Translator::V_SUB_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
const IR::F32 src1{GetSrc(inst.src[1], true)};
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
SetDst(inst.dst[0], ir.FPSub(src0, src1));
}
void Translator::V_RCP_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
SetDst(inst.dst[0], ir.FPRecip(src0));
}
void Translator::V_FMA_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
const IR::F32 src1{GetSrc(inst.src[1], true)};
const IR::F32 src2{GetSrc(inst.src[2], true)};
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
const IR::F32 src2{GetSrc<IR::F32>(inst.src[2])};
SetDst(inst.dst[0], ir.FPFma(src0, src1, src2));
}
void Translator::V_CMP_F32(ConditionOp op, bool set_exec, const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
const IR::F32 src1{GetSrc(inst.src[1], true)};
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
const IR::U1 result = [&] {
switch (op) {
case ConditionOp::F:
@ -557,8 +540,8 @@ void Translator::V_CMP_F32(ConditionOp op, bool set_exec, const GcnInst& inst) {
}
void Translator::V_MAX_F32(const GcnInst& inst, bool is_legacy) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
const IR::F32 src1{GetSrc(inst.src[1], true)};
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
SetDst(inst.dst[0], ir.FPMax(src0, src1, is_legacy));
}
@ -569,40 +552,40 @@ void Translator::V_MAX_U32(bool is_signed, const GcnInst& inst) {
}
void Translator::V_RSQ_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
SetDst(inst.dst[0], ir.FPRecipSqrt(src0));
}
void Translator::V_SIN_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
SetDst(inst.dst[0], ir.FPSin(src0));
}
void Translator::V_LOG_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
SetDst(inst.dst[0], ir.FPLog2(src0));
}
void Translator::V_EXP_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
SetDst(inst.dst[0], ir.FPExp2(src0));
}
void Translator::V_SQRT_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
SetDst(inst.dst[0], ir.FPSqrt(src0));
}
void Translator::V_MIN_F32(const GcnInst& inst, bool is_legacy) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
const IR::F32 src1{GetSrc(inst.src[1], true)};
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
SetDst(inst.dst[0], ir.FPMin(src0, src1, is_legacy));
}
void Translator::V_MIN3_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
const IR::F32 src1{GetSrc(inst.src[1], true)};
const IR::F32 src2{GetSrc(inst.src[2], true)};
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
const IR::F32 src2{GetSrc<IR::F32>(inst.src[2])};
SetDst(inst.dst[0], ir.FPMin(src0, ir.FPMin(src1, src2)));
}
@ -614,9 +597,9 @@ void Translator::V_MIN3_I32(const GcnInst& inst) {
}
void Translator::V_MADMK_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
const IR::F32 src1{GetSrc(inst.src[1], true)};
const IR::F32 k{GetSrc(inst.src[2], true)};
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
const IR::F32 k{GetSrc<IR::F32>(inst.src[2])};
SetDst(inst.dst[0], ir.FPFma(src0, k, src1));
}
@ -625,25 +608,25 @@ void Translator::V_CUBEMA_F32(const GcnInst& inst) {
}
void Translator::V_CUBESC_F32(const GcnInst& inst) {
SetDst(inst.dst[0], GetSrc(inst.src[0], true));
SetDst(inst.dst[0], GetSrc<IR::F32>(inst.src[0]));
}
void Translator::V_CUBETC_F32(const GcnInst& inst) {
SetDst(inst.dst[0], GetSrc(inst.src[1], true));
SetDst(inst.dst[0], GetSrc<IR::F32>(inst.src[1]));
}
void Translator::V_CUBEID_F32(const GcnInst& inst) {
SetDst(inst.dst[0], GetSrc(inst.src[2], true));
SetDst(inst.dst[0], GetSrc<IR::F32>(inst.src[2]));
}
void Translator::V_CVT_U32_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
SetDst(inst.dst[0], ir.ConvertFToU(32, src0));
}
void Translator::V_SUBREV_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
const IR::F32 src1{GetSrc(inst.src[1], true)};
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
SetDst(inst.dst[0], ir.FPSub(src1, src0));
}
@ -727,9 +710,17 @@ void Translator::V_SAD_U32(const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])};
const IR::U32 src1{GetSrc(inst.src[1])};
const IR::U32 src2{GetSrc(inst.src[2])};
const IR::U32 max{ir.IMax(src0, src1, false)};
const IR::U32 min{ir.IMin(src0, src1, false)};
SetDst(inst.dst[0], ir.IAdd(ir.ISub(max, min), src2));
IR::U32 result;
if (src0.IsImmediate() && src0.U32() == 0U) {
result = src1;
} else if (src1.IsImmediate() && src1.U32() == 0U) {
result = src0;
} else {
const IR::U32 max{ir.IMax(src0, src1, false)};
const IR::U32 min{ir.IMin(src0, src1, false)};
result = ir.ISub(max, min);
}
SetDst(inst.dst[0], ir.IAdd(result, src2));
}
void Translator::V_BFE_U32(bool is_signed, const GcnInst& inst) {
@ -783,7 +774,7 @@ void Translator::V_MAD_U32_U24(const GcnInst& inst) {
}
void Translator::V_RNDNE_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
SetDst(inst.dst[0], ir.FPRoundEven(src0));
}
@ -794,14 +785,14 @@ void Translator::V_BCNT_U32_B32(const GcnInst& inst) {
}
void Translator::V_COS_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
SetDst(inst.dst[0], ir.FPCos(src0));
}
void Translator::V_MAX3_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
const IR::F32 src1{GetSrc(inst.src[1], true)};
const IR::F32 src2{GetSrc(inst.src[2], true)};
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
const IR::F32 src2{GetSrc<IR::F32>(inst.src[2])};
SetDst(inst.dst[0], ir.FPMax(src0, ir.FPMax(src1, src2)));
}
@ -813,7 +804,7 @@ void Translator::V_MAX3_U32(const GcnInst& inst) {
}
void Translator::V_CVT_I32_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
SetDst(inst.dst[0], ir.ConvertFToS(32, src0));
}
@ -830,12 +821,12 @@ void Translator::V_MUL_LO_U32(const GcnInst& inst) {
}
void Translator::V_TRUNC_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
SetDst(inst.dst[0], ir.FPTrunc(src0));
}
void Translator::V_CEIL_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
SetDst(inst.dst[0], ir.FPCeil(src0));
}
@ -899,18 +890,18 @@ void Translator::V_BFREV_B32(const GcnInst& inst) {
}
void Translator::V_LDEXP_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
const IR::U32 src1{GetSrc(inst.src[1])};
SetDst(inst.dst[0], ir.FPLdexp(src0, src1));
}
void Translator::V_CVT_FLR_I32_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
SetDst(inst.dst[0], ir.ConvertFToI(32, true, ir.FPFloor(src0)));
}
void Translator::V_CMP_CLASS_F32(const GcnInst& inst) {
const IR::F32F64 src0{GetSrc(inst.src[0])};
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
const IR::U32 src1{GetSrc(inst.src[1])};
IR::U1 value;
if (src1.IsImmediate()) {
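
On the V_SAD_U32 rewrite above: max(a, b) - min(a, b) is the absolute difference computed entirely in unsigned arithmetic, and the immediate-zero shortcuts follow from |a - 0| = a. As a plain sketch:

#include <algorithm>
#include <cstdint>

// One sum-of-absolute-differences step, matching the lowering above.
std::uint32_t SadU32(std::uint32_t a, std::uint32_t b, std::uint32_t acc) {
    const std::uint32_t diff = std::max(a, b) - std::min(a, b); // |a - b|, no sign issues
    return diff + acc; // the IR version folds diff to b or a when the other operand is 0
}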


@ -135,8 +135,8 @@ void Translator::IMAGE_SAMPLE(const GcnInst& inst) {
// Load first address components as denoted in 8.2.4 VGPR Usage Sea Islands Series Instruction
// Set Architecture
const IR::Value offset =
flags.test(MimgModifier::Offset) ? ir.GetVectorReg(addr_reg++) : IR::Value{};
const IR::U32 offset =
flags.test(MimgModifier::Offset) ? ir.GetVectorReg<IR::U32>(addr_reg++) : IR::U32{};
const IR::F32 bias =
flags.test(MimgModifier::LodBias) ? ir.GetVectorReg<IR::F32>(addr_reg++) : IR::F32{};
const IR::F32 dref =
@ -168,18 +168,17 @@ void Translator::IMAGE_SAMPLE(const GcnInst& inst) {
// Issue IR instruction, leaving unknown fields blank to patch later.
const IR::Value texel = [&]() -> IR::Value {
const IR::F32 lod = flags.test(MimgModifier::Level0) ? ir.Imm32(0.f) : IR::F32{};
if (!flags.test(MimgModifier::Pcf)) {
if (explicit_lod) {
return ir.ImageSampleExplicitLod(handle, body, lod, offset, info);
return ir.ImageSampleExplicitLod(handle, body, offset, info);
} else {
return ir.ImageSampleImplicitLod(handle, body, bias, offset, {}, info);
return ir.ImageSampleImplicitLod(handle, body, bias, offset, info);
}
}
if (explicit_lod) {
return ir.ImageSampleDrefExplicitLod(handle, body, dref, lod, offset, info);
return ir.ImageSampleDrefExplicitLod(handle, body, dref, offset, info);
}
return ir.ImageSampleDrefImplicitLod(handle, body, dref, bias, offset, {}, info);
return ir.ImageSampleDrefImplicitLod(handle, body, dref, bias, offset, info);
}();
for (u32 i = 0; i < 4; i++) {
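
The texel lambda above dispatches on two modifier bits; laid out as a table (Pcf is the depth-compare modifier, per the surrounding code):

// Pcf  explicit_lod  -> IR instruction emitted
//  0        0           ImageSampleImplicitLod(handle, body, bias, offset)
//  0        1           ImageSampleExplicitLod(handle, body, offset)
//  1        0           ImageSampleDrefImplicitLod(handle, body, dref, bias, offset)
//  1        1           ImageSampleDrefExplicitLod(handle, body, dref, offset)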


@ -16,18 +16,6 @@ namespace {
UNREACHABLE_MSG("Invalid type = {}, functionName = {}, line = {}", u32(type), functionName,
lineNumber);
}
Value MakeLodClampPair(IREmitter& ir, const F32& bias_lod, const F32& lod_clamp) {
if (!bias_lod.IsEmpty() && !lod_clamp.IsEmpty()) {
return ir.CompositeConstruct(bias_lod, lod_clamp);
} else if (!bias_lod.IsEmpty()) {
return bias_lod;
} else if (!lod_clamp.IsEmpty()) {
return lod_clamp;
} else {
return Value{};
}
}
} // Anonymous namespace
U1 IREmitter::Imm1(bool value) const {
@ -1386,30 +1374,26 @@ Value IREmitter::ImageAtomicExchange(const Value& handle, const Value& coords, c
return Inst(Opcode::ImageAtomicExchange32, Flags{info}, handle, coords, value);
}
Value IREmitter::ImageSampleImplicitLod(const Value& handle, const Value& coords, const F32& bias,
const Value& offset, const F32& lod_clamp,
Value IREmitter::ImageSampleImplicitLod(const Value& handle, const Value& body, const F32& bias,
const U32& offset, TextureInstInfo info) {
return Inst(Opcode::ImageSampleImplicitLod, Flags{info}, handle, body, bias, offset);
}
Value IREmitter::ImageSampleExplicitLod(const Value& handle, const Value& body, const U32& offset,
TextureInstInfo info) {
const Value bias_lc{MakeLodClampPair(*this, bias, lod_clamp)};
return Inst(Opcode::ImageSampleImplicitLod, Flags{info}, handle, coords, bias_lc, offset);
return Inst(Opcode::ImageSampleExplicitLod, Flags{info}, handle, body, IR::F32{}, offset);
}
Value IREmitter::ImageSampleExplicitLod(const Value& handle, const Value& coords, const F32& lod,
const Value& offset, TextureInstInfo info) {
return Inst(Opcode::ImageSampleExplicitLod, Flags{info}, handle, coords, lod, offset);
}
F32 IREmitter::ImageSampleDrefImplicitLod(const Value& handle, const Value& coords, const F32& dref,
const F32& bias, const Value& offset,
const F32& lod_clamp, TextureInstInfo info) {
const Value bias_lc{MakeLodClampPair(*this, bias, lod_clamp)};
return Inst<F32>(Opcode::ImageSampleDrefImplicitLod, Flags{info}, handle, coords, dref, bias_lc,
F32 IREmitter::ImageSampleDrefImplicitLod(const Value& handle, const Value& body, const F32& dref,
const F32& bias, const U32& offset,
TextureInstInfo info) {
return Inst<F32>(Opcode::ImageSampleDrefImplicitLod, Flags{info}, handle, body, dref, bias,
offset);
}
F32 IREmitter::ImageSampleDrefExplicitLod(const Value& handle, const Value& coords, const F32& dref,
const F32& lod, const Value& offset,
TextureInstInfo info) {
return Inst<F32>(Opcode::ImageSampleDrefExplicitLod, Flags{info}, handle, coords, dref, lod,
F32 IREmitter::ImageSampleDrefExplicitLod(const Value& handle, const Value& body, const F32& dref,
const U32& offset, TextureInstInfo info) {
return Inst<F32>(Opcode::ImageSampleDrefExplicitLod, Flags{info}, handle, body, dref, IR::F32{},
offset);
}


@ -241,19 +241,21 @@ public:
[[nodiscard]] Value ImageAtomicExchange(const Value& handle, const Value& coords,
const Value& value, TextureInstInfo info);
[[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& coords,
const F32& bias, const Value& offset,
const F32& lod_clamp, TextureInstInfo info);
[[nodiscard]] Value ImageSampleExplicitLod(const Value& handle, const Value& coords,
const F32& lod, const Value& offset,
[[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& body,
const F32& bias, const U32& offset,
TextureInstInfo info);
[[nodiscard]] F32 ImageSampleDrefImplicitLod(const Value& handle, const Value& coords,
[[nodiscard]] Value ImageSampleExplicitLod(const Value& handle, const Value& body,
const U32& offset, TextureInstInfo info);
[[nodiscard]] F32 ImageSampleDrefImplicitLod(const Value& handle, const Value& body,
const F32& dref, const F32& bias,
const Value& offset, const F32& lod_clamp,
const U32& offset, TextureInstInfo info);
[[nodiscard]] F32 ImageSampleDrefExplicitLod(const Value& handle, const Value& body,
const F32& dref, const U32& offset,
TextureInstInfo info);
[[nodiscard]] F32 ImageSampleDrefExplicitLod(const Value& handle, const Value& coords,
const F32& dref, const F32& lod,
const Value& offset, TextureInstInfo info);
[[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod,
const IR::U1& skip_mips);
[[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod,


@ -298,10 +298,10 @@ OPCODE(ConvertU16U32, U16, U32,
OPCODE(ConvertU32U16, U32, U16, )
// Image operations
OPCODE(ImageSampleImplicitLod, F32x4, Opaque, Opaque, Opaque, Opaque, )
OPCODE(ImageSampleExplicitLod, F32x4, Opaque, Opaque, Opaque, Opaque, )
OPCODE(ImageSampleDrefImplicitLod, F32, Opaque, Opaque, F32, Opaque, Opaque, )
OPCODE(ImageSampleDrefExplicitLod, F32, Opaque, Opaque, F32, Opaque, Opaque, )
OPCODE(ImageSampleImplicitLod, F32x4, Opaque, Opaque, F32, U32, )
OPCODE(ImageSampleExplicitLod, F32x4, Opaque, Opaque, U32, U32, )
OPCODE(ImageSampleDrefImplicitLod, F32, Opaque, Opaque, Opaque, F32, U32, )
OPCODE(ImageSampleDrefExplicitLod, F32, Opaque, Opaque, Opaque, U32, U32, )
OPCODE(ImageGather, F32x4, Opaque, Opaque, Opaque, Opaque, )
OPCODE(ImageGatherDref, F32x4, Opaque, Opaque, Opaque, Opaque, F32, )
OPCODE(ImageFetch, F32x4, Opaque, Opaque, Opaque, U32, Opaque, )

View file

@ -376,9 +376,11 @@ s32 TryHandleInlineCbuf(IR::Inst& inst, Info& info, Descriptors& descriptors,
return -1;
}
// We have found this pattern. Build the sharp.
std::array<u64, 2> buffer;
std::array<u32, 4> buffer;
buffer[0] = info.pgm_base + p0->Arg(0).U32() + p0->Arg(1).U32();
buffer[1] = handle->Arg(2).U32() | handle->Arg(3).U64() << 32;
buffer[1] = 0;
buffer[2] = handle->Arg(2).U32();
buffer[3] = handle->Arg(3).U32();
cbuf = std::bit_cast<AmdGpu::Buffer>(buffer);
// Assign a binding to this sharp.
return descriptors.Add(BufferResource{
@ -565,25 +567,47 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
if (inst_info.has_offset) {
// The offsets are six-bit signed integers: X=[5:0], Y=[13:8], and Z=[21:16].
const bool is_gather = inst.GetOpcode() == IR::Opcode::ImageGather ||
inst.GetOpcode() == IR::Opcode::ImageGatherDref;
const u32 arg_pos = is_gather ? 2 : (inst_info.is_depth ? 4 : 3);
const u32 arg_pos = [&]() -> u32 {
switch (inst.GetOpcode()) {
case IR::Opcode::ImageGather:
case IR::Opcode::ImageGatherDref:
return 2;
case IR::Opcode::ImageSampleExplicitLod:
case IR::Opcode::ImageSampleImplicitLod:
return 3;
case IR::Opcode::ImageSampleDrefExplicitLod:
case IR::Opcode::ImageSampleDrefImplicitLod:
return 4;
default:
break;
}
return inst_info.is_depth ? 4 : 3;
}();
const IR::Value arg = inst.Arg(arg_pos);
ASSERT_MSG(arg.Type() == IR::Type::U32, "Unexpected offset type");
const auto sign_ext = [&](u32 value) { return ir.Imm32(s32(value << 24) >> 24); };
union {
u32 raw;
BitField<0, 6, u32> x;
BitField<8, 6, u32> y;
BitField<16, 6, u32> z;
} offset{arg.U32()};
const IR::Value value = ir.CompositeConstruct(sign_ext(offset.x), sign_ext(offset.y));
const auto f = [&](IR::Value value, u32 offset) -> auto {
return ir.BitFieldExtract(IR::U32{arg}, ir.Imm32(offset), ir.Imm32(6), true);
};
const auto x = f(arg, 0);
const auto y = f(arg, 8);
const auto z = f(arg, 16);
const IR::Value value = ir.CompositeConstruct(x, y, z);
inst.SetArg(arg_pos, value);
}
if (inst_info.has_lod_clamp) {
// Final argument contains lod_clamp
const u32 arg_pos = inst_info.is_depth ? 5 : 4;
const u32 arg_pos = [&]() -> u32 {
switch (inst.GetOpcode()) {
case IR::Opcode::ImageSampleImplicitLod:
return 2;
case IR::Opcode::ImageSampleDrefImplicitLod:
return 3;
default:
break;
}
return inst_info.is_depth ? 5 : 4;
}();
inst.SetArg(arg_pos, arg);
}
if (inst_info.explicit_lod) {
@ -591,7 +615,8 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
inst.GetOpcode() == IR::Opcode::ImageSampleExplicitLod ||
inst.GetOpcode() == IR::Opcode::ImageSampleDrefExplicitLod);
const u32 pos = inst.GetOpcode() == IR::Opcode::ImageSampleExplicitLod ? 2 : 3;
inst.SetArg(pos, arg);
const IR::Value value = inst_info.force_level0 ? ir.Imm32(0.f) : arg;
inst.SetArg(pos, value);
}
}
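Aside: the offset patch above swaps compile-time sign extension for BitFieldExtract, so non-constant offset words also decode correctly. A standalone sketch of the same six-bit decode (editor's illustration, not part of the patch; assumes two's-complement arithmetic right shift, guaranteed since C++20):

    // Decode the packed six-bit signed image offsets X=[5:0], Y=[13:8], Z=[21:16],
    // mirroring what BitFieldExtract(arg, pos, 6, signed) computes at IR level.
    #include <cstdint>
    #include <cstdio>

    static int32_t ExtractS6(uint32_t word, unsigned pos) {
        // Move the 6-bit field to the top bits, then arithmetic-shift back down
        // to sign-extend it.
        return static_cast<int32_t>(word << (26 - pos)) >> 26;
    }

    int main() {
        const uint32_t packed = 0x003F013E; // X = -2 (0x3E), Y = 1, Z = -1 (0x3F)
        std::printf("x=%d y=%d z=%d\n", ExtractS6(packed, 0), ExtractS6(packed, 8),
                    ExtractS6(packed, 16));
        return 0;
    }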

View file

@ -116,7 +116,7 @@ struct PushData {
std::array<u8, 32> buf_offsets;
void AddOffset(u32 binding, u32 offset) {
ASSERT(offset < 64 && binding < 32);
ASSERT(offset < 256 && binding < buf_offsets.size());
buf_offsets[binding] = offset;
}
};

View file

@ -180,6 +180,17 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
Platform::IrqC::Instance()->Signal(Platform::InterruptId::GfxFlip);
break;
}
case PM4CmdNop::PayloadType::DebugMarkerPush: {
const auto marker_sz = nop->header.count.Value() * 2;
const std::string_view label{reinterpret_cast<const char*>(&nop->data_block[1]),
marker_sz};
rasterizer->ScopeMarkerBegin(label);
break;
}
case PM4CmdNop::PayloadType::DebugMarkerPop: {
rasterizer->ScopeMarkerEnd();
break;
}
default:
break;
}
@ -226,7 +237,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
ASSERT(col_buf_id < NumColorBuffers);
const auto nop_offset = header->type3.count;
if (nop_offset == 0x0e || nop_offset == 0x0d) {
if (nop_offset == 0x0e || nop_offset == 0x0d || nop_offset == 0x0b) {
ASSERT_MSG(payload[nop_offset] == 0xc0001000,
"NOP hint is missing in CB setup sequence");
last_cb_extent[col_buf_id].raw = payload[nop_offset + 1];
@ -295,8 +306,9 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
regs.num_indices = draw_index->index_count;
regs.draw_initiator = draw_index->draw_initiator;
if (rasterizer) {
rasterizer->ScopeMarkerBegin(
fmt::format("dcb:{}:DrawIndex2", reinterpret_cast<const void*>(dcb.data())));
const auto cmd_address = reinterpret_cast<const void*>(header);
rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:DrawIndex2", cmd_address));
rasterizer->Breadcrumb(u64(cmd_address));
rasterizer->Draw(true);
rasterizer->ScopeMarkerEnd();
}
@ -308,8 +320,9 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
regs.num_indices = draw_index_off->index_count;
regs.draw_initiator = draw_index_off->draw_initiator;
if (rasterizer) {
rasterizer->ScopeMarkerBegin(fmt::format(
"dcb:{}:DrawIndexOffset2", reinterpret_cast<const void*>(dcb.data())));
const auto cmd_address = reinterpret_cast<const void*>(header);
rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:DrawIndexOffset2", cmd_address));
rasterizer->Breadcrumb(u64(cmd_address));
rasterizer->Draw(true, draw_index_off->index_offset);
rasterizer->ScopeMarkerEnd();
}
@ -320,8 +333,9 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
regs.num_indices = draw_index->index_count;
regs.draw_initiator = draw_index->draw_initiator;
if (rasterizer) {
rasterizer->ScopeMarkerBegin(
fmt::format("dcb:{}:DrawIndexAuto", reinterpret_cast<const void*>(dcb.data())));
const auto cmd_address = reinterpret_cast<const void*>(header);
rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:DrawIndexAuto", cmd_address));
rasterizer->Breadcrumb(u64(cmd_address));
rasterizer->Draw(false);
rasterizer->ScopeMarkerEnd();
}
@ -334,8 +348,9 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
regs.cs_program.dim_z = dispatch_direct->dim_z;
regs.cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator;
if (rasterizer && (regs.cs_program.dispatch_initiator & 1)) {
rasterizer->ScopeMarkerBegin(
fmt::format("dcb:{}:Dispatch", reinterpret_cast<const void*>(dcb.data())));
const auto cmd_address = reinterpret_cast<const void*>(header);
rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:Dispatch", cmd_address));
rasterizer->Breadcrumb(u64(cmd_address));
rasterizer->DispatchDirect();
rasterizer->ScopeMarkerEnd();
}
@ -486,8 +501,9 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, int vqid) {
regs.cs_program.dim_z = dispatch_direct->dim_z;
regs.cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator;
if (rasterizer && (regs.cs_program.dispatch_initiator & 1)) {
rasterizer->ScopeMarkerBegin(fmt::format(
"acb[{}]:{}:Dispatch", vqid, reinterpret_cast<const void*>(acb.data())));
const auto cmd_address = reinterpret_cast<const void*>(header);
rasterizer->ScopeMarkerBegin(fmt::format("acb[{}]:{}:Dispatch", vqid, cmd_address));
rasterizer->Breadcrumb(u64(cmd_address));
rasterizer->DispatchDirect();
rasterizer->ScopeMarkerEnd();
}

View file

@ -766,7 +766,8 @@ struct Liverpool {
}
TilingMode GetTilingMode() const {
return attrib.tile_mode_index;
return info.linear_general ? TilingMode::Display_Linear
: attrib.tile_mode_index.Value();
}
bool IsTiled() const {

View file

@ -282,6 +282,13 @@ enum class InterruptSelect : u32 {
IrqUndocumented = 3,
};
static u64 GetGpuClock64() {
auto now = std::chrono::high_resolution_clock::now();
auto duration = now.time_since_epoch();
auto ticks = std::chrono::duration_cast<std::chrono::nanoseconds>(duration).count();
return static_cast<u64>(ticks);
}
struct PM4CmdEventWriteEop {
PM4Type3Header header;
union {
@ -325,6 +332,10 @@ struct PM4CmdEventWriteEop {
*Address<u64>() = DataQWord();
break;
}
case DataSelect::GpuClock64: {
*Address<u64>() = GetGpuClock64();
break;
}
case DataSelect::PerfCounter: {
*Address<u64>() = Common::FencedRDTSC();
break;
@ -652,13 +663,6 @@ struct PM4CmdReleaseMem {
return data_lo | u64(data_hi) << 32;
}
uint64_t GetGpuClock64() const {
auto now = std::chrono::high_resolution_clock::now();
auto duration = now.time_since_epoch();
auto ticks = std::chrono::duration_cast<std::chrono::nanoseconds>(duration).count();
return static_cast<uint64_t>(ticks);
}
void SignalFence(Platform::InterruptId irq_id) const {
switch (data_sel.Value()) {
case DataSelect::Data32Low: {

View file

@ -41,6 +41,7 @@ enum class PM4ItOpcode : u32 {
CondIndirectBuffer = 0x3F,
CopyData = 0x40,
CommandProcessorDma = 0x41,
PfpSyncMe = 0x42,
SurfaceSync = 0x43,
CondWrite = 0x45,
EventWrite = 0x46,

View file

@ -106,10 +106,8 @@ Buffer::Buffer(const Vulkan::Instance& instance_, MemoryUsage usage_, VAddr cpu_
VmaAllocationInfo alloc_info{};
buffer.Create(buffer_ci, usage, &alloc_info);
if (instance->HasDebuggingToolAttached()) {
const auto device = instance->GetDevice();
Vulkan::SetObjectName(device, Handle(), "Buffer {:#x} {} KiB", cpu_addr, size_bytes / 1024);
}
const auto device = instance->GetDevice();
Vulkan::SetObjectName(device, Handle(), "Buffer {:#x}:{:#x}", cpu_addr, size_bytes);
// Map it if it is host visible.
VkMemoryPropertyFlags property_flags{};
@ -152,10 +150,8 @@ StreamBuffer::StreamBuffer(const Vulkan::Instance& instance, Vulkan::Scheduler&
ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE);
ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE);
const auto device = instance.GetDevice();
if (instance.HasDebuggingToolAttached()) {
Vulkan::SetObjectName(device, Handle(), "StreamBuffer({}): {} KiB", BufferTypeName(usage),
size_bytes / 1024);
}
Vulkan::SetObjectName(device, Handle(), "StreamBuffer({}):{:#x}", BufferTypeName(usage),
size_bytes);
}
std::pair<u8*, u64> StreamBuffer::Map(u64 size, u64 alignment) {

View file

@ -146,6 +146,10 @@ public:
return offset;
}
u64 GetFreeSize() const {
return size_bytes - offset - mapped_size;
}
private:
struct Watch {
u64 tick{};

View file

@ -87,6 +87,15 @@ void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si
}
bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) {
boost::container::small_vector<vk::VertexInputAttributeDescription2EXT, 16> attributes;
boost::container::small_vector<vk::VertexInputBindingDescription2EXT, 16> bindings;
SCOPE_EXIT {
if (instance.IsVertexInputDynamicState()) {
const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.setVertexInputEXT(bindings, attributes);
}
};
if (vs_info.vs_inputs.empty()) {
return false;
}
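Aside: the SCOPE_EXIT above guarantees setVertexInputEXT is still recorded when the shader has no vertex inputs and the function returns early, since dynamic vertex input state must be set for every draw. A minimal model of that guard (hypothetical ScopeExit type standing in for the project's SCOPE_EXIT macro):

    #include <cstdio>

    // Minimal scope guard: runs the callback when the enclosing scope exits,
    // whether through an early return or the normal path.
    template <typename F>
    struct ScopeExit {
        F f;
        ~ScopeExit() { f(); }
    };
    template <typename F>
    ScopeExit(F) -> ScopeExit<F>;

    static bool BindVertexBuffers(bool has_inputs) {
        ScopeExit guard{[] { std::puts("setVertexInputEXT recorded"); }};
        if (!has_inputs) {
            return false; // early return still triggers the guard
        }
        std::puts("buffers bound");
        return true;
    }

    int main() {
        BindVertexBuffers(false);
        BindVertexBuffers(true);
        return 0;
    }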
@ -122,6 +131,21 @@ bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) {
}
guest_buffers.emplace_back(buffer);
ranges.emplace_back(buffer.base_address, buffer.base_address + buffer.GetSize());
attributes.push_back({
.location = input.binding,
.binding = input.binding,
.format =
Vulkan::LiverpoolToVK::SurfaceFormat(buffer.GetDataFmt(), buffer.GetNumberFmt()),
.offset = 0,
});
bindings.push_back({
.binding = input.binding,
.stride = buffer.GetStride(),
.inputRate = input.instance_step_rate == Shader::Info::VsInput::None
? vk::VertexInputRate::eVertex
: vk::VertexInputRate::eInstance,
.divisor = 1,
});
}
std::ranges::sort(ranges, [](const BufferRange& lhv, const BufferRange& rhv) {
@ -224,6 +248,19 @@ std::pair<Buffer*, u32> BufferCache::ObtainBuffer(VAddr device_addr, u32 size, b
return {&buffer, buffer.Offset(device_addr)};
}
std::pair<const Buffer*, u32> BufferCache::ObtainTempBuffer(VAddr gpu_addr, u32 size) {
const u64 page = gpu_addr >> CACHING_PAGEBITS;
const BufferId buffer_id = page_table[page];
if (buffer_id) {
const Buffer& buffer = slot_buffers[buffer_id];
if (buffer.IsInBounds(gpu_addr, size)) {
return {&buffer, buffer.Offset(gpu_addr)};
}
}
const u32 offset = staging_buffer.Copy(gpu_addr, size, 16);
return {&staging_buffer, offset};
}
bool BufferCache::IsRegionRegistered(VAddr addr, size_t size) {
const VAddr end_addr = addr + size;
const u64 page_end = Common::DivCeil(end_addr, CACHING_PAGESIZE);
@ -248,6 +285,10 @@ bool BufferCache::IsRegionCpuModified(VAddr addr, size_t size) {
return memory_tracker.IsRegionCpuModified(addr, size);
}
bool BufferCache::IsRegionGpuModified(VAddr addr, size_t size) {
return memory_tracker.IsRegionGpuModified(addr, size);
}
BufferId BufferCache::FindBuffer(VAddr device_addr, u32 size) {
if (device_addr == 0) {
return NULL_BUFFER_ID;

View file

@ -69,12 +69,18 @@ public:
/// Obtains a buffer for the specified region.
[[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(VAddr gpu_addr, u32 size, bool is_written);
/// Obtains a temporary buffer for usage in texture cache.
[[nodiscard]] std::pair<const Buffer*, u32> ObtainTempBuffer(VAddr gpu_addr, u32 size);
/// Return true when a region is registered in the cache
[[nodiscard]] bool IsRegionRegistered(VAddr addr, size_t size);
/// Return true when a region is modified by the CPU
[[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size);
/// Return true when a region is modified by the GPU
[[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
private:
template <typename Func>
void ForEachBufferInRange(VAddr device_addr, u64 size, Func&& func) {

View file

@ -47,7 +47,7 @@ public:
Frame* PrepareFrame(const Libraries::VideoOut::BufferAttributeGroup& attribute,
VAddr cpu_address, bool is_eop) {
const auto info = VideoCore::ImageInfo{attribute, cpu_address};
const auto image_id = texture_cache.FindImage(info, false);
const auto image_id = texture_cache.FindImage(info);
auto& image = texture_cache.GetImage(image_id);
return PrepareFrameInternal(image, is_eop);
}
@ -61,7 +61,7 @@ public:
const Libraries::VideoOut::BufferAttributeGroup& attribute, VAddr cpu_address) {
vo_buffers_addr.emplace_back(cpu_address);
const auto info = VideoCore::ImageInfo{attribute, cpu_address};
const auto image_id = texture_cache.FindImage(info, false);
const auto image_id = texture_cache.FindImage(info);
return texture_cache.GetImage(image_id);
}

View file

@ -96,7 +96,7 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
Shader::PushData push_data{};
u32 binding{};
for (u32 i = 0; const auto& buffer : info.buffers) {
for (const auto& buffer : info.buffers) {
const auto vsharp = buffer.GetVsharp(info);
const VAddr address = vsharp.base_address;
// Most of the time when metadata is updated by a shader it gets cleared. It means we
@ -115,7 +115,7 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
}
const u32 size = vsharp.GetSize();
if (buffer.is_written) {
texture_cache.InvalidateMemory(address, size);
texture_cache.InvalidateMemory(address, size, true);
}
const u32 alignment =
buffer.is_storage ? instance.StorageMinAlignment() : instance.UniformMinAlignment();
@ -137,7 +137,6 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
: vk::DescriptorType::eUniformBuffer,
.pBufferInfo = &buffer_infos.back(),
});
i++;
}
for (const auto& image_desc : info.images) {

View file

@ -145,6 +145,9 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
dynamic_states.push_back(vk::DynamicState::eColorWriteEnableEXT);
dynamic_states.push_back(vk::DynamicState::eColorWriteMaskEXT);
}
if (instance.IsVertexInputDynamicState()) {
dynamic_states.push_back(vk::DynamicState::eVertexInputEXT);
}
const vk::PipelineDynamicStateCreateInfo dynamic_info = {
.dynamicStateCount = static_cast<u32>(dynamic_states.size()),

View file

@ -8,6 +8,7 @@
#include <fmt/ranges.h>
#include "common/assert.h"
#include "common/config.h"
#include "sdl_window.h"
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
#include "video_core/renderer_vulkan/vk_instance.h"
@ -163,7 +164,8 @@ bool Instance::CreateDevice() {
vk::PhysicalDeviceColorWriteEnableFeaturesEXT, vk::PhysicalDeviceVulkan12Features,
vk::PhysicalDeviceVulkan13Features,
vk::PhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR,
vk::PhysicalDeviceDepthClipControlFeaturesEXT>();
vk::PhysicalDeviceDepthClipControlFeaturesEXT, vk::PhysicalDeviceRobustness2FeaturesEXT,
vk::PhysicalDevicePortabilitySubsetFeaturesKHR>();
const vk::StructureChain properties_chain = physical_device.getProperties2<
vk::PhysicalDeviceProperties2, vk::PhysicalDevicePortabilitySubsetPropertiesKHR,
vk::PhysicalDeviceExternalMemoryHostPropertiesEXT, vk::PhysicalDeviceVulkan11Properties>();
@ -197,10 +199,12 @@ bool Instance::CreateDevice() {
external_memory_host = add_extension(VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME);
custom_border_color = add_extension(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME);
add_extension(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
add_extension(VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME);
const bool depth_clip_control = add_extension(VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME);
add_extension(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME);
workgroup_memory_explicit_layout =
add_extension(VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME);
vertex_input_dynamic_state = add_extension(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME);
// The next two extensions are required to be available together in order to support write masks
color_write_en = add_extension(VK_EXT_COLOR_WRITE_ENABLE_EXTENSION_NAME);
color_write_en &= add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME);
@ -210,9 +214,21 @@ bool Instance::CreateDevice() {
// These extensions are promoted by Vulkan 1.3, but for greater compatibility we use Vulkan 1.2
// with extensions.
tooling_info = add_extension(VK_EXT_TOOLING_INFO_EXTENSION_NAME);
add_extension(VK_KHR_MAINTENANCE_4_EXTENSION_NAME);
const bool maintenance4 = add_extension(VK_KHR_MAINTENANCE_4_EXTENSION_NAME);
add_extension(VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME);
add_extension(VK_EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION_EXTENSION_NAME);
const bool has_sync2 = add_extension(VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME);
if (has_sync2) {
has_nv_checkpoints = Config::isMarkersEnabled()
? add_extension(VK_NV_DEVICE_DIAGNOSTIC_CHECKPOINTS_EXTENSION_NAME)
: false;
}
#ifdef __APPLE__
// Required by Vulkan spec if supported.
add_extension(VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME);
#endif
const auto family_properties = physical_device.getQueueFamilyProperties();
if (family_properties.empty()) {
@ -308,21 +324,51 @@ bool Instance::CreateDevice() {
vk::PhysicalDeviceRobustness2FeaturesEXT{
.nullDescriptor = true,
},
vk::PhysicalDeviceSynchronization2Features{
.synchronization2 = true,
},
vk::PhysicalDeviceVertexInputDynamicStateFeaturesEXT{
.vertexInputDynamicState = true,
},
#ifdef __APPLE__
feature_chain.get<vk::PhysicalDevicePortabilitySubsetFeaturesKHR>(),
#endif
};
if (!maintenance4) {
device_chain.unlink<vk::PhysicalDeviceMaintenance4FeaturesKHR>();
}
if (!custom_border_color) {
device_chain.unlink<vk::PhysicalDeviceCustomBorderColorFeaturesEXT>();
}
if (!color_write_en) {
device_chain.unlink<vk::PhysicalDeviceColorWriteEnableFeaturesEXT>();
device_chain.unlink<vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT>();
}
if (!robustness) {
if (!depth_clip_control) {
device_chain.unlink<vk::PhysicalDeviceDepthClipControlFeaturesEXT>();
}
if (!workgroup_memory_explicit_layout) {
device_chain.unlink<vk::PhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR>();
}
if (robustness) {
device_chain.get<vk::PhysicalDeviceRobustness2FeaturesEXT>().nullDescriptor =
feature_chain.get<vk::PhysicalDeviceRobustness2FeaturesEXT>().nullDescriptor;
} else {
device_chain.unlink<vk::PhysicalDeviceRobustness2FeaturesEXT>();
}
if (!vertex_input_dynamic_state) {
device_chain.unlink<vk::PhysicalDeviceVertexInputDynamicStateFeaturesEXT>();
}
try {
device = physical_device.createDeviceUnique(device_chain.get());
} catch (vk::ExtensionNotPresentError& err) {
LOG_CRITICAL(Render_Vulkan, "Some required extensions are not available {}", err.what());
return false;
} catch (vk::FeatureNotPresentError& err) {
LOG_CRITICAL(Render_Vulkan, "Some required features are not available {}", err.what());
return false;
}
VULKAN_HPP_DEFAULT_DISPATCHER.init(*device);
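Aside: the device-creation path above toggles optional features by unlinking their structs from the pNext chain when the backing extension is missing. A small self-contained sketch of that Vulkan-Hpp pattern (assumes VULKAN_HPP_NO_STRUCT_CONSTRUCTORS for designated initializers, as this codebase uses):

    #define VULKAN_HPP_NO_STRUCT_CONSTRUCTORS // enables designated initializers
    #include <vulkan/vulkan.hpp>
    #include <cstdio>

    int main() {
        // StructureChain links every struct via pNext at construction; unlink<T>()
        // drops T from the runtime chain while keeping the compile-time type list,
        // so one chain definition serves devices with and without the extension.
        vk::StructureChain<vk::DeviceCreateInfo, vk::PhysicalDeviceRobustness2FeaturesEXT>
            chain{
                vk::DeviceCreateInfo{},
                vk::PhysicalDeviceRobustness2FeaturesEXT{.nullDescriptor = true},
            };
        const bool robustness_supported = false; // pretend the extension is absent
        if (!robustness_supported) {
            chain.unlink<vk::PhysicalDeviceRobustness2FeaturesEXT>();
        }
        std::puts(chain.get<vk::DeviceCreateInfo>().pNext ? "linked" : "unlinked");
        return 0;
    }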

View file

@ -88,6 +88,10 @@ public:
return profiler_context;
}
bool HasNvCheckpoints() const {
return has_nv_checkpoints;
}
/// Returns true when a known debugging tool is attached.
bool HasDebuggingToolAttached() const {
return has_renderdoc || has_nsight_graphics;
@ -128,6 +132,11 @@ public:
return color_write_en;
}
/// Returns true when VK_EXT_vertex_input_dynamic_state is supported.
bool IsVertexInputDynamicState() const {
return vertex_input_dynamic_state;
}
/// Returns the vendor ID of the physical device
u32 GetVendorID() const {
return properties.vendorID;
@ -253,12 +262,14 @@ private:
bool external_memory_host{};
bool workgroup_memory_explicit_layout{};
bool color_write_en{};
bool vertex_input_dynamic_state{};
u64 min_imported_host_pointer_alignment{};
u32 subgroup_size{};
bool tooling_info{};
bool debug_utils_supported{};
bool has_nsight_graphics{};
bool has_renderdoc{};
bool has_nv_checkpoints{};
};
} // namespace Vulkan

View file

@ -209,6 +209,10 @@ void PipelineCache::RefreshGraphicsKey() {
continue;
}
const auto* bininfo = Liverpool::GetBinaryInfo(*pgm);
if (!bininfo->Valid()) {
key.stage_hashes[i] = 0;
continue;
}
key.stage_hashes[i] = bininfo->shader_hash;
}
}

View file

@ -157,6 +157,10 @@ std::vector<const char*> GetInstanceExtensions(Frontend::WindowSystemType window
break;
}
#ifdef __APPLE__
extensions.push_back(VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME);
#endif
if (window_type != Frontend::WindowSystemType::Headless) {
extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME);
}
@ -221,12 +225,61 @@ vk::UniqueInstance CreateInstance(vk::DynamicLoader& dl, Frontend::WindowSystemT
vk::Bool32 enable_sync =
enable_validation && Config::vkValidationSyncEnabled() ? vk::True : vk::False;
vk::LayerSettingEXT layer_set = {
.pLayerName = VALIDATION_LAYER_NAME,
.pSettingName = "validate_sync",
.type = vk::LayerSettingTypeEXT::eBool32,
.valueCount = 1,
.pValues = &enable_sync,
};
vk::Bool32 enable_gpuav =
enable_validation && Config::vkValidationGpuEnabled() ? vk::True : vk::False;
const char* gpuav_mode = enable_validation && Config::vkValidationGpuEnabled()
? "GPU_BASED_GPU_ASSISTED"
: "GPU_BASED_NONE";
const std::array layer_settings = {
vk::LayerSettingEXT{
.pLayerName = VALIDATION_LAYER_NAME,
.pSettingName = "validate_sync",
.type = vk::LayerSettingTypeEXT::eBool32,
.valueCount = 1,
.pValues = &enable_sync,
},
vk::LayerSettingEXT{
.pLayerName = VALIDATION_LAYER_NAME,
.pSettingName = "sync_queue_submit",
.type = vk::LayerSettingTypeEXT::eBool32,
.valueCount = 1,
.pValues = &enable_sync,
},
vk::LayerSettingEXT{
.pLayerName = VALIDATION_LAYER_NAME,
.pSettingName = "validate_gpu_based",
.type = vk::LayerSettingTypeEXT::eString,
.valueCount = 1,
.pValues = &gpuav_mode,
},
vk::LayerSettingEXT{
.pLayerName = VALIDATION_LAYER_NAME,
.pSettingName = "gpuav_reserve_binding_slot",
.type = vk::LayerSettingTypeEXT::eBool32,
.valueCount = 1,
.pValues = &enable_gpuav,
},
vk::LayerSettingEXT{
.pLayerName = VALIDATION_LAYER_NAME,
.pSettingName = "gpuav_descriptor_checks",
.type = vk::LayerSettingTypeEXT::eBool32,
.valueCount = 1,
.pValues = &enable_gpuav,
},
vk::LayerSettingEXT{
.pLayerName = VALIDATION_LAYER_NAME,
.pSettingName = "gpuav_validate_indirect_buffer",
.type = vk::LayerSettingTypeEXT::eBool32,
.valueCount = 1,
.pValues = &enable_gpuav,
},
vk::LayerSettingEXT{
.pLayerName = VALIDATION_LAYER_NAME,
.pSettingName = "gpuav_buffer_copies",
.type = vk::LayerSettingTypeEXT::eBool32,
.valueCount = 1,
.pValues = &enable_gpuav,
},
};
vk::StructureChain<vk::InstanceCreateInfo, vk::LayerSettingsCreateInfoEXT> instance_ci_chain = {
@ -236,10 +289,13 @@ vk::UniqueInstance CreateInstance(vk::DynamicLoader& dl, Frontend::WindowSystemT
.ppEnabledLayerNames = layers.data(),
.enabledExtensionCount = static_cast<u32>(extensions.size()),
.ppEnabledExtensionNames = extensions.data(),
#ifdef __APPLE__
.flags = vk::InstanceCreateFlagBits::eEnumeratePortabilityKHR,
#endif
},
vk::LayerSettingsCreateInfoEXT{
.settingCount = 1,
.pSettings = &layer_set,
.settingCount = layer_settings.size(),
.pSettings = layer_settings.data(),
},
};

View file

@ -152,7 +152,8 @@ void Rasterizer::BeginRendering() {
.stencil = regs.stencil_clear}},
};
texture_cache.TouchMeta(htile_address, false);
state.num_depth_attachments++;
state.has_depth = true;
state.has_stencil = image.info.usage.stencil;
}
scheduler.BeginRendering(state);
}
@ -230,16 +231,42 @@ void Rasterizer::UpdateDepthStencilState() {
cmdbuf.setDepthBoundsTestEnable(depth.depth_bounds_enable);
}
void Rasterizer::ScopeMarkerBegin(const std::string& str) {
void Rasterizer::ScopeMarkerBegin(const std::string_view& str) {
if (!Config::isMarkersEnabled()) {
return;
}
const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.beginDebugUtilsLabelEXT(vk::DebugUtilsLabelEXT{
.pLabelName = str.c_str(),
.pLabelName = str.data(),
});
}
void Rasterizer::ScopeMarkerEnd() {
if (!Config::isMarkersEnabled()) {
return;
}
const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.endDebugUtilsLabelEXT();
}
void Rasterizer::ScopedMarkerInsert(const std::string_view& str) {
if (!Config::isMarkersEnabled()) {
return;
}
const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.insertDebugUtilsLabelEXT(vk::DebugUtilsLabelEXT{
.pLabelName = str.data(),
});
}
void Rasterizer::Breadcrumb(u64 id) {
if (!instance.HasNvCheckpoints()) {
return;
}
scheduler.CommandBuffer().setCheckpointNV(id);
}
} // namespace Vulkan

View file

@ -35,8 +35,10 @@ public:
void DispatchDirect();
void ScopeMarkerBegin(const std::string& str);
void ScopeMarkerBegin(const std::string_view& str);
void ScopeMarkerEnd();
void ScopedMarkerInsert(const std::string_view& str);
void Breadcrumb(u64 id);
void InvalidateMemory(VAddr addr, u64 size);
void MapMemory(VAddr addr, u64 size);

View file

@ -38,8 +38,7 @@ void Scheduler::BeginRendering(const RenderState& new_state) {
.layerCount = 1,
.colorAttachmentCount = render_state.num_color_attachments,
.pColorAttachments = render_state.color_attachments.data(),
.pDepthAttachment =
render_state.num_depth_attachments ? &render_state.depth_attachment : nullptr,
.pDepthAttachment = render_state.has_depth ? &render_state.depth_attachment : nullptr,
};
current_cmdbuf.beginRendering(rendering_info);
@ -50,6 +49,8 @@ void Scheduler::EndRendering() {
return;
}
is_rendering = false;
current_cmdbuf.endRendering();
boost::container::static_vector<vk::ImageMemoryBarrier, 9> barriers;
for (size_t i = 0; i < render_state.num_color_attachments; ++i) {
barriers.push_back(vk::ImageMemoryBarrier{
@ -70,10 +71,35 @@ void Scheduler::EndRendering() {
},
});
}
current_cmdbuf.endRendering();
if (render_state.has_depth) {
barriers.push_back(vk::ImageMemoryBarrier{
.srcAccessMask = vk::AccessFlagBits::eDepthStencilAttachmentWrite,
.dstAccessMask = vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite,
.oldLayout = render_state.depth_attachment.imageLayout,
.newLayout = render_state.depth_attachment.imageLayout,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = render_state.depth_image,
.subresourceRange =
{
.aspectMask = vk::ImageAspectFlagBits::eDepth |
(render_state.has_stencil ? vk::ImageAspectFlagBits::eStencil
: vk::ImageAspectFlagBits::eNone),
.baseMipLevel = 0,
.levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
});
}
if (!barriers.empty()) {
current_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eColorAttachmentOutput,
vk::PipelineStageFlagBits::eFragmentShader,
const auto src_stages =
vk::PipelineStageFlagBits::eColorAttachmentOutput |
(render_state.has_depth ? vk::PipelineStageFlagBits::eLateFragmentTests |
vk::PipelineStageFlagBits::eEarlyFragmentTests
: vk::PipelineStageFlagBits::eNone);
current_cmdbuf.pipelineBarrier(src_stages, vk::PipelineStageFlagBits::eFragmentShader,
vk::DependencyFlagBits::eByRegion, {}, {}, barriers);
}
}
@ -158,6 +184,13 @@ void Scheduler::SubmitExecution(SubmitInfo& info) {
try {
instance.GetGraphicsQueue().submit(submit_info, info.fence);
} catch (vk::DeviceLostError& err) {
if (instance.HasNvCheckpoints()) {
const auto checkpoint_data = instance.GetGraphicsQueue().getCheckpointData2NV();
for (const auto& cp : checkpoint_data) {
LOG_CRITICAL(Render_Vulkan, "{}: {:#x}", vk::to_string(cp.stage),
reinterpret_cast<u64>(cp.pCheckpointMarker));
}
}
UNREACHABLE_MSG("Device lost during submit: {}", err.what());
}

View file

@ -20,7 +20,8 @@ struct RenderState {
vk::RenderingAttachmentInfo depth_attachment{};
vk::Image depth_image{};
u32 num_color_attachments{};
u32 num_depth_attachments{};
bool has_depth{};
bool has_stencil{};
u32 width = std::numeric_limits<u32>::max();
u32 height = std::numeric_limits<u32>::max();

View file

@ -37,6 +37,16 @@ void Swapchain::Create(u32 width_, u32 height_, vk::SurfaceKHR surface_) {
instance.GetPresentQueueFamilyIndex(),
};
const auto modes = instance.GetPhysicalDevice().getSurfacePresentModesKHR(surface);
const auto find_mode = [&modes](vk::PresentModeKHR requested) {
return std::ranges::find(modes, requested) != modes.end();
};
const bool has_mailbox = find_mode(vk::PresentModeKHR::eMailbox);
const bool exclusive = queue_family_indices[0] == queue_family_indices[1];
const u32 queue_family_indices_count = exclusive ? 1u : 2u;
const vk::SharingMode sharing_mode =
@ -55,7 +65,7 @@ void Swapchain::Create(u32 width_, u32 height_, vk::SurfaceKHR surface_) {
.pQueueFamilyIndices = queue_family_indices.data(),
.preTransform = transform,
.compositeAlpha = composite_alpha,
.presentMode = vk::PresentModeKHR::eMailbox,
.presentMode = has_mailbox ? vk::PresentModeKHR::eMailbox : vk::PresentModeKHR::eImmediate,
.clipped = true,
.oldSwapchain = nullptr,
};
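A note on the fallback above: eImmediate is itself optional, and eFifo is the only present mode the Vulkan spec guarantees. A more defensive variant of the same selection (editor's sketch, reusing the find_mode helper above):

    // Sketch: spec-safe selection. eMailbox and eImmediate must be queried;
    // eFifo is always available per the Vulkan spec.
    const vk::PresentModeKHR present_mode =
        has_mailbox                                 ? vk::PresentModeKHR::eMailbox
        : find_mode(vk::PresentModeKHR::eImmediate) ? vk::PresentModeKHR::eImmediate
                                                    : vk::PresentModeKHR::eFifo;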

View file

@ -2,6 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/assert.h"
#include "common/config.h"
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
@ -116,6 +117,7 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
: instance{&instance_}, scheduler{&scheduler_}, info{info_},
image{instance->GetDevice(), instance->GetAllocator()}, cpu_addr{info.guest_address},
cpu_addr_end{cpu_addr + info.guest_size_bytes} {
mip_hashes.resize(info.resources.levels);
ASSERT(info.pixel_format != vk::Format::eUndefined);
// Here we force `eExtendedUsage` as we don't know all image usage cases beforehand. In the normal case
// the texture cache should re-create the resource with the usage requested
@ -154,6 +156,9 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
};
image.Create(image_ci);
Vulkan::SetObjectName(instance->GetDevice(), (vk::Image)image, "Image {:#x}:{:#x}",
info.guest_address, info.guest_size_bytes);
}
void Image::Transit(vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits> dst_mask,

View file

@ -111,6 +111,7 @@ struct Image {
vk::Flags<vk::PipelineStageFlagBits> pl_stage = vk::PipelineStageFlagBits::eAllCommands;
vk::Flags<vk::AccessFlagBits> access_mask = vk::AccessFlagBits::eNone;
vk::ImageLayout layout = vk::ImageLayout::eUndefined;
boost::container::small_vector<u64, 14> mip_hashes;
};
} // namespace VideoCore

View file

@ -189,6 +189,8 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slice
resources.layers = num_slices;
meta_info.htile_addr = buffer.z_info.tile_surface_en ? htile_address : 0;
usage.depth_target = true;
usage.stencil =
buffer.stencil_info.format != AmdGpu::Liverpool::DepthBuffer::StencilFormat::Invalid;
guest_address = buffer.Address();
const auto depth_slice_sz = buffer.GetDepthSliceSize();
@ -260,7 +262,7 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image) noexcept {
case AmdGpu::TilingMode::Display_MacroTiled:
case AmdGpu::TilingMode::Texture_MacroTiled:
case AmdGpu::TilingMode::Depth_MacroTiled: {
// ASSERT(!props.is_cube && !props.is_block);
ASSERT(!props.is_block);
ASSERT(num_samples == 1);
std::tie(mip_info.pitch, mip_info.size) =
ImageSizeMacroTiled(mip_w, mip_h, bpp, num_samples, image.tiling_index);

View file

@ -92,6 +92,8 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Liverpool::ColorBuffer& col_buffer,
bool is_vo_surface) noexcept {
const auto base_format =
Vulkan::LiverpoolToVK::SurfaceFormat(col_buffer.info.format, col_buffer.NumFormat());
range.base.layer = col_buffer.view.slice_start;
range.extent.layers = col_buffer.NumSlices();
format = Vulkan::LiverpoolToVK::AdjustColorBufferFormat(
base_format, col_buffer.info.comp_swap.Value(), is_vo_surface);
}

View file

@ -3,6 +3,7 @@
#include <xxhash.h>
#include "common/assert.h"
#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/page_manager.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
@ -11,13 +12,11 @@
namespace VideoCore {
static constexpr u64 StreamBufferSize = 512_MB;
static constexpr u64 PageShift = 12;
TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
BufferCache& buffer_cache_, PageManager& tracker_)
: instance{instance_}, scheduler{scheduler_}, buffer_cache{buffer_cache_}, tracker{tracker_},
staging{instance, scheduler, MemoryUsage::Upload, StreamBufferSize},
tile_manager{instance, scheduler} {
ImageInfo info;
info.pixel_format = vk::Format::eR8G8B8A8Unorm;
@ -31,9 +30,12 @@ TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler&
TextureCache::~TextureCache() = default;
void TextureCache::InvalidateMemory(VAddr address, size_t size) {
void TextureCache::InvalidateMemory(VAddr address, size_t size, bool from_compute) {
std::unique_lock lock{mutex};
ForEachImageInRegion(address, size, [&](ImageId image_id, Image& image) {
if (from_compute && !image.Overlaps(address, size)) {
return;
}
// Ensure image is reuploaded when accessed again.
image.flags |= ImageFlagBits::CpuModified;
// Untrack image, so the range is unprotected and the guest can write freely.
@ -57,7 +59,7 @@ void TextureCache::UnmapMemory(VAddr cpu_addr, size_t size) {
}
}
ImageId TextureCache::FindImage(const ImageInfo& info, bool refresh_on_create) {
ImageId TextureCache::FindImage(const ImageInfo& info) {
if (info.guest_address == 0) [[unlikely]] {
return NULL_IMAGE_VIEW_ID;
}
@ -87,12 +89,6 @@ ImageId TextureCache::FindImage(const ImageInfo& info, bool refresh_on_create) {
image_id = image_ids[image_ids.size() > 1 ? 1 : 0];
}
Image& image = slot_images[image_id];
if (True(image.flags & ImageFlagBits::CpuModified) && refresh_on_create) {
RefreshImage(image);
TrackImage(image, image_id);
}
return image_id;
}
@ -119,6 +115,7 @@ ImageView& TextureCache::RegisterImageView(ImageId image_id, const ImageViewInfo
ImageView& TextureCache::FindTexture(const ImageInfo& info, const ImageViewInfo& view_info) {
const ImageId image_id = FindImage(info);
UpdateImage(image_id);
Image& image = slot_images[image_id];
auto& usage = image.info.usage;
@ -165,7 +162,8 @@ ImageView& TextureCache::FindRenderTarget(const ImageInfo& image_info,
const ImageViewInfo& view_info) {
const ImageId image_id = FindImage(image_info);
Image& image = slot_images[image_id];
image.flags &= ~ImageFlagBits::CpuModified;
image.flags |= ImageFlagBits::GpuModified;
UpdateImage(image_id);
image.Transit(vk::ImageLayout::eColorAttachmentOptimal,
vk::AccessFlagBits::eColorAttachmentWrite |
@ -198,8 +196,9 @@ ImageView& TextureCache::FindRenderTarget(const ImageInfo& image_info,
ImageView& TextureCache::FindDepthTarget(const ImageInfo& image_info,
const ImageViewInfo& view_info) {
const ImageId image_id = FindImage(image_info, false);
const ImageId image_id = FindImage(image_info);
Image& image = slot_images[image_id];
image.flags |= ImageFlagBits::GpuModified;
image.flags &= ~ImageFlagBits::CpuModified;
const auto new_layout = view_info.is_storage ? vk::ImageLayout::eDepthStencilAttachmentOptimal
@ -228,22 +227,6 @@ void TextureCache::RefreshImage(Image& image) {
// Mark image as validated.
image.flags &= ~ImageFlagBits::CpuModified;
scheduler.EndRendering();
const auto cmdbuf = scheduler.CommandBuffer();
image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite);
vk::Buffer buffer{staging.Handle()};
u32 offset{0};
auto upload_buffer = tile_manager.TryDetile(image);
if (upload_buffer) {
buffer = *upload_buffer;
} else {
// Upload data to the staging buffer.
offset = staging.Copy(image.info.guest_address, image.info.guest_size_bytes, 16);
}
const auto& num_layers = image.info.resources.layers;
const auto& num_mips = image.info.resources.levels;
ASSERT(num_mips == image.info.mips_layout.size());
@ -254,12 +237,23 @@ void TextureCache::RefreshImage(Image& image) {
const u32 height = std::max(image.info.size.height >> m, 1u);
const u32 depth =
image.info.props.is_volume ? std::max(image.info.size.depth >> m, 1u) : 1u;
const auto& [_, mip_pitch, mip_height, mip_ofs] = image.info.mips_layout[m];
const auto& [mip_size, mip_pitch, mip_height, mip_ofs] = image.info.mips_layout[m];
// Protect GPU modified resources from accidental reuploads.
if (True(image.flags & ImageFlagBits::GpuModified) &&
!buffer_cache.IsRegionGpuModified(image.info.guest_address + mip_ofs, mip_size)) {
const u8* addr = std::bit_cast<u8*>(image.info.guest_address);
const u64 hash = XXH3_64bits(addr + mip_ofs, mip_size);
if (image.mip_hashes[m] == hash) {
continue;
}
image.mip_hashes[m] = hash;
}
image_copy.push_back({
.bufferOffset = offset + mip_ofs * num_layers,
.bufferRowLength = static_cast<uint32_t>(mip_pitch),
.bufferImageHeight = static_cast<uint32_t>(mip_height),
.bufferOffset = mip_ofs * num_layers,
.bufferRowLength = static_cast<u32>(mip_pitch),
.bufferImageHeight = static_cast<u32>(mip_height),
.imageSubresource{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.mipLevel = m,
@ -271,6 +265,30 @@ void TextureCache::RefreshImage(Image& image) {
});
}
if (image_copy.empty()) {
return;
}
scheduler.EndRendering();
const auto cmdbuf = scheduler.CommandBuffer();
image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite, cmdbuf);
const VAddr image_addr = image.info.guest_address;
const size_t image_size = image.info.guest_size_bytes;
vk::Buffer buffer{};
u32 offset{};
if (auto upload_buffer = tile_manager.TryDetile(image); upload_buffer) {
buffer = *upload_buffer;
} else {
const auto [vk_buffer, buf_offset] = buffer_cache.ObtainTempBuffer(image_addr, image_size);
buffer = vk_buffer->Handle();
offset = buf_offset;
}
for (auto& copy : image_copy) {
copy.bufferOffset += offset;
}
cmdbuf.copyBufferToImage(buffer, image.image, vk::ImageLayout::eTransferDstOptimal, image_copy);
}
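Aside: the hash gate above is a plain content fingerprint: a GPU-modified mip is re-uploaded only when its guest bytes actually changed. A standalone model of that gate using the same XXH3_64bits call (helper name illustrative):

    #include <xxhash.h>
    #include <cstddef>
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Returns true (and records the new hash) only when the mip bytes changed.
    static bool NeedsUpload(std::vector<uint64_t>& mip_hashes, uint32_t mip,
                            const uint8_t* data, size_t size) {
        const uint64_t hash = XXH3_64bits(data, size);
        if (mip_hashes[mip] == hash) {
            return false;
        }
        mip_hashes[mip] = hash;
        return true;
    }

    int main() {
        std::vector<uint64_t> hashes(1, 0);
        std::vector<uint8_t> mip(4096, 0xAB);
        std::printf("%d %d\n", NeedsUpload(hashes, 0, mip.data(), mip.size()),
                    NeedsUpload(hashes, 0, mip.data(), mip.size())); // prints "1 0"
        return 0;
    }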

View file

@ -38,13 +38,13 @@ public:
~TextureCache();
/// Invalidates any image in the logical page range.
void InvalidateMemory(VAddr address, size_t size);
void InvalidateMemory(VAddr address, size_t size, bool from_compute = false);
/// Evicts any images that overlap the unmapped range.
void UnmapMemory(VAddr cpu_addr, size_t size);
/// Retrieves the image handle of the image with the provided attributes.
[[nodiscard]] ImageId FindImage(const ImageInfo& info, bool refresh_on_create = true);
[[nodiscard]] ImageId FindImage(const ImageInfo& info);
/// Retrieves an image view with the properties of the specified image descriptor.
[[nodiscard]] ImageView& FindTexture(const ImageInfo& image_info,
@ -58,6 +58,16 @@ public:
[[nodiscard]] ImageView& FindDepthTarget(const ImageInfo& image_info,
const ImageViewInfo& view_info);
/// Updates image contents if it was modified by CPU.
void UpdateImage(ImageId image_id) {
Image& image = slot_images[image_id];
if (False(image.flags & ImageFlagBits::CpuModified)) {
return;
}
RefreshImage(image);
TrackImage(image, image_id);
}
/// Reuploads image contents.
void RefreshImage(Image& image);
@ -170,7 +180,6 @@ private:
Vulkan::Scheduler& scheduler;
BufferCache& buffer_cache;
PageManager& tracker;
StreamBuffer staging;
TileManager tile_manager;
Common::SlotVector<Image> slot_images;
Common::SlotVector<ImageView> slot_image_views;

View file

@ -5,7 +5,6 @@
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
#include "video_core/texture_cache/image_view.h"
#include "video_core/texture_cache/texture_cache.h"
#include "video_core/texture_cache/tile_manager.h"
#include "video_core/host_shaders/detile_m32x1_comp.h"
@ -194,6 +193,7 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) {
case vk::Format::eR32G32Sfloat:
case vk::Format::eR32G32Uint:
case vk::Format::eR16G16B16A16Unorm:
case vk::Format::eR16G16B16A16Sfloat:
return vk::Format::eR32G32Uint;
case vk::Format::eBc2SrgbBlock:
case vk::Format::eBc2UnormBlock:
@ -397,7 +397,7 @@ std::optional<vk::Buffer> TileManager::TryDetile(Image& image) {
const u32 image_size = image.info.guest_size_bytes;
const auto [in_buffer, in_offset] = [&] -> std::pair<vk::Buffer, u32> {
// Use the stream buffer when the texture fits in the remaining space.
if (image_size <= StreamBufferSize) {
if (image_size <= stream_buffer.GetFreeSize()) {
u32 offset = stream_buffer.Copy(image.info.guest_address, image_size);
return {stream_buffer.Handle(), offset};
}