Merge branch 'shadps4-emu:main' into allocate-fixes

This commit is contained in:
Stephen Miller 2025-01-06 02:54:40 -06:00 committed by GitHub
commit ae1beca725
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
43 changed files with 1536 additions and 940 deletions

View file

@ -89,7 +89,7 @@ body:
- type: textarea
id: logs
attributes:
label: "Logs"
description: Attach any logs here. Log can be found by right clicking on a game name -> Open Folder... -> Open Log Folder. Make sure that the log type is set to `sync`.
label: "Log File"
description: Drag and drop the log file here. It can be found by right clicking on a game name -> Open Folder... -> Open Log Folder. Make sure that the log type is set to `sync`.
validations:
required: false
required: true

View file

@ -209,6 +209,7 @@ set(AUDIO_LIB src/core/libraries/audio/audioin.cpp
set(GNM_LIB src/core/libraries/gnmdriver/gnmdriver.cpp
src/core/libraries/gnmdriver/gnmdriver.h
src/core/libraries/gnmdriver/gnmdriver_init.h
src/core/libraries/gnmdriver/gnm_error.h
)

View file

@ -100,7 +100,7 @@ void setTrophyKey(std::string key) {
trophyKey = key;
}
bool isNeoMode() {
bool isNeoModeConsole() {
return isNeo;
}

View file

@ -18,7 +18,7 @@ void saveMainWindow(const std::filesystem::path& path);
std::string getTrophyKey();
void setTrophyKey(std::string key);
bool isNeoMode();
bool isNeoModeConsole();
bool isFullscreenMode();
bool getPlayBGM();
int getBGMvolume();

View file

@ -7,6 +7,7 @@
#include <string_view>
#include "assert.h"
#include "bit_field.h"
#include "singleton.h"
#include "types.h"
@ -16,6 +17,46 @@ class Emulator;
namespace Common {
union PSFAttributes {
/// Supports initial user's logout
BitField<0, 1, u32> support_initial_user_logout;
/// Enter button for the common dialog is cross.
BitField<1, 1, u32> enter_button_cross;
/// Warning dialog for PS Move is displayed in the options menu.
BitField<2, 1, u32> ps_move_warning;
/// Supports stereoscopic 3D.
BitField<3, 1, u32> support_stereoscopic_3d;
/// Suspends when PS button is pressed.
BitField<4, 1, u32> ps_button_suspend;
/// Enter button for the common dialog is assigned by the system software.
BitField<5, 1, u32> enter_button_system;
/// Overrides share menu behavior.
BitField<6, 1, u32> override_share_menu;
/// Suspends when PS button is pressed and special output resolution is set.
BitField<8, 1, u32> special_res_ps_button_suspend;
/// Enable HDCP.
BitField<9, 1, u32> enable_hdcp;
/// Disable HDCP for non-game.
BitField<10, 1, u32> disable_hdcp_non_game;
/// Supports PS VR.
BitField<14, 1, u32> support_ps_vr;
/// CPU mode (6 CPU)
BitField<15, 1, u32> six_cpu_mode;
/// CPU mode (7 CPU)
BitField<16, 1, u32> seven_cpu_mode;
/// Supports PS4 Pro (Neo) mode.
BitField<23, 1, u32> support_neo_mode;
/// Requires PS VR.
BitField<26, 1, u32> require_ps_vr;
/// Supports HDR.
BitField<29, 1, u32> support_hdr;
/// Display location.
BitField<31, 1, u32> display_location;
u32 raw{};
};
static_assert(sizeof(PSFAttributes) == 4);
class ElfInfo {
friend class Core::Emulator;
@ -26,6 +67,7 @@ class ElfInfo {
std::string app_ver{};
u32 firmware_ver = 0;
u32 raw_firmware_ver = 0;
PSFAttributes psf_attributes{};
public:
static constexpr u32 FW_15 = 0x1500000;
@ -68,6 +110,11 @@ public:
ASSERT(initialized);
return raw_firmware_ver;
}
[[nodiscard]] const PSFAttributes& PSFAttributes() const {
ASSERT(initialized);
return psf_attributes;
}
};
} // namespace Common

View file

@ -15,13 +15,6 @@ class SDLPortBackend : public PortBackend {
public:
explicit SDLPortBackend(const PortOut& port)
: frame_size(port.format_info.FrameSize()), guest_buffer_size(port.BufferSize()) {
// We want the latency for delivering frames out to be as small as possible,
// so set the sample frames hint to the number of frames per buffer.
const auto samples_num_str = std::to_string(port.buffer_frames);
if (!SDL_SetHint(SDL_HINT_AUDIO_DEVICE_SAMPLE_FRAMES, samples_num_str.c_str())) {
LOG_WARNING(Lib_AudioOut, "Failed to set SDL audio sample frames hint to {}: {}",
samples_num_str, SDL_GetError());
}
const SDL_AudioSpec fmt = {
.format = port.format_info.is_float ? SDL_AUDIO_F32LE : SDL_AUDIO_S16LE,
.channels = port.format_info.num_channels,

View file

@ -12,6 +12,7 @@
#include "core/address_space.h"
#include "core/debug_state.h"
#include "core/libraries/gnmdriver/gnm_error.h"
#include "core/libraries/gnmdriver/gnmdriver_init.h"
#include "core/libraries/kernel/orbis_error.h"
#include "core/libraries/kernel/process.h"
#include "core/libraries/libs.h"
@ -54,244 +55,11 @@ enum ShaderStages : u32 {
static constexpr std::array indirect_sgpr_offsets{0u, 0u, 0x4cu, 0u, 0xccu, 0u, 0x14cu};
static constexpr auto HwInitPacketSize = 0x100u;
// clang-format off
static constexpr std::array InitSequence{
// A fake preamble to mimic context reset sent by FW
0xc0001200u, 0u, // IT_CLEAR_STATE
// Actual init state sequence
0xc0017600u, 0x216u, 0xffffffffu,
0xc0017600u, 0x217u, 0xffffffffu,
0xc0017600u, 0x215u, 0u,
0xc0016900u, 0x2f9u, 0x2du,
0xc0016900u, 0x282u, 8u,
0xc0016900u, 0x280u, 0x80008u,
0xc0016900u, 0x281u, 0xffff0000u,
0xc0016900u, 0x204u, 0u,
0xc0016900u, 0x206u, 0x43fu,
0xc0016900u, 0x83u, 0xffffu,
0xc0016900u, 0x317u, 0x10u,
0xc0016900u, 0x2fau, 0x3f800000u,
0xc0016900u, 0x2fcu, 0x3f800000u,
0xc0016900u, 0x2fbu, 0x3f800000u,
0xc0016900u, 0x2fdu, 0x3f800000u,
0xc0016900u, 0x202u, 0xcc0010u,
0xc0016900u, 0x30eu, 0xffffffffu,
0xc0016900u, 0x30fu, 0xffffffffu,
0xc0002f00u, 1u,
0xc0017600u, 7u, 0x1ffu,
0xc0017600u, 0x46u, 0x1ffu,
0xc0017600u, 0x87u, 0x1ffu,
0xc0017600u, 0xc7u, 0x1ffu,
0xc0017600u, 0x107u, 0u,
0xc0017600u, 0x147u, 0x1ffu,
0xc0016900u, 0x1b1u, 2u,
0xc0016900u, 0x101u, 0u,
0xc0016900u, 0x100u, 0xffffffffu,
0xc0016900u, 0x103u, 0u,
0xc0016900u, 0x284u, 0u,
0xc0016900u, 0x290u, 0u,
0xc0016900u, 0x2aeu, 0u,
0xc0016900u, 0x292u, 0u,
0xc0016900u, 0x293u, 0x6000000u,
0xc0016900u, 0x2f8u, 0u,
0xc0016900u, 0x2deu, 0x1e9u,
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
0xc0017900u, 0x200u, 0xe0000000u,
};
static_assert(InitSequence.size() == 0x73 + 2);
static constexpr std::array InitSequence175{
// A fake preamble to mimic context reset sent by FW
0xc0001200u, 0u, // IT_CLEAR_STATE
// Actual init state sequence
0xc0017600u, 0x216u, 0xffffffffu,
0xc0017600u, 0x217u, 0xffffffffu,
0xc0017600u, 0x215u, 0u,
0xc0016900u, 0x2f9u, 0x2du,
0xc0016900u, 0x282u, 8u,
0xc0016900u, 0x280u, 0x80008u,
0xc0016900u, 0x281u, 0xffff0000u,
0xc0016900u, 0x204u, 0u,
0xc0016900u, 0x206u, 0x43fu,
0xc0016900u, 0x83u, 0xffffu,
0xc0016900u, 0x317u, 0x10u,
0xc0016900u, 0x2fau, 0x3f800000u,
0xc0016900u, 0x2fcu, 0x3f800000u,
0xc0016900u, 0x2fbu, 0x3f800000u,
0xc0016900u, 0x2fdu, 0x3f800000u,
0xc0016900u, 0x202u, 0xcc0010u,
0xc0016900u, 0x30eu, 0xffffffffu,
0xc0016900u, 0x30fu, 0xffffffffu,
0xc0002f00u, 1u,
0xc0017600u, 7u, 0x1ffu,
0xc0017600u, 0x46u, 0x1ffu,
0xc0017600u, 0x87u, 0x1ffu,
0xc0017600u, 0xc7u, 0x1ffu,
0xc0017600u, 0x107u, 0u,
0xc0017600u, 0x147u, 0x1ffu,
0xc0016900u, 0x1b1u, 2u,
0xc0016900u, 0x101u, 0u,
0xc0016900u, 0x100u, 0xffffffffu,
0xc0016900u, 0x103u, 0u,
0xc0016900u, 0x284u, 0u,
0xc0016900u, 0x290u, 0u,
0xc0016900u, 0x2aeu, 0u,
0xc0016900u, 0x292u, 0u,
0xc0016900u, 0x293u, 0x6020000u,
0xc0016900u, 0x2f8u, 0u,
0xc0016900u, 0x2deu, 0x1e9u,
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
0xc0017900u, 0x200u, 0xe0000000u,
};
static_assert(InitSequence175.size() == 0x73 + 2);
static constexpr std::array InitSequence200{
// A fake preamble to mimic context reset sent by FW
0xc0001200u, 0u, // IT_CLEAR_STATE
// Actual init state sequence
0xc0017600u, 0x216u, 0xffffffffu,
0xc0017600u, 0x217u, 0xffffffffu,
0xc0017600u, 0x215u, 0u,
0xc0016900u, 0x2f9u, 0x2du,
0xc0016900u, 0x282u, 8u,
0xc0016900u, 0x280u, 0x80008u,
0xc0016900u, 0x281u, 0xffff0000u,
0xc0016900u, 0x204u, 0u,
0xc0016900u, 0x206u, 0x43fu,
0xc0016900u, 0x83u, 0xffffu,
0xc0016900u, 0x317u, 0x10u,
0xc0016900u, 0x2fau, 0x3f800000u,
0xc0016900u, 0x2fcu, 0x3f800000u,
0xc0016900u, 0x2fbu, 0x3f800000u,
0xc0016900u, 0x2fdu, 0x3f800000u,
0xc0016900u, 0x202u, 0xcc0010u,
0xc0016900u, 0x30eu, 0xffffffffu,
0xc0016900u, 0x30fu, 0xffffffffu,
0xc0002f00u, 1u,
0xc0017600u, 7u, 0x1701ffu,
0xc0017600u, 0x46u, 0x1701fdu,
0xc0017600u, 0x87u, 0x1701ffu,
0xc0017600u, 0xc7u, 0x1701fdu,
0xc0017600u, 0x107u, 0x17u,
0xc0017600u, 0x147u, 0x1701fdu,
0xc0017600u, 0x47u, 0x1cu,
0xc0016900u, 0x1b1u, 2u,
0xc0016900u, 0x101u, 0u,
0xc0016900u, 0x100u, 0xffffffffu,
0xc0016900u, 0x103u, 0u,
0xc0016900u, 0x284u, 0u,
0xc0016900u, 0x290u, 0u,
0xc0016900u, 0x2aeu, 0u,
0xc0016900u, 0x292u, 0u,
0xc0016900u, 0x293u, 0x6020000u,
0xc0016900u, 0x2f8u, 0u,
0xc0016900u, 0x2deu, 0x1e9u,
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
0xc0017900u, 0x200u, 0xe0000000u,
};
static_assert(InitSequence200.size() == 0x76 + 2);
static constexpr std::array InitSequence350{
// A fake preamble to mimic context reset sent by FW
0xc0001200u, 0u, // IT_CLEAR_STATE
// Actual init state sequence
0xc0017600u, 0x216u, 0xffffffffu,
0xc0017600u, 0x217u, 0xffffffffu,
0xc0017600u, 0x215u, 0u,
0xc0016900u, 0x2f9u, 0x2du,
0xc0016900u, 0x282u, 8u,
0xc0016900u, 0x280u, 0x80008u,
0xc0016900u, 0x281u, 0xffff0000u,
0xc0016900u, 0x204u, 0u,
0xc0016900u, 0x206u, 0x43fu,
0xc0016900u, 0x83u, 0xffffu,
0xc0016900u, 0x317u, 0x10u,
0xc0016900u, 0x2fau, 0x3f800000u,
0xc0016900u, 0x2fcu, 0x3f800000u,
0xc0016900u, 0x2fbu, 0x3f800000u,
0xc0016900u, 0x2fdu, 0x3f800000u,
0xc0016900u, 0x202u, 0xcc0010u,
0xc0016900u, 0x30eu, 0xffffffffu,
0xc0016900u, 0x30fu, 0xffffffffu,
0xc0002f00u, 1u,
0xc0017600u, 7u, 0x1701ffu,
0xc0017600u, 0x46u, 0x1701fdu,
0xc0017600u, 0x87u, 0x1701ffu,
0xc0017600u, 0xc7u, 0x1701fdu,
0xc0017600u, 0x107u, 0x17u,
0xc0017600u, 0x147u, 0x1701fdu,
0xc0017600u, 0x47u, 0x1cu,
0xc0016900u, 0x1b1u, 2u,
0xc0016900u, 0x101u, 0u,
0xc0016900u, 0x100u, 0xffffffffu,
0xc0016900u, 0x103u, 0u,
0xc0016900u, 0x284u, 0u,
0xc0016900u, 0x290u, 0u,
0xc0016900u, 0x2aeu, 0u,
0xc0016900u, 0x102u, 0u,
0xc0016900u, 0x292u, 0u,
0xc0016900u, 0x293u, 0x6020000u,
0xc0016900u, 0x2f8u, 0u,
0xc0016900u, 0x2deu, 0x1e9u,
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
0xc0017900u, 0x200u, 0xe0000000u,
0xc0016900u, 0x2aau, 0xffu,
};
static_assert(InitSequence350.size() == 0x7c + 2);
static constexpr std::array CtxInitSequence{
0xc0012800u, 0x80000000u, 0x80000000u,
0xc0001200u, 0u,
0xc0002f00u, 1u,
0xc0016900u, 0x102u, 0u,
0xc0016900u, 0x202u, 0xcc0010u,
0xc0111000u, 0u
};
static_assert(CtxInitSequence.size() == 0x0f);
static constexpr std::array CtxInitSequence400{
0xc0012800u, 0x80000000u, 0x80000000u,
0xc0001200u, 0u,
0xc0016900u, 0x2f9u, 0x2du,
0xc0016900u, 0x282u, 8u,
0xc0016900u, 0x280u, 0x80008u,
0xc0016900u, 0x281u, 0xffff0000u,
0xc0016900u, 0x204u, 0u,
0xc0016900u, 0x206u, 0x43fu,
0xc0016900u, 0x83u, 0xffffu,
0xc0016900u, 0x317u, 0x10u,
0xc0016900u, 0x2fau, 0x3f800000u,
0xc0016900u, 0x2fcu, 0x3f800000u,
0xc0016900u, 0x2fbu, 0x3f800000u,
0xc0016900u, 0x2fdu, 0x3f800000u,
0xc0016900u, 0x202u, 0xcc0010u,
0xc0016900u, 0x30eu, 0xffffffffu,
0xc0016900u, 0x30fu, 0xffffffffu,
0xc0002f00u, 1u,
0xc0016900u, 0x1b1u, 2u,
0xc0016900u, 0x101u, 0u,
0xc0016900u, 0x100u, 0xffffffffu,
0xc0016900u, 0x103u, 0u,
0xc0016900u, 0x284u, 0u,
0xc0016900u, 0x290u, 0u,
0xc0016900u, 0x2aeu, 0u,
0xc0016900u, 0x102u, 0u,
0xc0016900u, 0x292u, 0u,
0xc0016900u, 0x293u, 0x6020000u,
0xc0016900u, 0x2f8u, 0u,
0xc0016900u, 0x2deu, 0x1e9u,
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
0xc0016900u, 0x2aau, 0xffu,
0xc09e1000u,
};
static_assert(CtxInitSequence400.size() == 0x61);
// clang-format on
// Gates use of what appear to be the neo-mode init sequences but with the older
// IA_MULTI_VGT_PARAM register address. No idea what this is for as the ioctl
// that controls it is still a mystery, but leaving the sequences in gated behind
// this flag in case we need it in the future.
static constexpr bool UseNeoCompatSequences = false;
// In case if `submitDone` is issued we need to block submissions until GPU idle
static u32 submission_lock{};
@ -317,6 +85,14 @@ static void WaitGpuIdle() {
cv_lock.wait(lock, [] { return submission_lock == 0; });
}
// Write a special ending NOP packet with N DWs data block
static inline u32* WriteTrailingNop(u32* cmdbuf, u32 data_block_size) {
auto* nop = reinterpret_cast<PM4CmdNop*>(cmdbuf);
nop->header = PM4Type3Header{PM4ItOpcode::Nop, data_block_size - 1};
nop->data_block[0] = 0u; // only one out of `data_block_size` is initialized
return cmdbuf + data_block_size + 1 /* header */;
}
// Write a special ending NOP packet with N DWs data block
template <u32 data_block_size>
static inline u32* WriteTrailingNop(u32* cmdbuf) {
@ -607,9 +383,16 @@ s32 PS4_SYSV_ABI sceGnmDispatchIndirect(u32* cmdbuf, u32 size, u32 data_offset,
return -1;
}
int PS4_SYSV_ABI sceGnmDispatchIndirectOnMec() {
LOG_ERROR(Lib_GnmDriver, "(STUBBED) called");
return ORBIS_OK;
s32 PS4_SYSV_ABI sceGnmDispatchIndirectOnMec(u32* cmdbuf, u32 size, VAddr args, u32 modifier) {
if (cmdbuf != nullptr && size == 8 && args != 0 && ((args & 3u) == 0)) {
cmdbuf[0] = 0xc0021602 | (modifier & 1u);
*(VAddr*)(&cmdbuf[1]) = args;
cmdbuf[3] = (modifier & 0x18) | 1u;
cmdbuf[4] = 0xc0021000;
cmdbuf[5] = 0;
return ORBIS_OK;
}
return ORBIS_FAIL;
}
u32 PS4_SYSV_ABI sceGnmDispatchInitDefaultHardwareState(u32* cmdbuf, u32 size) {
@ -619,17 +402,30 @@ u32 PS4_SYSV_ABI sceGnmDispatchInitDefaultHardwareState(u32* cmdbuf, u32 size) {
return 0;
}
cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x216u,
0xffffffffu); // COMPUTE_STATIC_THREAD_MGMT_SE0
cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x217u,
0xffffffffu); // COMPUTE_STATIC_THREAD_MGMT_SE1
cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x215u, 0x170u); // COMPUTE_RESOURCE_LIMITS
cmdbuf = PM4CmdSetData::SetShReg<PM4ShaderType::ShaderCompute>(
cmdbuf, 0x216u,
0xffffffffu); // COMPUTE_STATIC_THREAD_MGMT_SE0
cmdbuf = PM4CmdSetData::SetShReg<PM4ShaderType::ShaderCompute>(
cmdbuf, 0x217u,
0xffffffffu); // COMPUTE_STATIC_THREAD_MGMT_SE1
if (sceKernelIsNeoMode()) {
cmdbuf = PM4CmdSetData::SetShReg<PM4ShaderType::ShaderCompute>(
cmdbuf, 0x219u,
0xffffffffu); // COMPUTE_STATIC_THREAD_MGMT_SE2
cmdbuf = PM4CmdSetData::SetShReg<PM4ShaderType::ShaderCompute>(
cmdbuf, 0x21au,
0xffffffffu); // COMPUTE_STATIC_THREAD_MGMT_SE3
}
cmdbuf = PM4CmdSetData::SetShReg<PM4ShaderType::ShaderCompute>(
cmdbuf, 0x215u, 0x170u); // COMPUTE_RESOURCE_LIMITS
cmdbuf = WriteHeader<PM4ItOpcode::AcquireMem>(cmdbuf, 6);
cmdbuf = WriteBody(cmdbuf, 0x28000000u, 0u, 0u, 0u, 0u, 0u);
cmdbuf = WriteBody(cmdbuf, 0x28000000u, 0u, 0u, 0u, 0u, 0xau);
cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, 0xef);
cmdbuf = WriteBody(cmdbuf, 0xau, 0u);
cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, sceKernelIsNeoMode() ? 0xe9 : 0xef);
cmdbuf = WriteBody(cmdbuf, 0u);
return HwInitPacketSize;
}
@ -646,7 +442,7 @@ s32 PS4_SYSV_ABI sceGnmDrawIndex(u32* cmdbuf, u32 size, u32 index_count, uintptr
draw_index->index_base_lo = u32(index_addr);
draw_index->index_base_hi = u32(index_addr >> 32);
draw_index->index_count = index_count;
draw_index->draw_initiator = 0;
draw_index->draw_initiator = sceKernelIsNeoMode() ? flags & 0xe0000000u : 0;
WriteTrailingNop<3>(cmdbuf + 6);
return ORBIS_OK;
@ -659,8 +455,9 @@ s32 PS4_SYSV_ABI sceGnmDrawIndexAuto(u32* cmdbuf, u32 size, u32 index_count, u32
if (cmdbuf && (size == 7) &&
(flags & 0x1ffffffe) == 0) { // no predication will be set in the packet
cmdbuf = WritePacket<PM4ItOpcode::DrawIndexAuto>(cmdbuf, PM4ShaderType::ShaderGraphics,
index_count, 2u);
cmdbuf = WritePacket<PM4ItOpcode::DrawIndexAuto>(
cmdbuf, PM4ShaderType::ShaderGraphics, index_count,
sceKernelIsNeoMode() ? flags & 0xe0000000u | 2u : 2u);
WriteTrailingNop<3>(cmdbuf);
return ORBIS_OK;
}
@ -684,7 +481,7 @@ s32 PS4_SYSV_ABI sceGnmDrawIndexIndirect(u32* cmdbuf, u32 size, u32 data_offset,
cmdbuf[0] = data_offset;
cmdbuf[1] = vertex_sgpr_offset == 0 ? 0 : (vertex_sgpr_offset & 0xffffu) + sgpr_offset;
cmdbuf[2] = instance_sgpr_offset == 0 ? 0 : (instance_sgpr_offset & 0xffffu) + sgpr_offset;
cmdbuf[3] = 0;
cmdbuf[3] = sceKernelIsNeoMode() ? flags & 0xe0000000u : 0u;
cmdbuf += 4;
WriteTrailingNop<3>(cmdbuf);
@ -699,8 +496,9 @@ s32 PS4_SYSV_ABI sceGnmDrawIndexIndirectCountMulti(u32* cmdbuf, u32 size, u32 da
u32 flags) {
LOG_TRACE(Lib_GnmDriver, "called");
if (cmdbuf && (size == 16) && (shader_stage < ShaderStages::Max) &&
(vertex_sgpr_offset < 0x10u) && (instance_sgpr_offset < 0x10u)) {
if ((!sceKernelIsNeoMode() || !UseNeoCompatSequences) && !cmdbuf && (size == 16) &&
(shader_stage < ShaderStages::Max) && (vertex_sgpr_offset < 0x10u) &&
(instance_sgpr_offset < 0x10u)) {
cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, 2);
cmdbuf = WriteBody(cmdbuf, 0u);
@ -719,7 +517,7 @@ s32 PS4_SYSV_ABI sceGnmDrawIndexIndirectCountMulti(u32* cmdbuf, u32 size, u32 da
cmdbuf[4] = max_count;
*(u64*)(&cmdbuf[5]) = count_addr;
cmdbuf[7] = sizeof(DrawIndexedIndirectArgs);
cmdbuf[8] = 0;
cmdbuf[8] = sceKernelIsNeoMode() ? flags & 0xe0000000u : 0;
cmdbuf += 9;
WriteTrailingNop<2>(cmdbuf);
@ -748,7 +546,8 @@ s32 PS4_SYSV_ABI sceGnmDrawIndexOffset(u32* cmdbuf, u32 size, u32 index_offset,
const auto predicate = flags & 1 ? PM4Predicate::PredEnable : PM4Predicate::PredDisable;
cmdbuf = WriteHeader<PM4ItOpcode::DrawIndexOffset2>(
cmdbuf, 4, PM4ShaderType::ShaderGraphics, predicate);
cmdbuf = WriteBody(cmdbuf, index_count, index_offset, index_count, 0u);
cmdbuf = WriteBody(cmdbuf, index_count, index_offset, index_count,
sceKernelIsNeoMode() ? flags & 0xe0000000u : 0u);
WriteTrailingNop<3>(cmdbuf);
return ORBIS_OK;
@ -772,7 +571,7 @@ s32 PS4_SYSV_ABI sceGnmDrawIndirect(u32* cmdbuf, u32 size, u32 data_offset, u32
cmdbuf[0] = data_offset;
cmdbuf[1] = vertex_sgpr_offset == 0 ? 0 : (vertex_sgpr_offset & 0xffffu) + sgpr_offset;
cmdbuf[2] = instance_sgpr_offset == 0 ? 0 : (instance_sgpr_offset & 0xffffu) + sgpr_offset;
cmdbuf[3] = 2; // auto index
cmdbuf[3] = sceKernelIsNeoMode() ? flags & 0xe0000000u | 2u : 2u; // auto index
cmdbuf += 4;
WriteTrailingNop<3>(cmdbuf);
@ -801,6 +600,7 @@ u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState(u32* cmdbuf, u32 size) {
}
const auto& SetupContext = [](u32* cmdbuf, u32 size, bool clear_state) {
const auto* cmdbuf_end = cmdbuf + HwInitPacketSize;
if (clear_state) {
cmdbuf = ClearContextState(cmdbuf);
}
@ -808,10 +608,8 @@ u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState(u32* cmdbuf, u32 size) {
std::memcpy(cmdbuf, &InitSequence[2], (InitSequence.size() - 2) * 4);
cmdbuf += InitSequence.size() - 2;
const auto cmdbuf_left =
HwInitPacketSize - (InitSequence.size() - 2) - (clear_state ? 0xc : 0) - 1;
cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, cmdbuf_left);
cmdbuf = WriteBody(cmdbuf, 0u);
const auto cmdbuf_left = cmdbuf_end - cmdbuf - 1;
WriteTrailingNop(cmdbuf, cmdbuf_left);
return HwInitPacketSize;
};
@ -826,12 +624,13 @@ u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState175(u32* cmdbuf, u32 size) {
return 0;
}
const auto* cmdbuf_end = cmdbuf + HwInitPacketSize;
cmdbuf = ClearContextState(cmdbuf);
std::memcpy(cmdbuf, &InitSequence175[2], (InitSequence175.size() - 2) * 4);
cmdbuf += InitSequence175.size() - 2;
constexpr auto cmdbuf_left = HwInitPacketSize - (InitSequence175.size() - 2) - 0xc - 1;
WriteTrailingNop<cmdbuf_left>(cmdbuf);
const auto cmdbuf_left = cmdbuf_end - cmdbuf - 1;
WriteTrailingNop(cmdbuf, cmdbuf_left);
return HwInitPacketSize;
}
@ -844,17 +643,27 @@ u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState200(u32* cmdbuf, u32 size) {
}
const auto& SetupContext200 = [](u32* cmdbuf, u32 size, bool clear_state) {
const auto* cmdbuf_end = cmdbuf + HwInitPacketSize;
if (clear_state) {
cmdbuf = ClearContextState(cmdbuf);
}
std::memcpy(cmdbuf, &InitSequence200[2], (InitSequence200.size() - 2) * 4);
cmdbuf += InitSequence200.size() - 2;
if (sceKernelIsNeoMode()) {
if (!UseNeoCompatSequences) {
std::memcpy(cmdbuf, &InitSequence200Neo[2], (InitSequence200Neo.size() - 2) * 4);
cmdbuf += InitSequence200Neo.size() - 2;
} else {
std::memcpy(cmdbuf, &InitSequence200NeoCompat[2],
(InitSequence200NeoCompat.size() - 2) * 4);
cmdbuf += InitSequence200NeoCompat.size() - 2;
}
} else {
std::memcpy(cmdbuf, &InitSequence200[2], (InitSequence200.size() - 2) * 4);
cmdbuf += InitSequence200.size() - 2;
}
const auto cmdbuf_left =
HwInitPacketSize - (InitSequence200.size() - 2) - (clear_state ? 0xc : 0) - 1;
cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, cmdbuf_left);
cmdbuf = WriteBody(cmdbuf, 0u);
const auto cmdbuf_left = cmdbuf_end - cmdbuf - 1;
WriteTrailingNop(cmdbuf, cmdbuf_left);
return HwInitPacketSize;
};
@ -870,17 +679,27 @@ u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState350(u32* cmdbuf, u32 size) {
}
const auto& SetupContext350 = [](u32* cmdbuf, u32 size, bool clear_state) {
const auto* cmdbuf_end = cmdbuf + HwInitPacketSize;
if (clear_state) {
cmdbuf = ClearContextState(cmdbuf);
}
std::memcpy(cmdbuf, &InitSequence350[2], (InitSequence350.size() - 2) * 4);
cmdbuf += InitSequence350.size() - 2;
if (sceKernelIsNeoMode()) {
if (!UseNeoCompatSequences) {
std::memcpy(cmdbuf, &InitSequence350Neo[2], (InitSequence350Neo.size() - 2) * 4);
cmdbuf += InitSequence350Neo.size() - 2;
} else {
std::memcpy(cmdbuf, &InitSequence350NeoCompat[2],
(InitSequence350NeoCompat.size() - 2) * 4);
cmdbuf += InitSequence350NeoCompat.size() - 2;
}
} else {
std::memcpy(cmdbuf, &InitSequence350[2], (InitSequence350.size() - 2) * 4);
cmdbuf += InitSequence350.size() - 2;
}
const auto cmdbuf_left =
HwInitPacketSize - (InitSequence350.size() - 2) - (clear_state ? 0xc : 0) - 1;
cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, cmdbuf_left);
cmdbuf = WriteBody(cmdbuf, 0u);
const auto cmdbuf_left = cmdbuf_end - cmdbuf - 1;
WriteTrailingNop(cmdbuf, cmdbuf_left);
return HwInitPacketSize;
};
@ -896,7 +715,11 @@ u32 PS4_SYSV_ABI sceGnmDrawInitToDefaultContextState(u32* cmdbuf, u32 size) {
return 0;
}
std::memcpy(cmdbuf, CtxInitSequence.data(), CtxInitSequence.size() * 4);
if (sceKernelIsNeoMode()) {
std::memcpy(cmdbuf, CtxInitSequenceNeo.data(), CtxInitSequenceNeo.size() * 4);
} else {
std::memcpy(cmdbuf, CtxInitSequence.data(), CtxInitSequence.size() * 4);
}
return CtxInitPacketSize;
}
@ -908,7 +731,16 @@ u32 PS4_SYSV_ABI sceGnmDrawInitToDefaultContextState400(u32* cmdbuf, u32 size) {
return 0;
}
std::memcpy(cmdbuf, CtxInitSequence400.data(), CtxInitSequence400.size() * 4);
if (sceKernelIsNeoMode()) {
if (!UseNeoCompatSequences) {
std::memcpy(cmdbuf, CtxInitSequence400Neo.data(), CtxInitSequence400Neo.size() * 4);
} else {
std::memcpy(cmdbuf, CtxInitSequence400NeoCompat.data(),
CtxInitSequence400NeoCompat.size() * 4);
}
} else {
std::memcpy(cmdbuf, CtxInitSequence400.data(), CtxInitSequence400.size() * 4);
}
return CtxInitPacketSize;
}
@ -1030,7 +862,8 @@ int PS4_SYSV_ABI sceGnmGetGpuBlockStatus() {
u32 PS4_SYSV_ABI sceGnmGetGpuCoreClockFrequency() {
LOG_TRACE(Lib_GnmDriver, "called");
return Config::isNeoMode() ? 911'000'000 : 800'000'000;
// On console this uses an ioctl check, but we assume it is equal to just checking for neo mode.
return sceKernelIsNeoMode() ? 911'000'000 : 800'000'000;
}
int PS4_SYSV_ABI sceGnmGetGpuInfoStatus() {
@ -1369,7 +1202,15 @@ s32 PS4_SYSV_ABI sceGnmResetVgtControl(u32* cmdbuf, u32 size) {
if (cmdbuf == nullptr || size != 3) {
return -1;
}
PM4CmdSetData::SetContextReg(cmdbuf, 0x2aau, 0xffu); // IA_MULTI_VGT_PARAM
if (sceKernelIsNeoMode()) {
if (!UseNeoCompatSequences) {
PM4CmdSetData::SetUconfigReg(cmdbuf, 0x40000258u, 0x6d007fu); // IA_MULTI_VGT_PARAM
} else {
PM4CmdSetData::SetContextReg(cmdbuf, 0x100002aau, 0xd00ffu); // IA_MULTI_VGT_PARAM
}
} else {
PM4CmdSetData::SetContextReg(cmdbuf, 0x2aau, 0xffu); // IA_MULTI_VGT_PARAM
}
return ORBIS_OK;
}
@ -1830,9 +1671,25 @@ s32 PS4_SYSV_ABI sceGnmSetVgtControl(u32* cmdbuf, u32 size, u32 prim_group_sz_mi
return -1;
}
const u32 reg_value =
((partial_vs_wave_mode & 1) << 0x10) | (prim_group_sz_minus_one & 0xffffu);
PM4CmdSetData::SetContextReg(cmdbuf, 0x2aau, reg_value); // IA_MULTI_VGT_PARAM
if (sceKernelIsNeoMode()) {
const u32 wd_switch_on_eop = u32(wd_switch_only_on_eop_mode != 0) << 0x14;
const u32 switch_on_eoi = u32(wd_switch_only_on_eop_mode == 0) << 0x13;
const u32 reg_value =
wd_switch_only_on_eop_mode != 0
? (partial_vs_wave_mode & 1) << 0x10 | prim_group_sz_minus_one | wd_switch_on_eop |
switch_on_eoi | 0x40000u
: prim_group_sz_minus_one & 0x1cffffu | wd_switch_on_eop | switch_on_eoi | 0x50000u;
if (!UseNeoCompatSequences) {
PM4CmdSetData::SetUconfigReg(cmdbuf, 0x40000258u,
reg_value | 0x600000u); // IA_MULTI_VGT_PARAM
} else {
PM4CmdSetData::SetContextReg(cmdbuf, 0x100002aau, reg_value); // IA_MULTI_VGT_PARAM
}
} else {
const u32 reg_value =
((partial_vs_wave_mode & 1) << 0x10) | (prim_group_sz_minus_one & 0xffffu);
PM4CmdSetData::SetContextReg(cmdbuf, 0x2aau, reg_value); // IA_MULTI_VGT_PARAM
}
return ORBIS_OK;
}
@ -2215,9 +2072,25 @@ int PS4_SYSV_ABI sceGnmSubmitCommandBuffersForWorkload(u32 workload, u32 count,
if (sdk_version <= 0x1ffffffu) {
liverpool->SubmitGfx(InitSequence, {});
} else if (sdk_version <= 0x3ffffffu) {
liverpool->SubmitGfx(InitSequence200, {});
if (sceKernelIsNeoMode()) {
if (!UseNeoCompatSequences) {
liverpool->SubmitGfx(InitSequence200Neo, {});
} else {
liverpool->SubmitGfx(InitSequence200NeoCompat, {});
}
} else {
liverpool->SubmitGfx(InitSequence200, {});
}
} else {
liverpool->SubmitGfx(InitSequence350, {});
if (sceKernelIsNeoMode()) {
if (!UseNeoCompatSequences) {
liverpool->SubmitGfx(InitSequence350Neo, {});
} else {
liverpool->SubmitGfx(InitSequence350NeoCompat, {});
}
} else {
liverpool->SubmitGfx(InitSequence350, {});
}
}
send_init_packet = false;
}

View file

@ -39,7 +39,7 @@ int PS4_SYSV_ABI sceGnmDisableMipStatsReport();
s32 PS4_SYSV_ABI sceGnmDispatchDirect(u32* cmdbuf, u32 size, u32 threads_x, u32 threads_y,
u32 threads_z, u32 flags);
s32 PS4_SYSV_ABI sceGnmDispatchIndirect(u32* cmdbuf, u32 size, u32 data_offset, u32 flags);
int PS4_SYSV_ABI sceGnmDispatchIndirectOnMec();
s32 PS4_SYSV_ABI sceGnmDispatchIndirectOnMec(u32* cmdbuf, u32 size, VAddr args, u32 modifier);
u32 PS4_SYSV_ABI sceGnmDispatchInitDefaultHardwareState(u32* cmdbuf, u32 size);
s32 PS4_SYSV_ABI sceGnmDrawIndex(u32* cmdbuf, u32 size, u32 index_count, uintptr_t index_addr,
u32 flags, u32 type);

View file

@ -0,0 +1,542 @@
// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <array>
namespace Libraries::GnmDriver {
constexpr auto HwInitPacketSize = 0x100u;
// clang-format off
constexpr std::array InitSequence{
// A fake preamble to mimic context reset sent by FW
0xc0001200u, 0u, // IT_CLEAR_STATE
// Actual init state sequence
0xc0017600u, 0x216u, 0xffffffffu,
0xc0017600u, 0x217u, 0xffffffffu,
0xc0017600u, 0x215u, 0u,
0xc0016900u, 0x2f9u, 0x2du,
0xc0016900u, 0x282u, 8u,
0xc0016900u, 0x280u, 0x80008u,
0xc0016900u, 0x281u, 0xffff0000u,
0xc0016900u, 0x204u, 0u,
0xc0016900u, 0x206u, 0x43fu,
0xc0016900u, 0x83u, 0xffffu,
0xc0016900u, 0x317u, 0x10u,
0xc0016900u, 0x2fau, 0x3f800000u,
0xc0016900u, 0x2fcu, 0x3f800000u,
0xc0016900u, 0x2fbu, 0x3f800000u,
0xc0016900u, 0x2fdu, 0x3f800000u,
0xc0016900u, 0x202u, 0xcc0010u,
0xc0016900u, 0x30eu, 0xffffffffu,
0xc0016900u, 0x30fu, 0xffffffffu,
0xc0002f00u, 1u,
0xc0017600u, 7u, 0x1ffu,
0xc0017600u, 0x46u, 0x1ffu,
0xc0017600u, 0x87u, 0x1ffu,
0xc0017600u, 0xc7u, 0x1ffu,
0xc0017600u, 0x107u, 0u,
0xc0017600u, 0x147u, 0x1ffu,
0xc0016900u, 0x1b1u, 2u,
0xc0016900u, 0x101u, 0u,
0xc0016900u, 0x100u, 0xffffffffu,
0xc0016900u, 0x103u, 0u,
0xc0016900u, 0x284u, 0u,
0xc0016900u, 0x290u, 0u,
0xc0016900u, 0x2aeu, 0u,
0xc0016900u, 0x292u, 0u,
0xc0016900u, 0x293u, 0x6000000u,
0xc0016900u, 0x2f8u, 0u,
0xc0016900u, 0x2deu, 0x1e9u,
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
0xc0017900u, 0x200u, 0xe0000000u,
};
static_assert(InitSequence.size() == 0x73 + 2);
constexpr std::array InitSequence175{
// A fake preamble to mimic context reset sent by FW
0xc0001200u, 0u, // IT_CLEAR_STATE
// Actual init state sequence
0xc0017600u, 0x216u, 0xffffffffu,
0xc0017600u, 0x217u, 0xffffffffu,
0xc0017600u, 0x215u, 0u,
0xc0016900u, 0x2f9u, 0x2du,
0xc0016900u, 0x282u, 8u,
0xc0016900u, 0x280u, 0x80008u,
0xc0016900u, 0x281u, 0xffff0000u,
0xc0016900u, 0x204u, 0u,
0xc0016900u, 0x206u, 0x43fu,
0xc0016900u, 0x83u, 0xffffu,
0xc0016900u, 0x317u, 0x10u,
0xc0016900u, 0x2fau, 0x3f800000u,
0xc0016900u, 0x2fcu, 0x3f800000u,
0xc0016900u, 0x2fbu, 0x3f800000u,
0xc0016900u, 0x2fdu, 0x3f800000u,
0xc0016900u, 0x202u, 0xcc0010u,
0xc0016900u, 0x30eu, 0xffffffffu,
0xc0016900u, 0x30fu, 0xffffffffu,
0xc0002f00u, 1u,
0xc0017600u, 7u, 0x1ffu,
0xc0017600u, 0x46u, 0x1ffu,
0xc0017600u, 0x87u, 0x1ffu,
0xc0017600u, 0xc7u, 0x1ffu,
0xc0017600u, 0x107u, 0u,
0xc0017600u, 0x147u, 0x1ffu,
0xc0016900u, 0x1b1u, 2u,
0xc0016900u, 0x101u, 0u,
0xc0016900u, 0x100u, 0xffffffffu,
0xc0016900u, 0x103u, 0u,
0xc0016900u, 0x284u, 0u,
0xc0016900u, 0x290u, 0u,
0xc0016900u, 0x2aeu, 0u,
0xc0016900u, 0x292u, 0u,
0xc0016900u, 0x293u, 0x6020000u,
0xc0016900u, 0x2f8u, 0u,
0xc0016900u, 0x2deu, 0x1e9u,
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
0xc0017900u, 0x200u, 0xe0000000u,
};
static_assert(InitSequence175.size() == 0x73 + 2);
constexpr std::array InitSequence200{
// A fake preamble to mimic context reset sent by FW
0xc0001200u, 0u, // IT_CLEAR_STATE
// Actual init state sequence
0xc0017600u, 0x216u, 0xffffffffu,
0xc0017600u, 0x217u, 0xffffffffu,
0xc0017600u, 0x215u, 0u,
0xc0016900u, 0x2f9u, 0x2du,
0xc0016900u, 0x282u, 8u,
0xc0016900u, 0x280u, 0x80008u,
0xc0016900u, 0x281u, 0xffff0000u,
0xc0016900u, 0x204u, 0u,
0xc0016900u, 0x206u, 0x43fu,
0xc0016900u, 0x83u, 0xffffu,
0xc0016900u, 0x317u, 0x10u,
0xc0016900u, 0x2fau, 0x3f800000u,
0xc0016900u, 0x2fcu, 0x3f800000u,
0xc0016900u, 0x2fbu, 0x3f800000u,
0xc0016900u, 0x2fdu, 0x3f800000u,
0xc0016900u, 0x202u, 0xcc0010u,
0xc0016900u, 0x30eu, 0xffffffffu,
0xc0016900u, 0x30fu, 0xffffffffu,
0xc0002f00u, 1u,
0xc0017600u, 7u, 0x1701ffu,
0xc0017600u, 0x46u, 0x1701fdu,
0xc0017600u, 0x87u, 0x1701ffu,
0xc0017600u, 0xc7u, 0x1701fdu,
0xc0017600u, 0x107u, 0x17u,
0xc0017600u, 0x147u, 0x1701fdu,
0xc0017600u, 0x47u, 0x1cu,
0xc0016900u, 0x1b1u, 2u,
0xc0016900u, 0x101u, 0u,
0xc0016900u, 0x100u, 0xffffffffu,
0xc0016900u, 0x103u, 0u,
0xc0016900u, 0x284u, 0u,
0xc0016900u, 0x290u, 0u,
0xc0016900u, 0x2aeu, 0u,
0xc0016900u, 0x292u, 0u,
0xc0016900u, 0x293u, 0x6020000u,
0xc0016900u, 0x2f8u, 0u,
0xc0016900u, 0x2deu, 0x1e9u,
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
0xc0017900u, 0x200u, 0xe0000000u,
};
static_assert(InitSequence200.size() == 0x76 + 2);
constexpr std::array InitSequence200Neo{
// A fake preamble to mimic context reset sent by FW
0xc0001200u, 0u, // IT_CLEAR_STATE
// Actual init state sequence
0xc0017600u, 0x216u, 0xffffffffu,
0xc0017600u, 0x217u, 0xffffffffu,
0xc0017600u, 0x219u, 0xffffffffu,
0xc0017600u, 0x21au, 0xffffffffu,
0xc0017600u, 0x215u, 0u,
0xc0016900u, 0x2f9u, 0x2du,
0xc0016900u, 0x282u, 8u,
0xc0016900u, 0x280u, 0x80008u,
0xc0016900u, 0x281u, 0xffff0000u,
0xc0016900u, 0x204u, 0u,
0xc0016900u, 0x206u, 0x43fu,
0xc0016900u, 0x83u, 0xffffu,
0xc0016900u, 0x317u, 0x10u,
0xc0016900u, 0x2fau, 0x3f800000u,
0xc0016900u, 0x2fcu, 0x3f800000u,
0xc0016900u, 0x2fbu, 0x3f800000u,
0xc0016900u, 0x2fdu, 0x3f800000u,
0xc0016900u, 0x202u, 0xcc0010u,
0xc0016900u, 0x30eu, 0xffffffffu,
0xc0016900u, 0x30fu, 0xffffffffu,
0xc0002f00u, 1u,
0xc0017600u, 7u, 0x1701ffu,
0xc0017600u, 0x46u, 0x1701fdu,
0xc0017600u, 0x87u, 0x1701ffu,
0xc0017600u, 0xc7u, 0x1701fdu,
0xc0017600u, 0x107u, 0x17u,
0xc0017600u, 0x147u, 0x1701fdu,
0xc0017600u, 0x47u, 0x1cu,
0xc0016900u, 0x1b1u, 2u,
0xc0016900u, 0x101u, 0u,
0xc0016900u, 0x100u, 0xffffffffu,
0xc0016900u, 0x103u, 0u,
0xc0016900u, 0x284u, 0u,
0xc0016900u, 0x290u, 0u,
0xc0016900u, 0x2aeu, 0u,
0xc0016900u, 0x292u, 0u,
0xc0016900u, 0x293u, 0x6020000u,
0xc0016900u, 0x2f8u, 0u,
0xc0016900u, 0x2deu, 0x1e9u,
0xc0026900u, 0xebu, 0xff00ff00u, 0xff00u,
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
0xc0017900u, 0x200u, 0xe0000000u,
0xc0017900u, 0x40000258u, 0x6d007fu,
};
static_assert(InitSequence200Neo.size() == 0x83 + 2);
constexpr std::array InitSequence200NeoCompat{
// A fake preamble to mimic context reset sent by FW
0xc0001200u, 0u, // IT_CLEAR_STATE
// Actual init state sequence
0xc0017600u, 0x216u, 0xffffffffu,
0xc0017600u, 0x217u, 0xffffffffu,
0xc0017600u, 0x219u, 0xffffffffu,
0xc0017600u, 0x21au, 0xffffffffu,
0xc0017600u, 0x215u, 0u,
0xc0016900u, 0x2f9u, 0x2du,
0xc0016900u, 0x282u, 8u,
0xc0016900u, 0x280u, 0x80008u,
0xc0016900u, 0x281u, 0xffff0000u,
0xc0016900u, 0x204u, 0u,
0xc0016900u, 0x206u, 0x43fu,
0xc0016900u, 0x83u, 0xffffu,
0xc0016900u, 0x317u, 0x10u,
0xc0016900u, 0x2fau, 0x3f800000u,
0xc0016900u, 0x2fcu, 0x3f800000u,
0xc0016900u, 0x2fbu, 0x3f800000u,
0xc0016900u, 0x2fdu, 0x3f800000u,
0xc0016900u, 0x202u, 0xcc0010u,
0xc0016900u, 0x30eu, 0xffffffffu,
0xc0016900u, 0x30fu, 0xffffffffu,
0xc0002f00u, 1u,
0xc0017600u, 7u, 0x1701ffu,
0xc0017600u, 0x46u, 0x1701fdu,
0xc0017600u, 0x87u, 0x1701ffu,
0xc0017600u, 0xc7u, 0x1701fdu,
0xc0017600u, 0x107u, 0x17u,
0xc0017600u, 0x147u, 0x1701fdu,
0xc0017600u, 0x47u, 0x1cu,
0xc0016900u, 0x1b1u, 2u,
0xc0016900u, 0x101u, 0u,
0xc0016900u, 0x100u, 0xffffffffu,
0xc0016900u, 0x103u, 0u,
0xc0016900u, 0x284u, 0u,
0xc0016900u, 0x290u, 0u,
0xc0016900u, 0x2aeu, 0u,
0xc0016900u, 0x292u, 0u,
0xc0016900u, 0x293u, 0x6020000u,
0xc0016900u, 0x2f8u, 0u,
0xc0016900u, 0x2deu, 0x1e9u,
0xc0026900u, 0xebu, 0xff00ff00u, 0xff00u,
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
0xc0017900u, 0x200u, 0xe0000000u,
0xc0016900u, 0x100002aau, 0xd00ffu,
};
static_assert(InitSequence200NeoCompat.size() == 0x83 + 2);
constexpr std::array InitSequence350{
// A fake preamble to mimic context reset sent by FW
0xc0001200u, 0u, // IT_CLEAR_STATE
// Actual init state sequence
0xc0017600u, 0x216u, 0xffffffffu,
0xc0017600u, 0x217u, 0xffffffffu,
0xc0017600u, 0x215u, 0u,
0xc0016900u, 0x2f9u, 0x2du,
0xc0016900u, 0x282u, 8u,
0xc0016900u, 0x280u, 0x80008u,
0xc0016900u, 0x281u, 0xffff0000u,
0xc0016900u, 0x204u, 0u,
0xc0016900u, 0x206u, 0x43fu,
0xc0016900u, 0x83u, 0xffffu,
0xc0016900u, 0x317u, 0x10u,
0xc0016900u, 0x2fau, 0x3f800000u,
0xc0016900u, 0x2fcu, 0x3f800000u,
0xc0016900u, 0x2fbu, 0x3f800000u,
0xc0016900u, 0x2fdu, 0x3f800000u,
0xc0016900u, 0x202u, 0xcc0010u,
0xc0016900u, 0x30eu, 0xffffffffu,
0xc0016900u, 0x30fu, 0xffffffffu,
0xc0002f00u, 1u,
0xc0017600u, 7u, 0x1701ffu,
0xc0017600u, 0x46u, 0x1701fdu,
0xc0017600u, 0x87u, 0x1701ffu,
0xc0017600u, 0xc7u, 0x1701fdu,
0xc0017600u, 0x107u, 0x17u,
0xc0017600u, 0x147u, 0x1701fdu,
0xc0017600u, 0x47u, 0x1cu,
0xc0016900u, 0x1b1u, 2u,
0xc0016900u, 0x101u, 0u,
0xc0016900u, 0x100u, 0xffffffffu,
0xc0016900u, 0x103u, 0u,
0xc0016900u, 0x284u, 0u,
0xc0016900u, 0x290u, 0u,
0xc0016900u, 0x2aeu, 0u,
0xc0016900u, 0x102u, 0u,
0xc0016900u, 0x292u, 0u,
0xc0016900u, 0x293u, 0x6020000u,
0xc0016900u, 0x2f8u, 0u,
0xc0016900u, 0x2deu, 0x1e9u,
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
0xc0017900u, 0x200u, 0xe0000000u,
0xc0016900u, 0x2aau, 0xffu,
};
static_assert(InitSequence350.size() == 0x7c + 2);
constexpr std::array InitSequence350Neo{
// A fake preamble to mimic context reset sent by FW
0xc0001200u, 0u, // IT_CLEAR_STATE
// Actual init state sequence
0xc0017600u, 0x216u, 0xffffffffu,
0xc0017600u, 0x217u, 0xffffffffu,
0xc0017600u, 0x219u, 0xffffffffu,
0xc0017600u, 0x21au, 0xffffffffu,
0xc0017600u, 0x215u, 0u,
0xc0016900u, 0x2f9u, 0x2du,
0xc0016900u, 0x282u, 8u,
0xc0016900u, 0x280u, 0x80008u,
0xc0016900u, 0x281u, 0xffff0000u,
0xc0016900u, 0x204u, 0u,
0xc0016900u, 0x206u, 0x43fu,
0xc0016900u, 0x83u, 0xffffu,
0xc0016900u, 0x317u, 0x10u,
0xc0016900u, 0x2fau, 0x3f800000u,
0xc0016900u, 0x2fcu, 0x3f800000u,
0xc0016900u, 0x2fbu, 0x3f800000u,
0xc0016900u, 0x2fdu, 0x3f800000u,
0xc0016900u, 0x202u, 0xcc0010u,
0xc0016900u, 0x30eu, 0xffffffffu,
0xc0016900u, 0x30fu, 0xffffffffu,
0xc0002f00u, 1u,
0xc0017600u, 7u, 0x1701ffu,
0xc0017600u, 0x46u, 0x1701fdu,
0xc0017600u, 0x87u, 0x1701ffu,
0xc0017600u, 0xc7u, 0x1701fdu,
0xc0017600u, 0x107u, 0x17u,
0xc0017600u, 0x147u, 0x1701fdu,
0xc0017600u, 0x47u, 0x1cu,
0xc0016900u, 0x1b1u, 2u,
0xc0016900u, 0x101u, 0u,
0xc0016900u, 0x100u, 0xffffffffu,
0xc0016900u, 0x103u, 0u,
0xc0016900u, 0x284u, 0u,
0xc0016900u, 0x290u, 0u,
0xc0016900u, 0x2aeu, 0u,
0xc0016900u, 0x102u, 0u,
0xc0016900u, 0x292u, 0u,
0xc0016900u, 0x293u, 0x6020000u,
0xc0016900u, 0x2f8u, 0u,
0xc0016900u, 0x2deu, 0x1e9u,
0xc0026900u, 0xebu, 0xff00ff00u, 0xff00u,
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
0xc0017900u, 0x200u, 0xe0000000u,
0xc0017900u, 0x40000258u, 0x6d007fu,
};
static_assert(InitSequence350Neo.size() == 0x86 + 2);
constexpr std::array InitSequence350NeoCompat{
// A fake preamble to mimic context reset sent by FW
0xc0001200u, 0u, // IT_CLEAR_STATE
// Actual init state sequence
0xc0017600u, 0x216u, 0xffffffffu,
0xc0017600u, 0x217u, 0xffffffffu,
0xc0017600u, 0x219u, 0xffffffffu,
0xc0017600u, 0x21au, 0xffffffffu,
0xc0017600u, 0x215u, 0u,
0xc0016900u, 0x2f9u, 0x2du,
0xc0016900u, 0x282u, 8u,
0xc0016900u, 0x280u, 0x80008u,
0xc0016900u, 0x281u, 0xffff0000u,
0xc0016900u, 0x204u, 0u,
0xc0016900u, 0x206u, 0x43fu,
0xc0016900u, 0x83u, 0xffffu,
0xc0016900u, 0x317u, 0x10u,
0xc0016900u, 0x2fau, 0x3f800000u,
0xc0016900u, 0x2fcu, 0x3f800000u,
0xc0016900u, 0x2fbu, 0x3f800000u,
0xc0016900u, 0x2fdu, 0x3f800000u,
0xc0016900u, 0x202u, 0xcc0010u,
0xc0016900u, 0x30eu, 0xffffffffu,
0xc0016900u, 0x30fu, 0xffffffffu,
0xc0002f00u, 1u,
0xc0017600u, 7u, 0x1701ffu,
0xc0017600u, 0x46u, 0x1701fdu,
0xc0017600u, 0x87u, 0x1701ffu,
0xc0017600u, 0xc7u, 0x1701fdu,
0xc0017600u, 0x107u, 0x17u,
0xc0017600u, 0x147u, 0x1701fdu,
0xc0017600u, 0x47u, 0x1cu,
0xc0016900u, 0x1b1u, 2u,
0xc0016900u, 0x101u, 0u,
0xc0016900u, 0x100u, 0xffffffffu,
0xc0016900u, 0x103u, 0u,
0xc0016900u, 0x284u, 0u,
0xc0016900u, 0x290u, 0u,
0xc0016900u, 0x2aeu, 0u,
0xc0016900u, 0x102u, 0u,
0xc0016900u, 0x292u, 0u,
0xc0016900u, 0x293u, 0x6020000u,
0xc0016900u, 0x2f8u, 0u,
0xc0016900u, 0x2deu, 0x1e9u,
0xc0026900u, 0xebu, 0xff00ff00u, 0xff00u,
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
0xc0017900u, 0x200u, 0xe0000000u,
0xc0016900u, 0x100002aau, 0xd00ffu,
};
static_assert(InitSequence350NeoCompat.size() == 0x86 + 2);
constexpr std::array CtxInitSequence{
0xc0012800u, 0x80000000u, 0x80000000u,
0xc0001200u, 0u,
0xc0002f00u, 1u,
0xc0016900u, 0x102u, 0u,
0xc0016900u, 0x202u, 0xcc0010u,
0xc0111000u, 0u
};
static_assert(CtxInitSequence.size() == 0x0f);
constexpr std::array CtxInitSequenceNeo{
0xc0012800u, 0x80000000u, 0x80000000u,
0xc0001200u, 0u,
0xc0002f00u, 1u,
0xc0016900u, 0x102u, 0u,
0xc0016900u, 0x202u, 0xcc0010u,
0xc0026900u, 0xebu, 0xff00ff00u, 0xff00u,
0xc00d1000, 0u
};
static_assert(CtxInitSequenceNeo.size() == 0x13);
constexpr std::array CtxInitSequence400{
0xc0012800u, 0x80000000u, 0x80000000u,
0xc0001200u, 0u,
0xc0016900u, 0x2f9u, 0x2du,
0xc0016900u, 0x282u, 8u,
0xc0016900u, 0x280u, 0x80008u,
0xc0016900u, 0x281u, 0xffff0000u,
0xc0016900u, 0x204u, 0u,
0xc0016900u, 0x206u, 0x43fu,
0xc0016900u, 0x83u, 0xffffu,
0xc0016900u, 0x317u, 0x10u,
0xc0016900u, 0x2fau, 0x3f800000u,
0xc0016900u, 0x2fcu, 0x3f800000u,
0xc0016900u, 0x2fbu, 0x3f800000u,
0xc0016900u, 0x2fdu, 0x3f800000u,
0xc0016900u, 0x202u, 0xcc0010u,
0xc0016900u, 0x30eu, 0xffffffffu,
0xc0016900u, 0x30fu, 0xffffffffu,
0xc0002f00u, 1u,
0xc0016900u, 0x1b1u, 2u,
0xc0016900u, 0x101u, 0u,
0xc0016900u, 0x100u, 0xffffffffu,
0xc0016900u, 0x103u, 0u,
0xc0016900u, 0x284u, 0u,
0xc0016900u, 0x290u, 0u,
0xc0016900u, 0x2aeu, 0u,
0xc0016900u, 0x102u, 0u,
0xc0016900u, 0x292u, 0u,
0xc0016900u, 0x293u, 0x6020000u,
0xc0016900u, 0x2f8u, 0u,
0xc0016900u, 0x2deu, 0x1e9u,
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
0xc0016900u, 0x2aau, 0xffu,
0xc09e1000u,
};
static_assert(CtxInitSequence400.size() == 0x61);
constexpr std::array CtxInitSequence400Neo{
0xc0012800u, 0x80000000u, 0x80000000u,
0xc0001200u, 0u,
0xc0016900u, 0x2f9u, 0x2du,
0xc0016900u, 0x282u, 8u,
0xc0016900u, 0x280u, 0x80008u,
0xc0016900u, 0x281u, 0xffff0000u,
0xc0016900u, 0x204u, 0u,
0xc0016900u, 0x206u, 0x43fu,
0xc0016900u, 0x83u, 0xffffu,
0xc0016900u, 0x317u, 0x10u,
0xc0016900u, 0x2fau, 0x3f800000u,
0xc0016900u, 0x2fcu, 0x3f800000u,
0xc0016900u, 0x2fbu, 0x3f800000u,
0xc0016900u, 0x2fdu, 0x3f800000u,
0xc0016900u, 0x202u, 0xcc0010u,
0xc0016900u, 0x30eu, 0xffffffffu,
0xc0016900u, 0x30fu, 0xffffffffu,
0xc0002f00u, 1u,
0xc0016900u, 0x1b1u, 2u,
0xc0016900u, 0x101u, 0u,
0xc0016900u, 0x100u, 0xffffffffu,
0xc0016900u, 0x103u, 0u,
0xc0016900u, 0x284u, 0u,
0xc0016900u, 0x290u, 0u,
0xc0016900u, 0x2aeu, 0u,
0xc0016900u, 0x102u, 0u,
0xc0016900u, 0x292u, 0u,
0xc0016900u, 0x293u, 0x6020000u,
0xc0016900u, 0x2f8u, 0u,
0xc0016900u, 0x2deu, 0x1e9u,
0xc0026900u, 0xebu, 0xff00ff00u, 0xff00u,
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
0xc0017900u, 0x40000258u, 0x6d007fu,
0xc09a1000u,
};
static_assert(CtxInitSequence400Neo.size() == 0x65);
constexpr std::array CtxInitSequence400NeoCompat{
0xc0012800u, 0x80000000u, 0x80000000u,
0xc0001200u, 0u,
0xc0016900u, 0x2f9u, 0x2du,
0xc0016900u, 0x282u, 8u,
0xc0016900u, 0x280u, 0x80008u,
0xc0016900u, 0x281u, 0xffff0000u,
0xc0016900u, 0x204u, 0u,
0xc0016900u, 0x206u, 0x43fu,
0xc0016900u, 0x83u, 0xffffu,
0xc0016900u, 0x317u, 0x10u,
0xc0016900u, 0x2fau, 0x3f800000u,
0xc0016900u, 0x2fcu, 0x3f800000u,
0xc0016900u, 0x2fbu, 0x3f800000u,
0xc0016900u, 0x2fdu, 0x3f800000u,
0xc0016900u, 0x202u, 0xcc0010u,
0xc0016900u, 0x30eu, 0xffffffffu,
0xc0016900u, 0x30fu, 0xffffffffu,
0xc0002f00u, 1u,
0xc0016900u, 0x1b1u, 2u,
0xc0016900u, 0x101u, 0u,
0xc0016900u, 0x100u, 0xffffffffu,
0xc0016900u, 0x103u, 0u,
0xc0016900u, 0x284u, 0u,
0xc0016900u, 0x290u, 0u,
0xc0016900u, 0x2aeu, 0u,
0xc0016900u, 0x102u, 0u,
0xc0016900u, 0x292u, 0u,
0xc0016900u, 0x293u, 0x6020000u,
0xc0016900u, 0x2f8u, 0u,
0xc0016900u, 0x2deu, 0x1e9u,
0xc0026900u, 0xebu, 0xff00ff00u, 0xff00u,
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
0xc0016900u, 0x100002aau, 0xd00ffu,
0xc09a1000u,
};
static_assert(CtxInitSequence400Neo.size() == 0x65);
// clang-format on
} // namespace Libraries::GnmDriver

View file

@ -14,7 +14,8 @@ namespace Libraries::Kernel {
int PS4_SYSV_ABI sceKernelIsNeoMode() {
LOG_DEBUG(Kernel_Sce, "called");
return Config::isNeoMode();
return Config::isNeoModeConsole() &&
Common::ElfInfo::Instance().PSFAttributes().support_neo_mode;
}
int PS4_SYSV_ABI sceKernelGetCompiledSdkVersion(int* ver) {

View file

@ -157,7 +157,7 @@ s32 PS4_SYSV_ABI scePlayGoGetLocus(OrbisPlayGoHandle handle, const OrbisPlayGoCh
}
for (int i = 0; i < numberOfEntries; i++) {
if (chunkIds[i] <= playgo->chunks.size()) {
if (chunkIds[i] < playgo->chunks.size()) {
outLoci[i] = OrbisPlayGoLocus::LocalFast;
} else {
outLoci[i] = OrbisPlayGoLocus::NotDownloaded;

View file

@ -7,6 +7,7 @@
#include "common/debug.h"
#include "core/libraries/kernel/memory.h"
#include "core/libraries/kernel/orbis_error.h"
#include "core/libraries/kernel/process.h"
#include "core/memory.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
@ -35,7 +36,7 @@ MemoryManager::~MemoryManager() = default;
void MemoryManager::SetupMemoryRegions(u64 flexible_size, bool use_extended_mem1,
bool use_extended_mem2) {
const bool is_neo = Config::isNeoMode();
const bool is_neo = ::Libraries::Kernel::sceKernelIsNeoMode();
auto total_size = is_neo ? SCE_KERNEL_TOTAL_MEM_PRO : SCE_KERNEL_TOTAL_MEM;
if (!use_extended_mem1 && is_neo) {
total_size -= 256_MB;

View file

@ -28,8 +28,6 @@
#include "core/file_format/trp.h"
#include "core/file_sys/fs.h"
#include "core/libraries/disc_map/disc_map.h"
#include "core/libraries/fiber/fiber.h"
#include "core/libraries/jpeg/jpegenc.h"
#include "core/libraries/libc_internal/libc_internal.h"
#include "core/libraries/libs.h"
#include "core/libraries/ngs2/ngs2.h"
@ -59,8 +57,8 @@ Emulator::Emulator() {
LOG_INFO(Loader, "Branch {}", Common::g_scm_branch);
LOG_INFO(Loader, "Description {}", Common::g_scm_desc);
LOG_INFO(Config, "General Logtype: {}", Config::getLogType());
LOG_INFO(Config, "General isNeo: {}", Config::isNeoMode());
LOG_INFO(Config, "General LogType: {}", Config::getLogType());
LOG_INFO(Config, "General isNeo: {}", Config::isNeoModeConsole());
LOG_INFO(Config, "GPU isNullGpu: {}", Config::nullGpu());
LOG_INFO(Config, "GPU shouldDumpShaders: {}", Config::dumpShaders());
LOG_INFO(Config, "GPU vblankDivider: {}", Config::vblankDiv());
@ -101,19 +99,12 @@ Emulator::~Emulator() {
}
void Emulator::Run(const std::filesystem::path& file) {
// Use the eboot from the separated updates folder if it's there
std::filesystem::path game_patch_folder = file.parent_path();
game_patch_folder += "-UPDATE";
std::filesystem::path eboot_path = std::filesystem::exists(game_patch_folder / file.filename())
? game_patch_folder / file.filename()
: file;
// Applications expect to be run from /app0 so mount the file's parent path as app0.
auto* mnt = Common::Singleton<Core::FileSys::MntPoints>::Instance();
mnt->Mount(file.parent_path(), "/app0");
const auto game_folder = file.parent_path();
mnt->Mount(game_folder, "/app0");
// Certain games may use /hostapp as well such as CUSA001100
mnt->Mount(file.parent_path(), "/hostapp");
mnt->Mount(game_folder, "/hostapp");
auto& game_info = Common::ElfInfo::Instance();
@ -122,50 +113,52 @@ void Emulator::Run(const std::filesystem::path& file) {
std::string title;
std::string app_version;
u32 fw_version;
Common::PSFAttributes psf_attributes{};
std::filesystem::path sce_sys_folder = eboot_path.parent_path() / "sce_sys";
if (std::filesystem::is_directory(sce_sys_folder)) {
for (const auto& entry : std::filesystem::directory_iterator(sce_sys_folder)) {
if (entry.path().filename() == "param.sfo") {
auto* param_sfo = Common::Singleton<PSF>::Instance();
const bool success = param_sfo->Open(sce_sys_folder / "param.sfo");
ASSERT_MSG(success, "Failed to open param.sfo");
const auto content_id = param_sfo->GetString("CONTENT_ID");
ASSERT_MSG(content_id.has_value(), "Failed to get CONTENT_ID");
id = std::string(*content_id, 7, 9);
Libraries::NpTrophy::game_serial = id;
const auto trophyDir =
Common::FS::GetUserPath(Common::FS::PathType::MetaDataDir) / id / "TrophyFiles";
if (!std::filesystem::exists(trophyDir)) {
TRP trp;
if (!trp.Extract(eboot_path.parent_path(), id)) {
LOG_ERROR(Loader, "Couldn't extract trophies");
}
}
const auto param_sfo_path = mnt->GetHostPath("/app0/sce_sys/param.sfo");
if (std::filesystem::exists(param_sfo_path)) {
auto* param_sfo = Common::Singleton<PSF>::Instance();
const bool success = param_sfo->Open(param_sfo_path);
ASSERT_MSG(success, "Failed to open param.sfo");
const auto content_id = param_sfo->GetString("CONTENT_ID");
ASSERT_MSG(content_id.has_value(), "Failed to get CONTENT_ID");
id = std::string(*content_id, 7, 9);
Libraries::NpTrophy::game_serial = id;
const auto trophyDir =
Common::FS::GetUserPath(Common::FS::PathType::MetaDataDir) / id / "TrophyFiles";
if (!std::filesystem::exists(trophyDir)) {
TRP trp;
if (!trp.Extract(game_folder, id)) {
LOG_ERROR(Loader, "Couldn't extract trophies");
}
}
#ifdef ENABLE_QT_GUI
MemoryPatcher::g_game_serial = id;
MemoryPatcher::g_game_serial = id;
// Timer for 'Play Time'
QTimer* timer = new QTimer();
QObject::connect(timer, &QTimer::timeout, [this, id]() {
UpdatePlayTime(id);
start_time = std::chrono::steady_clock::now();
});
timer->start(60000); // 60000 ms = 1 minute
// Timer for 'Play Time'
QTimer* timer = new QTimer();
QObject::connect(timer, &QTimer::timeout, [this, id]() {
UpdatePlayTime(id);
start_time = std::chrono::steady_clock::now();
});
timer->start(60000); // 60000 ms = 1 minute
#endif
title = param_sfo->GetString("TITLE").value_or("Unknown title");
LOG_INFO(Loader, "Game id: {} Title: {}", id, title);
fw_version = param_sfo->GetInteger("SYSTEM_VER").value_or(0x4700000);
app_version = param_sfo->GetString("APP_VER").value_or("Unknown version");
LOG_INFO(Loader, "Fw: {:#x} App Version: {}", fw_version, app_version);
} else if (entry.path().filename() == "pic1.png") {
auto* splash = Common::Singleton<Splash>::Instance();
if (splash->IsLoaded()) {
continue;
}
if (!splash->Open(entry.path())) {
LOG_ERROR(Loader, "Game splash: unable to open file");
}
title = param_sfo->GetString("TITLE").value_or("Unknown title");
LOG_INFO(Loader, "Game id: {} Title: {}", id, title);
fw_version = param_sfo->GetInteger("SYSTEM_VER").value_or(0x4700000);
app_version = param_sfo->GetString("APP_VER").value_or("Unknown version");
LOG_INFO(Loader, "Fw: {:#x} App Version: {}", fw_version, app_version);
if (const auto raw_attributes = param_sfo->GetInteger("ATTRIBUTE")) {
psf_attributes.raw = *raw_attributes;
}
}
const auto pic1_path = mnt->GetHostPath("/app0/sce_sys/pic1.png");
if (std::filesystem::exists(pic1_path)) {
auto* splash = Common::Singleton<Splash>::Instance();
if (!splash->IsLoaded()) {
if (!splash->Open(pic1_path)) {
LOG_ERROR(Loader, "Game splash: unable to open file");
}
}
}
@ -176,6 +169,7 @@ void Emulator::Run(const std::filesystem::path& file) {
game_info.app_ver = app_version;
game_info.firmware_ver = fw_version & 0xFFF00000;
game_info.raw_firmware_ver = fw_version;
game_info.psf_attributes = psf_attributes;
std::string game_title = fmt::format("{} - {} <{}>", id, title, app_version);
std::string window_title = "";
@ -219,6 +213,7 @@ void Emulator::Run(const std::filesystem::path& file) {
Libraries::InitHLELibs(&linker->GetHLESymbols());
// Load the module with the linker
const auto eboot_path = mnt->GetHostPath("/app0/" + file.filename().string());
linker->LoadModule(eboot_path);
// check if we have system modules to load
@ -236,6 +231,8 @@ void Emulator::Run(const std::filesystem::path& file) {
}
// Load all prx from separate update's sce_module folder
std::filesystem::path game_patch_folder = game_folder;
game_patch_folder += "-UPDATE";
std::filesystem::path update_module_folder = game_patch_folder / "sce_module";
if (std::filesystem::is_directory(update_module_folder)) {
for (const auto& entry : std::filesystem::directory_iterator(update_module_folder)) {

View file

@ -266,6 +266,7 @@ void GameController::TryOpenSDLController() {
}
u32 GameController::Poll() {
std::scoped_lock lock{m_mutex};
if (m_connected) {
auto time = Libraries::Kernel::sceKernelGetProcessTime();
if (m_states_num == 0) {

File diff suppressed because it is too large Load diff

View file

@ -106,6 +106,10 @@ void Translator::EmitScalarAlu(const GcnInst& inst) {
return S_FF1_I32_B32(inst);
case Opcode::S_FF1_I32_B64:
return S_FF1_I32_B64(inst);
case Opcode::S_BITSET0_B32:
return S_BITSET_B32(inst, 0);
case Opcode::S_BITSET1_B32:
return S_BITSET_B32(inst, 1);
case Opcode::S_AND_SAVEEXEC_B64:
return S_SAVEEXEC_B64(NegateMode::None, false, inst);
case Opcode::S_ORN2_SAVEEXEC_B64:
@ -607,6 +611,13 @@ void Translator::S_FF1_I32_B64(const GcnInst& inst) {
SetDst(inst.dst[0], result);
}
void Translator::S_BITSET_B32(const GcnInst& inst, u32 bit_value) {
const IR::U32 old_value{GetSrc(inst.dst[0])};
const IR::U32 offset{ir.BitFieldExtract(GetSrc(inst.src[0]), ir.Imm32(0U), ir.Imm32(5U))};
const IR::U32 result{ir.BitFieldInsert(old_value, ir.Imm32(bit_value), offset, ir.Imm32(1U))};
SetDst(inst.dst[0], result);
}
void Translator::S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& inst) {
// This instruction normally operates on 64-bit data (EXEC, VCC, SGPRs)
// However here we flatten it to 1-bit EXEC and 1-bit VCC. For the destination

View file

@ -114,6 +114,7 @@ public:
void S_BCNT1_I32_B64(const GcnInst& inst);
void S_FF1_I32_B32(const GcnInst& inst);
void S_FF1_I32_B64(const GcnInst& inst);
void S_BITSET_B32(const GcnInst& inst, u32 bit_value);
void S_GETPC_B64(u32 pc, const GcnInst& inst);
void S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& inst);
void S_ABS_I32(const GcnInst& inst);

View file

@ -454,7 +454,6 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
case PM4ItOpcode::DrawIndirect: {
const auto* draw_indirect = reinterpret_cast<const PM4CmdDrawIndirect*>(header);
const auto offset = draw_indirect->data_offset;
const auto ib_address = mapped_queues[GfxQueueId].indirect_args_addr;
const auto size = sizeof(DrawIndirectArgs);
if (DebugState.DumpingCurrentReg()) {
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), regs);
@ -462,7 +461,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
if (rasterizer) {
const auto cmd_address = reinterpret_cast<const void*>(header);
rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:DrawIndirect", cmd_address));
rasterizer->DrawIndirect(false, ib_address, offset, size, 1, 0);
rasterizer->DrawIndirect(false, indirect_args_addr, offset, size, 1, 0);
rasterizer->ScopeMarkerEnd();
}
break;
@ -471,7 +470,6 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
const auto* draw_index_indirect =
reinterpret_cast<const PM4CmdDrawIndexIndirect*>(header);
const auto offset = draw_index_indirect->data_offset;
const auto ib_address = mapped_queues[GfxQueueId].indirect_args_addr;
const auto size = sizeof(DrawIndexedIndirectArgs);
if (DebugState.DumpingCurrentReg()) {
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), regs);
@ -480,7 +478,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
const auto cmd_address = reinterpret_cast<const void*>(header);
rasterizer->ScopeMarkerBegin(
fmt::format("dcb:{}:DrawIndexIndirect", cmd_address));
rasterizer->DrawIndirect(true, ib_address, offset, size, 1, 0);
rasterizer->DrawIndirect(true, indirect_args_addr, offset, size, 1, 0);
rasterizer->ScopeMarkerEnd();
}
break;
@ -489,7 +487,6 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
const auto* draw_index_indirect =
reinterpret_cast<const PM4CmdDrawIndexIndirectMulti*>(header);
const auto offset = draw_index_indirect->data_offset;
const auto ib_address = mapped_queues[GfxQueueId].indirect_args_addr;
if (DebugState.DumpingCurrentReg()) {
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), regs);
}
@ -497,9 +494,9 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
const auto cmd_address = reinterpret_cast<const void*>(header);
rasterizer->ScopeMarkerBegin(
fmt::format("dcb:{}:DrawIndexIndirectCountMulti", cmd_address));
rasterizer->DrawIndirect(true, ib_address, offset, draw_index_indirect->stride,
draw_index_indirect->count,
draw_index_indirect->countAddr);
rasterizer->DrawIndirect(
true, indirect_args_addr, offset, draw_index_indirect->stride,
draw_index_indirect->count, draw_index_indirect->countAddr);
rasterizer->ScopeMarkerEnd();
}
break;
@ -528,7 +525,6 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
reinterpret_cast<const PM4CmdDispatchIndirect*>(header);
auto& cs_program = GetCsRegs();
const auto offset = dispatch_indirect->data_offset;
const auto ib_address = mapped_queues[GfxQueueId].indirect_args_addr;
const auto size = sizeof(PM4CmdDispatchIndirect::GroupDimensions);
if (DebugState.DumpingCurrentReg()) {
DebugState.PushRegsDumpCompute(base_addr, reinterpret_cast<uintptr_t>(header),
@ -538,7 +534,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
const auto cmd_address = reinterpret_cast<const void*>(header);
rasterizer->ScopeMarkerBegin(
fmt::format("dcb:{}:DispatchIndirect", cmd_address));
rasterizer->DispatchIndirect(ib_address, offset, size);
rasterizer->DispatchIndirect(indirect_args_addr, offset, size);
rasterizer->ScopeMarkerEnd();
}
break;
@ -562,7 +558,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
case PM4ItOpcode::SetBase: {
const auto* set_base = reinterpret_cast<const PM4CmdSetBase*>(header);
ASSERT(set_base->base_index == PM4CmdSetBase::BaseIndex::DrawIndexIndirPatchTable);
mapped_queues[GfxQueueId].indirect_args_addr = set_base->Address<u64>();
indirect_args_addr = set_base->Address<u64>();
break;
}
case PM4ItOpcode::EventWrite: {
@ -823,10 +819,10 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, u32 vqid) {
break;
}
case PM4ItOpcode::DispatchIndirect: {
const auto* dispatch_indirect = reinterpret_cast<const PM4CmdDispatchIndirect*>(header);
const auto* dispatch_indirect =
reinterpret_cast<const PM4CmdDispatchIndirectMec*>(header);
auto& cs_program = GetCsRegs();
const auto offset = dispatch_indirect->data_offset;
const auto ib_address = mapped_queues[vqid].indirect_args_addr;
const auto ib_address = dispatch_indirect->Address<VAddr>();
const auto size = sizeof(PM4CmdDispatchIndirect::GroupDimensions);
if (DebugState.DumpingCurrentReg()) {
DebugState.PushRegsDumpCompute(base_addr, reinterpret_cast<uintptr_t>(header),
@ -835,7 +831,7 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, u32 vqid) {
if (rasterizer && (cs_program.dispatch_initiator & 1)) {
const auto cmd_address = reinterpret_cast<const void*>(header);
rasterizer->ScopeMarkerBegin(fmt::format("acb[{}]:{}:Dispatch", vqid, cmd_address));
rasterizer->DispatchIndirect(ib_address, offset, size);
rasterizer->DispatchIndirect(ib_address, 0, size);
rasterizer->ScopeMarkerEnd();
}
break;

View file

@ -814,7 +814,9 @@ struct Liverpool {
BitField<26, 1, u32> fmask_compression_disable_ci;
BitField<27, 1, u32> fmask_compress_1frag_only;
BitField<28, 1, u32> dcc_enable;
BitField<29, 1, u32> cmask_addr_type;
BitField<29, 2, u32> cmask_addr_type;
/// Neo-mode only
BitField<31, 1, u32> alt_tile_mode;
u32 u32all;
} info;
@ -1477,11 +1479,12 @@ private:
std::vector<u32> ccb_buffer;
std::queue<Task::Handle> submits{};
ComputeProgram cs_state{};
VAddr indirect_args_addr{};
};
std::array<GpuQueue, NumTotalQueues> mapped_queues{};
u32 num_mapped_queues{1u}; // GFX is always available
VAddr indirect_args_addr{};
struct ConstantEngine {
void Reset() {
ce_count = 0;

View file

@ -204,6 +204,11 @@ struct PM4CmdSetData {
static constexpr u32* SetShReg(u32* cmdbuf, Args... data) {
return WritePacket<PM4ItOpcode::SetShReg>(cmdbuf, type, data...);
}
template <PM4ShaderType type = PM4ShaderType::ShaderGraphics, typename... Args>
static constexpr u32* SetUconfigReg(u32* cmdbuf, Args... data) {
return WritePacket<PM4ItOpcode::SetUconfigReg>(cmdbuf, type, data...);
}
};
struct PM4CmdNop {
@ -791,6 +796,18 @@ struct PM4CmdDispatchIndirect {
u32 dispatch_initiator; ///< Dispatch Initiator Register
};
struct PM4CmdDispatchIndirectMec {
PM4Type3Header header;
u32 address0;
u32 address1;
u32 dispatch_initiator; ///< Dispatch Initiator Register
template <typename T>
T Address() const {
return std::bit_cast<T>(address0 | (u64(address1 & 0xffff) << 32u));
}
};
struct DrawIndirectArgs {
u32 vertex_count_per_instance;
u32 instance_count;

View file

@ -263,7 +263,15 @@ struct Image {
u64 min_lod_warn : 12;
u64 counter_bank_id : 8;
u64 lod_hw_cnt_en : 1;
u64 : 43;
/// Neo-mode only
u64 compression_en : 1;
/// Neo-mode only
u64 alpha_is_on_msb : 1;
/// Neo-mode only
u64 color_transform : 1;
/// Neo-mode only
u64 alt_tile_mode : 1;
u64 : 39;
static constexpr Image Null() {
Image image{};

View file

@ -660,7 +660,7 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr,
FindFlags::NoCreate | FindFlags::RelaxDim | FindFlags::RelaxFmt | FindFlags::RelaxSize;
TextureCache::BaseDesc desc{};
desc.info.guest_address = device_addr;
desc.info.guest_size_bytes = size;
desc.info.guest_size = size;
const ImageId image_id = texture_cache.FindImage(desc, find_flags);
if (!image_id) {
return false;

View file

@ -2,14 +2,14 @@
# SPDX-License-Identifier: GPL-2.0-or-later
set(SHADER_FILES
detile_m8x1.comp
detile_m8x2.comp
detile_m32x1.comp
detile_m32x2.comp
detile_m32x4.comp
detile_macro8x1.comp
detile_macro32x1.comp
detile_macro32x2.comp
detilers/macro_32bpp.comp
detilers/macro_64bpp.comp
detilers/macro_8bpp.comp
detilers/micro_128bpp.comp
detilers/micro_16bpp.comp
detilers/micro_32bpp.comp
detilers/micro_64bpp.comp
detilers/micro_8bpp.comp
fs_tri.vert
post_process.frag
)

View file

@ -92,13 +92,15 @@ std::string GetReadableVersion(u32 version) {
Instance::Instance(bool enable_validation, bool enable_crash_diagnostic)
: instance{CreateInstance(Frontend::WindowSystemType::Headless, enable_validation,
enable_crash_diagnostic)},
physical_devices{EnumeratePhysicalDevices(instance)} {}
physical_devices{EnumeratePhysicalDevices(instance)},
crash_diagnostic{enable_crash_diagnostic} {}
Instance::Instance(Frontend::WindowSDL& window, s32 physical_device_index,
bool enable_validation /*= false*/, bool enable_crash_diagnostic /*= false*/)
: instance{CreateInstance(window.GetWindowInfo().type, enable_validation,
enable_crash_diagnostic)},
physical_devices{EnumeratePhysicalDevices(instance)} {
physical_devices{EnumeratePhysicalDevices(instance)},
crash_diagnostic{enable_crash_diagnostic} {
if (enable_validation) {
debug_callback = CreateDebugCallback(*instance);
}

View file

@ -81,7 +81,7 @@ public:
/// Returns true when a known debugging tool is attached.
bool HasDebuggingToolAttached() const {
return has_renderdoc || has_nsight_graphics;
return crash_diagnostic || has_renderdoc || has_nsight_graphics;
}
/// Returns true if anisotropic filtering is supported
@ -338,6 +338,7 @@ private:
u32 subgroup_size{};
bool tooling_info{};
bool debug_utils_supported{};
bool crash_diagnostic{};
bool has_nsight_graphics{};
bool has_renderdoc{};
};

View file

@ -427,7 +427,7 @@ bool Presenter::ShowSplash(Frame* frame /*= nullptr*/) {
VideoCore::Extent3D{splash->GetImageInfo().width, splash->GetImageInfo().height, 1};
info.pitch = splash->GetImageInfo().width;
info.guest_address = VAddr(splash->GetImageData().data());
info.guest_size_bytes = splash->GetImageData().size();
info.guest_size = splash->GetImageData().size();
info.mips_layout.emplace_back(splash->GetImageData().size(),
splash->GetImageInfo().width,
splash->GetImageInfo().height, 0);

View file

@ -108,7 +108,7 @@ private:
std::pair<VideoCore::ImageId, VideoCore::TextureCache::RenderTargetDesc>, 8>
cb_descs;
std::optional<std::pair<VideoCore::ImageId, VideoCore::TextureCache::DepthTargetDesc>> db_desc;
boost::container::static_vector<vk::DescriptorImageInfo, 32> image_infos;
boost::container::static_vector<vk::DescriptorImageInfo, 64> image_infos;
boost::container::static_vector<vk::BufferView, 8> buffer_views;
boost::container::static_vector<vk::DescriptorBufferInfo, 32> buffer_infos;
boost::container::static_vector<VideoCore::ImageId, 64> bound_images;
@ -121,7 +121,7 @@ private:
using TexBufferBindingInfo = std::pair<VideoCore::BufferId, AmdGpu::Buffer>;
boost::container::static_vector<TexBufferBindingInfo, 32> texbuffer_bindings;
using ImageBindingInfo = std::pair<VideoCore::ImageId, VideoCore::TextureCache::TextureDesc>;
boost::container::static_vector<ImageBindingInfo, 32> image_bindings;
boost::container::static_vector<ImageBindingInfo, 64> image_bindings;
};
} // namespace Vulkan

View file

@ -210,7 +210,7 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
Vulkan::SetObjectName(instance->GetDevice(), (vk::Image)image, "Image {}x{}x{} {:#x}:{:#x}",
info.size.width, info.size.height, info.size.depth, info.guest_address,
info.guest_size_bytes);
info.guest_size);
}
boost::container::small_vector<vk::ImageMemoryBarrier2, 32> Image::GetBarriers(

View file

@ -80,7 +80,7 @@ struct Image {
[[nodiscard]] bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const noexcept {
const VAddr overlap_end = overlap_cpu_addr + overlap_size;
const auto image_addr = info.guest_address;
const auto image_end = info.guest_address + info.guest_size_bytes;
const auto image_end = info.guest_address + info.guest_size;
return image_addr < overlap_end && overlap_cpu_addr < image_end;
}

View file

@ -3,8 +3,10 @@
#include "common/assert.h"
#include "common/config.h"
#include "core/libraries/kernel/process.h"
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
#include "video_core/texture_cache/image_info.h"
#include "video_core/texture_cache/tile.h"
namespace VideoCore {
@ -45,195 +47,6 @@ static vk::ImageType ConvertImageType(AmdGpu::ImageType type) noexcept {
}
}
// clang-format off
// The table of macro tiles parameters for given tiling index (row) and bpp (column)
static constexpr std::array macro_tile_extents_x1{
std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, // 00
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 01
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 02
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 03
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 04
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 05
std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 06
std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 07
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 08
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 09
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 0A
std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 0B
std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 0C
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 0D
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 0E
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 0F
std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 10
std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 11
std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 12
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 13
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 14
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 15
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 16
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 17
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 18
std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 19
std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 1A
};
static constexpr std::array macro_tile_extents_x2{
std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, // 00
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 01
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 02
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 03
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 04
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 05
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 06
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 07
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 08
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 09
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0A
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0B
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0C
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 0D
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0E
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0F
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 10
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 11
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 12
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 13
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 14
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 15
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 16
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 17
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 18
std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 19
std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 1A
};
static constexpr std::array macro_tile_extents_x4{
std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, // 00
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 01
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 02
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 03
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 04
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 05
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 06
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 07
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 08
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 09
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0A
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0B
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0C
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 0D
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0E
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0F
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 10
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 11
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 12
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 13
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 14
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 15
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 16
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 17
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 18
std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 19
std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 1A
};
static constexpr std::array macro_tile_extents_x8{
std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, // 00
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 01
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 02
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 03
std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 04
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 05
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 06
std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 07
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 08
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 09
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0A
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0B
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0C
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 0D
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0E
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0F
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 10
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 11
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 12
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 13
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 14
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 15
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 16
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 17
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 18
std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 19
std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 1A
};
static constexpr std::array macro_tile_extents{
macro_tile_extents_x1,
macro_tile_extents_x2,
macro_tile_extents_x4,
macro_tile_extents_x8,
};
// clang-format on
static constexpr std::pair micro_tile_extent{8u, 8u};
static constexpr auto hw_pipe_interleave = 256u;
static constexpr std::pair<u32, u32> GetMacroTileExtents(u32 tiling_idx, u32 bpp, u32 num_samples) {
ASSERT(num_samples <= 8);
const auto row = tiling_idx * 5;
const auto column = std::bit_width(bpp) - 4; // bpps are 8, 16, 32, 64, 128
return (macro_tile_extents[std::log2(num_samples)])[row + column];
}
static constexpr std::pair<u32, size_t> ImageSizeLinearAligned(u32 pitch, u32 height, u32 bpp,
u32 num_samples) {
const auto pitch_align = std::max(8u, 64u / ((bpp + 7) / 8));
auto pitch_aligned = (pitch + pitch_align - 1) & ~(pitch_align - 1);
const auto height_aligned = height;
size_t log_sz = pitch_aligned * height_aligned * num_samples;
const auto slice_align = std::max(64u, 256u / ((bpp + 7) / 8));
while (log_sz % slice_align) {
pitch_aligned += pitch_align;
log_sz = pitch_aligned * height_aligned * num_samples;
}
return {pitch_aligned, (log_sz * bpp + 7) / 8};
}
static constexpr std::pair<u32, size_t> ImageSizeMicroTiled(u32 pitch, u32 height, u32 bpp,
u32 num_samples) {
const auto& [pitch_align, height_align] = micro_tile_extent;
auto pitch_aligned = (pitch + pitch_align - 1) & ~(pitch_align - 1);
const auto height_aligned = (height + height_align - 1) & ~(height_align - 1);
size_t log_sz = (pitch_aligned * height_aligned * bpp * num_samples + 7) / 8;
while (log_sz % 256) {
pitch_aligned += 8;
log_sz = (pitch_aligned * height_aligned * bpp * num_samples + 7) / 8;
}
return {pitch_aligned, log_sz};
}
static constexpr std::pair<u32, size_t> ImageSizeMacroTiled(u32 pitch, u32 height, u32 bpp,
u32 num_samples, u32 tiling_idx,
u32 mip_n) {
const auto& [pitch_align, height_align] = GetMacroTileExtents(tiling_idx, bpp, num_samples);
ASSERT(pitch_align != 0 && height_align != 0);
bool downgrade_to_micro = false;
if (mip_n > 0) {
const bool is_less_than_tile = pitch < pitch_align || height < height_align;
// TODO: threshold check
downgrade_to_micro = is_less_than_tile;
}
if (downgrade_to_micro) {
return ImageSizeMicroTiled(pitch, height, bpp, num_samples);
}
const auto pitch_aligned = (pitch + pitch_align - 1) & ~(pitch_align - 1);
const auto height_aligned = (height + height_align - 1) & ~(height_align - 1);
const auto log_sz = pitch_aligned * height_aligned * num_samples;
return {pitch_aligned, (log_sz * bpp + 7) / 8};
}
ImageInfo::ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group,
VAddr cpu_address) noexcept {
const auto& attrib = group.attrib;
@ -250,15 +63,15 @@ ImageInfo::ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group,
guest_address = cpu_address;
if (!props.is_tiled) {
guest_size_bytes = pitch * size.height * 4;
guest_size = pitch * size.height * 4;
} else {
if (Config::isNeoMode()) {
guest_size_bytes = pitch * ((size.height + 127) & (~127)) * 4;
if (Libraries::Kernel::sceKernelIsNeoMode()) {
guest_size = pitch * ((size.height + 127) & (~127)) * 4;
} else {
guest_size_bytes = pitch * ((size.height + 63) & (~63)) * 4;
guest_size = pitch * ((size.height + 63) & (~63)) * 4;
}
}
mips_layout.emplace_back(guest_size_bytes, pitch, 0);
mips_layout.emplace_back(guest_size, pitch, 0);
}
ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer,
@ -279,9 +92,10 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer,
guest_address = buffer.Address();
const auto color_slice_sz = buffer.GetColorSliceSize();
guest_size_bytes = color_slice_sz * buffer.NumSlices();
guest_size = color_slice_sz * buffer.NumSlices();
mips_layout.emplace_back(color_slice_sz, pitch, 0);
tiling_idx = static_cast<u32>(buffer.attrib.tile_mode_index.Value());
alt_tile = Libraries::Kernel::sceKernelIsNeoMode() && buffer.info.alt_tile_mode;
}
ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slices,
@ -303,7 +117,7 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slice
guest_address = buffer.Address();
const auto depth_slice_sz = buffer.GetDepthSliceSize();
guest_size_bytes = depth_slice_sz * num_slices;
guest_size = depth_slice_sz * num_slices;
mips_layout.emplace_back(depth_slice_sz, pitch, 0);
}
@ -333,13 +147,14 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image, const Shader::ImageResource& de
mips_layout.reserve(resources.levels);
tiling_idx = image.tiling_index;
alt_tile = Libraries::Kernel::sceKernelIsNeoMode() && image.alt_tile_mode;
UpdateSize();
}
void ImageInfo::UpdateSize() {
mips_layout.clear();
MipInfo mip_info{};
guest_size_bytes = 0;
guest_size = 0;
for (auto mip = 0u; mip < resources.levels; ++mip) {
auto bpp = num_bits;
auto mip_w = pitch >> mip;
@ -384,7 +199,7 @@ void ImageInfo::UpdateSize() {
case AmdGpu::TilingMode::Depth_MacroTiled: {
ASSERT(!props.is_block);
std::tie(mip_info.pitch, mip_info.size) =
ImageSizeMacroTiled(mip_w, mip_h, bpp, num_samples, tiling_idx, mip);
ImageSizeMacroTiled(mip_w, mip_h, bpp, num_samples, tiling_idx, mip, alt_tile);
break;
}
default: {
@ -392,11 +207,11 @@ void ImageInfo::UpdateSize() {
}
}
mip_info.size *= mip_d;
mip_info.offset = guest_size_bytes;
mip_info.offset = guest_size;
mips_layout.emplace_back(mip_info);
guest_size_bytes += mip_info.size;
guest_size += mip_info.size;
}
guest_size_bytes *= resources.layers;
guest_size *= resources.layers;
}
int ImageInfo::IsMipOf(const ImageInfo& info) const {
@ -468,18 +283,18 @@ int ImageInfo::IsSliceOf(const ImageInfo& info) const {
}
// Check for size alignment.
const bool slice_size = info.guest_size_bytes / info.resources.layers;
if (guest_size_bytes % slice_size != 0) {
const bool slice_size = info.guest_size / info.resources.layers;
if (guest_size % slice_size != 0) {
return -1;
}
// Ensure that address is aligned too.
const auto addr_diff = guest_address - info.guest_address;
if ((addr_diff % guest_size_bytes) != 0) {
if ((addr_diff % guest_size) != 0) {
return -1;
}
return addr_diff / guest_size_bytes;
return addr_diff / guest_size;
}
} // namespace VideoCore

View file

@ -84,8 +84,9 @@ struct ImageInfo {
};
boost::container::small_vector<MipInfo, 14> mips_layout;
VAddr guest_address{0};
u32 guest_size_bytes{0};
u32 guest_size{0};
u32 tiling_idx{0}; // TODO: merge with existing!
bool alt_tile{false};
VAddr stencil_addr{0};
u32 stencil_size{0};

View file

@ -3,7 +3,9 @@
#include <optional>
#include <xxhash.h>
#include "common/assert.h"
#include "common/debug.h"
#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/page_manager.h"
#include "video_core/renderer_vulkan/vk_instance.h"
@ -34,7 +36,7 @@ TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler&
Vulkan::SetObjectName(instance.GetDevice(), null_image, "Null Image");
img.flags = ImageFlagBits::Empty;
img.track_addr = img.info.guest_address;
img.track_addr_end = img.info.guest_address + img.info.guest_size_bytes;
img.track_addr_end = img.info.guest_address + img.info.guest_size;
ImageViewInfo view_info;
const auto null_view_id =
@ -50,7 +52,7 @@ void TextureCache::MarkAsMaybeDirty(ImageId image_id, Image& image) {
if (image.hash == 0) {
// Initialize hash
const u8* addr = std::bit_cast<u8*>(image.info.guest_address);
image.hash = XXH3_64bits(addr, image.info.guest_size_bytes);
image.hash = XXH3_64bits(addr, image.info.guest_size);
}
image.flags |= ImageFlagBits::MaybeCpuDirty;
UntrackImage(image_id);
@ -63,7 +65,7 @@ void TextureCache::InvalidateMemory(VAddr addr, size_t size) {
const auto pages_end = PageManager::GetNextPageAddr(addr + size - 1);
ForEachImageInRegion(pages_start, pages_end - pages_start, [&](ImageId image_id, Image& image) {
const auto image_begin = image.info.guest_address;
const auto image_end = image.info.guest_address + image.info.guest_size_bytes;
const auto image_end = image.info.guest_address + image.info.guest_size;
if (image_begin < end && addr < image_end) {
// Start or end of the modified region is in the image, or the image is entirely within
// the modified region, so the image was definitely accessed by this page fault.
@ -201,7 +203,7 @@ std::tuple<ImageId, int, int> TextureCache::ResolveOverlap(const ImageInfo& imag
}
if (image_info.pixel_format != tex_cache_image.info.pixel_format ||
image_info.guest_size_bytes <= tex_cache_image.info.guest_size_bytes) {
image_info.guest_size <= tex_cache_image.info.guest_size) {
auto result_id = merged_image_id ? merged_image_id : cache_image_id;
const auto& result_image = slot_images[result_id];
return {
@ -302,7 +304,7 @@ ImageId TextureCache::FindImage(BaseDesc& desc, FindFlags flags) {
std::scoped_lock lock{mutex};
boost::container::small_vector<ImageId, 8> image_ids;
ForEachImageInRegion(info.guest_address, info.guest_size_bytes,
ForEachImageInRegion(info.guest_address, info.guest_size,
[&](ImageId image_id, Image& image) { image_ids.push_back(image_id); });
ImageId image_id{};
@ -313,8 +315,7 @@ ImageId TextureCache::FindImage(BaseDesc& desc, FindFlags flags) {
if (cache_image.info.guest_address != info.guest_address) {
continue;
}
if (False(flags & FindFlags::RelaxSize) &&
cache_image.info.guest_size_bytes != info.guest_size_bytes) {
if (False(flags & FindFlags::RelaxSize) && cache_image.info.guest_size != info.guest_size) {
continue;
}
if (False(flags & FindFlags::RelaxDim) && cache_image.info.size != info.size) {
@ -455,7 +456,7 @@ ImageView& TextureCache::FindDepthTarget(BaseDesc& desc) {
if (!stencil_id) {
ImageInfo info{};
info.guest_address = desc.info.stencil_addr;
info.guest_size_bytes = desc.info.stencil_size;
info.guest_size = desc.info.stencil_size;
info.size = desc.info.size;
stencil_id = slot_images.insert(instance, scheduler, info);
RegisterImage(stencil_id);
@ -468,6 +469,9 @@ ImageView& TextureCache::FindDepthTarget(BaseDesc& desc) {
}
void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_scheduler /*= nullptr*/) {
RENDERER_TRACE;
TRACE_HINT(fmt::format("{:x}:{:x}", image.info.guest_address, image.info.guest_size));
if (False(image.flags & ImageFlagBits::Dirty)) {
return;
}
@ -543,7 +547,7 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule
const auto cmdbuf = sched_ptr->CommandBuffer();
const VAddr image_addr = image.info.guest_address;
const size_t image_size = image.info.guest_size_bytes;
const size_t image_size = image.info.guest_size;
const auto [vk_buffer, buf_offset] =
buffer_cache.ObtainViewBuffer(image_addr, image_size, is_gpu_dirty);
@ -612,7 +616,7 @@ void TextureCache::RegisterImage(ImageId image_id) {
ASSERT_MSG(False(image.flags & ImageFlagBits::Registered),
"Trying to register an already registered image");
image.flags |= ImageFlagBits::Registered;
ForEachPage(image.info.guest_address, image.info.guest_size_bytes,
ForEachPage(image.info.guest_address, image.info.guest_size,
[this, image_id](u64 page) { page_table[page].push_back(image_id); });
}
@ -621,7 +625,7 @@ void TextureCache::UnregisterImage(ImageId image_id) {
ASSERT_MSG(True(image.flags & ImageFlagBits::Registered),
"Trying to unregister an already unregistered image");
image.flags &= ~ImageFlagBits::Registered;
ForEachPage(image.info.guest_address, image.info.guest_size_bytes, [this, image_id](u64 page) {
ForEachPage(image.info.guest_address, image.info.guest_size, [this, image_id](u64 page) {
const auto page_it = page_table.find(page);
if (page_it == nullptr) {
UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PageShift);
@ -640,7 +644,7 @@ void TextureCache::UnregisterImage(ImageId image_id) {
void TextureCache::TrackImage(ImageId image_id) {
auto& image = slot_images[image_id];
const auto image_begin = image.info.guest_address;
const auto image_end = image.info.guest_address + image.info.guest_size_bytes;
const auto image_end = image.info.guest_address + image.info.guest_size;
if (image_begin == image.track_addr && image_end == image.track_addr_end) {
return;
}
@ -649,7 +653,7 @@ void TextureCache::TrackImage(ImageId image_id) {
// Re-track the whole image
image.track_addr = image_begin;
image.track_addr_end = image_end;
tracker.UpdatePagesCachedCount(image_begin, image.info.guest_size_bytes, 1);
tracker.UpdatePagesCachedCount(image_begin, image.info.guest_size, 1);
} else {
if (image_begin < image.track_addr) {
TrackImageHead(image_id);
@ -674,7 +678,7 @@ void TextureCache::TrackImageHead(ImageId image_id) {
void TextureCache::TrackImageTail(ImageId image_id) {
auto& image = slot_images[image_id];
const auto image_end = image.info.guest_address + image.info.guest_size_bytes;
const auto image_end = image.info.guest_address + image.info.guest_size;
if (image_end == image.track_addr_end) {
return;
}
@ -719,7 +723,7 @@ void TextureCache::UntrackImageHead(ImageId image_id) {
void TextureCache::UntrackImageTail(ImageId image_id) {
auto& image = slot_images[image_id];
const auto image_end = image.info.guest_address + image.info.guest_size_bytes;
const auto image_end = image.info.guest_address + image.info.guest_size;
if (!image.IsTracked() || image.track_addr_end < image_end) {
return;
}

View file

@ -0,0 +1,347 @@
// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/assert.h"
#include "common/types.h"
namespace VideoCore {
// clang-format off
// The table of macro tiles parameters for given tiling index (row) and bpp (column)
/* Calculation:
* - Inputs:
* TileMode, BytesPerPixel, NumFragments
* - Constants:
* MicroTileWidth = 8, MicroTileHeight = 8,
* Tile Mode LUTs: IsDepth(), IsPrt(), TileThickness(), TileSplit(), SampleSplit(), NumPipes()
* Macro Tile Mode LUTs: BankWidth(), BankHeight(), NumBanks(), MacroTileAspect()
* - Determine the macro tile mode:
* TileBytes = MicroTileWidth * MicroTileHeight * TileThickness(TileMode) * BytesPerPixel
* TileSplit = min(IsDepth(TileMode) ? TileSplit(TileMode) : max(TileBytes * SampleSplit(TileMode), 256), NumFragments * TileBytes, 1024)
* MacroTileModeIndex = log2(TileSplit / 64)
* MacroTileMode = IsPrt(TileMode) ? MacroTileModeIndex + 8 : MacroTileModeIndex
* - Calculate macro tile width and height:
* Width = NumPipes(TileMode) * BankWidth(MacroTileMode) * MicroTileWidth * MacroTileAspect(MacroTileMode, AltTileMode)
* Height = NumBanks(MacroTileMode, AltTileMode) * BankHeight(MacroTileMode, AltTileMode) * MicroTileHeight / MacroTileAspect(MacroTileMode, AltTileMode)
*/
constexpr std::array macro_tile_extents_x1{
std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, // 00
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 01
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 02
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 03
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 04
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 05
std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 06
std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 07
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 08
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 09
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 0A
std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 0B
std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 0C
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 0D
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 0E
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 0F
std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 10
std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 11
std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 12
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 13
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 14
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 15
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 16
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 17
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 18
std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 19
std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 1A
};
constexpr std::array macro_tile_extents_x2{
std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, // 00
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 01
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 02
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 03
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 04
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 05
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 06
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 07
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 08
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 09
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0A
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0B
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0C
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 0D
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0E
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0F
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 10
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 11
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 12
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 13
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 14
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 15
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 16
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 17
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 18
std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 19
std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 1A
};
constexpr std::array macro_tile_extents_x4{
std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, // 00
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 01
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 02
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 03
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 04
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 05
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 06
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 07
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 08
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 09
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0A
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0B
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0C
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 0D
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0E
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0F
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 10
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 11
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 12
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 13
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 14
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 15
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 16
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 17
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 18
std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 19
std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 1A
};
constexpr std::array macro_tile_extents_x8{
std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, // 00
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 01
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 02
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 03
std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 04
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 05
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 06
std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 07
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 08
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 09
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0A
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0B
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0C
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 0D
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0E
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0F
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 10
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 11
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 12
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 13
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 14
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 15
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 16
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 17
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 18
std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 19
std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 1A
};
constexpr std::array macro_tile_extents_alt_x1{
std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, // 00
std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, // 01
std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 02
std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 03
std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, // 04
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 05
std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 06
std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, // 07
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 08
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 09
std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, // 0A
std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, // 0B
std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, // 0C
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 0D
std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, // 0E
std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, // 0F
std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, // 10
std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, // 11
std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, // 12
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 13
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 14
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 15
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 16
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 17
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 18
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 19
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 1A
};
constexpr std::array macro_tile_extents_alt_x2{
std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, // 00
std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, // 01
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 02
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 03
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 04
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 05
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 06
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 07
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 08
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 09
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 0A
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 0B
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 0C
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 0D
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 0E
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 0F
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 10
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 11
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 12
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 13
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 14
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 15
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 16
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 17
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 18
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 19
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 1A
};
constexpr std::array macro_tile_extents_alt_x4{
std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, // 00
std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, // 01
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 02
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 03
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 04
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 05
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 06
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 07
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 08
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 09
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 0A
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 0B
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 0C
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 0D
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 0E
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 0F
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 10
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 11
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 12
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 13
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 14
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 15
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 16
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 17
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 18
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 19
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 1A
};
constexpr std::array macro_tile_extents_alt_x8{
std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, // 00
std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, // 01
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 02
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 03
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 04
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 05
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 06
std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 07
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 08
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 09
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 0A
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 0B
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 0C
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 0D
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 0E
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 0F
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 10
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 11
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 12
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 13
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 14
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 15
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 16
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 17
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 18
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 19
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 1A
};
constexpr std::array macro_tile_extents{
macro_tile_extents_x1,
macro_tile_extents_x2,
macro_tile_extents_x4,
macro_tile_extents_x8,
};
constexpr std::array macro_tile_extents_alt{
macro_tile_extents_alt_x1,
macro_tile_extents_alt_x2,
macro_tile_extents_alt_x4,
macro_tile_extents_alt_x8,
};
// clang-format on
constexpr std::pair micro_tile_extent{8u, 8u};
constexpr auto hw_pipe_interleave = 256u;
constexpr std::pair<u32, u32> GetMacroTileExtents(u32 tiling_idx, u32 bpp, u32 num_samples,
bool alt) {
ASSERT(num_samples <= 8);
const auto samples_log = static_cast<u32>(std::log2(num_samples));
const auto row = tiling_idx * 5;
const auto column = std::bit_width(bpp) - 4; // bpps are 8, 16, 32, 64, 128
return (alt ? macro_tile_extents_alt : macro_tile_extents)[samples_log][row + column];
}
constexpr std::pair<u32, size_t> ImageSizeLinearAligned(u32 pitch, u32 height, u32 bpp,
u32 num_samples) {
const auto pitch_align = std::max(8u, 64u / ((bpp + 7) / 8));
auto pitch_aligned = (pitch + pitch_align - 1) & ~(pitch_align - 1);
const auto height_aligned = height;
size_t log_sz = pitch_aligned * height_aligned * num_samples;
const auto slice_align = std::max(64u, 256u / ((bpp + 7) / 8));
while (log_sz % slice_align) {
pitch_aligned += pitch_align;
log_sz = pitch_aligned * height_aligned * num_samples;
}
return {pitch_aligned, (log_sz * bpp + 7) / 8};
}
constexpr std::pair<u32, size_t> ImageSizeMicroTiled(u32 pitch, u32 height, u32 bpp,
u32 num_samples) {
const auto& [pitch_align, height_align] = micro_tile_extent;
auto pitch_aligned = (pitch + pitch_align - 1) & ~(pitch_align - 1);
const auto height_aligned = (height + height_align - 1) & ~(height_align - 1);
size_t log_sz = (pitch_aligned * height_aligned * bpp * num_samples + 7) / 8;
while (log_sz % 256) {
pitch_aligned += 8;
log_sz = (pitch_aligned * height_aligned * bpp * num_samples + 7) / 8;
}
return {pitch_aligned, log_sz};
}
constexpr std::pair<u32, size_t> ImageSizeMacroTiled(u32 pitch, u32 height, u32 bpp,
u32 num_samples, u32 tiling_idx, u32 mip_n,
bool alt) {
const auto& [pitch_align, height_align] =
GetMacroTileExtents(tiling_idx, bpp, num_samples, alt);
ASSERT(pitch_align != 0 && height_align != 0);
bool downgrade_to_micro = false;
if (mip_n > 0) {
const bool is_less_than_tile = pitch < pitch_align || height < height_align;
// TODO: threshold check
downgrade_to_micro = is_less_than_tile;
}
if (downgrade_to_micro) {
return ImageSizeMicroTiled(pitch, height, bpp, num_samples);
}
const auto pitch_aligned = (pitch + pitch_align - 1) & ~(pitch_align - 1);
const auto height_aligned = (height + height_align - 1) & ~(height_align - 1);
const auto log_sz = pitch_aligned * height_aligned * num_samples;
return {pitch_aligned, (log_sz * bpp + 7) / 8};
}
} // namespace VideoCore

View file

@ -8,113 +8,47 @@
#include "video_core/texture_cache/image_view.h"
#include "video_core/texture_cache/tile_manager.h"
#include "video_core/host_shaders/detile_m32x1_comp.h"
#include "video_core/host_shaders/detile_m32x2_comp.h"
#include "video_core/host_shaders/detile_m32x4_comp.h"
#include "video_core/host_shaders/detile_m8x1_comp.h"
#include "video_core/host_shaders/detile_m8x2_comp.h"
#include "video_core/host_shaders/detile_macro32x1_comp.h"
#include "video_core/host_shaders/detile_macro32x2_comp.h"
#include "video_core/host_shaders/detile_macro8x1_comp.h"
#include "video_core/host_shaders/detilers/macro_32bpp_comp.h"
#include "video_core/host_shaders/detilers/macro_64bpp_comp.h"
#include "video_core/host_shaders/detilers/macro_8bpp_comp.h"
#include "video_core/host_shaders/detilers/micro_128bpp_comp.h"
#include "video_core/host_shaders/detilers/micro_16bpp_comp.h"
#include "video_core/host_shaders/detilers/micro_32bpp_comp.h"
#include "video_core/host_shaders/detilers/micro_64bpp_comp.h"
#include "video_core/host_shaders/detilers/micro_8bpp_comp.h"
#include <boost/container/static_vector.hpp>
// #include <boost/container/static_vector.hpp>
#include <magic_enum/magic_enum.hpp>
#include <vk_mem_alloc.h>
namespace VideoCore {
static vk::Format DemoteImageFormatForDetiling(vk::Format format) {
switch (format) {
case vk::Format::eR8Uint:
case vk::Format::eR8Unorm:
case vk::Format::eR8Snorm:
return vk::Format::eR8Uint;
case vk::Format::eR4G4B4A4UnormPack16:
case vk::Format::eB5G6R5UnormPack16:
case vk::Format::eR5G5B5A1UnormPack16:
case vk::Format::eR8G8Unorm:
case vk::Format::eR16Sfloat:
case vk::Format::eR16Uint:
case vk::Format::eR16Unorm:
case vk::Format::eD16Unorm:
return vk::Format::eR8G8Uint;
case vk::Format::eR8G8B8A8Srgb:
case vk::Format::eB8G8R8A8Srgb:
case vk::Format::eB8G8R8A8Unorm:
case vk::Format::eR8G8B8A8Unorm:
case vk::Format::eR8G8B8A8Snorm:
case vk::Format::eR8G8B8A8Uint:
case vk::Format::eR32Sfloat:
case vk::Format::eD32Sfloat:
case vk::Format::eR32Uint:
case vk::Format::eR16G16Sfloat:
case vk::Format::eR16G16Unorm:
case vk::Format::eR16G16Snorm:
case vk::Format::eB10G11R11UfloatPack32:
case vk::Format::eA2B10G10R10UnormPack32:
return vk::Format::eR32Uint;
case vk::Format::eBc1RgbaSrgbBlock:
case vk::Format::eBc1RgbaUnormBlock:
case vk::Format::eBc4UnormBlock:
case vk::Format::eR32G32Sfloat:
case vk::Format::eR32G32Uint:
case vk::Format::eR16G16B16A16Unorm:
case vk::Format::eR16G16B16A16Uint:
case vk::Format::eR16G16B16A16Sfloat:
return vk::Format::eR32G32Uint;
case vk::Format::eBc2SrgbBlock:
case vk::Format::eBc2UnormBlock:
case vk::Format::eBc3SrgbBlock:
case vk::Format::eBc3UnormBlock:
case vk::Format::eBc5UnormBlock:
case vk::Format::eBc5SnormBlock:
case vk::Format::eBc7SrgbBlock:
case vk::Format::eBc7UnormBlock:
case vk::Format::eBc6HUfloatBlock:
case vk::Format::eR32G32B32A32Uint:
case vk::Format::eR32G32B32A32Sfloat:
return vk::Format::eR32G32B32A32Uint;
default:
break;
}
// Log missing formats only once to avoid spamming the log.
static constexpr size_t MaxFormatIndex = 256;
static std::array<bool, MaxFormatIndex> logged_formats{};
if (const u32 index = u32(format); !logged_formats[index]) {
LOG_ERROR(Render_Vulkan, "Unexpected format for demotion {}", vk::to_string(format));
logged_formats[index] = true;
}
return format;
}
const DetilerContext* TileManager::GetDetiler(const ImageInfo& info) const {
const auto format = DemoteImageFormatForDetiling(info.pixel_format);
const auto bpp = info.num_bits * (info.props.is_block ? 16 : 1);
switch (info.tiling_mode) {
case AmdGpu::TilingMode::Texture_MicroTiled:
switch (format) {
case vk::Format::eR8Uint:
return &detilers[DetilerType::Micro8x1];
case vk::Format::eR8G8Uint:
return &detilers[DetilerType::Micro8x2];
case vk::Format::eR32Uint:
return &detilers[DetilerType::Micro32x1];
case vk::Format::eR32G32Uint:
return &detilers[DetilerType::Micro32x2];
case vk::Format::eR32G32B32A32Uint:
return &detilers[DetilerType::Micro32x4];
switch (bpp) {
case 8:
return &detilers[DetilerType::Micro8];
case 16:
return &detilers[DetilerType::Micro16];
case 32:
return &detilers[DetilerType::Micro32];
case 64:
return &detilers[DetilerType::Micro64];
case 128:
return &detilers[DetilerType::Micro128];
default:
return nullptr;
}
case AmdGpu::TilingMode::Texture_Volume:
switch (format) {
case vk::Format::eR8Uint:
return &detilers[DetilerType::Macro8x1];
case vk::Format::eR32Uint:
return &detilers[DetilerType::Macro32x1];
case vk::Format::eR32G32Uint:
return &detilers[DetilerType::Macro32x2];
switch (bpp) {
case 8:
return &detilers[DetilerType::Macro8];
case 32:
return &detilers[DetilerType::Macro32];
case 64:
return &detilers[DetilerType::Macro64];
default:
return nullptr;
}
@ -134,10 +68,10 @@ struct DetilerParams {
TileManager::TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler)
: instance{instance}, scheduler{scheduler} {
static const std::array detiler_shaders{
HostShaders::DETILE_M8X1_COMP, HostShaders::DETILE_M8X2_COMP,
HostShaders::DETILE_M32X1_COMP, HostShaders::DETILE_M32X2_COMP,
HostShaders::DETILE_M32X4_COMP, HostShaders::DETILE_MACRO8X1_COMP,
HostShaders::DETILE_MACRO32X1_COMP, HostShaders::DETILE_MACRO32X2_COMP,
HostShaders::MICRO_8BPP_COMP, HostShaders::MICRO_16BPP_COMP,
HostShaders::MICRO_32BPP_COMP, HostShaders::MICRO_64BPP_COMP,
HostShaders::MICRO_128BPP_COMP, HostShaders::MACRO_8BPP_COMP,
HostShaders::MACRO_32BPP_COMP, HostShaders::MACRO_64BPP_COMP,
};
boost::container::static_vector<vk::DescriptorSetLayoutBinding, 2> bindings{
@ -278,7 +212,7 @@ std::pair<vk::Buffer, u32> TileManager::TryDetile(vk::Buffer in_buffer, u32 in_o
return {in_buffer, in_offset};
}
const u32 image_size = info.guest_size_bytes;
const u32 image_size = info.guest_size;
// Prepare output buffer
auto out_buffer = AllocBuffer(image_size, true);

View file

@ -12,15 +12,15 @@ class TextureCache;
struct ImageInfo;
enum DetilerType : u32 {
Micro8x1,
Micro8x2,
Micro32x1,
Micro32x2,
Micro32x4,
Micro8,
Micro16,
Micro32,
Micro64,
Micro128,
Macro8x1,
Macro32x1,
Macro32x2,
Macro8,
Macro32,
Macro64,
Max
};