mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-04-20 03:24:49 +00:00
Allow our BMI1 emulation to work on non-macOS CPUs - also emulate TZCNT (#2526)
* Allow our BMI1 emulation to work on non-macOS CPUs * Add TZCNT * Some changes * Subtract and add to rsp
This commit is contained in:
parent
e82c8d2f70
commit
7bef4a5c70
1 changed files with 65 additions and 8 deletions
|
@ -202,21 +202,39 @@ static void RestoreStack(Xbyak::CodeGenerator& c) {
|
|||
|
||||
/// Switches to the patch stack, saves registers, and restores the original stack.
|
||||
static void SaveRegisters(Xbyak::CodeGenerator& c, const std::initializer_list<Xbyak::Reg> regs) {
|
||||
// Uses a more robust solution for saving registers on MacOS to avoid potential stack corruption
|
||||
// if games decide to not follow the ABI and use the red zone.
|
||||
#ifdef __APPLE__
|
||||
SaveStack(c);
|
||||
#else
|
||||
c.lea(rsp, ptr[rsp - 128]); // red zone
|
||||
#endif
|
||||
for (const auto& reg : regs) {
|
||||
c.push(reg.cvt64());
|
||||
}
|
||||
#ifdef __APPLE__
|
||||
RestoreStack(c);
|
||||
#else
|
||||
c.lea(rsp, ptr[rsp + 128]);
|
||||
#endif
|
||||
}
|
||||
|
||||
/// Switches to the patch stack, restores registers, and restores the original stack.
|
||||
static void RestoreRegisters(Xbyak::CodeGenerator& c,
|
||||
const std::initializer_list<Xbyak::Reg> regs) {
|
||||
#ifdef __APPLE__
|
||||
SaveStack(c);
|
||||
#else
|
||||
c.lea(rsp, ptr[rsp - 128]); // red zone
|
||||
#endif
|
||||
for (const auto& reg : regs) {
|
||||
c.pop(reg.cvt64());
|
||||
}
|
||||
#ifdef __APPLE__
|
||||
RestoreStack(c);
|
||||
#else
|
||||
c.lea(rsp, ptr[rsp + 128]);
|
||||
#endif
|
||||
}
|
||||
|
||||
/// Switches to the patch stack and stores all registers.
|
||||
|
@ -257,12 +275,11 @@ static void RestoreContext(Xbyak::CodeGenerator& c, const Xbyak::Operand& dst,
|
|||
RestoreStack(c);
|
||||
}
|
||||
|
||||
#ifdef __APPLE__
|
||||
|
||||
static void GenerateANDN(const ZydisDecodedOperand* operands, Xbyak::CodeGenerator& c) {
|
||||
const auto dst = ZydisToXbyakRegisterOperand(operands[0]);
|
||||
const auto src1 = ZydisToXbyakRegisterOperand(operands[1]);
|
||||
const auto src2 = ZydisToXbyakOperand(operands[2]);
|
||||
ASSERT_MSG(dst.getIdx() != rsp.getIdx(), "ANDN overwriting the stack pointer");
|
||||
|
||||
// Check if src2 is a memory operand or a register different to dst.
|
||||
// In those cases, we don't need to use a temporary register and are free to modify dst.
|
||||
|
@ -301,6 +318,7 @@ static void GenerateBEXTR(const ZydisDecodedOperand* operands, Xbyak::CodeGenera
|
|||
const auto dst = ZydisToXbyakRegisterOperand(operands[0]);
|
||||
const auto src = ZydisToXbyakOperand(operands[1]);
|
||||
const auto start_len = ZydisToXbyakRegisterOperand(operands[2]);
|
||||
ASSERT_MSG(dst.getIdx() != rsp.getIdx(), "BEXTR overwriting the stack pointer");
|
||||
|
||||
const Xbyak::Reg32e shift(Xbyak::Operand::RCX, static_cast<int>(start_len.getBit()));
|
||||
const auto scratch1 =
|
||||
|
@ -338,6 +356,7 @@ static void GenerateBEXTR(const ZydisDecodedOperand* operands, Xbyak::CodeGenera
|
|||
static void GenerateBLSI(const ZydisDecodedOperand* operands, Xbyak::CodeGenerator& c) {
|
||||
const auto dst = ZydisToXbyakRegisterOperand(operands[0]);
|
||||
const auto src = ZydisToXbyakOperand(operands[1]);
|
||||
ASSERT_MSG(dst.getIdx() != rsp.getIdx(), "BLSI overwriting the stack pointer");
|
||||
|
||||
const auto scratch = AllocateScratchRegister({&dst, src.get()}, dst.getBit());
|
||||
|
||||
|
@ -367,6 +386,7 @@ static void GenerateBLSI(const ZydisDecodedOperand* operands, Xbyak::CodeGenerat
|
|||
static void GenerateBLSMSK(const ZydisDecodedOperand* operands, Xbyak::CodeGenerator& c) {
|
||||
const auto dst = ZydisToXbyakRegisterOperand(operands[0]);
|
||||
const auto src = ZydisToXbyakOperand(operands[1]);
|
||||
ASSERT_MSG(dst.getIdx() != rsp.getIdx(), "BLSMSK overwriting the stack pointer");
|
||||
|
||||
const auto scratch = AllocateScratchRegister({&dst, src.get()}, dst.getBit());
|
||||
|
||||
|
@ -395,9 +415,37 @@ static void GenerateBLSMSK(const ZydisDecodedOperand* operands, Xbyak::CodeGener
|
|||
RestoreRegisters(c, {scratch});
|
||||
}
|
||||
|
||||
static void GenerateTZCNT(const ZydisDecodedOperand* operands, Xbyak::CodeGenerator& c) {
|
||||
const auto dst = ZydisToXbyakRegisterOperand(operands[0]);
|
||||
const auto src = ZydisToXbyakOperand(operands[1]);
|
||||
ASSERT_MSG(dst.getIdx() != rsp.getIdx(), "TZCNT overwriting the stack pointer");
|
||||
|
||||
Xbyak::Label src_zero, end;
|
||||
|
||||
c.cmp(*src, 0);
|
||||
c.je(src_zero);
|
||||
|
||||
// If src is not zero, functions like a BSF, but also clears the CF
|
||||
c.bsf(dst, *src);
|
||||
c.clc();
|
||||
c.jmp(end);
|
||||
|
||||
c.L(src_zero);
|
||||
c.mov(dst, operands[0].size);
|
||||
// Since dst is not zero, also set ZF to zero. Testing dst with itself when we know
|
||||
// it isn't zero is a good way to do this.
|
||||
// Use cvt32 to avoid REX/Operand size prefixes.
|
||||
c.test(dst.cvt32(), dst.cvt32());
|
||||
// When source is zero, TZCNT also sets CF.
|
||||
c.stc();
|
||||
|
||||
c.L(end);
|
||||
}
|
||||
|
||||
static void GenerateBLSR(const ZydisDecodedOperand* operands, Xbyak::CodeGenerator& c) {
|
||||
const auto dst = ZydisToXbyakRegisterOperand(operands[0]);
|
||||
const auto src = ZydisToXbyakOperand(operands[1]);
|
||||
ASSERT_MSG(dst.getIdx() != rsp.getIdx(), "BLSR overwriting the stack pointer");
|
||||
|
||||
const auto scratch = AllocateScratchRegister({&dst, src.get()}, dst.getBit());
|
||||
|
||||
|
@ -426,6 +474,8 @@ static void GenerateBLSR(const ZydisDecodedOperand* operands, Xbyak::CodeGenerat
|
|||
RestoreRegisters(c, {scratch});
|
||||
}
|
||||
|
||||
#ifdef __APPLE__
|
||||
|
||||
static __attribute__((sysv_abi)) void PerformVCVTPH2PS(float* out, const half_float::half* in,
|
||||
const u32 count) {
|
||||
for (u32 i = 0; i < count; i++) {
|
||||
|
@ -616,6 +666,11 @@ static bool FilterNoSSE4a(const ZydisDecodedOperand*) {
|
|||
return !cpu.has(Cpu::tSSE4a);
|
||||
}
|
||||
|
||||
static bool FilterNoBMI1(const ZydisDecodedOperand*) {
|
||||
Cpu cpu;
|
||||
return !cpu.has(Cpu::tBMI1);
|
||||
}
|
||||
|
||||
static void GenerateEXTRQ(const ZydisDecodedOperand* operands, Xbyak::CodeGenerator& c) {
|
||||
bool immediateForm = operands[1].type == ZYDIS_OPERAND_TYPE_IMMEDIATE &&
|
||||
operands[2].type == ZYDIS_OPERAND_TYPE_IMMEDIATE;
|
||||
|
@ -897,14 +952,16 @@ static const std::unordered_map<ZydisMnemonic, PatchInfo> Patches = {
|
|||
{ZYDIS_MNEMONIC_EXTRQ, {FilterNoSSE4a, GenerateEXTRQ, true}},
|
||||
{ZYDIS_MNEMONIC_INSERTQ, {FilterNoSSE4a, GenerateINSERTQ, true}},
|
||||
|
||||
// BMI1
|
||||
{ZYDIS_MNEMONIC_ANDN, {FilterNoBMI1, GenerateANDN, true}},
|
||||
{ZYDIS_MNEMONIC_BEXTR, {FilterNoBMI1, GenerateBEXTR, true}},
|
||||
{ZYDIS_MNEMONIC_BLSI, {FilterNoBMI1, GenerateBLSI, true}},
|
||||
{ZYDIS_MNEMONIC_BLSMSK, {FilterNoBMI1, GenerateBLSMSK, true}},
|
||||
{ZYDIS_MNEMONIC_BLSR, {FilterNoBMI1, GenerateBLSR, true}},
|
||||
{ZYDIS_MNEMONIC_TZCNT, {FilterNoBMI1, GenerateTZCNT, true}},
|
||||
|
||||
#ifdef __APPLE__
|
||||
// Patches for instruction sets not supported by Rosetta 2.
|
||||
// BMI1
|
||||
{ZYDIS_MNEMONIC_ANDN, {FilterRosetta2Only, GenerateANDN, true}},
|
||||
{ZYDIS_MNEMONIC_BEXTR, {FilterRosetta2Only, GenerateBEXTR, true}},
|
||||
{ZYDIS_MNEMONIC_BLSI, {FilterRosetta2Only, GenerateBLSI, true}},
|
||||
{ZYDIS_MNEMONIC_BLSMSK, {FilterRosetta2Only, GenerateBLSMSK, true}},
|
||||
{ZYDIS_MNEMONIC_BLSR, {FilterRosetta2Only, GenerateBLSR, true}},
|
||||
// F16C
|
||||
{ZYDIS_MNEMONIC_VCVTPH2PS, {FilterRosetta2Only, GenerateVCVTPH2PS, true}},
|
||||
{ZYDIS_MNEMONIC_VCVTPS2PH, {FilterRosetta2Only, GenerateVCVTPS2PH, true}},
|
||||
|
|
Loading…
Add table
Reference in a new issue