Merge branch 'master' into LTO

commit 254346b863
Authored by Elad on 2025-02-11 19:46:27 +02:00, committed by GitHub
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
61 changed files with 1674 additions and 3063 deletions


@ -478,6 +478,7 @@ target_sources(rpcs3_emu PRIVATE
RSX/Common/TextureUtils.cpp
RSX/Common/texture_cache.cpp
RSX/Core/RSXContext.cpp
RSX/Core/RSXDisplay.cpp
RSX/Core/RSXDrawCommands.cpp
RSX/gcm_enums.cpp
RSX/gcm_printing.cpp
@ -492,6 +493,7 @@ target_sources(rpcs3_emu PRIVATE
RSX/GL/GLPipelineCompiler.cpp
RSX/GL/GLPresent.cpp
RSX/GL/GLRenderTargets.cpp
RSX/GL/GLResolveHelper.cpp
RSX/GL/GLShaderInterpreter.cpp
RSX/GL/GLTexture.cpp
RSX/GL/GLTextureCache.cpp

File diff suppressed because it is too large.


@ -1419,738 +1419,3 @@ struct ppu_iname
#undef NAME
#undef NAME_
};
// PPU Analyser Context
struct ppu_acontext
{
// General-purpose register range
struct spec_gpr
{
// Integral range: normalized undef = (0;UINT64_MAX), unnormalized undefs are possible (when max = min - 1)
// Bit range: constant 0 = (0;0), constant 1 = (1;1), normalized undef = (0;1), unnormalized undef = (1;0)
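// Illustrative example: a register known to hold either 4 or 6 has imin = 4, imax = 6 and
// bmin = 4 (0b100), bmax = 6 (0b110); bit 1 differs between the two values, so it is undef,
// giving mask() = 6 (bits that may be 1), ones() = 4 (bits certainly 1) and undefs() = 2.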
u64 imin = 0ull; // Integral range begin
u64 imax = ~0ull; // Integral range end
u64 bmin = 0ull; // Bit range begin
u64 bmax = ~0ull; // Bit range end
void set_undef()
{
imin = 0;
imax = -1;
bmin = 0;
bmax = -1;
}
// (Number of possible values - 1), 0 = const
u64 div() const
{
return imax - imin;
}
// Return zero bits for zeros, ones for ones or undefs
u64 mask() const
{
return bmin | bmax;
}
// Return one bits for ones, zeros for zeros or undefs
u64 ones() const
{
return bmin & bmax;
}
// Return one bits for undefs
u64 undefs() const
{
return bmin ^ bmax;
}
// Return number of trailing zero bits
u64 tz() const
{
return std::countr_zero(mask());
}
// Range NOT
spec_gpr operator ~() const
{
spec_gpr r;
r.imin = ~imax;
r.imax = ~imin;
r.bmin = ~bmax;
r.bmax = ~bmin;
return r;
}
// Range ADD
spec_gpr operator +(const spec_gpr& rhs) const
{
spec_gpr r{};
const u64 adiv = div();
const u64 bdiv = rhs.div();
// Check overflow, generate normalized range
if (adiv != umax && bdiv != umax && adiv <= adiv + bdiv)
{
r = range(imin + rhs.imin, imax + rhs.imax);
}
// Carry for bitrange computation
u64 cmin = 0;
u64 cmax = 0;
const u64 amask = mask();
const u64 bmask = rhs.mask();
const u64 aones = ones();
const u64 bones = rhs.ones();
for (u32 i = 0; i < 64; i++)
{
cmin += ((amask >> i) & 1) + ((bmask >> i) & 1);
cmax += ((aones >> i) & 1) + ((bones >> i) & 1);
// Discover some constant bits
if (cmin == cmax)
{
r.bmin |= (cmin & 1) << i;
r.bmax &= ~((~cmin & 1) << i);
}
cmin >>= 1;
cmax >>= 1;
}
return r;
}
// Range AND
spec_gpr operator &(const spec_gpr& rhs) const
{
// Ignore inverted ranges (TODO)
if (imin > imax || rhs.imin > rhs.imax)
{
return approx(ones() & rhs.ones(), mask() & rhs.mask());
}
// Const (TODO: remove when unnecessary)
if (imin == imax && rhs.imin == rhs.imax)
{
return fixed(imin & rhs.imin);
}
// Swap (TODO: remove when unnecessary)
if (imin == imax || rhs.undefs() > undefs())
{
return rhs & *this;
}
// Copy and attempt to partially preserve integral range
spec_gpr r = *this;
for (u32 i = 63; ~i; i--)
{
const u64 m = 1ull << i;
if (!(rhs.mask() & m))
{
if (r.undefs() & m)
{
// undef -> 0
r.imin &= ~(m - 1);
r.imax |= (m - 1);
r.imin &= ~m;
r.imax &= ~m;
}
else if (r.ones() & m)
{
// 1 -> 0
if ((r.imin ^ r.imax) > (m - 1))
{
r.imin &= ~(m - 1);
r.imax |= (m - 1);
}
r.imin &= ~m;
r.imax &= ~m;
}
}
else if (rhs.undefs() & m)
{
// -> undef
r.imin &= ~(m - 1);
r.imax |= (m - 1);
r.imin &= ~m;
r.imax |= m;
}
}
r.bmin = ones() & rhs.ones();
r.bmax = mask() & rhs.mask();
return r;
}
// Range OR
spec_gpr operator |(const spec_gpr& rhs) const
{
// Ignore inverted ranges (TODO)
if (imin > imax || rhs.imin > rhs.imax)
{
return approx(ones() | rhs.ones(), mask() | rhs.mask());
}
// Const (TODO: remove when unnecessary)
if (imin == imax && rhs.imin == rhs.imax)
{
return fixed(imin | rhs.imin);
}
// Swap (TODO: remove when unnecessary)
if (imin == imax || rhs.undefs() > undefs())
{
return rhs | *this;
}
// Copy and attempt to partially preserve integral range
spec_gpr r = *this;
for (u32 i = 63; ~i; i--)
{
const u64 m = 1ull << i;
if (rhs.ones() & m)
{
if (r.undefs() & m)
{
// undef -> 1
r.imin &= ~(m - 1);
r.imax |= (m - 1);
r.imin |= m;
r.imax |= m;
}
else if (!(r.mask() & m))
{
// 0 -> 1
if ((r.imin ^ r.imax) > (m - 1))
{
r.imin &= ~(m - 1);
r.imax |= (m - 1);
}
r.imin |= m;
r.imax |= m;
}
}
else if (rhs.undefs() & m)
{
// -> undef
r.imin &= ~(m - 1);
r.imax |= (m - 1);
r.imin &= ~m;
r.imax |= m;
}
}
r.bmin = ones() | rhs.ones();
r.bmax = mask() | rhs.mask();
return r;
}
// Range XOR
spec_gpr operator ^(const spec_gpr& rhs) const
{
return (~*this & rhs) | (*this & ~rhs);
}
// Check whether the value is in range
bool test(u64 value) const
{
if (imin <= imax)
{
if (value < imin || value > imax)
{
return false;
}
}
else
{
if (value < imin && value > imax)
{
return false;
}
}
if ((value & mask()) != value)
{
return false;
}
if ((value | ones()) != value)
{
return false;
}
return true;
}
// Constant value
static spec_gpr fixed(u64 value)
{
spec_gpr r;
r.imin = value;
r.imax = value;
r.bmin = value;
r.bmax = value;
return r;
}
// Range (tz = number of constant trailing zeros)
static spec_gpr range(u64 min, u64 max, u64 tz = 0)
{
const u64 mask = tz < 64 ? ~0ull << tz : 0ull;
spec_gpr r;
r.bmin = 0;
r.bmax = mask;
// Normalize min/max for tz (TODO)
if (min < max)
{
// Inverted constant MSB mask
const u64 mix = ~0ull >> std::countl_zero(min ^ max);
r.bmin |= min & ~mix;
r.bmax &= max | mix;
r.imin = (min + ~mask) & mask;
r.imax = max & mask;
ensure(r.imin <= r.imax); // "Impossible range"
}
else
{
r.imin = min & mask;
r.imax = (max + ~mask) & mask;
ensure(r.imin >= r.imax); // "Impossible range"
}
// Fix const values
if (r.imin == r.imax)
{
r.bmin = r.imin;
r.bmax = r.imax;
}
return r;
}
// Make from bitrange (normalize, approximate range values)
static spec_gpr approx(u64 bmin, u64 bmax)
{
spec_gpr r;
r.imin = bmin & ~(bmin ^ bmax);
r.imax = bmax | (bmin ^ bmax);
r.bmin = bmin & ~(bmin ^ bmax);
r.bmax = bmax | (bmin ^ bmax);
return r;
}
} gpr[32]{};
// Vector registers (draft)
struct spec_vec
{
u8 imin8[16]{};
u8 imax8[16]{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255};
u16 imin16[8]{};
u16 imax16[8]{0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff};
u32 imin32[4]{};
u32 imax32[4]{0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu};
u64 bmin64[2]{};
u64 bmax64[2]{0xffffffffffffffffull, 0xffffffffffffffffull};
};
// Info
u32 cia;
// Analyser step
void UNK(ppu_opcode_t);
void MFVSCR(ppu_opcode_t);
void MTVSCR(ppu_opcode_t);
void VADDCUW(ppu_opcode_t);
void VADDFP(ppu_opcode_t);
void VADDSBS(ppu_opcode_t);
void VADDSHS(ppu_opcode_t);
void VADDSWS(ppu_opcode_t);
void VADDUBM(ppu_opcode_t);
void VADDUBS(ppu_opcode_t);
void VADDUHM(ppu_opcode_t);
void VADDUHS(ppu_opcode_t);
void VADDUWM(ppu_opcode_t);
void VADDUWS(ppu_opcode_t);
void VAND(ppu_opcode_t);
void VANDC(ppu_opcode_t);
void VAVGSB(ppu_opcode_t);
void VAVGSH(ppu_opcode_t);
void VAVGSW(ppu_opcode_t);
void VAVGUB(ppu_opcode_t);
void VAVGUH(ppu_opcode_t);
void VAVGUW(ppu_opcode_t);
void VCFSX(ppu_opcode_t);
void VCFUX(ppu_opcode_t);
void VCMPBFP(ppu_opcode_t);
void VCMPEQFP(ppu_opcode_t);
void VCMPEQUB(ppu_opcode_t);
void VCMPEQUH(ppu_opcode_t);
void VCMPEQUW(ppu_opcode_t);
void VCMPGEFP(ppu_opcode_t);
void VCMPGTFP(ppu_opcode_t);
void VCMPGTSB(ppu_opcode_t);
void VCMPGTSH(ppu_opcode_t);
void VCMPGTSW(ppu_opcode_t);
void VCMPGTUB(ppu_opcode_t);
void VCMPGTUH(ppu_opcode_t);
void VCMPGTUW(ppu_opcode_t);
void VCTSXS(ppu_opcode_t);
void VCTUXS(ppu_opcode_t);
void VEXPTEFP(ppu_opcode_t);
void VLOGEFP(ppu_opcode_t);
void VMADDFP(ppu_opcode_t);
void VMAXFP(ppu_opcode_t);
void VMAXSB(ppu_opcode_t);
void VMAXSH(ppu_opcode_t);
void VMAXSW(ppu_opcode_t);
void VMAXUB(ppu_opcode_t);
void VMAXUH(ppu_opcode_t);
void VMAXUW(ppu_opcode_t);
void VMHADDSHS(ppu_opcode_t);
void VMHRADDSHS(ppu_opcode_t);
void VMINFP(ppu_opcode_t);
void VMINSB(ppu_opcode_t);
void VMINSH(ppu_opcode_t);
void VMINSW(ppu_opcode_t);
void VMINUB(ppu_opcode_t);
void VMINUH(ppu_opcode_t);
void VMINUW(ppu_opcode_t);
void VMLADDUHM(ppu_opcode_t);
void VMRGHB(ppu_opcode_t);
void VMRGHH(ppu_opcode_t);
void VMRGHW(ppu_opcode_t);
void VMRGLB(ppu_opcode_t);
void VMRGLH(ppu_opcode_t);
void VMRGLW(ppu_opcode_t);
void VMSUMMBM(ppu_opcode_t);
void VMSUMSHM(ppu_opcode_t);
void VMSUMSHS(ppu_opcode_t);
void VMSUMUBM(ppu_opcode_t);
void VMSUMUHM(ppu_opcode_t);
void VMSUMUHS(ppu_opcode_t);
void VMULESB(ppu_opcode_t);
void VMULESH(ppu_opcode_t);
void VMULEUB(ppu_opcode_t);
void VMULEUH(ppu_opcode_t);
void VMULOSB(ppu_opcode_t);
void VMULOSH(ppu_opcode_t);
void VMULOUB(ppu_opcode_t);
void VMULOUH(ppu_opcode_t);
void VNMSUBFP(ppu_opcode_t);
void VNOR(ppu_opcode_t);
void VOR(ppu_opcode_t);
void VPERM(ppu_opcode_t);
void VPKPX(ppu_opcode_t);
void VPKSHSS(ppu_opcode_t);
void VPKSHUS(ppu_opcode_t);
void VPKSWSS(ppu_opcode_t);
void VPKSWUS(ppu_opcode_t);
void VPKUHUM(ppu_opcode_t);
void VPKUHUS(ppu_opcode_t);
void VPKUWUM(ppu_opcode_t);
void VPKUWUS(ppu_opcode_t);
void VREFP(ppu_opcode_t);
void VRFIM(ppu_opcode_t);
void VRFIN(ppu_opcode_t);
void VRFIP(ppu_opcode_t);
void VRFIZ(ppu_opcode_t);
void VRLB(ppu_opcode_t);
void VRLH(ppu_opcode_t);
void VRLW(ppu_opcode_t);
void VRSQRTEFP(ppu_opcode_t);
void VSEL(ppu_opcode_t);
void VSL(ppu_opcode_t);
void VSLB(ppu_opcode_t);
void VSLDOI(ppu_opcode_t);
void VSLH(ppu_opcode_t);
void VSLO(ppu_opcode_t);
void VSLW(ppu_opcode_t);
void VSPLTB(ppu_opcode_t);
void VSPLTH(ppu_opcode_t);
void VSPLTISB(ppu_opcode_t);
void VSPLTISH(ppu_opcode_t);
void VSPLTISW(ppu_opcode_t);
void VSPLTW(ppu_opcode_t);
void VSR(ppu_opcode_t);
void VSRAB(ppu_opcode_t);
void VSRAH(ppu_opcode_t);
void VSRAW(ppu_opcode_t);
void VSRB(ppu_opcode_t);
void VSRH(ppu_opcode_t);
void VSRO(ppu_opcode_t);
void VSRW(ppu_opcode_t);
void VSUBCUW(ppu_opcode_t);
void VSUBFP(ppu_opcode_t);
void VSUBSBS(ppu_opcode_t);
void VSUBSHS(ppu_opcode_t);
void VSUBSWS(ppu_opcode_t);
void VSUBUBM(ppu_opcode_t);
void VSUBUBS(ppu_opcode_t);
void VSUBUHM(ppu_opcode_t);
void VSUBUHS(ppu_opcode_t);
void VSUBUWM(ppu_opcode_t);
void VSUBUWS(ppu_opcode_t);
void VSUMSWS(ppu_opcode_t);
void VSUM2SWS(ppu_opcode_t);
void VSUM4SBS(ppu_opcode_t);
void VSUM4SHS(ppu_opcode_t);
void VSUM4UBS(ppu_opcode_t);
void VUPKHPX(ppu_opcode_t);
void VUPKHSB(ppu_opcode_t);
void VUPKHSH(ppu_opcode_t);
void VUPKLPX(ppu_opcode_t);
void VUPKLSB(ppu_opcode_t);
void VUPKLSH(ppu_opcode_t);
void VXOR(ppu_opcode_t);
void TDI(ppu_opcode_t);
void TWI(ppu_opcode_t);
void MULLI(ppu_opcode_t);
void SUBFIC(ppu_opcode_t);
void CMPLI(ppu_opcode_t);
void CMPI(ppu_opcode_t);
void ADDIC(ppu_opcode_t);
void ADDI(ppu_opcode_t);
void ADDIS(ppu_opcode_t);
void BC(ppu_opcode_t);
void SC(ppu_opcode_t);
void B(ppu_opcode_t);
void MCRF(ppu_opcode_t);
void BCLR(ppu_opcode_t);
void CRNOR(ppu_opcode_t);
void CRANDC(ppu_opcode_t);
void ISYNC(ppu_opcode_t);
void CRXOR(ppu_opcode_t);
void CRNAND(ppu_opcode_t);
void CRAND(ppu_opcode_t);
void CREQV(ppu_opcode_t);
void CRORC(ppu_opcode_t);
void CROR(ppu_opcode_t);
void BCCTR(ppu_opcode_t);
void RLWIMI(ppu_opcode_t);
void RLWINM(ppu_opcode_t);
void RLWNM(ppu_opcode_t);
void ORI(ppu_opcode_t);
void ORIS(ppu_opcode_t);
void XORI(ppu_opcode_t);
void XORIS(ppu_opcode_t);
void ANDI(ppu_opcode_t);
void ANDIS(ppu_opcode_t);
void RLDICL(ppu_opcode_t);
void RLDICR(ppu_opcode_t);
void RLDIC(ppu_opcode_t);
void RLDIMI(ppu_opcode_t);
void RLDCL(ppu_opcode_t);
void RLDCR(ppu_opcode_t);
void CMP(ppu_opcode_t);
void TW(ppu_opcode_t);
void LVSL(ppu_opcode_t);
void LVEBX(ppu_opcode_t);
void SUBFC(ppu_opcode_t);
void ADDC(ppu_opcode_t);
void MULHDU(ppu_opcode_t);
void MULHWU(ppu_opcode_t);
void MFOCRF(ppu_opcode_t);
void LWARX(ppu_opcode_t);
void LDX(ppu_opcode_t);
void LWZX(ppu_opcode_t);
void SLW(ppu_opcode_t);
void CNTLZW(ppu_opcode_t);
void SLD(ppu_opcode_t);
void AND(ppu_opcode_t);
void CMPL(ppu_opcode_t);
void LVSR(ppu_opcode_t);
void LVEHX(ppu_opcode_t);
void SUBF(ppu_opcode_t);
void LDUX(ppu_opcode_t);
void DCBST(ppu_opcode_t);
void LWZUX(ppu_opcode_t);
void CNTLZD(ppu_opcode_t);
void ANDC(ppu_opcode_t);
void TD(ppu_opcode_t);
void LVEWX(ppu_opcode_t);
void MULHD(ppu_opcode_t);
void MULHW(ppu_opcode_t);
void LDARX(ppu_opcode_t);
void DCBF(ppu_opcode_t);
void LBZX(ppu_opcode_t);
void LVX(ppu_opcode_t);
void NEG(ppu_opcode_t);
void LBZUX(ppu_opcode_t);
void NOR(ppu_opcode_t);
void STVEBX(ppu_opcode_t);
void SUBFE(ppu_opcode_t);
void ADDE(ppu_opcode_t);
void MTOCRF(ppu_opcode_t);
void STDX(ppu_opcode_t);
void STWCX(ppu_opcode_t);
void STWX(ppu_opcode_t);
void STVEHX(ppu_opcode_t);
void STDUX(ppu_opcode_t);
void STWUX(ppu_opcode_t);
void STVEWX(ppu_opcode_t);
void SUBFZE(ppu_opcode_t);
void ADDZE(ppu_opcode_t);
void STDCX(ppu_opcode_t);
void STBX(ppu_opcode_t);
void STVX(ppu_opcode_t);
void SUBFME(ppu_opcode_t);
void MULLD(ppu_opcode_t);
void ADDME(ppu_opcode_t);
void MULLW(ppu_opcode_t);
void DCBTST(ppu_opcode_t);
void STBUX(ppu_opcode_t);
void ADD(ppu_opcode_t);
void DCBT(ppu_opcode_t);
void LHZX(ppu_opcode_t);
void EQV(ppu_opcode_t);
void ECIWX(ppu_opcode_t);
void LHZUX(ppu_opcode_t);
void XOR(ppu_opcode_t);
void MFSPR(ppu_opcode_t);
void LWAX(ppu_opcode_t);
void DST(ppu_opcode_t);
void LHAX(ppu_opcode_t);
void LVXL(ppu_opcode_t);
void MFTB(ppu_opcode_t);
void LWAUX(ppu_opcode_t);
void DSTST(ppu_opcode_t);
void LHAUX(ppu_opcode_t);
void STHX(ppu_opcode_t);
void ORC(ppu_opcode_t);
void ECOWX(ppu_opcode_t);
void STHUX(ppu_opcode_t);
void OR(ppu_opcode_t);
void DIVDU(ppu_opcode_t);
void DIVWU(ppu_opcode_t);
void MTSPR(ppu_opcode_t);
void DCBI(ppu_opcode_t);
void NAND(ppu_opcode_t);
void STVXL(ppu_opcode_t);
void DIVD(ppu_opcode_t);
void DIVW(ppu_opcode_t);
void LVLX(ppu_opcode_t);
void LDBRX(ppu_opcode_t);
void LSWX(ppu_opcode_t);
void LWBRX(ppu_opcode_t);
void LFSX(ppu_opcode_t);
void SRW(ppu_opcode_t);
void SRD(ppu_opcode_t);
void LVRX(ppu_opcode_t);
void LSWI(ppu_opcode_t);
void LFSUX(ppu_opcode_t);
void SYNC(ppu_opcode_t);
void LFDX(ppu_opcode_t);
void LFDUX(ppu_opcode_t);
void STVLX(ppu_opcode_t);
void STDBRX(ppu_opcode_t);
void STSWX(ppu_opcode_t);
void STWBRX(ppu_opcode_t);
void STFSX(ppu_opcode_t);
void STVRX(ppu_opcode_t);
void STFSUX(ppu_opcode_t);
void STSWI(ppu_opcode_t);
void STFDX(ppu_opcode_t);
void STFDUX(ppu_opcode_t);
void LVLXL(ppu_opcode_t);
void LHBRX(ppu_opcode_t);
void SRAW(ppu_opcode_t);
void SRAD(ppu_opcode_t);
void LVRXL(ppu_opcode_t);
void DSS(ppu_opcode_t);
void SRAWI(ppu_opcode_t);
void SRADI(ppu_opcode_t);
void EIEIO(ppu_opcode_t);
void STVLXL(ppu_opcode_t);
void STHBRX(ppu_opcode_t);
void EXTSH(ppu_opcode_t);
void STVRXL(ppu_opcode_t);
void EXTSB(ppu_opcode_t);
void STFIWX(ppu_opcode_t);
void EXTSW(ppu_opcode_t);
void ICBI(ppu_opcode_t);
void DCBZ(ppu_opcode_t);
void LWZ(ppu_opcode_t);
void LWZU(ppu_opcode_t);
void LBZ(ppu_opcode_t);
void LBZU(ppu_opcode_t);
void STW(ppu_opcode_t);
void STWU(ppu_opcode_t);
void STB(ppu_opcode_t);
void STBU(ppu_opcode_t);
void LHZ(ppu_opcode_t);
void LHZU(ppu_opcode_t);
void LHA(ppu_opcode_t);
void LHAU(ppu_opcode_t);
void STH(ppu_opcode_t);
void STHU(ppu_opcode_t);
void LMW(ppu_opcode_t);
void STMW(ppu_opcode_t);
void LFS(ppu_opcode_t);
void LFSU(ppu_opcode_t);
void LFD(ppu_opcode_t);
void LFDU(ppu_opcode_t);
void STFS(ppu_opcode_t);
void STFSU(ppu_opcode_t);
void STFD(ppu_opcode_t);
void STFDU(ppu_opcode_t);
void LD(ppu_opcode_t);
void LDU(ppu_opcode_t);
void LWA(ppu_opcode_t);
void STD(ppu_opcode_t);
void STDU(ppu_opcode_t);
void FDIVS(ppu_opcode_t);
void FSUBS(ppu_opcode_t);
void FADDS(ppu_opcode_t);
void FSQRTS(ppu_opcode_t);
void FRES(ppu_opcode_t);
void FMULS(ppu_opcode_t);
void FMADDS(ppu_opcode_t);
void FMSUBS(ppu_opcode_t);
void FNMSUBS(ppu_opcode_t);
void FNMADDS(ppu_opcode_t);
void MTFSB1(ppu_opcode_t);
void MCRFS(ppu_opcode_t);
void MTFSB0(ppu_opcode_t);
void MTFSFI(ppu_opcode_t);
void MFFS(ppu_opcode_t);
void MTFSF(ppu_opcode_t);
void FCMPU(ppu_opcode_t);
void FRSP(ppu_opcode_t);
void FCTIW(ppu_opcode_t);
void FCTIWZ(ppu_opcode_t);
void FDIV(ppu_opcode_t);
void FSUB(ppu_opcode_t);
void FADD(ppu_opcode_t);
void FSQRT(ppu_opcode_t);
void FSEL(ppu_opcode_t);
void FMUL(ppu_opcode_t);
void FRSQRTE(ppu_opcode_t);
void FMSUB(ppu_opcode_t);
void FMADD(ppu_opcode_t);
void FNMSUB(ppu_opcode_t);
void FNMADD(ppu_opcode_t);
void FCMPO(ppu_opcode_t);
void FNEG(ppu_opcode_t);
void FMR(ppu_opcode_t);
void FNABS(ppu_opcode_t);
void FABS(ppu_opcode_t);
void FCTID(ppu_opcode_t);
void FCTIDZ(ppu_opcode_t);
void FCFID(ppu_opcode_t);
};


@ -1675,7 +1675,9 @@ public:
llvm::Value* starta_pc = m_ir->CreateAnd(get_pc(starta), 0x3fffc);
llvm::Value* data_addr = m_ir->CreateGEP(get_type<u8>(), m_lsptr, starta_pc);
llvm::Value* acc = nullptr;
llvm::Value* acc0 = nullptr;
llvm::Value* acc1 = nullptr;
bool toggle = true;
// Use a 512-bit simple checksum to verify integrity if size is at least 512b * 3
// This code uses a 512-bit vector on all hardware to ensure behavior matches.
@ -1721,10 +1723,21 @@ public:
vls = m_ir->CreateShuffleVector(vls, ConstantAggregateZero::get(vls->getType()), llvm::ArrayRef(indices, 16));
}
acc = acc ? m_ir->CreateAdd(acc, vls) : vls;
// Interleave accumulators for more performance
if (toggle)
{
acc0 = acc0 ? m_ir->CreateAdd(acc0, vls) : vls;
}
else
{
acc1 = acc1 ? m_ir->CreateAdd(acc1, vls) : vls;
}
toggle = !toggle;
check_iterations++;
}
llvm::Value* acc = (acc0 && acc1) ? m_ir->CreateAdd(acc0, acc1): (acc0 ? acc0 : acc1);
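// The two accumulators above are alternated so that consecutive vector adds do not form one
// serial dependency chain; they are combined only once at the end. A scalar analogue of the
// same idea (illustrative only, not the LLVM IR emitted here):
//   u64 acc0 = 0, acc1 = 0; bool toggle = true;
//   for (u64 v : values) { (toggle ? acc0 : acc1) += v; toggle = !toggle; }
//   const u64 acc = acc0 + acc1;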
// Create the checksum
u32 checksum[16] = {0};
@ -1818,9 +1831,21 @@ public:
}
vls = m_ir->CreateXor(vls, ConstantDataVector::get(m_context, llvm::ArrayRef(words, elements)));
acc = acc ? m_ir->CreateOr(acc, vls) : vls;
// Interleave accumulators for more performance
if (toggle)
{
acc0 = acc0 ? m_ir->CreateAdd(acc0, vls) : vls;
}
else
{
acc1 = acc1 ? m_ir->CreateAdd(acc1, vls) : vls;
}
toggle = !toggle;
check_iterations++;
}
llvm::Value* acc = (acc0 && acc1) ? m_ir->CreateAdd(acc0, acc1): (acc0 ? acc0 : acc1);
// Pattern for PTEST
if (m_use_avx512)
{


@ -119,7 +119,8 @@ namespace rsx
RSX_FORMAT_CLASS_DEPTH24_UNORM_X8_PACK32 = 8,
RSX_FORMAT_CLASS_DEPTH24_FLOAT_X8_PACK32 = 16,
RSX_FORMAT_CLASS_DEPTH_FLOAT_MASK = (RSX_FORMAT_CLASS_DEPTH16_FLOAT | RSX_FORMAT_CLASS_DEPTH24_FLOAT_X8_PACK32)
RSX_FORMAT_CLASS_DEPTH_FLOAT_MASK = (RSX_FORMAT_CLASS_DEPTH16_FLOAT | RSX_FORMAT_CLASS_DEPTH24_FLOAT_X8_PACK32),
RSX_FORMAT_CLASS_DONT_CARE = RSX_FORMAT_CLASS_UNDEFINED,
};
}


@ -404,18 +404,19 @@ namespace rsx
return ret;
}
void sort(std::predicate<const Ty&, const Ty&> auto predicate)
simple_array<Ty>& sort(std::predicate<const Ty&, const Ty&> auto predicate)
{
if (_size < 2)
{
return;
return *this;
}
std::sort(begin(), end(), predicate);
return *this;
}
template <typename F, typename U = std::invoke_result_t<F, const Ty&>>
requires std::is_invocable_v<F, const Ty&>
requires (std::is_invocable_v<F, const Ty&> && std::is_trivially_destructible_v<U>)
simple_array<U> map(F&& xform) const
{
simple_array<U> result;
@ -428,6 +429,20 @@ namespace rsx
return result;
}
template <typename F, typename U = std::invoke_result_t<F, const Ty&>>
requires (std::is_invocable_v<F, const Ty&> && !std::is_trivially_destructible_v<U>)
std::vector<U> map(F&& xform) const
{
std::vector<U> result;
result.reserve(size());
for (auto it = begin(); it != end(); ++it)
{
result.push_back(xform(*it));
}
return result;
}
template <typename F, typename U>
requires std::is_invocable_r_v<U, F, const U&, const Ty&>
U reduce(U initial_value, F&& reducer) const
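With sort() now returning a reference, calls can be chained into map(), as the framebuffer statistics code later in this commit does; map() also gains an overload that yields std::vector<U> when the mapped type is not trivially destructible. A minimal usage sketch (FN is the project's lambda-shorthand macro used above; get_ids() is a hypothetical data source):

rsx::simple_array<u32> ids = get_ids();
const auto labels = ids
    .sort(FN(x < y))              // sorts in place and returns *this for chaining
    .map(FN(std::to_string(x)));  // std::string is not trivially destructible,
                                  // so this overload returns std::vector<std::string>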


@ -0,0 +1,107 @@
#include "stdafx.h"
#include "RSXDisplay.h"
#include "../Common/simple_array.hpp"
#include "../rsx_utils.h"
namespace rsx
{
std::string framebuffer_dimensions_t::to_string(bool skip_aa_suffix) const
{
std::string suffix = "";
const auto spp = samples_x * samples_y;
if (!skip_aa_suffix && spp > 1)
{
suffix = std::string(" @MSAA ") + std::to_string(spp) + "x";
}
return std::to_string(width) + "x" + std::to_string(height) + suffix;
}
framebuffer_dimensions_t framebuffer_dimensions_t::make(u16 width, u16 height, rsx::surface_antialiasing aa)
{
framebuffer_dimensions_t result { .width = width, .height = height };
switch (aa)
{
case rsx::surface_antialiasing::center_1_sample:
result.samples_x = result.samples_y = 1;
break;
case rsx::surface_antialiasing::diagonal_centered_2_samples:
result.samples_x = 2;
result.samples_y = 1;
break;
case rsx::surface_antialiasing::square_centered_4_samples:
case rsx::surface_antialiasing::square_rotated_4_samples:
result.samples_x = result.samples_y = 2;
break;
}
return result;
}
void framebuffer_statistics_t::add(u16 width, u16 height, rsx::surface_antialiasing aa)
{
auto& stashed = data[aa];
const auto& incoming = framebuffer_dimensions_t::make(width, height, aa);
if (incoming > stashed)
{
stashed = incoming;
}
}
std::string framebuffer_statistics_t::to_string(bool squash) const
{
// Format is sorted by sample count
struct sorted_message_t
{
u32 id;
surface_antialiasing aa_mode;
u32 samples;
};
if (data.size() == 0)
{
return "None";
}
rsx::simple_array<sorted_message_t> messages;
rsx::simple_array<framebuffer_dimensions_t> real_stats;
for (const auto& [aa_mode, stat] : data)
{
auto real_stat = stat;
std::tie(real_stat.width, real_stat.height) = apply_resolution_scale(stat.width, stat.height);
real_stats.push_back(real_stat);
sorted_message_t msg;
msg.id = real_stats.size() - 1;
msg.aa_mode = aa_mode;
msg.samples = real_stat.samples_total();
messages.push_back(msg);
}
if (squash)
{
messages.sort(FN(x.samples > y.samples));
return real_stats[messages.front().id]
.to_string(g_cfg.video.antialiasing_level == msaa_level::none);
}
if (messages.size() > 1)
{
// Should we bother showing the No-AA entry?
// This heuristic ignores pointless no-AA surfaces usually used as compositing buffers for output.
messages.sort(FN(x.samples > y.samples));
if (messages.back().aa_mode == rsx::surface_antialiasing::center_1_sample)
{
// Drop the last entry if it has no AA.
messages.resize(messages.size() - 1);
}
}
const auto text = messages
.sort(FN(static_cast<u8>(x.aa_mode) > static_cast<u8>(y.aa_mode)))
.map(FN(real_stats[x.id].to_string()));
return fmt::merge(text, ", ");
}
}
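A hedged usage sketch of the statistics type defined above (the values and the resulting text are illustrative and assume a 100% resolution scale):

rsx::framebuffer_statistics_t stats;
stats.add(1280, 720, rsx::surface_antialiasing::square_centered_4_samples);
stats.add(720, 480, rsx::surface_antialiasing::center_1_sample);

// The no-AA entry is dropped by the heuristic above, leaving "1280x720 @MSAA 4x"
const std::string text = stats.to_string(false);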


@ -3,9 +3,48 @@
#include <util/types.hpp>
#include <util/logs.hpp>
#include <deque>
#include <unordered_map>
template <typename T>
class named_thread;
namespace rsx
{
enum class surface_antialiasing : u8;
struct framebuffer_dimensions_t
{
u16 width;
u16 height;
u8 samples_x;
u8 samples_y;
inline u32 samples_total() const
{
return static_cast<u32>(width) * height * samples_x * samples_y;
}
inline bool operator > (const framebuffer_dimensions_t& that) const
{
return samples_total() > that.samples_total();
}
std::string to_string(bool skip_aa_suffix = false) const;
static framebuffer_dimensions_t make(u16 width, u16 height, rsx::surface_antialiasing aa);
};
struct framebuffer_statistics_t
{
std::unordered_map<rsx::surface_antialiasing, framebuffer_dimensions_t> data;
// Replace the existing data with this input if it is greater than what is already known
void add(u16 width, u16 height, rsx::surface_antialiasing aa);
// Returns a formatted string representing the statistics collected over the frame.
std::string to_string(bool squash) const;
};
struct frame_statistics_t
{
u32 draw_calls;
@ -19,6 +58,8 @@ namespace rsx
u32 vertex_cache_request_count;
u32 vertex_cache_miss_count;
framebuffer_statistics_t framebuffer_stats;
};
struct frame_time_t


@ -667,7 +667,23 @@ namespace rsx
rop_control.enable_polygon_stipple();
}
if (REGS(m_ctx)->msaa_alpha_to_coverage_enabled() && !RSX(m_ctx)->get_backend_config().supports_hw_a2c)
auto can_use_hw_a2c = [&]() -> bool
{
const auto& config = RSX(m_ctx)->get_backend_config();
if (!config.supports_hw_a2c)
{
return false;
}
if (config.supports_hw_a2c_1spp)
{
return true;
}
return REGS(m_ctx)->surface_antialias() != rsx::surface_antialiasing::center_1_sample;
};
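// Decision summary (illustrative restatement of the lambda above):
//   supports_hw_a2c == false                 -> fall back to the shader-based emulation below
//   supports_hw_a2c && supports_hw_a2c_1spp  -> hardware A2C is always usable
//   supports_hw_a2c && !supports_hw_a2c_1spp -> hardware A2C only when the surface is
//                                               actually multisampled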
if (REGS(m_ctx)->msaa_alpha_to_coverage_enabled() && !can_use_hw_a2c())
{
// TODO: Properly support alpha-to-coverage and alpha-to-one behavior in shaders
// Alpha values generate a coverage mask for order independent blending


@ -58,6 +58,8 @@ namespace gl
if (!compiled)
{
ensure(!m_src.empty(), "Compute shader is not initialized!");
m_shader.create(::glsl::program_domain::glsl_compute_program, m_src);
m_shader.compile();
@ -82,6 +84,7 @@ namespace gl
void compute_task::run(gl::command_context& cmd, u32 invocations_x, u32 invocations_y)
{
ensure(compiled && m_program.id() != GL_NONE);
bind_resources();
cmd->use_program(m_program.id());


@ -3,6 +3,8 @@
#include "../rsx_methods.h"
#include "../Common/BufferUtils.h"
#include "Emu/RSX/NV47/HW/context_accessors.define.h"
namespace gl
{
GLenum comparison_op(rsx::comparison_function op)
@ -256,6 +258,32 @@ void GLGSRender::update_draw_state()
gl_state.enablei(mrt_blend_enabled[2], GL_BLEND, 2);
gl_state.enablei(mrt_blend_enabled[3], GL_BLEND, 3);
}
// Antialias control
if (backend_config.supports_hw_msaa)
{
gl_state.enable(/*REGS(m_ctx)->msaa_enabled()*/GL_MULTISAMPLE);
gl_state.enable(GL_SAMPLE_MASK);
gl_state.sample_mask(REGS(m_ctx)->msaa_sample_mask());
gl_state.enable(GL_SAMPLE_SHADING);
gl_state.min_sample_shading_rate(1.f);
gl_state.enable(GL_SAMPLE_COVERAGE);
gl_state.sample_coverage(1.f);
}
if (backend_config.supports_hw_a2c)
{
const bool hw_enable = backend_config.supports_hw_a2c_1spp || REGS(m_ctx)->surface_antialias() != rsx::surface_antialiasing::center_1_sample;
gl_state.enable(hw_enable && REGS(m_ctx)->msaa_alpha_to_coverage_enabled(), GL_SAMPLE_ALPHA_TO_COVERAGE);
}
if (backend_config.supports_hw_a2one)
{
gl_state.enable(REGS(m_ctx)->msaa_alpha_to_one_enabled(), GL_SAMPLE_ALPHA_TO_ONE);
}
}
switch (rsx::method_registers.current_draw_clause.primitive)
@ -307,12 +335,6 @@ void GLGSRender::update_draw_state()
// Clip planes
gl_state.clip_planes((current_vertex_program.output_mask >> CELL_GCM_ATTRIB_OUTPUT_UC0) & 0x3F);
// Sample control
// TODO: MinSampleShading
//gl_state.enable(rsx::method_registers.msaa_enabled(), GL_MULTISAMPLE);
//gl_state.enable(rsx::method_registers.msaa_alpha_to_coverage_enabled(), GL_SAMPLE_ALPHA_TO_COVERAGE);
//gl_state.enable(rsx::method_registers.msaa_alpha_to_one_enabled(), GL_SAMPLE_ALPHA_TO_ONE);
//TODO
//NV4097_SET_ANISO_SPREAD
//NV4097_SET_SPECULAR_ENABLE
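For reference, the gl_state calls in the antialias control block above map roughly onto these raw GL entry points (a sketch, not the emulator's actual code path; sample_mask stands in for the NV4097 sample mask register value):

glEnable(GL_MULTISAMPLE);
glEnable(GL_SAMPLE_MASK);
glSampleMaski(0, sample_mask);
glEnable(GL_SAMPLE_SHADING);
glMinSampleShading(1.0f);
glEnable(GL_SAMPLE_COVERAGE);
glSampleCoverage(1.0f, GL_FALSE);
// plus GL_SAMPLE_ALPHA_TO_COVERAGE / GL_SAMPLE_ALPHA_TO_ONE when the backend reports support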


@ -5,6 +5,7 @@
#include "GLCommonDecompiler.h"
#include "../GCM.h"
#include "../Program/GLSLCommon.h"
#include "../RSXThread.h"
std::string GLFragmentDecompilerThread::getFloatTypeName(usz elementCount)
{
@ -44,6 +45,21 @@ void GLFragmentDecompilerThread::insertHeader(std::stringstream & OS)
}
}
if (properties.multisampled_sampler_mask)
{
// Requires this extension or GLSL 450
const auto driver_caps = gl::get_driver_caps();
if (driver_caps.glsl_version.version >= 450)
{
gl_version = 450;
}
else
{
ensure(driver_caps.ARB_shader_texture_image_samples, "MSAA support on OpenGL requires a driver running OpenGL 4.5 or supporting GL_ARB_shader_texture_image_samples.");
required_extensions.push_back("GL_ARB_shader_texture_image_samples");
}
}
if (m_prog.ctrl & RSX_SHADER_CONTROL_ATTRIBUTE_INTERPOLATION)
{
gl_version = std::max(gl_version, 450);
@ -110,10 +126,14 @@ void GLFragmentDecompilerThread::insertConstants(std::stringstream & OS)
const auto mask = (1 << index);
if (properties.redirected_sampler_mask & mask)
if (properties.multisampled_sampler_mask & mask)
{
// Provide a stencil view of the main resource for the S channel
OS << "uniform u" << samplerType << " " << PI.name << "_stencil;\n";
if (samplerType != "sampler1D" && samplerType != "sampler2D")
{
rsx_log.error("Unexpected multisampled image type '%s'", samplerType);
}
samplerType = "sampler2DMS";
}
else if (properties.shadow_sampler_mask & mask)
{
@ -127,6 +147,12 @@ void GLFragmentDecompilerThread::insertConstants(std::stringstream & OS)
}
}
if (properties.redirected_sampler_mask & mask)
{
// Provide a stencil view of the main resource for the S channel
OS << "uniform u" << samplerType << " " << PI.name << "_stencil;\n";
}
OS << "uniform " << samplerType << " " << PI.name << ";\n";
}
}
@ -188,11 +214,12 @@ void GLFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS)
m_shader_props.require_wpos = !!(properties.in_register_mask & in_wpos);
m_shader_props.require_texture_ops = properties.has_tex_op;
m_shader_props.require_tex_shadow_ops = properties.shadow_sampler_mask != 0;
m_shader_props.require_msaa_ops = properties.multisampled_sampler_mask != 0;
m_shader_props.require_texture_expand = properties.has_exp_tex_op;
m_shader_props.require_srgb_to_linear = properties.has_upg;
m_shader_props.require_linear_to_srgb = properties.has_pkg;
m_shader_props.require_fog_read = properties.in_register_mask & in_fogc;
m_shader_props.emulate_coverage_tests = true; // g_cfg.video.antialiasing_level == msaa_level::none;
m_shader_props.emulate_coverage_tests = !rsx::get_renderer_backend_config().supports_hw_a2c_1spp;
m_shader_props.emulate_shadow_compare = device_props.emulate_depth_compare;
m_shader_props.low_precision_tests = ::gl::get_driver_caps().vendor_NVIDIA && !(m_prog.ctrl & RSX_SHADER_CONTROL_ATTRIBUTE_INTERPOLATION);
m_shader_props.disable_early_discard = !::gl::get_driver_caps().vendor_NVIDIA;


@ -4,6 +4,7 @@
#include "GLGSRender.h"
#include "GLCompute.h"
#include "GLDMA.h"
#include "GLResolveHelper.h"
#include "Emu/Memory/vm_locking.h"
#include "Emu/RSX/rsx_methods.h"
@ -46,10 +47,16 @@ GLGSRender::GLGSRender(utils::serial* ar) noexcept : GSRender(ar)
else
m_vertex_cache = std::make_unique<gl::weak_vertex_cache>();
backend_config.supports_hw_a2c = false;
backend_config.supports_hw_a2one = false;
backend_config.supports_multidraw = true;
backend_config.supports_normalized_barycentrics = true;
if (g_cfg.video.antialiasing_level != msaa_level::none)
{
backend_config.supports_hw_msaa = true;
backend_config.supports_hw_a2c = true;
backend_config.supports_hw_a2c_1spp = false; // In OGL A2C is implicitly disabled at 1spp
backend_config.supports_hw_a2one = true;
}
}
GLGSRender::~GLGSRender()
@ -229,13 +236,13 @@ void GLGSRender::on_init_thread()
// Array stream buffer
{
m_gl_persistent_stream_buffer = std::make_unique<gl::texture>(GL_TEXTURE_BUFFER, 0, 0, 0, 0, GL_R8UI);
m_gl_persistent_stream_buffer = std::make_unique<gl::texture>(GL_TEXTURE_BUFFER, 0, 0, 0, 0, 0, GL_R8UI, RSX_FORMAT_CLASS_DONT_CARE);
gl_state.bind_texture(GL_STREAM_BUFFER_START + 0, GL_TEXTURE_BUFFER, m_gl_persistent_stream_buffer->id());
}
// Register stream buffer
{
m_gl_volatile_stream_buffer = std::make_unique<gl::texture>(GL_TEXTURE_BUFFER, 0, 0, 0, 0, GL_R8UI);
m_gl_volatile_stream_buffer = std::make_unique<gl::texture>(GL_TEXTURE_BUFFER, 0, 0, 0, 0, 0, GL_R8UI, RSX_FORMAT_CLASS_DONT_CARE);
gl_state.bind_texture(GL_STREAM_BUFFER_START + 1, GL_TEXTURE_BUFFER, m_gl_volatile_stream_buffer->id());
}
@ -244,19 +251,19 @@ void GLGSRender::on_init_thread()
std::array<u32, 8> pixeldata = { 0, 0, 0, 0, 0, 0, 0, 0 };
// 1D
auto tex1D = std::make_unique<gl::texture>(GL_TEXTURE_1D, 1, 1, 1, 1, GL_RGBA8);
auto tex1D = std::make_unique<gl::texture>(GL_TEXTURE_1D, 1, 1, 1, 1, 1, GL_RGBA8, RSX_FORMAT_CLASS_COLOR);
tex1D->copy_from(pixeldata.data(), gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8, {});
// 2D
auto tex2D = std::make_unique<gl::texture>(GL_TEXTURE_2D, 1, 1, 1, 1, GL_RGBA8);
auto tex2D = std::make_unique<gl::texture>(GL_TEXTURE_2D, 1, 1, 1, 1, 1, GL_RGBA8, RSX_FORMAT_CLASS_COLOR);
tex2D->copy_from(pixeldata.data(), gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8, {});
// 3D
auto tex3D = std::make_unique<gl::texture>(GL_TEXTURE_3D, 1, 1, 1, 1, GL_RGBA8);
auto tex3D = std::make_unique<gl::texture>(GL_TEXTURE_3D, 1, 1, 1, 1, 1, GL_RGBA8, RSX_FORMAT_CLASS_COLOR);
tex3D->copy_from(pixeldata.data(), gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8, {});
// CUBE
auto texCUBE = std::make_unique<gl::texture>(GL_TEXTURE_CUBE_MAP, 1, 1, 1, 1, GL_RGBA8);
auto texCUBE = std::make_unique<gl::texture>(GL_TEXTURE_CUBE_MAP, 1, 1, 1, 1, 1, GL_RGBA8, RSX_FORMAT_CLASS_COLOR);
texCUBE->copy_from(pixeldata.data(), gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8, {});
m_null_textures[GL_TEXTURE_1D] = std::move(tex1D);
@ -423,6 +430,7 @@ void GLGSRender::on_exit()
gl::destroy_compute_tasks();
gl::destroy_overlay_passes();
gl::clear_dma_resources();
gl::clear_resolve_helpers();
gl::destroy_global_texture_resources();


@ -23,6 +23,8 @@ namespace gl
{
if (!compiled)
{
ensure(!fs_src.empty() && !vs_src.empty(), "Shaders have not been initialized.");
fs.create(::glsl::program_domain::glsl_fragment_program, fs_src);
fs.compile();
@ -34,6 +36,8 @@ namespace gl
program_handle.attach(fs);
program_handle.link();
ensure(program_handle.id());
fbo.create();
m_sampler.create();
@ -75,7 +79,7 @@ namespace gl
}
}
void overlay_pass::emit_geometry()
void overlay_pass::emit_geometry(gl::command_context& /*cmd*/)
{
int old_vao;
glGetIntegerv(GL_VERTEX_ARRAY_BINDING, &old_vao);
@ -88,11 +92,7 @@ namespace gl
void overlay_pass::run(gl::command_context& cmd, const areau& region, GLuint target_texture, GLuint image_aspect_bits, bool enable_blending)
{
if (!compiled)
{
rsx_log.error("You must initialize overlay passes with create() before calling run()");
return;
}
ensure(compiled && program_handle.id() != GL_NONE, "You must initialize overlay passes with create() before calling run()");
GLint viewport[4];
std::unique_ptr<fbo::save_binding_state> save_fbo;
@ -111,6 +111,10 @@ namespace gl
fbo.draw_buffer(fbo.no_color);
fbo.depth = target_texture;
break;
case gl::image_aspect::stencil:
fbo.draw_buffer(fbo.no_color);
fbo.depth_stencil = target_texture;
break;
case gl::image_aspect::depth | gl::image_aspect::stencil:
fbo.draw_buffer(fbo.no_color);
fbo.depth_stencil = target_texture;
@ -176,7 +180,7 @@ namespace gl
cmd->use_program(program_handle.id());
on_load();
bind_resources();
emit_geometry();
emit_geometry(cmd);
glViewport(viewport[0], viewport[1], viewport[2], viewport[3]);
@ -216,7 +220,7 @@ namespace gl
gl::texture_view* ui_overlay_renderer::load_simple_image(rsx::overlays::image_info* desc, bool temp_resource, u32 owner_uid)
{
auto tex = std::make_unique<gl::texture>(GL_TEXTURE_2D, desc->w, desc->h, 1, 1, GL_RGBA8);
auto tex = std::make_unique<gl::texture>(GL_TEXTURE_2D, desc->w, desc->h, 1, 1, 1, GL_RGBA8, RSX_FORMAT_CLASS_COLOR);
tex->copy_from(desc->get_data(), gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8, {});
GLenum remap[] = { GL_RED, GL_ALPHA, GL_BLUE, GL_GREEN };
@ -301,7 +305,7 @@ namespace gl
// Create font file
const std::vector<u8> glyph_data = font->get_glyph_data();
auto tex = std::make_unique<gl::texture>(GL_TEXTURE_2D_ARRAY, font_size.width, font_size.height, font_size.depth, 1, GL_R8);
auto tex = std::make_unique<gl::texture>(GL_TEXTURE_2D_ARRAY, font_size.width, font_size.height, font_size.depth, 1, 1, GL_R8, RSX_FORMAT_CLASS_COLOR);
tex->copy_from(glyph_data.data(), gl::texture::format::r, gl::texture::type::ubyte, {});
GLenum remap[] = { GL_RED, GL_RED, GL_RED, GL_RED };
@ -350,7 +354,7 @@ namespace gl
}
}
void ui_overlay_renderer::emit_geometry()
void ui_overlay_renderer::emit_geometry(gl::command_context& cmd)
{
if (m_current_primitive_type == rsx::overlays::primitive_type::quad_list)
{
@ -378,7 +382,7 @@ namespace gl
}
else
{
overlay_pass::emit_geometry();
overlay_pass::emit_geometry(cmd);
}
}


@ -57,7 +57,7 @@ namespace gl
m_vertex_data_buffer.data(elements_count * sizeof(T), data);
}
virtual void emit_geometry();
virtual void emit_geometry(gl::command_context& cmd);
void run(gl::command_context& cmd, const areau& region, GLuint target_texture, GLuint image_aspect_bits, bool enable_blending = false);
};
@ -87,7 +87,7 @@ namespace gl
void set_primitive_type(rsx::overlays::primitive_type type);
void emit_geometry() override;
void emit_geometry(gl::command_context& cmd) override;
void run(gl::command_context& cmd, const areau& viewport, GLuint target, rsx::overlays::overlay& ui);
};


@ -26,7 +26,7 @@ namespace gl
{
const auto target = static_cast<GLenum>(visual->get_target());
const auto ifmt = static_cast<GLenum>(visual->get_internal_format());
g_vis_texture.reset(new texture(target, visual->width(), visual->height(), 1, 1, ifmt, visual->format_class()));
g_vis_texture.reset(new texture(target, visual->width(), visual->height(), 1, 1, 1, ifmt, visual->format_class()));
glCopyImageSubData(visual->id(), target, 0, 0, 0, 0, g_vis_texture->id(), target, 0, 0, 0, 0, visual->width(), visual->height(), 1);
}
}
@ -115,7 +115,7 @@ gl::texture* GLGSRender::get_present_source(gl::present_surface_info* info, cons
{
if (!flip_image || flip_image->size2D() != sizeu{ info->width, info->height })
{
flip_image = std::make_unique<gl::texture>(GL_TEXTURE_2D, info->width, info->height, 1, 1, expected_format);
flip_image = std::make_unique<gl::texture>(GL_TEXTURE_2D, info->width, info->height, 1, 1, 1, expected_format, RSX_FORMAT_CLASS_COLOR);
}
};
@ -402,6 +402,7 @@ void GLGSRender::flip(const rsx::display_flip_info_t& info)
: 0;
rsx::overlays::set_debug_overlay_text(fmt::format(
"Internal Resolution: %s\n"
"RSX Load: %3d%%\n"
"draw calls: %16d\n"
"draw call setup: %11dus\n"
@ -413,6 +414,7 @@ void GLGSRender::flip(const rsx::display_flip_info_t& info)
"Flush requests: %12d = %2d (%3d%%) hard faults, %2d unavoidable, %2d misprediction(s), %2d speculation(s)\n"
"Texture uploads: %11u (%u from CPU - %02u%%, %u copies avoided)\n"
"Vertex cache hits: %9u/%u (%u%%)",
info.stats.framebuffer_stats.to_string(!backend_config.supports_hw_msaa),
get_load(), info.stats.draw_calls, info.stats.setup_time, info.stats.vertex_upload_time,
info.stats.textures_upload_time, info.stats.draw_exec_time, num_dirty_textures, texture_memory_size,
num_flushes, num_misses, cache_miss_ratio, num_unavoidable, num_mispredict, num_speculate,


@ -259,6 +259,13 @@ OPENGL_PROC(PFNGLTEXSTORAGE1DPROC, TexStorage1D);
OPENGL_PROC(PFNGLTEXSTORAGE2DPROC, TexStorage2D);
OPENGL_PROC(PFNGLTEXSTORAGE3DPROC, TexStorage3D);
// ARB_texture_multisample
OPENGL_PROC(PFNGLTEXSTORAGE2DMULTISAMPLEPROC, TexStorage2DMultisample);
OPENGL_PROC(PFNGLTEXSTORAGE3DMULTISAMPLEPROC, TexStorage3DMultisample);
OPENGL_PROC(PFNGLSAMPLEMASKIPROC, SampleMaski);
OPENGL_PROC(PFNGLMINSAMPLESHADINGPROC, MinSampleShading);
OPENGL_PROC(PFNGLSAMPLECOVERAGEPROC, SampleCoverage);
// Texture_View
OPENGL_PROC(PFNGLTEXTUREVIEWPROC, TextureView);


@ -1,5 +1,6 @@
#include "stdafx.h"
#include "GLGSRender.h"
#include "GLResolveHelper.h"
#include "Emu/RSX/rsx_methods.h"
#include <span>
@ -417,15 +418,16 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool /*
}
// Render target helpers
void gl::render_target::clear_memory(gl::command_context& cmd)
void gl::render_target::clear_memory(gl::command_context& cmd, gl::texture* surface)
{
auto dst = surface ? surface : this;
if (aspect() & gl::image_aspect::depth)
{
gl::g_hw_blitter->fast_clear_image(cmd, this, 1.f, 255);
gl::g_hw_blitter->fast_clear_image(cmd, dst, 1.f, 255);
}
else
{
gl::g_hw_blitter->fast_clear_image(cmd, this, {});
gl::g_hw_blitter->fast_clear_image(cmd, dst, {});
}
state_flags &= ~rsx::surface_state_flags::erase_bkgnd;
@ -449,18 +451,26 @@ void gl::render_target::load_memory(gl::command_context& cmd)
}
else
{
auto tmp = std::make_unique<gl::texture>(GL_TEXTURE_2D, subres.width_in_block, subres.height_in_block, 1, 1, static_cast<GLenum>(get_internal_format()), format_class());
auto tmp = std::make_unique<gl::texture>(GL_TEXTURE_2D, subres.width_in_block, subres.height_in_block, 1, 1, 1, static_cast<GLenum>(get_internal_format()), format_class());
auto dst = samples() > 1 ? get_resolve_target_safe(cmd) : this;
gl::upload_texture(cmd, tmp.get(), get_gcm_format(), is_swizzled, { subres });
gl::g_hw_blitter->scale_image(cmd, tmp.get(), this,
gl::g_hw_blitter->scale_image(cmd, tmp.get(), dst,
{ 0, 0, subres.width_in_block, subres.height_in_block },
{ 0, 0, static_cast<int>(width()), static_cast<int>(height()) },
!is_depth_surface(),
{});
if (samples() > 1)
{
msaa_flags = rsx::surface_state_flags::require_unresolve;
}
}
state_flags &= ~rsx::surface_state_flags::erase_bkgnd;
}
void gl::render_target::initialize_memory(gl::command_context& cmd, rsx::surface_access /*access*/)
void gl::render_target::initialize_memory(gl::command_context& cmd, rsx::surface_access access)
{
const bool memory_load = is_depth_surface() ?
!!g_cfg.video.read_depth_buffer :
@ -469,6 +479,14 @@ void gl::render_target::initialize_memory(gl::command_context& cmd, rsx::surface
if (!memory_load)
{
clear_memory(cmd);
if (samples() > 1 && access.is_transfer_or_read())
{
// Only clear the resolve surface if reading from it, otherwise it's a waste
clear_memory(cmd, get_resolve_target_safe(cmd));
}
msaa_flags = rsx::surface_state_flags::ready;
}
else
{
@ -476,8 +494,28 @@ void gl::render_target::initialize_memory(gl::command_context& cmd, rsx::surface
}
}
gl::viewable_image* gl::render_target::get_surface(rsx::surface_access access_type)
{
if (samples() == 1 || !access_type.is_transfer())
{
return this;
}
// A read barrier should have been called before this!
ensure(resolve_surface, "Read access without explicit barrier");
ensure(!(msaa_flags & rsx::surface_state_flags::require_resolve));
return static_cast<gl::viewable_image*>(resolve_surface.get());
}
void gl::render_target::memory_barrier(gl::command_context& cmd, rsx::surface_access access)
{
if (access == rsx::surface_access::gpu_reference)
{
// In OpenGL, resources are always assumed to be visible to the GPU.
// We don't manage memory spilling, so just return.
return;
}
const bool read_access = access.is_read();
const bool is_depth = is_depth_surface();
const bool should_read_buffers = is_depth ? !!g_cfg.video.read_depth_buffer : !!g_cfg.video.read_color_buffers;
@ -504,12 +542,33 @@ void gl::render_target::memory_barrier(gl::command_context& cmd, rsx::surface_ac
on_write();
}
if (msaa_flags & rsx::surface_state_flags::require_resolve)
{
if (access.is_transfer())
{
// Only do this step when read access is required
get_resolve_target_safe(cmd);
resolve(cmd);
}
}
else if (msaa_flags & rsx::surface_state_flags::require_unresolve)
{
if (access == rsx::surface_access::shader_write)
{
// Only do this step when it is needed to start rendering
ensure(resolve_surface);
unresolve(cmd);
}
}
return;
}
auto dst_img = (samples() > 1) ? get_resolve_target_safe(cmd) : this;
const bool dst_is_depth = !!(aspect() & gl::image_aspect::depth);
const auto dst_bpp = get_bpp();
unsigned first = prepare_rw_barrier_for_transfer(this);
bool optimize_copy = true;
u64 newest_tag = 0;
for (auto i = first; i < old_contents.size(); ++i)
@ -519,6 +578,8 @@ void gl::render_target::memory_barrier(gl::command_context& cmd, rsx::surface_ac
const auto src_bpp = src_texture->get_bpp();
rsx::typeless_xfer typeless_info{};
src_texture->memory_barrier(cmd, rsx::surface_access::transfer_read);
if (get_internal_format() == src_texture->get_internal_format())
{
// Copy data from old contents onto this one
@ -538,29 +599,106 @@ void gl::render_target::memory_barrier(gl::command_context& cmd, rsx::surface_ac
}
section.init_transfer(this);
auto src_area = section.src_rect();
auto dst_area = section.dst_rect();
if (state_flags & rsx::surface_state_flags::erase_bkgnd)
if (g_cfg.video.antialiasing_level != msaa_level::none)
{
const auto area = section.dst_rect();
if (area.x1 > 0 || area.y1 > 0 || unsigned(area.x2) < width() || unsigned(area.y2) < height())
{
initialize_memory(cmd, access);
}
else
{
state_flags &= ~rsx::surface_state_flags::erase_bkgnd;
}
src_texture->transform_pixels_to_samples(src_area);
this->transform_pixels_to_samples(dst_area);
}
gl::g_hw_blitter->scale_image(cmd, section.source, this,
section.src_rect(),
section.dst_rect(),
bool memory_load = true;
if (dst_area.x1 == 0 && dst_area.y1 == 0 &&
unsigned(dst_area.x2) == dst_img->width() && unsigned(dst_area.y2) == dst_img->height())
{
// Skip a bunch of useless work
state_flags &= ~(rsx::surface_state_flags::erase_bkgnd);
msaa_flags = rsx::surface_state_flags::ready;
memory_load = false;
stencil_init_flags = src_texture->stencil_init_flags;
}
else if (state_flags & rsx::surface_state_flags::erase_bkgnd)
{
// Might introduce MSAA flags
initialize_memory(cmd, rsx::surface_access::memory_write);
ensure(state_flags == rsx::surface_state_flags::ready);
}
if (msaa_flags & rsx::surface_state_flags::require_resolve)
{
// Need to forward resolve this
resolve(cmd);
}
if (src_texture->samples() > 1)
{
// Ensure a readable surface exists for the source
src_texture->get_resolve_target_safe(cmd);
}
gl::g_hw_blitter->scale_image(
cmd,
src_texture->get_surface(rsx::surface_access::transfer_read),
this->get_surface(rsx::surface_access::transfer_write),
src_area,
dst_area,
!dst_is_depth, typeless_info);
optimize_copy = optimize_copy && !memory_load;
newest_tag = src_texture->last_use_tag;
}
// Memory has been transferred, discard old contents and update memory flags
// TODO: Preserve memory outside surface clip region
on_write(newest_tag);
if (!newest_tag) [[unlikely]]
{
// Underlying memory has been modified and we could not find valid data to fill it
clear_rw_barrier();
state_flags |= rsx::surface_state_flags::erase_bkgnd;
initialize_memory(cmd, access);
ensure(state_flags == rsx::surface_state_flags::ready);
}
// NOTE: Optimize flag relates to stencil resolve/unresolve for NVIDIA.
on_write_copy(newest_tag, optimize_copy);
if (access == rsx::surface_access::shader_write && samples() > 1)
{
// Write barrier, must initialize
unresolve(cmd);
}
}
// MSAA support
gl::viewable_image* gl::render_target::get_resolve_target_safe(gl::command_context& /*cmd*/)
{
if (!resolve_surface)
{
// Create a resolve surface
const auto resolve_w = width() * samples_x;
const auto resolve_h = height() * samples_y;
resolve_surface.reset(new gl::viewable_image(
GL_TEXTURE_2D,
resolve_w, resolve_h,
1, 1, 1,
static_cast<GLenum>(get_internal_format()),
format_class()
));
}
return static_cast<gl::viewable_image*>(resolve_surface.get());
}
void gl::render_target::resolve(gl::command_context& cmd)
{
gl::resolve_image(cmd, get_resolve_target_safe(cmd), this);
msaa_flags &= ~(rsx::surface_state_flags::require_resolve);
}
void gl::render_target::unresolve(gl::command_context& cmd)
{
gl::unresolve_image(cmd, this, get_resolve_target_safe(cmd));
msaa_flags &= ~(rsx::surface_state_flags::require_unresolve);
}
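Taken together, the resolve/unresolve helpers above implement a small state machine on msaa_flags. A condensed sketch of the transitions visible in this diff (the point where require_resolve is first raised after GPU rendering lies outside the hunks shown):

// require_resolve: set once the MSAA surface has been rendered to
//   -> memory_barrier() with a transfer/read access calls resolve(), copying the MSAA samples
//      into the single-sample resolve target and clearing the flag
// require_unresolve: set when the resolve target receives new data (e.g. load_memory above)
//   -> memory_barrier(rsx::surface_access::shader_write) calls unresolve(), copying the linear
//      data back into the MSAA surface before rendering resumes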


@ -49,13 +49,21 @@ namespace gl
{
class render_target : public viewable_image, public rsx::render_target_descriptor<texture*>
{
void clear_memory(gl::command_context& cmd);
void clear_memory(gl::command_context& cmd, gl::texture* surface = nullptr);
void load_memory(gl::command_context& cmd);
void initialize_memory(gl::command_context& cmd, rsx::surface_access access);
// MSAA support:
// Get the linear resolve target bound to this surface. Initialize if none exists
gl::viewable_image* get_resolve_target_safe(gl::command_context& cmd);
// Resolve the planar MSAA data into a linear block
void resolve(gl::command_context& cmd);
// Unresolve the linear data into planar MSAA data
void unresolve(gl::command_context& cmd);
public:
render_target(GLuint width, GLuint height, GLenum sized_format, rsx::format_class format_class)
: viewable_image(GL_TEXTURE_2D, width, height, 1, 1, sized_format, format_class)
render_target(GLuint width, GLuint height, GLubyte samples, GLenum sized_format, rsx::format_class format_class)
: viewable_image(GL_TEXTURE_2D, width, height, 1, 1, samples, sized_format, format_class)
{}
// Internal pitch is the actual row length in bytes of the openGL texture
@ -81,11 +89,7 @@ namespace gl
return !!(aspect() & gl::image_aspect::depth);
}
viewable_image* get_surface(rsx::surface_access /*access_type*/) override
{
// TODO
return static_cast<gl::viewable_image*>(this);
}
viewable_image* get_surface(rsx::surface_access /*access_type*/) override;
u32 raw_handle() const
{
@ -141,7 +145,20 @@ struct gl_render_target_traits
auto format = rsx::internals::surface_color_format_to_gl(surface_color_format);
const auto [width_, height_] = rsx::apply_resolution_scale<true>(static_cast<u16>(width), static_cast<u16>(height));
std::unique_ptr<gl::render_target> result(new gl::render_target(width_, height_,
u8 samples;
rsx::surface_sample_layout sample_layout;
if (g_cfg.video.antialiasing_level == msaa_level::_auto)
{
samples = get_format_sample_count(antialias);
sample_layout = rsx::surface_sample_layout::ps3;
}
else
{
samples = 1;
sample_layout = rsx::surface_sample_layout::null;
}
std::unique_ptr<gl::render_target> result(new gl::render_target(width_, height_, samples,
static_cast<GLenum>(format.internal_format), RSX_FORMAT_CLASS_COLOR));
result->set_aa_mode(antialias);
@ -154,6 +171,7 @@ struct gl_render_target_traits
result->memory_usage_flags = rsx::surface_usage_flags::attachment;
result->state_flags = rsx::surface_state_flags::erase_bkgnd;
result->sample_layout = sample_layout;
result->queue_tag(address);
result->add_ref();
return result;
@ -170,7 +188,20 @@ struct gl_render_target_traits
auto format = rsx::internals::surface_depth_format_to_gl(surface_depth_format);
const auto [width_, height_] = rsx::apply_resolution_scale<true>(static_cast<u16>(width), static_cast<u16>(height));
std::unique_ptr<gl::render_target> result(new gl::render_target(width_, height_,
u8 samples;
rsx::surface_sample_layout sample_layout;
if (g_cfg.video.antialiasing_level == msaa_level::_auto)
{
samples = get_format_sample_count(antialias);
sample_layout = rsx::surface_sample_layout::ps3;
}
else
{
samples = 1;
sample_layout = rsx::surface_sample_layout::null;
}
std::unique_ptr<gl::render_target> result(new gl::render_target(width_, height_, samples,
static_cast<GLenum>(format.internal_format), rsx::classify_format(surface_depth_format)));
result->set_aa_mode(antialias);
@ -183,6 +214,7 @@ struct gl_render_target_traits
result->memory_usage_flags = rsx::surface_usage_flags::attachment;
result->state_flags = rsx::surface_state_flags::erase_bkgnd;
result->sample_layout = sample_layout;
result->queue_tag(address);
result->add_ref();
return result;
@ -200,7 +232,7 @@ struct gl_render_target_traits
const auto [new_w, new_h] = rsx::apply_resolution_scale<true>(prev.width, prev.height,
ref->get_surface_width<rsx::surface_metrics::pixels>(), ref->get_surface_height<rsx::surface_metrics::pixels>());
sink = std::make_unique<gl::render_target>(new_w, new_h, internal_format, ref->format_class());
sink = std::make_unique<gl::render_target>(new_w, new_h, ref->samples(), internal_format, ref->format_class());
sink->add_ref();
sink->memory_usage_flags = rsx::surface_usage_flags::storage;
@ -255,8 +287,9 @@ struct gl_render_target_traits
}
static
void prepare_surface_for_drawing(gl::command_context&, gl::render_target* surface)
void prepare_surface_for_drawing(gl::command_context& cmd, gl::render_target* surface)
{
surface->memory_barrier(cmd, rsx::surface_access::gpu_reference);
surface->memory_usage_flags |= rsx::surface_usage_flags::attachment;
}


@ -0,0 +1,391 @@
#include "stdafx.h"
#include "GLResolveHelper.h"
#include "GLTexture.h"
#include <unordered_map>
#include <stack>
namespace gl
{
std::unordered_map<texture::internal_format, std::unique_ptr<cs_resolve_task>> g_resolve_helpers;
std::unordered_map<texture::internal_format, std::unique_ptr<cs_unresolve_task>> g_unresolve_helpers;
std::unordered_map<GLuint, std::unique_ptr<ds_resolve_pass_base>> g_depth_resolvers;
std::unordered_map<GLuint, std::unique_ptr<ds_resolve_pass_base>> g_depth_unresolvers;
void clear_resolve_helpers()
{
g_resolve_helpers.clear();
g_unresolve_helpers.clear();
g_depth_resolvers.clear();
g_depth_unresolvers.clear();
}
static const char* get_format_string(gl::texture::internal_format format)
{
switch (format)
{
case texture::internal_format::rgb565:
return "r16";
case texture::internal_format::rgba8:
case texture::internal_format::bgra8:
return "rgba8";
case texture::internal_format::rgba16f:
return "rgba16f";
case texture::internal_format::rgba32f:
return "rgba32f";
case texture::internal_format::bgr5a1:
return "r16";
case texture::internal_format::r8:
return "r8";
case texture::internal_format::rg8:
return "rg8";
case texture::internal_format::r32f:
return "r32f";
default:
fmt::throw_exception("Unhandled internal format 0x%x", u32(format));
}
}
void resolve_image(gl::command_context& cmd, gl::viewable_image* dst, gl::viewable_image* src)
{
ensure(src->samples() > 1 && dst->samples() == 1);
if (src->aspect() == gl::image_aspect::color) [[ likely ]]
{
auto& job = g_resolve_helpers[src->get_internal_format()];
if (!job)
{
const auto fmt = get_format_string(src->get_internal_format());
job.reset(new cs_resolve_task(fmt));
}
job->run(cmd, src, dst);
return;
}
auto get_resolver_pass = [](GLuint aspect_bits) -> std::unique_ptr<ds_resolve_pass_base>&
{
auto& pass = g_depth_resolvers[aspect_bits];
if (!pass)
{
ds_resolve_pass_base* ptr = nullptr;
switch (aspect_bits)
{
case gl::image_aspect::depth:
ptr = new depth_only_resolver();
break;
case gl::image_aspect::stencil:
ptr = new stencil_only_resolver();
break;
case (gl::image_aspect::depth | gl::image_aspect::stencil):
ptr = new depth_stencil_resolver();
break;
default:
fmt::throw_exception("Unreachable");
}
pass.reset(ptr);
}
return pass;
};
if (src->aspect() == (gl::image_aspect::depth | gl::image_aspect::stencil) &&
!gl::get_driver_caps().ARB_shader_stencil_export_supported)
{
// Special case, NVIDIA-only fallback
auto& depth_pass = get_resolver_pass(gl::image_aspect::depth);
depth_pass->run(cmd, src, dst);
auto& stencil_pass = get_resolver_pass(gl::image_aspect::stencil);
stencil_pass->run(cmd, src, dst);
return;
}
auto& pass = get_resolver_pass(src->aspect());
pass->run(cmd, src, dst);
}
void unresolve_image(gl::command_context& cmd, gl::viewable_image* dst, gl::viewable_image* src)
{
ensure(dst->samples() > 1 && src->samples() == 1);
if (src->aspect() == gl::image_aspect::color) [[ likely ]]
{
auto& job = g_unresolve_helpers[src->get_internal_format()];
if (!job)
{
const auto fmt = get_format_string(src->get_internal_format());
job.reset(new cs_unresolve_task(fmt));
}
job->run(cmd, dst, src);
return;
}
auto get_unresolver_pass = [](GLuint aspect_bits) -> std::unique_ptr<ds_resolve_pass_base>&
{
auto& pass = g_depth_unresolvers[aspect_bits];
if (!pass)
{
ds_resolve_pass_base* ptr = nullptr;
switch (aspect_bits)
{
case gl::image_aspect::depth:
ptr = new depth_only_unresolver();
break;
case gl::image_aspect::stencil:
ptr = new stencil_only_unresolver();
break;
case (gl::image_aspect::depth | gl::image_aspect::stencil):
ptr = new depth_stencil_unresolver();
break;
default:
fmt::throw_exception("Unreachable");
}
pass.reset(ptr);
}
return pass;
};
if (src->aspect() == (gl::image_aspect::depth | gl::image_aspect::stencil) &&
!gl::get_driver_caps().ARB_shader_stencil_export_supported)
{
// Special case, NVIDIA-only fallback
auto& depth_pass = get_unresolver_pass(gl::image_aspect::depth);
depth_pass->run(cmd, dst, src);
auto& stencil_pass = get_unresolver_pass(gl::image_aspect::stencil);
stencil_pass->run(cmd, dst, src);
return;
}
auto& pass = get_unresolver_pass(src->aspect());
pass->run(cmd, dst, src);
}
// Implementation
void cs_resolve_base::build(const std::string& format_prefix, bool unresolve)
{
is_unresolve = unresolve;
switch (optimal_group_size)
{
default:
case 64:
cs_wave_x = 8;
cs_wave_y = 8;
break;
case 32:
cs_wave_x = 8;
cs_wave_y = 4;
break;
}
static const char* resolve_kernel =
#include "Emu/RSX/Program/MSAA/ColorResolvePass.glsl"
;
static const char* unresolve_kernel =
#include "Emu/RSX/Program/MSAA/ColorUnresolvePass.glsl"
;
const std::pair<std::string_view, std::string> syntax_replace[] =
{
{ "%WORKGROUP_SIZE_X", std::to_string(cs_wave_x) },
{ "%WORKGROUP_SIZE_Y", std::to_string(cs_wave_y) },
{ "%IMAGE_FORMAT", format_prefix },
{ "%BGRA_SWAP", "0" }
};
m_src = unresolve ? unresolve_kernel : resolve_kernel;
m_src = fmt::replace_all(m_src, syntax_replace);
rsx_log.notice("Resolve shader:\n%s", m_src);
create();
}
void cs_resolve_base::bind_resources()
{
auto msaa_view = multisampled->get_view(rsx::default_remap_vector.with_encoding(GL_REMAP_VIEW_MULTISAMPLED));
auto resolved_view = resolve->get_view(rsx::default_remap_vector.with_encoding(GL_REMAP_IDENTITY));
glBindImageTexture(GL_COMPUTE_IMAGE_SLOT(0), msaa_view->id(), 0, GL_FALSE, 0, is_unresolve ? GL_WRITE_ONLY : GL_READ_ONLY, msaa_view->view_format());
glBindImageTexture(GL_COMPUTE_IMAGE_SLOT(1), resolved_view->id(), 0, GL_FALSE, 0, is_unresolve ? GL_READ_ONLY : GL_WRITE_ONLY, resolved_view->view_format());
}
void cs_resolve_base::run(gl::command_context& cmd, gl::viewable_image* msaa_image, gl::viewable_image* resolve_image)
{
ensure(msaa_image->samples() > 1);
ensure(resolve_image->samples() == 1);
multisampled = msaa_image;
resolve = resolve_image;
const u32 invocations_x = utils::align(resolve_image->width(), cs_wave_x) / cs_wave_x;
const u32 invocations_y = utils::align(resolve_image->height(), cs_wave_y) / cs_wave_y;
compute_task::run(cmd, invocations_x, invocations_y);
}
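The dispatch above rounds the resolve target up to whole workgroups. A small standalone check of that arithmetic, with align_up standing in for utils::align (assumed to round up to the next multiple):
#include <cstdint>
// Stand-in for utils::align (assumption: rounds 'value' up to a multiple of 'alignment').
constexpr uint32_t align_up(uint32_t value, uint32_t alignment)
{
    return (value + alignment - 1) / alignment * alignment;
}
constexpr uint32_t group_count(uint32_t extent, uint32_t wave)
{
    return align_up(extent, wave) / wave;
}
// A 1280x720 resolve target with the default 8x8 workgroup dispatches 160x90 groups.
static_assert(group_count(1280, 8) == 160 && group_count(720, 8) == 90);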
void ds_resolve_pass_base::build(bool depth, bool stencil, bool unresolve)
{
m_config.resolve_depth = depth;
m_config.resolve_stencil = stencil;
m_config.is_unresolve = unresolve;
vs_src =
#include "Emu/RSX/Program/GLSLSnippets/GenericVSPassthrough.glsl"
;
static const char* depth_resolver =
#include "Emu/RSX/Program/MSAA/DepthResolvePass.glsl"
;
static const char* depth_unresolver =
#include "Emu/RSX/Program/MSAA/DepthUnresolvePass.glsl"
;
static const char* stencil_resolver =
#include "Emu/RSX/Program/MSAA/StencilResolvePass.glsl"
;
static const char* stencil_unresolver =
#include "Emu/RSX/Program/MSAA/StencilUnresolvePass.glsl"
;
static const char* depth_stencil_resolver =
#include "Emu/RSX/Program/MSAA/DepthStencilResolvePass.glsl"
;
static const char* depth_stencil_unresolver =
#include "Emu/RSX/Program/MSAA/DepthStencilUnresolvePass.glsl"
;
if (m_config.resolve_depth && m_config.resolve_stencil)
{
fs_src = m_config.is_unresolve ? depth_stencil_unresolver : depth_stencil_resolver;
m_write_aspect_mask = gl::image_aspect::depth | gl::image_aspect::stencil;
}
else if (m_config.resolve_depth)
{
fs_src = m_config.is_unresolve ? depth_unresolver : depth_resolver;
m_write_aspect_mask = gl::image_aspect::depth;
}
else if (m_config.resolve_stencil)
{
fs_src = m_config.is_unresolve ? stencil_unresolver : stencil_resolver;
m_write_aspect_mask = gl::image_aspect::stencil;
}
enable_depth_writes = m_config.resolve_depth;
enable_stencil_writes = m_config.resolve_stencil;
create();
rsx_log.notice("Resolve shader:\n%s", fs_src);
}
void ds_resolve_pass_base::update_config()
{
ensure(multisampled && multisampled->samples() > 1);
switch (multisampled->samples())
{
case 2:
m_config.sample_count.x = 2;
m_config.sample_count.y = 1;
break;
case 4:
m_config.sample_count.x = m_config.sample_count.y = 2;
break;
default:
fmt::throw_exception("Unsupported sample count %d", multisampled->samples());
}
program_handle.uniforms["sample_count"] = m_config.sample_count;
}
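The sample_count vector set above encodes the MSAA layout as a grid: 2x uses 2x1 and 4x uses 2x2, so the single-sample resolve image is sample_count times larger in each axis. A tiny illustrative check (the 640x360 surface size is made up for the example):
// Illustrative only: sample layouts used above and the resolve extent they imply.
struct sample_layout { int x, y; };
constexpr sample_layout layout_for(int samples) { return samples == 2 ? sample_layout{2, 1} : sample_layout{2, 2}; }
static_assert(layout_for(4).x * 640 == 1280 && layout_for(4).y * 360 == 720, "4x MSAA: 640x360 -> 1280x720");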
void ds_resolve_pass_base::run(gl::command_context& cmd, gl::viewable_image* msaa_image, gl::viewable_image* resolve_image)
{
multisampled = msaa_image;
resolve = resolve_image;
update_config();
const auto read_resource = m_config.is_unresolve ? resolve_image : msaa_image;
const auto write_resource = m_config.is_unresolve ? msaa_image : resolve_image;
// Resource binding
std::stack<int> bind_slots;
std::vector<std::unique_ptr<saved_sampler_state>> saved_sampler_states;
auto allocate_slot = [&]() -> int
{
ensure(!bind_slots.empty());
const int slot = bind_slots.top();
bind_slots.pop();
saved_sampler_states.emplace_back(std::make_unique<gl::saved_sampler_state>(slot, m_sampler));
return slot;
};
// Reserve 2 slots max
bind_slots.push(GL_TEMP_IMAGE_SLOT(1));
bind_slots.push(GL_TEMP_IMAGE_SLOT(0));
if (m_config.resolve_depth)
{
const int bind_slot = allocate_slot();
cmd->bind_texture(bind_slot, static_cast<GLenum>(read_resource->get_target()), read_resource->id(), GL_TRUE);
}
if (m_config.resolve_stencil)
{
const int bind_slot = allocate_slot();
auto stencil_view = read_resource->get_view(rsx::default_remap_vector.with_encoding(gl::GL_REMAP_IDENTITY), gl::image_aspect::stencil);
cmd->bind_texture(bind_slot, static_cast<GLenum>(read_resource->get_target()), stencil_view->id(), GL_TRUE);
}
areau viewport{};
viewport.x2 = write_resource->width();
viewport.y2 = write_resource->height();
overlay_pass::run(cmd, viewport, write_resource->id(), m_write_aspect_mask, false);
}
void stencil_only_resolver_base::emit_geometry(gl::command_context& cmd)
{
// Modified version of the base overlay pass to emit 8 draws instead of 1
int old_vao;
glGetIntegerv(GL_VERTEX_ARRAY_BINDING, &old_vao);
m_vao.bind();
// Clear the target
gl::clear_cmd_info clear_info
{
.aspect_mask = gl::image_aspect::stencil,
.clear_stencil = {
.mask = 0xFF,
.value = 0
}
};
gl::clear_attachments(cmd, clear_info);
// Override stencil settings. Always pass, reference is all ones, compare mask doesn't matter.
// For each pass the write mask will be overridden to commit output bitwise
cmd->stencil_func(GL_ALWAYS, 0xFF, 0xFF);
cmd->stencil_op(GL_REPLACE, GL_REPLACE, GL_REPLACE);
// Start our inner loop
for (s32 write_mask = 0x1; write_mask <= 0x80; write_mask <<= 1)
{
program_handle.uniforms["stencil_mask"] = write_mask;
cmd->stencil_mask(write_mask);
glDrawArrays(primitives, 0, num_drawable_elements);
}
glBindVertexArray(old_vao);
}
}
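Without ARB_shader_stencil_export a fragment shader cannot write a stencil value directly, so the 8-draw loop in emit_geometry above commits one bit per pass: the shader discards fragments whose source bit is clear, and the stencil write mask restricts the REPLACE to that single bit. A standalone sketch of the same reconstruction on the CPU, for illustration only:
#include <cassert>
int main()
{
    const unsigned source_stencil = 0xA5; // value the shader reads from the source image
    unsigned committed = 0;
    for (unsigned write_mask = 0x01; write_mask <= 0x80; write_mask <<= 1)
    {
        if ((source_stencil & write_mask) == 0)
            continue;                // shader path: discard, this pass writes nothing
        committed |= write_mask;     // GL path: REPLACE with ref 0xFF through this write mask
    }
    assert(committed == source_stencil);
    return 0;
}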

View file

@@ -0,0 +1,129 @@
#pragma once
#include "GLCompute.h"
#include "GLOverlays.h"
namespace gl
{
void resolve_image(gl::command_context& cmd, gl::viewable_image* dst, gl::viewable_image* src);
void unresolve_image(gl::command_context& cmd, gl::viewable_image* dst, gl::viewable_image* src);
void clear_resolve_helpers();
struct cs_resolve_base : compute_task
{
gl::viewable_image* multisampled = nullptr;
gl::viewable_image* resolve = nullptr;
bool is_unresolve = false;
u32 cs_wave_x = 1;
u32 cs_wave_y = 1;
cs_resolve_base()
{}
virtual ~cs_resolve_base()
{}
void build(const std::string& format_prefix, bool unresolve);
void bind_resources() override;
void run(gl::command_context& cmd, gl::viewable_image* msaa_image, gl::viewable_image* resolve_image);
};
struct cs_resolve_task : cs_resolve_base
{
cs_resolve_task(const std::string& format_prefix)
{
build(format_prefix, false);
}
};
struct cs_unresolve_task : cs_resolve_base
{
cs_unresolve_task(const std::string& format_prefix)
{
build(format_prefix, true);
}
};
struct ds_resolve_pass_base : overlay_pass
{
gl::viewable_image* multisampled = nullptr;
gl::viewable_image* resolve = nullptr;
struct
{
bool resolve_depth = false;
bool resolve_stencil = false;
bool is_unresolve = false;
color2i sample_count;
} m_config;
void build(bool depth, bool stencil, bool unresolve);
void update_config();
void run(gl::command_context& cmd, gl::viewable_image* msaa_image, gl::viewable_image* resolve_image);
};
struct depth_only_resolver : ds_resolve_pass_base
{
depth_only_resolver()
{
build(true, false, false);
}
};
struct depth_only_unresolver : ds_resolve_pass_base
{
depth_only_unresolver()
{
build(true, false, true);
}
};
struct stencil_only_resolver_base : ds_resolve_pass_base
{
virtual ~stencil_only_resolver_base() = default;
void build(bool is_unresolver)
{
ds_resolve_pass_base::build(false, true, is_unresolver);
}
void emit_geometry(gl::command_context& cmd) override;
};
struct stencil_only_resolver : stencil_only_resolver_base
{
stencil_only_resolver()
{
build(false);
}
};
struct stencil_only_unresolver : stencil_only_resolver_base
{
stencil_only_unresolver()
{
build(true);
}
};
struct depth_stencil_resolver : ds_resolve_pass_base
{
depth_stencil_resolver()
{
build(true, true, false);
}
};
struct depth_stencil_unresolver : ds_resolve_pass_base
{
depth_stencil_unresolver()
{
build(true, true, true);
}
};
}

View file

@@ -429,7 +429,7 @@ namespace gl
image_region.height *= dst_region.depth;
scratch = std::make_unique<gl::texture>(
GL_TEXTURE_2D,
image_region.x + image_region.width, image_region.y + image_region.height, 1, 1,
image_region.x + image_region.width, image_region.y + image_region.height, 1, 1, 1,
static_cast<GLenum>(dst->get_internal_format()), dst->format_class());
scratch_view = std::make_unique<gl::nil_texture_view>(scratch.get());
@@ -445,7 +445,7 @@ namespace gl
{
scratch = std::make_unique<gl::texture>(
GL_TEXTURE_2D,
image_region.x + image_region.width, 1, 1, 1,
image_region.x + image_region.width, 1, 1, 1, 1,
static_cast<GLenum>(dst->get_internal_format()), dst->format_class());
scratch_view = std::make_unique<gl::nil_texture_view>(scratch.get());
@@ -576,7 +576,7 @@ namespace gl
const GLenum internal_format = get_sized_internal_format(gcm_format);
const auto format_class = rsx::classify_format(gcm_format);
return new gl::viewable_image(target, width, height, depth, mipmaps, internal_format, format_class);
return new gl::viewable_image(target, width, height, depth, mipmaps, 1, internal_format, format_class);
}
void fill_texture(gl::command_context& cmd, texture* dst, int format,

View file

@@ -149,7 +149,7 @@ namespace gl
if (!dst)
{
std::unique_ptr<temporary_image_t> data = std::make_unique<temporary_image_t>(dst_target, width, height, depth, mipmaps, sized_internal_fmt, rsx::classify_format(gcm_format));
std::unique_ptr<temporary_image_t> data = std::make_unique<temporary_image_t>(dst_target, width, height, depth, mipmaps, 1, sized_internal_fmt, rsx::classify_format(gcm_format));
dst = data.get();
dst->properties_encoding = match_key;
m_temporary_surfaces.emplace_back(std::move(data));
@@ -223,7 +223,12 @@ namespace gl
{
const auto src_bpp = slice.src->pitch() / slice.src->width();
const u16 convert_w = u16(slice.src->width() * src_bpp) / dst_bpp;
tmp = std::make_unique<texture>(GL_TEXTURE_2D, convert_w, slice.src->height(), 1, 1, static_cast<GLenum>(dst_image->get_internal_format()), dst_image->format_class());
tmp = std::make_unique<texture>(
GL_TEXTURE_2D,
convert_w, slice.src->height(),
1, 1, 1,
static_cast<GLenum>(dst_image->get_internal_format()),
dst_image->format_class());
src_image = tmp.get();
@@ -264,9 +269,17 @@ namespace gl
const areai dst_rect = { slice.dst_x, slice.dst_y, slice.dst_x + slice.dst_w, slice.dst_y + slice.dst_h };
gl::texture* _dst = dst_image;
if (src_image->get_internal_format() != dst_image->get_internal_format() || slice.level != 0 || slice.dst_z != 0) [[ unlikely ]]
if (src_image->get_internal_format() != dst_image->get_internal_format() ||
slice.level != 0 ||
slice.dst_z != 0) [[ unlikely ]]
{
tmp = std::make_unique<texture>(GL_TEXTURE_2D, dst_rect.x2, dst_rect.y2, 1, 1, static_cast<GLenum>(slice.src->get_internal_format()));
tmp = std::make_unique<texture>(
GL_TEXTURE_2D,
dst_rect.x2, dst_rect.y2,
1, 1, 1,
static_cast<GLenum>(slice.src->get_internal_format()),
slice.src->format_class());
_dst = tmp.get();
}

View file

@@ -262,28 +262,24 @@ namespace gl
baseclass::on_miss();
}
gl::texture* target_texture = vram_texture;
u32 transfer_width = width;
u32 transfer_height = height;
if (context == rsx::texture_upload_context::framebuffer_storage)
{
auto as_rtt = static_cast<gl::render_target*>(vram_texture);
if (as_rtt->dirty()) as_rtt->read_barrier(cmd);
auto surface = gl::as_rtt(vram_texture);
surface->memory_barrier(cmd, rsx::surface_access::transfer_read);
target_texture = surface->get_surface(rsx::surface_access::transfer_read);
transfer_width *= surface->samples_x;
transfer_height *= surface->samples_y;
}
gl::texture* target_texture = vram_texture;
if ((rsx::get_resolution_scale_percent() != 100 && context == rsx::texture_upload_context::framebuffer_storage) ||
(vram_texture->pitch() != rsx_pitch))
{
u32 real_width = width;
u32 real_height = height;
if (context == rsx::texture_upload_context::framebuffer_storage)
{
auto surface = gl::as_rtt(vram_texture);
real_width *= surface->samples_x;
real_height *= surface->samples_y;
}
areai src_area = { 0, 0, 0, 0 };
const areai dst_area = { 0, 0, static_cast<s32>(real_width), static_cast<s32>(real_height) };
const areai dst_area = { 0, 0, static_cast<s32>(transfer_width), static_cast<s32>(transfer_height) };
auto ifmt = vram_texture->get_internal_format();
src_area.x2 = vram_texture->width();
@@ -294,22 +290,22 @@ namespace gl
if (scaled_texture)
{
auto sfmt = scaled_texture->get_internal_format();
if (scaled_texture->width() != real_width ||
scaled_texture->height() != real_height ||
if (scaled_texture->width() != transfer_width ||
scaled_texture->height() != transfer_height ||
sfmt != ifmt)
{
//Discard current scaled texture
// Discard current scaled texture
scaled_texture.reset();
}
}
if (!scaled_texture)
{
scaled_texture = std::make_unique<gl::texture>(GL_TEXTURE_2D, real_width, real_height, 1, 1, static_cast<GLenum>(ifmt));
scaled_texture = std::make_unique<gl::texture>(GL_TEXTURE_2D, transfer_width, transfer_height, 1, 1, 1, static_cast<GLenum>(ifmt), vram_texture->format_class());
}
const bool linear_interp = is_depth_texture() ? false : true;
g_hw_blitter->scale_image(cmd, vram_texture, scaled_texture.get(), src_area, dst_area, linear_interp, {});
g_hw_blitter->scale_image(cmd, target_texture, scaled_texture.get(), src_area, dst_area, linear_interp, {});
target_texture = scaled_texture.get();
}
}

View file

@@ -73,7 +73,31 @@ void GLVertexDecompilerThread::insertConstants(std::stringstream& OS, const std:
continue;
}
OS << "uniform " << PT.type << " " << PI.name << ";\n";
auto type = PT.type;
if (PT.type == "sampler2D" ||
PT.type == "samplerCube" ||
PT.type == "sampler1D" ||
PT.type == "sampler3D")
{
if (m_prog.texture_state.multisampled_textures) [[ unlikely ]]
{
ensure(PI.name.length() > 3);
int index = atoi(&PI.name[3]);
if (m_prog.texture_state.multisampled_textures & (1 << index))
{
if (type != "sampler1D" && type != "sampler2D")
{
rsx_log.error("Unexpected multisampled sampler type '%s'", type);
}
type = "sampler2DMS";
}
}
}
OS << "uniform " << type << " " << PI.name << ";\n";
}
}
}
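The remap above keys off the numeric suffix of the sampler name and the per-unit bit in multisampled_textures. A quick standalone check of that parsing (the name and mask values are illustrative):
#include <cassert>
#include <cstdlib>
#include <string>
int main()
{
    const std::string name = "tex3";                // stand-in for PI.name
    const unsigned multisampled_textures = 1u << 3; // unit 3 flagged as multisampled
    const int index = std::atoi(&name[3]);          // parses the numeric suffix, as above
    assert(index == 3);
    assert((multisampled_textures & (1u << index)) != 0); // so the uniform is emitted as sampler2DMS
    return 0;
}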

View file

@@ -67,7 +67,7 @@ namespace gl
if (static_cast<gl::texture::internal_format>(internal_fmt) != src->get_internal_format())
{
const u16 internal_width = static_cast<u16>(src->width() * xfer_info.src_scaling_hint);
typeless_src = std::make_unique<texture>(GL_TEXTURE_2D, internal_width, src->height(), 1, 1, internal_fmt);
typeless_src = std::make_unique<texture>(GL_TEXTURE_2D, internal_width, src->height(), 1, 1, 1, internal_fmt, RSX_FORMAT_CLASS_DONT_CARE);
copy_typeless(cmd, typeless_src.get(), src);
real_src = typeless_src.get();
@@ -85,7 +85,7 @@ namespace gl
if (static_cast<gl::texture::internal_format>(internal_fmt) != dst->get_internal_format())
{
const auto internal_width = static_cast<u16>(dst->width() * xfer_info.dst_scaling_hint);
typeless_dst = std::make_unique<texture>(GL_TEXTURE_2D, internal_width, dst->height(), 1, 1, internal_fmt);
typeless_dst = std::make_unique<texture>(GL_TEXTURE_2D, internal_width, dst->height(), 1, 1, 1, internal_fmt, RSX_FORMAT_CLASS_DONT_CARE);
copy_typeless(cmd, typeless_dst.get(), dst);
real_dst = typeless_dst.get();

View file

@@ -33,7 +33,7 @@ namespace gl
void capabilities::initialize()
{
int find_count = 17;
int find_count = 18;
int ext_count = 0;
glGetIntegerv(GL_NUM_EXTENSIONS, &ext_count);
@@ -171,6 +171,13 @@ namespace gl
find_count--;
continue;
}
if (check(ext_name, "GL_ARB_shader_texture_image_samples"))
{
ARB_shader_texture_image_samples = true;
find_count--;
continue;
}
}
// Set GLSL version

View file

@@ -40,6 +40,7 @@ namespace gl
bool ARB_compute_shader_supported = false;
bool NV_depth_buffer_float_supported = false;
bool NV_fragment_shader_barycentric_supported = false;
bool ARB_shader_texture_image_samples = false;
bool vendor_INTEL = false; // has broken GLSL compiler
bool vendor_AMD = false; // has broken ARB_multidraw

View file

@@ -125,7 +125,9 @@ namespace gl
void operator = (const texture& rhs)
{
ensure(rhs.get_target() == texture::target::texture2D);
ensure(rhs.get_target() == texture::target::texture2D ||
rhs.get_target() == texture::target::texture2DMS);
m_parent.m_resource_bindings[m_id] = rhs.id();
DSA_CALL2(NamedFramebufferTexture, m_parent.id(), m_id, rhs.id(), 0);
}

View file

@@ -19,8 +19,24 @@ namespace gl
}
}
texture::texture(GLenum target, GLuint width, GLuint height, GLuint depth, GLuint mipmaps, GLenum sized_format, rsx::format_class format_class)
texture::texture(GLenum target, GLuint width, GLuint height, GLuint depth, GLuint mipmaps, GLubyte samples, GLenum sized_format, rsx::format_class format_class)
{
// Upgrade targets for MSAA
if (samples > 1)
{
switch (target)
{
case GL_TEXTURE_2D:
target = GL_TEXTURE_2D_MULTISAMPLE;
break;
case GL_TEXTURE_2D_ARRAY:
target = GL_TEXTURE_2D_MULTISAMPLE_ARRAY;
break;
default:
fmt::throw_exception("MSAA is only supported on 2D images. Target=0x%x", target);
}
}
glGenTextures(1, &m_id);
// Must bind to initialize the new texture
@@ -40,30 +56,45 @@ namespace gl
glTexStorage2D(target, mipmaps, storage_fmt, width, height);
depth = 1;
break;
case GL_TEXTURE_2D_MULTISAMPLE:
ensure(mipmaps == 1);
glTexStorage2DMultisample(target, samples, storage_fmt, width, height, GL_TRUE);
depth = 1;
break;
case GL_TEXTURE_3D:
case GL_TEXTURE_2D_ARRAY:
glTexStorage3D(target, mipmaps, storage_fmt, width, height, depth);
break;
case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
ensure(mipmaps == 1);
glTexStorage3DMultisample(target, samples, storage_fmt, width, height, depth, GL_TRUE);
break;
case GL_TEXTURE_BUFFER:
break;
}
if (target != GL_TEXTURE_BUFFER)
{
glTexParameteri(target, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(target, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(target, GL_TEXTURE_WRAP_S, GL_REPEAT);
glTexParameteri(target, GL_TEXTURE_WRAP_T, GL_REPEAT);
glTexParameteri(target, GL_TEXTURE_WRAP_R, GL_REPEAT);
glTexParameteri(target, GL_TEXTURE_BASE_LEVEL, 0);
glTexParameteri(target, GL_TEXTURE_MAX_LEVEL, mipmaps - 1);
if (samples == 1)
{
glTexParameteri(target, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(target, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(target, GL_TEXTURE_WRAP_S, GL_REPEAT);
glTexParameteri(target, GL_TEXTURE_WRAP_T, GL_REPEAT);
glTexParameteri(target, GL_TEXTURE_WRAP_R, GL_REPEAT);
glTexParameteri(target, GL_TEXTURE_BASE_LEVEL, 0);
glTexParameteri(target, GL_TEXTURE_MAX_LEVEL, mipmaps - 1);
}
m_width = width;
m_height = height;
m_depth = depth;
m_mipmaps = mipmaps;
m_samples = samples;
m_aspect_flags = image_aspect::color;
ensure(width > 0 && height > 0 && depth > 0 && mipmaps > 0 && samples > 0, "Invalid OpenGL texture definition.");
switch (storage_fmt)
{
case GL_DEPTH_COMPONENT16:
@@ -146,6 +177,8 @@ namespace gl
void texture::copy_from(const void* src, texture::format format, texture::type type, int level, const coord3u region, const pixel_unpack_settings& pixel_settings)
{
ensure(m_samples <= 1, "Transfer operations are unsupported on multisampled textures.");
pixel_settings.apply();
switch (const auto target_ = static_cast<GLenum>(m_target))
@@ -190,6 +223,8 @@ namespace gl
void texture::copy_from(buffer& buf, u32 gl_format_type, u32 offset, u32 length)
{
ensure(m_samples <= 1, "Transfer operations are unsupported on multisampled textures.");
if (get_target() != target::textureBuffer)
fmt::throw_exception("OpenGL error: texture cannot copy from buffer");
@@ -203,6 +238,8 @@ namespace gl
void texture::copy_to(void* dst, texture::format format, texture::type type, int level, const coord3u& region, const pixel_pack_settings& pixel_settings) const
{
ensure(m_samples <= 1, "Transfer operations are unsupported on multisampled textures.");
pixel_settings.apply();
const auto& caps = get_driver_caps();
@@ -223,7 +260,7 @@ namespace gl
{
// Worst case scenario. For some reason, EXT_dsa does not have glGetTextureSubImage
const auto target_ = static_cast<GLenum>(m_target);
texture tmp{ target_, region.width, region.height, region.depth, 1, static_cast<GLenum>(m_internal_format) };
texture tmp{ target_, region.width, region.height, region.depth, 1, 1, static_cast<GLenum>(m_internal_format), m_format_class };
glCopyImageSubData(m_id, target_, level, region.x, region.y, region.z, tmp.id(), target_, 0, 0, 0, 0,
region.width, region.height, region.depth);
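With the samples argument added to the gl::texture constructor above, creating a multisampled image is one extra parameter. A hypothetical call, with dimensions and format chosen for illustration and the format class spelled as in the snippets above:
// Illustrative only: a 4-sample RGBA8 image. Because samples > 1 the constructor
// upgrades GL_TEXTURE_2D to GL_TEXTURE_2D_MULTISAMPLE and requires mipmaps == 1.
auto msaa_image = std::make_unique<gl::viewable_image>(
    GL_TEXTURE_2D,
    1280, 720,   // width, height
    1, 1,        // depth, mipmaps
    4,           // samples
    GL_RGBA8,
    RSX_FORMAT_CLASS_COLOR);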

View file

@@ -45,7 +45,8 @@ namespace gl
enum remap_constants : u32
{
GL_REMAP_IDENTITY = 0xCAFEBABE,
GL_REMAP_BGRA = 0x0000AA6C
GL_REMAP_BGRA = 0x0000AA6C,
GL_REMAP_VIEW_MULTISAMPLED = 0xDEADBEEF
};
struct subresource_range
@@ -174,7 +175,8 @@ namespace gl
texture3D = GL_TEXTURE_3D,
textureCUBE = GL_TEXTURE_CUBE_MAP,
textureBuffer = GL_TEXTURE_BUFFER,
texture2DArray = GL_TEXTURE_2D_ARRAY
texture2DArray = GL_TEXTURE_2D_ARRAY,
texture2DMS = GL_TEXTURE_2D_MULTISAMPLE
};
protected:
@@ -183,6 +185,7 @@ namespace gl
GLuint m_height = 0;
GLuint m_depth = 0;
GLuint m_mipmaps = 0;
GLubyte m_samples = 0;
GLuint m_pitch = 0;
GLuint m_compressed = GL_FALSE;
GLuint m_aspect_flags = 0;
@@ -197,7 +200,7 @@ namespace gl
texture(const texture&) = delete;
texture(texture&& texture_) = delete;
texture(GLenum target, GLuint width, GLuint height, GLuint depth, GLuint mipmaps, GLenum sized_format, rsx::format_class format_class = rsx::RSX_FORMAT_CLASS_UNDEFINED);
texture(GLenum target, GLuint width, GLuint height, GLuint depth, GLuint mipmaps, GLubyte samples, GLenum sized_format, rsx::format_class format_class);
virtual ~texture();
// Getters/setters
@@ -276,9 +279,9 @@ namespace gl
return m_pitch;
}
constexpr GLubyte samples() const
GLubyte samples() const
{
return 1;
return m_samples;
}
GLboolean compressed() const

View file

@@ -309,6 +309,32 @@ namespace gl
}
}
void sample_mask(GLbitfield mask)
{
if (!test_and_set_property(GL_SAMPLE_MASK_VALUE, mask))
{
glSampleMaski(0, mask);
}
}
void sample_coverage(GLclampf coverage)
{
const u32 value = std::bit_cast<u32>(coverage);
if (!test_and_set_property(GL_SAMPLE_COVERAGE_VALUE, value))
{
glSampleCoverage(coverage, GL_FALSE);
}
}
void min_sample_shading_rate(GLclampf rate)
{
const u32 value = std::bit_cast<u32>(rate);
if (!test_and_set_property(GL_MIN_SAMPLE_SHADING_VALUE, value))
{
glMinSampleShading(rate);
}
}
void clip_planes(GLuint mask)
{
if (!test_and_set_property(CLIP_PLANES, mask))

View file

@@ -177,7 +177,7 @@ namespace gl
{
return std::make_unique<gl::viewable_image>(
GL_TEXTURE_2D,
output_w, output_h, 1, 1,
output_w, output_h, 1, 1, 1,
GL_RGBA8, RSX_FORMAT_CLASS_COLOR);
};

View file

@@ -1,5 +1,7 @@
R"(
#version 420
#extension GL_ARB_separate_shader_objects: enable
layout(location=0) out vec2 tc0;
#ifdef VULKAN

View file

@@ -0,0 +1,37 @@
R"(
#version 430
layout(local_size_x=%WORKGROUP_SIZE_X, local_size_y=%WORKGROUP_SIZE_Y, local_size_z=1) in;
#ifdef VULKAN
layout(set=0, binding=0, %IMAGE_FORMAT) uniform readonly restrict image2DMS multisampled;
layout(set=0, binding=1) uniform writeonly restrict image2D resolve;
#else
layout(binding=0, %IMAGE_FORMAT) uniform readonly restrict image2DMS multisampled;
layout(binding=1) uniform writeonly restrict image2D resolve;
#endif
#if %BGRA_SWAP
#define shuffle(x) (x.bgra)
#else
#define shuffle(x) (x)
#endif
void main()
{
ivec2 resolve_size = imageSize(resolve);
ivec2 aa_size = imageSize(multisampled);
ivec2 sample_count = resolve_size / aa_size;
if (any(greaterThanEqual(gl_GlobalInvocationID.xy, uvec2(resolve_size)))) return;
ivec2 resolve_coords = ivec2(gl_GlobalInvocationID.xy);
ivec2 aa_coords = resolve_coords / sample_count;
ivec2 sample_loc = ivec2(resolve_coords % sample_count);
int sample_index = sample_loc.x + (sample_loc.y * sample_count.y);
vec4 aa_sample = imageLoad(multisampled, aa_coords, sample_index);
imageStore(resolve, resolve_coords, shuffle(aa_sample));
}
)"

View file

@@ -0,0 +1,37 @@
R"(
#version 430
layout(local_size_x=%WORKGROUP_SIZE_X, local_size_y=%WORKGROUP_SIZE_Y, local_size_z=1) in;
#ifdef VULKAN
layout(set=0, binding=0) uniform writeonly restrict image2DMS multisampled;
layout(set=0, binding=1, %IMAGE_FORMAT) uniform readonly restrict image2D resolve;
#else
layout(binding=0) uniform writeonly restrict image2DMS multisampled;
layout(binding=1, %IMAGE_FORMAT) uniform readonly restrict image2D resolve;
#endif
#if %BGRA_SWAP
#define shuffle(x) (x.bgra)
#else
#define shuffle(x) (x)
#endif
void main()
{
ivec2 resolve_size = imageSize(resolve);
ivec2 aa_size = imageSize(multisampled);
ivec2 sample_count = resolve_size / aa_size;
if (any(greaterThanEqual(gl_GlobalInvocationID.xy, uvec2(resolve_size)))) return;
ivec2 resolve_coords = ivec2(gl_GlobalInvocationID.xy);
ivec2 aa_coords = resolve_coords / sample_count;
ivec2 sample_loc = ivec2(resolve_coords % sample_count);
int sample_index = sample_loc.x + (sample_loc.y * sample_count.y);
vec4 resolved_sample = imageLoad(resolve, resolve_coords);
imageStore(multisampled, aa_coords, sample_index, shuffle(resolved_sample));
}
)"

View file

@@ -0,0 +1,23 @@
R"(
#version 420
#extension GL_ARB_separate_shader_objects: enable
#ifdef VULKAN
layout(set=0, binding=0) uniform sampler2DMS fs0;
layout(push_constant) uniform static_data { ivec2 sample_count; };
#else
layout(binding=31) uniform sampler2DMS fs0;
uniform ivec2 sample_count;
#endif
void main()
{
ivec2 out_coord = ivec2(gl_FragCoord.xy);
ivec2 in_coord = (out_coord / sample_count.xy);
ivec2 sample_loc = out_coord % sample_count.xy;
int sample_index = sample_loc.x + (sample_loc.y * sample_count.y);
float frag_depth = texelFetch(fs0, in_coord, sample_index).x;
gl_FragDepth = frag_depth;
}
)"

View file

@@ -0,0 +1,28 @@
R"(
#version 420
#extension GL_ARB_separate_shader_objects: enable
#extension GL_ARB_shader_stencil_export : enable
#ifdef VULKAN
layout(set=0, binding=0) uniform sampler2DMS fs0;
layout(set=0, binding=1) uniform usampler2DMS fs1;
layout(push_constant) uniform static_data { ivec2 sample_count; };
#else
layout(binding=31) uniform sampler2DMS fs0;
layout(binding=30) uniform usampler2DMS fs1;
uniform ivec2 sample_count;
#endif
void main()
{
ivec2 out_coord = ivec2(gl_FragCoord.xy);
ivec2 in_coord = (out_coord / sample_count.xy);
ivec2 sample_loc = out_coord % ivec2(sample_count.xy);
int sample_index = sample_loc.x + (sample_loc.y * sample_count.y);
float frag_depth = texelFetch(fs0, in_coord, sample_index).x;
uint frag_stencil = texelFetch(fs1, in_coord, sample_index).x;
gl_FragDepth = frag_depth;
gl_FragStencilRefARB = int(frag_stencil);
}
)"

View file

@@ -0,0 +1,28 @@
R"(
#version 420
#extension GL_ARB_separate_shader_objects: enable
#extension GL_ARB_shader_stencil_export : enable
#ifdef VULKAN
layout(set=0, binding=0) uniform sampler2D fs0;
layout(set=0, binding=1) uniform usampler2D fs1;
layout(push_constant) uniform static_data { ivec2 sample_count; };
#else
layout(binding=31) uniform sampler2D fs0;
layout(binding=30) uniform usampler2D fs1;
uniform ivec2 sample_count;
#endif
void main()
{
ivec2 pixel_coord = ivec2(gl_FragCoord.xy);
pixel_coord *= sample_count.xy;
pixel_coord.x += (gl_SampleID % sample_count.x);
pixel_coord.y += (gl_SampleID / sample_count.x);
float frag_depth = texelFetch(fs0, pixel_coord, 0).x;
uint frag_stencil = texelFetch(fs1, pixel_coord, 0).x;
gl_FragDepth = frag_depth;
gl_FragStencilRefARB = int(frag_stencil);
}
)"

View file

@@ -0,0 +1,23 @@
R"(
#version 420
#extension GL_ARB_separate_shader_objects: enable
#ifdef VULKAN
layout(set=0, binding=0) uniform sampler2D fs0;
layout(push_constant) uniform static_data { ivec2 sample_count; };
#else
layout(binding=31) uniform sampler2D fs0;
uniform ivec2 sample_count;
#endif
void main()
{
ivec2 pixel_coord = ivec2(gl_FragCoord.xy);
pixel_coord *= sample_count.xy;
pixel_coord.x += (gl_SampleID % sample_count.x);
pixel_coord.y += (gl_SampleID / sample_count.x);
float frag_depth = texelFetch(fs0, pixel_coord, 0).x;
gl_FragDepth = frag_depth;
}
)"

View file

@@ -0,0 +1,28 @@
R"(
#version 420
#extension GL_ARB_separate_shader_objects: enable
#ifdef VULKAN
layout(set=0, binding=0) uniform usampler2DMS fs0;
layout(push_constant) uniform static_data
{
layout(offset=0) ivec2 sample_count;
layout(offset=8) int stencil_mask;
};
#else
layout(binding=31) uniform usampler2DMS fs0;
uniform ivec2 sample_count;
uniform int stencil_mask;
#endif
void main()
{
ivec2 out_coord = ivec2(gl_FragCoord.xy);
ivec2 in_coord = (out_coord / sample_count.xy);
ivec2 sample_loc = out_coord % sample_count.xy;
int sample_index = sample_loc.x + (sample_loc.y * sample_count.y);
uint frag_stencil = texelFetch(fs0, in_coord, sample_index).x;
if ((frag_stencil & uint(stencil_mask)) == 0) discard;
}
)"

View file

@@ -0,0 +1,28 @@
R"(
#version 420
#extension GL_ARB_separate_shader_objects: enable
#ifdef VULKAN
layout(set=0, binding=0) uniform usampler2D fs0;
layout(push_constant) uniform static_data
{
layout(offset=0) ivec2 sample_count;
layout(offset=8) int stencil_mask;
};
#else
layout(binding=31) uniform usampler2D fs0;
uniform ivec2 sample_count;
uniform int stencil_mask;
#endif
void main()
{
ivec2 pixel_coord = ivec2(gl_FragCoord.xy);
pixel_coord *= sample_count.xy;
pixel_coord.x += (gl_SampleID % sample_count.x);
pixel_coord.y += (gl_SampleID / sample_count.x);
uint frag_stencil = texelFetch(fs0, pixel_coord, 0).x;
if ((frag_stencil & uint(stencil_mask)) == 0) discard;
}
)"

View file

@@ -1637,6 +1637,10 @@ namespace rsx
layout.aa_factors[0] = aa_factor_u;
layout.aa_factors[1] = aa_factor_v;
// Log this to frame stats
m_frame_stats.framebuffer_stats.add(layout.width, layout.height, aa_mode);
// Check if anything has changed
bool really_changed = false;
for (u8 i = 0; i < rsx::limits::color_buffers_count; ++i)

View file

@@ -87,6 +87,7 @@ namespace rsx
{
bool supports_multidraw; // Draw call batching
bool supports_hw_a2c; // Alpha to coverage
bool supports_hw_a2c_1spp; // Alpha to coverage at 1 sample per pixel
bool supports_hw_renormalization; // Should be true on NV hardware which matches PS3 texture renormalization behaviour
bool supports_hw_msaa; // MSAA support
bool supports_hw_a2one; // Alpha to one
@@ -466,4 +467,9 @@ namespace rsx
{
return g_fxo->try_get<rsx::thread>();
}
inline const backend_configuration& get_renderer_backend_config()
{
return g_fxo->get<rsx::thread>().get_backend_config();
}
}

View file

@@ -630,6 +630,7 @@ VKGSRender::VKGSRender(utils::serial* ar) noexcept : GSRender(ar)
{
backend_config.supports_hw_msaa = true;
backend_config.supports_hw_a2c = true;
backend_config.supports_hw_a2c_1spp = true;
backend_config.supports_hw_a2one = m_device->get_alpha_to_one_support();
}

View file

@@ -31,7 +31,7 @@ namespace vk
bool g_drv_sanitize_fp_values = false;
bool g_drv_disable_fence_reset = false;
bool g_drv_emulate_cond_render = false;
bool g_drv_strict_query_scopes = false;
bool g_drv_strict_query_scopes = true;
bool g_drv_force_reuse_query_pools = false;
u64 g_num_processed_frames = 0;

View file

@@ -47,9 +47,11 @@ namespace vk
void overlay_pass::init_descriptors()
{
rsx::simple_array<VkDescriptorPoolSize> descriptor_pool_sizes =
rsx::simple_array<VkDescriptorPoolSize> descriptor_pool_sizes = {};
if (m_num_uniform_buffers)
{
{ VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1 }
descriptor_pool_sizes.push_back({ VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, m_num_uniform_buffers });
};
if (m_num_usable_samplers)
@@ -65,35 +67,38 @@ namespace vk
// Reserve descriptor pools
m_descriptor_pool.create(*m_device, descriptor_pool_sizes);
const auto num_bindings = 1 + m_num_usable_samplers + m_num_input_attachments;
const auto num_bindings = m_num_uniform_buffers + m_num_usable_samplers + m_num_input_attachments;
rsx::simple_array<VkDescriptorSetLayoutBinding> bindings(num_bindings);
u32 binding_slot = 0;
bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
bindings[0].descriptorCount = 1;
bindings[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT;
bindings[0].binding = 0;
bindings[0].pImmutableSamplers = nullptr;
u32 descriptor_index = 1;
for (u32 n = 0; n < m_num_usable_samplers; ++n, ++descriptor_index)
for (u32 n = 0; n < m_num_uniform_buffers; ++n, ++binding_slot)
{
bindings[descriptor_index].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
bindings[descriptor_index].descriptorCount = 1;
bindings[descriptor_index].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
bindings[descriptor_index].binding = descriptor_index;
bindings[descriptor_index].pImmutableSamplers = nullptr;
bindings[binding_slot].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
bindings[binding_slot].descriptorCount = 1;
bindings[binding_slot].stageFlags = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT;
bindings[binding_slot].binding = binding_slot;
bindings[binding_slot].pImmutableSamplers = nullptr;
}
for (u32 n = 0; n < m_num_input_attachments; ++n, ++descriptor_index)
for (u32 n = 0; n < m_num_usable_samplers; ++n, ++binding_slot)
{
bindings[descriptor_index].descriptorType = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT;
bindings[descriptor_index].descriptorCount = 1;
bindings[descriptor_index].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
bindings[descriptor_index].binding = descriptor_index;
bindings[descriptor_index].pImmutableSamplers = nullptr;
bindings[binding_slot].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
bindings[binding_slot].descriptorCount = 1;
bindings[binding_slot].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
bindings[binding_slot].binding = binding_slot;
bindings[binding_slot].pImmutableSamplers = nullptr;
}
ensure(descriptor_index == num_bindings);
for (u32 n = 0; n < m_num_input_attachments; ++n, ++binding_slot)
{
bindings[binding_slot].descriptorType = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT;
bindings[binding_slot].descriptorCount = 1;
bindings[binding_slot].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
bindings[binding_slot].binding = binding_slot;
bindings[binding_slot].pImmutableSamplers = nullptr;
}
ensure(binding_slot == num_bindings);
m_descriptor_layout = vk::descriptors::create_layout(bindings);
VkPipelineLayoutCreateInfo layout_info = {};
@@ -120,9 +125,14 @@ namespace vk
std::vector<vk::glsl::program_input> overlay_pass::get_fragment_inputs()
{
std::vector<vk::glsl::program_input> fs_inputs;
fs_inputs.push_back({ ::glsl::program_domain::glsl_fragment_program, vk::glsl::program_input_type::input_type_uniform_buffer,{},{}, 0, "static_data" });
u32 binding = 0;
for (u32 n = 0; n < m_num_uniform_buffers; ++n, ++binding)
{
const std::string name = std::string("static_data") + (n > 0 ? std::to_string(n) : "");
fs_inputs.push_back({ ::glsl::program_domain::glsl_fragment_program, vk::glsl::program_input_type::input_type_uniform_buffer,{},{}, 0, name });
}
u32 binding = 1;
for (u32 n = 0; n < m_num_usable_samplers; ++n, ++binding)
{
fs_inputs.push_back({ ::glsl::program_domain::glsl_fragment_program, vk::glsl::program_input_type::input_type_texture,{},{}, binding, "fs" + std::to_string(n) });
@@ -231,7 +241,10 @@ namespace vk
update_uniforms(cmd, program);
program->bind_uniform({ m_ubo.heap->value, m_ubo_offset, std::max(m_ubo_length, 4u) }, 0, m_descriptor_set);
if (m_num_uniform_buffers > 0)
{
program->bind_uniform({ m_ubo.heap->value, m_ubo_offset, std::max(m_ubo_length, 4u) }, 0, m_descriptor_set);
}
for (uint n = 0; n < src.size(); ++n)
{

View file

@@ -52,6 +52,7 @@ namespace vk
VkFilter m_sampler_filter = VK_FILTER_LINEAR;
u32 m_num_usable_samplers = 1;
u32 m_num_input_attachments = 0;
u32 m_num_uniform_buffers = 1;
std::unordered_map<u64, std::unique_ptr<vk::glsl::program>> m_program_cache;
std::unique_ptr<vk::sampler> m_sampler;

View file

@@ -831,6 +831,7 @@ void VKGSRender::flip(const rsx::display_flip_info_t& info)
: 0;
rsx::overlays::set_debug_overlay_text(fmt::format(
"Internal Resolution: %s\n"
"RSX Load: %3d%%\n"
"draw calls: %17d\n"
"submits: %20d\n"
@@ -845,6 +846,7 @@ void VKGSRender::flip(const rsx::display_flip_info_t& info)
"Flush requests: %13d = %2d (%3d%%) hard faults, %2d unavoidable, %2d misprediction(s), %2d speculation(s)\n"
"Texture uploads: %12u (%u from CPU - %02u%%, %u copies avoided)\n"
"Vertex cache hits: %10u/%u (%u%%)",
info.stats.framebuffer_stats.to_string(!backend_config.supports_hw_msaa),
get_load(), info.stats.draw_calls, info.stats.submit_count, info.stats.setup_time, info.stats.vertex_upload_time,
info.stats.textures_upload_time, info.stats.draw_exec_time, info.stats.flip_time,
num_dirty_textures, texture_memory_size, tmp_texture_memory_size,

View file

@@ -998,7 +998,7 @@ namespace vk
return;
}
// Memory transfers
// Memory transfers
vk::image* target_image = (samples() > 1) ? get_resolve_target_safe(cmd) : this;
vk::blitter hw_blitter;
const auto dst_bpp = get_bpp();

View file

@@ -253,4 +253,90 @@ namespace vk
if (g_depthstencil_resolver) g_depthstencil_resolver->free_resources();
if (g_depthstencil_unresolver) g_depthstencil_unresolver->free_resources();
}
void cs_resolve_base::build(const std::string& format_prefix, bool unresolve, bool bgra_swap)
{
create();
switch (optimal_group_size)
{
default:
case 64:
cs_wave_x = 8;
cs_wave_y = 8;
break;
case 32:
cs_wave_x = 8;
cs_wave_y = 4;
break;
}
static const char* resolve_kernel =
#include "Emu/RSX/Program/MSAA/ColorResolvePass.glsl"
;
static const char* unresolve_kernel =
#include "Emu/RSX/Program/MSAA/ColorUnresolvePass.glsl"
;
const std::pair<std::string_view, std::string> syntax_replace[] =
{
{ "%WORKGROUP_SIZE_X", std::to_string(cs_wave_x) },
{ "%WORKGROUP_SIZE_Y", std::to_string(cs_wave_y) },
{ "%IMAGE_FORMAT", format_prefix },
{ "%BGRA_SWAP", bgra_swap ? "1" : "0" }
};
m_src = unresolve ? unresolve_kernel : resolve_kernel;
m_src = fmt::replace_all(m_src, syntax_replace);
rsx_log.notice("Compute shader:\n%s", m_src);
}
void depth_resolve_base::build(bool resolve_depth, bool resolve_stencil, bool is_unresolve)
{
vs_src =
#include "Emu/RSX/Program/GLSLSnippets/GenericVSPassthrough.glsl"
;
static const char* depth_resolver =
#include "Emu/RSX/Program/MSAA/DepthResolvePass.glsl"
;
static const char* depth_unresolver =
#include "Emu/RSX/Program/MSAA/DepthUnresolvePass.glsl"
;
static const char* stencil_resolver =
#include "Emu/RSX/Program/MSAA/StencilResolvePass.glsl"
;
static const char* stencil_unresolver =
#include "Emu/RSX/Program/MSAA/StencilUnresolvePass.glsl"
;
static const char* depth_stencil_resolver =
#include "Emu/RSX/Program/MSAA/DepthStencilResolvePass.glsl"
;
static const char* depth_stencil_unresolver =
#include "Emu/RSX/Program/MSAA/DepthStencilUnresolvePass.glsl"
;
if (resolve_depth && resolve_stencil)
{
fs_src = is_unresolve ? depth_stencil_unresolver : depth_stencil_resolver;
}
else if (resolve_depth)
{
fs_src = is_unresolve ? depth_unresolver : depth_resolver;
}
else if (resolve_stencil)
{
fs_src = is_unresolve ? stencil_unresolver : stencil_resolver;
}
rsx_log.notice("Resolve shader:\n%s", fs_src);
}
}

View file

@@ -21,70 +21,7 @@ namespace vk
virtual ~cs_resolve_base()
{}
// FIXME: move body to cpp
void build(const std::string& kernel, const std::string& format_prefix, int direction)
{
create();
// TODO: Tweak occupancy
switch (optimal_group_size)
{
default:
case 64:
cs_wave_x = 8;
cs_wave_y = 8;
break;
case 32:
cs_wave_x = 8;
cs_wave_y = 4;
break;
}
const std::pair<std::string_view, std::string> syntax_replace[] =
{
{ "%wx", std::to_string(cs_wave_x) },
{ "%wy", std::to_string(cs_wave_y) },
};
m_src =
"#version 430\n"
"layout(local_size_x=%wx, local_size_y=%wy, local_size_z=1) in;\n"
"\n";
m_src = fmt::replace_all(m_src, syntax_replace);
if (direction == 0)
{
m_src +=
"layout(set=0, binding=0, " + format_prefix + ") uniform readonly restrict image2DMS multisampled;\n"
"layout(set=0, binding=1) uniform writeonly restrict image2D resolve;\n";
}
else
{
m_src +=
"layout(set=0, binding=0) uniform writeonly restrict image2DMS multisampled;\n"
"layout(set=0, binding=1, " + format_prefix + ") uniform readonly restrict image2D resolve;\n";
}
m_src +=
"\n"
"void main()\n"
"{\n"
" ivec2 resolve_size = imageSize(resolve);\n"
" ivec2 aa_size = imageSize(multisampled);\n"
" ivec2 sample_count = resolve_size / aa_size;\n"
"\n"
" if (any(greaterThanEqual(gl_GlobalInvocationID.xy, uvec2(resolve_size)))) return;"
"\n"
" ivec2 resolve_coords = ivec2(gl_GlobalInvocationID.xy);\n"
" ivec2 aa_coords = resolve_coords / sample_count;\n"
" ivec2 sample_loc = ivec2(resolve_coords % sample_count);\n"
" int sample_index = sample_loc.x + (sample_loc.y * sample_count.y);\n"
+ kernel +
"}\n";
rsx_log.notice("Compute shader:\n%s", m_src);
}
void build(const std::string& format_prefix, bool unresolve, bool bgra_swap);
std::vector<std::pair<VkDescriptorType, u8>> get_descriptor_layout() override
{
@@ -144,14 +81,8 @@ namespace vk
{
cs_resolve_task(const std::string& format_prefix, bool bgra_swap = false)
{
// Allow rgba->bgra transformation for old GeForce cards
const std::string swizzle = bgra_swap? ".bgra" : "";
std::string kernel =
" vec4 aa_sample = imageLoad(multisampled, aa_coords, sample_index);\n"
" imageStore(resolve, resolve_coords, aa_sample" + swizzle + ");\n";
build(kernel, format_prefix, 0);
// BGRA-swap flag is a workaround to swap channels for old GeForce cards with broken compute image handling
build(format_prefix, false, bgra_swap);
}
};
@@ -159,14 +90,8 @@ namespace vk
{
cs_unresolve_task(const std::string& format_prefix, bool bgra_swap = false)
{
// Allow rgba->bgra transformation for old GeForce cards
const std::string swizzle = bgra_swap? ".bgra" : "";
std::string kernel =
" vec4 resolved_sample = imageLoad(resolve, resolve_coords);\n"
" imageStore(multisampled, aa_coords, sample_index, resolved_sample" + swizzle + ");\n";
build(kernel, format_prefix, 1);
// BGRA-swap flag is a workaround to swap channels for old GeForce cards with broken compute image handling
build(format_prefix, true, bgra_swap);
}
};
@@ -184,43 +109,12 @@ namespace vk
// Depth-stencil buffers are almost never filterable, and we do not need it here (1:1 mapping)
m_sampler_filter = VK_FILTER_NEAREST;
// Do not use UBOs
m_num_uniform_buffers = 0;
}
void build(const std::string& kernel, const std::string& extensions, const std::vector<const char*>& inputs)
{
vs_src =
"#version 450\n"
"#extension GL_ARB_separate_shader_objects : enable\n\n"
"\n"
"void main()\n"
"{\n"
" vec2 positions[] = {vec2(-1., -1.), vec2(1., -1.), vec2(-1., 1.), vec2(1., 1.)};\n"
" gl_Position = vec4(positions[gl_VertexIndex % 4], 0., 1.);\n"
"}\n";
fs_src =
"#version 420\n"
"#extension GL_ARB_separate_shader_objects : enable\n";
fs_src += extensions +
"\n"
"layout(push_constant) uniform static_data{ ivec" + std::to_string(static_parameters_width) + " regs[1]; };\n";
int binding = 1;
for (const auto& input : inputs)
{
fs_src += "layout(set=0, binding=" + std::to_string(binding++) + ") uniform " + input + ";\n";
}
fs_src +=
"//layout(pixel_center_integer) in vec4 gl_FragCoord;\n"
"\n"
"void main()\n"
"{\n";
fs_src += kernel +
"}\n";
rsx_log.notice("Resolve shader:\n%s", fs_src);
}
void build(bool resolve_depth, bool resolve_stencil, bool unresolve);
std::vector<VkPushConstantRange> get_push_constants() override
{
@@ -263,15 +157,7 @@ namespace vk
{
depthonly_resolve()
{
build(
" ivec2 out_coord = ivec2(gl_FragCoord.xy);\n"
" ivec2 in_coord = (out_coord / regs[0].xy);\n"
" ivec2 sample_loc = out_coord % regs[0].xy;\n"
" int sample_index = sample_loc.x + (sample_loc.y * regs[0].y);\n"
" float frag_depth = texelFetch(fs0, in_coord, sample_index).x;\n"
" gl_FragDepth = frag_depth;\n",
"",
{ "sampler2DMS fs0" });
build(true, false, false);
}
void run(vk::command_buffer& cmd, vk::viewable_image* msaa_image, vk::viewable_image* resolve_image, VkRenderPass render_pass)
@@ -291,15 +177,7 @@ namespace vk
{
depthonly_unresolve()
{
build(
" ivec2 pixel_coord = ivec2(gl_FragCoord.xy);\n"
" pixel_coord *= regs[0].xy;\n"
" pixel_coord.x += (gl_SampleID % regs[0].x);\n"
" pixel_coord.y += (gl_SampleID / regs[0].x);\n"
" float frag_depth = texelFetch(fs0, pixel_coord, 0).x;\n"
" gl_FragDepth = frag_depth;\n",
"",
{ "sampler2D fs0" });
build(true, false, true);
}
void run(vk::command_buffer& cmd, vk::viewable_image* msaa_image, vk::viewable_image* resolve_image, VkRenderPass render_pass)
@@ -340,15 +218,7 @@ namespace vk
static_parameters_width = 3;
build(
" ivec2 out_coord = ivec2(gl_FragCoord.xy);\n"
" ivec2 in_coord = (out_coord / regs[0].xy);\n"
" ivec2 sample_loc = out_coord % regs[0].xy;\n"
" int sample_index = sample_loc.x + (sample_loc.y * regs[0].y);\n"
" uint frag_stencil = texelFetch(fs0, in_coord, sample_index).x;\n"
" if ((frag_stencil & uint(regs[0].z)) == 0) discard;\n",
"",
{"usampler2DMS fs0"});
build(false, true, false);
}
void get_dynamic_state_entries(std::vector<VkDynamicState>& state_descriptors) override
@@ -407,15 +277,7 @@ namespace vk
static_parameters_width = 3;
build(
" ivec2 pixel_coord = ivec2(gl_FragCoord.xy);\n"
" pixel_coord *= regs[0].xy;\n"
" pixel_coord.x += (gl_SampleID % regs[0].x);\n"
" pixel_coord.y += (gl_SampleID / regs[0].x);\n"
" uint frag_stencil = texelFetch(fs0, pixel_coord, 0).x;\n"
" if ((frag_stencil & uint(regs[0].z)) == 0) discard;\n",
"",
{ "usampler2D fs0" });
build(false, true, true);
}
void get_dynamic_state_entries(std::vector<VkDynamicState>& state_descriptors) override
@@ -468,19 +330,7 @@ namespace vk
renderpass_config.set_stencil_mask(0xFF);
m_num_usable_samplers = 2;
build(
" ivec2 out_coord = ivec2(gl_FragCoord.xy);\n"
" ivec2 in_coord = (out_coord / regs[0].xy);\n"
" ivec2 sample_loc = out_coord % ivec2(regs[0].xy);\n"
" int sample_index = sample_loc.x + (sample_loc.y * regs[0].y);\n"
" float frag_depth = texelFetch(fs0, in_coord, sample_index).x;\n"
" uint frag_stencil = texelFetch(fs1, in_coord, sample_index).x;\n"
" gl_FragDepth = frag_depth;\n"
" gl_FragStencilRefARB = int(frag_stencil);\n",
"#extension GL_ARB_shader_stencil_export : enable\n",
{ "sampler2DMS fs0", "usampler2DMS fs1" });
build(true, true, false);
}
void run(vk::command_buffer& cmd, vk::viewable_image* msaa_image, vk::viewable_image* resolve_image, VkRenderPass render_pass)
@@ -510,19 +360,7 @@ namespace vk
renderpass_config.set_stencil_mask(0xFF);
m_num_usable_samplers = 2;
build(
" ivec2 pixel_coord = ivec2(gl_FragCoord.xy);\n"
" pixel_coord *= regs[0].xy;\n"
" pixel_coord.x += (gl_SampleID % regs[0].x);\n"
" pixel_coord.y += (gl_SampleID / regs[0].x);\n"
" float frag_depth = texelFetch(fs0, pixel_coord, 0).x;\n"
" uint frag_stencil = texelFetch(fs1, pixel_coord, 0).x;\n"
" gl_FragDepth = frag_depth;\n"
" gl_FragStencilRefARB = int(frag_stencil);\n",
"#extension GL_ARB_shader_stencil_export : enable\n",
{ "sampler2D fs0", "usampler2D fs1" });
build(true, true, true);
}
void run(vk::command_buffer& cmd, vk::viewable_image* msaa_image, vk::viewable_image* resolve_image, VkRenderPass render_pass)

View file

@@ -752,7 +752,25 @@ namespace vk
device_fault_info.pNext = const_cast<void*>(device.pNext);
device_fault_info.deviceFault = VK_TRUE;
device_fault_info.deviceFaultVendorBinary = VK_FALSE;
device_fault_info.pNext = &device_fault_info;
device.pNext = &device_fault_info;
}
VkPhysicalDeviceConditionalRenderingFeaturesEXT conditional_rendering_info{};
if (pgpu->optional_features_support.conditional_rendering)
{
conditional_rendering_info.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT;
conditional_rendering_info.pNext = const_cast<void*>(device.pNext);
conditional_rendering_info.conditionalRendering = VK_TRUE;
device.pNext = &conditional_rendering_info;
}
VkPhysicalDeviceFragmentShaderBarycentricFeaturesKHR shader_barycentric_info{};
if (pgpu->optional_features_support.barycentric_coords)
{
shader_barycentric_info.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_BARYCENTRIC_FEATURES_KHR;
shader_barycentric_info.pNext = const_cast<void*>(device.pNext);
shader_barycentric_info.fragmentShaderBarycentric = VK_TRUE;
device.pNext = &shader_barycentric_info;
}
if (auto error = vkCreateDevice(*pgpu, &device, nullptr, &dev))

View file

@@ -29,37 +29,24 @@ namespace vk
std::vector<u8> vendor_binary_data;
std::string fault_description;
#ifdef _MSC_VER
__try
// Retrieve sizes
g_render_device->_vkGetDeviceFaultInfoEXT(*g_render_device, &fault_counts, nullptr);
// Resize arrays and fill
address_info.resize(fault_counts.addressInfoCount);
vendor_info.resize(fault_counts.vendorInfoCount);
vendor_binary_data.resize(fault_counts.vendorBinarySize);
VkDeviceFaultInfoEXT fault_info
{
#endif
// Retrieve sizes
g_render_device->_vkGetDeviceFaultInfoEXT(*g_render_device, &fault_counts, nullptr);
// Resize arrays and fill
address_info.resize(fault_counts.addressInfoCount);
vendor_info.resize(fault_counts.vendorInfoCount);
vendor_binary_data.resize(fault_counts.vendorBinarySize);
VkDeviceFaultInfoEXT fault_info
{
.sType = VK_STRUCTURE_TYPE_DEVICE_FAULT_INFO_EXT,
.pAddressInfos = address_info.data(),
.pVendorInfos = vendor_info.data(),
.pVendorBinaryData = vendor_binary_data.data()
};
g_render_device->_vkGetDeviceFaultInfoEXT(*g_render_device, &fault_counts, &fault_info);
fault_description = fault_info.description;
#ifdef _MSC_VER
}
__except (EXCEPTION_EXECUTE_HANDLER)
{
rsx_log.error("Driver crashed retrieving extended crash information. Are you running on an NVIDIA card?");
return "Extended fault information is not available. The driver crashed when retrieving the details.";
}
#endif
.sType = VK_STRUCTURE_TYPE_DEVICE_FAULT_INFO_EXT,
.pAddressInfos = address_info.data(),
.pVendorInfos = vendor_info.data(),
.pVendorBinaryData = vendor_binary_data.data()
};
g_render_device->_vkGetDeviceFaultInfoEXT(*g_render_device, &fault_counts, &fault_info);
fault_description = fault_info.description;
std::string fault_message = fmt::format(
"Device Fault Information:\n"
"Fault Summary:\n"

View file

@@ -60,6 +60,7 @@
<ClInclude Include="Emu\RSX\GL\GLGSRender.h" />
<ClInclude Include="Emu\RSX\GL\GLProcTable.h" />
<ClInclude Include="Emu\RSX\GL\GLProgramBuffer.h" />
<ClInclude Include="Emu\RSX\GL\GLResolveHelper.h" />
<ClInclude Include="Emu\RSX\GL\glutils\blitter.h" />
<ClInclude Include="Emu\RSX\GL\glutils\buffer_object.h" />
<ClInclude Include="Emu\RSX\GL\glutils\capabilities.h" />
@@ -95,6 +96,7 @@
<ClCompile Include="Emu\RSX\GL\GLGSRender.cpp" />
<ClCompile Include="Emu\RSX\GL\GLOverlays.cpp" />
<ClCompile Include="Emu\RSX\GL\GLPipelineCompiler.cpp" />
<ClCompile Include="Emu\RSX\GL\GLResolveHelper.cpp" />
<ClCompile Include="Emu\RSX\GL\glutils\blitter.cpp" />
<ClCompile Include="Emu\RSX\GL\glutils\buffer_object.cpp" />
<ClCompile Include="Emu\RSX\GL\glutils\capabilities.cpp" />

View file

@@ -48,6 +48,7 @@
<Filter>upscalers\fsr1</Filter>
</ClCompile>
<ClCompile Include="Emu\RSX\GL\GLDMA.cpp" />
<ClCompile Include="Emu\RSX\GL\GLResolveHelper.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="Emu\RSX\GL\GLTexture.h" />
@@ -120,6 +121,7 @@
<Filter>upscalers</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\GL\GLDMA.h" />
<ClInclude Include="Emu\RSX\GL\GLResolveHelper.h" />
</ItemGroup>
<ItemGroup>
<Filter Include="glutils">

View file

@@ -112,6 +112,7 @@
<ClCompile Include="Emu\perf_monitor.cpp" />
<ClCompile Include="Emu\RSX\Common\texture_cache.cpp" />
<ClCompile Include="Emu\RSX\Core\RSXContext.cpp" />
<ClCompile Include="Emu\RSX\Core\RSXDisplay.cpp" />
<ClCompile Include="Emu\RSX\Core\RSXDrawCommands.cpp" />
<ClCompile Include="Emu\RSX\Host\MM.cpp" />
<ClCompile Include="Emu\RSX\Host\RSXDMAWriter.cpp" />
@@ -1030,6 +1031,14 @@
<None Include="Emu\RSX\Program\GLSLSnippets\RSXProg\RSXVertexPrologue.glsl" />
<None Include="Emu\RSX\Program\GLSLSnippets\ShuffleBytes.glsl" />
<None Include="Emu\RSX\Program\GLSLSnippets\VideoOutCalibrationPass.glsl" />
<None Include="Emu\RSX\Program\MSAA\ColorResolvePass.glsl" />
<None Include="Emu\RSX\Program\MSAA\ColorUnresolvePass.glsl" />
<None Include="Emu\RSX\Program\MSAA\DepthResolvePass.glsl" />
<None Include="Emu\RSX\Program\MSAA\DepthStencilResolvePass.glsl" />
<None Include="Emu\RSX\Program\MSAA\DepthStencilUnresolvePass.glsl" />
<None Include="Emu\RSX\Program\MSAA\DepthUnresolvePass.glsl" />
<None Include="Emu\RSX\Program\MSAA\StencilResolvePass.glsl" />
<None Include="Emu\RSX\Program\MSAA\StencilUnresolvePass.glsl" />
<None Include="Emu\RSX\Program\Upscalers\FSR1\fsr_ffx_a_flattened.inc" />
<None Include="Emu\RSX\Program\Upscalers\FSR1\fsr_ffx_fsr1_flattened.inc" />
<None Include="Emu\RSX\Program\Upscalers\FSR1\fsr_ubershader.glsl" />

View file

@@ -130,6 +130,9 @@
<Filter Include="Emu\GPU\RSX\Overlays\Trophies">
<UniqueIdentifier>{caf84300-5c45-4340-bd9a-8ac859409351}</UniqueIdentifier>
</Filter>
<Filter Include="Emu\GPU\RSX\Program\MSAA">
<UniqueIdentifier>{ce6d6b90-8313-4273-b46c-d92bd450c002}</UniqueIdentifier>
</Filter>
</ItemGroup>
<ItemGroup>
<ClCompile Include="Crypto\aes.cpp">
@@ -1219,6 +1222,9 @@
<ClCompile Include="Emu\RSX\Core\RSXContext.cpp">
<Filter>Emu\GPU\RSX\Core</Filter>
</ClCompile>
<ClCompile Include="Emu\RSX\Core\RSXDisplay.cpp">
<Filter>Emu\GPU\RSX\Core</Filter>
</ClCompile>
<ClCompile Include="Crypto\unzip.cpp">
<Filter>Crypto</Filter>
</ClCompile>
@@ -2802,5 +2808,29 @@
<None Include="Emu\CPU\Backends\AArch64\AArch64Signal.cpp">
<Filter>Emu\CPU\Backends\AArch64</Filter>
</None>
<None Include="Emu\RSX\Program\MSAA\ColorResolvePass.glsl">
<Filter>Emu\GPU\RSX\Program\MSAA</Filter>
</None>
<None Include="Emu\RSX\Program\MSAA\ColorUnresolvePass.glsl">
<Filter>Emu\GPU\RSX\Program\MSAA</Filter>
</None>
<None Include="Emu\RSX\Program\MSAA\DepthResolvePass.glsl">
<Filter>Emu\GPU\RSX\Program\MSAA</Filter>
</None>
<None Include="Emu\RSX\Program\MSAA\DepthStencilResolvePass.glsl">
<Filter>Emu\GPU\RSX\Program\MSAA</Filter>
</None>
<None Include="Emu\RSX\Program\MSAA\DepthStencilUnresolvePass.glsl">
<Filter>Emu\GPU\RSX\Program\MSAA</Filter>
</None>
<None Include="Emu\RSX\Program\MSAA\DepthUnresolvePass.glsl">
<Filter>Emu\GPU\RSX\Program\MSAA</Filter>
</None>
<None Include="Emu\RSX\Program\MSAA\StencilResolvePass.glsl">
<Filter>Emu\GPU\RSX\Program\MSAA</Filter>
</None>
<None Include="Emu\RSX\Program\MSAA\StencilUnresolvePass.glsl">
<Filter>Emu\GPU\RSX\Program\MSAA</Filter>
</None>
</ItemGroup>
</Project>

View file

@@ -98,7 +98,7 @@ render_creator::render_creator(QObject *parent) : QObject(parent)
#endif
// Graphics Adapter
Vulkan = render_info(vulkan_adapters, supports_vulkan, emu_settings_type::VulkanAdapter, true);
Vulkan = render_info(vulkan_adapters, supports_vulkan, emu_settings_type::VulkanAdapter);
OpenGL = render_info();
NullRender = render_info();

View file

@@ -23,16 +23,14 @@ public:
emu_settings_type type = emu_settings_type::VulkanAdapter;
bool supported = true;
bool has_adapters = true;
bool has_msaa = false;
render_info()
: has_adapters(false) {}
render_info(QStringList adapters, bool supported, emu_settings_type type, bool has_msaa)
render_info(QStringList adapters, bool supported, emu_settings_type type)
: adapters(std::move(adapters))
, type(type)
, supported(supported)
, has_msaa(has_msaa) {}
, supported(supported) {}
};
bool abort_requested = false;

View file

@@ -838,12 +838,6 @@ settings_dialog::settings_dialog(std::shared_ptr<gui_settings> gui_settings, std
}
}
// Enable/disable MSAA depending on renderer
ui->antiAliasing->setEnabled(renderer.has_msaa);
ui->antiAliasing->blockSignals(true);
ui->antiAliasing->setCurrentText(renderer.has_msaa ? qstr(m_emu_settings->GetSetting(emu_settings_type::MSAA)) : tr("Disabled", "MSAA"));
ui->antiAliasing->blockSignals(false);
ui->graphicsAdapterBox->clear();
// Fill combobox with placeholder if no adapters needed
@@ -1070,7 +1064,7 @@ settings_dialog::settings_dialog(std::shared_ptr<gui_settings> gui_settings, std
get_audio_output_devices(false);
change_audio_output_device(0); // Set device to 'Default'
});
m_emu_settings->EnhanceComboBox(ui->combo_audio_channel_layout, emu_settings_type::AudioChannelLayout);
SubscribeTooltip(ui->gb_audio_channel_layout, tooltips.settings.audio_channel_layout);
@@ -1512,7 +1506,7 @@ settings_dialog::settings_dialog(std::shared_ptr<gui_settings> gui_settings, std
m_emu_settings->SetSetting(emu_settings_type::PSNCountry, country_code.toString().toStdString());
});
SubscribeTooltip(ui->gb_psnCountryBox, tooltips.settings.psn_country);
if (!game)