diff --git a/rpcs3/Emu/CMakeLists.txt b/rpcs3/Emu/CMakeLists.txt index f5baa5f068..eaefb3f048 100644 --- a/rpcs3/Emu/CMakeLists.txt +++ b/rpcs3/Emu/CMakeLists.txt @@ -478,6 +478,7 @@ target_sources(rpcs3_emu PRIVATE RSX/Common/TextureUtils.cpp RSX/Common/texture_cache.cpp RSX/Core/RSXContext.cpp + RSX/Core/RSXDisplay.cpp RSX/Core/RSXDrawCommands.cpp RSX/gcm_enums.cpp RSX/gcm_printing.cpp @@ -492,6 +493,7 @@ target_sources(rpcs3_emu PRIVATE RSX/GL/GLPipelineCompiler.cpp RSX/GL/GLPresent.cpp RSX/GL/GLRenderTargets.cpp + RSX/GL/GLResolveHelper.cpp RSX/GL/GLShaderInterpreter.cpp RSX/GL/GLTexture.cpp RSX/GL/GLTextureCache.cpp diff --git a/rpcs3/Emu/Cell/PPUAnalyser.cpp b/rpcs3/Emu/Cell/PPUAnalyser.cpp index 3fef5240dd..a936eb6853 100644 --- a/rpcs3/Emu/Cell/PPUAnalyser.cpp +++ b/rpcs3/Emu/Cell/PPUAnalyser.cpp @@ -3102,1962 +3102,3 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, con ppu_log.notice("Block analysis: %zu blocks (%zu enqueued)", funcs.size(), block_queue.size()); return true; } - -// Temporarily -#ifndef _MSC_VER -#pragma GCC diagnostic ignored "-Wunused-parameter" -#endif - -void ppu_acontext::UNK(ppu_opcode_t op) -{ - std::fill_n(gpr, 32, spec_gpr{}); - ppu_log.error("Unknown/Illegal opcode: 0x%08x at 0x%x", op.opcode, cia); -} - -void ppu_acontext::MFVSCR(ppu_opcode_t op) -{ -} - -void ppu_acontext::MTVSCR(ppu_opcode_t op) -{ -} - -void ppu_acontext::VADDCUW(ppu_opcode_t op) -{ -} - -void ppu_acontext::VADDFP(ppu_opcode_t op) -{ -} - -void ppu_acontext::VADDSBS(ppu_opcode_t op) -{ -} - -void ppu_acontext::VADDSHS(ppu_opcode_t op) -{ -} - -void ppu_acontext::VADDSWS(ppu_opcode_t op) -{ -} - -void ppu_acontext::VADDUBM(ppu_opcode_t op) -{ -} - -void ppu_acontext::VADDUBS(ppu_opcode_t op) -{ -} - -void ppu_acontext::VADDUHM(ppu_opcode_t op) -{ -} - -void ppu_acontext::VADDUHS(ppu_opcode_t op) -{ -} - -void ppu_acontext::VADDUWM(ppu_opcode_t op) -{ -} - -void ppu_acontext::VADDUWS(ppu_opcode_t op) -{ -} - -void 
ppu_acontext::VAND(ppu_opcode_t op) -{ -} - -void ppu_acontext::VANDC(ppu_opcode_t op) -{ -} - -void ppu_acontext::VAVGSB(ppu_opcode_t op) -{ -} - -void ppu_acontext::VAVGSH(ppu_opcode_t op) -{ -} - -void ppu_acontext::VAVGSW(ppu_opcode_t op) -{ -} - -void ppu_acontext::VAVGUB(ppu_opcode_t op) -{ -} - -void ppu_acontext::VAVGUH(ppu_opcode_t op) -{ -} - -void ppu_acontext::VAVGUW(ppu_opcode_t op) -{ -} - -void ppu_acontext::VCFSX(ppu_opcode_t op) -{ -} - -void ppu_acontext::VCFUX(ppu_opcode_t op) -{ -} - -void ppu_acontext::VCMPBFP(ppu_opcode_t op) -{ -} - -void ppu_acontext::VCMPEQFP(ppu_opcode_t op) -{ -} - -void ppu_acontext::VCMPEQUB(ppu_opcode_t op) -{ -} - -void ppu_acontext::VCMPEQUH(ppu_opcode_t op) -{ -} - -void ppu_acontext::VCMPEQUW(ppu_opcode_t op) -{ -} - -void ppu_acontext::VCMPGEFP(ppu_opcode_t op) -{ -} - -void ppu_acontext::VCMPGTFP(ppu_opcode_t op) -{ -} - -void ppu_acontext::VCMPGTSB(ppu_opcode_t op) -{ -} - -void ppu_acontext::VCMPGTSH(ppu_opcode_t op) -{ -} - -void ppu_acontext::VCMPGTSW(ppu_opcode_t op) -{ -} - -void ppu_acontext::VCMPGTUB(ppu_opcode_t op) -{ -} - -void ppu_acontext::VCMPGTUH(ppu_opcode_t op) -{ -} - -void ppu_acontext::VCMPGTUW(ppu_opcode_t op) -{ -} - -void ppu_acontext::VCTSXS(ppu_opcode_t op) -{ -} - -void ppu_acontext::VCTUXS(ppu_opcode_t op) -{ -} - -void ppu_acontext::VEXPTEFP(ppu_opcode_t op) -{ -} - -void ppu_acontext::VLOGEFP(ppu_opcode_t op) -{ -} - -void ppu_acontext::VMADDFP(ppu_opcode_t op) -{ -} - -void ppu_acontext::VMAXFP(ppu_opcode_t op) -{ -} - -void ppu_acontext::VMAXSB(ppu_opcode_t op) -{ -} - -void ppu_acontext::VMAXSH(ppu_opcode_t op) -{ -} - -void ppu_acontext::VMAXSW(ppu_opcode_t op) -{ -} - -void ppu_acontext::VMAXUB(ppu_opcode_t op) -{ -} - -void ppu_acontext::VMAXUH(ppu_opcode_t op) -{ -} - -void ppu_acontext::VMAXUW(ppu_opcode_t op) -{ -} - -void ppu_acontext::VMHADDSHS(ppu_opcode_t op) -{ -} - -void ppu_acontext::VMHRADDSHS(ppu_opcode_t op) -{ -} - -void ppu_acontext::VMINFP(ppu_opcode_t op) -{ -} 
- -void ppu_acontext::VMINSB(ppu_opcode_t op) -{ -} - -void ppu_acontext::VMINSH(ppu_opcode_t op) -{ -} - -void ppu_acontext::VMINSW(ppu_opcode_t op) -{ -} - -void ppu_acontext::VMINUB(ppu_opcode_t op) -{ -} - -void ppu_acontext::VMINUH(ppu_opcode_t op) -{ -} - -void ppu_acontext::VMINUW(ppu_opcode_t op) -{ -} - -void ppu_acontext::VMLADDUHM(ppu_opcode_t op) -{ -} - -void ppu_acontext::VMRGHB(ppu_opcode_t op) -{ -} - -void ppu_acontext::VMRGHH(ppu_opcode_t op) -{ -} - -void ppu_acontext::VMRGHW(ppu_opcode_t op) -{ -} - -void ppu_acontext::VMRGLB(ppu_opcode_t op) -{ -} - -void ppu_acontext::VMRGLH(ppu_opcode_t op) -{ -} - -void ppu_acontext::VMRGLW(ppu_opcode_t op) -{ -} - -void ppu_acontext::VMSUMMBM(ppu_opcode_t op) -{ -} - -void ppu_acontext::VMSUMSHM(ppu_opcode_t op) -{ -} - -void ppu_acontext::VMSUMSHS(ppu_opcode_t op) -{ -} - -void ppu_acontext::VMSUMUBM(ppu_opcode_t op) -{ -} - -void ppu_acontext::VMSUMUHM(ppu_opcode_t op) -{ -} - -void ppu_acontext::VMSUMUHS(ppu_opcode_t op) -{ -} - -void ppu_acontext::VMULESB(ppu_opcode_t op) -{ -} - -void ppu_acontext::VMULESH(ppu_opcode_t op) -{ -} - -void ppu_acontext::VMULEUB(ppu_opcode_t op) -{ -} - -void ppu_acontext::VMULEUH(ppu_opcode_t op) -{ -} - -void ppu_acontext::VMULOSB(ppu_opcode_t op) -{ -} - -void ppu_acontext::VMULOSH(ppu_opcode_t op) -{ -} - -void ppu_acontext::VMULOUB(ppu_opcode_t op) -{ -} - -void ppu_acontext::VMULOUH(ppu_opcode_t op) -{ -} - -void ppu_acontext::VNMSUBFP(ppu_opcode_t op) -{ -} - -void ppu_acontext::VNOR(ppu_opcode_t op) -{ -} - -void ppu_acontext::VOR(ppu_opcode_t op) -{ -} - -void ppu_acontext::VPERM(ppu_opcode_t op) -{ -} - -void ppu_acontext::VPKPX(ppu_opcode_t op) -{ -} - -void ppu_acontext::VPKSHSS(ppu_opcode_t op) -{ -} - -void ppu_acontext::VPKSHUS(ppu_opcode_t op) -{ -} - -void ppu_acontext::VPKSWSS(ppu_opcode_t op) -{ -} - -void ppu_acontext::VPKSWUS(ppu_opcode_t op) -{ -} - -void ppu_acontext::VPKUHUM(ppu_opcode_t op) -{ -} - -void ppu_acontext::VPKUHUS(ppu_opcode_t op) -{ -} 
- -void ppu_acontext::VPKUWUM(ppu_opcode_t op) -{ -} - -void ppu_acontext::VPKUWUS(ppu_opcode_t op) -{ -} - -void ppu_acontext::VREFP(ppu_opcode_t op) -{ -} - -void ppu_acontext::VRFIM(ppu_opcode_t op) -{ -} - -void ppu_acontext::VRFIN(ppu_opcode_t op) -{ -} - -void ppu_acontext::VRFIP(ppu_opcode_t op) -{ -} - -void ppu_acontext::VRFIZ(ppu_opcode_t op) -{ -} - -void ppu_acontext::VRLB(ppu_opcode_t op) -{ -} - -void ppu_acontext::VRLH(ppu_opcode_t op) -{ -} - -void ppu_acontext::VRLW(ppu_opcode_t op) -{ -} - -void ppu_acontext::VRSQRTEFP(ppu_opcode_t op) -{ -} - -void ppu_acontext::VSEL(ppu_opcode_t op) -{ -} - -void ppu_acontext::VSL(ppu_opcode_t op) -{ -} - -void ppu_acontext::VSLB(ppu_opcode_t op) -{ -} - -void ppu_acontext::VSLDOI(ppu_opcode_t op) -{ -} - -void ppu_acontext::VSLH(ppu_opcode_t op) -{ -} - -void ppu_acontext::VSLO(ppu_opcode_t op) -{ -} - -void ppu_acontext::VSLW(ppu_opcode_t op) -{ -} - -void ppu_acontext::VSPLTB(ppu_opcode_t op) -{ -} - -void ppu_acontext::VSPLTH(ppu_opcode_t op) -{ -} - -void ppu_acontext::VSPLTISB(ppu_opcode_t op) -{ -} - -void ppu_acontext::VSPLTISH(ppu_opcode_t op) -{ -} - -void ppu_acontext::VSPLTISW(ppu_opcode_t op) -{ -} - -void ppu_acontext::VSPLTW(ppu_opcode_t op) -{ -} - -void ppu_acontext::VSR(ppu_opcode_t op) -{ -} - -void ppu_acontext::VSRAB(ppu_opcode_t op) -{ -} - -void ppu_acontext::VSRAH(ppu_opcode_t op) -{ -} - -void ppu_acontext::VSRAW(ppu_opcode_t op) -{ -} - -void ppu_acontext::VSRB(ppu_opcode_t op) -{ -} - -void ppu_acontext::VSRH(ppu_opcode_t op) -{ -} - -void ppu_acontext::VSRO(ppu_opcode_t op) -{ -} - -void ppu_acontext::VSRW(ppu_opcode_t op) -{ -} - -void ppu_acontext::VSUBCUW(ppu_opcode_t op) -{ -} - -void ppu_acontext::VSUBFP(ppu_opcode_t op) -{ -} - -void ppu_acontext::VSUBSBS(ppu_opcode_t op) -{ -} - -void ppu_acontext::VSUBSHS(ppu_opcode_t op) -{ -} - -void ppu_acontext::VSUBSWS(ppu_opcode_t op) -{ -} - -void ppu_acontext::VSUBUBM(ppu_opcode_t op) -{ -} - -void ppu_acontext::VSUBUBS(ppu_opcode_t 
op) -{ -} - -void ppu_acontext::VSUBUHM(ppu_opcode_t op) -{ -} - -void ppu_acontext::VSUBUHS(ppu_opcode_t op) -{ -} - -void ppu_acontext::VSUBUWM(ppu_opcode_t op) -{ -} - -void ppu_acontext::VSUBUWS(ppu_opcode_t op) -{ -} - -void ppu_acontext::VSUMSWS(ppu_opcode_t op) -{ -} - -void ppu_acontext::VSUM2SWS(ppu_opcode_t op) -{ -} - -void ppu_acontext::VSUM4SBS(ppu_opcode_t op) -{ -} - -void ppu_acontext::VSUM4SHS(ppu_opcode_t op) -{ -} - -void ppu_acontext::VSUM4UBS(ppu_opcode_t op) -{ -} - -void ppu_acontext::VUPKHPX(ppu_opcode_t op) -{ -} - -void ppu_acontext::VUPKHSB(ppu_opcode_t op) -{ -} - -void ppu_acontext::VUPKHSH(ppu_opcode_t op) -{ -} - -void ppu_acontext::VUPKLPX(ppu_opcode_t op) -{ -} - -void ppu_acontext::VUPKLSB(ppu_opcode_t op) -{ -} - -void ppu_acontext::VUPKLSH(ppu_opcode_t op) -{ -} - -void ppu_acontext::VXOR(ppu_opcode_t op) -{ -} - -void ppu_acontext::TDI(ppu_opcode_t op) -{ -} - -void ppu_acontext::TWI(ppu_opcode_t op) -{ -} - -void ppu_acontext::MULLI(ppu_opcode_t op) -{ - const s64 amin = gpr[op.ra].imin; - const s64 amax = gpr[op.ra].imax; - - // Undef or mixed range (default) - s64 min = 0; - s64 max = -1; - - // Special cases like powers of 2 and their negations are not handled - if (amin <= amax) - { - min = amin * op.simm16; - max = amax * op.simm16; - - // Check overflow - if (min >> 63 != utils::mulh64(amin, op.simm16) || max >> 63 != utils::mulh64(amax, op.simm16)) - { - min = 0; - max = -1; - } - else if (min > max) - { - std::swap(min, max); - } - } - - gpr[op.rd] = spec_gpr::range(min, max, gpr[op.ra].tz() + std::countr_zero(op.simm16)); -} - -void ppu_acontext::SUBFIC(ppu_opcode_t op) -{ - gpr[op.rd] = ~gpr[op.ra] + spec_gpr::fixed(op.simm16) + spec_gpr::fixed(1); -} - -void ppu_acontext::CMPLI(ppu_opcode_t op) -{ -} - -void ppu_acontext::CMPI(ppu_opcode_t op) -{ -} - -void ppu_acontext::ADDIC(ppu_opcode_t op) -{ - gpr[op.rd] = gpr[op.ra] + spec_gpr::fixed(op.simm16); -} - -void ppu_acontext::ADDI(ppu_opcode_t op) -{ - gpr[op.rd] = 
op.ra ? gpr[op.ra] + spec_gpr::fixed(op.simm16) : spec_gpr::fixed(op.simm16); -} - -void ppu_acontext::ADDIS(ppu_opcode_t op) -{ - gpr[op.rd] = op.ra ? gpr[op.ra] + spec_gpr::fixed(op.simm16 * 65536) : spec_gpr::fixed(op.simm16 * 65536); -} - -void ppu_acontext::BC(ppu_opcode_t op) -{ -} - -void ppu_acontext::SC(ppu_opcode_t op) -{ -} - -void ppu_acontext::B(ppu_opcode_t op) -{ -} - -void ppu_acontext::MCRF(ppu_opcode_t op) -{ -} - -void ppu_acontext::BCLR(ppu_opcode_t op) -{ -} - -void ppu_acontext::CRNOR(ppu_opcode_t op) -{ -} - -void ppu_acontext::CRANDC(ppu_opcode_t op) -{ -} - -void ppu_acontext::ISYNC(ppu_opcode_t op) -{ -} - -void ppu_acontext::CRXOR(ppu_opcode_t op) -{ -} - -void ppu_acontext::CRNAND(ppu_opcode_t op) -{ -} - -void ppu_acontext::CRAND(ppu_opcode_t op) -{ -} - -void ppu_acontext::CREQV(ppu_opcode_t op) -{ -} - -void ppu_acontext::CRORC(ppu_opcode_t op) -{ -} - -void ppu_acontext::CROR(ppu_opcode_t op) -{ -} - -void ppu_acontext::BCCTR(ppu_opcode_t op) -{ -} - -void ppu_acontext::RLWIMI(ppu_opcode_t op) -{ - const u64 mask = ppu_rotate_mask(32 + op.mb32, 32 + op.me32); - - u64 min = gpr[op.rs].bmin; - u64 max = gpr[op.rs].bmax; - - if (op.mb32 <= op.me32) - { - // 32-bit op, including mnemonics: INSLWI, INSRWI (TODO) - min = utils::rol32(static_cast(min), op.sh32) & mask; - max = utils::rol32(static_cast(max), op.sh32) & mask; - } - else - { - // Full 64-bit op with duplication - min = utils::rol64(static_cast(min) | min << 32, op.sh32) & mask; - max = utils::rol64(static_cast(max) | max << 32, op.sh32) & mask; - } - - if (mask != umax) - { - // Insertion - min |= gpr[op.ra].bmin & ~mask; - max |= gpr[op.ra].bmax & ~mask; - } - - gpr[op.rs] = spec_gpr::approx(min, max); -} - -void ppu_acontext::RLWINM(ppu_opcode_t op) -{ - const u64 mask = ppu_rotate_mask(32 + op.mb32, 32 + op.me32); - - u64 min = gpr[op.rs].bmin; - u64 max = gpr[op.rs].bmax; - - if (op.mb32 <= op.me32) - { - if (op.sh32 == 0) - { - // CLRLWI, CLRRWI mnemonics - gpr[op.ra] = 
gpr[op.ra] & spec_gpr::fixed(mask); - return; - } - else if (op.mb32 == 0 && op.me32 == 31) - { - // ROTLWI, ROTRWI mnemonics - } - else if (op.mb32 == 0 && op.sh32 == 31 - op.me32) - { - // SLWI mnemonic - } - else if (op.me32 == 31 && op.sh32 == 32 - op.mb32) - { - // SRWI mnemonic - } - else if (op.mb32 == 0 && op.sh32 < 31 - op.me32) - { - // EXTLWI and other possible mnemonics - } - else if (op.me32 == 31 && 32 - op.sh32 < op.mb32) - { - // EXTRWI and other possible mnemonics - } - - min = utils::rol32(static_cast(min), op.sh32) & mask; - max = utils::rol32(static_cast(max), op.sh32) & mask; - } - else - { - // Full 64-bit op with duplication - min = utils::rol64(static_cast(min) | min << 32, op.sh32) & mask; - max = utils::rol64(static_cast(max) | max << 32, op.sh32) & mask; - } - - gpr[op.ra] = spec_gpr::approx(min, max); -} - -void ppu_acontext::RLWNM(ppu_opcode_t op) -{ - const u64 mask = ppu_rotate_mask(32 + op.mb32, 32 + op.me32); - - u64 min = gpr[op.rs].bmin; - u64 max = gpr[op.rs].bmax; - - if (op.mb32 <= op.me32) - { - if (op.mb32 == 0 && op.me32 == 31) - { - // ROTLW mnemonic - } - - // TODO - min = 0; - max = mask; - } - else - { - // Full 64-bit op with duplication - min = 0; - max = mask; - } - - gpr[op.ra] = spec_gpr::approx(min, max); -} - -void ppu_acontext::ORI(ppu_opcode_t op) -{ - gpr[op.ra] = gpr[op.rs] | spec_gpr::fixed(op.uimm16); -} - -void ppu_acontext::ORIS(ppu_opcode_t op) -{ - gpr[op.ra] = gpr[op.rs] | spec_gpr::fixed(op.uimm16 << 16); -} - -void ppu_acontext::XORI(ppu_opcode_t op) -{ - gpr[op.ra] = gpr[op.rs] ^ spec_gpr::fixed(op.uimm16); -} - -void ppu_acontext::XORIS(ppu_opcode_t op) -{ - gpr[op.ra] = gpr[op.rs] ^ spec_gpr::fixed(op.uimm16 << 16); -} - -void ppu_acontext::ANDI(ppu_opcode_t op) -{ - gpr[op.ra] = gpr[op.rs] & spec_gpr::fixed(op.uimm16); -} - -void ppu_acontext::ANDIS(ppu_opcode_t op) -{ - gpr[op.ra] = gpr[op.rs] & spec_gpr::fixed(op.uimm16 << 16); -} - -void ppu_acontext::RLDICL(ppu_opcode_t op) -{ - const u32 sh = 
op.sh64; - const u32 mb = op.mbe64; - const u64 mask = ~0ull >> mb; - - u64 min = gpr[op.rs].bmin; - u64 max = gpr[op.rs].bmax; - - if (64 - sh < mb) - { - // EXTRDI mnemonic - } - else if (64 - sh == mb) - { - // SRDI mnemonic - } - else if (sh == 0) - { - // CLRLDI mnemonic - gpr[op.ra] = gpr[op.rs] & spec_gpr::fixed(mask); - return; - } - - min = utils::rol64(min, sh) & mask; - max = utils::rol64(max, sh) & mask; - gpr[op.ra] = spec_gpr::approx(min, max); -} - -void ppu_acontext::RLDICR(ppu_opcode_t op) -{ - const u32 sh = op.sh64; - const u32 me = op.mbe64; - const u64 mask = ~0ull << (63 - me); - - u64 min = gpr[op.rs].bmin; - u64 max = gpr[op.rs].bmax; - - if (sh < 63 - me) - { - // EXTLDI mnemonic - } - else if (sh == 63 - me) - { - // SLDI mnemonic - } - else if (sh == 0) - { - // CLRRDI mnemonic - gpr[op.ra] = gpr[op.rs] & spec_gpr::fixed(mask); - return; - } - - min = utils::rol64(min, sh) & mask; - max = utils::rol64(max, sh) & mask; - gpr[op.ra] = spec_gpr::approx(min, max); -} - -void ppu_acontext::RLDIC(ppu_opcode_t op) -{ - const u32 sh = op.sh64; - const u32 mb = op.mbe64; - const u64 mask = ppu_rotate_mask(mb, 63 - sh); - - u64 min = gpr[op.rs].bmin; - u64 max = gpr[op.rs].bmax; - - if (mb == 0 && sh == 0) - { - gpr[op.ra] = gpr[op.rs]; - return; - } - else if (mb <= 63 - sh) - { - // CLRLSLDI - //gpr[op.ra] = (gpr[op.rs] & spec_gpr::fixed(ppu_rotate_mask(0, sh + mb))) << spec_gpr::fixed(sh); - return; - } - - min = utils::rol64(min, sh) & mask; - max = utils::rol64(max, sh) & mask; - gpr[op.ra] = spec_gpr::approx(min, max); -} - -void ppu_acontext::RLDIMI(ppu_opcode_t op) -{ - const u32 sh = op.sh64; - const u32 mb = op.mbe64; - const u64 mask = ppu_rotate_mask(mb, 63 - sh); - - u64 min = gpr[op.rs].bmin; - u64 max = gpr[op.rs].bmax; - - if (mb == 0 && sh == 0) - { - // Copy - } - else if (mb <= 63 - sh) - { - // INSRDI mnemonic - } - - min = utils::rol64(min, sh) & mask; - max = utils::rol64(max, sh) & mask; - - if (mask != umax) - { - // 
Insertion - min |= gpr[op.ra].bmin & ~mask; - max |= gpr[op.ra].bmax & ~mask; - } - - gpr[op.ra] = spec_gpr::approx(min, max); -} - -void ppu_acontext::RLDCL(ppu_opcode_t op) -{ - const u32 mb = op.mbe64; - const u64 mask = ~0ull >> mb; - - u64 min = gpr[op.rs].bmin; - u64 max = gpr[op.rs].bmax; - - // TODO - min = 0; - max = mask; - gpr[op.ra] = spec_gpr::approx(min, max); -} - -void ppu_acontext::RLDCR(ppu_opcode_t op) -{ - const u32 me = op.mbe64; - const u64 mask = ~0ull << (63 - me); - - u64 min = gpr[op.rs].bmin; - u64 max = gpr[op.rs].bmax; - - // TODO - min = 0; - max = mask; - gpr[op.ra] = spec_gpr::approx(min, max); -} - -void ppu_acontext::CMP(ppu_opcode_t op) -{ -} - -void ppu_acontext::TW(ppu_opcode_t op) -{ -} - -void ppu_acontext::LVSL(ppu_opcode_t op) -{ -} - -void ppu_acontext::LVEBX(ppu_opcode_t op) -{ -} - -void ppu_acontext::SUBFC(ppu_opcode_t op) -{ - gpr[op.rd] = ~gpr[op.ra] + gpr[op.rb] + spec_gpr::fixed(1); -} - -void ppu_acontext::ADDC(ppu_opcode_t op) -{ - gpr[op.rd] = gpr[op.ra] + gpr[op.rb]; -} - -void ppu_acontext::MULHDU(ppu_opcode_t op) -{ - gpr[op.rd].set_undef(); -} - -void ppu_acontext::MULHWU(ppu_opcode_t op) -{ - gpr[op.rd].set_undef(); -} - -void ppu_acontext::MFOCRF(ppu_opcode_t op) -{ - gpr[op.rd].set_undef(); -} - -void ppu_acontext::LWARX(ppu_opcode_t op) -{ - gpr[op.rd] = spec_gpr::range(0, u32{umax}); -} - -void ppu_acontext::LDX(ppu_opcode_t op) -{ - gpr[op.rd].set_undef(); -} - -void ppu_acontext::LWZX(ppu_opcode_t op) -{ - gpr[op.rd].set_undef(); -} - -void ppu_acontext::SLW(ppu_opcode_t op) -{ - gpr[op.ra].set_undef(); -} - -void ppu_acontext::CNTLZW(ppu_opcode_t op) -{ - gpr[op.ra].set_undef(); -} - -void ppu_acontext::SLD(ppu_opcode_t op) -{ - gpr[op.ra].set_undef(); -} - -void ppu_acontext::AND(ppu_opcode_t op) -{ - gpr[op.ra] = gpr[op.rs] & gpr[op.rb]; -} - -void ppu_acontext::CMPL(ppu_opcode_t op) -{ -} - -void ppu_acontext::LVSR(ppu_opcode_t op) -{ -} - -void ppu_acontext::LVEHX(ppu_opcode_t op) -{ -} - -void 
ppu_acontext::SUBF(ppu_opcode_t op) -{ - gpr[op.rd] = ~gpr[op.ra] + gpr[op.rb] + spec_gpr::fixed(1); -} - -void ppu_acontext::LDUX(ppu_opcode_t op) -{ - const auto addr = gpr[op.ra] + gpr[op.rb]; - gpr[op.rd].set_undef(); - gpr[op.ra] = addr; -} - -void ppu_acontext::DCBST(ppu_opcode_t op) -{ -} - -void ppu_acontext::LWZUX(ppu_opcode_t op) -{ - const auto addr = gpr[op.ra] + gpr[op.rb]; - gpr[op.rd].set_undef(); - gpr[op.ra] = addr; -} - -void ppu_acontext::CNTLZD(ppu_opcode_t op) -{ - gpr[op.ra].set_undef(); -} - -void ppu_acontext::ANDC(ppu_opcode_t op) -{ - gpr[op.ra] = gpr[op.rs] & ~gpr[op.rb]; -} - -void ppu_acontext::TD(ppu_opcode_t op) -{ -} - -void ppu_acontext::LVEWX(ppu_opcode_t op) -{ -} - -void ppu_acontext::MULHD(ppu_opcode_t op) -{ - gpr[op.rd].set_undef(); -} - -void ppu_acontext::MULHW(ppu_opcode_t op) -{ - gpr[op.rd].set_undef(); -} - -void ppu_acontext::LDARX(ppu_opcode_t op) -{ - gpr[op.rd] = {}; -} - -void ppu_acontext::DCBF(ppu_opcode_t op) -{ -} - -void ppu_acontext::LBZX(ppu_opcode_t op) -{ - gpr[op.rd].set_undef(); -} - -void ppu_acontext::LVX(ppu_opcode_t op) -{ -} - -void ppu_acontext::NEG(ppu_opcode_t op) -{ - gpr[op.rd] = ~gpr[op.ra] + spec_gpr::fixed(1); -} - -void ppu_acontext::LBZUX(ppu_opcode_t op) -{ - const auto addr = gpr[op.ra] + gpr[op.rb]; - gpr[op.rd].set_undef(); - gpr[op.ra] = addr; -} - -void ppu_acontext::NOR(ppu_opcode_t op) -{ - gpr[op.ra] = ~(gpr[op.rs] | gpr[op.rb]); -} - -void ppu_acontext::STVEBX(ppu_opcode_t op) -{ -} - -void ppu_acontext::SUBFE(ppu_opcode_t op) -{ - gpr[op.rd] = ~gpr[op.ra] + gpr[op.rb] + spec_gpr::range(0, 1); -} - -void ppu_acontext::ADDE(ppu_opcode_t op) -{ - gpr[op.rd] = gpr[op.ra] + gpr[op.rb] + spec_gpr::range(0, 1); -} - -void ppu_acontext::MTOCRF(ppu_opcode_t op) -{ -} - -void ppu_acontext::STDX(ppu_opcode_t op) -{ -} - -void ppu_acontext::STWCX(ppu_opcode_t op) -{ -} - -void ppu_acontext::STWX(ppu_opcode_t op) -{ -} - -void ppu_acontext::STVEHX(ppu_opcode_t op) -{ -} - -void 
ppu_acontext::STDUX(ppu_opcode_t op) -{ - const auto addr = gpr[op.ra] + gpr[op.rb]; - gpr[op.ra] = addr; -} - -void ppu_acontext::STWUX(ppu_opcode_t op) -{ - const auto addr = gpr[op.ra] + gpr[op.rb]; - gpr[op.ra] = addr; -} - -void ppu_acontext::STVEWX(ppu_opcode_t op) -{ -} - -void ppu_acontext::SUBFZE(ppu_opcode_t op) -{ - gpr[op.rd] = ~gpr[op.ra] + spec_gpr::range(0, 1); -} - -void ppu_acontext::ADDZE(ppu_opcode_t op) -{ - gpr[op.rd] = gpr[op.ra] + spec_gpr::range(0, 1); -} - -void ppu_acontext::STDCX(ppu_opcode_t op) -{ -} - -void ppu_acontext::STBX(ppu_opcode_t op) -{ -} - -void ppu_acontext::STVX(ppu_opcode_t op) -{ -} - -void ppu_acontext::SUBFME(ppu_opcode_t op) -{ - gpr[op.rd] = ~gpr[op.ra] + spec_gpr::fixed(-1) + spec_gpr::range(0, 1); -} - -void ppu_acontext::MULLD(ppu_opcode_t op) -{ - gpr[op.rd].set_undef(); -} - -void ppu_acontext::ADDME(ppu_opcode_t op) -{ - gpr[op.rd] = gpr[op.ra] + spec_gpr::fixed(-1) + spec_gpr::range(0, 1); -} - -void ppu_acontext::MULLW(ppu_opcode_t op) -{ - gpr[op.rd].set_undef(); -} - -void ppu_acontext::DCBTST(ppu_opcode_t op) -{ -} - -void ppu_acontext::STBUX(ppu_opcode_t op) -{ - const auto addr = gpr[op.ra] + gpr[op.rb]; - gpr[op.ra] = addr; -} - -void ppu_acontext::ADD(ppu_opcode_t op) -{ - gpr[op.rd] = gpr[op.ra] + gpr[op.rd]; -} - -void ppu_acontext::DCBT(ppu_opcode_t op) -{ -} - -void ppu_acontext::LHZX(ppu_opcode_t op) -{ - gpr[op.rd].set_undef(); -} - -void ppu_acontext::EQV(ppu_opcode_t op) -{ - gpr[op.ra] = ~(gpr[op.rs] ^ gpr[op.rb]); -} - -void ppu_acontext::ECIWX(ppu_opcode_t op) -{ - gpr[op.rd].set_undef(); -} - -void ppu_acontext::LHZUX(ppu_opcode_t op) -{ - const auto addr = gpr[op.ra] + gpr[op.rb]; - gpr[op.rd].set_undef(); - gpr[op.ra] = addr; -} - -void ppu_acontext::XOR(ppu_opcode_t op) -{ - gpr[op.ra] = gpr[op.rs] ^ gpr[op.rb]; -} - -void ppu_acontext::MFSPR(ppu_opcode_t op) -{ - gpr[op.rd].set_undef(); -} - -void ppu_acontext::LWAX(ppu_opcode_t op) -{ - gpr[op.rd].set_undef(); -} - -void 
ppu_acontext::DST(ppu_opcode_t op) -{ -} - -void ppu_acontext::LHAX(ppu_opcode_t op) -{ - gpr[op.rd].set_undef(); -} - -void ppu_acontext::LVXL(ppu_opcode_t op) -{ -} - -void ppu_acontext::MFTB(ppu_opcode_t op) -{ - gpr[op.rd].set_undef(); -} - -void ppu_acontext::LWAUX(ppu_opcode_t op) -{ - const auto addr = gpr[op.ra] + gpr[op.rb]; - gpr[op.rd].set_undef(); - gpr[op.ra] = addr; -} - -void ppu_acontext::DSTST(ppu_opcode_t op) -{ -} - -void ppu_acontext::LHAUX(ppu_opcode_t op) -{ - const auto addr = gpr[op.ra] + gpr[op.rb]; - gpr[op.rd].set_undef(); - gpr[op.ra] = addr; -} - -void ppu_acontext::STHX(ppu_opcode_t op) -{ -} - -void ppu_acontext::ORC(ppu_opcode_t op) -{ - gpr[op.ra] = gpr[op.rs] | ~gpr[op.rb]; -} - -void ppu_acontext::ECOWX(ppu_opcode_t op) -{ -} - -void ppu_acontext::STHUX(ppu_opcode_t op) -{ - const auto addr = gpr[op.ra] + gpr[op.rb]; - gpr[op.ra] = addr; -} - -void ppu_acontext::OR(ppu_opcode_t op) -{ - gpr[op.ra] = gpr[op.rs] | gpr[op.rb]; -} - -void ppu_acontext::DIVDU(ppu_opcode_t op) -{ - gpr[op.rd].set_undef(); -} - -void ppu_acontext::DIVWU(ppu_opcode_t op) -{ - gpr[op.rd].set_undef(); -} - -void ppu_acontext::MTSPR(ppu_opcode_t op) -{ -} - -void ppu_acontext::DCBI(ppu_opcode_t op) -{ -} - -void ppu_acontext::NAND(ppu_opcode_t op) -{ - gpr[op.ra] = ~(gpr[op.rs] & gpr[op.rb]); -} - -void ppu_acontext::STVXL(ppu_opcode_t op) -{ -} - -void ppu_acontext::DIVD(ppu_opcode_t op) -{ - gpr[op.rd].set_undef(); -} - -void ppu_acontext::DIVW(ppu_opcode_t op) -{ - gpr[op.rd].set_undef(); -} - -void ppu_acontext::LVLX(ppu_opcode_t op) -{ -} - -void ppu_acontext::LDBRX(ppu_opcode_t op) -{ - gpr[op.rd].set_undef(); -} - -void ppu_acontext::LSWX(ppu_opcode_t op) -{ -} - -void ppu_acontext::LWBRX(ppu_opcode_t op) -{ - gpr[op.rd].set_undef(); -} - -void ppu_acontext::LFSX(ppu_opcode_t op) -{ -} - -void ppu_acontext::SRW(ppu_opcode_t op) -{ - gpr[op.ra].set_undef(); -} - -void ppu_acontext::SRD(ppu_opcode_t op) -{ - gpr[op.ra].set_undef(); -} - -void 
ppu_acontext::LVRX(ppu_opcode_t op) -{ -} - -void ppu_acontext::LSWI(ppu_opcode_t op) -{ - std::fill_n(gpr, 32, spec_gpr{}); -} - -void ppu_acontext::LFSUX(ppu_opcode_t op) -{ - const auto addr = gpr[op.ra] + gpr[op.rb]; - gpr[op.ra] = addr; -} - -void ppu_acontext::SYNC(ppu_opcode_t op) -{ -} - -void ppu_acontext::LFDX(ppu_opcode_t op) -{ -} - -void ppu_acontext::LFDUX(ppu_opcode_t op) -{ - const auto addr = gpr[op.ra] + gpr[op.rb]; - gpr[op.ra] = addr; -} - -void ppu_acontext::STVLX(ppu_opcode_t op) -{ -} - -void ppu_acontext::STDBRX(ppu_opcode_t op) -{ -} - -void ppu_acontext::STSWX(ppu_opcode_t op) -{ -} - -void ppu_acontext::STWBRX(ppu_opcode_t op) -{ -} - -void ppu_acontext::STFSX(ppu_opcode_t op) -{ -} - -void ppu_acontext::STVRX(ppu_opcode_t op) -{ -} - -void ppu_acontext::STFSUX(ppu_opcode_t op) -{ - const auto addr = gpr[op.ra] + gpr[op.rb]; - gpr[op.ra] = addr; -} - -void ppu_acontext::STSWI(ppu_opcode_t op) -{ -} - -void ppu_acontext::STFDX(ppu_opcode_t op) -{ -} - -void ppu_acontext::STFDUX(ppu_opcode_t op) -{ - const auto addr = gpr[op.ra] + gpr[op.rb]; - gpr[op.ra] = addr; -} - -void ppu_acontext::LVLXL(ppu_opcode_t op) -{ -} - -void ppu_acontext::LHBRX(ppu_opcode_t op) -{ - gpr[op.rd].set_undef(); -} - -void ppu_acontext::SRAW(ppu_opcode_t op) -{ - gpr[op.ra].set_undef(); -} - -void ppu_acontext::SRAD(ppu_opcode_t op) -{ - gpr[op.ra].set_undef(); -} - -void ppu_acontext::LVRXL(ppu_opcode_t op) -{ -} - -void ppu_acontext::DSS(ppu_opcode_t op) -{ -} - -void ppu_acontext::SRAWI(ppu_opcode_t op) -{ - gpr[op.ra].set_undef(); -} - -void ppu_acontext::SRADI(ppu_opcode_t op) -{ - gpr[op.ra].set_undef(); -} - -void ppu_acontext::EIEIO(ppu_opcode_t op) -{ -} - -void ppu_acontext::STVLXL(ppu_opcode_t op) -{ -} - -void ppu_acontext::STHBRX(ppu_opcode_t op) -{ -} - -void ppu_acontext::EXTSH(ppu_opcode_t op) -{ - gpr[op.ra].set_undef(); -} - -void ppu_acontext::STVRXL(ppu_opcode_t op) -{ -} - -void ppu_acontext::EXTSB(ppu_opcode_t op) -{ - gpr[op.ra].set_undef(); 
-} - -void ppu_acontext::STFIWX(ppu_opcode_t op) -{ -} - -void ppu_acontext::EXTSW(ppu_opcode_t op) -{ - gpr[op.ra].set_undef(); -} - -void ppu_acontext::ICBI(ppu_opcode_t op) -{ -} - -void ppu_acontext::DCBZ(ppu_opcode_t op) -{ -} - -void ppu_acontext::LWZ(ppu_opcode_t op) -{ - gpr[op.rd].set_undef(); -} - -void ppu_acontext::LWZU(ppu_opcode_t op) -{ - const auto addr = gpr[op.ra] + gpr[op.rb]; - gpr[op.rd].set_undef(); - gpr[op.ra] = addr; -} - -void ppu_acontext::LBZ(ppu_opcode_t op) -{ - gpr[op.rd].set_undef(); -} - -void ppu_acontext::LBZU(ppu_opcode_t op) -{ - const auto addr = gpr[op.ra] + gpr[op.rb]; - gpr[op.rd].set_undef(); - gpr[op.ra] = addr; -} - -void ppu_acontext::STW(ppu_opcode_t op) -{ -} - -void ppu_acontext::STWU(ppu_opcode_t op) -{ - const auto addr = gpr[op.ra] + gpr[op.rb]; - gpr[op.ra] = addr; -} - -void ppu_acontext::STB(ppu_opcode_t op) -{ -} - -void ppu_acontext::STBU(ppu_opcode_t op) -{ - const auto addr = gpr[op.ra] + gpr[op.rb]; - gpr[op.ra] = addr; -} - -void ppu_acontext::LHZ(ppu_opcode_t op) -{ - gpr[op.rd].set_undef(); -} - -void ppu_acontext::LHZU(ppu_opcode_t op) -{ - const auto addr = gpr[op.ra] + gpr[op.rb]; - gpr[op.rd].set_undef(); - gpr[op.ra] = addr; -} - -void ppu_acontext::LHA(ppu_opcode_t op) -{ - gpr[op.rd].set_undef(); -} - -void ppu_acontext::LHAU(ppu_opcode_t op) -{ - const auto addr = gpr[op.ra] + gpr[op.rb]; - gpr[op.rd].set_undef(); - gpr[op.ra] = addr; -} - -void ppu_acontext::STH(ppu_opcode_t op) -{ -} - -void ppu_acontext::STHU(ppu_opcode_t op) -{ - const auto addr = gpr[op.ra] + gpr[op.rb]; - gpr[op.ra] = addr; -} - -void ppu_acontext::LMW(ppu_opcode_t op) -{ - std::fill_n(gpr, 32, spec_gpr{}); -} - -void ppu_acontext::STMW(ppu_opcode_t op) -{ -} - -void ppu_acontext::LFS(ppu_opcode_t op) -{ -} - -void ppu_acontext::LFSU(ppu_opcode_t op) -{ - const auto addr = gpr[op.ra] + gpr[op.rb]; - gpr[op.ra] = addr; -} - -void ppu_acontext::LFD(ppu_opcode_t op) -{ -} - -void ppu_acontext::LFDU(ppu_opcode_t op) -{ - const 
auto addr = gpr[op.ra] + gpr[op.rb]; - gpr[op.ra] = addr; -} - -void ppu_acontext::STFS(ppu_opcode_t op) -{ -} - -void ppu_acontext::STFSU(ppu_opcode_t op) -{ - const auto addr = gpr[op.ra] + gpr[op.rb]; - gpr[op.ra] = addr; -} - -void ppu_acontext::STFD(ppu_opcode_t op) -{ -} - -void ppu_acontext::STFDU(ppu_opcode_t op) -{ - const auto addr = gpr[op.ra] + gpr[op.rb]; - gpr[op.ra] = addr; -} - -void ppu_acontext::LD(ppu_opcode_t op) -{ - gpr[op.rd].set_undef(); -} - -void ppu_acontext::LDU(ppu_opcode_t op) -{ - const auto addr = gpr[op.ra] + gpr[op.rb]; - gpr[op.rd].set_undef(); - gpr[op.ra] = addr; -} - -void ppu_acontext::LWA(ppu_opcode_t op) -{ - gpr[op.rd].set_undef(); -} - -void ppu_acontext::STD(ppu_opcode_t op) -{ -} - -void ppu_acontext::STDU(ppu_opcode_t op) -{ - const auto addr = gpr[op.ra] + gpr[op.rb]; - gpr[op.ra] = addr; -} - -void ppu_acontext::FDIVS(ppu_opcode_t op) -{ -} - -void ppu_acontext::FSUBS(ppu_opcode_t op) -{ -} - -void ppu_acontext::FADDS(ppu_opcode_t op) -{ -} - -void ppu_acontext::FSQRTS(ppu_opcode_t op) -{ -} - -void ppu_acontext::FRES(ppu_opcode_t op) -{ -} - -void ppu_acontext::FMULS(ppu_opcode_t op) -{ -} - -void ppu_acontext::FMADDS(ppu_opcode_t op) -{ -} - -void ppu_acontext::FMSUBS(ppu_opcode_t op) -{ -} - -void ppu_acontext::FNMSUBS(ppu_opcode_t op) -{ -} - -void ppu_acontext::FNMADDS(ppu_opcode_t op) -{ -} - -void ppu_acontext::MTFSB1(ppu_opcode_t op) -{ -} - -void ppu_acontext::MCRFS(ppu_opcode_t op) -{ -} - -void ppu_acontext::MTFSB0(ppu_opcode_t op) -{ -} - -void ppu_acontext::MTFSFI(ppu_opcode_t op) -{ -} - -void ppu_acontext::MFFS(ppu_opcode_t op) -{ -} - -void ppu_acontext::MTFSF(ppu_opcode_t op) -{ -} - -void ppu_acontext::FCMPU(ppu_opcode_t op) -{ -} - -void ppu_acontext::FRSP(ppu_opcode_t op) -{ -} - -void ppu_acontext::FCTIW(ppu_opcode_t op) -{ -} - -void ppu_acontext::FCTIWZ(ppu_opcode_t op) -{ -} - -void ppu_acontext::FDIV(ppu_opcode_t op) -{ -} - -void ppu_acontext::FSUB(ppu_opcode_t op) -{ -} - -void 
ppu_acontext::FADD(ppu_opcode_t op) -{ -} - -void ppu_acontext::FSQRT(ppu_opcode_t op) -{ -} - -void ppu_acontext::FSEL(ppu_opcode_t op) -{ -} - -void ppu_acontext::FMUL(ppu_opcode_t op) -{ -} - -void ppu_acontext::FRSQRTE(ppu_opcode_t op) -{ -} - -void ppu_acontext::FMSUB(ppu_opcode_t op) -{ -} - -void ppu_acontext::FMADD(ppu_opcode_t op) -{ -} - -void ppu_acontext::FNMSUB(ppu_opcode_t op) -{ -} - -void ppu_acontext::FNMADD(ppu_opcode_t op) -{ -} - -void ppu_acontext::FCMPO(ppu_opcode_t op) -{ -} - -void ppu_acontext::FNEG(ppu_opcode_t op) -{ -} - -void ppu_acontext::FMR(ppu_opcode_t op) -{ -} - -void ppu_acontext::FNABS(ppu_opcode_t op) -{ -} - -void ppu_acontext::FABS(ppu_opcode_t op) -{ -} - -void ppu_acontext::FCTID(ppu_opcode_t op) -{ -} - -void ppu_acontext::FCTIDZ(ppu_opcode_t op) -{ -} - -void ppu_acontext::FCFID(ppu_opcode_t op) -{ -} - -#include - -const bool s_tes = []() -{ - return true; - - std::mt19937_64 rnd{123}; - - for (u32 i = 0; i < 10000; i++) - { - ppu_acontext::spec_gpr r1, r2, r3; - r1 = ppu_acontext::spec_gpr::approx(rnd(), rnd()); - r2 = ppu_acontext::spec_gpr::range(rnd(), rnd()); - r3 = r1 | r2; - - for (u32 j = 0; j < 10000; j++) - { - u64 v1 = rnd(), v2 = rnd(); - v1 &= r1.mask(); - v1 |= r1.ones(); - if (!r2.test(v2)) - { - v2 = r2.imin; - } - - if (r1.test(v1) && r2.test(v2)) - { - if (!r3.test(v1 | v2)) - { - auto exp = ppu_acontext::spec_gpr::approx(r1.ones() & r2.ones(), r1.mask() & r2.mask()); - - ppu_log.error("ppu_acontext failure:" - "\n\tr1 = 0x%016x..0x%016x, 0x%016x:0x%016x" - "\n\tr2 = 0x%016x..0x%016x, 0x%016x:0x%016x" - "\n\tr3 = 0x%016x..0x%016x, 0x%016x:0x%016x" - "\n\tex = 0x%016x..0x%016x" - "\n\tv1 = 0x%016x, v2 = 0x%016x, v3 = 0x%016x", - r1.imin, r1.imax, r1.bmin, r1.bmax, r2.imin, r2.imax, r2.bmin, r2.bmax, r3.imin, r3.imax, r3.bmin, r3.bmax, exp.imin, exp.imax, v1, v2, v1 | v2); - break; - } - } - } - } - - ppu_acontext::spec_gpr r1; - r1 = ppu_acontext::spec_gpr::range(0x13311, 0x1fe22); - r1 = r1 ^ 
ppu_acontext::spec_gpr::approx(0x000, 0xf00); - ppu_log.success("0x%x..0x%x", r1.imin, r1.imax); - - return true; -}(); diff --git a/rpcs3/Emu/Cell/PPUAnalyser.h b/rpcs3/Emu/Cell/PPUAnalyser.h index 31571bdf93..9282ac1ba8 100644 --- a/rpcs3/Emu/Cell/PPUAnalyser.h +++ b/rpcs3/Emu/Cell/PPUAnalyser.h @@ -1419,738 +1419,3 @@ struct ppu_iname #undef NAME #undef NAME_ }; - -// PPU Analyser Context -struct ppu_acontext -{ - // General-purpose register range - struct spec_gpr - { - // Integral range: normalized undef = (0;UINT64_MAX), unnormalized undefs are possible (when max = min - 1) - // Bit range: constant 0 = (0;0), constant 1 = (1;1), normalized undef = (0;1), unnormalized undef = (1;0) - - u64 imin = 0ull; // Integral range begin - u64 imax = ~0ull; // Integral range end - u64 bmin = 0ull; // Bit range begin - u64 bmax = ~0ull; // Bit range end - - void set_undef() - { - imin = 0; - imax = -1; - bmin = 0; - bmax = -1; - } - - // (Number of possible values - 1), 0 = const - u64 div() const - { - return imax - imin; - } - - // Return zero bits for zeros, ones for ones or undefs - u64 mask() const - { - return bmin | bmax; - } - - // Return one bits for ones, zeros for zeros or undefs - u64 ones() const - { - return bmin & bmax; - } - - // Return one bits for undefs - u64 undefs() const - { - return bmin ^ bmax; - } - - // Return number of trailing zero bits - u64 tz() const - { - return std::countr_zero(mask()); - } - - // Range NOT - spec_gpr operator ~() const - { - spec_gpr r; - r.imin = ~imax; - r.imax = ~imin; - r.bmin = ~bmax; - r.bmax = ~bmin; - return r; - } - - // Range ADD - spec_gpr operator +(const spec_gpr& rhs) const - { - spec_gpr r{}; - - const u64 adiv = div(); - const u64 bdiv = rhs.div(); - - // Check overflow, generate normalized range - if (adiv != umax && bdiv != umax && adiv <= adiv + bdiv) - { - r = range(imin + rhs.imin, imax + rhs.imax); - } - - // Carry for bitrange computation - u64 cmin = 0; - u64 cmax = 0; - - const u64 amask = mask(); 
- const u64 bmask = rhs.mask(); - const u64 aones = ones(); - const u64 bones = rhs.ones(); - - for (u32 i = 0; i < 64; i++) - { - cmin += ((amask >> i) & 1) + ((bmask >> i) & 1); - cmax += ((aones >> i) & 1) + ((bones >> i) & 1); - - // Discover some constant bits - if (cmin == cmax) - { - r.bmin |= (cmin & 1) << i; - r.bmax &= ~((~cmin & 1) << i); - } - - cmin >>= 1; - cmax >>= 1; - } - - return r; - } - - // Range AND - spec_gpr operator &(const spec_gpr& rhs) const - { - // Ignore inverted ranges (TODO) - if (imin > imax || rhs.imin > rhs.imax) - { - return approx(ones() & rhs.ones(), mask() & rhs.mask()); - } - - // Const (TODO: remove when unnecessary) - if (imin == imax && rhs.imin == rhs.imax) - { - return fixed(imin & rhs.imin); - } - - // Swap (TODO: remove when unnecessary) - if (imin == imax || rhs.undefs() > undefs()) - { - return rhs & *this; - } - - // Copy and attempt to partially preserve integral range - spec_gpr r = *this; - - for (u32 i = 63; ~i; i--) - { - const u64 m = 1ull << i; - - if (!(rhs.mask() & m)) - { - if (r.undefs() & m) - { - // undef -> 0 - r.imin &= ~(m - 1); - r.imax |= (m - 1); - r.imin &= ~m; - r.imax &= ~m; - } - else if (r.ones() & m) - { - // 1 -> 0 - if ((r.imin ^ r.imax) > (m - 1)) - { - r.imin &= ~(m - 1); - r.imax |= (m - 1); - } - - r.imin &= ~m; - r.imax &= ~m; - } - } - else if (rhs.undefs() & m) - { - // -> undef - r.imin &= ~(m - 1); - r.imax |= (m - 1); - r.imin &= ~m; - r.imax |= m; - } - } - - r.bmin = ones() & rhs.ones(); - r.bmax = mask() & rhs.mask(); - return r; - } - - // Range OR - spec_gpr operator |(const spec_gpr& rhs) const - { - // Ignore inverted ranges (TODO) - if (imin > imax || rhs.imin > rhs.imax) - { - return approx(ones() | rhs.ones(), mask() | rhs.mask()); - } - - // Const (TODO: remove when unnecessary) - if (imin == imax && rhs.imin == rhs.imax) - { - return fixed(imin | rhs.imin); - } - - // Swap (TODO: remove when unnecessary) - if (imin == imax || rhs.undefs() > undefs()) - { - return rhs 
| *this; - } - - // Copy and attempt to partially preserve integral range - spec_gpr r = *this; - - for (u32 i = 63; ~i; i--) - { - const u64 m = 1ull << i; - - if (rhs.ones() & m) - { - if (r.undefs() & m) - { - // undef -> 1 - r.imin &= ~(m - 1); - r.imax |= (m - 1); - r.imin |= m; - r.imax |= m; - } - else if (!(r.mask() & m)) - { - // 0 -> 1 - if ((r.imin ^ r.imax) > (m - 1)) - { - r.imin &= ~(m - 1); - r.imax |= (m - 1); - } - - r.imin |= m; - r.imax |= m; - } - } - else if (rhs.undefs() & m) - { - // -> undef - r.imin &= ~(m - 1); - r.imax |= (m - 1); - r.imin &= ~m; - r.imax |= m; - } - } - - r.bmin = ones() | rhs.ones(); - r.bmax = mask() | rhs.mask(); - return r; - } - - // Range XOR - spec_gpr operator ^(const spec_gpr& rhs) const - { - return (~*this & rhs) | (*this & ~rhs); - } - - // Check whether the value is in range - bool test(u64 value) const - { - if (imin <= imax) - { - if (value < imin || value > imax) - { - return false; - } - } - else - { - if (value < imin && value > imax) - { - return false; - } - } - - if ((value & mask()) != value) - { - return false; - } - - if ((value | ones()) != value) - { - return false; - } - - return true; - } - - // Constant value - static spec_gpr fixed(u64 value) - { - spec_gpr r; - r.imin = value; - r.imax = value; - r.bmin = value; - r.bmax = value; - return r; - } - - // Range (tz = number of constant trailing zeros) - static spec_gpr range(u64 min, u64 max, u64 tz = 0) - { - const u64 mask = tz < 64 ? 
~0ull << tz : 0ull; - - spec_gpr r; - r.bmin = 0; - r.bmax = mask; - - // Normalize min/max for tz (TODO) - if (min < max) - { - // Inverted constant MSB mask - const u64 mix = ~0ull >> std::countl_zero(min ^ max); - r.bmin |= min & ~mix; - r.bmax &= max | mix; - - r.imin = (min + ~mask) & mask; - r.imax = max & mask; - ensure(r.imin <= r.imax); // "Impossible range" - } - else - { - r.imin = min & mask; - r.imax = (max + ~mask) & mask; - ensure(r.imin >= r.imax); // "Impossible range" - } - - // Fix const values - if (r.imin == r.imax) - { - r.bmin = r.imin; - r.bmax = r.imax; - } - - return r; - } - - // Make from bitrange (normalize, approximate range values) - static spec_gpr approx(u64 bmin, u64 bmax) - { - spec_gpr r; - r.imin = bmin & ~(bmin ^ bmax); - r.imax = bmax | (bmin ^ bmax); - r.bmin = bmin & ~(bmin ^ bmax); - r.bmax = bmax | (bmin ^ bmax); - return r; - } - } gpr[32]{}; - - // Vector registers (draft) - struct spec_vec - { - u8 imin8[16]{}; - u8 imax8[16]{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}; - u16 imin16[8]{}; - u16 imax16[8]{0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}; - u32 imin32[4]{}; - u32 imax32[4]{0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu}; - u64 bmin64[2]{}; - u64 bmax64[2]{0xffffffffffffffffull, 0xffffffffffffffffull}; - }; - - // Info - u32 cia; - - // Analyser step - void UNK(ppu_opcode_t); - - void MFVSCR(ppu_opcode_t); - void MTVSCR(ppu_opcode_t); - void VADDCUW(ppu_opcode_t); - void VADDFP(ppu_opcode_t); - void VADDSBS(ppu_opcode_t); - void VADDSHS(ppu_opcode_t); - void VADDSWS(ppu_opcode_t); - void VADDUBM(ppu_opcode_t); - void VADDUBS(ppu_opcode_t); - void VADDUHM(ppu_opcode_t); - void VADDUHS(ppu_opcode_t); - void VADDUWM(ppu_opcode_t); - void VADDUWS(ppu_opcode_t); - void VAND(ppu_opcode_t); - void VANDC(ppu_opcode_t); - void VAVGSB(ppu_opcode_t); - void VAVGSH(ppu_opcode_t); - void VAVGSW(ppu_opcode_t); - void VAVGUB(ppu_opcode_t); - void 
VAVGUH(ppu_opcode_t); - void VAVGUW(ppu_opcode_t); - void VCFSX(ppu_opcode_t); - void VCFUX(ppu_opcode_t); - void VCMPBFP(ppu_opcode_t); - void VCMPEQFP(ppu_opcode_t); - void VCMPEQUB(ppu_opcode_t); - void VCMPEQUH(ppu_opcode_t); - void VCMPEQUW(ppu_opcode_t); - void VCMPGEFP(ppu_opcode_t); - void VCMPGTFP(ppu_opcode_t); - void VCMPGTSB(ppu_opcode_t); - void VCMPGTSH(ppu_opcode_t); - void VCMPGTSW(ppu_opcode_t); - void VCMPGTUB(ppu_opcode_t); - void VCMPGTUH(ppu_opcode_t); - void VCMPGTUW(ppu_opcode_t); - void VCTSXS(ppu_opcode_t); - void VCTUXS(ppu_opcode_t); - void VEXPTEFP(ppu_opcode_t); - void VLOGEFP(ppu_opcode_t); - void VMADDFP(ppu_opcode_t); - void VMAXFP(ppu_opcode_t); - void VMAXSB(ppu_opcode_t); - void VMAXSH(ppu_opcode_t); - void VMAXSW(ppu_opcode_t); - void VMAXUB(ppu_opcode_t); - void VMAXUH(ppu_opcode_t); - void VMAXUW(ppu_opcode_t); - void VMHADDSHS(ppu_opcode_t); - void VMHRADDSHS(ppu_opcode_t); - void VMINFP(ppu_opcode_t); - void VMINSB(ppu_opcode_t); - void VMINSH(ppu_opcode_t); - void VMINSW(ppu_opcode_t); - void VMINUB(ppu_opcode_t); - void VMINUH(ppu_opcode_t); - void VMINUW(ppu_opcode_t); - void VMLADDUHM(ppu_opcode_t); - void VMRGHB(ppu_opcode_t); - void VMRGHH(ppu_opcode_t); - void VMRGHW(ppu_opcode_t); - void VMRGLB(ppu_opcode_t); - void VMRGLH(ppu_opcode_t); - void VMRGLW(ppu_opcode_t); - void VMSUMMBM(ppu_opcode_t); - void VMSUMSHM(ppu_opcode_t); - void VMSUMSHS(ppu_opcode_t); - void VMSUMUBM(ppu_opcode_t); - void VMSUMUHM(ppu_opcode_t); - void VMSUMUHS(ppu_opcode_t); - void VMULESB(ppu_opcode_t); - void VMULESH(ppu_opcode_t); - void VMULEUB(ppu_opcode_t); - void VMULEUH(ppu_opcode_t); - void VMULOSB(ppu_opcode_t); - void VMULOSH(ppu_opcode_t); - void VMULOUB(ppu_opcode_t); - void VMULOUH(ppu_opcode_t); - void VNMSUBFP(ppu_opcode_t); - void VNOR(ppu_opcode_t); - void VOR(ppu_opcode_t); - void VPERM(ppu_opcode_t); - void VPKPX(ppu_opcode_t); - void VPKSHSS(ppu_opcode_t); - void VPKSHUS(ppu_opcode_t); - void VPKSWSS(ppu_opcode_t); - void 
VPKSWUS(ppu_opcode_t); - void VPKUHUM(ppu_opcode_t); - void VPKUHUS(ppu_opcode_t); - void VPKUWUM(ppu_opcode_t); - void VPKUWUS(ppu_opcode_t); - void VREFP(ppu_opcode_t); - void VRFIM(ppu_opcode_t); - void VRFIN(ppu_opcode_t); - void VRFIP(ppu_opcode_t); - void VRFIZ(ppu_opcode_t); - void VRLB(ppu_opcode_t); - void VRLH(ppu_opcode_t); - void VRLW(ppu_opcode_t); - void VRSQRTEFP(ppu_opcode_t); - void VSEL(ppu_opcode_t); - void VSL(ppu_opcode_t); - void VSLB(ppu_opcode_t); - void VSLDOI(ppu_opcode_t); - void VSLH(ppu_opcode_t); - void VSLO(ppu_opcode_t); - void VSLW(ppu_opcode_t); - void VSPLTB(ppu_opcode_t); - void VSPLTH(ppu_opcode_t); - void VSPLTISB(ppu_opcode_t); - void VSPLTISH(ppu_opcode_t); - void VSPLTISW(ppu_opcode_t); - void VSPLTW(ppu_opcode_t); - void VSR(ppu_opcode_t); - void VSRAB(ppu_opcode_t); - void VSRAH(ppu_opcode_t); - void VSRAW(ppu_opcode_t); - void VSRB(ppu_opcode_t); - void VSRH(ppu_opcode_t); - void VSRO(ppu_opcode_t); - void VSRW(ppu_opcode_t); - void VSUBCUW(ppu_opcode_t); - void VSUBFP(ppu_opcode_t); - void VSUBSBS(ppu_opcode_t); - void VSUBSHS(ppu_opcode_t); - void VSUBSWS(ppu_opcode_t); - void VSUBUBM(ppu_opcode_t); - void VSUBUBS(ppu_opcode_t); - void VSUBUHM(ppu_opcode_t); - void VSUBUHS(ppu_opcode_t); - void VSUBUWM(ppu_opcode_t); - void VSUBUWS(ppu_opcode_t); - void VSUMSWS(ppu_opcode_t); - void VSUM2SWS(ppu_opcode_t); - void VSUM4SBS(ppu_opcode_t); - void VSUM4SHS(ppu_opcode_t); - void VSUM4UBS(ppu_opcode_t); - void VUPKHPX(ppu_opcode_t); - void VUPKHSB(ppu_opcode_t); - void VUPKHSH(ppu_opcode_t); - void VUPKLPX(ppu_opcode_t); - void VUPKLSB(ppu_opcode_t); - void VUPKLSH(ppu_opcode_t); - void VXOR(ppu_opcode_t); - void TDI(ppu_opcode_t); - void TWI(ppu_opcode_t); - void MULLI(ppu_opcode_t); - void SUBFIC(ppu_opcode_t); - void CMPLI(ppu_opcode_t); - void CMPI(ppu_opcode_t); - void ADDIC(ppu_opcode_t); - void ADDI(ppu_opcode_t); - void ADDIS(ppu_opcode_t); - void BC(ppu_opcode_t); - void SC(ppu_opcode_t); - void B(ppu_opcode_t); - 
void MCRF(ppu_opcode_t); - void BCLR(ppu_opcode_t); - void CRNOR(ppu_opcode_t); - void CRANDC(ppu_opcode_t); - void ISYNC(ppu_opcode_t); - void CRXOR(ppu_opcode_t); - void CRNAND(ppu_opcode_t); - void CRAND(ppu_opcode_t); - void CREQV(ppu_opcode_t); - void CRORC(ppu_opcode_t); - void CROR(ppu_opcode_t); - void BCCTR(ppu_opcode_t); - void RLWIMI(ppu_opcode_t); - void RLWINM(ppu_opcode_t); - void RLWNM(ppu_opcode_t); - void ORI(ppu_opcode_t); - void ORIS(ppu_opcode_t); - void XORI(ppu_opcode_t); - void XORIS(ppu_opcode_t); - void ANDI(ppu_opcode_t); - void ANDIS(ppu_opcode_t); - void RLDICL(ppu_opcode_t); - void RLDICR(ppu_opcode_t); - void RLDIC(ppu_opcode_t); - void RLDIMI(ppu_opcode_t); - void RLDCL(ppu_opcode_t); - void RLDCR(ppu_opcode_t); - void CMP(ppu_opcode_t); - void TW(ppu_opcode_t); - void LVSL(ppu_opcode_t); - void LVEBX(ppu_opcode_t); - void SUBFC(ppu_opcode_t); - void ADDC(ppu_opcode_t); - void MULHDU(ppu_opcode_t); - void MULHWU(ppu_opcode_t); - void MFOCRF(ppu_opcode_t); - void LWARX(ppu_opcode_t); - void LDX(ppu_opcode_t); - void LWZX(ppu_opcode_t); - void SLW(ppu_opcode_t); - void CNTLZW(ppu_opcode_t); - void SLD(ppu_opcode_t); - void AND(ppu_opcode_t); - void CMPL(ppu_opcode_t); - void LVSR(ppu_opcode_t); - void LVEHX(ppu_opcode_t); - void SUBF(ppu_opcode_t); - void LDUX(ppu_opcode_t); - void DCBST(ppu_opcode_t); - void LWZUX(ppu_opcode_t); - void CNTLZD(ppu_opcode_t); - void ANDC(ppu_opcode_t); - void TD(ppu_opcode_t); - void LVEWX(ppu_opcode_t); - void MULHD(ppu_opcode_t); - void MULHW(ppu_opcode_t); - void LDARX(ppu_opcode_t); - void DCBF(ppu_opcode_t); - void LBZX(ppu_opcode_t); - void LVX(ppu_opcode_t); - void NEG(ppu_opcode_t); - void LBZUX(ppu_opcode_t); - void NOR(ppu_opcode_t); - void STVEBX(ppu_opcode_t); - void SUBFE(ppu_opcode_t); - void ADDE(ppu_opcode_t); - void MTOCRF(ppu_opcode_t); - void STDX(ppu_opcode_t); - void STWCX(ppu_opcode_t); - void STWX(ppu_opcode_t); - void STVEHX(ppu_opcode_t); - void STDUX(ppu_opcode_t); - void 
STWUX(ppu_opcode_t); - void STVEWX(ppu_opcode_t); - void SUBFZE(ppu_opcode_t); - void ADDZE(ppu_opcode_t); - void STDCX(ppu_opcode_t); - void STBX(ppu_opcode_t); - void STVX(ppu_opcode_t); - void SUBFME(ppu_opcode_t); - void MULLD(ppu_opcode_t); - void ADDME(ppu_opcode_t); - void MULLW(ppu_opcode_t); - void DCBTST(ppu_opcode_t); - void STBUX(ppu_opcode_t); - void ADD(ppu_opcode_t); - void DCBT(ppu_opcode_t); - void LHZX(ppu_opcode_t); - void EQV(ppu_opcode_t); - void ECIWX(ppu_opcode_t); - void LHZUX(ppu_opcode_t); - void XOR(ppu_opcode_t); - void MFSPR(ppu_opcode_t); - void LWAX(ppu_opcode_t); - void DST(ppu_opcode_t); - void LHAX(ppu_opcode_t); - void LVXL(ppu_opcode_t); - void MFTB(ppu_opcode_t); - void LWAUX(ppu_opcode_t); - void DSTST(ppu_opcode_t); - void LHAUX(ppu_opcode_t); - void STHX(ppu_opcode_t); - void ORC(ppu_opcode_t); - void ECOWX(ppu_opcode_t); - void STHUX(ppu_opcode_t); - void OR(ppu_opcode_t); - void DIVDU(ppu_opcode_t); - void DIVWU(ppu_opcode_t); - void MTSPR(ppu_opcode_t); - void DCBI(ppu_opcode_t); - void NAND(ppu_opcode_t); - void STVXL(ppu_opcode_t); - void DIVD(ppu_opcode_t); - void DIVW(ppu_opcode_t); - void LVLX(ppu_opcode_t); - void LDBRX(ppu_opcode_t); - void LSWX(ppu_opcode_t); - void LWBRX(ppu_opcode_t); - void LFSX(ppu_opcode_t); - void SRW(ppu_opcode_t); - void SRD(ppu_opcode_t); - void LVRX(ppu_opcode_t); - void LSWI(ppu_opcode_t); - void LFSUX(ppu_opcode_t); - void SYNC(ppu_opcode_t); - void LFDX(ppu_opcode_t); - void LFDUX(ppu_opcode_t); - void STVLX(ppu_opcode_t); - void STDBRX(ppu_opcode_t); - void STSWX(ppu_opcode_t); - void STWBRX(ppu_opcode_t); - void STFSX(ppu_opcode_t); - void STVRX(ppu_opcode_t); - void STFSUX(ppu_opcode_t); - void STSWI(ppu_opcode_t); - void STFDX(ppu_opcode_t); - void STFDUX(ppu_opcode_t); - void LVLXL(ppu_opcode_t); - void LHBRX(ppu_opcode_t); - void SRAW(ppu_opcode_t); - void SRAD(ppu_opcode_t); - void LVRXL(ppu_opcode_t); - void DSS(ppu_opcode_t); - void SRAWI(ppu_opcode_t); - void 
SRADI(ppu_opcode_t); - void EIEIO(ppu_opcode_t); - void STVLXL(ppu_opcode_t); - void STHBRX(ppu_opcode_t); - void EXTSH(ppu_opcode_t); - void STVRXL(ppu_opcode_t); - void EXTSB(ppu_opcode_t); - void STFIWX(ppu_opcode_t); - void EXTSW(ppu_opcode_t); - void ICBI(ppu_opcode_t); - void DCBZ(ppu_opcode_t); - void LWZ(ppu_opcode_t); - void LWZU(ppu_opcode_t); - void LBZ(ppu_opcode_t); - void LBZU(ppu_opcode_t); - void STW(ppu_opcode_t); - void STWU(ppu_opcode_t); - void STB(ppu_opcode_t); - void STBU(ppu_opcode_t); - void LHZ(ppu_opcode_t); - void LHZU(ppu_opcode_t); - void LHA(ppu_opcode_t); - void LHAU(ppu_opcode_t); - void STH(ppu_opcode_t); - void STHU(ppu_opcode_t); - void LMW(ppu_opcode_t); - void STMW(ppu_opcode_t); - void LFS(ppu_opcode_t); - void LFSU(ppu_opcode_t); - void LFD(ppu_opcode_t); - void LFDU(ppu_opcode_t); - void STFS(ppu_opcode_t); - void STFSU(ppu_opcode_t); - void STFD(ppu_opcode_t); - void STFDU(ppu_opcode_t); - void LD(ppu_opcode_t); - void LDU(ppu_opcode_t); - void LWA(ppu_opcode_t); - void STD(ppu_opcode_t); - void STDU(ppu_opcode_t); - void FDIVS(ppu_opcode_t); - void FSUBS(ppu_opcode_t); - void FADDS(ppu_opcode_t); - void FSQRTS(ppu_opcode_t); - void FRES(ppu_opcode_t); - void FMULS(ppu_opcode_t); - void FMADDS(ppu_opcode_t); - void FMSUBS(ppu_opcode_t); - void FNMSUBS(ppu_opcode_t); - void FNMADDS(ppu_opcode_t); - void MTFSB1(ppu_opcode_t); - void MCRFS(ppu_opcode_t); - void MTFSB0(ppu_opcode_t); - void MTFSFI(ppu_opcode_t); - void MFFS(ppu_opcode_t); - void MTFSF(ppu_opcode_t); - void FCMPU(ppu_opcode_t); - void FRSP(ppu_opcode_t); - void FCTIW(ppu_opcode_t); - void FCTIWZ(ppu_opcode_t); - void FDIV(ppu_opcode_t); - void FSUB(ppu_opcode_t); - void FADD(ppu_opcode_t); - void FSQRT(ppu_opcode_t); - void FSEL(ppu_opcode_t); - void FMUL(ppu_opcode_t); - void FRSQRTE(ppu_opcode_t); - void FMSUB(ppu_opcode_t); - void FMADD(ppu_opcode_t); - void FNMSUB(ppu_opcode_t); - void FNMADD(ppu_opcode_t); - void FCMPO(ppu_opcode_t); - void 
FNEG(ppu_opcode_t); - void FMR(ppu_opcode_t); - void FNABS(ppu_opcode_t); - void FABS(ppu_opcode_t); - void FCTID(ppu_opcode_t); - void FCTIDZ(ppu_opcode_t); - void FCFID(ppu_opcode_t); -}; diff --git a/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp b/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp index 1bbccb2d87..72f589192e 100644 --- a/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp @@ -1675,7 +1675,9 @@ public: llvm::Value* starta_pc = m_ir->CreateAnd(get_pc(starta), 0x3fffc); llvm::Value* data_addr = m_ir->CreateGEP(get_type(), m_lsptr, starta_pc); - llvm::Value* acc = nullptr; + llvm::Value* acc0 = nullptr; + llvm::Value* acc1 = nullptr; + bool toggle = true; // Use a 512bit simple checksum to verify integrity if size is atleast 512b * 3 // This code uses a 512bit vector for all hardware to ensure behavior matches. @@ -1721,10 +1723,21 @@ public: vls = m_ir->CreateShuffleVector(vls, ConstantAggregateZero::get(vls->getType()), llvm::ArrayRef(indices, 16)); } - acc = acc ? m_ir->CreateAdd(acc, vls) : vls; + // Interleave accumulators for more performance + if (toggle) + { + acc0 = acc0 ? m_ir->CreateAdd(acc0, vls) : vls; + } + else + { + acc1 = acc1 ? m_ir->CreateAdd(acc1, vls) : vls; + } + toggle = !toggle; check_iterations++; } + llvm::Value* acc = (acc0 && acc1) ? m_ir->CreateAdd(acc0, acc1): (acc0 ? acc0 : acc1); + // Create the checksum u32 checksum[16] = {0}; @@ -1818,9 +1831,21 @@ public: } vls = m_ir->CreateXor(vls, ConstantDataVector::get(m_context, llvm::ArrayRef(words, elements))); - acc = acc ? m_ir->CreateOr(acc, vls) : vls; + + // Interleave accumulators for more performance. OR (not ADD) so XOR mismatch bits can never cancel to zero + if (toggle) + { + acc0 = acc0 ? m_ir->CreateOr(acc0, vls) : vls; + } + else + { + acc1 = acc1 ? m_ir->CreateOr(acc1, vls) : vls; + } + toggle = !toggle; check_iterations++; } + llvm::Value* acc = (acc0 && acc1) ? m_ir->CreateOr(acc0, acc1): (acc0 ? 
acc0 : acc1); + // Pattern for PTEST if (m_use_avx512) { diff --git a/rpcs3/Emu/RSX/Common/TextureUtils.h b/rpcs3/Emu/RSX/Common/TextureUtils.h index 0481ded4f2..243240bce1 100644 --- a/rpcs3/Emu/RSX/Common/TextureUtils.h +++ b/rpcs3/Emu/RSX/Common/TextureUtils.h @@ -119,7 +119,8 @@ namespace rsx RSX_FORMAT_CLASS_DEPTH24_UNORM_X8_PACK32 = 8, RSX_FORMAT_CLASS_DEPTH24_FLOAT_X8_PACK32 = 16, - RSX_FORMAT_CLASS_DEPTH_FLOAT_MASK = (RSX_FORMAT_CLASS_DEPTH16_FLOAT | RSX_FORMAT_CLASS_DEPTH24_FLOAT_X8_PACK32) + RSX_FORMAT_CLASS_DEPTH_FLOAT_MASK = (RSX_FORMAT_CLASS_DEPTH16_FLOAT | RSX_FORMAT_CLASS_DEPTH24_FLOAT_X8_PACK32), + RSX_FORMAT_CLASS_DONT_CARE = RSX_FORMAT_CLASS_UNDEFINED, }; } diff --git a/rpcs3/Emu/RSX/Common/simple_array.hpp b/rpcs3/Emu/RSX/Common/simple_array.hpp index e791af0274..8ae4179a0c 100644 --- a/rpcs3/Emu/RSX/Common/simple_array.hpp +++ b/rpcs3/Emu/RSX/Common/simple_array.hpp @@ -404,18 +404,19 @@ namespace rsx return ret; } - void sort(std::predicate auto predicate) + simple_array& sort(std::predicate auto predicate) { if (_size < 2) { - return; + return *this; } std::sort(begin(), end(), predicate); + return *this; } template > - requires std::is_invocable_v + requires (std::is_invocable_v && std::is_trivially_destructible_v) simple_array map(F&& xform) const { simple_array result; @@ -428,6 +429,20 @@ namespace rsx return result; } + template > + requires (std::is_invocable_v && !std::is_trivially_destructible_v) + std::vector map(F&& xform) const + { + std::vector result; + result.reserve(size()); + + for (auto it = begin(); it != end(); ++it) + { + result.push_back(xform(*it)); + } + return result; + } + template requires std::is_invocable_r_v U reduce(U initial_value, F&& reducer) const diff --git a/rpcs3/Emu/RSX/Core/RSXDisplay.cpp b/rpcs3/Emu/RSX/Core/RSXDisplay.cpp new file mode 100644 index 0000000000..e263a945ef --- /dev/null +++ b/rpcs3/Emu/RSX/Core/RSXDisplay.cpp @@ -0,0 +1,107 @@ +#include "stdafx.h" +#include "RSXDisplay.h" + +#include 
"../Common/simple_array.hpp" +#include "../rsx_utils.h" + +namespace rsx +{ + std::string framebuffer_dimensions_t::to_string(bool skip_aa_suffix) const + { + std::string suffix = ""; + const auto spp = samples_x * samples_y; + + if (!skip_aa_suffix && spp > 1) + { + suffix = std::string(" @MSAA ") + std::to_string(spp) + "x"; + } + + return std::to_string(width) + "x" + std::to_string(height) + suffix; + } + + framebuffer_dimensions_t framebuffer_dimensions_t::make(u16 width, u16 height, rsx::surface_antialiasing aa) + { + framebuffer_dimensions_t result { .width = width, .height = height }; + switch (aa) + { + case rsx::surface_antialiasing::center_1_sample: + result.samples_x = result.samples_y = 1; + break; + case rsx::surface_antialiasing::diagonal_centered_2_samples: + result.samples_x = 2; + result.samples_y = 1; + break; + case rsx::surface_antialiasing::square_centered_4_samples: + case rsx::surface_antialiasing::square_rotated_4_samples: + result.samples_x = result.samples_y = 2; + break; + } + return result; + } + + void framebuffer_statistics_t::add(u16 width, u16 height, rsx::surface_antialiasing aa) + { + auto& stashed = data[aa]; + const auto& incoming = framebuffer_dimensions_t::make(width, height, aa); + if (incoming > stashed) + { + stashed = incoming; + } + } + + std::string framebuffer_statistics_t::to_string(bool squash) const + { + // Format is sorted by sample count + struct sorted_message_t + { + u32 id; + surface_antialiasing aa_mode; + u32 samples; + }; + + if (data.size() == 0) + { + return "None"; + } + + rsx::simple_array messages; + rsx::simple_array real_stats; + + for (const auto& [aa_mode, stat] : data) + { + auto real_stat = stat; + std::tie(real_stat.width, real_stat.height) = apply_resolution_scale(stat.width, stat.height); + real_stats.push_back(real_stat); + + sorted_message_t msg; + msg.id = real_stats.size() - 1; + msg.aa_mode = aa_mode; + msg.samples = real_stat.samples_total(); + messages.push_back(msg); + } + + if (squash) 
+ { + messages.sort(FN(x.samples > y.samples)); + return real_stats[messages.front().id] + .to_string(g_cfg.video.antialiasing_level == msaa_level::none); + } + + if (messages.size() > 1) + { + // Should we bother showing the No-AA entry? + // This heuristic ignores pointless no-AA surfaces usually used as compositing buffers for output. + messages.sort(FN(x.samples > y.samples)); + if (messages.back().aa_mode == rsx::surface_antialiasing::center_1_sample) + { + // Drop the last entry if it has no AA. + messages.resize(messages.size() - 1); + } + } + + const auto text = messages + .sort(FN(static_cast(x.aa_mode) > static_cast(y.aa_mode))) + .map(FN(real_stats[x.id].to_string())); + return fmt::merge(text, ", "); + } +} diff --git a/rpcs3/Emu/RSX/Core/RSXDisplay.h b/rpcs3/Emu/RSX/Core/RSXDisplay.h index 5628ce77a2..4d4c90cd0d 100644 --- a/rpcs3/Emu/RSX/Core/RSXDisplay.h +++ b/rpcs3/Emu/RSX/Core/RSXDisplay.h @@ -3,9 +3,48 @@ #include #include #include +#include + +template +class named_thread; namespace rsx { + enum class surface_antialiasing : u8; + + struct framebuffer_dimensions_t + { + u16 width; + u16 height; + u8 samples_x; + u8 samples_y; + + inline u32 samples_total() const + { + return static_cast(width) * height * samples_x * samples_y; + } + + inline bool operator > (const framebuffer_dimensions_t& that) const + { + return samples_total() > that.samples_total(); + } + + std::string to_string(bool skip_aa_suffix = false) const; + + static framebuffer_dimensions_t make(u16 width, u16 height, rsx::surface_antialiasing aa); + }; + + struct framebuffer_statistics_t + { + std::unordered_map data; + + // Replace the existing data with this input if it is greater than what is already known + void add(u16 width, u16 height, rsx::surface_antialiasing aa); + + // Returns a formatted string representing the statistics collected over the frame. 
+ std::string to_string(bool squash) const; + }; + struct frame_statistics_t { u32 draw_calls; @@ -19,6 +58,8 @@ namespace rsx u32 vertex_cache_request_count; u32 vertex_cache_miss_count; + + framebuffer_statistics_t framebuffer_stats; }; struct frame_time_t diff --git a/rpcs3/Emu/RSX/Core/RSXDrawCommands.cpp b/rpcs3/Emu/RSX/Core/RSXDrawCommands.cpp index 389efe0a2e..9d218edb89 100644 --- a/rpcs3/Emu/RSX/Core/RSXDrawCommands.cpp +++ b/rpcs3/Emu/RSX/Core/RSXDrawCommands.cpp @@ -667,7 +667,23 @@ namespace rsx rop_control.enable_polygon_stipple(); } - if (REGS(m_ctx)->msaa_alpha_to_coverage_enabled() && !RSX(m_ctx)->get_backend_config().supports_hw_a2c) + auto can_use_hw_a2c = [&]() -> bool + { + const auto& config = RSX(m_ctx)->get_backend_config(); + if (!config.supports_hw_a2c) + { + return false; + } + + if (config.supports_hw_a2c_1spp) + { + return true; + } + + return REGS(m_ctx)->surface_antialias() != rsx::surface_antialiasing::center_1_sample; + }; + + if (REGS(m_ctx)->msaa_alpha_to_coverage_enabled() && !can_use_hw_a2c()) { // TODO: Properly support alpha-to-coverage and alpha-to-one behavior in shaders // Alpha values generate a coverage mask for order independent blending diff --git a/rpcs3/Emu/RSX/GL/GLCompute.cpp b/rpcs3/Emu/RSX/GL/GLCompute.cpp index 12ed12344c..5607c149ed 100644 --- a/rpcs3/Emu/RSX/GL/GLCompute.cpp +++ b/rpcs3/Emu/RSX/GL/GLCompute.cpp @@ -58,6 +58,8 @@ namespace gl if (!compiled) { + ensure(!m_src.empty(), "Compute shader is not initialized!"); + m_shader.create(::glsl::program_domain::glsl_compute_program, m_src); m_shader.compile(); @@ -82,6 +84,7 @@ namespace gl void compute_task::run(gl::command_context& cmd, u32 invocations_x, u32 invocations_y) { + ensure(compiled && m_program.id() != GL_NONE); bind_resources(); cmd->use_program(m_program.id()); diff --git a/rpcs3/Emu/RSX/GL/GLDraw.cpp b/rpcs3/Emu/RSX/GL/GLDraw.cpp index 3dcac04f09..b2de00e3b7 100644 --- a/rpcs3/Emu/RSX/GL/GLDraw.cpp +++ b/rpcs3/Emu/RSX/GL/GLDraw.cpp @@ -3,6 +3,8 
@@ #include "../rsx_methods.h" #include "../Common/BufferUtils.h" +#include "Emu/RSX/NV47/HW/context_accessors.define.h" + namespace gl { GLenum comparison_op(rsx::comparison_function op) @@ -256,6 +258,32 @@ void GLGSRender::update_draw_state() gl_state.enablei(mrt_blend_enabled[2], GL_BLEND, 2); gl_state.enablei(mrt_blend_enabled[3], GL_BLEND, 3); } + + // Antialias control + if (backend_config.supports_hw_msaa) + { + gl_state.enable(/*REGS(m_ctx)->msaa_enabled()*/GL_MULTISAMPLE); + + gl_state.enable(GL_SAMPLE_MASK); + gl_state.sample_mask(REGS(m_ctx)->msaa_sample_mask()); + + gl_state.enable(GL_SAMPLE_SHADING); + gl_state.min_sample_shading_rate(1.f); + + gl_state.enable(GL_SAMPLE_COVERAGE); + gl_state.sample_coverage(1.f); + } + + if (backend_config.supports_hw_a2c) + { + const bool hw_enable = backend_config.supports_hw_a2c_1spp || REGS(m_ctx)->surface_antialias() != rsx::surface_antialiasing::center_1_sample; + gl_state.enable(hw_enable && REGS(m_ctx)->msaa_alpha_to_coverage_enabled(), GL_SAMPLE_ALPHA_TO_COVERAGE); + } + + if (backend_config.supports_hw_a2one) + { + gl_state.enable(REGS(m_ctx)->msaa_alpha_to_one_enabled(), GL_SAMPLE_ALPHA_TO_ONE); + } } switch (rsx::method_registers.current_draw_clause.primitive) @@ -307,12 +335,6 @@ void GLGSRender::update_draw_state() // Clip planes gl_state.clip_planes((current_vertex_program.output_mask >> CELL_GCM_ATTRIB_OUTPUT_UC0) & 0x3F); - // Sample control - // TODO: MinSampleShading - //gl_state.enable(rsx::method_registers.msaa_enabled(), GL_MULTISAMPLE); - //gl_state.enable(rsx::method_registers.msaa_alpha_to_coverage_enabled(), GL_SAMPLE_ALPHA_TO_COVERAGE); - //gl_state.enable(rsx::method_registers.msaa_alpha_to_one_enabled(), GL_SAMPLE_ALPHA_TO_ONE); - //TODO //NV4097_SET_ANISO_SPREAD //NV4097_SET_SPECULAR_ENABLE diff --git a/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp b/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp index 7980670ca4..f6d5d2f3b4 100644 --- a/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp +++ 
b/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp @@ -5,6 +5,7 @@ #include "GLCommonDecompiler.h" #include "../GCM.h" #include "../Program/GLSLCommon.h" +#include "../RSXThread.h" std::string GLFragmentDecompilerThread::getFloatTypeName(usz elementCount) { @@ -44,6 +45,21 @@ void GLFragmentDecompilerThread::insertHeader(std::stringstream & OS) } } + if (properties.multisampled_sampler_mask) + { + // Requires this extension or GLSL 450 + const auto driver_caps = gl::get_driver_caps(); + if (driver_caps.glsl_version.version >= 450) + { + gl_version = 450; + } + else + { + ensure(driver_caps.ARB_shader_texture_image_samples, "MSAA support on OpenGL requires a driver running OpenGL 4.5 or supporting GL_ARB_shader_texture_image_samples."); + required_extensions.push_back("GL_ARB_shader_texture_image_samples"); + } + } + if (m_prog.ctrl & RSX_SHADER_CONTROL_ATTRIBUTE_INTERPOLATION) { gl_version = std::max(gl_version, 450); @@ -110,10 +126,14 @@ void GLFragmentDecompilerThread::insertConstants(std::stringstream & OS) const auto mask = (1 << index); - if (properties.redirected_sampler_mask & mask) + if (properties.multisampled_sampler_mask & mask) { - // Provide a stencil view of the main resource for the S channel - OS << "uniform u" << samplerType << " " << PI.name << "_stencil;\n"; + if (samplerType != "sampler1D" && samplerType != "sampler2D") + { + rsx_log.error("Unexpected multisampled image type '%s'", samplerType); + } + + samplerType = "sampler2DMS"; } else if (properties.shadow_sampler_mask & mask) { @@ -127,6 +147,12 @@ void GLFragmentDecompilerThread::insertConstants(std::stringstream & OS) } } + if (properties.redirected_sampler_mask & mask) + { + // Provide a stencil view of the main resource for the S channel + OS << "uniform u" << samplerType << " " << PI.name << "_stencil;\n"; + } + OS << "uniform " << samplerType << " " << PI.name << ";\n"; } } @@ -188,11 +214,12 @@ void GLFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS) 
m_shader_props.require_wpos = !!(properties.in_register_mask & in_wpos); m_shader_props.require_texture_ops = properties.has_tex_op; m_shader_props.require_tex_shadow_ops = properties.shadow_sampler_mask != 0; + m_shader_props.require_msaa_ops = properties.multisampled_sampler_mask != 0; m_shader_props.require_texture_expand = properties.has_exp_tex_op; m_shader_props.require_srgb_to_linear = properties.has_upg; m_shader_props.require_linear_to_srgb = properties.has_pkg; m_shader_props.require_fog_read = properties.in_register_mask & in_fogc; - m_shader_props.emulate_coverage_tests = true; // g_cfg.video.antialiasing_level == msaa_level::none; + m_shader_props.emulate_coverage_tests = !rsx::get_renderer_backend_config().supports_hw_a2c_1spp; m_shader_props.emulate_shadow_compare = device_props.emulate_depth_compare; m_shader_props.low_precision_tests = ::gl::get_driver_caps().vendor_NVIDIA && !(m_prog.ctrl & RSX_SHADER_CONTROL_ATTRIBUTE_INTERPOLATION); m_shader_props.disable_early_discard = !::gl::get_driver_caps().vendor_NVIDIA; diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 8d6c6010d9..6dbd7f919c 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -4,6 +4,7 @@ #include "GLGSRender.h" #include "GLCompute.h" #include "GLDMA.h" +#include "GLResolveHelper.h" #include "Emu/Memory/vm_locking.h" #include "Emu/RSX/rsx_methods.h" @@ -46,10 +47,16 @@ GLGSRender::GLGSRender(utils::serial* ar) noexcept : GSRender(ar) else m_vertex_cache = std::make_unique(); - backend_config.supports_hw_a2c = false; - backend_config.supports_hw_a2one = false; backend_config.supports_multidraw = true; backend_config.supports_normalized_barycentrics = true; + + if (g_cfg.video.antialiasing_level != msaa_level::none) + { + backend_config.supports_hw_msaa = true; + backend_config.supports_hw_a2c = true; + backend_config.supports_hw_a2c_1spp = false; // In OGL A2C is implicitly disabled at 1spp + 
backend_config.supports_hw_a2one = true; + } } GLGSRender::~GLGSRender() @@ -229,13 +236,13 @@ void GLGSRender::on_init_thread() // Array stream buffer { - m_gl_persistent_stream_buffer = std::make_unique(GL_TEXTURE_BUFFER, 0, 0, 0, 0, GL_R8UI); + m_gl_persistent_stream_buffer = std::make_unique(GL_TEXTURE_BUFFER, 0, 0, 0, 0, 0, GL_R8UI, RSX_FORMAT_CLASS_DONT_CARE); gl_state.bind_texture(GL_STREAM_BUFFER_START + 0, GL_TEXTURE_BUFFER, m_gl_persistent_stream_buffer->id()); } // Register stream buffer { - m_gl_volatile_stream_buffer = std::make_unique(GL_TEXTURE_BUFFER, 0, 0, 0, 0, GL_R8UI); + m_gl_volatile_stream_buffer = std::make_unique(GL_TEXTURE_BUFFER, 0, 0, 0, 0, 0, GL_R8UI, RSX_FORMAT_CLASS_DONT_CARE); gl_state.bind_texture(GL_STREAM_BUFFER_START + 1, GL_TEXTURE_BUFFER, m_gl_volatile_stream_buffer->id()); } @@ -244,19 +251,19 @@ void GLGSRender::on_init_thread() std::array pixeldata = { 0, 0, 0, 0, 0, 0, 0, 0 }; // 1D - auto tex1D = std::make_unique(GL_TEXTURE_1D, 1, 1, 1, 1, GL_RGBA8); + auto tex1D = std::make_unique(GL_TEXTURE_1D, 1, 1, 1, 1, 1, GL_RGBA8, RSX_FORMAT_CLASS_COLOR); tex1D->copy_from(pixeldata.data(), gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8, {}); // 2D - auto tex2D = std::make_unique(GL_TEXTURE_2D, 1, 1, 1, 1, GL_RGBA8); + auto tex2D = std::make_unique(GL_TEXTURE_2D, 1, 1, 1, 1, 1, GL_RGBA8, RSX_FORMAT_CLASS_COLOR); tex2D->copy_from(pixeldata.data(), gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8, {}); // 3D - auto tex3D = std::make_unique(GL_TEXTURE_3D, 1, 1, 1, 1, GL_RGBA8); + auto tex3D = std::make_unique(GL_TEXTURE_3D, 1, 1, 1, 1, 1, GL_RGBA8, RSX_FORMAT_CLASS_COLOR); tex3D->copy_from(pixeldata.data(), gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8, {}); // CUBE - auto texCUBE = std::make_unique(GL_TEXTURE_CUBE_MAP, 1, 1, 1, 1, GL_RGBA8); + auto texCUBE = std::make_unique(GL_TEXTURE_CUBE_MAP, 1, 1, 1, 1, 1, GL_RGBA8, RSX_FORMAT_CLASS_COLOR); texCUBE->copy_from(pixeldata.data(), 
gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8, {}); m_null_textures[GL_TEXTURE_1D] = std::move(tex1D); @@ -423,6 +430,7 @@ void GLGSRender::on_exit() gl::destroy_compute_tasks(); gl::destroy_overlay_passes(); gl::clear_dma_resources(); + gl::clear_resolve_helpers(); gl::destroy_global_texture_resources(); diff --git a/rpcs3/Emu/RSX/GL/GLOverlays.cpp b/rpcs3/Emu/RSX/GL/GLOverlays.cpp index 5a2b8803f4..44fbe3dd3c 100644 --- a/rpcs3/Emu/RSX/GL/GLOverlays.cpp +++ b/rpcs3/Emu/RSX/GL/GLOverlays.cpp @@ -23,6 +23,8 @@ namespace gl { if (!compiled) { + ensure(!fs_src.empty() && !vs_src.empty(), "Shaders have not been initialized."); + fs.create(::glsl::program_domain::glsl_fragment_program, fs_src); fs.compile(); @@ -34,6 +36,8 @@ namespace gl program_handle.attach(fs); program_handle.link(); + ensure(program_handle.id()); + fbo.create(); m_sampler.create(); @@ -75,7 +79,7 @@ namespace gl } } - void overlay_pass::emit_geometry() + void overlay_pass::emit_geometry(gl::command_context& /*cmd*/) { int old_vao; glGetIntegerv(GL_VERTEX_ARRAY_BINDING, &old_vao); @@ -88,11 +92,7 @@ namespace gl void overlay_pass::run(gl::command_context& cmd, const areau& region, GLuint target_texture, GLuint image_aspect_bits, bool enable_blending) { - if (!compiled) - { - rsx_log.error("You must initialize overlay passes with create() before calling run()"); - return; - } + ensure(compiled && program_handle.id() != GL_NONE, "You must initialize overlay passes with create() before calling run()"); GLint viewport[4]; std::unique_ptr save_fbo; @@ -111,6 +111,10 @@ namespace gl fbo.draw_buffer(fbo.no_color); fbo.depth = target_texture; break; + case gl::image_aspect::stencil: + fbo.draw_buffer(fbo.no_color); + fbo.depth_stencil = target_texture; + break; case gl::image_aspect::depth | gl::image_aspect::stencil: fbo.draw_buffer(fbo.no_color); fbo.depth_stencil = target_texture; @@ -176,7 +180,7 @@ namespace gl cmd->use_program(program_handle.id()); on_load(); bind_resources(); - 
emit_geometry(); + emit_geometry(cmd); glViewport(viewport[0], viewport[1], viewport[2], viewport[3]); @@ -216,7 +220,7 @@ namespace gl gl::texture_view* ui_overlay_renderer::load_simple_image(rsx::overlays::image_info* desc, bool temp_resource, u32 owner_uid) { - auto tex = std::make_unique(GL_TEXTURE_2D, desc->w, desc->h, 1, 1, GL_RGBA8); + auto tex = std::make_unique(GL_TEXTURE_2D, desc->w, desc->h, 1, 1, 1, GL_RGBA8, RSX_FORMAT_CLASS_COLOR); tex->copy_from(desc->get_data(), gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8, {}); GLenum remap[] = { GL_RED, GL_ALPHA, GL_BLUE, GL_GREEN }; @@ -301,7 +305,7 @@ namespace gl // Create font file const std::vector glyph_data = font->get_glyph_data(); - auto tex = std::make_unique(GL_TEXTURE_2D_ARRAY, font_size.width, font_size.height, font_size.depth, 1, GL_R8); + auto tex = std::make_unique(GL_TEXTURE_2D_ARRAY, font_size.width, font_size.height, font_size.depth, 1, 1, GL_R8, RSX_FORMAT_CLASS_COLOR); tex->copy_from(glyph_data.data(), gl::texture::format::r, gl::texture::type::ubyte, {}); GLenum remap[] = { GL_RED, GL_RED, GL_RED, GL_RED }; @@ -350,7 +354,7 @@ namespace gl } } - void ui_overlay_renderer::emit_geometry() + void ui_overlay_renderer::emit_geometry(gl::command_context& cmd) { if (m_current_primitive_type == rsx::overlays::primitive_type::quad_list) { @@ -378,7 +382,7 @@ namespace gl } else { - overlay_pass::emit_geometry(); + overlay_pass::emit_geometry(cmd); } } diff --git a/rpcs3/Emu/RSX/GL/GLOverlays.h b/rpcs3/Emu/RSX/GL/GLOverlays.h index 92f76160a8..5ad7065fd0 100644 --- a/rpcs3/Emu/RSX/GL/GLOverlays.h +++ b/rpcs3/Emu/RSX/GL/GLOverlays.h @@ -57,7 +57,7 @@ namespace gl m_vertex_data_buffer.data(elements_count * sizeof(T), data); } - virtual void emit_geometry(); + virtual void emit_geometry(gl::command_context& cmd); void run(gl::command_context& cmd, const areau& region, GLuint target_texture, GLuint image_aspect_bits, bool enable_blending = false); }; @@ -87,7 +87,7 @@ namespace gl void 
set_primitive_type(rsx::overlays::primitive_type type); - void emit_geometry() override; + void emit_geometry(gl::command_context& cmd) override; void run(gl::command_context& cmd, const areau& viewport, GLuint target, rsx::overlays::overlay& ui); }; diff --git a/rpcs3/Emu/RSX/GL/GLPresent.cpp b/rpcs3/Emu/RSX/GL/GLPresent.cpp index 3ad9ed1c4d..1eaa3a6193 100644 --- a/rpcs3/Emu/RSX/GL/GLPresent.cpp +++ b/rpcs3/Emu/RSX/GL/GLPresent.cpp @@ -26,7 +26,7 @@ namespace gl { const auto target = static_cast(visual->get_target()); const auto ifmt = static_cast(visual->get_internal_format()); - g_vis_texture.reset(new texture(target, visual->width(), visual->height(), 1, 1, ifmt, visual->format_class())); + g_vis_texture.reset(new texture(target, visual->width(), visual->height(), 1, 1, 1, ifmt, visual->format_class())); glCopyImageSubData(visual->id(), target, 0, 0, 0, 0, g_vis_texture->id(), target, 0, 0, 0, 0, visual->width(), visual->height(), 1); } } @@ -115,7 +115,7 @@ gl::texture* GLGSRender::get_present_source(gl::present_surface_info* info, cons { if (!flip_image || flip_image->size2D() != sizeu{ info->width, info->height }) { - flip_image = std::make_unique(GL_TEXTURE_2D, info->width, info->height, 1, 1, expected_format); + flip_image = std::make_unique(GL_TEXTURE_2D, info->width, info->height, 1, 1, 1, expected_format, RSX_FORMAT_CLASS_COLOR); } }; @@ -402,6 +402,7 @@ void GLGSRender::flip(const rsx::display_flip_info_t& info) : 0; rsx::overlays::set_debug_overlay_text(fmt::format( + "Internal Resolution: %s\n" "RSX Load: %3d%%\n" "draw calls: %16d\n" "draw call setup: %11dus\n" @@ -413,6 +414,7 @@ void GLGSRender::flip(const rsx::display_flip_info_t& info) "Flush requests: %12d = %2d (%3d%%) hard faults, %2d unavoidable, %2d misprediction(s), %2d speculation(s)\n" "Texture uploads: %11u (%u from CPU - %02u%%, %u copies avoided)\n" "Vertex cache hits: %9u/%u (%u%%)", + info.stats.framebuffer_stats.to_string(!backend_config.supports_hw_msaa), get_load(), 
info.stats.draw_calls, info.stats.setup_time, info.stats.vertex_upload_time, info.stats.textures_upload_time, info.stats.draw_exec_time, num_dirty_textures, texture_memory_size, num_flushes, num_misses, cache_miss_ratio, num_unavoidable, num_mispredict, num_speculate, diff --git a/rpcs3/Emu/RSX/GL/GLProcTable.h b/rpcs3/Emu/RSX/GL/GLProcTable.h index 20da00944f..783701e4dd 100644 --- a/rpcs3/Emu/RSX/GL/GLProcTable.h +++ b/rpcs3/Emu/RSX/GL/GLProcTable.h @@ -259,6 +259,13 @@ OPENGL_PROC(PFNGLTEXSTORAGE1DPROC, TexStorage1D); OPENGL_PROC(PFNGLTEXSTORAGE2DPROC, TexStorage2D); OPENGL_PROC(PFNGLTEXSTORAGE3DPROC, TexStorage3D); +// ARB_texture_multisample +OPENGL_PROC(PFNGLTEXSTORAGE2DMULTISAMPLEPROC, TexStorage2DMultisample); +OPENGL_PROC(PFNGLTEXSTORAGE3DMULTISAMPLEPROC, TexStorage3DMultisample); +OPENGL_PROC(PFNGLSAMPLEMASKIPROC, SampleMaski); +OPENGL_PROC(PFNGLMINSAMPLESHADINGPROC, MinSampleShading); +OPENGL_PROC(PFNGLSAMPLECOVERAGEPROC, SampleCoverage); + // Texture_View OPENGL_PROC(PFNGLTEXTUREVIEWPROC, TextureView); diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp index e703dd31dd..87e5ee5645 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp @@ -1,5 +1,6 @@ #include "stdafx.h" #include "GLGSRender.h" +#include "GLResolveHelper.h" #include "Emu/RSX/rsx_methods.h" #include @@ -417,15 +418,16 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool /* } // Render target helpers -void gl::render_target::clear_memory(gl::command_context& cmd) +void gl::render_target::clear_memory(gl::command_context& cmd, gl::texture* surface) { + auto dst = surface ? 
surface : this; if (aspect() & gl::image_aspect::depth) { - gl::g_hw_blitter->fast_clear_image(cmd, this, 1.f, 255); + gl::g_hw_blitter->fast_clear_image(cmd, dst, 1.f, 255); } else { - gl::g_hw_blitter->fast_clear_image(cmd, this, {}); + gl::g_hw_blitter->fast_clear_image(cmd, dst, {}); } state_flags &= ~rsx::surface_state_flags::erase_bkgnd; @@ -449,18 +451,26 @@ void gl::render_target::load_memory(gl::command_context& cmd) } else { - auto tmp = std::make_unique(GL_TEXTURE_2D, subres.width_in_block, subres.height_in_block, 1, 1, static_cast(get_internal_format()), format_class()); + auto tmp = std::make_unique(GL_TEXTURE_2D, subres.width_in_block, subres.height_in_block, 1, 1, 1, static_cast(get_internal_format()), format_class()); + auto dst = samples() > 1 ? get_resolve_target_safe(cmd) : this; gl::upload_texture(cmd, tmp.get(), get_gcm_format(), is_swizzled, { subres }); - gl::g_hw_blitter->scale_image(cmd, tmp.get(), this, + gl::g_hw_blitter->scale_image(cmd, tmp.get(), dst, { 0, 0, subres.width_in_block, subres.height_in_block }, { 0, 0, static_cast(width()), static_cast(height()) }, !is_depth_surface(), {}); + + if (samples() > 1) + { + msaa_flags = rsx::surface_state_flags::require_unresolve; + } } + + state_flags &= ~rsx::surface_state_flags::erase_bkgnd; } -void gl::render_target::initialize_memory(gl::command_context& cmd, rsx::surface_access /*access*/) +void gl::render_target::initialize_memory(gl::command_context& cmd, rsx::surface_access access) { const bool memory_load = is_depth_surface() ? 
!!g_cfg.video.read_depth_buffer : @@ -469,6 +479,14 @@ void gl::render_target::initialize_memory(gl::command_context& cmd, rsx::surface if (!memory_load) { clear_memory(cmd); + + if (samples() > 1 && access.is_transfer_or_read()) + { + // Only clear the resolve surface if reading from it, otherwise it's a waste + clear_memory(cmd, get_resolve_target_safe(cmd)); + } + + msaa_flags = rsx::surface_state_flags::ready; } else { @@ -476,8 +494,28 @@ void gl::render_target::initialize_memory(gl::command_context& cmd, rsx::surface } } +gl::viewable_image* gl::render_target::get_surface(rsx::surface_access access_type) +{ + if (samples() == 1 || !access_type.is_transfer()) + { + return this; + } + + // A read barrier should have been called before this! + ensure(resolve_surface, "Read access without explicit barrier"); + ensure(!(msaa_flags & rsx::surface_state_flags::require_resolve)); + return static_cast(resolve_surface.get()); +} + void gl::render_target::memory_barrier(gl::command_context& cmd, rsx::surface_access access) { + if (access == rsx::surface_access::gpu_reference) + { + // In OpenGL, resources are always assumed to be visible to the GPU. + // We don't manage memory spilling, so just return. + return; + } + const bool read_access = access.is_read(); const bool is_depth = is_depth_surface(); const bool should_read_buffers = is_depth ? 
!!g_cfg.video.read_depth_buffer : !!g_cfg.video.read_color_buffers; @@ -504,12 +542,33 @@ void gl::render_target::memory_barrier(gl::command_context& cmd, rsx::surface_ac on_write(); } + if (msaa_flags & rsx::surface_state_flags::require_resolve) + { + if (access.is_transfer()) + { + // Only do this step when read access is required + get_resolve_target_safe(cmd); + resolve(cmd); + } + } + else if (msaa_flags & rsx::surface_state_flags::require_unresolve) + { + if (access == rsx::surface_access::shader_write) + { + // Only do this step when it is needed to start rendering + ensure(resolve_surface); + unresolve(cmd); + } + } + return; } + auto dst_img = (samples() > 1) ? get_resolve_target_safe(cmd) : this; const bool dst_is_depth = !!(aspect() & gl::image_aspect::depth); const auto dst_bpp = get_bpp(); unsigned first = prepare_rw_barrier_for_transfer(this); + bool optimize_copy = true; u64 newest_tag = 0; for (auto i = first; i < old_contents.size(); ++i) @@ -519,6 +578,8 @@ void gl::render_target::memory_barrier(gl::command_context& cmd, rsx::surface_ac const auto src_bpp = src_texture->get_bpp(); rsx::typeless_xfer typeless_info{}; + src_texture->memory_barrier(cmd, rsx::surface_access::transfer_read); + if (get_internal_format() == src_texture->get_internal_format()) { // Copy data from old contents onto this one @@ -538,29 +599,106 @@ void gl::render_target::memory_barrier(gl::command_context& cmd, rsx::surface_ac } section.init_transfer(this); + auto src_area = section.src_rect(); + auto dst_area = section.dst_rect(); - if (state_flags & rsx::surface_state_flags::erase_bkgnd) + if (g_cfg.video.antialiasing_level != msaa_level::none) { - const auto area = section.dst_rect(); - if (area.x1 > 0 || area.y1 > 0 || unsigned(area.x2) < width() || unsigned(area.y2) < height()) - { - initialize_memory(cmd, access); - } - else - { - state_flags &= ~rsx::surface_state_flags::erase_bkgnd; - } + src_texture->transform_pixels_to_samples(src_area); + 
this->transform_pixels_to_samples(dst_area); } - gl::g_hw_blitter->scale_image(cmd, section.source, this, - section.src_rect(), - section.dst_rect(), + bool memory_load = true; + if (dst_area.x1 == 0 && dst_area.y1 == 0 && + unsigned(dst_area.x2) == dst_img->width() && unsigned(dst_area.y2) == dst_img->height()) + { + // Skip a bunch of useless work + state_flags &= ~(rsx::surface_state_flags::erase_bkgnd); + msaa_flags = rsx::surface_state_flags::ready; + + memory_load = false; + stencil_init_flags = src_texture->stencil_init_flags; + } + else if (state_flags & rsx::surface_state_flags::erase_bkgnd) + { + // Might introduce MSAA flags + initialize_memory(cmd, rsx::surface_access::memory_write); + ensure(state_flags == rsx::surface_state_flags::ready); + } + + if (msaa_flags & rsx::surface_state_flags::require_resolve) + { + // Need to forward resolve this + resolve(cmd); + } + + if (src_texture->samples() > 1) + { + // Ensure a readable surface exists for the source + src_texture->get_resolve_target_safe(cmd); + } + + gl::g_hw_blitter->scale_image( + cmd, + src_texture->get_surface(rsx::surface_access::transfer_read), + this->get_surface(rsx::surface_access::transfer_write), + src_area, + dst_area, !dst_is_depth, typeless_info); + optimize_copy = optimize_copy && !memory_load; newest_tag = src_texture->last_use_tag; } - // Memory has been transferred, discard old contents and update memory flags - // TODO: Preserve memory outside surface clip region - on_write(newest_tag); + if (!newest_tag) [[unlikely]] + { + // Underlying memory has been modified and we could not find valid data to fill it + clear_rw_barrier(); + + state_flags |= rsx::surface_state_flags::erase_bkgnd; + initialize_memory(cmd, access); + ensure(state_flags == rsx::surface_state_flags::ready); + } + + // NOTE: Optimize flag relates to stencil resolve/unresolve for NVIDIA. 
+ on_write_copy(newest_tag, optimize_copy); + + if (access == rsx::surface_access::shader_write && samples() > 1) + { + // Write barrier, must initialize + unresolve(cmd); + } +} + +// MSAA support +gl::viewable_image* gl::render_target::get_resolve_target_safe(gl::command_context& /*cmd*/) +{ + if (!resolve_surface) + { + // Create a resolve surface + const auto resolve_w = width() * samples_x; + const auto resolve_h = height() * samples_y; + + resolve_surface.reset(new gl::viewable_image( + GL_TEXTURE_2D, + resolve_w, resolve_h, + 1, 1, 1, + static_cast(get_internal_format()), + format_class() + )); + } + + return static_cast(resolve_surface.get()); +} + +void gl::render_target::resolve(gl::command_context& cmd) +{ + gl::resolve_image(cmd, get_resolve_target_safe(cmd), this); + msaa_flags &= ~(rsx::surface_state_flags::require_resolve); +} + +void gl::render_target::unresolve(gl::command_context& cmd) +{ + gl::unresolve_image(cmd, this, get_resolve_target_safe(cmd)); + msaa_flags &= ~(rsx::surface_state_flags::require_unresolve); } diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.h b/rpcs3/Emu/RSX/GL/GLRenderTargets.h index 20cefe206f..3d573db52c 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.h +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.h @@ -49,13 +49,21 @@ namespace gl { class render_target : public viewable_image, public rsx::render_target_descriptor { - void clear_memory(gl::command_context& cmd); + void clear_memory(gl::command_context& cmd, gl::texture* surface = nullptr); void load_memory(gl::command_context& cmd); void initialize_memory(gl::command_context& cmd, rsx::surface_access access); + // MSAA support: + // Get the linear resolve target bound to this surface. 
Initialize if none exists + gl::viewable_image* get_resolve_target_safe(gl::command_context& cmd); + // Resolve the planar MSAA data into a linear block + void resolve(gl::command_context& cmd); + // Unresolve the linear data into planar MSAA data + void unresolve(gl::command_context& cmd); + public: - render_target(GLuint width, GLuint height, GLenum sized_format, rsx::format_class format_class) - : viewable_image(GL_TEXTURE_2D, width, height, 1, 1, sized_format, format_class) + render_target(GLuint width, GLuint height, GLubyte samples, GLenum sized_format, rsx::format_class format_class) + : viewable_image(GL_TEXTURE_2D, width, height, 1, 1, samples, sized_format, format_class) {} // Internal pitch is the actual row length in bytes of the openGL texture @@ -81,11 +89,7 @@ namespace gl return !!(aspect() & gl::image_aspect::depth); } - viewable_image* get_surface(rsx::surface_access /*access_type*/) override - { - // TODO - return static_cast(this); - } + viewable_image* get_surface(rsx::surface_access /*access_type*/) override; u32 raw_handle() const { @@ -141,7 +145,20 @@ struct gl_render_target_traits auto format = rsx::internals::surface_color_format_to_gl(surface_color_format); const auto [width_, height_] = rsx::apply_resolution_scale(static_cast(width), static_cast(height)); - std::unique_ptr result(new gl::render_target(width_, height_, + u8 samples; + rsx::surface_sample_layout sample_layout; + if (g_cfg.video.antialiasing_level == msaa_level::_auto) + { + samples = get_format_sample_count(antialias); + sample_layout = rsx::surface_sample_layout::ps3; + } + else + { + samples = 1; + sample_layout = rsx::surface_sample_layout::null; + } + + std::unique_ptr result(new gl::render_target(width_, height_, samples, static_cast(format.internal_format), RSX_FORMAT_CLASS_COLOR)); result->set_aa_mode(antialias); @@ -154,6 +171,7 @@ struct gl_render_target_traits result->memory_usage_flags = rsx::surface_usage_flags::attachment; result->state_flags = 
rsx::surface_state_flags::erase_bkgnd; + result->sample_layout = sample_layout; result->queue_tag(address); result->add_ref(); return result; @@ -170,7 +188,20 @@ struct gl_render_target_traits auto format = rsx::internals::surface_depth_format_to_gl(surface_depth_format); const auto [width_, height_] = rsx::apply_resolution_scale(static_cast(width), static_cast(height)); - std::unique_ptr result(new gl::render_target(width_, height_, + u8 samples; + rsx::surface_sample_layout sample_layout; + if (g_cfg.video.antialiasing_level == msaa_level::_auto) + { + samples = get_format_sample_count(antialias); + sample_layout = rsx::surface_sample_layout::ps3; + } + else + { + samples = 1; + sample_layout = rsx::surface_sample_layout::null; + } + + std::unique_ptr result(new gl::render_target(width_, height_, samples, static_cast(format.internal_format), rsx::classify_format(surface_depth_format))); result->set_aa_mode(antialias); @@ -183,6 +214,7 @@ struct gl_render_target_traits result->memory_usage_flags = rsx::surface_usage_flags::attachment; result->state_flags = rsx::surface_state_flags::erase_bkgnd; + result->sample_layout = sample_layout; result->queue_tag(address); result->add_ref(); return result; @@ -200,7 +232,7 @@ struct gl_render_target_traits const auto [new_w, new_h] = rsx::apply_resolution_scale(prev.width, prev.height, ref->get_surface_width(), ref->get_surface_height()); - sink = std::make_unique(new_w, new_h, internal_format, ref->format_class()); + sink = std::make_unique(new_w, new_h, ref->samples(), internal_format, ref->format_class()); sink->add_ref(); sink->memory_usage_flags = rsx::surface_usage_flags::storage; @@ -255,8 +287,9 @@ struct gl_render_target_traits } static - void prepare_surface_for_drawing(gl::command_context&, gl::render_target* surface) + void prepare_surface_for_drawing(gl::command_context& cmd, gl::render_target* surface) { + surface->memory_barrier(cmd, rsx::surface_access::gpu_reference); surface->memory_usage_flags |= 
rsx::surface_usage_flags::attachment; } diff --git a/rpcs3/Emu/RSX/GL/GLResolveHelper.cpp b/rpcs3/Emu/RSX/GL/GLResolveHelper.cpp new file mode 100644 index 0000000000..52cdc39c77 --- /dev/null +++ b/rpcs3/Emu/RSX/GL/GLResolveHelper.cpp @@ -0,0 +1,391 @@ +#include "stdafx.h" +#include "GLResolveHelper.h" +#include "GLTexture.h" + +#include +#include + +namespace gl +{ + std::unordered_map> g_resolve_helpers; + std::unordered_map> g_unresolve_helpers; + std::unordered_map> g_depth_resolvers; + std::unordered_map> g_depth_unresolvers; + + void clear_resolve_helpers() + { + g_resolve_helpers.clear(); + g_unresolve_helpers.clear(); + g_depth_resolvers.clear(); + g_depth_unresolvers.clear(); + } + + static const char* get_format_string(gl::texture::internal_format format) + { + switch (format) + { + case texture::internal_format::rgb565: + return "r16"; + case texture::internal_format::rgba8: + case texture::internal_format::bgra8: + return "rgba8"; + case texture::internal_format::rgba16f: + return "rgba16f"; + case texture::internal_format::rgba32f: + return "rgba32f"; + case texture::internal_format::bgr5a1: + return "r16"; + case texture::internal_format::r8: + return "r8"; + case texture::internal_format::rg8: + return "rg8"; + case texture::internal_format::r32f: + return "r32f"; + default: + fmt::throw_exception("Unhandled internal format 0x%x", u32(format)); + } + } + + void resolve_image(gl::command_context& cmd, gl::viewable_image* dst, gl::viewable_image* src) + { + ensure(src->samples() > 1 && dst->samples() == 1); + + if (src->aspect() == gl::image_aspect::color) [[ likely ]] + { + auto& job = g_resolve_helpers[src->get_internal_format()]; + if (!job) + { + const auto fmt = get_format_string(src->get_internal_format()); + job.reset(new cs_resolve_task(fmt)); + } + + job->run(cmd, src, dst); + return; + } + + auto get_resolver_pass = [](GLuint aspect_bits) -> std::unique_ptr& + { + auto& pass = g_depth_resolvers[aspect_bits]; + if (!pass) + { + 
ds_resolve_pass_base* ptr = nullptr; + switch (aspect_bits) + { + case gl::image_aspect::depth: + ptr = new depth_only_resolver(); + break; + case gl::image_aspect::stencil: + ptr = new stencil_only_resolver(); + break; + case (gl::image_aspect::depth | gl::image_aspect::stencil): + ptr = new depth_stencil_resolver(); + break; + default: + fmt::throw_exception("Unreachable"); + } + + pass.reset(ptr); + } + + return pass; + }; + + if (src->aspect() == (gl::image_aspect::depth | gl::image_aspect::stencil) && + !gl::get_driver_caps().ARB_shader_stencil_export_supported) + { + // Special case, NVIDIA-only fallback + auto& depth_pass = get_resolver_pass(gl::image_aspect::depth); + depth_pass->run(cmd, src, dst); + + auto& stencil_pass = get_resolver_pass(gl::image_aspect::stencil); + stencil_pass->run(cmd, src, dst); + return; + } + + auto& pass = get_resolver_pass(src->aspect()); + pass->run(cmd, src, dst); + } + + void unresolve_image(gl::command_context& cmd, gl::viewable_image* dst, gl::viewable_image* src) + { + ensure(dst->samples() > 1 && src->samples() == 1); + + if (src->aspect() == gl::image_aspect::color) [[ likely ]] + { + auto& job = g_unresolve_helpers[src->get_internal_format()]; + if (!job) + { + const auto fmt = get_format_string(src->get_internal_format()); + job.reset(new cs_unresolve_task(fmt)); + } + + job->run(cmd, dst, src); + return; + } + + auto get_unresolver_pass = [](GLuint aspect_bits) -> std::unique_ptr& + { + auto& pass = g_depth_unresolvers[aspect_bits]; + if (!pass) + { + ds_resolve_pass_base* ptr = nullptr; + switch (aspect_bits) + { + case gl::image_aspect::depth: + ptr = new depth_only_unresolver(); + break; + case gl::image_aspect::stencil: + ptr = new stencil_only_unresolver(); + break; + case (gl::image_aspect::depth | gl::image_aspect::stencil): + ptr = new depth_stencil_unresolver(); + break; + default: + fmt::throw_exception("Unreachable"); + } + + pass.reset(ptr); + } + + return pass; + }; + + if (src->aspect() == 
(gl::image_aspect::depth | gl::image_aspect::stencil) && + !gl::get_driver_caps().ARB_shader_stencil_export_supported) + { + // Special case, NVIDIA-only fallback + auto& depth_pass = get_unresolver_pass(gl::image_aspect::depth); + depth_pass->run(cmd, dst, src); + + auto& stencil_pass = get_unresolver_pass(gl::image_aspect::stencil); + stencil_pass->run(cmd, dst, src); + return; + } + + auto& pass = get_unresolver_pass(src->aspect()); + pass->run(cmd, dst, src); + } + + // Implementation + + void cs_resolve_base::build(const std::string& format_prefix, bool unresolve) + { + is_unresolve = unresolve; + + switch (optimal_group_size) + { + default: + case 64: + cs_wave_x = 8; + cs_wave_y = 8; + break; + case 32: + cs_wave_x = 8; + cs_wave_y = 4; + break; + } + + static const char* resolve_kernel = + #include "Emu/RSX/Program/MSAA/ColorResolvePass.glsl" + ; + + static const char* unresolve_kernel = + #include "Emu/RSX/Program/MSAA/ColorUnresolvePass.glsl" + ; + + const std::pair syntax_replace[] = + { + { "%WORKGROUP_SIZE_X", std::to_string(cs_wave_x) }, + { "%WORKGROUP_SIZE_Y", std::to_string(cs_wave_y) }, + { "%IMAGE_FORMAT", format_prefix }, + { "%BGRA_SWAP", "0" } + }; + + m_src = unresolve ? unresolve_kernel : resolve_kernel; + m_src = fmt::replace_all(m_src, syntax_replace); + + rsx_log.notice("Resolve shader:\n%s", m_src); + + create(); + } + + void cs_resolve_base::bind_resources() + { + auto msaa_view = multisampled->get_view(rsx::default_remap_vector.with_encoding(GL_REMAP_VIEW_MULTISAMPLED)); + auto resolved_view = resolve->get_view(rsx::default_remap_vector.with_encoding(GL_REMAP_IDENTITY)); + + glBindImageTexture(GL_COMPUTE_IMAGE_SLOT(0), msaa_view->id(), 0, GL_FALSE, 0, is_unresolve ? GL_WRITE_ONLY : GL_READ_ONLY, msaa_view->view_format()); + glBindImageTexture(GL_COMPUTE_IMAGE_SLOT(1), resolved_view->id(), 0, GL_FALSE, 0, is_unresolve ? 
GL_READ_ONLY : GL_WRITE_ONLY, resolved_view->view_format()); + } + + void cs_resolve_base::run(gl::command_context& cmd, gl::viewable_image* msaa_image, gl::viewable_image* resolve_image) + { + ensure(msaa_image->samples() > 1); + ensure(resolve_image->samples() == 1); + + multisampled = msaa_image; + resolve = resolve_image; + + const u32 invocations_x = utils::align(resolve_image->width(), cs_wave_x) / cs_wave_x; + const u32 invocations_y = utils::align(resolve_image->height(), cs_wave_y) / cs_wave_y; + + compute_task::run(cmd, invocations_x, invocations_y); + } + + void ds_resolve_pass_base::build(bool depth, bool stencil, bool unresolve) + { + m_config.resolve_depth = depth; + m_config.resolve_stencil = stencil; + m_config.is_unresolve = unresolve; + + vs_src = +#include "Emu/RSX/Program/GLSLSnippets/GenericVSPassthrough.glsl" + ; + + static const char* depth_resolver = +#include "Emu/RSX/Program/MSAA/DepthResolvePass.glsl" + ; + + static const char* depth_unresolver = +#include "Emu/RSX/Program/MSAA/DepthUnresolvePass.glsl" + ; + + static const char* stencil_resolver = +#include "Emu/RSX/Program/MSAA/StencilResolvePass.glsl" + ; + + static const char* stencil_unresolver = +#include "Emu/RSX/Program/MSAA/StencilUnresolvePass.glsl" + ; + + static const char* depth_stencil_resolver = +#include "Emu/RSX/Program/MSAA/DepthStencilResolvePass.glsl" + ; + + static const char* depth_stencil_unresolver = +#include "Emu/RSX/Program/MSAA/DepthStencilUnresolvePass.glsl" + ; + + if (m_config.resolve_depth && m_config.resolve_stencil) + { + fs_src = m_config.is_unresolve ? depth_stencil_unresolver : depth_stencil_resolver; + m_write_aspect_mask = gl::image_aspect::depth | gl::image_aspect::stencil; + } + else if (m_config.resolve_depth) + { + fs_src = m_config.is_unresolve ? depth_unresolver : depth_resolver; + m_write_aspect_mask = gl::image_aspect::depth; + } + else if (m_config.resolve_stencil) + { + fs_src = m_config.is_unresolve ? 
stencil_unresolver : stencil_resolver; + m_write_aspect_mask = gl::image_aspect::stencil; + } + + enable_depth_writes = m_config.resolve_depth; + enable_stencil_writes = m_config.resolve_stencil; + + + create(); + + rsx_log.notice("Resolve shader:\n%s", fs_src); + } + + void ds_resolve_pass_base::update_config() + { + ensure(multisampled && multisampled->samples() > 1); + switch (multisampled->samples()) + { + case 2: + m_config.sample_count.x = 2; + m_config.sample_count.y = 1; + break; + case 4: + m_config.sample_count.x = m_config.sample_count.y = 2; + break; + default: + fmt::throw_exception("Unsupported sample count %d", multisampled->samples()); + } + + program_handle.uniforms["sample_count"] = m_config.sample_count; + } + + void ds_resolve_pass_base::run(gl::command_context& cmd, gl::viewable_image* msaa_image, gl::viewable_image* resolve_image) + { + multisampled = msaa_image; + resolve = resolve_image; + update_config(); + + const auto read_resource = m_config.is_unresolve ? resolve_image : msaa_image; + const auto write_resource = m_config.is_unresolve ? 
msaa_image : resolve_image; + + // Resource binding + std::stack bind_slots; + std::vector> saved_sampler_states; + auto allocate_slot = [&]() -> int + { + ensure(!bind_slots.empty()); + const int slot = bind_slots.top(); + bind_slots.pop(); + saved_sampler_states.emplace_back(std::make_unique(slot, m_sampler)); + return slot; + }; + + // Reserve 2 slots max + bind_slots.push(GL_TEMP_IMAGE_SLOT(1)); + bind_slots.push(GL_TEMP_IMAGE_SLOT(0)); + + if (m_config.resolve_depth) + { + const int bind_slot = allocate_slot(); + cmd->bind_texture(bind_slot, static_cast(read_resource->get_target()), read_resource->id(), GL_TRUE); + } + + if (m_config.resolve_stencil) + { + const int bind_slot = allocate_slot(); + auto stencil_view = read_resource->get_view(rsx::default_remap_vector.with_encoding(gl::GL_REMAP_IDENTITY), gl::image_aspect::stencil); + cmd->bind_texture(bind_slot, static_cast(read_resource->get_target()), stencil_view->id(), GL_TRUE); + } + + areau viewport{}; + viewport.x2 = write_resource->width(); + viewport.y2 = write_resource->height(); + overlay_pass::run(cmd, viewport, write_resource->id(), m_write_aspect_mask, false); + } + + void stencil_only_resolver_base::emit_geometry(gl::command_context& cmd) + { + // Modified version of the base overlay pass to emit 8 draws instead of 1 + int old_vao; + glGetIntegerv(GL_VERTEX_ARRAY_BINDING, &old_vao); + m_vao.bind(); + + // Clear the target + gl::clear_cmd_info clear_info + { + .aspect_mask = gl::image_aspect::stencil, + .clear_stencil = { + .mask = 0xFF, + .value = 0 + } + }; + gl::clear_attachments(cmd, clear_info); + + // Override stencil settings. Always pass, reference is all one, compare mask doesn't matter. 
+ // For each pass the write mask will be overridden to commit output bitwise + cmd->stencil_func(GL_ALWAYS, 0xFF, 0xFF); + cmd->stencil_op(GL_REPLACE, GL_REPLACE, GL_REPLACE); + + // Start our inner loop + for (s32 write_mask = 0x1; write_mask <= 0x80; write_mask <<= 1) + { + program_handle.uniforms["stencil_mask"] = write_mask; + cmd->stencil_mask(write_mask); + + glDrawArrays(primitives, 0, num_drawable_elements); + } + + glBindVertexArray(old_vao); + } +} diff --git a/rpcs3/Emu/RSX/GL/GLResolveHelper.h b/rpcs3/Emu/RSX/GL/GLResolveHelper.h new file mode 100644 index 0000000000..8799efbf25 --- /dev/null +++ b/rpcs3/Emu/RSX/GL/GLResolveHelper.h @@ -0,0 +1,129 @@ +#pragma once + +#include "GLCompute.h" +#include "GLOverlays.h" + +namespace gl +{ + void resolve_image(gl::command_context& cmd, gl::viewable_image* dst, gl::viewable_image* src); + void unresolve_image(gl::command_context& cmd, gl::viewable_image* dst, gl::viewable_image* src); + void clear_resolve_helpers(); + + struct cs_resolve_base : compute_task + { + gl::viewable_image* multisampled = nullptr; + gl::viewable_image* resolve = nullptr; + bool is_unresolve = false; + + u32 cs_wave_x = 1; + u32 cs_wave_y = 1; + + cs_resolve_base() + {} + + virtual ~cs_resolve_base() + {} + + void build(const std::string& format_prefix, bool unresolve); + + void bind_resources() override; + + void run(gl::command_context& cmd, gl::viewable_image* msaa_image, gl::viewable_image* resolve_image); + }; + + struct cs_resolve_task : cs_resolve_base + { + cs_resolve_task(const std::string& format_prefix) + { + build(format_prefix, false); + } + }; + + struct cs_unresolve_task : cs_resolve_base + { + cs_unresolve_task(const std::string& format_prefix) + { + build(format_prefix, true); + } + }; + + struct ds_resolve_pass_base : overlay_pass + { + gl::viewable_image* multisampled = nullptr; + gl::viewable_image* resolve = nullptr; + + struct + { + bool resolve_depth = false; + bool resolve_stencil = false; + bool is_unresolve = 
false; + color2i sample_count; + } m_config; + + void build(bool depth, bool stencil, bool unresolve); + + void update_config(); + + void run(gl::command_context& cmd, gl::viewable_image* msaa_image, gl::viewable_image* resolve_image); + }; + + struct depth_only_resolver : ds_resolve_pass_base + { + depth_only_resolver() + { + build(true, false, false); + } + }; + + struct depth_only_unresolver : ds_resolve_pass_base + { + depth_only_unresolver() + { + build(true, false, true); + } + }; + + struct stencil_only_resolver_base : ds_resolve_pass_base + { + virtual ~stencil_only_resolver_base() = default; + + void build(bool is_unresolver) + { + ds_resolve_pass_base::build(false, true, is_unresolver); + } + + void emit_geometry(gl::command_context& cmd) override; + }; + + struct stencil_only_resolver : stencil_only_resolver_base + { + stencil_only_resolver() + { + build(false); + } + }; + + struct stencil_only_unresolver : stencil_only_resolver_base + { + stencil_only_unresolver() + { + build(true); + } + }; + + struct depth_stencil_resolver : ds_resolve_pass_base + { + depth_stencil_resolver() + { + build(true, true, false); + } + }; + + struct depth_stencil_unresolver : ds_resolve_pass_base + { + depth_stencil_unresolver() + { + build(true, true, true); + } + }; +} diff --git a/rpcs3/Emu/RSX/GL/GLTexture.cpp b/rpcs3/Emu/RSX/GL/GLTexture.cpp index 3847ca0e3a..0bc4e3cb97 100644 --- a/rpcs3/Emu/RSX/GL/GLTexture.cpp +++ b/rpcs3/Emu/RSX/GL/GLTexture.cpp @@ -429,7 +429,7 @@ namespace gl image_region.height *= dst_region.depth; scratch = std::make_unique( GL_TEXTURE_2D, - image_region.x + image_region.width, image_region.y + image_region.height, 1, 1, + image_region.x + image_region.width, image_region.y + image_region.height, 1, 1, 1, static_cast(dst->get_internal_format()), dst->format_class()); scratch_view = std::make_unique(scratch.get()); @@ -445,7 +445,7 @@ namespace gl { scratch = std::make_unique( GL_TEXTURE_2D, - image_region.x + image_region.width, 1, 1, 1, + 
image_region.x + image_region.width, 1, 1, 1, 1, static_cast(dst->get_internal_format()), dst->format_class()); scratch_view = std::make_unique(scratch.get()); @@ -576,7 +576,7 @@ namespace gl const GLenum internal_format = get_sized_internal_format(gcm_format); const auto format_class = rsx::classify_format(gcm_format); - return new gl::viewable_image(target, width, height, depth, mipmaps, internal_format, format_class); + return new gl::viewable_image(target, width, height, depth, mipmaps, 1, internal_format, format_class); } void fill_texture(gl::command_context& cmd, texture* dst, int format, diff --git a/rpcs3/Emu/RSX/GL/GLTextureCache.cpp b/rpcs3/Emu/RSX/GL/GLTextureCache.cpp index 7da909ad97..a3a06de4f5 100644 --- a/rpcs3/Emu/RSX/GL/GLTextureCache.cpp +++ b/rpcs3/Emu/RSX/GL/GLTextureCache.cpp @@ -149,7 +149,7 @@ namespace gl if (!dst) { - std::unique_ptr data = std::make_unique(dst_target, width, height, depth, mipmaps, sized_internal_fmt, rsx::classify_format(gcm_format)); + std::unique_ptr data = std::make_unique(dst_target, width, height, depth, mipmaps, 1, sized_internal_fmt, rsx::classify_format(gcm_format)); dst = data.get(); dst->properties_encoding = match_key; m_temporary_surfaces.emplace_back(std::move(data)); @@ -223,7 +223,12 @@ namespace gl { const auto src_bpp = slice.src->pitch() / slice.src->width(); const u16 convert_w = u16(slice.src->width() * src_bpp) / dst_bpp; - tmp = std::make_unique(GL_TEXTURE_2D, convert_w, slice.src->height(), 1, 1, static_cast(dst_image->get_internal_format()), dst_image->format_class()); + tmp = std::make_unique( + GL_TEXTURE_2D, + convert_w, slice.src->height(), + 1, 1, 1, + static_cast(dst_image->get_internal_format()), + dst_image->format_class()); src_image = tmp.get(); @@ -264,9 +269,17 @@ namespace gl const areai dst_rect = { slice.dst_x, slice.dst_y, slice.dst_x + slice.dst_w, slice.dst_y + slice.dst_h }; gl::texture* _dst = dst_image; - if (src_image->get_internal_format() != 
dst_image->get_internal_format() || slice.level != 0 || slice.dst_z != 0) [[ unlikely ]] + if (src_image->get_internal_format() != dst_image->get_internal_format() || + slice.level != 0 || + slice.dst_z != 0) [[ unlikely ]] { - tmp = std::make_unique(GL_TEXTURE_2D, dst_rect.x2, dst_rect.y2, 1, 1, static_cast(slice.src->get_internal_format())); + tmp = std::make_unique( + GL_TEXTURE_2D, + dst_rect.x2, dst_rect.y2, + 1, 1, 1, + static_cast(slice.src->get_internal_format()), + slice.src->format_class()); + _dst = tmp.get(); } diff --git a/rpcs3/Emu/RSX/GL/GLTextureCache.h b/rpcs3/Emu/RSX/GL/GLTextureCache.h index fbb9b27a04..7348fd5936 100644 --- a/rpcs3/Emu/RSX/GL/GLTextureCache.h +++ b/rpcs3/Emu/RSX/GL/GLTextureCache.h @@ -262,28 +262,24 @@ namespace gl baseclass::on_miss(); } + gl::texture* target_texture = vram_texture; + u32 transfer_width = width; + u32 transfer_height = height; + if (context == rsx::texture_upload_context::framebuffer_storage) { - auto as_rtt = static_cast(vram_texture); - if (as_rtt->dirty()) as_rtt->read_barrier(cmd); + auto surface = gl::as_rtt(vram_texture); + surface->memory_barrier(cmd, rsx::surface_access::transfer_read); + target_texture = surface->get_surface(rsx::surface_access::transfer_read); + transfer_width *= surface->samples_x; + transfer_height *= surface->samples_y; } - gl::texture* target_texture = vram_texture; if ((rsx::get_resolution_scale_percent() != 100 && context == rsx::texture_upload_context::framebuffer_storage) || (vram_texture->pitch() != rsx_pitch)) { - u32 real_width = width; - u32 real_height = height; - - if (context == rsx::texture_upload_context::framebuffer_storage) - { - auto surface = gl::as_rtt(vram_texture); - real_width *= surface->samples_x; - real_height *= surface->samples_y; - } - areai src_area = { 0, 0, 0, 0 }; - const areai dst_area = { 0, 0, static_cast(real_width), static_cast(real_height) }; + const areai dst_area = { 0, 0, static_cast(transfer_width), static_cast(transfer_height) }; auto 
ifmt = vram_texture->get_internal_format(); src_area.x2 = vram_texture->width(); @@ -294,22 +290,22 @@ namespace gl if (scaled_texture) { auto sfmt = scaled_texture->get_internal_format(); - if (scaled_texture->width() != real_width || - scaled_texture->height() != real_height || + if (scaled_texture->width() != transfer_width || + scaled_texture->height() != transfer_height || sfmt != ifmt) { - //Discard current scaled texture + // Discard current scaled texture scaled_texture.reset(); } } if (!scaled_texture) { - scaled_texture = std::make_unique(GL_TEXTURE_2D, real_width, real_height, 1, 1, static_cast(ifmt)); + scaled_texture = std::make_unique(GL_TEXTURE_2D, transfer_width, transfer_height, 1, 1, 1, static_cast(ifmt), vram_texture->format_class()); } const bool linear_interp = is_depth_texture() ? false : true; - g_hw_blitter->scale_image(cmd, vram_texture, scaled_texture.get(), src_area, dst_area, linear_interp, {}); + g_hw_blitter->scale_image(cmd, target_texture, scaled_texture.get(), src_area, dst_area, linear_interp, {}); target_texture = scaled_texture.get(); } } diff --git a/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp b/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp index fce7786b30..23156c87e8 100644 --- a/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp +++ b/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp @@ -73,7 +73,31 @@ void GLVertexDecompilerThread::insertConstants(std::stringstream& OS, const std: continue; } - OS << "uniform " << PT.type << " " << PI.name << ";\n"; + auto type = PT.type; + + if (PT.type == "sampler2D" || + PT.type == "samplerCube" || + PT.type == "sampler1D" || + PT.type == "sampler3D") + { + if (m_prog.texture_state.multisampled_textures) [[ unlikely ]] + { + ensure(PI.name.length() > 3); + int index = atoi(&PI.name[3]); + + if (m_prog.texture_state.multisampled_textures & (1 << index)) + { + if (type != "sampler1D" && type != "sampler2D") + { + rsx_log.error("Unexpected multisampled sampler type '%s'", type); + } + + type = "sampler2DMS"; + } + } + } + + OS << 
"uniform " << type << " " << PI.name << ";\n"; } } } diff --git a/rpcs3/Emu/RSX/GL/glutils/blitter.cpp b/rpcs3/Emu/RSX/GL/glutils/blitter.cpp index e5a36d11cf..e7ca554e0e 100644 --- a/rpcs3/Emu/RSX/GL/glutils/blitter.cpp +++ b/rpcs3/Emu/RSX/GL/glutils/blitter.cpp @@ -67,7 +67,7 @@ namespace gl if (static_cast(internal_fmt) != src->get_internal_format()) { const u16 internal_width = static_cast(src->width() * xfer_info.src_scaling_hint); - typeless_src = std::make_unique(GL_TEXTURE_2D, internal_width, src->height(), 1, 1, internal_fmt); + typeless_src = std::make_unique(GL_TEXTURE_2D, internal_width, src->height(), 1, 1, 1, internal_fmt, RSX_FORMAT_CLASS_DONT_CARE); copy_typeless(cmd, typeless_src.get(), src); real_src = typeless_src.get(); @@ -85,7 +85,7 @@ namespace gl if (static_cast(internal_fmt) != dst->get_internal_format()) { const auto internal_width = static_cast(dst->width() * xfer_info.dst_scaling_hint); - typeless_dst = std::make_unique(GL_TEXTURE_2D, internal_width, dst->height(), 1, 1, internal_fmt); + typeless_dst = std::make_unique(GL_TEXTURE_2D, internal_width, dst->height(), 1, 1, 1, internal_fmt, RSX_FORMAT_CLASS_DONT_CARE); copy_typeless(cmd, typeless_dst.get(), dst); real_dst = typeless_dst.get(); diff --git a/rpcs3/Emu/RSX/GL/glutils/capabilities.cpp b/rpcs3/Emu/RSX/GL/glutils/capabilities.cpp index f19ab3332b..0c8de71786 100644 --- a/rpcs3/Emu/RSX/GL/glutils/capabilities.cpp +++ b/rpcs3/Emu/RSX/GL/glutils/capabilities.cpp @@ -33,7 +33,7 @@ namespace gl void capabilities::initialize() { - int find_count = 17; + int find_count = 18; int ext_count = 0; glGetIntegerv(GL_NUM_EXTENSIONS, &ext_count); @@ -171,6 +171,13 @@ namespace gl find_count--; continue; } + + if (check(ext_name, "GL_ARB_shader_texture_image_samples")) + { + ARB_shader_texture_image_samples = true; + find_count--; + continue; + } } // Set GLSL version diff --git a/rpcs3/Emu/RSX/GL/glutils/capabilities.h b/rpcs3/Emu/RSX/GL/glutils/capabilities.h index c6abfaf3b6..801a426d80 100644 
--- a/rpcs3/Emu/RSX/GL/glutils/capabilities.h +++ b/rpcs3/Emu/RSX/GL/glutils/capabilities.h @@ -40,6 +40,7 @@ namespace gl bool ARB_compute_shader_supported = false; bool NV_depth_buffer_float_supported = false; bool NV_fragment_shader_barycentric_supported = false; + bool ARB_shader_texture_image_samples = false; bool vendor_INTEL = false; // has broken GLSL compiler bool vendor_AMD = false; // has broken ARB_multidraw diff --git a/rpcs3/Emu/RSX/GL/glutils/fbo.h b/rpcs3/Emu/RSX/GL/glutils/fbo.h index 9837c425cd..ef71bfa469 100644 --- a/rpcs3/Emu/RSX/GL/glutils/fbo.h +++ b/rpcs3/Emu/RSX/GL/glutils/fbo.h @@ -125,7 +125,9 @@ namespace gl void operator = (const texture& rhs) { - ensure(rhs.get_target() == texture::target::texture2D); + ensure(rhs.get_target() == texture::target::texture2D || + rhs.get_target() == texture::target::texture2DMS); + m_parent.m_resource_bindings[m_id] = rhs.id(); DSA_CALL2(NamedFramebufferTexture, m_parent.id(), m_id, rhs.id(), 0); } diff --git a/rpcs3/Emu/RSX/GL/glutils/image.cpp b/rpcs3/Emu/RSX/GL/glutils/image.cpp index 7876cf6dc3..0e25301afe 100644 --- a/rpcs3/Emu/RSX/GL/glutils/image.cpp +++ b/rpcs3/Emu/RSX/GL/glutils/image.cpp @@ -19,8 +19,24 @@ namespace gl } } - texture::texture(GLenum target, GLuint width, GLuint height, GLuint depth, GLuint mipmaps, GLenum sized_format, rsx::format_class format_class) + texture::texture(GLenum target, GLuint width, GLuint height, GLuint depth, GLuint mipmaps, GLubyte samples, GLenum sized_format, rsx::format_class format_class) { + // Upgrade targets for MSAA + if (samples > 1) + { + switch (target) + { + case GL_TEXTURE_2D: + target = GL_TEXTURE_2D_MULTISAMPLE; + break; + case GL_TEXTURE_2D_ARRAY: + target = GL_TEXTURE_2D_MULTISAMPLE_ARRAY; + break; + default: + fmt::throw_exception("MSAA is only supported on 2D images. 
Target=0x%x", target); + } + } + glGenTextures(1, &m_id); // Must bind to initialize the new texture @@ -40,30 +56,45 @@ namespace gl glTexStorage2D(target, mipmaps, storage_fmt, width, height); depth = 1; break; + case GL_TEXTURE_2D_MULTISAMPLE: + ensure(mipmaps == 1); + glTexStorage2DMultisample(target, samples, storage_fmt, width, height, GL_TRUE); + depth = 1; + break; case GL_TEXTURE_3D: case GL_TEXTURE_2D_ARRAY: glTexStorage3D(target, mipmaps, storage_fmt, width, height, depth); break; + case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: + ensure(mipmaps == 1); + glTexStorage3DMultisample(target, samples, storage_fmt, width, height, depth, GL_TRUE); + break; case GL_TEXTURE_BUFFER: break; } if (target != GL_TEXTURE_BUFFER) { - glTexParameteri(target, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - glTexParameteri(target, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - glTexParameteri(target, GL_TEXTURE_WRAP_S, GL_REPEAT); - glTexParameteri(target, GL_TEXTURE_WRAP_T, GL_REPEAT); - glTexParameteri(target, GL_TEXTURE_WRAP_R, GL_REPEAT); - glTexParameteri(target, GL_TEXTURE_BASE_LEVEL, 0); - glTexParameteri(target, GL_TEXTURE_MAX_LEVEL, mipmaps - 1); + if (samples == 1) + { + glTexParameteri(target, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexParameteri(target, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glTexParameteri(target, GL_TEXTURE_WRAP_S, GL_REPEAT); + glTexParameteri(target, GL_TEXTURE_WRAP_T, GL_REPEAT); + glTexParameteri(target, GL_TEXTURE_WRAP_R, GL_REPEAT); + glTexParameteri(target, GL_TEXTURE_BASE_LEVEL, 0); + glTexParameteri(target, GL_TEXTURE_MAX_LEVEL, mipmaps - 1); + } m_width = width; m_height = height; m_depth = depth; m_mipmaps = mipmaps; + m_samples = samples; m_aspect_flags = image_aspect::color; + ensure(width > 0 && height > 0 && depth > 0 && mipmaps > 0 && samples > 0, "Invalid OpenGL texture definition."); + switch (storage_fmt) { case GL_DEPTH_COMPONENT16: @@ -146,6 +177,8 @@ namespace gl void texture::copy_from(const void* src, texture::format format, texture::type type, int 
level, const coord3u region, const pixel_unpack_settings& pixel_settings) { + ensure(m_samples <= 1, "Transfer operations are unsupported on multisampled textures."); + pixel_settings.apply(); switch (const auto target_ = static_cast(m_target)) @@ -190,6 +223,8 @@ namespace gl void texture::copy_from(buffer& buf, u32 gl_format_type, u32 offset, u32 length) { + ensure(m_samples <= 1, "Transfer operations are unsupported on multisampled textures."); + if (get_target() != target::textureBuffer) fmt::throw_exception("OpenGL error: texture cannot copy from buffer"); @@ -203,6 +238,8 @@ namespace gl void texture::copy_to(void* dst, texture::format format, texture::type type, int level, const coord3u& region, const pixel_pack_settings& pixel_settings) const { + ensure(m_samples <= 1, "Transfer operations are unsupported on multisampled textures."); + pixel_settings.apply(); const auto& caps = get_driver_caps(); @@ -223,7 +260,7 @@ namespace gl { // Worst case scenario. For some reason, EXT_dsa does not have glGetTextureSubImage const auto target_ = static_cast(m_target); - texture tmp{ target_, region.width, region.height, region.depth, 1, static_cast(m_internal_format) }; + texture tmp{ target_, region.width, region.height, region.depth, 1, 1, static_cast(m_internal_format), m_format_class }; glCopyImageSubData(m_id, target_, level, region.x, region.y, region.z, tmp.id(), target_, 0, 0, 0, 0, region.width, region.height, region.depth); diff --git a/rpcs3/Emu/RSX/GL/glutils/image.h b/rpcs3/Emu/RSX/GL/glutils/image.h index 791762d558..896c3ee088 100644 --- a/rpcs3/Emu/RSX/GL/glutils/image.h +++ b/rpcs3/Emu/RSX/GL/glutils/image.h @@ -45,7 +45,8 @@ namespace gl enum remap_constants : u32 { GL_REMAP_IDENTITY = 0xCAFEBABE, - GL_REMAP_BGRA = 0x0000AA6C + GL_REMAP_BGRA = 0x0000AA6C, + GL_REMAP_VIEW_MULTISAMPLED = 0xDEADBEEF }; struct subresource_range @@ -174,7 +175,8 @@ namespace gl texture3D = GL_TEXTURE_3D, textureCUBE = GL_TEXTURE_CUBE_MAP, textureBuffer = GL_TEXTURE_BUFFER, 
- texture2DArray = GL_TEXTURE_2D_ARRAY + texture2DArray = GL_TEXTURE_2D_ARRAY, + texture2DMS = GL_TEXTURE_2D_MULTISAMPLE }; protected: @@ -183,6 +185,7 @@ namespace gl GLuint m_height = 0; GLuint m_depth = 0; GLuint m_mipmaps = 0; + GLubyte m_samples = 0; GLuint m_pitch = 0; GLuint m_compressed = GL_FALSE; GLuint m_aspect_flags = 0; @@ -197,7 +200,7 @@ namespace gl texture(const texture&) = delete; texture(texture&& texture_) = delete; - texture(GLenum target, GLuint width, GLuint height, GLuint depth, GLuint mipmaps, GLenum sized_format, rsx::format_class format_class = rsx::RSX_FORMAT_CLASS_UNDEFINED); + texture(GLenum target, GLuint width, GLuint height, GLuint depth, GLuint mipmaps, GLubyte samples, GLenum sized_format, rsx::format_class format_class); virtual ~texture(); // Getters/setters @@ -276,9 +279,9 @@ namespace gl return m_pitch; } - constexpr GLubyte samples() const + GLubyte samples() const { - return 1; + return m_samples; } GLboolean compressed() const diff --git a/rpcs3/Emu/RSX/GL/glutils/state_tracker.hpp b/rpcs3/Emu/RSX/GL/glutils/state_tracker.hpp index 8020b45936..ffc4fe38f1 100644 --- a/rpcs3/Emu/RSX/GL/glutils/state_tracker.hpp +++ b/rpcs3/Emu/RSX/GL/glutils/state_tracker.hpp @@ -309,6 +309,32 @@ namespace gl } } + void sample_mask(GLbitfield mask) + { + if (!test_and_set_property(GL_SAMPLE_MASK_VALUE, mask)) + { + glSampleMaski(0, mask); + } + } + + void sample_coverage(GLclampf coverage) + { + const u32 value = std::bit_cast(coverage); + if (!test_and_set_property(GL_SAMPLE_COVERAGE_VALUE, value)) + { + glSampleCoverage(coverage, GL_FALSE); + } + } + + void min_sample_shading_rate(GLclampf rate) + { + const u32 value = std::bit_cast(rate); + if (!test_and_set_property(GL_MIN_SAMPLE_SHADING_VALUE, value)) + { + glMinSampleShading(rate); + } + } + void clip_planes(GLuint mask) { if (!test_and_set_property(CLIP_PLANES, mask)) diff --git a/rpcs3/Emu/RSX/GL/upscalers/fsr1/fsr_pass.cpp b/rpcs3/Emu/RSX/GL/upscalers/fsr1/fsr_pass.cpp index 
adf25b3f12..940de27d89 100644 --- a/rpcs3/Emu/RSX/GL/upscalers/fsr1/fsr_pass.cpp +++ b/rpcs3/Emu/RSX/GL/upscalers/fsr1/fsr_pass.cpp @@ -177,7 +177,7 @@ namespace gl { return std::make_unique( GL_TEXTURE_2D, - output_w, output_h, 1, 1, + output_w, output_h, 1, 1, 1, GL_RGBA8, RSX_FORMAT_CLASS_COLOR); }; diff --git a/rpcs3/Emu/RSX/Program/GLSLSnippets/GenericVSPassthrough.glsl b/rpcs3/Emu/RSX/Program/GLSLSnippets/GenericVSPassthrough.glsl index 0428dc16c4..b57ad58cbc 100644 --- a/rpcs3/Emu/RSX/Program/GLSLSnippets/GenericVSPassthrough.glsl +++ b/rpcs3/Emu/RSX/Program/GLSLSnippets/GenericVSPassthrough.glsl @@ -1,5 +1,7 @@ R"( #version 420 +#extension GL_ARB_separate_shader_objects: enable + layout(location=0) out vec2 tc0; #ifdef VULKAN diff --git a/rpcs3/Emu/RSX/Program/MSAA/ColorResolvePass.glsl b/rpcs3/Emu/RSX/Program/MSAA/ColorResolvePass.glsl new file mode 100644 index 0000000000..d2e1c25b60 --- /dev/null +++ b/rpcs3/Emu/RSX/Program/MSAA/ColorResolvePass.glsl @@ -0,0 +1,37 @@ +R"( +#version 430 + +layout(local_size_x=%WORKGROUP_SIZE_X, local_size_y=%WORKGROUP_SIZE_Y, local_size_z=1) in; + +#ifdef VULKAN +layout(set=0, binding=0, %IMAGE_FORMAT) uniform readonly restrict image2DMS multisampled; +layout(set=0, binding=1) uniform writeonly restrict image2D resolve; +#else +layout(binding=0, %IMAGE_FORMAT) uniform readonly restrict image2DMS multisampled; +layout(binding=1) uniform writeonly restrict image2D resolve; +#endif + +#if %BGRA_SWAP +#define shuffle(x) (x.bgra) +#else +#define shuffle(x) (x) +#endif + +void main() +{ + ivec2 resolve_size = imageSize(resolve); + ivec2 aa_size = imageSize(multisampled); + ivec2 sample_count = resolve_size / aa_size; + + if (any(greaterThanEqual(gl_GlobalInvocationID.xy, uvec2(resolve_size)))) return; + + ivec2 resolve_coords = ivec2(gl_GlobalInvocationID.xy); + ivec2 aa_coords = resolve_coords / sample_count; + ivec2 sample_loc = ivec2(resolve_coords % sample_count); + int sample_index = sample_loc.x + (sample_loc.y * 
sample_count.x); /* row-major sample grid: the row stride is the grid width (x) */ + + vec4 aa_sample = imageLoad(multisampled, aa_coords, sample_index); + imageStore(resolve, resolve_coords, shuffle(aa_sample)); +} + +)" diff --git a/rpcs3/Emu/RSX/Program/MSAA/ColorUnresolvePass.glsl b/rpcs3/Emu/RSX/Program/MSAA/ColorUnresolvePass.glsl new file mode 100644 index 0000000000..56ed52b402 --- /dev/null +++ b/rpcs3/Emu/RSX/Program/MSAA/ColorUnresolvePass.glsl @@ -0,0 +1,37 @@ +R"( +#version 430 + +layout(local_size_x=%WORKGROUP_SIZE_X, local_size_y=%WORKGROUP_SIZE_Y, local_size_z=1) in; + +#ifdef VULKAN +layout(set=0, binding=0) uniform writeonly restrict image2DMS multisampled; +layout(set=0, binding=1, %IMAGE_FORMAT) uniform readonly restrict image2D resolve; +#else +layout(binding=0) uniform writeonly restrict image2DMS multisampled; +layout(binding=1, %IMAGE_FORMAT) uniform readonly restrict image2D resolve; +#endif + +#if %BGRA_SWAP +#define shuffle(x) (x.bgra) +#else +#define shuffle(x) (x) +#endif + +void main() +{ + ivec2 resolve_size = imageSize(resolve); + ivec2 aa_size = imageSize(multisampled); + ivec2 sample_count = resolve_size / aa_size; + + if (any(greaterThanEqual(gl_GlobalInvocationID.xy, uvec2(resolve_size)))) return; + + ivec2 resolve_coords = ivec2(gl_GlobalInvocationID.xy); + ivec2 aa_coords = resolve_coords / sample_count; + ivec2 sample_loc = ivec2(resolve_coords % sample_count); + int sample_index = sample_loc.x + (sample_loc.y * sample_count.x); /* row-major sample grid: the row stride is the grid width (x) */ + + vec4 resolved_sample = imageLoad(resolve, resolve_coords); + imageStore(multisampled, aa_coords, sample_index, shuffle(resolved_sample)); +} + +)" diff --git a/rpcs3/Emu/RSX/Program/MSAA/DepthResolvePass.glsl b/rpcs3/Emu/RSX/Program/MSAA/DepthResolvePass.glsl new file mode 100644 index 0000000000..cc957aa0ac --- /dev/null +++ b/rpcs3/Emu/RSX/Program/MSAA/DepthResolvePass.glsl @@ -0,0 +1,23 @@ +R"( +#version 420 +#extension GL_ARB_separate_shader_objects: enable + +#ifdef VULKAN +layout(set=0, binding=0) uniform sampler2DMS fs0; +layout(push_constant) uniform 
static_data { ivec2 sample_count; }; +#else +layout(binding=31) uniform sampler2DMS fs0; +uniform ivec2 sample_count; +#endif + +void main() +{ + ivec2 out_coord = ivec2(gl_FragCoord.xy); + ivec2 in_coord = (out_coord / sample_count.xy); + ivec2 sample_loc = out_coord % sample_count.xy; + int sample_index = sample_loc.x + (sample_loc.y * sample_count.x); /* row-major sample grid: the row stride is the grid width (x) */ + float frag_depth = texelFetch(fs0, in_coord, sample_index).x; + gl_FragDepth = frag_depth; +} + +)" diff --git a/rpcs3/Emu/RSX/Program/MSAA/DepthStencilResolvePass.glsl b/rpcs3/Emu/RSX/Program/MSAA/DepthStencilResolvePass.glsl new file mode 100644 index 0000000000..c547cc46b8 --- /dev/null +++ b/rpcs3/Emu/RSX/Program/MSAA/DepthStencilResolvePass.glsl @@ -0,0 +1,28 @@ +R"( +#version 420 +#extension GL_ARB_separate_shader_objects: enable +#extension GL_ARB_shader_stencil_export : enable + +#ifdef VULKAN +layout(set=0, binding=0) uniform sampler2DMS fs0; +layout(set=0, binding=1) uniform usampler2DMS fs1; +layout(push_constant) uniform static_data { ivec2 sample_count; }; +#else +layout(binding=31) uniform sampler2DMS fs0; +layout(binding=30) uniform usampler2DMS fs1; +uniform ivec2 sample_count; +#endif + +void main() +{ + ivec2 out_coord = ivec2(gl_FragCoord.xy); + ivec2 in_coord = (out_coord / sample_count.xy); + ivec2 sample_loc = out_coord % ivec2(sample_count.xy); + int sample_index = sample_loc.x + (sample_loc.y * sample_count.x); /* row-major sample grid: the row stride is the grid width (x) */ + float frag_depth = texelFetch(fs0, in_coord, sample_index).x; + uint frag_stencil = texelFetch(fs1, in_coord, sample_index).x; + gl_FragDepth = frag_depth; + gl_FragStencilRefARB = int(frag_stencil); +} + +)" diff --git a/rpcs3/Emu/RSX/Program/MSAA/DepthStencilUnresolvePass.glsl b/rpcs3/Emu/RSX/Program/MSAA/DepthStencilUnresolvePass.glsl new file mode 100644 index 0000000000..de10cc351b --- /dev/null +++ b/rpcs3/Emu/RSX/Program/MSAA/DepthStencilUnresolvePass.glsl @@ -0,0 +1,28 @@ +R"( +#version 420 +#extension GL_ARB_separate_shader_objects: enable +#extension 
GL_ARB_shader_stencil_export : enable + +#ifdef VULKAN +layout(set=0, binding=0) uniform sampler2D fs0; +layout(set=0, binding=1) uniform usampler2D fs1; +layout(push_constant) uniform static_data { ivec2 sample_count; }; +#else +layout(binding=31) uniform sampler2D fs0; +layout(binding=30) uniform usampler2D fs1; +uniform ivec2 sample_count; +#endif + +void main() +{ + ivec2 pixel_coord = ivec2(gl_FragCoord.xy); + pixel_coord *= sample_count.xy; + pixel_coord.x += (gl_SampleID % sample_count.x); + pixel_coord.y += (gl_SampleID / sample_count.x); + float frag_depth = texelFetch(fs0, pixel_coord, 0).x; + uint frag_stencil = texelFetch(fs1, pixel_coord, 0).x; + gl_FragDepth = frag_depth; + gl_FragStencilRefARB = int(frag_stencil); +} + +)" diff --git a/rpcs3/Emu/RSX/Program/MSAA/DepthUnresolvePass.glsl b/rpcs3/Emu/RSX/Program/MSAA/DepthUnresolvePass.glsl new file mode 100644 index 0000000000..c4e5ee2f63 --- /dev/null +++ b/rpcs3/Emu/RSX/Program/MSAA/DepthUnresolvePass.glsl @@ -0,0 +1,23 @@ +R"( +#version 420 +#extension GL_ARB_separate_shader_objects: enable + +#ifdef VULKAN +layout(set=0, binding=0) uniform sampler2D fs0; +layout(push_constant) uniform static_data { ivec2 sample_count; }; +#else +layout(binding=31) uniform sampler2D fs0; +uniform ivec2 sample_count; +#endif + +void main() +{ + ivec2 pixel_coord = ivec2(gl_FragCoord.xy); + pixel_coord *= sample_count.xy; + pixel_coord.x += (gl_SampleID % sample_count.x); + pixel_coord.y += (gl_SampleID / sample_count.x); + float frag_depth = texelFetch(fs0, pixel_coord, 0).x; + gl_FragDepth = frag_depth; +} + +)" diff --git a/rpcs3/Emu/RSX/Program/MSAA/StencilResolvePass.glsl b/rpcs3/Emu/RSX/Program/MSAA/StencilResolvePass.glsl new file mode 100644 index 0000000000..5d954d7a5e --- /dev/null +++ b/rpcs3/Emu/RSX/Program/MSAA/StencilResolvePass.glsl @@ -0,0 +1,28 @@ +R"( +#version 420 +#extension GL_ARB_separate_shader_objects: enable + +#ifdef VULKAN +layout(set=0, binding=0) uniform usampler2DMS fs0; 
+layout(push_constant) uniform static_data +{ + layout(offset=0) ivec2 sample_count; + layout(offset=8) int stencil_mask; +}; +#else +layout(binding=31) uniform usampler2DMS fs0; +uniform ivec2 sample_count; +uniform int stencil_mask; +#endif + +void main() +{ + ivec2 out_coord = ivec2(gl_FragCoord.xy); + ivec2 in_coord = (out_coord / sample_count.xy); + ivec2 sample_loc = out_coord % sample_count.xy; + int sample_index = sample_loc.x + (sample_loc.y * sample_count.y); + uint frag_stencil = texelFetch(fs0, in_coord, sample_index).x; + if ((frag_stencil & uint(stencil_mask)) == 0) discard; +} + +)" diff --git a/rpcs3/Emu/RSX/Program/MSAA/StencilUnresolvePass.glsl b/rpcs3/Emu/RSX/Program/MSAA/StencilUnresolvePass.glsl new file mode 100644 index 0000000000..c3a0f65be1 --- /dev/null +++ b/rpcs3/Emu/RSX/Program/MSAA/StencilUnresolvePass.glsl @@ -0,0 +1,28 @@ +R"( +#version 420 +#extension GL_ARB_separate_shader_objects: enable + +#ifdef VULKAN +layout(set=0, binding=0) uniform usampler2D fs0; +layout(push_constant) uniform static_data +{ + layout(offset=0) ivec2 sample_count; + layout(offset=8) int stencil_mask; +}; +#else +layout(binding=31) uniform usampler2D fs0; +uniform ivec2 sample_count; +uniform int stencil_mask; +#endif + +void main() +{ + ivec2 pixel_coord = ivec2(gl_FragCoord.xy); + pixel_coord *= sample_count.xy; + pixel_coord.x += (gl_SampleID % sample_count.x); + pixel_coord.y += (gl_SampleID / sample_count.x); + uint frag_stencil = texelFetch(fs0, pixel_coord, 0).x; + if ((frag_stencil & uint(stencil_mask)) == 0) discard; +} + +)" diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index 703c762ae0..e128203957 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -1637,6 +1637,10 @@ namespace rsx layout.aa_factors[0] = aa_factor_u; layout.aa_factors[1] = aa_factor_v; + // Log this to frame stats + m_frame_stats.framebuffer_stats.add(layout.width, layout.height, aa_mode); + + // Check if anything has changed bool 
really_changed = false; for (u8 i = 0; i < rsx::limits::color_buffers_count; ++i) diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index 257c70e4a5..4da43908c0 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -87,6 +87,7 @@ namespace rsx { bool supports_multidraw; // Draw call batching bool supports_hw_a2c; // Alpha to coverage + bool supports_hw_a2c_1spp; // Alpha to coverage at 1 sample per pixel bool supports_hw_renormalization; // Should be true on NV hardware which matches PS3 texture renormalization behaviour bool supports_hw_msaa; // MSAA support bool supports_hw_a2one; // Alpha to one @@ -466,4 +467,9 @@ namespace rsx { return g_fxo->try_get(); } + + inline const backend_configuration& get_renderer_backend_config() + { + return g_fxo->get().get_backend_config(); + } } diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 573f837707..2305e515e4 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -630,6 +630,7 @@ VKGSRender::VKGSRender(utils::serial* ar) noexcept : GSRender(ar) { backend_config.supports_hw_msaa = true; backend_config.supports_hw_a2c = true; + backend_config.supports_hw_a2c_1spp = true; backend_config.supports_hw_a2one = m_device->get_alpha_to_one_support(); } diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.cpp b/rpcs3/Emu/RSX/VK/VKHelpers.cpp index 2a31ffa51d..45e5e1476f 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.cpp +++ b/rpcs3/Emu/RSX/VK/VKHelpers.cpp @@ -31,7 +31,7 @@ namespace vk bool g_drv_sanitize_fp_values = false; bool g_drv_disable_fence_reset = false; bool g_drv_emulate_cond_render = false; - bool g_drv_strict_query_scopes = false; + bool g_drv_strict_query_scopes = true; bool g_drv_force_reuse_query_pools = false; u64 g_num_processed_frames = 0; diff --git a/rpcs3/Emu/RSX/VK/VKOverlays.cpp b/rpcs3/Emu/RSX/VK/VKOverlays.cpp index fd7dc1b0cc..3785b18efe 100644 --- a/rpcs3/Emu/RSX/VK/VKOverlays.cpp +++ b/rpcs3/Emu/RSX/VK/VKOverlays.cpp @@ 
-47,9 +47,11 @@ namespace vk void overlay_pass::init_descriptors() { - rsx::simple_array descriptor_pool_sizes = + rsx::simple_array descriptor_pool_sizes = {}; + + if (m_num_uniform_buffers) { - { VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1 } + descriptor_pool_sizes.push_back({ VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, m_num_uniform_buffers }); }; if (m_num_usable_samplers) @@ -65,35 +67,38 @@ namespace vk // Reserve descriptor pools m_descriptor_pool.create(*m_device, descriptor_pool_sizes); - const auto num_bindings = 1 + m_num_usable_samplers + m_num_input_attachments; + const auto num_bindings = m_num_uniform_buffers + m_num_usable_samplers + m_num_input_attachments; rsx::simple_array bindings(num_bindings); + u32 binding_slot = 0; - bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - bindings[0].descriptorCount = 1; - bindings[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT; - bindings[0].binding = 0; - bindings[0].pImmutableSamplers = nullptr; - - u32 descriptor_index = 1; - for (u32 n = 0; n < m_num_usable_samplers; ++n, ++descriptor_index) + for (u32 n = 0; n < m_num_uniform_buffers; ++n, ++binding_slot) { - bindings[descriptor_index].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - bindings[descriptor_index].descriptorCount = 1; - bindings[descriptor_index].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; - bindings[descriptor_index].binding = descriptor_index; - bindings[descriptor_index].pImmutableSamplers = nullptr; + bindings[binding_slot].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + bindings[binding_slot].descriptorCount = 1; + bindings[binding_slot].stageFlags = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT; + bindings[binding_slot].binding = binding_slot; + bindings[binding_slot].pImmutableSamplers = nullptr; } - for (u32 n = 0; n < m_num_input_attachments; ++n, ++descriptor_index) + for (u32 n = 0; n < m_num_usable_samplers; ++n, ++binding_slot) { - bindings[descriptor_index].descriptorType = 
VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT; - bindings[descriptor_index].descriptorCount = 1; - bindings[descriptor_index].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; - bindings[descriptor_index].binding = descriptor_index; - bindings[descriptor_index].pImmutableSamplers = nullptr; + bindings[binding_slot].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + bindings[binding_slot].descriptorCount = 1; + bindings[binding_slot].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + bindings[binding_slot].binding = binding_slot; + bindings[binding_slot].pImmutableSamplers = nullptr; } - ensure(descriptor_index == num_bindings); + for (u32 n = 0; n < m_num_input_attachments; ++n, ++binding_slot) + { + bindings[binding_slot].descriptorType = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT; + bindings[binding_slot].descriptorCount = 1; + bindings[binding_slot].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + bindings[binding_slot].binding = binding_slot; + bindings[binding_slot].pImmutableSamplers = nullptr; + } + + ensure(binding_slot == num_bindings); m_descriptor_layout = vk::descriptors::create_layout(bindings); VkPipelineLayoutCreateInfo layout_info = {}; @@ -120,9 +125,14 @@ namespace vk std::vector overlay_pass::get_fragment_inputs() { std::vector fs_inputs; - fs_inputs.push_back({ ::glsl::program_domain::glsl_fragment_program, vk::glsl::program_input_type::input_type_uniform_buffer,{},{}, 0, "static_data" }); + u32 binding = 0; + + for (u32 n = 0; n < m_num_uniform_buffers; ++n, ++binding) + { + const std::string name = std::string("static_data") + (n > 0 ?
std::to_string(n) : ""); + fs_inputs.push_back({ ::glsl::program_domain::glsl_fragment_program, vk::glsl::program_input_type::input_type_uniform_buffer,{},{}, /* same slot as the descriptor layout */ binding, name }); + } - u32 binding = 1; for (u32 n = 0; n < m_num_usable_samplers; ++n, ++binding) { fs_inputs.push_back({ ::glsl::program_domain::glsl_fragment_program, vk::glsl::program_input_type::input_type_texture,{},{}, binding, "fs" + std::to_string(n) }); @@ -231,7 +241,10 @@ namespace vk update_uniforms(cmd, program); - program->bind_uniform({ m_ubo.heap->value, m_ubo_offset, std::max(m_ubo_length, 4u) }, 0, m_descriptor_set); + if (m_num_uniform_buffers > 0) + { + program->bind_uniform({ m_ubo.heap->value, m_ubo_offset, std::max(m_ubo_length, 4u) }, 0, m_descriptor_set); + } for (uint n = 0; n < src.size(); ++n) { diff --git a/rpcs3/Emu/RSX/VK/VKOverlays.h b/rpcs3/Emu/RSX/VK/VKOverlays.h index d1288fb80a..2b11285653 100644 --- a/rpcs3/Emu/RSX/VK/VKOverlays.h +++ b/rpcs3/Emu/RSX/VK/VKOverlays.h @@ -52,6 +52,7 @@ namespace vk VkFilter m_sampler_filter = VK_FILTER_LINEAR; u32 m_num_usable_samplers = 1; u32 m_num_input_attachments = 0; + u32 m_num_uniform_buffers = 1; std::unordered_map> m_program_cache; std::unique_ptr m_sampler; diff --git a/rpcs3/Emu/RSX/VK/VKPresent.cpp b/rpcs3/Emu/RSX/VK/VKPresent.cpp index 0c32d9034d..f1bcc214f9 100644 --- a/rpcs3/Emu/RSX/VK/VKPresent.cpp +++ b/rpcs3/Emu/RSX/VK/VKPresent.cpp @@ -831,6 +831,7 @@ void VKGSRender::flip(const rsx::display_flip_info_t& info) : 0; rsx::overlays::set_debug_overlay_text(fmt::format( + "Internal Resolution: %s\n" "RSX Load: %3d%%\n" "draw calls: %17d\n" "submits: %20d\n" @@ -845,6 +846,7 @@ void VKGSRender::flip(const rsx::display_flip_info_t& info) "Flush requests: %13d = %2d (%3d%%) hard faults, %2d unavoidable, %2d misprediction(s), %2d speculation(s)\n" "Texture uploads: %12u (%u from CPU - %02u%%, %u copies avoided)\n" "Vertex cache hits: %10u/%u (%u%%)", + info.stats.framebuffer_stats.to_string(!backend_config.supports_hw_msaa),
get_load(), info.stats.draw_calls, info.stats.submit_count, info.stats.setup_time, info.stats.vertex_upload_time, info.stats.textures_upload_time, info.stats.draw_exec_time, info.stats.flip_time, num_dirty_textures, texture_memory_size, tmp_texture_memory_size, diff --git a/rpcs3/Emu/RSX/VK/VKRenderTargets.cpp b/rpcs3/Emu/RSX/VK/VKRenderTargets.cpp index 899c6593e0..d09e248f6c 100644 --- a/rpcs3/Emu/RSX/VK/VKRenderTargets.cpp +++ b/rpcs3/Emu/RSX/VK/VKRenderTargets.cpp @@ -998,7 +998,7 @@ namespace vk return; } - // Memory transfers + // Memory transfers vk::image* target_image = (samples() > 1) ? get_resolve_target_safe(cmd) : this; vk::blitter hw_blitter; const auto dst_bpp = get_bpp(); diff --git a/rpcs3/Emu/RSX/VK/VKResolveHelper.cpp b/rpcs3/Emu/RSX/VK/VKResolveHelper.cpp index ca30abfc06..75a013ddfd 100644 --- a/rpcs3/Emu/RSX/VK/VKResolveHelper.cpp +++ b/rpcs3/Emu/RSX/VK/VKResolveHelper.cpp @@ -253,4 +253,90 @@ namespace vk if (g_depthstencil_resolver) g_depthstencil_resolver->free_resources(); if (g_depthstencil_unresolver) g_depthstencil_unresolver->free_resources(); } + + + void cs_resolve_base::build(const std::string& format_prefix, bool unresolve, bool bgra_swap) + { + create(); + + switch (optimal_group_size) + { + default: + case 64: + cs_wave_x = 8; + cs_wave_y = 8; + break; + case 32: + cs_wave_x = 8; + cs_wave_y = 4; + break; + } + + static const char* resolve_kernel = + #include "Emu/RSX/Program/MSAA/ColorResolvePass.glsl" + ; + + static const char* unresolve_kernel = + #include "Emu/RSX/Program/MSAA/ColorUnresolvePass.glsl" + ; + + const std::pair syntax_replace[] = + { + { "%WORKGROUP_SIZE_X", std::to_string(cs_wave_x) }, + { "%WORKGROUP_SIZE_Y", std::to_string(cs_wave_y) }, + { "%IMAGE_FORMAT", format_prefix }, + { "%BGRA_SWAP", bgra_swap ? "1" : "0" } + }; + + m_src = unresolve ? 
unresolve_kernel : resolve_kernel; + m_src = fmt::replace_all(m_src, syntax_replace); + + rsx_log.notice("Compute shader:\n%s", m_src); + } + + void depth_resolve_base::build(bool resolve_depth, bool resolve_stencil, bool is_unresolve) + { + vs_src = + #include "Emu/RSX/Program/GLSLSnippets/GenericVSPassthrough.glsl" + ; + + static const char* depth_resolver = + #include "Emu/RSX/Program/MSAA/DepthResolvePass.glsl" + ; + + static const char* depth_unresolver = + #include "Emu/RSX/Program/MSAA/DepthUnresolvePass.glsl" + ; + + static const char* stencil_resolver = + #include "Emu/RSX/Program/MSAA/StencilResolvePass.glsl" + ; + + static const char* stencil_unresolver = + #include "Emu/RSX/Program/MSAA/StencilUnresolvePass.glsl" + ; + + static const char* depth_stencil_resolver = + #include "Emu/RSX/Program/MSAA/DepthStencilResolvePass.glsl" + ; + + static const char* depth_stencil_unresolver = + #include "Emu/RSX/Program/MSAA/DepthStencilUnresolvePass.glsl" + ; + + if (resolve_depth && resolve_stencil) + { + fs_src = is_unresolve ? depth_stencil_unresolver : depth_stencil_resolver; + } + else if (resolve_depth) + { + fs_src = is_unresolve ? depth_unresolver : depth_resolver; + } + else if (resolve_stencil) + { + fs_src = is_unresolve ? 
stencil_unresolver : stencil_resolver; + } + + rsx_log.notice("Resolve shader:\n%s", fs_src); + } } \ No newline at end of file diff --git a/rpcs3/Emu/RSX/VK/VKResolveHelper.h b/rpcs3/Emu/RSX/VK/VKResolveHelper.h index 6b83a5af9c..57cee06b86 100644 --- a/rpcs3/Emu/RSX/VK/VKResolveHelper.h +++ b/rpcs3/Emu/RSX/VK/VKResolveHelper.h @@ -21,70 +21,7 @@ namespace vk virtual ~cs_resolve_base() {} - // FIXME: move body to cpp - void build(const std::string& kernel, const std::string& format_prefix, int direction) - { - create(); - - // TODO: Tweak occupancy - switch (optimal_group_size) - { - default: - case 64: - cs_wave_x = 8; - cs_wave_y = 8; - break; - case 32: - cs_wave_x = 8; - cs_wave_y = 4; - break; - } - - const std::pair syntax_replace[] = - { - { "%wx", std::to_string(cs_wave_x) }, - { "%wy", std::to_string(cs_wave_y) }, - }; - - m_src = - "#version 430\n" - "layout(local_size_x=%wx, local_size_y=%wy, local_size_z=1) in;\n" - "\n"; - - m_src = fmt::replace_all(m_src, syntax_replace); - - if (direction == 0) - { - m_src += - "layout(set=0, binding=0, " + format_prefix + ") uniform readonly restrict image2DMS multisampled;\n" - "layout(set=0, binding=1) uniform writeonly restrict image2D resolve;\n"; - } - else - { - m_src += - "layout(set=0, binding=0) uniform writeonly restrict image2DMS multisampled;\n" - "layout(set=0, binding=1, " + format_prefix + ") uniform readonly restrict image2D resolve;\n"; - } - - m_src += - "\n" - "void main()\n" - "{\n" - " ivec2 resolve_size = imageSize(resolve);\n" - " ivec2 aa_size = imageSize(multisampled);\n" - " ivec2 sample_count = resolve_size / aa_size;\n" - "\n" - " if (any(greaterThanEqual(gl_GlobalInvocationID.xy, uvec2(resolve_size)))) return;" - "\n" - " ivec2 resolve_coords = ivec2(gl_GlobalInvocationID.xy);\n" - " ivec2 aa_coords = resolve_coords / sample_count;\n" - " ivec2 sample_loc = ivec2(resolve_coords % sample_count);\n" - " int sample_index = sample_loc.x + (sample_loc.y * sample_count.y);\n" - + kernel + - 
"}\n"; - - rsx_log.notice("Compute shader:\n%s", m_src); - } + void build(const std::string& format_prefix, bool unresolve, bool bgra_swap); std::vector> get_descriptor_layout() override { @@ -144,14 +81,8 @@ namespace vk { cs_resolve_task(const std::string& format_prefix, bool bgra_swap = false) { - // Allow rgba->bgra transformation for old GeForce cards - const std::string swizzle = bgra_swap? ".bgra" : ""; - - std::string kernel = - " vec4 aa_sample = imageLoad(multisampled, aa_coords, sample_index);\n" - " imageStore(resolve, resolve_coords, aa_sample" + swizzle + ");\n"; - - build(kernel, format_prefix, 0); + // BGRA-swap flag is a workaround to swap channels for old GeForce cards with broken compute image handling + build(format_prefix, false, bgra_swap); } }; @@ -159,14 +90,8 @@ namespace vk { cs_unresolve_task(const std::string& format_prefix, bool bgra_swap = false) { - // Allow rgba->bgra transformation for old GeForce cards - const std::string swizzle = bgra_swap? ".bgra" : ""; - - std::string kernel = - " vec4 resolved_sample = imageLoad(resolve, resolve_coords);\n" - " imageStore(multisampled, aa_coords, sample_index, resolved_sample" + swizzle + ");\n"; - - build(kernel, format_prefix, 1); + // BGRA-swap flag is a workaround to swap channels for old GeForce cards with broken compute image handling + build(format_prefix, true, bgra_swap); } }; @@ -184,43 +109,12 @@ namespace vk // Depth-stencil buffers are almost never filterable, and we do not need it here (1:1 mapping) m_sampler_filter = VK_FILTER_NEAREST; + + // Do not use UBOs + m_num_uniform_buffers = 0; } - void build(const std::string& kernel, const std::string& extensions, const std::vector& inputs) - { - vs_src = - "#version 450\n" - "#extension GL_ARB_separate_shader_objects : enable\n\n" - "\n" - "void main()\n" - "{\n" - " vec2 positions[] = {vec2(-1., -1.), vec2(1., -1.), vec2(-1., 1.), vec2(1., 1.)};\n" - " gl_Position = vec4(positions[gl_VertexIndex % 4], 0., 1.);\n" - "}\n"; - - fs_src 
= - "#version 420\n" - "#extension GL_ARB_separate_shader_objects : enable\n"; - fs_src += extensions + - "\n" - "layout(push_constant) uniform static_data{ ivec" + std::to_string(static_parameters_width) + " regs[1]; };\n"; - - int binding = 1; - for (const auto& input : inputs) - { - fs_src += "layout(set=0, binding=" + std::to_string(binding++) + ") uniform " + input + ";\n"; - } - - fs_src += - "//layout(pixel_center_integer) in vec4 gl_FragCoord;\n" - "\n" - "void main()\n" - "{\n"; - fs_src += kernel + - "}\n"; - - rsx_log.notice("Resolve shader:\n%s", fs_src); - } + void build(bool resolve_depth, bool resolve_stencil, bool unresolve); std::vector get_push_constants() override { @@ -263,15 +157,7 @@ namespace vk { depthonly_resolve() { - build( - " ivec2 out_coord = ivec2(gl_FragCoord.xy);\n" - " ivec2 in_coord = (out_coord / regs[0].xy);\n" - " ivec2 sample_loc = out_coord % regs[0].xy;\n" - " int sample_index = sample_loc.x + (sample_loc.y * regs[0].y);\n" - " float frag_depth = texelFetch(fs0, in_coord, sample_index).x;\n" - " gl_FragDepth = frag_depth;\n", - "", - { "sampler2DMS fs0" }); + build(true, false, false); } void run(vk::command_buffer& cmd, vk::viewable_image* msaa_image, vk::viewable_image* resolve_image, VkRenderPass render_pass) @@ -291,15 +177,7 @@ namespace vk { depthonly_unresolve() { - build( - " ivec2 pixel_coord = ivec2(gl_FragCoord.xy);\n" - " pixel_coord *= regs[0].xy;\n" - " pixel_coord.x += (gl_SampleID % regs[0].x);\n" - " pixel_coord.y += (gl_SampleID / regs[0].x);\n" - " float frag_depth = texelFetch(fs0, pixel_coord, 0).x;\n" - " gl_FragDepth = frag_depth;\n", - "", - { "sampler2D fs0" }); + build(true, false, true); } void run(vk::command_buffer& cmd, vk::viewable_image* msaa_image, vk::viewable_image* resolve_image, VkRenderPass render_pass) @@ -340,15 +218,7 @@ namespace vk static_parameters_width = 3; - build( - " ivec2 out_coord = ivec2(gl_FragCoord.xy);\n" - " ivec2 in_coord = (out_coord / regs[0].xy);\n" - " ivec2 
sample_loc = out_coord % regs[0].xy;\n" - " int sample_index = sample_loc.x + (sample_loc.y * regs[0].y);\n" - " uint frag_stencil = texelFetch(fs0, in_coord, sample_index).x;\n" - " if ((frag_stencil & uint(regs[0].z)) == 0) discard;\n", - "", - {"usampler2DMS fs0"}); + build(false, true, false); } void get_dynamic_state_entries(std::vector& state_descriptors) override @@ -407,15 +277,7 @@ namespace vk static_parameters_width = 3; - build( - " ivec2 pixel_coord = ivec2(gl_FragCoord.xy);\n" - " pixel_coord *= regs[0].xy;\n" - " pixel_coord.x += (gl_SampleID % regs[0].x);\n" - " pixel_coord.y += (gl_SampleID / regs[0].x);\n" - " uint frag_stencil = texelFetch(fs0, pixel_coord, 0).x;\n" - " if ((frag_stencil & uint(regs[0].z)) == 0) discard;\n", - "", - { "usampler2D fs0" }); + build(false, true, /* unresolve: selects StencilUnresolvePass.glsl */ true); } void get_dynamic_state_entries(std::vector& state_descriptors) override @@ -468,19 +330,7 @@ namespace vk renderpass_config.set_stencil_mask(0xFF); m_num_usable_samplers = 2; - build( - " ivec2 out_coord = ivec2(gl_FragCoord.xy);\n" - " ivec2 in_coord = (out_coord / regs[0].xy);\n" - " ivec2 sample_loc = out_coord % ivec2(regs[0].xy);\n" - " int sample_index = sample_loc.x + (sample_loc.y * regs[0].y);\n" - " float frag_depth = texelFetch(fs0, in_coord, sample_index).x;\n" - " uint frag_stencil = texelFetch(fs1, in_coord, sample_index).x;\n" - " gl_FragDepth = frag_depth;\n" - " gl_FragStencilRefARB = int(frag_stencil);\n", - - "#extension GL_ARB_shader_stencil_export : enable\n", - - { "sampler2DMS fs0", "usampler2DMS fs1" }); + build(true, true, false); } void run(vk::command_buffer& cmd, vk::viewable_image* msaa_image, vk::viewable_image* resolve_image, VkRenderPass render_pass) @@ -510,19 +360,7 @@ namespace vk renderpass_config.set_stencil_mask(0xFF); m_num_usable_samplers = 2; - build( - " ivec2 pixel_coord = ivec2(gl_FragCoord.xy);\n" - " pixel_coord *= regs[0].xy;\n" - " pixel_coord.x += (gl_SampleID % regs[0].x);\n" - " pixel_coord.y += (gl_SampleID /
regs[0].x);\n" - " float frag_depth = texelFetch(fs0, pixel_coord, 0).x;\n" - " uint frag_stencil = texelFetch(fs1, pixel_coord, 0).x;\n" - " gl_FragDepth = frag_depth;\n" - " gl_FragStencilRefARB = int(frag_stencil);\n", - - "#extension GL_ARB_shader_stencil_export : enable\n", - - { "sampler2D fs0", "usampler2D fs1" }); + build(true, true, true); } void run(vk::command_buffer& cmd, vk::viewable_image* msaa_image, vk::viewable_image* resolve_image, VkRenderPass render_pass) diff --git a/rpcs3/Emu/RSX/VK/vkutils/device.cpp b/rpcs3/Emu/RSX/VK/vkutils/device.cpp index aa326982f6..9a4471b784 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/device.cpp +++ b/rpcs3/Emu/RSX/VK/vkutils/device.cpp @@ -752,7 +752,25 @@ namespace vk device_fault_info.pNext = const_cast(device.pNext); device_fault_info.deviceFault = VK_TRUE; device_fault_info.deviceFaultVendorBinary = VK_FALSE; - device_fault_info.pNext = &device_fault_info; + device.pNext = &device_fault_info; + } + + VkPhysicalDeviceConditionalRenderingFeaturesEXT conditional_rendering_info{}; + if (pgpu->optional_features_support.conditional_rendering) + { + conditional_rendering_info.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT; + conditional_rendering_info.pNext = const_cast(device.pNext); + conditional_rendering_info.conditionalRendering = VK_TRUE; + device.pNext = &conditional_rendering_info; + } + + VkPhysicalDeviceFragmentShaderBarycentricFeaturesKHR shader_barycentric_info{}; + if (pgpu->optional_features_support.barycentric_coords) + { + shader_barycentric_info.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_BARYCENTRIC_FEATURES_KHR; + shader_barycentric_info.pNext = const_cast(device.pNext); + shader_barycentric_info.fragmentShaderBarycentric = VK_TRUE; + device.pNext = &shader_barycentric_info; } if (auto error = vkCreateDevice(*pgpu, &device, nullptr, &dev)) diff --git a/rpcs3/Emu/RSX/VK/vkutils/shared.cpp b/rpcs3/Emu/RSX/VK/vkutils/shared.cpp index 16bc0066ae..b63bf89ec5 100644 --- 
a/rpcs3/Emu/RSX/VK/vkutils/shared.cpp +++ b/rpcs3/Emu/RSX/VK/vkutils/shared.cpp @@ -29,37 +29,24 @@ namespace vk std::vector vendor_binary_data; std::string fault_description; -#ifdef _MSC_VER - __try + // Retrieve sizes + g_render_device->_vkGetDeviceFaultInfoEXT(*g_render_device, &fault_counts, nullptr); + + // Resize arrays and fill + address_info.resize(fault_counts.addressInfoCount); + vendor_info.resize(fault_counts.vendorInfoCount); + vendor_binary_data.resize(fault_counts.vendorBinarySize); + + VkDeviceFaultInfoEXT fault_info { -#endif - // Retrieve sizes - g_render_device->_vkGetDeviceFaultInfoEXT(*g_render_device, &fault_counts, nullptr); - - // Resize arrays and fill - address_info.resize(fault_counts.addressInfoCount); - vendor_info.resize(fault_counts.vendorInfoCount); - vendor_binary_data.resize(fault_counts.vendorBinarySize); - - VkDeviceFaultInfoEXT fault_info - { - .sType = VK_STRUCTURE_TYPE_DEVICE_FAULT_INFO_EXT, - .pAddressInfos = address_info.data(), - .pVendorInfos = vendor_info.data(), - .pVendorBinaryData = vendor_binary_data.data() - }; - g_render_device->_vkGetDeviceFaultInfoEXT(*g_render_device, &fault_counts, &fault_info); - - fault_description = fault_info.description; -#ifdef _MSC_VER - } - __except (EXCEPTION_EXECUTE_HANDLER) - { - rsx_log.error("Driver crashed retrieving extended crash information. Are you running on an NVIDIA card?"); - return "Extended fault information is not available. 
The driver crashed when retrieving the details."; - } -#endif + .sType = VK_STRUCTURE_TYPE_DEVICE_FAULT_INFO_EXT, + .pAddressInfos = address_info.data(), + .pVendorInfos = vendor_info.data(), + .pVendorBinaryData = vendor_binary_data.data() + }; + g_render_device->_vkGetDeviceFaultInfoEXT(*g_render_device, &fault_counts, &fault_info); + fault_description = fault_info.description; std::string fault_message = fmt::format( "Device Fault Information:\n" "Fault Summary:\n" diff --git a/rpcs3/GLGSRender.vcxproj b/rpcs3/GLGSRender.vcxproj index 013888ead4..dbaffc5d00 100644 --- a/rpcs3/GLGSRender.vcxproj +++ b/rpcs3/GLGSRender.vcxproj @@ -60,6 +60,7 @@ + @@ -95,6 +96,7 @@ + diff --git a/rpcs3/GLGSRender.vcxproj.filters b/rpcs3/GLGSRender.vcxproj.filters index 28670f9bb5..4866b14643 100644 --- a/rpcs3/GLGSRender.vcxproj.filters +++ b/rpcs3/GLGSRender.vcxproj.filters @@ -48,6 +48,7 @@ upscalers\fsr1 + @@ -120,6 +121,7 @@ upscalers + diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index e946326e05..07ca501ff3 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -112,6 +112,7 @@ + @@ -1030,6 +1031,14 @@ + + + + + + + + diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters index 23c7c34fb6..cf3626a7f5 100644 --- a/rpcs3/emucore.vcxproj.filters +++ b/rpcs3/emucore.vcxproj.filters @@ -130,6 +130,9 @@ {caf84300-5c45-4340-bd9a-8ac859409351} + + {ce6d6b90-8313-4273-b46c-d92bd450c002} + @@ -1219,6 +1222,9 @@ Emu\GPU\RSX\Core + + Emu\GPU\RSX\Core + Crypto @@ -2802,5 +2808,29 @@ Emu\CPU\Backends\AArch64 + + Emu\GPU\RSX\Program\MSAA + + + Emu\GPU\RSX\Program\MSAA + + + Emu\GPU\RSX\Program\MSAA + + + Emu\GPU\RSX\Program\MSAA + + + Emu\GPU\RSX\Program\MSAA + + + Emu\GPU\RSX\Program\MSAA + + + Emu\GPU\RSX\Program\MSAA + + + Emu\GPU\RSX\Program\MSAA + \ No newline at end of file diff --git a/rpcs3/rpcs3qt/render_creator.cpp b/rpcs3/rpcs3qt/render_creator.cpp index c939c729fb..8e16e66d95 100644 --- a/rpcs3/rpcs3qt/render_creator.cpp +++ 
b/rpcs3/rpcs3qt/render_creator.cpp @@ -98,7 +98,7 @@ render_creator::render_creator(QObject *parent) : QObject(parent) #endif // Graphics Adapter - Vulkan = render_info(vulkan_adapters, supports_vulkan, emu_settings_type::VulkanAdapter, true); + Vulkan = render_info(vulkan_adapters, supports_vulkan, emu_settings_type::VulkanAdapter); OpenGL = render_info(); NullRender = render_info(); diff --git a/rpcs3/rpcs3qt/render_creator.h b/rpcs3/rpcs3qt/render_creator.h index 8203f09bb8..c433c77353 100644 --- a/rpcs3/rpcs3qt/render_creator.h +++ b/rpcs3/rpcs3qt/render_creator.h @@ -23,16 +23,14 @@ public: emu_settings_type type = emu_settings_type::VulkanAdapter; bool supported = true; bool has_adapters = true; - bool has_msaa = false; render_info() : has_adapters(false) {} - render_info(QStringList adapters, bool supported, emu_settings_type type, bool has_msaa) + render_info(QStringList adapters, bool supported, emu_settings_type type) : adapters(std::move(adapters)) , type(type) - , supported(supported) - , has_msaa(has_msaa) {} + , supported(supported) {} }; bool abort_requested = false; diff --git a/rpcs3/rpcs3qt/settings_dialog.cpp b/rpcs3/rpcs3qt/settings_dialog.cpp index 12c62c4dbf..02bc57a3aa 100644 --- a/rpcs3/rpcs3qt/settings_dialog.cpp +++ b/rpcs3/rpcs3qt/settings_dialog.cpp @@ -838,12 +838,6 @@ settings_dialog::settings_dialog(std::shared_ptr gui_settings, std } } - // Enable/disable MSAA depending on renderer - ui->antiAliasing->setEnabled(renderer.has_msaa); - ui->antiAliasing->blockSignals(true); - ui->antiAliasing->setCurrentText(renderer.has_msaa ? 
qstr(m_emu_settings->GetSetting(emu_settings_type::MSAA)) : tr("Disabled", "MSAA")); - ui->antiAliasing->blockSignals(false); - ui->graphicsAdapterBox->clear(); // Fill combobox with placeholder if no adapters needed @@ -1070,7 +1064,7 @@ settings_dialog::settings_dialog(std::shared_ptr gui_settings, std get_audio_output_devices(false); change_audio_output_device(0); // Set device to 'Default' }); - + m_emu_settings->EnhanceComboBox(ui->combo_audio_channel_layout, emu_settings_type::AudioChannelLayout); SubscribeTooltip(ui->gb_audio_channel_layout, tooltips.settings.audio_channel_layout); @@ -1512,7 +1506,7 @@ settings_dialog::settings_dialog(std::shared_ptr gui_settings, std m_emu_settings->SetSetting(emu_settings_type::PSNCountry, country_code.toString().toStdString()); }); - + SubscribeTooltip(ui->gb_psnCountryBox, tooltips.settings.psn_country); if (!game)