mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-04-20 19:45:20 +00:00
rsx/gl: Add basic interpreter support to OGL
- Adds basic interpreter functionality. - Flow control and other instructions not yet implemented.
This commit is contained in:
parent
65e9e568b5
commit
0072df7f20
15 changed files with 1721 additions and 33 deletions
553
rpcs3/Emu/RSX/Common/Interpreter/FragmentInterpreter.glsl
Normal file
553
rpcs3/Emu/RSX/Common/Interpreter/FragmentInterpreter.glsl
Normal file
|
@ -0,0 +1,553 @@
|
|||
R"(
|
||||
// Colour render-target outputs; main() fills these from the register file
// after the interpreted program has finished.
layout(location = 0) out vec4 ocol0;
layout(location = 1) out vec4 ocol1;
layout(location = 2) out vec4 ocol2;
layout(location = 3) out vec4 ocol3;
|
||||
|
||||
// ---------------------------------------------------------------------------
// Fragment program opcodes
// ---------------------------------------------------------------------------
#define RSX_FP_OPCODE_NOP       0x00 // No-Operation
#define RSX_FP_OPCODE_MOV       0x01 // Move
#define RSX_FP_OPCODE_MUL       0x02 // Multiply
#define RSX_FP_OPCODE_ADD       0x03 // Add
#define RSX_FP_OPCODE_MAD       0x04 // Multiply-Add
#define RSX_FP_OPCODE_DP3       0x05 // 3-component Dot Product
#define RSX_FP_OPCODE_DP4       0x06 // 4-component Dot Product
#define RSX_FP_OPCODE_DST       0x07 // Distance
#define RSX_FP_OPCODE_MIN       0x08 // Minimum
#define RSX_FP_OPCODE_MAX       0x09 // Maximum
#define RSX_FP_OPCODE_SLT       0x0A // Set-If-LessThan
#define RSX_FP_OPCODE_SGE       0x0B // Set-If-GreaterEqual
#define RSX_FP_OPCODE_SLE       0x0C // Set-If-LessEqual
#define RSX_FP_OPCODE_SGT       0x0D // Set-If-GreaterThan
#define RSX_FP_OPCODE_SNE       0x0E // Set-If-NotEqual
#define RSX_FP_OPCODE_SEQ       0x0F // Set-If-Equal
#define RSX_FP_OPCODE_FRC       0x10 // Fraction (fract)
#define RSX_FP_OPCODE_FLR       0x11 // Floor
#define RSX_FP_OPCODE_KIL       0x12 // Kill fragment
#define RSX_FP_OPCODE_PK4       0x13 // Pack four signed 8-bit values
#define RSX_FP_OPCODE_UP4       0x14 // Unpack four signed 8-bit values
#define RSX_FP_OPCODE_DDX       0x15 // Partial-derivative in x (Screen space derivative w.r.t. x)
#define RSX_FP_OPCODE_DDY       0x16 // Partial-derivative in y (Screen space derivative w.r.t. y)
#define RSX_FP_OPCODE_TEX       0x17 // Texture lookup
#define RSX_FP_OPCODE_TXP       0x18 // Texture sample with projection (Projective texture lookup)
#define RSX_FP_OPCODE_TXD       0x19 // Texture sample with partial differentiation (Texture lookup with derivatives)
#define RSX_FP_OPCODE_RCP       0x1A // Reciprocal
#define RSX_FP_OPCODE_RSQ       0x1B // Reciprocal Square Root
#define RSX_FP_OPCODE_EX2       0x1C // Exponentiation base 2
#define RSX_FP_OPCODE_LG2       0x1D // Log base 2
#define RSX_FP_OPCODE_LIT       0x1E // Lighting coefficients
#define RSX_FP_OPCODE_LRP       0x1F // Linear Interpolation
#define RSX_FP_OPCODE_STR       0x20 // Set-If-True
#define RSX_FP_OPCODE_SFL       0x21 // Set-If-False
#define RSX_FP_OPCODE_COS       0x22 // Cosine
#define RSX_FP_OPCODE_SIN       0x23 // Sine
#define RSX_FP_OPCODE_PK2       0x24 // Pack two 16-bit floats
#define RSX_FP_OPCODE_UP2       0x25 // Unpack two 16-bit floats
#define RSX_FP_OPCODE_POW       0x26 // Power
#define RSX_FP_OPCODE_PKB       0x27 // Pack bytes
#define RSX_FP_OPCODE_UPB       0x28 // Unpack bytes
#define RSX_FP_OPCODE_PK16      0x29 // Pack 16 bits
#define RSX_FP_OPCODE_UP16      0x2A // Unpack 16
#define RSX_FP_OPCODE_BEM       0x2B // Bump-environment map (a.k.a. 2D coordinate transform)
#define RSX_FP_OPCODE_PKG       0x2C // Pack with sRGB transformation
#define RSX_FP_OPCODE_UPG       0x2D // Unpack gamma
#define RSX_FP_OPCODE_DP2A      0x2E // 2-component dot product with scalar addition
#define RSX_FP_OPCODE_TXL       0x2F // Texture sample with explicit LOD
#define RSX_FP_OPCODE_TXB       0x31 // Texture sample with bias
#define RSX_FP_OPCODE_TEXBEM    0x33
#define RSX_FP_OPCODE_TXPBEM    0x34
#define RSX_FP_OPCODE_BEMLUM    0x35
#define RSX_FP_OPCODE_REFL      0x36 // Reflection vector
#define RSX_FP_OPCODE_TIMESWTEX 0x37
#define RSX_FP_OPCODE_DP2       0x38 // 2-component dot product
#define RSX_FP_OPCODE_NRM       0x39 // Normalize
#define RSX_FP_OPCODE_DIV       0x3A // Division
#define RSX_FP_OPCODE_DIVSQ     0x3B // Divide by Square Root
#define RSX_FP_OPCODE_LIF       0x3C // Final part of LIT
#define RSX_FP_OPCODE_FENCT     0x3D // Fence T?
#define RSX_FP_OPCODE_FENCB     0x3E // Fence B?
#define RSX_FP_OPCODE_BRK       0x40 // Break
#define RSX_FP_OPCODE_CAL       0x41 // Subroutine call
#define RSX_FP_OPCODE_IFE       0x42 // If
#define RSX_FP_OPCODE_LOOP      0x43 // Loop
#define RSX_FP_OPCODE_REP       0x44 // Repeat
#define RSX_FP_OPCODE_RET       0x45 // Return

// Bits of the 3-bit execution mask tested against the condition register
#define EXEC_LT 1
#define EXEC_EQ 2
#define EXEC_GT 4

// Values of the 2-bit source register type field
#define RSX_FP_REGISTER_TYPE_TEMP     0
#define RSX_FP_REGISTER_TYPE_INPUT    1
#define RSX_FP_REGISTER_TYPE_CONSTANT 2
#define RSX_FP_REGISTER_TYPE_UNKNOWN  3

// Masks tested against shader_control
#define CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT    0xe
#define CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS 0x40

// Bitfield access on the instruction currently held in the global 'inst'
#define GET_BITS(word, offset, count) bitfieldExtract(inst.words[word], offset, count)
#define TEST_BIT(word, offset) (GET_BITS(word, offset, 1) > 0)

// Masked move: components of d are replaced by s where m is true
#define reg_mov(d, s, m) d = mix(d, s, m)
|
||||
|
||||
// Returns true when any bit of 'mask' is set in shader_control.
bool shader_attribute(const in uint mask)
{
	const bool any_bit_set = (shader_control & mask) != 0;
	return any_bit_set;
}
|
||||
|
||||
// Old-school distance vector: (1, a.y * b.y, a.z, b.w).
vec4 _distance(const in vec4 a, const in vec4 b)
{
	vec4 result;
	result.x = 1.;
	result.y = a.y * b.y;
	result.z = a.z;
	result.w = b.w;
	return result;
}
|
||||
|
||||
// Applies an 8-bit swizzle code to 'value'.
// The code holds four 2-bit component selectors: bits 0-1 select the source
// of .x, bits 2-3 of .y, bits 4-5 of .z and bits 6-7 of .w.
// A handful of codes are matched explicitly with static swizzles; every other
// code falls back to dynamic component indexing.
vec4 shuffle(const in vec4 value, const in uint code)
{
	switch (code)
	{
	case 0xE4: return value;      // xyzw (identity)
	case 0x24: return value.xyzx;
	case 0xA4: return value.xyzz;
	case 0x00: return value.xxxx;
	case 0x55: return value.yyyy;
	case 0xAA: return value.zzzz;
	case 0xFF: return value.wwww;
	case 0x04: return value.xyxx;
	default:
	{
		// Split the code into its four 2-bit selectors in one vector operation
		const uvec4 sel = (uvec4(code) >> uvec4(0, 2, 4, 6)) & uvec4(3);
		return vec4(value[sel.x], value[sel.y], value[sel.z], value[sel.w]);
	}
	}
}
|
||||
|
||||
// The instruction currently being interpreted.
struct instruction_t
{
	uvec4 words;  // The four instruction words after main() has byte-swapped them
	uint opcode;  // Bits 24..29 of word 0
	bool end;     // Bit 0 of word 0; main() stops looping once this is set
};
|
||||
|
||||
// Result multipliers, indexed by the 3-bit scale field read in write_dst()
const float modifier_scale[8] = float[8](1.f, 2.f, 4.f, 8.f, 1.f, 0.5f, 0.25f, 0.125f);

// Register files; write_dst()/read_src() pick regs16 or regs32 from a per-operand bit
vec4 regs16[48];
vec4 regs32[48];

// Condition registers
vec4 cc[2];

// Number of slots the current instruction occupies; read_src() raises it to 2
// when the instruction reads a constant from the following slot
int inst_length = 1;

// Index of the current instruction; starts at -1 so the first 'ip += inst_length' yields 0
int ip = -1;

instruction_t inst;
|
||||
|
||||
// Fetches source operand 'index' (0..2) of the current instruction.
//
// Operand N is described by instruction word N + 1:
//   bits 0-1   register type (RSX_FP_REGISTER_TYPE_*)
//   bits 2-7   temp register index
//   bit  8     set: read from regs16, clear: read from regs32
//   bits 9-16  swizzle code, see shuffle()
//   bit  17    negate
//   abs flag   bit 29 of word 1 for operand 0, bit 18 of the operand's own word otherwise
//
// The input attribute index is shared by all operands (bits 13-16 of word 0).
// Reading a constant marks the instruction as two slots long (inst_length = 2)
// because the value is stored in the slot following the instruction.
//
// Returns the operand after swizzle, abs and negate have been applied.
vec4 read_src(const in int index)
{
	const int src_word = index + 1;
	const uint type = GET_BITS(src_word, 0, 2);

	// Defined fallback for register types without a case below
	// (RSX_FP_REGISTER_TYPE_UNKNOWN); previously this was left uninitialized.
	vec4 value = vec4(0.);

	switch (type)
	{
	case RSX_FP_REGISTER_TYPE_TEMP:
	{
		const uint i = GET_BITS(src_word, 2, 6);
		if (TEST_BIT(src_word, 8))
		{
			value = regs16[i];
		}
		else
		{
			value = regs32[i];
		}
		break;
	}
	case RSX_FP_REGISTER_TYPE_INPUT:
	{
		const uint i = GET_BITS(0, 13, 4);
		switch (i)
		{
		case 0:
			// TODO: wpos
			value = vec4(0.); break;
		case 1:
			value = gl_FrontFacing? in_regs[0] : in_regs[2]; break;
		case 2:
			value = gl_FrontFacing? in_regs[1] : in_regs[3]; break;
		case 3:
			value = fetch_fog_value(fog_mode, in_regs[4]); break;
		case 14:
			value = gl_FrontFacing? vec4(1.) : vec4(-1.); break;
		default:
			value = in_regs[i + 1]; break;
		}

		break;
	}
	case RSX_FP_REGISTER_TYPE_CONSTANT:
	{
		inst_length = 2;

		// Same byte swap as the instruction decode in main()
		const uvec4 result =
			((fp_instructions[ip + 1] << 8) & uvec4(0xFF00FF00)) |
			((fp_instructions[ip + 1] >> 8) & uvec4(0x00FF00FF));
		value = uintBitsToFloat(result);
		break;
	}
	}

	value = shuffle(value, GET_BITS(src_word, 9, 8));

	// abs
	if (index == 0)
	{
		value = (TEST_BIT(1, 29))? abs(value) : value;
	}
	else
	{
		value = (TEST_BIT(src_word, 18))? abs(value) : value;
	}

	// neg
	return (TEST_BIT(src_word, 17))? -value : value;
}
|
||||
|
||||
// Reads the condition register selected by bit 31 of word 1 and applies the
// swizzle code stored in bits 21-28 of the same word.
vec4 read_cond()
{
	const uint cc_index = GET_BITS(1, 31, 1);
	const uint swizzle_code = GET_BITS(1, 21, 8);
	return shuffle(cc[cc_index], swizzle_code);
}
|
||||
|
||||
// Samples the texture unit named by bits 17-20 of word 0, with a LOD bias.
//
// coord: texture coordinate; .xy is multiplied by the unit's scale factor first.
// bias:  forwarded to texture() as the bias argument.
//
// Returns (0, 0, 0, 1) when IS_TEXTURE_RESIDENT reports the unit as not resident.
// The sampler dimensionality comes from the unit's 2-bit field in texture_control.
vec4 _texture(in vec4 coord, float bias)
{
	const uint tex_num = GET_BITS(0, 17, 4);
	if (!IS_TEXTURE_RESIDENT(tex_num))
	{
		return vec4(0., 0., 0., 1.);
	}

	// Two bits per texture unit, hence the doubled bit offset
	const uint type = bitfieldExtract(texture_control, int(tex_num + tex_num), 2);
	coord.xy *= texture_parameters[tex_num].scale;

	if (type == 0)
	{
		return texture(SAMPLER1D(tex_num), coord.x, bias);
	}
	else if (type == 1)
	{
		return texture(SAMPLER2D(tex_num), coord.xy, bias);
	}
	else if (type == 2)
	{
		return texture(SAMPLER3D(tex_num), coord.xyz, bias);
	}
	else if (type == 3)
	{
		return texture(SAMPLERCUBE(tex_num), coord.xyz, bias);
	}

	return vec4(0.);
}
|
||||
|
||||
// Samples the texture unit named by bits 17-20 of word 0 at an explicit LOD.
//
// coord: texture coordinate; .xy is multiplied by the unit's scale factor first.
// lod:   forwarded to textureLod() as the level of detail.
//
// Returns (0, 0, 0, 1) when IS_TEXTURE_RESIDENT reports the unit as not resident.
// The sampler dimensionality comes from the unit's 2-bit field in texture_control.
vec4 _textureLod(in vec4 coord, float lod)
{
	const uint tex_num = GET_BITS(0, 17, 4);
	if (!IS_TEXTURE_RESIDENT(tex_num))
	{
		return vec4(0., 0., 0., 1.);
	}

	// Two bits per texture unit, hence the doubled bit offset
	const uint type = bitfieldExtract(texture_control, int(tex_num + tex_num), 2);
	coord.xy *= texture_parameters[tex_num].scale;

	if (type == 0)
	{
		return textureLod(SAMPLER1D(tex_num), coord.x, lod);
	}
	else if (type == 1)
	{
		return textureLod(SAMPLER2D(tex_num), coord.xy, lod);
	}
	else if (type == 2)
	{
		return textureLod(SAMPLER3D(tex_num), coord.xyz, lod);
	}
	else if (type == 3)
	{
		return textureLod(SAMPLERCUBE(tex_num), coord.xyz, lod);
	}

	return vec4(0.);
}
|
||||
|
||||
// Commits the result of the current instruction.
//
// Steps, in order:
//   1. If bit 8 of word 0 is set, update the condition register selected by
//      bit 30 of word 1 (masked by the per-component write mask, bits 9-12).
//   2. If bit 30 of word 0 is set there is no destination register: stop.
//   3. If bit 31 of word 0 is set, saturate the value to [0, 1].
//   4. Narrow the write mask with the condition test chosen by the 3-bit
//      execution mask in bits 18-20 of word 1 (7 = always, 0 = never).
//   5. Multiply by modifier_scale[bits 28-30 of word 2].
//   6. Write to regs16 or regs32 (bit 7 of word 0) at index bits 1-6 of word 0.
void write_dst(in vec4 value)
{
	bvec4 inst_mask = bvec4(TEST_BIT(0, 9), TEST_BIT(0, 10), TEST_BIT(0, 11), TEST_BIT(0, 12));

	if (TEST_BIT(0, 8)) // SET COND
	{
		const uint cc_index = GET_BITS(1, 30, 1);
		reg_mov(cc[cc_index], value, inst_mask);
	}

	if (TEST_BIT(0, 30)) // NO DEST
	{
		return;
	}

	if (TEST_BIT(0, 31)) // SAT
	{
		value = clamp(value, 0, 1);
	}

	const uint exec_mask = GET_BITS(1, 18, 3);
	if (exec_mask == 0)
	{
		// No condition can pass, nothing is written
		return;
	}

	if (exec_mask != 0x7)
	{
		const vec4 cond = read_cond();
		const vec4 zero = vec4(0.);
		bvec4 write_mask;

		switch (exec_mask)
		{
		case EXEC_GT | EXEC_EQ:
			write_mask = greaterThanEqual(cond, zero); break;
		case EXEC_LT | EXEC_EQ:
			write_mask = lessThanEqual(cond, zero); break;
		case EXEC_LT | EXEC_GT:
			write_mask = notEqual(cond, zero); break;
		case EXEC_GT:
			write_mask = greaterThan(cond, zero); break;
		case EXEC_LT:
			write_mask = lessThan(cond, zero); break;
		case EXEC_EQ:
			write_mask = equal(cond, zero); break;
		}

		// Component-wise AND of the two boolean masks
		inst_mask = bvec4(uvec4(inst_mask) & uvec4(write_mask));
	}

	value *= modifier_scale[GET_BITS(2, 28, 3)];

	const uint dst_index = GET_BITS(0, 1, 6);
	if (TEST_BIT(0, 7))
	{
		reg_mov(regs16[dst_index], value, inst_mask);
	}
	else
	{
		reg_mov(regs32[dst_index], value, inst_mask);
	}
}
|
||||
|
||||
// Zeroes the registers the program is declared to use.
//
// NOTE: Register count is the number of 'full' registers that will be consumed. Hardware seems to do some renaming.
// NOTE: Attempting to zero-initialize all the registers will slow things to a crawl!
//
// For a register count of N this clears regs32[0..N-1] and regs16[0..2N-1].
// The count is a 6-bit field (up to 63) while both arrays hold 48 entries, so
// both ranges are clamped to the array sizes to avoid out-of-bounds writes.
void initialize()
{
	uint register_count = bitfieldExtract(shader_control, 24, 6);

	const uint full_limit = min(register_count, uint(regs32.length()));
	const uint half_limit = min(register_count * 2u, uint(regs16.length()));

	for (uint i = 0u; i < full_limit; ++i)
	{
		regs32[i] = vec4(0.);
	}

	for (uint j = 0u; j < half_limit; ++j)
	{
		regs16[j] = vec4(0.);
	}
}
|
||||
|
||||
// Fragment program interpreter entry point.
//
// Walks fp_instructions starting at slot 0 until an instruction with the end
// bit (bit 0 of word 0) has been executed. Each instruction is decoded, its
// operands are fetched lazily by operand count (1, 2, then 3 inputs), and the
// result is committed through write_dst(). Afterwards the colour outputs and
// depth are exported from the register file.
//
// Flow control and the pack/unpack family are not implemented yet; such
// opcodes fall through every switch below and write_dst() receives whatever
// 'value' held previously.
void main()
{
	initialize();

	vec4 value, s0, s1, s2;
	inst.end = false;
	bool handled;

	while (!inst.end)
	{
		// Advance past the previous instruction (2 slots if it carried a constant)
		ip += inst_length;
		inst_length = 1;

		// Decode instruction
		// endian swap + word swap
		inst.words =
			((fp_instructions[ip] << 8) & uvec4(0xFF00FF00)) |
			((fp_instructions[ip] >> 8) & uvec4(0x00FF00FF));

		inst.opcode = GET_BITS(0, 24, 6);
		inst.end = TEST_BIT(0, 0);

		// Class 1, no input/output
		switch (inst.opcode)
		{
		case RSX_FP_OPCODE_NOP:
		case RSX_FP_OPCODE_FENCT:
		case RSX_FP_OPCODE_FENCB:
			continue;
		case RSX_FP_OPCODE_KIL:
			discard; return;
		}

		// Class 2, 1 input
		s0 = read_src(0);
		handled = true;
		switch (inst.opcode)
		{
		case RSX_FP_OPCODE_MOV:
			value = s0; break;
		case RSX_FP_OPCODE_FRC:
			value = fract(s0); break;
		case RSX_FP_OPCODE_FLR:
			value = floor(s0); break;
		case RSX_FP_OPCODE_DDX:
			value = dFdx(s0); break;
		case RSX_FP_OPCODE_DDY:
			value = dFdy(s0); break;
		case RSX_FP_OPCODE_RCP:
			value = (1.f / s0.xxxx); break;
		case RSX_FP_OPCODE_RSQ:
			value = inversesqrt(s0.xxxx); break;
		case RSX_FP_OPCODE_EX2:
			value = exp2(s0.xxxx); break;
		case RSX_FP_OPCODE_LG2:
			value = log2(s0.xxxx); break;
		case RSX_FP_OPCODE_STR:
			value = vec4(1.); break;
		case RSX_FP_OPCODE_SFL:
			value = vec4(0.); break;
		case RSX_FP_OPCODE_COS:
			value = cos(s0.xxxx); break;
		case RSX_FP_OPCODE_SIN:
			value = sin(s0.xxxx); break;
		case RSX_FP_OPCODE_NRM:
			// Only xyz is produced; value.w keeps its previous contents
			value.xyz = normalize(s0.xyz); break;
		case RSX_FP_OPCODE_TEX:
			value = _texture(s0, 0.f); break;
		default:
			handled = false;
		}

		if (!handled)
		{
			// Class 3, 2 inputs
			s1 = read_src(1);
			handled = true;
			switch (inst.opcode)
			{
			case RSX_FP_OPCODE_MUL:
				value = s0 * s1; break;
			case RSX_FP_OPCODE_ADD:
				value = s0 + s1; break;
			case RSX_FP_OPCODE_DP2:
				value = dot(s0.xy, s1.xy).xxxx; break;
			case RSX_FP_OPCODE_DP3:
				value = dot(s0.xyz, s1.xyz).xxxx; break;
			case RSX_FP_OPCODE_DP4:
				value = dot(s0, s1).xxxx; break;
			case RSX_FP_OPCODE_DST:
				value = _distance(s0, s1); break;
			case RSX_FP_OPCODE_MIN:
				value = min(s0, s1); break;
			case RSX_FP_OPCODE_MAX:
				value = max(s0, s1); break;
			case RSX_FP_OPCODE_SLT:
				value = vec4(lessThan(s0, s1)); break;
			case RSX_FP_OPCODE_SGE:
				value = vec4(greaterThanEqual(s0, s1)); break;
			case RSX_FP_OPCODE_SLE:
				value = vec4(lessThanEqual(s0, s1)); break;
			case RSX_FP_OPCODE_SGT:
				value = vec4(greaterThan(s0, s1)); break;
			case RSX_FP_OPCODE_SNE:
				value = vec4(notEqual(s0, s1)); break;
			case RSX_FP_OPCODE_SEQ:
				value = vec4(equal(s0, s1)); break;
			case RSX_FP_OPCODE_POW:
				value = pow(s0, s1).xxxx; break;
			case RSX_FP_OPCODE_DIV:
				// The break was missing here, so DIV used to fall through
				// and return the DIVSQ result instead.
				value = s0 / s1.xxxx; break;
			case RSX_FP_OPCODE_DIVSQ:
				value = s0 * inversesqrt(s1.xxxx); break;
			//case RSX_FP_OPCODE_TXP:
			//case RSX_FP_OPCODE_TXD:
			case RSX_FP_OPCODE_TXL:
				value = _textureLod(s0, s1.x); break;
			case RSX_FP_OPCODE_TXB:
				value = _texture(s0, s1.x); break;
			//case RSX_FP_OPCODE_TEXBEM:
			//case RSX_FP_OPCODE_TXPBEM:
			default:
				handled = false;
			}
		}

		if (!handled)
		{
			// Class 4, 3 inputs
			s2 = read_src(2);
			switch (inst.opcode)
			{
			case RSX_FP_OPCODE_MAD:
				value = fma(s0, s1, s2); break;
			case RSX_FP_OPCODE_LRP:
				value = mix(s1, s2, s0); break;
			case RSX_FP_OPCODE_DP2A:
				value = dot(s0.xy, s1.xy).xxxx + s2.xxxx; break;
			}
		}

		// Not implemented yet:
		//   Flow control: BRK, CAL, IFE, LOOP, REP, RET
		//   Other: PK4, UP4, LIT, LIF, PK2, UP2, PKB, UPB, PK16, UP16,
		//          BEM, PKG, UPG, BEMLUM, REFL, TIMESWTEX

		write_dst(value);
	}

	// Colour export. Each render target gets its own register; previously all
	// of these were assigned to ocol1, leaving ocol2 and ocol3 unwritten.
	if (!shader_attribute(CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS))
	{
		ocol0 = regs16[0];
		ocol1 = regs16[4];
		ocol2 = regs16[6];
		ocol3 = regs16[8];
	}
	else
	{
		ocol0 = regs32[0];
		ocol1 = regs32[2];
		ocol2 = regs32[3];
		ocol3 = regs32[4];
	}

	// Depth export
	if (shader_attribute(CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT))
	{
		gl_FragDepth = regs32[1].z;
	}
	else
	{
		gl_FragDepth = gl_FragCoord.z;
	}
}
|
||||
|
||||
)"
|
586
rpcs3/Emu/RSX/Common/Interpreter/VertexInterpreter.glsl
Normal file
586
rpcs3/Emu/RSX/Common/Interpreter/VertexInterpreter.glsl
Normal file
|
@ -0,0 +1,586 @@
|
|||
R"(
|
||||
|
||||
// ---------------------------------------------------------------------------
// Vertex program scalar opcodes
// ---------------------------------------------------------------------------
#define RSX_SCA_OPCODE_NOP 0x00 // No-Operation
#define RSX_SCA_OPCODE_MOV 0x01 // Move (copy)
#define RSX_SCA_OPCODE_RCP 0x02 // Reciprocal
#define RSX_SCA_OPCODE_RCC 0x03 // Reciprocal clamped
#define RSX_SCA_OPCODE_RSQ 0x04 // Reciprocal square root
#define RSX_SCA_OPCODE_EXP 0x05 // Exponential base 2 (low-precision)
#define RSX_SCA_OPCODE_LOG 0x06 // Logarithm base 2 (low-precision)
#define RSX_SCA_OPCODE_LIT 0x07 // Lighting calculation
#define RSX_SCA_OPCODE_BRA 0x08 // Branch
#define RSX_SCA_OPCODE_BRI 0x09 // Branch by CC register
#define RSX_SCA_OPCODE_CAL 0x0a // Subroutine call
#define RSX_SCA_OPCODE_CLI 0x0b // Subroutine call by CC register
#define RSX_SCA_OPCODE_RET 0x0c // Return from subroutine
#define RSX_SCA_OPCODE_LG2 0x0d // Logarithm base 2
#define RSX_SCA_OPCODE_EX2 0x0e // Exponential base 2
#define RSX_SCA_OPCODE_SIN 0x0f // Sine function
#define RSX_SCA_OPCODE_COS 0x10 // Cosine function
#define RSX_SCA_OPCODE_BRB 0x11 // Branch by Boolean constant
#define RSX_SCA_OPCODE_CLB 0x12 // Subroutine call by Boolean constant
#define RSX_SCA_OPCODE_PSH 0x13 // Push onto stack
#define RSX_SCA_OPCODE_POP 0x14 // Pop from stack

// ---------------------------------------------------------------------------
// Vertex program vector opcodes
// ---------------------------------------------------------------------------
#define RSX_VEC_OPCODE_NOP 0x00 // No-Operation
#define RSX_VEC_OPCODE_MOV 0x01 // Move
#define RSX_VEC_OPCODE_MUL 0x02 // Multiply
#define RSX_VEC_OPCODE_ADD 0x03 // Addition
#define RSX_VEC_OPCODE_MAD 0x04 // Multiply-Add
#define RSX_VEC_OPCODE_DP3 0x05 // 3-component Dot Product
#define RSX_VEC_OPCODE_DPH 0x06 // Homogeneous Dot Product
#define RSX_VEC_OPCODE_DP4 0x07 // 4-component Dot Product
#define RSX_VEC_OPCODE_DST 0x08 // Calculate distance vector
#define RSX_VEC_OPCODE_MIN 0x09 // Minimum
#define RSX_VEC_OPCODE_MAX 0x0a // Maximum
#define RSX_VEC_OPCODE_SLT 0x0b // Set-If-LessThan
#define RSX_VEC_OPCODE_SGE 0x0c // Set-If-GreaterEqual
#define RSX_VEC_OPCODE_ARL 0x0d // Load to address register (round down)
#define RSX_VEC_OPCODE_FRC 0x0e // Extract fractional part (fraction)
#define RSX_VEC_OPCODE_FLR 0x0f // Round down (floor)
#define RSX_VEC_OPCODE_SEQ 0x10 // Set-If-Equal
#define RSX_VEC_OPCODE_SFL 0x11 // Set-If-False
#define RSX_VEC_OPCODE_SGT 0x12 // Set-If-GreaterThan
#define RSX_VEC_OPCODE_SLE 0x13 // Set-If-LessEqual
#define RSX_VEC_OPCODE_SNE 0x14 // Set-If-NotEqual
#define RSX_VEC_OPCODE_STR 0x15 // Set-If-True
#define RSX_VEC_OPCODE_SSG 0x16 // Convert positive values to 1 and negative values to -1
#define RSX_VEC_OPCODE_TXL 0x19 // Texture fetch

// Values of the 2-bit source register type field
#define RSX_VP_REGISTER_TYPE_TEMP     1
#define RSX_VP_REGISTER_TYPE_INPUT    2
#define RSX_VP_REGISTER_TYPE_CONSTANT 3

// Bits of the 3-bit condition mask tested against the condition register
#define EXEC_LT 1
#define EXEC_EQ 2
#define EXEC_GT 4

// Bitfield access helpers
#define GET_BITS bitfieldExtract
#define TEST_BIT(word, bit) (GET_BITS(word, bit, 1) != 0)

// Masked move: components of d are replaced by s where m is true
#define reg_mov(d, s, m) d = mix(d, s, m)
|
||||
|
||||
// Decoded fields of instruction word 0 (filled in by unpack_D0).
struct D0
{
	uint addr_swz;              // bits 0-1:   component of the address register to use
	uvec4 swizzle;              // bits 2-9:   condition swizzle (w, z, y, x from low to high bits)
	uint cond;                  // bits 10-12: EXEC_* condition mask
	bool cond_test_enable;      // bit 13
	bool cond_update_enable_0;  // bit 14
	uint dst_tmp;               // bits 15-20: vector destination temp, 0x3f meaning none
	uint addr_reg_sel_1;        // bit 24:     which address register a[] to use
	uint cond_reg_sel_1;        // bit 25:     which condition register cc[] to use
	bool saturate;              // bit 26
	bool index_input;           // bit 27
	bool cond_update_enable_1;  // bit 29
	bool vec_result;            // bit 30
};
|
||||
|
||||
// Decoded fields of instruction word 1 (filled in by unpack_D1).
struct D1
{
	uint input_src;   // bits 8-11:  input attribute location
	uint const_src;   // bits 12-21: constant index into vc[]
	uint vec_opcode;  // bits 22-26: RSX_VEC_OPCODE_*
	uint sca_opcode;  // bits 27-31: RSX_SCA_OPCODE_*
};
|
||||
|
||||
// Decoded fields of instruction word 2 (filled in by unpack_D2).
struct D2
{
	uint tex_num;  // bits 8-9
};
|
||||
|
||||
// Decoded fields of instruction word 3 (filled in by unpack_D3).
struct D3
{
	bool end;          // bit 0:      last instruction of the program
	bool index_const;  // bit 1:      constant reads are offset by the address register
	uint dst;          // bits 2-6:   output register index
	uint sca_dst_tmp;  // bits 7-12:  scalar destination temp, 0x3f meaning none
	bvec4 vec_mask;    // bits 13-16: vector write mask (w, z, y, x from low to high bits)
	bvec4 sca_mask;    // bits 17-20: scalar write mask (w, z, y, x from low to high bits)
};
|
||||
|
||||
// Source operand descriptor.
// NOTE(review): not referenced by any code visible in this part of the file;
// read_src() decodes the same kind of fields inline instead.
struct SRC
{
	uint reg_type;
	uint tmp_src;
	uvec4 swizzle;
	bool neg;
	bool abs;
};
|
||||
|
||||
// Splits instruction word 0 into its D0 fields.
D0 unpack_D0(const in uint packed_value)
{
	// The swizzle selectors are stored w-first: w at bit 2, z at 4, y at 6, x at 8
	const uvec4 swizzle = uvec4(
		GET_BITS(packed_value, 8, 2),
		GET_BITS(packed_value, 6, 2),
		GET_BITS(packed_value, 4, 2),
		GET_BITS(packed_value, 2, 2));

	// Constructor arguments follow the member order of D0
	return D0(
		GET_BITS(packed_value, 0, 2),   // addr_swz
		swizzle,                        // swizzle
		GET_BITS(packed_value, 10, 3),  // cond
		TEST_BIT(packed_value, 13),     // cond_test_enable
		TEST_BIT(packed_value, 14),     // cond_update_enable_0
		GET_BITS(packed_value, 15, 6),  // dst_tmp
		GET_BITS(packed_value, 24, 1),  // addr_reg_sel_1
		GET_BITS(packed_value, 25, 1),  // cond_reg_sel_1
		TEST_BIT(packed_value, 26),     // saturate
		TEST_BIT(packed_value, 27),     // index_input
		TEST_BIT(packed_value, 29),     // cond_update_enable_1
		TEST_BIT(packed_value, 30));    // vec_result
}
|
||||
|
||||
// Splits instruction word 1 into its D1 fields.
D1 unpack_D1(const in uint packed_value)
{
	// Constructor arguments follow the member order of D1
	return D1(
		GET_BITS(packed_value, 8, 4),    // input_src
		GET_BITS(packed_value, 12, 10),  // const_src
		GET_BITS(packed_value, 22, 5),   // vec_opcode
		GET_BITS(packed_value, 27, 5));  // sca_opcode
}
|
||||
|
||||
// Splits instruction word 2 into its D2 fields.
D2 unpack_D2(const in uint packed_value)
{
	return D2(GET_BITS(packed_value, 8, 2)); // tex_num
}
|
||||
|
||||
// Splits instruction word 3 into its D3 fields.
D3 unpack_D3(const in uint packed_value)
{
	// Both write masks are stored w-first, so x sits at the highest bit
	const bvec4 vec_mask = bvec4(
		TEST_BIT(packed_value, 16),
		TEST_BIT(packed_value, 15),
		TEST_BIT(packed_value, 14),
		TEST_BIT(packed_value, 13));

	const bvec4 sca_mask = bvec4(
		TEST_BIT(packed_value, 20),
		TEST_BIT(packed_value, 19),
		TEST_BIT(packed_value, 18),
		TEST_BIT(packed_value, 17));

	// Constructor arguments follow the member order of D3
	return D3(
		TEST_BIT(packed_value, 0),     // end
		TEST_BIT(packed_value, 1),     // index_const
		GET_BITS(packed_value, 2, 5),  // dst
		GET_BITS(packed_value, 7, 6),  // sca_dst_tmp
		vec_mask,
		sca_mask);
}
|
||||
|
||||
// Returns true when any bit of 'mask' is set in output_mask.
bool attribute_enabled(const in uint mask)
{
	const bool any_bit_set = (output_mask & mask) != 0;
	return any_bit_set;
}
|
||||
|
||||
// Builds a vector whose component i is the component of 'value' selected by swz[i].
vec4 shuffle(const in vec4 value, const in uvec4 swz)
{
	return vec4(
		ref(value, swz.x),
		ref(value, swz.y),
		ref(value, swz.z),
		ref(value, swz.w));
}
|
||||
|
||||
// Old-school distance vector: (1, a.y * b.y, a.z, b.w).
vec4 _distance(const in vec4 a, const in vec4 b)
{
	vec4 result;
	result.x = 1.;
	result.y = a.y * b.y;
	result.z = a.z;
	result.w = b.w;
	return result;
}
|
||||
|
||||
// Local registers
uvec4 instr;                                 // Raw words of the current instruction
vec4 temp[32];                               // Temporary registers
ivec4 a[2] = ivec4[2](ivec4(0), ivec4(0));   // Address registers
vec4 cc[2] = vec4[2](vec4(0), vec4(0));      // Condition registers
vec4 dest[16];                               // Output registers

// Decoded fields of the current instruction, one struct per instruction word
D0 d0;
D1 d1;
D2 d2;
D3 d3;
|
||||
|
||||
// Commits the result of the scalar operation of the current instruction.
//
// The value is saturated to [0, 1] when d0.saturate is set and then broadcast
// to all components, masked by d3.sca_mask. When no scalar temp destination
// is encoded (sca_dst_tmp == 0x3f) the write goes to the output register
// d3.dst, otherwise to temp[sca_dst_tmp].
void write_sca(in float value)
{
	if (d0.saturate)
	{
		value = clamp(value, 0, 1);
	}

	if (d3.sca_dst_tmp == 0x3f)
	{
		// dest[] has 16 entries while d3.dst is a 5-bit field. Use the same
		// bound as write_vec(); it also rejects 0x1f, which was the only value
		// filtered out before.
		if (d3.dst < 16)
		{
			reg_mov(dest[d3.dst], vec4(value), d3.sca_mask);
		}
	}
	else
	{
		reg_mov(temp[d3.sca_dst_tmp], vec4(value), d3.sca_mask);
	}
}
|
||||
|
||||
// Commits the result of the vector operation of the current instruction.
//
// The value is saturated to [0, 1] when d0.saturate is set. All writes are
// masked by d3.vec_mask.
//   - No temp destination (dst_tmp == 0x3f) and no output write (vec_result
//     clear): only the condition register may be updated, and only when
//     cond_update_enable_1 is set.
//   - Otherwise: write dest[d3.dst] when vec_result is set and the index is
//     in range, and temp[dst_tmp] when a temp destination is encoded.
void write_vec(in vec4 value)
{
	if (d0.saturate)
	{
		value = clamp(value, 0, 1);
	}

	const bool has_temp_dst = (d0.dst_tmp != 0x3f);

	if (!has_temp_dst && !d0.vec_result)
	{
		if (d0.cond_update_enable_1)
		{
			reg_mov(cc[d0.cond_reg_sel_1], value, d3.vec_mask);
		}

		return;
	}

	if (d0.vec_result && d3.dst < 16)
	{
		reg_mov(dest[d3.dst], value, d3.vec_mask);
	}

	if (has_temp_dst)
	{
		reg_mov(temp[d0.dst_tmp], value, d3.vec_mask);
	}
}
|
||||
|
||||
// Returns the value to export for output register 'oid'.
// Yields dest[oid] when bit 'mask_bit' of output_mask is set and the default
// (0, 0, 0, 1) otherwise. Despite the name, nothing is written here.
vec4 write_output(const in int oid, const in int mask_bit)
{
	return attribute_enabled(1 << mask_bit) ? dest[oid] : vec4(0., 0., 0., 1.);
}
|
||||
|
||||
// Returns the address register selected by the current instruction.
ivec4 read_addr_reg()
{
	const ivec4 selected = a[d0.addr_reg_sel_1];
	return selected;
}
|
||||
|
||||
// Assembles the immediate branch target of the current instruction.
// The upper 6 bits come from bits 0-5 of word 2, the lower 3 bits from
// bits 29-31 of word 3.
int branch_addr()
{
	const uint high_bits = GET_BITS(instr.z, 0, 6);
	const uint low_bits = GET_BITS(instr.w, 29, 3);

	// low_bits is below 8, so OR-ing is the same as adding
	return int((high_bits << 3) | low_bits);
}
|
||||
|
||||
// Evaluates a branch on a boolean constant.
// Bits 23-27 of word 3 pick a bit of transform_branch_bits; the branch is
// taken when that bit matches the expected state stored in bit 28 of word 3.
bool static_branch()
{
	const bool expected = TEST_BIT(instr.w, 28);
	const uint bit_index = GET_BITS(instr.w, 23, 5);
	const uint mask = (1 << bit_index);
	const bool actual = (transform_branch_bits & mask) != 0;

	return (expected == actual);
}
|
||||
|
||||
// Compares each component of 'cond' against zero using the relation encoded
// in 'mode' (a combination of EXEC_LT / EXEC_EQ / EXEC_GT).
// Modes without a case below (0 and all three bits set) yield all-false.
bvec4 test_cond(vec4 cond, uint mode)
{
	const vec4 zero = vec4(0.);
	bvec4 result = bvec4(false);

	switch (mode)
	{
	case EXEC_GT | EXEC_EQ:
		result = greaterThanEqual(cond, zero); break;
	case EXEC_LT | EXEC_EQ:
		result = lessThanEqual(cond, zero); break;
	case EXEC_LT | EXEC_GT:
		result = notEqual(cond, zero); break;
	case EXEC_GT:
		result = greaterThan(cond, zero); break;
	case EXEC_LT:
		result = lessThan(cond, zero); break;
	case EXEC_EQ:
		result = equal(cond, zero); break;
	}

	return result;
}
|
||||
|
||||
// Decides whether a condition-register controlled branch is taken.
// A condition mask of 0 never branches and a full mask always branches;
// otherwise the branch is taken when any component of the swizzled condition
// register passes the test.
bool dynamic_branch()
{
	if (d0.cond == 0)
	{
		return false;
	}

	if (d0.cond == (EXEC_LT | EXEC_GT | EXEC_EQ))
	{
		return true;
	}

	const vec4 swizzled_cc = shuffle(cc[d0.cond_reg_sel_1], d0.swizzle);
	const bvec4 passed = test_cond(swizzled_cc, d0.cond);
	return any(passed);
}
|
||||
|
||||
// Fetches source operand 'index' (0..2) of the current instruction.
//
// The 17-bit operand descriptor is gathered from the instruction words:
//   operand 0: bits 0-7 of word 1 (high part) and bits 23-31 of word 2 (low part)
//   operand 1: bits 6-22 of word 2
//   operand 2: bits 0-5 of word 2 (high part) and bits 21-31 of word 3 (low part)
// Descriptor layout:
//   bits 0-1   register type (RSX_VP_REGISTER_TYPE_*)
//   bits 2-7   temp register index
//   bits 8-15  swizzle, 2 bits per component stored w-first (0x1B = identity)
//   bit  16    negate
// The abs flags live in word 0, bits 21, 22 and 23 for operands 0, 1 and 2.
//
// Returns the operand after swizzle, abs and negate have been applied.
// An unknown register type or operand index yields zero instead of an
// undefined value.
vec4 read_src(const in int index)
{
	uint src = 0u;
	vec4 value = vec4(0.);
	bool do_abs = false;

	switch (index)
	{
	case 0:
		src = (GET_BITS(instr.y, 0, 8) << 9) | GET_BITS(instr.z, 23, 9);
		do_abs = TEST_BIT(instr.x, 21);
		break;
	case 1:
		src = GET_BITS(instr.z, 6, 17);
		do_abs = TEST_BIT(instr.x, 22);
		break;
	case 2:
		src = (GET_BITS(instr.z, 0, 6) << 11) | GET_BITS(instr.w, 21, 11);
		do_abs = TEST_BIT(instr.x, 23);
		break;
	}

	const uint reg_type = GET_BITS(src, 0, 2);
	const uint tmp_src = GET_BITS(src, 2, 6);

	switch (reg_type)
	{
	case RSX_VP_REGISTER_TYPE_TEMP:
		value = temp[tmp_src];
		break;

	case RSX_VP_REGISTER_TYPE_INPUT:
		value = read_location(int(d1.input_src));
		break;

	case RSX_VP_REGISTER_TYPE_CONSTANT:
		if (d3.index_const)
		{
			// Constant index offset by the selected address register component
			value = vc[d1.const_src + ref(a[d0.addr_reg_sel_1], d0.addr_swz)];
		}
		else
		{
			value = vc[d1.const_src];
		}
		break;
	}

	// Skip the shuffle when the swizzle is the identity
	if (GET_BITS(src, 8, 8) != 0x1B)
	{
		const uvec4 swz = uvec4(
			GET_BITS(src, 14, 2),
			GET_BITS(src, 12, 2),
			GET_BITS(src, 10, 2),
			GET_BITS(src, 8, 2)
		);

		value = shuffle(value, swz);
	}

	if (do_abs)
	{
		value = abs(value);
	}

	if (TEST_BIT(src, 16))
	{
		value = -value;
	}

	return value;
}
|
||||
|
||||
void main()
|
||||
{
|
||||
// Initialize output registers
|
||||
for (int i = 0; i < 16; ++i)
|
||||
{
|
||||
dest[i] = vec4(0., 0., 0., 1.);
|
||||
}
|
||||
|
||||
int callstack[8];
|
||||
int stack_ptr = 0;
|
||||
int current_instruction = 0;
|
||||
|
||||
d3.end = false;
|
||||
|
||||
while (current_instruction < 512)
|
||||
{
|
||||
if (d3.end)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
instr = vp_instructions[current_instruction];
|
||||
current_instruction++;
|
||||
|
||||
d0 = unpack_D0(instr.x);
|
||||
d1 = unpack_D1(instr.y);
|
||||
d2 = unpack_D2(instr.z);
|
||||
d3 = unpack_D3(instr.w);
|
||||
|
||||
uint vec_opcode = d1.vec_opcode;
|
||||
uint sca_opcode = d1.sca_opcode;
|
||||
|
||||
if (d0.cond_test_enable && d0.cond == 0)
|
||||
{
|
||||
vec_opcode = RSX_VEC_OPCODE_NOP;
|
||||
sca_opcode = RSX_SCA_OPCODE_NOP;
|
||||
}
|
||||
|
||||
if (vec_opcode == RSX_VEC_OPCODE_ARL)
|
||||
{
|
||||
a[d0.dst_tmp] = ivec4(read_src(0));
|
||||
}
|
||||
else if (vec_opcode != RSX_VEC_OPCODE_NOP)
|
||||
{
|
||||
vec4 value = read_src(0);
|
||||
switch (vec_opcode)
|
||||
{
|
||||
case RSX_VEC_OPCODE_MOV: break;
|
||||
case RSX_VEC_OPCODE_MUL: value *= read_src(1); break;
|
||||
case RSX_VEC_OPCODE_ADD: value += read_src(2); break;
|
||||
case RSX_VEC_OPCODE_MAD: value = fma(value, read_src(1), read_src(2)); break;
|
||||
case RSX_VEC_OPCODE_DP3: value = vec4(dot(value.xyz, read_src(1).xyz)); break;
|
||||
case RSX_VEC_OPCODE_DPH: value = vec4(dot(vec4(value.xyz, 1.0), read_src(1))); break;
|
||||
case RSX_VEC_OPCODE_DP4: value = vec4(dot(value, read_src(1))); break;
|
||||
case RSX_VEC_OPCODE_DST: value = _distance(value, read_src(1)); break;
|
||||
case RSX_VEC_OPCODE_MIN: value = min(value, read_src(1)); break;
|
||||
case RSX_VEC_OPCODE_MAX: value = max(value, read_src(1)); break;
|
||||
case RSX_VEC_OPCODE_SLT: value = vec4(lessThan(value, read_src(1))); break;
|
||||
case RSX_VEC_OPCODE_SGE: value = vec4(greaterThanEqual(value, read_src(1))); break;
|
||||
case RSX_VEC_OPCODE_FRC: value = fract(value); break;
|
||||
case RSX_VEC_OPCODE_FLR: value = floor(value); break;
|
||||
case RSX_VEC_OPCODE_SEQ: value = vec4(equal(value, read_src(1))); break;
|
||||
case RSX_VEC_OPCODE_SFL: value = vec4(0); break;
|
||||
case RSX_VEC_OPCODE_SGT: value = vec4(greaterThan(value, read_src(1))); break;
|
||||
case RSX_VEC_OPCODE_SLE: value = vec4(lessThanEqual(value, read_src(1))); break;
|
||||
case RSX_VEC_OPCODE_SNE: value = vec4(notEqual(value, read_src(1))); break;
|
||||
case RSX_VEC_OPCODE_STR: value = vec4(1); break;
|
||||
case RSX_VEC_OPCODE_SSG: value = sign(value); break;
|
||||
}
|
||||
|
||||
write_vec(value);
|
||||
}
|
||||
|
||||
if (sca_opcode != RSX_SCA_OPCODE_NOP)
|
||||
{
|
||||
float value = read_src(2).x;
|
||||
switch (sca_opcode)
|
||||
{
|
||||
case RSX_SCA_OPCODE_MOV: break;
|
||||
case RSX_SCA_OPCODE_RCP: value = 1.0 / value; break;
|
||||
case RSX_SCA_OPCODE_RCC: value = clamp(1.0 / value, 5.42101e-20, 1.884467e19); break;
|
||||
case RSX_SCA_OPCODE_RSQ: value = 1.0 / sqrt(value); break;
|
||||
case RSX_SCA_OPCODE_EXP: value = exp(value); break;
|
||||
case RSX_SCA_OPCODE_LOG: value = log(value); break;
|
||||
//case RSX_SCA_OPCODE_LIT: value = lit_legacy(value); break;
|
||||
case RSX_SCA_OPCODE_LG2: value = log2(value); break;
|
||||
case RSX_SCA_OPCODE_EX2: value = exp2(value); break;
|
||||
case RSX_SCA_OPCODE_SIN: value = sin(value); break;
|
||||
case RSX_SCA_OPCODE_COS: value = cos(value); break;
|
||||
|
||||
case RSX_SCA_OPCODE_BRA:
|
||||
// Jump by address register
|
||||
if (dynamic_branch()) current_instruction = int(read_addr_reg().x);
|
||||
continue;
|
||||
case RSX_SCA_OPCODE_BRI:
|
||||
// Jump immediate
|
||||
if (dynamic_branch()) current_instruction = branch_addr();
|
||||
continue;
|
||||
case RSX_SCA_OPCODE_CAL:
|
||||
// Call immediate
|
||||
if (dynamic_branch())
|
||||
{
|
||||
callstack[stack_ptr] = current_instruction;
|
||||
stack_ptr++;
|
||||
current_instruction = branch_addr();
|
||||
}
|
||||
continue;
|
||||
case RSX_SCA_OPCODE_CLI:
|
||||
// Unknown
|
||||
continue;
|
||||
case RSX_SCA_OPCODE_RET:
|
||||
// Return
|
||||
if (dynamic_branch())
|
||||
{
|
||||
if (stack_ptr == 0) return;
|
||||
current_instruction = callstack[stack_ptr];
|
||||
stack_ptr--;
|
||||
}
|
||||
continue;
|
||||
case RSX_SCA_OPCODE_BRB:
|
||||
// Branch by boolean mask
|
||||
if (static_branch())
|
||||
{
|
||||
current_instruction = branch_addr();
|
||||
}
|
||||
continue;
|
||||
case RSX_SCA_OPCODE_CLB:
|
||||
// Call by boolean mask
|
||||
if (static_branch())
|
||||
{
|
||||
callstack[stack_ptr] = current_instruction;
|
||||
stack_ptr++;
|
||||
current_instruction = branch_addr();
|
||||
}
|
||||
continue;
|
||||
//case RSX_SCA_OPCODE_PSH:
|
||||
//case RSX_SCA_OPCODE_POP:
|
||||
}
|
||||
|
||||
write_sca(value);
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: 2-sided lighting
|
||||
if (attribute_enabled(1 << 0 | 1 << 2))
|
||||
{
|
||||
diff_color = dest[1];
|
||||
diff_color1 = dest[1];
|
||||
}
|
||||
|
||||
if (attribute_enabled(1 << 1 | 1 << 3))
|
||||
{
|
||||
spec_color = dest[2];
|
||||
spec_color1 = dest[2];
|
||||
}
|
||||
|
||||
if (attribute_enabled(1 << 4))
|
||||
{
|
||||
fog_c = dest[5].xxxx;
|
||||
}
|
||||
|
||||
if (attribute_enabled(1 << 5))
|
||||
{
|
||||
gl_PointSize = dest[6].x;
|
||||
}
|
||||
else
|
||||
{
|
||||
gl_PointSize = point_size;
|
||||
}
|
||||
|
||||
if (attribute_enabled(1 << 6 | 1 << 7 | 1 << 8))
|
||||
{
|
||||
gl_ClipDistance[0] = (user_clip_enabled[0].x > 0)? dest[5].y * user_clip_factor[0].x : 0.5f;
|
||||
gl_ClipDistance[1] = (user_clip_enabled[0].y > 0)? dest[5].z * user_clip_factor[0].y : 0.5f;
|
||||
gl_ClipDistance[2] = (user_clip_enabled[0].z > 0)? dest[5].w * user_clip_factor[0].z : 0.5f;
|
||||
}
|
||||
|
||||
if (attribute_enabled(1 << 9 | 1 << 10 | 1 << 11))
|
||||
{
|
||||
gl_ClipDistance[3] = (user_clip_enabled[0].w > 0)? dest[6].y * user_clip_factor[0].w : 0.5f;
|
||||
gl_ClipDistance[4] = (user_clip_enabled[1].x > 0)? dest[6].z * user_clip_factor[1].x : 0.5f;
|
||||
gl_ClipDistance[5] = (user_clip_enabled[1].y > 0)? dest[6].w * user_clip_factor[1].y : 0.5f;
|
||||
}
|
||||
|
||||
tc8 = write_output(15, 12);
|
||||
tc9 = write_output(6, 13);
|
||||
tc0 = write_output(7, 14);
|
||||
tc1 = write_output(8, 15);
|
||||
tc2 = write_output(9, 16);
|
||||
tc3 = write_output(10, 17);
|
||||
tc4 = write_output(11, 18);
|
||||
tc5 = write_output(12, 19);
|
||||
tc6 = write_output(13, 20);
|
||||
tc7 = write_output(14, 21);
|
||||
|
||||
vec4 pos = dest[0] * scale_offset_mat;
|
||||
pos.z = (pos.z + pos.z) - pos.w;
|
||||
gl_Position = pos;
|
||||
}
|
||||
|
||||
)"
|
24
rpcs3/Emu/RSX/Common/ShaderInterpreter.h
Normal file
24
rpcs3/Emu/RSX/Common/ShaderInterpreter.h
Normal file
|
@ -0,0 +1,24 @@
|
|||
#pragma once
|
||||
#include "Utilities/StrFmt.h"
|
||||
|
||||
// Accessors for the GLSL source text of the RSX shader interpreters.
// Each .glsl file consists of a single raw string literal, so including it in
// expression position yields the shader text as a string constant.
namespace program_common
{
	namespace interpreter
	{
		// Returns the GLSL body of the vertex program interpreter.
		// Marked inline because this is a header-defined function: without it, every
		// translation unit including this header would emit its own external definition
		// and the link would fail with duplicate symbols.
		inline std::string get_vertex_interpreter()
		{
			const char* s =
			#include "Interpreter/VertexInterpreter.glsl"
			;
			return s;
		}

		// Returns the GLSL body of the fragment program interpreter.
		// Inline for the same reason as get_vertex_interpreter().
		inline std::string get_fragment_interpreter()
		{
			const char* s =
			#include "Interpreter/FragmentInterpreter.glsl"
			;
			return s;
		}
	}
}
|
|
@ -8,8 +8,15 @@ namespace glsl
|
|||
struct shader_properties;
|
||||
}
|
||||
|
||||
// Forward declaration only: enough to name gl::shader_interpreter (e.g. in a friend
// declaration) without pulling in the interpreter header.
namespace gl { class shader_interpreter; }
|
||||
|
||||
struct GLFragmentDecompilerThread : public FragmentProgramDecompiler
|
||||
{
|
||||
friend class gl::shader_interpreter;
|
||||
|
||||
std::string& m_shader;
|
||||
ParamArray& m_parrDummy;
|
||||
glsl::shader_properties m_shader_props{};
|
||||
|
|
|
@ -196,6 +196,8 @@ void GLGSRender::on_init_thread()
|
|||
m_texture_parameters_buffer = std::make_unique<gl::legacy_ring_buffer>();
|
||||
m_vertex_layout_buffer = std::make_unique<gl::legacy_ring_buffer>();
|
||||
m_index_ring_buffer = std::make_unique<gl::legacy_ring_buffer>();
|
||||
m_vertex_instructions_buffer = std::make_unique<gl::legacy_ring_buffer>();
|
||||
m_fragment_instructions_buffer = std::make_unique<gl::legacy_ring_buffer>();
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -207,6 +209,8 @@ void GLGSRender::on_init_thread()
|
|||
m_texture_parameters_buffer = std::make_unique<gl::ring_buffer>();
|
||||
m_vertex_layout_buffer = std::make_unique<gl::ring_buffer>();
|
||||
m_index_ring_buffer = std::make_unique<gl::ring_buffer>();
|
||||
m_vertex_instructions_buffer = std::make_unique<gl::ring_buffer>();
|
||||
m_fragment_instructions_buffer = std::make_unique<gl::ring_buffer>();
|
||||
}
|
||||
|
||||
m_attrib_ring_buffer->create(gl::buffer::target::texture, 256 * 0x100000);
|
||||
|
@ -218,6 +222,14 @@ void GLGSRender::on_init_thread()
|
|||
m_texture_parameters_buffer->create(gl::buffer::target::uniform, 16 * 0x100000);
|
||||
m_vertex_layout_buffer->create(gl::buffer::target::uniform, 16 * 0x100000);
|
||||
|
||||
if (g_cfg.video.shader_interpreter_mode != shader_interpreter_mode::disabled)
|
||||
{
|
||||
m_vertex_instructions_buffer->create(gl::buffer::target::ssbo, 16 * 0x100000);
|
||||
m_fragment_instructions_buffer->create(gl::buffer::target::ssbo, 16 * 0x100000);
|
||||
|
||||
m_shader_interpreter.create();
|
||||
}
|
||||
|
||||
if (gl_caps.vendor_AMD)
|
||||
{
|
||||
m_identity_index_buffer = std::make_unique<gl::buffer>();
|
||||
|
@ -427,6 +439,16 @@ void GLGSRender::on_exit()
|
|||
m_identity_index_buffer->remove();
|
||||
}
|
||||
|
||||
if (m_vertex_instructions_buffer)
|
||||
{
|
||||
m_vertex_instructions_buffer->remove();
|
||||
}
|
||||
|
||||
if (m_fragment_instructions_buffer)
|
||||
{
|
||||
m_fragment_instructions_buffer->remove();
|
||||
}
|
||||
|
||||
m_null_textures.clear();
|
||||
m_text_printer.close();
|
||||
m_gl_texture_cache.destroy();
|
||||
|
@ -434,6 +456,8 @@ void GLGSRender::on_exit()
|
|||
m_ui_renderer.destroy();
|
||||
m_video_output_pass.destroy();
|
||||
|
||||
m_shader_interpreter.destroy();
|
||||
|
||||
for (u32 i = 0; i < occlusion_query_count; ++i)
|
||||
{
|
||||
auto &query = m_occlusion_query_data[i];
|
||||
|
@ -586,7 +610,8 @@ void GLGSRender::clear_surface(u32 arg)
|
|||
|
||||
bool GLGSRender::load_program()
|
||||
{
|
||||
if (m_graphics_state & rsx::pipeline_state::invalidate_pipeline_bits)
|
||||
const auto interpreter_mode = g_cfg.video.shader_interpreter_mode.get();
|
||||
if (m_interpreter_state = (m_graphics_state & rsx::pipeline_state::invalidate_pipeline_bits))
|
||||
{
|
||||
get_current_fragment_program(fs_sampler_state);
|
||||
verify(HERE), current_fragment_program.valid;
|
||||
|
@ -596,40 +621,55 @@ bool GLGSRender::load_program()
|
|||
current_vertex_program.skip_vertex_input_check = true; //not needed for us since decoding is done server side
|
||||
current_fragment_program.unnormalized_coords = 0; //unused
|
||||
}
|
||||
else if (m_program)
|
||||
else if (m_program &&
|
||||
(m_program != m_shader_interpreter.get() || interpreter_mode == shader_interpreter_mode::forced))
|
||||
{
|
||||
// Program already loaded
|
||||
return true;
|
||||
}
|
||||
|
||||
void* pipeline_properties = nullptr;
|
||||
m_program = m_prog_buffer.get_graphics_pipeline(current_vertex_program, current_fragment_program, pipeline_properties,
|
||||
auto old_program = m_program;
|
||||
if (interpreter_mode != shader_interpreter_mode::forced) [[likely]]
|
||||
{
|
||||
void* pipeline_properties = nullptr;
|
||||
m_program = m_prog_buffer.get_graphics_pipeline(current_vertex_program, current_fragment_program, pipeline_properties,
|
||||
!g_cfg.video.disable_asynchronous_shader_compiler, true).get();
|
||||
|
||||
if (m_prog_buffer.check_cache_missed())
|
||||
{
|
||||
// Notify the user with HUD notification
|
||||
if (g_cfg.misc.show_shader_compilation_hint)
|
||||
if (m_prog_buffer.check_cache_missed())
|
||||
{
|
||||
if (m_overlay_manager)
|
||||
// Notify the user with HUD notification
|
||||
if (g_cfg.misc.show_shader_compilation_hint)
|
||||
{
|
||||
if (auto dlg = m_overlay_manager->get<rsx::overlays::shader_compile_notification>())
|
||||
if (m_overlay_manager)
|
||||
{
|
||||
// Extend duration
|
||||
dlg->touch();
|
||||
}
|
||||
else
|
||||
{
|
||||
// Create dialog but do not show immediately
|
||||
m_overlay_manager->create<rsx::overlays::shader_compile_notification>();
|
||||
if (auto dlg = m_overlay_manager->get<rsx::overlays::shader_compile_notification>())
|
||||
{
|
||||
// Extend duration
|
||||
dlg->touch();
|
||||
}
|
||||
else
|
||||
{
|
||||
// Create dialog but do not show immediately
|
||||
m_overlay_manager->create<rsx::overlays::shader_compile_notification>();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
verify(HERE), m_program;
|
||||
m_program->sync();
|
||||
}
|
||||
}
|
||||
else
|
||||
|
||||
if (!m_program && interpreter_mode != shader_interpreter_mode::disabled)
|
||||
{
|
||||
verify(HERE), m_program;
|
||||
m_program->sync();
|
||||
// Fall back to interpreter
|
||||
m_program = m_shader_interpreter.get();
|
||||
if (old_program != m_program)
|
||||
{
|
||||
// Program has changed, reupload
|
||||
m_interpreter_state = rsx::invalidate_pipeline_bits;
|
||||
}
|
||||
}
|
||||
|
||||
return m_program != nullptr;
|
||||
|
@ -649,6 +689,7 @@ void GLGSRender::load_program_env()
|
|||
const bool update_vertex_env = !!(m_graphics_state & rsx::pipeline_state::vertex_state_dirty);
|
||||
const bool update_fragment_env = !!(m_graphics_state & rsx::pipeline_state::fragment_state_dirty);
|
||||
const bool update_fragment_texture_env = !!(m_graphics_state & rsx::pipeline_state::fragment_texture_state_dirty);
|
||||
const bool update_instruction_buffers = (!!m_interpreter_state && m_program == m_shader_interpreter.get());
|
||||
|
||||
m_program->use();
|
||||
|
||||
|
@ -659,6 +700,12 @@ void GLGSRender::load_program_env()
|
|||
if (update_fragment_texture_env) m_texture_parameters_buffer->reserve_storage_on_heap(256);
|
||||
if (update_fragment_constants) m_fragment_constants_buffer->reserve_storage_on_heap(align(fragment_constants_size, 256));
|
||||
if (update_transform_constants) m_transform_constants_buffer->reserve_storage_on_heap(8192);
|
||||
|
||||
if (update_instruction_buffers)
|
||||
{
|
||||
m_vertex_instructions_buffer->reserve_storage_on_heap(513 * 16);
|
||||
m_fragment_instructions_buffer->reserve_storage_on_heap(current_fp_metadata.program_ucode_length);
|
||||
}
|
||||
}
|
||||
|
||||
if (update_vertex_env)
|
||||
|
@ -686,7 +733,7 @@ void GLGSRender::load_program_env()
|
|||
m_transform_constants_buffer->bind_range(GL_VERTEX_CONSTANT_BUFFERS_BIND_SLOT, mapping.second, 8192);
|
||||
}
|
||||
|
||||
if (update_fragment_constants)
|
||||
if (update_fragment_constants && !update_instruction_buffers)
|
||||
{
|
||||
// Fragment constants
|
||||
auto mapping = m_fragment_constants_buffer->alloc_from_heap(fragment_constants_size, m_uniform_buffer_offset_align);
|
||||
|
@ -718,6 +765,49 @@ void GLGSRender::load_program_env()
|
|||
m_texture_parameters_buffer->bind_range(GL_FRAGMENT_TEXTURE_PARAMS_BIND_SLOT, mapping.second, 256);
|
||||
}
|
||||
|
||||
if (update_instruction_buffers)
|
||||
{
|
||||
if (m_interpreter_state & rsx::vertex_program_dirty)
|
||||
{
|
||||
// Attach vertex buffer data
|
||||
const auto vp_block_length = current_vp_metadata.ucode_length + 16;
|
||||
auto vp_mapping = m_vertex_instructions_buffer->alloc_from_heap(vp_block_length, 16);
|
||||
auto vp_buf = static_cast<u8*>(vp_mapping.first);
|
||||
|
||||
auto vp_config = reinterpret_cast<u32*>(vp_buf);
|
||||
vp_config[0] = current_vertex_program.base_address;
|
||||
vp_config[1] = current_vertex_program.entry;
|
||||
vp_config[2] = current_vertex_program.output_mask;
|
||||
|
||||
std::memcpy(vp_buf + 16, current_vertex_program.data.data(), current_vp_metadata.ucode_length);
|
||||
|
||||
m_vertex_instructions_buffer->bind_range(GL_INTERPRETER_VERTEX_BLOCK, vp_mapping.second, vp_block_length);
|
||||
m_vertex_instructions_buffer->notify();
|
||||
}
|
||||
|
||||
if (m_interpreter_state & rsx::fragment_program_dirty)
|
||||
{
|
||||
// Attach fragment buffer data
|
||||
const auto fp_block_length = current_fp_metadata.program_ucode_length + 80;
|
||||
auto fp_mapping = m_fragment_instructions_buffer->alloc_from_heap(fp_block_length, 16);
|
||||
auto fp_buf = static_cast<u8*>(fp_mapping.first);
|
||||
|
||||
// Control mask
|
||||
const auto control_masks = reinterpret_cast<u32*>(fp_buf);
|
||||
control_masks[0] = rsx::method_registers.shader_control();
|
||||
control_masks[1] = current_fragment_program.texture_dimensions;
|
||||
|
||||
// Bind textures
|
||||
m_shader_interpreter.update_fragment_textures(fs_sampler_state, current_fp_metadata.referenced_textures_mask, reinterpret_cast<u32*>(fp_buf + 16));
|
||||
|
||||
const auto fp_data = static_cast<u8*>(current_fragment_program.addr) + current_fp_metadata.program_start_offset;
|
||||
std::memcpy(fp_buf + 80, fp_data, current_fp_metadata.program_ucode_length);
|
||||
|
||||
m_fragment_instructions_buffer->bind_range(GL_INTERPRETER_FRAGMENT_BLOCK, fp_mapping.second, fp_block_length);
|
||||
m_fragment_instructions_buffer->notify();
|
||||
}
|
||||
}
|
||||
|
||||
if (manually_flush_ring_buffers)
|
||||
{
|
||||
if (update_fragment_env) m_fragment_env_buffer->unmap();
|
||||
|
@ -725,6 +815,12 @@ void GLGSRender::load_program_env()
|
|||
if (update_fragment_texture_env) m_texture_parameters_buffer->unmap();
|
||||
if (update_fragment_constants) m_fragment_constants_buffer->unmap();
|
||||
if (update_transform_constants) m_transform_constants_buffer->unmap();
|
||||
|
||||
if (update_instruction_buffers)
|
||||
{
|
||||
m_vertex_instructions_buffer->unmap();
|
||||
m_fragment_instructions_buffer->unmap();
|
||||
}
|
||||
}
|
||||
|
||||
const u32 handled_flags = (rsx::pipeline_state::fragment_state_dirty | rsx::pipeline_state::vertex_state_dirty | rsx::pipeline_state::transform_constants_dirty | rsx::pipeline_state::fragment_constants_dirty | rsx::pipeline_state::fragment_texture_state_dirty);
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
#include "GLProgramBuffer.h"
|
||||
#include "GLTextOut.h"
|
||||
#include "GLOverlays.h"
|
||||
#include "GLShaderInterpreter.h"
|
||||
|
||||
#include <optional>
|
||||
|
||||
|
@ -74,8 +75,10 @@ private:
|
|||
gl::sampler_state m_fs_sampler_mirror_states[rsx::limits::fragment_textures_count]; // Alternate views of fragment textures with different format (e.g Depth vs Stencil for D24S8)
|
||||
gl::sampler_state m_vs_sampler_states[rsx::limits::vertex_textures_count]; // Vertex textures
|
||||
|
||||
gl::glsl::program *m_program;
|
||||
gl::glsl::program m_shader_interpreter;
|
||||
gl::glsl::program *m_program = nullptr;
|
||||
|
||||
u32 m_interpreter_state = 0;
|
||||
gl::shader_interpreter m_shader_interpreter;
|
||||
|
||||
gl_render_targets m_rtts;
|
||||
|
||||
|
@ -94,6 +97,8 @@ private:
|
|||
std::unique_ptr<gl::ring_buffer> m_texture_parameters_buffer;
|
||||
std::unique_ptr<gl::ring_buffer> m_vertex_layout_buffer;
|
||||
std::unique_ptr<gl::ring_buffer> m_index_ring_buffer;
|
||||
std::unique_ptr<gl::ring_buffer> m_vertex_instructions_buffer;
|
||||
std::unique_ptr<gl::ring_buffer> m_fragment_instructions_buffer;
|
||||
|
||||
// Identity buffer used to fix broken gl_VertexID on ATI stack
|
||||
std::unique_ptr<gl::buffer> m_identity_index_buffer;
|
||||
|
|
|
@ -27,7 +27,9 @@
|
|||
#define GL_FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT 3
|
||||
#define GL_FRAGMENT_STATE_BIND_SLOT 4
|
||||
#define GL_FRAGMENT_TEXTURE_PARAMS_BIND_SLOT 5
|
||||
#define GL_COMPUTE_BUFFER_SLOT(index) (index + 6)
|
||||
#define GL_INTERPRETER_VERTEX_BLOCK 6
|
||||
#define GL_INTERPRETER_FRAGMENT_BLOCK 7
|
||||
#define GL_COMPUTE_BUFFER_SLOT(index) (index + 8)
|
||||
|
||||
inline static void _SelectTexture(int unit) { glActiveTexture(GL_TEXTURE0 + unit); }
|
||||
|
||||
|
@ -2576,6 +2578,7 @@ public:
|
|||
// Uploads the four colour components (r, g, b, a) as a 4-float uniform at location() of m_program.
void operator = (const color4f& rhs) const { glProgramUniform4f(m_program.id(), location(), rhs.r, rhs.g, rhs.b, rhs.a); }
|
||||
// Uploads the area corners (x1, y1, x2, y2) as a 4-float uniform at location() of m_program.
void operator = (const areaf& rhs) const { glProgramUniform4f(m_program.id(), location(), rhs.x1, rhs.y1, rhs.x2, rhs.y2); }
|
||||
// Uploads the area corners (x1, y1, x2, y2) as a 4-integer uniform at location() of m_program.
void operator = (const areai& rhs) const { glProgramUniform4i(m_program.id(), location(), rhs.x1, rhs.y1, rhs.x2, rhs.y2); }
|
||||
// Uploads the whole vector as an int array uniform (element count taken from ::size32(rhs)) at location() of m_program.
void operator = (const std::vector<int>& rhs) const { glProgramUniform1iv(m_program.id(), location(), ::size32(rhs), rhs.data()); }
|
||||
};
|
||||
|
||||
class uniforms_t
|
||||
|
|
|
@ -1,7 +1,320 @@
|
|||
#include "stdafx.h"
|
||||
#include "stdafx.h"
|
||||
#include "GLShaderInterpreter.h"
|
||||
#include "GLGSRender.h"
|
||||
#include "GLVertexProgram.h"
|
||||
#include "GLFragmentProgram.h"
|
||||
#include "../Common/ShaderInterpreter.h"
|
||||
#include "../Common/GLSLCommon.h"
|
||||
|
||||
namespace gl
|
||||
{
|
||||
using glsl::shader;
|
||||
|
||||
namespace interpreter
|
||||
{
|
||||
// Queries the driver for the number of texture image units available to the given
// shader stage and stores it in max_image_units.
// Any domain other than vertex/fragment is reported and then handled as a vertex program.
void texture_pool_allocator::create(shader::type domain)
{
	// Vertex limit is the default so that unexpected domains still query a valid enum
	GLenum limit_query = GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS;

	if (domain == shader::type::fragment)
	{
		limit_query = GL_MAX_TEXTURE_IMAGE_UNITS;
	}
	else if (domain != shader::type::vertex)
	{
		rsx_log.fatal("Unexpected program domain %d", static_cast<int>(domain));
	}

	glGetIntegerv(limit_query, &max_image_units);
}
|
||||
|
||||
void texture_pool_allocator::allocate(int size)
|
||||
{
|
||||
if ((used + size) > max_image_units)
|
||||
{
|
||||
rsx_log.fatal("Out of image binding slots!");
|
||||
}
|
||||
|
||||
used += size;
|
||||
texture_pool pool;
|
||||
pool.pool_size = size;
|
||||
pools.push_back(pool);
|
||||
}
|
||||
}
|
||||
|
||||
void shader_interpreter::create()
|
||||
{
|
||||
texture_pools[0].create(shader::type::vertex);
|
||||
texture_pools[1].create(shader::type::fragment);
|
||||
|
||||
build_vs();
|
||||
build_fs();
|
||||
|
||||
program_handle.create().
|
||||
attach(vs).
|
||||
attach(fs).
|
||||
link();
|
||||
|
||||
program_handle.uniforms[0] = GL_STREAM_BUFFER_START + 0;
|
||||
program_handle.uniforms[1] = GL_STREAM_BUFFER_START + 1;
|
||||
|
||||
// Initialize texture bindings
|
||||
int assigned = 0;
|
||||
auto& allocator = texture_pools[1];
|
||||
const char* type_names[] = { "sampler1D_array", "sampler2D_array", "samplerCube_array", "sampler3D_array" };
|
||||
|
||||
for (int i = 0; i < 4; ++i)
|
||||
{
|
||||
for (int j = 0; j < allocator.pools[i].pool_size; ++j)
|
||||
{
|
||||
allocator.pools[i].allocate(assigned++);
|
||||
}
|
||||
|
||||
program_handle.uniforms[type_names[i]] = allocator.pools[i].allocated;
|
||||
}
|
||||
}
|
||||
|
||||
// Tears down the interpreter: calls remove() on the linked program first, then on the
// vertex and fragment shader objects (presumably deleting the underlying GL objects - confirm in GLHelpers).
void shader_interpreter::destroy()
|
||||
{
|
||||
program_handle.remove();
|
||||
vs.remove();
|
||||
fs.remove();
|
||||
}
|
||||
|
||||
// Returns a non-owning pointer to the interpreter's program object.
// The pointer refers to a member of this object, so it is never null.
glsl::program* shader_interpreter::get()
|
||||
{
|
||||
return &program_handle;
|
||||
}
|
||||
|
||||
void shader_interpreter::build_vs()
|
||||
{
|
||||
::glsl::shader_properties properties{};
|
||||
properties.domain = ::glsl::program_domain::glsl_vertex_program;
|
||||
properties.require_lit_emulation = true;
|
||||
|
||||
// TODO: Extend decompiler thread
|
||||
// TODO: Rename decompiler thread, it no longer spawns a thread
|
||||
RSXVertexProgram null_prog;
|
||||
std::string shader_str;
|
||||
ParamArray arr;
|
||||
GLVertexDecompilerThread comp(null_prog, shader_str, arr);
|
||||
|
||||
std::stringstream builder;
|
||||
comp.insertHeader(builder);
|
||||
comp.insertConstants(builder, {});
|
||||
comp.insertInputs(builder, {});
|
||||
comp.insertOutputs(builder, {});
|
||||
|
||||
// Insert vp stream input
|
||||
builder << "\n"
|
||||
"layout(std140, binding = " << GL_INTERPRETER_VERTEX_BLOCK << ") readonly restrict buffer VertexInstructionBlock\n"
|
||||
"{\n"
|
||||
" uint base_address;\n"
|
||||
" uint entry;\n"
|
||||
" uint output_mask;\n"
|
||||
" uint reserved;\n"
|
||||
" uvec4 vp_instructions[];\n"
|
||||
"};\n\n";
|
||||
|
||||
::glsl::insert_glsl_legacy_function(builder, properties);
|
||||
::glsl::insert_vertex_input_fetch(builder, ::glsl::glsl_rules::glsl_rules_opengl4);
|
||||
|
||||
builder << program_common::interpreter::get_vertex_interpreter();
|
||||
const std::string s = builder.str();
|
||||
|
||||
vs.create(glsl::shader::type::vertex);
|
||||
vs.source(s);
|
||||
vs.compile();
|
||||
}
|
||||
|
||||
void shader_interpreter::build_fs()
|
||||
{
|
||||
// Allocate TIUs
|
||||
auto& allocator = texture_pools[1];
|
||||
if (allocator.max_image_units >= 32)
|
||||
{
|
||||
// 16 + 4 + 4 + 4
|
||||
allocator.allocate(4); // 1D
|
||||
allocator.allocate(16); // 2D
|
||||
allocator.allocate(4); // CUBE
|
||||
allocator.allocate(4); // 3D
|
||||
}
|
||||
else if (allocator.max_image_units >= 24)
|
||||
{
|
||||
// 16 + 4 + 2 + 2
|
||||
allocator.allocate(2); // 1D
|
||||
allocator.allocate(16); // 2D
|
||||
allocator.allocate(2); // CUBE
|
||||
allocator.allocate(4); // 3D
|
||||
}
|
||||
else if (allocator.max_image_units >= 16)
|
||||
{
|
||||
// 10 + 2 + 2 + 2
|
||||
allocator.allocate(2); // 1D
|
||||
allocator.allocate(10); // 2D
|
||||
allocator.allocate(2); // CUBE
|
||||
allocator.allocate(2); // 3D
|
||||
}
|
||||
else
|
||||
{
|
||||
// Unusable
|
||||
rsx_log.fatal("Failed to allocate enough TIUs for shader interpreter.");
|
||||
}
|
||||
|
||||
::glsl::shader_properties properties{};
|
||||
properties.domain = ::glsl::program_domain::glsl_fragment_program;
|
||||
properties.require_depth_conversion = true;
|
||||
properties.require_wpos = true;
|
||||
|
||||
u32 len;
|
||||
ParamArray arr;
|
||||
std::string shader_str;
|
||||
RSXFragmentProgram frag;
|
||||
GLFragmentDecompilerThread comp(shader_str, arr, frag, len);
|
||||
|
||||
std::stringstream builder;
|
||||
builder <<
|
||||
"#version 450\n"
|
||||
"#extension GL_ARB_bindless_texture : require\n\n";
|
||||
|
||||
::glsl::insert_subheader_block(builder);
|
||||
comp.insertConstants(builder);
|
||||
|
||||
// Declare custom inputs
|
||||
builder <<
|
||||
"layout(location=1) in vec4 in_regs[15];\n\n";
|
||||
|
||||
const char* type_names[] = { "sampler1D", "sampler2D", "samplerCube", "sampler3D" };
|
||||
for (int i = 0; i < 4; ++i)
|
||||
{
|
||||
builder << "uniform " << type_names[i] << " " << type_names[i] << "_array[" << allocator.pools[i].pool_size << "];\n";
|
||||
}
|
||||
|
||||
builder << "\n"
|
||||
"#define IS_TEXTURE_RESIDENT(index) (texture_handles[index] < 0xFF)\n"
|
||||
"#define SAMPLER1D(index) sampler1D_array[texture_handles[index]]\n"
|
||||
"#define SAMPLER2D(index) sampler2D_array[texture_handles[index]]\n"
|
||||
"#define SAMPLER3D(index) sampler3D_array[texture_handles[index]]\n"
|
||||
"#define SAMPLERCUBE(index) samplerCube_array[texture_handles[index]]\n\n";
|
||||
|
||||
builder <<
|
||||
"layout(std430, binding =" << GL_INTERPRETER_FRAGMENT_BLOCK << ") readonly restrict buffer FragmentInstructionBlock\n"
|
||||
"{\n"
|
||||
" uint shader_control;\n"
|
||||
" uint texture_control;\n"
|
||||
" uint reserved1;\n"
|
||||
" uint reserved2;\n"
|
||||
" uint texture_handles[16];\n"
|
||||
" uvec4 fp_instructions[];\n"
|
||||
"};\n\n";
|
||||
|
||||
::program_common::insert_fog_declaration(builder, "vec4", "fogc", true);
|
||||
|
||||
builder << program_common::interpreter::get_fragment_interpreter();
|
||||
const std::string s = builder.str();
|
||||
|
||||
fs.create(glsl::shader::type::fragment);
|
||||
fs.source(s);
|
||||
fs.compile();
|
||||
}
|
||||
|
||||
void shader_interpreter::update_fragment_textures(
|
||||
const std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, 16>& descriptors,
|
||||
u16 reference_mask, u32* out)
|
||||
{
|
||||
if (reference_mask == 0)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
// Reset allocation
|
||||
auto& allocator = texture_pools[1];
|
||||
for (unsigned i = 0; i < 4; ++i)
|
||||
{
|
||||
allocator.pools[i].num_used = 0;
|
||||
allocator.pools[i].flags = 0;
|
||||
}
|
||||
|
||||
rsx::simple_array<std::pair<int, int>> replacement_map;
|
||||
for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
|
||||
{
|
||||
if (reference_mask & (1 << i))
|
||||
{
|
||||
auto sampler_state = static_cast<gl::texture_cache::sampled_image_descriptor*>(descriptors[i].get());
|
||||
verify(HERE), sampler_state;
|
||||
|
||||
int pool_id = static_cast<int>(sampler_state->image_type);
|
||||
auto& pool = allocator.pools[pool_id];
|
||||
|
||||
const int old = pool.allocated[pool.num_used];
|
||||
if (!pool.allocate(i))
|
||||
{
|
||||
rsx_log.error("Could not allocate texture resource for shader interpreter.");
|
||||
break;
|
||||
}
|
||||
|
||||
out[i] = (pool.num_used - 1);
|
||||
if (old != i)
|
||||
{
|
||||
// Check if the candidate target has also been replaced
|
||||
bool found = false;
|
||||
for (auto& e : replacement_map)
|
||||
{
|
||||
if (e.second == old)
|
||||
{
|
||||
// This replacement consumed this 'old' value
|
||||
e.second = i;
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!found)
|
||||
{
|
||||
replacement_map.push_back({ old, i });
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
out[i] = 0xFF;
|
||||
}
|
||||
}
|
||||
|
||||
// Bind TIU locations
|
||||
if (replacement_map.empty()) [[likely]]
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
if (get_driver_caps().vendor_AMD)
|
||||
{
|
||||
// AMD drivers don't like texture bindings overlapping which means workarounds are needed
|
||||
// Technically this is accurate to spec, but makes efficient usage of shader resources difficult
|
||||
for (unsigned i = 0; i < replacement_map.size(); ++i)
|
||||
{
|
||||
for (int j = 0; j < 4; ++j)
|
||||
{
|
||||
auto& pool = allocator.pools[j];
|
||||
for (int k = pool.num_used; k < pool.pool_size; ++k)
|
||||
{
|
||||
if (pool.allocated[k] == replacement_map[i].second)
|
||||
{
|
||||
pool.allocated[k] = replacement_map[i].first;
|
||||
pool.flags |= static_cast<u32>(interpreter::texture_pool_flags::dirty);
|
||||
|
||||
// Exit nested loop
|
||||
j = 4;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (allocator.pools[0].flags) program_handle.uniforms["sampler1D_array"] = allocator.pools[0].allocated;
|
||||
if (allocator.pools[1].flags) program_handle.uniforms["sampler2D_array"] = allocator.pools[1].allocated;
|
||||
if (allocator.pools[2].flags) program_handle.uniforms["samplerCube_array"] = allocator.pools[2].allocated;
|
||||
if (allocator.pools[3].flags) program_handle.uniforms["sampler3D_array"] = allocator.pools[3].allocated;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,15 +1,71 @@
|
|||
#pragma once
|
||||
#include "GLGSRender.h"
|
||||
#pragma once
|
||||
#include "GLHelpers.h"
|
||||
|
||||
namespace gl
|
||||
{
|
||||
class shader_interpreter : glsl::program
|
||||
namespace interpreter
|
||||
{
|
||||
enum class texture_pool_flags
|
||||
{
|
||||
dirty = 1
|
||||
};
|
||||
|
||||
struct texture_pool
|
||||
{
|
||||
int pool_size = 0;
|
||||
int num_used = 0;
|
||||
u32 flags = 0;
|
||||
std::vector<int> allocated;
|
||||
|
||||
bool allocate(int value)
|
||||
{
|
||||
if (num_used >= pool_size)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if (allocated.size() == num_used)
|
||||
{
|
||||
allocated.push_back(value);
|
||||
}
|
||||
else
|
||||
{
|
||||
allocated[num_used] = value;
|
||||
}
|
||||
|
||||
num_used++;
|
||||
flags |= static_cast<u32>(texture_pool_flags::dirty);
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
struct texture_pool_allocator
|
||||
{
|
||||
int max_image_units = 0;
|
||||
int used = 0;
|
||||
std::vector<texture_pool> pools;
|
||||
|
||||
void create(::gl::glsl::shader::type domain);
|
||||
void allocate(int size);
|
||||
};
|
||||
}
|
||||
|
||||
// Owns the GL program made of the vertex and fragment interpreter shaders,
// plus the texture pool bookkeeping used to bind textures to it.
class shader_interpreter
|
||||
{
|
||||
// Vertex stage shader object
glsl::shader vs;
|
||||
// Fragment stage shader object
glsl::shader fs;
|
||||
// Program the two stages are attached to
glsl::program program_handle;
|
||||
// Texture pool allocators, one per shader stage
interpreter::texture_pool_allocator texture_pools[2];
|
||||
|
||||
// Generate and compile the source of each stage
void build_vs();
|
||||
void build_fs();
|
||||
|
||||
public:
|
||||
void create();
|
||||
void destroy();
|
||||
|
||||
// Takes the 16 fragment sampler descriptors and a bitmask of referenced textures; writes results through 'out'
void update_fragment_textures(const std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, 16>& descriptors, u16 reference_mask, u32* out);
|
||||
|
||||
// Returns a pointer to a glsl::program
glsl::program* get();
|
||||
};
|
||||
}
|
||||
|
|
|
@ -46,7 +46,7 @@ void GLVertexDecompilerThread::insertHeader(std::stringstream &OS)
|
|||
OS << "layout(std140, binding = 1) uniform VertexLayoutBuffer\n";
|
||||
OS << "{\n";
|
||||
OS << " uint vertex_base_index;\n";
|
||||
OS << " uint vertex_index_offset;\n";
|
||||
OS << " uint vertex_index_offset;\n";
|
||||
OS << " uvec4 input_attributes_blob[16 / 2];\n";
|
||||
OS << "};\n\n";
|
||||
}
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
#pragma once
|
||||
#pragma once
|
||||
#include "../Common/VertexProgramDecompiler.h"
|
||||
#include "Emu/RSX/RSXVertexProgram.h"
|
||||
|
||||
|
@ -11,8 +11,15 @@ enum
|
|||
GL_VP_SINT_MASK = (GL_VP_ATTRIB_S16_INT|GL_VP_ATTRIB_S32_INT)
|
||||
};
|
||||
|
||||
// Forward declaration only: enough to name gl::shader_interpreter (e.g. in a friend
// declaration) without pulling in the interpreter header.
// A namespace definition is not followed by a semicolon.
namespace gl
{
	class shader_interpreter;
}
|
||||
|
||||
struct GLVertexDecompilerThread : public VertexProgramDecompiler
|
||||
{
|
||||
friend class gl::shader_interpreter;
|
||||
|
||||
std::string &m_shader;
|
||||
protected:
|
||||
std::string getFloatTypeName(size_t elementCount) override;
|
||||
|
|
|
@ -132,6 +132,7 @@ struct cfg_root : cfg::node
|
|||
cfg::_bool enable_3d{ this, "Enable 3D", false };
|
||||
cfg::_int<1, 8> consecutive_frames_to_draw{ this, "Consecutive Frames To Draw", 1, true};
|
||||
cfg::_int<1, 8> consecutive_frames_to_skip{ this, "Consecutive Frames To Skip", 1, true};
|
||||
cfg::_bool enable_shader_interpreter{ this, "Enable Shader Interpreter", true };
|
||||
cfg::_int<50, 800> resolution_scale_percent{ this, "Resolution Scale", 100 };
|
||||
cfg::_int<0, 16> anisotropic_level_override{ this, "Anisotropic Filter Override", 0, true };
|
||||
cfg::_int<1, 1024> min_scalable_dimension{ this, "Minimum Scalable Dimension", 16 };
|
||||
|
|
|
@ -387,3 +387,18 @@ void fmt_class_string<ppu_decoder_type>::format(std::string& out, u64 arg)
|
|||
return unknown;
|
||||
});
|
||||
}
|
||||
|
||||
// Formats a shader_interpreter_mode value as its display label;
// values without a case below yield 'unknown'.
void fmt_class_string<shader_interpreter_mode>::format(std::string& out, u64 arg)
{
	// Captureless so it can be handed over exactly like an inline lambda argument
	const auto to_label = [](shader_interpreter_mode mode)
	{
		switch (mode)
		{
		case shader_interpreter_mode::disabled:
			return "Disabled";
		case shader_interpreter_mode::enabled:
			return "Enabled";
		case shader_interpreter_mode::forced:
			return "Forced";
		}

		return unknown;
	};

	format_enum(out, arg, to_label);
}
|
||||
|
|
|
@ -436,6 +436,7 @@
|
|||
<ClInclude Include="Emu\Io\pad_config.h" />
|
||||
<ClInclude Include="Emu\Io\pad_config_types.h" />
|
||||
<ClInclude Include="Emu\NP\np_handler.h" />
|
||||
<ClInclude Include="Emu\RSX\Common\ShaderInterpreter.h" />
|
||||
<ClInclude Include="Emu\RSX\Common\texture_cache_helpers.h" />
|
||||
<ClInclude Include="Emu\RSX\Overlays\overlay_fonts.h" />
|
||||
<ClInclude Include="Emu\RSX\Overlays\overlay_message_dialog.h" />
|
||||
|
@ -720,6 +721,10 @@
|
|||
<Project>{fdc361c5-7734-493b-8cfb-037308b35122}</Project>
|
||||
</ProjectReference>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="Emu\RSX\Common\Interpreter\FragmentInterpreter.glsl" />
|
||||
<None Include="Emu\RSX\Common\Interpreter\VertexInterpreter.glsl" />
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
|
|
|
@ -75,6 +75,9 @@
|
|||
<Filter Include="Emu\NP">
|
||||
<UniqueIdentifier>{652ce43e-72db-42cd-831a-0e194f67e731}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="Emu\GPU\RSX\Common\Interpreter">
|
||||
<UniqueIdentifier>{bc97b324-1eea-445a-8fa9-6fc49e3df47c}</UniqueIdentifier>
|
||||
</Filter>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="Crypto\aes.cpp">
|
||||
|
@ -868,7 +871,10 @@
|
|||
</ClCompile>
|
||||
<ClCompile Include="util\cereal.cpp">
|
||||
<Filter>Utilities</Filter>
|
||||
</ClCompile>
|
||||
</ClCompile>
|
||||
<ClCompile Include="util\shared_cptr.cpp">
|
||||
<Filter>Utilities</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="util\fixed_typemap.cpp">
|
||||
<Filter>Utilities</Filter>
|
||||
</ClCompile>
|
||||
|
@ -1758,7 +1764,7 @@
|
|||
</ClInclude>
|
||||
<ClInclude Include="Emu\RSX\Overlays\overlay_osk_panel.h">
|
||||
<Filter>Emu\GPU\RSX\Overlays</Filter>
|
||||
</ClInclude>
|
||||
</ClInclude>
|
||||
<ClInclude Include="util\yaml.hpp">
|
||||
<Filter>Utilities</Filter>
|
||||
</ClInclude>
|
||||
|
@ -1782,6 +1788,17 @@
|
|||
</ClInclude>
|
||||
<ClInclude Include="util\typeindices.hpp">
|
||||
<Filter>Utilities</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="Emu\RSX\Common\ShaderInterpreter.h">
|
||||
<Filter>Emu\GPU\RSX\Common</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="Emu\RSX\Common\Interpreter\FragmentInterpreter.glsl">
|
||||
<Filter>Emu\GPU\RSX\Common\Interpreter</Filter>
|
||||
</None>
|
||||
<None Include="Emu\RSX\Common\Interpreter\VertexInterpreter.glsl">
|
||||
<Filter>Emu\GPU\RSX\Common\Interpreter</Filter>
|
||||
</None>
|
||||
</ItemGroup>
|
||||
</Project>
|
Loading…
Add table
Reference in a new issue