mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-04-20 11:36:13 +00:00
rsx/fp: Implement register gather (only for UP(X) instructions)
- Workaround for temp register aliasing between H and R variants - TODO: Implement temp regs as 128-bit blocks with r/w as pack/unpack
This commit is contained in:
parent
44e34064de
commit
fe9090bd39
3 changed files with 131 additions and 2 deletions
|
@ -76,6 +76,9 @@ void FragmentProgramDecompiler::SetDst(std::string code, bool append_mask)
|
|||
{
|
||||
AddCode(m_parr.AddParam(PF_PARAM_NONE, getFloatTypeName(4), "cc" + std::to_string(src0.cond_mod_reg_index)) + "$m = " + dest + ";");
|
||||
}
|
||||
|
||||
u32 reg_index = dst.fp16 ? dst.dest_reg >> 1 : dst.dest_reg;
|
||||
temp_registers[reg_index].tag(dst.dest_reg, !!dst.fp16);
|
||||
}
|
||||
|
||||
void FragmentProgramDecompiler::AddFlowOp(std::string code)
|
||||
|
@ -339,6 +342,30 @@ template<typename T> std::string FragmentProgramDecompiler::GetSRC(T src)
|
|||
switch (src.reg_type)
|
||||
{
|
||||
case RSX_FP_REGISTER_TYPE_TEMP:
|
||||
|
||||
if (!src.fp16)
|
||||
{
|
||||
if (dst.opcode == RSX_FP_OPCODE_UP16 ||
|
||||
dst.opcode == RSX_FP_OPCODE_UP2 ||
|
||||
dst.opcode == RSX_FP_OPCODE_UP4 ||
|
||||
dst.opcode == RSX_FP_OPCODE_UPB ||
|
||||
dst.opcode == RSX_FP_OPCODE_UPG)
|
||||
{
|
||||
//TODO: Implement aliased gather for half floats
|
||||
bool xy_read = false;
|
||||
bool zw_read = false;
|
||||
|
||||
if (src.swizzle_x < 2 || src.swizzle_y < 2 || src.swizzle_z < 2 || src.swizzle_w < 2)
|
||||
xy_read = true;
|
||||
if (src.swizzle_x > 1 || src.swizzle_y > 1 || src.swizzle_z > 1 || src.swizzle_w > 1)
|
||||
zw_read = true;
|
||||
|
||||
auto &reg = temp_registers[src.tmp_reg_index];
|
||||
if (reg.requires_gather(xy_read, zw_read))
|
||||
AddCode(reg.gather_r());
|
||||
}
|
||||
}
|
||||
|
||||
ret += AddReg(src.tmp_reg_index, src.fp16);
|
||||
break;
|
||||
|
||||
|
@ -424,6 +451,27 @@ std::string FragmentProgramDecompiler::BuildCode()
|
|||
OS << std::endl;
|
||||
insertOutputs(OS);
|
||||
OS << std::endl;
|
||||
|
||||
//TODO: Better organization for this
|
||||
std::string float2 = getFloatTypeName(2);
|
||||
std::string float4 = getFloatTypeName(4);
|
||||
|
||||
OS << float4 << " gather(" << float4 << " _h0, " << float4 << " _h1)\n";
|
||||
OS << "{\n";
|
||||
OS << " float x = uintBitsToFloat(packHalf2x16(_h0.xy));\n";
|
||||
OS << " float y = uintBitsToFloat(packHalf2x16(_h0.zw));\n";
|
||||
OS << " float z = uintBitsToFloat(packHalf2x16(_h1.xy));\n";
|
||||
OS << " float w = uintBitsToFloat(packHalf2x16(_h1.zw));\n";
|
||||
OS << " return " << float4 << "(x, y, z, w);\n";
|
||||
OS << "}\n\n";
|
||||
|
||||
OS << float2 << " gather(" << float4 << " _h)\n";
|
||||
OS << "{\n";
|
||||
OS << " float x = uintBitsToFloat(packHalf2x16(_h.xy));\n";
|
||||
OS << " float y = uintBitsToFloat(packHalf2x16(_h.zw));\n";
|
||||
OS << " return " << float2 << "(x, y);\n";
|
||||
OS << "}\n\n";
|
||||
|
||||
insertMainStart(OS);
|
||||
OS << main << std::endl;
|
||||
insertMainEnd(OS);
|
||||
|
|
|
@ -19,6 +19,85 @@
|
|||
*/
|
||||
class FragmentProgramDecompiler
|
||||
{
|
||||
struct temp_register
|
||||
{
|
||||
bool aliased_r0 = false;
|
||||
bool aliased_h0 = false;
|
||||
bool aliased_h1 = false;
|
||||
bool last_write_half = false;
|
||||
|
||||
u32 real_index = UINT32_MAX;
|
||||
|
||||
void tag(u32 index, bool half_register)
|
||||
{
|
||||
if (half_register)
|
||||
{
|
||||
last_write_half = true;
|
||||
|
||||
if (index & 1)
|
||||
aliased_h1 = true;
|
||||
else
|
||||
aliased_h0 = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
aliased_r0 = true;
|
||||
last_write_half = false;
|
||||
}
|
||||
|
||||
if (real_index == UINT32_MAX)
|
||||
{
|
||||
if (half_register)
|
||||
real_index = index >> 1;
|
||||
else
|
||||
real_index = index;
|
||||
}
|
||||
}
|
||||
|
||||
bool requires_gather(bool xy, bool zw) const
|
||||
{
|
||||
//Data fetched from the single precision register requires merging of the two half registers
|
||||
//TODO: Check individual swizzle channels
|
||||
if (aliased_h0 && xy || aliased_h1 && zw)
|
||||
return last_write_half;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool requires_split(u32 /*index*/) const
|
||||
{
|
||||
//Data fetched from any of the two half registers requires sync with the full register
|
||||
if (!last_write_half && aliased_r0)
|
||||
{
|
||||
//r0 has been written to
|
||||
//TODO: Check for specific elements in real32 register
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
std::string gather_r()
|
||||
{
|
||||
std::string h0 = "h" + std::to_string(real_index << 1);
|
||||
std::string h1 = "h" + std::to_string(real_index << 1 | 1);
|
||||
std::string reg = "r" + std::to_string(real_index);
|
||||
std::string ret = "//Invalid gather";
|
||||
|
||||
if (aliased_h0 && aliased_h1)
|
||||
ret = reg + " = gather(" + h0 + ", " + h1 + ");";
|
||||
else if (aliased_h0)
|
||||
ret = reg + ".xy = gather(" + h0 + ");";
|
||||
else if (aliased_h1)
|
||||
ret = reg + ".zw = gather(" + h1 + ");";
|
||||
|
||||
last_write_half = false;
|
||||
aliased_h0 = false;
|
||||
aliased_h1 = false;
|
||||
return ret;
|
||||
}
|
||||
};
|
||||
|
||||
OPDEST dst;
|
||||
SRC0 src0;
|
||||
SRC1 src1;
|
||||
|
@ -35,6 +114,8 @@ class FragmentProgramDecompiler
|
|||
std::vector<u32> m_end_offsets;
|
||||
std::vector<u32> m_else_offsets;
|
||||
|
||||
std::array<temp_register, 24> temp_registers;
|
||||
|
||||
std::string GetMask();
|
||||
|
||||
void SetDst(std::string code, bool append_mask = true);
|
||||
|
|
|
@ -184,12 +184,12 @@ void insert_d3d12_legacy_function(std::ostream& OS, bool is_fragment_program)
|
|||
**/
|
||||
OS << "uint packHalf2x16(float2 val)";
|
||||
OS << "{\n";
|
||||
OS << " return packSnorm2x16(val / 6.1E+5);\n";
|
||||
OS << " return packSnorm2x16(val / 65504.);\n";
|
||||
OS << "}\n\n";
|
||||
|
||||
OS << "float2 unpackHalf2x16(uint val)";
|
||||
OS << "{\n";
|
||||
OS << " return unpackSnorm2x16(val) * 6.1E+5;\n";
|
||||
OS << " return unpackSnorm2x16(val) * 65504.;\n";
|
||||
OS << "}\n\n";
|
||||
|
||||
OS << "float read_value(float4 src, uint remap_index)\n";
|
||||
|
|
Loading…
Add table
Reference in a new issue