Remove OpenGL-specific vector-types

Removes dependency on the OpenGL header and rendering backen for its
`OpenGL::Vector` type in favor of a more standard array.
This commit is contained in:
Wunkolo 2023-07-10 08:55:23 -07:00
parent 2a1683ba62
commit 9e32b6d4bf
5 changed files with 221 additions and 224 deletions
include/PICA

View file

@ -2,14 +2,12 @@
#include <algorithm>
#include <array>
#include <cstring>
#include "helpers.hpp"
#include "opengl.hpp"
#include "PICA/float_types.hpp"
#include "PICA/pica_hash.hpp"
#include "helpers.hpp"
enum class ShaderType {
Vertex, Geometry
};
enum class ShaderType { Vertex, Geometry };
namespace ShaderOpcodes {
enum : u32 {
@ -46,66 +44,66 @@ namespace ShaderOpcodes {
SETEMIT = 0x2B,
JMPC = 0x2C,
JMPU = 0x2D,
CMP1 = 0x2E, // Both of these instructions are CMP
CMP1 = 0x2E, // Both of these instructions are CMP
CMP2 = 0x2F,
MAD = 0x38 // Everything between 0x38-0x3F is a MAD but fuck it
MAD = 0x38 // Everything between 0x38-0x3F is a MAD but fuck it
};
}
// Note: All PICA f24 vec4 registers must have the alignas(16) specifier to make them easier to access in SSE/NEON code in the JIT
class PICAShader {
using f24 = Floats::f24;
using vec4f = OpenGL::Vector<f24, 4>;
using vec4f = std::array<f24, 4>;
struct Loop {
u32 startingPC; // PC at the start of the loop
u32 endingPC; // PC at the end of the loop
u32 iterations; // How many iterations of the loop to run
u32 increment; // How much to increment the loop counter after each iteration
u32 startingPC; // PC at the start of the loop
u32 endingPC; // PC at the end of the loop
u32 iterations; // How many iterations of the loop to run
u32 increment; // How much to increment the loop counter after each iteration
};
// Info for ifc/ifu stack
struct ConditionalInfo {
u32 endingPC; // PC at the end of the if block (= DST)
u32 newPC; // PC after the if block is done executing (= DST + NUM)
u32 endingPC; // PC at the end of the if block (= DST)
u32 newPC; // PC after the if block is done executing (= DST + NUM)
};
struct CallInfo {
u32 endingPC; // PC at the end of the function
u32 returnPC; // PC to return to after the function ends
u32 endingPC; // PC at the end of the function
u32 returnPC; // PC to return to after the function ends
};
int bufferIndex; // Index of the next instruction to overwrite for shader uploads
int opDescriptorIndex; // Index of the next operand descriptor we'll overwrite
u32 floatUniformIndex = 0; // Which float uniform are we writing to? ([0, 95] range)
u32 floatUniformWordCount = 0; // How many words have we buffered for the current uniform transfer?
bool f32UniformTransfer = false; // Are we transferring an f32 uniform or an f24 uniform?
int bufferIndex; // Index of the next instruction to overwrite for shader uploads
int opDescriptorIndex; // Index of the next operand descriptor we'll overwrite
u32 floatUniformIndex = 0; // Which float uniform are we writing to? ([0, 95] range)
u32 floatUniformWordCount = 0; // How many words have we buffered for the current uniform transfer?
bool f32UniformTransfer = false; // Are we transferring an f32 uniform or an f24 uniform?
std::array<u32, 4> floatUniformBuffer; // Buffer for temporarily caching float uniform data
std::array<u32, 4> floatUniformBuffer; // Buffer for temporarily caching float uniform data
public:
public:
// These are placed close to the temp registers and co because it helps the JIT generate better code
u32 entrypoint = 0; // Initial shader PC
u32 entrypoint = 0; // Initial shader PC
u32 boolUniform;
std::array<OpenGL::Vector<u8, 4>, 4> intUniforms;
std::array<std::array<u8, 4>, 4> intUniforms;
alignas(16) std::array<vec4f, 96> floatUniforms;
alignas(16) std::array<vec4f, 16> fixedAttributes; // Fixed vertex attributes
alignas(16) std::array<vec4f, 16> inputs; // Attributes passed to the shader
alignas(16) std::array<vec4f, 16> fixedAttributes; // Fixed vertex attributes
alignas(16) std::array<vec4f, 16> inputs; // Attributes passed to the shader
alignas(16) std::array<vec4f, 16> outputs;
alignas(16) vec4f dummy = vec4f({ f24::zero(), f24::zero(), f24::zero(), f24::zero() }); // Dummy register used by the JIT
alignas(16) vec4f dummy = vec4f({f24::zero(), f24::zero(), f24::zero(), f24::zero()}); // Dummy register used by the JIT
protected:
protected:
std::array<u32, 128> operandDescriptors;
alignas(16) std::array<vec4f, 16> tempRegisters; // General purpose registers the shader can use for temp values
OpenGL::Vector<s32, 2> addrRegister; // Address register
bool cmpRegister[2]; // Comparison registers where the result of CMP is stored in
alignas(16) std::array<vec4f, 16> tempRegisters; // General purpose registers the shader can use for temp values
std::array<s32, 2> addrRegister; // Address register
bool cmpRegister[2]; // Comparison registers where the result of CMP is stored in
u32 loopCounter;
u32 pc = 0; // Program counter: Index of the next instruction we're going to execute
u32 loopIndex = 0; // The index of our loop stack (0 = empty, 4 = full)
u32 ifIndex = 0; // The index of our IF stack
u32 callIndex = 0; // The index of our CALL stack
u32 pc = 0; // Program counter: Index of the next instruction we're going to execute
u32 loopIndex = 0; // The index of our loop stack (0 = empty, 4 = full)
u32 ifIndex = 0; // The index of our IF stack
u32 callIndex = 0; // The index of our CALL stack
std::array<Loop, 4> loopInfo;
std::array<ConditionalInfo, 8> conditionalInfo;
@ -117,7 +115,7 @@ protected:
// Ideally we want to be able to support multiple different types of hash depending on compilation settings, but let's get this working first
using Hash = PICAHash::HashType;
Hash lastCodeHash = 0; // Last hash computed for the shader code (Used for the JIT caching mechanism)
Hash lastCodeHash = 0; // Last hash computed for the shader code (Used for the JIT caching mechanism)
Hash lastOpdescHash = 0; // Last hash computed for the operand descriptors (Also used for the JIT)
bool codeHashDirty = false;
@ -130,7 +128,7 @@ protected:
vec4f getSource(u32 source);
vec4f& getDest(u32 dest);
private:
private:
// Interpreter functions for the various shader functions
void add(u32 instruction);
void call(u32 instruction);
@ -171,13 +169,13 @@ private:
bool negate;
using namespace Helpers;
if constexpr (sourceIndex == 1) { // SRC1
if constexpr (sourceIndex == 1) { // SRC1
negate = (getBit<4>(opDescriptor)) != 0;
compSwizzle = getBits<5, 8>(opDescriptor);
} else if constexpr (sourceIndex == 2) { // SRC2
} else if constexpr (sourceIndex == 2) { // SRC2
negate = (getBit<13>(opDescriptor)) != 0;
compSwizzle = getBits<14, 8>(opDescriptor);
} else if constexpr (sourceIndex == 3) { // SRC3
} else if constexpr (sourceIndex == 3) { // SRC3
negate = (getBit<22>(opDescriptor)) != 0;
compSwizzle = getBits<23, 8>(opDescriptor);
}
@ -185,8 +183,8 @@ private:
// Iterate through every component of the swizzled vector in reverse order
// And get which source component's index to match it with
for (int comp = 0; comp < 4; comp++) {
int index = compSwizzle & 3; // Get index for this component
compSwizzle >>= 2; // Move to next component index
int index = compSwizzle & 3; // Get index for this component
compSwizzle >>= 2; // Move to next component index
ret[3 - comp] = source[index];
}
@ -212,39 +210,33 @@ private:
u8 getIndexedSource(u32 source, u32 index);
bool isCondTrue(u32 instruction);
public:
public:
static constexpr size_t maxInstructionCount = 4096;
std::array<u32, maxInstructionCount> loadedShader; // Currently loaded & active shader
std::array<u32, maxInstructionCount> bufferedShader; // Shader to be transferred when the SH_CODETRANSFER_END reg gets written to
std::array<u32, maxInstructionCount> loadedShader; // Currently loaded & active shader
std::array<u32, maxInstructionCount> bufferedShader; // Shader to be transferred when the SH_CODETRANSFER_END reg gets written to
PICAShader(ShaderType type) : type(type) {}
// Theese functions are in the header to be inlined more easily, though with LTO I hope I'll be able to move them
void finalize() {
std::memcpy(&loadedShader[0], &bufferedShader[0], 4096 * sizeof(u32));
}
void finalize() { std::memcpy(&loadedShader[0], &bufferedShader[0], 4096 * sizeof(u32)); }
void setBufferIndex(u32 index) {
bufferIndex = index & 0xfff;
}
void setBufferIndex(u32 index) { bufferIndex = index & 0xfff; }
void setOpDescriptorIndex(u32 index) {
opDescriptorIndex = index & 0x7f;
}
void setOpDescriptorIndex(u32 index) { opDescriptorIndex = index & 0x7f; }
void uploadWord(u32 word) {
if (bufferIndex >= 4095) Helpers::panic("o no, shader upload overflew");
bufferedShader[bufferIndex++] = word;
bufferIndex &= 0xfff;
codeHashDirty = true; // Signal the JIT if necessary that the program hash has potentially changed
codeHashDirty = true; // Signal the JIT if necessary that the program hash has potentially changed
}
void uploadDescriptor(u32 word) {
operandDescriptors[opDescriptorIndex++] = word;
opDescriptorIndex &= 0x7f;
opdescHashDirty = true; // Signal the JIT if necessary that the program hash has potentially changed
opdescHashDirty = true; // Signal the JIT if necessary that the program hash has potentially changed
}
void setFloatUniformIndex(u32 word) {
@ -255,23 +247,22 @@ public:
void uploadFloatUniform(u32 word) {
floatUniformBuffer[floatUniformWordCount++] = word;
if (floatUniformIndex >= 96)
Helpers::panic("[PICA] Tried to write float uniform %d", floatUniformIndex);
if (floatUniformIndex >= 96) Helpers::panic("[PICA] Tried to write float uniform %d", floatUniformIndex);
if ((f32UniformTransfer && floatUniformWordCount >= 4) || (!f32UniformTransfer && floatUniformWordCount >= 3)) {
vec4f& uniform = floatUniforms[floatUniformIndex++];
floatUniformWordCount = 0;
if (f32UniformTransfer) {
uniform.x() = f24::fromFloat32(*(float*)&floatUniformBuffer[3]);
uniform.y() = f24::fromFloat32(*(float*)&floatUniformBuffer[2]);
uniform.z() = f24::fromFloat32(*(float*)&floatUniformBuffer[1]);
uniform.w() = f24::fromFloat32(*(float*)&floatUniformBuffer[0]);
uniform[0] = f24::fromFloat32(*(float*)&floatUniformBuffer[3]);
uniform[1] = f24::fromFloat32(*(float*)&floatUniformBuffer[2]);
uniform[2] = f24::fromFloat32(*(float*)&floatUniformBuffer[1]);
uniform[3] = f24::fromFloat32(*(float*)&floatUniformBuffer[0]);
} else {
uniform.x() = f24::fromRaw(floatUniformBuffer[2] & 0xffffff);
uniform.y() = f24::fromRaw(((floatUniformBuffer[1] & 0xffff) << 8) | (floatUniformBuffer[2] >> 24));
uniform.z() = f24::fromRaw(((floatUniformBuffer[0] & 0xff) << 16) | (floatUniformBuffer[1] >> 16));
uniform.w() = f24::fromRaw(floatUniformBuffer[0] >> 8);
uniform[0] = f24::fromRaw(floatUniformBuffer[2] & 0xffffff);
uniform[1] = f24::fromRaw(((floatUniformBuffer[1] & 0xffff) << 8) | (floatUniformBuffer[2] >> 24));
uniform[2] = f24::fromRaw(((floatUniformBuffer[0] & 0xff) << 16) | (floatUniformBuffer[1] >> 16));
uniform[3] = f24::fromRaw(floatUniformBuffer[0] >> 8);
}
}
}
@ -280,10 +271,10 @@ public:
using namespace Helpers;
auto& u = intUniforms[index];
u.x() = word & 0xff;
u.y() = getBits<8, 8>(word);
u.z() = getBits<16, 8>(word);
u.w() = getBits<24, 8>(word);
u[0] = word & 0xff;
u[1] = getBits<8, 8>(word);
u[2] = getBits<16, 8>(word);
u[3] = getBits<24, 8>(word);
}
void run();