gpu: Use spans for memory access

This commit is contained in:
GPUCode 2023-07-20 14:29:38 +03:00
parent 50bf9bf7da
commit ca22b8ec4a
7 changed files with 55 additions and 63 deletions

View file

@ -1,5 +1,6 @@
#pragma once
#include <array>
#include <span>
#include "PICA/dynapica/shader_rec.hpp"
#include "PICA/float_types.hpp"
@ -64,9 +65,9 @@ class GPU {
std::array<u32, 3> fixedAttrBuff; // Buffer to hold fixed attributes in until they get submitted
// Command processor pointers for GPU command lists
u32* cmdBuffStart = nullptr;
u32* cmdBuffEnd = nullptr;
u32* cmdBuffCurr = nullptr;
std::span<u32> cmdBuff{};
u32 cmdBuffEnd{};
u32 cmdBuffCurr{};
std::unique_ptr<Renderer> renderer;
PICA::Vertex getImmediateModeVertex();
@ -127,19 +128,18 @@ class GPU {
}
}
// Get a pointer of type T* to the data starting from physical address paddr
// Get a span of type T to the data starting from physical address paddr
template <typename T>
T* getPointerPhys(u32 paddr) {
if (paddr >= PhysicalAddrs::FCRAM && paddr <= PhysicalAddrs::FCRAMEnd) {
std::span<T> getPointerPhys(u32 paddr, u32 size) {
if (paddr >= PhysicalAddrs::FCRAM && paddr + size <= PhysicalAddrs::FCRAMEnd) {
u8* fcram = mem.getFCRAM();
u32 index = paddr - PhysicalAddrs::FCRAM;
return (T*)&fcram[index];
} else if (paddr >= PhysicalAddrs::VRAM && paddr <= PhysicalAddrs::VRAMEnd) {
return std::span{(T*)&fcram[index], size / sizeof(T)};
} else if (paddr >= PhysicalAddrs::VRAM && paddr + size <= PhysicalAddrs::VRAMEnd) {
u32 index = paddr - PhysicalAddrs::VRAM;
return (T*)&vram[index];
return std::span{(T*)&vram[index], size / sizeof(T)};
} else [[unlikely]] {
Helpers::panic("[GPU] Tried to access unknown physical address: %08X", paddr);
}
}
};
};

View file

@ -5,6 +5,7 @@
#include <fstream>
#include <optional>
#include <vector>
#include <span>
#include "crypto/aes_engine.hpp"
#include "helpers.hpp"
#include "handles.hpp"
@ -168,6 +169,11 @@ public:
u32 getLinearHeapVaddr();
u8* getFCRAM() { return fcram; }
// Get a span of T covering `size` bytes starting at `address`.
// The span holds size / sizeof(T) elements, so a trailing partial element is dropped.
// If the pointer-returning overload yields a null pointer, an empty span is returned
// (data() == nullptr) so callers can keep testing data() for failure.
template <class T>
std::span<T> getReadPointer(u32 address, u32 size) {
    // Cast only the pointer; the element count is a separate span constructor argument
    T* const base = reinterpret_cast<T*>(getReadPointer(address));
    if (base == nullptr) {
        return {};
    }
    return std::span<T>{base, size / sizeof(T)};
}
// Total amount of OS-only FCRAM available (Can vary depending on how much FCRAM the app requests via the cart exheader)
u32 totalSysFCRAM() {
return FCRAM_SIZE - FCRAM_APPLICATION_SIZE;
@ -248,4 +254,4 @@ public:
void setVRAM(u8* pointer) { vram = pointer; }
bool allocateMainThreadStack(u32 size);
};
};

View file

@ -40,7 +40,7 @@ struct Texture {
void allocate();
void setNewConfig(u32 newConfig);
void decodeTexture(const void* data);
void decodeTexture(std::span<const u8> data);
void free();
u64 sizeInBytes();
@ -61,4 +61,4 @@ struct Texture {
// TODO: Make hasAlpha a template parameter
u32 getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, const void* data);
u32 decodeETC(u32 alpha, u32 u, u32 v, u64 colourData);
};
};

View file

@ -131,12 +131,12 @@ void GPU::drawArrays() {
vertexIndex = i + regs[PICA::InternalRegs::VertexOffsetReg];
} else {
if (shortIndex) {
auto ptr = getPointerPhys<u16>(indexBufferPointer);
vertexIndex = *ptr; // TODO: This is very unsafe
auto ptr = getPointerPhys<u16>(indexBufferPointer, 2);
vertexIndex = ptr[0]; // TODO: This is very unsafe
indexBufferPointer += 2;
} else {
auto ptr = getPointerPhys<u8>(indexBufferPointer);
vertexIndex = *ptr; // TODO: This is also very unsafe
auto ptr = getPointerPhys<u8>(indexBufferPointer, 1);
vertexIndex = ptr[0]; // TODO: This is also very unsafe
indexBufferPointer += 1;
}
}
@ -194,47 +194,32 @@ void GPU::drawArrays() {
vec4f& attribute = currentAttributes[attrCount];
uint component; // Current component
// Reads `size` components of type T starting at attrAddress, converts each one to f24 (going
// through float) and stores it in `attribute`, then advances attrAddress past the data it read.
// The argument is only a tag used to deduce T at the call site; its value is never read.
// `component` is the variable declared outside the lambda, so it equals `size` once this returns.
const auto get_attrib = [&]<typename T>(T) {
    const auto components = getPointerPhys<T>(attrAddress, size * sizeof(T));
    component = 0;
    while (component < size) {
        attribute[component] = f24::fromFloat32(static_cast<float>(components[component]));
        component++;
    }
    attrAddress += size * sizeof(T);
};
switch (attribType) {
case 0: { // Signed byte
s8* ptr = getPointerPhys<s8>(attrAddress);
for (component = 0; component < size; component++) {
float val = static_cast<float>(*ptr++);
attribute[component] = f24::fromFloat32(val);
}
attrAddress += size * sizeof(s8);
get_attrib(s8{});
break;
}
case 1: { // Unsigned byte
u8* ptr = getPointerPhys<u8>(attrAddress);
for (component = 0; component < size; component++) {
float val = static_cast<float>(*ptr++);
attribute[component] = f24::fromFloat32(val);
}
attrAddress += size * sizeof(u8);
get_attrib(u8{});
break;
}
case 2: { // Short
s16* ptr = getPointerPhys<s16>(attrAddress);
for (component = 0; component < size; component++) {
float val = static_cast<float>(*ptr++);
attribute[component] = f24::fromFloat32(val);
}
attrAddress += size * sizeof(s16);
get_attrib(s16{});
break;
}
case 3: { // Float
float* ptr = getPointerPhys<float>(attrAddress);
for (component = 0; component < size; component++) {
float val = *ptr++;
attribute[component] = f24::fromFloat32(val);
}
attrAddress += size * sizeof(float);
get_attrib(float{});
break;
}
default: Helpers::panic("[PICA] Unimplemented attribute type %d", attribType);
}

View file

@ -305,9 +305,9 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
u32 size = (regs[CmdBufSize0 + bufferIndex] & 0xfffff) << 3;
// Set command buffer state to execute the new buffer
cmdBuffStart = getPointerPhys<u32>(addr);
cmdBuffCurr = cmdBuffStart;
cmdBuffEnd = cmdBuffStart + (size / sizeof(u32));
cmdBuff = getPointerPhys<u32>(addr, size);
cmdBuffCurr = 0;
cmdBuffEnd = cmdBuff.size();
}
break;
}
@ -336,12 +336,13 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
}
void GPU::startCommandList(u32 addr, u32 size) {
cmdBuffStart = static_cast<u32*>(mem.getReadPointer(addr));
if (!cmdBuffStart) Helpers::panic("Couldn't get buffer for command list");
cmdBuff = mem.getReadPointer<u32>(addr, size);
if (!cmdBuff.data())
Helpers::panic("Couldn't get buffer for command list");
// TODO: This is very memory unsafe. We get a pointer to FCRAM and just keep writing without checking if we're gonna go OoB
cmdBuffCurr = cmdBuffStart;
cmdBuffEnd = cmdBuffStart + (size / sizeof(u32));
cmdBuffCurr = 0;
cmdBuffEnd = cmdBuff.size();
// LUT for converting the parameter mask to an actual 32-bit mask
// The parameter mask is 4 bits long, each bit corresponding to one byte of the mask
@ -357,13 +358,13 @@ void GPU::startCommandList(u32 addr, u32 size) {
// The curr pointer starts out doubleword-aligned and is increased by 4 bytes each time
// So to check if it is aligned, we get the number of words it's been incremented by
// If that number is an odd value then the buffer is not aligned, otherwise it is
if ((cmdBuffCurr - cmdBuffStart) % 2 != 0) {
if (cmdBuffCurr % 2 != 0) {
cmdBuffCurr++;
}
// The first word of a command is the command parameter and the second one is the header
u32 param1 = *cmdBuffCurr++;
u32 header = *cmdBuffCurr++;
const u32 param1 = cmdBuff[cmdBuffCurr++];
const u32 header = cmdBuff[cmdBuffCurr++];
u32 id = header & 0xffff;
u32 paramMaskIndex = getBits<16, 4>(header);
@ -380,8 +381,8 @@ void GPU::startCommandList(u32 addr, u32 size) {
writeInternalReg(id, param1, mask);
for (u32 i = 0; i < paramCount; i++) {
id += idIncrement;
u32 param = *cmdBuffCurr++;
u32 param = cmdBuff[cmdBuffCurr++];
writeInternalReg(id, param, mask);
}
}
}
}

View file

@ -448,7 +448,7 @@ OpenGL::Texture RendererGL::getTexture(Texture& tex) {
if (buffer.has_value()) {
return buffer.value().get().texture;
} else {
const void* textureData = gpu.getPointerPhys<void*>(tex.location); // Get pointer to the texture data in 3DS memory
std::span<u8> textureData = gpu.getPointerPhys<u8>(tex.location, tex.sizeInBytes()); // Get pointer to the texture data in 3DS memory
Texture& newTex = textureCache.add(tex);
newTex.decodeTexture(textureData);
@ -515,4 +515,4 @@ void RendererGL::screenshot(const std::string& name) {
}
stbi_write_png(name.c_str(), width, height, 4, flippedPixels.data(), 0);
}
}

View file

@ -258,18 +258,18 @@ u32 Texture::decodeTexel(u32 u, u32 v, PICA::TextureFmt fmt, const void* data) {
}
}
// Decode every texel of this texture into a 32-bit colour and upload the result to the GL texture.
// `data` is the still-encoded texture data; each texel is decoded by decodeTexel according to `format`.
void Texture::decodeTexture(const void* data) {
void Texture::decodeTexture(std::span<const u8> data) {
std::vector<u32> decoded;
// Reserve room for all size.u() * size.v() texels up front; the factors are widened to u64 before multiplying
decoded.reserve(u64(size.u()) * u64(size.v()));
// Decode texels line by line
for (u32 v = 0; v < size.v(); v++) {
for (u32 u = 0; u < size.u(); u++) {
u32 colour = decodeTexel(u, v, format, data);
// Only the raw pointer is forwarded here, so the span's length is not used for bounds checking in this function
u32 colour = decodeTexel(u, v, format, data.data());
decoded.push_back(colour);
}
}
// Bind the texture, then upload the decoded texels as GL_RGBA / GL_UNSIGNED_BYTE starting at offset (0, 0)
texture.bind();
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, size.u(), size.v(), GL_RGBA, GL_UNSIGNED_BYTE, decoded.data());
}
}