mirror of
https://github.com/wheremyfoodat/Panda3DS.git
synced 2025-04-20 19:44:58 +00:00
gpu: Use spans for memory access
This commit is contained in:
parent
50bf9bf7da
commit
ca22b8ec4a
7 changed files with 55 additions and 63 deletions
|
@ -1,5 +1,6 @@
|
|||
#pragma once
|
||||
#include <array>
|
||||
#include <span>
|
||||
|
||||
#include "PICA/dynapica/shader_rec.hpp"
|
||||
#include "PICA/float_types.hpp"
|
||||
|
@ -64,9 +65,9 @@ class GPU {
|
|||
std::array<u32, 3> fixedAttrBuff; // Buffer to hold fixed attributes in until they get submitted
|
||||
|
||||
// Command processor pointers for GPU command lists
|
||||
u32* cmdBuffStart = nullptr;
|
||||
u32* cmdBuffEnd = nullptr;
|
||||
u32* cmdBuffCurr = nullptr;
|
||||
std::span<u32> cmdBuff{};
|
||||
u32 cmdBuffEnd{};
|
||||
u32 cmdBuffCurr{};
|
||||
|
||||
std::unique_ptr<Renderer> renderer;
|
||||
PICA::Vertex getImmediateModeVertex();
|
||||
|
@ -127,19 +128,18 @@ class GPU {
|
|||
}
|
||||
}
|
||||
|
||||
// Get a pointer of type T* to the data starting from physical address paddr
|
||||
// Get a span of type T to the data starting from physical address paddr
|
||||
template <typename T>
|
||||
T* getPointerPhys(u32 paddr) {
|
||||
if (paddr >= PhysicalAddrs::FCRAM && paddr <= PhysicalAddrs::FCRAMEnd) {
|
||||
std::span<T> getPointerPhys(u32 paddr, u32 size) {
|
||||
if (paddr >= PhysicalAddrs::FCRAM && paddr + size <= PhysicalAddrs::FCRAMEnd) {
|
||||
u8* fcram = mem.getFCRAM();
|
||||
u32 index = paddr - PhysicalAddrs::FCRAM;
|
||||
|
||||
return (T*)&fcram[index];
|
||||
} else if (paddr >= PhysicalAddrs::VRAM && paddr <= PhysicalAddrs::VRAMEnd) {
|
||||
return std::span{(T*)&fcram[index], size / sizeof(T)};
|
||||
} else if (paddr >= PhysicalAddrs::VRAM && paddr + size <= PhysicalAddrs::VRAMEnd) {
|
||||
u32 index = paddr - PhysicalAddrs::VRAM;
|
||||
return (T*)&vram[index];
|
||||
return std::span{(T*)&vram[index], size / sizeof(T)};
|
||||
} else [[unlikely]] {
|
||||
Helpers::panic("[GPU] Tried to access unknown physical address: %08X", paddr);
|
||||
}
|
||||
}
|
||||
};
|
||||
};
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
#include <fstream>
|
||||
#include <optional>
|
||||
#include <vector>
|
||||
#include <span>
|
||||
#include "crypto/aes_engine.hpp"
|
||||
#include "helpers.hpp"
|
||||
#include "handles.hpp"
|
||||
|
@ -168,6 +169,11 @@ public:
|
|||
u32 getLinearHeapVaddr();
|
||||
u8* getFCRAM() { return fcram; }
|
||||
|
||||
template <class T>
|
||||
std::span<T> getReadPointer(u32 address, u32 size) {
|
||||
return std::span{reinterpret_cast<T*>(getReadPointer(address), size / sizeof(T))};
|
||||
}
|
||||
|
||||
// Total amount of OS-only FCRAM available (Can vary depending on how much FCRAM the app requests via the cart exheader)
|
||||
u32 totalSysFCRAM() {
|
||||
return FCRAM_SIZE - FCRAM_APPLICATION_SIZE;
|
||||
|
@ -248,4 +254,4 @@ public:
|
|||
|
||||
void setVRAM(u8* pointer) { vram = pointer; }
|
||||
bool allocateMainThreadStack(u32 size);
|
||||
};
|
||||
};
|
||||
|
|
|
@ -40,7 +40,7 @@ struct Texture {
|
|||
|
||||
void allocate();
|
||||
void setNewConfig(u32 newConfig);
|
||||
void decodeTexture(const void* data);
|
||||
void decodeTexture(std::span<const u8> data);
|
||||
void free();
|
||||
u64 sizeInBytes();
|
||||
|
||||
|
@ -61,4 +61,4 @@ struct Texture {
|
|||
// TODO: Make hasAlpha a template parameter
|
||||
u32 getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, const void* data);
|
||||
u32 decodeETC(u32 alpha, u32 u, u32 v, u64 colourData);
|
||||
};
|
||||
};
|
||||
|
|
|
@ -131,12 +131,12 @@ void GPU::drawArrays() {
|
|||
vertexIndex = i + regs[PICA::InternalRegs::VertexOffsetReg];
|
||||
} else {
|
||||
if (shortIndex) {
|
||||
auto ptr = getPointerPhys<u16>(indexBufferPointer);
|
||||
vertexIndex = *ptr; // TODO: This is very unsafe
|
||||
auto ptr = getPointerPhys<u16>(indexBufferPointer, 2);
|
||||
vertexIndex = ptr[0]; // TODO: This is very unsafe
|
||||
indexBufferPointer += 2;
|
||||
} else {
|
||||
auto ptr = getPointerPhys<u8>(indexBufferPointer);
|
||||
vertexIndex = *ptr; // TODO: This is also very unsafe
|
||||
auto ptr = getPointerPhys<u8>(indexBufferPointer, 1);
|
||||
vertexIndex = ptr[0]; // TODO: This is also very unsafe
|
||||
indexBufferPointer += 1;
|
||||
}
|
||||
}
|
||||
|
@ -194,47 +194,32 @@ void GPU::drawArrays() {
|
|||
vec4f& attribute = currentAttributes[attrCount];
|
||||
uint component; // Current component
|
||||
|
||||
const auto get_attrib = [&]<typename T>(T param) {
|
||||
auto ptr = getPointerPhys<T>(attrAddress, size * sizeof(T));
|
||||
for (component = 0; component < size; component++) {
|
||||
const float val = static_cast<float>(ptr[component]);
|
||||
attribute[component] = f24::fromFloat32(val);
|
||||
}
|
||||
attrAddress += size * sizeof(T);
|
||||
};
|
||||
|
||||
switch (attribType) {
|
||||
case 0: { // Signed byte
|
||||
s8* ptr = getPointerPhys<s8>(attrAddress);
|
||||
for (component = 0; component < size; component++) {
|
||||
float val = static_cast<float>(*ptr++);
|
||||
attribute[component] = f24::fromFloat32(val);
|
||||
}
|
||||
attrAddress += size * sizeof(s8);
|
||||
get_attrib(s8{});
|
||||
break;
|
||||
}
|
||||
|
||||
case 1: { // Unsigned byte
|
||||
u8* ptr = getPointerPhys<u8>(attrAddress);
|
||||
for (component = 0; component < size; component++) {
|
||||
float val = static_cast<float>(*ptr++);
|
||||
attribute[component] = f24::fromFloat32(val);
|
||||
}
|
||||
attrAddress += size * sizeof(u8);
|
||||
get_attrib(u8{});
|
||||
break;
|
||||
}
|
||||
|
||||
case 2: { // Short
|
||||
s16* ptr = getPointerPhys<s16>(attrAddress);
|
||||
for (component = 0; component < size; component++) {
|
||||
float val = static_cast<float>(*ptr++);
|
||||
attribute[component] = f24::fromFloat32(val);
|
||||
}
|
||||
attrAddress += size * sizeof(s16);
|
||||
get_attrib(s16{});
|
||||
break;
|
||||
}
|
||||
|
||||
case 3: { // Float
|
||||
float* ptr = getPointerPhys<float>(attrAddress);
|
||||
for (component = 0; component < size; component++) {
|
||||
float val = *ptr++;
|
||||
attribute[component] = f24::fromFloat32(val);
|
||||
}
|
||||
attrAddress += size * sizeof(float);
|
||||
get_attrib(float{});
|
||||
break;
|
||||
}
|
||||
|
||||
default: Helpers::panic("[PICA] Unimplemented attribute type %d", attribType);
|
||||
}
|
||||
|
||||
|
|
|
@ -305,9 +305,9 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
|
|||
u32 size = (regs[CmdBufSize0 + bufferIndex] & 0xfffff) << 3;
|
||||
|
||||
// Set command buffer state to execute the new buffer
|
||||
cmdBuffStart = getPointerPhys<u32>(addr);
|
||||
cmdBuffCurr = cmdBuffStart;
|
||||
cmdBuffEnd = cmdBuffStart + (size / sizeof(u32));
|
||||
cmdBuff = getPointerPhys<u32>(addr, size);
|
||||
cmdBuffCurr = 0;
|
||||
cmdBuffEnd = cmdBuff.size();
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -336,12 +336,13 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
|
|||
}
|
||||
|
||||
void GPU::startCommandList(u32 addr, u32 size) {
|
||||
cmdBuffStart = static_cast<u32*>(mem.getReadPointer(addr));
|
||||
if (!cmdBuffStart) Helpers::panic("Couldn't get buffer for command list");
|
||||
cmdBuff = mem.getReadPointer<u32>(addr, size);
|
||||
if (!cmdBuff.data())
|
||||
Helpers::panic("Couldn't get buffer for command list");
|
||||
// TODO: This is very memory unsafe. We get a pointer to FCRAM and just keep writing without checking if we're gonna go OoB
|
||||
|
||||
cmdBuffCurr = cmdBuffStart;
|
||||
cmdBuffEnd = cmdBuffStart + (size / sizeof(u32));
|
||||
cmdBuffCurr = 0;
|
||||
cmdBuffEnd = cmdBuff.size();
|
||||
|
||||
// LUT for converting the parameter mask to an actual 32-bit mask
|
||||
// The parameter mask is 4 bits long, each bit corresponding to one byte of the mask
|
||||
|
@ -357,13 +358,13 @@ void GPU::startCommandList(u32 addr, u32 size) {
|
|||
// The curr pointer starts out doubleword-aligned and is increased by 4 bytes each time
|
||||
// So to check if it is aligned, we get the number of words it's been incremented by
|
||||
// If that number is an odd value then the buffer is not aligned, otherwise it is
|
||||
if ((cmdBuffCurr - cmdBuffStart) % 2 != 0) {
|
||||
if (cmdBuffCurr % 2 != 0) {
|
||||
cmdBuffCurr++;
|
||||
}
|
||||
|
||||
// The first word of a command is the command parameter and the second one is the header
|
||||
u32 param1 = *cmdBuffCurr++;
|
||||
u32 header = *cmdBuffCurr++;
|
||||
const u32 param1 = cmdBuff[cmdBuffCurr++];
|
||||
const u32 header = cmdBuff[cmdBuffCurr++];
|
||||
|
||||
u32 id = header & 0xffff;
|
||||
u32 paramMaskIndex = getBits<16, 4>(header);
|
||||
|
@ -380,8 +381,8 @@ void GPU::startCommandList(u32 addr, u32 size) {
|
|||
writeInternalReg(id, param1, mask);
|
||||
for (u32 i = 0; i < paramCount; i++) {
|
||||
id += idIncrement;
|
||||
u32 param = *cmdBuffCurr++;
|
||||
u32 param = cmdBuff[cmdBuffCurr++];
|
||||
writeInternalReg(id, param, mask);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -448,7 +448,7 @@ OpenGL::Texture RendererGL::getTexture(Texture& tex) {
|
|||
if (buffer.has_value()) {
|
||||
return buffer.value().get().texture;
|
||||
} else {
|
||||
const void* textureData = gpu.getPointerPhys<void*>(tex.location); // Get pointer to the texture data in 3DS memory
|
||||
std::span<u8> textureData = gpu.getPointerPhys<u8>(tex.location, tex.sizeInBytes()); // Get pointer to the texture data in 3DS memory
|
||||
Texture& newTex = textureCache.add(tex);
|
||||
newTex.decodeTexture(textureData);
|
||||
|
||||
|
@ -515,4 +515,4 @@ void RendererGL::screenshot(const std::string& name) {
|
|||
}
|
||||
|
||||
stbi_write_png(name.c_str(), width, height, 4, flippedPixels.data(), 0);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -258,18 +258,18 @@ u32 Texture::decodeTexel(u32 u, u32 v, PICA::TextureFmt fmt, const void* data) {
|
|||
}
|
||||
}
|
||||
|
||||
void Texture::decodeTexture(const void* data) {
|
||||
void Texture::decodeTexture(std::span<const u8> data) {
|
||||
std::vector<u32> decoded;
|
||||
decoded.reserve(u64(size.u()) * u64(size.v()));
|
||||
|
||||
// Decode texels line by line
|
||||
for (u32 v = 0; v < size.v(); v++) {
|
||||
for (u32 u = 0; u < size.u(); u++) {
|
||||
u32 colour = decodeTexel(u, v, format, data);
|
||||
u32 colour = decodeTexel(u, v, format, data.data());
|
||||
decoded.push_back(colour);
|
||||
}
|
||||
}
|
||||
|
||||
texture.bind();
|
||||
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, size.u(), size.v(), GL_RGBA, GL_UNSIGNED_BYTE, decoded.data());
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue