gpu: Use spans for memory access

2025-08-11 18:50:03 +00:00 · 2023-07-20 14:29:38 +03:00 · 2023-07-20 14:29:38 +03:00 · ca22b8ec4a
commit ca22b8ec4a
parent 50bf9bf7da
7 changed files with 55 additions and 63 deletions
--- a/include/PICA/gpu.hpp
+++ b/include/PICA/gpu.hpp
@ -1,5 +1,6 @@
 #pragma once
 #include <array>
+#include <span>

 #include "PICA/dynapica/shader_rec.hpp"
 #include "PICA/float_types.hpp"
@ -64,9 +65,9 @@ class GPU {
 	std::array<u32, 3> fixedAttrBuff;  // Buffer to hold fixed attributes in until they get submitted

 	// Command processor pointers for GPU command lists
-	u32* cmdBuffStart = nullptr;
-	u32* cmdBuffEnd = nullptr;
-	u32* cmdBuffCurr = nullptr;
+	std::span<u32> cmdBuff{};
+	u32 cmdBuffEnd{};
+	u32 cmdBuffCurr{};

 	std::unique_ptr<Renderer> renderer;
 	PICA::Vertex getImmediateModeVertex();
@ -127,19 +128,18 @@ class GPU {
 		}
 	}

-	// Get a pointer of type T* to the data starting from physical address paddr
+	// Get a span of T covering `size` bytes starting from physical address paddr (panics if the range is outside FCRAM/VRAM)
 	template <typename T>
-	T* getPointerPhys(u32 paddr) {
-		if (paddr >= PhysicalAddrs::FCRAM && paddr <= PhysicalAddrs::FCRAMEnd) {
+	std::span<T> getPointerPhys(u32 paddr, u32 size) {
+		if (paddr >= PhysicalAddrs::FCRAM && paddr <= PhysicalAddrs::FCRAMEnd && size <= PhysicalAddrs::FCRAMEnd - paddr) {  // Subtract instead of paddr + size, which can wrap around u32
 			u8* fcram = mem.getFCRAM();
 			u32 index = paddr - PhysicalAddrs::FCRAM;
-
-			return (T*)&fcram[index];
-		} else if (paddr >= PhysicalAddrs::VRAM && paddr <= PhysicalAddrs::VRAMEnd) {
+			return std::span{(T*)&fcram[index], size / sizeof(T)};
+		} else if (paddr >= PhysicalAddrs::VRAM && paddr <= PhysicalAddrs::VRAMEnd && size <= PhysicalAddrs::VRAMEnd - paddr) {  // Same wrap-safe check for VRAM
 			u32 index = paddr - PhysicalAddrs::VRAM;
-			return (T*)&vram[index];
+			return std::span{(T*)&vram[index], size / sizeof(T)};
 		} else [[unlikely]] {
 			Helpers::panic("[GPU] Tried to access unknown physical address: %08X", paddr);
 		}
 	}
-};
+};
--- a/include/memory.hpp
+++ b/include/memory.hpp
@ -5,6 +5,7 @@
 #include <fstream>
 #include <optional>
 #include <vector>
+#include <span>
 #include "crypto/aes_engine.hpp"
 #include "helpers.hpp"
 #include "handles.hpp"
@ -168,6 +169,11 @@ public:
 	u32 getLinearHeapVaddr();
 	u8* getFCRAM() { return fcram; }

+	template <class T>  // Typed view: span of size / sizeof(T) elements of T over the memory returned by getReadPointer(address)
+	std::span<T> getReadPointer(u32 address, u32 size) {
+		return std::span{reinterpret_cast<T*>(getReadPointer(address)), size / sizeof(T)};  // Cast only the pointer; the element count is the span's second argument
+	}
+
 	// Total amount of OS-only FCRAM available (Can vary depending on how much FCRAM the app requests via the cart exheader)
 	u32 totalSysFCRAM() {
 		return FCRAM_SIZE - FCRAM_APPLICATION_SIZE;
@ -248,4 +254,4 @@ public:

 	void setVRAM(u8* pointer) { vram = pointer; }
 	bool allocateMainThreadStack(u32 size);
-};
+};
--- a/include/renderer_gl/textures.hpp
+++ b/include/renderer_gl/textures.hpp
@ -40,7 +40,7 @@ struct Texture {

    void allocate();
    void setNewConfig(u32 newConfig);
-    void decodeTexture(const void* data);
+    void decodeTexture(std::span<const u8> data);
    void free();
    u64 sizeInBytes();

@ -61,4 +61,4 @@ struct Texture {
    // TODO: Make hasAlpha a template parameter
    u32 getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, const void* data);
    u32 decodeETC(u32 alpha, u32 u, u32 v, u64 colourData);
-};
+};
--- a/src/core/PICA/gpu.cpp
+++ b/src/core/PICA/gpu.cpp
@ -131,12 +131,12 @@ void GPU::drawArrays() {
 			vertexIndex = i + regs[PICA::InternalRegs::VertexOffsetReg];
 		} else {
 			if (shortIndex) {
-				auto ptr = getPointerPhys<u16>(indexBufferPointer);
-				vertexIndex = *ptr;  // TODO: This is very unsafe
+				auto ptr = getPointerPhys<u16>(indexBufferPointer, 2);
+				vertexIndex = ptr[0];  // TODO: This is very unsafe
 				indexBufferPointer += 2;
 			} else {
-				auto ptr = getPointerPhys<u8>(indexBufferPointer);
-				vertexIndex = *ptr;  // TODO: This is also very unsafe
+				auto ptr = getPointerPhys<u8>(indexBufferPointer, 1);
+				vertexIndex = ptr[0];  // TODO: This is also very unsafe
 				indexBufferPointer += 1;
 			}
 		}
@ -194,47 +194,32 @@ void GPU::drawArrays() {
 					vec4f& attribute = currentAttributes[attrCount];
 					uint component;  // Current component

+					const auto get_attrib = [&]<typename T>(T param) {
+						auto ptr = getPointerPhys<T>(attrAddress, size * sizeof(T));
+						for (component = 0; component < size; component++) {
+							const float val = static_cast<float>(ptr[component]);
+							attribute[component] = f24::fromFloat32(val);
+						}
+						attrAddress += size * sizeof(T);
+					};
+
 					switch (attribType) {
 						case 0: {  // Signed byte
-							s8* ptr = getPointerPhys<s8>(attrAddress);
-							for (component = 0; component < size; component++) {
-								float val = static_cast<float>(*ptr++);
-								attribute[component] = f24::fromFloat32(val);
-							}
-							attrAddress += size * sizeof(s8);
+							get_attrib(s8{});
 							break;
 						}
-
 						case 1: {  // Unsigned byte
-							u8* ptr = getPointerPhys<u8>(attrAddress);
-							for (component = 0; component < size; component++) {
-								float val = static_cast<float>(*ptr++);
-								attribute[component] = f24::fromFloat32(val);
-							}
-							attrAddress += size * sizeof(u8);
+							get_attrib(u8{});
 							break;
 						}
-
 						case 2: {  // Short
-							s16* ptr = getPointerPhys<s16>(attrAddress);
-							for (component = 0; component < size; component++) {
-								float val = static_cast<float>(*ptr++);
-								attribute[component] = f24::fromFloat32(val);
-							}
-							attrAddress += size * sizeof(s16);
+							get_attrib(s16{});
 							break;
 						}
-
 						case 3: {  // Float
-							float* ptr = getPointerPhys<float>(attrAddress);
-							for (component = 0; component < size; component++) {
-								float val = *ptr++;
-								attribute[component] = f24::fromFloat32(val);
-							}
-							attrAddress += size * sizeof(float);
+							get_attrib(float{});
 							break;
 						}
-
 						default: Helpers::panic("[PICA] Unimplemented attribute type %d", attribType);
 					}

--- a/src/core/PICA/regs.cpp
+++ b/src/core/PICA/regs.cpp
@ -305,9 +305,9 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
 				u32 size = (regs[CmdBufSize0 + bufferIndex] & 0xfffff) << 3;

 				// Set command buffer state to execute the new buffer
-				cmdBuffStart = getPointerPhys<u32>(addr);
-				cmdBuffCurr = cmdBuffStart;
-				cmdBuffEnd = cmdBuffStart + (size / sizeof(u32));
+				cmdBuff = getPointerPhys<u32>(addr, size);
+				cmdBuffCurr = 0;
+				cmdBuffEnd = cmdBuff.size();
 			}
 			break;
 		}
@ -336,12 +336,13 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
 }

 void GPU::startCommandList(u32 addr, u32 size) {
-	cmdBuffStart = static_cast<u32*>(mem.getReadPointer(addr));
-	if (!cmdBuffStart) Helpers::panic("Couldn't get buffer for command list");
+	cmdBuff = mem.getReadPointer<u32>(addr, size);
+	if (!cmdBuff.data())
+		Helpers::panic("Couldn't get buffer for command list");
 	// TODO: This is very memory unsafe. We get a pointer to FCRAM and just keep writing without checking if we're gonna go OoB

-	cmdBuffCurr = cmdBuffStart;
-	cmdBuffEnd = cmdBuffStart + (size / sizeof(u32));
+	cmdBuffCurr = 0;
+	cmdBuffEnd = cmdBuff.size();

 	// LUT for converting the parameter mask to an actual 32-bit mask
 	// The parameter mask is 4 bits long, each bit corresponding to one byte of the mask
@ -357,13 +358,13 @@ void GPU::startCommandList(u32 addr, u32 size) {
 		// The curr pointer starts out doubleword-aligned and is increased by 4 bytes each time
 		// So to check if it is aligned, we get the number of words it's been incremented by
 		// If that number is an odd value then the buffer is not aligned, otherwise it is
-		if ((cmdBuffCurr - cmdBuffStart) % 2 != 0) {
+		if (cmdBuffCurr % 2 != 0) {
 			cmdBuffCurr++;
 		}

 		// The first word of a command is the command parameter and the second one is the header
-		u32 param1 = *cmdBuffCurr++;
-		u32 header = *cmdBuffCurr++;
+		const u32 param1 = cmdBuff[cmdBuffCurr++];
+		const u32 header = cmdBuff[cmdBuffCurr++];

 		u32 id = header & 0xffff;
 		u32 paramMaskIndex = getBits<16, 4>(header);
@ -380,8 +381,8 @@ void GPU::startCommandList(u32 addr, u32 size) {
 		writeInternalReg(id, param1, mask);
 		for (u32 i = 0; i < paramCount; i++) {
 			id += idIncrement;
-			u32 param = *cmdBuffCurr++;
+			u32 param = cmdBuff[cmdBuffCurr++];
 			writeInternalReg(id, param, mask);
 		}
 	}
-}
+}
--- a/src/core/renderer_gl/renderer_gl.cpp
+++ b/src/core/renderer_gl/renderer_gl.cpp
@ -448,7 +448,7 @@ OpenGL::Texture RendererGL::getTexture(Texture& tex) {
 	if (buffer.has_value()) {
 		return buffer.value().get().texture;
 	} else {
-		const void* textureData = gpu.getPointerPhys<void*>(tex.location);  // Get pointer to the texture data in 3DS memory
+		std::span<u8> textureData = gpu.getPointerPhys<u8>(tex.location, tex.sizeInBytes());  // Get pointer to the texture data in 3DS memory
 		Texture& newTex = textureCache.add(tex);
 		newTex.decodeTexture(textureData);

@ -515,4 +515,4 @@ void RendererGL::screenshot(const std::string& name) {
 	}

 	stbi_write_png(name.c_str(), width, height, 4, flippedPixels.data(), 0);
-}
+}
--- a/src/core/renderer_gl/textures.cpp
+++ b/src/core/renderer_gl/textures.cpp
@ -258,18 +258,18 @@ u32 Texture::decodeTexel(u32 u, u32 v, PICA::TextureFmt fmt, const void* data) {
    }
 }

-void Texture::decodeTexture(const void* data) {
+void Texture::decodeTexture(std::span<const u8> data) {
    std::vector<u32> decoded;
    decoded.reserve(u64(size.u()) * u64(size.v()));

    // Decode texels line by line
    for (u32 v = 0; v < size.v(); v++) {
        for (u32 u = 0; u < size.u(); u++) {
-            u32 colour = decodeTexel(u, v, format, data);
+            u32 colour = decodeTexel(u, v, format, data.data());  // NOTE: only the base pointer is forwarded; data.size() is not consulted here
            decoded.push_back(colour);
        }
    }

    texture.bind();
    glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, size.u(), size.v(), GL_RGBA, GL_UNSIGNED_BYTE, decoded.data());
-}
+}