mirror of
https://github.com/wheremyfoodat/Panda3DS.git
synced 2025-04-20 19:44:58 +00:00
code: Better screen support
This commit is contained in:
parent
c8f4d41b47
commit
603074dc90
15 changed files with 465 additions and 140 deletions
|
@ -28,6 +28,7 @@ class GPU {
|
|||
static constexpr u32 maxAttribCount = 12; // Up to 12 vertex attributes
|
||||
static constexpr u32 vramSize = 6_MB;
|
||||
Registers regs; // GPU internal registers
|
||||
std::array<u32, 0x1000> external_regs; // GPU external registers
|
||||
std::array<vec4f, 16> currentAttributes; // Vertex attributes before being passed to the shader
|
||||
|
||||
std::array<vec4f, 16> immediateModeAttributes; // Vertex attributes uploaded via immediate mode submission
|
||||
|
@ -66,9 +67,9 @@ class GPU {
|
|||
std::array<u32, 3> fixedAttrBuff; // Buffer to hold fixed attributes in until they get submitted
|
||||
|
||||
// Command processor pointers for GPU command lists
|
||||
u32* cmdBuffStart = nullptr;
|
||||
u32* cmdBuffEnd = nullptr;
|
||||
u32* cmdBuffCurr = nullptr;
|
||||
std::span<u32> cmdBuffStart{};
|
||||
u32 cmdBuffEnd = 0;
|
||||
u32 cmdBuffCurr = 0;
|
||||
|
||||
Renderer renderer;
|
||||
PICA::Vertex getImmediateModeVertex();
|
||||
|
@ -100,6 +101,9 @@ class GPU {
|
|||
u32 readReg(u32 address);
|
||||
void writeReg(u32 address, u32 value);
|
||||
|
||||
u32 readExternalReg(u32 index);
|
||||
void writeExternalReg(u32 index, u32 value);
|
||||
|
||||
// Used when processing GPU command lists
|
||||
u32 readInternalReg(u32 index);
|
||||
void writeInternalReg(u32 index, u32 value, u32 mask);
|
||||
|
@ -116,6 +120,10 @@ class GPU {
|
|||
renderer.displayTransfer(inputAddr, outputAddr, inputSize, outputSize, flags);
|
||||
}
|
||||
|
||||
// Forwards a texture copy request to the renderer, passing every argument through unchanged
void textureCopy(u32 inputAddr, u32 outputAddr, u32 totalCopyBytes, u32 inputSize, u32 outputSize, u32 flags) {
|
||||
renderer.textureCopy(inputAddr, outputAddr, totalCopyBytes, inputSize, outputSize, flags);
|
||||
}
|
||||
|
||||
// Read a value of type T from physical address paddr
|
||||
// This is necessary because vertex attribute fetching uses physical addresses
|
||||
template <typename T>
|
||||
|
@ -130,19 +138,18 @@ class GPU {
|
|||
}
|
||||
}
|
||||
|
||||
// Get a pointer of type T* to the data starting from physical address paddr
|
||||
// Get a span with the specified size of type T to the data starting from physical address paddr
|
||||
template <typename T>
|
||||
T* getPointerPhys(u32 paddr) {
|
||||
if (paddr >= PhysicalAddrs::FCRAM && paddr <= PhysicalAddrs::FCRAMEnd) {
|
||||
std::span<T> getSpanPhys(u32 paddr, u32 size) {
|
||||
if (paddr >= PhysicalAddrs::FCRAM && paddr + size <= PhysicalAddrs::FCRAMEnd) {
|
||||
u8* fcram = mem.getFCRAM();
|
||||
u32 index = paddr - PhysicalAddrs::FCRAM;
|
||||
|
||||
return (T*)&fcram[index];
|
||||
} else if (paddr >= PhysicalAddrs::VRAM && paddr <= PhysicalAddrs::VRAMEnd) {
|
||||
return std::span{(T*)&fcram[index], size / sizeof(T)};
|
||||
} else if (paddr >= PhysicalAddrs::VRAM && paddr + size <= PhysicalAddrs::VRAMEnd) {
|
||||
u32 index = paddr - PhysicalAddrs::VRAM;
|
||||
return (T*)&vram[index];
|
||||
return std::span{(T*)&vram[index], size / sizeof(T)};
|
||||
} else [[unlikely]] {
|
||||
Helpers::panic("[GPU] Tried to access unknown physical address: %08X", paddr);
|
||||
}
|
||||
}
|
||||
};
|
||||
};
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
#pragma once
|
||||
#include <string_view>
|
||||
#include "helpers.hpp"
|
||||
|
||||
namespace PICA {
|
||||
|
@ -174,6 +175,54 @@ namespace PICA {
|
|||
};
|
||||
}
|
||||
|
||||
// Indices of the GPU's external registers, for use with GPU::readExternalReg / GPU::writeExternalReg
// GPU::writeReg derives these indices from an address as (address - 0x1EF00004) / sizeof(u32)
namespace ExternalRegs {
|
||||
enum : u32 {
|
||||
MemFill1BufferStartPaddr = 0x3,
|
||||
MemFill1BufferEndPAddr = 0x4,
|
||||
MemFill1Value = 0x5,
|
||||
MemFill1Control = 0x6,
|
||||
MemFill2BufferStartPaddr = 0x7,
|
||||
MemFill2BufferEndPAddr = 0x8,
|
||||
MemFill2Value = 0x9,
|
||||
MemFill2Control = 0xA,
|
||||
VramBankControl = 0xB,
|
||||
GPUBusy = 0xC,
|
||||
BacklightControl = 0xBC,
|
||||
// TODO: Framebuffer regs
|
||||
// NOTE(review): every other Framebuffer1* index below equals its Framebuffer0* counterpart + 0x40,
// which would put Framebuffer0Size at 0x116 (0x156 - 0x40) rather than 0x2F - confirm this value
Framebuffer0Size = 0x2F,
|
||||
Framebuffer0AFirstAddr = 0x119,
|
||||
Framebuffer0ASecondAddr = 0x11A,
|
||||
Framebuffer0Config = 0x11B,
|
||||
Framebuffer0Select = 0x11D,
|
||||
Framebuffer0Stride = 0x123,
|
||||
Framebuffer0BFirstAddr = 0x124,
|
||||
Framebuffer0BSecondAddr = 0x125,
|
||||
Framebuffer1Size = 0x156,
|
||||
Framebuffer1AFirstAddr = 0x159,
|
||||
Framebuffer1ASecondAddr = 0x15A,
|
||||
Framebuffer1Config = 0x15B,
|
||||
Framebuffer1Select = 0x15D,
|
||||
Framebuffer1Stride = 0x163,
|
||||
Framebuffer1BFirstAddr = 0x164,
|
||||
Framebuffer1BSecondAddr = 0x165,
|
||||
TransferInputPAddr = 0x2FF,
|
||||
TransferOutputPAddr = 0x300,
|
||||
DisplayTransferOutputDim = 0x301,
|
||||
DisplayTransferInputDim = 0x302,
|
||||
TransferFlags = 0x303,
|
||||
TransferTrigger = 0x305,
|
||||
TextureCopyTotalBytes = 0x307,
|
||||
TextureCopyInputLineGap = 0x308,
|
||||
TextureCopyOutputLineGap = 0x309,
|
||||
};
|
||||
}
|
||||
|
||||
// Downscale mode of a display transfer, decoded from the transfer flags by Renderer::displayTransfer
// That function halves the output width for X and XY, and additionally halves the output height for XY
enum class Scaling : u32 {
|
||||
None = 0,
|
||||
X = 1,
|
||||
XY = 2,
|
||||
};
|
||||
|
||||
namespace Lights {
|
||||
enum : u32 {
|
||||
LUT_D0 = 0,
|
||||
|
@ -235,7 +284,7 @@ namespace PICA {
|
|||
};
|
||||
|
||||
// Returns the string representation of a texture format
|
||||
inline constexpr const char* textureFormatToString(TextureFmt fmt) {
|
||||
constexpr std::string_view textureFormatToString(TextureFmt fmt) {
|
||||
switch (fmt) {
|
||||
case TextureFmt::RGBA8: return "RGBA8";
|
||||
case TextureFmt::RGB8: return "RGB8";
|
||||
|
@ -255,16 +304,16 @@ namespace PICA {
|
|||
}
|
||||
}
|
||||
|
||||
inline constexpr const char* textureFormatToString(ColorFmt fmt) {
|
||||
constexpr std::string_view textureFormatToString(ColorFmt fmt) {
|
||||
return textureFormatToString(static_cast<TextureFmt>(fmt));
|
||||
}
|
||||
|
||||
inline constexpr bool hasStencil(DepthFmt format) { return format == PICA::DepthFmt::Depth24Stencil8; }
|
||||
constexpr bool hasStencil(DepthFmt format) { return format == PICA::DepthFmt::Depth24Stencil8; }
|
||||
|
||||
// Size occupied by each pixel in bytes
|
||||
|
||||
// All formats are 16BPP except for RGBA8 (32BPP) and BGR8 (24BPP)
|
||||
inline constexpr usize sizePerPixel(TextureFmt format) {
|
||||
constexpr usize sizePerPixel(TextureFmt format) {
|
||||
switch (format) {
|
||||
case TextureFmt::RGB8: return 3;
|
||||
case TextureFmt::RGBA8: return 4;
|
||||
|
@ -272,11 +321,11 @@ namespace PICA {
|
|||
}
|
||||
}
|
||||
|
||||
inline constexpr usize sizePerPixel(ColorFmt format) {
|
||||
constexpr usize sizePerPixel(ColorFmt format) {
|
||||
return sizePerPixel(static_cast<TextureFmt>(format));
|
||||
}
|
||||
|
||||
inline constexpr usize sizePerPixel(DepthFmt format) {
|
||||
constexpr usize sizePerPixel(DepthFmt format) {
|
||||
switch (format) {
|
||||
case DepthFmt::Depth16: return 2;
|
||||
case DepthFmt::Depth24: return 3;
|
||||
|
@ -292,4 +341,4 @@ namespace PICA {
|
|||
GeometryPrimitive = 3,
|
||||
};
|
||||
|
||||
} // namespace PICA
|
||||
} // namespace PICA
|
||||
|
|
|
@ -137,4 +137,4 @@ struct GLStateManager {
|
|||
};
|
||||
|
||||
static_assert(std::is_trivially_constructible<GLStateManager>(), "OpenGL State Manager class is not trivially constructible!");
|
||||
static_assert(std::is_trivially_destructible<GLStateManager>(), "OpenGL State Manager class is not trivially destructible!");
|
||||
static_assert(std::is_trivially_destructible<GLStateManager>(), "OpenGL State Manager class is not trivially destructible!");
|
||||
|
|
|
@ -80,6 +80,13 @@ namespace Helpers {
|
|||
}
|
||||
}
|
||||
|
||||
/// Align an unsigned value down to the closest multiple of size that does not exceed it.
/// @param value The value to align. T must be an unsigned integer type (enforced at compile time).
/// @param size The alignment. It does not have to be a power of two. A size of 0 leaves value unchanged.
/// @return The largest multiple of size that is less than or equal to value.
template <typename T>
static constexpr T alignDown(T value, std::size_t size) {
	static_assert(std::is_unsigned_v<T>, "T must be an unsigned value.");

	// value % 0 is undefined behaviour, so treat a zero alignment as "no alignment"
	if (size == 0) {
		return value;
	}

	return static_cast<T>(value - value % size);
}
|
||||
|
||||
/// Sign extend an arbitrary-size value to 32 bits
|
||||
static constexpr u32 inline signExtend32(u32 value, u32 startingSize) {
|
||||
auto temp = (s32)value;
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
#include <fstream>
|
||||
#include <optional>
|
||||
#include <vector>
|
||||
#include <span>
|
||||
#include "crypto/aes_engine.hpp"
|
||||
#include "helpers.hpp"
|
||||
#include "handles.hpp"
|
||||
|
@ -248,4 +249,4 @@ public:
|
|||
|
||||
void setVRAM(u8* pointer) { vram = pointer; }
|
||||
bool allocateMainThreadStack(u32 size);
|
||||
};
|
||||
};
|
||||
|
|
|
@ -44,8 +44,8 @@ class Renderer {
|
|||
float oldDepthOffset = 0.0;
|
||||
bool oldDepthmapEnable = false;
|
||||
|
||||
SurfaceCache<DepthBuffer, 10, true> depthBufferCache;
|
||||
SurfaceCache<ColourBuffer, 10, true> colourBufferCache;
|
||||
SurfaceCache<DepthBuffer, 64, true> depthBufferCache;
|
||||
SurfaceCache<ColourBuffer, 64, true> colourBufferCache;
|
||||
SurfaceCache<Texture, 256, true> textureCache;
|
||||
|
||||
OpenGL::uvec2 fbSize; // The size of the framebuffer (ie both the colour and depth buffer)
|
||||
|
@ -87,6 +87,7 @@ class Renderer {
|
|||
void getGraphicsContext(); // Set up graphics context for rendering
|
||||
void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control); // Clear a GPU buffer in VRAM
|
||||
void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags); // Perform display transfer
|
||||
void textureCopy(u32 inputAddr, u32 outputAddr, u32 copyBytes, u32 inputSize, u32 outputSize, u32 flags); // Perform texture copy
|
||||
void drawVertices(PICA::PrimType primType, std::span<const PICA::Vertex> vertices); // Draw the given vertices
|
||||
|
||||
// Take a screenshot of the screen and store it in a file
|
||||
|
@ -97,6 +98,8 @@ class Renderer {
|
|||
fbSize.y() = height;
|
||||
}
|
||||
|
||||
ColourBuffer getColourBuffer(u32 addr, PICA::ColorFmt format, u32 width, u32 height);
|
||||
|
||||
void setColourFormat(PICA::ColorFmt format) { colourBufferFormat = format; }
|
||||
void setDepthFormat(PICA::DepthFmt format) {
|
||||
if (format == PICA::DepthFmt::Unknown1) {
|
||||
|
@ -109,4 +112,4 @@ class Renderer {
|
|||
void setDepthBufferLoc(u32 loc) { depthBufferLoc = loc; }
|
||||
|
||||
static constexpr u32 vertexBufferSize = 0x10000;
|
||||
};
|
||||
};
|
||||
|
|
|
@ -19,6 +19,10 @@ struct ColourBuffer {
|
|||
OpenGL::Texture texture;
|
||||
OpenGL::Framebuffer fbo;
|
||||
|
||||
GLenum internalFormat;
|
||||
GLenum fmt;
|
||||
GLenum type;
|
||||
|
||||
ColourBuffer() : valid(false) {}
|
||||
|
||||
ColourBuffer(u32 loc, PICA::ColorFmt format, u32 x, u32 y, bool valid = true)
|
||||
|
@ -29,17 +33,40 @@ struct ColourBuffer {
|
|||
range = Interval<u32>(loc, (u32)endLoc);
|
||||
}
|
||||
|
||||
void allocate() {
|
||||
void allocate() {
|
||||
// Internal formats for the texture based on format
|
||||
static constexpr std::array<GLenum, 5> internalFormats = {
|
||||
GL_RGBA8, GL_RGB8, GL_RGB5_A1, GL_RGB565, GL_RGBA4
|
||||
};
|
||||
|
||||
// Format of the texture
|
||||
static constexpr std::array<GLenum, 5> formats = {
|
||||
GL_RGBA, GL_BGR, GL_RGBA, GL_RGB, GL_RGBA,
|
||||
};
|
||||
|
||||
static constexpr std::array<GLenum, 5> types = {
|
||||
GL_UNSIGNED_INT_8_8_8_8, GL_UNSIGNED_BYTE, GL_UNSIGNED_SHORT_5_5_5_1,
|
||||
GL_UNSIGNED_SHORT_5_6_5, GL_UNSIGNED_SHORT_4_4_4_4,
|
||||
};
|
||||
|
||||
internalFormat = internalFormats[(int)format];
|
||||
fmt = formats[(int)format];
|
||||
type = types[(int)format];
|
||||
|
||||
|
||||
// Create texture for the FBO, setting up filters and the like
|
||||
// Reading back the current texture is slow, but allocate calls should be few and far between.
|
||||
// If this becomes a bottleneck, we can fix it semi-easily
|
||||
auto prevTexture = OpenGL::getTex2D();
|
||||
texture.create(size.x(), size.y(), GL_RGBA8);
|
||||
texture.create(size.x(), size.y(), internalFormat);
|
||||
texture.bind();
|
||||
texture.setMinFilter(OpenGL::Linear);
|
||||
texture.setMagFilter(OpenGL::Linear);
|
||||
glBindTexture(GL_TEXTURE_2D, prevTexture);
|
||||
|
||||
OpenGL::setObjectLabel(GL_TEXTURE, texture.handle(), "Surface: %dx%d %s from %08X to %08X", size.x(),
|
||||
size.y(), textureFormatToString(format).data(),
|
||||
range.lower(), range.upper());
|
||||
//Helpers::panic("Creating FBO: %d, %d\n", size.x(), size.y());
|
||||
|
||||
fbo.createWithDrawTexture(texture);
|
||||
|
@ -144,4 +171,4 @@ struct DepthBuffer {
|
|||
size_t sizeInBytes() {
|
||||
return (size_t)size.x() * (size_t)size.y() * PICA::sizePerPixel(format);
|
||||
}
|
||||
};
|
||||
};
|
||||
|
|
|
@ -40,7 +40,7 @@ struct Texture {
|
|||
|
||||
void allocate();
|
||||
void setNewConfig(u32 newConfig);
|
||||
void decodeTexture(const void* data);
|
||||
void decodeTexture(std::span<const u8> data);
|
||||
void free();
|
||||
u64 sizeInBytes();
|
||||
|
||||
|
@ -53,7 +53,7 @@ struct Texture {
|
|||
static u32 getSwizzledOffset_4bpp(u32 u, u32 v, u32 width);
|
||||
|
||||
// Returns the format of this texture as a string
|
||||
std::string formatToString() {
|
||||
std::string_view formatToString() {
|
||||
return PICA::textureFormatToString(format);
|
||||
}
|
||||
|
||||
|
@ -61,4 +61,4 @@ struct Texture {
|
|||
// TODO: Make hasAlpha a template parameter
|
||||
u32 getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, const void* data);
|
||||
u32 decodeETC(u32 alpha, u32 u, u32 v, u64 colourData);
|
||||
};
|
||||
};
|
||||
|
|
|
@ -18,6 +18,24 @@ enum class GPUInterrupt : u8 {
|
|||
DMA = 6
|
||||
};
|
||||
|
||||
// Description of one screen's framebuffers, as passed to GPUService::setBufferSwapImpl
// NOTE(review): the meaning of each field is only implied by its name here - confirm against the GSP documentation
struct FramebufferInfo {
|
||||
u32 activeFb;
|
||||
u32 leftFramebufferVaddr;
|
||||
u32 rightFramebufferVaddr;
|
||||
u32 stride;
|
||||
u32 format;
|
||||
u32 displayFb;
|
||||
u32 attribute;
|
||||
};
|
||||
|
||||
// Groups two FramebufferInfo entries with an index and a dirty flag
// NOTE(review): the explicit pad0/pad1 members suggest this mirrors a fixed guest-visible layout -
// confirm before reordering or resizing any member
struct FrameBufferUpdate {
|
||||
u8 index;
|
||||
u8 dirtyFlag;
|
||||
u16 pad0;
|
||||
std::array<FramebufferInfo, 2> framebufferInfo;
|
||||
u32 pad1;
|
||||
};
|
||||
|
||||
// More circular dependencies
|
||||
class Kernel;
|
||||
|
||||
|
@ -42,6 +60,7 @@ class GPUService {
|
|||
void flushDataCache(u32 messagePointer);
|
||||
void registerInterruptRelayQueue(u32 messagePointer);
|
||||
void setAxiConfigQoSMode(u32 messagePointer);
|
||||
void setBufferSwap(u32 messagePointer);
|
||||
void setInternalPriorities(u32 messagePointer);
|
||||
void setLCDForceBlack(u32 messagePointer);
|
||||
void storeDataCache(u32 messagePointer);
|
||||
|
@ -57,6 +76,8 @@ class GPUService {
|
|||
void triggerTextureCopy(u32* cmd);
|
||||
void flushCacheRegions(u32* cmd);
|
||||
|
||||
void setBufferSwapImpl(u32 screen_id, const FramebufferInfo& info);
|
||||
|
||||
public:
|
||||
GPUService(Memory& mem, GPU& gpu, Kernel& kernel, u32& currentPID) : mem(mem), gpu(gpu),
|
||||
kernel(kernel), currentPID(currentPID) {}
|
||||
|
@ -69,4 +90,4 @@ public:
|
|||
std::memset(ptr, 0, 0x1000);
|
||||
}
|
||||
}
|
||||
};
|
||||
};
|
||||
|
|
|
@ -8,6 +8,12 @@
|
|||
#include "PICA/float_types.hpp"
|
||||
#include "PICA/regs.hpp"
|
||||
|
||||
// Screen dimensions in pixels, used by GPU::reset to seed the framebuffer size registers.
// "Width" is the 240-pixel side of each screen and "height" is the long side: 400 for the top
// screen and 320 for the bottom screen, matching the 400x240 and 320x240 viewports used when displaying them.
constexpr u32 top_screen_width = 240;
constexpr u32 top_screen_height = 400;

constexpr u32 bottom_screen_width = 240;
constexpr u32 bottom_screen_height = 320;
|
||||
|
||||
using namespace Floats;
|
||||
|
||||
// Note: For when we have multiple backends, the GL state manager can stay here and have the constructor for the Vulkan-or-whatever renderer ignore it
|
||||
|
@ -41,6 +47,27 @@ void GPU::reset() {
|
|||
e.config2 = 0;
|
||||
}
|
||||
|
||||
// Initialize the framebuffer registers. Values taken from Citra.
|
||||
|
||||
using namespace PICA::ExternalRegs;
|
||||
// Top screen addresses and dimensions.
|
||||
external_regs[Framebuffer0AFirstAddr] = 0x181E6000;
|
||||
external_regs[Framebuffer0ASecondAddr] = 0x1822C800;
|
||||
external_regs[Framebuffer0BFirstAddr] = 0x18273000;
|
||||
external_regs[Framebuffer0BSecondAddr] = 0x182B9800;
|
||||
external_regs[Framebuffer0Size] = (top_screen_height << 16) | top_screen_width;
|
||||
external_regs[Framebuffer0Stride] = 720;
|
||||
external_regs[Framebuffer0Config] = static_cast<u32>(PICA::ColorFmt::RGB8);
|
||||
external_regs[Framebuffer0Select] = 0;
|
||||
|
||||
// Bottom screen addresses and dimensions.
|
||||
external_regs[Framebuffer1AFirstAddr] = 0x1848F000;
|
||||
external_regs[Framebuffer1ASecondAddr] = 0x184C7800;
|
||||
external_regs[Framebuffer1Size] = (bottom_screen_height << 16) | bottom_screen_width;
|
||||
external_regs[Framebuffer1Stride] = 720;
|
||||
external_regs[Framebuffer1Config] = static_cast<u32>(PICA::ColorFmt::RGB8);
|
||||
external_regs[Framebuffer1Select] = 0;
|
||||
|
||||
renderer.reset();
|
||||
}
|
||||
|
||||
|
@ -123,12 +150,12 @@ void GPU::drawArrays() {
|
|||
vertexIndex = i + regs[PICA::InternalRegs::VertexOffsetReg];
|
||||
} else {
|
||||
if (shortIndex) {
|
||||
auto ptr = getPointerPhys<u16>(indexBufferPointer);
|
||||
vertexIndex = *ptr; // TODO: This is very unsafe
|
||||
auto ptr = getSpanPhys<u16>(indexBufferPointer, sizeof(u16));
|
||||
vertexIndex = ptr[0]; // TODO: This is very unsafe
|
||||
indexBufferPointer += 2;
|
||||
} else {
|
||||
auto ptr = getPointerPhys<u8>(indexBufferPointer);
|
||||
vertexIndex = *ptr; // TODO: This is also very unsafe
|
||||
auto ptr = getSpanPhys<u8>(indexBufferPointer, sizeof(u8));
|
||||
vertexIndex = ptr[0]; // TODO: This is also very unsafe
|
||||
indexBufferPointer += 1;
|
||||
}
|
||||
}
|
||||
|
@ -188,42 +215,46 @@ void GPU::drawArrays() {
|
|||
|
||||
switch (attribType) {
|
||||
case 0: { // Signed byte
|
||||
s8* ptr = getPointerPhys<s8>(attrAddress);
|
||||
const u32 attr_bytes = size * sizeof(s8);
|
||||
const auto ptr = getSpanPhys<s8>(attrAddress, attr_bytes);
|
||||
for (component = 0; component < size; component++) {
|
||||
float val = static_cast<float>(*ptr++);
|
||||
float val = static_cast<float>(ptr[component]);
|
||||
attribute[component] = f24::fromFloat32(val);
|
||||
}
|
||||
attrAddress += size * sizeof(s8);
|
||||
attrAddress += attr_bytes;
|
||||
break;
|
||||
}
|
||||
|
||||
case 1: { // Unsigned byte
|
||||
u8* ptr = getPointerPhys<u8>(attrAddress);
|
||||
const u32 attr_bytes = size * sizeof(u8);
|
||||
const auto ptr = getSpanPhys<u8>(attrAddress, attr_bytes);
|
||||
for (component = 0; component < size; component++) {
|
||||
float val = static_cast<float>(*ptr++);
|
||||
float val = static_cast<float>(ptr[component]);
|
||||
attribute[component] = f24::fromFloat32(val);
|
||||
}
|
||||
attrAddress += size * sizeof(u8);
|
||||
attrAddress += attr_bytes;
|
||||
break;
|
||||
}
|
||||
|
||||
case 2: { // Short
|
||||
s16* ptr = getPointerPhys<s16>(attrAddress);
|
||||
const u32 attr_bytes = size * sizeof(s16);
|
||||
const auto ptr = getSpanPhys<s16>(attrAddress, attr_bytes);
|
||||
for (component = 0; component < size; component++) {
|
||||
float val = static_cast<float>(*ptr++);
|
||||
float val = static_cast<float>(ptr[component]);
|
||||
attribute[component] = f24::fromFloat32(val);
|
||||
}
|
||||
attrAddress += size * sizeof(s16);
|
||||
attrAddress += attr_bytes;
|
||||
break;
|
||||
}
|
||||
|
||||
case 3: { // Float
|
||||
float* ptr = getPointerPhys<float>(attrAddress);
|
||||
const u32 attr_bytes = size * sizeof(float);
|
||||
const auto ptr = getSpanPhys<float>(attrAddress, attr_bytes);
|
||||
for (component = 0; component < size; component++) {
|
||||
float val = *ptr++;
|
||||
float val = ptr[component];
|
||||
attribute[component] = f24::fromFloat32(val);
|
||||
}
|
||||
attrAddress += size * sizeof(float);
|
||||
attrAddress += attr_bytes;
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
|
@ -18,11 +18,36 @@ void GPU::writeReg(u32 address, u32 value) {
|
|||
if (address >= 0x1EF01000 && address < 0x1EF01C00) { // Internal registers
|
||||
const u32 index = (address - 0x1EF01000) / sizeof(u32);
|
||||
writeInternalReg(index, value, 0xffffffff);
|
||||
} else if (address >= 0x1EF00004 && address < 0x1EF01000) {
|
||||
const u32 index = (address - 0x1EF00004) / sizeof(u32);
|
||||
writeExternalReg(index, value);
|
||||
} else {
|
||||
log("Ignoring write to external GPU register %08X. Value: %08X\n", address, value);
|
||||
log("Ignoring write to unknown GPU register %08X. Value: %08X\n", address, value);
|
||||
}
|
||||
}
|
||||
|
||||
// Read an external GPU register.
// index: Word index into external_regs (see PICA::ExternalRegs).
// Returns the stored register value. An out-of-range index triggers Helpers::panic, and -1
// (0xFFFFFFFF as a u32) is returned if the panic handler returns.
u32 GPU::readExternalReg(u32 index) {
	using namespace PICA::ExternalRegs;

	// Valid indices are [0, size), so index == size is already out of bounds and must be rejected too
	if (index >= external_regs.size()) [[unlikely]] {
		Helpers::panic("Tried to read invalid external GPU register. Index: %X\n", index);
		return -1;
	}

	return external_regs[index];
}
|
||||
|
||||
// Write an external GPU register.
// index: Word index into external_regs (see PICA::ExternalRegs).
// value: The value to store.
// An out-of-range index triggers Helpers::panic and nothing is written.
void GPU::writeExternalReg(u32 index, u32 value) {
	using namespace PICA::ExternalRegs;

	// Valid indices are [0, size), so index == size would write one element past the end of the array
	if (index >= external_regs.size()) [[unlikely]] {
		Helpers::panic("Tried to write to invalid external GPU register. Index: %X, value: %08X\n", index, value);
		return;
	}

	external_regs[index] = value;
}
|
||||
|
||||
u32 GPU::readInternalReg(u32 index) {
|
||||
using namespace PICA::InternalRegs;
|
||||
|
||||
|
@ -54,7 +79,7 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
|
|||
using namespace PICA::InternalRegs;
|
||||
|
||||
if (index > regNum) [[unlikely]] {
|
||||
Helpers::panic("Tried to write to invalid GPU register. Index: %X, value: %08X\n", index, value);
|
||||
Helpers::panic("Tried to write to invalid internal GPU register. Index: %X, value: %08X\n", index, value);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -275,9 +300,9 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
|
|||
u32 size = (regs[CmdBufSize0 + bufferIndex] & 0xfffff) << 3;
|
||||
|
||||
// Set command buffer state to execute the new buffer
|
||||
cmdBuffStart = getPointerPhys<u32>(addr);
|
||||
cmdBuffCurr = cmdBuffStart;
|
||||
cmdBuffEnd = cmdBuffStart + (size / sizeof(u32));
|
||||
cmdBuffStart = getSpanPhys<u32>(addr, size);
|
||||
cmdBuffCurr = 0;
|
||||
cmdBuffEnd = (size / sizeof(u32));
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -308,12 +333,15 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
|
|||
}
|
||||
|
||||
void GPU::startCommandList(u32 addr, u32 size) {
|
||||
cmdBuffStart = static_cast<u32*>(mem.getReadPointer(addr));
|
||||
if (!cmdBuffStart) Helpers::panic("Couldn't get buffer for command list");
|
||||
cmdBuffStart = getSpanPhys<u32>(addr, size);
|
||||
if (cmdBuffStart.empty()) {
|
||||
Helpers::panic("Couldn't get buffer for command list");
|
||||
return;
|
||||
}
|
||||
// TODO: This is very memory unsafe. We get a pointer to FCRAM and just keep writing without checking if we're gonna go OoB
|
||||
|
||||
cmdBuffCurr = cmdBuffStart;
|
||||
cmdBuffEnd = cmdBuffStart + (size / sizeof(u32));
|
||||
cmdBuffCurr = 0;
|
||||
cmdBuffEnd = (size / sizeof(u32));
|
||||
|
||||
// LUT for converting the parameter mask to an actual 32-bit mask
|
||||
// The parameter mask is 4 bits long, each bit corresponding to one byte of the mask
|
||||
|
@ -329,13 +357,13 @@ void GPU::startCommandList(u32 addr, u32 size) {
|
|||
// The curr pointer starts out doubleword-aligned and is increased by 4 bytes each time
|
||||
// So to check if it is aligned, we get the number of words it's been incremented by
|
||||
// If that number is an odd value then the buffer is not aligned, otherwise it is
|
||||
if ((cmdBuffCurr - cmdBuffStart) % 2 != 0) {
|
||||
if (cmdBuffCurr % 2 != 0) {
|
||||
cmdBuffCurr++;
|
||||
}
|
||||
|
||||
// The first word of a command is the command parameter and the second one is the header
|
||||
u32 param1 = *cmdBuffCurr++;
|
||||
u32 header = *cmdBuffCurr++;
|
||||
u32 param1 = cmdBuffStart[cmdBuffCurr++];
|
||||
u32 header = cmdBuffStart[cmdBuffCurr++];
|
||||
|
||||
u32 id = header & 0xffff;
|
||||
u32 paramMaskIndex = getBits<16, 4>(header);
|
||||
|
@ -352,8 +380,8 @@ void GPU::startCommandList(u32 addr, u32 size) {
|
|||
writeInternalReg(id, param1, mask);
|
||||
for (u32 i = 0; i < paramCount; i++) {
|
||||
id += idIncrement;
|
||||
u32 param = *cmdBuffCurr++;
|
||||
u32 param = cmdBuffStart[cmdBuffCurr++];
|
||||
writeInternalReg(id, param, mask);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -453,4 +453,4 @@ u64 Memory::timeSince3DSEpoch() {
|
|||
constexpr u64 offset = 2208988800ull;
|
||||
milliseconds ms = duration_cast<milliseconds>(seconds(rawTime + timezoneDifference + offset));
|
||||
return ms.count();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -576,6 +576,12 @@ const char* displayFragmentShader = R"(
|
|||
}
|
||||
)";
|
||||
|
||||
// Debug callback handed to glDebugMessageCallback. Forwards the id and text of each debug message to
// Helpers::warn; the source, type, severity, length and userParam arguments are ignored.
static void APIENTRY debugHandler(GLenum source, GLenum type, GLuint id, GLenum severity,
	GLsizei length, const GLchar* message, const void* userParam) {
	// id is a GLuint, so it needs an unsigned conversion specifier
	Helpers::warn("%u: %s\n", id, message);
}
|
||||
|
||||
|
||||
void Renderer::reset() {
|
||||
depthBufferCache.reset();
|
||||
colourBufferCache.reset();
|
||||
|
@ -695,6 +701,11 @@ void Renderer::initGraphicsContext() {
|
|||
OpenGL::clearColor();
|
||||
OpenGL::setViewport(oldViewport[0], oldViewport[1], oldViewport[2], oldViewport[3]);
|
||||
|
||||
#if defined(OPENGL_DEBUG_INFO)
|
||||
glEnable(GL_DEBUG_OUTPUT);
|
||||
glDebugMessageCallback(debugHandler, nullptr);
|
||||
#endif
|
||||
|
||||
reset();
|
||||
}
|
||||
|
||||
|
@ -919,6 +930,39 @@ constexpr u32 bottomScreenBuffer = 0x1f05dc00;
|
|||
|
||||
void Renderer::display() {
|
||||
gl.disableScissor();
|
||||
gl.disableBlend();
|
||||
gl.disableDepth();
|
||||
gl.disableScissor();
|
||||
gl.setColourMask(true, true, true, true);
|
||||
gl.useProgram(displayProgram);
|
||||
gl.bindVAO(dummyVAO);
|
||||
|
||||
OpenGL::disableClipPlane(0);
|
||||
OpenGL::disableClipPlane(1);
|
||||
|
||||
using namespace PICA::ExternalRegs;
|
||||
const u32 topScreenAddr = gpu.readExternalReg(Framebuffer0AFirstAddr);
|
||||
const u32 bottomScreenAddr = gpu.readExternalReg(Framebuffer1AFirstAddr);
|
||||
|
||||
auto topScreen = colourBufferCache.findFromAddress(topScreenAddr);
|
||||
auto bottomScreen = colourBufferCache.findFromAddress(bottomScreenAddr);
|
||||
Helpers::warn("Top screen addr %08X\n", topScreenAddr);
|
||||
|
||||
screenFramebuffer.bind(OpenGL::DrawFramebuffer);
|
||||
|
||||
// Draw each screen whose framebuffer address has a matching entry in the colour buffer cache
|
||||
// into its own viewport of the output texture. Screens without a cached colour buffer are skipped
|
||||
if (topScreen) {
|
||||
topScreen->get().texture.bind();
|
||||
OpenGL::setViewport(0, 240, 400, 240); // Top screen viewport
|
||||
OpenGL::draw(OpenGL::TriangleStrip, 4); // Actually draw our 3DS screen
|
||||
}
|
||||
|
||||
if (bottomScreen) {
|
||||
bottomScreen->get().texture.bind();
|
||||
OpenGL::setViewport(40, 0, 320, 240);
|
||||
OpenGL::draw(OpenGL::TriangleStrip, 4);
|
||||
}
|
||||
|
||||
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
|
||||
screenFramebuffer.bind(OpenGL::ReadFramebuffer);
|
||||
|
@ -986,7 +1030,7 @@ OpenGL::Texture Renderer::getTexture(Texture& tex) {
|
|||
if (buffer.has_value()) {
|
||||
return buffer.value().get().texture;
|
||||
} else {
|
||||
const void* textureData = gpu.getPointerPhys<void*>(tex.location); // Get pointer to the texture data in 3DS memory
|
||||
const std::span textureData = gpu.getSpanPhys<u8>(tex.location, tex.sizeInBytes()); // Get pointer to the texture data in 3DS memory
|
||||
Texture& newTex = textureCache.add(tex);
|
||||
newTex.decodeTexture(textureData);
|
||||
|
||||
|
@ -994,40 +1038,86 @@ OpenGL::Texture Renderer::getTexture(Texture& tex) {
|
|||
}
|
||||
}
|
||||
|
||||
// NOTE: The GPU format has RGB5551 and RGB655 swapped compared to internal regs format
|
||||
PICA::ColorFmt ToColorFmt(u32 format) {
|
||||
switch (format) {
|
||||
case 2: return PICA::ColorFmt::RGB565;
|
||||
case 3: return PICA::ColorFmt::RGBA5551;
|
||||
default: return static_cast<PICA::ColorFmt>(format);
|
||||
}
|
||||
}
|
||||
|
||||
void Renderer::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) {
|
||||
const u32 inputWidth = inputSize & 0xffff;
|
||||
const u32 inputGap = inputSize >> 16;
|
||||
const u32 inputHeight = inputSize >> 16;
|
||||
const auto inputFormat = ToColorFmt(Helpers::getBits<8, 3>(flags));
|
||||
const auto outputFormat = ToColorFmt(Helpers::getBits<12, 3>(flags));
|
||||
const PICA::Scaling scaling = static_cast<PICA::Scaling>(Helpers::getBits<24, 2>(flags));
|
||||
|
||||
const u32 outputWidth = outputSize & 0xffff;
|
||||
const u32 outputGap = outputSize >> 16;
|
||||
|
||||
auto framebuffer = colourBufferCache.findFromAddress(inputAddr);
|
||||
// If there's a framebuffer at this address, use it. Otherwise go back to our old hack and display framebuffer 0
|
||||
// Displays are hard I really don't want to try implementing them because getting a fast solution is terrible
|
||||
OpenGL::Texture& tex = framebuffer.has_value() ? framebuffer.value().get().texture : colourBufferCache[0].texture;
|
||||
|
||||
tex.bind();
|
||||
screenFramebuffer.bind(OpenGL::DrawFramebuffer);
|
||||
|
||||
gl.disableBlend();
|
||||
gl.disableDepth();
|
||||
gl.disableScissor();
|
||||
gl.setColourMask(true, true, true, true);
|
||||
gl.useProgram(displayProgram);
|
||||
gl.bindVAO(dummyVAO);
|
||||
|
||||
OpenGL::disableClipPlane(0);
|
||||
OpenGL::disableClipPlane(1);
|
||||
|
||||
// Hack: Detect whether we are writing to the top or bottom screen by checking output gap and drawing to the proper part of the output texture
|
||||
// We consider output gap == 320 to mean bottom, and anything else to mean top
|
||||
if (outputGap == 320) {
|
||||
OpenGL::setViewport(40, 0, 320, 240); // Bottom screen viewport
|
||||
} else {
|
||||
OpenGL::setViewport(0, 240, 400, 240); // Top screen viewport
|
||||
u32 outputWidth = outputSize & 0xffff;
|
||||
if (scaling == PICA::Scaling::X || scaling == PICA::Scaling::XY) {
|
||||
outputWidth >>= 1;
|
||||
}
|
||||
u32 outputHeight = outputSize >> 16;
|
||||
if (scaling == PICA::Scaling::XY) {
|
||||
outputHeight >>= 1;
|
||||
}
|
||||
|
||||
OpenGL::draw(OpenGL::TriangleStrip, 4); // Actually draw our 3DS screen
|
||||
// If there's a framebuffer at this address, use it. Otherwise go back to our old hack and display framebuffer 0
|
||||
// Displays are hard I really don't want to try implementing them because getting a fast solution is terrible
|
||||
auto srcFramebuffer = getColourBuffer(inputAddr, inputFormat, inputWidth, inputHeight);
|
||||
auto dstFramebuffer = getColourBuffer(outputAddr, outputFormat, outputWidth, outputHeight);
|
||||
|
||||
Helpers::warn("Display transfer with outputAddr %08X\n", outputAddr);
|
||||
|
||||
// Blit the framebuffers
|
||||
srcFramebuffer.fbo.bind(OpenGL::ReadFramebuffer);
|
||||
dstFramebuffer.fbo.bind(OpenGL::DrawFramebuffer);
|
||||
glBlitFramebuffer(0, 0, inputWidth, inputHeight, 0, 0, outputWidth, outputHeight, GL_COLOR_BUFFER_BIT, GL_LINEAR);
|
||||
}
|
||||
|
||||
// Perform a texture copy by blitting between two cached colour buffers.
// inputAddr/outputAddr: Source and destination addresses, used to look up or create the colour buffers.
// copyBytes: Total number of bytes to copy. Aligned down to a multiple of 16; a result of 0 is a no-op.
// inputSize/outputSize: Lower 16 bits hold the line width and upper 16 bits the line gap. Both are multiplied
//                       by 16 here (presumably converting from 16-byte units to bytes - confirm).
// flags: Currently unused.
// Copies with a non-zero input gap, mismatching widths or a zero width are not performed.
// The output gap (upper 16 bits of outputSize) is currently not taken into account.
void Renderer::textureCopy(u32 inputAddr, u32 outputAddr, u32 copyBytes, u32 inputSize, u32 outputSize, u32 flags) {
	copyBytes = Helpers::alignDown(copyBytes, 16);
	if (copyBytes == 0) [[unlikely]] {
		return;
	}

	const u32 inputWidth = (inputSize & 0xffff) * 16;
	const u32 inputGap = (inputSize >> 16) * 16;
	const u32 outputWidth = (outputSize & 0xffff) * 16;

	if (inputGap != 0 || inputWidth != outputWidth) {
		Helpers::warn("Texture copy with non zero input gap or mismatching widths, cannot be accelerated");
		return;
	}

	// Both widths are equal at this point. A zero width would make the row counts below divide by zero
	if (inputWidth == 0) [[unlikely]] {
		Helpers::warn("Texture copy with zero line width, ignoring\n");
		return;
	}

	// If the texture is tiled, apps set inputWidth to the scanline size which is width * 8.
	// HACK: We don't know if the src texture is tiled or not yet, assume it is for now, because it's the most common case.
	// Citra handles this by letting the width/stride be set as bytes and interpreting it differently
	// depending on the candidate surface.
	auto srcFramebuffer = getColourBuffer(inputAddr, PICA::ColorFmt::RGBA8, inputWidth / 8, copyBytes / inputWidth); // HACK: Assume RGBA8 format
	auto dstFramebuffer = getColourBuffer(outputAddr, srcFramebuffer.format, outputWidth / 8, copyBytes / outputWidth);

	// Blit the framebuffers
	srcFramebuffer.fbo.bind(OpenGL::ReadFramebuffer);
	dstFramebuffer.fbo.bind(OpenGL::DrawFramebuffer);
	glBlitFramebuffer(0, 0, srcFramebuffer.size.x(), srcFramebuffer.size.y(),
		0, 0, dstFramebuffer.size.x(), dstFramebuffer.size.y(), GL_COLOR_BUFFER_BIT, GL_LINEAR);
}
|
||||
|
||||
// Returns the colour buffer the cache associates with addr.
// If the cache has no match, a buffer with the given format and dimensions is created, cached and returned.
ColourBuffer Renderer::getColourBuffer(u32 addr, PICA::ColorFmt format, u32 width, u32 height) {
	// The lookup is by address only. This is deliberately looser than getColourFBO: a display transfer/texcopy
	// may refer to a subrect of a surface, and for texcopies the format of the surface is not known.
	if (auto cached = colourBufferCache.findFromAddress(addr); cached.has_value()) {
		return cached.value().get();
	}

	// Nothing cached for this address, so make a new buffer and hand it to the cache
	ColourBuffer freshBuffer(addr, format, width, height);
	return colourBufferCache.add(freshBuffer);
}
|
||||
|
||||
void Renderer::screenshot(const std::string& name) {
|
||||
|
@ -1053,4 +1143,4 @@ void Renderer::screenshot(const std::string& name) {
|
|||
}
|
||||
|
||||
stbi_write_png(name.c_str(), width, height, 4, flippedPixels.data(), 0);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -258,18 +258,18 @@ u32 Texture::decodeTexel(u32 u, u32 v, PICA::TextureFmt fmt, const void* data) {
|
|||
}
|
||||
}
|
||||
|
||||
void Texture::decodeTexture(const void* data) {
|
||||
void Texture::decodeTexture(std::span<const u8> data) {
|
||||
std::vector<u32> decoded;
|
||||
decoded.reserve(u64(size.u()) * u64(size.v()));
|
||||
|
||||
// Decode texels line by line
|
||||
for (u32 v = 0; v < size.v(); v++) {
|
||||
for (u32 u = 0; u < size.u(); u++) {
|
||||
u32 colour = decodeTexel(u, v, format, data);
|
||||
u32 colour = decodeTexel(u, v, format, data.data());
|
||||
decoded.push_back(colour);
|
||||
}
|
||||
}
|
||||
|
||||
texture.bind();
|
||||
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, size.u(), size.v(), GL_RGBA, GL_UNSIGNED_BYTE, decoded.data());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
#include "services/gsp_gpu.hpp"
|
||||
#include "PICA/regs.hpp"
|
||||
#include "ipc.hpp"
|
||||
#include "kernel.hpp"
|
||||
|
||||
|
@ -10,6 +11,7 @@ namespace ServiceCommands {
|
|||
RegisterInterruptRelayQueue = 0x00130042,
|
||||
WriteHwRegs = 0x00010082,
|
||||
WriteHwRegsWithMask = 0x00020084,
|
||||
SetBufferSwap = 0x00050200,
|
||||
FlushDataCache = 0x00080082,
|
||||
SetLCDForceBlack = 0x000B0040,
|
||||
TriggerCmdReqQueue = 0x000C0000,
|
||||
|
@ -19,15 +21,35 @@ namespace ServiceCommands {
|
|||
}
|
||||
|
||||
// Commands written to shared memory and processed by TriggerCmdReqQueue
|
||||
namespace GXCommands {
|
||||
enum : u32 {
|
||||
TriggerDMARequest = 0,
|
||||
ProcessCommandList = 1,
|
||||
MemoryFill = 2,
|
||||
TriggerDisplayTransfer = 3,
|
||||
TriggerTextureCopy = 4,
|
||||
FlushCacheRegions = 5
|
||||
};
|
||||
enum class GXCommands : u32 {
|
||||
TriggerDMARequest = 0,
|
||||
ProcessCommandList = 1,
|
||||
MemoryFill = 2,
|
||||
TriggerDisplayTransfer = 3,
|
||||
TriggerTextureCopy = 4,
|
||||
FlushCacheRegions = 5
|
||||
};
|
||||
|
||||
// Converts a virtual address to the matching physical address.
// Handles the VRAM range and both the old and new linear heap ranges; a null address maps to null.
// Any other address produces a warning and an intentionally bogus physical address.
static u32 VaddrToPaddr(u32 addr) {
	// VRAM mapping
	if (addr >= VirtualAddrs::VramStart && addr < (VirtualAddrs::VramStart + VirtualAddrs::VramSize)) [[likely]] {
		const u32 vramOffset = addr - VirtualAddrs::VramStart;
		return vramOffset + PhysicalAddrs::VRAM;
	}

	// Old linear heap mapping
	if (addr >= VirtualAddrs::LinearHeapStartOld && addr < VirtualAddrs::LinearHeapEndOld) {
		const u32 heapOffset = addr - VirtualAddrs::LinearHeapStartOld;
		return heapOffset + PhysicalAddrs::FCRAM;
	}

	// New linear heap mapping
	if (addr >= VirtualAddrs::LinearHeapStartNew && addr < VirtualAddrs::LinearHeapEndNew) {
		const u32 heapOffset = addr - VirtualAddrs::LinearHeapStartNew;
		return heapOffset + PhysicalAddrs::FCRAM;
	}

	// Null stays null
	if (addr == 0) {
		return 0;
	}

	Helpers::warn("[GSP::GPU VaddrToPaddr] Unknown virtual address %08X", addr);
	// Obviously garbage address
	return 0xF3310932;
}
|
||||
|
||||
void GPUService::reset() {
|
||||
|
@ -43,13 +65,14 @@ void GPUService::handleSyncRequest(u32 messagePointer) {
|
|||
case ServiceCommands::FlushDataCache: flushDataCache(messagePointer); break;
|
||||
case ServiceCommands::RegisterInterruptRelayQueue: registerInterruptRelayQueue(messagePointer); break;
|
||||
case ServiceCommands::SetAxiConfigQoSMode: setAxiConfigQoSMode(messagePointer); break;
|
||||
case ServiceCommands::SetBufferSwap: setBufferSwap(messagePointer); break;
|
||||
case ServiceCommands::SetInternalPriorities: setInternalPriorities(messagePointer); break;
|
||||
case ServiceCommands::SetLCDForceBlack: setLCDForceBlack(messagePointer); break;
|
||||
case ServiceCommands::StoreDataCache: storeDataCache(messagePointer); break;
|
||||
case ServiceCommands::TriggerCmdReqQueue: [[likely]] triggerCmdReqQueue(messagePointer); break;
|
||||
case ServiceCommands::WriteHwRegs: writeHwRegs(messagePointer); break;
|
||||
case ServiceCommands::WriteHwRegsWithMask: writeHwRegsWithMask(messagePointer); break;
|
||||
; default: Helpers::panic("GPU service requested. Command: %08X\n", command);
|
||||
default: Helpers::panic("GPU service requested. Command: %08X\n", command);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -122,15 +145,12 @@ void GPUService::requestInterrupt(GPUInterrupt type) {
|
|||
// Not emulating this causes Yoshi's Woolly World, Captain Toad, Metroid 2 et al to hang
|
||||
if (type == GPUInterrupt::VBlank0 || type == GPUInterrupt::VBlank1) {
|
||||
int screen = static_cast<u32>(type) - static_cast<u32>(GPUInterrupt::VBlank0); // 0 for top screen, 1 for bottom
|
||||
|
||||
constexpr u32 FBInfoSize = 0x40;
|
||||
// TODO: Offset depends on GSP thread being triggered
|
||||
u8* info = &sharedMem[0x200 + screen * FBInfoSize];
|
||||
u8& dirtyFlag = info[1];
|
||||
FrameBufferUpdate* update = reinterpret_cast<FrameBufferUpdate*>(&sharedMem[0x200 + screen * sizeof(FrameBufferUpdate)]);
|
||||
|
||||
if (dirtyFlag & 1) {
|
||||
// TODO: Submit buffer info here
|
||||
dirtyFlag &= ~1;
|
||||
if (update->dirtyFlag & 1) {
|
||||
setBufferSwapImpl(screen, update->framebufferInfo[update->index]);
|
||||
update->dirtyFlag &= ~1;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -259,6 +279,18 @@ void GPUService::setAxiConfigQoSMode(u32 messagePointer) {
|
|||
mem.write32(messagePointer + 4, Result::Success);
|
||||
}
|
||||
|
||||
// IPC handler for GSP::GPU::SetBufferSwap.
// Reads the screen ID and framebuffer description from the command buffer at messagePointer,
// applies them through setBufferSwapImpl and writes the response back into the same buffer.
void GPUService::setBufferSwap(u32 messagePointer) {
	FramebufferInfo info{};
	const u32 screenId = mem.read32(messagePointer + 4);  // Selects either PDC0 or PDC1
	info.activeFb = mem.read32(messagePointer + 8);
	info.leftFramebufferVaddr = mem.read32(messagePointer + 12);
	info.rightFramebufferVaddr = mem.read32(messagePointer + 16);
	info.stride = mem.read32(messagePointer + 20);
	info.format = mem.read32(messagePointer + 24);
	info.displayFb = mem.read32(messagePointer + 28);  // Selects either framebuffer A or B
	setBufferSwapImpl(screenId, info);

	// Write the reply. Without it the word at messagePointer + 4 still holds screenId,
	// which the caller would interpret as the result code.
	mem.write32(messagePointer, IPC::responseHeader(0x05, 1, 0));
	mem.write32(messagePointer + 4, Result::Success);
}
|
||||
|
||||
// Seems to also be completely undocumented
|
||||
void GPUService::setInternalPriorities(u32 messagePointer) {
|
||||
log("GSP::GPU::SetInternalPriorities\n");
|
||||
|
@ -281,7 +313,7 @@ void GPUService::processCommandBuffer() {
|
|||
log("Processing %d GPU commands\n", commandsLeft);
|
||||
|
||||
while (commandsLeft != 0) {
|
||||
u32 cmdID = cmd[0] & 0xff;
|
||||
const GXCommands cmdID = static_cast<GXCommands>(cmd[0] & 0xff);
|
||||
switch (cmdID) {
|
||||
case GXCommands::ProcessCommandList: processCommandList(cmd); break;
|
||||
case GXCommands::MemoryFill: memoryFill(cmd); break;
|
||||
|
@ -324,28 +356,6 @@ void GPUService::memoryFill(u32* cmd) {
|
|||
}
|
||||
}
|
||||
|
||||
// Converts a virtual address to the matching physical address.
// Handles the VRAM range and both the old and new linear heap ranges; a null address maps to null.
// Any other address produces a warning and an intentionally bogus physical address.
static u32 VaddrToPaddr(u32 addr) {
	// VRAM mapping
	if (addr >= VirtualAddrs::VramStart && addr < (VirtualAddrs::VramStart + VirtualAddrs::VramSize)) [[likely]] {
		const u32 vramOffset = addr - VirtualAddrs::VramStart;
		return vramOffset + PhysicalAddrs::VRAM;
	}

	// Old linear heap mapping
	if (addr >= VirtualAddrs::LinearHeapStartOld && addr < VirtualAddrs::LinearHeapEndOld) {
		const u32 heapOffset = addr - VirtualAddrs::LinearHeapStartOld;
		return heapOffset + PhysicalAddrs::FCRAM;
	}

	// New linear heap mapping
	if (addr >= VirtualAddrs::LinearHeapStartNew && addr < VirtualAddrs::LinearHeapEndNew) {
		const u32 heapOffset = addr - VirtualAddrs::LinearHeapStartNew;
		return heapOffset + PhysicalAddrs::FCRAM;
	}

	// Null stays null
	if (addr == 0) {
		return 0;
	}

	Helpers::warn("[GSP::GPU VaddrToPaddr] Unknown virtual address %08X", addr);
	// Obviously garbage address
	return 0xF3310932;
}
|
||||
|
||||
void GPUService::triggerDisplayTransfer(u32* cmd) {
|
||||
const u32 inputAddr = VaddrToPaddr(cmd[1]);
|
||||
const u32 outputAddr = VaddrToPaddr(cmd[2]);
|
||||
|
@ -373,23 +383,74 @@ void GPUService::flushCacheRegions(u32* cmd) {
|
|||
log("GSP::GPU::FlushCacheRegions (Stubbed)\n");
|
||||
}
|
||||
|
||||
// Applies a framebuffer swap for one screen by writing the GPU's external registers.
//   screenId: selects which screen's register set is written (0 or 1)
//   info: framebuffer description. activeFb picks the A (0) or B (1) address pair; the left/right framebuffer
//         virtual addresses are converted with VaddrToPaddr; format, displayFb and stride are written as-is.
// Out-of-range screenId/activeFb values are rejected with a warning and nothing is written.
void GPUService::setBufferSwapImpl(u32 screenId, const FramebufferInfo& info) {
	using namespace PICA::ExternalRegs;

	// Both values are supplied by the guest and are used to index the fixed-size tables below,
	// so anything other than 0 or 1 would read past the end of the arrays.
	if (screenId > 1 || info.activeFb > 1) [[unlikely]] {
		Helpers::warn("[GSP::GPU] SetBufferSwap with invalid screen ID %08X or active framebuffer %08X", screenId, info.activeFb);
		return;
	}

	// Laid out as [screen][A/B][first/second]
	constexpr static std::array<u32, 8> fb_addresses = {
		Framebuffer0AFirstAddr,
		Framebuffer0ASecondAddr,
		Framebuffer0BFirstAddr,
		Framebuffer0BSecondAddr,
		Framebuffer1AFirstAddr,
		Framebuffer1ASecondAddr,
		Framebuffer1BFirstAddr,
		Framebuffer1BSecondAddr,
	};

	const u32 fb_index = screenId * 4 + info.activeFb * 2;
	gpu.writeExternalReg(fb_addresses[fb_index], VaddrToPaddr(info.leftFramebufferVaddr));
	gpu.writeExternalReg(fb_addresses[fb_index + 1], VaddrToPaddr(info.rightFramebufferVaddr));

	// Laid out as [screen][config/select/stride]
	constexpr static std::array<u32, 6> config_addresses = {
		Framebuffer0Config,
		Framebuffer0Select,
		Framebuffer0Stride,
		Framebuffer1Config,
		Framebuffer1Select,
		Framebuffer1Stride,
	};

	const u32 config_index = screenId * 3;
	gpu.writeExternalReg(config_addresses[config_index], info.format);
	gpu.writeExternalReg(config_addresses[config_index + 1], info.displayFb);
	gpu.writeExternalReg(config_addresses[config_index + 2], info.stride);
}
|
||||
|
||||
// Actually send command list (aka display list) to GPU
|
||||
// Handles the ProcessCommandList GX command: starts the GPU on the command list described by cmd
// and requests a P3D interrupt afterwards.
//   cmd[1]: buffer address (low 3 bits ignored), converted with VaddrToPaddr before being passed to the GPU
//   cmd[2]: buffer size in bytes (low 2 bits ignored)
//   cmd[3], cmd[7]: decoded below but currently not acted upon
void GPUService::processCommandList(u32* cmd) {
	const u32 address = cmd[1] & ~7;  // Buffer address
	const u32 size = cmd[2] & ~3;     // Buffer size in bytes
	[[maybe_unused]] const bool updateGas = cmd[3] == 1;    // Update gas additive blend results (0 = don't update, 1 = update)
	[[maybe_unused]] const bool flushBuffer = cmd[7] == 1;  // Flush buffer (0 = don't flush, 1 = flush)

	log("GPU::GSP::processCommandList. Address: %08X, size in bytes: %08X\n", address, size);
	gpu.startCommandList(VaddrToPaddr(address), size);
	requestInterrupt(GPUInterrupt::P3D);  // Send an IRQ when command list processing is over
}
|
||||
|
||||
// TODO: Emulate the transfer engine & its registers
|
||||
// Then this can be emulated by just writing the appropriate values there
|
||||
// Handles the TriggerTextureCopy GX command.
//   cmd[1], cmd[2]: input and output buffer addresses, converted with VaddrToPaddr
//   cmd[3]: total number of bytes to copy
//   cmd[4], cmd[5]: packed input and output width/gap values, forwarded unchanged
//   cmd[6]: flags, forwarded unchanged
// Sets bit 0 of the TransferTrigger external register, performs the copy, requests a PPF interrupt,
// then clears bit 0 again.
void GPUService::triggerTextureCopy(u32* cmd) {
	const u32 inputBufferAddr = VaddrToPaddr(cmd[1]);
	const u32 outputBufferAddr = VaddrToPaddr(cmd[2]);
	const u32 totalCopyBytes = cmd[3];
	const u32 inputWidthGap = cmd[4];
	const u32 outputWidthGap = cmd[5];
	const u32 flags = cmd[6];

	// Write the trigger register
	using namespace PICA::ExternalRegs;
	gpu.writeExternalReg(TransferTrigger, gpu.readExternalReg(TransferTrigger) | 1);

	// Perform the texture copy
	gpu.textureCopy(inputBufferAddr, outputBufferAddr, totalCopyBytes, inputWidthGap, outputWidthGap, flags);

	// This uses the transfer engine and thus needs to fire a PPF interrupt.
	// NSMB2 relies on this
	requestInterrupt(GPUInterrupt::PPF);

	// Writing to trigger will perform the texture copy.
	// Reset the bit here to signal completion.
	gpu.writeExternalReg(TransferTrigger, gpu.readExternalReg(TransferTrigger) & ~1);
}
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue