From bac3a8e0404121f7a8b2dae1ac9b455c44db44bf Mon Sep 17 00:00:00 2001
From: Samuliak
Date: Tue, 2 Jul 2024 13:25:05 +0200
Subject: [PATCH] add: texture cache

---
Review notes (free-form text in this section is ignored by `git am`):

 * texture.cpp, decodeTexture(): bytesPerRow was sizeInBytes() / height, i.e.
   the row stride of the *source* PICA data. The buffer handed to
   replaceRegion() holds one 4-byte RGBA8 texel per pixel, so the stride is
   now width * sizeof(u32).
 * texture.cpp, allocate(): the MTL::TextureDescriptor obtained through
   alloc()->init() is released once the texture has been created.
 * texture.cpp, free(): the texture pointer is cleared after release() so a
   repeated free() cannot release the same object twice.
 * texture.hpp: the constructor's initializer list now follows declaration
   order, the default constructor no longer leaves device/location/config
   indeterminate, and <span>/<string_view> are included directly.
 * Comment fixes only: "A8" -> "A4" in the A4 case, "y % 8" -> "v % 8",
   repaired Wikipedia URL.
 * NOTE(review): getTexelETC()/decodeETC() are declared in texture.hpp and
   called from decodeTexel(), but no definition is part of this patch.
   Confirm a definition is linked into the Metal renderer.
 * NOTE(review): this text was rebuilt from a copy that had lost every
   "<...>" token and all line breaks. Template arguments, angle-bracket
   includes, indentation and the context lines of the surface_cache.hpp /
   renderer_mtl.hpp / renderer_mtl.cpp hunks are reconstructed from context
   and must be checked against the tree before applying. The author e-mail
   could not be recovered.

 CMakeLists.txt                         |   1 +
 include/renderer_gl/surface_cache.hpp  |   2 -
 include/renderer_mtl/renderer_mtl.hpp  |   6 +
 include/renderer_mtl/texture.hpp       |  74 +++++++
 src/core/renderer_mtl/renderer_mtl.cpp |   4 +
 src/core/renderer_mtl/texture.cpp      | 266 +++++++++++++++++++++++++
 6 files changed, 351 insertions(+), 2 deletions(-)
 create mode 100644 include/renderer_mtl/texture.hpp
 create mode 100644 src/core/renderer_mtl/texture.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 30cb57ed..c5745d02 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -407,6 +407,7 @@ if(ENABLE_METAL AND APPLE)
 
     set(RENDERER_MTL_SOURCE_FILES src/core/renderer_mtl/metal_cpp_impl.cpp
         src/core/renderer_mtl/renderer_mtl.cpp
+        src/core/renderer_mtl/texture.cpp
         src/host_shaders/metal_shaders.metal
     )
 
diff --git a/include/renderer_gl/surface_cache.hpp b/include/renderer_gl/surface_cache.hpp
index 5323741f..fb7c71a5 100644
--- a/include/renderer_gl/surface_cache.hpp
+++ b/include/renderer_gl/surface_cache.hpp
@@ -19,8 +19,6 @@ template <typename SurfaceType, size_t capacity, bool evictOnOverflow = false>
 class SurfaceCache {
     // Vanilla std::optional can't hold actual references
     using OptionalRef = std::optional<std::reference_wrapper<SurfaceType>>;
-    static_assert(std::is_same<SurfaceType, ColourBuffer>() || std::is_same<SurfaceType, DepthBuffer>() ||
-        std::is_same<SurfaceType, Texture>(), "Invalid surface type");
 
     size_t size;
     size_t evictionIndex;
diff --git a/include/renderer_mtl/renderer_mtl.hpp b/include/renderer_mtl/renderer_mtl.hpp
index c33df63e..892eedc1 100644
--- a/include/renderer_mtl/renderer_mtl.hpp
+++ b/include/renderer_mtl/renderer_mtl.hpp
@@ -2,6 +2,9 @@
 #include <QuartzCore/QuartzCore.hpp>
 
 #include "renderer.hpp"
+#include "texture.hpp"
+// HACK: use the OpenGL cache
+#include "../renderer_gl/surface_cache.hpp"
 
 class GPU;
 
@@ -30,6 +33,9 @@ class RendererMTL final : public Renderer {
     MTL::Device* device;
     MTL::CommandQueue* commandQueue;
 
+    // Caches
+    SurfaceCache<Metal::Texture, 256, true> textureCache;
+
     // HACK
     MTL::Texture* topScreenTexture;
 
diff --git a/include/renderer_mtl/texture.hpp b/include/renderer_mtl/texture.hpp
new file mode 100644
index 00000000..44ec61fa
--- /dev/null
+++ b/include/renderer_mtl/texture.hpp
@@ -0,0 +1,74 @@
+#pragma once
+#include <Metal/Metal.hpp>
+#include <array>
+#include <span>
+#include <string>
+#include <string_view>
+#include "PICA/regs.hpp"
+#include "boost/icl/interval.hpp"
+#include "helpers.hpp"
+#include "math_util.hpp"
+#include "opengl.hpp"
+
+template <typename T>
+using Interval = boost::icl::right_open_interval<T>;
+
+namespace Metal {
+
+struct Texture {
+    MTL::Device* device = nullptr;
+
+    u32 location = 0;
+    u32 config = 0;  // Magnification/minification filter, wrapping configs, etc
+    PICA::TextureFmt format;
+    OpenGL::uvec2 size;
+    bool valid;
+
+    // Range of VRAM taken up by buffer
+    Interval<u32> range;
+
+    MTL::Texture* texture = nullptr;
+
+    Texture() : valid(false) {}
+
+    // Initializers are listed in declaration order: device, location, config, format, size, valid
+    Texture(MTL::Device* dev, u32 loc, PICA::TextureFmt format, u32 x, u32 y, u32 config, bool valid = true)
+        : device(dev), location(loc), config(config), format(format), size({x, y}), valid(valid) {
+        u64 endLoc = (u64)loc + sizeInBytes();
+        // Check if start and end are valid here
+        range = Interval<u32>(loc, (u32)endLoc);
+    }
+
+    // For 2 textures to "match" we only care about their locations, formats, and dimensions to match
+    // For other things, such as filtering mode, etc, we can just switch the attributes of the cached texture
+    bool matches(Texture& other) {
+        return location == other.location && format == other.format &&
+               size.x() == other.size.x() && size.y() == other.size.y();
+    }
+
+    void allocate();
+    void setNewConfig(u32 newConfig);
+    void decodeTexture(std::span<const u8> data);
+    void free();
+    u64 sizeInBytes();
+
+    u32 decodeTexel(u32 u, u32 v, PICA::TextureFmt fmt, std::span<const u8> data);
+
+    // Get the morton interleave offset of a texel based on its U and V values
+    static u32 mortonInterleave(u32 u, u32 v);
+    // Get the byte offset of texel (u, v) in the texture
+    static u32 getSwizzledOffset(u32 u, u32 v, u32 width, u32 bytesPerPixel);
+    static u32 getSwizzledOffset_4bpp(u32 u, u32 v, u32 width);
+
+    // Returns the format of this texture as a string
+    std::string_view formatToString() {
+        return PICA::textureFormatToString(format);
+    }
+
+    // Returns the texel at coordinates (u, v) of an ETC1(A4) texture
+    // TODO: Make hasAlpha a template parameter
+    u32 getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, std::span<const u8> data);
+    u32 decodeETC(u32 alpha, u32 u, u32 v, u64 colourData);
+};
+
+} // namespace Metal
diff --git a/src/core/renderer_mtl/renderer_mtl.cpp b/src/core/renderer_mtl/renderer_mtl.cpp
index 6a56eec0..dbf34881 100644
--- a/src/core/renderer_mtl/renderer_mtl.cpp
+++ b/src/core/renderer_mtl/renderer_mtl.cpp
@@ -17,6 +17,8 @@ RendererMTL::RendererMTL(GPU& gpu, const std::array<u32, regNum>& internalRegs,
 RendererMTL::~RendererMTL() {}
 
 void RendererMTL::reset() {
+    textureCache.reset();
+
     // TODO: implement
     Helpers::warn("RendererMTL::reset not implemented");
 }
@@ -219,6 +221,8 @@ void RendererMTL::screenshot(const std::string& name) {
 }
 
 void RendererMTL::deinitGraphicsContext() {
+    textureCache.reset();
+
     // TODO: implement
     Helpers::warn("RendererMTL::deinitGraphicsContext not implemented");
 }
diff --git a/src/core/renderer_mtl/texture.cpp b/src/core/renderer_mtl/texture.cpp
new file mode 100644
index 00000000..a556263c
--- /dev/null
+++ b/src/core/renderer_mtl/texture.cpp
@@ -0,0 +1,266 @@
+#include "renderer_mtl/texture.hpp"
+#include "colour.hpp"
+#include <array>
+#include <vector>
+
+using namespace Helpers;
+
+namespace Metal {
+
+// Create the backing RGBA8 MTL::Texture for this entry on `device`, then apply the current config
+void Texture::allocate() {
+    MTL::TextureDescriptor* descriptor = MTL::TextureDescriptor::alloc()->init();
+    descriptor->setTextureType(MTL::TextureType2D);
+    descriptor->setPixelFormat(MTL::PixelFormatRGBA8Unorm);
+    descriptor->setWidth(size.u());
+    descriptor->setHeight(size.v());
+    descriptor->setUsage(MTL::TextureUsageShaderRead | MTL::TextureUsageShaderWrite);
+    descriptor->setStorageMode(MTL::StorageModeShared); // TODO: use private + staging buffers?
+    texture = device->newTexture(descriptor);
+    // alloc()->init() hands us an owning reference, so drop it once the texture has been created
+    descriptor->release();
+
+    setNewConfig(config);
+}
+
+// Set the texture's configuration, which includes min/mag filters, wrapping S/T modes, and so on
+void Texture::setNewConfig(u32 cfg) {
+    config = cfg;
+
+    // TODO: implement this
+}
+
+// Invalidate this entry and drop our reference to the Metal texture, if there is one
+void Texture::free() {
+    valid = false;
+
+    if (texture) {
+        texture->release();
+        // Clear the pointer so a repeated free() cannot release the same object twice
+        texture = nullptr;
+    }
+}
+
+// Size in bytes of the source texture data, derived from the PICA format and the dimensions
+u64 Texture::sizeInBytes() {
+    u64 pixelCount = u64(size.x()) * u64(size.y());
+
+    switch (format) {
+        case PICA::TextureFmt::RGBA8: // 4 bytes per pixel
+            return pixelCount * 4;
+
+        case PICA::TextureFmt::RGB8: // 3 bytes per pixel
+            return pixelCount * 3;
+
+        case PICA::TextureFmt::RGBA5551: // 2 bytes per pixel
+        case PICA::TextureFmt::RGB565:
+        case PICA::TextureFmt::RGBA4:
+        case PICA::TextureFmt::RG8:
+        case PICA::TextureFmt::IA8:
+            return pixelCount * 2;
+
+        case PICA::TextureFmt::A8: // 1 byte per pixel
+        case PICA::TextureFmt::I8:
+        case PICA::TextureFmt::IA4:
+            return pixelCount;
+
+        case PICA::TextureFmt::I4: // 4 bits per pixel
+        case PICA::TextureFmt::A4:
+            return pixelCount / 2;
+
+        case PICA::TextureFmt::ETC1: // Compressed formats
+        case PICA::TextureFmt::ETC1A4: {
+            // Number of 4x4 tiles
+            const u64 tileCount = pixelCount / 16;
+            // Tiles are 8 bytes each on ETC1 and 16 bytes each on ETC1A4
+            const u64 tileSize = format == PICA::TextureFmt::ETC1 ? 8 : 16;
+            return tileCount * tileSize;
+        }
+
+        default:
+            Helpers::panic("[PICA] Attempted to get size of invalid texture type");
+    }
+}
+
+// u and v are the UVs of the relevant texel
+// Texture data is stored interleaved in Morton order, ie in a Z-order curve as shown here
+// https://en.wikipedia.org/wiki/Z-order_curve
+// Textures are split into 8x8 tiles. This function returns the in-tile offset depending on the u & v of the texel
+// The in-tile offset is the sum of 2 offsets, one depending on the value of u % 8 and the other on the value of v % 8
+// As documented in this picture https://en.wikipedia.org/wiki/File:Moser%E2%80%93de_Bruijn_addition.svg
+u32 Texture::mortonInterleave(u32 u, u32 v) {
+    static constexpr u32 xOffsets[] = { 0, 1, 4, 5, 16, 17, 20, 21 };
+    static constexpr u32 yOffsets[] = { 0, 2, 8, 10, 32, 34, 40, 42 };
+
+    return xOffsets[u & 7] + yOffsets[v & 7];
+}
+
+// Get the byte offset of texel (u, v) in the texture
+u32 Texture::getSwizzledOffset(u32 u, u32 v, u32 width, u32 bytesPerPixel) {
+    u32 offset = ((u & ~7) * 8) + ((v & ~7) * width); // Offset of the 8x8 tile the texel belongs to
+    offset += mortonInterleave(u, v);                 // Add the in-tile offset of the texel
+
+    return offset * bytesPerPixel;
+}
+
+// Same as the above code except we need to divide by 2 because 4 bits is smaller than a byte
+u32 Texture::getSwizzledOffset_4bpp(u32 u, u32 v, u32 width) {
+    u32 offset = ((u & ~7) * 8) + ((v & ~7) * width); // Offset of the 8x8 tile the texel belongs to
+    offset += mortonInterleave(u, v);                 // Add the in-tile offset of the texel
+
+    return offset / 2;
+}
+
+// Get the texel at position (u, v)
+// fmt: format of the texture
+// data: texture data of the texture
+u32 Texture::decodeTexel(u32 u, u32 v, PICA::TextureFmt fmt, std::span<const u8> data) {
+    switch (fmt) {
+        case PICA::TextureFmt::RGBA4: {
+            u32 offset = getSwizzledOffset(u, v, size.u(), 2);
+            u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8);
+
+            u8 alpha = Colour::convert4To8Bit(getBits<0, 4, u8>(texel));
+            u8 b = Colour::convert4To8Bit(getBits<4, 4, u8>(texel));
+            u8 g = Colour::convert4To8Bit(getBits<8, 4, u8>(texel));
+            u8 r = Colour::convert4To8Bit(getBits<12, 4, u8>(texel));
+
+            return (alpha << 24) | (b << 16) | (g << 8) | r;
+        }
+
+        case PICA::TextureFmt::RGBA5551: {
+            const u32 offset = getSwizzledOffset(u, v, size.u(), 2);
+            const u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8);
+
+            u8 alpha = getBit<0>(texel) ? 0xff : 0;
+            u8 b = Colour::convert5To8Bit(getBits<1, 5, u8>(texel));
+            u8 g = Colour::convert5To8Bit(getBits<6, 5, u8>(texel));
+            u8 r = Colour::convert5To8Bit(getBits<11, 5, u8>(texel));
+
+            return (alpha << 24) | (b << 16) | (g << 8) | r;
+        }
+
+        case PICA::TextureFmt::RGB565: {
+            const u32 offset = getSwizzledOffset(u, v, size.u(), 2);
+            const u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8);
+
+            const u8 b = Colour::convert5To8Bit(getBits<0, 5, u8>(texel));
+            const u8 g = Colour::convert6To8Bit(getBits<5, 6, u8>(texel));
+            const u8 r = Colour::convert5To8Bit(getBits<11, 5, u8>(texel));
+
+            return (0xff << 24) | (b << 16) | (g << 8) | r;
+        }
+
+        case PICA::TextureFmt::RG8: {
+            u32 offset = getSwizzledOffset(u, v, size.u(), 2);
+            constexpr u8 b = 0;
+            const u8 g = data[offset];
+            const u8 r = data[offset + 1];
+
+            return (0xff << 24) | (b << 16) | (g << 8) | r;
+        }
+
+        case PICA::TextureFmt::RGB8: {
+            const u32 offset = getSwizzledOffset(u, v, size.u(), 3);
+            const u8 b = data[offset];
+            const u8 g = data[offset + 1];
+            const u8 r = data[offset + 2];
+
+            return (0xff << 24) | (b << 16) | (g << 8) | r;
+        }
+
+        case PICA::TextureFmt::RGBA8: {
+            const u32 offset = getSwizzledOffset(u, v, size.u(), 4);
+            const u8 alpha = data[offset];
+            const u8 b = data[offset + 1];
+            const u8 g = data[offset + 2];
+            const u8 r = data[offset + 3];
+
+            return (alpha << 24) | (b << 16) | (g << 8) | r;
+        }
+
+        case PICA::TextureFmt::IA4: {
+            const u32 offset = getSwizzledOffset(u, v, size.u(), 1);
+            const u8 texel = data[offset];
+            const u8 alpha = Colour::convert4To8Bit(texel & 0xf);
+            const u8 intensity = Colour::convert4To8Bit(texel >> 4);
+
+            // Intensity formats just copy the intensity value to every colour channel
+            return (alpha << 24) | (intensity << 16) | (intensity << 8) | intensity;
+        }
+
+        case PICA::TextureFmt::A4: {
+            const u32 offset = getSwizzledOffset_4bpp(u, v, size.u());
+
+            // For odd U coordinates, grab the top 4 bits, and the low 4 bits for even coordinates
+            u8 alpha = data[offset] >> ((u % 2) ? 4 : 0);
+            alpha = Colour::convert4To8Bit(getBits<0, 4>(alpha));
+
+            // A4 sets RGB to 0
+            return (alpha << 24) | (0 << 16) | (0 << 8) | 0;
+        }
+
+        case PICA::TextureFmt::A8: {
+            u32 offset = getSwizzledOffset(u, v, size.u(), 1);
+            const u8 alpha = data[offset];
+
+            // A8 sets RGB to 0
+            return (alpha << 24) | (0 << 16) | (0 << 8) | 0;
+        }
+
+        case PICA::TextureFmt::I4: {
+            u32 offset = getSwizzledOffset_4bpp(u, v, size.u());
+
+            // For odd U coordinates, grab the top 4 bits, and the low 4 bits for even coordinates
+            u8 intensity = data[offset] >> ((u % 2) ? 4 : 0);
+            intensity = Colour::convert4To8Bit(getBits<0, 4>(intensity));
+
+            // Intensity formats just copy the intensity value to every colour channel
+            return (0xff << 24) | (intensity << 16) | (intensity << 8) | intensity;
+        }
+
+        case PICA::TextureFmt::I8: {
+            u32 offset = getSwizzledOffset(u, v, size.u(), 1);
+            const u8 intensity = data[offset];
+
+            // Intensity formats just copy the intensity value to every colour channel
+            return (0xff << 24) | (intensity << 16) | (intensity << 8) | intensity;
+        }
+
+        case PICA::TextureFmt::IA8: {
+            u32 offset = getSwizzledOffset(u, v, size.u(), 2);
+
+            // Same as I8 except each pixel gets its own alpha value too
+            const u8 alpha = data[offset];
+            const u8 intensity = data[offset + 1];
+            return (alpha << 24) | (intensity << 16) | (intensity << 8) | intensity;
+        }
+
+        case PICA::TextureFmt::ETC1: return getTexelETC(false, u, v, size.u(), data);
+        case PICA::TextureFmt::ETC1A4: return getTexelETC(true, u, v, size.u(), data);
+
+        default:
+            Helpers::panic("[Texture::DecodeTexel] Unimplemented format = %d", static_cast<int>(fmt));
+    }
+}
+
+// Decode every texel of `data` (in this texture's PICA format) to 32 bits and upload the result to `texture`
+void Texture::decodeTexture(std::span<const u8> data) {
+    std::vector<u32> decoded;
+    decoded.reserve(u64(size.u()) * u64(size.v()));
+
+    // Decode texels line by line
+    for (u32 v = 0; v < size.v(); v++) {
+        for (u32 u = 0; u < size.u(); u++) {
+            u32 colour = decodeTexel(u, v, format, data);
+            decoded.push_back(colour);
+        }
+    }
+
+    // The decoded buffer always holds one 4-byte texel per pixel, whatever the source PICA format is,
+    // so the row stride comes from the width and not from sizeInBytes() (the size of the source data)
+    const u64 bytesPerRow = u64(size.u()) * sizeof(u32);
+    texture->replaceRegion(MTL::Region(0, 0, size.u(), size.v()), 0, 0, decoded.data(), bytesPerRow, 0);
+}
+
+} // namespace Metal