diff --git a/.github/gles.patch b/.github/gles.patch index f1dc2c73..b16733b5 100644 --- a/.github/gles.patch +++ b/.github/gles.patch @@ -1,76 +1,5 @@ -diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp -index a11a6ffa..77486a09 100644 ---- a/src/core/renderer_gl/renderer_gl.cpp -+++ b/src/core/renderer_gl/renderer_gl.cpp -@@ -357,27 +357,27 @@ void RendererGL::bindTexturesToSlots() { - } - - glActiveTexture(GL_TEXTURE0 + 3); -- glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray); -+ // glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray); - glActiveTexture(GL_TEXTURE0); - } - - void RendererGL::updateLightingLUT() { -- gpu.lightingLUTDirty = false; -- std::array u16_lightinglut; -- -- for (int i = 0; i < gpu.lightingLUT.size(); i++) { -- uint64_t value = gpu.lightingLUT[i] & ((1 << 12) - 1); -- u16_lightinglut[i] = value * 65535 / 4095; -- } -- -- glActiveTexture(GL_TEXTURE0 + 3); -- glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray); -- glTexImage2D(GL_TEXTURE_1D_ARRAY, 0, GL_R16, 256, Lights::LUT_Count, 0, GL_RED, GL_UNSIGNED_SHORT, u16_lightinglut.data()); -- glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_LINEAR); -- glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_LINEAR); -- glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); -- glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); -- glActiveTexture(GL_TEXTURE0); -+ // gpu.lightingLUTDirty = false; -+ // std::array u16_lightinglut; -+ -+ // for (int i = 0; i < gpu.lightingLUT.size(); i++) { -+ // uint64_t value = gpu.lightingLUT[i] & ((1 << 12) - 1); -+ // u16_lightinglut[i] = value * 65535 / 4095; -+ // } -+ -+ // glActiveTexture(GL_TEXTURE0 + 3); -+ // glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray); -+ // glTexImage2D(GL_TEXTURE_1D_ARRAY, 0, GL_R16, 256, Lights::LUT_Count, 0, GL_RED, GL_UNSIGNED_SHORT, u16_lightinglut.data()); -+ // glTexParameteri(GL_TEXTURE_1D_ARRAY, 
GL_TEXTURE_MIN_FILTER, GL_LINEAR); -+ // glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_LINEAR); -+ // glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); -+ // glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); -+ // glActiveTexture(GL_TEXTURE0); - } - - void RendererGL::drawVertices(PICA::PrimType primType, std::span vertices) { -diff --git a/src/host_shaders/opengl_display.frag b/src/host_shaders/opengl_display.frag -index 612671c8..1937f711 100644 ---- a/src/host_shaders/opengl_display.frag -+++ b/src/host_shaders/opengl_display.frag -@@ -1,4 +1,5 @@ --#version 410 core -+#version 300 es -+precision mediump float; - in vec2 UV; - out vec4 FragColor; - -diff --git a/src/host_shaders/opengl_display.vert b/src/host_shaders/opengl_display.vert -index 990e2f80..2e7842ac 100644 ---- a/src/host_shaders/opengl_display.vert -+++ b/src/host_shaders/opengl_display.vert -@@ -1,4 +1,5 @@ --#version 410 core -+#version 300 es -+precision mediump float; - out vec2 UV; - - void main() { diff --git a/src/host_shaders/opengl_fragment_shader.frag b/src/host_shaders/opengl_fragment_shader.frag -index f6fa6c55..bb88e278 100644 +index 9f07df0b..96a35afa 100644 --- a/src/host_shaders/opengl_fragment_shader.frag +++ b/src/host_shaders/opengl_fragment_shader.frag @@ -1,4 +1,5 @@ @@ -78,36 +7,29 @@ index f6fa6c55..bb88e278 100644 +#version 300 es +precision mediump float; - in vec3 v_tangent; - in vec3 v_normal; -@@ -27,7 +28,7 @@ uniform bool u_depthmapEnable; - uniform sampler2D u_tex0; - uniform sampler2D u_tex1; - uniform sampler2D u_tex2; --uniform sampler1DArray u_tex_lighting_lut; -+// uniform sampler1DArray u_tex_lighting_lut; + in vec4 v_quaternion; + in vec4 v_colour; +@@ -41,8 +42,8 @@ vec3 normal; + const uint samplerEnabledBitfields[2] = uint[2](0x7170e645u, 0x7f013fefu); - uniform uint u_picaRegs[0x200 - 0x48]; + bool isSamplerEnabled(uint environment_id, uint lut_id) { +- uint index = 7 * environment_id + 
lut_id; +- uint arrayIndex = (index >> 5); ++ uint index = 7u * environment_id + lut_id; ++ uint arrayIndex = (index >> 5u); + return (samplerEnabledBitfields[arrayIndex] & (1u << (index & 31u))) != 0u; + } -@@ -145,16 +146,23 @@ vec4 tevCalculateCombiner(int tev_id) { - #define RR_LUT 6u +@@ -166,11 +167,17 @@ float lutLookup(uint lut, int index) { + return texelFetch(u_tex_luts, ivec2(index, int(lut)), 0).r; + } - float lutLookup(uint lut, uint light, float value) { -- if (lut >= FR_LUT && lut <= RR_LUT) lut -= 1; -- if (lut == SP_LUT) lut = light + 8; -- return texture(u_tex_lighting_lut, vec2(value, lut)).r; -+ // if (lut >= FR_LUT && lut <= RR_LUT) lut -= 1; -+ // if (lut == SP_LUT) lut = light + 8; -+ // return texture(u_tex_lighting_lut, vec2(value, lut)).r; -+ return 0.0; -+} -+ -+// some gles versions have bitfieldExtract and complain if you redefine it, some don't and compile error, using this instead ++// some gles versions have bitfieldExtract and complain if you redefine it, some don't and compile error, using this instead +uint bitfieldExtractCompat(uint val, int off, int size) { + uint mask = uint((1 << size) - 1); + return uint(val >> off) & mask; - } - ++} ++ vec3 regToColor(uint reg) { // Normalization scale to convert from [0...255] to [0.0...1.0] const float scale = 1.0 / 255.0; @@ -117,89 +39,115 @@ index f6fa6c55..bb88e278 100644 } // Convert an arbitrary-width floating point literal to an f32 -@@ -189,7 +197,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { - vec3 view = normalize(v_view); +@@ -201,7 +208,7 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light + // These are the spotlight attenuation LUTs + bit_in_config1 = 8 + int(light_id & 7u); + lut_index = 8u + light_id; +- } else if (lut_id <= 6) { ++ } else if (lut_id <= 6u) { + bit_in_config1 = 16 + int(lut_id); + lut_index = lut_id; + } else { +@@ -210,16 +217,16 @@ float lightLutLookup(uint environment_id, uint lut_id,
uint light_id, vec3 light + bool current_sampler_enabled = isSamplerEnabled(environment_id, lut_id); // 7 luts per environment + +- if (!current_sampler_enabled || (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, bit_in_config1, 1) != 0u)) { ++ if (!current_sampler_enabled || (bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG1, bit_in_config1, 1) != 0u)) { + return 1.0; + } + +- uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, int(lut_id) << 2, 3); ++ uint scale_id = bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_SCALE, int(lut_id) << 2, 3); + float scale = float(1u << scale_id); + if (scale_id >= 6u) scale /= 256.0; + + float delta = 1.0; +- uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, int(lut_id) << 2, 3); ++ uint input_id = bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_SELECT, int(lut_id) << 2, 3); + switch (input_id) { + case 0u: { + delta = dot(normal, normalize(half_vector)); +@@ -243,9 +250,9 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light + + // Sign extend them. 
Normally bitfieldExtract would do that but it's missing on some versions + // of GLSL so we do it manually +- int se_x = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 13); +- int se_y = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 13); +- int se_z = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 13); ++ int se_x = int(bitfieldExtractCompat(uint(GPUREG_LIGHTi_SPOTDIR_LOW), 0, 13)); ++ int se_y = int(bitfieldExtractCompat(uint(GPUREG_LIGHTi_SPOTDIR_LOW), 16, 13)); ++ int se_z = int(bitfieldExtractCompat(uint(GPUREG_LIGHTi_SPOTDIR_HIGH), 0, 13)); + + if ((se_x & 0x1000) == 0x1000) se_x |= 0xffffe000; + if ((se_y & 0x1000) == 0x1000) se_y |= 0xffffe000; +@@ -272,9 +279,9 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light + } + + // 0 = enabled +- if (bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS, 1 + (int(lut_id) << 2), 1) == 0u) { ++ if (bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_ABS, 1 + (int(lut_id) << 2), 1) == 0u) { + // Two sided diffuse +- if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) { ++ if (bitfieldExtractCompat(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) { + delta = max(delta, 0.0); + } else { + delta = abs(delta); +@@ -298,7 +305,7 @@ vec3 rotateVec3ByQuaternion(vec3 v, vec4 q) { + // Implements the following algorthm: https://mathb.in/26766 + void calcLighting(out vec4 primary_color, out vec4 secondary_color) { uint GPUREG_LIGHTING_ENABLE = readPicaReg(0x008Fu); - if (bitfieldExtract(GPUREG_LIGHTING_ENABLE, 0, 1) == 0u) { + if (bitfieldExtractCompat(GPUREG_LIGHTING_ENABLE, 0, 1) == 0u) { - primary_color = secondary_color = vec4(1.0); + primary_color = secondary_color = vec4(0.0); return; } -@@ -213,7 +221,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { - bool error_unimpl = false; +@@ -315,7 +322,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { + GPUREG_LIGHTING_LUTINPUT_ABS = readPicaReg(0x01D0u); + GPUREG_LIGHTING_LUTINPUT_SELECT = readPicaReg(0x01D1u); + +- uint 
bump_mode = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 28, 2); ++ uint bump_mode = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 28, 2); + + // Bump mode is ignored for now because it breaks some games ie. Toad Treasure Tracker + switch (bump_mode) { +@@ -328,15 +335,15 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { + vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0); + vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0); + +- uint environment_id = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 4, 4); +- bool clamp_highlights = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 27, 1) == 1u; ++ uint environment_id = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 4, 4); ++ bool clamp_highlights = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 27, 1) == 1u; + + uint light_id; + vec3 light_vector; + vec3 half_vector; for (uint i = 0u; i < GPUREG_LIGHTING_NUM_LIGHTS; i++) { -- uint light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i * 3u), 3); -+ uint light_id = bitfieldExtractCompat(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i * 3u), 3); +- light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i) << 2, 3); ++ light_id = bitfieldExtractCompat(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i) << 2, 3); - uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140u + 0x10u * light_id); - uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141u + 0x10u * light_id); -@@ -224,14 +232,14 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { - uint GPUREG_LIGHTi_CONFIG = readPicaReg(0x0149u + 0x10u * light_id); + uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140u + (light_id << 4u)); + uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141u + (light_id << 4u)); +@@ -348,12 +355,12 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { - vec3 light_vector = normalize(vec3( + float light_distance; + vec3 light_position = vec3( - decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW, 0, 16), 5u, 10u), decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW, 16, 
16), 5u, 10u), - decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5u, 10u) + decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_VECTOR_LOW, 0, 16), 5u, 10u), decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_VECTOR_LOW, 16, 16), 5u, 10u), + decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5u, 10u) - )); - - vec3 half_vector; + ); // Positional Light - if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 0, 1) == 0u) { + if (bitfieldExtractCompat(GPUREG_LIGHTi_CONFIG, 0, 1) == 0u) { - // error_unimpl = true; - half_vector = normalize(normalize(light_vector + v_view) + view); - } -@@ -242,12 +250,12 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { + light_vector = light_position + v_view; } - for (int c = 0; c < 7; c++) { -- if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 16 + c, 1) == 0u) { -- uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, c * 4, 3); -+ if (bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG1, 16 + c, 1) == 0u) { -+ uint scale_id = bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_SCALE, c * 4, 3); - float scale = float(1u << scale_id); - if (scale_id >= 6u) scale /= 256.0; - -- uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, c * 4, 3); -+ uint input_id = bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_SELECT, c * 4, 3); - if (input_id == 0u) - d[c] = dot(normal, half_vector); - else if (input_id == 1u) -@@ -260,9 +268,9 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { - uint GPUREG_LIGHTi_SPOTDIR_LOW = readPicaReg(0x0146u + 0x10u * light_id); - uint GPUREG_LIGHTi_SPOTDIR_HIGH = readPicaReg(0x0147u + 0x10u * light_id); - vec3 spot_light_vector = normalize(vec3( -- decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 16), 1u, 11u), -- decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 16), 1u, 11u), -- decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 16), 1u, 11u) -+ decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 16), 1u, 11u), -+ 
decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 16), 1u, 11u), -+ decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 16), 1u, 11u) - )); - d[c] = dot(-light_vector, spot_light_vector); // -L dot P (aka Spotlight aka SP); - } else if (input_id == 5u) { -@@ -273,13 +281,13 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { - } - - d[c] = lutLookup(uint(c), light_id, d[c] * 0.5 + 0.5) * scale; -- if (bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS, 2 * c, 1) != 0u) d[c] = abs(d[c]); -+ if (bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_ABS, 2 * c, 1) != 0u) d[c] = abs(d[c]); - } else { - d[c] = 1.0; - } - } - -- uint lookup_config = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 4, 4); -+ uint lookup_config = bitfieldExtractCompat(GPUREG_LIGHTi_CONFIG, 4, 4); - if (lookup_config == 0u) { - d[D1_LUT] = 0.0; - d[FR_LUT] = 0.0; -@@ -310,7 +318,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { - float NdotL = dot(normal, light_vector); // Li dot N +@@ -369,23 +376,23 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { + float NdotL = dot(normal, light_vector); // N dot Li // Two sided diffuse - if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) @@ -207,19 +155,40 @@ index f6fa6c55..bb88e278 100644 NdotL = max(0.0, NdotL); else NdotL = abs(NdotL); -@@ -321,8 +329,8 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { - secondary_color.rgb += light_factor * (regToColor(GPUREG_LIGHTi_SPECULAR0) * d[D0_LUT] + - regToColor(GPUREG_LIGHTi_SPECULAR1) * d[D1_LUT] * vec3(d[RR_LUT], d[RG_LUT], d[RB_LUT])); + + float geometric_factor; +- bool use_geo_0 = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 2, 1) == 1u; +- bool use_geo_1 = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 3, 1) == 1u; ++ bool use_geo_0 = bitfieldExtractCompat(GPUREG_LIGHTi_CONFIG, 2, 1) == 1u; ++ bool use_geo_1 = bitfieldExtractCompat(GPUREG_LIGHTi_CONFIG, 3, 1) == 1u; + if (use_geo_0 || use_geo_1) { + geometric_factor = 
dot(half_vector, half_vector); + geometric_factor = geometric_factor == 0.0 ? 0.0 : min(NdotL / geometric_factor, 1.0); + } + + float distance_attenuation = 1.0; +- if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 24 + int(light_id), 1) == 0u) { +- uint GPUREG_LIGHTi_ATTENUATION_BIAS = bitfieldExtract(readPicaReg(0x014Au + (light_id << 4u)), 0, 20); +- uint GPUREG_LIGHTi_ATTENUATION_SCALE = bitfieldExtract(readPicaReg(0x014Bu + (light_id << 4u)), 0, 20); ++ if (bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG1, 24 + int(light_id), 1) == 0u) { ++ uint GPUREG_LIGHTi_ATTENUATION_BIAS = bitfieldExtractCompat(readPicaReg(0x014Au + (light_id << 4u)), 0, 20); ++ uint GPUREG_LIGHTi_ATTENUATION_SCALE = bitfieldExtractCompat(readPicaReg(0x014Bu + (light_id << 4u)), 0, 20); + + float distance_attenuation_bias = decodeFP(GPUREG_LIGHTi_ATTENUATION_BIAS, 7u, 12u); + float distance_attenuation_scale = decodeFP(GPUREG_LIGHTi_ATTENUATION_SCALE, 7u, 12u); +@@ -430,8 +437,8 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { + specular_sum.rgb += light_factor * clamp_factor * (specular0 + specular1); } + - uint fresnel_output1 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 2, 1); - uint fresnel_output2 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 3, 1); + uint fresnel_output1 = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 2, 1); + uint fresnel_output2 = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 3, 1); - - if (fresnel_output1 == 1u) primary_color.a = d[FR_LUT]; - if (fresnel_output2 == 1u) secondary_color.a = d[FR_LUT]; + // Uses parameters from the last light as Fresnel is only applied to the last light + float fresnel_factor; + diff --git a/src/host_shaders/opengl_vertex_shader.vert b/src/host_shaders/opengl_vertex_shader.vert -index a25d7a6d..7cf40398 100644 +index 057f9a88..dc735ced 100644 --- a/src/host_shaders/opengl_vertex_shader.vert +++ b/src/host_shaders/opengl_vertex_shader.vert @@ -1,4 +1,6 @@ @@ -230,7 +199,7 @@ index a25d7a6d..7cf40398 100644 
layout(location = 0) in vec4 a_coords; layout(location = 1) in vec4 a_quaternion; -@@ -20,7 +22,7 @@ out vec2 v_texcoord2; +@@ -18,7 +20,7 @@ out vec2 v_texcoord2; flat out vec4 v_textureEnvColor[6]; flat out vec4 v_textureEnvBufferColor; @@ -239,7 +208,7 @@ index a25d7a6d..7cf40398 100644 // TEV uniforms uniform uint u_textureEnvColor[6]; -@@ -93,6 +95,6 @@ void main() { +@@ -81,8 +83,8 @@ void main() { ); // There's also another, always-on clipping plane based on vertex z @@ -247,16 +216,20 @@ index a25d7a6d..7cf40398 100644 - gl_ClipDistance[1] = dot(clipData, a_coords); + // gl_ClipDistance[0] = -a_coords.z; + // gl_ClipDistance[1] = dot(clipData, a_coords); + + v_quaternion = a_quaternion; } diff --git a/third_party/opengl/opengl.hpp b/third_party/opengl/opengl.hpp -index f368f573..5ead7f63 100644 +index 607815fa..cbfcc096 100644 --- a/third_party/opengl/opengl.hpp +++ b/third_party/opengl/opengl.hpp -@@ -520,21 +520,21 @@ namespace OpenGL { +@@ -602,22 +602,22 @@ namespace OpenGL { + static void disableScissor() { glDisable(GL_SCISSOR_TEST); } static void enableBlend() { glEnable(GL_BLEND); } static void disableBlend() { glDisable(GL_BLEND); } - static void enableLogicOp() { glEnable(GL_COLOR_LOGIC_OP); } +- static void enableLogicOp() { glEnable(GL_COLOR_LOGIC_OP); } - static void disableLogicOp() { glDisable(GL_COLOR_LOGIC_OP); } ++ static void enableLogicOp() { /* glEnable(GL_COLOR_LOGIC_OP); */ } + static void disableLogicOp() { /* glDisable(GL_COLOR_LOGIC_OP); */ } static void enableDepth() { glEnable(GL_DEPTH_TEST); } static void disableDepth() { glDisable(GL_DEPTH_TEST); } diff --git a/.github/mac-bundle-qt.sh b/.github/mac-bundle-qt.sh index e18be8a5..f8083936 100644 --- a/.github/mac-bundle-qt.sh +++ b/.github/mac-bundle-qt.sh @@ -5,16 +5,16 @@ PATH="$PATH:/usr/libexec" # Construct the app iconset. 
mkdir alber.iconset -convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 16x16 alber.iconset/icon_16x16.png -convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 32x32 alber.iconset/icon_16x16@2x.png -convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 32x32 alber.iconset/icon_32x32.png -convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 64x64 alber.iconset/icon_32x32@2x.png -convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 128x128 alber.iconset/icon_128x128.png -convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 256x256 alber.iconset/icon_128x128@2x.png -convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 256x256 alber.iconset/icon_256x256.png -convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 512x512 alber.iconset/icon_256x256@2x.png -convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 512x512 alber.iconset/icon_512x512.png -convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 1024x1024 alber.iconset/icon_512x512@2x.png +convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 16x16 alber.iconset/icon_16x16.png +convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 32x32 alber.iconset/icon_16x16@2x.png +convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 32x32 alber.iconset/icon_32x32.png +convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 64x64 alber.iconset/icon_32x32@2x.png +convert 
docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 128x128 alber.iconset/icon_128x128.png +convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 256x256 alber.iconset/icon_128x128@2x.png +convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 256x256 alber.iconset/icon_256x256.png +convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 512x512 alber.iconset/icon_256x256@2x.png +convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 512x512 alber.iconset/icon_512x512.png +convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 1024x1024 alber.iconset/icon_512x512@2x.png iconutil --convert icns alber.iconset # Set up the .app directory diff --git a/.github/mac-bundle.sh b/.github/mac-bundle.sh index 314b30f7..3349d6d3 100755 --- a/.github/mac-bundle.sh +++ b/.github/mac-bundle.sh @@ -5,16 +5,16 @@ PATH="$PATH:/usr/libexec" # Construct the app iconset. 
mkdir alber.iconset -convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 16x16 alber.iconset/icon_16x16.png -convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 32x32 alber.iconset/icon_16x16@2x.png -convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 32x32 alber.iconset/icon_32x32.png -convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 64x64 alber.iconset/icon_32x32@2x.png -convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 128x128 alber.iconset/icon_128x128.png -convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 256x256 alber.iconset/icon_128x128@2x.png -convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 256x256 alber.iconset/icon_256x256.png -convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 512x512 alber.iconset/icon_256x256@2x.png -convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 512x512 alber.iconset/icon_512x512.png -convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 1024x1024 alber.iconset/icon_512x512@2x.png +convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 16x16 alber.iconset/icon_16x16.png +convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 32x32 alber.iconset/icon_16x16@2x.png +convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 32x32 alber.iconset/icon_32x32.png +convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 64x64 alber.iconset/icon_32x32@2x.png +convert 
docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 128x128 alber.iconset/icon_128x128.png +convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 256x256 alber.iconset/icon_128x128@2x.png +convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 256x256 alber.iconset/icon_256x256.png +convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 512x512 alber.iconset/icon_256x256@2x.png +convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 512x512 alber.iconset/icon_512x512.png +convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 1024x1024 alber.iconset/icon_512x512@2x.png iconutil --convert icns alber.iconset # Set up the .app directory diff --git a/.github/workflows/Android_Build.yml b/.github/workflows/Android_Build.yml index 11811f8b..d9e75c47 100644 --- a/.github/workflows/Android_Build.yml +++ b/.github/workflows/Android_Build.yml @@ -8,7 +8,7 @@ on: jobs: x64: - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 strategy: matrix: @@ -23,6 +23,9 @@ jobs: - name: Fetch submodules run: git submodule update --init --recursive + - name: Setup CCache + uses: hendrikmuhs/ccache-action@v1.2 + - name: Set up gradle caches uses: actions/cache@v4 with: @@ -47,7 +50,7 @@ jobs: java-version: '17' - name: Configure CMake - run: cmake -B ${{github.workspace}}/build -DBUILD_HYDRA_CORE=1 -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK_ROOT}/build/cmake/android.toolchain.cmake -DANDROID_ABI=x86_64 -DENABLE_VULKAN=0 -DENABLE_USER_BUILD=ON + run: cmake -B ${{github.workspace}}/build -DBUILD_HYDRA_CORE=1 -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK_ROOT}/build/cmake/android.toolchain.cmake -DANDROID_ABI=x86_64 -DENABLE_VULKAN=0 -DENABLE_USER_BUILD=ON 
-DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache - name: Build run: | @@ -73,7 +76,7 @@ jobs: ./src/pandroid/app/build/outputs/apk/${{ env.BUILD_TYPE }}/app-${{ env.BUILD_TYPE }}.apk arm64: - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 strategy: matrix: @@ -88,6 +91,9 @@ jobs: - name: Fetch submodules run: git submodule update --init --recursive + - name: Setup CCache + uses: hendrikmuhs/ccache-action@v1.2 + - name: Set up gradle caches uses: actions/cache@v4 with: @@ -112,7 +118,7 @@ jobs: java-version: '17' - name: Configure CMake - run: cmake -B ${{github.workspace}}/build -DBUILD_HYDRA_CORE=1 -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK_ROOT}/build/cmake/android.toolchain.cmake -DANDROID_ABI=arm64-v8a -DENABLE_VULKAN=0 -DENABLE_USER_BUILD=ON -DCMAKE_CXX_FLAGS="-march=armv8-a+crypto" + run: cmake -B ${{github.workspace}}/build -DBUILD_HYDRA_CORE=1 -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK_ROOT}/build/cmake/android.toolchain.cmake -DANDROID_ABI=arm64-v8a -DENABLE_VULKAN=0 -DENABLE_USER_BUILD=ON -DCMAKE_CXX_FLAGS="-march=armv8-a+crypto" -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache - name: Build run: | diff --git a/.github/workflows/HTTP_Build.yml b/.github/workflows/HTTP_Build.yml index 7bfe9c7f..c4f7cfee 100644 --- a/.github/workflows/HTTP_Build.yml +++ b/.github/workflows/HTTP_Build.yml @@ -16,10 +16,10 @@ jobs: # well on Windows or Mac. You can convert this to a matrix build if you need # cross-platform coverage. 
# See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Fetch submodules run: git submodule update --init --recursive diff --git a/.github/workflows/Hydra_Build.yml b/.github/workflows/Hydra_Build.yml index 645f2f7a..1b31edf7 100644 --- a/.github/workflows/Hydra_Build.yml +++ b/.github/workflows/Hydra_Build.yml @@ -15,7 +15,7 @@ jobs: runs-on: windows-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Fetch submodules run: git submodule update --init --recursive @@ -51,14 +51,14 @@ jobs: with: name: Windows Libretro core path: | - ${{github.workspace}}/build/panda3ds_libretro.dll + ${{github.workspace}}/build/${{ env.BUILD_TYPE }}/panda3ds_libretro.dll ${{github.workspace}}/docs/libretro/panda3ds_libretro.info MacOS: - runs-on: macos-13 + runs-on: macos-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Fetch submodules run: git submodule update --init --recursive @@ -70,7 +70,7 @@ jobs: vulkan-components: Vulkan-Headers, Vulkan-Loader, SPIRV-Tools, Glslang - name: Configure CMake - run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DENABLE_USER_BUILD=ON -DBUILD_HYDRA_CORE=ON + run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DENABLE_USER_BUILD=ON -DBUILD_HYDRA_CORE=ON -DCMAKE_OSX_ARCHITECTURES=x86_64 - name: Build run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} @@ -84,7 +84,7 @@ jobs: - name: Configure CMake (Again) run: | rm -rf ${{github.workspace}}/build - cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DENABLE_USER_BUILD=ON -DBUILD_LIBRETRO_CORE=ON + cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DENABLE_USER_BUILD=ON -DBUILD_LIBRETRO_CORE=ON
-DCMAKE_OSX_ARCHITECTURES=x86_64 - name: Build (Again) run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} && ls -R ${{github.workspace}}/build @@ -98,16 +98,16 @@ jobs: ${{github.workspace}}/docs/libretro/panda3ds_libretro.info Linux: - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Fetch submodules run: git submodule update --init --recursive - name: Install misc packages run: | - sudo apt-get update && sudo apt install libx11-dev libgl1-mesa-glx mesa-common-dev libfuse2 libwayland-dev + sudo apt-get update && sudo apt install libx11-dev libgl1 libglx-mesa0 mesa-common-dev libfuse2 libwayland-dev - name: Install newer Clang run: | @@ -151,16 +151,16 @@ jobs: ${{github.workspace}}/docs/libretro/panda3ds_libretro.info Android-x64: - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Fetch submodules run: git submodule update --init --recursive - name: Install misc packages run: | - sudo apt-get update && sudo apt install libx11-dev libgl1-mesa-glx mesa-common-dev libfuse2 libwayland-dev + sudo apt-get update && sudo apt install libx11-dev libgl1 libglx-mesa0 mesa-common-dev libfuse2 libwayland-dev - name: Setup Vulkan SDK uses: humbletim/setup-vulkan-sdk@v1.2.0 diff --git a/.github/workflows/Linux_AppImage_Build.yml b/.github/workflows/Linux_AppImage_Build.yml index 507187a3..3c5af88a 100644 --- a/.github/workflows/Linux_AppImage_Build.yml +++ b/.github/workflows/Linux_AppImage_Build.yml @@ -16,15 +16,15 @@ jobs: # well on Windows or Mac. You can convert this to a matrix build if you need # cross-platform coverage.
# See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix - runs-on: ubuntu-20.04 + runs-on: ubuntu-24.04 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Fetch submodules run: git submodule update --init --recursive - name: Install misc packages - run: sudo apt-get update && sudo apt install libx11-dev libgl1-mesa-glx mesa-common-dev libfuse2 + run: sudo apt-get update && sudo apt install libx11-dev libgl1 libglx-mesa0 mesa-common-dev libfuse2 libwayland-dev - name: Install newer Clang run: | @@ -33,11 +33,11 @@ jobs: sudo ./llvm.sh 17 - name: Setup Vulkan SDK - run: | - wget -qO - http://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add - - sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-focal.list http://packages.lunarg.com/vulkan/lunarg-vulkan-focal.list - sudo apt update - sudo apt install vulkan-sdk + uses: humbletim/setup-vulkan-sdk@v1.2.0 + with: + vulkan-query-version: latest + vulkan-use-cache: true + vulkan-components: Vulkan-Headers, Vulkan-Loader, SPIRV-Tools, Glslang - name: Configure CMake # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make. @@ -52,7 +52,7 @@ jobs: run: ./.github/linux-appimage.sh - name: Upload executable - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: name: Linux executable path: './Alber-x86_64.AppImage' diff --git a/.github/workflows/Linux_Build.yml b/.github/workflows/Linux_Build.yml index 78e5cc5a..61f7eafa 100644 --- a/.github/workflows/Linux_Build.yml +++ b/.github/workflows/Linux_Build.yml @@ -16,15 +16,15 @@ jobs: # well on Windows or Mac. You can convert this to a matrix build if you need # cross-platform coverage. 
# See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Fetch submodules run: git submodule update --init --recursive - name: Install misc packages - run: sudo apt-get update && sudo apt install libx11-dev libgl1-mesa-glx mesa-common-dev + run: sudo apt-get update && sudo apt install libx11-dev libgl1 libglx-mesa0 mesa-common-dev libwayland-dev - name: Install newer Clang run: | @@ -49,7 +49,7 @@ jobs: run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} - name: Upload executable - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: name: Linux executable path: './build/Alber' diff --git a/.github/workflows/MacOS_Build.yml b/.github/workflows/MacOS_Build.yml index f6fafde9..ab8702fd 100644 --- a/.github/workflows/MacOS_Build.yml +++ b/.github/workflows/MacOS_Build.yml @@ -12,14 +12,15 @@ env: jobs: build: - # The CMake configure and build commands are platform agnostic and should work equally - # well on Windows or Mac. You can convert this to a matrix build if you need - # cross-platform coverage. - # See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix - runs-on: macos-13 + strategy: + matrix: + arch: [x86_64, arm64] + + name: MacOS-${{ matrix.arch }} + runs-on: macos-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Fetch submodules run: git submodule update --init --recursive @@ -33,7 +34,7 @@ jobs: - name: Configure CMake # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make. 
# See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type - run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DENABLE_USER_BUILD=ON + run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DENABLE_USER_BUILD=ON -DCMAKE_OSX_ARCHITECTURES=${{ matrix.arch }} - name: Build # Build your program with the given configuration @@ -49,10 +50,45 @@ jobs: run: codesign --force -s - -vvvv Alber.app - name: Zip it up - run: zip -r Alber Alber.app + run: zip -r Alber-${{ matrix.arch }} Alber.app - name: Upload MacOS App - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: - name: MacOS Alber App Bundle - path: 'Alber.zip' + name: MacOS Alber App Bundle (${{ matrix.arch }}) + path: Alber-${{ matrix.arch }}.zip + + MacOS-Universal: + name: MacOS-Universal + needs: [build] + runs-on: macos-latest + + steps: + - name: Download x86_64 + uses: actions/download-artifact@v4 + with: + name: MacOS Alber App Bundle (x86_64) + path: x86_64 + - name: Download ARM64 + uses: actions/download-artifact@v4 + with: + name: MacOS Alber App Bundle (arm64) + path: arm64 + - name: Combine app bundles + shell: bash + run: | + set -x + unzip x86_64/*.zip -d x86_64 + unzip arm64/*.zip -d arm64 + lipo {x86_64,arm64}/Alber.app/Contents/MacOS/Alber -create -output Alber + cp -v -a arm64/Alber.app Alber.app + cp -v Alber Alber.app/Contents/MacOS/Alber + # Mix in x86_64 files that do not appear in the ARM64 build (e.g. 
libvulkan) + cp -v -R -n x86_64/Alber.app/* Alber.app/ || true + codesign --force -s - -vvvv Alber.app + zip -r -y Alber-universal.zip Alber.app + - name: Upload artifact + uses: actions/upload-artifact@v4 + with: + name: MacOS Alber App Bundle (universal) + path: Alber-universal.zip diff --git a/.github/workflows/Qt_Build.yml b/.github/workflows/Qt_Build.yml index 5e622c54..fc4072da 100644 --- a/.github/workflows/Qt_Build.yml +++ b/.github/workflows/Qt_Build.yml @@ -15,7 +15,7 @@ jobs: runs-on: windows-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Fetch submodules run: git submodule update --init --recursive @@ -45,16 +45,21 @@ jobs: windeployqt --dir upload upload/Alber.exe - name: Upload executable - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: name: Windows executable path: upload MacOS: - runs-on: macos-13 + strategy: + matrix: + arch: [x86_64, arm64] + + name: MacOS-${{ matrix.arch }} + runs-on: macos-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Fetch submodules run: git submodule update --init --recursive @@ -69,11 +74,17 @@ jobs: run: | brew install dylibbundler imagemagick - - name: Install qt - run: brew install qt && which macdeployqt + - name: Install Qt + uses: jurplel/install-qt-action@v3 + with: + aqtversion: '==3.1.*' + version: '6.8.1' + host: 'mac' + target: 'desktop' + arch: 'clang_64' - name: Configure CMake - run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DENABLE_USER_BUILD=ON -DENABLE_QT_GUI=ON + run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DENABLE_USER_BUILD=ON -DENABLE_QT_GUI=ON -DCMAKE_OSX_ARCHITECTURES=${{ matrix.arch }} - name: Build run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} @@ -87,28 +98,62 @@ jobs: run: codesign --force -s - -vvvv Alber.app - name: Zip it up - run: zip -r Alber Alber.app + run: zip -r Alber-${{ matrix.arch }} 
Alber.app - name: Upload MacOS App - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: - name: MacOS Alber App Bundle - path: 'Alber.zip' + name: MacOS Alber App Bundle (${{ matrix.arch }}) + path: Alber-${{ matrix.arch }}.zip - Linux: - runs-on: ubuntu-20.04 + MacOS-Universal: + name: MacOS-Universal + needs: [MacOS] + runs-on: macos-latest steps: - - uses: actions/checkout@v2 + - name: Download x86_64 + uses: actions/download-artifact@v4 + with: + name: MacOS Alber App Bundle (x86_64) + path: x86_64 + - name: Download ARM64 + uses: actions/download-artifact@v4 + with: + name: MacOS Alber App Bundle (arm64) + path: arm64 + - name: Combine app bundles + shell: bash + run: | + set -x + unzip x86_64/*.zip -d x86_64 + unzip arm64/*.zip -d arm64 + lipo {x86_64,arm64}/Alber.app/Contents/MacOS/Alber -create -output Alber + cp -v -a arm64/Alber.app Alber.app + cp -v Alber Alber.app/Contents/MacOS/Alber + # Mix in x86_64 files that do not appear in the ARM64 build (e.g. libvulkan) + cp -v -R -n x86_64/Alber.app/* Alber.app/ || true + codesign --force -s - -vvvv Alber.app + zip -r -y Alber-universal.zip Alber.app + - name: Upload artifact + uses: actions/upload-artifact@v4 + with: + name: MacOS Alber App Bundle (universal) + path: Alber-universal.zip + + Linux: + runs-on: ubuntu-24.04 + + steps: + - uses: actions/checkout@v4 - name: Fetch submodules run: git submodule update --init --recursive - name: Install misc packages run: | - sudo apt-get update && sudo apt install libx11-dev libgl1-mesa-glx mesa-common-dev libfuse2 libwayland-dev - sudo add-apt-repository -y ppa:savoury1/qt-6-2 + sudo apt-get update && sudo apt install libx11-dev libgl1 libglx-mesa0 mesa-common-dev libfuse2 libwayland-dev libgl1-mesa-dev sudo apt update - sudo apt install qt6-base-dev qt6-base-private-dev + sudo apt install qt6-base-dev qt6-base-private-dev qt6-tools-dev - name: Install newer Clang run: | @@ -117,11 +162,11 @@ jobs: sudo ./llvm.sh 17 - name: Setup Vulkan SDK - 
run: | - wget -qO - http://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add - - sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-focal.list http://packages.lunarg.com/vulkan/lunarg-vulkan-focal.list - sudo apt update - sudo apt install vulkan-sdk + uses: humbletim/setup-vulkan-sdk@v1.2.0 + with: + vulkan-query-version: latest + vulkan-use-cache: true + vulkan-components: Vulkan-Headers, Vulkan-Loader, SPIRV-Tools, Glslang - name: Configure CMake run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=clang-17 -DCMAKE_CXX_COMPILER=clang++-17 -DENABLE_USER_BUILD=ON -DENABLE_QT_GUI=ON @@ -135,7 +180,7 @@ jobs: ./.github/linux-appimage-qt.sh - name: Upload executable - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: name: Linux executable path: './Alber-x86_64.AppImage' diff --git a/.github/workflows/Windows_Build.yml b/.github/workflows/Windows_Build.yml index a06889eb..5497c3ef 100644 --- a/.github/workflows/Windows_Build.yml +++ b/.github/workflows/Windows_Build.yml @@ -19,7 +19,7 @@ jobs: runs-on: windows-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Fetch submodules run: git submodule update --init --recursive @@ -40,7 +40,7 @@ jobs: run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} - name: Upload executable - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: name: Windows executable path: './build/${{ env.BUILD_TYPE }}/Alber.exe' diff --git a/.gitignore b/.gitignore index 528462ad..817786a3 100644 --- a/.gitignore +++ b/.gitignore @@ -64,5 +64,9 @@ fb.bat *.elf *.smdh +# Compiled Metal shader files +*.ir +*.metallib + config.toml CMakeSettings.json diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 00000000..94ea8193 --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,131 @@ +# DESCRIPTION: GitLab CI/CD for libRetro (NOT FOR GitLab-proper) + 
+############################################################################## +################################# BOILERPLATE ################################ +############################################################################## + +# Core definitions +.core-defs: + variables: + GIT_SUBMODULE_STRATEGY: recursive + CORENAME: panda3ds + BASE_CORE_ARGS: -DBUILD_LIBRETRO_CORE=ON -DENABLE_USER_BUILD=ON -DENABLE_VULKAN=OFF -DENABLE_LUAJIT=OFF -DENABLE_DISCORD_RPC=OFF -DENABLE_METAL=OFF + CORE_ARGS: ${BASE_CORE_ARGS} + +# Inclusion templates, required for the build to work + +include: + ################################## DESKTOPS ################################ + # Linux + - project: 'libretro-infrastructure/ci-templates' + file: '/linux-cmake.yml' + + # Windows + - project: 'libretro-infrastructure/ci-templates' + file: '/windows-cmake-mingw.yml' + + # MacOS + - project: 'libretro-infrastructure/ci-templates' + file: 'osx-cmake-x86.yml' + + # MacOS + - project: 'libretro-infrastructure/ci-templates' + file: 'osx-cmake-arm64.yml' + + ################################## CELLULAR ################################ + # Android + - project: 'libretro-infrastructure/ci-templates' + file: '/android-cmake.yml' + + # iOS + - project: 'libretro-infrastructure/ci-templates' + file: '/ios-cmake.yml' + +# Stages for building +stages: + - build-prepare + - build-static + - build-shared + +############################################################################## +#################################### STAGES ################################## +############################################################################## +# +################################### DESKTOPS ################################# +# Linux 64-bit +libretro-build-linux-x64: + image: $CI_SERVER_HOST:5050/libretro-infrastructure/libretro-build-amd64-ubuntu:latest + before_script: + - export NUMPROC=$(($(nproc)/5)) + - sudo apt-get update -qy + - sudo apt-get install -qy software-properties-common + - sudo 
add-apt-repository -y ppa:savoury1/build-tools + - sudo add-apt-repository -y ppa:savoury1/gcc-defaults-12 + - sudo apt-get update -qy + - sudo apt-get install -qy cmake gcc-12 g++-12 + variables: + CC: /usr/bin/gcc-12 + CXX: /usr/bin/g++-12 + extends: + - .libretro-linux-cmake-x86_64 + - .core-defs + +# Windows 64-bit +libretro-build-windows-x64: + extends: + - .libretro-windows-cmake-x86_64 + - .core-defs + +# MacOS 64-bit +libretro-build-osx-x64: + tags: + - mac-apple-silicon + variables: + CORE_ARGS: ${BASE_CORE_ARGS} -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCRYPTOPP_AMD64=1 + extends: + - .libretro-osx-cmake-x86 + - .core-defs + +# MacOS arm 64-bit +libretro-build-osx-arm64: + tags: + - mac-apple-silicon + extends: + - .libretro-osx-cmake-arm64 + - .core-defs + +################################### CELLULAR ################################# +# Android ARMv7a +#android-armeabi-v7a: +# extends: +# - .libretro-android-cmake-armeabi-v7a +# - .core-defs + +# Android ARMv8a +# android-arm64-v8a: +# extends: +# - .libretro-android-cmake-arm64-v8a +# - .core-defs + +# Android 64-bit x86 +# android-x86_64: +# extends: +# - .libretro-android-cmake-x86_64 +# - .core-defs + +# Android 32-bit x86 +# android-x86: +# extends: +# - .libretro-android-cmake-x86 +# - .core-defs + +# iOS +# libretro-build-ios-arm64: +# extends: +# - .libretro-ios-cmake-arm64 +# - .core-defs +# variables: +# CORE_ARGS: -DBUILD_LIBRETRO_CORE=ON -DBUILD_PLAY=OFF -DENABLE_AMAZON_S3=off -DBUILD_TESTS=OFF -DCMAKE_TOOLCHAIN_FILE=deps/Dependencies/cmake-ios/ios.cmake -DTARGET_IOS=ON +# LIBNAME: ${CORENAME}_libretro_ios.dylib + +################################### CONSOLES ################################# diff --git a/.gitmodules b/.gitmodules index 9c2fb876..0be4d748 100644 --- a/.gitmodules +++ b/.gitmodules @@ -40,9 +40,6 @@ [submodule "third_party/zep"] path = third_party/zep url = https://github.com/Panda3DS-emu/zep -[submodule "third_party/oaknut"] - path = third_party/oaknut - url = 
https://github.com/merryhime/oaknut [submodule "third_party/luv"] path = third_party/luv url = https://github.com/luvit/luv @@ -75,4 +72,19 @@ url = https://github.com/machinezone/IXWebSocket [submodule "third_party/hips"] path = third_party/hips - url = https://github.com/wheremyfoodat/Hips \ No newline at end of file + url = https://github.com/wheremyfoodat/Hips +[submodule "third_party/metal-cpp"] + path = third_party/metal-cpp + url = https://github.com/Panda3DS-emu/metal-cpp +[submodule "third_party/fmt"] + path = third_party/fmt + url = https://github.com/fmtlib/fmt +[submodule "third_party/fdk-aac"] + path = third_party/fdk-aac + url = https://github.com/Panda3DS-emu/fdk-aac/ +[submodule "third_party/cryptoppwin"] + path = third_party/cryptoppwin + url = https://github.com/shadps4-emu/ext-cryptoppwin +[submodule "third_party/oaknut"] + path = third_party/oaknut + url = https://github.com/panda3ds-emu/oaknut diff --git a/CMakeLists.txt b/CMakeLists.txt index ef42fdfa..f29a02a2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,19 +19,38 @@ endif() project(Alber) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake") if(APPLE) enable_language(OBJC) endif() +# Enable RC support in order to use resource files for application icons +if(WIN32) + enable_language(RC) + set(APP_RESOURCES docs/img/windows_icon.rc) +endif() + if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-format-nonliteral -Wno-format-security") -endif() + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-format-nonliteral -Wno-format-security -Wno-invalid-offsetof") +endif() + +if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-interference-size") +endif() + +if(ANDROID) + set(DEFAULT_OPENGL_PROFILE OpenGLES) +else() + set(DEFAULT_OPENGL_PROFILE OpenGL) +endif() option(DISABLE_PANIC_DEV "Make a build with fewer and less intrusive asserts" ON) 
option(GPU_DEBUG_INFO "Enable additional GPU debugging info" OFF) option(ENABLE_OPENGL "Enable OpenGL rendering backend" ON) option(ENABLE_VULKAN "Enable Vulkan rendering backend" ON) +option(ENABLE_METAL "Enable Metal rendering backend (if available)" ON) +option(ENABLE_WAYLAND "Enable Wayland support on Linux platforms" ON) option(ENABLE_LTO "Enable link-time optimization" OFF) option(ENABLE_TESTS "Compile unit-tests" OFF) option(ENABLE_USER_BUILD "Make a user-facing build. These builds have various assertions disabled, LTO, and more" OFF) @@ -39,8 +58,20 @@ option(ENABLE_HTTP_SERVER "Enable HTTP server. Used for Discord bot support" OFF option(ENABLE_DISCORD_RPC "Compile with Discord RPC support (disabled by default)" ON) option(ENABLE_LUAJIT "Enable scripting with the Lua programming language" ON) option(ENABLE_QT_GUI "Enable the Qt GUI. If not selected then the emulator uses a minimal SDL-based UI instead" OFF) +option(USE_SYSTEM_SDL2 "Use the system's SDL2 package" OFF) +option(ENABLE_GIT_VERSIONING "Enables querying git for the emulator version" ON) option(BUILD_HYDRA_CORE "Build a Hydra core" OFF) option(BUILD_LIBRETRO_CORE "Build a Libretro core" OFF) +option(ENABLE_RENDERDOC_API "Build with support for Renderdoc's capture API for graphics debugging" ON) +option(DISABLE_SSE4 "Build with SSE4 instructions disabled, may reduce performance" OFF) + +set(OPENGL_PROFILE ${DEFAULT_OPENGL_PROFILE} CACHE STRING "OpenGL profile to use if OpenGL is enabled. 
Valid values are 'OpenGL' and 'OpenGLES'.") +set_property(CACHE OPENGL_PROFILE PROPERTY STRINGS OpenGL OpenGLES) + +if(ENABLE_OPENGL AND (OPENGL_PROFILE STREQUAL "OpenGLES")) + message(STATUS "Building with OpenGLES support") + add_compile_definitions(USING_GLES) +endif() if(BUILD_HYDRA_CORE) set(CMAKE_POSITION_INDEPENDENT_CODE ON) @@ -51,6 +82,49 @@ if(BUILD_LIBRETRO_CORE) add_compile_definitions(__LIBRETRO__) endif() +if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC" AND ENABLE_USER_BUILD) + # Disable stack buffer overflow checks in user builds + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /GS-") +endif() + +# Generate versioning files +find_package(Git) +set(PANDA3DS_VERSION "0.9") + +if(NOT EXISTS ${CMAKE_BINARY_DIR}/include/version.hpp.in) + file(WRITE ${CMAKE_BINARY_DIR}/include/version.hpp.in "#define PANDA3DS_VERSION \"\${PANDA3DS_VERSION}\"") +endif() + +if(GIT_FOUND AND ENABLE_GIT_VERSIONING) + execute_process( + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} COMMAND ${GIT_EXECUTABLE} describe --tags --abbrev=0 + OUTPUT_VARIABLE git_version_tag OUTPUT_STRIP_TRAILING_WHITESPACE + ) + execute_process( + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} COMMAND ${GIT_EXECUTABLE} rev-parse --short=7 HEAD + OUTPUT_VARIABLE git_version_rev OUTPUT_STRIP_TRAILING_WHITESPACE + ) + if(NOT git_version_tag STREQUAL "") + set(PANDA3DS_VERSION "${git_version_tag}") + execute_process( + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} COMMAND ${GIT_EXECUTABLE} describe --tags + OUTPUT_VARIABLE git_version_desc OUTPUT_STRIP_TRAILING_WHITESPACE + ) + if(git_version_tag STREQUAL git_version_desc) + set(git_version_rev "") + endif() + unset(git_version_desc) + endif() + if(NOT git_version_rev STREQUAL "") + set(PANDA3DS_VERSION "${PANDA3DS_VERSION}.${git_version_rev}") + endif() + string(REGEX REPLACE "^v" "" PANDA3DS_VERSION "${PANDA3DS_VERSION}") + unset(git_version_tag) + unset(git_version_rev) +endif() +configure_file(${CMAKE_BINARY_DIR}/include/version.hpp.in ${CMAKE_BINARY_DIR}/include/version.hpp) 
+include_directories(${CMAKE_BINARY_DIR}/include/) + add_library(AlberCore STATIC) include_directories(${PROJECT_SOURCE_DIR}/include/) @@ -61,7 +135,6 @@ include_directories(third_party/elfio/) include_directories(third_party/hips/include/) include_directories(third_party/imgui/) include_directories(third_party/dynarmic/src) -include_directories(third_party/cryptopp/) include_directories(third_party/cityhash/include) include_directories(third_party/result/include) include_directories(third_party/xxhash/include) @@ -75,24 +148,35 @@ add_compile_definitions(NOMINMAX) # Make windows.h not define min/ma add_compile_definitions(WIN32_LEAN_AND_MEAN) # Make windows.h not include literally everything add_compile_definitions(SDL_MAIN_HANDLED) +if(ENABLE_WAYLAND) + add_compile_definitions(WAYLAND_ENABLED) +endif() + if(ENABLE_DISCORD_RPC AND NOT ANDROID) add_subdirectory(third_party/discord-rpc) include_directories(third_party/discord-rpc/include) endif() -set(SDL_STATIC ON CACHE BOOL "" FORCE) -set(SDL_SHARED OFF CACHE BOOL "" FORCE) -set(SDL_TEST OFF CACHE BOOL "" FORCE) if (NOT ANDROID) - add_subdirectory(third_party/SDL2) - target_link_libraries(AlberCore PUBLIC SDL2-static) + if (USE_SYSTEM_SDL2) + find_package(SDL2 CONFIG REQUIRED) + target_link_libraries(AlberCore PUBLIC SDL2::SDL2) + else() + set(SDL_STATIC ON CACHE BOOL "" FORCE) + set(SDL_SHARED OFF CACHE BOOL "" FORCE) + set(SDL_TEST OFF CACHE BOOL "" FORCE) + add_subdirectory(third_party/SDL2) + target_link_libraries(AlberCore PUBLIC SDL2-static) + endif() endif() +add_subdirectory(third_party/fmt) add_subdirectory(third_party/toml11) -include_directories(${SDL2_INCLUDE_DIR}) include_directories(third_party/toml11) include_directories(third_party/glm) +include_directories(third_party/renderdoc) +include_directories(third_party/duckstation) add_subdirectory(third_party/cmrc) @@ -110,10 +194,26 @@ if(ANDROID) target_link_libraries(AlberCore PRIVATE EGL log) endif() -set(CRYPTOPP_BUILD_TESTING OFF) 
-add_subdirectory(third_party/cryptopp) add_subdirectory(third_party/glad) +# Cryptopp doesn't support compiling under clang-cl, so we have to include it as a prebuilt MSVC static library +if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND MSVC) + add_subdirectory(third_party/cryptoppwin) + include_directories(third_party/cryptoppwin/include) + target_link_libraries(AlberCore PRIVATE cryptoppwin) + + # Also silence some of clang-cl's more... intrusive warnings + set(WARNING_FLAGS "/W1 -Wno-unused-function -Wno-unused-but-set-variable -Wno-reorder-ctor") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${WARNING_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${WARNING_FLAGS}") +else() + set(CRYPTOPP_BUILD_TESTING OFF) + + add_subdirectory(third_party/cryptopp) + include_directories(third_party/cryptopp) + target_link_libraries(AlberCore PRIVATE cryptopp) +endif() + if(ENABLE_LUAJIT) add_subdirectory(third_party/LuaJIT luajit) include_directories(third_party/LuaJIT/src ${CMAKE_BINARY_DIR}/luajit) @@ -129,26 +229,68 @@ if(ENABLE_LUAJIT) target_link_libraries(AlberCore PRIVATE libluajit) endif() -# Check for x64 -if (CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "x86-64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") +# Detect target architecture +if (NOT APPLE OR "${CMAKE_OSX_ARCHITECTURES}" STREQUAL "") + # Normal target detection + if (CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "x86-64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") set(HOST_X64 TRUE) + else() + set(HOST_X64 FALSE) + endif() + + if (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64") + set(HOST_ARM64 TRUE) + else() + set(HOST_ARM64 FALSE) + endif() +else() + # Apple target detection + if("x86_64" IN_LIST CMAKE_OSX_ARCHITECTURES) + set(HOST_X64 TRUE) + else() + set(HOST_X64 FALSE) + endif() + + if("arm64" IN_LIST CMAKE_OSX_ARCHITECTURES) + set(HOST_ARM64 TRUE) + else() + set(HOST_ARM64 FALSE) + endif() + + if (HOST_ARM64 
AND HOST_X64) + message(FATAL_ERROR "Universal builds not supported like this! Please compile separately and stitch together") + endif() +endif() + +if (HOST_X64) add_subdirectory(third_party/xbyak) # Add xbyak submodule for x86 JITs include_directories(third_party/xbyak) add_compile_definitions(PANDA3DS_DYNAPICA_SUPPORTED) add_compile_definitions(PANDA3DS_X64_HOST) -else() - set(HOST_X64 FALSE) endif() -# Check for arm64 -if (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64") - set(HOST_ARM64 TRUE) +if (HOST_ARM64) add_subdirectory(third_party/oaknut) # Add Oaknut submodule for arm64 JITs include_directories(third_party/oaknut/include) add_compile_definitions(PANDA3DS_DYNAPICA_SUPPORTED) add_compile_definitions(PANDA3DS_ARM64_HOST) -else() - set(HOST_ARM64 FALSE) +endif() + +# Enable SSE4.1 if it's not explicitly disabled +# Annoyingly, we can't easily do this if we're using MSVC cause its /arch option has no SSE4.1 setting +if(NOT MSVC OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND NOT DISABLE_SSE4 AND HOST_X64) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse4.1") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.1") +elseif(MSVC AND NOT DISABLE_SSE4) + # Tell our SIMD code to use SSE4.1 by defining the relevant macros. + # Clang defines these macros, MSVC does not.
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /D__SSE3__ /D__SSE4_1__") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D__SSE3__ /D__SSE4_1__") +endif() + +if(ENABLE_RENDERDOC_API) + find_package(RenderDoc 1.6.0 MODULE REQUIRED) + add_compile_definitions(PANDA3DS_ENABLE_RENDERDOC) endif() if(HOST_X64 OR HOST_ARM64) @@ -162,6 +304,7 @@ else() endif() add_subdirectory(third_party/teakra EXCLUDE_FROM_ALL) +add_subdirectory(third_party/fdk-aac) set(CAPSTONE_ARCHITECTURE_DEFAULT OFF) set(CAPSTONE_ARM_SUPPORT ON) @@ -173,7 +316,8 @@ set(SOURCE_FILES src/emulator.cpp src/io_file.cpp src/config.cpp src/core/CPU/cpu_dynarmic.cpp src/core/CPU/dynarmic_cycles.cpp src/core/memory.cpp src/renderer.cpp src/core/renderer_null/renderer_null.cpp src/http_server.cpp src/stb_image_write.c src/core/cheats.cpp src/core/action_replay.cpp - src/discord_rpc.cpp src/lua.cpp src/memory_mapped_file.cpp src/miniaudio.cpp + src/discord_rpc.cpp src/lua.cpp src/memory_mapped_file.cpp src/miniaudio.cpp src/renderdoc.cpp + src/frontend_settings.cpp ) set(CRYPTO_SOURCE_FILES src/core/crypto/aes_engine.cpp) set(KERNEL_SOURCE_FILES src/core/kernel/kernel.cpp src/core/kernel/resource_limits.cpp @@ -193,25 +337,29 @@ set(SERVICE_SOURCE_FILES src/core/services/service_manager.cpp src/core/services src/core/services/act.cpp src/core/services/nfc.cpp src/core/services/dlp_srvr.cpp src/core/services/ir_user.cpp src/core/services/http.cpp src/core/services/soc.cpp src/core/services/ssl.cpp src/core/services/news_u.cpp src/core/services/amiibo_device.cpp - src/core/services/csnd.cpp src/core/services/nwm_uds.cpp + src/core/services/csnd.cpp src/core/services/nwm_uds.cpp src/core/services/fonts.cpp + src/core/services/ns.cpp ) set(PICA_SOURCE_FILES src/core/PICA/gpu.cpp src/core/PICA/regs.cpp src/core/PICA/shader_unit.cpp src/core/PICA/shader_interpreter.cpp src/core/PICA/dynapica/shader_rec.cpp src/core/PICA/dynapica/shader_rec_emitter_x64.cpp src/core/PICA/pica_hash.cpp - 
src/core/PICA/dynapica/shader_rec_emitter_arm64.cpp + src/core/PICA/dynapica/shader_rec_emitter_arm64.cpp src/core/PICA/shader_gen_glsl.cpp + src/core/PICA/shader_decompiler.cpp src/core/PICA/draw_acceleration.cpp ) set(LOADER_SOURCE_FILES src/core/loader/elf.cpp src/core/loader/ncsd.cpp src/core/loader/ncch.cpp src/core/loader/3dsx.cpp src/core/loader/lz77.cpp) set(FS_SOURCE_FILES src/core/fs/archive_self_ncch.cpp src/core/fs/archive_save_data.cpp src/core/fs/archive_sdmc.cpp src/core/fs/archive_ext_save_data.cpp src/core/fs/archive_ncch.cpp src/core/fs/romfs.cpp src/core/fs/ivfc.cpp src/core/fs/archive_user_save_data.cpp src/core/fs/archive_system_save_data.cpp + src/core/fs/archive_twl_photo.cpp src/core/fs/archive_twl_sound.cpp src/core/fs/archive_card_spi.cpp ) set(APPLET_SOURCE_FILES src/core/applets/applet.cpp src/core/applets/mii_selector.cpp src/core/applets/software_keyboard.cpp src/core/applets/applet_manager.cpp src/core/applets/error_applet.cpp ) set(AUDIO_SOURCE_FILES src/core/audio/dsp_core.cpp src/core/audio/null_core.cpp src/core/audio/teakra_core.cpp - src/core/audio/miniaudio_device.cpp src/core/audio/hle_core.cpp + src/core/audio/miniaudio_device.cpp src/core/audio/hle_core.cpp src/core/audio/aac_decoder.cpp + src/core/audio/audio_interpolation.cpp ) set(RENDERER_SW_SOURCE_FILES src/core/renderer_sw/renderer_sw.cpp) @@ -230,7 +378,7 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/termcolor.hpp include/services/mic.hpp include/services/cecd.hpp include/services/ac.hpp include/services/am.hpp include/services/boss.hpp include/services/frd.hpp include/services/nim.hpp include/fs/archive_ext_save_data.hpp include/fs/archive_ncch.hpp include/services/mcu/mcu_hwc.hpp - include/colour.hpp include/services/y2r.hpp include/services/cam.hpp include/services/ssl.hpp + include/colour.hpp include/services/y2r.hpp include/services/cam.hpp include/services/ssl.hpp include/services/ldr_ro.hpp include/ipc.hpp include/services/act.hpp 
include/services/nfc.hpp include/system_models.hpp include/services/dlp_srvr.hpp include/PICA/dynapica/pica_recs.hpp include/PICA/dynapica/x64_regs.hpp include/PICA/dynapica/vertex_loader_rec.hpp include/PICA/dynapica/shader_rec.hpp @@ -241,21 +389,28 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/termcolor.hpp include/config.hpp include/services/ir_user.hpp include/http_server.hpp include/cheats.hpp include/action_replay.hpp include/renderer_sw/renderer_sw.hpp include/compiler_builtins.hpp include/fs/romfs.hpp include/fs/ivfc.hpp include/discord_rpc.hpp include/services/http.hpp include/result/result_cfg.hpp - include/applets/applet.hpp include/applets/mii_selector.hpp include/math_util.hpp include/services/soc.hpp + include/applets/applet.hpp include/applets/mii_selector.hpp include/math_util.hpp include/services/soc.hpp include/services/news_u.hpp include/applets/software_keyboard.hpp include/applets/applet_manager.hpp include/fs/archive_user_save_data.hpp include/services/amiibo_device.hpp include/services/nfc_types.hpp include/swap.hpp include/services/csnd.hpp include/services/nwm_uds.hpp include/fs/archive_system_save_data.hpp include/lua_manager.hpp include/memory_mapped_file.hpp include/hydra_icon.hpp - include/PICA/dynapica/shader_rec_emitter_arm64.hpp include/scheduler.hpp include/applets/error_applet.hpp + include/PICA/dynapica/shader_rec_emitter_arm64.hpp include/scheduler.hpp include/applets/error_applet.hpp include/PICA/shader_gen.hpp include/audio/dsp_core.hpp include/audio/null_core.hpp include/audio/teakra_core.hpp include/audio/miniaudio_device.hpp include/ring_buffer.hpp include/bitfield.hpp include/audio/dsp_shared_mem.hpp - include/audio/hle_core.hpp include/capstone.hpp include/audio/aac.hpp include/external_haptics_manager.hpp + include/audio/hle_core.hpp include/capstone.hpp include/audio/aac.hpp include/PICA/pica_frag_config.hpp + include/PICA/pica_frag_uniforms.hpp include/PICA/shader_gen_types.hpp 
include/PICA/shader_decompiler.hpp + include/PICA/pica_vert_config.hpp include/sdl_sensors.hpp include/PICA/draw_acceleration.hpp include/renderdoc.hpp + include/align.hpp include/audio/aac_decoder.hpp include/PICA/pica_simd.hpp include/services/fonts.hpp + include/audio/audio_interpolation.hpp include/audio/hle_mixer.hpp include/audio/dsp_simd.hpp + include/services/dsp_firmware_db.hpp include/frontend_settings.hpp include/fs/archive_twl_photo.hpp + include/fs/archive_twl_sound.hpp include/fs/archive_card_spi.hpp include/services/ns.hpp + include/external_haptics_manager.hpp ) cmrc_add_resource_library( resources_console_fonts NAMESPACE ConsoleFonts WHENCE "src/core/services/fonts/" - "src/core/services/fonts/CitraSharedFontUSRelocated.bin" + "src/core/services/fonts/SharedFontReplacement.bin" ) set(THIRD_PARTY_SOURCE_FILES third_party/imgui/imgui.cpp @@ -285,17 +440,22 @@ if(ENABLE_LUAJIT AND NOT ANDROID) target_link_libraries(AlberCore PRIVATE buttplugCpp) endif() +set(GL_CONTEXT_SOURCE_FILES "") + if(ENABLE_QT_GUI) - include_directories(third_party/duckstation) - set(THIRD_PARTY_SOURCE_FILES ${THIRD_PARTY_SOURCE_FILES} third_party/duckstation/window_info.cpp third_party/duckstation/gl/context.cpp) + set(GL_CONTEXT_SOURCE_FILES ${GL_CONTEXT_SOURCE_FILES} third_party/duckstation/window_info.cpp third_party/duckstation/gl/context.cpp) if(APPLE) - set(THIRD_PARTY_SOURCE_FILES ${THIRD_PARTY_SOURCE_FILES} third_party/duckstation/gl/context_agl.mm) + set(GL_CONTEXT_SOURCE_FILES ${GL_CONTEXT_SOURCE_FILES} third_party/duckstation/gl/context_agl.mm) elseif(WIN32) - set(THIRD_PARTY_SOURCE_FILES ${THIRD_PARTY_SOURCE_FILES} third_party/duckstation/gl/context_wgl.cpp) + set(GL_CONTEXT_SOURCE_FILES ${GL_CONTEXT_SOURCE_FILES} third_party/duckstation/gl/context_wgl.cpp) else() - set(THIRD_PARTY_SOURCE_FILES ${THIRD_PARTY_SOURCE_FILES} third_party/duckstation/gl/context_egl.cpp third_party/duckstation/gl/context_egl_wayland.cpp - third_party/duckstation/gl/context_egl_x11.cpp 
third_party/duckstation/gl/context_glx.cpp third_party/duckstation/gl/x11_window.cpp) + set(GL_CONTEXT_SOURCE_FILES ${GL_CONTEXT_SOURCE_FILES} third_party/duckstation/gl/context_egl.cpp third_party/duckstation/gl/context_egl_x11.cpp + third_party/duckstation/gl/context_glx.cpp third_party/duckstation/gl/x11_window.cpp) + + if(ENABLE_WAYLAND) + set(GL_CONTEXT_SOURCE_FILES ${GL_CONTEXT_SOURCE_FILES} third_party/duckstation/gl/context_egl_wayland.cpp) + endif() endif() endif() @@ -309,7 +469,7 @@ source_group("Source Files\\Core\\Applets" FILES ${APPLET_SOURCE_FILES}) source_group("Source Files\\Core\\PICA" FILES ${PICA_SOURCE_FILES}) source_group("Source Files\\Core\\Audio" FILES ${AUDIO_SOURCE_FILES}) source_group("Source Files\\Core\\Software Renderer" FILES ${RENDERER_SW_SOURCE_FILES}) -source_group("Source Files\\Third Party" FILES ${THIRD_PARTY_SOURCE_FILES}) +source_group("Source Files\\Third Party" FILES ${THIRD_PARTY_SOURCE_FILES} ${GL_CONTEXT_SOURCE_FILES}) set(RENDERER_GL_SOURCE_FILES "") # Empty by default unless we are compiling with the GL renderer set(RENDERER_VK_SOURCE_FILES "") # Empty by default unless we are compiling with the VK renderer @@ -319,16 +479,19 @@ if(ENABLE_OPENGL) set(RENDERER_GL_INCLUDE_FILES third_party/opengl/opengl.hpp include/renderer_gl/renderer_gl.hpp include/renderer_gl/textures.hpp include/renderer_gl/surfaces.hpp include/renderer_gl/surface_cache.hpp - include/renderer_gl/gl_state.hpp + include/renderer_gl/gl_state.hpp include/renderer_gl/gl_driver.hpp ) set(RENDERER_GL_SOURCE_FILES src/core/renderer_gl/renderer_gl.cpp src/core/renderer_gl/textures.cpp src/core/renderer_gl/etc1.cpp - src/core/renderer_gl/gl_state.cpp src/host_shaders/opengl_display.frag - src/host_shaders/opengl_display.vert src/host_shaders/opengl_vertex_shader.vert + src/core/renderer_gl/gl_state.cpp src/host_shaders/opengl_display.vert + src/host_shaders/opengl_display.frag src/host_shaders/opengl_es_display.vert + src/host_shaders/opengl_es_display.frag 
src/host_shaders/opengl_vertex_shader.vert src/host_shaders/opengl_fragment_shader.frag ) + set(THIRD_PARTY_SOURCE_FILES ${THIRD_PARTY_SOURCE_FILES} third_party/duckstation/gl/stream_buffer.cpp) + set(HEADER_FILES ${HEADER_FILES} ${RENDERER_GL_INCLUDE_FILES}) source_group("Source Files\\Core\\OpenGL Renderer" FILES ${RENDERER_GL_SOURCE_FILES}) @@ -336,8 +499,10 @@ if(ENABLE_OPENGL) resources_renderer_gl NAMESPACE RendererGL WHENCE "src/host_shaders/" - "src/host_shaders/opengl_display.frag" "src/host_shaders/opengl_display.vert" + "src/host_shaders/opengl_display.frag" + "src/host_shaders/opengl_es_display.vert" + "src/host_shaders/opengl_es_display.frag" "src/host_shaders/opengl_vertex_shader.vert" "src/host_shaders/opengl_fragment_shader.frag" ) @@ -411,14 +576,88 @@ if(ENABLE_VULKAN) target_link_libraries(AlberCore PRIVATE Vulkan::Vulkan resources_renderer_vk) endif() +if(ENABLE_METAL AND APPLE) + set(RENDERER_MTL_INCLUDE_FILES include/renderer_mtl/renderer_mtl.hpp + include/renderer_mtl/mtl_depth_stencil_cache.hpp + include/renderer_mtl/mtl_blit_pipeline_cache.hpp + include/renderer_mtl/mtl_draw_pipeline_cache.hpp + include/renderer_mtl/mtl_render_target.hpp + include/renderer_mtl/mtl_texture.hpp + include/renderer_mtl/mtl_vertex_buffer_cache.hpp + include/renderer_mtl/mtl_lut_texture.hpp + include/renderer_mtl/mtl_command_encoder.hpp + include/renderer_mtl/mtl_common.hpp + include/renderer_mtl/pica_to_mtl.hpp + include/renderer_mtl/objc_helper.hpp + ) + + set(RENDERER_MTL_SOURCE_FILES src/core/renderer_mtl/metal_cpp_impl.cpp + src/core/renderer_mtl/renderer_mtl.cpp + src/core/renderer_mtl/mtl_texture.cpp + src/core/renderer_mtl/mtl_etc1.cpp + src/core/renderer_mtl/mtl_lut_texture.cpp + src/core/renderer_mtl/objc_helper.mm + src/host_shaders/metal_shaders.metal + src/host_shaders/metal_blit.metal + #src/host_shaders/metal_copy_to_lut_texture.metal + ) + + set(HEADER_FILES ${HEADER_FILES} ${RENDERER_MTL_INCLUDE_FILES}) + source_group("Source Files\\Core\\Metal 
Renderer" FILES ${RENDERER_MTL_SOURCE_FILES}) + + set(RENDERER_MTL_HOST_SHADERS_SOURCES) + function (add_metal_shader SHADER) + set(SHADER_SOURCE "${CMAKE_SOURCE_DIR}/src/host_shaders/${SHADER}.metal") + set(SHADER_IR "${CMAKE_SOURCE_DIR}/src/host_shaders/${SHADER}.ir") + set(SHADER_METALLIB "${CMAKE_SOURCE_DIR}/src/host_shaders/${SHADER}.metallib") + # TODO: only include sources in debug builds + add_custom_command( + OUTPUT ${SHADER_IR} + COMMAND xcrun -sdk macosx metal -gline-tables-only -frecord-sources -o ${SHADER_IR} -c ${SHADER_SOURCE} + DEPENDS ${SHADER_SOURCE} + VERBATIM) + add_custom_command( + OUTPUT ${SHADER_METALLIB} + COMMAND xcrun -sdk macosx metallib -o ${SHADER_METALLIB} ${SHADER_IR} + DEPENDS ${SHADER_IR} + VERBATIM) + set(RENDERER_MTL_HOST_SHADERS_SOURCES ${RENDERER_MTL_HOST_SHADERS_SOURCES} ${SHADER_METALLIB}) + endfunction() + + add_metal_shader(metal_shaders) + add_metal_shader(metal_blit) + #add_metal_shader(metal_copy_to_lut_texture) + + add_custom_target( + compile_msl_shaders + DEPENDS ${RENDERER_MTL_HOST_SHADERS_SOURCES} + ) + + cmrc_add_resource_library( + resources_renderer_mtl + NAMESPACE RendererMTL + WHENCE "src/host_shaders/" + "src/host_shaders/metal_shaders.metallib" + "src/host_shaders/metal_blit.metallib" + #"src/host_shaders/metal_copy_to_lut_texture.metallib" + ) + add_dependencies(resources_renderer_mtl compile_msl_shaders) + + target_sources(AlberCore PRIVATE ${RENDERER_MTL_SOURCE_FILES}) + target_compile_definitions(AlberCore PUBLIC "PANDA3DS_ENABLE_METAL=1") + target_include_directories(AlberCore PRIVATE third_party/metal-cpp) + # TODO: check if all of them are needed + target_link_libraries(AlberCore PRIVATE "-framework Metal" "-framework Foundation" "-framework QuartzCore" resources_renderer_mtl) +endif() + source_group("Header Files\\Core" FILES ${HEADER_FILES}) -set(ALL_SOURCES ${SOURCE_FILES} ${FS_SOURCE_FILES} ${CRYPTO_SOURCE_FILES} ${KERNEL_SOURCE_FILES} +set(ALL_SOURCES ${SOURCE_FILES} ${FS_SOURCE_FILES} 
${CRYPTO_SOURCE_FILES} ${KERNEL_SOURCE_FILES} ${LOADER_SOURCE_FILES} ${SERVICE_SOURCE_FILES} ${APPLET_SOURCE_FILES} ${RENDERER_SW_SOURCE_FILES} ${PICA_SOURCE_FILES} ${THIRD_PARTY_SOURCE_FILES} ${AUDIO_SOURCE_FILES} ${HEADER_FILES} ${FRONTEND_HEADER_FILES}) target_sources(AlberCore PRIVATE ${ALL_SOURCES}) -target_link_libraries(AlberCore PRIVATE dynarmic cryptopp glad resources_console_fonts teakra) -target_link_libraries(AlberCore PUBLIC glad capstone) +target_link_libraries(AlberCore PRIVATE dynarmic glad resources_console_fonts teakra fdk-aac) +target_link_libraries(AlberCore PUBLIC glad capstone fmt::fmt) if(ENABLE_DISCORD_RPC AND NOT ANDROID) target_compile_definitions(AlberCore PUBLIC "PANDA3DS_ENABLE_DISCORD_RPC=1") @@ -453,14 +692,17 @@ if(NOT BUILD_HYDRA_CORE AND NOT BUILD_LIBRETRO_CORE) add_executable(Alber) if(ENABLE_QT_GUI) - find_package(Qt6 REQUIRED COMPONENTS Widgets) + find_package(Qt6 REQUIRED COMPONENTS Widgets LinguistTools) if(NOT ENABLE_OPENGL) message(FATAL_ERROR "Qt frontend requires OpenGL") endif() + option(GENERATE_QT_TRANSLATION "Generate Qt translation file" OFF) + set(QT_LANGUAGES docs/translations) + set(FRONTEND_SOURCE_FILES src/panda_qt/main.cpp src/panda_qt/screen.cpp src/panda_qt/main_window.cpp src/panda_qt/about_window.cpp src/panda_qt/config_window.cpp src/panda_qt/zep.cpp src/panda_qt/text_editor.cpp src/panda_qt/cheats_window.cpp src/panda_qt/mappings.cpp - src/panda_qt/patch_window.cpp src/panda_qt/elided_label.cpp src/panda_qt/shader_editor.cpp + src/panda_qt/patch_window.cpp src/panda_qt/elided_label.cpp src/panda_qt/shader_editor.cpp src/panda_qt/translations.cpp ) set(FRONTEND_HEADER_FILES include/panda_qt/screen.hpp include/panda_qt/main_window.hpp include/panda_qt/about_window.hpp include/panda_qt/config_window.hpp include/panda_qt/text_editor.hpp include/panda_qt/cheats_window.hpp @@ -494,18 +736,49 @@ if(NOT BUILD_HYDRA_CORE AND NOT BUILD_LIBRETRO_CORE) endif() endif() + # Generates an en.ts file for translations + # 
To update the file, use cmake --build --target Alber_lupdate + if(GENERATE_QT_TRANSLATION) + find_package(Qt6 REQUIRED COMPONENTS LinguistTools) + qt_add_lupdate(Alber TS_FILES ${QT_LANGUAGES}/en.ts + SOURCES ${FRONTEND_SOURCE_FILES} + INCLUDE_DIRECTORIES ${FRONTEND_HEADER_FILES} + NO_GLOBAL_TARGET + ) + endif() + qt_add_resources(AlberCore "app_images" PREFIX "/" FILES - docs/img/rsob_icon.png docs/img/rstarstruck_icon.png docs/img/rpog_icon.png + docs/img/rsob_icon.png docs/img/rstarstruck_icon.png docs/img/rpog_icon.png docs/img/rsyn_icon.png + docs/img/settings_icon.png docs/img/display_icon.png docs/img/speaker_icon.png + docs/img/sparkling_icon.png docs/img/battery_icon.png docs/img/sdcard_icon.png + docs/img/rnap_icon.png docs/img/rcow_icon.png docs/img/skyemu_icon.png ) + + # Translation files in Qt's .ts format. Will be converted into binary files and embedded into the executable + set(TRANSLATIONS_TS docs/translations/en.ts docs/translations/el.ts docs/translations/es.ts docs/translations/pt_br.ts docs/translations/nl.ts) + set_source_files_properties(${TRANSLATIONS_TS} PROPERTIES OUTPUT_LOCATION "${CMAKE_CURRENT_BINARY_DIR}/translations") + qt_add_translation(TRANSLATIONS_QM ${TRANSLATIONS_TS}) + + set(TRANSLATIONS_QRC ${CMAKE_CURRENT_BINARY_DIR}/translations/translations.qrc) + file(WRITE ${TRANSLATIONS_QRC} "\n") + foreach (QM ${TRANSLATIONS_QM}) + message("${QM}") + get_filename_component(QM_FILE ${QM} NAME) + file(APPEND ${TRANSLATIONS_QRC} "${QM_FILE}\n") + endforeach (QM) + file(APPEND ${TRANSLATIONS_QRC} "") + + qt_add_resources(TRANSLATIONS ${TRANSLATIONS_QRC}) + set(APP_RESOURCES ${APP_RESOURCES} ${TRANSLATIONS}) else() set(FRONTEND_SOURCE_FILES src/panda_sdl/main.cpp src/panda_sdl/frontend_sdl.cpp src/panda_sdl/mappings.cpp) - set(FRONTEND_HEADER_FILES "") + set(FRONTEND_HEADER_FILES "include/panda_sdl/frontend_sdl.hpp") endif() target_link_libraries(Alber PRIVATE AlberCore) - target_sources(Alber PRIVATE ${FRONTEND_SOURCE_FILES} 
${FRONTEND_HEADER_FILES}) + target_sources(Alber PRIVATE ${FRONTEND_SOURCE_FILES} ${FRONTEND_HEADER_FILES} ${GL_CONTEXT_SOURCE_FILES} ${APP_RESOURCES}) elseif(BUILD_HYDRA_CORE) target_compile_definitions(AlberCore PRIVATE PANDA3DS_HYDRA_CORE=1) include_directories(third_party/hydra_core/include) @@ -513,17 +786,17 @@ elseif(BUILD_HYDRA_CORE) target_link_libraries(Alber PUBLIC AlberCore) elseif(BUILD_LIBRETRO_CORE) include_directories(third_party/libretro/include) - add_library(Alber SHARED src/libretro_core.cpp) - target_link_libraries(Alber PUBLIC AlberCore) - - set_target_properties(Alber PROPERTIES - OUTPUT_NAME "panda3ds_libretro" - PREFIX "" - ) + add_library(panda3ds_libretro SHARED src/libretro_core.cpp) + target_link_libraries(panda3ds_libretro PUBLIC AlberCore) + set_target_properties(panda3ds_libretro PROPERTIES PREFIX "") endif() if(ENABLE_LTO OR ENABLE_USER_BUILD) - set_target_properties(Alber PROPERTIES INTERPROCEDURAL_OPTIMIZATION TRUE) + if (NOT BUILD_LIBRETRO_CORE) + set_target_properties(Alber PROPERTIES INTERPROCEDURAL_OPTIMIZATION TRUE) + else() + set_target_properties(panda3ds_libretro PROPERTIES INTERPROCEDURAL_OPTIMIZATION TRUE) + endif() endif() if(ENABLE_TESTS) diff --git a/cmake/FindRenderDoc.cmake b/cmake/FindRenderDoc.cmake new file mode 100644 index 00000000..c00a0888 --- /dev/null +++ b/cmake/FindRenderDoc.cmake @@ -0,0 +1,25 @@ +# SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +# SPDX-License-Identifier: GPL-2.0-or-later + +set(RENDERDOC_INCLUDE_DIR third_party/renderdoc) + +if (RENDERDOC_INCLUDE_DIR AND EXISTS "${RENDERDOC_INCLUDE_DIR}/renderdoc_app.h") + file(STRINGS "${RENDERDOC_INCLUDE_DIR}/renderdoc_app.h" RENDERDOC_VERSION_LINE REGEX "typedef struct RENDERDOC_API") + string(REGEX REPLACE ".*typedef struct RENDERDOC_API_([0-9]+)_([0-9]+)_([0-9]+).*" "\\1.\\2.\\3" RENDERDOC_VERSION "${RENDERDOC_VERSION_LINE}") + unset(RENDERDOC_VERSION_LINE) +endif() + +include(FindPackageHandleStandardArgs) 
+find_package_handle_standard_args(RenderDoc + REQUIRED_VARS RENDERDOC_INCLUDE_DIR + VERSION_VAR RENDERDOC_VERSION +) + +if (RenderDoc_FOUND AND NOT TARGET RenderDoc::API) + add_library(RenderDoc::API INTERFACE IMPORTED) + set_target_properties(RenderDoc::API PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${RENDERDOC_INCLUDE_DIR}" + ) +endif() + +mark_as_advanced(RENDERDOC_INCLUDE_DIR) \ No newline at end of file diff --git a/docs/3ds/accelerometer_readings/readings_flat_1.png b/docs/3ds/accelerometer_readings/readings_flat_1.png new file mode 100644 index 00000000..b7a425fc Binary files /dev/null and b/docs/3ds/accelerometer_readings/readings_flat_1.png differ diff --git a/docs/3ds/accelerometer_readings/readings_flat_2.png b/docs/3ds/accelerometer_readings/readings_flat_2.png new file mode 100644 index 00000000..b23c1102 Binary files /dev/null and b/docs/3ds/accelerometer_readings/readings_flat_2.png differ diff --git a/docs/3ds/accelerometer_readings/readings_shaking_1.png b/docs/3ds/accelerometer_readings/readings_shaking_1.png new file mode 100644 index 00000000..91279149 Binary files /dev/null and b/docs/3ds/accelerometer_readings/readings_shaking_1.png differ diff --git a/docs/3ds/accelerometer_readings/readings_shaking_2.png b/docs/3ds/accelerometer_readings/readings_shaking_2.png new file mode 100644 index 00000000..551e7d2e Binary files /dev/null and b/docs/3ds/accelerometer_readings/readings_shaking_2.png differ diff --git a/docs/3ds/lighting.md b/docs/3ds/lighting.md new file mode 100644 index 00000000..8b6b9885 --- /dev/null +++ b/docs/3ds/lighting.md @@ -0,0 +1,79 @@ +## Info on the lighting implementation + +### Missing shadow attenuation +Shadow attenuation samples a texture unit, and that likely needs render to texture for most games so that they can construct +their shadow map. As such the colors are not multiplied by the shadow attenuation value, so there's no shadows. 
+ +### Missing bump mapping +Bump mapping also samples a texture unit. It most likely doesn't need render to texture, but it may need a better texture sampling +implementation (such as GPUREG_TEXUNITi_BORDER_COLOR, GPUREG_TEXUNITi_BORDER_PARAM). Bump mapping would work for some things, +namely the 3ds-examples bump mapping demo, but would break others such as Toad Treasure Tracker with a naive `texture` implementation. + +Also, the CP configuration is missing because it needs a tangent map implementation. It is currently marked with error_unimpl. + +### samplerEnabledBitfields +Holds the enabled state of the lighting samplers for the various PICA lighting configurations, +as explained in https://www.3dbrew.org/wiki/GPU/Internal_Registers#GPUREG_LIGHTING_CONFIG0 + +```c +const bool samplerEnabled[9 * 7] = bool[9 * 7]( + // D0 D1 SP FR RB RG RR + true, false, true, false, false, false, true, // Configuration 0: D0, SP, RR + false, false, true, true, false, false, true, // Configuration 1: FR, SP, RR + true, true, false, false, false, false, true, // Configuration 2: D0, D1, RR + true, true, false, true, false, false, false, // Configuration 3: D0, D1, FR + true, true, true, false, true, true, true, // Configuration 4: All except for FR + true, false, true, true, true, true, true, // Configuration 5: All except for D1 + true, true, true, true, false, false, true, // Configuration 6: All except for RB and RG + false, false, false, false, false, false, false, // Configuration 7: Unused + true, true, true, true, true, true, true // Configuration 8: All +); +``` + +The above has been condensed to two uints for performance reasons.
+You can confirm they are the same by running the following: +```c +const uint samplerEnabledBitfields[2] = { 0x7170e645u, 0x7f013fefu }; +for (int i = 0; i < 9 * 7; i++) { + unsigned arrayIndex = (i >> 5); + bool b = (samplerEnabledBitfields[arrayIndex] & (1u << (i & 31))) != 0u; + if (samplerEnabled[i] == b) { + printf("%d: happy\n", i); + } else { + printf("%d: unhappy\n", i); + } +} +``` + +### lightLutLookup +lut_id is one of these values: +0 D0 +1 D1 +2 SP +3 FR +4 RB +5 RG +6 RR + +lut_index, on the other hand, represents the actual index of the LUT in the texture. +u_tex_luts has 24 LUTs for lighting, and they are used like so: +0 D0 +1 D1 +2 is missing because SP uses LUTs 8-15 +3 FR +4 RB +5 RG +6 RR +8-15 SP0-7 +16-23 DA0-7, but this is not handled in this function as the lookup is a bit different + +The light environment configuration controls which LUTs are available for use. +If a LUT is not available in the selected configuration, its value will always read a constant 1.0 regardless of the enable state in GPUREG_LIGHTING_CONFIG1. +If RR is enabled but not RG or RB, the output of RR is used for all three components: red, green and blue. + +### Distance attenuation +Distance attenuation is computed differently from the other factors. For example, +it doesn't store its scale in GPUREG_LIGHTING_LUTINPUT_SCALE and it doesn't use +GPUREG_LIGHTING_LUTINPUT_SELECT. Instead, it uses the distance from the light to the +fragment and the distance attenuation scale and bias to calculate where in the LUT to look up.
+See: https://www.3dbrew.org/wiki/GPU/Internal_Registers#GPUREG_LIGHTi_ATTENUATION_SCALE \ No newline at end of file diff --git a/docs/img/KirbyAndroid.png b/docs/img/KirbyAndroid.png new file mode 100644 index 00000000..05e8b466 Binary files /dev/null and b/docs/img/KirbyAndroid.png differ diff --git a/docs/img/battery_icon.png b/docs/img/battery_icon.png new file mode 100644 index 00000000..5768a928 Binary files /dev/null and b/docs/img/battery_icon.png differ diff --git a/docs/img/display_icon.png b/docs/img/display_icon.png new file mode 100644 index 00000000..cf6a68be Binary files /dev/null and b/docs/img/display_icon.png differ diff --git a/docs/img/alber-icon.ico b/docs/img/mac_icon.ico similarity index 100% rename from docs/img/alber-icon.ico rename to docs/img/mac_icon.ico diff --git a/docs/img/rcow_icon.png b/docs/img/rcow_icon.png new file mode 100644 index 00000000..5facb301 Binary files /dev/null and b/docs/img/rcow_icon.png differ diff --git a/docs/img/rnap_icon.png b/docs/img/rnap_icon.png new file mode 100644 index 00000000..7967102b Binary files /dev/null and b/docs/img/rnap_icon.png differ diff --git a/docs/img/rsyn_icon.png b/docs/img/rsyn_icon.png new file mode 100644 index 00000000..684c19b6 Binary files /dev/null and b/docs/img/rsyn_icon.png differ diff --git a/docs/img/sdcard_icon.png b/docs/img/sdcard_icon.png new file mode 100644 index 00000000..07ed3fce Binary files /dev/null and b/docs/img/sdcard_icon.png differ diff --git a/docs/img/settings_icon.png b/docs/img/settings_icon.png new file mode 100644 index 00000000..bf21c417 Binary files /dev/null and b/docs/img/settings_icon.png differ diff --git a/docs/img/skyemu_icon.png b/docs/img/skyemu_icon.png new file mode 100644 index 00000000..d5f3b9d4 Binary files /dev/null and b/docs/img/skyemu_icon.png differ diff --git a/docs/img/sparkling_icon.png b/docs/img/sparkling_icon.png new file mode 100644 index 00000000..4a46d8d8 Binary files /dev/null and b/docs/img/sparkling_icon.png differ diff 
--git a/docs/img/speaker_icon.png b/docs/img/speaker_icon.png new file mode 100644 index 00000000..06adcfb3 Binary files /dev/null and b/docs/img/speaker_icon.png differ diff --git a/docs/img/windows_alt_icon.ico b/docs/img/windows_alt_icon.ico new file mode 100644 index 00000000..aa3593c3 Binary files /dev/null and b/docs/img/windows_alt_icon.ico differ diff --git a/docs/img/windows_icon.ico b/docs/img/windows_icon.ico new file mode 100644 index 00000000..340f251e Binary files /dev/null and b/docs/img/windows_icon.ico differ diff --git a/docs/img/windows_icon.rc b/docs/img/windows_icon.rc new file mode 100644 index 00000000..22dc105e --- /dev/null +++ b/docs/img/windows_icon.rc @@ -0,0 +1 @@ +AlberIcon ICON "windows_icon.ico" \ No newline at end of file diff --git a/docs/libretro/panda3ds_libretro.info b/docs/libretro/panda3ds_libretro.info index 40df7e22..de1be2f6 100644 --- a/docs/libretro/panda3ds_libretro.info +++ b/docs/libretro/panda3ds_libretro.info @@ -1,7 +1,7 @@ # Software Information display_name = "Nintendo - 3DS (Panda3DS)" authors = "Panda3DS Authors (tm)" -supported_extensions = "3ds|3dsx|elf|axf|cci|cxi|app" +supported_extensions = "3ds|3dsx|elf|axf|cci|cxi|app|ncch" corename = "Panda3DS" categories = "Emulator" license = "GPLv3" diff --git a/docs/translations/el.ts b/docs/translations/el.ts new file mode 100644 index 00000000..0368d8bd --- /dev/null +++ b/docs/translations/el.ts @@ -0,0 +1,763 @@ + + + AboutWindow + + + About Panda3DS + Σχετικά με το Panda3DS + + + + Panda3DS is a free and open source Nintendo 3DS emulator, for Windows, MacOS and Linux + Τo Panda3DS είναι ένας δωρεάν και open source εξομοιωτής του Nintendo 3DS, για Windows, MacOS και Linux + + + + Visit panda3ds.com for help with Panda3DS and links to our official support sites. + Επισκεφτείται το panda3ds.com για βοήθεια με το Panda3DS και συνδέσμους στις επίσημες σελίδες υποστήριξης μας. + + + + Panda3DS is developed by volunteers in their spare time. 
Below is a list of some of these volunteers who've agreed to be listed here, in no particular order.<br>If you think you should be listed here too, please inform us<br><br>- Peach (wheremyfoodat)<br>- noumidev<br>- liuk707<br>- Wunk<br>- marysaka<br>- Sky<br>- merryhime<br>- TGP17<br>- Shadow<br> + + + + + CheatEditDialog + + + Edit Cheat + Επεξεργασία Κωδικού + + + + Cheat name + Όνομα κωδικού + + + + CheatEntryWidget + + + Edit + Επεξεργασία + + + + CheatsWindow + + + Cheats + Κωδικοί + + + + Add + Προσθήκη + + + + Remove + Αφαίρεση + + + + ConfigWindow + + + Configuration + Ρυθμίσεις + + + + Interface Settings + Ρυθμίσεις Διεπαφής + + + + System + Σύστημα + + + + Light + Φωτεινό + + + + Dark + Σκοτεινό + + + + Greetings Cat + Γεια Σου Γάτα + + + + Cream + Κρέμα + + + + Color theme + Χρώματα + + + + Happy panda + Χαρούμενο Πάντα + + + + Happy panda (colourful) + Χαρούμενο Πάντα (χρωματιστό) + + + + Sleepy panda + Πάντα που νυστάζει + + + + Cow panda + Αγελάδα πάντα + + + + The penguin from SkyEmu + Ο πιγκουίνος από το SkyEmu + + + + Window icon + Εικονίδιο Παραθύρου + + + + Language + Γλώσσα + + + + Show version on window title + Εμφάνιση έκδοσης στον τίτλο του παραθύρου + + + + Alber v%1 + Αλβέρτος v%1 + + + + Alber + Αλβέρτος + + + + Remember window position + Αποθήκευση θέσης παραθύρου + + + + General Settings + Γενικές Ρυθμίσεις + + + + Browse... 
+ Περιήγηση + + + + Select Directory + Επιλογή φακέλου + + + + Default ROMs path + Προεπιλεγμένος φάκελος ROM + + + + Enable Discord RPC + Ενεργοποίηση Discord RPC + + + + Use portable build + Ενεργοποίηση φορητής εγκατάστασης + + + + Print version in console output + Εκτύπωση έκδοσης στην κονσόλα + + + + Graphics Settings + Ρυθμίσεις Γραφικών + + + + + Null + Κανένα + + + + OpenGL + + + + + Vulkan + + + + + GPU renderer + Πυρήνας GPU + + + + Enable Renderdoc + Ενεργοποίηση Renderdoc + + + + Enable shader JIT + Ενεργοποίηση μεταγλωττιστή shaders + + + + Enable VSync + Ενεργοποίηση VSync + + + + Use ubershaders (No stutter, maybe slower) + Χρήση ubershaders (Χωρίς stutter, ίσως πιο αργό) + + + + Accurate shader multiplication + Ακριβής πολλαπλασιασμός στα shaders + + + + Accelerate shaders + Επιτάχυνση shaders + + + + Force shadergen when rendering lights + Εξαναγκασμός shadergen όταν υπάρχουν φώτα + + + + Light threshold for forcing shadergen + Αριθμός φωτών για εξαναγκασμό shadergen + + + + Audio Settings + Ρυθμίσεις Ήχου + + + + LLE + + + + + HLE + + + + + DSP emulation + Εξομοίωση DSP + + + + Enable audio + Ενεργοποίηση ήχου + + + + Enable AAC audio + Ενεργοποίηση ήχου AAC + + + + Print DSP firmware + Εκτύπωση λογισμικού DSP + + + + Mute audio device + Σίγαση συσκευής ήχου + + + + Cubic + Κυβική + + + + Linear + Γραμμική + + + + Volume curve + Κλίμακα ήχου + + + + Audio device volume + Ένταση ήχου + + + + Battery Settings + Ρυθμίσεις μπαταρίας + + + + Battery percentage + Ποσοστό μπαταρίας + + + + Charger plugged + Φορτιστής + + + + SD Card Settings + Ρυθμίσης κάρτας SD + + + + Enable virtual SD card + Ενεργοποίηση εικονικής SD + + + + Write protect virtual SD card + Προστασία της SD από εγγραφή + + + + Interface + Διεπαφή + + + + User Interface settings + Ρυθμίσεις διεπαφής + + + + General + Γενικά + + + + General emulator settings + Γενικές ρυθμίσεις εξομοιωτή + + + + Graphics + Γραφικά + + + + Graphics emulation and output settings + Ρυθμίσεις εξομοίωσης 
γραφικών + + + + Audio + Ήχος + + + + Audio emulation and output settings + Ρυθμίσεις εξομοίωσης ήχου + + + + Battery + Μπαταρία + + + + Battery emulation settings + Ρυθμίσεις εξομοίωσης μπαταρίας + + + + SD Card + Κάρτα SD + + + + SD Card emulation settings + Ρυθμίσεις εξομοίωσης κάρτας SD + + + + Language change successful + Επιτυχία αλλαγής γλώσσας + + + + Restart Panda3DS for the new language to be used. + Επανεκκινήστε το Panda3DS για να εφαρμοστεί η νέα γλώσσα. + + + + Language change failed + Αποτυχία αλλαγής γλώσσας + + + + The language you selected is not included in Panda3DS. If you're seeing this, someone messed up the language UI code... + Το Panda3DS δεν υποστηρίζει τον γλώσσα που επιλέξατε. Αν το βλέπετε αυτό, κάποιος έκανε λάθος στον κώδικα, κατηγορήστε τον Πάρη... + + + + MainWindow + + + Alber + Αλβέρτος + + + + File + Αρχεία + + + + Emulation + Εξομοίωση + + + + Tools + Εργαλεία + + + + About + Σχετικά + + + + Load game + Φόρτωση παιχνιδιού + + + + Load Lua script + Φόρτωση αρχείου Lua + + + + Open Panda3DS folder + Άνοιγμα φακέλου Panda3DS + + + + Pause + Παύση + + + + Resume + Συνέχεια + + + + Reset + Επανέναρξη + + + + Configure + Ρύθμιση + + + + Dump RomFS + + + + + Open Lua Editor + Άνοιγμα Lua Editor + + + + Open Cheats Editor + Άνοιγμα Editor κωδικών + + + + Open Patch Window + Άνοιγμα παραθύρου για patching + + + + Open Shader Editor + + + + + Dump loaded DSP firmware + + + + + About Panda3DS + Σχετικά με το Panda3DS + + + + Select 3DS ROM to load + Επιλέξτε 3DS ROM για να φορτώσετε + + + + Nintendo 3DS ROMs (*.3ds *.cci *.cxi *.app *.ncch *.3dsx *.elf *.axf) + + + + + Select Lua script to load + Επιλέξτε αρχείο Lua για να φορτώσετε + + + + Lua scripts (*.lua *.txt) + + + + + Select folder to dump RomFS files to + + + + + Invalid format for RomFS dumping + + + + + The currently loaded app is not in a format that supports RomFS + + + + + + + OK + ΟΚ + + + + No RomFS found + Δεν βρέθηκε RomFS + + + + No RomFS partition was found in the 
loaded app + Δεν βρέθηκε RomFS στην εφαρμογή που έχει φορτωθεί + + + + Select file + Επιλέξτε αρχείο + + + + DSP firmware file (*.cdc) + + + + + No DSP firmware loaded + Δεν έχει φορτωθεί DSP firmware + + + + The currently loaded app has not uploaded a firmware to the DSP + + + + + Failed to open output file + Αποτυχία ανοίγματος του αρχείου εξόδου + + + + The currently loaded DSP firmware could not be written to the selected file. Please make sure you have permission to access this file + + + + + PatchWindow + + + ROM patcher + + + + + Select input file + Επιλογή αρχείου εισόδου + + + + + Select + Επιλέξτε + + + + + Select patch file + Επιλογή αρχείου patch + + + + Apply patch + Εφαρμογή patch + + + + Select file to patch + Επιλέξτε αρχείο να κάνετε patch + + + + + All files (*.*) + Όλα τα αρχεία (*.*) + + + + Patch files (*.ips *.ups *.bps) + + + + + Paths not provided correctly + + + + + Please provide paths for both the input file and the patch file + Παρακαλούμε διαλέξτε και αρχείο εισόδου και αρχείο patch + + + + Select file + Επιλογή αρχείου + + + + No output path + Δεν επιλέχθηκε φάκελος εξόδου + + + + No path was provided for the output file, no patching was done + Δεν επιλέχθηκε αρχείο εξόδου. Δεν έγινε patching + + + + Unknown patch format + Άγνωστο είδος patch + + + + Unknown format for patch file. Currently IPS, UPS and BPS are supported + Άγνωστο είδος αρχείου patch. Υποστηρίζονται αρχεία IPS, UPS και BPS + + + + Failed to open input files + Αποτυχία ανοίγματος των αρχείων εισόδου + + + + Make sure they're in a directory Panda3DS has access to + Βεβαιωθείτε ότι είναι σε φάκελο που έχει πρόσβαση το Panda3DS + + + + Patching Success + Επιτυχής Εφαρμογή Patch + + + + Your file was patched successfully. + To αρχείο σας έγινε patch με επιτυχία + + + + Checksum mismatch + + + + + Patch was applied successfully but a checksum mismatch was detected. 
The input or output files might not be correct + Το patch εφαρμόστηκε με επιτυχία αλλά ανιχνεύτηκε σφάλμα στο checksum. Ενδέχεται τα αρχεία εισόδου η εξόδου να είναι λανθασμένα + + + + Patching error + Σφάλμα στο patching + + + + An error occured while patching + Προέκυψε σφάλμα στο patching + + + + PatchWindow::PatchWindow + + + OK + ΟΚ + + + + ShaderEditorWindow + + + Reload shader + Επαναφόρτωση shader + + + + TextEditorWindow + + + Lua Editor + + + + + Load script + Φόρτωση αρχείου + + + \ No newline at end of file diff --git a/docs/translations/en.ts b/docs/translations/en.ts new file mode 100644 index 00000000..4a70fabb --- /dev/null +++ b/docs/translations/en.ts @@ -0,0 +1,766 @@ + + + + + AboutWindow + + + About Panda3DS + + + + + Panda3DS is a free and open source Nintendo 3DS emulator, for Windows, MacOS and Linux + + + + + Visit panda3ds.com for help with Panda3DS and links to our official support sites. + + + + + Panda3DS is developed by volunteers in their spare time. Below is a list of some of these volunteers who've agreed to be listed here, in no particular order.<br>If you think you should be listed here too, please inform us<br><br>- Peach (wheremyfoodat)<br>- noumidev<br>- liuk707<br>- Wunk<br>- marysaka<br>- Sky<br>- merryhime<br>- TGP17<br>- Shadow<br> + + + + + CheatEditDialog + + + Edit Cheat + + + + + Cheat name + + + + + CheatEntryWidget + + + Edit + + + + + CheatsWindow + + + Cheats + + + + + Add + + + + + Remove + + + + + ConfigWindow + + + Configuration + + + + + Interface Settings + + + + + System + + + + + Light + + + + + Dark + + + + + Greetings Cat + + + + + Cream + + + + + Color theme + + + + + Happy panda + + + + + Happy panda (colourful) + + + + + Sleepy panda + + + + + Cow panda + + + + + The penguin from SkyEmu + + + + + Window icon + + + + + Language + + + + + Show version on window title + + + + + + Alber v%1 + + + + + Alber + + + + + Remember window position + + + + + General Settings + + + + + Browse... 
+ + + + + Select Directory + + + + + Default ROMs path + + + + + Enable Discord RPC + + + + + Use portable build + + + + + Print version in console output + + + + + Graphics Settings + + + + + + Null + + + + + OpenGL + + + + + Vulkan + + + + + GPU renderer + + + + + Enable Renderdoc + + + + + Enable shader JIT + + + + + Enable VSync + + + + + Use ubershaders (No stutter, maybe slower) + + + + + Accurate shader multiplication + + + + + Accelerate shaders + + + + + Force shadergen when rendering lights + + + + + Light threshold for forcing shadergen + + + + + Audio Settings + + + + + LLE + + + + + HLE + + + + + DSP emulation + + + + + Enable audio + + + + + Enable AAC audio + + + + + Print DSP firmware + + + + + Mute audio device + + + + + Cubic + + + + + Linear + + + + + Volume curve + + + + + Audio device volume + + + + + Battery Settings + + + + + Battery percentage + + + + + Charger plugged + + + + + SD Card Settings + + + + + Enable virtual SD card + + + + + Write protect virtual SD card + + + + + Interface + + + + + User Interface settings + + + + + General + + + + + General emulator settings + + + + + Graphics + + + + + Graphics emulation and output settings + + + + + Audio + + + + + Audio emulation and output settings + + + + + Battery + + + + + Battery emulation settings + + + + + SD Card + + + + + SD Card emulation settings + + + + + Language change successful + + + + + Restart Panda3DS for the new language to be used. + + + + + Language change failed + + + + + The language you selected is not included in Panda3DS. If you're seeing this, someone messed up the language UI code... 
+ + + + + MainWindow + + + Alber + + + + + File + + + + + Emulation + + + + + Tools + + + + + About + + + + + Load game + + + + + Load Lua script + + + + + Open Panda3DS folder + + + + + Pause + + + + + Resume + + + + + Reset + + + + + Configure + + + + + Dump RomFS + + + + + Open Lua Editor + + + + + Open Cheats Editor + + + + + Open Patch Window + + + + + Open Shader Editor + + + + + Dump loaded DSP firmware + + + + + About Panda3DS + + + + + Select 3DS ROM to load + + + + + Nintendo 3DS ROMs (*.3ds *.cci *.cxi *.app *.ncch *.3dsx *.elf *.axf) + + + + + Select Lua script to load + + + + + Lua scripts (*.lua *.txt) + + + + + Select folder to dump RomFS files to + + + + + Invalid format for RomFS dumping + + + + + The currently loaded app is not in a format that supports RomFS + + + + + + + OK + + + + + No RomFS found + + + + + No RomFS partition was found in the loaded app + + + + + Select file + + + + + DSP firmware file (*.cdc) + + + + + No DSP firmware loaded + + + + + The currently loaded app has not uploaded a firmware to the DSP + + + + + Failed to open output file + + + + + The currently loaded DSP firmware could not be written to the selected file. Please make sure you have permission to access this file + + + + + PatchWindow + + + ROM patcher + + + + + Select input file + + + + + + Select + + + + + + Select patch file + + + + + Apply patch + + + + + Select file to patch + + + + + + All files (*.*) + + + + + Patch files (*.ips *.ups *.bps) + + + + + Paths not provided correctly + + + + + Please provide paths for both the input file and the patch file + + + + + Select file + + + + + No output path + + + + + No path was provided for the output file, no patching was done + + + + + Unknown patch format + + + + + Unknown format for patch file. Currently IPS, UPS and BPS are supported + + + + + Failed to open input files + + + + + Make sure they're in a directory Panda3DS has access to + + + + + Patching Success + + + + + Your file was patched successfully. 
+ + + + + Checksum mismatch + + + + + Patch was applied successfully but a checksum mismatch was detected. The input or output files might not be correct + + + + + Patching error + + + + + An error occured while patching + + + + + PatchWindow::PatchWindow + + + OK + + + + + ShaderEditorWindow + + + Reload shader + + + + + TextEditorWindow + + + Lua Editor + + + + + Load script + + + + diff --git a/docs/translations/es.ts b/docs/translations/es.ts new file mode 100644 index 00000000..6f345261 --- /dev/null +++ b/docs/translations/es.ts @@ -0,0 +1,763 @@ + + + AboutWindow + + + About Panda3DS + Acerca de Panda3DS + + + + Panda3DS is a free and open source Nintendo 3DS emulator, for Windows, MacOS and Linux + Panda3DS es un emulador libre y de código abierto de Nintendo 3DS para Windows, MacOS y Linux + + + + Visit panda3ds.com for help with Panda3DS and links to our official support sites. + Visita panda3ds.com para obtener ayuda con Panda3DS y los links a nuestras páginas oficiales de soporte. + + + + Panda3DS is developed by volunteers in their spare time. Below is a list of some of these volunteers who've agreed to be listed here, in no particular order.<br>If you think you should be listed here too, please inform us<br><br>- Peach (wheremyfoodat)<br>- noumidev<br>- liuk707<br>- Wunk<br>- marysaka<br>- Sky<br>- merryhime<br>- TGP17<br>- Shadow<br> + Panda3DS es desarrollado por voluntarios en su tiempo libre. 
Debajo se encuentran los voluntarios que están de acuerdo con ser listados aquí, en ningún orden en particular.<br>Si piensas que deberías ser listado, por favor infórmanos<br><br>- Peach (wheremyfoodat)<br>- noumidev<br>- liuk707<br>- Wunk<br>- marysaka<br>- Sky<br>- merryhime<br>- TGP17<br>- Shadow<br> + + + + CheatEditDialog + + + Edit Cheat + Editar Truco + + + + Cheat name + Nombre del truco + + + + CheatEntryWidget + + + Edit + Editar + + + + CheatsWindow + + + Cheats + Trucos + + + + Add + Añadir + + + + Remove + Quitar + + + + ConfigWindow + + + Configuration + Configuración + + + + Interface Settings + Configuración de Interfaz + + + + System + Sistema + + + + Light + Claro + + + + Dark + Oscuro + + + + Greetings Cat + + + + + Cream + + + + + Color theme + Tema de color + + + + Happy panda + Panda feliz + + + + Happy panda (colourful) + Panda feliz (colorido) + + + + Sleepy panda + Panda somnoliento + + + + Cow panda + Panda vaca + + + + The penguin from SkyEmu + El pingüino de SkyEmu + + + + Window icon + Icono de ventana + + + + Language + Idioma + + + + Show version on window title + Mostrar versión en la barra de título de la ventana + + + + Alber v%1 + Alber v%1 + + + + Alber + Alber + + + + Remember window position + Recordar posición de la ventana + + + + General Settings + Configuración General + + + + Browse... + Examinar... 
+ + + + Select Directory + Seleccionar Directorio + + + + Default ROMs path + Ruta predeterminada de ROMs + + + + Enable Discord RPC + Activar Discord RPC + + + + Use portable build + Usar build portable + + + + Print version in console output + Imprimir versión en consola + + + + Graphics Settings + Configuración de Gráficos + + + + + Null + Nulo + + + + OpenGL + OpenGL + + + + Vulkan + Vulkan + + + + GPU renderer + Renderizador GPU + + + + Enable Renderdoc + Activar Renderdoc + + + + Enable shader JIT + Activar JIT de shaders + + + + Enable VSync + Activar VSync + + + + Use ubershaders (No stutter, maybe slower) + Usar ubershaders (No stuttering, puede ser más lento) + + + + Accurate shader multiplication + Multiplicación precisa de shaders + + + + Accelerate shaders + Acelerar shaders + + + + Force shadergen when rendering lights + Forzar shadergen al renderizar luces + + + + Light threshold for forcing shadergen + Umbral de luz para forzar shadergen + + + + Audio Settings + Configuración de Audio + + + + LLE + LLE + + + + HLE + HLE + + + + DSP emulation + Emulación de DSP + + + + Enable audio + Activar audio + + + + Enable AAC audio + Activar audio AAC + + + + Print DSP firmware + Imprimir firmware DSP + + + + Mute audio device + Silenciar dispositivo de audio + + + + Cubic + Cúbico + + + + Linear + Linear + + + + Volume curve + Curva del volumen + + + + Audio device volume + Volumen del dispositivo de audio + + + + Battery Settings + Configuración de Batería + + + + Battery percentage + Porcentaje de batería + + + + Charger plugged + Cargador conectado + + + + SD Card Settings + Configuración de Tarjeta SD + + + + Enable virtual SD card + Activar tarjeta SD virtual + + + + Write protect virtual SD card + Proteger tarjeta SD de escritura + + + + Interface + Interfaz + + + + User Interface settings + Configuración de Interfaz de Usuario + + + + General + General + + + + General emulator settings + Configuración general del emulador + + + + Graphics + Gráficos + 
+ + + Graphics emulation and output settings + Configuración de emulación de gráficos y salida de vídeo + + + + Audio + Audio + + + + Audio emulation and output settings + Configuración de emulación y salida de audio + + + + Battery + Batería + + + + Battery emulation settings + Configuración de emulación de la batería + + + + SD Card + Tarjeta SD + + + + SD Card emulation settings + Configuración de emulación de la tarjeta SD + + + + Language change successful + Idioma cambiado correctamente + + + + Restart Panda3DS for the new language to be used. + Reinicie Panda3DS para utilizar el nuevo idioma. + + + + Language change failed + Cambio de idioma fallido + + + + The language you selected is not included in Panda3DS. If you're seeing this, someone messed up the language UI code... + El idioma que ha seleccionado no está incluido en Panda3DS. Si está viendo esto, alguien cometió un error en el código... + + + + MainWindow + + + Alber + Alber + + + + File + Archivo + + + + Emulation + Emulación + + + + Tools + Herramientas + + + + About + Acerca de + + + + Load game + Cargar juego + + + + Load Lua script + Cargar script Lua + + + + Open Panda3DS folder + Abrir carpeta Panda3DS + + + + Pause + Pausar + + + + Resume + Reanudar + + + + Reset + Reiniciar + + + + Configure + Configurar + + + + Dump RomFS + Volcar RomFS + + + + Open Lua Editor + Abrir Editor Lua + + + + Open Cheats Editor + Abrir Editor de Trucos + + + + Open Patch Window + Abrir Ventana de Parches + + + + Open Shader Editor + Abrir Editor de Shaders + + + + Dump loaded DSP firmware + Volcar firmware DSP cargado + + + + About Panda3DS + Acerca de Panda3DS + + + + Select 3DS ROM to load + Seleccione el ROM de 3DS a cargar + + + + Nintendo 3DS ROMs (*.3ds *.cci *.cxi *.app *.ncch *.3dsx *.elf *.axf) + ROMs de Nintendo 3DS (*.3ds *.cci *.cxi *.app *.ncch *.3dsx *.elf *.axf) + + + + Select Lua script to load + Seleccione el script Lua a cargar + + + + Lua scripts (*.lua *.txt) + Scripts Lua (*.lua *.txt) + + 
+ + Select folder to dump RomFS files to + Seleccione la carpeta donde volcar los archivos del RomFS + + + + Invalid format for RomFS dumping + Formato inválido para volcado de RomFS + + + + The currently loaded app is not in a format that supports RomFS + La aplicación cargada no tiene un formato que soporta RomFS + + + + + + OK + OK + + + + No RomFS found + RomFS no encontrado + + + + No RomFS partition was found in the loaded app + No se encontró una partición RomFS en la aplicación cargada + + + + Select file + Seleccionar archivo + + + + DSP firmware file (*.cdc) + Archivo de firmware DSP (*.cdc) + + + + No DSP firmware loaded + Firmware DSP no cargado + + + + The currently loaded app has not uploaded a firmware to the DSP + La aplicación cargada no ha subido un firmware al DSP + + + + Failed to open output file + Error al abrir el archivo de salida + + + + The currently loaded DSP firmware could not be written to the selected file. Please make sure you have permission to access this file + No se pudo escribir el firmware DSP cargado al archivo seleccionado. 
Por favor asegure que tiene los permisos necesarios para acceder a este archivo + + + + PatchWindow + + + ROM patcher + Parcheador de ROM + + + + Select input file + Seleccione el archivo de entrada + + + + + Select + Seleccionar + + + + + Select patch file + Seleccione el archivo de parche + + + + Apply patch + Aplicar parche + + + + Select file to patch + Seleccione el archivo a parchear + + + + + All files (*.*) + Todos los archivos (*.*) + + + + Patch files (*.ips *.ups *.bps) + Archivos de parche (*.ips *.ups *.bps) + + + + Paths not provided correctly + Rutas no proporcionadas correctamente + + + + Please provide paths for both the input file and the patch file + Por favor proporcione rutas para el archivo de entrada y el parche + + + + Select file + Seleccionar archivo + + + + No output path + No hay archivo de salida + + + + No path was provided for the output file, no patching was done + No se ha proporcionado una ruta para el archivo de salida, no se ha aplicado el parche + + + + Unknown patch format + Formato del parche desconocido + + + + Unknown format for patch file. Currently IPS, UPS and BPS are supported + Formato desconocido del archivo de parche. Actualmente son soportados IPS, UPS y BPS + + + + Failed to open input files + Error al abrir archivos de entrada + + + + Make sure they're in a directory Panda3DS has access to + Asegure que estén en un directorio al que Panda3DS tenga acceso + + + + Patching Success + Parche exitoso + + + + Your file was patched successfully. + Su archivo fue parcheado con éxito. + + + + Checksum mismatch + Discrepancia en la suma de verificación + + + + Patch was applied successfully but a checksum mismatch was detected. The input or output files might not be correct + El parche se aplicó con éxito pero se detectó una discrepancia en la suma de verificación. 
Los archivos de entrada o salida pueden no ser correctos + + + + Patching error + Error de parcheo + + + + An error occured while patching + Ha ocurrido un error en el parcheo + + + + PatchWindow::PatchWindow + + + OK + OK + + + + ShaderEditorWindow + + + Reload shader + Recargar shader + + + + TextEditorWindow + + + Lua Editor + Editor Lua + + + + Load script + Cargar script + + + \ No newline at end of file diff --git a/docs/translations/nl.ts b/docs/translations/nl.ts new file mode 100644 index 00000000..4e6d3e9e --- /dev/null +++ b/docs/translations/nl.ts @@ -0,0 +1,763 @@ + + + AboutWindow + + + About Panda3DS + Over Panda3DS + + + + Panda3DS is a free and open source Nintendo 3DS emulator, for Windows, MacOS and Linux + Panda3DS is een gratis, open source Nintendo 3DS-emulator voor Windows, MacOS en Linux + + + + Visit panda3ds.com for help with Panda3DS and links to our official support sites. + Bezoek panda3ds.com voor ondersteuning van Panda3DS en links naar onze officiële ondersteuningskanalen. + + + + Panda3DS is developed by volunteers in their spare time. Below is a list of some of these volunteers who've agreed to be listed here, in no particular order.<br>If you think you should be listed here too, please inform us<br><br>- Peach (wheremyfoodat)<br>- noumidev<br>- liuk707<br>- Wunk<br>- marysaka<br>- Sky<br>- merryhime<br>- TGP17<br>- Shadow<br> + Panda3DS wordt ontwikkeld door vrijwilligers in hun vrije tijd. 
Hieronder een lijst van sommige van deze vrijwilligers die akkoord zijn met een vermelding, in willekeurige volgorde.<br>Als jij vindt dat je op deze lijst zou moeten staan, laat het ons dan weten<br><br>- Peach (wheremyfoodat)<br>- noumidev<br>- liuk707<br>- Wunk<br>- marysaka<br>- Sky<br>- merryhime<br>- TGP17<br>- Shadow<br> + + + + CheatEditDialog + + + Edit Cheat + Cheat bewerken + + + + Cheat name + Cheatnaam + + + + CheatEntryWidget + + + Edit + Bewerken + + + + CheatsWindow + + + Cheats + Cheats + + + + Add + Toevoegen + + + + Remove + Verwijderen + + + + ConfigWindow + + + Configuration + Instellingen + + + + Interface Settings + Interfaceinstellingen + + + + System + Systeem + + + + Light + Licht + + + + Dark + Donker + + + + Greetings Cat + Begroetingskat + + + + Cream + Crème + + + + Color theme + Kleurenthema + + + + Happy panda + Blije panda + + + + Happy panda (colourful) + Blije panda (kleurrijk) + + + + Sleepy panda + Slaperige panda + + + + Cow panda + Koeienpanda + + + + The penguin from SkyEmu + De pinguïn van SkyEmu + + + + Window icon + Venstericoon + + + + Language + Taal + + + + Show version on window title + Toon versie in venstertitel + + + + Alber v%1 + Alber v%1 + + + + Alber + Alber + + + + Remember window position + Vensterpositie onthouden + + + + General Settings + Algemene instellingen + + + + Browse... + Bladeren... 
+ + + + Select Directory + Kies map + + + + Default ROMs path + Standaard pad voor ROMs + + + + Enable Discord RPC + Discord RPC inschakelen + + + + Use portable build + Portable build gebruiken + + + + Print version in console output + Versie afdrukken in consoleuitvoer + + + + Graphics Settings + Grafische instellingen + + + + + Null + Null + + + + OpenGL + OpenGL + + + + Vulkan + Vulkan + + + + GPU renderer + Renderen op videokaart + + + + Enable Renderdoc + Renderdoc inschakelen + + + + Enable shader JIT + Shader JIT inschakelen + + + + Enable VSync + VSync inschakelen + + + + Use ubershaders (No stutter, maybe slower) + Ubershaders gebruiken (geen haperingen, mogelijk langzamer) + + + + Accurate shader multiplication + Nauwkeurige vermenigvuldigen in shaders + + + + Accelerate shaders + Shaders versnellen + + + + Force shadergen when rendering lights + Shadergen afdwingen bij tekenen licht + + + + Light threshold for forcing shadergen + Lichtgrens voor afdwingen shadergen + + + + Audio Settings + Audioinstellingen + + + + LLE + LLE + + + + HLE + HLE + + + + DSP emulation + DSP-emulatie + + + + Enable audio + Audio inschakelen + + + + Enable AAC audio + AAC-audio inschakelen + + + + Print DSP firmware + DSP-firmware afdrukken + + + + Mute audio device + Audioapparaat dempen + + + + Cubic + Kubiek + + + + Linear + Lineair + + + + Volume curve + Volumecurve + + + + Audio device volume + Volume audioapparaat + + + + Battery Settings + Batterij-instellingen + + + + Battery percentage + Batterijpercentage + + + + Charger plugged + Oplader aangesloten + + + + SD Card Settings + Instellingen SD-kaart + + + + Enable virtual SD card + Virtuele SD-kaart inschakelen + + + + Write protect virtual SD card + Virtuele SD-kaart schrijfbeveiligen + + + + Interface + Interface + + + + User Interface settings + Instellingen gebruikersinterface + + + + General + Algemeen + + + + General emulator settings + Algemene emulatorinstellingen + + + + Graphics + Weergave + + + + Graphics 
emulation and output settings + Instellingen grafische emulatie en weergave + + + + Audio + Audio + + + + Audio emulation and output settings + Instellingen audioemulatie en weergave + + + + Battery + Batterij + + + + Battery emulation settings + Instellingen batterijemulatie + + + + SD Card + SD-kaart + + + + SD Card emulation settings + Instellingen SD-kaart-emulatie + + + + Language change successful + Taal succesvol ingesteld + + + + Restart Panda3DS for the new language to be used. + Herstart Panda3DS om de nieuw gekozen taal te gebruiken. + + + + Language change failed + Wijzigen van taal mislukt + + + + The language you selected is not included in Panda3DS. If you're seeing this, someone messed up the language UI code... + De gekozen taal is niet beschikbaar in Panda3DS. Als je dit leest heeft iemand de taalcode verprutst... + + + + MainWindow + + + Alber + Alber + + + + File + Bestand + + + + Emulation + Emulatie + + + + Tools + Hulpmiddelen + + + + About + Over + + + + Load game + Spel laden + + + + Load Lua script + LUA-script laden + + + + Open Panda3DS folder + Open Panda3DS-map + + + + Pause + Pauzeren + + + + Resume + Hervatten + + + + Reset + Reset + + + + Configure + Instellingen + + + + Dump RomFS + RomFS dumpen + + + + Open Lua Editor + Open LUA-editor + + + + Open Cheats Editor + Open cheats-editor + + + + Open Patch Window + Open patchvenster + + + + Open Shader Editor + Open shader-editor + + + + Dump loaded DSP firmware + Geladen DSP-firmware dumpen + + + + About Panda3DS + Over Panda3DS + + + + Select 3DS ROM to load + Kies 3DS ROM om te laden + + + + Nintendo 3DS ROMs (*.3ds *.cci *.cxi *.app *.ncch *.3dsx *.elf *.axf) + Nintendo 3DS ROMs (*.3ds *.cci *.cxi *.app *.ncch *.3dsx *.elf *.axf) + + + + Select Lua script to load + Kies LUA-script om te laden + + + + Lua scripts (*.lua *.txt) + LUA-scripts (*.lua *.txt) + + + + Select folder to dump RomFS files to + Kies map om RomFS-bestanden heen te dumpen + + + + Invalid format for RomFS dumping 
+ Ongeldig formaat voor RomFS dump + + + + The currently loaded app is not in a format that supports RomFS + Het formaat van de momenteel geladen applicatie ondersteunt geen RomFS + + + + + + OK + OK + + + + No RomFS found + Geen RomFS gevonden + + + + No RomFS partition was found in the loaded app + Geen RomFS-partitie gevonden in de geladen applicatie + + + + Select file + Selecteer bestand + + + + DSP firmware file (*.cdc) + DSP-firmware-bestand (*.cdc) + + + + No DSP firmware loaded + Geen DSP-firmware geladen + + + + The currently loaded app has not uploaded a firmware to the DSP + De momenteel geladen applicatie heeft geen firmware geüpload naar de DSP + + + + Failed to open output file + Uitvoerbestand openen mislukt + + + + The currently loaded DSP firmware could not be written to the selected file. Please make sure you have permission to access this file + De momenteel geladen DSP-firmware kan niet worden geschreven naar het gekozen bestand. Controleer de permissies van het gekozen bestand + + + + PatchWindow + + + ROM patcher + ROM-patcher + + + + Select input file + Kies invoerbestand + + + + + Select + Kies + + + + + Select patch file + Kies patchbestand + + + + Apply patch + Patch toepassen + + + + Select file to patch + Kies bestand om te patchen + + + + + All files (*.*) + Alle bestanden (*.*) + + + + Patch files (*.ips *.ups *.bps) + Patch-bestanden (*.ips *.ups *.bps) + + + + Paths not provided correctly + Paden incorrect meegegeven + + + + Please provide paths for both the input file and the patch file + Geef paden van invoerbestand en patchbestand op + + + + Select file + Kies bestand + + + + No output path + Geen uitvoerpad + + + + No path was provided for the output file, no patching was done + Geen pad opgegeven voor uitvoerbestand, patch niet toegepast + + + + Unknown patch format + Onbekend patchformaat + + + + Unknown format for patch file. Currently IPS, UPS and BPS are supported + Ongeldig formaat van patchbestand. 
Momenteel wordt IPS, UPS en BPS ondersteund + + + + Failed to open input files + Openen van invoerbestanden mislukt + + + + Make sure they're in a directory Panda3DS has access to + Zorg ervoor dat ze in een map staan waar Panda3DS toegang toe heeft + + + + Patching Success + Patch succesvol + + + + Your file was patched successfully. + Het bestand is succesvol gepatcht. + + + + Checksum mismatch + Checksum komt niet overeen + + + + Patch was applied successfully but a checksum mismatch was detected. The input or output files might not be correct + Patch is succesvol toegepast maar de checksum komt niet overeen. Invoer- of uitvoerbestand is mogelijk ongeldig + + + + Patching error + Fout tijdens patchen + + + + An error occured while patching + Er is bij het patchen een fout opgetreden + + + + PatchWindow::PatchWindow + + + OK + OK + + + + ShaderEditorWindow + + + Reload shader + Shader herladen + + + + TextEditorWindow + + + Lua Editor + LUA-editor + + + + Load script + Script laden + + + \ No newline at end of file diff --git a/docs/translations/pt_br.ts b/docs/translations/pt_br.ts new file mode 100644 index 00000000..e2afe3a9 --- /dev/null +++ b/docs/translations/pt_br.ts @@ -0,0 +1,764 @@ + + + AboutWindow + + + About Panda3DS + Sobre o Panda3DS + + + + Panda3DS is a free and open source Nintendo 3DS emulator, for Windows, MacOS and Linux + Panda3DS é um emulador gratuito e open-source para Windows, MacOS e Linux + + + + Visit panda3ds.com for help with Panda3DS and links to our official support sites. + Visite panda3ds.com para obter ajuda e links de suporte oficial. + + + + Panda3DS is developed by volunteers in their spare time. 
Below is a list of some of these volunteers who've agreed to be listed here, in no particular order.<br>If you think you should be listed here too, please inform us<br><br>- Peach (wheremyfoodat)<br>- noumidev<br>- liuk707<br>- Wunk<br>- marysaka<br>- Sky<br>- merryhime<br>- TGP17<br>- Shadow<br> + Panda3DS é desenvolvido por voluntários em seu tempo livre. Abaixo a lista de alguns voluntários<br>(Lista sem nenhuma ordem específica)<br>Se acha que seu nome deveria estar listado aqui por favor informe-nos<br><br>- Peach (wheremyfoodat)<br>- noumidev<br>- liuk707<br>- Wunk<br>- marysaka<br>- Sky<br>- merryhime<br>- TGP17<br>- Shadow<br> + + + + CheatEditDialog + + + Edit Cheat + Editar Trapaça + + + + Cheat name + Nome da trapaça + + + + CheatEntryWidget + + + Edit + Editar + + + + CheatsWindow + + + Cheats + Trapaças + + + + Add + Adicionar + + + + Remove + Remover + + + + ConfigWindow + + + Configuration + Configurações + + + + Interface Settings + Configurações da interface + + + + System + Sistema + + + + Light + Claro + + + + Dark + Escuro + + + + Greetings Cat + + + + + Cream + + + + + Color theme + Tema de cores + + + + Happy panda + Panda feliz + + + + Happy panda (colourful) + Panda feliz (colorido) + + + + Sleepy panda + Panda sonolento + + + + Cow panda + Panda vaca + + + + The penguin from SkyEmu + O pinguim do SkyEmu + + + + Window icon + Ícone da janela + + + + Language + Idioma + + + + Show version on window title + Mostrar versão na barra de título + + + + Alber v%1 + Alber v%1 + + + + Alber + Alber + + + + Remember window position + Lembrar posição da janela + + + + General Settings + Configurações gerais + + + + Browse... + Navegar... 
+ + + + Select Directory + Selecionar o diretório + + + + Default ROMs path + Diretório padrão das ROMs + + + + Enable Discord RPC + Ativar Discord RPC + + + + Use portable build + Usar build portátil + + + + Print version in console output + Imprimir versão no console + + + + Graphics Settings + Configurações gráficas + + + + + Null + Nulo + + + + OpenGL + OpenGL + + + + Vulkan + Vulkan + + + + GPU renderer + Renderizador GPU + + + + Enable Renderdoc + Ativar Renderdoc + + + + Enable shader JIT + Ativar JIT de shaders + + + + Enable VSync + Ativar VSync + + + + Use ubershaders (No stutter, maybe slower) + Usar ubershaders (sem travamentos, pode ser mais lento) + + + + Accurate shader multiplication + Multiplicação precisa de shaders + + + + Accelerate shaders + Acelerar shaders + + + + Force shadergen when rendering lights + Forçar shadergen ao renderizar luzes + + + + Light threshold for forcing shadergen + Limiar de luzes para forçar shadergen + + + + Audio Settings + Configurações de áudio + + + + LLE + LLE + + + + HLE + HLE + + + + DSP emulation + Emulação DSP + + + + Enable audio + Ativar áudio + + + + Enable AAC audio + Ativar áudio AAC + + + + Print DSP firmware + Imprimir firmware DSP + + + + Mute audio device + Silenciar dispositivo de áudio + + + + Cubic + Cúbico + + + + Linear + Linear + + + + Volume curve + Curva de volume + + + + Audio device volume + Volume do dispositivo de áudio + + + + Battery Settings + Configurações de bateria + + + + Battery percentage + Porcentagem da bateria + + + + Charger plugged + Carregador conectado + + + + SD Card Settings + Configurações do cartão de memória + + + + Enable virtual SD card + Ativar cartão de memória virtual + + + + Write protect virtual SD card + Proteger cartão de memória virtual contra escrita + + + + Interface + Interface + + + + User Interface settings + Configurações da interface de usuário + + + + General + Geral + + + + General emulator settings + Configurações gerais do emulador + + + + 
Graphics + Gráficos + + + + Graphics emulation and output settings + Configurações da emulação e saída de vídeo + + + + Audio + Áudio + + + + Audio emulation and output settings + Configurações da emulação e saída de áudio + + + + Battery + Bateria + + + + Battery emulation settings + Configuração da emulação da bateria + + + + SD Card + Cartão de memória + + + + SD Card emulation settings + Configurar a emulação do cartão de memória + + + + Language change successful + Idioma alterado com sucesso + + + + Restart Panda3DS for the new language to be used. + Para aplicar o novo idioma feche e abra o emulador. + + + + Language change failed + A mudança de idioma falhou + + + + The language you selected is not included in Panda3DS. If you're seeing this, someone messed up the language UI code... + O idioma selecionado não existe no Panda3DS. Se você está vendo esse erro, existe um erro no código... + + + + MainWindow + + + Alber + Alber + + + + File + Arquivo + + + + Emulation + Emulação + + + + Tools + Ferramentas + + + + About + Sobre + + + + Load game + Carregar jogo + + + + Load Lua script + Carregar Script Lua + + + + Open Panda3DS folder + Abrir pasta do Panda3DS + + + + Pause + Pausar + + + + Resume + Continuar + + + + Reset + Reiniciar + + + + Configure + Configurar + + + + Dump RomFS + Extrair RomFS + + + + Open Lua Editor + Abrir editor Lua + + + + Open Cheats Editor + Abrir editor de trapaças + + + + Open Patch Window + Abrir janela de patches + + + + Open Shader Editor + Abrir editor de shaders + + + + Dump loaded DSP firmware + Extrair firmware DSP carregado + + + + About Panda3DS + Sobre o Panda3DS + + + + Select 3DS ROM to load + Selecione uma ROM de 3DS para carregar + + + + Nintendo 3DS ROMs (*.3ds *.cci *.cxi *.app *.ncch *.3dsx *.elf *.axf) + ROMs de Nintendo 3DS (*.3ds *.cci *.cxi *.app *.ncch *.3dsx *.elf *.axf) + + + + Select Lua script to load + Selecione um script Lua para carregar + + + + Lua scripts (*.lua *.txt) + Scripts Lua (*.lua *.txt) + 
+ + + Select folder to dump RomFS files to + Selecione onde a RomFS será extraída + + + + Invalid format for RomFS dumping + Formato de RomFS inválido + + + + The currently loaded app is not in a format that supports RomFS + O aplicativo carregado não suporta RomFS + + + + + + OK + OK + + + + No RomFS found + RomFS não encontrado + + + + No RomFS partition was found in the loaded app + A partição RomFS não foi encontrada no aplicativo + + + + Select file + Selecionar arquivo + + + + DSP firmware file (*.cdc) + Arquivo do firmware DSP (*.cdc) + + + + No DSP firmware loaded + Nenhum firmware DSP carregado + + + + The currently loaded app has not uploaded a firmware to the DSP + A aplicação não carregou um firmware DSP + + + + Failed to open output file + Erro ao abrir arquivo de destino + + + + The currently loaded DSP firmware could not be written to the selected file. Please make sure you have permission to access this file + O firmware DSP carregado não pôde ser escrito no arquivo selecionado. Por favor, veja se você tem permissão para modificá-lo. + + + + PatchWindow + + + ROM patcher + Editor de ROM + + + + Select input file + Selecione o arquivo de entrada + + + + + Select + Selecionar + + + + + Select patch file + Selecione um arquivo de patch + + + + Apply patch + Aplicar patch + + + + Select file to patch + Selecione um arquivo para editar + + + + + All files (*.*) + Todos os arquivos (*.*) + + + + Patch files (*.ips *.ups *.bps) + Arquivos de patch (*.ips *.ups *.bps) + + + + Paths not provided correctly + Diretórios não fornecidos corretamente + + + + Please provide paths for both the input file and the patch file + Por favor selecione os diretórios tanto para o arquivo de origem como para o patch + + + + Select file + Selecionar arquivo + + + + No output path + Sem diretório de saída + + + + No path was provided for the output file, no patching was done + Nenhum diretório de destino foi fornecido, patch não aplicado. 
+ + + + Unknown patch format + Formato de patch desconhecido + + + + Unknown format for patch file. Currently IPS, UPS and BPS are supported + Arquivo de patch inválido. Atualmente são suportados patches nos formatos IPS, UPS e BPS + + + + Failed to open input files + Falha ao abrir os arquivos + + + + Make sure they're in a directory Panda3DS has access to + Certifique-se de que eles estejam em um diretório ao qual o Panda3DS tenha acesso + + + + Patching Success + Patch aplicado + + + + Your file was patched successfully. + O patch foi aplicado com sucesso ao arquivo + + + + Checksum mismatch + Checagem inválida + + + + Patch was applied successfully but a checksum mismatch was detected. The input or output files might not be correct + O patch foi aplicado com sucesso porém a checagem falhou. O arquivo de origem ou destino pode não estar correto. + + + + Patching error + Erro de patch + + + + An error occured while patching + Um erro ocorreu ao aplicar o patch + + + + PatchWindow::PatchWindow + + + OK + OK + + + + ShaderEditorWindow + + + Reload shader + Recarregar shader + + + + TextEditorWindow + + + Lua Editor + Editor Lua + + + + Load script + Carregar script + + + + diff --git a/include/PICA/draw_acceleration.hpp b/include/PICA/draw_acceleration.hpp new file mode 100644 index 00000000..6a66cdc1 --- /dev/null +++ b/include/PICA/draw_acceleration.hpp @@ -0,0 +1,45 @@ +#pragma once + +#include + +#include "helpers.hpp" + +namespace PICA { + struct DrawAcceleration { + static constexpr u32 maxAttribCount = 16; + static constexpr u32 maxLoaderCount = 12; + + struct AttributeInfo { + u32 offset; + u32 stride; + + u8 type; + u8 componentCount; + + std::array fixedValue; // For fixed attributes + }; + + struct Loader { + // Data to upload for this loader + u8* data; + usize size; + }; + + u8* indexBuffer; + + // Minimum and maximum index in the index buffer for a draw call + u16 minimumIndex, maximumIndex; + u32 totalAttribCount; + u32 totalLoaderCount; + u32 
enabledAttributeMask; + u32 fixedAttributes; + u32 vertexDataSize; + + std::array attributeInfo; + std::array loaders; + + bool canBeAccelerated; + bool indexed; + bool useShortIndices; + }; +} // namespace PICA \ No newline at end of file diff --git a/include/PICA/dynapica/pica_recs.hpp b/include/PICA/dynapica/pica_recs.hpp index acfd226e..eb0cf404 100644 --- a/include/PICA/dynapica/pica_recs.hpp +++ b/include/PICA/dynapica/pica_recs.hpp @@ -2,7 +2,7 @@ #include "helpers.hpp" #include "vertex_loader_rec.hpp" -// Common file for our PICA JITs (From vertex config -> CPU assembly and from PICA shader -> CPU assembly) +// Common file for our PICA JITs (From PICA shader -> CPU assembly) namespace Dynapica { #ifdef PANDA3DS_DYNAPICA_SUPPORTED diff --git a/include/PICA/dynapica/shader_rec.hpp b/include/PICA/dynapica/shader_rec.hpp index 2dabc128..a242d02f 100644 --- a/include/PICA/dynapica/shader_rec.hpp +++ b/include/PICA/dynapica/shader_rec.hpp @@ -22,8 +22,11 @@ class ShaderJIT { ShaderCache cache; #endif + bool accurateMul = false; public: + void setAccurateMul(bool value) { accurateMul = value; } + #ifdef PANDA3DS_SHADER_JIT_SUPPORTED // Call this before starting to process a batch of vertices // This will read the PICA config (uploaded shader and shader operand descriptors) and search if we've already compiled this shader @@ -36,11 +39,11 @@ class ShaderJIT { static constexpr bool isAvailable() { return true; } #else void prepare(PICAShader& shaderUnit) { - Helpers::panic("Vertex Loader JIT: Tried to run ShaderJIT::Prepare on platform that does not support shader jit"); + Helpers::panic("Shader JIT: Tried to run ShaderJIT::Prepare on platform that does not support shader jit"); } void run(PICAShader& shaderUnit) { - Helpers::panic("Vertex Loader JIT: Tried to run ShaderJIT::Run on platform that does not support shader jit"); + Helpers::panic("Shader JIT: Tried to run ShaderJIT::Run on platform that does not support shader jit"); } // Define dummy callback. 
This should never be called if the shader JIT is not supported diff --git a/include/PICA/dynapica/shader_rec_emitter_arm64.hpp b/include/PICA/dynapica/shader_rec_emitter_arm64.hpp index 7411c430..9351f383 100644 --- a/include/PICA/dynapica/shader_rec_emitter_arm64.hpp +++ b/include/PICA/dynapica/shader_rec_emitter_arm64.hpp @@ -37,6 +37,8 @@ class ShaderEmitter : private oaknut::CodeBlock, public oaknut::CodeGenerator { // Shows whether the loaded shader has any log2 and exp2 instructions bool codeHasLog2 = false; bool codeHasExp2 = false; + // Whether to compile this shader using accurate, safe, non-IEEE multiplication (slow) or faster but less accurate mul + bool useSafeMUL = false; oaknut::Label log2Func, exp2Func; oaknut::Label emitLog2Func(); @@ -123,7 +125,7 @@ class ShaderEmitter : private oaknut::CodeBlock, public oaknut::CodeGenerator { PrologueCallback prologueCb = nullptr; // Initialize our emitter with "allocSize" bytes of memory allocated for the code buffer - ShaderEmitter() : oaknut::CodeBlock(allocSize), oaknut::CodeGenerator(oaknut::CodeBlock::ptr()) {} + ShaderEmitter(bool useSafeMUL) : oaknut::CodeBlock(allocSize), oaknut::CodeGenerator(oaknut::CodeBlock::ptr()), useSafeMUL(useSafeMUL) {} // PC must be a valid entrypoint here. 
It doesn't have that much overhead in this case, so we use std::array<>::at() to assert it does InstructionCallback getInstructionCallback(u32 pc) { return getLabelPointer(instructionLabels.at(pc)); } diff --git a/include/PICA/dynapica/shader_rec_emitter_x64.hpp b/include/PICA/dynapica/shader_rec_emitter_x64.hpp index 0338911c..a43bd2dc 100644 --- a/include/PICA/dynapica/shader_rec_emitter_x64.hpp +++ b/include/PICA/dynapica/shader_rec_emitter_x64.hpp @@ -32,6 +32,8 @@ class ShaderEmitter : public Xbyak::CodeGenerator { Label negateVector; // Vector value of (1.0, 1.0, 1.0, 1.0) for SLT(i)/SGE(i) Label onesVector; + // Vector value of (0xFF, 0xFF, 0xFF, 0) for setting the w component to 0 in DP3 + Label dp3Vector; u32 recompilerPC = 0; // PC the recompiler is currently recompiling @ u32 loopLevel = 0; // The current loop nesting level (0 = not in a loop) @@ -43,12 +45,17 @@ class ShaderEmitter : public Xbyak::CodeGenerator { // Shows whether the loaded shader has any log2 and exp2 instructions bool codeHasLog2 = false; bool codeHasExp2 = false; + // Whether to compile this shader using accurate, safe, non-IEEE multiplication (slow) or faster but less accurate mul + bool useSafeMUL = false; Xbyak::Label log2Func, exp2Func; Xbyak::Label emitLog2Func(); Xbyak::Label emitExp2Func(); Xbyak::util::Cpu cpuCaps; + // Emit a PICA200-compliant multiplication that handles "0 * inf = 0" + void emitSafeMUL(Xbyak::Xmm src1, Xbyak::Xmm src2, Xbyak::Xmm scratch); + // Compile all instructions from [current recompiler PC, end) void compileUntil(const PICAShader& shaderUnit, u32 endPC); // Compile instruction "instr" @@ -125,7 +132,7 @@ class ShaderEmitter : public Xbyak::CodeGenerator { PrologueCallback prologueCb = nullptr; // Initialize our emitter with "allocSize" bytes of RWX memory - ShaderEmitter() : Xbyak::CodeGenerator(allocSize) { + ShaderEmitter(bool useSafeMUL) : Xbyak::CodeGenerator(allocSize), useSafeMUL(useSafeMUL) { cpuCaps = Xbyak::util::Cpu(); haveSSE4_1 = 
cpuCaps.has(Xbyak::util::Cpu::tSSE41); diff --git a/include/PICA/gpu.hpp b/include/PICA/gpu.hpp index 61020f76..c168a9bf 100644 --- a/include/PICA/gpu.hpp +++ b/include/PICA/gpu.hpp @@ -1,6 +1,7 @@ #pragma once #include +#include "PICA/draw_acceleration.hpp" #include "PICA/dynapica/shader_rec.hpp" #include "PICA/float_types.hpp" #include "PICA/pica_vertex.hpp" @@ -13,6 +14,12 @@ #include "memory.hpp" #include "renderer.hpp" +enum class ShaderExecMode { + Interpreter, // Interpret shaders on the CPU + JIT, // Recompile shaders to CPU machine code + Hardware, // Recompile shaders to host shaders and run them on the GPU +}; + class GPU { static constexpr u32 regNum = 0x300; static constexpr u32 extRegNum = 0x1000; @@ -45,7 +52,7 @@ class GPU { uint immediateModeVertIndex; uint immediateModeAttrIndex; // Index of the immediate mode attribute we're uploading - template + template void drawArrays(); // Silly method of avoiding linking problems. TODO: Change to something less silly @@ -81,6 +88,7 @@ class GPU { std::unique_ptr renderer; PICA::Vertex getImmediateModeVertex(); + void getAcceleratedDrawInfo(PICA::DrawAcceleration& accel, bool indexed); public: // 256 entries per LUT with each LUT as its own row forming a 2D image 256 * LUT_COUNT // Encoded in PICA native format @@ -92,6 +100,9 @@ class GPU { // Set to false by the renderer when the lighting_lut is uploaded ot the GPU bool lightingLUTDirty = false; + bool fogLUTDirty = false; + std::array fogLUT; + GPU(Memory& mem, EmulatorConfig& config); void display() { renderer->display(); } void screenshot(const std::string& name) { renderer->screenshot(name); } @@ -164,7 +175,8 @@ class GPU { u32 index = paddr - PhysicalAddrs::VRAM; return (T*)&vram[index]; } else [[unlikely]] { - Helpers::panic("[GPU] Tried to access unknown physical address: %08X", paddr); + Helpers::warn("[GPU] Tried to access unknown physical address: %08X", paddr); + return nullptr; } } diff --git a/include/PICA/pica_frag_config.hpp
b/include/PICA/pica_frag_config.hpp new file mode 100644 index 00000000..7b63a7b5 --- /dev/null +++ b/include/PICA/pica_frag_config.hpp @@ -0,0 +1,257 @@ +#pragma once +#include +#include +#include +#include + +#include "PICA/pica_hash.hpp" +#include "PICA/regs.hpp" +#include "bitfield.hpp" +#include "helpers.hpp" + +namespace PICA { + struct OutputConfig { + union { + u32 raw{}; + // Merge the enable + compare function into 1 field to avoid duplicate shaders + // enable == off means a CompareFunction of Always + BitField<0, 3, CompareFunction> alphaTestFunction; + BitField<3, 1, u32> depthMapEnable; + BitField<4, 4, LogicOpMode> logicOpMode; + }; + }; + + struct TextureConfig { + u32 texUnitConfig; + u32 texEnvUpdateBuffer; + + // There's 6 TEV stages, and each one is configured via 4 word-sized registers + // (+ the constant color register, which we don't include here, otherwise we'd generate too many shaders) + std::array tevConfigs; + }; + + struct FogConfig { + union { + u32 raw{}; + + BitField<0, 3, FogMode> mode; + BitField<3, 1, u32> flipDepth; + }; + }; + + struct Light { + union { + u16 raw; + BitField<0, 3, u16> num; + BitField<3, 1, u16> directional; + BitField<4, 1, u16> twoSidedDiffuse; + BitField<5, 1, u16> distanceAttenuationEnable; + BitField<6, 1, u16> spotAttenuationEnable; + BitField<7, 1, u16> geometricFactor0; + BitField<8, 1, u16> geometricFactor1; + BitField<9, 1, u16> shadowEnable; + }; + }; + + struct LightingLUTConfig { + union { + u32 raw; + BitField<0, 1, u32> enable; + BitField<1, 1, u32> absInput; + BitField<2, 3, u32> type; + BitField<5, 3, u32> scale; + }; + }; + + struct LightingConfig { + union { + u32 raw{}; + BitField<0, 1, u32> enable; + BitField<1, 4, u32> lightNum; + BitField<5, 2, u32> bumpMode; + BitField<7, 2, u32> bumpSelector; + BitField<9, 1, u32> bumpRenorm; + BitField<10, 1, u32> clampHighlights; + BitField<11, 4, u32> config; + BitField<15, 1, u32> enablePrimaryAlpha; + BitField<16, 1, u32> enableSecondaryAlpha; + 
BitField<17, 1, u32> enableShadow; + BitField<18, 1, u32> shadowPrimary; + BitField<19, 1, u32> shadowSecondary; + BitField<20, 1, u32> shadowInvert; + BitField<21, 1, u32> shadowAlpha; + BitField<22, 2, u32> shadowSelector; + }; + + std::array luts{}; + + std::array lights{}; + + LightingConfig(const std::array& regs) { + // Ignore lighting registers if it's disabled + if ((regs[InternalRegs::LightingEnable] & 1) == 0) { + return; + } + + const u32 config0 = regs[InternalRegs::LightConfig0]; + const u32 config1 = regs[InternalRegs::LightConfig1]; + const u32 totalLightCount = Helpers::getBits<0, 3>(regs[InternalRegs::LightNumber]) + 1; + + enable = 1; + lightNum = totalLightCount; + + enableShadow = Helpers::getBit<0>(config0); + if (enableShadow) [[unlikely]] { + shadowPrimary = Helpers::getBit<16>(config0); + shadowSecondary = Helpers::getBit<17>(config0); + shadowInvert = Helpers::getBit<18>(config0); + shadowAlpha = Helpers::getBit<19>(config0); + shadowSelector = Helpers::getBits<24, 2>(config0); + } + + enablePrimaryAlpha = Helpers::getBit<2>(config0); + enableSecondaryAlpha = Helpers::getBit<3>(config0); + config = Helpers::getBits<4, 4>(config0); + + bumpSelector = Helpers::getBits<22, 2>(config0); + clampHighlights = Helpers::getBit<27>(config0); + bumpMode = Helpers::getBits<28, 2>(config0); + bumpRenorm = Helpers::getBit<30>(config0) ^ 1; // 0 = enable so flip it with xor + + for (int i = 0; i < totalLightCount; i++) { + auto& light = lights[i]; + light.num = (regs[InternalRegs::LightPermutation] >> (i * 4)) & 0x7; + + const u32 lightConfig = regs[InternalRegs::Light0Config + 0x10 * light.num]; + light.directional = Helpers::getBit<0>(lightConfig); + light.twoSidedDiffuse = Helpers::getBit<1>(lightConfig); + light.geometricFactor0 = Helpers::getBit<2>(lightConfig); + light.geometricFactor1 = Helpers::getBit<3>(lightConfig); + + light.shadowEnable = ((config1 >> light.num) & 1) ^ 1; // This also does 0 = enabled + light.spotAttenuationEnable = ((config1 
>> (8 + light.num)) & 1) ^ 1; // Same here + light.distanceAttenuationEnable = ((config1 >> (24 + light.num)) & 1) ^ 1; // Of course same here + } + + LightingLUTConfig& d0 = luts[Lights::LUT_D0]; + LightingLUTConfig& d1 = luts[Lights::LUT_D1]; + LightingLUTConfig& sp = luts[spotlightLutIndex]; + LightingLUTConfig& fr = luts[Lights::LUT_FR]; + LightingLUTConfig& rb = luts[Lights::LUT_RB]; + LightingLUTConfig& rg = luts[Lights::LUT_RG]; + LightingLUTConfig& rr = luts[Lights::LUT_RR]; + + d0.enable = Helpers::getBit<16>(config1) == 0; + d1.enable = Helpers::getBit<17>(config1) == 0; + fr.enable = Helpers::getBit<19>(config1) == 0; + rb.enable = Helpers::getBit<20>(config1) == 0; + rg.enable = Helpers::getBit<21>(config1) == 0; + rr.enable = Helpers::getBit<22>(config1) == 0; + sp.enable = 1; + + const u32 lutAbs = regs[InternalRegs::LightLUTAbs]; + const u32 lutSelect = regs[InternalRegs::LightLUTSelect]; + const u32 lutScale = regs[InternalRegs::LightLUTScale]; + + if (d0.enable) { + d0.absInput = Helpers::getBit<1>(lutAbs) == 0; + d0.type = Helpers::getBits<0, 3>(lutSelect); + d0.scale = Helpers::getBits<0, 3>(lutScale); + } + + if (d1.enable) { + d1.absInput = Helpers::getBit<5>(lutAbs) == 0; + d1.type = Helpers::getBits<4, 3>(lutSelect); + d1.scale = Helpers::getBits<4, 3>(lutScale); + } + + sp.absInput = Helpers::getBit<9>(lutAbs) == 0; + sp.type = Helpers::getBits<8, 3>(lutSelect); + sp.scale = Helpers::getBits<8, 3>(lutScale); + + if (fr.enable) { + fr.absInput = Helpers::getBit<13>(lutAbs) == 0; + fr.type = Helpers::getBits<12, 3>(lutSelect); + fr.scale = Helpers::getBits<12, 3>(lutScale); + } + + if (rb.enable) { + rb.absInput = Helpers::getBit<17>(lutAbs) == 0; + rb.type = Helpers::getBits<16, 3>(lutSelect); + rb.scale = Helpers::getBits<16, 3>(lutScale); + } + + if (rg.enable) { + rg.absInput = Helpers::getBit<21>(lutAbs) == 0; + rg.type = Helpers::getBits<20, 3>(lutSelect); + rg.scale = Helpers::getBits<20, 3>(lutScale); + } + + if (rr.enable) { + 
rr.absInput = Helpers::getBit<25>(lutAbs) == 0; + rr.type = Helpers::getBits<24, 3>(lutSelect); + rr.scale = Helpers::getBits<24, 3>(lutScale); + } + } + }; + + // Config used for identifying unique fragment pipeline configurations + struct FragmentConfig { + OutputConfig outConfig; + TextureConfig texConfig; + FogConfig fogConfig; + LightingConfig lighting; + + bool operator==(const FragmentConfig& config) const { + // Hash function and equality operator required by std::unordered_map + return std::memcmp(this, &config, sizeof(FragmentConfig)) == 0; + } + + FragmentConfig(const std::array& regs) : lighting(regs) { + auto alphaTestConfig = regs[InternalRegs::AlphaTestConfig]; + auto alphaTestFunction = Helpers::getBits<4, 3>(alphaTestConfig); + + outConfig.alphaTestFunction = + (alphaTestConfig & 1) ? static_cast(alphaTestFunction) : PICA::CompareFunction::Always; + outConfig.depthMapEnable = regs[InternalRegs::DepthmapEnable] & 1; + + // Shows if blending is enabled. If it is not enabled, then logic ops are enabled instead + const bool blendingEnabled = (regs[InternalRegs::ColourOperation] & (1 << 8)) != 0; + outConfig.logicOpMode = blendingEnabled ? LogicOpMode::Copy : LogicOpMode(Helpers::getBits<0, 4>(regs[InternalRegs::LogicOp])); + + texConfig.texUnitConfig = regs[InternalRegs::TexUnitCfg]; + texConfig.texEnvUpdateBuffer = regs[InternalRegs::TexEnvUpdateBuffer]; + + // Set up TEV stages. 
Annoyingly we can't just memcpy as the TEV registers are arranged like + // {Source, Operand, Combiner, Color, Scale} and we want to skip the color register since it's uploaded via UBO +#define setupTevStage(stage) \ + std::memcpy(&texConfig.tevConfigs[stage * 4], &regs[InternalRegs::TexEnv##stage##Source], 3 * sizeof(u32)); \ + texConfig.tevConfigs[stage * 4 + 3] = regs[InternalRegs::TexEnv##stage##Source + 4]; + + setupTevStage(0); + setupTevStage(1); + setupTevStage(2); + setupTevStage(3); + setupTevStage(4); + setupTevStage(5); +#undef setupTevStage + + fogConfig.mode = (FogMode)Helpers::getBits<0, 3>(regs[InternalRegs::TexEnvUpdateBuffer]); + + if (fogConfig.mode == FogMode::Fog) { + fogConfig.flipDepth = Helpers::getBit<16>(regs[InternalRegs::TexEnvUpdateBuffer]); + } + } + }; + + static_assert( + std::has_unique_object_representations() && std::has_unique_object_representations() && + std::has_unique_object_representations() && std::has_unique_object_representations() + ); +} // namespace PICA + +// Override std::hash for our fragment config class +template <> +struct std::hash { + std::size_t operator()(const PICA::FragmentConfig& config) const noexcept { return PICAHash::computeHash((const char*)&config, sizeof(config)); } +}; \ No newline at end of file diff --git a/include/PICA/pica_frag_uniforms.hpp b/include/PICA/pica_frag_uniforms.hpp new file mode 100644 index 00000000..781fdcd3 --- /dev/null +++ b/include/PICA/pica_frag_uniforms.hpp @@ -0,0 +1,47 @@ +#pragma once +#include +#include +#include + +#include "helpers.hpp" + +namespace PICA { + struct LightUniform { + using vec3 = std::array; + + // std140 requires vec3s be aligned to 16 bytes + alignas(16) vec3 specular0; + alignas(16) vec3 specular1; + alignas(16) vec3 diffuse; + alignas(16) vec3 ambient; + alignas(16) vec3 position; + alignas(16) vec3 spotlightDirection; + + float distanceAttenuationBias; + float distanceAttenuationScale; + }; + + struct FragmentUniforms { + using vec3 = std::array; +
using vec4 = std::array; + static constexpr usize tevStageCount = 6; + + s32 alphaReference; + float depthScale; + float depthOffset; + + alignas(16) vec4 constantColors[tevStageCount]; + alignas(16) vec4 tevBufferColor; + alignas(16) vec4 clipCoords; + + // Note: We upload these as a u32 and decode on GPU. + // Particularly the fog colour since fog is really uncommon and it doesn't matter if we decode on GPU. + u32 globalAmbientLight; + u32 fogColor; + // NOTE: THIS MUST BE LAST so that if lighting is disabled we can potentially omit uploading it + LightUniform lightUniforms[8]; + }; + + // Assert that lightUniforms is the last member of the structure + static_assert(offsetof(FragmentUniforms, lightUniforms) + 8 * sizeof(LightUniform) == sizeof(FragmentUniforms)); +} // namespace PICA \ No newline at end of file diff --git a/include/PICA/pica_simd.hpp b/include/PICA/pica_simd.hpp new file mode 100644 index 00000000..efb00d43 --- /dev/null +++ b/include/PICA/pica_simd.hpp @@ -0,0 +1,275 @@ +#pragma once +#include +#include +#include + +#include "compiler_builtins.hpp" +#include "helpers.hpp" + +#if defined(_M_AMD64) || defined(__x86_64__) +#define PICA_SIMD_X64 +#include +#elif defined(_M_ARM64) || defined(__aarch64__) +#define PICA_SIMD_ARM64 +#include +#endif + +// Optimized functions for analyzing PICA index buffers (Finding minimum and maximum index values inside them) +namespace PICA::IndexBuffer { + // Non-SIMD, portable algorithm + template + std::pair analyzePortable(u8* indexBuffer, u32 vertexCount) { + u16 minimumIndex = std::numeric_limits::max(); + u16 maximumIndex = 0; + + // Calculate the minimum and maximum indices used in the index buffer, so we'll only upload them + if constexpr (useShortIndices) { + u16* indexBuffer16 = reinterpret_cast(indexBuffer); + + for (u32 i = 0; i < vertexCount; i++) { + u16 index = indexBuffer16[i]; + minimumIndex = std::min(minimumIndex, index); + maximumIndex = std::max(maximumIndex, index); + } + } else { + for (u32 i 
= 0; i < vertexCount; i++) { + u16 index = u16(indexBuffer[i]); + minimumIndex = std::min(minimumIndex, index); + maximumIndex = std::max(maximumIndex, index); + } + } + + return {minimumIndex, maximumIndex}; + } + +#ifdef PICA_SIMD_ARM64 + template + ALWAYS_INLINE std::pair analyzeNEON(u8* indexBuffer, u32 vertexCount) { + // We process 16 bytes per iteration, which is 8 vertices if we're using u16 indices or 16 vertices if we're using u8 indices + constexpr u32 vertsPerLoop = (useShortIndices) ? 8 : 16; + + if (vertexCount < vertsPerLoop) { + return analyzePortable(indexBuffer, vertexCount); + } + + u16 minimumIndex, maximumIndex; + + if constexpr (useShortIndices) { + // 16-bit indices + uint16x8_t minima = vdupq_n_u16(0xffff); + uint16x8_t maxima = vdupq_n_u16(0); + + while (vertexCount >= vertsPerLoop) { + const uint16x8_t data = vld1q_u16(reinterpret_cast(indexBuffer)); + minima = vminq_u16(data, minima); + maxima = vmaxq_u16(data, maxima); + + indexBuffer += 16; + vertexCount -= vertsPerLoop; + } + + // Do horizontal min/max operations to get the actual minimum and maximum from all the vertices we processed with SIMD + // We want to gather the actual minimum and maximum in the line bottom lane of the minima/maxima vectors + // uint16x4_t foldedMinima1 = vmin_u16(vget_high_u16(minima), vget_low_u16(minima)); + // uint16x4_t foldedMaxima1 = vmax_u16(vget_high_u16(maxima), vget_low_u16(maxima)); + + uint16x8_t foldedMinima1 = vpminq_u16(minima, minima); + uint16x8_t foldedMinima2 = vpminq_u16(foldedMinima1, foldedMinima1); + uint16x8_t foldedMinima3 = vpminq_u16(foldedMinima2, foldedMinima2); + + uint16x8_t foldedMaxima1 = vpmaxq_u16(maxima, maxima); + uint16x8_t foldedMaxima2 = vpmaxq_u16(foldedMaxima1, foldedMaxima1); + uint16x8_t foldedMaxima3 = vpmaxq_u16(foldedMaxima2, foldedMaxima2); + + minimumIndex = vgetq_lane_u16(foldedMinima3, 0); + maximumIndex = vgetq_lane_u16(foldedMaxima3, 0); + } else { + // 8-bit indices + uint8x16_t minima = vdupq_n_u8(0xff); 
+ uint8x16_t maxima = vdupq_n_u8(0); + + while (vertexCount >= vertsPerLoop) { + uint8x16_t data = vld1q_u8(indexBuffer); + minima = vminq_u8(data, minima); + maxima = vmaxq_u8(data, maxima); + + indexBuffer += 16; + vertexCount -= vertsPerLoop; + } + + // Do a similar horizontal min/max as in the u16 case, except now we're working with uint8x16 instead of uint16x8, so we need 4 folds instead of 3 + uint8x16_t foldedMinima1 = vpminq_u8(minima, minima); + uint8x16_t foldedMinima2 = vpminq_u8(foldedMinima1, foldedMinima1); + uint8x16_t foldedMinima3 = vpminq_u8(foldedMinima2, foldedMinima2); + uint8x16_t foldedMinima4 = vpminq_u8(foldedMinima3, foldedMinima3); + + uint8x16_t foldedMaxima1 = vpmaxq_u8(maxima, maxima); + uint8x16_t foldedMaxima2 = vpmaxq_u8(foldedMaxima1, foldedMaxima1); + uint8x16_t foldedMaxima3 = vpmaxq_u8(foldedMaxima2, foldedMaxima2); + uint8x16_t foldedMaxima4 = vpmaxq_u8(foldedMaxima3, foldedMaxima3); + + minimumIndex = u16(vgetq_lane_u8(foldedMinima4, 0)); + maximumIndex = u16(vgetq_lane_u8(foldedMaxima4, 0)); + } + + // If any indices could not be processed cause the buffer size is not 16-byte aligned, process them the naive way + // Calculate the minimum and maximum indices used in the index buffer, so we'll only upload them + while (vertexCount > 0) { + if constexpr (useShortIndices) { + u16 index = *reinterpret_cast(indexBuffer); + minimumIndex = std::min(minimumIndex, index); + maximumIndex = std::max(maximumIndex, index); + indexBuffer += 2; + } else { + u16 index = u16(*indexBuffer++); + minimumIndex = std::min(minimumIndex, index); + maximumIndex = std::max(maximumIndex, index); + } + + vertexCount -= 1; + } + + return {minimumIndex, maximumIndex}; + } +#endif + +#if defined(PICA_SIMD_X64) && (defined(__SSE4_1__) || defined(__AVX__)) + template + ALWAYS_INLINE std::pair analyzeSSE4_1(u8* indexBuffer, u32 vertexCount) { + // We process 16 bytes per iteration, which is 8 vertices if we're using u16 + // indices or 16 vertices if we're using u8 indices +
constexpr u32 vertsPerLoop = (useShortIndices) ? 8 : 16; + + if (vertexCount < vertsPerLoop) { + return analyzePortable(indexBuffer, vertexCount); + } + + u16 minimumIndex, maximumIndex; + + if constexpr (useShortIndices) { + // Calculate the horizontal minimum/maximum value across an SSE vector of 16-bit unsigned integers. + // Based on https://stackoverflow.com/a/22259607 + auto horizontalMin16 = [](__m128i vector) -> u16 { return u16(_mm_cvtsi128_si32(_mm_minpos_epu16(vector))); }; + + auto horizontalMax16 = [](__m128i vector) -> u16 { + // We have an instruction to compute horizontal minimum but not maximum, so we use it. + // To use it, we have to subtract each value from 0xFFFF (which we do with an xor), then execute a horizontal minimum + __m128i flipped = _mm_xor_si128(vector, _mm_set_epi32(0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu)); + u16 min = u16(_mm_cvtsi128_si32(_mm_minpos_epu16(flipped))); + return u16(min ^ 0xffff); + }; + + // 16-bit indices + // Initialize the minima vector to all FFs (So 0xFFFF for each 16-bit lane) + // And the maxima vector to all 0s (0 for each 16-bit lane) + __m128i minima = _mm_set_epi32(0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu); + __m128i maxima = _mm_set_epi32(0, 0, 0, 0); + + while (vertexCount >= vertsPerLoop) { + const __m128i data = _mm_loadu_si128(reinterpret_cast(indexBuffer)); + minima = _mm_min_epu16(data, minima); + maxima = _mm_max_epu16(data, maxima); + + indexBuffer += 16; + vertexCount -= vertsPerLoop; + } + + minimumIndex = u16(horizontalMin16(minima)); + maximumIndex = u16(horizontalMax16(maxima)); + } else { + // Calculate the horizontal minimum/maximum value across an SSE vector of 8-bit unsigned integers. 
+ // Based on https://stackoverflow.com/a/22259607 + auto horizontalMin8 = [](__m128i vector) -> u8 { + vector = _mm_min_epu8(vector, _mm_shuffle_epi32(vector, _MM_SHUFFLE(3, 2, 3, 2))); + vector = _mm_min_epu8(vector, _mm_shuffle_epi32(vector, _MM_SHUFFLE(1, 1, 1, 1))); + vector = _mm_min_epu8(vector, _mm_shufflelo_epi16(vector, _MM_SHUFFLE(1, 1, 1, 1))); + vector = _mm_min_epu8(vector, _mm_srli_epi16(vector, 8)); + return u8(_mm_cvtsi128_si32(vector)); + }; + + auto horizontalMax8 = [](__m128i vector) -> u8 { + vector = _mm_max_epu8(vector, _mm_shuffle_epi32(vector, _MM_SHUFFLE(3, 2, 3, 2))); + vector = _mm_max_epu8(vector, _mm_shuffle_epi32(vector, _MM_SHUFFLE(1, 1, 1, 1))); + vector = _mm_max_epu8(vector, _mm_shufflelo_epi16(vector, _MM_SHUFFLE(1, 1, 1, 1))); + vector = _mm_max_epu8(vector, _mm_srli_epi16(vector, 8)); + return u8(_mm_cvtsi128_si32(vector)); + }; + + // 8-bit indices + // Initialize the minima vector to all FFs (So 0xFF for each 8-bit lane) + // And the maxima vector to all 0s (0 for each 8-bit lane) + __m128i minima = _mm_set_epi32(0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu); + __m128i maxima = _mm_set_epi32(0, 0, 0, 0); + + while (vertexCount >= vertsPerLoop) { + const __m128i data = _mm_loadu_si128(reinterpret_cast(indexBuffer)); + minima = _mm_min_epu8(data, minima); + maxima = _mm_max_epu8(data, maxima); + + indexBuffer += 16; + vertexCount -= vertsPerLoop; + } + + minimumIndex = u16(horizontalMin8(minima)); + maximumIndex = u16(horizontalMax8(maxima)); + } + + // If any indices could not be processed cause the buffer size + // is not 16-byte aligned, process them the naive way + // Calculate the minimum and maximum indices used in the index + // buffer, so we'll only upload them + while (vertexCount > 0) { + if constexpr (useShortIndices) { + u16 index = *reinterpret_cast(indexBuffer); + minimumIndex = std::min(minimumIndex, index); + maximumIndex = std::max(maximumIndex, index); + indexBuffer += 2; + } else { + u16 index = 
u16(*indexBuffer++); + minimumIndex = std::min(minimumIndex, index); + maximumIndex = std::max(maximumIndex, index); + } + + vertexCount -= 1; + } + + return {minimumIndex, maximumIndex}; + } +#endif + + // Analyzes a PICA index buffer to get the minimum and maximum indices in the + // buffer, and returns them in a pair in the form [min, max]. Takes a template + // parameter to decide whether the indices in the buffer are u8 or u16 + template + std::pair analyze(u8* indexBuffer, u32 vertexCount) { +#if defined(PICA_SIMD_ARM64) + return analyzeNEON(indexBuffer, vertexCount); +#elif defined(PICA_SIMD_X64) && (defined(__SSE4_1__) || defined(__AVX__)) + // Annoyingly, MSVC refuses to define __SSE4_1__ even when we're building with AVX + return analyzeSSE4_1(indexBuffer, vertexCount); +#else + return analyzePortable(indexBuffer, vertexCount); +#endif + } + + // In some really unfortunate scenarios (eg Android Studio emulator), we don't have access to glDrawRangeElementsBaseVertex + // So we need to subtract the base vertex index from every index in the index buffer ourselves + // This is not really common, so we do it without SIMD for the moment, just to be able to run on Android Studio + template + void subtractBaseIndex(u8* indexBuffer, u32 indexCount, u16 baseIndex) { + // Subtract the base index from every index in the buffer, in place (u8 indices use the base index truncated to 8 bits) + if constexpr (useShortIndices) { + u16* indexBuffer16 = reinterpret_cast(indexBuffer); + + for (u32 i = 0; i < indexCount; i++) { + indexBuffer16[i] -= baseIndex; + } + } else { + u8 baseIndex8 = u8(baseIndex); + + for (u32 i = 0; i < indexCount; i++) { + indexBuffer[i] -= baseIndex8; + } + } + } +} // namespace PICA::IndexBuffer diff --git a/include/PICA/pica_vert_config.hpp b/include/PICA/pica_vert_config.hpp new file mode 100644 index 00000000..4300e454 --- /dev/null +++ b/include/PICA/pica_vert_config.hpp @@ -0,0 +1,57 @@ +#pragma once +#include +#include +#include +#include +#include + +#include
"PICA/pica_hash.hpp" +#include "PICA/regs.hpp" +#include "PICA/shader.hpp" +#include "bitfield.hpp" +#include "helpers.hpp" + +namespace PICA { + // Configuration struct used + struct VertConfig { + PICAHash::HashType shaderHash; + PICAHash::HashType opdescHash; + u32 entrypoint; + + // PICA registers for configuring shader output->fragment semantic mapping + std::array outmaps{}; + u16 outputMask; + u8 outputCount; + bool usingUbershader; + + // Pad to 56 bytes so that the compiler won't insert unnecessary padding, which in turn will affect our unordered_map lookup + // As the padding will get hashed and memcmp'd... + u32 pad{}; + + bool operator==(const VertConfig& config) const { + // Hash function and equality operator required by std::unordered_map + return std::memcmp(this, &config, sizeof(VertConfig)) == 0; + } + + VertConfig(PICAShader& shader, const std::array& regs, bool usingUbershader) : usingUbershader(usingUbershader) { + shaderHash = shader.getCodeHash(); + opdescHash = shader.getOpdescHash(); + entrypoint = shader.entrypoint; + + outputCount = regs[PICA::InternalRegs::ShaderOutputCount] & 7; + outputMask = regs[PICA::InternalRegs::VertexShaderOutputMask]; + for (int i = 0; i < outputCount; i++) { + // Mask out unused bits + outmaps[i] = regs[PICA::InternalRegs::ShaderOutmap0 + i] & 0x1F1F1F1F; + } + } + }; +} // namespace PICA + +static_assert(sizeof(PICA::VertConfig) == 56); + +// Override std::hash for our vertex config class +template <> +struct std::hash { + std::size_t operator()(const PICA::VertConfig& config) const noexcept { return PICAHash::computeHash((const char*)&config, sizeof(config)); } +}; \ No newline at end of file diff --git a/include/PICA/pica_vertex.hpp b/include/PICA/pica_vertex.hpp index 800dff9a..bbb32edb 100644 --- a/include/PICA/pica_vertex.hpp +++ b/include/PICA/pica_vertex.hpp @@ -1,7 +1,8 @@ #pragma once -#include "PICA/float_types.hpp" #include +#include "PICA/float_types.hpp" + namespace PICA { // A representation of 
the output vertex as it comes out of the vertex shader, with padding and all struct Vertex { diff --git a/include/PICA/regs.hpp b/include/PICA/regs.hpp index 4342ebe5..3185d350 100644 --- a/include/PICA/regs.hpp +++ b/include/PICA/regs.hpp @@ -51,6 +51,18 @@ namespace PICA { #undef defineTexEnv // clang-format on + // Fog registers + FogColor = 0xE1, + FogLUTIndex = 0xE6, + FogLUTData0 = 0xE8, + FogLUTData1 = 0xE9, + FogLUTData2 = 0xEA, + FogLUTData3 = 0xEB, + FogLUTData4 = 0xEC, + FogLUTData5 = 0xED, + FogLUTData6 = 0xEE, + FogLUTData7 = 0xEF, + // Framebuffer registers ColourOperation = 0x100, BlendFunc = 0x101, @@ -67,7 +79,29 @@ namespace PICA { ColourBufferLoc = 0x11D, FramebufferSize = 0x11E, - //LightingRegs + // Lighting registers + LightingEnable = 0x8F, + Light0Specular0 = 0x140, + Light0Specular1 = 0x141, + Light0Diffuse = 0x142, + Light0Ambient = 0x143, + Light0XY = 0x144, + Light0Z = 0x145, + Light0SpotlightXY = 0x146, + Light0SpotlightZ = 0x147, + Light0Config = 0x149, + Light0AttenuationBias = 0x14A, + Light0AttenuationScale = 0x14B, + + LightGlobalAmbient = 0x1C0, + LightNumber = 0x1C2, + LightConfig0 = 0x1C3, + LightConfig1 = 0x1C4, + LightPermutation = 0x1D9, + LightLUTAbs = 0x1D0, + LightLUTSelect = 0x1D1, + LightLUTScale = 0x1D2, + LightingLUTIndex = 0x01C5, LightingLUTData0 = 0x01C8, LightingLUTData1 = 0x01C9, @@ -231,7 +265,8 @@ namespace PICA { enum : u32 { LUT_D0 = 0, LUT_D1, - LUT_FR, + // LUT 2 is not used, the emulator internally uses it for referring to the current source's spotlight in shaders + LUT_FR = 0x3, LUT_RB, LUT_RG, LUT_RR, @@ -255,6 +290,11 @@ namespace PICA { }; } + // There's actually 8 different LUTs (SP0-SP7), one for each light with different indices (8-15) + // We use an unused LUT value for "this light source's spotlight" instead and figure out which light source to use in compileLutLookup + // This is particularly intuitive in several places, such as checking if a LUT is enabled + static constexpr int spotlightLutIndex 
= 2; + enum class TextureFmt : u32 { RGBA8 = 0x0, RGB8 = 0x1, @@ -345,4 +385,156 @@ namespace PICA { GeometryPrimitive = 3, }; + enum class CompareFunction : u32 { + Never = 0, + Always = 1, + Equal = 2, + NotEqual = 3, + Less = 4, + LessOrEqual = 5, + Greater = 6, + GreaterOrEqual = 7, + }; + + enum class LogicOpMode : u32 { + Clear = 0, + And = 1, + ReverseAnd = 2, + Copy = 3, + Set = 4, + InvertedCopy = 5, + Nop = 6, + Invert = 7, + Nand = 8, + Or = 9, + Nor = 10, + Xor = 11, + Equiv = 12, + InvertedAnd = 13, + ReverseOr = 14, + InvertedOr = 15, + }; + + enum class FogMode : u32 { + Disabled = 0, + Fog = 5, + Gas = 7, + }; + + struct TexEnvConfig { + enum class Source : u8 { + PrimaryColor = 0x0, + PrimaryFragmentColor = 0x1, + SecondaryFragmentColor = 0x2, + Texture0 = 0x3, + Texture1 = 0x4, + Texture2 = 0x5, + Texture3 = 0x6, + // TODO: Inbetween values are unknown + PreviousBuffer = 0xD, + Constant = 0xE, + Previous = 0xF, + }; + + enum class ColorOperand : u8 { + SourceColor = 0x0, + OneMinusSourceColor = 0x1, + SourceAlpha = 0x2, + OneMinusSourceAlpha = 0x3, + SourceRed = 0x4, + OneMinusSourceRed = 0x5, + // TODO: Inbetween values are unknown + SourceGreen = 0x8, + OneMinusSourceGreen = 0x9, + // Inbetween values are unknown + SourceBlue = 0xC, + OneMinusSourceBlue = 0xD, + }; + + enum class AlphaOperand : u8 { + SourceAlpha = 0x0, + OneMinusSourceAlpha = 0x1, + SourceRed = 0x2, + OneMinusSourceRed = 0x3, + SourceGreen = 0x4, + OneMinusSourceGreen = 0x5, + SourceBlue = 0x6, + OneMinusSourceBlue = 0x7, + }; + + enum class Operation : u8 { + Replace = 0, + Modulate = 1, + Add = 2, + AddSigned = 3, + Lerp = 4, + Subtract = 5, + Dot3RGB = 6, + Dot3RGBA = 7, + MultiplyAdd = 8, + AddMultiply = 9, + }; + + // RGB sources + Source colorSource1, colorSource2, colorSource3; + // Alpha sources + Source alphaSource1, alphaSource2, alphaSource3; + + // RGB operands + ColorOperand colorOperand1, colorOperand2, colorOperand3; + // Alpha operands + AlphaOperand 
alphaOperand1, alphaOperand2, alphaOperand3; + + // Texture environment operations for this stage + Operation colorOp, alphaOp; + + u32 constColor; + + private: + // These are the only private members since their value doesn't actually reflect the scale + // So we make them private so we'll always use the appropriate member functions instead + u8 colorScale; + u8 alphaScale; + + public: + // Create texture environment object from TEV registers + TexEnvConfig(u32 source, u32 operand, u32 combiner, u32 color, u32 scale) : constColor(color) { + colorSource1 = Helpers::getBits<0, 4, Source>(source); + colorSource2 = Helpers::getBits<4, 4, Source>(source); + colorSource3 = Helpers::getBits<8, 4, Source>(source); + + alphaSource1 = Helpers::getBits<16, 4, Source>(source); + alphaSource2 = Helpers::getBits<20, 4, Source>(source); + alphaSource3 = Helpers::getBits<24, 4, Source>(source); + + colorOperand1 = Helpers::getBits<0, 4, ColorOperand>(operand); + colorOperand2 = Helpers::getBits<4, 4, ColorOperand>(operand); + colorOperand3 = Helpers::getBits<8, 4, ColorOperand>(operand); + + alphaOperand1 = Helpers::getBits<12, 3, AlphaOperand>(operand); + alphaOperand2 = Helpers::getBits<16, 3, AlphaOperand>(operand); + alphaOperand3 = Helpers::getBits<20, 3, AlphaOperand>(operand); + + colorOp = Helpers::getBits<0, 4, Operation>(combiner); + alphaOp = Helpers::getBits<16, 4, Operation>(combiner); + + colorScale = Helpers::getBits<0, 2>(scale); + alphaScale = Helpers::getBits<16, 2>(scale); + } + + u32 getColorScale() { return (colorScale <= 2) ? (1 << colorScale) : 1; } + u32 getAlphaScale() { return (alphaScale <= 2) ?
(1 << alphaScale) : 1; } + + bool isPassthroughStage() { + // clang-format off + // Thank you to the Citra dev that wrote this out + return ( + colorOp == Operation::Replace && alphaOp == Operation::Replace && + colorSource1 == Source::Previous && alphaSource1 == Source::Previous && + colorOperand1 == ColorOperand::SourceColor && alphaOperand1 == AlphaOperand::SourceAlpha && + getColorScale() == 1 && getAlphaScale() == 1 + ); + // clang-format on + } + }; } // namespace PICA diff --git a/include/PICA/shader.hpp b/include/PICA/shader.hpp index 10f6ec88..1040d2ff 100644 --- a/include/PICA/shader.hpp +++ b/include/PICA/shader.hpp @@ -1,6 +1,8 @@ #pragma once #include #include +#include +#include #include #include "PICA/float_types.hpp" @@ -21,7 +23,7 @@ namespace ShaderOpcodes { DST = 0x04, EX2 = 0x05, LG2 = 0x06, - LIT = 0x07, + LITP = 0x07, MUL = 0x08, SGE = 0x09, SLT = 0x0A, @@ -56,6 +58,10 @@ namespace ShaderOpcodes { }; } +namespace PICA::ShaderGen { + class ShaderDecompiler; +}; + // Note: All PICA f24 vec4 registers must have the alignas(16) specifier to make them easier to access in SSE/NEON code in the JIT class PICAShader { using f24 = Floats::f24; @@ -90,14 +96,22 @@ class PICAShader { public: // These are placed close to the temp registers and co because it helps the JIT generate better code u32 entrypoint = 0; // Initial shader PC - u32 boolUniform; - std::array, 4> intUniforms; + + // We want these registers in this order & with this alignment for uploading them directly to a UBO + // When emulating shaders on the GPU. Plus this alignment for float uniforms is necessary for doing SIMD in the shader->CPU recompilers. 
alignas(16) std::array floatUniforms; + alignas(16) std::array, 4> intUniforms; + u32 boolUniform; alignas(16) std::array fixedAttributes; // Fixed vertex attributes alignas(16) std::array inputs; // Attributes passed to the shader alignas(16) std::array outputs; alignas(16) vec4f dummy = vec4f({f24::zero(), f24::zero(), f24::zero(), f24::zero()}); // Dummy register used by the JIT + + // We use a hashmap for matching 3DS shaders to their equivalent compiled code in our shader cache in the shader JIT + // We choose our hash type to be a 64-bit integer by default, as the collision chance is very tiny and generating it is decently optimal + // Ideally we want to be able to support multiple different types of hash depending on compilation settings, but let's get this working first + using Hash = PICAHash::HashType; protected: std::array operandDescriptors; @@ -116,20 +130,20 @@ class PICAShader { std::array callInfo; ShaderType type; - // We use a hashmap for matching 3DS shaders to their equivalent compiled code in our shader cache in the shader JIT - // We choose our hash type to be a 64-bit integer by default, as the collision chance is very tiny and generating it is decently optimal - // Ideally we want to be able to support multiple different types of hash depending on compilation settings, but let's get this working first - using Hash = PICAHash::HashType; - Hash lastCodeHash = 0; // Last hash computed for the shader code (Used for the JIT caching mechanism) Hash lastOpdescHash = 0; // Last hash computed for the operand descriptors (Also used for the JIT) + public: + bool uniformsDirty = false; + + protected: bool codeHashDirty = false; bool opdescHashDirty = false; // Add these as friend classes for the JIT so it has access to all important state friend class ShaderJIT; friend class ShaderEmitter; + friend class PICA::ShaderGen::ShaderDecompiler; vec4f getSource(u32 source); vec4f& getDest(u32 dest); @@ -151,6 +165,7 @@ class PICAShader { void jmpc(u32 
instruction); void jmpu(u32 instruction); void lg2(u32 instruction); + void litp(u32 instruction); void loop(u32 instruction); void mad(u32 instruction); void madi(u32 instruction); @@ -220,13 +235,9 @@ class PICAShader { public: static constexpr size_t maxInstructionCount = 4096; std::array loadedShader; // Currently loaded & active shader - std::array bufferedShader; // Shader to be transferred when the SH_CODETRANSFER_END reg gets written to PICAShader(ShaderType type) : type(type) {} - // Theese functions are in the header to be inlined more easily, though with LTO I hope I'll be able to move them - void finalize() { std::memcpy(&loadedShader[0], &bufferedShader[0], 4096 * sizeof(u32)); } - void setBufferIndex(u32 index) { bufferIndex = index & 0xfff; } void setOpDescriptorIndex(u32 index) { opDescriptorIndex = index & 0x7f; } @@ -235,7 +246,7 @@ class PICAShader { Helpers::panic("o no, shader upload overflew"); } - bufferedShader[bufferIndex++] = word; + loadedShader[bufferIndex++] = word; bufferIndex &= 0xfff; codeHashDirty = true; // Signal the JIT if necessary that the program hash has potentially changed @@ -277,6 +288,7 @@ class PICAShader { uniform[2] = f24::fromRaw(((floatUniformBuffer[0] & 0xff) << 16) | (floatUniformBuffer[1] >> 16)); uniform[3] = f24::fromRaw(floatUniformBuffer[0] >> 8); } + uniformsDirty = true; } } @@ -288,6 +300,12 @@ class PICAShader { u[1] = getBits<8, 8>(word); u[2] = getBits<16, 8>(word); u[3] = getBits<24, 8>(word); + uniformsDirty = true; + } + + void uploadBoolUniform(u32 value) { + boolUniform = value; + uniformsDirty = true; } void run(); @@ -295,4 +313,13 @@ class PICAShader { Hash getCodeHash(); Hash getOpdescHash(); -}; \ No newline at end of file + + // Returns how big the PICA uniforms are combined. Used for hw accelerated shaders where we upload the uniforms to our GPU. 
+ static constexpr usize totalUniformSize() { return sizeof(floatUniforms) + sizeof(intUniforms) + sizeof(boolUniform); } + void* getUniformPointer() { return static_cast(&floatUniforms); } +}; + +static_assert( + offsetof(PICAShader, intUniforms) == offsetof(PICAShader, floatUniforms) + 96 * sizeof(float) * 4 && + offsetof(PICAShader, boolUniform) == offsetof(PICAShader, intUniforms) + 4 * sizeof(u8) * 4 +); \ No newline at end of file diff --git a/include/PICA/shader_decompiler.hpp b/include/PICA/shader_decompiler.hpp new file mode 100644 index 00000000..4a5cdc13 --- /dev/null +++ b/include/PICA/shader_decompiler.hpp @@ -0,0 +1,131 @@ +#pragma once +#include + +#include +#include +#include +#include +#include +#include + +#include "PICA/shader.hpp" +#include "PICA/shader_gen_types.hpp" + +struct EmulatorConfig; + +namespace PICA::ShaderGen { + // Control flow analysis is partially based on + // https://github.com/PabloMK7/citra/blob/d0179559466ff09731d74474322ee880fbb44b00/src/video_core/shader/generator/glsl_shader_decompiler.cpp#L33 + struct ControlFlow { + // A continuous range of addresses + struct AddressRange { + u32 start, end; + AddressRange(u32 start, u32 end) : start(start), end(end) {} + + // Use lexicographic comparison for functions in order to sort them in a set + bool operator<(const AddressRange& other) const { return std::tie(start, end) < std::tie(other.start, other.end); } + }; + + struct Function { + using Labels = std::set; + + enum class ExitMode { + Unknown, // Can't guarantee whether we'll exit properly, fall back to CPU shaders (can happen with jmp shenanigans) + AlwaysReturn, // All paths reach the return point. + Conditional, // One or more code paths reach the return point or an END instruction conditionally. + AlwaysEnd, // All paths reach an END instruction. 
+ }; + + u32 start; // Starting PC of the function + u32 end; // End PC of the function + Labels outLabels{}; // Labels this function can "goto" (jump) to + ExitMode exitMode = ExitMode::Unknown; + + explicit Function(u32 start, u32 end) : start(start), end(end) {} + bool operator<(const Function& other) const { return AddressRange(start, end) < AddressRange(other.start, other.end); } + + std::string getIdentifier() const { return fmt::format("fn_{}_{}", start, end); } + // To handle weird control flow, we have to return from each function a bool that indicates whether or not the shader reached an end + // instruction and should thus terminate. This is necessary for games like Rayman and Gravity Falls, which have "END" instructions called + // from within functions deep in the callstack + std::string getForwardDecl() const { return fmt::format("bool fn_{}_{}();\n", start, end); } + std::string getCallStatement() const { return fmt::format("fn_{}_{}()", start, end); } + }; + + std::set functions{}; + std::map exitMap{}; + + // Tells us whether analysis of the shader we're trying to compile failed, in which case we'll need to fail back to shader emulation + // On the CPU + bool analysisFailed = false; + + // This will recursively add all functions called by the function too, as analyzeFunction will call addFunction on control flow instructions + const Function* addFunction(const PICAShader& shader, u32 start, u32 end) { + auto searchIterator = functions.find(Function(start, end)); + if (searchIterator != functions.end()) { + return &(*searchIterator); + } + + // Add this function and analyze it if it doesn't already exist + Function function(start, end); + function.exitMode = analyzeFunction(shader, start, end, function.outLabels); + + // This function could not be fully analyzed, report failure + if (function.exitMode == Function::ExitMode::Unknown) { + analysisFailed = true; + return nullptr; + } + + // Add function to our function list + auto [it, added] = 
functions.insert(std::move(function)); + return &(*it); + } + + void analyze(const PICAShader& shader, u32 entrypoint); + Function::ExitMode analyzeFunction(const PICAShader& shader, u32 start, u32 end, Function::Labels& labels); + }; + + class ShaderDecompiler { + using AddressRange = ControlFlow::AddressRange; + using Function = ControlFlow::Function; + + ControlFlow controlFlow{}; + + PICAShader& shader; + EmulatorConfig& config; + std::string decompiledShader; + + u32 entrypoint; + + API api; + Language language; + bool compilationError = false; + + void compileInstruction(u32& pc, bool& finished); + // Compile range "range" and returns the end PC or if we're "finished" with the program (called an END instruction) + std::pair compileRange(const AddressRange& range); + void callFunction(const Function& function); + const Function* findFunction(const AddressRange& range); + + void writeAttributes(); + + std::string getSource(u32 source, u32 index) const; + std::string getDest(u32 dest) const; + std::string getSwizzlePattern(u32 swizzle) const; + std::string getDestSwizzle(u32 destinationMask) const; + const char* getCondition(u32 cond, u32 refX, u32 refY); + + void setDest(u32 operandDescriptor, const std::string& dest, const std::string& value); + // Returns if the instruction uses the typical register encodings most instructions use + // With some exceptions like MAD/MADI, and the control flow instructions which are completely different + bool usesCommonEncoding(u32 instruction) const; + + public: + ShaderDecompiler(PICAShader& shader, EmulatorConfig& config, u32 entrypoint, API api, Language language) + : shader(shader), entrypoint(entrypoint), config(config), api(api), language(language), decompiledShader("") {} + + std::string decompile(); + }; + + std::string decompileShader(PICAShader& shader, EmulatorConfig& config, u32 entrypoint, API api, Language language); +} // namespace PICA::ShaderGen \ No newline at end of file diff --git 
a/include/PICA/shader_gen.hpp b/include/PICA/shader_gen.hpp new file mode 100644 index 00000000..b6751e05 --- /dev/null +++ b/include/PICA/shader_gen.hpp @@ -0,0 +1,43 @@ +#pragma once +#include + +#include "PICA/gpu.hpp" +#include "PICA/pica_frag_config.hpp" +#include "PICA/pica_vert_config.hpp" +#include "PICA/regs.hpp" +#include "PICA/shader_gen_types.hpp" +#include "helpers.hpp" + +namespace PICA::ShaderGen { + class FragmentGenerator { + API api; + Language language; + + void compileTEV(std::string& shader, int stage, const PICA::FragmentConfig& config); + void getSource(std::string& shader, PICA::TexEnvConfig::Source source, int index, const PICA::FragmentConfig& config); + void getColorOperand(std::string& shader, PICA::TexEnvConfig::Source source, PICA::TexEnvConfig::ColorOperand color, int index, const PICA::FragmentConfig& config); + void getAlphaOperand(std::string& shader, PICA::TexEnvConfig::Source source, PICA::TexEnvConfig::AlphaOperand alpha, int index, const PICA::FragmentConfig& config); + void getColorOperation(std::string& shader, PICA::TexEnvConfig::Operation op); + void getAlphaOperation(std::string& shader, PICA::TexEnvConfig::Operation op); + + void applyAlphaTest(std::string& shader, const PICA::FragmentConfig& config); + void compileLights(std::string& shader, const PICA::FragmentConfig& config); + void compileLUTLookup(std::string& shader, const PICA::FragmentConfig& config, u32 lightIndex, u32 lutID); + bool isSamplerEnabled(u32 environmentID, u32 lutID); + + void compileFog(std::string& shader, const PICA::FragmentConfig& config); + void compileLogicOps(std::string& shader, const PICA::FragmentConfig& config); + + public: + FragmentGenerator(API api, Language language) : api(api), language(language) {} + std::string generate(const PICA::FragmentConfig& config, void* driverInfo = nullptr); + std::string getDefaultVertexShader(); + // For when PICA shader is acceleration is enabled. 
Turn the PICA shader source into a proper vertex shader + std::string getVertexShaderAccelerated(const std::string& picaSource, const PICA::VertConfig& vertConfig, bool usingUbershader); + + void setTarget(API api, Language language) { + this->api = api; + this->language = language; + } + }; +}; // namespace PICA::ShaderGen \ No newline at end of file diff --git a/include/PICA/shader_gen_types.hpp b/include/PICA/shader_gen_types.hpp new file mode 100644 index 00000000..1877227f --- /dev/null +++ b/include/PICA/shader_gen_types.hpp @@ -0,0 +1,9 @@ +#pragma once + +namespace PICA::ShaderGen { + // Graphics API this shader is targetting + enum class API { GL, GLES, Vulkan }; + + // Shading language to use (Only GLSL for the time being) + enum class Language { GLSL }; +} // namespace PICA::ShaderGen \ No newline at end of file diff --git a/include/PICA/shader_unit.hpp b/include/PICA/shader_unit.hpp index d8d93160..80e01346 100644 --- a/include/PICA/shader_unit.hpp +++ b/include/PICA/shader_unit.hpp @@ -2,10 +2,9 @@ #include "PICA/shader.hpp" class ShaderUnit { - -public: - PICAShader vs; // Vertex shader - PICAShader gs; // Geometry shader + public: + PICAShader vs; // Vertex shader + PICAShader gs; // Geometry shader ShaderUnit() : vs(ShaderType::Vertex), gs(ShaderType::Geometry) {} void reset(); diff --git a/include/align.hpp b/include/align.hpp new file mode 100644 index 00000000..8ce50bb2 --- /dev/null +++ b/include/align.hpp @@ -0,0 +1,100 @@ +// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#pragma once + +#include + +#include "compiler_builtins.hpp" +#include "helpers.hpp" + +#ifdef _WIN32 +#include +#endif + +namespace Common { + template + constexpr bool isAligned(T value, unsigned int alignment) { + return (value % static_cast(alignment)) == 0; + } + + template + constexpr T alignUp(T value, unsigned int alignment) { + return (value + static_cast(alignment - 1)) / static_cast(alignment) * 
static_cast(alignment); + } + + template + constexpr T alignDown(T value, unsigned int alignment) { + return value / static_cast(alignment) * static_cast(alignment); + } + + template + constexpr bool isAlignedPow2(T value, unsigned int alignment) { + return (value & static_cast(alignment - 1)) == 0; + } + + template + constexpr T alignUpPow2(T value, unsigned int alignment) { + return (value + static_cast(alignment - 1)) & static_cast(~static_cast(alignment - 1)); + } + + template + constexpr T alignDownPow2(T value, unsigned int alignment) { + return value & static_cast(~static_cast(alignment - 1)); + } + + template + constexpr bool isPow2(T value) { + return (value & (value - 1)) == 0; + } + + template + constexpr T previousPow2(T value) { + if (value == static_cast(0)) return 0; + + value |= (value >> 1); + value |= (value >> 2); + value |= (value >> 4); + if constexpr (sizeof(T) >= 16) value |= (value >> 8); + if constexpr (sizeof(T) >= 32) value |= (value >> 16); + if constexpr (sizeof(T) >= 64) value |= (value >> 32); + return value - (value >> 1); + } + + template + constexpr T nextPow2(T value) { + // https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 + if (value == static_cast(0)) return 0; + + value--; + value |= (value >> 1); + value |= (value >> 2); + value |= (value >> 4); + if constexpr (sizeof(T) >= 16) value |= (value >> 8); + if constexpr (sizeof(T) >= 32) value |= (value >> 16); + if constexpr (sizeof(T) >= 64) value |= (value >> 32); + value++; + return value; + } + + ALWAYS_INLINE static void* alignedMalloc(size_t size, size_t alignment) { +#ifdef _WIN32 + return _aligned_malloc(size, alignment); +#else + // Unaligned sizes are slow on macOS. +#ifdef __APPLE__ + if (isPow2(alignment)) size = (size + alignment - 1) & ~(alignment - 1); +#endif + void* ret = nullptr; + return (posix_memalign(&ret, alignment, size) == 0) ? 
ret : nullptr; +#endif + } + + ALWAYS_INLINE static void alignedFree(void* ptr) { +#ifdef _MSC_VER + _aligned_free(ptr); +#else + free(ptr); +#endif + } +} // namespace Common diff --git a/include/audio/aac.hpp b/include/audio/aac.hpp index afd2dbba..389ecc04 100644 --- a/include/audio/aac.hpp +++ b/include/audio/aac.hpp @@ -54,6 +54,15 @@ namespace Audio::AAC { u32_le sampleCount; }; + struct DecodeRequest { + u32_le address; // Address of input AAC stream + u32_le size; // Size of input AAC stream + u32_le destAddrLeft; // Output address for left channel samples + u32_le destAddrRight; // Output address for right channel samples + u32_le unknown1; + u32_le unknown2; + }; + struct Message { u16_le mode = Mode::None; // Encode or decode AAC? u16_le command = Command::Init; @@ -62,7 +71,9 @@ namespace Audio::AAC { // Info on the AAC request union { std::array commandData{}; + DecodeResponse decodeResponse; + DecodeRequest decodeRequest; }; }; diff --git a/include/audio/aac_decoder.hpp b/include/audio/aac_decoder.hpp new file mode 100644 index 00000000..8964e1f2 --- /dev/null +++ b/include/audio/aac_decoder.hpp @@ -0,0 +1,25 @@ +#pragma once +#include + +#include "audio/aac.hpp" +#include "helpers.hpp" + +struct AAC_DECODER_INSTANCE; + +namespace Audio::AAC { + class Decoder { + using DecoderHandle = AAC_DECODER_INSTANCE*; + using PaddrCallback = std::function; + + DecoderHandle decoderHandle = nullptr; + + bool isInitialized() { return decoderHandle != nullptr; } + void initialize(); + + public: + // Decode function. 
Takes in a reference to the AAC response & request, and a callback for paddr -> pointer conversions + // We also allow for optionally muting the AAC output (setting all of it to 0) instead of properly decoding it, for debug/research purposes + void decode(AAC::Message& response, const AAC::Message& request, PaddrCallback paddrCallback, bool enableAudio = true); + ~Decoder(); + }; +} // namespace Audio::AAC \ No newline at end of file diff --git a/include/audio/audio_interpolation.hpp b/include/audio/audio_interpolation.hpp new file mode 100644 index 00000000..8a87cbcd --- /dev/null +++ b/include/audio/audio_interpolation.hpp @@ -0,0 +1,58 @@ +// Copyright 2016 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include + +#include "audio/hle_mixer.hpp" +#include "helpers.hpp" + +namespace Audio::Interpolation { + // A variable length buffer of signed PCM16 stereo samples. + using StereoBuffer16 = std::deque>; + using StereoFrame16 = Audio::DSPMixer::StereoFrame; + + struct State { + // Two historical samples. + std::array xn1 = {}; //< x[n-1] + std::array xn2 = {}; //< x[n-2] + // Current fractional position. + u64 fposition = 0; + }; + + /** + * No interpolation. This is equivalent to a zero-order hold. There is a two-sample predelay. + * @param state Interpolation state. + * @param input Input buffer. + * @param rate Stretch factor. Must be a positive non-zero value. + * rate > 1.0 performs decimation and rate < 1.0 performs upsampling. + * @param output The resampled audio buffer. + * @param outputi The index of output to start writing to. + */ + void none(State& state, StereoBuffer16& input, float rate, StereoFrame16& output, usize& outputi); + + /** + * Linear interpolation. This is equivalent to a first-order hold. There is a two-sample predelay. + * @param state Interpolation state. + * @param input Input buffer. + * @param rate Stretch factor. 
Must be a positive non-zero value. + * rate > 1.0 performs decimation and rate < 1.0 performs upsampling. + * @param output The resampled audio buffer. + * @param outputi The index of output to start writing to. + */ + void linear(State& state, StereoBuffer16& input, float rate, StereoFrame16& output, usize& outputi); + + /** + * Polyphase interpolation. This is currently stubbed to just perform linear interpolation + * @param state Interpolation state. + * @param input Input buffer. + * @param rate Stretch factor. Must be a positive non-zero value. + * rate > 1.0 performs decimation and rate < 1.0 performs upsampling. + * @param output The resampled audio buffer. + * @param outputi The index of output to start writing to. + */ + void polyphase(State& state, StereoBuffer16& input, float rate, StereoFrame16& output, usize& outputi); +} // namespace Audio::Interpolation \ No newline at end of file diff --git a/include/audio/dsp_core.hpp b/include/audio/dsp_core.hpp index a4fb1ab1..f180e717 100644 --- a/include/audio/dsp_core.hpp +++ b/include/audio/dsp_core.hpp @@ -8,12 +8,13 @@ #include "helpers.hpp" #include "logger.hpp" -#include "scheduler.hpp" #include "ring_buffer.hpp" +#include "scheduler.hpp" // The DSP core must have access to the DSP service to be able to trigger interrupts properly class DSPService; class Memory; +struct EmulatorConfig; namespace Audio { // There are 160 stereo samples in 1 audio frame, so 320 samples total @@ -24,12 +25,14 @@ namespace Audio { static constexpr u64 lleSlice = 16384; class DSPCore { - using Samples = Common::RingBuffer; + // 0x2000 stereo (= 2 channel) samples + using Samples = Common::RingBuffer; protected: Memory& mem; Scheduler& scheduler; DSPService& dspService; + EmulatorConfig& settings; Samples sampleBuffer; bool audioEnabled = false; @@ -38,12 +41,12 @@ namespace Audio { public: enum class Type { Null, Teakra, HLE }; - DSPCore(Memory& mem, Scheduler& scheduler, DSPService& dspService) - : mem(mem), 
scheduler(scheduler), dspService(dspService) {} + DSPCore(Memory& mem, Scheduler& scheduler, DSPService& dspService, EmulatorConfig& settings) + : mem(mem), scheduler(scheduler), dspService(dspService), settings(settings) {} virtual ~DSPCore() {} virtual void reset() = 0; - virtual void runAudioFrame() = 0; + virtual void runAudioFrame(u64 eventTimestamp) = 0; virtual u8* getDspMemory() = 0; virtual u16 recvData(u32 regId) = 0; @@ -62,5 +65,5 @@ namespace Audio { virtual void setAudioEnabled(bool enable) { audioEnabled = enable; } }; - std::unique_ptr makeDSPCore(DSPCore::Type type, Memory& mem, Scheduler& scheduler, DSPService& dspService); + std::unique_ptr makeDSPCore(EmulatorConfig& config, Memory& mem, Scheduler& scheduler, DSPService& dspService); } // namespace Audio \ No newline at end of file diff --git a/include/audio/dsp_shared_mem.hpp b/include/audio/dsp_shared_mem.hpp index e776211d..272edf7e 100644 --- a/include/audio/dsp_shared_mem.hpp +++ b/include/audio/dsp_shared_mem.hpp @@ -324,8 +324,8 @@ namespace Audio::HLE { BitField<15, 1, u32> outputBufferCountDirty; BitField<16, 1, u32> masterVolumeDirty; - BitField<24, 1, u32> auxReturnVolume0Dirty; - BitField<25, 1, u32> auxReturnVolume1Dirty; + BitField<24, 1, u32> auxVolume0Dirty; + BitField<25, 1, u32> auxVolume1Dirty; BitField<26, 1, u32> outputFormatDirty; BitField<27, 1, u32> clippingModeDirty; BitField<28, 1, u32> headphonesConnectedDirty; @@ -337,7 +337,7 @@ namespace Audio::HLE { /// The DSP has three intermediate audio mixers. This controls the volume level (0.0-1.0) for /// each at the final mixer. 
float_le masterVolume; - std::array auxReturnVolume; + std::array auxVolumes; u16_le outputBufferCount; u16 pad1[2]; @@ -422,7 +422,7 @@ namespace Audio::HLE { struct DspStatus { u16_le unknown; - u16_le dropped_frames; + u16_le droppedFrames; u16 pad0[0xE]; }; ASSERT_DSP_STRUCT(DspStatus, 32); diff --git a/include/audio/dsp_simd.hpp b/include/audio/dsp_simd.hpp new file mode 100644 index 00000000..9a0e723a --- /dev/null +++ b/include/audio/dsp_simd.hpp @@ -0,0 +1,78 @@ +#pragma once + +#include "audio/hle_mixer.hpp" +#include "compiler_builtins.hpp" +#include "helpers.hpp" + +#if defined(_M_AMD64) || defined(__x86_64__) +#define DSP_SIMD_X64 +#include +#elif defined(_M_ARM64) || defined(__aarch64__) +#define DSP_SIMD_ARM64 +#include +#endif + +// Optimized SIMD functions for mixing the stereo output of a DSP voice into a quadraphonic intermediate mix +namespace DSP::MixIntoQuad { + using IntermediateMix = Audio::DSPMixer::IntermediateMix; + using StereoFrame16 = Audio::DSPMixer::StereoFrame; + + // Non-SIMD, portable algorithm + ALWAYS_INLINE static void mixPortable(IntermediateMix& mix, StereoFrame16& frame, const float* gains) { + for (usize sampleIndex = 0; sampleIndex < Audio::samplesInFrame; sampleIndex++) { + // Mono samples are in the format: (l, r) + // When converting to quad, gain0 and gain2 are applied to the left sample, gain1 and gain3 to the right one + mix[sampleIndex][0] += s32(frame[sampleIndex][0] * gains[0]); + mix[sampleIndex][1] += s32(frame[sampleIndex][1] * gains[1]); + mix[sampleIndex][2] += s32(frame[sampleIndex][0] * gains[2]); + mix[sampleIndex][3] += s32(frame[sampleIndex][1] * gains[3]); + } + } + +#if defined(DSP_SIMD_X64) && (defined(__SSE4_1__) || defined(__AVX__)) + ALWAYS_INLINE static void mixSSE4_1(IntermediateMix& mix, StereoFrame16& frame, const float* gains) { + __m128 gains_ = _mm_load_ps(gains); + + for (usize sampleIndex = 0; sampleIndex < Audio::samplesInFrame; sampleIndex++) { + // The stereo samples, repeated every 4 
bytes inside the vector register + __m128i stereoSamples = _mm_castps_si128(_mm_load1_ps((float*)&frame[sampleIndex][0])); + + __m128 currentFrame = _mm_cvtepi32_ps(_mm_cvtepi16_epi32(stereoSamples)); + __m128i offset = _mm_cvttps_epi32(_mm_mul_ps(currentFrame, gains_)); + __m128i intermediateMixPrev = _mm_load_si128((__m128i*)&mix[sampleIndex][0]); + __m128i result = _mm_add_epi32(intermediateMixPrev, offset); + _mm_store_si128((__m128i*)&mix[sampleIndex][0], result); + } + } +#endif + +#ifdef DSP_SIMD_ARM64 + ALWAYS_INLINE static void mixNEON(IntermediateMix& mix, StereoFrame16& frame, const float* gains) { + float32x4_t gains_ = vld1q_f32(gains); + + for (usize sampleIndex = 0; sampleIndex < Audio::samplesInFrame; sampleIndex++) { + // Load l and r samples and repeat them every 4 bytes + int32x4_t stereoSamples = vld1q_dup_s32((s32*)&frame[sampleIndex][0]); + // Expand the bottom 4 s16 samples into an int32x4 with sign extension, then convert them to float32x4 + float32x4_t currentFrame = vcvtq_f32_s32(vmovl_s16(vget_low_s16(vreinterpretq_s16_s32(stereoSamples)))); + + // Multiply samples by their respective gains, truncate the result, and add it into the intermediate mix buffer + int32x4_t offset = vcvtq_s32_f32(vmulq_f32(currentFrame, gains_)); + int32x4_t intermediateMixPrev = vld1q_s32((s32*)&mix[sampleIndex][0]); + int32x4_t result = vaddq_s32(intermediateMixPrev, offset); + vst1q_s32((s32*)&mix[sampleIndex][0], result); + } + } +#endif + + // Mixes the stereo output of a DSP voice into a quadraphonic intermediate mix + static void mix(IntermediateMix& mix, StereoFrame16& frame, const float* gains) { +#if defined(DSP_SIMD_ARM64) + return mixNEON(mix, frame, gains); +#elif defined(DSP_SIMD_X64) && (defined(__SSE4_1__) || defined(__AVX__)) + return mixSSE4_1(mix, frame, gains); +#else + return mixPortable(mix, frame, gains); +#endif + } +} // namespace DSP::MixIntoQuad \ No newline at end of file diff --git a/include/audio/hle_core.hpp 
b/include/audio/hle_core.hpp index b59dc811..0e3b8636 100644 --- a/include/audio/hle_core.hpp +++ b/include/audio/hle_core.hpp @@ -2,18 +2,19 @@ #include #include #include +#include #include #include #include "audio/aac.hpp" +#include "audio/aac_decoder.hpp" +#include "audio/audio_interpolation.hpp" #include "audio/dsp_core.hpp" #include "audio/dsp_shared_mem.hpp" +#include "audio/hle_mixer.hpp" #include "memory.hpp" namespace Audio { - using SampleFormat = HLE::SourceConfiguration::Configuration::Format; - using SourceType = HLE::SourceConfiguration::Configuration::MonoOrStereo; - struct DSPSource { // Audio buffer information // https://www.3dbrew.org/wiki/DSP_Memory_Region @@ -33,8 +34,8 @@ namespace Audio { SampleFormat format; SourceType sourceType; - bool fromQueue = false; // Is this buffer from the buffer queue or an embedded buffer? - bool hasPlayedOnce = false; // Has the buffer been played at least once before? + bool fromQueue = false; // Is this buffer from the buffer queue or an embedded buffer? + bool hasPlayedOnce = false; // Has the buffer been played at least once before? bool operator<(const Buffer& other) const { // Lower ID = Higher priority @@ -42,17 +43,34 @@ namespace Audio { return this->bufferID > other.bufferID; } }; + // Buffer of decoded PCM16 samples. TODO: Are there better alternatives to use over deque? using SampleBuffer = std::deque>; - using BufferQueue = std::priority_queue; + using InterpolationMode = HLE::SourceConfiguration::Configuration::InterpolationMode; + using InterpolationState = Audio::Interpolation::State; + + // The samples this voice output for this audio frame. + // Aligned to 4 for SIMD purposes. 
+ alignas(4) DSPMixer::StereoFrame currentFrame; BufferQueue buffers; SampleFormat sampleFormat = SampleFormat::ADPCM; SourceType sourceType = SourceType::Stereo; + InterpolationMode interpolationMode = InterpolationMode::Linear; + InterpolationState interpolationState; + + // There's one gain configuration for each of the 3 intermediate mixing stages + // And each gain configuration is composed of 4 gain values, one for each sample in a quad-channel sample + // Aligned to 16 for SIMD purposes + alignas(16) std::array, 3> gains; + // Of the 3 intermediate mix stages, typically only the first one is actually enabled and the other ones do nothing + // Ie their gain is vec4(0.0). We track which stages are disabled (have a gain of all 0s) using this bitfield and skip them + // In order to save up on CPU time. + uint enabledMixStages = 0; - std::array gain0, gain1, gain2; u32 samplePosition; // Sample number into the current audio buffer + float rateMultiplier; u16 syncCount; u16 currentBufferID; u16 previousBufferID; @@ -95,22 +113,23 @@ namespace Audio { // The audio frame types are public in case we want to use them for unit tests public: template - using Sample = std::array; + using Sample = DSPMixer::Sample; template - using Frame = std::array, 160>; + using Frame = DSPMixer::Frame; template - using MonoFrame = Frame; + using MonoFrame = DSPMixer::MonoFrame; template - using StereoFrame = Frame; + using StereoFrame = DSPMixer::StereoFrame; template - using QuadFrame = Frame; + using QuadFrame = DSPMixer::QuadFrame; using Source = Audio::DSPSource; using SampleBuffer = Source::SampleBuffer; + using IntermediateMix = DSPMixer::IntermediateMix; private: enum class DSPState : u32 { @@ -127,6 +146,9 @@ namespace Audio { std::array sources; // DSP voices Audio::HLE::DspMemory dspRam; + Audio::DSPMixer mixer; + std::unique_ptr aacDecoder; + void resetAudioPipe(); bool loaded = false; // Have we loaded a component? 
@@ -142,7 +164,7 @@ namespace Audio { } else if (counter1 == 0xffff && counter0 != 0xfffe) { return 0; } else { - return counter0 > counter1 ? 0 : 0; + return (counter0 > counter1) ? 0 : 1; } } @@ -169,9 +191,12 @@ namespace Audio { void handleAACRequest(const AAC::Message& request); void updateSourceConfig(Source& source, HLE::SourceConfiguration::Configuration& config, s16_le* adpcmCoefficients); + void updateMixerConfig(HLE::SharedMemory& sharedMem); void generateFrame(StereoFrame& frame); void generateFrame(DSPSource& source); void outputFrame(); + // Perform the final mix, mixing the quadraphonic samples from all voices into the output audio frame + void performMix(Audio::HLE::SharedMemory& readRegion, Audio::HLE::SharedMemory& writeRegion); // Decode an entire buffer worth of audio void decodeBuffer(DSPSource& source); @@ -181,11 +206,11 @@ namespace Audio { SampleBuffer decodeADPCM(const u8* data, usize sampleCount, Source& source); public: - HLE_DSP(Memory& mem, Scheduler& scheduler, DSPService& dspService); + HLE_DSP(Memory& mem, Scheduler& scheduler, DSPService& dspService, EmulatorConfig& config); ~HLE_DSP() override {} void reset() override; - void runAudioFrame() override; + void runAudioFrame(u64 eventTimestamp) override; u8* getDspMemory() override { return dspRam.rawMemory.data(); } @@ -199,5 +224,4 @@ namespace Audio { void setSemaphore(u16 value) override {} void setSemaphoreMask(u16 value) override {} }; - } // namespace Audio diff --git a/include/audio/hle_mixer.hpp b/include/audio/hle_mixer.hpp new file mode 100644 index 00000000..ed8b4a09 --- /dev/null +++ b/include/audio/hle_mixer.hpp @@ -0,0 +1,50 @@ +#pragma once +#include + +#include "audio/dsp_shared_mem.hpp" +#include "helpers.hpp" + +namespace Audio { + using SampleFormat = HLE::SourceConfiguration::Configuration::Format; + using SourceType = HLE::SourceConfiguration::Configuration::MonoOrStereo; + + class DSPMixer { + public: + template + using Sample = std::array; + + template + using 
Frame = std::array, 160>; + + template + using MonoFrame = Frame; + + template + using StereoFrame = Frame; + + template + using QuadFrame = Frame; + + // Internally the DSP uses four channels when mixing. + // Neatly, QuadFrame means that every sample is a uint32x4 value, which is particularly nice for SIMD mixing + using IntermediateMix = QuadFrame; + + private: + using ChannelFormat = HLE::DspConfiguration::OutputFormat; + // The audio from each DSP voice is converted to quadraphonic and then fed into 3 intermediate mixing stages + // Two of these intermediate mixers (second and third) are used for effects, including custom effects done on the CPU + static constexpr usize mixerStageCount = 3; + + public: + ChannelFormat channelFormat = ChannelFormat::Stereo; + std::array volumes; + std::array enableAuxStages; + + void reset() { + channelFormat = ChannelFormat::Stereo; + + volumes.fill(0.0); + enableAuxStages.fill(false); + } + }; +} // namespace Audio \ No newline at end of file diff --git a/include/audio/miniaudio_device.hpp b/include/audio/miniaudio_device.hpp index f4d126d8..0363aa44 100644 --- a/include/audio/miniaudio_device.hpp +++ b/include/audio/miniaudio_device.hpp @@ -3,29 +3,39 @@ #include #include +#include "config.hpp" +#include "helpers.hpp" #include "miniaudio.h" #include "ring_buffer.hpp" class MiniAudioDevice { - using Samples = Common::RingBuffer; + using Samples = Common::RingBuffer; static constexpr ma_uint32 sampleRate = 32768; // 3DS sample rate static constexpr ma_uint32 channelCount = 2; // Audio output is stereo + ma_device device; ma_context context; ma_device_config deviceConfig; - ma_device device; - ma_resampler resampler; Samples* samples = nullptr; + const AudioDeviceConfig& audioSettings; + bool initialized = false; bool running = false; + // Store the last stereo sample we output. We play this when underruning to avoid pops. 
+ std::array lastStereoSample; std::vector audioDevices; + public: - MiniAudioDevice(); + MiniAudioDevice(const AudioDeviceConfig& audioSettings); + // If safe is on, we create a null audio device void init(Samples& samples, bool safe = false); + void close(); void start(); void stop(); + + bool isInitialized() const { return initialized; } }; \ No newline at end of file diff --git a/include/audio/null_core.hpp b/include/audio/null_core.hpp index 7d6f1c9e..e7ae12dc 100644 --- a/include/audio/null_core.hpp +++ b/include/audio/null_core.hpp @@ -20,14 +20,14 @@ namespace Audio { std::array dspRam; void resetAudioPipe(); - bool loaded = false; // Have we loaded a component? + bool loaded = false; // Have we loaded a component? public: - NullDSP(Memory& mem, Scheduler& scheduler, DSPService& dspService) : DSPCore(mem, scheduler, dspService) {} + NullDSP(Memory& mem, Scheduler& scheduler, DSPService& dspService, EmulatorConfig& config) : DSPCore(mem, scheduler, dspService, config) {} ~NullDSP() override {} void reset() override; - void runAudioFrame() override; + void runAudioFrame(u64 eventTimestamp) override; u8* getDspMemory() override { return dspRam.data(); } diff --git a/include/audio/teakra_core.hpp b/include/audio/teakra_core.hpp index 6a011231..3fd5abc3 100644 --- a/include/audio/teakra_core.hpp +++ b/include/audio/teakra_core.hpp @@ -77,13 +77,13 @@ namespace Audio { } public: - TeakraDSP(Memory& mem, Scheduler& scheduler, DSPService& dspService); + TeakraDSP(Memory& mem, Scheduler& scheduler, DSPService& dspService, EmulatorConfig& config); ~TeakraDSP() override {} void reset() override; // Run 1 slice of DSP instructions and schedule the next audio frame - void runAudioFrame() override { + void runAudioFrame(u64 eventTimestamp) override { runSlice(); scheduler.addEvent(Scheduler::EventType::RunDSP, scheduler.currentTimestamp + Audio::lleSlice * 2); } diff --git a/include/config.hpp b/include/config.hpp index 339e651c..d45aa05c 100644 --- a/include/config.hpp 
+++ b/include/config.hpp @@ -1,8 +1,35 @@ #pragma once #include +#include #include "audio/dsp_core.hpp" +#include "frontend_settings.hpp" #include "renderer.hpp" +#include "services/region_codes.hpp" + +struct AudioDeviceConfig { + // Audio curve to use for volumes between 0-100 + enum class VolumeCurve : int { + Cubic = 0, // Samples are scaled by volume ^ 3 + Linear = 1, // Samples are scaled by volume + }; + + float volumeRaw = 1.0f; + VolumeCurve volumeCurve = VolumeCurve::Cubic; + + bool muteAudio = false; + + float getVolume() const { + if (muteAudio) { + return 0.0f; + } + + return volumeRaw; + } + + static VolumeCurve volumeCurveFromString(std::string inString); + static const char* volumeCurveToString(VolumeCurve curve); +}; // Remember to initialize every field here to its default value otherwise bad things will happen struct EmulatorConfig { @@ -13,27 +40,80 @@ struct EmulatorConfig { static constexpr bool shaderJitDefault = false; #endif + // For now, use specialized shaders by default on MacOS as M1 drivers are buggy when using the ubershader, and on Android since mobile GPUs are + // horrible. 
On other platforms we default to ubershader + shadergen fallback for lights +#if defined(__ANDROID__) || defined(__APPLE__) + static constexpr bool ubershaderDefault = false; +#else + static constexpr bool ubershaderDefault = true; +#endif + static constexpr bool accelerateShadersDefault = true; + +#if defined(__LIBRETRO__) + static constexpr bool audioEnabledDefault = true; +#else + static constexpr bool audioEnabledDefault = false; +#endif + bool shaderJitEnabled = shaderJitDefault; + bool useUbershaders = ubershaderDefault; + bool accelerateShaders = accelerateShadersDefault; + bool accurateShaderMul = false; bool discordRpcEnabled = false; + + // Toggles whether to force shadergen when there's more than N lights active and we're using the ubershader, for better performance + bool forceShadergenForLights = true; + int lightShadergenThreshold = 1; + RendererType rendererType = RendererType::OpenGL; - Audio::DSPCore::Type dspType = Audio::DSPCore::Type::Null; + Audio::DSPCore::Type dspType = Audio::DSPCore::Type::HLE; bool sdCardInserted = true; bool sdWriteProtected = false; bool usePortableBuild = false; - bool audioEnabled = false; + bool audioEnabled = audioEnabledDefault; bool vsyncEnabled = true; + bool aacEnabled = true; // Enable AAC audio? 
+ + bool enableRenderdoc = false; + bool printAppVersion = true; + bool printDSPFirmware = false; bool chargerPlugged = true; // Default to 3% battery to make users suffer int batteryPercentage = 3; + LanguageCodes systemLanguage = LanguageCodes::English; + // Default ROM path to open in Qt and misc frontends std::filesystem::path defaultRomPath = ""; std::filesystem::path filePath; + // Frontend window settings + struct WindowSettings { + static constexpr int defaultX = 200; + static constexpr int defaultY = 200; + static constexpr int defaultWidth = 800; + static constexpr int defaultHeight = 240 * 2; + + bool rememberPosition = false; // Remember window position & size + bool showAppVersion = false; + + int x = defaultX; + int y = defaultY; + int width = defaultHeight; + int height = defaultHeight; + }; + + WindowSettings windowSettings; + AudioDeviceConfig audioDeviceConfig; + FrontendSettings frontendSettings; + EmulatorConfig(const std::filesystem::path& path); void load(); void save(); + + static LanguageCodes languageCodeFromString(std::string inString); + static const char* languageCodeToString(LanguageCodes code); }; \ No newline at end of file diff --git a/include/crypto/aes_engine.hpp b/include/crypto/aes_engine.hpp index 324f4adf..c96b36d3 100644 --- a/include/crypto/aes_engine.hpp +++ b/include/crypto/aes_engine.hpp @@ -1,20 +1,29 @@ #pragma once #include -#include -#include #include +#include +#include #include #include +#include #include "helpers.hpp" +#include "io_file.hpp" +#include "swap.hpp" namespace Crypto { - constexpr std::size_t AesKeySize = 0x10; + constexpr usize AesKeySize = 0x10; using AESKey = std::array; - template - static std::array rolArray(const std::array& value, std::size_t bits) { + struct Seed { + u64_le titleID; + AESKey seed; + std::array pad; + }; + + template + static std::array rolArray(const std::array& value, usize bits) { const auto bitWidth = N * CHAR_BIT; bits %= bitWidth; @@ -24,18 +33,18 @@ namespace Crypto { 
std::array result; - for (std::size_t i = 0; i < N; i++) { + for (usize i = 0; i < N; i++) { result[i] = ((value[(i + byteShift) % N] << bitShift) | (value[(i + byteShift + 1) % N] >> (CHAR_BIT - bitShift))) & UINT8_MAX; } return result; } - template + template static std::array addArray(const std::array& a, const std::array& b) { std::array result; - std::size_t sum = 0; - std::size_t carry = 0; + usize sum = 0; + usize carry = 0; for (std::int64_t i = N - 1; i >= 0; i--) { sum = a[i] + b[i] + carry; @@ -46,11 +55,11 @@ namespace Crypto { return result; } - template + template static std::array xorArray(const std::array& a, const std::array& b) { std::array result; - for (std::size_t i = 0; i < N; i++) { + for (usize i = 0; i < N; i++) { result[i] = a[i] ^ b[i]; } @@ -63,7 +72,7 @@ namespace Crypto { } AESKey rawKey; - for (std::size_t i = 0; i < rawKey.size(); i++) { + for (usize i = 0; i < rawKey.size(); i++) { rawKey[i] = static_cast(std::stoi(hex.substr(i * 2, 2), 0, 16)); } @@ -76,7 +85,7 @@ namespace Crypto { std::optional normalKey = std::nullopt; }; - enum KeySlotId : std::size_t { + enum KeySlotId : usize { NCCHKey0 = 0x2C, NCCHKey1 = 0x25, NCCHKey2 = 0x18, @@ -84,14 +93,17 @@ namespace Crypto { }; class AESEngine { - private: - constexpr static std::size_t AesKeySlotCount = 0x40; + private: + constexpr static usize AesKeySlotCount = 0x40; std::optional m_generator = std::nullopt; std::array m_slots; bool keysLoaded = false; - constexpr void updateNormalKey(std::size_t slotId) { + std::vector seeds; + IOFile seedDatabase; + + constexpr void updateNormalKey(usize slotId) { if (m_generator.has_value() && hasKeyX(slotId) && hasKeyY(slotId)) { auto& keySlot = m_slots.at(slotId); AESKey keyX = keySlot.keyX.value(); @@ -101,13 +113,17 @@ namespace Crypto { } } - public: + public: AESEngine() {} void loadKeys(const std::filesystem::path& path); + void setSeedPath(const std::filesystem::path& path); + // Returns true on success, false on failure + bool 
loadSeeds(); + bool haveKeys() { return keysLoaded; } bool haveGenerator() { return m_generator.has_value(); } - constexpr bool hasKeyX(std::size_t slotId) { + constexpr bool hasKeyX(usize slotId) { if (slotId >= AesKeySlotCount) { return false; } @@ -115,18 +131,16 @@ namespace Crypto { return m_slots.at(slotId).keyX.has_value(); } - constexpr AESKey getKeyX(std::size_t slotId) { - return m_slots.at(slotId).keyX.value_or(AESKey{}); - } + constexpr AESKey getKeyX(usize slotId) { return m_slots.at(slotId).keyX.value_or(AESKey{}); } - constexpr void setKeyX(std::size_t slotId, const AESKey &key) { + constexpr void setKeyX(usize slotId, const AESKey& key) { if (slotId < AesKeySlotCount) { m_slots.at(slotId).keyX = key; updateNormalKey(slotId); } } - constexpr bool hasKeyY(std::size_t slotId) { + constexpr bool hasKeyY(usize slotId) { if (slotId >= AesKeySlotCount) { return false; } @@ -134,18 +148,16 @@ namespace Crypto { return m_slots.at(slotId).keyY.has_value(); } - constexpr AESKey getKeyY(std::size_t slotId) { - return m_slots.at(slotId).keyY.value_or(AESKey{}); - } + constexpr AESKey getKeyY(usize slotId) { return m_slots.at(slotId).keyY.value_or(AESKey{}); } - constexpr void setKeyY(std::size_t slotId, const AESKey &key) { + constexpr void setKeyY(usize slotId, const AESKey& key) { if (slotId < AesKeySlotCount) { m_slots.at(slotId).keyY = key; updateNormalKey(slotId); } } - constexpr bool hasNormalKey(std::size_t slotId) { + constexpr bool hasNormalKey(usize slotId) { if (slotId >= AesKeySlotCount) { return false; } @@ -153,14 +165,14 @@ namespace Crypto { return m_slots.at(slotId).normalKey.has_value(); } - constexpr AESKey getNormalKey(std::size_t slotId) { - return m_slots.at(slotId).normalKey.value_or(AESKey{}); - } + constexpr AESKey getNormalKey(usize slotId) { return m_slots.at(slotId).normalKey.value_or(AESKey{}); } - constexpr void setNormalKey(std::size_t slotId, const AESKey &key) { + constexpr void setNormalKey(usize slotId, const AESKey& key) { if 
(slotId < AesKeySlotCount) { m_slots.at(slotId).normalKey = key; } } + + std::optional getSeedFromDB(u64 titleID); }; -} \ No newline at end of file +} // namespace Crypto diff --git a/include/discord_rpc.hpp b/include/discord_rpc.hpp index 9b244faf..62bd0c6b 100644 --- a/include/discord_rpc.hpp +++ b/include/discord_rpc.hpp @@ -17,6 +17,8 @@ namespace Discord { void init(); void update(RPCStatus status, const std::string& title); void stop(); + + bool running() const { return enabled; } }; } // namespace Discord diff --git a/include/emulator.hpp b/include/emulator.hpp index de04648e..a222a021 100644 --- a/include/emulator.hpp +++ b/include/emulator.hpp @@ -66,7 +66,6 @@ class Emulator { #ifdef PANDA3DS_ENABLE_DISCORD_RPC Discord::RPC discordRpc; #endif - void setAudioEnabled(bool enable); void updateDiscord(); // Keep the handle for the ROM here to reload when necessary and to prevent deleting it @@ -90,7 +89,6 @@ class Emulator { ~Emulator(); void step(); - void render(); void reset(ReloadOption reload); void runFrame(); // Poll the scheduler for events @@ -99,6 +97,7 @@ class Emulator { void resume(); // Resume the emulator void pause(); // Pause the emulator void togglePause(); + void setAudioEnabled(bool enable); bool loadAmiibo(const std::filesystem::path& path); bool loadROM(const std::filesystem::path& path); @@ -118,6 +117,9 @@ class Emulator { void setOutputSize(u32 width, u32 height) { gpu.setOutputSize(width, height); } void deinitGraphicsContext() { gpu.deinitGraphicsContext(); } + // Reloads some settings that require special handling, such as audio enable + void reloadSettings(); + EmulatorConfig& getConfig() { return config; } Cheats& getCheats() { return cheats; } ServiceManager& getServiceManager() { return kernel.getServiceManager(); } @@ -135,4 +137,7 @@ class Emulator { std::filesystem::path getAppDataRoot(); std::span getSMDH(); + + private: + void loadRenderdoc(); }; diff --git a/include/frontend_settings.hpp b/include/frontend_settings.hpp 
new file mode 100644 index 00000000..ae967879 --- /dev/null +++ b/include/frontend_settings.hpp @@ -0,0 +1,34 @@ +#pragma once +#include + +// Some UI settings that aren't fully frontend-dependent. Note: Not all frontends will support the same settings. +// Note: Any enums should ideally be ordered in the same order we want to show them in UI dropdown menus, so that we can cast indices to enums +// directly. +struct FrontendSettings { + enum class Theme : int { + System = 0, + Light = 1, + Dark = 2, + GreetingsCat = 3, + Cream = 4, + }; + + // Different panda-themed window icons + enum class WindowIcon : int { + Rpog = 0, + Rsyn = 1, + Rnap = 2, + Rcow = 3, + SkyEmu = 4, + }; + + Theme theme = Theme::Dark; + WindowIcon icon = WindowIcon::Rpog; + std::string language = "en"; + + static Theme themeFromString(std::string inString); + static const char* themeToString(Theme theme); + + static WindowIcon iconFromString(std::string inString); + static const char* iconToString(WindowIcon icon); +}; diff --git a/include/fs/archive_base.hpp b/include/fs/archive_base.hpp index 2843be68..475ca50e 100644 --- a/include/fs/archive_base.hpp +++ b/include/fs/archive_base.hpp @@ -7,6 +7,7 @@ #include #include #include + #include "helpers.hpp" #include "memory.hpp" #include "result.hpp" @@ -15,13 +16,13 @@ using Result::HorizonResult; namespace PathType { - enum : u32 { - Invalid = 0, - Empty = 1, - Binary = 2, - ASCII = 3, - UTF16 = 4, - }; + enum : u32 { + Invalid = 0, + Empty = 1, + Binary = 2, + ASCII = 3, + UTF16 = 4, + }; } namespace ArchiveID { @@ -34,91 +35,103 @@ namespace ArchiveID { SDMC = 9, SDMCWriteOnly = 0xA, + CardSPI = 0x12345679, SavedataAndNcch = 0x2345678A, // 3DBrew: This is the same as the regular SaveData archive, except with this the savedata ID and mediatype is loaded from the input archive // lowpath. UserSaveData1 = 0x567890B2, // 3DBrew: Similar to 0x567890B2 but can only access Accessible Save specified in exheader? 
UserSaveData2 = 0x567890B4, + + TwlPhoto = 0x567890AC, + TwlSound = 0x567890AD, }; - static std::string toString(u32 id) { - switch (id) { - case SelfNCCH: return "SelfNCCH"; - case SaveData: return "SaveData"; - case ExtSaveData: return "ExtSaveData"; - case SharedExtSaveData: return "SharedExtSaveData"; - case SystemSaveData: return "SystemSaveData"; - case SDMC: return "SDMC"; - case SDMCWriteOnly: return "SDMC (Write-only)"; - case SavedataAndNcch: return "Savedata & NCCH (archive 0x2345678A)"; - default: return "Unknown archive"; - } - } -} + static std::string toString(u32 id) { + switch (id) { + case SelfNCCH: return "SelfNCCH"; + case SaveData: return "SaveData"; + case ExtSaveData: return "ExtSaveData"; + case SharedExtSaveData: return "SharedExtSaveData"; + case SystemSaveData: return "SystemSaveData"; + case SDMC: return "SDMC"; + case SDMCWriteOnly: return "SDMC (Write-only)"; + case SavedataAndNcch: return "Savedata & NCCH (archive 0x2345678A)"; + case TwlPhoto: return "TWL_PHOTO"; + case TwlSound: return "TWL_SOUND"; + default: return "Unknown archive"; + } + } +} // namespace ArchiveID struct FSPath { - u32 type = PathType::Invalid; + u32 type = PathType::Invalid; - std::vector binary; // Path data for binary paths - std::string string; // Path data for ASCII paths - std::u16string utf16_string; + std::vector binary; // Path data for binary paths + std::string string; // Path data for ASCII paths + std::u16string utf16_string; - FSPath() {} + FSPath() {} - FSPath(u32 type, const std::vector& vec) : type(type) { - switch (type) { - case PathType::Binary: - binary = std::move(vec); - break; + FSPath(u32 type, const std::vector& vec) : type(type) { + switch (type) { + case PathType::Binary: binary = std::move(vec); break; - case PathType::ASCII: - string.resize(vec.size() - 1); // -1 because of the null terminator - std::memcpy(string.data(), vec.data(), vec.size() - 1); // Copy string data - break; + case PathType::ASCII: + string.resize(vec.size() - 
1); // -1 because of the null terminator + std::memcpy(string.data(), vec.data(), vec.size() - 1); // Copy string data + break; - case PathType::UTF16: { - const size_t size = vec.size() / sizeof(u16) - 1; // Character count. -1 because null terminator here too - utf16_string.resize(size); - std::memcpy(utf16_string.data(), vec.data(), size * sizeof(u16)); - break; - } -; } - } + case PathType::UTF16: { + const size_t size = vec.size() / sizeof(u16) - 1; // Character count. -1 because null terminator here too + utf16_string.resize(size); + std::memcpy(utf16_string.data(), vec.data(), size * sizeof(u16)); + break; + }; + } + } + + bool isUTF16() const { return type == PathType::UTF16; } + bool isASCII() const { return type == PathType::ASCII; } + bool isBinary() const { return type == PathType::Binary; } + // This is not called "isEmpty()" to make obvious that we're talking about an empty-type path, NOT an empty text path + bool isEmptyType() const { return type == PathType::Empty; } + + bool isTextPath() const { return isUTF16() || isASCII(); } }; struct FilePerms { - u32 raw; + u32 raw; - FilePerms(u32 val) : raw(val) {} - bool read() const { return (raw & 1) != 0; } - bool write() const { return (raw & 2) != 0; } - bool create() const { return (raw & 4) != 0; } + FilePerms(u32 val) : raw(val) {} + bool read() const { return (raw & 1) != 0; } + bool write() const { return (raw & 2) != 0; } + bool create() const { return (raw & 4) != 0; } }; class ArchiveBase; struct FileSession { - ArchiveBase* archive = nullptr; - FILE* fd = nullptr; // File descriptor for file sessions that require them. - FSPath path; - FSPath archivePath; - u32 priority = 0; // TODO: What does this even do - bool isOpen; + ArchiveBase* archive = nullptr; + FILE* fd = nullptr; // File descriptor for file sessions that require them. 
+ FSPath path; + FSPath archivePath; + u32 priority = 0; // TODO: What does this even do + bool isOpen; - FileSession(ArchiveBase* archive, const FSPath& filePath, const FSPath& archivePath, FILE* fd, bool isOpen = true) : - archive(archive), path(filePath), archivePath(archivePath), fd(fd), isOpen(isOpen), priority(0) {} + FileSession(ArchiveBase* archive, const FSPath& filePath, const FSPath& archivePath, FILE* fd, bool isOpen = true) + : archive(archive), path(filePath), archivePath(archivePath), fd(fd), isOpen(isOpen), priority(0) {} - // For cloning a file session - FileSession(const FileSession& other) : archive(other.archive), path(other.path), - archivePath(other.archivePath), fd(other.fd), isOpen(other.isOpen), priority(other.priority) {} + // For cloning a file session + FileSession(const FileSession& other) + : archive(other.archive), path(other.path), archivePath(other.archivePath), fd(other.fd), isOpen(other.isOpen), priority(other.priority) {} }; struct ArchiveSession { - ArchiveBase* archive = nullptr; - FSPath path; - bool isOpen; + ArchiveBase* archive = nullptr; + FSPath path; + bool isOpen; - ArchiveSession(ArchiveBase* archive, const FSPath& filePath, bool isOpen = true) : archive(archive), path(filePath), isOpen(isOpen) {} + ArchiveSession(ArchiveBase* archive, const FSPath& filePath, bool isOpen = true) : archive(archive), path(filePath), isOpen(isOpen) {} }; struct DirectoryEntry { @@ -156,106 +169,125 @@ struct DirectorySession { using FileDescriptor = std::optional; class ArchiveBase { -public: - struct FormatInfo { - u32 size; // Archive size - u32 numOfDirectories; // Number of directories - u32 numOfFiles; // Number of files - bool duplicateData; // Whether to duplicate data or not - }; + public: + struct FormatInfo { + u32 size; // Archive size + u32 numOfDirectories; // Number of directories + u32 numOfFiles; // Number of files + bool duplicateData; // Whether to duplicate data or not + }; -protected: - using Handle = u32; + protected: 
+ using Handle = u32; - static constexpr FileDescriptor NoFile = nullptr; - static constexpr FileDescriptor FileError = std::nullopt; - Memory& mem; + static constexpr FileDescriptor NoFile = nullptr; + static constexpr FileDescriptor FileError = std::nullopt; + Memory& mem; - // Returns if a specified 3DS path in UTF16 or ASCII format is safe or not - // A 3DS path is considered safe if its first character is '/' which means we're not trying to access anything outside the root of the fs - // And if it doesn't contain enough instances of ".." (Indicating "climb up a folder" in filesystems) to let the software climb up the directory tree - // And access files outside of the emulator's app data folder - template - bool isPathSafe(const FSPath& path) { - static_assert(format == PathType::ASCII || format == PathType::UTF16); - using String = typename std::conditional::type; // String type for the path - using Char = typename String::value_type; // Char type for the path + // Returns if a specified 3DS path in UTF16 or ASCII format is safe or not + // A 3DS path is considered safe if its first character is '/' which means we're not trying to access anything outside the root of the fs + // And if it doesn't contain enough instances of ".." 
(Indicating "climb up a folder" in filesystems) to let the software climb up the directory + // tree And access files outside of the emulator's app data folder + template + bool isPathSafe(const FSPath& path) { + static_assert(format == PathType::ASCII || format == PathType::UTF16); + using String = typename std::conditional::type; // String type for the path + using Char = typename String::value_type; // Char type for the path - String pathString, dots; - if constexpr (std::is_same()) { - pathString = path.utf16_string; - dots = u".."; - } else { - pathString = path.string; - dots = ".."; - } + String pathString, dots; + if constexpr (std::is_same()) { + pathString = path.utf16_string; + dots = u".."; + } else { + pathString = path.string; + dots = ".."; + } - // If the path string doesn't begin with / then that means it's accessing outside the FS root, which is invalid & unsafe - if (pathString[0] != Char('/')) return false; + // If the path string doesn't begin with / then that means it's accessing outside the FS root, which is invalid & unsafe + if (pathString[0] != Char('/')) return false; - // Counts how many folders sans the root our file is nested under. - // If it's < 0 at any point of parsing, then the path is unsafe and tries to crawl outside our file sandbox. - // If it's 0 then this is the FS root. - // If it's > 0 then we're in a subdirectory of the root. - int level = 0; + // Counts how many folders sans the root our file is nested under. + // If it's < 0 at any point of parsing, then the path is unsafe and tries to crawl outside our file sandbox. + // If it's 0 then this is the FS root. + // If it's > 0 then we're in a subdirectory of the root. + int level = 0; - // Split the string on / characters and see how many of the substrings are ".." 
- size_t pos = 0; - while ((pos = pathString.find(Char('/'))) != String::npos) { - String token = pathString.substr(0, pos); - pathString.erase(0, pos + 1); + // Split the string on / characters and see how many of the substrings are ".." + size_t pos = 0; + while ((pos = pathString.find(Char('/'))) != String::npos) { + String token = pathString.substr(0, pos); + pathString.erase(0, pos + 1); - if (token == dots) { - level--; - if (level < 0) return false; - } else { - level++; - } - } + if (token == dots) { + level--; + if (level < 0) return false; + } else { + level++; + } + } - return true; - } + return true; + } -public: - virtual std::string name() = 0; - virtual u64 getFreeBytes() = 0; - virtual HorizonResult createFile(const FSPath& path, u64 size) = 0; - virtual HorizonResult deleteFile(const FSPath& path) = 0; + public: + virtual std::string name() = 0; + virtual u64 getFreeBytes() = 0; + virtual HorizonResult createFile(const FSPath& path, u64 size) = 0; + virtual HorizonResult deleteFile(const FSPath& path) = 0; - virtual Rust::Result getFormatInfo(const FSPath& path) { - Helpers::panic("Unimplemented GetFormatInfo for %s archive", name().c_str()); - // Return a dummy struct just to avoid the UB of not returning anything, even if we panic - return Ok(FormatInfo{ .size = 0, .numOfDirectories = 0, .numOfFiles = 0, .duplicateData = false }); - } + virtual Rust::Result getFormatInfo(const FSPath& path) { + Helpers::panic("Unimplemented GetFormatInfo for %s archive", name().c_str()); + // Return a dummy struct just to avoid the UB of not returning anything, even if we panic + return Ok(FormatInfo{.size = 0, .numOfDirectories = 0, .numOfFiles = 0, .duplicateData = false}); + } - virtual HorizonResult createDirectory(const FSPath& path) { - Helpers::panic("Unimplemented CreateDirectory for %s archive", name().c_str()); - return Result::FS::AlreadyExists; - } + virtual HorizonResult createDirectory(const FSPath& path) { + Helpers::panic("Unimplemented 
CreateDirectory for %s archive", name().c_str()); + return Result::FS::AlreadyExists; + } - // Returns nullopt if opening the file failed, otherwise returns a file descriptor to it (nullptr if none is needed) - virtual FileDescriptor openFile(const FSPath& path, const FilePerms& perms) = 0; - virtual Rust::Result openArchive(const FSPath& path) = 0; + // Returns nullopt if opening the file failed, otherwise returns a file descriptor to it (nullptr if none is needed) + virtual FileDescriptor openFile(const FSPath& path, const FilePerms& perms) = 0; + virtual Rust::Result openArchive(const FSPath& path) = 0; - virtual Rust::Result openDirectory(const FSPath& path) { - Helpers::panic("Unimplemented OpenDirectory for %s archive", name().c_str()); - return Err(Result::FS::FileNotFoundAlt); - } + virtual Rust::Result openDirectory(const FSPath& path) { + Helpers::panic("Unimplemented OpenDirectory for %s archive", name().c_str()); + return Err(Result::FS::FileNotFoundAlt); + } - virtual void format(const FSPath& path, const FormatInfo& info) { - Helpers::panic("Unimplemented Format for %s archive", name().c_str()); - } + virtual void format(const FSPath& path, const FormatInfo& info) { Helpers::panic("Unimplemented Format for %s archive", name().c_str()); } - virtual HorizonResult renameFile(const FSPath& oldPath, const FSPath& newPath) { + virtual HorizonResult renameFile(const FSPath& oldPath, const FSPath& newPath) { Helpers::panic("Unimplemented RenameFile for %s archive", name().c_str()); return Result::Success; - } + } - // Read size bytes from a file starting at offset "offset" into a certain buffer in memory - // Returns the number of bytes read, or nullopt if the read failed - virtual std::optional readFile(FileSession* file, u64 offset, u32 size, u32 dataPointer) = 0; + // Read size bytes from a file starting at offset "offset" into a certain buffer in memory + // Returns the number of bytes read, or nullopt if the read failed + virtual std::optional 
readFile(FileSession* file, u64 offset, u32 size, u32 dataPointer) = 0; - ArchiveBase(Memory& mem) : mem(mem) {} + ArchiveBase(Memory& mem) : mem(mem) {} + + bool isSafeTextPath(const FSPath& path) { + if (path.type == PathType::UTF16) { + return isPathSafe(path); + } else if (path.type == PathType::ASCII){ + return isPathSafe(path); + } + + Helpers::panic("ArchiveBase::IsSafeTextPath: Invalid path"); + } + + // Appends a 3DS path to an std::filesystem::path + void appendPath(std::filesystem::path& diskPath, const FSPath& guestPath) { + if (guestPath.type == PathType::UTF16) { + diskPath += std::filesystem::path(guestPath.utf16_string).make_preferred(); + } else if (guestPath.type == PathType::ASCII) { + diskPath += std::filesystem::path(guestPath.string).make_preferred(); + } else [[unlikely]] { + Helpers::panic("ArchiveBase::AppendPath: Invalid 3DS path"); + } + } }; struct ArchiveResource { diff --git a/include/fs/archive_card_spi.hpp b/include/fs/archive_card_spi.hpp new file mode 100644 index 00000000..fefa9933 --- /dev/null +++ b/include/fs/archive_card_spi.hpp @@ -0,0 +1,30 @@ +#pragma once +#include "archive_base.hpp" +#include "result/result.hpp" + +using Result::HorizonResult; + +class CardSPIArchive : public ArchiveBase { + public: + CardSPIArchive(Memory& mem) : ArchiveBase(mem) {} + std::string name() override { return "Card SPI"; } + + u64 getFreeBytes() override { + Helpers::warn("Unimplemented GetFreeBytes for Card SPI archive"); + return 0_MB; + } + + HorizonResult createDirectory(const FSPath& path) override; + HorizonResult createFile(const FSPath& path, u64 size) override; + HorizonResult deleteFile(const FSPath& path) override; + + Rust::Result openArchive(const FSPath& path) override; + Rust::Result openDirectory(const FSPath& path) override; + + FileDescriptor openFile(const FSPath& path, const FilePerms& perms) override; + + std::optional readFile(FileSession* file, u64 offset, u32 size, u32 dataPointer) override { + 
Helpers::panic("Unimplemented ReadFile for Card SPI archive"); + return {}; + }; +}; \ No newline at end of file diff --git a/include/fs/archive_twl_photo.hpp b/include/fs/archive_twl_photo.hpp new file mode 100644 index 00000000..23111e2b --- /dev/null +++ b/include/fs/archive_twl_photo.hpp @@ -0,0 +1,30 @@ +#pragma once +#include "archive_base.hpp" +#include "result/result.hpp" + +using Result::HorizonResult; + +class TWLPhotoArchive : public ArchiveBase { + public: + TWLPhotoArchive(Memory& mem) : ArchiveBase(mem) {} + std::string name() override { return "TWL_PHOTO"; } + + u64 getFreeBytes() override { + Helpers::warn("Unimplemented GetFreeBytes for TWLPhoto archive"); + return 32_MB; + } + + HorizonResult createDirectory(const FSPath& path) override; + HorizonResult createFile(const FSPath& path, u64 size) override; + HorizonResult deleteFile(const FSPath& path) override; + + Rust::Result openArchive(const FSPath& path) override; + Rust::Result openDirectory(const FSPath& path) override; + + FileDescriptor openFile(const FSPath& path, const FilePerms& perms) override; + + std::optional readFile(FileSession* file, u64 offset, u32 size, u32 dataPointer) override { + Helpers::panic("Unimplemented ReadFile for TWL_PHOTO archive"); + return {}; + }; +}; \ No newline at end of file diff --git a/include/fs/archive_twl_sound.hpp b/include/fs/archive_twl_sound.hpp new file mode 100644 index 00000000..cc8fc866 --- /dev/null +++ b/include/fs/archive_twl_sound.hpp @@ -0,0 +1,30 @@ +#pragma once +#include "archive_base.hpp" +#include "result/result.hpp" + +using Result::HorizonResult; + +class TWLSoundArchive : public ArchiveBase { + public: + TWLSoundArchive(Memory& mem) : ArchiveBase(mem) {} + std::string name() override { return "TWL_SOUND"; } + + u64 getFreeBytes() override { + Helpers::warn("Unimplemented GetFreeBytes for TWLSound archive"); + return 32_MB; + } + + HorizonResult createDirectory(const FSPath& path) override; + HorizonResult createFile(const FSPath& 
path, u64 size) override; + HorizonResult deleteFile(const FSPath& path) override; + + Rust::Result openArchive(const FSPath& path) override; + Rust::Result openDirectory(const FSPath& path) override; + + FileDescriptor openFile(const FSPath& path, const FilePerms& perms) override; + + std::optional readFile(FileSession* file, u64 offset, u32 size, u32 dataPointer) override { + Helpers::panic("Unimplemented ReadFile for TWL_SOUND archive"); + return {}; + }; +}; \ No newline at end of file diff --git a/include/helpers.hpp b/include/helpers.hpp index 037c8976..a95931d4 100644 --- a/include/helpers.hpp +++ b/include/helpers.hpp @@ -4,7 +4,6 @@ #include #include #include -#include #include #include #include @@ -162,19 +161,6 @@ namespace Helpers { return std::bit_cast(from); } #endif - - static std::vector split(const std::string& s, const char c) { - std::istringstream tmp(s); - std::vector result(1); - - while (std::getline(tmp, *result.rbegin(), c)) { - result.emplace_back(); - } - - // Remove temporary slot - result.pop_back(); - return result; - } }; // namespace Helpers // UDLs for memory size values diff --git a/include/ipc.hpp b/include/ipc.hpp index 67a8897e..07a77f02 100644 --- a/include/ipc.hpp +++ b/include/ipc.hpp @@ -2,8 +2,19 @@ #include namespace IPC { + namespace BufferType { + enum : std::uint32_t { + Send = 1, + Receive = 2, + }; + } + constexpr std::uint32_t responseHeader(std::uint32_t commandID, std::uint32_t normalResponses, std::uint32_t translateResponses) { // TODO: Maybe validate the response count stuff fits in 6 bits return (commandID << 16) | (normalResponses << 6) | translateResponses; } -} \ No newline at end of file + + constexpr std::uint32_t pointerHeader(std::uint32_t index, std::uint32_t size, std::uint32_t type) { + return (size << 14) | (index << 10) | (type << 1); + } +} // namespace IPC \ No newline at end of file diff --git a/include/kernel/config_mem.hpp b/include/kernel/config_mem.hpp index 81f0fef1..7d1890a0 100644 --- 
a/include/kernel/config_mem.hpp +++ b/include/kernel/config_mem.hpp @@ -8,6 +8,7 @@ namespace ConfigMem { KernelVersionMajor = 0x1FF80003, SyscoreVer = 0x1FF80010, EnvInfo = 0x1FF80014, + PrevFirm = 0x1FF80016, AppMemAlloc = 0x1FF80040, FirmUnknown = 0x1FF80060, FirmRevision = 0x1FF80061, @@ -30,6 +31,11 @@ namespace ConfigMem { // Shows what type of hardware we're running on namespace HardwareCodes { - enum : u8 { Product = 1, Devboard = 2, Debugger = 3, Capture = 4 }; + enum : u8 { + Product = 1, + Devboard = 2, + Debugger = 3, + Capture = 4, + }; } } // namespace ConfigMem diff --git a/include/kernel/handles.hpp b/include/kernel/handles.hpp index fe746b65..158c7501 100644 --- a/include/kernel/handles.hpp +++ b/include/kernel/handles.hpp @@ -1,7 +1,7 @@ #pragma once #include "helpers.hpp" -using Handle = u32; +using HorizonHandle = u32; namespace KernelHandles { enum : u32 { @@ -20,6 +20,7 @@ namespace KernelHandles { CFG_U, // CFG service (Console & region info) CFG_I, CFG_S, // Used by most system apps in lieu of cfg:u + CFG_NOR, // Used by system settings app CSND, // Plays audio directly from PCM samples DLP_SRVR, // Download Play: Server. Used for network play. DSP, // DSP service (Used for audio decoding and output) @@ -38,11 +39,14 @@ namespace KernelHandles { NIM, // Updates, DLC, etc NDM, // ????? NS_S, // Nintendo Shell service + NWM_EXT, // ????? 
NWM_UDS, // Local multiplayer - NEWS_U, // This service literally has 1 command (AddNotification) and I don't even understand what it does + NEWS_S, // news:u on steroids + NEWS_U, // This service literally has 1 command (AddNotification) PTM_U, // PTM service (Used for accessing various console info, such as battery, shell and pedometer state) PTM_SYSM, // PTM system service PTM_PLAY, // PTM Play service, used for retrieving play history + PTM_GETS, // PTM RTC service (GetSystemTime) SOC, // Socket service SSL, // SSL service (Totally didn't expect that) Y2R, // Also does camera stuff @@ -61,17 +65,17 @@ namespace KernelHandles { }; // Returns whether "handle" belongs to one of the OS services - static constexpr bool isServiceHandle(Handle handle) { + static constexpr bool isServiceHandle(HorizonHandle handle) { return handle >= MinServiceHandle && handle <= MaxServiceHandle; } // Returns whether "handle" belongs to one of the OS services' shared memory areas - static constexpr bool isSharedMemHandle(Handle handle) { + static constexpr bool isSharedMemHandle(HorizonHandle handle) { return handle >= MinSharedMemHandle && handle <= MaxSharedMemHandle; } // Returns the name of a handle as a string based on the given handle - static const char* getServiceName(Handle handle) { + static const char* getServiceName(HorizonHandle handle) { switch (handle) { case AC: return "AC"; case ACT: return "ACT"; @@ -82,6 +86,8 @@ namespace KernelHandles { case CECD: return "CECD"; case CFG_U: return "CFG:U"; case CFG_I: return "CFG:I"; + case CFG_S: return "CFG:S"; + case CFG_NOR: return "CFG:NOR"; case CSND: return "CSND"; case DSP: return "DSP"; case DLP_SRVR: return "DLP::SRVR"; @@ -97,13 +103,16 @@ namespace KernelHandles { case MCU_HWC: return "MCU::HWC"; case MIC: return "MIC"; case NDM: return "NDM"; + case NEWS_S: return "NEWS_S"; case NEWS_U: return "NEWS_U"; + case NWM_EXT: return "nwm::EXT"; case NWM_UDS: return "nwm::UDS"; case NFC: return "NFC"; case NIM: return "NIM"; 
case PTM_U: return "PTM:U"; case PTM_SYSM: return "PTM:SYSM"; case PTM_PLAY: return "PTM:PLAY"; + case PTM_GETS: return "PTM:GETS"; case SOC: return "SOC"; case SSL: return "SSL"; case Y2R: return "Y2R"; diff --git a/include/kernel/kernel.hpp b/include/kernel/kernel.hpp index e0c0651b..abc508ac 100644 --- a/include/kernel/kernel.hpp +++ b/include/kernel/kernel.hpp @@ -18,6 +18,8 @@ class CPU; struct Scheduler; class Kernel { + using Handle = HorizonHandle; + std::span regs; CPU& cpu; Memory& mem; diff --git a/include/kernel/kernel_types.hpp b/include/kernel/kernel_types.hpp index a68ef8d5..a3a60c34 100644 --- a/include/kernel/kernel_types.hpp +++ b/include/kernel/kernel_types.hpp @@ -47,7 +47,7 @@ enum class ProcessorID : s32 { struct AddressArbiter {}; struct ResourceLimits { - Handle handle; + HorizonHandle handle; s32 currentCommit = 0; }; @@ -91,6 +91,8 @@ struct Port { }; struct Session { + using Handle = HorizonHandle; + Handle portHandle; // The port this session is subscribed to Session(Handle portHandle) : portHandle(portHandle) {} }; @@ -109,6 +111,8 @@ enum class ThreadStatus { }; struct Thread { + using Handle = HorizonHandle; + u32 initialSP; // Initial r13 value u32 entrypoint; // Initial r15 value u32 priority; @@ -161,6 +165,8 @@ static const char* kernelObjectTypeToString(KernelObjectType t) { } struct Mutex { + using Handle = HorizonHandle; + u64 waitlist; // Refer to the getWaitlist function below for documentation Handle ownerThread = 0; // Index of the thread that holds the mutex if it's locked Handle handle; // Handle of the mutex itself @@ -203,6 +209,8 @@ struct MemoryBlock { // Generic kernel object class struct KernelObject { + using Handle = HorizonHandle; + Handle handle = 0; // A u32 the OS will use to identify objects void* data = nullptr; KernelObjectType type; diff --git a/include/loader/ncch.hpp b/include/loader/ncch.hpp index 8e35643b..92ad5040 100644 --- a/include/loader/ncch.hpp +++ b/include/loader/ncch.hpp @@ -50,6 +50,7 @@ 
struct NCCH { static constexpr u64 mediaUnit = 0x200; u64 size = 0; // Size of NCCH converted to bytes + u64 saveDataSize = 0; u32 stackSize = 0; u32 bssSize = 0; u32 exheaderSize = 0; @@ -64,8 +65,6 @@ struct NCCH { // Contents of the .code file in the ExeFS std::vector codeFile; - // Contains of the cart's save data - std::vector saveData; // The cart region. Only the CXI's region matters to us. Necessary to get past region locking std::optional region = std::nullopt; std::vector smdh; @@ -78,7 +77,7 @@ struct NCCH { bool hasExeFS() { return exeFS.size != 0; } bool hasRomFS() { return romFS.size != 0; } bool hasCode() { return codeFile.size() != 0; } - bool hasSaveData() { return saveData.size() != 0; } + bool hasSaveData() { return saveDataSize != 0; } // Parse SMDH for region info and such. Returns false on failure, true on success bool parseSMDH(const std::vector &smdh); diff --git a/include/logger.hpp b/include/logger.hpp index 4fc521b6..626025fa 100644 --- a/include/logger.hpp +++ b/include/logger.hpp @@ -65,6 +65,7 @@ namespace Log { static Logger nwmUdsLogger; static Logger nimLogger; static Logger ndmLogger; + static Logger nsLogger; static Logger ptmLogger; static Logger socLogger; static Logger sslLogger; diff --git a/include/memory.hpp b/include/memory.hpp index 33ccbae5..bd002c54 100644 --- a/include/memory.hpp +++ b/include/memory.hpp @@ -102,6 +102,8 @@ namespace KernelMemoryTypes { } class Memory { + using Handle = HorizonHandle; + u8* fcram; u8* dspRam; // Provided to us by Audio u8* vram; // Provided to the memory class by the GPU class @@ -213,8 +215,14 @@ private: } enum class BatteryLevel { - Empty = 0, AlmostEmpty, OneBar, TwoBars, ThreeBars, FourBars + Empty = 0, + AlmostEmpty, + OneBar, + TwoBars, + ThreeBars, + FourBars, }; + u8 getBatteryState(bool adapterConnected, bool charging, BatteryLevel batteryLevel) { u8 value = static_cast(batteryLevel) << 2; // Bits 2:4 are the battery level from 0 to 5 if (adapterConnected) value |= 1 << 0; // 
Bit 0 shows if the charger is connected @@ -290,5 +298,5 @@ private: bool allocateMainThreadStack(u32 size); Regions getConsoleRegion(); - void copySharedFont(u8* ptr); + void copySharedFont(u8* ptr, u32 vaddr); }; diff --git a/include/panda_qt/cheats_window.hpp b/include/panda_qt/cheats_window.hpp index c82b2bd8..93228d5e 100644 --- a/include/panda_qt/cheats_window.hpp +++ b/include/panda_qt/cheats_window.hpp @@ -1,6 +1,13 @@ #pragma once #include +#include +#include +#include +#include +#include +#include +#include #include #include #include @@ -24,3 +31,60 @@ class CheatsWindow final : public QWidget { std::filesystem::path cheatPath; Emulator* emu; }; + +struct CheatMetadata { + u32 handle = Cheats::badCheatHandle; + std::string name = "New cheat"; + std::string code; + bool enabled = true; +}; + +class CheatEntryWidget : public QWidget { + Q_OBJECT + + public: + CheatEntryWidget(Emulator* emu, CheatMetadata metadata, QListWidget* parent); + + void Update() { + name->setText(metadata.name.c_str()); + enabled->setChecked(metadata.enabled); + update(); + } + + void Remove() { + emu->getCheats().removeCheat(metadata.handle); + cheatList->takeItem(cheatList->row(listItem)); + deleteLater(); + } + + const CheatMetadata& getMetadata() { return metadata; } + void setMetadata(const CheatMetadata& metadata) { this->metadata = metadata; } + + private: + void checkboxChanged(int state); + void editClicked(); + + Emulator* emu; + CheatMetadata metadata; + u32 handle; + QLabel* name; + QCheckBox* enabled; + QListWidget* cheatList; + QListWidgetItem* listItem; +}; + +class CheatEditDialog : public QDialog { + Q_OBJECT + + public: + CheatEditDialog(Emulator* emu, CheatEntryWidget& cheatEntry); + + void accepted(); + void rejected(); + + private: + Emulator* emu; + CheatEntryWidget& cheatEntry; + QTextEdit* codeEdit; + QLineEdit* nameEdit; +}; \ No newline at end of file diff --git a/include/panda_qt/config_window.hpp b/include/panda_qt/config_window.hpp index 
4a523879..1d37a8ca 100644 --- a/include/panda_qt/config_window.hpp +++ b/include/panda_qt/config_window.hpp @@ -1,30 +1,58 @@ #pragma once #include +#include #include #include +#include #include +#include +#include #include #include +#include +#include +#include + +#include "emulator.hpp" +#include "frontend_settings.hpp" class ConfigWindow : public QDialog { Q_OBJECT private: - enum class Theme : int { - System = 0, - Light = 1, - Dark = 2, - GreetingsCat = 3, - Cream = 4, - }; + using ConfigCallback = std::function; + using MainWindowCallback = std::function; - Theme currentTheme; - QComboBox* themeSelect = nullptr; + using Theme = FrontendSettings::Theme; + using WindowIcon = FrontendSettings::WindowIcon; - void setTheme(Theme theme); + QTextEdit* helpText = nullptr; + QListWidget* widgetList = nullptr; + QStackedWidget* widgetContainer = nullptr; + + static constexpr size_t settingWidgetCount = 6; + std::array helpTexts; + + // The config class holds a copy of the emulator config which it edits and sends + // over to the emulator in a thread-safe manner + EmulatorConfig config; + + ConfigCallback updateConfig; + MainWindowCallback getMainWindow; + + void addWidget(QWidget* widget, QString title, QString icon, QString helpText); + void setTheme(FrontendSettings::Theme theme); + void setIcon(FrontendSettings::WindowIcon icon); + + QComboBox* createLanguageSelect(); public: - ConfigWindow(QWidget* parent = nullptr); + ConfigWindow(ConfigCallback configCallback, MainWindowCallback windowCallback, const EmulatorConfig& config, QWidget* parent = nullptr); ~ConfigWindow(); + + EmulatorConfig& getConfig() { return config; } + + private: + Emulator* emu; }; diff --git a/include/panda_qt/main_window.hpp b/include/panda_qt/main_window.hpp index 831074a2..70e3ef75 100644 --- a/include/panda_qt/main_window.hpp +++ b/include/panda_qt/main_window.hpp @@ -50,6 +50,8 @@ class MainWindow : public QMainWindow { PressTouchscreen, ReleaseTouchscreen, ReloadUbershader, + 
SetScreenSize, + UpdateConfig, }; // Tagged union representing our message queue messages @@ -81,6 +83,11 @@ class MainWindow : public QMainWindow { u16 x; u16 y; } touchscreen; + + struct { + u32 width; + u32 height; + } screenSize; }; }; @@ -95,7 +102,7 @@ class MainWindow : public QMainWindow { QMenuBar* menuBar = nullptr; InputMappings keyboardMappings; - ScreenWidget screen; + ScreenWidget* screen; AboutWindow* aboutWindow; ConfigWindow* configWindow; CheatsWindow* cheatsEditor; @@ -116,12 +123,15 @@ class MainWindow : public QMainWindow { void showAboutMenu(); void initControllers(); void pollControllers(); + void setupControllerSensors(SDL_GameController* controller); void sendMessage(const EmulatorMessage& message); void dispatchMessage(const EmulatorMessage& message); + void loadTranslation(); // Tracks whether we are using an OpenGL-backed renderer or a Vulkan-backed renderer bool usingGL = false; bool usingVk = false; + bool usingMtl = false; // Variables to keep track of whether the user is controlling the 3DS analog stick with their keyboard // This is done so when a gamepad is connected, we won't automatically override the 3DS analog stick settings with the gamepad's state @@ -133,12 +143,18 @@ class MainWindow : public QMainWindow { MainWindow(QApplication* app, QWidget* parent = nullptr); ~MainWindow(); + void closeEvent(QCloseEvent* event) override; void keyPressEvent(QKeyEvent* event) override; void keyReleaseEvent(QKeyEvent* event) override; + void mousePressEvent(QMouseEvent* event) override; void mouseReleaseEvent(QMouseEvent* event) override; + void mouseMoveEvent(QMouseEvent* event) override; void loadLuaScript(const std::string& code); void reloadShader(const std::string& shader); void editCheat(u32 handle, const std::vector& cheat, const std::function& callback); + + void handleScreenResize(u32 width, u32 height); + void handleTouchscreenPress(QMouseEvent* event); }; diff --git a/include/panda_qt/screen.hpp b/include/panda_qt/screen.hpp 
index dcff3e90..1ed4966b 100644 --- a/include/panda_qt/screen.hpp +++ b/include/panda_qt/screen.hpp @@ -1,5 +1,6 @@ #pragma once #include +#include #include #include "gl/context.h" @@ -10,15 +11,28 @@ class ScreenWidget : public QWidget { Q_OBJECT public: - ScreenWidget(QWidget* parent = nullptr); + using ResizeCallback = std::function; + + ScreenWidget(ResizeCallback resizeCallback, QWidget* parent = nullptr); + void resizeEvent(QResizeEvent* event) override; + // Called by the emulator thread for resizing the actual GL surface, since the emulator thread owns the GL context + void resizeSurface(u32 width, u32 height); + GL::Context* getGLContext() { return glContext.get(); } // Dimensions of our output surface u32 surfaceWidth = 0; u32 surfaceHeight = 0; + WindowInfo windowInfo; + + // Cached "previous" dimensions, used when resizing our window + u32 previousWidth = 0; + u32 previousHeight = 0; private: std::unique_ptr glContext = nullptr; + ResizeCallback resizeCallback; + bool createGLContext(); qreal devicePixelRatioFromScreen() const; diff --git a/include/panda_sdl/frontend_sdl.hpp b/include/panda_sdl/frontend_sdl.hpp index dd6ab6c0..cbd0b88e 100644 --- a/include/panda_sdl/frontend_sdl.hpp +++ b/include/panda_sdl/frontend_sdl.hpp @@ -23,6 +23,8 @@ class FrontendSDL { SDL_GameController* gameController = nullptr; InputMappings keyboardMappings; + u32 windowWidth = 400; + u32 windowHeight = 480; int gameControllerID; bool programRunning = true; @@ -35,4 +37,6 @@ class FrontendSDL { // And so the user can still use the keyboard to control the analog bool keyboardAnalogX = false; bool keyboardAnalogY = false; + + void setupControllerSensors(SDL_GameController* controller); }; \ No newline at end of file diff --git a/include/renderdoc.hpp b/include/renderdoc.hpp new file mode 100644 index 00000000..9c7de1e3 --- /dev/null +++ b/include/renderdoc.hpp @@ -0,0 +1,73 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: 
GPL-2.0-or-later + +#pragma once +#include + +#include "helpers.hpp" + +#ifdef PANDA3DS_ENABLE_RENDERDOC +namespace Renderdoc { + // Loads renderdoc dynamic library module. + void loadRenderdoc(); + + // Begins a capture if a renderdoc instance is attached. + void startCapture(); + + // Ends current renderdoc capture. + void endCapture(); + + // Triggers capturing process. + void triggerCapture(); + + // Sets output directory for captures + void setOutputDir(const std::string& path, const std::string& prefix); + + // Returns whether Renderdoc has been loaded + bool isLoaded(); + + // Returns whether we've compiled with Renderdoc support + static constexpr bool isSupported() { return true; } +} // namespace Renderdoc +#else +namespace Renderdoc { + static void loadRenderdoc() {} + static void startCapture() { Helpers::panic("Tried to start a Renderdoc capture while support for renderdoc is disabled"); } + static void endCapture() { Helpers::panic("Tried to end a Renderdoc capture while support for renderdoc is disabled"); } + static void triggerCapture() { Helpers::panic("Tried to trigger a Renderdoc capture while support for renderdoc is disabled"); } + static void setOutputDir(const std::string& path, const std::string& prefix) {} + static constexpr bool isSupported() { return false; } + static constexpr bool isLoaded() { return false; } +} // namespace Renderdoc +#endif + +namespace Renderdoc { + // RAII scope class that encloses a Renderdoc capture, as long as it's triggered by triggerCapture + struct Scope { + Scope() { Renderdoc::startCapture(); } + ~Scope() { Renderdoc::endCapture(); } + + Scope(const Scope&) = delete; + Scope& operator=(const Scope&) = delete; + + Scope(Scope&&) = delete; + Scope& operator=(const Scope&&) = delete; + }; + + // RAII scope class that encloses a Renderdoc capture. 
Unlike regular Scope it doesn't wait for a trigger, it will always issue the capture + // trigger on its own and take a capture + struct InstantScope { + InstantScope() { + Renderdoc::triggerCapture(); + Renderdoc::startCapture(); + } + + ~InstantScope() { Renderdoc::endCapture(); } + + InstantScope(const InstantScope&) = delete; + InstantScope& operator=(const InstantScope&) = delete; + + InstantScope(InstantScope&&) = delete; + InstantScope& operator=(const InstantScope&&) = delete; + }; +} // namespace Renderdoc diff --git a/include/renderer.hpp b/include/renderer.hpp index 17812bcf..b458ecce 100644 --- a/include/renderer.hpp +++ b/include/renderer.hpp @@ -1,9 +1,10 @@ #pragma once #include +#include #include #include -#include +#include "PICA/draw_acceleration.hpp" #include "PICA/pica_vertex.hpp" #include "PICA/regs.hpp" #include "helpers.hpp" @@ -17,12 +18,16 @@ enum class RendererType : s8 { Null = 0, OpenGL = 1, Vulkan = 2, - Software = 3, + Metal = 3, + Software = 4, }; -class GPU; +struct EmulatorConfig; struct SDL_Window; +class GPU; +class ShaderUnit; + class Renderer { protected: GPU& gpu; @@ -46,6 +51,8 @@ class Renderer { u32 outputWindowWidth = 400; u32 outputWindowHeight = 240 * 2; + EmulatorConfig* emulatorConfig = nullptr; + public: Renderer(GPU& gpu, const std::array& internalRegs, const std::array& externalRegs); virtual ~Renderer(); @@ -74,6 +81,16 @@ class Renderer { virtual std::string getUbershader() { return ""; } virtual void setUbershader(const std::string& shader) {} + // Only relevant for OpenGL renderer and other OpenGL-based backends (eg software) + // Called to notify the core to use OpenGL ES and not desktop GL + virtual void setupGLES() {} + + // This function is called on every draw call before parsing vertex data. + // It is responsible for things like looking up which vertex/fragment shaders to use, recompiling them if they don't exist, choosing between + // ubershaders and shadergen, and so on. 
+ // Returns whether this draw is eligible for using hardware-accelerated shaders or if shaders should run on the CPU + virtual bool prepareForDraw(ShaderUnit& shaderUnit, PICA::DrawAcceleration* accel) { return false; } + // Functions for initializing the graphics context for the Qt frontend, where we don't have the convenience of SDL_Window #ifdef PANDA3DS_FRONTEND_QT virtual void initGraphicsContext(GL::Context* context) { Helpers::panic("Tried to initialize incompatible renderer with GL context"); } @@ -99,4 +116,6 @@ class Renderer { outputWindowWidth = width; outputWindowHeight = height; } + + void setConfig(EmulatorConfig* config) { emulatorConfig = config; } }; diff --git a/include/renderer_gl/gl_driver.hpp b/include/renderer_gl/gl_driver.hpp new file mode 100644 index 00000000..4a0b3727 --- /dev/null +++ b/include/renderer_gl/gl_driver.hpp @@ -0,0 +1,13 @@ +#pragma once + +// Information about our OpenGL/OpenGL ES driver that we should keep track of +// Stuff like whether specific extensions are supported, and potentially things like OpenGL context information +namespace OpenGL { + struct Driver { + bool usingGLES = false; + bool supportsExtFbFetch = false; + bool supportsArmFbFetch = false; + + bool supportFbFetch() const { return supportsExtFbFetch || supportsArmFbFetch; } + }; +} // namespace OpenGL \ No newline at end of file diff --git a/include/renderer_gl/gl_state.hpp b/include/renderer_gl/gl_state.hpp index 69960f1e..4085cabc 100644 --- a/include/renderer_gl/gl_state.hpp +++ b/include/renderer_gl/gl_state.hpp @@ -38,11 +38,14 @@ struct GLStateManager { GLuint stencilMask; GLuint boundVAO; - GLuint boundVBO; GLuint currentProgram; + GLuint boundUBO; GLenum depthFunc; GLenum logicOp; + GLenum blendEquationRGB, blendEquationAlpha; + GLenum blendFuncSourceRGB, blendFuncSourceAlpha; + GLenum blendFuncDestRGB, blendFuncDestAlpha; void reset(); void resetBlend(); @@ -51,7 +54,7 @@ struct GLStateManager { void resetColourMask(); void resetDepth(); void 
resetVAO(); - void resetVBO(); + void resetBuffers(); void resetProgram(); void resetScissor(); void resetStencil(); @@ -169,13 +172,6 @@ struct GLStateManager { } } - void bindVBO(GLuint handle) { - if (boundVBO != handle) { - boundVBO = handle; - glBindBuffer(GL_ARRAY_BUFFER, handle); - } - } - void useProgram(GLuint handle) { if (currentProgram != handle) { currentProgram = handle; @@ -183,8 +179,14 @@ struct GLStateManager { } } + void bindUBO(GLuint handle) { + if (boundUBO != handle) { + boundUBO = handle; + glBindBuffer(GL_UNIFORM_BUFFER, boundUBO); + } + } + void bindVAO(const OpenGL::VertexArray& vao) { bindVAO(vao.handle()); } - void bindVBO(const OpenGL::VertexBuffer& vbo) { bindVBO(vbo.handle()); } void useProgram(const OpenGL::Program& program) { useProgram(program.handle()); } void setColourMask(bool r, bool g, bool b, bool a) { @@ -224,6 +226,41 @@ struct GLStateManager { } void setDepthFunc(OpenGL::DepthFunc func) { setDepthFunc(static_cast(func)); } + + // Counterpart to glBlendEquationSeparate + void setBlendEquation(GLenum modeRGB, GLenum modeAlpha) { + if (blendEquationRGB != modeRGB || blendEquationAlpha != modeAlpha) { + blendEquationRGB = modeRGB; + blendEquationAlpha = modeAlpha; + + glBlendEquationSeparate(modeRGB, modeAlpha); + } + } + + // Counterpart to glBlendFuncSeparate + void setBlendFunc(GLenum sourceRGB, GLenum destRGB, GLenum sourceAlpha, GLenum destAlpha) { + if (blendFuncSourceRGB != sourceRGB || blendFuncDestRGB != destRGB || blendFuncSourceAlpha != sourceAlpha || + blendFuncDestAlpha != destAlpha) { + + blendFuncSourceRGB = sourceRGB; + blendFuncDestRGB = destRGB; + blendFuncSourceAlpha = sourceAlpha; + blendFuncDestAlpha = destAlpha; + + glBlendFuncSeparate(sourceRGB, destRGB,sourceAlpha, destAlpha); + } + } + + // Counterpart to regular glBlendEquation + void setBlendEquation(GLenum mode) { setBlendEquation(mode, mode); } + + void setBlendEquation(OpenGL::BlendEquation modeRGB, OpenGL::BlendEquation modeAlpha) { + 
setBlendEquation(static_cast(modeRGB), static_cast(modeAlpha)); + } + + void setBlendEquation(OpenGL::BlendEquation mode) { + setBlendEquation(static_cast(mode)); + } }; static_assert(std::is_trivially_constructible(), "OpenGL State Manager class is not trivially constructible!"); diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index c947583e..a862cd26 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -1,11 +1,23 @@ #pragma once #include +#include +#include +#include +#include #include +#include +#include #include "PICA/float_types.hpp" +#include "PICA/pica_frag_config.hpp" +#include "PICA/pica_hash.hpp" +#include "PICA/pica_vert_config.hpp" #include "PICA/pica_vertex.hpp" #include "PICA/regs.hpp" +#include "PICA/shader_gen.hpp" +#include "gl/stream_buffer.h" +#include "gl_driver.hpp" #include "gl_state.hpp" #include "helpers.hpp" #include "logger.hpp" @@ -22,27 +34,48 @@ class RendererGL final : public Renderer { OpenGL::Program triangleProgram; OpenGL::Program displayProgram; - OpenGL::VertexArray vao; + // VAO for when not using accelerated vertex shaders. Contains attribute declarations matching to the PICA fixed function fragment attributes + OpenGL::VertexArray defaultVAO; + // VAO for when using accelerated vertex shaders. The PICA vertex shader inputs are passed as attributes without CPU processing. 
+ OpenGL::VertexArray hwShaderVAO; OpenGL::VertexBuffer vbo; - // TEV configuration uniform locations - GLint textureEnvSourceLoc = -1; - GLint textureEnvOperandLoc = -1; - GLint textureEnvCombinerLoc = -1; - GLint textureEnvColorLoc = -1; - GLint textureEnvScaleLoc = -1; + // Data + struct { + // TEV configuration uniform locations + GLint textureEnvSourceLoc = -1; + GLint textureEnvOperandLoc = -1; + GLint textureEnvCombinerLoc = -1; + GLint textureEnvColorLoc = -1; + GLint textureEnvScaleLoc = -1; - // Uniform of PICA registers - GLint picaRegLoc = -1; + // Uniform of PICA registers + GLint picaRegLoc = -1; - // Depth configuration uniform locations - GLint depthOffsetLoc = -1; - GLint depthScaleLoc = -1; - GLint depthmapEnableLoc = -1; + // Depth configuration uniform locations + GLint depthOffsetLoc = -1; + GLint depthScaleLoc = -1; + GLint depthmapEnableLoc = -1; + } ubershaderData; float oldDepthScale = -1.0; float oldDepthOffset = 0.0; bool oldDepthmapEnable = false; + // Set by prepareForDraw, tells us whether the current draw is using hw-accelerated shader + bool usingAcceleratedShader = false; + bool performIndexedRender = false; + bool usingShortIndices = false; + + // Set by prepareForDraw, metadata for indexed renders + GLuint minimumIndex = 0; + GLuint maximumIndex = 0; + void* hwIndexBufferOffset = nullptr; + + // When doing hw shaders, we cache which attributes are enabled in our VAO to avoid having to enable/disable all attributes on each draw + u32 previousAttributeMask = 0; + + // Cached pointer to the current vertex shader when using HW accelerated shaders + OpenGL::Shader* generatedVertexShader = nullptr; SurfaceCache depthBufferCache; SurfaceCache colourBufferCache; @@ -53,25 +86,82 @@ class RendererGL final : public Renderer { OpenGL::VertexBuffer dummyVBO; OpenGL::Texture screenTexture; - GLuint lightLUTTextureArray; + OpenGL::Texture LUTTexture; OpenGL::Framebuffer screenFramebuffer; OpenGL::Texture blankTexture; + // The "default" vertex 
shader to use when using specialized shaders but not PICA vertex shader -> GLSL recompilation + // We can compile this once and then link it with all other generated fragment shaders + OpenGL::Shader defaultShadergenVs; + GLuint shadergenFragmentUBO; + // UBO for uploading the PICA uniforms when using hw shaders + GLuint hwShaderUniformUBO; + + using StreamBuffer = OpenGLStreamBuffer; + std::unique_ptr hwVertexBuffer; + std::unique_ptr hwIndexBuffer; + + // Cache of fixed attribute values so that we don't do any duplicate updates + std::array, 16> fixedAttrValues; + + // Cached recompiled fragment shader + struct CachedProgram { + OpenGL::Program program; + }; + + struct ShaderCache { + std::unordered_map> vertexShaderCache; + std::unordered_map fragmentShaderCache; + + // Program cache indexed by GLuints for the vertex and fragment shader to use + // Top 32 bits are the vertex shader GLuint, bottom 32 bits are the fs GLuint + std::unordered_map programCache; + + void clear() { + for (auto& it : programCache) { + CachedProgram& cachedProgram = it.second; + cachedProgram.program.free(); + } + + for (auto& it : vertexShaderCache) { + if (it.second.has_value()) { + it.second->free(); + } + } + + for (auto& it : fragmentShaderCache) { + it.second.free(); + } + + programCache.clear(); + vertexShaderCache.clear(); + fragmentShaderCache.clear(); + } + }; + ShaderCache shaderCache; OpenGL::Framebuffer getColourFBO(); OpenGL::Texture getTexture(Texture& tex); + OpenGL::Program& getSpecializedShader(); + + PICA::ShaderGen::FragmentGenerator fragShaderGen; + OpenGL::Driver driverInfo; MAKE_LOG_FUNCTION(log, rendererLogger) void setupBlending(); void setupStencilTest(bool stencilEnable); void bindDepthBuffer(); - void setupTextureEnvState(); + void setupUbershaderTexEnv(); void bindTexturesToSlots(); void updateLightingLUT(); + void updateFogLUT(); void initGraphicsContextInternal(); + void accelerateVertexUpload(ShaderUnit& shaderUnit, PICA::DrawAcceleration* accel); + void 
compileDisplayShader(); + public: RendererGL(GPU& gpu, const std::array& internalRegs, const std::array& externalRegs) - : Renderer(gpu, internalRegs, externalRegs) {} + : Renderer(gpu, internalRegs, externalRegs), fragShaderGen(PICA::ShaderGen::API::GL, PICA::ShaderGen::Language::GLSL) {} ~RendererGL() override; void reset() override; @@ -80,12 +170,14 @@ class RendererGL final : public Renderer { void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) override; // Clear a GPU buffer in VRAM void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) override; // Perform display transfer void textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) override; - void drawVertices(PICA::PrimType primType, std::span vertices) override; // Draw the given vertices + void drawVertices(PICA::PrimType primType, std::span vertices) override; // Draw the given vertices void deinitGraphicsContext() override; virtual bool supportsShaderReload() override { return true; } virtual std::string getUbershader() override; virtual void setUbershader(const std::string& shader) override; + virtual bool prepareForDraw(ShaderUnit& shaderUnit, PICA::DrawAcceleration* accel) override; + virtual void setupGLES() override; std::optional getColourBuffer(u32 addr, PICA::ColorFmt format, u32 width, u32 height, bool createIfnotFound = true); @@ -100,4 +192,4 @@ class RendererGL final : public Renderer { // Take a screenshot of the screen and store it in a file void screenshot(const std::string& name) override; -}; +}; \ No newline at end of file diff --git a/include/renderer_gl/surface_cache.hpp b/include/renderer_gl/surface_cache.hpp index 5323741f..fb7c71a5 100644 --- a/include/renderer_gl/surface_cache.hpp +++ b/include/renderer_gl/surface_cache.hpp @@ -19,8 +19,6 @@ template class SurfaceCache { // Vanilla std::optional can't hold actual references using OptionalRef = std::optional>; - 
static_assert(std::is_same() || std::is_same() || - std::is_same(), "Invalid surface type"); size_t size; size_t evictionIndex; diff --git a/include/renderer_mtl/mtl_blit_pipeline_cache.hpp b/include/renderer_mtl/mtl_blit_pipeline_cache.hpp new file mode 100644 index 00000000..1fa47f42 --- /dev/null +++ b/include/renderer_mtl/mtl_blit_pipeline_cache.hpp @@ -0,0 +1,74 @@ +#pragma once + +#include + +#include "objc_helper.hpp" +#include "pica_to_mtl.hpp" + +using namespace PICA; + +namespace Metal { + struct BlitPipelineHash { + // Formats + ColorFmt colorFmt; + DepthFmt depthFmt; + }; + + // This pipeline only caches the pipeline with all of its color and depth attachment variations + class BlitPipelineCache { + public: + BlitPipelineCache() = default; + + ~BlitPipelineCache() { + reset(); + vertexFunction->release(); + fragmentFunction->release(); + } + + void set(MTL::Device* dev, MTL::Function* vert, MTL::Function* frag) { + device = dev; + vertexFunction = vert; + fragmentFunction = frag; + } + + MTL::RenderPipelineState* get(BlitPipelineHash hash) { + u8 intHash = ((u8)hash.colorFmt << 3) | (u8)hash.depthFmt; + auto& pipeline = pipelineCache[intHash]; + if (!pipeline) { + MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init(); + desc->setVertexFunction(vertexFunction); + desc->setFragmentFunction(fragmentFunction); + + auto colorAttachment = desc->colorAttachments()->object(0); + colorAttachment->setPixelFormat(toMTLPixelFormatColor(hash.colorFmt)); + + desc->setDepthAttachmentPixelFormat(toMTLPixelFormatDepth(hash.depthFmt)); + + NS::Error* error = nullptr; + desc->setLabel(toNSString("Blit pipeline")); + pipeline = device->newRenderPipelineState(desc, &error); + if (error) { + Helpers::panic("Error creating blit pipeline state: %s", error->description()->cString(NS::ASCIIStringEncoding)); + } + + desc->release(); + } + + return pipeline; + } + + void reset() { + for (auto& pair : pipelineCache) { + pair.second->release(); + } + 
pipelineCache.clear(); + } + + private: + std::map pipelineCache; + + MTL::Device* device; + MTL::Function* vertexFunction; + MTL::Function* fragmentFunction; + }; +} // namespace Metal diff --git a/include/renderer_mtl/mtl_command_encoder.hpp b/include/renderer_mtl/mtl_command_encoder.hpp new file mode 100644 index 00000000..562e6b79 --- /dev/null +++ b/include/renderer_mtl/mtl_command_encoder.hpp @@ -0,0 +1,56 @@ +#pragma once + +#include + +namespace Metal { + struct RenderState { + MTL::RenderPipelineState* renderPipelineState = nullptr; + MTL::DepthStencilState* depthStencilState = nullptr; + MTL::Texture* textures[3] = {nullptr}; + MTL::SamplerState* samplerStates[3] = {nullptr}; + }; + + class CommandEncoder { + public: + void newRenderCommandEncoder(MTL::RenderCommandEncoder* rce) { + renderCommandEncoder = rce; + + // Reset the render state + renderState = RenderState{}; + } + + // Resource binding + void setRenderPipelineState(MTL::RenderPipelineState* renderPipelineState) { + if (renderPipelineState != renderState.renderPipelineState) { + renderCommandEncoder->setRenderPipelineState(renderPipelineState); + renderState.renderPipelineState = renderPipelineState; + } + } + + void setDepthStencilState(MTL::DepthStencilState* depthStencilState) { + if (depthStencilState != renderState.depthStencilState) { + renderCommandEncoder->setDepthStencilState(depthStencilState); + renderState.depthStencilState = depthStencilState; + } + } + + void setFragmentTexture(MTL::Texture* texture, u32 index) { + if (texture != renderState.textures[index]) { + renderCommandEncoder->setFragmentTexture(texture, index); + renderState.textures[index] = texture; + } + } + + void setFragmentSamplerState(MTL::SamplerState* samplerState, u32 index) { + if (samplerState != renderState.samplerStates[index]) { + renderCommandEncoder->setFragmentSamplerState(samplerState, index); + renderState.samplerStates[index] = samplerState; + } + } + + private: + MTL::RenderCommandEncoder* 
// Map a helper binding index N to texture slot (30 - N) / sampler slot (15 - N).
// NOTE(review): presumably helper resources count down from the top so they do not collide with
// bindings that count up from 0 - confirm against the shaders.
// The argument is parenthesized so that expressions such as `i + 1` expand correctly.
#define GET_HELPER_TEXTURE_BINDING(binding) (30 - (binding))
#define GET_HELPER_SAMPLER_STATE_BINDING(binding) (15 - (binding))
Helpers::getBits<8, 8>(hash.stencilConfig) : 0; + + const u8 stencilFailOp = Helpers::getBits<0, 3>(hash.stencilOpConfig); + const u8 depthFailOp = Helpers::getBits<4, 3>(hash.stencilOpConfig); + const u8 passOp = Helpers::getBits<8, 3>(hash.stencilOpConfig); + + stencilDesc = MTL::StencilDescriptor::alloc()->init(); + stencilDesc->setStencilFailureOperation(toMTLStencilOperation(stencilFailOp)); + stencilDesc->setDepthFailureOperation(toMTLStencilOperation(depthFailOp)); + stencilDesc->setDepthStencilPassOperation(toMTLStencilOperation(passOp)); + stencilDesc->setStencilCompareFunction(toMTLCompareFunc(stencilFunc)); + stencilDesc->setReadMask(stencilRefMask); + stencilDesc->setWriteMask(stencilBufferMask); + + desc->setFrontFaceStencil(stencilDesc); + desc->setBackFaceStencil(stencilDesc); + } + + depthStencilState = device->newDepthStencilState(desc); + + desc->release(); + if (stencilDesc) { + stencilDesc->release(); + } + } + + return depthStencilState; + } + + void reset() { + for (auto& pair : depthStencilCache) { + pair.second->release(); + } + depthStencilCache.clear(); + } + + private: + std::map depthStencilCache; + MTL::Device* device; + }; +} // namespace Metal diff --git a/include/renderer_mtl/mtl_draw_pipeline_cache.hpp b/include/renderer_mtl/mtl_draw_pipeline_cache.hpp new file mode 100644 index 00000000..7178785e --- /dev/null +++ b/include/renderer_mtl/mtl_draw_pipeline_cache.hpp @@ -0,0 +1,162 @@ +#pragma once + +#include + +#include "objc_helper.hpp" +#include "pica_to_mtl.hpp" + +using namespace PICA; + +namespace Metal { + struct DrawFragmentFunctionHash { + u32 lightingConfig1; // 32 bits (TODO: check this) + bool lightingEnabled; // 1 bit + u8 lightingNumLights; // 3 bits + // | ref | func | on | + u16 alphaControl; // 12 bits (mask: 11111111 0111 0001) + }; + + inline bool operator<(const DrawFragmentFunctionHash& l, const DrawFragmentFunctionHash& r) { + if (!l.lightingEnabled && r.lightingEnabled) return true; + if (l.lightingNumLights < 
r.lightingNumLights) return true; + if (l.lightingConfig1 < r.lightingConfig1) return true; + if (l.alphaControl < r.alphaControl) return true; + + return false; + } + + struct DrawPipelineHash { // 56 bits + // Formats + ColorFmt colorFmt; // 3 bits + DepthFmt depthFmt; // 3 bits + + // Blending + bool blendEnabled; // 1 bit + // | functions | aeq | ceq | + u32 blendControl; // 22 bits (mask: 1111111111111111 00000111 00000111) + u8 colorWriteMask; // 4 bits + + DrawFragmentFunctionHash fragHash; + }; + + inline bool operator<(const DrawPipelineHash& l, const DrawPipelineHash& r) { + if ((u32)l.colorFmt < (u32)r.colorFmt) return true; + if ((u32)l.depthFmt < (u32)r.depthFmt) return true; + if (!l.blendEnabled && r.blendEnabled) return true; + if (l.blendControl < r.blendControl) return true; + if (l.colorWriteMask < r.colorWriteMask) return true; + if (l.fragHash < r.fragHash) return true; + + return false; + } + + // This pipeline only caches the pipeline with all of its color and depth attachment variations + class DrawPipelineCache { + public: + DrawPipelineCache() = default; + + ~DrawPipelineCache() { + reset(); + vertexDescriptor->release(); + vertexFunction->release(); + } + + void set(MTL::Device* dev, MTL::Library* lib, MTL::Function* vert, MTL::VertexDescriptor* vertDesc) { + device = dev; + library = lib; + vertexFunction = vert; + vertexDescriptor = vertDesc; + } + + MTL::RenderPipelineState* get(DrawPipelineHash hash) { + auto& pipeline = pipelineCache[hash]; + + if (!pipeline) { + auto& fragmentFunction = fragmentFunctionCache[hash.fragHash]; + if (!fragmentFunction) { + MTL::FunctionConstantValues* constants = MTL::FunctionConstantValues::alloc()->init(); + constants->setConstantValue(&hash.fragHash.lightingEnabled, MTL::DataTypeBool, NS::UInteger(0)); + constants->setConstantValue(&hash.fragHash.lightingNumLights, MTL::DataTypeUChar, NS::UInteger(1)); + constants->setConstantValue(&hash.fragHash.lightingConfig1, MTL::DataTypeUInt, NS::UInteger(2)); 
+ constants->setConstantValue(&hash.fragHash.alphaControl, MTL::DataTypeUShort, NS::UInteger(3)); + + NS::Error* error = nullptr; + fragmentFunction = library->newFunction(NS::String::string("fragmentDraw", NS::ASCIIStringEncoding), constants, &error); + if (error) { + Helpers::panic("Error creating draw fragment function: %s", error->description()->cString(NS::ASCIIStringEncoding)); + } + constants->release(); + } + + MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init(); + desc->setVertexFunction(vertexFunction); + desc->setFragmentFunction(fragmentFunction); + desc->setVertexDescriptor(vertexDescriptor); + + auto colorAttachment = desc->colorAttachments()->object(0); + colorAttachment->setPixelFormat(toMTLPixelFormatColor(hash.colorFmt)); + MTL::ColorWriteMask writeMask = 0; + if (hash.colorWriteMask & 0x1) writeMask |= MTL::ColorWriteMaskRed; + if (hash.colorWriteMask & 0x2) writeMask |= MTL::ColorWriteMaskGreen; + if (hash.colorWriteMask & 0x4) writeMask |= MTL::ColorWriteMaskBlue; + if (hash.colorWriteMask & 0x8) writeMask |= MTL::ColorWriteMaskAlpha; + colorAttachment->setWriteMask(writeMask); + if (hash.blendEnabled) { + const u8 rgbEquation = hash.blendControl & 0x7; + const u8 alphaEquation = Helpers::getBits<8, 3>(hash.blendControl); + + // Get blending functions + const u8 rgbSourceFunc = Helpers::getBits<16, 4>(hash.blendControl); + const u8 rgbDestFunc = Helpers::getBits<20, 4>(hash.blendControl); + const u8 alphaSourceFunc = Helpers::getBits<24, 4>(hash.blendControl); + const u8 alphaDestFunc = Helpers::getBits<28, 4>(hash.blendControl); + + colorAttachment->setBlendingEnabled(true); + colorAttachment->setRgbBlendOperation(toMTLBlendOperation(rgbEquation)); + colorAttachment->setAlphaBlendOperation(toMTLBlendOperation(alphaEquation)); + colorAttachment->setSourceRGBBlendFactor(toMTLBlendFactor(rgbSourceFunc)); + colorAttachment->setDestinationRGBBlendFactor(toMTLBlendFactor(rgbDestFunc)); + 
colorAttachment->setSourceAlphaBlendFactor(toMTLBlendFactor(alphaSourceFunc)); + colorAttachment->setDestinationAlphaBlendFactor(toMTLBlendFactor(alphaDestFunc)); + } + + MTL::PixelFormat depthFormat = toMTLPixelFormatDepth(hash.depthFmt); + desc->setDepthAttachmentPixelFormat(depthFormat); + if (hash.depthFmt == DepthFmt::Depth24Stencil8) desc->setStencilAttachmentPixelFormat(depthFormat); + + NS::Error* error = nullptr; + desc->setLabel(toNSString("Draw pipeline")); + pipeline = device->newRenderPipelineState(desc, &error); + if (error) { + Helpers::panic("Error creating draw pipeline state: %s", error->description()->cString(NS::ASCIIStringEncoding)); + } + + desc->release(); + } + + return pipeline; + } + + void reset() { + for (auto& pair : pipelineCache) { + pair.second->release(); + } + pipelineCache.clear(); + + for (auto& pair : fragmentFunctionCache) { + pair.second->release(); + } + fragmentFunctionCache.clear(); + } + + private: + std::map pipelineCache; + std::map fragmentFunctionCache; + + MTL::Device* device; + MTL::Library* library; + MTL::Function* vertexFunction; + MTL::VertexDescriptor* vertexDescriptor; + }; + +} // namespace Metal diff --git a/include/renderer_mtl/mtl_lut_texture.hpp b/include/renderer_mtl/mtl_lut_texture.hpp new file mode 100644 index 00000000..531dc73c --- /dev/null +++ b/include/renderer_mtl/mtl_lut_texture.hpp @@ -0,0 +1,20 @@ +#pragma once + +#include + +namespace Metal { + +class LutTexture { +public: + LutTexture(MTL::Device* device, MTL::TextureType type, MTL::PixelFormat pixelFormat, u32 width, u32 height, const char* name); + ~LutTexture(); + u32 getNextIndex(); + + MTL::Texture* getTexture() { return texture; } + u32 getCurrentIndex() { return currentIndex; } +private: + MTL::Texture* texture; + u32 currentIndex = 0; +}; + +} // namespace Metal diff --git a/include/renderer_mtl/mtl_render_target.hpp b/include/renderer_mtl/mtl_render_target.hpp new file mode 100644 index 00000000..8f80ea64 --- /dev/null +++ 
b/include/renderer_mtl/mtl_render_target.hpp @@ -0,0 +1,91 @@ +#pragma once +#include +#include +#include + +#include "boost/icl/interval.hpp" +#include "helpers.hpp" +#include "math_util.hpp" +#include "objc_helper.hpp" +#include "opengl.hpp" +#include "pica_to_mtl.hpp" + +template +using Interval = boost::icl::right_open_interval; + +namespace Metal { + template + struct RenderTarget { + MTL::Device* device; + + u32 location; + Format_t format; + OpenGL::uvec2 size; + bool valid; + + // Range of VRAM taken up by buffer + Interval range; + + MTL::Texture* texture = nullptr; + + RenderTarget() : valid(false) {} + + RenderTarget(MTL::Device* dev, u32 loc, Format_t format, u32 x, u32 y, bool valid = true) + : device(dev), location(loc), format(format), size({x, y}), valid(valid) { + u64 endLoc = (u64)loc + sizeInBytes(); + // Check if start and end are valid here + range = Interval(loc, (u32)endLoc); + } + + Math::Rect getSubRect(u32 inputAddress, u32 width, u32 height) { + const u32 startOffset = (inputAddress - location) / sizePerPixel(format); + const u32 x0 = (startOffset % (size.x() * 8)) / 8; + const u32 y0 = (startOffset / (size.x() * 8)) * 8; + return Math::Rect{x0, size.y() - y0, x0 + width, size.y() - height - y0}; + } + + // For 2 textures to "match" we only care about their locations, formats, and dimensions to match + // For other things, such as filtering mode, etc, we can just switch the attributes of the cached texture + bool matches(RenderTarget& other) { + return location == other.location && format == other.format && size.x() == other.size.x() && size.y() == other.size.y(); + } + + void allocate() { + MTL::PixelFormat pixelFormat = MTL::PixelFormatInvalid; + if (std::is_same::value) { + pixelFormat = PICA::toMTLPixelFormatColor((PICA::ColorFmt)format); + } else if (std::is_same::value) { + pixelFormat = PICA::toMTLPixelFormatDepth((PICA::DepthFmt)format); + } else { + panic("Invalid format type"); + } + + MTL::TextureDescriptor* descriptor = 
MTL::TextureDescriptor::alloc()->init(); + descriptor->setTextureType(MTL::TextureType2D); + descriptor->setPixelFormat(pixelFormat); + descriptor->setWidth(size.u()); + descriptor->setHeight(size.v()); + descriptor->setUsage(MTL::TextureUsageRenderTarget | MTL::TextureUsageShaderRead); + descriptor->setStorageMode(MTL::StorageModePrivate); + texture = device->newTexture(descriptor); + texture->setLabel(toNSString( + std::string(std::is_same::value ? "Color" : "Depth") + " render target " + std::to_string(size.u()) + "x" + + std::to_string(size.v()) + )); + descriptor->release(); + } + + void free() { + valid = false; + + if (texture) { + texture->release(); + } + } + + u64 sizeInBytes() { return (size_t)size.x() * (size_t)size.y() * PICA::sizePerPixel(format); } + }; + + using ColorRenderTarget = RenderTarget; + using DepthStencilRenderTarget = RenderTarget; +} // namespace Metal diff --git a/include/renderer_mtl/mtl_texture.hpp b/include/renderer_mtl/mtl_texture.hpp new file mode 100644 index 00000000..93103091 --- /dev/null +++ b/include/renderer_mtl/mtl_texture.hpp @@ -0,0 +1,73 @@ +#pragma once + +#include +#include +#include + +#include "PICA/regs.hpp" +#include "boost/icl/interval.hpp" +#include "helpers.hpp" +#include "math_util.hpp" +#include "opengl.hpp" +#include "renderer_mtl/pica_to_mtl.hpp" + +template +using Interval = boost::icl::right_open_interval; + +namespace Metal { + struct Texture { + MTL::Device* device; + + u32 location; + u32 config; // Magnification/minification filter, wrapping configs, etc + PICA::TextureFmt format; + OpenGL::uvec2 size; + bool valid; + + // Range of VRAM taken up by buffer + Interval range; + + PICA::PixelFormatInfo formatInfo; + MTL::Texture* texture = nullptr; + MTL::SamplerState* sampler = nullptr; + + Texture() : valid(false) {} + + Texture(MTL::Device* dev, u32 loc, PICA::TextureFmt format, u32 x, u32 y, u32 config, bool valid = true) + : device(dev), location(loc), format(format), size({x, y}), config(config), 
valid(valid) { + u64 endLoc = (u64)loc + sizeInBytes(); + // Check if start and end are valid here + range = Interval(loc, (u32)endLoc); + } + + // For 2 textures to "match" we only care about their locations, formats, and dimensions to match + // For other things, such as filtering mode, etc, we can just switch the attributes of the cached texture + bool matches(Texture& other) { + return location == other.location && format == other.format && size.x() == other.size.x() && size.y() == other.size.y(); + } + + void allocate(); + void setNewConfig(u32 newConfig); + void decodeTexture(std::span data); + void free(); + u64 sizeInBytes(); + + u8 decodeTexelU8(u32 u, u32 v, PICA::TextureFmt fmt, std::span data); + u16 decodeTexelU16(u32 u, u32 v, PICA::TextureFmt fmt, std::span data); + u32 decodeTexelU32(u32 u, u32 v, PICA::TextureFmt fmt, std::span data); + + // Get the morton interleave offset of a texel based on its U and V values + static u32 mortonInterleave(u32 u, u32 v); + // Get the byte offset of texel (u, v) in the texture + static u32 getSwizzledOffset(u32 u, u32 v, u32 width, u32 bytesPerPixel); + static u32 getSwizzledOffset_4bpp(u32 u, u32 v, u32 width); + + // Returns the format of this texture as a string + std::string_view formatToString() { return PICA::textureFormatToString(format); } + + // Returns the texel at coordinates (u, v) of an ETC1(A4) texture + // TODO: Make hasAlpha a template parameter + u32 getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, std::span data); + u32 decodeETC(u32 alpha, u32 u, u32 v, u64 colourData); + }; +} // namespace Metal diff --git a/include/renderer_mtl/mtl_vertex_buffer_cache.hpp b/include/renderer_mtl/mtl_vertex_buffer_cache.hpp new file mode 100644 index 00000000..b392389c --- /dev/null +++ b/include/renderer_mtl/mtl_vertex_buffer_cache.hpp @@ -0,0 +1,83 @@ +#pragma once + +#include + +#include "helpers.hpp" +#include "pica_to_mtl.hpp" + + +using namespace PICA; + +namespace Metal { + struct BufferHandle { + 
MTL::Buffer* buffer; + usize offset; + }; + + class VertexBufferCache { + // 128MB buffer for caching vertex data + static constexpr usize CACHE_BUFFER_SIZE = 128 * 1024 * 1024; + + public: + VertexBufferCache() = default; + + ~VertexBufferCache() { + endFrame(); + buffer->release(); + } + + void set(MTL::Device* dev) { + device = dev; + create(); + } + + void endFrame() { + ptr = 0; + for (auto buffer : additionalAllocations) { + buffer->release(); + } + additionalAllocations.clear(); + } + + BufferHandle get(const void* data, usize size) { + // If the vertex buffer is too large, just create a new one + if (ptr + size > CACHE_BUFFER_SIZE) { + MTL::Buffer* newBuffer = device->newBuffer(data, size, MTL::ResourceStorageModeShared); + newBuffer->setLabel(toNSString("Additional vertex buffer")); + additionalAllocations.push_back(newBuffer); + Helpers::warn("Vertex buffer doesn't have enough space, creating a new buffer"); + + return BufferHandle{newBuffer, 0}; + } + + // Copy the data into the buffer + std::memcpy((char*)buffer->contents() + ptr, data, size); + + auto oldPtr = ptr; + ptr += size; + + return BufferHandle{buffer, oldPtr}; + } + + void reset() { + endFrame(); + + if (buffer) { + buffer->release(); + create(); + } + } + + private: + MTL::Buffer* buffer = nullptr; + usize ptr = 0; + std::vector additionalAllocations; + + MTL::Device* device; + + void create() { + buffer = device->newBuffer(CACHE_BUFFER_SIZE, MTL::ResourceStorageModeShared); + buffer->setLabel(toNSString("Shared vertex buffer")); + } + }; +} // namespace Metal diff --git a/include/renderer_mtl/objc_helper.hpp b/include/renderer_mtl/objc_helper.hpp new file mode 100644 index 00000000..86992f1d --- /dev/null +++ b/include/renderer_mtl/objc_helper.hpp @@ -0,0 +1,12 @@ +#pragma once + +#include + +#include "mtl_common.hpp" + +namespace Metal { + dispatch_data_t createDispatchData(const void* data, size_t size); +} // namespace Metal + +// Cast from std::string to NS::String* +inline NS::String* 
toNSString(const std::string& str) { return NS::String::string(str.c_str(), NS::ASCIIStringEncoding); } \ No newline at end of file diff --git a/include/renderer_mtl/pica_to_mtl.hpp b/include/renderer_mtl/pica_to_mtl.hpp new file mode 100644 index 00000000..715088b4 --- /dev/null +++ b/include/renderer_mtl/pica_to_mtl.hpp @@ -0,0 +1,152 @@ +#pragma once + +#include + +#include "PICA/regs.hpp" + +namespace PICA { + struct PixelFormatInfo { + MTL::PixelFormat pixelFormat; + size_t bytesPerTexel; + }; + + constexpr PixelFormatInfo pixelFormatInfos[14] = { + {MTL::PixelFormatRGBA8Unorm, 4}, // RGBA8 + {MTL::PixelFormatRGBA8Unorm, 4}, // RGB8 + {MTL::PixelFormatBGR5A1Unorm, 2}, // RGBA5551 + {MTL::PixelFormatB5G6R5Unorm, 2}, // RGB565 + {MTL::PixelFormatABGR4Unorm, 2}, // RGBA4 + {MTL::PixelFormatRGBA8Unorm, 4}, // IA8 + {MTL::PixelFormatRG8Unorm, 2}, // RG8 + {MTL::PixelFormatRGBA8Unorm, 4}, // I8 + {MTL::PixelFormatA8Unorm, 1}, // A8 + {MTL::PixelFormatABGR4Unorm, 2}, // IA4 + {MTL::PixelFormatABGR4Unorm, 2}, // I4 + {MTL::PixelFormatA8Unorm, 1}, // A4 + {MTL::PixelFormatRGBA8Unorm, 4}, // ETC1 + {MTL::PixelFormatRGBA8Unorm, 4}, // ETC1A4 + }; + + inline PixelFormatInfo getPixelFormatInfo(TextureFmt format) { return pixelFormatInfos[static_cast(format)]; } + + inline MTL::PixelFormat toMTLPixelFormatColor(ColorFmt format) { + switch (format) { + case ColorFmt::RGBA8: return MTL::PixelFormatRGBA8Unorm; + case ColorFmt::RGB8: return MTL::PixelFormatRGBA8Unorm; + case ColorFmt::RGBA5551: return MTL::PixelFormatRGBA8Unorm; // TODO: use MTL::PixelFormatBGR5A1Unorm? + case ColorFmt::RGB565: return MTL::PixelFormatRGBA8Unorm; // TODO: use MTL::PixelFormatB5G6R5Unorm? 
+ case ColorFmt::RGBA4: return MTL::PixelFormatABGR4Unorm; + } + } + + inline MTL::PixelFormat toMTLPixelFormatDepth(DepthFmt format) { + switch (format) { + case DepthFmt::Depth16: return MTL::PixelFormatDepth16Unorm; + case DepthFmt::Unknown1: return MTL::PixelFormatInvalid; + case DepthFmt::Depth24: + return MTL::PixelFormatDepth32Float; // Metal does not support 24-bit depth formats + // Apple sillicon doesn't support 24-bit depth buffers, so we use 32-bit instead + case DepthFmt::Depth24Stencil8: return MTL::PixelFormatDepth32Float_Stencil8; + } + } + + inline MTL::CompareFunction toMTLCompareFunc(u8 func) { + switch (func) { + case 0: return MTL::CompareFunctionNever; + case 1: return MTL::CompareFunctionAlways; + case 2: return MTL::CompareFunctionEqual; + case 3: return MTL::CompareFunctionNotEqual; + case 4: return MTL::CompareFunctionLess; + case 5: return MTL::CompareFunctionLessEqual; + case 6: return MTL::CompareFunctionGreater; + case 7: return MTL::CompareFunctionGreaterEqual; + default: Helpers::panic("Unknown compare function %u", func); + } + + return MTL::CompareFunctionAlways; + } + + inline MTL::BlendOperation toMTLBlendOperation(u8 op) { + switch (op) { + case 0: return MTL::BlendOperationAdd; + case 1: return MTL::BlendOperationSubtract; + case 2: return MTL::BlendOperationReverseSubtract; + case 3: return MTL::BlendOperationMin; + case 4: return MTL::BlendOperationMax; + case 5: return MTL::BlendOperationAdd; // Unused (same as 0) + case 6: return MTL::BlendOperationAdd; // Unused (same as 0) + case 7: return MTL::BlendOperationAdd; // Unused (same as 0) + default: Helpers::panic("Unknown blend operation %u", op); + } + + return MTL::BlendOperationAdd; + } + + inline MTL::BlendFactor toMTLBlendFactor(u8 factor) { + switch (factor) { + case 0: return MTL::BlendFactorZero; + case 1: return MTL::BlendFactorOne; + case 2: return MTL::BlendFactorSourceColor; + case 3: return MTL::BlendFactorOneMinusSourceColor; + case 4: return 
MTL::BlendFactorDestinationColor; + case 5: return MTL::BlendFactorOneMinusDestinationColor; + case 6: return MTL::BlendFactorSourceAlpha; + case 7: return MTL::BlendFactorOneMinusSourceAlpha; + case 8: return MTL::BlendFactorDestinationAlpha; + case 9: return MTL::BlendFactorOneMinusDestinationAlpha; + case 10: return MTL::BlendFactorBlendColor; + case 11: return MTL::BlendFactorOneMinusBlendColor; + case 12: return MTL::BlendFactorBlendAlpha; + case 13: return MTL::BlendFactorOneMinusBlendAlpha; + case 14: return MTL::BlendFactorSourceAlphaSaturated; + case 15: return MTL::BlendFactorOne; // Undocumented + default: Helpers::panic("Unknown blend factor %u", factor); + } + + return MTL::BlendFactorOne; + } + + inline MTL::StencilOperation toMTLStencilOperation(u8 op) { + switch (op) { + case 0: return MTL::StencilOperationKeep; + case 1: return MTL::StencilOperationZero; + case 2: return MTL::StencilOperationReplace; + case 3: return MTL::StencilOperationIncrementClamp; + case 4: return MTL::StencilOperationDecrementClamp; + case 5: return MTL::StencilOperationInvert; + case 6: return MTL::StencilOperationIncrementWrap; + case 7: return MTL::StencilOperationDecrementWrap; + default: Helpers::panic("Unknown stencil operation %u", op); + } + + return MTL::StencilOperationKeep; + } + + inline MTL::PrimitiveType toMTLPrimitiveType(PrimType primType) { + switch (primType) { + case PrimType::TriangleList: return MTL::PrimitiveTypeTriangle; + case PrimType::TriangleStrip: return MTL::PrimitiveTypeTriangleStrip; + case PrimType::TriangleFan: + Helpers::warn("Triangle fans are not supported on Metal, using triangles instead"); + return MTL::PrimitiveTypeTriangle; + case PrimType::GeometryPrimitive: + return MTL::PrimitiveTypeTriangle; + } + } + + inline MTL::SamplerAddressMode toMTLSamplerAddressMode(u8 addrMode) { + switch (addrMode) { + case 0: return MTL::SamplerAddressModeClampToEdge; + case 1: return MTL::SamplerAddressModeClampToBorderColor; + case 2: return 
MTL::SamplerAddressModeRepeat; + case 3: return MTL::SamplerAddressModeMirrorRepeat; + case 4: return MTL::SamplerAddressModeClampToEdge; + case 5: return MTL::SamplerAddressModeClampToBorderColor; + case 6: return MTL::SamplerAddressModeRepeat; + case 7: return MTL::SamplerAddressModeRepeat; + default: Helpers::panic("Unknown sampler address mode %u", addrMode); + } + + return MTL::SamplerAddressModeClampToEdge; + } +} // namespace PICA diff --git a/include/renderer_mtl/renderer_mtl.hpp b/include/renderer_mtl/renderer_mtl.hpp new file mode 100644 index 00000000..bd5c3bf1 --- /dev/null +++ b/include/renderer_mtl/renderer_mtl.hpp @@ -0,0 +1,207 @@ +#pragma once + +#include +#include + +#include "mtl_blit_pipeline_cache.hpp" +#include "mtl_command_encoder.hpp" +#include "mtl_depth_stencil_cache.hpp" +#include "mtl_draw_pipeline_cache.hpp" +#include "mtl_lut_texture.hpp" +#include "mtl_render_target.hpp" +#include "mtl_texture.hpp" +#include "mtl_vertex_buffer_cache.hpp" +#include "renderer.hpp" + + +// HACK: use the OpenGL cache +#include "../renderer_gl/surface_cache.hpp" + +class GPU; + +struct Color4 { + float r, g, b, a; +}; + +class RendererMTL final : public Renderer { + public: + RendererMTL(GPU& gpu, const std::array& internalRegs, const std::array& externalRegs); + ~RendererMTL() override; + + void reset() override; + void display() override; + void initGraphicsContext(SDL_Window* window) override; + void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) override; + void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) override; + void textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) override; + void drawVertices(PICA::PrimType primType, std::span vertices) override; + void screenshot(const std::string& name) override; + void deinitGraphicsContext() override; + +#ifdef PANDA3DS_FRONTEND_QT + virtual void initGraphicsContext([[maybe_unused]] 
GL::Context* context) override {} +#endif + + private: + CA::MetalLayer* metalLayer; + + MTL::Device* device; + MTL::CommandQueue* commandQueue; + + Metal::CommandEncoder commandEncoder; + + // Libraries + MTL::Library* library; + + // Caches + SurfaceCache colorRenderTargetCache; + SurfaceCache depthStencilRenderTargetCache; + SurfaceCache textureCache; + Metal::BlitPipelineCache blitPipelineCache; + Metal::DrawPipelineCache drawPipelineCache; + Metal::DepthStencilCache depthStencilCache; + Metal::VertexBufferCache vertexBufferCache; + + // Resources + MTL::SamplerState* nearestSampler; + MTL::SamplerState* linearSampler; + MTL::Texture* nullTexture; + MTL::DepthStencilState* defaultDepthStencilState; + + Metal::LutTexture* lutLightingTexture; + Metal::LutTexture* lutFogTexture; + + // Pipelines + MTL::RenderPipelineState* displayPipeline; + // MTL::RenderPipelineState* copyToLutTexturePipeline; + + // Clears + std::map colorClearOps; + std::map depthClearOps; + std::map stencilClearOps; + + // Active state + MTL::CommandBuffer* commandBuffer = nullptr; + MTL::RenderCommandEncoder* renderCommandEncoder = nullptr; + MTL::Texture* lastColorTexture = nullptr; + MTL::Texture* lastDepthTexture = nullptr; + + // Debug + std::string nextRenderPassName; + + void createCommandBufferIfNeeded() { + if (!commandBuffer) { + commandBuffer = commandQueue->commandBuffer(); + } + } + + void endRenderPass() { + if (renderCommandEncoder) { + renderCommandEncoder->endEncoding(); + renderCommandEncoder = nullptr; + } + } + + void beginRenderPassIfNeeded( + MTL::RenderPassDescriptor* renderPassDescriptor, bool doesClears, MTL::Texture* colorTexture, MTL::Texture* depthTexture = nullptr + ); + + void commitCommandBuffer() { + if (renderCommandEncoder) { + renderCommandEncoder->endEncoding(); + renderCommandEncoder->release(); + renderCommandEncoder = nullptr; + } + if (commandBuffer) { + commandBuffer->commit(); + // HACK + commandBuffer->waitUntilCompleted(); + 
commandBuffer->release(); + commandBuffer = nullptr; + } + } + + template + inline void clearAttachment( + MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture, ClearDataT clearData, GetAttachmentT getAttachment, + SetClearDataT setClearData + ) { + bool beginRenderPass = (renderPassDescriptor == nullptr); + if (!renderPassDescriptor) { + renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init(); + } + + AttachmentT* attachment = getAttachment(renderPassDescriptor); + attachment->setTexture(texture); + setClearData(attachment, clearData); + attachment->setLoadAction(MTL::LoadActionClear); + attachment->setStoreAction(MTL::StoreActionStore); + + if (beginRenderPass) { + if (std::is_same::value) + beginRenderPassIfNeeded(renderPassDescriptor, true, texture); + else + beginRenderPassIfNeeded(renderPassDescriptor, true, nullptr, texture); + } + } + + template + inline bool clearAttachment( + MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture, std::map& clearOps, + GetAttachmentT getAttachment, SetClearDataT setClearData + ) { + auto it = clearOps.find(texture); + if (it != clearOps.end()) { + clearAttachment(renderPassDescriptor, texture, it->second, getAttachment, setClearData); + clearOps.erase(it); + return true; + } + + if (renderPassDescriptor) { + AttachmentT* attachment = getAttachment(renderPassDescriptor); + attachment->setTexture(texture); + attachment->setLoadAction(MTL::LoadActionLoad); + attachment->setStoreAction(MTL::StoreActionStore); + } + + return false; + } + + bool clearColor(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture) { + return clearAttachment( + renderPassDescriptor, texture, colorClearOps, + [](MTL::RenderPassDescriptor* renderPassDescriptor) { return renderPassDescriptor->colorAttachments()->object(0); }, + [](auto attachment, auto& color) { attachment->setClearColor(MTL::ClearColor(color.r, color.g, color.b, color.a)); } + ); + } + + bool 
clearDepth(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture) { + return clearAttachment( + renderPassDescriptor, texture, depthClearOps, + [](MTL::RenderPassDescriptor* renderPassDescriptor) { return renderPassDescriptor->depthAttachment(); }, + [](auto attachment, auto& depth) { attachment->setClearDepth(depth); } + ); + } + + bool clearStencil(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture) { + return clearAttachment( + renderPassDescriptor, texture, stencilClearOps, + [](MTL::RenderPassDescriptor* renderPassDescriptor) { return renderPassDescriptor->stencilAttachment(); }, + [](auto attachment, auto& stencil) { attachment->setClearStencil(stencil); } + ); + } + + std::optional getColorRenderTarget( + u32 addr, PICA::ColorFmt format, u32 width, u32 height, bool createIfnotFound = true + ); + Metal::DepthStencilRenderTarget& getDepthRenderTarget(); + Metal::Texture& getTexture(Metal::Texture& tex); + void setupTextureEnvState(MTL::RenderCommandEncoder* encoder); + void bindTexturesToSlots(); + void updateLightingLUT(MTL::RenderCommandEncoder* encoder); + void updateFogLUT(MTL::RenderCommandEncoder* encoder); + void textureCopyImpl( + Metal::ColorRenderTarget& srcFramebuffer, Metal::ColorRenderTarget& destFramebuffer, const Math::Rect& srcRect, + const Math::Rect& destRect + ); +}; diff --git a/include/renderer_null/renderer_null.hpp b/include/renderer_null/renderer_null.hpp index bd8f17df..50a724d8 100644 --- a/include/renderer_null/renderer_null.hpp +++ b/include/renderer_null/renderer_null.hpp @@ -17,6 +17,10 @@ class RendererNull final : public Renderer { void screenshot(const std::string& name) override; void deinitGraphicsContext() override; + // Tell the GPU core that we'll handle vertex fetch & shader execution in the renderer in order to speed up execution. + // Of course, we don't do this and geometry is never actually processed, since this is the null renderer. 
+ virtual bool prepareForDraw(ShaderUnit& shaderUnit, PICA::DrawAcceleration* accel) override { return true; }; + #ifdef PANDA3DS_FRONTEND_QT virtual void initGraphicsContext([[maybe_unused]] GL::Context* context) override {} #endif diff --git a/include/sdl_sensors.hpp b/include/sdl_sensors.hpp new file mode 100644 index 00000000..e34721af --- /dev/null +++ b/include/sdl_sensors.hpp @@ -0,0 +1,38 @@ +#pragma once + +#include +#include + +#include "helpers.hpp" +#include "services/hid.hpp" + +// Convert SDL sensor readings to 3DS format +// We use the same code for Android as well, since the values we get from Android are in the same format as SDL (m/s^2 for acceleration, rad/s for +// rotation) +namespace Sensors::SDL { + // Convert the rotation data we get from SDL sensor events to rotation data we can feed right to HID + // Returns [pitch, roll, yaw] + static glm::vec3 convertRotation(glm::vec3 rotation) { + // Annoyingly, Android doesn't support the header yet so we define pi ourselves + static constexpr double pi = 3.141592653589793; + // Convert the rotation from rad/s to deg/s and scale by the gyroscope coefficient in HID + constexpr float scale = 180.f / pi * HIDService::gyroscopeCoeff; + // The axes are also inverted, so invert scale before the multiplication. + return rotation * -scale; + } + + static glm::vec3 convertAcceleration(float* data) { + // Set our cap to ~9 m/s^2. The 3DS sensors cap at -930 and +930, so values above this value will get clamped to 930 + // At rest (3DS laid flat on table), hardware reads around ~0 for x and z axis, and around ~480 for y axis due to gravity. + // This code tries to mimic this approximately, with offsets based on measurements from my DualShock 4. + static constexpr float accelMax = 9.f; + // We define standard gravity(g) ourself instead of using the SDL one in order for the code to work on Android too. 
+ static constexpr float standardGravity = 9.80665f; + + s16 x = std::clamp(s16(data[0] / accelMax * 930.f), -930, +930); + s16 y = std::clamp(s16(data[1] / (standardGravity * accelMax) * 930.f - 350.f), -930, +930); + s16 z = std::clamp(s16((data[2] - 2.1f) / accelMax * 930.f), -930, +930); + + return glm::vec3(x, y, z); + } +} // namespace Sensors::SDL diff --git a/include/services/ac.hpp b/include/services/ac.hpp index 4ba53033..02775e18 100644 --- a/include/services/ac.hpp +++ b/include/services/ac.hpp @@ -8,6 +8,8 @@ #include "result/result.hpp" class ACService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::AC; Memory& mem; MAKE_LOG_FUNCTION(log, acLogger) @@ -17,6 +19,7 @@ class ACService { void closeAsync(u32 messagePointer); void createDefaultConfig(u32 messagePointer); void getConnectingInfraPriority(u32 messagePointer); + void getNZoneBeaconNotFoundEvent(u32 messagePointer); void getStatus(u32 messagePointer); void getLastErrorCode(u32 messagePointer); void getWifiStatus(u32 messagePointer); diff --git a/include/services/act.hpp b/include/services/act.hpp index 92c69c60..3fe68993 100644 --- a/include/services/act.hpp +++ b/include/services/act.hpp @@ -6,6 +6,8 @@ #include "result/result.hpp" class ACTService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::ACT; Memory& mem; MAKE_LOG_FUNCTION(log, actLogger) @@ -15,7 +17,7 @@ class ACTService { void generateUUID(u32 messagePointer); void getAccountDataBlock(u32 messagePointer); -public: + public: ACTService(Memory& mem) : mem(mem) {} void reset(); void handleSyncRequest(u32 messagePointer); diff --git a/include/services/am.hpp b/include/services/am.hpp index 672909ff..f72a5efc 100644 --- a/include/services/am.hpp +++ b/include/services/am.hpp @@ -6,6 +6,8 @@ #include "result/result.hpp" class AMService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::AM; Memory& mem; MAKE_LOG_FUNCTION(log, amLogger) @@ -15,7 +17,7 @@ class AMService { void 
getPatchTitleInfo(u32 messagePointer); void listTitleInfo(u32 messagePointer); -public: + public: AMService(Memory& mem) : mem(mem) {} void reset(); void handleSyncRequest(u32 messagePointer); diff --git a/include/services/apt.hpp b/include/services/apt.hpp index 48a59c2d..624151c1 100644 --- a/include/services/apt.hpp +++ b/include/services/apt.hpp @@ -12,7 +12,8 @@ class Kernel; enum class ConsoleModel : u32 { - Old3DS, New3DS + Old3DS, + New3DS, }; // https://www.3dbrew.org/wiki/NS_and_APT_Services#Command @@ -41,6 +42,8 @@ namespace APT::Transitions { } class APTService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::APT; Memory& mem; Kernel& kernel; @@ -99,7 +102,7 @@ class APTService { u32 screencapPostPermission; -public: + public: APTService(Memory& mem, Kernel& kernel) : mem(mem), kernel(kernel), appletManager(mem) {} void reset(); void handleSyncRequest(u32 messagePointer); diff --git a/include/services/boss.hpp b/include/services/boss.hpp index 769184e5..bf5cd88d 100644 --- a/include/services/boss.hpp +++ b/include/services/boss.hpp @@ -6,36 +6,46 @@ #include "result/result.hpp" class BOSSService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::BOSS; Memory& mem; MAKE_LOG_FUNCTION(log, bossLogger) // Service commands void cancelTask(u32 messagePointer); + void deleteNsData(u32 messagePointer); void initializeSession(u32 messagePointer); + void getAppNewFlag(u32 messagePointer); void getErrorCode(u32 messagePointer); + void getNsDataHeaderInfo(u32 messagePointer); void getNewArrivalFlag(u32 messagePointer); void getNsDataIdList(u32 messagePointer, u32 commandWord); + void getNsDataLastUpdated(u32 messagePointer); void getOptoutFlag(u32 messagePointer); - void getStorageEntryInfo(u32 messagePointer); // Unknown what this is, name taken from Citra + void getStorageEntryInfo(u32 messagePointer); // Unknown what this is, name taken from Citra void getTaskIdList(u32 messagePointer); void getTaskInfo(u32 messagePointer); 
void getTaskServiceStatus(u32 messagePointer); void getTaskState(u32 messagePointer); void getTaskStatus(u32 messagePointer); void getTaskStorageInfo(u32 messagePointer); + void readNsData(u32 messagePointer); void receiveProperty(u32 messagePointer); void registerNewArrivalEvent(u32 messagePointer); void registerStorageEntry(u32 messagePointer); void registerTask(u32 messagePointer); void sendProperty(u32 messagePointer); + void setAppNewFlag(u32 messagePointer); void setOptoutFlag(u32 messagePointer); + void startBgImmediate(u32 messagePointer); void startTask(u32 messagePointer); void unregisterStorage(u32 messagePointer); void unregisterTask(u32 messagePointer); s8 optoutFlag; -public: + + public: BOSSService(Memory& mem) : mem(mem) {} void reset(); void handleSyncRequest(u32 messagePointer); diff --git a/include/services/cam.hpp b/include/services/cam.hpp index 60ede3b9..e5254997 100644 --- a/include/services/cam.hpp +++ b/include/services/cam.hpp @@ -12,6 +12,7 @@ class Kernel; class CAMService { + using Handle = HorizonHandle; using Event = std::optional; struct Port { diff --git a/include/services/cecd.hpp b/include/services/cecd.hpp index 656e38ad..4612c17b 100644 --- a/include/services/cecd.hpp +++ b/include/services/cecd.hpp @@ -1,5 +1,6 @@ #pragma once #include + #include "helpers.hpp" #include "kernel_types.hpp" #include "logger.hpp" @@ -9,6 +10,8 @@ class Kernel; class CECDService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::CECD; Memory& mem; Kernel& kernel; @@ -20,7 +23,7 @@ class CECDService { void getInfoEventHandle(u32 messagePointer); void openAndRead(u32 messagePointer); -public: + public: CECDService(Memory& mem, Kernel& kernel) : mem(mem), kernel(kernel) {} void reset(); void handleSyncRequest(u32 messagePointer); diff --git a/include/services/cfg.hpp b/include/services/cfg.hpp index 7241a409..6e6f697a 100644 --- a/include/services/cfg.hpp +++ b/include/services/cfg.hpp @@ -1,5 +1,7 @@ #pragma once #include + +#include 
"config.hpp" #include "helpers.hpp" #include "logger.hpp" #include "memory.hpp" @@ -7,15 +9,19 @@ #include "result/result.hpp" class CFGService { + using Handle = HorizonHandle; + Memory& mem; - CountryCodes country = CountryCodes::US; // Default to USA + const EmulatorConfig& settings; + + CountryCodes country = CountryCodes::US; // Default to USA MAKE_LOG_FUNCTION(log, cfgLogger) void writeStringU16(u32 pointer, const std::u16string& string); // Service functions void getConfigInfoBlk2(u32 messagePointer); - void getConfigInfoBlk8(u32 messagePointer); + void getConfigInfoBlk8(u32 messagePointer, u32 commandWord); void getCountryCodeID(u32 messagePointer); void getLocalFriendCodeSeed(u32 messagePointer); void getRegionCanadaUSA(u32 messagePointer); @@ -23,19 +29,26 @@ class CFGService { void genUniqueConsoleHash(u32 messagePointer); void secureInfoGetByte101(u32 messagePointer); void secureInfoGetRegion(u32 messagePointer); + void setConfigInfoBlk4(u32 messagePointer); + void updateConfigNANDSavegame(u32 messagePointer); void translateCountryInfo(u32 messagePointer); + void isFangateSupported(u32 messagePointer); + + // cfg:nor functions + void norInitialize(u32 messagePointer); + void norReadData(u32 messagePointer); void getConfigInfo(u32 output, u32 blockID, u32 size, u32 permissionMask); -public: + public: enum class Type { - U, // cfg:u - I, // cfg:i - S, // cfg:s - NOR, // cfg:nor + U, // cfg:u + I, // cfg:i + S, // cfg:s + NOR, // cfg:nor }; - CFGService(Memory& mem) : mem(mem) {} + CFGService(Memory& mem, const EmulatorConfig& settings) : mem(mem), settings(settings) {} void reset(); void handleSyncRequest(u32 messagePointer, Type type); }; \ No newline at end of file diff --git a/include/services/csnd.hpp b/include/services/csnd.hpp index 8f6d60f8..93fa941d 100644 --- a/include/services/csnd.hpp +++ b/include/services/csnd.hpp @@ -10,6 +10,8 @@ class Kernel; class CSNDService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::CSND; Memory& 
mem; Kernel& kernel; @@ -30,7 +32,5 @@ class CSNDService { void reset(); void handleSyncRequest(u32 messagePointer); - void setSharedMemory(u8* ptr) { - sharedMemory = ptr; - } + void setSharedMemory(u8* ptr) { sharedMemory = ptr; } }; \ No newline at end of file diff --git a/include/services/dlp_srvr.hpp b/include/services/dlp_srvr.hpp index 1e714283..ae9cc96f 100644 --- a/include/services/dlp_srvr.hpp +++ b/include/services/dlp_srvr.hpp @@ -8,6 +8,8 @@ // Please forgive me for how everything in this file is named // "dlp:SRVR" is not a nice name to work with class DlpSrvrService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::DLP_SRVR; Memory& mem; MAKE_LOG_FUNCTION(log, dlpSrvrLogger) @@ -15,7 +17,7 @@ class DlpSrvrService { // Service commands void isChild(u32 messagePointer); -public: + public: DlpSrvrService(Memory& mem) : mem(mem) {} void reset(); void handleSyncRequest(u32 messagePointer); diff --git a/include/services/dsp.hpp b/include/services/dsp.hpp index 5cbd4fd5..3013c94d 100644 --- a/include/services/dsp.hpp +++ b/include/services/dsp.hpp @@ -10,14 +10,19 @@ #include "memory.hpp" #include "result/result.hpp" +struct EmulatorConfig; // Circular dependencies! 
class Kernel; class DSPService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::DSP; Memory& mem; Kernel& kernel; + const EmulatorConfig& config; Audio::DSPCore* dsp = nullptr; + MAKE_LOG_FUNCTION(log, dspServiceLogger) // Number of DSP pipes @@ -39,9 +44,12 @@ class DSPService { size_t totalEventCount; std::vector loadedComponent; + bool headphonesInserted = true; + // Service functions void convertProcessAddressFromDspDram(u32 messagePointer); // Nice function name void flushDataCache(u32 messagePointer); + void forceHeadphoneOut(u32 messagePointer); void getHeadphoneStatus(u32 messagePointer); void getSemaphoreEventHandle(u32 messagePointer); void invalidateDCache(u32 messagePointer); @@ -56,7 +64,7 @@ class DSPService { void writeProcessPipe(u32 messagePointer); public: - DSPService(Memory& mem, Kernel& kernel) : mem(mem), kernel(kernel) {} + DSPService(Memory& mem, Kernel& kernel, const EmulatorConfig& config) : mem(mem), kernel(kernel), config(config) {} void reset(); void handleSyncRequest(u32 messagePointer); void setDSPCore(Audio::DSPCore* pointer) { dsp = pointer; } @@ -82,4 +90,5 @@ class DSPService { void triggerInterrupt1(); ComponentDumpResult dumpComponent(const std::filesystem::path& path); + void printFirmwareInfo(); }; \ No newline at end of file diff --git a/include/services/dsp_firmware_db.hpp b/include/services/dsp_firmware_db.hpp new file mode 100644 index 00000000..bac11d73 --- /dev/null +++ b/include/services/dsp_firmware_db.hpp @@ -0,0 +1,76 @@ +#pragma once + +#include + +#include "helpers.hpp" + +namespace DSP { + struct FirmwareInfo { + using Hash = std::array; + + Hash hash; // Firmware hash (SHA-256) + u32 size; // Firmware size in bytes + + bool supportsAAC; // Does this firmware support AAC decoding? 
+ const char* notes; // Miscellaneous notes about the firmware + + explicit constexpr FirmwareInfo(const Hash& hash, u32 size, bool supportsAAC, const char* notes) + : hash(hash), size(size), supportsAAC(supportsAAC), notes(notes) {} + }; + + static constexpr std::array firmwareDB = { + FirmwareInfo( + {0x47, 0xD6, 0x6C, 0xD2, 0x13, 0x1, 0xFF, 0x62, 0xAD, 0x16, 0x98, 0x2, 0x46, 0x67, 0xF3, 0x9, + 0xDA, 0x7, 0x20, 0x9E, 0xFB, 0xB, 0x6A, 0x81, 0x98, 0xFF, 0x9B, 0xE0, 0x51, 0x67, 0xC9, 0xA6}, + 48480, false, "Spotted in some versions of Activity Log potentially other apps" + ), + + FirmwareInfo( + {0xF5, 0xDA, 0x79, 0xE7, 0x24, 0x6C, 0x51, 0x9A, 0x28, 0x6C, 0x50, 0xC9, 0x9F, 0xA1, 0xE6, 0x4D, + 0xA5, 0x72, 0x96, 0x5F, 0xEA, 0x14, 0x20, 0xA7, 0x70, 0x90, 0x57, 0x42, 0x34, 0x6E, 0x18, 0xD1}, + 49674, false, "One of the most common firmwares. Found in NSMB2 and others" + ), + + FirmwareInfo( + {0x94, 0x4B, 0x40, 0xB5, 0x46, 0x93, 0xF4, 0xB1, 0xD9, 0x52, 0xBE, 0x84, 0x87, 0xE9, 0xE9, 0x1F, + 0x66, 0x7F, 0xC4, 0x89, 0xF8, 0x15, 0x79, 0xF, 0x3D, 0x3E, 0x89, 0x26, 0x5F, 0xE0, 0x89, 0xC4}, + 49800, false, "One of the most common firmwares. Found in Majora's Mask and others" + ), + + FirmwareInfo( + {0x8E, 0x21, 0x3F, 0x3E, 0x71, 0xD2, 0xE3, 0xE4, 0x5D, 0x11, 0x69, 0xBA, 0xC6, 0x46, 0x5A, 0x70, + 0xEA, 0xBE, 0xB2, 0x2B, 0x30, 0x3F, 0x1F, 0xA6, 0xD7, 0x67, 0x93, 0x70, 0xFF, 0xAD, 0xF, 0x54}, + 49756, false, "Fairly common firmware. Found in PSMD and others" + ), + + FirmwareInfo( + {0xA2, 0x6C, 0x74, 0xD1, 0xEF, 0x7F, 0x4F, 0xA5, 0xFF, 0xFF, 0xFB, 0xEC, 0x75, 0x8A, 0x40, 0x8D, + 0x8F, 0x22, 0x87, 0x72, 0x78, 0x1B, 0x81, 0x88, 0x86, 0x5F, 0x83, 0x4D, 0x1D, 0x90, 0x6B, 0xAA}, + 48804, false, "Spotted in MK7" + ), + + FirmwareInfo( + {0x75, 0x12, 0x70, 0xB2, 0x43, 0xB0, 0xCA, 0xFB, 0x51, 0x99, 0xF2, 0x98, 0x2, 0x2, 0xC9, 0xB4, + 0xC7, 0x7A, 0x67, 0x5E, 0xF0, 0x43, 0x8F, 0xD5, 0xA8, 0x9E, 0x83, 0xAA, 0xB9, 0xA8, 0x7, 0x9B}, + 48652, false, "One of the most common firmwares. 
Found in OoT, Pokemon Rumble Blast, and others" + ), + + FirmwareInfo( + {0xF2, 0x96, 0xE2, 0xE5, 0xEC, 0x34, 0x9F, 0x6A, 0x6C, 0xF3, 0xE1, 0xC7, 0xC, 0xDD, 0x65, 0xC2, + 0x2, 0x72, 0xB6, 0xE7, 0xFF, 0xE5, 0x57, 0x92, 0x69, 0x4E, 0x83, 0xAE, 0x24, 0xF1, 0x68, 0xBF}, + 217976, true, "Most common AAC-enabled firmware. Found in Rhythm Heaven, Fire Emblem Fates/Echoes, Pokemon X/Y, and others" + ), + + FirmwareInfo( + {0xF0, 0x6C, 0x1B, 0x59, 0x23, 0xE1, 0x71, 0x19, 0x5, 0x66, 0x59, 0xCB, 0x3D, 0x9B, 0xF0, 0x26, + 0x62, 0x84, 0xE9, 0xA6, 0xC0, 0x8, 0x23, 0x99, 0xD7, 0x45, 0x8D, 0x7C, 0x52, 0xAE, 0x32, 0x1C}, + 48708, false, "Spotted in Super Mario 3D Land" + ), + + FirmwareInfo( + {0x7E, 0xA3, 0xC4, 0x4A, 0x1C, 0x57, 0x51, 0x4B, 0xEB, 0xBE, 0xBC, 0xE8, 0xA7, 0x99, 0x5F, 0x7F, + 0x3A, 0x29, 0x1, 0x70, 0xEA, 0x3B, 0x6C, 0x14, 0x57, 0x49, 0xAD, 0x93, 0x58, 0x67, 0x2C, 0x97}, + 49716, false, "Spotted in PMD: GTI" + ), + }; +} // namespace DSP \ No newline at end of file diff --git a/include/services/fonts.hpp b/include/services/fonts.hpp new file mode 100644 index 00000000..9fa84be1 --- /dev/null +++ b/include/services/fonts.hpp @@ -0,0 +1,84 @@ +// Copyright 2016 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +// Adapted from https://github.com/PabloMK7/citra/blob/master/src/core/hle/service/apt/bcfnt/bcfnt.h + +#pragma once + +#include + +#include "helpers.hpp" +#include "swap.hpp" + +namespace HLE::Fonts { + struct CFNT { + u8 magic[4]; + u16_le endianness; + u16_le headerSize; + u32_le version; + u32_le fileSize; + u32_le numBlocks; + }; + + struct SectionHeader { + u8 magic[4]; + u32_le sectionSize; + }; + + struct FINF { + u8 magic[4]; + u32_le sectionSize; + u8 fontType; + u8 lineFeed; + u16_le alterCharIndex; + u8 default_width[3]; + u8 encoding; + u32_le tglpOffset; + u32_le cwdhOffset; + u32_le cmapOffset; + u8 height; + u8 width; + u8 ascent; + u8 reserved; + }; + + struct TGLP { + u8 magic[4]; + u32_le sectionSize; + u8 cellWidth; + u8 cellHeight; + u8 baselinePosition; + u8 maxCharacterWidth; + u32_le sheetSize; + u16_le numSheets; + u16_le sheetImageFormat; + u16_le numColumns; + u16_le numRows; + u16_le sheetWidth; + u16_le sheetHeight; + u32_le sheetDataOffset; + }; + + struct CMAP { + u8 magic[4]; + u32_le sectionSize; + u16_le codeBegin; + u16_le codeEnd; + u16_le mappingMethod; + u16_le reserved; + u32_le nextCmapOffset; + }; + + struct CWDH { + u8 magic[4]; + u32_le sectionSize; + u16_le startIndex; + u16_le endIndex; + u32_le nextCwdhOffset; + }; + + // Relocates the internal addresses of the BCFNT Shared Font to the new base. The current base will + // be auto-detected based on the file headers. 
+ void relocateSharedFont(u8* sharedFont, u32 newAddress); +} // namespace HLE::Fonts \ No newline at end of file diff --git a/include/services/frd.hpp b/include/services/frd.hpp index b9b3b0fe..914d9251 100644 --- a/include/services/frd.hpp +++ b/include/services/frd.hpp @@ -1,5 +1,6 @@ #pragma once #include + #include "helpers.hpp" #include "kernel_types.hpp" #include "logger.hpp" @@ -15,6 +16,8 @@ struct FriendKey { static_assert(sizeof(FriendKey) == 16); class FRDService { + using Handle = HorizonHandle; + Memory& mem; MAKE_LOG_FUNCTION(log, frdLogger) @@ -51,11 +54,11 @@ class FRDService { }; static_assert(sizeof(Profile) == 8); -public: + public: enum class Type { - A, // frd:a - N, // frd:n - U, // frd:u + A, // frd:a + N, // frd:n + U, // frd:u }; FRDService(Memory& mem) : mem(mem) {} diff --git a/include/services/fs.hpp b/include/services/fs.hpp index 4a613121..82f07077 100644 --- a/include/services/fs.hpp +++ b/include/services/fs.hpp @@ -1,11 +1,14 @@ #pragma once #include "config.hpp" +#include "fs/archive_card_spi.hpp" #include "fs/archive_ext_save_data.hpp" #include "fs/archive_ncch.hpp" #include "fs/archive_save_data.hpp" #include "fs/archive_sdmc.hpp" #include "fs/archive_self_ncch.hpp" #include "fs/archive_system_save_data.hpp" +#include "fs/archive_twl_photo.hpp" +#include "fs/archive_twl_sound.hpp" #include "fs/archive_user_save_data.hpp" #include "helpers.hpp" #include "kernel_types.hpp" @@ -16,6 +19,8 @@ class Kernel; class FSService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::FS; Memory& mem; Kernel& kernel; @@ -37,6 +42,10 @@ class FSService { ExtSaveDataArchive sharedExtSaveData_nand; SystemSaveDataArchive systemSaveData; + TWLPhotoArchive twlPhoto; + TWLSoundArchive twlSound; + CardSPIArchive cardSpi; + ArchiveBase* getArchiveFromID(u32 id, const FSPath& archivePath); Rust::Result openArchiveHandle(u32 archiveID, const FSPath& path); Rust::Result openDirectoryHandle(ArchiveBase* archive, const FSPath& path); @@ -81,11 
+90,12 @@ class FSService { // Used for set/get priority: Not sure what sort of priority this is referring to u32 priority; -public: + public: FSService(Memory& mem, Kernel& kernel, const EmulatorConfig& config) : mem(mem), saveData(mem), sharedExtSaveData_nand(mem, "../SharedFiles/NAND", true), extSaveData_sdmc(mem, "SDMC"), sdmc(mem), sdmcWriteOnly(mem, true), selfNcch(mem), ncch(mem), userSaveData1(mem, ArchiveID::UserSaveData1), - userSaveData2(mem, ArchiveID::UserSaveData2), kernel(kernel), config(config), systemSaveData(mem) {} + userSaveData2(mem, ArchiveID::UserSaveData2), systemSaveData(mem), twlPhoto(mem), twlSound(mem), cardSpi(mem), kernel(kernel), + config(config) {} void reset(); void handleSyncRequest(u32 messagePointer); diff --git a/include/services/gsp_gpu.hpp b/include/services/gsp_gpu.hpp index 0da4fcd0..4ac8e747 100644 --- a/include/services/gsp_gpu.hpp +++ b/include/services/gsp_gpu.hpp @@ -1,6 +1,7 @@ #pragma once #include #include + #include "PICA/gpu.hpp" #include "helpers.hpp" #include "kernel_types.hpp" @@ -9,12 +10,12 @@ #include "result/result.hpp" enum class GPUInterrupt : u8 { - PSC0 = 0, // Memory fill completed - PSC1 = 1, // ? - VBlank0 = 2, // ? - VBlank1 = 3, // ? - PPF = 4, // Display transfer finished - P3D = 5, // Command list processing finished + PSC0 = 0, // Memory fill completed + PSC1 = 1, // ? + VBlank0 = 2, // ? + VBlank1 = 3, // ? 
+ PPF = 4, // Display transfer finished + P3D = 5, // Command list processing finished DMA = 6 }; @@ -22,12 +23,14 @@ enum class GPUInterrupt : u8 { class Kernel; class GPUService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::GPU; Memory& mem; GPU& gpu; Kernel& kernel; - u32& currentPID; // Process ID of the current process - u8* sharedMem; // Pointer to GSP shared memory + u32& currentPID; // Process ID of the current process + u8* sharedMem; // Pointer to GSP shared memory // At any point in time only 1 process has privileges to use rendering functions // This is the PID of that process @@ -62,8 +65,8 @@ class GPUService { // Used for saving and restoring GPU state via ImportDisplayCaptureInfo struct CaptureInfo { - u32 leftFramebuffer; // Left framebuffer VA - u32 rightFramebuffer; // Right framebuffer VA (Top screen only) + u32 leftFramebuffer; // Left framebuffer VA + u32 rightFramebuffer; // Right framebuffer VA (Top screen only) u32 format; u32 stride; }; @@ -72,6 +75,7 @@ class GPUService { // Service commands void acquireRight(u32 messagePointer); void flushDataCache(u32 messagePointer); + void invalidateDataCache(u32 messagePointer); void importDisplayCaptureInfo(u32 messagePointer); void readHwRegs(u32 messagePointer); void registerInterruptRelayQueue(u32 messagePointer); @@ -106,16 +110,15 @@ class GPUService { FramebufferUpdate* getTopFramebufferInfo() { return getFramebufferInfo(0); } FramebufferUpdate* getBottomFramebufferInfo() { return getFramebufferInfo(1); } -public: - GPUService(Memory& mem, GPU& gpu, Kernel& kernel, u32& currentPID) : mem(mem), gpu(gpu), - kernel(kernel), currentPID(currentPID) {} + public: + GPUService(Memory& mem, GPU& gpu, Kernel& kernel, u32& currentPID) : mem(mem), gpu(gpu), kernel(kernel), currentPID(currentPID) {} void reset(); void handleSyncRequest(u32 messagePointer); void requestInterrupt(GPUInterrupt type); void setSharedMem(u8* ptr) { sharedMem = ptr; - if (ptr != nullptr) { // Zero-fill shared 
memory in case the process tries to read stale service data or vice versa + if (ptr != nullptr) { // Zero-fill shared memory in case the process tries to read stale service data or vice versa std::memset(ptr, 0, 0x1000); } } -}; +}; \ No newline at end of file diff --git a/include/services/gsp_lcd.hpp b/include/services/gsp_lcd.hpp index e7672d4f..7dbdae8f 100644 --- a/include/services/gsp_lcd.hpp +++ b/include/services/gsp_lcd.hpp @@ -6,13 +6,13 @@ #include "result/result.hpp" class LCDService { - Handle handle = KernelHandles::LCD; Memory& mem; MAKE_LOG_FUNCTION(log, gspLCDLogger) // Service commands + void setLedForceOff(u32 messagePointer); -public: + public: LCDService(Memory& mem) : mem(mem) {} void reset(); void handleSyncRequest(u32 messagePointer); diff --git a/include/services/hid.hpp b/include/services/hid.hpp index d9018a4f..a0eefb1c 100644 --- a/include/services/hid.hpp +++ b/include/services/hid.hpp @@ -38,6 +38,8 @@ namespace HID::Keys { class Kernel; class HIDService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::HID; Memory& mem; Kernel& kernel; @@ -54,6 +56,7 @@ class HIDService { s16 circlePadX, circlePadY; // Circlepad state s16 touchScreenX, touchScreenY; // Touchscreen state s16 roll, pitch, yaw; // Gyroscope state + s16 accelX, accelY, accelZ; // Accelerometer state bool accelerometerEnabled; bool eventsInitialized; @@ -85,7 +88,14 @@ class HIDService { *(T*)&sharedMem[offset] = value; } + template + T* getSharedMemPointer(size_t offset) { + return (T*)&sharedMem[offset]; + } + public: + static constexpr float gyroscopeCoeff = 14.375f; // Same as retail 3DS + HIDService(Memory& mem, Kernel& kernel) : mem(mem), kernel(kernel) {} void reset(); void handleSyncRequest(u32 messagePointer); @@ -126,6 +136,12 @@ class HIDService { void setPitch(s16 value) { pitch = value; } void setYaw(s16 value) { yaw = value; } + void setAccel(s16 x, s16 y, s16 z) { + accelX = x; + accelY = y; + accelZ = z; + } + void updateInputs(u64 
currentTimestamp); void setSharedMem(u8* ptr) { diff --git a/include/services/http.hpp b/include/services/http.hpp index 1e7f30c3..8b23fb2d 100644 --- a/include/services/http.hpp +++ b/include/services/http.hpp @@ -5,6 +5,8 @@ #include "memory.hpp" class HTTPService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::HTTP; Memory& mem; MAKE_LOG_FUNCTION(log, httpLogger) diff --git a/include/services/ir_user.hpp b/include/services/ir_user.hpp index 186d9717..d475bdaa 100644 --- a/include/services/ir_user.hpp +++ b/include/services/ir_user.hpp @@ -11,6 +11,8 @@ class Kernel; class IRUserService { + using Handle = HorizonHandle; + enum class DeviceID : u8 { CirclePadPro = 1, }; diff --git a/include/services/ldr_ro.hpp b/include/services/ldr_ro.hpp index 71516547..cf60e036 100644 --- a/include/services/ldr_ro.hpp +++ b/include/services/ldr_ro.hpp @@ -8,6 +8,8 @@ class Kernel; class LDRService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::LDR_RO; Memory& mem; Kernel& kernel; @@ -22,7 +24,7 @@ class LDRService { void loadCRR(u32 messagePointer); void unloadCRO(u32 messagePointer); -public: + public: LDRService(Memory& mem, Kernel& kernel) : mem(mem), kernel(kernel) {} void reset(); void handleSyncRequest(u32 messagePointer); diff --git a/include/services/mcu/mcu_hwc.hpp b/include/services/mcu/mcu_hwc.hpp index 354a0c20..c2fada28 100644 --- a/include/services/mcu/mcu_hwc.hpp +++ b/include/services/mcu/mcu_hwc.hpp @@ -7,6 +7,8 @@ namespace MCU { class HWCService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::MCU_HWC; Memory& mem; MAKE_LOG_FUNCTION(log, mcuLogger) @@ -15,6 +17,7 @@ namespace MCU { // Service commands void getBatteryLevel(u32 messagePointer); + void setInfoLEDPattern(u32 messagePointer); public: HWCService(Memory& mem, const EmulatorConfig& config) : mem(mem), config(config) {} diff --git a/include/services/mic.hpp b/include/services/mic.hpp index f709c27f..f166c5aa 100644 --- a/include/services/mic.hpp 
+++ b/include/services/mic.hpp @@ -9,6 +9,8 @@ class Kernel; class MICService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::MIC; Memory& mem; Kernel& kernel; @@ -29,14 +31,14 @@ class MICService { void unmapSharedMem(u32 messagePointer); void theCaptainToadFunction(u32 messagePointer); - u8 gain = 0; // How loud our microphone input signal is + u8 gain = 0; // How loud our microphone input signal is bool micEnabled = false; bool shouldClamp = false; bool currentlySampling = false; std::optional eventHandle; -public: + public: MICService(Memory& mem, Kernel& kernel) : mem(mem), kernel(kernel) {} void reset(); void handleSyncRequest(u32 messagePointer); diff --git a/include/services/ndm.hpp b/include/services/ndm.hpp index 6d4e5ad8..67679403 100644 --- a/include/services/ndm.hpp +++ b/include/services/ndm.hpp @@ -6,7 +6,14 @@ #include "result/result.hpp" class NDMService { - enum class ExclusiveState : u32 { None = 0, Infrastructure = 1, LocalComms = 2, StreetPass = 3, StreetPassData = 4 }; + using Handle = HorizonHandle; + enum class ExclusiveState : u32 { + None = 0, + Infrastructure = 1, + LocalComms = 2, + StreetPass = 3, + StreetPassData = 4, + }; Handle handle = KernelHandles::NDM; Memory& mem; @@ -25,7 +32,7 @@ class NDMService { ExclusiveState exclusiveState = ExclusiveState::None; -public: + public: NDMService(Memory& mem) : mem(mem) {} void reset(); void handleSyncRequest(u32 messagePointer); diff --git a/include/services/news_u.hpp b/include/services/news_u.hpp index 61266e9a..15ae0b16 100644 --- a/include/services/news_u.hpp +++ b/include/services/news_u.hpp @@ -5,6 +5,8 @@ #include "memory.hpp" class NewsUService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::NEWS_U; Memory& mem; MAKE_LOG_FUNCTION(log, newsLogger) diff --git a/include/services/nfc.hpp b/include/services/nfc.hpp index 8eea8a41..e242a326 100644 --- a/include/services/nfc.hpp +++ b/include/services/nfc.hpp @@ -12,6 +12,8 @@ class Kernel; class 
NFCService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::NFC; Memory& mem; Kernel& kernel; diff --git a/include/services/nim.hpp b/include/services/nim.hpp index dfe13694..dbb3bb8b 100644 --- a/include/services/nim.hpp +++ b/include/services/nim.hpp @@ -6,6 +6,8 @@ #include "result/result.hpp" class NIMService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::NIM; Memory& mem; MAKE_LOG_FUNCTION(log, nimLogger) @@ -13,7 +15,7 @@ class NIMService { // Service commands void initialize(u32 messagePointer); -public: + public: NIMService(Memory& mem) : mem(mem) {} void reset(); void handleSyncRequest(u32 messagePointer); diff --git a/include/services/ns.hpp b/include/services/ns.hpp new file mode 100644 index 00000000..cb00e49b --- /dev/null +++ b/include/services/ns.hpp @@ -0,0 +1,25 @@ +#pragma once +#include "helpers.hpp" +#include "kernel_types.hpp" +#include "logger.hpp" +#include "memory.hpp" +#include "result/result.hpp" + +class NSService { + Memory& mem; + MAKE_LOG_FUNCTION(log, nsLogger) + + // Service commands + void launchTitle(u32 messagePointer); + + public: + enum class Type { + S, // ns:s + P, // ns:p + C, // ns:c + }; + + NSService(Memory& mem) : mem(mem) {} + void reset(); + void handleSyncRequest(u32 messagePointer, Type type); +}; diff --git a/include/services/nwm_uds.hpp b/include/services/nwm_uds.hpp index bf116bcf..a3b342b8 100644 --- a/include/services/nwm_uds.hpp +++ b/include/services/nwm_uds.hpp @@ -10,6 +10,8 @@ class Kernel; class NwmUdsService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::NWM_UDS; Memory& mem; Kernel& kernel; diff --git a/include/services/ptm.hpp b/include/services/ptm.hpp index f752839b..5480c398 100644 --- a/include/services/ptm.hpp +++ b/include/services/ptm.hpp @@ -13,17 +13,21 @@ class PTMService { const EmulatorConfig& config; // Service commands + void clearSoftwareClosedFlag(u32 messagePointer); void configureNew3DSCPU(u32 messagePointer); void 
getAdapterState(u32 messagePointer); void getBatteryChargeState(u32 messagePointer); void getBatteryLevel(u32 messagePointer); + void getSoftwareClosedFlag(u32 messagePointer); void getPedometerState(u32 messagePointer); void getStepHistory(u32 messagePointer); void getStepHistoryAll(u32 messagePointer); + void getSystemTime(u32 messagePointer); void getTotalStepCount(u32 messagePointer); -public: + public: enum class Type { + GETS, // ptm:gets U, // ptm:u SYSM, // ptm:sysm PLAY, // ptm:play diff --git a/include/services/region_codes.hpp b/include/services/region_codes.hpp index e06894cd..dba10e34 100644 --- a/include/services/region_codes.hpp +++ b/include/services/region_codes.hpp @@ -9,7 +9,7 @@ enum class Regions : u32 { Australia = 3, China = 4, Korea = 5, - Taiwan = 6 + Taiwan = 6, }; // Used for the language field in the NAND user data @@ -42,137 +42,137 @@ enum class LanguageCodes : u32 { }; enum class CountryCodes : u32 { - JP = 1, - AI = 8, - AG = 9, - AR = 10, - AW = 11, - BS = 12, - BB = 13, - BZ = 14, - BO = 15, - BR = 16, - VG = 17, - CA = 18, - KY = 19, - CL = 20, - CO = 21, - CR = 22, - DM = 23, - DO = 24, - EC = 25, - SV = 26, - GF = 27, - GD = 28, - GP = 29, - GT = 30, - GY = 31, - HT = 32, - HN = 33, - JM = 34, - MQ = 35, - MX = 36, - MS = 37, - AN = 38, - NI = 39, - PA = 40, - PY = 41, - PE = 42, - KN = 43, - LC = 44, - VC = 45, - SR = 46, - TT = 47, - TC = 48, - US = 49, - UY = 50, - VI = 51, - VE = 52, - AL = 64, - AU = 65, - AT = 66, - BE = 67, - BA = 68, - BW = 69, - BG = 70, - HR = 71, - CY = 72, - CZ = 73, - DK = 74, - EE = 75, - FI = 76, - FR = 77, - DE = 78, - GR = 79, - HU = 80, - IS = 81, - IE = 82, - IT = 83, - LV = 84, - LS = 85, - LI = 86, - LT = 87, - LU = 88, - MK = 89, - MT = 90, - ME = 91, - MZ = 92, - NA = 93, - NL = 94, - NZ = 95, - NO = 96, - PL = 97, - PT = 98, - RO = 99, - RU = 100, - RS = 101, - SK = 102, - SI = 103, - ZA = 104, - ES = 105, - SZ = 106, - SE = 107, - CH = 108, - TR = 109, - GB = 110, - ZM = 111, - ZW = 112, 
- AZ = 113, - MR = 114, - ML = 115, - NE = 116, - TD = 117, - SD = 118, - ER = 119, - DJ = 120, - SO = 121, - AD = 122, - GI = 123, - GG = 124, - IM = 125, - JE = 126, - MC = 127, - TW = 128, - KR = 136, - HK = 144, - MO = 145, - ID = 152, - SG = 153, - TH = 154, - PH = 155, - MY = 156, - CN = 160, - AE = 168, - IND = 169, // We can't use the 2-letter country code for India because the Windows SDK does #define IN... - EG = 170, - OM = 171, - QA = 172, - KW = 173, - SA = 174, - SY = 175, - BH = 176, - JO = 177, - SM = 184, - VA = 185, - BM = 186, + JP = 1, + AI = 8, + AG = 9, + AR = 10, + AW = 11, + BS = 12, + BB = 13, + BZ = 14, + BO = 15, + BR = 16, + VG = 17, + CA = 18, + KY = 19, + CL = 20, + CO = 21, + CR = 22, + DM = 23, + DO = 24, + EC = 25, + SV = 26, + GF = 27, + GD = 28, + GP = 29, + GT = 30, + GY = 31, + HT = 32, + HN = 33, + JM = 34, + MQ = 35, + MX = 36, + MS = 37, + AN = 38, + NI = 39, + PA = 40, + PY = 41, + PE = 42, + KN = 43, + LC = 44, + VC = 45, + SR = 46, + TT = 47, + TC = 48, + US = 49, + UY = 50, + VI = 51, + VE = 52, + AL = 64, + AU = 65, + AT = 66, + BE = 67, + BA = 68, + BW = 69, + BG = 70, + HR = 71, + CY = 72, + CZ = 73, + DK = 74, + EE = 75, + FI = 76, + FR = 77, + DE = 78, + GR = 79, + HU = 80, + IS = 81, + IE = 82, + IT = 83, + LV = 84, + LS = 85, + LI = 86, + LT = 87, + LU = 88, + MK = 89, + MT = 90, + ME = 91, + MZ = 92, + NA = 93, + NL = 94, + NZ = 95, + NO = 96, + PL = 97, + PT = 98, + RO = 99, + RU = 100, + RS = 101, + SK = 102, + SI = 103, + ZA = 104, + ES = 105, + SZ = 106, + SE = 107, + CH = 108, + TR = 109, + GB = 110, + ZM = 111, + ZW = 112, + AZ = 113, + MR = 114, + ML = 115, + NE = 116, + TD = 117, + SD = 118, + ER = 119, + DJ = 120, + SO = 121, + AD = 122, + GI = 123, + GG = 124, + IM = 125, + JE = 126, + MC = 127, + TW = 128, + KR = 136, + HK = 144, + MO = 145, + ID = 152, + SG = 153, + TH = 154, + PH = 155, + MY = 156, + CN = 160, + AE = 168, + IND = 169, // We can't use the 2-letter country code for India because the 
Windows SDK does #define IN... + EG = 170, + OM = 171, + QA = 172, + KW = 173, + SA = 174, + SY = 175, + BH = 176, + JO = 177, + SM = 184, + VA = 185, + BM = 186, }; \ No newline at end of file diff --git a/include/services/service_manager.hpp b/include/services/service_manager.hpp index 6679f98d..c777408c 100644 --- a/include/services/service_manager.hpp +++ b/include/services/service_manager.hpp @@ -28,10 +28,11 @@ #include "services/mcu/mcu_hwc.hpp" #include "services/mic.hpp" #include "services/ndm.hpp" -#include "services/nwm_uds.hpp" #include "services/news_u.hpp" #include "services/nfc.hpp" #include "services/nim.hpp" +#include "services/ns.hpp" +#include "services/nwm_uds.hpp" #include "services/ptm.hpp" #include "services/soc.hpp" #include "services/ssl.hpp" @@ -42,6 +43,8 @@ struct EmulatorConfig; class Kernel; class ServiceManager { + using Handle = HorizonHandle; + std::span regs; Memory& mem; Kernel& kernel; @@ -50,11 +53,11 @@ class ServiceManager { MAKE_LOG_FUNCTION(log, srvLogger) - ACService ac; + ACService ac; ACTService act; - AMService am; + AMService am; APTService apt; - BOSSService boss; + BOSSService boss; CAMService cam; CECDService cecd; CFGService cfg; @@ -74,7 +77,8 @@ class ServiceManager { NewsUService news_u; NFCService nfc; NwmUdsService nwm_uds; - NIMService nim; + NIMService nim; + NSService ns; PTMService ptm; SOCService soc; SSLService ssl; diff --git a/include/services/soc.hpp b/include/services/soc.hpp index 88f0b456..ff334a2c 100644 --- a/include/services/soc.hpp +++ b/include/services/soc.hpp @@ -5,6 +5,8 @@ #include "memory.hpp" class SOCService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::SOC; Memory& mem; MAKE_LOG_FUNCTION(log, socLogger) @@ -14,7 +16,7 @@ class SOCService { // Service commands void initializeSockets(u32 messagePointer); -public: + public: SOCService(Memory& mem) : mem(mem) {} void reset(); void handleSyncRequest(u32 messagePointer); diff --git a/include/services/ssl.hpp 
b/include/services/ssl.hpp index 0282049a..4b45fc81 100644 --- a/include/services/ssl.hpp +++ b/include/services/ssl.hpp @@ -1,17 +1,19 @@ #pragma once +#include + #include "helpers.hpp" #include "kernel_types.hpp" #include "logger.hpp" #include "memory.hpp" -#include - class SSLService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::SSL; Memory& mem; MAKE_LOG_FUNCTION(log, sslLogger) - std::mt19937 rng; // Use a Mersenne Twister for RNG since this service is supposed to have better rng than just rand() + std::mt19937 rng; // Use a Mersenne Twister for RNG since this service is supposed to have better rng than just rand() bool initialized; // Service commands diff --git a/include/services/y2r.hpp b/include/services/y2r.hpp index 4aa96d7b..6afebdb8 100644 --- a/include/services/y2r.hpp +++ b/include/services/y2r.hpp @@ -1,6 +1,7 @@ #pragma once #include #include + #include "helpers.hpp" #include "kernel_types.hpp" #include "logger.hpp" @@ -10,6 +11,8 @@ class Kernel; class Y2RService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::Y2R; Memory& mem; Kernel& kernel; @@ -20,7 +23,7 @@ class Y2RService { enum class BusyStatus : u32 { NotBusy = 0, - Busy = 1 + Busy = 1, }; enum class InputFormat : u32 { @@ -35,7 +38,7 @@ class Y2RService { RGB32 = 0, RGB24 = 1, RGB15 = 2, - RGB565 = 3 + RGB565 = 3, }; // Clockwise rotation @@ -43,12 +46,12 @@ class Y2RService { None = 0, Rotate90 = 1, Rotate180 = 2, - Rotate270 = 3 + Rotate270 = 3, }; enum class BlockAlignment : u32 { - Line = 0, // Output buffer's pixels are arranged linearly. Used when outputting to the framebuffer. - Block8x8 = 1, // Output buffer's pixels are morton swizzled. Used when outputting to a GPU texture. + Line = 0, // Output buffer's pixels are arranged linearly. Used when outputting to the framebuffer. + Block8x8 = 1, // Output buffer's pixels are morton swizzled. Used when outputting to a GPU texture. 
}; // https://github.com/citra-emu/citra/blob/ac9d72a95ca9a60de8d39484a14aecf489d6d016/src/core/hle/service/cam/y2r_u.cpp#L33 @@ -60,7 +63,7 @@ class Y2RService { {{0x12A, 0x1CA, 0x88, 0x36, 0x21C, -0x1F04, 0x99C, -0x2421}}, // ITU_Rec709_Scaling }}; - CoefficientSet conversionCoefficients; // Current conversion coefficients + CoefficientSet conversionCoefficients; // Current conversion coefficients InputFormat inputFmt; OutputFormat outputFmt; diff --git a/readme.md b/readme.md index 5f803bde..7ffb7384 100644 --- a/readme.md +++ b/readme.md @@ -1,5 +1,5 @@ # Panda3DS -[![Windows Build](https://github.com/wheremyfoodat/Panda3DS/actions/workflows/Windows_Build.yml/badge.svg?branch=master)](https://github.com/wheremyfoodat/Panda3DS/actions/workflows/Windows_Build.yml) [![MacOS Build](https://github.com/wheremyfoodat/Panda3DS/actions/workflows/MacOS_Build.yml/badge.svg?branch=master)](https://github.com/wheremyfoodat/Panda3DS/actions/workflows/MacOS_Build.yml) [![Linux Build](https://github.com/wheremyfoodat/Panda3DS/actions/workflows/Linux_Build.yml/badge.svg?branch=master)](https://github.com/wheremyfoodat/Panda3DS/actions/workflows/Linux_Build.yml) [![AUR Package](https://img.shields.io/aur/version/panda3ds-git)](https://aur.archlinux.org/packages/panda3ds-git) +[![Windows Build](https://github.com/wheremyfoodat/Panda3DS/actions/workflows/Windows_Build.yml/badge.svg?branch=master)](https://github.com/wheremyfoodat/Panda3DS/actions/workflows/Windows_Build.yml) [![MacOS Build](https://github.com/wheremyfoodat/Panda3DS/actions/workflows/MacOS_Build.yml/badge.svg?branch=master)](https://github.com/wheremyfoodat/Panda3DS/actions/workflows/MacOS_Build.yml) [![Android Build](https://github.com/wheremyfoodat/Panda3DS/actions/workflows/Android_Build.yml/badge.svg?branch=master)](https://github.com/wheremyfoodat/Panda3DS/actions/workflows/Android_Build.yml) [![Linux 
Build](https://github.com/wheremyfoodat/Panda3DS/actions/workflows/Linux_Build.yml/badge.svg?branch=master)](https://github.com/wheremyfoodat/Panda3DS/actions/workflows/Linux_Build.yml) [![AUR Package](https://img.shields.io/aur/version/panda3ds-git)](https://aur.archlinux.org/packages/panda3ds-git) Panda3DS is an HLE, red-panda-themed Nintendo 3DS emulator written in C++ which started out as a fun project out of curiosity, but evolved into something that can sort of play games! @@ -10,7 +10,7 @@ Join our Discord server by pressing on the banner below, or find us on other pla [![Discord Banner 2](https://discord.com/api/guilds/1118695732958994532/widget.png?style=banner2)](https://discord.gg/ZYbugsEmsw) -![screenshot1](docs/img/KirbyRobobot.png) ![screenshot2](docs/img/OoT_Title.png) ![screenshot3](docs/img/pokegang.png) +![screenshot1](docs/img/KirbyRobobot.png) ![screenshot2](docs/img/OoT_Title.png) ![screenshot3](docs/img/pokegang.png) ![screenshot4](docs/img/KirbyAndroid.png) # Download You can download stable builds from the Releases tab, or you can download the latest build from the tables below. Additionally, Panda3DS comes in 2 flavours on PC: A minimal SDL frontend, which does not have a GUI, and an experimental Qt 6 frontend with a proper user interface. 
@@ -19,19 +19,19 @@ SDL builds (No GUI): |Platform|Status|Download| |--------|------------|--------| |Windows build|[![Windows Build](https://github.com/wheremyfoodat/Panda3DS/actions/workflows/Windows_Build.yml/badge.svg?branch=master)](https://github.com/wheremyfoodat/Panda3DS/actions/workflows/Windows_Build.yml)|[Windows Executable](https://nightly.link/wheremyfoodat/Panda3DS/workflows/Windows_Build/master/Windows%20executable.zip)| -|MacOS build|[![MacOS Build](https://github.com/wheremyfoodat/Panda3DS/actions/workflows/MacOS_Build.yml/badge.svg?branch=master)](https://github.com/wheremyfoodat/Panda3DS/actions/workflows/MacOS_Build.yml)|[MacOS App Bundle](https://nightly.link/wheremyfoodat/Panda3DS/workflows/MacOS_Build/master/MacOS%20Alber%20App%20Bundle.zip)| +|MacOS build|[![MacOS Build](https://github.com/wheremyfoodat/Panda3DS/actions/workflows/MacOS_Build.yml/badge.svg?branch=master)](https://github.com/wheremyfoodat/Panda3DS/actions/workflows/MacOS_Build.yml)|[MacOS App Bundle](https://nightly.link/wheremyfoodat/Panda3DS/workflows/MacOS_Build/master/MacOS%20Alber%20App%20Bundle%20(universal).zip)| |Linux build|[![Linux Build](https://github.com/wheremyfoodat/Panda3DS/actions/workflows/Linux_Build.yml/badge.svg?branch=master)](https://github.com/wheremyfoodat/Panda3DS/actions/workflows/Linux_Build.yml)|[Linux AppImage](https://nightly.link/wheremyfoodat/Panda3DS/workflows/Linux_AppImage_Build/master/Linux%20executable.zip)| -Qt builds: +Qt and Android builds: |Platform|Status|Download| |--------|------------|--------| |Windows build|[![Qt Build](https://github.com/wheremyfoodat/Panda3DS/actions/workflows/Qt_Build.yml/badge.svg)](https://github.com/wheremyfoodat/Panda3DS/actions/workflows/Qt_Build.yml)|[Windows Executable](https://nightly.link/wheremyfoodat/Panda3DS/workflows/Qt_Build/master/Windows%20executable.zip)| -|MacOS build|[![Qt 
Build](https://github.com/wheremyfoodat/Panda3DS/actions/workflows/Qt_Build.yml/badge.svg)](https://github.com/wheremyfoodat/Panda3DS/actions/workflows/Qt_Build.yml)|[MacOS App Bundle](https://nightly.link/wheremyfoodat/Panda3DS/workflows/Qt_Build/master/MacOS%20Alber%20App%20Bundle.zip)| +|MacOS build|[![Qt Build](https://github.com/wheremyfoodat/Panda3DS/actions/workflows/Qt_Build.yml/badge.svg)](https://github.com/wheremyfoodat/Panda3DS/actions/workflows/Qt_Build.yml)|[MacOS App Bundle](https://nightly.link/wheremyfoodat/Panda3DS/workflows/Qt_Build/master/MacOS%20Alber%20App%20Bundle%20(universal).zip)| |Linux build|[![Qt Build](https://github.com/wheremyfoodat/Panda3DS/actions/workflows/Qt_Build.yml/badge.svg)](https://github.com/wheremyfoodat/Panda3DS/actions/workflows/Qt_Build.yml)|[Linux AppImage](https://nightly.link/wheremyfoodat/Panda3DS/workflows/Qt_Build/master/Linux%20executable.zip)| - +|Android build (arm64)|[![Android Build](https://github.com/wheremyfoodat/Panda3DS/actions/workflows/Android_Build.yml/badge.svg)](https://github.com/wheremyfoodat/Panda3DS/actions/workflows/Android_Build.yml)|[Android APK](https://nightly.link/wheremyfoodat/Panda3DS/workflows/Android_Build/master/Android%20APKs%20(arm64).zip)| # Compatibility -Panda3DS is still in the early stages of development. Many games boot, many don't. Lots of games have at least some hilariously broken graphics, audio is not supported, and some QoL features (including a GUI) are missing. However, even more things are implemented, such as most of the 3DS core required to play games, and various neat features, such as Lua scripting, discord bot support, support for some system apps, cheats, controller support, WIP amiibo support and many more! The emulator is constantly evolving, so make sure to take a peek every now and then! +Panda3DS is still in the early stages of development. Many games boot, many don't. 
Lots of games have at least some hilariously broken graphics, audio is WIP, and some QoL features are missing. However, even more things are implemented, such as most of the 3DS core required to play games, and various neat features, such as Lua scripting, discord bot support, support for some system apps, cheats, controller support, WIP amiibo support and many more! The emulator is constantly evolving, so make sure to take a peek every now and then! For documenting game compatibility, make sure to visit the [games list repository](https://github.com/Panda3DS-emu/Panda3DS-Games-List). For miscellaneous issues or more technical issues, feel free to use this repo's issues tab. # Why? @@ -68,7 +68,7 @@ Simply drag and drop a ROM to the executable if supported, or invoke the executa
Panda3DS can load ROMs in the following formats: - .3ds/.cci -- .cxi/.app +- .cxi/.app/.ncch - .elf/.axf - .3dsx @@ -116,9 +116,9 @@ Panda3DS also supports controller input using the SDL2 GameController API. - [MelonDS](https://github.com/melonDS-emu/melonDS): "DS emulator, sorta" - Arisotura - [Kaizen](https://github.com/SimoneN64/Kaizen): Experimental work-in-progress low-level N64 emulator - [ChonkyStation](https://github.com/liuk7071/ChonkyStation): Work-in-progress PlayStation emulator -- [shadPS4](https://github.com/georgemoralis/shadPS4): Work-in-progress PS4 emulator by the founder of PCSX, PCSX2 and more +- [shadPS4](https://github.com/shadps4-emu/shadPS4): Work-in-progress PS4 emulator by the founder of PCSX, PCSX2 and more - [Hydra](https://github.com/hydra-emu/hydra): Cross-platform GameBoy, NES, N64 and Chip-8 emulator - +- [Tanuki3DS](https://github.com/burhanr13/Tanuki3DS/): A new 3DS emulator for MacOS and Linux # Support If you find this project exciting and want to support the founder, check out [his Patreon](https://www.patreon.com/wheremyfoodat) or [Ko-fi](https://ko-fi.com/wheremyfoodat)
diff --git a/src/config.cpp b/src/config.cpp index 2f9b7e00..9b262744 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -1,9 +1,12 @@ #include "config.hpp" +#include +#include #include #include #include #include +#include #include "helpers.hpp" #include "toml.hpp" @@ -24,6 +27,7 @@ void EmulatorConfig::load() { return; } + printf("Loading existing configuration file %s\n", path.string().c_str()); toml::value data; try { @@ -41,6 +45,24 @@ void EmulatorConfig::load() { discordRpcEnabled = toml::find_or(general, "EnableDiscordRPC", false); usePortableBuild = toml::find_or(general, "UsePortableBuild", false); defaultRomPath = toml::find_or(general, "DefaultRomPath", ""); + + printAppVersion = toml::find_or(general, "PrintAppVersion", true); + systemLanguage = languageCodeFromString(toml::find_or(general, "SystemLanguage", "en")); + } + } + + if (data.contains("Window")) { + auto windowResult = toml::expect(data.at("Window")); + if (windowResult.is_ok()) { + auto window = windowResult.unwrap(); + + windowSettings.showAppVersion = toml::find_or(window, "AppVersionOnWindow", false); + windowSettings.rememberPosition = toml::find_or(window, "RememberWindowPosition", false); + + windowSettings.x = toml::find_or(window, "WindowPosX", WindowSettings::defaultX); + windowSettings.y = toml::find_or(window, "WindowPosY", WindowSettings::defaultY); + windowSettings.width = toml::find_or(window, "WindowWidth", WindowSettings::defaultWidth); + windowSettings.height = toml::find_or(window, "WindowHeight", WindowSettings::defaultHeight); } } @@ -62,6 +84,13 @@ void EmulatorConfig::load() { shaderJitEnabled = toml::find_or(gpu, "EnableShaderJIT", shaderJitDefault); vsyncEnabled = toml::find_or(gpu, "EnableVSync", true); + useUbershaders = toml::find_or(gpu, "UseUbershaders", ubershaderDefault); + accurateShaderMul = toml::find_or(gpu, "AccurateShaderMultiplication", false); + accelerateShaders = toml::find_or(gpu, "AccelerateShaders", accelerateShadersDefault); + + 
forceShadergenForLights = toml::find_or(gpu, "ForceShadergenForLighting", true); + lightShadergenThreshold = toml::find_or(gpu, "ShadergenLightThreshold", 1); + enableRenderdoc = toml::find_or(gpu, "EnableRenderdoc", false); } } @@ -70,9 +99,18 @@ void EmulatorConfig::load() { if (audioResult.is_ok()) { auto audio = audioResult.unwrap(); - auto dspCoreName = toml::find_or(audio, "DSPEmulation", "Null"); + auto dspCoreName = toml::find_or(audio, "DSPEmulation", "HLE"); dspType = Audio::DSPCore::typeFromString(dspCoreName); - audioEnabled = toml::find_or(audio, "EnableAudio", false); + + audioEnabled = toml::find_or(audio, "EnableAudio", audioEnabledDefault); + aacEnabled = toml::find_or(audio, "EnableAACAudio", true); + printDSPFirmware = toml::find_or(audio, "PrintDSPFirmware", false); + + audioDeviceConfig.muteAudio = toml::find_or(audio, "MuteAudio", false); + // Our volume ranges from 0.0 (muted) to 2.0 (boosted, using a logarithmic scale). 1.0 is the "default" volume, ie we don't adjust the PCM + // samples at all. 
+ audioDeviceConfig.volumeRaw = float(std::clamp(toml::find_or(audio, "AudioVolume", 1.0), 0.0, 2.0)); + audioDeviceConfig.volumeCurve = AudioDeviceConfig::volumeCurveFromString(toml::find_or(audio, "VolumeCurve", "cubic")); } } @@ -98,6 +136,17 @@ void EmulatorConfig::load() { sdWriteProtected = toml::find_or(sd, "WriteProtectVirtualSD", false); } } + + if (data.contains("UI")) { + auto uiResult = toml::expect(data.at("UI")); + if (uiResult.is_ok()) { + auto ui = uiResult.unwrap(); + + frontendSettings.theme = FrontendSettings::themeFromString(toml::find_or(ui, "Theme", "dark")); + frontendSettings.icon = FrontendSettings::iconFromString(toml::find_or(ui, "WindowIcon", "rpog")); + frontendSettings.language = toml::find_or(ui, "Language", "en"); + } + } } void EmulatorConfig::save() { @@ -122,11 +171,33 @@ void EmulatorConfig::save() { data["General"]["EnableDiscordRPC"] = discordRpcEnabled; data["General"]["UsePortableBuild"] = usePortableBuild; data["General"]["DefaultRomPath"] = defaultRomPath.string(); + data["General"]["PrintAppVersion"] = printAppVersion; + data["General"]["SystemLanguage"] = languageCodeToString(systemLanguage); + + data["Window"]["AppVersionOnWindow"] = windowSettings.showAppVersion; + data["Window"]["RememberWindowPosition"] = windowSettings.rememberPosition; + data["Window"]["WindowPosX"] = windowSettings.x; + data["Window"]["WindowPosY"] = windowSettings.y; + data["Window"]["WindowWidth"] = windowSettings.width; + data["Window"]["WindowHeight"] = windowSettings.height; + data["GPU"]["EnableShaderJIT"] = shaderJitEnabled; data["GPU"]["Renderer"] = std::string(Renderer::typeToString(rendererType)); data["GPU"]["EnableVSync"] = vsyncEnabled; + data["GPU"]["AccurateShaderMultiplication"] = accurateShaderMul; + data["GPU"]["UseUbershaders"] = useUbershaders; + data["GPU"]["ForceShadergenForLighting"] = forceShadergenForLights; + data["GPU"]["ShadergenLightThreshold"] = lightShadergenThreshold; + data["GPU"]["AccelerateShaders"] = 
accelerateShaders; + data["GPU"]["EnableRenderdoc"] = enableRenderdoc; + data["Audio"]["DSPEmulation"] = std::string(Audio::DSPCore::typeToString(dspType)); data["Audio"]["EnableAudio"] = audioEnabled; + data["Audio"]["EnableAACAudio"] = aacEnabled; + data["Audio"]["MuteAudio"] = audioDeviceConfig.muteAudio; + data["Audio"]["AudioVolume"] = double(audioDeviceConfig.volumeRaw); + data["Audio"]["VolumeCurve"] = std::string(AudioDeviceConfig::volumeCurveToString(audioDeviceConfig.volumeCurve)); + data["Audio"]["PrintDSPFirmware"] = printDSPFirmware; data["Battery"]["ChargerPlugged"] = chargerPlugged; data["Battery"]["BatteryPercentage"] = batteryPercentage; @@ -134,7 +205,64 @@ void EmulatorConfig::save() { data["SD"]["UseVirtualSD"] = sdCardInserted; data["SD"]["WriteProtectVirtualSD"] = sdWriteProtected; + data["UI"]["Theme"] = std::string(FrontendSettings::themeToString(frontendSettings.theme)); + data["UI"]["WindowIcon"] = std::string(FrontendSettings::iconToString(frontendSettings.icon)); + data["UI"]["Language"] = frontendSettings.language; + std::ofstream file(path, std::ios::out); file << data; file.close(); } + +AudioDeviceConfig::VolumeCurve AudioDeviceConfig::volumeCurveFromString(std::string inString) { + // Transform to lower-case to make the setting case-insensitive + std::transform(inString.begin(), inString.end(), inString.begin(), [](unsigned char c) { return std::tolower(c); }); + + if (inString == "cubic") { + return VolumeCurve::Cubic; + } else if (inString == "linear") { + return VolumeCurve::Linear; + } + + // Default to cubic curve + return VolumeCurve::Cubic; +} + +const char* AudioDeviceConfig::volumeCurveToString(AudioDeviceConfig::VolumeCurve curve) { + switch (curve) { + case VolumeCurve::Linear: return "linear"; + + case VolumeCurve::Cubic: + default: return "cubic"; + } +} + +LanguageCodes EmulatorConfig::languageCodeFromString(std::string inString) { // Transform to lower-case to make the setting case-insensitive + 
std::transform(inString.begin(), inString.end(), inString.begin(), [](unsigned char c) { return std::tolower(c); }); + + static const std::unordered_map map = { + {"ja", LanguageCodes::Japanese}, {"en", LanguageCodes::English}, {"fr", LanguageCodes::French}, {"de", LanguageCodes::German}, + {"it", LanguageCodes::Italian}, {"es", LanguageCodes::Spanish}, {"zh", LanguageCodes::Chinese}, {"ko", LanguageCodes::Korean}, + {"nl", LanguageCodes::Dutch}, {"pt", LanguageCodes::Portuguese}, {"ru", LanguageCodes::Russian}, {"tw", LanguageCodes::Taiwanese}, + }; + + if (auto search = map.find(inString); search != map.end()) { + return search->second; + } + + // Default to English if no language code in our map matches + return LanguageCodes::English; +} + +const char* EmulatorConfig::languageCodeToString(LanguageCodes code) { + static constexpr std::array codes = { + "ja", "en", "fr", "de", "it", "es", "zh", "ko", "nl", "pt", "ru", "tw", + }; + + // Invalid country code, return english + if (static_cast(code) > static_cast(LanguageCodes::Taiwanese)) { + return "en"; + } else { + return codes[static_cast(code)]; + } +} \ No newline at end of file diff --git a/src/core/PICA/draw_acceleration.cpp b/src/core/PICA/draw_acceleration.cpp new file mode 100644 index 00000000..fe21fe1a --- /dev/null +++ b/src/core/PICA/draw_acceleration.cpp @@ -0,0 +1,141 @@ +#include "PICA/draw_acceleration.hpp" + +#include +#include + +#include "PICA/gpu.hpp" +#include "PICA/pica_simd.hpp" +#include "PICA/regs.hpp" + +void GPU::getAcceleratedDrawInfo(PICA::DrawAcceleration& accel, bool indexed) { + accel.indexed = indexed; + accel.totalAttribCount = totalAttribCount; + accel.enabledAttributeMask = 0; + + const u32 vertexBase = ((regs[PICA::InternalRegs::VertexAttribLoc] >> 1) & 0xfffffff) * 16; + const u32 vertexCount = regs[PICA::InternalRegs::VertexCountReg]; // Total # of vertices to transfer + + if (indexed) { + u32 indexBufferConfig = regs[PICA::InternalRegs::IndexBufferConfig]; + u32 
indexBufferPointer = vertexBase + (indexBufferConfig & 0xfffffff); + + u8* indexBuffer = getPointerPhys(indexBufferPointer); + u16 minimumIndex = std::numeric_limits::max(); + u16 maximumIndex = 0; + + // Check whether the index buffer uses u16 indices or u8 + accel.useShortIndices = Helpers::getBit<31>(indexBufferConfig); // Indicates whether vert indices are 16-bit or 8-bit + + // Calculate the minimum and maximum indices used in the index buffer, so we'll only upload them + if (accel.useShortIndices) { + std::tie(accel.minimumIndex, accel.maximumIndex) = PICA::IndexBuffer::analyze(indexBuffer, vertexCount); + } else { + std::tie(accel.minimumIndex, accel.maximumIndex) = PICA::IndexBuffer::analyze(indexBuffer, vertexCount); + } + + accel.indexBuffer = indexBuffer; + } else { + accel.indexBuffer = nullptr; + accel.minimumIndex = regs[PICA::InternalRegs::VertexOffsetReg]; + accel.maximumIndex = accel.minimumIndex + vertexCount - 1; + } + + const u64 vertexCfg = u64(regs[PICA::InternalRegs::AttribFormatLow]) | (u64(regs[PICA::InternalRegs::AttribFormatHigh]) << 32); + const u64 inputAttrCfg = getVertexShaderInputConfig(); + + u32 attrCount = 0; + u32 loaderOffset = 0; + accel.vertexDataSize = 0; + accel.totalLoaderCount = 0; + + for (int i = 0; i < PICA::DrawAcceleration::maxLoaderCount; i++) { + auto& loaderData = attributeInfo[i]; // Get information for this attribute loader + + // This loader is empty, skip it + if (loaderData.componentCount == 0 || loaderData.size == 0) { + continue; + } + + auto& loader = accel.loaders[accel.totalLoaderCount++]; + + // The size of the loader in bytes is equal to the bytes supplied for 1 vertex, multiplied by the number of vertices we'll be uploading + // Which is equal to maximumIndex - minimumIndex + 1 + const u32 bytes = loaderData.size * (accel.maximumIndex - accel.minimumIndex + 1); + loader.size = bytes; + + // Add it to the total vertex data size, aligned to 4 bytes. 
+ accel.vertexDataSize += (bytes + 3) & ~3; + + // Get a pointer to the data where this loader's data is stored + const u32 loaderAddress = vertexBase + loaderData.offset + (accel.minimumIndex * loaderData.size); + loader.data = getPointerPhys(loaderAddress); + + u64 attrCfg = loaderData.getConfigFull(); // Get config1 | (config2 << 32) + u32 attributeOffset = 0; + + for (int component = 0; component < loaderData.componentCount; component++) { + uint attributeIndex = (attrCfg >> (component * 4)) & 0xf; // Get index of attribute in vertexCfg + + // Vertex attributes used as padding + // 12, 13, 14 and 15 are equivalent to 4, 8, 12 and 16 bytes of padding respectively + if (attributeIndex >= 12) [[unlikely]] { + // Align attribute address up to a 4 byte boundary + attributeOffset = (attributeOffset + 3) & -4; + attributeOffset += (attributeIndex - 11) << 2; + continue; + } + + const u32 attribInfo = (vertexCfg >> (attributeIndex * 4)) & 0xf; + const u32 attribType = attribInfo & 0x3; // Type of attribute (sbyte/ubyte/short/float) + const u32 size = (attribInfo >> 2) + 1; // Total number of components + + // Size of each component based on the attribute type + [[maybe_unused]] static constexpr u32 sizePerComponent[4] = {1, 1, 2, 4}; + // To avoid a multiplication, instead of multiplying by the above values, we shift left instead + // So multiplication by 1 becomes a shift by 0, mul by 2 becomes a shift by 1, and mul by 4 becomes a shift by 2 + static constexpr u32 sizeShiftPerComponent[4] = {0, 0, 1, 2}; + + const u32 inputReg = (inputAttrCfg >> (attributeIndex * 4)) & 0xf; + // Mark the attribute as enabled + accel.enabledAttributeMask |= 1 << inputReg; + + auto& attr = accel.attributeInfo[inputReg]; + attr.componentCount = size; + attr.offset = attributeOffset + loaderOffset; + attr.stride = loaderData.size; + attr.type = attribType; + attributeOffset += size << sizeShiftPerComponent[attribType]; + } + + loaderOffset += loader.size; + } + + u32 fixedAttributes = 
fixedAttribMask; + accel.fixedAttributes = 0; + + // Fetch values for all fixed attributes using CLZ on the fixed attribute mask to find the attributes that are actually fixed + while (fixedAttributes != 0) { + // Get index of next fixed attribute and turn it off + const u32 index = std::countr_zero(fixedAttributes); + const u32 mask = 1u << index; + fixedAttributes ^= mask; + + // PICA register this fixed attribute is meant to go to + const u32 inputReg = (inputAttrCfg >> (index * 4)) & 0xf; + const u32 inputRegMask = 1u << inputReg; + + // If this input reg is already used for a non-fixed attribute then it will not be replaced by a fixed attribute + if ((accel.enabledAttributeMask & inputRegMask) == 0) { + vec4f& fixedAttr = shaderUnit.vs.fixedAttributes[index]; + auto& attr = accel.attributeInfo[inputReg]; + + accel.fixedAttributes |= inputRegMask; + + for (int i = 0; i < 4; i++) { + attr.fixedValue[i] = fixedAttr[i].toFloat32(); + } + } + } + + accel.canBeAccelerated = true; +} diff --git a/src/core/PICA/dynapica/shader_rec.cpp b/src/core/PICA/dynapica/shader_rec.cpp index 20e171d7..e3c13c1e 100644 --- a/src/core/PICA/dynapica/shader_rec.cpp +++ b/src/core/PICA/dynapica/shader_rec.cpp @@ -16,7 +16,7 @@ void ShaderJIT::prepare(PICAShader& shaderUnit) { auto it = cache.find(hash); if (it == cache.end()) { // Block has not been compiled yet - auto emitter = std::make_unique(); + auto emitter = std::make_unique(accurateMul); emitter->compile(shaderUnit); // Get pointer to callbacks entrypointCallback = emitter->getInstructionCallback(shaderUnit.entrypoint); diff --git a/src/core/PICA/dynapica/shader_rec_emitter_arm64.cpp b/src/core/PICA/dynapica/shader_rec_emitter_arm64.cpp index 15200e76..296ec932 100644 --- a/src/core/PICA/dynapica/shader_rec_emitter_arm64.cpp +++ b/src/core/PICA/dynapica/shader_rec_emitter_arm64.cpp @@ -7,9 +7,6 @@ using namespace Helpers; using namespace oaknut; using namespace oaknut::util; -// TODO: Expose safe/unsafe optimizations to the 
user -constexpr bool useSafeMUL = true; - // Similar to the x64 recompiler, we use an odd internal ABI, which abuses the fact that we'll very rarely be calling C++ functions // So to avoid pushing and popping, we'll be making use of volatile registers as much as possible static constexpr QReg src1Vec = Q1; @@ -491,7 +488,7 @@ void ShaderEmitter::recDP3(const PICAShader& shader, u32 instruction) { // Now do a full DP4 // Do a piecewise multiplication of the vectors first - if constexpr (useSafeMUL) { + if (useSafeMUL) { emitSafeMUL(src1Vec, src2Vec, scratch1Vec); } else { FMUL(src1Vec.S4(), src1Vec.S4(), src2Vec.S4()); @@ -518,7 +515,7 @@ void ShaderEmitter::recDP4(const PICAShader& shader, u32 instruction) { loadRegister<2>(src2Vec, shader, src2, 0, operandDescriptor); // Do a piecewise multiplication of the vectors first - if constexpr (useSafeMUL) { + if (useSafeMUL) { emitSafeMUL(src1Vec, src2Vec, scratch1Vec); } else { FMUL(src1Vec.S4(), src1Vec.S4(), src2Vec.S4()); @@ -551,7 +548,7 @@ void ShaderEmitter::recDPH(const PICAShader& shader, u32 instruction) { // Now perform a DP4 // Do a piecewise multiplication of the vectors first - if constexpr (useSafeMUL) { + if (useSafeMUL) { emitSafeMUL(src1Vec, src2Vec, scratch1Vec); } else { FMUL(src1Vec.S4(), src1Vec.S4(), src2Vec.S4()); @@ -834,7 +831,7 @@ void ShaderEmitter::recMUL(const PICAShader& shader, u32 instruction) { loadRegister<1>(src1Vec, shader, src1, idx, operandDescriptor); loadRegister<2>(src2Vec, shader, src2, 0, operandDescriptor); - if constexpr (useSafeMUL) { + if (useSafeMUL) { emitSafeMUL(src1Vec, src2Vec, scratch1Vec); } else { FMUL(src1Vec.S4(), src1Vec.S4(), src2Vec.S4()); @@ -907,7 +904,7 @@ void ShaderEmitter::recMAD(const PICAShader& shader, u32 instruction) { loadRegister<2>(src2Vec, shader, src2, isMADI ? 0 : idx, operandDescriptor); loadRegister<3>(src3Vec, shader, src3, isMADI ? 
idx : 0, operandDescriptor); - if constexpr (useSafeMUL) { + if (useSafeMUL) { emitSafeMUL(src1Vec, src2Vec, scratch1Vec); FADD(src3Vec.S4(), src3Vec.S4(), src1Vec.S4()); } else { diff --git a/src/core/PICA/dynapica/shader_rec_emitter_x64.cpp b/src/core/PICA/dynapica/shader_rec_emitter_x64.cpp index c134b72f..ddec3a36 100644 --- a/src/core/PICA/dynapica/shader_rec_emitter_x64.cpp +++ b/src/core/PICA/dynapica/shader_rec_emitter_x64.cpp @@ -45,6 +45,16 @@ void ShaderEmitter::compile(const PICAShader& shaderUnit) { L(onesVector); dd(0x3f800000); dd(0x3f800000); dd(0x3f800000); dd(0x3f800000); // 1.0 4 times + if (useSafeMUL) { + // When doing safe mul, we need a vector to set only the w component to 0 for DP3 + L(dp3Vector); + + dd(0xFFFFFFFF); + dd(0xFFFFFFFF); + dd(0xFFFFFFFF); + dd(0); + } + // Emit prologue first align(16); prologueCb = getCurr(); @@ -360,12 +370,11 @@ void ShaderEmitter::storeRegister(Xmm source, const PICAShader& shader, u32 dest } else if (haveSSE4_1) { // Bit reverse the write mask because that is what blendps expects u32 adjustedMask = ((writeMask >> 3) & 0b1) | ((writeMask >> 1) & 0b10) | ((writeMask << 1) & 0b100) | ((writeMask << 3) & 0b1000); - // Don't accidentally overwrite scratch1 if that is what we're writing derp - Xmm temp = (source == scratch1) ? scratch2 : scratch1; - movaps(temp, xword[statePointer + offset]); // Read current value of dest - blendps(temp, source, adjustedMask); // Blend with source - movaps(xword[statePointer + offset], temp); // Write back + // Blend current value of dest with source. We have to invert the bits of the mask, as we do blendps source, dest instead of dest, source + // Note: This destroys source + blendps(source, xword[statePointer + offset], adjustedMask ^ 0xF); + movaps(xword[statePointer + offset], source); // Write back } else { // Blend algo referenced from Citra const u8 selector = (((writeMask & 0b1000) ? 
1 : 0) << 0) | @@ -523,24 +532,60 @@ void ShaderEmitter::recDP3(const PICAShader& shader, u32 instruction) { const u32 idx = getBits<19, 2>(instruction); const u32 dest = getBits<21, 5>(instruction); - // TODO: Safe multiplication equivalent (Multiplication is not IEEE compliant on the PICA) loadRegister<1>(src1_xmm, shader, src1, idx, operandDescriptor); loadRegister<2>(src2_xmm, shader, src2, 0, operandDescriptor); - dpps(src1_xmm, src2_xmm, 0b01111111); // 3-lane dot product between the 2 registers, store the result in all lanes of scratch1 similarly to PICA + + if (!useSafeMUL) { + dpps(src1_xmm, src2_xmm, 0b01111111); + } else { + const u32 writeMask = operandDescriptor & 0xf; + + // Set w component to 0 and do a DP4 + andps(src1_xmm, xword[rip + dp3Vector]); + + // Set src1 to src1 * src2, then get the dot product by doing 2 horizontal adds + emitSafeMUL(src1_xmm, src2_xmm, scratch1); + haddps(src1_xmm, src1_xmm); + haddps(src1_xmm, src1_xmm); + + // If we only write back the x component to the result, we needn't perform a shuffle to do res = res.xxxx + // Otherwise we do + if (writeMask != 0x8) { // Copy bottom lane to all lanes if we're not simply writing back x + shufps(src1_xmm, src1_xmm, 0); // src1_xmm = src1_xmm.xxxx + } + } + storeRegister(src1_xmm, shader, dest, operandDescriptor); } void ShaderEmitter::recDP4(const PICAShader& shader, u32 instruction) { const u32 operandDescriptor = shader.operandDescriptors[instruction & 0x7f]; const u32 src1 = getBits<12, 7>(instruction); - const u32 src2 = getBits<7, 5>(instruction); // src2 coming first because PICA moment + const u32 src2 = getBits<7, 5>(instruction); // src2 coming first because PICA moment const u32 idx = getBits<19, 2>(instruction); const u32 dest = getBits<21, 5>(instruction); - // TODO: Safe multiplication equivalent (Multiplication is not IEEE compliant on the PICA) loadRegister<1>(src1_xmm, shader, src1, idx, operandDescriptor); loadRegister<2>(src2_xmm, shader, src2, 0, 
operandDescriptor); - dpps(src1_xmm, src2_xmm, 0b11111111); // 4-lane dot product between the 2 registers, store the result in all lanes of scratch1 similarly to PICA + + if (!useSafeMUL) { + // 4-lane dot product between the 2 registers, store the result in all lanes of scratch1 similarly to PICA + dpps(src1_xmm, src2_xmm, 0b11111111); + } else { + const u32 writeMask = operandDescriptor & 0xf; + + // Set src1 to src1 * src2, then get the dot product by doing 2 horizontal adds + emitSafeMUL(src1_xmm, src2_xmm, scratch1); + haddps(src1_xmm, src1_xmm); + haddps(src1_xmm, src1_xmm); + + // If we only write back the x component to the result, we needn't perform a shuffle to do res = res.xxxx + // Otherwise we do + if (writeMask != 0x8) { // Copy bottom lane to all lanes if we're not simply writing back x + shufps(src1_xmm, src1_xmm, 0); // src1_xmm = src1_xmm.xxxx + } + } + storeRegister(src1_xmm, shader, dest, operandDescriptor); } @@ -553,7 +598,6 @@ void ShaderEmitter::recDPH(const PICAShader& shader, u32 instruction) { const u32 idx = getBits<19, 2>(instruction); const u32 dest = getBits<21, 5>(instruction); - // TODO: Safe multiplication equivalent (Multiplication is not IEEE compliant on the PICA) loadRegister<1>(src1_xmm, shader, src1, isDPHI ? 0 : idx, operandDescriptor); loadRegister<2>(src2_xmm, shader, src2, isDPHI ? 
idx : 0, operandDescriptor); @@ -566,7 +610,25 @@ void ShaderEmitter::recDPH(const PICAShader& shader, u32 instruction) { unpcklpd(src1_xmm, scratch1); } - dpps(src1_xmm, src2_xmm, 0b11111111); // 4-lane dot product between the 2 registers, store the result in all lanes of scratch1 similarly to PICA + // Now perform a DP4 + if (!useSafeMUL) { + // 4-lane dot product between the 2 registers, store the result in all lanes of scratch1 similarly to PICA + dpps(src1_xmm, src2_xmm, 0b11111111); + } else { + const u32 writeMask = operandDescriptor & 0xf; + + // Set src1 to src1 * src2, then get the dot product by doing 2 horizontal adds + emitSafeMUL(src1_xmm, src2_xmm, scratch1); + haddps(src1_xmm, src1_xmm); + haddps(src1_xmm, src1_xmm); + + // If we only write back the x component to the result, we needn't perform a shuffle to do res = res.xxxx + // Otherwise we do + if (writeMask != 0x8) { // Copy bottom lane to all lanes if we're not simply writing back x + shufps(src1_xmm, src1_xmm, 0); // src1_xmm = src1_xmm.xxxx + } + } + storeRegister(src1_xmm, shader, dest, operandDescriptor); } @@ -603,10 +665,15 @@ void ShaderEmitter::recMUL(const PICAShader& shader, u32 instruction) { const u32 idx = getBits<19, 2>(instruction); const u32 dest = getBits<21, 5>(instruction); - // TODO: Safe multiplication equivalent (Multiplication is not IEEE compliant on the PICA) loadRegister<1>(src1_xmm, shader, src1, idx, operandDescriptor); loadRegister<2>(src2_xmm, shader, src2, 0, operandDescriptor); - mulps(src1_xmm, src2_xmm); + + if (!useSafeMUL) { + mulps(src1_xmm, src2_xmm); + } else { + emitSafeMUL(src1_xmm, src2_xmm, scratch1); + } + storeRegister(src1_xmm, shader, dest, operandDescriptor); } @@ -662,23 +729,31 @@ void ShaderEmitter::recMAD(const PICAShader& shader, u32 instruction) { loadRegister<2>(src2_xmm, shader, src2, isMADI ? 0 : idx, operandDescriptor); loadRegister<3>(src3_xmm, shader, src3, isMADI ? 
idx : 0, operandDescriptor); - // TODO: Implement safe PICA mul // If we have FMA3, optimize MAD to use FMA - if (haveFMA3) { - vfmadd213ps(src1_xmm, src2_xmm, src3_xmm); - storeRegister(src1_xmm, shader, dest, operandDescriptor); - } - - // If we don't have FMA3, do a multiplication and addition - else { - // Multiply src1 * src2 - if (haveAVX) { - vmulps(scratch1, src1_xmm, src2_xmm); - } else { - movaps(scratch1, src1_xmm); - mulps(scratch1, src2_xmm); + if (!useSafeMUL) { + if (haveFMA3) { + vfmadd213ps(src1_xmm, src2_xmm, src3_xmm); + storeRegister(src1_xmm, shader, dest, operandDescriptor); } + // If we don't have FMA3, do a multiplication and addition + else { + // Multiply src1 * src2 + if (haveAVX) { + vmulps(scratch1, src1_xmm, src2_xmm); + } else { + movaps(scratch1, src1_xmm); + mulps(scratch1, src2_xmm); + } + + // Add src3 + addps(scratch1, src3_xmm); + storeRegister(scratch1, shader, dest, operandDescriptor); + } + } else { + movaps(scratch1, src1_xmm); + emitSafeMUL(scratch1, src2_xmm, src1_xmm); + // Add src3 addps(scratch1, src3_xmm); storeRegister(scratch1, shader, dest, operandDescriptor); @@ -1115,6 +1190,41 @@ Xbyak::Label ShaderEmitter::emitLog2Func() { return subroutine; } +void ShaderEmitter::emitSafeMUL(Xmm src1, Xmm src2, Xmm scratch) { + // 0 * inf and inf * 0 in the PICA should return 0 instead of NaN + // This can be done by checking for NaNs before and after a multiplication + // To do this we can create a mask of which components of src1/src2 are NOT NaN using cmpordps (cmpps with imm = 7) + // Then we multiply src1 and src2 and create a mask of which components of the result ARE NaN using cmpunordps + // If the NaNs didn't exist (ie they were created by 0 * inf) before then we set them to 0 by XORing the 2 masks and ANDing the multiplication + // result with the xor result + // Based on Citra implementation, particularly the AVX-512 version + + if (cpuCaps.has(Cpu::tAVX512F | Cpu::tAVX512VL)) { + const Xbyak::Opmask zeroMask = k1; +
+ vmulps(scratch, src1, src2); + // Mask of any NaN values found in the result + vcmpunordps(zeroMask, scratch, scratch); + // Mask of any non-NaN inputs producing NaN results + vcmpordps(zeroMask | zeroMask, src1, src2); + + knotb(zeroMask, zeroMask); + vmovaps(src1 | zeroMask | T_z, scratch); + } else { + if (haveAVX) { + vcmpordps(scratch, src1, src2); + } else { + movaps(scratch, src1); + cmpordps(scratch, src2); + } + + mulps(src1, src2); + cmpunordps(src2, src1); + xorps(src2, scratch); + andps(src1, src2); + } +} + Xbyak::Label ShaderEmitter::emitExp2Func() { Xbyak::Label subroutine; diff --git a/src/core/PICA/gpu.cpp b/src/core/PICA/gpu.cpp index a777d0a3..838d3fb3 100644 --- a/src/core/PICA/gpu.cpp +++ b/src/core/PICA/gpu.cpp @@ -15,6 +15,9 @@ #ifdef PANDA3DS_ENABLE_VULKAN #include "renderer_vk/renderer_vk.hpp" #endif +#ifdef PANDA3DS_ENABLE_METAL +#include "renderer_mtl/renderer_mtl.hpp" +#endif constexpr u32 topScreenWidth = 240; constexpr u32 topScreenHeight = 400; @@ -52,22 +55,37 @@ GPU::GPU(Memory& mem, EmulatorConfig& config) : mem(mem), config(config) { renderer.reset(new RendererVK(*this, regs, externalRegs)); break; } +#endif +#ifdef PANDA3DS_ENABLE_METAL + case RendererType::Metal: { + renderer.reset(new RendererMTL(*this, regs, externalRegs)); + break; + } #endif default: { Helpers::panic("Rendering backend not supported: %s", Renderer::typeToString(config.rendererType)); break; } } + + if (renderer != nullptr) { + renderer->setConfig(&config); + } } void GPU::reset() { regs.fill(0); shaderUnit.reset(); shaderJIT.reset(); + shaderJIT.setAccurateMul(config.accurateShaderMul); + std::memset(vram, 0, vramSize); lightingLUT.fill(0); lightingLUTDirty = true; + fogLUT.fill(0); + fogLUTDirty = true; + totalAttribCount = 0; fixedAttribMask = 0; fixedAttribIndex = 0; @@ -111,33 +129,59 @@ void GPU::reset() { renderer->reset(); } +static std::array vertices; + // Call the correct version of drawArrays based on whether this is an indexed draw (first 
template parameter) // And whether we are going to use the shader JIT (second template parameter) void GPU::drawArrays(bool indexed) { - const bool shaderJITEnabled = ShaderJIT::isAvailable() && config.shaderJitEnabled; + PICA::DrawAcceleration accel; - if (indexed) { - if (shaderJITEnabled) - drawArrays(); - else - drawArrays(); + if (config.accelerateShaders) { + // If we are potentially going to use hw shaders, gather necessary to do vertex fetch, index buffering, etc on the GPU + // This includes parsing which vertices to upload, getting pointers to the index buffer data & vertex data, and so on + getAcceleratedDrawInfo(accel, indexed); + } + + const bool hwShaders = renderer->prepareForDraw(shaderUnit, &accel); + + if (hwShaders) { + // Hardware shaders have their own accelerated code path for draws, so they skip everything here + const PICA::PrimType primType = static_cast(Helpers::getBits<8, 2>(regs[PICA::InternalRegs::PrimitiveConfig])); + // Total # of vertices to render + const u32 vertexCount = regs[PICA::InternalRegs::VertexCountReg]; + + // Note: In the hardware shader path the vertices span shouldn't actually be used as the renderer will perform its own attribute fetching + renderer->drawVertices(primType, std::span(vertices).first(vertexCount)); } else { - if (shaderJITEnabled) - drawArrays(); - else - drawArrays(); + const bool shaderJITEnabled = ShaderJIT::isAvailable() && config.shaderJitEnabled; + + if (indexed) { + if (shaderJITEnabled) { + drawArrays(); + } else { + drawArrays(); + } + } else { + if (shaderJITEnabled) { + drawArrays(); + } else { + drawArrays(); + } + } } } -static std::array vertices; - -template +template void GPU::drawArrays() { - if constexpr (useShaderJIT) { + if constexpr (mode == ShaderExecMode::JIT) { shaderJIT.prepare(shaderUnit.vs); + } else if constexpr (mode == ShaderExecMode::Hardware) { + // Hardware shaders have their own accelerated code path for draws, so they're not meant to take this path + 
Helpers::panic("GPU::DrawArrays: Hardware shaders shouldn't take this path!"); } - setVsOutputMask(regs[PICA::InternalRegs::VertexShaderOutputMask]); + // We can have up to 16 attributes, each one consisting of 4 floats + constexpr u32 maxAttrSizeInFloats = 16 * 4; // Base address for vertex attributes // The vertex base is always on a quadword boundary because the PICA does weird alignment shit any time possible @@ -147,7 +191,10 @@ void GPU::drawArrays() { // Configures the type of primitive and the number of vertex shader outputs const u32 primConfig = regs[PICA::InternalRegs::PrimitiveConfig]; const PICA::PrimType primType = static_cast(Helpers::getBits<8, 2>(primConfig)); - if (vertexCount > Renderer::vertexBufferSize) Helpers::panic("[PICA] vertexCount > vertexBufferSize"); + if (vertexCount > Renderer::vertexBufferSize) [[unlikely]] { + Helpers::warn("[PICA] vertexCount > vertexBufferSize"); + return; + } if ((primType == PICA::PrimType::TriangleList && vertexCount % 3) || (primType == PICA::PrimType::TriangleStrip && vertexCount < 3) || (primType == PICA::PrimType::TriangleFan && vertexCount < 3)) { @@ -299,8 +346,6 @@ void GPU::drawArrays() { } // Fill the remaining attribute lanes with default parameters (1.0 for alpha/w, 0.0) for everything else - // Corgi does this although I'm not sure if it's actually needed for anything. - // TODO: Find out while (component < 4) { attribute[component] = (component == 3) ? f24::fromFloat32(1.0) : f24::fromFloat32(0.0); component++; @@ -314,13 +359,13 @@ void GPU::drawArrays() { // Before running the shader, the PICA maps the fetched attributes from the attribute registers to the shader input registers // Based on the SH_ATTRIBUTES_PERMUTATION registers. 
- // Ie it might attribute #0 to v2, #1 to v7, etc + // Ie it might map attribute #0 to v2, #1 to v7, etc for (int j = 0; j < totalAttribCount; j++) { const u32 mapping = (inputAttrCfg >> (j * 4)) & 0xf; std::memcpy(&shaderUnit.vs.inputs[mapping], &currentAttributes[j], sizeof(vec4f)); } - if constexpr (useShaderJIT) { + if constexpr (mode == ShaderExecMode::JIT) { shaderJIT.run(shaderUnit.vs); } else { shaderUnit.vs.run(); @@ -355,7 +400,7 @@ PICA::Vertex GPU::getImmediateModeVertex() { // Run VS and return vertex data. TODO: Don't hardcode offsets for each attribute shaderUnit.vs.run(); - + // Map shader outputs to fixed function properties const u32 totalShaderOutputs = regs[PICA::InternalRegs::ShaderOutputCount] & 7; for (int i = 0; i < totalShaderOutputs; i++) { diff --git a/src/core/PICA/regs.cpp b/src/core/PICA/regs.cpp index baaa2256..4c865d12 100644 --- a/src/core/PICA/regs.cpp +++ b/src/core/PICA/regs.cpp @@ -135,6 +135,21 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) { break; } + case FogLUTData0: + case FogLUTData1: + case FogLUTData2: + case FogLUTData3: + case FogLUTData4: + case FogLUTData5: + case FogLUTData6: + case FogLUTData7: { + const uint32_t index = regs[FogLUTIndex] & 0x7F; + fogLUT[index] = value; + fogLUTDirty = true; + regs[FogLUTIndex] = (index + 1) & 0x7F; + break; + } + case LightingLUTData0: case LightingLUTData1: case LightingLUTData2: @@ -234,6 +249,7 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) { // If we've reached 3 verts, issue a draw call // Handle rendering depending on the primitive type if (immediateModeVertIndex == 3) { + renderer->prepareForDraw(shaderUnit, nullptr); renderer->drawVertices(PICA::PrimType::TriangleList, immediateModeVertices); switch (primType) { @@ -285,7 +301,7 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) { } case VertexBoolUniform: { - shaderUnit.vs.boolUniform = value & 0xffff; + shaderUnit.vs.uploadBoolUniform(value & 0xffff); break; } @@ -314,9 +330,11 @@
void GPU::writeInternalReg(u32 index, u32 value, u32 mask) { break; } + /* TODO: Find out if this actually does anything case VertexShaderTransferEnd: if (value != 0) shaderUnit.vs.finalize(); break; + */ case VertexShaderTransferIndex: shaderUnit.vs.setBufferIndex(value); break; diff --git a/src/core/PICA/shader_decompiler.cpp b/src/core/PICA/shader_decompiler.cpp new file mode 100644 index 00000000..467c4727 --- /dev/null +++ b/src/core/PICA/shader_decompiler.cpp @@ -0,0 +1,832 @@ +#include "PICA/shader_decompiler.hpp" + +#include + +#include +#include + +#include "config.hpp" + +using namespace PICA; +using namespace PICA::ShaderGen; +using namespace Helpers; + +using Function = ControlFlow::Function; +using ExitMode = Function::ExitMode; + +void ControlFlow::analyze(const PICAShader& shader, u32 entrypoint) { + analysisFailed = false; + + const Function* function = addFunction(shader, entrypoint, PICAShader::maxInstructionCount); + if (function == nullptr || function->exitMode != ExitMode::AlwaysEnd) { + analysisFailed = true; + } +} + +// Helpers for merging parallel/series exit methods from Citra +// Merges exit method of two parallel branches. +static ExitMode exitParallel(ExitMode a, ExitMode b) { + if (a == ExitMode::Unknown) { + return b; + } + else if (b == ExitMode::Unknown) { + return a; + } + else if (a == b) { + return a; + } + return ExitMode::Conditional; +} + +// Cascades exit method of two blocks of code. 
+static ExitMode exitSeries(ExitMode a, ExitMode b) { + assert(a != ExitMode::AlwaysEnd); + + if (a == ExitMode::Unknown) { + return ExitMode::Unknown; + } + + if (a == ExitMode::AlwaysReturn) { + return b; + } + + if (b == ExitMode::Unknown || b == ExitMode::AlwaysEnd) { + return ExitMode::AlwaysEnd; + } + + return ExitMode::Conditional; +} + +ExitMode ControlFlow::analyzeFunction(const PICAShader& shader, u32 start, u32 end, Function::Labels& labels) { + // Initialize exit mode to unknown by default, in order to detect things like unending loops + auto [it, inserted] = exitMap.emplace(AddressRange(start, end), ExitMode::Unknown); + // Function has already been analyzed and is in the map so it wasn't added, don't analyze again + if (!inserted) { + return it->second; + } + + // Make sure not to go out of bounds on the shader + for (u32 pc = start; pc < PICAShader::maxInstructionCount && pc != end; pc++) { + const u32 instruction = shader.loadedShader[pc]; + const u32 opcode = instruction >> 26; + + switch (opcode) { + case ShaderOpcodes::JMPC: + case ShaderOpcodes::JMPU: { + const u32 dest = getBits<10, 12>(instruction); + // Register this jump address to our outLabels set + labels.insert(dest); + + // This opens up 2 parallel paths of execution + auto branchTakenExit = analyzeFunction(shader, dest, end, labels); + auto branchNotTakenExit = analyzeFunction(shader, pc + 1, end, labels); + it->second = exitParallel(branchTakenExit, branchNotTakenExit); + return it->second; + } + + case ShaderOpcodes::IFU: + case ShaderOpcodes::IFC: { + const u32 num = instruction & 0xff; + const u32 dest = getBits<10, 12>(instruction); + + const Function* branchTakenFunc = addFunction(shader, pc + 1, dest); + // Check if analysis of the branch taken func failed and return unknown if it did + if (analysisFailed) { + it->second = ExitMode::Unknown; + return it->second; + } + + // Next analyze the not taken func + ExitMode branchNotTakenExitMode = ExitMode::AlwaysReturn; + if (num != 0) 
{ + const Function* branchNotTakenFunc = addFunction(shader, dest, dest + num); + // Check if analysis failed and return unknown if it did + if (analysisFailed) { + it->second = ExitMode::Unknown; + return it->second; + } + + branchNotTakenExitMode = branchNotTakenFunc->exitMode; + } + + auto parallel = exitParallel(branchTakenFunc->exitMode, branchNotTakenExitMode); + // Both branches of the if/else end, so there's nothing after the call + if (parallel == ExitMode::AlwaysEnd) { + it->second = parallel; + return it->second; + } else { + ExitMode afterConditional = analyzeFunction(shader, dest + num, end, labels); + ExitMode conditionalExitMode = exitSeries(parallel, afterConditional); + it->second = conditionalExitMode; + return it->second; + } + break; + } + + case ShaderOpcodes::CALL: { + const u32 num = instruction & 0xff; + const u32 dest = getBits<10, 12>(instruction); + const Function* calledFunction = addFunction(shader, dest, dest + num); + + // Check if analysis of the branch taken func failed and return unknown if it did + if (analysisFailed) { + it->second = ExitMode::Unknown; + return it->second; + } + + if (calledFunction->exitMode == ExitMode::AlwaysEnd) { + it->second = ExitMode::AlwaysEnd; + return it->second; + } + + // Exit mode of the remainder of this function, after we return from the callee + const ExitMode postCallExitMode = analyzeFunction(shader, pc + 1, end, labels); + const ExitMode exitMode = exitSeries(calledFunction->exitMode, postCallExitMode); + + it->second = exitMode; + return exitMode; + } + + case ShaderOpcodes::CALLC: + case ShaderOpcodes::CALLU: { + const u32 num = instruction & 0xff; + const u32 dest = getBits<10, 12>(instruction); + const Function* calledFunction = addFunction(shader, dest, dest + num); + + // Check if analysis of the branch taken func failed and return unknown if it did + if (analysisFailed) { + it->second = ExitMode::Unknown; + return it->second; + } + + // Exit mode of the remainder of this function, after 
we return from the callee + const ExitMode postCallExitMode = analyzeFunction(shader, pc + 1, end, labels); + const ExitMode exitMode = exitSeries(exitParallel(calledFunction->exitMode, ExitMode::AlwaysReturn), postCallExitMode); + + it->second = exitMode; + return exitMode; + } + + case ShaderOpcodes::LOOP: { + u32 dest = getBits<10, 12>(instruction); + const Function* loopFunction = addFunction(shader, pc + 1, dest + 1); + if (analysisFailed) { + it->second = ExitMode::Unknown; + return it->second; + } + + if (loopFunction->exitMode == ExitMode::AlwaysEnd) { + it->second = ExitMode::AlwaysEnd; + return it->second; + } + + const ExitMode afterLoop = analyzeFunction(shader, dest + 1, end, labels); + const ExitMode exitMode = exitSeries(loopFunction->exitMode, afterLoop); + it->second = exitMode; + return it->second; + } + + case ShaderOpcodes::END: it->second = ExitMode::AlwaysEnd; return it->second; + default: break; + } + } + + // A function without control flow instructions will always reach its "return point" and return + it->second = ExitMode::AlwaysReturn; + return it->second; +} + +std::pair ShaderDecompiler::compileRange(const AddressRange& range) { + u32 pc = range.start; + const u32 end = range.end >= range.start ? 
range.end : PICAShader::maxInstructionCount; + bool finished = false; + + while (pc < end && !finished) { + compileInstruction(pc, finished); + } + + return std::make_pair(pc, finished); +} + +const Function* ShaderDecompiler::findFunction(const AddressRange& range) { + for (const Function& func : controlFlow.functions) { + if (range.start == func.start && range.end == func.end) { + return &func; + } + } + + return nullptr; +} + +void ShaderDecompiler::writeAttributes() { + // Annoyingly, GLES does not support having an array as an input attribute, so declare each attribute separately for now + decompiledShader += R"( + layout(location = 0) in vec4 attr0; + layout(location = 1) in vec4 attr1; + layout(location = 2) in vec4 attr2; + layout(location = 3) in vec4 attr3; + layout(location = 4) in vec4 attr4; + layout(location = 5) in vec4 attr5; + layout(location = 6) in vec4 attr6; + layout(location = 7) in vec4 attr7; + layout(location = 8) in vec4 attr8; + layout(location = 9) in vec4 attr9; + layout(location = 10) in vec4 attr10; + layout(location = 11) in vec4 attr11; + layout(location = 12) in vec4 attr12; + layout(location = 13) in vec4 attr13; + layout(location = 14) in vec4 attr14; + layout(location = 15) in vec4 attr15; + + layout(std140) uniform PICAShaderUniforms { + vec4 uniform_f[96]; + uvec4 uniform_i; + uint uniform_bool; + }; + + vec4 temp[16]; + vec4 out_regs[16]; + vec4 dummy_vec = vec4(0.0); + ivec3 addr_reg = ivec3(0); + bvec2 cmp_reg = bvec2(false); + + vec4 uniform_indexed(int source, int offset) { + int clipped_offs = (offset >= -128 && offset <= 127) ? offset : 0; + uint index = uint(clipped_offs + source) & 127u; + return (index < 96u) ? 
uniform_f[index] : vec4(1.0); + } +)"; +} + +std::string ShaderDecompiler::decompile() { + controlFlow.analyze(shader, entrypoint); + + if (controlFlow.analysisFailed) { + return ""; + } + + compilationError = false; + decompiledShader.clear(); + // Reserve some memory for the shader string to avoid memory allocations + decompiledShader.reserve(256 * 1024); + + switch (api) { + case API::GL: decompiledShader += "#version 410 core\n"; break; + case API::GLES: decompiledShader += "#version 300 es\nprecision mediump float;\nprecision mediump int;\n"; break; + default: break; + } + + writeAttributes(); + + if (config.accurateShaderMul) { + // Safe multiplication handler from Citra: Handles the PICA's 0 * inf = 0 edge case + decompiledShader += R"( + vec4 safe_mul(vec4 a, vec4 b) { + vec4 res = a * b; + return mix(res, mix(mix(vec4(0.0), res, isnan(b)), res, isnan(a)), isnan(res)); + } + )"; + } + + // Forward declare every generated function first so that we can easily call anything from anywhere. 
+ for (auto& func : controlFlow.functions) { + decompiledShader += func.getForwardDecl(); + } + + decompiledShader += "void pica_shader_main() {\n"; + AddressRange mainFunctionRange(entrypoint, PICAShader::maxInstructionCount); + auto mainFunc = findFunction(mainFunctionRange); + + decompiledShader += mainFunc->getCallStatement() + ";\n}\n"; + + for (const Function& func : controlFlow.functions) { + if (func.outLabels.empty()) { + decompiledShader += fmt::format("bool {}() {{\n", func.getIdentifier()); + + auto [pc, finished] = compileRange(AddressRange(func.start, func.end)); + if (!finished) { + decompiledShader += "return false;"; + } + + decompiledShader += "}\n"; + } else { + auto labels = func.outLabels; + labels.insert(func.start); + + // If a function has jumps and "labels", this needs to be emulated using a switch-case, with the variable being switched on being the + // current PC + decompiledShader += fmt::format("bool {}() {{\n", func.getIdentifier()); + decompiledShader += fmt::format("uint pc = {}u;\n", func.start); + decompiledShader += "while(true){\nswitch(pc){\n"; + + for (u32 label : labels) { + decompiledShader += fmt::format("case {}u: {{", label); + // Fetch the next label whose address > label + auto it = labels.lower_bound(label + 1); + u32 next = (it == labels.end()) ? 
func.end : *it; + + auto [endPC, finished] = compileRange(AddressRange(label, next)); + if (endPC > next && !finished) { + labels.insert(endPC); + decompiledShader += fmt::format("pc = {}u; break;", endPC); + } + + // Fallthrough to next label + decompiledShader += "}\n"; + } + + decompiledShader += "default: return false;\n"; + // Exit the switch and loop + decompiledShader += "} }\n"; + + // Exit the function + decompiledShader += "return false;\n"; + decompiledShader += "}\n"; + } + } + + // We allow some leeway for "compilation errors" in addition to control flow errors, in cases where eg an unimplemented instruction + // or an instruction that we can't emulate in GLSL is found in the instruction stream. Just like control flow errors, these return an empty string + // and the renderer core will decide to use CPU shaders instead + if (compilationError) [[unlikely]] { + return ""; + } + + return decompiledShader; +} + +std::string ShaderDecompiler::getSource(u32 source, [[maybe_unused]] u32 index) const { + if (source < 0x10) { + return "attr" + std::to_string(source); + } else if (source < 0x20) { + return "temp[" + std::to_string(source - 0x10) + "]"; + } else { + const usize floatIndex = (source - 0x20) & 0x7f; + + if (index == 0) { + if (floatIndex >= 96) [[unlikely]] { + return "dummy_vec"; + } + return "uniform_f[" + std::to_string(floatIndex) + "]"; + } else { + static constexpr std::array offsets = {"0", "addr_reg.x", "addr_reg.y", "addr_reg.z"}; + return fmt::format("uniform_indexed({}, {})", floatIndex, offsets[index]); + } + } +} + +std::string ShaderDecompiler::getDest(u32 dest) const { + if (dest < 0x10) { + return "out_regs[" + std::to_string(dest) + "]"; + } else if (dest < 0x20) { + return "temp[" + std::to_string(dest - 0x10) + "]"; + } else { + return "dummy_vec"; + } +} + +std::string ShaderDecompiler::getSwizzlePattern(u32 swizzle) const { + // If the swizzle field is this value then the swizzle pattern is .xyzw so we don't need a shuffle + 
static constexpr uint noSwizzle = 0x1B; + if (swizzle == noSwizzle) { + return ""; + } + + static constexpr std::array names = {'x', 'y', 'z', 'w'}; + std::string ret(". "); + + for (int i = 0; i < 4; i++) { + ret[3 - i + 1] = names[swizzle & 0x3]; + swizzle >>= 2; + } + + return ret; +} + +std::string ShaderDecompiler::getDestSwizzle(u32 destinationMask) const { + std::string ret = "."; + if (destinationMask & 0b1000) { + ret += "x"; + } + + if (destinationMask & 0b100) { + ret += "y"; + } + + if (destinationMask & 0b10) { + ret += "z"; + } + + if (destinationMask & 0b1) { + ret += "w"; + } + + return ret; +} + +void ShaderDecompiler::setDest(u32 operandDescriptor, const std::string& dest, const std::string& value) { + u32 destinationMask = operandDescriptor & 0xF; + + std::string destSwizzle = getDestSwizzle(destinationMask); + // We subtract 1 for the "." character of the swizzle descriptor + u32 writtenLaneCount = destSwizzle.size() - 1; + + // All lanes are masked out, so the operation is a nop. + if (writtenLaneCount == 0) { + return; + } + + // Don't write destination swizzle if all lanes are getting written to + decompiledShader += fmt::format("{}{} = ", dest, writtenLaneCount == 4 ? "" : destSwizzle); + if (writtenLaneCount <= 3) { + decompiledShader += fmt::format("({}){};\n", value, destSwizzle); + } else if (writtenLaneCount == 4) { + decompiledShader += fmt::format("{};\n", value); + } +} + +void ShaderDecompiler::compileInstruction(u32& pc, bool& finished) { + const u32 instruction = shader.loadedShader[pc]; + const u32 opcode = instruction >> 26; + + if (usesCommonEncoding(instruction)) { + const u32 operandDescriptor = shader.operandDescriptors[instruction & 0x7f]; + const bool invertSources = (opcode == ShaderOpcodes::SLTI || opcode == ShaderOpcodes::SGEI || opcode == ShaderOpcodes::DPHI); + + // src1 and src2 indexes depend on whether this is one of the inverting instructions or not + const u32 src1Index = invertSources ? 
getBits<14, 5>(instruction) : getBits<12, 7>(instruction); + const u32 src2Index = invertSources ? getBits<7, 7>(instruction) : getBits<7, 5>(instruction); + + const u32 idx = getBits<19, 2>(instruction); + const u32 destIndex = getBits<21, 5>(instruction); + + const bool negate1 = (getBit<4>(operandDescriptor)) != 0; + const u32 swizzle1 = getBits<5, 8>(operandDescriptor); + const bool negate2 = (getBit<13>(operandDescriptor)) != 0; + const u32 swizzle2 = getBits<14, 8>(operandDescriptor); + + std::string src1 = negate1 ? "-" : ""; + src1 += getSource(src1Index, invertSources ? 0 : idx); + src1 += getSwizzlePattern(swizzle1); + + std::string src2 = negate2 ? "-" : ""; + src2 += getSource(src2Index, invertSources ? idx : 0); + src2 += getSwizzlePattern(swizzle2); + + std::string dest = getDest(destIndex); + + switch (opcode) { + case ShaderOpcodes::MOV: setDest(operandDescriptor, dest, src1); break; + case ShaderOpcodes::ADD: setDest(operandDescriptor, dest, fmt::format("{} + {}", src1, src2)); break; + case ShaderOpcodes::MUL: + if (!config.accurateShaderMul) { + setDest(operandDescriptor, dest, fmt::format("{} * {}", src1, src2)); + } else { + setDest(operandDescriptor, dest, fmt::format("safe_mul({}, {})", src1, src2)); + } + break; + case ShaderOpcodes::MAX: setDest(operandDescriptor, dest, fmt::format("max({}, {})", src1, src2)); break; + case ShaderOpcodes::MIN: setDest(operandDescriptor, dest, fmt::format("min({}, {})", src1, src2)); break; + + case ShaderOpcodes::DP3: + if (!config.accurateShaderMul) { + setDest(operandDescriptor, dest, fmt::format("vec4(dot({}.xyz, {}.xyz))", src1, src2)); + } else { + // A dot product between a and b is equivalent to the per-lane multiplication of a and b followed by a dot product with vec3(1.0) + setDest(operandDescriptor, dest, fmt::format("vec4(dot(safe_mul({}, {}).xyz, vec3(1.0)))", src1, src2)); + } + break; + case ShaderOpcodes::DP4: + if (!config.accurateShaderMul) { + setDest(operandDescriptor, dest, 
fmt::format("vec4(dot({}, {}))", src1, src2)); + } else { + // A dot product between a and b is equivalent to the per-lane multiplication of a and b followed by a dot product with vec4(1.0) + setDest(operandDescriptor, dest, fmt::format("vec4(dot(safe_mul({}, {}), vec4(1.0)))", src1, src2)); + } + break; + case ShaderOpcodes::FLR: setDest(operandDescriptor, dest, fmt::format("floor({})", src1)); break; + case ShaderOpcodes::RSQ: setDest(operandDescriptor, dest, fmt::format("vec4(inversesqrt({}.x))", src1)); break; + case ShaderOpcodes::RCP: setDest(operandDescriptor, dest, fmt::format("vec4(1.0 / {}.x)", src1)); break; + case ShaderOpcodes::LG2: setDest(operandDescriptor, dest, fmt::format("vec4(log2({}.x))", src1)); break; + case ShaderOpcodes::EX2: setDest(operandDescriptor, dest, fmt::format("vec4(exp2({}.x))", src1)); break; + + case ShaderOpcodes::SLT: + case ShaderOpcodes::SLTI: setDest(operandDescriptor, dest, fmt::format("vec4(lessThan({}, {}))", src1, src2)); break; + + case ShaderOpcodes::SGE: + case ShaderOpcodes::SGEI: setDest(operandDescriptor, dest, fmt::format("vec4(greaterThanEqual({}, {}))", src1, src2)); break; + + case ShaderOpcodes::DPH: + case ShaderOpcodes::DPHI: + if (!config.accurateShaderMul) { + setDest(operandDescriptor, dest, fmt::format("vec4(dot(vec4({}.xyz, 1.0), {}))", src1, src2)); + } else { + // A dot product between a and b is equivalent to the per-lane multiplication of a and b followed by a dot product with vec4(1.0) + setDest(operandDescriptor, dest, fmt::format("vec4(dot(safe_mul(vec4({}.xyz, 1.0), {}), vec4(1.0)))", src1, src2)); + } + break; + + case ShaderOpcodes::CMP1: + case ShaderOpcodes::CMP2: { + static constexpr std::array operators = { + // The last 2 operators always return true and are handled specially + "==", "!=", "<", "<=", ">", ">=", "", "", + }; + + const u32 cmpY = getBits<21, 3>(instruction); + const u32 cmpX = getBits<24, 3>(instruction); + + // Compare x first + if (cmpX >= 6) { + decompiledShader += 
"cmp_reg.x = true;\n"; + } else { + decompiledShader += fmt::format("cmp_reg.x = {}.x {} {}.x;\n", src1, operators[cmpX], src2); + } + + // Then compare Y + if (cmpY >= 6) { + decompiledShader += "cmp_reg.y = true;\n"; + } else { + decompiledShader += fmt::format("cmp_reg.y = {}.y {} {}.y;\n", src1, operators[cmpY], src2); + } + break; + } + + case ShaderOpcodes::MOVA: { + const bool writeX = getBit<3>(operandDescriptor); // Should we write the x component of the address register? + const bool writeY = getBit<2>(operandDescriptor); + + if (writeX && writeY) { + decompiledShader += fmt::format("addr_reg.xy = ivec2({}.xy);\n", src1); + } else if (writeX) { + decompiledShader += fmt::format("addr_reg.x = int({}.x);\n", src1); + } else if (writeY) { + decompiledShader += fmt::format("addr_reg.y = int({}.y);\n", src1); + } + break; + } + + default: + Helpers::warn("GLSL recompiler: Unknown common opcode: %02X. Falling back to CPU shaders", opcode); + compilationError = true; + break; + } + } else if (opcode >= 0x30 && opcode <= 0x3F) { // MAD and MADI + const u32 operandDescriptor = shader.operandDescriptors[instruction & 0x1f]; + const bool isMADI = getBit<29>(instruction) == 0; // We detect MADI based on bit 29 of the instruction + + // src1 and src2 indexes depend on whether this is one of the inverting instructions or not + const u32 src1Index = getBits<17, 5>(instruction); + const u32 src2Index = isMADI ? getBits<12, 5>(instruction) : getBits<10, 7>(instruction); + const u32 src3Index = isMADI ? 
getBits<5, 7>(instruction) : getBits<5, 5>(instruction); + const u32 idx = getBits<22, 2>(instruction); + const u32 destIndex = getBits<24, 5>(instruction); + + const bool negate1 = (getBit<4>(operandDescriptor)) != 0; + const u32 swizzle1 = getBits<5, 8>(operandDescriptor); + const bool negate2 = (getBit<13>(operandDescriptor)) != 0; + const u32 swizzle2 = getBits<14, 8>(operandDescriptor); + + const bool negate3 = (getBit<22>(operandDescriptor)) != 0; + const u32 swizzle3 = getBits<23, 8>(operandDescriptor); + + std::string src1 = negate1 ? "-" : ""; + src1 += getSource(src1Index, 0); + src1 += getSwizzlePattern(swizzle1); + + std::string src2 = negate2 ? "-" : ""; + src2 += getSource(src2Index, isMADI ? 0 : idx); + src2 += getSwizzlePattern(swizzle2); + + std::string src3 = negate3 ? "-" : ""; + src3 += getSource(src3Index, isMADI ? idx : 0); + src3 += getSwizzlePattern(swizzle3); + + std::string dest = getDest(destIndex); + if (!config.accurateShaderMul) { + setDest(operandDescriptor, dest, fmt::format("{} * {} + {}", src1, src2, src3)); + } else { + setDest(operandDescriptor, dest, fmt::format("safe_mul({}, {}) + {}", src1, src2, src3)); + } + } else { + switch (opcode) { + case ShaderOpcodes::JMPC: { + const u32 dest = getBits<10, 12>(instruction); + const u32 condOp = getBits<22, 2>(instruction); + const uint refY = getBit<24>(instruction); + const uint refX = getBit<25>(instruction); + const char* condition = getCondition(condOp, refX, refY); + + decompiledShader += fmt::format("if ({}) {{ pc = {}u; break; }}\n", condition, dest); + break; + } + + case ShaderOpcodes::JMPU: { + const u32 dest = getBits<10, 12>(instruction); + const u32 bit = getBits<22, 4>(instruction); // Bit of the bool uniform to check + const u32 mask = 1u << bit; + const u32 test = (instruction & 1) ^ 1; // If the LSB is 0 we jump if bit = 1, otherwise 0 + + decompiledShader += fmt::format("if ((uniform_bool & {}u) {} 0u) {{ pc = {}u; break; }}\n", mask, (test != 0) ? 
"!=" : "==", dest); + break; + } + + case ShaderOpcodes::IFU: + case ShaderOpcodes::IFC: { + const u32 num = instruction & 0xff; + const u32 dest = getBits<10, 12>(instruction); + const Function* conditionalFunc = findFunction(AddressRange(pc + 1, dest)); + + if (opcode == ShaderOpcodes::IFC) { + const u32 condOp = getBits<22, 2>(instruction); + const uint refY = getBit<24>(instruction); + const uint refX = getBit<25>(instruction); + const char* condition = getCondition(condOp, refX, refY); + + decompiledShader += fmt::format("if ({}) {{", condition); + } else { + const u32 bit = getBits<22, 4>(instruction); // Bit of the bool uniform to check + const u32 mask = 1u << bit; + + decompiledShader += fmt::format("if ((uniform_bool & {}u) != 0u) {{", mask); + } + + callFunction(*conditionalFunc); + decompiledShader += "}\n"; + + pc = dest; + if (num > 0) { + const Function* elseFunc = findFunction(AddressRange(dest, dest + num)); + pc = dest + num; + + decompiledShader += "else { "; + callFunction(*elseFunc); + decompiledShader += "}\n"; + + if (conditionalFunc->exitMode == ExitMode::AlwaysEnd && elseFunc->exitMode == ExitMode::AlwaysEnd) { + finished = true; + return; + } + } + + return; + } + + case ShaderOpcodes::CALL: + case ShaderOpcodes::CALLC: + case ShaderOpcodes::CALLU: { + const u32 num = instruction & 0xff; + const u32 dest = getBits<10, 12>(instruction); + const Function* calledFunc = findFunction(AddressRange(dest, dest + num)); + + // Handle conditions for CALLC/CALLU + if (opcode == ShaderOpcodes::CALLC) { + const u32 condOp = getBits<22, 2>(instruction); + const uint refY = getBit<24>(instruction); + const uint refX = getBit<25>(instruction); + const char* condition = getCondition(condOp, refX, refY); + + decompiledShader += fmt::format("if ({}) {{", condition); + } else if (opcode == ShaderOpcodes::CALLU) { + const u32 bit = getBits<22, 4>(instruction); // Bit of the bool uniform to check + const u32 mask = 1u << bit; + + decompiledShader += 
fmt::format("if ((uniform_bool & {}u) != 0u) {{", mask); + } + + callFunction(*calledFunc); + + // Close brackets for CALLC/CALLU + if (opcode != ShaderOpcodes::CALL) { + decompiledShader += "}"; + } + + if (opcode == ShaderOpcodes::CALL && calledFunc->exitMode == ExitMode::AlwaysEnd) { + finished = true; + return; + } + break; + } + + case ShaderOpcodes::LOOP: { + const u32 dest = getBits<10, 12>(instruction); + const u32 uniformIndex = getBits<22, 2>(instruction); + + // loop counter = uniform.y + decompiledShader += fmt::format("addr_reg.z = int((uniform_i[{}] >> 8u) & 0xFFu);\n", uniformIndex); + decompiledShader += fmt::format( + "for (uint loopCtr{} = 0u; loopCtr{} <= (uniform_i[{}] & 0xFFu); loopCtr{}++, addr_reg.z += int((uniform_i[{}] >> " + "16u) & 0xFFu)) {{\n", + pc, pc, uniformIndex, pc, uniformIndex + ); + + AddressRange range(pc + 1, dest + 1); + const Function* func = findFunction(range); + callFunction(*func); + decompiledShader += "}\n"; + + // Jump to the end of the loop. We don't want to compile the code inside the loop again. + // This will be incremented by 1 due to the pc++ at the end of this loop. + pc = dest; + + if (func->exitMode == ExitMode::AlwaysEnd) { + finished = true; + return; + } + break; + } + + case ShaderOpcodes::END: + decompiledShader += "return true;\n"; + finished = true; + return; + + case ShaderOpcodes::NOP: break; + + default: + Helpers::warn("GLSL recompiler: Unknown opcode: %02X. 
Falling back to CPU shaders", opcode); + compilationError = true; + break; + } + } + + pc++; +} + +bool ShaderDecompiler::usesCommonEncoding(u32 instruction) const { + const u32 opcode = instruction >> 26; + switch (opcode) { + case ShaderOpcodes::ADD: + case ShaderOpcodes::CMP1: + case ShaderOpcodes::CMP2: + case ShaderOpcodes::MUL: + case ShaderOpcodes::MIN: + case ShaderOpcodes::MAX: + case ShaderOpcodes::FLR: + case ShaderOpcodes::DP3: + case ShaderOpcodes::DP4: + case ShaderOpcodes::DPH: + case ShaderOpcodes::DPHI: + case ShaderOpcodes::LG2: + case ShaderOpcodes::EX2: + case ShaderOpcodes::RCP: + case ShaderOpcodes::RSQ: + case ShaderOpcodes::MOV: + case ShaderOpcodes::MOVA: + case ShaderOpcodes::SLT: + case ShaderOpcodes::SLTI: + case ShaderOpcodes::SGE: + case ShaderOpcodes::SGEI: + case ShaderOpcodes::LITP: return true; + + default: return false; + } +} + +void ShaderDecompiler::callFunction(const Function& function) { + switch (function.exitMode) { + // This function always ends, so call it and return true to signal that we're gonna be ending the shader + case ExitMode::AlwaysEnd: decompiledShader += function.getCallStatement() + ";\nreturn true;\n"; break; + // This function will potentially end. Call it, see if it returns that it ended, and return that we're ending if it did + case ExitMode::Conditional: decompiledShader += fmt::format("if ({}) {{ return true; }}\n", function.getCallStatement()); break; + // This function will not end. Just call it like a normal function. 
+ default: decompiledShader += function.getCallStatement() + ";\n"; break; + } +} + +std::string ShaderGen::decompileShader(PICAShader& shader, EmulatorConfig& config, u32 entrypoint, API api, Language language) { + ShaderDecompiler decompiler(shader, config, entrypoint, api, language); + + return decompiler.decompile(); +} + +const char* ShaderDecompiler::getCondition(u32 cond, u32 refX, u32 refY) { + static constexpr std::array conditions = { + // ref(Y, X) = (0, 0) + "!all(cmp_reg)", + "all(not(cmp_reg))", + "!cmp_reg.x", + "!cmp_reg.y", + + // ref(Y, X) = (0, 1) + "cmp_reg.x || !cmp_reg.y", + "cmp_reg.x && !cmp_reg.y", + "cmp_reg.x", + "!cmp_reg.y", + + // ref(Y, X) = (1, 0) + "!cmp_reg.x || cmp_reg.y", + "!cmp_reg.x && cmp_reg.y", + "!cmp_reg.x", + "cmp_reg.y", + + // ref(Y, X) = (1, 1) + "any(cmp_reg)", + "all(cmp_reg)", + "cmp_reg.x", + "cmp_reg.y", + }; + const u32 key = (cond & 0b11) | (refX << 2) | (refY << 3); + + return conditions[key]; +} diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp new file mode 100644 index 00000000..a9ad3a90 --- /dev/null +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -0,0 +1,839 @@ +#include + +#include + +#include "PICA/pica_frag_config.hpp" +#include "PICA/regs.hpp" +#include "PICA/shader_gen.hpp" + +// We can include the driver headers here since they shouldn't have any actual API-specific code +#include "renderer_gl/gl_driver.hpp" + +using namespace PICA; +using namespace PICA::ShaderGen; + +// Note: We upload global ambient and fog colour as u32 and decode on the GPU +// This shouldn't matter much for GPU performance, especially fog since it's relatively rare +static constexpr const char* uniformDefinition = R"( + struct LightSource { + vec3 specular0; + vec3 specular1; + vec3 diffuse; + vec3 ambient; + vec3 position; + vec3 spotlightDirection; + float distanceAttenuationBias; + float distanceAttenuationScale; + }; + + layout(std140) uniform FragmentUniforms { + int alphaReference; + float 
depthScale; + float depthOffset; + + vec4 constantColors[6]; + vec4 tevBufferColor; + vec4 clipCoords; + uint globalAmbientLight; + uint inFogColor; + LightSource lightSources[8]; + }; +)"; + +std::string FragmentGenerator::getDefaultVertexShader() { + std::string ret = ""; + // Reserve some space (128KB) in the output string to avoid too many allocations later + ret.reserve(128 * 1024); + + switch (api) { + case API::GL: ret += "#version 410 core"; break; + case API::GLES: ret += "#version 300 es"; break; + default: break; + } + + if (api == API::GLES) { + ret += R"( + #define USING_GLES 1 + + precision mediump int; + precision mediump float; + )"; + } + + ret += uniformDefinition; + + ret += R"( + layout(location = 0) in vec4 a_coords; + layout(location = 1) in vec4 a_quaternion; + layout(location = 2) in vec4 a_vertexColour; + layout(location = 3) in vec2 a_texcoord0; + layout(location = 4) in vec2 a_texcoord1; + layout(location = 5) in float a_texcoord0_w; + layout(location = 6) in vec3 a_view; + layout(location = 7) in vec2 a_texcoord2; + + out vec4 v_quaternion; + out vec4 v_colour; + out vec3 v_texcoord0; + out vec2 v_texcoord1; + out vec3 v_view; + out vec2 v_texcoord2; + + #ifndef USING_GLES + out float gl_ClipDistance[2]; + #endif + + void main() { + gl_Position = a_coords; + vec4 colourAbs = abs(a_vertexColour); + v_colour = min(colourAbs, vec4(1.f)); + + v_texcoord0 = vec3(a_texcoord0.x, 1.0 - a_texcoord0.y, a_texcoord0_w); + v_texcoord1 = vec2(a_texcoord1.x, 1.0 - a_texcoord1.y); + v_texcoord2 = vec2(a_texcoord2.x, 1.0 - a_texcoord2.y); + v_view = a_view; + v_quaternion = a_quaternion; + + #ifndef USING_GLES + gl_ClipDistance[0] = -a_coords.z; + gl_ClipDistance[1] = dot(clipCoords, a_coords); + #endif + } +)"; + + return ret; +} + +std::string FragmentGenerator::generate(const FragmentConfig& config, void* driverInfo) { + std::string ret = ""; + + switch (api) { + case API::GL: ret += "#version 410 core"; break; + case API::GLES: ret += "#version 300 
es"; break; + default: break; + } + + // For GLES we need to enable & use the framebuffer fetch extension in order to emulate logic ops + bool emitLogicOps = api == API::GLES && config.outConfig.logicOpMode != PICA::LogicOpMode::Copy && driverInfo != nullptr; + + if (emitLogicOps) { + auto driver = static_cast(driverInfo); + + // If the driver does not support framebuffer fetch at all, don't emit logic op code + if (!driver->supportFbFetch()) { + emitLogicOps = false; + } + + // Figure out which fb fetch extension we have and enable it + else { + if (driver->supportsExtFbFetch) { + ret += "\n#extension GL_EXT_shader_framebuffer_fetch : enable\n#define fb_color fragColor\n"; + } else if (driver->supportsArmFbFetch) { + ret += "\n#extension GL_ARM_shader_framebuffer_fetch : enable\n#define fb_color gl_LastFragColorARM[0]\n"; + } + } + } + + bool unimplementedFlag = false; + if (api == API::GLES) { + ret += R"( + #define USING_GLES 1 + #define fma(a, b, c) ((a) * (b) + (c)) + + precision mediump int; + precision mediump float; + )"; + } + + // Input and output attributes + ret += R"( + in vec4 v_quaternion; + in vec4 v_colour; + in vec3 v_texcoord0; + in vec2 v_texcoord1; + in vec3 v_view; + in vec2 v_texcoord2; + + out vec4 fragColor; + uniform sampler2D u_tex0; + uniform sampler2D u_tex1; + uniform sampler2D u_tex2; + uniform sampler2D u_tex_luts; + )"; + + ret += uniformDefinition; + + if (config.lighting.enable) { + ret += R"( + vec3 rotateVec3ByQuaternion(vec3 v, vec4 q) { + vec3 u = q.xyz; + float s = q.w; + return 2.0 * dot(u, v) * u + (s * s - dot(u, u)) * v + 2.0 * s * cross(u, v); + } + + float lutLookup(uint lut, int index) { + return texelFetch(u_tex_luts, ivec2(index, int(lut)), 0).r; + } + + vec3 regToColor(uint reg) { + return (1.0 / 255.0) * vec3(float((reg >> 20u) & 0xFFu), float((reg >> 10u) & 0xFFu), float(reg & 0xFFu)); + } + )"; + } + + // Emit main function for fragment shader + // When not initialized, source 13 is set to vec4(0.0) and 15 is set 
to the vertex colour + ret += R"( + void main() { + vec4 combinerOutput = v_colour; + vec4 previousBuffer = vec4(0.0); + vec4 tevNextPreviousBuffer = tevBufferColor; + + vec4 primaryColor = vec4(0.0); + vec4 secondaryColor = vec4(0.0); + )"; + + compileLights(ret, config); + + ret += R"( + vec3 colorOp1 = vec3(0.0); + vec3 colorOp2 = vec3(0.0); + vec3 colorOp3 = vec3(0.0); + + float alphaOp1 = 0.0; + float alphaOp2 = 0.0; + float alphaOp3 = 0.0; + )"; + + // Get original depth value by converting from [near, far] = [0, 1] to [-1, 1] + // We do this by converting to [0, 2] first and subtracting 1 to go to [-1, 1] + ret += R"( + float z_over_w = gl_FragCoord.z * 2.0f - 1.0f; + float depth = z_over_w * depthScale + depthOffset; + )"; + + if (!config.outConfig.depthMapEnable) { + ret += "depth /= gl_FragCoord.w;\n"; + } + + ret += "gl_FragDepth = depth;\n"; + + for (int i = 0; i < 6; i++) { + compileTEV(ret, i, config); + } + + compileFog(ret, config); + applyAlphaTest(ret, config); + + if (!emitLogicOps) { + ret += "fragColor = combinerOutput;\n}"; // End of main function + } else { + compileLogicOps(ret, config); + } + + return ret; +} + +void FragmentGenerator::compileTEV(std::string& shader, int stage, const PICA::FragmentConfig& config) { + const u32* tevValues = config.texConfig.tevConfigs.data() + stage * 4; + + // Pass a 0 to constColor here, as it doesn't matter for compilation + TexEnvConfig tev(tevValues[0], tevValues[1], tevValues[2], 0, tevValues[3]); + + if (!tev.isPassthroughStage()) { + // Get color operands + shader += "colorOp1 = "; + getColorOperand(shader, tev.colorSource1, tev.colorOperand1, stage, config); + + shader += ";\ncolorOp2 = "; + getColorOperand(shader, tev.colorSource2, tev.colorOperand2, stage, config); + + shader += ";\ncolorOp3 = "; + getColorOperand(shader, tev.colorSource3, tev.colorOperand3, stage, config); + + shader += ";\nvec3 outputColor" + std::to_string(stage) + " = clamp("; + getColorOperation(shader, tev.colorOp); + shader 
+= ", vec3(0.0), vec3(1.0));\n"; + + if (tev.colorOp == TexEnvConfig::Operation::Dot3RGBA) { + // Dot3 RGBA also writes to the alpha component so we don't need to do anything more + shader += "float outputAlpha" + std::to_string(stage) + " = outputColor" + std::to_string(stage) + ".x;\n"; + } else { + // Get alpha operands + shader += "alphaOp1 = "; + getAlphaOperand(shader, tev.alphaSource1, tev.alphaOperand1, stage, config); + + shader += ";\nalphaOp2 = "; + getAlphaOperand(shader, tev.alphaSource2, tev.alphaOperand2, stage, config); + + shader += ";\nalphaOp3 = "; + getAlphaOperand(shader, tev.alphaSource3, tev.alphaOperand3, stage, config); + + shader += ";\nfloat outputAlpha" + std::to_string(stage) + " = clamp("; + getAlphaOperation(shader, tev.alphaOp); + // Clamp the alpha value to [0.0, 1.0] + shader += ", 0.0, 1.0);\n"; + } + + shader += "combinerOutput = vec4(clamp(outputColor" + std::to_string(stage) + " * " + std::to_string(tev.getColorScale()) + + ".0, vec3(0.0), vec3(1.0)), clamp(outputAlpha" + std::to_string(stage) + " * " + std::to_string(tev.getAlphaScale()) + + ".0, 0.0, 1.0));\n"; + } + + shader += "previousBuffer = tevNextPreviousBuffer;\n\n"; + + // Update the "next previous buffer" if necessary + const u32 textureEnvUpdateBuffer = config.texConfig.texEnvUpdateBuffer; + if (stage < 4) { + // Check whether to update rgb + if ((textureEnvUpdateBuffer & (0x100 << stage))) { + shader += "tevNextPreviousBuffer.rgb = combinerOutput.rgb;\n"; + } + + // And whether to update alpha + if ((textureEnvUpdateBuffer & (0x1000u << stage))) { + shader += "tevNextPreviousBuffer.a = combinerOutput.a;\n"; + } + } +} + +void FragmentGenerator::getColorOperand(std::string& shader, TexEnvConfig::Source source, TexEnvConfig::ColorOperand color, int index, const PICA::FragmentConfig& config) { + using OperandType = TexEnvConfig::ColorOperand; + + // For inverting operands, add the 1.0 - x subtraction + if (color == OperandType::OneMinusSourceColor || color == 
OperandType::OneMinusSourceRed || color == OperandType::OneMinusSourceGreen || + color == OperandType::OneMinusSourceBlue || color == OperandType::OneMinusSourceAlpha) { + shader += "vec3(1.0, 1.0, 1.0) - "; + } + + switch (color) { + case OperandType::SourceColor: + case OperandType::OneMinusSourceColor: + getSource(shader, source, index, config); + shader += ".rgb"; + break; + + case OperandType::SourceRed: + case OperandType::OneMinusSourceRed: + getSource(shader, source, index, config); + shader += ".rrr"; + break; + + case OperandType::SourceGreen: + case OperandType::OneMinusSourceGreen: + getSource(shader, source, index, config); + shader += ".ggg"; + break; + + case OperandType::SourceBlue: + case OperandType::OneMinusSourceBlue: + getSource(shader, source, index, config); + shader += ".bbb"; + break; + + case OperandType::SourceAlpha: + case OperandType::OneMinusSourceAlpha: + getSource(shader, source, index, config); + shader += ".aaa"; + break; + + default: + shader += "vec3(1.0, 1.0, 1.0)"; + Helpers::warn("FragmentGenerator: Invalid TEV color operand"); + break; + } +} + +void FragmentGenerator::getAlphaOperand(std::string& shader, TexEnvConfig::Source source, TexEnvConfig::AlphaOperand color, int index, const PICA::FragmentConfig& config) { + using OperandType = TexEnvConfig::AlphaOperand; + + // For inverting operands, add the 1.0 - x subtraction + if (color == OperandType::OneMinusSourceRed || color == OperandType::OneMinusSourceGreen || color == OperandType::OneMinusSourceBlue || + color == OperandType::OneMinusSourceAlpha) { + shader += "1.0 - "; + } + + switch (color) { + case OperandType::SourceRed: + case OperandType::OneMinusSourceRed: + getSource(shader, source, index, config); + shader += ".r"; + break; + + case OperandType::SourceGreen: + case OperandType::OneMinusSourceGreen: + getSource(shader, source, index, config); + shader += ".g"; + break; + + case OperandType::SourceBlue: + case OperandType::OneMinusSourceBlue: + getSource(shader, 
source, index, config); + shader += ".b"; + break; + + case OperandType::SourceAlpha: + case OperandType::OneMinusSourceAlpha: + getSource(shader, source, index, config); + shader += ".a"; + break; + + default: + shader += "1.0"; + Helpers::warn("FragmentGenerator: Invalid TEV color operand"); + break; + } +} + +void FragmentGenerator::getSource(std::string& shader, TexEnvConfig::Source source, int index, const PICA::FragmentConfig& config) { + switch (source) { + case TexEnvConfig::Source::PrimaryColor: shader += "v_colour"; break; + case TexEnvConfig::Source::Texture0: shader += "texture(u_tex0, v_texcoord0.xy)"; break; + case TexEnvConfig::Source::Texture1: shader += "texture(u_tex1, v_texcoord1)"; break; + case TexEnvConfig::Source::Texture2: { + // If bit 13 in texture config is set then we use the texcoords for texture 1, otherwise for texture 2 + if (Helpers::getBit<13>(config.texConfig.texUnitConfig)) { + shader += "texture(u_tex2, v_texcoord1)"; + } else { + shader += "texture(u_tex2, v_texcoord2)"; + } + break; + } + + case TexEnvConfig::Source::Previous: shader += "combinerOutput"; break; + case TexEnvConfig::Source::Constant: shader += "constantColors[" + std::to_string(index) + "]"; break; + case TexEnvConfig::Source::PreviousBuffer: shader += "previousBuffer"; break; + + // Lighting + case TexEnvConfig::Source::PrimaryFragmentColor: shader += "primaryColor"; break; + case TexEnvConfig::Source::SecondaryFragmentColor: shader += "secondaryColor"; break; + + default: + Helpers::warn("Unimplemented TEV source: %d", static_cast(source)); + shader += "vec4(1.0, 1.0, 1.0, 1.0)"; + break; + } +} + +void FragmentGenerator::getColorOperation(std::string& shader, TexEnvConfig::Operation op) { + switch (op) { + case TexEnvConfig::Operation::Replace: shader += "colorOp1"; break; + case TexEnvConfig::Operation::Add: shader += "colorOp1 + colorOp2"; break; + case TexEnvConfig::Operation::AddSigned: shader += "colorOp1 + colorOp2 - vec3(0.5)"; break; + case 
TexEnvConfig::Operation::Subtract: shader += "colorOp1 - colorOp2"; break; + case TexEnvConfig::Operation::Modulate: shader += "colorOp1 * colorOp2"; break; + case TexEnvConfig::Operation::Lerp: shader += "mix(colorOp2, colorOp1, colorOp3)"; break; + + case TexEnvConfig::Operation::AddMultiply: shader += "min(colorOp1 + colorOp2, vec3(1.0)) * colorOp3"; break; + case TexEnvConfig::Operation::MultiplyAdd: shader += "fma(colorOp1, colorOp2, colorOp3)"; break; + case TexEnvConfig::Operation::Dot3RGB: + case TexEnvConfig::Operation::Dot3RGBA: shader += "vec3(4.0 * dot(colorOp1 - vec3(0.5), colorOp2 - vec3(0.5)))"; break; + default: + Helpers::warn("FragmentGenerator: Unimplemented color op"); + shader += "vec3(1.0)"; + break; + } +} + +void FragmentGenerator::getAlphaOperation(std::string& shader, TexEnvConfig::Operation op) { + switch (op) { + case TexEnvConfig::Operation::Replace: shader += "alphaOp1"; break; + case TexEnvConfig::Operation::Add: shader += "alphaOp1 + alphaOp2"; break; + case TexEnvConfig::Operation::AddSigned: shader += "alphaOp1 + alphaOp2 - 0.5"; break; + case TexEnvConfig::Operation::Subtract: shader += "alphaOp1 - alphaOp2"; break; + case TexEnvConfig::Operation::Modulate: shader += "alphaOp1 * alphaOp2"; break; + case TexEnvConfig::Operation::Lerp: shader += "mix(alphaOp2, alphaOp1, alphaOp3)"; break; + + case TexEnvConfig::Operation::AddMultiply: shader += "min(alphaOp1 + alphaOp2, 1.0) * alphaOp3"; break; + case TexEnvConfig::Operation::MultiplyAdd: shader += "fma(alphaOp1, alphaOp2, alphaOp3)"; break; + default: + Helpers::warn("FragmentGenerator: Unimplemented alpha op"); + shader += "1.0"; + break; + } +} + +void FragmentGenerator::applyAlphaTest(std::string& shader, const PICA::FragmentConfig& config) { + const CompareFunction function = config.outConfig.alphaTestFunction; + + // Alpha test disabled + if (function == CompareFunction::Always) { + return; + } + + shader += "int testingAlpha = int(combinerOutput.a * 255.0);\n"; + shader += 
"if ("; + switch (function) { + case CompareFunction::Never: shader += "true"; break; + case CompareFunction::Always: shader += "false"; break; + case CompareFunction::Equal: shader += "testingAlpha != alphaReference"; break; + case CompareFunction::NotEqual: shader += "testingAlpha == alphaReference"; break; + case CompareFunction::Less: shader += "testingAlpha >= alphaReference"; break; + case CompareFunction::LessOrEqual: shader += "testingAlpha > alphaReference"; break; + case CompareFunction::Greater: shader += "testingAlpha <= alphaReference"; break; + case CompareFunction::GreaterOrEqual: shader += "testingAlpha < alphaReference"; break; + + default: + Helpers::warn("Unimplemented alpha test function"); + shader += "false"; + break; + } + + shader += ") { discard; }\n"; +} + +void FragmentGenerator::compileLights(std::string& shader, const PICA::FragmentConfig& config) { + if (!config.lighting.enable) { + return; + } + + // Currently ignore bump mode + shader += "vec3 normal = rotateVec3ByQuaternion(vec3(0.0, 0.0, 1.0), v_quaternion);\n"; + shader += R"( + vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0); + vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0); + vec3 light_position, light_vector, half_vector, specular0, specular1, reflected_color; + + float light_distance, NdotL, light_factor, geometric_factor, distance_attenuation, distance_att_delta; + float spotlight_attenuation, specular0_dist, specular1_dist; + float lut_lookup_result, lut_lookup_delta; + int lut_lookup_index; + )"; + + uint lightID = 0; + + for (int i = 0; i < config.lighting.lightNum; i++) { + lightID = config.lighting.lights[i].num; + + const auto& lightConfig = config.lighting.lights[i]; + shader += "light_position = lightSources[" + std::to_string(lightID) + "].position;\n"; + + if (lightConfig.directional) { // Directional lighting + shader += "light_vector = light_position;\n"; + } else { // Positional lighting + shader += "light_vector = light_position + v_view;\n"; + } + + shader += R"( + 
light_distance = length(light_vector); + light_vector = normalize(light_vector); + half_vector = light_vector + normalize(v_view); + + distance_attenuation = 1.0; + NdotL = dot(normal, light_vector); + )"; + + shader += lightConfig.twoSidedDiffuse ? "NdotL = abs(NdotL);\n" : "NdotL = max(NdotL, 0.0);\n"; + + if (lightConfig.geometricFactor0 || lightConfig.geometricFactor1) { + shader += R"( + geometric_factor = dot(half_vector, half_vector); + geometric_factor = (geometric_factor == 0.0) ? 0.0 : min(NdotL / geometric_factor, 1.0); + )"; + } + + if (lightConfig.distanceAttenuationEnable) { + shader += "distance_att_delta = clamp(light_distance * lightSources[" + std::to_string(lightID) + + "].distanceAttenuationScale + lightSources[" + std::to_string(lightID) + "].distanceAttenuationBias, 0.0, 1.0);\n"; + + shader += "distance_attenuation = lutLookup(" + std::to_string(16 + lightID) + + "u, int(clamp(floor(distance_att_delta * 256.0), 0.0, 255.0)));\n"; + } + + compileLUTLookup(shader, config, i, spotlightLutIndex); + shader += "spotlight_attenuation = lut_lookup_result;\n"; + + compileLUTLookup(shader, config, i, PICA::Lights::LUT_D0); + shader += "specular0_dist = lut_lookup_result;\n"; + + compileLUTLookup(shader, config, i, PICA::Lights::LUT_D1); + shader += "specular1_dist = lut_lookup_result;\n"; + + compileLUTLookup(shader, config, i, PICA::Lights::LUT_RR); + shader += "reflected_color.r = lut_lookup_result;\n"; + + if (isSamplerEnabled(config.lighting.config, PICA::Lights::LUT_RG)) { + compileLUTLookup(shader, config, i, PICA::Lights::LUT_RG); + shader += "reflected_color.g = lut_lookup_result;\n"; + } else { + shader += "reflected_color.g = reflected_color.r;\n"; + } + + if (isSamplerEnabled(config.lighting.config, PICA::Lights::LUT_RB)) { + compileLUTLookup(shader, config, i, PICA::Lights::LUT_RB); + shader += "reflected_color.b = lut_lookup_result;\n"; + } else { + shader += "reflected_color.b = reflected_color.r;\n"; + } + + shader += "specular0 = 
lightSources[" + std::to_string(lightID) + "].specular0 * specular0_dist;\n"; + if (lightConfig.geometricFactor0) { + shader += "specular0 *= geometric_factor;\n"; + } + + shader += "specular1 = lightSources[" + std::to_string(lightID) + "].specular1 * specular1_dist * reflected_color;\n"; + if (lightConfig.geometricFactor1) { + shader += "specular1 *= geometric_factor;\n"; + } + + shader += "light_factor = distance_attenuation * spotlight_attenuation;\n"; + + if (config.lighting.clampHighlights) { + shader += "specular_sum.rgb += light_factor * (NdotL == 0.0 ? 0.0 : 1.0) * (specular0 + specular1);\n"; + } else { + shader += "specular_sum.rgb += light_factor * (specular0 + specular1);\n"; + } + + shader += "diffuse_sum.rgb += light_factor * (lightSources[" + std::to_string(lightID) + "].ambient + lightSources[" + + std::to_string(lightID) + "].diffuse * NdotL);\n"; + } + + if (config.lighting.enablePrimaryAlpha || config.lighting.enableSecondaryAlpha) { + compileLUTLookup(shader, config, config.lighting.lightNum - 1, PICA::Lights::LUT_FR); + shader += "float fresnel_factor = lut_lookup_result;\n"; + } + + if (config.lighting.enablePrimaryAlpha) { + shader += "diffuse_sum.a = fresnel_factor;\n"; + } + + if (config.lighting.enableSecondaryAlpha) { + shader += "specular_sum.a = fresnel_factor;\n"; + } + + shader += R"( + vec4 global_ambient = vec4(regToColor(globalAmbientLight), 1.0); + + primaryColor = clamp(global_ambient + diffuse_sum, vec4(0.0), vec4(1.0)); + secondaryColor = clamp(specular_sum, vec4(0.0), vec4(1.0)); + )"; +} + +bool FragmentGenerator::isSamplerEnabled(u32 environmentID, u32 lutID) { + static constexpr bool samplerEnabled[9 * 7] = { + // D0 D1 SP FR RB RG RR + true, false, true, false, false, false, true, // Configuration 0: D0, SP, RR + false, false, true, true, false, false, true, // Configuration 1: FR, SP, RR + true, true, false, false, false, false, true, // Configuration 2: D0, D1, RR + true, true, false, true, false, false, false, // 
Configuration 3: D0, D1, FR + true, true, true, false, true, true, true, // Configuration 4: All except for FR + true, false, true, true, true, true, true, // Configuration 5: All except for D1 + true, true, true, true, false, false, true, // Configuration 6: All except for RB and RG + false, false, false, false, false, false, false, // Configuration 7: Unused + true, true, true, true, true, true, true, // Configuration 8: All + }; + + return samplerEnabled[environmentID * 7 + lutID]; +} + +void FragmentGenerator::compileLUTLookup(std::string& shader, const PICA::FragmentConfig& config, u32 lightIndex, u32 lutID) { + const LightingLUTConfig& lut = config.lighting.luts[lutID]; + uint lightID = config.lighting.lights[lightIndex].num; + uint lutIndex = 0; + bool lutEnabled = false; + + if (lutID == spotlightLutIndex) { + // These are the spotlight attenuation LUTs + lutIndex = 8u + lightID; + lutEnabled = config.lighting.lights[lightIndex].spotAttenuationEnable; + } else if (lutID <= 6) { + lutIndex = lutID; + lutEnabled = lut.enable; + } else { + Helpers::warn("Shadergen: Unimplemented LUT value"); + } + + const bool samplerEnabled = isSamplerEnabled(config.lighting.config, lutID); + + if (!samplerEnabled || !lutEnabled) { + shader += "lut_lookup_result = 1.0;\n"; + return; + } + + uint scale = lut.scale; + uint inputID = lut.type; + bool absEnabled = lut.absInput; + + switch (inputID) { + case 0: shader += "lut_lookup_delta = dot(normal, normalize(half_vector));\n"; break; + case 1: shader += "lut_lookup_delta = dot(normalize(v_view), normalize(half_vector));\n"; break; + case 2: shader += "lut_lookup_delta = dot(normal, normalize(v_view));\n"; break; + case 3: shader += "lut_lookup_delta = dot(normal, light_vector);\n"; break; + case 4: shader += "lut_lookup_delta = dot(light_vector, lightSources[" + std ::to_string(lightID) + "].spotlightDirection);\n"; break; + + default: + Helpers::warn("Shadergen: Unimplemented LUT select %d", inputID); + shader += 
"lut_lookup_delta = 1.0;\n"; + break; + } + + static constexpr float scales[] = {1.0f, 2.0f, 4.0f, 8.0f, 0.0f, 0.0f, 0.25f, 0.5f}; + + if (absEnabled) { + bool twoSidedDiffuse = config.lighting.lights[lightIndex].twoSidedDiffuse; + shader += twoSidedDiffuse ? "lut_lookup_delta = abs(lut_lookup_delta);\n" : "lut_lookup_delta = max(lut_lookup_delta, 0.0);\n"; + shader += "lut_lookup_result = lutLookup(" + std::to_string(lutIndex) + "u, int(clamp(floor(lut_lookup_delta * 256.0), 0.0, 255.0)));\n"; + if (scale != 0) { + shader += "lut_lookup_result *= " + std::to_string(scales[scale]) + ";\n"; + } + } else { + // Range is [-1, 1] so we need to map it to [0, 1] + shader += "lut_lookup_index = int(clamp(floor(lut_lookup_delta * 128.0), -128.f, 127.f));\n"; + shader += "if (lut_lookup_index < 0) lut_lookup_index += 256;\n"; + shader += "lut_lookup_result = lutLookup(" + std::to_string(lutIndex) + "u, lut_lookup_index);\n"; + if (scale != 0) { + shader += "lut_lookup_result *= " + std::to_string(scales[scale]) + ";\n"; + } + } +} + +void FragmentGenerator::compileFog(std::string& shader, const PICA::FragmentConfig& config) { + if (config.fogConfig.mode != FogMode::Fog) { + return; + } + + if (config.fogConfig.flipDepth) { + shader += "float fog_index = (1.0 - depth) * 128.0;\n"; + } else { + shader += "float fog_index = depth * 128.0;\n"; + } + + shader += "float clamped_index = clamp(floor(fog_index), 0.0, 127.0);"; + shader += "float delta = fog_index - clamped_index;"; + shader += "vec3 fog_color = (1.0 / 255.0) * vec3(float(inFogColor & 0xffu), float((inFogColor >> 8u) & 0xffu), float((inFogColor >> 16u) & 0xffu));"; + shader += "vec2 value = texelFetch(u_tex_luts, ivec2(int(clamped_index), 24), 0).rg;"; // fog LUT is past the light LUTs + shader += "float fog_factor = clamp(value.r + value.g * delta, 0.0, 1.0);"; + shader += "combinerOutput.rgb = mix(fog_color, combinerOutput.rgb, fog_factor);"; +} + +std::string FragmentGenerator::getVertexShaderAccelerated(const 
std::string& picaSource, const PICA::VertConfig& vertConfig, bool usingUbershader) { + // First, calculate output register -> Fixed function fragment semantics based on the VAO config + // This array contains the mappings for the 32 fixed function semantics (8 variables, with 4 lanes each). + // Each entry is a pair, containing the output reg to use for this semantic (first) and which lane of that register (second) + std::array, 32> outputMappings{}; + // Output registers adjusted according to VS_OUTPUT_MASK, which handles enabling and disabling output attributes + std::array vsOutputRegisters; + + { + uint count = 0; + u16 outputMask = vertConfig.outputMask; + + // See which registers are actually enabled and ignore the disabled ones + for (int i = 0; i < 16; i++) { + if (outputMask & 1) { + vsOutputRegisters[count++] = i; + } + + outputMask >>= 1; + } + + // For the others, map the index to a vs output directly (TODO: What does hw actually do?) + for (; count < 16; count++) { + vsOutputRegisters[count] = count; + } + + for (int i = 0; i < vertConfig.outputCount; i++) { + const u32 config = vertConfig.outmaps[i]; + for (int j = 0; j < 4; j++) { + const u32 mapping = (config >> (j * 8)) & 0x1F; + outputMappings[mapping] = std::make_pair(vsOutputRegisters[i], j); + } + } + } + + auto getSemanticName = [&](u32 semanticIndex) { + auto [reg, lane] = outputMappings[semanticIndex]; + return fmt::format("out_regs[{}][{}]", reg, lane); + }; + + std::string semantics = fmt::format( + R"( + vec4 a_coords = vec4({}, {}, {}, {}); + vec4 a_quaternion = vec4({}, {}, {}, {}); + vec4 a_vertexColour = vec4({}, {}, {}, {}); + vec2 a_texcoord0 = vec2({}, {}); + float a_texcoord0_w = {}; + vec2 a_texcoord1 = vec2({}, {}); + vec2 a_texcoord2 = vec2({}, {}); + vec3 a_view = vec3({}, {}, {}); +)", + getSemanticName(0), getSemanticName(1), getSemanticName(2), getSemanticName(3), getSemanticName(4), getSemanticName(5), getSemanticName(6), + getSemanticName(7), getSemanticName(8), 
getSemanticName(9), getSemanticName(10), getSemanticName(11), getSemanticName(12), + getSemanticName(13), getSemanticName(16), getSemanticName(14), getSemanticName(15), getSemanticName(22), getSemanticName(23), + getSemanticName(18), getSemanticName(19), getSemanticName(20) + ); + + if (usingUbershader) { + Helpers::panic("Unimplemented: GetVertexShaderAccelerated for ubershader"); + return picaSource; + } else { + // TODO: Uniforms and don't hardcode fixed-function semantic indices... + std::string ret = picaSource; + if (api == API::GLES) { + ret += "\n#define USING_GLES\n"; + } + + ret += uniformDefinition; + + ret += R"( +out vec4 v_quaternion; +out vec4 v_colour; +out vec3 v_texcoord0; +out vec2 v_texcoord1; +out vec3 v_view; +out vec2 v_texcoord2; + +#ifndef USING_GLES + out float gl_ClipDistance[2]; +#endif + +void main() { + pica_shader_main(); +)"; + // Transfer fixed function fragment registers from vertex shader output to the fragment shader + ret += semantics; + + ret += R"( + gl_Position = a_coords; + vec4 colourAbs = abs(a_vertexColour); + v_colour = min(colourAbs, vec4(1.f)); + + v_texcoord0 = vec3(a_texcoord0.x, 1.0 - a_texcoord0.y, a_texcoord0_w); + v_texcoord1 = vec2(a_texcoord1.x, 1.0 - a_texcoord1.y); + v_texcoord2 = vec2(a_texcoord2.x, 1.0 - a_texcoord2.y); + v_view = a_view; + v_quaternion = a_quaternion; + +#ifndef USING_GLES + gl_ClipDistance[0] = -a_coords.z; + gl_ClipDistance[1] = dot(clipCoords, a_coords); +#endif +})"; + return ret; + } +} + +void FragmentGenerator::compileLogicOps(std::string& shader, const PICA::FragmentConfig& config) { + if (api != API::GLES) [[unlikely]] { + Helpers::warn("Shadergen: Unsupported API for compileLogicOps"); + shader += "fragColor = combinerOutput;\n}"; // End of main function + + return; + } + + shader += "fragColor = "; + switch (config.outConfig.logicOpMode) { + case PICA::LogicOpMode::Copy: shader += "combinerOutput"; break; + case PICA::LogicOpMode::Nop: shader += "fb_color"; break; + case 
PICA::LogicOpMode::Clear: shader += "vec4(0.0)"; break; + case PICA::LogicOpMode::Set: shader += "vec4(1.0)"; break; + case PICA::LogicOpMode::InvertedCopy: shader += "vec4(uvec4(combinerOutput * 255.0) ^ uvec4(0xFFu)) * (1.0 / 255.0)"; break; + + default: + shader += "combinerOutput"; + Helpers::warn("Shadergen: Unimplemented logic op mode"); + break; + } + + shader += ";\n}"; // End of main function +} diff --git a/src/core/PICA/shader_interpreter.cpp b/src/core/PICA/shader_interpreter.cpp index 003ef97a..a85c7464 100644 --- a/src/core/PICA/shader_interpreter.cpp +++ b/src/core/PICA/shader_interpreter.cpp @@ -74,6 +74,9 @@ void PICAShader::run() { break; } + // Undocumented, implementation based on 3DBrew and hw testing (see tests/PICA_LITP) + case ShaderOpcodes::LITP: [[unlikely]] litp(instruction); break; + default: Helpers::panic("Unimplemented PICA instruction %08X (Opcode = %02X)", instruction, opcode); } @@ -753,4 +756,33 @@ void PICAShader::jmpu(u32 instruction) { if (((boolUniform >> bit) & 1) == test) // Jump if the bool uniform is the value we want pc = dest; +} + +void PICAShader::litp(u32 instruction) { + const u32 operandDescriptor = operandDescriptors[instruction & 0x7f]; + u32 src = getBits<12, 7>(instruction); + const u32 idx = getBits<19, 2>(instruction); + const u32 dest = getBits<21, 5>(instruction); + + src = getIndexedSource(src, idx); + vec4f srcVec = getSourceSwizzled<1>(src, operandDescriptor); + vec4f& destVector = getDest(dest); + + // Compare registers are set based on whether src.x and src.w are >= 0.0 + cmpRegister[0] = (srcVec[0].toFloat32() >= 0.0f); + cmpRegister[1] = (srcVec[3].toFloat32() >= 0.0f); + + vec4f result; + // TODO: Does max here have the same non-IEEE NaN behavior as the max instruction? 
+ result[0] = f24::fromFloat32(std::max(srcVec[0].toFloat32(), 0.0f)); + result[1] = f24::fromFloat32(std::clamp(srcVec[1].toFloat32(), -127.9961f, 127.9961f)); + result[2] = f24::zero(); + result[3] = f24::fromFloat32(std::max(srcVec[3].toFloat32(), 0.0f)); + + u32 componentMask = operandDescriptor & 0xf; + for (int i = 0; i < 4; i++) { + if (componentMask & (1 << i)) { + destVector[3 - i] = result[3 - i]; + } + } } \ No newline at end of file diff --git a/src/core/PICA/shader_unit.cpp b/src/core/PICA/shader_unit.cpp index aa7b4c12..6b291d31 100644 --- a/src/core/PICA/shader_unit.cpp +++ b/src/core/PICA/shader_unit.cpp @@ -9,7 +9,6 @@ void ShaderUnit::reset() { void PICAShader::reset() { loadedShader.fill(0); - bufferedShader.fill(0); operandDescriptors.fill(0); boolUniform = 0; @@ -35,4 +34,5 @@ void PICAShader::reset() { codeHashDirty = true; opdescHashDirty = true; + uniformsDirty = true; } \ No newline at end of file diff --git a/src/core/audio/aac_decoder.cpp b/src/core/audio/aac_decoder.cpp new file mode 100644 index 00000000..af88485c --- /dev/null +++ b/src/core/audio/aac_decoder.cpp @@ -0,0 +1,146 @@ +#include "audio/aac_decoder.hpp" + +#include + +#include +using namespace Audio; + +void AAC::Decoder::decode(AAC::Message& response, const AAC::Message& request, AAC::Decoder::PaddrCallback paddrCallback, bool enableAudio) { + // Copy the command and mode fields of the request to the response + response.command = request.command; + response.mode = request.mode; + response.decodeResponse.size = request.decodeRequest.size; + + // Write a dummy response at first. 
We'll be overwriting it later if decoding goes well + response.resultCode = AAC::ResultCode::Success; + response.decodeResponse.channelCount = 2; + response.decodeResponse.sampleCount = 1024; + response.decodeResponse.sampleRate = AAC::SampleRate::Rate48000; + + if (!isInitialized()) { + initialize(); + + // AAC decoder failed to initialize, return dummy data and return without decoding + if (!isInitialized()) { + Helpers::warn("Failed to initialize AAC decoder"); + return; + } + } + + u8* input = paddrCallback(request.decodeRequest.address); + const u8* inputEnd = paddrCallback(request.decodeRequest.address + request.decodeRequest.size); + u8* outputLeft = paddrCallback(request.decodeRequest.destAddrLeft); + u8* outputRight = nullptr; + + if (input == nullptr || inputEnd == nullptr || outputLeft == nullptr) { + Helpers::warn("Invalid pointers passed to AAC decoder"); + return; + } + + u32 bytesValid = request.decodeRequest.size; + u32 bufferSize = request.decodeRequest.size; + + // Each frame is 2048 samples with 2 channels + static constexpr usize frameSize = 2048 * 2; + std::array frame; + std::array, 2> audioStreams; + + bool queriedStreamInfo = false; + + while (bytesValid != 0) { + if (aacDecoder_Fill(decoderHandle, &input, &bufferSize, &bytesValid) != AAC_DEC_OK) { + Helpers::warn("Failed to fill AAC decoder with samples"); + return; + } + + auto decodeResult = aacDecoder_DecodeFrame(decoderHandle, frame.data(), frameSize, 0); + + if (decodeResult == AAC_DEC_TRANSPORT_SYNC_ERROR) { + // https://android.googlesource.com/platform/external/aac/+/2ddc922/libAACdec/include/aacdecoder_lib.h#362 + // According to the above, if we get a sync error, we're not meant to stop decoding, but rather just continue feeding data + } else if (decodeResult == AAC_DEC_OK) { + auto getSampleRate = [](u32 rate) { + switch (rate) { + case 8000: return AAC::SampleRate::Rate8000; + case 11025: return AAC::SampleRate::Rate11025; + case 12000: return AAC::SampleRate::Rate12000; + case 
16000: return AAC::SampleRate::Rate16000; + case 22050: return AAC::SampleRate::Rate22050; + case 24000: return AAC::SampleRate::Rate24000; + case 32000: return AAC::SampleRate::Rate32000; + case 44100: return AAC::SampleRate::Rate44100; + case 48000: + default: return AAC::SampleRate::Rate48000; + } + }; + + auto info = aacDecoder_GetStreamInfo(decoderHandle); + response.decodeResponse.sampleCount = info->frameSize; + response.decodeResponse.channelCount = info->numChannels; + response.decodeResponse.sampleRate = getSampleRate(info->sampleRate); + + int channels = info->numChannels; + // Reserve space in our output stream vectors so push_back doesn't do allocations + for (int i = 0; i < channels; i++) { + audioStreams[i].reserve(audioStreams[i].size() + info->frameSize); + } + + // Fetch output pointer for right output channel if we've got > 1 channel + if (channels > 1 && outputRight == nullptr) { + outputRight = paddrCallback(request.decodeRequest.destAddrRight); + // If the right output channel doesn't point to a proper padddr, return + if (outputRight == nullptr) { + Helpers::warn("Right AAC output channel doesn't point to valid physical address"); + return; + } + } + + if (enableAudio) { + for (int sample = 0; sample < info->frameSize; sample++) { + for (int stream = 0; stream < channels; stream++) { + audioStreams[stream].push_back(frame[(sample * channels) + stream]); + } + } + } else { + // If audio is not enabled, push 0s + for (int stream = 0; stream < channels; stream++) { + audioStreams[stream].resize(audioStreams[stream].size() + info->frameSize, 0); + } + } + } else { + Helpers::warn("Failed to decode AAC frame"); + return; + } + } + + for (int i = 0; i < 2; i++) { + auto& stream = audioStreams[i]; + u8* pointer = (i == 0) ? 
outputLeft : outputRight; + + if (!stream.empty() && pointer != nullptr) { + std::memcpy(pointer, stream.data(), stream.size() * sizeof(s16)); + } + } +} + +void AAC::Decoder::initialize() { + decoderHandle = aacDecoder_Open(TRANSPORT_TYPE::TT_MP4_ADTS, 1); + + if (decoderHandle == nullptr) [[unlikely]] { + return; + } + + // Cap output channel count to 2 + if (aacDecoder_SetParam(decoderHandle, AAC_PCM_MAX_OUTPUT_CHANNELS, 2) != AAC_DEC_OK) [[unlikely]] { + aacDecoder_Close(decoderHandle); + decoderHandle = nullptr; + return; + } +} + +AAC::Decoder::~Decoder() { + if (isInitialized()) { + aacDecoder_Close(decoderHandle); + decoderHandle = nullptr; + } +} \ No newline at end of file diff --git a/src/core/audio/audio_interpolation.cpp b/src/core/audio/audio_interpolation.cpp new file mode 100644 index 00000000..d13c786e --- /dev/null +++ b/src/core/audio/audio_interpolation.cpp @@ -0,0 +1,73 @@ +// Copyright 2016 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "audio/audio_interpolation.hpp" + +#include + +#include "helpers.hpp" + +namespace Audio::Interpolation { + // Calculations are done in fixed point with 24 fractional bits. + // (This is not verified. This was chosen for minimal error.) + static constexpr u64 scaleFactor = 1 << 24; + static constexpr u64 scaleMask = scaleFactor - 1; + + /// Here we step over the input in steps of rate, until we consume all of the input. + /// Three adjacent samples are passed to fn each step. 
+ template + static void stepOverSamples(State& state, StereoBuffer16& input, float rate, StereoFrame16& output, usize& outputi, Function fn) { + if (input.empty()) { + return; + } + + input.insert(input.begin(), {state.xn2, state.xn1}); + + const u64 step_size = static_cast(rate * scaleFactor); + u64 fposition = state.fposition; + usize inputi = 0; + + while (outputi < output.size()) { + inputi = static_cast(fposition / scaleFactor); + + if (inputi + 2 >= input.size()) { + inputi = input.size() - 2; + break; + } + + u64 fraction = fposition & scaleMask; + output[outputi++] = fn(fraction, input[inputi], input[inputi + 1], input[inputi + 2]); + + fposition += step_size; + } + + state.xn2 = input[inputi]; + state.xn1 = input[inputi + 1]; + state.fposition = fposition - inputi * scaleFactor; + + input.erase(input.begin(), std::next(input.begin(), inputi + 2)); + } + + void none(State& state, StereoBuffer16& input, float rate, StereoFrame16& output, usize& outputi) { + stepOverSamples(state, input, rate, output, outputi, [](u64 fraction, const auto& x0, const auto& x1, const auto& x2) { return x0; }); + } + + void linear(State& state, StereoBuffer16& input, float rate, StereoFrame16& output, usize& outputi) { + // Note on accuracy: Some values that this produces are +/- 1 from the actual firmware. + stepOverSamples(state, input, rate, output, outputi, [](u64 fraction, const auto& x0, const auto& x1, const auto& x2) { + // This is a saturated subtraction. (Verified by black-box fuzzing.) 
+ s64 delta0 = std::clamp(x1[0] - x0[0], -32768, 32767); + s64 delta1 = std::clamp(x1[1] - x0[1], -32768, 32767); + + return std::array{ + static_cast(x0[0] + fraction * delta0 / scaleFactor), + static_cast(x0[1] + fraction * delta1 / scaleFactor), + }; + }); + } + + void polyphase(State& state, StereoBuffer16& input, float rate, StereoFrame16& output, usize& outputi) { + linear(state, input, rate, output, outputi); + } +} // namespace Audio::Interpolation \ No newline at end of file diff --git a/src/core/audio/dsp_core.cpp b/src/core/audio/dsp_core.cpp index 01cee11e..c793fcf8 100644 --- a/src/core/audio/dsp_core.cpp +++ b/src/core/audio/dsp_core.cpp @@ -8,17 +8,17 @@ #include "audio/null_core.hpp" #include "audio/teakra_core.hpp" -std::unique_ptr Audio::makeDSPCore(DSPCore::Type type, Memory& mem, Scheduler& scheduler, DSPService& dspService) { +std::unique_ptr Audio::makeDSPCore(EmulatorConfig& config, Memory& mem, Scheduler& scheduler, DSPService& dspService) { std::unique_ptr core; - switch (type) { - case DSPCore::Type::Null: core = std::make_unique(mem, scheduler, dspService); break; - case DSPCore::Type::Teakra: core = std::make_unique(mem, scheduler, dspService); break; - case DSPCore::Type::HLE: core = std::make_unique(mem, scheduler, dspService); break; + switch (config.dspType) { + case DSPCore::Type::Null: core = std::make_unique(mem, scheduler, dspService, config); break; + case DSPCore::Type::Teakra: core = std::make_unique(mem, scheduler, dspService, config); break; + case DSPCore::Type::HLE: core = std::make_unique(mem, scheduler, dspService, config); break; default: Helpers::warn("Invalid DSP core selected!"); - core = std::make_unique(mem, scheduler, dspService); + core = std::make_unique(mem, scheduler, dspService, config); break; } diff --git a/src/core/audio/hle_core.cpp b/src/core/audio/hle_core.cpp index 12c8f4c8..73d39adb 100644 --- a/src/core/audio/hle_core.cpp +++ b/src/core/audio/hle_core.cpp @@ -2,9 +2,13 @@ #include #include +#include 
#include #include +#include "audio/aac_decoder.hpp" +#include "audio/dsp_simd.hpp" +#include "config.hpp" #include "services/dsp.hpp" namespace Audio { @@ -17,11 +21,14 @@ namespace Audio { }; } - HLE_DSP::HLE_DSP(Memory& mem, Scheduler& scheduler, DSPService& dspService) : DSPCore(mem, scheduler, dspService) { + HLE_DSP::HLE_DSP(Memory& mem, Scheduler& scheduler, DSPService& dspService, EmulatorConfig& config) + : DSPCore(mem, scheduler, dspService, config) { // Set up source indices for (int i = 0; i < sources.size(); i++) { sources[i].index = i; } + + aacDecoder.reset(new Audio::AAC::Decoder()); } void HLE_DSP::resetAudioPipe() { @@ -72,6 +79,7 @@ namespace Audio { source.reset(); } + mixer.reset(); // Note: Reset audio pipe AFTER resetting all pipes, otherwise the new data will be yeeted resetAudioPipe(); } @@ -94,7 +102,7 @@ namespace Audio { scheduler.removeEvent(Scheduler::EventType::RunDSP); } - void HLE_DSP::runAudioFrame() { + void HLE_DSP::runAudioFrame(u64 eventTimestamp) { // Signal audio pipe when an audio frame is done if (dspState == DSPState::On) [[likely]] { dspService.triggerPipeEvent(DSPPipeType::Audio); @@ -102,7 +110,10 @@ namespace Audio { // TODO: Should this be called if dspState != DSPState::On? 
outputFrame(); - scheduler.addEvent(Scheduler::EventType::RunDSP, scheduler.currentTimestamp + Audio::cyclesPerFrame); + + // How many cycles we were late + const u64 cycleDrift = scheduler.currentTimestamp - eventTimestamp; + scheduler.addEvent(Scheduler::EventType::RunDSP, scheduler.currentTimestamp + Audio::cyclesPerFrame - cycleDrift); } u16 HLE_DSP::recvData(u32 regId) { @@ -110,7 +121,7 @@ namespace Audio { Helpers::panic("Audio: invalid register in HLE frontend"); } - return dspState == DSPState::On; + return dspState != DSPState::On; } void HLE_DSP::writeProcessPipe(u32 channel, u32 size, u32 buffer) { @@ -203,11 +214,11 @@ namespace Audio { if (audioEnabled) { // Wait until we've actually got room to push our frame - while (sampleBuffer.size() + 2 > sampleBuffer.Capacity()) { + while (sampleBuffer.size() + frame.size() * 2 > sampleBuffer.Capacity()) { std::this_thread::sleep_for(std::chrono::milliseconds{1}); } - sampleBuffer.push(frame.data(), frame.size()); + sampleBuffer.push(frame.data(), frame.size() * 2); } } @@ -216,6 +227,14 @@ namespace Audio { SharedMemory& read = readRegion(); SharedMemory& write = writeRegion(); + // TODO: Properly implement mixers + // The DSP checks the DSP configuration dirty bits on every frame, applies them, and clears them + read.dspConfiguration.dirtyRaw = 0; + read.dspConfiguration.dirtyRaw2 = 0; + + // The intermediate mix buffer is aligned to 16 for SIMD purposes + alignas(16) std::array mixes{}; + for (int i = 0; i < sourceCount; i++) { // Update source configuration from the read region of shared memory auto& config = read.sourceConfigurations.config[i]; @@ -231,14 +250,36 @@ namespace Audio { auto& status = write.sourceStatuses.status[i]; status.enabled = source.enabled; status.syncCount = source.syncCount; - status.currentBufferIDDirty = source.isBufferIDDirty ? 1 : 0; + status.currentBufferIDDirty = (source.isBufferIDDirty ? 
1 : 0); status.currentBufferID = source.currentBufferID; status.previousBufferID = source.previousBufferID; - // TODO: Properly update sample position status.samplePosition = source.samplePosition; source.isBufferIDDirty = false; + + // If the source is still enabled, mix its output into the intermediate mix buffers + if (source.enabled) { + for (int mix = 0; mix < mixes.size(); mix++) { + // Check if this stage is passthrough, and if it is, then skip it + if ((source.enabledMixStages & (1u << mix)) == 0) { + continue; + } + + IntermediateMix& intermediateMix = mixes[mix]; + const std::array& gains = source.gains[mix]; + + DSP::MixIntoQuad::mix(intermediateMix, source.currentFrame, gains.data()); + } + } } + + for (int i = 0; i < Audio::samplesInFrame; i++) { + auto& mix0 = mixes[0]; + auto& sample = mix0[i]; + frame[i] = {s16(sample[0]), s16(sample[1])}; + } + + performMix(read, write); } void HLE_DSP::updateSourceConfig(Source& source, HLE::SourceConfiguration::Configuration& config, s16_le* adpcmCoefficients) { @@ -247,6 +288,17 @@ namespace Audio { return; } + // The reset flags take priority, as you can reset a source and set it up to be played again at the same time + if (config.resetFlag) { + config.resetFlag = 0; + source.reset(); + } + + if (config.partialResetFlag) { + config.partialResetFlag = 0; + source.buffers = {}; + } + if (config.enableDirty) { config.enableDirty = 0; source.enabled = config.enable != 0; @@ -266,16 +318,6 @@ namespace Audio { ); } - if (config.resetFlag) { - config.resetFlag = 0; - source.reset(); - } - - if (config.partialResetFlag) { - config.partialResetFlag = 0; - source.buffers = {}; - } - // TODO: Should we check bufferQueueDirty here too? 
if (config.formatDirty || config.embeddedBufferDirty) { source.sampleFormat = config.format; @@ -285,7 +327,18 @@ namespace Audio { source.sourceType = config.monoOrStereo; } + if (config.interpolationDirty) { + source.interpolationMode = config.interpolationMode; + } + + if (config.rateMultiplierDirty) { + source.rateMultiplier = (config.rateMultiplier > 0.f) ? config.rateMultiplier : 1.f; + } + if (config.embeddedBufferDirty) { + // Annoyingly, and only for embedded buffer, whether we use config.playPosition depends on the relevant dirty bit + const u32 playPosition = config.playPositionDirty ? config.playPosition : 0; + config.embeddedBufferDirty = 0; if (s32(config.length) >= 0) [[likely]] { // TODO: Add sample format and channel count @@ -297,7 +350,7 @@ namespace Audio { .adpcmDirty = config.adpcmDirty != 0, .looping = config.isLooping != 0, .bufferID = config.bufferID, - .playPosition = config.playPosition, + .playPosition = playPosition, .format = source.sampleFormat, .sourceType = source.sourceType, .fromQueue = false, @@ -316,10 +369,64 @@ namespace Audio { } if (config.bufferQueueDirty) { + // printf("Buffer queue dirty for voice %d\n", source.index); + + u16 dirtyBuffers = config.buffersDirty; config.bufferQueueDirty = 0; - printf("Buffer queue dirty for voice %d\n", source.index); + config.buffersDirty = 0; + + for (int i = 0; i < 4; i++) { + bool dirty = ((dirtyBuffers >> i) & 1) != 0; + if (dirty) { + const auto& buffer = config.buffers[i]; + + if (s32(buffer.length) >= 0) [[likely]] { + // TODO: Add sample format and channel count + Source::Buffer newBuffer{ + .paddr = buffer.physicalAddress, + .sampleCount = buffer.length, + .adpcmScale = u8(buffer.adpcm_ps), + .previousSamples = {s16(buffer.adpcm_yn[0]), s16(buffer.adpcm_yn[1])}, + .adpcmDirty = buffer.adpcmDirty != 0, + .looping = buffer.isLooping != 0, + .bufferID = buffer.bufferID, + .playPosition = 0, + .format = source.sampleFormat, + .sourceType = source.sourceType, + .fromQueue = true, + 
.hasPlayedOnce = false, + }; + + source.buffers.emplace(std::move(newBuffer)); + } else { + printf("Buffer queue dirty: Invalid buffer size for DSP voice %d\n", source.index); + } + } + } } +#define CONFIG_GAIN(index) \ + if (config.gain##index##Dirty) { \ + auto& dest = source.gains[index]; \ + auto& sourceGain = config.gain[index]; \ + \ + dest[0] = float(sourceGain[0]); \ + dest[1] = float(sourceGain[1]); \ + dest[2] = float(sourceGain[2]); \ + dest[3] = float(sourceGain[3]); \ + \ + if (dest[0] == 0.f && dest[1] == 0.f && dest[2] == 0.f && dest[3] == 0.f) { \ + source.enabledMixStages &= ~(1u << index); \ + } else { \ + source.enabledMixStages |= (1u << index); \ + } \ + } + + CONFIG_GAIN(0); + CONFIG_GAIN(1); + CONFIG_GAIN(2); +#undef CONFIG_GAIN + config.dirtyRaw = 0; } @@ -369,9 +476,20 @@ namespace Audio { if (buffer.looping) { source.pushBuffer(buffer); } + + // We're skipping the first samplePosition samples, so remove them from the buffer so as not to consume them later + if (source.samplePosition > 0) { + auto start = source.currentSamples.begin(); + auto end = std::next(start, source.samplePosition); + source.currentSamples.erase(start, end); + } } void HLE_DSP::generateFrame(DSPSource& source) { + // Zero out all output samples at first. TODO: Don't zero out the entire frame initially, rather only zero-out the "unwritten" samples when + // the frame is done being processed. 
+ source.currentFrame = {}; + if (source.currentSamples.empty()) { // There's no audio left to play, turn the voice off if (source.buffers.empty()) { @@ -385,10 +503,10 @@ namespace Audio { decodeBuffer(source); } else { - constexpr uint maxSampleCount = Audio::samplesInFrame; - uint outputCount = 0; + usize outputCount = 0; + static constexpr usize maxSamples = Audio::samplesInFrame; - while (outputCount < maxSampleCount) { + while (outputCount < maxSamples) { if (source.currentSamples.empty()) { if (source.buffers.empty()) { break; @@ -397,15 +515,75 @@ namespace Audio { } } - const uint sampleCount = std::min(maxSampleCount - outputCount, source.currentSamples.size()); - // samples.insert(samples.end(), source.currentSamples.begin(), source.currentSamples.begin() + sampleCount); - source.currentSamples.erase(source.currentSamples.begin(), source.currentSamples.begin() + sampleCount); + switch (source.interpolationMode) { + case Source::InterpolationMode::Linear: + Audio::Interpolation::linear( + source.interpolationState, source.currentSamples, source.rateMultiplier, source.currentFrame, outputCount + ); + break; + case Source::InterpolationMode::None: + Audio::Interpolation::none( + source.interpolationState, source.currentSamples, source.rateMultiplier, source.currentFrame, outputCount + ); + break; - outputCount += sampleCount; + case Source::InterpolationMode::Polyphase: + // Currently stubbed to be the same as linear + Audio::Interpolation::polyphase( + source.interpolationState, source.currentSamples, source.rateMultiplier, source.currentFrame, outputCount + ); + break; + } } + + source.samplePosition += u32(outputCount * source.rateMultiplier); } } + void HLE_DSP::performMix(Audio::HLE::SharedMemory& readRegion, Audio::HLE::SharedMemory& writeRegion) { + updateMixerConfig(readRegion); + // TODO: Do the actual audio mixing + + auto& dspStatus = writeRegion.dspStatus; + // Stub the DSP status. 
It's unknown what the "unknown" field is but Citra sets it to 0, so we do too to be safe + dspStatus.droppedFrames = 0; + dspStatus.unknown = 0; + } + + void HLE_DSP::updateMixerConfig(Audio::HLE::SharedMemory& sharedMem) { + auto& config = sharedMem.dspConfiguration; + // No configs have been changed, so there's nothing to update + if (config.dirtyRaw == 0) { + return; + } + + if (config.outputFormatDirty) { + mixer.channelFormat = config.outputFormat; + } + + if (config.masterVolumeDirty) { + mixer.volumes[0] = config.masterVolume; + } + + if (config.auxVolume0Dirty) { + mixer.volumes[1] = config.auxVolumes[0]; + } + + if (config.auxVolume1Dirty) { + mixer.volumes[2] = config.auxVolumes[1]; + } + + if (config.auxBusEnable0Dirty) { + mixer.enableAuxStages[0] = config.auxBusEnable[0] != 0; + } + + if (config.auxBusEnable1Dirty) { + mixer.enableAuxStages[1] = config.auxBusEnable[1] != 0; + } + + config.dirtyRaw = 0; + } + HLE_DSP::SampleBuffer HLE_DSP::decodePCM8(const u8* data, usize sampleCount, Source& source) { SampleBuffer decodedSamples(sampleCount); @@ -527,16 +705,7 @@ namespace Audio { switch (request.command) { case AAC::Command::EncodeDecode: - // Dummy response to stop games from hanging - // TODO: Fix this when implementing AAC - response.resultCode = AAC::ResultCode::Success; - response.decodeResponse.channelCount = 2; - response.decodeResponse.sampleCount = 1024; - response.decodeResponse.size = 0; - response.decodeResponse.sampleRate = AAC::SampleRate::Rate48000; - - response.command = request.command; - response.mode = request.mode; + aacDecoder->decode(response, request, [this](u32 paddr) { return getPointerPhys(paddr); }, settings.aacEnabled); break; case AAC::Command::Init: @@ -546,7 +715,7 @@ namespace Audio { response = request; response.resultCode = AAC::ResultCode::Success; break; - + default: Helpers::warn("Unknown AAC command type"); break; } @@ -563,12 +732,19 @@ namespace Audio { // Initialize these to some sane defaults sampleFormat = 
SampleFormat::ADPCM; sourceType = SourceType::Stereo; + interpolationMode = InterpolationMode::Linear; samplePosition = 0; previousBufferID = 0; currentBufferID = 0; syncCount = 0; + rateMultiplier = 1.f; buffers = {}; + interpolationState = {}; + currentSamples.clear(); + + gains.fill({}); + enabledMixStages = 0; } } // namespace Audio diff --git a/src/core/audio/miniaudio_device.cpp b/src/core/audio/miniaudio_device.cpp index fa36cb84..550fb039 100644 --- a/src/core/audio/miniaudio_device.cpp +++ b/src/core/audio/miniaudio_device.cpp @@ -1,8 +1,14 @@ #include "audio/miniaudio_device.hpp" +#include +#include +#include +#include + #include "helpers.hpp" -MiniAudioDevice::MiniAudioDevice() : initialized(false), running(false), samples(nullptr) {} +MiniAudioDevice::MiniAudioDevice(const AudioDeviceConfig& audioSettings) + : initialized(false), running(false), samples(nullptr), audioSettings(audioSettings) {} void MiniAudioDevice::init(Samples& samples, bool safe) { this->samples = &samples; @@ -27,8 +33,8 @@ void MiniAudioDevice::init(Samples& samples, bool safe) { // TODO: Make backend selectable here found = true; - //count = 1; - //backends[0] = backend; + // count = 1; + // backends[0] = backend; } if (!found) { @@ -81,25 +87,80 @@ void MiniAudioDevice::init(Samples& samples, bool safe) { deviceConfig.playback.format = ma_format_s16; deviceConfig.playback.channels = channelCount; deviceConfig.sampleRate = sampleRate; - //deviceConfig.periodSizeInFrames = 64; - //deviceConfig.periods = 16; + // deviceConfig.periodSizeInFrames = 64; + // deviceConfig.periods = 16; deviceConfig.pUserData = this; deviceConfig.aaudio.usage = ma_aaudio_usage_game; deviceConfig.wasapi.noAutoConvertSRC = true; + lastStereoSample = {0, 0}; + deviceConfig.dataCallback = [](ma_device* device, void* out, const void* input, ma_uint32 frameCount) { auto self = reinterpret_cast(device->pUserData); - s16* output = reinterpret_cast(out); + if (!self->running) { + return; + } - // Wait until 
there's enough samples to pop - while (self->samples->size() < frameCount * channelCount) { - // If audio output is disabled from the emulator thread, make sure that this callback will return and not hang - if (!self->running) { - return; + s16* output = reinterpret_cast(out); + usize samplesWritten = 0; + samplesWritten += self->samples->pop(output, frameCount * channelCount); + + // Get the last sample for underrun handling + if (samplesWritten != 0) { + std::memcpy(&self->lastStereoSample[0], &output[(samplesWritten - 1) * 2], sizeof(lastStereoSample)); + } + + // Adjust the volume of our samples based on the emulator's volume slider + float audioVolume = self->audioSettings.getVolume(); + // If volume is 1.0 we don't need to do anything + if (audioVolume != 1.0f) { + s16* sample = output; + + // If our volume is > 1.0 then we boost samples using a logarithmic scale, + // In this case we also have to clamp samples to make sure they don't wrap around + if (audioVolume > 1.0f) { + audioVolume = 0.6 + 20 * std::log10(audioVolume); + + constexpr s32 min = s32(std::numeric_limits::min()); + constexpr s32 max = s32(std::numeric_limits::max()); + + for (usize i = 0; i < samplesWritten; i += 2) { + s16 l = s16(std::clamp(s32(float(sample[0]) * audioVolume), min, max)); + s16 r = s16(std::clamp(s32(float(sample[1]) * audioVolume), min, max)); + + *sample++ = l; + *sample++ = r; + } + } else { + // If our volume is in [0.0, 1.0) then just multiply by the volume. 
No need to clamp, since there is no danger of our samples wrapping + // around due to overflow + + // If we're applying cubic volume curve, raise volume to the power of 3 + if (self->audioSettings.volumeCurve == AudioDeviceConfig::VolumeCurve::Cubic) { + audioVolume = audioVolume * audioVolume * audioVolume; + } + + for (usize i = 0; i < samplesWritten; i += 2) { + s16 l = s16(float(sample[0]) * audioVolume); + s16 r = s16(float(sample[1]) * audioVolume); + + *sample++ = l; + *sample++ = r; + } } } - self->samples->pop(output, frameCount * channelCount); + // If underruning, copy the last output sample + { + s16* pointer = &output[samplesWritten * 2]; + s16 l = self->lastStereoSample[0]; + s16 r = self->lastStereoSample[1]; + + for (usize i = samplesWritten; i < frameCount; i++) { + *pointer++ = l; + *pointer++ = r; + } + } }; if (ma_device_init(&context, &deviceConfig, &device) != MA_SUCCESS) { @@ -129,7 +190,7 @@ void MiniAudioDevice::start() { void MiniAudioDevice::stop() { if (!initialized) { - Helpers::warn("MiniAudio device not initialized, can't start"); + Helpers::warn("MiniAudio device not initialized, can't stop"); return; } @@ -138,6 +199,17 @@ void MiniAudioDevice::stop() { if (ma_device_stop(&device) != MA_SUCCESS) { Helpers::warn("Failed to stop audio device"); - } + } + } +} + +void MiniAudioDevice::close() { + stop(); + + if (initialized) { + initialized = false; + + ma_device_uninit(&device); + ma_context_uninit(&context); } } diff --git a/src/core/audio/null_core.cpp b/src/core/audio/null_core.cpp index ec073ae7..7ba584cf 100644 --- a/src/core/audio/null_core.cpp +++ b/src/core/audio/null_core.cpp @@ -74,7 +74,7 @@ namespace Audio { scheduler.removeEvent(Scheduler::EventType::RunDSP); } - void NullDSP::runAudioFrame() { + void NullDSP::runAudioFrame(u64 eventTimestamp) { // Signal audio pipe when an audio frame is done if (dspState == DSPState::On) [[likely]] { dspService.triggerPipeEvent(DSPPipeType::Audio); @@ -82,7 +82,7 @@ namespace Audio { 
scheduler.addEvent(Scheduler::EventType::RunDSP, scheduler.currentTimestamp + Audio::cyclesPerFrame); } - + u16 NullDSP::recvData(u32 regId) { if (regId != 0) { Helpers::panic("Audio: invalid register in null frontend"); @@ -116,13 +116,11 @@ namespace Audio { // TODO: Other initialization stuff here dspState = DSPState::On; resetAudioPipe(); - + dspService.triggerPipeEvent(DSPPipeType::Audio); break; - case StateChange::Shutdown: - dspState = DSPState::Off; - break; + case StateChange::Shutdown: dspState = DSPState::Off; break; default: Helpers::panic("Unimplemented DSP audio pipe state change %d", state); } diff --git a/src/core/audio/teakra_core.cpp b/src/core/audio/teakra_core.cpp index da2e5a5a..cf1484f8 100644 --- a/src/core/audio/teakra_core.cpp +++ b/src/core/audio/teakra_core.cpp @@ -36,8 +36,8 @@ struct Dsp1 { Segment segments[10]; }; -TeakraDSP::TeakraDSP(Memory& mem, Scheduler& scheduler, DSPService& dspService) - : DSPCore(mem, scheduler, dspService), pipeBaseAddr(0), running(false) { +TeakraDSP::TeakraDSP(Memory& mem, Scheduler& scheduler, DSPService& dspService, EmulatorConfig& config) + : DSPCore(mem, scheduler, dspService, config), pipeBaseAddr(0), running(false) { // Set up callbacks for Teakra Teakra::AHBMCallback ahbm; diff --git a/src/core/crypto/aes_engine.cpp b/src/core/crypto/aes_engine.cpp index f4bf3494..1d7baad9 100644 --- a/src/core/crypto/aes_engine.cpp +++ b/src/core/crypto/aes_engine.cpp @@ -1,18 +1,36 @@ -#include -#include - #include "crypto/aes_engine.hpp" + +#include +#include +#include +#include +#include +#include + #include "helpers.hpp" namespace Crypto { void AESEngine::loadKeys(const std::filesystem::path& path) { std::ifstream file(path, std::ios::in); - + if (file.fail()) { Helpers::warn("Keys: Couldn't read key file: %s", path.c_str()); return; } + auto splitString = [](const std::string& s, const char c) -> std::vector { + std::istringstream tmp(s); + std::vector result(1); + + while (std::getline(tmp, *result.rbegin(), 
c)) { + result.emplace_back(); + } + + // Remove temporary slot + result.pop_back(); + return result; + }; + while (!file.eof()) { std::string line; std::getline(file, line); @@ -22,7 +40,7 @@ namespace Crypto { continue; } - const auto parts = Helpers::split(line, '='); + const auto parts = splitString(line, '='); if (parts.size() != 2) { Helpers::warn("Keys: Failed to parse %s", line.c_str()); continue; @@ -58,18 +76,10 @@ namespace Crypto { } switch (keyType) { - case 'X': - setKeyX(slotId, key.value()); - break; - case 'Y': - setKeyY(slotId, key.value()); - break; - case 'N': - setNormalKey(slotId, key.value()); - break; - default: - Helpers::warn("Keys: Invalid key type %c", keyType); - break; + case 'X': setKeyX(slotId, key.value()); break; + case 'Y': setKeyY(slotId, key.value()); break; + case 'N': setNormalKey(slotId, key.value()); break; + default: Helpers::warn("Keys: Invalid key type %c", keyType); break; } } @@ -80,4 +90,65 @@ namespace Crypto { keysLoaded = true; } -}; \ No newline at end of file + + void AESEngine::setSeedPath(const std::filesystem::path& path) { seedDatabase.open(path, "rb"); } + + // Loads seeds from a seed file, return true on success and false on failure + bool AESEngine::loadSeeds() { + if (!seedDatabase.isOpen()) { + return false; + } + + // The # of seeds is stored at offset 0 + u32_le seedCount = 0; + + if (!seedDatabase.rewind()) { + return false; + } + + auto [success, size] = seedDatabase.readBytes(&seedCount, sizeof(u32)); + if (!success || size != sizeof(u32)) { + return false; + } + + // Key data starts from offset 16 + if (!seedDatabase.seek(16)) { + return false; + } + + Crypto::Seed seed; + for (uint i = 0; i < seedCount; i++) { + std::tie(success, size) = seedDatabase.readBytes(&seed, sizeof(seed)); + if (!success || size != sizeof(seed)) { + return false; + } + + seeds.push_back(seed); + } + + return true; + } + + std::optional AESEngine::getSeedFromDB(u64 titleID) { + // We don't have a seed db nor any seeds 
loaded, return nullopt + if (!seedDatabase.isOpen() && seeds.empty()) { + return std::nullopt; + } + + // We have a seed DB but haven't loaded the seeds yet, so load them + if (seedDatabase.isOpen() && seeds.empty()) { + bool success = loadSeeds(); + if (!success) { + return std::nullopt; + } + } + + for (Crypto::Seed& seed : seeds) { + if (seed.titleID == titleID) { + return seed.seed; + } + } + + return std::nullopt; + } +}; // namespace Crypto \ No newline at end of file diff --git a/src/core/fs/archive_card_spi.cpp b/src/core/fs/archive_card_spi.cpp new file mode 100644 index 00000000..965ae982 --- /dev/null +++ b/src/core/fs/archive_card_spi.cpp @@ -0,0 +1,40 @@ +#include +#include + +#include "fs/archive_card_spi.hpp" + +namespace fs = std::filesystem; + +HorizonResult CardSPIArchive::createFile(const FSPath& path, u64 size) { + Helpers::panic("[Card SPI] CreateFile not yet supported"); + return Result::Success; +} + +HorizonResult CardSPIArchive::deleteFile(const FSPath& path) { + Helpers::panic("[Card SPI] Unimplemented DeleteFile"); + return Result::Success; +} + +HorizonResult CardSPIArchive::createDirectory(const FSPath& path) { + Helpers::panic("[Card SPI] CreateDirectory not yet supported"); + return Result::Success; +} + +FileDescriptor CardSPIArchive::openFile(const FSPath& path, const FilePerms& perms) { + Helpers::panic("[Card SPI] OpenFile not yet supported"); + return FileError; +} + +Rust::Result CardSPIArchive::openArchive(const FSPath& path) { + if (!path.isEmptyType()) { + Helpers::panic("Unimplemented path type for CardSPIArchive::OpenArchive"); + } + + Helpers::warn("Unimplemented: Card SPI archive"); + return Err(Result::FailurePlaceholder); +} + +Rust::Result CardSPIArchive::openDirectory(const FSPath& path) { + Helpers::panic("[Card SPI] OpenDirectory not yet supported"); + return Err(Result::FailurePlaceholder); +} diff --git a/src/core/fs/archive_ext_save_data.cpp b/src/core/fs/archive_ext_save_data.cpp index 4b57f245..2aaa157e 100644 
--- a/src/core/fs/archive_ext_save_data.cpp +++ b/src/core/fs/archive_ext_save_data.cpp @@ -7,12 +7,13 @@ HorizonResult ExtSaveDataArchive::createFile(const FSPath& path, u64 size) { if (size == 0) Helpers::panic("ExtSaveData file does not support size == 0"); - if (path.type == PathType::UTF16) { - if (!isPathSafe(path)) - Helpers::panic("Unsafe path in ExtSaveData::CreateFile"); + if (path.isTextPath()) { + if (!isSafeTextPath(path)) { + Helpers::panic("Unsafe path in ExtSaveData::OpenFile"); + } fs::path p = IOFile::getAppData() / backingFolder; - p += fs::path(path.utf16_string).make_preferred(); + appendPath(p, path); if (fs::exists(p)) return Result::FS::AlreadyExists; @@ -28,17 +29,17 @@ HorizonResult ExtSaveDataArchive::createFile(const FSPath& path, u64 size) { return Result::FS::FileTooLarge; } - Helpers::panic("ExtSaveDataArchive::OpenFile: Failed"); + Helpers::panic("ExtSaveDataArchive::CreateFile: Failed"); return Result::Success; } HorizonResult ExtSaveDataArchive::deleteFile(const FSPath& path) { - if (path.type == PathType::UTF16) { - if (!isPathSafe(path)) + if (path.isTextPath()) { + if (!isSafeTextPath(path)) Helpers::panic("Unsafe path in ExtSaveData::DeleteFile"); fs::path p = IOFile::getAppData() / backingFolder; - p += fs::path(path.utf16_string).make_preferred(); + appendPath(p, path); if (fs::is_directory(p)) { Helpers::panic("ExtSaveData::DeleteFile: Tried to delete directory"); @@ -65,15 +66,16 @@ HorizonResult ExtSaveDataArchive::deleteFile(const FSPath& path) { } FileDescriptor ExtSaveDataArchive::openFile(const FSPath& path, const FilePerms& perms) { - if (path.type == PathType::UTF16) { - if (!isPathSafe(path)) + if (path.isTextPath()) { + if (!isSafeTextPath(path)) { Helpers::panic("Unsafe path in ExtSaveData::OpenFile"); + } if (perms.create()) Helpers::panic("[ExtSaveData] Can't open file with create flag"); fs::path p = IOFile::getAppData() / backingFolder; - p += fs::path(path.utf16_string).make_preferred(); + appendPath(p, 
path); if (fs::exists(p)) { // Return file descriptor if the file exists IOFile file(p.string().c_str(), "r+b"); // According to Citra, this ignores the OpenFlags field and always opens as r+b? TODO: Check @@ -88,7 +90,7 @@ FileDescriptor ExtSaveDataArchive::openFile(const FSPath& path, const FilePerms& } HorizonResult ExtSaveDataArchive::renameFile(const FSPath& oldPath, const FSPath& newPath) { - if (oldPath.type != PathType::UTF16 || newPath.type != PathType::UTF16) { + if (!oldPath.isUTF16() || !newPath.isUTF16()) { Helpers::panic("Invalid path type for ExtSaveData::RenameFile"); } @@ -125,15 +127,18 @@ HorizonResult ExtSaveDataArchive::renameFile(const FSPath& oldPath, const FSPath } HorizonResult ExtSaveDataArchive::createDirectory(const FSPath& path) { - if (path.type == PathType::UTF16) { - if (!isPathSafe(path)) { + if (path.isTextPath()) { + if (!isSafeTextPath(path)) { Helpers::panic("Unsafe path in ExtSaveData::OpenFile"); } fs::path p = IOFile::getAppData() / backingFolder; - p += fs::path(path.utf16_string).make_preferred(); + appendPath(p, path); + + if (fs::is_directory(p)) { + return Result::FS::AlreadyExists; + } - if (fs::is_directory(p)) return Result::FS::AlreadyExists; if (fs::is_regular_file(p)) { Helpers::panic("File path passed to ExtSaveData::CreateDirectory"); } @@ -156,7 +161,7 @@ std::string ExtSaveDataArchive::getExtSaveDataPathFromBinary(const FSPath& path) } Rust::Result ExtSaveDataArchive::openArchive(const FSPath& path) { - if (path.type != PathType::Binary || path.binary.size() != 12) { + if (!path.isBinary() || path.binary.size() != 12) { Helpers::panic("ExtSaveData accessed with an invalid path in OpenArchive"); } @@ -172,12 +177,12 @@ Rust::Result ExtSaveDataArchive::openArchive(const } Rust::Result ExtSaveDataArchive::openDirectory(const FSPath& path) { - if (path.type == PathType::UTF16) { - if (!isPathSafe(path)) + if (path.isTextPath()) { + if (!isSafeTextPath(path)) Helpers::panic("Unsafe path in 
ExtSaveData::OpenDirectory"); fs::path p = IOFile::getAppData() / backingFolder; - p += fs::path(path.utf16_string).make_preferred(); + appendPath(p, path); if (fs::is_regular_file(p)) { printf("ExtSaveData: OpenArchive used with a file path"); diff --git a/src/core/fs/archive_ncch.cpp b/src/core/fs/archive_ncch.cpp index d5a4bab5..5844bbda 100644 --- a/src/core/fs/archive_ncch.cpp +++ b/src/core/fs/archive_ncch.cpp @@ -32,7 +32,7 @@ HorizonResult NCCHArchive::deleteFile(const FSPath& path) { } FileDescriptor NCCHArchive::openFile(const FSPath& path, const FilePerms& perms) { - if (path.type != PathType::Binary || path.binary.size() != 20) { + if (!path.isBinary() || path.binary.size() != 20) { Helpers::panic("NCCHArchive::OpenFile: Invalid path"); } @@ -49,7 +49,7 @@ FileDescriptor NCCHArchive::openFile(const FSPath& path, const FilePerms& perms) } Rust::Result NCCHArchive::openArchive(const FSPath& path) { - if (path.type != PathType::Binary || path.binary.size() != 16) { + if (!path.isBinary() || path.binary.size() != 16) { Helpers::panic("NCCHArchive::OpenArchive: Invalid path"); } diff --git a/src/core/fs/archive_save_data.cpp b/src/core/fs/archive_save_data.cpp index 0bdb9e01..35bc93e4 100644 --- a/src/core/fs/archive_save_data.cpp +++ b/src/core/fs/archive_save_data.cpp @@ -5,7 +5,7 @@ namespace fs = std::filesystem; HorizonResult SaveDataArchive::createFile(const FSPath& path, u64 size) { - if (path.type == PathType::UTF16) { + if (path.isUTF16()) { if (!isPathSafe(path)) Helpers::panic("Unsafe path in SaveData::CreateFile"); @@ -39,7 +39,7 @@ HorizonResult SaveDataArchive::createFile(const FSPath& path, u64 size) { } HorizonResult SaveDataArchive::createDirectory(const FSPath& path) { - if (path.type == PathType::UTF16) { + if (path.isUTF16()) { if (!isPathSafe(path)) { Helpers::panic("Unsafe path in SaveData::OpenFile"); } @@ -63,7 +63,7 @@ HorizonResult SaveDataArchive::createDirectory(const FSPath& path) { } HorizonResult 
SaveDataArchive::deleteFile(const FSPath& path) { - if (path.type == PathType::UTF16) { + if (path.isUTF16()) { if (!isPathSafe(path)) { Helpers::panic("Unsafe path in SaveData::DeleteFile"); } @@ -96,7 +96,7 @@ HorizonResult SaveDataArchive::deleteFile(const FSPath& path) { } FileDescriptor SaveDataArchive::openFile(const FSPath& path, const FilePerms& perms) { - if (path.type == PathType::UTF16) { + if (path.isUTF16()) { if (!isPathSafe(path)) { Helpers::panic("Unsafe path in SaveData::OpenFile"); } @@ -132,7 +132,7 @@ FileDescriptor SaveDataArchive::openFile(const FSPath& path, const FilePerms& pe } Rust::Result SaveDataArchive::openDirectory(const FSPath& path) { - if (path.type == PathType::UTF16) { + if (path.isUTF16()) { if (!isPathSafe(path)) { Helpers::panic("Unsafe path in SaveData::OpenDirectory"); } @@ -193,7 +193,7 @@ void SaveDataArchive::format(const FSPath& path, const ArchiveBase::FormatInfo& } Rust::Result SaveDataArchive::openArchive(const FSPath& path) { - if (path.type != PathType::Empty) { + if (!path.isEmptyType()) { Helpers::panic("Unimplemented path type for SaveData archive: %d\n", path.type); return Err(Result::FS::NotFoundInvalid); } diff --git a/src/core/fs/archive_sdmc.cpp b/src/core/fs/archive_sdmc.cpp index 97b02b9e..f5f999f3 100644 --- a/src/core/fs/archive_sdmc.cpp +++ b/src/core/fs/archive_sdmc.cpp @@ -4,13 +4,13 @@ namespace fs = std::filesystem; HorizonResult SDMCArchive::createFile(const FSPath& path, u64 size) { - if (path.type == PathType::UTF16) { - if (!isPathSafe(path)) { + if (path.isTextPath()) { + if (!isSafeTextPath(path)) { Helpers::panic("Unsafe path in SDMC::CreateFile"); } fs::path p = IOFile::getAppData() / "SDMC"; - p += fs::path(path.utf16_string).make_preferred(); + appendPath(p, path); if (fs::exists(p)) { return Result::FS::AlreadyExists; @@ -39,13 +39,13 @@ HorizonResult SDMCArchive::createFile(const FSPath& path, u64 size) { } HorizonResult SDMCArchive::deleteFile(const FSPath& path) { - if (path.type == 
PathType::UTF16) { - if (!isPathSafe(path)) { + if (path.isTextPath()) { + if (!isSafeTextPath(path)) { Helpers::panic("Unsafe path in SDMC::DeleteFile"); } fs::path p = IOFile::getAppData() / "SDMC"; - p += fs::path(path.utf16_string).make_preferred(); + appendPath(p, path); if (fs::is_directory(p)) { Helpers::panic("SDMC::DeleteFile: Tried to delete directory"); @@ -171,13 +171,13 @@ Rust::Result SDMCArchive::openDirectory(const F return Err(Result::FS::UnexpectedFileOrDir); } - if (path.type == PathType::UTF16) { - if (!isPathSafe(path)) { + if (path.isTextPath()) { + if (!isSafeTextPath(path)) { Helpers::panic("Unsafe path in SDMC::OpenDirectory"); } fs::path p = IOFile::getAppData() / "SDMC"; - p += fs::path(path.utf16_string).make_preferred(); + appendPath(p, path); if (fs::is_regular_file(p)) { printf("SDMC: OpenDirectory used with a file path"); @@ -197,7 +197,7 @@ Rust::Result SDMCArchive::openDirectory(const F Rust::Result SDMCArchive::openArchive(const FSPath& path) { // TODO: Fail here if the SD is disabled in the connfig. 
- if (path.type != PathType::Empty) { + if (!path.isEmptyType()) { Helpers::panic("Unimplemented path type for SDMC::OpenArchive"); } diff --git a/src/core/fs/archive_self_ncch.cpp b/src/core/fs/archive_self_ncch.cpp index 9369152d..4d73300c 100644 --- a/src/core/fs/archive_self_ncch.cpp +++ b/src/core/fs/archive_self_ncch.cpp @@ -26,7 +26,7 @@ FileDescriptor SelfNCCHArchive::openFile(const FSPath& path, const FilePerms& pe return FileError; } - if (path.type != PathType::Binary || path.binary.size() != 12) { + if (!path.isBinary() || path.binary.size() != 12) { printf("Invalid SelfNCCH path type\n"); return FileError; } @@ -42,7 +42,7 @@ FileDescriptor SelfNCCHArchive::openFile(const FSPath& path, const FilePerms& pe } Rust::Result SelfNCCHArchive::openArchive(const FSPath& path) { - if (path.type != PathType::Empty) { + if (!path.isEmptyType()) { Helpers::panic("Invalid path type for SelfNCCH archive: %d\n", path.type); return Err(Result::FS::NotFoundInvalid); } diff --git a/src/core/fs/archive_system_save_data.cpp b/src/core/fs/archive_system_save_data.cpp index 37afb431..038b2fc8 100644 --- a/src/core/fs/archive_system_save_data.cpp +++ b/src/core/fs/archive_system_save_data.cpp @@ -4,7 +4,7 @@ namespace fs = std::filesystem; Rust::Result SystemSaveDataArchive::openArchive(const FSPath& path) { - if (path.type != PathType::Binary) { + if (!path.isBinary()) { Helpers::panic("Unimplemented path type for SystemSaveData::OpenArchive"); } @@ -14,7 +14,7 @@ Rust::Result SystemSaveDataArchive::openArchive(con FileDescriptor SystemSaveDataArchive::openFile(const FSPath& path, const FilePerms& perms) { // TODO: Validate this. 
Temporarily copied from SaveData archive - if (path.type == PathType::UTF16) { + if (path.isUTF16()) { if (!isPathSafe(path)) { Helpers::panic("Unsafe path in SystemSaveData::OpenFile"); } @@ -50,7 +50,7 @@ FileDescriptor SystemSaveDataArchive::openFile(const FSPath& path, const FilePer } HorizonResult SystemSaveDataArchive::createFile(const FSPath& path, u64 size) { - if (path.type == PathType::UTF16) { + if (path.isUTF16()) { if (!isPathSafe(path)) { Helpers::panic("Unsafe path in SystemSaveData::CreateFile"); } @@ -85,9 +85,9 @@ HorizonResult SystemSaveDataArchive::createFile(const FSPath& path, u64 size) { } HorizonResult SystemSaveDataArchive::createDirectory(const FSPath& path) { - if (path.type == PathType::UTF16) { + if (path.isUTF16()) { if (!isPathSafe(path)) { - Helpers::panic("Unsafe path in SystemSaveData::OpenFile"); + Helpers::panic("Unsafe path in SystemSaveData::CreateDirectory"); } fs::path p = IOFile::getAppData() / ".." / "SharedFiles" / "SystemSaveData"; @@ -110,7 +110,7 @@ HorizonResult SystemSaveDataArchive::createDirectory(const FSPath& path) { HorizonResult SystemSaveDataArchive::deleteFile(const FSPath& path) { - if (path.type == PathType::UTF16) { + if (path.isUTF16()) { if (!isPathSafe(path)) { Helpers::panic("Unsafe path in SystemSaveData::DeleteFile"); } @@ -143,7 +143,7 @@ HorizonResult SystemSaveDataArchive::deleteFile(const FSPath& path) { } Rust::Result SystemSaveDataArchive::openDirectory(const FSPath& path) { - if (path.type == PathType::UTF16) { + if (path.isUTF16()) { if (!isPathSafe(path)) { Helpers::warn("Unsafe path in SystemSaveData::OpenDirectory"); return Err(Result::FS::FileNotFoundAlt); diff --git a/src/core/fs/archive_twl_photo.cpp b/src/core/fs/archive_twl_photo.cpp new file mode 100644 index 00000000..4d56a5d7 --- /dev/null +++ b/src/core/fs/archive_twl_photo.cpp @@ -0,0 +1,40 @@ +#include +#include + +#include "fs/archive_twl_photo.hpp" + +namespace fs = std::filesystem; + +HorizonResult 
TWLPhotoArchive::createFile(const FSPath& path, u64 size) { + Helpers::panic("[TWL_PHOTO] CreateFile not yet supported"); + return Result::Success; +} + +HorizonResult TWLPhotoArchive::deleteFile(const FSPath& path) { + Helpers::panic("[TWL_PHOTO] Unimplemented DeleteFile"); + return Result::Success; +} + +HorizonResult TWLPhotoArchive::createDirectory(const FSPath& path) { + Helpers::panic("[TWL_PHOTO] CreateDirectory not yet supported"); + return Result::Success; +} + +FileDescriptor TWLPhotoArchive::openFile(const FSPath& path, const FilePerms& perms) { + Helpers::panic("[TWL_PHOTO] OpenFile not yet supported"); + return FileError; +} + +Rust::Result TWLPhotoArchive::openArchive(const FSPath& path) { + if (!path.isEmptyType()) { + Helpers::panic("Unimplemented path type for TWLPhotoArchive::OpenArchive"); + } + + Helpers::warn("Unimplemented: TWL_PHOTO archive"); + return Err(Result::FailurePlaceholder); +} + +Rust::Result TWLPhotoArchive::openDirectory(const FSPath& path) { + Helpers::panic("[TWL_PHOTO] OpenDirectory not yet supported"); + return Err(Result::FailurePlaceholder); +} diff --git a/src/core/fs/archive_twl_sound.cpp b/src/core/fs/archive_twl_sound.cpp new file mode 100644 index 00000000..a5f86c32 --- /dev/null +++ b/src/core/fs/archive_twl_sound.cpp @@ -0,0 +1,40 @@ +#include +#include + +#include "fs/archive_twl_sound.hpp" + +namespace fs = std::filesystem; + +HorizonResult TWLSoundArchive::createFile(const FSPath& path, u64 size) { + Helpers::panic("[TWL_SOUND] CreateFile not yet supported"); + return Result::Success; +} + +HorizonResult TWLSoundArchive::deleteFile(const FSPath& path) { + Helpers::panic("[TWL_SOUND] Unimplemented DeleteFile"); + return Result::Success; +} + +HorizonResult TWLSoundArchive::createDirectory(const FSPath& path) { + Helpers::panic("[TWL_SOUND] CreateDirectory not yet supported"); + return Result::Success; +} + +FileDescriptor TWLSoundArchive::openFile(const FSPath& path, const FilePerms& perms) { + 
Helpers::panic("[TWL_SOUND] OpenFile not yet supported"); + return FileError; +} + +Rust::Result TWLSoundArchive::openArchive(const FSPath& path) { + if (!path.isEmptyType()) { + Helpers::panic("Unimplemented path type for TWLSoundArchive::OpenArchive"); + } + + Helpers::warn("Unimplemented: TWL_SOUND archive"); + return Err(Result::FailurePlaceholder); +} + +Rust::Result TWLSoundArchive::openDirectory(const FSPath& path) { + Helpers::panic("[TWL_SOUND] OpenDirectory not yet supported"); + return Err(Result::FailurePlaceholder); +} diff --git a/src/core/fs/archive_user_save_data.cpp b/src/core/fs/archive_user_save_data.cpp index cba9bff8..dc558954 100644 --- a/src/core/fs/archive_user_save_data.cpp +++ b/src/core/fs/archive_user_save_data.cpp @@ -6,13 +6,15 @@ namespace fs = std::filesystem; HorizonResult UserSaveDataArchive::createFile(const FSPath& path, u64 size) { - if (path.type == PathType::UTF16) { + if (path.isUTF16()) { if (!isPathSafe(path)) Helpers::panic("Unsafe path in UserSaveData::CreateFile"); fs::path p = IOFile::getAppData() / "SaveData"; p += fs::path(path.utf16_string).make_preferred(); - if (fs::exists(p)) return Result::FS::AlreadyExists; + if (fs::exists(p)) { + return Result::FS::AlreadyExists; + } IOFile file(p.string().c_str(), "wb"); @@ -37,8 +39,10 @@ HorizonResult UserSaveDataArchive::createFile(const FSPath& path, u64 size) { } HorizonResult UserSaveDataArchive::createDirectory(const FSPath& path) { - if (path.type == PathType::UTF16) { - if (!isPathSafe(path)) Helpers::panic("Unsafe path in UserSaveData::OpenFile"); + if (path.isUTF16()) { + if (!isPathSafe(path)) { + Helpers::panic("Unsafe path in UserSaveData::OpenFile"); + } fs::path p = IOFile::getAppData() / "SaveData"; p += fs::path(path.utf16_string).make_preferred(); @@ -56,7 +60,7 @@ HorizonResult UserSaveDataArchive::createDirectory(const FSPath& path) { } HorizonResult UserSaveDataArchive::deleteFile(const FSPath& path) { - if (path.type == PathType::UTF16) { + if 
(path.isUTF16()) { if (!isPathSafe(path)) Helpers::panic("Unsafe path in UserSaveData::DeleteFile"); fs::path p = IOFile::getAppData() / "SaveData"; @@ -87,7 +91,7 @@ HorizonResult UserSaveDataArchive::deleteFile(const FSPath& path) { } FileDescriptor UserSaveDataArchive::openFile(const FSPath& path, const FilePerms& perms) { - if (path.type == PathType::UTF16) { + if (path.isUTF16()) { if (!isPathSafe(path)) Helpers::panic("Unsafe path in UserSaveData::OpenFile"); if (perms.raw == 0 || (perms.create() && !perms.write())) Helpers::panic("[UserSaveData] Unsupported flags for OpenFile"); @@ -119,7 +123,7 @@ FileDescriptor UserSaveDataArchive::openFile(const FSPath& path, const FilePerms } Rust::Result UserSaveDataArchive::openDirectory(const FSPath& path) { - if (path.type == PathType::UTF16) { + if (path.isUTF16()) { if (!isPathSafe(path)) Helpers::panic("Unsafe path in UserSaveData::OpenDirectory"); fs::path p = IOFile::getAppData() / "SaveData"; diff --git a/src/core/kernel/address_arbiter.cpp b/src/core/kernel/address_arbiter.cpp index 8c07b423..d15c81b8 100644 --- a/src/core/kernel/address_arbiter.cpp +++ b/src/core/kernel/address_arbiter.cpp @@ -12,7 +12,7 @@ static const char* arbitrationTypeToString(u32 type) { } } -Handle Kernel::makeArbiter() { +HorizonHandle Kernel::makeArbiter() { if (arbiterCount >= appResourceLimits.maxAddressArbiters) { Helpers::panic("Overflowed the number of address arbiters"); } diff --git a/src/core/kernel/events.cpp b/src/core/kernel/events.cpp index 7c0d3047..6d3dfbd7 100644 --- a/src/core/kernel/events.cpp +++ b/src/core/kernel/events.cpp @@ -12,7 +12,7 @@ const char* Kernel::resetTypeToString(u32 type) { } } -Handle Kernel::makeEvent(ResetType resetType, Event::CallbackType callback) { +HorizonHandle Kernel::makeEvent(ResetType resetType, Event::CallbackType callback) { Handle ret = makeObject(KernelObjectType::Event); objects[ret].data = new Event(resetType, callback); return ret; diff --git a/src/core/kernel/kernel.cpp 
b/src/core/kernel/kernel.cpp index 0d1efc15..d4229b55 100644 --- a/src/core/kernel/kernel.cpp +++ b/src/core/kernel/kernel.cpp @@ -82,7 +82,7 @@ void Kernel::setVersion(u8 major, u8 minor) { mem.kernelVersion = descriptor; // The memory objects needs a copy because you can read the kernel ver from config mem } -Handle Kernel::makeProcess(u32 id) { +HorizonHandle Kernel::makeProcess(u32 id) { const Handle processHandle = makeObject(KernelObjectType::Process); const Handle resourceLimitHandle = makeObject(KernelObjectType::ResourceLimit); diff --git a/src/core/kernel/memory_management.cpp b/src/core/kernel/memory_management.cpp index 0d234be5..26f50023 100644 --- a/src/core/kernel/memory_management.cpp +++ b/src/core/kernel/memory_management.cpp @@ -136,7 +136,7 @@ void Kernel::mapMemoryBlock() { break; case KernelHandles::FontSharedMemHandle: - mem.copySharedFont(ptr); + mem.copySharedFont(ptr, addr); break; case KernelHandles::CSNDSharedMemHandle: @@ -154,7 +154,7 @@ void Kernel::mapMemoryBlock() { regs[0] = Result::Success; } -Handle Kernel::makeMemoryBlock(u32 addr, u32 size, u32 myPermission, u32 otherPermission) { +HorizonHandle Kernel::makeMemoryBlock(u32 addr, u32 size, u32 myPermission, u32 otherPermission) { Handle ret = makeObject(KernelObjectType::MemoryBlock); objects[ret].data = new MemoryBlock(addr, size, myPermission, otherPermission); diff --git a/src/core/kernel/ports.cpp b/src/core/kernel/ports.cpp index 6038de44..61ab26e3 100644 --- a/src/core/kernel/ports.cpp +++ b/src/core/kernel/ports.cpp @@ -1,7 +1,7 @@ #include "kernel.hpp" #include -Handle Kernel::makePort(const char* name) { +HorizonHandle Kernel::makePort(const char* name) { Handle ret = makeObject(KernelObjectType::Port); portHandles.push_back(ret); // Push the port handle to our cache of port handles objects[ret].data = new Port(name); @@ -9,7 +9,7 @@ Handle Kernel::makePort(const char* name) { return ret; } -Handle Kernel::makeSession(Handle portHandle) { +HorizonHandle 
Kernel::makeSession(Handle portHandle) { const auto port = getObject(portHandle, KernelObjectType::Port); if (port == nullptr) [[unlikely]] { Helpers::panic("Trying to make session for non-existent port"); @@ -23,7 +23,7 @@ Handle Kernel::makeSession(Handle portHandle) { // Get the handle of a port based on its name // If there's no such port, return nullopt -std::optional Kernel::getPortHandle(const char* name) { +std::optional Kernel::getPortHandle(const char* name) { for (auto handle : portHandles) { const auto data = objects[handle].getData(); if (std::strncmp(name, data->name, Port::maxNameLen) == 0) { diff --git a/src/core/kernel/threads.cpp b/src/core/kernel/threads.cpp index 3a6201c1..9eb7a197 100644 --- a/src/core/kernel/threads.cpp +++ b/src/core/kernel/threads.cpp @@ -109,7 +109,7 @@ void Kernel::rescheduleThreads() { } // Internal OS function to spawn a thread -Handle Kernel::makeThread(u32 entrypoint, u32 initialSP, u32 priority, ProcessorID id, u32 arg, ThreadStatus status) { +HorizonHandle Kernel::makeThread(u32 entrypoint, u32 initialSP, u32 priority, ProcessorID id, u32 arg, ThreadStatus status) { int index; // Index of the created thread in the threads array if (threadCount < appResourceLimits.maxThreads) [[likely]] { // If we have not yet created over too many threads @@ -161,7 +161,7 @@ Handle Kernel::makeThread(u32 entrypoint, u32 initialSP, u32 priority, Processor return ret; } -Handle Kernel::makeMutex(bool locked) { +HorizonHandle Kernel::makeMutex(bool locked) { Handle ret = makeObject(KernelObjectType::Mutex); objects[ret].data = new Mutex(locked, ret); @@ -201,7 +201,7 @@ void Kernel::releaseMutex(Mutex* moo) { } } -Handle Kernel::makeSemaphore(u32 initialCount, u32 maximumCount) { +HorizonHandle Kernel::makeSemaphore(u32 initialCount, u32 maximumCount) { Handle ret = makeObject(KernelObjectType::Semaphore); objects[ret].data = new Semaphore(initialCount, maximumCount); diff --git a/src/core/kernel/timers.cpp b/src/core/kernel/timers.cpp 
index 35fc57a4..8cfa4773 100644 --- a/src/core/kernel/timers.cpp +++ b/src/core/kernel/timers.cpp @@ -4,7 +4,7 @@ #include "kernel.hpp" #include "scheduler.hpp" -Handle Kernel::makeTimer(ResetType type) { +HorizonHandle Kernel::makeTimer(ResetType type) { Handle ret = makeObject(KernelObjectType::Timer); objects[ret].data = new Timer(type); diff --git a/src/core/loader/ncch.cpp b/src/core/loader/ncch.cpp index 47d5a4c2..96d13813 100644 --- a/src/core/loader/ncch.cpp +++ b/src/core/loader/ncch.cpp @@ -1,12 +1,15 @@ +#include "loader/ncch.hpp" + #include #include -#include -#include -#include "loader/lz77.hpp" -#include "loader/ncch.hpp" -#include "memory.hpp" +#include +#include #include +#include + +#include "loader/lz77.hpp" +#include "memory.hpp" bool NCCH::loadFromHeader(Crypto::AESEngine &aesEngine, IOFile& file, const FSInfo &info) { // 0x200 bytes for the NCCH header @@ -25,7 +28,6 @@ bool NCCH::loadFromHeader(Crypto::AESEngine &aesEngine, IOFile& file, const FSIn } codeFile.clear(); - saveData.clear(); smdh.clear(); partitionInfo = info; @@ -71,8 +73,26 @@ bool NCCH::loadFromHeader(Crypto::AESEngine &aesEngine, IOFile& file, const FSIn if (!seedCrypto) { secondaryKeyY = primaryKeyY; } else { - Helpers::warn("Seed crypto is not supported"); - gotCryptoKeys = false; + // In seed crypto mode, the secondary key is computed through a SHA256 hash of the primary key and a title-specific seed, which we fetch + // from seeddb.bin + std::optional seedOptional = aesEngine.getSeedFromDB(programID); + if (seedOptional.has_value()) { + auto seed = *seedOptional; + + CryptoPP::SHA256 shaEngine; + std::array data; + std::array hash; + + std::memcpy(&data[0], primaryKeyY.data(), primaryKeyY.size()); + std::memcpy(&data[16], seed.data(), seed.size()); + shaEngine.CalculateDigest(hash.data(), data.data(), data.size()); + // Note that SHA256 will produce a 256-bit hash, while we only need 128 bits cause this is an AES key + // So the latter 16 bytes of the SHA256 are thrown 
out. + std::memcpy(secondaryKeyY.data(), hash.data(), secondaryKeyY.size()); + } else { + Helpers::warn("Couldn't find a seed value for this title. Make sure you have a seeddb.bin file alongside your aes_keys.txt"); + gotCryptoKeys = false; + } } auto primaryResult = getPrimaryKey(aesEngine, primaryKeyY); @@ -88,8 +108,8 @@ bool NCCH::loadFromHeader(Crypto::AESEngine &aesEngine, IOFile& file, const FSIn encryptionInfoTmp.normalKey = *primaryKey; encryptionInfoTmp.initialCounter.fill(0); - for (std::size_t i = 1; i <= sizeof(std::uint64_t) - 1; i++) { - encryptionInfoTmp.initialCounter[i] = header[0x108 + sizeof(std::uint64_t) - 1 - i]; + for (usize i = 0; i < 8; i++) { + encryptionInfoTmp.initialCounter[i] = header[0x108 + 7 - i]; } encryptionInfoTmp.initialCounter[8] = 1; exheaderInfo.encryptionInfo = encryptionInfoTmp; @@ -155,8 +175,7 @@ bool NCCH::loadFromHeader(Crypto::AESEngine &aesEngine, IOFile& file, const FSIn } } - const u64 saveDataSize = *(u64*)&exheader[0x1C0 + 0x0]; // Size of save data in bytes - saveData.resize(saveDataSize, 0xff); + saveDataSize = *(u64*)&exheader[0x1C0 + 0x0]; // Size of save data in bytes compressCode = (exheader[0xD] & 1) != 0; stackSize = *(u32*)&exheader[0x1C]; @@ -305,6 +324,7 @@ std::pair NCCH::getPrimaryKey(Crypto::AESEngine &aesEngine if (encrypted) { if (fixedCryptoKey) { + result.fill(0); return {true, result}; } @@ -326,6 +346,7 @@ std::pair NCCH::getSecondaryKey(Crypto::AESEngine &aesEngi if (encrypted) { if (fixedCryptoKey) { + result.fill(0); return {true, result}; } diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 09b49eee..57eac8ca 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -7,6 +7,7 @@ #include "config_mem.hpp" #include "resource_limits.hpp" +#include "services/fonts.hpp" #include "services/ptm.hpp" CMRC_DECLARE(ConsoleFonts); @@ -51,7 +52,7 @@ void Memory::reset() { if (e.handle == KernelHandles::FontSharedMemHandle) { // Read font size from the cmrc filesystem the font is stored 
in auto fonts = cmrc::ConsoleFonts::get_filesystem(); - e.size = fonts.open("CitraSharedFontUSRelocated.bin").size(); + e.size = fonts.open("SharedFontReplacement.bin").size(); } e.mapped = false; @@ -520,10 +521,13 @@ Regions Memory::getConsoleRegion() { return region; } -void Memory::copySharedFont(u8* pointer) { +void Memory::copySharedFont(u8* pointer, u32 vaddr) { auto fonts = cmrc::ConsoleFonts::get_filesystem(); - auto font = fonts.open("CitraSharedFontUSRelocated.bin"); + auto font = fonts.open("SharedFontReplacement.bin"); std::memcpy(pointer, font.begin(), font.size()); + + // Relocate shared font to the address it's being loaded to + HLE::Fonts::relocateSharedFont(pointer, vaddr); } std::optional Memory::getProgramID() { diff --git a/src/core/renderer_gl/etc1.cpp b/src/core/renderer_gl/etc1.cpp index 8aefd622..0b4ed1a5 100644 --- a/src/core/renderer_gl/etc1.cpp +++ b/src/core/renderer_gl/etc1.cpp @@ -12,8 +12,9 @@ static constexpr u32 signExtend3To32(u32 val) { u32 Texture::getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, std::span data) { // Pixel offset of the 8x8 tile based on u, v and the width of the texture u32 offs = ((u & ~7) * 8) + ((v & ~7) * width); - if (!hasAlpha) + if (!hasAlpha) { offs >>= 1; + } // In-tile offsets for u/v u &= 7; diff --git a/src/core/renderer_gl/gl_state.cpp b/src/core/renderer_gl/gl_state.cpp index d2eec0d5..785cac41 100644 --- a/src/core/renderer_gl/gl_state.cpp +++ b/src/core/renderer_gl/gl_state.cpp @@ -5,9 +5,20 @@ void GLStateManager::resetBlend() { logicOpEnabled = false; logicOp = GL_COPY; + blendEquationRGB = GL_FUNC_ADD; + blendEquationAlpha = GL_FUNC_ADD; + + blendFuncSourceRGB = GL_SRC_COLOR; + blendFuncDestRGB = GL_DST_COLOR; + blendFuncSourceAlpha = GL_SRC_ALPHA; + blendFuncDestAlpha = GL_DST_ALPHA; + OpenGL::disableBlend(); OpenGL::disableLogicOp(); OpenGL::setLogicOp(GL_COPY); + + glBlendEquationSeparate(blendEquationRGB, blendEquationAlpha); + glBlendFuncSeparate(blendFuncSourceRGB, blendFuncDestRGB, 
blendFuncSourceAlpha, blendFuncDestAlpha); } void GLStateManager::resetClearing() { @@ -61,9 +72,9 @@ void GLStateManager::resetVAO() { glBindVertexArray(0); } -void GLStateManager::resetVBO() { - boundVBO = 0; - glBindBuffer(GL_ARRAY_BUFFER, 0); +void GLStateManager::resetBuffers() { + boundUBO = 0; + glBindBuffer(GL_UNIFORM_BUFFER, 0); } void GLStateManager::resetProgram() { @@ -79,7 +90,7 @@ void GLStateManager::reset() { resetDepth(); resetVAO(); - resetVBO(); + resetBuffers(); resetProgram(); resetScissor(); resetStencil(); diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 2d29e682..c1899655 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -2,11 +2,16 @@ #include +#include #include #include "PICA/float_types.hpp" #include "PICA/gpu.hpp" +#include "PICA/pica_frag_uniforms.hpp" +#include "PICA/pica_simd.hpp" #include "PICA/regs.hpp" +#include "PICA/shader_decompiler.hpp" +#include "config.hpp" #include "math_util.hpp" CMRC_DECLARE(RendererGL); @@ -22,6 +27,8 @@ void RendererGL::reset() { colourBufferCache.reset(); textureCache.reset(); + shaderCache.clear(); + // Init the colour/depth buffer settings to some random defaults on reset colourBufferLoc = 0; colourBufferFormat = PICA::ColorFmt::RGBA8; @@ -38,9 +45,9 @@ void RendererGL::reset() { oldDepthOffset = 0.0; // Default depth offset to 0 oldDepthmapEnable = false; // Enable w buffering - glUniform1f(depthScaleLoc, oldDepthScale); - glUniform1f(depthOffsetLoc, oldDepthOffset); - glUniform1i(depthmapEnableLoc, oldDepthmapEnable); + glUniform1f(ubershaderData.depthScaleLoc, oldDepthScale); + glUniform1f(ubershaderData.depthOffsetLoc, oldDepthOffset); + glUniform1i(ubershaderData.depthmapEnableLoc, oldDepthmapEnable); gl.useProgram(oldProgram); // Switch to old GL program } @@ -50,7 +57,6 @@ void RendererGL::initGraphicsContextInternal() { gl.reset(); auto gl_resources = cmrc::RendererGL::get_filesystem(); - auto 
vertexShaderSource = gl_resources.open("opengl_vertex_shader.vert"); auto fragmentShaderSource = gl_resources.open("opengl_fragment_shader.frag"); @@ -59,45 +65,57 @@ void RendererGL::initGraphicsContextInternal() { triangleProgram.create({vert, frag}); initUbershader(triangleProgram); - auto displayVertexShaderSource = gl_resources.open("opengl_display.vert"); - auto displayFragmentShaderSource = gl_resources.open("opengl_display.frag"); + compileDisplayShader(); + // Create stream buffers for vertex, index and uniform buffers + static constexpr usize hwIndexBufferSize = 2_MB; + static constexpr usize hwVertexBufferSize = 16_MB; - OpenGL::Shader vertDisplay({displayVertexShaderSource.begin(), displayVertexShaderSource.size()}, OpenGL::Vertex); - OpenGL::Shader fragDisplay({displayFragmentShaderSource.begin(), displayFragmentShaderSource.size()}, OpenGL::Fragment); - displayProgram.create({vertDisplay, fragDisplay}); + hwIndexBuffer = StreamBuffer::Create(GL_ELEMENT_ARRAY_BUFFER, hwIndexBufferSize); + hwVertexBuffer = StreamBuffer::Create(GL_ARRAY_BUFFER, hwVertexBufferSize); - gl.useProgram(displayProgram); - glUniform1i(OpenGL::uniformLocation(displayProgram, "u_texture"), 0); // Init sampler object + // Allocate memory for the shadergen fragment uniform UBO + glGenBuffers(1, &shadergenFragmentUBO); + gl.bindUBO(shadergenFragmentUBO); + glBufferData(GL_UNIFORM_BUFFER, sizeof(PICA::FragmentUniforms), nullptr, GL_DYNAMIC_DRAW); - vbo.createFixedSize(sizeof(Vertex) * vertexBufferSize, GL_STREAM_DRAW); - gl.bindVBO(vbo); - vao.create(); - gl.bindVAO(vao); + // Allocate memory for the accelerated vertex shader uniform UBO + glGenBuffers(1, &hwShaderUniformUBO); + gl.bindUBO(hwShaderUniformUBO); + glBufferData(GL_UNIFORM_BUFFER, PICAShader::totalUniformSize(), nullptr, GL_DYNAMIC_DRAW); + + vbo.createFixedSize(sizeof(Vertex) * vertexBufferSize * 2, GL_STREAM_DRAW); + vbo.bind(); + // Initialize the VAO used when not using hw shaders + defaultVAO.create(); + 
gl.bindVAO(defaultVAO); // Position (x, y, z, w) attributes - vao.setAttributeFloat(0, 4, sizeof(Vertex), offsetof(Vertex, s.positions)); - vao.enableAttribute(0); + defaultVAO.setAttributeFloat(0, 4, sizeof(Vertex), offsetof(Vertex, s.positions)); + defaultVAO.enableAttribute(0); // Quaternion attribute - vao.setAttributeFloat(1, 4, sizeof(Vertex), offsetof(Vertex, s.quaternion)); - vao.enableAttribute(1); + defaultVAO.setAttributeFloat(1, 4, sizeof(Vertex), offsetof(Vertex, s.quaternion)); + defaultVAO.enableAttribute(1); // Colour attribute - vao.setAttributeFloat(2, 4, sizeof(Vertex), offsetof(Vertex, s.colour)); - vao.enableAttribute(2); + defaultVAO.setAttributeFloat(2, 4, sizeof(Vertex), offsetof(Vertex, s.colour)); + defaultVAO.enableAttribute(2); // UV 0 attribute - vao.setAttributeFloat(3, 2, sizeof(Vertex), offsetof(Vertex, s.texcoord0)); - vao.enableAttribute(3); + defaultVAO.setAttributeFloat(3, 2, sizeof(Vertex), offsetof(Vertex, s.texcoord0)); + defaultVAO.enableAttribute(3); // UV 1 attribute - vao.setAttributeFloat(4, 2, sizeof(Vertex), offsetof(Vertex, s.texcoord1)); - vao.enableAttribute(4); + defaultVAO.setAttributeFloat(4, 2, sizeof(Vertex), offsetof(Vertex, s.texcoord1)); + defaultVAO.enableAttribute(4); // UV 0 W-component attribute - vao.setAttributeFloat(5, 1, sizeof(Vertex), offsetof(Vertex, s.texcoord0_w)); - vao.enableAttribute(5); + defaultVAO.setAttributeFloat(5, 1, sizeof(Vertex), offsetof(Vertex, s.texcoord0_w)); + defaultVAO.enableAttribute(5); // View - vao.setAttributeFloat(6, 3, sizeof(Vertex), offsetof(Vertex, s.view)); - vao.enableAttribute(6); + defaultVAO.setAttributeFloat(6, 3, sizeof(Vertex), offsetof(Vertex, s.view)); + defaultVAO.enableAttribute(6); // UV 2 attribute - vao.setAttributeFloat(7, 2, sizeof(Vertex), offsetof(Vertex, s.texcoord2)); - vao.enableAttribute(7); + defaultVAO.setAttributeFloat(7, 2, sizeof(Vertex), offsetof(Vertex, s.texcoord2)); + defaultVAO.enableAttribute(7); + + // Initialize the VAO used for hw 
shaders + hwShaderVAO.create(); dummyVBO.create(); dummyVAO.create(); @@ -107,7 +125,11 @@ void RendererGL::initGraphicsContextInternal() { const u32 screenTextureWidth = 400; // Top screen is 400 pixels wide, bottom is 320 const u32 screenTextureHeight = 2 * 240; // Both screens are 240 pixels tall - glGenTextures(1, &lightLUTTextureArray); + // 24 rows for light, 1 for fog + LUTTexture.create(256, Lights::LUT_Count + 1, GL_RG32F); + LUTTexture.bind(); + LUTTexture.setMinFilter(OpenGL::Linear); + LUTTexture.setMagFilter(OpenGL::Linear); auto prevTexture = OpenGL::getTex2D(); @@ -148,7 +170,22 @@ void RendererGL::initGraphicsContextInternal() { OpenGL::clearColor(); OpenGL::setViewport(oldViewport[0], oldViewport[1], oldViewport[2], oldViewport[3]); + // Initialize fixed attributes + for (int i = 0; i < fixedAttrValues.size(); i++) { + fixedAttrValues[i] = {0.f, 0.f, 0.f, 0.f}; + glVertexAttrib4f(i, 0.0, 0.0, 0.0, 0.0); + } + reset(); + fragShaderGen.setTarget(driverInfo.usingGLES ? PICA::ShaderGen::API::GLES : PICA::ShaderGen::API::GL, PICA::ShaderGen::Language::GLSL); + + // Populate our driver info structure + driverInfo.supportsExtFbFetch = (GLAD_GL_EXT_shader_framebuffer_fetch != 0); + driverInfo.supportsArmFbFetch = (GLAD_GL_ARM_shader_framebuffer_fetch != 0); + + // Initialize the default vertex shader used with shadergen + std::string defaultShadergenVSSource = fragShaderGen.getDefaultVertexShader(); + defaultShadergenVs.create({defaultShadergenVSSource.c_str(), defaultShadergenVSSource.size()}, OpenGL::Vertex); } // The OpenGL renderer doesn't need to do anything with the GL context (For Qt frontend) or the SDL window (For SDL frontend) @@ -219,8 +256,8 @@ void RendererGL::setupBlending() { OpenGL::setBlendColor(float(r) / 255.f, float(g) / 255.f, float(b) / 255.f, float(a) / 255.f); // Translate equations and funcs to their GL equivalents and set them - glBlendEquationSeparate(blendingEquations[rgbEquation], blendingEquations[alphaEquation]); - 
glBlendFuncSeparate(blendingFuncs[rgbSourceFunc], blendingFuncs[rgbDestFunc], blendingFuncs[alphaSourceFunc], blendingFuncs[alphaDestFunc]); + gl.setBlendEquation(blendingEquations[rgbEquation], blendingEquations[alphaEquation]); + gl.setBlendFunc(blendingFuncs[rgbSourceFunc], blendingFuncs[rgbDestFunc], blendingFuncs[alphaSourceFunc], blendingFuncs[alphaDestFunc]); } } @@ -272,10 +309,8 @@ void RendererGL::setupStencilTest(bool stencilEnable) { glStencilOp(stencilOps[stencilFailOp], stencilOps[depthFailOp], stencilOps[passOp]); } - -void RendererGL::setupTextureEnvState() { +void RendererGL::setupUbershaderTexEnv() { // TODO: Only update uniforms when the TEV config changed. Use an UBO potentially. - static constexpr std::array ioBases = { PICA::InternalRegs::TexEnv0Source, PICA::InternalRegs::TexEnv1Source, PICA::InternalRegs::TexEnv2Source, PICA::InternalRegs::TexEnv3Source, PICA::InternalRegs::TexEnv4Source, PICA::InternalRegs::TexEnv5Source, @@ -297,11 +332,11 @@ void RendererGL::setupTextureEnvState() { textureEnvScaleRegs[i] = regs[ioBase + 4]; } - glUniform1uiv(textureEnvSourceLoc, 6, textureEnvSourceRegs); - glUniform1uiv(textureEnvOperandLoc, 6, textureEnvOperandRegs); - glUniform1uiv(textureEnvCombinerLoc, 6, textureEnvCombinerRegs); - glUniform1uiv(textureEnvColorLoc, 6, textureEnvColourRegs); - glUniform1uiv(textureEnvScaleLoc, 6, textureEnvScaleRegs); + glUniform1uiv(ubershaderData.textureEnvSourceLoc, 6, textureEnvSourceRegs); + glUniform1uiv(ubershaderData.textureEnvOperandLoc, 6, textureEnvOperandRegs); + glUniform1uiv(ubershaderData.textureEnvCombinerLoc, 6, textureEnvCombinerRegs); + glUniform1uiv(ubershaderData.textureEnvColorLoc, 6, textureEnvColourRegs); + glUniform1uiv(ubershaderData.textureEnvScaleLoc, 6, textureEnvScaleRegs); } void RendererGL::bindTexturesToSlots() { @@ -340,26 +375,49 @@ void RendererGL::bindTexturesToSlots() { } glActiveTexture(GL_TEXTURE0 + 3); - glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray); + 
LUTTexture.bind(); glActiveTexture(GL_TEXTURE0); } void RendererGL::updateLightingLUT() { gpu.lightingLUTDirty = false; - std::array u16_lightinglut; + std::array lightingLut; - for (int i = 0; i < gpu.lightingLUT.size(); i++) { - uint64_t value = gpu.lightingLUT[i] & ((1 << 12) - 1); - u16_lightinglut[i] = value * 65535 / 4095; + for (int i = 0; i < lightingLut.size(); i += 2) { + uint64_t value = gpu.lightingLUT[i >> 1] & 0xFFF; + lightingLut[i] = (float)(value << 4) / 65535.0f; } glActiveTexture(GL_TEXTURE0 + 3); - glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray); - glTexImage2D(GL_TEXTURE_1D_ARRAY, 0, GL_R16, 256, Lights::LUT_Count, 0, GL_RED, GL_UNSIGNED_SHORT, u16_lightinglut.data()); - glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + LUTTexture.bind(); + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 256, Lights::LUT_Count, GL_RG, GL_FLOAT, lightingLut.data()); + glActiveTexture(GL_TEXTURE0); +} + +void RendererGL::updateFogLUT() { + gpu.fogLUTDirty = false; + + // Fog LUT elements are of this type: + // 0-12 fixed1.1.11, Difference from next element + // 13-23 fixed0.0.11, Value + // We will store them as a 128x1 RG texture with R being the value and G being the difference + std::array fogLut; + + for (int i = 0; i < fogLut.size(); i += 2) { + const uint32_t value = gpu.fogLUT[i >> 1]; + int32_t diff = value & 0x1fff; + diff = (diff << 19) >> 19; // Sign extend the 13-bit value to 32 bits + const float fogDifference = float(diff) / 2048.0f; + const float fogValue = float((value >> 13) & 0x7ff) / 2048.0f; + + fogLut[i] = fogValue; + fogLut[i + 1] = fogDifference; + } + + glActiveTexture(GL_TEXTURE0 + 3); + LUTTexture.bind(); + // The fog LUT exists at the end of the lighting LUT + 
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, Lights::LUT_Count, 128, 1, GL_RG, GL_FLOAT, fogLut.data()); glActiveTexture(GL_TEXTURE0); } @@ -374,9 +432,12 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span v const auto primitiveTopology = primTypes[static_cast(primType)]; gl.disableScissor(); - gl.bindVBO(vbo); - gl.bindVAO(vao); - gl.useProgram(triangleProgram); + + // If we're using accelerated shaders, the hw VAO, VBO and EBO objects will have already been bound in prepareForDraw + if (!usingAcceleratedShader) { + vbo.bind(); + gl.bindVAO(defaultVAO); + } gl.enableClipPlane(0); // Clipping plane 0 is always enabled if (regs[PICA::InternalRegs::ClipEnable] & 1) { @@ -394,35 +455,12 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span v const int depthFunc = getBits<4, 3>(depthControl); const int colourMask = getBits<8, 4>(depthControl); gl.setColourMask(colourMask & 1, colourMask & 2, colourMask & 4, colourMask & 8); - static constexpr std::array depthModes = {GL_NEVER, GL_ALWAYS, GL_EQUAL, GL_NOTEQUAL, GL_LESS, GL_LEQUAL, GL_GREATER, GL_GEQUAL}; - const float depthScale = f24::fromRaw(regs[PICA::InternalRegs::DepthScale] & 0xffffff).toFloat32(); - const float depthOffset = f24::fromRaw(regs[PICA::InternalRegs::DepthOffset] & 0xffffff).toFloat32(); - const bool depthMapEnable = regs[PICA::InternalRegs::DepthmapEnable] & 1; - - // Update depth uniforms - if (oldDepthScale != depthScale) { - oldDepthScale = depthScale; - glUniform1f(depthScaleLoc, depthScale); - } - - if (oldDepthOffset != depthOffset) { - oldDepthOffset = depthOffset; - glUniform1f(depthOffsetLoc, depthOffset); - } - - if (oldDepthmapEnable != depthMapEnable) { - oldDepthmapEnable = depthMapEnable; - glUniform1i(depthmapEnableLoc, depthMapEnable); - } - - setupTextureEnvState(); bindTexturesToSlots(); - - // Upload PICA Registers as a single uniform. 
The shader needs access to the rasterizer registers (for depth, starting from index 0x48) - // The texturing and the fragment lighting registers. Therefore we upload them all in one go to avoid multiple slow uniform updates - glUniform1uiv(picaRegLoc, 0x200 - 0x48, &regs[0x48]); + if (gpu.fogLUTDirty) { + updateFogLUT(); + } if (gpu.lightingLUTDirty) { updateLightingLUT(); @@ -462,15 +500,38 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span v setupStencilTest(stencilEnable); - vbo.bufferVertsSub(vertices); - OpenGL::draw(primitiveTopology, GLsizei(vertices.size())); + if (!usingAcceleratedShader) { + vbo.bufferVertsSub(vertices); + OpenGL::draw(primitiveTopology, GLsizei(vertices.size())); + } else { + if (performIndexedRender) { + // When doing indexed rendering, use glDrawRangeElementsBaseVertex to issue the indexed draw + hwIndexBuffer->Bind(); + + if (glDrawRangeElementsBaseVertex != nullptr) [[likely]] { + glDrawRangeElementsBaseVertex( + primitiveTopology, minimumIndex, maximumIndex, GLsizei(vertices.size()), usingShortIndices ? GL_UNSIGNED_SHORT : GL_UNSIGNED_BYTE, + hwIndexBufferOffset, -GLint(minimumIndex) + ); + } else { + // If glDrawRangeElementsBaseVertex is not available then prepareForDraw will have subtracted the base vertex from the index buffer + // for us, so just use glDrawRangeElements + glDrawRangeElements( + primitiveTopology, 0, GLint(maximumIndex - minimumIndex), GLsizei(vertices.size()), + usingShortIndices ? GL_UNSIGNED_SHORT : GL_UNSIGNED_BYTE, hwIndexBufferOffset + ); + } + } else { + // When doing non-indexed rendering, just use glDrawArrays + OpenGL::draw(primitiveTopology, GLsizei(vertices.size())); + } + } } void RendererGL::display() { gl.disableScissor(); gl.disableBlend(); gl.disableDepth(); - gl.disableScissor(); // This will work fine whether or not logic ops are enabled.
We set logic op to copy instead of disabling to avoid state changes gl.setLogicOp(GL_COPY); gl.setColourMask(true, true, true, true); @@ -599,7 +660,15 @@ OpenGL::Texture RendererGL::getTexture(Texture& tex) { if (buffer.has_value()) { return buffer.value().get().texture; } else { - const auto textureData = std::span{gpu.getPointerPhys(tex.location), tex.sizeInBytes()}; // Get pointer to the texture data in 3DS memory + const u8* startPointer = gpu.getPointerPhys(tex.location); + const usize sizeInBytes = tex.sizeInBytes(); + + if (startPointer == nullptr || (sizeInBytes > 0 && gpu.getPointerPhys(tex.location + sizeInBytes - 1) == nullptr)) [[unlikely]] { + Helpers::warn("Out-of-bounds texture fetch"); + return blankTexture; + } + + const auto textureData = std::span{startPointer, tex.sizeInBytes()}; // Get pointer to the texture data in 3DS memory Texture& newTex = textureCache.add(tex); newTex.decodeTexture(textureData); @@ -710,7 +779,8 @@ void RendererGL::textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 if (inputWidth != 0) [[likely]] { copyHeight = (copySize / inputWidth) * 8; } else { - copyHeight = 0; + Helpers::warn("Zero-width texture copy"); + return; } // Find the source surface. 
@@ -761,6 +831,238 @@ std::optional RendererGL::getColourBuffer(u32 addr, PICA::ColorFmt return colourBufferCache.add(sampleBuffer); } +OpenGL::Program& RendererGL::getSpecializedShader() { + constexpr uint vsUBOBlockBinding = 1; + constexpr uint fsUBOBlockBinding = 2; + + PICA::FragmentConfig fsConfig(regs); + // If we're not on GLES, ignore the logic op configuration and don't generate redundant shaders for it, since we use hw logic ops + if (!driverInfo.usingGLES) { + fsConfig.outConfig.logicOpMode = PICA::LogicOpMode(0); + } + + OpenGL::Shader& fragShader = shaderCache.fragmentShaderCache[fsConfig]; + if (!fragShader.exists()) { + std::string fs = fragShaderGen.generate(fsConfig); + fragShader.create({fs.c_str(), fs.size()}, OpenGL::Fragment); + } + + // Get the handle of the current vertex shader + OpenGL::Shader& vertexShader = usingAcceleratedShader ? *generatedVertexShader : defaultShadergenVs; + // And form the key for looking up a shader program + const u64 programKey = (u64(vertexShader.handle()) << 32) | u64(fragShader.handle()); + + CachedProgram& programEntry = shaderCache.programCache[programKey]; + OpenGL::Program& program = programEntry.program; + + if (!program.exists()) { + program.create({vertexShader, fragShader}); + gl.useProgram(program); + + // Init sampler objects. Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3 + glUniform1i(OpenGL::uniformLocation(program, "u_tex0"), 0); + glUniform1i(OpenGL::uniformLocation(program, "u_tex1"), 1); + glUniform1i(OpenGL::uniformLocation(program, "u_tex2"), 2); + glUniform1i(OpenGL::uniformLocation(program, "u_tex_luts"), 3); + + // Set up the binding for our UBOs. Sadly we can't specify it in the shader like normal people, + // As it's an OpenGL 4.2 feature that MacOS doesn't support... 
+ uint fsUBOIndex = glGetUniformBlockIndex(program.handle(), "FragmentUniforms"); + glUniformBlockBinding(program.handle(), fsUBOIndex, fsUBOBlockBinding); + + if (usingAcceleratedShader) { + uint vertexUBOIndex = glGetUniformBlockIndex(program.handle(), "PICAShaderUniforms"); + glUniformBlockBinding(program.handle(), vertexUBOIndex, vsUBOBlockBinding); + } + } + glBindBufferBase(GL_UNIFORM_BUFFER, fsUBOBlockBinding, shadergenFragmentUBO); + if (usingAcceleratedShader) { + glBindBufferBase(GL_UNIFORM_BUFFER, vsUBOBlockBinding, hwShaderUniformUBO); + } + + // Upload uniform data to our shader's UBO + PICA::FragmentUniforms uniforms; + uniforms.alphaReference = Helpers::getBits<8, 8>(regs[InternalRegs::AlphaTestConfig]); + + // Set up the texenv buffer color + const u32 texEnvBufferColor = regs[InternalRegs::TexEnvBufferColor]; + uniforms.tevBufferColor[0] = float(texEnvBufferColor & 0xFF) / 255.0f; + uniforms.tevBufferColor[1] = float((texEnvBufferColor >> 8) & 0xFF) / 255.0f; + uniforms.tevBufferColor[2] = float((texEnvBufferColor >> 16) & 0xFF) / 255.0f; + uniforms.tevBufferColor[3] = float((texEnvBufferColor >> 24) & 0xFF) / 255.0f; + + uniforms.depthScale = f24::fromRaw(regs[PICA::InternalRegs::DepthScale] & 0xffffff).toFloat32(); + uniforms.depthOffset = f24::fromRaw(regs[PICA::InternalRegs::DepthOffset] & 0xffffff).toFloat32(); + + if (regs[InternalRegs::ClipEnable] & 1) { + uniforms.clipCoords[0] = f24::fromRaw(regs[PICA::InternalRegs::ClipData0] & 0xffffff).toFloat32(); + uniforms.clipCoords[1] = f24::fromRaw(regs[PICA::InternalRegs::ClipData1] & 0xffffff).toFloat32(); + uniforms.clipCoords[2] = f24::fromRaw(regs[PICA::InternalRegs::ClipData2] & 0xffffff).toFloat32(); + uniforms.clipCoords[3] = f24::fromRaw(regs[PICA::InternalRegs::ClipData3] & 0xffffff).toFloat32(); + } + + // Set up the constant color for the 6 TEV stages + for (int i = 0; i < 6; i++) { + static constexpr std::array ioBases = { + PICA::InternalRegs::TexEnv0Source, 
PICA::InternalRegs::TexEnv1Source, PICA::InternalRegs::TexEnv2Source, + PICA::InternalRegs::TexEnv3Source, PICA::InternalRegs::TexEnv4Source, PICA::InternalRegs::TexEnv5Source, + }; + + auto& vec = uniforms.constantColors[i]; + u32 base = ioBases[i]; + u32 color = regs[base + 3]; + + vec[0] = float(color & 0xFF) / 255.0f; + vec[1] = float((color >> 8) & 0xFF) / 255.0f; + vec[2] = float((color >> 16) & 0xFF) / 255.0f; + vec[3] = float((color >> 24) & 0xFF) / 255.0f; + } + + uniforms.fogColor = regs[PICA::InternalRegs::FogColor]; + + // Append lighting uniforms + if (fsConfig.lighting.enable) { + uniforms.globalAmbientLight = regs[InternalRegs::LightGlobalAmbient]; + for (int i = 0; i < 8; i++) { + auto& light = uniforms.lightUniforms[i]; + const u32 specular0 = regs[InternalRegs::Light0Specular0 + i * 0x10]; + const u32 specular1 = regs[InternalRegs::Light0Specular1 + i * 0x10]; + const u32 diffuse = regs[InternalRegs::Light0Diffuse + i * 0x10]; + const u32 ambient = regs[InternalRegs::Light0Ambient + i * 0x10]; + const u32 lightXY = regs[InternalRegs::Light0XY + i * 0x10]; + const u32 lightZ = regs[InternalRegs::Light0Z + i * 0x10]; + + const u32 spotlightXY = regs[InternalRegs::Light0SpotlightXY + i * 0x10]; + const u32 spotlightZ = regs[InternalRegs::Light0SpotlightZ + i * 0x10]; + const u32 attenuationBias = regs[InternalRegs::Light0AttenuationBias + i * 0x10]; + const u32 attenuationScale = regs[InternalRegs::Light0AttenuationScale + i * 0x10]; + +#define lightColorToVec3(value) \ + { \ + float(Helpers::getBits<20, 8>(value)) / 255.0f, \ + float(Helpers::getBits<10, 8>(value)) / 255.0f, \ + float(Helpers::getBits<0, 8>(value)) / 255.0f, \ + } + light.specular0 = lightColorToVec3(specular0); + light.specular1 = lightColorToVec3(specular1); + light.diffuse = lightColorToVec3(diffuse); + light.ambient = lightColorToVec3(ambient); + light.position[0] = Floats::f16::fromRaw(u16(lightXY)).toFloat32(); + light.position[1] = Floats::f16::fromRaw(u16(lightXY >> 
16)).toFloat32(); + light.position[2] = Floats::f16::fromRaw(u16(lightZ)).toFloat32(); + + // Fixed point 1.11.1 to float, without negation + light.spotlightDirection[0] = float(s32(spotlightXY & 0x1FFF) << 19 >> 19) / 2047.0; + light.spotlightDirection[1] = float(s32((spotlightXY >> 16) & 0x1FFF) << 19 >> 19) / 2047.0; + light.spotlightDirection[2] = float(s32(spotlightZ & 0x1FFF) << 19 >> 19) / 2047.0; + + light.distanceAttenuationBias = Floats::f20::fromRaw(attenuationBias & 0xFFFFF).toFloat32(); + light.distanceAttenuationScale = Floats::f20::fromRaw(attenuationScale & 0xFFFFF).toFloat32(); +#undef lightColorToVec3 + } + } + + gl.bindUBO(shadergenFragmentUBO); + glBufferSubData(GL_UNIFORM_BUFFER, 0, sizeof(PICA::FragmentUniforms), &uniforms); + + return program; +} + +bool RendererGL::prepareForDraw(ShaderUnit& shaderUnit, PICA::DrawAcceleration* accel) { + // First we figure out if we will be using an ubershader + bool usingUbershader = emulatorConfig->useUbershaders; + if (usingUbershader) { + const bool lightsEnabled = (regs[InternalRegs::LightingEnable] & 1) != 0; + const uint lightCount = (regs[InternalRegs::LightNumber] & 0x7) + 1; + + // Emulating lights in the ubershader is incredibly slow, so we've got an option to render draws using more than N lights via shadergen + // This way we generate fewer shaders overall than with full shadergen, but don't tank performance + if (emulatorConfig->forceShadergenForLights && lightsEnabled && lightCount >= emulatorConfig->lightShadergenThreshold) { + usingUbershader = false; + } + } + + // Then we figure out if we will use hw accelerated shaders, and try to fetch our shader + // TODO: Ubershader support for accelerated shaders + usingAcceleratedShader = emulatorConfig->accelerateShaders && !usingUbershader && accel != nullptr && accel->canBeAccelerated; + + if (usingAcceleratedShader) { + PICA::VertConfig vertexConfig(shaderUnit.vs, regs, usingUbershader); + + std::optional& shader =
shaderCache.vertexShaderCache[vertexConfig]; + // If the optional is false, we have never tried to recompile the shader before. Try to recompile it and see if it works. + if (!shader.has_value()) { + // Initialize shader to a "null" shader (handle == 0) + shader = OpenGL::Shader(); + + std::string picaShaderSource = PICA::ShaderGen::decompileShader( + shaderUnit.vs, *emulatorConfig, shaderUnit.vs.entrypoint, + driverInfo.usingGLES ? PICA::ShaderGen::API::GLES : PICA::ShaderGen::API::GL, PICA::ShaderGen::Language::GLSL + ); + + // Empty source means compilation error, if the source is not empty then we convert the recompiled PICA code into a valid shader and upload + // it to the GPU + if (!picaShaderSource.empty()) { + std::string vertexShaderSource = fragShaderGen.getVertexShaderAccelerated(picaShaderSource, vertexConfig, usingUbershader); + shader->create({vertexShaderSource}, OpenGL::Vertex); + } + } + + // Shader generation did not work out, so set usingAcceleratedShader to false + if (!shader->exists()) { + usingAcceleratedShader = false; + } else { + generatedVertexShader = &(*shader); + gl.bindUBO(hwShaderUniformUBO); + + if (shaderUnit.vs.uniformsDirty) { + shaderUnit.vs.uniformsDirty = false; + glBufferSubData(GL_UNIFORM_BUFFER, 0, PICAShader::totalUniformSize(), shaderUnit.vs.getUniformPointer()); + } + + performIndexedRender = accel->indexed; + minimumIndex = GLsizei(accel->minimumIndex); + maximumIndex = GLsizei(accel->maximumIndex); + + // Upload vertex data and index buffer data to our GPU + accelerateVertexUpload(shaderUnit, accel); + } + } + + if (!usingUbershader) { + OpenGL::Program& program = getSpecializedShader(); + gl.useProgram(program); + } else { // Bind ubershader & load ubershader uniforms + gl.useProgram(triangleProgram); + + const float depthScale = f24::fromRaw(regs[PICA::InternalRegs::DepthScale] & 0xffffff).toFloat32(); + const float depthOffset = f24::fromRaw(regs[PICA::InternalRegs::DepthOffset] & 0xffffff).toFloat32(); + const 
bool depthMapEnable = regs[PICA::InternalRegs::DepthmapEnable] & 1; + + if (oldDepthScale != depthScale) { + oldDepthScale = depthScale; + glUniform1f(ubershaderData.depthScaleLoc, depthScale); + } + + if (oldDepthOffset != depthOffset) { + oldDepthOffset = depthOffset; + glUniform1f(ubershaderData.depthOffsetLoc, depthOffset); + } + + if (oldDepthmapEnable != depthMapEnable) { + oldDepthmapEnable = depthMapEnable; + glUniform1i(ubershaderData.depthmapEnableLoc, depthMapEnable); + } + + // Upload PICA Registers as a single uniform. The shader needs access to the rasterizer registers (for depth, starting from index 0x48) + // The texturing and the fragment lighting registers. Therefore we upload them all in one go to avoid multiple slow uniform updates + glUniform1uiv(ubershaderData.picaRegLoc, 0x200 - 0x48, &regs[0x48]); + setupUbershaderTexEnv(); + } + + return usingAcceleratedShader; +} + void RendererGL::screenshot(const std::string& name) { constexpr uint width = 400; constexpr uint height = 2 * 240; @@ -774,7 +1076,7 @@ void RendererGL::screenshot(const std::string& name) { // Flip the image vertically for (int y = 0; y < height; y++) { - memcpy(&flippedPixels[y * width * 4], &pixels[(height - y - 1) * width * 4], width * 4); + std::memcpy(&flippedPixels[y * width * 4], &pixels[(height - y - 1) * width * 4], width * 4); // Swap R and B channels for (int x = 0; x < width; x++) { std::swap(flippedPixels[y * width * 4 + x * 4 + 0], flippedPixels[y * width * 4 + x * 4 + 2]); @@ -791,6 +1093,7 @@ void RendererGL::deinitGraphicsContext() { textureCache.reset(); depthBufferCache.reset(); colourBufferCache.reset(); + shaderCache.clear(); // All other GL objects should be invalidated automatically and be recreated by the next call to initGraphicsContext // TODO: Make it so that depth and colour buffers get written back to 3DS memory @@ -814,28 +1117,153 @@ void RendererGL::setUbershader(const std::string& shader) { initUbershader(triangleProgram); -
glUniform1f(depthScaleLoc, oldDepthScale); - glUniform1f(depthOffsetLoc, oldDepthOffset); - glUniform1i(depthmapEnableLoc, oldDepthmapEnable); + glUniform1f(ubershaderData.depthScaleLoc, oldDepthScale); + glUniform1f(ubershaderData.depthOffsetLoc, oldDepthOffset); + glUniform1i(ubershaderData.depthmapEnableLoc, oldDepthmapEnable); } void RendererGL::initUbershader(OpenGL::Program& program) { gl.useProgram(program); - textureEnvSourceLoc = OpenGL::uniformLocation(program, "u_textureEnvSource"); - textureEnvOperandLoc = OpenGL::uniformLocation(program, "u_textureEnvOperand"); - textureEnvCombinerLoc = OpenGL::uniformLocation(program, "u_textureEnvCombiner"); - textureEnvColorLoc = OpenGL::uniformLocation(program, "u_textureEnvColor"); - textureEnvScaleLoc = OpenGL::uniformLocation(program, "u_textureEnvScale"); + ubershaderData.textureEnvSourceLoc = OpenGL::uniformLocation(program, "u_textureEnvSource"); + ubershaderData.textureEnvOperandLoc = OpenGL::uniformLocation(program, "u_textureEnvOperand"); + ubershaderData.textureEnvCombinerLoc = OpenGL::uniformLocation(program, "u_textureEnvCombiner"); + ubershaderData.textureEnvColorLoc = OpenGL::uniformLocation(program, "u_textureEnvColor"); + ubershaderData.textureEnvScaleLoc = OpenGL::uniformLocation(program, "u_textureEnvScale"); - depthScaleLoc = OpenGL::uniformLocation(program, "u_depthScale"); - depthOffsetLoc = OpenGL::uniformLocation(program, "u_depthOffset"); - depthmapEnableLoc = OpenGL::uniformLocation(program, "u_depthmapEnable"); - picaRegLoc = OpenGL::uniformLocation(program, "u_picaRegs"); + ubershaderData.depthScaleLoc = OpenGL::uniformLocation(program, "u_depthScale"); + ubershaderData.depthOffsetLoc = OpenGL::uniformLocation(program, "u_depthOffset"); + ubershaderData.depthmapEnableLoc = OpenGL::uniformLocation(program, "u_depthmapEnable"); + ubershaderData.picaRegLoc = OpenGL::uniformLocation(program, "u_picaRegs"); - // Init sampler objects. 
Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3 + // Init sampler objects. Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2 and the LUTs go in TU 3 glUniform1i(OpenGL::uniformLocation(program, "u_tex0"), 0); glUniform1i(OpenGL::uniformLocation(program, "u_tex1"), 1); glUniform1i(OpenGL::uniformLocation(program, "u_tex2"), 2); - glUniform1i(OpenGL::uniformLocation(program, "u_tex_lighting_lut"), 3); + glUniform1i(OpenGL::uniformLocation(program, "u_tex_luts"), 3); +} + +void RendererGL::compileDisplayShader() { + auto gl_resources = cmrc::RendererGL::get_filesystem(); + auto displayVertexShaderSource = driverInfo.usingGLES ? gl_resources.open("opengl_es_display.vert") : gl_resources.open("opengl_display.vert"); + auto displayFragmentShaderSource = driverInfo.usingGLES ? gl_resources.open("opengl_es_display.frag") : gl_resources.open("opengl_display.frag"); + + OpenGL::Shader vertDisplay({displayVertexShaderSource.begin(), displayVertexShaderSource.size()}, OpenGL::Vertex); + OpenGL::Shader fragDisplay({displayFragmentShaderSource.begin(), displayFragmentShaderSource.size()}, OpenGL::Fragment); + displayProgram.create({vertDisplay, fragDisplay}); + + gl.useProgram(displayProgram); + glUniform1i(OpenGL::uniformLocation(displayProgram, "u_texture"), 0); // Init sampler object +} + +void RendererGL::accelerateVertexUpload(ShaderUnit& shaderUnit, PICA::DrawAcceleration* accel) { + u32 buffer = 0; // Vertex buffer index for non-fixed attributes + u32 attrCount = 0; + + const u32 totalAttribCount = accel->totalAttribCount; + + static constexpr GLenum attributeFormats[4] = { + GL_BYTE, // 0: Signed byte + GL_UNSIGNED_BYTE, // 1: Unsigned byte + GL_SHORT, // 2: Short + GL_FLOAT, // 3: Float + }; + + const u32 vertexCount = accel->maximumIndex - accel->minimumIndex + 1; + + // Update index buffer if necessary + if (accel->indexed) { + usingShortIndices = accel->useShortIndices; + const usize 
indexBufferSize = regs[PICA::InternalRegs::VertexCountReg] * (usingShortIndices ? sizeof(u16) : sizeof(u8)); + + hwIndexBuffer->Bind(); + auto indexBufferRes = hwIndexBuffer->Map(4, indexBufferSize); + hwIndexBufferOffset = reinterpret_cast(usize(indexBufferRes.buffer_offset)); + + std::memcpy(indexBufferRes.pointer, accel->indexBuffer, indexBufferSize); + // If we don't have glDrawRangeElementsBaseVertex, we must subtract the base index value from our index buffer manually + if (glDrawRangeElementsBaseVertex == nullptr) [[unlikely]] { + const u32 indexCount = regs[PICA::InternalRegs::VertexCountReg]; + usingShortIndices ? PICA::IndexBuffer::subtractBaseIndex((u8*)indexBufferRes.pointer, indexCount, accel->minimumIndex) + : PICA::IndexBuffer::subtractBaseIndex((u8*)indexBufferRes.pointer, indexCount, accel->minimumIndex); + } + + hwIndexBuffer->Unmap(indexBufferSize); + } + + hwVertexBuffer->Bind(); + auto vertexBufferRes = hwVertexBuffer->Map(4, accel->vertexDataSize); + u8* vertexData = static_cast(vertexBufferRes.pointer); + const u32 vertexBufferOffset = vertexBufferRes.buffer_offset; + + gl.bindVAO(hwShaderVAO); + + // Enable or disable vertex attributes as needed + const u32 currentAttributeMask = accel->enabledAttributeMask; + // Use bitwise xor to calculate which attributes changed + u32 attributeMaskDiff = currentAttributeMask ^ previousAttributeMask; + + while (attributeMaskDiff != 0) { + // Get index of next different attribute and turn it off + const u32 index = 31 - std::countl_zero(attributeMaskDiff); + const u32 mask = 1u << index; + attributeMaskDiff ^= mask; + + if ((currentAttributeMask & mask) != 0) { + // Attribute was disabled and is now enabled + hwShaderVAO.enableAttribute(index); + } else { + // Attribute was enabled and is now disabled + hwShaderVAO.disableAttribute(index); + } + } + + previousAttributeMask = currentAttributeMask; + + // Upload the data for each (enabled) attribute loader into our vertex buffer + for (int i = 0; i < 
accel->totalLoaderCount; i++) { + auto& loader = accel->loaders[i]; + + std::memcpy(vertexData, loader.data, loader.size); + vertexData += loader.size; + } + + hwVertexBuffer->Unmap(accel->vertexDataSize); + + // Iterate over the 16 PICA input registers and configure how they should be fetched. + for (int i = 0; i < 16; i++) { + const auto& attrib = accel->attributeInfo[i]; + const u32 attributeMask = 1u << i; + + if (accel->fixedAttributes & attributeMask) { + auto& attrValue = fixedAttrValues[i]; + // This is a fixed attribute, so set its fixed value, but only if it actually needs to be updated + if (attrValue[0] != attrib.fixedValue[0] || attrValue[1] != attrib.fixedValue[1] || attrValue[2] != attrib.fixedValue[2] || + attrValue[3] != attrib.fixedValue[3]) { + std::memcpy(attrValue.data(), attrib.fixedValue.data(), sizeof(attrib.fixedValue)); + glVertexAttrib4f(i, attrib.fixedValue[0], attrib.fixedValue[1], attrib.fixedValue[2], attrib.fixedValue[3]); + } + } else if (accel->enabledAttributeMask & attributeMask) { + glVertexAttribPointer( + i, attrib.componentCount, attributeFormats[attrib.type], GL_FALSE, attrib.stride, + reinterpret_cast(vertexBufferOffset + attrib.offset) + ); + } + } +} + +void RendererGL::setupGLES() { + driverInfo.usingGLES = true; + + // OpenGL ES hardware is typically way too slow to use the ubershader (eg RPi, mobile phones, handhelds) or has other issues with it. + // So, display a warning and turn them off on OpenGL ES. + if (emulatorConfig->useUbershaders) { + emulatorConfig->useUbershaders = false; + Helpers::warn("Ubershaders enabled on OpenGL ES. 
This usually results in a worse experience, turning it off..."); + } + + // Stub out logic operations so that calling them doesn't crash the emulator + if (!glLogicOp) { + glLogicOp = [](GLenum) {}; + } } diff --git a/src/core/renderer_mtl/metal_cpp_impl.cpp b/src/core/renderer_mtl/metal_cpp_impl.cpp new file mode 100644 index 00000000..7fa7137b --- /dev/null +++ b/src/core/renderer_mtl/metal_cpp_impl.cpp @@ -0,0 +1,6 @@ +#define NS_PRIVATE_IMPLEMENTATION +#define CA_PRIVATE_IMPLEMENTATION +#define MTL_PRIVATE_IMPLEMENTATION +#include +#include +#include diff --git a/src/core/renderer_mtl/mtl_etc1.cpp b/src/core/renderer_mtl/mtl_etc1.cpp new file mode 100644 index 00000000..420a60ca --- /dev/null +++ b/src/core/renderer_mtl/mtl_etc1.cpp @@ -0,0 +1,116 @@ +#include + +#include "colour.hpp" +#include "renderer_mtl/mtl_texture.hpp" +#include "renderer_mtl/renderer_mtl.hpp" + + +using namespace Helpers; + +namespace Metal { + static constexpr u32 signExtend3To32(u32 val) { + return (u32)(s32(val) << 29 >> 29); + } + + u32 Texture::getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, std::span data) { + // Pixel offset of the 8x8 tile based on u, v and the width of the texture + u32 offs = ((u & ~7) * 8) + ((v & ~7) * width); + if (!hasAlpha) { + offs >>= 1; + } + + // In-tile offsets for u/v + u &= 7; + v &= 7; + + // ETC1(A4) also subdivide the 8x8 tile to 4 4x4 tiles + // Each tile is 8 bytes for ETC1, but since ETC1A4 has 4 alpha bits per pixel, that becomes 16 bytes + const u32 subTileSize = hasAlpha ? 16 : 8; + const u32 subTileIndex = (u / 4) + 2 * (v / 4); // Which of the 4 subtiles is this texel in? 
+ + // In-subtile offsets for u/v + u &= 3; + v &= 3; + offs += subTileSize * subTileIndex; + + u32 alpha; + const u64* ptr = reinterpret_cast(data.data() + offs); // Cast to u64* + + if (hasAlpha) { + // First 64 bits of the 4x4 subtile are alpha data + const u64 alphaData = *ptr++; + alpha = Colour::convert4To8Bit((alphaData >> (4 * (u * 4 + v))) & 0xf); + } else { + alpha = 0xff; // ETC1 without alpha uses ff for every pixel + } + + // Next 64 bits of the subtile are colour data + u64 colourData = *ptr; + return decodeETC(alpha, u, v, colourData); + } + + u32 Texture::decodeETC(u32 alpha, u32 u, u32 v, u64 colourData) { + static constexpr u32 modifiers[8][2] = { + {2, 8}, {5, 17}, {9, 29}, {13, 42}, {18, 60}, {24, 80}, {33, 106}, {47, 183}, + }; + + // Parse colour data for 4x4 block + const u32 subindices = getBits<0, 16, u32>(colourData); + const u32 negationFlags = getBits<16, 16, u32>(colourData); + const bool flip = getBit<32>(colourData); + const bool diffMode = getBit<33>(colourData); + + // Note: index1 is indeed stored on the higher bits, with index2 in the lower bits + const u32 tableIndex1 = getBits<37, 3, u32>(colourData); + const u32 tableIndex2 = getBits<34, 3, u32>(colourData); + const u32 texelIndex = u * 4 + v; // Index of the texel in the block + + if (flip) std::swap(u, v); + + s32 r, g, b; + if (diffMode) { + r = getBits<59, 5, s32>(colourData); + g = getBits<51, 5, s32>(colourData); + b = getBits<43, 5, s32>(colourData); + + if (u >= 2) { + r += signExtend3To32(getBits<56, 3, u32>(colourData)); + g += signExtend3To32(getBits<48, 3, u32>(colourData)); + b += signExtend3To32(getBits<40, 3, u32>(colourData)); + } + + // Expand from 5 to 8 bits per channel + r = Colour::convert5To8Bit(r); + g = Colour::convert5To8Bit(g); + b = Colour::convert5To8Bit(b); + } else { + if (u < 2) { + r = getBits<60, 4, s32>(colourData); + g = getBits<52, 4, s32>(colourData); + b = getBits<44, 4, s32>(colourData); + } else { + r = getBits<56, 4, s32>(colourData); + 
g = getBits<48, 4, s32>(colourData); + b = getBits<40, 4, s32>(colourData); + } + + // Expand from 4 to 8 bits per channel + r = Colour::convert4To8Bit(r); + g = Colour::convert4To8Bit(g); + b = Colour::convert4To8Bit(b); + } + + const u32 index = (u < 2) ? tableIndex1 : tableIndex2; + s32 modifier = modifiers[index][(subindices >> texelIndex) & 1]; + + if (((negationFlags >> texelIndex) & 1) != 0) { + modifier = -modifier; + } + + r = std::clamp(r + modifier, 0, 255); + g = std::clamp(g + modifier, 0, 255); + b = std::clamp(b + modifier, 0, 255); + + return (alpha << 24) | (u32(b) << 16) | (u32(g) << 8) | u32(r); + } +} // namespace Metal diff --git a/src/core/renderer_mtl/mtl_lut_texture.cpp b/src/core/renderer_mtl/mtl_lut_texture.cpp new file mode 100644 index 00000000..8486a50c --- /dev/null +++ b/src/core/renderer_mtl/mtl_lut_texture.cpp @@ -0,0 +1,27 @@ +#include "renderer_mtl/renderer_mtl.hpp" + +namespace Metal { + static constexpr u32 LAYER_COUNT = 1024; + + LutTexture::LutTexture(MTL::Device* device, MTL::TextureType type, MTL::PixelFormat pixelFormat, u32 width, u32 height, const char* name) { + MTL::TextureDescriptor* desc = MTL::TextureDescriptor::alloc()->init(); + desc->setTextureType(type); + desc->setPixelFormat(pixelFormat); + desc->setWidth(width); + desc->setHeight(height); + desc->setArrayLength(LAYER_COUNT); + desc->setUsage(MTL::TextureUsageShaderRead /* | MTL::TextureUsageShaderWrite*/); + desc->setStorageMode(MTL::StorageModeShared); + + texture = device->newTexture(desc); + texture->setLabel(toNSString(name)); + desc->release(); + } + + LutTexture::~LutTexture() { texture->release(); } + + u32 LutTexture::getNextIndex() { + currentIndex = (currentIndex + 1) % LAYER_COUNT; + return currentIndex; + } +} // namespace Metal diff --git a/src/core/renderer_mtl/mtl_texture.cpp b/src/core/renderer_mtl/mtl_texture.cpp new file mode 100644 index 00000000..149fea26 --- /dev/null +++ b/src/core/renderer_mtl/mtl_texture.cpp @@ -0,0 +1,308 @@ +#include 
"renderer_mtl/mtl_texture.hpp" + +#include + +#include "colour.hpp" +#include "renderer_mtl/objc_helper.hpp" + + +using namespace Helpers; + +namespace Metal { + void Texture::allocate() { + formatInfo = PICA::getPixelFormatInfo(format); + + MTL::TextureDescriptor* descriptor = MTL::TextureDescriptor::alloc()->init(); + descriptor->setTextureType(MTL::TextureType2D); + descriptor->setPixelFormat(formatInfo.pixelFormat); + descriptor->setWidth(size.u()); + descriptor->setHeight(size.v()); + descriptor->setUsage(MTL::TextureUsageShaderRead); + descriptor->setStorageMode(MTL::StorageModeShared); // TODO: use private + staging buffers? + texture = device->newTexture(descriptor); + texture->setLabel(toNSString( + "Texture " + std::string(PICA::textureFormatToString(format)) + " " + std::to_string(size.u()) + "x" + std::to_string(size.v()) + )); + descriptor->release(); + + setNewConfig(config); + } + + // Set the texture's configuration, which includes min/mag filters, wrapping S/T modes, and so on + void Texture::setNewConfig(u32 cfg) { + config = cfg; + + if (sampler) { + sampler->release(); + } + + const auto magFilter = (cfg & 0x2) != 0 ? MTL::SamplerMinMagFilterLinear : MTL::SamplerMinMagFilterNearest; + const auto minFilter = (cfg & 0x4) != 0 ? 
MTL::SamplerMinMagFilterLinear : MTL::SamplerMinMagFilterNearest; + const auto wrapT = PICA::toMTLSamplerAddressMode(getBits<8, 3>(cfg)); + const auto wrapS = PICA::toMTLSamplerAddressMode(getBits<12, 3>(cfg)); + + MTL::SamplerDescriptor* samplerDescriptor = MTL::SamplerDescriptor::alloc()->init(); + samplerDescriptor->setMinFilter(minFilter); + samplerDescriptor->setMagFilter(magFilter); + samplerDescriptor->setSAddressMode(wrapS); + samplerDescriptor->setTAddressMode(wrapT); + + samplerDescriptor->setLabel(toNSString("Sampler")); + sampler = device->newSamplerState(samplerDescriptor); + samplerDescriptor->release(); + } + + void Texture::free() { + valid = false; + + if (texture) { + texture->release(); + } + if (sampler) { + sampler->release(); + } + } + + u64 Texture::sizeInBytes() { + u64 pixelCount = u64(size.x()) * u64(size.y()); + + switch (format) { + case PICA::TextureFmt::RGBA8: // 4 bytes per pixel + return pixelCount * 4; + + case PICA::TextureFmt::RGB8: // 3 bytes per pixel + return pixelCount * 3; + + case PICA::TextureFmt::RGBA5551: // 2 bytes per pixel + case PICA::TextureFmt::RGB565: + case PICA::TextureFmt::RGBA4: + case PICA::TextureFmt::RG8: + case PICA::TextureFmt::IA8: return pixelCount * 2; + + case PICA::TextureFmt::A8: // 1 byte per pixel + case PICA::TextureFmt::I8: + case PICA::TextureFmt::IA4: return pixelCount; + + case PICA::TextureFmt::I4: // 4 bits per pixel + case PICA::TextureFmt::A4: return pixelCount / 2; + + case PICA::TextureFmt::ETC1: // Compressed formats + case PICA::TextureFmt::ETC1A4: { + // Number of 4x4 tiles + const u64 tileCount = pixelCount / 16; + // Tiles are 8 bytes each on ETC1 and 16 bytes each on ETC1A4 + const u64 tileSize = format == PICA::TextureFmt::ETC1 ? 
8 : 16; + return tileCount * tileSize; + } + + default: Helpers::panic("[PICA] Attempted to get size of invalid texture type"); + } + } + + // u and v are the UVs of the relevant texel + // Texture data is stored interleaved in Morton order, i.e. in a Z-order curve as shown here + // https://en.wikipedia.org/wiki/Z-order_curve + // Textures are split into 8x8 tiles. This function returns the in-tile offset depending on the u & v of the texel + // The in-tile offset is the sum of 2 offsets, one depending on the value of u % 8 and the other on the value of v % 8 + // As documented in this picture https://en.wikipedia.org/wiki/File:Moser%E2%80%93de_Bruijn_addition.svg + u32 Texture::mortonInterleave(u32 u, u32 v) { + static constexpr u32 xOffsets[] = {0, 1, 4, 5, 16, 17, 20, 21}; + static constexpr u32 yOffsets[] = {0, 2, 8, 10, 32, 34, 40, 42}; + + return xOffsets[u & 7] + yOffsets[v & 7]; + } + + // Get the byte offset of texel (u, v) in the texture + u32 Texture::getSwizzledOffset(u32 u, u32 v, u32 width, u32 bytesPerPixel) { + u32 offset = ((u & ~7) * 8) + ((v & ~7) * width); // Offset of the 8x8 tile the texel belongs to + offset += mortonInterleave(u, v); // Add the in-tile offset of the texel + + return offset * bytesPerPixel; + } + + // Same as the above code except we need to divide by 2 because 4 bits is smaller than a byte + u32 Texture::getSwizzledOffset_4bpp(u32 u, u32 v, u32 width) { + u32 offset = ((u & ~7) * 8) + ((v & ~7) * width); // Offset of the 8x8 tile the texel belongs to + offset += mortonInterleave(u, v); // Add the in-tile offset of the texel + + return offset / 2; + } + + u8 Texture::decodeTexelU8(u32 u, u32 v, PICA::TextureFmt fmt, std::span data) { + switch (fmt) { + case PICA::TextureFmt::A4: { + const u32 offset = getSwizzledOffset_4bpp(u, v, size.u()); + + // For odd U coordinates, grab the top 4 bits, and the low 4 bits for even coordinates + u8 alpha = data[offset] >> ((u % 2) ?
4 : 0); + alpha = Colour::convert4To8Bit(getBits<0, 4>(alpha)); + + // A8 + return alpha; + } + + case PICA::TextureFmt::A8: { + u32 offset = getSwizzledOffset(u, v, size.u(), 1); + const u8 alpha = data[offset]; + + // A8 + return alpha; + } + + default: Helpers::panic("[Texture::DecodeTexel] Unimplemented format = %d", static_cast(fmt)); + } + } + + u16 Texture::decodeTexelU16(u32 u, u32 v, PICA::TextureFmt fmt, std::span data) { + switch (fmt) { + case PICA::TextureFmt::RG8: { + u32 offset = getSwizzledOffset(u, v, size.u(), 2); + constexpr u8 b = 0; + const u8 g = data[offset]; + const u8 r = data[offset + 1]; + + // RG8 + return (g << 8) | r; + } + + case PICA::TextureFmt::RGBA4: { + u32 offset = getSwizzledOffset(u, v, size.u(), 2); + u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8); + + u8 alpha = getBits<0, 4, u8>(texel); + u8 b = getBits<4, 4, u8>(texel); + u8 g = getBits<8, 4, u8>(texel); + u8 r = getBits<12, 4, u8>(texel); + + // ABGR4 + return (r << 12) | (g << 8) | (b << 4) | alpha; + } + + case PICA::TextureFmt::RGBA5551: { + const u32 offset = getSwizzledOffset(u, v, size.u(), 2); + const u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8); + + u8 alpha = getBit<0>(texel) ? 
0xff : 0; + u8 b = getBits<1, 5, u8>(texel); + u8 g = getBits<6, 5, u8>(texel); + u8 r = getBits<11, 5, u8>(texel); + + // BGR5A1 + return (alpha << 15) | (r << 10) | (g << 5) | b; + } + + case PICA::TextureFmt::RGB565: { + const u32 offset = getSwizzledOffset(u, v, size.u(), 2); + const u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8); + + const u8 b = getBits<0, 5, u8>(texel); + const u8 g = getBits<5, 6, u8>(texel); + const u8 r = getBits<11, 5, u8>(texel); + + // B5G6R5 + return (r << 11) | (g << 5) | b; + } + + case PICA::TextureFmt::IA4: { + const u32 offset = getSwizzledOffset(u, v, size.u(), 1); + const u8 texel = data[offset]; + const u8 alpha = texel & 0xf; + const u8 intensity = texel >> 4; + + // ABGR4 + return (intensity << 12) | (intensity << 8) | (intensity << 4) | alpha; + } + + case PICA::TextureFmt::I4: { + u32 offset = getSwizzledOffset_4bpp(u, v, size.u()); + + // For odd U coordinates, grab the top 4 bits, and the low 4 bits for even coordinates + u8 intensity = data[offset] >> ((u % 2) ? 
4 : 0); + intensity = getBits<0, 4>(intensity); + + // ABGR4 + return (intensity << 12) | (intensity << 8) | (intensity << 4) | 0xff; + } + + default: Helpers::panic("[Texture::DecodeTexel] Unimplemented format = %d", static_cast(fmt)); + } + } + + u32 Texture::decodeTexelU32(u32 u, u32 v, PICA::TextureFmt fmt, std::span data) { + switch (fmt) { + case PICA::TextureFmt::RGB8: { + const u32 offset = getSwizzledOffset(u, v, size.u(), 3); + const u8 b = data[offset]; + const u8 g = data[offset + 1]; + const u8 r = data[offset + 2]; + + // RGBA8 + return (0xff << 24) | (b << 16) | (g << 8) | r; + } + + case PICA::TextureFmt::RGBA8: { + const u32 offset = getSwizzledOffset(u, v, size.u(), 4); + const u8 alpha = data[offset]; + const u8 b = data[offset + 1]; + const u8 g = data[offset + 2]; + const u8 r = data[offset + 3]; + + // RGBA8 + return (alpha << 24) | (b << 16) | (g << 8) | r; + } + + case PICA::TextureFmt::I8: { + u32 offset = getSwizzledOffset(u, v, size.u(), 1); + const u8 intensity = data[offset]; + + // RGBA8 + return (0xff << 24) | (intensity << 16) | (intensity << 8) | intensity; + } + + case PICA::TextureFmt::IA8: { + u32 offset = getSwizzledOffset(u, v, size.u(), 2); + + // Same as I8 except each pixel gets its own alpha value too + const u8 alpha = data[offset]; + const u8 intensity = data[offset + 1]; + + // RGBA8 + return (alpha << 24) | (intensity << 16) | (intensity << 8) | intensity; + } + + case PICA::TextureFmt::ETC1: return getTexelETC(false, u, v, size.u(), data); + case PICA::TextureFmt::ETC1A4: return getTexelETC(true, u, v, size.u(), data); + + default: Helpers::panic("[Texture::DecodeTexel] Unimplemented format = %d", static_cast(fmt)); + } + } + + void Texture::decodeTexture(std::span data) { + std::vector decoded; + decoded.reserve(u64(size.u()) * u64(size.v()) * formatInfo.bytesPerTexel); + + // Decode texels line by line + for (u32 v = 0; v < size.v(); v++) { + for (u32 u = 0; u < size.u(); u++) { + if (formatInfo.bytesPerTexel == 1) { 
+ u8 texel = decodeTexelU8(u, v, format, data); + decoded.push_back(texel); + } else if (formatInfo.bytesPerTexel == 2) { + u16 texel = decodeTexelU16(u, v, format, data); + decoded.push_back((texel & 0x00ff) >> 0); + decoded.push_back((texel & 0xff00) >> 8); + } else if (formatInfo.bytesPerTexel == 4) { + u32 texel = decodeTexelU32(u, v, format, data); + decoded.push_back((texel & 0x000000ff) >> 0); + decoded.push_back((texel & 0x0000ff00) >> 8); + decoded.push_back((texel & 0x00ff0000) >> 16); + decoded.push_back((texel & 0xff000000) >> 24); + } else { + Helpers::panic("[Texture::decodeTexture] Unimplemented bytesPerTexel (%u)", formatInfo.bytesPerTexel); + } + } + } + + texture->replaceRegion(MTL::Region(0, 0, size.u(), size.v()), 0, 0, decoded.data(), formatInfo.bytesPerTexel * size.u(), 0); + } +} // namespace Metal diff --git a/src/core/renderer_mtl/objc_helper.mm b/src/core/renderer_mtl/objc_helper.mm new file mode 100644 index 00000000..eeea56a0 --- /dev/null +++ b/src/core/renderer_mtl/objc_helper.mm @@ -0,0 +1,12 @@ +#include "renderer_mtl/objc_helper.hpp" + +// TODO: change the include +#import + +namespace Metal { + +dispatch_data_t createDispatchData(const void* data, size_t size) { + return dispatch_data_create(data, size, dispatch_get_global_queue(0, 0), ^{}); +} + +} // namespace Metal diff --git a/src/core/renderer_mtl/renderer_mtl.cpp b/src/core/renderer_mtl/renderer_mtl.cpp new file mode 100644 index 00000000..9cf58716 --- /dev/null +++ b/src/core/renderer_mtl/renderer_mtl.cpp @@ -0,0 +1,829 @@ +#include "renderer_mtl/renderer_mtl.hpp" + +#include +#include + +#include "renderer_mtl/mtl_lut_texture.hpp" + +// Hack: Apple annoyingly defines a global "NO" macro which ends up conflicting with our own code... 
+#undef NO + +#include "PICA/gpu.hpp" +#include "SDL_metal.h" + +using namespace PICA; + +CMRC_DECLARE(RendererMTL); + +static constexpr u16 LIGHTING_LUT_TEXTURE_WIDTH = 256; +static constexpr u32 FOG_LUT_TEXTURE_WIDTH = 128; +// Bind the vertex buffer to binding 30 so that it doesn't occupy the lower indices +static constexpr uint VERTEX_BUFFER_BINDING_INDEX = 30; + +// HACK: redefinition... +PICA::ColorFmt ToColorFormat(u32 format) { + switch (format) { + case 2: return PICA::ColorFmt::RGB565; + case 3: return PICA::ColorFmt::RGBA5551; + default: return static_cast(format); + } +} + +MTL::Library* loadLibrary(MTL::Device* device, const cmrc::file& shaderSource) { + // MTL::CompileOptions* compileOptions = MTL::CompileOptions::alloc()->init(); + NS::Error* error = nullptr; + MTL::Library* library = device->newLibrary(Metal::createDispatchData(shaderSource.begin(), shaderSource.size()), &error); + // MTL::Library* library = device->newLibrary(NS::String::string(source.c_str(), NS::ASCIIStringEncoding), compileOptions, &error); + if (error) { + Helpers::panic("Error loading shaders: %s", error->description()->cString(NS::ASCIIStringEncoding)); + } + + return library; +} + +RendererMTL::RendererMTL(GPU& gpu, const std::array& internalRegs, const std::array& externalRegs) + : Renderer(gpu, internalRegs, externalRegs) {} + +RendererMTL::~RendererMTL() {} + +void RendererMTL::reset() { + vertexBufferCache.reset(); + depthStencilCache.reset(); + drawPipelineCache.reset(); + blitPipelineCache.reset(); + textureCache.reset(); + depthStencilRenderTargetCache.reset(); + colorRenderTargetCache.reset(); +} + +void RendererMTL::display() { + CA::MetalDrawable* drawable = metalLayer->nextDrawable(); + if (!drawable) { + return; + } + + using namespace PICA::ExternalRegs; + + // Top screen + const u32 topActiveFb = externalRegs[Framebuffer0Select] & 1; + const u32 topScreenAddr = externalRegs[topActiveFb == 0 ? 
Framebuffer0AFirstAddr : Framebuffer0ASecondAddr]; + auto topScreen = colorRenderTargetCache.findFromAddress(topScreenAddr); + + if (topScreen) { + clearColor(nullptr, topScreen->get().texture); + } + + // Bottom screen + const u32 bottomActiveFb = externalRegs[Framebuffer1Select] & 1; + const u32 bottomScreenAddr = externalRegs[bottomActiveFb == 0 ? Framebuffer1AFirstAddr : Framebuffer1ASecondAddr]; + auto bottomScreen = colorRenderTargetCache.findFromAddress(bottomScreenAddr); + + if (bottomScreen) { + clearColor(nullptr, bottomScreen->get().texture); + } + + // Draw + commandBuffer->pushDebugGroup(toNSString("Display")); + + MTL::RenderPassDescriptor* renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init(); + MTL::RenderPassColorAttachmentDescriptor* colorAttachment = renderPassDescriptor->colorAttachments()->object(0); + colorAttachment->setTexture(drawable->texture()); + colorAttachment->setLoadAction(MTL::LoadActionClear); + colorAttachment->setClearColor(MTL::ClearColor{0.0f, 0.0f, 0.0f, 1.0f}); + colorAttachment->setStoreAction(MTL::StoreActionStore); + + nextRenderPassName = "Display"; + beginRenderPassIfNeeded(renderPassDescriptor, false, drawable->texture()); + renderCommandEncoder->setRenderPipelineState(displayPipeline); + renderCommandEncoder->setFragmentSamplerState(nearestSampler, 0); + + // Top screen + if (topScreen) { + renderCommandEncoder->setViewport(MTL::Viewport{0, 0, 400, 240, 0.0f, 1.0f}); + renderCommandEncoder->setFragmentTexture(topScreen->get().texture, 0); + renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(4)); + } + + // Bottom screen + if (bottomScreen) { + renderCommandEncoder->setViewport(MTL::Viewport{40, 240, 320, 240, 0.0f, 1.0f}); + renderCommandEncoder->setFragmentTexture(bottomScreen->get().texture, 0); + renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(4)); + } + + endRenderPass(); + + 
commandBuffer->presentDrawable(drawable); + commandBuffer->popDebugGroup(); + commitCommandBuffer(); + + // Inform the vertex buffer cache that the frame ended + vertexBufferCache.endFrame(); + + // Release + drawable->release(); +} + +void RendererMTL::initGraphicsContext(SDL_Window* window) { + // TODO: what should be the type of the view? + void* view = SDL_Metal_CreateView(window); + metalLayer = (CA::MetalLayer*)SDL_Metal_GetLayer(view); + device = MTL::CreateSystemDefaultDevice(); + metalLayer->setDevice(device); + commandQueue = device->newCommandQueue(); + + // Textures + MTL::TextureDescriptor* textureDescriptor = MTL::TextureDescriptor::alloc()->init(); + textureDescriptor->setTextureType(MTL::TextureType2D); + textureDescriptor->setPixelFormat(MTL::PixelFormatRGBA8Unorm); + textureDescriptor->setWidth(1); + textureDescriptor->setHeight(1); + textureDescriptor->setStorageMode(MTL::StorageModePrivate); + textureDescriptor->setUsage(MTL::TextureUsageShaderRead); + + nullTexture = device->newTexture(textureDescriptor); + nullTexture->setLabel(toNSString("Null texture")); + textureDescriptor->release(); + + // Samplers + MTL::SamplerDescriptor* samplerDescriptor = MTL::SamplerDescriptor::alloc()->init(); + samplerDescriptor->setLabel(toNSString("Sampler (nearest)")); + nearestSampler = device->newSamplerState(samplerDescriptor); + + samplerDescriptor->setMinFilter(MTL::SamplerMinMagFilterLinear); + samplerDescriptor->setMagFilter(MTL::SamplerMinMagFilterLinear); + samplerDescriptor->setLabel(toNSString("Sampler (linear)")); + linearSampler = device->newSamplerState(samplerDescriptor); + + samplerDescriptor->release(); + + lutLightingTexture = new Metal::LutTexture( + device, MTL::TextureType2DArray, MTL::PixelFormatR16Unorm, LIGHTING_LUT_TEXTURE_WIDTH, Lights::LUT_Count, "Lighting LUT texture" + ); + lutFogTexture = new Metal::LutTexture(device, MTL::TextureType1DArray, MTL::PixelFormatRG32Float, FOG_LUT_TEXTURE_WIDTH, 1, "Fog LUT texture"); + + // -------- 
Pipelines -------- + + // Load shaders + auto mtlResources = cmrc::RendererMTL::get_filesystem(); + library = loadLibrary(device, mtlResources.open("metal_shaders.metallib")); + MTL::Library* blitLibrary = loadLibrary(device, mtlResources.open("metal_blit.metallib")); + // MTL::Library* copyToLutTextureLibrary = loadLibrary(device, mtlResources.open("metal_copy_to_lut_texture.metallib")); + + // Display + MTL::Function* vertexDisplayFunction = library->newFunction(NS::String::string("vertexDisplay", NS::ASCIIStringEncoding)); + MTL::Function* fragmentDisplayFunction = library->newFunction(NS::String::string("fragmentDisplay", NS::ASCIIStringEncoding)); + + MTL::RenderPipelineDescriptor* displayPipelineDescriptor = MTL::RenderPipelineDescriptor::alloc()->init(); + displayPipelineDescriptor->setVertexFunction(vertexDisplayFunction); + displayPipelineDescriptor->setFragmentFunction(fragmentDisplayFunction); + auto* displayColorAttachment = displayPipelineDescriptor->colorAttachments()->object(0); + displayColorAttachment->setPixelFormat(MTL::PixelFormat::PixelFormatBGRA8Unorm); + + NS::Error* error = nullptr; + displayPipelineDescriptor->setLabel(toNSString("Display pipeline")); + displayPipeline = device->newRenderPipelineState(displayPipelineDescriptor, &error); + if (error) { + Helpers::panic("Error creating display pipeline state: %s", error->description()->cString(NS::ASCIIStringEncoding)); + } + displayPipelineDescriptor->release(); + vertexDisplayFunction->release(); + fragmentDisplayFunction->release(); + + // Blit + MTL::Function* vertexBlitFunction = blitLibrary->newFunction(NS::String::string("vertexBlit", NS::ASCIIStringEncoding)); + MTL::Function* fragmentBlitFunction = blitLibrary->newFunction(NS::String::string("fragmentBlit", NS::ASCIIStringEncoding)); + + blitPipelineCache.set(device, vertexBlitFunction, fragmentBlitFunction); + + // Draw + MTL::Function* vertexDrawFunction = library->newFunction(NS::String::string("vertexDraw", 
NS::ASCIIStringEncoding)); + + // -------- Vertex descriptor -------- + MTL::VertexDescriptor* vertexDescriptor = MTL::VertexDescriptor::alloc()->init(); + + // Position + MTL::VertexAttributeDescriptor* positionAttribute = vertexDescriptor->attributes()->object(0); + positionAttribute->setFormat(MTL::VertexFormatFloat4); + positionAttribute->setOffset(offsetof(Vertex, s.positions)); + positionAttribute->setBufferIndex(VERTEX_BUFFER_BINDING_INDEX); + + // Quaternion + MTL::VertexAttributeDescriptor* quaternionAttribute = vertexDescriptor->attributes()->object(1); + quaternionAttribute->setFormat(MTL::VertexFormatFloat4); + quaternionAttribute->setOffset(offsetof(Vertex, s.quaternion)); + quaternionAttribute->setBufferIndex(VERTEX_BUFFER_BINDING_INDEX); + + // Color + MTL::VertexAttributeDescriptor* colorAttribute = vertexDescriptor->attributes()->object(2); + colorAttribute->setFormat(MTL::VertexFormatFloat4); + colorAttribute->setOffset(offsetof(Vertex, s.colour)); + colorAttribute->setBufferIndex(VERTEX_BUFFER_BINDING_INDEX); + + // Texture coordinate 0 + MTL::VertexAttributeDescriptor* texCoord0Attribute = vertexDescriptor->attributes()->object(3); + texCoord0Attribute->setFormat(MTL::VertexFormatFloat2); + texCoord0Attribute->setOffset(offsetof(Vertex, s.texcoord0)); + texCoord0Attribute->setBufferIndex(VERTEX_BUFFER_BINDING_INDEX); + + // Texture coordinate 1 + MTL::VertexAttributeDescriptor* texCoord1Attribute = vertexDescriptor->attributes()->object(4); + texCoord1Attribute->setFormat(MTL::VertexFormatFloat2); + texCoord1Attribute->setOffset(offsetof(Vertex, s.texcoord1)); + texCoord1Attribute->setBufferIndex(VERTEX_BUFFER_BINDING_INDEX); + + // Texture coordinate 0 W + MTL::VertexAttributeDescriptor* texCoord0WAttribute = vertexDescriptor->attributes()->object(5); + texCoord0WAttribute->setFormat(MTL::VertexFormatFloat); + texCoord0WAttribute->setOffset(offsetof(Vertex, s.texcoord0_w)); + texCoord0WAttribute->setBufferIndex(VERTEX_BUFFER_BINDING_INDEX); + + 
// View + MTL::VertexAttributeDescriptor* viewAttribute = vertexDescriptor->attributes()->object(6); + viewAttribute->setFormat(MTL::VertexFormatFloat3); + viewAttribute->setOffset(offsetof(Vertex, s.view)); + viewAttribute->setBufferIndex(VERTEX_BUFFER_BINDING_INDEX); + + // Texture coordinate 2 + MTL::VertexAttributeDescriptor* texCoord2Attribute = vertexDescriptor->attributes()->object(7); + texCoord2Attribute->setFormat(MTL::VertexFormatFloat2); + texCoord2Attribute->setOffset(offsetof(Vertex, s.texcoord2)); + texCoord2Attribute->setBufferIndex(VERTEX_BUFFER_BINDING_INDEX); + + MTL::VertexBufferLayoutDescriptor* vertexBufferLayout = vertexDescriptor->layouts()->object(VERTEX_BUFFER_BINDING_INDEX); + vertexBufferLayout->setStride(sizeof(Vertex)); + vertexBufferLayout->setStepFunction(MTL::VertexStepFunctionPerVertex); + vertexBufferLayout->setStepRate(1); + + drawPipelineCache.set(device, library, vertexDrawFunction, vertexDescriptor); + + // Copy to LUT texture + /* + MTL::FunctionConstantValues* constants = MTL::FunctionConstantValues::alloc()->init(); + constants->setConstantValue(&LIGHTING_LUT_TEXTURE_WIDTH, MTL::DataTypeUShort, NS::UInteger(0)); + + error = nullptr; + MTL::Function* vertexCopyToLutTextureFunction = + copyToLutTextureLibrary->newFunction(NS::String::string("vertexCopyToLutTexture", NS::ASCIIStringEncoding), constants, &error); + if (error) { + Helpers::panic("Error creating copy_to_lut_texture vertex function: %s", error->description()->cString(NS::ASCIIStringEncoding)); + } + constants->release(); + + MTL::RenderPipelineDescriptor* copyToLutTexturePipelineDescriptor = MTL::RenderPipelineDescriptor::alloc()->init(); + copyToLutTexturePipelineDescriptor->setVertexFunction(vertexCopyToLutTextureFunction); + // Disable rasterization + copyToLutTexturePipelineDescriptor->setRasterizationEnabled(false); + + error = nullptr; + copyToLutTexturePipelineDescriptor->setLabel(toNSString("Copy to LUT texture pipeline")); + copyToLutTexturePipeline = 
device->newRenderPipelineState(copyToLutTexturePipelineDescriptor, &error); + if (error) { + Helpers::panic("Error creating copy_to_lut_texture pipeline state: %s", error->description()->cString(NS::ASCIIStringEncoding)); + } + copyToLutTexturePipelineDescriptor->release(); + vertexCopyToLutTextureFunction->release(); + */ + + // Depth stencil cache + depthStencilCache.set(device); + + // Vertex buffer cache + vertexBufferCache.set(device); + + // -------- Depth stencil state -------- + MTL::DepthStencilDescriptor* depthStencilDescriptor = MTL::DepthStencilDescriptor::alloc()->init(); + depthStencilDescriptor->setLabel(toNSString("Default depth stencil state")); + defaultDepthStencilState = device->newDepthStencilState(depthStencilDescriptor); + depthStencilDescriptor->release(); + + blitLibrary->release(); + // copyToLutTextureLibrary->release(); +} + +void RendererMTL::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) { + const auto color = colorRenderTargetCache.findFromAddress(startAddress); + if (color) { + const float r = Helpers::getBits<24, 8>(value) / 255.0f; + const float g = Helpers::getBits<16, 8>(value) / 255.0f; + const float b = Helpers::getBits<8, 8>(value) / 255.0f; + const float a = (value & 0xff) / 255.0f; + + colorClearOps[color->get().texture] = {r, g, b, a}; + + return; + } + + const auto depth = depthStencilRenderTargetCache.findFromAddress(startAddress); + if (depth) { + float depthVal; + const auto format = depth->get().format; + if (format == DepthFmt::Depth16) { + depthVal = (value & 0xffff) / 65535.0f; + } else { + depthVal = (value & 0xffffff) / 16777215.0f; + } + + depthClearOps[depth->get().texture] = depthVal; + + if (format == DepthFmt::Depth24Stencil8) { + const u8 stencilVal = value >> 24; + stencilClearOps[depth->get().texture] = stencilVal; + } + + return; + } + + Helpers::warn("[RendererMTL::ClearBuffer] No buffer found!\n"); +} + +void RendererMTL::displayTransfer(u32 inputAddr, u32 outputAddr, u32 
inputSize, u32 outputSize, u32 flags) { + const u32 inputWidth = inputSize & 0xffff; + const u32 inputHeight = inputSize >> 16; + const auto inputFormat = ToColorFormat(Helpers::getBits<8, 3>(flags)); + const auto outputFormat = ToColorFormat(Helpers::getBits<12, 3>(flags)); + const bool verticalFlip = flags & 1; + const PICA::Scaling scaling = static_cast(Helpers::getBits<24, 2>(flags)); + + u32 outputWidth = outputSize & 0xffff; + u32 outputHeight = outputSize >> 16; + + auto srcFramebuffer = getColorRenderTarget(inputAddr, inputFormat, inputWidth, outputHeight); + nextRenderPassName = "Clear before display transfer"; + clearColor(nullptr, srcFramebuffer->texture); + Math::Rect srcRect = srcFramebuffer->getSubRect(inputAddr, outputWidth, outputHeight); + + if (verticalFlip) { + std::swap(srcRect.bottom, srcRect.top); + } + + // Apply scaling for the destination rectangle. + if (scaling == PICA::Scaling::X || scaling == PICA::Scaling::XY) { + outputWidth >>= 1; + } + + if (scaling == PICA::Scaling::XY) { + outputHeight >>= 1; + } + + auto destFramebuffer = getColorRenderTarget(outputAddr, outputFormat, outputWidth, outputHeight); + // TODO: clear if not blitting to the whole framebuffer + Math::Rect destRect = destFramebuffer->getSubRect(outputAddr, outputWidth, outputHeight); + + if (inputWidth != outputWidth) { + // Helpers::warn("Strided display transfer is not handled correctly!\n"); + } + + textureCopyImpl(*srcFramebuffer, *destFramebuffer, srcRect, destRect); +} + +void RendererMTL::textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) { + // Texture copy size is aligned to 16 byte units + const u32 copySize = totalBytes & ~0xf; + if (copySize == 0) { + Helpers::warn("TextureCopy total bytes less than 16!\n"); + return; + } + + // The width and gap are provided in 16-byte units. 
+ const u32 inputWidth = (inputSize & 0xffff) << 4; + const u32 inputGap = (inputSize >> 16) << 4; + const u32 outputWidth = (outputSize & 0xffff) << 4; + const u32 outputGap = (outputSize >> 16) << 4; + + if (inputGap != 0 || outputGap != 0) { + // Helpers::warn("Strided texture copy\n"); + } + + if (inputWidth != outputWidth) { + Helpers::warn("Input width does not match output width, cannot accelerate texture copy!"); + return; + } + + // Texture copy is a raw data copy in PICA, which means no format or tiling information is provided to the engine. + // Depending if the target surface is linear or tiled, games set inputWidth to either the width of the texture or + // the width multiplied by eight (because tiles are stored linearly in memory). + // To properly accelerate this we must examine each surface individually. For now we assume the most common case + // of tiled surface with RGBA8 format. If our assumption does not hold true, we abort the texture copy as inserting + // that surface is not correct. + + // We assume the source surface is tiled and RGBA8. inputWidth is in bytes so divide it + // by eight * sizePerPixel(RGBA8) to convert it to a useable width. + const u32 bpp = sizePerPixel(PICA::ColorFmt::RGBA8); + const u32 copyStride = (inputWidth + inputGap) / (8 * bpp); + const u32 copyWidth = inputWidth / (8 * bpp); + + // inputHeight/outputHeight are typically set to zero so they cannot be used to get the height of the copy region + // in contrast to display transfer. Compute height manually by dividing the copy size with the copy width. The result + // is the number of vertical tiles so multiply that by eight to get the actual copy height. + u32 copyHeight; + if (inputWidth != 0) [[likely]] { + copyHeight = (copySize / inputWidth) * 8; + } else { + copyHeight = 0; + } + + // Find the source surface. 
+ auto srcFramebuffer = getColorRenderTarget(inputAddr, PICA::ColorFmt::RGBA8, copyStride, copyHeight, false); + if (!srcFramebuffer) { + Helpers::warn("RendererMTL::TextureCopy failed to locate src framebuffer!\n"); + return; + } + nextRenderPassName = "Clear before texture copy"; + clearColor(nullptr, srcFramebuffer->texture); + + Math::Rect srcRect = srcFramebuffer->getSubRect(inputAddr, copyWidth, copyHeight); + + // Assume the destination surface has the same format. Unless the surfaces have the same block width, + // texture copy does not make sense. + auto destFramebuffer = getColorRenderTarget(outputAddr, srcFramebuffer->format, copyWidth, copyHeight); + // TODO: clear if not blitting to the whole framebuffer + Math::Rect destRect = destFramebuffer->getSubRect(outputAddr, copyWidth, copyHeight); + + textureCopyImpl(*srcFramebuffer, *destFramebuffer, srcRect, destRect); +} + +void RendererMTL::drawVertices(PICA::PrimType primType, std::span vertices) { + // Color + auto colorRenderTarget = getColorRenderTarget(colourBufferLoc, colourBufferFormat, fbSize[0], fbSize[1]); + + // Depth stencil + const u32 depthControl = regs[PICA::InternalRegs::DepthAndColorMask]; + const bool depthStencilWrite = regs[PICA::InternalRegs::DepthBufferWrite]; + const bool depthEnable = depthControl & 0x1; + const bool depthWriteEnable = Helpers::getBit<12>(depthControl); + const u8 depthFunc = Helpers::getBits<4, 3>(depthControl); + const u8 colorMask = Helpers::getBits<8, 4>(depthControl); + + Metal::DepthStencilHash depthStencilHash; + depthStencilHash.stencilConfig = regs[PICA::InternalRegs::StencilTest]; + depthStencilHash.stencilOpConfig = regs[PICA::InternalRegs::StencilOp]; + depthStencilHash.depthStencilWrite = false; + depthStencilHash.depthFunc = 1; + const bool stencilEnable = Helpers::getBit<0>(depthStencilHash.stencilConfig); + + std::optional depthStencilRenderTarget = std::nullopt; + if (depthEnable) { + depthStencilHash.depthStencilWrite = depthWriteEnable && 
depthStencilWrite; + depthStencilHash.depthFunc = depthFunc; + depthStencilRenderTarget = getDepthRenderTarget(); + } else { + if (depthWriteEnable) { + depthStencilHash.depthStencilWrite = true; + depthStencilRenderTarget = getDepthRenderTarget(); + } else if (stencilEnable) { + depthStencilRenderTarget = getDepthRenderTarget(); + } + } + + // Depth uniforms + struct { + float depthScale; + float depthOffset; + bool depthMapEnable; + } depthUniforms; + depthUniforms.depthScale = Floats::f24::fromRaw(regs[PICA::InternalRegs::DepthScale] & 0xffffff).toFloat32(); + depthUniforms.depthOffset = Floats::f24::fromRaw(regs[PICA::InternalRegs::DepthOffset] & 0xffffff).toFloat32(); + depthUniforms.depthMapEnable = regs[PICA::InternalRegs::DepthmapEnable] & 1; + + // -------- Pipeline -------- + Metal::DrawPipelineHash pipelineHash; + pipelineHash.colorFmt = colorRenderTarget->format; + if (depthStencilRenderTarget) { + pipelineHash.depthFmt = depthStencilRenderTarget->format; + } else { + pipelineHash.depthFmt = DepthFmt::Unknown1; + } + pipelineHash.fragHash.lightingEnabled = regs[0x008F] & 1; + pipelineHash.fragHash.lightingNumLights = regs[0x01C2] & 0x7; + pipelineHash.fragHash.lightingConfig1 = regs[0x01C4u]; + pipelineHash.fragHash.alphaControl = regs[0x104]; + + // Blending and logic op + pipelineHash.blendEnabled = (regs[PICA::InternalRegs::ColourOperation] & (1 << 8)) != 0; + pipelineHash.colorWriteMask = colorMask; + + u8 logicOp = 3; // Copy + if (pipelineHash.blendEnabled) { + pipelineHash.blendControl = regs[PICA::InternalRegs::BlendFunc]; + } else { + logicOp = Helpers::getBits<0, 4>(regs[PICA::InternalRegs::LogicOp]); + } + + MTL::RenderPipelineState* pipeline = drawPipelineCache.get(pipelineHash); + + // Depth stencil state + MTL::DepthStencilState* depthStencilState = depthStencilCache.get(depthStencilHash); + + // -------- Render -------- + MTL::RenderPassDescriptor* renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init(); + bool doesClear = 
clearColor(renderPassDescriptor, colorRenderTarget->texture); + if (depthStencilRenderTarget) { + if (clearDepth(renderPassDescriptor, depthStencilRenderTarget->texture)) doesClear = true; + if (depthStencilRenderTarget->format == DepthFmt::Depth24Stencil8) { + if (clearStencil(renderPassDescriptor, depthStencilRenderTarget->texture)) doesClear = true; + } + } + + nextRenderPassName = "Draw vertices"; + beginRenderPassIfNeeded( + renderPassDescriptor, doesClear, colorRenderTarget->texture, (depthStencilRenderTarget ? depthStencilRenderTarget->texture : nullptr) + ); + + // Update the LUT texture if necessary + if (gpu.lightingLUTDirty) { + updateLightingLUT(renderCommandEncoder); + } + if (gpu.fogLUTDirty) { + updateFogLUT(renderCommandEncoder); + } + + commandEncoder.setRenderPipelineState(pipeline); + commandEncoder.setDepthStencilState(depthStencilState); + // If size is < 4KB, use inline vertex data, otherwise use a buffer + if (vertices.size_bytes() < 4 * 1024) { + renderCommandEncoder->setVertexBytes(vertices.data(), vertices.size_bytes(), VERTEX_BUFFER_BINDING_INDEX); + } else { + Metal::BufferHandle buffer = vertexBufferCache.get(vertices.data(), vertices.size_bytes()); + renderCommandEncoder->setVertexBuffer(buffer.buffer, buffer.offset, VERTEX_BUFFER_BINDING_INDEX); + } + + // Viewport + const u32 viewportX = regs[PICA::InternalRegs::ViewportXY] & 0x3ff; + const u32 viewportY = (regs[PICA::InternalRegs::ViewportXY] >> 16) & 0x3ff; + const u32 viewportWidth = Floats::f24::fromRaw(regs[PICA::InternalRegs::ViewportWidth] & 0xffffff).toFloat32() * 2.0f; + const u32 viewportHeight = Floats::f24::fromRaw(regs[PICA::InternalRegs::ViewportHeight] & 0xffffff).toFloat32() * 2.0f; + const auto rect = colorRenderTarget->getSubRect(colourBufferLoc, fbSize[0], fbSize[1]); + MTL::Viewport viewport{double(rect.left + viewportX), double(rect.bottom + viewportY), double(viewportWidth), double(viewportHeight), 0.0, 1.0}; + renderCommandEncoder->setViewport(viewport); + + // 
Blend color + if (pipelineHash.blendEnabled) { + u32 constantColor = regs[PICA::InternalRegs::BlendColour]; + const u8 r = constantColor & 0xff; + const u8 g = Helpers::getBits<8, 8>(constantColor); + const u8 b = Helpers::getBits<16, 8>(constantColor); + const u8 a = Helpers::getBits<24, 8>(constantColor); + + renderCommandEncoder->setBlendColor(r / 255.0f, g / 255.0f, b / 255.0f, a / 255.0f); + } + + // Stencil reference + if (stencilEnable) { + const s8 reference = s8(Helpers::getBits<16, 8>(depthStencilHash.stencilConfig)); // Signed reference value + renderCommandEncoder->setStencilReferenceValue(reference); + } + + // Bind resources + setupTextureEnvState(renderCommandEncoder); + bindTexturesToSlots(); + renderCommandEncoder->setVertexBytes(®s[0x48], (0x200 - 0x48) * sizeof(regs[0]), 0); + renderCommandEncoder->setFragmentBytes(®s[0x48], (0x200 - 0x48) * sizeof(regs[0]), 0); + renderCommandEncoder->setVertexBytes(&depthUniforms, sizeof(depthUniforms), 2); + renderCommandEncoder->setFragmentBytes(&logicOp, sizeof(logicOp), 2); + u32 lutSlices[2] = {lutLightingTexture->getCurrentIndex(), lutFogTexture->getCurrentIndex()}; + renderCommandEncoder->setFragmentBytes(&lutSlices, sizeof(lutSlices), 3); + + renderCommandEncoder->drawPrimitives(toMTLPrimitiveType(primType), NS::UInteger(0), NS::UInteger(vertices.size())); +} + +void RendererMTL::screenshot(const std::string& name) { + // TODO: implement + Helpers::warn("RendererMTL::screenshot not implemented"); +} + +void RendererMTL::deinitGraphicsContext() { + reset(); + + delete lutLightingTexture; + delete lutFogTexture; + + // copyToLutTexturePipeline->release(); + displayPipeline->release(); + defaultDepthStencilState->release(); + nullTexture->release(); + linearSampler->release(); + nearestSampler->release(); + library->release(); + commandQueue->release(); + device->release(); +} + +std::optional RendererMTL::getColorRenderTarget( + u32 addr, PICA::ColorFmt format, u32 width, u32 height, bool createIfnotFound 
+) { + // Try to find an already existing buffer that contains the provided address + // This is a more relaxed check compared to getColourFBO as display transfer/texcopy may refer to + // subrect of a surface and in case of texcopy we don't know the format of the surface. + auto buffer = colorRenderTargetCache.findFromAddress(addr); + if (buffer.has_value()) { + return buffer.value().get(); + } + + if (!createIfnotFound) { + return std::nullopt; + } + + // Otherwise create and cache a new buffer. + Metal::ColorRenderTarget sampleBuffer(device, addr, format, width, height); + auto& colorBuffer = colorRenderTargetCache.add(sampleBuffer); + + // Clear the color buffer + colorClearOps[colorBuffer.texture] = {0, 0, 0, 0}; + return colorBuffer; +} + +Metal::DepthStencilRenderTarget& RendererMTL::getDepthRenderTarget() { + Metal::DepthStencilRenderTarget sampleBuffer(device, depthBufferLoc, depthBufferFormat, fbSize[0], fbSize[1]); + auto buffer = depthStencilRenderTargetCache.find(sampleBuffer); + + if (buffer.has_value()) { + return buffer.value().get(); + } else { + auto& depthBuffer = depthStencilRenderTargetCache.add(sampleBuffer); + + // Clear the depth buffer + depthClearOps[depthBuffer.texture] = 0.0f; + if (depthBuffer.format == DepthFmt::Depth24Stencil8) { + stencilClearOps[depthBuffer.texture] = 0; + } + + return depthBuffer; + } +} + +Metal::Texture& RendererMTL::getTexture(Metal::Texture& tex) { + auto buffer = textureCache.find(tex); + + if (buffer.has_value()) { + return buffer.value().get(); + } else { + const auto textureData = std::span{gpu.getPointerPhys(tex.location), tex.sizeInBytes()}; // Get pointer to the texture data in 3DS memory + Metal::Texture& newTex = textureCache.add(tex); + newTex.decodeTexture(textureData); + + return newTex; + } +} + +void RendererMTL::setupTextureEnvState(MTL::RenderCommandEncoder* encoder) { + static constexpr std::array ioBases = { + PICA::InternalRegs::TexEnv0Source, PICA::InternalRegs::TexEnv1Source, 
PICA::InternalRegs::TexEnv2Source, + PICA::InternalRegs::TexEnv3Source, PICA::InternalRegs::TexEnv4Source, PICA::InternalRegs::TexEnv5Source, + }; + + struct { + u32 textureEnvSourceRegs[6]; + u32 textureEnvOperandRegs[6]; + u32 textureEnvCombinerRegs[6]; + u32 textureEnvScaleRegs[6]; + } envState; + u32 textureEnvColourRegs[6]; + + for (int i = 0; i < 6; i++) { + const u32 ioBase = ioBases[i]; + + envState.textureEnvSourceRegs[i] = regs[ioBase]; + envState.textureEnvOperandRegs[i] = regs[ioBase + 1]; + envState.textureEnvCombinerRegs[i] = regs[ioBase + 2]; + textureEnvColourRegs[i] = regs[ioBase + 3]; + envState.textureEnvScaleRegs[i] = regs[ioBase + 4]; + } + + encoder->setVertexBytes(&textureEnvColourRegs, sizeof(textureEnvColourRegs), 1); + encoder->setFragmentBytes(&envState, sizeof(envState), 1); +} + +void RendererMTL::bindTexturesToSlots() { + static constexpr std::array ioBases = { + PICA::InternalRegs::Tex0BorderColor, + PICA::InternalRegs::Tex1BorderColor, + PICA::InternalRegs::Tex2BorderColor, + }; + + for (int i = 0; i < 3; i++) { + if ((regs[PICA::InternalRegs::TexUnitCfg] & (1 << i)) == 0) { + commandEncoder.setFragmentTexture(nullTexture, i); + commandEncoder.setFragmentSamplerState(nearestSampler, i); + continue; + } + + const size_t ioBase = ioBases[i]; + + const u32 dim = regs[ioBase + 1]; + const u32 config = regs[ioBase + 2]; + const u32 height = dim & 0x7ff; + const u32 width = Helpers::getBits<16, 11>(dim); + const u32 addr = (regs[ioBase + 4] & 0x0FFFFFFF) << 3; + u32 format = regs[ioBase + (i == 0 ? 13 : 5)] & 0xF; + + if (addr != 0) [[likely]] { + Metal::Texture targetTex(device, addr, static_cast(format), width, height, config); + auto tex = getTexture(targetTex); + commandEncoder.setFragmentTexture(tex.texture, i); + commandEncoder.setFragmentSamplerState(tex.sampler ? tex.sampler : nearestSampler, i); + } else { + // TODO: Bind a blank texture here. 
Some games, like Pokemon X, will render with a texture bound to nullptr, triggering GPU open bus + // Binding a blank texture makes all of those games look normal + } + } +} + +void RendererMTL::updateLightingLUT(MTL::RenderCommandEncoder* encoder) { + gpu.lightingLUTDirty = false; + + std::array lightingLut; + + for (int i = 0; i < gpu.lightingLUT.size(); i++) { + uint64_t value = gpu.lightingLUT[i] & 0xFFF; + lightingLut[i] = (value << 4); + } + + u32 index = lutLightingTexture->getNextIndex(); + lutLightingTexture->getTexture()->replaceRegion( + MTL::Region(0, 0, LIGHTING_LUT_TEXTURE_WIDTH, Lights::LUT_Count), 0, index, lightingLut.data(), LIGHTING_LUT_TEXTURE_WIDTH * 2, 0 + ); +} + +void RendererMTL::updateFogLUT(MTL::RenderCommandEncoder* encoder) { + gpu.fogLUTDirty = false; + + std::array fogLut = {0.0f}; + + for (int i = 0; i < fogLut.size(); i += 2) { + const uint32_t value = gpu.fogLUT[i >> 1]; + int32_t diff = value & 0x1fff; + diff = (diff << 19) >> 19; // Sign extend the 13-bit value to 32 bits + const float fogDifference = float(diff) / 2048.0f; + const float fogValue = float((value >> 13) & 0x7ff) / 2048.0f; + + fogLut[i] = fogValue; + fogLut[i + 1] = fogDifference; + } + + u32 index = lutFogTexture->getNextIndex(); + lutFogTexture->getTexture()->replaceRegion(MTL::Region(0, 0, FOG_LUT_TEXTURE_WIDTH, 1), 0, index, fogLut.data(), 0, 0); +} + +void RendererMTL::textureCopyImpl( + Metal::ColorRenderTarget& srcFramebuffer, Metal::ColorRenderTarget& destFramebuffer, const Math::Rect& srcRect, + const Math::Rect& destRect +) { + nextRenderPassName = "Texture copy"; + MTL::RenderPassDescriptor* renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init(); + // TODO: clearColor sets the load action to load if it didn't find any clear, but that is unnecessary if we are doing a copy to the whole + // texture + bool doesClear = clearColor(renderPassDescriptor, destFramebuffer.texture); + beginRenderPassIfNeeded(renderPassDescriptor, doesClear, 
destFramebuffer.texture); + + // Pipeline + Metal::BlitPipelineHash hash{destFramebuffer.format, DepthFmt::Unknown1}; + auto blitPipeline = blitPipelineCache.get(hash); + + commandEncoder.setRenderPipelineState(blitPipeline); + + // Viewport + renderCommandEncoder->setViewport(MTL::Viewport{ + double(destRect.left), double(destRect.bottom), double(destRect.right - destRect.left), double(destRect.top - destRect.bottom), 0.0, 1.0}); + + float srcRectNDC[4] = { + srcRect.left / (float)srcFramebuffer.size.u(), + srcRect.bottom / (float)srcFramebuffer.size.v(), + (srcRect.right - srcRect.left) / (float)srcFramebuffer.size.u(), + (srcRect.top - srcRect.bottom) / (float)srcFramebuffer.size.v(), + }; + + // Bind resources + renderCommandEncoder->setVertexBytes(&srcRectNDC, sizeof(srcRectNDC), 0); + renderCommandEncoder->setFragmentTexture(srcFramebuffer.texture, GET_HELPER_TEXTURE_BINDING(0)); + renderCommandEncoder->setFragmentSamplerState(nearestSampler, GET_HELPER_SAMPLER_STATE_BINDING(0)); + + renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(4)); +} + +void RendererMTL::beginRenderPassIfNeeded( + MTL::RenderPassDescriptor* renderPassDescriptor, bool doesClears, MTL::Texture* colorTexture, MTL::Texture* depthTexture +) { + createCommandBufferIfNeeded(); + + if (doesClears || !renderCommandEncoder || colorTexture != lastColorTexture || + (depthTexture != lastDepthTexture && !(lastDepthTexture && !depthTexture))) { + endRenderPass(); + + renderCommandEncoder = commandBuffer->renderCommandEncoder(renderPassDescriptor); + renderCommandEncoder->setLabel(toNSString(nextRenderPassName)); + commandEncoder.newRenderCommandEncoder(renderCommandEncoder); + + // Bind persistent resources + + // LUT texture + renderCommandEncoder->setFragmentTexture(lutLightingTexture->getTexture(), 3); + renderCommandEncoder->setFragmentTexture(lutFogTexture->getTexture(), 4); + renderCommandEncoder->setFragmentSamplerState(linearSampler, 3); + + 
lastColorTexture = colorTexture; + lastDepthTexture = depthTexture; + } + + renderPassDescriptor->release(); +} diff --git a/src/core/services/ac.cpp b/src/core/services/ac.cpp index 8f5545fe..f3cb5e83 100644 --- a/src/core/services/ac.cpp +++ b/src/core/services/ac.cpp @@ -1,4 +1,5 @@ #include "services/ac.hpp" + #include "ipc.hpp" namespace ACCommands { @@ -10,6 +11,7 @@ namespace ACCommands { GetStatus = 0x000C0000, GetWifiStatus = 0x000D0000, GetConnectingInfraPriority = 0x000F0000, + GetNZoneBeaconNotFoundEvent = 0x002F0004, RegisterDisconnectEvent = 0x00300004, IsConnected = 0x003E0042, SetClientVersion = 0x00400042, @@ -29,12 +31,17 @@ void ACService::handleSyncRequest(u32 messagePointer) { case ACCommands::CreateDefaultConfig: createDefaultConfig(messagePointer); break; case ACCommands::GetConnectingInfraPriority: getConnectingInfraPriority(messagePointer); break; case ACCommands::GetLastErrorCode: getLastErrorCode(messagePointer); break; + case ACCommands::GetNZoneBeaconNotFoundEvent: getNZoneBeaconNotFoundEvent(messagePointer); break; case ACCommands::GetStatus: getStatus(messagePointer); break; case ACCommands::GetWifiStatus: getWifiStatus(messagePointer); break; case ACCommands::IsConnected: isConnected(messagePointer); break; case ACCommands::RegisterDisconnectEvent: registerDisconnectEvent(messagePointer); break; case ACCommands::SetClientVersion: setClientVersion(messagePointer); break; - default: Helpers::panic("AC service requested. Command: %08X\n", command); + + default: + mem.write32(messagePointer + 4, Result::Success); + Helpers::warn("AC service requested. Command: %08X\n", command); + break; } } @@ -72,7 +79,7 @@ void ACService::getLastErrorCode(u32 messagePointer) { mem.write32(messagePointer, IPC::responseHeader(0x0A, 2, 0)); mem.write32(messagePointer + 4, Result::Success); - mem.write32(messagePointer + 8, 0); // Hopefully this means no error? + mem.write32(messagePointer + 8, 0); // Hopefully this means no error? 
} void ACService::getConnectingInfraPriority(u32 messagePointer) { @@ -136,4 +143,13 @@ void ACService::registerDisconnectEvent(u32 messagePointer) { mem.write32(messagePointer, IPC::responseHeader(0x30, 1, 0)); mem.write32(messagePointer + 4, Result::Success); +} + +void ACService::getNZoneBeaconNotFoundEvent(u32 messagePointer) { + const u32 processID = mem.read32(messagePointer + 8); + const Handle event = mem.read32(messagePointer + 16); + log("AC::GetNZoneBeaconNotFoundEvent (process ID = %X, event = %X) (stubbed)\n", processID, event); + + mem.write32(messagePointer, IPC::responseHeader(0x2F, 1, 0)); + mem.write32(messagePointer + 4, Result::Success); } \ No newline at end of file diff --git a/src/core/services/boss.cpp b/src/core/services/boss.cpp index cb8fdf81..cf9cbe36 100644 --- a/src/core/services/boss.cpp +++ b/src/core/services/boss.cpp @@ -1,4 +1,5 @@ #include "services/boss.hpp" + #include "ipc.hpp" namespace BOSSCommands { @@ -25,27 +26,36 @@ namespace BOSSCommands { GetTaskState = 0x00200082, GetTaskStatus = 0x002300C2, GetTaskInfo = 0x00250082, + DeleteNsData = 0x00260040, + GetNsDataHeaderInfo = 0x002700C2, + ReadNsData = 0x00280102, + GetNsDataLastUpdated = 0x002D0040, GetErrorCode = 0x002E0040, RegisterStorageEntry = 0x002F0140, GetStorageEntryInfo = 0x00300000, + StartBgImmediate = 0x00330042, + InitializeSessionPrivileged = 0x04010082, + GetAppNewFlag = 0x04040080, + SetAppNewFlag = 0x040500C0, // Probably }; } -void BOSSService::reset() { - optoutFlag = 0; -} +void BOSSService::reset() { optoutFlag = 0; } void BOSSService::handleSyncRequest(u32 messagePointer) { const u32 command = mem.read32(messagePointer); switch (command) { case BOSSCommands::CancelTask: cancelTask(messagePointer); break; + case BOSSCommands::DeleteNsData: deleteNsData(messagePointer); break; + case BOSSCommands::GetAppNewFlag: getAppNewFlag(messagePointer); break; case BOSSCommands::GetErrorCode: getErrorCode(messagePointer); break; + case 
BOSSCommands::GetNsDataHeaderInfo: getNsDataHeaderInfo(messagePointer); break; case BOSSCommands::GetNewArrivalFlag: getNewArrivalFlag(messagePointer); break; case BOSSCommands::GetNsDataIdList: case BOSSCommands::GetNsDataIdList1: case BOSSCommands::GetNsDataIdList2: - case BOSSCommands::GetNsDataIdList3: - getNsDataIdList(messagePointer, command); break; + case BOSSCommands::GetNsDataIdList3: getNsDataIdList(messagePointer, command); break; + case BOSSCommands::GetNsDataLastUpdated: getNsDataLastUpdated(messagePointer); break; case BOSSCommands::GetOptoutFlag: getOptoutFlag(messagePointer); break; case BOSSCommands::GetStorageEntryInfo: getStorageEntryInfo(messagePointer); break; case BOSSCommands::GetTaskIdList: getTaskIdList(messagePointer); break; @@ -54,17 +64,31 @@ void BOSSService::handleSyncRequest(u32 messagePointer) { case BOSSCommands::GetTaskState: getTaskState(messagePointer); break; case BOSSCommands::GetTaskStatus: getTaskStatus(messagePointer); break; case BOSSCommands::GetTaskStorageInfo: getTaskStorageInfo(messagePointer); break; - case BOSSCommands::InitializeSession: initializeSession(messagePointer); break; + case BOSSCommands::InitializeSession: + case BOSSCommands::InitializeSessionPrivileged: initializeSession(messagePointer); break; + case BOSSCommands::ReadNsData: readNsData(messagePointer); break; case BOSSCommands::ReceiveProperty: receiveProperty(messagePointer); break; case BOSSCommands::RegisterNewArrivalEvent: registerNewArrivalEvent(messagePointer); break; case BOSSCommands::RegisterStorageEntry: registerStorageEntry(messagePointer); break; case BOSSCommands::RegisterTask: registerTask(messagePointer); break; case BOSSCommands::SendProperty: sendProperty(messagePointer); break; + case BOSSCommands::SetAppNewFlag: setAppNewFlag(messagePointer); break; case BOSSCommands::SetOptoutFlag: setOptoutFlag(messagePointer); break; + case BOSSCommands::StartBgImmediate: startBgImmediate(messagePointer); break; case BOSSCommands::StartTask: 
startTask(messagePointer); break; case BOSSCommands::UnregisterStorage: unregisterStorage(messagePointer); break; case BOSSCommands::UnregisterTask: unregisterTask(messagePointer); break; - default: Helpers::panic("BOSS service requested. Command: %08X\n", command); + + case 0x04500102: // Home Menu uses this command, what is this? + Helpers::warn("BOSS command 0x04500102"); + mem.write32(messagePointer, IPC::responseHeader(0x450, 1, 0)); + mem.write32(messagePointer + 4, Result::Success); + break; + + default: + mem.write32(messagePointer + 4, Result::Success); + Helpers::warn("BOSS service requested. Command: %08X\n", command); + break; } } @@ -99,7 +123,7 @@ void BOSSService::getTaskState(u32 messagePointer) { mem.write32(messagePointer + 4, Result::Success); mem.write8(messagePointer + 8, 0); // TaskStatus: Report the task finished successfully mem.write32(messagePointer + 12, 0); // Current state value for task PropertyID 0x4 - mem.write8(messagePointer + 16, 0); // TODO: Figure out what this should be + mem.write8(messagePointer + 16, 0); // TODO: Figure out what this should be } void BOSSService::getTaskStatus(u32 messagePointer) { @@ -150,15 +174,15 @@ void BOSSService::getErrorCode(u32 messagePointer) { log("BOSS::GetErrorCode (stubbed)\n"); mem.write32(messagePointer, IPC::responseHeader(0x2E, 2, 0)); mem.write32(messagePointer + 4, Result::Success); - mem.write32(messagePointer + 8, Result::Success); // No error code + mem.write32(messagePointer + 8, Result::Success); // No error code } void BOSSService::getStorageEntryInfo(u32 messagePointer) { log("BOSS::GetStorageEntryInfo (undocumented)\n"); mem.write32(messagePointer, IPC::responseHeader(0x30, 3, 0)); mem.write32(messagePointer + 4, Result::Success); - mem.write32(messagePointer + 8, 0); // u32, unknown meaning - mem.write16(messagePointer + 12, 0); // s16, unknown meaning + mem.write32(messagePointer + 8, 0); // u32, unknown meaning + mem.write16(messagePointer + 12, 0); // s16, unknown meaning } 
void BOSSService::sendProperty(u32 messagePointer) { @@ -173,7 +197,6 @@ void BOSSService::sendProperty(u32 messagePointer) { // TODO: Should this do anything else? } - void BOSSService::receiveProperty(u32 messagePointer) { const u32 id = mem.read32(messagePointer + 4); const u32 size = mem.read32(messagePointer + 8); @@ -182,13 +205,13 @@ void BOSSService::receiveProperty(u32 messagePointer) { log("BOSS::ReceiveProperty (id = %d, size = %08X, ptr = %08X) (stubbed)\n", id, size, ptr); mem.write32(messagePointer, IPC::responseHeader(0x16, 2, 2)); mem.write32(messagePointer + 4, Result::Success); - mem.write32(messagePointer + 8, 0); // Read size + mem.write32(messagePointer + 8, 0); // Read size } // This seems to accept a KEvent as a parameter and register it for something Spotpass related // I need to update the 3DBrew page when it's known what it does properly void BOSSService::registerNewArrivalEvent(u32 messagePointer) { - const Handle eventHandle = mem.read32(messagePointer + 4); // Kernel event handle to register + const Handle eventHandle = mem.read32(messagePointer + 4); // Kernel event handle to register log("BOSS::RegisterNewArrivalEvent (handle = %X)\n", eventHandle); mem.write32(messagePointer, IPC::responseHeader(0x8, 1, 0)); @@ -252,5 +275,92 @@ void BOSSService::getNewArrivalFlag(u32 messagePointer) { log("BOSS::GetNewArrivalFlag (stubbed)\n"); mem.write32(messagePointer, IPC::responseHeader(0x7, 2, 0)); mem.write32(messagePointer + 4, Result::Success); - mem.write8(messagePointer + 8, 0); // Flag + mem.write8(messagePointer + 8, 0); // Flag +} + +void BOSSService::startBgImmediate(u32 messagePointer) { + const u32 size = mem.read32(messagePointer + 8); + const u32 taskIDs = mem.read32(messagePointer + 12); + log("BOSS::StartBgImmediate (size = %X, task ID pointer = %X) (stubbed)\n", size, taskIDs); + + mem.write32(messagePointer, IPC::responseHeader(0x33, 1, 2)); + mem.write32(messagePointer + 4, Result::Success); + mem.write32(messagePointer + 8, 
IPC::pointerHeader(0, size, IPC::BufferType::Send)); + mem.write32(messagePointer + 12, taskIDs); +} + +void BOSSService::getAppNewFlag(u32 messagePointer) { + const u64 appID = mem.read64(messagePointer + 4); + log("BOSS::GetAppNewFlag (app ID = %llX)\n", appID); + + mem.write32(messagePointer, IPC::responseHeader(0x404, 2, 0)); + mem.write32(messagePointer + 4, Result::Success); + mem.write8(messagePointer + 8, 0); // No new content +} + +void BOSSService::getNsDataHeaderInfo(u32 messagePointer) { + const u32 nsDataID = mem.read32(messagePointer + 4); + const u8 type = mem.read8(messagePointer + 8); + const u32 size = mem.read32(messagePointer + 12); + const u32 nsDataHeaderInfo = mem.read32(messagePointer + 20); + log("BOSS::GetNsDataHeaderInfo (NS data ID = %X, type = %X, size = %X, NS data header info pointer = %X) (stubbed)\n", nsDataID, type, size, + nsDataHeaderInfo); + + switch (type) { + case 3: + case 5: mem.write32(nsDataHeaderInfo, 0); break; // ?? + + default: Helpers::panic("Unimplemented NS data header info type %X", type); + } + + mem.write32(messagePointer, IPC::responseHeader(0x27, 1, 2)); + mem.write32(messagePointer + 4, Result::Success); + mem.write32(messagePointer + 8, IPC::pointerHeader(0, size, IPC::BufferType::Receive)); + mem.write32(messagePointer + 12, nsDataHeaderInfo); +} + +void BOSSService::getNsDataLastUpdated(u32 messagePointer) { + const u32 nsDataID = mem.read32(messagePointer + 4); + log("BOSS::GetNsDataLastUpdated (NS data ID = %X) (stubbed)\n", nsDataID); + + mem.write32(messagePointer, IPC::responseHeader(0x2D, 3, 0)); + mem.write32(messagePointer + 4, Result::Success); + mem.write64(messagePointer + 8, 0); // Milliseconds since last update? 
+} + +void BOSSService::readNsData(u32 messagePointer) { + const u32 nsDataID = mem.read32(messagePointer + 4); + const s64 offset = mem.read64(messagePointer + 8); + const u32 size = mem.read32(messagePointer + 20); + const u32 data = mem.read32(messagePointer + 24); + log("BOSS::ReadNsData (NS data ID = %X, offset = %llX, size = %X, data pointer = %X) (stubbed)\n", nsDataID, offset, size, data); + + for (u32 i = 0; i < size; i++) { + mem.write8(data + i, 0); + } + + mem.write32(messagePointer, IPC::responseHeader(0x28, 3, 2)); + mem.write32(messagePointer + 4, Result::Success); + mem.write32(messagePointer + 8, size); // Technically how many bytes have been read + mem.write32(messagePointer + 12, 0); // ?? + mem.write32(messagePointer + 16, IPC::pointerHeader(0, size, IPC::BufferType::Receive)); + mem.write32(messagePointer + 20, data); +} + +void BOSSService::deleteNsData(u32 messagePointer) { + const u32 nsDataID = mem.read32(messagePointer + 4); + log("BOSS::DeleteNsData (NS data ID = %X) (stubbed)\n", nsDataID); + + mem.write32(messagePointer, IPC::responseHeader(0x26, 1, 0)); + mem.write32(messagePointer + 4, Result::Success); +} + +// Judging by the inputs and command number, this could very well be a "SetAppNewFlag" +void BOSSService::setAppNewFlag(u32 messagePointer) { + const u64 appID = mem.read64(messagePointer + 4); + const u8 flag = mem.read32(messagePointer + 12); + log("BOSS::SetAppNewFlag (app ID = %llX, flag = %X)\n", appID, flag); + + mem.write32(messagePointer, IPC::responseHeader(0x405, 1, 0)); + mem.write32(messagePointer + 4, Result::Success); } \ No newline at end of file diff --git a/src/core/services/cfg.cpp b/src/core/services/cfg.cpp index 0b5f6437..a9b80472 100644 --- a/src/core/services/cfg.cpp +++ b/src/core/services/cfg.cpp @@ -17,40 +17,80 @@ namespace CFGCommands { GetRegionCanadaUSA = 0x00040000, GetSystemModel = 0x00050000, TranslateCountryInfo = 0x00080080, - GetCountryCodeID = 0x000A0040, + GetCountryCodeID = 0x000A0040, + 
IsFangateSupported = 0x000B0000, + SetConfigInfoBlk4 = 0x04020082, + UpdateConfigNANDSavegame = 0x04030000, GetLocalFriendCodeSeed = 0x04050000, SecureInfoGetByte101 = 0x04070000, }; } +// cfg:i commands +namespace CFGICommands { + enum : u32 { + GetConfigInfoBlk8 = 0x08010082, + }; +} + +// cfg:nor commands +namespace NORCommands { + enum : u32 { + Initialize = 0x00010040, + ReadData = 0x00050082, + }; +} + void CFGService::reset() {} void CFGService::handleSyncRequest(u32 messagePointer, CFGService::Type type) { const u32 command = mem.read32(messagePointer); - switch (command) { - case CFGCommands::GetConfigInfoBlk2: [[likely]] getConfigInfoBlk2(messagePointer); break; - case CFGCommands::GetCountryCodeID: getCountryCodeID(messagePointer); break; - case CFGCommands::GetRegionCanadaUSA: getRegionCanadaUSA(messagePointer); break; - case CFGCommands::GetSystemModel: getSystemModel(messagePointer); break; - case CFGCommands::GenHashConsoleUnique: genUniqueConsoleHash(messagePointer); break; - case CFGCommands::SecureInfoGetRegion: secureInfoGetRegion(messagePointer); break; - case CFGCommands::TranslateCountryInfo: translateCountryInfo(messagePointer); break; - default: - if (type == Type::S) { - // cfg:s-only functions - switch (command) { - case CFGCommands::GetConfigInfoBlk8: getConfigInfoBlk8(messagePointer); break; - case CFGCommands::GetLocalFriendCodeSeed: getLocalFriendCodeSeed(messagePointer); break; - case CFGCommands::SecureInfoGetByte101: secureInfoGetByte101(messagePointer); break; - default: Helpers::panic("CFG:S service requested. 
Command: %08X\n", command); + if (type != Type::NOR) { + switch (command) { + case CFGCommands::GetConfigInfoBlk2: [[likely]] getConfigInfoBlk2(messagePointer); break; + case CFGCommands::GetCountryCodeID: getCountryCodeID(messagePointer); break; + case CFGCommands::GetRegionCanadaUSA: getRegionCanadaUSA(messagePointer); break; + case CFGCommands::GetSystemModel: getSystemModel(messagePointer); break; + case CFGCommands::GenHashConsoleUnique: genUniqueConsoleHash(messagePointer); break; + case CFGCommands::IsFangateSupported: isFangateSupported(messagePointer); break; + case CFGCommands::SecureInfoGetRegion: secureInfoGetRegion(messagePointer); break; + case CFGCommands::TranslateCountryInfo: translateCountryInfo(messagePointer); break; + + default: + if (type == Type::S) { + // cfg:s (and cfg:i) functions only functions + switch (command) { + case CFGCommands::GetConfigInfoBlk8: getConfigInfoBlk8(messagePointer, command); break; + case CFGCommands::GetLocalFriendCodeSeed: getLocalFriendCodeSeed(messagePointer); break; + case CFGCommands::SecureInfoGetByte101: secureInfoGetByte101(messagePointer); break; + case CFGCommands::SetConfigInfoBlk4: setConfigInfoBlk4(messagePointer); break; + case CFGCommands::UpdateConfigNANDSavegame: updateConfigNANDSavegame(messagePointer); break; + + default: Helpers::panic("CFG:S service requested. Command: %08X\n", command); + } + } else if (type == Type::I) { + switch (command) { + case CFGCommands::GetConfigInfoBlk8: + case CFGICommands::GetConfigInfoBlk8: getConfigInfoBlk8(messagePointer, command); break; + + default: Helpers::panic("CFG:I service requested. Command: %08X\n", command); + } + } else { + Helpers::panic("CFG service requested. Command: %08X\n", command); } - } else { - Helpers::panic("CFG service requested. 
Command: %08X\n", command); - } - break; + break; + } + } else { + // cfg:nor functions + switch (command) { + case NORCommands::Initialize: norInitialize(messagePointer); break; + case NORCommands::ReadData: norReadData(messagePointer); break; + + default: Helpers::panic("CFG:NOR service requested. Command: %08X\n", command); + } } } @@ -84,14 +124,14 @@ void CFGService::getConfigInfoBlk2(u32 messagePointer) { mem.write32(messagePointer + 4, Result::Success); } -void CFGService::getConfigInfoBlk8(u32 messagePointer) { +void CFGService::getConfigInfoBlk8(u32 messagePointer, u32 commandWord) { u32 size = mem.read32(messagePointer + 4); u32 blockID = mem.read32(messagePointer + 8); u32 output = mem.read32(messagePointer + 16); // Pointer to write the output data to log("CFG::GetConfigInfoBlk8 (size = %X, block ID = %X, output pointer = %08X\n", size, blockID, output); getConfigInfo(output, blockID, size, 0x8); - mem.write32(messagePointer, IPC::responseHeader(0x401, 1, 2)); + mem.write32(messagePointer, IPC::responseHeader(commandWord >> 16, 1, 2)); mem.write32(messagePointer + 4, Result::Success); } @@ -100,7 +140,7 @@ void CFGService::getConfigInfo(u32 output, u32 blockID, u32 size, u32 permission if (size == 1 && blockID == 0x70001) { // Sound output mode mem.write8(output, static_cast(DSPService::SoundOutputMode::Stereo)); } else if (size == 1 && blockID == 0xA0002) { // System language - mem.write8(output, static_cast(LanguageCodes::English)); + mem.write8(output, static_cast(settings.systemLanguage)); } else if (size == 4 && blockID == 0xB0000) { // Country info mem.write8(output, 0); // Unknown mem.write8(output + 1, 0); // Unknown @@ -160,6 +200,37 @@ void CFGService::getConfigInfo(u32 output, u32 blockID, u32 size, u32 permission mem.write32(output, 0); } else if (size == 8 && blockID == 0x00090000) { mem.write64(output, 0); // Some sort of key used with nwm::UDS::InitializeWithVersion + } else if (size == 4 && blockID == 0x110000) { + mem.write32(output, 
1); // According to 3Dbrew, 0 means system setup is required + } else if (size == 2 && blockID == 0x50001) { + // Backlight controls. Values taken from Citra + mem.write8(output, 0); + mem.write8(output + 1, 2); + } else if (size == 8 && blockID == 0x50009) { + // N3DS Backlight controls? + mem.write64(output, 0); + } else if (size == 4 && blockID == 0x180000) { + // Infrared LED related? + mem.write32(output, 0); + } else if (size == 1 && blockID == 0xE0000) { + mem.write8(output, 0); + } else if ((size == 512 && blockID == 0xC0002) || (size == 148 && blockID == 0x100001)) { + // CTR parental controls block (0xC0002) and TWL parental controls block (0x100001) + for (u32 i = 0; i < size; i++) { + mem.write8(output + i, 0); + } + } else if (size == 2 && blockID == 0x100000) { + // EULA agreed + mem.write8(output, 1); // We have agreed to the EULA + mem.write8(output + 1, 1); // EULA version = 1 + } else if (size == 1 && blockID == 0x100002) { + Helpers::warn("Unimplemented TWL country code access"); + mem.write8(output, 0); + } else if (size == 24 && blockID == 0x180001) { + // QTM calibration data + for (u32 i = 0; i < size; i++) { + mem.write8(output + i, 0); + } } else { Helpers::panic("Unhandled GetConfigInfoBlk2 configuration. 
Size = %d, block = %X", size, blockID); } @@ -260,6 +331,25 @@ void CFGService::getLocalFriendCodeSeed(u32 messagePointer) { mem.write64(messagePointer + 8, 0); } +void CFGService::setConfigInfoBlk4(u32 messagePointer) { + u32 blockID = mem.read32(messagePointer + 4); + u32 size = mem.read32(messagePointer + 8); + u32 input = mem.read32(messagePointer + 16); + log("CFG::SetConfigInfoBlk4 (block ID = %X, size = %X, input pointer = %08X)\n", blockID, size, input); + + mem.write32(messagePointer, IPC::responseHeader(0x401, 1, 2)); + mem.write32(messagePointer + 4, Result::Success); + mem.write32(messagePointer + 8, IPC::pointerHeader(0, size, IPC::BufferType::Receive)); + mem.write32(messagePointer + 12, input); +} + +void CFGService::updateConfigNANDSavegame(u32 messagePointer) { + log("CFG::UpdateConfigNANDSavegame\n"); + + mem.write32(messagePointer, IPC::responseHeader(0x403, 1, 0)); + mem.write32(messagePointer + 4, Result::Success); +} + // https://www.3dbrew.org/wiki/Cfg:TranslateCountryInfo void CFGService::translateCountryInfo(u32 messagePointer) { const u32 country = mem.read32(messagePointer + 4); @@ -292,4 +382,28 @@ void CFGService::translateCountryInfo(u32 messagePointer) { mem.write32(messagePointer, IPC::responseHeader(0x8, 2, 0)); mem.write32(messagePointer + 4, Result::Success); mem.write32(messagePointer + 8, result); +} + +void CFGService::isFangateSupported(u32 messagePointer) { + log("CFG::IsFangateSupported\n"); + + // TODO: What even is fangate? 
+ mem.write32(messagePointer, IPC::responseHeader(0xB, 2, 0)); + mem.write32(messagePointer + 4, Result::Success); + mem.write32(messagePointer + 8, 1); +} + +void CFGService::norInitialize(u32 messagePointer) { + log("CFG::NOR::Initialize\n"); + + mem.write32(messagePointer, IPC::responseHeader(0x1, 1, 0)); + mem.write32(messagePointer + 4, Result::Success); +} + +void CFGService::norReadData(u32 messagePointer) { + log("CFG::NOR::ReadData\n"); + Helpers::warn("Unimplemented CFG::NOR::ReadData"); + + mem.write32(messagePointer, IPC::responseHeader(0x5, 1, 0)); + mem.write32(messagePointer + 4, Result::Success); } \ No newline at end of file diff --git a/src/core/services/dsp.cpp b/src/core/services/dsp.cpp index 8c514761..93de78da 100644 --- a/src/core/services/dsp.cpp +++ b/src/core/services/dsp.cpp @@ -1,10 +1,18 @@ #include "services/dsp.hpp" -#include "ipc.hpp" -#include "kernel.hpp" + +#include +#include +#include #include +#include #include +#include "config.hpp" +#include "ipc.hpp" +#include "kernel.hpp" +#include "services/dsp_firmware_db.hpp" + namespace DSPCommands { enum : u32 { RecvData = 0x00010040, @@ -20,7 +28,8 @@ namespace DSPCommands { RegisterInterruptEvents = 0x00150082, GetSemaphoreEventHandle = 0x00160000, SetSemaphoreMask = 0x00170040, - GetHeadphoneStatus = 0x001F0000 + GetHeadphoneStatus = 0x001F0000, + ForceHeadphoneOut = 0x00200040, }; } @@ -34,6 +43,7 @@ namespace Result { void DSPService::reset() { totalEventCount = 0; semaphoreMask = 0; + headphonesInserted = true; semaphoreEvent = std::nullopt; interrupt0 = std::nullopt; @@ -52,6 +62,7 @@ void DSPService::handleSyncRequest(u32 messagePointer) { case DSPCommands::ConvertProcessAddressFromDspDram: convertProcessAddressFromDspDram(messagePointer); break; case DSPCommands::FlushDataCache: flushDataCache(messagePointer); break; case DSPCommands::InvalidateDataCache: invalidateDCache(messagePointer); break; + case DSPCommands::ForceHeadphoneOut: forceHeadphoneOut(messagePointer); break; 
case DSPCommands::GetHeadphoneStatus: getHeadphoneStatus(messagePointer); break; case DSPCommands::GetSemaphoreEventHandle: getSemaphoreEventHandle(messagePointer); break; case DSPCommands::LoadComponent: loadComponent(messagePointer); break; @@ -92,6 +103,10 @@ void DSPService::loadComponent(u32 messagePointer) { log("DSP::LoadComponent (size = %08X, program mask = %X, data mask = %X\n", size, programMask, dataMask); dsp->loadComponent(loadedComponent, programMask, dataMask); + if (config.printDSPFirmware) { + printFirmwareInfo(); + } + mem.write32(messagePointer, IPC::responseHeader(0x11, 2, 2)); mem.write32(messagePointer + 4, Result::Success); mem.write32(messagePointer + 8, 1); // Component loaded @@ -198,7 +213,8 @@ void DSPService::getHeadphoneStatus(u32 messagePointer) { mem.write32(messagePointer, IPC::responseHeader(0x1F, 2, 0)); mem.write32(messagePointer + 4, Result::Success); - mem.write32(messagePointer + 8, Result::HeadphonesInserted); // This should be toggleable for shits and giggles + // This should be toggleable for shits and giggles + mem.write32(messagePointer + 8, headphonesInserted ? 
Result::HeadphonesInserted : Result::HeadphonesNotInserted); } void DSPService::getSemaphoreEventHandle(u32 messagePointer) { @@ -266,6 +282,14 @@ void DSPService::invalidateDCache(u32 messagePointer) { mem.write32(messagePointer + 4, Result::Success); } +void DSPService::forceHeadphoneOut(u32 messagePointer) { + headphonesInserted = mem.read8(messagePointer + 4) != 0; + + log("DSP::ForceHeadphoneOut\n"); + mem.write32(messagePointer, IPC::responseHeader(0x20, 1, 0)); + mem.write32(messagePointer + 4, Result::Success); +} + DSPService::ComponentDumpResult DSPService::dumpComponent(const std::filesystem::path& path) { if (loadedComponent.empty()) { return ComponentDumpResult::NotLoaded; @@ -303,4 +327,44 @@ void DSPService::triggerInterrupt1() { if (interrupt1.has_value()) { kernel.signalEvent(*interrupt1); } -} \ No newline at end of file +} + +void DSPService::printFirmwareInfo() { + // No component has been loaded, do nothing. + if (!loadedComponent.size()) { + return; + } + + const auto& firmwareDB = DSP::firmwareDB; + const usize firmwareSize = loadedComponent.size(); + std::array hash; + + CryptoPP::SHA256 sha; + sha.CalculateDigest(hash.data(), loadedComponent.data(), firmwareSize); + + fmt::print("\nLoaded DSP firmware\n"); + fmt::print("Firmware SHA-256 hash: {:X}\n", fmt::join(hash, "")); + fmt::print("Size: {} bytes ({} KB)\n", firmwareSize, firmwareSize / 1024); + + bool knownFirmware = false; + + for (int i = 0; i < firmwareDB.size(); i++) { + const auto& entry = firmwareDB[i]; + if (entry.size == firmwareSize && std::memcmp(hash.data(), entry.hash.data(), hash.size()) == 0) { + knownFirmware = true; + fmt::print( + "Firmware found in DSP firmware DB.\nFeatures AAC decoder: {}\nOther notes: {}\n", entry.supportsAAC ? 
"yes" : "no", entry.notes + ); + + break; + } + } + + if (!knownFirmware) { + fmt::print("Firmware not found in DSP firmware DB.\nHash in case you want to add it to the DB: {{{:#X}}}\n", fmt::join(hash, ", ")); + // DSP firmwares that feature AAC decoding are usually around 210KB as opposed to the average DSP firmware which is around 48KB + fmt::print("Features AAC decoder: {}\n", firmwareSize >= 200_KB ? "probably yes" : "probably not"); + } + + fmt::print("\n"); +} diff --git a/src/core/services/fonts.cpp b/src/core/services/fonts.cpp new file mode 100644 index 00000000..ec4652ee --- /dev/null +++ b/src/core/services/fonts.cpp @@ -0,0 +1,109 @@ +// Copyright 2016 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +// Adapted from https://github.com/PabloMK7/citra/blob/master/src/core/hle/service/apt/bcfnt/bcfnt.cpp + +#include "services/fonts.hpp" + +#include + +namespace HLE::Fonts { + void relocateSharedFont(u8* sharedFont, u32 newAddress) { + constexpr u32 sharedFontStartOffset = 0x80; + const u8* cfntData = &sharedFont[sharedFontStartOffset]; + + CFNT cfnt; + std::memcpy(&cfnt, cfntData, sizeof(cfnt)); + + u32 assumedCmapOffset = 0; + u32 assumedCwdhOffset = 0; + u32 assumedTglpOffset = 0; + u32 firstCmapOffset = 0; + u32 firstCwdhOffset = 0; + u32 firstTglpOffset = 0; + + // First discover the location of sections so that the rebase offset can be auto-detected + u32 currentOffset = sharedFontStartOffset + cfnt.headerSize; + for (uint block = 0; block < cfnt.numBlocks; ++block) { + const u8* data = &sharedFont[currentOffset]; + + SectionHeader sectionHeader; + std::memcpy(§ionHeader, data, sizeof(sectionHeader)); + + if (firstCmapOffset == 0 && std::memcmp(sectionHeader.magic, "CMAP", 4) == 0) { + firstCmapOffset = currentOffset; + } else if (firstCwdhOffset == 0 && std::memcmp(sectionHeader.magic, "CWDH", 4) == 0) { + firstCwdhOffset = currentOffset; + } else if (firstTglpOffset == 0 && 
std::memcmp(sectionHeader.magic, "TGLP", 4) == 0) { + firstTglpOffset = currentOffset; + } else if (std::memcmp(sectionHeader.magic, "FINF", 4) == 0) { + Fonts::FINF finf; + std::memcpy(&finf, data, sizeof(finf)); + + assumedCmapOffset = finf.cmapOffset - sizeof(SectionHeader); + assumedCwdhOffset = finf.cwdhOffset - sizeof(SectionHeader); + assumedTglpOffset = finf.tglpOffset - sizeof(SectionHeader); + } + + currentOffset += sectionHeader.sectionSize; + } + + u32 previousBase = assumedCmapOffset - firstCmapOffset; + if ((previousBase != assumedCwdhOffset - firstCwdhOffset) || (previousBase != assumedTglpOffset - firstTglpOffset)) { + Helpers::warn("You shouldn't be seeing this. Shared Font file offsets might be borked?"); + } + + u32 offset = newAddress - previousBase; + + // Reset pointer back to start of sections and do the actual rebase + currentOffset = sharedFontStartOffset + cfnt.headerSize; + for (uint block = 0; block < cfnt.numBlocks; ++block) { + u8* data = &sharedFont[currentOffset]; + + SectionHeader sectionHeader; + std::memcpy(§ionHeader, data, sizeof(sectionHeader)); + + if (std::memcmp(sectionHeader.magic, "FINF", 4) == 0) { + Fonts::FINF finf; + std::memcpy(&finf, data, sizeof(finf)); + + // Relocate the offsets in the FINF section + finf.cmapOffset += offset; + finf.cwdhOffset += offset; + finf.tglpOffset += offset; + + std::memcpy(data, &finf, sizeof(finf)); + } else if (std::memcmp(sectionHeader.magic, "CMAP", 4) == 0) { + Fonts::CMAP cmap; + std::memcpy(&cmap, data, sizeof(cmap)); + + // Relocate the offsets in the CMAP section + if (cmap.nextCmapOffset != 0) { + cmap.nextCmapOffset += offset; + } + + std::memcpy(data, &cmap, sizeof(cmap)); + } else if (std::memcmp(sectionHeader.magic, "CWDH", 4) == 0) { + Fonts::CWDH cwdh; + std::memcpy(&cwdh, data, sizeof(cwdh)); + + // Relocate the offsets in the CWDH section + if (cwdh.nextCwdhOffset != 0) { + cwdh.nextCwdhOffset += offset; + } + + std::memcpy(data, &cwdh, sizeof(cwdh)); + } else if 
(std::memcmp(sectionHeader.magic, "TGLP", 4) == 0) { + Fonts::TGLP tglp; + std::memcpy(&tglp, data, sizeof(tglp)); + + // Relocate the offsets in the TGLP section + tglp.sheetDataOffset += offset; + std::memcpy(data, &tglp, sizeof(tglp)); + } + + currentOffset += sectionHeader.sectionSize; + } + } +} // namespace HLE::Fonts diff --git a/src/core/services/fonts/CitraSharedFontUSRelocated.bin b/src/core/services/fonts/SharedFontReplacement.bin similarity index 100% rename from src/core/services/fonts/CitraSharedFontUSRelocated.bin rename to src/core/services/fonts/SharedFontReplacement.bin diff --git a/src/core/services/fs.cpp b/src/core/services/fs.cpp index 2e102958..54a4241d 100644 --- a/src/core/services/fs.cpp +++ b/src/core/services/fs.cpp @@ -99,13 +99,18 @@ ArchiveBase* FSService::getArchiveFromID(u32 id, const FSPath& archivePath) { case ArchiveID::SDMC: return &sdmc; case ArchiveID::SDMCWriteOnly: return &sdmcWriteOnly; case ArchiveID::SavedataAndNcch: return &ncch; // This can only access NCCH outside of FSPXI + + case ArchiveID::TwlPhoto: return &twlPhoto; + case ArchiveID::TwlSound: return &twlSound; + case ArchiveID::CardSPI: return &cardSpi; + default: Helpers::panic("Unknown archive. 
ID: %d\n", id); return nullptr; } } -std::optional FSService::openFileHandle(ArchiveBase* archive, const FSPath& path, const FSPath& archivePath, const FilePerms& perms) { +std::optional FSService::openFileHandle(ArchiveBase* archive, const FSPath& path, const FSPath& archivePath, const FilePerms& perms) { FileDescriptor opened = archive->openFile(path, perms); if (opened.has_value()) { // If opened doesn't have a value, we failed to open the file auto handle = kernel.makeObject(KernelObjectType::File); @@ -119,7 +124,7 @@ std::optional FSService::openFileHandle(ArchiveBase* archive, const FSPa } } -Rust::Result FSService::openDirectoryHandle(ArchiveBase* archive, const FSPath& path) { +Rust::Result FSService::openDirectoryHandle(ArchiveBase* archive, const FSPath& path) { Rust::Result opened = archive->openDirectory(path); if (opened.isOk()) { // If opened doesn't have a value, we failed to open the directory auto handle = kernel.makeObject(KernelObjectType::Directory); @@ -132,7 +137,7 @@ Rust::Result FSService::openDirectoryHandle(Archi } } -Rust::Result FSService::openArchiveHandle(u32 archiveID, const FSPath& path) { +Rust::Result FSService::openArchiveHandle(u32 archiveID, const FSPath& path) { ArchiveBase* archive = getArchiveFromID(archiveID, path); if (archive == nullptr) [[unlikely]] { diff --git a/src/core/services/gsp_gpu.cpp b/src/core/services/gsp_gpu.cpp index 8cf77a7e..5c6ab3d6 100644 --- a/src/core/services/gsp_gpu.cpp +++ b/src/core/services/gsp_gpu.cpp @@ -1,4 +1,5 @@ #include "services/gsp_gpu.hpp" + #include "PICA/regs.hpp" #include "ipc.hpp" #include "kernel.hpp" @@ -14,6 +15,7 @@ namespace ServiceCommands { WriteHwRegsWithMask = 0x00020084, SetBufferSwap = 0x00050200, FlushDataCache = 0x00080082, + InvalidateDataCache = 0x00090082, SetLCDForceBlack = 0x000B0040, TriggerCmdReqQueue = 0x000C0000, ReleaseRight = 0x00170000, @@ -21,7 +23,7 @@ namespace ServiceCommands { SaveVramSysArea = 0x00190000, RestoreVramSysArea = 0x001A0000, 
SetInternalPriorities = 0x001E0080, - StoreDataCache = 0x001F0082 + StoreDataCache = 0x001F0082, }; } @@ -38,7 +40,7 @@ namespace GXCommands { } void GPUService::reset() { - privilegedProcess = 0xFFFFFFFF; // Set the privileged process to an invalid handle + privilegedProcess = 0xFFFFFFFF; // Set the privileged process to an invalid handle interruptEvent = std::nullopt; gspThreadCount = 0; sharedMem = nullptr; @@ -63,6 +65,7 @@ void GPUService::handleSyncRequest(u32 messagePointer) { case ServiceCommands::ReadHwRegs: readHwRegs(messagePointer); break; case ServiceCommands::WriteHwRegs: writeHwRegs(messagePointer); break; case ServiceCommands::WriteHwRegsWithMask: writeHwRegsWithMask(messagePointer); break; + case ServiceCommands::InvalidateDataCache: invalidateDataCache(messagePointer); break; default: Helpers::panic("GPU service requested. Command: %08X\n", command); } } @@ -111,38 +114,38 @@ void GPUService::registerInterruptRelayQueue(u32 messagePointer) { log("GSP::GPU::RegisterInterruptRelayQueue (flags = %X, event handle = %X)\n", flags, eventHandle); const auto event = kernel.getObject(eventHandle, KernelObjectType::Event); - if (event == nullptr) { // Check if interrupt event is invalid + if (event == nullptr) { // Check if interrupt event is invalid Helpers::panic("Invalid event passed to GSP::GPU::RegisterInterruptRelayQueue"); } else { interruptEvent = eventHandle; } mem.write32(messagePointer, IPC::responseHeader(0x13, 2, 2)); - mem.write32(messagePointer + 4, Result::GSP::SuccessRegisterIRQ); // First init returns a unique result - mem.write32(messagePointer + 8, 0); // TODO: GSP module thread index - mem.write32(messagePointer + 12, 0); // Translation descriptor + mem.write32(messagePointer + 4, Result::GSP::SuccessRegisterIRQ); // First init returns a unique result + mem.write32(messagePointer + 8, 0); // TODO: GSP module thread index + mem.write32(messagePointer + 12, 0); // Translation descriptor mem.write32(messagePointer + 16, 
KernelHandles::GSPSharedMemHandle); } void GPUService::requestInterrupt(GPUInterrupt type) { - if (sharedMem == nullptr) [[unlikely]] { // Shared memory hasn't been set up yet + if (sharedMem == nullptr) [[unlikely]] { // Shared memory hasn't been set up yet return; } // TODO: Add support for multiple GSP threads - u8 index = sharedMem[0]; // The interrupt block is normally located at sharedMem + processGSPIndex*0x40 + u8 index = sharedMem[0]; // The interrupt block is normally located at sharedMem + processGSPIndex*0x40 u8& interruptCount = sharedMem[1]; u8 flagIndex = (index + interruptCount) % 0x34; interruptCount++; - sharedMem[2] = 0; // Set error code to 0 - sharedMem[0xC + flagIndex] = static_cast(type); // Write interrupt type to queue + sharedMem[2] = 0; // Set error code to 0 + sharedMem[0xC + flagIndex] = static_cast(type); // Write interrupt type to queue // Update framebuffer info in shared memory // Most new games check to make sure that the "flag" byte of the framebuffer info header is set to 0 // Not emulating this causes Yoshi's Wooly World, Captain Toad, Metroid 2 et al to hang if (type == GPUInterrupt::VBlank0 || type == GPUInterrupt::VBlank1) { - int screen = static_cast(type) - static_cast(GPUInterrupt::VBlank0); // 0 for top screen, 1 for bottom + int screen = static_cast(type) - static_cast(GPUInterrupt::VBlank0); // 0 for top screen, 1 for bottom FramebufferUpdate* update = getFramebufferInfo(screen); if (update->dirtyFlag & 1) { @@ -163,7 +166,6 @@ void GPUService::readHwRegs(u32 messagePointer) { const u32 initialDataPointer = mem.read32(messagePointer + 0x104); u32 dataPointer = initialDataPointer; log("GSP::GPU::ReadHwRegs (GPU address = %08X, size = %X, data address = %08X)\n", ioAddr, size, dataPointer); - // Check for alignment if ((size & 3) || (ioAddr & 3) || (dataPointer & 3)) { @@ -195,8 +197,8 @@ void GPUService::readHwRegs(u32 messagePointer) { } void GPUService::writeHwRegs(u32 messagePointer) { - u32 ioAddr = 
mem.read32(messagePointer + 4); // GPU address based at 0x1EB00000, word aligned - const u32 size = mem.read32(messagePointer + 8); // Size in bytes + u32 ioAddr = mem.read32(messagePointer + 4); // GPU address based at 0x1EB00000, word aligned + const u32 size = mem.read32(messagePointer + 8); // Size in bytes u32 dataPointer = mem.read32(messagePointer + 16); log("GSP::GPU::writeHwRegs (GPU address = %08X, size = %X, data address = %08X)\n", ioAddr, size, dataPointer); @@ -228,14 +230,14 @@ void GPUService::writeHwRegs(u32 messagePointer) { // Update sequential GPU registers using an array of data and mask values using this formula // GPU register = (register & ~mask) | (data & mask). void GPUService::writeHwRegsWithMask(u32 messagePointer) { - u32 ioAddr = mem.read32(messagePointer + 4); // GPU address based at 0x1EB00000, word aligned - const u32 size = mem.read32(messagePointer + 8); // Size in bytes + u32 ioAddr = mem.read32(messagePointer + 4); // GPU address based at 0x1EB00000, word aligned + const u32 size = mem.read32(messagePointer + 8); // Size in bytes - u32 dataPointer = mem.read32(messagePointer + 16); // Data pointer - u32 maskPointer = mem.read32(messagePointer + 24); // Mask pointer + u32 dataPointer = mem.read32(messagePointer + 16); // Data pointer + u32 maskPointer = mem.read32(messagePointer + 24); // Mask pointer - log("GSP::GPU::writeHwRegsWithMask (GPU address = %08X, size = %X, data address = %08X, mask address = %08X)\n", - ioAddr, size, dataPointer, maskPointer); + log("GSP::GPU::writeHwRegsWithMask (GPU address = %08X, size = %X, data address = %08X, mask address = %08X)\n", ioAddr, size, dataPointer, + maskPointer); // Check for alignment if ((size & 3) || (ioAddr & 3) || (dataPointer & 3) || (maskPointer & 3)) { @@ -278,6 +280,16 @@ void GPUService::flushDataCache(u32 messagePointer) { mem.write32(messagePointer + 4, Result::Success); } +void GPUService::invalidateDataCache(u32 messagePointer) { + u32 address = 
mem.read32(messagePointer + 4); + u32 size = mem.read32(messagePointer + 8); + u32 processHandle = handle = mem.read32(messagePointer + 16); + log("GSP::GPU::InvalidateDataCache(address = %08X, size = %X, process = %X)\n", address, size, processHandle); + + mem.write32(messagePointer, IPC::responseHeader(0x9, 1, 0)); + mem.write32(messagePointer + 4, Result::Success); +} + void GPUService::storeDataCache(u32 messagePointer) { u32 address = mem.read32(messagePointer + 4); u32 size = mem.read32(messagePointer + 8); @@ -339,11 +351,11 @@ void GPUService::setInternalPriorities(u32 messagePointer) { } void GPUService::processCommandBuffer() { - if (sharedMem == nullptr) [[unlikely]] { // Shared memory hasn't been set up yet + if (sharedMem == nullptr) [[unlikely]] { // Shared memory hasn't been set up yet return; } - constexpr int threadCount = 1; // TODO: More than 1 thread can have GSP commands at a time + constexpr int threadCount = 1; // TODO: More than 1 thread can have GSP commands at a time for (int t = 0; t < threadCount; t++) { u8* cmdBuffer = &sharedMem[0x800 + t * 0x200]; u8& commandsLeft = cmdBuffer[1]; @@ -396,9 +408,9 @@ void GPUService::memoryFill(u32* cmd) { u32 control = cmd[7]; // buf0 parameters - u32 start0 = cmd[1]; // Start address for the fill. If 0, don't fill anything - u32 value0 = cmd[2]; // Value to fill the framebuffer with - u32 end0 = cmd[3]; // End address for the fill + u32 start0 = cmd[1]; // Start address for the fill. 
If 0, don't fill anything + u32 value0 = cmd[2]; // Value to fill the framebuffer with + u32 end0 = cmd[3]; // End address for the fill u32 control0 = control & 0xffff; // buf1 parameters @@ -427,7 +439,7 @@ void GPUService::triggerDisplayTransfer(u32* cmd) { log("GSP::GPU::TriggerDisplayTransfer (Stubbed)\n"); gpu.displayTransfer(inputAddr, outputAddr, inputSize, outputSize, flags); - requestInterrupt(GPUInterrupt::PPF); // Send "Display transfer finished" interrupt + requestInterrupt(GPUInterrupt::PPF); // Send "Display transfer finished" interrupt } void GPUService::triggerDMARequest(u32* cmd) { @@ -441,22 +453,14 @@ void GPUService::triggerDMARequest(u32* cmd) { requestInterrupt(GPUInterrupt::DMA); } -void GPUService::flushCacheRegions(u32* cmd) { - log("GSP::GPU::FlushCacheRegions (Stubbed)\n"); -} +void GPUService::flushCacheRegions(u32* cmd) { log("GSP::GPU::FlushCacheRegions (Stubbed)\n"); } void GPUService::setBufferSwapImpl(u32 screenId, const FramebufferInfo& info) { using namespace PICA::ExternalRegs; static constexpr std::array fbAddresses = { - Framebuffer0AFirstAddr, - Framebuffer0BFirstAddr, - Framebuffer1AFirstAddr, - Framebuffer1BFirstAddr, - Framebuffer0ASecondAddr, - Framebuffer0BSecondAddr, - Framebuffer1ASecondAddr, - Framebuffer1BSecondAddr, + Framebuffer0AFirstAddr, Framebuffer0BFirstAddr, Framebuffer1AFirstAddr, Framebuffer1BFirstAddr, + Framebuffer0ASecondAddr, Framebuffer0BSecondAddr, Framebuffer1ASecondAddr, Framebuffer1BSecondAddr, }; auto& regs = gpu.getExtRegisters(); @@ -466,12 +470,7 @@ void GPUService::setBufferSwapImpl(u32 screenId, const FramebufferInfo& info) { regs[fbAddresses[fbIndex + 1]] = VaddrToPaddr(info.rightFramebufferVaddr); static constexpr std::array configAddresses = { - Framebuffer0Config, - Framebuffer0Select, - Framebuffer0Stride, - Framebuffer1Config, - Framebuffer1Select, - Framebuffer1Stride, + Framebuffer0Config, Framebuffer0Select, Framebuffer0Stride, Framebuffer1Config, Framebuffer1Select, 
Framebuffer1Stride, }; const u32 configIndex = screenId * 3; @@ -482,14 +481,14 @@ void GPUService::setBufferSwapImpl(u32 screenId, const FramebufferInfo& info) { // Actually send command list (aka display list) to GPU void GPUService::processCommandList(u32* cmd) { - const u32 address = cmd[1] & ~7; // Buffer address - const u32 size = cmd[2] & ~3; // Buffer size in bytes - [[maybe_unused]] const bool updateGas = cmd[3] == 1; // Update gas additive blend results (0 = don't update, 1 = update) - [[maybe_unused]] const bool flushBuffer = cmd[7] == 1; // Flush buffer (0 = don't flush, 1 = flush) + const u32 address = cmd[1] & ~7; // Buffer address + const u32 size = cmd[2] & ~3; // Buffer size in bytes + [[maybe_unused]] const bool updateGas = cmd[3] == 1; // Update gas additive blend results (0 = don't update, 1 = update) + [[maybe_unused]] const bool flushBuffer = cmd[7] == 1; // Flush buffer (0 = don't flush, 1 = flush) log("GPU::GSP::processCommandList. Address: %08X, size in bytes: %08X\n", address, size); gpu.startCommandList(address, size); - requestInterrupt(GPUInterrupt::P3D); // Send an IRQ when command list processing is over + requestInterrupt(GPUInterrupt::P3D); // Send an IRQ when command list processing is over } // TODO: Emulate the transfer engine & its registers @@ -564,4 +563,4 @@ void GPUService::importDisplayCaptureInfo(u32 messagePointer) { mem.write32(messagePointer + 28, bottomScreenCapture.rightFramebuffer); mem.write32(messagePointer + 32, bottomScreenCapture.format); mem.write32(messagePointer + 36, bottomScreenCapture.stride); -} +} \ No newline at end of file diff --git a/src/core/services/gsp_lcd.cpp b/src/core/services/gsp_lcd.cpp index d018166b..d2e0ac21 100644 --- a/src/core/services/gsp_lcd.cpp +++ b/src/core/services/gsp_lcd.cpp @@ -1,8 +1,10 @@ #include "services/gsp_lcd.hpp" + #include "ipc.hpp" namespace LCDCommands { enum : u32 { + SetLedForceOff = 0x00130040, }; } @@ -11,6 +13,16 @@ void LCDService::reset() {} void 
LCDService::handleSyncRequest(u32 messagePointer) { const u32 command = mem.read32(messagePointer); switch (command) { + case LCDCommands::SetLedForceOff: setLedForceOff(messagePointer); break; + default: Helpers::panic("LCD service requested. Command: %08X\n", command); } +} + +void LCDService::setLedForceOff(u32 messagePointer) { + const u8 state = mem.read8(messagePointer + 4); + log("LCD::SetLedForceOff (state = %X)\n", state); + + mem.write32(messagePointer, IPC::responseHeader(0x13, 1, 0)); + mem.write32(messagePointer + 4, Result::Success); } \ No newline at end of file diff --git a/src/core/services/hid.cpp b/src/core/services/hid.cpp index ef6cbb41..a7b9b13b 100644 --- a/src/core/services/hid.cpp +++ b/src/core/services/hid.cpp @@ -35,6 +35,7 @@ void HIDService::reset() { circlePadX = circlePadY = 0; touchScreenX = touchScreenY = 0; roll = pitch = yaw = 0; + accelX = accelY = accelZ = 0; } void HIDService::handleSyncRequest(u32 messagePointer) { @@ -103,7 +104,6 @@ void HIDService::getGyroscopeLowCalibrateParam(u32 messagePointer) { void HIDService::getGyroscopeCoefficient(u32 messagePointer) { log("HID::GetGyroscopeLowRawToDpsCoefficient\n"); - constexpr float gyroscopeCoeff = 14.375f; // Same as retail 3DS mem.write32(messagePointer, IPC::responseHeader(0x15, 2, 0)); mem.write32(messagePointer + 4, Result::Success); mem.write32(messagePointer + 8, Helpers::bit_cast(gyroscopeCoeff)); @@ -190,6 +190,20 @@ void HIDService::updateInputs(u64 currentTick) { writeSharedMem(0x108, currentTick); // Write new tick count } writeSharedMem(0x118, nextAccelerometerIndex); // Index last updated by the HID module + const size_t accelEntryOffset = 0x128 + (nextAccelerometerIndex * 6); // Offset in the array of 8 accelerometer entries + + // Raw data of current accelerometer entry + // TODO: How is the "raw" data actually calculated? 
+ s16* accelerometerDataRaw = getSharedMemPointer(0x120); + accelerometerDataRaw[0] = accelX; + accelerometerDataRaw[1] = accelY; + accelerometerDataRaw[2] = accelZ; + + // Accelerometer entry in entry table + s16* accelerometerData = getSharedMemPointer(accelEntryOffset); + accelerometerData[0] = accelX; + accelerometerData[1] = accelY; + accelerometerData[2] = accelZ; nextAccelerometerIndex = (nextAccelerometerIndex + 1) % 8; // Move to next entry // Next, update gyro state @@ -198,9 +212,10 @@ void HIDService::updateInputs(u64 currentTick) { writeSharedMem(0x158, currentTick); // Write new tick count } const size_t gyroEntryOffset = 0x178 + (nextGyroIndex * 6); // Offset in the array of 8 touchscreen entries - writeSharedMem(gyroEntryOffset, pitch); - writeSharedMem(gyroEntryOffset + 2, yaw); - writeSharedMem(gyroEntryOffset + 4, roll); + s16* gyroData = getSharedMemPointer(gyroEntryOffset); + gyroData[0] = pitch; + gyroData[1] = yaw; + gyroData[2] = roll; // Since gyroscope euler angles are relative, we zero them out here and the frontend will update them again when we receive a new rotation roll = pitch = yaw = 0; diff --git a/src/core/services/mcu/mcu_hwc.cpp b/src/core/services/mcu/mcu_hwc.cpp index 2873adf5..0e4e6ed3 100644 --- a/src/core/services/mcu/mcu_hwc.cpp +++ b/src/core/services/mcu/mcu_hwc.cpp @@ -1,10 +1,12 @@ +#include "services/mcu/mcu_hwc.hpp" + #include "ipc.hpp" #include "result/result.hpp" -#include "services/mcu/mcu_hwc.hpp" namespace MCU::HWCCommands { enum : u32 { GetBatteryLevel = 0x00050000, + SetInfoLedPattern = 0x000A0640, }; } @@ -14,6 +16,7 @@ void MCU::HWCService::handleSyncRequest(u32 messagePointer) { const u32 command = mem.read32(messagePointer); switch (command) { case HWCCommands::GetBatteryLevel: getBatteryLevel(messagePointer); break; + case HWCCommands::SetInfoLedPattern: setInfoLEDPattern(messagePointer); break; default: Helpers::panic("MCU::HWC service requested. 
Command: %08X\n", command); } } @@ -24,4 +27,12 @@ void MCU::HWCService::getBatteryLevel(u32 messagePointer) { mem.write32(messagePointer, IPC::responseHeader(0x5, 2, 0)); mem.write32(messagePointer + 4, Result::Success); mem.write8(messagePointer + 8, config.batteryPercentage); +} + +void MCU::HWCService::setInfoLEDPattern(u32 messagePointer) { + log("MCU::HWC::SetInfoLedPattern\n"); + + // 25 parameters to make some notification LEDs blink... + mem.write32(messagePointer, IPC::responseHeader(0xA, 1, 0)); + mem.write32(messagePointer + 4, Result::Success); } \ No newline at end of file diff --git a/src/core/services/ns.cpp b/src/core/services/ns.cpp new file mode 100644 index 00000000..f26e72d3 --- /dev/null +++ b/src/core/services/ns.cpp @@ -0,0 +1,32 @@ +#include "services/ns.hpp" + +#include "ipc.hpp" + +namespace NSCommands { + enum : u32 { + LaunchTitle = 0x000200C0, + }; +} + +void NSService::reset() {} + +void NSService::handleSyncRequest(u32 messagePointer, Type type) { + const u32 command = mem.read32(messagePointer); + + // ns:s commands + switch (command) { + case NSCommands::LaunchTitle: launchTitle(messagePointer); break; + + default: Helpers::panic("NS service requested. 
Command: %08X\n", command); + } +} + +void NSService::launchTitle(u32 messagePointer) { + const u64 titleID = mem.read64(messagePointer + 4); + const u32 launchFlags = mem.read32(messagePointer + 12); + Helpers::warn("NS::LaunchTitle (title ID = %llX, launch flags = %X) (stubbed)", titleID, launchFlags); + + mem.write32(messagePointer, IPC::responseHeader(0x2, 2, 0)); + mem.write32(messagePointer + 4, Result::Success); + mem.write32(messagePointer + 8, 0); // Process ID +} diff --git a/src/core/services/ptm.cpp b/src/core/services/ptm.cpp index 67451cc2..d01a14ff 100644 --- a/src/core/services/ptm.cpp +++ b/src/core/services/ptm.cpp @@ -1,4 +1,5 @@ #include "services/ptm.hpp" + #include "ipc.hpp" namespace PTMCommands { @@ -12,11 +13,16 @@ namespace PTMCommands { GetStepHistoryAll = 0x000F0084, ConfigureNew3DSCPU = 0x08180040, + // ptm:gets functions + GetSystemTime = 0x04010000, + // ptm:play functions GetPlayHistory = 0x08070082, GetPlayHistoryStart = 0x08080000, GetPlayHistoryLength = 0x08090000, CalcPlayHistoryStart = 0x080B0080, + GetSoftwareClosedFlag = 0x080F0000, + ClearSoftwareClosedFlag = 0x08100000, }; } @@ -27,33 +33,49 @@ void PTMService::handleSyncRequest(u32 messagePointer, PTMService::Type type) { // ptm:play functions switch (command) { - case PTMCommands::ConfigureNew3DSCPU: configureNew3DSCPU(messagePointer); break; - case PTMCommands::GetAdapterState: getAdapterState(messagePointer); break; - case PTMCommands::GetBatteryChargeState: getBatteryChargeState(messagePointer); break; - case PTMCommands::GetBatteryLevel: getBatteryLevel(messagePointer); break; - case PTMCommands::GetPedometerState: getPedometerState(messagePointer); break; - case PTMCommands::GetStepHistory: getStepHistory(messagePointer); break; - case PTMCommands::GetStepHistoryAll: getStepHistoryAll(messagePointer); break; - case PTMCommands::GetTotalStepCount: getTotalStepCount(messagePointer); break; + case PTMCommands::ConfigureNew3DSCPU: configureNew3DSCPU(messagePointer); 
break; + case PTMCommands::GetAdapterState: getAdapterState(messagePointer); break; + case PTMCommands::GetBatteryChargeState: getBatteryChargeState(messagePointer); break; + case PTMCommands::GetBatteryLevel: getBatteryLevel(messagePointer); break; + case PTMCommands::GetPedometerState: getPedometerState(messagePointer); break; + case PTMCommands::GetStepHistory: getStepHistory(messagePointer); break; + case PTMCommands::GetStepHistoryAll: getStepHistoryAll(messagePointer); break; + case PTMCommands::GetTotalStepCount: getTotalStepCount(messagePointer); break; - default: - // ptm:play-only functions - if (type == Type::PLAY) { - switch (command) { - case PTMCommands::GetPlayHistory: - case PTMCommands::GetPlayHistoryStart: - case PTMCommands::GetPlayHistoryLength: - mem.write32(messagePointer + 4, Result::Success); - mem.write64(messagePointer + 8, 0); - Helpers::warn("Stubbed PTM:PLAY service requested. Command: %08X\n", command); - break; + default: + // ptm:play-only functions + if (type == Type::PLAY) { + switch (command) { + case PTMCommands::GetPlayHistory: + case PTMCommands::GetPlayHistoryStart: + case PTMCommands::GetPlayHistoryLength: + mem.write32(messagePointer + 4, Result::Success); + mem.write64(messagePointer + 8, 0); + Helpers::warn("Stubbed PTM:PLAY service requested. Command: %08X\n", command); + break; - default: Helpers::panic("PTM PLAY service requested. Command: %08X\n", command); break; - } - } else { - Helpers::panic("PTM service requested. Command: %08X\n", command); + default: Helpers::panic("PTM PLAY service requested. Command: %08X\n", command); break; } - } + } else if (type == Type::GETS) { + switch (command) { + case PTMCommands::GetSystemTime: getSystemTime(messagePointer); break; + + default: Helpers::panic("PTM GETS service requested. 
Command: %08X\n", command); break; + } + } else if (type == Type::SYSM) { + switch (command) { + case PTMCommands::GetSoftwareClosedFlag: getSoftwareClosedFlag(messagePointer); break; + case PTMCommands::ClearSoftwareClosedFlag: clearSoftwareClosedFlag(messagePointer); break; + + default: + mem.write32(messagePointer + 4, Result::Success); + Helpers::warn("PTM SYSM service requested. Command: %08X\n", command); + break; + } + } else { + Helpers::panic("PTM service requested. Command: %08X\n", command); + } + } } void PTMService::getAdapterState(u32 messagePointer) { @@ -107,11 +129,33 @@ void PTMService::getTotalStepCount(u32 messagePointer) { log("PTM::GetTotalStepCount\n"); mem.write32(messagePointer, IPC::responseHeader(0xC, 2, 0)); mem.write32(messagePointer + 4, Result::Success); - mem.write32(messagePointer + 8, 3); // We walk a lot + mem.write32(messagePointer + 8, 3); // We walk a lot } void PTMService::configureNew3DSCPU(u32 messagePointer) { log("PTM::ConfigureNew3DSCPU [stubbed]\n"); mem.write32(messagePointer, IPC::responseHeader(0x818, 1, 0)); mem.write32(messagePointer + 4, Result::Success); +} + +void PTMService::getSystemTime(u32 messagePointer) { + log("PTM::GetSystemTime [stubbed]\n"); + Helpers::warn("PTM::GetSystemTime called"); + + mem.write32(messagePointer, IPC::responseHeader(0x401, 3, 0)); + mem.write32(messagePointer + 4, Result::Success); + mem.write64(messagePointer + 8, 0); // Milliseconds since 2000? 
+} + +void PTMService::getSoftwareClosedFlag(u32 messagePointer) { + log("PTM::GetSoftwareClosedFlag\n"); + mem.write32(messagePointer, IPC::responseHeader(0x80F, 2, 0)); + mem.write32(messagePointer + 4, Result::Success); + mem.write8(messagePointer + 8, 0); // Show software closed dialog +} + +void PTMService::clearSoftwareClosedFlag(u32 messagePointer) { + log("PTM::ClearSoftwareClosedFlag\n"); + mem.write32(messagePointer, IPC::responseHeader(0x810, 1, 0)); + mem.write32(messagePointer + 4, Result::Success); } \ No newline at end of file diff --git a/src/core/services/service_manager.cpp b/src/core/services/service_manager.cpp index 2a95b5c9..ccbbdee8 100644 --- a/src/core/services/service_manager.cpp +++ b/src/core/services/service_manager.cpp @@ -6,10 +6,10 @@ #include "kernel.hpp" ServiceManager::ServiceManager(std::span regs, Memory& mem, GPU& gpu, u32& currentPID, Kernel& kernel, const EmulatorConfig& config) - : regs(regs), mem(mem), kernel(kernel), ac(mem), am(mem), boss(mem), act(mem), apt(mem, kernel), cam(mem, kernel), cecd(mem, kernel), cfg(mem), - csnd(mem, kernel), dlp_srvr(mem), dsp(mem, kernel), hid(mem, kernel), http(mem), ir_user(mem, kernel), frd(mem), fs(mem, kernel, config), - gsp_gpu(mem, gpu, kernel, currentPID), gsp_lcd(mem), ldr(mem, kernel), mcu_hwc(mem, config), mic(mem, kernel), nfc(mem, kernel), nim(mem), ndm(mem), - news_u(mem), nwm_uds(mem, kernel), ptm(mem, config), soc(mem), ssl(mem), y2r(mem, kernel) {} + : regs(regs), mem(mem), kernel(kernel), ac(mem), am(mem), boss(mem), act(mem), apt(mem, kernel), cam(mem, kernel), cecd(mem, kernel), + cfg(mem, config), csnd(mem, kernel), dlp_srvr(mem), dsp(mem, kernel, config), hid(mem, kernel), http(mem), ir_user(mem, kernel), frd(mem), + fs(mem, kernel, config), gsp_gpu(mem, gpu, kernel, currentPID), gsp_lcd(mem), ldr(mem, kernel), mcu_hwc(mem, config), mic(mem, kernel), + nfc(mem, kernel), nim(mem), ndm(mem), news_u(mem), ns(mem), nwm_uds(mem, kernel), ptm(mem, config), soc(mem), 
ssl(mem), y2r(mem, kernel) {} static constexpr int MAX_NOTIFICATION_COUNT = 16; @@ -40,6 +40,7 @@ void ServiceManager::reset() { news_u.reset(); nfc.reset(); nim.reset(); + ns.reset(); ptm.reset(); soc.reset(); ssl.reset(); @@ -93,24 +94,29 @@ void ServiceManager::registerClient(u32 messagePointer) { } // clang-format off -static std::map serviceMap = { +static std::map serviceMap = { { "ac:u", KernelHandles::AC }, + { "ac:i", KernelHandles::AC }, { "act:a", KernelHandles::ACT }, { "act:u", KernelHandles::ACT }, { "am:app", KernelHandles::AM }, + { "am:sys", KernelHandles::AM }, { "APT:S", KernelHandles::APT }, // TODO: APT:A, APT:S and APT:U are slightly different { "APT:A", KernelHandles::APT }, { "APT:U", KernelHandles::APT }, { "boss:U", KernelHandles::BOSS }, + { "boss:P", KernelHandles::BOSS }, { "cam:u", KernelHandles::CAM }, { "cecd:u", KernelHandles::CECD }, { "cfg:u", KernelHandles::CFG_U }, { "cfg:i", KernelHandles::CFG_I }, { "cfg:s", KernelHandles::CFG_S }, + { "cfg:nor", KernelHandles::CFG_NOR }, { "csnd:SND", KernelHandles::CSND }, { "dlp:SRVR", KernelHandles::DLP_SRVR }, { "dsp::DSP", KernelHandles::DSP }, { "hid:USER", KernelHandles::HID }, + { "hid:SPVR", KernelHandles::HID }, { "http:C", KernelHandles::HTTP }, { "ir:USER", KernelHandles::IR_USER }, { "frd:a", KernelHandles::FRD_A }, @@ -125,11 +131,13 @@ static std::map serviceMap = { { "news:u", KernelHandles::NEWS_U }, { "nfc:u", KernelHandles::NFC }, { "ns:s", KernelHandles::NS_S }, + { "nwm::EXT", KernelHandles::NWM_EXT }, { "nwm::UDS", KernelHandles::NWM_UDS }, { "nim:aoc", KernelHandles::NIM }, { "ptm:u", KernelHandles::PTM_U }, // TODO: ptm:u and ptm:sysm have very different command sets { "ptm:sysm", KernelHandles::PTM_SYSM }, { "ptm:play", KernelHandles::PTM_PLAY }, + { "ptm:gets", KernelHandles::PTM_GETS }, { "soc:U", KernelHandles::SOC }, { "ssl:C", KernelHandles::SSL }, { "y2r:u", KernelHandles::Y2R }, @@ -212,6 +220,7 @@ void ServiceManager::sendCommandToService(u32 messagePointer, 
Handle handle) { case KernelHandles::CFG_U: cfg.handleSyncRequest(messagePointer, CFGService::Type::U); break; case KernelHandles::CFG_I: cfg.handleSyncRequest(messagePointer, CFGService::Type::I); break; case KernelHandles::CFG_S: cfg.handleSyncRequest(messagePointer, CFGService::Type::S); break; + case KernelHandles::CFG_NOR: cfg.handleSyncRequest(messagePointer, CFGService::Type::NOR); break; case KernelHandles::CSND: csnd.handleSyncRequest(messagePointer); break; case KernelHandles::DLP_SRVR: dlp_srvr.handleSyncRequest(messagePointer); break; case KernelHandles::HID: hid.handleSyncRequest(messagePointer); break; @@ -227,11 +236,12 @@ void ServiceManager::sendCommandToService(u32 messagePointer, Handle handle) { case KernelHandles::NIM: nim.handleSyncRequest(messagePointer); break; case KernelHandles::NDM: ndm.handleSyncRequest(messagePointer); break; case KernelHandles::NEWS_U: news_u.handleSyncRequest(messagePointer); break; - case KernelHandles::NS_S: Helpers::panic("Unimplemented SendSyncRequest to ns:s"); break; + case KernelHandles::NS_S: ns.handleSyncRequest(messagePointer, NSService::Type::S); break; case KernelHandles::NWM_UDS: nwm_uds.handleSyncRequest(messagePointer); break; case KernelHandles::PTM_PLAY: ptm.handleSyncRequest(messagePointer, PTMService::Type::PLAY); break; case KernelHandles::PTM_SYSM: ptm.handleSyncRequest(messagePointer, PTMService::Type::SYSM); break; case KernelHandles::PTM_U: ptm.handleSyncRequest(messagePointer, PTMService::Type::U); break; + case KernelHandles::PTM_GETS: ptm.handleSyncRequest(messagePointer, PTMService::Type::GETS); break; case KernelHandles::SOC: soc.handleSyncRequest(messagePointer); break; case KernelHandles::SSL: ssl.handleSyncRequest(messagePointer); break; case KernelHandles::Y2R: y2r.handleSyncRequest(messagePointer); break; diff --git a/src/emulator.cpp b/src/emulator.cpp index db6c2e1f..11970d91 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -1,11 +1,13 @@ #include "emulator.hpp" -#ifndef 
__ANDROID__ +#if !defined(__ANDROID__) && !defined(__LIBRETRO__) #include #endif #include +#include "renderdoc.hpp" + #ifdef _WIN32 #include @@ -18,7 +20,7 @@ __declspec(dllexport) DWORD AmdPowerXpressRequestHighPerformance = 1; Emulator::Emulator() : config(getConfigPath()), kernel(cpu, memory, gpu, config), cpu(memory, kernel, *this), gpu(memory, config), memory(cpu.getTicksRef(), config), - cheats(memory, kernel.getServiceManager().getHID()), lua(*this), running(false) + cheats(memory, kernel.getServiceManager().getHID()), audioDevice(config.audioDeviceConfig), lua(*this), running(false) #ifdef PANDA3DS_ENABLE_HTTP_SERVER , httpServer(this) @@ -26,12 +28,16 @@ Emulator::Emulator() { DSPService& dspService = kernel.getServiceManager().getDSP(); - dsp = Audio::makeDSPCore(config.dspType, memory, scheduler, dspService); + dsp = Audio::makeDSPCore(config, memory, scheduler, dspService); dspService.setDSPCore(dsp.get()); audioDevice.init(dsp->getSamples()); setAudioEnabled(config.audioEnabled); + if (Renderdoc::isSupported() && config.enableRenderdoc) { + loadRenderdoc(); + } + #ifdef PANDA3DS_ENABLE_DISCORD_RPC if (config.discordRpcEnabled) { discordRpc.init(); @@ -44,6 +50,7 @@ Emulator::Emulator() Emulator::~Emulator() { config.save(); lua.close(); + audioDevice.close(); #ifdef PANDA3DS_ENABLE_DISCORD_RPC discordRpc.stop(); @@ -98,13 +105,18 @@ std::filesystem::path Emulator::getConfigPath() { if constexpr (Helpers::isAndroid()) { return getAndroidAppPath() / "config.toml"; } else { - return std::filesystem::current_path() / "config.toml"; + std::filesystem::path localPath = std::filesystem::current_path() / "config.toml"; + + if (std::filesystem::exists(localPath)) { + return localPath; + } else { + return getAppDataRoot() / "config.toml"; + } } } #endif void Emulator::step() {} -void Emulator::render() {} // Only resume if a ROM is properly loaded void Emulator::resume() { @@ -167,7 +179,7 @@ void Emulator::pollScheduler() { case 
Scheduler::EventType::UpdateTimers: kernel.pollTimers(); break; case Scheduler::EventType::RunDSP: { - dsp->runAudioFrame(); + dsp->runAudioFrame(time); break; } @@ -220,6 +232,8 @@ bool Emulator::loadROM(const std::filesystem::path& path) { const std::filesystem::path appDataPath = getAppDataRoot(); const std::filesystem::path dataPath = appDataPath / path.filename().stem(); const std::filesystem::path aesKeysPath = appDataPath / "sysdata" / "aes_keys.txt"; + const std::filesystem::path seedDBPath = appDataPath / "sysdata" / "seeddb.bin"; + IOFile::setAppDataDir(dataPath); // Open the text file containing our AES keys if it exists. We use the std::filesystem::exists overload that takes an error code param to @@ -229,6 +243,10 @@ bool Emulator::loadROM(const std::filesystem::path& path) { aesEngine.loadKeys(aesKeysPath); } + if (std::filesystem::exists(seedDBPath, ec) && !ec) { + aesEngine.setSeedPath(seedDBPath); + } + kernel.initializeFS(); auto extension = path.extension(); bool success; // Tracks if we loaded the ROM successfully @@ -237,7 +255,7 @@ bool Emulator::loadROM(const std::filesystem::path& path) { success = loadELF(path); else if (extension == ".3ds" || extension == ".cci") success = loadNCSD(path, ROMType::NCSD); - else if (extension == ".cxi" || extension == ".app") + else if (extension == ".cxi" || extension == ".app" || extension == ".ncch") success = loadNCSD(path, ROMType::CXI); else if (extension == ".3dsx") success = load3DSX(path); @@ -299,6 +317,11 @@ bool Emulator::load3DSX(const std::filesystem::path& path) { } bool Emulator::loadELF(const std::filesystem::path& path) { + // We can't open a new file with this ifstream if it's associated with a file + if (loadedELF.is_open()) { + loadedELF.close(); + } + loadedELF.open(path, std::ios_base::binary); // Open ROM in binary mode romType = ROMType::ELF; @@ -410,6 +433,10 @@ RomFS::DumpingResult Emulator::dumpRomFS(const std::filesystem::path& path) { } void Emulator::setAudioEnabled(bool 
enable) { + // Don't enable audio if we didn't manage to find an audio device and initialize it properly, otherwise audio sync will break, + // because the emulator will expect the audio device to drain the sample buffer, but there's no audio device running... + enable = enable && audioDevice.isInitialized(); + if (!enable) { audioDevice.stop(); } else if (enable && romType != ROMType::None && running) { @@ -420,3 +447,30 @@ void Emulator::setAudioEnabled(bool enable) { dsp->setAudioEnabled(enable); } + +void Emulator::loadRenderdoc() { + std::string capturePath = (std::filesystem::current_path() / "RenderdocCaptures").generic_string(); + Renderdoc::loadRenderdoc(); + Renderdoc::setOutputDir(capturePath, ""); +} + +void Emulator::reloadSettings() { + setAudioEnabled(config.audioEnabled); + + if (Renderdoc::isSupported() && config.enableRenderdoc && !Renderdoc::isLoaded()) { + loadRenderdoc(); + } + +#ifdef PANDA3DS_ENABLE_DISCORD_RPC + // Reload RPC setting if we're compiling with RPC support + + if (discordRpc.running() != config.discordRpcEnabled) { + if (config.discordRpcEnabled) { + discordRpc.init(); + updateDiscord(); + } else { + discordRpc.stop(); + } + } +#endif +} diff --git a/src/frontend_settings.cpp b/src/frontend_settings.cpp new file mode 100644 index 00000000..498ba500 --- /dev/null +++ b/src/frontend_settings.cpp @@ -0,0 +1,63 @@ +#include "frontend_settings.hpp" + +#include +#include +#include + +// Frontend setting serialization/deserialization functions + +FrontendSettings::Theme FrontendSettings::themeFromString(std::string inString) { + // Transform to lower-case to make the setting case-insensitive + std::transform(inString.begin(), inString.end(), inString.begin(), [](unsigned char c) { return std::tolower(c); }); + + static const std::unordered_map map = { + {"system", Theme::System}, {"light", Theme::Light}, {"dark", Theme::Dark}, {"greetingscat", Theme::GreetingsCat}, {"cream", Theme::Cream}, + }; + + if (auto search = map.find(inString); 
search != map.end()) { + return search->second; + } + + // Default to dark theme + return Theme::Dark; +} + +const char* FrontendSettings::themeToString(Theme theme) { + switch (theme) { + case Theme::System: return "system"; + case Theme::Light: return "light"; + case Theme::GreetingsCat: return "greetingscat"; + case Theme::Cream: return "cream"; + + case Theme::Dark: + default: return "dark"; + } +} + +FrontendSettings::WindowIcon FrontendSettings::iconFromString(std::string inString) { // Transform to lower-case to make the setting case-insensitive + std::transform(inString.begin(), inString.end(), inString.begin(), [](unsigned char c) { return std::tolower(c); }); + + static const std::unordered_map map = { + {"rpog", WindowIcon::Rpog}, {"rsyn", WindowIcon::Rsyn}, {"rcow", WindowIcon::Rcow}, + {"rnap", WindowIcon::Rnap}, {"skyemu", WindowIcon::SkyEmu}, + }; + + if (auto search = map.find(inString); search != map.end()) { + return search->second; + } + + // Default to the icon rpog icon + return WindowIcon::Rpog; +} + +const char* FrontendSettings::iconToString(WindowIcon icon) { + switch (icon) { + case WindowIcon::Rsyn: return "rsyn"; + case WindowIcon::Rcow: return "rcow"; + case WindowIcon::Rnap: return "rnap"; + case WindowIcon::SkyEmu: return "skyemu"; + + case WindowIcon::Rpog: + default: return "rpog"; + } +} \ No newline at end of file diff --git a/src/host_shaders/metal_blit.metal b/src/host_shaders/metal_blit.metal new file mode 100644 index 00000000..31b94ec4 --- /dev/null +++ b/src/host_shaders/metal_blit.metal @@ -0,0 +1,29 @@ +#include +using namespace metal; + +#define GET_HELPER_TEXTURE_BINDING(binding) (30 - binding) +#define GET_HELPER_SAMPLER_STATE_BINDING(binding) (15 - binding) + +struct BasicVertexOut { + float4 position [[position]]; + float2 uv; +}; + +struct NDCViewport { + float2 offset; + float2 scale; +}; + +vertex BasicVertexOut vertexBlit(uint vid [[vertex_id]], constant NDCViewport& viewport [[buffer(0)]]) { + BasicVertexOut out; 
+ out.uv = float2((vid << 1) & 2, vid & 2); + out.position = float4(out.uv * 2.0 - 1.0, 0.0, 1.0); + out.position.y = -out.position.y; + out.uv = out.uv * viewport.scale + viewport.offset; + + return out; +} + +fragment float4 fragmentBlit(BasicVertexOut in [[stage_in]], texture2d tex [[texture(GET_HELPER_TEXTURE_BINDING(0))]], sampler samplr [[sampler(GET_HELPER_SAMPLER_STATE_BINDING(0))]]) { + return tex.sample(samplr, in.uv); +} diff --git a/src/host_shaders/metal_copy_to_lut_texture.metal b/src/host_shaders/metal_copy_to_lut_texture.metal new file mode 100644 index 00000000..c21246f1 --- /dev/null +++ b/src/host_shaders/metal_copy_to_lut_texture.metal @@ -0,0 +1,9 @@ +#include +using namespace metal; + +constant ushort lutTextureWidth [[function_constant(0)]]; + +// The copy is done in a vertex shader instead of a compute kernel, since dispatching compute would require ending the render pass +vertex void vertexCopyToLutTexture(uint vid [[vertex_id]], texture2d out [[texture(0)]], device float2* data [[buffer(0)]], constant uint& arrayOffset [[buffer(1)]]) { + out.write(float4(data[vid], 0.0, 0.0), uint2(vid % lutTextureWidth, arrayOffset + vid / lutTextureWidth)); +} diff --git a/src/host_shaders/metal_shaders.metal b/src/host_shaders/metal_shaders.metal new file mode 100644 index 00000000..18c310f7 --- /dev/null +++ b/src/host_shaders/metal_shaders.metal @@ -0,0 +1,759 @@ +#include +using namespace metal; + +struct BasicVertexOut { + float4 position [[position]]; + float2 uv; +}; + +constant float4 displayPositions[4] = { + float4(-1.0, -1.0, 0.0, 1.0), + float4( 1.0, -1.0, 0.0, 1.0), + float4(-1.0, 1.0, 0.0, 1.0), + float4( 1.0, 1.0, 0.0, 1.0) +}; + +constant float2 displayTexCoord[4] = { + float2(0.0, 1.0), + float2(0.0, 0.0), + float2(1.0, 1.0), + float2(1.0, 0.0) +}; + +vertex BasicVertexOut vertexDisplay(uint vid [[vertex_id]]) { + BasicVertexOut out; + out.position = displayPositions[vid]; + out.uv = displayTexCoord[vid]; + + return out; +} + +fragment 
float4 fragmentDisplay(BasicVertexOut in [[stage_in]], texture2d tex [[texture(0)]], sampler samplr [[sampler(0)]]) { + return tex.sample(samplr, in.uv); +} + +struct PicaRegs { + uint regs[0x200 - 0x48]; + + uint read(uint reg) constant { + return regs[reg - 0x48]; + } +}; + +struct VertTEV { + uint textureEnvColor[6]; +}; + +float4 abgr8888ToFloat4(uint abgr) { + const float scale = 1.0 / 255.0; + + return scale * float4(float(abgr & 0xffu), float((abgr >> 8) & 0xffu), float((abgr >> 16) & 0xffu), float(abgr >> 24)); +} + +struct DrawVertexIn { + float4 position [[attribute(0)]]; + float4 quaternion [[attribute(1)]]; + float4 color [[attribute(2)]]; + float2 texCoord0 [[attribute(3)]]; + float2 texCoord1 [[attribute(4)]]; + float texCoord0W [[attribute(5)]]; + float3 view [[attribute(6)]]; + float2 texCoord2 [[attribute(7)]]; +}; + +// Metal cannot return arrays from vertex functions, this is an ugly workaround +struct EnvColor { + float4 c0; + float4 c1; + float4 c2; + float4 c3; + float4 c4; + float4 c5; + + thread float4& operator[](int i) { + switch (i) { + case 0: return c0; + case 1: return c1; + case 2: return c2; + case 3: return c3; + case 4: return c4; + case 5: return c5; + default: return c0; + } + } +}; + +float3 rotateFloat3ByQuaternion(float3 v, float4 q) { + float3 u = q.xyz; + float s = q.w; + + return 2.0 * dot(u, v) * u + (s * s - dot(u, u)) * v + 2.0 * s * cross(u, v); +} + +// Convert an arbitrary-width floating point literal to an f32 +float decodeFP(uint hex, uint E, uint M) { + uint width = M + E + 1u; + uint bias = 128u - (1u << (E - 1u)); + uint exponent = (hex >> M) & ((1u << E) - 1u); + uint mantissa = hex & ((1u << M) - 1u); + uint sign = (hex >> (E + M)) << 31u; + + if ((hex & ((1u << (width - 1u)) - 1u)) != 0u) { + if (exponent == (1u << E) - 1u) + exponent = 255u; + else + exponent += bias; + hex = sign | (mantissa << (23u - M)) | (exponent << 23u); + } else { + hex = sign; + } + + return as_type(hex); +} + +struct DepthUniforms { 
+ float depthScale; + float depthOffset; + bool depthMapEnable; +}; + +struct DrawVertexOut { + float4 position [[position]]; + float4 quaternion; + float4 color; + float3 texCoord0; + float2 texCoord1; + float2 texCoord2; + float3 view; + float3 normal; + float3 tangent; + float3 bitangent; + EnvColor textureEnvColor [[flat]]; + float4 textureEnvBufferColor [[flat]]; +}; + +struct DrawVertexOutWithClip { + DrawVertexOut out; + float clipDistance [[clip_distance]] [2]; +}; + +// TODO: check this +float transformZ(float z, float w, constant DepthUniforms& depthUniforms) { + z = z / w * depthUniforms.depthScale + depthUniforms.depthOffset; + if (!depthUniforms.depthMapEnable) { + z *= w; + } + + return z * w; +} + +vertex DrawVertexOutWithClip vertexDraw(DrawVertexIn in [[stage_in]], constant PicaRegs& picaRegs [[buffer(0)]], constant VertTEV& tev [[buffer(1)]], constant DepthUniforms& depthUniforms [[buffer(2)]]) { + DrawVertexOut out; + + // Position + out.position = in.position; + // Flip the y position + out.position.y = -out.position.y; + + // Apply depth uniforms + out.position.z = transformZ(out.position.z, out.position.w, depthUniforms); + + // Color + out.color = min(abs(in.color), 1.0); + + // Texture coordinates + out.texCoord0 = float3(in.texCoord0, in.texCoord0W); + out.texCoord0.y = 1.0 - out.texCoord0.y; + out.texCoord1 = in.texCoord1; + out.texCoord1.y = 1.0 - out.texCoord1.y; + out.texCoord2 = in.texCoord2; + out.texCoord2.y = 1.0 - out.texCoord2.y; + + // View + out.view = in.view; + + // TBN + out.normal = normalize(rotateFloat3ByQuaternion(float3(0.0, 0.0, 1.0), in.quaternion)); + out.tangent = normalize(rotateFloat3ByQuaternion(float3(1.0, 0.0, 0.0), in.quaternion)); + out.bitangent = normalize(rotateFloat3ByQuaternion(float3(0.0, 1.0, 0.0), in.quaternion)); + out.quaternion = in.quaternion; + + // Environment + for (int i = 0; i < 6; i++) { + out.textureEnvColor[i] = abgr8888ToFloat4(tev.textureEnvColor[i]); + } + + out.textureEnvBufferColor = 
abgr8888ToFloat4(picaRegs.read(0xFDu)); + + DrawVertexOutWithClip outWithClip; + outWithClip.out = out; + + // Parse clipping plane registers + float4 clipData = float4( + decodeFP(picaRegs.read(0x48u) & 0xffffffu, 7u, 16u), decodeFP(picaRegs.read(0x49u) & 0xffffffu, 7u, 16u), + decodeFP(picaRegs.read(0x4Au) & 0xffffffu, 7u, 16u), decodeFP(picaRegs.read(0x4Bu) & 0xffffffu, 7u, 16u) + ); + + // There's also another, always-on clipping plane based on vertex z + // TODO: transform + outWithClip.clipDistance[0] = -in.position.z; + outWithClip.clipDistance[1] = dot(clipData, in.position); + + return outWithClip; +} + +constant bool lightingEnabled [[function_constant(0)]]; +constant uint8_t lightingNumLights [[function_constant(1)]]; +constant uint32_t lightingConfig1 [[function_constant(2)]]; +constant uint16_t alphaControl [[function_constant(3)]]; + +struct Globals { + bool error_unimpl; + + float4 tevSources[16]; + float4 tevNextPreviousBuffer; + bool tevUnimplementedSourceFlag = false; + + uint GPUREG_LIGHTING_LUTINPUT_SCALE; + uint GPUREG_LIGHTING_LUTINPUT_ABS; + uint GPUREG_LIGHTING_LUTINPUT_SELECT; + uint GPUREG_LIGHTi_CONFIG; + + // HACK + //bool lightingEnabled; + //uint8_t lightingNumLights; + //uint32_t lightingConfig1; + //uint16_t alphaControl; + + float3 normal; +}; + +// See docs/lighting.md +constant uint samplerEnabledBitfields[2] = {0x7170e645u, 0x7f013fefu}; + +bool isSamplerEnabled(uint environment_id, uint lut_id) { + uint index = 7 * environment_id + lut_id; + uint arrayIndex = (index >> 5); + return (samplerEnabledBitfields[arrayIndex] & (1u << (index & 31u))) != 0u; +} + +struct FragTEV { + uint textureEnvSource[6]; + uint textureEnvOperand[6]; + uint textureEnvCombiner[6]; + uint textureEnvScale[6]; + + float4 fetchSource(thread Globals& globals, uint src_id) constant { + if (src_id >= 6u && src_id < 13u) { + globals.tevUnimplementedSourceFlag = true; + } + + return globals.tevSources[src_id]; + } + + float4 getColorAndAlphaSource(thread 
Globals& globals, int tev_id, int src_id) constant { + float4 result; + + float4 colorSource = fetchSource(globals, (textureEnvSource[tev_id] >> (src_id * 4)) & 15u); + float4 alphaSource = fetchSource(globals, (textureEnvSource[tev_id] >> (src_id * 4 + 16)) & 15u); + + uint colorOperand = (textureEnvOperand[tev_id] >> (src_id * 4)) & 15u; + uint alphaOperand = (textureEnvOperand[tev_id] >> (12 + src_id * 4)) & 7u; + + // TODO: figure out what the undocumented values do + switch (colorOperand) { + case 0u: result.rgb = colorSource.rgb; break; // Source color + case 1u: result.rgb = 1.0 - colorSource.rgb; break; // One minus source color + case 2u: result.rgb = float3(colorSource.a); break; // Source alpha + case 3u: result.rgb = float3(1.0 - colorSource.a); break; // One minus source alpha + case 4u: result.rgb = float3(colorSource.r); break; // Source red + case 5u: result.rgb = float3(1.0 - colorSource.r); break; // One minus source red + case 8u: result.rgb = float3(colorSource.g); break; // Source green + case 9u: result.rgb = float3(1.0 - colorSource.g); break; // One minus source green + case 12u: result.rgb = float3(colorSource.b); break; // Source blue + case 13u: result.rgb = float3(1.0 - colorSource.b); break; // One minus source blue + default: break; + } + + // TODO: figure out what the undocumented values do + switch (alphaOperand) { + case 0u: result.a = alphaSource.a; break; // Source alpha + case 1u: result.a = 1.0 - alphaSource.a; break; // One minus source alpha + case 2u: result.a = alphaSource.r; break; // Source red + case 3u: result.a = 1.0 - alphaSource.r; break; // One minus source red + case 4u: result.a = alphaSource.g; break; // Source green + case 5u: result.a = 1.0 - alphaSource.g; break; // One minus source green + case 6u: result.a = alphaSource.b; break; // Source blue + case 7u: result.a = 1.0 - alphaSource.b; break; // One minus source blue + default: break; + } + + return result; + } + + float4 calculateCombiner(thread Globals& 
globals, int tev_id) constant { + float4 source0 = getColorAndAlphaSource(globals, tev_id, 0); + float4 source1 = getColorAndAlphaSource(globals, tev_id, 1); + float4 source2 = getColorAndAlphaSource(globals, tev_id, 2); + + uint colorCombine = textureEnvCombiner[tev_id] & 15u; + uint alphaCombine = (textureEnvCombiner[tev_id] >> 16) & 15u; + + float4 result = float4(1.0); + + // TODO: figure out what the undocumented values do + switch (colorCombine) { + case 0u: result.rgb = source0.rgb; break; // Replace + case 1u: result.rgb = source0.rgb * source1.rgb; break; // Modulate + case 2u: result.rgb = min(float3(1.0), source0.rgb + source1.rgb); break; // Add + case 3u: result.rgb = clamp(source0.rgb + source1.rgb - 0.5, 0.0, 1.0); break; // Add signed + case 4u: result.rgb = mix(source1.rgb, source0.rgb, source2.rgb); break; // Interpolate + case 5u: result.rgb = max(source0.rgb - source1.rgb, 0.0); break; // Subtract + case 6u: result.rgb = float3(4.0 * dot(source0.rgb - 0.5, source1.rgb - 0.5)); break; // Dot3 RGB + case 7u: result = float4(4.0 * dot(source0.rgb - 0.5, source1.rgb - 0.5)); break; // Dot3 RGBA + case 8u: result.rgb = min(source0.rgb * source1.rgb + source2.rgb, 1.0); break; // Multiply then add + case 9u: result.rgb = min((source0.rgb + source1.rgb), 1.0) * source2.rgb; break; // Add then multiply + default: break; + } + + if (colorCombine != 7u) { // The color combiner also writes the alpha channel in the "Dot3 RGBA" mode. + // TODO: figure out what the undocumented values do + // TODO: test if the alpha combiner supports all the same modes as the color combiner. 
+ switch (alphaCombine) { + case 0u: result.a = source0.a; break; // Replace + case 1u: result.a = source0.a * source1.a; break; // Modulate + case 2u: result.a = min(1.0, source0.a + source1.a); break; // Add + case 3u: result.a = clamp(source0.a + source1.a - 0.5, 0.0, 1.0); break; // Add signed + case 4u: result.a = mix(source1.a, source0.a, source2.a); break; // Interpolate + case 5u: result.a = max(0.0, source0.a - source1.a); break; // Subtract + case 8u: result.a = min(source0.a * source1.a + source2.a, 1.0); break; // Multiply then add + case 9u: result.a = min(source0.a + source1.a, 1.0) * source2.a; break; // Add then multiply + default: break; + } + } + + result.rgb *= float(1 << (textureEnvScale[tev_id] & 3u)); + result.a *= float(1 << ((textureEnvScale[tev_id] >> 16) & 3u)); + + return result; + } +}; + +enum class LogicOp : uint8_t { + Clear = 0, + And = 1, + AndReverse = 2, + Copy = 3, + Set = 4, + CopyInverted = 5, + NoOp = 6, + Invert = 7, + Nand = 8, + Or = 9, + Nor = 10, + Xor = 11, + Equiv = 12, + AndInverted = 13, + OrReverse = 14, + OrInverted = 15 +}; + +uint4 performLogicOpU(LogicOp logicOp, uint4 s, uint4 d) { + switch (logicOp) { + case LogicOp::Clear: return as_type(float4(0.0)); + case LogicOp::And: return s & d; + case LogicOp::AndReverse: return s & ~d; + case LogicOp::Copy: return s; + case LogicOp::Set: return as_type(float4(1.0)); + case LogicOp::CopyInverted: return ~s; + case LogicOp::NoOp: return d; + case LogicOp::Invert: return ~d; + case LogicOp::Nand: return ~(s & d); + case LogicOp::Or: return s | d; + case LogicOp::Nor: return ~(s | d); + case LogicOp::Xor: return s ^ d; + case LogicOp::Equiv: return ~(s ^ d); + case LogicOp::AndInverted: return ~s & d; + case LogicOp::OrReverse: return s | ~d; + case LogicOp::OrInverted: return ~s | d; + } +} + +#define D0_LUT 0u +#define D1_LUT 1u +#define SP_LUT 2u +#define FR_LUT 3u +#define RB_LUT 4u +#define RG_LUT 5u +#define RR_LUT 6u + +float lutLookup(texture2d_array texLut, uint 
slice, uint lut, uint index) { + return texLut.read(uint2(index, lut), slice).r; +} + +float lightLutLookup(thread Globals& globals, thread DrawVertexOut& in, constant PicaRegs& picaRegs, texture2d_array texLut, uint slice, uint environment_id, uint lut_id, uint light_id, float3 light_vector, float3 half_vector) { + uint lut_index; + int bit_in_config1; + if (lut_id == SP_LUT) { + // These are the spotlight attenuation LUTs + bit_in_config1 = 8 + int(light_id & 7u); + lut_index = 8u + light_id; + } else if (lut_id <= 6) { + bit_in_config1 = 16 + int(lut_id); + lut_index = lut_id; + } else { + globals.error_unimpl = true; + } + + bool current_sampler_enabled = isSamplerEnabled(environment_id, lut_id); // 7 luts per environment + + if (!current_sampler_enabled || (extract_bits(lightingConfig1, bit_in_config1, 1) != 0u)) { + return 1.0; + } + + uint scale_id = extract_bits(globals.GPUREG_LIGHTING_LUTINPUT_SCALE, int(lut_id) << 2, 3); + float scale = float(1u << scale_id); + if (scale_id >= 6u) scale /= 256.0; + + float delta = 1.0; + uint input_id = extract_bits(globals.GPUREG_LIGHTING_LUTINPUT_SELECT, int(lut_id) << 2, 3); + switch (input_id) { + case 0u: { + delta = dot(globals.normal, normalize(half_vector)); + break; + } + case 1u: { + delta = dot(normalize(in.view), normalize(half_vector)); + break; + } + case 2u: { + delta = dot(globals.normal, normalize(in.view)); + break; + } + case 3u: { + delta = dot(light_vector, globals.normal); + break; + } + case 4u: { + int GPUREG_LIGHTi_SPOTDIR_LOW = int(picaRegs.read(0x0146u + (light_id << 4u))); + int GPUREG_LIGHTi_SPOTDIR_HIGH = int(picaRegs.read(0x0147u + (light_id << 4u))); + + // Sign extend them. 
Normally bitfieldExtract would do that but it's missing on some versions + // of GLSL so we do it manually + int se_x = extract_bits(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 13); + int se_y = extract_bits(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 13); + int se_z = extract_bits(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 13); + + if ((se_x & 0x1000) == 0x1000) se_x |= 0xffffe000; + if ((se_y & 0x1000) == 0x1000) se_y |= 0xffffe000; + if ((se_z & 0x1000) == 0x1000) se_z |= 0xffffe000; + + // These are fixed point 1.1.11 values, so we need to convert them to float + float x = float(se_x) / 2047.0; + float y = float(se_y) / 2047.0; + float z = float(se_z) / 2047.0; + float3 spotlight_vector = float3(x, y, z); + delta = dot(light_vector, spotlight_vector); // spotlight direction is negated so we don't negate light_vector + break; + } + case 5u: { + delta = 1.0; // TODO: cos (aka CP); + globals.error_unimpl = true; + break; + } + default: { + delta = 1.0; + globals.error_unimpl = true; + break; + } + } + + // 0 = enabled + if (extract_bits(globals.GPUREG_LIGHTING_LUTINPUT_ABS, 1 + (int(lut_id) << 2), 1) == 0u) { + // Two sided diffuse + if (extract_bits(globals.GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) { + delta = max(delta, 0.0); + } else { + delta = abs(delta); + } + int index = int(clamp(floor(delta * 255.0), 0.f, 255.f)); + return lutLookup(texLut, slice, lut_index, index) * scale; + } else { + // Range is [-1, 1] so we need to map it to [0, 1] + int index = int(clamp(floor(delta * 128.0), -128.f, 127.f)); + if (index < 0) index += 256; + return lutLookup(texLut, slice, lut_index, index) * scale; + } +} + +float3 regToColor(uint reg) { + // Normalization scale to convert from [0...255] to [0.0...1.0] + const float scale = 1.0 / 255.0; + + return scale * float3(float(extract_bits(reg, 20, 8)), float(extract_bits(reg, 10, 8)), float(extract_bits(reg, 00, 8))); +} + +// Implements the following algorthm: https://mathb.in/26766 +void calcLighting(thread Globals& globals, thread DrawVertexOut& in, constant 
PicaRegs& picaRegs, texture2d_array texLut, uint slice, sampler linearSampler, thread float4& primaryColor, thread float4& secondaryColor) { + // Quaternions describe a transformation from surface-local space to eye space. + // In surface-local space, by definition (and up to permutation) the normal vector is (0,0,1), + // the tangent vector is (1,0,0), and the bitangent vector is (0,1,0). + //float3 normal = normalize(in.normal); + //float3 tangent = normalize(in.tangent); + //float3 bitangent = normalize(in.bitangent); + //float3 view = normalize(in.view); + + uint GPUREG_LIGHTING_LIGHT_PERMUTATION = picaRegs.read(0x01D9u); + + primaryColor = float4(0.0, 0.0, 0.0, 1.0); + secondaryColor = float4(0.0, 0.0, 0.0, 1.0); + + uint GPUREG_LIGHTING_CONFIG0 = picaRegs.read(0x01C3u); + globals.GPUREG_LIGHTING_LUTINPUT_SCALE = picaRegs.read(0x01D2u); + globals.GPUREG_LIGHTING_LUTINPUT_ABS = picaRegs.read(0x01D0u); + globals.GPUREG_LIGHTING_LUTINPUT_SELECT = picaRegs.read(0x01D1u); + + uint bumpMode = extract_bits(GPUREG_LIGHTING_CONFIG0, 28, 2); + + // Bump mode is ignored for now because it breaks some games ie. 
Toad Treasure Tracker + switch (bumpMode) { + default: { + globals.normal = rotateFloat3ByQuaternion(float3(0.0, 0.0, 1.0), in.quaternion); + break; + } + } + + float4 diffuseSum = float4(0.0, 0.0, 0.0, 1.0); + float4 specularSum = float4(0.0, 0.0, 0.0, 1.0); + + uint environmentId = extract_bits(GPUREG_LIGHTING_CONFIG0, 4, 4); + bool clampHighlights = extract_bits(GPUREG_LIGHTING_CONFIG0, 27, 1) == 1u; + + uint lightId; + float3 lightVector = float3(0.0); + float3 halfVector = float3(0.0); + + for (uint i = 0u; i < lightingNumLights + 1; i++) { + lightId = extract_bits(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i) << 2, 3); + + uint GPUREG_LIGHTi_SPECULAR0 = picaRegs.read(0x0140u + (lightId << 4u)); + uint GPUREG_LIGHTi_SPECULAR1 = picaRegs.read(0x0141u + (lightId << 4u)); + uint GPUREG_LIGHTi_DIFFUSE = picaRegs.read(0x0142u + (lightId << 4u)); + uint GPUREG_LIGHTi_AMBIENT = picaRegs.read(0x0143u + (lightId << 4u)); + uint GPUREG_LIGHTi_VECTOR_LOW = picaRegs.read(0x0144u + (lightId << 4u)); + uint GPUREG_LIGHTi_VECTOR_HIGH = picaRegs.read(0x0145u + (lightId << 4u)); + globals.GPUREG_LIGHTi_CONFIG = picaRegs.read(0x0149u + (lightId << 4u)); + + float lightDistance; + float3 lightPosition = float3( + decodeFP(extract_bits(GPUREG_LIGHTi_VECTOR_LOW, 0, 16), 5u, 10u), decodeFP(extract_bits(GPUREG_LIGHTi_VECTOR_LOW, 16, 16), 5u, 10u), + decodeFP(extract_bits(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5u, 10u) + ); + + // Positional Light + if (extract_bits(globals.GPUREG_LIGHTi_CONFIG, 0, 1) == 0u) { + // error_unimpl = true; + lightVector = lightPosition + in.view; + } + + // Directional light + else { + lightVector = lightPosition; + } + + lightDistance = length(lightVector); + lightVector = normalize(lightVector); + halfVector = lightVector + normalize(in.view); + + float NdotL = dot(globals.normal, lightVector); // N dot Li + + // Two sided diffuse + if (extract_bits(globals.GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) + NdotL = max(0.0, NdotL); + else + NdotL = abs(NdotL); + + float 
geometricFactor; + bool useGeo0 = extract_bits(globals.GPUREG_LIGHTi_CONFIG, 2, 1) == 1u; + bool useGeo1 = extract_bits(globals.GPUREG_LIGHTi_CONFIG, 3, 1) == 1u; + if (useGeo0 || useGeo1) { + geometricFactor = dot(halfVector, halfVector); + geometricFactor = geometricFactor == 0.0 ? 0.0 : min(NdotL / geometricFactor, 1.0); + } + + float distanceAttenuation = 1.0; + if (extract_bits(lightingConfig1, 24 + int(lightId), 1) == 0u) { + uint GPUREG_LIGHTi_ATTENUATION_BIAS = extract_bits(picaRegs.read(0x014Au + (lightId << 4u)), 0, 20); + uint GPUREG_LIGHTi_ATTENUATION_SCALE = extract_bits(picaRegs.read(0x014Bu + (lightId << 4u)), 0, 20); + + float distanceAttenuationBias = decodeFP(GPUREG_LIGHTi_ATTENUATION_BIAS, 7u, 12u); + float distanceAttenuationScale = decodeFP(GPUREG_LIGHTi_ATTENUATION_SCALE, 7u, 12u); + + float delta = lightDistance * distanceAttenuationScale + distanceAttenuationBias; + delta = clamp(delta, 0.0, 1.0); + int index = int(clamp(floor(delta * 255.0), 0.0, 255.0)); + distanceAttenuation = lutLookup(texLut, slice, 16u + lightId, index); + } + + float spotlightAttenuation = lightLutLookup(globals, in, picaRegs, texLut, slice, environmentId, SP_LUT, lightId, lightVector, halfVector); + float specular0Distribution = lightLutLookup(globals, in, picaRegs, texLut, slice, environmentId, D0_LUT, lightId, lightVector, halfVector); + float specular1Distribution = lightLutLookup(globals, in, picaRegs, texLut, slice, environmentId, D1_LUT, lightId, lightVector, halfVector); + float3 reflectedColor; + reflectedColor.r = lightLutLookup(globals, in, picaRegs, texLut, slice, environmentId, RR_LUT, lightId, lightVector, halfVector); + + if (isSamplerEnabled(environmentId, RG_LUT)) { + reflectedColor.g = lightLutLookup(globals, in, picaRegs, texLut, slice, environmentId, RG_LUT, lightId, lightVector, halfVector); + } else { + reflectedColor.g = reflectedColor.r; + } + + if (isSamplerEnabled(environmentId, RB_LUT)) { + reflectedColor.b = lightLutLookup(globals, in, 
picaRegs, texLut, slice, environmentId, RB_LUT, lightId, lightVector, halfVector); + } else { + reflectedColor.b = reflectedColor.r; + } + + float3 specular0 = regToColor(GPUREG_LIGHTi_SPECULAR0) * specular0Distribution; + float3 specular1 = regToColor(GPUREG_LIGHTi_SPECULAR1) * specular1Distribution * reflectedColor; + + specular0 *= useGeo0 ? geometricFactor : 1.0; + specular1 *= useGeo1 ? geometricFactor : 1.0; + + float clampFactor = 1.0; + if (clampHighlights && NdotL == 0.0) { + clampFactor = 0.0; + } + + float lightFactor = distanceAttenuation * spotlightAttenuation; + diffuseSum.rgb += lightFactor * (regToColor(GPUREG_LIGHTi_AMBIENT) + regToColor(GPUREG_LIGHTi_DIFFUSE) * NdotL); + specularSum.rgb += lightFactor * clampFactor * (specular0 + specular1); + } + uint fresnelOutput1 = extract_bits(GPUREG_LIGHTING_CONFIG0, 2, 1); + uint fresnelOutput2 = extract_bits(GPUREG_LIGHTING_CONFIG0, 3, 1); + + float fresnelFactor; + + if (fresnelOutput1 == 1u || fresnelOutput2 == 1u) { + fresnelFactor = lightLutLookup(globals, in, picaRegs, texLut, slice, environmentId, FR_LUT, lightId, lightVector, halfVector); + } + + if (fresnelOutput1 == 1u) { + diffuseSum.a = fresnelFactor; + } + + if (fresnelOutput2 == 1u) { + specularSum.a = fresnelFactor; + } + + uint GPUREG_LIGHTING_AMBIENT = picaRegs.read(0x01C0u); + float4 globalAmbient = float4(regToColor(GPUREG_LIGHTING_AMBIENT), 1.0); + primaryColor = clamp(globalAmbient + diffuseSum, 0.0, 1.0); + secondaryColor = clamp(specularSum, 0.0, 1.0); +} + +float4 performLogicOp(LogicOp logicOp, float4 s, float4 d) { + return as_type(performLogicOpU(logicOp, as_type(s), as_type(d))); +} + +fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[color(0)]], constant PicaRegs& picaRegs [[buffer(0)]], constant FragTEV& tev [[buffer(1)]], constant LogicOp& logicOp [[buffer(2)]], constant uint2& lutSlices [[buffer(3)]], texture2d tex0 [[texture(0)]], texture2d tex1 [[texture(1)]], texture2d tex2 [[texture(2)]], 
texture2d_array texLightingLut [[texture(3)]], texture1d_array texFogLut [[texture(4)]], sampler samplr0 [[sampler(0)]], sampler samplr1 [[sampler(1)]], sampler samplr2 [[sampler(2)]], sampler linearSampler [[sampler(3)]]) { + Globals globals; + + // HACK + //globals.lightingEnabled = picaRegs.read(0x008Fu) != 0u; + //globals.lightingNumLights = picaRegs.read(0x01C2u); + //globals.lightingConfig1 = picaRegs.read(0x01C4u); + //globals.alphaControl = picaRegs.read(0x104); + + globals.tevSources[0] = in.color; + if (lightingEnabled) { + calcLighting(globals, in, picaRegs, texLightingLut, lutSlices.x, linearSampler, globals.tevSources[1], globals.tevSources[2]); + } else { + globals.tevSources[1] = float4(0.0); + globals.tevSources[2] = float4(0.0); + } + + uint textureConfig = picaRegs.read(0x80u); + float2 texCoord2 = (textureConfig & (1u << 13)) != 0u ? in.texCoord1 : in.texCoord2; + + if ((textureConfig & 1u) != 0u) globals.tevSources[3] = tex0.sample(samplr0, in.texCoord0.xy); + if ((textureConfig & 2u) != 0u) globals.tevSources[4] = tex1.sample(samplr1, in.texCoord1); + if ((textureConfig & 4u) != 0u) globals.tevSources[5] = tex2.sample(samplr2, texCoord2); + globals.tevSources[13] = float4(0.0); // Previous buffer + globals.tevSources[15] = in.color; // Previous combiner + + globals.tevNextPreviousBuffer = in.textureEnvBufferColor; + uint textureEnvUpdateBuffer = picaRegs.read(0xE0u); + + for (int i = 0; i < 6; i++) { + globals.tevSources[14] = in.textureEnvColor[i]; // Constant color + globals.tevSources[15] = tev.calculateCombiner(globals, i); + globals.tevSources[13] = globals.tevNextPreviousBuffer; + + if (i < 4) { + if ((textureEnvUpdateBuffer & (0x100u << i)) != 0u) { + globals.tevNextPreviousBuffer.rgb = globals.tevSources[15].rgb; + } + + if ((textureEnvUpdateBuffer & (0x1000u << i)) != 0u) { + globals.tevNextPreviousBuffer.a = globals.tevSources[15].a; + } + } + } + + float4 color = globals.tevSources[15]; + + // Fog + bool enable_fog = 
(textureEnvUpdateBuffer & 7u) == 5u; + + if (enable_fog) { + bool flipDepth = (textureEnvUpdateBuffer & (1u << 16)) != 0u; + float fogIndex = flipDepth ? 1.0 - in.position.z : in.position.z; + fogIndex *= 128.0; + float clampedIndex = clamp(floor(fogIndex), 0.0, 127.0); + float delta = fogIndex - clampedIndex; + float2 value = texFogLut.read(clampedIndex, lutSlices.y).rg; + float fogFactor = clamp(value.r + value.g * delta, 0.0, 1.0); + + uint GPUREG_FOG_COLOR = picaRegs.read(0x00E1u); + + // Annoyingly color is not encoded in the same way as light color + float r = (GPUREG_FOG_COLOR & 0xFFu) / 255.0; + float g = ((GPUREG_FOG_COLOR >> 8) & 0xFFu) / 255.0; + float b = ((GPUREG_FOG_COLOR >> 16) & 0xFFu) / 255.0; + float3 fogColor = float3(r, g, b); + + color.rgb = mix(fogColor, color.rgb, fogFactor); + } + + // Perform alpha test + if ((alphaControl & 1u) != 0u) { // Check if alpha test is on + uint func = (alphaControl >> 4u) & 7u; + float reference = float((alphaControl >> 8u) & 0xffu) / 255.0; + float alpha = color.a; + + switch (func) { + case 0u: discard_fragment(); // Never pass alpha test + case 1u: break; // Always pass alpha test + case 2u: // Pass if equal + if (alpha != reference) discard_fragment(); + break; + case 3u: // Pass if not equal + if (alpha == reference) discard_fragment(); + break; + case 4u: // Pass if less than + if (alpha >= reference) discard_fragment(); + break; + case 5u: // Pass if less than or equal + if (alpha > reference) discard_fragment(); + break; + case 6u: // Pass if greater than + if (alpha <= reference) discard_fragment(); + break; + case 7u: // Pass if greater than or equal + if (alpha < reference) discard_fragment(); + break; + } + } + + return performLogicOp(logicOp, color, prevColor); +} diff --git a/src/host_shaders/opengl_es_display.frag b/src/host_shaders/opengl_es_display.frag new file mode 100644 index 00000000..600ebfcd --- /dev/null +++ b/src/host_shaders/opengl_es_display.frag @@ -0,0 +1,10 @@ +#version 310 es 
+precision mediump float; + +in vec2 UV; +out vec4 FragColor; + +uniform sampler2D u_texture; +void main() { + FragColor = texture(u_texture, UV); +} \ No newline at end of file diff --git a/src/host_shaders/opengl_es_display.vert b/src/host_shaders/opengl_es_display.vert new file mode 100644 index 00000000..04fadfc6 --- /dev/null +++ b/src/host_shaders/opengl_es_display.vert @@ -0,0 +1,25 @@ +#version 310 es +precision mediump float; + +out vec2 UV; + +void main() { + const vec4 positions[4] = vec4[]( + vec4(-1.0, 1.0, 1.0, 1.0), // Top-left + vec4(1.0, 1.0, 1.0, 1.0), // Top-right + vec4(-1.0, -1.0, 1.0, 1.0), // Bottom-left + vec4(1.0, -1.0, 1.0, 1.0) // Bottom-right + ); + + // The 3DS displays both screens' framebuffer rotated 90 deg counter clockwise + // So we adjust our texcoords accordingly + const vec2 texcoords[4] = vec2[]( + vec2(1.0, 1.0), // Top-right + vec2(1.0, 0.0), // Bottom-right + vec2(0.0, 1.0), // Top-left + vec2(0.0, 0.0) // Bottom-left + ); + + gl_Position = positions[gl_VertexID]; + UV = texcoords[gl_VertexID]; +} \ No newline at end of file diff --git a/src/host_shaders/opengl_fragment_shader.frag b/src/host_shaders/opengl_fragment_shader.frag index f6fa6c55..9f07df0b 100644 --- a/src/host_shaders/opengl_fragment_shader.frag +++ b/src/host_shaders/opengl_fragment_shader.frag @@ -1,8 +1,6 @@ #version 410 core -in vec3 v_tangent; -in vec3 v_normal; -in vec3 v_bitangent; +in vec4 v_quaternion; in vec4 v_colour; in vec3 v_texcoord0; in vec2 v_texcoord1; @@ -27,7 +25,7 @@ uniform bool u_depthmapEnable; uniform sampler2D u_tex0; uniform sampler2D u_tex1; uniform sampler2D u_tex2; -uniform sampler1DArray u_tex_lighting_lut; +uniform sampler2D u_tex_luts; uniform uint u_picaRegs[0x200 - 0x48]; @@ -37,6 +35,16 @@ uint readPicaReg(uint reg_addr) { return u_picaRegs[reg_addr - 0x48u]; } vec4 tevSources[16]; vec4 tevNextPreviousBuffer; bool tevUnimplementedSourceFlag = false; +vec3 normal; + +// See docs/lighting.md +const uint 
samplerEnabledBitfields[2] = uint[2](0x7170e645u, 0x7f013fefu); + +bool isSamplerEnabled(uint environment_id, uint lut_id) { + uint index = 7 * environment_id + lut_id; + uint arrayIndex = (index >> 5); + return (samplerEnabledBitfields[arrayIndex] & (1u << (index & 31u))) != 0u; +} // OpenGL ES 1.1 reference pages for TEVs (this is what the PICA200 implements): // https://registry.khronos.org/OpenGL-Refpages/es1.1/xhtml/glTexEnv.xml @@ -110,7 +118,7 @@ vec4 tevCalculateCombiner(int tev_id) { case 6u: result.rgb = vec3(4.0 * dot(source0.rgb - 0.5, source1.rgb - 0.5)); break; // Dot3 RGB case 7u: result = vec4(4.0 * dot(source0.rgb - 0.5, source1.rgb - 0.5)); break; // Dot3 RGBA case 8u: result.rgb = min(source0.rgb * source1.rgb + source2.rgb, 1.0); break; // Multiply then add - case 9u: result.rgb = min((source0.rgb + source1.rgb) * source2.rgb, 1.0); break; // Add then multiply + case 9u: result.rgb = min(source0.rgb + source1.rgb, 1.0) * source2.rgb; break; // Add then multiply default: break; } @@ -125,7 +133,7 @@ vec4 tevCalculateCombiner(int tev_id) { case 4u: result.a = mix(source1.a, source0.a, source2.a); break; // Interpolate case 5u: result.a = max(0.0, source0.a - source1.a); break; // Subtract case 8u: result.a = min(1.0, source0.a * source1.a + source2.a); break; // Multiply then add - case 9u: result.a = min(1.0, (source0.a + source1.a) * source2.a); break; // Add then multiply + case 9u: result.a = min(source0.a + source1.a, 1.0) * source2.a; break; // Add then multiply default: break; } } @@ -144,10 +152,18 @@ vec4 tevCalculateCombiner(int tev_id) { #define RG_LUT 5u #define RR_LUT 6u -float lutLookup(uint lut, uint light, float value) { - if (lut >= FR_LUT && lut <= RR_LUT) lut -= 1; - if (lut == SP_LUT) lut = light + 8; - return texture(u_tex_lighting_lut, vec2(value, lut)).r; +#define FOG_INDEX 24 + +uint GPUREG_LIGHTi_CONFIG; +uint GPUREG_LIGHTING_CONFIG1; +uint GPUREG_LIGHTING_LUTINPUT_SELECT; +uint GPUREG_LIGHTING_LUTINPUT_SCALE; +uint 
GPUREG_LIGHTING_LUTINPUT_ABS; +bool error_unimpl = false; +vec4 unimpl_color = vec4(1.0, 0.0, 1.0, 1.0); + +float lutLookup(uint lut, int index) { + return texelFetch(u_tex_luts, ivec2(index, int(lut)), 0).r; } vec3 regToColor(uint reg) { @@ -178,136 +194,179 @@ float decodeFP(uint hex, uint E, uint M) { return uintBitsToFloat(hex); } +float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light_vector, vec3 half_vector) { + uint lut_index; + int bit_in_config1; + if (lut_id == SP_LUT) { + // These are the spotlight attenuation LUTs + bit_in_config1 = 8 + int(light_id & 7u); + lut_index = 8u + light_id; + } else if (lut_id <= 6) { + bit_in_config1 = 16 + int(lut_id); + lut_index = lut_id; + } else { + error_unimpl = true; + } + + bool current_sampler_enabled = isSamplerEnabled(environment_id, lut_id); // 7 luts per environment + + if (!current_sampler_enabled || (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, bit_in_config1, 1) != 0u)) { + return 1.0; + } + + uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, int(lut_id) << 2, 3); + float scale = float(1u << scale_id); + if (scale_id >= 6u) scale /= 256.0; + + float delta = 1.0; + uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, int(lut_id) << 2, 3); + switch (input_id) { + case 0u: { + delta = dot(normal, normalize(half_vector)); + break; + } + case 1u: { + delta = dot(normalize(v_view), normalize(half_vector)); + break; + } + case 2u: { + delta = dot(normal, normalize(v_view)); + break; + } + case 3u: { + delta = dot(light_vector, normal); + break; + } + case 4u: { + int GPUREG_LIGHTi_SPOTDIR_LOW = int(readPicaReg(0x0146u + (light_id << 4u))); + int GPUREG_LIGHTi_SPOTDIR_HIGH = int(readPicaReg(0x0147u + (light_id << 4u))); + + // Sign extend them. 
Normally bitfieldExtract would do that but it's missing on some versions + // of GLSL so we do it manually + int se_x = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 13); + int se_y = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 13); + int se_z = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 13); + + if ((se_x & 0x1000) == 0x1000) se_x |= 0xffffe000; + if ((se_y & 0x1000) == 0x1000) se_y |= 0xffffe000; + if ((se_z & 0x1000) == 0x1000) se_z |= 0xffffe000; + + // These are fixed point 1.1.11 values, so we need to convert them to float + float x = float(se_x) / 2047.0; + float y = float(se_y) / 2047.0; + float z = float(se_z) / 2047.0; + vec3 spotlight_vector = vec3(x, y, z); + delta = dot(light_vector, spotlight_vector); // spotlight direction is negated so we don't negate light_vector + break; + } + case 5u: { + delta = 1.0; // TODO: cos (aka CP); + error_unimpl = true; + break; + } + default: { + delta = 1.0; + error_unimpl = true; + break; + } + } + + // 0 = enabled + if (bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS, 1 + (int(lut_id) << 2), 1) == 0u) { + // Two sided diffuse + if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) { + delta = max(delta, 0.0); + } else { + delta = abs(delta); + } + int index = int(clamp(floor(delta * 255.0), 0.f, 255.f)); + return lutLookup(lut_index, index) * scale; + } else { + // Range is [-1, 1] so we need to map it to [0, 1] + int index = int(clamp(floor(delta * 128.0), -128.f, 127.f)); + if (index < 0) index += 256; + return lutLookup(lut_index, index) * scale; + } +} + +vec3 rotateVec3ByQuaternion(vec3 v, vec4 q) { + vec3 u = q.xyz; + float s = q.w; + return 2.0 * dot(u, v) * u + (s * s - dot(u, u)) * v + 2.0 * s * cross(u, v); +} + // Implements the following algorthm: https://mathb.in/26766 void calcLighting(out vec4 primary_color, out vec4 secondary_color) { - // Quaternions describe a transformation from surface-local space to eye space. 
- // In surface-local space, by definition (and up to permutation) the normal vector is (0,0,1), - // the tangent vector is (1,0,0), and the bitangent vector is (0,1,0). - vec3 normal = normalize(v_normal); - vec3 tangent = normalize(v_tangent); - vec3 bitangent = normalize(v_bitangent); - vec3 view = normalize(v_view); - uint GPUREG_LIGHTING_ENABLE = readPicaReg(0x008Fu); if (bitfieldExtract(GPUREG_LIGHTING_ENABLE, 0, 1) == 0u) { - primary_color = secondary_color = vec4(1.0); + primary_color = secondary_color = vec4(0.0); return; } - uint GPUREG_LIGHTING_AMBIENT = readPicaReg(0x01C0u); uint GPUREG_LIGHTING_NUM_LIGHTS = (readPicaReg(0x01C2u) & 0x7u) + 1u; uint GPUREG_LIGHTING_LIGHT_PERMUTATION = readPicaReg(0x01D9u); primary_color = vec4(vec3(0.0), 1.0); secondary_color = vec4(vec3(0.0), 1.0); - primary_color.rgb += regToColor(GPUREG_LIGHTING_AMBIENT); - - uint GPUREG_LIGHTING_LUTINPUT_ABS = readPicaReg(0x01D0u); - uint GPUREG_LIGHTING_LUTINPUT_SELECT = readPicaReg(0x01D1u); uint GPUREG_LIGHTING_CONFIG0 = readPicaReg(0x01C3u); - uint GPUREG_LIGHTING_CONFIG1 = readPicaReg(0x01C4u); - uint GPUREG_LIGHTING_LUTINPUT_SCALE = readPicaReg(0x01D2u); - float d[7]; + GPUREG_LIGHTING_LUTINPUT_SCALE = readPicaReg(0x01D2u); + GPUREG_LIGHTING_CONFIG1 = readPicaReg(0x01C4u); + GPUREG_LIGHTING_LUTINPUT_ABS = readPicaReg(0x01D0u); + GPUREG_LIGHTING_LUTINPUT_SELECT = readPicaReg(0x01D1u); - bool error_unimpl = false; + uint bump_mode = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 28, 2); + + // Bump mode is ignored for now because it breaks some games ie. 
Toad Treasure Tracker + switch (bump_mode) { + default: { + normal = rotateVec3ByQuaternion(vec3(0.0, 0.0, 1.0), v_quaternion); + break; + } + } + + vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0); + vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0); + + uint environment_id = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 4, 4); + bool clamp_highlights = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 27, 1) == 1u; + + uint light_id; + vec3 light_vector; + vec3 half_vector; for (uint i = 0u; i < GPUREG_LIGHTING_NUM_LIGHTS; i++) { - uint light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i * 3u), 3); + light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i) << 2, 3); - uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140u + 0x10u * light_id); - uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141u + 0x10u * light_id); - uint GPUREG_LIGHTi_DIFFUSE = readPicaReg(0x0142u + 0x10u * light_id); - uint GPUREG_LIGHTi_AMBIENT = readPicaReg(0x0143u + 0x10u * light_id); - uint GPUREG_LIGHTi_VECTOR_LOW = readPicaReg(0x0144u + 0x10u * light_id); - uint GPUREG_LIGHTi_VECTOR_HIGH = readPicaReg(0x0145u + 0x10u * light_id); - uint GPUREG_LIGHTi_CONFIG = readPicaReg(0x0149u + 0x10u * light_id); + uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140u + (light_id << 4u)); + uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141u + (light_id << 4u)); + uint GPUREG_LIGHTi_DIFFUSE = readPicaReg(0x0142u + (light_id << 4u)); + uint GPUREG_LIGHTi_AMBIENT = readPicaReg(0x0143u + (light_id << 4u)); + uint GPUREG_LIGHTi_VECTOR_LOW = readPicaReg(0x0144u + (light_id << 4u)); + uint GPUREG_LIGHTi_VECTOR_HIGH = readPicaReg(0x0145u + (light_id << 4u)); + GPUREG_LIGHTi_CONFIG = readPicaReg(0x0149u + (light_id << 4u)); - vec3 light_vector = normalize(vec3( + float light_distance; + vec3 light_position = vec3( decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW, 0, 16), 5u, 10u), decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW, 16, 16), 5u, 10u), decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5u, 
10u) - )); - - vec3 half_vector; + ); // Positional Light if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 0, 1) == 0u) { - // error_unimpl = true; - half_vector = normalize(normalize(light_vector + v_view) + view); + light_vector = light_position + v_view; } // Directional light else { - half_vector = normalize(normalize(light_vector) + view); + light_vector = light_position; } - for (int c = 0; c < 7; c++) { - if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 16 + c, 1) == 0u) { - uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, c * 4, 3); - float scale = float(1u << scale_id); - if (scale_id >= 6u) scale /= 256.0; + light_distance = length(light_vector); + light_vector = normalize(light_vector); + half_vector = light_vector + normalize(v_view); - uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, c * 4, 3); - if (input_id == 0u) - d[c] = dot(normal, half_vector); - else if (input_id == 1u) - d[c] = dot(view, half_vector); - else if (input_id == 2u) - d[c] = dot(normal, view); - else if (input_id == 3u) - d[c] = dot(light_vector, normal); - else if (input_id == 4u) { - uint GPUREG_LIGHTi_SPOTDIR_LOW = readPicaReg(0x0146u + 0x10u * light_id); - uint GPUREG_LIGHTi_SPOTDIR_HIGH = readPicaReg(0x0147u + 0x10u * light_id); - vec3 spot_light_vector = normalize(vec3( - decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 16), 1u, 11u), - decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 16), 1u, 11u), - decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 16), 1u, 11u) - )); - d[c] = dot(-light_vector, spot_light_vector); // -L dot P (aka Spotlight aka SP); - } else if (input_id == 5u) { - d[c] = 1.0; // TODO: cos (aka CP); - error_unimpl = true; - } else { - d[c] = 1.0; - } - - d[c] = lutLookup(uint(c), light_id, d[c] * 0.5 + 0.5) * scale; - if (bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS, 2 * c, 1) != 0u) d[c] = abs(d[c]); - } else { - d[c] = 1.0; - } - } - - uint lookup_config = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 4, 4); - if 
(lookup_config == 0u) { - d[D1_LUT] = 0.0; - d[FR_LUT] = 0.0; - d[RG_LUT] = d[RB_LUT] = d[RR_LUT]; - } else if (lookup_config == 1u) { - d[D0_LUT] = 0.0; - d[D1_LUT] = 0.0; - d[RG_LUT] = d[RB_LUT] = d[RR_LUT]; - } else if (lookup_config == 2u) { - d[FR_LUT] = 0.0; - d[SP_LUT] = 0.0; - d[RG_LUT] = d[RB_LUT] = d[RR_LUT]; - } else if (lookup_config == 3u) { - d[SP_LUT] = 0.0; - d[RG_LUT] = d[RB_LUT] = d[RR_LUT] = 1.0; - } else if (lookup_config == 4u) { - d[FR_LUT] = 0.0; - } else if (lookup_config == 5u) { - d[D1_LUT] = 0.0; - } else if (lookup_config == 6u) { - d[RG_LUT] = d[RB_LUT] = d[RR_LUT]; - } - - float distance_factor = 1.0; // a - float indirect_factor = 1.0; // fi - float shadow_factor = 1.0; // o - - float NdotL = dot(normal, light_vector); // Li dot N + float NdotL = dot(normal, light_vector); // N dot Li // Two sided diffuse if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) @@ -315,20 +374,86 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { else NdotL = abs(NdotL); - float light_factor = distance_factor * d[SP_LUT] * indirect_factor * shadow_factor; + float geometric_factor; + bool use_geo_0 = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 2, 1) == 1u; + bool use_geo_1 = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 3, 1) == 1u; + if (use_geo_0 || use_geo_1) { + geometric_factor = dot(half_vector, half_vector); + geometric_factor = geometric_factor == 0.0 ? 
0.0 : min(NdotL / geometric_factor, 1.0); + } - primary_color.rgb += light_factor * (regToColor(GPUREG_LIGHTi_AMBIENT) + regToColor(GPUREG_LIGHTi_DIFFUSE) * NdotL); - secondary_color.rgb += light_factor * (regToColor(GPUREG_LIGHTi_SPECULAR0) * d[D0_LUT] + - regToColor(GPUREG_LIGHTi_SPECULAR1) * d[D1_LUT] * vec3(d[RR_LUT], d[RG_LUT], d[RB_LUT])); + float distance_attenuation = 1.0; + if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 24 + int(light_id), 1) == 0u) { + uint GPUREG_LIGHTi_ATTENUATION_BIAS = bitfieldExtract(readPicaReg(0x014Au + (light_id << 4u)), 0, 20); + uint GPUREG_LIGHTi_ATTENUATION_SCALE = bitfieldExtract(readPicaReg(0x014Bu + (light_id << 4u)), 0, 20); + + float distance_attenuation_bias = decodeFP(GPUREG_LIGHTi_ATTENUATION_BIAS, 7u, 12u); + float distance_attenuation_scale = decodeFP(GPUREG_LIGHTi_ATTENUATION_SCALE, 7u, 12u); + + float delta = light_distance * distance_attenuation_scale + distance_attenuation_bias; + delta = clamp(delta, 0.0, 1.0); + int index = int(clamp(floor(delta * 255.0), 0.0, 255.0)); + distance_attenuation = lutLookup(16u + light_id, index); + } + + float spotlight_attenuation = lightLutLookup(environment_id, SP_LUT, light_id, light_vector, half_vector); + float specular0_distribution = lightLutLookup(environment_id, D0_LUT, light_id, light_vector, half_vector); + float specular1_distribution = lightLutLookup(environment_id, D1_LUT, light_id, light_vector, half_vector); + vec3 reflected_color; + reflected_color.r = lightLutLookup(environment_id, RR_LUT, light_id, light_vector, half_vector); + + if (isSamplerEnabled(environment_id, RG_LUT)) { + reflected_color.g = lightLutLookup(environment_id, RG_LUT, light_id, light_vector, half_vector); + } else { + reflected_color.g = reflected_color.r; + } + + if (isSamplerEnabled(environment_id, RB_LUT)) { + reflected_color.b = lightLutLookup(environment_id, RB_LUT, light_id, light_vector, half_vector); + } else { + reflected_color.b = reflected_color.r; + } + + vec3 specular0 = 
regToColor(GPUREG_LIGHTi_SPECULAR0) * specular0_distribution; + vec3 specular1 = regToColor(GPUREG_LIGHTi_SPECULAR1) * specular1_distribution * reflected_color; + + specular0 *= use_geo_0 ? geometric_factor : 1.0; + specular1 *= use_geo_1 ? geometric_factor : 1.0; + + float clamp_factor = 1.0; + if (clamp_highlights && NdotL == 0.0) { + clamp_factor = 0.0; + } + + float light_factor = distance_attenuation * spotlight_attenuation; + diffuse_sum.rgb += light_factor * (regToColor(GPUREG_LIGHTi_AMBIENT) + regToColor(GPUREG_LIGHTi_DIFFUSE) * NdotL); + specular_sum.rgb += light_factor * clamp_factor * (specular0 + specular1); } + uint fresnel_output1 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 2, 1); uint fresnel_output2 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 3, 1); + // Uses parameters from the last light as Fresnel is only applied to the last light + float fresnel_factor; + + if (fresnel_output1 == 1u || fresnel_output2 == 1u) { + fresnel_factor = lightLutLookup(environment_id, FR_LUT, light_id, light_vector, half_vector); + } + + if (fresnel_output1 == 1u) { + diffuse_sum.a = fresnel_factor; + } - if (fresnel_output1 == 1u) primary_color.a = d[FR_LUT]; - if (fresnel_output2 == 1u) secondary_color.a = d[FR_LUT]; + if (fresnel_output2 == 1u) { + specular_sum.a = fresnel_factor; + } + + uint GPUREG_LIGHTING_AMBIENT = readPicaReg(0x01C0u); + vec4 global_ambient = vec4(regToColor(GPUREG_LIGHTING_AMBIENT), 1.0); + primary_color = clamp(global_ambient + diffuse_sum, vec4(0.0), vec4(1.0)); + secondary_color = clamp(specular_sum, vec4(0.0), vec4(1.0)); if (error_unimpl) { - // secondary_color = primary_color = vec4(1.0, 0., 1.0, 1.0); + // secondary_color = primary_color = unimpl_color; } } @@ -371,7 +496,7 @@ void main() { if (tevUnimplementedSourceFlag) { // fragColour = vec4(1.0, 0.0, 1.0, 1.0); } - // fragColour.rg = texture(u_tex_lighting_lut,vec2(gl_FragCoord.x/200.,float(int(gl_FragCoord.y/2)%24))).rr; + // fragColour.rg = 
texture(u_tex_luts,vec2(gl_FragCoord.x/200.,float(int(gl_FragCoord.y/2)%24))).rr; // Get original depth value by converting from [near, far] = [0, 1] to [-1, 1] // We do this by converting to [0, 2] first and subtracting 1 to go to [-1, 1] @@ -384,6 +509,28 @@ void main() { // Write final fragment depth gl_FragDepth = depth; + bool enable_fog = (textureEnvUpdateBuffer & 7u) == 5u; + + if (enable_fog) { + bool flip_depth = (textureEnvUpdateBuffer & (1u << 16)) != 0u; + float fog_index = flip_depth ? 1.0 - depth : depth; + fog_index *= 128.0; + float clamped_index = clamp(floor(fog_index), 0.0, 127.0); + float delta = fog_index - clamped_index; + vec2 value = texelFetch(u_tex_luts, ivec2(int(clamped_index), FOG_INDEX), 0).rg; + float fog_factor = clamp(value.r + value.g * delta, 0.0, 1.0); + + uint GPUREG_FOG_COLOR = readPicaReg(0x00E1u); + + // Annoyingly color is not encoded in the same way as light color + float r = float(GPUREG_FOG_COLOR & 0xFFu); + float g = float((GPUREG_FOG_COLOR >> 8u) & 0xFFu); + float b = float((GPUREG_FOG_COLOR >> 16u) & 0xFFu); + vec3 fog_color = (1.0 / 255.0) * vec3(r, g, b); + + fragColour.rgb = mix(fog_color, fragColour.rgb, fog_factor); + } + // Perform alpha test uint alphaControl = readPicaReg(0x104u); if ((alphaControl & 1u) != 0u) { // Check if alpha test is on diff --git a/src/host_shaders/opengl_vertex_shader.vert b/src/host_shaders/opengl_vertex_shader.vert index a25d7a6d..057f9a88 100644 --- a/src/host_shaders/opengl_vertex_shader.vert +++ b/src/host_shaders/opengl_vertex_shader.vert @@ -9,9 +9,7 @@ layout(location = 5) in float a_texcoord0_w; layout(location = 6) in vec3 a_view; layout(location = 7) in vec2 a_texcoord2; -out vec3 v_normal; -out vec3 v_tangent; -out vec3 v_bitangent; +out vec4 v_quaternion; out vec4 v_colour; out vec3 v_texcoord0; out vec2 v_texcoord1; @@ -35,12 +33,6 @@ vec4 abgr8888ToVec4(uint abgr) { return scale * vec4(float(abgr & 0xffu), float((abgr >> 8) & 0xffu), float((abgr >> 16) & 0xffu), float(abgr 
>> 24)); } -vec3 rotateVec3ByQuaternion(vec3 v, vec4 q) { - vec3 u = q.xyz; - float s = q.w; - return 2.0 * dot(u, v) * u + (s * s - dot(u, u)) * v + 2.0 * s * cross(u, v); -} - // Convert an arbitrary-width floating point literal to an f32 float decodeFP(uint hex, uint E, uint M) { uint width = M + E + 1u; @@ -73,10 +65,6 @@ void main() { v_texcoord2 = vec2(a_texcoord2.x, 1.0 - a_texcoord2.y); v_view = a_view; - v_normal = normalize(rotateVec3ByQuaternion(vec3(0.0, 0.0, 1.0), a_quaternion)); - v_tangent = normalize(rotateVec3ByQuaternion(vec3(1.0, 0.0, 0.0), a_quaternion)); - v_bitangent = normalize(rotateVec3ByQuaternion(vec3(0.0, 1.0, 0.0), a_quaternion)); - for (int i = 0; i < 6; i++) { v_textureEnvColor[i] = abgr8888ToVec4(u_textureEnvColor[i]); } @@ -95,4 +83,6 @@ void main() { // There's also another, always-on clipping plane based on vertex z gl_ClipDistance[0] = -a_coords.z; gl_ClipDistance[1] = dot(clipData, a_coords); + + v_quaternion = a_quaternion; } diff --git a/src/hydra_core.cpp b/src/hydra_core.cpp index acbf30a8..0bcd21a8 100644 --- a/src/hydra_core.cpp +++ b/src/hydra_core.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -113,10 +114,11 @@ hydra::Size HydraCore::getNativeSize() { return {400, 480}; } void HydraCore::setOutputSize(hydra::Size size) {} void HydraCore::resetContext() { -#ifdef __ANDROID__ +#ifdef USING_GLES if (!gladLoadGLES2Loader(reinterpret_cast(getProcAddress))) { Helpers::panic("OpenGL ES init failed"); } + emulator->getRenderer()->setupGLES(); #else if (!gladLoadGLLoader(reinterpret_cast(getProcAddress))) { Helpers::panic("OpenGL init failed"); @@ -150,7 +152,7 @@ HC_API const char* getInfo(hydra::InfoType type) { case hydra::InfoType::SystemName: return "Nintendo 3DS"; case hydra::InfoType::Description: return "HLE 3DS emulator. 
There's a little Alber in your computer and he runs Nintendo 3DS games."; case hydra::InfoType::Author: return "wheremyfoodat (Peach)"; - case hydra::InfoType::Version: return "0.7"; + case hydra::InfoType::Version: return PANDA3DS_VERSION; case hydra::InfoType::License: return "GPLv3"; case hydra::InfoType::Website: return "https://panda3ds.com/"; case hydra::InfoType::Extensions: return "3ds,cci,cxi,app,3dsx,elf,axf"; diff --git a/src/jni_driver.cpp b/src/jni_driver.cpp index e4ce2b39..6a156360 100644 --- a/src/jni_driver.cpp +++ b/src/jni_driver.cpp @@ -8,6 +8,7 @@ #include "renderer_gl/renderer_gl.hpp" #include "services/hid.hpp" #include "android_utils.hpp" +#include "sdl_sensors.hpp" std::unique_ptr emulator = nullptr; HIDService* hidService = nullptr; @@ -43,9 +44,15 @@ extern "C" { AlberFunction(void, functionName) (JNIEnv* env, jobject obj, type value) { emulator->getConfig().settingName = value; } MAKE_SETTING(setShaderJitEnabled, jboolean, shaderJitEnabled) +MAKE_SETTING(setAccurateShaderMulEnable, jboolean, accurateShaderMul) #undef MAKE_SETTING +AlberFunction(void, setAudioEnabled)(JNIEnv* env, jobject obj, jboolean value) { + emulator->getConfig().audioEnabled = value; + emulator->setAudioEnabled(value); +} + AlberFunction(void, Setup)(JNIEnv* env, jobject obj) { env->GetJavaVM(&jvm); @@ -71,6 +78,7 @@ AlberFunction(void, Initialize)(JNIEnv* env, jobject obj) { } __android_log_print(ANDROID_LOG_INFO, "AlberDriver", "OpenGL ES %d.%d", GLVersion.major, GLVersion.minor); + emulator->getRenderer()->setupGLES(); emulator->initGraphicsContext(nullptr); } @@ -87,6 +95,7 @@ AlberFunction(void, Finalize)(JNIEnv* env, jobject obj) { emulator = nullptr; hidService = nullptr; renderer = nullptr; + romLoaded = false; } AlberFunction(jboolean, HasRomLoaded)(JNIEnv* env, jobject obj) { return romLoaded; } @@ -110,6 +119,19 @@ AlberFunction(void, TouchScreenUp)(JNIEnv* env, jobject obj) { hidService->relea AlberFunction(void, KeyUp)(JNIEnv* env, jobject obj, jint 
keyCode) { hidService->releaseKey((u32)keyCode); } AlberFunction(void, KeyDown)(JNIEnv* env, jobject obj, jint keyCode) { hidService->pressKey((u32)keyCode); } +AlberFunction(void, SetGyro)(JNIEnv* env, jobject obj, jfloat roll, jfloat pitch, jfloat yaw) { + auto rotation = Sensors::SDL::convertRotation({ float(roll), float(pitch), float(yaw) }); + hidService->setPitch(s16(rotation.x)); + hidService->setRoll(s16(rotation.y)); + hidService->setYaw(s16(rotation.z)); +} + +AlberFunction(void, SetAccel)(JNIEnv* env, jobject obj, jfloat rawX, jfloat rawY, jfloat rawZ) { + float data[3] = { float(rawX), float(rawY), float(rawZ) }; + auto accel = Sensors::SDL::convertAcceleration(data); + hidService->setAccel(accel.x, accel.y, accel.z); +} + AlberFunction(void, SetCirclepadAxis)(JNIEnv* env, jobject obj, jint x, jint y) { hidService->setCirclepadX((s16)x); hidService->setCirclepadY((s16)y); @@ -132,11 +154,10 @@ int AndroidUtils::openDocument(const char* path, const char* perms) { jstring uri = env->NewStringUTF(path); jstring jmode = env->NewStringUTF(perms); - jint result = env->CallStaticIntMethod(alberClass, alberClassOpenDocument, uri, jmode); env->DeleteLocalRef(uri); env->DeleteLocalRef(jmode); return (int)result; -} \ No newline at end of file +} diff --git a/src/libretro_core.cpp b/src/libretro_core.cpp index f9772b37..727da8d2 100644 --- a/src/libretro_core.cpp +++ b/src/libretro_core.cpp @@ -1,20 +1,25 @@ #include #include +#include #include +#include #include #include -static retro_environment_t envCallbacks; -static retro_video_refresh_t videoCallbacks; +static retro_environment_t envCallback; +static retro_video_refresh_t videoCallback; static retro_audio_sample_batch_t audioBatchCallback; static retro_input_poll_t inputPollCallback; static retro_input_state_t inputStateCallback; -static retro_hw_render_callback hw_render; +static retro_hw_render_callback hwRender; static std::filesystem::path savePath; +static bool screenTouched = false; +static bool 
usingGLES = false; + std::unique_ptr emulator; RendererGL* renderer; @@ -26,49 +31,54 @@ std::filesystem::path Emulator::getAppDataRoot() { return std::filesystem::path(savePath / "Emulator Files"); } -static void* GetGLProcAddress(const char* name) { - return (void*)hw_render.get_proc_address(name); +static void* getGLProcAddress(const char* name) { + return (void*)hwRender.get_proc_address(name); } -static void VideoResetContext() { -#ifdef USING_GLES - if (!gladLoadGLES2Loader(reinterpret_cast(GetGLProcAddress))) { - Helpers::panic("OpenGL ES init failed"); +static void videoResetContext() { + if (usingGLES) { + if (!gladLoadGLES2Loader(reinterpret_cast(getGLProcAddress))) { + Helpers::panic("OpenGL ES init failed"); + } + + emulator->getRenderer()->setupGLES(); } -#else - if (!gladLoadGLLoader(reinterpret_cast(GetGLProcAddress))) { - Helpers::panic("OpenGL init failed"); + + else { + if (!gladLoadGLLoader(reinterpret_cast(getGLProcAddress))) { + Helpers::panic("OpenGL init failed"); + } } -#endif emulator->initGraphicsContext(nullptr); } -static void VideoDestroyContext() { - emulator->deinitGraphicsContext(); +static void videoDestroyContext() { + emulator->deinitGraphicsContext(); } -static bool SetHWRender(retro_hw_context_type type) { - hw_render.context_type = type; - hw_render.context_reset = VideoResetContext; - hw_render.context_destroy = VideoDestroyContext; - hw_render.bottom_left_origin = true; +static bool setHWRender(retro_hw_context_type type) { + hwRender.context_type = type; + hwRender.context_reset = videoResetContext; + hwRender.context_destroy = videoDestroyContext; + hwRender.bottom_left_origin = true; switch (type) { case RETRO_HW_CONTEXT_OPENGL_CORE: - hw_render.version_major = 4; - hw_render.version_minor = 1; + hwRender.version_major = 4; + hwRender.version_minor = 1; - if (envCallbacks(RETRO_ENVIRONMENT_SET_HW_RENDER, &hw_render)) { + if (envCallback(RETRO_ENVIRONMENT_SET_HW_RENDER, &hwRender)) { return true; } break; case 
RETRO_HW_CONTEXT_OPENGLES3: case RETRO_HW_CONTEXT_OPENGL: - hw_render.version_major = 3; - hw_render.version_minor = 1; + hwRender.version_major = 3; + hwRender.version_minor = 1; - if (envCallbacks(RETRO_ENVIRONMENT_SET_HW_RENDER, &hw_render)) { + if (envCallback(RETRO_ENVIRONMENT_SET_HW_RENDER, &hwRender)) { + usingGLES = true; return true; } break; @@ -80,18 +90,18 @@ static bool SetHWRender(retro_hw_context_type type) { static void videoInit() { retro_hw_context_type preferred = RETRO_HW_CONTEXT_NONE; - envCallbacks(RETRO_ENVIRONMENT_GET_PREFERRED_HW_RENDER, &preferred); + envCallback(RETRO_ENVIRONMENT_GET_PREFERRED_HW_RENDER, &preferred); - if (preferred && SetHWRender(preferred)) return; - if (SetHWRender(RETRO_HW_CONTEXT_OPENGL_CORE)) return; - if (SetHWRender(RETRO_HW_CONTEXT_OPENGL)) return; - if (SetHWRender(RETRO_HW_CONTEXT_OPENGLES3)) return; + if (preferred && setHWRender(preferred)) return; + if (setHWRender(RETRO_HW_CONTEXT_OPENGL_CORE)) return; + if (setHWRender(RETRO_HW_CONTEXT_OPENGL)) return; + if (setHWRender(RETRO_HW_CONTEXT_OPENGLES3)) return; - hw_render.context_type = RETRO_HW_CONTEXT_NONE; + hwRender.context_type = RETRO_HW_CONTEXT_NONE; } -static bool GetButtonState(uint id) { return inputStateCallback(0, RETRO_DEVICE_JOYPAD, 0, id); } -static float GetAxisState(uint index, uint id) { return inputStateCallback(0, RETRO_DEVICE_ANALOG, index, id); } +static bool getButtonState(uint id) { return inputStateCallback(0, RETRO_DEVICE_JOYPAD, 0, id); } +static float getAxisState(uint index, uint id) { return inputStateCallback(0, RETRO_DEVICE_ANALOG, index, id); } static void inputInit() { static const retro_controller_description controllers[] = { @@ -104,7 +114,7 @@ static void inputInit() { {NULL, 0}, }; - envCallbacks(RETRO_ENVIRONMENT_SET_CONTROLLER_INFO, (void*)ports); + envCallback(RETRO_ENVIRONMENT_SET_CONTROLLER_INFO, (void*)ports); retro_input_descriptor desc[] = { {0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_LEFT, "Left"}, @@ 
-124,14 +134,14 @@ static void inputInit() { {0}, }; - envCallbacks(RETRO_ENVIRONMENT_SET_INPUT_DESCRIPTORS, &desc); + envCallback(RETRO_ENVIRONMENT_SET_INPUT_DESCRIPTORS, &desc); } -static std::string FetchVariable(std::string key, std::string def) { +static std::string fetchVariable(std::string key, std::string def) { retro_variable var = {nullptr}; var.key = key.c_str(); - if (!envCallbacks(RETRO_ENVIRONMENT_GET_VARIABLE, &var) || var.value == nullptr) { + if (!envCallback(RETRO_ENVIRONMENT_GET_VARIABLE, &var) || var.value == nullptr) { Helpers::warn("Fetching variable %s failed.", key.c_str()); return def; } @@ -139,46 +149,86 @@ static std::string FetchVariable(std::string key, std::string def) { return std::string(var.value); } -static bool FetchVariableBool(std::string key, bool def) { - return FetchVariable(key, def ? "enabled" : "disabled") == "enabled"; +static int fetchVariableInt(std::string key, int def) { + std::string value = fetchVariable(key, std::to_string(def)); + + if (!value.empty() && std::isdigit(value[0])) { + return std::stoi(value); + } + + return 0; +} + +static bool fetchVariableBool(std::string key, bool def) { + return fetchVariable(key, def ? "enabled" : "disabled") == "enabled"; +} + +static int fetchVariableRange(std::string key, int min, int max) { + return std::clamp(fetchVariableInt(key, min), min, max); } static void configInit() { static const retro_variable values[] = { - {"panda3ds_use_shader_jit", "Enable shader JIT; enabled|disabled"}, + {"panda3ds_use_shader_jit", EmulatorConfig::shaderJitDefault ? "Enable shader JIT; enabled|disabled" : "Enable shader JIT; disabled|enabled"}, + {"panda3ds_accelerate_shaders", + EmulatorConfig::accelerateShadersDefault ? "Run 3DS shaders on the GPU; enabled|disabled" : "Run 3DS shaders on the GPU; disabled|enabled"}, + {"panda3ds_accurate_shader_mul", "Enable accurate shader multiplication; disabled|enabled"}, + {"panda3ds_use_ubershader", EmulatorConfig::ubershaderDefault ? 
"Use ubershaders (No stutter, maybe slower); enabled|disabled" + : "Use ubershaders (No stutter, maybe slower); disabled|enabled"}, {"panda3ds_use_vsync", "Enable VSync; enabled|disabled"}, - {"panda3ds_dsp_emulation", "DSP emulation; Null|HLE|LLE"}, - {"panda3ds_use_audio", "Enable audio; disabled|enabled"}, + {"panda3ds_system_language", "System language; En|Fr|Es|De|It|Pt|Nl|Ru|Ja|Zh|Ko|Tw"}, + {"panda3ds_dsp_emulation", "DSP emulation; HLE|LLE|Null"}, + {"panda3ds_use_audio", EmulatorConfig::audioEnabledDefault ? "Enable audio; enabled|disabled" : "Enable audio; disabled|enabled"}, + {"panda3ds_audio_volume", "Audio volume; 100|0|10|20|40|60|80|90|100|120|140|150|180|200"}, + {"panda3ds_mute_audio", "Mute audio; disabled|enabled"}, + {"panda3ds_enable_aac", "Enable AAC audio; enabled|disabled"}, + + {"panda3ds_ubershader_lighting_override", "Force shadergen when rendering lights; enabled|disabled"}, + {"panda3ds_ubershader_lighting_override_threshold", "Light threshold for forcing shadergen; 1|2|3|4|5|6|7|8"}, {"panda3ds_use_virtual_sd", "Enable virtual SD card; enabled|disabled"}, {"panda3ds_write_protect_virtual_sd", "Write protect virtual SD card; disabled|enabled"}, {"panda3ds_battery_level", "Battery percentage; 5|10|20|30|50|70|90|100"}, {"panda3ds_use_charger", "Charger plugged; enabled|disabled"}, - {nullptr, nullptr} + {nullptr, nullptr}, }; - envCallbacks(RETRO_ENVIRONMENT_SET_VARIABLES, (void*)values); + envCallback(RETRO_ENVIRONMENT_SET_VARIABLES, (void*)values); } static void configUpdate() { EmulatorConfig& config = emulator->getConfig(); config.rendererType = RendererType::OpenGL; - config.vsyncEnabled = FetchVariableBool("panda3ds_use_vsync", true); - config.shaderJitEnabled = FetchVariableBool("panda3ds_use_shader_jit", true); - config.chargerPlugged = FetchVariableBool("panda3ds_use_charger", true); - config.batteryPercentage = std::clamp(std::stoi(FetchVariable("panda3ds_battery_level", "5")), 0, 100); - config.dspType = 
Audio::DSPCore::typeFromString(FetchVariable("panda3ds_dsp_emulation", "null")); - config.audioEnabled = FetchVariableBool("panda3ds_use_audio", false); - config.sdCardInserted = FetchVariableBool("panda3ds_use_virtual_sd", true); - config.sdWriteProtected = FetchVariableBool("panda3ds_write_protect_virtual_sd", false); + config.vsyncEnabled = fetchVariableBool("panda3ds_use_vsync", true); + config.shaderJitEnabled = fetchVariableBool("panda3ds_use_shader_jit", EmulatorConfig::shaderJitDefault); + config.chargerPlugged = fetchVariableBool("panda3ds_use_charger", true); + config.batteryPercentage = fetchVariableRange("panda3ds_battery_level", 5, 100); + config.systemLanguage = EmulatorConfig::languageCodeFromString(fetchVariable("panda3ds_system_language", "en")); + + config.dspType = Audio::DSPCore::typeFromString(fetchVariable("panda3ds_dsp_emulation", "null")); + config.audioEnabled = fetchVariableBool("panda3ds_use_audio", false); + config.aacEnabled = fetchVariableBool("panda3ds_enable_aac", true); + config.audioDeviceConfig.muteAudio = fetchVariableBool("panda3ds_mute_audio", false); + config.audioDeviceConfig.volumeRaw = float(fetchVariableRange("panda3ds_audio_volume", 0, 200)) / 100.0f; + + config.sdCardInserted = fetchVariableBool("panda3ds_use_virtual_sd", true); + config.sdWriteProtected = fetchVariableBool("panda3ds_write_protect_virtual_sd", false); + config.accurateShaderMul = fetchVariableBool("panda3ds_accurate_shader_mul", false); + config.useUbershaders = fetchVariableBool("panda3ds_use_ubershader", EmulatorConfig::ubershaderDefault); + config.accelerateShaders = fetchVariableBool("panda3ds_accelerate_shaders", EmulatorConfig::accelerateShadersDefault); + + config.forceShadergenForLights = fetchVariableBool("panda3ds_ubershader_lighting_override", true); + config.lightShadergenThreshold = fetchVariableRange("panda3ds_ubershader_lighting_override_threshold", 1, 8); config.discordRpcEnabled = false; + // Handle any settings that might need the 
emulator core to be notified when they're changed, and save the config. + emulator->setAudioEnabled(config.audioEnabled); config.save(); } -static void ConfigCheckVariables() { +static void configCheckVariables() { bool updated = false; - envCallbacks(RETRO_ENVIRONMENT_GET_VARIABLE_UPDATE, &updated); + envCallback(RETRO_ENVIRONMENT_GET_VARIABLE_UPDATE, &updated); if (updated) { configUpdate(); @@ -188,9 +238,9 @@ static void ConfigCheckVariables() { void retro_get_system_info(retro_system_info* info) { info->need_fullpath = true; info->valid_extensions = "3ds|3dsx|elf|axf|cci|cxi|app"; - info->library_version = "0.8"; + info->library_version = PANDA3DS_VERSION; info->library_name = "Panda3DS"; - info->block_extract = true; + info->block_extract = false; } void retro_get_system_av_info(retro_system_av_info* info) { @@ -206,11 +256,11 @@ void retro_get_system_av_info(retro_system_av_info* info) { } void retro_set_environment(retro_environment_t cb) { - envCallbacks = cb; + envCallback = cb; } void retro_set_video_refresh(retro_video_refresh_t cb) { - videoCallbacks = cb; + videoCallback = cb; } void retro_set_audio_sample_batch(retro_audio_sample_batch_t cb) { @@ -229,15 +279,15 @@ void retro_set_input_state(retro_input_state_t cb) { void retro_init() { enum retro_pixel_format xrgb888 = RETRO_PIXEL_FORMAT_XRGB8888; - envCallbacks(RETRO_ENVIRONMENT_SET_PIXEL_FORMAT, &xrgb888); + envCallback(RETRO_ENVIRONMENT_SET_PIXEL_FORMAT, &xrgb888); - char* save_dir = nullptr; + char* saveDir = nullptr; - if (!envCallbacks(RETRO_ENVIRONMENT_GET_SAVE_DIRECTORY, &save_dir) || save_dir == nullptr) { + if (!envCallback(RETRO_ENVIRONMENT_GET_SAVE_DIRECTORY, &saveDir) || saveDir == nullptr) { Helpers::warn("No save directory provided by LibRetro."); savePath = std::filesystem::current_path(); } else { - savePath = std::filesystem::path(save_dir); + savePath = std::filesystem::path(saveDir); } emulator = std::make_unique(); @@ -276,36 +326,37 @@ void retro_reset() { } void retro_run() { 
- ConfigCheckVariables(); + configCheckVariables(); - renderer->setFBO(hw_render.get_current_framebuffer()); + renderer->setFBO(hwRender.get_current_framebuffer()); renderer->resetStateManager(); inputPollCallback(); HIDService& hid = emulator->getServiceManager().getHID(); - hid.setKey(HID::Keys::A, GetButtonState(RETRO_DEVICE_ID_JOYPAD_A)); - hid.setKey(HID::Keys::B, GetButtonState(RETRO_DEVICE_ID_JOYPAD_B)); - hid.setKey(HID::Keys::X, GetButtonState(RETRO_DEVICE_ID_JOYPAD_X)); - hid.setKey(HID::Keys::Y, GetButtonState(RETRO_DEVICE_ID_JOYPAD_Y)); - hid.setKey(HID::Keys::L, GetButtonState(RETRO_DEVICE_ID_JOYPAD_L)); - hid.setKey(HID::Keys::R, GetButtonState(RETRO_DEVICE_ID_JOYPAD_R)); - hid.setKey(HID::Keys::Start, GetButtonState(RETRO_DEVICE_ID_JOYPAD_START)); - hid.setKey(HID::Keys::Select, GetButtonState(RETRO_DEVICE_ID_JOYPAD_SELECT)); - hid.setKey(HID::Keys::Up, GetButtonState(RETRO_DEVICE_ID_JOYPAD_UP)); - hid.setKey(HID::Keys::Down, GetButtonState(RETRO_DEVICE_ID_JOYPAD_DOWN)); - hid.setKey(HID::Keys::Left, GetButtonState(RETRO_DEVICE_ID_JOYPAD_LEFT)); - hid.setKey(HID::Keys::Right, GetButtonState(RETRO_DEVICE_ID_JOYPAD_RIGHT)); + hid.setKey(HID::Keys::A, getButtonState(RETRO_DEVICE_ID_JOYPAD_A)); + hid.setKey(HID::Keys::B, getButtonState(RETRO_DEVICE_ID_JOYPAD_B)); + hid.setKey(HID::Keys::X, getButtonState(RETRO_DEVICE_ID_JOYPAD_X)); + hid.setKey(HID::Keys::Y, getButtonState(RETRO_DEVICE_ID_JOYPAD_Y)); + hid.setKey(HID::Keys::L, getButtonState(RETRO_DEVICE_ID_JOYPAD_L)); + hid.setKey(HID::Keys::R, getButtonState(RETRO_DEVICE_ID_JOYPAD_R)); + hid.setKey(HID::Keys::Start, getButtonState(RETRO_DEVICE_ID_JOYPAD_START)); + hid.setKey(HID::Keys::Select, getButtonState(RETRO_DEVICE_ID_JOYPAD_SELECT)); + hid.setKey(HID::Keys::Up, getButtonState(RETRO_DEVICE_ID_JOYPAD_UP)); + hid.setKey(HID::Keys::Down, getButtonState(RETRO_DEVICE_ID_JOYPAD_DOWN)); + hid.setKey(HID::Keys::Left, getButtonState(RETRO_DEVICE_ID_JOYPAD_LEFT)); + hid.setKey(HID::Keys::Right, 
getButtonState(RETRO_DEVICE_ID_JOYPAD_RIGHT)); // Get analog values for the left analog stick (Right analog stick is N3DS-only and unimplemented) - float xLeft = GetAxisState(RETRO_DEVICE_INDEX_ANALOG_LEFT, RETRO_DEVICE_ID_ANALOG_X); - float yLeft = GetAxisState(RETRO_DEVICE_INDEX_ANALOG_LEFT, RETRO_DEVICE_ID_ANALOG_Y); + float xLeft = getAxisState(RETRO_DEVICE_INDEX_ANALOG_LEFT, RETRO_DEVICE_ID_ANALOG_X); + float yLeft = getAxisState(RETRO_DEVICE_INDEX_ANALOG_LEFT, RETRO_DEVICE_ID_ANALOG_Y); hid.setCirclepadX((xLeft / +32767) * 0x9C); hid.setCirclepadY((yLeft / -32767) * 0x9C); - bool touch = inputStateCallback(0, RETRO_DEVICE_MOUSE, 0, RETRO_DEVICE_ID_MOUSE_LEFT); + bool touchScreen = false; + const int posX = inputStateCallback(0, RETRO_DEVICE_POINTER, 0, RETRO_DEVICE_ID_POINTER_X); const int posY = inputStateCallback(0, RETRO_DEVICE_POINTER, 0, RETRO_DEVICE_ID_POINTER_Y); @@ -315,22 +366,29 @@ void retro_run() { const int offsetX = 40; const int offsetY = emulator->height / 2; - const bool inScreenX = newX >= offsetX && newX < emulator->width - offsetX; + const bool inScreenX = newX >= offsetX && newX <= emulator->width - offsetX; const bool inScreenY = newY >= offsetY && newY <= emulator->height; - if (touch && inScreenX && inScreenY) { + if (inScreenX && inScreenY) { + touchScreen |= inputStateCallback(0, RETRO_DEVICE_MOUSE, 0, RETRO_DEVICE_ID_MOUSE_LEFT); + touchScreen |= inputStateCallback(0, RETRO_DEVICE_POINTER, 0, RETRO_DEVICE_ID_POINTER_PRESSED); + } + + if (touchScreen) { u16 x = static_cast(newX - offsetX); u16 y = static_cast(newY - offsetY); hid.setTouchScreenPress(x, y); - } else { + screenTouched = true; + } else if (screenTouched) { hid.releaseTouchScreen(); + screenTouched = false; } hid.updateInputs(emulator->getTicks()); emulator->runFrame(); - videoCallbacks(RETRO_HW_FRAME_BUFFER_VALID, emulator->width, emulator->height, 0); + videoCallback(RETRO_HW_FRAME_BUFFER_VALID, emulator->width, emulator->height, 0); } void 
retro_set_controller_port_device(uint port, uint device) {} @@ -348,7 +406,7 @@ uint retro_api_version() { return RETRO_API_VERSION; } usize retro_get_memory_size(uint id) { if (id == RETRO_MEMORY_SYSTEM_RAM) { - return 0; + return Memory::FCRAM_SIZE; } return 0; @@ -356,11 +414,30 @@ usize retro_get_memory_size(uint id) { void* retro_get_memory_data(uint id) { if (id == RETRO_MEMORY_SYSTEM_RAM) { - return 0; + return emulator->getMemory().getFCRAM(); } return nullptr; } -void retro_cheat_set(uint index, bool enabled, const char* code) {} -void retro_cheat_reset() {} +void retro_cheat_set(uint index, bool enabled, const char* code) { + std::string cheatCode = std::regex_replace(code, std::regex("[^0-9a-fA-F]"), ""); + std::vector bytes; + + for (usize i = 0; i < cheatCode.size(); i += 2) { + std::string hex = cheatCode.substr(i, 2); + bytes.push_back((u8)std::stoul(hex, nullptr, 16)); + } + + u32 id = emulator->getCheats().addCheat(bytes.data(), bytes.size()); + + if (enabled) { + emulator->getCheats().enableCheat(id); + } else { + emulator->getCheats().disableCheat(id); + } +} + +void retro_cheat_reset() { + emulator->getCheats().reset(); +} diff --git a/src/lua.cpp b/src/lua.cpp index c5b86624..d56e23e5 100644 --- a/src/lua.cpp +++ b/src/lua.cpp @@ -133,6 +133,32 @@ MAKE_MEMORY_FUNCTIONS(32) MAKE_MEMORY_FUNCTIONS(64) #undef MAKE_MEMORY_FUNCTIONS +static int readFloatThunk(lua_State* L) { + const u32 vaddr = (u32)lua_tonumber(L, 1); + lua_pushnumber(L, (lua_Number)Helpers::bit_cast(LuaManager::g_emulator->getMemory().read32(vaddr))); + return 1; +} + +static int writeFloatThunk(lua_State* L) { + const u32 vaddr = (u32)lua_tonumber(L, 1); + const float value = (float)lua_tonumber(L, 2); + LuaManager::g_emulator->getMemory().write32(vaddr, Helpers::bit_cast(value)); + return 0; +} + +static int readDoubleThunk(lua_State* L) { + const u32 vaddr = (u32)lua_tonumber(L, 1); + lua_pushnumber(L, 
(lua_Number)Helpers::bit_cast(LuaManager::g_emulator->getMemory().read64(vaddr))); + return 1; +} + +static int writeDoubleThunk(lua_State* L) { + const u32 vaddr = (u32)lua_tonumber(L, 1); + const double value = (double)lua_tonumber(L, 2); + LuaManager::g_emulator->getMemory().write64(vaddr, Helpers::bit_cast(value)); + return 0; +} + static int getAppIDThunk(lua_State* L) { std::optional id = LuaManager::g_emulator->getMemory().getProgramID(); @@ -325,10 +351,14 @@ static constexpr luaL_Reg functions[] = { { "__read16", read16Thunk }, { "__read32", read32Thunk }, { "__read64", read64Thunk }, + { "__readFloat", readFloatThunk }, + { "__readDouble", readDoubleThunk }, { "__write8", write8Thunk} , { "__write16", write16Thunk }, { "__write32", write32Thunk }, { "__write64", write64Thunk }, + { "__writeFloat", writeFloatThunk }, + { "__writeDouble", writeDoubleThunk }, { "__getAppID", getAppIDThunk }, { "__pause", pauseThunk }, { "__resume", resumeThunk }, @@ -350,10 +380,15 @@ void LuaManager::initializeThunks() { read16 = function(addr) return GLOBALS.__read16(addr) end, read32 = function(addr) return GLOBALS.__read32(addr) end, read64 = function(addr) return GLOBALS.__read64(addr) end, + readFloat = function(addr) return GLOBALS.__readFloat(addr) end, + readDouble = function(addr) return GLOBALS.__readDouble(addr) end, + write8 = function(addr, value) GLOBALS.__write8(addr, value) end, write16 = function(addr, value) GLOBALS.__write16(addr, value) end, write32 = function(addr, value) GLOBALS.__write32(addr, value) end, write64 = function(addr, value) GLOBALS.__write64(addr, value) end, + writeFloat = function(addr, value) GLOBALS.__writeFloat(addr, value) end, + writeDouble = function(addr, value) GLOBALS.__writeDouble(addr, value) end, getAppID = function() local ffi = require("ffi") diff --git a/src/miniaudio.cpp b/src/miniaudio.cpp index e42fea68..a61979e0 100644 --- a/src/miniaudio.cpp +++ b/src/miniaudio.cpp @@ -1,5 +1,5 @@ // We do not need the ability to be 
able to encode or decode audio files for the time being -// So we disable said functionality to make the executable smaller +// So we disable said functionality to make the executable smaller. #define MA_NO_DECODING #define MA_NO_ENCODING #define MINIAUDIO_IMPLEMENTATION diff --git a/src/panda_qt/about_window.cpp b/src/panda_qt/about_window.cpp index 67767198..a388dad3 100644 --- a/src/panda_qt/about_window.cpp +++ b/src/panda_qt/about_window.cpp @@ -1,10 +1,13 @@ #include "panda_qt/about_window.hpp" +#include #include #include #include #include +#include "version.hpp" + // Based on https://github.com/dolphin-emu/dolphin/blob/master/Source/Core/DolphinQt/AboutDialog.cpp AboutWindow::AboutWindow(QWidget* parent) : QDialog(parent) { @@ -17,6 +20,8 @@ AboutWindow::AboutWindow(QWidget* parent) : QDialog(parent) { QStringLiteral(R"(

Panda3DS

+

v%VERSION_STRING%

+

%ABOUT_PANDA3DS%
%SUPPORT%
@@ -26,6 +31,7 @@ AboutWindow::AboutWindow(QWidget* parent) : QDialog(parent) { %AUTHORS%

)") + .replace(QStringLiteral("%VERSION_STRING%"), PANDA3DS_VERSION) .replace(QStringLiteral("%ABOUT_PANDA3DS%"), tr("Panda3DS is a free and open source Nintendo 3DS emulator, for Windows, MacOS and Linux")) .replace(QStringLiteral("%SUPPORT%"), tr("Visit panda3ds.com for help with Panda3DS and links to our official support sites.")) .replace( diff --git a/src/panda_qt/cheats_window.cpp b/src/panda_qt/cheats_window.cpp index dbd251cc..2485c677 100644 --- a/src/panda_qt/cheats_window.cpp +++ b/src/panda_qt/cheats_window.cpp @@ -1,15 +1,9 @@ #include "panda_qt/cheats_window.hpp" -#include -#include #include -#include -#include -#include -#include -#include -#include +#include #include +#include #include #include "cheats.hpp" @@ -18,71 +12,17 @@ MainWindow* mainWindow = nullptr; -struct CheatMetadata { - u32 handle = Cheats::badCheatHandle; - std::string name = "New cheat"; - std::string code; - bool enabled = true; -}; - void dispatchToMainThread(std::function callback) { - QTimer* timer = new QTimer(); - timer->moveToThread(qApp->thread()); - timer->setSingleShot(true); - QObject::connect(timer, &QTimer::timeout, [=]() - { - callback(); - timer->deleteLater(); - }); - QMetaObject::invokeMethod(timer, "start", Qt::QueuedConnection, Q_ARG(int, 0)); + QTimer* timer = new QTimer(); + timer->moveToThread(qApp->thread()); + timer->setSingleShot(true); + QObject::connect(timer, &QTimer::timeout, [=]() { + callback(); + timer->deleteLater(); + }); + QMetaObject::invokeMethod(timer, "start", Qt::QueuedConnection, Q_ARG(int, 0)); } -class CheatEntryWidget : public QWidget { - public: - CheatEntryWidget(Emulator* emu, CheatMetadata metadata, QListWidget* parent); - - void Update() { - name->setText(metadata.name.c_str()); - enabled->setChecked(metadata.enabled); - update(); - } - - void Remove() { - emu->getCheats().removeCheat(metadata.handle); - cheatList->takeItem(cheatList->row(listItem)); - deleteLater(); - } - - const CheatMetadata& getMetadata() { return metadata; } - 
void setMetadata(const CheatMetadata& metadata) { this->metadata = metadata; } - - private: - void checkboxChanged(int state); - void editClicked(); - - Emulator* emu; - CheatMetadata metadata; - u32 handle; - QLabel* name; - QCheckBox* enabled; - QListWidget* cheatList; - QListWidgetItem* listItem; -}; - -class CheatEditDialog : public QDialog { - public: - CheatEditDialog(Emulator* emu, CheatEntryWidget& cheatEntry); - - void accepted(); - void rejected(); - - private: - Emulator* emu; - CheatEntryWidget& cheatEntry; - QTextEdit* codeEdit; - QLineEdit* nameEdit; -}; - CheatEntryWidget::CheatEntryWidget(Emulator* emu, CheatMetadata metadata, QListWidget* parent) : QWidget(), emu(emu), metadata(metadata), cheatList(parent) { QHBoxLayout* layout = new QHBoxLayout; @@ -129,6 +69,8 @@ void CheatEntryWidget::editClicked() { } CheatEditDialog::CheatEditDialog(Emulator* emu, CheatEntryWidget& cheatEntry) : QDialog(), emu(emu), cheatEntry(cheatEntry) { + setWindowTitle(tr("Edit Cheat")); + setAttribute(Qt::WA_DeleteOnClose); setModal(true); @@ -219,7 +161,8 @@ void CheatEditDialog::rejected() { CheatsWindow::CheatsWindow(Emulator* emu, const std::filesystem::path& cheatPath, QWidget* parent) : QWidget(parent, Qt::Window), emu(emu), cheatPath(cheatPath) { - mainWindow = static_cast(parent); + setWindowTitle(tr("Cheats")); + mainWindow = static_cast(parent); QVBoxLayout* layout = new QVBoxLayout; layout->setContentsMargins(6, 6, 6, 6); diff --git a/src/panda_qt/config_window.cpp b/src/panda_qt/config_window.cpp index 75293742..a4e43f52 100644 --- a/src/panda_qt/config_window.cpp +++ b/src/panda_qt/config_window.cpp @@ -1,26 +1,353 @@ #include "panda_qt/config_window.hpp" -ConfigWindow::ConfigWindow(QWidget* parent) : QDialog(parent) { +#include "version.hpp" + +ConfigWindow::ConfigWindow(ConfigCallback configCallback, MainWindowCallback windowCallback, const EmulatorConfig& emuConfig, QWidget* parent) + : QDialog(parent), config(emuConfig), 
updateConfig(std::move(configCallback)), getMainWindow(std::move(windowCallback)) { setWindowTitle(tr("Configuration")); // Set up theme selection - setTheme(Theme::Dark); - themeSelect = new QComboBox(this); + setTheme(config.frontendSettings.theme); + setIcon(config.frontendSettings.icon); + + // Set the window title of the main window appropriately if we enable showing the app version on the window + if (config.windowSettings.showAppVersion) { + getMainWindow()->setWindowTitle(tr("Alber v%1").arg(PANDA3DS_VERSION)); + } + + // Initialize the widget list and the widget container widgets + widgetList = new QListWidget(this); + widgetContainer = new QStackedWidget(this); + + helpText = new QTextEdit(this); + helpText->setReadOnly(true); + + helpText->setSizePolicy(QSizePolicy::Expanding, QSizePolicy::Fixed); + helpText->setFixedHeight(50); + + widgetList->setMinimumWidth(100); + widgetList->setMaximumWidth(100); + widgetList->setSizePolicy(QSizePolicy::MinimumExpanding, QSizePolicy::MinimumExpanding); + widgetList->setPalette(QPalette(QColor(25, 25, 25))); + + widgetList->setCurrentRow(0); + widgetContainer->setCurrentIndex(0); + + connect(widgetList, &QListWidget::currentRowChanged, this, [&](int row) { + widgetContainer->setCurrentIndex(row); + helpText->setText(helpTexts[row]); + }); + + auto connectCheckbox = [&](QCheckBox* checkbox, bool& setting) { + checkbox->setChecked(setting); + + connect(checkbox, &QCheckBox::toggled, this, [&](bool checked) { + setting = checked; + updateConfig(); + }); + }; + + QVBoxLayout* mainLayout = new QVBoxLayout(); + QHBoxLayout* hLayout = new QHBoxLayout(); + + // Set up widget layouts + setLayout(mainLayout); + mainLayout->addLayout(hLayout); + mainLayout->addWidget(helpText); + + hLayout->setAlignment(Qt::AlignLeft); + hLayout->addWidget(widgetList); + hLayout->addWidget(widgetContainer); + + // Interface settings + QGroupBox* guiGroupBox = new QGroupBox(tr("Interface Settings"), this); + QFormLayout* guiLayout = new 
QFormLayout(guiGroupBox); + guiLayout->setHorizontalSpacing(20); + guiLayout->setVerticalSpacing(10); + + QComboBox* themeSelect = new QComboBox(); themeSelect->addItem(tr("System")); themeSelect->addItem(tr("Light")); themeSelect->addItem(tr("Dark")); themeSelect->addItem(tr("Greetings Cat")); themeSelect->addItem(tr("Cream")); - themeSelect->setCurrentIndex(static_cast(currentTheme)); + themeSelect->setCurrentIndex(static_cast(config.frontendSettings.theme)); + connect(themeSelect, &QComboBox::currentIndexChanged, this, [&](int index) { + config.frontendSettings.theme = static_cast(index); + setTheme(static_cast(index)); - themeSelect->setGeometry(40, 40, 100, 50); - themeSelect->show(); - connect(themeSelect, &QComboBox::currentIndexChanged, this, [&](int index) { setTheme(static_cast(index)); }); + updateConfig(); + }); + guiLayout->addRow(tr("Color theme"), themeSelect); + + QComboBox* iconSelect = new QComboBox(); + iconSelect->addItem(tr("Happy panda")); + iconSelect->addItem(tr("Happy panda (colourful)")); + iconSelect->addItem(tr("Sleepy panda")); + iconSelect->addItem(tr("Cow panda")); + iconSelect->addItem(tr("The penguin from SkyEmu")); + iconSelect->setCurrentIndex(static_cast(config.frontendSettings.icon)); + + connect(iconSelect, &QComboBox::currentIndexChanged, this, [&](int index) { + config.frontendSettings.icon = static_cast(index); + setIcon(static_cast(index)); + + updateConfig(); + }); + guiLayout->addRow(tr("Window icon"), iconSelect); + + QComboBox* languageSelect = createLanguageSelect(); + guiLayout->addRow(tr("Language"), languageSelect); + + QCheckBox* showAppVersion = new QCheckBox(tr("Show version on window title")); + showAppVersion->setChecked(config.windowSettings.showAppVersion); + connect(showAppVersion, &QCheckBox::toggled, this, [&](bool checked) { + config.windowSettings.showAppVersion = checked; + updateConfig(); + + // Update main window title + getMainWindow()->setWindowTitle(checked ? 
tr("Alber v%1").arg(PANDA3DS_VERSION) : tr("Alber")); + }); + connectCheckbox(showAppVersion, config.windowSettings.showAppVersion); + guiLayout->addRow(showAppVersion); + + QCheckBox* rememberPosition = new QCheckBox(tr("Remember window position")); + connectCheckbox(rememberPosition, config.windowSettings.rememberPosition); + guiLayout->addRow(rememberPosition); + + // General settings + QGroupBox* genGroupBox = new QGroupBox(tr("General Settings"), this); + QFormLayout* genLayout = new QFormLayout(genGroupBox); + genLayout->setHorizontalSpacing(20); + genLayout->setVerticalSpacing(10); + + QLineEdit* defaultRomPath = new QLineEdit; + defaultRomPath->setText(QString::fromStdU16String(config.defaultRomPath.u16string())); + connect(defaultRomPath, &QLineEdit::textChanged, this, [&](const QString& text) { + config.defaultRomPath = text.toStdString(); + updateConfig(); + }); + QPushButton* browseRomPath = new QPushButton(tr("Browse...")); + browseRomPath->setAutoDefault(false); + connect(browseRomPath, &QPushButton::pressed, this, [&, defaultRomPath]() { + QString newPath = QFileDialog::getExistingDirectory( + this, tr("Select Directory"), QString::fromStdU16String(config.defaultRomPath.u16string()), + QFileDialog::ShowDirsOnly | QFileDialog::DontResolveSymlinks + ); + if (!newPath.isEmpty()) { + defaultRomPath->setText(newPath); + } + }); + + QHBoxLayout* romLayout = new QHBoxLayout(); + romLayout->setSpacing(4); + romLayout->addWidget(defaultRomPath); + romLayout->addWidget(browseRomPath); + genLayout->addRow(tr("Default ROMs path"), romLayout); + + QComboBox* systemLanguage = new QComboBox(); + systemLanguage->addItem(tr("Japanese")); + systemLanguage->addItem(tr("English")); + systemLanguage->addItem(tr("French")); + systemLanguage->addItem(tr("German")); + systemLanguage->addItem(tr("Italian")); + systemLanguage->addItem(tr("Spanish")); + systemLanguage->addItem(tr("Chinese")); + systemLanguage->addItem(tr("Korean")); + systemLanguage->addItem(tr("Dutch")); + 
systemLanguage->addItem(tr("Portuguese")); + systemLanguage->addItem(tr("Russian")); + systemLanguage->addItem(tr("Taiwanese")); + + systemLanguage->setCurrentIndex(static_cast(config.systemLanguage)); + connect(systemLanguage, &QComboBox::currentIndexChanged, this, [&](int index) { + config.systemLanguage = static_cast(index); + updateConfig(); + }); + genLayout->addRow(tr("System language"), systemLanguage); + + QCheckBox* discordRpcEnabled = new QCheckBox(tr("Enable Discord RPC")); + connectCheckbox(discordRpcEnabled, config.discordRpcEnabled); + genLayout->addRow(discordRpcEnabled); + + QCheckBox* usePortableBuild = new QCheckBox(tr("Use portable build")); + connectCheckbox(usePortableBuild, config.usePortableBuild); + genLayout->addRow(usePortableBuild); + + QCheckBox* printAppVersion = new QCheckBox(tr("Print version in console output")); + connectCheckbox(printAppVersion, config.printAppVersion); + genLayout->addRow(printAppVersion); + + // Graphics settings + QGroupBox* gpuGroupBox = new QGroupBox(tr("Graphics Settings"), this); + QFormLayout* gpuLayout = new QFormLayout(gpuGroupBox); + gpuLayout->setHorizontalSpacing(20); + gpuLayout->setVerticalSpacing(10); + + QComboBox* rendererType = new QComboBox(); + rendererType->addItem(tr("Null")); + rendererType->addItem(tr("OpenGL")); + rendererType->addItem(tr("Vulkan")); + rendererType->setCurrentIndex(static_cast(config.rendererType)); + connect(rendererType, &QComboBox::currentIndexChanged, this, [&](int index) { + auto type = static_cast(index); + + if (type == RendererType::Vulkan) { + QMessageBox messageBox( + QMessageBox::Icon::Critical, tr("Vulkan renderer unavailable"), + tr("Qt UI doesn't currently support Vulkan, try again at a later time") + ); + messageBox.exec(); + } else { + config.rendererType = type; + updateConfig(); + } + }); + gpuLayout->addRow(tr("GPU renderer"), rendererType); + + QCheckBox* enableRenderdoc = new QCheckBox(tr("Enable Renderdoc")); + connectCheckbox(enableRenderdoc, 
config.enableRenderdoc); + gpuLayout->addRow(enableRenderdoc); + + QCheckBox* shaderJitEnabled = new QCheckBox(tr("Enable shader JIT")); + connectCheckbox(shaderJitEnabled, config.shaderJitEnabled); + gpuLayout->addRow(shaderJitEnabled); + + QCheckBox* vsyncEnabled = new QCheckBox(tr("Enable VSync")); + connectCheckbox(vsyncEnabled, config.vsyncEnabled); + gpuLayout->addRow(vsyncEnabled); + + QCheckBox* useUbershaders = new QCheckBox(tr("Use ubershaders (No stutter, maybe slower)")); + connectCheckbox(useUbershaders, config.useUbershaders); + gpuLayout->addRow(useUbershaders); + + QCheckBox* accurateShaderMul = new QCheckBox(tr("Accurate shader multiplication")); + connectCheckbox(accurateShaderMul, config.accurateShaderMul); + gpuLayout->addRow(accurateShaderMul); + + QCheckBox* accelerateShaders = new QCheckBox(tr("Accelerate shaders")); + connectCheckbox(accelerateShaders, config.accelerateShaders); + gpuLayout->addRow(accelerateShaders); + + QCheckBox* forceShadergenForLights = new QCheckBox(tr("Force shadergen when rendering lights")); + connectCheckbox(forceShadergenForLights, config.forceShadergenForLights); + gpuLayout->addRow(forceShadergenForLights); + + QSpinBox* lightShadergenThreshold = new QSpinBox; + lightShadergenThreshold->setRange(1, 8); + lightShadergenThreshold->setValue(config.lightShadergenThreshold); + connect(lightShadergenThreshold, &QSpinBox::valueChanged, this, [&](int value) { + config.lightShadergenThreshold = static_cast(value); + updateConfig(); + }); + gpuLayout->addRow(tr("Light threshold for forcing shadergen"), lightShadergenThreshold); + + // Audio settings + QGroupBox* spuGroupBox = new QGroupBox(tr("Audio Settings"), this); + QFormLayout* audioLayout = new QFormLayout(spuGroupBox); + audioLayout->setHorizontalSpacing(20); + audioLayout->setVerticalSpacing(10); + + QComboBox* dspType = new QComboBox(); + dspType->addItem(tr("Null")); + dspType->addItem(tr("LLE")); + dspType->addItem(tr("HLE")); + 
dspType->setCurrentIndex(static_cast(config.dspType)); + connect(dspType, &QComboBox::currentIndexChanged, this, [&](int index) { + config.dspType = static_cast(index); + updateConfig(); + }); + audioLayout->addRow(tr("DSP emulation"), dspType); + + QCheckBox* audioEnabled = new QCheckBox(tr("Enable audio")); + connectCheckbox(audioEnabled, config.audioEnabled); + audioLayout->addRow(audioEnabled); + + QCheckBox* aacEnabled = new QCheckBox(tr("Enable AAC audio")); + connectCheckbox(aacEnabled, config.aacEnabled); + audioLayout->addRow(aacEnabled); + + QCheckBox* printDSPFirmware = new QCheckBox(tr("Print DSP firmware")); + connectCheckbox(printDSPFirmware, config.printDSPFirmware); + audioLayout->addRow(printDSPFirmware); + + QCheckBox* muteAudio = new QCheckBox(tr("Mute audio device")); + connectCheckbox(muteAudio, config.audioDeviceConfig.muteAudio); + audioLayout->addRow(muteAudio); + + QComboBox* volumeCurveType = new QComboBox(); + volumeCurveType->addItem(tr("Cubic")); + volumeCurveType->addItem(tr("Linear")); + volumeCurveType->setCurrentIndex(static_cast(config.audioDeviceConfig.volumeCurve)); + connect(volumeCurveType, &QComboBox::currentIndexChanged, this, [&](int index) { + config.audioDeviceConfig.volumeCurve = static_cast(index); + updateConfig(); + }); + audioLayout->addRow(tr("Volume curve"), volumeCurveType); + + QLabel* volumeLabel = new QLabel(QString::number(int(config.audioDeviceConfig.volumeRaw * 100))); + + QSlider* volumeSlider = new QSlider(Qt::Horizontal); + volumeSlider->setRange(0, 200); + volumeSlider->setValue(int(config.audioDeviceConfig.volumeRaw * 100)); + connect(volumeSlider, &QSlider::valueChanged, this, [this, volumeLabel](int value) { + config.audioDeviceConfig.volumeRaw = static_cast(value) / 100.0f; + volumeLabel->setText(QString::number(value)); + + updateConfig(); + }); + + QHBoxLayout* volumeLayout = new QHBoxLayout(); + volumeLayout->setSpacing(4); + volumeLayout->addWidget(volumeSlider); + 
volumeLayout->addWidget(volumeLabel); + audioLayout->addRow(tr("Audio device volume"), volumeLayout); + + // Battery settings + QGroupBox* batGroupBox = new QGroupBox(tr("Battery Settings"), this); + QFormLayout* batLayout = new QFormLayout(batGroupBox); + batLayout->setHorizontalSpacing(20); + batLayout->setVerticalSpacing(10); + + QSpinBox* batteryPercentage = new QSpinBox; + batteryPercentage->setRange(1, 100); + batteryPercentage->setValue(config.batteryPercentage); + connect(batteryPercentage, &QSpinBox::valueChanged, this, [&](int value) { + config.batteryPercentage = static_cast(value); + updateConfig(); + }); + batLayout->addRow(tr("Battery percentage"), batteryPercentage); + + QCheckBox* chargerPlugged = new QCheckBox(tr("Charger plugged")); + connectCheckbox(chargerPlugged, config.chargerPlugged); + batLayout->addRow(chargerPlugged); + + // SD Card settings + QGroupBox* sdcGroupBox = new QGroupBox(tr("SD Card Settings"), this); + QFormLayout* sdcLayout = new QFormLayout(sdcGroupBox); + sdcLayout->setHorizontalSpacing(20); + sdcLayout->setVerticalSpacing(10); + + QCheckBox* sdCardInserted = new QCheckBox(tr("Enable virtual SD card")); + connectCheckbox(sdCardInserted, config.sdCardInserted); + sdcLayout->addRow(sdCardInserted); + + QCheckBox* sdWriteProtected = new QCheckBox(tr("Write protect virtual SD card")); + connectCheckbox(sdWriteProtected, config.sdWriteProtected); + sdcLayout->addRow(sdWriteProtected); + + // Add all our settings widgets to our widget list + addWidget(guiGroupBox, tr("Interface"), ":/docs/img/sparkling_icon.png", tr("User Interface settings")); + addWidget(genGroupBox, tr("General"), ":/docs/img/settings_icon.png", tr("General emulator settings")); + addWidget(gpuGroupBox, tr("Graphics"), ":/docs/img/display_icon.png", tr("Graphics emulation and output settings")); + addWidget(spuGroupBox, tr("Audio"), ":/docs/img/speaker_icon.png", tr("Audio emulation and output settings")); + addWidget(batGroupBox, tr("Battery"), 
":/docs/img/battery_icon.png", tr("Battery emulation settings")); + addWidget(sdcGroupBox, tr("SD Card"), ":/docs/img/sdcard_icon.png", tr("SD Card emulation settings")); + + widgetList->setCurrentRow(0); } void ConfigWindow::setTheme(Theme theme) { - currentTheme = theme; - switch (theme) { case Theme::Dark: { QApplication::setStyle(QStyleFactory::create("Fusion")); @@ -119,4 +446,39 @@ void ConfigWindow::setTheme(Theme theme) { } } -ConfigWindow::~ConfigWindow() { delete themeSelect; } +void ConfigWindow::setIcon(WindowIcon icon) { + auto updateIcon = [&](const QString& iconPath) { getMainWindow()->setWindowIcon(QIcon(iconPath)); }; + + switch (icon) { + case WindowIcon::Rsyn: updateIcon(":/docs/img/rsyn_icon.png"); break; + case WindowIcon::Rnap: updateIcon(":/docs/img/rnap_icon.png"); break; + case WindowIcon::Rcow: updateIcon(":/docs/img/rcow_icon.png"); break; + case WindowIcon::SkyEmu: updateIcon(":/docs/img/skyemu_icon.png"); break; + + case WindowIcon::Rpog: + default: updateIcon(":/docs/img/rpog_icon.png"); break; + } +} + +void ConfigWindow::addWidget(QWidget* widget, QString title, QString icon, QString helpText) { + const int index = widgetList->count(); + + QListWidgetItem* item = new QListWidgetItem(widgetList); + item->setText(title); + if (!icon.isEmpty()) { + item->setIcon(QIcon::fromTheme(icon)); + } + + widgetContainer->addWidget(widget); + + if (index >= settingWidgetCount) { + Helpers::panic("Qt: ConfigWindow::settingWidgetCount has not been updated correctly!"); + } + helpTexts[index] = std::move(helpText); +} + +ConfigWindow::~ConfigWindow() { + delete helpText; + delete widgetList; + delete widgetContainer; +} diff --git a/src/panda_qt/main.cpp b/src/panda_qt/main.cpp index a7a6216c..4ab737b0 100644 --- a/src/panda_qt/main.cpp +++ b/src/panda_qt/main.cpp @@ -7,6 +7,5 @@ int main(int argc, char *argv[]) { QApplication app(argc, argv); MainWindow window(&app); - window.show(); return app.exec(); } diff --git a/src/panda_qt/main_window.cpp 
b/src/panda_qt/main_window.cpp index cfa45e85..c060318e 100644 --- a/src/panda_qt/main_window.cpp +++ b/src/panda_qt/main_window.cpp @@ -9,20 +9,28 @@ #include "cheats.hpp" #include "input_mappings.hpp" +#include "sdl_sensors.hpp" #include "services/dsp.hpp" +#include "version.hpp" + +MainWindow::MainWindow(QApplication* app, QWidget* parent) : QMainWindow(parent), keyboardMappings(InputMappings::defaultKeyboardMappings()) { + emu = new Emulator(); + + loadTranslation(); + setWindowTitle(tr("Alber")); -MainWindow::MainWindow(QApplication* app, QWidget* parent) : QMainWindow(parent), keyboardMappings(InputMappings::defaultKeyboardMappings()), screen(this) { - setWindowTitle("Alber"); // Enable drop events for loading ROMs setAcceptDrops(true); resize(800, 240 * 4); - screen.show(); + show(); + + // We pass a callback to the screen widget that will be triggered every time we resize the screen + screen = new ScreenWidget([this](u32 width, u32 height) { handleScreenResize(width, height); }, this); + setCentralWidget(screen); appRunning = true; - // Set our menu bar up - menuBar = new QMenuBar(this); - setMenuBar(menuBar); + menuBar = new QMenuBar(nullptr); // Create menu bar menus auto fileMenu = menuBar->addMenu(tr("File")); @@ -46,6 +54,8 @@ MainWindow::MainWindow(QApplication* app, QWidget* parent) : QMainWindow(parent) auto resumeAction = emulationMenu->addAction(tr("Resume")); auto resetAction = emulationMenu->addAction(tr("Reset")); auto configureAction = emulationMenu->addAction(tr("Configure")); + configureAction->setMenuRole(QAction::PreferencesRole); + connect(pauseAction, &QAction::triggered, this, [this]() { sendMessage(EmulatorMessage{.type = MessageType::Pause}); }); connect(resumeAction, &QAction::triggered, this, [this]() { sendMessage(EmulatorMessage{.type = MessageType::Resume}); }); connect(resetAction, &QAction::triggered, this, [this]() { sendMessage(EmulatorMessage{.type = MessageType::Reset}); }); @@ -66,14 +76,15 @@ 
MainWindow::MainWindow(QApplication* app, QWidget* parent) : QMainWindow(parent) connect(dumpDspFirmware, &QAction::triggered, this, &MainWindow::dumpDspFirmware); auto aboutAction = aboutMenu->addAction(tr("About Panda3DS")); + aboutAction->setMenuRole(QAction::AboutRole); connect(aboutAction, &QAction::triggered, this, &MainWindow::showAboutMenu); - emu = new Emulator(); - emu->setOutputSize(screen.surfaceWidth, screen.surfaceHeight); + setMenuBar(menuBar); + + emu->setOutputSize(screen->surfaceWidth, screen->surfaceHeight); // Set up misc objects aboutWindow = new AboutWindow(nullptr); - configWindow = new ConfigWindow(this); cheatsEditor = new CheatsWindow(emu, {}, this); patchWindow = new PatchWindow(this); luaEditor = new TextEditorWindow(this, "script.lua", ""); @@ -84,6 +95,14 @@ MainWindow::MainWindow(QApplication* app, QWidget* parent) : QMainWindow(parent) shaderEditor->setText(emu->getRenderer()->getUbershader()); } + configWindow = new ConfigWindow( + [&]() { + EmulatorMessage message{.type = MessageType::UpdateConfig}; + sendMessage(message); + }, + [&]() { return this; }, emu->getConfig(), this + ); + auto args = QCoreApplication::arguments(); if (args.size() > 1) { auto romPath = std::filesystem::current_path() / args.at(1).toStdU16String(); @@ -93,21 +112,42 @@ MainWindow::MainWindow(QApplication* app, QWidget* parent) : QMainWindow(parent) } } + // Handle UI configs before setting up the emulator thread + { + auto& config = emu->getConfig(); + auto& windowSettings = config.windowSettings; + + if (windowSettings.rememberPosition) { + setGeometry(windowSettings.x, windowSettings.y, windowSettings.width, config.windowSettings.height); + } + + if (config.printAppVersion) { + printf("Welcome to Panda3DS v%s!\n", PANDA3DS_VERSION); + } + } + // The emulator graphics context for the thread should be initialized in the emulator thread due to how GL contexts work emuThread = std::thread([this]() { const RendererType rendererType = 
emu->getConfig().rendererType; usingGL = (rendererType == RendererType::OpenGL || rendererType == RendererType::Software || rendererType == RendererType::Null); usingVk = (rendererType == RendererType::Vulkan); + usingMtl = (rendererType == RendererType::Metal); if (usingGL) { // Make GL context current for this thread, enable VSync - GL::Context* glContext = screen.getGLContext(); + GL::Context* glContext = screen->getGLContext(); glContext->MakeCurrent(); glContext->SetSwapInterval(emu->getConfig().vsyncEnabled ? 1 : 0); + if (glContext->IsGLES()) { + emu->getRenderer()->setupGLES(); + } + emu->initGraphicsContext(glContext); } else if (usingVk) { Helpers::panic("Vulkan on Qt is currently WIP, try the SDL frontend instead!"); + } else if (usingMtl) { + Helpers::panic("Metal on Qt currently doesn't work, try the SDL frontend instead!"); } else { Helpers::panic("Unsupported graphics backend for Qt frontend!"); } @@ -145,13 +185,13 @@ void MainWindow::emuThreadMainLoop() { // Unbind GL context if we're using GL, otherwise some setups seem to be unable to join this thread if (usingGL) { - screen.getGLContext()->DoneCurrent(); + screen->getGLContext()->DoneCurrent(); } } void MainWindow::swapEmuBuffer() { if (usingGL) { - screen.getGLContext()->SwapBuffers(); + screen->getGLContext()->SwapBuffers(); } else { Helpers::panic("[Qt] Don't know how to swap buffers for the current rendering backend :("); } @@ -160,7 +200,7 @@ void MainWindow::swapEmuBuffer() { void MainWindow::selectROM() { auto path = QFileDialog::getOpenFileName( this, tr("Select 3DS ROM to load"), QString::fromStdU16String(emu->getConfig().defaultRomPath.u16string()), - tr("Nintendo 3DS ROMs (*.3ds *.cci *.cxi *.app *.3dsx *.elf *.axf)") + tr("Nintendo 3DS ROMs (*.3ds *.cci *.cxi *.app *.ncch *.3dsx *.elf *.axf)") ); if (!path.isEmpty()) { @@ -200,14 +240,26 @@ void MainWindow::selectLuaFile() { } } -// Cleanup when the main window closes -MainWindow::~MainWindow() { +// Stop emulator thread when the 
main window closes +void MainWindow::closeEvent(QCloseEvent* event) { appRunning = false; // Set our running atomic to false in order to make the emulator thread stop, and join it if (emuThread.joinable()) { emuThread.join(); } + // Cache window position/size in config file to restore next time + const QRect& windowGeometry = geometry(); + auto& windowConfig = emu->getConfig().windowSettings; + + windowConfig.x = windowGeometry.x(); + windowConfig.y = windowGeometry.y(); + windowConfig.width = windowGeometry.width(); + windowConfig.height = windowGeometry.height(); +} + +// Cleanup when the main window closes +MainWindow::~MainWindow() { delete emu; delete menuBar; delete aboutWindow; @@ -272,8 +324,7 @@ void MainWindow::dumpDspFirmware() { case DSPService::ComponentDumpResult::Success: break; case DSPService::ComponentDumpResult::NotLoaded: { QMessageBox messageBox( - QMessageBox::Icon::Warning, tr("No DSP firmware loaded"), - tr("The currently loaded app has not uploaded a firmware to the DSP") + QMessageBox::Icon::Warning, tr("No DSP firmware loaded"), tr("The currently loaded app has not uploaded a firmware to the DSP") ); QAbstractButton* button = messageBox.addButton(tr("OK"), QMessageBox::ButtonRole::YesRole); @@ -360,6 +411,23 @@ void MainWindow::dispatchMessage(const EmulatorMessage& message) { emu->getRenderer()->setUbershader(*message.string.str); delete message.string.str; break; + + case MessageType::SetScreenSize: { + const u32 width = message.screenSize.width; + const u32 height = message.screenSize.height; + + emu->setOutputSize(width, height); + screen->resizeSurface(width, height); + break; + } + + case MessageType::UpdateConfig: + emu->getConfig() = configWindow->getConfig(); + emu->reloadSettings(); + + // Save new settings to disk + emu->getConfig().save(); + break; } } @@ -422,29 +490,14 @@ void MainWindow::keyReleaseEvent(QKeyEvent* event) { void MainWindow::mousePressEvent(QMouseEvent* event) { if (event->button() == 
Qt::MouseButton::LeftButton) { - const QPointF clickPos = event->globalPosition(); - const QPointF widgetPos = screen.mapFromGlobal(clickPos); + // We handle actual mouse press & movement logic inside the mouseMoveEvent handler + handleTouchscreenPress(event); + } +} - // Press is inside the screen area - if (widgetPos.x() >= 0 && widgetPos.x() < screen.width() && widgetPos.y() >= 0 && widgetPos.y() < screen.height()) { - // Go from widget positions to [0, 400) for x and [0, 480) for y - uint x = (uint)std::round(widgetPos.x() / screen.width() * 400.f); - uint y = (uint)std::round(widgetPos.y() / screen.height() * 480.f); - - // Check if touch falls in the touch screen area - if (y >= 240 && y <= 480 && x >= 40 && x < 40 + 320) { - // Convert to 3DS coordinates - u16 x_converted = static_cast(x) - 40; - u16 y_converted = static_cast(y) - 240; - - EmulatorMessage message{.type = MessageType::PressTouchscreen}; - message.touchscreen.x = x_converted; - message.touchscreen.y = y_converted; - sendMessage(message); - } else { - sendMessage(EmulatorMessage{.type = MessageType::ReleaseTouchscreen}); - } - } +void MainWindow::mouseMoveEvent(QMouseEvent* event) { + if (event->buttons().testFlag(Qt::MouseButton::LeftButton)) { + handleTouchscreenPress(event); } } @@ -454,6 +507,32 @@ void MainWindow::mouseReleaseEvent(QMouseEvent* event) { } } +void MainWindow::handleTouchscreenPress(QMouseEvent* event) { + const QPointF clickPos = event->globalPosition(); + const QPointF widgetPos = screen->mapFromGlobal(clickPos); + + // Press is inside the screen area + if (widgetPos.x() >= 0 && widgetPos.x() < screen->width() && widgetPos.y() >= 0 && widgetPos.y() < screen->height()) { + // Go from widget positions to [0, 400) for x and [0, 480) for y + uint x = (uint)std::round(widgetPos.x() / screen->width() * 400.f); + uint y = (uint)std::round(widgetPos.y() / screen->height() * 480.f); + + // Check if touch falls in the touch screen area + if (y >= 240 && y <= 480 && x >= 40 && x < 40 
+ 320) { + // Convert to 3DS coordinates + u16 x_converted = static_cast(x) - 40; + u16 y_converted = static_cast(y) - 240; + + EmulatorMessage message{.type = MessageType::PressTouchscreen}; + message.touchscreen.x = x_converted; + message.touchscreen.y = y_converted; + sendMessage(message); + } else { + sendMessage(EmulatorMessage{.type = MessageType::ReleaseTouchscreen}); + } + } +} + void MainWindow::loadLuaScript(const std::string& code) { EmulatorMessage message{.type = MessageType::LoadLuaScript}; @@ -482,6 +561,14 @@ void MainWindow::editCheat(u32 handle, const std::vector& cheat, const sendMessage(message); } +void MainWindow::handleScreenResize(u32 width, u32 height) { + EmulatorMessage message{.type = MessageType::SetScreenSize}; + message.screenSize.width = width; + message.screenSize.height = height; + + sendMessage(message); +} + void MainWindow::initControllers() { // Make SDL use consistent positional button mapping SDL_SetHint(SDL_HINT_GAMECONTROLLER_USE_BUTTON_LABELS, "0"); @@ -497,6 +584,8 @@ void MainWindow::initControllers() { SDL_Joystick* stick = SDL_GameControllerGetJoystick(gameController); gameControllerID = SDL_JoystickInstanceID(stick); } + + setupControllerSensors(gameController); } } @@ -534,6 +623,8 @@ void MainWindow::pollControllers() { if (gameController == nullptr) { gameController = SDL_GameControllerOpen(event.cdevice.which); gameControllerID = event.cdevice.which; + + setupControllerSensors(gameController); } break; @@ -574,6 +665,37 @@ void MainWindow::pollControllers() { } break; } + + case SDL_CONTROLLERSENSORUPDATE: { + if (event.csensor.sensor == SDL_SENSOR_GYRO) { + auto rotation = Sensors::SDL::convertRotation({ + event.csensor.data[0], + event.csensor.data[1], + event.csensor.data[2], + }); + + hid.setPitch(s16(rotation.x)); + hid.setRoll(s16(rotation.y)); + hid.setYaw(s16(rotation.z)); + } else if (event.csensor.sensor == SDL_SENSOR_ACCEL) { + auto accel = Sensors::SDL::convertAcceleration(event.csensor.data); + 
hid.setAccel(accel.x, accel.y, accel.z); + } + break; + } } } -} \ No newline at end of file +} + +void MainWindow::setupControllerSensors(SDL_GameController* controller) { + bool haveGyro = SDL_GameControllerHasSensor(controller, SDL_SENSOR_GYRO) == SDL_TRUE; + bool haveAccelerometer = SDL_GameControllerHasSensor(controller, SDL_SENSOR_ACCEL) == SDL_TRUE; + + if (haveGyro) { + SDL_GameControllerSetSensorEnabled(controller, SDL_SENSOR_GYRO, SDL_TRUE); + } + + if (haveAccelerometer) { + SDL_GameControllerSetSensorEnabled(controller, SDL_SENSOR_ACCEL, SDL_TRUE); + } +} diff --git a/src/panda_qt/mappings.cpp b/src/panda_qt/mappings.cpp index 22741a73..d41b0a31 100644 --- a/src/panda_qt/mappings.cpp +++ b/src/panda_qt/mappings.cpp @@ -1,7 +1,7 @@ -#include "input_mappings.hpp" - #include +#include "input_mappings.hpp" + InputMappings InputMappings::defaultKeyboardMappings() { InputMappings mappings; mappings.setMapping(Qt::Key_L, HID::Keys::A); diff --git a/src/panda_qt/patch_window.cpp b/src/panda_qt/patch_window.cpp index 189288eb..6096d89a 100644 --- a/src/panda_qt/patch_window.cpp +++ b/src/panda_qt/patch_window.cpp @@ -12,6 +12,8 @@ #include "io_file.hpp" PatchWindow::PatchWindow(QWidget* parent) : QWidget(parent, Qt::Window) { + setWindowTitle(tr("ROM patcher")); + QVBoxLayout* layout = new QVBoxLayout; layout->setContentsMargins(6, 6, 6, 6); setLayout(layout); @@ -155,4 +157,4 @@ void PatchWindow::PatchWindow::displayMessage(const QString& title, const QStrin } messageBox.exec(); -} \ No newline at end of file +} diff --git a/src/panda_qt/screen.cpp b/src/panda_qt/screen.cpp index 5a254e79..fc783683 100644 --- a/src/panda_qt/screen.cpp +++ b/src/panda_qt/screen.cpp @@ -18,7 +18,7 @@ // and https://github.com/melonDS-emu/melonDS/blob/master/src/frontend/qt_sdl/main.cpp #ifdef PANDA3DS_ENABLE_OPENGL -ScreenWidget::ScreenWidget(QWidget* parent) : QWidget(parent) { +ScreenWidget::ScreenWidget(ResizeCallback resizeCallback, QWidget* parent) : QWidget(parent), 
resizeCallback(resizeCallback) { // Create a native window for use with our graphics API of choice resize(800, 240 * 4); @@ -29,23 +29,55 @@ ScreenWidget::ScreenWidget(QWidget* parent) : QWidget(parent) { setAttribute(Qt::WA_KeyCompression, false); setFocusPolicy(Qt::StrongFocus); setMouseTracking(true); + show(); if (!createGLContext()) { Helpers::panic("Failed to create GL context for display"); } } +void ScreenWidget::resizeEvent(QResizeEvent* event) { + previousWidth = surfaceWidth; + previousHeight = surfaceHeight; + QWidget::resizeEvent(event); + + // Update surfaceWidth/surfaceHeight following the resize + std::optional windowInfo = getWindowInfo(); + if (windowInfo) { + this->windowInfo = *windowInfo; + } + + // This will call take care of calling resizeSurface from the emulator thread + resizeCallback(surfaceWidth, surfaceHeight); +} + +// Note: This will run on the emulator thread, we don't want any Qt calls happening there. +void ScreenWidget::resizeSurface(u32 width, u32 height) { + if (previousWidth != width || previousHeight != height) { + if (glContext) { + glContext->ResizeSurface(width, height); + } + } +} + bool ScreenWidget::createGLContext() { - // List of GL context versions we will try. Anything 4.1+ is good - static constexpr std::array versionsToTry = { + // List of GL context versions we will try. 
Anything 4.1+ is good for desktop OpenGL, and 3.1+ for OpenGL ES + static constexpr std::array versionsToTry = { GL::Context::Version{GL::Context::Profile::Core, 4, 6}, GL::Context::Version{GL::Context::Profile::Core, 4, 5}, GL::Context::Version{GL::Context::Profile::Core, 4, 4}, GL::Context::Version{GL::Context::Profile::Core, 4, 3}, GL::Context::Version{GL::Context::Profile::Core, 4, 2}, GL::Context::Version{GL::Context::Profile::Core, 4, 1}, + GL::Context::Version{GL::Context::Profile::ES, 3, 2}, GL::Context::Version{GL::Context::Profile::ES, 3, 1}, }; std::optional windowInfo = getWindowInfo(); if (windowInfo.has_value()) { + this->windowInfo = *windowInfo; + glContext = GL::Context::Create(*getWindowInfo(), versionsToTry); + if (glContext == nullptr) { + return false; + } + glContext->DoneCurrent(); } @@ -53,7 +85,7 @@ bool ScreenWidget::createGLContext() { } qreal ScreenWidget::devicePixelRatioFromScreen() const { - const QScreen* screenForRatio = window()->windowHandle()->screen(); + const QScreen* screenForRatio = windowHandle()->screen(); if (!screenForRatio) { screenForRatio = QGuiApplication::primaryScreen(); } @@ -110,4 +142,4 @@ std::optional ScreenWidget::getWindowInfo() { return wi; } -#endif \ No newline at end of file +#endif diff --git a/src/panda_qt/shader_editor.cpp b/src/panda_qt/shader_editor.cpp index 122d841f..4ca41e22 100644 --- a/src/panda_qt/shader_editor.cpp +++ b/src/panda_qt/shader_editor.cpp @@ -1,8 +1,9 @@ +#include "panda_qt/shader_editor.hpp" + #include #include #include "panda_qt/main_window.hpp" -#include "panda_qt/shader_editor.hpp" using namespace Zep; diff --git a/src/panda_qt/text_editor.cpp b/src/panda_qt/text_editor.cpp index a31a829f..7ac1d5f2 100644 --- a/src/panda_qt/text_editor.cpp +++ b/src/panda_qt/text_editor.cpp @@ -9,6 +9,7 @@ using namespace Zep; TextEditorWindow::TextEditorWindow(QWidget* parent, const std::string& filename, const std::string& initialText) : QDialog(parent), zepWidget(this, 
qApp->applicationDirPath().toStdString(), fontSize) { + setWindowTitle(tr("Lua Editor")); resize(600, 600); // Register our extensions diff --git a/src/panda_qt/translations.cpp b/src/panda_qt/translations.cpp new file mode 100644 index 00000000..bfadd570 --- /dev/null +++ b/src/panda_qt/translations.cpp @@ -0,0 +1,94 @@ +#include +#include +#include +#include + +#include "panda_qt/config_window.hpp" +#include "panda_qt/main_window.hpp" + +void MainWindow::loadTranslation() { + // TODO: This should become a member variable when we allow changing language at runtime. + QTranslator* translator = nullptr; + + // Fetch the .qm file for our language and load it + auto language = QString::fromStdString(emu->getConfig().frontendSettings.language); + const QString baseDir = QStringLiteral(":/translations"); + const QString basePath = QStringLiteral("%1/%2.qm").arg(baseDir).arg(language); + + if (QFile::exists(basePath)) { + if (translator != nullptr) { + qApp->removeTranslator(translator); + } + + translator = new QTranslator(qApp); + if (!translator->load(basePath)) { + QMessageBox::warning( + nullptr, QStringLiteral("Translation Error"), + QStringLiteral("Failed to find load translation file for '%1':\n%2").arg(language).arg(basePath) + ); + delete translator; + } else { + qApp->installTranslator(translator); + } + } else { + printf("Language file %s does not exist. Defaulting to English\n", basePath.toStdString().c_str()); + } +} + +struct LanguageInfo { + QString name; // Full name of the language (for example "English (US)") + const char* code; // ISO 639 language code (for example "en_us") + + explicit LanguageInfo(const QString& name, const char* code) : name(name), code(code) {} +}; + +// List of languages in the order they should appear in the menu +// Please keep this list mostly in alphabetical order. 
+// Also, for Unicode characters in language names, use Unicode keycodes instead of writing out the name, +// as some compilers/toolchains may not enjoy Unicode in source files. +static std::array languages = { + LanguageInfo(QStringLiteral(u"English"), "en"), // English + LanguageInfo(QStringLiteral(u"\u0395\u03BB\u03BB\u03B7\u03BD\u03B9\u03BA\u03AC"), "el"), // Greek + LanguageInfo(QStringLiteral(u"Espa\u00F1ol"), "es"), // Spanish + LanguageInfo(QStringLiteral(u"Nederlands"), "nl"), // Dutch + LanguageInfo(QStringLiteral(u"Portugu\u00EAs (Brasil)"), "pt_br") // Portuguese (Brazilian) +}; + +QComboBox* ConfigWindow::createLanguageSelect() { + QComboBox* select = new QComboBox(); + + for (usize i = 0; i < languages.size(); i++) { + const auto& lang = languages[i]; + select->addItem(lang.name); + + if (config.frontendSettings.language == lang.code) { + select->setCurrentIndex(i); + } + } + + connect(select, &QComboBox::currentIndexChanged, this, [&](int index) { + const QString baseDir = QStringLiteral(":/translations"); + const QString basePath = QStringLiteral("%1/%2.qm").arg(baseDir).arg(languages[index].code); + + if (QFile::exists(basePath)) { + config.frontendSettings.language = languages[index].code; + updateConfig(); + + QMessageBox messageBox( + QMessageBox::Icon::Information, tr("Language change successful"), + tr("Restart Panda3DS for the new language to be used.") + ); + + messageBox.exec(); + } else { + QMessageBox messageBox( + QMessageBox::Icon::Warning, tr("Language change failed"), + tr("The language you selected is not included in Panda3DS. 
If you're seeing this, someone messed up the language UI code...") + ); + + messageBox.exec(); + } + }); + + return select; +} diff --git a/src/panda_sdl/frontend_sdl.cpp b/src/panda_sdl/frontend_sdl.cpp index 0c78eea1..2d60d2fa 100644 --- a/src/panda_sdl/frontend_sdl.cpp +++ b/src/panda_sdl/frontend_sdl.cpp @@ -2,6 +2,10 @@ #include +#include "renderdoc.hpp" +#include "sdl_sensors.hpp" +#include "version.hpp" + FrontendSDL::FrontendSDL() : keyboardMappings(InputMappings::defaultKeyboardMappings()) { if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_EVENTS) < 0) { Helpers::panic("Failed to initialize SDL2"); @@ -20,22 +24,46 @@ FrontendSDL::FrontendSDL() : keyboardMappings(InputMappings::defaultKeyboardMapp SDL_Joystick* stick = SDL_GameControllerGetJoystick(gameController); gameControllerID = SDL_JoystickInstanceID(stick); } + + setupControllerSensors(gameController); } const EmulatorConfig& config = emu.getConfig(); - // We need OpenGL for software rendering or for OpenGL if it's enabled - bool needOpenGL = config.rendererType == RendererType::Software; + // We need OpenGL for software rendering/null renderer or for the OpenGL renderer if it's enabled. + bool needOpenGL = config.rendererType == RendererType::Software || config.rendererType == RendererType::Null; #ifdef PANDA3DS_ENABLE_OPENGL needOpenGL = needOpenGL || (config.rendererType == RendererType::OpenGL); #endif + const char* windowTitle = config.windowSettings.showAppVersion ? 
("Alber v" PANDA3DS_VERSION) : "Alber"; + if (config.printAppVersion) { + printf("Welcome to Panda3DS v%s!\n", PANDA3DS_VERSION); + } + + // Positions of the window + int windowX, windowY; + + // Apply window size settings if the appropriate option is enabled + if (config.windowSettings.rememberPosition) { + windowX = config.windowSettings.x; + windowY = config.windowSettings.y; + windowWidth = config.windowSettings.width; + windowHeight = config.windowSettings.height; + } else { + windowX = SDL_WINDOWPOS_CENTERED; + windowY = SDL_WINDOWPOS_CENTERED; + windowWidth = 400; + windowHeight = 480; + } + emu.setOutputSize(windowWidth, windowHeight); + if (needOpenGL) { - // Demand 3.3 core for software renderer, or 4.1 core for OpenGL renderer (max available on MacOS) + // Demand 4.1 core for OpenGL renderer (max available on MacOS), 3.3 for the software & null renderers // MacOS gets mad if we don't explicitly demand a core profile SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_CORE); - SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, config.rendererType == RendererType::Software ? 3 : 4); - SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, config.rendererType == RendererType::Software ? 3 : 1); - window = SDL_CreateWindow("Alber", SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED, 400, 480, SDL_WINDOW_OPENGL); + SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, config.rendererType == RendererType::OpenGL ? 4 : 3); + SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, config.rendererType == RendererType::OpenGL ? 
1 : 3); + window = SDL_CreateWindow(windowTitle, windowX, windowY, windowWidth, windowHeight, SDL_WINDOW_OPENGL | SDL_WINDOW_RESIZABLE); if (window == nullptr) { Helpers::panic("Window creation failed: %s", SDL_GetError()); @@ -43,11 +71,27 @@ FrontendSDL::FrontendSDL() : keyboardMappings(InputMappings::defaultKeyboardMapp glContext = SDL_GL_CreateContext(window); if (glContext == nullptr) { - Helpers::panic("OpenGL context creation failed: %s", SDL_GetError()); - } + Helpers::warn("OpenGL context creation failed: %s\nTrying again with OpenGL ES.", SDL_GetError()); - if (!gladLoadGLLoader(reinterpret_cast(SDL_GL_GetProcAddress))) { - Helpers::panic("OpenGL init failed"); + // Some low end devices (eg RPi, emulation handhelds) don't support desktop GL, but only OpenGL ES, so fall back to that if GL context + // creation failed + SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_ES); + SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 3); + SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 1); + glContext = SDL_GL_CreateContext(window); + if (glContext == nullptr) { + Helpers::panic("OpenGL context creation failed: %s", SDL_GetError()); + } + + if (!gladLoadGLES2Loader(reinterpret_cast(SDL_GL_GetProcAddress))) { + Helpers::panic("OpenGL init failed"); + } + + emu.getRenderer()->setupGLES(); + } else { + if (!gladLoadGLLoader(reinterpret_cast(SDL_GL_GetProcAddress))) { + Helpers::panic("OpenGL init failed"); + } } SDL_GL_SetSwapInterval(config.vsyncEnabled ? 
1 : 0); @@ -55,7 +99,17 @@ FrontendSDL::FrontendSDL() : keyboardMappings(InputMappings::defaultKeyboardMapp #ifdef PANDA3DS_ENABLE_VULKAN if (config.rendererType == RendererType::Vulkan) { - window = SDL_CreateWindow("Alber", SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED, 400, 480, SDL_WINDOW_VULKAN); + window = SDL_CreateWindow(windowTitle, windowX, windowY, windowWidth, windowHeight, SDL_WINDOW_VULKAN | SDL_WINDOW_RESIZABLE); + + if (window == nullptr) { + Helpers::warn("Window creation failed: %s", SDL_GetError()); + } + } +#endif + +#ifdef PANDA3DS_ENABLE_METAL + if (config.rendererType == RendererType::Metal) { + window = SDL_CreateWindow(windowTitle, windowX, windowY, windowWidth, windowHeight, SDL_WINDOW_METAL | SDL_WINDOW_RESIZABLE); if (window == nullptr) { Helpers::warn("Window creation failed: %s", SDL_GetError()); @@ -87,10 +141,16 @@ void FrontendSDL::run() { namespace Keys = HID::Keys; switch (event.type) { - case SDL_QUIT: + case SDL_QUIT: { printf("Bye :(\n"); programRunning = false; + + // Remember window position & size for future runs + auto& windowSettings = emu.getConfig().windowSettings; + SDL_GetWindowPosition(window, &windowSettings.x, &windowSettings.y); + SDL_GetWindowSize(window, &windowSettings.width, &windowSettings.height); return; + } case SDL_KEYDOWN: { if (emu.romType == ROMType::None) break; @@ -130,6 +190,14 @@ void FrontendSDL::run() { emu.reset(Emulator::ReloadOption::Reload); break; } + + case SDLK_F11: { + if constexpr (Renderdoc::isSupported()) { + Renderdoc::triggerCapture(); + } + + break; + } } } break; @@ -162,8 +230,13 @@ void FrontendSDL::run() { if (emu.romType == ROMType::None) break; if (event.button.button == SDL_BUTTON_LEFT) { - const s32 x = event.button.x; - const s32 y = event.button.y; + if (windowWidth == 0 || windowHeight == 0) [[unlikely]] { + break; + } + + // Go from window positions to [0, 400) for x and [0, 480) for y + const s32 x = (s32)std::round(event.button.x * 400.f / windowWidth); + const s32 y = 
(s32)std::round(event.button.y * 480.f / windowHeight); // Check if touch falls in the touch screen area if (y >= 240 && y <= 480 && x >= 40 && x < 40 + 320) { @@ -195,6 +268,8 @@ void FrontendSDL::run() { if (gameController == nullptr) { gameController = SDL_GameControllerOpen(event.cdevice.which); gameControllerID = event.cdevice.which; + + setupControllerSensors(gameController); } break; @@ -242,8 +317,13 @@ void FrontendSDL::run() { // Handle "dragging" across the touchscreen if (hid.isTouchScreenPressed()) { - const s32 x = event.motion.x; - const s32 y = event.motion.y; + if (windowWidth == 0 || windowHeight == 0) [[unlikely]] { + break; + } + + // Go from window positions to [0, 400) for x and [0, 480) for y + const s32 x = (s32)std::round(event.motion.x * 400.f / windowWidth); + const s32 y = (s32)std::round(event.motion.y * 480.f / windowHeight); // Check if touch falls in the touch screen area and register the new touch screen position if (y >= 240 && y <= 480 && x >= 40 && x < 40 + 320) { @@ -271,6 +351,24 @@ void FrontendSDL::run() { break; } + case SDL_CONTROLLERSENSORUPDATE: { + if (event.csensor.sensor == SDL_SENSOR_GYRO) { + auto rotation = Sensors::SDL::convertRotation({ + event.csensor.data[0], + event.csensor.data[1], + event.csensor.data[2], + }); + + hid.setPitch(s16(rotation.x)); + hid.setRoll(s16(rotation.y)); + hid.setYaw(s16(rotation.z)); + } else if (event.csensor.sensor == SDL_SENSOR_ACCEL) { + auto accel = Sensors::SDL::convertAcceleration(event.csensor.data); + hid.setAccel(accel.x, accel.y, accel.z); + } + break; + } + case SDL_DROPFILE: { char* droppedDir = event.drop.file; @@ -289,6 +387,15 @@ void FrontendSDL::run() { } break; } + + case SDL_WINDOWEVENT: { + auto type = event.window.event; + if (type == SDL_WINDOWEVENT_RESIZED) { + windowWidth = event.window.data1; + windowHeight = event.window.data2; + emu.setOutputSize(windowWidth, windowHeight); + } + } } } @@ -323,3 +430,16 @@ void FrontendSDL::run() { SDL_GL_SwapWindow(window); 
} } + +void FrontendSDL::setupControllerSensors(SDL_GameController* controller) { + bool haveGyro = SDL_GameControllerHasSensor(controller, SDL_SENSOR_GYRO) == SDL_TRUE; + bool haveAccelerometer = SDL_GameControllerHasSensor(controller, SDL_SENSOR_ACCEL) == SDL_TRUE; + + if (haveGyro) { + SDL_GameControllerSetSensorEnabled(controller, SDL_SENSOR_GYRO, SDL_TRUE); + } + + if (haveAccelerometer) { + SDL_GameControllerSetSensorEnabled(controller, SDL_SENSOR_ACCEL, SDL_TRUE); + } +} diff --git a/src/pandroid/app/src/main/ic_launcher-playstore.png b/src/pandroid/app/src/main/ic_launcher-playstore.png new file mode 100644 index 00000000..04b65284 Binary files /dev/null and b/src/pandroid/app/src/main/ic_launcher-playstore.png differ diff --git a/src/pandroid/app/src/main/java/com/panda3ds/pandroid/AlberDriver.java b/src/pandroid/app/src/main/java/com/panda3ds/pandroid/AlberDriver.java index f7a3394b..83392b0a 100644 --- a/src/pandroid/app/src/main/java/com/panda3ds/pandroid/AlberDriver.java +++ b/src/pandroid/app/src/main/java/com/panda3ds/pandroid/AlberDriver.java @@ -24,13 +24,17 @@ public class AlberDriver { public static native void KeyUp(int code); public static native void SetCirclepadAxis(int x, int y); public static native void TouchScreenUp(); - public static native void TouchScreenDown(int x, int y); + public static native void TouchScreenDown(int x, int y);; + public static native void SetGyro(float roll, float pitch, float yaw); + public static native void SetAccel(float x, float y, float z); public static native void Pause(); public static native void Resume(); public static native void LoadLuaScript(String script); public static native byte[] GetSmdh(); public static native void setShaderJitEnabled(boolean enable); + public static native void setAccurateShaderMulEnable(boolean enable); + public static native void setAudioEnabled(boolean enable); public static int openDocument(String path, String mode) { try { diff --git 
a/src/pandroid/app/src/main/java/com/panda3ds/pandroid/app/GameActivity.java b/src/pandroid/app/src/main/java/com/panda3ds/pandroid/app/GameActivity.java index 83d18d99..c6764d38 100644 --- a/src/pandroid/app/src/main/java/com/panda3ds/pandroid/app/GameActivity.java +++ b/src/pandroid/app/src/main/java/com/panda3ds/pandroid/app/GameActivity.java @@ -3,11 +3,22 @@ package com.panda3ds.pandroid.app; import android.app.ActivityManager; import android.app.PictureInPictureParams; import android.content.Intent; +import android.content.res.Configuration; +import android.hardware.Sensor; +import android.hardware.SensorEvent; +import android.hardware.SensorEventListener; +import android.hardware.SensorManager; +import android.opengl.Matrix; import android.os.Build; import android.os.Bundle; +import android.renderscript.Matrix3f; +import android.renderscript.Matrix4f; +import android.util.Log; import android.util.Rational; +import android.view.Display; import android.view.KeyEvent; import android.view.MotionEvent; +import android.view.Surface; import android.view.View; import android.view.ViewGroup; import android.view.WindowManager; @@ -25,6 +36,7 @@ import com.panda3ds.pandroid.app.game.EmulatorCallback; import com.panda3ds.pandroid.data.config.GlobalConfig; import com.panda3ds.pandroid.input.InputHandler; import com.panda3ds.pandroid.input.InputMap; +import com.panda3ds.pandroid.math.Vector3; import com.panda3ds.pandroid.utils.Constants; import com.panda3ds.pandroid.view.PandaGlSurfaceView; import com.panda3ds.pandroid.view.PandaLayoutController; @@ -32,7 +44,7 @@ import com.panda3ds.pandroid.view.ds.DsLayoutManager; import com.panda3ds.pandroid.view.renderer.ConsoleRenderer; import com.panda3ds.pandroid.view.utils.PerformanceView; -public class GameActivity extends BaseActivity implements EmulatorCallback { +public class GameActivity extends BaseActivity implements EmulatorCallback, SensorEventListener { private final DrawerFragment drawerFragment = new DrawerFragment(); 
private final AlberInputListener inputListener = new AlberInputListener(this); private ConsoleRenderer renderer; @@ -74,6 +86,19 @@ public class GameActivity extends BaseActivity implements EmulatorCallback { ((FrameLayout) findViewById(R.id.panda_gl_frame)).addView(view, new FrameLayout.LayoutParams(ViewGroup.LayoutParams.WRAP_CONTENT, ViewGroup.LayoutParams.WRAP_CONTENT)); } swapScreens(GlobalConfig.get(GlobalConfig.KEY_CURRENT_DS_LAYOUT)); + registerSensors(); + } + + private void registerSensors() { + SensorManager sensorManager = (SensorManager) getSystemService(SENSOR_SERVICE); + Sensor accel = sensorManager.getDefaultSensor(Sensor.TYPE_ACCELEROMETER); + if (accel != null) { + sensorManager.registerListener(this, accel, 1); + } + Sensor gryro = sensorManager.getDefaultSensor(Sensor.TYPE_GYROSCOPE); + if (gryro != null) { + sensorManager.registerListener(this, gryro, 1); + } } private void changeOverlayVisibility(boolean visible) { @@ -85,7 +110,7 @@ public class GameActivity extends BaseActivity implements EmulatorCallback { @Override protected void onResume() { super.onResume(); - getWindow().addFlags(WindowManager.LayoutParams.FLAG_KEEP_SCREEN_ON); + getWindow().addFlags(WindowManager.LayoutParams.FLAG_KEEP_SCREEN_ON); getWindow().getDecorView().setSystemUiVisibility(View.SYSTEM_UI_FLAG_FULLSCREEN | View.SYSTEM_UI_FLAG_HIDE_NAVIGATION); getWindow().addFlags(WindowManager.LayoutParams.FLAG_FULLSCREEN); InputHandler.reset(); @@ -94,6 +119,7 @@ public class GameActivity extends BaseActivity implements EmulatorCallback { if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O_MR1) { getTheme().applyStyle(R.style.GameActivityNavigationBar, true); } + registerSensors(); } private void enablePIP() { @@ -113,6 +139,7 @@ public class GameActivity extends BaseActivity implements EmulatorCallback { protected void onPause() { super.onPause(); + ((SensorManager)getSystemService(SENSOR_SERVICE)).unregisterListener(this); InputHandler.reset(); if 
(GlobalConfig.get(GlobalConfig.KEY_PICTURE_IN_PICTURE)) { if (Build.VERSION.SDK_INT > Build.VERSION_CODES.O) { @@ -125,6 +152,10 @@ public class GameActivity extends BaseActivity implements EmulatorCallback { @Override public boolean dispatchKeyEvent(KeyEvent event) { + if (event.getKeyCode() == KeyEvent.KEYCODE_VOLUME_UP || event.getKeyCode() == KeyEvent.KEYCODE_VOLUME_DOWN) { + return super.dispatchKeyEvent(event); + } + if ((!drawerFragment.isOpened()) && InputHandler.processKeyEvent(event)) { return true; } @@ -174,10 +205,48 @@ public class GameActivity extends BaseActivity implements EmulatorCallback { @Override protected void onDestroy() { + ((SensorManager)getSystemService(SENSOR_SERVICE)).unregisterListener(this); if (AlberDriver.HasRomLoaded()) { AlberDriver.Finalize(); } super.onDestroy(); } + + private float getDeviceRotationAngle() { + if (getWindow().getDecorView() == null || getWindow().getDecorView().getDisplay() == null) + return 0.0f; + + int rotation = getWindow().getDecorView().getDisplay().getRotation(); + switch (rotation) { + case Surface.ROTATION_90: return 90.0f; + case Surface.ROTATION_180: return 180.0f; + case Surface.ROTATION_270: return -90.0f; + default: return 0.0f; + } + } + + @Override + public void onSensorChanged(SensorEvent event) { + if (AlberDriver.HasRomLoaded()) { + Sensor sensor = event.sensor; + switch (sensor.getType()) { + case Sensor.TYPE_ACCELEROMETER: { + float[] values = event.values; + Vector3 vec3 = new Vector3(values[0], values[1], values[2]); + vec3.rotateByEuler(new Vector3(0, 0, (float) (getDeviceRotationAngle() * (Math.PI / 180.0f)))); + AlberDriver.SetAccel(vec3.x, vec3.y, vec3.z); + } break; + case Sensor.TYPE_GYROSCOPE: { + float[] values = event.values; + Vector3 vec3 = new Vector3(values[0], values[1], values[2]); + vec3.rotateByEuler(new Vector3(0, 0, (float) (getDeviceRotationAngle() * (Math.PI / 180.0f)))); + AlberDriver.SetGyro(vec3.x, vec3.y, vec3.z); + } break; + } + } + } + + @Override + public 
void onAccuracyChanged(Sensor sensor, int accuracy) {} } diff --git a/src/pandroid/app/src/main/java/com/panda3ds/pandroid/app/base/BasePreferenceFragment.java b/src/pandroid/app/src/main/java/com/panda3ds/pandroid/app/base/BasePreferenceFragment.java index 9426c098..ae8d49ad 100644 --- a/src/pandroid/app/src/main/java/com/panda3ds/pandroid/app/base/BasePreferenceFragment.java +++ b/src/pandroid/app/src/main/java/com/panda3ds/pandroid/app/base/BasePreferenceFragment.java @@ -26,6 +26,10 @@ public abstract class BasePreferenceFragment extends PreferenceFragmentCompat { ((SwitchPreferenceCompat)findPreference(id)).setChecked(value); } + protected void setSummaryValue(String id,String text) { + findPreference(id).setSummary(text); + } + protected void setActivityTitle(@StringRes int titleId) { ActionBar header = ((AppCompatActivity) requireActivity()).getSupportActionBar(); if (header != null) { diff --git a/src/pandroid/app/src/main/java/com/panda3ds/pandroid/app/preferences/AdvancedPreferences.java b/src/pandroid/app/src/main/java/com/panda3ds/pandroid/app/preferences/AdvancedPreferences.java index 176bab14..13c579dd 100644 --- a/src/pandroid/app/src/main/java/com/panda3ds/pandroid/app/preferences/AdvancedPreferences.java +++ b/src/pandroid/app/src/main/java/com/panda3ds/pandroid/app/preferences/AdvancedPreferences.java @@ -22,6 +22,8 @@ public class AdvancedPreferences extends BasePreferenceFragment { setItemClick("performanceMonitor", pref -> GlobalConfig.set(GlobalConfig.KEY_SHOW_PERFORMANCE_OVERLAY, ((SwitchPreferenceCompat) pref).isChecked())); setItemClick("shaderJit", pref -> GlobalConfig.set(GlobalConfig.KEY_SHADER_JIT, ((SwitchPreferenceCompat) pref).isChecked())); + setItemClick("accurateShaderMul", pref -> GlobalConfig.set(GlobalConfig.KEY_ACCURATE_SHADER_MULTIPLY, ((SwitchPreferenceCompat) pref).isChecked())); + setItemClick("enableAudio", pref -> GlobalConfig.set(GlobalConfig.KEY_ENABLE_AUDIO, ((SwitchPreferenceCompat) pref).isChecked())); 
setItemClick("loggerService", pref -> { boolean checked = ((SwitchPreferenceCompat) pref).isChecked(); Context ctx = PandroidApplication.getAppContext(); @@ -46,5 +48,7 @@ public class AdvancedPreferences extends BasePreferenceFragment { ((SwitchPreferenceCompat) findPreference("performanceMonitor")).setChecked(GlobalConfig.get(GlobalConfig.KEY_SHOW_PERFORMANCE_OVERLAY)); ((SwitchPreferenceCompat) findPreference("loggerService")).setChecked(GlobalConfig.get(GlobalConfig.KEY_LOGGER_SERVICE)); ((SwitchPreferenceCompat) findPreference("shaderJit")).setChecked(GlobalConfig.get(GlobalConfig.KEY_SHADER_JIT)); + ((SwitchPreferenceCompat) findPreference("accurateShaderMul")).setChecked(GlobalConfig.get(GlobalConfig.KEY_ACCURATE_SHADER_MULTIPLY)); + ((SwitchPreferenceCompat) findPreference("enableAudio")).setChecked(GlobalConfig.get(GlobalConfig.KEY_ENABLE_AUDIO)); } } diff --git a/src/pandroid/app/src/main/java/com/panda3ds/pandroid/app/preferences/GeneralPreferences.java b/src/pandroid/app/src/main/java/com/panda3ds/pandroid/app/preferences/GeneralPreferences.java index 0b003db9..86182c3b 100644 --- a/src/pandroid/app/src/main/java/com/panda3ds/pandroid/app/preferences/GeneralPreferences.java +++ b/src/pandroid/app/src/main/java/com/panda3ds/pandroid/app/preferences/GeneralPreferences.java @@ -1,7 +1,13 @@ package com.panda3ds.pandroid.app.preferences; +import android.net.Uri; import android.os.Bundle; +import android.util.Log; +import android.widget.Toast; +import androidx.activity.result.ActivityResultCallback; +import androidx.activity.result.ActivityResultLauncher; +import androidx.activity.result.contract.ActivityResultContracts; import androidx.annotation.Nullable; import androidx.preference.SwitchPreferenceCompat; @@ -10,8 +16,11 @@ import com.panda3ds.pandroid.app.PreferenceActivity; import com.panda3ds.pandroid.app.base.BasePreferenceFragment; import com.panda3ds.pandroid.app.preferences.screen_editor.ScreenLayoutsPreference; import 
com.panda3ds.pandroid.data.config.GlobalConfig; +import com.panda3ds.pandroid.utils.FileUtils; -public class GeneralPreferences extends BasePreferenceFragment { +public class GeneralPreferences extends BasePreferenceFragment implements ActivityResultCallback { + private final ActivityResultContracts.OpenDocument openFolderContract = new ActivityResultContracts.OpenDocument(); + private ActivityResultLauncher pickFileRequest; @Override public void onCreatePreferences(@Nullable Bundle savedInstanceState, @Nullable String rootKey) { setPreferencesFromResource(R.xml.general_preference, rootKey); @@ -21,6 +30,11 @@ public class GeneralPreferences extends BasePreferenceFragment { setItemClick("behavior.pictureInPicture", (pref)-> GlobalConfig.set(GlobalConfig.KEY_PICTURE_IN_PICTURE, ((SwitchPreferenceCompat)pref).isChecked())); setActivityTitle(R.string.general); refresh(); + + setItemClick("games.aes_key", pref -> pickFileRequest.launch(new String[]{ "text/plain" })); + setItemClick("games.seed_db", pref -> pickFileRequest.launch(new String[]{ "application/octet-stream" })); + + pickFileRequest = registerForActivityResult(openFolderContract, this); } @Override @@ -31,5 +45,45 @@ public class GeneralPreferences extends BasePreferenceFragment { private void refresh() { setSwitchValue("behavior.pictureInPicture", GlobalConfig.get(GlobalConfig.KEY_PICTURE_IN_PICTURE)); + setSummaryValue("games.aes_key", String.format(getString(FileUtils.exists(FileUtils.getPrivatePath()+"/sysdata/aes_keys.txt") ? R.string.file_available : R.string.file_not_available), "aes_keys.txt")); + setSummaryValue("games.seed_db", String.format(getString(FileUtils.exists(FileUtils.getPrivatePath()+"/sysdata/seeddb.bin") ? 
R.string.file_available : R.string.file_not_available), "seeddb.bin")); } + + @Override + public void onDestroy() { + super.onDestroy(); + if (pickFileRequest != null) { + pickFileRequest.unregister(); + pickFileRequest = null; + } + } + + @Override + public void onActivityResult(Uri result) { + if (result != null) { + String path = result.toString(); + Log.w("File", path + " -> " + FileUtils.getName(path)); + switch (String.valueOf(FileUtils.getName(path))) { + case "aes_keys.txt": + case "seeddb.bin": { + String name = FileUtils.getName(path); + if (FileUtils.getLength(path) < 1024 * 256) { + String sysdataFolder = FileUtils.getPrivatePath() + "/sysdata"; + if (!FileUtils.exists(sysdataFolder)) { + FileUtils.createDir(FileUtils.getPrivatePath(), "sysdata"); + } + if (FileUtils.exists(sysdataFolder + "/" + name)) { + FileUtils.delete(sysdataFolder + "/" + name); + } + FileUtils.copyFile(path, FileUtils.getPrivatePath() + "/sysdata/", name); + Toast.makeText(getActivity(), String.format(getString(R.string.file_imported), name), Toast.LENGTH_LONG).show(); + } else { + Toast.makeText(getActivity(), R.string.invalid_file, Toast.LENGTH_LONG).show(); + } + } break; + default: Toast.makeText(getActivity(), R.string.invalid_file, Toast.LENGTH_LONG).show(); break; + } + refresh(); + } + } } diff --git a/src/pandroid/app/src/main/java/com/panda3ds/pandroid/app/preferences/screen_editor/ScreenEditorPreference.java b/src/pandroid/app/src/main/java/com/panda3ds/pandroid/app/preferences/screen_editor/ScreenEditorPreference.java index 4bc6e299..14c4e576 100644 --- a/src/pandroid/app/src/main/java/com/panda3ds/pandroid/app/preferences/screen_editor/ScreenEditorPreference.java +++ b/src/pandroid/app/src/main/java/com/panda3ds/pandroid/app/preferences/screen_editor/ScreenEditorPreference.java @@ -23,7 +23,7 @@ public class ScreenEditorPreference extends Fragment { @Override public View onCreateView(@NonNull LayoutInflater inflater, @Nullable ViewGroup container, @Nullable Bundle 
savedInstanceState) { layout = new LinearLayout(container.getContext()); - layout.setSystemUiVisibility(View.SYSTEM_UI_FLAG_HIDE_NAVIGATION|View.SYSTEM_UI_FLAG_FULLSCREEN|View.SYSTEM_UI_FLAG_IMMERSIVE); + layout.setSystemUiVisibility(View.SYSTEM_UI_FLAG_FULLSCREEN|View.SYSTEM_UI_FLAG_IMMERSIVE); return layout; } diff --git a/src/pandroid/app/src/main/java/com/panda3ds/pandroid/app/provider/AppDataDocumentProvider.java b/src/pandroid/app/src/main/java/com/panda3ds/pandroid/app/provider/AppDataDocumentProvider.java index ca6fad90..397eef05 100644 --- a/src/pandroid/app/src/main/java/com/panda3ds/pandroid/app/provider/AppDataDocumentProvider.java +++ b/src/pandroid/app/src/main/java/com/panda3ds/pandroid/app/provider/AppDataDocumentProvider.java @@ -95,7 +95,7 @@ public class AppDataDocumentProvider extends DocumentsProvider { private void includeFile(MatrixCursor cursor, File file) { int flags = 0; if (file.isDirectory()) { - flags = Document.FLAG_DIR_SUPPORTS_CREATE; + flags = Document.FLAG_DIR_SUPPORTS_CREATE | Document.FLAG_SUPPORTS_DELETE; } else { flags = Document.FLAG_SUPPORTS_WRITE | Document.FLAG_SUPPORTS_REMOVE | Document.FLAG_SUPPORTS_DELETE; } diff --git a/src/pandroid/app/src/main/java/com/panda3ds/pandroid/data/config/GlobalConfig.java b/src/pandroid/app/src/main/java/com/panda3ds/pandroid/data/config/GlobalConfig.java index 448d561a..e9e32aed 100644 --- a/src/pandroid/app/src/main/java/com/panda3ds/pandroid/data/config/GlobalConfig.java +++ b/src/pandroid/app/src/main/java/com/panda3ds/pandroid/data/config/GlobalConfig.java @@ -22,6 +22,8 @@ public class GlobalConfig { public static DataModel data; public static final Key KEY_SHADER_JIT = new Key<>("emu.shader_jit", true); + public static final Key KEY_ACCURATE_SHADER_MULTIPLY = new Key<>("emu.accurate_shader_mul", false); + public static final Key KEY_ENABLE_AUDIO = new Key<>("emu.enable_audio", true); public static final Key KEY_PICTURE_IN_PICTURE = new Key<>("app.behavior.pictureInPicture", false); 
public static final Key KEY_SHOW_PERFORMANCE_OVERLAY = new Key<>("dev.performanceOverlay", false); public static final Key KEY_LOGGER_SERVICE = new Key<>("dev.loggerService", false); diff --git a/src/pandroid/app/src/main/java/com/panda3ds/pandroid/math/Quaternion.java b/src/pandroid/app/src/main/java/com/panda3ds/pandroid/math/Quaternion.java new file mode 100644 index 00000000..7c485c6c --- /dev/null +++ b/src/pandroid/app/src/main/java/com/panda3ds/pandroid/math/Quaternion.java @@ -0,0 +1,31 @@ +package com.panda3ds.pandroid.math; + +public class Quaternion { + public float x, y, z, w; + public Quaternion(float x, float y, float z, float w) { + this.x = x; + this.y = y; + this.z = z; + this.w = w; + } + + public Quaternion fromEuler(Vector3 euler) { + float x = euler.x; + float y = euler.y; + float z = euler.z; + + double c1 = Math.cos(x / 2.0); + double c2 = Math.cos(y / 2.0); + double c3 = Math.cos(z / 2.0); + + double s1 = Math.sin(x / 2.0); + double s2 = Math.sin(y / 2.0); + double s3 = Math.sin(z / 2.0); + + this.x = (float) (s1 * c2 * c3 + c1 * s2 * s3); + this.y = (float) (c1 * s2 * c3 - s1 * c2 * s3); + this.z = (float) (c1 * c2 * s3 + s1 * s2 * c3); + this.w = (float) (c1 * c2 * c3 - s1 * s2 * s3); + return this; + } +} diff --git a/src/pandroid/app/src/main/java/com/panda3ds/pandroid/math/Vector3.java b/src/pandroid/app/src/main/java/com/panda3ds/pandroid/math/Vector3.java new file mode 100644 index 00000000..055972ec --- /dev/null +++ b/src/pandroid/app/src/main/java/com/panda3ds/pandroid/math/Vector3.java @@ -0,0 +1,32 @@ +package com.panda3ds.pandroid.math; + +public class Vector3 { + private final Quaternion quaternion = new Quaternion(0, 0, 0, 0); + public float x, y, z; + + public Vector3(float x, float y, float z) { + this.x = x; + this.y = y; + this.z = z; + } + + public Vector3 rotateByEuler(Vector3 euler) { + this.quaternion.fromEuler(euler); + + float x = this.x, y = this.y, z = this.z; + float qx = this.quaternion.x; + float qy = 
this.quaternion.y; + float qz = this.quaternion.z; + float qw = this.quaternion.w; + + float ix = qw * x + qy * z - qz * y; + float iy = qw * y + qz * x - qx * z; + float iz = qw * z + qx * y - qy * x; + float iw = -qx * x - qy * qz * z; + + this.x = ix * qw + iw * -qx + iy * -qz - iz * -qy; + this.y = iy * qw + iw * -qy + iz * -qx - ix * -qz; + this.z = iz * qw + iw * -qz + ix * -qy - iy * -qx; + return this; + } +} diff --git a/src/pandroid/app/src/main/java/com/panda3ds/pandroid/utils/FileUtils.java b/src/pandroid/app/src/main/java/com/panda3ds/pandroid/utils/FileUtils.java index 85245454..26b029d9 100644 --- a/src/pandroid/app/src/main/java/com/panda3ds/pandroid/utils/FileUtils.java +++ b/src/pandroid/app/src/main/java/com/panda3ds/pandroid/utils/FileUtils.java @@ -230,6 +230,10 @@ public class FileUtils { return parseFile(path).lastModified(); } + public static long getLength(String path) { + return parseFile(path).length(); + } + public static String[] listFiles(String path) { DocumentFile folder = parseFile(path); DocumentFile[] files = folder.listFiles(); diff --git a/src/pandroid/app/src/main/java/com/panda3ds/pandroid/view/PandaGlRenderer.java b/src/pandroid/app/src/main/java/com/panda3ds/pandroid/view/PandaGlRenderer.java index c57421ab..dfdad225 100644 --- a/src/pandroid/app/src/main/java/com/panda3ds/pandroid/view/PandaGlRenderer.java +++ b/src/pandroid/app/src/main/java/com/panda3ds/pandroid/view/PandaGlRenderer.java @@ -93,6 +93,8 @@ public class PandaGlRenderer implements GLSurfaceView.Renderer, ConsoleRenderer AlberDriver.Initialize(); AlberDriver.setShaderJitEnabled(GlobalConfig.get(GlobalConfig.KEY_SHADER_JIT)); + AlberDriver.setAccurateShaderMulEnable(GlobalConfig.get(GlobalConfig.KEY_ACCURATE_SHADER_MULTIPLY)); + AlberDriver.setAudioEnabled(GlobalConfig.get(GlobalConfig.KEY_ENABLE_AUDIO)); // If loading the ROM failed, display an error message and early exit if (!AlberDriver.LoadRom(romPath)) { diff --git 
a/src/pandroid/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml b/src/pandroid/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml index 6f3b755b..036d09bc 100644 --- a/src/pandroid/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml +++ b/src/pandroid/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml @@ -1,6 +1,5 @@ - - - + + \ No newline at end of file diff --git a/src/pandroid/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml b/src/pandroid/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml index 6f3b755b..036d09bc 100644 --- a/src/pandroid/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml +++ b/src/pandroid/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml @@ -1,6 +1,5 @@ - - - + + \ No newline at end of file diff --git a/src/pandroid/app/src/main/res/mipmap-hdpi/ic_launcher.webp b/src/pandroid/app/src/main/res/mipmap-hdpi/ic_launcher.webp index c209e78e..61d2084a 100644 Binary files a/src/pandroid/app/src/main/res/mipmap-hdpi/ic_launcher.webp and b/src/pandroid/app/src/main/res/mipmap-hdpi/ic_launcher.webp differ diff --git a/src/pandroid/app/src/main/res/mipmap-hdpi/ic_launcher_foreground.webp b/src/pandroid/app/src/main/res/mipmap-hdpi/ic_launcher_foreground.webp new file mode 100644 index 00000000..9cae8387 Binary files /dev/null and b/src/pandroid/app/src/main/res/mipmap-hdpi/ic_launcher_foreground.webp differ diff --git a/src/pandroid/app/src/main/res/mipmap-hdpi/ic_launcher_round.webp b/src/pandroid/app/src/main/res/mipmap-hdpi/ic_launcher_round.webp index b2dfe3d1..c3c7e5a2 100644 Binary files a/src/pandroid/app/src/main/res/mipmap-hdpi/ic_launcher_round.webp and b/src/pandroid/app/src/main/res/mipmap-hdpi/ic_launcher_round.webp differ diff --git a/src/pandroid/app/src/main/res/mipmap-mdpi/ic_launcher.webp b/src/pandroid/app/src/main/res/mipmap-mdpi/ic_launcher.webp index 4f0f1d64..3f5d5d42 100644 Binary files a/src/pandroid/app/src/main/res/mipmap-mdpi/ic_launcher.webp and 
b/src/pandroid/app/src/main/res/mipmap-mdpi/ic_launcher.webp differ diff --git a/src/pandroid/app/src/main/res/mipmap-mdpi/ic_launcher_foreground.webp b/src/pandroid/app/src/main/res/mipmap-mdpi/ic_launcher_foreground.webp new file mode 100644 index 00000000..3c486012 Binary files /dev/null and b/src/pandroid/app/src/main/res/mipmap-mdpi/ic_launcher_foreground.webp differ diff --git a/src/pandroid/app/src/main/res/mipmap-mdpi/ic_launcher_round.webp b/src/pandroid/app/src/main/res/mipmap-mdpi/ic_launcher_round.webp index 62b611da..b11dd67f 100644 Binary files a/src/pandroid/app/src/main/res/mipmap-mdpi/ic_launcher_round.webp and b/src/pandroid/app/src/main/res/mipmap-mdpi/ic_launcher_round.webp differ diff --git a/src/pandroid/app/src/main/res/mipmap-xhdpi/ic_launcher.webp b/src/pandroid/app/src/main/res/mipmap-xhdpi/ic_launcher.webp index 948a3070..eebe375d 100644 Binary files a/src/pandroid/app/src/main/res/mipmap-xhdpi/ic_launcher.webp and b/src/pandroid/app/src/main/res/mipmap-xhdpi/ic_launcher.webp differ diff --git a/src/pandroid/app/src/main/res/mipmap-xhdpi/ic_launcher_foreground.webp b/src/pandroid/app/src/main/res/mipmap-xhdpi/ic_launcher_foreground.webp new file mode 100644 index 00000000..aef35be5 Binary files /dev/null and b/src/pandroid/app/src/main/res/mipmap-xhdpi/ic_launcher_foreground.webp differ diff --git a/src/pandroid/app/src/main/res/mipmap-xhdpi/ic_launcher_round.webp b/src/pandroid/app/src/main/res/mipmap-xhdpi/ic_launcher_round.webp index 1b9a6956..a52a83b3 100644 Binary files a/src/pandroid/app/src/main/res/mipmap-xhdpi/ic_launcher_round.webp and b/src/pandroid/app/src/main/res/mipmap-xhdpi/ic_launcher_round.webp differ diff --git a/src/pandroid/app/src/main/res/mipmap-xxhdpi/ic_launcher.webp b/src/pandroid/app/src/main/res/mipmap-xxhdpi/ic_launcher.webp index 28d4b77f..9368d16c 100644 Binary files a/src/pandroid/app/src/main/res/mipmap-xxhdpi/ic_launcher.webp and b/src/pandroid/app/src/main/res/mipmap-xxhdpi/ic_launcher.webp differ diff 
--git a/src/pandroid/app/src/main/res/mipmap-xxhdpi/ic_launcher_foreground.webp b/src/pandroid/app/src/main/res/mipmap-xxhdpi/ic_launcher_foreground.webp new file mode 100644 index 00000000..95242200 Binary files /dev/null and b/src/pandroid/app/src/main/res/mipmap-xxhdpi/ic_launcher_foreground.webp differ diff --git a/src/pandroid/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.webp b/src/pandroid/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.webp index 9287f508..56373697 100644 Binary files a/src/pandroid/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.webp and b/src/pandroid/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.webp differ diff --git a/src/pandroid/app/src/main/res/mipmap-xxxhdpi/ic_launcher.webp b/src/pandroid/app/src/main/res/mipmap-xxxhdpi/ic_launcher.webp index aa7d6427..408caf75 100644 Binary files a/src/pandroid/app/src/main/res/mipmap-xxxhdpi/ic_launcher.webp and b/src/pandroid/app/src/main/res/mipmap-xxxhdpi/ic_launcher.webp differ diff --git a/src/pandroid/app/src/main/res/mipmap-xxxhdpi/ic_launcher_foreground.webp b/src/pandroid/app/src/main/res/mipmap-xxxhdpi/ic_launcher_foreground.webp new file mode 100644 index 00000000..df4f2156 Binary files /dev/null and b/src/pandroid/app/src/main/res/mipmap-xxxhdpi/ic_launcher_foreground.webp differ diff --git a/src/pandroid/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.webp b/src/pandroid/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.webp index 9126ae37..9c7b64f0 100644 Binary files a/src/pandroid/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.webp and b/src/pandroid/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.webp differ diff --git a/src/pandroid/app/src/main/res/values-pt-rBR/strings.xml b/src/pandroid/app/src/main/res/values-pt-rBR/strings.xml index f2e144c3..521f199e 100644 --- a/src/pandroid/app/src/main/res/values-pt-rBR/strings.xml +++ b/src/pandroid/app/src/main/res/values-pt-rBR/strings.xml @@ -90,4 +90,12 @@ Comportamento Jogo invalido Ferramentas + Multiplicação 
precisa de shader + Usar calculos mais precisos para shaders + Importar chaves + %s disponível + %s não disponível + Importar SeedDB + Arquivo inválido + %s Importado diff --git a/src/pandroid/app/src/main/res/values/ic_launcher_background.xml b/src/pandroid/app/src/main/res/values/ic_launcher_background.xml new file mode 100644 index 00000000..28382334 --- /dev/null +++ b/src/pandroid/app/src/main/res/values/ic_launcher_background.xml @@ -0,0 +1,4 @@ + + + #C45F5F + \ No newline at end of file diff --git a/src/pandroid/app/src/main/res/values/strings.xml b/src/pandroid/app/src/main/res/values/strings.xml index 25569528..b0479ade 100644 --- a/src/pandroid/app/src/main/res/values/strings.xml +++ b/src/pandroid/app/src/main/res/values/strings.xml @@ -59,6 +59,9 @@ Graphics Shader JIT Use shader recompiler. + Audio + Enable Audio + Audio will be enabled Tools Logger Store application logs to file. @@ -96,4 +99,12 @@ Taiwan Behavior Invalid game + Accurate shader multiplication + Can improve rendering at a small performance loss + Import keys + %s imported + %s available + %s not available + Import SeedDB + Invalid file diff --git a/src/pandroid/app/src/main/res/xml/advanced_preferences.xml b/src/pandroid/app/src/main/res/xml/advanced_preferences.xml index 6602fdfd..0f025994 100644 --- a/src/pandroid/app/src/main/res/xml/advanced_preferences.xml +++ b/src/pandroid/app/src/main/res/xml/advanced_preferences.xml @@ -28,5 +28,23 @@ app:summary="@string/pref_shader_jit_summary" app:iconSpaceReserved="false"/> + + - \ No newline at end of file + + + + + + + diff --git a/src/pandroid/app/src/main/res/xml/general_preference.xml b/src/pandroid/app/src/main/res/xml/general_preference.xml index 3e2d93c8..4352ee54 100644 --- a/src/pandroid/app/src/main/res/xml/general_preference.xml +++ b/src/pandroid/app/src/main/res/xml/general_preference.xml @@ -23,6 +23,16 @@ app:title="@string/pref_game_folders" app:summary="@string/pref_game_folders_summary" app:iconSpaceReserved="false"/> + 
+ + +#ifdef _WIN32 +#include +#else +#include +#endif + +#include +#include + +namespace Renderdoc { + enum class CaptureState { + Idle, + Triggered, + InProgress, + }; + + static CaptureState captureState{CaptureState::Idle}; + static bool renderdocLoaded{false}; + + RENDERDOC_API_1_6_0* rdocAPI{}; + + void loadRenderdoc() { +#ifdef WIN32 + // Check if we are running in Renderdoc GUI + HMODULE mod = GetModuleHandleA("renderdoc.dll"); + if (!mod) { + // If enabled in config, try to load RDoc runtime in offline mode + HKEY h_reg_key; + LONG result = RegOpenKeyExW(HKEY_LOCAL_MACHINE, L"SOFTWARE\\Classes\\RenderDoc.RDCCapture.1\\DefaultIcon\\", 0, KEY_READ, &h_reg_key); + if (result != ERROR_SUCCESS) { + return; + } + std::array keyString{}; + DWORD stringSize{keyString.size()}; + result = RegQueryValueExW(h_reg_key, L"", 0, NULL, (LPBYTE)keyString.data(), &stringSize); + if (result != ERROR_SUCCESS) { + return; + } + + std::filesystem::path path{keyString.cbegin(), keyString.cend()}; + path = path.parent_path().append("renderdoc.dll"); + const auto path_to_lib = path.generic_string(); + mod = LoadLibraryA(path_to_lib.c_str()); + } + + if (mod) { + const auto RENDERDOC_GetAPI = reinterpret_cast(GetProcAddress(mod, "RENDERDOC_GetAPI")); + const s32 ret = RENDERDOC_GetAPI(eRENDERDOC_API_Version_1_6_0, (void**)&rdocAPI); + + if (ret != 1) { + Helpers::panic("Invalid return value from RENDERDOC_GetAPI"); + } + } +#else +#ifdef ANDROID + static constexpr const char RENDERDOC_LIB[] = "libVkLayer_GLES_RenderDoc.so"; +#else + static constexpr const char RENDERDOC_LIB[] = "librenderdoc.so"; +#endif + if (void* mod = dlopen(RENDERDOC_LIB, RTLD_NOW | RTLD_NOLOAD)) { + const auto RENDERDOC_GetAPI = reinterpret_cast(dlsym(mod, "RENDERDOC_GetAPI")); + const s32 ret = RENDERDOC_GetAPI(eRENDERDOC_API_Version_1_6_0, (void**)&rdocAPI); + + if (ret != 1) { + Helpers::panic("Invalid return value from RENDERDOC_GetAPI"); + } + } +#endif + if (rdocAPI) { + renderdocLoaded = true; + + // 
Disable default capture keys as they suppose to trigger present-to-present capturing + // and it is not what we want + rdocAPI->SetCaptureKeys(nullptr, 0); + + // Also remove rdoc crash handler + rdocAPI->UnloadCrashHandler(); + } + } + + void startCapture() { + if (!rdocAPI) { + return; + } + + if (captureState == CaptureState::Triggered) { + rdocAPI->StartFrameCapture(nullptr, nullptr); + captureState = CaptureState::InProgress; + } + } + + void endCapture() { + if (!rdocAPI) { + return; + } + + if (captureState == CaptureState::InProgress) { + rdocAPI->EndFrameCapture(nullptr, nullptr); + captureState = CaptureState::Idle; + } + } + + void triggerCapture() { + if (captureState == CaptureState::Idle) { + captureState = CaptureState::Triggered; + } + } + + void setOutputDir(const std::string& path, const std::string& prefix) { + if (rdocAPI) { + rdocAPI->SetCaptureFilePathTemplate((path + '\\' + prefix).c_str()); + } + } + + bool isLoaded() { return renderdocLoaded; } +} // namespace Renderdoc +#endif \ No newline at end of file diff --git a/src/renderer.cpp b/src/renderer.cpp index 76c3e7a0..6a18df85 100644 --- a/src/renderer.cpp +++ b/src/renderer.cpp @@ -17,6 +17,7 @@ std::optional Renderer::typeFromString(std::string inString) { {"null", RendererType::Null}, {"nil", RendererType::Null}, {"none", RendererType::Null}, {"gl", RendererType::OpenGL}, {"ogl", RendererType::OpenGL}, {"opengl", RendererType::OpenGL}, {"vk", RendererType::Vulkan}, {"vulkan", RendererType::Vulkan}, {"vulcan", RendererType::Vulkan}, + {"mtl", RendererType::Metal}, {"metal", RendererType::Metal}, {"sw", RendererType::Software}, {"soft", RendererType::Software}, {"software", RendererType::Software}, {"softrast", RendererType::Software}, }; @@ -33,7 +34,8 @@ const char* Renderer::typeToString(RendererType rendererType) { case RendererType::Null: return "null"; case RendererType::OpenGL: return "opengl"; case RendererType::Vulkan: return "vulkan"; + case RendererType::Metal: return "metal"; 
case RendererType::Software: return "software"; default: return "Invalid"; } -} \ No newline at end of file +} diff --git a/tests/PICA_LITP/Makefile b/tests/PICA_LITP/Makefile new file mode 100644 index 00000000..46a94048 --- /dev/null +++ b/tests/PICA_LITP/Makefile @@ -0,0 +1,255 @@ +#--------------------------------------------------------------------------------- +.SUFFIXES: +#--------------------------------------------------------------------------------- + +ifeq ($(strip $(DEVKITARM)),) +$(error "Please set DEVKITARM in your environment. export DEVKITARM=devkitARM") +endif + +TOPDIR ?= $(CURDIR) +include $(DEVKITARM)/3ds_rules + +#--------------------------------------------------------------------------------- +# TARGET is the name of the output +# BUILD is the directory where object files & intermediate files will be placed +# SOURCES is a list of directories containing source code +# DATA is a list of directories containing data files +# INCLUDES is a list of directories containing header files +# GRAPHICS is a list of directories containing graphics files +# GFXBUILD is the directory where converted graphics files will be placed +# If set to $(BUILD), it will statically link in the converted +# files as if they were data files. +# +# NO_SMDH: if set to anything, no SMDH file is generated. +# ROMFS is the directory which contains the RomFS, relative to the Makefile (Optional) +# APP_TITLE is the name of the app stored in the SMDH file (Optional) +# APP_DESCRIPTION is the description of the app stored in the SMDH file (Optional) +# APP_AUTHOR is the author of the app stored in the SMDH file (Optional) +# ICON is the filename of the icon (.png), relative to the project folder. 
+# If not set, it attempts to use one of the following (in this order): +# - .png +# - icon.png +# - /default_icon.png +#--------------------------------------------------------------------------------- +TARGET := $(notdir $(CURDIR)) +BUILD := build +SOURCES := source +DATA := data +INCLUDES := include +GRAPHICS := gfx +GFXBUILD := $(BUILD) +#ROMFS := romfs +#GFXBUILD := $(ROMFS)/gfx + +#--------------------------------------------------------------------------------- +# options for code generation +#--------------------------------------------------------------------------------- +ARCH := -march=armv6k -mtune=mpcore -mfloat-abi=hard -mtp=soft + +CFLAGS := -g -Wall -O2 -mword-relocations \ + -ffunction-sections \ + $(ARCH) + +CFLAGS += $(INCLUDE) -D__3DS__ + +CXXFLAGS := $(CFLAGS) -fno-rtti -fno-exceptions -std=gnu++11 + +ASFLAGS := -g $(ARCH) +LDFLAGS = -specs=3dsx.specs -g $(ARCH) -Wl,-Map,$(notdir $*.map) + +LIBS := -lcitro3d -lctru -lm + +#--------------------------------------------------------------------------------- +# list of directories containing libraries, this must be the top level containing +# include and lib +#--------------------------------------------------------------------------------- +LIBDIRS := $(CTRULIB) + + +#--------------------------------------------------------------------------------- +# no real need to edit anything past this point unless you need to add additional +# rules for different file extensions +#--------------------------------------------------------------------------------- +ifneq ($(BUILD),$(notdir $(CURDIR))) +#--------------------------------------------------------------------------------- + +export OUTPUT := $(CURDIR)/$(TARGET) +export TOPDIR := $(CURDIR) + +export VPATH := $(foreach dir,$(SOURCES),$(CURDIR)/$(dir)) \ + $(foreach dir,$(GRAPHICS),$(CURDIR)/$(dir)) \ + $(foreach dir,$(DATA),$(CURDIR)/$(dir)) + +export DEPSDIR := $(CURDIR)/$(BUILD) + +CFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.c))) 
+CPPFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.cpp))) +SFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.s))) +PICAFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.v.pica))) +SHLISTFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.shlist))) +GFXFILES := $(foreach dir,$(GRAPHICS),$(notdir $(wildcard $(dir)/*.t3s))) +BINFILES := $(foreach dir,$(DATA),$(notdir $(wildcard $(dir)/*.*))) + +#--------------------------------------------------------------------------------- +# use CXX for linking C++ projects, CC for standard C +#--------------------------------------------------------------------------------- +ifeq ($(strip $(CPPFILES)),) +#--------------------------------------------------------------------------------- + export LD := $(CC) +#--------------------------------------------------------------------------------- +else +#--------------------------------------------------------------------------------- + export LD := $(CXX) +#--------------------------------------------------------------------------------- +endif +#--------------------------------------------------------------------------------- + +#--------------------------------------------------------------------------------- +ifeq ($(GFXBUILD),$(BUILD)) +#--------------------------------------------------------------------------------- +export T3XFILES := $(GFXFILES:.t3s=.t3x) +#--------------------------------------------------------------------------------- +else +#--------------------------------------------------------------------------------- +export ROMFS_T3XFILES := $(patsubst %.t3s, $(GFXBUILD)/%.t3x, $(GFXFILES)) +export T3XHFILES := $(patsubst %.t3s, $(BUILD)/%.h, $(GFXFILES)) +#--------------------------------------------------------------------------------- +endif +#--------------------------------------------------------------------------------- + +export OFILES_SOURCES := $(CPPFILES:.cpp=.o) $(CFILES:.c=.o) $(SFILES:.s=.o) + +export 
OFILES_BIN := $(addsuffix .o,$(BINFILES)) \ + $(PICAFILES:.v.pica=.shbin.o) $(SHLISTFILES:.shlist=.shbin.o) \ + $(addsuffix .o,$(T3XFILES)) + +export OFILES := $(OFILES_BIN) $(OFILES_SOURCES) + +export HFILES := $(PICAFILES:.v.pica=_shbin.h) $(SHLISTFILES:.shlist=_shbin.h) \ + $(addsuffix .h,$(subst .,_,$(BINFILES))) \ + $(GFXFILES:.t3s=.h) + +export INCLUDE := $(foreach dir,$(INCLUDES),-I$(CURDIR)/$(dir)) \ + $(foreach dir,$(LIBDIRS),-I$(dir)/include) \ + -I$(CURDIR)/$(BUILD) + +export LIBPATHS := $(foreach dir,$(LIBDIRS),-L$(dir)/lib) + +export _3DSXDEPS := $(if $(NO_SMDH),,$(OUTPUT).smdh) + +ifeq ($(strip $(ICON)),) + icons := $(wildcard *.png) + ifneq (,$(findstring $(TARGET).png,$(icons))) + export APP_ICON := $(TOPDIR)/$(TARGET).png + else + ifneq (,$(findstring icon.png,$(icons))) + export APP_ICON := $(TOPDIR)/icon.png + endif + endif +else + export APP_ICON := $(TOPDIR)/$(ICON) +endif + +ifeq ($(strip $(NO_SMDH)),) + export _3DSXFLAGS += --smdh=$(CURDIR)/$(TARGET).smdh +endif + +ifneq ($(ROMFS),) + export _3DSXFLAGS += --romfs=$(CURDIR)/$(ROMFS) +endif + +.PHONY: all clean + +#--------------------------------------------------------------------------------- +all: $(BUILD) $(GFXBUILD) $(DEPSDIR) $(ROMFS_T3XFILES) $(T3XHFILES) + @$(MAKE) --no-print-directory -C $(BUILD) -f $(CURDIR)/Makefile + +$(BUILD): + @mkdir -p $@ + +ifneq ($(GFXBUILD),$(BUILD)) +$(GFXBUILD): + @mkdir -p $@ +endif + +ifneq ($(DEPSDIR),$(BUILD)) +$(DEPSDIR): + @mkdir -p $@ +endif + +#--------------------------------------------------------------------------------- +clean: + @echo clean ... 
+ @rm -fr $(BUILD) $(TARGET).3dsx $(OUTPUT).smdh $(TARGET).elf $(GFXBUILD) + +#--------------------------------------------------------------------------------- +$(GFXBUILD)/%.t3x $(BUILD)/%.h : %.t3s +#--------------------------------------------------------------------------------- + @echo $(notdir $<) + @tex3ds -i $< -H $(BUILD)/$*.h -d $(DEPSDIR)/$*.d -o $(GFXBUILD)/$*.t3x + +#--------------------------------------------------------------------------------- +else + +#--------------------------------------------------------------------------------- +# main targets +#--------------------------------------------------------------------------------- +$(OUTPUT).3dsx : $(OUTPUT).elf $(_3DSXDEPS) + +$(OFILES_SOURCES) : $(HFILES) + +$(OUTPUT).elf : $(OFILES) + +#--------------------------------------------------------------------------------- +# you need a rule like this for each extension you use as binary data +#--------------------------------------------------------------------------------- +%.bin.o %_bin.h : %.bin +#--------------------------------------------------------------------------------- + @echo $(notdir $<) + @$(bin2o) + +#--------------------------------------------------------------------------------- +.PRECIOUS : %.t3x +#--------------------------------------------------------------------------------- +%.t3x.o %_t3x.h : %.t3x +#--------------------------------------------------------------------------------- + @echo $(notdir $<) + @$(bin2o) + +#--------------------------------------------------------------------------------- +# rules for assembling GPU shaders +#--------------------------------------------------------------------------------- +define shader-as + $(eval CURBIN := $*.shbin) + $(eval DEPSFILE := $(DEPSDIR)/$*.shbin.d) + echo "$(CURBIN).o: $< $1" > $(DEPSFILE) + echo "extern const u8" `(echo $(CURBIN) | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`"_end[];" > `(echo $(CURBIN) | tr . 
_)`.h + echo "extern const u8" `(echo $(CURBIN) | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`"[];" >> `(echo $(CURBIN) | tr . _)`.h + echo "extern const u32" `(echo $(CURBIN) | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`_size";" >> `(echo $(CURBIN) | tr . _)`.h + picasso -o $(CURBIN) $1 + bin2s $(CURBIN) | $(AS) -o $*.shbin.o +endef + +%.shbin.o %_shbin.h : %.v.pica %.g.pica + @echo $(notdir $^) + @$(call shader-as,$^) + +%.shbin.o %_shbin.h : %.v.pica + @echo $(notdir $<) + @$(call shader-as,$<) + +%.shbin.o %_shbin.h : %.shlist + @echo $(notdir $<) + @$(call shader-as,$(foreach file,$(shell cat $<),$(dir $<)$(file))) + +#--------------------------------------------------------------------------------- +%.t3x %.h : %.t3s +#--------------------------------------------------------------------------------- + @echo $(notdir $<) + @tex3ds -i $< -H $*.h -d $*.d -o $*.t3x + +-include $(DEPSDIR)/*.d + +#--------------------------------------------------------------------------------------- +endif +#--------------------------------------------------------------------------------------- diff --git a/tests/PICA_LITP/source/main.c b/tests/PICA_LITP/source/main.c new file mode 100644 index 00000000..9bcab5b9 --- /dev/null +++ b/tests/PICA_LITP/source/main.c @@ -0,0 +1,123 @@ +#include <3ds.h> +#include +#include + +#include "vshader_shbin.h" + + +#define CLEAR_COLOR 0x68B0D8FF + +#define DISPLAY_TRANSFER_FLAGS \ + (GX_TRANSFER_FLIP_VERT(0) | GX_TRANSFER_OUT_TILED(0) | GX_TRANSFER_RAW_COPY(0) | GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGBA8) | \ + GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) | GX_TRANSFER_SCALING(GX_TRANSFER_SCALE_NO)) + +static DVLB_s* vshader_dvlb; +static shaderProgram_s program; +static int uLoc_projection; +static C3D_Mtx projection; + +static void sceneInit(void) { + // Load the vertex shader, create a shader program and bind it + vshader_dvlb = DVLB_ParseFile((u32*)vshader_shbin, vshader_shbin_size); + shaderProgramInit(&program); + shaderProgramSetVsh(&program, 
&vshader_dvlb->DVLE[0]); + C3D_BindProgram(&program); + + // Get the location of the uniforms + uLoc_projection = shaderInstanceGetUniformLocation(program.vertexShader, "projection"); + + // Configure attributes for use with the vertex shader + // Attribute format and element count are ignored in immediate mode + C3D_AttrInfo* attrInfo = C3D_GetAttrInfo(); + AttrInfo_Init(attrInfo); + AttrInfo_AddLoader(attrInfo, 0, GPU_FLOAT, 3); // v0=position + AttrInfo_AddLoader(attrInfo, 1, GPU_FLOAT, 3); // v1=color + + // Compute the projection matrix + Mtx_OrthoTilt(&projection, 0.0, 400.0, 0.0, 240.0, 0.0, 1.0, true); + + // Configure the first fragment shading substage to just pass through the vertex color + // See https://www.opengl.org/sdk/docs/man2/xhtml/glTexEnv.xml for more insight + C3D_TexEnv* env = C3D_GetTexEnv(0); + C3D_TexEnvInit(env); + C3D_TexEnvSrc(env, C3D_Both, GPU_PRIMARY_COLOR, 0, 0); + C3D_TexEnvFunc(env, C3D_Both, GPU_REPLACE); +} + +static void sceneRender(void) { + // Update the uniforms + C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, uLoc_projection, &projection); + + // Draw the triangle directly + C3D_ImmDrawBegin(GPU_TRIANGLES); + // Triangle 1 + // This vertex has r >= 0 and a >= 0 so the shader should output magenta (cmp.x = cmp.y = 1) + C3D_ImmSendAttrib(200.0f, 200.0f, 0.5f, 0.0f); // v0=position + C3D_ImmSendAttrib(1.0f, 0.0f, 0.0f, 1.0f); // v1=color + + // This vertex only has a >= 0, so the shader should output lime (cmp.x = 0, cmp.y = 1) + C3D_ImmSendAttrib(100.0f, 40.0f, 0.5f, 0.0f); + C3D_ImmSendAttrib(-0.5f, 1.0f, 0.0f, 1.0f); + + // This vertex only has r >= 0, so the shader should output cyan (cmp.x = 1, cmp.y = 0) + C3D_ImmSendAttrib(300.0f, 40.0f, 0.5f, 0.0f); + C3D_ImmSendAttrib(0.5f, 0.0f, 1.0f, -1.0f); + + // Triangle 2 + // The next 3 vertices have r < 0, a < 0, so the output of the shader should be the output of litp with alpha set to 1 (cmp.x = cmp.y = 0) + C3D_ImmSendAttrib(10.0f, 20.0f, 0.5f, 0.0f); + // Output g component should be 
64 / 128 = 0.5 + C3D_ImmSendAttrib(-1.0f, 64.0f, 0.0f, -1.0f); + + C3D_ImmSendAttrib(90.0f, 20.0f, 0.5f, 0.0f); + // Output g component should be 128 / 128 = 1.0 + C3D_ImmSendAttrib(-1.0f, 256.0f, 1.0f, -1.0f); + + C3D_ImmSendAttrib(40.0f, 40.0f, 0.5f, 0.0f); + // Output g component should be 0 / 128 = 0 + C3D_ImmSendAttrib(-1.0f, 0.0f, 0.5f, -1.0f); + C3D_ImmDrawEnd(); +} + +static void sceneExit(void) { + // Free the shader program + shaderProgramFree(&program); + DVLB_Free(vshader_dvlb); +} + +int main() { + // Initialize graphics + gfxInitDefault(); + C3D_Init(C3D_DEFAULT_CMDBUF_SIZE); + + // Initialize the render target + C3D_RenderTarget* target = C3D_RenderTargetCreate(240, 400, GPU_RB_RGBA8, GPU_RB_DEPTH24_STENCIL8); + C3D_RenderTargetSetOutput(target, GFX_TOP, GFX_LEFT, DISPLAY_TRANSFER_FLAGS); + + // Initialize the scene + sceneInit(); + + // Main loop + while (aptMainLoop()) { + hidScanInput(); + + // Respond to user input + u32 kDown = hidKeysDown(); + if (kDown & KEY_START) break; // break in order to return to hbmenu + + // Render the scene + C3D_FrameBegin(C3D_FRAME_SYNCDRAW); + C3D_RenderTargetClear(target, C3D_CLEAR_ALL, CLEAR_COLOR, 0); + C3D_FrameDrawOn(target); + sceneRender(); + C3D_FrameEnd(0); + } + + // Deinitialize the scene + sceneExit(); + + // Deinitialize graphics + C3D_Fini(); + gfxExit(); + return 0; +} \ No newline at end of file diff --git a/tests/PICA_LITP/source/vshader.v.pica b/tests/PICA_LITP/source/vshader.v.pica new file mode 100644 index 00000000..d745f939 --- /dev/null +++ b/tests/PICA_LITP/source/vshader.v.pica @@ -0,0 +1,73 @@ +; Example PICA200 vertex shader + +; Uniforms +.fvec projection[4] + +; Constants +.constf myconst(0.0, 1.0, -1.0, 0.1) +.constf myconst2(0.3, 0.0, 0.0, 0.0) +.alias zeros myconst.xxxx ; Vector full of zeros +.alias ones myconst.yyyy ; Vector full of ones + +.constf magenta(0.8, 0.192, 0.812, 1.0) +.constf cyan(0.137, 0.949, 0.906, 1.0) +.constf lime(0.286, 0.929, 0.412, 1.0) + +.constf 
normalize_y(1.0, 1.0/128.0, 1.0, 1.0) + +; Outputs +.out outpos position +.out outclr color + +; Inputs (defined as aliases for convenience) +.alias inpos v0 +.alias inclr v1 + +.bool test + +.proc main + ; Force the w component of inpos to be 1.0 + mov r0.xyz, inpos + mov r0.w, ones + + ; outpos = projectionMatrix * inpos + dp4 outpos.x, projection[0], r0 + dp4 outpos.y, projection[1], r0 + dp4 outpos.z, projection[2], r0 + dp4 outpos.w, projection[3], r0 + + ; Test litp via the output fragment colour + ; r1 = input colour + mov r1, inclr + + ; This should perform the following operation: + ; cmp = (x >= 0, w >= 0) + ; dest = ( max(x, 0), clamp(y, -128, +128 ), 0, max(w, 0) ); + litp r2, r1 + + ifc cmp.x + ifc cmp.y + ; cmp.x = 1, cmp.y = 1, write magenta + mov outclr, magenta + end + .else + ; cmp.x = 1, cmp.y = 0, write cyan + mov outclr, cyan + end + .end + .else + ifc cmp.y + ; cmp.x = 0, cmp.y = 1, write lime + mov outclr, lime + end + .end + .end + + ; cmp.x = 0, cmp.y = 0, write output of litp to out colour, with y normalized to [-1, 1] + mul r2.xyz, normalize_y, r2 + ; Set alpha to one + mov r2.a, ones.a + + mov outclr, r2 + end +.end \ No newline at end of file diff --git a/tests/shader.cpp b/tests/shader.cpp index 6b0dece8..f5e70d87 100644 --- a/tests/shader.cpp +++ b/tests/shader.cpp @@ -32,7 +32,6 @@ static std::unique_ptr assembleVertexShader(std::initializer_listuploadDescriptor(swizzle.hex); } - newShader->finalize(); return newShader; } @@ -85,7 +84,11 @@ class ShaderJITTest final : public ShaderInterpreterTest { private: ShaderJIT shaderJit = {}; - void runShader() override { shaderJit.run(*shader); } + void runShader() override { + // We prefer to run tests with accurate NaN emulation + shaderJit.setAccurateMul(true); + shaderJit.run(*shader); + } public: explicit ShaderJITTest(std::initializer_list code) : ShaderInterpreterTest(code) { shaderJit.prepare(*shader); } @@ -364,4 +367,4 @@ SHADER_TEST_CASE("Address Register Offset", "[video_core][shader][shader_jit]") 
REQUIRE(shader->runVector({-73.f}) == floatUniforms[95]); REQUIRE(shader->runVector({-127.f}) == floatUniforms[41]); REQUIRE(shader->runVector({-129.f}) == floatUniforms[40]); -} \ No newline at end of file +} diff --git a/third_party/LuaJIT b/third_party/LuaJIT index 41edf095..8bf7686d 160000 --- a/third_party/LuaJIT +++ b/third_party/LuaJIT @@ -1 +1 @@ -Subproject commit 41edf0959b9504d36dd85f5f16893c004ea7d7ba +Subproject commit 8bf7686d820f868eae1a522c481fee09c18c90b9 diff --git a/third_party/cryptopp/CMakeLists.txt b/third_party/cryptopp/CMakeLists.txt index 9c410050..aa915e3f 100644 --- a/third_party/cryptopp/CMakeLists.txt +++ b/third_party/cryptopp/CMakeLists.txt @@ -209,7 +209,10 @@ function(DumpMachine output pattern) set(${output} 0 PARENT_SCOPE) else () - if(CMAKE_SYSTEM_PROCESSOR MATCHES ${pattern}) + if("${CMAKE_OSX_ARCHITECTURES}" STREQUAL "" AND CMAKE_SYSTEM_PROCESSOR MATCHES ${pattern}) + set(${output} TRUE PARENT_SCOPE) + endif() + if(CMAKE_OSX_ARCHITECTURES MATCHES ${pattern}) set(${output} TRUE PARENT_SCOPE) endif() endif() diff --git a/third_party/cryptoppwin b/third_party/cryptoppwin new file mode 160000 index 00000000..bc3441dd --- /dev/null +++ b/third_party/cryptoppwin @@ -0,0 +1 @@ +Subproject commit bc3441dd2d6a9728e747dc0180bc8b9065a2923c diff --git a/third_party/duckstation/gl/stream_buffer.cpp b/third_party/duckstation/gl/stream_buffer.cpp new file mode 100644 index 00000000..b7a40603 --- /dev/null +++ b/third_party/duckstation/gl/stream_buffer.cpp @@ -0,0 +1,288 @@ +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#include "gl/stream_buffer.h" + +#include +#include + +#include "align.hpp" + +OpenGLStreamBuffer::OpenGLStreamBuffer(GLenum target, GLuint buffer_id, u32 size) : m_target(target), m_buffer_id(buffer_id), m_size(size) {} +OpenGLStreamBuffer::~OpenGLStreamBuffer() { glDeleteBuffers(1, &m_buffer_id); } + +void OpenGLStreamBuffer::Bind() { glBindBuffer(m_target, 
m_buffer_id); } +void OpenGLStreamBuffer::Unbind() { glBindBuffer(m_target, 0); } + +void OpenGLStreamBuffer::SetDebugName(std::string_view name) { +#ifdef GPU_DEBUG_INFO + if (glObjectLabel) { + glObjectLabel(GL_BUFFER, GetGLBufferId(), static_cast(name.length()), static_cast(name.data())); + } +#endif +} + +namespace { + // Uses glBufferSubData() to update. Preferred for drivers which don't support {ARB,EXT}_buffer_storage. + class BufferSubDataStreamBuffer final : public OpenGLStreamBuffer { + public: + ~BufferSubDataStreamBuffer() override { Common::alignedFree(m_cpu_buffer); } + + MappingResult Map(u32 alignment, u32 min_size) override { return MappingResult{static_cast(m_cpu_buffer), 0, 0, m_size / alignment}; } + + u32 Unmap(u32 used_size) override { + if (used_size == 0) return 0; + + glBindBuffer(m_target, m_buffer_id); + glBufferSubData(m_target, 0, used_size, m_cpu_buffer); + return 0; + } + + u32 GetChunkSize() const override { return m_size; } + + static std::unique_ptr Create(GLenum target, u32 size) { + glGetError(); + + GLuint buffer_id; + glGenBuffers(1, &buffer_id); + glBindBuffer(target, buffer_id); + glBufferData(target, size, nullptr, GL_STREAM_DRAW); + + GLenum err = glGetError(); + if (err != GL_NO_ERROR) { + glBindBuffer(target, 0); + glDeleteBuffers(1, &buffer_id); + return {}; + } + + return std::unique_ptr(new BufferSubDataStreamBuffer(target, buffer_id, size)); + } + + private: + BufferSubDataStreamBuffer(GLenum target, GLuint buffer_id, u32 size) : OpenGLStreamBuffer(target, buffer_id, size) { + m_cpu_buffer = static_cast(Common::alignedMalloc(size, 32)); + if (!m_cpu_buffer) Panic("Failed to allocate CPU storage for GL buffer"); + } + + u8* m_cpu_buffer; + }; + + // Uses BufferData() to orphan the buffer after every update. Used on Mali where BufferSubData forces a sync. 
+ class BufferDataStreamBuffer final : public OpenGLStreamBuffer { + public: + ~BufferDataStreamBuffer() override { Common::alignedFree(m_cpu_buffer); } + + MappingResult Map(u32 alignment, u32 min_size) override { return MappingResult{static_cast(m_cpu_buffer), 0, 0, m_size / alignment}; } + + u32 Unmap(u32 used_size) override { + if (used_size == 0) return 0; + + glBindBuffer(m_target, m_buffer_id); + glBufferData(m_target, used_size, m_cpu_buffer, GL_STREAM_DRAW); + return 0; + } + + u32 GetChunkSize() const override { return m_size; } + + static std::unique_ptr Create(GLenum target, u32 size) { + glGetError(); + + GLuint buffer_id; + glGenBuffers(1, &buffer_id); + glBindBuffer(target, buffer_id); + glBufferData(target, size, nullptr, GL_STREAM_DRAW); + + GLenum err = glGetError(); + if (err != GL_NO_ERROR) { + glBindBuffer(target, 0); + glDeleteBuffers(1, &buffer_id); + return {}; + } + + return std::unique_ptr(new BufferDataStreamBuffer(target, buffer_id, size)); + } + + private: + BufferDataStreamBuffer(GLenum target, GLuint buffer_id, u32 size) : OpenGLStreamBuffer(target, buffer_id, size) { + m_cpu_buffer = static_cast(Common::alignedMalloc(size, 32)); + if (!m_cpu_buffer) Panic("Failed to allocate CPU storage for GL buffer"); + } + + u8* m_cpu_buffer; + }; + + // Base class for implementations which require syncing. 
+ class SyncingStreamBuffer : public OpenGLStreamBuffer { + public: + enum : u32 { NUM_SYNC_POINTS = 16 }; + + virtual ~SyncingStreamBuffer() override { + for (u32 i = m_available_block_index; i <= m_used_block_index; i++) { + glDeleteSync(m_sync_objects[i]); + } + } + + protected: + SyncingStreamBuffer(GLenum target, GLuint buffer_id, u32 size) + : OpenGLStreamBuffer(target, buffer_id, size), m_bytes_per_block((size + (NUM_SYNC_POINTS)-1) / NUM_SYNC_POINTS) {} + + ALWAYS_INLINE u32 GetSyncIndexForOffset(u32 offset) { return offset / m_bytes_per_block; } + + ALWAYS_INLINE void AddSyncsForOffset(u32 offset) { + const u32 end = GetSyncIndexForOffset(offset); + for (; m_used_block_index < end; m_used_block_index++) { + if (m_sync_objects[m_used_block_index]) { + Helpers::warn("GL stream buffer: Fence slot we're trying to insert is already in use"); + } + + m_sync_objects[m_used_block_index] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); + } + } + + ALWAYS_INLINE void WaitForSync(GLsync& sync) { + glClientWaitSync(sync, GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED); + glDeleteSync(sync); + sync = nullptr; + } + + ALWAYS_INLINE void EnsureSyncsWaitedForOffset(u32 offset) { + const u32 end = std::min(GetSyncIndexForOffset(offset) + 1, NUM_SYNC_POINTS); + for (; m_available_block_index < end; m_available_block_index++) { + if (!m_sync_objects[m_available_block_index]) [[unlikely]] { + Helpers::warn("GL stream buffer: Fence slot we're trying to wait on is not in use"); + } + + WaitForSync(m_sync_objects[m_available_block_index]); + } + } + + void AllocateSpace(u32 size) { + // add sync objects for writes since the last allocation + AddSyncsForOffset(m_position); + + // wait for sync objects for the space we want to use + EnsureSyncsWaitedForOffset(m_position + size); + + // wrap-around? + if ((m_position + size) > m_size) { + // current position ... 
buffer end + AddSyncsForOffset(m_size); + + // rewind, and try again + m_position = 0; + + // wait for the sync at the start of the buffer + WaitForSync(m_sync_objects[0]); + m_available_block_index = 1; + + // and however much more we need to satisfy the allocation + EnsureSyncsWaitedForOffset(size); + m_used_block_index = 0; + } + } + + u32 GetChunkSize() const override { return m_size / NUM_SYNC_POINTS; } + + u32 m_position = 0; + u32 m_used_block_index = 0; + u32 m_available_block_index = NUM_SYNC_POINTS; + u32 m_bytes_per_block; + std::array m_sync_objects{}; + }; + + class BufferStorageStreamBuffer : public SyncingStreamBuffer { + public: + ~BufferStorageStreamBuffer() override { + glBindBuffer(m_target, m_buffer_id); + glUnmapBuffer(m_target); + glBindBuffer(m_target, 0); + } + + MappingResult Map(u32 alignment, u32 min_size) override { + if (m_position > 0) m_position = Common::alignUp(m_position, alignment); + + AllocateSpace(min_size); + if ((m_position + min_size) > (m_available_block_index * m_bytes_per_block)) [[unlikely]] { + Helpers::panic("GL stream buffer: Invalid size passed to Unmap"); + } + + const u32 free_space_in_block = ((m_available_block_index * m_bytes_per_block) - m_position); + return MappingResult{static_cast(m_mapped_ptr + m_position), m_position, m_position / alignment, free_space_in_block / alignment}; + } + + u32 Unmap(u32 used_size) override { + if ((m_position + used_size) > m_size) [[unlikely]] { + Helpers::panic("GL stream buffer: Invalid size passed to Unmap"); + } + + if (!m_coherent) { + if (GLAD_GL_VERSION_4_5 || GLAD_GL_ARB_direct_state_access) { + glFlushMappedNamedBufferRange(m_buffer_id, m_position, used_size); + } else { + Bind(); + glFlushMappedBufferRange(m_target, m_position, used_size); + } + } + + const u32 prev_position = m_position; + m_position += used_size; + return prev_position; + } + + static std::unique_ptr Create(GLenum target, u32 size, bool coherent = true) { + glGetError(); + + GLuint buffer_id; + 
glGenBuffers(1, &buffer_id); + glBindBuffer(target, buffer_id); + + const u32 flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | (coherent ? GL_MAP_COHERENT_BIT : 0); + const u32 map_flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | (coherent ? 0 : GL_MAP_FLUSH_EXPLICIT_BIT); + if (GLAD_GL_VERSION_4_4 || GLAD_GL_ARB_buffer_storage) + glBufferStorage(target, size, nullptr, flags); + else if (GLAD_GL_EXT_buffer_storage) + glBufferStorageEXT(target, size, nullptr, flags); + + GLenum err = glGetError(); + if (err != GL_NO_ERROR) { + glBindBuffer(target, 0); + glDeleteBuffers(1, &buffer_id); + return {}; + } + + u8* mapped_ptr = static_cast(glMapBufferRange(target, 0, size, map_flags)); + AssertMsg(mapped_ptr, "Persistent buffer was mapped"); + + return std::unique_ptr(new BufferStorageStreamBuffer(target, buffer_id, size, mapped_ptr, coherent)); + } + + private: + BufferStorageStreamBuffer(GLenum target, GLuint buffer_id, u32 size, u8* mapped_ptr, bool coherent) + : SyncingStreamBuffer(target, buffer_id, size), m_mapped_ptr(mapped_ptr), m_coherent(coherent) {} + + u8* m_mapped_ptr; + bool m_coherent; + }; + +} // namespace + +std::unique_ptr OpenGLStreamBuffer::Create(GLenum target, u32 size) { + std::unique_ptr buf; + if (GLAD_GL_VERSION_4_4 || GLAD_GL_ARB_buffer_storage || GLAD_GL_EXT_buffer_storage) { + buf = BufferStorageStreamBuffer::Create(target, size); + if (buf) return buf; + } + + // BufferSubData is slower on all drivers except NVIDIA... +#if 0 + const char* vendor = reinterpret_cast(glGetString(GL_VENDOR)); + if (std::strcmp(vendor, "ARM") == 0 || std::strcmp(vendor, "Qualcomm") == 0) { + // Mali and Adreno drivers can't do sub-buffer tracking... 
+ return BufferDataStreamBuffer::Create(target, size); + } + + return BufferSubDataStreamBuffer::Create(target, size); +#else + return BufferDataStreamBuffer::Create(target, size); +#endif +} \ No newline at end of file diff --git a/third_party/duckstation/gl/stream_buffer.h b/third_party/duckstation/gl/stream_buffer.h new file mode 100644 index 00000000..6b3562e7 --- /dev/null +++ b/third_party/duckstation/gl/stream_buffer.h @@ -0,0 +1,53 @@ +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#pragma once + +#include +// Comment to avoid clang-format reordering the glad header + +#include +#include +#include +#include + +#include "duckstation_compat.h" +#include "helpers.hpp" + +class OpenGLStreamBuffer { + public: + virtual ~OpenGLStreamBuffer(); + + ALWAYS_INLINE GLuint GetGLBufferId() const { return m_buffer_id; } + ALWAYS_INLINE GLenum GetGLTarget() const { return m_target; } + ALWAYS_INLINE u32 GetSize() const { return m_size; } + + void Bind(); + void Unbind(); + + void SetDebugName(std::string_view name); + + struct MappingResult { + void* pointer; + u32 buffer_offset; + u32 index_aligned; // offset / alignment, suitable for base vertex + u32 space_aligned; // remaining space / alignment + }; + + virtual MappingResult Map(u32 alignment, u32 min_size) = 0; + + /// Returns the position in the buffer *before* the start of used_size. + virtual u32 Unmap(u32 used_size) = 0; + + /// Returns the minimum granularity of blocks which sync objects will be created around. 
+ virtual u32 GetChunkSize() const = 0; + + static std::unique_ptr Create(GLenum target, u32 size); + + protected: + OpenGLStreamBuffer(GLenum target, GLuint buffer_id, u32 size); + + GLenum m_target; + GLuint m_buffer_id; + u32 m_size; +}; \ No newline at end of file diff --git a/third_party/fdk-aac b/third_party/fdk-aac new file mode 160000 index 00000000..5559136b --- /dev/null +++ b/third_party/fdk-aac @@ -0,0 +1 @@ +Subproject commit 5559136bb53ce38f6f07dac4f47674dd4f032d03 diff --git a/third_party/fmt b/third_party/fmt new file mode 160000 index 00000000..f8581bce --- /dev/null +++ b/third_party/fmt @@ -0,0 +1 @@ +Subproject commit f8581bcecf317e8753887b68187c9ef1ba0524f4 diff --git a/third_party/metal-cpp b/third_party/metal-cpp new file mode 160000 index 00000000..a63bd172 --- /dev/null +++ b/third_party/metal-cpp @@ -0,0 +1 @@ +Subproject commit a63bd172ddcba73a3d87ca32032b66ad41ddb9a6 diff --git a/third_party/oaknut b/third_party/oaknut index 94c726ce..790374d7 160000 --- a/third_party/oaknut +++ b/third_party/oaknut @@ -1 +1 @@ -Subproject commit 94c726ce0338b054eb8cb5ea91de8fe6c19f4392 +Subproject commit 790374d7e66257b1f8ed89d798e5dcfb5363af05 diff --git a/third_party/opengl/opengl.hpp b/third_party/opengl/opengl.hpp index 9997e63b..607815fa 100644 --- a/third_party/opengl/opengl.hpp +++ b/third_party/opengl/opengl.hpp @@ -355,76 +355,117 @@ namespace OpenGL { } }; - enum ShaderType { - Fragment = GL_FRAGMENT_SHADER, - Vertex = GL_VERTEX_SHADER, - Geometry = GL_GEOMETRY_SHADER, - Compute = GL_COMPUTE_SHADER, - TessControl = GL_TESS_CONTROL_SHADER, - TessEvaluation = GL_TESS_EVALUATION_SHADER - }; + enum ShaderType { + Fragment = GL_FRAGMENT_SHADER, + Vertex = GL_VERTEX_SHADER, + Geometry = GL_GEOMETRY_SHADER, + Compute = GL_COMPUTE_SHADER, + TessControl = GL_TESS_CONTROL_SHADER, + TessEvaluation = GL_TESS_EVALUATION_SHADER + }; - struct Shader { - GLuint m_handle = 0; + struct Shader { + GLuint m_handle = 0; - Shader() {} - Shader(const 
std::string_view source, ShaderType type) { create(source, static_cast(type)); } + Shader() {} + Shader(const std::string_view source, ShaderType type) { create(source, static_cast(type)); } - // Returns whether compilation failed or not - bool create(const std::string_view source, GLenum type) { - m_handle = glCreateShader(type); - const GLchar* const sources[1] = { source.data() }; + // Returns true if compilation succeeded, false otherwise + bool create(const std::string_view source, GLenum type) { + m_handle = glCreateShader(type); + const GLchar* const sources[1] = {source.data()}; - glShaderSource(m_handle, 1, sources, nullptr); - glCompileShader(m_handle); + glShaderSource(m_handle, 1, sources, nullptr); + glCompileShader(m_handle); - GLint success; - glGetShaderiv(m_handle, GL_COMPILE_STATUS, &success); - if (success == GL_FALSE) { - char buf[4096]; - glGetShaderInfoLog(m_handle, 4096, nullptr, buf); - fprintf(stderr, "Failed to compile shader\nError: %s\n", buf); - glDeleteShader(m_handle); + GLint success; + glGetShaderiv(m_handle, GL_COMPILE_STATUS, &success); + if (success == GL_FALSE) { + char buf[4096]; + glGetShaderInfoLog(m_handle, 4096, nullptr, buf); + fprintf(stderr, "Failed to compile shader\nError: %s\n", buf); + glDeleteShader(m_handle); - m_handle = 0; - } + m_handle = 0; + } - return m_handle != 0; - } + return m_handle != 0; + } - GLuint handle() const { return m_handle; } - bool exists() const { return m_handle != 0; } - }; + GLuint handle() const { return m_handle; } + bool exists() const { return m_handle != 0; } + + void free() { + if (exists()) { + glDeleteShader(m_handle); + m_handle = 0; + } + } + +#ifdef OPENGL_DESTRUCTORS + ~Shader() { free(); } +#endif + }; struct Program { - GLuint m_handle = 0; + GLuint m_handle = 0; - bool create(std::initializer_list> shaders) { - m_handle = glCreateProgram(); - for (const auto& shader : shaders) { - glAttachShader(m_handle, shader.get().handle()); - } + bool create(std::initializer_list> shaders) { + 
m_handle = glCreateProgram(); + for (const auto& shader : shaders) { + glAttachShader(m_handle, shader.get().handle()); + } - glLinkProgram(m_handle); - GLint success; - glGetProgramiv(m_handle, GL_LINK_STATUS, &success); + glLinkProgram(m_handle); + GLint success; + glGetProgramiv(m_handle, GL_LINK_STATUS, &success); - if (!success) { - char buf[4096]; - glGetProgramInfoLog(m_handle, 4096, nullptr, buf); - fprintf(stderr, "Failed to link program\nError: %s\n", buf); - glDeleteProgram(m_handle); + if (!success) { + char buf[4096]; + glGetProgramInfoLog(m_handle, 4096, nullptr, buf); + fprintf(stderr, "Failed to link program\nError: %s\n", buf); + glDeleteProgram(m_handle); - m_handle = 0; - } + m_handle = 0; + } - return m_handle != 0; - } + return m_handle != 0; + } - GLuint handle() const { return m_handle; } - bool exists() const { return m_handle != 0; } - void use() const { glUseProgram(m_handle); } - }; + bool createFromBinary(const uint8_t* binary, size_t size, GLenum format) { + m_handle = glCreateProgram(); + glProgramBinary(m_handle, format, binary, size); + + GLint success; + glGetProgramiv(m_handle, GL_LINK_STATUS, &success); + + if (!success) { + char buf[4096]; + glGetProgramInfoLog(m_handle, 4096, nullptr, buf); + fprintf(stderr, "Failed to link program\nError: %s\n", buf); + glDeleteProgram(m_handle); + + m_handle = 0; + } + + return m_handle != 0; + } + + GLuint handle() const { return m_handle; } + bool exists() const { return m_handle != 0; } + void use() const { glUseProgram(m_handle); } + + void free() { + if (exists()) { + glDeleteProgram(m_handle); + m_handle = 0; + } + } + +#ifdef OPENGL_DESTRUCTORS + ~Program() { free(); } +#endif + }; static void dispatchCompute(GLuint groupsX = 1, GLuint groupsY = 1, GLuint groupsZ = 1) { glDispatchCompute(groupsX, groupsY, groupsZ); diff --git a/third_party/renderdoc/renderdoc_app.h b/third_party/renderdoc/renderdoc_app.h new file mode 100644 index 00000000..e73f1c90 --- /dev/null +++ 
b/third_party/renderdoc/renderdoc_app.h @@ -0,0 +1,721 @@ +/****************************************************************************** + * The MIT License (MIT) + * + * Copyright (c) 2019-2024 Baldur Karlsson + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ ******************************************************************************/ +#pragma once + +////////////////////////////////////////////////////////////////////////////////////////////////// +// +// Documentation for the API is available at https://renderdoc.org/docs/in_application_api.html +// + +#if !defined(RENDERDOC_NO_STDINT) +#include <stdint.h> +#endif + +#if defined(WIN32) || defined(__WIN32__) || defined(_WIN32) || defined(_MSC_VER) +#define RENDERDOC_CC __cdecl +#elif defined(__linux__) || defined(__FreeBSD__) +#define RENDERDOC_CC +#elif defined(__APPLE__) +#define RENDERDOC_CC +#else +#error "Unknown platform" +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +////////////////////////////////////////////////////////////////////////////////////////////////// +// Constants not used directly in below API + +// This is a GUID/magic value used for when applications pass a path where shader debug +// information can be found to match up with a stripped shader. +// the define can be used like so: const GUID RENDERDOC_ShaderDebugMagicValue = +// RENDERDOC_ShaderDebugMagicValue_struct +#define RENDERDOC_ShaderDebugMagicValue_struct \ + { 0xeab25520, 0x6670, 0x4865, 0x84, 0x29, 0x6c, 0x8, 0x51, 0x54, 0x00, 0xff } + +// as an alternative when you want a byte array (assuming x86 endianness): +#define RENDERDOC_ShaderDebugMagicValue_bytearray \ + { 0x20, 0x55, 0xb2, 0xea, 0x70, 0x66, 0x65, 0x48, 0x84, 0x29, 0x6c, 0x8, 0x51, 0x54, 0x00, 0xff } + +// truncated version when only a uint64_t is available (e.g. 
Vulkan tags): +#define RENDERDOC_ShaderDebugMagicValue_truncated 0x48656670eab25520ULL + +////////////////////////////////////////////////////////////////////////////////////////////////// +// RenderDoc capture options +// + +typedef enum RENDERDOC_CaptureOption { + // Allow the application to enable vsync + // + // Default - enabled + // + // 1 - The application can enable or disable vsync at will + // 0 - vsync is force disabled + eRENDERDOC_Option_AllowVSync = 0, + + // Allow the application to enable fullscreen + // + // Default - enabled + // + // 1 - The application can enable or disable fullscreen at will + // 0 - fullscreen is force disabled + eRENDERDOC_Option_AllowFullscreen = 1, + + // Record API debugging events and messages + // + // Default - disabled + // + // 1 - Enable built-in API debugging features and records the results into + // the capture, which is matched up with events on replay + // 0 - no API debugging is forcibly enabled + eRENDERDOC_Option_APIValidation = 2, + eRENDERDOC_Option_DebugDeviceMode = 2, // deprecated name of this enum + + // Capture CPU callstacks for API events + // + // Default - disabled + // + // 1 - Enables capturing of callstacks + // 0 - no callstacks are captured + eRENDERDOC_Option_CaptureCallstacks = 3, + + // When capturing CPU callstacks, only capture them from actions. + // This option does nothing without the above option being enabled + // + // Default - disabled + // + // 1 - Only captures callstacks for actions. + // Ignored if CaptureCallstacks is disabled + // 0 - Callstacks, if enabled, are captured for every event. + eRENDERDOC_Option_CaptureCallstacksOnlyDraws = 4, + eRENDERDOC_Option_CaptureCallstacksOnlyActions = 4, + + // Specify a delay in seconds to wait for a debugger to attach, after + // creating or injecting into a process, before continuing to allow it to run. 
+ // + // 0 indicates no delay, and the process will run immediately after injection + // + // Default - 0 seconds + // + eRENDERDOC_Option_DelayForDebugger = 5, + + // Verify buffer access. This includes checking the memory returned by a Map() call to + // detect any out-of-bounds modification, as well as initialising buffers with undefined contents + // to a marker value to catch use of uninitialised memory. + // + // NOTE: This option is only valid for OpenGL and D3D11. Explicit APIs such as D3D12 and Vulkan do + // not do the same kind of interception & checking and undefined contents are really undefined. + // + // Default - disabled + // + // 1 - Verify buffer access + // 0 - No verification is performed, and overwriting bounds may cause crashes or corruption in + // RenderDoc. + eRENDERDOC_Option_VerifyBufferAccess = 6, + + // The old name for eRENDERDOC_Option_VerifyBufferAccess was eRENDERDOC_Option_VerifyMapWrites. + // This option now controls the filling of uninitialised buffers with 0xdddddddd which was + // previously always enabled + eRENDERDOC_Option_VerifyMapWrites = eRENDERDOC_Option_VerifyBufferAccess, + + // Hooks any system API calls that create child processes, and injects + // RenderDoc into them recursively with the same options. + // + // Default - disabled + // + // 1 - Hooks into spawned child processes + // 0 - Child processes are not hooked by RenderDoc + eRENDERDOC_Option_HookIntoChildren = 7, + + // By default RenderDoc only includes resources in the final capture necessary + // for that frame, this allows you to override that behaviour. + // + // Default - disabled + // + // 1 - all live resources at the time of capture are included in the capture + // and available for inspection + // 0 - only the resources referenced by the captured frame are included + eRENDERDOC_Option_RefAllResources = 8, + + // **NOTE**: As of RenderDoc v1.1 this option has been deprecated. 
Setting or + // getting it will be ignored, to allow compatibility with older versions. + // In v1.1 the option acts as if it's always enabled. + // + // By default RenderDoc skips saving initial states for resources where the + // previous contents don't appear to be used, assuming that writes before + // reads indicate previous contents aren't used. + // + // Default - disabled + // + // 1 - initial contents at the start of each captured frame are saved, even if + // they are later overwritten or cleared before being used. + // 0 - unless a read is detected, initial contents will not be saved and will + // appear as black or empty data. + eRENDERDOC_Option_SaveAllInitials = 9, + + // In APIs that allow for the recording of command lists to be replayed later, + // RenderDoc may choose to not capture command lists before a frame capture is + // triggered, to reduce overheads. This means any command lists recorded once + // and replayed many times will not be available and may cause a failure to + // capture. + // + // NOTE: This is only true for APIs where multithreading is difficult or + // discouraged. Newer APIs like Vulkan and D3D12 will ignore this option + // and always capture all command lists since the API is heavily oriented + // around it and the overheads have been reduced by API design. + // + // 1 - All command lists are captured from the start of the application + // 0 - Command lists are only captured if their recording begins during + // the period when a frame capture is in progress. + eRENDERDOC_Option_CaptureAllCmdLists = 10, + + // Mute API debugging output when the API validation mode option is enabled + // + // Default - enabled + // + // 1 - Mute any API debug messages from being displayed or passed through + // 0 - API debugging is displayed as normal + eRENDERDOC_Option_DebugOutputMute = 11, + + // Option to allow vendor extensions to be used even when they may be + // incompatible with RenderDoc and cause corrupted replays or crashes. 
+ // + // Default - inactive + // + // No values are documented, this option should only be used when absolutely + // necessary as directed by a RenderDoc developer. + eRENDERDOC_Option_AllowUnsupportedVendorExtensions = 12, + + // Define a soft memory limit which some APIs may aim to keep overhead under where + // possible. Anything above this limit will where possible be saved directly to disk during + // capture. + // This will cause increased disk space use (which may cause a capture to fail if disk space is + // exhausted) as well as slower capture times. + // + // Not all memory allocations may be deferred like this so it is not a guarantee of a memory + // limit. + // + // Units are in MBs, suggested values would range from 200MB to 1000MB. + // + // Default - 0 Megabytes + eRENDERDOC_Option_SoftMemoryLimit = 13, +} RENDERDOC_CaptureOption; + +// Sets an option that controls how RenderDoc behaves on capture. +// +// Returns 1 if the option and value are valid +// Returns 0 if either is invalid and the option is unchanged +typedef int(RENDERDOC_CC *pRENDERDOC_SetCaptureOptionU32)(RENDERDOC_CaptureOption opt, uint32_t val); +typedef int(RENDERDOC_CC *pRENDERDOC_SetCaptureOptionF32)(RENDERDOC_CaptureOption opt, float val); + +// Gets the current value of an option as a uint32_t +// +// If the option is invalid, 0xffffffff is returned +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_GetCaptureOptionU32)(RENDERDOC_CaptureOption opt); + +// Gets the current value of an option as a float +// +// If the option is invalid, -FLT_MAX is returned +typedef float(RENDERDOC_CC *pRENDERDOC_GetCaptureOptionF32)(RENDERDOC_CaptureOption opt); + +typedef enum RENDERDOC_InputButton { + // '0' - '9' matches ASCII values + eRENDERDOC_Key_0 = 0x30, + eRENDERDOC_Key_1 = 0x31, + eRENDERDOC_Key_2 = 0x32, + eRENDERDOC_Key_3 = 0x33, + eRENDERDOC_Key_4 = 0x34, + eRENDERDOC_Key_5 = 0x35, + eRENDERDOC_Key_6 = 0x36, + eRENDERDOC_Key_7 = 0x37, + eRENDERDOC_Key_8 = 0x38, + eRENDERDOC_Key_9 = 0x39, 
+ + // 'A' - 'Z' matches ASCII values + eRENDERDOC_Key_A = 0x41, + eRENDERDOC_Key_B = 0x42, + eRENDERDOC_Key_C = 0x43, + eRENDERDOC_Key_D = 0x44, + eRENDERDOC_Key_E = 0x45, + eRENDERDOC_Key_F = 0x46, + eRENDERDOC_Key_G = 0x47, + eRENDERDOC_Key_H = 0x48, + eRENDERDOC_Key_I = 0x49, + eRENDERDOC_Key_J = 0x4A, + eRENDERDOC_Key_K = 0x4B, + eRENDERDOC_Key_L = 0x4C, + eRENDERDOC_Key_M = 0x4D, + eRENDERDOC_Key_N = 0x4E, + eRENDERDOC_Key_O = 0x4F, + eRENDERDOC_Key_P = 0x50, + eRENDERDOC_Key_Q = 0x51, + eRENDERDOC_Key_R = 0x52, + eRENDERDOC_Key_S = 0x53, + eRENDERDOC_Key_T = 0x54, + eRENDERDOC_Key_U = 0x55, + eRENDERDOC_Key_V = 0x56, + eRENDERDOC_Key_W = 0x57, + eRENDERDOC_Key_X = 0x58, + eRENDERDOC_Key_Y = 0x59, + eRENDERDOC_Key_Z = 0x5A, + + // leave the rest of the ASCII range free + // in case we want to use it later + eRENDERDOC_Key_NonPrintable = 0x100, + + eRENDERDOC_Key_Divide, + eRENDERDOC_Key_Multiply, + eRENDERDOC_Key_Subtract, + eRENDERDOC_Key_Plus, + + eRENDERDOC_Key_F1, + eRENDERDOC_Key_F2, + eRENDERDOC_Key_F3, + eRENDERDOC_Key_F4, + eRENDERDOC_Key_F5, + eRENDERDOC_Key_F6, + eRENDERDOC_Key_F7, + eRENDERDOC_Key_F8, + eRENDERDOC_Key_F9, + eRENDERDOC_Key_F10, + eRENDERDOC_Key_F11, + eRENDERDOC_Key_F12, + + eRENDERDOC_Key_Home, + eRENDERDOC_Key_End, + eRENDERDOC_Key_Insert, + eRENDERDOC_Key_Delete, + eRENDERDOC_Key_PageUp, + eRENDERDOC_Key_PageDn, + + eRENDERDOC_Key_Backspace, + eRENDERDOC_Key_Tab, + eRENDERDOC_Key_PrtScrn, + eRENDERDOC_Key_Pause, + + eRENDERDOC_Key_Max, +} RENDERDOC_InputButton; + +// Sets which key or keys can be used to toggle focus between multiple windows +// +// If keys is NULL or num is 0, toggle keys will be disabled +typedef void(RENDERDOC_CC *pRENDERDOC_SetFocusToggleKeys)(RENDERDOC_InputButton *keys, int num); + +// Sets which key or keys can be used to capture the next frame +// +// If keys is NULL or num is 0, captures keys will be disabled +typedef void(RENDERDOC_CC *pRENDERDOC_SetCaptureKeys)(RENDERDOC_InputButton *keys, int num); + 
+typedef enum RENDERDOC_OverlayBits { + // This single bit controls whether the overlay is enabled or disabled globally + eRENDERDOC_Overlay_Enabled = 0x1, + + // Show the average framerate over several seconds as well as min/max + eRENDERDOC_Overlay_FrameRate = 0x2, + + // Show the current frame number + eRENDERDOC_Overlay_FrameNumber = 0x4, + + // Show a list of recent captures, and how many captures have been made + eRENDERDOC_Overlay_CaptureList = 0x8, + + // Default values for the overlay mask + eRENDERDOC_Overlay_Default = + (eRENDERDOC_Overlay_Enabled | eRENDERDOC_Overlay_FrameRate | eRENDERDOC_Overlay_FrameNumber | eRENDERDOC_Overlay_CaptureList), + + // Enable all bits + eRENDERDOC_Overlay_All = ~0U, + + // Disable all bits + eRENDERDOC_Overlay_None = 0, +} RENDERDOC_OverlayBits; + +// returns the overlay bits that have been set +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_GetOverlayBits)(); +// sets the overlay bits with an and & or mask +typedef void(RENDERDOC_CC *pRENDERDOC_MaskOverlayBits)(uint32_t And, uint32_t Or); + +// this function will attempt to remove RenderDoc's hooks in the application. +// +// Note: that this can only work correctly if done immediately after +// the module is loaded, before any API work happens. RenderDoc will remove its +// injected hooks and shut down. Behaviour is undefined if this is called +// after any API functions have been called, and there is still no guarantee of +// success. +typedef void(RENDERDOC_CC *pRENDERDOC_RemoveHooks)(); + +// DEPRECATED: compatibility for code compiled against pre-1.4.1 headers. +typedef pRENDERDOC_RemoveHooks pRENDERDOC_Shutdown; + +// This function will unload RenderDoc's crash handler. +// +// If you use your own crash handler and don't want RenderDoc's handler to +// intercede, you can call this function to unload it and any unhandled +// exceptions will pass to the next handler. 
+typedef void(RENDERDOC_CC *pRENDERDOC_UnloadCrashHandler)(); + +// Sets the capture file path template +// +// pathtemplate is a UTF-8 string that gives a template for how captures will be named +// and where they will be saved. +// +// Any extension is stripped off the path, and captures are saved in the directory +// specified, and named with the filename and the frame number appended. If the +// directory does not exist it will be created, including any parent directories. +// +// If pathtemplate is NULL, the template will remain unchanged +// +// Example: +// +// SetCaptureFilePathTemplate("my_captures/example"); +// +// Capture #1 -> my_captures/example_frame123.rdc +// Capture #2 -> my_captures/example_frame456.rdc +typedef void(RENDERDOC_CC *pRENDERDOC_SetCaptureFilePathTemplate)(const char *pathtemplate); + +// returns the current capture path template, see SetCaptureFilePathTemplate above, as a UTF-8 string +typedef const char *(RENDERDOC_CC *pRENDERDOC_GetCaptureFilePathTemplate)(); + +// DEPRECATED: compatibility for code compiled against pre-1.1.2 headers. +typedef pRENDERDOC_SetCaptureFilePathTemplate pRENDERDOC_SetLogFilePathTemplate; +typedef pRENDERDOC_GetCaptureFilePathTemplate pRENDERDOC_GetLogFilePathTemplate; + +// returns the number of captures that have been made +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_GetNumCaptures)(); + +// This function returns the details of a capture, by index. New captures are added +// to the end of the list. +// +// filename will be filled with the absolute path to the capture file, as a UTF-8 string +// pathlength will be written with the length in bytes of the filename string +// timestamp will be written with the time of the capture, in seconds since the Unix epoch +// +// Any of the parameters can be NULL and they'll be skipped. 
+// +// The function will return 1 if the capture index is valid, or 0 if the index is invalid +// If the index is invalid, the values will be unchanged +// +// Note: when captures are deleted in the UI they will remain in this list, so the +// capture path may not exist anymore. +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_GetCapture)(uint32_t idx, char *filename, uint32_t *pathlength, uint64_t *timestamp); + +// Sets the comments associated with a capture file. These comments are displayed in the +// UI program when opening. +// +// filePath should be a path to the capture file to add comments to. If set to NULL or "" +// the most recent capture file created will be used instead. +// comments should be a NULL-terminated UTF-8 string to add as comments. +// +// Any existing comments will be overwritten. +typedef void(RENDERDOC_CC *pRENDERDOC_SetCaptureFileComments)(const char *filePath, const char *comments); + +// returns 1 if the RenderDoc UI is connected to this application, 0 otherwise +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_IsTargetControlConnected)(); + +// DEPRECATED: compatibility for code compiled against pre-1.1.1 headers. +// This was renamed to IsTargetControlConnected in API 1.1.1, the old typedef is kept here for +// backwards compatibility with old code, it is castable either way since it's ABI compatible +// as the same function pointer type. +typedef pRENDERDOC_IsTargetControlConnected pRENDERDOC_IsRemoteAccessConnected; + +// This function will launch the Replay UI associated with the RenderDoc library injected +// into the running application. +// +// if connectTargetControl is 1, the Replay UI will be launched with a command line parameter +// to connect to this application +// cmdline is the rest of the command line, as a UTF-8 string. E.g. a capture to open +// if cmdline is NULL, the command line will be empty. +// +// returns the PID of the replay UI if successful, 0 if not successful. 
+typedef uint32_t(RENDERDOC_CC *pRENDERDOC_LaunchReplayUI)(uint32_t connectTargetControl, const char *cmdline); + +// RenderDoc can return a higher version than requested if it's backwards compatible, +// this function returns the actual version returned. If a parameter is NULL, it will be +// ignored and the others will be filled out. +typedef void(RENDERDOC_CC *pRENDERDOC_GetAPIVersion)(int *major, int *minor, int *patch); + +// Requests that the replay UI show itself (if hidden or not the current top window). This can be +// used in conjunction with IsTargetControlConnected and LaunchReplayUI to intelligently handle +// showing the UI after making a capture. +// +// This will return 1 if the request was successfully passed on, though it's not guaranteed that +// the UI will be on top in all cases depending on OS rules. It will return 0 if there is no current +// target control connection to make such a request, or if there was another error +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_ShowReplayUI)(); + +////////////////////////////////////////////////////////////////////////// +// Capturing functions +// + +// A device pointer is a pointer to the API's root handle. +// +// This would be an ID3D11Device, HGLRC/GLXContext, ID3D12Device, etc +typedef void *RENDERDOC_DevicePointer; + +// A window handle is the OS's native window handle +// +// This would be an HWND, GLXDrawable, etc +typedef void *RENDERDOC_WindowHandle; + +// A helper macro for Vulkan, where the device handle cannot be used directly. +// +// Passing the VkInstance to this macro will return the RENDERDOC_DevicePointer to use. +// +// Specifically, the value needed is the dispatch table pointer, which sits as the first +// pointer-sized object in the memory pointed to by the VkInstance. Thus we cast to a void** and +// indirect once. 
+#define RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(inst) (*((void **)(inst))) + +// This sets the RenderDoc in-app overlay in the API/window pair as 'active' and it will +// respond to keypresses. Neither parameter can be NULL +typedef void(RENDERDOC_CC *pRENDERDOC_SetActiveWindow)(RENDERDOC_DevicePointer device, RENDERDOC_WindowHandle wndHandle); + +// capture the next frame on whichever window and API is currently considered active +typedef void(RENDERDOC_CC *pRENDERDOC_TriggerCapture)(); + +// capture the next N frames on whichever window and API is currently considered active +typedef void(RENDERDOC_CC *pRENDERDOC_TriggerMultiFrameCapture)(uint32_t numFrames); + +// When choosing either a device pointer or a window handle to capture, you can pass NULL. +// Passing NULL specifies a 'wildcard' match against anything. This allows you to specify +// any API rendering to a specific window, or a specific API instance rendering to any window, +// or in the simplest case of one window and one API, you can just pass NULL for both. +// +// In either case, if there are two or more possible matching (device,window) pairs it +// is undefined which one will be captured. +// +// Note: for headless rendering you can pass NULL for the window handle and either specify +// a device pointer or leave it NULL as above. + +// Immediately starts capturing API calls on the specified device pointer and window handle. +// +// If there is no matching thing to capture (e.g. no supported API has been initialised), +// this will do nothing. +// +// The results are undefined (including crashes) if two captures are started overlapping, +// even on separate devices and/or windows. +typedef void(RENDERDOC_CC *pRENDERDOC_StartFrameCapture)(RENDERDOC_DevicePointer device, RENDERDOC_WindowHandle wndHandle); + +// Returns whether or not a frame capture is currently ongoing anywhere. 
+// +// This will return 1 if a capture is ongoing, and 0 if there is no capture running +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_IsFrameCapturing)(); + +// Ends capturing immediately. +// +// This will return 1 if the capture succeeded, and 0 if there was an error capturing. +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_EndFrameCapture)(RENDERDOC_DevicePointer device, RENDERDOC_WindowHandle wndHandle); + +// Ends capturing immediately and discards any data stored without saving to disk. +// +// This will return 1 if the capture was discarded, and 0 if there was an error or no capture +// was in progress +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_DiscardFrameCapture)(RENDERDOC_DevicePointer device, RENDERDOC_WindowHandle wndHandle); + +// Only valid to be called between a call to StartFrameCapture and EndFrameCapture. Gives a custom +// title to the capture produced which will be displayed in the UI. +// +// If multiple captures are ongoing, this title will be applied to the first capture to end after +// this call. The second capture to end will have no title, unless this function is called again. +// +// Calling this function has no effect if no capture is currently running, and if it is called +// multiple times only the last title will be used. +typedef void(RENDERDOC_CC *pRENDERDOC_SetCaptureTitle)(const char *title); + +////////////////////////////////////////////////////////////////////////////////////////////////// +// RenderDoc API versions +// + +// RenderDoc uses semantic versioning (http://semver.org/). +// +// MAJOR version is incremented when incompatible API changes happen. +// MINOR version is incremented when functionality is added in a backwards-compatible manner. +// PATCH version is incremented when backwards-compatible bug fixes happen. +// +// Note that this means the API returned can be higher than the one you might have requested. +// e.g. 
if you are running against a newer RenderDoc that supports 1.0.1, it will be returned +// instead of 1.0.0. You can check this with the GetAPIVersion entry point +typedef enum RENDERDOC_Version { + eRENDERDOC_API_Version_1_0_0 = 10000, // RENDERDOC_API_1_0_0 = 1 00 00 + eRENDERDOC_API_Version_1_0_1 = 10001, // RENDERDOC_API_1_0_1 = 1 00 01 + eRENDERDOC_API_Version_1_0_2 = 10002, // RENDERDOC_API_1_0_2 = 1 00 02 + eRENDERDOC_API_Version_1_1_0 = 10100, // RENDERDOC_API_1_1_0 = 1 01 00 + eRENDERDOC_API_Version_1_1_1 = 10101, // RENDERDOC_API_1_1_1 = 1 01 01 + eRENDERDOC_API_Version_1_1_2 = 10102, // RENDERDOC_API_1_1_2 = 1 01 02 + eRENDERDOC_API_Version_1_2_0 = 10200, // RENDERDOC_API_1_2_0 = 1 02 00 + eRENDERDOC_API_Version_1_3_0 = 10300, // RENDERDOC_API_1_3_0 = 1 03 00 + eRENDERDOC_API_Version_1_4_0 = 10400, // RENDERDOC_API_1_4_0 = 1 04 00 + eRENDERDOC_API_Version_1_4_1 = 10401, // RENDERDOC_API_1_4_1 = 1 04 01 + eRENDERDOC_API_Version_1_4_2 = 10402, // RENDERDOC_API_1_4_2 = 1 04 02 + eRENDERDOC_API_Version_1_5_0 = 10500, // RENDERDOC_API_1_5_0 = 1 05 00 + eRENDERDOC_API_Version_1_6_0 = 10600, // RENDERDOC_API_1_6_0 = 1 06 00 +} RENDERDOC_Version; + +// API version changelog: +// +// 1.0.0 - initial release +// 1.0.1 - Bugfix: IsFrameCapturing() was returning false for captures that were triggered +// by keypress or TriggerCapture, instead of Start/EndFrameCapture. +// 1.0.2 - Refactor: Renamed eRENDERDOC_Option_DebugDeviceMode to eRENDERDOC_Option_APIValidation +// 1.1.0 - Add feature: TriggerMultiFrameCapture(). Backwards compatible with 1.0.x since the new +// function pointer is added to the end of the struct, the original layout is identical +// 1.1.1 - Refactor: Renamed remote access to target control (to better disambiguate from remote +// replay/remote server concept in replay UI) +// 1.1.2 - Refactor: Renamed "log file" in function names to just capture, to clarify that these +// are captures and not debug logging files. 
This is the first API version in the v1.0 +// branch. +// 1.2.0 - Added feature: SetCaptureFileComments() to add comments to a capture file that will be +// displayed in the UI program on load. +// 1.3.0 - Added feature: New capture option eRENDERDOC_Option_AllowUnsupportedVendorExtensions +// which allows users to opt-in to allowing unsupported vendor extensions to function. +// Should be used at the user's own risk. +// Refactor: Renamed eRENDERDOC_Option_VerifyMapWrites to +// eRENDERDOC_Option_VerifyBufferAccess, which now also controls initialisation to +// 0xdddddddd of uninitialised buffer contents. +// 1.4.0 - Added feature: DiscardFrameCapture() to discard a frame capture in progress and stop +// capturing without saving anything to disk. +// 1.4.1 - Refactor: Renamed Shutdown to RemoveHooks to better clarify what is happening +// 1.4.2 - Refactor: Renamed 'draws' to 'actions' in callstack capture option. +// 1.5.0 - Added feature: ShowReplayUI() to request that the replay UI show itself if connected +// 1.6.0 - Added feature: SetCaptureTitle() which can be used to set a title for a +// capture made with StartFrameCapture() or EndFrameCapture() + +typedef struct RENDERDOC_API_1_6_0 { + pRENDERDOC_GetAPIVersion GetAPIVersion; + + pRENDERDOC_SetCaptureOptionU32 SetCaptureOptionU32; + pRENDERDOC_SetCaptureOptionF32 SetCaptureOptionF32; + + pRENDERDOC_GetCaptureOptionU32 GetCaptureOptionU32; + pRENDERDOC_GetCaptureOptionF32 GetCaptureOptionF32; + + pRENDERDOC_SetFocusToggleKeys SetFocusToggleKeys; + pRENDERDOC_SetCaptureKeys SetCaptureKeys; + + pRENDERDOC_GetOverlayBits GetOverlayBits; + pRENDERDOC_MaskOverlayBits MaskOverlayBits; + + // Shutdown was renamed to RemoveHooks in 1.4.1. 
+ // These unions allow old code to continue compiling without changes + union { + pRENDERDOC_Shutdown Shutdown; + pRENDERDOC_RemoveHooks RemoveHooks; + }; + pRENDERDOC_UnloadCrashHandler UnloadCrashHandler; + + // Get/SetLogFilePathTemplate was renamed to Get/SetCaptureFilePathTemplate in 1.1.2. + // These unions allow old code to continue compiling without changes + union { + // deprecated name + pRENDERDOC_SetLogFilePathTemplate SetLogFilePathTemplate; + // current name + pRENDERDOC_SetCaptureFilePathTemplate SetCaptureFilePathTemplate; + }; + union { + // deprecated name + pRENDERDOC_GetLogFilePathTemplate GetLogFilePathTemplate; + // current name + pRENDERDOC_GetCaptureFilePathTemplate GetCaptureFilePathTemplate; + }; + + pRENDERDOC_GetNumCaptures GetNumCaptures; + pRENDERDOC_GetCapture GetCapture; + + pRENDERDOC_TriggerCapture TriggerCapture; + + // IsRemoteAccessConnected was renamed to IsTargetControlConnected in 1.1.1. + // This union allows old code to continue compiling without changes + union { + // deprecated name + pRENDERDOC_IsRemoteAccessConnected IsRemoteAccessConnected; + // current name + pRENDERDOC_IsTargetControlConnected IsTargetControlConnected; + }; + pRENDERDOC_LaunchReplayUI LaunchReplayUI; + + pRENDERDOC_SetActiveWindow SetActiveWindow; + + pRENDERDOC_StartFrameCapture StartFrameCapture; + pRENDERDOC_IsFrameCapturing IsFrameCapturing; + pRENDERDOC_EndFrameCapture EndFrameCapture; + + // new function in 1.1.0 + pRENDERDOC_TriggerMultiFrameCapture TriggerMultiFrameCapture; + + // new function in 1.2.0 + pRENDERDOC_SetCaptureFileComments SetCaptureFileComments; + + // new function in 1.4.0 + pRENDERDOC_DiscardFrameCapture DiscardFrameCapture; + + // new function in 1.5.0 + pRENDERDOC_ShowReplayUI ShowReplayUI; + + // new function in 1.6.0 + pRENDERDOC_SetCaptureTitle SetCaptureTitle; +} RENDERDOC_API_1_6_0; + +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_0_0; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_0_1; +typedef RENDERDOC_API_1_6_0 
RENDERDOC_API_1_0_2; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_1_0; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_1_1; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_1_2; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_2_0; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_3_0; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_4_0; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_4_1; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_4_2; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_5_0; + +////////////////////////////////////////////////////////////////////////////////////////////////// +// RenderDoc API entry point +// +// This entry point can be obtained via GetProcAddress/dlsym if RenderDoc is available. +// +// The name is the same as the typedef - "RENDERDOC_GetAPI" +// +// This function is not thread safe, and should not be called on multiple threads at once. +// Ideally, call this once as early as possible in your application's startup, before doing +// any API work, since some configuration functionality etc has to be done also before +// initialising any APIs. +// +// Parameters: +// version is a single value from the RENDERDOC_Version above. +// +// outAPIPointers will be filled out with a pointer to the corresponding struct of function +// pointers. +// +// Returns: +// 1 - if the outAPIPointers has been filled with a pointer to the API struct requested +// 0 - if the requested version is not supported or the arguments are invalid. +// +typedef int(RENDERDOC_CC *pRENDERDOC_GetAPI)(RENDERDOC_Version version, void **outAPIPointers); + +#ifdef __cplusplus +} // extern "C" +#endif \ No newline at end of file