diff --git a/.github/mac-bundle-qt.sh b/.github/mac-bundle-qt.sh old mode 100644 new mode 100755 index f8083936..aed59d60 --- a/.github/mac-bundle-qt.sh +++ b/.github/mac-bundle-qt.sh @@ -2,26 +2,38 @@ # For Plist buddy PATH="$PATH:/usr/libexec" - # Construct the app iconset. mkdir alber.iconset -convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 16x16 alber.iconset/icon_16x16.png -convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 32x32 alber.iconset/icon_16x16@2x.png -convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 32x32 alber.iconset/icon_32x32.png -convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 64x64 alber.iconset/icon_32x32@2x.png -convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 128x128 alber.iconset/icon_128x128.png -convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 256x256 alber.iconset/icon_128x128@2x.png -convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 256x256 alber.iconset/icon_256x256.png -convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 512x512 alber.iconset/icon_256x256@2x.png -convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 512x512 alber.iconset/icon_512x512.png -convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 1024x1024 alber.iconset/icon_512x512@2x.png + +# Create a mask for rounding our icon. 
We don't want it to be square, as most MacOS icons are rounded +convert -size 1024x1024 xc:none -draw "roundrectangle 0,0,1024,1024,220,220" rounded_mask.png +convert docs/img/mac_icon.ico -alpha on -background none -resize 1024x1024 PNG32:temp.png +# Apply the mask to our icon +convert temp.png rounded_mask.png -compose DstIn -composite temp.png + +# Normal icons +convert temp.png -alpha on -background none -units PixelsPerInch -density 72 -resize 16x16 alber.iconset/icon_16x16.png +convert temp.png -alpha on -background none -units PixelsPerInch -density 72 -resize 32x32 alber.iconset/icon_32x32.png +convert temp.png -alpha on -background none -units PixelsPerInch -density 72 -resize 64x64 alber.iconset/icon_64x64.png +convert temp.png -alpha on -background none -units PixelsPerInch -density 72 -resize 128x128 alber.iconset/icon_128x128.png +convert temp.png -alpha on -background none -units PixelsPerInch -density 72 -resize 256x256 alber.iconset/icon_256x256.png +convert temp.png -alpha on -background none -units PixelsPerInch -density 72 -resize 512x512 alber.iconset/icon_512x512.png + +# High DPI icons +convert temp.png -alpha on -background none -units PixelsPerInch -density 144 -resize 32x32 alber.iconset/icon_16x16@2x.png +convert temp.png -alpha on -background none -units PixelsPerInch -density 144 -resize 64x64 alber.iconset/icon_32x32@2x.png +convert temp.png -alpha on -background none -units PixelsPerInch -density 144 -resize 128x128 alber.iconset/icon_64x64@2x.png +convert temp.png -alpha on -background none -units PixelsPerInch -density 144 -resize 256x256 alber.iconset/icon_128x128@2x.png +convert temp.png -alpha on -background none -units PixelsPerInch -density 144 -resize 512x512 alber.iconset/icon_256x256@2x.png +convert temp.png -alpha on -background none -units PixelsPerInch -density 144 -resize 1024x1024 alber.iconset/icon_512x512@2x.png + iconutil --convert icns alber.iconset +rm rounded_mask.png temp.png # Set up the .app directory mkdir -p 
Alber.app/Contents/MacOS/Libraries mkdir Alber.app/Contents/Resources - # Copy binary into App cp ./build/Alber Alber.app/Contents/MacOS/Alber chmod a+x Alber.app/Contents/Macos/Alber diff --git a/.github/mac-bundle.sh b/.github/mac-bundle.sh index 3349d6d3..274f1115 100755 --- a/.github/mac-bundle.sh +++ b/.github/mac-bundle.sh @@ -2,26 +2,38 @@ # For Plist buddy PATH="$PATH:/usr/libexec" - # Construct the app iconset. mkdir alber.iconset -convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 16x16 alber.iconset/icon_16x16.png -convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 32x32 alber.iconset/icon_16x16@2x.png -convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 32x32 alber.iconset/icon_32x32.png -convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 64x64 alber.iconset/icon_32x32@2x.png -convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 128x128 alber.iconset/icon_128x128.png -convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 256x256 alber.iconset/icon_128x128@2x.png -convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 256x256 alber.iconset/icon_256x256.png -convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 512x512 alber.iconset/icon_256x256@2x.png -convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 512x512 alber.iconset/icon_512x512.png -convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 1024x1024 alber.iconset/icon_512x512@2x.png + +# Create a mask for rounding our icon. 
We don't want it to be square, as most MacOS icons are rounded +convert -size 1024x1024 xc:none -draw "roundrectangle 0,0,1024,1024,220,220" rounded_mask.png +convert docs/img/mac_icon.ico -alpha on -background none -resize 1024x1024 PNG32:temp.png +# Apply the mask to our icon +convert temp.png rounded_mask.png -compose DstIn -composite temp.png + +# Normal icons +convert temp.png -alpha on -background none -units PixelsPerInch -density 72 -resize 16x16 alber.iconset/icon_16x16.png +convert temp.png -alpha on -background none -units PixelsPerInch -density 72 -resize 32x32 alber.iconset/icon_32x32.png +convert temp.png -alpha on -background none -units PixelsPerInch -density 72 -resize 64x64 alber.iconset/icon_64x64.png +convert temp.png -alpha on -background none -units PixelsPerInch -density 72 -resize 128x128 alber.iconset/icon_128x128.png +convert temp.png -alpha on -background none -units PixelsPerInch -density 72 -resize 256x256 alber.iconset/icon_256x256.png +convert temp.png -alpha on -background none -units PixelsPerInch -density 72 -resize 512x512 alber.iconset/icon_512x512.png + +# High DPI icons +convert temp.png -alpha on -background none -units PixelsPerInch -density 144 -resize 32x32 alber.iconset/icon_16x16@2x.png +convert temp.png -alpha on -background none -units PixelsPerInch -density 144 -resize 64x64 alber.iconset/icon_32x32@2x.png +convert temp.png -alpha on -background none -units PixelsPerInch -density 144 -resize 128x128 alber.iconset/icon_64x64@2x.png +convert temp.png -alpha on -background none -units PixelsPerInch -density 144 -resize 256x256 alber.iconset/icon_128x128@2x.png +convert temp.png -alpha on -background none -units PixelsPerInch -density 144 -resize 512x512 alber.iconset/icon_256x256@2x.png +convert temp.png -alpha on -background none -units PixelsPerInch -density 144 -resize 1024x1024 alber.iconset/icon_512x512@2x.png + iconutil --convert icns alber.iconset +rm rounded_mask.png temp.png # Set up the .app directory mkdir -p 
Alber.app/Contents/MacOS/Libraries mkdir Alber.app/Contents/Resources - # Copy binary into App cp ./build/Alber Alber.app/Contents/MacOS/Alber chmod a+x Alber.app/Contents/Macos/Alber diff --git a/.github/workflows/Hydra_Build.yml b/.github/workflows/Hydra_Build.yml index dbdfbf1b..66cd2e27 100644 --- a/.github/workflows/Hydra_Build.yml +++ b/.github/workflows/Hydra_Build.yml @@ -213,3 +213,32 @@ jobs: path: | ${{github.workspace}}/build/panda3ds_libretro.so ${{github.workspace}}/docs/libretro/panda3ds_libretro.info + + ARM-Libretro-Android: + runs-on: ubuntu-24.04 + + steps: + - uses: actions/checkout@v4 + - name: Fetch submodules + run: git submodule update --init --recursive + + - name: Configure CMake + run: cmake -B ${{github.workspace}}/build -DBUILD_LIBRETRO_CORE=1 -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK_ROOT}/build/cmake/android.toolchain.cmake -DANDROID_ABI=arm64-v8a -DENABLE_VULKAN=0 -DENABLE_USER_BUILD=ON -DCMAKE_CXX_FLAGS="-march=armv8-a+crypto" + + - name: Build + run: | + # Apply patch for GLES compatibility + git apply ./.github/gles.patch + # Build the project with CMake + cmake --build ${{github.workspace}}/build --config ${{ env.BUILD_TYPE }} + + # Strip the generated library + ${ANDROID_NDK_ROOT}/toolchains/llvm/prebuilt/linux-x86_64/bin/llvm-strip --strip-unneeded ./build/panda3ds_libretro.so + + - name: Upload artifacts + uses: actions/upload-artifact@v4 + with: + name: Android arm64 Libretro core + path: | + ${{github.workspace}}/build/panda3ds_libretro.so + ${{github.workspace}}/docs/libretro/panda3ds_libretro.info diff --git a/.github/workflows/Test_Build.yml b/.github/workflows/Test_Build.yml new file mode 100644 index 00000000..edfa9fa9 --- /dev/null +++ b/.github/workflows/Test_Build.yml @@ -0,0 +1,44 @@ +name: Hardware Test Build + +on: + push: + branches: + - master + pull_request: + +jobs: + build: + runs-on: ubuntu-latest + container: devkitpro/devkitarm + + steps: + - uses: actions/checkout@v4 + + 
- name: Install and update packages + run: | + apt-get -y install python3 python3-pip python3-venv p7zip-full libarchive13 + python3 --version + python3 -m venv venv + . ./venv/bin/activate + python3 -m pip install --upgrade pip setuptools + + - name: Compile tests + run: | + make -C tests/AppCpuTimeLimit + make -C tests/DetectEmulator + make -C tests/HelloWorldSVC + make -C tests/ImmediateModeTriangles + make -C tests/PICA_LITP + make -C tests/SimplerTri + + - name: Clone and compile 3ds-examples + run: | + git clone --recursive https://github.com/devkitPro/3ds-examples tests/3ds-examples + make -C tests/3ds-examples + + - name: Upload binaries + uses: actions/upload-artifact@v4 + with: + name: Source & Binaries + path: tests + diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 94ea8193..5fb1065c 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -102,10 +102,17 @@ libretro-build-osx-arm64: # - .core-defs # Android ARMv8a -# android-arm64-v8a: -# extends: -# - .libretro-android-cmake-arm64-v8a -# - .core-defs +android-arm64-v8a: + extends: + - .libretro-android-cmake-arm64-v8a + - .core-defs + before_script: + - export NUMPROC=$(($(nproc)/5)) + - export ANDROID_NDK_VERSION=26.2.11394342 + - export NDK_ROOT=/android-sdk-linux/ndk/$ANDROID_NDK_VERSION + - /android-sdk-linux/cmdline-tools/latest/bin/sdkmanager "ndk;$ANDROID_NDK_VERSION" + - /android-sdk-linux/cmdline-tools/latest/bin/sdkmanager "cmake;3.30.3" + - export PATH=/android-sdk-linux/cmake/3.30.3/bin:$PATH # Android 64-bit x86 # android-x86_64: diff --git a/CMakeLists.txt b/CMakeLists.txt index 0e522bdb..8eded674 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -64,6 +64,7 @@ option(BUILD_HYDRA_CORE "Build a Hydra core" OFF) option(BUILD_LIBRETRO_CORE "Build a Libretro core" OFF) option(ENABLE_RENDERDOC_API "Build with support for Renderdoc's capture API for graphics debugging" ON) option(DISABLE_SSE4 "Build with SSE4 instructions disabled, may reduce performance" OFF) +option(ENABLE_FASTMEM "Build with 
support for hardware fastmem" ON) option(USE_LIBRETRO_AUDIO "Enable to use the LR audio device with the LR core. Otherwise our own device is used" OFF) option(IOS_SIMULATOR_BUILD "Compiling for IOS simulator (Set to off if compiling for a real iPhone)" ON) @@ -188,6 +189,7 @@ include_directories(third_party/toml11) include_directories(third_party/glm) include_directories(third_party/renderdoc) include_directories(third_party/duckstation) +include_directories(third_party/host_memory/include) add_subdirectory(third_party/cmrc) @@ -329,6 +331,7 @@ set(SOURCE_FILES src/emulator.cpp src/io_file.cpp src/config.cpp src/http_server.cpp src/stb_image_write.c src/core/cheats.cpp src/core/action_replay.cpp src/discord_rpc.cpp src/lua.cpp src/memory_mapped_file.cpp src/renderdoc.cpp src/frontend_settings.cpp src/miniaudio/miniaudio.cpp src/core/screen_layout.cpp + src/dynamic_library.cpp ) set(CRYPTO_SOURCE_FILES src/core/crypto/aes_engine.cpp) set(KERNEL_SOURCE_FILES src/core/kernel/kernel.cpp src/core/kernel/resource_limits.cpp @@ -337,6 +340,7 @@ set(KERNEL_SOURCE_FILES src/core/kernel/kernel.cpp src/core/kernel/resource_limi src/core/kernel/address_arbiter.cpp src/core/kernel/error.cpp src/core/kernel/file_operations.cpp src/core/kernel/directory_operations.cpp src/core/kernel/idle_thread.cpp src/core/kernel/timers.cpp + src/core/kernel/fcram.cpp ) set(SERVICE_SOURCE_FILES src/core/services/service_manager.cpp src/core/services/apt.cpp src/core/services/hid.cpp src/core/services/fs.cpp src/core/services/gsp_gpu.cpp src/core/services/gsp_lcd.cpp @@ -416,7 +420,8 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/termcolor.hpp include/fs/archive_twl_sound.hpp include/fs/archive_card_spi.hpp include/services/ns.hpp include/audio/audio_device.hpp include/audio/audio_device_interface.hpp include/audio/libretro_audio_device.hpp include/services/ir/ir_types.hpp include/services/ir/ir_device.hpp include/services/ir/circlepad_pro.hpp 
include/services/service_intercept.hpp - include/screen_layout.hpp include/services/service_map.hpp include/audio/dsp_binary.hpp + include/screen_layout.hpp include/services/service_map.hpp include/audio/dsp_binary.hpp include/dynamic_library.hpp + include/enum_flag_ops.hpp include/kernel/fcram.hpp ) if(IOS) @@ -443,6 +448,9 @@ set(THIRD_PARTY_SOURCE_FILES third_party/imgui/imgui.cpp third_party/cityhash/cityhash.cpp third_party/xxhash/xxhash.c + + third_party/host_memory/host_memory.cpp + third_party/host_memory/virtual_buffer.cpp ) if(ENABLE_LUAJIT AND NOT ANDROID) @@ -675,7 +683,7 @@ if(ENABLE_METAL AND APPLE) target_sources(AlberCore PRIVATE ${RENDERER_MTL_SOURCE_FILES}) target_compile_definitions(AlberCore PUBLIC "PANDA3DS_ENABLE_METAL=1") - target_include_directories(AlberCore PRIVATE third_party/metal-cpp) + target_include_directories(AlberCore PUBLIC third_party/metal-cpp) # TODO: check if all of them are needed target_link_libraries(AlberCore PUBLIC "-framework Metal" "-framework Foundation" "-framework QuartzCore" resources_renderer_mtl) endif() @@ -710,6 +718,10 @@ if(ENABLE_HTTP_SERVER) target_compile_definitions(AlberCore PRIVATE PANDA3DS_ENABLE_HTTP_SERVER=1) endif() +if(ENABLE_FASTMEM) + target_compile_definitions(AlberCore PRIVATE PANDA3DS_HARDWARE_FASTMEM=1) +endif() + # Configure frontend if(ENABLE_QT_GUI) @@ -730,18 +742,25 @@ if(NOT BUILD_HYDRA_CORE AND NOT BUILD_LIBRETRO_CORE) option(GENERATE_QT_TRANSLATION "Generate Qt translation file" OFF) set(QT_LANGUAGES docs/translations) - set(FRONTEND_SOURCE_FILES src/panda_qt/main.cpp src/panda_qt/screen.cpp src/panda_qt/main_window.cpp src/panda_qt/about_window.cpp + set(FRONTEND_SOURCE_FILES src/panda_qt/main.cpp src/panda_qt/main_window.cpp src/panda_qt/about_window.cpp src/panda_qt/config_window.cpp src/panda_qt/zep.cpp src/panda_qt/text_editor.cpp src/panda_qt/cheats_window.cpp src/panda_qt/mappings.cpp src/panda_qt/patch_window.cpp src/panda_qt/elided_label.cpp src/panda_qt/shader_editor.cpp 
src/panda_qt/translations.cpp src/panda_qt/thread_debugger.cpp src/panda_qt/cpu_debugger.cpp src/panda_qt/dsp_debugger.cpp src/panda_qt/input_window.cpp + src/panda_qt/screen/screen.cpp src/panda_qt/screen/screen_gl.cpp src/panda_qt/screen/screen_mtl.cpp ) - set(FRONTEND_HEADER_FILES include/panda_qt/screen.hpp include/panda_qt/main_window.hpp include/panda_qt/about_window.hpp + + set(FRONTEND_HEADER_FILES include/panda_qt/main_window.hpp include/panda_qt/about_window.hpp include/panda_qt/config_window.hpp include/panda_qt/text_editor.hpp include/panda_qt/cheats_window.hpp include/panda_qt/patch_window.hpp include/panda_qt/elided_label.hpp include/panda_qt/shader_editor.hpp include/panda_qt/thread_debugger.hpp include/panda_qt/cpu_debugger.hpp include/panda_qt/dsp_debugger.hpp - include/panda_qt/disabled_widget_overlay.hpp include/panda_qt/input_window.hpp + include/panda_qt/disabled_widget_overlay.hpp include/panda_qt/input_window.hpp include/panda_qt/screen/screen.hpp + include/panda_qt/screen/screen_gl.hpp include/panda_qt/screen/screen_mtl.hpp ) + if (APPLE AND ENABLE_METAL) + set(FRONTEND_SOURCE_FILES ${FRONTEND_SOURCE_FILES} src/panda_qt/screen/metal_context.mm) + endif() + source_group("Source Files\\Qt" FILES ${FRONTEND_SOURCE_FILES}) source_group("Header Files\\Qt" FILES ${FRONTEND_HEADER_FILES}) include_directories(${Qt6Gui_PRIVATE_INCLUDE_DIRS}) diff --git a/include/PICA/gpu.hpp b/include/PICA/gpu.hpp index 77bfb8ed..e62f41b3 100644 --- a/include/PICA/gpu.hpp +++ b/include/PICA/gpu.hpp @@ -109,11 +109,7 @@ class GPU { void screenshot(const std::string& name) { renderer->screenshot(name); } void deinitGraphicsContext() { renderer->deinitGraphicsContext(); } -#if defined(PANDA3DS_FRONTEND_SDL) - void initGraphicsContext(SDL_Window* window) { renderer->initGraphicsContext(window); } -#elif defined(PANDA3DS_FRONTEND_QT) - void initGraphicsContext(GL::Context* context) { renderer->initGraphicsContext(context); } -#endif + void initGraphicsContext(void* 
context) { renderer->initGraphicsContext(context); } void fireDMA(u32 dest, u32 source, u32 size); void reset(); diff --git a/include/audio/audio_interpolation.hpp b/include/audio/audio_interpolation.hpp index 8a87cbcd..2c9f6540 100644 --- a/include/audio/audio_interpolation.hpp +++ b/include/audio/audio_interpolation.hpp @@ -16,9 +16,9 @@ namespace Audio::Interpolation { using StereoFrame16 = Audio::DSPMixer::StereoFrame; struct State { - // Two historical samples. - std::array xn1 = {}; //< x[n-1] - std::array xn2 = {}; //< x[n-2] + // Two history samples. + std::array xn1 = {}; // x[n-1] + std::array xn2 = {}; // x[n-2] // Current fractional position. u64 fposition = 0; }; diff --git a/include/audio/hle_core.hpp b/include/audio/hle_core.hpp index d374e304..f04ed6c2 100644 --- a/include/audio/hle_core.hpp +++ b/include/audio/hle_core.hpp @@ -69,7 +69,9 @@ namespace Audio { // In order to save up on CPU time. uint enabledMixStages = 0; - u32 samplePosition; // Sample number into the current audio buffer + u32 samplePosition; // Sample number into the current audio buffer + u32 currentBufferPaddr; // Physical address of current audio buffer + float rateMultiplier; u16 syncCount; u16 currentBufferID; diff --git a/include/config.hpp b/include/config.hpp index b890e251..4aa4cce1 100644 --- a/include/config.hpp +++ b/include/config.hpp @@ -2,10 +2,10 @@ #include #include -#include "screen_layout.hpp" #include "audio/dsp_core.hpp" #include "frontend_settings.hpp" #include "renderer.hpp" +#include "screen_layout.hpp" #include "services/region_codes.hpp" struct AudioDeviceConfig { @@ -49,12 +49,7 @@ struct EmulatorConfig { static constexpr bool ubershaderDefault = true; #endif static constexpr bool accelerateShadersDefault = true; - -#if defined(__LIBRETRO__) static constexpr bool audioEnabledDefault = true; -#else - static constexpr bool audioEnabledDefault = false; -#endif // We default to OpenGL on all platforms other than iOS #if defined(PANDA3DS_IOS) @@ -63,11 +58,13 
@@ struct EmulatorConfig { static constexpr RendererType rendererDefault = RendererType::OpenGL; #endif + static constexpr bool enableFastmemDefault = true; static constexpr bool hashTexturesDefault = false; bool shaderJitEnabled = shaderJitDefault; bool useUbershaders = ubershaderDefault; bool accelerateShaders = accelerateShadersDefault; + bool fastmemEnabled = enableFastmemDefault; bool hashTextures = hashTexturesDefault; ScreenLayout::Layout screenLayout = ScreenLayout::Layout::Default; diff --git a/include/dynamic_library.hpp b/include/dynamic_library.hpp new file mode 100644 index 00000000..95f86920 --- /dev/null +++ b/include/dynamic_library.hpp @@ -0,0 +1,73 @@ +// SPDX-FileCopyrightText: 2019 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include + +namespace Common { + /** + * Provides a platform-independent interface for loading a dynamic library and retrieving symbols. + * The interface maintains an internal reference count to allow one handle to be shared between + * multiple users. + */ + class DynamicLibrary final { + public: + /// Default constructor, does not load a library. + explicit DynamicLibrary(); + + /// Automatically loads the specified library. Call isOpen() to check validity before use. + explicit DynamicLibrary(const char* filename); + + /// Initializes the dynamic library with an already opened handle. + explicit DynamicLibrary(void* handle_); + + /// Moves the library. + DynamicLibrary(DynamicLibrary&&) noexcept; + DynamicLibrary& operator=(DynamicLibrary&&) noexcept; + + /// Delete copies, we can't copy a dynamic library. + DynamicLibrary(const DynamicLibrary&) = delete; + DynamicLibrary& operator=(const DynamicLibrary&) = delete; + + /// Closes the library. + ~DynamicLibrary(); + + /// Returns the specified library name with the platform-specific suffix added. 
+ [[nodiscard]] static std::string getUnprefixedFilename(const char* filename); + + /// Returns the specified library name in platform-specific format. + /// Major/minor versions will not be included if set to -1. + /// If libname already contains the "lib" prefix, it will not be added again. + /// Windows: LIBNAME-MAJOR-MINOR.dll + /// Linux: libLIBNAME.so.MAJOR.MINOR + /// Mac: libLIBNAME.MAJOR.MINOR.dylib + [[nodiscard]] static std::string getVersionedFilename(const char* libname, int major = -1, int minor = -1); + + /// Returns true if a module is loaded, otherwise false. + [[nodiscard]] bool isOpen() const { return handle != nullptr; } + + /// Loads (or replaces) the handle with the specified library file name. + /// Returns true if the library was loaded and can be used. + [[nodiscard]] bool open(const char* filename); + + /// Unloads the library, any function pointers from this library are no longer valid. + void close(); + + /// Returns the address of the specified symbol (function or variable) as an untyped pointer. + /// If the specified symbol does not exist in this library, nullptr is returned. + [[nodiscard]] void* getSymbolAddress(const char* name) const; + + /// Obtains the address of the specified symbol, automatically casting to the correct type. + /// Returns true if the symbol was found and assigned, otherwise false. + template + [[nodiscard]] bool getSymbol(const char* name, T* ptr) const { + *ptr = reinterpret_cast(getSymbolAddress(name)); + return *ptr != nullptr; + } + + private: + /// Platform-dependent data type representing a dynamic library handle. 
+ void* handle = nullptr; + }; +} // namespace Common \ No newline at end of file diff --git a/include/dynarmic_cp15.hpp b/include/dynarmic_cp15.hpp index 1345aad4..ba7fbbf1 100644 --- a/include/dynarmic_cp15.hpp +++ b/include/dynarmic_cp15.hpp @@ -1,71 +1,51 @@ #pragma once -#include "dynarmic/interface/A32/a32.h" #include "dynarmic/interface/A32/config.h" #include "dynarmic/interface/A32/coprocessor.h" #include "helpers.hpp" -#include "memory.hpp" class CP15 final : public Dynarmic::A32::Coprocessor { - using Callback = Dynarmic::A32::Coprocessor::Callback; - using CoprocReg = Dynarmic::A32::CoprocReg; - using CallbackOrAccessOneWord = Dynarmic::A32::Coprocessor::CallbackOrAccessOneWord; - using CallbackOrAccessTwoWords = Dynarmic::A32::Coprocessor::CallbackOrAccessTwoWords; + using Callback = Dynarmic::A32::Coprocessor::Callback; + using CoprocReg = Dynarmic::A32::CoprocReg; + using CallbackOrAccessOneWord = Dynarmic::A32::Coprocessor::CallbackOrAccessOneWord; + using CallbackOrAccessTwoWords = Dynarmic::A32::Coprocessor::CallbackOrAccessTwoWords; - u32 threadStoragePointer; // Pointer to thread-local storage - u32 dummy; // MCR writes here for registers whose values are ignored + u32 threadStoragePointer; // Pointer to thread-local storage + u32 dummy; // MCR writes here for registers whose values are ignored - std::optional CompileInternalOperation(bool two, unsigned opc1, - CoprocReg CRd, CoprocReg CRn, - CoprocReg CRm, unsigned opc2) override { - return std::nullopt; - } + std::optional CompileInternalOperation(bool two, unsigned opc1, CoprocReg CRd, CoprocReg CRn, CoprocReg CRm, unsigned opc2) override { + return std::nullopt; + } - CallbackOrAccessOneWord CompileSendOneWord(bool two, unsigned opc1, CoprocReg CRn, - CoprocReg CRm, unsigned opc2) override { - if (!two && opc1 == 0 && CRn == CoprocReg::C7 && CRm == CoprocReg::C10 && opc2 == 4) { - return &dummy; // Normally inserts a "Data Synchronization Barrier" - } + CallbackOrAccessOneWord 
CompileSendOneWord(bool two, unsigned opc1, CoprocReg CRn, CoprocReg CRm, unsigned opc2) override { + if (!two && opc1 == 0 && CRn == CoprocReg::C7 && CRm == CoprocReg::C10 && opc2 == 4) { + return &dummy; // Normally inserts a "Data Synchronization Barrier" + } - if (!two && opc1 == 0 && CRn == CoprocReg::C7 && CRm == CoprocReg::C10 && opc2 == 5) { - return &dummy; // Normally inserts a "Data Memory Barrier" - } - Helpers::panic("CP15: CompileSendOneWord\nopc1: %d CRn: %d CRm: %d opc2: %d\n", opc1, (int)CRn, (int)CRm, opc2); - } + if (!two && opc1 == 0 && CRn == CoprocReg::C7 && CRm == CoprocReg::C10 && opc2 == 5) { + return &dummy; // Normally inserts a "Data Memory Barrier" + } + Helpers::panic("CP15: CompileSendOneWord\nopc1: %d CRn: %d CRm: %d opc2: %d\n", opc1, (int)CRn, (int)CRm, opc2); + } - CallbackOrAccessTwoWords CompileSendTwoWords(bool two, unsigned opc, CoprocReg CRm) override { - return std::monostate{}; - } + CallbackOrAccessOneWord CompileGetOneWord(bool two, unsigned opc1, CoprocReg CRn, CoprocReg CRm, unsigned opc2) override { + // Stores a pointer to thread-local storage, accessed via mrc p15, 0, rd, c13, c0, 3 + if (!two && CRn == CoprocReg::C13 && opc1 == 0 && CRm == CoprocReg::C0 && opc2 == 3) { + return &threadStoragePointer; + } - CallbackOrAccessOneWord CompileGetOneWord(bool two, unsigned opc1, CoprocReg CRn, - CoprocReg CRm, unsigned opc2) override { - // Stores a pointer to thread-local storage, accessed via mrc p15, 0, rd, c13, c0, 3 - if (!two && CRn == CoprocReg::C13 && opc1 == 0 && CRm == CoprocReg::C0 && opc2 == 3) { - return &threadStoragePointer; - } + Helpers::panic("CP15: CompileGetOneWord\nopc1: %d CRn: %d CRm: %d opc2: %d\n", opc1, (int)CRn, (int)CRm, opc2); + } - Helpers::panic("CP15: CompileGetOneWord\nopc1: %d CRn: %d CRm: %d opc2: %d\n", opc1, (int)CRn, (int)CRm, opc2); - } + CallbackOrAccessTwoWords CompileSendTwoWords(bool two, unsigned opc, CoprocReg CRm) override { return std::monostate{}; } + CallbackOrAccessTwoWords 
CompileGetTwoWords(bool two, unsigned opc, CoprocReg CRm) override { return std::monostate{}; } - CallbackOrAccessTwoWords CompileGetTwoWords(bool two, unsigned opc, CoprocReg CRm) override { - return std::monostate{}; - } + std::optional CompileLoadWords(bool two, bool long_transfer, CoprocReg CRd, std::optional option) override { return std::nullopt; } + std::optional CompileStoreWords(bool two, bool long_transfer, CoprocReg CRd, std::optional option) override { return std::nullopt; } - std::optional CompileLoadWords(bool two, bool long_transfer, CoprocReg CRd, - std::optional option) override { - return std::nullopt; - } + public: + void setTLSBase(u32 value) { threadStoragePointer = value; } - std::optional CompileStoreWords(bool two, bool long_transfer, CoprocReg CRd, - std::optional option) override { - return std::nullopt; - } - -public: - void setTLSBase(u32 value) { - threadStoragePointer = value; - } - - // Currently does nothing but may be needed in the future - void reset() {} + // Currently does nothing but may be needed in the future + void reset() {} }; \ No newline at end of file diff --git a/include/emulator.hpp b/include/emulator.hpp index bed01937..b1191f6e 100644 --- a/include/emulator.hpp +++ b/include/emulator.hpp @@ -24,12 +24,6 @@ #include "http_server.hpp" #endif -#ifdef PANDA3DS_FRONTEND_QT -#include "gl/context.h" -#endif - -struct SDL_Window; - enum class ROMType { None, ELF, @@ -39,10 +33,13 @@ enum class ROMType { }; class Emulator { + // Config should be initialized before anything else EmulatorConfig config; + + Memory memory; + // Memory (declared just above) must be constructed before the CPU, GPU and kernel, so it is declared ahead of them CPU cpu; GPU gpu; - Memory memory; Kernel kernel; std::unique_ptr dsp; Scheduler scheduler; @@ -106,12 +103,8 @@ class Emulator { bool loadELF(const std::filesystem::path& path); bool loadELF(std::ifstream& file); -#ifdef PANDA3DS_FRONTEND_QT - // For passing the GL context from Qt to the renderer - void 
initGraphicsContext(GL::Context* glContext) { gpu.initGraphicsContext(nullptr); } -#else - void initGraphicsContext(SDL_Window* window) { gpu.initGraphicsContext(window); } -#endif + // For passing the SDL Window, GL context, etc from the frontend to the renderer + void initGraphicsContext(void* context) { gpu.initGraphicsContext(context); } RomFS::DumpingResult dumpRomFS(const std::filesystem::path& path); void setOutputSize(u32 width, u32 height) { gpu.setOutputSize(width, height); } diff --git a/include/enum_flag_ops.hpp b/include/enum_flag_ops.hpp new file mode 100644 index 00000000..e41eac28 --- /dev/null +++ b/include/enum_flag_ops.hpp @@ -0,0 +1,60 @@ +// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include + +#define DECLARE_ENUM_FLAG_OPERATORS(type) \ + [[nodiscard]] constexpr type operator|(type a, type b) noexcept { \ + using T = std::underlying_type_t; \ + return static_cast(static_cast(a) | static_cast(b)); \ + } \ + [[nodiscard]] constexpr type operator&(type a, type b) noexcept { \ + using T = std::underlying_type_t; \ + return static_cast(static_cast(a) & static_cast(b)); \ + } \ + [[nodiscard]] constexpr type operator^(type a, type b) noexcept { \ + using T = std::underlying_type_t; \ + return static_cast(static_cast(a) ^ static_cast(b)); \ + } \ + [[nodiscard]] constexpr type operator<<(type a, type b) noexcept { \ + using T = std::underlying_type_t; \ + return static_cast(static_cast(a) << static_cast(b)); \ + } \ + [[nodiscard]] constexpr type operator>>(type a, type b) noexcept { \ + using T = std::underlying_type_t; \ + return static_cast(static_cast(a) >> static_cast(b)); \ + } \ + constexpr type& operator|=(type& a, type b) noexcept { \ + a = a | b; \ + return a; \ + } \ + constexpr type& operator&=(type& a, type b) noexcept { \ + a = a & b; \ + return a; \ + } \ + constexpr type& operator^=(type& a, type b) noexcept { \ + a = a ^ b; \ + return a; \ + } \ + 
constexpr type& operator<<=(type& a, type b) noexcept { \ + a = a << b; \ + return a; \ + } \ + constexpr type& operator>>=(type& a, type b) noexcept { \ + a = a >> b; \ + return a; \ + } \ + [[nodiscard]] constexpr type operator~(type key) noexcept { \ + using T = std::underlying_type_t; \ + return static_cast(~static_cast(key)); \ + } \ + [[nodiscard]] constexpr bool True(type key) noexcept { \ + using T = std::underlying_type_t; \ + return static_cast(key) != 0; \ + } \ + [[nodiscard]] constexpr bool False(type key) noexcept { \ + using T = std::underlying_type_t; \ + return static_cast(key) == 0; \ + } \ No newline at end of file diff --git a/include/helpers.hpp b/include/helpers.hpp index a95931d4..e13aed7e 100644 --- a/include/helpers.hpp +++ b/include/helpers.hpp @@ -3,10 +3,8 @@ #include #include #include -#include -#include -#include #include +#include #include "termcolor.hpp" @@ -37,7 +35,7 @@ namespace Helpers { return {}; } const auto buf = std::make_unique(size); - std::snprintf(buf.get(), size, fmt.c_str(), args ...); + std::snprintf(buf.get(), size, fmt.c_str(), args...); return std::string(buf.get(), buf.get() + size - 1); } @@ -50,7 +48,7 @@ namespace Helpers { exit(1); } - + #ifdef PANDA3DS_LIMITED_PANICS template static void panicDev(const char* fmt, Args&&... 
args) {} diff --git a/include/kernel/fcram.hpp b/include/kernel/fcram.hpp new file mode 100644 index 00000000..e1690ad0 --- /dev/null +++ b/include/kernel/fcram.hpp @@ -0,0 +1,63 @@ +#pragma once +#include +#include + +#include "helpers.hpp" + +class Memory; + +enum class FcramRegion { + App = 0x100, + Sys = 0x200, + Base = 0x300, +}; + +struct FcramBlock { + u32 paddr; + s32 pages; + + FcramBlock(u32 paddr, s32 pages) : paddr(paddr), pages(pages) {} +}; + +using FcramBlockList = std::list; + +class KFcram { + struct Region { + struct Block { + s32 pages; + s32 pageOffset; + bool used; + + Block(s32 pages, u32 pageOffset) : pages(pages), pageOffset(pageOffset), used(false) {} + }; + + std::list blocks; + u32 start; + s32 pages; + s32 freePages; + + public: + Region() : start(0), pages(0) {} + void reset(u32 start, size_t size); + void alloc(std::list& out, s32 pages, bool linear); + + u32 getUsedCount(); + u32 getFreeCount(); + }; + + Memory& mem; + + Region appRegion, sysRegion, baseRegion; + uint8_t* fcram; + std::unique_ptr refs; + + public: + KFcram(Memory& memory); + void reset(size_t ramSize, size_t appSize, size_t sysSize, size_t baseSize); + void alloc(FcramBlockList& out, s32 pages, FcramRegion region, bool linear); + + void incRef(FcramBlockList& list); + void decRef(FcramBlockList& list); + + u32 getUsedCount(FcramRegion region); +}; \ No newline at end of file diff --git a/include/kernel/kernel.hpp b/include/kernel/kernel.hpp index 3fd926e7..c6d5044e 100644 --- a/include/kernel/kernel.hpp +++ b/include/kernel/kernel.hpp @@ -1,12 +1,11 @@ #pragma once #include -#include -#include #include #include #include #include "config.hpp" +#include "fcram.hpp" #include "helpers.hpp" #include "kernel_types.hpp" #include "logger.hpp" @@ -25,6 +24,10 @@ class Kernel { CPU& cpu; Memory& mem; + public: + KFcram fcramManager; + + private: // The handle number for the next kernel object to be created u32 handleCounter; // A list of our OS threads, the max number of which 
depends on the resource limit (hardcoded 32 per process on retail it seems). diff --git a/include/kernel/kernel_types.hpp b/include/kernel/kernel_types.hpp index a5b27498..7ea6d28e 100644 --- a/include/kernel/kernel_types.hpp +++ b/include/kernel/kernel_types.hpp @@ -1,93 +1,107 @@ #pragma once #include #include -#include "fs/archive_base.hpp" +#include + #include "handles.hpp" #include "helpers.hpp" -#include "result/result.hpp" enum class KernelObjectType : u8 { - AddressArbiter, Archive, Directory, File, MemoryBlock, Process, ResourceLimit, Session, Dummy, - // Bundle waitable objects together in the enum to let the compiler optimize certain checks better - Event, Mutex, Port, Semaphore, Timer, Thread + AddressArbiter, + Archive, + Directory, + File, + MemoryBlock, + Process, + ResourceLimit, + Session, + Dummy, + // Bundle waitable objects together in the enum to let the compiler optimize certain checks better + Event, + Mutex, + Port, + Semaphore, + Timer, + Thread }; enum class ResourceLimitCategory : int { - Application = 0, - SystemApplet = 1, - LibraryApplet = 2, - Misc = 3 + Application = 0, + SystemApplet = 1, + LibraryApplet = 2, + Misc = 3, }; // Reset types (for use with events and timers) enum class ResetType { - OneShot = 0, // When the primitive is signaled, it will wake up exactly one thread and will clear itself automatically. - Sticky = 1, // When the primitive is signaled, it will wake up all threads and it won't clear itself automatically. - Pulse = 2, // Only meaningful for timers: same as ONESHOT but it will periodically signal the timer instead of just once. + OneShot = 0, // When the primitive is signaled, it will wake up exactly one thread and will clear itself automatically. + Sticky = 1, // When the primitive is signaled, it will wake up all threads and it won't clear itself automatically. + Pulse = 2, // Only meaningful for timers: same as ONESHOT but it will periodically signal the timer instead of just once. 
}; enum class ArbitrationType { - Signal = 0, - WaitIfLess = 1, - DecrementAndWaitIfLess = 2, - WaitIfLessTimeout = 3, - DecrementAndWaitIfLessTimeout = 4 + Signal = 0, + WaitIfLess = 1, + DecrementAndWaitIfLess = 2, + WaitIfLessTimeout = 3, + DecrementAndWaitIfLessTimeout = 4, }; enum class ProcessorID : s32 { - AllCPUs = -1, - Default = -2, - - AppCore = 0, - Syscore = 1, - New3DSExtra1 = 2, - New3DSExtra2 = 3 + AllCPUs = -1, + Default = -2, + + AppCore = 0, + Syscore = 1, + New3DSExtra1 = 2, + New3DSExtra2 = 3 }; struct AddressArbiter {}; struct ResourceLimits { - HorizonHandle handle; + HorizonHandle handle; - s32 currentCommit = 0; + s32 currentCommit = 0; }; struct Process { - // Resource limits for this process - ResourceLimits limits; - // Process ID - u32 id; + // Resource limits for this process + ResourceLimits limits; + // Process ID + u32 id; - Process(u32 id) : id(id) {} + Process(u32 id) : id(id) {} }; struct Event { - // Some events (for now, only the DSP semaphore events) need to execute a callback when signalled - // This enum stores what kind of callback they should execute - enum class CallbackType : u32 { - None, DSPSemaphore, - }; + // Some events (for now, only the DSP semaphore events) need to execute a callback when signalled + // This enum stores what kind of callback they should execute + enum class CallbackType : u32 { + None, + DSPSemaphore, + }; - u64 waitlist; // A bitfield where each bit symbolizes if the thread with thread with the corresponding index is waiting on the event - ResetType resetType = ResetType::OneShot; - CallbackType callback = CallbackType::None; - bool fired = false; + u64 waitlist; // A bitfield where each bit symbolizes if the thread with the corresponding index is waiting on the event + ResetType resetType = ResetType::OneShot; + CallbackType callback = CallbackType::None; + bool fired = false; - Event(ResetType resetType) : resetType(resetType), waitlist(0) {} - Event(ResetType resetType, 
CallbackType cb) : resetType(resetType), waitlist(0), callback(cb) {} + Event(ResetType resetType) : resetType(resetType), waitlist(0) {} + Event(ResetType resetType, CallbackType cb) : resetType(resetType), waitlist(0), callback(cb) {} }; struct Port { - static constexpr u32 maxNameLen = 11; + static constexpr u32 maxNameLen = 11; - char name[maxNameLen + 1] = {}; - bool isPublic = false; // Setting name=NULL creates a private port not accessible from svcConnectToPort. + char name[maxNameLen + 1] = {}; + bool isPublic = false; // Setting name=NULL creates a private port not accessible from svcConnectToPort. - Port(const char* name) { - // If the name is empty (ie the first char is the null terminator) then the port is private - isPublic = name[0] != '\0'; - std::strncpy(this->name, name, maxNameLen); - } + Port(const char* name) { + // If the name is empty (ie the first char is the null terminator) then the port is private + isPublic = name[0] != '\0'; + std::strncpy(this->name, name, maxNameLen); + } }; struct Session { @@ -146,92 +160,90 @@ struct Thread { }; static const char* kernelObjectTypeToString(KernelObjectType t) { - switch (t) { - case KernelObjectType::AddressArbiter: return "address arbiter"; - case KernelObjectType::Archive: return "archive"; - case KernelObjectType::Directory: return "directory"; - case KernelObjectType::Event: return "event"; - case KernelObjectType::File: return "file"; - case KernelObjectType::MemoryBlock: return "memory block"; - case KernelObjectType::Port: return "port"; - case KernelObjectType::Process: return "process"; - case KernelObjectType::ResourceLimit: return "resource limit"; - case KernelObjectType::Session: return "session"; - case KernelObjectType::Mutex: return "mutex"; - case KernelObjectType::Semaphore: return "semaphore"; - case KernelObjectType::Thread: return "thread"; - case KernelObjectType::Dummy: return "dummy"; - default: return "unknown"; - } + switch (t) { + case KernelObjectType::AddressArbiter: 
return "address arbiter"; + case KernelObjectType::Archive: return "archive"; + case KernelObjectType::Directory: return "directory"; + case KernelObjectType::Event: return "event"; + case KernelObjectType::File: return "file"; + case KernelObjectType::MemoryBlock: return "memory block"; + case KernelObjectType::Port: return "port"; + case KernelObjectType::Process: return "process"; + case KernelObjectType::ResourceLimit: return "resource limit"; + case KernelObjectType::Session: return "session"; + case KernelObjectType::Mutex: return "mutex"; + case KernelObjectType::Semaphore: return "semaphore"; + case KernelObjectType::Thread: return "thread"; + case KernelObjectType::Dummy: return "dummy"; + default: return "unknown"; + } } struct Mutex { - using Handle = HorizonHandle; + using Handle = HorizonHandle; - u64 waitlist; // Refer to the getWaitlist function below for documentation - Handle ownerThread = 0; // Index of the thread that holds the mutex if it's locked - Handle handle; // Handle of the mutex itself - u32 lockCount; // Number of times this mutex has been locked by its daddy. 0 = not locked - bool locked; + u64 waitlist; // Refer to the getWaitlist function below for documentation + Handle ownerThread = 0; // Index of the thread that holds the mutex if it's locked + Handle handle; // Handle of the mutex itself + u32 lockCount; // Number of times this mutex has been locked by its daddy. 0 = not locked + bool locked; - Mutex(bool lock, Handle handle) : locked(lock), waitlist(0), lockCount(lock ? 1 : 0), handle(handle) {} + Mutex(bool lock, Handle handle) : locked(lock), waitlist(0), lockCount(lock ? 
1 : 0), handle(handle) {} }; struct Semaphore { - u64 waitlist; // Refer to the getWaitlist function below for documentation - s32 availableCount; - s32 maximumCount; + u64 waitlist; // Refer to the getWaitlist function below for documentation + s32 availableCount; + s32 maximumCount; - Semaphore(s32 initialCount, s32 maximumCount) : availableCount(initialCount), maximumCount(maximumCount), waitlist(0) {} + Semaphore(s32 initialCount, s32 maximumCount) : availableCount(initialCount), maximumCount(maximumCount), waitlist(0) {} }; struct Timer { u64 waitlist; // Refer to the getWaitlist function below for documentation ResetType resetType = ResetType::OneShot; - u64 fireTick; // CPU tick the timer will be fired - u64 interval; // Number of ns until the timer fires for the second and future times - bool fired; // Has this timer been signalled? - bool running; // Is this timer running or stopped? + u64 fireTick; // CPU tick the timer will be fired + u64 interval; // Number of ns until the timer fires for the second and future times + bool fired; // Has this timer been signalled? + bool running; // Is this timer running or stopped? 
Timer(ResetType type) : resetType(type), fireTick(0), interval(0), waitlist(0), fired(false), running(false) {} }; struct MemoryBlock { - u32 addr = 0; - u32 size = 0; - u32 myPermission = 0; - u32 otherPermission = 0; - bool mapped = false; + u32 addr = 0; + u32 size = 0; + u32 myPermission = 0; + u32 otherPermission = 0; + bool mapped = false; - MemoryBlock(u32 addr, u32 size, u32 myPerm, u32 otherPerm) : addr(addr), size(size), myPermission(myPerm), otherPermission(otherPerm), - mapped(false) {} + MemoryBlock(u32 addr, u32 size, u32 myPerm, u32 otherPerm) + : addr(addr), size(size), myPermission(myPerm), otherPermission(otherPerm), mapped(false) {} }; // Generic kernel object class struct KernelObject { using Handle = HorizonHandle; - Handle handle = 0; // A u32 the OS will use to identify objects - void* data = nullptr; - KernelObjectType type; + Handle handle = 0; // A u32 the OS will use to identify objects + void* data = nullptr; + KernelObjectType type; - KernelObject(Handle handle, KernelObjectType type) : handle(handle), type(type) {} + KernelObject(Handle handle, KernelObjectType type) : handle(handle), type(type) {} - // Our destructor does not free the data in order to avoid it being freed when our std::vector is expanded - // Thus, the kernel needs to delete it when appropriate - ~KernelObject() {} + // Our destructor does not free the data in order to avoid it being freed when our std::vector is expanded + // Thus, the kernel needs to delete it when appropriate + ~KernelObject() {} - template - T* getData() { - return static_cast(data); - } + template + T* getData() { + return static_cast(data); + } - const char* getTypeName() const { - return kernelObjectTypeToString(type); - } + const char* getTypeName() const { return kernelObjectTypeToString(type); } - // Retrieves a reference to the waitlist for a specified object + // Retrieves a reference to the waitlist for a specified object // We return a reference because this function is only called in 
the kernel threading internals // We want the kernel to be able to easily manage waitlists, by reading/parsing them or setting/clearing bits. // As we mention in the definition of the "Event" struct, the format for wailists is very simple and made to be efficient. @@ -247,8 +259,7 @@ struct KernelObject { case KernelObjectType::Timer: return getData()->waitlist; // This should be unreachable once we fully implement sync objects - default: [[unlikely]] - Helpers::panic("Called GetWaitList on kernel object without a waitlist (Type: %s)", getTypeName()); + default: [[unlikely]] Helpers::panic("Called GetWaitList on kernel object without a waitlist (Type: %s)", getTypeName()); } } -}; \ No newline at end of file +}; diff --git a/include/memory.hpp b/include/memory.hpp index b1dd09de..d21e4be2 100644 --- a/include/memory.hpp +++ b/include/memory.hpp @@ -1,8 +1,8 @@ #pragma once #include -#include #include #include +#include #include #include @@ -10,8 +10,11 @@ #include "crypto/aes_engine.hpp" #include "handles.hpp" #include "helpers.hpp" -#include "loader/ncsd.hpp" +#include "host_memory/host_memory.h" +#include "kernel/fcram.hpp" #include "loader/3dsx.hpp" +#include "loader/ncsd.hpp" +#include "result/result.hpp" #include "services/region_codes.hpp" namespace PhysicalAddrs { @@ -38,15 +41,15 @@ namespace VirtualAddrs { DefaultStackSize = 0x4000, NormalHeapStart = 0x08000000, - LinearHeapStartOld = 0x14000000, // If kernel version < 0x22C + LinearHeapStartOld = 0x14000000, // If kernel version < 0x22C LinearHeapEndOld = 0x1C000000, LinearHeapStartNew = 0x30000000, LinearHeapEndNew = 0x40000000, - // Start of TLS for first thread. Next thread's storage will be at TLSBase + 0x1000, and so on - TLSBase = 0xFF400000, - TLSSize = 0x1000, + // Start of TLS for first thread. 
Next thread's storage will be at TLSBase + 0x200, and so on + TLSBase = 0x1FF82000, + TLSSize = 0x200, VramStart = 0x1F000000, VramSize = 0x00600000, @@ -76,63 +79,79 @@ namespace KernelMemoryTypes { PERMISSION_W = 1 << 1, PERMISSION_X = 1 << 2 }; - - // I assume this is referring to a single piece of allocated memory? If it's for pages, it makes no sense. - // If it's for multiple allocations, it also makes no sense + struct MemoryInfo { - u32 baseAddr; // Base process virtual address. Used as a paddr in lockedMemoryInfo instead - u32 size; // Of what? - u32 perms; // Is this referring to a single page or? + u32 baseAddr; + u32 pages; + u32 perms; u32 state; - u32 end() { return baseAddr + size; } - MemoryInfo(u32 baseAddr, u32 size, u32 perms, u32 state) : baseAddr(baseAddr), size(size) - , perms(perms), state(state) {} + u32 end() { return baseAddr + (pages << 12); } + MemoryInfo() : baseAddr(0), pages(0), perms(0), state(0) {} + MemoryInfo(u32 baseAddr, u32 pages, u32 perms, u32 state) : baseAddr(baseAddr), pages(pages), perms(perms), state(state) {} }; // Shared memory block for HID, GSP:GPU etc struct SharedMemoryBlock { - u32 paddr; // Physical address of this block's memory - u32 size; // Size of block - u32 handle; // The handle of the shared memory block - bool mapped; // Has this block been mapped at least once? + u32 paddr; // Physical address of this block's memory + u32 size; // Size of block + u32 handle; // The handle of the shared memory block + bool mapped; // Has this block been mapped at least once? 
SharedMemoryBlock(u32 paddr, u32 size, u32 handle) : paddr(paddr), size(size), handle(handle), mapped(false) {} }; -} +} // namespace KernelMemoryTypes class Memory { + // Used internally by changeMemoryState + struct Operation { + KernelMemoryTypes::MemoryState newState = KernelMemoryTypes::MemoryState::Free; + bool r = false, w = false, x = false; + bool changeState = false; + bool changePerms = false; + }; using Handle = HorizonHandle; u8* fcram; u8* dspRam; // Provided to us by Audio u8* vram; // Provided to the memory class by the GPU class - u64& cpuTicks; // Reference to the CPU tick counter + const u64* cpuTicks = nullptr; // Pointer to the CPU tick counter, provided to us by the CPU class using SharedMemoryBlock = KernelMemoryTypes::SharedMemoryBlock; + // TODO: remove this reference when Peach's excellent page table code is moved to a better home + KFcram& fcramManager; + // Our dynarmic core uses page tables for reads and writes with 4096 byte pages std::vector readTable, writeTable; + // vaddr->paddr translation table + std::vector paddrTable; + // This tracks our OS' memory allocations - std::vector memoryInfo; + std::list memoryInfo; std::array sharedMemBlocks = { - SharedMemoryBlock(0, 0, KernelHandles::FontSharedMemHandle), // Shared memory for the system font (size is 0 because we read the size from the cmrc filesystem - SharedMemoryBlock(0, 0x1000, KernelHandles::GSPSharedMemHandle), // GSP shared memory - SharedMemoryBlock(0, 0x1000, KernelHandles::HIDSharedMemHandle), // HID shared memory - SharedMemoryBlock(0, 0x3000, KernelHandles::CSNDSharedMemHandle), // CSND shared memory + SharedMemoryBlock( + 0, 0, KernelHandles::FontSharedMemHandle + ), // Shared memory for the system font (size is 0 because we read the size from the cmrc filesystem) + SharedMemoryBlock(0, 0x1000, KernelHandles::GSPSharedMemHandle), // GSP shared memory + SharedMemoryBlock(0, 0x1000, KernelHandles::HIDSharedMemHandle), // HID shared memory + SharedMemoryBlock(0, 0x3000, 
KernelHandles::CSNDSharedMemHandle), // CSND shared memory SharedMemoryBlock(0, 0xE7000, KernelHandles::APTCaptureSharedMemHandle), // APT Capture Buffer memory - }; + }; -public: + public: static constexpr u32 pageShift = 12; static constexpr u32 pageSize = 1 << pageShift; static constexpr u32 pageMask = pageSize - 1; static constexpr u32 totalPageCount = 1 << (32 - pageShift); - + static constexpr u32 FCRAM_SIZE = u32(128_MB); - static constexpr u32 FCRAM_APPLICATION_SIZE = u32(80_MB); + static constexpr u32 FCRAM_APPLICATION_SIZE = u32(64_MB + 16_MB); + static constexpr u32 FCRAM_SYSTEM_SIZE = u32(44_MB - 16_MB); + static constexpr u32 FCRAM_BASE_SIZE = u32(20_MB); + static constexpr u32 FCRAM_PAGE_COUNT = FCRAM_SIZE / pageSize; static constexpr u32 FCRAM_APPLICATION_PAGE_COUNT = FCRAM_APPLICATION_SIZE / pageSize; @@ -140,18 +159,48 @@ public: static constexpr u32 DSP_CODE_MEMORY_OFFSET = u32(0_KB); static constexpr u32 DSP_DATA_MEMORY_OFFSET = u32(256_KB); -private: - std::bitset usedFCRAMPages; - std::optional findPaddr(u32 size); + private: + // We also use MMU-accelerated fastmem for fast memory emulation + // This means that we've got a 4GB memory arena which is organized the same way as the emulated 3DS' memory map + // And we can access this directly instead of calling the memory read/write functions, which would be slower + // Regions that are not mapped or can't be accelerated this way will segfault, and the caller (eg dynarmic), will + // handle this segfault and call the slower memory read/write functions + bool useFastmem = false; + static constexpr size_t FASTMEM_FCRAM_OFFSET = 0; // Offset of FCRAM in the fastmem arena + static constexpr size_t FASTMEM_DSP_RAM_OFFSET = FASTMEM_FCRAM_OFFSET + FCRAM_SIZE; // Offset of DSP RAM + + static constexpr size_t FASTMEM_BACKING_SIZE = FCRAM_SIZE + DSP_RAM_SIZE; + // Total size of the virtual address space we will occupy (4GB) + static constexpr size_t FASTMEM_VIRTUAL_SIZE = 4_GB; + + Common::HostMemory* 
arena; + + void addFastmemView(u32 guestVaddr, size_t arenaOffset, size_t size, bool w, bool x = false) { + if (useFastmem) { + Common::MemoryPermission perms = Common::MemoryPermission::Read; + if (w) { + perms |= Common::MemoryPermission::Write; + } + + if (x) { + // perms |= Common::MemoryPermission::Execute; + } + + // If anything is mapped at the place we're trying to map, unmap it. Then, create our mapping. + arena->Unmap(guestVaddr, size, false); + arena->Map(guestVaddr, arenaOffset, size, perms, false); + } + } + u64 timeSince3DSEpoch(); // https://www.3dbrew.org/wiki/Configuration_Memory#ENVINFO // Report a retail unit without JTAG static constexpr u32 envInfo = 1; - // Stored in Configuration Memory starting @ 0x1FF80060 + // Stored in Configuration Memory starting @ 0x1FF80060 struct FirmwareInfo { - u8 unk; // Usually 0 according to 3DBrew + u8 unk; // Usually 0 according to 3DBrew u8 revision; u8 minor; u8 major; @@ -167,12 +216,15 @@ private: static constexpr std::array MACAddress = {0x40, 0xF4, 0x07, 0xFF, 0xFF, 0xEE}; + void changeMemoryState(u32 vaddr, s32 pages, const Operation& op); + void queryPhysicalBlocks(std::list& outList, u32 vaddr, s32 pages); + void mapPhysicalMemory(u32 vaddr, u32 paddr, s32 pages, bool r, bool w, bool x); + void unmapPhysicalMemory(u32 vaddr, u32 paddr, s32 pages); + public: u16 kernelVersion = 0; - u32 usedUserMemory = u32(0_MB); // How much of the APPLICATION FCRAM range is used (allocated to the appcore) - u32 usedSystemMemory = u32(0_MB); // Similar for the SYSTEM range (reserved for the syscore) - Memory(u64& cpuTicks, const EmulatorConfig& config); + Memory(KFcram& fcramManager, const EmulatorConfig& config); void reset(); void* getReadPointer(u32 address); void* getWritePointer(u32 address); @@ -198,22 +250,6 @@ private: u32 getLinearHeapVaddr(); u8* getFCRAM() { return fcram; } - // Total amount of OS-only FCRAM available (Can vary depending on how much FCRAM the app requests via the cart exheader) - u32 
totalSysFCRAM() { - return FCRAM_SIZE - FCRAM_APPLICATION_SIZE; - } - - // Amount of OS-only FCRAM currently available - u32 remainingSysFCRAM() { - return totalSysFCRAM() - usedSystemMemory; - } - - // Physical FCRAM index to the start of OS FCRAM - // We allocate the first part of physical FCRAM for the application, and the rest to the OS. So the index for the OS = application ram size - u32 sysFCRAMIndex() { - return FCRAM_APPLICATION_SIZE; - } - enum class BatteryLevel { Empty = 0, AlmostEmpty, @@ -224,9 +260,9 @@ private: }; u8 getBatteryState(bool adapterConnected, bool charging, BatteryLevel batteryLevel) { - u8 value = static_cast(batteryLevel) << 2; // Bits 2:4 are the battery level from 0 to 5 - if (adapterConnected) value |= 1 << 0; // Bit 0 shows if the charger is connected - if (charging) value |= 1 << 1; // Bit 1 shows if we're charging + u8 value = static_cast(batteryLevel) << 2; // Bits 2:4 are the battery level from 0 to 5 + if (adapterConnected) value |= 1 << 0; // Bit 0 shows if the charger is connected + if (charging) value |= 1 << 1; // Bit 1 shows if we're charging return value; } @@ -248,27 +284,20 @@ private: } // Returns whether "addr" is aligned to a page (4096 byte) boundary - static constexpr bool isAligned(u32 addr) { - return (addr & pageMask) == 0; - } + static constexpr bool isAligned(u32 addr) { return (addr & pageMask) == 0; } - // Allocate "size" bytes of RAM starting from FCRAM index "paddr" (We pick it ourself if paddr == 0) - // And map them to virtual address "vaddr" (We also pick it ourself if vaddr == 0). - // If the "linear" flag is on, the paddr pages must be adjacent in FCRAM - // This function is for interacting with the *user* portion of FCRAM mainly. For OS RAM, we use other internal functions below - // r, w, x: Permissions for the allocated memory - // adjustAddrs: If it's true paddr == 0 or vaddr == 0 tell the allocator to pick its own addresses. 
Used for eg svc ControlMemory - // isMap: Shows whether this is a reserve operation, that allocates memory and maps it to the addr space, or if it's a map operation, - // which just maps memory from paddr to vaddr without hassle. The latter is useful for shared memory mapping, the "map" ControlMemory, op, etc - // Returns the vaddr the FCRAM was mapped to or nullopt if allocation failed - std::optional allocateMemory(u32 vaddr, u32 paddr, u32 size, bool linear, bool r = true, bool w = true, bool x = true, - bool adjustsAddrs = false, bool isMap = false); - KernelMemoryTypes::MemoryInfo queryMemory(u32 vaddr); + bool allocMemory(u32 vaddr, s32 pages, FcramRegion region, bool r, bool w, bool x, KernelMemoryTypes::MemoryState state); + bool allocMemoryLinear(u32& outVaddr, u32 inVaddr, s32 pages, FcramRegion region, bool r, bool w, bool x); + bool mapVirtualMemory( + u32 dstVaddr, u32 srcVaddr, s32 pages, bool r, bool w, bool x, KernelMemoryTypes::MemoryState oldDstState, + KernelMemoryTypes::MemoryState oldSrcState, KernelMemoryTypes::MemoryState newDstState, KernelMemoryTypes::MemoryState newSrcState, + bool unmapPages = true + ); + void changePermissions(u32 vaddr, s32 pages, bool r, bool w, bool x); + Result::HorizonResult queryMemory(KernelMemoryTypes::MemoryInfo& out, u32 vaddr); + Result::HorizonResult testMemoryState(u32 vaddr, s32 pages, KernelMemoryTypes::MemoryState desiredState); - // For internal use - // Allocates a "size"-sized chunk of system FCRAM and returns the index of physical FCRAM used for the allocation - // Used for allocating things like shared memory and the like - u32 allocateSysMemory(u32 size); + void copyToVaddr(u32 dstVaddr, const u8* srcHost, s32 size); // Map a shared memory block to virtual address vaddr with permissions "myPerms" // The kernel has a second permission parameter in MapMemoryBlock but not sure what's used for @@ -276,10 +305,6 @@ private: // Returns a pointer to the FCRAM block used for the memory if allocation 
succeeded u8* mapSharedMemory(Handle handle, u32 vaddr, u32 myPerms, u32 otherPerms); - // Mirrors the page mapping for "size" bytes starting from sourceAddress, to "size" bytes in destAddress - // All of the above must be page-aligned. - void mirrorMapping(u32 destAddress, u32 sourceAddress, u32 size); - // Backup of the game's CXI partition info, if any std::optional loadedCXI = std::nullopt; std::optional loaded3DSX = std::nullopt; @@ -291,12 +316,15 @@ private: u8* getDSPMem() { return dspRam; } u8* getDSPDataMem() { return &dspRam[DSP_DATA_MEMORY_OFFSET]; } u8* getDSPCodeMem() { return &dspRam[DSP_CODE_MEMORY_OFFSET]; } - u32 getUsedUserMem() { return usedUserMemory; } void setVRAM(u8* pointer) { vram = pointer; } void setDSPMem(u8* pointer) { dspRam = pointer; } + void setCPUTicks(const u64& ticks) { cpuTicks = &ticks; } bool allocateMainThreadStack(u32 size); Regions getConsoleRegion(); void copySharedFont(u8* ptr, u32 vaddr); + + bool isFastmemEnabled() { return useFastmem; } + u8* getFastmemArenaBase() { return arena->VirtualBasePointer(); } }; diff --git a/include/panda_qt/elided_label.hpp b/include/panda_qt/elided_label.hpp index 9d937f9b..de31a439 100644 --- a/include/panda_qt/elided_label.hpp +++ b/include/panda_qt/elided_label.hpp @@ -6,6 +6,7 @@ class ElidedLabel : public QLabel { Q_OBJECT + public: explicit ElidedLabel(Qt::TextElideMode elideMode = Qt::ElideLeft, QWidget* parent = nullptr); explicit ElidedLabel(QString text, Qt::TextElideMode elideMode = Qt::ElideLeft, QWidget* parent = nullptr); diff --git a/include/panda_qt/main_window.hpp b/include/panda_qt/main_window.hpp index 7bdf6b96..b259a1bc 100644 --- a/include/panda_qt/main_window.hpp +++ b/include/panda_qt/main_window.hpp @@ -20,7 +20,7 @@ #include "panda_qt/cpu_debugger.hpp" #include "panda_qt/dsp_debugger.hpp" #include "panda_qt/patch_window.hpp" -#include "panda_qt/screen.hpp" +#include "panda_qt/screen/screen.hpp" #include "panda_qt/shader_editor.hpp" #include 
"panda_qt/text_editor.hpp" #include "panda_qt/thread_debugger.hpp" @@ -136,7 +136,7 @@ class MainWindow : public QMainWindow { void loadKeybindings(); void saveKeybindings(); - // Tracks whether we are using an OpenGL-backed renderer or a Vulkan-backed renderer + // Tracks what graphics API is backing our renderer bool usingGL = false; bool usingVk = false; bool usingMtl = false; diff --git a/include/panda_qt/screen.hpp b/include/panda_qt/screen/screen.hpp similarity index 54% rename from include/panda_qt/screen.hpp rename to include/panda_qt/screen/screen.hpp index 270bf10f..4908b096 100644 --- a/include/panda_qt/screen.hpp +++ b/include/panda_qt/screen/screen.hpp @@ -1,25 +1,27 @@ #pragma once #include #include -#include #include "gl/context.h" #include "screen_layout.hpp" #include "window_info.h" -// OpenGL widget for drawing the 3DS screen +// Abstract screen widget for drawing the 3DS screen. We've got a child class for each graphics API (ScreenWidgetGL, ScreenWidgetMTL, ...) class ScreenWidget : public QWidget { Q_OBJECT public: using ResizeCallback = std::function; - ScreenWidget(ResizeCallback resizeCallback, QWidget* parent = nullptr); - void resizeEvent(QResizeEvent* event) override; - // Called by the emulator thread for resizing the actual GL surface, since the emulator thread owns the GL context - void resizeSurface(u32 width, u32 height); + enum class API { OpenGL, Metal, Vulkan }; - GL::Context* getGLContext() { return glContext.get(); } + ScreenWidget(API api, ResizeCallback resizeCallback, QWidget* parent = nullptr); + virtual ~ScreenWidget() {} + + void resizeEvent(QResizeEvent* event) override; + + virtual GL::Context* getGLContext() { return nullptr; } + virtual void* getMTKLayer() { return nullptr; } // Dimensions of our output surface u32 surfaceWidth = 0; @@ -30,8 +32,9 @@ class ScreenWidget : public QWidget { u32 previousWidth = 0; u32 previousHeight = 0; - // Coordinates (x/y/width/height) for the two screens in window space, used for 
properly handling touchscreen regardless - // of layout or resizing + API api = API::OpenGL; + + // Coordinates (x/y/width/height) for the two screens in window space, used for properly handling touchscreen ScreenLayout::WindowCoordinates screenCoordinates; // Screen layouts and sizes ScreenLayout::Layout screenLayout = ScreenLayout::Layout::Default; @@ -39,16 +42,23 @@ class ScreenWidget : public QWidget { void reloadScreenLayout(ScreenLayout::Layout newLayout, float newTopScreenSize); - private: - std::unique_ptr glContext = nullptr; + // Creates a screen widget depending on the graphics API we're using + static ScreenWidget* getWidget(API api, ResizeCallback resizeCallback, QWidget* parent = nullptr); + + // Called by the emulator thread on OpenGL for resizing the actual GL surface, since the emulator thread owns the GL context + virtual void resizeSurface(u32 width, u32 height) {}; + + protected: ResizeCallback resizeCallback; - bool createGLContext(); + virtual bool createContext() = 0; + virtual void resizeDisplay() = 0; + std::optional getWindowInfo(); + private: qreal devicePixelRatioFromScreen() const; int scaledWindowWidth() const; int scaledWindowHeight() const; - std::optional getWindowInfo(); void reloadScreenCoordinates(); }; diff --git a/include/panda_qt/screen/screen_gl.hpp b/include/panda_qt/screen/screen_gl.hpp new file mode 100644 index 00000000..04a2c11b --- /dev/null +++ b/include/panda_qt/screen/screen_gl.hpp @@ -0,0 +1,18 @@ +#pragma once +#include + +#include "gl/context.h" +#include "panda_qt/screen/screen.hpp" + +class ScreenWidgetGL : public ScreenWidget { + std::unique_ptr glContext = nullptr; + + public: + ScreenWidgetGL(API api, ResizeCallback resizeCallback, QWidget* parent = nullptr); + + virtual GL::Context* getGLContext() override; + virtual bool createContext() override; + + virtual void resizeDisplay() override; + virtual void resizeSurface(u32 width, u32 height) override; +}; \ No newline at end of file diff --git 
a/include/panda_qt/screen/screen_mtl.hpp b/include/panda_qt/screen/screen_mtl.hpp new file mode 100644 index 00000000..cdd240e6 --- /dev/null +++ b/include/panda_qt/screen/screen_mtl.hpp @@ -0,0 +1,18 @@ +#pragma once +#include "panda_qt/screen/screen.hpp" + +class ScreenWidgetMTL : public ScreenWidget { + void* mtkLayer = nullptr; + + // Objective-C++ functions for handling the Metal context + bool createMetalContext(); + void resizeMetalView(); + + public: + ScreenWidgetMTL(API api, ResizeCallback resizeCallback, QWidget* parent = nullptr); + ~ScreenWidgetMTL() override; + + virtual void* getMTKLayer() override; + virtual bool createContext() override; + virtual void resizeDisplay() override; +}; \ No newline at end of file diff --git a/include/renderer.hpp b/include/renderer.hpp index 40f244db..27d6a437 100644 --- a/include/renderer.hpp +++ b/include/renderer.hpp @@ -9,10 +9,6 @@ #include "PICA/regs.hpp" #include "helpers.hpp" -#ifdef PANDA3DS_FRONTEND_QT -#include "gl/context.h" -#endif - enum class RendererType : s8 { // Todo: Auto = -1, Null = 0, @@ -23,7 +19,6 @@ enum class RendererType : s8 { }; struct EmulatorConfig; -struct SDL_Window; class GPU; class ShaderUnit; @@ -69,7 +64,7 @@ class Renderer { virtual void reset() = 0; virtual void display() = 0; // Display the 3DS screen contents to the window - virtual void initGraphicsContext(SDL_Window* window) = 0; // Initialize graphics context + virtual void initGraphicsContext(void* context) = 0; // Initialize graphics context virtual void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) = 0; // Clear a GPU buffer in VRAM virtual void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) = 0; // Perform display transfer virtual void textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) = 0; @@ -91,9 +86,9 @@ class Renderer { // Called to notify the core to use OpenGL ES and not desktop GL virtual void 
setupGLES() {} - // Only relevant for Metal renderer on iOS - // Passes a SwiftUI MTKView's layer (CAMetalLayer) to the renderer - virtual void setMTKLayer(void* layer) {}; + // Used for Metal renderer on Qt and iOS + // Passes an NSView's backing layer (CAMetalLayer) to the renderer + virtual void setMTKLayer(void* layer) { Helpers::panic("Renderer doesn't support MTK Layer"); }; // This function is called on every draw call before parsing vertex data. // It is responsible for things like looking up which vertex/fragment shaders to use, recompiling them if they don't exist, choosing between @@ -101,11 +96,6 @@ class Renderer { // Returns whether this draw is eligible for using hardware-accelerated shaders or if shaders should run on the CPU virtual bool prepareForDraw(ShaderUnit& shaderUnit, PICA::DrawAcceleration* accel) { return false; } - // Functions for initializing the graphics context for the Qt frontend, where we don't have the convenience of SDL_Window -#ifdef PANDA3DS_FRONTEND_QT - virtual void initGraphicsContext(GL::Context* context) { Helpers::panic("Tried to initialize incompatible renderer with GL context"); } -#endif - void setFBSize(u32 width, u32 height) { fbSize[0] = width; fbSize[1] = height; diff --git a/include/renderer_gl/gl_driver.hpp b/include/renderer_gl/gl_driver.hpp index 4a0b3727..dfb78ffe 100644 --- a/include/renderer_gl/gl_driver.hpp +++ b/include/renderer_gl/gl_driver.hpp @@ -1,4 +1,5 @@ #pragma once +#include "opengl.hpp" // Information about our OpenGL/OpenGL ES driver that we should keep track of // Stuff like whether specific extensions are supported, and potentially things like OpenGL context information @@ -8,6 +9,9 @@ namespace OpenGL { bool supportsExtFbFetch = false; bool supportsArmFbFetch = false; + // Minimum alignment for UBO offsets. Fetched by the OpenGL renderer using glGetIntegerV. 
+ GLuint uboAlignment = 16; + bool supportFbFetch() const { return supportsExtFbFetch || supportsArmFbFetch; } }; } // namespace OpenGL \ No newline at end of file diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index b105f3e9..2023966f 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -92,14 +92,18 @@ class RendererGL final : public Renderer { // The "default" vertex shader to use when using specialized shaders but not PICA vertex shader -> GLSL recompilation // We can compile this once and then link it with all other generated fragment shaders OpenGL::Shader defaultShadergenVs; - GLuint shadergenFragmentUBO; - // UBO for uploading the PICA uniforms when using hw shaders - GLuint hwShaderUniformUBO; using StreamBuffer = OpenGLStreamBuffer; + + std::unique_ptr shadergenFragmentUBO; + // UBO for uploading the PICA uniforms when using hw shaders + std::unique_ptr hwShaderUniformUBO; std::unique_ptr hwVertexBuffer; std::unique_ptr hwIndexBuffer; + // Current offset for our hw shader uniform UBO + u32 hwShaderUniformUBOOffset = 0; + // Cache of fixed attribute values so that we don't do any duplicate updates std::array, 16> fixedAttrValues; @@ -187,7 +191,7 @@ class RendererGL final : public Renderer { void reset() override; void display() override; // Display the 3DS screen contents to the window - void initGraphicsContext(SDL_Window* window) override; // Initialize graphics context + void initGraphicsContext(void* context) override; // Initialize graphics context void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) override; // Clear a GPU buffer in VRAM void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) override; // Perform display transfer void textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) override; @@ -207,10 +211,6 @@ class RendererGL final : public Renderer { 
void resetStateManager() { gl.reset(); } void initUbershader(OpenGL::Program& program); -#ifdef PANDA3DS_FRONTEND_QT - virtual void initGraphicsContext([[maybe_unused]] GL::Context* context) override { initGraphicsContextInternal(); } -#endif - // Take a screenshot of the screen and store it in a file void screenshot(const std::string& name) override; }; \ No newline at end of file diff --git a/include/renderer_mtl/mtl_command_encoder.hpp b/include/renderer_mtl/mtl_command_encoder.hpp index 562e6b79..ce758e19 100644 --- a/include/renderer_mtl/mtl_command_encoder.hpp +++ b/include/renderer_mtl/mtl_command_encoder.hpp @@ -2,6 +2,8 @@ #include +#include "helpers.hpp" + namespace Metal { struct RenderState { MTL::RenderPipelineState* renderPipelineState = nullptr; diff --git a/include/renderer_mtl/mtl_depth_stencil_cache.hpp b/include/renderer_mtl/mtl_depth_stencil_cache.hpp index 8f7256a9..b902346d 100644 --- a/include/renderer_mtl/mtl_depth_stencil_cache.hpp +++ b/include/renderer_mtl/mtl_depth_stencil_cache.hpp @@ -2,6 +2,7 @@ #include +#include "helpers.hpp" #include "pica_to_mtl.hpp" using namespace PICA; @@ -17,7 +18,6 @@ namespace Metal { class DepthStencilCache { public: DepthStencilCache() = default; - ~DepthStencilCache() { reset(); } void set(MTL::Device* dev) { device = dev; } diff --git a/include/renderer_mtl/mtl_draw_pipeline_cache.hpp b/include/renderer_mtl/mtl_draw_pipeline_cache.hpp index 7178785e..17bead9a 100644 --- a/include/renderer_mtl/mtl_draw_pipeline_cache.hpp +++ b/include/renderer_mtl/mtl_draw_pipeline_cache.hpp @@ -2,6 +2,7 @@ #include +#include "helpers.hpp" #include "objc_helper.hpp" #include "pica_to_mtl.hpp" diff --git a/include/renderer_mtl/mtl_lut_texture.hpp b/include/renderer_mtl/mtl_lut_texture.hpp index 531dc73c..e2f67b6b 100644 --- a/include/renderer_mtl/mtl_lut_texture.hpp +++ b/include/renderer_mtl/mtl_lut_texture.hpp @@ -2,19 +2,22 @@ #include +#include "helpers.hpp" + namespace Metal { -class LutTexture { -public: - 
LutTexture(MTL::Device* device, MTL::TextureType type, MTL::PixelFormat pixelFormat, u32 width, u32 height, const char* name); - ~LutTexture(); - u32 getNextIndex(); + class LutTexture { + public: + LutTexture(MTL::Device* device, MTL::TextureType type, MTL::PixelFormat pixelFormat, u32 width, u32 height, const char* name); + ~LutTexture(); + u32 getNextIndex(); - MTL::Texture* getTexture() { return texture; } - u32 getCurrentIndex() { return currentIndex; } -private: - MTL::Texture* texture; - u32 currentIndex = 0; -}; + MTL::Texture* getTexture() { return texture; } + u32 getCurrentIndex() { return currentIndex; } -} // namespace Metal + private: + MTL::Texture* texture; + u32 currentIndex = 0; + }; + +} // namespace Metal diff --git a/include/renderer_mtl/mtl_vertex_buffer_cache.hpp b/include/renderer_mtl/mtl_vertex_buffer_cache.hpp index b392389c..d50ea336 100644 --- a/include/renderer_mtl/mtl_vertex_buffer_cache.hpp +++ b/include/renderer_mtl/mtl_vertex_buffer_cache.hpp @@ -5,7 +5,6 @@ #include "helpers.hpp" #include "pica_to_mtl.hpp" - using namespace PICA; namespace Metal { diff --git a/include/renderer_mtl/pica_to_mtl.hpp b/include/renderer_mtl/pica_to_mtl.hpp index d4c6dc7c..94474dad 100644 --- a/include/renderer_mtl/pica_to_mtl.hpp +++ b/include/renderer_mtl/pica_to_mtl.hpp @@ -3,6 +3,7 @@ #include #include "PICA/regs.hpp" +#include "helpers.hpp" // TODO: remove dependency on OpenGL #include "opengl.hpp" @@ -14,10 +15,10 @@ namespace PICA { bool needsSwizzle = false; MTL::TextureSwizzleChannels swizzle{ - .red = MTL::TextureSwizzleRed, - .green = MTL::TextureSwizzleGreen, - .blue = MTL::TextureSwizzleBlue, - .alpha = MTL::TextureSwizzleAlpha, + MTL::TextureSwizzleRed, + MTL::TextureSwizzleGreen, + MTL::TextureSwizzleBlue, + MTL::TextureSwizzleAlpha, }; }; @@ -33,7 +34,7 @@ namespace PICA { case ColorFmt::RGBA5551: return MTL::PixelFormatRGBA8Unorm; // TODO: use MTL::PixelFormatBGR5A1Unorm? 
case ColorFmt::RGB565: return MTL::PixelFormatRGBA8Unorm; // TODO: use MTL::PixelFormatB5G6R5Unorm? #ifdef PANDA3DS_IOS - case ColorFmt::RGBA4: return MTL::PixelFormatRGBA8Unorm; // IOS + Metal doesn't support AGBR4 properly, at least on simulator + case ColorFmt::RGBA4: return MTL::PixelFormatRGBA8Unorm; // IOS + Metal doesn't support AGBR4 properly, at least on simulator #else case ColorFmt::RGBA4: return MTL::PixelFormatABGR4Unorm; #endif @@ -130,8 +131,7 @@ namespace PICA { case PrimType::TriangleFan: Helpers::warn("Triangle fans are not supported on Metal, using triangles instead"); return MTL::PrimitiveTypeTriangle; - case PrimType::GeometryPrimitive: - return MTL::PrimitiveTypeTriangle; + case PrimType::GeometryPrimitive: return MTL::PrimitiveTypeTriangle; } } diff --git a/include/renderer_mtl/renderer_mtl.hpp b/include/renderer_mtl/renderer_mtl.hpp index c9b4b9b6..29eb02d4 100644 --- a/include/renderer_mtl/renderer_mtl.hpp +++ b/include/renderer_mtl/renderer_mtl.hpp @@ -13,7 +13,6 @@ #include "mtl_vertex_buffer_cache.hpp" #include "renderer.hpp" - // HACK: use the OpenGL cache #include "../renderer_gl/surface_cache.hpp" @@ -30,7 +29,7 @@ class RendererMTL final : public Renderer { void reset() override; void display() override; - void initGraphicsContext(SDL_Window* window) override; + void initGraphicsContext(void* context) override; void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) override; void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) override; void textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) override; @@ -38,10 +37,6 @@ class RendererMTL final : public Renderer { void screenshot(const std::string& name) override; void deinitGraphicsContext() override; -#ifdef PANDA3DS_FRONTEND_QT - virtual void initGraphicsContext([[maybe_unused]] GL::Context* context) override {} -#endif - virtual void setMTKLayer(void* layer) override; 
private: diff --git a/include/renderer_null/renderer_null.hpp b/include/renderer_null/renderer_null.hpp index 50a724d8..28ab41af 100644 --- a/include/renderer_null/renderer_null.hpp +++ b/include/renderer_null/renderer_null.hpp @@ -9,7 +9,7 @@ class RendererNull final : public Renderer { void reset() override; void display() override; - void initGraphicsContext(SDL_Window* window) override; + void initGraphicsContext(void* context) override; void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) override; void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) override; void textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) override; @@ -20,8 +20,4 @@ class RendererNull final : public Renderer { // Tell the GPU core that we'll handle vertex fetch & shader execution in the renderer in order to speed up execution. // Of course, we don't do this and geometry is never actually processed, since this is the null renderer. 
virtual bool prepareForDraw(ShaderUnit& shaderUnit, PICA::DrawAcceleration* accel) override { return true; }; - -#ifdef PANDA3DS_FRONTEND_QT - virtual void initGraphicsContext([[maybe_unused]] GL::Context* context) override {} -#endif }; diff --git a/include/renderer_sw/renderer_sw.hpp b/include/renderer_sw/renderer_sw.hpp index dd12bf0a..e2dd90cb 100644 --- a/include/renderer_sw/renderer_sw.hpp +++ b/include/renderer_sw/renderer_sw.hpp @@ -9,15 +9,11 @@ class RendererSw final : public Renderer { void reset() override; void display() override; - void initGraphicsContext(SDL_Window* window) override; + void initGraphicsContext(void* context) override; void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) override; void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) override; void textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) override; void drawVertices(PICA::PrimType primType, std::span vertices) override; void screenshot(const std::string& name) override; void deinitGraphicsContext() override; - -#ifdef PANDA3DS_FRONTEND_QT - virtual void initGraphicsContext([[maybe_unused]] GL::Context* context) override {} -#endif }; diff --git a/include/renderer_vk/renderer_vk.hpp b/include/renderer_vk/renderer_vk.hpp index 25cc26f8..c90b1d4d 100644 --- a/include/renderer_vk/renderer_vk.hpp +++ b/include/renderer_vk/renderer_vk.hpp @@ -9,6 +9,7 @@ #include "vk_sampler_cache.hpp" class GPU; +struct SDL_Window; class RendererVK final : public Renderer { SDL_Window* targetWindow; @@ -113,7 +114,7 @@ class RendererVK final : public Renderer { void reset() override; void display() override; - void initGraphicsContext(SDL_Window* window) override; + void initGraphicsContext(void* context) override; void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) override; void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 
flags) override; void textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) override; diff --git a/include/scheduler.hpp b/include/scheduler.hpp index 9a51f893..6083aeb2 100644 --- a/include/scheduler.hpp +++ b/include/scheduler.hpp @@ -4,7 +4,6 @@ #include #include "helpers.hpp" -#include "logger.hpp" struct Scheduler { enum class EventType { diff --git a/include/services/dsp_firmware_db.hpp b/include/services/dsp_firmware_db.hpp index bac11d73..b6b594d4 100644 --- a/include/services/dsp_firmware_db.hpp +++ b/include/services/dsp_firmware_db.hpp @@ -18,7 +18,7 @@ namespace DSP { : hash(hash), size(size), supportsAAC(supportsAAC), notes(notes) {} }; - static constexpr std::array firmwareDB = { + static constexpr std::array firmwareDB = { FirmwareInfo( {0x47, 0xD6, 0x6C, 0xD2, 0x13, 0x1, 0xFF, 0x62, 0xAD, 0x16, 0x98, 0x2, 0x46, 0x67, 0xF3, 0x9, 0xDA, 0x7, 0x20, 0x9E, 0xFB, 0xB, 0x6A, 0x81, 0x98, 0xFF, 0x9B, 0xE0, 0x51, 0x67, 0xC9, 0xA6}, @@ -72,5 +72,11 @@ namespace DSP { 0x3A, 0x29, 0x1, 0x70, 0xEA, 0x3B, 0x6C, 0x14, 0x57, 0x49, 0xAD, 0x93, 0x58, 0x67, 0x2C, 0x97}, 49716, false, "Spotted in PMD: GTI" ), + + FirmwareInfo( + {0x96, 0xF3, 0x96, 0x28, 0x38, 0xEB, 0xE9, 0x2A, 0x9E, 0x99, 0xD0, 0xB0, 0x78, 0xAD, 0xE3, 0x67, + 0x3B, 0x9B, 0x2F, 0x24, 0x3E, 0xBE, 0xC0, 0x47, 0x4D, 0x3E, 0x49, 0xA9, 0x2B, 0x65, 0x5B, 0x85}, + 49772, false, "Spotted in Luigi's Mansion" + ), }; } // namespace DSP \ No newline at end of file diff --git a/include/services/frd.hpp b/include/services/frd.hpp index 914d9251..a5aee738 100644 --- a/include/services/frd.hpp +++ b/include/services/frd.hpp @@ -40,6 +40,7 @@ class FRDService { void hasLoggedIn(u32 messagePointer); void isOnline(u32 messagePointer); void logout(u32 messagePointer); + void saveLocalAccountData(u32 messagePointer); void setClientSDKVersion(u32 messagePointer); void setNotificationMask(u32 messagePointer); void updateGameModeDescription(u32 messagePointer); diff --git 
a/src/config.cpp b/src/config.cpp index f2cddaba..4d489c95 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -48,6 +48,7 @@ void EmulatorConfig::load() { printAppVersion = toml::find_or(general, "PrintAppVersion", true); circlePadProEnabled = toml::find_or(general, "EnableCirclePadPro", true); + fastmemEnabled = toml::find_or(general, "EnableFastmem", enableFastmemDefault); systemLanguage = languageCodeFromString(toml::find_or(general, "SystemLanguage", "en")); } } @@ -180,6 +181,7 @@ void EmulatorConfig::save() { data["General"]["PrintAppVersion"] = printAppVersion; data["General"]["SystemLanguage"] = languageCodeToString(systemLanguage); data["General"]["EnableCirclePadPro"] = circlePadProEnabled; + data["General"]["EnableFastmem"] = fastmemEnabled; data["Window"]["AppVersionOnWindow"] = windowSettings.showAppVersion; data["Window"]["RememberWindowPosition"] = windowSettings.rememberPosition; diff --git a/src/core/CPU/cpu_dynarmic.cpp b/src/core/CPU/cpu_dynarmic.cpp index 85dc70d9..124647d8 100644 --- a/src/core/CPU/cpu_dynarmic.cpp +++ b/src/core/CPU/cpu_dynarmic.cpp @@ -6,6 +6,7 @@ CPU::CPU(Memory& mem, Kernel& kernel, Emulator& emu) : mem(mem), emu(emu), scheduler(emu.getScheduler()), env(mem, kernel, emu.getScheduler()) { cp15 = std::make_shared(); + mem.setCPUTicks(getTicksRef()); Dynarmic::A32::UserConfig config; config.arch_version = Dynarmic::A32::ArchVersion::v6K; @@ -15,6 +16,12 @@ CPU::CPU(Memory& mem, Kernel& kernel, Emulator& emu) : mem(mem), emu(emu), sched config.global_monitor = &exclusiveMonitor; config.processor_id = 0; + if (mem.isFastmemEnabled()) { + config.fastmem_pointer = u64(mem.getFastmemArenaBase()); + } else { + config.fastmem_pointer = std::nullopt; + } + jit = std::make_unique(config); } diff --git a/src/core/PICA/gpu.cpp b/src/core/PICA/gpu.cpp index f5d26784..e38f4416 100644 --- a/src/core/PICA/gpu.cpp +++ b/src/core/PICA/gpu.cpp @@ -138,7 +138,7 @@ void GPU::drawArrays(bool indexed) { if (config.accelerateShaders) { // If we are 
potentially going to use hw shaders, gather necessary to do vertex fetch, index buffering, etc on the GPU - // This includes parsing which vertices to upload, getting pointers to the index buffer data & vertex data, and so on + // This includes parsing which vertices to upload, getting pointers to the index buffer data & vertex data, and so on getAcceleratedDrawInfo(accel, indexed); } @@ -182,6 +182,7 @@ void GPU::drawArrays() { // We can have up to 16 attributes, each one consisting of 4 floats constexpr u32 maxAttrSizeInFloats = 16 * 4; + setVsOutputMask(regs[PICA::InternalRegs::VertexShaderOutputMask]); // Base address for vertex attributes // The vertex base is always on a quadword boundary because the PICA does weird alignment shit any time possible diff --git a/src/core/PICA/shader_interpreter.cpp b/src/core/PICA/shader_interpreter.cpp index a85c7464..9be382b3 100644 --- a/src/core/PICA/shader_interpreter.cpp +++ b/src/core/PICA/shader_interpreter.cpp @@ -124,16 +124,24 @@ u8 PICAShader::getIndexedSource(u32 source, u32 index) { switch (index) { // No offset applied case 0: [[likely]] return u8(source); - // Address register + + // An address register (if index == 1 or 2) or the loop counter (if index == 3) is used as the offset + // There's several edge cases to handle, which have been verified with our shader tests and on a real 3DS case 1: - case 2: { - const s32 offset = addrRegister[index - 1]; + case 2: + case 3: { + s32 offset = (index == 3) ? s32(loopCounter) : addrRegister[index - 1]; if (offset < -128 || offset > 127) [[unlikely]] { - return u8(source); + offset = 0; } - return u8(source + offset); + + // Subtract 0x20 to get the index of the float uniform. 
Add the offset to it, then mask the sum with 0x7F like the PICA does + // After that, add 0x20 again to undo the initial subtraction + u8 floatUniformIndex = u8(((source - 0x20) + offset) & 0x7F); + floatUniformIndex += 0x20; + + return floatUniformIndex; } - case 3: return u8(source + loopCounter); } Helpers::panic("Reached unreachable path in PICAShader::getIndexedSource"); diff --git a/src/core/applets/software_keyboard.cpp b/src/core/applets/software_keyboard.cpp index fc58a3ec..379134a6 100644 --- a/src/core/applets/software_keyboard.cpp +++ b/src/core/applets/software_keyboard.cpp @@ -55,7 +55,7 @@ Result::HorizonResult SoftwareKeyboardApplet::start(const MemoryBlock* sharedMem } mem.write16(textAddress, 0); // Write UTF-16 null terminator - // Temporarily hardcode the pressed button to be the firs tone + // Temporarily hardcode the pressed button to be the first one switch (config.numButtonsM1) { case SoftwareKeyboardButtonConfig::SingleButton: config.returnCode = SoftwareKeyboardResult::D0Click; break; case SoftwareKeyboardButtonConfig::DualButton: config.returnCode = SoftwareKeyboardResult::D1Click1; break; diff --git a/src/core/audio/aac_decoder.cpp b/src/core/audio/aac_decoder.cpp index af88485c..58cebda6 100644 --- a/src/core/audio/aac_decoder.cpp +++ b/src/core/audio/aac_decoder.cpp @@ -45,8 +45,6 @@ void AAC::Decoder::decode(AAC::Message& response, const AAC::Message& request, A std::array frame; std::array, 2> audioStreams; - bool queriedStreamInfo = false; - while (bytesValid != 0) { if (aacDecoder_Fill(decoderHandle, &input, &bufferSize, &bytesValid) != AAC_DEC_OK) { Helpers::warn("Failed to fill AAC decoder with samples"); @@ -143,4 +141,4 @@ AAC::Decoder::~Decoder() { aacDecoder_Close(decoderHandle); decoderHandle = nullptr; } -} \ No newline at end of file +} diff --git a/src/core/audio/hle_core.cpp b/src/core/audio/hle_core.cpp index 97a6211e..059318c8 100644 --- a/src/core/audio/hle_core.cpp +++ b/src/core/audio/hle_core.cpp @@ -396,7 +396,28 
@@ namespace Audio { if (config.partialEmbeddedBufferDirty) { config.partialEmbeddedBufferDirty = 0; - printf("Partial embedded buffer dirty for voice %d\n", source.index); + + const u8* data = getPointerPhys(source.currentBufferPaddr & ~0x3); + + if (data != nullptr) { + switch (source.sampleFormat) { + case SampleFormat::PCM8: source.currentSamples = decodePCM8(data, config.length, source); break; + case SampleFormat::PCM16: source.currentSamples = decodePCM16(data, config.length, source); break; + case SampleFormat::ADPCM: source.currentSamples = decodeADPCM(data, config.length, source); break; + + default: + Helpers::warn("Invalid DSP sample format"); + source.currentSamples = {}; + break; + } + + // We're skipping the first samplePosition samples, so remove them from the buffer so as not to consume them later + if (source.samplePosition > 0) { + auto start = source.currentSamples.begin(); + auto end = std::next(start, source.samplePosition); + source.currentSamples.erase(start, end); + } + } } if (config.bufferQueueDirty) { @@ -478,6 +499,7 @@ namespace Audio { return; } + source.currentBufferPaddr = buffer.paddr; source.currentBufferID = buffer.bufferID; source.previousBufferID = 0; // For looping buffers, this is only set for the first time we play it. Loops do not set the dirty bit. 
@@ -766,6 +788,7 @@ namespace Audio { interpolationMode = InterpolationMode::Linear; samplePosition = 0; + currentBufferPaddr = 0; previousBufferID = 0; currentBufferID = 0; syncCount = 0; diff --git a/src/core/kernel/fcram.cpp b/src/core/kernel/fcram.cpp new file mode 100644 index 00000000..ebb58c34 --- /dev/null +++ b/src/core/kernel/fcram.cpp @@ -0,0 +1,102 @@ +#include "fcram.hpp" + +#include "memory.hpp" + +void KFcram::Region::reset(u32 start, size_t size) { + this->start = start; + pages = size >> 12; + freePages = pages; + + Block initialBlock(pages, 0); + blocks.clear(); + blocks.push_back(initialBlock); +} + +void KFcram::Region::alloc(std::list& out, s32 allocPages, bool linear) { + for (auto it = blocks.begin(); it != blocks.end(); it++) { + if (it->used) continue; + + // On linear allocations, only a single contiguous block may be used + if (it->pages < allocPages && linear) continue; + + // If the current block is bigger than the allocation, split it + if (it->pages > allocPages) { + Block newBlock(it->pages - allocPages, it->pageOffset + allocPages); + it->pages = allocPages; + blocks.insert(it, newBlock); + } + + // Mark the block as allocated and add it to the output list + it->used = true; + allocPages -= it->pages; + freePages -= it->pages; + + u32 paddr = start + (it->pageOffset << 12); + FcramBlock outBlock(paddr, it->pages); + out.push_back(outBlock); + + if (allocPages < 1) { + return; + } + } + + // Official kernel panics here + Helpers::panic("Failed to allocate FCRAM, not enough guest memory"); +} + +u32 KFcram::Region::getUsedCount() { return pages - freePages; } +u32 KFcram::Region::getFreeCount() { return freePages; } + +KFcram::KFcram(Memory& mem) : mem(mem) {} + +void KFcram::reset(size_t ramSize, size_t appSize, size_t sysSize, size_t baseSize) { + fcram = mem.getFCRAM(); + refs = std::unique_ptr(new u32[ramSize >> 12]); + std::memset(refs.get(), 0, (ramSize >> 12) * sizeof(u32)); + + appRegion.reset(0, appSize); + 
sysRegion.reset(appSize, sysSize); + baseRegion.reset(appSize + sysSize, baseSize); +} + +void KFcram::alloc(FcramBlockList& out, s32 pages, FcramRegion region, bool linear) { + switch (region) { + case FcramRegion::App: appRegion.alloc(out, pages, linear); break; + case FcramRegion::Sys: sysRegion.alloc(out, pages, linear); break; + case FcramRegion::Base: baseRegion.alloc(out, pages, linear); break; + default: Helpers::panic("Invalid FCRAM region chosen for allocation!"); break; + } + + incRef(out); +} + +void KFcram::incRef(FcramBlockList& list) { + for (auto it = list.begin(); it != list.end(); it++) { + for (int i = 0; i < it->pages; i++) { + u32 index = (it->paddr >> 12) + i; + refs.get()[index]++; + } + } +} + +void KFcram::decRef(FcramBlockList& list) { + for (auto it = list.begin(); it != list.end(); it++) { + for (int i = 0; i < it->pages; i++) { + u32 index = (it->paddr >> 12) + i; + refs.get()[index]--; + + if (!refs.get()[index]) { + Helpers::panic("TODO: Freeing FCRAM"); + } + } + } +} + +u32 KFcram::getUsedCount(FcramRegion region) { + switch (region) { + case FcramRegion::App: return appRegion.getUsedCount(); + case FcramRegion::Sys: return sysRegion.getUsedCount(); + case FcramRegion::Base: return baseRegion.getUsedCount(); + default: Helpers::panic("Invalid FCRAM region in getUsedCount!"); + } +} \ No newline at end of file diff --git a/src/core/kernel/idle_thread.cpp b/src/core/kernel/idle_thread.cpp index d6f79360..cfc71d2d 100644 --- a/src/core/kernel/idle_thread.cpp +++ b/src/core/kernel/idle_thread.cpp @@ -17,6 +17,8 @@ idle_thread_main: b idle_thread_main */ +using namespace KernelMemoryTypes; + static constexpr u8 idleThreadCode[] = { 0x00, 0x00, 0xA0, 0xE3, // mov r0, #0 0x00, 0x10, 0xA0, 0xE3, // mov r1, #0 @@ -27,18 +29,16 @@ static constexpr u8 idleThreadCode[] = { // Set up an idle thread to run when no thread is able to run void Kernel::setupIdleThread() { Thread& t = threads[idleThreadIndex]; - constexpr u32 codeAddress = 0xBFC00000; 
- // Reserve some memory for the idle thread's code. We map this memory to vaddr BFC00000 which is not userland-accessible + // Reserve some memory for the idle thread's code. We map this memory to vaddr 3FC00000 which shouldn't be accessed by applications // We only allocate 4KB (1 page) because our idle code is pretty small - const u32 fcramIndex = mem.allocateSysMemory(Memory::pageSize); - auto vaddr = mem.allocateMemory(codeAddress, fcramIndex, Memory::pageSize, true, true, false, true, false, true); - if (!vaddr.has_value() || vaddr.value() != codeAddress) { + constexpr u32 codeAddress = 0x3FC00000; + if (!mem.allocMemory(codeAddress, 1, FcramRegion::Base, true, true, false, MemoryState::Locked)) { Helpers::panic("Failed to setup idle thread"); } // Copy idle thread code to the allocated FCRAM - std::memcpy(&mem.getFCRAM()[fcramIndex], idleThreadCode, sizeof(idleThreadCode)); + mem.copyToVaddr(codeAddress, idleThreadCode, sizeof(idleThreadCode)); t.entrypoint = codeAddress; t.initialSP = 0; diff --git a/src/core/kernel/kernel.cpp b/src/core/kernel/kernel.cpp index 824017d0..7a0aacc0 100644 --- a/src/core/kernel/kernel.cpp +++ b/src/core/kernel/kernel.cpp @@ -7,7 +7,7 @@ #include "kernel_types.hpp" Kernel::Kernel(CPU& cpu, Memory& mem, GPU& gpu, const EmulatorConfig& config, LuaManager& lua) - : cpu(cpu), regs(cpu.regs()), mem(mem), handleCounter(0), serviceManager(regs, mem, gpu, currentProcess, *this, config, lua) { + : cpu(cpu), regs(cpu.regs()), mem(mem), handleCounter(0), serviceManager(regs, mem, gpu, currentProcess, *this, config, lua), fcramManager(mem) { objects.reserve(512); // Make room for a few objects to avoid further memory allocs later mutexHandles.reserve(8); portHandles.reserve(32); @@ -271,7 +271,7 @@ void Kernel::getProcessInfo() { // According to 3DBrew: Amount of private (code, data, heap) memory used by the process + total supervisor-mode // stack size + page-rounded size of the external handle table case 2: - regs[1] = 
mem.getUsedUserMem(); + regs[1] = fcramManager.getUsedCount(FcramRegion::App) * Memory::pageSize; regs[2] = 0; break; @@ -364,7 +364,7 @@ void Kernel::getSystemInfo() { switch (subtype) { // Total used memory size in the APPLICATION memory region case 1: - regs[1] = mem.getUsedUserMem(); + regs[1] = fcramManager.getUsedCount(FcramRegion::App) * Memory::pageSize; regs[2] = 0; break; diff --git a/src/core/kernel/memory_management.cpp b/src/core/kernel/memory_management.cpp index 58a46c4d..9199a32c 100644 --- a/src/core/kernel/memory_management.cpp +++ b/src/core/kernel/memory_management.cpp @@ -30,10 +30,10 @@ namespace MemoryPermissions { }; } +using namespace KernelMemoryTypes; + // Returns whether "value" is aligned to a page boundary (Ie a boundary of 4096 bytes) -static constexpr bool isAligned(u32 value) { - return (value & 0xFFF) == 0; -} +static constexpr bool isAligned(u32 value) { return (value & 0xFFF) == 0; } // Result ControlMemory(u32* outaddr, u32 addr0, u32 addr1, u32 size, // MemoryOperation operation, MemoryPermission permissions) @@ -44,6 +44,7 @@ void Kernel::controlMemory() { u32 addr0 = regs[1]; u32 addr1 = regs[2]; u32 size = regs[3]; + u32 pages = size >> 12; // Official kernel truncates nonaligned sizes u32 perms = regs[4]; if (perms == MemoryPermissions::DontCare) { @@ -61,7 +62,7 @@ void Kernel::controlMemory() { Helpers::panic("ControlMemory: attempted to allocate executable memory"); } - if (!isAligned(addr0) || !isAligned(addr1) || !isAligned(size)) { + if (!isAligned(addr0) || !isAligned(addr1)) { Helpers::panic("ControlMemory: Unaligned parameters\nAddr0: %08X\nAddr1: %08X\nSize: %08X", addr0, addr1, size); } @@ -72,22 +73,54 @@ void Kernel::controlMemory() { switch (operation & 0xFF) { case Operation::Commit: { - std::optional address = mem.allocateMemory(addr0, 0, size, linear, r, w, x, true); - if (!address.has_value()) { - Helpers::panic("ControlMemory: Failed to allocate memory"); + // TODO: base this from the exheader + auto 
region = FcramRegion::App; + + u32 outAddr = 0; + if (linear) { + if (!mem.allocMemoryLinear(outAddr, addr0, pages, region, r, w, false)) { + Helpers::panic("ControlMemory: Failed to allocate linear memory"); + } + } else { + if (!mem.allocMemory(addr0, pages, region, r, w, false, MemoryState::Private)) { + Helpers::panic("ControlMemory: Failed to allocate memory"); + } + + outAddr = addr0; } - regs[1] = address.value(); + regs[1] = outAddr; break; } - case Operation::Map: mem.mirrorMapping(addr0, addr1, size); break; + case Operation::Map: + // Official kernel only allows Private regions to be mapped to Free regions. An Alias or Aliased region cannot be mapped again + if (!mem.mapVirtualMemory( + addr0, addr1, pages, r, w, false, MemoryState::Free, MemoryState::Private, MemoryState::Alias, MemoryState::Aliased + )) + Helpers::panic("ControlMemory: Failed to map memory"); + break; + + case Operation::Unmap: + // The same as a Map operation, except in reverse + if (!mem.mapVirtualMemory( + addr0, addr1, pages, false, false, false, MemoryState::Alias, MemoryState::Aliased, MemoryState::Free, MemoryState::Private + )) { + Helpers::panic("ControlMemory: Failed to unmap memory"); + } + break; case Operation::Protect: - Helpers::warn( - "Ignoring mprotect! 
Hope nothing goes wrong but if the game accesses invalid memory or crashes then we prolly need to implement " - "this\n" - ); + // Official kernel has an internal state bit to indicate that the region's permissions may be changed + // But this should account for all cases + if (!mem.testMemoryState(addr0, pages, MemoryState::Private) && !mem.testMemoryState(addr0, pages, MemoryState::Alias) && + !mem.testMemoryState(addr0, pages, MemoryState::Aliased) && !mem.testMemoryState(addr0, pages, MemoryState::AliasCode)) { + Helpers::warn("Tried to mprotect invalid region!"); + return; + } + + mem.changePermissions(addr0, pages, r, w, false); + regs[1] = addr0; break; default: Helpers::warn("ControlMemory: unknown operation %X\n", operation); break; @@ -104,10 +137,11 @@ void Kernel::queryMemory() { logSVC("QueryMemory(mem info pointer = %08X, page info pointer = %08X, addr = %08X)\n", memInfo, pageInfo, addr); - const auto info = mem.queryMemory(addr); - regs[0] = Result::Success; + KernelMemoryTypes::MemoryInfo info; + const auto result = mem.queryMemory(info, addr); + regs[0] = result; regs[1] = info.baseAddr; - regs[2] = info.size; + regs[2] = info.pages * Memory::pageSize; regs[3] = info.perms; regs[4] = info.state; regs[5] = 0; // page flags diff --git a/src/core/kernel/resource_limits.cpp b/src/core/kernel/resource_limits.cpp index 28fbeea8..65556c01 100644 --- a/src/core/kernel/resource_limits.cpp +++ b/src/core/kernel/resource_limits.cpp @@ -82,7 +82,9 @@ void Kernel::getResourceLimitCurrentValues() { s32 Kernel::getCurrentResourceValue(const KernelObject* limit, u32 resourceName) { const auto data = static_cast(limit->data); switch (resourceName) { - case ResourceType::Commit: return mem.usedUserMemory; + // TODO: needs to use the current amount of memory allocated by the process + case ResourceType::Commit: return fcramManager.getUsedCount(FcramRegion::App) * Memory::pageSize; + case ResourceType::Thread: return threadIndices.size(); default: 
Helpers::panic("Attempted to get current value of unknown kernel resource: %d\n", resourceName); } diff --git a/src/core/loader/3dsx.cpp b/src/core/loader/3dsx.cpp index ca6bdd19..2790c8f8 100644 --- a/src/core/loader/3dsx.cpp +++ b/src/core/loader/3dsx.cpp @@ -6,6 +6,8 @@ #include "memory.hpp" +using namespace KernelMemoryTypes; + namespace { struct LoadInfo { u32 codeSegSizeAligned; @@ -53,12 +55,6 @@ bool Memory::map3DSX(HB3DSX& hb3dsx, const HB3DSX::Header& header) { // suum of aligned values is always aligned, have an extra RW page for libctru const u32 totalSize = hbInfo.codeSegSizeAligned + hbInfo.rodataSegSizeAligned + hbInfo.dataSegSizeAligned + 4_KB; - const auto opt = findPaddr(totalSize); - if (!opt.has_value()) { - Helpers::panic("Failed to find paddr to map 3DSX file's code to"); - return false; - } - // Map the ROM on the kernel side const u32 textOffset = 0; const u32 rodataOffset = textOffset + hbInfo.codeSegSizeAligned; @@ -213,7 +209,8 @@ bool Memory::map3DSX(HB3DSX& hb3dsx, const HB3DSX::Header& header) { { pst->heapSize = u32(48_MB); pst->linearHeapSize = u32(64_MB); - } else */ { + } else */ + { pst.heapSize = u32(24_MB); pst.linearHeapSize = u32(32_MB); } @@ -221,12 +218,17 @@ bool Memory::map3DSX(HB3DSX& hb3dsx, const HB3DSX::Header& header) { std::memcpy(&code[4], &pst, sizeof(pst)); } - const auto paddr = opt.value(); - std::memcpy(&fcram[paddr], &code[0], totalSize); // Copy the 3 segments + BSS to FCRAM + // Text is R-X + allocMemory(textSegAddr, hbInfo.codeSegSizeAligned / Memory::pageSize, FcramRegion::App, true, false, true, MemoryState::Code); + copyToVaddr(textSegAddr, &code[textOffset], hbInfo.codeSegSizeAligned); - allocateMemory(textSegAddr, paddr + textOffset, hbInfo.codeSegSizeAligned, true, true, false, true); // Text is R-X - allocateMemory(rodataSegAddr, paddr + rodataOffset, hbInfo.rodataSegSizeAligned, true, true, false, false); // Rodata is R-- - allocateMemory(dataSegAddr, paddr + dataOffset, hbInfo.dataSegSizeAligned + 
0x1000, true, true, true, false); // Data+BSS+Extra is RW- + // Rodata is R-- + allocMemory(rodataSegAddr, hbInfo.rodataSegSizeAligned / Memory::pageSize, FcramRegion::App, true, false, false, MemoryState::Code); + copyToVaddr(rodataSegAddr, &code[rodataOffset], hbInfo.rodataSegSizeAligned); + + // Data + BSS + Extra is RW-. We allocate 1 extra page (4KB) which is not initialized to anything. + allocMemory(dataSegAddr, (hbInfo.dataSegSizeAligned + 4_KB) / Memory::pageSize, FcramRegion::App, true, true, false, MemoryState::Private); + copyToVaddr(dataSegAddr, &code[dataOffset], hbInfo.dataSegSizeAligned); return true; } diff --git a/src/core/loader/elf.cpp b/src/core/loader/elf.cpp index 8b2a7807..205b16ec 100644 --- a/src/core/loader/elf.cpp +++ b/src/core/loader/elf.cpp @@ -4,6 +4,7 @@ #include "memory.hpp" using namespace ELFIO; +using namespace KernelMemoryTypes; std::optional Memory::loadELF(std::ifstream& file) { loadedCXI = std::nullopt; // ELF files don't have a CXI, so set this to null @@ -24,6 +25,7 @@ std::optional Memory::loadELF(std::ifstream& file) { auto segNum = reader.segments.size(); printf("Number of segments: %d\n", segNum); printf(" # Perms Vaddr File Size Mem Size\n"); + for (int i = 0; i < segNum; ++i) { const auto seg = reader.segments[i]; const auto flags = seg->get_flags(); @@ -55,12 +57,8 @@ std::optional Memory::loadELF(std::ifstream& file) { Helpers::warn("Rounding ELF segment size to %08X\n", memorySize); } - // This should also assert that findPaddr doesn't fail - u32 fcramAddr = findPaddr(memorySize).value(); - std::memcpy(&fcram[fcramAddr], data, fileSize); - - // Allocate the segment on the OS side - allocateMemory(vaddr, fcramAddr, memorySize, true, r, w, x); + allocMemory(vaddr, memorySize / Memory::pageSize, FcramRegion::App, r, w, x, MemoryState::Code); + copyToVaddr(vaddr, data, fileSize); } // ELF can't specify a region, make it default to USA diff --git a/src/core/loader/ncsd.cpp b/src/core/loader/ncsd.cpp index 
13d68892..500b70ce 100644 --- a/src/core/loader/ncsd.cpp +++ b/src/core/loader/ncsd.cpp @@ -3,8 +3,11 @@ #include #include +#include "kernel/fcram.hpp" #include "memory.hpp" +using namespace KernelMemoryTypes; + bool Memory::mapCXI(NCSD& ncsd, NCCH& cxi) { printf("Text address = %08X, size = %08X\n", cxi.text.address, cxi.text.size); printf("Rodata address = %08X, size = %08X\n", cxi.rodata.address, cxi.rodata.size); @@ -24,12 +27,6 @@ bool Memory::mapCXI(NCSD& ncsd, NCCH& cxi) { // Round up the size of the CXI stack size to a page (4KB) boundary, as the OS can only allocate memory this way u32 stackSize = (cxi.stackSize + pageSize - 1) & -pageSize; - if (stackSize > 512_KB) { - // TODO: Figure out the actual max stack size - Helpers::warn("CXI stack size is %08X which seems way too big. Clamping to 512KB", stackSize); - stackSize = 512_KB; - } - // Allocate stack if (!allocateMainThreadStack(stackSize)) { // Should be unreachable @@ -42,40 +39,41 @@ bool Memory::mapCXI(NCSD& ncsd, NCCH& cxi) { u32 bssSize = (cxi.bssSize + 0xfff) & ~0xfff; // Round BSS size up to a page boundary // Total memory to allocate for loading u32 totalSize = (cxi.text.pageCount + cxi.rodata.pageCount + cxi.data.pageCount) * pageSize + bssSize; - code.resize(code.size() + bssSize, 0); // Pad the .code file with zeroes for the BSS segment - if (code.size() < totalSize) { + if (code.size() + bssSize < totalSize) { Helpers::panic("Total code size as reported by the exheader is larger than the .code file"); return false; } - const auto opt = findPaddr(totalSize); - if (!opt.has_value()) { - Helpers::panic("Failed to find paddr to map CXI file's code to"); - return false; - } - - const auto paddr = opt.value(); - std::memcpy(&fcram[paddr], &code[0], totalSize); // Copy the 3 segments + BSS to FCRAM - // Map the ROM on the kernel side - u32 textOffset = 0; u32 textAddr = cxi.text.address; u32 textSize = cxi.text.pageCount * pageSize; - u32 rodataOffset = textOffset + textSize; u32 rodataAddr = 
cxi.rodata.address; u32 rodataSize = cxi.rodata.pageCount * pageSize; - u32 dataOffset = rodataOffset + rodataSize; u32 dataAddr = cxi.data.address; u32 dataSize = cxi.data.pageCount * pageSize + bssSize; // We're merging the data and BSS segments, as BSS is just pre-initted .data - allocateMemory(textAddr, paddr + textOffset, textSize, true, true, false, true); // Text is R-X - allocateMemory(rodataAddr, paddr + rodataOffset, rodataSize, true, true, false, false); // Rodata is R-- - allocateMemory(dataAddr, paddr + dataOffset, dataSize, true, true, true, false); // Data+BSS is RW- + // TODO: base this off the exheader + auto region = FcramRegion::App; + u32 bssAddr = dataAddr + (cxi.data.pageCount << 12); - ncsd.entrypoint = textAddr; + allocMemory(textAddr, cxi.text.pageCount, region, true, false, true, MemoryState::Code); + allocMemory(rodataAddr, cxi.rodata.pageCount, region, true, false, false, MemoryState::Code); + allocMemory(dataAddr, cxi.data.pageCount, region, true, true, false, MemoryState::Private); + allocMemory(bssAddr, bssSize >> 12, region, true, true, false, MemoryState::Private); + + // Copy .code file to FCRAM + copyToVaddr(textAddr, code.data(), textSize); + copyToVaddr(rodataAddr, code.data() + textSize, rodataSize); + copyToVaddr(dataAddr, code.data() + textSize + rodataSize, cxi.data.pageCount << 12); + + // Set BSS to zeroes + std::vector bss(bssSize, 0); + copyToVaddr(bssAddr, bss.data(), bssSize); + + ncsd.entrypoint = cxi.text.address; // Back the IOFile for accessing the ROM, as well as the ROM's CXI partition, in the memory class. 
CXIFile = ncsd.file; @@ -85,7 +83,9 @@ bool Memory::mapCXI(NCSD& ncsd, NCCH& cxi) { std::optional Memory::loadNCSD(Crypto::AESEngine& aesEngine, const std::filesystem::path& path) { NCSD ncsd; - if (!ncsd.file.open(path, "rb")) return std::nullopt; + if (!ncsd.file.open(path, "rb")) { + return std::nullopt; + } u8 magic[4]; // Must be "NCSD" ncsd.file.seek(0x100); diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 57eac8ca..650c8d0e 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -6,6 +6,7 @@ #include #include "config_mem.hpp" +#include "kernel/fcram.hpp" #include "resource_limits.hpp" #include "services/fonts.hpp" #include "services/ptm.hpp" @@ -14,38 +15,43 @@ CMRC_DECLARE(ConsoleFonts); using namespace KernelMemoryTypes; -Memory::Memory(u64& cpuTicks, const EmulatorConfig& config) : cpuTicks(cpuTicks), config(config) { - fcram = new uint8_t[FCRAM_SIZE](); +Memory::Memory(KFcram& fcramManager, const EmulatorConfig& config) : fcramManager(fcramManager), config(config) { + const bool fastmemEnabled = config.fastmemEnabled; + arena = new Common::HostMemory(FASTMEM_BACKING_SIZE, FASTMEM_VIRTUAL_SIZE, fastmemEnabled); readTable.resize(totalPageCount, 0); writeTable.resize(totalPageCount, 0); - memoryInfo.reserve(32); // Pre-allocate some room for memory allocation info to avoid dynamic allocs + paddrTable.resize(totalPageCount, 0); + + fcram = arena->BackingBasePointer() + FASTMEM_FCRAM_OFFSET; + // arenaDSPRam = arena->BackingBasePointer() + FASTMEM_DSP_RAM_OFFSET; + useFastmem = fastmemEnabled && arena->VirtualBasePointer() != nullptr; } void Memory::reset() { - // Unallocate all memory + // Mark the entire process address space as free + constexpr static int MAX_USER_PAGES = 0x40000000 >> 12; memoryInfo.clear(); - usedFCRAMPages.reset(); - usedUserMemory = u32(0_MB); - usedSystemMemory = u32(0_MB); + memoryInfo.push_back(MemoryInfo(0, MAX_USER_PAGES, 0, KernelMemoryTypes::Free)); + + // TODO: remove this, only needed to make the subsequent 
allocations work for now + fcramManager.reset(FCRAM_SIZE, FCRAM_APPLICATION_SIZE, FCRAM_SYSTEM_SIZE, FCRAM_BASE_SIZE); + + if (useFastmem) { + // Unmap any mappings when resetting + arena->Unmap(0, 4_GB, false); + } for (u32 i = 0; i < totalPageCount; i++) { readTable[i] = 0; writeTable[i] = 0; + paddrTable[i] = 0; } - // Map (32 * 4) KB of FCRAM before the stack for the TLS of each thread - std::optional tlsBaseOpt = findPaddr(32 * 4_KB); - if (!tlsBaseOpt.has_value()) { // Should be unreachable but still good to have - Helpers::panic("Failed to allocate memory for thread-local storage"); - } - - u32 basePaddrForTLS = tlsBaseOpt.value(); - for (u32 i = 0; i < appResourceLimits.maxThreads; i++) { - u32 vaddr = VirtualAddrs::TLSBase + i * VirtualAddrs::TLSSize; - allocateMemory(vaddr, basePaddrForTLS, VirtualAddrs::TLSSize, true); - basePaddrForTLS += VirtualAddrs::TLSSize; - } + // Allocate 512 bytes of TLS for each thread. Since the smallest allocatable unit is 4 KB, that means allocating one page for every 8 threads + // Note that TLS is always allocated in the Base region + s32 tlsPages = (appResourceLimits.maxThreads + 7) >> 3; + allocMemory(VirtualAddrs::TLSBase, tlsPages, FcramRegion::Base, true, true, false, MemoryState::Locked); // Initialize shared memory blocks and reserve memory for them for (auto& e : sharedMemBlocks) { @@ -56,19 +62,23 @@ void Memory::reset() { } e.mapped = false; - e.paddr = allocateSysMemory(e.size); + FcramBlockList memBlock; + fcramManager.alloc(memBlock, e.size >> 12, FcramRegion::Sys, false); + e.paddr = memBlock.begin()->paddr; } // Map DSP RAM as R/W at [0x1FF00000, 0x1FF7FFFF] - constexpr u32 dspRamPages = DSP_RAM_SIZE / pageSize; // Number of DSP RAM pages - constexpr u32 initialPage = VirtualAddrs::DSPMemStart / pageSize; // First page of DSP RAM in the virtual address space + constexpr u32 dspRamPages = DSP_RAM_SIZE / pageSize; // Number of DSP RAM pages - for (u32 i = 0; i < dspRamPages; i++) { - auto pointer = 
uintptr_t(&dspRam[i * pageSize]); + u32 vaddr = VirtualAddrs::DSPMemStart; + u32 paddr = PhysicalAddrs::DSP_RAM; - readTable[i + initialPage] = pointer; - writeTable[i + initialPage] = pointer; - } + Operation op{.newState = MemoryState::Static, .r = true, .w = true, .changeState = true, .changePerms = true}; + changeMemoryState(vaddr, dspRamPages, op); + mapPhysicalMemory(vaddr, paddr, dspRamPages, true, true, false); + + // Allocate RW mapping for DSP RAM + // addFastmemView(VirtualAddrs::DSPMemStart, FASTMEM_DSP_RAM_OFFSET, DSP_RAM_SIZE, true, false); // Later adjusted based on ROM header when possible region = Regions::USA; @@ -76,14 +86,9 @@ void Memory::reset() { bool Memory::allocateMainThreadStack(u32 size) { // Map stack pages as R/W - std::optional basePaddr = findPaddr(size); - if (!basePaddr.has_value()) { // Should also be unreachable but still good to have - return false; - } - + // TODO: get the region from the exheader const u32 stackBottom = VirtualAddrs::StackTop - size; - std::optional result = allocateMemory(stackBottom, basePaddr.value(), size, true); // Should never be nullopt - return result.has_value(); + return allocMemory(stackBottom, size >> 12, FcramRegion::App, true, true, false, MemoryState::Locked); } u8 Memory::read8(u32 vaddr) { @@ -120,7 +125,7 @@ u8 Memory::read8(u32 vaddr) { case ConfigMem::FirmRevision: return firm.revision; case ConfigMem::FirmVersionMinor: return firm.minor; case ConfigMem::FirmVersionMajor: return firm.major; - case ConfigMem::WifiLevel: return 0; // No wifi :( + case ConfigMem::WifiLevel: return 0; // No wifi :( case ConfigMem::WifiMac: case ConfigMem::WifiMac + 1: @@ -163,8 +168,8 @@ u32 Memory::read32(u32 vaddr) { case ConfigMem::Datetime0 + 4: return u32(timeSince3DSEpoch() >> 32); // top 32 bits // Ticks since time was last updated. 
For now we return the current tick count - case ConfigMem::Datetime0 + 8: return u32(cpuTicks); - case ConfigMem::Datetime0 + 12: return u32(cpuTicks >> 32); + case ConfigMem::Datetime0 + 8: return u32(*cpuTicks); + case ConfigMem::Datetime0 + 12: return u32(*cpuTicks >> 32); case ConfigMem::Datetime0 + 16: return 0xFFB0FF0; // Unknown, set by PTM case ConfigMem::Datetime0 + 20: case ConfigMem::Datetime0 + 24: @@ -172,11 +177,10 @@ u32 Memory::read32(u32 vaddr) { case ConfigMem::AppMemAlloc: return appResourceLimits.maxCommit; case ConfigMem::SyscoreVer: return 2; - case 0x1FF81000: return 0; // TODO: Figure out what this config mem address does + case 0x1FF81000: + return 0; // TODO: Figure out what this config mem address does // Wifi MAC: First 4 bytes of MAC Address - case ConfigMem::WifiMac: - return (u32(MACAddress[3]) << 24) | (u32(MACAddress[2]) << 16) | (u32(MACAddress[1]) << 8) | - MACAddress[0]; + case ConfigMem::WifiMac: return (u32(MACAddress[3]) << 24) | (u32(MACAddress[2]) << 16) | (u32(MACAddress[1]) << 8) | MACAddress[0]; // 3D slider. Float in range 0.0 = off, 1.0 = max. case ConfigMem::SliderState3D: return Helpers::bit_cast(0.0f); @@ -186,7 +190,7 @@ u32 Memory::read32(u32 vaddr) { default: if (vaddr >= VirtualAddrs::VramStart && vaddr < VirtualAddrs::VramStart + VirtualAddrs::VramSize) { static int shutUpCounter = 0; - if (shutUpCounter < 5) { // Stop spamming about VRAM reads after the first 5 + if (shutUpCounter < 5) { // Stop spamming about VRAM reads after the first 5 shutUpCounter++; Helpers::warn("VRAM read!\n"); } @@ -296,149 +300,254 @@ std::string Memory::readString(u32 address, u32 maxSize) { // thanks to the New 3DS having more FCRAM u32 Memory::getLinearHeapVaddr() { return (kernelVersion < 0x22C) ? 
VirtualAddrs::LinearHeapStartOld : VirtualAddrs::LinearHeapStartNew; } -std::optional Memory::allocateMemory(u32 vaddr, u32 paddr, u32 size, bool linear, bool r, bool w, bool x, bool adjustAddrs, bool isMap) { - // Kernel-allocated memory & size must always be aligned to a page boundary - // Additionally assert we don't OoM and that we don't try to allocate physical FCRAM past what's available to userland - // If we're mapping there's no fear of OoM, because we're not really allocating memory, just binding vaddrs to specific paddrs - assert(isAligned(vaddr) && isAligned(paddr) && isAligned(size)); - assert(size <= FCRAM_APPLICATION_SIZE || isMap); - assert(usedUserMemory + size <= FCRAM_APPLICATION_SIZE || isMap); - assert(paddr + size <= FCRAM_APPLICATION_SIZE || isMap); +void Memory::changeMemoryState(u32 vaddr, s32 pages, const Operation& op) { + assert(!(vaddr & 0xFFF)); - // Amount of available user FCRAM pages and FCRAM pages to allocate respectively - const u32 availablePageCount = (FCRAM_APPLICATION_SIZE - usedUserMemory) / pageSize; - const u32 neededPageCount = size / pageSize; + if (!op.changePerms && !op.changeState) Helpers::panic("Invalid op passed to changeMemoryState!"); - assert(availablePageCount >= neededPageCount || isMap); + bool blockFound = false; - // If the paddr is 0, that means we need to select our own - // TODO: Fix. This method always tries to allocate blocks linearly. - // However, if the allocation is non-linear, the panic will trigger when it shouldn't. 
- // Non-linear allocation needs special handling - if (paddr == 0 && adjustAddrs) { - std::optional newPaddr = findPaddr(size); - if (!newPaddr.has_value()) { - Helpers::panic("Failed to find paddr"); + for (auto it = memoryInfo.begin(); it != memoryInfo.end(); it++) { + // Find the block that the memory region is located in + u32 blockStart = it->baseAddr; + u32 blockEnd = it->end(); + + u32 reqStart = vaddr; + u32 reqEnd = vaddr + (pages << 12); + + if (!(reqStart >= blockStart && reqEnd <= blockEnd)) continue; + + // Now that the block has been found, fill it with the necessary info + auto oldState = it->state; + u32 oldPerms = it->perms; + it->baseAddr = reqStart; + it->pages = pages; + if (op.changePerms) it->perms = (op.r ? PERMISSION_R : 0) | (op.w ? PERMISSION_W : 0) | (op.x ? PERMISSION_X : 0); + if (op.changeState) it->state = op.newState; + + // If the requested memory region is smaller than the block found, the block must be split + if (blockStart < reqStart) { + MemoryInfo startBlock(blockStart, (reqStart - blockStart) >> 12, oldPerms, oldState); + memoryInfo.insert(it, startBlock); } - paddr = newPaddr.value(); - assert(paddr + size <= FCRAM_APPLICATION_SIZE || isMap); - } - - // If the vaddr is 0 that means we need to select our own - // Depending on whether our mapping should be linear or not we allocate from one of the 2 typical heap spaces - // We don't plan on implementing freeing any time soon, so we can pick added userUserMemory to the vaddr base to - // Get the full vaddr. 
- // TODO: Fix this - if (vaddr == 0 && adjustAddrs) { - // Linear memory needs to be allocated in a way where you can easily get the paddr by subtracting the linear heap base - // In order to be able to easily send data to hardware like the GPU - if (linear) { - vaddr = getLinearHeapVaddr() + paddr; - } else { - vaddr = usedUserMemory + VirtualAddrs::NormalHeapStart; - } - } - - if (!isMap) { - usedUserMemory += size; - } - - // Do linear mapping - u32 virtualPage = vaddr >> pageShift; - u32 physPage = paddr >> pageShift; // TODO: Special handle when non-linear mapping is necessary - for (u32 i = 0; i < neededPageCount; i++) { - if (r) { - readTable[virtualPage] = uintptr_t(&fcram[physPage * pageSize]); - } - if (w) { - writeTable[virtualPage] = uintptr_t(&fcram[physPage * pageSize]); + if (reqEnd < blockEnd) { + auto itAfter = std::next(it); + MemoryInfo endBlock(reqEnd, (blockEnd - reqEnd) >> 12, oldPerms, oldState); + memoryInfo.insert(itAfter, endBlock); } - // Mark FCRAM page as allocated and go on - usedFCRAMPages[physPage] = true; - virtualPage++; - physPage++; + blockFound = true; + break; } - // Back up the info for this allocation in our memoryInfo vector - u32 perms = (r ? PERMISSION_R : 0) | (w ? PERMISSION_W : 0) | (x ? 
PERMISSION_X : 0); - memoryInfo.push_back(std::move(MemoryInfo(vaddr, size, perms, KernelMemoryTypes::Reserved))); + if (!blockFound) Helpers::panic("Unable to find block in changeMemoryState!"); - return vaddr; + // Merge all blocks with the same state and permissions + for (auto it = memoryInfo.begin(); it != memoryInfo.end();) { + auto next = std::next(it); + if (next == memoryInfo.end()) break; + + if (it->state != next->state || it->perms != next->perms) { + it++; + continue; + } + + next->baseAddr = it->baseAddr; + next->pages += it->pages; + it = memoryInfo.erase(it); + } } -// Find a paddr which we can use for allocating "size" bytes -std::optional Memory::findPaddr(u32 size) { - assert(isAligned(size)); - const u32 neededPages = size / pageSize; +void Memory::queryPhysicalBlocks(FcramBlockList& outList, u32 vaddr, s32 pages) { + s32 srcPages = pages; + for (auto& alloc : memoryInfo) { + u32 blockStart = alloc.baseAddr; + u32 blockEnd = alloc.end(); - // The FCRAM page we're testing to see if it's appropriate to use - u32 candidatePage = 0; - // The number of linear available pages we could find starting from this candidate page. 
- // If this ends up >= than neededPages then the paddr is good (ie we can use the candidate page as a base address) - u32 counter = 0; + if (!(vaddr >= blockStart && vaddr < blockEnd)) continue; - for (u32 i = 0; i < FCRAM_APPLICATION_PAGE_COUNT; i++) { - if (usedFCRAMPages[i]) { // Page is occupied already, go to new candidate - candidatePage = i + 1; - counter = 0; - } else { // The paddr we're testing has 1 more free page - counter++; - // Check if there's enough free memory to use this page - // We use == instead of >= because some software does 0-byte allocations - if (counter >= neededPages) { - return candidatePage * pageSize; + s32 blockPaddr = paddrTable[vaddr >> 12]; + s32 blockPages = alloc.pages - ((vaddr - blockStart) >> 12); + blockPages = std::min(srcPages, blockPages); + FcramBlock physicalBlock(blockPaddr, blockPages); + outList.push_back(physicalBlock); + + vaddr += blockPages << 12; + srcPages -= blockPages; + if (srcPages == 0) break; + } + + if (srcPages != 0) Helpers::panic("Unable to find virtual pages to map!"); +} + +void Memory::mapPhysicalMemory(u32 vaddr, u32 paddr, s32 pages, bool r, bool w, bool x) { + assert(!(vaddr & 0xFFF)); + assert(!(paddr & 0xFFF)); + + // TODO: make this a separate function + u8* hostPtr = nullptr; + if (paddr < FCRAM_SIZE) { + hostPtr = fcram + paddr; // FIXME: FCRAM doesn't actually start from physical address 0, but from 0x20000000 + + if (useFastmem) { + addFastmemView(vaddr, FASTMEM_FCRAM_OFFSET + paddr, usize(pages) * pageSize, w); + } + } else if (paddr >= VirtualAddrs::DSPMemStart && paddr < VirtualAddrs::DSPMemStart + DSP_RAM_SIZE) { + hostPtr = dspRam + (paddr - VirtualAddrs::DSPMemStart); + } + + for (int i = 0; i < pages; i++) { + u32 index = (vaddr >> 12) + i; + paddrTable[index] = paddr + (i << 12); + if (r) + readTable[index] = (uintptr_t)(hostPtr + (i << 12)); + else + readTable[index] = 0; + + if (w) + writeTable[index] = (uintptr_t)(hostPtr + (i << 12)); + else + writeTable[index] = 0; + } +} 
+ +void Memory::unmapPhysicalMemory(u32 vaddr, u32 paddr, s32 pages) { + for (int i = 0; i < pages; i++) { + u32 index = (vaddr >> 12) + i; + paddrTable[index] = 0; + readTable[index] = 0; + writeTable[index] = 0; + } + + if (useFastmem) { + arena->Unmap(vaddr, pages * pageSize, false); + } +} + +bool Memory::allocMemory(u32 vaddr, s32 pages, FcramRegion region, bool r, bool w, bool x, MemoryState state) { + auto res = testMemoryState(vaddr, pages, MemoryState::Free); + if (res.isFailure()) return false; + + FcramBlockList memList; + fcramManager.alloc(memList, pages, region, false); + + for (auto it = memList.begin(); it != memList.end(); it++) { + Operation op{.newState = state, .r = r, .w = w, .x = x, .changeState = true, .changePerms = true}; + changeMemoryState(vaddr, it->pages, op); + mapPhysicalMemory(vaddr, it->paddr, it->pages, r, w, x); + vaddr += it->pages << 12; + } + + return true; +} + +bool Memory::allocMemoryLinear(u32& outVaddr, u32 inVaddr, s32 pages, FcramRegion region, bool r, bool w, bool x) { + if (inVaddr) Helpers::panic("inVaddr specified for linear allocation!"); + + FcramBlockList memList; + fcramManager.alloc(memList, pages, region, true); + + u32 paddr = memList.begin()->paddr; + u32 vaddr = getLinearHeapVaddr() + paddr; + auto res = testMemoryState(vaddr, pages, MemoryState::Free); + if (res.isFailure()) Helpers::panic("Unable to map linear allocation (vaddr:%08X pages:%08X)", vaddr, pages); + + Operation op{.newState = MemoryState::Continuous, .r = r, .w = w, .x = x, .changeState = true, .changePerms = true}; + changeMemoryState(vaddr, pages, op); + mapPhysicalMemory(vaddr, paddr, pages, r, w, x); + + outVaddr = vaddr; + return true; +} + +bool Memory::mapVirtualMemory( + u32 dstVaddr, u32 srcVaddr, s32 pages, bool r, bool w, bool x, MemoryState oldDstState, MemoryState oldSrcState, MemoryState newDstState, + MemoryState newSrcState, bool unmapPages +) { + // Check that the regions have the specified state + // TODO: check src perms + 
auto res = testMemoryState(srcVaddr, pages, oldSrcState); + if (res.isFailure()) return false; + + res = testMemoryState(dstVaddr, pages, oldDstState); + if (res.isFailure()) return false; + + // Change the virtual memory state for both regions + Operation srcOp{.newState = newSrcState, .changeState = true}; + changeMemoryState(srcVaddr, pages, srcOp); + + Operation dstOp{.newState = newDstState, .r = r, .w = w, .x = x, .changeState = true, .changePerms = true}; + changeMemoryState(dstVaddr, pages, dstOp); + + // Get a list of physical blocks in the source region + FcramBlockList physicalList; + queryPhysicalBlocks(physicalList, srcVaddr, pages); + + // Map or unmap each physical block + for (auto& block : physicalList) { + if (newDstState == MemoryState::Free) { + // TODO: Games with CROs will unmap the CRO yet still continue accessing the address it was mapped to? + if (unmapPages) { + unmapPhysicalMemory(dstVaddr, block.paddr, block.pages); } + } else { + mapPhysicalMemory(dstVaddr, block.paddr, block.pages, r, w, x); } + + dstVaddr += block.pages << 12; } - // Couldn't find any page :( - return std::nullopt; + return true; } -u32 Memory::allocateSysMemory(u32 size) { - // Should never be triggered, only here as a sanity check - if (!isAligned(size)) { - Helpers::panic("Memory::allocateSysMemory: Size is not page aligned (val = %08X)", size); +void Memory::changePermissions(u32 vaddr, s32 pages, bool r, bool w, bool x) { + Operation op{.r = r, .w = w, .x = x, .changePerms = true}; + changeMemoryState(vaddr, pages, op); + + // Now that permissions have been changed, update the corresponding host tables + FcramBlockList physicalList; + queryPhysicalBlocks(physicalList, vaddr, pages); + + for (auto& block : physicalList) { + mapPhysicalMemory(vaddr, block.paddr, block.pages, r, w, x); + vaddr += block.pages; } - - // We use a pretty dumb allocator for OS memory since this is not really accessible to the app and is only used internally - // It works by just 
allocating memory linearly, starting from index 0 of OS memory and going up - // This should also be unreachable in practice and exists as a sanity check - if (size > remainingSysFCRAM()) { - Helpers::panic("Memory::allocateSysMemory: Overflowed OS FCRAM"); - } - - const u32 pageCount = size / pageSize; // Number of pages that will be used up - const u32 startIndex = sysFCRAMIndex() + usedSystemMemory; // Starting FCRAM index - const u32 startingPage = startIndex / pageSize; - - for (u32 i = 0; i < pageCount; i++) { - if (usedFCRAMPages[startingPage + i]) // Also a theoretically unreachable panic for safety - Helpers::panic("Memory::reserveMemory: Trying to reserve already reserved memory"); - usedFCRAMPages[startingPage + i] = true; - } - - usedSystemMemory += size; - return startIndex; } -// The way I understand how the kernel's QueryMemory is supposed to work is that you give it a vaddr -// And the kernel looks up the memory allocations it's performed, finds which one it belongs in and returns its info? -// TODO: Verify this -MemoryInfo Memory::queryMemory(u32 vaddr) { +Result::HorizonResult Memory::queryMemory(MemoryInfo& out, u32 vaddr) { // Check each allocation for (auto& alloc : memoryInfo) { // Check if the memory address belongs in this allocation and return the info if so if (vaddr >= alloc.baseAddr && vaddr < alloc.end()) { - return alloc; + out = alloc; + return Result::Success; } } - // Otherwise, if this vaddr was never allocated - // TODO: I think this is meant to return how much memory starting here is free as the size? 
- return MemoryInfo(vaddr, pageSize, 0, KernelMemoryTypes::Free); + // Official kernel just returns an error here + Helpers::warn("Failed to find block in QueryMemory!"); + return Result::FailurePlaceholder; +} + +Result::HorizonResult Memory::testMemoryState(u32 vaddr, s32 pages, MemoryState desiredState) { + for (auto& alloc : memoryInfo) { + // Don't bother checking if we're to the left of the requested region + if (vaddr >= alloc.end()) continue; + if (alloc.state != desiredState) return Result::FailurePlaceholder; // TODO: error for state mismatch + + // If the end of this block comes after the end of the requested range with no errors, it's a success + if (alloc.end() >= vaddr + (pages << 12)) return Result::Success; + } + + // TODO: error for when address is outside of userland + return Result::FailurePlaceholder; +} + +void Memory::copyToVaddr(u32 dstVaddr, const u8* srcHost, s32 size) { + // TODO: check for noncontiguous allocations + u8* dstHost = (u8*)readTable[dstVaddr >> 12] + (dstVaddr & 0xFFF); + memcpy(dstHost, srcHost, size); } u8* Memory::mapSharedMemory(Handle handle, u32 vaddr, u32 myPerms, u32 otherPerms) { @@ -459,13 +568,11 @@ u8* Memory::mapSharedMemory(Handle handle, u32 vaddr, u32 myPerms, u32 otherPerm bool w = myPerms & 0b010; bool x = myPerms & 0b100; - const auto result = allocateMemory(vaddr, paddr, size, true, r, w, x, false, true); - e.mapped = true; - if (!result.has_value()) { - Helpers::panic("Memory::mapSharedMemory: Failed to map shared memory block"); - return nullptr; - } + Operation op{.newState = MemoryState::Shared, .r = r, .w = x, .x = x, .changeState = true, .changePerms = true}; + changeMemoryState(vaddr, size >> 12, op); + mapPhysicalMemory(vaddr, paddr, size >> 12, r, w, x); + e.mapped = true; return &fcram[paddr]; } } @@ -475,24 +582,6 @@ u8* Memory::mapSharedMemory(Handle handle, u32 vaddr, u32 myPerms, u32 otherPerm return nullptr; } -void Memory::mirrorMapping(u32 destAddress, u32 sourceAddress, u32 size) { - // 
Should theoretically be unreachable, only here for safety purposes - assert(isAligned(destAddress) && isAligned(sourceAddress) && isAligned(size)); - - const u32 pageCount = size / pageSize; // How many pages we need to mirror - for (u32 i = 0; i < pageCount; i++) { - // Redo the shift here to "properly" handle wrapping around the address space instead of reading OoB - const u32 sourcePage = sourceAddress / pageSize; - const u32 destPage = destAddress / pageSize; - - readTable[destPage] = readTable[sourcePage]; - writeTable[destPage] = writeTable[sourcePage]; - - sourceAddress += pageSize; - destAddress += pageSize; - } -} - // Get the number of ms since Jan 1 1900 u64 Memory::timeSince3DSEpoch() { using namespace std::chrono; diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 57ccca18..659a1e1d 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -2,6 +2,7 @@ #include +#include #include #include @@ -11,10 +12,10 @@ #include "PICA/pica_hash.hpp" #include "PICA/pica_simd.hpp" #include "PICA/regs.hpp" -#include "screen_layout.hpp" #include "PICA/shader_decompiler.hpp" #include "config.hpp" #include "math_util.hpp" +#include "screen_layout.hpp" CMRC_DECLARE(RendererGL); @@ -71,19 +72,13 @@ void RendererGL::initGraphicsContextInternal() { // Create stream buffers for vertex, index and uniform buffers static constexpr usize hwIndexBufferSize = 2_MB; static constexpr usize hwVertexBufferSize = 16_MB; + static constexpr usize hwShaderUniformUBOSize = 4_MB; + static constexpr usize shadergenFragmentUBOSize = 4_MB; hwIndexBuffer = StreamBuffer::Create(GL_ELEMENT_ARRAY_BUFFER, hwIndexBufferSize); hwVertexBuffer = StreamBuffer::Create(GL_ARRAY_BUFFER, hwVertexBufferSize); - - // Allocate memory for the shadergen fragment uniform UBO - glGenBuffers(1, &shadergenFragmentUBO); - gl.bindUBO(shadergenFragmentUBO); - glBufferData(GL_UNIFORM_BUFFER, sizeof(PICA::FragmentUniforms), nullptr, 
GL_DYNAMIC_DRAW); - - // Allocate memory for the accelerated vertex shader uniform UBO - glGenBuffers(1, &hwShaderUniformUBO); - gl.bindUBO(hwShaderUniformUBO); - glBufferData(GL_UNIFORM_BUFFER, PICAShader::totalUniformSize(), nullptr, GL_DYNAMIC_DRAW); + hwShaderUniformUBO = StreamBuffer::Create(GL_UNIFORM_BUFFER, hwShaderUniformUBOSize); + shadergenFragmentUBO = StreamBuffer::Create(GL_UNIFORM_BUFFER, shadergenFragmentUBOSize); vbo.createFixedSize(sizeof(Vertex) * vertexBufferSize * 2, GL_STREAM_DRAW); vbo.bind(); @@ -185,6 +180,10 @@ void RendererGL::initGraphicsContextInternal() { driverInfo.supportsExtFbFetch = (GLAD_GL_EXT_shader_framebuffer_fetch != 0); driverInfo.supportsArmFbFetch = (GLAD_GL_ARM_shader_framebuffer_fetch != 0); + // UBOs have an alignment requirement we have to respect + glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, reinterpret_cast(&driverInfo.uboAlignment)); + driverInfo.uboAlignment = std::max(driverInfo.uboAlignment, 16); + // Initialize the default vertex shader used with shadergen std::string defaultShadergenVSSource = fragShaderGen.getDefaultVertexShader(); defaultShadergenVs.create({defaultShadergenVSSource.c_str(), defaultShadergenVSSource.size()}, OpenGL::Vertex); @@ -192,7 +191,7 @@ void RendererGL::initGraphicsContextInternal() { // The OpenGL renderer doesn't need to do anything with the GL context (For Qt frontend) or the SDL window (For SDL frontend) // So we just call initGraphicsContextInternal for both -void RendererGL::initGraphicsContext([[maybe_unused]] SDL_Window* window) { initGraphicsContextInternal(); } +void RendererGL::initGraphicsContext([[maybe_unused]] void* context) { initGraphicsContextInternal(); } // Set up the OpenGL blending context to match the emulated PICA void RendererGL::setupBlending() { @@ -936,10 +935,6 @@ OpenGL::Program& RendererGL::getSpecializedShader() { glUniformBlockBinding(program.handle(), vertexUBOIndex, vsUBOBlockBinding); } } - glBindBufferBase(GL_UNIFORM_BUFFER, fsUBOBlockBinding, 
shadergenFragmentUBO); - if (usingAcceleratedShader) { - glBindBufferBase(GL_UNIFORM_BUFFER, vsUBOBlockBinding, hwShaderUniformUBO); - } // Upload uniform data to our shader's UBO PICA::FragmentUniforms uniforms; @@ -1023,8 +1018,22 @@ OpenGL::Program& RendererGL::getSpecializedShader() { } } - gl.bindUBO(shadergenFragmentUBO); - glBufferSubData(GL_UNIFORM_BUFFER, 0, sizeof(PICA::FragmentUniforms), &uniforms); + // Upload fragment uniforms to UBO + shadergenFragmentUBO->Bind(); + auto uboRes = shadergenFragmentUBO->Map(driverInfo.uboAlignment, sizeof(PICA::FragmentUniforms)); + std::memcpy(uboRes.pointer, &uniforms, sizeof(PICA::FragmentUniforms)); + shadergenFragmentUBO->Unmap(sizeof(PICA::FragmentUniforms)); + + // Bind our UBOs + glBindBufferRange( + GL_UNIFORM_BUFFER, fsUBOBlockBinding, shadergenFragmentUBO->GetGLBufferId(), uboRes.buffer_offset, sizeof(PICA::FragmentUniforms) + ); + + if (usingAcceleratedShader) { + glBindBufferRange( + GL_UNIFORM_BUFFER, vsUBOBlockBinding, hwShaderUniformUBO->GetGLBufferId(), hwShaderUniformUBOOffset, PICAShader::totalUniformSize() + ); + } return program; } @@ -1074,11 +1083,16 @@ bool RendererGL::prepareForDraw(ShaderUnit& shaderUnit, PICA::DrawAcceleration* usingAcceleratedShader = false; } else { generatedVertexShader = &(*shader); - gl.bindUBO(hwShaderUniformUBO); + hwShaderUniformUBO->Bind(); + // Upload shader uniforms to our UBO if (shaderUnit.vs.uniformsDirty) { shaderUnit.vs.uniformsDirty = false; - glBufferSubData(GL_UNIFORM_BUFFER, 0, PICAShader::totalUniformSize(), shaderUnit.vs.getUniformPointer()); + auto uboRes = hwShaderUniformUBO->Map(driverInfo.uboAlignment, PICAShader::totalUniformSize()); + std::memcpy(uboRes.pointer, shaderUnit.vs.getUniformPointer(), PICAShader::totalUniformSize()); + hwShaderUniformUBO->Unmap(PICAShader::totalUniformSize()); + + hwShaderUniformUBOOffset = uboRes.buffer_offset; } performIndexedRender = accel->indexed; diff --git a/src/core/renderer_mtl/pica_to_mtl.cpp 
b/src/core/renderer_mtl/pica_to_mtl.cpp index 973ad1bf..21b520f2 100644 --- a/src/core/renderer_mtl/pica_to_mtl.cpp +++ b/src/core/renderer_mtl/pica_to_mtl.cpp @@ -16,27 +16,27 @@ namespace PICA { decodeTexelAI8ToRG8, true, { - .red = MTL::TextureSwizzleRed, - .green = MTL::TextureSwizzleRed, - .blue = MTL::TextureSwizzleRed, - .alpha = MTL::TextureSwizzleGreen, + MTL::TextureSwizzleRed, + MTL::TextureSwizzleRed, + MTL::TextureSwizzleRed, + MTL::TextureSwizzleGreen, }}, // IA8 {MTL::PixelFormatRG8Unorm, 2, decodeTexelGR8ToRG8}, // RG8 {MTL::PixelFormatR8Unorm, 1, decodeTexelI8ToR8, true, - {.red = MTL::TextureSwizzleRed, .green = MTL::TextureSwizzleRed, .blue = MTL::TextureSwizzleRed, .alpha = MTL::TextureSwizzleOne}}, // I8 - {MTL::PixelFormatA8Unorm, 1, decodeTexelA8ToA8}, // A8 - {MTL::PixelFormatABGR4Unorm, 2, decodeTexelAI4ToABGR4}, // IA4 + {MTL::TextureSwizzleRed, MTL::TextureSwizzleRed, MTL::TextureSwizzleRed, MTL::TextureSwizzleOne}}, // I8 + {MTL::PixelFormatA8Unorm, 1, decodeTexelA8ToA8}, // A8 + {MTL::PixelFormatABGR4Unorm, 2, decodeTexelAI4ToABGR4}, // IA4 {MTL::PixelFormatR8Unorm, 1, decodeTexelI4ToR8, true, - {.red = MTL::TextureSwizzleRed, .green = MTL::TextureSwizzleRed, .blue = MTL::TextureSwizzleRed, .alpha = MTL::TextureSwizzleOne}}, // I4 - {MTL::PixelFormatA8Unorm, 1, decodeTexelA4ToA8}, // A4 - {MTL::PixelFormatRGBA8Unorm, 4, decodeTexelETC1ToRGBA8}, // ETC1 - {MTL::PixelFormatRGBA8Unorm, 4, decodeTexelETC1A4ToRGBA8}, // ETC1A4 + {MTL::TextureSwizzleRed, MTL::TextureSwizzleRed, MTL::TextureSwizzleRed, MTL::TextureSwizzleOne}}, // I4 + {MTL::PixelFormatA8Unorm, 1, decodeTexelA4ToA8}, // A4 + {MTL::PixelFormatRGBA8Unorm, 4, decodeTexelETC1ToRGBA8}, // ETC1 + {MTL::PixelFormatRGBA8Unorm, 4, decodeTexelETC1A4ToRGBA8}, // ETC1A4 }; void checkForMTLPixelFormatSupport(MTL::Device* device) { @@ -57,10 +57,10 @@ namespace PICA { decodeTexelAI4ToRG8, true, { - .red = MTL::TextureSwizzleRed, - .green = MTL::TextureSwizzleRed, - .blue = 
MTL::TextureSwizzleRed, - .alpha = MTL::TextureSwizzleGreen, + MTL::TextureSwizzleRed, + MTL::TextureSwizzleRed, + MTL::TextureSwizzleRed, + MTL::TextureSwizzleGreen, } }; } diff --git a/src/core/renderer_mtl/renderer_mtl.cpp b/src/core/renderer_mtl/renderer_mtl.cpp index 71cdd616..adee8f3c 100644 --- a/src/core/renderer_mtl/renderer_mtl.cpp +++ b/src/core/renderer_mtl/renderer_mtl.cpp @@ -10,8 +10,8 @@ #include "PICA/gpu.hpp" #include "PICA/pica_hash.hpp" -#include "screen_layout.hpp" #include "SDL_metal.h" +#include "screen_layout.hpp" using namespace PICA; @@ -57,9 +57,7 @@ void RendererMTL::reset() { colorRenderTargetCache.reset(); } -void RendererMTL::setMTKLayer(void* layer) { - metalLayer = (CA::MetalLayer*)layer; -} +void RendererMTL::setMTKLayer(void* layer) { metalLayer = (CA::MetalLayer*)layer; } void RendererMTL::display() { CA::MetalDrawable* drawable = metalLayer->nextDrawable(); @@ -151,13 +149,13 @@ void RendererMTL::display() { drawable->release(); } -void RendererMTL::initGraphicsContext(SDL_Window* window) { - // On iOS, the SwiftUI side handles the MetalLayer -#ifdef PANDA3DS_IOS +void RendererMTL::initGraphicsContext(void* window) { + // On Qt and iOS, the frontend handles the Metal layer +#if defined(PANDA3DS_FRONTEND_QT) || defined(PANDA3DS_IOS) device = MTL::CreateSystemDefaultDevice(); #else // TODO: what should be the type of the view? 
- void* view = SDL_Metal_CreateView(window); + void* view = SDL_Metal_CreateView((SDL_Window*)window); metalLayer = (CA::MetalLayer*)SDL_Metal_GetLayer(view); device = MTL::CreateSystemDefaultDevice(); metalLayer->setDevice(device); diff --git a/src/core/renderer_null/renderer_null.cpp b/src/core/renderer_null/renderer_null.cpp index 4be9d089..4db2696f 100644 --- a/src/core/renderer_null/renderer_null.cpp +++ b/src/core/renderer_null/renderer_null.cpp @@ -6,7 +6,7 @@ RendererNull::~RendererNull() {} void RendererNull::reset() {} void RendererNull::display() {} -void RendererNull::initGraphicsContext(SDL_Window* window) {} +void RendererNull::initGraphicsContext(void* context) {} void RendererNull::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) {} void RendererNull::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) {} void RendererNull::textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) {} diff --git a/src/core/renderer_sw/renderer_sw.cpp b/src/core/renderer_sw/renderer_sw.cpp index 86b6032f..a117e373 100644 --- a/src/core/renderer_sw/renderer_sw.cpp +++ b/src/core/renderer_sw/renderer_sw.cpp @@ -7,7 +7,7 @@ RendererSw::~RendererSw() {} void RendererSw::reset() { printf("RendererSW: Unimplemented reset call\n"); } void RendererSw::display() { printf("RendererSW: Unimplemented display call\n"); } -void RendererSw::initGraphicsContext(SDL_Window* window) { printf("RendererSW: Unimplemented initGraphicsContext call\n"); } +void RendererSw::initGraphicsContext(void* context) { printf("RendererSW: Unimplemented initGraphicsContext call\n"); } void RendererSw::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) { printf("RendererSW: Unimplemented clearBuffer call\n"); } void RendererSw::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) { diff --git a/src/core/renderer_vk/renderer_vk.cpp 
b/src/core/renderer_vk/renderer_vk.cpp index 57533bde..0bf9fae5 100644 --- a/src/core/renderer_vk/renderer_vk.cpp +++ b/src/core/renderer_vk/renderer_vk.cpp @@ -173,7 +173,8 @@ std::tuple createGraphicsPipeline( vk::PipelineDynamicStateCreateInfo dynamicState = {}; static vk::DynamicState dynamicStates[] = {// The viewport and scissor of the framebuffer will be dynamic at // run-time - vk::DynamicState::eViewport, vk::DynamicState::eScissor}; + vk::DynamicState::eViewport, vk::DynamicState::eScissor + }; dynamicState.dynamicStateCount = std::size(dynamicStates); dynamicState.pDynamicStates = dynamicStates; @@ -469,7 +470,8 @@ vk::RenderPass RendererVK::getRenderPass(vk::Format colorFormat, std::optional("vkGetInstanceProcAddr")); @@ -978,8 +980,8 @@ void RendererVK::initGraphicsContext(SDL_Window* window) { } // Create surface - if (window) { - if (VkSurfaceKHR newSurface; SDL_Vulkan_CreateSurface(window, instance.get(), &newSurface)) { + if (targetWindow) { + if (VkSurfaceKHR newSurface; SDL_Vulkan_CreateSurface(targetWindow, instance.get(), &newSurface)) { swapchainSurface = newSurface; } else { Helpers::warn("Error creating Vulkan surface"); @@ -1127,7 +1129,7 @@ void RendererVK::initGraphicsContext(SDL_Window* window) { vk::Extent2D swapchainExtent; { int windowWidth, windowHeight; - SDL_Vulkan_GetDrawableSize(window, &windowWidth, &windowHeight); + SDL_Vulkan_GetDrawableSize(targetWindow, &windowWidth, &windowHeight); swapchainExtent.width = windowWidth; swapchainExtent.height = windowHeight; } @@ -1275,7 +1277,8 @@ void RendererVK::initGraphicsContext(SDL_Window* window) { static vk::DescriptorSetLayoutBinding displayShaderLayout[] = { {// Just a singular texture slot - 0, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment}, + 0, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment + }, }; if (auto createResult = Vulkan::DescriptorUpdateBatch::create(device.get()); createResult.has_value()) { @@ 
-1407,7 +1410,8 @@ void RendererVK::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 co static vk::ImageSubresourceRange depthStencilRanges[2] = { vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eDepth, 0, 1, 0, 1), - vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eStencil, 0, 1, 0, 1)}; + vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eStencil, 0, 1, 0, 1) + }; // Clear RenderTarget getCurrentCommandBuffer().clearDepthStencilImage( diff --git a/src/core/screen_layout.cpp b/src/core/screen_layout.cpp index 93407986..90dc2dd3 100644 --- a/src/core/screen_layout.cpp +++ b/src/core/screen_layout.cpp @@ -147,4 +147,3 @@ const char* ScreenLayout::layoutToString(Layout layout) { default: return "invalid"; } } - \ No newline at end of file diff --git a/src/core/services/dsp.cpp b/src/core/services/dsp.cpp index 3f976f6a..be750dc6 100644 --- a/src/core/services/dsp.cpp +++ b/src/core/services/dsp.cpp @@ -4,7 +4,6 @@ #include #include -#include #include #include diff --git a/src/core/services/frd.cpp b/src/core/services/frd.cpp index 63e951a6..d8bcd56f 100644 --- a/src/core/services/frd.cpp +++ b/src/core/services/frd.cpp @@ -27,6 +27,7 @@ namespace FRDCommands { GetFriendAttributeFlags = 0x00170042, UpdateGameModeDescription = 0x001D0002, + SaveLocalAccountData = 0x04050000, UpdateMii = 0x040C0800, }; } @@ -61,6 +62,7 @@ void FRDService::handleSyncRequest(u32 messagePointer, FRDService::Type type) { if (type == Type::A) { switch (command) { case FRDCommands::UpdateMii: updateMii(messagePointer); break; + case FRDCommands::SaveLocalAccountData: saveLocalAccountData(messagePointer); break; default: Helpers::panic("FRD:A service requested. 
Command: %08X\n", command); break; } } else { @@ -265,6 +267,13 @@ void FRDService::logout(u32 messagePointer) { mem.write32(messagePointer + 4, Result::Success); } +void FRDService::saveLocalAccountData(u32 messagePointer) { + log("FRD::SaveLocalAccountData (stubbed)\n"); + + mem.write32(messagePointer, IPC::responseHeader(0x405, 1, 0)); + mem.write32(messagePointer + 4, Result::Success); +} + void FRDService::updateMii(u32 messagePointer) { log("FRD::UpdateMii (stubbed)\n"); diff --git a/src/core/services/fs.cpp b/src/core/services/fs.cpp index 18dd0b21..5d6cb44f 100644 --- a/src/core/services/fs.cpp +++ b/src/core/services/fs.cpp @@ -194,7 +194,11 @@ void FSService::handleSyncRequest(u32 messagePointer) { case FSCommands::SetThisSaveDataSecureValue: setThisSaveDataSecureValue(messagePointer); break; case FSCommands::AbnegateAccessRight: abnegateAccessRight(messagePointer); break; case FSCommands::TheGameboyVCFunction: theGameboyVCFunction(messagePointer); break; - default: Helpers::panic("FS service requested. Command: %08X\n", command); + + default: + Helpers::warn("Unimplemented FS service requested. 
Command: %08X\n", command); + mem.write32(messagePointer + 4, Result::Success); + break; } } diff --git a/src/core/services/hid.cpp b/src/core/services/hid.cpp index 695e0329..c149a2a3 100644 --- a/src/core/services/hid.cpp +++ b/src/core/services/hid.cpp @@ -118,7 +118,6 @@ void HIDService::getGyroscopeCoefficient(u32 messagePointer) { // The volume here is in the range [0, 0x3F] // It is read directly from I2C Device 3 register 0x09 -// Since we currently do not have audio, set the volume a bit below max (0x30) void HIDService::getSoundVolume(u32 messagePointer) { log("HID::GetSoundVolume\n"); constexpr u8 volume = 0x30; @@ -237,7 +236,7 @@ void HIDService::updateInputs(u64 currentTick) { // For some reason, the original developers decided to signal the HID events each time the OS rescanned inputs // Rather than once every time the state of a key, or the accelerometer state, etc is updated - // This means that the OS will signal the events even if literally nothing happened + // This means that the OS will signal the events even if nothing happened // Some games such as Majora's Mask rely on this behaviour. 
if (eventsInitialized) { for (auto& e : events) { diff --git a/src/core/services/ldr_ro.cpp b/src/core/services/ldr_ro.cpp index e1c5bde6..561bdf3e 100644 --- a/src/core/services/ldr_ro.cpp +++ b/src/core/services/ldr_ro.cpp @@ -23,7 +23,8 @@ namespace CROHeader { NameOffset = 0x084, NextCRO = 0x088, PrevCRO = 0x08C, - FixedSize = 0x98, + FileSize = 0x090, + FixedSize = 0x098, OnUnresolved = 0x0AC, CodeOffset = 0x0B0, DataOffset = 0x0B8, @@ -146,8 +147,10 @@ static const std::string CRO_MAGIC("CRO0"); static const std::string CRO_MAGIC_FIXED("FIXD"); static const std::string CRR_MAGIC("CRR0"); +using namespace KernelMemoryTypes; + class CRO { - Memory &mem; + Memory& mem; u32 croPointer; // Origin address of CRO in RAM u32 oldDataSegmentOffset; @@ -155,7 +158,7 @@ class CRO { bool isCRO; // False if CRS public: - CRO(Memory &mem, u32 croPointer, bool isCRO) : mem(mem), croPointer(croPointer), oldDataSegmentOffset(0), isCRO(isCRO) {} + CRO(Memory& mem, u32 croPointer, bool isCRO) : mem(mem), croPointer(croPointer), oldDataSegmentOffset(0), isCRO(isCRO) {} ~CRO() = default; std::string getModuleName() { @@ -164,25 +167,15 @@ class CRO { return mem.readString(moduleName.offset, moduleName.size); } - u32 getNextCRO() { - return mem.read32(croPointer + CROHeader::NextCRO); - } - - u32 getPrevCRO() { - return mem.read32(croPointer + CROHeader::PrevCRO); - } + u32 getNextCRO() { return mem.read32(croPointer + CROHeader::NextCRO); } - u32 getFixedSize() { - return mem.read32(croPointer + CROHeader::FixedSize); - } + u32 getPrevCRO() { return mem.read32(croPointer + CROHeader::PrevCRO); } - void setNextCRO(u32 nextCRO) { - mem.write32(croPointer + CROHeader::NextCRO, nextCRO); - } + u32 getFixedSize() { return mem.read32(croPointer + CROHeader::FixedSize); } - void setPrevCRO(u32 prevCRO) { - mem.write32(croPointer + CROHeader::PrevCRO, prevCRO); - } + void setNextCRO(u32 nextCRO) { mem.write32(croPointer + CROHeader::NextCRO, nextCRO); } + void setPrevCRO(u32 prevCRO) { 
mem.write32(croPointer + CROHeader::PrevCRO, prevCRO); } + u32 getSize() { return mem.read32(croPointer + CROHeader::FileSize); } void write32(u32 addr, u32 value) { // Note: some games export symbols to the static module, which doesn't contain any segments. @@ -228,21 +221,17 @@ class CRO { return entryOffset + offset; } - u32 getOnUnresolvedAddr() { - return getSegmentAddr(mem.read32(croPointer + CROHeader::OnUnresolved)); - } + u32 getOnUnresolvedAddr() { return getSegmentAddr(mem.read32(croPointer + CROHeader::OnUnresolved)); } u32 getNamedExportSymbolAddr(const std::string& symbolName) { // Note: The CRO contains a trie for fast symbol lookup. For simplicity, // we won't use it and instead look up the symbol in the named export symbol table const u32 exportStringSize = mem.read32(croPointer + CROHeader::ExportStringSize); - const CROHeaderEntry namedExportTable = getHeaderEntry(CROHeader::NamedExportTableOffset); for (u32 namedExport = 0; namedExport < namedExportTable.size; namedExport++) { const u32 nameOffset = mem.read32(namedExportTable.offset + 8 * namedExport + NamedExportTable::NameOffset); - const std::string exportSymbolName = mem.readString(nameOffset, exportStringSize); if (symbolName.compare(exportSymbolName) == 0) { @@ -437,7 +426,7 @@ class CRO { return true; } - bool rebaseSegmentTable(u32 dataVaddr, u32 bssVaddr, u32 *oldDataVaddr) { + bool rebaseSegmentTable(u32 dataVaddr, u32 bssVaddr, u32* oldDataVaddr) { const CROHeaderEntry segmentTable = getHeaderEntry(CROHeader::SegmentTableOffset); for (u32 segment = 0; segment < segmentTable.size; segment++) { @@ -446,13 +435,16 @@ class CRO { const u32 segmentID = mem.read32(segmentTable.offset + 12 * segment + SegmentTable::ID); switch (segmentID) { case SegmentTable::SegmentID::DATA: - *oldDataVaddr = segmentOffset + croPointer; oldDataSegmentOffset = segmentOffset; segmentOffset = dataVaddr; break; + *oldDataVaddr = segmentOffset + croPointer; + oldDataSegmentOffset = segmentOffset; + 
segmentOffset = dataVaddr; + break; case SegmentTable::SegmentID::BSS: segmentOffset = bssVaddr; break; case SegmentTable::SegmentID::TEXT: case SegmentTable::SegmentID::RODATA: - if (segmentOffset != 0) segmentOffset += croPointer; break; - default: - Helpers::panic("Unknown segment ID = %u", segmentID); + if (segmentOffset != 0) segmentOffset += croPointer; + break; + default: Helpers::panic("Unknown segment ID = %u", segmentID); } mem.write32(segmentTable.offset + 12 * segment + SegmentTable::Offset, segmentOffset); @@ -473,9 +465,9 @@ class CRO { case SegmentTable::SegmentID::BSS: segmentOffset = 0; break; case SegmentTable::SegmentID::TEXT: case SegmentTable::SegmentID::RODATA: - if (segmentOffset != 0) segmentOffset -= croPointer; break; - default: - Helpers::panic("Unknown segment ID = %u", segmentID); + if (segmentOffset != 0) segmentOffset -= croPointer; + break; + default: Helpers::panic("Unknown segment ID = %u", segmentID); } mem.write32(segmentTable.offset + 12 * segment + SegmentTable::Offset, segmentOffset); @@ -639,7 +631,9 @@ class CRO { u32 relocationOffset = mem.read32(anonymousImportTable.offset + 8 * anonymousImport + AnonymousImportTable::RelocationOffset); if (relocationOffset != 0) { - mem.write32(anonymousImportTable.offset + 8 * anonymousImport + AnonymousImportTable::RelocationOffset, relocationOffset + croPointer); + mem.write32( + anonymousImportTable.offset + 8 * anonymousImport + AnonymousImportTable::RelocationOffset, relocationOffset + croPointer + ); } } @@ -653,7 +647,9 @@ class CRO { u32 relocationOffset = mem.read32(anonymousImportTable.offset + 8 * anonymousImport + AnonymousImportTable::RelocationOffset); if (relocationOffset != 0) { - mem.write32(anonymousImportTable.offset + 8 * anonymousImport + AnonymousImportTable::RelocationOffset, relocationOffset - croPointer); + mem.write32( + anonymousImportTable.offset + 8 * anonymousImport + AnonymousImportTable::RelocationOffset, relocationOffset - croPointer + ); } } @@ -673,7 
+669,6 @@ class CRO { const u32 addend = mem.read32(relocationPatchTable.offset + 12 * relocationPatch + RelocationPatch::Addend); const u32 segmentAddr = getSegmentAddr(segmentOffset); - const u32 entryID = mem.read32(segmentTable.offset + 12 * (segmentOffset & 0xF) + SegmentTable::ID); u32 relocationTarget = segmentAddr; @@ -1198,9 +1193,7 @@ class CRO { } }; -void LDRService::reset() { - loadedCRS = 0; -} +void LDRService::reset() { loadedCRS = 0; } void LDRService::handleSyncRequest(u32 messagePointer) { const u32 command = mem.read32(messagePointer); @@ -1245,7 +1238,14 @@ void LDRService::initialize(u32 messagePointer) { } // Map CRO to output address - mem.mirrorMapping(mapVaddr, crsPointer, size); + // TODO: how to handle permissions? + bool succeeded = mem.mapVirtualMemory( + mapVaddr, crsPointer, size >> 12, true, true, true, MemoryState::Free, MemoryState::Private, MemoryState::Locked, MemoryState::AliasCode + ); + + if (!succeeded) { + Helpers::panic("Failed to map CRS"); + } CRO crs(mem, mapVaddr, false); @@ -1312,7 +1312,9 @@ void LDRService::loadCRO(u32 messagePointer, bool isNew) { const u32 fixLevel = mem.read32(messagePointer + 40); const Handle process = mem.read32(messagePointer + 52); - log("LDR_RO::LoadCRO (isNew = %d, buffer = %08X, vaddr = %08X, size = %08X, .data vaddr = %08X, .data size = %08X, .bss vaddr = %08X, .bss size = %08X, auto link = %d, fix level = %X, process = %X)\n", isNew, croPointer, mapVaddr, size, dataVaddr, dataSize, bssVaddr, bssSize, autoLink, fixLevel, process); + log("LDR_RO::LoadCRO (isNew = %d, buffer = %08X, vaddr = %08X, size = %08X, .data vaddr = %08X, .data size = %08X, .bss vaddr = %08X, .bss size " + "= %08X, auto link = %d, fix level = %X, process = %X)\n", + isNew, croPointer, mapVaddr, size, dataVaddr, dataSize, bssVaddr, bssSize, autoLink, fixLevel, process); // Sanity checks if (size < CRO_HEADER_SIZE) { @@ -1332,7 +1334,14 @@ void LDRService::loadCRO(u32 messagePointer, bool isNew) { } // Map CRO to 
output address - mem.mirrorMapping(mapVaddr, croPointer, size); + // TODO: how to handle permissions? + bool succeeded = mem.mapVirtualMemory( + mapVaddr, croPointer, size >> 12, true, true, true, MemoryState::Free, MemoryState::Private, MemoryState::Locked, MemoryState::AliasCode + ); + + if (!succeeded) { + Helpers::panic("Failed to map CRO"); + } CRO cro(mem, mapVaddr, true); @@ -1392,7 +1401,18 @@ void LDRService::unloadCRO(u32 messagePointer) { Helpers::panic("Failed to unrebase CRO"); } + u32 size = cro.getSize(); + bool succeeded = mem.mapVirtualMemory( + mapVaddr, croPointer, size >> 12, false, false, false, MemoryState::Locked, MemoryState::AliasCode, MemoryState::Free, MemoryState::Private, + false + ); + + if (!succeeded) { + Helpers::panic("Failed to unmap CRO"); + } + kernel.clearInstructionCacheRange(mapVaddr, cro.getFixedSize()); + mem.write32(messagePointer, IPC::responseHeader(0x5, 1, 0)); mem.write32(messagePointer + 4, Result::Success); } \ No newline at end of file diff --git a/src/core/services/soc.cpp b/src/core/services/soc.cpp index 4ad546b0..4e74c5c3 100644 --- a/src/core/services/soc.cpp +++ b/src/core/services/soc.cpp @@ -15,7 +15,11 @@ void SOCService::handleSyncRequest(u32 messagePointer) { const u32 command = mem.read32(messagePointer); switch (command) { case SOCCommands::InitializeSockets: initializeSockets(messagePointer); break; - default: Helpers::panic("SOC service requested. Command: %08X\n", command); + + default: + Helpers::warn("SOC service requested. 
Command: %08X\n", command); + mem.write32(messagePointer + 4, Result::Success); + break; } } @@ -30,4 +34,4 @@ void SOCService::initializeSockets(u32 messagePointer) { mem.write32(messagePointer, IPC::responseHeader(0x01, 1, 0)); mem.write32(messagePointer + 4, Result::Success); -} \ No newline at end of file +} diff --git a/src/dynamic_library.cpp b/src/dynamic_library.cpp new file mode 100644 index 00000000..833baef7 --- /dev/null +++ b/src/dynamic_library.cpp @@ -0,0 +1,96 @@ +// SPDX-FileCopyrightText: 2019 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "dynamic_library.hpp" + +#include + +#include +#include + +#ifdef _WIN32 +#include +#else +#include +#endif + +namespace Common { + DynamicLibrary::DynamicLibrary() = default; + DynamicLibrary::DynamicLibrary(const char* filename) { void(open(filename)); } + DynamicLibrary::DynamicLibrary(void* handle_) : handle{handle_} {} + DynamicLibrary::DynamicLibrary(DynamicLibrary&& rhs) noexcept : handle{std::exchange(rhs.handle, nullptr)} {} + + DynamicLibrary& DynamicLibrary::operator=(DynamicLibrary&& rhs) noexcept { + close(); + handle = std::exchange(rhs.handle, nullptr); + return *this; + } + + DynamicLibrary::~DynamicLibrary() { close(); } + + std::string DynamicLibrary::getUnprefixedFilename(const char* filename) { +#if defined(_WIN32) + return std::string(filename) + ".dll"; +#elif defined(__APPLE__) + return std::string(filename) + ".dylib"; +#else + return std::string(filename) + ".so"; +#endif + } + + std::string DynamicLibrary::getVersionedFilename(const char* libname, int major, int minor) { +#if defined(_WIN32) + if (major >= 0 && minor >= 0) + return fmt::format("{}-{}-{}.dll", libname, major, minor); + else if (major >= 0) + return fmt::format("{}-{}.dll", libname, major); + else + return fmt::format("{}.dll", libname); +#elif defined(__APPLE__) + const char* prefix = std::strncmp(libname, "lib", 3) ? 
"lib" : ""; + if (major >= 0 && minor >= 0) + return fmt::format("{}{}.{}.{}.dylib", prefix, libname, major, minor); + else if (major >= 0) + return fmt::format("{}{}.{}.dylib", prefix, libname, major); + else + return fmt::format("{}{}.dylib", prefix, libname); +#else + const char* prefix = std::strncmp(libname, "lib", 3) ? "lib" : ""; + if (major >= 0 && minor >= 0) + return fmt::format("{}{}.so.{}.{}", prefix, libname, major, minor); + else if (major >= 0) + return fmt::format("{}{}.so.{}", prefix, libname, major); + else + return fmt::format("{}{}.so", prefix, libname); +#endif + } + + bool DynamicLibrary::open(const char* filename) { +#ifdef _WIN32 + handle = reinterpret_cast(LoadLibraryA(filename)); +#else + handle = dlopen(filename, RTLD_NOW); +#endif + return handle != nullptr; + } + + void DynamicLibrary::close() { + if (!isOpen()) return; + +#ifdef _WIN32 + FreeLibrary(reinterpret_cast(handle)); +#else + dlclose(handle); +#endif + handle = nullptr; + } + + void* DynamicLibrary::getSymbolAddress(const char* name) const { +#ifdef _WIN32 + return reinterpret_cast(GetProcAddress(reinterpret_cast(handle), name)); +#else + return reinterpret_cast(dlsym(handle, name)); +#endif + } + +} // namespace Common \ No newline at end of file diff --git a/src/emulator.cpp b/src/emulator.cpp index 0f97208a..e5ba4e27 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -20,7 +20,7 @@ __declspec(dllexport) DWORD AmdPowerXpressRequestHighPerformance = 1; Emulator::Emulator() : config(getConfigPath()), kernel(cpu, memory, gpu, config, lua), cpu(memory, kernel, *this), gpu(memory, config), - memory(cpu.getTicksRef(), config), cheats(memory, kernel.getServiceManager().getHID()), audioDevice(config.audioDeviceConfig), lua(*this), + memory(kernel.fcramManager, config), cheats(memory, kernel.getServiceManager().getHID()), audioDevice(config.audioDeviceConfig), lua(*this), running(false) #ifdef PANDA3DS_ENABLE_HTTP_SERVER , @@ -159,20 +159,21 @@ void Emulator::pollScheduler() { 
scheduler.updateNextTimestamp(); switch (eventType) { - case Scheduler::EventType::VBlank: [[likely]] { - // Signal that we've reached the end of a frame - frameDone = true; - lua.signalEvent(LuaEvent::Frame); + case Scheduler::EventType::VBlank: + [[likely]] { + // Signal that we've reached the end of a frame + frameDone = true; + lua.signalEvent(LuaEvent::Frame); - // Send VBlank interrupts - ServiceManager& srv = kernel.getServiceManager(); - srv.sendGPUInterrupt(GPUInterrupt::VBlank0); - srv.sendGPUInterrupt(GPUInterrupt::VBlank1); + // Send VBlank interrupts + ServiceManager& srv = kernel.getServiceManager(); + srv.sendGPUInterrupt(GPUInterrupt::VBlank0); + srv.sendGPUInterrupt(GPUInterrupt::VBlank1); - // Queue next VBlank event - scheduler.addEvent(Scheduler::EventType::VBlank, time + CPU::ticksPerSec / 60); - break; - } + // Queue next VBlank event + scheduler.addEvent(Scheduler::EventType::VBlank, time + CPU::ticksPerSec / 60); + break; + } case Scheduler::EventType::ThreadWakeup: kernel.pollThreadWakeups(); break; case Scheduler::EventType::UpdateTimers: kernel.pollTimers(); break; @@ -353,8 +354,7 @@ bool Emulator::loadELF(std::ifstream& file) { std::span Emulator::getSMDH() { switch (romType) { case ROMType::NCSD: - case ROMType::CXI: - return memory.getCXI()->smdh; + case ROMType::CXI: return memory.getCXI()->smdh; default: { return std::span(); } @@ -386,7 +386,7 @@ static void dumpRomFSNode(const RomFS::RomFSNode& node, const char* romFSBase, c for (auto& directory : node.directories) { const auto newPath = path / directory->name; - + // Create the directory for the new folder std::error_code ec; std::filesystem::create_directories(newPath, ec); @@ -465,7 +465,7 @@ void Emulator::reloadSettings() { loadRenderdoc(); } - gpu.getRenderer()->setHashTextures(config.hashTextures); + gpu.getRenderer()->setHashTextures(config.hashTextures); #ifdef PANDA3DS_ENABLE_DISCORD_RPC // Reload RPC setting if we're compiling with RPC support diff --git 
a/src/ios_driver.mm b/src/ios_driver.mm index cb98b269..87bd057a 100644 --- a/src/ios_driver.mm +++ b/src/ios_driver.mm @@ -15,13 +15,11 @@ extern "C" { #define IOS_EXPORT extern "C" __attribute__((visibility("default"))) std::unique_ptr emulator = nullptr; -HIDService* hidService = nullptr; IOS_EXPORT void iosCreateEmulator() { printf("Creating emulator\n"); emulator = std::make_unique(); - hidService = &emulator->getServiceManager().getHID(); emulator->initGraphicsContext(nullptr); } diff --git a/src/jni_driver.cpp b/src/jni_driver.cpp index 6a156360..7274c9c4 100644 --- a/src/jni_driver.cpp +++ b/src/jni_driver.cpp @@ -10,11 +10,11 @@ #include "android_utils.hpp" #include "sdl_sensors.hpp" -std::unique_ptr emulator = nullptr; -HIDService* hidService = nullptr; -RendererGL* renderer = nullptr; -bool romLoaded = false; -JavaVM* jvm = nullptr; +static std::unique_ptr emulator = nullptr; +static HIDService* hidService = nullptr; +static RendererGL* renderer = nullptr; +static bool romLoaded = false; +static JavaVM* jvm = nullptr; jclass alberClass; jmethodID alberClassOpenDocument; diff --git a/src/libretro_core.cpp b/src/libretro_core.cpp index 4786b317..9ad3b62a 100644 --- a/src/libretro_core.cpp +++ b/src/libretro_core.cpp @@ -1,12 +1,11 @@ -#include +#include + #include #include -#include - -#include -#include -#include +#include "emulator.hpp" +#include "renderer_gl/renderer_gl.hpp" +#include "version.hpp" static retro_environment_t envCallback; static retro_video_refresh_t videoCallback; @@ -20,20 +19,14 @@ static std::filesystem::path savePath; static bool screenTouched = false; static bool usingGLES = false; -std::unique_ptr emulator; -RendererGL* renderer; +static std::unique_ptr emulator; +static RendererGL* renderer; -std::filesystem::path Emulator::getConfigPath() { - return std::filesystem::path(savePath / "config.toml"); -} +std::filesystem::path Emulator::getConfigPath() { return std::filesystem::path(savePath / "config.toml"); } 
+std::filesystem::path Emulator::getAppDataRoot() { return std::filesystem::path(savePath / "Emulator Files"); } -std::filesystem::path Emulator::getAppDataRoot() { - return std::filesystem::path(savePath / "Emulator Files"); -} - -static void* getGLProcAddress(const char* name) { - return (void*)hwRender.get_proc_address(name); -} +static void* getGLProcAddress(const char* name) { return (void*)hwRender.get_proc_address(name); } +static void videoDestroyContext() { emulator->deinitGraphicsContext(); } static void videoResetContext() { if (usingGLES) { @@ -53,10 +46,6 @@ static void videoResetContext() { emulator->initGraphicsContext(nullptr); } -static void videoDestroyContext() { - emulator->deinitGraphicsContext(); -} - static bool setHWRender(retro_hw_context_type type) { hwRender.context_type = type; hwRender.context_reset = videoResetContext; @@ -159,16 +148,12 @@ static int fetchVariableInt(std::string key, int def) { return 0; } -static bool fetchVariableBool(std::string key, bool def) { - return fetchVariable(key, def ? "enabled" : "disabled") == "enabled"; -} - -static int fetchVariableRange(std::string key, int min, int max) { - return std::clamp(fetchVariableInt(key, min), min, max); -} +static bool fetchVariableBool(std::string key, bool def) { return fetchVariable(key, def ? "enabled" : "disabled") == "enabled"; } +static int fetchVariableRange(std::string key, int min, int max) { return std::clamp(fetchVariableInt(key, min), min, max); } static void configInit() { static const retro_variable values[] = { + {"panda3ds_use_fastmem", EmulatorConfig::enableFastmemDefault ? "Enable fastmem; enabled|disabled" : "Enable fastmem; disabled|enabled"}, {"panda3ds_use_shader_jit", EmulatorConfig::shaderJitDefault ? "Enable shader JIT; enabled|disabled" : "Enable shader JIT; disabled|enabled"}, {"panda3ds_accelerate_shaders", EmulatorConfig::accelerateShadersDefault ? 
"Run 3DS shaders on the GPU; enabled|disabled" : "Run 3DS shaders on the GPU; disabled|enabled"}, @@ -204,9 +189,10 @@ static void configUpdate() { config.rendererType = RendererType::OpenGL; config.vsyncEnabled = fetchVariableBool("panda3ds_use_vsync", true); config.shaderJitEnabled = fetchVariableBool("panda3ds_use_shader_jit", EmulatorConfig::shaderJitDefault); + config.fastmemEnabled = fetchVariableBool("panda3ds_use_fastmem", EmulatorConfig::enableFastmemDefault); + config.systemLanguage = EmulatorConfig::languageCodeFromString(fetchVariable("panda3ds_system_language", "en")); config.chargerPlugged = fetchVariableBool("panda3ds_use_charger", true); config.batteryPercentage = fetchVariableRange("panda3ds_battery_level", 5, 100); - config.systemLanguage = EmulatorConfig::languageCodeFromString(fetchVariable("panda3ds_system_language", "en")); config.dspType = Audio::DSPCore::typeFromString(fetchVariable("panda3ds_dsp_emulation", "null")); config.audioEnabled = fetchVariableBool("panda3ds_use_audio", false); @@ -259,27 +245,13 @@ void retro_get_system_av_info(retro_system_av_info* info) { info->timing.sample_rate = 32768; } -void retro_set_environment(retro_environment_t cb) { - envCallback = cb; -} - -void retro_set_video_refresh(retro_video_refresh_t cb) { - videoCallback = cb; -} - -void retro_set_audio_sample_batch(retro_audio_sample_batch_t cb) { - audioBatchCallback = cb; -} +void retro_set_environment(retro_environment_t cb) { envCallback = cb; } +void retro_set_video_refresh(retro_video_refresh_t cb) { videoCallback = cb; } +void retro_set_audio_sample_batch(retro_audio_sample_batch_t cb) { audioBatchCallback = cb; } void retro_set_audio_sample(retro_audio_sample_t cb) {} - -void retro_set_input_poll(retro_input_poll_t cb) { - inputPollCallback = cb; -} - -void retro_set_input_state(retro_input_state_t cb) { - inputStateCallback = cb; -} +void retro_set_input_poll(retro_input_poll_t cb) { inputPollCallback = cb; } +void 
retro_set_input_state(retro_input_state_t cb) { inputStateCallback = cb; } void retro_init() { enum retro_pixel_format xrgb888 = RETRO_PIXEL_FORMAT_XRGB8888; @@ -297,9 +269,7 @@ void retro_init() { emulator = std::make_unique(); } -void retro_deinit() { - emulator = nullptr; -} +void retro_deinit() { emulator = nullptr; } bool retro_load_game(const retro_game_info* game) { configInit(); @@ -325,9 +295,8 @@ void retro_unload_game() { renderer = nullptr; } -void retro_reset() { - emulator->reset(Emulator::ReloadOption::Reload); -} +void retro_reset() { emulator->reset(Emulator::ReloadOption::Reload); } +void retro_cheat_reset() { emulator->getCheats().reset(); } void retro_run() { configCheckVariables(); @@ -345,13 +314,16 @@ void retro_run() { hid.setKey(HID::Keys::Y, getButtonState(RETRO_DEVICE_ID_JOYPAD_Y)); hid.setKey(HID::Keys::L, getButtonState(RETRO_DEVICE_ID_JOYPAD_L)); hid.setKey(HID::Keys::R, getButtonState(RETRO_DEVICE_ID_JOYPAD_R)); + hid.setKey(HID::Keys::ZL, getButtonState(RETRO_DEVICE_ID_JOYPAD_L2)); + hid.setKey(HID::Keys::ZR, getButtonState(RETRO_DEVICE_ID_JOYPAD_R2)); + hid.setKey(HID::Keys::Start, getButtonState(RETRO_DEVICE_ID_JOYPAD_START)); hid.setKey(HID::Keys::Select, getButtonState(RETRO_DEVICE_ID_JOYPAD_SELECT)); hid.setKey(HID::Keys::Up, getButtonState(RETRO_DEVICE_ID_JOYPAD_UP)); hid.setKey(HID::Keys::Down, getButtonState(RETRO_DEVICE_ID_JOYPAD_DOWN)); hid.setKey(HID::Keys::Left, getButtonState(RETRO_DEVICE_ID_JOYPAD_LEFT)); hid.setKey(HID::Keys::Right, getButtonState(RETRO_DEVICE_ID_JOYPAD_RIGHT)); - // TODO: N3DS buttons + // TODO: C-Stick // Get analog values for the left analog stick (Right analog stick is N3DS-only and unimplemented) float xLeft = getAxisState(RETRO_DEVICE_INDEX_ANALOG_LEFT, RETRO_DEVICE_ID_ANALOG_X); @@ -443,8 +415,4 @@ void retro_cheat_set(uint index, bool enabled, const char* code) { } else { emulator->getCheats().disableCheat(id); } -} - -void retro_cheat_reset() { - emulator->getCheats().reset(); -} +} \ No 
newline at end of file diff --git a/src/panda_qt/cheats_window.cpp b/src/panda_qt/cheats_window.cpp index 2485c677..3627406a 100644 --- a/src/panda_qt/cheats_window.cpp +++ b/src/panda_qt/cheats_window.cpp @@ -70,8 +70,6 @@ void CheatEntryWidget::editClicked() { CheatEditDialog::CheatEditDialog(Emulator* emu, CheatEntryWidget& cheatEntry) : QDialog(), emu(emu), cheatEntry(cheatEntry) { setWindowTitle(tr("Edit Cheat")); - - setAttribute(Qt::WA_DeleteOnClose); setModal(true); QVBoxLayout* layout = new QVBoxLayout; @@ -147,6 +145,9 @@ void CheatEditDialog::accepted() { cheatEntry.setMetadata(metadata); cheatEntry.Update(); } + + // Delete the CheatEditDialog when the main thread is done using it + QObject::deleteLater(); }); }); } @@ -157,6 +158,9 @@ void CheatEditDialog::rejected() { // Was adding a cheat but user pressed cancel cheatEntry.Remove(); } + + // We have to manually memory-manage the CheatEditDialog object since it's accessed via multiple threads + QObject::deleteLater(); } CheatsWindow::CheatsWindow(Emulator* emu, const std::filesystem::path& cheatPath, QWidget* parent) diff --git a/src/panda_qt/config_window.cpp b/src/panda_qt/config_window.cpp index 14b33156..1d0c83d4 100644 --- a/src/panda_qt/config_window.cpp +++ b/src/panda_qt/config_window.cpp @@ -172,6 +172,10 @@ ConfigWindow::ConfigWindow(ConfigCallback configCallback, MainWindowCallback win connectCheckbox(circlePadProEnabled, config.circlePadProEnabled); genLayout->addRow(circlePadProEnabled); + QCheckBox* fastmemEnabled = new QCheckBox(tr("Enable Fastmem")); + connectCheckbox(fastmemEnabled, config.fastmemEnabled); + genLayout->addRow(fastmemEnabled); + QCheckBox* discordRpcEnabled = new QCheckBox(tr("Enable Discord RPC")); connectCheckbox(discordRpcEnabled, config.discordRpcEnabled); genLayout->addRow(discordRpcEnabled); diff --git a/src/panda_qt/main_window.cpp b/src/panda_qt/main_window.cpp index b592226b..f74f2061 100644 --- a/src/panda_qt/main_window.cpp +++ b/src/panda_qt/main_window.cpp 
@@ -10,6 +10,7 @@ #include "cheats.hpp" #include "input_mappings.hpp" #include "panda_qt/dsp_debugger.hpp" +#include "panda_qt/screen/screen.hpp" #include "sdl_sensors.hpp" #include "services/dsp.hpp" #include "version.hpp" @@ -25,8 +26,19 @@ MainWindow::MainWindow(QApplication* app, QWidget* parent) : QMainWindow(parent) resize(800, 240 * 4); show(); + const RendererType rendererType = emu->getConfig().rendererType; + usingGL = (rendererType == RendererType::OpenGL || rendererType == RendererType::Software || rendererType == RendererType::Null); + usingVk = (rendererType == RendererType::Vulkan); + usingMtl = (rendererType == RendererType::Metal); + + ScreenWidget::API api = ScreenWidget::API::OpenGL; + if (usingVk) + api = ScreenWidget::API::Vulkan; + else if (usingMtl) + api = ScreenWidget::API::Metal; + // We pass a callback to the screen widget that will be triggered every time we resize the screen - screen = new ScreenWidget([this](u32 width, u32 height) { handleScreenResize(width, height); }, this); + screen = ScreenWidget::getWidget(api, [this](u32 width, u32 height) { handleScreenResize(width, height); }, this); setCentralWidget(screen); appRunning = true; @@ -149,28 +161,29 @@ MainWindow::MainWindow(QApplication* app, QWidget* parent) : QMainWindow(parent) // The emulator graphics context for the thread should be initialized in the emulator thread due to how GL contexts work emuThread = std::thread([this]() { - const RendererType rendererType = emu->getConfig().rendererType; - usingGL = (rendererType == RendererType::OpenGL || rendererType == RendererType::Software || rendererType == RendererType::Null); - usingVk = (rendererType == RendererType::Vulkan); - usingMtl = (rendererType == RendererType::Metal); + switch (screen->api) { + case ScreenWidget::API::OpenGL: { + // Make GL context current for this thread, enable VSync + GL::Context* glContext = screen->getGLContext(); + glContext->MakeCurrent(); + 
glContext->SetSwapInterval(emu->getConfig().vsyncEnabled ? 1 : 0); - if (usingGL) { - // Make GL context current for this thread, enable VSync - GL::Context* glContext = screen->getGLContext(); - glContext->MakeCurrent(); - glContext->SetSwapInterval(emu->getConfig().vsyncEnabled ? 1 : 0); + if (glContext->IsGLES()) { + emu->getRenderer()->setupGLES(); + } - if (glContext->IsGLES()) { - emu->getRenderer()->setupGLES(); + emu->initGraphicsContext(glContext); + break; } - emu->initGraphicsContext(glContext); - } else if (usingVk) { - Helpers::panic("Vulkan on Qt is currently WIP, try the SDL frontend instead!"); - } else if (usingMtl) { - Helpers::panic("Metal on Qt currently doesn't work, try the SDL frontend instead!"); - } else { - Helpers::panic("Unsupported graphics backend for Qt frontend!"); + case ScreenWidget::API::Metal: { + emu->initGraphicsContext(nullptr); + emu->getRenderer()->setMTKLayer(screen->getMTKLayer()); + break; + } + + case ScreenWidget::API::Vulkan: Helpers::panic("Vulkan on Qt is currently WIP, try the SDL frontend instead!"); break; + default: Helpers::panic("Unsupported graphics backend for Qt frontend!"); break; } // We have to initialize controllers on the same thread they'll be polled in @@ -213,6 +226,8 @@ void MainWindow::emuThreadMainLoop() { void MainWindow::swapEmuBuffer() { if (usingGL) { screen->getGLContext()->SwapBuffers(); + } else if (usingMtl) { + // The renderer itself calls presentDrawable to swap buffers on Metal } else { Helpers::panic("[Qt] Don't know how to swap buffers for the current rendering backend :("); } @@ -290,6 +305,7 @@ MainWindow::~MainWindow() { delete aboutWindow; delete configWindow; delete cheatsEditor; + delete screen; delete luaEditor; } diff --git a/src/panda_qt/screen/metal_context.mm b/src/panda_qt/screen/metal_context.mm new file mode 100644 index 00000000..e7a508e4 --- /dev/null +++ b/src/panda_qt/screen/metal_context.mm @@ -0,0 +1,71 @@ +#import +#import +#import +#import +#import +#import + 
+#import "panda_qt/screen/screen_mtl.hpp" + +id metalDevice = nil; + +bool ScreenWidgetMTL::createMetalContext() { + NSView* nativeView = (NSView*)this->winId(); + // Retain the layer so that we can manually memory manage it. + CAMetalLayer* metalLayer = [[CAMetalLayer layer] retain]; + + if (!metalLayer) { + return false; + } + + metalDevice = MTLCreateSystemDefaultDevice(); + + if (!metalDevice) { + NSLog(@"Failed to create metal device"); + return false; + } + + metalLayer.device = metalDevice; + metalLayer.framebufferOnly = NO; + metalLayer.pixelFormat = MTLPixelFormatBGRA8Unorm; + + CGFloat scale = [nativeView window].backingScaleFactor; + CGSize pointSize = nativeView.bounds.size; + + metalLayer.contentsScale = scale; + metalLayer.drawableSize = CGSizeMake(pointSize.width * scale, pointSize.height * scale); + + [nativeView setLayer:metalLayer]; + [nativeView setWantsLayer:YES]; + + CA::MetalLayer* cppLayer = (CA::MetalLayer*)metalLayer; + mtkLayer = static_cast(cppLayer); + + return true; +} + +void ScreenWidgetMTL::resizeMetalView() { + NSView* view = (NSView*)this->windowHandle()->winId(); + CAMetalLayer* metalLayer = (CAMetalLayer*)[view layer]; + + if (metalLayer) { + metalLayer.drawableSize = CGSizeMake(surfaceWidth, surfaceHeight); + } +} + +ScreenWidgetMTL::~ScreenWidgetMTL() { + if (mtkLayer) { + CAMetalLayer* metalLayer = (__bridge CAMetalLayer*)static_cast(mtkLayer); + + NSView* view = (NSView*)this->winId(); + [view setLayer:nil]; + [view setWantsLayer:NO]; + + // Release Metal device and layer + metalLayer.device = nil; + [metalLayer release]; + [metalDevice release]; + + mtkLayer = nullptr; + } +} \ No newline at end of file diff --git a/src/panda_qt/screen.cpp b/src/panda_qt/screen/screen.cpp similarity index 66% rename from src/panda_qt/screen.cpp rename to src/panda_qt/screen/screen.cpp index 0876bb71..94835284 100644 --- a/src/panda_qt/screen.cpp +++ b/src/panda_qt/screen/screen.cpp @@ -1,10 +1,12 @@ +#ifdef PANDA3DS_ENABLE_OPENGL #include 
"opengl.hpp" +#endif // opengl.hpp must be included at the very top. This comment exists to make clang-format not reorder it :p + #include #include #include #include -#include #include #include @@ -12,16 +14,16 @@ #include #endif -#include "panda_qt/screen.hpp" +#include "panda_qt/screen/screen.hpp" +#include "panda_qt/screen/screen_gl.hpp" +#include "panda_qt/screen/screen_mtl.hpp" -// OpenGL screen widget, based on https://github.com/stenzek/duckstation/blob/master/src/duckstation-qt/displaywidget.cpp +// Screen widget, based on https://github.com/stenzek/duckstation/blob/master/src/duckstation-qt/displaywidget.cpp // and https://github.com/melonDS-emu/melonDS/blob/master/src/frontend/qt_sdl/main.cpp #ifdef PANDA3DS_ENABLE_OPENGL -ScreenWidget::ScreenWidget(ResizeCallback resizeCallback, QWidget* parent) : QWidget(parent), resizeCallback(resizeCallback) { +ScreenWidget::ScreenWidget(API api, ResizeCallback resizeCallback, QWidget* parent) : api(api), QWidget(parent), resizeCallback(resizeCallback) { // Create a native window for use with our graphics API of choice - resize(800, 240 * 4); - setAutoFillBackground(false); setAttribute(Qt::WA_NativeWindow, true); setAttribute(Qt::WA_NoSystemBackground, true); @@ -29,11 +31,8 @@ ScreenWidget::ScreenWidget(ResizeCallback resizeCallback, QWidget* parent) : QWi setAttribute(Qt::WA_KeyCompression, false); setFocusPolicy(Qt::StrongFocus); setMouseTracking(true); - show(); - if (!createGLContext()) { - Helpers::panic("Failed to create GL context for display"); - } + // The graphics context, as well as resizing and showing the widget, is handled by the screen backend } void ScreenWidget::resizeEvent(QResizeEvent* event) { @@ -48,18 +47,7 @@ void ScreenWidget::resizeEvent(QResizeEvent* event) { } reloadScreenCoordinates(); - - // This will call take care of calling resizeSurface from the emulator thread - resizeCallback(surfaceWidth, surfaceHeight); -} - -// Note: This will run on the emulator thread, we don't want any Qt 
calls happening there. -void ScreenWidget::resizeSurface(u32 width, u32 height) { - if (previousWidth != width || previousHeight != height) { - if (glContext) { - glContext->ResizeSurface(width, height); - } - } + resizeDisplay(); } void ScreenWidget::reloadScreenCoordinates() { @@ -73,30 +61,6 @@ void ScreenWidget::reloadScreenLayout(ScreenLayout::Layout newLayout, float newT reloadScreenCoordinates(); } -bool ScreenWidget::createGLContext() { - // List of GL context versions we will try. Anything 4.1+ is good for desktop OpenGL, and 3.1+ for OpenGL ES - static constexpr std::array versionsToTry = { - GL::Context::Version{GL::Context::Profile::Core, 4, 6}, GL::Context::Version{GL::Context::Profile::Core, 4, 5}, - GL::Context::Version{GL::Context::Profile::Core, 4, 4}, GL::Context::Version{GL::Context::Profile::Core, 4, 3}, - GL::Context::Version{GL::Context::Profile::Core, 4, 2}, GL::Context::Version{GL::Context::Profile::Core, 4, 1}, - GL::Context::Version{GL::Context::Profile::ES, 3, 2}, GL::Context::Version{GL::Context::Profile::ES, 3, 1}, - }; - - std::optional windowInfo = getWindowInfo(); - if (windowInfo.has_value()) { - this->windowInfo = *windowInfo; - - glContext = GL::Context::Create(*getWindowInfo(), versionsToTry); - if (glContext == nullptr) { - return false; - } - - glContext->DoneCurrent(); - } - - return glContext != nullptr; -} - qreal ScreenWidget::devicePixelRatioFromScreen() const { const QScreen* screenForRatio = windowHandle()->screen(); if (!screenForRatio) { @@ -156,3 +120,15 @@ std::optional ScreenWidget::getWindowInfo() { return wi; } #endif + +ScreenWidget* ScreenWidget::getWidget(API api, ResizeCallback resizeCallback, QWidget* parent) { + if (api == API::OpenGL) { + return new ScreenWidgetGL(api, resizeCallback, parent); + } else if (api == API::Metal) { + return new ScreenWidgetMTL(api, resizeCallback, parent); + } else if (api == API::Vulkan) { + Helpers::panic("Vulkan is not yet supported on Panda3DS-Qt. 
Try SDL instead"); + } else { + Helpers::panic("ScreenWidget::getWidget: Unimplemented graphics API"); + } +} \ No newline at end of file diff --git a/src/panda_qt/screen/screen_gl.cpp b/src/panda_qt/screen/screen_gl.cpp new file mode 100644 index 00000000..87cb2738 --- /dev/null +++ b/src/panda_qt/screen/screen_gl.cpp @@ -0,0 +1,64 @@ +#include "panda_qt/screen/screen_gl.hpp" + +#include + +#ifdef PANDA3DS_ENABLE_OPENGL +ScreenWidgetGL::ScreenWidgetGL(API api, ResizeCallback resizeCallback, QWidget* parent) : ScreenWidget(api, resizeCallback, parent) { + // On Wayland + OpenGL, we have to show the window before we can create a graphics context. + resize(800, 240 * 4); + show(); + + if (!createContext()) { + Helpers::panic("Failed to create GL context for display"); + } +} + +bool ScreenWidgetGL::createContext() { + // List of GL context versions we will try. Anything 4.1+ is good for desktop OpenGL, and 3.1+ for OpenGL ES + static constexpr std::array versionsToTry = { + GL::Context::Version{GL::Context::Profile::Core, 4, 6}, GL::Context::Version{GL::Context::Profile::Core, 4, 5}, + GL::Context::Version{GL::Context::Profile::Core, 4, 4}, GL::Context::Version{GL::Context::Profile::Core, 4, 3}, + GL::Context::Version{GL::Context::Profile::Core, 4, 2}, GL::Context::Version{GL::Context::Profile::Core, 4, 1}, + GL::Context::Version{GL::Context::Profile::ES, 3, 2}, GL::Context::Version{GL::Context::Profile::ES, 3, 1}, + }; + + std::optional windowInfo = getWindowInfo(); + if (windowInfo.has_value()) { + this->windowInfo = *windowInfo; + + glContext = GL::Context::Create(*getWindowInfo(), versionsToTry); + if (glContext == nullptr) { + return false; + } + + glContext->DoneCurrent(); + } + + return glContext != nullptr; +} + +void ScreenWidgetGL::resizeDisplay() { + // This will call take care of calling resizeSurface from the emulator thread, as the GL renderer must resize from the emu thread + resizeCallback(surfaceWidth, surfaceHeight); +} + +// Note: This will run on 
the emulator thread, we don't want any Qt calls happening there. +void ScreenWidgetGL::resizeSurface(u32 width, u32 height) { + if (previousWidth != width || previousHeight != height) { + if (glContext) { + glContext->ResizeSurface(width, height); + } + } +} + +GL::Context* ScreenWidgetGL::getGLContext() { return glContext.get(); } +#else +ScreenWidgetGL::ScreenWidgetGL(API api, ResizeCallback resizeCallback, QWidget* parent) : ScreenWidget(api, resizeCallback, parent) { + Helpers::panic("OpenGL renderer not supported. Make sure you've compiled with OpenGL support and that you're on a compatible platform"); +} + +GL::Context* ScreenWidgetGL::getGLContext() { return nullptr; } +bool ScreenWidgetGL::createContext() { return false; } +void ScreenWidgetGL::resizeDisplay() {} +void ScreenWidgetGL::resizeSurface(u32 width, u32 height) {} +#endif diff --git a/src/panda_qt/screen/screen_mtl.cpp b/src/panda_qt/screen/screen_mtl.cpp new file mode 100644 index 00000000..472b166b --- /dev/null +++ b/src/panda_qt/screen/screen_mtl.cpp @@ -0,0 +1,33 @@ +#include "panda_qt/screen/screen_mtl.hpp" + +#ifdef PANDA3DS_ENABLE_METAL +ScreenWidgetMTL::ScreenWidgetMTL(API api, ResizeCallback resizeCallback, QWidget* parent) : ScreenWidget(api, resizeCallback, parent) { + if (!createContext()) { + Helpers::panic("Failed to create Metal context for display"); + } + + resize(800, 240 * 4); + show(); +} + +void ScreenWidgetMTL::resizeDisplay() { + resizeMetalView(); + resizeCallback(surfaceWidth, surfaceHeight); +} + +bool ScreenWidgetMTL::createContext() { return createMetalContext(); } +void* ScreenWidgetMTL::getMTKLayer() { return mtkLayer; } + +#else +ScreenWidgetMTL::ScreenWidgetMTL(API api, ResizeCallback resizeCallback, QWidget* parent) : ScreenWidget(api, resizeCallback, parent) { + Helpers::panic("Metal renderer not supported. 
Make sure you've compiled with Metal support and that you're on a compatible platform"); +} + +ScreenWidgetMTL::~ScreenWidgetMTL() {} +bool ScreenWidgetMTL::createContext() { return false; } +bool ScreenWidgetMTL::createMetalContext() { return false; } +void* ScreenWidgetMTL::getMTKLayer() { return nullptr; } + +void ScreenWidgetMTL::resizeDisplay() {} +void ScreenWidgetMTL::resizeMetalView() {} +#endif \ No newline at end of file diff --git a/tests/DetectEmulator/source/main.c b/tests/DetectEmulator/source/main.c index a66edd69..6e1e644d 100644 --- a/tests/DetectEmulator/source/main.c +++ b/tests/DetectEmulator/source/main.c @@ -6,16 +6,16 @@ #define CLEAR_COLOR 0x68B0D8FF #define DISPLAY_TRANSFER_FLAGS \ - (GX_TRANSFER_FLIP_VERT(0) | GX_TRANSFER_OUT_TILED(0) | GX_TRANSFER_RAW_COPY(0) | \ - GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGBA8) | GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) | \ - GX_TRANSFER_SCALING(GX_TRANSFER_SCALE_NO)) + (GX_TRANSFER_FLIP_VERT(0) | GX_TRANSFER_OUT_TILED(0) | GX_TRANSFER_RAW_COPY(0) | \ + GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGBA8) | GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) | \ + GX_TRANSFER_SCALING(GX_TRANSFER_SCALE_NO)) typedef struct { float x, y, z; } vertex; static const vertex vertex_list[] = { - { 200.0f, 200.0f, 0.5f }, - { 100.0f, 40.0f, 0.5f }, - { 300.0f, 40.0f, 0.5f }, + { 200.0f, 200.0f, 0.5f }, + { 100.0f, 40.0f, 0.5f }, + { 300.0f, 40.0f, 0.5f }, }; typedef enum { @@ -69,22 +69,22 @@ static C3D_Mtx projection; static void* vbo_data; static void sceneInit(void) { - // Load the vertex shader, create a shader program and bind it - vshader_dvlb = DVLB_ParseFile((u32*)vshader_shbin, vshader_shbin_size); - shaderProgramInit(&program); - shaderProgramSetVsh(&program, &vshader_dvlb->DVLE[0]); - C3D_BindProgram(&program); + // Load the vertex shader, create a shader program and bind it + vshader_dvlb = DVLB_ParseFile((u32*)vshader_shbin, vshader_shbin_size); + shaderProgramInit(&program); + shaderProgramSetVsh(&program, 
&vshader_dvlb->DVLE[0]); + C3D_BindProgram(&program); - // Get the location of the uniforms - uLoc_projection = shaderInstanceGetUniformLocation(program.vertexShader, "projection"); + // Get the location of the uniforms + uLoc_projection = shaderInstanceGetUniformLocation(program.vertexShader, "projection"); - // Configure attributes for use with the vertex shader - C3D_AttrInfo* attrInfo = C3D_GetAttrInfo(); - AttrInfo_Init(attrInfo); - AttrInfo_AddLoader(attrInfo, 0, GPU_FLOAT, 3); // v0=position - AttrInfo_AddFixed(attrInfo, 1); // v1=color + // Configure attributes for use with the vertex shader + C3D_AttrInfo* attrInfo = C3D_GetAttrInfo(); + AttrInfo_Init(attrInfo); + AttrInfo_AddLoader(attrInfo, 0, GPU_FLOAT, 3); // v0=position + AttrInfo_AddFixed(attrInfo, 1); // v1=color - // Set the fixed attribute (color) to a colour depending on the emulator + // Set the fixed attribute (color) to a colour depending on the emulator Platform platform = getPlatform(); switch (platform) { @@ -104,78 +104,78 @@ static void sceneInit(void) { break; } - // Compute the projection matrix - Mtx_OrthoTilt(&projection, 0.0, 400.0, 0.0, 240.0, 0.0, 1.0, true); + // Compute the projection matrix + Mtx_OrthoTilt(&projection, 0.0, 400.0, 0.0, 240.0, 0.0, 1.0, true); - // Create the VBO (vertex buffer object) - vbo_data = linearAlloc(sizeof(vertex_list)); - memcpy(vbo_data, vertex_list, sizeof(vertex_list)); + // Create the VBO (vertex buffer object) + vbo_data = linearAlloc(sizeof(vertex_list)); + memcpy(vbo_data, vertex_list, sizeof(vertex_list)); - // Configure buffers - C3D_BufInfo* bufInfo = C3D_GetBufInfo(); - BufInfo_Init(bufInfo); - BufInfo_Add(bufInfo, vbo_data, sizeof(vertex), 1, 0x0); + // Configure buffers + C3D_BufInfo* bufInfo = C3D_GetBufInfo(); + BufInfo_Init(bufInfo); + BufInfo_Add(bufInfo, vbo_data, sizeof(vertex), 1, 0x0); - // Configure the first fragment shading substage to just pass through the vertex color - // See 
https://www.opengl.org/sdk/docs/man2/xhtml/glTexEnv.xml for more insight - C3D_TexEnv* env = C3D_GetTexEnv(0); - C3D_TexEnvInit(env); - C3D_TexEnvSrc(env, C3D_Both, GPU_PRIMARY_COLOR, 0, 0); - C3D_TexEnvFunc(env, C3D_Both, GPU_REPLACE); + // Configure the first fragment shading substage to just pass through the vertex color + // See https://www.opengl.org/sdk/docs/man2/xhtml/glTexEnv.xml for more insight + C3D_TexEnv* env = C3D_GetTexEnv(0); + C3D_TexEnvInit(env); + C3D_TexEnvSrc(env, C3D_Both, GPU_PRIMARY_COLOR, 0, 0); + C3D_TexEnvFunc(env, C3D_Both, GPU_REPLACE); } static void sceneRender(void) { - // Update the uniforms - C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, uLoc_projection, &projection); + // Update the uniforms + C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, uLoc_projection, &projection); - // Draw the VBO - C3D_DrawArrays(GPU_TRIANGLES, 0, vertex_list_count); + // Draw the VBO + C3D_DrawArrays(GPU_TRIANGLES, 0, vertex_list_count); } static void sceneExit(void) { - // Free the VBO - linearFree(vbo_data); + // Free the VBO + linearFree(vbo_data); - // Free the shader program - shaderProgramFree(&program); - DVLB_Free(vshader_dvlb); + // Free the shader program + shaderProgramFree(&program); + DVLB_Free(vshader_dvlb); } int main() { emuPrint("Entering main\n"); - // Initialize graphics - gfxInitDefault(); - C3D_Init(C3D_DEFAULT_CMDBUF_SIZE); + // Initialize graphics + gfxInitDefault(); + C3D_Init(C3D_DEFAULT_CMDBUF_SIZE); - // Initialize the render target - C3D_RenderTarget* target = C3D_RenderTargetCreate(240, 400, GPU_RB_RGBA8, GPU_RB_DEPTH24_STENCIL8); - C3D_RenderTargetSetOutput(target, GFX_TOP, GFX_LEFT, DISPLAY_TRANSFER_FLAGS); + // Initialize the render target + C3D_RenderTarget* target = C3D_RenderTargetCreate(240, 400, GPU_RB_RGBA8, GPU_RB_DEPTH24_STENCIL8); + C3D_RenderTargetSetOutput(target, GFX_TOP, GFX_LEFT, DISPLAY_TRANSFER_FLAGS); - // Initialize the scene - sceneInit(); + // Initialize the scene + sceneInit(); - // Main loop - while (true) - { - // Render 
the scene + // Main loop + while (true) + { + // Render the scene emuPrint("Entering C3D_FrameBegin"); - C3D_FrameBegin(C3D_FRAME_SYNCDRAW); + C3D_FrameBegin(C3D_FRAME_SYNCDRAW); emuPrint("Clearing render target"); - C3D_RenderTargetClear(target, C3D_CLEAR_ALL, CLEAR_COLOR, 0); + C3D_RenderTargetClear(target, C3D_CLEAR_ALL, CLEAR_COLOR, 0); emuPrint("Calling C3D_FrameDrawOn"); - C3D_FrameDrawOn(target); + C3D_FrameDrawOn(target); emuPrint("Calling sceneRender"); - sceneRender(); + sceneRender(); emuPrint("Entering C3D_FrameEnd"); - C3D_FrameEnd(0); + C3D_FrameEnd(0); emuPrint("Exited C3D_FrameEnd"); - } + } - // Deinitialize the scene - sceneExit(); + // Deinitialize the scene + sceneExit(); - // Deinitialize graphics - C3D_Fini(); - gfxExit(); - return 0; + // Deinitialize graphics + C3D_Fini(); + gfxExit(); + return 0; } diff --git a/tests/HelloWorldSVC/source/main.c b/tests/HelloWorldSVC/source/main.c index 7c2a0972..073c14a1 100644 --- a/tests/HelloWorldSVC/source/main.c +++ b/tests/HelloWorldSVC/source/main.c @@ -1,5 +1,4 @@ #include <3ds.h> -#include #include #include #include @@ -16,5 +15,5 @@ int main(int argc, char** argv) { __asm__ volatile ("" ::: "memory"); } - return 0; + return 0; } diff --git a/tests/ImmediateModeTriangles/source/main.c b/tests/ImmediateModeTriangles/source/main.c index 4eda609f..57fd15be 100644 --- a/tests/ImmediateModeTriangles/source/main.c +++ b/tests/ImmediateModeTriangles/source/main.c @@ -6,9 +6,9 @@ #define CLEAR_COLOR 0x68B0D8FF #define DISPLAY_TRANSFER_FLAGS \ - (GX_TRANSFER_FLIP_VERT(0) | GX_TRANSFER_OUT_TILED(0) | GX_TRANSFER_RAW_COPY(0) | \ - GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGBA8) | GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) | \ - GX_TRANSFER_SCALING(GX_TRANSFER_SCALE_NO)) + (GX_TRANSFER_FLIP_VERT(0) | GX_TRANSFER_OUT_TILED(0) | GX_TRANSFER_RAW_COPY(0) | \ + GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGBA8) | GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) | \ + GX_TRANSFER_SCALING(GX_TRANSFER_SCALE_NO)) static DVLB_s* 
vshader_dvlb; static shaderProgram_s program; @@ -17,105 +17,105 @@ static C3D_Mtx projection; static void sceneInit(void) { - // Load the vertex shader, create a shader program and bind it - vshader_dvlb = DVLB_ParseFile((u32*)vshader_shbin, vshader_shbin_size); - shaderProgramInit(&program); - shaderProgramSetVsh(&program, &vshader_dvlb->DVLE[0]); - C3D_BindProgram(&program); + // Load the vertex shader, create a shader program and bind it + vshader_dvlb = DVLB_ParseFile((u32*)vshader_shbin, vshader_shbin_size); + shaderProgramInit(&program); + shaderProgramSetVsh(&program, &vshader_dvlb->DVLE[0]); + C3D_BindProgram(&program); - // Get the location of the uniforms - uLoc_projection = shaderInstanceGetUniformLocation(program.vertexShader, "projection"); + // Get the location of the uniforms + uLoc_projection = shaderInstanceGetUniformLocation(program.vertexShader, "projection"); - // Configure attributes for use with the vertex shader - // Attribute format and element count are ignored in immediate mode - C3D_AttrInfo* attrInfo = C3D_GetAttrInfo(); - AttrInfo_Init(attrInfo); - AttrInfo_AddLoader(attrInfo, 0, GPU_FLOAT, 3); // v0=position - AttrInfo_AddLoader(attrInfo, 1, GPU_FLOAT, 3); // v1=color + // Configure attributes for use with the vertex shader + // Attribute format and element count are ignored in immediate mode + C3D_AttrInfo* attrInfo = C3D_GetAttrInfo(); + AttrInfo_Init(attrInfo); + AttrInfo_AddLoader(attrInfo, 0, GPU_FLOAT, 3); // v0=position + AttrInfo_AddLoader(attrInfo, 1, GPU_FLOAT, 3); // v1=color - // Compute the projection matrix - Mtx_OrthoTilt(&projection, 0.0, 400.0, 0.0, 240.0, 0.0, 1.0, true); + // Compute the projection matrix + Mtx_OrthoTilt(&projection, 0.0, 400.0, 0.0, 240.0, 0.0, 1.0, true); - // Configure the first fragment shading substage to just pass through the vertex color - // See https://www.opengl.org/sdk/docs/man2/xhtml/glTexEnv.xml for more insight - C3D_TexEnv* env = C3D_GetTexEnv(0); - C3D_TexEnvInit(env); - 
C3D_TexEnvSrc(env, C3D_Both, GPU_PRIMARY_COLOR, 0, 0); - C3D_TexEnvFunc(env, C3D_Both, GPU_REPLACE); + // Configure the first fragment shading substage to just pass through the vertex color + // See https://www.opengl.org/sdk/docs/man2/xhtml/glTexEnv.xml for more insight + C3D_TexEnv* env = C3D_GetTexEnv(0); + C3D_TexEnvInit(env); + C3D_TexEnvSrc(env, C3D_Both, GPU_PRIMARY_COLOR, 0, 0); + C3D_TexEnvFunc(env, C3D_Both, GPU_REPLACE); } static void sceneRender(void) { - // Update the uniforms - C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, uLoc_projection, &projection); + // Update the uniforms + C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, uLoc_projection, &projection); - // Draw the triangle directly - C3D_ImmDrawBegin(GPU_TRIANGLES); + // Draw the triangle directly + C3D_ImmDrawBegin(GPU_TRIANGLES); // Triangle 1 - C3D_ImmSendAttrib(200.0f, 200.0f, 0.5f, 0.0f); // v0=position - C3D_ImmSendAttrib(1.0f, 0.0f, 0.0f, 1.0f); // v1=color + C3D_ImmSendAttrib(200.0f, 200.0f, 0.5f, 0.0f); // v0=position + C3D_ImmSendAttrib(1.0f, 0.0f, 0.0f, 1.0f); // v1=color - C3D_ImmSendAttrib(100.0f, 40.0f, 0.5f, 0.0f); - C3D_ImmSendAttrib(0.0f, 1.0f, 0.0f, 1.0f); + C3D_ImmSendAttrib(100.0f, 40.0f, 0.5f, 0.0f); + C3D_ImmSendAttrib(0.0f, 1.0f, 0.0f, 1.0f); - C3D_ImmSendAttrib(300.0f, 40.0f, 0.5f, 0.0f); - C3D_ImmSendAttrib(0.0f, 0.0f, 1.0f, 1.0f); + C3D_ImmSendAttrib(300.0f, 40.0f, 0.5f, 0.0f); + C3D_ImmSendAttrib(0.0f, 0.0f, 1.0f, 1.0f); // Triangle 2 C3D_ImmSendAttrib(10.0f, 20.0f, 0.5f, 0.0f); - C3D_ImmSendAttrib(1.0f, 0.0f, 0.0f, 1.0f); + C3D_ImmSendAttrib(1.0f, 0.0f, 0.0f, 1.0f); C3D_ImmSendAttrib(90.0f, 20.0f, 0.5f, 0.0f); - C3D_ImmSendAttrib(1.0f, 0.0f, 0.0f, 1.0f); + C3D_ImmSendAttrib(1.0f, 0.0f, 0.0f, 1.0f); C3D_ImmSendAttrib(40.0f, 40.0f, 0.5f, 0.0f); - C3D_ImmSendAttrib(1.0f, 0.0f, 0.0f, 1.0f); - C3D_ImmDrawEnd(); + C3D_ImmSendAttrib(1.0f, 0.0f, 0.0f, 1.0f); + C3D_ImmDrawEnd(); } static void sceneExit(void) { - // Free the shader program - shaderProgramFree(&program); - DVLB_Free(vshader_dvlb); + 
// Free the shader program + shaderProgramFree(&program); + DVLB_Free(vshader_dvlb); } int main() { - // Initialize graphics - gfxInitDefault(); - C3D_Init(C3D_DEFAULT_CMDBUF_SIZE); + // Initialize graphics + gfxInitDefault(); + C3D_Init(C3D_DEFAULT_CMDBUF_SIZE); - // Initialize the render target - C3D_RenderTarget* target = C3D_RenderTargetCreate(240, 400, GPU_RB_RGBA8, GPU_RB_DEPTH24_STENCIL8); - C3D_RenderTargetSetOutput(target, GFX_TOP, GFX_LEFT, DISPLAY_TRANSFER_FLAGS); + // Initialize the render target + C3D_RenderTarget* target = C3D_RenderTargetCreate(240, 400, GPU_RB_RGBA8, GPU_RB_DEPTH24_STENCIL8); + C3D_RenderTargetSetOutput(target, GFX_TOP, GFX_LEFT, DISPLAY_TRANSFER_FLAGS); - // Initialize the scene - sceneInit(); + // Initialize the scene + sceneInit(); - // Main loop - while (aptMainLoop()) - { - hidScanInput(); + // Main loop + while (aptMainLoop()) + { + hidScanInput(); - // Respond to user input - u32 kDown = hidKeysDown(); - if (kDown & KEY_START) - break; // break in order to return to hbmenu + // Respond to user input + u32 kDown = hidKeysDown(); + if (kDown & KEY_START) + break; // break in order to return to hbmenu - // Render the scene - C3D_FrameBegin(C3D_FRAME_SYNCDRAW); - C3D_RenderTargetClear(target, C3D_CLEAR_ALL, CLEAR_COLOR, 0); - C3D_FrameDrawOn(target); - sceneRender(); - C3D_FrameEnd(0); - } + // Render the scene + C3D_FrameBegin(C3D_FRAME_SYNCDRAW); + C3D_RenderTargetClear(target, C3D_CLEAR_ALL, CLEAR_COLOR, 0); + C3D_FrameDrawOn(target); + sceneRender(); + C3D_FrameEnd(0); + } - // Deinitialize the scene - sceneExit(); + // Deinitialize the scene + sceneExit(); - // Deinitialize graphics - C3D_Fini(); - gfxExit(); - return 0; + // Deinitialize graphics + C3D_Fini(); + gfxExit(); + return 0; } diff --git a/tests/PICA_LITP/source/main.c b/tests/PICA_LITP/source/main.c index 9bcab5b9..ea5b112f 100644 --- a/tests/PICA_LITP/source/main.c +++ b/tests/PICA_LITP/source/main.c @@ -4,12 +4,11 @@ #include "vshader_shbin.h" - #define 
CLEAR_COLOR 0x68B0D8FF #define DISPLAY_TRANSFER_FLAGS \ - (GX_TRANSFER_FLIP_VERT(0) | GX_TRANSFER_OUT_TILED(0) | GX_TRANSFER_RAW_COPY(0) | GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGBA8) | \ - GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) | GX_TRANSFER_SCALING(GX_TRANSFER_SCALE_NO)) + (GX_TRANSFER_FLIP_VERT(0) | GX_TRANSFER_OUT_TILED(0) | GX_TRANSFER_RAW_COPY(0) | GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGBA8) | \ + GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) | GX_TRANSFER_SCALING(GX_TRANSFER_SCALE_NO)) static DVLB_s* vshader_dvlb; static shaderProgram_s program; @@ -17,107 +16,107 @@ static int uLoc_projection; static C3D_Mtx projection; static void sceneInit(void) { - // Load the vertex shader, create a shader program and bind it - vshader_dvlb = DVLB_ParseFile((u32*)vshader_shbin, vshader_shbin_size); - shaderProgramInit(&program); - shaderProgramSetVsh(&program, &vshader_dvlb->DVLE[0]); - C3D_BindProgram(&program); + // Load the vertex shader, create a shader program and bind it + vshader_dvlb = DVLB_ParseFile((u32*)vshader_shbin, vshader_shbin_size); + shaderProgramInit(&program); + shaderProgramSetVsh(&program, &vshader_dvlb->DVLE[0]); + C3D_BindProgram(&program); - // Get the location of the uniforms - uLoc_projection = shaderInstanceGetUniformLocation(program.vertexShader, "projection"); + // Get the location of the uniforms + uLoc_projection = shaderInstanceGetUniformLocation(program.vertexShader, "projection"); - // Configure attributes for use with the vertex shader - // Attribute format and element count are ignored in immediate mode - C3D_AttrInfo* attrInfo = C3D_GetAttrInfo(); - AttrInfo_Init(attrInfo); - AttrInfo_AddLoader(attrInfo, 0, GPU_FLOAT, 3); // v0=position - AttrInfo_AddLoader(attrInfo, 1, GPU_FLOAT, 3); // v1=color + // Configure attributes for use with the vertex shader + // Attribute format and element count are ignored in immediate mode + C3D_AttrInfo* attrInfo = C3D_GetAttrInfo(); + AttrInfo_Init(attrInfo); + AttrInfo_AddLoader(attrInfo, 0, 
GPU_FLOAT, 3); // v0=position + AttrInfo_AddLoader(attrInfo, 1, GPU_FLOAT, 3); // v1=color - // Compute the projection matrix - Mtx_OrthoTilt(&projection, 0.0, 400.0, 0.0, 240.0, 0.0, 1.0, true); + // Compute the projection matrix + Mtx_OrthoTilt(&projection, 0.0, 400.0, 0.0, 240.0, 0.0, 1.0, true); - // Configure the first fragment shading substage to just pass through the vertex color - // See https://www.opengl.org/sdk/docs/man2/xhtml/glTexEnv.xml for more insight - C3D_TexEnv* env = C3D_GetTexEnv(0); - C3D_TexEnvInit(env); - C3D_TexEnvSrc(env, C3D_Both, GPU_PRIMARY_COLOR, 0, 0); - C3D_TexEnvFunc(env, C3D_Both, GPU_REPLACE); + // Configure the first fragment shading substage to just pass through the vertex color + // See https://www.opengl.org/sdk/docs/man2/xhtml/glTexEnv.xml for more insight + C3D_TexEnv* env = C3D_GetTexEnv(0); + C3D_TexEnvInit(env); + C3D_TexEnvSrc(env, C3D_Both, GPU_PRIMARY_COLOR, 0, 0); + C3D_TexEnvFunc(env, C3D_Both, GPU_REPLACE); } static void sceneRender(void) { - // Update the uniforms - C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, uLoc_projection, &projection); + // Update the uniforms + C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, uLoc_projection, &projection); - // Draw the triangle directly - C3D_ImmDrawBegin(GPU_TRIANGLES); - // Triangle 1 - // This vertex has r >= 0 and a >= 0 so the shader should output magenta (cmp.x = cmp.y = 1) - C3D_ImmSendAttrib(200.0f, 200.0f, 0.5f, 0.0f); // v0=position - C3D_ImmSendAttrib(1.0f, 0.0f, 0.0f, 1.0f); // v1=color + // Draw the triangle directly + C3D_ImmDrawBegin(GPU_TRIANGLES); + // Triangle 1 + // This vertex has r >= 0 and a >= 0 so the shader should output magenta (cmp.x = cmp.y = 1) + C3D_ImmSendAttrib(200.0f, 200.0f, 0.5f, 0.0f); // v0=position + C3D_ImmSendAttrib(1.0f, 0.0f, 0.0f, 1.0f); // v1=color - // This vertex only has a >= 0, so the shader should output lime (cmp.x = 0, cmp.y = 1) - C3D_ImmSendAttrib(100.0f, 40.0f, 0.5f, 0.0f); - C3D_ImmSendAttrib(-0.5f, 1.0f, 0.0f, 1.0f); + // This vertex only 
has a >= 0, so the shader should output lime (cmp.x = 0, cmp.y = 1) + C3D_ImmSendAttrib(100.0f, 40.0f, 0.5f, 0.0f); + C3D_ImmSendAttrib(-0.5f, 1.0f, 0.0f, 1.0f); - // This vertex only has r >= 0, so the shader should output cyan (cmp.x = 1, cmp.y = 0) - C3D_ImmSendAttrib(300.0f, 40.0f, 0.5f, 0.0f); - C3D_ImmSendAttrib(0.5f, 0.0f, 1.0f, -1.0f); + // This vertex only has r >= 0, so the shader should output cyan (cmp.x = 1, cmp.y = 0) + C3D_ImmSendAttrib(300.0f, 40.0f, 0.5f, 0.0f); + C3D_ImmSendAttrib(0.5f, 0.0f, 1.0f, -1.0f); - // Triangle 2 - // The next 3 vertices have r < 0, a < 0, so the output of the shader should be the output of litp with alpha set to 1 (cmp.x = cmp.y = 0) - C3D_ImmSendAttrib(10.0f, 20.0f, 0.5f, 0.0f); - // Output g component should be 64 / 128 = 0.5 - C3D_ImmSendAttrib(-1.0f, 64.0f, 0.0f, -1.0f); + // Triangle 2 + // The next 3 vertices have r < 0, a < 0, so the output of the shader should be the output of litp with alpha set to 1 (cmp.x = cmp.y = 0) + C3D_ImmSendAttrib(10.0f, 20.0f, 0.5f, 0.0f); + // Output g component should be 64 / 128 = 0.5 + C3D_ImmSendAttrib(-1.0f, 64.0f, 0.0f, -1.0f); - C3D_ImmSendAttrib(90.0f, 20.0f, 0.5f, 0.0f); - // Output g component should be 128 / 128 = 1.0 - C3D_ImmSendAttrib(-1.0f, 256.0f, 1.0f, -1.0f); + C3D_ImmSendAttrib(90.0f, 20.0f, 0.5f, 0.0f); + // Output g component should be 128 / 128 = 1.0 + C3D_ImmSendAttrib(-1.0f, 256.0f, 1.0f, -1.0f); - C3D_ImmSendAttrib(40.0f, 40.0f, 0.5f, 0.0f); - // Output g component should be 0 / 128 = 0 - C3D_ImmSendAttrib(-1.0f, 0.0f, 0.5f, -1.0f); - C3D_ImmDrawEnd(); + C3D_ImmSendAttrib(40.0f, 40.0f, 0.5f, 0.0f); + // Output g component should be 0 / 128 = 0 + C3D_ImmSendAttrib(-1.0f, 0.0f, 0.5f, -1.0f); + C3D_ImmDrawEnd(); } static void sceneExit(void) { - // Free the shader program - shaderProgramFree(&program); - DVLB_Free(vshader_dvlb); + // Free the shader program + shaderProgramFree(&program); + DVLB_Free(vshader_dvlb); } int main() { - // Initialize graphics - 
gfxInitDefault(); - C3D_Init(C3D_DEFAULT_CMDBUF_SIZE); + // Initialize graphics + gfxInitDefault(); + C3D_Init(C3D_DEFAULT_CMDBUF_SIZE); - // Initialize the render target - C3D_RenderTarget* target = C3D_RenderTargetCreate(240, 400, GPU_RB_RGBA8, GPU_RB_DEPTH24_STENCIL8); - C3D_RenderTargetSetOutput(target, GFX_TOP, GFX_LEFT, DISPLAY_TRANSFER_FLAGS); + // Initialize the render target + C3D_RenderTarget* target = C3D_RenderTargetCreate(240, 400, GPU_RB_RGBA8, GPU_RB_DEPTH24_STENCIL8); + C3D_RenderTargetSetOutput(target, GFX_TOP, GFX_LEFT, DISPLAY_TRANSFER_FLAGS); - // Initialize the scene - sceneInit(); + // Initialize the scene + sceneInit(); - // Main loop - while (aptMainLoop()) { - hidScanInput(); + // Main loop + while (aptMainLoop()) { + hidScanInput(); - // Respond to user input - u32 kDown = hidKeysDown(); - if (kDown & KEY_START) break; // break in order to return to hbmenu + // Respond to user input + u32 kDown = hidKeysDown(); + if (kDown & KEY_START) break; // break in order to return to hbmenu - // Render the scene - C3D_FrameBegin(C3D_FRAME_SYNCDRAW); - C3D_RenderTargetClear(target, C3D_CLEAR_ALL, CLEAR_COLOR, 0); - C3D_FrameDrawOn(target); - sceneRender(); - C3D_FrameEnd(0); - } + // Render the scene + C3D_FrameBegin(C3D_FRAME_SYNCDRAW); + C3D_RenderTargetClear(target, C3D_CLEAR_ALL, CLEAR_COLOR, 0); + C3D_FrameDrawOn(target); + sceneRender(); + C3D_FrameEnd(0); + } - // Deinitialize the scene - sceneExit(); + // Deinitialize the scene + sceneExit(); - // Deinitialize graphics - C3D_Fini(); - gfxExit(); - return 0; + // Deinitialize graphics + C3D_Fini(); + gfxExit(); + return 0; } \ No newline at end of file diff --git a/tests/SimplerTri/source/main.c b/tests/SimplerTri/source/main.c index 0ebd936d..e8fdc4fb 100644 --- a/tests/SimplerTri/source/main.c +++ b/tests/SimplerTri/source/main.c @@ -6,17 +6,17 @@ #define CLEAR_COLOR 0x68B0D8FF #define DISPLAY_TRANSFER_FLAGS \ - (GX_TRANSFER_FLIP_VERT(0) | GX_TRANSFER_OUT_TILED(0) | GX_TRANSFER_RAW_COPY(0) | 
\ - GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGBA8) | GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) | \ - GX_TRANSFER_SCALING(GX_TRANSFER_SCALE_NO)) + (GX_TRANSFER_FLIP_VERT(0) | GX_TRANSFER_OUT_TILED(0) | GX_TRANSFER_RAW_COPY(0) | \ + GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGBA8) | GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) | \ + GX_TRANSFER_SCALING(GX_TRANSFER_SCALE_NO)) typedef struct { float x, y, z; } vertex; static const vertex vertex_list[] = { - { 200.0f, 200.0f, 0.5f }, - { 100.0f, 40.0f, 0.5f }, - { 300.0f, 40.0f, 0.5f }, + { 200.0f, 200.0f, 0.5f }, + { 100.0f, 40.0f, 0.5f }, + { 300.0f, 40.0f, 0.5f }, }; #define vertex_list_count (sizeof(vertex_list)/sizeof(vertex_list[0])) @@ -30,61 +30,61 @@ static void* vbo_data; static void sceneInit(void) { - // Load the vertex shader, create a shader program and bind it - vshader_dvlb = DVLB_ParseFile((u32*)vshader_shbin, vshader_shbin_size); - shaderProgramInit(&program); - shaderProgramSetVsh(&program, &vshader_dvlb->DVLE[0]); - C3D_BindProgram(&program); + // Load the vertex shader, create a shader program and bind it + vshader_dvlb = DVLB_ParseFile((u32*)vshader_shbin, vshader_shbin_size); + shaderProgramInit(&program); + shaderProgramSetVsh(&program, &vshader_dvlb->DVLE[0]); + C3D_BindProgram(&program); - // Get the location of the uniforms - uLoc_projection = shaderInstanceGetUniformLocation(program.vertexShader, "projection"); + // Get the location of the uniforms + uLoc_projection = shaderInstanceGetUniformLocation(program.vertexShader, "projection"); - // Configure attributes for use with the vertex shader - C3D_AttrInfo* attrInfo = C3D_GetAttrInfo(); - AttrInfo_Init(attrInfo); - AttrInfo_AddLoader(attrInfo, 0, GPU_FLOAT, 3); // v0=position - AttrInfo_AddFixed(attrInfo, 1); // v1=color + // Configure attributes for use with the vertex shader + C3D_AttrInfo* attrInfo = C3D_GetAttrInfo(); + AttrInfo_Init(attrInfo); + AttrInfo_AddLoader(attrInfo, 0, GPU_FLOAT, 3); // v0=position + AttrInfo_AddFixed(attrInfo, 1); 
// v1=color - // Set the fixed attribute (color) to orange - C3D_FixedAttribSet(1, 1.0, 0.5, 0.2, 1.0); + // Set the fixed attribute (color) to orange + C3D_FixedAttribSet(1, 1.0, 0.5, 0.2, 1.0); - // Compute the projection matrix - Mtx_OrthoTilt(&projection, 0.0, 400.0, 0.0, 240.0, 0.0, 1.0, true); + // Compute the projection matrix + Mtx_OrthoTilt(&projection, 0.0, 400.0, 0.0, 240.0, 0.0, 1.0, true); - // Create the VBO (vertex buffer object) - vbo_data = linearAlloc(sizeof(vertex_list)); - memcpy(vbo_data, vertex_list, sizeof(vertex_list)); + // Create the VBO (vertex buffer object) + vbo_data = linearAlloc(sizeof(vertex_list)); + memcpy(vbo_data, vertex_list, sizeof(vertex_list)); - // Configure buffers - C3D_BufInfo* bufInfo = C3D_GetBufInfo(); - BufInfo_Init(bufInfo); - BufInfo_Add(bufInfo, vbo_data, sizeof(vertex), 1, 0x0); + // Configure buffers + C3D_BufInfo* bufInfo = C3D_GetBufInfo(); + BufInfo_Init(bufInfo); + BufInfo_Add(bufInfo, vbo_data, sizeof(vertex), 1, 0x0); - // Configure the first fragment shading substage to just pass through the vertex color - // See https://www.opengl.org/sdk/docs/man2/xhtml/glTexEnv.xml for more insight - C3D_TexEnv* env = C3D_GetTexEnv(0); - C3D_TexEnvInit(env); - C3D_TexEnvSrc(env, C3D_Both, GPU_PRIMARY_COLOR, 0, 0); - C3D_TexEnvFunc(env, C3D_Both, GPU_REPLACE); + // Configure the first fragment shading substage to just pass through the vertex color + // See https://www.opengl.org/sdk/docs/man2/xhtml/glTexEnv.xml for more insight + C3D_TexEnv* env = C3D_GetTexEnv(0); + C3D_TexEnvInit(env); + C3D_TexEnvSrc(env, C3D_Both, GPU_PRIMARY_COLOR, 0, 0); + C3D_TexEnvFunc(env, C3D_Both, GPU_REPLACE); } static void sceneRender(void) { - // Update the uniforms - C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, uLoc_projection, &projection); + // Update the uniforms + C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, uLoc_projection, &projection); - // Draw the VBO - C3D_DrawArrays(GPU_TRIANGLES, 0, vertex_list_count); + // Draw the VBO + 
C3D_DrawArrays(GPU_TRIANGLES, 0, vertex_list_count); } static void sceneExit(void) { - // Free the VBO - linearFree(vbo_data); + // Free the VBO + linearFree(vbo_data); - // Free the shader program - shaderProgramFree(&program); - DVLB_Free(vshader_dvlb); + // Free the shader program + shaderProgramFree(&program); + DVLB_Free(vshader_dvlb); } // Print string in emulator terminal @@ -96,39 +96,39 @@ static void emuPrint(const char* str) int main() { emuPrint("Entering main\n"); - // Initialize graphics - gfxInitDefault(); - C3D_Init(C3D_DEFAULT_CMDBUF_SIZE); + // Initialize graphics + gfxInitDefault(); + C3D_Init(C3D_DEFAULT_CMDBUF_SIZE); - // Initialize the render target - C3D_RenderTarget* target = C3D_RenderTargetCreate(240, 400, GPU_RB_RGBA8, GPU_RB_DEPTH24_STENCIL8); - C3D_RenderTargetSetOutput(target, GFX_TOP, GFX_LEFT, DISPLAY_TRANSFER_FLAGS); + // Initialize the render target + C3D_RenderTarget* target = C3D_RenderTargetCreate(240, 400, GPU_RB_RGBA8, GPU_RB_DEPTH24_STENCIL8); + C3D_RenderTargetSetOutput(target, GFX_TOP, GFX_LEFT, DISPLAY_TRANSFER_FLAGS); - // Initialize the scene - sceneInit(); + // Initialize the scene + sceneInit(); - // Main loop - while (true) - { - // Render the scene + // Main loop + while (true) + { + // Render the scene emuPrint("Entering C3D_FrameBegin"); - C3D_FrameBegin(C3D_FRAME_SYNCDRAW); + C3D_FrameBegin(C3D_FRAME_SYNCDRAW); emuPrint("Clearing render target"); - C3D_RenderTargetClear(target, C3D_CLEAR_ALL, CLEAR_COLOR, 0); + C3D_RenderTargetClear(target, C3D_CLEAR_ALL, CLEAR_COLOR, 0); emuPrint("Calling C3D_FrameDrawOn"); - C3D_FrameDrawOn(target); + C3D_FrameDrawOn(target); emuPrint("Calling sceneRender"); - sceneRender(); + sceneRender(); emuPrint("Entering C3D_FrameEnd"); - C3D_FrameEnd(0); + C3D_FrameEnd(0); emuPrint("Exited C3D_FrameEnd"); - } + } - // Deinitialize the scene - sceneExit(); + // Deinitialize the scene + sceneExit(); - // Deinitialize graphics - C3D_Fini(); - gfxExit(); - return 0; + // Deinitialize graphics 
+ C3D_Fini(); + gfxExit(); + return 0; } diff --git a/tests/shader.cpp b/tests/shader.cpp index f5e70d87..12929c55 100644 --- a/tests/shader.cpp +++ b/tests/shader.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -106,7 +107,7 @@ namespace Catch { template <> struct StringMaker> { static std::string convert(std::array value) { - return std::format("({}, {}, {}, {})", value[0].toFloat32(), value[1].toFloat32(), value[2].toFloat32(), value[3].toFloat32()); + return fmt::format("({}, {}, {}, {})", value[0].toFloat32(), value[1].toFloat32(), value[2].toFloat32(), value[3].toFloat32()); } }; } // namespace Catch @@ -292,8 +293,8 @@ SHADER_TEST_CASE("FLR", "[shader][vertex]") { SHADER_TEST_CASE("Uniform Read", "[shader][vertex][uniform]") { const auto constant0 = nihstro::SourceRegister::MakeFloat(0); auto shader = TestType::assembleTest({ - {nihstro::OpCode::Id::MOVA, nihstro::DestRegister{}, "x", input0, "x", nihstro::SourceRegister{}, "", nihstro::InlineAsm::RelativeAddress::A1 - }, + {nihstro::OpCode::Id::MOVA, nihstro::DestRegister{}, "x", input0, "x", nihstro::SourceRegister{}, "", + nihstro::InlineAsm::RelativeAddress::A1}, {nihstro::OpCode::Id::MOV, output0, "xyzw", constant0, "xyzw", nihstro::SourceRegister{}, "", nihstro::InlineAsm::RelativeAddress::A1}, {nihstro::OpCode::Id::END}, }); @@ -322,8 +323,8 @@ SHADER_TEST_CASE("Address Register Offset", "[video_core][shader][shader_jit]") const auto constant40 = nihstro::SourceRegister::MakeFloat(40); auto shader = TestType::assembleTest({ // mova a0.x, sh_input.x - {nihstro::OpCode::Id::MOVA, nihstro::DestRegister{}, "x", input0, "x", nihstro::SourceRegister{}, "", nihstro::InlineAsm::RelativeAddress::A1 - }, + {nihstro::OpCode::Id::MOVA, nihstro::DestRegister{}, "x", input0, "x", nihstro::SourceRegister{}, "", + nihstro::InlineAsm::RelativeAddress::A1}, // mov sh_output.xyzw, c40[a0.x].xyzw {nihstro::OpCode::Id::MOV, output0, "xyzw", constant40, "xyzw", nihstro::SourceRegister{}, "", 
nihstro::InlineAsm::RelativeAddress::A1}, {nihstro::OpCode::Id::END}, diff --git a/third_party/boost b/third_party/boost index 4532ae23..ecfc47f5 160000 --- a/third_party/boost +++ b/third_party/boost @@ -1 +1 @@ -Subproject commit 4532ae239c4d0b88a547d28e19348c3b05bfd4d6 +Subproject commit ecfc47f58e73fa353456068a7245dc933ebe4472 diff --git a/third_party/host_memory/LICENSE.txt b/third_party/host_memory/LICENSE.txt new file mode 100644 index 00000000..f288702d --- /dev/null +++ b/third_party/host_memory/LICENSE.txt @@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. 
Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. 
To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. 
If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. 
For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. 
Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. 
This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. 
+ + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. 
+ + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. 
+ + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. 
If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. 
If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. 
+ + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. 
For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. 
The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + <program> Copyright (C) <year> <name of author> + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +<https://www.gnu.org/licenses/>. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +<https://www.gnu.org/licenses/why-not-lgpl.html>.
diff --git a/third_party/host_memory/host_memory.cpp b/third_party/host_memory/host_memory.cpp new file mode 100644 index 00000000..bbd15ebe --- /dev/null +++ b/third_party/host_memory/host_memory.cpp @@ -0,0 +1,753 @@ +// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +// Copyright 2008 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#if defined(_M_ARM64) || defined(__aarch64__) +#define ARCHITECTURE_arm64 +#endif + +#ifdef _WIN32 + +#include + +#include +#include +#include + +#include "dynamic_library.hpp" + +#elif defined(__linux__) || defined(__FreeBSD__) // ^^^ Windows ^^^ vvv Linux vvv + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include +#include +#include +#include +#include + +#include + +#ifndef MAP_NORESERVE +#define MAP_NORESERVE 0 +#endif + +// On Android, include ioctl for shared memory ioctls, dlfcn for loading libandroid and linux/ashmem for ashmem defines +#ifdef __ANDROID__ +#include +#include +#include +#endif + +#endif // ^^^ Linux ^^^ + +#include +#include + +#include +#include +#include +#include + +#include "align.hpp" + +#define ASSERT(...) +#define UNIMPLEMENTED_MSG(...) +#define ASSERT_MSG(...) 
+ +namespace Common { + constexpr size_t PageAlignment = 0x1000; + constexpr size_t HugePageSize = 0x200000; + +#if defined(_WIN32) && defined(PANDA3DS_HARDWARE_FASTMEM) + +// Manually imported for MinGW compatibility +#ifndef MEM_RESERVE_PLACEHOLDER +#define MEM_RESERVE_PLACEHOLDER 0x00040000 +#endif +#ifndef MEM_REPLACE_PLACEHOLDER +#define MEM_REPLACE_PLACEHOLDER 0x00004000 +#endif +#ifndef MEM_COALESCE_PLACEHOLDERS +#define MEM_COALESCE_PLACEHOLDERS 0x00000001 +#endif +#ifndef MEM_PRESERVE_PLACEHOLDER +#define MEM_PRESERVE_PLACEHOLDER 0x00000002 +#endif + + using PFN_CreateFileMapping2 = _Ret_maybenull_ HANDLE(WINAPI*)( + _In_ HANDLE File, _In_opt_ SECURITY_ATTRIBUTES* SecurityAttributes, _In_ ULONG DesiredAccess, _In_ ULONG PageProtection, + _In_ ULONG AllocationAttributes, _In_ ULONG64 MaximumSize, _In_opt_ PCWSTR Name, + _Inout_updates_opt_(ParameterCount) MEM_EXTENDED_PARAMETER* ExtendedParameters, _In_ ULONG ParameterCount + ); + + using PFN_VirtualAlloc2 = _Ret_maybenull_ PVOID(WINAPI*)( + _In_opt_ HANDLE Process, _In_opt_ PVOID BaseAddress, _In_ SIZE_T Size, _In_ ULONG AllocationType, _In_ ULONG PageProtection, + _Inout_updates_opt_(ParameterCount) MEM_EXTENDED_PARAMETER* ExtendedParameters, _In_ ULONG ParameterCount + ); + + using PFN_MapViewOfFile3 = _Ret_maybenull_ PVOID(WINAPI*)( + _In_ HANDLE FileMapping, _In_opt_ HANDLE Process, _In_opt_ PVOID BaseAddress, _In_ ULONG64 Offset, _In_ SIZE_T ViewSize, + _In_ ULONG AllocationType, _In_ ULONG PageProtection, _Inout_updates_opt_(ParameterCount) MEM_EXTENDED_PARAMETER* ExtendedParameters, + _In_ ULONG ParameterCount + ); + + using PFN_UnmapViewOfFile2 = BOOL(WINAPI*)(_In_ HANDLE Process, _In_ PVOID BaseAddress, _In_ ULONG UnmapFlags); + + template + static void GetFuncAddress(Common::DynamicLibrary& dll, const char* name, T& pfn) { + if (!dll.getSymbol(name, &pfn)) { + Helpers::warn("Failed to load %s", name); + throw std::bad_alloc{}; + } + } + + class HostMemory::Impl { + public: + explicit Impl(size_t
backing_size_, size_t virtual_size_) + : backing_size{backing_size_}, virtual_size{virtual_size_}, process{GetCurrentProcess()}, kernelbase_dll("Kernelbase") { + if (!kernelbase_dll.isOpen()) { + Helpers::warn("Failed to load Kernelbase.dll"); + throw std::bad_alloc{}; + } + GetFuncAddress(kernelbase_dll, "CreateFileMapping2", pfn_CreateFileMapping2); + GetFuncAddress(kernelbase_dll, "VirtualAlloc2", pfn_VirtualAlloc2); + GetFuncAddress(kernelbase_dll, "MapViewOfFile3", pfn_MapViewOfFile3); + GetFuncAddress(kernelbase_dll, "UnmapViewOfFile2", pfn_UnmapViewOfFile2); + + // Allocate backing file map + backing_handle = pfn_CreateFileMapping2( + INVALID_HANDLE_VALUE, nullptr, FILE_MAP_WRITE | FILE_MAP_READ, PAGE_READWRITE, SEC_COMMIT, backing_size, nullptr, nullptr, 0 + ); + if (!backing_handle) { + Helpers::warn("Failed to allocate %X MiB of backing memory", backing_size >> 20); + throw std::bad_alloc{}; + } + // Allocate a virtual memory for the backing file map as placeholder + backing_base = + static_cast(pfn_VirtualAlloc2(process, nullptr, backing_size, MEM_RESERVE | MEM_RESERVE_PLACEHOLDER, PAGE_NOACCESS, nullptr, 0)); + if (!backing_base) { + Release(); + Helpers::warn("Failed to reserve %X MiB of virtual memory", backing_size >> 20); + throw std::bad_alloc{}; + } + // Map backing placeholder + void* const ret = + pfn_MapViewOfFile3(backing_handle, process, backing_base, 0, backing_size, MEM_REPLACE_PLACEHOLDER, PAGE_READWRITE, nullptr, 0); + if (ret != backing_base) { + Release(); + Helpers::warn("Failed to map %X MiB of virtual memory", backing_size >> 20); + throw std::bad_alloc{}; + } + // Allocate virtual address placeholder + virtual_base = + static_cast(pfn_VirtualAlloc2(process, nullptr, virtual_size, MEM_RESERVE | MEM_RESERVE_PLACEHOLDER, PAGE_NOACCESS, nullptr, 0)); + if (!virtual_base) { + Release(); + Helpers::warn("Failed to reserve %X GiB of virtual memory", virtual_size >> 30); + throw std::bad_alloc{}; + } + } + + ~Impl() { Release(); } + + /// Map length bytes of the backing file starting at host_offset into the virtual range starting at virtual_offset. + /// perms is not consulted here; MapView always maps views as PAGE_READWRITE. + void
Map(size_t virtual_offset, size_t host_offset, size_t length, MemoryPermission perms) { + std::unique_lock lock{placeholder_mutex}; + if (!IsNiechePlaceholder(virtual_offset, length)) { + Split(virtual_offset, length); + } + ASSERT(placeholders.find({virtual_offset, virtual_offset + length}) == placeholders.end()); + TrackPlaceholder(virtual_offset, host_offset, length); + + MapView(virtual_offset, host_offset, length); + } + + void Unmap(size_t virtual_offset, size_t length) { + std::scoped_lock lock{placeholder_mutex}; + + // Unmap until there are no more placeholders + while (UnmapOnePlaceholder(virtual_offset, length)) { + } + } + + /// Apply new page protection to each tracked placeholder returned by equal_range for the requested range, clamped to that range. + /// The execute flag is ignored, and a write-without-read request leaves new_flags at 0 (UNIMPLEMENTED_MSG expands to nothing in this file). + void Protect(size_t virtual_offset, size_t length, bool read, bool write, bool execute) { + DWORD new_flags{}; + if (read && write) { + new_flags = PAGE_READWRITE; + } else if (read && !write) { + new_flags = PAGE_READONLY; + } else if (!read && !write) { + new_flags = PAGE_NOACCESS; + } else { + UNIMPLEMENTED_MSG("Protection flag combination read={} write={}", read, write); + } + const size_t virtual_end = virtual_offset + length; + + std::scoped_lock lock{placeholder_mutex}; + auto [it, end] = placeholders.equal_range({virtual_offset, virtual_end}); + while (it != end) { + const size_t offset = std::max(it->lower(), virtual_offset); + const size_t protect_length = std::min(it->upper(), virtual_end) - offset; + DWORD old_flags{}; + if (!VirtualProtect(virtual_base + offset, protect_length, new_flags, &old_flags)) { + Helpers::warn("Failed to change virtual memory protect rules"); + } + ++it; + } + } + + bool ClearBackingRegion(size_t physical_offset, size_t length) { + // TODO: This does not seem to be possible on Windows.
+ return false; + } + + void EnableDirectMappedAddress() { + // TODO + Helpers::panic("Unimplemented: EnableDirectMappedAddress on Windows"); + } + + const size_t backing_size; ///< Size of the backing memory in bytes + const size_t virtual_size; ///< Size of the virtual address placeholder in bytes + + u8* backing_base{}; + u8* virtual_base{}; + + private: + /// Release all resources in the object + void Release() { + if (!placeholders.empty()) { + for (const auto& placeholder : placeholders) { + if (!pfn_UnmapViewOfFile2(process, virtual_base + placeholder.lower(), MEM_PRESERVE_PLACEHOLDER)) { + Helpers::warn("Failed to unmap virtual memory placeholder"); + } + } + Coalesce(0, virtual_size); + } + if (virtual_base) { + if (!VirtualFree(virtual_base, 0, MEM_RELEASE)) { + Helpers::warn("Failed to free virtual memory"); + } + } + if (backing_base) { + if (!pfn_UnmapViewOfFile2(process, backing_base, MEM_PRESERVE_PLACEHOLDER)) { + Helpers::warn("Failed to unmap backing memory placeholder"); + } + if (!VirtualFreeEx(process, backing_base, 0, MEM_RELEASE)) { + Helpers::warn("Failed to free backing memory"); + } + } + if (!CloseHandle(backing_handle)) { + Helpers::warn("Failed to free backing memory file handle"); + } + } + + /// Unmap one placeholder in the given range (partial unmaps are supported) + /// Return true when a placeholder was unmapped, false when there are no more placeholders to unmap + bool UnmapOnePlaceholder(size_t virtual_offset, size_t length) { + const auto it = placeholders.find({virtual_offset, virtual_offset + length}); + const auto begin = placeholders.begin(); + const auto end = placeholders.end(); + if (it == end) { + return false; + } + const size_t placeholder_begin = it->lower(); + const size_t placeholder_end = it->upper(); + const size_t unmap_begin = std::max(virtual_offset, placeholder_begin); + const size_t unmap_end = std::min(virtual_offset + length, placeholder_end); + ASSERT(unmap_begin >= placeholder_begin && unmap_begin < placeholder_end); + ASSERT(unmap_end <=
placeholder_end && unmap_end > placeholder_begin); + + const auto host_pointer_it = placeholder_host_pointers.find(placeholder_begin); + ASSERT(host_pointer_it != placeholder_host_pointers.end()); + const size_t host_offset = host_pointer_it->second; + + const bool split_left = unmap_begin > placeholder_begin; + const bool split_right = unmap_end < placeholder_end; + + if (!pfn_UnmapViewOfFile2(process, virtual_base + placeholder_begin, MEM_PRESERVE_PLACEHOLDER)) { + Helpers::warn("Failed to unmap placeholder"); + } + // If we have to remap memory regions due to partial unmaps, we are in a data race as + // Windows doesn't support remapping memory without unmapping first. Avoid adding any extra + // logic within the panic region described below. + + // Panic region, we are in a data race right now + if (split_left || split_right) { + Split(unmap_begin, unmap_end - unmap_begin); + } + if (split_left) { + MapView(placeholder_begin, host_offset, unmap_begin - placeholder_begin); + } + if (split_right) { + MapView(unmap_end, host_offset + unmap_end - placeholder_begin, placeholder_end - unmap_end); + } + // End panic region + + size_t coalesce_begin = unmap_begin; + if (!split_left) { + // Try to coalesce pages to the left + coalesce_begin = it == begin ? 0 : std::prev(it)->upper(); + if (coalesce_begin != placeholder_begin) { + Coalesce(coalesce_begin, unmap_end - coalesce_begin); + } + } + if (!split_right) { + // Try to coalesce pages to the right + const auto next = std::next(it); + const size_t next_begin = next == end ?
virtual_size : next->lower(); + if (placeholder_end != next_begin) { + // We can coalesce to the right + Coalesce(coalesce_begin, next_begin - coalesce_begin); + } + } + // Remove and reinsert placeholder trackers + UntrackPlaceholder(it); + if (split_left) { + TrackPlaceholder(placeholder_begin, host_offset, unmap_begin - placeholder_begin); + } + if (split_right) { + TrackPlaceholder(unmap_end, host_offset + unmap_end - placeholder_begin, placeholder_end - unmap_end); + } + return true; + } + + void MapView(size_t virtual_offset, size_t host_offset, size_t length) { + if (!pfn_MapViewOfFile3( + backing_handle, process, virtual_base + virtual_offset, host_offset, length, MEM_REPLACE_PLACEHOLDER, PAGE_READWRITE, nullptr, 0 + )) { + Helpers::warn("Failed to map placeholder"); + } + } + + void Split(size_t virtual_offset, size_t length) { + if (!VirtualFreeEx(process, reinterpret_cast(virtual_base + virtual_offset), length, MEM_RELEASE | MEM_PRESERVE_PLACEHOLDER)) { + Helpers::warn("Failed to split placeholder"); + } + } + + void Coalesce(size_t virtual_offset, size_t length) { + if (!VirtualFreeEx(process, reinterpret_cast(virtual_base + virtual_offset), length, MEM_RELEASE | MEM_COALESCE_PLACEHOLDERS)) { + Helpers::warn("Failed to coalesce placeholders"); + } + } + + void TrackPlaceholder(size_t virtual_offset, size_t host_offset, size_t length) { + placeholders.insert({virtual_offset, virtual_offset + length}); + placeholder_host_pointers.emplace(virtual_offset, host_offset); + } + + void UntrackPlaceholder(boost::icl::separate_interval_set::iterator it) { + placeholder_host_pointers.erase(it->lower()); + placeholders.erase(it); + } + + /// Return true when a given memory region is a "niche" and the placeholders don't have to be + /// split.
+ bool IsNiechePlaceholder(size_t virtual_offset, size_t length) const { + const auto it = placeholders.upper_bound({virtual_offset, virtual_offset + length}); + if (it != placeholders.end() && it->lower() == virtual_offset + length) { + return it == placeholders.begin() ? virtual_offset == 0 : std::prev(it)->upper() == virtual_offset; + } + return false; + } + + HANDLE process{}; ///< Current process handle + HANDLE backing_handle{}; ///< File based backing memory + + DynamicLibrary kernelbase_dll; + PFN_CreateFileMapping2 pfn_CreateFileMapping2{}; + PFN_VirtualAlloc2 pfn_VirtualAlloc2{}; + PFN_MapViewOfFile3 pfn_MapViewOfFile3{}; + PFN_UnmapViewOfFile2 pfn_UnmapViewOfFile2{}; + + std::mutex placeholder_mutex; ///< Mutex for placeholders + boost::icl::separate_interval_set placeholders; ///< Mapped placeholders + std::unordered_map placeholder_host_pointers; ///< Placeholder backing offset + }; + +#elif (defined(__linux__) || defined(__FreeBSD__)) && defined(PANDA3DS_HARDWARE_FASTMEM) // ^^^ Windows ^^^ vvv Linux vvv + +#ifdef __ANDROID__ +#define ASHMEM_DEVICE "/dev/ashmem" + // Android shared memory creation code from Dolphin + static int AshmemCreateFileMapping(const char* name, size_t size) { + // ASharedMemory path - works on API >= 26 and falls through on API < 26: + + // We can't call ASharedMemory_create the normal way without increasing the + // minimum version requirement to API 26, so we use dlopen/dlsym instead + static void* libandroid = dlopen("libandroid.so", RTLD_LAZY | RTLD_LOCAL); + static auto sharedMemoryCreate = reinterpret_cast(dlsym(libandroid, "ASharedMemory_create")); + if (sharedMemoryCreate) { + return sharedMemoryCreate(name, size); + } + + // /dev/ashmem path - works on API < 29: + + int fd, ret; + fd = open(ASHMEM_DEVICE, O_RDWR); + if (fd < 0) return fd; + + // We don't really care if we can't set the name, it is optional + ioctl(fd, ASHMEM_SET_NAME, name); + + ret = ioctl(fd, ASHMEM_SET_SIZE, size); + if (ret < 0) { + close(fd); +
Helpers::warn("Ashmem allocation failed"); + return ret; + } + return fd; + } +#endif + +#ifdef ARCHITECTURE_arm64 + static void* ChooseVirtualBase(size_t virtual_size) { + constexpr uintptr_t Map39BitSize = (1ULL << 39); + constexpr uintptr_t Map36BitSize = (1ULL << 36); + + // This is not a cryptographic application, we just want something random. + std::mt19937_64 rng; + + // We want to ensure we are allocating at an address aligned to the L2 block size. + // For Qualcomm devices, we must also allocate memory above 36 bits. + const size_t lower = Map36BitSize / HugePageSize; + const size_t upper = (Map39BitSize - virtual_size) / HugePageSize; + const size_t range = upper - lower; + + // Try up to 64 times to allocate memory at random addresses in the range. + for (int i = 0; i < 64; i++) { + // Calculate a possible location. + uintptr_t hint_address = ((rng() % range) + lower) * HugePageSize; + + // Try to map. + // Note: we may be able to take advantage of MAP_FIXED_NOREPLACE here. + void* map_pointer = + mmap(reinterpret_cast(hint_address), virtual_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0); + + // If we successfully mapped, we're done. + if (reinterpret_cast(map_pointer) == hint_address) { + return map_pointer; + } + + // Unmap if necessary, and try again.
+ if (map_pointer != MAP_FAILED) { + munmap(map_pointer, virtual_size); + } + } + + return MAP_FAILED; + } + +#else + + static void* ChooseVirtualBase(size_t virtual_size) { +#if defined(__FreeBSD__) + void* virtual_base = + mmap(nullptr, virtual_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE | MAP_ALIGNED_SUPER, -1, 0); + + if (virtual_base != MAP_FAILED) { + return virtual_base; + } +#endif + + return mmap(nullptr, virtual_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0); + } + +#endif + + class HostMemory::Impl { + public: + explicit Impl(size_t backing_size_, size_t virtual_size_) : backing_size{backing_size_}, virtual_size{virtual_size_} { + bool good = false; + SCOPE_EXIT { + if (!good) { + Release(); + } + }; + + long page_size = sysconf(_SC_PAGESIZE); + if (page_size != 0x1000) { + Helpers::warn("page size {:#x} is incompatible with 4K paging", page_size); + throw std::bad_alloc{}; + } + + // Backing memory initialization +#if defined(__FreeBSD__) && __FreeBSD__ < 13 + // XXX Drop after FreeBSD 12.* reaches EOL on 2024-06-30 + fd = shm_open(SHM_ANON, O_RDWR, 0600); +#elif defined(__ANDROID__) + fd = AshmemCreateFileMapping("HostMemory", 0); +#else + fd = memfd_create("HostMemory", 0); +#endif + + if (fd < 0) { + Helpers::warn("memfd_create failed: {}", strerror(errno)); + throw std::bad_alloc{}; + } + + // Defined to extend the file with zeros + int ret = ftruncate(fd, backing_size); + if (ret != 0) { + Helpers::warn("ftruncate failed with {}, are you out-of-memory?", strerror(errno)); + throw std::bad_alloc{}; + } + + backing_base = static_cast(mmap(nullptr, backing_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0)); + + if (backing_base == MAP_FAILED) { + Helpers::warn("mmap failed: {}", strerror(errno)); + throw std::bad_alloc{}; + } + + // Virtual memory initialization + virtual_base = virtual_map_base = static_cast(ChooseVirtualBase(virtual_size)); + if (virtual_base == MAP_FAILED) { + 
Helpers::warn("mmap failed: {}", strerror(errno)); + throw std::bad_alloc{}; + } +#if defined(__linux__) + madvise(virtual_base, virtual_size, MADV_HUGEPAGE); +#endif + + free_manager.SetAddressSpace(virtual_base, virtual_size); + good = true; + } + + ~Impl() { Release(); } + + void Map(size_t virtual_offset, size_t host_offset, size_t length, MemoryPermission perms) { + // Intersect the range with our address space. + AdjustMap(&virtual_offset, &length); + + // We are removing a placeholder. + free_manager.AllocateBlock(virtual_base + virtual_offset, length); + + // Deduce mapping protection flags. + int flags = PROT_NONE; + if (True(perms & MemoryPermission::Read)) { + flags |= PROT_READ; + } + if (True(perms & MemoryPermission::Write)) { + flags |= PROT_WRITE; + } +#ifdef ARCHITECTURE_arm64 + if (True(perms & MemoryPermission::Execute)) { + flags |= PROT_EXEC; + } +#endif + + void* ret = mmap(virtual_base + virtual_offset, length, flags, MAP_SHARED | MAP_FIXED, fd, host_offset); + ASSERT_MSG(ret != MAP_FAILED, "mmap failed: {}", strerror(errno)); + } + + void Unmap(size_t virtual_offset, size_t length) { + // The method name is wrong. We're still talking about the virtual range. + // We don't want to unmap, we want to reserve this memory. + + // Intersect the range with our address space. + AdjustMap(&virtual_offset, &length); + + // Merge with any adjacent placeholder mappings. + auto [merged_pointer, merged_size] = free_manager.FreeBlock(virtual_base + virtual_offset, length); + + void* ret = mmap(merged_pointer, merged_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); + ASSERT_MSG(ret != MAP_FAILED, "mmap failed: {}", strerror(errno)); + } + + void Protect(size_t virtual_offset, size_t length, bool read, bool write, bool execute) { + // Intersect the range with our address space. 
+ AdjustMap(&virtual_offset, &length); + + int flags = PROT_NONE; + if (read) { + flags |= PROT_READ; + } + if (write) { + flags |= PROT_WRITE; + } +#ifdef HAS_NCE + if (execute) { + flags |= PROT_EXEC; + } +#endif + int ret = mprotect(virtual_base + virtual_offset, length, flags); + ASSERT_MSG(ret == 0, "mprotect failed: {}", strerror(errno)); + } + + bool ClearBackingRegion(size_t physical_offset, size_t length) { +#ifdef __linux__ + // Set MADV_REMOVE on backing map to destroy it instantly. + // This also deletes the area from the backing file. + int ret = madvise(backing_base + physical_offset, length, MADV_REMOVE); + ASSERT_MSG(ret == 0, "madvise failed: {}", strerror(errno)); + + return true; +#else + return false; +#endif + } + + void EnableDirectMappedAddress() { virtual_base = nullptr; } + + const size_t backing_size; ///< Size of the backing memory in bytes + const size_t virtual_size; ///< Size of the virtual address placeholder in bytes + + u8* backing_base{reinterpret_cast(MAP_FAILED)}; + u8* virtual_base{reinterpret_cast(MAP_FAILED)}; + u8* virtual_map_base{reinterpret_cast(MAP_FAILED)}; + + private: + /// Release all resources in the object + void Release() { + if (virtual_map_base != MAP_FAILED) { + int ret = munmap(virtual_map_base, virtual_size); + ASSERT_MSG(ret == 0, "munmap failed: {}", strerror(errno)); + } + + if (backing_base != MAP_FAILED) { + int ret = munmap(backing_base, backing_size); + ASSERT_MSG(ret == 0, "munmap failed: {}", strerror(errno)); + } + + if (fd != -1) { + int ret = close(fd); + ASSERT_MSG(ret == 0, "close failed: {}", strerror(errno)); + } + } + + void AdjustMap(size_t* virtual_offset, size_t* length) { + if (virtual_base != nullptr) { + return; + } + + // If we are direct mapped, we want to make sure we are operating on a region + // that is in range of our virtual mapping. 
+ size_t intended_start = *virtual_offset; + size_t intended_end = intended_start + *length; + size_t address_space_start = reinterpret_cast(virtual_map_base); + size_t address_space_end = address_space_start + virtual_size; + + if (address_space_start > intended_end || intended_start > address_space_end) { + *virtual_offset = 0; + *length = 0; + } else { + *virtual_offset = std::max(intended_start, address_space_start); + *length = std::min(intended_end, address_space_end) - *virtual_offset; + } + } + + int fd{-1}; // memfd file descriptor, -1 is the error value of memfd_create + FreeRegionManager free_manager{}; + }; + +#else // ^^^ Linux ^^^ vvv Generic vvv + + class HostMemory::Impl { + public: + explicit Impl(size_t /*backing_size */, size_t /* virtual_size */) { + // This is just a place holder. + // Please implement fastmem in a proper way on your platform. + throw std::bad_alloc{}; + } + + void Map(size_t virtual_offset, size_t host_offset, size_t length, MemoryPermission perm) {} + void Unmap(size_t virtual_offset, size_t length) {} + void Protect(size_t virtual_offset, size_t length, bool read, bool write, bool execute) {} + bool ClearBackingRegion(size_t physical_offset, size_t length) { return false; } + void EnableDirectMappedAddress() {} + + u8* backing_base{nullptr}; + u8* virtual_base{nullptr}; + }; + +#endif // ^^^ Generic ^^^ + + HostMemory::HostMemory(size_t backing_size_, size_t virtual_size_, bool enableFastmem) : backing_size(backing_size_), virtual_size(virtual_size_) { + try { + // Fastmem is disabled, just throw bad alloc and use the VirtualBuffer fallback. + if (!enableFastmem) { + throw std::bad_alloc{}; + } + + // Try to allocate a fastmem arena. + // The implementation will fail with std::bad_alloc on errors. 
+ impl = std::make_unique( + Common::alignUp(backing_size, PageAlignment), Common::alignUp(virtual_size, PageAlignment) + HugePageSize + ); + backing_base = impl->backing_base; + virtual_base = impl->virtual_base; + + if (virtual_base) { + // Ensure the virtual base is aligned to the L2 block size. + virtual_base = reinterpret_cast(Common::alignUp(reinterpret_cast(virtual_base), HugePageSize)); + virtual_base_offset = virtual_base - impl->virtual_base; + } + + } catch (const std::bad_alloc&) { + if (enableFastmem) { + Helpers::warn("Fastmem unavailable, falling back to VirtualBuffer for memory allocation"); + } + + fallback_buffer = std::make_unique>(backing_size); + backing_base = fallback_buffer->data(); + virtual_base = nullptr; + } + } + + HostMemory::~HostMemory() = default; + HostMemory::HostMemory(HostMemory&&) noexcept = default; + HostMemory& HostMemory::operator=(HostMemory&&) noexcept = default; + + void HostMemory::Map(size_t virtual_offset, size_t host_offset, size_t length, MemoryPermission perms, bool separate_heap) { + ASSERT(virtual_offset % PageAlignment == 0); + ASSERT(host_offset % PageAlignment == 0); + ASSERT(length % PageAlignment == 0); + ASSERT(virtual_offset + length <= virtual_size); + ASSERT(host_offset + length <= backing_size); + if (length == 0 || !virtual_base || !impl) { + return; + } + impl->Map(virtual_offset + virtual_base_offset, host_offset, length, perms); + } + + void HostMemory::Unmap(size_t virtual_offset, size_t length, bool separate_heap) { + ASSERT(virtual_offset % PageAlignment == 0); + ASSERT(length % PageAlignment == 0); + ASSERT(virtual_offset + length <= virtual_size); + if (length == 0 || !virtual_base || !impl) { + return; + } + impl->Unmap(virtual_offset + virtual_base_offset, length); + } + + void HostMemory::Protect(size_t virtual_offset, size_t length, MemoryPermission perm) { + ASSERT(virtual_offset % PageAlignment == 0); + ASSERT(length % PageAlignment == 0); + ASSERT(virtual_offset + length <= 
virtual_size); + if (length == 0 || !virtual_base || !impl) { + return; + } + const bool read = True(perm & MemoryPermission::Read); + const bool write = True(perm & MemoryPermission::Write); + const bool execute = True(perm & MemoryPermission::Execute); + impl->Protect(virtual_offset + virtual_base_offset, length, read, write, execute); + } + + void HostMemory::ClearBackingRegion(size_t physical_offset, size_t length, u32 fill_value) { + if (!impl || fill_value != 0 || !impl->ClearBackingRegion(physical_offset, length)) { + std::memset(backing_base + physical_offset, fill_value, length); + } + } + + void HostMemory::EnableDirectMappedAddress() { + if (impl) { + impl->EnableDirectMappedAddress(); + virtual_size += reinterpret_cast(virtual_base); + } + } + +} // namespace Common \ No newline at end of file diff --git a/third_party/host_memory/include/host_memory/free_region_manager.h b/third_party/host_memory/include/host_memory/free_region_manager.h new file mode 100644 index 00000000..2e590d60 --- /dev/null +++ b/third_party/host_memory/include/host_memory/free_region_manager.h @@ -0,0 +1,55 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include + +namespace Common { + +class FreeRegionManager { +public: + explicit FreeRegionManager() = default; + ~FreeRegionManager() = default; + + void SetAddressSpace(void* start, size_t size) { + this->FreeBlock(start, size); + } + + std::pair FreeBlock(void* block_ptr, size_t size) { + std::scoped_lock lk(m_mutex); + + // Check to see if we are adjacent to any regions. + auto start_address = reinterpret_cast(block_ptr); + auto end_address = start_address + size; + auto it = m_free_regions.find({start_address - 1, end_address + 1}); + + // If we are, join with them, ensuring we stay in bounds. 
+ if (it != m_free_regions.end()) { + start_address = std::min(start_address, it->lower()); + end_address = std::max(end_address, it->upper()); + } + + // Free the relevant region. + m_free_regions.insert({start_address, end_address}); + + // Return the adjusted pointers. + block_ptr = reinterpret_cast(start_address); + size = end_address - start_address; + return {block_ptr, size}; + } + + void AllocateBlock(void* block_ptr, size_t size) { + std::scoped_lock lk(m_mutex); + + auto address = reinterpret_cast(block_ptr); + m_free_regions.subtract({address, address + size}); + } + +private: + std::mutex m_mutex; + boost::icl::interval_set m_free_regions; +}; + +} // namespace Common diff --git a/third_party/host_memory/include/host_memory/host_memory.h b/third_party/host_memory/include/host_memory/host_memory.h new file mode 100644 index 00000000..79e664fa --- /dev/null +++ b/third_party/host_memory/include/host_memory/host_memory.h @@ -0,0 +1,75 @@ +// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include + +#include "enum_flag_ops.hpp" +#include "helpers.hpp" +#include "host_memory/virtual_buffer.h" + +namespace Common { + + enum class MemoryPermission : u32 { + Read = 1 << 0, + Write = 1 << 1, + ReadWrite = Read | Write, + Execute = 1 << 2, + }; + DECLARE_ENUM_FLAG_OPERATORS(MemoryPermission) + + /** + * A low level linear memory buffer, which supports multiple mappings + * Its purpose is to rebuild a given sparse memory layout, including mirrors. + */ + class HostMemory { + public: + explicit HostMemory(size_t backing_size_, size_t virtual_size_, bool useFastmem); + ~HostMemory(); + + /** + * Copy constructors. They shall return a copy of the buffer without the mappings. + * TODO: Implement them with COW if needed. + */ + HostMemory(const HostMemory& other) = delete; + HostMemory& operator=(const HostMemory& other) = delete; + + /** + * Move constructors. 
They will move the buffer and the mappings to the new object. + */ + HostMemory(HostMemory&& other) noexcept; + HostMemory& operator=(HostMemory&& other) noexcept; + + void Map(size_t virtual_offset, size_t host_offset, size_t length, MemoryPermission perms, bool separate_heap); + void Unmap(size_t virtual_offset, size_t length, bool separate_heap); + void Protect(size_t virtual_offset, size_t length, MemoryPermission perms); + + void EnableDirectMappedAddress(); + + void ClearBackingRegion(size_t physical_offset, size_t length, u32 fill_value); + + [[nodiscard]] u8* BackingBasePointer() noexcept { return backing_base; } + [[nodiscard]] const u8* BackingBasePointer() const noexcept { return backing_base; } + + [[nodiscard]] u8* VirtualBasePointer() noexcept { return virtual_base; } + [[nodiscard]] const u8* VirtualBasePointer() const noexcept { return virtual_base; } + + bool IsInVirtualRange(void* address) const noexcept { return address >= virtual_base && address < virtual_base + virtual_size; } + + private: + size_t backing_size{}; + size_t virtual_size{}; + + // Low level handler for the platform dependent memory routines + class Impl; + std::unique_ptr impl; + u8* backing_base{}; + u8* virtual_base{}; + size_t virtual_base_offset{}; + + // Fallback if fastmem is not supported on this platform + std::unique_ptr> fallback_buffer; + }; + +} // namespace Common \ No newline at end of file diff --git a/third_party/host_memory/include/host_memory/scope_exit.h b/third_party/host_memory/include/host_memory/scope_exit.h new file mode 100644 index 00000000..771ad2ef --- /dev/null +++ b/third_party/host_memory/include/host_memory/scope_exit.h @@ -0,0 +1,78 @@ +// SPDX-FileCopyrightText: 2014 Citra Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include + +namespace detail { + template + class ScopeGuard { + ScopeGuard(const ScopeGuard&) = delete; + ScopeGuard& operator=(const ScopeGuard&) = delete; + + private: + F f; + bool active; + + 
public: + constexpr ScopeGuard(F f_) : f(std::move(f_)), active(true) {} + constexpr ~ScopeGuard() { + if (active) { + f(); + } + } + + constexpr void Cancel() { active = false; } + constexpr ScopeGuard(ScopeGuard&& rhs) : f(std::move(rhs.f)), active(rhs.active) { rhs.Cancel(); } + + ScopeGuard& operator=(ScopeGuard&& rhs) = delete; + }; + + template + constexpr ScopeGuard MakeScopeGuard(F f) { + return ScopeGuard(std::move(f)); + } + + enum class ScopeGuardOnExit {}; + + template + constexpr ScopeGuard operator+(ScopeGuardOnExit, F&& f) { + return ScopeGuard(std::forward(f)); + } + +} // namespace detail + +#define CONCATENATE_IMPL(s1, s2) s1##s2 +#define CONCATENATE(s1, s2) CONCATENATE_IMPL(s1, s2) + +#ifdef __COUNTER__ +#define ANONYMOUS_VARIABLE(pref) CONCATENATE(pref, __COUNTER__) +#else +#define ANONYMOUS_VARIABLE(pref) CONCATENATE(pref, __LINE__) +#endif + +/** + * This macro is similar to SCOPE_EXIT, except the object is caller managed. This is intended to be + * used when the caller might want to cancel the ScopeExit. + */ +#define SCOPE_GUARD detail::ScopeGuardOnExit() + [&]() + +/** + * This macro allows you to conveniently specify a block of code that will run on scope exit. Handy + * for doing ad-hoc clean-up tasks in a function with multiple returns. 
+ * + * Example usage: + * \code + * const int saved_val = g_foo; + * g_foo = 55; + * SCOPE_EXIT{ g_foo = saved_val; }; + * + * if (Bar()) { + * return 0; + * } else { + * return 20; + * } + * \endcode + */ +#define SCOPE_EXIT auto ANONYMOUS_VARIABLE(SCOPE_EXIT_STATE_) = SCOPE_GUARD diff --git a/third_party/host_memory/include/host_memory/virtual_buffer.h b/third_party/host_memory/include/host_memory/virtual_buffer.h new file mode 100644 index 00000000..0b5ac54d --- /dev/null +++ b/third_party/host_memory/include/host_memory/virtual_buffer.h @@ -0,0 +1,73 @@ +// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "helpers.hpp" +#include + +namespace Common { + + void* AllocateMemoryPages(std::size_t size) noexcept; + void FreeMemoryPages(void* base, std::size_t size) noexcept; + + template + class VirtualBuffer final { + public: + // TODO: Uncomment this and change Common::PageTable::PageInfo to be trivially constructible + // using std::atomic_ref once libc++ has support for it + // static_assert( + // std::is_trivially_constructible_v, + // "T must be trivially constructible, as non-trivial constructors will not be executed " + // "with the current allocator"); + + constexpr VirtualBuffer() = default; + explicit VirtualBuffer(std::size_t count) : alloc_size{count * sizeof(T)} { + base_ptr = reinterpret_cast(AllocateMemoryPages(alloc_size)); + } + + ~VirtualBuffer() noexcept { FreeMemoryPages(base_ptr, alloc_size); } + + VirtualBuffer(const VirtualBuffer&) = delete; + VirtualBuffer& operator=(const VirtualBuffer&) = delete; + + // Take over other's allocation and leave other empty, so that its destructor frees nothing. + VirtualBuffer(VirtualBuffer&& other) noexcept + : alloc_size{std::exchange(other.alloc_size, 0)}, base_ptr{std::exchange(other.base_ptr, nullptr)} {} + + // Free the pages this buffer already owns before adopting other's; otherwise they would leak. + VirtualBuffer& operator=(VirtualBuffer&& other) noexcept { + if (this != &other) { + FreeMemoryPages(base_ptr, alloc_size); + alloc_size = std::exchange(other.alloc_size, 0); + base_ptr = std::exchange(other.base_ptr, nullptr); + } + return *this; + } + + void 
resize(std::size_t count) { + const auto new_size = count * sizeof(T); + if (new_size == alloc_size) { + return; + } + + FreeMemoryPages(base_ptr, alloc_size); + + alloc_size = new_size; + base_ptr = reinterpret_cast(AllocateMemoryPages(alloc_size)); + } + + [[nodiscard]] constexpr const T& operator[](std::size_t index) const { return base_ptr[index]; } + [[nodiscard]] constexpr T& operator[](std::size_t index) { return base_ptr[index]; } + + [[nodiscard]] constexpr T* data() { return base_ptr; } + [[nodiscard]] constexpr const T* data() const { return base_ptr; } + + [[nodiscard]] constexpr std::size_t size() const { return alloc_size / sizeof(T); } + + private: + std::size_t alloc_size{}; + T* base_ptr{}; + }; + +} // namespace Common \ No newline at end of file diff --git a/third_party/host_memory/virtual_buffer.cpp b/third_party/host_memory/virtual_buffer.cpp new file mode 100644 index 00000000..b6575796 --- /dev/null +++ b/third_party/host_memory/virtual_buffer.cpp @@ -0,0 +1,46 @@ +// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#ifdef _WIN32 +#include +#else +#include +#endif + +#include "host_memory/virtual_buffer.h" + +namespace Common { + void* AllocateMemoryPages(std::size_t size) noexcept { +#ifdef _WIN32 + void* base{VirtualAlloc(nullptr, size, MEM_COMMIT, PAGE_READWRITE)}; +#else + void* base{mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0)}; + + if (base == MAP_FAILED) { + base = nullptr; + } +#endif + + if (!base) { + Helpers::panic("Failed to allocate memory pages"); + } + + return base; + } + + void FreeMemoryPages(void* base, [[maybe_unused]] std::size_t size) noexcept { + if (!base) { + return; + } +#ifdef _WIN32 + if (!VirtualFree(base, 0, MEM_RELEASE)) { + Helpers::panic("Failed to free memory pages"); + } +#else + if (munmap(base, size) != 0) { + Helpers::panic("Failed to free memory pages"); + } +#endif + } + +} // namespace Common \ No newline at end 
of file diff --git a/third_party/metal-cpp b/third_party/metal-cpp index a63bd172..5caea74c 160000 --- a/third_party/metal-cpp +++ b/third_party/metal-cpp @@ -1 +1 @@ -Subproject commit a63bd172ddcba73a3d87ca32032b66ad41ddb9a6 +Subproject commit 5caea74c5f77492add32b7cad109d796e342ab49