mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-04-21 03:54:45 +00:00
Merge branch 'main' into texture_cache/subresources
This commit is contained in:
commit
77c8142321
45 changed files with 1632 additions and 275 deletions
|
@ -42,3 +42,7 @@ Files: CMakeSettings.json
|
|||
src/shadps4.rc
|
||||
Copyright: shadPS4 Emulator Project
|
||||
License: GPL-2.0-or-later
|
||||
|
||||
Files: externals/renderdoc/*
|
||||
Copyright: 2019-2024 Baldur Karlsson
|
||||
License: MIT
|
||||
|
|
|
@ -79,6 +79,7 @@ find_package(xbyak 7.07 CONFIG)
|
|||
find_package(xxHash 0.8.2 MODULE)
|
||||
find_package(zlib-ng 2.2.0 MODULE)
|
||||
find_package(Zydis 4.1.0 CONFIG)
|
||||
find_package(RenderDoc MODULE)
|
||||
|
||||
if (APPLE)
|
||||
find_package(date 3.0.1 CONFIG)
|
||||
|
@ -484,6 +485,8 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp
|
|||
src/video_core/texture_cache/tile_manager.cpp
|
||||
src/video_core/texture_cache/tile_manager.h
|
||||
src/video_core/texture_cache/types.h
|
||||
src/video_core/renderdoc.cpp
|
||||
src/video_core/renderdoc.h
|
||||
)
|
||||
|
||||
set(INPUT src/input/controller.cpp
|
||||
|
@ -559,7 +562,7 @@ endif()
|
|||
|
||||
create_target_directory_groups(shadps4)
|
||||
|
||||
target_link_libraries(shadps4 PRIVATE magic_enum::magic_enum fmt::fmt toml11::toml11 tsl::robin_map xbyak::xbyak Tracy::TracyClient)
|
||||
target_link_libraries(shadps4 PRIVATE magic_enum::magic_enum fmt::fmt toml11::toml11 tsl::robin_map xbyak::xbyak Tracy::TracyClient RenderDoc::API)
|
||||
target_link_libraries(shadps4 PRIVATE Boost::headers GPUOpen::VulkanMemoryAllocator sirit Vulkan::Headers xxHash::xxhash Zydis::Zydis glslang::SPIRV glslang::glslang SDL3::SDL3)
|
||||
|
||||
if (APPLE)
|
||||
|
|
7
externals/CMakeLists.txt
vendored
7
externals/CMakeLists.txt
vendored
|
@ -74,6 +74,13 @@ if (NOT TARGET GPUOpen::VulkanMemoryAllocator)
|
|||
add_subdirectory(vma)
|
||||
endif()
|
||||
|
||||
# RenderDoc
|
||||
if (NOT TARGET RenderDoc::API)
|
||||
add_library(renderdoc INTERFACE)
|
||||
target_include_directories(renderdoc SYSTEM INTERFACE ./renderdoc)
|
||||
add_library(RenderDoc::API ALIAS renderdoc)
|
||||
endif()
|
||||
|
||||
# glslang
|
||||
if (NOT TARGET glslang::glslang)
|
||||
set(SKIP_GLSLANG_INSTALL ON CACHE BOOL "")
|
||||
|
|
741
externals/renderdoc/renderdoc_app.h
vendored
Normal file
741
externals/renderdoc/renderdoc_app.h
vendored
Normal file
|
@ -0,0 +1,741 @@
|
|||
/******************************************************************************
|
||||
* The MIT License (MIT)
|
||||
*
|
||||
* Copyright (c) 2019-2024 Baldur Karlsson
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
******************************************************************************/
|
||||
|
||||
#pragma once
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Documentation for the API is available at https://renderdoc.org/docs/in_application_api.html
|
||||
//
|
||||
|
||||
#if !defined(RENDERDOC_NO_STDINT)
|
||||
#include <stdint.h>
|
||||
#endif
|
||||
|
||||
#if defined(WIN32) || defined(__WIN32__) || defined(_WIN32) || defined(_MSC_VER)
|
||||
#define RENDERDOC_CC __cdecl
|
||||
#elif defined(__linux__) || defined(__FreeBSD__)
|
||||
#define RENDERDOC_CC
|
||||
#elif defined(__APPLE__)
|
||||
#define RENDERDOC_CC
|
||||
#else
|
||||
#error "Unknown platform"
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Constants not used directly in below API
|
||||
|
||||
// This is a GUID/magic value used for when applications pass a path where shader debug
|
||||
// information can be found to match up with a stripped shader.
|
||||
// the define can be used like so: const GUID RENDERDOC_ShaderDebugMagicValue =
|
||||
// RENDERDOC_ShaderDebugMagicValue_value
|
||||
#define RENDERDOC_ShaderDebugMagicValue_struct \
|
||||
{ \
|
||||
0xeab25520, 0x6670, 0x4865, 0x84, 0x29, 0x6c, 0x8, 0x51, 0x54, 0x00, 0xff \
|
||||
}
|
||||
|
||||
// as an alternative when you want a byte array (assuming x86 endianness):
|
||||
#define RENDERDOC_ShaderDebugMagicValue_bytearray \
|
||||
{ \
|
||||
0x20, 0x55, 0xb2, 0xea, 0x70, 0x66, 0x65, 0x48, 0x84, 0x29, 0x6c, 0x8, 0x51, 0x54, 0x00, 0xff \
|
||||
}
|
||||
|
||||
// truncated version when only a uint64_t is available (e.g. Vulkan tags):
|
||||
#define RENDERDOC_ShaderDebugMagicValue_truncated 0x48656670eab25520ULL
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// RenderDoc capture options
|
||||
//
|
||||
|
||||
typedef enum RENDERDOC_CaptureOption
|
||||
{
|
||||
// Allow the application to enable vsync
|
||||
//
|
||||
// Default - enabled
|
||||
//
|
||||
// 1 - The application can enable or disable vsync at will
|
||||
// 0 - vsync is force disabled
|
||||
eRENDERDOC_Option_AllowVSync = 0,
|
||||
|
||||
// Allow the application to enable fullscreen
|
||||
//
|
||||
// Default - enabled
|
||||
//
|
||||
// 1 - The application can enable or disable fullscreen at will
|
||||
// 0 - fullscreen is force disabled
|
||||
eRENDERDOC_Option_AllowFullscreen = 1,
|
||||
|
||||
// Record API debugging events and messages
|
||||
//
|
||||
// Default - disabled
|
||||
//
|
||||
// 1 - Enable built-in API debugging features and records the results into
|
||||
// the capture, which is matched up with events on replay
|
||||
// 0 - no API debugging is forcibly enabled
|
||||
eRENDERDOC_Option_APIValidation = 2,
|
||||
eRENDERDOC_Option_DebugDeviceMode = 2, // deprecated name of this enum
|
||||
|
||||
// Capture CPU callstacks for API events
|
||||
//
|
||||
// Default - disabled
|
||||
//
|
||||
// 1 - Enables capturing of callstacks
|
||||
// 0 - no callstacks are captured
|
||||
eRENDERDOC_Option_CaptureCallstacks = 3,
|
||||
|
||||
// When capturing CPU callstacks, only capture them from actions.
|
||||
// This option does nothing without the above option being enabled
|
||||
//
|
||||
// Default - disabled
|
||||
//
|
||||
// 1 - Only captures callstacks for actions.
|
||||
// Ignored if CaptureCallstacks is disabled
|
||||
// 0 - Callstacks, if enabled, are captured for every event.
|
||||
eRENDERDOC_Option_CaptureCallstacksOnlyDraws = 4,
|
||||
eRENDERDOC_Option_CaptureCallstacksOnlyActions = 4,
|
||||
|
||||
// Specify a delay in seconds to wait for a debugger to attach, after
|
||||
// creating or injecting into a process, before continuing to allow it to run.
|
||||
//
|
||||
// 0 indicates no delay, and the process will run immediately after injection
|
||||
//
|
||||
// Default - 0 seconds
|
||||
//
|
||||
eRENDERDOC_Option_DelayForDebugger = 5,
|
||||
|
||||
// Verify buffer access. This includes checking the memory returned by a Map() call to
|
||||
// detect any out-of-bounds modification, as well as initialising buffers with undefined contents
|
||||
// to a marker value to catch use of uninitialised memory.
|
||||
//
|
||||
// NOTE: This option is only valid for OpenGL and D3D11. Explicit APIs such as D3D12 and Vulkan do
|
||||
// not do the same kind of interception & checking and undefined contents are really undefined.
|
||||
//
|
||||
// Default - disabled
|
||||
//
|
||||
// 1 - Verify buffer access
|
||||
// 0 - No verification is performed, and overwriting bounds may cause crashes or corruption in
|
||||
// RenderDoc.
|
||||
eRENDERDOC_Option_VerifyBufferAccess = 6,
|
||||
|
||||
// The old name for eRENDERDOC_Option_VerifyBufferAccess was eRENDERDOC_Option_VerifyMapWrites.
|
||||
// This option now controls the filling of uninitialised buffers with 0xdddddddd which was
|
||||
// previously always enabled
|
||||
eRENDERDOC_Option_VerifyMapWrites = eRENDERDOC_Option_VerifyBufferAccess,
|
||||
|
||||
// Hooks any system API calls that create child processes, and injects
|
||||
// RenderDoc into them recursively with the same options.
|
||||
//
|
||||
// Default - disabled
|
||||
//
|
||||
// 1 - Hooks into spawned child processes
|
||||
// 0 - Child processes are not hooked by RenderDoc
|
||||
eRENDERDOC_Option_HookIntoChildren = 7,
|
||||
|
||||
// By default RenderDoc only includes resources in the final capture necessary
|
||||
// for that frame, this allows you to override that behaviour.
|
||||
//
|
||||
// Default - disabled
|
||||
//
|
||||
// 1 - all live resources at the time of capture are included in the capture
|
||||
// and available for inspection
|
||||
// 0 - only the resources referenced by the captured frame are included
|
||||
eRENDERDOC_Option_RefAllResources = 8,
|
||||
|
||||
// **NOTE**: As of RenderDoc v1.1 this option has been deprecated. Setting or
|
||||
// getting it will be ignored, to allow compatibility with older versions.
|
||||
// In v1.1 the option acts as if it's always enabled.
|
||||
//
|
||||
// By default RenderDoc skips saving initial states for resources where the
|
||||
// previous contents don't appear to be used, assuming that writes before
|
||||
// reads indicate previous contents aren't used.
|
||||
//
|
||||
// Default - disabled
|
||||
//
|
||||
// 1 - initial contents at the start of each captured frame are saved, even if
|
||||
// they are later overwritten or cleared before being used.
|
||||
// 0 - unless a read is detected, initial contents will not be saved and will
|
||||
// appear as black or empty data.
|
||||
eRENDERDOC_Option_SaveAllInitials = 9,
|
||||
|
||||
// In APIs that allow for the recording of command lists to be replayed later,
|
||||
// RenderDoc may choose to not capture command lists before a frame capture is
|
||||
// triggered, to reduce overheads. This means any command lists recorded once
|
||||
// and replayed many times will not be available and may cause a failure to
|
||||
// capture.
|
||||
//
|
||||
// NOTE: This is only true for APIs where multithreading is difficult or
|
||||
// discouraged. Newer APIs like Vulkan and D3D12 will ignore this option
|
||||
// and always capture all command lists since the API is heavily oriented
|
||||
// around it and the overheads have been reduced by API design.
|
||||
//
|
||||
// 1 - All command lists are captured from the start of the application
|
||||
// 0 - Command lists are only captured if their recording begins during
|
||||
// the period when a frame capture is in progress.
|
||||
eRENDERDOC_Option_CaptureAllCmdLists = 10,
|
||||
|
||||
// Mute API debugging output when the API validation mode option is enabled
|
||||
//
|
||||
// Default - enabled
|
||||
//
|
||||
// 1 - Mute any API debug messages from being displayed or passed through
|
||||
// 0 - API debugging is displayed as normal
|
||||
eRENDERDOC_Option_DebugOutputMute = 11,
|
||||
|
||||
// Option to allow vendor extensions to be used even when they may be
|
||||
// incompatible with RenderDoc and cause corrupted replays or crashes.
|
||||
//
|
||||
// Default - inactive
|
||||
//
|
||||
// No values are documented, this option should only be used when absolutely
|
||||
// necessary as directed by a RenderDoc developer.
|
||||
eRENDERDOC_Option_AllowUnsupportedVendorExtensions = 12,
|
||||
|
||||
// Define a soft memory limit which some APIs may aim to keep overhead under where
|
||||
// possible. Anything above this limit will where possible be saved directly to disk during
|
||||
// capture.
|
||||
// This will cause increased disk space use (which may cause a capture to fail if disk space is
|
||||
// exhausted) as well as slower capture times.
|
||||
//
|
||||
// Not all memory allocations may be deferred like this so it is not a guarantee of a memory
|
||||
// limit.
|
||||
//
|
||||
// Units are in MBs, suggested values would range from 200MB to 1000MB.
|
||||
//
|
||||
// Default - 0 Megabytes
|
||||
eRENDERDOC_Option_SoftMemoryLimit = 13,
|
||||
} RENDERDOC_CaptureOption;
|
||||
|
||||
// Sets an option that controls how RenderDoc behaves on capture.
|
||||
//
|
||||
// Returns 1 if the option and value are valid
|
||||
// Returns 0 if either is invalid and the option is unchanged
|
||||
typedef int(RENDERDOC_CC *pRENDERDOC_SetCaptureOptionU32)(RENDERDOC_CaptureOption opt, uint32_t val);
|
||||
typedef int(RENDERDOC_CC *pRENDERDOC_SetCaptureOptionF32)(RENDERDOC_CaptureOption opt, float val);
|
||||
|
||||
// Gets the current value of an option as a uint32_t
|
||||
//
|
||||
// If the option is invalid, 0xffffffff is returned
|
||||
typedef uint32_t(RENDERDOC_CC *pRENDERDOC_GetCaptureOptionU32)(RENDERDOC_CaptureOption opt);
|
||||
|
||||
// Gets the current value of an option as a float
|
||||
//
|
||||
// If the option is invalid, -FLT_MAX is returned
|
||||
typedef float(RENDERDOC_CC *pRENDERDOC_GetCaptureOptionF32)(RENDERDOC_CaptureOption opt);
|
||||
|
||||
typedef enum RENDERDOC_InputButton
|
||||
{
|
||||
// '0' - '9' matches ASCII values
|
||||
eRENDERDOC_Key_0 = 0x30,
|
||||
eRENDERDOC_Key_1 = 0x31,
|
||||
eRENDERDOC_Key_2 = 0x32,
|
||||
eRENDERDOC_Key_3 = 0x33,
|
||||
eRENDERDOC_Key_4 = 0x34,
|
||||
eRENDERDOC_Key_5 = 0x35,
|
||||
eRENDERDOC_Key_6 = 0x36,
|
||||
eRENDERDOC_Key_7 = 0x37,
|
||||
eRENDERDOC_Key_8 = 0x38,
|
||||
eRENDERDOC_Key_9 = 0x39,
|
||||
|
||||
// 'A' - 'Z' matches ASCII values
|
||||
eRENDERDOC_Key_A = 0x41,
|
||||
eRENDERDOC_Key_B = 0x42,
|
||||
eRENDERDOC_Key_C = 0x43,
|
||||
eRENDERDOC_Key_D = 0x44,
|
||||
eRENDERDOC_Key_E = 0x45,
|
||||
eRENDERDOC_Key_F = 0x46,
|
||||
eRENDERDOC_Key_G = 0x47,
|
||||
eRENDERDOC_Key_H = 0x48,
|
||||
eRENDERDOC_Key_I = 0x49,
|
||||
eRENDERDOC_Key_J = 0x4A,
|
||||
eRENDERDOC_Key_K = 0x4B,
|
||||
eRENDERDOC_Key_L = 0x4C,
|
||||
eRENDERDOC_Key_M = 0x4D,
|
||||
eRENDERDOC_Key_N = 0x4E,
|
||||
eRENDERDOC_Key_O = 0x4F,
|
||||
eRENDERDOC_Key_P = 0x50,
|
||||
eRENDERDOC_Key_Q = 0x51,
|
||||
eRENDERDOC_Key_R = 0x52,
|
||||
eRENDERDOC_Key_S = 0x53,
|
||||
eRENDERDOC_Key_T = 0x54,
|
||||
eRENDERDOC_Key_U = 0x55,
|
||||
eRENDERDOC_Key_V = 0x56,
|
||||
eRENDERDOC_Key_W = 0x57,
|
||||
eRENDERDOC_Key_X = 0x58,
|
||||
eRENDERDOC_Key_Y = 0x59,
|
||||
eRENDERDOC_Key_Z = 0x5A,
|
||||
|
||||
// leave the rest of the ASCII range free
|
||||
// in case we want to use it later
|
||||
eRENDERDOC_Key_NonPrintable = 0x100,
|
||||
|
||||
eRENDERDOC_Key_Divide,
|
||||
eRENDERDOC_Key_Multiply,
|
||||
eRENDERDOC_Key_Subtract,
|
||||
eRENDERDOC_Key_Plus,
|
||||
|
||||
eRENDERDOC_Key_F1,
|
||||
eRENDERDOC_Key_F2,
|
||||
eRENDERDOC_Key_F3,
|
||||
eRENDERDOC_Key_F4,
|
||||
eRENDERDOC_Key_F5,
|
||||
eRENDERDOC_Key_F6,
|
||||
eRENDERDOC_Key_F7,
|
||||
eRENDERDOC_Key_F8,
|
||||
eRENDERDOC_Key_F9,
|
||||
eRENDERDOC_Key_F10,
|
||||
eRENDERDOC_Key_F11,
|
||||
eRENDERDOC_Key_F12,
|
||||
|
||||
eRENDERDOC_Key_Home,
|
||||
eRENDERDOC_Key_End,
|
||||
eRENDERDOC_Key_Insert,
|
||||
eRENDERDOC_Key_Delete,
|
||||
eRENDERDOC_Key_PageUp,
|
||||
eRENDERDOC_Key_PageDn,
|
||||
|
||||
eRENDERDOC_Key_Backspace,
|
||||
eRENDERDOC_Key_Tab,
|
||||
eRENDERDOC_Key_PrtScrn,
|
||||
eRENDERDOC_Key_Pause,
|
||||
|
||||
eRENDERDOC_Key_Max,
|
||||
} RENDERDOC_InputButton;
|
||||
|
||||
// Sets which key or keys can be used to toggle focus between multiple windows
|
||||
//
|
||||
// If keys is NULL or num is 0, toggle keys will be disabled
|
||||
typedef void(RENDERDOC_CC *pRENDERDOC_SetFocusToggleKeys)(RENDERDOC_InputButton *keys, int num);
|
||||
|
||||
// Sets which key or keys can be used to capture the next frame
|
||||
//
|
||||
// If keys is NULL or num is 0, capture keys will be disabled
|
||||
typedef void(RENDERDOC_CC *pRENDERDOC_SetCaptureKeys)(RENDERDOC_InputButton *keys, int num);
|
||||
|
||||
typedef enum RENDERDOC_OverlayBits
|
||||
{
|
||||
// This single bit controls whether the overlay is enabled or disabled globally
|
||||
eRENDERDOC_Overlay_Enabled = 0x1,
|
||||
|
||||
// Show the average framerate over several seconds as well as min/max
|
||||
eRENDERDOC_Overlay_FrameRate = 0x2,
|
||||
|
||||
// Show the current frame number
|
||||
eRENDERDOC_Overlay_FrameNumber = 0x4,
|
||||
|
||||
// Show a list of recent captures, and how many captures have been made
|
||||
eRENDERDOC_Overlay_CaptureList = 0x8,
|
||||
|
||||
// Default values for the overlay mask
|
||||
eRENDERDOC_Overlay_Default = (eRENDERDOC_Overlay_Enabled | eRENDERDOC_Overlay_FrameRate |
|
||||
eRENDERDOC_Overlay_FrameNumber | eRENDERDOC_Overlay_CaptureList),
|
||||
|
||||
// Enable all bits
|
||||
eRENDERDOC_Overlay_All = ~0U,
|
||||
|
||||
// Disable all bits
|
||||
eRENDERDOC_Overlay_None = 0,
|
||||
} RENDERDOC_OverlayBits;
|
||||
|
||||
// returns the overlay bits that have been set
|
||||
typedef uint32_t(RENDERDOC_CC *pRENDERDOC_GetOverlayBits)();
|
||||
// sets the overlay bits with an and & or mask
|
||||
typedef void(RENDERDOC_CC *pRENDERDOC_MaskOverlayBits)(uint32_t And, uint32_t Or);
|
||||
|
||||
// this function will attempt to remove RenderDoc's hooks in the application.
|
||||
//
|
||||
// Note that this can only work correctly if done immediately after
|
||||
// the module is loaded, before any API work happens. RenderDoc will remove its
|
||||
// injected hooks and shut down. Behaviour is undefined if this is called
|
||||
// after any API functions have been called, and there is still no guarantee of
|
||||
// success.
|
||||
typedef void(RENDERDOC_CC *pRENDERDOC_RemoveHooks)();
|
||||
|
||||
// DEPRECATED: compatibility for code compiled against pre-1.4.1 headers.
|
||||
typedef pRENDERDOC_RemoveHooks pRENDERDOC_Shutdown;
|
||||
|
||||
// This function will unload RenderDoc's crash handler.
|
||||
//
|
||||
// If you use your own crash handler and don't want RenderDoc's handler to
|
||||
// intercede, you can call this function to unload it and any unhandled
|
||||
// exceptions will pass to the next handler.
|
||||
typedef void(RENDERDOC_CC *pRENDERDOC_UnloadCrashHandler)();
|
||||
|
||||
// Sets the capture file path template
|
||||
//
|
||||
// pathtemplate is a UTF-8 string that gives a template for how captures will be named
|
||||
// and where they will be saved.
|
||||
//
|
||||
// Any extension is stripped off the path, and captures are saved in the directory
|
||||
// specified, and named with the filename and the frame number appended. If the
|
||||
// directory does not exist it will be created, including any parent directories.
|
||||
//
|
||||
// If pathtemplate is NULL, the template will remain unchanged
|
||||
//
|
||||
// Example:
|
||||
//
|
||||
// SetCaptureFilePathTemplate("my_captures/example");
|
||||
//
|
||||
// Capture #1 -> my_captures/example_frame123.rdc
|
||||
// Capture #2 -> my_captures/example_frame456.rdc
|
||||
typedef void(RENDERDOC_CC *pRENDERDOC_SetCaptureFilePathTemplate)(const char *pathtemplate);
|
||||
|
||||
// returns the current capture path template, see SetCaptureFilePathTemplate above, as a UTF-8 string
|
||||
typedef const char *(RENDERDOC_CC *pRENDERDOC_GetCaptureFilePathTemplate)();
|
||||
|
||||
// DEPRECATED: compatibility for code compiled against pre-1.1.2 headers.
|
||||
typedef pRENDERDOC_SetCaptureFilePathTemplate pRENDERDOC_SetLogFilePathTemplate;
|
||||
typedef pRENDERDOC_GetCaptureFilePathTemplate pRENDERDOC_GetLogFilePathTemplate;
|
||||
|
||||
// returns the number of captures that have been made
|
||||
typedef uint32_t(RENDERDOC_CC *pRENDERDOC_GetNumCaptures)();
|
||||
|
||||
// This function returns the details of a capture, by index. New captures are added
|
||||
// to the end of the list.
|
||||
//
|
||||
// filename will be filled with the absolute path to the capture file, as a UTF-8 string
|
||||
// pathlength will be written with the length in bytes of the filename string
|
||||
// timestamp will be written with the time of the capture, in seconds since the Unix epoch
|
||||
//
|
||||
// Any of the parameters can be NULL and they'll be skipped.
|
||||
//
|
||||
// The function will return 1 if the capture index is valid, or 0 if the index is invalid
|
||||
// If the index is invalid, the values will be unchanged
|
||||
//
|
||||
// Note: when captures are deleted in the UI they will remain in this list, so the
|
||||
// capture path may not exist anymore.
|
||||
typedef uint32_t(RENDERDOC_CC *pRENDERDOC_GetCapture)(uint32_t idx, char *filename,
|
||||
uint32_t *pathlength, uint64_t *timestamp);
|
||||
|
||||
// Sets the comments associated with a capture file. These comments are displayed in the
|
||||
// UI program when opening.
|
||||
//
|
||||
// filePath should be a path to the capture file to add comments to. If set to NULL or ""
|
||||
// the most recent capture file created will be used instead.
|
||||
// comments should be a NULL-terminated UTF-8 string to add as comments.
|
||||
//
|
||||
// Any existing comments will be overwritten.
|
||||
typedef void(RENDERDOC_CC *pRENDERDOC_SetCaptureFileComments)(const char *filePath,
|
||||
const char *comments);
|
||||
|
||||
// returns 1 if the RenderDoc UI is connected to this application, 0 otherwise
|
||||
typedef uint32_t(RENDERDOC_CC *pRENDERDOC_IsTargetControlConnected)();
|
||||
|
||||
// DEPRECATED: compatibility for code compiled against pre-1.1.1 headers.
|
||||
// This was renamed to IsTargetControlConnected in API 1.1.1, the old typedef is kept here for
|
||||
// backwards compatibility with old code, it is castable either way since it's ABI compatible
|
||||
// as the same function pointer type.
|
||||
typedef pRENDERDOC_IsTargetControlConnected pRENDERDOC_IsRemoteAccessConnected;
|
||||
|
||||
// This function will launch the Replay UI associated with the RenderDoc library injected
|
||||
// into the running application.
|
||||
//
|
||||
// if connectTargetControl is 1, the Replay UI will be launched with a command line parameter
|
||||
// to connect to this application
|
||||
// cmdline is the rest of the command line, as a UTF-8 string. E.g. a capture to open
|
||||
// if cmdline is NULL, the command line will be empty.
|
||||
//
|
||||
// returns the PID of the replay UI if successful, 0 if not successful.
|
||||
typedef uint32_t(RENDERDOC_CC *pRENDERDOC_LaunchReplayUI)(uint32_t connectTargetControl,
|
||||
const char *cmdline);
|
||||
|
||||
// RenderDoc can return a higher version than requested if it's backwards compatible,
|
||||
// this function returns the actual version returned. If a parameter is NULL, it will be
|
||||
// ignored and the others will be filled out.
|
||||
typedef void(RENDERDOC_CC *pRENDERDOC_GetAPIVersion)(int *major, int *minor, int *patch);
|
||||
|
||||
// Requests that the replay UI show itself (if hidden or not the current top window). This can be
|
||||
// used in conjunction with IsTargetControlConnected and LaunchReplayUI to intelligently handle
|
||||
// showing the UI after making a capture.
|
||||
//
|
||||
// This will return 1 if the request was successfully passed on, though it's not guaranteed that
|
||||
// the UI will be on top in all cases depending on OS rules. It will return 0 if there is no current
|
||||
// target control connection to make such a request, or if there was another error
|
||||
typedef uint32_t(RENDERDOC_CC *pRENDERDOC_ShowReplayUI)();
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Capturing functions
|
||||
//
|
||||
|
||||
// A device pointer is a pointer to the API's root handle.
|
||||
//
|
||||
// This would be an ID3D11Device, HGLRC/GLXContext, ID3D12Device, etc
|
||||
typedef void *RENDERDOC_DevicePointer;
|
||||
|
||||
// A window handle is the OS's native window handle
|
||||
//
|
||||
// This would be an HWND, GLXDrawable, etc
|
||||
typedef void *RENDERDOC_WindowHandle;
|
||||
|
||||
// A helper macro for Vulkan, where the device handle cannot be used directly.
|
||||
//
|
||||
// Passing the VkInstance to this macro will return the RENDERDOC_DevicePointer to use.
|
||||
//
|
||||
// Specifically, the value needed is the dispatch table pointer, which sits as the first
|
||||
// pointer-sized object in the memory pointed to by the VkInstance. Thus we cast to a void** and
|
||||
// indirect once.
|
||||
#define RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(inst) (*((void **)(inst)))
|
||||
|
||||
// This sets the RenderDoc in-app overlay in the API/window pair as 'active' and it will
|
||||
// respond to keypresses. Neither parameter can be NULL
|
||||
typedef void(RENDERDOC_CC *pRENDERDOC_SetActiveWindow)(RENDERDOC_DevicePointer device,
|
||||
RENDERDOC_WindowHandle wndHandle);
|
||||
|
||||
// capture the next frame on whichever window and API is currently considered active
|
||||
typedef void(RENDERDOC_CC *pRENDERDOC_TriggerCapture)();
|
||||
|
||||
// capture the next N frames on whichever window and API is currently considered active
|
||||
typedef void(RENDERDOC_CC *pRENDERDOC_TriggerMultiFrameCapture)(uint32_t numFrames);
|
||||
|
||||
// When choosing either a device pointer or a window handle to capture, you can pass NULL.
|
||||
// Passing NULL specifies a 'wildcard' match against anything. This allows you to specify
|
||||
// any API rendering to a specific window, or a specific API instance rendering to any window,
|
||||
// or in the simplest case of one window and one API, you can just pass NULL for both.
|
||||
//
|
||||
// In either case, if there are two or more possible matching (device,window) pairs it
|
||||
// is undefined which one will be captured.
|
||||
//
|
||||
// Note: for headless rendering you can pass NULL for the window handle and either specify
|
||||
// a device pointer or leave it NULL as above.
|
||||
|
||||
// Immediately starts capturing API calls on the specified device pointer and window handle.
|
||||
//
|
||||
// If there is no matching thing to capture (e.g. no supported API has been initialised),
|
||||
// this will do nothing.
|
||||
//
|
||||
// The results are undefined (including crashes) if two captures are started overlapping,
|
||||
// even on separate devices and/or windows.
|
||||
typedef void(RENDERDOC_CC *pRENDERDOC_StartFrameCapture)(RENDERDOC_DevicePointer device,
|
||||
RENDERDOC_WindowHandle wndHandle);
|
||||
|
||||
// Returns whether or not a frame capture is currently ongoing anywhere.
|
||||
//
|
||||
// This will return 1 if a capture is ongoing, and 0 if there is no capture running
|
||||
typedef uint32_t(RENDERDOC_CC *pRENDERDOC_IsFrameCapturing)();
|
||||
|
||||
// Ends capturing immediately.
|
||||
//
|
||||
// This will return 1 if the capture succeeded, and 0 if there was an error capturing.
|
||||
typedef uint32_t(RENDERDOC_CC *pRENDERDOC_EndFrameCapture)(RENDERDOC_DevicePointer device,
|
||||
RENDERDOC_WindowHandle wndHandle);
|
||||
|
||||
// Ends capturing immediately and discard any data stored without saving to disk.
|
||||
//
|
||||
// This will return 1 if the capture was discarded, and 0 if there was an error or no capture
|
||||
// was in progress
|
||||
typedef uint32_t(RENDERDOC_CC *pRENDERDOC_DiscardFrameCapture)(RENDERDOC_DevicePointer device,
|
||||
RENDERDOC_WindowHandle wndHandle);
|
||||
|
||||
// Only valid to be called between a call to StartFrameCapture and EndFrameCapture. Gives a custom
|
||||
// title to the capture produced which will be displayed in the UI.
|
||||
//
|
||||
// If multiple captures are ongoing, this title will be applied to the first capture to end after
|
||||
// this call. The second capture to end will have no title, unless this function is called again.
|
||||
//
|
||||
// Calling this function has no effect if no capture is currently running, and if it is called
|
||||
// multiple times only the last title will be used.
|
||||
typedef void(RENDERDOC_CC *pRENDERDOC_SetCaptureTitle)(const char *title);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// RenderDoc API versions
|
||||
//
|
||||
|
||||
// RenderDoc uses semantic versioning (http://semver.org/).
|
||||
//
|
||||
// MAJOR version is incremented when incompatible API changes happen.
|
||||
// MINOR version is incremented when functionality is added in a backwards-compatible manner.
|
||||
// PATCH version is incremented when backwards-compatible bug fixes happen.
|
||||
//
|
||||
// Note that this means the API returned can be higher than the one you might have requested.
|
||||
// e.g. if you are running against a newer RenderDoc that supports 1.0.1, it will be returned
|
||||
// instead of 1.0.0. You can check this with the GetAPIVersion entry point
|
||||
typedef enum RENDERDOC_Version
|
||||
{
|
||||
eRENDERDOC_API_Version_1_0_0 = 10000, // RENDERDOC_API_1_0_0 = 1 00 00
|
||||
eRENDERDOC_API_Version_1_0_1 = 10001, // RENDERDOC_API_1_0_1 = 1 00 01
|
||||
eRENDERDOC_API_Version_1_0_2 = 10002, // RENDERDOC_API_1_0_2 = 1 00 02
|
||||
eRENDERDOC_API_Version_1_1_0 = 10100, // RENDERDOC_API_1_1_0 = 1 01 00
|
||||
eRENDERDOC_API_Version_1_1_1 = 10101, // RENDERDOC_API_1_1_1 = 1 01 01
|
||||
eRENDERDOC_API_Version_1_1_2 = 10102, // RENDERDOC_API_1_1_2 = 1 01 02
|
||||
eRENDERDOC_API_Version_1_2_0 = 10200, // RENDERDOC_API_1_2_0 = 1 02 00
|
||||
eRENDERDOC_API_Version_1_3_0 = 10300, // RENDERDOC_API_1_3_0 = 1 03 00
|
||||
eRENDERDOC_API_Version_1_4_0 = 10400, // RENDERDOC_API_1_4_0 = 1 04 00
|
||||
eRENDERDOC_API_Version_1_4_1 = 10401, // RENDERDOC_API_1_4_1 = 1 04 01
|
||||
eRENDERDOC_API_Version_1_4_2 = 10402, // RENDERDOC_API_1_4_2 = 1 04 02
|
||||
eRENDERDOC_API_Version_1_5_0 = 10500, // RENDERDOC_API_1_5_0 = 1 05 00
|
||||
eRENDERDOC_API_Version_1_6_0 = 10600, // RENDERDOC_API_1_6_0 = 1 06 00
|
||||
} RENDERDOC_Version;
|
||||
|
||||
// API version changelog:
|
||||
//
|
||||
// 1.0.0 - initial release
|
||||
// 1.0.1 - Bugfix: IsFrameCapturing() was returning false for captures that were triggered
|
||||
// by keypress or TriggerCapture, instead of Start/EndFrameCapture.
|
||||
// 1.0.2 - Refactor: Renamed eRENDERDOC_Option_DebugDeviceMode to eRENDERDOC_Option_APIValidation
|
||||
// 1.1.0 - Add feature: TriggerMultiFrameCapture(). Backwards compatible with 1.0.x since the new
|
||||
// function pointer is added to the end of the struct, the original layout is identical
|
||||
// 1.1.1 - Refactor: Renamed remote access to target control (to better disambiguate from remote
|
||||
// replay/remote server concept in replay UI)
|
||||
// 1.1.2 - Refactor: Renamed "log file" in function names to just capture, to clarify that these
|
||||
// are captures and not debug logging files. This is the first API version in the v1.0
|
||||
// branch.
|
||||
// 1.2.0 - Added feature: SetCaptureFileComments() to add comments to a capture file that will be
|
||||
// displayed in the UI program on load.
|
||||
// 1.3.0 - Added feature: New capture option eRENDERDOC_Option_AllowUnsupportedVendorExtensions
|
||||
// which allows users to opt-in to allowing unsupported vendor extensions to function.
|
||||
// Should be used at the user's own risk.
|
||||
// Refactor: Renamed eRENDERDOC_Option_VerifyMapWrites to
|
||||
// eRENDERDOC_Option_VerifyBufferAccess, which now also controls initialisation to
|
||||
// 0xdddddddd of uninitialised buffer contents.
|
||||
// 1.4.0 - Added feature: DiscardFrameCapture() to discard a frame capture in progress and stop
|
||||
// capturing without saving anything to disk.
|
||||
// 1.4.1 - Refactor: Renamed Shutdown to RemoveHooks to better clarify what is happening
|
||||
// 1.4.2 - Refactor: Renamed 'draws' to 'actions' in callstack capture option.
|
||||
// 1.5.0 - Added feature: ShowReplayUI() to request that the replay UI show itself if connected
|
||||
// 1.6.0 - Added feature: SetCaptureTitle() which can be used to set a title for a
|
||||
// capture made with StartFrameCapture() or EndFrameCapture()
|
||||
|
||||
// Table of function pointers for the RenderDoc in-application API, laid out for API version 1.6.0.
// Members added by newer API versions are placed after the older ones (see the "new function in"
// markers below), and renamed members share storage with their previous name through anonymous
// unions so that code written against the old names keeps compiling.
typedef struct RENDERDOC_API_1_6_0
|
||||
{
|
||||
pRENDERDOC_GetAPIVersion GetAPIVersion;
|
||||
|
||||
pRENDERDOC_SetCaptureOptionU32 SetCaptureOptionU32;
|
||||
pRENDERDOC_SetCaptureOptionF32 SetCaptureOptionF32;
|
||||
|
||||
pRENDERDOC_GetCaptureOptionU32 GetCaptureOptionU32;
|
||||
pRENDERDOC_GetCaptureOptionF32 GetCaptureOptionF32;
|
||||
|
||||
pRENDERDOC_SetFocusToggleKeys SetFocusToggleKeys;
|
||||
pRENDERDOC_SetCaptureKeys SetCaptureKeys;
|
||||
|
||||
pRENDERDOC_GetOverlayBits GetOverlayBits;
|
||||
pRENDERDOC_MaskOverlayBits MaskOverlayBits;
|
||||
|
||||
// Shutdown was renamed to RemoveHooks in 1.4.1.
|
||||
// These unions allow old code to continue compiling without changes
|
||||
union
|
||||
{
|
||||
// deprecated name
pRENDERDOC_Shutdown Shutdown;
|
||||
// current name
pRENDERDOC_RemoveHooks RemoveHooks;
|
||||
};
|
||||
pRENDERDOC_UnloadCrashHandler UnloadCrashHandler;
|
||||
|
||||
// Get/SetLogFilePathTemplate was renamed to Get/SetCaptureFilePathTemplate in 1.1.2.
|
||||
// These unions allow old code to continue compiling without changes
|
||||
union
|
||||
{
|
||||
// deprecated name
|
||||
pRENDERDOC_SetLogFilePathTemplate SetLogFilePathTemplate;
|
||||
// current name
|
||||
pRENDERDOC_SetCaptureFilePathTemplate SetCaptureFilePathTemplate;
|
||||
};
|
||||
union
|
||||
{
|
||||
// deprecated name
|
||||
pRENDERDOC_GetLogFilePathTemplate GetLogFilePathTemplate;
|
||||
// current name
|
||||
pRENDERDOC_GetCaptureFilePathTemplate GetCaptureFilePathTemplate;
|
||||
};
|
||||
|
||||
pRENDERDOC_GetNumCaptures GetNumCaptures;
|
||||
pRENDERDOC_GetCapture GetCapture;
|
||||
|
||||
pRENDERDOC_TriggerCapture TriggerCapture;
|
||||
|
||||
// IsRemoteAccessConnected was renamed to IsTargetControlConnected in 1.1.1.
|
||||
// This union allows old code to continue compiling without changes
|
||||
union
|
||||
{
|
||||
// deprecated name
|
||||
pRENDERDOC_IsRemoteAccessConnected IsRemoteAccessConnected;
|
||||
// current name
|
||||
pRENDERDOC_IsTargetControlConnected IsTargetControlConnected;
|
||||
};
|
||||
pRENDERDOC_LaunchReplayUI LaunchReplayUI;
|
||||
|
||||
pRENDERDOC_SetActiveWindow SetActiveWindow;
|
||||
|
||||
pRENDERDOC_StartFrameCapture StartFrameCapture;
|
||||
pRENDERDOC_IsFrameCapturing IsFrameCapturing;
|
||||
pRENDERDOC_EndFrameCapture EndFrameCapture;
|
||||
|
||||
// new function in 1.1.0
|
||||
pRENDERDOC_TriggerMultiFrameCapture TriggerMultiFrameCapture;
|
||||
|
||||
// new function in 1.2.0
|
||||
pRENDERDOC_SetCaptureFileComments SetCaptureFileComments;
|
||||
|
||||
// new function in 1.4.0
|
||||
pRENDERDOC_DiscardFrameCapture DiscardFrameCapture;
|
||||
|
||||
// new function in 1.5.0
|
||||
pRENDERDOC_ShowReplayUI ShowReplayUI;
|
||||
|
||||
// new function in 1.6.0
|
||||
pRENDERDOC_SetCaptureTitle SetCaptureTitle;
|
||||
} RENDERDOC_API_1_6_0;
|
||||
|
||||
// Every older API version name is an alias of the 1.6.0 struct, so code that names an earlier
// version still refers to the same type.
typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_0_0;
|
||||
typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_0_1;
|
||||
typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_0_2;
|
||||
typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_1_0;
|
||||
typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_1_1;
|
||||
typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_1_2;
|
||||
typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_2_0;
|
||||
typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_3_0;
|
||||
typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_4_0;
|
||||
typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_4_1;
|
||||
typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_4_2;
|
||||
typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_5_0;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// RenderDoc API entry point
|
||||
//
|
||||
// This entry point can be obtained via GetProcAddress/dlsym if RenderDoc is available.
|
||||
//
|
||||
// The name is the same as the typedef - "RENDERDOC_GetAPI"
|
||||
//
|
||||
// This function is not thread safe, and should not be called on multiple threads at once.
|
||||
// Ideally, call this once as early as possible in your application's startup, before doing
|
||||
// any API work, since some configuration functionality etc has to be done also before
|
||||
// initialising any APIs.
|
||||
//
|
||||
// Parameters:
|
||||
// version is a single value from the RENDERDOC_Version above.
|
||||
//
|
||||
// outAPIPointers will be filled out with a pointer to the corresponding struct of function
|
||||
// pointers.
|
||||
//
|
||||
// Returns:
|
||||
// 1 - if the outAPIPointers has been filled with a pointer to the API struct requested
|
||||
// 0 - if the requested version is not supported or the arguments are invalid.
|
||||
//
|
||||
typedef int(RENDERDOC_CC *pRENDERDOC_GetAPI)(RENDERDOC_Version version, void **outAPIPointers);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
|
@ -22,8 +22,10 @@ static bool isShowSplash = false;
|
|||
static bool isNullGpu = false;
|
||||
static bool shouldDumpShaders = false;
|
||||
static bool shouldDumpPM4 = false;
|
||||
static u32 vblankDivider = 1;
|
||||
static bool vkValidation = false;
|
||||
static bool vkValidationSync = false;
|
||||
static bool rdocEnable = false;
|
||||
// Gui
|
||||
std::string settings_install_dir = "";
|
||||
u32 main_window_geometry_x = 400;
|
||||
|
@ -94,6 +96,14 @@ bool dumpPM4() {
|
|||
return shouldDumpPM4;
|
||||
}
|
||||
|
||||
// Returns the current value of the rdocEnable flag.
bool isRdocEnabled() {
|
||||
return rdocEnable;
|
||||
}
|
||||
|
||||
// Returns the current value of vblankDivider.
// NOTE(review): the value is returned as-is, including 0 — confirm that callers which divide by
// it guard against a zero divider.
u32 vblankDiv() {
|
||||
return vblankDivider;
|
||||
}
|
||||
|
||||
// Returns the current value of the vkValidation flag.
bool vkValidationEnabled() {
|
||||
return vkValidation;
|
||||
}
|
||||
|
@ -233,10 +243,10 @@ void load(const std::filesystem::path& path) {
|
|||
|
||||
screenWidth = toml::find_or<toml::integer>(gpu, "screenWidth", screenWidth);
|
||||
screenHeight = toml::find_or<toml::integer>(gpu, "screenHeight", screenHeight);
|
||||
gpuId = toml::find_or<toml::integer>(gpu, "gpuId", 0);
|
||||
isNullGpu = toml::find_or<toml::boolean>(gpu, "nullGpu", false);
|
||||
shouldDumpShaders = toml::find_or<toml::boolean>(gpu, "dumpShaders", false);
|
||||
shouldDumpPM4 = toml::find_or<toml::boolean>(gpu, "dumpPM4", false);
|
||||
vblankDivider = toml::find_or<toml::integer>(gpu, "vblankDivider", 1);
|
||||
}
|
||||
}
|
||||
if (data.contains("Vulkan")) {
|
||||
|
@ -244,8 +254,10 @@ void load(const std::filesystem::path& path) {
|
|||
if (vkResult.is_ok()) {
|
||||
auto vk = vkResult.unwrap();
|
||||
|
||||
gpuId = toml::find_or<toml::integer>(vk, "gpuId", 0);
|
||||
vkValidation = toml::find_or<toml::boolean>(vk, "validation", true);
|
||||
vkValidationSync = toml::find_or<toml::boolean>(vk, "validation_sync", true);
|
||||
rdocEnable = toml::find_or<toml::boolean>(vk, "rdocEnable", false);
|
||||
}
|
||||
}
|
||||
if (data.contains("Debug")) {
|
||||
|
@ -312,14 +324,16 @@ void save(const std::filesystem::path& path) {
|
|||
data["General"]["logFilter"] = logFilter;
|
||||
data["General"]["logType"] = logType;
|
||||
data["General"]["showSplash"] = isShowSplash;
|
||||
data["GPU"]["gpuId"] = gpuId;
|
||||
data["GPU"]["screenWidth"] = screenWidth;
|
||||
data["GPU"]["screenHeight"] = screenHeight;
|
||||
data["GPU"]["nullGpu"] = isNullGpu;
|
||||
data["GPU"]["dumpShaders"] = shouldDumpShaders;
|
||||
data["GPU"]["dumpPM4"] = shouldDumpPM4;
|
||||
data["GPU"]["vblankDivider"] = vblankDivider;
|
||||
data["Vulkan"]["gpuId"] = gpuId;
|
||||
data["Vulkan"]["validation"] = vkValidation;
|
||||
data["Vulkan"]["validation_sync"] = vkValidationSync;
|
||||
data["Vulkan"]["rdocEnable"] = rdocEnable;
|
||||
data["Debug"]["DebugDump"] = isDebugDump;
|
||||
data["LLE"]["libc"] = isLibc;
|
||||
data["GUI"]["theme"] = mw_themes;
|
||||
|
|
|
@ -26,6 +26,8 @@ bool showSplash();
|
|||
bool nullGpu();
|
||||
bool dumpShaders();
|
||||
bool dumpPM4();
|
||||
bool isRdocEnabled();
|
||||
u32 vblankDiv();
|
||||
|
||||
bool vkValidationEnabled();
|
||||
bool vkValidationSyncEnabled();
|
||||
|
|
|
@ -73,6 +73,7 @@ static auto UserPaths = [] {
|
|||
create_path(PathType::TempDataDir, user_dir / TEMPDATA_DIR);
|
||||
create_path(PathType::SysModuleDir, user_dir / SYSMODULES_DIR);
|
||||
create_path(PathType::DownloadDir, user_dir / DOWNLOAD_DIR);
|
||||
create_path(PathType::CapturesDir, user_dir / CAPTURES_DIR);
|
||||
|
||||
return paths;
|
||||
}();
|
||||
|
|
|
@ -19,6 +19,7 @@ enum class PathType {
|
|||
GameDataDir, // Where game data is stored.
|
||||
SysModuleDir, // Where system modules are stored.
|
||||
DownloadDir, // Where downloads/temp files are stored.
|
||||
CapturesDir, // Where rdoc captures are stored.
|
||||
};
|
||||
|
||||
constexpr auto PORTABLE_DIR = "user";
|
||||
|
@ -33,6 +34,7 @@ constexpr auto GAMEDATA_DIR = "data";
|
|||
constexpr auto TEMPDATA_DIR = "temp";
|
||||
constexpr auto SYSMODULES_DIR = "sys_modules";
|
||||
constexpr auto DOWNLOAD_DIR = "download";
|
||||
constexpr auto CAPTURES_DIR = "captures";
|
||||
|
||||
// Filenames
|
||||
constexpr auto LOG_FILE = "shad_log.txt";
|
||||
|
|
|
@ -285,20 +285,24 @@ static void GenerateTcbAccess(const ZydisDecodedOperand* operands, Xbyak::CodeGe
|
|||
const auto slot = GetTcbKey();
|
||||
|
||||
#if defined(_WIN32)
|
||||
// The following logic is based on the wine implementation of TlsGetValue
|
||||
// https://github.com/wine-mirror/wine/blob/a27b9551/dlls/kernelbase/thread.c#L719
|
||||
// The following logic is based on the Kernel32.dll asm of TlsGetValue
|
||||
static constexpr u32 TlsSlotsOffset = 0x1480;
|
||||
static constexpr u32 TlsExpansionSlotsOffset = 0x1780;
|
||||
static constexpr u32 TlsMinimumAvailable = 64;
|
||||
|
||||
const u32 teb_offset = slot < TlsMinimumAvailable ? TlsSlotsOffset : TlsExpansionSlotsOffset;
|
||||
const u32 tls_index = slot < TlsMinimumAvailable ? slot : slot - TlsMinimumAvailable;
|
||||
|
||||
// Load the pointer to the table of TLS slots.
|
||||
c.putSeg(gs);
|
||||
c.mov(dst, ptr[reinterpret_cast<void*>(teb_offset)]);
|
||||
// Load the pointer to our buffer.
|
||||
c.mov(dst, qword[dst + tls_index * sizeof(LPVOID)]);
|
||||
if (slot < TlsMinimumAvailable) {
|
||||
// Load the pointer to TLS slots.
|
||||
c.mov(dst, ptr[reinterpret_cast<void*>(TlsSlotsOffset + slot * sizeof(LPVOID))]);
|
||||
} else {
|
||||
const u32 tls_index = slot - TlsMinimumAvailable;
|
||||
|
||||
// Load the pointer to the table of TLS expansion slots.
|
||||
c.mov(dst, ptr[reinterpret_cast<void*>(TlsExpansionSlotsOffset)]);
|
||||
// Load the pointer to our buffer.
|
||||
c.mov(dst, qword[dst + tls_index * sizeof(LPVOID)]);
|
||||
}
|
||||
#elif defined(__APPLE__)
|
||||
// The following logic is based on the Darwin implementation of _os_tsd_get_direct, used by
|
||||
// pthread_getspecific https://github.com/apple/darwin-xnu/blob/main/libsyscall/os/tsd.h#L89-L96
|
||||
|
|
|
@ -20,13 +20,12 @@
|
|||
|
||||
extern Frontend::WindowSDL* g_window;
|
||||
std::unique_ptr<Vulkan::RendererVulkan> renderer;
|
||||
std::unique_ptr<AmdGpu::Liverpool> liverpool;
|
||||
|
||||
namespace Libraries::GnmDriver {
|
||||
|
||||
using namespace AmdGpu;
|
||||
|
||||
static std::unique_ptr<AmdGpu::Liverpool> liverpool;
|
||||
|
||||
enum GnmEventIdents : u64 {
|
||||
Compute0RelMem = 0x00,
|
||||
Compute1RelMem = 0x01,
|
||||
|
@ -2131,6 +2130,7 @@ int PS4_SYSV_ABI sceGnmSubmitDone() {
|
|||
if (!liverpool->IsGpuIdle()) {
|
||||
submission_lock = true;
|
||||
}
|
||||
liverpool->SubmitDone();
|
||||
send_init_packet = true;
|
||||
++frames_submitted;
|
||||
return ORBIS_OK;
|
||||
|
|
|
@ -78,9 +78,7 @@ bool EqueueInternal::TriggerEvent(u64 ident, s16 filter, void* trigger_data) {
|
|||
std::scoped_lock lock{m_mutex};
|
||||
|
||||
for (auto& event : m_events) {
|
||||
ASSERT_MSG(event.event.filter == filter,
|
||||
"Event to trigger doesn't match to queue events");
|
||||
if (event.event.ident == ident) {
|
||||
if ((event.event.ident == ident) && (event.event.filter == filter)) {
|
||||
event.Trigger(trigger_data);
|
||||
has_found = true;
|
||||
}
|
||||
|
|
|
@ -41,7 +41,6 @@ public:
|
|||
AddWaiter(waiter);
|
||||
|
||||
// Perform the wait.
|
||||
std::exchange(lk, std::unique_lock{waiter.mutex});
|
||||
return waiter.Wait(lk, timeout);
|
||||
}
|
||||
|
||||
|
@ -59,10 +58,9 @@ public:
|
|||
it++;
|
||||
continue;
|
||||
}
|
||||
std::scoped_lock lk2{waiter.mutex};
|
||||
it = wait_list.erase(it);
|
||||
token_count -= waiter.need_count;
|
||||
waiter.cv.notify_one();
|
||||
it = wait_list.erase(it);
|
||||
}
|
||||
|
||||
return true;
|
||||
|
@ -84,7 +82,6 @@ public:
|
|||
|
||||
public:
|
||||
struct WaitingThread : public ListBaseHook {
|
||||
std::mutex mutex;
|
||||
std::string name;
|
||||
std::condition_variable cv;
|
||||
u32 priority;
|
||||
|
|
|
@ -43,8 +43,8 @@ namespace Libraries {
|
|||
void InitHLELibs(Core::Loader::SymbolsResolver* sym) {
|
||||
LOG_INFO(Lib_Kernel, "Initializing HLE libraries");
|
||||
Libraries::Kernel::LibKernel_Register(sym);
|
||||
Libraries::VideoOut::RegisterLib(sym);
|
||||
Libraries::GnmDriver::RegisterlibSceGnmDriver(sym);
|
||||
Libraries::VideoOut::RegisterLib(sym);
|
||||
if (!Config::isLleLibc()) {
|
||||
Libraries::LibC::libcSymbolsRegister(sym);
|
||||
}
|
||||
|
|
|
@ -3,14 +3,16 @@
|
|||
|
||||
#include <pthread.h>
|
||||
#include "common/assert.h"
|
||||
#include "common/config.h"
|
||||
#include "common/debug.h"
|
||||
#include "common/thread.h"
|
||||
#include "core/libraries/error_codes.h"
|
||||
#include "core/libraries/kernel/time_management.h"
|
||||
#include "core/libraries/videoout/driver.h"
|
||||
#include "core/platform.h"
|
||||
|
||||
#include "video_core/renderer_vulkan/renderer_vulkan.h"
|
||||
|
||||
extern std::unique_ptr<Vulkan::RendererVulkan> renderer;
|
||||
extern std::unique_ptr<AmdGpu::Liverpool> liverpool;
|
||||
|
||||
namespace Libraries::VideoOut {
|
||||
|
||||
|
@ -41,20 +43,18 @@ VideoOutDriver::VideoOutDriver(u32 width, u32 height) {
|
|||
main_port.resolution.fullHeight = height;
|
||||
main_port.resolution.paneWidth = width;
|
||||
main_port.resolution.paneHeight = height;
|
||||
present_thread = std::jthread([&](std::stop_token token) { PresentThread(token); });
|
||||
}
|
||||
|
||||
VideoOutDriver::~VideoOutDriver() = default;
|
||||
|
||||
int VideoOutDriver::Open(const ServiceThreadParams* params) {
|
||||
std::scoped_lock lock{mutex};
|
||||
|
||||
if (main_port.is_open) {
|
||||
return ORBIS_VIDEO_OUT_ERROR_RESOURCE_BUSY;
|
||||
}
|
||||
|
||||
int handle = 1;
|
||||
main_port.is_open = true;
|
||||
return handle;
|
||||
liverpool->SetVoPort(&main_port);
|
||||
return 1;
|
||||
}
|
||||
|
||||
void VideoOutDriver::Close(s32 handle) {
|
||||
|
@ -158,31 +158,22 @@ int VideoOutDriver::UnregisterBuffers(VideoOutPort* port, s32 attributeIndex) {
|
|||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
void VideoOutDriver::Flip(std::chrono::microseconds timeout) {
|
||||
Request req;
|
||||
{
|
||||
std::unique_lock lock{mutex};
|
||||
submit_cond.wait_for(lock, timeout, [&] { return !requests.empty(); });
|
||||
if (requests.empty()) {
|
||||
renderer->ShowSplash();
|
||||
return;
|
||||
}
|
||||
|
||||
// Retrieve the request.
|
||||
req = requests.front();
|
||||
requests.pop();
|
||||
std::chrono::microseconds VideoOutDriver::Flip(const Request& req) {
|
||||
if (!req) {
|
||||
return std::chrono::microseconds{0};
|
||||
}
|
||||
|
||||
const auto start = std::chrono::high_resolution_clock::now();
|
||||
|
||||
// Whatever the game is rendering show splash if it is active
|
||||
if (!renderer->ShowSplash(req.frame)) {
|
||||
// Present the frame.
|
||||
renderer->Present(req.frame);
|
||||
}
|
||||
|
||||
std::scoped_lock lock{mutex};
|
||||
|
||||
// Update flip status.
|
||||
auto& flip_status = req.port->flip_status;
|
||||
auto* port = req.port;
|
||||
auto& flip_status = port->flip_status;
|
||||
flip_status.count++;
|
||||
flip_status.processTime = Libraries::Kernel::sceKernelGetProcessTime();
|
||||
flip_status.tsc = Libraries::Kernel::sceKernelReadTsc();
|
||||
|
@ -192,7 +183,7 @@ void VideoOutDriver::Flip(std::chrono::microseconds timeout) {
|
|||
flip_status.flipPendingNum = static_cast<int>(requests.size());
|
||||
|
||||
// Trigger flip events for the port.
|
||||
for (auto& event : req.port->flip_events) {
|
||||
for (auto& event : port->flip_events) {
|
||||
if (event != nullptr) {
|
||||
event->TriggerEvent(SCE_VIDEO_OUT_EVENT_FLIP, Kernel::SceKernelEvent::Filter::VideoOut,
|
||||
reinterpret_cast<void*>(req.flip_arg));
|
||||
|
@ -201,21 +192,23 @@ void VideoOutDriver::Flip(std::chrono::microseconds timeout) {
|
|||
|
||||
// Reset flip label
|
||||
if (req.index != -1) {
|
||||
req.port->buffer_labels[req.index] = 0;
|
||||
port->buffer_labels[req.index] = 0;
|
||||
port->SignalVoLabel();
|
||||
}
|
||||
|
||||
const auto end = std::chrono::high_resolution_clock::now();
|
||||
return std::chrono::duration_cast<std::chrono::microseconds>(end - start);
|
||||
}
|
||||
|
||||
bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg,
|
||||
bool is_eop /*= false*/) {
|
||||
std::scoped_lock lock{mutex};
|
||||
|
||||
Vulkan::Frame* frame;
|
||||
if (index == -1) {
|
||||
frame = renderer->PrepareBlankFrame();
|
||||
} else {
|
||||
const auto& buffer = port->buffer_slots[index];
|
||||
const auto& group = port->groups[buffer.group_index];
|
||||
frame = renderer->PrepareFrame(group, buffer.address_left);
|
||||
frame = renderer->PrepareFrame(group, buffer.address_left, is_eop);
|
||||
}
|
||||
|
||||
if (index != -1 && requests.size() >= port->NumRegisteredBuffers()) {
|
||||
|
@ -223,6 +216,7 @@ bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg,
|
|||
return false;
|
||||
}
|
||||
|
||||
std::scoped_lock lock{mutex};
|
||||
requests.push({
|
||||
.frame = frame,
|
||||
.port = port,
|
||||
|
@ -234,24 +228,53 @@ bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg,
|
|||
|
||||
port->flip_status.flipPendingNum = static_cast<int>(requests.size());
|
||||
port->flip_status.gcQueueNum = 0;
|
||||
submit_cond.notify_one();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void VideoOutDriver::Vblank() {
|
||||
std::scoped_lock lock{mutex};
|
||||
/**
 * Body of the presentation thread. Runs until a stop is requested on `token`.
 *
 * Every iteration sleeps for one vblank period (16 ms divided by Config::vblankDiv()), minus the
 * time the previous flip took. When the vblank counter is a multiple of (flip_rate + 1) it
 * dequeues at most one pending flip request and hands it to Flip(). It then advances the vblank
 * status under main_port.vo_mutex, wakes vblank waiters and triggers the registered vblank events.
 *
 * @param token Stop token polled once per iteration.
 */
void VideoOutDriver::PresentThread(std::stop_token token) {
    static constexpr std::chrono::milliseconds VblankPeriod{16};
    Common::SetCurrentThreadName("PresentThread");

    // Pops the oldest queued request under `mutex`; yields a default-constructed request when the
    // queue is empty.
    const auto receive_request = [this]() -> Request {
        std::scoped_lock lk{mutex};
        if (!requests.empty()) {
            const auto request = requests.front();
            requests.pop();
            return request;
        }
        return {};
    };

    // A divider of 0 would make the division below divide by zero, so fall back to 1.
    const u32 vblank_div = Config::vblankDiv();
    const auto vblank_period = VblankPeriod / (vblank_div != 0 ? vblank_div : u32{1});
    auto delay = std::chrono::microseconds{0};
    while (!token.stop_requested()) {
        // Sleep for most of the vblank duration.
        std::this_thread::sleep_for(vblank_period - delay);

        // Check if it's time to take a request.
        auto& vblank_status = main_port.vblank_status;
        if (vblank_status.count % (main_port.flip_rate + 1) == 0) {
            const auto request = receive_request();
            delay = Flip(request);
            FRAME_END;
        } else {
            // No flip ran on this tick, so there is no flip time to subtract from the next sleep.
            // Keeping the previous value would shorten every skipped tick by a stale duration.
            delay = std::chrono::microseconds{0};
        }

        {
            // Needs lock here as can be concurrently read by `sceVideoOutGetVblankStatus`
            std::unique_lock lock{main_port.vo_mutex};
            vblank_status.count++;
            vblank_status.processTime = Libraries::Kernel::sceKernelGetProcessTime();
            vblank_status.tsc = Libraries::Kernel::sceKernelReadTsc();
            main_port.vblank_cv.notify_all();
        }

        // Trigger vblank events for the port.
        for (auto& event : main_port.vblank_events) {
            if (event != nullptr) {
                event->TriggerEvent(SCE_VIDEO_OUT_EVENT_VBLANK,
                                    Kernel::SceKernelEvent::Filter::VideoOut, nullptr);
            }
        }
    }
}
|
||||
|
|
|
@ -3,10 +3,13 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "common/debug.h"
|
||||
#include "common/polyfill_thread.h"
|
||||
#include "core/libraries/videoout/video_out.h"
|
||||
|
||||
#include <condition_variable>
|
||||
#include <mutex>
|
||||
#include <queue>
|
||||
#include "core/libraries/videoout/video_out.h"
|
||||
|
||||
namespace Vulkan {
|
||||
struct Frame;
|
||||
|
@ -25,6 +28,9 @@ struct VideoOutPort {
|
|||
SceVideoOutVblankStatus vblank_status;
|
||||
std::vector<Kernel::SceKernelEqueue> flip_events;
|
||||
std::vector<Kernel::SceKernelEqueue> vblank_events;
|
||||
std::mutex vo_mutex;
|
||||
std::condition_variable vo_cv;
|
||||
std::condition_variable vblank_cv;
|
||||
int flip_rate = 0;
|
||||
|
||||
s32 FindFreeGroup() const {
|
||||
|
@ -35,6 +41,22 @@ struct VideoOutPort {
|
|||
return index;
|
||||
}
|
||||
|
||||
bool IsVoLabel(const u64* address) const {
|
||||
const u64* start = &buffer_labels[0];
|
||||
const u64* end = &buffer_labels[MaxDisplayBuffers - 1];
|
||||
return address >= start && address <= end;
|
||||
}
|
||||
|
||||
void WaitVoLabel(auto&& pred) {
|
||||
std::unique_lock lk{vo_mutex};
|
||||
vo_cv.wait(lk, pred);
|
||||
}
|
||||
|
||||
void SignalVoLabel() {
|
||||
std::scoped_lock lk{vo_mutex};
|
||||
vo_cv.notify_one();
|
||||
}
|
||||
|
||||
[[nodiscard]] int NumRegisteredBuffers() const {
|
||||
return std::count_if(buffer_slots.cbegin(), buffer_slots.cend(),
|
||||
[](auto& buffer) { return buffer.group_index != -1; });
|
||||
|
@ -63,11 +85,8 @@ public:
|
|||
const BufferAttribute* attribute);
|
||||
int UnregisterBuffers(VideoOutPort* port, s32 attributeIndex);
|
||||
|
||||
void Flip(std::chrono::microseconds timeout);
|
||||
bool SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg, bool is_eop = false);
|
||||
|
||||
void Vblank();
|
||||
|
||||
private:
|
||||
struct Request {
|
||||
Vulkan::Frame* frame;
|
||||
|
@ -76,14 +95,19 @@ private:
|
|||
s64 flip_arg;
|
||||
u64 submit_tsc;
|
||||
bool eop;
|
||||
|
||||
// A request converts to true only when it carries a frame (frame is non-null).
operator bool() const noexcept {
|
||||
return frame != nullptr;
|
||||
}
|
||||
};
|
||||
|
||||
std::chrono::microseconds Flip(const Request& req);
|
||||
void PresentThread(std::stop_token token);
|
||||
|
||||
std::mutex mutex;
|
||||
VideoOutPort main_port{};
|
||||
std::condition_variable_any submit_cond;
|
||||
std::condition_variable done_cond;
|
||||
std::jthread present_thread;
|
||||
std::queue<Request> requests;
|
||||
bool is_neo{};
|
||||
};
|
||||
|
||||
} // namespace Libraries::VideoOut
|
||||
|
|
|
@ -183,6 +183,7 @@ s32 PS4_SYSV_ABI sceVideoOutGetVblankStatus(int handle, SceVideoOutVblankStatus*
|
|||
return ORBIS_VIDEO_OUT_ERROR_INVALID_HANDLE;
|
||||
}
|
||||
|
||||
std::unique_lock lock{port->vo_mutex};
|
||||
*status = port->vblank_status;
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
@ -229,14 +230,6 @@ s32 PS4_SYSV_ABI sceVideoOutUnregisterBuffers(s32 handle, s32 attributeIndex) {
|
|||
return driver->UnregisterBuffers(port, attributeIndex);
|
||||
}
|
||||
|
||||
void Flip(std::chrono::microseconds micros) {
|
||||
return driver->Flip(micros);
|
||||
}
|
||||
|
||||
void Vblank() {
|
||||
return driver->Vblank();
|
||||
}
|
||||
|
||||
void sceVideoOutGetBufferLabelAddress(s32 handle, uintptr_t* label_addr) {
|
||||
auto* port = driver->GetPort(handle);
|
||||
ASSERT(port);
|
||||
|
@ -266,6 +259,18 @@ s32 PS4_SYSV_ABI sceVideoOutGetDeviceCapabilityInfo(
|
|||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
/**
 * Blocks the caller until the port's vblank counter differs from the value it had on entry.
 *
 * @param handle Handle used to look up the port through the driver.
 * @return ORBIS_VIDEO_OUT_ERROR_INVALID_HANDLE when no port matches, otherwise ORBIS_OK.
 */
s32 PS4_SYSV_ABI sceVideoOutWaitVblank(s32 handle) {
    auto* const port = driver->GetPort(handle);
    if (port == nullptr) {
        return ORBIS_VIDEO_OUT_ERROR_INVALID_HANDLE;
    }

    std::unique_lock guard{port->vo_mutex};
    // Snapshot the counter under the lock, then wait on vblank_cv until it moves.
    const auto initial_count = port->vblank_status.count;
    while (port->vblank_status.count == initial_count) {
        port->vblank_cv.wait(guard);
    }
    return ORBIS_OK;
}
|
||||
|
||||
void RegisterLib(Core::Loader::SymbolsResolver* sym) {
|
||||
driver = std::make_unique<VideoOutDriver>(Config::getScreenWidth(), Config::getScreenHeight());
|
||||
|
||||
|
@ -294,6 +299,7 @@ void RegisterLib(Core::Loader::SymbolsResolver* sym) {
|
|||
sceVideoOutGetVblankStatus);
|
||||
LIB_FUNCTION("kGVLc3htQE8", "libSceVideoOut", 1, "libSceVideoOut", 0, 0,
|
||||
sceVideoOutGetDeviceCapabilityInfo);
|
||||
LIB_FUNCTION("j6RaAUlaLv0", "libSceVideoOut", 1, "libSceVideoOut", 0, 0, sceVideoOutWaitVblank);
|
||||
|
||||
// openOrbis appears to have libSceVideoOut_v1 module libSceVideoOut_v1.1
|
||||
LIB_FUNCTION("Up36PTk687E", "libSceVideoOut", 1, "libSceVideoOut", 1, 1, sceVideoOutOpen);
|
||||
|
|
|
@ -92,11 +92,12 @@ void PS4_SYSV_ABI sceVideoOutSetBufferAttribute(BufferAttribute* attribute, Pixe
|
|||
u32 tilingMode, u32 aspectRatio, u32 width,
|
||||
u32 height, u32 pitchInPixel);
|
||||
s32 PS4_SYSV_ABI sceVideoOutAddFlipEvent(Kernel::SceKernelEqueue eq, s32 handle, void* udata);
|
||||
s32 PS4_SYSV_ABI sceVideoOutAddVBlankEvent(Kernel::SceKernelEqueue eq, s32 handle, void* udata);
|
||||
s32 PS4_SYSV_ABI sceVideoOutAddVblankEvent(Kernel::SceKernelEqueue eq, s32 handle, void* udata);
|
||||
s32 PS4_SYSV_ABI sceVideoOutRegisterBuffers(s32 handle, s32 startIndex, void* const* addresses,
|
||||
s32 bufferNum, const BufferAttribute* attribute);
|
||||
s32 PS4_SYSV_ABI sceVideoOutSetFlipRate(s32 handle, s32 rate);
|
||||
s32 PS4_SYSV_ABI sceVideoOutIsFlipPending(s32 handle);
|
||||
s32 PS4_SYSV_ABI sceVideoOutWaitVblank(s32 handle);
|
||||
s32 PS4_SYSV_ABI sceVideoOutSubmitFlip(s32 handle, s32 bufferIndex, s32 flipMode, s64 flipArg);
|
||||
s32 PS4_SYSV_ABI sceVideoOutGetFlipStatus(s32 handle, FlipStatus* status);
|
||||
s32 PS4_SYSV_ABI sceVideoOutGetResolutionStatus(s32 handle, SceVideoOutResolutionStatus* status);
|
||||
|
@ -104,9 +105,6 @@ s32 PS4_SYSV_ABI sceVideoOutOpen(SceUserServiceUserId userId, s32 busType, s32 i
|
|||
const void* param);
|
||||
s32 PS4_SYSV_ABI sceVideoOutClose(s32 handle);
|
||||
|
||||
void Flip(std::chrono::microseconds micros);
|
||||
void Vblank();
|
||||
|
||||
// Internal system functions
|
||||
void sceVideoOutGetBufferLabelAddress(s32 handle, uintptr_t* label_addr);
|
||||
s32 sceVideoOutSubmitEopFlip(s32 handle, u32 buf_id, u32 mode, u32 arg, void** unk);
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <common/logging/log.h>
|
||||
#include <core/file_format/playgo_chunk.h>
|
||||
#include <core/file_format/psf.h>
|
||||
#include <core/file_format/splash.h>
|
||||
|
@ -10,21 +9,30 @@
|
|||
#include <core/libraries/libc_internal/libc_internal.h>
|
||||
#include <core/libraries/rtc/rtc.h>
|
||||
#include <core/libraries/videoout/video_out.h>
|
||||
#include <fmt/core.h>
|
||||
#include "common/config.h"
|
||||
#include "common/debug.h"
|
||||
#include "common/logging/backend.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "common/ntapi.h"
|
||||
#include "common/path_util.h"
|
||||
#include "common/polyfill_thread.h"
|
||||
#include "common/singleton.h"
|
||||
#include "common/version.h"
|
||||
#include "core/file_format/psf.h"
|
||||
#include "core/file_format/splash.h"
|
||||
#include "core/file_sys/fs.h"
|
||||
#include "core/libraries/disc_map/disc_map.h"
|
||||
#include "core/libraries/kernel/thread_management.h"
|
||||
#include "core/libraries/libc/libc.h"
|
||||
#include "core/libraries/libc_internal/libc_internal.h"
|
||||
#include "core/libraries/libs.h"
|
||||
#include "core/libraries/rtc/rtc.h"
|
||||
#include "core/linker.h"
|
||||
#include "core/memory.h"
|
||||
#include "emulator.h"
|
||||
#include "video_core/renderdoc.h"
|
||||
|
||||
#include <fmt/core.h>
|
||||
|
||||
Frontend::WindowSDL* g_window = nullptr;
|
||||
|
||||
|
@ -52,6 +60,9 @@ Emulator::Emulator() {
|
|||
memory = Core::Memory::Instance();
|
||||
controller = Common::Singleton<Input::GameController>::Instance();
|
||||
linker = Common::Singleton<Core::Linker>::Instance();
|
||||
|
||||
// Load renderdoc module.
|
||||
VideoCore::LoadRenderDoc();
|
||||
}
|
||||
|
||||
Emulator::~Emulator() {
|
||||
|
@ -120,6 +131,12 @@ void Emulator::Run(const std::filesystem::path& file) {
|
|||
}
|
||||
mnt->Mount(mount_download_dir, "/download0");
|
||||
|
||||
const auto& mount_captures_dir = Common::FS::GetUserPath(Common::FS::PathType::CapturesDir);
|
||||
if (!std::filesystem::exists(mount_captures_dir)) {
|
||||
std::filesystem::create_directory(mount_captures_dir);
|
||||
}
|
||||
VideoCore::SetOutputDir(mount_captures_dir.generic_string(), id);
|
||||
|
||||
// Initialize kernel and library facilities.
|
||||
Libraries::Kernel::init_pthreads();
|
||||
Libraries::InitHLELibs(&linker->GetHLESymbols());
|
||||
|
@ -152,14 +169,8 @@ void Emulator::Run(const std::filesystem::path& file) {
|
|||
std::jthread mainthread =
|
||||
std::jthread([this](std::stop_token stop_token) { linker->Execute(); });
|
||||
|
||||
// Begin main window loop until the application exits
|
||||
static constexpr std::chrono::milliseconds FlipPeriod{16};
|
||||
|
||||
while (window->isOpen()) {
|
||||
window->waitEvent();
|
||||
Libraries::VideoOut::Flip(FlipPeriod);
|
||||
Libraries::VideoOut::Vblank();
|
||||
FRAME_END;
|
||||
}
|
||||
|
||||
std::exit(0);
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#include "core/libraries/pad/pad.h"
|
||||
#include "input/controller.h"
|
||||
#include "sdl_window.h"
|
||||
#include "video_core/renderdoc.h"
|
||||
|
||||
#ifdef __APPLE__
|
||||
#include <SDL3/SDL_metal.h>
|
||||
|
@ -72,7 +73,7 @@ void WindowSDL::waitEvent() {
|
|||
// Called on main thread
|
||||
SDL_Event event;
|
||||
|
||||
if (!SDL_PollEvent(&event)) {
|
||||
if (!SDL_WaitEvent(&event)) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -180,6 +181,11 @@ void WindowSDL::onKeyPress(const SDL_Event* event) {
|
|||
ax = Input::GetAxis(-0x80, 0x80, axisvalue);
|
||||
break;
|
||||
case SDLK_S:
|
||||
if (event->key.mod == SDL_KMOD_LCTRL) {
|
||||
// Trigger rdoc capture
|
||||
VideoCore::TriggerCapture();
|
||||
break;
|
||||
}
|
||||
axis = Input::Axis::LeftY;
|
||||
if (event->type == SDL_EVENT_KEY_DOWN) {
|
||||
axisvalue += 127;
|
||||
|
|
|
@ -258,6 +258,7 @@ Id EmitISub64(EmitContext& ctx, Id a, Id b);
|
|||
Id EmitSMulExt(EmitContext& ctx, Id a, Id b);
|
||||
Id EmitUMulExt(EmitContext& ctx, Id a, Id b);
|
||||
Id EmitIMul32(EmitContext& ctx, Id a, Id b);
|
||||
Id EmitIMul64(EmitContext& ctx, Id a, Id b);
|
||||
Id EmitSDiv32(EmitContext& ctx, Id a, Id b);
|
||||
Id EmitUDiv32(EmitContext& ctx, Id a, Id b);
|
||||
Id EmitINeg32(EmitContext& ctx, Id value);
|
||||
|
@ -271,6 +272,7 @@ Id EmitShiftRightArithmetic32(EmitContext& ctx, Id base, Id shift);
|
|||
Id EmitShiftRightArithmetic64(EmitContext& ctx, Id base, Id shift);
|
||||
Id EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
|
||||
Id EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
|
||||
Id EmitBitwiseOr64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
|
||||
Id EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
|
||||
Id EmitBitFieldInsert(EmitContext& ctx, Id base, Id insert, Id offset, Id count);
|
||||
Id EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count);
|
||||
|
@ -286,8 +288,10 @@ Id EmitSMax32(EmitContext& ctx, Id a, Id b);
|
|||
Id EmitUMax32(EmitContext& ctx, Id a, Id b);
|
||||
Id EmitSClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max);
|
||||
Id EmitUClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max);
|
||||
Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs);
|
||||
Id EmitULessThan(EmitContext& ctx, Id lhs, Id rhs);
|
||||
Id EmitSLessThan32(EmitContext& ctx, Id lhs, Id rhs);
|
||||
Id EmitSLessThan64(EmitContext& ctx, Id lhs, Id rhs);
|
||||
Id EmitULessThan32(EmitContext& ctx, Id lhs, Id rhs);
|
||||
Id EmitULessThan64(EmitContext& ctx, Id lhs, Id rhs);
|
||||
Id EmitIEqual(EmitContext& ctx, Id lhs, Id rhs);
|
||||
Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs);
|
||||
Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs);
|
||||
|
|
|
@ -84,6 +84,10 @@ Id EmitIMul32(EmitContext& ctx, Id a, Id b) {
|
|||
return ctx.OpIMul(ctx.U32[1], a, b);
|
||||
}
|
||||
|
||||
// Emits a 64-bit integer multiply (OpIMul with the U64 result type).
Id EmitIMul64(EmitContext& ctx, Id a, Id b) {
    const Id result_type{ctx.U64};
    return ctx.OpIMul(result_type, a, b);
}
|
||||
|
||||
Id EmitSDiv32(EmitContext& ctx, Id a, Id b) {
|
||||
return ctx.OpSDiv(ctx.U32[1], a, b);
|
||||
}
|
||||
|
@ -142,6 +146,13 @@ Id EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
|
|||
return result;
|
||||
}
|
||||
|
||||
// Emits a 64-bit bitwise OR and forwards the result to the zero/sign flag helpers.
Id EmitBitwiseOr64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
    const Id ored = ctx.OpBitwiseOr(ctx.U64, a, b);
    SetZeroFlag(ctx, inst, ored);
    SetSignFlag(ctx, inst, ored);
    return ored;
}
|
||||
|
||||
Id EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
|
||||
const Id result{ctx.OpBitwiseXor(ctx.U32[1], a, b)};
|
||||
SetZeroFlag(ctx, inst, result);
|
||||
|
@ -231,11 +242,19 @@ Id EmitUClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max) {
|
|||
return result;
|
||||
}
|
||||
|
||||
Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs) {
|
||||
// Signed less-than for 32-bit operands; the result is a scalar boolean.
Id EmitSLessThan32(EmitContext& ctx, Id lhs, Id rhs) {
    const Id bool_type{ctx.U1[1]};
    return ctx.OpSLessThan(bool_type, lhs, rhs);
}
|
||||
|
||||
Id EmitULessThan(EmitContext& ctx, Id lhs, Id rhs) {
|
||||
// Signed less-than for 64-bit operands; the result is a scalar boolean.
Id EmitSLessThan64(EmitContext& ctx, Id lhs, Id rhs) {
    const Id bool_type{ctx.U1[1]};
    return ctx.OpSLessThan(bool_type, lhs, rhs);
}
|
||||
|
||||
// Unsigned less-than for 32-bit operands; the result is a scalar boolean.
Id EmitULessThan32(EmitContext& ctx, Id lhs, Id rhs) {
    const Id bool_type{ctx.U1[1]};
    return ctx.OpULessThan(bool_type, lhs, rhs);
}
|
||||
|
||||
// Unsigned less-than for 64-bit operands; the result is a scalar boolean.
Id EmitULessThan64(EmitContext& ctx, Id lhs, Id rhs) {
    const Id bool_type{ctx.U1[1]};
    return ctx.OpULessThan(bool_type, lhs, rhs);
}
|
||||
|
||||
|
|
|
@ -2392,10 +2392,10 @@ enum class OperandField : u32 {
|
|||
ConstFloatPos_4_0,
|
||||
ConstFloatNeg_4_0,
|
||||
VccZ = 251,
|
||||
ExecZ,
|
||||
Scc,
|
||||
LdsDirect,
|
||||
LiteralConst,
|
||||
ExecZ = 252,
|
||||
Scc = 253,
|
||||
LdsDirect = 254,
|
||||
LiteralConst = 255,
|
||||
VectorGPR,
|
||||
|
||||
Undefined = 0xFFFFFFFF,
|
||||
|
|
|
@ -76,21 +76,21 @@ void Translator::EmitPrologue() {
|
|||
}
|
||||
}
|
||||
|
||||
template <>
|
||||
IR::U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) {
|
||||
// Input modifiers work on float values.
|
||||
force_flt |= operand.input_modifier.abs | operand.input_modifier.neg;
|
||||
|
||||
IR::U32F32 value{};
|
||||
|
||||
const bool is_float = operand.type == ScalarType::Float32 || force_flt;
|
||||
switch (operand.field) {
|
||||
case OperandField::ScalarGPR:
|
||||
if (operand.type == ScalarType::Float32 || force_flt) {
|
||||
if (is_float) {
|
||||
value = ir.GetScalarReg<IR::F32>(IR::ScalarReg(operand.code));
|
||||
} else {
|
||||
value = ir.GetScalarReg<IR::U32>(IR::ScalarReg(operand.code));
|
||||
}
|
||||
break;
|
||||
case OperandField::VectorGPR:
|
||||
if (operand.type == ScalarType::Float32 || force_flt) {
|
||||
if (is_float) {
|
||||
value = ir.GetVectorReg<IR::F32>(IR::VectorReg(operand.code));
|
||||
} else {
|
||||
value = ir.GetVectorReg<IR::U32>(IR::VectorReg(operand.code));
|
||||
|
@ -164,15 +164,160 @@ IR::U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) {
|
|||
UNREACHABLE();
|
||||
}
|
||||
|
||||
if (operand.input_modifier.abs) {
|
||||
value = ir.FPAbs(value);
|
||||
}
|
||||
if (operand.input_modifier.neg) {
|
||||
value = ir.FPNeg(value);
|
||||
if (is_float) {
|
||||
if (operand.input_modifier.abs) {
|
||||
value = ir.FPAbs(value);
|
||||
}
|
||||
if (operand.input_modifier.neg) {
|
||||
value = ir.FPNeg(value);
|
||||
}
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
template <>
|
||||
IR::U32 Translator::GetSrc(const InstOperand& operand, bool force_flt) {
|
||||
return GetSrc<IR::U32F32>(operand, force_flt);
|
||||
}
|
||||
|
||||
template <>
|
||||
IR::F32 Translator::GetSrc(const InstOperand& operand, bool) {
|
||||
return GetSrc<IR::U32F32>(operand, true);
|
||||
}
|
||||
|
||||
template <>
|
||||
IR::U64F64 Translator::GetSrc64(const InstOperand& operand, bool force_flt) {
|
||||
IR::Value value_hi{};
|
||||
IR::Value value_lo{};
|
||||
|
||||
bool immediate = false;
|
||||
const bool is_float = operand.type == ScalarType::Float64 || force_flt;
|
||||
switch (operand.field) {
|
||||
case OperandField::ScalarGPR:
|
||||
if (is_float) {
|
||||
value_lo = ir.GetScalarReg<IR::F32>(IR::ScalarReg(operand.code));
|
||||
value_hi = ir.GetScalarReg<IR::F32>(IR::ScalarReg(operand.code + 1));
|
||||
} else if (operand.type == ScalarType::Uint64 || operand.type == ScalarType::Sint64) {
|
||||
value_lo = ir.GetScalarReg<IR::U32>(IR::ScalarReg(operand.code));
|
||||
value_hi = ir.GetScalarReg<IR::U32>(IR::ScalarReg(operand.code + 1));
|
||||
} else {
|
||||
UNREACHABLE();
|
||||
}
|
||||
break;
|
||||
case OperandField::VectorGPR:
|
||||
if (is_float) {
|
||||
value_lo = ir.GetVectorReg<IR::F32>(IR::VectorReg(operand.code));
|
||||
value_hi = ir.GetVectorReg<IR::F32>(IR::VectorReg(operand.code + 1));
|
||||
} else if (operand.type == ScalarType::Uint64 || operand.type == ScalarType::Sint64) {
|
||||
value_lo = ir.GetVectorReg<IR::U32>(IR::VectorReg(operand.code));
|
||||
value_hi = ir.GetVectorReg<IR::U32>(IR::VectorReg(operand.code + 1));
|
||||
} else {
|
||||
UNREACHABLE();
|
||||
}
|
||||
break;
|
||||
case OperandField::ConstZero:
|
||||
immediate = true;
|
||||
if (force_flt) {
|
||||
value_lo = ir.Imm64(0.0);
|
||||
} else {
|
||||
value_lo = ir.Imm64(u64(0U));
|
||||
}
|
||||
break;
|
||||
case OperandField::SignedConstIntPos:
|
||||
ASSERT(!force_flt);
|
||||
immediate = true;
|
||||
value_lo = ir.Imm64(s64(operand.code) - SignedConstIntPosMin + 1);
|
||||
break;
|
||||
case OperandField::SignedConstIntNeg:
|
||||
ASSERT(!force_flt);
|
||||
immediate = true;
|
||||
value_lo = ir.Imm64(-s64(operand.code) + SignedConstIntNegMin - 1);
|
||||
break;
|
||||
case OperandField::LiteralConst:
|
||||
immediate = true;
|
||||
if (force_flt) {
|
||||
UNREACHABLE(); // There is a literal double?
|
||||
} else {
|
||||
value_lo = ir.Imm64(u64(operand.code));
|
||||
}
|
||||
break;
|
||||
case OperandField::ConstFloatPos_1_0:
|
||||
immediate = true;
|
||||
if (force_flt) {
|
||||
value_lo = ir.Imm64(1.0);
|
||||
} else {
|
||||
value_lo = ir.Imm64(std::bit_cast<u64>(f64(1.0)));
|
||||
}
|
||||
break;
|
||||
case OperandField::ConstFloatPos_0_5:
|
||||
immediate = true;
|
||||
value_lo = ir.Imm64(0.5);
|
||||
break;
|
||||
case OperandField::ConstFloatPos_2_0:
|
||||
immediate = true;
|
||||
value_lo = ir.Imm64(2.0);
|
||||
break;
|
||||
case OperandField::ConstFloatPos_4_0:
|
||||
immediate = true;
|
||||
value_lo = ir.Imm64(4.0);
|
||||
break;
|
||||
case OperandField::ConstFloatNeg_0_5:
|
||||
immediate = true;
|
||||
value_lo = ir.Imm64(-0.5);
|
||||
break;
|
||||
case OperandField::ConstFloatNeg_1_0:
|
||||
immediate = true;
|
||||
value_lo = ir.Imm64(-1.0);
|
||||
break;
|
||||
case OperandField::ConstFloatNeg_2_0:
|
||||
immediate = true;
|
||||
value_lo = ir.Imm64(-2.0);
|
||||
break;
|
||||
case OperandField::ConstFloatNeg_4_0:
|
||||
immediate = true;
|
||||
value_lo = ir.Imm64(-4.0);
|
||||
break;
|
||||
case OperandField::VccLo: {
|
||||
value_lo = ir.GetVccLo();
|
||||
value_hi = ir.GetVccHi();
|
||||
} break;
|
||||
case OperandField::VccHi:
|
||||
UNREACHABLE();
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
IR::Value value;
|
||||
|
||||
if (immediate) {
|
||||
value = value_lo;
|
||||
} else if (is_float) {
|
||||
throw NotImplementedException("required OpPackDouble2x32 implementation");
|
||||
} else {
|
||||
IR::Value packed = ir.CompositeConstruct(value_lo, value_hi);
|
||||
value = ir.PackUint2x32(packed);
|
||||
}
|
||||
|
||||
if (is_float) {
|
||||
if (operand.input_modifier.abs) {
|
||||
value = ir.FPAbs(IR::F32F64(value));
|
||||
}
|
||||
if (operand.input_modifier.neg) {
|
||||
value = ir.FPNeg(IR::F32F64(value));
|
||||
}
|
||||
}
|
||||
return IR::U64F64(value);
|
||||
}
|
||||
|
||||
template <>
|
||||
IR::U64 Translator::GetSrc64(const InstOperand& operand, bool force_flt) {
|
||||
return GetSrc64<IR::U64F64>(operand, force_flt);
|
||||
}
|
||||
template <>
|
||||
IR::F64 Translator::GetSrc64(const InstOperand& operand, bool) {
|
||||
return GetSrc64<IR::U64F64>(operand, true);
|
||||
}
|
||||
|
||||
void Translator::SetDst(const InstOperand& operand, const IR::U32F32& value) {
|
||||
IR::U32F32 result = value;
|
||||
if (operand.output_modifier.multiplier != 0.f) {
|
||||
|
@ -197,6 +342,43 @@ void Translator::SetDst(const InstOperand& operand, const IR::U32F32& value) {
|
|||
}
|
||||
}
|
||||
|
||||
// Writes a 64-bit result to the destination named by `operand`.
//
// For floating-point values the output modifiers are applied first (multiply,
// then clamp). The value is then viewed as a U64, split with UnpackUint2x32 and
// stored into the register pair `operand.code` (low half) and
// `operand.code + 1` (high half). Only SGPR and VGPR destinations are written;
// an M0 destination is accepted but nothing is stored, and every other field
// is unreachable.
void Translator::SetDst64(const InstOperand& operand, const IR::U64F64& value_raw) {
    IR::U64F64 value_untyped = value_raw;

    const bool is_float = value_raw.Type() == IR::Type::F64 || value_raw.Type() == IR::Type::F32;
    if (is_float) {
        if (operand.output_modifier.multiplier != 0.f) {
            value_untyped =
                ir.FPMul(value_untyped, ir.Imm64(f64(operand.output_modifier.multiplier)));
        }
        if (operand.output_modifier.clamp) {
            // Saturate the (possibly scaled) running value. Saturating
            // `value_raw` here would silently discard the multiply above.
            value_untyped = ir.FPSaturate(value_untyped);
        }
    }
    const IR::U64 value =
        is_float ? ir.BitCast<IR::U64>(IR::F64{value_untyped}) : IR::U64{value_untyped};

    const IR::Value unpacked{ir.UnpackUint2x32(value)};
    const IR::U32 lo{ir.CompositeExtract(unpacked, 0U)};
    const IR::U32 hi{ir.CompositeExtract(unpacked, 1U)};
    switch (operand.field) {
    case OperandField::ScalarGPR:
        ir.SetScalarReg(IR::ScalarReg(operand.code + 1), hi);
        return ir.SetScalarReg(IR::ScalarReg(operand.code), lo);
    case OperandField::VectorGPR:
        ir.SetVectorReg(IR::VectorReg(operand.code + 1), hi);
        return ir.SetVectorReg(IR::VectorReg(operand.code), lo);
    case OperandField::VccLo:
        UNREACHABLE();
    case OperandField::VccHi:
        UNREACHABLE();
    case OperandField::M0:
        // No store is emitted for an M0 destination.
        break;
    default:
        UNREACHABLE();
    }
}
|
||||
|
||||
void Translator::EmitFetch(const GcnInst& inst) {
|
||||
// Read the pointer to the fetch shader assembly.
|
||||
const u32 sgpr_base = inst.src[0].code;
|
||||
|
@ -320,6 +502,9 @@ void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_l
|
|||
case Opcode::V_ADD_I32:
|
||||
translator.V_ADD_I32(inst);
|
||||
break;
|
||||
case Opcode::V_ADDC_U32:
|
||||
translator.V_ADDC_U32(inst);
|
||||
break;
|
||||
case Opcode::V_CVT_F32_I32:
|
||||
translator.V_CVT_F32_I32(inst);
|
||||
break;
|
||||
|
@ -470,6 +655,9 @@ void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_l
|
|||
case Opcode::IMAGE_LOAD:
|
||||
translator.IMAGE_LOAD(false, inst);
|
||||
break;
|
||||
case Opcode::V_MAD_U64_U32:
|
||||
translator.V_MAD_U64_U32(inst);
|
||||
break;
|
||||
case Opcode::V_CMP_GE_I32:
|
||||
translator.V_CMP_U32(ConditionOp::GE, true, false, inst);
|
||||
break;
|
||||
|
|
|
@ -100,6 +100,7 @@ public:
|
|||
void V_AND_B32(const GcnInst& inst);
|
||||
void V_LSHLREV_B32(const GcnInst& inst);
|
||||
void V_ADD_I32(const GcnInst& inst);
|
||||
void V_ADDC_U32(const GcnInst& inst);
|
||||
void V_CVT_F32_I32(const GcnInst& inst);
|
||||
void V_CVT_F32_U32(const GcnInst& inst);
|
||||
void V_MAD_F32(const GcnInst& inst);
|
||||
|
@ -129,6 +130,7 @@ public:
|
|||
void V_CVT_U32_F32(const GcnInst& inst);
|
||||
void V_SUBREV_F32(const GcnInst& inst);
|
||||
void V_SUBREV_I32(const GcnInst& inst);
|
||||
void V_MAD_U64_U32(const GcnInst& inst);
|
||||
void V_CMP_U32(ConditionOp op, bool is_signed, bool set_exec, const GcnInst& inst);
|
||||
void V_LSHRREV_B32(const GcnInst& inst);
|
||||
void V_MUL_HI_U32(bool is_signed, const GcnInst& inst);
|
||||
|
@ -186,8 +188,12 @@ public:
|
|||
void EXP(const GcnInst& inst);
|
||||
|
||||
private:
|
||||
IR::U32F32 GetSrc(const InstOperand& operand, bool flt_zero = false);
|
||||
template <typename T = IR::U32F32>
|
||||
[[nodiscard]] T GetSrc(const InstOperand& operand, bool flt_zero = false);
|
||||
template <typename T = IR::U64F64>
|
||||
[[nodiscard]] T GetSrc64(const InstOperand& operand, bool flt_zero = false);
|
||||
void SetDst(const InstOperand& operand, const IR::U32F32& value);
|
||||
void SetDst64(const InstOperand& operand, const IR::U64F64& value_raw);
|
||||
|
||||
private:
|
||||
IR::IREmitter ir;
|
||||
|
|
|
@ -67,7 +67,8 @@ void Translator::V_OR_B32(bool is_xor, const GcnInst& inst) {
|
|||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{ir.GetVectorReg(IR::VectorReg(inst.src[1].code))};
|
||||
const IR::VectorReg dst_reg{inst.dst[0].code};
|
||||
ir.SetVectorReg(dst_reg, is_xor ? ir.BitwiseXor(src0, src1) : ir.BitwiseOr(src0, src1));
|
||||
ir.SetVectorReg(dst_reg,
|
||||
is_xor ? ir.BitwiseXor(src0, src1) : IR::U32(ir.BitwiseOr(src0, src1)));
|
||||
}
|
||||
|
||||
void Translator::V_AND_B32(const GcnInst& inst) {
|
||||
|
@ -92,6 +93,30 @@ void Translator::V_ADD_I32(const GcnInst& inst) {
|
|||
// TODO: Carry
|
||||
}
|
||||
|
||||
void Translator::V_ADDC_U32(const GcnInst& inst) {
|
||||
|
||||
const auto src0 = GetSrc<IR::U32>(inst.src[0]);
|
||||
const auto src1 = GetSrc<IR::U32>(inst.src[1]);
|
||||
|
||||
IR::U32 scarry;
|
||||
if (inst.src_count == 3) { // VOP3
|
||||
IR::U1 thread_bit{ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[2].code))};
|
||||
scarry = IR::U32{ir.Select(thread_bit, ir.Imm32(1), ir.Imm32(0))};
|
||||
} else { // VOP2
|
||||
scarry = ir.GetVccLo();
|
||||
}
|
||||
|
||||
const IR::U32 result = ir.IAdd(ir.IAdd(src0, src1), scarry);
|
||||
|
||||
const IR::VectorReg dst_reg{inst.dst[0].code};
|
||||
ir.SetVectorReg(dst_reg, result);
|
||||
|
||||
const IR::U1 less_src0 = ir.ILessThan(result, src0, false);
|
||||
const IR::U1 less_src1 = ir.ILessThan(result, src1, false);
|
||||
const IR::U1 did_overflow = ir.LogicalOr(less_src0, less_src1);
|
||||
ir.SetVcc(did_overflow);
|
||||
}
|
||||
|
||||
void Translator::V_CVT_F32_I32(const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::VectorReg dst_reg{inst.dst[0].code};
|
||||
|
@ -294,6 +319,23 @@ void Translator::V_SUBREV_I32(const GcnInst& inst) {
|
|||
// TODO: Carry-out
|
||||
}
|
||||
|
||||
void Translator::V_MAD_U64_U32(const GcnInst& inst) {
|
||||
|
||||
const auto src0 = GetSrc<IR::U32>(inst.src[0]);
|
||||
const auto src1 = GetSrc<IR::U32>(inst.src[1]);
|
||||
const auto src2 = GetSrc64<IR::U64>(inst.src[2]);
|
||||
|
||||
const IR::U64 mul_result = ir.UConvert(64, ir.IMul(src0, src1));
|
||||
const IR::U64 sum_result = ir.IAdd(mul_result, src2);
|
||||
|
||||
SetDst64(inst.dst[0], sum_result);
|
||||
|
||||
const IR::U1 less_src0 = ir.ILessThan(sum_result, mul_result, false);
|
||||
const IR::U1 less_src1 = ir.ILessThan(sum_result, src2, false);
|
||||
const IR::U1 did_overflow = ir.LogicalOr(less_src0, less_src1);
|
||||
ir.SetVcc(did_overflow);
|
||||
}
|
||||
|
||||
void Translator::V_CMP_U32(ConditionOp op, bool is_signed, bool set_exec, const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
|
|
|
@ -964,8 +964,18 @@ IR::Value IREmitter::IMulExt(const U32& a, const U32& b, bool is_signed) {
|
|||
return Inst(is_signed ? Opcode::SMulExt : Opcode::UMulExt, a, b);
|
||||
}
|
||||
|
||||
U32 IREmitter::IMul(const U32& a, const U32& b) {
|
||||
return Inst<U32>(Opcode::IMul32, a, b);
|
||||
// Integer multiply for matching 32-bit or 64-bit operands; mismatched operand
// types are reported through UNREACHABLE_MSG.
U32U64 IREmitter::IMul(const U32U64& a, const U32U64& b) {
    const Type operand_type = a.Type();
    if (operand_type != b.Type()) {
        UNREACHABLE_MSG("Mismatching types {} and {}", a.Type(), b.Type());
    }
    if (operand_type == Type::U32) {
        return Inst<U32>(Opcode::IMul32, a, b);
    }
    if (operand_type == Type::U64) {
        return Inst<U64>(Opcode::IMul64, a, b);
    }
    ThrowInvalidType(operand_type);
}
|
||||
|
||||
U32 IREmitter::IDiv(const U32& a, const U32& b, bool is_signed) {
|
||||
|
@ -1024,8 +1034,18 @@ U32 IREmitter::BitwiseAnd(const U32& a, const U32& b) {
|
|||
return Inst<U32>(Opcode::BitwiseAnd32, a, b);
|
||||
}
|
||||
|
||||
U32 IREmitter::BitwiseOr(const U32& a, const U32& b) {
|
||||
return Inst<U32>(Opcode::BitwiseOr32, a, b);
|
||||
// Bitwise OR for matching 32-bit or 64-bit operands; mismatched operand types
// are reported through UNREACHABLE_MSG.
U32U64 IREmitter::BitwiseOr(const U32U64& a, const U32U64& b) {
    const Type operand_type = a.Type();
    if (operand_type != b.Type()) {
        UNREACHABLE_MSG("Mismatching types {} and {}", a.Type(), b.Type());
    }
    if (operand_type == Type::U32) {
        return Inst<U32>(Opcode::BitwiseOr32, a, b);
    }
    if (operand_type == Type::U64) {
        return Inst<U64>(Opcode::BitwiseOr64, a, b);
    }
    ThrowInvalidType(operand_type);
}
|
||||
|
||||
U32 IREmitter::BitwiseXor(const U32& a, const U32& b) {
|
||||
|
@ -1095,8 +1115,18 @@ U32 IREmitter::UClamp(const U32& value, const U32& min, const U32& max) {
|
|||
return Inst<U32>(Opcode::UClamp32, value, min, max);
|
||||
}
|
||||
|
||||
U1 IREmitter::ILessThan(const U32& lhs, const U32& rhs, bool is_signed) {
|
||||
return Inst<U1>(is_signed ? Opcode::SLessThan : Opcode::ULessThan, lhs, rhs);
|
||||
// Integer less-than for matching 32-bit or 64-bit operands. `is_signed`
// selects the signed or unsigned opcode of the matching width.
U1 IREmitter::ILessThan(const U32U64& lhs, const U32U64& rhs, bool is_signed) {
    const Type operand_type = lhs.Type();
    if (operand_type != rhs.Type()) {
        UNREACHABLE_MSG("Mismatching types {} and {}", lhs.Type(), rhs.Type());
    }
    if (operand_type == Type::U32) {
        const Opcode op = is_signed ? Opcode::SLessThan32 : Opcode::ULessThan32;
        return Inst<U1>(op, lhs, rhs);
    }
    if (operand_type == Type::U64) {
        const Opcode op = is_signed ? Opcode::SLessThan64 : Opcode::ULessThan64;
        return Inst<U1>(op, lhs, rhs);
    }
    ThrowInvalidType(operand_type);
}
|
||||
|
||||
U1 IREmitter::IEqual(const U32U64& lhs, const U32U64& rhs) {
|
||||
|
@ -1155,8 +1185,9 @@ U32U64 IREmitter::ConvertFToS(size_t bitsize, const F32F64& value) {
|
|||
ThrowInvalidType(value.Type());
|
||||
}
|
||||
default:
|
||||
UNREACHABLE_MSG("Invalid destination bitsize {}", bitsize);
|
||||
break;
|
||||
}
|
||||
throw NotImplementedException("Invalid destination bitsize {}", bitsize);
|
||||
}
|
||||
|
||||
U32U64 IREmitter::ConvertFToU(size_t bitsize, const F32F64& value) {
|
||||
|
@ -1183,13 +1214,17 @@ F32F64 IREmitter::ConvertSToF(size_t dest_bitsize, size_t src_bitsize, const Val
|
|||
switch (src_bitsize) {
|
||||
case 32:
|
||||
return Inst<F32>(Opcode::ConvertF32S32, value);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 64:
|
||||
switch (src_bitsize) {
|
||||
case 32:
|
||||
return Inst<F64>(Opcode::ConvertF64S32, value);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
UNREACHABLE_MSG("Invalid bit size combination dst={} src={}", dest_bitsize, src_bitsize);
|
||||
|
@ -1203,13 +1238,17 @@ F32F64 IREmitter::ConvertUToF(size_t dest_bitsize, size_t src_bitsize, const Val
|
|||
return Inst<F32>(Opcode::ConvertF32U16, value);
|
||||
case 32:
|
||||
return Inst<F32>(Opcode::ConvertF32U32, value);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 64:
|
||||
switch (src_bitsize) {
|
||||
case 32:
|
||||
return Inst<F64>(Opcode::ConvertF64U32, value);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
UNREACHABLE_MSG("Invalid bit size combination dst={} src={}", dest_bitsize, src_bitsize);
|
||||
|
@ -1227,7 +1266,11 @@ U16U32U64 IREmitter::UConvert(size_t result_bitsize, const U16U32U64& value) {
|
|||
switch (value.Type()) {
|
||||
case Type::U32:
|
||||
return Inst<U16>(Opcode::ConvertU16U32, value);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize);
|
||||
}
|
||||
|
@ -1238,13 +1281,17 @@ F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value) {
|
|||
switch (value.Type()) {
|
||||
case Type::F32:
|
||||
return Inst<F16>(Opcode::ConvertF16F32, value);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 32:
|
||||
switch (value.Type()) {
|
||||
case Type::F16:
|
||||
return Inst<F32>(Opcode::ConvertF32F16, value);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize);
|
||||
|
|
|
@ -159,7 +159,7 @@ public:
|
|||
[[nodiscard]] Value IAddCary(const U32& a, const U32& b);
|
||||
[[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b);
|
||||
[[nodiscard]] Value IMulExt(const U32& a, const U32& b, bool is_signed = false);
|
||||
[[nodiscard]] U32 IMul(const U32& a, const U32& b);
|
||||
[[nodiscard]] U32U64 IMul(const U32U64& a, const U32U64& b);
|
||||
[[nodiscard]] U32 IDiv(const U32& a, const U32& b, bool is_signed = false);
|
||||
[[nodiscard]] U32U64 INeg(const U32U64& value);
|
||||
[[nodiscard]] U32 IAbs(const U32& value);
|
||||
|
@ -167,7 +167,7 @@ public:
|
|||
[[nodiscard]] U32U64 ShiftRightLogical(const U32U64& base, const U32& shift);
|
||||
[[nodiscard]] U32U64 ShiftRightArithmetic(const U32U64& base, const U32& shift);
|
||||
[[nodiscard]] U32 BitwiseAnd(const U32& a, const U32& b);
|
||||
[[nodiscard]] U32 BitwiseOr(const U32& a, const U32& b);
|
||||
[[nodiscard]] U32U64 BitwiseOr(const U32U64& a, const U32U64& b);
|
||||
[[nodiscard]] U32 BitwiseXor(const U32& a, const U32& b);
|
||||
[[nodiscard]] U32 BitFieldInsert(const U32& base, const U32& insert, const U32& offset,
|
||||
const U32& count);
|
||||
|
@ -188,7 +188,7 @@ public:
|
|||
[[nodiscard]] U32 SClamp(const U32& value, const U32& min, const U32& max);
|
||||
[[nodiscard]] U32 UClamp(const U32& value, const U32& min, const U32& max);
|
||||
|
||||
[[nodiscard]] U1 ILessThan(const U32& lhs, const U32& rhs, bool is_signed);
|
||||
[[nodiscard]] U1 ILessThan(const U32U64& lhs, const U32U64& rhs, bool is_signed);
|
||||
[[nodiscard]] U1 IEqual(const U32U64& lhs, const U32U64& rhs);
|
||||
[[nodiscard]] U1 ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed);
|
||||
[[nodiscard]] U1 IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed);
|
||||
|
|
|
@ -227,6 +227,7 @@ OPCODE(IAddCary32, U32x2, U32,
|
|||
OPCODE(ISub32, U32, U32, U32, )
|
||||
OPCODE(ISub64, U64, U64, U64, )
|
||||
OPCODE(IMul32, U32, U32, U32, )
|
||||
OPCODE(IMul64, U64, U64, U64, )
|
||||
OPCODE(SMulExt, U32x2, U32, U32, )
|
||||
OPCODE(UMulExt, U32x2, U32, U32, )
|
||||
OPCODE(SDiv32, U32, U32, U32, )
|
||||
|
@ -242,6 +243,7 @@ OPCODE(ShiftRightArithmetic32, U32, U32,
|
|||
OPCODE(ShiftRightArithmetic64, U64, U64, U32, )
|
||||
OPCODE(BitwiseAnd32, U32, U32, U32, )
|
||||
OPCODE(BitwiseOr32, U32, U32, U32, )
|
||||
OPCODE(BitwiseOr64, U64, U64, U64, )
|
||||
OPCODE(BitwiseXor32, U32, U32, U32, )
|
||||
OPCODE(BitFieldInsert, U32, U32, U32, U32, U32, )
|
||||
OPCODE(BitFieldSExtract, U32, U32, U32, U32, )
|
||||
|
@ -258,8 +260,10 @@ OPCODE(SMax32, U32, U32,
|
|||
OPCODE(UMax32, U32, U32, U32, )
|
||||
OPCODE(SClamp32, U32, U32, U32, U32, )
|
||||
OPCODE(UClamp32, U32, U32, U32, U32, )
|
||||
OPCODE(SLessThan, U1, U32, U32, )
|
||||
OPCODE(ULessThan, U1, U32, U32, )
|
||||
OPCODE(SLessThan32, U1, U32, U32, )
|
||||
OPCODE(SLessThan64, U1, U64, U64, )
|
||||
OPCODE(ULessThan32, U1, U32, U32, )
|
||||
OPCODE(ULessThan64, U1, U64, U64, )
|
||||
OPCODE(IEqual, U1, U32, U32, )
|
||||
OPCODE(SLessThanEqual, U1, U32, U32, )
|
||||
OPCODE(ULessThanEqual, U1, U32, U32, )
|
||||
|
|
|
@ -21,6 +21,8 @@ template <typename T>
|
|||
return value.F32();
|
||||
} else if constexpr (std::is_same_v<T, u64>) {
|
||||
return value.U64();
|
||||
} else if constexpr (std::is_same_v<T, s64>) {
|
||||
return static_cast<s64>(value.U64());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -281,12 +283,18 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
|
|||
return FoldLogicalOr(inst);
|
||||
case IR::Opcode::LogicalNot:
|
||||
return FoldLogicalNot(inst);
|
||||
case IR::Opcode::SLessThan:
|
||||
case IR::Opcode::SLessThan32:
|
||||
FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a < b; });
|
||||
return;
|
||||
case IR::Opcode::ULessThan:
|
||||
case IR::Opcode::SLessThan64:
|
||||
FoldWhenAllImmediates(inst, [](s64 a, s64 b) { return a < b; });
|
||||
return;
|
||||
case IR::Opcode::ULessThan32:
|
||||
FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a < b; });
|
||||
return;
|
||||
case IR::Opcode::ULessThan64:
|
||||
FoldWhenAllImmediates(inst, [](u64 a, u64 b) { return a < b; });
|
||||
return;
|
||||
case IR::Opcode::SLessThanEqual:
|
||||
FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a <= b; });
|
||||
return;
|
||||
|
|
|
@ -348,13 +348,15 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
|
|||
case IR::Opcode::GetThreadBitScalarReg:
|
||||
case IR::Opcode::GetScalarRegister: {
|
||||
const IR::ScalarReg reg{inst.Arg(0).ScalarReg()};
|
||||
inst.ReplaceUsesWith(
|
||||
pass.ReadVariable(reg, block, opcode == IR::Opcode::GetThreadBitScalarReg));
|
||||
const bool thread_bit = opcode == IR::Opcode::GetThreadBitScalarReg;
|
||||
const IR::Value value = pass.ReadVariable(reg, block, thread_bit);
|
||||
inst.ReplaceUsesWith(value);
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::GetVectorRegister: {
|
||||
const IR::VectorReg reg{inst.Arg(0).VectorReg()};
|
||||
inst.ReplaceUsesWith(pass.ReadVariable(reg, block));
|
||||
const IR::Value value = pass.ReadVariable(reg, block);
|
||||
inst.ReplaceUsesWith(value);
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::GetGotoVariable:
|
||||
|
|
|
@ -220,6 +220,7 @@ using F16 = TypedValue<Type::F16>;
|
|||
using F32 = TypedValue<Type::F32>;
|
||||
using F64 = TypedValue<Type::F64>;
|
||||
using U32F32 = TypedValue<Type::U32 | Type::F32>;
|
||||
using U64F64 = TypedValue<Type::U64 | Type::F64>;
|
||||
using U32U64 = TypedValue<Type::U32 | Type::U64>;
|
||||
using U16U32U64 = TypedValue<Type::U16 | Type::U32 | Type::U64>;
|
||||
using F32F64 = TypedValue<Type::F32 | Type::F64>;
|
||||
|
|
|
@ -5,8 +5,10 @@
|
|||
#include "common/debug.h"
|
||||
#include "common/polyfill_thread.h"
|
||||
#include "common/thread.h"
|
||||
#include "core/libraries/videoout/driver.h"
|
||||
#include "video_core/amdgpu/liverpool.h"
|
||||
#include "video_core/amdgpu/pm4_cmds.h"
|
||||
#include "video_core/renderdoc.h"
|
||||
#include "video_core/renderer_vulkan/vk_rasterizer.h"
|
||||
|
||||
namespace AmdGpu {
|
||||
|
@ -32,12 +34,15 @@ void Liverpool::Process(std::stop_token stoken) {
|
|||
while (!stoken.stop_requested()) {
|
||||
{
|
||||
std::unique_lock lk{submit_mutex};
|
||||
Common::CondvarWait(submit_cv, lk, stoken, [this] { return num_submits != 0; });
|
||||
Common::CondvarWait(submit_cv, lk, stoken,
|
||||
[this] { return num_submits != 0 || submit_done; });
|
||||
}
|
||||
if (stoken.stop_requested()) {
|
||||
break;
|
||||
}
|
||||
|
||||
VideoCore::StartCapture();
|
||||
|
||||
int qid = -1;
|
||||
|
||||
while (num_submits) {
|
||||
|
@ -48,11 +53,9 @@ void Liverpool::Process(std::stop_token stoken) {
|
|||
Task::Handle task{};
|
||||
{
|
||||
std::scoped_lock lock{queue.m_access};
|
||||
|
||||
if (queue.submits.empty()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
task = queue.submits.front();
|
||||
}
|
||||
task.resume();
|
||||
|
@ -64,9 +67,20 @@ void Liverpool::Process(std::stop_token stoken) {
|
|||
queue.submits.pop();
|
||||
|
||||
--num_submits;
|
||||
std::scoped_lock lock2{submit_mutex};
|
||||
submit_cv.notify_all();
|
||||
}
|
||||
}
|
||||
|
||||
if (submit_done) {
|
||||
VideoCore::EndCapture();
|
||||
|
||||
if (rasterizer) {
|
||||
rasterizer->Flush();
|
||||
}
|
||||
submit_done = false;
|
||||
}
|
||||
|
||||
Platform::IrqC::Instance()->Signal(Platform::InterruptId::GpuIdle);
|
||||
}
|
||||
}
|
||||
|
@ -365,8 +379,9 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
|||
const auto* write_data = reinterpret_cast<const PM4CmdWriteData*>(header);
|
||||
ASSERT(write_data->dst_sel.Value() == 2 || write_data->dst_sel.Value() == 5);
|
||||
const u32 data_size = (header->type3.count.Value() - 2) * 4;
|
||||
u64* address = write_data->Address<u64*>();
|
||||
if (!write_data->wr_one_addr.Value()) {
|
||||
std::memcpy(write_data->Address<void*>(), write_data->data, data_size);
|
||||
std::memcpy(address, write_data->data, data_size);
|
||||
} else {
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
@ -379,6 +394,14 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
|||
case PM4ItOpcode::WaitRegMem: {
|
||||
const auto* wait_reg_mem = reinterpret_cast<const PM4CmdWaitRegMem*>(header);
|
||||
ASSERT(wait_reg_mem->engine.Value() == PM4CmdWaitRegMem::Engine::Me);
|
||||
// Optimization: VO label waits are special because the emulator
|
||||
// will write to the label when presentation is finished. So if
|
||||
// there are no other submits to yield to we can sleep the thread
|
||||
// instead and allow other tasks to run.
|
||||
const u64* wait_addr = wait_reg_mem->Address<u64*>();
|
||||
if (vo_port->IsVoLabel(wait_addr) && num_submits == 1) {
|
||||
vo_port->WaitVoLabel([&] { return wait_reg_mem->Test(); });
|
||||
}
|
||||
while (!wait_reg_mem->Test()) {
|
||||
TracyFiberLeave;
|
||||
co_yield {};
|
||||
|
@ -511,7 +534,7 @@ void Liverpool::SubmitGfx(std::span<const u32> dcb, std::span<const u32> ccb) {
|
|||
|
||||
auto task = ProcessGraphics(dcb, ccb);
|
||||
{
|
||||
std::unique_lock lock{queue.m_access};
|
||||
std::scoped_lock lock{queue.m_access};
|
||||
queue.submits.emplace(task.handle);
|
||||
}
|
||||
|
||||
|
@ -526,7 +549,7 @@ void Liverpool::SubmitAsc(u32 vqid, std::span<const u32> acb) {
|
|||
|
||||
const auto& task = ProcessCompute(acb, vqid);
|
||||
{
|
||||
std::unique_lock lock{queue.m_access};
|
||||
std::scoped_lock lock{queue.m_access};
|
||||
queue.submits.emplace(task.handle);
|
||||
}
|
||||
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
#include <array>
|
||||
#include <condition_variable>
|
||||
#include <coroutine>
|
||||
#include <functional>
|
||||
#include <mutex>
|
||||
#include <span>
|
||||
#include <thread>
|
||||
|
@ -21,6 +22,10 @@ namespace Vulkan {
|
|||
class Rasterizer;
|
||||
}
|
||||
|
||||
namespace Libraries::VideoOut {
|
||||
struct VideoOutPort;
|
||||
}
|
||||
|
||||
namespace AmdGpu {
|
||||
|
||||
#define GFX6_3D_REG_INDEX(field_name) (offsetof(AmdGpu::Liverpool::Regs, field_name) / sizeof(u32))
|
||||
|
@ -995,10 +1000,25 @@ public:
|
|||
void SubmitGfx(std::span<const u32> dcb, std::span<const u32> ccb);
|
||||
void SubmitAsc(u32 vqid, std::span<const u32> acb);
|
||||
|
||||
void SubmitDone() noexcept {
|
||||
std::scoped_lock lk{submit_mutex};
|
||||
submit_done = true;
|
||||
submit_cv.notify_one();
|
||||
}
|
||||
|
||||
void WaitGpuIdle() noexcept {
|
||||
std::unique_lock lk{submit_mutex};
|
||||
submit_cv.wait(lk, [this] { return num_submits == 0; });
|
||||
}
|
||||
|
||||
bool IsGpuIdle() const {
|
||||
return num_submits == 0;
|
||||
}
|
||||
|
||||
// Stores the video-out port pointer; the pointer is kept as-is (non-owning).
void SetVoPort(Libraries::VideoOut::VideoOutPort* port) {
    this->vo_port = port;
}
|
||||
|
||||
// Stores the rasterizer pointer; the pointer is kept as-is (non-owning).
void BindRasterizer(Vulkan::Rasterizer* rasterizer_) {
    this->rasterizer = rasterizer_;
}
|
||||
|
@ -1063,8 +1083,10 @@ private:
|
|||
} cblock{};
|
||||
|
||||
Vulkan::Rasterizer* rasterizer{};
|
||||
Libraries::VideoOut::VideoOutPort* vo_port{};
|
||||
std::jthread process_thread{};
|
||||
std::atomic<u32> num_submits{};
|
||||
std::atomic<bool> submit_done{};
|
||||
std::mutex submit_mutex;
|
||||
std::condition_variable_any submit_cv;
|
||||
};
|
||||
|
|
|
@ -404,8 +404,9 @@ struct PM4CmdWaitRegMem {
|
|||
u32 mask;
|
||||
u32 poll_interval;
|
||||
|
||||
u32* Address() const {
|
||||
return reinterpret_cast<u32*>((uintptr_t(poll_addr_hi) << 32) | poll_addr_lo);
|
||||
template <typename T = u32*>
|
||||
T Address() const {
|
||||
return reinterpret_cast<T>((uintptr_t(poll_addr_hi) << 32) | poll_addr_lo);
|
||||
}
|
||||
|
||||
bool Test() const {
|
||||
|
@ -464,8 +465,8 @@ struct PM4CmdWriteData {
|
|||
}
|
||||
|
||||
template <typename T>
|
||||
T* Address() const {
|
||||
return reinterpret_cast<T*>(addr64);
|
||||
T Address() const {
|
||||
return reinterpret_cast<T>(addr64);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -494,8 +495,9 @@ struct PM4CmdEventWriteEos {
|
|||
BitField<16, 16, u32> size; ///< Number of DWs to read from the GDS
|
||||
};
|
||||
|
||||
u32* Address() const {
|
||||
return reinterpret_cast<u32*>(address_lo | u64(address_hi) << 32);
|
||||
template <typename T = u32*>
|
||||
T Address() const {
|
||||
return reinterpret_cast<T>(address_lo | u64(address_hi) << 32);
|
||||
}
|
||||
|
||||
u32 DataDWord() const {
|
||||
|
|
120
src/video_core/renderdoc.cpp
Normal file
120
src/video_core/renderdoc.cpp
Normal file
|
@ -0,0 +1,120 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/config.h"
|
||||
#include "video_core/renderdoc.h"
|
||||
|
||||
#include <renderdoc_app.h>
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
#else
|
||||
#include <dlfcn.h>
|
||||
#endif
|
||||
|
||||
#include <filesystem>
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
/// Progress of a user-requested RenderDoc frame capture.
enum class CaptureState {
|
||||
Idle, ///< No capture requested or running; TriggerCapture() moves on to Triggered.
|
||||
Triggered, ///< A capture was requested; StartCapture() begins it and moves on to InProgress.
|
||||
InProgress, ///< A capture is running; EndCapture() finishes it and returns to Idle.
|
||||
};
|
||||
static CaptureState capture_state{CaptureState::Idle};
|
||||
|
||||
RENDERDOC_API_1_6_0* rdoc_api{};
|
||||
|
||||
void LoadRenderDoc() {
|
||||
#ifdef WIN32
|
||||
|
||||
// Check if we are running by RDoc GUI
|
||||
HMODULE mod = GetModuleHandleA("renderdoc.dll");
|
||||
if (!mod && Config::isRdocEnabled()) {
|
||||
// If enabled in config, try to load RDoc runtime in offline mode
|
||||
HKEY h_reg_key;
|
||||
LONG result = RegOpenKeyExW(HKEY_LOCAL_MACHINE,
|
||||
L"SOFTWARE\\Classes\\RenderDoc.RDCCapture.1\\DefaultIcon\\", 0,
|
||||
KEY_READ, &h_reg_key);
|
||||
if (result != ERROR_SUCCESS) {
|
||||
return;
|
||||
}
|
||||
std::array<wchar_t, MAX_PATH> key_str{};
|
||||
DWORD str_sz_out{key_str.size()};
|
||||
result = RegQueryValueExW(h_reg_key, L"", 0, NULL, (LPBYTE)key_str.data(), &str_sz_out);
|
||||
if (result != ERROR_SUCCESS) {
|
||||
return;
|
||||
}
|
||||
|
||||
std::filesystem::path path{key_str.cbegin(), key_str.cend()};
|
||||
path = path.parent_path().append("renderdoc.dll");
|
||||
const auto path_to_lib = path.generic_string();
|
||||
mod = LoadLibraryA(path_to_lib.c_str());
|
||||
}
|
||||
|
||||
if (mod) {
|
||||
const auto RENDERDOC_GetAPI =
|
||||
reinterpret_cast<pRENDERDOC_GetAPI>(GetProcAddress(mod, "RENDERDOC_GetAPI"));
|
||||
const s32 ret = RENDERDOC_GetAPI(eRENDERDOC_API_Version_1_6_0, (void**)&rdoc_api);
|
||||
ASSERT(ret == 1);
|
||||
}
|
||||
#else
|
||||
#ifdef ANDROID
|
||||
static constexpr const char RENDERDOC_LIB[] = "libVkLayer_GLES_RenderDoc.so";
|
||||
#else
|
||||
static constexpr const char RENDERDOC_LIB[] = "librenderdoc.so";
|
||||
#endif
|
||||
if (void* mod = dlopen(RENDERDOC_LIB, RTLD_NOW | RTLD_NOLOAD)) {
|
||||
const auto RENDERDOC_GetAPI =
|
||||
reinterpret_cast<pRENDERDOC_GetAPI>(dlsym(mod, "RENDERDOC_GetAPI"));
|
||||
const s32 ret = RENDERDOC_GetAPI(eRENDERDOC_API_Version_1_6_0, (void**)&rdoc_api);
|
||||
ASSERT(ret == 1);
|
||||
}
|
||||
#endif
|
||||
if (rdoc_api) {
|
||||
// Disable default capture keys as they suppose to trigger present-to-present capturing
|
||||
// and it is not what we want
|
||||
rdoc_api->SetCaptureKeys(nullptr, 0);
|
||||
|
||||
// Also remove rdoc crash handler
|
||||
rdoc_api->UnloadCrashHandler();
|
||||
}
|
||||
}
|
||||
|
||||
void StartCapture() {
|
||||
if (!rdoc_api) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (capture_state == CaptureState::Triggered) {
|
||||
rdoc_api->StartFrameCapture(nullptr, nullptr);
|
||||
capture_state = CaptureState::InProgress;
|
||||
}
|
||||
}
|
||||
|
||||
void EndCapture() {
|
||||
if (!rdoc_api) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (capture_state == CaptureState::InProgress) {
|
||||
rdoc_api->EndFrameCapture(nullptr, nullptr);
|
||||
capture_state = CaptureState::Idle;
|
||||
}
|
||||
}
|
||||
|
||||
void TriggerCapture() {
|
||||
if (capture_state == CaptureState::Idle) {
|
||||
capture_state = CaptureState::Triggered;
|
||||
}
|
||||
}
|
||||
|
||||
/// Sets RenderDoc's capture file path template to "<path><separator><prefix>".
/// Does nothing when the RenderDoc API is not loaded.
/// @param path   Directory the captures should be written to.
/// @param prefix File name prefix appended after the directory.
void SetOutputDir(const std::string& path, const std::string& prefix) {
    if (!rdoc_api) {
        return;
    }

    // Join with the platform's native separator ('\\' on Windows, '/' elsewhere) instead of a
    // hard-coded backslash, which would end up as part of the file name on non-Windows hosts.
    const char separator = static_cast<char>(std::filesystem::path::preferred_separator);
    const std::string path_template = path + separator + prefix;
    rdoc_api->SetCaptureFilePathTemplate(path_template.c_str());
}
|
||||
|
||||
} // namespace VideoCore
|
25
src/video_core/renderdoc.h
Normal file
25
src/video_core/renderdoc.h
Normal file
|
@ -0,0 +1,25 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
/// Loads renderdoc dynamic library module.
|
||||
void LoadRenderDoc();
|
||||
|
||||
/// Begins a frame capture if RenderDoc is loaded and a capture was requested via TriggerCapture().
|
||||
void StartCapture();
|
||||
|
||||
/// Ends current renderdoc capture.
|
||||
void EndCapture();
|
||||
|
||||
/// Requests a capture; it begins at the next StartCapture() call. Ignored if one is already pending or running.
|
||||
void TriggerCapture();
|
||||
|
||||
/// Sets the output directory and file name prefix used for saved captures.
|
||||
void SetOutputDir(const std::string& path, const std::string& prefix);
|
||||
|
||||
} // namespace VideoCore
|
|
@ -63,44 +63,30 @@ bool CanBlitToSwapchain(const vk::PhysicalDevice physical_device, vk::Format for
|
|||
};
|
||||
}
|
||||
|
||||
RendererVulkan::RendererVulkan(Frontend::WindowSDL& window_, AmdGpu::Liverpool* liverpool)
|
||||
: window{window_}, instance{window, Config::getGpuId(), Config::vkValidationEnabled()},
|
||||
scheduler{instance}, swapchain{instance, window}, texture_cache{instance, scheduler} {
|
||||
rasterizer = std::make_unique<Rasterizer>(instance, scheduler, texture_cache, liverpool);
|
||||
RendererVulkan::RendererVulkan(Frontend::WindowSDL& window_, AmdGpu::Liverpool* liverpool_)
|
||||
: window{window_}, liverpool{liverpool_},
|
||||
instance{window, Config::getGpuId(), Config::vkValidationEnabled()}, draw_scheduler{instance},
|
||||
present_scheduler{instance}, flip_scheduler{instance}, swapchain{instance, window},
|
||||
texture_cache{instance, draw_scheduler} {
|
||||
rasterizer = std::make_unique<Rasterizer>(instance, draw_scheduler, texture_cache, liverpool);
|
||||
const u32 num_images = swapchain.GetImageCount();
|
||||
const vk::Device device = instance.GetDevice();
|
||||
|
||||
const vk::CommandPoolCreateInfo pool_info = {
|
||||
.flags = vk::CommandPoolCreateFlagBits::eResetCommandBuffer |
|
||||
vk::CommandPoolCreateFlagBits::eTransient,
|
||||
.queueFamilyIndex = instance.GetGraphicsQueueFamilyIndex(),
|
||||
};
|
||||
command_pool = device.createCommandPoolUnique(pool_info);
|
||||
|
||||
const vk::CommandBufferAllocateInfo alloc_info = {
|
||||
.commandPool = *command_pool,
|
||||
.level = vk::CommandBufferLevel::ePrimary,
|
||||
.commandBufferCount = num_images,
|
||||
};
|
||||
|
||||
const auto cmdbuffers = device.allocateCommandBuffers(alloc_info);
|
||||
// Create presentation frames.
|
||||
present_frames.resize(num_images);
|
||||
for (u32 i = 0; i < num_images; i++) {
|
||||
Frame& frame = present_frames[i];
|
||||
frame.cmdbuf = cmdbuffers[i];
|
||||
frame.render_ready = device.createSemaphore({});
|
||||
frame.present_done = device.createFence({.flags = vk::FenceCreateFlagBits::eSignaled});
|
||||
free_queue.push(&frame);
|
||||
}
|
||||
}
|
||||
|
||||
RendererVulkan::~RendererVulkan() {
|
||||
scheduler.Finish();
|
||||
draw_scheduler.Finish();
|
||||
const vk::Device device = instance.GetDevice();
|
||||
for (auto& frame : present_frames) {
|
||||
vmaDestroyImage(instance.GetAllocator(), frame.image, frame.allocation);
|
||||
device.destroyImageView(frame.image_view);
|
||||
device.destroySemaphore(frame.render_ready);
|
||||
device.destroyFence(frame.present_done);
|
||||
}
|
||||
}
|
||||
|
@ -184,7 +170,7 @@ bool RendererVulkan::ShowSplash(Frame* frame /*= nullptr*/) {
|
|||
info.pitch = splash->GetImageInfo().width;
|
||||
info.guest_address = VAddr(splash->GetImageData().data());
|
||||
info.guest_size_bytes = splash->GetImageData().size();
|
||||
splash_img.emplace(instance, scheduler, info);
|
||||
splash_img.emplace(instance, present_scheduler, info);
|
||||
texture_cache.RefreshImage(*splash_img);
|
||||
}
|
||||
frame = PrepareFrameInternal(*splash_img);
|
||||
|
@ -193,12 +179,18 @@ bool RendererVulkan::ShowSplash(Frame* frame /*= nullptr*/) {
|
|||
return true;
|
||||
}
|
||||
|
||||
Frame* RendererVulkan::PrepareFrameInternal(VideoCore::Image& image) {
|
||||
Frame* RendererVulkan::PrepareFrameInternal(VideoCore::Image& image, bool is_eop) {
|
||||
// Request a free presentation frame.
|
||||
Frame* frame = GetRenderFrame();
|
||||
|
||||
// Post-processing (Anti-aliasing, FSR etc) goes here. For now just blit to the frame image.
|
||||
image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits::eTransferRead);
|
||||
// EOP flips are triggered from GPU thread so use the drawing scheduler to record
|
||||
// commands. Otherwise we are dealing with a CPU flip which could have arrived
|
||||
// from any guest thread. Use a separate scheduler for that.
|
||||
auto& scheduler = is_eop ? draw_scheduler : flip_scheduler;
|
||||
scheduler.EndRendering();
|
||||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
|
||||
image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits::eTransferRead, cmdbuf);
|
||||
|
||||
const std::array pre_barrier{
|
||||
vk::ImageMemoryBarrier{
|
||||
|
@ -218,12 +210,11 @@ Frame* RendererVulkan::PrepareFrameInternal(VideoCore::Image& image) {
|
|||
},
|
||||
},
|
||||
};
|
||||
|
||||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer,
|
||||
vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion,
|
||||
{}, {}, pre_barrier);
|
||||
|
||||
// Post-processing (Anti-aliasing, FSR etc) goes here. For now just blit to the frame image.
|
||||
cmdbuf.blitImage(
|
||||
image.image, image.layout, frame->image, vk::ImageLayout::eTransferDstOptimal,
|
||||
MakeImageBlit(image.info.size.width, image.info.size.height, frame->width, frame->height),
|
||||
|
@ -245,13 +236,15 @@ Frame* RendererVulkan::PrepareFrameInternal(VideoCore::Image& image) {
|
|||
.layerCount = VK_REMAINING_ARRAY_LAYERS,
|
||||
},
|
||||
};
|
||||
|
||||
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands,
|
||||
vk::PipelineStageFlagBits::eAllCommands,
|
||||
vk::DependencyFlagBits::eByRegion, {}, {}, post_barrier);
|
||||
|
||||
// Flush pending vulkan operations.
|
||||
scheduler.Flush(frame->render_ready);
|
||||
// Flush frame creation commands.
|
||||
frame->ready_semaphore = scheduler.GetMasterSemaphore()->Handle();
|
||||
frame->ready_tick = scheduler.CurrentTick();
|
||||
SubmitInfo info{};
|
||||
scheduler.Flush(info);
|
||||
return frame;
|
||||
}
|
||||
|
||||
|
@ -260,11 +253,8 @@ void RendererVulkan::Present(Frame* frame) {
|
|||
|
||||
const vk::Image swapchain_image = swapchain.Image();
|
||||
|
||||
const vk::CommandBufferBeginInfo begin_info = {
|
||||
.flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit,
|
||||
};
|
||||
const vk::CommandBuffer cmdbuf = frame->cmdbuf;
|
||||
cmdbuf.begin(begin_info);
|
||||
auto& scheduler = present_scheduler;
|
||||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
{
|
||||
auto* profiler_ctx = instance.GetProfilerContext();
|
||||
TracyVkNamedZoneC(profiler_ctx, renderer_gpu_zone, cmdbuf, "Host frame",
|
||||
|
@ -339,35 +329,17 @@ void RendererVulkan::Present(Frame* frame) {
|
|||
TracyVkCollect(profiler_ctx, cmdbuf);
|
||||
}
|
||||
}
|
||||
cmdbuf.end();
|
||||
|
||||
static constexpr std::array<vk::PipelineStageFlags, 2> wait_stage_masks = {
|
||||
vk::PipelineStageFlagBits::eColorAttachmentOutput,
|
||||
vk::PipelineStageFlagBits::eAllGraphics,
|
||||
};
|
||||
|
||||
const vk::Semaphore present_ready = swapchain.GetPresentReadySemaphore();
|
||||
const vk::Semaphore image_acquired = swapchain.GetImageAcquiredSemaphore();
|
||||
const std::array wait_semaphores = {image_acquired, frame->render_ready};
|
||||
|
||||
vk::SubmitInfo submit_info = {
|
||||
.waitSemaphoreCount = static_cast<u32>(wait_semaphores.size()),
|
||||
.pWaitSemaphores = wait_semaphores.data(),
|
||||
.pWaitDstStageMask = wait_stage_masks.data(),
|
||||
.commandBufferCount = 1u,
|
||||
.pCommandBuffers = &cmdbuf,
|
||||
.signalSemaphoreCount = 1,
|
||||
.pSignalSemaphores = &present_ready,
|
||||
};
|
||||
|
||||
std::scoped_lock submit_lock{scheduler.submit_mutex};
|
||||
try {
|
||||
instance.GetGraphicsQueue().submit(submit_info, frame->present_done);
|
||||
} catch (vk::DeviceLostError& err) {
|
||||
LOG_CRITICAL(Render_Vulkan, "Device lost during present submit: {}", err.what());
|
||||
UNREACHABLE();
|
||||
}
|
||||
// Flush vulkan commands.
|
||||
SubmitInfo info{};
|
||||
info.AddWait(swapchain.GetImageAcquiredSemaphore());
|
||||
info.AddWait(frame->ready_semaphore, frame->ready_tick);
|
||||
info.AddSignal(swapchain.GetPresentReadySemaphore());
|
||||
info.AddSignal(frame->present_done);
|
||||
scheduler.Flush(info);
|
||||
|
||||
// Present to swapchain.
|
||||
std::scoped_lock submit_lock{Scheduler::submit_mutex};
|
||||
swapchain.Present();
|
||||
|
||||
// Free the frame for reuse
|
||||
|
|
|
@ -26,9 +26,15 @@ struct Frame {
|
|||
VmaAllocation allocation;
|
||||
vk::Image image;
|
||||
vk::ImageView image_view;
|
||||
vk::Semaphore render_ready;
|
||||
vk::Fence present_done;
|
||||
vk::CommandBuffer cmdbuf;
|
||||
vk::Semaphore ready_semaphore;
|
||||
u64 ready_tick;
|
||||
};
|
||||
|
||||
/// Identifies a command scheduler by its role.
/// NOTE(review): presumably mirrors the draw/present/flip schedulers owned by RendererVulkan;
/// no use of this enum is visible here — confirm against callers.
enum SchedulerType {
|
||||
Draw,
|
||||
Present,
|
||||
CpuFlip,
|
||||
};
|
||||
|
||||
class Rasterizer;
|
||||
|
@ -39,16 +45,16 @@ public:
|
|||
~RendererVulkan();
|
||||
|
||||
Frame* PrepareFrame(const Libraries::VideoOut::BufferAttributeGroup& attribute,
|
||||
VAddr cpu_address) {
|
||||
VAddr cpu_address, bool is_eop) {
|
||||
const auto info = VideoCore::ImageInfo{attribute, cpu_address};
|
||||
const auto image_id = texture_cache.FindImage(info, cpu_address);
|
||||
auto& image = texture_cache.GetImage(image_id);
|
||||
return PrepareFrameInternal(image);
|
||||
return PrepareFrameInternal(image, is_eop);
|
||||
}
|
||||
|
||||
Frame* PrepareBlankFrame() {
|
||||
auto& image = texture_cache.GetImage(VideoCore::NULL_IMAGE_ID);
|
||||
return PrepareFrameInternal(image);
|
||||
return PrepareFrameInternal(image, true);
|
||||
}
|
||||
|
||||
VideoCore::Image& RegisterVideoOutSurface(
|
||||
|
@ -60,9 +66,9 @@ public:
|
|||
}
|
||||
|
||||
bool IsVideoOutSurface(const AmdGpu::Liverpool::ColorBuffer& color_buffer) {
|
||||
return std::find_if(vo_buffers_addr.cbegin(), vo_buffers_addr.cend(), [&](VAddr vo_buffer) {
|
||||
return std::ranges::find_if(vo_buffers_addr, [&](VAddr vo_buffer) {
|
||||
return vo_buffer == color_buffer.Address();
|
||||
}) != vo_buffers_addr.cend();
|
||||
}) != vo_buffers_addr.end();
|
||||
}
|
||||
|
||||
bool ShowSplash(Frame* frame = nullptr);
|
||||
|
@ -70,13 +76,16 @@ public:
|
|||
void RecreateFrame(Frame* frame, u32 width, u32 height);
|
||||
|
||||
private:
|
||||
Frame* PrepareFrameInternal(VideoCore::Image& image);
|
||||
Frame* PrepareFrameInternal(VideoCore::Image& image, bool is_eop = true);
|
||||
Frame* GetRenderFrame();
|
||||
|
||||
private:
|
||||
Frontend::WindowSDL& window;
|
||||
AmdGpu::Liverpool* liverpool;
|
||||
Instance instance;
|
||||
Scheduler scheduler;
|
||||
Scheduler draw_scheduler;
|
||||
Scheduler present_scheduler;
|
||||
Scheduler flip_scheduler;
|
||||
Swapchain swapchain;
|
||||
std::unique_ptr<Rasterizer> rasterizer;
|
||||
VideoCore::TextureCache texture_cache;
|
||||
|
|
|
@ -2,8 +2,6 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <limits>
|
||||
#include <mutex>
|
||||
#include "common/assert.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
|
||||
|
||||
|
@ -60,46 +58,4 @@ void MasterSemaphore::Wait(u64 tick) {
|
|||
Refresh();
|
||||
}
|
||||
|
||||
void MasterSemaphore::SubmitWork(vk::CommandBuffer cmdbuf, vk::Semaphore wait, vk::Semaphore signal,
|
||||
u64 signal_value) {
|
||||
cmdbuf.end();
|
||||
|
||||
const u32 num_signal_semaphores = signal ? 2U : 1U;
|
||||
const std::array signal_values{signal_value, u64(0)};
|
||||
const std::array signal_semaphores{Handle(), signal};
|
||||
|
||||
const u32 num_wait_semaphores = wait ? 2U : 1U;
|
||||
const std::array wait_values{signal_value - 1, u64(1)};
|
||||
const std::array wait_semaphores{Handle(), wait};
|
||||
|
||||
static constexpr std::array<vk::PipelineStageFlags, 2> wait_stage_masks = {
|
||||
vk::PipelineStageFlagBits::eAllCommands,
|
||||
vk::PipelineStageFlagBits::eColorAttachmentOutput,
|
||||
};
|
||||
|
||||
const vk::TimelineSemaphoreSubmitInfo timeline_si = {
|
||||
.waitSemaphoreValueCount = num_wait_semaphores,
|
||||
.pWaitSemaphoreValues = wait_values.data(),
|
||||
.signalSemaphoreValueCount = num_signal_semaphores,
|
||||
.pSignalSemaphoreValues = signal_values.data(),
|
||||
};
|
||||
|
||||
const vk::SubmitInfo submit_info = {
|
||||
.pNext = &timeline_si,
|
||||
.waitSemaphoreCount = num_wait_semaphores,
|
||||
.pWaitSemaphores = wait_semaphores.data(),
|
||||
.pWaitDstStageMask = wait_stage_masks.data(),
|
||||
.commandBufferCount = 1u,
|
||||
.pCommandBuffers = &cmdbuf,
|
||||
.signalSemaphoreCount = num_signal_semaphores,
|
||||
.pSignalSemaphores = signal_semaphores.data(),
|
||||
};
|
||||
|
||||
try {
|
||||
instance.GetGraphicsQueue().submit(submit_info);
|
||||
} catch (vk::DeviceLostError& err) {
|
||||
UNREACHABLE_MSG("Device lost during submit: {}", err.what());
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
|
|
@ -46,10 +46,6 @@ public:
|
|||
/// Waits for a tick to be hit on the GPU
|
||||
void Wait(u64 tick);
|
||||
|
||||
/// Submits the provided command buffer for execution
|
||||
void SubmitWork(vk::CommandBuffer cmdbuf, vk::Semaphore wait, vk::Semaphore signal,
|
||||
u64 signal_value);
|
||||
|
||||
protected:
|
||||
const Instance& instance;
|
||||
vk::UniqueSemaphore semaphore; ///< Timeline semaphore.
|
||||
|
|
|
@ -96,6 +96,13 @@ void Rasterizer::DispatchDirect() {
|
|||
cmdbuf.dispatch(cs_program.dim_x, cs_program.dim_y, cs_program.dim_z);
|
||||
}
|
||||
|
||||
/// Submits the scheduler's current work with no extra waits or signals.
/// @return The scheduler tick that was current immediately before the submission.
u64 Rasterizer::Flush() {
    // Sample the tick first; the value returned is the one read before Flush() runs.
    const u64 tick_before_flush = scheduler.CurrentTick();
    SubmitInfo submit{};
    scheduler.Flush(submit);
    return tick_before_flush;
}
|
||||
|
||||
void Rasterizer::BeginRendering() {
|
||||
const auto& regs = liverpool->regs;
|
||||
RenderState state;
|
||||
|
|
|
@ -36,6 +36,8 @@ public:
|
|||
void ScopeMarkerBegin(const std::string& str);
|
||||
void ScopeMarkerEnd();
|
||||
|
||||
u64 Flush();
|
||||
|
||||
private:
|
||||
u32 SetupIndexBuffer(bool& is_indexed, u32 index_offset);
|
||||
void MapMemory(VAddr addr, size_t size);
|
||||
|
|
|
@ -2,12 +2,15 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <mutex>
|
||||
#include "common/assert.h"
|
||||
#include "common/debug.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
std::mutex Scheduler::submit_mutex;
|
||||
|
||||
Scheduler::Scheduler(const Instance& instance)
|
||||
: instance{instance}, master_semaphore{instance}, command_pool{instance, &master_semaphore} {
|
||||
profiler_scope = reinterpret_cast<tracy::VkCtxScope*>(std::malloc(sizeof(tracy::VkCtxScope)));
|
||||
|
@ -50,22 +53,24 @@ void Scheduler::EndRendering() {
|
|||
current_cmdbuf.endRendering();
|
||||
}
|
||||
|
||||
void Scheduler::Flush(vk::Semaphore signal, vk::Semaphore wait) {
|
||||
// When flushing, we only send data to the worker thread; no waiting is necessary.
|
||||
SubmitExecution(signal, wait);
|
||||
void Scheduler::Flush(SubmitInfo& info) {
|
||||
// When flushing, we only send data to the driver; no waiting is necessary.
|
||||
SubmitExecution(info);
|
||||
}
|
||||
|
||||
void Scheduler::Finish(vk::Semaphore signal, vk::Semaphore wait) {
|
||||
void Scheduler::Finish() {
|
||||
// When finishing, we need to wait for the submission to have executed on the device.
|
||||
const u64 presubmit_tick = CurrentTick();
|
||||
SubmitExecution(signal, wait);
|
||||
SubmitInfo info{};
|
||||
SubmitExecution(info);
|
||||
Wait(presubmit_tick);
|
||||
}
|
||||
|
||||
void Scheduler::Wait(u64 tick) {
|
||||
if (tick >= master_semaphore.CurrentTick()) {
|
||||
// Make sure we are not waiting for the current tick without signalling
|
||||
Flush();
|
||||
SubmitInfo info{};
|
||||
Flush(info);
|
||||
}
|
||||
master_semaphore.Wait(tick);
|
||||
}
|
||||
|
@ -86,7 +91,7 @@ void Scheduler::AllocateWorkerCommandBuffers() {
|
|||
}
|
||||
}
|
||||
|
||||
void Scheduler::SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wait_semaphore) {
|
||||
void Scheduler::SubmitExecution(SubmitInfo& info) {
|
||||
std::scoped_lock lk{submit_mutex};
|
||||
const u64 signal_value = master_semaphore.NextTick();
|
||||
|
||||
|
@ -97,7 +102,40 @@ void Scheduler::SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wa
|
|||
}
|
||||
|
||||
EndRendering();
|
||||
master_semaphore.SubmitWork(current_cmdbuf, wait_semaphore, signal_semaphore, signal_value);
|
||||
current_cmdbuf.end();
|
||||
|
||||
const vk::Semaphore timeline = master_semaphore.Handle();
|
||||
info.AddSignal(timeline, signal_value);
|
||||
|
||||
static constexpr std::array<vk::PipelineStageFlags, 2> wait_stage_masks = {
|
||||
vk::PipelineStageFlagBits::eAllCommands,
|
||||
vk::PipelineStageFlagBits::eColorAttachmentOutput,
|
||||
};
|
||||
|
||||
const vk::TimelineSemaphoreSubmitInfo timeline_si = {
|
||||
.waitSemaphoreValueCount = static_cast<u32>(info.wait_ticks.size()),
|
||||
.pWaitSemaphoreValues = info.wait_ticks.data(),
|
||||
.signalSemaphoreValueCount = static_cast<u32>(info.signal_ticks.size()),
|
||||
.pSignalSemaphoreValues = info.signal_ticks.data(),
|
||||
};
|
||||
|
||||
const vk::SubmitInfo submit_info = {
|
||||
.pNext = &timeline_si,
|
||||
.waitSemaphoreCount = static_cast<u32>(info.wait_semas.size()),
|
||||
.pWaitSemaphores = info.wait_semas.data(),
|
||||
.pWaitDstStageMask = wait_stage_masks.data(),
|
||||
.commandBufferCount = 1U,
|
||||
.pCommandBuffers = &current_cmdbuf,
|
||||
.signalSemaphoreCount = static_cast<u32>(info.signal_semas.size()),
|
||||
.pSignalSemaphores = info.signal_semas.data(),
|
||||
};
|
||||
|
||||
try {
|
||||
instance.GetGraphicsQueue().submit(submit_info, info.fence);
|
||||
} catch (vk::DeviceLostError& err) {
|
||||
UNREACHABLE_MSG("Device lost during submit: {}", err.what());
|
||||
}
|
||||
|
||||
master_semaphore.Refresh();
|
||||
AllocateWorkerCommandBuffers();
|
||||
|
||||
|
|
|
@ -26,16 +26,39 @@ struct RenderState {
|
|||
}
|
||||
};
|
||||
|
||||
struct SubmitInfo {
|
||||
boost::container::static_vector<vk::Semaphore, 3> wait_semas;
|
||||
boost::container::static_vector<u64, 3> wait_ticks;
|
||||
boost::container::static_vector<vk::Semaphore, 3> signal_semas;
|
||||
boost::container::static_vector<u64, 3> signal_ticks;
|
||||
vk::Fence fence;
|
||||
|
||||
void AddWait(vk::Semaphore semaphore, u64 tick = 1) {
|
||||
wait_semas.emplace_back(semaphore);
|
||||
wait_ticks.emplace_back(tick);
|
||||
}
|
||||
|
||||
void AddSignal(vk::Semaphore semaphore, u64 tick = 1) {
|
||||
signal_semas.emplace_back(semaphore);
|
||||
signal_ticks.emplace_back(tick);
|
||||
}
|
||||
|
||||
void AddSignal(vk::Fence fence) {
|
||||
this->fence = fence;
|
||||
}
|
||||
};
|
||||
|
||||
class Scheduler {
|
||||
public:
|
||||
explicit Scheduler(const Instance& instance);
|
||||
~Scheduler();
|
||||
|
||||
/// Sends the current execution context to the GPU.
|
||||
void Flush(vk::Semaphore signal = nullptr, vk::Semaphore wait = nullptr);
|
||||
/// Sends the current execution context to the GPU
|
||||
/// and increments the scheduler timeline semaphore.
|
||||
void Flush(SubmitInfo& info);
|
||||
|
||||
/// Sends the current execution context to the GPU and waits for it to complete.
|
||||
void Finish(vk::Semaphore signal = nullptr, vk::Semaphore wait = nullptr);
|
||||
void Finish();
|
||||
|
||||
/// Waits for the given tick to trigger on the GPU.
|
||||
void Wait(u64 tick);
|
||||
|
@ -76,12 +99,12 @@ public:
|
|||
pending_ops.emplace(func, CurrentTick());
|
||||
}
|
||||
|
||||
std::mutex submit_mutex;
|
||||
static std::mutex submit_mutex;
|
||||
|
||||
private:
|
||||
void AllocateWorkerCommandBuffers();
|
||||
|
||||
void SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wait_semaphore);
|
||||
void SubmitExecution(SubmitInfo& info);
|
||||
|
||||
private:
|
||||
const Instance& instance;
|
||||
|
|
|
@ -55,7 +55,7 @@ void Swapchain::Create(u32 width_, u32 height_, vk::SurfaceKHR surface_) {
|
|||
.pQueueFamilyIndices = queue_family_indices.data(),
|
||||
.preTransform = transform,
|
||||
.compositeAlpha = composite_alpha,
|
||||
.presentMode = vk::PresentModeKHR::eFifo,
|
||||
.presentMode = vk::PresentModeKHR::eMailbox,
|
||||
.clipped = true,
|
||||
.oldSwapchain = nullptr,
|
||||
};
|
||||
|
|
Loading…
Add table
Reference in a new issue