From fbac1c304a1737537395a3f9638b5b4d1fe4e8d0 Mon Sep 17 00:00:00 2001 From: scribam Date: Thu, 28 Dec 2017 22:38:29 +0100 Subject: [PATCH 01/22] Improve cellSearch implementation --- rpcs3/Emu/Cell/Modules/cellSearch.cpp | 364 ++++++++++++++++++++++---- rpcs3/Emu/Cell/Modules/cellSearch.h | 95 +++++-- 2 files changed, 398 insertions(+), 61 deletions(-) diff --git a/rpcs3/Emu/Cell/Modules/cellSearch.cpp b/rpcs3/Emu/Cell/Modules/cellSearch.cpp index 5316eee164..6ae255fb64 100644 --- a/rpcs3/Emu/Cell/Modules/cellSearch.cpp +++ b/rpcs3/Emu/Cell/Modules/cellSearch.cpp @@ -1,5 +1,6 @@ #include "stdafx.h" #include "Emu/System.h" +#include "Emu/IdManager.h" #include "Emu/Cell/PPUModule.h" #include "cellSearch.h" @@ -7,13 +8,62 @@ logs::channel cellSearch("cellSearch"); -s32 cellSearchInitialize(ppu_thread& ppu, CellSearchMode mode, u32 container, vm::ptr func, vm::ptr userData) +template<> +void fmt_class_string::format(std::string& out, u64 arg) { - cellSearch.warning("cellSearchInitialize()"); + format_enum(out, arg, [](auto error) + { + switch (error) + { + STR_CASE(CELL_SEARCH_CANCELED); + STR_CASE(CELL_SEARCH_ERROR_PARAM); + STR_CASE(CELL_SEARCH_ERROR_BUSY); + STR_CASE(CELL_SEARCH_ERROR_NO_MEMORY); + STR_CASE(CELL_SEARCH_ERROR_UNKNOWN_MODE); + STR_CASE(CELL_SEARCH_ERROR_ALREADY_INITIALIZED); + STR_CASE(CELL_SEARCH_ERROR_NOT_INITIALIZED); + STR_CASE(CELL_SEARCH_ERROR_FINALIZING); + STR_CASE(CELL_SEARCH_ERROR_NOT_SUPPORTED_SEARCH); + STR_CASE(CELL_SEARCH_ERROR_CONTENT_OBSOLETE); + STR_CASE(CELL_SEARCH_ERROR_CONTENT_NOT_FOUND); + STR_CASE(CELL_SEARCH_ERROR_NOT_LIST); + STR_CASE(CELL_SEARCH_ERROR_OUT_OF_RANGE); + STR_CASE(CELL_SEARCH_ERROR_INVALID_SEARCHID); + STR_CASE(CELL_SEARCH_ERROR_ALREADY_GOT_RESULT); + STR_CASE(CELL_SEARCH_ERROR_NOT_SUPPORTED_CONTEXT); + STR_CASE(CELL_SEARCH_ERROR_INVALID_CONTENTTYPE); + STR_CASE(CELL_SEARCH_ERROR_DRM); + STR_CASE(CELL_SEARCH_ERROR_TAG); + STR_CASE(CELL_SEARCH_ERROR_GENERIC); + } + + return unknown; + }); +} + +struct search_t +{ + vm::ptr func; + vm::ptr userData; +}; + +struct search_object_t +{ + // TODO: Figured out the correct values to set here + static const u32 id_base = 1; + static const u32 id_step = 1; + static const u32 id_count = 64; + static const u32 invalid = 0xFFFFFFFF; +}; + +error_code cellSearchInitialize(CellSearchMode mode, u32 container, vm::ptr func, vm::ptr userData) +{ + cellSearch.warning("cellSearchInitialize(mode=0x%x, container=0x%x, func=*0x%x, userData=*0x%x)", (u32) mode, container, func, userData); + + const auto search = fxm::make_always(); + search->func = func; + search->userData = userData; - // TODO: Store the arguments somewhere so we can use them later. - - //inform callback that search is alive sysutil_register_cb([=](ppu_thread& ppu) -> s32 { func(ppu, CELL_SEARCH_EVENT_INITIALIZE_RESULT, CELL_OK, vm::null, userData); @@ -23,124 +73,350 @@ s32 cellSearchInitialize(ppu_thread& ppu, CellSearchMode mode, u32 container, vm return CELL_OK; } -s32 cellSearchFinalize() +error_code cellSearchFinalize() { cellSearch.warning("cellSearchFinalize()"); + sysutil_register_cb([=](ppu_thread& ppu) -> s32 + { + const auto search = fxm::get_always(); + + search->func(ppu, CELL_SEARCH_EVENT_FINALIZE_RESULT, CELL_OK, vm::null, search->userData); + return CELL_OK; + }); + return CELL_OK; } -s32 cellSearchStartListSearch() +error_code cellSearchStartListSearch(CellSearchListSearchType type, CellSearchSortOrder sortOrder, vm::ptr outSearchId) { - UNIMPLEMENTED_FUNC(cellSearch); + cellSearch.todo("cellSearchStartListSearch(type=0x%x, sortOrder=0x%x, outSearchId=*0x%x)", (u32) type, (u32) sortOrder, outSearchId); + + if (!outSearchId) + { + return CELL_SEARCH_ERROR_PARAM; + } + + *outSearchId = idm::make(); + + sysutil_register_cb([=](ppu_thread& ppu) -> s32 + { + const auto search = fxm::get_always(); + + vm::var resultParam; + resultParam->searchId = *outSearchId; + resultParam->resultNum = 0; // TODO + + search->func(ppu, CELL_SEARCH_EVENT_LISTSEARCH_RESULT, CELL_OK, vm::cast(resultParam.addr()), search->userData); + return CELL_OK; + }); + return CELL_OK; } -s32 cellSearchStartContentSearchInList() +error_code cellSearchStartContentSearchInList(vm::cptr listId, CellSearchSortKey sortKey, CellSearchSortOrder sortOrder, vm::ptr outSearchId) { - UNIMPLEMENTED_FUNC(cellSearch); + cellSearch.todo("cellSearchStartContentSearchInList(listId=*0x%x, sortKey=0x%x, sortOrder=0x%x, outSearchId=*0x%x)", listId, (u32) sortKey, (u32) sortOrder, outSearchId); + + if (!listId || !outSearchId) + { + return CELL_SEARCH_ERROR_PARAM; + } + + *outSearchId = idm::make(); + + sysutil_register_cb([=](ppu_thread& ppu) -> s32 + { + const auto search = fxm::get_always(); + + vm::var resultParam; + resultParam->searchId = *outSearchId; + resultParam->resultNum = 0; // TODO + + search->func(ppu, CELL_SEARCH_EVENT_CONTENTSEARCH_INLIST_RESULT, CELL_OK, vm::cast(resultParam.addr()), search->userData); + return CELL_OK; + }); + return CELL_OK; } -s32 cellSearchStartContentSearch() +error_code cellSearchStartContentSearch(CellSearchContentSearchType type, CellSearchSortKey sortKey, CellSearchSortOrder sortOrder, vm::ptr outSearchId) { - UNIMPLEMENTED_FUNC(cellSearch); + cellSearch.todo("cellSearchStartContentSearch(type=0x%x, sortKey=0x%x, sortOrder=0x%x, outSearchId=*0x%x)", (u32) type, (u32) sortKey, (u32) sortOrder, outSearchId); + + if (!outSearchId) + { + return CELL_SEARCH_ERROR_PARAM; + } + + *outSearchId = idm::make(); + + sysutil_register_cb([=](ppu_thread& ppu) -> s32 + { + const auto search = fxm::get_always(); + + vm::var resultParam; + resultParam->searchId = *outSearchId; + resultParam->resultNum = 0; // TODO + + search->func(ppu, CELL_SEARCH_EVENT_CONTENTSEARCH_RESULT, CELL_OK, vm::cast(resultParam.addr()), search->userData); + return CELL_OK; + }); + return CELL_OK; } -s32 cellSearchStartSceneSearchInVideo() +error_code cellSearchStartSceneSearchInVideo(vm::cptr videoId, CellSearchSceneSearchType searchType, CellSearchSortOrder sortOrder, vm::ptr outSearchId) { - UNIMPLEMENTED_FUNC(cellSearch); + cellSearch.todo("cellSearchStartSceneSearchInVideo(videoId=*0x%x, searchType=0x%x, sortOrder=0x%x, outSearchId=*0x%x)", videoId, (u32) searchType, (u32) sortOrder, outSearchId); + + if (!videoId || !outSearchId) + { + return CELL_SEARCH_ERROR_PARAM; + } + + *outSearchId = idm::make(); + + sysutil_register_cb([=](ppu_thread& ppu) -> s32 + { + const auto search = fxm::get_always(); + + vm::var resultParam; + resultParam->searchId = *outSearchId; + resultParam->resultNum = 0; // TODO + + search->func(ppu, CELL_SEARCH_EVENT_SCENESEARCH_INVIDEO_RESULT, CELL_OK, vm::cast(resultParam.addr()), search->userData); + return CELL_OK; + }); + return CELL_OK; } -s32 cellSearchStartSceneSearch() +error_code cellSearchStartSceneSearch(CellSearchSceneSearchType searchType, vm::cptr gameTitle, vm::cpptr tags, u32 tagNum, CellSearchSortKey sortKey, CellSearchSortOrder sortOrder, vm::ptr outSearchId) { - UNIMPLEMENTED_FUNC(cellSearch); + cellSearch.todo("cellSearchStartSceneSearch(searchType=0x%x, gameTitle=%s, tags=**0x%x, tagNum=0x%x, sortKey=0x%x, sortOrder=0x%x, outSearchId=*0x%x)", (u32) searchType, gameTitle, tags, tagNum, (u32) sortKey, (u32) sortOrder, outSearchId); + + if (!gameTitle || !outSearchId) + { + return CELL_SEARCH_ERROR_PARAM; + } + + *outSearchId = idm::make(); + + sysutil_register_cb([=](ppu_thread& ppu) -> s32 + { + const auto search = fxm::get_always(); + + vm::var resultParam; + resultParam->searchId = *outSearchId; + resultParam->resultNum = 0; // TODO + + search->func(ppu, CELL_SEARCH_EVENT_SCENESEARCH_RESULT, CELL_OK, vm::cast(resultParam.addr()), search->userData); + return CELL_OK; + }); + return CELL_OK; } -s32 cellSearchGetContentInfoByOffset() +error_code cellSearchGetContentInfoByOffset(CellSearchId searchId, s32 offset, vm::ptr infoBuffer, vm::ptr outContentType, vm::ptr outContentId) { - UNIMPLEMENTED_FUNC(cellSearch); + cellSearch.todo("cellSearchGetContentInfoByOffset(searchId=0x%x, offset=0x%x, infoBuffer=*0x%x, outContentType=*0x%x, outContentId=*0x%x)", searchId, offset, infoBuffer, outContentType, outContentId); + + if (!outContentType) + { + return CELL_SEARCH_ERROR_PARAM; + } + + const auto searchObject = idm::get(searchId); + + if (!searchObject) + { + return CELL_SEARCH_ERROR_INVALID_SEARCHID; + } + return CELL_OK; } -s32 cellSearchGetContentInfoByContentId() +error_code cellSearchGetContentInfoByContentId(vm::cptr contentId, vm::ptr infoBuffer, vm::ptr outContentType) { - UNIMPLEMENTED_FUNC(cellSearch); + cellSearch.todo("cellSearchGetContentInfoByContentId(contentId=*0x%x, infoBuffer=*0x%x, outContentType=*0x%x)", contentId, infoBuffer, outContentType); + + if (!outContentType) + { + return CELL_SEARCH_ERROR_PARAM; + } + return CELL_OK; } -s32 cellSearchGetOffsetByContentId() +error_code cellSearchGetOffsetByContentId(CellSearchId searchId, vm::cptr contentId, vm::ptr outOffset) { - UNIMPLEMENTED_FUNC(cellSearch); + cellSearch.todo("cellSearchGetOffsetByContentId(searchId=0x%x, contentId=*0x%x, outOffset=*0x%x)", searchId, contentId, outOffset); + + if (!outOffset) + { + return CELL_SEARCH_ERROR_PARAM; + } + + const auto searchObject = idm::get(searchId); + + if (!searchObject) + { + return CELL_SEARCH_ERROR_INVALID_SEARCHID; + } + return CELL_OK; } -s32 cellSearchGetContentIdByOffset() +error_code cellSearchGetContentIdByOffset(CellSearchId searchId, s32 offset, vm::ptr outContentType, vm::ptr outContentId, vm::ptr outTimeInfo) { - UNIMPLEMENTED_FUNC(cellSearch); + cellSearch.todo("cellSearchGetContentIdByOffset(searchId=0x%x, offset=0x%x, outContentType=*0x%x, outContentId=*0x%x, outTimeInfo=*0x%x)", searchId, offset, outContentType, outContentId, outTimeInfo); + + if (!outContentType || !outContentId) + { + return CELL_SEARCH_ERROR_PARAM; + } + + const auto searchObject = idm::get(searchId); + + if (!searchObject) + { + return CELL_SEARCH_ERROR_INVALID_SEARCHID; + } + return CELL_OK; } -s32 cellSearchGetContentInfoGameComment() +error_code cellSearchGetContentInfoGameComment(vm::cptr contentId, vm::ptr gameComment) { - UNIMPLEMENTED_FUNC(cellSearch); + cellSearch.todo("cellSearchGetContentInfoGameComment(contentId=*0x%x, gameComment=*0x%x)", contentId, gameComment); + + if (!gameComment) + { + return CELL_SEARCH_ERROR_PARAM; + } + return CELL_OK; } -s32 cellSearchGetMusicSelectionContext() +error_code cellSearchGetMusicSelectionContext(CellSearchId searchId, vm::cptr contentId, CellSearchRepeatMode repeatMode, CellSearchContextOption option, vm::ptr outContext) { - UNIMPLEMENTED_FUNC(cellSearch); + cellSearch.todo("cellSearchGetMusicSelectionContext(searchId=0x%x, contentId=*0x%x, repeatMode=0x%x, option=0x%x, outContext=*0x%x)", searchId, contentId, (u32) repeatMode, (u32) option, outContext); + + if (!outContext) + { + return CELL_SEARCH_ERROR_PARAM; + } + + const auto searchObject = idm::get(searchId); + + if (!searchObject) + { + return CELL_SEARCH_ERROR_INVALID_SEARCHID; + } + return CELL_OK; } -s32 cellSearchGetMusicSelectionContextOfSingleTrack() +error_code cellSearchGetMusicSelectionContextOfSingleTrack(vm::cptr contentId, vm::ptr outContext) { - UNIMPLEMENTED_FUNC(cellSearch); + cellSearch.todo("cellSearchGetMusicSelectionContextOfSingleTrack(contentId=*0x%x, outContext=*0x%x)", contentId, outContext); + + if (!contentId || !outContext) + { + return CELL_SEARCH_ERROR_PARAM; + } + return CELL_OK; } -s32 cellSearchGetContentInfoPath() +error_code cellSearchGetContentInfoPath(vm::cptr contentId, vm::ptr infoPath) { - UNIMPLEMENTED_FUNC(cellSearch); + cellSearch.todo("cellSearchGetContentInfoPath(contentId=*0x%x, infoPath=*0x%x)", contentId, infoPath); + + if (!infoPath) + { + return CELL_SEARCH_ERROR_PARAM; + } + return CELL_OK; } -s32 cellSearchGetContentInfoPathMovieThumb() +error_code cellSearchGetContentInfoPathMovieThumb(vm::cptr contentId, vm::ptr infoMt) { - UNIMPLEMENTED_FUNC(cellSearch); + cellSearch.todo("cellSearchGetContentInfoPathMovieThumb(contentId=*0x%x, infoMt=*0x%x)", contentId, infoMt); + + if (!infoMt) + { + return CELL_SEARCH_ERROR_PARAM; + } + return CELL_OK; } -s32 cellSearchPrepareFile() +error_code cellSearchPrepareFile(vm::cptr path) { - UNIMPLEMENTED_FUNC(cellSearch); + cellSearch.todo("cellSearchPrepareFile(path=%s)", path); + + if (!path) + { + return CELL_SEARCH_ERROR_PARAM; + } + return CELL_OK; } -s32 cellSearchGetContentInfoDeveloperData() +error_code cellSearchGetContentInfoDeveloperData(vm::cptr contentId, vm::ptr developerData) { - UNIMPLEMENTED_FUNC(cellSearch); + cellSearch.todo("cellSearchGetContentInfoDeveloperData(contentId=*0x%x, developerData=*0x%x)", contentId, developerData); + + if (!contentId || !developerData) + { + return CELL_SEARCH_ERROR_PARAM; + } + return CELL_OK; } -s32 cellSearchGetContentInfoSharable() +error_code cellSearchGetContentInfoSharable(vm::cptr contentId, vm::ptr sharable) { - UNIMPLEMENTED_FUNC(cellSearch); + cellSearch.todo("cellSearchGetContentInfoSharable(contentId=*0x%x, sharable=*0x%x)", contentId, sharable); + + if (!contentId || !sharable) + { + return CELL_SEARCH_ERROR_PARAM; + } + return CELL_OK; } -s32 cellSearchCancel() +error_code cellSearchCancel(CellSearchId searchId) { - UNIMPLEMENTED_FUNC(cellSearch); + cellSearch.todo("cellSearchCancel(searchId=0x%x)", searchId); + + const auto searchObject = idm::get(searchId); + + if (!searchObject) + { + return CELL_SEARCH_ERROR_INVALID_SEARCHID; + } + return CELL_OK; } -s32 cellSearchEnd() +error_code cellSearchEnd(CellSearchId searchId) { - UNIMPLEMENTED_FUNC(cellSearch); + cellSearch.warning("cellSearchEnd(searchId=0x%x)", searchId); + + const auto searchObject = idm::get(searchId); + + if (!searchObject) + { + return CELL_SEARCH_ERROR_INVALID_SEARCHID; + } + + idm::remove(searchId); + return CELL_OK; } diff --git a/rpcs3/Emu/Cell/Modules/cellSearch.h b/rpcs3/Emu/Cell/Modules/cellSearch.h index 2b63c6527a..bef91358ca 100644 --- a/rpcs3/Emu/Cell/Modules/cellSearch.h +++ b/rpcs3/Emu/Cell/Modules/cellSearch.h @@ -1,9 +1,7 @@ #pragma once -namespace vm { using namespace ps3; } - // Error Codes -enum +enum CellSearchError : u32 { CELL_SEARCH_CANCELED = 1, CELL_SEARCH_ERROR_PARAM = 0x8002C801, @@ -30,17 +28,20 @@ enum // Constants enum { - CELL_SEARCH_CONTENT_ID_SIZE = 16, - CELL_SEARCH_TITLE_LEN_MAX = 384, - CELL_SEARCH_TAG_NUM_MAX = 6, - CELL_SEARCH_TAG_LEN_MAX = 63, - CELL_MUSIC_SELECTION_CONTEXT_SIZE = 2048, - CELL_SEARCH_PATH_LEN_MAX = 63, - CELL_SEARCH_MTOPTION_LEN_MAX = 63, + CELL_SEARCH_CONTENT_ID_SIZE = 16, + CELL_SEARCH_TITLE_LEN_MAX = 384, + CELL_SEARCH_TAG_NUM_MAX = 6, + CELL_SEARCH_TAG_LEN_MAX = 63, + CELL_MUSIC_SELECTION_CONTEXT_SIZE = 2048, + CELL_SEARCH_PATH_LEN_MAX = 63, + CELL_SEARCH_MTOPTION_LEN_MAX = 63, + CELL_SEARCH_DEVELOPERDATA_LEN_MAX = 64, + CELL_SEARCH_GAMECOMMENT_SIZE_MAX = 1024, + CELL_SEARCH_CONTENT_BUFFER_SIZE_MAX = 2048, }; // Sort keys -enum : s32 +enum CellSearchSortKey : s32 { CELL_SEARCH_SORTKEY_NONE = 0, CELL_SEARCH_SORTKEY_DEFAULT = 1, @@ -56,15 +57,15 @@ enum : s32 }; // Sort order -enum : s32 +enum CellSearchSortOrder : s32 { CELL_SEARCH_SORTORDER_NONE = 0, CELL_SEARCH_SORTORDER_ASCENDING = 1, - CELL_SEARCH_SOFTORDER_DESCENDING = 2, + CELL_SEARCH_SORTORDER_DESCENDING = 2, }; // Content types -enum : s32 +enum CellSearchContentType : s32 { CELL_SEARCH_CONTENTTYPE_NONE = 0, CELL_SEARCH_CONTENTTYPE_MUSIC = 1, @@ -122,15 +123,22 @@ enum CellSearchSceneType : s32 // List types enum CellSearchListType : s32 { + CELL_SEARCH_LISTTYPE_NONE = 0, CELL_SEARCH_LISTTYPE_MUSIC_ALBUM = 1, CELL_SEARCH_LISTTYPE_MUSIC_GENRE = 2, CELL_SEARCH_LISTTYPE_MUSIC_ARTIST = 3, + CELL_SEARCH_LISTTYPE_PHOTO_YEAR = 4, + CELL_SEARCH_LISTTYPE_PHOTO_MONTH = 5, + CELL_SEARCH_LISTTYPE_PHOTO_ALBUM = 6, + CELL_SEARCH_LISTTYPE_PHOTO_PLAYLIST = 7, + CELL_SEARCH_LISTTYPE_VIDEO_ALBUM = 8, CELL_SEARCH_LISTTYPE_MUSIC_PLAYLIST = 9, }; // Content status enum CellSearchContentStatus : s32 { + CELL_SEARCH_CONTENTSTATUS_NONE, CELL_SEARCH_CONTENTSTATUS_AVAILABLE, CELL_SEARCH_CONTENTSTATUS_NOT_SUPPORTED, CELL_SEARCH_CONTENTSTATUS_BROKEN, @@ -139,7 +147,7 @@ enum CellSearchContentStatus : s32 // Search orientation enum CellSearchOrientation : s32 { - CELL_SEARCH_ORIENTATION_UNKNOWN, + CELL_SEARCH_ORIENTATION_UNKNOWN = 0, CELL_SEARCH_ORIENTATION_TOP_LEFT, CELL_SEARCH_ORIENTATION_TOP_RIGHT, CELL_SEARCH_ORIENTATION_BOTTOM_RIGHT, @@ -165,7 +173,60 @@ enum CellSearchEvent : s32 CELL_SEARCH_EVENT_SCENESEARCH_RESULT, }; -using CellSearchSystemCallback = void(CellSearchEvent event, s32 result, vm::cptr param, vm::ptr userData); +enum CellSearchListSearchType : s32 +{ + CELL_SEARCH_LISTSEARCHTYPE_NONE = 0, + CELL_SEARCH_LISTSEARCHTYPE_MUSIC_ALBUM, + CELL_SEARCH_LISTSEARCHTYPE_MUSIC_GENRE, + CELL_SEARCH_LISTSEARCHTYPE_MUSIC_ARTIST, + CELL_SEARCH_LISTSEARCHTYPE_PHOTO_YEAR, + CELL_SEARCH_LISTSEARCHTYPE_PHOTO_MONTH, + CELL_SEARCH_LISTSEARCHTYPE_PHOTO_ALBUM, + CELL_SEARCH_LISTSEARCHTYPE_PHOTO_PLAYLIST, + CELL_SEARCH_LISTSEARCHTYPE_VIDEO_ALBUM, + CELL_SEARCH_LISTSEARCHTYPE_MUSIC_PLAYLIST, +}; + +enum CellSearchContentSearchType : s32 +{ + CELL_SEARCH_CONTENTSEARCHTYPE_NONE = 0, + CELL_SEARCH_CONTENTSEARCHTYPE_MUSIC_ALL, + CELL_SEARCH_CONTENTSEARCHTYPE_PHOTO_ALL, + CELL_SEARCH_CONTENTSEARCHTYPE_VIDEO_ALL, +}; + +enum CellSearchSceneSearchType : s32 +{ + CELL_SEARCH_SCENESEARCHTYPE_NONE = 0, + CELL_SEARCH_SCENESEARCHTYPE_CHAPTER, + CELL_SEARCH_SCENESEARCHTYPE_CLIP_HIGHLIGHT, + CELL_SEARCH_SCENESEARCHTYPE_CLIP_USER, + CELL_SEARCH_SCENESEARCHTYPE_CLIP, + CELL_SEARCH_SCENESEARCHTYPE_ALL, +}; + +enum CellSearchRepeatMode : s32 +{ + CELL_SEARCH_REPEATMODE_NONE = 0, + CELL_SEARCH_REPEATMODE_REPEAT1, + CELL_SEARCH_REPEATMODE_ALL, + CELL_SEARCH_REPEATMODE_NOREPEAT1, +}; + +enum CellSearchContextOption : s32 +{ + CELL_SEARCH_CONTEXTOPTION_NONE = 0, + CELL_SEARCH_CONTEXTOPTION_SHUFFLE, +}; + +enum CellSearchSharableType : s32 +{ + CELL_SEARCH_SHARABLETYPE_PROHIBITED = 0, + CELL_SEARCH_SHARABLETYPE_PERMITTED, +}; + +using CellSearchId = s32; +using CellSearchSystemCallback = void(CellSearchEvent event, s32 result, vm::ps3::cptr param, vm::ps3::ptr userData); struct CellSearchContentId { @@ -174,7 +235,7 @@ struct CellSearchContentId struct CellSearchResultParam { - be_t searchId; + be_t searchId; be_t resultNum; }; From 42f56e357c0733cff4f45bd21baff157b82da60f Mon Sep 17 00:00:00 2001 From: Megamouse Date: Thu, 18 Jan 2018 15:59:39 +0100 Subject: [PATCH 02/22] Qt: remove obsolete and faulty m_icon_color. RepaintGui does it already also remove some unused headers --- rpcs3/rpcs3qt/game_list_frame.cpp | 8 +------- rpcs3/rpcs3qt/game_list_frame.h | 1 - 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/rpcs3/rpcs3qt/game_list_frame.cpp b/rpcs3/rpcs3qt/game_list_frame.cpp index b29b670e41..ca7fcd0c5f 100644 --- a/rpcs3/rpcs3qt/game_list_frame.cpp +++ b/rpcs3/rpcs3qt/game_list_frame.cpp @@ -15,14 +15,9 @@ #include #include -#include #include -#include #include #include -#include -#include -#include #include inline std::string sstr(const QString& _in) { return _in.toStdString(); } @@ -338,8 +333,6 @@ void game_list_frame::LoadSettings() m_sortColumn = xgui_settings->GetValue(gui::gl_sortCol).toInt(); - m_Icon_Color = xgui_settings->GetValue(gui::gl_iconColor).value(); - m_categoryFilters = xgui_settings->GetGameListCategoryFilters(); Refresh(true); @@ -943,6 +936,7 @@ void game_list_frame::SetToolBarVisible(const bool& showToolBar) m_Tool_Bar->setVisible(showToolBar); xgui_settings->SetValue(gui::gl_toolBarVisible, showToolBar); } + bool game_list_frame::GetToolBarVisible() { return m_showToolBar; diff --git a/rpcs3/rpcs3qt/game_list_frame.h b/rpcs3/rpcs3qt/game_list_frame.h index dcf0a02ac5..f3b28d9586 100644 --- a/rpcs3/rpcs3qt/game_list_frame.h +++ b/rpcs3/rpcs3qt/game_list_frame.h @@ -13,7 +13,6 @@ #include #include #include -#include #include From d238791b7d909cf13efff905ed8e7872fe9dbeac Mon Sep 17 00:00:00 2001 From: Megamouse Date: Thu, 18 Jan 2018 18:07:49 +0100 Subject: [PATCH 03/22] RSX: properly handle disconnected pads --- rpcs3/Emu/RSX/overlays.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rpcs3/Emu/RSX/overlays.h b/rpcs3/Emu/RSX/overlays.h index 234ec613b4..8539186d0e 100644 --- a/rpcs3/Emu/RSX/overlays.h +++ b/rpcs3/Emu/RSX/overlays.h @@ -74,7 +74,7 @@ namespace rsx } const PadInfo& rinfo = handler->GetInfo(); - if (rinfo.max_connect == 0 || !rinfo.now_connect) + if (rinfo.max_connect == 0) return selection_code::error; std::array button_state; @@ -85,7 +85,7 @@ namespace rsx if (Emu.IsStopped()) return selection_code::canceled; - if (Emu.IsPaused()) + if (Emu.IsPaused() || !rinfo.now_connect) { std::this_thread::sleep_for(10ms); continue; From 54fbde0de162e281896f35d9b629075770488565 Mon Sep 17 00:00:00 2001 From: scribam Date: Thu, 18 Jan 2018 21:56:32 +0100 Subject: [PATCH 04/22] [Travis] Update urls to retrieve glew and vulkan deb packages --- .travis.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.travis.yml b/.travis.yml index 00b9e054aa..f214250f13 100644 --- a/.travis.yml +++ b/.travis.yml @@ -41,11 +41,11 @@ before_install: # Install updated libglew-dev since the version provided by trusty is outdated - if [ "$TRAVIS_OS_NAME" = "linux" ]; then - wget http://mirrors.kernel.org/ubuntu/pool/main/g/glew/libglew-dev_2.0.0-3_amd64.deb; - wget http://mirrors.kernel.org/ubuntu/pool/main/g/glew/libglew2.0_2.0.0-3_amd64.deb; - wget http://mirrors.kernel.org/ubuntu/pool/universe/v/vulkan/libvulkan1_1.0.61.1+dfsg1-1ubuntu1~16.04.1_amd64.deb; - wget http://mirrors.kernel.org/ubuntu/pool/universe/v/vulkan/libvulkan-dev_1.0.61.1+dfsg1-1ubuntu1~16.04.1_amd64.deb; - sudo dpkg -i libglew2.0_2.0.0-3_amd64.deb libglew-dev_2.0.0-3_amd64.deb libvulkan1_1.0.61.1+dfsg1-1ubuntu1~16.04.1_amd64.deb libvulkan-dev_1.0.61.1+dfsg1-1ubuntu1~16.04.1_amd64.deb; + wget https://mirrors.kernel.org/ubuntu/pool/universe/g/glew/libglew-dev_2.0.0-5_amd64.deb; + wget https://mirrors.kernel.org/ubuntu/pool/universe/g/glew/libglew2.0_2.0.0-5_amd64.deb; + wget https://mirrors.kernel.org/ubuntu/pool/universe/v/vulkan/libvulkan1_1.0.61.1+dfsg1-1ubuntu1~16.04.1_amd64.deb; + wget https://mirrors.kernel.org/ubuntu/pool/universe/v/vulkan/libvulkan-dev_1.0.61.1+dfsg1-1ubuntu1~16.04.1_amd64.deb; + sudo dpkg -i libglew2.0_2.0.0-5_amd64.deb libglew-dev_2.0.0-5_amd64.deb libvulkan1_1.0.61.1+dfsg1-1ubuntu1~16.04.1_amd64.deb libvulkan-dev_1.0.61.1+dfsg1-1ubuntu1~16.04.1_amd64.deb; else brew update; brew install ccache glew llvm40; From cbc8bf01a1b7cd87c1b67782237d558806d41773 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sat, 21 Oct 2017 14:21:37 +0300 Subject: [PATCH 05/22] cell/scheduler: Manage thread placement depending on cpu hardware --- Utilities/Thread.cpp | 104 ++++++++++++++++++++++++++++-- Utilities/Thread.h | 31 ++++++++- rpcs3/Emu/Cell/MFC.cpp | 10 +++ rpcs3/Emu/Cell/MFC.h | 2 + rpcs3/Emu/Cell/PPUThread.cpp | 9 +++ rpcs3/Emu/Cell/PPUThread.h | 1 + rpcs3/Emu/Cell/SPUThread.cpp | 23 ++----- rpcs3/Emu/RSX/RSXThread.cpp | 6 +- rpcs3/Emu/System.h | 7 +- rpcs3/Json/tooltips.json | 2 +- rpcs3/rpcs3qt/emu_settings.h | 22 +++---- rpcs3/rpcs3qt/settings_dialog.cpp | 4 +- rpcs3/rpcs3qt/settings_dialog.ui | 4 +- 13 files changed, 185 insertions(+), 40 deletions(-) diff --git a/Utilities/Thread.cpp b/Utilities/Thread.cpp index fc1fa44f0b..cce866eecf 100644 --- a/Utilities/Thread.cpp +++ b/Utilities/Thread.cpp @@ -6,7 +6,9 @@ #include "Emu/Cell/lv2/sys_mmapper.h" #include "Emu/Cell/lv2/sys_event.h" #include "Thread.h" +#include "sysinfo.h" #include +#include #ifdef _WIN32 #include @@ -1547,6 +1549,8 @@ thread_local DECLARE(thread_ctrl::g_tls_this_thread) = nullptr; extern thread_local std::string(*g_tls_log_prefix)(); +DECLARE(thread_ctrl::g_native_core_layout) { native_core_arrangement::undefined }; + void thread_ctrl::start(const std::shared_ptr& ctrl, task_stack task) { #ifdef _WIN32 @@ -1853,6 +1857,89 @@ void thread_ctrl::test() } } +void thread_ctrl::detect_cpu_layout() +{ + if (!g_native_core_layout.compare_and_swap_test(native_core_arrangement::undefined, native_core_arrangement::generic)) + return; + + const auto system_id = utils::get_system_info(); + if (system_id.find("Ryzen") != std::string::npos) + { + g_native_core_layout.store(native_core_arrangement::amd_ccx); + } + else if (system_id.find("i3") != std::string::npos || system_id.find("i7") != std::string::npos) + { + g_native_core_layout.store(native_core_arrangement::intel_ht); + } +} + +u16 thread_ctrl::get_affinity_mask(thread_class group) +{ + detect_cpu_layout(); + + if (const auto thread_count = std::thread::hardware_concurrency()) + { + const u16 all_cores_mask = thread_count < 16 ? (u16)(~(UINT16_MAX << thread_count)): UINT16_MAX; + + switch (g_native_core_layout) + { + default: + case native_core_arrangement::generic: + { + return all_cores_mask; + } + case native_core_arrangement::amd_ccx: + { + u16 primary_ccx_unit_mask; + if (thread_count >= 16) + { + // Threadripper, R7 + // Assign threads 8-16 + // It appears some windows code is bound to lower core addresses, binding 8-16 is alot faster than 0-7 + primary_ccx_unit_mask = 0b1111111100000000; + } + else + { + // R5 & R3 don't seem to improve performance no matter how these are shuffled (including 1600) + primary_ccx_unit_mask = 0b11111111 & all_cores_mask; + } + + switch (group) + { + default: + case thread_class::general: + return all_cores_mask; + case thread_class::rsx: + case thread_class::ppu: + case thread_class::spu: + return primary_ccx_unit_mask; + } + } + case native_core_arrangement::intel_ht: + { + if (thread_count <= 4) + { + //i3 or worse + switch (group) + { + case thread_class::rsx: + case thread_class::ppu: + return (0b0101 & all_cores_mask); + case thread_class::spu: + return (0b1010 & all_cores_mask); + case thread_class::general: + return all_cores_mask; + } + } + + return all_cores_mask; + } + } + } + + return UINT16_MAX; +} + void thread_ctrl::set_native_priority(int priority) { #ifdef _WIN32 @@ -1886,24 +1973,31 @@ void thread_ctrl::set_native_priority(int priority) #endif } -void thread_ctrl::set_ideal_processor_core(int core) +void thread_ctrl::set_thread_affinity_mask(u16 mask) { #ifdef _WIN32 HANDLE _this_thread = GetCurrentThread(); - SetThreadIdealProcessor(_this_thread, core); + SetThreadAffinityMask(_this_thread, (DWORD_PTR)mask); #elif __APPLE__ - thread_affinity_policy_data_t policy = { static_cast(core) }; + thread_affinity_policy_data_t policy = { static_cast(mask) }; thread_port_t mach_thread = pthread_mach_thread_np(pthread_self()); thread_policy_set(mach_thread, THREAD_AFFINITY_POLICY, (thread_policy_t)&policy, 1); #elif defined(__linux__) || defined(__DragonFly__) || defined(__FreeBSD__) cpu_set_t cs; CPU_ZERO(&cs); - CPU_SET(core, &cs); + + for (u32 core = 0; core < 16u; ++core) + { + if ((u32)mask & (1u << core)) + { + CPU_SET(core, &cs); + } + } + pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cs); #endif } - named_thread::named_thread() { } diff --git a/Utilities/Thread.h b/Utilities/Thread.h index 031ac9eecc..11cb504253 100644 --- a/Utilities/Thread.h +++ b/Utilities/Thread.h @@ -16,6 +16,23 @@ // Will report exception and call std::abort() if put in catch(...) [[noreturn]] void catch_all_exceptions(); +// Hardware core layout +enum class native_core_arrangement : u32 +{ + undefined, + generic, + intel_ht, + amd_ccx +}; + +enum class thread_class : u32 +{ + general, + rsx, + spu, + ppu +}; + // Simple list of void() functors class task_stack { @@ -91,6 +108,9 @@ class thread_ctrl final // Current thread static thread_local thread_ctrl* g_tls_this_thread; + // Target cpu core layout + static atomic_t g_native_core_layout; + // Self pointer std::shared_ptr m_self; @@ -234,8 +254,17 @@ public: thread_ctrl::start(out, std::forward(func)); } + // Detect layout + static void detect_cpu_layout(); + + // Returns a core affinity mask. Set whether to generate the high priority set or not + static u16 get_affinity_mask(thread_class group); + + // Sets the native thread priority static void set_native_priority(int priority); - static void set_ideal_processor_core(int core); + + // Sets the preferred affinity mask for this thread + static void set_thread_affinity_mask(u16 mask); }; class named_thread diff --git a/rpcs3/Emu/Cell/MFC.cpp b/rpcs3/Emu/Cell/MFC.cpp index d6c0b01cbc..833a0ddae9 100644 --- a/rpcs3/Emu/Cell/MFC.cpp +++ b/rpcs3/Emu/Cell/MFC.cpp @@ -3,6 +3,7 @@ #include "Emu/Memory/vm.h" #include "Emu/Cell/SPUThread.h" #include "Emu/Cell/lv2/sys_sync.h" +#include "Emu/System.h" #include "MFC.h" const bool s_use_rtm = utils::has_rtm(); @@ -375,3 +376,12 @@ void mfc_thread::add_spu(spu_ptr _spu) run(); } + +void mfc_thread::on_spawn() +{ + if (g_cfg.core.thread_scheduler_enabled) + { + // Bind to same set with the SPUs + thread_ctrl::set_thread_affinity_mask(thread_ctrl::get_affinity_mask(thread_class::spu)); + } +} diff --git a/rpcs3/Emu/Cell/MFC.h b/rpcs3/Emu/Cell/MFC.h index 5be7986845..1fa4b55f6c 100644 --- a/rpcs3/Emu/Cell/MFC.h +++ b/rpcs3/Emu/Cell/MFC.h @@ -113,4 +113,6 @@ public: virtual void cpu_task() override; virtual void add_spu(spu_ptr _spu); + + virtual void on_spawn() override; }; diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index 7f9a2e52e8..1fe24516b6 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -334,6 +334,15 @@ extern void ppu_breakpoint(u32 addr) } } +void ppu_thread::on_spawn() +{ + if (g_cfg.core.thread_scheduler_enabled) + { + // Bind to primary set + thread_ctrl::set_thread_affinity_mask(thread_ctrl::get_affinity_mask(thread_class::ppu)); + } +} + void ppu_thread::on_init(const std::shared_ptr& _this) { if (!stack_addr) diff --git a/rpcs3/Emu/Cell/PPUThread.h b/rpcs3/Emu/Cell/PPUThread.h index 3d5be3f8ab..f5db19fcea 100644 --- a/rpcs3/Emu/Cell/PPUThread.h +++ b/rpcs3/Emu/Cell/PPUThread.h @@ -30,6 +30,7 @@ public: static const u32 id_step = 1; static const u32 id_count = 2048; + virtual void on_spawn() override; virtual void on_init(const std::shared_ptr&) override; virtual std::string get_name() const override; virtual std::string dump() const override; diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index c33b3a1442..483ddbd30f 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -134,8 +134,8 @@ namespace spu { if (timeout_ms > 0) { - const auto timeout = timeout_ms * 1000u; //convert to microseconds - const auto start = get_system_time(); + const u64 timeout = timeout_ms * 1000u; //convert to microseconds + const u64 start = get_system_time(); auto remaining = timeout; while (atomic_instruction_table[pc_offset].load(std::memory_order_consume) >= max_concurrent_instructions) @@ -143,7 +143,7 @@ namespace spu if (remaining >= native_jiffy_duration_us) std::this_thread::sleep_for(1ms); else - std::this_thread::yield(); + busy_wait(remaining); const auto now = get_system_time(); const auto elapsed = now - start; @@ -155,7 +155,8 @@ namespace spu else { //Slight pause if function is overburdened - thread_ctrl::wait_for(100); + const auto count = atomic_instruction_table[pc_offset].load(std::memory_order_consume) * 100ull; + busy_wait(count); } } @@ -278,25 +279,15 @@ spu_imm_table_t::spu_imm_table_t() void SPUThread::on_spawn() { - if (g_cfg.core.bind_spu_cores) + if (g_cfg.core.thread_scheduler_enabled) { - //Get next secondary core number - auto core_count = std::thread::hardware_concurrency(); - if (core_count > 0 && core_count <= 16) - { - auto half_count = core_count / 2; - auto assigned_secondary_core = ((g_num_spu_threads % half_count) * 2) + 1; - - thread_ctrl::set_ideal_processor_core((s32)assigned_secondary_core); - } + thread_ctrl::set_thread_affinity_mask(thread_ctrl::get_affinity_mask(thread_class::spu)); } if (g_cfg.core.lower_spu_priority) { thread_ctrl::set_native_priority(-1); } - - g_num_spu_threads++; } void SPUThread::on_init(const std::shared_ptr& _this) diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index ae86c25072..04c4de0fc8 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -398,7 +398,11 @@ namespace rsx // Raise priority above other threads thread_ctrl::set_native_priority(1); - thread_ctrl::set_ideal_processor_core(0); + + if (g_cfg.core.thread_scheduler_enabled) + { + thread_ctrl::set_thread_affinity_mask(thread_ctrl::get_affinity_mask(thread_class::rsx)); + } // Round to nearest to deal with forward/reverse scaling fesetround(FE_TONEAREST); diff --git a/rpcs3/Emu/System.h b/rpcs3/Emu/System.h index 2d2ad68f50..3044d98e61 100644 --- a/rpcs3/Emu/System.h +++ b/rpcs3/Emu/System.h @@ -286,8 +286,13 @@ struct cfg_root : cfg::node cfg::_bool llvm_logs{this, "Save LLVM logs"}; cfg::string llvm_cpu{this, "Use LLVM CPU"}; +#ifdef _WIN32 + cfg::_bool thread_scheduler_enabled{ this, "Enable thread scheduler", true }; +#else + cfg::_bool thread_scheduler_enabled{ this, "Enable thread scheduler", false }; +#endif + cfg::_enum spu_decoder{this, "SPU Decoder", spu_decoder_type::asmjit}; - cfg::_bool bind_spu_cores{this, "Bind SPU threads to secondary cores"}; cfg::_bool lower_spu_priority{this, "Lower SPU thread priority"}; cfg::_bool spu_debug{this, "SPU Debug"}; cfg::_int<0, 16384> max_spu_immediate_write_size{this, "Maximum immediate DMA write size", 16384}; // Maximum size that an SPU thread can write directly without posting to MFC diff --git a/rpcs3/Json/tooltips.json b/rpcs3/Json/tooltips.json index 62fa57d0ae..cca09a5f36 100644 --- a/rpcs3/Json/tooltips.json +++ b/rpcs3/Json/tooltips.json @@ -26,7 +26,7 @@ }, "checkboxes": { "hookStFunc": "Allows to hook some functions like 'memcpy' replacing them with high-level implementations. May do nothing or break things. Experimental.", - "bindSPUThreads": "If your CPU has SMT (Hyper-Threading) SPU threads will run on these logical cores instead.\nUsually faster on an i3, possibly slower or no difference on an i7 or Ryzen.", + "enableThreadScheduler": "Allows rpcs3 to manually schedule physical cores to run specific tasks on, instead of letting the OS handle it.\nVery useful on windows, especially for AMD Ryzen systems where it can give huge performance gains.", "lowerSPUThrPrio": "Runs SPU threads with lower priority than PPU threads.\nUsually faster on an i3 or i5, possibly slower or no difference on an i7 or Ryzen.", "spuLoopDetection": "Try to detect loop conditions in SPU kernels and use them as scheduling hints.\nImproves performance and reduces CPU usage.\nMay cause severe audio stuttering in rare cases." }, diff --git a/rpcs3/rpcs3qt/emu_settings.h b/rpcs3/rpcs3qt/emu_settings.h index d8970c6d8c..696dda4857 100644 --- a/rpcs3/rpcs3qt/emu_settings.h +++ b/rpcs3/rpcs3qt/emu_settings.h @@ -30,7 +30,7 @@ public: SPUDecoder, LibLoadOptions, HookStaticFuncs, - BindSPUThreads, + EnableThreadScheduler, LowerSPUThreadPrio, SPULoopDetection, PreferredSPUThreads, @@ -183,16 +183,16 @@ private: const QMap SettingsLoc = { // Core Tab - { PPUDecoder, { "Core", "PPU Decoder"}}, - { SPUDecoder, { "Core", "SPU Decoder"}}, - { LibLoadOptions, { "Core", "Lib Loader"}}, - { HookStaticFuncs, { "Core", "Hook static functions"}}, - { BindSPUThreads, { "Core", "Bind SPU threads to secondary cores"}}, - { LowerSPUThreadPrio, { "Core", "Lower SPU thread priority"}}, - { SPULoopDetection, { "Core", "SPU loop detection"}}, - { PreferredSPUThreads, { "Core", "Preferred SPU Threads"}}, - { PPUDebug, { "Core", "PPU Debug"}}, - { SPUDebug, { "Core", "SPU Debug"}}, + { PPUDecoder, { "Core", "PPU Decoder"}}, + { SPUDecoder, { "Core", "SPU Decoder"}}, + { LibLoadOptions, { "Core", "Lib Loader"}}, + { HookStaticFuncs, { "Core", "Hook static functions"}}, + { EnableThreadScheduler, { "Core", "Enable thread scheduler"}}, + { LowerSPUThreadPrio, { "Core", "Lower SPU thread priority"}}, + { SPULoopDetection, { "Core", "SPU loop detection"}}, + { PreferredSPUThreads, { "Core", "Preferred SPU Threads"}}, + { PPUDebug, { "Core", "PPU Debug"}}, + { SPUDebug, { "Core", "SPU Debug"}}, // Graphics Tab { Renderer, { "Video", "Renderer"}}, diff --git a/rpcs3/rpcs3qt/settings_dialog.cpp b/rpcs3/rpcs3qt/settings_dialog.cpp index be7b8e08ce..5ad5b7c072 100644 --- a/rpcs3/rpcs3qt/settings_dialog.cpp +++ b/rpcs3/rpcs3qt/settings_dialog.cpp @@ -129,8 +129,8 @@ settings_dialog::settings_dialog(std::shared_ptr guiSettings, std: xemu_settings->EnhanceCheckBox(ui->hookStFunc, emu_settings::HookStaticFuncs); SubscribeTooltip(ui->hookStFunc, json_cpu_cbs["hookStFunc"].toString()); - xemu_settings->EnhanceCheckBox(ui->bindSPUThreads, emu_settings::BindSPUThreads); - SubscribeTooltip(ui->bindSPUThreads, json_cpu_cbs["bindSPUThreads"].toString()); + xemu_settings->EnhanceCheckBox(ui->enableScheduler, emu_settings::EnableThreadScheduler); + SubscribeTooltip(ui->enableScheduler, json_cpu_cbs["enableThreadScheduler"].toString()); xemu_settings->EnhanceCheckBox(ui->lowerSPUThrPrio, emu_settings::LowerSPUThreadPrio); SubscribeTooltip(ui->lowerSPUThrPrio, json_cpu_cbs["lowerSPUThrPrio"].toString()); diff --git a/rpcs3/rpcs3qt/settings_dialog.ui b/rpcs3/rpcs3qt/settings_dialog.ui index 8fc47c7017..63bdb78730 100644 --- a/rpcs3/rpcs3qt/settings_dialog.ui +++ b/rpcs3/rpcs3qt/settings_dialog.ui @@ -207,9 +207,9 @@ - + - Bind SPU threads to secondary cores + Enable thread scheduler From 9ec23371927a87813518896b11632202d43fd5df Mon Sep 17 00:00:00 2001 From: kd-11 Date: Mon, 15 Jan 2018 22:28:25 +0300 Subject: [PATCH 06/22] rsx: Synchronization improvements - Always flush the primary queue and wait if not involking readback from rsx thread -- Should fix some instances of device_lost when using WCB -- Marked remaining case as TODO -- TODO: optimize amount of time rsx waits for external threads trying to read --- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 96 ++++++----------- rpcs3/Emu/RSX/VK/VKGSRender.h | 181 +++++++++++++++++++++----------- rpcs3/Emu/RSX/VK/VKHelpers.cpp | 4 +- 3 files changed, 154 insertions(+), 127 deletions(-) diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 8293750951..e86014be0c 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -811,7 +811,13 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing) if (!is_rsxthr) { + //Always submit primary cb to ensure state consistency (flush pending changes such as image transitions) vm::temporary_unlock(); + + std::lock_guard lock(m_flush_queue_mutex); + + m_flush_requests.post(sync_timestamp == 0ull); + has_queue_ref = true; } else { @@ -821,67 +827,36 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing) if (sync_timestamp > 0) { - //Wait for any cb submitted after the sync timestamp to finish - while (true) + //Wait for earliest cb submitted after the sync timestamp to finish + command_buffer_chunk *target_cb = nullptr; + for (auto &cb : m_primary_cb_list) { - u32 pending = 0; - - if (m_last_flushable_cb < 0) - break; - - for (auto &cb : m_primary_cb_list) + if (cb.pending && cb.last_sync >= sync_timestamp) { - if (!cb.pending && cb.last_sync >= sync_timestamp) - { - pending = 0; - break; - } - - if (cb.pending) - { - pending++; - - if (is_rsxthr) - cb.poke(); - } + if (target_cb == nullptr || target_cb->last_sync > cb.last_sync) + target_cb = &cb; } - - if (!pending) - break; - - std::this_thread::yield(); } + if (target_cb) + target_cb->wait(); + if (is_rsxthr) m_last_flushable_cb = -1; } - else + + if (has_queue_ref) { - if (!is_rsxthr) - { - { - std::lock_guard lock(m_flush_queue_mutex); - - m_flush_commands = true; - m_queued_threads++; - } - - //Wait for the RSX thread to process - while (m_flush_commands) - { - _mm_lfence(); - _mm_pause(); - } - - has_queue_ref = true; - } + //Wait for the RSX thread to process request if it hasn't already + m_flush_requests.producer_wait(); } m_texture_cache.flush_all(result, m_secondary_command_buffer, m_memory_type_mapping, m_swap_chain->get_present_queue()); if (has_queue_ref) { - m_queued_threads--; + //Release RSX thread + m_flush_requests.remove_one(); } } @@ -1855,7 +1830,7 @@ void VKGSRender::flush_command_queue(bool hard_sync) } m_last_flushable_cb = -1; - m_flush_commands = false; + m_flush_requests.clear_pending_flag(); } else { @@ -2037,15 +2012,7 @@ void VKGSRender::process_swap_request(frame_context_t *ctx, bool free_resources) void VKGSRender::do_local_task(bool idle) { - //TODO: Guard this - if (m_overlay_cleanup_requests.size()) - { - flush_command_queue(true); - m_ui_renderer->remove_temp_resources(); - m_overlay_cleanup_requests.clear(); - } - - if (m_flush_commands) + if (m_flush_requests.pending()) { std::lock_guard lock(m_flush_queue_mutex); @@ -2053,12 +2020,8 @@ void VKGSRender::do_local_task(bool idle) //Pipeline barriers later may do a better job synchronizing than wholly stalling the pipeline flush_command_queue(); - m_flush_commands = false; - while (m_queued_threads) - { - _mm_lfence(); - _mm_pause(); - } + m_flush_requests.clear_pending_flag(); + m_flush_requests.consumer_wait(); } if (m_last_flushable_cb > -1) @@ -2151,7 +2114,14 @@ void VKGSRender::do_local_task(bool idle) #endif - if (m_custom_ui) + //TODO: Guard this + if (m_overlay_cleanup_requests.size()) + { + flush_command_queue(true); + m_ui_renderer->remove_temp_resources(); + m_overlay_cleanup_requests.clear(); + } + else if (m_custom_ui) { if (!in_begin_end && native_ui_flip_request.load()) { diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index 976e02306a..4ecb9daa2c 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -43,6 +43,7 @@ struct command_buffer_chunk: public vk::command_buffer std::atomic_bool pending = { false }; std::atomic last_sync = { 0 }; + std::mutex guard_mutex; command_buffer_chunk() {} @@ -84,8 +85,13 @@ struct command_buffer_chunk: public vk::command_buffer { if (vkGetFenceStatus(m_device, submit_fence) == VK_SUCCESS) { - vkResetFences(m_device, 1, &submit_fence); - pending = false; + std::lock_guard lock(guard_mutex); + + if (pending) + { + vkResetFences(m_device, 1, &submit_fence); + pending = false; + } } return !pending; @@ -93,6 +99,8 @@ struct command_buffer_chunk: public vk::command_buffer void wait() { + std::lock_guard lock(guard_mutex); + if (!pending) return; @@ -116,6 +124,114 @@ struct occlusion_data command_buffer_chunk* command_buffer_to_wait = nullptr; }; +struct frame_context_t +{ + VkSemaphore present_semaphore = VK_NULL_HANDLE; + VkDescriptorSet descriptor_set = VK_NULL_HANDLE; + vk::descriptor_pool descriptor_pool; + u32 used_descriptors = 0; + + std::vector> buffer_views_to_clean; + std::vector> samplers_to_clean; + + u32 present_image = UINT32_MAX; + command_buffer_chunk* swap_command_buffer = nullptr; + + //Heap pointers + s64 attrib_heap_ptr = 0; + s64 ubo_heap_ptr = 0; + s64 index_heap_ptr = 0; + s64 texture_upload_heap_ptr = 0; + + u64 last_frame_sync_time = 0; + + //Copy shareable information + void grab_resources(frame_context_t &other) + { + present_semaphore = other.present_semaphore; + descriptor_set = other.descriptor_set; + descriptor_pool = other.descriptor_pool; + used_descriptors = other.used_descriptors; + + attrib_heap_ptr = other.attrib_heap_ptr; + ubo_heap_ptr = other.attrib_heap_ptr; + index_heap_ptr = other.attrib_heap_ptr; + texture_upload_heap_ptr = other.texture_upload_heap_ptr; + } + + //Exchange storage (non-copyable) + void swap_storage(frame_context_t &other) + { + std::swap(buffer_views_to_clean, other.buffer_views_to_clean); + std::swap(samplers_to_clean, other.samplers_to_clean); + } + + void tag_frame_end(s64 attrib_loc, s64 ubo_loc, s64 index_loc, s64 texture_loc) + { + attrib_heap_ptr = attrib_loc; + ubo_heap_ptr = ubo_loc; + index_heap_ptr = index_loc; + texture_upload_heap_ptr = texture_loc; + + last_frame_sync_time = get_system_time(); + } + + void reset_heap_ptrs() + { + last_frame_sync_time = 0; + } +}; + +struct flush_request_task +{ + atomic_t pending_state{ false }; //Flush request status; true if rsx::thread is yet to service this request + atomic_t num_waiters{ 0 }; //Number of threads waiting for this request to be serviced + bool hard_sync = false; + + flush_request_task(){} + + void post(bool _hard_sync) + { + hard_sync = (hard_sync || _hard_sync); + pending_state = true; + num_waiters++; + } + + void remove_one() + { + num_waiters--; + } + + void clear_pending_flag() + { + hard_sync = false; + pending_state.store(false); + } + + bool pending() const + { + return pending_state.load(); + } + + void consumer_wait() const + { + while (num_waiters.load() != 0) + { + _mm_lfence(); + _mm_pause(); + } + } + + void producer_wait() const + { + while (pending_state.load()) + { + _mm_lfence(); + _mm_pause(); + } + } +}; + class VKGSRender : public GSRender { private: @@ -191,64 +307,6 @@ private: vk::vk_data_heap m_index_buffer_ring_info; vk::vk_data_heap m_texture_upload_buffer_ring_info; - struct frame_context_t - { - VkSemaphore present_semaphore = VK_NULL_HANDLE; - VkDescriptorSet descriptor_set = VK_NULL_HANDLE; - vk::descriptor_pool descriptor_pool; - u32 used_descriptors = 0; - - std::vector> buffer_views_to_clean; - std::vector> samplers_to_clean; - - u32 present_image = UINT32_MAX; - command_buffer_chunk* swap_command_buffer = nullptr; - - //Heap pointers - s64 attrib_heap_ptr = 0; - s64 ubo_heap_ptr = 0; - s64 index_heap_ptr = 0; - s64 texture_upload_heap_ptr = 0; - - u64 last_frame_sync_time = 0; - - //Copy shareable information - void grab_resources(frame_context_t &other) - { - present_semaphore = other.present_semaphore; - descriptor_set = other.descriptor_set; - descriptor_pool = other.descriptor_pool; - used_descriptors = other.used_descriptors; - - attrib_heap_ptr = other.attrib_heap_ptr; - ubo_heap_ptr = other.attrib_heap_ptr; - index_heap_ptr = other.attrib_heap_ptr; - texture_upload_heap_ptr = other.texture_upload_heap_ptr; - } - - //Exchange storage (non-copyable) - void swap_storage(frame_context_t &other) - { - std::swap(buffer_views_to_clean, other.buffer_views_to_clean); - std::swap(samplers_to_clean, other.samplers_to_clean); - } - - void tag_frame_end(s64 attrib_loc, s64 ubo_loc, s64 index_loc, s64 texture_loc) - { - attrib_heap_ptr = attrib_loc; - ubo_heap_ptr = ubo_loc; - index_heap_ptr = index_loc; - texture_upload_heap_ptr = texture_loc; - - last_frame_sync_time = get_system_time(); - } - - void reset_heap_ptrs() - { - last_frame_sync_time = 0; - } - }; - std::array frame_context_storage; //Temp frame context to use if the real frame queue is overburdened. Only used for storage frame_context_t m_aux_frame_context; @@ -277,8 +335,7 @@ private: std::atomic m_last_flushable_cb = {-1 }; std::mutex m_flush_queue_mutex; - std::atomic m_flush_commands = { false }; - std::atomic m_queued_threads = { 0 }; + flush_request_task m_flush_requests; std::thread::id rsx_thread; std::atomic m_last_sync_event = { 0 }; diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.cpp b/rpcs3/Emu/RSX/VK/VKHelpers.cpp index da8e9a498d..0d8dad1ca2 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.cpp +++ b/rpcs3/Emu/RSX/VK/VKHelpers.cpp @@ -13,8 +13,8 @@ namespace vk VkSampler g_null_sampler = nullptr; - bool g_cb_no_interrupt_flag = false; - bool g_drv_no_primitive_restart_flag = false; + atomic_t g_cb_no_interrupt_flag { false }; + atomic_t g_drv_no_primitive_restart_flag { false }; u64 g_num_processed_frames = 0; u64 g_num_total_frames = 0; From 1a6e53ec98fc14e93a865a09bfbce2f859af5f75 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Thu, 18 Jan 2018 13:14:55 +0300 Subject: [PATCH 07/22] overlay: Fixes - Add fallback fonts including attempting to find glyphs in dev_flash - Fix vulkan hang on startup if icons are not present --- rpcs3/Emu/RSX/VK/VKOverlays.h | 2 +- rpcs3/Emu/RSX/overlay_controls.h | 25 +++++++++++++++++++------ 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/rpcs3/Emu/RSX/VK/VKOverlays.h b/rpcs3/Emu/RSX/VK/VKOverlays.h index dec6017f7f..2d02d3ccea 100644 --- a/rpcs3/Emu/RSX/VK/VKOverlays.h +++ b/rpcs3/Emu/RSX/VK/VKOverlays.h @@ -532,7 +532,7 @@ namespace vk const VkImageSubresourceRange range = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 }; auto tex = std::make_unique(dev, memory_types.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, - VK_IMAGE_TYPE_2D, format, w, h, 1, 1, 1, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED, + VK_IMAGE_TYPE_2D, format, std::max(w, 1), std::max(h, 1), 1, 1, 1, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, 0); diff --git a/rpcs3/Emu/RSX/overlay_controls.h b/rpcs3/Emu/RSX/overlay_controls.h index 67482c120a..0c533ae239 100644 --- a/rpcs3/Emu/RSX/overlay_controls.h +++ b/rpcs3/Emu/RSX/overlay_controls.h @@ -117,9 +117,10 @@ namespace rsx { //Init glyph std::vector bytes; + std::vector fallback_fonts; #ifdef _WIN32 std::string font_dir = "C:/Windows/Fonts/"; - std::string fallback_font = "C:/Windows/Fonts/Arial.ttf"; + fallback_fonts.push_back("C:/Windows/Fonts/Arial.ttf"); #else char *home = getenv("HOME"); if (home == nullptr) @@ -131,19 +132,31 @@ namespace rsx else font_dir += "/.fonts/"; - std::string fallback_font = "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf"; + fallback_fonts.push_back("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf"); //ubuntu + fallback_fonts.push_back("/usr/share/fonts/TTF/DejaVuSans.ttf"); //arch #endif + //Also attempt to load from dev_flash as a last resort + fallback_fonts.push_back(fs::get_config_dir() + "dev_flash/data/font/SCE-PS3-VR-R-LATIN.TTF"); + std::string requested_file = font_dir + ttf_name + ".ttf"; std::string file_path = requested_file; if (!fs::is_file(requested_file)) { - if (fs::is_file(fallback_font)) + bool font_found = false; + for (auto &fallback_font : fallback_fonts) { - //TODO: Multiple fallbacks - file_path = fallback_font; + if (fs::is_file(fallback_font)) + { + file_path = fallback_font; + font_found = true; + + LOG_NOTICE(RSX, "Found font file '%s' as a replacement for '%s'", fallback_font.c_str(), ttf_name); + break; + } } - else + + if (!font_found) { LOG_ERROR(RSX, "Failed to initialize font '%s.ttf'", ttf_name); return; From 0a2992839bb6f402f8b6814cd114ff6a734a8140 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Thu, 18 Jan 2018 15:06:28 +0300 Subject: [PATCH 08/22] rsx/gl/vk: Simulate z clipping with selective depth clamp - The scale offset matrix is fine but on real hardware the z results seem to be independent of near/far clipping distances -- If depth falls within near/far, clamp depth value to [0,1] --- rpcs3/Emu/RSX/Common/GLSLCommon.h | 14 ++++++++++++++ rpcs3/Emu/RSX/GL/GLGSRender.cpp | 6 ++++-- rpcs3/Emu/RSX/GL/GLVertexProgram.cpp | 3 +++ rpcs3/Emu/RSX/RSXThread.cpp | 10 ++-------- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 6 ++++-- rpcs3/Emu/RSX/VK/VKVertexProgram.cpp | 3 +++ 6 files changed, 30 insertions(+), 12 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/GLSLCommon.h b/rpcs3/Emu/RSX/Common/GLSLCommon.h index e3232404ca..82e4136df4 100644 --- a/rpcs3/Emu/RSX/Common/GLSLCommon.h +++ b/rpcs3/Emu/RSX/Common/GLSLCommon.h @@ -341,6 +341,20 @@ namespace glsl OS << " return result;\n"; OS << "}\n\n"; + OS << "vec4 apply_zclip_xform(vec4 pos, float near_plane, float far_plane)\n"; + OS << "{\n"; + OS << " float d = pos.z / pos.w;\n"; + OS << " if (d < 0.f && d >= near_plane)\n"; + OS << " d = 0.f;\n"; + OS << " else if (d > 1.f && d <= far_plane)\n"; + OS << " d = 1.f;\n"; + OS << " else\n"; + OS << " return pos;\n"; + OS << "\n"; + OS << " pos.z = d * pos.w;\n"; + OS << " return pos;\n"; + OS << "}\n\n"; + if (domain == glsl::program_domain::glsl_vertex_program) return; diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index be3fe8b014..f046194e40 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -1057,7 +1057,9 @@ void GLGSRender::load_program(u32 vertex_base, u32 vertex_count) *(reinterpret_cast(buf + 128)) = rsx::method_registers.transform_branch_bits(); *(reinterpret_cast(buf + 132)) = vertex_base; *(reinterpret_cast(buf + 136)) = rsx::method_registers.point_size(); - fill_vertex_layout_state(m_vertex_layout, vertex_count, reinterpret_cast(buf + 144)); + *(reinterpret_cast(buf + 140)) = rsx::method_registers.clip_min(); + *(reinterpret_cast(buf + 144)) = rsx::method_registers.clip_max(); + fill_vertex_layout_state(m_vertex_layout, vertex_count, reinterpret_cast(buf + 160)); if (m_transform_constants_dirty) { @@ -1107,7 +1109,7 @@ void GLGSRender::update_draw_state() gl_state.depth_mask(rsx::method_registers.depth_write_enabled()); gl_state.stencil_mask(rsx::method_registers.stencil_mask()); - gl_state.enable(rsx::method_registers.depth_clamp_enabled(), GL_DEPTH_CLAMP); + gl_state.enable(rsx::method_registers.depth_clamp_enabled() || !rsx::method_registers.depth_clip_enabled(), GL_DEPTH_CLAMP); if (gl_state.enable(rsx::method_registers.depth_test_enabled(), GL_DEPTH_TEST)) { diff --git a/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp b/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp index 67f5ab6cd2..a1acc4c8f7 100644 --- a/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp +++ b/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp @@ -39,6 +39,8 @@ void GLVertexDecompilerThread::insertHeader(std::stringstream &OS) OS << " uint transform_branch_bits;\n"; OS << " uint vertex_base_index;\n"; OS << " float point_size;\n"; + OS << " float z_near;\n"; + OS << " float z_far;\n"; OS << " ivec4 input_attributes[16];\n"; OS << "};\n\n"; } @@ -297,6 +299,7 @@ void GLVertexDecompilerThread::insertMainEnd(std::stringstream & OS) OS << " gl_PointSize = point_size;\n"; OS << " gl_Position = gl_Position * scale_offset_mat;\n"; + OS << " gl_Position = apply_zclip_xform(gl_Position, z_near, z_far);\n"; //Since our clip_space is symetrical [-1, 1] we map it to linear space using the eqn: //ln = (clip * 2) - 1 to fully utilize the 0-1 range of the depth buffer diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index 04c4de0fc8..7f71b5ff36 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -799,14 +799,8 @@ namespace rsx if (flip_y) scale_y *= -1; if (flip_y) offset_y *= -1; - float clip_min = rsx::method_registers.clip_min(); - float clip_max = rsx::method_registers.clip_max(); - - float z_clip_scale = (clip_max + clip_min) == 0.f ? 1.f : (clip_max + clip_min); - float z_offset_scale = (clip_max - clip_min) == 0.f ? 1.f : (clip_max - clip_min); - - float scale_z = rsx::method_registers.viewport_scale_z() / z_clip_scale; - float offset_z = rsx::method_registers.viewport_offset_z() / z_offset_scale; + float scale_z = rsx::method_registers.viewport_scale_z(); + float offset_z = rsx::method_registers.viewport_offset_z(); float one = 1.f; stream_vector(buffer, (u32&)scale_x, 0, 0, (u32&)offset_x); diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index e86014be0c..58659b4006 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -2277,7 +2277,7 @@ void VKGSRender::load_program(u32 vertex_count, u32 vertex_base) properties.rs.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; properties.rs.polygonMode = VK_POLYGON_MODE_FILL; - properties.rs.depthClampEnable = rsx::method_registers.depth_clamp_enabled(); + properties.rs.depthClampEnable = rsx::method_registers.depth_clamp_enabled() || !rsx::method_registers.depth_clip_enabled(); properties.rs.rasterizerDiscardEnable = VK_FALSE; //Disabled by setting factors to 0 as needed @@ -2338,7 +2338,9 @@ void VKGSRender::load_program(u32 vertex_count, u32 vertex_base) *(reinterpret_cast(buf + 128)) = rsx::method_registers.transform_branch_bits(); *(reinterpret_cast(buf + 132)) = vertex_base; *(reinterpret_cast(buf + 136)) = rsx::method_registers.point_size(); - fill_vertex_layout_state(m_vertex_layout, vertex_count, reinterpret_cast(buf + 144)); + *(reinterpret_cast(buf + 140)) = rsx::method_registers.clip_min(); + *(reinterpret_cast(buf + 144)) = rsx::method_registers.clip_max(); + fill_vertex_layout_state(m_vertex_layout, vertex_count, reinterpret_cast(buf + 160)); //Vertex constants buf = buf + 512; diff --git a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp index bfc357297b..bfa54106bb 100644 --- a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp @@ -37,6 +37,8 @@ void VKVertexDecompilerThread::insertHeader(std::stringstream &OS) OS << " uint transform_branch_bits;\n"; OS << " uint vertex_base_index;\n"; OS << " float point_size;\n"; + OS << " float z_near;\n"; + OS << " float z_far;\n"; OS << " ivec4 input_attributes[16];\n"; OS << "};\n"; @@ -312,6 +314,7 @@ void VKVertexDecompilerThread::insertMainEnd(std::stringstream & OS) OS << " gl_PointSize = point_size;\n"; OS << " gl_Position = gl_Position * scale_offset_mat;\n"; + OS << " gl_Position = apply_zclip_xform(gl_Position, z_near, z_far);\n"; OS << "}\n"; } From 6828fbf6580fcd97dc1f00a0acd2489c573e61ae Mon Sep 17 00:00:00 2001 From: kd-11 Date: Thu, 18 Jan 2018 16:59:55 +0300 Subject: [PATCH 09/22] rsx/texture_cache: Remove hacks; it has been proven that in offsets are in x16 fixed point --- rpcs3/Emu/RSX/Common/texture_cache.h | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h index 2f6ba30865..50af451884 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache.h +++ b/rpcs3/Emu/RSX/Common/texture_cache.h @@ -1525,8 +1525,6 @@ namespace rsx const u32 src_address = (u32)((u64)src.pixels - (u64)vm::base(0)); const u32 dst_address = (u32)((u64)dst.pixels - (u64)vm::base(0)); - u32 framebuffer_src_address = src_address; - float scale_x = dst.scale_x; float scale_y = dst.scale_y; @@ -1535,17 +1533,6 @@ namespace rsx const u16 src_w = (const u16)((f32)dst.clip_width / scale_x); const u16 src_h = (const u16)((f32)dst.clip_height / scale_y); - //Correct for tile compression - //TODO: Investigate whether DST compression also affects alignment - if (src.compressed_x || src.compressed_y) - { - const u32 x_bytes = src_is_argb8 ? (src.offset_x * 4) : (src.offset_x * 2); - const u32 y_bytes = src.pitch * src.offset_y; - - if (src.offset_x <= 16 && src.offset_y <= 16) - framebuffer_src_address -= (x_bytes + y_bytes); - } - //Check if src/dst are parts of render targets auto dst_subres = m_rtts.get_surface_subresource_if_applicable(dst_address, dst.width, dst.clip_height, dst.pitch, true, false, false); dst_is_render_target = dst_subres.surface != nullptr; @@ -1559,7 +1546,7 @@ namespace rsx } //TODO: Handle cases where src or dst can be a depth texture while the other is a color texture - requires a render pass to emulate - auto src_subres = m_rtts.get_surface_subresource_if_applicable(framebuffer_src_address, src_w, src_h, src.pitch, true, false, false); + auto src_subres = m_rtts.get_surface_subresource_if_applicable(src_address, src_w, src_h, src.pitch, true, false, false); src_is_render_target = src_subres.surface != nullptr; if (src_is_render_target && src_subres.surface->get_native_pitch() != src.pitch) From 49e64b9e82c4926c1d1d48ea7cc4ff11523de5cc Mon Sep 17 00:00:00 2001 From: Zion Nimchuk Date: Wed, 17 Jan 2018 22:08:51 -0800 Subject: [PATCH 10/22] install icons to /usr/share and load them from there --- rpcs3/CMakeLists.txt | 5 +++++ rpcs3/Emu/RSX/overlay_controls.h | 16 +++++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/rpcs3/CMakeLists.txt b/rpcs3/CMakeLists.txt index bd6ae58a52..3e0b8d0ef2 100644 --- a/rpcs3/CMakeLists.txt +++ b/rpcs3/CMakeLists.txt @@ -468,4 +468,9 @@ if(UNIX AND NOT APPLE) DESTINATION ${CMAKE_INSTALL_PREFIX}/share/icons/hicolor/48x48/apps) install(FILES rpcs3.desktop DESTINATION ${CMAKE_INSTALL_PREFIX}/share/applications) + # Install other files + install(DIRECTORY ../bin/Icons + DESTINATION ${CMAKE_INSTALL_PREFIX}/share/rpcs3) + install(DIRECTORY ../bin/GuiConfigs + DESTINATION ${CMAKE_INSTALL_PREFIX}/share/rpcs3) endif() diff --git a/rpcs3/Emu/RSX/overlay_controls.h b/rpcs3/Emu/RSX/overlay_controls.h index 0c533ae239..d543a02fa5 100644 --- a/rpcs3/Emu/RSX/overlay_controls.h +++ b/rpcs3/Emu/RSX/overlay_controls.h @@ -10,6 +10,7 @@ #include #include #include +#include #endif // STB_IMAGE_IMPLEMENTATION and STB_TRUETYPE_IMPLEMENTATION defined externally @@ -470,7 +471,20 @@ namespace rsx { //Resource was not found in config dir, try and grab from relative path (linux) info = std::make_unique(("Icons/ui/" + res).c_str()); +#ifndef _WIN32 + if (info->data == nullptr) + { + char result[ PATH_MAX ]; +#ifdef __linux__ + ssize_t count = readlink( "/proc/self/exe", result, PATH_MAX ); +#else + ssize_t count = readlink( "/proc/curproc/file", result, PATH_MAX ); +#endif + std::string executablePath = dirname(result); + info = std::make_unique((executablePath + "/../share/rpcs3/Icons/ui/" + res).c_str()); + } +#endif if (info->data != nullptr) { //Install the image to config dir @@ -1308,7 +1322,7 @@ namespace rsx u16 m_elements_height = 0; s16 m_selected_entry = -1; u16 m_elements_count = 0; - + public: list_view(u16 width, u16 height) { From f908daf323176789e089c777168835c1eb9258ca Mon Sep 17 00:00:00 2001 From: elad Date: Fri, 19 Jan 2018 17:17:29 +0200 Subject: [PATCH 11/22] SPU/MFC: check for sync command before doing a list transfer fixes #3828 --- rpcs3/Emu/Cell/MFC.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rpcs3/Emu/Cell/MFC.cpp b/rpcs3/Emu/Cell/MFC.cpp index 833a0ddae9..bd1c601349 100644 --- a/rpcs3/Emu/Cell/MFC.cpp +++ b/rpcs3/Emu/Cell/MFC.cpp @@ -187,7 +187,7 @@ void mfc_thread::cpu_task() vm::notify(cmd.eal, 128); } } - else if (cmd.cmd & MFC_LIST_MASK) + else if (cmd.cmd & MFC_LIST_MASK && LIKELY(cmd.cmd != MFC_SYNC_CMD)) { struct list_element { From 743928b3794a53668ccf5ac2a347ffd027af22e0 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Fri, 19 Jan 2018 12:19:59 +0300 Subject: [PATCH 12/22] vk/gl: Preserve clamped z precision to some extent - Use edges of depth range to map clamped stuff Disable range compression on regular draws vs extended range draws - Some applications require full 0-1 usage without compromises. -- TODO: This leaves the extended range z values to fight with regular draws in the .99 - 1.0 range --- rpcs3/Emu/RSX/Common/GLSLCommon.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/GLSLCommon.h b/rpcs3/Emu/RSX/Common/GLSLCommon.h index 82e4136df4..58464933ad 100644 --- a/rpcs3/Emu/RSX/Common/GLSLCommon.h +++ b/rpcs3/Emu/RSX/Common/GLSLCommon.h @@ -345,11 +345,11 @@ namespace glsl OS << "{\n"; OS << " float d = pos.z / pos.w;\n"; OS << " if (d < 0.f && d >= near_plane)\n"; - OS << " d = 0.f;\n"; + OS << " d = 0.f;\n"; //force clamp negative values OS << " else if (d > 1.f && d <= far_plane)\n"; - OS << " d = 1.f;\n"; + OS << " d = min(1., 0.99 + (0.01 * (pos.z - near_plane) / (far_plane - near_plane)));\n"; OS << " else\n"; - OS << " return pos;\n"; + OS << " return pos; //d = (0.99 * d);\n"; //range compression for normal values is disabled until a solution to ops comparing z is found OS << "\n"; OS << " pos.z = d * pos.w;\n"; OS << " return pos;\n"; From fcd702c8a624491f0b5ad35d73e7e6969b90b5a2 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sat, 20 Jan 2018 14:37:46 +0300 Subject: [PATCH 13/22] rsx: Texture format fixes - Implement low bit decode override flags for 2-component textures - Properly implement alot of texture remaps according to the autotest results rsx: Do not unnecessarily shuffle WZYX->RGBA unless we have proof - From looking at format swizzles, this is incorrect --- rpcs3/Emu/RSX/GL/GLTexture.cpp | 36 ++++++++++++++++++++----------- rpcs3/Emu/RSX/GL/GLTextureCache.h | 10 --------- rpcs3/Emu/RSX/RSXTexture.cpp | 18 +++++++++++++++- rpcs3/Emu/RSX/VK/VKFormats.cpp | 12 ++++++++--- rpcs3/Emu/RSX/VK/VKTextureCache.h | 12 +---------- 5 files changed, 51 insertions(+), 37 deletions(-) diff --git a/rpcs3/Emu/RSX/GL/GLTexture.cpp b/rpcs3/Emu/RSX/GL/GLTexture.cpp index 8cd6c342cc..e75f193175 100644 --- a/rpcs3/Emu/RSX/GL/GLTexture.cpp +++ b/rpcs3/Emu/RSX/GL/GLTexture.cpp @@ -35,6 +35,10 @@ namespace gl case CELL_GCM_TEXTURE_COMPRESSED_DXT1: return GL_COMPRESSED_RGBA_S3TC_DXT1_EXT; case CELL_GCM_TEXTURE_COMPRESSED_DXT23: return GL_COMPRESSED_RGBA_S3TC_DXT3_EXT; case CELL_GCM_TEXTURE_COMPRESSED_DXT45: return GL_COMPRESSED_RGBA_S3TC_DXT5_EXT; + case CELL_GCM_TEXTURE_COMPRESSED_HILO8: return GL_RG8; + case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8: return GL_RG8; + case ~(CELL_GCM_TEXTURE_UN | CELL_GCM_TEXTURE_LN) & CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: return GL_RG8; + case ~(CELL_GCM_TEXTURE_UN | CELL_GCM_TEXTURE_LN) & CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: return GL_RG8; } fmt::throw_exception("Unknown texture format 0x%x" HERE, texture_format); } @@ -49,11 +53,11 @@ namespace gl case CELL_GCM_TEXTURE_R5G6B5: return std::make_tuple(GL_RGB, GL_UNSIGNED_SHORT_5_6_5); case CELL_GCM_TEXTURE_A8R8G8B8: return std::make_tuple(GL_BGRA, GL_UNSIGNED_INT_8_8_8_8); case CELL_GCM_TEXTURE_G8B8: return std::make_tuple(GL_RG, GL_UNSIGNED_BYTE); - case CELL_GCM_TEXTURE_R6G5B5: return std::make_tuple(GL_RGBA, GL_UNSIGNED_BYTE); + case CELL_GCM_TEXTURE_R6G5B5: return std::make_tuple(GL_RGB, GL_UNSIGNED_SHORT_5_6_5); case CELL_GCM_TEXTURE_DEPTH24_D8: return std::make_tuple(GL_DEPTH_COMPONENT, GL_UNSIGNED_BYTE); case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: return std::make_tuple(GL_DEPTH_COMPONENT, GL_FLOAT); case CELL_GCM_TEXTURE_DEPTH16: return std::make_tuple(GL_DEPTH_COMPONENT, GL_SHORT); - case CELL_GCM_TEXTURE_DEPTH16_FLOAT: return std::make_tuple(GL_DEPTH_COMPONENT, GL_FLOAT); + case CELL_GCM_TEXTURE_DEPTH16_FLOAT: return std::make_tuple(GL_DEPTH_COMPONENT, GL_HALF_FLOAT); case CELL_GCM_TEXTURE_X16: return std::make_tuple(GL_RED, GL_UNSIGNED_SHORT); case CELL_GCM_TEXTURE_Y16_X16: return std::make_tuple(GL_RG, GL_UNSIGNED_SHORT); case CELL_GCM_TEXTURE_R5G5B5A1: return std::make_tuple(GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1); @@ -66,6 +70,10 @@ namespace gl case CELL_GCM_TEXTURE_COMPRESSED_DXT1: return std::make_tuple(GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_UNSIGNED_BYTE); case CELL_GCM_TEXTURE_COMPRESSED_DXT23: return std::make_tuple(GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_UNSIGNED_BYTE); case CELL_GCM_TEXTURE_COMPRESSED_DXT45: return std::make_tuple(GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_UNSIGNED_BYTE); + case CELL_GCM_TEXTURE_COMPRESSED_HILO8: return std::make_tuple(GL_RG, GL_UNSIGNED_BYTE); + case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8: return std::make_tuple(GL_RG, GL_BYTE); + case ~(CELL_GCM_TEXTURE_UN | CELL_GCM_TEXTURE_LN) & CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: return std::make_tuple(GL_RG, GL_UNSIGNED_BYTE); + case ~(CELL_GCM_TEXTURE_UN | CELL_GCM_TEXTURE_LN) & CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: return std::make_tuple(GL_RG, GL_UNSIGNED_BYTE); } fmt::throw_exception("Compressed or unknown texture format 0x%x" HERE, texture_format); } @@ -226,6 +234,10 @@ namespace gl case CELL_GCM_TEXTURE_D1R5G5B5: case CELL_GCM_TEXTURE_D8R8G8B8: case CELL_GCM_TEXTURE_Y16_X16_FLOAT: + case CELL_GCM_TEXTURE_COMPRESSED_HILO8: + case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8: + case ~(CELL_GCM_TEXTURE_UN | CELL_GCM_TEXTURE_LN) & CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: + case ~(CELL_GCM_TEXTURE_UN | CELL_GCM_TEXTURE_LN) & CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: return false; case CELL_GCM_TEXTURE_COMPRESSED_DXT1: case CELL_GCM_TEXTURE_COMPRESSED_DXT23: @@ -252,25 +264,31 @@ namespace gl case CELL_GCM_TEXTURE_COMPRESSED_DXT1: case CELL_GCM_TEXTURE_COMPRESSED_DXT23: case CELL_GCM_TEXTURE_COMPRESSED_DXT45: - case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: - case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: + case ~(CELL_GCM_TEXTURE_UN | CELL_GCM_TEXTURE_LN) & CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: + case ~(CELL_GCM_TEXTURE_UN | CELL_GCM_TEXTURE_LN) & CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: return{ GL_ALPHA, GL_RED, GL_GREEN, GL_BLUE }; case CELL_GCM_TEXTURE_A4R4G4B4: return{ GL_BLUE, GL_GREEN, GL_RED, GL_ALPHA }; case CELL_GCM_TEXTURE_B8: + return{ GL_ONE, GL_RED, GL_RED, GL_RED }; + case CELL_GCM_TEXTURE_X16: + return{ GL_RED, GL_ONE, GL_RED, GL_ONE }; + case CELL_GCM_TEXTURE_X32_FLOAT: return{ GL_RED, GL_RED, GL_RED, GL_RED }; case CELL_GCM_TEXTURE_G8B8: - return{ GL_GREEN, GL_RED, GL_GREEN, GL_RED }; + return{ GL_RED, GL_GREEN, GL_RED, GL_GREEN }; case CELL_GCM_TEXTURE_Y16_X16: - case CELL_GCM_TEXTURE_Y16_X16_FLOAT: return{ GL_RED, GL_GREEN, GL_RED, GL_GREEN }; + case CELL_GCM_TEXTURE_Y16_X16_FLOAT: + return{ GL_GREEN, GL_RED, GL_GREEN, GL_RED }; + case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: return{ GL_RED, GL_ALPHA, GL_BLUE, GL_GREEN }; @@ -282,11 +300,6 @@ namespace gl case CELL_GCM_TEXTURE_COMPRESSED_HILO8: case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8: return{ GL_RED, GL_GREEN, GL_RED, GL_GREEN }; - - case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: - case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: - return{ GL_ZERO, GL_GREEN, GL_BLUE, GL_RED }; - } fmt::throw_exception("Unknown format 0x%x" HERE, texture_format); } @@ -535,7 +548,6 @@ namespace gl } //The rest of sampler state is now handled by sampler state objects - const auto format_type = get_format_type(gcm_format); const GLenum gl_format = std::get<0>(format_type); const GLenum gl_type = std::get<1>(format_type); diff --git a/rpcs3/Emu/RSX/GL/GLTextureCache.h b/rpcs3/Emu/RSX/GL/GLTextureCache.h index e028a43239..75fb7056d8 100644 --- a/rpcs3/Emu/RSX/GL/GLTextureCache.h +++ b/rpcs3/Emu/RSX/GL/GLTextureCache.h @@ -461,16 +461,6 @@ namespace gl rsx::scale_image_nearest(dst, const_cast(data), width, height, rsx_pitch, real_pitch, pixel_size, samples_u, samples_v); } - switch (gcm_format) - { - case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: - rsx::shuffle_texel_data_wzyx(dst, rsx_pitch, width, height); - break; - case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: - rsx::shuffle_texel_data_wzyx(dst, rsx_pitch, width, height); - break; - } - glUnmapBuffer(GL_PIXEL_PACK_BUFFER); glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); diff --git a/rpcs3/Emu/RSX/RSXTexture.cpp b/rpcs3/Emu/RSX/RSXTexture.cpp index d4f45de577..fea2121960 100644 --- a/rpcs3/Emu/RSX/RSXTexture.cpp +++ b/rpcs3/Emu/RSX/RSXTexture.cpp @@ -183,7 +183,23 @@ namespace rsx std::pair, std::array> fragment_texture::decoded_remap() const { - const u32 remap_ctl = registers[NV4097_SET_TEXTURE_CONTROL1 + (m_index * 8)]; + u32 remap_ctl = registers[NV4097_SET_TEXTURE_CONTROL1 + (m_index * 8)]; + u32 remap_override = (remap_ctl >> 16) & 0xFFFF; + + switch (format() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN)) + { + case CELL_GCM_TEXTURE_X16: + case CELL_GCM_TEXTURE_Y16_X16: + case CELL_GCM_TEXTURE_Y16_X16_FLOAT: + case CELL_GCM_TEXTURE_COMPRESSED_HILO8: + case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8: + //Low bit in remap control affects whether the G component should read from first or second component + //Components are usually interleaved R-G-R-G unless flag is set, then its R-R-R-G (Virtua Fighter 5) + remap_ctl = (remap_override) ?(0b01010110 | (remap_ctl & 0xFF00)): (0b01100110 | (remap_ctl & 0xFF00)); + break; + default: + break; + } //Remapping tables; format is A-R-G-B //Remap input table. Contains channel index to read color from diff --git a/rpcs3/Emu/RSX/VK/VKFormats.cpp b/rpcs3/Emu/RSX/VK/VKFormats.cpp index c99ec0d1c5..faf18545c4 100644 --- a/rpcs3/Emu/RSX/VK/VKFormats.cpp +++ b/rpcs3/Emu/RSX/VK/VKFormats.cpp @@ -138,17 +138,23 @@ std::array get_component_mapping(u32 format) mapping = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A }; break; case CELL_GCM_TEXTURE_G8B8: - mapping = { VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R }; break; + mapping = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G }; break; case CELL_GCM_TEXTURE_B8: + mapping = { VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R }; break; + case CELL_GCM_TEXTURE_X16: + mapping = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ONE }; break; + case CELL_GCM_TEXTURE_X32_FLOAT: mapping = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R }; break; case CELL_GCM_TEXTURE_Y16_X16: - case CELL_GCM_TEXTURE_Y16_X16_FLOAT: mapping = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G }; break; + case CELL_GCM_TEXTURE_Y16_X16_FLOAT: + mapping = { VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R }; break; + case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: mapping = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_A, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_G }; break; @@ -161,7 +167,7 @@ std::array get_component_mapping(u32 format) case CELL_GCM_TEXTURE_COMPRESSED_HILO8: case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8: - mapping = { VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R }; break; + mapping = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G }; break; case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h index 0cddd96a4a..2917e0fffb 100644 --- a/rpcs3/Emu/RSX/VK/VKTextureCache.h +++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h @@ -327,18 +327,8 @@ namespace vk } dma_buffer->unmap(); + //Its highly likely that this surface will be reused, so we just leave resources in place - - switch (gcm_format) - { - case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: - rsx::shuffle_texel_data_wzyx(pixels_dst, rsx_pitch, width, height); - break; - case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: - rsx::shuffle_texel_data_wzyx(pixels_dst, rsx_pitch, width, height); - break; - } - return result; } From ab17b49e15a6f293b2f9113aea3806d04df0bc55 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Fri, 19 Jan 2018 14:32:15 +0300 Subject: [PATCH 14/22] scheduler stuff - more threads for rsx - better 1600 --- Utilities/Thread.cpp | 19 ++++++++++++++----- rpcs3/Emu/Cell/SPUThread.cpp | 4 +++- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/Utilities/Thread.cpp b/Utilities/Thread.cpp index cce866eecf..fceb85f6e9 100644 --- a/Utilities/Thread.cpp +++ b/Utilities/Thread.cpp @@ -1890,18 +1890,25 @@ u16 thread_ctrl::get_affinity_mask(thread_class group) } case native_core_arrangement::amd_ccx: { - u16 primary_ccx_unit_mask; + u16 spu_mask, ppu_mask, rsx_mask; if (thread_count >= 16) { // Threadripper, R7 // Assign threads 8-16 // It appears some windows code is bound to lower core addresses, binding 8-16 is alot faster than 0-7 - primary_ccx_unit_mask = 0b1111111100000000; + ppu_mask = spu_mask = 0b1111111100000000; + rsx_mask = all_cores_mask; + } + else if (thread_count == 12) + { + // 1600/2600 (x) + ppu_mask = spu_mask = 0b111111000000; + rsx_mask = all_cores_mask; } else { - // R5 & R3 don't seem to improve performance no matter how these are shuffled (including 1600) - primary_ccx_unit_mask = 0b11111111 & all_cores_mask; + // R5 & R3 don't seem to improve performance no matter how these are shuffled + ppu_mask = spu_mask = rsx_mask = 0b11111111 & all_cores_mask; } switch (group) @@ -1910,9 +1917,11 @@ u16 thread_ctrl::get_affinity_mask(thread_class group) case thread_class::general: return all_cores_mask; case thread_class::rsx: + return rsx_mask; case thread_class::ppu: + return ppu_mask; case thread_class::spu: - return primary_ccx_unit_mask; + return spu_mask; } } case native_core_arrangement::intel_ht: diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index 483ddbd30f..eb98662899 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -142,8 +142,10 @@ namespace spu { if (remaining >= native_jiffy_duration_us) std::this_thread::sleep_for(1ms); + else if (remaining > 100) + std::this_thread::yield(); else - busy_wait(remaining); + busy_wait(); const auto now = get_system_time(); const auto elapsed = now - start; From 3d9e3a16f1d2aee7ee5437b911692109709e1887 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sun, 21 Jan 2018 18:31:35 +0300 Subject: [PATCH 15/22] rsx/gl/vk: Fixes and optimizations - opengl driver optimization for nvidia. On nvidia glTextureBufferRange performance is horrendous -- Initialize texture buffer to whole buffer at startup and use absolute offsets to read data instead -- Over 2x performance in some cases (Resogun, TNT racers) - gl/vk: Do not flip non-existent display buffers. Fixes spec violation at boot in TNT racers demo - whitespace fixes for sys_rsx --- rpcs3/Emu/Cell/lv2/sys_rsx.h | 27 ++-- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 196 ++++++++++++++------------- rpcs3/Emu/RSX/GL/GLGSRender.h | 15 +- rpcs3/Emu/RSX/GL/GLHelpers.h | 14 +- rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp | 11 +- rpcs3/Emu/RSX/RSXThread.cpp | 17 ++- rpcs3/Emu/RSX/RSXThread.h | 6 +- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 26 ++-- 8 files changed, 177 insertions(+), 135 deletions(-) diff --git a/rpcs3/Emu/Cell/lv2/sys_rsx.h b/rpcs3/Emu/Cell/lv2/sys_rsx.h index 1883735ecf..0a867c3013 100644 --- a/rpcs3/Emu/Cell/lv2/sys_rsx.h +++ b/rpcs3/Emu/Cell/lv2/sys_rsx.h @@ -1,6 +1,7 @@ #pragma once -struct RsxDriverInfo { +struct RsxDriverInfo +{ be_t version_driver; // 0x0 be_t version_gpu; // 0x4 be_t memory_size; // 0x8 @@ -15,7 +16,9 @@ struct RsxDriverInfo { be_t unk3[6]; // 0x38-0x54 be_t systemModeFlags; // 0x54 u8 unk4[0x1064]; // 0x10B8 - struct Head { + + struct Head + { be_t lastFlipTime; // 0x0 last flip time be_t flipFlags; // 0x8 flags to handle flip/queue be_t unk1; // 0xC @@ -29,6 +32,7 @@ struct RsxDriverInfo { be_t unk; // 0x38 possible u32, 'flip field', top/bottom for interlaced be_t unk5; // 0x3C possible high bits of time stamp? used in getlastVBlankTime } head[8]; // size = 0x40, 0x200 + be_t unk7; // 0x12B8 be_t unk8; // 0x12BC be_t handlers; // 0x12C0 -- flags showing which handlers are set @@ -46,10 +50,12 @@ struct RsxDriverInfo { be_t lastError; // 0x12F4 error param for cellGcmSetGraphicsHandler // todo: theres more to this }; + static_assert(sizeof(RsxDriverInfo) == 0x12F8, "rsxSizeTest"); static_assert(sizeof(RsxDriverInfo::Head) == 0x40, "rsxHeadSizeTest"); -struct RsxDmaControl { +struct RsxDmaControl +{ u8 resv[0x40]; atomic_be_t put; atomic_be_t get; @@ -58,30 +64,35 @@ struct RsxDmaControl { be_t unk1; }; -struct RsxSemaphore { +struct RsxSemaphore +{ be_t val; be_t pad; be_t timestamp; }; -struct RsxNotify { +struct RsxNotify +{ be_t timestamp; be_t zero; }; -struct RsxReport { +struct RsxReport +{ be_t timestamp; be_t val; be_t pad; }; -struct RsxReports { +struct RsxReports +{ RsxSemaphore semaphore[0x100]; RsxNotify notify[64]; RsxReport report[2048]; }; -struct RsxDisplayInfo { +struct RsxDisplayInfo +{ be_t offset; be_t pitch; be_t width; diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index f046194e40..36d7752880 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -218,11 +218,7 @@ void GLGSRender::end() } //Do vertex upload before RTT prep / texture lookups to give the driver time to push data - u32 vertex_draw_count; - u32 actual_vertex_count; - u32 vertex_base; - std::optional > indexed_draw_info; - std::tie(vertex_draw_count, actual_vertex_count, vertex_base, indexed_draw_info) = set_vertex_buffer(); + auto upload_info = set_vertex_buffer(); //Load textures { @@ -294,7 +290,7 @@ void GLGSRender::end() std::chrono::time_point program_start = steady_clock::now(); //Load program here since it is dependent on vertex state - load_program(vertex_base, actual_vertex_count); + load_program(upload_info); std::chrono::time_point program_stop = steady_clock::now(); m_begin_time += (u32)std::chrono::duration_cast(program_stop - program_start).count(); @@ -492,10 +488,10 @@ void GLGSRender::end() const GLenum draw_mode = gl::draw_mode(rsx::method_registers.current_draw_clause.primitive); bool single_draw = !supports_multidraw || (rsx::method_registers.current_draw_clause.first_count_commands.size() <= 1 || rsx::method_registers.current_draw_clause.is_disjoint_primitive); - if (indexed_draw_info) + if (upload_info.index_info) { - const GLenum index_type = std::get<0>(indexed_draw_info.value()); - const u32 index_offset = std::get<1>(indexed_draw_info.value()); + const GLenum index_type = std::get<0>(upload_info.index_info.value()); + const u32 index_offset = std::get<1>(upload_info.index_info.value()); const bool restarts_valid = gl::is_primitive_native(rsx::method_registers.current_draw_clause.primitive) && !rsx::method_registers.current_draw_clause.is_disjoint_primitive; if (gl_state.enable(restarts_valid && rsx::method_registers.restart_index_enabled(), GL_PRIMITIVE_RESTART)) @@ -505,7 +501,7 @@ void GLGSRender::end() if (single_draw) { - glDrawElements(draw_mode, vertex_draw_count, index_type, (GLvoid *)(uintptr_t)index_offset); + glDrawElements(draw_mode, upload_info.vertex_draw_count, index_type, (GLvoid *)(uintptr_t)index_offset); } else { @@ -535,7 +531,7 @@ void GLGSRender::end() { if (single_draw) { - glDrawArrays(draw_mode, 0, vertex_draw_count); + glDrawArrays(draw_mode, 0, upload_info.vertex_draw_count); } else { @@ -652,16 +648,25 @@ void GLGSRender::on_init_thread() //Use industry standard resource alignment values as defaults m_uniform_buffer_offset_align = 256; m_min_texbuffer_alignment = 256; + m_max_texbuffer_size = 0; glEnable(GL_VERTEX_PROGRAM_POINT_SIZE); glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &m_uniform_buffer_offset_align); glGetIntegerv(GL_TEXTURE_BUFFER_OFFSET_ALIGNMENT, &m_min_texbuffer_alignment); + glGetIntegerv(GL_MAX_TEXTURE_BUFFER_SIZE, &m_max_texbuffer_size); m_vao.create(); //Set min alignment to 16-bytes for SSE optimizations with aligned addresses to work m_min_texbuffer_alignment = std::max(m_min_texbuffer_alignment, 16); m_uniform_buffer_offset_align = std::max(m_uniform_buffer_offset_align, 16); + LOG_NOTICE(RSX, "Supported texel buffer size reported: %d bytes", m_max_texbuffer_size); + if (m_max_texbuffer_size < (16 * 0x100000)) + { + LOG_ERROR(RSX, "Max texture buffer size supported is less than 16M which is useless. Expect undefined behaviour."); + m_max_texbuffer_size = (16 * 0x100000); + } + const u32 texture_index_offset = rsx::limits::fragment_textures_count + rsx::limits::vertex_textures_count; //Array stream buffer @@ -709,11 +714,14 @@ void GLGSRender::on_init_thread() m_index_ring_buffer.reset(new gl::ring_buffer()); } - m_attrib_ring_buffer->create(gl::buffer::target::texture, 256 * 0x100000); - m_index_ring_buffer->create(gl::buffer::target::element_array, 64 * 0x100000); - m_transform_constants_buffer->create(gl::buffer::target::uniform, 16 * 0x100000); - m_fragment_constants_buffer->create(gl::buffer::target::uniform, 16 * 0x100000); - m_vertex_state_buffer->create(gl::buffer::target::uniform, 16 * 0x100000); + m_attrib_ring_buffer->create(gl::buffer::target::texture, std::min(m_max_texbuffer_size, 256 * 0x100000)); + m_index_ring_buffer->create(gl::buffer::target::element_array, std::min(m_max_texbuffer_size, 64 * 0x100000)); + m_transform_constants_buffer->create(gl::buffer::target::uniform, std::min(m_max_texbuffer_size, 16 * 0x100000)); + m_fragment_constants_buffer->create(gl::buffer::target::uniform, std::min(m_max_texbuffer_size, 16 * 0x100000)); + m_vertex_state_buffer->create(gl::buffer::target::uniform, std::min(m_max_texbuffer_size, 16 * 0x100000)); + + m_gl_persistent_stream_buffer.copy_from(*m_attrib_ring_buffer, GL_R8UI, 0, (u32)m_attrib_ring_buffer->size()); + m_gl_volatile_stream_buffer.copy_from(*m_attrib_ring_buffer, GL_R8UI, 0, (u32)m_attrib_ring_buffer->size()); m_vao.element_array_buffer = *m_index_ring_buffer; @@ -999,7 +1007,7 @@ bool GLGSRender::check_program_state() return (rsx::method_registers.shader_program_address() != 0); } -void GLGSRender::load_program(u32 vertex_base, u32 vertex_count) +void GLGSRender::load_program(const vertex_upload_info& upload_info) { get_current_fragment_program(fs_sampler_state); verify(HERE), current_fragment_program.valid; @@ -1055,11 +1063,11 @@ void GLGSRender::load_program(u32 vertex_base, u32 vertex_count) fill_scale_offset_data(buf, false); fill_user_clip_data(buf + 64); *(reinterpret_cast(buf + 128)) = rsx::method_registers.transform_branch_bits(); - *(reinterpret_cast(buf + 132)) = vertex_base; + *(reinterpret_cast(buf + 132)) = upload_info.vertex_index_base; *(reinterpret_cast(buf + 136)) = rsx::method_registers.point_size(); *(reinterpret_cast(buf + 140)) = rsx::method_registers.clip_min(); *(reinterpret_cast(buf + 144)) = rsx::method_registers.clip_max(); - fill_vertex_layout_state(m_vertex_layout, vertex_count, reinterpret_cast(buf + 160)); + fill_vertex_layout_state(m_vertex_layout, upload_info.allocated_vertex_count, reinterpret_cast(buf + 160), upload_info.persistent_mapping_offset, upload_info.volatile_mapping_offset); if (m_transform_constants_dirty) { @@ -1223,97 +1231,101 @@ void GLGSRender::flip(int buffer) return; } + gl::screen.clear(gl::buffers::color); + u32 buffer_width = display_buffers[buffer].width; u32 buffer_height = display_buffers[buffer].height; u32 buffer_pitch = display_buffers[buffer].pitch; - // Calculate blit coordinates - coordi aspect_ratio; - sizei csize(m_frame->client_width(), m_frame->client_height()); - sizei new_size = csize; - - if (!g_cfg.video.stretch_to_display_area) + if (buffer < display_buffers_count && buffer_width && buffer_height && buffer_pitch) { - const double aq = (double)buffer_width / buffer_height; - const double rq = (double)new_size.width / new_size.height; - const double q = aq / rq; + // Calculate blit coordinates + coordi aspect_ratio; + sizei csize(m_frame->client_width(), m_frame->client_height()); + sizei new_size = csize; - if (q > 1.0) + if (!g_cfg.video.stretch_to_display_area) { - new_size.height = int(new_size.height / q); - aspect_ratio.y = (csize.height - new_size.height) / 2; - } - else if (q < 1.0) - { - new_size.width = int(new_size.width * q); - aspect_ratio.x = (csize.width - new_size.width) / 2; - } - } + const double aq = (double)buffer_width / buffer_height; + const double rq = (double)new_size.width / new_size.height; + const double q = aq / rq; - aspect_ratio.size = new_size; - - // Find the source image - rsx::tiled_region buffer_region = get_tiled_address(display_buffers[buffer].offset, CELL_GCM_LOCATION_LOCAL); - u32 absolute_address = buffer_region.address + buffer_region.base; - - m_flip_fbo.recreate(); - m_flip_fbo.bind(); - - const u32 size = buffer_pitch * buffer_height; - if (auto render_target_texture = m_rtts.get_texture_from_render_target_if_applicable(absolute_address)) - { - buffer_width = render_target_texture->width(); - buffer_height = render_target_texture->height(); - - m_flip_fbo.color = *render_target_texture; - m_flip_fbo.read_buffer(m_flip_fbo.color); - } - else if (auto surface = m_gl_texture_cache.find_texture_from_dimensions(absolute_address)) - { - //Hack - this should be the first location to check for output - //The render might have been done offscreen or in software and a blit used to display - m_flip_fbo.color = surface->get_raw_view(); - m_flip_fbo.read_buffer(m_flip_fbo.color); - } - else - { - LOG_WARNING(RSX, "Flip texture was not found in cache. Uploading surface from CPU"); - - if (!m_flip_tex_color || m_flip_tex_color.size() != sizei{ (int)buffer_width, (int)buffer_height }) - { - m_flip_tex_color.recreate(gl::texture::target::texture2D); - - m_flip_tex_color.config() - .size({ (int)buffer_width, (int)buffer_height }) - .type(gl::texture::type::uint_8_8_8_8) - .format(gl::texture::format::bgra); - - m_flip_tex_color.pixel_unpack_settings().aligment(1).row_length(buffer_pitch / 4); + if (q > 1.0) + { + new_size.height = int(new_size.height / q); + aspect_ratio.y = (csize.height - new_size.height) / 2; + } + else if (q < 1.0) + { + new_size.width = int(new_size.width * q); + aspect_ratio.x = (csize.width - new_size.width) / 2; + } } - if (buffer_region.tile) + aspect_ratio.size = new_size; + + // Find the source image + rsx::tiled_region buffer_region = get_tiled_address(display_buffers[buffer].offset, CELL_GCM_LOCATION_LOCAL); + u32 absolute_address = buffer_region.address + buffer_region.base; + + m_flip_fbo.recreate(); + m_flip_fbo.bind(); + + const u32 size = buffer_pitch * buffer_height; + if (auto render_target_texture = m_rtts.get_texture_from_render_target_if_applicable(absolute_address)) { - std::unique_ptr temp(new u8[buffer_height * buffer_pitch]); - buffer_region.read(temp.get(), buffer_width, buffer_height, buffer_pitch); - m_flip_tex_color.copy_from(temp.get(), gl::texture::format::bgra, gl::texture::type::uint_8_8_8_8); + buffer_width = render_target_texture->width(); + buffer_height = render_target_texture->height(); + + m_flip_fbo.color = *render_target_texture; + m_flip_fbo.read_buffer(m_flip_fbo.color); + } + else if (auto surface = m_gl_texture_cache.find_texture_from_dimensions(absolute_address)) + { + //Hack - this should be the first location to check for output + //The render might have been done offscreen or in software and a blit used to display + m_flip_fbo.color = surface->get_raw_view(); + m_flip_fbo.read_buffer(m_flip_fbo.color); } else { - m_flip_tex_color.copy_from(buffer_region.ptr, gl::texture::format::bgra, gl::texture::type::uint_8_8_8_8); + LOG_WARNING(RSX, "Flip texture was not found in cache. Uploading surface from CPU"); + + if (!m_flip_tex_color || m_flip_tex_color.size() != sizei{ (int)buffer_width, (int)buffer_height }) + { + m_flip_tex_color.recreate(gl::texture::target::texture2D); + + m_flip_tex_color.config() + .size({ (int)buffer_width, (int)buffer_height }) + .type(gl::texture::type::uint_8_8_8_8) + .format(gl::texture::format::bgra); + + m_flip_tex_color.pixel_unpack_settings().aligment(1).row_length(buffer_pitch / 4); + } + + if (buffer_region.tile) + { + std::unique_ptr temp(new u8[buffer_height * buffer_pitch]); + buffer_region.read(temp.get(), buffer_width, buffer_height, buffer_pitch); + m_flip_tex_color.copy_from(temp.get(), gl::texture::format::bgra, gl::texture::type::uint_8_8_8_8); + } + else + { + m_flip_tex_color.copy_from(buffer_region.ptr, gl::texture::format::bgra, gl::texture::type::uint_8_8_8_8); + } + + m_flip_fbo.color = m_flip_tex_color; + m_flip_fbo.read_buffer(m_flip_fbo.color); } - m_flip_fbo.color = m_flip_tex_color; - m_flip_fbo.read_buffer(m_flip_fbo.color); + // Blit source image to the screen + // Disable scissor test (affects blit) + glDisable(GL_SCISSOR_TEST); + + areai screen_area = coordi({}, { (int)buffer_width, (int)buffer_height }); + m_flip_fbo.blit(gl::screen, screen_area, areai(aspect_ratio).flipped_vertical(), gl::buffers::color, gl::filter::linear); } - // Blit source image to the screen - // Disable scissor test (affects blit) - glDisable(GL_SCISSOR_TEST); - - areai screen_area = coordi({}, { (int)buffer_width, (int)buffer_height }); - gl::screen.clear(gl::buffers::color); - m_flip_fbo.blit(gl::screen, screen_area, areai(aspect_ratio).flipped_vertical(), gl::buffers::color, gl::filter::linear); - if (m_custom_ui) { gl::screen.bind(); diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.h b/rpcs3/Emu/RSX/GL/GLGSRender.h index f99313fae7..4cdf9ddfa4 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.h +++ b/rpcs3/Emu/RSX/GL/GLGSRender.h @@ -255,6 +255,16 @@ struct driver_state } }; +struct vertex_upload_info +{ + u32 vertex_draw_count; + u32 allocated_vertex_count; + u32 vertex_index_base; + u32 persistent_mapping_offset; + u32 volatile_mapping_offset; + std::optional > index_info; +}; + class GLGSRender : public GSRender { private: @@ -289,6 +299,7 @@ private: GLint m_min_texbuffer_alignment = 256; GLint m_uniform_buffer_offset_align = 256; + GLint m_max_texbuffer_size = 65536; bool manually_flush_ring_buffers = false; @@ -326,14 +337,14 @@ private: driver_state gl_state; // Return element to draw and in case of indexed draw index type and offset in index buffer - std::tuple > > set_vertex_buffer(); + vertex_upload_info set_vertex_buffer(); rsx::vertex_input_layout m_vertex_layout = {}; void clear_surface(u32 arg); void init_buffers(rsx::framebuffer_creation_context context, bool skip_reading = false); bool check_program_state(); - void load_program(u32 vertex_base, u32 vertex_count); + void load_program(const vertex_upload_info& upload_info); void update_draw_state(); diff --git a/rpcs3/Emu/RSX/GL/GLHelpers.h b/rpcs3/Emu/RSX/GL/GLHelpers.h index 4884a76df1..a15fdae4b6 100644 --- a/rpcs3/Emu/RSX/GL/GLHelpers.h +++ b/rpcs3/Emu/RSX/GL/GLHelpers.h @@ -259,6 +259,7 @@ namespace gl glGetSynciv(m_value, GL_SYNC_STATUS, 4, &tmp, &status); return (status == GL_SIGNALED); } + return true; } bool wait_for_signal() @@ -831,7 +832,6 @@ namespace gl protected: u32 m_data_loc = 0; - u32 m_limit = 0; void *m_memory_mapping = nullptr; fence m_fence; @@ -854,7 +854,7 @@ namespace gl verify(HERE), m_memory_mapping != nullptr; m_data_loc = 0; - m_limit = ::narrow(size); + m_size = ::narrow(size); } void create(target target_, GLsizeiptr size, const void* data_ = nullptr) @@ -868,7 +868,7 @@ namespace gl u32 offset = m_data_loc; if (m_data_loc) offset = align(offset, alignment); - if ((offset + alloc_size) > m_limit) + if ((offset + alloc_size) > m_size) { if (!m_fence.is_empty()) m_fence.wait_for_signal(); @@ -894,7 +894,7 @@ namespace gl m_memory_mapping = nullptr; m_data_loc = 0; - m_limit = 0; + m_size = 0; } glDeleteBuffers(1, &m_id); @@ -936,7 +936,7 @@ namespace gl m_memory_mapping = nullptr; m_data_loc = 0; - m_limit = ::narrow(size); + m_size = ::narrow(size); } void create(target target_, GLsizeiptr size, const void* data_ = nullptr) @@ -954,9 +954,9 @@ namespace gl const u32 block_size = align(alloc_size + 16, 256); //Overallocate just in case we need to realign base - if ((offset + block_size) > m_limit) + if ((offset + block_size) > m_size) { - buffer::data(m_limit, nullptr); + buffer::data(m_size, nullptr); m_data_loc = 0; } diff --git a/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp b/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp index e930bd85e4..3011493286 100644 --- a/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp +++ b/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp @@ -180,7 +180,7 @@ namespace }; } -std::tuple>> GLGSRender::set_vertex_buffer() +vertex_upload_info GLGSRender::set_vertex_buffer() { std::chrono::time_point then = steady_clock::now(); @@ -196,6 +196,7 @@ std::tuple>> GLGSRender::se auto required = calculate_memory_requirements(m_vertex_layout, vertex_count); std::pair persistent_mapping = {}, volatile_mapping = {}; + vertex_upload_info upload_info = { result.vertex_draw_count, result.allocated_vertex_count, result.vertex_index_base, 0u, 0u, result.index_info }; if (required.first > 0) { @@ -213,7 +214,7 @@ std::tuple>> GLGSRender::se if (auto cached = m_vertex_cache->find_vertex_range(storage_address, GL_R8UI, required.first)) { in_cache = true; - m_gl_persistent_stream_buffer.copy_from(*m_attrib_ring_buffer, GL_R8UI, cached->offset_in_heap, required.first); + upload_info.persistent_mapping_offset = cached->offset_in_heap; } else { @@ -224,7 +225,7 @@ std::tuple>> GLGSRender::se if (!in_cache) { persistent_mapping = m_attrib_ring_buffer->alloc_from_heap(required.first, m_min_texbuffer_alignment); - m_gl_persistent_stream_buffer.copy_from(*m_attrib_ring_buffer, GL_R8UI, persistent_mapping.second, required.first); + upload_info.persistent_mapping_offset = persistent_mapping.second; if (to_store) { @@ -237,7 +238,7 @@ std::tuple>> GLGSRender::se if (required.second > 0) { volatile_mapping = m_attrib_ring_buffer->alloc_from_heap(required.second, m_min_texbuffer_alignment); - m_gl_volatile_stream_buffer.copy_from(*m_attrib_ring_buffer, GL_R8UI, volatile_mapping.second, required.second); + upload_info.volatile_mapping_offset = volatile_mapping.second; } //Write all the data @@ -245,7 +246,7 @@ std::tuple>> GLGSRender::se std::chrono::time_point now = steady_clock::now(); m_vertex_upload_time += std::chrono::duration_cast(now - then).count(); - return std::make_tuple(result.vertex_draw_count, result.allocated_vertex_count, result.vertex_index_base, result.index_info); + return upload_info; } namespace diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index 7f71b5ff36..eced03add3 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -1616,6 +1616,8 @@ namespace rsx local_mem_addr = localAddress; flip_status = CELL_GCM_DISPLAY_FLIP_STATUS_DONE; + memset(display_buffers, 0, sizeof(display_buffers)); + m_used_gcm_commands.clear(); on_init_rsx(); @@ -1676,7 +1678,7 @@ namespace rsx } } - std::pair thread::calculate_memory_requirements(vertex_input_layout& layout, const u32 vertex_count) + std::pair thread::calculate_memory_requirements(const vertex_input_layout& layout, u32 vertex_count) { u32 persistent_memory_size = 0; u32 volatile_memory_size = 0; @@ -1732,11 +1734,11 @@ namespace rsx return std::make_pair(persistent_memory_size, volatile_memory_size); } - void thread::fill_vertex_layout_state(vertex_input_layout& layout, const u32 vertex_count, s32* buffer) + void thread::fill_vertex_layout_state(const vertex_input_layout& layout, u32 vertex_count, s32* buffer, u32 persistent_offset_base, u32 volatile_offset_base) { std::array offset_in_block = {}; - u32 volatile_offset = 0; - u32 persistent_offset = 0; + u32 volatile_offset = volatile_offset_base; + u32 persistent_offset = persistent_offset_base; //NOTE: Order is important! Transient ayout is always push_buffers followed by register data if (rsx::method_registers.current_draw_clause.is_immediate_draw) @@ -1757,12 +1759,13 @@ namespace rsx if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::inlined_array) { const auto &block = layout.interleaved_blocks[0]; + u32 inline_data_offset = volatile_offset_base; for (const u8 index : block.locations) { auto &info = rsx::method_registers.vertex_arrays_info[index]; - offset_in_block[index] = persistent_offset; //just because this var is 0 when we enter here; inlined is transient memory - persistent_offset += rsx::get_vertex_type_size_on_host(info.type(), info.size()); + offset_in_block[index] = inline_data_offset; + inline_data_offset += rsx::get_vertex_type_size_on_host(info.type(), info.size()); } } else @@ -1917,7 +1920,7 @@ namespace rsx } } - void thread::write_vertex_data_to_memory(vertex_input_layout &layout, const u32 first_vertex, const u32 vertex_count, void *persistent_data, void *volatile_data) + void thread::write_vertex_data_to_memory(const vertex_input_layout& layout, u32 first_vertex, u32 vertex_count, void *persistent_data, void *volatile_data) { char *transient = (char *)volatile_data; char *persistent = (char *)persistent_data; diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index c836c469d8..164bc66400 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -396,18 +396,18 @@ namespace rsx * result.first contains persistent memory requirements * result.second contains volatile memory requirements */ - std::pair calculate_memory_requirements(vertex_input_layout& layout, const u32 vertex_count); + std::pair calculate_memory_requirements(const vertex_input_layout& layout, u32 vertex_count); /** * Generates vertex input descriptors as an array of 16x4 s32s */ - void fill_vertex_layout_state(vertex_input_layout& layout, const u32 vertex_count, s32* buffer); + void fill_vertex_layout_state(const vertex_input_layout& layout, u32 vertex_count, s32* buffer, u32 persistent_offset = 0, u32 volatile_offset = 0); /** * Uploads vertex data described in the layout descriptor * Copies from local memory to the write-only output buffers provided in a sequential manner */ - void write_vertex_data_to_memory(vertex_input_layout &layout, const u32 first_vertex, const u32 vertex_count, void *persistent_data, void *volatile_data); + void write_vertex_data_to_memory(const vertex_input_layout& layout, u32 first_vertex, u32 vertex_count, void *persistent_data, void *volatile_data); private: std::mutex m_mtx_task; diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 58659b4006..aed4274845 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -2891,6 +2891,7 @@ void VKGSRender::flip(int buffer) u32 buffer_width = display_buffers[buffer].width; u32 buffer_height = display_buffers[buffer].height; + u32 buffer_pitch = display_buffers[buffer].pitch; coordi aspect_ratio; @@ -2966,18 +2967,21 @@ void VKGSRender::flip(int buffer) //Blit contents to screen.. vk::image* image_to_flip = nullptr; - rsx::tiled_region buffer_region = get_tiled_address(display_buffers[buffer].offset, CELL_GCM_LOCATION_LOCAL); - u32 absolute_address = buffer_region.address + buffer_region.base; + if (buffer < display_buffers_count && buffer_width && buffer_height && buffer_pitch) + { + rsx::tiled_region buffer_region = get_tiled_address(display_buffers[buffer].offset, CELL_GCM_LOCATION_LOCAL); + u32 absolute_address = buffer_region.address + buffer_region.base; - if (auto render_target_texture = m_rtts.get_texture_from_render_target_if_applicable(absolute_address)) - { - image_to_flip = render_target_texture; - } - else if (auto surface = m_texture_cache.find_texture_from_dimensions(absolute_address)) - { - //Hack - this should be the first location to check for output - //The render might have been done offscreen or in software and a blit used to display - image_to_flip = surface->get_raw_texture(); + if (auto render_target_texture = m_rtts.get_texture_from_render_target_if_applicable(absolute_address)) + { + image_to_flip = render_target_texture; + } + else if (auto surface = m_texture_cache.find_texture_from_dimensions(absolute_address)) + { + //Hack - this should be the first location to check for output + //The render might have been done offscreen or in software and a blit used to display + image_to_flip = surface->get_raw_texture(); + } } VkImage target_image = m_swap_chain->get_swap_chain_image(m_current_frame->present_image); From 4f01794713b09bddc51998edd060caf4f1a5e487 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sun, 21 Jan 2018 23:55:01 +0300 Subject: [PATCH 16/22] Minor fixes - vulkan: Do not assume an aux frame context must exist in a well defined state as set in init_buffers() since the request might be external (via overlays path) - gl: Do not bother waiting for idle before servicing external flip requests - gl: Queue overlay cleanup requests to ensure only glthread attempts touching the context - overlays: Do not compute size metrics for invalid/unsupported glyphs --- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 18 ++++++++++++------ rpcs3/Emu/RSX/GL/GLGSRender.h | 2 ++ rpcs3/Emu/RSX/VK/VKGSRender.cpp | 10 +++++----- rpcs3/Emu/RSX/overlay_controls.h | 10 +++++++++- 4 files changed, 28 insertions(+), 12 deletions(-) diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 36d7752880..7d593570fc 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -1237,7 +1237,7 @@ void GLGSRender::flip(int buffer) u32 buffer_height = display_buffers[buffer].height; u32 buffer_pitch = display_buffers[buffer].pitch; - if (buffer < display_buffers_count && buffer_width && buffer_height && buffer_pitch) + if ((u32)buffer < display_buffers_count && buffer_width && buffer_height && buffer_pitch) { // Calculate blit coordinates coordi aspect_ratio; @@ -1423,7 +1423,7 @@ void GLGSRender::on_notify_memory_unmapped(u32 address_base, u32 size) } } -void GLGSRender::do_local_task(bool idle) +void GLGSRender::do_local_task(bool /*idle*/) { m_frame->clear_wm_events(); @@ -1444,9 +1444,14 @@ void GLGSRender::do_local_task(bool idle) q.cv.notify_one(); } - if (m_custom_ui) + if (m_overlay_cleanup_requests.size()) { - if (!in_begin_end && idle && native_ui_flip_request.load()) + m_ui_renderer.remove_temp_resources(); + m_overlay_cleanup_requests.clear(); + } + else if (m_custom_ui) + { + if (!in_begin_end && native_ui_flip_request.load()) { native_ui_flip_request.store(false); flip((s32)current_display_buffer); @@ -1517,5 +1522,6 @@ void GLGSRender::get_occlusion_query_result(rsx::occlusion_query_info* query) void GLGSRender::shell_do_cleanup() { - m_ui_renderer.remove_temp_resources(); -} \ No newline at end of file + //TODO: Key cleanup requests with UID to identify resources to remove + m_overlay_cleanup_requests.push_back(0); +} diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.h b/rpcs3/Emu/RSX/GL/GLGSRender.h index 4cdf9ddfa4..5ba2ae0005 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.h +++ b/rpcs3/Emu/RSX/GL/GLGSRender.h @@ -307,6 +307,8 @@ private: gl::depth_convert_pass m_depth_converter; gl::ui_overlay_renderer m_ui_renderer; + std::vector m_overlay_cleanup_requests; + std::mutex queue_guard; std::list work_queue; diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index aed4274845..ec144a6823 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -2010,7 +2010,7 @@ void VKGSRender::process_swap_request(frame_context_t *ctx, bool free_resources) ctx->swap_command_buffer = nullptr; } -void VKGSRender::do_local_task(bool idle) +void VKGSRender::do_local_task(bool /*idle*/) { if (m_flush_requests.pending()) { @@ -2857,10 +2857,10 @@ void VKGSRender::flip(int buffer) if (m_current_frame == &m_aux_frame_context) { m_current_frame = &frame_context_storage[m_current_queue_index]; - if (m_current_frame->swap_command_buffer && m_current_frame->swap_command_buffer->pending) + if (m_current_frame->swap_command_buffer) { - //No choice but to wait for the last frame on the dst swapchain image to complete - m_current_frame->swap_command_buffer->wait(); + //Always present if pending swap is present. + //Its possible this flip request is triggered by overlays and the flip queue is in undefined state process_swap_request(m_current_frame, true); } @@ -2967,7 +2967,7 @@ void VKGSRender::flip(int buffer) //Blit contents to screen.. vk::image* image_to_flip = nullptr; - if (buffer < display_buffers_count && buffer_width && buffer_height && buffer_pitch) + if ((u32)buffer < display_buffers_count && buffer_width && buffer_height && buffer_pitch) { rsx::tiled_region buffer_region = get_tiled_address(display_buffers[buffer].offset, CELL_GCM_LOCATION_LOCAL); u32 absolute_address = buffer_region.address + buffer_region.base; diff --git a/rpcs3/Emu/RSX/overlay_controls.h b/rpcs3/Emu/RSX/overlay_controls.h index d543a02fa5..2046c0ae81 100644 --- a/rpcs3/Emu/RSX/overlay_controls.h +++ b/rpcs3/Emu/RSX/overlay_controls.h @@ -854,7 +854,15 @@ namespace rsx last_word = text_width; } - renderer->get_char(c, text_width, unused); + if ((u32)c > renderer->char_count) + { + //Non-existent glyph + text_width += renderer->em_size; + } + else + { + renderer->get_char(c, text_width, unused); + } if (!ignore_word_wrap && wrap_text && text_width >= w) { From cc0d7c5985803316e92fb1bc5a1dd3fae4d0eb35 Mon Sep 17 00:00:00 2001 From: Inviuz Date: Wed, 3 Jan 2018 12:48:50 +0100 Subject: [PATCH 17/22] improved hack/fix for cellsurmixer --- rpcs3/Emu/Cell/lv2/sys_ppu_thread.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rpcs3/Emu/Cell/lv2/sys_ppu_thread.cpp b/rpcs3/Emu/Cell/lv2/sys_ppu_thread.cpp index db3c170ab9..75ab34d083 100644 --- a/rpcs3/Emu/Cell/lv2/sys_ppu_thread.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_ppu_thread.cpp @@ -356,7 +356,8 @@ error_code sys_ppu_thread_start(ppu_thread& ppu, u32 thread_id) while (!idm::select([](u32, lv2_event_queue& eq) { - return eq.name == "_mxr000\0"_u64; + //some games do not set event queue name, though key seems constant for them + return (eq.name == "_mxr000\0"_u64) || (eq.key == 0x8000cafe02460300); })) { thread_ctrl::wait_for(50000); From 2f414f96bf9a48c47878e5aff0b9141a11d76b24 Mon Sep 17 00:00:00 2001 From: Jake Date: Tue, 16 Jan 2018 21:51:58 -0600 Subject: [PATCH 18/22] rsx: fix potential hang during thread close --- rpcs3/Emu/RSX/GSRender.cpp | 2 ++ rpcs3/Emu/RSX/RSXThread.cpp | 7 ++++--- rpcs3/Emu/RSX/RSXThread.h | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/rpcs3/Emu/RSX/GSRender.cpp b/rpcs3/Emu/RSX/GSRender.cpp index 0969294a72..888b704e78 100644 --- a/rpcs3/Emu/RSX/GSRender.cpp +++ b/rpcs3/Emu/RSX/GSRender.cpp @@ -41,6 +41,8 @@ void GSRender::on_exit() { if (m_frame) { + m_frame->disable_wm_event_queue(); + m_frame->clear_wm_events(); m_frame->delete_context(m_context); m_context = nullptr; } diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index eced03add3..e4b1c48286 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -368,7 +368,7 @@ namespace rsx vblank_count = 0; // TODO: exit condition - while (!Emu.IsStopped()) + while (!Emu.IsStopped() && !m_rsx_thread_exiting) { if (get_system_time() - start_time > vblank_count * 1000000 / 60) { @@ -389,7 +389,7 @@ namespace rsx continue; } - while (Emu.IsPaused()) + while (Emu.IsPaused() && !m_rsx_thread_exiting) std::this_thread::sleep_for(10ms); std::this_thread::sleep_for(1ms); // hack @@ -772,6 +772,7 @@ namespace rsx void thread::on_exit() { + m_rsx_thread_exiting = true; if (m_vblank_thread) { m_vblank_thread->join(); @@ -1618,7 +1619,7 @@ namespace rsx memset(display_buffers, 0, sizeof(display_buffers)); - m_used_gcm_commands.clear(); + m_rsx_thread_exiting = false; on_init_rsx(); start_thread(fxm::get()); diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index 164bc66400..44901e5f90 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -209,6 +209,7 @@ namespace rsx std::shared_ptr m_vblank_thread; protected: + atomic_t m_rsx_thread_exiting{false}; std::stack m_call_stack; std::array vertex_push_buffers; std::vector element_push_buffer; @@ -316,7 +317,6 @@ namespace rsx u64 vblank_count; public: - std::set m_used_gcm_commands; bool invalid_command_interrupt_raised = false; bool sync_point_request = false; bool in_begin_end = false; From efe6a6cf53540309f059907aae7f765d3c0a1eda Mon Sep 17 00:00:00 2001 From: Jake Date: Tue, 23 Jan 2018 22:16:46 -0600 Subject: [PATCH 19/22] rsx: fixed emulated primitive restart --- rpcs3/Emu/RSX/rsx_utils.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/rsx_utils.h b/rpcs3/Emu/RSX/rsx_utils.h index a47b6122ac..7faf46b265 100644 --- a/rpcs3/Emu/RSX/rsx_utils.h +++ b/rpcs3/Emu/RSX/rsx_utils.h @@ -299,7 +299,7 @@ namespace rsx for (int i = 0; i < index_count; ++i) { - if (indices[i] == UINT16_MAX) + if (indices[i] == restart_index) { if (last_start >= 0) { From a9c26b40dd6e41cd83e4309cfc419dbaae012ef3 Mon Sep 17 00:00:00 2001 From: Zangetsu38 Date: Thu, 18 Jan 2018 05:40:37 +0100 Subject: [PATCH 20/22] hle: Stub function Unimplemented in NpSns. --- rpcs3/Emu/Cell/Modules/sceNpSns.cpp | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/rpcs3/Emu/Cell/Modules/sceNpSns.cpp b/rpcs3/Emu/Cell/Modules/sceNpSns.cpp index cfd8f0e39c..f97c65ee36 100644 --- a/rpcs3/Emu/Cell/Modules/sceNpSns.cpp +++ b/rpcs3/Emu/Cell/Modules/sceNpSns.cpp @@ -137,28 +137,32 @@ error_code sceNpSnsFbGetAccessToken(u32 handle, vm::cptr Date: Tue, 23 Jan 2018 12:08:43 +0100 Subject: [PATCH 21/22] vk/ogl: Fix regression for G8B8 --- rpcs3/Emu/RSX/GL/GLTexture.cpp | 2 +- rpcs3/Emu/RSX/VK/VKFormats.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/rpcs3/Emu/RSX/GL/GLTexture.cpp b/rpcs3/Emu/RSX/GL/GLTexture.cpp index e75f193175..41d6d2c6f3 100644 --- a/rpcs3/Emu/RSX/GL/GLTexture.cpp +++ b/rpcs3/Emu/RSX/GL/GLTexture.cpp @@ -281,7 +281,7 @@ namespace gl return{ GL_RED, GL_RED, GL_RED, GL_RED }; case CELL_GCM_TEXTURE_G8B8: - return{ GL_RED, GL_GREEN, GL_RED, GL_GREEN }; + return{ GL_GREEN, GL_RED, GL_GREEN, GL_RED }; case CELL_GCM_TEXTURE_Y16_X16: return{ GL_RED, GL_GREEN, GL_RED, GL_GREEN }; diff --git a/rpcs3/Emu/RSX/VK/VKFormats.cpp b/rpcs3/Emu/RSX/VK/VKFormats.cpp index faf18545c4..11b4aa47e3 100644 --- a/rpcs3/Emu/RSX/VK/VKFormats.cpp +++ b/rpcs3/Emu/RSX/VK/VKFormats.cpp @@ -138,7 +138,7 @@ std::array get_component_mapping(u32 format) mapping = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A }; break; case CELL_GCM_TEXTURE_G8B8: - mapping = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G }; break; + mapping = { VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R }; break; case CELL_GCM_TEXTURE_B8: mapping = { VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R }; break; From b9c10a186da43f0de2c54af6419a1cd94e1fc9e9 Mon Sep 17 00:00:00 2001 From: Megamouse Date: Wed, 24 Jan 2018 17:38:38 +0100 Subject: [PATCH 22/22] Qt/Core: implement max llvm compile threads --- rpcs3/Emu/Cell/PPUThread.cpp | 4 +++- rpcs3/Emu/System.h | 1 + rpcs3/Json/tooltips.json | 1 + rpcs3/rpcs3qt/emu_settings.cpp | 6 ++++-- rpcs3/rpcs3qt/emu_settings.h | 4 +++- rpcs3/rpcs3qt/settings_dialog.cpp | 6 +++++- rpcs3/rpcs3qt/settings_dialog.ui | 31 +++++++++++++++++-------------- 7 files changed, 34 insertions(+), 19 deletions(-) diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index 1fe24516b6..fabfc5839d 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -1161,7 +1161,9 @@ extern void ppu_initialize(const ppu_module& info) static semaphore<> jmutex; // Initialize semaphore with the max number of threads - semaphore jcores(std::thread::hardware_concurrency()); + u32 max_threads = static_cast(g_cfg.core.llvm_threads); + s32 thread_count = max_threads > 0 ? std::min(max_threads, std::thread::hardware_concurrency()) : std::thread::hardware_concurrency(); + semaphore jcores(thread_count); if (!jcores.get()) { diff --git a/rpcs3/Emu/System.h b/rpcs3/Emu/System.h index 3044d98e61..f4dfeafba8 100644 --- a/rpcs3/Emu/System.h +++ b/rpcs3/Emu/System.h @@ -285,6 +285,7 @@ struct cfg_root : cfg::node cfg::_bool ppu_debug{this, "PPU Debug"}; cfg::_bool llvm_logs{this, "Save LLVM logs"}; cfg::string llvm_cpu{this, "Use LLVM CPU"}; + cfg::_int<0, INT32_MAX> llvm_threads{this, "Max LLVM Compile Threads", 0}; #ifdef _WIN32 cfg::_bool thread_scheduler_enabled{ this, "Enable thread scheduler", true }; diff --git a/rpcs3/Json/tooltips.json b/rpcs3/Json/tooltips.json index cca09a5f36..700c3bfb7c 100644 --- a/rpcs3/Json/tooltips.json +++ b/rpcs3/Json/tooltips.json @@ -64,6 +64,7 @@ "gs_resizeOnBoot": "Automatically resizes the game window on boot.\nThis does not change the internal game resolution.", "showTrophyPopups": "Show trophy popups when a trophy is unlocked.", "gs_disableMouse": "Disables the activation of fullscreen mode per doubleclick while the game screen is active.\nCheck this if you want to play with mouse and keyboard (for example with UCR).", + "maxLLVMThreads": "Limits the maximum number of threads used for PPU Module compilation.\nLower this in order to increase performance of other open applications.\nThe default uses all available threads.", "useNativeInterface": "Enables use of native HUD within the game window that can interacts with the game controllers.\nWhen disabled, regular Qt dialogs are used instead.\nNot all languages are currently supported (English only)" } }, diff --git a/rpcs3/rpcs3qt/emu_settings.cpp b/rpcs3/rpcs3qt/emu_settings.cpp index be59923fa6..1d5aa3bba7 100644 --- a/rpcs3/rpcs3qt/emu_settings.cpp +++ b/rpcs3/rpcs3qt/emu_settings.cpp @@ -222,13 +222,15 @@ void emu_settings::SaveSettings() m_config.write(out.c_str(), out.size()); } -void emu_settings::EnhanceComboBox(QComboBox* combobox, SettingsType type, bool is_ranged) +void emu_settings::EnhanceComboBox(QComboBox* combobox, SettingsType type, bool is_ranged, bool use_max, int max) { if (is_ranged) { QStringList range = GetSettingOptions(type); - for (int i = range.first().toInt(); i <= range.last().toInt(); i++) + int max_item = use_max ? max : range.last().toInt(); + + for (int i = range.first().toInt(); i <= max_item; i++) { combobox->addItem(QString::number(i), QVariant(QString::number(i))); } diff --git a/rpcs3/rpcs3qt/emu_settings.h b/rpcs3/rpcs3qt/emu_settings.h index 696dda4857..dad3ec4492 100644 --- a/rpcs3/rpcs3qt/emu_settings.h +++ b/rpcs3/rpcs3qt/emu_settings.h @@ -36,6 +36,7 @@ public: PreferredSPUThreads, PPUDebug, SPUDebug, + MaxLLVMThreads, // Graphics Renderer, @@ -143,7 +144,7 @@ public: ~emu_settings(); /** Connects a combo box with the target settings type*/ - void EnhanceComboBox(QComboBox* combobox, SettingsType type, bool is_ranged = false); + void EnhanceComboBox(QComboBox* combobox, SettingsType type, bool is_ranged = false, bool use_max = false, int max = 0); /** Connects a check box with the target settings type*/ void EnhanceCheckBox(QCheckBox* checkbox, SettingsType type); @@ -193,6 +194,7 @@ private: { PreferredSPUThreads, { "Core", "Preferred SPU Threads"}}, { PPUDebug, { "Core", "PPU Debug"}}, { SPUDebug, { "Core", "SPU Debug"}}, + { MaxLLVMThreads, { "Core", "Max LLVM Compile Threads"}}, // Graphics Tab { Renderer, { "Video", "Renderer"}}, diff --git a/rpcs3/rpcs3qt/settings_dialog.cpp b/rpcs3/rpcs3qt/settings_dialog.cpp index 5ad5b7c072..62d40baee1 100644 --- a/rpcs3/rpcs3qt/settings_dialog.cpp +++ b/rpcs3/rpcs3qt/settings_dialog.cpp @@ -22,6 +22,7 @@ #include "Crypto/unself.h" #include +#include inline std::string sstr(const QString& _in) { return _in.toStdString(); } inline std::string sstr(const QVariant& _in) { return sstr(_in.toString()); } @@ -692,6 +693,10 @@ settings_dialog::settings_dialog(std::shared_ptr guiSettings, std: // Comboboxes + xemu_settings->EnhanceComboBox(ui->maxLLVMThreads, emu_settings::MaxLLVMThreads, true, true, std::thread::hardware_concurrency()); + SubscribeTooltip(ui->maxLLVMThreads, json_emu_misc["maxLLVMThreads"].toString()); + ui->maxLLVMThreads->setItemText(ui->maxLLVMThreads->findData("0"), tr("All (%1)").arg(std::thread::hardware_concurrency())); + SubscribeTooltip(ui->combo_configs, json_emu_gui["configs"].toString()); SubscribeTooltip(ui->combo_stylesheets, json_emu_gui["stylesheets"].toString()); @@ -727,7 +732,6 @@ settings_dialog::settings_dialog(std::shared_ptr guiSettings, std: if (game) { ui->gb_stylesheets->setEnabled(false); - ui->gb_configs->setEnabled(false); ui->gb_settings->setEnabled(false); ui->gb_colors->setEnabled(false); ui->gb_viewport->setEnabled(false); diff --git a/rpcs3/rpcs3qt/settings_dialog.ui b/rpcs3/rpcs3qt/settings_dialog.ui index 63bdb78730..0a6934abd5 100644 --- a/rpcs3/rpcs3qt/settings_dialog.ui +++ b/rpcs3/rpcs3qt/settings_dialog.ui @@ -6,8 +6,8 @@ 0 0 - 787 - 566 + 1011 + 775 @@ -1377,20 +1377,13 @@ - + - UI Configurations + Max LLVM Compile Threads - + - - - - - - Apply - - + @@ -1439,12 +1432,22 @@ - 0 + 20 0 + + + + + + + Apply + + +