rsx/vk: Implement shaders cache and fix broken pipeline_storage comparison and hash

- Fix pipeline state storage to uniquely store each pipeline variant
- Adds a progress bar to indicate loading is taking place
This commit is contained in:
kd-11 2017-08-09 14:30:15 +03:00
parent 1da732bbf5
commit c7dca1dbef
6 changed files with 319 additions and 15 deletions

View file

@ -110,6 +110,7 @@ class program_state_cache
protected:
size_t m_next_id = 0;
bool m_cache_miss_flag;
binary_to_vertex_program m_vertex_shader_cache;
binary_to_fragment_program m_fragment_shader_cache;
std::unordered_map <pipeline_key, pipeline_storage_type, pipeline_key_hash, pipeline_key_compare> m_storage;
@ -197,7 +198,10 @@ public:
{
const auto I = m_storage.find(key);
if (I != m_storage.end())
{
m_cache_miss_flag = false;
return I->second;
}
}
LOG_NOTICE(RSX, "Add program :");
@ -205,6 +209,7 @@ public:
LOG_NOTICE(RSX, "*** fp id = %d", fragment_program.id);
m_storage[key] = backend_traits::build_pipeline(vertex_program, fragment_program, pipelineProperties, std::forward<Args>(args)...);
m_cache_miss_flag = true;
return m_storage[key];
}

View file

@ -259,13 +259,7 @@ struct RSXFragmentProgram
}
RSXFragmentProgram()
: size(0)
, addr(0)
, offset(0)
, ctrl(0)
, unnormalized_coords(0)
, texture_dimensions(0)
, valid(false)
{
memset(this, 0, sizeof(RSXFragmentProgram));
}
};

View file

@ -614,11 +614,15 @@ VKGSRender::VKGSRender() : GSRender()
m_text_writer->init(*m_device, m_memory_type_mapping, m_render_passes[idx]);
}
m_prog_buffer.reset(new VKProgramBuffer(m_render_passes.data()));
if (g_cfg.video.disable_vertex_cache)
m_vertex_cache.reset(new vk::null_vertex_cache());
else
m_vertex_cache.reset(new vk::weak_vertex_cache());
m_shaders_cache.reset(new vk::shader_cache(*m_prog_buffer.get(), "v1"));
open_command_buffer();
for (u32 i = 0; i < m_swap_chain->get_swap_image_count(); ++i)
@ -655,7 +659,7 @@ VKGSRender::~VKGSRender()
//Shaders
vk::finalize_compiler_context();
m_prog_buffer.clear();
m_prog_buffer->clear();
//Global resources
vk::destroy_global_resources();
@ -1213,6 +1217,8 @@ void VKGSRender::on_init_thread()
GSRender::on_init_thread();
rsx_thread = std::this_thread::get_id();
m_shaders_cache->load(*m_device, pipeline_layout);
}
void VKGSRender::on_exit()
@ -1732,6 +1738,7 @@ bool VKGSRender::check_program_status()
(u8)m_draw_buffers_count);
properties.render_pass = m_render_passes[idx];
properties.render_pass_location = (int)idx;
properties.num_targets = m_draw_buffers_count;
@ -1739,7 +1746,10 @@ bool VKGSRender::check_program_status()
//Load current program from buffer
vertex_program.skip_vertex_input_check = true;
m_program = m_prog_buffer.getGraphicPipelineState(vertex_program, fragment_program, properties, *m_device, pipeline_layout).get();
m_program = m_prog_buffer->getGraphicPipelineState(vertex_program, fragment_program, properties, *m_device, pipeline_layout).get();
if (m_prog_buffer->check_cache_missed())
m_shaders_cache->store(properties, vertex_program, fragment_program);
vk::leave_uninterruptible();
@ -1781,7 +1791,7 @@ void VKGSRender::load_program(u32 vertex_count, u32 vertex_base)
auto &vertex_program = current_vertex_program;
auto &fragment_program = current_fragment_program;
const size_t fragment_constants_sz = m_prog_buffer.get_fragment_constants_buffer_size(fragment_program);
const size_t fragment_constants_sz = m_prog_buffer->get_fragment_constants_buffer_size(fragment_program);
const size_t fragment_buffer_sz = fragment_constants_sz + (17 * 4 * sizeof(float));
const size_t required_mem = 512 + 8192 + fragment_buffer_sz;
@ -1807,7 +1817,7 @@ void VKGSRender::load_program(u32 vertex_count, u32 vertex_base)
//Fragment constants
buf = buf + 8192;
if (fragment_constants_sz)
m_prog_buffer.fill_fragment_constants_buffer({ reinterpret_cast<float*>(buf), ::narrow<int>(fragment_constants_sz) }, fragment_program);
m_prog_buffer->fill_fragment_constants_buffer({ reinterpret_cast<float*>(buf), ::narrow<int>(fragment_constants_sz) }, fragment_program);
fill_fragment_state_buffer(buf + fragment_constants_sz, fragment_program);

View file

@ -21,6 +21,8 @@ namespace vk
using vertex_cache = rsx::vertex_cache::default_vertex_cache<rsx::vertex_cache::uploaded_range<VkFormat>, VkFormat>;
using weak_vertex_cache = rsx::vertex_cache::weak_vertex_cache<VkFormat>;
using null_vertex_cache = vertex_cache;
using shader_cache = rsx::shaders_cache<vk::pipeline_props, VKProgramBuffer>;
}
//Heap allocation sizes in MB
@ -123,9 +125,10 @@ private:
public:
//vk::fbo draw_fbo;
std::unique_ptr<vk::vertex_cache> m_vertex_cache;
std::unique_ptr<vk::shader_cache> m_shaders_cache;
private:
VKProgramBuffer m_prog_buffer;
std::unique_ptr<VKProgramBuffer> m_prog_buffer;
vk::render_device *m_device;
vk::swap_chain* m_swap_chain;

View file

@ -16,6 +16,7 @@ namespace vk
VkRenderPass render_pass;
int num_targets;
int render_pass_location;
bool operator==(const pipeline_props& other) const
{
@ -28,7 +29,13 @@ namespace vk
if (memcmp(&rs, &other.rs, sizeof(VkPipelineRasterizationStateCreateInfo)))
return false;
if (memcmp(&cs, &other.cs, sizeof(VkPipelineColorBlendStateCreateInfo)))
//Cannot memcmp cs due to pAttachments being a pointer to memory
if (cs.attachmentCount != other.cs.attachmentCount ||
cs.flags != other.cs.flags ||
cs.logicOp != other.cs.logicOp ||
cs.logicOpEnable != other.cs.logicOpEnable ||
cs.sType != other.cs.sType ||
memcmp(cs.blendConstants, other.cs.blendConstants, 4 * sizeof(f32)))
return false;
if (memcmp(&ia, &other.ia, sizeof(VkPipelineInputAssemblyStateCreateInfo)))
@ -37,6 +44,9 @@ namespace vk
if (memcmp(&ds, &other.ds, sizeof(VkPipelineDepthStencilStateCreateInfo)))
return false;
if (num_targets != other.num_targets)
return false;
return num_targets == other.num_targets;
}
};
@ -64,7 +74,12 @@ namespace std
seed ^= hash_struct(pipelineProperties.ia);
seed ^= hash_struct(pipelineProperties.ds);
seed ^= hash_struct(pipelineProperties.rs);
seed ^= hash_struct(pipelineProperties.cs);
//Do not compare pointers to memory!
auto tmp = pipelineProperties.cs;
tmp.pAttachments = nullptr;
seed ^= hash_struct(tmp);
seed ^= hash_struct(pipelineProperties.att_state[0]);
return hash<size_t>()(seed);
}
@ -93,7 +108,8 @@ struct VKTraits
}
static
pipeline_storage_type build_pipeline(const vertex_program_type &vertexProgramData, const fragment_program_type &fragmentProgramData, const vk::pipeline_props &pipelineProperties, VkDevice dev, VkPipelineLayout common_pipeline_layout)
pipeline_storage_type build_pipeline(const vertex_program_type &vertexProgramData, const fragment_program_type &fragmentProgramData,
const vk::pipeline_props &pipelineProperties, VkDevice dev, VkPipelineLayout common_pipeline_layout)
{
VkPipelineShaderStageCreateInfo shader_stages[2] = {};
@ -159,11 +175,47 @@ struct VKTraits
// Vulkan flavour of the generic program/pipeline state cache.
// Adds hashing helpers and an entry point used to re-create pipelines from
// previously serialized pipeline properties.
class VKProgramBuffer : public program_state_cache<VKTraits>
{
	using base_cache = program_state_cache<VKTraits>;

	// Render pass handles supplied by the creator of this object; indexed by
	// pipeline_props::render_pass_location in add_pipeline_entry().
	const VkRenderPass *m_render_pass_data;

public:
	VKProgramBuffer(VkRenderPass *renderpass_list)
		: m_render_pass_data(renderpass_list)
	{
	}

	// Clears the base cache first, then the vertex and fragment shader caches.
	void clear()
	{
		base_cache::clear();
		m_vertex_shader_cache.clear();
		m_fragment_shader_cache.clear();
	}

	// Hash of a pipeline properties block, computed with std::hash<vk::pipeline_props>.
	u64 get_hash(vk::pipeline_props &props)
	{
		std::hash<vk::pipeline_props> hasher{};
		return hasher(props);
	}

	// Hash of a vertex program, computed with program_hash_util::vertex_program_hash.
	u64 get_hash(RSXVertexProgram &prog)
	{
		program_hash_util::vertex_program_hash hasher{};
		return hasher(prog);
	}

	// Hash of a fragment program, computed with program_hash_util::fragment_program_hash.
	u64 get_hash(RSXFragmentProgram &prog)
	{
		program_hash_util::fragment_program_hash hasher{};
		return hasher(prog);
	}

	// Builds (or looks up) the pipeline for the given programs and properties.
	// The handle/pointer members of props are rewritten before the lookup:
	// render_pass is taken from the render pass list using render_pass_location
	// and cs.pAttachments is pointed at props.att_state.
	template <typename... Args>
	void add_pipeline_entry(RSXVertexProgram &vp, RSXFragmentProgram &fp, vk::pipeline_props &props, Args&& ...args)
	{
		vp.skip_vertex_input_check = true;

		//Extract pointers from pipeline props
		const int pass_index = props.render_pass_location;
		props.render_pass = m_render_pass_data[pass_index];
		props.cs.pAttachments = props.att_state;

		this->getGraphicPipelineState(vp, fp, props, std::forward<Args>(args)...);
	}

	// Returns the cache-miss flag maintained by the base cache.
	bool check_cache_missed() const
	{
		return this->m_cache_miss_flag;
	}
};

View file

@ -2,6 +2,9 @@
#include "Utilities/VirtualMemory.h"
#include "Emu/Memory/vm.h"
#include "gcm_enums.h"
#include "Common/ProgramStateCache.h"
#include "Emu/Cell/Modules/cellMsgDialog.h"
#include "Emu/System.h"
namespace rsx
{
@ -202,6 +205,243 @@ namespace rsx
}
};
template <typename pipeline_storage_type, typename backend_storage>
class shaders_cache
{
struct pipeline_data
{
u64 vertex_program_hash;
u64 fragment_program_hash;
u64 pipeline_storage_hash;
u32 fp_ctrl;
u32 fp_texture_dimensions;
u16 fp_unnormalized_coords;
u16 fp_height;
u16 fp_pixel_layout;
u16 fp_lighting_flags;
u16 fp_shadow_textures;
u16 fp_redirected_textures;
pipeline_storage_type pipeline_properties;
};
std::string version_prefix;
std::string root_path;
std::unordered_map<u64, std::vector<u8>> fragment_program_data;
backend_storage& m_storage;
public:
shaders_cache(backend_storage& storage, std::string version_prefix_str = "v1")
: version_prefix(version_prefix_str)
, m_storage(storage)
{
root_path = Emu.GetCachePath() + "/shaders_cache";
}
template <typename... Args>
void load(Args&& ...args)
{
std::string directory_path = root_path + "/pipelines";
if (!fs::is_dir(directory_path))
{
fs::create_path(directory_path);
fs::create_path(root_path + "/raw");
return;
}
fs::dir root = fs::dir(directory_path);
fs::dir_entry tmp;
u32 entry_count = 0;
for (auto It = root.begin(); It != root.end(); ++It, entry_count++);
if (entry_count <= 2)
return;
entry_count -= 2;
f32 delta = 100.f / entry_count;
f32 tally = 0.f;
root.rewind();
// Progress dialog
auto dlg = Emu.GetCallbacks().get_msg_dialog();
dlg->type.se_normal = true;
dlg->type.bg_invisible = true;
dlg->type.progress_bar_count = 1;
dlg->on_close = [](s32 status)
{
Emu.CallAfter([]()
{
Emu.Stop();
});
};
Emu.CallAfter([=]()
{
dlg->Create("Preloading cached shaders from disk.\nPlease wait...");
});
u32 processed = 0;
while (root.read(tmp))
{
if (tmp.name == "." || tmp.name == "..")
continue;
std::vector<u8> bytes;
fs::file f(directory_path + "/" + tmp.name);
processed++;
Emu.CallAfter([=]()
{
dlg->ProgressBarSetMsg(0, fmt::format("Loading pipeline object %u of %u", processed, entry_count));
});
if (f.size() != sizeof(pipeline_data))
{
LOG_ERROR(RSX, "Cached pipeline object %s is not binary compatible with the current shader cache", tmp.name.c_str());
continue;
}
f.read<u8>(bytes, f.size());
auto unpacked = unpack(*(pipeline_data*)bytes.data());
m_storage.add_pipeline_entry(std::get<1>(unpacked), std::get<2>(unpacked), std::get<0>(unpacked), std::forward<Args>(args)...);
tally += delta;
if (tally > 1.f)
{
u32 value = (u32)tally;
Emu.CallAfter([=]()
{
dlg->ProgressBarInc(0, value);
});
tally -= (f32)value;
}
}
}
void store(pipeline_storage_type &pipeline, RSXVertexProgram &vp, RSXFragmentProgram &fp)
{
pipeline_data data = pack(pipeline, vp, fp);
std::string fp_name = root_path + "/raw/" + fmt::format("%llX.fp", data.fragment_program_hash);
std::string vp_name = root_path + "/raw/" + fmt::format("%llX.vp", data.vertex_program_hash);
if (!fs::is_file(fp_name))
{
const auto size = program_hash_util::fragment_program_utils::get_fragment_program_ucode_size(fp.addr);
fs::file(fp_name, fs::rewrite).write(fp.addr, size);
}
if (!fs::is_file(vp_name))
{
std::vector<u32> output;
output.resize(vp.data.size() + 1);
output[0] = vp.output_mask;
std::copy(vp.data.begin(), vp.data.end(), output.begin() + 1);
fs::file(vp_name, fs::rewrite).write<u32>(output);
}
u64 state_hash = 0;
state_hash ^= std::hash<u32>()(data.fp_ctrl);
state_hash ^= std::hash<u32>()(data.fp_texture_dimensions);
state_hash ^= std::hash<u16>()(data.fp_unnormalized_coords);
state_hash ^= std::hash<u16>()(data.fp_height);
state_hash ^= std::hash<u16>()(data.fp_pixel_layout);
state_hash ^= std::hash<u16>()(data.fp_lighting_flags);
state_hash ^= std::hash<u16>()(data.fp_shadow_textures);
state_hash ^= std::hash<u16>()(data.fp_redirected_textures);
std::string pipeline_file_name = fmt::format("%llX+%llX+%llX+%llX.bin", data.vertex_program_hash, data.fragment_program_hash, data.pipeline_storage_hash, state_hash);
std::string pipeline_path = root_path + "/pipelines/" + version_prefix + "-" + pipeline_file_name;
fs::file(pipeline_path, fs::rewrite).write(&data, sizeof(pipeline_data));
}
RSXVertexProgram load_vp_raw(u64 program_hash)
{
std::vector<u32> data;
std::string filename = fmt::format("%llX.vp", program_hash);
fs::file f(root_path + "/raw/" + filename);
f.read<u32>(data, f.size() / sizeof(u32));
RSXVertexProgram vp = {};
vp.data.resize(data.size() - 1);
vp.output_mask = data[0];
vp.skip_vertex_input_check = true;
std::copy(data.begin() + 1, data.end(), vp.data.begin());
return vp;
}
RSXFragmentProgram load_fp_raw(u64 program_hash)
{
std::vector<u8> data;
std::string filename = fmt::format("%llX.fp", program_hash);
fs::file f(root_path + "/raw/" + filename);
f.read<u8>(data, f.size());
RSXFragmentProgram fp = {};
fragment_program_data[program_hash] = data;
fp.addr = fragment_program_data[program_hash].data();
return fp;
}
std::tuple<pipeline_storage_type, RSXVertexProgram, RSXFragmentProgram> unpack(pipeline_data &data)
{
RSXVertexProgram vp = load_vp_raw(data.vertex_program_hash);
RSXFragmentProgram fp = load_fp_raw(data.fragment_program_hash);
pipeline_storage_type pipeline = data.pipeline_properties;
fp.ctrl = data.fp_ctrl;
fp.texture_dimensions = data.fp_texture_dimensions;
fp.unnormalized_coords = data.fp_unnormalized_coords;
fp.height = data.fp_height;
fp.pixel_center_mode = (rsx::window_pixel_center)(data.fp_pixel_layout & 0x3);
fp.origin_mode = (rsx::window_origin)((data.fp_pixel_layout >> 2) & 0x1);
fp.alpha_func = (rsx::comparison_function)((data.fp_pixel_layout >> 3) & 0xF);
fp.front_back_color_enabled = (data.fp_lighting_flags & 0x1) != 0;
fp.back_color_diffuse_output = ((data.fp_lighting_flags >> 1) & 0x1) != 0;
fp.back_color_specular_output = ((data.fp_lighting_flags >> 2) & 0x1) != 0;
fp.front_color_diffuse_output = ((data.fp_lighting_flags >> 3) & 0x1) != 0;
fp.front_color_specular_output = ((data.fp_lighting_flags >> 4) & 0x1) != 0;
fp.shadow_textures = data.fp_shadow_textures;
fp.redirected_textures = data.fp_redirected_textures;
return std::make_tuple(pipeline, vp, fp);
}
pipeline_data pack(pipeline_storage_type &pipeline, RSXVertexProgram &vp, RSXFragmentProgram &fp)
{
pipeline_data data_block;
data_block.pipeline_properties = pipeline;
data_block.vertex_program_hash = m_storage.get_hash(vp);
data_block.fragment_program_hash = m_storage.get_hash(fp);
data_block.pipeline_storage_hash = m_storage.get_hash(pipeline);
data_block.fp_ctrl = fp.ctrl;
data_block.fp_texture_dimensions = fp.texture_dimensions;
data_block.fp_unnormalized_coords = fp.unnormalized_coords;
data_block.fp_height = fp.height;
data_block.fp_pixel_layout = (u16)fp.pixel_center_mode | (u16)fp.origin_mode << 2 | (u16)fp.alpha_func << 3;
data_block.fp_lighting_flags = (u16)fp.front_back_color_enabled | (u16)fp.back_color_diffuse_output << 1 |
(u16)fp.back_color_specular_output << 2 | (u16)fp.front_color_diffuse_output << 3 | (u16)fp.front_color_specular_output << 4;
data_block.fp_shadow_textures = fp.shadow_textures;
data_block.fp_redirected_textures = fp.redirected_textures;
return data_block;
}
};
namespace vertex_cache
{
// A null vertex cache