gl/vk/rsx: Add a cross-platform overlay text; Minor perf improvements and rsx bugfixes (#2196)

* gl/rsx: Implement platform-agnostic text overlays

gl: Restore performance metrics using new text out helper

gl/rsx: Refactor text generation class

* vk: Enable text overlay

gl/vk: Polish overlay counters implementation

gl: Better resource shutdown for text writer

* gl: Optimization, do not rebind TIUs every frame. Speedup

* gl: Optimizations and improvements to vertex upload code

* gl/vk: Texture format swizzles

vk: Texture format fix

vk: Fix YX format swizzles

* rsx: Decode vertex texture index
This commit is contained in:
kd-11 2016-10-11 03:55:42 +03:00 committed by raven02
parent 7a9841bb2a
commit 8454949eea
20 changed files with 1016 additions and 79 deletions

View file

@ -0,0 +1,253 @@
#pragma once
#include <vector>
#include <unordered_map>
#include <Utilities/types.h>
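/**
* FONT GLYPHS GO HERE
* Any font in hex format should work here.
* Each entry has the form "XXXX : <hex bitmap>", where XXXX is the character code in hex.
* A 32-digit bitmap describes an 8x16 glyph (one byte per row); longer bitmaps are wide glyphs.
* In this case we are using the first 128 (ASCII) characters, but more can be supported.
*
* This example is the GNU Unifont glyph set.
*/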
const static std::string GNU_UNIFONT_GLYPHS[128] =
{
"0000 : AAAA00018000000180004A51EA505A51C99E0001800000018000000180005555",
"0001 : AAAA00018000000180003993C252325F8A527193800000018000000180005555",
"0002 : AAAA00018000000180003BA5C124311989247125800000018000000180005555",
"0003 : AAAA00018000000180007BA5C1247919C1247925800000018000000180005555",
"0004 : AAAA000180000001800079BFC2487A49C2487989800000018000000180005555",
"0005 : AAAA00018000000180007A4DC2527B53C2D67A4F800000018000000180005555",
"0006 : AAAA000180000001800031A5CA287A31CA2849A5800000018000000180005555",
"0007 : AAAA000180000001800073D1CA1073D1CA1073DF800000018000000180005555",
"0008 : AAAA00018000000180001E3991401E3191081E71800000018000000180005555",
"0009 : AAAA000180000001800022F9A2203E21A2202221800000018000000180005555",
"000A : AAAA000180000001800020F9A08020F9A0803E81800000018000000180005555",
"000B : AAAA000180000001800022F9A220222194200821800000018000000180005555",
"000C : AAAA00018000000180003EF9A0803EF9A0802081800000018000000180005555",
"000D : AAAA00018000000180001EF1A08820F1A0901E89800000018000000180005555",
"000E : AAAA00018000000180001E71A0881C8982883C71800000018000000180005555",
"000F : AAAA00018000000180001EF9A0201C2182203CF9800000018000000180005555",
"0010 : AAAA0001800000018000391DA510251DA51039DD800000018000000180005555",
"0011 : AAAA00018000000180007189CA184A09CA08719D800000018000000180005555",
"0012 : AAAA00018000000180007199CA044A09CA10719D800000018000000180005555",
"0013 : AAAA00018000000180007199CA044A19CA047199800000018000000180005555",
"0014 : AAAA00018000000180007185CA0C4A15CA1C7185800000018000000180005555",
"0015 : AAAA00018000000180004993EA546A59DBD44A53800000018000000180005555",
"0016 : AAAA00018000000180003453C29A311789127113800000018000000180005555",
"0017 : AAAA00018000000180007BB9C1247939C1247939800000018000000180005555",
"0018 : AAAA00018000000180003325C4B447ADC4A434A5800000018000000180005555",
"0019 : AAAA00018000000180003E89A0D83EA9A0883E89800000018000000180005555",
"001A : AAAA00018000000180003A5DC252325D8A52719D800000018000000180005555",
"001B : AAAA000180000001800079CFC2107991C0507B8F800000018000000180005555",
"001C : AAAA00018000000180001E7190801E61901010E1800000018000000180005555",
"001D : AAAA00018000000180000E719080166192100EE1800000018000000180005555",
"001E : AAAA00018000000180001C7192801C61941012E1800000018000000180005555",
"001F : AAAA000180000001800012719280126192100CE1800000018000000180005555",
"0020 : 00000000000000000000000000000000",
"0021 : 00000000080808080808080008080000",
"0022 : 00002222222200000000000000000000",
"0023 : 000000001212127E24247E4848480000",
"0024 : 00000000083E4948380E09493E080000",
"0025 : 00000000314A4A340808162929460000",
"0026 : 000000001C2222141829454246390000",
"0027 : 00000808080800000000000000000000",
"0028 : 00000004080810101010101008080400",
"0029 : 00000020101008080808080810102000",
"002A : 00000000000008492A1C2A4908000000",
"002B : 0000000000000808087F080808000000",
"002C : 00000000000000000000000018080810",
"002D : 0000000000000000003C000000000000",
"002E : 00000000000000000000000018180000",
"002F : 00000000020204080810102040400000",
"0030 : 00000000182442464A52624224180000",
"0031 : 000000000818280808080808083E0000",
"0032 : 000000003C4242020C102040407E0000",
"0033 : 000000003C4242021C020242423C0000",
"0034 : 00000000040C142444447E0404040000",
"0035 : 000000007E4040407C020202423C0000",
"0036 : 000000001C2040407C424242423C0000",
"0037 : 000000007E0202040404080808080000",
"0038 : 000000003C4242423C424242423C0000",
"0039 : 000000003C4242423E02020204380000",
"003A : 00000000000018180000001818000000",
"003B : 00000000000018180000001808081000",
"003C : 00000000000204081020100804020000",
"003D : 000000000000007E0000007E00000000",
"003E : 00000000004020100804081020400000",
"003F : 000000003C4242020408080008080000",
"0040 : 000000001C224A565252524E201E0000",
"0041 : 0000000018242442427E424242420000",
"0042 : 000000007C4242427C424242427C0000",
"0043 : 000000003C42424040404042423C0000",
"0044 : 00000000784442424242424244780000",
"0045 : 000000007E4040407C404040407E0000",
"0046 : 000000007E4040407C40404040400000",
"0047 : 000000003C424240404E4242463A0000",
"0048 : 00000000424242427E42424242420000",
"0049 : 000000003E08080808080808083E0000",
"004A : 000000001F0404040404044444380000",
"004B : 00000000424448506060504844420000",
"004C : 000000004040404040404040407E0000",
"004D : 00000000424266665A5A424242420000",
"004E : 0000000042626252524A4A4646420000",
"004F : 000000003C42424242424242423C0000",
"0050 : 000000007C4242427C40404040400000",
"0051 : 000000003C4242424242425A663C0300",
"0052 : 000000007C4242427C48444442420000",
"0053 : 000000003C424240300C0242423C0000",
"0054 : 000000007F0808080808080808080000",
"0055 : 000000004242424242424242423C0000",
"0056 : 00000000414141222222141408080000",
"0057 : 00000000424242425A5A666642420000",
"0058 : 00000000424224241818242442420000",
"0059 : 00000000414122221408080808080000",
"005A : 000000007E02020408102040407E0000",
"005B : 0000000E080808080808080808080E00",
"005C : 00000000404020101008080402020000",
"005D : 00000070101010101010101010107000",
"005E : 00001824420000000000000000000000",
"005F : 00000000000000000000000000007F00",
"0060 : 00201008000000000000000000000000",
"0061 : 0000000000003C42023E4242463A0000",
"0062 : 0000004040405C6242424242625C0000",
"0063 : 0000000000003C4240404040423C0000",
"0064 : 0000000202023A4642424242463A0000",
"0065 : 0000000000003C42427E4040423C0000",
"0066 : 0000000C1010107C1010101010100000",
"0067 : 0000000000023A44444438203C42423C",
"0068 : 0000004040405C624242424242420000",
"0069 : 000000080800180808080808083E0000",
"006A : 0000000404000C040404040404044830",
"006B : 00000040404044485060504844420000",
"006C : 000000180808080808080808083E0000",
"006D : 00000000000076494949494949490000",
"006E : 0000000000005C624242424242420000",
"006F : 0000000000003C4242424242423C0000",
"0070 : 0000000000005C6242424242625C4040",
"0071 : 0000000000003A4642424242463A0202",
"0072 : 0000000000005C624240404040400000",
"0073 : 0000000000003C4240300C02423C0000",
"0074 : 000000001010107C10101010100C0000",
"0075 : 000000000000424242424242463A0000",
"0076 : 00000000000042424224242418180000",
"0077 : 00000000000041494949494949360000",
"0078 : 00000000000042422418182442420000",
"0079 : 0000000000004242424242261A02023C",
"007A : 0000000000007E0204081020407E0000",
"007B : 0000000C101008081010080810100C00",
"007C : 00000808080808080808080808080808",
"007D : 00000030080810100808101008083000",
"007E : 00000031494600000000000000000000",
"007F : AAAA000180000001800073D1CA104BD1CA1073DF800000018000000180005555"
};
class GlyphManager
{
private:
struct glyph
{
u8 character;
u32 glyph_point_offset;
u32 points_count;
std::vector<u8> plot;
};
std::unordered_map<u8, glyph> glyph_map;
void decode_glyph_map(const std::string glyphs[128])
{
for (int i = 0; i < 128; ++i)
{
std::string character = glyphs[i];
std::string index = character.substr(0, 4);
std::string glyph_data = character.substr(7);
glyph this_glyph;
this_glyph.character = strtol(index.c_str(), nullptr, 16);
this_glyph.plot.reserve(16);
if (glyph_data.length() == 32)
{
for (int n = 0; n < 16; ++n)
{
std::string line = glyph_data.substr(n * 2, 2);
u8 value = (u8)strtol(line.c_str(), nullptr, 16);
this_glyph.plot.push_back(value);
}
}
else
{
//TODO: Support 16-wide characters
for (int n = 0; n < 16; ++n)
this_glyph.plot.push_back(0);
}
glyph_map[this_glyph.character] = this_glyph;
}
}
public:
struct glyph_point
{
float x;
float y;
glyph_point(float _x, float _y) : x(_x), y(_y)
{}
};
GlyphManager()
{
glyph_map = {};
decode_glyph_map(GNU_UNIFONT_GLYPHS);
}
std::vector<glyph_point> generate_point_map()
{
std::vector<glyph_point> result;
for (auto &entry : glyph_map)
{
glyph& text = entry.second;
text.glyph_point_offset = (u32)result.size();
for (int j = 0; j < 16; ++j)
{
auto &line = text.plot[j];
if (line == 0)
continue;
for (int i = 0; i < 8; ++i)
{
if (line & (1 << i))
{
//Font is inverted, so we correct it for conventional renderers
float x = (float)(7 - i);
float y = (float)(15 - j);
result.push_back({ x, y });
}
}
}
text.points_count = (u32)result.size() - text.glyph_point_offset;
}
return result;
}
std::unordered_map<u8, std::pair<u32, u32>> get_glyph_offsets()
{
std::unordered_map<u8, std::pair<u32, u32>> result = {};
for (auto &entry : glyph_map)
{
result[entry.second.character] = std::make_pair(entry.second.glyph_point_offset, entry.second.points_count);
}
return result;
}
};

View file

@ -189,7 +189,7 @@ std::string VertexProgramDecompiler::GetFunc()
std::string VertexProgramDecompiler::GetTex()
{
return m_parr.AddParam(PF_PARAM_UNIFORM, "sampler2D", std::string("vtex") + std::to_string(/*?.tex_num*/0));
return m_parr.AddParam(PF_PARAM_UNIFORM, "sampler2D", std::string("vtex") + std::to_string(d2.tex_num));
}
std::string VertexProgramDecompiler::Format(const std::string& code)

View file

@ -374,7 +374,7 @@ namespace
void GLGSRender::end()
{
if (!draw_fbo || !load_program())
if (!draw_fbo)
{
rsx::thread::end();
return;
@ -397,24 +397,23 @@ void GLGSRender::end()
ds->set_cleared();
}
std::chrono::time_point<std::chrono::system_clock> textures_start = std::chrono::system_clock::now();
//Setup textures
for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
{
int location;
if (!rsx::method_registers.fragment_textures[i].enabled())
{
glActiveTexture(GL_TEXTURE0 + i);
glBindTexture(GL_TEXTURE_2D, 0);
continue;
}
if (m_program->uniforms.has_location("tex" + std::to_string(i), &location))
{
if (!rsx::method_registers.fragment_textures[i].enabled())
{
glActiveTexture(GL_TEXTURE0 + i);
glBindTexture(GL_TEXTURE_2D, 0);
glProgramUniform1i(m_program->id(), location, i);
continue;
}
m_gl_textures[i].set_target(get_gl_target_for_texture(rsx::method_registers.fragment_textures[i]));
__glcheck m_gl_texture_cache.upload_texture(i, rsx::method_registers.fragment_textures[i], m_gl_textures[i], m_rtts);
glProgramUniform1i(m_program->id(), location, i);
}
}
@ -423,29 +422,30 @@ void GLGSRender::end()
{
int texture_index = i + rsx::limits::fragment_textures_count;
int location;
if (!rsx::method_registers.vertex_textures[i].enabled())
{
glActiveTexture(GL_TEXTURE0 + texture_index);
glBindTexture(GL_TEXTURE_2D, 0);
continue;
}
if (m_program->uniforms.has_location("vtex" + std::to_string(i), &location))
{
if (!rsx::method_registers.vertex_textures[i].enabled())
{
glActiveTexture(GL_TEXTURE0 + texture_index);
glBindTexture(GL_TEXTURE_2D, 0);
glProgramUniform1i(m_program->id(), location, texture_index);
continue;
}
m_gl_vertex_textures[i].set_target(get_gl_target_for_texture(rsx::method_registers.vertex_textures[i]));
__glcheck m_gl_texture_cache.upload_texture(texture_index, rsx::method_registers.vertex_textures[i], m_gl_vertex_textures[i], m_rtts);
glProgramUniform1i(m_program->id(), location, texture_index);
}
}
std::chrono::time_point<std::chrono::system_clock> textures_end = std::chrono::system_clock::now();
m_textures_upload_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(textures_end - textures_start).count();
u32 vertex_draw_count;
std::optional<std::tuple<GLenum, u32> > indexed_draw_info;
std::tie(vertex_draw_count, indexed_draw_info) = set_vertex_buffer();
m_vao.bind();
std::chrono::time_point<std::chrono::system_clock> then = std::chrono::system_clock::now();
std::chrono::time_point<std::chrono::system_clock> draw_start = std::chrono::system_clock::now();
if (g_cfg_rsx_debug_output)
{
@ -467,8 +467,8 @@ void GLGSRender::end()
draw_fbo.draw_arrays(rsx::method_registers.current_draw_clause.primitive, vertex_draw_count);
}
std::chrono::time_point<std::chrono::system_clock> now = std::chrono::system_clock::now();
m_draw_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(now - then).count();
std::chrono::time_point<std::chrono::system_clock> draw_end = std::chrono::system_clock::now();
m_draw_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(draw_end - draw_start).count();
write_buffers();
@ -519,6 +519,7 @@ void GLGSRender::on_init_thread()
m_vao.element_array_buffer = m_index_ring_buffer;
m_gl_texture_cache.initialize_rtt_cache();
m_text_printer.init();
}
void GLGSRender::on_exit()
@ -556,6 +557,8 @@ void GLGSRender::on_exit()
m_uniform_ring_buffer.remove();
m_index_ring_buffer.remove();
m_text_printer.close();
return GSRender::on_exit();
}
@ -833,27 +836,25 @@ void GLGSRender::flip(int buffer)
__glcheck flip_fbo->blit(gl::screen, screen_area, areai(aspect_ratio).flipped_vertical());
m_frame->flip(m_context);
if (g_cfg_rsx_overlay)
{
//TODO: Display overlay in a cross-platform manner
//Core context throws wgl font functions out of the window as they use display lists
//Only show debug info if the user really requests it
if (g_cfg_rsx_debug_output)
{
std::string message =
"draw_calls: " + std::to_string(m_draw_calls) + ", " + "draw_call_setup: " + std::to_string(m_begin_time) + "us, " + "vertex_upload_time: " + std::to_string(m_vertex_upload_time) + "us, " + "draw_call_execution: " + std::to_string(m_draw_time) + "us";
LOG_ERROR(RSX, "%s", message);
}
gl::screen.bind();
glViewport(0, 0, m_frame->client_width(), m_frame->client_height());
m_text_printer.print_text(0, 0, m_frame->client_width(), m_frame->client_height(), "draw calls: " + std::to_string(m_draw_calls));
m_text_printer.print_text(0, 18, m_frame->client_width(), m_frame->client_height(), "draw call setup: " + std::to_string(m_begin_time) + "us");
m_text_printer.print_text(0, 36, m_frame->client_width(), m_frame->client_height(), "vertex upload time: " + std::to_string(m_vertex_upload_time) + "us");
m_text_printer.print_text(0, 54, m_frame->client_width(), m_frame->client_height(), "textures upload time: " + std::to_string(m_textures_upload_time) + "us");
m_text_printer.print_text(0, 72, m_frame->client_width(), m_frame->client_height(), "draw call execution: " + std::to_string(m_draw_time) + "us");
}
m_frame->flip(m_context);
m_draw_calls = 0;
m_begin_time = 0;
m_draw_time = 0;
m_vertex_upload_time = 0;
m_textures_upload_time = 0;
for (auto &tex : m_rtts.invalidated_resources)
{

View file

@ -6,6 +6,7 @@
#include "GLRenderTargets.h"
#include <Utilities/optional.hpp>
#include "GLProgramBuffer.h"
#include "GLTextOut.h"
#pragma comment(lib, "opengl32.lib")
@ -33,10 +34,13 @@ private:
u32 m_begin_time = 0;
u32 m_draw_time = 0;
u32 m_vertex_upload_time = 0;
u32 m_textures_upload_time = 0;
GLint m_min_texbuffer_alignment = 256;
GLint m_uniform_buffer_offset_align = 256;
gl::text_writer m_text_printer;
public:
gl::fbo draw_fbo;

View file

@ -167,6 +167,8 @@ OPENGL_PROC(PFNGLMAPBUFFERRANGEPROC, MapBufferRange);
OPENGL_PROC(PFNGLBINDBUFFERRANGEPROC, BindBufferRange);
OPENGL_PROC(PFNGLBINDBUFFERBASEPROC, BindBufferBase);
OPENGL_PROC(PFNGLMULTIDRAWARRAYSPROC, MultiDrawArrays);
//Texture Buffers
OPENGL_PROC(PFNGLTEXBUFFERPROC, TexBuffer);
OPENGL_PROC(PFNGLTEXTUREBUFFERRANGEEXTPROC, TextureBufferRangeEXT);

View file

@ -38,6 +38,41 @@ struct GLTraits
.make();
__glcheck result.use();
//Program locations are guaranteed to not change after linking
//Texture locations are simply bound to the TIUs so this can be done once
for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
{
int location;
if (result.uniforms.has_location("tex" + std::to_string(i), &location))
result.uniforms[location] = i;
}
for (int i = 0; i < rsx::limits::vertex_textures_count; ++i)
{
int location;
if (result.uniforms.has_location("vtex" + std::to_string(i), &location))
result.uniforms[location] = (i + rsx::limits::fragment_textures_count);
}
//We use texture buffers for vertex attributes. Bind these here as well
//as they are guaranteed to be fixed (1 to 1 mapping)
std::array<const char*, 16> s_reg_table =
{
"in_pos_buffer", "in_weight_buffer", "in_normal_buffer",
"in_diff_color_buffer", "in_spec_color_buffer",
"in_fog_buffer",
"in_point_size_buffer", "in_7_buffer",
"in_tc0_buffer", "in_tc1_buffer", "in_tc2_buffer", "in_tc3_buffer",
"in_tc4_buffer", "in_tc5_buffer", "in_tc6_buffer", "in_tc7_buffer"
};
for (int i = 0; i < rsx::limits::vertex_count; ++i)
{
int location;
if (result.uniforms.has_location(s_reg_table[i], &location))
result.uniforms[location] = (i + rsx::limits::fragment_textures_count + rsx::limits::vertex_textures_count);
}
LOG_NOTICE(RSX, "*** prog id = %d", result.id());
LOG_NOTICE(RSX, "*** vp id = %d", vertexProgramData.id);
LOG_NOTICE(RSX, "*** fp id = %d", fragmentProgramData.id);

View file

@ -0,0 +1,199 @@
#pragma once
#include "stdafx.h"
#include "GLHelpers.h"
#include "../Common/TextGlyphs.h"
namespace gl
{
class text_writer
{
private:
gl::glsl::program m_program;
gl::glsl::shader m_vs;
gl::glsl::shader m_fs;
gl::vao m_vao;
gl::buffer m_text_buffer;
gl::buffer m_scale_offsets_buffer;
std::unordered_map<u8, std::pair<u32, u32>> m_offsets;
bool initialized = false;
void init_program()
{
std::string vs =
{
"#version 420\n"
"#extension GL_ARB_shader_draw_parameters: enable\n"
"layout(location=0) in vec2 pos;\n"
"uniform vec2 offsets[255];\n"
"uniform vec2 scale;\n"
"\n"
"void main()\n"
"{\n"
" vec2 offset = offsets[gl_DrawIDARB];\n"
" gl_Position = vec4(pos, 0., 1.);\n"
" gl_Position.xy = gl_Position.xy * scale + offset;\n"
"}\n"
};
std::string fs =
{
"#version 420\n"
"layout(location=0) out vec4 col0;\n"
"uniform vec4 draw_color;\n"
"\n"
"void main()\n"
"{\n"
" col0 = draw_color;\n"
"}\n"
};
m_fs.create(gl::glsl::shader::type::fragment);
m_fs.source(fs);
m_fs.compile();
m_vs.create(gl::glsl::shader::type::vertex);
m_vs.source(vs);
m_vs.compile();
m_program.create();
m_program.attach(m_vs);
m_program.attach(m_fs);
m_program.make();
}
void load_program(float scale_x, float scale_y, float *offsets, int nb_offsets, color4f color)
{
float scale[] = { scale_x, scale_y };
m_program.use();
m_program.uniforms["draw_color"] = color;
glProgramUniform2fv(m_program.id(), m_program.uniforms["offsets"].location(), nb_offsets, offsets);
glProgramUniform2fv(m_program.id(), m_program.uniforms["scale"].location(), 1, scale);
}
public:
text_writer() {}
~text_writer(){}
void init()
{
m_text_buffer.create();
m_scale_offsets_buffer.create();
GlyphManager glyph_source;
auto points = glyph_source.generate_point_map();
const u32 buffer_size = points.size() * sizeof(GlyphManager::glyph_point);
m_text_buffer.data(buffer_size, points.data());
m_offsets = glyph_source.get_glyph_offsets();
m_scale_offsets_buffer.data(512 * 4 * sizeof(float));
//Init VAO
int old_vao;
glGetIntegerv(GL_VERTEX_ARRAY_BINDING, &old_vao);
m_vao.create();
m_vao.bind();
//This is saved as part of the bound VAO's state
m_vao.array_buffer = m_text_buffer;
glEnableVertexAttribArray(0);
glVertexAttribPointer(0, 2, GL_FLOAT, false, sizeof(GlyphManager::glyph_point), 0);
glBindVertexArray(old_vao);
init_program();
initialized = true;
}
void print_text(int x, int y, int target_w, int target_h, const std::string &text, color4f color = { 0.3f, 1.f, 0.3f, 1.f })
{
verify(HERE), initialized;
std::vector<GLint> offsets;
std::vector<GLsizei> counts;
std::vector<float> shader_offsets;
char *s = const_cast<char *>(text.c_str());
//Y is in raster coordinates: convert to bottom-left origin
y = (target_h - y - 16);
//Compress [0, w] and [0, h] into range [-1, 1]
float scale_x = 2.f / target_w;
float scale_y = 2.f / target_h;
float base_offset = 0.f;
shader_offsets.reserve(text.length() * 2);
while (*s)
{
u8 offset = (u8)*s;
bool to_draw = false; //Can be false for space or unsupported characters
auto o = m_offsets.find(offset);
if (o != m_offsets.end())
{
if (o->second.second > 0)
{
to_draw = true;
offsets.push_back(o->second.first);
counts.push_back(o->second.second);
}
}
if (to_draw)
{
//Generate a scale_offset pair for this entry
float offset_x = scale_x * (x + base_offset);
offset_x -= 1.f;
float offset_y = scale_y * y;
offset_y -= 1.f;
shader_offsets.push_back(offset_x);
shader_offsets.push_back(offset_y);
}
base_offset += 9.f;
s++;
}
//TODO: Add drop shadow if deemed necessary for visibility
int old_vao;
glGetIntegerv(GL_VERTEX_ARRAY_BINDING, &old_vao);
load_program(scale_x, scale_y, shader_offsets.data(), counts.size(), color);
m_vao.bind();
glMultiDrawArrays(GL_POINTS, offsets.data(), counts.data(), counts.size());
glBindVertexArray(old_vao);
}
void close()
{
if (initialized)
{
m_scale_offsets_buffer.remove();
m_text_buffer.remove();
m_vao.remove();
m_program.remove();
m_fs.remove();
m_vs.remove();
initialized = false;
}
}
};
}

View file

@ -151,12 +151,10 @@ namespace
case CELL_GCM_TEXTURE_G8B8:
return { GL_GREEN, GL_RED, GL_GREEN, GL_RED};
case CELL_GCM_TEXTURE_Y16_X16:
case CELL_GCM_TEXTURE_Y16_X16:
case CELL_GCM_TEXTURE_Y16_X16_FLOAT:
return { GL_RED, GL_GREEN, GL_RED, GL_GREEN};
case CELL_GCM_TEXTURE_Y16_X16_FLOAT:
return { GL_GREEN, GL_RED, GL_GREEN, GL_RED };
case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT:
case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT:
return { GL_RED, GL_ALPHA, GL_BLUE, GL_GREEN };

View file

@ -238,7 +238,8 @@ namespace
texture.copy_from(m_attrib_ring_info, gl_type, buffer_offset, data_size);
//Link texture to uniform
m_program->uniforms.texture(location, vertex_array.index + texture_index_offset, texture);
glActiveTexture(GL_TEXTURE0 + texture_index_offset + vertex_array.index);
texture.bind();
}
void operator()(const rsx::vertex_array_register& vertex_register)
@ -263,7 +264,8 @@ namespace
texture.copy_from(m_attrib_ring_info, gl_type, mapping.second, data_size);
//Link texture to uniform
m_program->uniforms.texture(location, vertex_register.index + texture_index_offset, texture);
glActiveTexture(GL_TEXTURE0 + texture_index_offset + vertex_register.index);
texture.bind();
break;
}
default:
@ -274,12 +276,6 @@ namespace
void operator()(const rsx::empty_vertex_array& vbo)
{
int location;
if (!m_program->uniforms.has_location(s_reg_table[vbo.index], &location))
return;
glActiveTexture(GL_TEXTURE0 + vbo.index + texture_index_offset);
glBindTexture(GL_TEXTURE_BUFFER, 0);
glProgramUniform1i(m_program->id(), location, vbo.index + texture_index_offset);
}
protected:
@ -331,20 +327,12 @@ namespace
upload_vertex_buffers(min_index, max_index, max_vertex_attrib_size, texture_index_offset);
// std::chrono::time_point<std::chrono::system_clock> now = std::chrono::system_clock::now();
// m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(now -
//then).count();
return std::make_tuple(index_count,
std::make_tuple(static_cast<GLenum>(GL_UNSIGNED_SHORT), offset_in_index_buffer));
}
upload_vertex_buffers(min_index, max_index, max_vertex_attrib_size, texture_index_offset);
// std::chrono::time_point<std::chrono::system_clock> now = std::chrono::system_clock::now();
// m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(now -
//then).count();
return std::make_tuple(vertex_count, std::optional<std::tuple<GLenum, u32>>());
}
@ -414,17 +402,7 @@ namespace
{
u32 verts_allocated = max_index - min_index + 1;
__glcheck m_attrib_ring_buffer.reserve_and_map(verts_allocated * max_vertex_attrib_size);
// Disable texture then reenable them
// Is it really necessary ?
for (int index = 0; index < rsx::limits::vertex_count; ++index) {
int location;
if (!m_program->uniforms.has_location(s_reg_table[index], &location)) continue;
glActiveTexture(GL_TEXTURE0 + index + texture_index_offset);
glBindTexture(GL_TEXTURE_BUFFER, 0);
glProgramUniform1i(m_program->id(), location, index + texture_index_offset);
continue;
}
vertex_buffer_visitor visitor(verts_allocated, texture_index_offset, m_attrib_ring_buffer,
m_program, m_gl_attrib_buffers, m_min_texbuffer_alignment);
const auto& vertex_buffers =
@ -458,12 +436,7 @@ namespace
if (!m_program->uniforms.has_location(s_reg_table[index], &location)) continue;
if (!vertex_info.size()) // disabled, bind a null sampler
{
glActiveTexture(GL_TEXTURE0 + index + texture_index_offset);
glBindTexture(GL_TEXTURE_BUFFER, 0);
glProgramUniform1i(m_program->id(), location, index + texture_index_offset);
continue;
}
const u32 element_size =
rsx::get_vertex_type_size_on_host(vertex_info.type(), vertex_info.size());
@ -498,7 +471,8 @@ namespace
texture.copy_from(m_attrib_ring_buffer, gl_type, mapping.second, data_size);
// Link texture to uniform
m_program->uniforms.texture(location, index + texture_index_offset, texture);
glActiveTexture(GL_TEXTURE0 + texture_index_offset + index);
texture.bind();
m_attrib_ring_buffer.unmap();
}
return vertex_draw_count;
@ -509,12 +483,16 @@ namespace
std::tuple<u32, std::optional<std::tuple<GLenum, u32>>> GLGSRender::set_vertex_buffer()
{
std::chrono::time_point<std::chrono::system_clock> then = std::chrono::system_clock::now();
return std::apply_visitor(draw_command_visitor(m_index_ring_buffer, m_attrib_ring_buffer,
auto result = std::apply_visitor(draw_command_visitor(m_index_ring_buffer, m_attrib_ring_buffer,
m_gl_attrib_buffers, m_program, m_min_texbuffer_alignment,
[this](const auto& state, const auto& list) {
return this->get_vertex_buffers(state, list);
}),
get_draw_command(rsx::method_registers));
std::chrono::time_point<std::chrono::system_clock> now = std::chrono::system_clock::now();
m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(now - then).count();
return result;
}
namespace

View file

@ -118,6 +118,12 @@ union D2
u32 iaddrh : 6;
u32 : 26;
};
struct
{
u32 : 8;
u32 tex_num : 2; /* Actual field may be 4 bits wide, but we only have 4 TIUs */
u32 : 22;
};
};
union D3

View file

@ -6,6 +6,8 @@
#include "../Common/BufferUtils.h"
#include "VKFormats.h"
extern cfg::bool_entry g_cfg_rsx_overlay;
namespace
{
u32 get_max_depth_value(rsx::surface_depth_format format)
@ -528,6 +530,13 @@ VKGSRender::VKGSRender() : GSRender(frame_type::Vulkan)
semaphore_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
vkCreateSemaphore((*m_device), &semaphore_info, nullptr, &m_present_semaphore);
if (g_cfg_rsx_overlay)
{
size_t idx = vk::get_render_pass_location( m_swap_chain->get_surface_format(), VK_FORMAT_UNDEFINED, 1);
m_text_writer.reset(new vk::text_writer());
m_text_writer->init(*m_device, m_memory_type_mapping, m_render_passes[idx]);
}
}
VKGSRender::~VKGSRender()
@ -578,6 +587,8 @@ VKGSRender::~VKGSRender()
m_rtts.destroy();
m_texture_cache.destroy();
m_text_writer.reset();
//Pipeline descriptors
vkDestroyPipelineLayout(*m_device, pipeline_layout, nullptr);
vkDestroyDescriptorSetLayout(*m_device, descriptor_layouts, nullptr);
@ -607,12 +618,11 @@ void VKGSRender::begin()
{
rsx::thread::begin();
//TODO: Fence sync, ring-buffers, etc
//CHECK_RESULT(vkDeviceWaitIdle((*m_device)));
//Ease resource pressure if the number of draw calls becomes too high
if (m_used_descriptors >= DESCRIPTOR_MAX_DRAW_CALLS)
{
std::chrono::time_point<std::chrono::system_clock> submit_start = std::chrono::system_clock::now();
close_and_submit_command_buffer({}, m_submit_fence);
CHECK_RESULT(vkWaitForFences((*m_device), 1, &m_submit_fence, VK_TRUE, ~0ULL));
@ -626,8 +636,13 @@ void VKGSRender::begin()
m_index_buffer_ring_info.m_get_pos = m_index_buffer_ring_info.get_current_put_pos_minus_one();
m_attrib_ring_info.m_get_pos = m_attrib_ring_info.get_current_put_pos_minus_one();
m_texture_upload_buffer_ring_info.m_get_pos = m_texture_upload_buffer_ring_info.get_current_put_pos_minus_one();
std::chrono::time_point<std::chrono::system_clock> submit_end = std::chrono::system_clock::now();
m_flip_time += std::chrono::duration_cast<std::chrono::microseconds>(submit_end - submit_start).count();
}
std::chrono::time_point<std::chrono::system_clock> start = std::chrono::system_clock::now();
VkDescriptorSetAllocateInfo alloc_info = {};
alloc_info.descriptorPool = descriptor_pool;
alloc_info.descriptorSetCount = 1;
@ -650,6 +665,9 @@ void VKGSRender::begin()
//TODO: Set up other render-state parameters into the program pipeline
std::chrono::time_point<std::chrono::system_clock> stop = std::chrono::system_clock::now();
m_setup_time += std::chrono::duration_cast<std::chrono::microseconds>(stop - start).count();
m_draw_calls++;
m_used_descriptors++;
}
@ -662,6 +680,8 @@ void VKGSRender::end()
(u8)vk::get_draw_buffers(rsx::method_registers.surface_color_target()).size());
VkRenderPass current_render_pass = m_render_passes[idx];
std::chrono::time_point<std::chrono::system_clock> textures_start = std::chrono::system_clock::now();
for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
{
if (m_program->has_uniform("tex" + std::to_string(i)))
@ -742,6 +762,9 @@ void VKGSRender::end()
}
}
std::chrono::time_point<std::chrono::system_clock> textures_end = std::chrono::system_clock::now();
m_textures_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(textures_end - textures_start).count();
VkRenderPassBeginInfo rp_begin = {};
rp_begin.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
rp_begin.renderPass = current_render_pass;
@ -755,6 +778,9 @@ void VKGSRender::end()
auto upload_info = upload_vertex_data();
std::chrono::time_point<std::chrono::system_clock> vertex_end = std::chrono::system_clock::now();
m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(vertex_end - textures_end).count();
vkCmdBindPipeline(m_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_program->pipeline);
vkCmdBindDescriptorSets(m_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 0, 1, &descriptor_sets, 0, nullptr);
@ -776,6 +802,8 @@ void VKGSRender::end()
vkCmdEndRenderPass(m_command_buffer);
std::chrono::time_point<std::chrono::system_clock> draw_end = std::chrono::system_clock::now();
m_draw_time += std::chrono::duration_cast<std::chrono::microseconds>(draw_end - vertex_end).count();
rsx::thread::end();
}
@ -1335,6 +1363,8 @@ void VKGSRender::flip(int buffer)
resize_screen = true;
}
std::chrono::time_point<std::chrono::system_clock> flip_start = std::chrono::system_clock::now();
if (!resize_screen)
{
u32 buffer_width = gcm_buffers[buffer].width;
@ -1396,6 +1426,41 @@ void VKGSRender::flip(int buffer)
vk::change_image_layout(m_command_buffer, m_swap_chain->get_swap_chain_image(m_current_present_image), VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, range);
}
std::unique_ptr<vk::framebuffer> direct_fbo;
std::vector<std::unique_ptr<vk::image_view>> swap_image_view;
if (g_cfg_rsx_overlay)
{
	//Draw the performance counters directly on top of the swapchain image before it is presented.
	//The image is currently in PRESENT_SRC layout; it has to become a color attachment for the text pass.
	auto subres = vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT);

	//Change the image layout whilst setting up a dependency on waiting for the blit op to finish before we start writing
	VkImageMemoryBarrier barrier = {};
	barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
	barrier.oldLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
	barrier.newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
	barrier.image = target_image;
	barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
	//The destination stage is COLOR_ATTACHMENT_OUTPUT, so the destination access must be a color attachment access.
	//TRANSFER_WRITE is not an access type supported by that stage and would leave the text pass unsynchronized.
	//READ is included as well in case the render pass loads the existing contents -- NOTE(review): confirm the load op
	barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
	barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
	barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
	barrier.subresourceRange = subres;

	vkCmdPipelineBarrier(m_command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_DEPENDENCY_BY_REGION_BIT, 0, nullptr, 0, nullptr, 1, &barrier);

	//Wrap the swapchain image in a temporary single-target framebuffer for the text writer
	size_t idx = vk::get_render_pass_location(m_swap_chain->get_surface_format(), VK_FORMAT_UNDEFINED, 1);
	VkRenderPass single_target_pass = m_render_passes[idx];

	swap_image_view.push_back(std::make_unique<vk::image_view>(*m_device, target_image, VK_IMAGE_VIEW_TYPE_2D, m_swap_chain->get_surface_format(), vk::default_component_map(), subres));
	direct_fbo.reset(new vk::framebuffer(*m_device, single_target_pass, m_client_width, m_client_height, std::move(swap_image_view)));

	//One line per counter, 18 pixels apart
	m_text_writer->print_text(m_command_buffer, *direct_fbo, 0, 0, direct_fbo->width(), direct_fbo->height(), "draw calls: " + std::to_string(m_draw_calls));
	m_text_writer->print_text(m_command_buffer, *direct_fbo, 0, 18, direct_fbo->width(), direct_fbo->height(), "draw call setup: " + std::to_string(m_setup_time) + "us");
	m_text_writer->print_text(m_command_buffer, *direct_fbo, 0, 36, direct_fbo->width(), direct_fbo->height(), "vertex upload time: " + std::to_string(m_vertex_upload_time) + "us");
	m_text_writer->print_text(m_command_buffer, *direct_fbo, 0, 54, direct_fbo->width(), direct_fbo->height(), "texture upload time: " + std::to_string(m_textures_upload_time) + "us");
	m_text_writer->print_text(m_command_buffer, *direct_fbo, 0, 72, direct_fbo->width(), direct_fbo->height(), "draw call execution: " + std::to_string(m_draw_time) + "us");
	m_text_writer->print_text(m_command_buffer, *direct_fbo, 0, 90, direct_fbo->width(), direct_fbo->height(), "submit and flip: " + std::to_string(m_flip_time) + "us");

	//Hand the image back in the layout the presentation engine expects
	vk::change_image_layout(m_command_buffer, target_image, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, subres);
}
close_and_submit_command_buffer({ m_present_semaphore }, m_submit_fence);
CHECK_RESULT(vkWaitForFences((*m_device), 1, &m_submit_fence, VK_TRUE, ~0ULL));
@ -1472,6 +1537,9 @@ void VKGSRender::flip(int buffer)
vkDestroyFence((*m_device), resize_fence, nullptr);
}
std::chrono::time_point<std::chrono::system_clock> flip_end = std::chrono::system_clock::now();
m_flip_time = std::chrono::duration_cast<std::chrono::microseconds>(flip_end - flip_start).count();
m_uniform_buffer_ring_info.m_get_pos = m_uniform_buffer_ring_info.get_current_put_pos_minus_one();
m_index_buffer_ring_info.m_get_pos = m_index_buffer_ring_info.get_current_put_pos_minus_one();
m_attrib_ring_info.m_get_pos = m_attrib_ring_info.get_current_put_pos_minus_one();
@ -1486,12 +1554,21 @@ void VKGSRender::flip(int buffer)
m_sampler_to_clean.clear();
m_framebuffer_to_clean.clear();
if (g_cfg_rsx_overlay)
{
m_text_writer->reset_descriptors();
}
vkResetDescriptorPool(*m_device, descriptor_pool, 0);
CHECK_RESULT(vkResetFences(*m_device, 1, &m_submit_fence));
CHECK_RESULT(vkResetCommandPool(*m_device, m_command_buffer_pool, 0));
open_command_buffer();
m_draw_calls = 0;
m_draw_time = 0;
m_setup_time = 0;
m_vertex_upload_time = 0;
m_textures_upload_time = 0;
m_used_descriptors = 0;
m_frame->flip(m_context);
}

View file

@ -4,6 +4,7 @@
#include "VKTextureCache.h"
#include "VKRenderTargets.h"
#include "VKFormats.h"
//Header names must match the on-disk spelling exactly and use '/' separators to build on case-sensitive filesystems
#include "VKTextOut.h"
#include <Utilities/optional.hpp>
#define RSX_DEBUG 1
@ -33,6 +34,8 @@ private:
std::unique_ptr<vk::buffer> null_buffer;
std::unique_ptr<vk::buffer_view> null_buffer_view;
std::unique_ptr<vk::text_writer> m_text_writer;
public:
//vk::fbo draw_fbo;
@ -72,6 +75,12 @@ private:
u32 m_client_height = 0;
u32 m_draw_calls = 0;
u32 m_setup_time = 0;
u32 m_vertex_upload_time = 0;
u32 m_textures_upload_time = 0;
u32 m_draw_time = 0;
u32 m_flip_time = 0;
u32 m_used_descriptors = 0;
u8 m_draw_buffers_count = 0;

View file

@ -112,7 +112,7 @@ namespace vk
case CELL_GCM_TEXTURE_DEPTH16_FLOAT: return VK_FORMAT_R16_SFLOAT;
case CELL_GCM_TEXTURE_X16: return VK_FORMAT_R16_UNORM;
case CELL_GCM_TEXTURE_Y16_X16: return VK_FORMAT_R16G16_UNORM;
case CELL_GCM_TEXTURE_Y16_X16_FLOAT: return VK_FORMAT_R16G16_UNORM;
case CELL_GCM_TEXTURE_Y16_X16_FLOAT: return VK_FORMAT_R16G16_SFLOAT;
case CELL_GCM_TEXTURE_R5G5B5A1: return VK_FORMAT_R5G5B5A1_UNORM_PACK16;
case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: return VK_FORMAT_R16G16B16A16_SFLOAT;
case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: return VK_FORMAT_R32G32B32A32_SFLOAT;

View file

@ -0,0 +1,365 @@
#pragma once
#include "VKHelpers.h"
#include "VKVertexProgram.h"
#include "VKFragmentProgram.h"
#include "../Common/TextGlyphs.h"
namespace vk
{
class text_writer
{
private:
std::unique_ptr<vk::buffer> m_vertex_buffer;
std::unique_ptr<vk::buffer> m_uniforms_buffer;
std::unique_ptr<vk::glsl::program> m_program;
VKVertexProgram m_vertex_shader;
VKFragmentProgram m_fragment_shader;
vk::descriptor_pool m_descriptor_pool;
VkDescriptorSet m_descriptor_set = nullptr;
VkDescriptorSetLayout m_descriptor_layout = nullptr;
VkPipelineLayout m_pipeline_layout = nullptr;
u32 m_used_descriptors = 0;
VkRenderPass m_render_pass;
VkDevice device = nullptr;
u32 m_uniform_buffer_offset = 0;
u32 m_uniform_buffer_size = 0;
bool initialized = false;
std::unordered_map<u8, std::pair<u32, u32>> m_offsets;
void init_descriptor_set(vk::render_device &dev)
{
VkDescriptorPoolSize descriptor_pools[1] =
{
{ VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 120 },
};
//Reserve descriptor pools
m_descriptor_pool.create(dev, descriptor_pools, 1);
VkDescriptorSetLayoutBinding bindings[1] = {};
//Scale and offset data plus output color
bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
bindings[0].descriptorCount = 1;
bindings[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
bindings[0].binding = 0;
VkDescriptorSetLayoutCreateInfo infos = {};
infos.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
infos.pBindings = bindings;
infos.bindingCount = 1;
CHECK_RESULT(vkCreateDescriptorSetLayout(dev, &infos, nullptr, &m_descriptor_layout));
VkPipelineLayoutCreateInfo layout_info = {};
layout_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
layout_info.setLayoutCount = 1;
layout_info.pSetLayouts = &m_descriptor_layout;
CHECK_RESULT(vkCreatePipelineLayout(dev, &layout_info, nullptr, &m_pipeline_layout));
}
void init_program(vk::render_device &dev)
{
std::string vs =
{
"#version 450\n"
"#extension GL_ARB_separate_shader_objects : enable\n"
"layout(location=0) in vec2 pos;\n"
"layout(std140, set=0, binding=0) uniform scale_offset_buffer\n"
"{\n"
" vec4 offsets[510];\n"
" vec4 scale;\n"
" vec4 text_color;\n"
"};\n"
"\n"
"layout(location=1) out vec4 draw_color;\n"
"\n"
"void main()\n"
"{\n"
" vec2 offset = offsets[gl_InstanceIndex].xy;\n"
" gl_Position = vec4(pos, 0., 1.);\n"
" gl_Position.xy = gl_Position.xy * scale.xy + offset;\n"
" draw_color = text_color;\n"
"}\n"
};
std::string fs =
{
"#version 420\n"
"#extension GL_ARB_separate_shader_objects : enable\n"
"layout(location=1) in vec4 draw_color;\n"
"layout(location=0) out vec4 col0;\n"
"\n"
"void main()\n"
"{\n"
" col0 = draw_color;\n"
"}\n"
};
m_vertex_shader.shader = vs;
m_vertex_shader.Compile();
m_fragment_shader.shader = fs;
m_fragment_shader.Compile();
VkPipelineShaderStageCreateInfo shader_stages[2] = {};
shader_stages[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
shader_stages[0].stage = VK_SHADER_STAGE_VERTEX_BIT;
shader_stages[0].module = m_vertex_shader.handle;
shader_stages[0].pName = "main";
shader_stages[1].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
shader_stages[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT;
shader_stages[1].module = m_fragment_shader.handle;
shader_stages[1].pName = "main";
VkDynamicState dynamic_state_descriptors[VK_DYNAMIC_STATE_RANGE_SIZE] = {};
VkPipelineDynamicStateCreateInfo dynamic_state_info = {};
dynamic_state_info.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO;
dynamic_state_descriptors[dynamic_state_info.dynamicStateCount++] = VK_DYNAMIC_STATE_VIEWPORT;
dynamic_state_descriptors[dynamic_state_info.dynamicStateCount++] = VK_DYNAMIC_STATE_SCISSOR;
dynamic_state_info.pDynamicStates = dynamic_state_descriptors;
VkVertexInputAttributeDescription vdesc;
VkVertexInputBindingDescription vbind;
vdesc.binding = 0;
vdesc.format = VK_FORMAT_R32G32_SFLOAT;
vdesc.location = 0;
vdesc.offset = 0;
vbind.binding = 0;
vbind.inputRate = VK_VERTEX_INPUT_RATE_VERTEX;
vbind.stride = 8;
VkPipelineVertexInputStateCreateInfo vi = {};
vi.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO;
vi.vertexAttributeDescriptionCount = 1;
vi.vertexBindingDescriptionCount = 1;
vi.pVertexAttributeDescriptions = &vdesc;
vi.pVertexBindingDescriptions = &vbind;
VkPipelineViewportStateCreateInfo vp = {};
vp.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO;
vp.scissorCount = 1;
vp.viewportCount = 1;
VkPipelineMultisampleStateCreateInfo ms = {};
ms.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO;
ms.pSampleMask = NULL;
ms.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT;
VkPipelineInputAssemblyStateCreateInfo ia = {};
ia.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
ia.topology = VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
VkPipelineRasterizationStateCreateInfo rs = {};
rs.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
rs.lineWidth = 1.f;
VkPipelineColorBlendAttachmentState att = {};
att.colorWriteMask = 0xf;
VkPipelineColorBlendStateCreateInfo cs = {};
cs.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO;
cs.attachmentCount = 1;
cs.pAttachments = &att;
VkPipelineDepthStencilStateCreateInfo ds = {};
ds.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO;
VkPipeline pipeline;
VkGraphicsPipelineCreateInfo info = {};
info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
info.pVertexInputState = &vi;
info.pInputAssemblyState = &ia;
info.pRasterizationState = &rs;
info.pColorBlendState = &cs;
info.pMultisampleState = &ms;
info.pViewportState = &vp;
info.pDepthStencilState = &ds;
info.stageCount = 2;
info.pStages = shader_stages;
info.pDynamicState = &dynamic_state_info;
info.layout = m_pipeline_layout;
info.basePipelineIndex = -1;
info.basePipelineHandle = VK_NULL_HANDLE;
info.renderPass = m_render_pass;
CHECK_RESULT(vkCreateGraphicsPipelines(dev, nullptr, 1, &info, NULL, &pipeline));
const std::vector<vk::glsl::program_input> unused;
m_program = std::make_unique<vk::glsl::program>((VkDevice)dev, pipeline, unused, unused);
}
void load_program(vk::command_buffer &cmd, float scale_x, float scale_y, float *offsets, int nb_offsets, std::array<float, 4> color)
{
verify(HERE), m_used_descriptors < 120;
VkDescriptorSetAllocateInfo alloc_info = {};
alloc_info.descriptorPool = m_descriptor_pool;
alloc_info.descriptorSetCount = 1;
alloc_info.pSetLayouts = &m_descriptor_layout;
alloc_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
CHECK_RESULT(vkAllocateDescriptorSets(device, &alloc_info, &m_descriptor_set));
m_used_descriptors++;
float scale[] = { scale_x, scale_y };
float colors[] = { color[0], color[1], color[2], color[3] };
float *dst = (float*)m_uniforms_buffer->map(m_uniform_buffer_offset, 8192);
//std140 spec demands that arrays be multiples of 16 bytes
for (int i = 0; i < nb_offsets; ++i)
{
dst[i * 4] = offsets[i * 2];
dst[i * 4 + 1] = offsets[i * 2 + 1];
}
memcpy(&dst[510*4], scale, 8);
memcpy(&dst[511*4], colors, 16);
m_uniforms_buffer->unmap();
m_program->bind_uniform({ m_uniforms_buffer->value, m_uniform_buffer_offset, 8192 }, 0, m_descriptor_set);
m_uniform_buffer_offset = (m_uniform_buffer_offset + 8192) % m_uniform_buffer_size;
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, m_program->pipeline);
vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipeline_layout, 0, 1, &m_descriptor_set, 0, nullptr);
VkDeviceSize zero = 0;
vkCmdBindVertexBuffers(cmd, 0, 1, &m_vertex_buffer->value, &zero);
}
public:
text_writer() {}
~text_writer()
{
if (initialized)
{
vkDestroyDescriptorSetLayout(device, m_descriptor_layout, nullptr);
vkDestroyPipelineLayout(device, m_pipeline_layout, nullptr);
m_descriptor_pool.destroy();
}
}
void init(vk::render_device &dev, vk::memory_type_mapping &memory_types, VkRenderPass &render_pass)
{
//At worst case, 1 char = 16*16*8 bytes (average about 24*8), so ~256K for 128 chars. Allocating 512k for verts
//uniform params are 8k in size, allocating for 120 lines (max lines at 4k, one column per row. Can be expanded
m_vertex_buffer.reset( new vk::buffer(dev, 524288, memory_types.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, 0));
m_uniforms_buffer.reset(new vk::buffer(dev, 983040, memory_types.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, 0));
m_render_pass = render_pass;
m_uniform_buffer_size = 983040;
init_descriptor_set(dev);
init_program(dev);
GlyphManager glyph_source;
auto points = glyph_source.generate_point_map();
const u32 buffer_size = points.size() * sizeof(GlyphManager::glyph_point);
u8 *dst = (u8*)m_vertex_buffer->map(0, buffer_size);
memcpy(dst, points.data(), buffer_size);
m_vertex_buffer->unmap();
m_offsets = glyph_source.get_glyph_offsets();
device = dev;
initialized = true;
}
void print_text(vk::command_buffer &cmd, vk::framebuffer &target, int x, int y, int target_w, int target_h, const std::string &text, std::array<float, 4> color = { 0.3f, 1.f, 0.3f, 1.f })
{
std::vector<u32> offsets;
std::vector<u32> counts;
std::vector<float> shader_offsets;
char *s = const_cast<char *>(text.c_str());
//Y is in raster coordinates: convert to bottom-left origin
y = (target_h - y - 16);
//Compress [0, w] and [0, h] into range [-1, 1]
//Flip Y scaling
float scale_x = +2.f / target_w;
float scale_y = -2.f / target_h;
float base_offset = 0.f;
shader_offsets.reserve(text.length() * 2);
while (*s)
{
u8 offset = (u8)*s;
bool to_draw = false; //Can be false for space or unsupported characters
auto o = m_offsets.find(offset);
if (o != m_offsets.end())
{
if (o->second.second > 0)
{
to_draw = true;
offsets.push_back(o->second.first);
counts.push_back(o->second.second);
}
}
if (to_draw)
{
//Generate a scale_offset pair for this entry
float offset_x = scale_x * (x + base_offset);
offset_x -= 1.f;
float offset_y = scale_y * y;
offset_y += 1.f;
shader_offsets.push_back(offset_x);
shader_offsets.push_back(offset_y);
}
base_offset += 9.f;
s++;
}
VkViewport vp = {0, 0, target_w, target_h, 0., 1.};
vkCmdSetViewport(cmd, 0, 1, &vp);
VkRect2D vs = { {0, 0}, {target_w, target_h} };
vkCmdSetScissor(cmd, 0, 1, &vs);
//TODO: Add drop shadow if deemed necessary for visibility
load_program(cmd, scale_x, scale_y, shader_offsets.data(), counts.size(), color);
VkRenderPassBeginInfo rp_begin = {};
rp_begin.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
rp_begin.renderPass = m_render_pass;
rp_begin.framebuffer = target.value;
rp_begin.renderArea.offset.x = 0;
rp_begin.renderArea.offset.y = 0;
rp_begin.renderArea.extent.width = target.width();
rp_begin.renderArea.extent.height = target.height();
vkCmdBeginRenderPass(cmd, &rp_begin, VK_SUBPASS_CONTENTS_INLINE);
for (int i = 0; i < counts.size(); ++i)
{
vkCmdDraw(cmd, counts[i], 1, offsets[i], i);
}
vkCmdEndRenderPass(cmd);
}
void reset_descriptors()
{
vkResetDescriptorPool(device, m_descriptor_pool, 0);
m_used_descriptors = 0;
}
};
}

View file

@ -92,6 +92,7 @@
</ProjectReference>
</ItemGroup>
<ItemGroup>
<ClInclude Include="Emu\RSX\GL\GLTextOut.h" />
<ClInclude Include="Emu\RSX\GL\GLCommonDecompiler.h" />
<ClInclude Include="Emu\RSX\GL\GLFragmentProgram.h" />
<ClInclude Include="Emu\RSX\GL\GLGSRender.h" />

View file

@ -23,5 +23,6 @@
<ClInclude Include="Emu\RSX\GL\OpenGL.h" />
<ClInclude Include="Emu\RSX\GL\GLTextureCache.h" />
<ClInclude Include="Emu\RSX\GL\GLRenderTargets.h" />
<ClInclude Include="Emu\RSX\GL\GLTextOut.h" />
</ItemGroup>
</Project>

View file

@ -30,6 +30,7 @@
<ClInclude Include="Emu\RSX\VK\VKHelpers.h" />
<ClInclude Include="Emu\RSX\VK\VKProgramBuffer.h" />
<ClInclude Include="Emu\RSX\VK\VKRenderTargets.h" />
<ClInclude Include="Emu\RSX\VK\VKTextOut.h" />
<ClInclude Include="Emu\RSX\VK\VKTextureCache.h" />
<ClInclude Include="Emu\RSX\VK\VKVertexProgram.h" />
<ClInclude Include="Emu\RSX\VK\VulkanAPI.h" />

View file

@ -37,6 +37,9 @@
<ClInclude Include="Emu\RSX\VK\VKFormats.h">
<Filter>Source Files</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\VK\VKTextOut.h">
<Filter>Source Files</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="Emu\RSX\VK\VKGSRender.cpp">

View file

@ -608,6 +608,7 @@
<ClInclude Include="Emu\CPU\CPUThread.h" />
<ClInclude Include="Emu\DbgCommand.h" />
<ClInclude Include="Emu\Memory\wait_engine.h" />
<ClInclude Include="Emu\RSX\Common\TextGlyphs.h" />
<ClInclude Include="Emu\RSX\gcm_enums.h" />
<ClInclude Include="Emu\RSX\gcm_printing.h" />
<ClInclude Include="Emu\RSX\rsx_cache.h" />

View file

@ -1705,5 +1705,8 @@
<ClInclude Include="Emu\RSX\gcm_printing.h">
<Filter>Emu\GPU\RSX</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\Common\TextGlyphs.h">
<Filter>Emu\GPU\RSX\Common</Filter>
</ClInclude>
</ItemGroup>
</Project>