Merge pull request #2391 from kd-11/rsx_surface_tests

gl/vk/rsx: Improve texture cache
This commit is contained in:
kd-11 2017-03-01 10:36:03 +03:00 committed by GitHub
commit 4ab9a2a3a8
19 changed files with 1779 additions and 973 deletions

View file

@ -363,34 +363,34 @@ u8 get_format_block_size_in_texel(int format)
LOG_ERROR(RSX, "Unimplemented block size in texels for texture format: 0x%x", format);
return 1;
}
}
// Returns the per-pixel storage size in bytes for an RSX surface color format.
// Throws (fmt::throw_exception) for formats outside the known set.
u8 get_format_block_size_in_bytes(rsx::surface_color_format format)
{
	switch (format)
	{
	// 8bpp single channel
	case rsx::surface_color_format::b8:
		return 1;
	// 16bpp packed formats
	case rsx::surface_color_format::g8b8:
	case rsx::surface_color_format::r5g6b5:
	case rsx::surface_color_format::x1r5g5b5_o1r5g5b5:
	case rsx::surface_color_format::x1r5g5b5_z1r5g5b5:
		return 2;
	// 32bpp formats (x32 is a single 32-bit channel)
	case rsx::surface_color_format::a8b8g8r8:
	case rsx::surface_color_format::a8r8g8b8:
	case rsx::surface_color_format::x8b8g8r8_o8b8g8r8:
	case rsx::surface_color_format::x8b8g8r8_z8b8g8r8:
	case rsx::surface_color_format::x8r8g8b8_o8r8g8b8:
	case rsx::surface_color_format::x8r8g8b8_z8r8g8b8:
	case rsx::surface_color_format::x32:
		return 4;
	// 64bpp: four 16-bit channels
	case rsx::surface_color_format::w16z16y16x16:
		return 8;
	// 128bpp: four 32-bit channels
	case rsx::surface_color_format::w32z32y32x32:
		return 16;
	default:
		fmt::throw_exception("Invalid color format 0x%x" HERE, (u32)format);
	}
}
// Returns the per-pixel storage size in bytes for an RSX surface color format.
// Throws (fmt::throw_exception) for formats outside the known set.
u8 get_format_block_size_in_bytes(rsx::surface_color_format format)
{
	switch (format)
	{
	// 8bpp single channel
	case rsx::surface_color_format::b8:
		return 1;
	// 16bpp packed formats
	case rsx::surface_color_format::g8b8:
	case rsx::surface_color_format::r5g6b5:
	case rsx::surface_color_format::x1r5g5b5_o1r5g5b5:
	case rsx::surface_color_format::x1r5g5b5_z1r5g5b5:
		return 2;
	// 32bpp formats (x32 is a single 32-bit channel)
	case rsx::surface_color_format::a8b8g8r8:
	case rsx::surface_color_format::a8r8g8b8:
	case rsx::surface_color_format::x8b8g8r8_o8b8g8r8:
	case rsx::surface_color_format::x8b8g8r8_z8b8g8r8:
	case rsx::surface_color_format::x8r8g8b8_o8r8g8b8:
	case rsx::surface_color_format::x8r8g8b8_z8r8g8b8:
	case rsx::surface_color_format::x32:
		return 4;
	// 64bpp: four 16-bit channels
	case rsx::surface_color_format::w16z16y16x16:
		return 8;
	// 128bpp: four 32-bit channels
	case rsx::surface_color_format::w32z32y32x32:
		return 16;
	default:
		fmt::throw_exception("Invalid color format 0x%x" HERE, (u32)format);
	}
}
static size_t get_placed_texture_storage_size(u16 width, u16 height, u32 depth, u8 format, u16 mipmap, bool cubemap, size_t row_pitch_alignement, size_t mipmap_alignment)

View file

@ -41,7 +41,7 @@ void Shader::Compile(const std::string &code, SHADER_TYPE st)
void D3D12GSRender::load_program()
{
auto rtt_lookup_func = [this](u32 texaddr, bool is_depth) -> std::tuple<bool, u16>
auto rtt_lookup_func = [this](u32 texaddr, rsx::fragment_texture&, bool is_depth) -> std::tuple<bool, u16>
{
ID3D12Resource *surface = nullptr;
if (!is_depth)

View file

@ -332,7 +332,6 @@ void GLGSRender::begin()
std::chrono::time_point<steady_clock> now = steady_clock::now();
m_begin_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(now - then).count();
m_draw_calls++;
}
namespace
@ -381,8 +380,6 @@ void GLGSRender::end()
m_index_ring_buffer->reserve_storage_on_heap(16 * 1024);
}
draw_fbo.bind();
//Check if depth buffer is bound and valid
//If ds is not initialized clear it; it seems new depth textures should have depth cleared
gl::render_target *ds = std::get<1>(m_rtts.m_bound_depth_stencil);
@ -473,10 +470,17 @@ void GLGSRender::end()
draw_fbo.draw_arrays(rsx::method_registers.current_draw_clause.primitive, vertex_draw_count);
}
m_attrib_ring_buffer->notify();
m_index_ring_buffer->notify();
m_uniform_ring_buffer->notify();
std::chrono::time_point<steady_clock> draw_end = steady_clock::now();
m_draw_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(draw_end - draw_start).count();
write_buffers();
m_draw_calls++;
//LOG_WARNING(RSX, "Finished draw call, EID=%d", m_draw_calls);
synchronize_buffers();
rsx::thread::end();
}
@ -546,10 +550,11 @@ void GLGSRender::on_init_thread()
m_index_ring_buffer->create(gl::buffer::target::element_array, 16 * 0x100000);
m_vao.element_array_buffer = *m_index_ring_buffer;
m_gl_texture_cache.initialize_rtt_cache();
if (g_cfg_rsx_overlay)
m_text_printer.init();
m_gl_texture_cache.initialize(this);
}
void GLGSRender::on_exit()
@ -588,11 +593,12 @@ void GLGSRender::on_exit()
m_index_ring_buffer->remove();
m_text_printer.close();
m_gl_texture_cache.close();
return GSRender::on_exit();
}
void nv4097_clear_surface(u32 arg, GLGSRender* renderer)
void GLGSRender::clear_surface(u32 arg)
{
if (rsx::method_registers.surface_color_target() == rsx::surface_target::none) return;
@ -602,9 +608,6 @@ void nv4097_clear_surface(u32 arg, GLGSRender* renderer)
return;
}
renderer->init_buffers(true);
renderer->draw_fbo.bind();
GLbitfield mask = 0;
rsx::surface_depth_format surface_depth_format = rsx::method_registers.surface_depth_fmt();
@ -618,6 +621,10 @@ void nv4097_clear_surface(u32 arg, GLGSRender* renderer)
glDepthMask(GL_TRUE);
glClearDepth(double(clear_depth) / max_depth_value);
mask |= GLenum(gl::buffers::depth);
gl::render_target *ds = std::get<1>(m_rtts.m_bound_depth_stencil);
if (ds && !ds->cleared())
ds->set_cleared();
}
if (surface_depth_format == rsx::surface_depth_format::z24s8 && (arg & 0x2))
@ -644,46 +651,31 @@ void nv4097_clear_surface(u32 arg, GLGSRender* renderer)
}
glClear(mask);
renderer->write_buffers();
}
using rsx_method_impl_t = void(*)(u32, GLGSRender*);
static const std::unordered_map<u32, rsx_method_impl_t> g_gl_method_tbl =
{
{ NV4097_CLEAR_SURFACE, nv4097_clear_surface }
};
bool GLGSRender::do_method(u32 cmd, u32 arg)
{
auto found = g_gl_method_tbl.find(cmd);
if (found == g_gl_method_tbl.end())
{
return false;
}
found->second(arg, this);
switch (cmd)
{
case NV4097_CLEAR_SURFACE:
{
if (arg & 0x1)
{
gl::render_target *ds = std::get<1>(m_rtts.m_bound_depth_stencil);
if (ds && !ds->cleared())
ds->set_cleared();
}
init_buffers(true);
synchronize_buffers();
clear_surface(arg);
return true;
}
case NV4097_TEXTURE_READ_SEMAPHORE_RELEASE:
case NV4097_BACK_END_WRITE_SEMAPHORE_RELEASE:
flush_draw_buffers = true;
return true;
}
return true;
return false;
}
bool GLGSRender::load_program()
{
auto rtt_lookup_func = [this](u32 texaddr, bool is_depth) -> std::tuple<bool, u16>
auto rtt_lookup_func = [this](u32 texaddr, rsx::fragment_texture &tex, bool is_depth) -> std::tuple<bool, u16>
{
gl::render_target *surface = nullptr;
if (!is_depth)
@ -691,14 +683,21 @@ bool GLGSRender::load_program()
else
surface = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr);
if (!surface) return std::make_tuple(false, 0);
if (!surface)
{
auto rsc = m_rtts.get_surface_subresource_if_applicable(texaddr, 0, 0, tex.pitch());
if (!rsc.surface || rsc.is_depth_surface != is_depth)
return std::make_tuple(false, 0);
surface = rsc.surface;
}
return std::make_tuple(true, surface->get_native_pitch());
};
RSXVertexProgram vertex_program = get_current_vertex_program();
RSXFragmentProgram fragment_program = get_current_fragment_program(rtt_lookup_func);
std::array<float, 16> rtt_scaling;
u32 unnormalized_rtts = 0;
for (auto &vtx : vertex_program.rsx_vertex_inputs)
@ -819,17 +818,8 @@ void GLGSRender::flip(int buffer)
rsx::tiled_region buffer_region = get_tiled_address(gcm_buffers[buffer].offset, CELL_GCM_LOCATION_LOCAL);
u32 absolute_address = buffer_region.address + buffer_region.base;
if (0)
{
LOG_NOTICE(RSX, "flip(%d) -> 0x%x [0x%x]", buffer, absolute_address, rsx::get_address(gcm_buffers[1 - buffer].offset, CELL_GCM_LOCATION_LOCAL));
}
gl::texture *render_target_texture = m_rtts.get_texture_from_render_target_if_applicable(absolute_address);
/**
* Calling read_buffers will overwrite cached content
*/
__glcheck m_flip_fbo.recreate();
m_flip_fbo.bind();
@ -877,33 +867,27 @@ void GLGSRender::flip(int buffer)
areai screen_area = coordi({}, { (int)buffer_width, (int)buffer_height });
coordi aspect_ratio;
if (1) //enable aspect ratio
sizei csize(m_frame->client_width(), m_frame->client_height());
sizei new_size = csize;
const double aq = (double)buffer_width / buffer_height;
const double rq = (double)new_size.width / new_size.height;
const double q = aq / rq;
if (q > 1.0)
{
sizei csize(m_frame->client_width(), m_frame->client_height());
sizei new_size = csize;
const double aq = (double)buffer_width / buffer_height;
const double rq = (double)new_size.width / new_size.height;
const double q = aq / rq;
if (q > 1.0)
{
new_size.height = int(new_size.height / q);
aspect_ratio.y = (csize.height - new_size.height) / 2;
}
else if (q < 1.0)
{
new_size.width = int(new_size.width * q);
aspect_ratio.x = (csize.width - new_size.width) / 2;
}
aspect_ratio.size = new_size;
new_size.height = int(new_size.height / q);
aspect_ratio.y = (csize.height - new_size.height) / 2;
}
else
else if (q < 1.0)
{
aspect_ratio.size = { m_frame->client_width(), m_frame->client_height() };
new_size.width = int(new_size.width * q);
aspect_ratio.x = (csize.width - new_size.width) / 2;
}
aspect_ratio.size = new_size;
gl::screen.clear(gl::buffers::color_depth_stencil);
__glcheck flip_fbo->blit(gl::screen, screen_area, areai(aspect_ratio).flipped_vertical());
@ -928,6 +912,8 @@ void GLGSRender::flip(int buffer)
m_vertex_upload_time = 0;
m_textures_upload_time = 0;
m_gl_texture_cache.clear_temporary_surfaces();
for (auto &tex : m_rtts.invalidated_resources)
{
tex->remove();
@ -946,6 +932,48 @@ u64 GLGSRender::timestamp() const
bool GLGSRender::on_access_violation(u32 address, bool is_writing)
{
if (is_writing) return m_gl_texture_cache.mark_as_dirty(address);
return false;
if (is_writing)
return m_gl_texture_cache.mark_as_dirty(address);
else
return m_gl_texture_cache.flush_section(address);
}
// Runs on the GL/RSX thread. Services flush requests posted from other
// threads via post_flush_request(): flushes each requested texture-cache
// section and wakes the thread waiting on that item's condition variable.
void GLGSRender::do_local_task()
{
	std::lock_guard<std::mutex> lock(queue_guard);

	// Discard entries whose requester has already picked up the result
	work_queue.remove_if([](work_item &q) { return q.received; });

	for (work_item& q: work_queue)
	{
		std::unique_lock<std::mutex> lock(q.guard_mutex);

		//Process this address
		q.result = m_gl_texture_cache.flush_section(q.address_to_flush);
		q.processed = true;

		//Notify thread waiting on this
		// (unlock before notify so the waiter can re-acquire immediately)
		lock.unlock();
		q.cv.notify_one();
	}
}
// Queues a flush request for `address` from a foreign thread (e.g. the
// access-violation handler) and returns a reference to the queued item.
// std::list never relocates elements, so the reference stays valid while
// the caller waits on item.cv / item.processed; do_local_task() fulfils it.
work_item& GLGSRender::post_flush_request(u32 address)
{
	std::lock_guard<std::mutex> lock(queue_guard);

	work_queue.emplace_back();
	work_item &result = work_queue.back();
	result.address_to_flush = address;

	return result;
}
// Writes render-target contents back to guest memory if a semaphore
// release previously flagged the draw buffers as pending a writeback.
void GLGSRender::synchronize_buffers()
{
	if (!flush_draw_buffers)
		return;

	write_buffers();
	flush_draw_buffers = false;
}

View file

@ -12,6 +12,41 @@
#pragma comment(lib, "opengl32.lib")
// One cross-thread texture-cache flush request. Produced by
// post_flush_request() on a foreign thread, fulfilled by do_local_task()
// on the GL thread, which signals `cv` after setting `processed`.
struct work_item
{
	std::condition_variable cv;   // signalled by the GL thread when done
	std::mutex guard_mutex;       // protects the flags below with cv
	u32 address_to_flush = 0;     // guest address of the section to flush
	bool processed = false;       // set by the GL thread once flushed
	bool result = false;          // flush_section() return value
	bool received = false;        // set by the requester; item is then reaped
};
// Snapshot of a bound color/depth surface (address, pitch, format and
// dimensions) taken in init_buffers() and used later by write_buffers()
// to compute writeback ranges. An address of 0 marks an unused slot.
struct gcm_buffer_info
{
	u32 address = 0;   // base address in guest memory; 0 = slot not bound
	u32 pitch = 0;     // row pitch in bytes; pitches <= 64 are ignored by callers
	bool is_depth_surface = false;
	rsx::surface_color_format color_format = {};
	rsx::surface_depth_format depth_format = {};
	u16 width = 0;
	u16 height = 0;

	// All members carry in-class initializers, so a default-constructed (or
	// `= {}` assigned) instance is fully zeroed. The original default ctor
	// only reset address/pitch, leaving width/height/formats indeterminate,
	// which write_buffers() could then read (pitch * height).
	gcm_buffer_info() = default;

	gcm_buffer_info(const u32 address_, const u32 pitch_, bool is_depth_, const rsx::surface_color_format fmt_, const rsx::surface_depth_format dfmt_, const u16 w, const u16 h)
		:address(address_), pitch(pitch_), is_depth_surface(is_depth_), color_format(fmt_), depth_format(dfmt_), width(w), height(h)
	{}
};
class GLGSRender : public GSRender
{
private:
@ -25,7 +60,7 @@ private:
gl_render_targets m_rtts;
gl::gl_texture_cache m_gl_texture_cache;
gl::texture_cache m_gl_texture_cache;
gl::texture m_gl_attrib_buffers[rsx::limits::vertex_count];
@ -49,6 +84,14 @@ private:
gl::text_writer m_text_printer;
std::mutex queue_guard;
std::list<work_item> work_queue;
gcm_buffer_info surface_info[rsx::limits::color_buffers_count];
gcm_buffer_info depth_surface_info;
bool flush_draw_buffers = false;
public:
gl::fbo draw_fbo;
@ -72,6 +115,8 @@ private:
// Return element to draw and in case of indexed draw index type and offset in index buffer
std::tuple<u32, std::optional<std::tuple<GLenum, u32> > > set_vertex_buffer();
void clear_surface(u32 arg);
public:
bool load_program();
void init_buffers(bool skip_reading = false);
@ -79,6 +124,9 @@ public:
void write_buffers();
void set_viewport();
void synchronize_buffers();
work_item& post_flush_request(u32 address);
protected:
void begin() override;
void end() override;
@ -89,6 +137,8 @@ protected:
void flip(int buffer) override;
u64 timestamp() const override;
void do_local_task() override;
bool on_access_violation(u32 address, bool is_writing) override;
virtual std::array<std::vector<gsl::byte>, 4> copy_render_targets_to_memory() override;

View file

@ -67,6 +67,106 @@ namespace gl
}
};
// Thin wrapper over an OpenGL sync object (GLsync) used to track GPU
// progress on ring buffers. The GL_SYNC_FLUSH_COMMANDS_BIT flag is used
// on the first glClientWaitSync only, then cleared so later checks poll
// GL_SYNC_STATUS without forcing another flush.
// NOTE(review): the destructor intentionally does not delete m_value;
// owners call destroy()/wait_for_signal() — confirm this is by design.
class fence
{
	GLsync m_value = nullptr;
	GLenum flags = GL_SYNC_FLUSH_COMMANDS_BIT;

public:

	fence() {}
	~fence() {}

	// Inserts a new fence into the GL command stream and re-arms the flush flag.
	void create()
	{
		m_value = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
		flags = GL_SYNC_FLUSH_COMMANDS_BIT;
	}

	// Deletes the current sync object (assumes one exists).
	void destroy()
	{
		glDeleteSync(m_value);
		m_value = nullptr;
	}

	// Replaces any existing sync object with a fresh fence.
	void reset()
	{
		if (m_value != nullptr)
			destroy();

		create();
	}

	// True when no sync object is currently held.
	bool is_empty()
	{
		return (m_value == nullptr);
	}

	// Non-blocking poll: first call flushes via glClientWaitSync(timeout 0),
	// subsequent calls query GL_SYNC_STATUS.
	bool check_signaled()
	{
		verify(HERE), m_value != nullptr;

		if (flags)
		{
			GLenum err = glClientWaitSync(m_value, flags, 0);
			flags = 0;
			return (err == GL_ALREADY_SIGNALED || err == GL_CONDITION_SATISFIED);
		}
		else
		{
			GLint status = GL_UNSIGNALED;
			GLint tmp;

			glGetSynciv(m_value, GL_SYNC_STATUS, 4, &tmp, &status);
			return (status == GL_SIGNALED);
		}
	}

	// Busy-waits until the fence signals, then deletes the sync object.
	// Returns true if the GPU reported the fence as signaled.
	bool wait_for_signal()
	{
		verify(HERE), m_value != nullptr;

		GLenum err = GL_WAIT_FAILED;
		bool done = false;

		while (!done)
		{
			if (flags)
			{
				err = glClientWaitSync(m_value, flags, 0);
				flags = 0;

				switch (err)
				{
				default:
					LOG_ERROR(RSX, "gl::fence sync returned unknown error 0x%X", err);
					//Fall through and treat as signaled to avoid spinning forever
				case GL_ALREADY_SIGNALED:
				case GL_CONDITION_SATISFIED:
					done = true;
					break;
				case GL_TIMEOUT_EXPIRED:
					continue;
				}
			}
			else
			{
				GLint status = GL_UNSIGNALED;
				GLint tmp;

				glGetSynciv(m_value, GL_SYNC_STATUS, 4, &tmp, &status);

				if (status == GL_SIGNALED)
				{
					// Bug fix: previously this path left err == GL_WAIT_FAILED,
					// making a successful wait report false to the caller.
					err = GL_CONDITION_SATISFIED;
					done = true;
				}
			}
		}

		glDeleteSync(m_value);
		m_value = nullptr;

		return (err == GL_ALREADY_SIGNALED || err == GL_CONDITION_SATISFIED);
	}
};
template<typename Type, uint BindId, uint GetStateId>
class save_binding_state_base
{
@ -594,33 +694,7 @@ namespace gl
u32 m_limit = 0;
void *m_memory_mapping = nullptr;
GLsync m_fence = nullptr;
void wait_for_sync()
{
verify(HERE), m_fence != nullptr;
bool done = false;
while (!done)
{
//Check if we are finished, wait time = 1us
GLenum err = glClientWaitSync(m_fence, GL_SYNC_FLUSH_COMMANDS_BIT, 1000);
switch (err)
{
default:
LOG_ERROR(RSX, "err Returned 0x%X", err);
case GL_ALREADY_SIGNALED:
case GL_CONDITION_SATISFIED:
done = true;
break;
case GL_TIMEOUT_EXPIRED:
continue;
}
}
glDeleteSync(m_fence);
m_fence = nullptr;
}
fence m_fence;
public:
@ -628,7 +702,7 @@ namespace gl
{
if (m_id)
{
wait_for_sync();
m_fence.wait_for_signal();
remove();
}
@ -656,17 +730,15 @@ namespace gl
if ((offset + alloc_size) > m_limit)
{
//TODO: Measure the stall here
wait_for_sync();
if (!m_fence.is_empty())
m_fence.wait_for_signal();
m_data_loc = 0;
offset = 0;
}
if (!m_data_loc)
{
verify(HERE), m_fence == nullptr;
m_fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
}
m_fence.reset();
//Align data loc to 256; allows some "guard" region so we dont trample our own data inadvertently
m_data_loc = align(offset + alloc_size, 256);
@ -697,6 +769,13 @@ namespace gl
{
glBindBufferRange((GLenum)current_target(), index, id(), offset, size);
}
//Notification of a draw command
virtual void notify()
{
if (m_fence.is_empty())
m_fence.reset();
}
};
class legacy_ring_buffer : public ring_buffer
@ -790,6 +869,8 @@ namespace gl
m_mapped_bytes = 0;
m_mapping_offset = 0;
}
void notify() override {}
};
class vao
@ -1019,7 +1100,16 @@ namespace gl
compressed_rgb_s3tc_dxt1 = GL_COMPRESSED_RGB_S3TC_DXT1_EXT,
compressed_rgba_s3tc_dxt1 = GL_COMPRESSED_RGBA_S3TC_DXT1_EXT,
compressed_rgba_s3tc_dxt3 = GL_COMPRESSED_RGBA_S3TC_DXT3_EXT,
compressed_rgba_s3tc_dxt5 = GL_COMPRESSED_RGBA_S3TC_DXT5_EXT
compressed_rgba_s3tc_dxt5 = GL_COMPRESSED_RGBA_S3TC_DXT5_EXT,
//Sized internal formats, see opengl spec document on glTexImage2D, table 3
rgba8 = GL_RGBA8,
r5g6b5 = GL_RGB565,
r8 = GL_R8,
rg8 = GL_RG8,
r32f = GL_R32F,
rgba16f = GL_RGBA16F,
rgba32f = GL_RGBA32F
};
enum class wrap

View file

@ -170,6 +170,8 @@ OPENGL_PROC(PFNGLBINDBUFFERBASEPROC, BindBufferBase);
OPENGL_PROC(PFNGLMULTIDRAWARRAYSPROC, MultiDrawArrays);
OPENGL_PROC(PFNGLGETTEXTUREIMAGEEXTPROC, GetTextureImageEXT);
//Texture Buffers
OPENGL_PROC(PFNGLTEXBUFFERPROC, TexBuffer);
OPENGL_PROC(PFNGLTEXTUREBUFFERRANGEEXTPROC, TextureBufferRangeEXT);
@ -183,6 +185,7 @@ OPENGL_PROC(PFNGLBUFFERSTORAGEPROC, BufferStorage);
//ARB_sync
OPENGL_PROC(PFNGLFENCESYNCPROC, FenceSync);
OPENGL_PROC(PFNGLCLIENTWAITSYNCPROC, ClientWaitSync);
OPENGL_PROC(PFNGLGETSYNCIVPROC, GetSynciv);
OPENGL_PROC(PFNGLDELETESYNCPROC, DeleteSync);
//KHR_debug

View file

@ -44,9 +44,10 @@ color_format rsx::internals::surface_color_format_to_gl(rsx::surface_color_forma
case rsx::surface_color_format::x32:
return{ ::gl::texture::type::f32, ::gl::texture::format::red, false, 1, 4 };
case rsx::surface_color_format::a8b8g8r8:
default:
LOG_ERROR(RSX, "Surface color buffer: Unsupported surface color format (0x%x)", (u32)color_format);
case rsx::surface_color_format::a8b8g8r8:
return{ ::gl::texture::type::uint_8_8_8_8, ::gl::texture::format::bgra, false, 4, 1 };
}
}
@ -75,92 +76,46 @@ u8 rsx::internals::get_pixel_size(rsx::surface_depth_format format)
fmt::throw_exception("Unknown depth format" HERE);
}
void GLGSRender::init_buffers(bool skip_reading)
::gl::texture::internal_format rsx::internals::sized_internal_format(rsx::surface_color_format color_format)
{
u16 clip_horizontal = rsx::method_registers.surface_clip_width();
u16 clip_vertical = rsx::method_registers.surface_clip_height();
set_viewport();
if (draw_fbo && !m_rtts_dirty)
switch (color_format)
{
return;
case rsx::surface_color_format::r5g6b5:
return ::gl::texture::internal_format::r5g6b5;
case rsx::surface_color_format::a8r8g8b8:
return ::gl::texture::internal_format::rgba8;
case rsx::surface_color_format::x1r5g5b5_o1r5g5b5:
case rsx::surface_color_format::x1r5g5b5_z1r5g5b5:
case rsx::surface_color_format::x8r8g8b8_z8r8g8b8:
case rsx::surface_color_format::x8b8g8r8_o8b8g8r8:
case rsx::surface_color_format::x8b8g8r8_z8b8g8r8:
case rsx::surface_color_format::x8r8g8b8_o8r8g8b8:
return ::gl::texture::internal_format::rgba8;
case rsx::surface_color_format::w16z16y16x16:
return ::gl::texture::internal_format::rgba16f;
case rsx::surface_color_format::w32z32y32x32:
return ::gl::texture::internal_format::rgba32f;
case rsx::surface_color_format::b8:
return ::gl::texture::internal_format::r8;
case rsx::surface_color_format::g8b8:
return ::gl::texture::internal_format::rg8;
case rsx::surface_color_format::x32:
return ::gl::texture::internal_format::r32f;
case rsx::surface_color_format::a8b8g8r8:
return ::gl::texture::internal_format::rgba8;
default:
LOG_ERROR(RSX, "Surface color buffer: Unsupported surface color format (0x%x)", (u32)color_format);
return ::gl::texture::internal_format::rgba8;
}
m_rtts_dirty = false;
if (0)
{
LOG_NOTICE(RSX, "render to -> 0x%x", get_color_surface_addresses()[0]);
}
m_rtts.prepare_render_target(nullptr, rsx::method_registers.surface_color(), rsx::method_registers.surface_depth_fmt(), clip_horizontal, clip_vertical,
rsx::method_registers.surface_color_target(),
get_color_surface_addresses(), get_zeta_surface_address());
draw_fbo.recreate();
for (int i = 0; i < rsx::limits::color_buffers_count; ++i)
{
if (std::get<0>(m_rtts.m_bound_render_targets[i]))
{
__glcheck draw_fbo.color[i] = *std::get<1>(m_rtts.m_bound_render_targets[i]);
}
}
if (std::get<0>(m_rtts.m_bound_depth_stencil))
{
__glcheck draw_fbo.depth = *std::get<1>(m_rtts.m_bound_depth_stencil);
}
if (!draw_fbo.check())
return;
//HACK: read_buffer shouldn't be there
switch (rsx::method_registers.surface_color_target())
{
case rsx::surface_target::none: break;
case rsx::surface_target::surface_a:
__glcheck draw_fbo.draw_buffer(draw_fbo.color[0]);
__glcheck draw_fbo.read_buffer(draw_fbo.color[0]);
break;
case rsx::surface_target::surface_b:
__glcheck draw_fbo.draw_buffer(draw_fbo.color[1]);
__glcheck draw_fbo.read_buffer(draw_fbo.color[1]);
break;
case rsx::surface_target::surfaces_a_b:
__glcheck draw_fbo.draw_buffers({ draw_fbo.color[0], draw_fbo.color[1] });
__glcheck draw_fbo.read_buffer(draw_fbo.color[0]);
break;
case rsx::surface_target::surfaces_a_b_c:
__glcheck draw_fbo.draw_buffers({ draw_fbo.color[0], draw_fbo.color[1], draw_fbo.color[2] });
__glcheck draw_fbo.read_buffer(draw_fbo.color[0]);
break;
case rsx::surface_target::surfaces_a_b_c_d:
__glcheck draw_fbo.draw_buffers({ draw_fbo.color[0], draw_fbo.color[1], draw_fbo.color[2], draw_fbo.color[3] });
__glcheck draw_fbo.read_buffer(draw_fbo.color[0]);
break;
}
}
// Reads back the data of all bound color render targets, cropped to the
// current surface clip rectangle, into CPU-side buffers (one per target).
std::array<std::vector<gsl::byte>, 4> GLGSRender::copy_render_targets_to_memory()
{
	const int clip_width = rsx::method_registers.surface_clip_width();
	const int clip_height = rsx::method_registers.surface_clip_height();
	return m_rtts.get_render_targets_data(rsx::method_registers.surface_color(), clip_width, clip_height);
}
// Reads back the bound depth/stencil buffer, cropped to the current
// surface clip rectangle, into CPU-side buffers (depth, stencil).
std::array<std::vector<gsl::byte>, 2> GLGSRender::copy_depth_stencil_buffer_to_memory()
{
	const int clip_width = rsx::method_registers.surface_clip_width();
	const int clip_height = rsx::method_registers.surface_clip_height();
	return m_rtts.get_depth_stencil_data(rsx::method_registers.surface_depth_fmt(), clip_width, clip_height);
}
namespace
@ -196,6 +151,145 @@ namespace
}
}
// (Re)binds render targets and the depth/stencil surface to draw_fbo,
// records per-surface info for later writeback, and registers the surface
// memory regions with the texture cache when write-back config is enabled.
// NOTE(review): `skip_reading` is not referenced in this body — apparently
// kept only for interface compatibility; confirm.
void GLGSRender::init_buffers(bool skip_reading)
{
	// Fast path: framebuffer already built and surface config unchanged
	if (draw_fbo && !m_rtts_dirty)
	{
		set_viewport();
		return;
	}

	//We are about to change buffers, flush any pending requests for the old buffers
	//LOG_WARNING(RSX, "Render targets have changed; checking for sync points (EID=%d)", m_draw_calls);
	synchronize_buffers();

	m_rtts_dirty = false;

	// Snapshot the surface configuration from the RSX method registers
	const u16 clip_horizontal = rsx::method_registers.surface_clip_width();
	const u16 clip_vertical = rsx::method_registers.surface_clip_height();

	const auto pitchs = get_pitchs();
	const auto surface_format = rsx::method_registers.surface_color();
	const auto depth_format = rsx::method_registers.surface_depth_fmt();

	const auto surface_addresses = get_color_surface_addresses();
	const auto depth_address = get_zeta_surface_address();

	m_rtts.prepare_render_target(nullptr, surface_format, depth_format, clip_horizontal, clip_vertical,
		rsx::method_registers.surface_color_target(),
		surface_addresses, depth_address);

	draw_fbo.recreate();

	// Attach each bound color target and record its info; clear unused slots
	for (int i = 0; i < rsx::limits::color_buffers_count; ++i)
	{
		if (std::get<0>(m_rtts.m_bound_render_targets[i]))
		{
			__glcheck draw_fbo.color[i] = *std::get<1>(m_rtts.m_bound_render_targets[i]);

			std::get<1>(m_rtts.m_bound_render_targets[i])->set_rsx_pitch(pitchs[i]);
			surface_info[i] = { surface_addresses[i], pitchs[i], false, surface_format, depth_format, clip_horizontal, clip_vertical };
		}
		else
			surface_info[i] = {};
	}

	// Attach the depth/stencil surface, if any
	if (std::get<0>(m_rtts.m_bound_depth_stencil))
	{
		__glcheck draw_fbo.depth = *std::get<1>(m_rtts.m_bound_depth_stencil);

		std::get<1>(m_rtts.m_bound_depth_stencil)->set_rsx_pitch(rsx::method_registers.surface_z_pitch());
		depth_surface_info = { depth_address, rsx::method_registers.surface_z_pitch(), true, surface_format, depth_format, clip_horizontal, clip_vertical };
	}
	else
		depth_surface_info = {};

	// Bail out (leaving previous draw/read buffer selection) if incomplete
	if (!draw_fbo.check())
		return;

	draw_fbo.bind();
	set_viewport();

	// Select draw/read buffers according to the active surface target mask
	switch (rsx::method_registers.surface_color_target())
	{
	case rsx::surface_target::none: break;

	case rsx::surface_target::surface_a:
		__glcheck draw_fbo.draw_buffer(draw_fbo.color[0]);
		__glcheck draw_fbo.read_buffer(draw_fbo.color[0]);
		break;

	case rsx::surface_target::surface_b:
		__glcheck draw_fbo.draw_buffer(draw_fbo.color[1]);
		__glcheck draw_fbo.read_buffer(draw_fbo.color[1]);
		break;

	case rsx::surface_target::surfaces_a_b:
		__glcheck draw_fbo.draw_buffers({ draw_fbo.color[0], draw_fbo.color[1] });
		__glcheck draw_fbo.read_buffer(draw_fbo.color[0]);
		break;

	case rsx::surface_target::surfaces_a_b_c:
		__glcheck draw_fbo.draw_buffers({ draw_fbo.color[0], draw_fbo.color[1], draw_fbo.color[2] });
		__glcheck draw_fbo.read_buffer(draw_fbo.color[0]);
		break;

	case rsx::surface_target::surfaces_a_b_c_d:
		__glcheck draw_fbo.draw_buffers({ draw_fbo.color[0], draw_fbo.color[1], draw_fbo.color[2], draw_fbo.color[3] });
		__glcheck draw_fbo.read_buffer(draw_fbo.color[0]);
		break;
	}

	//Mark buffer regions as NO_ACCESS on Cell visible side
	if (g_cfg_rsx_write_color_buffers)
	{
		auto color_format = rsx::internals::surface_color_format_to_gl(surface_format);

		for (u8 i = 0; i < rsx::limits::color_buffers_count; ++i)
		{
			// Pitches <= 64 are treated as unused/dummy surfaces
			if (!surface_info[i].address || pitchs[i] <= 64) continue;

			const u32 range = surface_info[i].pitch * surface_info[i].height;
			m_gl_texture_cache.lock_rtt_region(surface_info[i].address, range, surface_info[i].width, surface_info[i].height, surface_info[i].pitch,
				color_format.format, color_format.type, *std::get<1>(m_rtts.m_bound_render_targets[i]));
		}
	}

	if (g_cfg_rsx_write_depth_buffer)
	{
		if (depth_surface_info.address && rsx::method_registers.surface_z_pitch() > 64)
		{
			auto depth_format_gl = rsx::internals::surface_depth_format_to_gl(depth_format);

			// Compute pitch from width: 2 bytes/pixel for z16, else 4
			u32 pitch = depth_surface_info.width * 2;
			if (depth_surface_info.depth_format != rsx::surface_depth_format::z16) pitch *= 2;
			const u32 range = pitch * depth_surface_info.height;

			//TODO: Verify that depth surface pitch variance affects results
			if (pitch != depth_surface_info.pitch)
				LOG_WARNING(RSX, "Depth surface pitch does not match computed pitch, %d vs %d", depth_surface_info.pitch, pitch);

			m_gl_texture_cache.lock_rtt_region(depth_surface_info.address, range, depth_surface_info.width, depth_surface_info.height, pitch,
				depth_format_gl.format, depth_format_gl.type, *std::get<1>(m_rtts.m_bound_depth_stencil));
		}
	}
}
// Reads back the data of all bound color render targets, cropped to the
// current surface clip rectangle, into CPU-side buffers (one per target).
std::array<std::vector<gsl::byte>, 4> GLGSRender::copy_render_targets_to_memory()
{
	int clip_w = rsx::method_registers.surface_clip_width();
	int clip_h = rsx::method_registers.surface_clip_height();
	return m_rtts.get_render_targets_data(rsx::method_registers.surface_color(), clip_w, clip_h);
}
// Reads back the bound depth/stencil buffer, cropped to the current
// surface clip rectangle, into CPU-side buffers (depth, stencil).
std::array<std::vector<gsl::byte>, 2> GLGSRender::copy_depth_stencil_buffer_to_memory()
{
	int clip_w = rsx::method_registers.surface_clip_width();
	int clip_h = rsx::method_registers.surface_clip_height();
	return m_rtts.get_depth_stencil_data(rsx::method_registers.surface_depth_fmt(), clip_w, clip_h);
}
void GLGSRender::read_buffers()
{
if (!draw_fbo)
@ -228,7 +322,7 @@ void GLGSRender::read_buffers()
rsx::tiled_region color_buffer = get_tiled_address(offset, location & 0xf);
u32 texaddr = (u32)((u64)color_buffer.ptr - (u64)vm::base(0));
bool success = m_gl_texture_cache.explicit_writeback((*std::get<1>(m_rtts.m_bound_render_targets[i])), texaddr, pitch);
bool success = m_gl_texture_cache.load_rtt((*std::get<1>(m_rtts.m_bound_render_targets[i])), texaddr, pitch);
//Fall back to slower methods if the image could not be fetched from cache.
if (!success)
@ -240,7 +334,7 @@ void GLGSRender::read_buffers()
else
{
u32 range = pitch * height;
m_gl_texture_cache.remove_in_range(texaddr, range);
m_gl_texture_cache.invalidate_range(texaddr, range);
std::unique_ptr<u8[]> buffer(new u8[pitch * height]);
color_buffer.read(buffer.get(), width, height, pitch);
@ -287,7 +381,7 @@ void GLGSRender::read_buffers()
return;
u32 depth_address = rsx::get_address(rsx::method_registers.surface_z_offset(), rsx::method_registers.surface_z_dma());
bool in_cache = m_gl_texture_cache.explicit_writeback((*std::get<1>(m_rtts.m_bound_depth_stencil)), depth_address, pitch);
bool in_cache = m_gl_texture_cache.load_rtt((*std::get<1>(m_rtts.m_bound_depth_stencil)), depth_address, pitch);
if (in_cache)
return;
@ -332,85 +426,36 @@ void GLGSRender::write_buffers()
if (!draw_fbo)
return;
//TODO: Detect when the data is actually being used by cell and issue download command on-demand (mark as not present?)
//Should also mark cached resources as dirty so that read buffers works out-of-the-box without modification
if (g_cfg_rsx_write_color_buffers)
{
auto color_format = rsx::internals::surface_color_format_to_gl(rsx::method_registers.surface_color());
auto write_color_buffers = [&](int index, int count)
{
u32 width = rsx::method_registers.surface_clip_width();
u32 height = rsx::method_registers.surface_clip_height();
std::array<u32, 4> offsets = get_offsets();
const std::array<u32, 4 > locations = get_locations();
const std::array<u32, 4 > pitchs = get_pitchs();
for (int i = index; i < index + count; ++i)
{
u32 offset = offsets[i];
u32 location = locations[i];
u32 pitch = pitchs[i];
if (pitch <= 64)
if (surface_info[i].address == 0 || surface_info[i].pitch <= 64)
continue;
rsx::tiled_region color_buffer = get_tiled_address(offset, location & 0xf);
u32 texaddr = (u32)((u64)color_buffer.ptr - (u64)vm::base(0));
u32 range = pitch * height;
/**Even tiles are loaded as whole textures during read_buffers from testing.
* Need further evaluation to determine correct behavior. Separate paths for both show no difference,
* but using the GPU to perform the caching is many times faster.
*/
__glcheck m_gl_texture_cache.save_render_target(texaddr, range, (*std::get<1>(m_rtts.m_bound_render_targets[i])));
const u32 range = surface_info[i].pitch * surface_info[i].height;
__glcheck m_gl_texture_cache.save_rtt(surface_info[i].address, range);
}
};
switch (rsx::method_registers.surface_color_target())
{
case rsx::surface_target::none:
break;
case rsx::surface_target::surface_a:
write_color_buffers(0, 1);
break;
case rsx::surface_target::surface_b:
write_color_buffers(1, 1);
break;
case rsx::surface_target::surfaces_a_b:
write_color_buffers(0, 2);
break;
case rsx::surface_target::surfaces_a_b_c:
write_color_buffers(0, 3);
break;
case rsx::surface_target::surfaces_a_b_c_d:
write_color_buffers(0, 4);
break;
}
write_color_buffers(0, 4);
}
if (g_cfg_rsx_write_depth_buffer)
{
//TODO: use pitch
u32 pitch = rsx::method_registers.surface_z_pitch();
if (!depth_surface_info.address || depth_surface_info.pitch <= 64) return;
if (pitch <= 64)
return;
u32 range = depth_surface_info.width * depth_surface_info.height * 2;
if (depth_surface_info.depth_format != rsx::surface_depth_format::z16) range *= 2;
auto depth_format = rsx::internals::surface_depth_format_to_gl(rsx::method_registers.surface_depth_fmt());
u32 depth_address = rsx::get_address(rsx::method_registers.surface_z_offset(), rsx::method_registers.surface_z_dma());
u32 range = std::get<1>(m_rtts.m_bound_depth_stencil)->width() * std::get<1>(m_rtts.m_bound_depth_stencil)->height() * 2;
if (rsx::method_registers.surface_depth_fmt() != rsx::surface_depth_format::z16) range *= 2;
m_gl_texture_cache.save_render_target(depth_address, range, (*std::get<1>(m_rtts.m_bound_depth_stencil)));
m_gl_texture_cache.save_rtt(depth_surface_info.address, range);
}
}

View file

@ -4,40 +4,6 @@
#include "stdafx.h"
#include "../RSXThread.h"
namespace gl
{
class render_target : public texture
{
bool is_cleared = false;
u16 native_pitch = 0;
public:
render_target() {}
void set_cleared()
{
is_cleared = true;
}
bool cleared() const
{
return is_cleared;
}
// Internal pitch is the actual row length in bytes of the openGL texture
void set_native_pitch(u16 pitch)
{
native_pitch = pitch;
}
u16 get_native_pitch() const
{
return native_pitch;
}
};
}
struct color_swizzle
{
gl::texture::channel a = gl::texture::channel::a;
@ -73,12 +39,111 @@ namespace rsx
{
namespace internals
{
::gl::texture::internal_format sized_internal_format(rsx::surface_color_format color_format);
color_format surface_color_format_to_gl(rsx::surface_color_format color_format);
depth_format surface_depth_format_to_gl(rsx::surface_depth_format depth_format);
u8 get_pixel_size(rsx::surface_depth_format format);
}
}
namespace gl
{
// A color or depth render target backed by a GL texture. Tracks both the
// GL-side ("native") row pitch and the guest ("rsx") pitch, the cleared
// state, and cached dimensions used for sub-resource address lookups.
class render_target : public texture
{
	bool is_cleared = false;

	// NOTE(review): member is u32 but set_rsx_pitch/get_rsx_pitch use u16,
	// so pitches above 0xFFFF would truncate — confirm intended.
	u32 rsx_pitch = 0;
	u16 native_pitch = 0;

	// Lazily cached from texture::height()/width() on first use
	u16 surface_height = 0;
	u16 surface_width = 0;
	u16 surface_pixel_size = 0;

	texture::internal_format compatible_internal_format = texture::internal_format::rgba8;

public:

	render_target() {}

	void set_cleared()
	{
		is_cleared = true;
	}

	bool cleared() const
	{
		return is_cleared;
	}

	// Internal pitch is the actual row length in bytes of the openGL texture
	void set_native_pitch(u16 pitch)
	{
		native_pitch = pitch;
	}

	u16 get_native_pitch() const
	{
		return native_pitch;
	}

	// Rsx pitch: row length in bytes as seen by the guest
	void set_rsx_pitch(u16 pitch)
	{
		rsx_pitch = pitch;
	}

	u16 get_rsx_pitch() const
	{
		return rsx_pitch;
	}

	// Returns (width, height), caching them from the GL texture on first call
	std::pair<u16, u16> get_dimensions()
	{
		if (!surface_height) surface_height = height();
		if (!surface_width) surface_width = width();

		return std::make_pair(surface_width, surface_height);
	}

	void set_compatible_format(texture::internal_format format)
	{
		compatible_internal_format = format;
	}

	texture::internal_format get_compatible_internal_format()
	{
		return compatible_internal_format;
	}

	// For an address within the texture, extract this sub-section's rect origin
	// Returns (hit, x, y); offset is in bytes relative to the surface base.
	std::tuple<bool, u16, u16> get_texture_subresource(u32 offset)
	{
		if (!offset)
		{
			return std::make_tuple(true, 0, 0);
		}

		if (!surface_height) surface_height = height();
		if (!surface_width) surface_width = width();

		u32 range = rsx_pitch * surface_height;
		if (offset < range)
		{
			// Bytes per pixel, derived from the GL-side pitch
			// NOTE(review): assumes surface_width != 0 and native_pitch is a
			// whole multiple of the width — confirm for all formats.
			if (!surface_pixel_size)
				surface_pixel_size = native_pitch / surface_width;

			u32 pixel_offset = (offset / surface_pixel_size);
			u32 y = (pixel_offset / surface_width);
			u32 x = (pixel_offset % surface_width);

			return std::make_tuple(true, (u16)x, (u16)y);
		}
		else
			return std::make_tuple(false, 0, 0);
	}
};
}
struct gl_render_target_traits
{
using surface_storage_type = std::unique_ptr<gl::render_target>;
@ -97,13 +162,17 @@ struct gl_render_target_traits
std::unique_ptr<gl::render_target> result(new gl::render_target());
auto format = rsx::internals::surface_color_format_to_gl(surface_color_format);
auto internal_fmt = rsx::internals::sized_internal_format(surface_color_format);
result->recreate(gl::texture::target::texture2D);
result->set_native_pitch(width * format.channel_count * format.channel_size);
result->set_compatible_format(internal_fmt);
__glcheck result->config()
.size({ (int)width, (int)height })
.type(format.type)
.format(format.format)
.internal_format(internal_fmt)
.swizzle(format.swizzle.r, format.swizzle.g, format.swizzle.b, format.swizzle.a)
.wrap(gl::texture::wrap::clamp_to_border, gl::texture::wrap::clamp_to_border, gl::texture::wrap::clamp_to_border)
.apply();
@ -144,6 +213,7 @@ struct gl_render_target_traits
native_pitch *= 2;
result->set_native_pitch(native_pitch);
result->set_compatible_format(format.internal_format);
return result;
}
@ -210,7 +280,138 @@ struct gl_render_target_traits
}
};
struct gl_render_targets : public rsx::surface_store<gl_render_target_traits>
// Describes a rectangular sub-region of a cached render target surface,
// along with whether that surface is currently bound and whether it is a
// depth surface. The surface pointer is non-owning.
struct surface_subresource
{
	gl::render_target *surface = nullptr;

	u16 x = 0;
	u16 y = 0;
	u16 w = 0;
	u16 h = 0;

	bool is_bound = false;
	bool is_depth_surface = false;

	surface_subresource() = default;

	surface_subresource(gl::render_target *parent, u16 origin_x, u16 origin_y, u16 width, u16 height, bool bound, bool depth)
		: surface(parent), x(origin_x), y(origin_y), w(width), h(height), is_bound(bound), is_depth_surface(depth)
	{}
};
// Surface store specialization that can resolve an arbitrary RSX texture
// address to a sub-rectangle of an existing color or depth render target.
class gl_render_targets : public rsx::surface_store<gl_render_target_traits>
{
private:
// Returns true if texaddr falls inside 'surface' (which starts at
// surface_address); on success writes the pixel origin to *x / *y.
bool surface_overlaps(gl::render_target *surface, u32 surface_address, u32 texaddr, u16 *x, u16 *y)
{
bool is_subslice = false;
u16 x_offset = 0;
u16 y_offset = 0;
if (surface_address > texaddr)
return false;
u32 offset = texaddr - surface_address;
// NOTE(review): offset is unsigned, so 'offset >= 0' is always true;
// the real containment test is done by get_texture_subresource().
if (offset >= 0)
{
std::tie(is_subslice, x_offset, y_offset) = surface->get_texture_subresource(offset);
if (is_subslice)
{
*x = x_offset;
*y = y_offset;
return true;
}
}
return false;
}
// Whether 'address' is the base of a currently bound surface
// (the bound depth-stencil if is_depth, otherwise any bound color RT).
bool is_bound(u32 address, bool is_depth)
{
if (is_depth)
{
const u32 bound_depth_address = std::get<0>(m_bound_depth_stencil);
return (bound_depth_address == address);
}
for (auto &surface: m_bound_render_targets)
{
const u32 bound_address = std::get<0>(surface);
if (bound_address == address)
return true;
}
return false;
}
// Whether the requested rect at (x_offset, y_offset) lies fully within
// the surface dimensions 'dims' (width, height).
bool fits(gl::render_target *src, std::pair<u16, u16> &dims, u16 x_offset, u16 y_offset, u16 width, u16 height) const
{
if ((x_offset + width) > dims.first) return false;
if ((y_offset + height) > dims.second) return false;
return true;
}
public:
// Find a cached color or depth surface containing texaddr with a matching
// pitch, and return the sub-rectangle to sample from. Returns a
// default-constructed (null surface) result if nothing matches.
surface_subresource get_surface_subresource_if_applicable(u32 texaddr, u16 requested_width, u16 requested_height, u16 requested_pitch)
{
gl::render_target *surface = nullptr;
bool is_subslice = false;
u16 x_offset = 0;
u16 y_offset = 0;
// Color render targets first
for (auto &tex_info : m_render_targets_storage)
{
u32 this_address = std::get<0>(tex_info);
surface = std::get<1>(tex_info).get();
if (surface_overlaps(surface, this_address, texaddr, &x_offset, &y_offset))
{
// Pitch mismatch means a different memory layout; not usable
if (surface->get_rsx_pitch() != requested_pitch)
continue;
auto dims = surface->get_dimensions();
if (fits(surface, dims, x_offset, y_offset, requested_width, requested_height))
return{ surface, x_offset, y_offset, requested_width, requested_height, is_bound(this_address, false), false };
else
{
// Requested rect spills past the surface at this offset, but the
// surface itself is large enough: fall back to its origin.
if (dims.first >= requested_width && dims.second >= requested_height)
{
LOG_WARNING(RSX, "Overlapping surface exceeds bounds; returning full surface region");
return{ surface, 0, 0, requested_width, requested_height, is_bound(this_address, false), false };
}
}
}
}
//Check depth surfaces for overlap
for (auto &tex_info : m_depth_stencil_storage)
{
u32 this_address = std::get<0>(tex_info);
surface = std::get<1>(tex_info).get();
if (surface_overlaps(surface, this_address, texaddr, &x_offset, &y_offset))
{
if (surface->get_rsx_pitch() != requested_pitch)
continue;
auto dims = surface->get_dimensions();
if (fits(surface, dims, x_offset, y_offset, requested_width, requested_height))
return{ surface, x_offset, y_offset, requested_width, requested_height, is_bound(this_address, true), true };
else
{
if (dims.first >= requested_width && dims.second >= requested_height)
{
LOG_WARNING(RSX, "Overlapping depth surface exceeds bounds; returning full surface region");
return{ surface, 0, 0, requested_width, requested_height, is_bound(this_address, true), true };
}
}
}
}
// No overlapping surface found
return {};
}
};

View file

@ -7,7 +7,7 @@
#include "../rsx_utils.h"
#include "../Common/TextureUtils.h"
namespace
namespace gl
{
GLenum get_sized_internal_format(u32 texture_format)
{
@ -40,7 +40,6 @@ namespace
fmt::throw_exception("Compressed or unknown texture format 0x%x" HERE, texture_format);
}
std::tuple<GLenum, GLenum> get_format_type(u32 texture_format)
{
switch (texture_format)
@ -68,7 +67,10 @@ namespace
}
fmt::throw_exception("Compressed or unknown texture format 0x%x" HERE, texture_format);
}
}
namespace
{
bool is_compressed_format(u32 texture_format)
{
switch (texture_format)
@ -319,10 +321,10 @@ namespace rsx
int mip_level = 0;
if (dim == rsx::texture_dimension_extended::texture_dimension_1d)
{
__glcheck glTexStorage1D(GL_TEXTURE_1D, mipmap_count, get_sized_internal_format(format), width);
__glcheck glTexStorage1D(GL_TEXTURE_1D, mipmap_count, ::gl::get_sized_internal_format(format), width);
if (!is_compressed_format(format))
{
const auto &format_type = get_format_type(format);
const auto &format_type = ::gl::get_format_type(format);
for (const rsx_subresource_layout &layout : input_layouts)
{
__glcheck upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4);
@ -335,7 +337,7 @@ namespace rsx
{
u32 size = layout.width_in_block * ((format == CELL_GCM_TEXTURE_COMPRESSED_DXT1) ? 8 : 16);
__glcheck upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4);
__glcheck glCompressedTexSubImage1D(GL_TEXTURE_1D, mip_level++, 0, layout.width_in_block * 4, get_sized_internal_format(format), size, staging_buffer.data());
__glcheck glCompressedTexSubImage1D(GL_TEXTURE_1D, mip_level++, 0, layout.width_in_block * 4, ::gl::get_sized_internal_format(format), size, staging_buffer.data());
}
}
return;
@ -343,10 +345,10 @@ namespace rsx
if (dim == rsx::texture_dimension_extended::texture_dimension_2d)
{
__glcheck glTexStorage2D(GL_TEXTURE_2D, mipmap_count, get_sized_internal_format(format), width, height);
__glcheck glTexStorage2D(GL_TEXTURE_2D, mipmap_count, ::gl::get_sized_internal_format(format), width, height);
if (!is_compressed_format(format))
{
const auto &format_type = get_format_type(format);
const auto &format_type = ::gl::get_format_type(format);
for (const rsx_subresource_layout &layout : input_layouts)
{
__glcheck upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4);
@ -359,7 +361,7 @@ namespace rsx
{
u32 size = layout.width_in_block * layout.height_in_block * ((format == CELL_GCM_TEXTURE_COMPRESSED_DXT1) ? 8 : 16);
__glcheck upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4);
__glcheck glCompressedTexSubImage2D(GL_TEXTURE_2D, mip_level++, 0, 0, layout.width_in_block * 4, layout.height_in_block * 4, get_sized_internal_format(format), size, staging_buffer.data());
__glcheck glCompressedTexSubImage2D(GL_TEXTURE_2D, mip_level++, 0, 0, layout.width_in_block * 4, layout.height_in_block * 4, ::gl::get_sized_internal_format(format), size, staging_buffer.data());
}
}
return;
@ -367,13 +369,13 @@ namespace rsx
if (dim == rsx::texture_dimension_extended::texture_dimension_cubemap)
{
__glcheck glTexStorage2D(GL_TEXTURE_CUBE_MAP, mipmap_count, get_sized_internal_format(format), width, height);
__glcheck glTexStorage2D(GL_TEXTURE_CUBE_MAP, mipmap_count, ::gl::get_sized_internal_format(format), width, height);
// Note : input_layouts size is get_exact_mipmap_count() for non cubemap texture, and 6 * get_exact_mipmap_count() for cubemap
// Thus for non cubemap texture, mip_level / mipmap_per_layer will always be rounded to 0.
// mip_level % mipmap_per_layer will always be equal to mip_level
if (!is_compressed_format(format))
{
const auto &format_type = get_format_type(format);
const auto &format_type = ::gl::get_format_type(format);
for (const rsx_subresource_layout &layout : input_layouts)
{
upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4);
@ -387,7 +389,7 @@ namespace rsx
{
u32 size = layout.width_in_block * layout.height_in_block * ((format == CELL_GCM_TEXTURE_COMPRESSED_DXT1) ? 8 : 16);
__glcheck upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4);
__glcheck glCompressedTexSubImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X + mip_level / mipmap_count, mip_level % mipmap_count, 0, 0, layout.width_in_block * 4, layout.height_in_block * 4, get_sized_internal_format(format), size, staging_buffer.data());
__glcheck glCompressedTexSubImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X + mip_level / mipmap_count, mip_level % mipmap_count, 0, 0, layout.width_in_block * 4, layout.height_in_block * 4, ::gl::get_sized_internal_format(format), size, staging_buffer.data());
mip_level++;
}
}
@ -396,10 +398,10 @@ namespace rsx
if (dim == rsx::texture_dimension_extended::texture_dimension_3d)
{
__glcheck glTexStorage3D(GL_TEXTURE_3D, mipmap_count, get_sized_internal_format(format), width, height, depth);
__glcheck glTexStorage3D(GL_TEXTURE_3D, mipmap_count, ::gl::get_sized_internal_format(format), width, height, depth);
if (!is_compressed_format(format))
{
const auto &format_type = get_format_type(format);
const auto &format_type = ::gl::get_format_type(format);
for (const rsx_subresource_layout &layout : input_layouts)
{
__glcheck upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4);
@ -412,7 +414,7 @@ namespace rsx
{
u32 size = layout.width_in_block * layout.height_in_block * layout.depth * ((format == CELL_GCM_TEXTURE_COMPRESSED_DXT1) ? 8 : 16);
__glcheck upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4);
__glcheck glCompressedTexSubImage3D(GL_TEXTURE_3D, mip_level++, 0, 0, 0, layout.width_in_block * 4, layout.height_in_block * 4, layout.depth, get_sized_internal_format(format), size, staging_buffer.data());
__glcheck glCompressedTexSubImage3D(GL_TEXTURE_3D, mip_level++, 0, 0, 0, layout.width_in_block * 4, layout.height_in_block * 4, layout.depth, ::gl::get_sized_internal_format(format), size, staging_buffer.data());
}
}
return;

View file

@ -64,3 +64,9 @@ namespace rsx
};
}
}
namespace gl
{
GLenum get_sized_internal_format(u32 gcm_format);
std::tuple<GLenum, GLenum> get_format_type(u32 texture_format);
}

View file

@ -0,0 +1,63 @@
#pragma once
#include "stdafx.h"
#include "GLGSRender.h"
#include "GLTextureCache.h"
namespace gl
{
// Flush the cached RTT section overlapping 'address' back to CPU memory.
// Returns true if a section was (or will be) flushed, false if the address
// is outside the cache range or no live section covers it.
// May be called from any thread; GL work is marshalled to the renderer thread.
bool texture_cache::flush_section(u32 address)
{
// Fast reject: address outside the tracked RTT cache range
if (address < rtt_cache_range.first ||
address >= rtt_cache_range.second)
return false;
bool post_task = false;
{
// Scope the lock: it must be released before blocking on the renderer
std::lock_guard<std::mutex> lock(m_section_mutex);
for (cached_rtt_section &rtt : m_rtt_cache)
{
if (rtt.is_dirty()) continue;
if (rtt.is_locked() && rtt.overlaps(address))
{
// Already flushed but still locked: inconsistent state, log and keep looking
if (rtt.is_flushed())
{
LOG_WARNING(RSX, "Section matches range, but marked as already flushed!, 0x%X+0x%X", rtt.get_section_base(), rtt.get_section_size());
continue;
}
//LOG_WARNING(RSX, "Cell needs GPUData synced here, address=0x%X", address);
// GL calls are only valid on the renderer thread; if we are on any
// other thread, defer the flush via a posted work item instead.
if (std::this_thread::get_id() != m_renderer_thread)
{
post_task = true;
break;
}
rtt.flush();
return true;
}
}
}
if (post_task)
{
//LOG_WARNING(RSX, "Cache access not from worker thread! address = 0x%X", address);
// Post the request and block until the renderer thread processes it
work_item &task = m_renderer->post_flush_request(address);
{
std::unique_lock<std::mutex> lock(task.guard_mutex);
task.cv.wait(lock, [&task] { return task.processed; });
}
// Acknowledge receipt so the renderer can recycle the work item
task.received = true;
return task.result;
}
return false;
}
}

File diff suppressed because it is too large Load diff

View file

@ -391,6 +391,9 @@ namespace rsx
// TODO: exit condition
while (!Emu.IsStopped())
{
//Execute backend-local tasks first
do_local_task();
const u32 get = ctrl->get;
const u32 put = ctrl->put;
@ -634,6 +637,8 @@ namespace rsx
std::vector<std::variant<vertex_array_buffer, vertex_array_register, empty_vertex_array>> thread::get_vertex_buffers(const rsx::rsx_state& state, const std::vector<std::pair<u32, u32>>& vertex_ranges) const
{
std::vector<std::variant<vertex_array_buffer, vertex_array_register, empty_vertex_array>> result;
result.reserve(rsx::limits::vertex_count);
u32 input_mask = state.vertex_attrib_input_mask();
for (u8 index = 0; index < rsx::limits::vertex_count; ++index)
{
@ -835,7 +840,7 @@ namespace rsx
return result;
}
RSXFragmentProgram thread::get_current_fragment_program(std::function<std::tuple<bool, u16>(u32, bool)> get_surface_info) const
RSXFragmentProgram thread::get_current_fragment_program(std::function<std::tuple<bool, u16>(u32, fragment_texture&, bool)> get_surface_info) const
{
RSXFragmentProgram result = {};
u32 shader_program = rsx::method_registers.shader_program_address();
@ -883,7 +888,7 @@ namespace rsx
bool surface_exists;
u16 surface_pitch;
std::tie(surface_exists, surface_pitch) = get_surface_info(texaddr, false);
std::tie(surface_exists, surface_pitch) = get_surface_info(texaddr, tex, false);
if (surface_exists && surface_pitch)
{
@ -892,7 +897,7 @@ namespace rsx
}
else
{
std::tie(surface_exists, surface_pitch) = get_surface_info(texaddr, true);
std::tie(surface_exists, surface_pitch) = get_surface_info(texaddr, tex, true);
if (surface_exists)
{
u32 format = raw_format & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN);

View file

@ -219,7 +219,7 @@ namespace rsx
* get_surface_info is a helper that takes 3 parameters: rsx_texture_address, the fragment texture being sampled, and surface_is_depth.
* It returns whether the surface is a render target and the surface pitch in native format.
*/
RSXFragmentProgram get_current_fragment_program(std::function<std::tuple<bool, u16>(u32, bool)> get_surface_info) const;
RSXFragmentProgram get_current_fragment_program(std::function<std::tuple<bool, u16>(u32, fragment_texture&, bool)> get_surface_info) const;
public:
double fps_limit = 59.94;
@ -239,6 +239,11 @@ namespace rsx
virtual void on_task() override;
virtual void on_exit() override;
/**
* Execute a backend local task queue
*/
virtual void do_local_task() {}
public:
virtual std::string get_name() const override;

View file

@ -981,7 +981,7 @@ bool VKGSRender::do_method(u32 cmd, u32 arg)
bool VKGSRender::load_program()
{
auto rtt_lookup_func = [this](u32 texaddr, bool is_depth) -> std::tuple<bool, u16>
auto rtt_lookup_func = [this](u32 texaddr, rsx::fragment_texture&, bool is_depth) -> std::tuple<bool, u16>
{
vk::render_target *surface = nullptr;
if (!is_depth)

View file

@ -6,143 +6,132 @@
namespace vk
{
struct cached_texture_object
class cached_texture_section : public rsx::buffered_section
{
u32 native_rsx_address;
u32 native_rsx_size;
u16 width;
u16 height;
u16 depth;
u16 mipmaps;
std::unique_ptr<vk::image_view> uploaded_image_view;
std::unique_ptr<vk::image> uploaded_texture;
u64 protected_rgn_start;
u64 protected_rgn_end;
bool exists = false;
bool locked = false;
bool dirty = true;
public:
cached_texture_section() {}
void create(u16 w, u16 h, u16 depth, u16 mipmaps, vk::image_view *view, vk::image *image)
{
width = w;
height = h;
this->depth = depth;
this->mipmaps = mipmaps;
uploaded_image_view.reset(view);
uploaded_texture.reset(image);
}
bool matches(u32 rsx_address, u32 rsx_size) const
{
return rsx::buffered_section::matches(rsx_address, rsx_size);
}
bool matches(u32 rsx_address, u32 width, u32 height, u32 mipmaps) const
{
if (rsx_address == cpu_address_base)
{
if (!width && !height && !mipmaps)
return true;
return (width == this->width && height == this->height && mipmaps == this->mipmaps);
}
return false;
}
bool exists() const
{
return (uploaded_texture.get() != nullptr);
}
u16 get_width() const
{
return width;
}
u16 get_height() const
{
return height;
}
std::unique_ptr<vk::image_view>& get_view()
{
return uploaded_image_view;
}
std::unique_ptr<vk::image>& get_texture()
{
return uploaded_texture;
}
};
class texture_cache
{
private:
std::vector<cached_texture_object> m_cache;
std::vector<cached_texture_section> m_cache;
std::pair<u64, u64> texture_cache_range = std::make_pair(0xFFFFFFFF, 0);
std::vector<std::unique_ptr<vk::image_view> > m_temporary_image_view;
std::vector<std::unique_ptr<vk::image>> m_dirty_textures;
bool lock_memory_region(u32 start, u32 size)
cached_texture_section& find_cached_texture(u32 rsx_address, u32 rsx_size, bool confirm_dimensions = false, u16 width = 0, u16 height = 0, u16 mipmaps = 0)
{
static const u32 memory_page_size = 4096;
start = start & ~(memory_page_size - 1);
size = (u32)align(size, memory_page_size);
return vm::page_protect(start, size, 0, 0, vm::page_writable);
}
bool unlock_memory_region(u32 start, u32 size)
{
static const u32 memory_page_size = 4096;
start = start & ~(memory_page_size - 1);
size = (u32)align(size, memory_page_size);
return vm::page_protect(start, size, 0, vm::page_writable, 0);
}
bool region_overlaps(u32 base1, u32 limit1, u32 base2, u32 limit2)
{
//Check for memory area overlap. unlock page(s) if needed and add this index to array.
//Axis separation test
const u32 &block_start = base1;
const u32 block_end = limit1;
if (limit2 < block_start) return false;
if (base2 > block_end) return false;
u32 min_separation = (limit2 - base2) + (limit1 - base1);
u32 range_limit = (block_end > limit2) ? block_end : limit2;
u32 range_base = (block_start < base2) ? block_start : base2;
u32 actual_separation = (range_limit - range_base);
if (actual_separation < min_separation)
return true;
return false;
}
cached_texture_object& find_cached_texture(u32 rsx_address, u32 rsx_size, bool confirm_dimensions = false, u16 width = 0, u16 height = 0, u16 mipmaps = 0)
{
for (cached_texture_object &tex : m_cache)
for (auto &tex : m_cache)
{
if (!tex.dirty && tex.exists &&
tex.native_rsx_address == rsx_address &&
tex.native_rsx_size == rsx_size)
if (tex.matches(rsx_address, rsx_size) && !tex.is_dirty())
{
if (!confirm_dimensions) return tex;
if (tex.width == width && tex.height == height && tex.mipmaps == mipmaps)
if (tex.matches(rsx_address, width, height, mipmaps))
return tex;
else
{
LOG_ERROR(RSX, "Cached object for address 0x%X was found, but it does not match stored parameters.");
LOG_ERROR(RSX, "%d x %d vs %d x %d", width, height, tex.width, tex.height);
LOG_ERROR(RSX, "%d x %d vs %d x %d", width, height, tex.get_width(), tex.get_height());
}
}
}
for (cached_texture_object &tex : m_cache)
for (auto &tex : m_cache)
{
if (tex.dirty)
if (tex.is_dirty())
{
if (tex.exists)
if (tex.exists())
{
m_dirty_textures.push_back(std::move(tex.uploaded_texture));
tex.exists = false;
m_dirty_textures.push_back(std::move(tex.get_texture()));
m_temporary_image_view.push_back(std::move(tex.get_view()));
}
return tex;
}
}
m_cache.push_back(cached_texture_object());
m_cache.push_back(cached_texture_section());
return m_cache[m_cache.size() - 1];
}
void lock_object(cached_texture_object &obj)
{
static const u32 memory_page_size = 4096;
obj.protected_rgn_start = obj.native_rsx_address & ~(memory_page_size - 1);
obj.protected_rgn_end = (u32)align(obj.native_rsx_size, memory_page_size);
obj.protected_rgn_end += obj.protected_rgn_start;
lock_memory_region(static_cast<u32>(obj.protected_rgn_start), static_cast<u32>(obj.native_rsx_size));
if (obj.protected_rgn_start < texture_cache_range.first)
texture_cache_range = std::make_pair(obj.protected_rgn_start, texture_cache_range.second);
if (obj.protected_rgn_end > texture_cache_range.second)
texture_cache_range = std::make_pair(texture_cache_range.first, obj.protected_rgn_end);
}
void unlock_object(cached_texture_object &obj)
{
unlock_memory_region(static_cast<u32>(obj.protected_rgn_start), static_cast<u32>(obj.native_rsx_size));
}
void purge_cache()
{
for (cached_texture_object &tex : m_cache)
for (auto &tex : m_cache)
{
if (tex.exists)
m_dirty_textures.push_back(std::move(tex.uploaded_texture));
if (tex.exists())
{
m_dirty_textures.push_back(std::move(tex.get_texture()));
m_temporary_image_view.push_back(std::move(tex.get_view()));
}
if (tex.locked)
unlock_object(tex);
if (tex.is_locked())
tex.unprotect();
}
m_temporary_image_view.clear();
@ -196,12 +185,6 @@ namespace vk
return m_temporary_image_view.back().get();
}
cached_texture_object& cto = find_cached_texture(texaddr, range, true, tex.width(), tex.height(), tex.get_exact_mipmap_count());
if (cto.exists && !cto.dirty)
{
return cto.uploaded_image_view.get();
}
u32 raw_format = tex.format();
u32 format = raw_format & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN);
@ -213,6 +196,7 @@ namespace vk
u16 height = 0;
u16 depth = 0;
u8 layer = 0;
switch (tex.get_extended_texture_dimension())
{
case rsx::texture_dimension_extended::texture_dimension_1d:
@ -245,6 +229,12 @@ namespace vk
break;
}
cached_texture_section& region = find_cached_texture(texaddr, range, true, tex.width(), height, tex.get_exact_mipmap_count());
if (region.exists() && !region.is_dirty())
{
return region.get_view().get();
}
bool is_cubemap = tex.get_extended_texture_dimension() == rsx::texture_dimension_extended::texture_dimension_cubemap;
VkImageSubresourceRange subresource_range = vk::get_image_subresource_range(0, 0, is_cubemap ? 6 : 1, tex.get_exact_mipmap_count(), VK_IMAGE_ASPECT_COLOR_BIT);
@ -255,33 +245,29 @@ namespace vk
return nullptr;
}
cto.uploaded_texture = std::make_unique<vk::image>(*vk::get_current_renderer(), memory_type_mapping.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
vk::image *image = new vk::image(*vk::get_current_renderer(), memory_type_mapping.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
image_type,
vk_format,
tex.width(), height, depth, tex.get_exact_mipmap_count(), layer, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, is_cubemap ? VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT : 0);
change_image_layout(cmd, cto.uploaded_texture->value, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresource_range);
change_image_layout(cmd, image->value, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresource_range);
cto.uploaded_image_view = std::make_unique<vk::image_view>(*vk::get_current_renderer(), cto.uploaded_texture->value, image_view_type, vk_format,
vk::image_view *view = new vk::image_view(*vk::get_current_renderer(), image->value, image_view_type, vk_format,
mapping,
subresource_range);
copy_mipmaped_image_using_buffer(cmd, cto.uploaded_texture->value, get_subresources_layout(tex), format, !(tex.format() & CELL_GCM_TEXTURE_LN), tex.get_exact_mipmap_count(),
copy_mipmaped_image_using_buffer(cmd, image->value, get_subresources_layout(tex), format, !(tex.format() & CELL_GCM_TEXTURE_LN), tex.get_exact_mipmap_count(),
upload_heap, upload_buffer);
change_image_layout(cmd, cto.uploaded_texture->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, subresource_range);
change_image_layout(cmd, image->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, subresource_range);
cto.exists = true;
cto.dirty = false;
cto.native_rsx_address = texaddr;
cto.native_rsx_size = range;
cto.width = tex.width();
cto.height = tex.height();
cto.mipmaps = tex.get_exact_mipmap_count();
lock_object(cto);
region.reset(texaddr, range);
region.create(tex.width(), height, depth, tex.get_exact_mipmap_count(), view, image);
region.protect(0, vm::page_writable);
region.set_dirty(false);
return cto.uploaded_image_view.get();
texture_cache_range = region.get_min_max(texture_cache_range);
return view;
}
bool invalidate_address(u32 rsx_address)
@ -290,23 +276,22 @@ namespace vk
rsx_address > texture_cache_range.second)
return false;
for (cached_texture_object &tex : m_cache)
bool response = false;
for (auto &tex : m_cache)
{
if (tex.dirty) continue;
if (tex.is_dirty()) continue;
if (rsx_address >= tex.protected_rgn_start &&
rsx_address < tex.protected_rgn_end)
if (tex.overlaps(rsx_address))
{
unlock_object(tex);
tex.set_dirty(true);
tex.unprotect();
tex.native_rsx_address = 0;
tex.dirty = true;
return true;
response = true;
}
}
return false;
return response;
}
void flush()

View file

@ -1,5 +1,6 @@
#pragma once
#include <rsx_decompiler.h>
#include "Emu/Memory/vm.h"
namespace rsx
{
@ -64,4 +65,140 @@ namespace rsx
program_info get(raw_program raw_program_, decompile_language lang);
void clear();
};
class buffered_section
{
protected:
u32 cpu_address_base = 0;
u32 cpu_address_range = 0;
u32 locked_address_base = 0;
u32 locked_address_range = 0;
u32 memory_protection = 0;
bool locked = false;
bool dirty = false;
bool region_overlaps(u32 base1, u32 limit1, u32 base2, u32 limit2)
{
//Check for memory area overlap. unlock page(s) if needed and add this index to array.
//Axis separation test
const u32 &block_start = base1;
const u32 block_end = limit1;
if (limit2 < block_start) return false;
if (base2 > block_end) return false;
u32 min_separation = (limit2 - base2) + (limit1 - base1);
u32 range_limit = (block_end > limit2) ? block_end : limit2;
u32 range_base = (block_start < base2) ? block_start : base2;
u32 actual_separation = (range_limit - range_base);
if (actual_separation < min_separation)
return true;
return false;
}
public:
buffered_section() {}
~buffered_section() {}
void reset(u32 base, u32 length)
{
verify(HERE), locked == false;
cpu_address_base = base;
cpu_address_range = length;
locked_address_base = (base & ~4095);
locked_address_range = align(base + length, 4096) - locked_address_base;
memory_protection = vm::page_readable | vm::page_writable;
locked = false;
}
bool protect(u8 flags_set, u8 flags_clear)
{
if (vm::page_protect(locked_address_base, locked_address_range, 0, flags_set, flags_clear))
{
memory_protection &= ~flags_clear;
memory_protection |= flags_set;
locked = memory_protection != (vm::page_readable | vm::page_writable);
}
else
fmt::throw_exception("failed to lock memory @ 0x%X!", locked_address_base);
return false;
}
bool unprotect()
{
u32 flags_set = (vm::page_readable | vm::page_writable) & ~memory_protection;
if (vm::page_protect(locked_address_base, locked_address_range, 0, flags_set, 0))
{
memory_protection = (vm::page_writable | vm::page_readable);
locked = false;
return true;
}
else
fmt::throw_exception("failed to unlock memory @ 0x%X!", locked_address_base);
return false;
}
bool overlaps(std::pair<u32, u32> range)
{
return region_overlaps(locked_address_base, locked_address_base + locked_address_range, range.first, range.first + range.second);
}
bool overlaps(u32 address)
{
return (locked_address_base <= address && (address - locked_address_base) < locked_address_range);
}
bool is_locked() const
{
return locked;
}
bool is_dirty() const
{
return dirty;
}
void set_dirty(bool state)
{
dirty = state;
}
u32 get_section_base() const
{
return cpu_address_base;
}
u32 get_section_size() const
{
return cpu_address_range;
}
bool matches(u32 cpu_address, u32 size) const
{
return (cpu_address_base == cpu_address && cpu_address_range == size);
}
std::pair<u32, u32> get_min_max(std::pair<u32, u32> current_min_max)
{
u32 min = std::min(current_min_max.first, locked_address_base);
u32 max = std::max(current_min_max.second, locked_address_base + locked_address_range);
return std::make_pair(min, max);
}
};
}

View file

@ -109,6 +109,7 @@
<ClCompile Include="Emu\RSX\GL\GLCommonDecompiler.cpp" />
<ClCompile Include="Emu\RSX\GL\GLFragmentProgram.cpp" />
<ClCompile Include="Emu\RSX\GL\GLGSRender.cpp" />
<ClCompile Include="Emu\RSX\GL\GLTextureCache.cpp" />
<ClCompile Include="Emu\RSX\GL\GLVertexProgram.cpp" />
<ClCompile Include="Emu\RSX\GL\GLHelpers.cpp" />
<ClCompile Include="Emu\RSX\GL\GLRenderTargets.cpp" />

View file

@ -10,6 +10,7 @@
<ClCompile Include="Emu\RSX\GL\OpenGL.cpp" />
<ClCompile Include="Emu\RSX\GL\GLRenderTargets.cpp" />
<ClCompile Include="Emu\RSX\GL\GLVertexBuffers.cpp" />
<ClCompile Include="Emu\RSX\GL\GLTextureCache.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="Emu\RSX\GL\GLTexture.h" />