mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-04-20 19:45:20 +00:00
gl: Minor optimizations
- rsx: Texture cache - improvements to locking
- rsx: Minor optimizations to get_current_vertex_program and begin-end batch flushes
- rsx: Optimize texture cache storage - manages storage in blocks of 16MB
- rsx/vk/gl: Fix swizzled texture input
- gl: Hotfix for compressed texture formats
This commit is contained in:
parent
e37a2a8f7d
commit
45d0e821dc
9 changed files with 372 additions and 267 deletions
|
@ -14,6 +14,12 @@ namespace rsx
|
|||
swapped_native_component_order = 2,
|
||||
};
|
||||
|
||||
//Tags an upload_image_from_cpu request with the path that issued it
enum texture_upload_context
{
	//Passed by the texture upload path that returns a raw view for sampling
	shader_read = 0,

	//Passed when the source image of a blit engine transfer is uploaded
	blit_engine_src = 1
};
|
||||
|
||||
template <typename commandbuffer_type, typename section_storage_type, typename image_resource_type, typename image_view_type, typename image_storage_type, typename texture_format>
|
||||
class texture_cache
|
||||
{
|
||||
|
@ -34,12 +40,14 @@ namespace rsx
|
|||
|
||||
//Registers one more valid section of data_size bytes in this range:
//widens max_range when the new section is larger and bumps the valid section counter
void notify(u32 data_size)
{
	//Sanity check on the counter before touching it
	verify(HERE), valid_count >= 0;

	if (max_range < data_size)
	{
		max_range = data_size;
	}

	++valid_count;
}
|
||||
|
||||
void add(section_storage_type& section, u32 data_size)
|
||||
{
|
||||
verify(HERE), valid_count >= 0;
|
||||
max_range = std::max(data_size, max_range);
|
||||
valid_count++;
|
||||
|
||||
|
@ -65,7 +73,7 @@ namespace rsx
|
|||
std::unordered_map<u32, framebuffer_memory_characteristics> m_cache_miss_statistics_table;
|
||||
|
||||
//Memory usage
|
||||
const s32 m_max_zombie_objects = 32; //Limit on how many texture objects to keep around for reuse after they are invalidated
|
||||
const s32 m_max_zombie_objects = 128; //Limit on how many texture objects to keep around for reuse after they are invalidated
|
||||
s32 m_unreleased_texture_objects = 0; //Number of invalidated objects not yet freed from memory
|
||||
|
||||
/* Helpers */
|
||||
|
@ -74,11 +82,141 @@ namespace rsx
|
|||
virtual image_view_type create_temporary_subresource_view(commandbuffer_type&, image_storage_type* src, u32 gcm_format, u16 x, u16 y, u16 w, u16 h) = 0;
|
||||
virtual section_storage_type* create_new_texture(commandbuffer_type&, u32 rsx_address, u32 rsx_size, u16 width, u16 height, u16 depth, u16 mipmaps, const u32 gcm_format,
|
||||
const rsx::texture_dimension_extended type, const texture_create_flags flags, std::pair<std::array<u8, 4>, std::array<u8, 4>>& remap_vector) = 0;
|
||||
virtual section_storage_type* upload_image_from_cpu(commandbuffer_type&, u32 rsx_address, u16 width, u16 height, u16 depth, u16 mipmaps, u16 pitch, const u32 gcm_format,
|
||||
virtual section_storage_type* upload_image_from_cpu(commandbuffer_type&, u32 rsx_address, u16 width, u16 height, u16 depth, u16 mipmaps, u16 pitch, const u32 gcm_format, const texture_upload_context context,
|
||||
std::vector<rsx_subresource_layout>& subresource_layout, const rsx::texture_dimension_extended type, const bool swizzled, std::pair<std::array<u8, 4>, std::array<u8, 4>>& remap_vector) = 0;
|
||||
virtual void enforce_surface_creation_type(section_storage_type& section, const texture_create_flags expected) = 0;
|
||||
virtual void insert_texture_barrier() = 0;
|
||||
|
||||
private:
|
||||
//Internal implementation methods
|
||||
//Invalidates cached sections hit by a write to [address, address + range).
//Only sections that are clean (not dirty) and locked are considered. Whenever a hit section reports
//a different range through overlaps_page(), the trampled range is replaced with it and the scan is
//restarted so that sections touched by the new range are picked up as well.
//  address   - start of the range being invalidated
//  range     - length of the range in bytes
//  unprotect - true: set_dirty(true) + unprotect() the section; false: discard() it
//Returns true if at least one section was invalidated.
//NOTE(review): takes no lock itself; the visible callers acquire/upgrade a lock first - confirm for all call sites
bool invalidate_range_impl(u32 address, u32 range, bool unprotect)
{
	bool response = false;
	u32 last_dirty_block = 0;
	std::pair<u32, u32> trampled_range = std::make_pair(address, address + range);

	for (auto It = m_cache.begin(); It != m_cache.end(); It++)
	{
		auto &range_data = It->second;
		const u32 base = It->first;
		bool range_reset = false;

		//The block that triggered the last restart needs no second look once it has no valid sections left
		if (base == last_dirty_block && range_data.valid_count == 0)
			continue;

		//Coarse rejection of blocks lying entirely outside the trampled range.
		//The start is tested as an offset from base: (base + get_block_size()) wraps around to 0 in u32
		//for the topmost block, which made the previous comparison reject that block unconditionally.
		const bool starts_past_block = trampled_range.first > base &&
			(trampled_range.first - base) >= get_block_size();

		if (starts_past_block || base >= trampled_range.second)
			continue;

		//Unsigned index avoids the signed/unsigned comparison against size()
		for (u32 i = 0; i < range_data.data.size(); i++)
		{
			auto &tex = range_data.data[i];

			if (tex.is_dirty()) continue;
			if (!tex.is_locked()) continue;	//flushable sections can be 'clean' but unlocked. TODO: Handle this better

			auto overlapped = tex.overlaps_page(trampled_range, address);
			if (std::get<0>(overlapped))
			{
				auto &new_range = std::get<1>(overlapped);

				if (new_range.first != trampled_range.first ||
					new_range.second != trampled_range.second)
				{
					//Range changed: rescan this block, then the whole cache (see range_reset below).
					//NOTE(review): the loop increment still runs after this assignment, so the rescan
					//resumes at index 1 and never revisits element 0 - confirm whether that is intended
					i = 0;
					trampled_range = new_range;
					range_reset = true;
				}

				if (unprotect)
				{
					tex.set_dirty(true);
					tex.unprotect();
				}
				else
				{
					tex.discard();
				}

				m_unreleased_texture_objects++;
				range_data.valid_count--;
				response = true;
			}
		}

		if (range_reset)
		{
			//NOTE(review): the for-increment advances past begin() right after this reset,
			//so the first block of m_cache is not revisited - confirm whether that is intended
			last_dirty_block = base;
			It = m_cache.begin();
		}
	}

	return response;
}
|
||||
|
||||
template <typename ...Args>
|
||||
bool flush_address_impl(u32 address, Args&&... extras)
|
||||
{
|
||||
bool response = false;
|
||||
u32 last_dirty_block = 0;
|
||||
std::pair<u32, u32> trampled_range = std::make_pair(0xffffffff, 0x0);
|
||||
|
||||
for (auto It = m_cache.begin(); It != m_cache.end(); It++)
|
||||
{
|
||||
auto &range_data = It->second;
|
||||
const u32 base = It->first;
|
||||
bool range_reset = false;
|
||||
|
||||
if (base == last_dirty_block && range_data.valid_count == 0)
|
||||
continue;
|
||||
|
||||
if (trampled_range.first >= (base + get_block_size()) || base >= trampled_range.second)
|
||||
continue;
|
||||
|
||||
for (int i = 0; i < range_data.data.size(); i++)
|
||||
{
|
||||
auto &tex = range_data.data[i];
|
||||
|
||||
if (tex.is_dirty()) continue;
|
||||
if (!tex.is_flushable()) continue;
|
||||
|
||||
auto overlapped = tex.overlaps_page(trampled_range, address);
|
||||
if (std::get<0>(overlapped))
|
||||
{
|
||||
auto &new_range = std::get<1>(overlapped);
|
||||
|
||||
if (new_range.first != trampled_range.first ||
|
||||
new_range.second != trampled_range.second)
|
||||
{
|
||||
i = 0;
|
||||
trampled_range = new_range;
|
||||
range_reset = true;
|
||||
}
|
||||
|
||||
//TODO: Map basic host_visible memory without coherent constraint
|
||||
if (!tex.flush(std::forward<Args>(extras)...))
|
||||
{
|
||||
//Missed address, note this
|
||||
//TODO: Lower severity when successful to keep the cache from overworking
|
||||
record_cache_miss(tex);
|
||||
}
|
||||
|
||||
response = true;
|
||||
range_data.valid_count--;
|
||||
}
|
||||
}
|
||||
|
||||
if (range_reset)
|
||||
{
|
||||
It = m_cache.begin();
|
||||
}
|
||||
}
|
||||
|
||||
return response;
|
||||
}
|
||||
|
||||
//Size in bytes of one cache storage block: 1 << 24 = 0x1000000 (16MB)
constexpr u32 get_block_size() const
{
	return 1u << 24;
}
|
||||
//Rounds an address down to a 16MB boundary by clearing its low 24 bits
inline u32 get_block_address(u32 address) const
{
	const u32 offset_mask = 0xFFFFFF;
	return address & ~offset_mask;
}
|
||||
|
||||
public:
|
||||
|
||||
//Default constructor; the body is empty, members rely on their in-class initializers or default construction
texture_cache() {}
|
||||
|
@ -93,7 +231,9 @@ namespace rsx
|
|||
auto test = std::make_pair(rsx_address, range);
|
||||
for (auto &address_range : m_cache)
|
||||
{
|
||||
if (address_range.second.valid_count == 0) continue;
|
||||
auto &range_data = address_range.second;
|
||||
|
||||
for (auto &tex : range_data.data)
|
||||
{
|
||||
if (tex.get_section_base() > rsx_address)
|
||||
|
@ -109,7 +249,7 @@ namespace rsx
|
|||
|
||||
section_storage_type *find_texture_from_dimensions(u32 rsx_address, u16 width = 0, u16 height = 0, u16 mipmaps = 0)
|
||||
{
|
||||
auto found = m_cache.find(rsx_address);
|
||||
auto found = m_cache.find(get_block_address(rsx_address));
|
||||
if (found != m_cache.end())
|
||||
{
|
||||
auto &range_data = found->second;
|
||||
|
@ -127,59 +267,53 @@ namespace rsx
|
|||
|
||||
section_storage_type& find_cached_texture(u32 rsx_address, u32 rsx_size, bool confirm_dimensions = false, u16 width = 0, u16 height = 0, u16 mipmaps = 0)
|
||||
{
|
||||
const u32 block_address = get_block_address(rsx_address);
|
||||
|
||||
auto found = m_cache.find(block_address);
|
||||
if (found != m_cache.end())
|
||||
{
|
||||
reader_lock lock(m_cache_mutex);
|
||||
auto &range_data = found->second;
|
||||
|
||||
auto found = m_cache.find(rsx_address);
|
||||
if (found != m_cache.end())
|
||||
for (auto &tex : range_data.data)
|
||||
{
|
||||
auto &range_data = found->second;
|
||||
|
||||
for (auto &tex : range_data.data)
|
||||
if (tex.matches(rsx_address, rsx_size) && !tex.is_dirty())
|
||||
{
|
||||
if (tex.matches(rsx_address, rsx_size) && !tex.is_dirty())
|
||||
{
|
||||
if (!confirm_dimensions) return tex;
|
||||
if (!confirm_dimensions) return tex;
|
||||
|
||||
if (tex.matches(rsx_address, width, height, mipmaps))
|
||||
return tex;
|
||||
else
|
||||
{
|
||||
LOG_ERROR(RSX, "Cached object for address 0x%X was found, but it does not match stored parameters.", rsx_address);
|
||||
LOG_ERROR(RSX, "%d x %d vs %d x %d", width, height, tex.get_width(), tex.get_height());
|
||||
}
|
||||
if (tex.matches(rsx_address, width, height, mipmaps))
|
||||
return tex;
|
||||
else
|
||||
{
|
||||
LOG_ERROR(RSX, "Cached object for address 0x%X was found, but it does not match stored parameters.", rsx_address);
|
||||
LOG_ERROR(RSX, "%d x %d vs %d x %d", width, height, tex.get_width(), tex.get_height());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (auto &tex : range_data.data)
|
||||
for (auto &tex : range_data.data)
|
||||
{
|
||||
if (tex.is_dirty())
|
||||
{
|
||||
if (tex.is_dirty())
|
||||
if (tex.exists())
|
||||
{
|
||||
if (tex.exists())
|
||||
{
|
||||
m_unreleased_texture_objects--;
|
||||
free_texture_section(tex);
|
||||
}
|
||||
|
||||
range_data.notify(rsx_size);
|
||||
return tex;
|
||||
m_unreleased_texture_objects--;
|
||||
free_texture_section(tex);
|
||||
}
|
||||
|
||||
range_data.notify(rsx_size);
|
||||
return tex;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
writer_lock lock(m_cache_mutex);
|
||||
|
||||
section_storage_type tmp;
|
||||
m_cache[rsx_address].add(tmp, rsx_size);
|
||||
return m_cache[rsx_address].data.back();
|
||||
m_cache[block_address].add(tmp, rsx_size);
|
||||
return m_cache[block_address].data.back();
|
||||
}
|
||||
|
||||
section_storage_type* find_flushable_section(const u32 address, const u32 range)
|
||||
{
|
||||
reader_lock lock(m_cache_mutex);
|
||||
|
||||
auto found = m_cache.find(address);
|
||||
auto found = m_cache.find(get_block_address(address));
|
||||
if (found != m_cache.end())
|
||||
{
|
||||
auto &range_data = found->second;
|
||||
|
@ -199,9 +333,8 @@ namespace rsx
|
|||
template <typename ...Args>
|
||||
void lock_memory_region(image_storage_type* image, const u32 memory_address, const u32 memory_size, const u32 width, const u32 height, const u32 pitch, Args&&... extras)
|
||||
{
|
||||
section_storage_type& region = find_cached_texture(memory_address, memory_size, true, width, height, 1);
|
||||
|
||||
writer_lock lock(m_cache_mutex);
|
||||
section_storage_type& region = find_cached_texture(memory_address, memory_size, true, width, height, 1);
|
||||
|
||||
if (!region.is_locked())
|
||||
{
|
||||
|
@ -217,6 +350,7 @@ namespace rsx
|
|||
template <typename ...Args>
|
||||
bool flush_memory_to_cache(const u32 memory_address, const u32 memory_size, bool skip_synchronized, Args&&... extra)
|
||||
{
|
||||
writer_lock lock(m_cache_mutex);
|
||||
section_storage_type* region = find_flushable_section(memory_address, memory_size);
|
||||
|
||||
//TODO: Make this an assertion
|
||||
|
@ -236,6 +370,7 @@ namespace rsx
|
|||
template <typename ...Args>
|
||||
bool load_memory_from_cache(const u32 memory_address, const u32 memory_size, Args&&... extras)
|
||||
{
|
||||
reader_lock lock(m_cache_mutex);
|
||||
section_storage_type *region = find_flushable_section(memory_address, memory_size);
|
||||
|
||||
if (region && !region->is_dirty())
|
||||
|
@ -256,7 +391,7 @@ namespace rsx
|
|||
|
||||
reader_lock lock(m_cache_mutex);
|
||||
|
||||
auto found = m_cache.find(address);
|
||||
auto found = m_cache.find(get_block_address(address));
|
||||
if (found != m_cache.end())
|
||||
{
|
||||
auto &range_data = found->second;
|
||||
|
@ -304,74 +439,8 @@ namespace rsx
|
|||
address > no_access_range.second)
|
||||
return false;
|
||||
|
||||
bool response = false;
|
||||
std::pair<u32, u32> trampled_range = std::make_pair(0xffffffff, 0x0);
|
||||
std::unordered_map<u32, bool> processed_ranges;
|
||||
|
||||
rsx::conditional_lock<shared_mutex> lock(in_access_violation_handler, m_cache_mutex);
|
||||
|
||||
for (auto It = m_cache.begin(); It != m_cache.end(); It++)
|
||||
{
|
||||
auto &range_data = It->second;
|
||||
const u32 base = It->first;
|
||||
bool range_reset = false;
|
||||
|
||||
if (processed_ranges[base] || range_data.valid_count == 0)
|
||||
continue;
|
||||
|
||||
//Quickly discard range
|
||||
const u32 lock_base = base & ~0xfff;
|
||||
const u32 lock_limit = align(range_data.max_range + base, 4096);
|
||||
|
||||
if ((trampled_range.first >= lock_limit || lock_base >= trampled_range.second) &&
|
||||
(lock_base > address || lock_limit <= address))
|
||||
{
|
||||
processed_ranges[base] = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
for (int i = 0; i < range_data.data.size(); i++)
|
||||
{
|
||||
auto &tex = range_data.data[i];
|
||||
|
||||
if (tex.is_dirty()) continue;
|
||||
if (!tex.is_flushable()) continue;
|
||||
|
||||
auto overlapped = tex.overlaps_page(trampled_range, address);
|
||||
if (std::get<0>(overlapped))
|
||||
{
|
||||
auto &new_range = std::get<1>(overlapped);
|
||||
|
||||
if (new_range.first != trampled_range.first ||
|
||||
new_range.second != trampled_range.second)
|
||||
{
|
||||
i = 0;
|
||||
trampled_range = new_range;
|
||||
range_reset = true;
|
||||
}
|
||||
|
||||
//TODO: Map basic host_visible memory without coherent constraint
|
||||
if (!tex.flush(std::forward<Args>(extras)...))
|
||||
{
|
||||
//Missed address, note this
|
||||
//TODO: Lower severity when successful to keep the cache from overworking
|
||||
record_cache_miss(tex);
|
||||
}
|
||||
|
||||
response = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (range_reset)
|
||||
{
|
||||
processed_ranges.clear();
|
||||
It = m_cache.begin();
|
||||
}
|
||||
|
||||
processed_ranges[base] = true;
|
||||
}
|
||||
|
||||
return response;
|
||||
return flush_address_impl(address, std::forward<Args>(extras)...);
|
||||
}
|
||||
|
||||
bool invalidate_address(u32 address)
|
||||
|
@ -392,76 +461,8 @@ namespace rsx
|
|||
return false;
|
||||
}
|
||||
|
||||
bool response = false;
|
||||
std::unordered_map<u32, bool> processed_ranges;
|
||||
|
||||
rsx::conditional_lock<shared_mutex> lock(in_access_violation_handler, m_cache_mutex);
|
||||
|
||||
for (auto It = m_cache.begin(); It != m_cache.end(); It++)
|
||||
{
|
||||
auto &range_data = It->second;
|
||||
const u32 base = It->first;
|
||||
bool range_reset = false;
|
||||
|
||||
if (processed_ranges[base] || range_data.valid_count == 0)
|
||||
continue;
|
||||
|
||||
//Quickly discard range
|
||||
const u32 lock_base = base & ~0xfff;
|
||||
const u32 lock_limit = align(range_data.max_range + base, 4096);
|
||||
|
||||
if (trampled_range.first >= lock_limit || lock_base >= trampled_range.second)
|
||||
{
|
||||
processed_ranges[base] = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
for (int i = 0; i < range_data.data.size(); i++)
|
||||
{
|
||||
auto &tex = range_data.data[i];
|
||||
|
||||
if (tex.is_dirty()) continue;
|
||||
if (!tex.is_locked()) continue; //flushable sections can be 'clean' but unlocked. TODO: Handle this better
|
||||
|
||||
auto overlapped = tex.overlaps_page(trampled_range, address);
|
||||
if (std::get<0>(overlapped))
|
||||
{
|
||||
auto &new_range = std::get<1>(overlapped);
|
||||
|
||||
if (new_range.first != trampled_range.first ||
|
||||
new_range.second != trampled_range.second)
|
||||
{
|
||||
i = 0;
|
||||
trampled_range = new_range;
|
||||
range_reset = true;
|
||||
}
|
||||
|
||||
if (unprotect)
|
||||
{
|
||||
tex.set_dirty(true);
|
||||
tex.unprotect();
|
||||
}
|
||||
else
|
||||
{
|
||||
tex.discard();
|
||||
}
|
||||
|
||||
m_unreleased_texture_objects++;
|
||||
range_data.valid_count--;
|
||||
response = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (range_reset)
|
||||
{
|
||||
processed_ranges.clear();
|
||||
It = m_cache.begin();
|
||||
}
|
||||
|
||||
processed_ranges[base] = true;
|
||||
}
|
||||
|
||||
return response;
|
||||
return invalidate_range_impl(address, range, unprotect);
|
||||
}
|
||||
|
||||
void record_cache_miss(section_storage_type &tex)
|
||||
|
@ -521,6 +522,8 @@ namespace rsx
|
|||
|
||||
void purge_dirty()
|
||||
{
|
||||
writer_lock lock(m_cache_mutex);
|
||||
|
||||
//Reclaims all graphics memory consumed by dirty textures
|
||||
std::vector<u32> empty_addresses;
|
||||
empty_addresses.resize(32);
|
||||
|
@ -611,6 +614,17 @@ namespace rsx
|
|||
return texptr->get_view();
|
||||
}
|
||||
|
||||
{
|
||||
//Search in cache and upload/bind
|
||||
reader_lock lock(m_cache_mutex);
|
||||
|
||||
auto cached_texture = find_texture_from_dimensions(texaddr, tex_width, tex_height);
|
||||
if (cached_texture)
|
||||
{
|
||||
return cached_texture->get_raw_view();
|
||||
}
|
||||
}
|
||||
|
||||
/* Check if we are re-sampling a subresource of an RTV/DSV texture, bound or otherwise
|
||||
* (Turbo: Super Stunt Squad does this; bypassing the need for a sync object)
|
||||
* The engine does not read back the texture resource through cell, but specifies a texture location that is
|
||||
|
@ -664,15 +678,6 @@ namespace rsx
|
|||
}
|
||||
}
|
||||
|
||||
//If all the above failed, then its probably a generic texture.
|
||||
//Search in cache and upload/bind
|
||||
|
||||
auto cached_texture = find_texture_from_dimensions(texaddr, tex_width, tex_height);
|
||||
if (cached_texture)
|
||||
{
|
||||
return cached_texture->get_raw_view();
|
||||
}
|
||||
|
||||
//Do direct upload from CPU as the last resort
|
||||
const auto extended_dimension = tex.get_extended_texture_dimension();
|
||||
u16 height = 0;
|
||||
|
@ -698,12 +703,13 @@ namespace rsx
|
|||
break;
|
||||
}
|
||||
|
||||
writer_lock lock(m_cache_mutex);
|
||||
const bool is_swizzled = !(tex.format() & CELL_GCM_TEXTURE_LN);
|
||||
auto subresources_layout = get_subresources_layout(tex);
|
||||
auto remap_vector = tex.decoded_remap();
|
||||
|
||||
return upload_image_from_cpu(cmd, texaddr, tex_width, height, depth, tex.get_exact_mipmap_count(), tex_pitch, format,
|
||||
subresources_layout, extended_dimension, is_swizzled, remap_vector)->get_raw_view();
|
||||
texture_upload_context::shader_read, subresources_layout, extended_dimension, is_swizzled, remap_vector)->get_raw_view();
|
||||
}
|
||||
|
||||
template <typename surface_store_type, typename blitter_type, typename ...Args>
|
||||
|
@ -770,7 +776,9 @@ namespace rsx
|
|||
}
|
||||
}
|
||||
|
||||
reader_lock lock(m_cache_mutex);
|
||||
section_storage_type* cached_dest = nullptr;
|
||||
|
||||
if (!dst_is_render_target)
|
||||
{
|
||||
//First check if this surface exists in VRAM with exact dimensions
|
||||
|
@ -785,7 +793,7 @@ namespace rsx
|
|||
//Prep surface
|
||||
enforce_surface_creation_type(*cached_dest, dst.swizzled ? rsx::texture_create_flags::swapped_native_component_order : rsx::texture_create_flags::native_component_order);
|
||||
|
||||
//TODO: Move this code into utils since it is used a lot
|
||||
const auto old_dst_area = dst_area;
|
||||
if (const u32 address_offset = dst.rsx_address - cached_dest->get_section_base())
|
||||
{
|
||||
const u16 bpp = dst_is_argb8 ? 4 : 2;
|
||||
|
@ -809,11 +817,16 @@ namespace rsx
|
|||
max_dst_height = cached_dest->get_height();
|
||||
}
|
||||
else
|
||||
{
|
||||
cached_dest = nullptr;
|
||||
dst_area = old_dst_area;
|
||||
}
|
||||
}
|
||||
|
||||
if (!cached_dest && is_memcpy)
|
||||
{
|
||||
lock.upgrade();
|
||||
invalidate_range_impl(dst_address, memcpy_bytes_length, true);
|
||||
memcpy(dst.pixels, src.pixels, memcpy_bytes_length);
|
||||
return true;
|
||||
}
|
||||
|
@ -839,6 +852,8 @@ namespace rsx
|
|||
|
||||
if (rsx_pitch <= 64 && native_pitch != rsx_pitch)
|
||||
{
|
||||
lock.upgrade();
|
||||
invalidate_range_impl(dst_address, memcpy_bytes_length, true);
|
||||
memcpy(dst.pixels, src.pixels, memcpy_bytes_length);
|
||||
return true;
|
||||
}
|
||||
|
@ -856,7 +871,9 @@ namespace rsx
|
|||
}
|
||||
else
|
||||
{
|
||||
flush_address(src.rsx_address, std::forward<Args>(extras)...);
|
||||
lock.upgrade();
|
||||
|
||||
flush_address_impl(src_address, std::forward<Args>(extras)...);
|
||||
|
||||
const u16 pitch_in_block = src_is_argb8 ? src.pitch >> 2 : src.pitch >> 1;
|
||||
std::vector<rsx_subresource_layout> subresource_layout;
|
||||
|
@ -869,7 +886,7 @@ namespace rsx
|
|||
subresource_layout.push_back(subres);
|
||||
|
||||
const u32 gcm_format = src_is_argb8 ? CELL_GCM_TEXTURE_A8R8G8B8 : CELL_GCM_TEXTURE_R5G6B5;
|
||||
vram_texture = upload_image_from_cpu(cmd, src_address, src.width, src.slice_h, 1, 1, src.pitch, gcm_format,
|
||||
vram_texture = upload_image_from_cpu(cmd, src_address, src.width, src.slice_h, 1, 1, src.pitch, gcm_format, texture_upload_context::blit_engine_src,
|
||||
subresource_layout, rsx::texture_dimension_extended::texture_dimension_2d, dst.swizzled, default_remap_vector)->get_raw_texture();
|
||||
}
|
||||
}
|
||||
|
@ -928,7 +945,8 @@ namespace rsx
|
|||
//TODO: Check for other types of format mismatch
|
||||
if (format_mismatch)
|
||||
{
|
||||
invalidate_range(cached_dest->get_section_base(), cached_dest->get_section_size());
|
||||
lock.upgrade();
|
||||
invalidate_range_impl(cached_dest->get_section_base(), cached_dest->get_section_size(), true);
|
||||
|
||||
dest_texture = 0;
|
||||
cached_dest = nullptr;
|
||||
|
@ -958,6 +976,8 @@ namespace rsx
|
|||
else
|
||||
gcm_format = (dst_is_argb8) ? CELL_GCM_TEXTURE_A8R8G8B8 : CELL_GCM_TEXTURE_R5G6B5;
|
||||
|
||||
lock.upgrade();
|
||||
|
||||
dest_texture = create_new_texture(cmd, dst.rsx_address, dst.pitch * dst.clip_height,
|
||||
dst_dimensions.width, dst_dimensions.height, 1, 1,
|
||||
gcm_format, rsx::texture_dimension_extended::texture_dimension_2d,
|
||||
|
|
|
@ -462,14 +462,15 @@ void GLGSRender::end()
|
|||
for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
|
||||
{
|
||||
int location;
|
||||
if (!rsx::method_registers.fragment_textures[i].enabled())
|
||||
continue;
|
||||
|
||||
if (m_program->uniforms.has_location("tex" + std::to_string(i), &location))
|
||||
if (rsx::method_registers.fragment_textures[i].enabled() && m_program->uniforms.has_location("tex" + std::to_string(i), &location))
|
||||
{
|
||||
m_gl_texture_cache.upload_and_bind_texture(i, get_gl_target_for_texture(rsx::method_registers.fragment_textures[i]), rsx::method_registers.fragment_textures[i], m_rtts);
|
||||
m_gl_sampler_states[i].apply(rsx::method_registers.fragment_textures[i]);
|
||||
|
||||
if (m_textures_dirty[i])
|
||||
m_gl_sampler_states[i].apply(rsx::method_registers.fragment_textures[i]);
|
||||
}
|
||||
|
||||
m_textures_dirty[i] = false;
|
||||
}
|
||||
|
||||
//Vertex textures
|
||||
|
|
|
@ -186,8 +186,22 @@ void GLGSRender::init_buffers(bool skip_reading)
|
|||
|
||||
draw_fbo.recreate();
|
||||
|
||||
bool old_format_found = false;
|
||||
gl::texture::format old_format;
|
||||
|
||||
for (int i = 0; i < rsx::limits::color_buffers_count; ++i)
|
||||
{
|
||||
if (surface_info[i].pitch && g_cfg.video.write_color_buffers)
|
||||
{
|
||||
if (!old_format_found)
|
||||
{
|
||||
old_format = rsx::internals::surface_color_format_to_gl(surface_info[i].color_format).format;
|
||||
old_format_found = true;
|
||||
}
|
||||
|
||||
m_gl_texture_cache.flush_if_cache_miss_likely(old_format, surface_info[i].address, surface_info[i].pitch * surface_info[i].height);
|
||||
}
|
||||
|
||||
if (std::get<0>(m_rtts.m_bound_render_targets[i]))
|
||||
{
|
||||
__glcheck draw_fbo.color[i] = *std::get<1>(m_rtts.m_bound_render_targets[i]);
|
||||
|
|
|
@ -36,7 +36,7 @@ namespace gl
|
|||
case CELL_GCM_TEXTURE_COMPRESSED_DXT23: return GL_COMPRESSED_RGBA_S3TC_DXT3_EXT;
|
||||
case CELL_GCM_TEXTURE_COMPRESSED_DXT45: return GL_COMPRESSED_RGBA_S3TC_DXT5_EXT;
|
||||
}
|
||||
fmt::throw_exception("Compressed or unknown texture format 0x%x" HERE, texture_format);
|
||||
fmt::throw_exception("Unknown texture format 0x%x" HERE, texture_format);
|
||||
}
|
||||
|
||||
std::tuple<GLenum, GLenum> get_format_type(u32 texture_format)
|
||||
|
@ -63,6 +63,9 @@ namespace gl
|
|||
case CELL_GCM_TEXTURE_D1R5G5B5: return std::make_tuple(GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV);
|
||||
case CELL_GCM_TEXTURE_D8R8G8B8: return std::make_tuple(GL_BGRA, GL_UNSIGNED_INT_8_8_8_8);
|
||||
case CELL_GCM_TEXTURE_Y16_X16_FLOAT: return std::make_tuple(GL_RG, GL_HALF_FLOAT);
|
||||
case CELL_GCM_TEXTURE_COMPRESSED_DXT1: return std::make_tuple(GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_UNSIGNED_BYTE);
|
||||
case CELL_GCM_TEXTURE_COMPRESSED_DXT23: return std::make_tuple(GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_UNSIGNED_BYTE);
|
||||
case CELL_GCM_TEXTURE_COMPRESSED_DXT45: return std::make_tuple(GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_UNSIGNED_BYTE);
|
||||
}
|
||||
fmt::throw_exception("Compressed or unknown texture format 0x%x" HERE, texture_format);
|
||||
}
|
||||
|
@ -333,7 +336,7 @@ namespace gl
|
|||
}
|
||||
|
||||
void fill_texture(rsx::texture_dimension_extended dim, u16 mipmap_count, int format, u16 width, u16 height, u16 depth,
|
||||
const std::vector<rsx_subresource_layout> &input_layouts, bool is_swizzled, std::vector<gsl::byte> staging_buffer)
|
||||
const std::vector<rsx_subresource_layout> &input_layouts, bool is_swizzled, GLenum gl_format, GLenum gl_type, std::vector<gsl::byte> staging_buffer)
|
||||
{
|
||||
int mip_level = 0;
|
||||
if (is_compressed_format(format))
|
||||
|
@ -349,11 +352,10 @@ namespace gl
|
|||
glTexStorage1D(GL_TEXTURE_1D, mipmap_count, get_sized_internal_format(format), width);
|
||||
if (!is_compressed_format(format))
|
||||
{
|
||||
const auto &format_type = get_format_type(format);
|
||||
for (const rsx_subresource_layout &layout : input_layouts)
|
||||
{
|
||||
upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4);
|
||||
glTexSubImage1D(GL_TEXTURE_1D, mip_level++, 0, layout.width_in_block, std::get<0>(format_type), std::get<1>(format_type), staging_buffer.data());
|
||||
glTexSubImage1D(GL_TEXTURE_1D, mip_level++, 0, layout.width_in_block, gl_format, gl_type, staging_buffer.data());
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -362,7 +364,7 @@ namespace gl
|
|||
{
|
||||
u32 size = layout.width_in_block * ((format == CELL_GCM_TEXTURE_COMPRESSED_DXT1) ? 8 : 16);
|
||||
upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4);
|
||||
glCompressedTexSubImage1D(GL_TEXTURE_1D, mip_level++, 0, layout.width_in_block * 4, get_sized_internal_format(format), size, staging_buffer.data());
|
||||
glCompressedTexSubImage1D(GL_TEXTURE_1D, mip_level++, 0, layout.width_in_block * 4, gl_format, size, staging_buffer.data());
|
||||
}
|
||||
}
|
||||
return;
|
||||
|
@ -372,11 +374,10 @@ namespace gl
|
|||
{
|
||||
if (!is_compressed_format(format))
|
||||
{
|
||||
const auto &format_type = get_format_type(format);
|
||||
for (const rsx_subresource_layout &layout : input_layouts)
|
||||
{
|
||||
upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4);
|
||||
glTexSubImage2D(GL_TEXTURE_2D, mip_level++, 0, 0, layout.width_in_block, layout.height_in_block, std::get<0>(format_type), std::get<1>(format_type), staging_buffer.data());
|
||||
glTexSubImage2D(GL_TEXTURE_2D, mip_level++, 0, 0, layout.width_in_block, layout.height_in_block, gl_format, gl_type, staging_buffer.data());
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -385,7 +386,7 @@ namespace gl
|
|||
{
|
||||
u32 size = layout.width_in_block * layout.height_in_block * ((format == CELL_GCM_TEXTURE_COMPRESSED_DXT1) ? 8 : 16);
|
||||
upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4);
|
||||
glCompressedTexSubImage2D(GL_TEXTURE_2D, mip_level++, 0, 0, layout.width_in_block * 4, layout.height_in_block * 4, get_sized_internal_format(format), size, staging_buffer.data());
|
||||
glCompressedTexSubImage2D(GL_TEXTURE_2D, mip_level++, 0, 0, layout.width_in_block * 4, layout.height_in_block * 4, gl_format, size, staging_buffer.data());
|
||||
}
|
||||
}
|
||||
return;
|
||||
|
@ -398,11 +399,10 @@ namespace gl
|
|||
// mip_level % mipmap_per_layer will always be equal to mip_level
|
||||
if (!is_compressed_format(format))
|
||||
{
|
||||
const auto &format_type = get_format_type(format);
|
||||
for (const rsx_subresource_layout &layout : input_layouts)
|
||||
{
|
||||
upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4);
|
||||
glTexSubImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X + mip_level / mipmap_count, mip_level % mipmap_count, 0, 0, layout.width_in_block, layout.height_in_block, std::get<0>(format_type), std::get<1>(format_type), staging_buffer.data());
|
||||
glTexSubImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X + mip_level / mipmap_count, mip_level % mipmap_count, 0, 0, layout.width_in_block, layout.height_in_block, gl_format, gl_type, staging_buffer.data());
|
||||
mip_level++;
|
||||
}
|
||||
}
|
||||
|
@ -412,7 +412,7 @@ namespace gl
|
|||
{
|
||||
u32 size = layout.width_in_block * layout.height_in_block * ((format == CELL_GCM_TEXTURE_COMPRESSED_DXT1) ? 8 : 16);
|
||||
upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4);
|
||||
glCompressedTexSubImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X + mip_level / mipmap_count, mip_level % mipmap_count, 0, 0, layout.width_in_block * 4, layout.height_in_block * 4, get_sized_internal_format(format), size, staging_buffer.data());
|
||||
glCompressedTexSubImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X + mip_level / mipmap_count, mip_level % mipmap_count, 0, 0, layout.width_in_block * 4, layout.height_in_block * 4, gl_format, size, staging_buffer.data());
|
||||
mip_level++;
|
||||
}
|
||||
}
|
||||
|
@ -423,11 +423,10 @@ namespace gl
|
|||
{
|
||||
if (!is_compressed_format(format))
|
||||
{
|
||||
const auto &format_type = get_format_type(format);
|
||||
for (const rsx_subresource_layout &layout : input_layouts)
|
||||
{
|
||||
upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4);
|
||||
glTexSubImage3D(GL_TEXTURE_3D, mip_level++, 0, 0, 0, layout.width_in_block, layout.height_in_block, depth, std::get<0>(format_type), std::get<1>(format_type), staging_buffer.data());
|
||||
glTexSubImage3D(GL_TEXTURE_3D, mip_level++, 0, 0, 0, layout.width_in_block, layout.height_in_block, depth, gl_format, gl_type, staging_buffer.data());
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -436,7 +435,7 @@ namespace gl
|
|||
{
|
||||
u32 size = layout.width_in_block * layout.height_in_block * layout.depth * ((format == CELL_GCM_TEXTURE_COMPRESSED_DXT1) ? 8 : 16);
|
||||
upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4);
|
||||
glCompressedTexSubImage3D(GL_TEXTURE_3D, mip_level++, 0, 0, 0, layout.width_in_block * 4, layout.height_in_block * 4, layout.depth, get_sized_internal_format(format), size, staging_buffer.data());
|
||||
glCompressedTexSubImage3D(GL_TEXTURE_3D, mip_level++, 0, 0, 0, layout.width_in_block * 4, layout.height_in_block * 4, layout.depth, gl_format, size, staging_buffer.data());
|
||||
}
|
||||
}
|
||||
return;
|
||||
|
@ -529,6 +528,9 @@ namespace gl
|
|||
|
||||
//The rest of sampler state is now handled by sampler state objects
|
||||
|
||||
fill_texture(type, mipmaps, gcm_format, width, height, depth, subresources_layout, is_swizzled, data_upload_buf);
|
||||
const auto format_type = get_format_type(gcm_format);
|
||||
const GLenum gl_format = std::get<0>(format_type);
|
||||
const GLenum gl_type = std::get<1>(format_type);
|
||||
fill_texture(type, mipmaps, gcm_format, width, height, depth, subresources_layout, is_swizzled, gl_format, gl_type, data_upload_buf);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,6 +17,14 @@ namespace gl
|
|||
|
||||
GLuint create_texture(u32 gcm_format, u16 width, u16 height, u16 depth, u16 mipmaps, rsx::texture_dimension_extended type);
|
||||
|
||||
/**
|
||||
* Declaration of the GL texture upload entry point (presumably fills texture 'id' from guest memory at texaddr - confirm against the definition)
* is_swizzled - determines whether input bytes are in morton order
|
||||
* subresources_layout - descriptor of the mipmap levels in memory
|
||||
* decoded_remap - two vectors, first one contains index to read, e.g. if v[0] = 1 then component 0[A] in the texture should read as component 1[R]
|
||||
* - layout of vector is in A-R-G-B
|
||||
* - second vector contains overrides to force the value to either 0 or 1 instead of reading from texture
|
||||
* static_state - set up the texture without consideration for sampler state (useful for vertex textures which have no real sampler state on RSX)
|
||||
*/
|
||||
void upload_texture(const GLuint id, const u32 texaddr, const u32 gcm_format, u16 width, u16 height, u16 depth, u16 mipmaps, u16 pitch, bool is_swizzled, rsx::texture_dimension_extended type,
|
||||
std::vector<rsx_subresource_layout>& subresources_layout, std::pair<std::array<u8, 4>, std::array<u8, 4>>& decoded_remap, bool static_state);
|
||||
|
||||
|
|
|
@ -47,6 +47,8 @@ namespace gl
|
|||
texture::type type = texture::type::ubyte;
|
||||
bool pack_unpack_swap_bytes = false;
|
||||
|
||||
rsx::texture_create_flags view_flags = rsx::texture_create_flags::default_component_order;
|
||||
|
||||
u8 get_pixel_size(texture::format fmt_, texture::type type_)
|
||||
{
|
||||
u8 size = 1;
|
||||
|
@ -224,6 +226,11 @@ namespace gl
|
|||
vram_texture = source.id();
|
||||
}
|
||||
|
||||
//Stores the given texture_create_flags in view_flags so they can be read back with get_view_flags()
//Only the member is written; no GL call is made here
void set_view_flags(const rsx::texture_create_flags flags)
|
||||
{
|
||||
view_flags = flags;
|
||||
}
|
||||
|
||||
void copy_texture(bool=false)
|
||||
{
|
||||
if (!glIsTexture(vram_texture))
|
||||
|
@ -306,7 +313,6 @@ namespace gl
|
|||
|
||||
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
|
||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
|
||||
protect(utils::protection::ro);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -410,6 +416,11 @@ namespace gl
|
|||
|
||||
return (gl::texture::format)fmt == tex->get_internal_format();
|
||||
}
|
||||
|
||||
//Returns the texture_create_flags last stored in view_flags (by value, const accessor)
rsx::texture_create_flags get_view_flags() const
|
||||
{
|
||||
return view_flags;
|
||||
}
|
||||
};
|
||||
|
||||
class texture_cache : public rsx::texture_cache<void*, cached_texture_section, u32, u32, gl::texture, gl::texture::format>
|
||||
|
@ -577,28 +588,54 @@ namespace gl
|
|||
break;
|
||||
}
|
||||
|
||||
if (flags == rsx::texture_create_flags::swapped_native_component_order)
|
||||
{
|
||||
glBindTexture(GL_TEXTURE_2D, vram_texture);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_R, GL_ALPHA);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_G, GL_RED);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_B, GL_GREEN);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_A, GL_BLUE);
|
||||
}
|
||||
|
||||
auto& cached = create_texture(vram_texture, rsx_address, rsx_size, width, height);
|
||||
cached.protect(utils::protection::ro);
|
||||
cached.set_dirty(false);
|
||||
cached.set_depth_flag(depth_flag);
|
||||
cached.set_view_flags(flags);
|
||||
|
||||
return &cached;
|
||||
}
|
||||
|
||||
//Creates a cached section for the image at rsx_address and uploads its contents through gl::upload_texture
//The section is created with a size of pitch * height and rsx::texture_create_flags::default_component_order
//The leading void*& parameter is unnamed and not used; a local null pointer is passed on to create_new_texture instead
//Returns the pointer handed back by create_new_texture
cached_texture_section* upload_image_from_cpu(void*&, u32 rsx_address, u16 width, u16 height, u16 depth, u16 mipmaps, u16 pitch, const u32 gcm_format,
|
||||
std::vector<rsx_subresource_layout>& subresource_layout, const rsx::texture_dimension_extended type, const bool swizzled,
|
||||
const rsx::texture_upload_context context, std::vector<rsx_subresource_layout>& subresource_layout, const rsx::texture_dimension_extended type, const bool swizzled,
|
||||
std::pair<std::array<u8, 4>, std::array<u8, 4>>& remap_vector) override
|
||||
{
|
||||
void* unused = nullptr;
|
||||
auto section = create_new_texture(unused, rsx_address, pitch * height, width, height, depth, mipmaps, gcm_format, type,
|
||||
rsx::texture_create_flags::default_component_order, remap_vector);
|
||||
|
||||
//Final argument (false) is the static_state parameter of gl::upload_texture
gl::upload_texture(section->get_raw_texture(), rsx_address, gcm_format, width, height, depth, mipmaps, pitch, swizzled, type, subresource_layout, remap_vector, false);
|
||||
//Swizzling is ignored for blit engine copy and emulated using remapping
|
||||
//i.e. input is treated as non-swizzled when context is blit_engine_src, otherwise the caller's swizzled flag is used
bool input_swizzled = (context == rsx::texture_upload_context::blit_engine_src)? false : swizzled;
|
||||
|
||||
//Final argument (false) is the static_state parameter of gl::upload_texture
gl::upload_texture(section->get_raw_texture(), rsx_address, gcm_format, width, height, depth, mipmaps, pitch, input_swizzled, type, subresource_layout, remap_vector, false);
|
||||
return section;
|
||||
}
|
||||
|
||||
//Brings an existing section in line with the requested texture_create_flags
//Does nothing when the section already reports the same flags via get_view_flags()
//NOTE(review): only swapped_native_component_order changes GL state; for any other value just the stored flags are updated
//and a previously applied swizzle is not reset here - confirm this is intended
void enforce_surface_creation_type(cached_texture_section& section, const rsx::texture_create_flags flags) override
|
||||
{
|
||||
if (flags == section.get_view_flags())
|
||||
return;
|
||||
|
||||
if (flags == rsx::texture_create_flags::swapped_native_component_order)
|
||||
{
|
||||
//Binds the section's texture to GL_TEXTURE_2D and sets its swizzle so that R reads ALPHA, G reads RED, B reads GREEN, A reads BLUE
glBindTexture(GL_TEXTURE_2D, section.get_raw_texture());
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_R, GL_ALPHA);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_G, GL_RED);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_B, GL_GREEN);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_A, GL_BLUE);
|
||||
}
|
||||
|
||||
//Remember the new flags so the early-out above triggers on the next identical request
section.set_view_flags(flags);
|
||||
}
|
||||
|
||||
void insert_texture_barrier() override
|
||||
|
@ -630,6 +667,8 @@ namespace gl
|
|||
|
||||
bool is_depth_texture(const u32 rsx_address) override
|
||||
{
|
||||
reader_lock lock(m_cache_mutex);
|
||||
|
||||
auto section = find_texture_from_range(rsx_address, 64u);
|
||||
if (section != nullptr) return section->is_depth_texture();
|
||||
|
||||
|
|
|
@ -403,7 +403,8 @@ namespace rsx
|
|||
std::vector <std::pair<u32, u32>> split_ranges;
|
||||
auto first_count_cmds = method_registers.current_draw_clause.first_count_commands;
|
||||
|
||||
if (method_registers.current_draw_clause.first_count_commands.size() > 1)
|
||||
if (method_registers.current_draw_clause.first_count_commands.size() > 1 &&
|
||||
method_registers.current_draw_clause.is_disjoint_primitive)
|
||||
{
|
||||
u32 next = method_registers.current_draw_clause.first_count_commands.front().first;
|
||||
u32 last_head = 0;
|
||||
|
@ -433,13 +434,18 @@ namespace rsx
|
|||
{
|
||||
std::vector<std::pair<u32, u32>> tmp;
|
||||
auto list_head = first_count_cmds.begin();
|
||||
bool emit_begin = false;
|
||||
|
||||
for (auto &range : split_ranges)
|
||||
{
|
||||
tmp.resize(range.second - range.first + 1);
|
||||
std::copy(list_head + range.first, list_head + range.second, tmp.begin());
|
||||
|
||||
methods[NV4097_SET_BEGIN_END](this, NV4097_SET_BEGIN_END, deferred_primitive_type);
|
||||
if (emit_begin)
|
||||
methods[NV4097_SET_BEGIN_END](this, NV4097_SET_BEGIN_END, deferred_primitive_type);
|
||||
else
|
||||
emit_begin = true;
|
||||
|
||||
method_registers.current_draw_clause.first_count_commands = tmp;
|
||||
methods[NV4097_SET_BEGIN_END](this, NV4097_SET_BEGIN_END, 0);
|
||||
}
|
||||
|
@ -565,41 +571,44 @@ namespace rsx
|
|||
deferred_primitive_type = value;
|
||||
else
|
||||
{
|
||||
deferred_call_size++;
|
||||
|
||||
// Combine all calls since the last one
|
||||
auto &first_count = method_registers.current_draw_clause.first_count_commands;
|
||||
if (first_count.size() > deferred_call_size)
|
||||
{
|
||||
const auto &batch_first_count = first_count[deferred_call_size - 1];
|
||||
u32 count = batch_first_count.second;
|
||||
u32 next = batch_first_count.first + count;
|
||||
|
||||
for (int n = deferred_call_size; n < first_count.size(); n++)
|
||||
{
|
||||
if (first_count[n].first != next)
|
||||
{
|
||||
LOG_ERROR(RSX, "Non-continous first-count range passed as one draw; will be split.");
|
||||
|
||||
first_count[deferred_call_size - 1].second = count;
|
||||
deferred_call_size++;
|
||||
|
||||
count = first_count[deferred_call_size - 1].second;
|
||||
next = first_count[deferred_call_size - 1].first + count;
|
||||
continue;
|
||||
}
|
||||
|
||||
count += first_count[n].second;
|
||||
next += first_count[n].second;
|
||||
}
|
||||
|
||||
first_count[deferred_call_size - 1].second = count;
|
||||
first_count.resize(deferred_call_size);
|
||||
}
|
||||
|
||||
has_deferred_call = true;
|
||||
flush_commands_flag = false;
|
||||
execute_method_call = false;
|
||||
|
||||
deferred_call_size++;
|
||||
|
||||
if (method_registers.current_draw_clause.is_disjoint_primitive)
|
||||
{
|
||||
// Combine all calls since the last one
|
||||
auto &first_count = method_registers.current_draw_clause.first_count_commands;
|
||||
if (first_count.size() > deferred_call_size)
|
||||
{
|
||||
const auto &batch_first_count = first_count[deferred_call_size - 1];
|
||||
u32 count = batch_first_count.second;
|
||||
u32 next = batch_first_count.first + count;
|
||||
|
||||
for (int n = deferred_call_size; n < first_count.size(); n++)
|
||||
{
|
||||
if (first_count[n].first != next)
|
||||
{
|
||||
LOG_ERROR(RSX, "Non-continous first-count range passed as one draw; will be split.");
|
||||
|
||||
first_count[deferred_call_size - 1].second = count;
|
||||
deferred_call_size++;
|
||||
|
||||
count = first_count[deferred_call_size - 1].second;
|
||||
next = first_count[deferred_call_size - 1].first + count;
|
||||
continue;
|
||||
}
|
||||
|
||||
count += first_count[n].second;
|
||||
next += first_count[n].second;
|
||||
}
|
||||
|
||||
first_count[deferred_call_size - 1].second = count;
|
||||
first_count.resize(deferred_call_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
break;
|
||||
|
@ -1049,24 +1058,33 @@ namespace rsx
|
|||
|
||||
void thread::get_current_vertex_program()
|
||||
{
|
||||
auto &result = current_vertex_program = {};
|
||||
|
||||
const u32 transform_program_start = rsx::method_registers.transform_program_start();
|
||||
result.data.reserve((512 - transform_program_start) * 4);
|
||||
result.rsx_vertex_inputs.reserve(rsx::limits::vertex_count);
|
||||
current_vertex_program.output_mask = rsx::method_registers.vertex_attrib_output_mask();
|
||||
current_vertex_program.skip_vertex_input_check = false;
|
||||
|
||||
current_vertex_program.rsx_vertex_inputs.resize(0);
|
||||
current_vertex_program.data.resize(512 * 4);
|
||||
current_vertex_program.rsx_vertex_inputs.reserve(rsx::limits::vertex_count);
|
||||
|
||||
u32* ucode_src = rsx::method_registers.transform_program.data() + (transform_program_start * 4);
|
||||
u32* ucode_dst = current_vertex_program.data.data();
|
||||
u32 ucode_size = 0;
|
||||
D3 d3;
|
||||
|
||||
for (int i = transform_program_start; i < 512; ++i)
|
||||
{
|
||||
result.data.resize((i - transform_program_start) * 4 + 4);
|
||||
memcpy(result.data.data() + (i - transform_program_start) * 4, rsx::method_registers.transform_program.data() + i * 4, 4 * sizeof(u32));
|
||||
|
||||
D3 d3;
|
||||
d3.HEX = rsx::method_registers.transform_program[i * 4 + 3];
|
||||
ucode_size += 4;
|
||||
memcpy(ucode_dst, ucode_src, 4 * sizeof(u32));
|
||||
|
||||
d3.HEX = ucode_src[3];
|
||||
if (d3.end)
|
||||
break;
|
||||
|
||||
ucode_src += 4;
|
||||
ucode_dst += 4;
|
||||
}
|
||||
result.output_mask = rsx::method_registers.vertex_attrib_output_mask();
|
||||
|
||||
current_vertex_program.data.resize(ucode_size);
|
||||
|
||||
const u32 input_mask = rsx::method_registers.vertex_attrib_input_mask();
|
||||
const u32 modulo_mask = rsx::method_registers.frequency_divider_operation_mask();
|
||||
|
@ -1079,7 +1097,7 @@ namespace rsx
|
|||
|
||||
if (rsx::method_registers.vertex_arrays_info[index].size() > 0)
|
||||
{
|
||||
result.rsx_vertex_inputs.push_back(
|
||||
current_vertex_program.rsx_vertex_inputs.push_back(
|
||||
{index,
|
||||
rsx::method_registers.vertex_arrays_info[index].size(),
|
||||
rsx::method_registers.vertex_arrays_info[index].frequency(),
|
||||
|
@ -1089,7 +1107,7 @@ namespace rsx
|
|||
}
|
||||
else if (vertex_push_buffers[index].vertex_count > 1)
|
||||
{
|
||||
result.rsx_vertex_inputs.push_back(
|
||||
current_vertex_program.rsx_vertex_inputs.push_back(
|
||||
{ index,
|
||||
rsx::method_registers.register_vertex_info[index].size,
|
||||
1,
|
||||
|
@ -1099,7 +1117,7 @@ namespace rsx
|
|||
}
|
||||
else if (rsx::method_registers.register_vertex_info[index].size > 0)
|
||||
{
|
||||
result.rsx_vertex_inputs.push_back(
|
||||
current_vertex_program.rsx_vertex_inputs.push_back(
|
||||
{index,
|
||||
rsx::method_registers.register_vertex_info[index].size,
|
||||
rsx::method_registers.register_vertex_info[index].frequency,
|
||||
|
|
|
@ -565,7 +565,7 @@ namespace vk
|
|||
}
|
||||
|
||||
cached_texture_section* upload_image_from_cpu(vk::command_buffer& cmd, u32 rsx_address, u16 width, u16 height, u16 depth, u16 mipmaps, u16 pitch, const u32 gcm_format,
|
||||
std::vector<rsx_subresource_layout>& subresource_layout, const rsx::texture_dimension_extended type, const bool swizzled,
|
||||
const rsx::texture_upload_context context, std::vector<rsx_subresource_layout>& subresource_layout, const rsx::texture_dimension_extended type, const bool swizzled,
|
||||
std::pair<std::array<u8, 4>, std::array<u8, 4>>& remap_vector) override
|
||||
{
|
||||
auto section = create_new_texture(cmd, rsx_address, pitch * height, width, height, depth, mipmaps, gcm_format, type,
|
||||
|
@ -578,7 +578,10 @@ namespace vk
|
|||
|
||||
vk::enter_uninterruptible();
|
||||
|
||||
vk::copy_mipmaped_image_using_buffer(cmd, image->value, subresource_layout, gcm_format, swizzled, mipmaps, subres_range.aspectMask,
|
||||
//Swizzling is ignored for blit engine copy and emulated using a swapped order image view
|
||||
bool input_swizzled = (context == rsx::texture_upload_context::blit_engine_src) ? false : swizzled;
|
||||
|
||||
vk::copy_mipmaped_image_using_buffer(cmd, image->value, subresource_layout, gcm_format, input_swizzled, mipmaps, subres_range.aspectMask,
|
||||
*m_texture_upload_heap, m_texture_upload_buffer);
|
||||
|
||||
vk::leave_uninterruptible();
|
||||
|
|
|
@ -70,7 +70,7 @@ namespace rsx
|
|||
bool locked = false;
|
||||
bool dirty = false;
|
||||
|
||||
//Tests whether the ranges [base1, limit1) and [base2, limit2) intersect
//Limits are treated as exclusive: ranges that merely touch (limit1 == base2) do not count as overlapping
//Reads no member state
inline bool region_overlaps(u32 base1, u32 limit1, u32 base2, u32 limit2)
|
||||
inline bool region_overlaps(u32 base1, u32 limit1, u32 base2, u32 limit2) const
|
||||
{
|
||||
return (base1 < limit2 && base2 < limit1);
|
||||
}
|
||||
|
@ -133,12 +133,12 @@ namespace rsx
|
|||
locked = false;
|
||||
}
|
||||
|
||||
//Checks the given range against this section's locked range
//range is used as <start, length>: it is compared as [range.first, range.first + range.second)
//against [locked_address_base, locked_address_base + locked_address_range)
bool overlaps(std::pair<u32, u32> range)
|
||||
bool overlaps(std::pair<u32, u32> range) const
|
||||
{
|
||||
return region_overlaps(locked_address_base, locked_address_base + locked_address_range, range.first, range.first + range.second);
|
||||
}
|
||||
|
||||
//Checks whether a single address lies inside the locked range
//True when address >= locked_address_base and its offset from the base is smaller than locked_address_range
bool overlaps(u32 address)
|
||||
bool overlaps(u32 address) const
|
||||
{
|
||||
return (locked_address_base <= address && (address - locked_address_base) < locked_address_range);
|
||||
}
|
||||
|
@ -148,7 +148,7 @@ namespace rsx
|
|||
* ignore_protection_range - if true, the test should not check against the aligned protection range, instead
|
||||
* tests against actual range of contents in memory
|
||||
*/
|
||||
bool overlaps(std::pair<u32, u32> range, bool ignore_protection_range)
|
||||
bool overlaps(std::pair<u32, u32> range, bool ignore_protection_range) const
|
||||
{
|
||||
if (!ignore_protection_range)
|
||||
return region_overlaps(locked_address_base, locked_address_base + locked_address_range, range.first, range.first + range.second);
|
||||
|
@ -160,7 +160,7 @@ namespace rsx
|
|||
* Check if the page containing the address tramples this section. Also compares a former trampled page range to compare
|
||||
* If true, returns the range <min, max> with updated invalid range
|
||||
*/
|
||||
std::tuple<bool, std::pair<u32, u32>> overlaps_page(std::pair<u32, u32> old_range, u32 address)
|
||||
std::tuple<bool, std::pair<u32, u32>> overlaps_page(std::pair<u32, u32> old_range, u32 address) const
|
||||
{
|
||||
const u32 page_base = address & ~4095;
|
||||
const u32 page_limit = address + 4096;
|
||||
|
@ -204,7 +204,7 @@ namespace rsx
|
|||
return (cpu_address_base == cpu_address && cpu_address_range == size);
|
||||
}
|
||||
|
||||
std::pair<u32, u32> get_min_max(std::pair<u32, u32> current_min_max)
|
||||
std::pair<u32, u32> get_min_max(std::pair<u32, u32> current_min_max) const
|
||||
{
|
||||
u32 min = std::min(current_min_max.first, locked_address_base);
|
||||
u32 max = std::max(current_min_max.second, locked_address_base + locked_address_range);
|
||||
|
|
Loading…
Add table
Reference in a new issue