mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-08-09 01:29:23 +00:00
rsx: Make input_is_swizzled a template parameter
This lowers the relative cost of this function from ~2.25% to ~1.80% on GCC 9, which I found quite surprising. Some of it probably gets inlined better in the callers, but I haven’t been able to isolate which parts.
This commit is contained in:
parent
46d692d5a6
commit
69e9ee26f6
2 changed files with 7 additions and 7 deletions
|
@ -1263,13 +1263,13 @@ namespace rsx
|
||||||
switch (out_bpp)
|
switch (out_bpp)
|
||||||
{
|
{
|
||||||
case 1:
|
case 1:
|
||||||
convert_linear_swizzle<u8>(linear_pixels, swizzled_pixels, sw_width, sw_height, in_pitch, false);
|
convert_linear_swizzle<u8, false>(linear_pixels, swizzled_pixels, sw_width, sw_height, in_pitch);
|
||||||
break;
|
break;
|
||||||
case 2:
|
case 2:
|
||||||
convert_linear_swizzle<u16>(linear_pixels, swizzled_pixels, sw_width, sw_height, in_pitch, false);
|
convert_linear_swizzle<u16, false>(linear_pixels, swizzled_pixels, sw_width, sw_height, in_pitch);
|
||||||
break;
|
break;
|
||||||
case 4:
|
case 4:
|
||||||
convert_linear_swizzle<u32>(linear_pixels, swizzled_pixels, sw_width, sw_height, in_pitch, false);
|
convert_linear_swizzle<u32, false>(linear_pixels, swizzled_pixels, sw_width, sw_height, in_pitch);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -330,8 +330,8 @@ namespace rsx
|
||||||
* Restriction: It has mixed results if the height or width is not a power of 2
|
* Restriction: It has mixed results if the height or width is not a power of 2
|
||||||
* Restriction: Only works with 2D surfaces
|
* Restriction: Only works with 2D surfaces
|
||||||
*/
|
*/
|
||||||
template<typename T>
|
template<typename T, bool input_is_swizzled>
|
||||||
void convert_linear_swizzle(void* input_pixels, void* output_pixels, u16 width, u16 height, u32 pitch, bool input_is_swizzled)
|
void convert_linear_swizzle(void* input_pixels, void* output_pixels, u16 width, u16 height, u32 pitch)
|
||||||
{
|
{
|
||||||
u32 log2width = ceil_log2(width);
|
u32 log2width = ceil_log2(width);
|
||||||
u32 log2height = ceil_log2(height);
|
u32 log2height = ceil_log2(height);
|
||||||
|
@ -357,7 +357,7 @@ namespace rsx
|
||||||
|
|
||||||
u32 adv = pitch / sizeof(T);
|
u32 adv = pitch / sizeof(T);
|
||||||
|
|
||||||
if (!input_is_swizzled)
|
if constexpr (!input_is_swizzled)
|
||||||
{
|
{
|
||||||
for (int y = 0; y < height; ++y)
|
for (int y = 0; y < height; ++y)
|
||||||
{
|
{
|
||||||
|
@ -414,7 +414,7 @@ namespace rsx
|
||||||
{
|
{
|
||||||
if (depth == 1)
|
if (depth == 1)
|
||||||
{
|
{
|
||||||
convert_linear_swizzle<T>(input_pixels, output_pixels, width, height, width * sizeof(T), true);
|
convert_linear_swizzle<T, true>(input_pixels, output_pixels, width, height, width * sizeof(T));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue