rsx: Make input_is_swizzled a template parameter

This lowers the relative cost of this function from ~2.25% to ~1.80% on
gcc 9, which I found quite surprising. Some of it probably gets inlined
better in the callers, but I haven’t been able to isolate which parts.
This commit is contained in:
Emmanuel Gil Peyrot 2019-10-27 22:55:19 +01:00 committed by kd-11
parent 46d692d5a6
commit 69e9ee26f6
2 changed files with 7 additions and 7 deletions

View File

@ -1263,13 +1263,13 @@ namespace rsx
switch (out_bpp)
{
case 1:
convert_linear_swizzle<u8>(linear_pixels, swizzled_pixels, sw_width, sw_height, in_pitch, false);
convert_linear_swizzle<u8, false>(linear_pixels, swizzled_pixels, sw_width, sw_height, in_pitch);
break;
case 2:
convert_linear_swizzle<u16>(linear_pixels, swizzled_pixels, sw_width, sw_height, in_pitch, false);
convert_linear_swizzle<u16, false>(linear_pixels, swizzled_pixels, sw_width, sw_height, in_pitch);
break;
case 4:
convert_linear_swizzle<u32>(linear_pixels, swizzled_pixels, sw_width, sw_height, in_pitch, false);
convert_linear_swizzle<u32, false>(linear_pixels, swizzled_pixels, sw_width, sw_height, in_pitch);
break;
}
}

View File

@ -330,8 +330,8 @@ namespace rsx
* Restriction: It has mixed results if the height or width is not a power of 2
* Restriction: Only works with 2D surfaces
*/
template<typename T>
void convert_linear_swizzle(void* input_pixels, void* output_pixels, u16 width, u16 height, u32 pitch, bool input_is_swizzled)
template<typename T, bool input_is_swizzled>
void convert_linear_swizzle(void* input_pixels, void* output_pixels, u16 width, u16 height, u32 pitch)
{
u32 log2width = ceil_log2(width);
u32 log2height = ceil_log2(height);
@ -357,7 +357,7 @@ namespace rsx
u32 adv = pitch / sizeof(T);
if (!input_is_swizzled)
if constexpr (!input_is_swizzled)
{
for (int y = 0; y < height; ++y)
{
@ -414,7 +414,7 @@ namespace rsx
{
if (depth == 1)
{
convert_linear_swizzle<T>(input_pixels, output_pixels, width, height, width * sizeof(T), true);
convert_linear_swizzle<T, true>(input_pixels, output_pixels, width, height, width * sizeof(T));
return;
}