Merge pull request #3193 from Tilka/swizzle
VertexLoaderX64: optimize 4444 color conversion
This commit is contained in:
commit
ece2e91446
|
@@ -150,9 +150,9 @@ public:
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
ERROR_LOG(VIDEO, "Can't compare vertex loaders that expect different vertex formats!");
|
ERROR_LOG(VIDEO, "Can't compare vertex loaders that expect different vertex formats!");
|
||||||
ERROR_LOG(VIDEO, "a: m_VertexSize %d, m_native_components 0x%08x, stride %d\n",
|
ERROR_LOG(VIDEO, "a: m_VertexSize %d, m_native_components 0x%08x, stride %d",
|
||||||
a->m_VertexSize, a->m_native_components, a->m_native_vtx_decl.stride);
|
a->m_VertexSize, a->m_native_components, a->m_native_vtx_decl.stride);
|
||||||
ERROR_LOG(VIDEO, "b: m_VertexSize %d, m_native_components 0x%08x, stride %d\n",
|
ERROR_LOG(VIDEO, "b: m_VertexSize %d, m_native_components 0x%08x, stride %d",
|
||||||
b->m_VertexSize, b->m_native_components, b->m_native_vtx_decl.stride);
|
b->m_VertexSize, b->m_native_components, b->m_native_vtx_decl.stride);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@@ -71,7 +71,7 @@ void UpdateVertexArrayPointers()
|
||||||
for (int i = 0; i < 12; i++)
|
for (int i = 0; i < 12; i++)
|
||||||
{
|
{
|
||||||
// Only update the array base if the vertex description states we are going to use it.
|
// Only update the array base if the vertex description states we are going to use it.
|
||||||
if (g_main_cp_state.vtx_desc.GetVertexArrayStatus(i) >= 0x2)
|
if (g_main_cp_state.vtx_desc.GetVertexArrayStatus(i) & MASK_INDEXED)
|
||||||
cached_arraybases[i] = Memory::GetPointer(g_main_cp_state.array_bases[i]);
|
cached_arraybases[i] = Memory::GetPointer(g_main_cp_state.array_bases[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@@ -314,34 +314,24 @@ void VertexLoaderX64::ReadColor(OpArg data, u64 attribute, int format)
|
||||||
LoadAndSwap(16, scratch1, data);
|
LoadAndSwap(16, scratch1, data);
|
||||||
if (cpu_info.bBMI2)
|
if (cpu_info.bBMI2)
|
||||||
{
|
{
|
||||||
MOV(32, R(scratch3), Imm32(0x0F0F0F0F));
|
MOV(32, R(scratch2), Imm32(0x0F0F0F0F));
|
||||||
PDEP(32, scratch2, scratch1, R(scratch3));
|
PDEP(32, scratch1, scratch1, R(scratch2));
|
||||||
MOV(32, R(scratch3), Imm32(0xF0F0F0F0));
|
|
||||||
PDEP(32, scratch1, scratch1, R(scratch3));
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
MOV(32, R(scratch3), R(scratch1));
|
|
||||||
SHL(32, R(scratch1), Imm8(12));
|
|
||||||
AND(32, R(scratch1), Imm32(0x0F000000));
|
|
||||||
MOV(32, R(scratch2), R(scratch1));
|
MOV(32, R(scratch2), R(scratch1));
|
||||||
|
|
||||||
MOV(32, R(scratch1), R(scratch3));
|
|
||||||
SHL(32, R(scratch1), Imm8(8));
|
SHL(32, R(scratch1), Imm8(8));
|
||||||
AND(32, R(scratch1), Imm32(0x000F0000));
|
OR(32, R(scratch1), R(scratch2));
|
||||||
OR(32, R(scratch2), R(scratch1));
|
AND(32, R(scratch1), Imm32(0x00FF00FF));
|
||||||
|
|
||||||
MOV(32, R(scratch1), R(scratch3));
|
MOV(32, R(scratch2), R(scratch1));
|
||||||
SHL(32, R(scratch1), Imm8(4));
|
SHL(32, R(scratch1), Imm8(4));
|
||||||
AND(32, R(scratch1), Imm32(0x00000F00));
|
OR(32, R(scratch1), R(scratch2));
|
||||||
OR(32, R(scratch2), R(scratch1));
|
AND(32, R(scratch1), Imm32(0x0F0F0F0F));
|
||||||
|
|
||||||
AND(32, R(scratch3), Imm8(0x0F));
|
|
||||||
OR(32, R(scratch2), R(scratch3));
|
|
||||||
|
|
||||||
MOV(32, R(scratch1), R(scratch2));
|
|
||||||
SHL(32, R(scratch1), Imm8(4));
|
|
||||||
}
|
}
|
||||||
|
MOV(32, R(scratch2), R(scratch1));
|
||||||
|
SHL(32, R(scratch1), Imm8(4));
|
||||||
OR(32, R(scratch1), R(scratch2));
|
OR(32, R(scratch1), R(scratch2));
|
||||||
SwapAndStore(32, MDisp(dst_reg, m_dst_ofs), scratch1);
|
SwapAndStore(32, MDisp(dst_reg, m_dst_ofs), scratch1);
|
||||||
load_bytes = 2;
|
load_bytes = 2;
|
||||||
|
|
Loading…
Reference in New Issue