Merge pull request #3193 from Tilka/swizzle

VertexLoaderX64: optimize 4444 color conversion
2015-10-23 07:51:40 +02:00 · 2015-10-23 07:51:40 +02:00 · ece2e91446
parent 5228758383 33784456a5
commit ece2e91446
3 changed files with 12 additions and 22 deletions
--- a/Source/Core/VideoCommon/VertexLoaderBase.cpp
+++ b/Source/Core/VideoCommon/VertexLoaderBase.cpp
@@ -150,9 +150,9 @@ public:
 			else
 			{
 				ERROR_LOG(VIDEO, "Can't compare vertex loaders that expect different vertex formats!");
-				ERROR_LOG(VIDEO, "a: m_VertexSize %d, m_native_components 0x%08x, stride %d\n",
+				ERROR_LOG(VIDEO, "a: m_VertexSize %d, m_native_components 0x%08x, stride %d",
 				                 a->m_VertexSize, a->m_native_components, a->m_native_vtx_decl.stride);
-				ERROR_LOG(VIDEO, "b: m_VertexSize %d, m_native_components 0x%08x, stride %d\n",
+				ERROR_LOG(VIDEO, "b: m_VertexSize %d, m_native_components 0x%08x, stride %d",
 				                 b->m_VertexSize, b->m_native_components, b->m_native_vtx_decl.stride);
 			}
 		}
--- a/Source/Core/VideoCommon/VertexLoaderManager.cpp
+++ b/Source/Core/VideoCommon/VertexLoaderManager.cpp
@@ -71,7 +71,7 @@ void UpdateVertexArrayPointers()
 	for (int i = 0; i < 12; i++)
 	{
 		// Only update the array base if the vertex description states we are going to use it.
-		if (g_main_cp_state.vtx_desc.GetVertexArrayStatus(i) >= 0x2)
+		if (g_main_cp_state.vtx_desc.GetVertexArrayStatus(i) & MASK_INDEXED)
 			cached_arraybases[i] = Memory::GetPointer(g_main_cp_state.array_bases[i]);
 	}
--- a/Source/Core/VideoCommon/VertexLoaderX64.cpp
+++ b/Source/Core/VideoCommon/VertexLoaderX64.cpp
@@ -314,34 +314,24 @@ void VertexLoaderX64::ReadColor(OpArg data, u64 attribute, int format)
 			LoadAndSwap(16, scratch1, data);
 			if (cpu_info.bBMI2)
 			{
-				MOV(32, R(scratch3), Imm32(0x0F0F0F0F));
+				MOV(32, R(scratch2), Imm32(0x0F0F0F0F));
-				PDEP(32, scratch2, scratch1, R(scratch3));
+				PDEP(32, scratch1, scratch1, R(scratch2));
 				MOV(32, R(scratch3), Imm32(0xF0F0F0F0));
 				PDEP(32, scratch1, scratch1, R(scratch3));
 			}
 			else
 			{
 				MOV(32, R(scratch3), R(scratch1));
 				SHL(32, R(scratch1), Imm8(12));
 				AND(32, R(scratch1), Imm32(0x0F000000));
 				MOV(32, R(scratch2), R(scratch1));
 				MOV(32, R(scratch1), R(scratch3));
 				SHL(32, R(scratch1), Imm8(8));
-				AND(32, R(scratch1), Imm32(0x000F0000));
+				OR(32, R(scratch1), R(scratch2));
-				OR(32, R(scratch2), R(scratch1));
+				AND(32, R(scratch1), Imm32(0x00FF00FF));
-				MOV(32, R(scratch1), R(scratch3));
+				MOV(32, R(scratch2), R(scratch1));
 				SHL(32, R(scratch1), Imm8(4));
-				AND(32, R(scratch1), Imm32(0x00000F00));
+				OR(32, R(scratch1), R(scratch2));
-				OR(32, R(scratch2), R(scratch1));
+				AND(32, R(scratch1), Imm32(0x0F0F0F0F));
 				AND(32, R(scratch3), Imm8(0x0F));
 				OR(32, R(scratch2), R(scratch3));
 				MOV(32, R(scratch1), R(scratch2));
 				SHL(32, R(scratch1), Imm8(4));
 			}
 			MOV(32, R(scratch2), R(scratch1));
 			SHL(32, R(scratch1), Imm8(4));
 			OR(32, R(scratch1), R(scratch2));
 			SwapAndStore(32, MDisp(dst_reg, m_dst_ofs), scratch1);
 			load_bytes = 2;