ReorderingMTGS: Linux asm memcpy fixes. (untested)

git-svn-id: http://pcsx2.googlecode.com/svn/branches/ReorderingMTGS@3488 96395faa-99c1-11dd-bbfe-3dabce05a288
2010-07-14 14:23:59 +00:00 · 2010-07-14 14:23:59 +00:00 · e793f91993
parent 6ded71561c
commit e793f91993
1 changed files with 36 additions and 36 deletions
--- a/common/include/Utilities/MemcpyFast.h
+++ b/common/include/Utilities/MemcpyFast.h
@@ -48,10 +48,10 @@
 				//"mov		ecx, [%[dest]]\n"
 				//"mov		edx, [%[src]]\n"
 				//"mov		eax, [%[qwc]]\n"			// keep a copy of count
-				"shr		%[qwc], 1\n"
-				"jz		memcpy_qwc_1\n"		// only one 16 byte block to copy?
+				"cmp		%[qwc], 1\n"
+				"jbe		memcpy_qwc_1\n"				// only one 16 byte block to copy?

-				"cmp		%[qwc], 64\n" // "IN_CACHE_COPY/32"
+				"cmp		%[qwc], 128\n" // "IN_CACHE_COPY/16"
 				"jb			memcpy_qwc_loop1\n"			// small copies should be cached (definite speedup --air)
 		
 			"memcpy_qwc_loop2:\n"						// 32-byte blocks, uncached copy
@@ -68,14 +68,14 @@

 				"add		%[src],32\n"				// update source pointer
 				"add		%[dest],32\n"				// update destination pointer
-				"sub		%[qwc],1\n"
+				"sub		%[qwc],2\n"
 				"jnz		memcpy_qwc_loop2\n"			// last 64-byte block?
 				"sfence\n"								// flush the write buffer
 				"jmp		memcpy_qwc_1\n"

 			// 32-byte blocks, cached!
 			// This *is* important.  Removing this and using exclusively non-temporal stores
-			// results in noticable speed loss!
+			// results in noticeable speed loss!

 			"memcpy_qwc_loop1:\n"				
 				"prefetchnta [%[src] + 568]\n"			// start reading ahead (tested: it helps! --air)
@@ -91,11 +91,11 @@

 				"add		%[src],32\n"				// update source pointer
 				"add		%[dest],32\n"				// update destination pointer
-				"sub		%[qwc],1\n"
+				"sub		%[qwc],2\n"
 				"jnz		memcpy_qwc_loop1\n"			// last 64-byte block?

 			"memcpy_qwc_1:\n"
-				"test	[%[qwc]],dword ptr 1\n"
+				"test		[%qwc],1\n"
 				"jz			memcpy_qwc_final\n"
 				"movq		mm0,[%[src]]\n"
 				"movq		mm1,[%[src]+8]\n"