ReorderingMTGS: Linux asm memcpy fixes. (untested)

git-svn-id: http://pcsx2.googlecode.com/svn/branches/ReorderingMTGS@3488 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
Jake.Stine 2010-07-14 14:23:59 +00:00
parent 6ded71561c
commit e793f91993
1 changed files with 36 additions and 36 deletions

View File

@ -48,10 +48,10 @@
//"mov ecx, [%[dest]]\n"
//"mov edx, [%[src]]\n"
//"mov eax, [%[qwc]]\n" // keep a copy of count
"shr %[qwc], 1\n"
"jz memcpy_qwc_1\n" // only one 16 byte block to copy?
"cmp %[qwc], 1\n"
"jbe memcpy_qwc_1\n" // only one 16 byte block to copy?
"cmp %[qwc], 64\n" // "IN_CACHE_COPY/32"
"cmp %[qwc], 128\n" // "IN_CACHE_COPY/16"
"jb memcpy_qwc_loop1\n" // small copies should be cached (definite speedup --air)
"memcpy_qwc_loop2:\n" // 32-byte blocks, uncached copy
@ -68,14 +68,14 @@
"add %[src],32\n" // update source pointer
"add %[dest],32\n" // update destination pointer
"sub %[qwc],1\n"
"sub %[qwc],2\n"
"jnz memcpy_qwc_loop2\n" // last 32-byte block?
"sfence\n" // flush the write buffer
"jmp memcpy_qwc_1\n"
// 32-byte blocks, cached!
// This *is* important. Removing this and using exclusively non-temporal stores
// results in noticable speed loss!
// results in noticeable speed loss!
"memcpy_qwc_loop1:\n"
"prefetchnta [%[src] + 568]\n" // start reading ahead (tested: it helps! --air)
@ -91,11 +91,11 @@
"add %[src],32\n" // update source pointer
"add %[dest],32\n" // update destination pointer
"sub %[qwc],1\n"
"sub %[qwc],2\n"
"jnz memcpy_qwc_loop1\n" // last 32-byte block?
"memcpy_qwc_1:\n"
"test [%[qwc]],dword ptr 1\n"
"test [%qwc],1\n"
"jz memcpy_qwc_final\n"
"movq mm0,[%[src]]\n"
"movq mm1,[%[src]+8]\n"