Fixed a bug in memcpy_fast that caused memory corruption on blocks whose length is not a multiple of 32 bits (this might fix the Linux memcpy_fast problem too).

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@727 96395faa-99c1-11dd-bbfe-3dabce05a288
Jake.Stine 2009-03-09 21:52:33 +00:00
parent 04fba65901
commit 5dc76238bf
5 changed files with 41 additions and 49 deletions
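
For context on the fix described above: memcpy_fast splits the byte count into 64-byte MMX blocks, then 4-byte movsd copies, then a 1-3 byte tail finished with rep movsb. Only that tail step was broken, which is why only lengths that are not a multiple of 4 bytes triggered the corruption. A rough C++ model of the split (an illustrative sketch, not the actual implementation; the function name is made up):

#include <cstddef>
#include <cstdint>
#include <cstring>

// Illustrative model of how memcpy_fast carves up the length.
// Only the final "tail" step corresponds to the code fixed in this commit.
void memcpy_fast_model(uint8_t* dst, const uint8_t* src, size_t len)
{
    size_t blocks = len >> 6;         // 64-byte blocks (movq/movntq loops)
    size_t dwords = (len & 63) >> 2;  // remaining 32-bit words (movsd jump table)
    size_t tail   = len & 3;          // final 1-3 bytes (rep movsb)

    size_t bulk = blocks * 64 + dwords * 4;
    std::memcpy(dst, src, bulk);      // the part that already worked
    dst += bulk;
    src += bulk;

    while (tail--)                    // must copy exactly (len & 3) bytes, no more
        *dst++ = *src++;
}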

File 1 of 5: ELF loader (struct ElfObject / loadElfFile)

@@ -380,7 +380,7 @@ struct ElfObject
         proghead[ i ].p_paddr, proghead[ i ].p_vaddr);
     // used to be paddr
-    memcpy(
+    memcpy_fast(
         &PS2MEM_BASE[proghead[ i ].p_vaddr & 0x1ffffff],
         data.GetPtr(proghead[ i ].p_offset), size
     );
@@ -400,11 +400,9 @@ struct ElfObject
         ELF_LOG("flags: %08x\n",proghead[i].p_flags);
         ELF_LOG("palign: %08x\n",proghead[i].p_align);
         ELF_LOG("\n");
     }
 }

 void loadSectionHeaders()
 {
     if( secthead == NULL || header.e_shoff > (u32)data.GetLength() )
@@ -604,6 +602,7 @@ int loadElfFile(const char *filename)
     ElfApplyPatches();
     LoadGameSpecificSettings();
     return 0;
 }

File 2 of 5: Memory.h

@@ -16,8 +16,7 @@
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
 */
-#ifndef __MEMORY_H__
-#define __MEMORY_H__
+#pragma once

 #ifdef __LINUX__
 #include <signal.h>
@@ -185,5 +184,3 @@ extern void mmap_ClearCpuBlock( uint offset );
 extern void loadBiosRom( const char *ext, u8 *dest, long maxSize );
 extern u16 ba0R16(u32 mem);

-#endif
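
The Memory.h change above just swaps the explicit include guard for #pragma once; both forms prevent double inclusion, roughly as in this generic sketch (not the actual header contents):

// Old style: guard macro, needs a matching #endif at the end of the file.
#ifndef __MEMORY_H__
#define __MEMORY_H__
// ... declarations ...
#endif

// New style: a single directive at the top, no #endif to forget.
#pragma once
// ... declarations ...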

File 3 of 5: plugin manager (OpenPlugins)

@@ -688,8 +688,10 @@ int OpenPlugins(const char* pTitleFilename)
     GSdriverInfo info;
     int ret;

-    if ( !initp ) InitPlugins();
-        //throw Exception::InvalidOperation( "Bad coder mojo -- OpenPlugins called prior to InitPlugins." );
+    if ( !initp )
+    {
+        if( InitPlugins() == -1 ) return -1;
+    }

 #ifndef _WIN32
     // change dir so that CDVD can find its config file
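
The OpenPlugins change stops discarding the return value of InitPlugins: a failed init now aborts the open and is reported to the caller. Roughly (a sketch with a stubbed InitPlugins, using the -1-on-failure convention visible in the diff):

// Stand-in for the real InitPlugins(); assumed to return -1 on failure, 0 on success.
static int InitPlugins() { return 0; }

static bool initp = false;   // assumed "plugins already initialized" flag from the diff

int OpenPlugins_sketch()
{
    if (!initp)
    {
        if (InitPlugins() == -1)   // previously the result was thrown away
            return -1;             // now the failure propagates to the caller
    }

    // ... proceed to open the individual plugins ...
    return 0;
}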

File 4 of 5: memcpy_fast assembly (GAS-style directives; presumably the Linux build)

@@ -381,13 +381,14 @@ $memcpy_align_done:   // destination is dword aligned
     shr eax, 6            // get 64-byte block count
     jz $memcpy_ic_2       // finish the last few bytes

+    mov edx, offset _mmx_backup   ; will probably need this to save/restore mmx
     cmp eax, IN_CACHE_COPY/64     // too big 4 cache? use uncached copy
     jae $memcpy_uc_test

-    movq [_mmx_backup+0x00],mm0
-    movq [_mmx_backup+0x08],mm1
-    movq [_mmx_backup+0x10],mm2
-    movq [_mmx_backup+0x18],mm3
+    movq [edx+0x00],mm0
+    movq [edx+0x08],mm1
+    movq [edx+0x10],mm2
+    movq [edx+0x18],mm3

 // This is small block copy that uses the MMX registers to copy 8 bytes
 // at a time. It uses the "unrolled loop" optimization, and also uses
@@ -419,10 +420,10 @@ $memcpy_ic_1:   // 64-byte block copies, in-cache copy
     dec eax               // count down
     jnz $memcpy_ic_1      // last 64-byte block?

-    movq mm0,[_mmx_backup+0x00]
-    movq mm1,[_mmx_backup+0x08]
-    movq mm2,[_mmx_backup+0x10]
-    movq mm3,[_mmx_backup+0x18]
+    movq mm0,[edx+0x00]
+    movq mm1,[edx+0x08]
+    movq mm2,[edx+0x10]
+    movq mm3,[edx+0x18]

 $memcpy_ic_2:
     mov eax, ecx          // has valid low 6 bits of the byte count
@@ -434,9 +435,6 @@ $memcpy_ic_3:
     jmp eax               // jump to array of movsd's

 $memcpy_uc_test:
-//  cmp ecx, UNCACHED_COPY/64   // big enough? use block prefetch copy
-//  jae $memcpy_bp_1
-//$memcpy_64_test:
     or eax, eax           // tail end of block prefetch will jump here
     jz $memcpy_ic_2       // no more 64-byte blocks left
@@ -445,9 +443,9 @@ $memcpy_uc_test:
 // bypasses the cache and writes straight to main memory. This code also
 // uses the software prefetch instruction to pre-read the data.

-    movq [_mmx_backup+0x00],mm0
-    movq [_mmx_backup+0x08],mm1
-    movq [_mmx_backup+0x10],mm2
+    movq [edx+0x00],mm0
+    movq [edx+0x08],mm1
+    movq [edx+0x10],mm2

 .align 16
 $memcpy_uc_1:   // 64-byte blocks, uncached copy
@@ -475,9 +473,9 @@ $memcpy_uc_1:   // 64-byte blocks, uncached copy
     movntq [edi-8], mm1
     jnz $memcpy_uc_1      // last 64-byte block?

-    movq mm0,[_mmx_backup+0x00]
-    movq mm1,[_mmx_backup+0x08]
-    movq mm2,[_mmx_backup+0x10]
+    movq mm0,[edx+0x00]
+    movq mm1,[edx+0x08]
+    movq mm2,[edx+0x10]

     jmp $memcpy_ic_2      // almost done (not needed because large copy below was removed)
@@ -493,7 +491,7 @@ $memcpy_uc_1:   // 64-byte blocks, uncached copy
 // The smallest copy uses the X86 "movsd" instruction, in an optimized
 // form which is an "unrolled loop". Then it handles the last few bytes.

-.align 4
+.align 16
     movsd
     movsd                 // perform last 1-15 dword copies
     movsd
@@ -512,8 +510,7 @@ $memcpy_uc_1:   // 64-byte blocks, uncached copy
     movsd

 $memcpy_last_few:         // dword aligned from before movsd's
-    mov eax, ecx          // has valid low 2 bits of the byte count
-    and eax, 0b11         // the last few cows must come home
+    and ecx, 0b11         // the last few cows must come home
     jz $memcpy_final      // no more, let's leave
     rep movsb             // the last 1, 2, or 3 bytes
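
The two-line change above is the actual corruption fix: rep movsb copies ecx bytes, but the old code masked a copy of the count in eax and left ecx untouched, so any length that was not a multiple of 4 made the final byte copy run far past the intended 1-3 bytes. A C++ rendering of the before/after logic (register names kept only as variable names; a sketch, not real code):

#include <cstddef>
#include <cstdint>

// Old behaviour: the 0-3 byte remainder is computed in eax, but the byte copy
// (rep movsb) consumes ecx, which still holds the unmasked count.
void tail_copy_old(uint8_t*& edi, const uint8_t*& esi, size_t ecx)
{
    size_t eax = ecx & 3;            // and eax, 0b11
    if (eax == 0) return;            // jz $memcpy_final
    while (ecx--) *edi++ = *esi++;   // rep movsb with the wrong (unmasked) count
}

// New behaviour: mask ecx itself, so rep movsb copies exactly the last 1-3 bytes.
void tail_copy_new(uint8_t*& edi, const uint8_t*& esi, size_t ecx)
{
    ecx &= 3;                        // and ecx, 0b11
    if (ecx == 0) return;            // jz $memcpy_final
    while (ecx--) *edi++ = *esi++;   // rep movsb, the last 1, 2, or 3 bytes
}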

File 5 of 5: memcpy_fast assembly (MASM-style directives; presumably the Windows build)

@@ -404,13 +404,14 @@ $memcpy_align_done:   ; destination is dword aligned
     shr eax, 6            ; get 64-byte block count
     jz $memcpy_ic_2       ; finish the last few bytes

+    mov edx, offset _mmx_backup   ; will probably need this to save/restore mmx
     cmp eax, IN_CACHE_COPY/64     ; too big 4 cache? use uncached copy
     jae $memcpy_uc_test

-    movq [_mmx_backup+0x00],mm0
-    movq [_mmx_backup+0x08],mm1
-    movq [_mmx_backup+0x10],mm2
-    movq [_mmx_backup+0x18],mm3
+    movq [edx+0x00],mm0
+    movq [edx+0x08],mm1
+    movq [edx+0x10],mm2
+    movq [edx+0x18],mm3

 // This is small block copy that uses the MMX registers to copy 8 bytes
 // at a time. It uses the "unrolled loop" optimization, and also uses
@@ -442,10 +443,10 @@ $memcpy_ic_1:   ; 64-byte block copies, in-cache copy
     dec eax               ; count down
     jnz $memcpy_ic_1      ; last 64-byte block?

-    movq mm0,[_mmx_backup+0x00]
-    movq mm1,[_mmx_backup+0x08]
-    movq mm2,[_mmx_backup+0x10]
-    movq mm3,[_mmx_backup+0x18]
+    movq mm0,[edx+0x00]
+    movq mm1,[edx+0x08]
+    movq mm2,[edx+0x10]
+    movq mm3,[edx+0x18]

 $memcpy_ic_2:
     mov eax, ecx          ; has valid low 6 bits of the byte count
@@ -457,9 +458,6 @@ $memcpy_ic_3:
     jmp eax               ; jump to array of movsd's

 $memcpy_uc_test:
-/*cmp ecx, UNCACHED_COPY/64   ; big enough? use block prefetch copy
-    jae $memcpy_bp_1
-$memcpy_64_test:*/
     or eax, eax           ; tail end of block prefetch will jump here
     jz $memcpy_ic_2       ; no more 64-byte blocks left
@@ -468,9 +466,9 @@ $memcpy_64_test:*/
 // bypasses the cache and writes straight to main memory. This code also
 // uses the software prefetch instruction to pre-read the data.

-    movq [_mmx_backup+0x00],mm0
-    movq [_mmx_backup+0x08],mm1
-    movq [_mmx_backup+0x10],mm2
+    movq [edx+0x00],mm0
+    movq [edx+0x08],mm1
+    movq [edx+0x10],mm2

 align 16
 $memcpy_uc_1:   ; 64-byte blocks, uncached copy
@@ -498,9 +496,9 @@ $memcpy_uc_1:   ; 64-byte blocks, uncached copy
     movntq [edi-8], mm1
     jnz $memcpy_uc_1      ; last 64-byte block?

-    movq mm0,[_mmx_backup+0x00]
-    movq mm1,[_mmx_backup+0x08]
-    movq mm2,[_mmx_backup+0x10]
+    movq mm0,[edx+0x00]
+    movq mm1,[edx+0x08]
+    movq mm2,[edx+0x10]

     jmp $memcpy_ic_2      ; almost done (not needed because large copy below was removed)
@@ -559,7 +557,7 @@ $memcpy_bp_3:
 // The smallest copy uses the X86 "movsd" instruction, in an optimized
 // form which is an "unrolled loop". Then it handles the last few bytes.

-align 4
+align 16
     movsd
     movsd                 ; perform last 1-15 dword copies
     movsd
@@ -578,8 +576,7 @@ align 4
     movsd

 $memcpy_last_few:         ; dword aligned from before movsd's
-    mov eax, ecx          ; has valid low 2 bits of the byte count
-    and eax, 11b          ; the last few cows must come home
+    and ecx, 11b          ; the last few cows must come home
     jz $memcpy_final      ; no more, let's leave
     rep movsb             ; the last 1, 2, or 3 bytes