mirror of https://github.com/PCSX2/pcsx2.git
Fixed a bug in memcpy_fast that caused memory corruption on blocks whose length is not a multiple of 32 bits (this might fix the Linux memcpy_fast problem too).
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@727 96395faa-99c1-11dd-bbfe-3dabce05a288
parent 04fba65901
commit 5dc76238bf
@@ -380,7 +380,7 @@ struct ElfObject
 proghead[ i ].p_paddr, proghead[ i ].p_vaddr);
 
 // used to be paddr
-memcpy(
+memcpy_fast(
 &PS2MEM_BASE[proghead[ i ].p_vaddr & 0x1ffffff],
 data.GetPtr(proghead[ i ].p_offset), size
 );
@@ -400,11 +400,9 @@ struct ElfObject
 ELF_LOG("flags: %08x\n",proghead[i].p_flags);
 ELF_LOG("palign: %08x\n",proghead[i].p_align);
 ELF_LOG("\n");
 
 }
 }
-
-
 void loadSectionHeaders()
 {
 if( secthead == NULL || header.e_shoff > (u32)data.GetLength() )
@@ -604,6 +602,7 @@ int loadElfFile(const char *filename)
 
+ElfApplyPatches();
 LoadGameSpecificSettings();
 
 return 0;
 }
 
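The ELF loader hunks above swap memcpy for memcpy_fast when copying program segments into PS2 memory. A minimal sketch of that call's shape, assuming memcpy_fast keeps memcpy's (dest, src, size) argument order (the hunk changes only the function name); the prototype and helper below are illustrative stand-ins, not the project's actual declarations:

    #include <cstring>   // memcpy, size_t

    typedef unsigned char u8;

    // Stand-in so the sketch is self-contained; the real memcpy_fast is the
    // hand-written MMX routine patched further down in this commit.
    static void* memcpy_fast(void* dest, const void* src, size_t size)
    {
        return memcpy(dest, src, size);
    }

    // Hypothetical helper mirroring the call site above: the segment lands at
    // p_vaddr masked into the 32MB EE RAM window, and its size is an arbitrary
    // byte count, so the copy routine must handle non-multiple-of-4 lengths.
    static void copySegment(u8* ps2mem, size_t vaddr, const u8* fileData, size_t size)
    {
        memcpy_fast(&ps2mem[vaddr & 0x1ffffff], fileData, size);
    }

Because segment sizes are not guaranteed to be multiples of 4 bytes, this swap leans on the rep movsb count fix in the memcpy_fast hunks below.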
@@ -16,8 +16,7 @@
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
 */
 
-#ifndef __MEMORY_H__
-#define __MEMORY_H__
+#pragma once
 
 #ifdef __LINUX__
 #include <signal.h>
@@ -185,5 +184,3 @@ extern void mmap_ClearCpuBlock( uint offset );
 
 extern void loadBiosRom( const char *ext, u8 *dest, long maxSize );
 extern u16 ba0R16(u32 mem);
-
-#endif
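Memory.h drops its macro include guard in favor of #pragma once, which is why the trailing #endif disappears in the second hunk. A small illustration of the two guard styles, using only the names from the hunks above; #pragma once is non-standard but supported by the compilers PCSX2 builds with (MSVC and GCC):

    // Before: macro-based include guard wrapping the whole header.
    #ifndef __MEMORY_H__
    #define __MEMORY_H__
    // ... declarations ...
    #endif

    // After: a single directive at the top of the file; no closing #endif needed.
    #pragma once
    // ... declarations ...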
@@ -688,8 +688,10 @@ int OpenPlugins(const char* pTitleFilename)
 GSdriverInfo info;
 int ret;
 
-if ( !initp ) InitPlugins();
-//throw Exception::InvalidOperation( "Bad coder mojo -- OpenPlugins called prior to InitPlugins." );
+if ( !initp )
+{
+if( InitPlugins() == -1 ) return -1;
+}
 
 #ifndef _WIN32
 // change dir so that CDVD can find its config file
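OpenPlugins previously called InitPlugins without checking whether it succeeded; the hunk above makes it bail out with -1 on failure instead. A hypothetical caller (not taken from the PCSX2 sources) showing how that new failure path would be consumed, using only the signature visible in the hunk header:

    #include <cstdio>

    extern int OpenPlugins(const char* pTitleFilename);  // declared as in the hunk header

    // Hypothetical boot helper: with this commit, a failed InitPlugins() surfaces
    // here as -1 instead of leaving the emulator running with dead plugins.
    static int bootWithPlugins(const char* title)
    {
        if (OpenPlugins(title) == -1)
        {
            std::fprintf(stderr, "Plugin initialization failed; aborting boot.\n");
            return -1;
        }
        return 0;
    }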
@@ -381,13 +381,14 @@ $memcpy_align_done: // destination is dword aligned
 shr eax, 6 // get 64-byte block count
 jz $memcpy_ic_2 // finish the last few bytes
 
+mov edx, offset _mmx_backup ; will probably need this to save/restore mmx
 cmp eax, IN_CACHE_COPY/64 // too big 4 cache? use uncached copy
 jae $memcpy_uc_test
 
-movq [_mmx_backup+0x00],mm0
-movq [_mmx_backup+0x08],mm1
-movq [_mmx_backup+0x10],mm2
-movq [_mmx_backup+0x18],mm3
+movq [edx+0x00],mm0
+movq [edx+0x08],mm1
+movq [edx+0x10],mm2
+movq [edx+0x18],mm3
 
 // This is small block copy that uses the MMX registers to copy 8 bytes
 // at a time. It uses the "unrolled loop" optimization, and also uses
@@ -419,10 +420,10 @@ $memcpy_ic_1: // 64-byte block copies, in-cache copy
 dec eax // count down
 jnz $memcpy_ic_1 // last 64-byte block?
 
-movq mm0,[_mmx_backup+0x00]
-movq mm1,[_mmx_backup+0x08]
-movq mm2,[_mmx_backup+0x10]
-movq mm3,[_mmx_backup+0x18]
+movq mm0,[edx+0x00]
+movq mm1,[edx+0x08]
+movq mm2,[edx+0x10]
+movq mm3,[edx+0x18]
 
 $memcpy_ic_2:
 mov eax, ecx // has valid low 6 bits of the byte count
@@ -434,9 +435,6 @@ $memcpy_ic_3:
 jmp eax // jump to array of movsd's
 
 $memcpy_uc_test:
-// cmp ecx, UNCACHED_COPY/64 // big enough? use block prefetch copy
-// jae $memcpy_bp_1
-//$memcpy_64_test:
 or eax, eax // tail end of block prefetch will jump here
 jz $memcpy_ic_2 // no more 64-byte blocks left
 
@@ -445,9 +443,9 @@ $memcpy_uc_test:
 // bypasses the cache and writes straight to main memory. This code also
 // uses the software prefetch instruction to pre-read the data.
 
-movq [_mmx_backup+0x00],mm0
-movq [_mmx_backup+0x08],mm1
-movq [_mmx_backup+0x10],mm2
+movq [edx+0x00],mm0
+movq [edx+0x08],mm1
+movq [edx+0x10],mm2
 
 .align 16
 $memcpy_uc_1: // 64-byte blocks, uncached copy
@@ -475,9 +473,9 @@ $memcpy_uc_1: // 64-byte blocks, uncached copy
 movntq [edi-8], mm1
 jnz $memcpy_uc_1 // last 64-byte block?
 
-movq mm0,[_mmx_backup+0x00]
-movq mm1,[_mmx_backup+0x08]
-movq mm2,[_mmx_backup+0x10]
+movq mm0,[edx+0x00]
+movq mm1,[edx+0x08]
+movq mm2,[edx+0x10]
 
 jmp $memcpy_ic_2 // almost done (not needed because large copy below was removed)
 
@@ -493,7 +491,7 @@ $memcpy_uc_1: // 64-byte blocks, uncached copy
 
 // The smallest copy uses the X86 "movsd" instruction, in an optimized
 // form which is an "unrolled loop". Then it handles the last few bytes.
-.align 4
+.align 16
 movsd
 movsd // perform last 1-15 dword copies
 movsd
@@ -512,8 +510,7 @@ $memcpy_uc_1: // 64-byte blocks, uncached copy
 movsd
 
 $memcpy_last_few: // dword aligned from before movsd's
-mov eax, ecx // has valid low 2 bits of the byte count
-and eax, 0b11 // the last few cows must come home
+and ecx, 0b11 // the last few cows must come home
 jz $memcpy_final // no more, let's leave
 rep movsb // the last 1, 2, or 3 bytes
 
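This last hunk is the corruption fix named in the commit message. rep movsb takes its byte count from ecx, but the old tail code masked a copy of the count in eax and only tested that copy, so whenever the block length was not a multiple of 4 bytes the final rep movsb ran with a stale, unmasked count and wrote past the end of the destination. A hypothetical C model of the old and new tail handling (not the project's code; count plays the role of ecx after the 64-byte blocks and the movsd array have already been copied):

    #include <cstring>   // memcpy, size_t

    // Old behaviour: mask a copy of the count, test the copy, copy with the original.
    static void tail_old(unsigned char* dst, const unsigned char* src, size_t count)
    {
        size_t tail = count & 0x3;     // eax = ecx & 3
        if (tail == 0)
            return;                    // length was a multiple of 4: no bug triggered
        memcpy(dst, src, count);       // rep movsb still sees the unmasked ecx and
    }                                  // runs past the end of the destination block

    // New behaviour: mask the count that rep movsb actually consumes.
    static void tail_new(unsigned char* dst, const unsigned char* src, size_t count)
    {
        count &= 0x3;                  // ecx &= 3
        if (count == 0)
            return;
        memcpy(dst, src, count);       // copies only the final 1, 2, or 3 bytes
    }

The same change is applied to the Windows/MASM build of the routine in the hunks that follow.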
@@ -404,13 +404,14 @@ $memcpy_align_done: ; destination is dword aligned
 shr eax, 6 ; get 64-byte block count
 jz $memcpy_ic_2 ; finish the last few bytes
 
+mov edx, offset _mmx_backup ; will probably need this to save/restore mmx
 cmp eax, IN_CACHE_COPY/64 ; too big 4 cache? use uncached copy
 jae $memcpy_uc_test
 
-movq [_mmx_backup+0x00],mm0
-movq [_mmx_backup+0x08],mm1
-movq [_mmx_backup+0x10],mm2
-movq [_mmx_backup+0x18],mm3
+movq [edx+0x00],mm0
+movq [edx+0x08],mm1
+movq [edx+0x10],mm2
+movq [edx+0x18],mm3
 
 // This is small block copy that uses the MMX registers to copy 8 bytes
 // at a time. It uses the "unrolled loop" optimization, and also uses
@@ -442,10 +443,10 @@ $memcpy_ic_1: ; 64-byte block copies, in-cache copy
 dec eax ; count down
 jnz $memcpy_ic_1 ; last 64-byte block?
 
-movq mm0,[_mmx_backup+0x00]
-movq mm1,[_mmx_backup+0x08]
-movq mm2,[_mmx_backup+0x10]
-movq mm3,[_mmx_backup+0x18]
+movq mm0,[edx+0x00]
+movq mm1,[edx+0x08]
+movq mm2,[edx+0x10]
+movq mm3,[edx+0x18]
 
 $memcpy_ic_2:
 mov eax, ecx ; has valid low 6 bits of the byte count
@@ -457,9 +458,6 @@ $memcpy_ic_3:
 jmp eax ; jump to array of movsd's
 
 $memcpy_uc_test:
-/*cmp ecx, UNCACHED_COPY/64 ; big enough? use block prefetch copy
-jae $memcpy_bp_1
-$memcpy_64_test:*/
 or eax, eax ; tail end of block prefetch will jump here
 jz $memcpy_ic_2 ; no more 64-byte blocks left
 
@@ -468,9 +466,9 @@ $memcpy_64_test:*/
 // bypasses the cache and writes straight to main memory. This code also
 // uses the software prefetch instruction to pre-read the data.
 
-movq [_mmx_backup+0x00],mm0
-movq [_mmx_backup+0x08],mm1
-movq [_mmx_backup+0x10],mm2
+movq [edx+0x00],mm0
+movq [edx+0x08],mm1
+movq [edx+0x10],mm2
 
 align 16
 $memcpy_uc_1: ; 64-byte blocks, uncached copy
@@ -498,9 +496,9 @@ $memcpy_uc_1: ; 64-byte blocks, uncached copy
 movntq [edi-8], mm1
 jnz $memcpy_uc_1 ; last 64-byte block?
 
-movq mm0,[_mmx_backup+0x00]
-movq mm1,[_mmx_backup+0x08]
-movq mm2,[_mmx_backup+0x10]
+movq mm0,[edx+0x00]
+movq mm1,[edx+0x08]
+movq mm2,[edx+0x10]
 
 jmp $memcpy_ic_2 ; almost done (not needed because large copy below was removed)
 
@@ -559,7 +557,7 @@ $memcpy_bp_3:
 
 // The smallest copy uses the X86 "movsd" instruction, in an optimized
 // form which is an "unrolled loop". Then it handles the last few bytes.
-align 4
+align 16
 movsd
 movsd ; perform last 1-15 dword copies
 movsd
@@ -578,8 +576,7 @@ align 4
 movsd
 
 $memcpy_last_few: ; dword aligned from before movsd's
-mov eax, ecx ; has valid low 2 bits of the byte count
-and eax, 11b ; the last few cows must come home
+and ecx, 11b ; the last few cows must come home
 jz $memcpy_final ; no more, let's leave
 rep movsb ; the last 1, 2, or 3 bytes
 