mirror of https://github.com/PCSX2/pcsx2.git
Fixed a bug in memcpy_fast that caused memory corruption when copying blocks whose length is not a multiple of 32 bits (this might fix the Linux memcpy_fast problem too).
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@727 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
04fba65901
commit
5dc76238bf
|
@ -380,7 +380,7 @@ struct ElfObject
|
||||||
proghead[ i ].p_paddr, proghead[ i ].p_vaddr);
|
proghead[ i ].p_paddr, proghead[ i ].p_vaddr);
|
||||||
|
|
||||||
// used to be paddr
|
// used to be paddr
|
||||||
memcpy(
|
memcpy_fast(
|
||||||
&PS2MEM_BASE[proghead[ i ].p_vaddr & 0x1ffffff],
|
&PS2MEM_BASE[proghead[ i ].p_vaddr & 0x1ffffff],
|
||||||
data.GetPtr(proghead[ i ].p_offset), size
|
data.GetPtr(proghead[ i ].p_offset), size
|
||||||
);
|
);
|
||||||
|
@ -400,11 +400,9 @@ struct ElfObject
|
||||||
ELF_LOG("flags: %08x\n",proghead[i].p_flags);
|
ELF_LOG("flags: %08x\n",proghead[i].p_flags);
|
||||||
ELF_LOG("palign: %08x\n",proghead[i].p_align);
|
ELF_LOG("palign: %08x\n",proghead[i].p_align);
|
||||||
ELF_LOG("\n");
|
ELF_LOG("\n");
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void loadSectionHeaders()
|
void loadSectionHeaders()
|
||||||
{
|
{
|
||||||
if( secthead == NULL || header.e_shoff > (u32)data.GetLength() )
|
if( secthead == NULL || header.e_shoff > (u32)data.GetLength() )
|
||||||
|
@ -604,6 +602,7 @@ int loadElfFile(const char *filename)
|
||||||
|
|
||||||
ElfApplyPatches();
|
ElfApplyPatches();
|
||||||
LoadGameSpecificSettings();
|
LoadGameSpecificSettings();
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -16,8 +16,7 @@
|
||||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef __MEMORY_H__
|
#pragma once
|
||||||
#define __MEMORY_H__
|
|
||||||
|
|
||||||
#ifdef __LINUX__
|
#ifdef __LINUX__
|
||||||
#include <signal.h>
|
#include <signal.h>
|
||||||
|
@ -185,5 +184,3 @@ extern void mmap_ClearCpuBlock( uint offset );
|
||||||
|
|
||||||
extern void loadBiosRom( const char *ext, u8 *dest, long maxSize );
|
extern void loadBiosRom( const char *ext, u8 *dest, long maxSize );
|
||||||
extern u16 ba0R16(u32 mem);
|
extern u16 ba0R16(u32 mem);
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
|
@ -688,8 +688,10 @@ int OpenPlugins(const char* pTitleFilename)
|
||||||
GSdriverInfo info;
|
GSdriverInfo info;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
if ( !initp ) InitPlugins();
|
if ( !initp )
|
||||||
//throw Exception::InvalidOperation( "Bad coder mojo -- OpenPlugins called prior to InitPlugins." );
|
{
|
||||||
|
if( InitPlugins() == -1 ) return -1;
|
||||||
|
}
|
||||||
|
|
||||||
#ifndef _WIN32
|
#ifndef _WIN32
|
||||||
// change dir so that CDVD can find its config file
|
// change dir so that CDVD can find its config file
|
||||||
|
|
|
@ -381,13 +381,14 @@ $memcpy_align_done: // destination is dword aligned
|
||||||
shr eax, 6 // get 64-byte block count
|
shr eax, 6 // get 64-byte block count
|
||||||
jz $memcpy_ic_2 // finish the last few bytes
|
jz $memcpy_ic_2 // finish the last few bytes
|
||||||
|
|
||||||
|
mov edx, offset _mmx_backup ; will probably need this to save/restore mmx
|
||||||
cmp eax, IN_CACHE_COPY/64 // too big 4 cache? use uncached copy
|
cmp eax, IN_CACHE_COPY/64 // too big 4 cache? use uncached copy
|
||||||
jae $memcpy_uc_test
|
jae $memcpy_uc_test
|
||||||
|
|
||||||
movq [_mmx_backup+0x00],mm0
|
movq [edx+0x00],mm0
|
||||||
movq [_mmx_backup+0x08],mm1
|
movq [edx+0x08],mm1
|
||||||
movq [_mmx_backup+0x10],mm2
|
movq [edx+0x10],mm2
|
||||||
movq [_mmx_backup+0x18],mm3
|
movq [edx+0x18],mm3
|
||||||
|
|
||||||
// This is small block copy that uses the MMX registers to copy 8 bytes
|
// This is small block copy that uses the MMX registers to copy 8 bytes
|
||||||
// at a time. It uses the "unrolled loop" optimization, and also uses
|
// at a time. It uses the "unrolled loop" optimization, and also uses
|
||||||
|
@ -419,10 +420,10 @@ $memcpy_ic_1: // 64-byte block copies, in-cache copy
|
||||||
dec eax // count down
|
dec eax // count down
|
||||||
jnz $memcpy_ic_1 // last 64-byte block?
|
jnz $memcpy_ic_1 // last 64-byte block?
|
||||||
|
|
||||||
movq mm0,[_mmx_backup+0x00]
|
movq mm0,[edx+0x00]
|
||||||
movq mm1,[_mmx_backup+0x08]
|
movq mm1,[edx+0x08]
|
||||||
movq mm2,[_mmx_backup+0x10]
|
movq mm2,[edx+0x10]
|
||||||
movq mm3,[_mmx_backup+0x18]
|
movq mm3,[edx+0x18]
|
||||||
|
|
||||||
$memcpy_ic_2:
|
$memcpy_ic_2:
|
||||||
mov eax, ecx // has valid low 6 bits of the byte count
|
mov eax, ecx // has valid low 6 bits of the byte count
|
||||||
|
@ -434,9 +435,6 @@ $memcpy_ic_3:
|
||||||
jmp eax // jump to array of movsd's
|
jmp eax // jump to array of movsd's
|
||||||
|
|
||||||
$memcpy_uc_test:
|
$memcpy_uc_test:
|
||||||
// cmp ecx, UNCACHED_COPY/64 // big enough? use block prefetch copy
|
|
||||||
// jae $memcpy_bp_1
|
|
||||||
//$memcpy_64_test:
|
|
||||||
or eax, eax // tail end of block prefetch will jump here
|
or eax, eax // tail end of block prefetch will jump here
|
||||||
jz $memcpy_ic_2 // no more 64-byte blocks left
|
jz $memcpy_ic_2 // no more 64-byte blocks left
|
||||||
|
|
||||||
|
@ -445,9 +443,9 @@ $memcpy_uc_test:
|
||||||
// bypasses the cache and writes straight to main memory. This code also
|
// bypasses the cache and writes straight to main memory. This code also
|
||||||
// uses the software prefetch instruction to pre-read the data.
|
// uses the software prefetch instruction to pre-read the data.
|
||||||
|
|
||||||
movq [_mmx_backup+0x00],mm0
|
movq [edx+0x00],mm0
|
||||||
movq [_mmx_backup+0x08],mm1
|
movq [edx+0x08],mm1
|
||||||
movq [_mmx_backup+0x10],mm2
|
movq [edx+0x10],mm2
|
||||||
|
|
||||||
.align 16
|
.align 16
|
||||||
$memcpy_uc_1: // 64-byte blocks, uncached copy
|
$memcpy_uc_1: // 64-byte blocks, uncached copy
|
||||||
|
@ -475,9 +473,9 @@ $memcpy_uc_1: // 64-byte blocks, uncached copy
|
||||||
movntq [edi-8], mm1
|
movntq [edi-8], mm1
|
||||||
jnz $memcpy_uc_1 // last 64-byte block?
|
jnz $memcpy_uc_1 // last 64-byte block?
|
||||||
|
|
||||||
movq mm0,[_mmx_backup+0x00]
|
movq mm0,[edx+0x00]
|
||||||
movq mm1,[_mmx_backup+0x08]
|
movq mm1,[edx+0x08]
|
||||||
movq mm2,[_mmx_backup+0x10]
|
movq mm2,[edx+0x10]
|
||||||
|
|
||||||
jmp $memcpy_ic_2 // almost done (not needed because large copy below was removed)
|
jmp $memcpy_ic_2 // almost done (not needed because large copy below was removed)
|
||||||
|
|
||||||
|
@ -493,7 +491,7 @@ $memcpy_uc_1: // 64-byte blocks, uncached copy
|
||||||
|
|
||||||
// The smallest copy uses the X86 "movsd" instruction, in an optimized
|
// The smallest copy uses the X86 "movsd" instruction, in an optimized
|
||||||
// form which is an "unrolled loop". Then it handles the last few bytes.
|
// form which is an "unrolled loop". Then it handles the last few bytes.
|
||||||
.align 4
|
.align 16
|
||||||
movsd
|
movsd
|
||||||
movsd // perform last 1-15 dword copies
|
movsd // perform last 1-15 dword copies
|
||||||
movsd
|
movsd
|
||||||
|
@ -512,8 +510,7 @@ $memcpy_uc_1: // 64-byte blocks, uncached copy
|
||||||
movsd
|
movsd
|
||||||
|
|
||||||
$memcpy_last_few: // dword aligned from before movsd's
|
$memcpy_last_few: // dword aligned from before movsd's
|
||||||
mov eax, ecx // has valid low 2 bits of the byte count
|
and ecx, 0b11 // the last few cows must come home
|
||||||
and eax, 0b11 // the last few cows must come home
|
|
||||||
jz $memcpy_final // no more, let's leave
|
jz $memcpy_final // no more, let's leave
|
||||||
rep movsb // the last 1, 2, or 3 bytes
|
rep movsb // the last 1, 2, or 3 bytes
|
||||||
|
|
||||||
|
|
|
@ -404,13 +404,14 @@ $memcpy_align_done: ; destination is dword aligned
|
||||||
shr eax, 6 ; get 64-byte block count
|
shr eax, 6 ; get 64-byte block count
|
||||||
jz $memcpy_ic_2 ; finish the last few bytes
|
jz $memcpy_ic_2 ; finish the last few bytes
|
||||||
|
|
||||||
|
mov edx, offset _mmx_backup ; will probably need this to save/restore mmx
|
||||||
cmp eax, IN_CACHE_COPY/64 ; too big 4 cache? use uncached copy
|
cmp eax, IN_CACHE_COPY/64 ; too big 4 cache? use uncached copy
|
||||||
jae $memcpy_uc_test
|
jae $memcpy_uc_test
|
||||||
|
|
||||||
movq [_mmx_backup+0x00],mm0
|
movq [edx+0x00],mm0
|
||||||
movq [_mmx_backup+0x08],mm1
|
movq [edx+0x08],mm1
|
||||||
movq [_mmx_backup+0x10],mm2
|
movq [edx+0x10],mm2
|
||||||
movq [_mmx_backup+0x18],mm3
|
movq [edx+0x18],mm3
|
||||||
|
|
||||||
// This is small block copy that uses the MMX registers to copy 8 bytes
|
// This is small block copy that uses the MMX registers to copy 8 bytes
|
||||||
// at a time. It uses the "unrolled loop" optimization, and also uses
|
// at a time. It uses the "unrolled loop" optimization, and also uses
|
||||||
|
@ -442,10 +443,10 @@ $memcpy_ic_1: ; 64-byte block copies, in-cache copy
|
||||||
dec eax ; count down
|
dec eax ; count down
|
||||||
jnz $memcpy_ic_1 ; last 64-byte block?
|
jnz $memcpy_ic_1 ; last 64-byte block?
|
||||||
|
|
||||||
movq mm0,[_mmx_backup+0x00]
|
movq mm0,[edx+0x00]
|
||||||
movq mm1,[_mmx_backup+0x08]
|
movq mm1,[edx+0x08]
|
||||||
movq mm2,[_mmx_backup+0x10]
|
movq mm2,[edx+0x10]
|
||||||
movq mm3,[_mmx_backup+0x18]
|
movq mm3,[edx+0x18]
|
||||||
|
|
||||||
$memcpy_ic_2:
|
$memcpy_ic_2:
|
||||||
mov eax, ecx ; has valid low 6 bits of the byte count
|
mov eax, ecx ; has valid low 6 bits of the byte count
|
||||||
|
@ -457,9 +458,6 @@ $memcpy_ic_3:
|
||||||
jmp eax ; jump to array of movsd's
|
jmp eax ; jump to array of movsd's
|
||||||
|
|
||||||
$memcpy_uc_test:
|
$memcpy_uc_test:
|
||||||
/*cmp ecx, UNCACHED_COPY/64 ; big enough? use block prefetch copy
|
|
||||||
jae $memcpy_bp_1
|
|
||||||
$memcpy_64_test:*/
|
|
||||||
or eax, eax ; tail end of block prefetch will jump here
|
or eax, eax ; tail end of block prefetch will jump here
|
||||||
jz $memcpy_ic_2 ; no more 64-byte blocks left
|
jz $memcpy_ic_2 ; no more 64-byte blocks left
|
||||||
|
|
||||||
|
@ -468,9 +466,9 @@ $memcpy_64_test:*/
|
||||||
// bypasses the cache and writes straight to main memory. This code also
|
// bypasses the cache and writes straight to main memory. This code also
|
||||||
// uses the software prefetch instruction to pre-read the data.
|
// uses the software prefetch instruction to pre-read the data.
|
||||||
|
|
||||||
movq [_mmx_backup+0x00],mm0
|
movq [edx+0x00],mm0
|
||||||
movq [_mmx_backup+0x08],mm1
|
movq [edx+0x08],mm1
|
||||||
movq [_mmx_backup+0x10],mm2
|
movq [edx+0x10],mm2
|
||||||
|
|
||||||
align 16
|
align 16
|
||||||
$memcpy_uc_1: ; 64-byte blocks, uncached copy
|
$memcpy_uc_1: ; 64-byte blocks, uncached copy
|
||||||
|
@ -498,9 +496,9 @@ $memcpy_uc_1: ; 64-byte blocks, uncached copy
|
||||||
movntq [edi-8], mm1
|
movntq [edi-8], mm1
|
||||||
jnz $memcpy_uc_1 ; last 64-byte block?
|
jnz $memcpy_uc_1 ; last 64-byte block?
|
||||||
|
|
||||||
movq mm0,[_mmx_backup+0x00]
|
movq mm0,[edx+0x00]
|
||||||
movq mm1,[_mmx_backup+0x08]
|
movq mm1,[edx+0x08]
|
||||||
movq mm2,[_mmx_backup+0x10]
|
movq mm2,[edx+0x10]
|
||||||
|
|
||||||
jmp $memcpy_ic_2 ; almost done (not needed because large copy below was removed)
|
jmp $memcpy_ic_2 ; almost done (not needed because large copy below was removed)
|
||||||
|
|
||||||
|
@ -559,7 +557,7 @@ $memcpy_bp_3:
|
||||||
|
|
||||||
// The smallest copy uses the X86 "movsd" instruction, in an optimized
|
// The smallest copy uses the X86 "movsd" instruction, in an optimized
|
||||||
// form which is an "unrolled loop". Then it handles the last few bytes.
|
// form which is an "unrolled loop". Then it handles the last few bytes.
|
||||||
align 4
|
align 16
|
||||||
movsd
|
movsd
|
||||||
movsd ; perform last 1-15 dword copies
|
movsd ; perform last 1-15 dword copies
|
||||||
movsd
|
movsd
|
||||||
|
@ -578,8 +576,7 @@ align 4
|
||||||
movsd
|
movsd
|
||||||
|
|
||||||
$memcpy_last_few: ; dword aligned from before movsd's
|
$memcpy_last_few: ; dword aligned from before movsd's
|
||||||
mov eax, ecx ; has valid low 2 bits of the byte count
|
and ecx, 11b ; the last few cows must come home
|
||||||
and eax, 11b ; the last few cows must come home
|
|
||||||
jz $memcpy_final ; no more, let's leave
|
jz $memcpy_final ; no more, let's leave
|
||||||
rep movsb ; the last 1, 2, or 3 bytes
|
rep movsb ; the last 1, 2, or 3 bytes
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue