- fixed a warning from last revision

- made "framelimit" mode default, instead of "normal" mode (was discussed by team)
- some more microVU rec work...


git-svn-id: http://pcsx2.googlecode.com/svn/trunk@452 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
cottonvibes 2009-02-08 07:21:56 +00:00
parent 63d5ce867b
commit 9f454da8d6
5 changed files with 141 additions and 90 deletions

View File

@ -151,7 +151,7 @@ static int _findFilenamePosition( const string& src)
{
// note: the source path could have multiple trailing slashes. We want to ignore those.
int startpos = src.find_last_not_of( Delimiters );
unsigned int startpos = src.find_last_not_of( Delimiters );
if(startpos == string::npos )
return 0;

View File

@ -128,7 +128,7 @@ public:
//cpu
SetCurrentSection( "Cpu" );
Entry( "Options", Conf.Options, PCSX2_EEREC|PCSX2_VU0REC|PCSX2_VU1REC|PCSX2_GSMULTITHREAD );
Entry( "Options", Conf.Options, PCSX2_EEREC|PCSX2_VU0REC|PCSX2_VU1REC|PCSX2_GSMULTITHREAD|PCSX2_FRAMELIMIT_LIMIT );
Entry( "sseMXCSR", Conf.sseMXCSR, DEFAULT_sseMXCSR );
Entry( "sseVUMXCSR", Conf.sseVUMXCSR, DEFAULT_sseVUMXCSR );
Entry( "eeOptions", Conf.eeOptions, DEFAULT_eeOptions );

View File

@ -43,12 +43,6 @@ __forceinline void mVUinit(microVU* mVU, VURegs* vuRegsPtr, const int vuIndex) {
mVU->cacheAddr = 0xC0000000 + (vuIndex ? mVU->cacheSize : 0);
mVU->cache = NULL;
for (int i; i <= mVU->prog.max; i++) {
for (u32 j; j < mVU->progSize; j++) {
mVU->prog.prog[i].block[j] = new microBlockManager();
}
}
mVUreset(mVU);
}
@ -57,6 +51,13 @@ __forceinline void mVUreset(microVU* mVU) {
mVUclose(mVU); // Close
// Create Block Managers
for (int i; i <= mVU->prog.max; i++) {
for (u32 j; j < mVU->progSize; j++) {
mVU->prog.prog[i].block[j] = new microBlockManager();
}
}
// Dynarec Cache
mVU->cache = SysMmapEx(mVU->cacheAddr, mVU->cacheSize, 0x10000000, "Micro VU");
if ( mVU->cache == NULL ) throw Exception::OutOfMemory(fmt_string( "microVU Error: failed to allocate recompiler memory! (addr: 0x%x)", params (u32)mVU->cache));
@ -74,6 +75,7 @@ __forceinline void mVUclose(microVU* mVU) {
if ( mVU->cache ) { SysMunmap( mVU->cache, mVU->cacheSize ); mVU->cache = NULL; }
// Delete Block Managers
for (int i; i <= mVU->prog.max; i++) {
for (u32 j; j < mVU->progSize; j++) {
if (mVU->prog.prog[i].block[j]) delete mVU->prog.prog[i].block[j];
@ -103,10 +105,27 @@ __forceinline void mVUclear(microVU* mVU, u32 addr, u32 size) {
}
// Executes for number of cycles
void* mVUexecute(microVU* mVU, u32 startPC, u32 cycles) {
// Looks up an already-recompiled block for VU0 and returns its x86 entry address.
//   startPC - used directly as the index into the current program's block[] array
//   cycles  - not used yet (see the ToDo in the pseudocode below)
// Returns the matching block's x86ptrStart, or NULL when no program/block is found
// (the recompile step is not implemented yet, so that path currently just returns NULL).
void* __fastcall mVUexecuteVU0(u32 startPC, u32 cycles) {
/* Pseudocode: (ToDo: implement # of cycles)
1) Search for existing program
2) If program not found, goto 5
3) Search for recompiled block
4) If recompiled block found, goto 6
5) Recompile as many blocks as possible
6) Return start execution address of block
*/
if ( mVUsearchProg(&microVU0) ) { // Found Program
// Ask the block manager at startPC for a block recorded with the last pipeline state
microBlock* block = microVU0.prog.prog[microVU0.prog.cur].block[startPC]->search(microVU0.prog.lastPipelineState);
if (block) return block->x86ptrStart;
}
// Recompile code
// (ToDo: steps 5 and 6 of the pseudocode are not implemented; NULL is returned for now)
return NULL;
}
// Stub called by the VU1 dispatcher (startVU1): both arguments are currently
// ignored and NULL is always returned.
void* __fastcall mVUexecuteVU1(u32 startPC, u32 cycles) {
return NULL;
}
/*
// Executes till finished
void* mVUexecuteF(microVU* mVU, u32 startPC) {
//if (!mProg.finished) {
@ -117,6 +136,7 @@ void* mVUexecuteF(microVU* mVU, u32 startPC) {
//}
return NULL;
}
*/
//------------------------------------------------------------------
// Micro VU - Private Functions
@ -145,60 +165,44 @@ __forceinline int mVUfindLeastUsedProg(microVU* mVU) {
__forceinline void mVUcacheProg(microVU* mVU) {
if (!mVU->prog.prog[mVU->prog.cur].cached) { // If uncached, then cache
memcpy_fast(mVU->prog.prog[mVU->prog.cur].data, mVU->regs->Micro, mVU->microSize);
mVU->prog.prog[mVU->prog.cur].cached = 1;
}
}
// Searches for Cached Micro Program and sets prog.cur (returns -1 if no program found)
// Searches for Cached Micro Program and sets prog.cur to it (returns 1 if program found, else returns 0)
__forceinline int mVUsearchProg(microVU* mVU) {
if (mVU->prog.cleared) { // If cleared, we need to search for new program
for (int i = 0; i <= mVU->prog.total; i++) {
if (!memcmp_mmx(mVU->prog.prog[i].data, mVU->regs->Micro, mVU->microSize)) {
return i;
if (i == mVU->prog.cur) continue; // We can skip the current program.
if (mVU->prog.prog[i].cached) {
if (!memcmp_mmx(mVU->prog.prog[i].data, mVU->regs->Micro, mVU->microSize)) {
mVU->prog.cur = i;
return 1;
}
}
}
mVU->prog.cur = mVUfindLeastUsedProg(mVU);
return -1;
mVU->prog.cur = mVUfindLeastUsedProg(mVU); // If cleared and program not cached, make a new program instance
// ToDo: Clear old data if overwriting old program
return 0;
}
else return mVU->prog.cur;
else return 1; // If !cleared, then we're still on the same program as last-time ;)
}
//------------------------------------------------------------------
// Dispatcher Functions
//------------------------------------------------------------------
// Runs till finished
__declspec(naked) void runVU0(microVU* mVU, u32 startPC) {
// Runs VU0 for number of cycles
__declspec(naked) void __fastcall startVU0(u32 startPC, u32 cycles) {
__asm {
mov eax, dword ptr [esp]
mov microVU0.x86callstack, eax
add esp, 4
call mVUexecuteF
// __fastcall = The first two DWORD or smaller arguments are passed in ECX and EDX registers; all other arguments are passed right to left.
call mVUexecuteVU0
/*backup cpu state*/
mov microVU0.x86ebp, ebp
mov microVU0.x86esi, esi
mov microVU0.x86edi, edi
mov microVU0.x86ebx, ebx
/*mov microVU0.x86esp, esp*/
ldmxcsr g_sseVUMXCSR
jmp eax
}
}
__declspec(naked) void runVU1(microVU* mVU, u32 startPC) {
__asm {
mov eax, dword ptr [esp]
mov microVU1.x86callstack, eax
add esp, 4
call mVUexecuteF
/*backup cpu state*/
mov microVU1.x86ebp, ebp
mov microVU1.x86esi, esi
mov microVU1.x86edi, edi
mov microVU1.x86ebx, ebx
/*mov microVU1.x86esp, esp*/
push ebx;
push ebp;
push esi;
push edi;
ldmxcsr g_sseVUMXCSR
@ -206,43 +210,39 @@ __declspec(naked) void runVU1(microVU* mVU, u32 startPC) {
}
}
// Runs for number of cycles
__declspec(naked) void runVU0(microVU* mVU, u32 startPC, u32 cycles) {
// Runs VU1 for number of cycles
__declspec(naked) void __fastcall startVU1(u32 startPC, u32 cycles) {
__asm {
mov eax, dword ptr [esp]
mov microVU0.x86callstack, eax
add esp, 4
call mVUexecute
call mVUexecuteVU1
/*backup cpu state*/
mov microVU0.x86ebp, ebp
mov microVU0.x86esi, esi
mov microVU0.x86edi, edi
mov microVU0.x86ebx, ebx
/*mov microVU0.x86esp, esp*/
push ebx;
push ebp;
push esi;
push edi;
ldmxcsr g_sseVUMXCSR
jmp eax
}
}
__declspec(naked) void runVU1(microVU* mVU, u32 startPC, u32 cycles) {
// Exit point
__declspec(naked) void __fastcall endVU0(u32 startPC, u32 cycles) {
__asm {
mov eax, dword ptr [esp]
mov microVU1.x86callstack, eax
add esp, 4
call mVUexecute
/*backup cpu state*/
mov microVU1.x86ebp, ebp
mov microVU1.x86esi, esi
mov microVU1.x86edi, edi
mov microVU1.x86ebx, ebx
/*mov microVU1.x86esp, esp*/
//call mVUcleanUpVU0
ldmxcsr g_sseVUMXCSR
/*restore cpu state*/
pop edi;
pop esi;
pop ebp;
pop ebx;
ldmxcsr g_sseMXCSR
jmp eax
ret
}
}
//------------------------------------------------------------------
@ -269,14 +269,9 @@ __forceinline void clearVUrec(u32 addr, u32 size, int vuIndex) {
else mVUclear(&microVU1, addr, size);
}
__forceinline void runVUrec(u32 startPC, int vuIndex) {
if (!vuIndex) runVU0(&microVU0, startPC);
else runVU1(&microVU1, startPC);
}
__forceinline void runVUrec(u32 startPC, u32 cycles, int vuIndex) {
if (!vuIndex) runVU0(&microVU0, startPC, cycles);
else runVU1(&microVU1, startPC, cycles);
if (!vuIndex) startVU0(startPC, cycles);
else startVU1(startPC, cycles);
}
#endif // PCSX2_MICROVU

View File

@ -24,9 +24,9 @@
struct microBlock {
u32 pipelineState; // FMACx|y|z|w | FDiv | EFU | IALU | BRANCH // Still thinking of how I'm going to do this
u32 x86ptrStart;
u32 x86ptrEnd;
u32 x86ptrBranch;
u8* x86ptrStart;
u8* x86ptrEnd;
u8* x86ptrBranch;
//u32 size;
};
@ -50,7 +50,7 @@ public:
ZeroMemory(&blockList, sizeof(blockList)); // Can be Omitted?
}
void close() {}; // Can be Omitted?
void add(u32 pipelineState, u32 x86ptrStart) {
void add(u32 pipelineState, u8* x86ptrStart) {
if (!search(pipelineState)) {
listSize++;
listSize &= MaxBlocks;
@ -62,6 +62,7 @@ public:
for (int i = 0; i < listSize; i++) {
if (blockList[i].pipelineState == pipelineState) return &blockList[i];
}
return NULL;
}
void clearFast() {
listSize = -1;
@ -92,6 +93,7 @@ struct microProgManager {
int total; // Total Number of valid MicroPrograms minus 1
int cleared; // Micro Program is Indeterminate so must be searched for (and if no matches are found then recompile a new one)
int finished; // Completed MicroProgram to E-bit Termination
u32 lastPipelineState; // Pipeline state from where it left off (useful for continuing execution)
};
struct microVU {
@ -104,14 +106,16 @@ struct microVU {
VURegs* regs; // VU Regs Struct
u8* cache; // Dynarec Cache Start (where we will start writing the recompiled code to)
u8* ptr; // Pointer to next place to write recompiled code to
uptr x86callstack;
uptr x86ebp;
uptr x86esi;
uptr x86edi;
uptr x86ebx;
uptr x86esp;
/*
uptr x86eax; // Accumulator register. Used in arithmetic operations.
uptr x86ecx; // Counter register. Used in shift/rotate instructions.
uptr x86edx; // Data register. Used in arithmetic operations and I/O operations.
uptr x86ebx; // Base register. Used as a pointer to data (located in DS in segmented mode).
uptr x86esp; // Stack Pointer register. Pointer to the top of the stack.
uptr x86ebp; // Stack Base Pointer register. Used to point to the base of the stack.
uptr x86esi; // Source register. Used as a pointer to a source in stream operations.
uptr x86edi; // Destination register. Used as a pointer to a destination in stream operations.
*/
microProgManager<0x800> prog; // Micro Program Data
};
@ -124,8 +128,8 @@ __forceinline void mVUinit(microVU* mVU, VURegs* vuRegsPtr, const int vuIndex);
__forceinline void mVUreset(microVU* mVU);
__forceinline void mVUclose(microVU* mVU);
__forceinline void mVUclear(microVU* mVU, u32 addr, u32 size); // Clears part of a Micro Program (must use before modifying micro program!)
void* mVUexecute(microVU* mVU, u32 startPC, u32 cycles); // Recompiles/Executes code for the number of cycles indicated (will always run for >= 'cycles' amount unless 'finished')
void* mVUexecuteF(microVU* mVU, u32 startPC); // Recompiles/Executes code till finished
//void* mVUexecute(microVU* mVU, u32 startPC, u32 cycles); // Recompiles/Executes code for the number of cycles indicated (will always run for >= 'cycles' amount unless 'finished')
//void* mVUexecuteF(microVU* mVU, u32 startPC); // Recompiles/Executes code till finished
__forceinline int mVUfindLeastUsedProg(microVU* mVU);
__forceinline int mVUsearchProg(microVU* mVU);

View File

@ -17,3 +17,55 @@
*/
#include "PrecompiledHeader.h"
/*
Cotton's Notes on how things will work (*experimental*, subject to change if I get different ideas):
Guide:
Fd, Fs, Ft = operands in the Micro Instructions
Acc = VU's Accumulator register
Fs/t = shorthand notation I made-up for "Fs or Ft"
xmmFd, xmmFs, xmmFt, xmmAcc = XMM regs that hold Fd, Fs, Ft, and Acc values respectively.
xmmZ = XMM reg that holds the zero Register; always {0, 0, 0, 1.0}
xmmT1, xmmT2, xmmT3 = temp regs.
General:
XMM0 is a volatile temp reg throughout the recs. You can always freely use it.
EAX is a volatile temp reg. You can always freely use it.
Mapping:
xmmT1 = xmm0
xmmFd = xmm1
xmmFs = xmm2
xmmFt = xmm3
xmmAcc = xmm4
xmmT2 = xmm5
xmmT3 = xmm6
xmmZ = xmm7
Most of the time the above mapping will be true, unless I find a reason not to do it this way :)
Opcodes:
Fd's 4-vectors must be preserved (kept valid); Unless operation is single-scalar, then only 'x' XMM vector
will contain valid data for X, Y, Z, or W, and the other XMM vectors will be garbage and freely modifiable.
Fs and Ft are temp regs that won't be used after the opcode, so their values can be freely modified.
If (Fd == 0), Then you don't need to explicitly handle this case in the opcode implementation,
since it's dealt with in the analyzing microVU pipeline functions.
(So just do the normal operation and don't worry about it.)
If (_X_Y_Z_W == 0) Then same as above. (btw, I'm not sure if this case ever happens...)
If (Fd == Fs/t), Then xmmFd != xmmFs/t (unless it's more optimized this way! it'll be commented on the opcode)
Clamping:
Fs/t can always be clamped by case 15 (all vectors modified) since they won't be written back.
Problems:
The biggest problem I think I'll have is xgkick opcode having variable timing/stalling.
Other Notes:
These notes are mostly to help me (cottonvibes) remember good ideas and to help confused devs
get an idea of how things work. Right now it's all theoretical and I'll change things once implemented ;p
*/