newppu savestates, as well as a bit of jacking around for performance and other misc things I meant to do; but alas, speedups won't be possible without obfuscating the code.

This commit is contained in:
zeromus 2009-08-06 00:51:29 +00:00
parent 5b75a119f5
commit 89d32673c2
6 changed files with 150 additions and 63 deletions

View File

@ -115,7 +115,7 @@ static void mmc5_PPUWrite(uint32 A, uint8 V) {
} }
} }
uint8 mmc5_PPURead(uint32 A) { uint8 FASTCALL mmc5_PPURead(uint32 A) {
if(A<0x2000) if(A<0x2000)
{ {
if(ppuphase == PPUPHASE_BG) if(ppuphase == PPUPHASE_BG)

View File

@ -99,6 +99,8 @@ void ResetCartMapping(void)
{ {
int x; int x;
PPU_ResetHooks();
for(x=0;x<32;x++) for(x=0;x<32;x++)
{ {
Page[x]=nothing-x*2048; Page[x]=nothing-x*2048;

View File

@ -627,7 +627,10 @@ void DebugCycle() {
if (numWPs || dbgstate.step || dbgstate.runline || dbgstate.stepout || watchpoint[64].flags || dbgstate.badopbreak) if (numWPs || dbgstate.step || dbgstate.runline || dbgstate.stepout || watchpoint[64].flags || dbgstate.badopbreak)
breakpoint(); breakpoint();
if(debug_loggingCD) LogCDData(); if(debug_loggingCD) LogCDData();
//mbg 6/30/06 - this was commented out when i got here. i dont understand it anyway //mbg 6/30/06 - this was commented out when i got here. i dont understand it anyway
//if(logging || (hMemView && (EditingMode == 2))) LogInstruction(); //if(logging || (hMemView && (EditingMode == 2))) LogInstruction();
FCEUD_TraceInstruction();
extern volatile int logging;
if(logging) FCEUD_TraceInstruction();
} }

View File

@ -40,6 +40,7 @@
#include "input.h" #include "input.h"
#include "driver.h" #include "driver.h"
#define VBlankON (PPU[0]&0x80) //Generate VBlank NMI #define VBlankON (PPU[0]&0x80) //Generate VBlank NMI
#define Sprite16 (PPU[0]&0x20) //Sprites 8x16/8x8 #define Sprite16 (PPU[0]&0x20) //Sprites 8x16/8x8
#define BGAdrHI (PPU[0]&0x10) //BG pattern adr $0000/$1000 #define BGAdrHI (PPU[0]&0x10) //BG pattern adr $0000/$1000
@ -68,7 +69,6 @@ static uint32 ppulut1[256];
static uint32 ppulut2[256]; static uint32 ppulut2[256];
static uint32 ppulut3[128]; static uint32 ppulut3[128];
PPUPHASE ppuphase;
int test = 0; int test = 0;
template<typename T, int BITS> template<typename T, int BITS>
@ -102,45 +102,81 @@ BITREVLUT<uint8,8> bitrevlut;
struct PPUSTATUS struct PPUSTATUS
{ {
int sl; int32 sl;
int cycle, end_cycle; int32 cycle, end_cycle;
}; };
struct SPRITE_READ struct SPRITE_READ
{ {
int num; int32 num;
int count; int32 count;
int fetch; int32 fetch;
int found; int32 found;
int found_pos[8]; int32 found_pos[8];
int ret; int32 ret;
int last; int32 last;
int mode; int32 mode;
//zeroes every field of the sprite read state, including all eight found_pos slots
void reset() {
	for(int i=0;i<8;i++)
		found_pos[i] = 0;
	mode = 0;
	last = 0;
	ret = 0;
	found = 0;
	fetch = 0;
	count = 0;
	num = 0;
}
//primes the sprite read state for a new scanline.
//ret is deliberately left alone; every other field gets its per-line starting value.
void start_scanline()
{
	for(int i=0;i<8;i++)
		found_pos[i] = 0;
	mode = 0;
	last = 64;
	count = 0;
	fetch = 1;
	found = 0;
	num = 1;
}
}; };
struct SPRITE_READ spr_read = { 0 };
//doesn't need to be savestated as it is just a reflection of the current position in the ppu loop
PPUPHASE ppuphase;
//this needs to be savestated since a game may be trying to read from this across vblanks
SPRITE_READ spr_read;
//definitely needs to be savestated
uint8 idleSynch = 1;
//uses the internal counters concept at http://nesdev.icequake.net/PPU%20addressing.txt //uses the internal counters concept at http://nesdev.icequake.net/PPU%20addressing.txt
struct PPUREGS { struct PPUREGS {
//normal clocked regs. as the game can interfere with these at any time, they need to be savestated
uint32 fv;//3 uint32 fv;//3
uint32 v;//1 uint32 v;//1
uint32 h;//1 uint32 h;//1
uint32 vt;//5 uint32 vt;//5
uint32 ht;//5 uint32 ht;//5
uint32 fh;//3
uint32 s;//1
uint32 par;//8
uint32 ar;//2
uint32 _fv, _v, _h, _vt, _ht;
struct PPUSTATUS status;
PPUREGS()
: fv(0), v(0), h(0), vt(0), ht(0), fh(0), s(0), par(0), ar(0)
, _fv(0), _v(0), _h(0), _vt(0), _ht(0)
{ status.cycle = 0; status.end_cycle = 341;
status.sl = 241;
}
//temp unlatched regs (need savestating, can be written to at any time)
uint32 _fv, _v, _h, _vt, _ht;
//other regs that need savestating
uint32 fh;//3 (horz scroll)
uint32 s;//1 ($2000 bit 4: "Background pattern table address (0: $0000; 1: $1000)")
//other regs that don't need saving
uint32 par;//8 (sort of a hack, just stored in here, but not managed by this system)
//cached state data. these are always reset at the beginning of a frame and don't need saving
//but just to be safe, we're gonna save it
PPUSTATUS status;
//returns the register file to its initial state: all clocked regs and their
//unlatched temporaries are zeroed, and the cached status is parked at
//cycle 0 of scanline 241 with a 341-cycle line length
void reset()
{
	//cached position info
	status.sl = 241;
	status.end_cycle = 341;
	status.cycle = 0;

	//unlatched temporaries
	_ht = 0;
	_vt = 0;
	_h = 0;
	_v = 0;
	_fv = 0;

	//remaining regs
	s = 0;
	par = 0;
	fh = 0;

	//clocked regs
	ht = 0;
	vt = 0;
	h = 0;
	v = 0;
	fv = 0;
}
void install_latches() { void install_latches() {
fv = _fv; fv = _fv;
v = _v; v = _v;
@ -390,7 +426,7 @@ inline void FFCEUX_PPUWrite_Default(uint32 A, uint8 V) {
} }
} }
uint8 FFCEUX_PPURead_Default(uint32 A) { uint8 FASTCALL FFCEUX_PPURead_Default(uint32 A) {
uint32 tmp = A; uint32 tmp = A;
if(tmp<0x2000) if(tmp<0x2000)
@ -421,15 +457,10 @@ uint8 FFCEUX_PPURead_Default(uint32 A) {
} }
uint8 (*FFCEUX_PPURead)(uint32 A) = 0; uint8 (FASTCALL *FFCEUX_PPURead)(uint32 A) = 0;
void (*FFCEUX_PPUWrite)(uint32 A, uint8 V) = 0; void (*FFCEUX_PPUWrite)(uint32 A, uint8 V) = 0;
#define CALL_PPUREAD(A) (FFCEUX_PPURead?FFCEUX_PPURead(A):(\ #define CALL_PPUREAD(A) (FFCEUX_PPURead(A))
((A)<0x2000)? \
VPage[(A)>>10][(A)] \
: vnapage[((A)>>10)&0x3][(A)&0x3FF] \
))
#define CALL_PPUWRITE(A,V) (FFCEUX_PPUWrite?FFCEUX_PPUWrite(A,V):FFCEUX_PPUWrite_Default(A,V)) #define CALL_PPUWRITE(A,V) (FFCEUX_PPUWrite?FFCEUX_PPUWrite(A,V):FFCEUX_PPUWrite_Default(A,V))
@ -1777,6 +1808,11 @@ void FCEUPPU_Init(void)
makeppulut(); makeppulut();
} }
//points the PPU read hook back at the stock handler, replacing whatever handler was installed before.
//CALL_PPUREAD calls FFCEUX_PPURead with no null check, so the pointer must always reference a valid function.
//NOTE(review): FFCEUX_PPUWrite is not touched here; CALL_PPUWRITE only falls back to the default when that
//pointer is null - confirm a previously installed write hook cannot outlive a cart mapping reset.
void PPU_ResetHooks()
{
FFCEUX_PPURead = FFCEUX_PPURead_Default;
}
void FCEUPPU_Reset(void) void FCEUPPU_Reset(void)
{ {
VRAMBuffer=PPU[0]=PPU[1]=PPU_status=PPU[3]=0; VRAMBuffer=PPU[0]=PPU[1]=PPU_status=PPU[3]=0;
@ -1786,7 +1822,11 @@ void FCEUPPU_Reset(void)
vtoggle = 0; vtoggle = 0;
ppudead = 2; ppudead = 2;
kook = 0; kook = 0;
idleSynch = 1;
// XOffset=0; // XOffset=0;
ppur.reset();
spr_read.reset();
} }
void FCEUPPU_Power(void) void FCEUPPU_Power(void)
@ -1988,6 +2028,41 @@ SFORMAT FCEUPPU_STATEINFO[]={
{ 0 } { 0 }
}; };
//savestate layout for the new ppu: each entry is { address, size|flags, 4-char chunk tag },
//terminated by a zero entry. every field is stored as a 4-byte value except idleSynch (1 byte).
//ppur.par is intentionally absent from this table.
SFORMAT FCEU_NEWPPU_STATEINFO[] = {
	{ &idleSynch, 1, "IDLS" },
	//sprite read state
	{ &spr_read.num, 4|FCEUSTATE_RLSB, "SR_0" },
	{ &spr_read.count, 4|FCEUSTATE_RLSB, "SR_1" },
	{ &spr_read.fetch, 4|FCEUSTATE_RLSB, "SR_2" },
	{ &spr_read.found, 4|FCEUSTATE_RLSB, "SR_3" },
	//one chunk per found_pos slot; each tag must point at its own element,
	//otherwise slots 1..7 are never saved and slot 0 is overwritten repeatedly on load
	{ &spr_read.found_pos[0], 4|FCEUSTATE_RLSB, "SRx0" },
	{ &spr_read.found_pos[1], 4|FCEUSTATE_RLSB, "SRx1" },
	{ &spr_read.found_pos[2], 4|FCEUSTATE_RLSB, "SRx2" },
	{ &spr_read.found_pos[3], 4|FCEUSTATE_RLSB, "SRx3" },
	{ &spr_read.found_pos[4], 4|FCEUSTATE_RLSB, "SRx4" },
	{ &spr_read.found_pos[5], 4|FCEUSTATE_RLSB, "SRx5" },
	{ &spr_read.found_pos[6], 4|FCEUSTATE_RLSB, "SRx6" },
	{ &spr_read.found_pos[7], 4|FCEUSTATE_RLSB, "SRx7" },
	{ &spr_read.ret, 4|FCEUSTATE_RLSB, "SR_4" },
	{ &spr_read.last, 4|FCEUSTATE_RLSB, "SR_5" },
	{ &spr_read.mode, 4|FCEUSTATE_RLSB, "SR_6" },
	//clocked regs
	{ &ppur.fv, 4|FCEUSTATE_RLSB, "PFVx" },
	{ &ppur.v, 4|FCEUSTATE_RLSB, "PVxx" },
	{ &ppur.h, 4|FCEUSTATE_RLSB, "PHxx" },
	{ &ppur.vt, 4|FCEUSTATE_RLSB, "PVTx" },
	{ &ppur.ht, 4|FCEUSTATE_RLSB, "PHTx" },
	//unlatched temporaries
	{ &ppur._fv, 4|FCEUSTATE_RLSB, "P_FV" },
	{ &ppur._v, 4|FCEUSTATE_RLSB, "P_Vx" },
	{ &ppur._h, 4|FCEUSTATE_RLSB, "P_Hx" },
	{ &ppur._vt, 4|FCEUSTATE_RLSB, "P_VT" },
	{ &ppur._ht, 4|FCEUSTATE_RLSB, "P_HT" },
	//other regs
	{ &ppur.fh, 4|FCEUSTATE_RLSB, "PFHx" },
	{ &ppur.s, 4|FCEUSTATE_RLSB, "PSxx" },
	//cached status
	{ &ppur.status.sl, 4|FCEUSTATE_RLSB, "PST0" },
	{ &ppur.status.cycle, 4|FCEUSTATE_RLSB, "PST1" },
	{ &ppur.status.end_cycle, 4|FCEUSTATE_RLSB, "PST2" },
	{ 0 }
};
void FCEUPPU_SaveState(void) void FCEUPPU_SaveState(void)
{ {
TempAddrT=TempAddr; TempAddrT=TempAddr;
@ -2000,13 +2075,14 @@ int pputime=0;
int totpputime=0; int totpputime=0;
const int kLineTime=341; const int kLineTime=341;
const int kFetchTime=2; const int kFetchTime=2;
int idleSynch = 1;
void runppu(int x) { void runppu(int x) {
//pputime+=x; //pputime+=x;
//if(cputodo<200) return; //if(cputodo<200) return;
ppur.status.cycle = (ppur.status.cycle + x) % ppur.status.cycle = (ppur.status.cycle + x) %
ppur.status.end_cycle; ppur.status.end_cycle;
X6502_Run(x); X6502_Run(x);
//pputime -= cputodo<<2; //pputime -= cputodo<<2;
} }
@ -2016,7 +2092,7 @@ struct BGData {
struct Record { struct Record {
uint8 nt, at, pt[2]; uint8 nt, at, pt[2];
void Read() { INLINE void Read() {
RefreshAddr = ppur.get_ntread(); RefreshAddr = ppur.get_ntread();
nt = CALL_PPUREAD(RefreshAddr); nt = CALL_PPUREAD(RefreshAddr);
runppu(kFetchTime); runppu(kFetchTime);
@ -2056,6 +2132,7 @@ struct BGData {
int framectr=0; int framectr=0;
int FCEUX_PPU_Loop(int skip) { int FCEUX_PPU_Loop(int skip) {
//262 scanlines //262 scanlines
if (ppudead) if (ppudead)
{ {
@ -2108,26 +2185,20 @@ int FCEUX_PPU_Loop(int skip) {
//if(PPUON) //if(PPUON)
// ppur.install_latches(); // ppur.install_latches();
uint8 oams[2][64][7]; static uint8 oams[2][64][8]; //[7] turned to [8] for faster indexing
int oamcounts[2]={0,0}; static int oamcounts[2]={0,0};
int oamslot=0; static int oamslot=0;
int oamcount; static int oamcount;
//capture the initial xscroll //capture the initial xscroll
//int xscroll = ppur.fh; //int xscroll = ppur.fh;
//render 241 scanlines (including 1 dummy at beginning) //render 241 scanlines (including 1 dummy at beginning)
for(int sl=0;sl<241;sl++) { for(int sl=0;sl<241;sl++) {
spr_read.num = 1; spr_read.start_scanline();
spr_read.found = 0;
spr_read.fetch = 1;
spr_read.count = 0;
spr_read.last = 64;
spr_read.mode = 0;
memset(spr_read.found_pos, 0, sizeof(spr_read.found_pos));
ppur.status.sl = sl; ppur.status.sl = sl;
int yp = sl-1; const int yp = sl-1;
ppuphase = PPUPHASE_BG; ppuphase = PPUPHASE_BG;
if(sl != 0) { if(sl != 0) {
@ -2139,8 +2210,8 @@ int FCEUX_PPU_Loop(int skip) {
//twiddle the oam buffers //twiddle the oam buffers
int scanslot = oamslot^1; const int scanslot = oamslot^1;
int renderslot = oamslot; const int renderslot = oamslot;
oamslot ^= 1; oamslot ^= 1;
oamcount = oamcounts[renderslot]; oamcount = oamcounts[renderslot];
@ -2151,25 +2222,26 @@ int FCEUX_PPU_Loop(int skip) {
for(int xt=0;xt<32;xt++) { for(int xt=0;xt<32;xt++) {
bgdata.main[xt+2].Read(); bgdata.main[xt+2].Read();
//ok, we're also going to draw here. //ok, we're also going to draw here.
//unless we're on the first dummy scanline //unless we're on the first dummy scanline
if(sl != 0) { if(sl != 0) {
int xstart = xt<<3; int xstart = xt<<3;
oamcount = oamcounts[renderslot]; oamcount = oamcounts[renderslot];
uint8 *target=XBuf+(yp<<8)+xstart; uint8 * const target=XBuf+(yp<<8)+xstart;
uint8 *ptr = target; uint8 *ptr = target;
int rasterpos = xstart; int rasterpos = xstart;
//check all the conditions that can cause things to render in these 8px //check all the conditions that can cause things to render in these 8px
bool renderspritenow = SpriteON && rendersprites && (xt>0 || SpriteLeft8); const bool renderspritenow = SpriteON && rendersprites && (xt>0 || SpriteLeft8);
bool renderbgnow = ScreenON && renderbg && (xt>0 || BGLeft8); const bool renderbgnow = ScreenON && renderbg && (xt>0 || BGLeft8);
for(int xp=0;xp<8;xp++,rasterpos++) { for(int xp=0;xp<8;xp++,rasterpos++) {
//bg pos is different from raster pos due to its offsetability. //bg pos is different from raster pos due to its offsetability.
//so adjust for that here //so adjust for that here
int bgpos = rasterpos + ppur.fh; const int bgpos = rasterpos + ppur.fh;
int bgpx = bgpos&7; const int bgpx = bgpos&7;
int bgtile = bgpos>>3; const int bgtile = bgpos>>3;
uint8 pixel=0, pixelcolor; uint8 pixel=0, pixelcolor;
@ -2236,7 +2308,7 @@ int FCEUX_PPU_Loop(int skip) {
//look for sprites (was supposed to run concurrent with bg rendering) //look for sprites (was supposed to run concurrent with bg rendering)
oamcounts[scanslot] = 0; oamcounts[scanslot] = 0;
oamcount=0; oamcount=0;
int spriteHeight = Sprite16?16:8; const int spriteHeight = Sprite16?16:8;
for(int i=0;i<64;i++) { for(int i=0;i<64;i++) {
uint8* spr = SPRAM+i*4; uint8* spr = SPRAM+i*4;
if(yp >= spr[0] && yp < spr[0]+spriteHeight) { if(yp >= spr[0] && yp < spr[0]+spriteHeight) {
@ -2289,9 +2361,9 @@ int FCEUX_PPU_Loop(int skip) {
//this is how we support the no 8 sprite limit feature. //this is how we support the no 8 sprite limit feature.
//not that at some point we may need a virtual CALL_PPUREAD which just peeks and doesnt increment any counters //not that at some point we may need a virtual CALL_PPUREAD which just peeks and doesnt increment any counters
//this could be handy for the debugging tools also //this could be handy for the debugging tools also
bool realSprite = (s<8); const bool realSprite = (s<8);
uint8* oam = oams[scanslot][s]; uint8* const oam = oams[scanslot][s];
uint32 line = yp - oam[0]; uint32 line = yp - oam[0];
if(oam[2]&0x80) //vflip if(oam[2]&0x80) //vflip
line = spriteHeight-line-1; line = spriteHeight-line-1;

View File

@ -18,7 +18,14 @@ void FCEUPPU_SaveState(void);
void FCEUPPU_LoadState(int version); void FCEUPPU_LoadState(int version);
uint8* FCEUPPU_GetCHR(uint32 vadr, uint32 refreshaddr); uint8* FCEUPPU_GetCHR(uint32 vadr, uint32 refreshaddr);
extern uint8 (*FFCEUX_PPURead)(uint32 A); #ifdef _MSC_VER
#define FASTCALL __fastcall
#else
#define FASTCALL
#endif
void PPU_ResetHooks();
extern uint8 (FASTCALL *FFCEUX_PPURead)(uint32 A);
extern void (*FFCEUX_PPUWrite)(uint32 A, uint8 V); extern void (*FFCEUX_PPUWrite)(uint32 A, uint8 V);
extern int scanline; extern int scanline;

View File

@ -75,6 +75,7 @@ static int SFEXINDEX;
extern SFORMAT FCEUPPU_STATEINFO[]; extern SFORMAT FCEUPPU_STATEINFO[];
extern SFORMAT FCEU_NEWPPU_STATEINFO[];
extern SFORMAT FCEUSND_STATEINFO[]; extern SFORMAT FCEUSND_STATEINFO[];
extern SFORMAT FCEUCTRL_STATEINFO[]; extern SFORMAT FCEUCTRL_STATEINFO[];
extern SFORMAT FCEUMOV_STATEINFO[]; extern SFORMAT FCEUMOV_STATEINFO[];
@ -248,6 +249,7 @@ static bool ReadStateChunks(std::istream* is, int32 totalsize)
{ {
case 1:if(!ReadStateChunk(is,SFCPU,size)) ret=false;break; case 1:if(!ReadStateChunk(is,SFCPU,size)) ret=false;break;
case 3:if(!ReadStateChunk(is,FCEUPPU_STATEINFO,size)) ret=false;break; case 3:if(!ReadStateChunk(is,FCEUPPU_STATEINFO,size)) ret=false;break;
case 31:if(!ReadStateChunk(is,FCEU_NEWPPU_STATEINFO,size)) ret=false;break;
case 4:if(!ReadStateChunk(is,FCEUCTRL_STATEINFO,size)) ret=false;break; case 4:if(!ReadStateChunk(is,FCEUCTRL_STATEINFO,size)) ret=false;break;
case 7: case 7:
if(!FCEUMOV_ReadState(is,size)) { if(!FCEUMOV_ReadState(is,size)) {
@ -351,6 +353,7 @@ bool FCEUSS_SaveMS(std::ostream* outstream, int compressionLevel)
totalsize=WriteStateChunk(os,1,SFCPU); totalsize=WriteStateChunk(os,1,SFCPU);
totalsize+=WriteStateChunk(os,2,SFCPUC); totalsize+=WriteStateChunk(os,2,SFCPUC);
totalsize+=WriteStateChunk(os,3,FCEUPPU_STATEINFO); totalsize+=WriteStateChunk(os,3,FCEUPPU_STATEINFO);
totalsize+=WriteStateChunk(os,31,FCEU_NEWPPU_STATEINFO);
totalsize+=WriteStateChunk(os,4,FCEUCTRL_STATEINFO); totalsize+=WriteStateChunk(os,4,FCEUCTRL_STATEINFO);
totalsize+=WriteStateChunk(os,5,FCEUSND_STATEINFO); totalsize+=WriteStateChunk(os,5,FCEUSND_STATEINFO);
if(FCEUMOV_Mode(MOVIEMODE_PLAY|MOVIEMODE_RECORD)) if(FCEUMOV_Mode(MOVIEMODE_PLAY|MOVIEMODE_RECORD))