gpu: fix display FIFO capture and simultaneous VRAM capture/display from the same bank. I am beginning to rewrite the dispfifo logic, so there may be some easy-to-fix regressions. Fixes Splinter Cell.

This commit is contained in:
zeromus 2009-07-25 06:40:47 +00:00
parent c0b302f4e4
commit 443fa0c50f
5 changed files with 87 additions and 67 deletions

View File

@ -21,6 +21,8 @@ Graphics:
bug: fix 256B granularity sprite addressing for sub gpu
bug: fix 128-wide captures
bug: fix color overflow in capture blending
bug: fix disp fifo capture
bug: fix simultaneous vram display and capture via same bank
bug: swrast: add clear image and scroll emulation
bug: swrast: fixes to shadow rendering

View File

@ -53,7 +53,8 @@ GPU::MosaicLookup GPU::mosaicLookup;
//#define DEBUG_TRI
CACHE_ALIGN u8 GPU_screen[4*256*192];
CACHE_ALIGN u8 *GPU_tempScanline;
u8 *GPU_tempScanline;
CACHE_ALIGN u16 GPU_tempScanlineBuffer[256];
CACHE_ALIGN u8 sprWin[256];
@ -2536,16 +2537,15 @@ template<bool SKIP> static void GPU_ligne_DispCapture(u16 l)
//INFO("Capture source is SourceB\n");
switch (gpu->dispCapCnt.srcB)
{
case 0: // Capture VRAM
{
//INFO("Capture VRAM\n");
CAPCOPY(cap_src,cap_dst);
}
case 0:
//Capture VRAM
CAPCOPY(cap_src,cap_dst);
break;
case 1: // Capture Main Memory Display FIFO
{
//INFO("Capture Main Memory Display FIFO\n");
}
case 1:
//capture dispfifo
//(not yet tested)
for(int i=0; i < 128; i++)
T1WriteLong(cap_dst, i << 2, DISP_FIFOrecv());
break;
}
}
@ -2566,50 +2566,56 @@ template<bool SKIP> static void GPU_ligne_DispCapture(u16 l)
gfx3d_GetLineData(l, &srcA, NULL);
}
static u16 fifoLine[256];
if (gpu->dispCapCnt.srcB == 0) // VRAM screen
srcB = (u16 *)cap_src;
else
srcB = NULL; // DISP FIFOS
if ((srcA) && (srcB))
{
const int todo = (gpu->dispCapCnt.capx==DISPCAPCNT::_128?128:256);
//fifo - tested by splinter cell chaos theory thermal view
srcB = fifoLine;
for (int i=0; i < 128; i++)
T1WriteLong((u8*)srcB, i << 2, DISP_FIFOrecv());
}
for(u16 i = 0; i < todo; i++)
const int todo = (gpu->dispCapCnt.capx==DISPCAPCNT::_128?128:256);
for(u16 i = 0; i < todo; i++)
{
u16 a,r,g,b;
u16 a_alpha = srcA[i] & 0x8000;
u16 b_alpha = srcB[i] & 0x8000;
if(a_alpha)
{
u16 a,r,g,b;
a = 0x8000;
r = ((srcA[i] & 0x1F) * gpu->dispCapCnt.EVA);
g = (((srcA[i] >> 5) & 0x1F) * gpu->dispCapCnt.EVA);
b = (((srcA[i] >> 10) & 0x1F) * gpu->dispCapCnt.EVA);
}
else
a = r = g = b = 0;
u16 a_alpha = srcA[i] & 0x8000;
u16 b_alpha = srcB[i] & 0x8000;
if(a_alpha)
{
a = 0x8000;
r = ((srcA[i] & 0x1F) * gpu->dispCapCnt.EVA);
g = (((srcA[i] >> 5) & 0x1F) * gpu->dispCapCnt.EVA);
b = (((srcA[i] >> 10) & 0x1F) * gpu->dispCapCnt.EVA);
}
else
a = r = g = b = 0;
if(b_alpha)
{
a = 0x8000;
r += ((srcB[i] & 0x1F) * gpu->dispCapCnt.EVB);
g += (((srcB[i] >> 5) & 0x1F) * gpu->dispCapCnt.EVB);
b += (((srcB[i] >> 10) & 0x1F) * gpu->dispCapCnt.EVB);
}
r >>= 4;
g >>= 4;
b >>= 4;
r = std::min((u16)31,r);
g = std::min((u16)31,g);
b = std::min((u16)31,b);
T2WriteWord(cap_dst, i << 1, a | (b << 10) | (g << 5) | r);
if(b_alpha)
{
a = 0x8000;
r += ((srcB[i] & 0x1F) * gpu->dispCapCnt.EVB);
g += (((srcB[i] >> 5) & 0x1F) * gpu->dispCapCnt.EVB);
b += (((srcB[i] >> 10) & 0x1F) * gpu->dispCapCnt.EVB);
}
r >>= 4;
g >>= 4;
b >>= 4;
//freedom wings sky will overflow while doing some fsaa/motionblur effect without this
r = std::min((u16)31,r);
g = std::min((u16)31,g);
b = std::min((u16)31,b);
T2WriteWord(cap_dst, i << 1, a | (b << 10) | (g << 5) | r);
}
}
break;
@ -2821,17 +2827,17 @@ void GPU_ligne(NDS_Screen * screen, u16 l, bool skip)
gpu->setup_windows<0>();
gpu->setup_windows<1>();
//always generate the 2d+3d, no matter what we're displaying, since we may need to capture it
//(if this seems inefficient in some cases, consider that the speed in those cases is not really a problem)
GPU_tempScanline = screen->gpu->currDst = (u8 *)(GPU_screen) + (screen->offset + l) * 512;
GPU_ligne_layer(screen, l);
if (gpu->core == GPU_MAIN)
{
GPU_ligne_DispCapture<false>(l);
if (l == 191) { disp_fifo.head = disp_fifo.tail = 0; }
//generate the 2d engine output
if(gpu->dispMode == 1) {
//optimization: render straight to the output buffer when that's what we are going to end up displaying anyway
GPU_tempScanline = screen->gpu->currDst = (u8 *)(GPU_screen) + (screen->offset + l) * 512;
} else {
//otherwise, we need to go to a temp buffer
GPU_tempScanline = screen->gpu->currDst = (u8 *)GPU_tempScanlineBuffer;
}
GPU_ligne_layer(screen, l);
switch (gpu->dispMode)
{
case 0: // Display Off(Display white)
@ -2847,7 +2853,7 @@ void GPU_ligne(NDS_Screen * screen, u16 l, bool skip)
//do nothing: it has already been generated into the right place
break;
case 2: // Display framebuffer
case 2: // Display vram framebuffer
{
u8 * dst = GPU_screen + (screen->offset + l) * 512;
u8 * src = gpu->VRAMaddr + (l*512);
@ -2856,6 +2862,8 @@ void GPU_ligne(NDS_Screen * screen, u16 l, bool skip)
break;
case 3: // Display memory FIFO
{
//this has not been tested since the dma timing for dispfifo was changed around the time of
//newemuloop. it may not work.
u8 * dst = GPU_screen + (screen->offset + l) * 512;
for (int i=0; i < 128; i++)
T1WriteLong(dst, i << 2, DISP_FIFOrecv() & 0x7FFF7FFF);
@ -2863,6 +2871,17 @@ void GPU_ligne(NDS_Screen * screen, u16 l, bool skip)
break;
}
//capture after displaying so that we can safely display vram before overwriting it here
if (gpu->core == GPU_MAIN)
{
//BUG!!! if a game captures from the fifo and displays from the fifo at the same time, the fifo
//will already have been consumed above by the display before we get here
//(is that even legal? i think so)
GPU_ligne_DispCapture<false>(l);
if (l == 191) { disp_fifo.head = disp_fifo.tail = 0; }
}
GPU_ligne_MasterBrightness(screen, l);
}

View File

@ -1090,15 +1090,12 @@ void FASTCALL MMU_doDMA(u32 num)
taille = (MMU.DMACrt[PROCNUM][num]&0x1FFFFF);
if(taille == 0) taille = 0x200000; //according to gbatek..
//THIS IS A BIG HACK
// If we are in "Main memory display" mode just copy an entire
// screen (256x192 pixels).
// Reference: http://nocash.emubase.de/gbatek.htm#dsvideocaptureandmainmemorydisplaymode
// (under DISP_MMEM_FIFO)
if ((MMU.DMAStartTime[PROCNUM][num]==EDMAMode_MemDisplay) && // Must be in main memory display mode
(taille==4) && // Word must be 4
(((MMU.DMACrt[PROCNUM][num]>>26)&1) == 1)) // Transfer mode must be 32bit wide
taille = 24576; //256*192/2;
//for main memory display fifo dmas, check for normal conditions and then dma all 128 words (one scanline) at once
//(they'll get sent to the fifo, which can handle more than it ought to be able to)
if ((MMU.DMAStartTime[PROCNUM][num]==EDMAMode_MemDisplay) &&
(taille==4) &&
(((MMU.DMACrt[PROCNUM][num]>>26)&1) == 1))
taille = 128;
if(MMU.DMAStartTime[PROCNUM][num] == EDMAMode_Card)
taille *= 0x80;

View File

@ -1973,7 +1973,10 @@ static void execHardware_hstart()
if(nds.VCount<192)
{
//this is hacky.
//there is a corresponding hack in doDMA
//there is a corresponding hack in doDMA.
//it should be driven by a fifo (and generate just in time as the scanline is displayed)
//but that isn't even possible until we have some sort of sub-scanline timing.
//it may not be necessary.
execHardware_doAllDma(EDMAMode_MemDisplay);
}

View File

@ -345,7 +345,6 @@ struct Shader
{
mode = (polyattr>>4)&0x3;
//if there is no texture set, then set to the mode which doesnt even use a texture
//unless we're in shadow
if(sampler.texFormat == 0 && mode != 3)
mode = 4;
}