GPU: For native-sized captures, always read from native VRAM -- never from custom VRAM. Should make captures a little faster when running a custom framebuffer and/or using RGB888 color format.

This commit is contained in:
rogerman 2017-07-21 12:49:57 -07:00
parent 1b483d266a
commit 301c875ecb
1 changed files with 33 additions and 90 deletions

View File

@ -6127,7 +6127,9 @@ void GPUEngineA::_RenderLine_DisplayCapture(const u16 l)
//this has not been tested yet (I thought I needed it for hotel dusk, but it was fixed by the above) //this has not been tested yet (I thought I needed it for hotel dusk, but it was fixed by the above)
dstNativeOffset &= 0x0000FFFF; dstNativeOffset &= 0x0000FFFF;
const u16 *vram16 = (this->isLineCaptureNative[vramReadBlock][readLineIndexWithOffset]) ? (u16 *)MMU.blank_memory : (u16 *)GPU->GetCustomVRAMBlankBuffer(); const u16 *vramNative16 = (u16 *)MMU.blank_memory;
const u16 *vramCustom16 = (u16 *)GPU->GetCustomVRAMBlankBuffer();
const u32 *vramCustom32 = (u32 *)GPU->GetCustomVRAMBlankBuffer();
u16 *dstNative16 = this->_VRAMNativeBlockPtr[vramWriteBlock] + dstNativeOffset; u16 *dstNative16 = this->_VRAMNativeBlockPtr[vramWriteBlock] + dstNativeOffset;
bool readNativeVRAM = true; bool readNativeVRAM = true;
bool captureLineNativeState32 = newCaptureLineNativeState; bool captureLineNativeState32 = newCaptureLineNativeState;
@ -6152,16 +6154,14 @@ void GPUEngineA::_RenderLine_DisplayCapture(const u16 l)
// Convert VRAM for native VRAM capture. // Convert VRAM for native VRAM capture.
if ( (DISPCAPCNT.SrcB == 0) && (DISPCAPCNT.CaptureSrc != 0) && (vramConfiguration.banks[vramReadBlock].purpose == VramConfiguration::LCDC) ) if ( (DISPCAPCNT.SrcB == 0) && (DISPCAPCNT.CaptureSrc != 0) && (vramConfiguration.banks[vramReadBlock].purpose == VramConfiguration::LCDC) )
{
this->VerifyVRAMLineDidChange(vramReadBlock, readLineIndexWithOffset);
if (this->isLineCaptureNative[vramReadBlock][readLineIndexWithOffset])
{ {
size_t vramNativeOffset = readLineIndexWithOffset * GPU_FRAMEBUFFER_NATIVE_WIDTH; size_t vramNativeOffset = readLineIndexWithOffset * GPU_FRAMEBUFFER_NATIVE_WIDTH;
vramNativeOffset &= 0x0000FFFF; vramNativeOffset &= 0x0000FFFF;
vram16 = this->_VRAMNativeBlockPtr[vramReadBlock] + vramNativeOffset; vramNative16 = this->_VRAMNativeBlockPtr[vramReadBlock] + vramNativeOffset;
}
else this->VerifyVRAMLineDidChange(vramReadBlock, readLineIndexWithOffset);
if (!this->isLineCaptureNative[vramReadBlock][readLineIndexWithOffset])
{ {
size_t vramCustomOffset = ((this->_dispCapCnt.readOffset * _gpuCaptureLineIndex[64]) + _gpuCaptureLineIndex[l]) * dispInfo.customWidth; size_t vramCustomOffset = ((this->_dispCapCnt.readOffset * _gpuCaptureLineIndex[64]) + _gpuCaptureLineIndex[l]) * dispInfo.customWidth;
while (vramCustomOffset >= _gpuVRAMBlockOffset) while (vramCustomOffset >= _gpuVRAMBlockOffset)
@ -6173,12 +6173,11 @@ void GPUEngineA::_RenderLine_DisplayCapture(const u16 l)
{ {
case NDSColorFormat_BGR555_Rev: case NDSColorFormat_BGR555_Rev:
case NDSColorFormat_BGR666_Rev: case NDSColorFormat_BGR666_Rev:
vram16 = (u16 *)this->_VRAMCustomBlockPtr[vramReadBlock] + vramCustomOffset; vramCustom16 = (u16 *)this->_VRAMCustomBlockPtr[vramReadBlock] + vramCustomOffset;
break; break;
case NDSColorFormat_BGR888_Rev: case NDSColorFormat_BGR888_Rev:
ColorspaceConvertBuffer8888To5551<false, false>((u32 *)this->_VRAMCustomBlockPtr[vramReadBlock] + vramCustomOffset, this->_captureWorkingB16, compInfo.line.pixelCount); vramCustom32 = (u32 *)this->_VRAMCustomBlockPtr[vramReadBlock] + vramCustomOffset;
vram16 = this->_captureWorkingB16;
break; break;
} }
@ -6188,7 +6187,7 @@ void GPUEngineA::_RenderLine_DisplayCapture(const u16 l)
static CACHE_ALIGN u16 fifoLine16[GPU_FRAMEBUFFER_NATIVE_WIDTH]; static CACHE_ALIGN u16 fifoLine16[GPU_FRAMEBUFFER_NATIVE_WIDTH];
const u16 *srcA16 = (DISPCAPCNT.SrcA == 0) ? ((OUTPUTFORMAT != NDSColorFormat_BGR555_Rev) ? this->_captureWorkingA16 : (u16 *)compInfo.target.lineColorHead) : this->_3DFramebuffer16 + compInfo.line.blockOffsetCustom; const u16 *srcA16 = (DISPCAPCNT.SrcA == 0) ? ((OUTPUTFORMAT != NDSColorFormat_BGR555_Rev) ? this->_captureWorkingA16 : (u16 *)compInfo.target.lineColorHead) : this->_3DFramebuffer16 + compInfo.line.blockOffsetCustom;
const u16 *srcB16 = (DISPCAPCNT.SrcB == 0) ? vram16 : fifoLine16; const u16 *srcB16 = (DISPCAPCNT.SrcB == 0) ? vramNative16 : fifoLine16;
switch (DISPCAPCNT.CaptureSrc) switch (DISPCAPCNT.CaptureSrc)
{ {
@ -6238,16 +6237,8 @@ void GPUEngineA::_RenderLine_DisplayCapture(const u16 l)
switch (DISPCAPCNT.SrcB) switch (DISPCAPCNT.SrcB)
{ {
case 0: // Capture VRAM case 0: // Capture VRAM
{
if (this->isLineCaptureNative[vramReadBlock][readLineIndexWithOffset])
{ {
this->_RenderLine_DispCapture_Copy<NDSColorFormat_BGR555_Rev, 0, CAPTURELENGTH, true, true>(srcB16, dstNative16, CAPTURELENGTH, 1); this->_RenderLine_DispCapture_Copy<NDSColorFormat_BGR555_Rev, 0, CAPTURELENGTH, true, true>(srcB16, dstNative16, CAPTURELENGTH, 1);
}
else
{
this->_RenderLine_DispCapture_Copy<NDSColorFormat_BGR555_Rev, 0, CAPTURELENGTH, false, true>(srcB16, dstNative16, CAPTURELENGTH, 1);
}
newCaptureLineNativeState = this->isLineCaptureNative[vramReadBlock][readLineIndexWithOffset]; newCaptureLineNativeState = this->isLineCaptureNative[vramReadBlock][readLineIndexWithOffset];
break; break;
} }
@ -6266,29 +6257,14 @@ void GPUEngineA::_RenderLine_DisplayCapture(const u16 l)
default: // Capture source is SourceA+B blended default: // Capture source is SourceA+B blended
{ {
//INFO("Capture source is SourceA+B blended\n"); //INFO("Capture source is SourceA+B blended\n");
if (DISPCAPCNT.SrcA == 0)
{
if ( (DISPCAPCNT.SrcB == 0) && !this->isLineCaptureNative[vramReadBlock][readLineIndexWithOffset] )
{
if (this->isLineRenderNative[l])
{
this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, true, false, true>(srcA16, srcB16, dstNative16, CAPTURELENGTH, 1);
}
else
{
this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, false, false, true>(srcA16, srcB16, dstNative16, CAPTURELENGTH, 1);
}
newCaptureLineNativeState = false;
}
else
{
if (DISPCAPCNT.SrcB != 0) if (DISPCAPCNT.SrcB != 0)
{ {
// fifo - tested by splinter cell chaos theory thermal view // fifo - tested by splinter cell chaos theory thermal view
this->_RenderLine_DispCapture_FIFOToBuffer(fifoLine16); this->_RenderLine_DispCapture_FIFOToBuffer(fifoLine16);
} }
if (DISPCAPCNT.SrcA == 0)
{
if (this->isLineRenderNative[l]) if (this->isLineRenderNative[l])
{ {
this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, true, true, true>(srcA16, srcB16, dstNative16, CAPTURELENGTH, 1); this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, true, true, true>(srcA16, srcB16, dstNative16, CAPTURELENGTH, 1);
@ -6298,47 +6274,18 @@ void GPUEngineA::_RenderLine_DisplayCapture(const u16 l)
this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, false, true, true>(srcA16, srcB16, dstNative16, CAPTURELENGTH, 1); this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, false, true, true>(srcA16, srcB16, dstNative16, CAPTURELENGTH, 1);
} }
newCaptureLineNativeState = this->isLineRenderNative[l]; newCaptureLineNativeState = this->isLineRenderNative[l] && ((DISPCAPCNT.SrcB != 0) || this->isLineCaptureNative[vramReadBlock][readLineIndexWithOffset]);
}
} }
else else
{ {
if (is3DFramebufferNativeSize) if (is3DFramebufferNativeSize)
{ {
if ( (DISPCAPCNT.SrcB == 0) && !this->isLineCaptureNative[vramReadBlock][readLineIndexWithOffset] )
{
this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, true, false, true>(srcA16, srcB16, dstNative16, CAPTURELENGTH, 1);
newCaptureLineNativeState = false;
}
else
{
if (DISPCAPCNT.SrcB != 0)
{
// fifo - tested by splinter cell chaos theory thermal view
this->_RenderLine_DispCapture_FIFOToBuffer(fifoLine16);
}
this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, true, true, true>(srcA16, srcB16, dstNative16, CAPTURELENGTH, 1); this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, true, true, true>(srcA16, srcB16, dstNative16, CAPTURELENGTH, 1);
newCaptureLineNativeState = true; newCaptureLineNativeState = (DISPCAPCNT.SrcB != 0) || this->isLineCaptureNative[vramReadBlock][readLineIndexWithOffset];
}
} }
else else
{ {
if ( (DISPCAPCNT.SrcB == 0) && !this->isLineCaptureNative[vramReadBlock][readLineIndexWithOffset] )
{
this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, false, false, true>(srcA16, srcB16, dstNative16, CAPTURELENGTH, 1);
}
else
{
if (DISPCAPCNT.SrcB != 0)
{
// fifo - tested by splinter cell chaos theory thermal view
this->_RenderLine_DispCapture_FIFOToBuffer(fifoLine16);
}
this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, false, true, true>(srcA16, srcB16, dstNative16, CAPTURELENGTH, 1); this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, false, true, true>(srcA16, srcB16, dstNative16, CAPTURELENGTH, 1);
}
newCaptureLineNativeState = false; newCaptureLineNativeState = false;
} }
} }
@ -6383,7 +6330,6 @@ void GPUEngineA::_RenderLine_DisplayCapture(const u16 l)
if (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev) if (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev)
{ {
static CACHE_ALIGN FragmentColor fifoLine32[GPU_FRAMEBUFFER_NATIVE_WIDTH]; static CACHE_ALIGN FragmentColor fifoLine32[GPU_FRAMEBUFFER_NATIVE_WIDTH];
u32 *vram32 = (u32 *)GPU->GetCustomVRAMBlankBuffer();
FragmentColor *dstCustom32 = (FragmentColor *)this->_VRAMCustomBlockPtr[vramWriteBlock] + dstCustomOffset; FragmentColor *dstCustom32 = (FragmentColor *)this->_VRAMCustomBlockPtr[vramWriteBlock] + dstCustomOffset;
bool isLineCaptureNative32 = ( (vramWriteBlock == vramReadBlock) && (writeLineIndexWithOffset == readLineIndexWithOffset) ) ? captureLineNativeState32 : this->isLineCaptureNative[vramReadBlock][readLineIndexWithOffset]; bool isLineCaptureNative32 = ( (vramWriteBlock == vramReadBlock) && (writeLineIndexWithOffset == readLineIndexWithOffset) ) ? captureLineNativeState32 : this->isLineCaptureNative[vramReadBlock][readLineIndexWithOffset];
@ -6394,22 +6340,14 @@ void GPUEngineA::_RenderLine_DisplayCapture(const u16 l)
if ( (DISPCAPCNT.SrcB == 0) && (DISPCAPCNT.CaptureSrc != 0) && (vramConfiguration.banks[vramReadBlock].purpose == VramConfiguration::LCDC) ) if ( (DISPCAPCNT.SrcB == 0) && (DISPCAPCNT.CaptureSrc != 0) && (vramConfiguration.banks[vramReadBlock].purpose == VramConfiguration::LCDC) )
{ {
size_t vramCustomOffset = ((this->_dispCapCnt.readOffset * _gpuCaptureLineIndex[64]) + _gpuCaptureLineIndex[l]) * dispInfo.customWidth;
while (vramCustomOffset >= _gpuVRAMBlockOffset)
{
vramCustomOffset -= _gpuVRAMBlockOffset;
}
vram32 = (u32 *)this->_VRAMCustomBlockPtr[vramReadBlock] + vramCustomOffset;
if (readNativeVRAM) if (readNativeVRAM)
{ {
ColorspaceConvertBuffer555To8888Opaque<false, false>(vram16, vram32, GPU_FRAMEBUFFER_NATIVE_WIDTH); ColorspaceConvertBuffer555To8888Opaque<false, false>(vramNative16, (u32 *)vramCustom32, GPU_FRAMEBUFFER_NATIVE_WIDTH);
} }
} }
const u32 *srcA32 = (DISPCAPCNT.SrcA == 0) ? (u32 *)compInfo.target.lineColorHead : (u32 *)CurrentRenderer->GetFramebuffer() + compInfo.line.blockOffsetCustom; const u32 *srcA32 = (DISPCAPCNT.SrcA == 0) ? (u32 *)compInfo.target.lineColorHead : (u32 *)CurrentRenderer->GetFramebuffer() + compInfo.line.blockOffsetCustom;
const u32 *srcB32 = (DISPCAPCNT.SrcB == 0) ? vram32 : (u32 *)fifoLine32; const u32 *srcB32 = (DISPCAPCNT.SrcB == 0) ? vramCustom32 : (u32 *)fifoLine32;
switch (DISPCAPCNT.CaptureSrc) switch (DISPCAPCNT.CaptureSrc)
{ {
@ -6507,6 +6445,11 @@ void GPUEngineA::_RenderLine_DisplayCapture(const u16 l)
} }
else else
{ {
if (!this->isLineCaptureNative[vramReadBlock][readLineIndexWithOffset] && (DISPCAPCNT.SrcB == 0))
{
srcB16 = vramCustom16;
}
u16 *dstCustom16 = (u16 *)this->_VRAMCustomBlockPtr[vramWriteBlock] + dstCustomOffset; u16 *dstCustom16 = (u16 *)this->_VRAMCustomBlockPtr[vramWriteBlock] + dstCustomOffset;
switch (DISPCAPCNT.CaptureSrc) switch (DISPCAPCNT.CaptureSrc)