Render3D: Small optimization to scrolling clear images on non-SSE2 systems.
- Also fix a depth bug for scrolling clear images on SSE2 systems by disabling the SSE2-specific code. This issue will need to be researched at a later date.
This commit is contained in:
parent
1767651b19
commit
7c80205a40
|
@ -488,6 +488,75 @@ Render3DError Render3D::UpdateToonTable(const u16 *toonTableBuffer)
|
|||
return RENDER3DERROR_NOERR;
|
||||
}
|
||||
|
||||
template <bool ISCOLORBLANK, bool ISDEPTHBLANK>
|
||||
void Render3D::_ClearImageScrolledLoop(const u8 xScroll, const u8 yScroll, const u16 *__restrict inColor16, const u16 *__restrict inDepth16, const u8 inPolyID,
|
||||
u16 *__restrict outColor16, u32 *__restrict outDepth24, u8 *__restrict outFog, u8 *__restrict outPolyID)
|
||||
{
|
||||
if (ISCOLORBLANK && ISDEPTHBLANK)
|
||||
{
|
||||
memset(outColor16, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * sizeof(u16));
|
||||
memset(outDepth24, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * sizeof(u32));
|
||||
memset(outFog, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * sizeof(u8));
|
||||
memset(outPolyID, inPolyID, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * sizeof(u8));
|
||||
}
|
||||
else
|
||||
{
|
||||
if (ISCOLORBLANK)
|
||||
{
|
||||
// Hint to when the clear color image pointer is pointing to blank memory.
|
||||
// In this case, just do a simple zero fill for speed.
|
||||
//
|
||||
// Test cases:
|
||||
// - Sonic Chronicles: The Dark Brotherhood
|
||||
// - The Chronicles of Narnia: The Lion, the Witch and the Wardrobe
|
||||
memset(outColor16, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * sizeof(u16));
|
||||
}
|
||||
|
||||
if (ISDEPTHBLANK)
|
||||
{
|
||||
memset(outDepth24, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * sizeof(u32));
|
||||
memset(outFog, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * sizeof(u8));
|
||||
}
|
||||
|
||||
for (size_t dstIndex = 0, iy = 0; iy < GPU_FRAMEBUFFER_NATIVE_HEIGHT; iy++)
|
||||
{
|
||||
const size_t y = ((iy + yScroll) & 0xFF) << 8;
|
||||
|
||||
for (size_t ix = 0; ix < GPU_FRAMEBUFFER_NATIVE_WIDTH; dstIndex++, ix++)
|
||||
{
|
||||
const size_t x = (ix + xScroll) & 0xFF;
|
||||
const size_t srcIndex = y | x;
|
||||
|
||||
// Clear image color buffer in RGBA5551 format.
|
||||
//
|
||||
// Test cases:
|
||||
// - Harry Potter and the Order of Phoenix
|
||||
// - Blazer Drive
|
||||
if (!ISCOLORBLANK)
|
||||
{
|
||||
outColor16[dstIndex] = inColor16[srcIndex];
|
||||
}
|
||||
|
||||
// Clear image depth buffer, where the first 15 bits are converted to
|
||||
// 24-bit depth, and the remaining MSB is the fog flag.
|
||||
//
|
||||
// Test cases:
|
||||
// - Harry Potter and the Order of Phoenix
|
||||
// - Blazer Drive
|
||||
// - Sonic Chronicles: The Dark Brotherhood
|
||||
// - The Chronicles of Narnia: The Lion, the Witch and the Wardrobe
|
||||
if (!ISDEPTHBLANK)
|
||||
{
|
||||
outDepth24[dstIndex] = DS_DEPTH15TO24(inDepth16[srcIndex]);
|
||||
outFog[dstIndex] = BIT15(inDepth16[srcIndex]);
|
||||
}
|
||||
|
||||
outPolyID[dstIndex] = inPolyID;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Render3DError Render3D::ClearFramebuffer(const GFX3D_State &renderState)
|
||||
{
|
||||
Render3DError error = RENDER3DERROR_NOERR;
|
||||
|
@ -530,27 +599,28 @@ Render3DError Render3D::ClearFramebuffer(const GFX3D_State &renderState)
|
|||
}
|
||||
else
|
||||
{
|
||||
for (size_t dstIndex = 0, iy = 0; iy < GPU_FRAMEBUFFER_NATIVE_HEIGHT; iy++)
|
||||
const bool isClearColorBlank = (clearColorBuffer >= (u16 *)MMU.blank_memory);
|
||||
const bool isClearDepthBlank = (clearDepthBuffer >= (u16 *)MMU.blank_memory);
|
||||
|
||||
if (!isClearColorBlank && !isClearDepthBlank)
|
||||
{
|
||||
const size_t y = ((iy + yScroll) & 0xFF) << 8;
|
||||
|
||||
for (size_t ix = 0; ix < GPU_FRAMEBUFFER_NATIVE_WIDTH; dstIndex++, ix++)
|
||||
{
|
||||
const size_t x = (ix + xScroll) & 0xFF;
|
||||
const size_t srcIndex = y | x;
|
||||
|
||||
//this is tested by harry potter and the order of the phoenix.
|
||||
//TODO (optimization) dont do this if we are mapped to blank memory (such as in sonic chronicles)
|
||||
//(or use a special zero fill in the bulk clearing above)
|
||||
this->clearImageColor16Buffer[dstIndex] = clearColorBuffer[srcIndex];
|
||||
|
||||
//this is tested quite well in the sonic chronicles main map mode
|
||||
//where depth values are used for trees etc you can walk behind
|
||||
this->clearImageDepthBuffer[dstIndex] = DS_DEPTH15TO24(clearDepthBuffer[srcIndex]);
|
||||
|
||||
this->clearImageFogBuffer[dstIndex] = BIT15(clearDepthBuffer[srcIndex]);
|
||||
this->clearImagePolyIDBuffer[dstIndex] = clearFragment.opaquePolyID;
|
||||
}
|
||||
this->_ClearImageScrolledLoop<false, false>(xScroll, yScroll, clearColorBuffer, clearDepthBuffer, clearFragment.opaquePolyID,
|
||||
this->clearImageColor16Buffer, this->clearImageDepthBuffer, this->clearImageFogBuffer, this->clearImagePolyIDBuffer);
|
||||
}
|
||||
else if (isClearColorBlank)
|
||||
{
|
||||
this->_ClearImageScrolledLoop< true, false>(xScroll, yScroll, clearColorBuffer, clearDepthBuffer, clearFragment.opaquePolyID,
|
||||
this->clearImageColor16Buffer, this->clearImageDepthBuffer, this->clearImageFogBuffer, this->clearImagePolyIDBuffer);
|
||||
}
|
||||
else if (isClearDepthBlank)
|
||||
{
|
||||
this->_ClearImageScrolledLoop<false, true>(xScroll, yScroll, clearColorBuffer, clearDepthBuffer, clearFragment.opaquePolyID,
|
||||
this->clearImageColor16Buffer, this->clearImageDepthBuffer, this->clearImageFogBuffer, this->clearImagePolyIDBuffer);
|
||||
}
|
||||
else
|
||||
{
|
||||
this->_ClearImageScrolledLoop< true, true>(xScroll, yScroll, clearColorBuffer, clearDepthBuffer, clearFragment.opaquePolyID,
|
||||
this->clearImageColor16Buffer, this->clearImageDepthBuffer, this->clearImageFogBuffer, this->clearImagePolyIDBuffer);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -758,6 +828,11 @@ Render3DError Render3D_SSE2::ClearFramebuffer(const GFX3D_State &renderState)
|
|||
}
|
||||
else
|
||||
{
|
||||
// FIXME: Fix SSE2 support for scrolled clear images.
|
||||
// The depth-related code below doesn't actually work, and I don't know why
|
||||
// this is, so just use the scalar version for now.
|
||||
// - rogerman, 2018/09/19
|
||||
/*
|
||||
const size_t shiftCount = xScroll & 0x07;
|
||||
|
||||
for (size_t dstIndex = 0, iy = 0; iy < GPU_FRAMEBUFFER_NATIVE_HEIGHT; iy++)
|
||||
|
@ -850,6 +925,30 @@ Render3DError Render3D_SSE2::ClearFramebuffer(const GFX3D_State &renderState)
|
|||
_mm_storel_epi64((__m128i *)(this->clearImagePolyIDBuffer + dstIndex), opaquePolyID_vec128);
|
||||
}
|
||||
}
|
||||
*/
|
||||
const bool isClearColorBlank = (clearColorBuffer >= (u16 *)MMU.blank_memory);
|
||||
const bool isClearDepthBlank = (clearDepthBuffer >= (u16 *)MMU.blank_memory);
|
||||
|
||||
if (!isClearColorBlank && !isClearDepthBlank)
|
||||
{
|
||||
this->_ClearImageScrolledLoop<false, false>(xScroll, yScroll, clearColorBuffer, clearDepthBuffer, clearFragment.opaquePolyID,
|
||||
this->clearImageColor16Buffer, this->clearImageDepthBuffer, this->clearImageFogBuffer, this->clearImagePolyIDBuffer);
|
||||
}
|
||||
else if (isClearColorBlank)
|
||||
{
|
||||
this->_ClearImageScrolledLoop< true, false>(xScroll, yScroll, clearColorBuffer, clearDepthBuffer, clearFragment.opaquePolyID,
|
||||
this->clearImageColor16Buffer, this->clearImageDepthBuffer, this->clearImageFogBuffer, this->clearImagePolyIDBuffer);
|
||||
}
|
||||
else if (isClearDepthBlank)
|
||||
{
|
||||
this->_ClearImageScrolledLoop<false, true>(xScroll, yScroll, clearColorBuffer, clearDepthBuffer, clearFragment.opaquePolyID,
|
||||
this->clearImageColor16Buffer, this->clearImageDepthBuffer, this->clearImageFogBuffer, this->clearImagePolyIDBuffer);
|
||||
}
|
||||
else
|
||||
{
|
||||
this->_ClearImageScrolledLoop< true, true>(xScroll, yScroll, clearColorBuffer, clearDepthBuffer, clearFragment.opaquePolyID,
|
||||
this->clearImageColor16Buffer, this->clearImageDepthBuffer, this->clearImageFogBuffer, this->clearImagePolyIDBuffer);
|
||||
}
|
||||
}
|
||||
|
||||
error = this->ClearUsingImage(this->clearImageColor16Buffer, this->clearImageDepthBuffer, this->clearImageFogBuffer, this->clearImagePolyIDBuffer);
|
||||
|
|
|
@ -183,7 +183,11 @@ protected:
|
|||
CACHE_ALIGN u16 clearImageColor16Buffer[GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT];
|
||||
CACHE_ALIGN u32 clearImageDepthBuffer[GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT];
|
||||
CACHE_ALIGN u8 clearImageFogBuffer[GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT];
|
||||
CACHE_ALIGN u8 clearImagePolyIDBuffer[GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT];
|
||||
CACHE_ALIGN u8 clearImagePolyIDBuffer[GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT];
|
||||
|
||||
// Fills the clear image color/depth/fog/polyID output buffers from the clear color and depth source images, offset by xScroll/yScroll.
// ISCOLORBLANK / ISDEPTHBLANK select a plain zero fill of the corresponding outputs instead of a per-pixel copy from that source image.
template<bool ISCOLORBLANK, bool ISDEPTHBLANK> void _ClearImageScrolledLoop(const u8 xScroll, const u8 yScroll, const u16 *__restrict inColor16, const u16 *__restrict inDepth16, const u8 inPolyID,
|
||||
u16 *__restrict outColor16, u32 *__restrict outDepth24, u8 *__restrict outFog, u8 *__restrict outPolyID);
|
||||
|
||||
|
||||
virtual Render3DError BeginRender(const GFX3D &engine);
|
||||
virtual Render3DError RenderGeometry(const GFX3D_State &renderState, const POLYLIST *polyList, const INDEXLIST *indexList);
|
||||
|
|
Loading…
Reference in New Issue