Render3D:
- Provide a means of specifying which specific framebuffers need to be flushed for each frame.
This commit is contained in:
parent
ed1879dc8f
commit
f68df5b976
|
@ -4089,6 +4089,18 @@ void GPUEngineA::SetCustomFramebufferSize(size_t w, size_t h)
|
||||||
free_aligned(oldColorRGBA5551Buffer);
|
free_aligned(oldColorRGBA5551Buffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool GPUEngineA::Is3DRenderedLayerNeeded()
|
||||||
|
{
|
||||||
|
return ( this->_enableLayer[GPULayerID_BG0] && (this->_IORegisterMap->DISPCNT.BG0_3D != 0) );
|
||||||
|
}
|
||||||
|
|
||||||
|
bool GPUEngineA::Is3DCapturingNeeded()
|
||||||
|
{
|
||||||
|
const IOREG_DISPCAPCNT &DISPCAPCNT = this->_IORegisterMap->DISPCAPCNT;
|
||||||
|
return ( (DISPCAPCNT.CaptureEnable != 0) && (vramConfiguration.banks[DISPCAPCNT.VRAMWriteBlock].purpose == VramConfiguration::LCDC) && (DISPCAPCNT.SrcA != 0) );
|
||||||
|
}
|
||||||
|
|
||||||
template<bool ISCUSTOMRENDERINGNEEDED>
|
template<bool ISCUSTOMRENDERINGNEEDED>
|
||||||
void GPUEngineA::RenderLine(const u16 l)
|
void GPUEngineA::RenderLine(const u16 l)
|
||||||
{
|
{
|
||||||
|
@ -5681,7 +5693,10 @@ void GPUSubsystem::RenderLine(const u16 l, bool isFrameSkipRequested)
|
||||||
{
|
{
|
||||||
if (l == 0)
|
if (l == 0)
|
||||||
{
|
{
|
||||||
|
CurrentRenderer->SetFramebufferFlushStates(this->_engineMain->Is3DRenderedLayerNeeded(), this->_engineMain->Is3DCapturingNeeded());
|
||||||
CurrentRenderer->RenderFinish();
|
CurrentRenderer->RenderFinish();
|
||||||
|
CurrentRenderer->SetFramebufferFlushStates(true, true);
|
||||||
|
|
||||||
this->_event->DidFrameBegin();
|
this->_event->DidFrameBegin();
|
||||||
this->UpdateVRAM3DUsageProperties();
|
this->UpdateVRAM3DUsageProperties();
|
||||||
|
|
||||||
|
|
|
@ -1404,6 +1404,9 @@ public:
|
||||||
u16* Get3DFramebufferRGBA5551() const;
|
u16* Get3DFramebufferRGBA5551() const;
|
||||||
virtual void SetCustomFramebufferSize(size_t w, size_t h);
|
virtual void SetCustomFramebufferSize(size_t w, size_t h);
|
||||||
|
|
||||||
|
bool Is3DRenderedLayerNeeded();
|
||||||
|
bool Is3DCapturingNeeded();
|
||||||
|
|
||||||
template<bool ISCUSTOMRENDERINGNEEDED> void RenderLine(const u16 l);
|
template<bool ISCUSTOMRENDERINGNEEDED> void RenderLine(const u16 l);
|
||||||
void FramebufferPostprocess();
|
void FramebufferPostprocess();
|
||||||
};
|
};
|
||||||
|
|
|
@ -895,7 +895,7 @@ void OpenGLRenderer::SetVersion(unsigned int major, unsigned int minor, unsigned
|
||||||
|
|
||||||
Render3DError OpenGLRenderer::FlushFramebuffer(const FragmentColor *__restrict srcRGBA8888, FragmentColor *__restrict dstRGBA6665, u16 *__restrict dstRGBA5551)
|
Render3DError OpenGLRenderer::FlushFramebuffer(const FragmentColor *__restrict srcRGBA8888, FragmentColor *__restrict dstRGBA6665, u16 *__restrict dstRGBA5551)
|
||||||
{
|
{
|
||||||
if (srcRGBA8888 == NULL)
|
if ( srcRGBA8888 == NULL || ((dstRGBA6665 == NULL) && (dstRGBA5551 == NULL)) )
|
||||||
{
|
{
|
||||||
return RENDER3DERROR_NOERR;
|
return RENDER3DERROR_NOERR;
|
||||||
}
|
}
|
||||||
|
@ -907,6 +907,8 @@ Render3DError OpenGLRenderer::FlushFramebuffer(const FragmentColor *__restrict s
|
||||||
const size_t pixCount = this->_framebufferWidth;
|
const size_t pixCount = this->_framebufferWidth;
|
||||||
const size_t ssePixCount = pixCount - (pixCount % 4);
|
const size_t ssePixCount = pixCount - (pixCount % 4);
|
||||||
|
|
||||||
|
if ( (dstRGBA6665 != NULL) && (dstRGBA5551 != NULL) )
|
||||||
|
{
|
||||||
for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, iw -= (this->_framebufferWidth * 2))
|
for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, iw -= (this->_framebufferWidth * 2))
|
||||||
{
|
{
|
||||||
size_t x = 0;
|
size_t x = 0;
|
||||||
|
@ -914,8 +916,10 @@ Render3DError OpenGLRenderer::FlushFramebuffer(const FragmentColor *__restrict s
|
||||||
#if defined(ENABLE_SSSE3) && defined(LOCAL_LE)
|
#if defined(ENABLE_SSSE3) && defined(LOCAL_LE)
|
||||||
for (; x < ssePixCount; x += 4, ir += 4, iw += 4)
|
for (; x < ssePixCount; x += 4, ir += 4, iw += 4)
|
||||||
{
|
{
|
||||||
|
__m128i color;
|
||||||
|
|
||||||
// Convert to RGBA6665
|
// Convert to RGBA6665
|
||||||
__m128i color = _mm_load_si128((__m128i *)(srcRGBA8888 + ir));
|
color = _mm_load_si128((__m128i *)(srcRGBA8888 + ir));
|
||||||
color = _mm_srli_epi32(color, 2);
|
color = _mm_srli_epi32(color, 2);
|
||||||
|
|
||||||
__m128i a = _mm_srli_epi32(color, 1); // Special handling for 5-bit alpha
|
__m128i a = _mm_srli_epi32(color, 1); // Special handling for 5-bit alpha
|
||||||
|
@ -972,6 +976,85 @@ Render3DError OpenGLRenderer::FlushFramebuffer(const FragmentColor *__restrict s
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
else if (dstRGBA6665 != NULL)
|
||||||
|
{
|
||||||
|
for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, iw -= (this->_framebufferWidth * 2))
|
||||||
|
{
|
||||||
|
size_t x = 0;
|
||||||
|
|
||||||
|
#if defined(ENABLE_SSSE3) && defined(LOCAL_LE)
|
||||||
|
for (; x < ssePixCount; x += 4, ir += 4, iw += 4)
|
||||||
|
{
|
||||||
|
__m128i color = _mm_load_si128((__m128i *)(srcRGBA8888 + ir));
|
||||||
|
color = _mm_srli_epi32(color, 2);
|
||||||
|
|
||||||
|
__m128i a = _mm_srli_epi32(color, 1); // Special handling for 5-bit alpha
|
||||||
|
a = _mm_and_si128(a, _mm_set1_epi32(0x1F000000));
|
||||||
|
|
||||||
|
color = _mm_and_si128(color, _mm_set1_epi32(0x003F3F3F));
|
||||||
|
color = _mm_or_si128(color, a);
|
||||||
|
color = _mm_shuffle_epi8(color, _mm_set_epi8(15, 12, 13, 14, 11, 8, 9, 10, 7, 4, 5, 6, 3, 0, 1, 2)); // Swizzle RGBA to BGRA
|
||||||
|
_mm_store_si128((__m128i *)(dstRGBA6665 + iw), color);
|
||||||
|
}
|
||||||
|
#endif // defined(ENABLE_SSSE3) && defined(LOCAL_LE)
|
||||||
|
|
||||||
|
for (; x < pixCount; x++, ir++, iw++)
|
||||||
|
{
|
||||||
|
#ifdef LOCAL_BE
|
||||||
|
dstRGBA6665[iw].color = BGRA8888_32_To_RGBA6665_32(srcRGBA8888[ir].color);
|
||||||
|
#else
|
||||||
|
dstRGBA6665[iw].color = BGRA8888_32Rev_To_RGBA6665_32Rev(srcRGBA8888[ir].color);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, iw -= (this->_framebufferWidth * 2))
|
||||||
|
{
|
||||||
|
size_t x = 0;
|
||||||
|
|
||||||
|
#if defined(ENABLE_SSSE3) && defined(LOCAL_LE)
|
||||||
|
for (; x < ssePixCount; x += 4, ir += 4, iw += 4)
|
||||||
|
{
|
||||||
|
__m128i color = _mm_load_si128((__m128i *)(srcRGBA8888 + ir));
|
||||||
|
|
||||||
|
__m128i b = _mm_and_si128(color, _mm_set1_epi32(0x000000F8)); // Read from R
|
||||||
|
b = _mm_slli_epi32(b, 7); // Shift to B
|
||||||
|
|
||||||
|
__m128i g = _mm_and_si128(color, _mm_set1_epi32(0x0000F800)); // Read from G
|
||||||
|
g = _mm_srli_epi32(g, 6); // Shift in G
|
||||||
|
|
||||||
|
__m128i r = _mm_and_si128(color, _mm_set1_epi32(0x00F80000)); // Read from B
|
||||||
|
r = _mm_srli_epi32(r, 19); // Shift to R
|
||||||
|
|
||||||
|
__m128i a = _mm_and_si128(color, _mm_set1_epi32(0xFF000000)); // Read from A
|
||||||
|
a = _mm_cmpeq_epi32(a, _mm_setzero_si128()); // Determine A
|
||||||
|
a = _mm_andnot_si128(a, _mm_set1_epi32(0x00008000)); // Mask to A
|
||||||
|
|
||||||
|
color = _mm_or_si128(_mm_or_si128(_mm_or_si128(b, g), r), a);
|
||||||
|
color = _mm_shuffle_epi8(color, _mm_set_epi8(15, 14, 11, 10, 7, 6, 3, 2, 13, 12, 9, 8, 5, 4, 1, 0));
|
||||||
|
_mm_storel_epi64((__m128i *)(dstRGBA5551 + iw), color);
|
||||||
|
}
|
||||||
|
#endif // defined(ENABLE_SSSE3) && defined(LOCAL_LE)
|
||||||
|
|
||||||
|
// Scalar RGBA5551 conversion for the pixels of this row that the SSSE3 loop
// (when it is compiled in) did not handle.
//
// Every component, including alpha, is read from srcRGBA8888. The previous
// LOCAL_BE variant took alpha from this->_framebufferColor[ir], which is not
// the source buffer when the caller passes a mapped pixel buffer pointer, so
// the alpha bit could come from stale data. With that fixed, the big-endian and
// little-endian variants are textually identical, so a single statement
// serves both builds.
for (; x < pixCount; x++, ir++, iw++)
{
	dstRGBA5551[iw] = R5G5B5TORGB15( (srcRGBA8888[ir].b >> 3) & 0x1F,
	                                 (srcRGBA8888[ir].g >> 3) & 0x1F,
	                                 (srcRGBA8888[ir].r >> 3) & 0x1F) |
	                                 ((srcRGBA8888[ir].a == 0) ? 0x0000 : 0x8000);
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return RENDER3DERROR_NOERR;
|
return RENDER3DERROR_NOERR;
|
||||||
}
|
}
|
||||||
|
@ -2736,6 +2819,11 @@ Render3DError OpenGLRenderer_1_2::RenderFinish()
|
||||||
return OGLERROR_NOERR;
|
return OGLERROR_NOERR;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
FragmentColor *framebufferRGBA6665 = (this->_willFlushFramebufferRGBA6665) ? GPU->GetEngineMain()->Get3DFramebufferRGBA6665() : NULL;
|
||||||
|
u16 *framebufferRGBA5551 = (this->_willFlushFramebufferRGBA5551) ? GPU->GetEngineMain()->Get3DFramebufferRGBA5551() : NULL;
|
||||||
|
|
||||||
|
if ( (framebufferRGBA6665 != NULL) || (framebufferRGBA5551 != NULL) )
|
||||||
|
{
|
||||||
if(!BEGINGL())
|
if(!BEGINGL())
|
||||||
{
|
{
|
||||||
GPU->GetEventHandler()->DidRender3DEnd();
|
GPU->GetEventHandler()->DidRender3DEnd();
|
||||||
|
@ -2747,17 +2835,18 @@ Render3DError OpenGLRenderer_1_2::RenderFinish()
|
||||||
const FragmentColor *__restrict mappedBufferPtr = (FragmentColor *__restrict)glMapBufferARB(GL_PIXEL_PACK_BUFFER_ARB, GL_READ_ONLY_ARB);
|
const FragmentColor *__restrict mappedBufferPtr = (FragmentColor *__restrict)glMapBufferARB(GL_PIXEL_PACK_BUFFER_ARB, GL_READ_ONLY_ARB);
|
||||||
if (mappedBufferPtr != NULL)
|
if (mappedBufferPtr != NULL)
|
||||||
{
|
{
|
||||||
this->FlushFramebuffer(mappedBufferPtr, GPU->GetEngineMain()->Get3DFramebufferRGBA6665(), GPU->GetEngineMain()->Get3DFramebufferRGBA5551());
|
this->FlushFramebuffer(mappedBufferPtr, framebufferRGBA6665, framebufferRGBA5551);
|
||||||
glUnmapBufferARB(GL_PIXEL_PACK_BUFFER_ARB);
|
glUnmapBufferARB(GL_PIXEL_PACK_BUFFER_ARB);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
glReadPixels(0, 0, this->_framebufferWidth, this->_framebufferHeight, GL_BGRA, GL_UNSIGNED_BYTE, this->_framebufferColor);
|
glReadPixels(0, 0, this->_framebufferWidth, this->_framebufferHeight, GL_BGRA, GL_UNSIGNED_BYTE, this->_framebufferColor);
|
||||||
this->FlushFramebuffer(this->_framebufferColor, GPU->GetEngineMain()->Get3DFramebufferRGBA6665(), GPU->GetEngineMain()->Get3DFramebufferRGBA5551());
|
this->FlushFramebuffer(this->_framebufferColor, framebufferRGBA6665, framebufferRGBA5551);
|
||||||
}
|
}
|
||||||
|
|
||||||
ENDGL();
|
ENDGL();
|
||||||
|
}
|
||||||
|
|
||||||
this->_pixelReadNeedsFinish = false;
|
this->_pixelReadNeedsFinish = false;
|
||||||
|
|
||||||
|
@ -3314,6 +3403,11 @@ Render3DError OpenGLRenderer_1_5::RenderFinish()
|
||||||
return OGLERROR_NOERR;
|
return OGLERROR_NOERR;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
FragmentColor *framebufferRGBA6665 = (this->_willFlushFramebufferRGBA6665) ? GPU->GetEngineMain()->Get3DFramebufferRGBA6665() : NULL;
|
||||||
|
u16 *framebufferRGBA5551 = (this->_willFlushFramebufferRGBA5551) ? GPU->GetEngineMain()->Get3DFramebufferRGBA5551() : NULL;
|
||||||
|
|
||||||
|
if ( (framebufferRGBA6665 != NULL) || (framebufferRGBA5551 != NULL) )
|
||||||
|
{
|
||||||
if(!BEGINGL())
|
if(!BEGINGL())
|
||||||
{
|
{
|
||||||
GPU->GetEventHandler()->DidRender3DEnd();
|
GPU->GetEventHandler()->DidRender3DEnd();
|
||||||
|
@ -3325,17 +3419,18 @@ Render3DError OpenGLRenderer_1_5::RenderFinish()
|
||||||
const FragmentColor *__restrict mappedBufferPtr = (FragmentColor *__restrict)glMapBuffer(GL_PIXEL_PACK_BUFFER_ARB, GL_READ_ONLY);
|
const FragmentColor *__restrict mappedBufferPtr = (FragmentColor *__restrict)glMapBuffer(GL_PIXEL_PACK_BUFFER_ARB, GL_READ_ONLY);
|
||||||
if (mappedBufferPtr != NULL)
|
if (mappedBufferPtr != NULL)
|
||||||
{
|
{
|
||||||
this->FlushFramebuffer(mappedBufferPtr, GPU->GetEngineMain()->Get3DFramebufferRGBA6665(), GPU->GetEngineMain()->Get3DFramebufferRGBA5551());
|
this->FlushFramebuffer(mappedBufferPtr, framebufferRGBA6665, framebufferRGBA5551);
|
||||||
glUnmapBuffer(GL_PIXEL_PACK_BUFFER_ARB);
|
glUnmapBuffer(GL_PIXEL_PACK_BUFFER_ARB);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
glReadPixels(0, 0, this->_framebufferWidth, this->_framebufferHeight, GL_BGRA, GL_UNSIGNED_BYTE, this->_framebufferColor);
|
glReadPixels(0, 0, this->_framebufferWidth, this->_framebufferHeight, GL_BGRA, GL_UNSIGNED_BYTE, this->_framebufferColor);
|
||||||
this->FlushFramebuffer(this->_framebufferColor, GPU->GetEngineMain()->Get3DFramebufferRGBA6665(), GPU->GetEngineMain()->Get3DFramebufferRGBA5551());
|
this->FlushFramebuffer(this->_framebufferColor, framebufferRGBA6665, framebufferRGBA5551);
|
||||||
}
|
}
|
||||||
|
|
||||||
ENDGL();
|
ENDGL();
|
||||||
|
}
|
||||||
|
|
||||||
this->_pixelReadNeedsFinish = false;
|
this->_pixelReadNeedsFinish = false;
|
||||||
|
|
||||||
|
@ -4107,6 +4202,11 @@ Render3DError OpenGLRenderer_2_1::RenderFinish()
|
||||||
return OGLERROR_NOERR;
|
return OGLERROR_NOERR;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
FragmentColor *framebufferRGBA6665 = (this->_willFlushFramebufferRGBA6665) ? GPU->GetEngineMain()->Get3DFramebufferRGBA6665() : NULL;
|
||||||
|
u16 *framebufferRGBA5551 = (this->_willFlushFramebufferRGBA5551) ? GPU->GetEngineMain()->Get3DFramebufferRGBA5551() : NULL;
|
||||||
|
|
||||||
|
if ( (framebufferRGBA6665 != NULL) || (framebufferRGBA5551 != NULL) )
|
||||||
|
{
|
||||||
if(!BEGINGL())
|
if(!BEGINGL())
|
||||||
{
|
{
|
||||||
GPU->GetEventHandler()->DidRender3DEnd();
|
GPU->GetEventHandler()->DidRender3DEnd();
|
||||||
|
@ -4116,11 +4216,12 @@ Render3DError OpenGLRenderer_2_1::RenderFinish()
|
||||||
const FragmentColor *__restrict mappedBufferPtr = (FragmentColor *__restrict)glMapBuffer(GL_PIXEL_PACK_BUFFER, GL_READ_ONLY);
|
const FragmentColor *__restrict mappedBufferPtr = (FragmentColor *__restrict)glMapBuffer(GL_PIXEL_PACK_BUFFER, GL_READ_ONLY);
|
||||||
if (mappedBufferPtr != NULL)
|
if (mappedBufferPtr != NULL)
|
||||||
{
|
{
|
||||||
this->FlushFramebuffer(mappedBufferPtr, GPU->GetEngineMain()->Get3DFramebufferRGBA6665(), GPU->GetEngineMain()->Get3DFramebufferRGBA5551());
|
this->FlushFramebuffer(mappedBufferPtr, framebufferRGBA6665, framebufferRGBA5551);
|
||||||
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
|
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
|
||||||
}
|
}
|
||||||
|
|
||||||
ENDGL();
|
ENDGL();
|
||||||
|
}
|
||||||
|
|
||||||
this->_pixelReadNeedsFinish = false;
|
this->_pixelReadNeedsFinish = false;
|
||||||
|
|
||||||
|
|
|
@ -1961,7 +1961,9 @@ Render3DError SoftRasterizerRenderer::EndRender(const u64 frameCount)
|
||||||
this->RenderEdgeMarkingAndFog(this->postprocessParam[0]);
|
this->RenderEdgeMarkingAndFog(this->postprocessParam[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
this->FlushFramebuffer(GPU->GetEngineMain()->Get3DFramebufferRGBA6665(), GPU->GetEngineMain()->Get3DFramebufferRGBA5551());
|
FragmentColor *framebufferRGBA6665 = (this->_willFlushFramebufferRGBA6665) ? GPU->GetEngineMain()->Get3DFramebufferRGBA6665() : NULL;
|
||||||
|
u16 *framebufferRGBA5551 = (this->_willFlushFramebufferRGBA5551) ? GPU->GetEngineMain()->Get3DFramebufferRGBA5551() : NULL;
|
||||||
|
this->FlushFramebuffer(framebufferRGBA6665, framebufferRGBA5551);
|
||||||
}
|
}
|
||||||
|
|
||||||
return RENDER3DERROR_NOERR;
|
return RENDER3DERROR_NOERR;
|
||||||
|
@ -2005,7 +2007,9 @@ Render3DError SoftRasterizerRenderer::RenderFinish()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
this->FlushFramebuffer(GPU->GetEngineMain()->Get3DFramebufferRGBA6665(), GPU->GetEngineMain()->Get3DFramebufferRGBA5551());
|
FragmentColor *framebufferRGBA6665 = (this->_willFlushFramebufferRGBA6665) ? GPU->GetEngineMain()->Get3DFramebufferRGBA6665() : NULL;
|
||||||
|
u16 *framebufferRGBA5551 = (this->_willFlushFramebufferRGBA5551) ? GPU->GetEngineMain()->Get3DFramebufferRGBA5551() : NULL;
|
||||||
|
this->FlushFramebuffer(framebufferRGBA6665, framebufferRGBA5551);
|
||||||
|
|
||||||
GPU->GetEventHandler()->DidRender3DEnd();
|
GPU->GetEventHandler()->DidRender3DEnd();
|
||||||
return RENDER3DERROR_NOERR;
|
return RENDER3DERROR_NOERR;
|
||||||
|
|
|
@ -228,6 +228,9 @@ Render3D::Render3D()
|
||||||
_framebufferColorSizeBytes = 0;
|
_framebufferColorSizeBytes = 0;
|
||||||
_framebufferColor = NULL;
|
_framebufferColor = NULL;
|
||||||
|
|
||||||
|
_willFlushFramebufferRGBA6665 = true;
|
||||||
|
_willFlushFramebufferRGBA5551 = true;
|
||||||
|
|
||||||
Reset();
|
Reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -282,6 +285,18 @@ Render3DError Render3D::SetFramebufferSize(size_t w, size_t h)
|
||||||
return RENDER3DERROR_NOERR;
|
return RENDER3DERROR_NOERR;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Render3D::GetFramebufferFlushStates(bool &willFlushRGBA6665, bool &willFlushRGBA5551)
|
||||||
|
{
|
||||||
|
willFlushRGBA6665 = this->_willFlushFramebufferRGBA6665;
|
||||||
|
willFlushRGBA5551 = this->_willFlushFramebufferRGBA5551;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Render3D::SetFramebufferFlushStates(bool willFlushRGBA6665, bool willFlushRGBA5551)
|
||||||
|
{
|
||||||
|
this->_willFlushFramebufferRGBA6665 = willFlushRGBA6665;
|
||||||
|
this->_willFlushFramebufferRGBA5551 = willFlushRGBA5551;
|
||||||
|
}
|
||||||
|
|
||||||
Render3DError Render3D::BeginRender(const GFX3D &engine)
|
Render3DError Render3D::BeginRender(const GFX3D &engine)
|
||||||
{
|
{
|
||||||
return RENDER3DERROR_NOERR;
|
return RENDER3DERROR_NOERR;
|
||||||
|
@ -309,13 +324,18 @@ Render3DError Render3D::EndRender(const u64 frameCount)
|
||||||
|
|
||||||
Render3DError Render3D::FlushFramebuffer(FragmentColor *__restrict dstRGBA6665, u16 *__restrict dstRGBA5551)
|
Render3DError Render3D::FlushFramebuffer(FragmentColor *__restrict dstRGBA6665, u16 *__restrict dstRGBA5551)
|
||||||
{
|
{
|
||||||
|
if (dstRGBA6665 != NULL)
|
||||||
|
{
|
||||||
memcpy(dstRGBA6665, this->_framebufferColor, this->_framebufferColorSizeBytes);
|
memcpy(dstRGBA6665, this->_framebufferColor, this->_framebufferColorSizeBytes);
|
||||||
|
}
|
||||||
|
|
||||||
// Convert to RGBA5551
|
if (dstRGBA5551 != NULL)
|
||||||
|
{
|
||||||
for (size_t i = 0; i < (this->_framebufferWidth * this->_framebufferHeight); i++)
|
for (size_t i = 0; i < (this->_framebufferWidth * this->_framebufferHeight); i++)
|
||||||
{
|
{
|
||||||
dstRGBA5551[i] = R6G6B6TORGB15(this->_framebufferColor[i].r, this->_framebufferColor[i].g, this->_framebufferColor[i].b) | ((this->_framebufferColor[i].a == 0) ? 0x0000 : 0x8000);
|
dstRGBA5551[i] = R6G6B6TORGB15(this->_framebufferColor[i].r, this->_framebufferColor[i].g, this->_framebufferColor[i].b) | ((this->_framebufferColor[i].a == 0) ? 0x0000 : 0x8000);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return RENDER3DERROR_NOERR;
|
return RENDER3DERROR_NOERR;
|
||||||
}
|
}
|
||||||
|
@ -453,6 +473,9 @@ Render3DError Render3D::Reset()
|
||||||
memset(this->clearImagePolyIDBuffer, 0, sizeof(this->clearImagePolyIDBuffer));
|
memset(this->clearImagePolyIDBuffer, 0, sizeof(this->clearImagePolyIDBuffer));
|
||||||
memset(this->clearImageFogBuffer, 0, sizeof(this->clearImageFogBuffer));
|
memset(this->clearImageFogBuffer, 0, sizeof(this->clearImageFogBuffer));
|
||||||
|
|
||||||
|
this->_willFlushFramebufferRGBA6665 = true;
|
||||||
|
this->_willFlushFramebufferRGBA5551 = true;
|
||||||
|
|
||||||
TexCache_Reset();
|
TexCache_Reset();
|
||||||
|
|
||||||
return RENDER3DERROR_NOERR;
|
return RENDER3DERROR_NOERR;
|
||||||
|
@ -505,12 +528,19 @@ Render3DError Render3D::VramReconfigureSignal()
|
||||||
|
|
||||||
Render3DError Render3D_SSE2::FlushFramebuffer(FragmentColor *__restrict dstRGBA6665, u16 *__restrict dstRGBA5551)
|
Render3DError Render3D_SSE2::FlushFramebuffer(FragmentColor *__restrict dstRGBA6665, u16 *__restrict dstRGBA5551)
|
||||||
{
|
{
|
||||||
|
if ( (dstRGBA6665 == NULL) && (dstRGBA5551 == NULL) )
|
||||||
|
{
|
||||||
|
return RENDER3DERROR_NOERR;
|
||||||
|
}
|
||||||
|
|
||||||
const __m128i zero_vec128 = _mm_setzero_si128();
|
const __m128i zero_vec128 = _mm_setzero_si128();
|
||||||
|
|
||||||
size_t i = 0;
|
size_t i = 0;
|
||||||
const size_t pixCount = this->_framebufferWidth * this->_framebufferHeight;
|
const size_t pixCount = this->_framebufferWidth * this->_framebufferHeight;
|
||||||
const size_t ssePixCount = pixCount - (pixCount % 4);
|
const size_t ssePixCount = pixCount - (pixCount % 4);
|
||||||
|
|
||||||
|
if ( (dstRGBA6665 != NULL) && (dstRGBA5551 != NULL) )
|
||||||
|
{
|
||||||
for (; i < ssePixCount; i += 4)
|
for (; i < ssePixCount; i += 4)
|
||||||
{
|
{
|
||||||
// Copy the framebufferColor buffer
|
// Copy the framebufferColor buffer
|
||||||
|
@ -559,6 +589,58 @@ Render3DError Render3D_SSE2::FlushFramebuffer(FragmentColor *__restrict dstRGBA6
|
||||||
dstRGBA6665[i] = this->_framebufferColor[i];
|
dstRGBA6665[i] = this->_framebufferColor[i];
|
||||||
dstRGBA5551[i] = R6G6B6TORGB15(this->_framebufferColor[i].r, this->_framebufferColor[i].g, this->_framebufferColor[i].b) | ((this->_framebufferColor[i].a == 0) ? 0x0000 : 0x8000);
|
dstRGBA5551[i] = R6G6B6TORGB15(this->_framebufferColor[i].r, this->_framebufferColor[i].g, this->_framebufferColor[i].b) | ((this->_framebufferColor[i].a == 0) ? 0x0000 : 0x8000);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
else if (dstRGBA6665 != NULL)
|
||||||
|
{
|
||||||
|
memcpy(dstRGBA6665, this->_framebufferColor, this->_framebufferColorSizeBytes);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for (; i < ssePixCount; i += 4)
|
||||||
|
{
|
||||||
|
// Convert to RGBA5551
|
||||||
|
__m128i color = _mm_load_si128((__m128i *)(this->_framebufferColor + i));
|
||||||
|
__m128i r = _mm_and_si128(color, _mm_set1_epi32(0x0000003E)); // Read from R
|
||||||
|
r = _mm_srli_epi32(r, 1); // Shift to R
|
||||||
|
|
||||||
|
__m128i g = _mm_and_si128(color, _mm_set1_epi32(0x00003E00)); // Read from G
|
||||||
|
g = _mm_srli_epi32(g, 4); // Shift in G
|
||||||
|
|
||||||
|
__m128i b = _mm_and_si128(color, _mm_set1_epi32(0x003E0000)); // Read from B
|
||||||
|
b = _mm_srli_epi32(b, 7); // Shift to B
|
||||||
|
|
||||||
|
__m128i a = _mm_and_si128(color, _mm_set1_epi32(0xFF000000)); // Read from A
|
||||||
|
a = _mm_cmpeq_epi32(a, zero_vec128); // Determine A
|
||||||
|
|
||||||
|
// From here on, we're going to do an SSE2 trick to pack 32-bit down to unsigned
|
||||||
|
// 16-bit. Since SSE2 only has packssdw (signed saturated 16-bit pack), using
|
||||||
|
// packssdw on the alpha bit (0x8000) will result in a value of 0x7FFF, which is
|
||||||
|
// incorrect. Now if we were to use SSE4.1's packusdw (unsigned saturated 16-bit
|
||||||
|
// pack), we wouldn't have to go through this hassle. But not everyone has an
|
||||||
|
// SSE4.1-capable CPU, so doing this the SSE2 way is more guaranteed to work for
|
||||||
|
// everyone's CPU.
|
||||||
|
//
|
||||||
|
// To use packssdw, we take a bit one position lower for the alpha bit, run
|
||||||
|
// packssdw, then shift the bit back to its original position. Then we por the
|
||||||
|
// alpha vector with the post-packed color vector to get the final color.
|
||||||
|
|
||||||
|
a = _mm_andnot_si128(a, _mm_set1_epi32(0x00004000)); // Mask out the bit before A
|
||||||
|
a = _mm_packs_epi32(a, zero_vec128); // Pack 32-bit down to 16-bit
|
||||||
|
a = _mm_slli_epi16(a, 1); // Shift the A bit back to where it needs to be
|
||||||
|
|
||||||
|
// Assemble the RGB colors, pack the 32-bit color into a signed 16-bit color, then por the alpha bit back in.
|
||||||
|
color = _mm_or_si128(_mm_or_si128(r, g), b);
|
||||||
|
color = _mm_packs_epi32(color, zero_vec128);
|
||||||
|
color = _mm_or_si128(color, a);
|
||||||
|
|
||||||
|
_mm_storel_epi64((__m128i *)(dstRGBA5551 + i), color);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (; i < pixCount; i++)
|
||||||
|
{
|
||||||
|
dstRGBA5551[i] = R6G6B6TORGB15(this->_framebufferColor[i].r, this->_framebufferColor[i].g, this->_framebufferColor[i].b) | ((this->_framebufferColor[i].a == 0) ? 0x0000 : 0x8000);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return RENDER3DERROR_NOERR;
|
return RENDER3DERROR_NOERR;
|
||||||
}
|
}
|
||||||
|
|
|
@ -109,6 +109,9 @@ protected:
|
||||||
size_t _framebufferColorSizeBytes;
|
size_t _framebufferColorSizeBytes;
|
||||||
FragmentColor *_framebufferColor;
|
FragmentColor *_framebufferColor;
|
||||||
|
|
||||||
|
bool _willFlushFramebufferRGBA6665;
|
||||||
|
bool _willFlushFramebufferRGBA5551;
|
||||||
|
|
||||||
CACHE_ALIGN u16 clearImageColor16Buffer[GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT];
|
CACHE_ALIGN u16 clearImageColor16Buffer[GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT];
|
||||||
CACHE_ALIGN u32 clearImageDepthBuffer[GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT];
|
CACHE_ALIGN u32 clearImageDepthBuffer[GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT];
|
||||||
CACHE_ALIGN u8 clearImageFogBuffer[GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT];
|
CACHE_ALIGN u8 clearImageFogBuffer[GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT];
|
||||||
|
@ -155,6 +158,9 @@ public:
|
||||||
virtual Render3DError VramReconfigureSignal(); // Called when the emulator reconfigures its VRAM. You may need to invalidate your texture cache.
|
virtual Render3DError VramReconfigureSignal(); // Called when the emulator reconfigures its VRAM. You may need to invalidate your texture cache.
|
||||||
|
|
||||||
virtual Render3DError SetFramebufferSize(size_t w, size_t h); // Called whenever the output framebuffer size changes.
|
virtual Render3DError SetFramebufferSize(size_t w, size_t h); // Called whenever the output framebuffer size changes.
|
||||||
|
|
||||||
|
virtual void GetFramebufferFlushStates(bool &willFlushRGBA6665, bool &willFlushRGBA5551);
|
||||||
|
virtual void SetFramebufferFlushStates(bool willFlushRGBA6665, bool willFlushRGBA5551);
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifdef ENABLE_SSE2
|
#ifdef ENABLE_SSE2
|
||||||
|
|
Loading…
Reference in New Issue