Render3D:

- 3D renderers can now be requested to output their framebuffers in RGBA6665 (SoftRasterizer and OpenGL) or RGBA8888 (OpenGL only) color formats.
This commit is contained in:
rogerman 2016-04-09 07:47:53 +00:00
parent f036f26310
commit 9a2bc94fc9
8 changed files with 691 additions and 273 deletions

File diff suppressed because it is too large Load Diff

View File

@ -451,10 +451,12 @@ struct OGLRenderRef
GLuint vertexFramebufferOutputShaderID;
GLuint fragmentEdgeMarkShaderID;
GLuint fragmentFogShaderID;
GLuint fragmentFramebufferOutputShaderID;
GLuint fragmentFramebufferRGBA6665OutputShaderID;
GLuint fragmentFramebufferRGBA8888OutputShaderID;
GLuint programEdgeMarkID;
GLuint programFogID;
GLuint programFramebufferOutputID;
GLuint programFramebufferRGBA6665OutputID;
GLuint programFramebufferRGBA8888OutputID;
GLint uniformFramebufferSize;
GLint uniformStateToonShadingMode;
@ -572,7 +574,7 @@ private:
unsigned int versionRevision;
private:
Render3DError _FlushFramebufferConvertOnCPU(const FragmentColor *__restrict srcFramebuffer, FragmentColor *__restrict dstRGBA6665, u16 *__restrict dstRGBA5551);
Render3DError _FlushFramebufferConvertOnCPU(const FragmentColor *__restrict srcFramebuffer, FragmentColor *__restrict dstFramebuffer, u16 *__restrict dstRGBA5551);
protected:
// OpenGL-specific References
@ -585,6 +587,7 @@ protected:
bool isMultisampledFBOSupported;
bool isShaderSupported;
bool isVAOSupported;
bool willFlipFramebufferOnGPU;
bool willConvertFramebufferOnGPU;
// Textures
@ -595,7 +598,7 @@ protected:
size_t _currentPolyIndex;
std::vector<u8> _shadowPolyID;
Render3DError FlushFramebuffer(const FragmentColor *__restrict srcFramebuffer, FragmentColor *__restrict dstRGBA6665, u16 *__restrict dstRGBA5551);
Render3DError FlushFramebuffer(const FragmentColor *__restrict srcFramebuffer, FragmentColor *__restrict dstFramebuffer, u16 *__restrict dstRGBA5551);
// OpenGL-specific methods
virtual Render3DError CreateVBOs() = 0;
@ -618,7 +621,8 @@ protected:
const std::string &fogVtxShader,
const std::string &fogFragShader,
const std::string &framebufferOutputVtxShader,
const std::string &framebufferOutputFragShader) = 0;
const std::string &framebufferOutputRGBA6665FragShader,
const std::string &framebufferOutputRGBA8888FragShader) = 0;
virtual Render3DError DestroyPostprocessingPrograms() = 0;
virtual Render3DError InitEdgeMarkProgramBindings() = 0;
virtual Render3DError InitEdgeMarkProgramShaderLocations() = 0;
@ -687,7 +691,8 @@ protected:
const std::string &fogVtxShader,
const std::string &fogFragShader,
const std::string &framebufferOutputVtxShader,
const std::string &framebufferOutputFragShader);
const std::string &framebufferOutputRGBA6665FragShader,
const std::string &framebufferOutputRGBA8888FragShader);
virtual Render3DError DestroyPostprocessingPrograms();
virtual Render3DError InitEdgeMarkProgramBindings();
virtual Render3DError InitEdgeMarkProgramShaderLocations();
@ -774,7 +779,8 @@ protected:
const std::string &fogVtxShader,
const std::string &fogFragShader,
const std::string &framebufferOutputVtxShader,
const std::string &framebufferOutputFragShader);
const std::string &framebufferOutputRGBA6665FragShader,
const std::string &framebufferOutputRGBA8888FragShader);
virtual Render3DError DestroyPostprocessingPrograms();
virtual Render3DError InitEdgeMarkProgramBindings();
virtual Render3DError InitEdgeMarkProgramShaderLocations();

View File

@ -508,7 +508,8 @@ Render3DError OpenGLRenderer_3_2::InitExtensions()
// Load and create shaders. Return on any error, since v3.2 Core Profile makes shaders mandatory.
this->isShaderSupported = true;
// OpenGL v3.2 Core Profile should have all the necessary features to be able to convert the framebuffer.
// OpenGL v3.2 Core Profile should have all the necessary features to be able to flip and convert the framebuffer.
this->willFlipFramebufferOnGPU = true;
this->willConvertFramebufferOnGPU = true;
std::string vertexShaderProgram;
@ -538,6 +539,7 @@ Render3DError OpenGLRenderer_3_2::InitExtensions()
fogVtxShaderString,
fogFragShaderString,
framebufferOutputVtxShaderString,
framebufferOutputFragShaderString,
framebufferOutputFragShaderString);
if (error != OGLERROR_NOERR)
{
@ -643,9 +645,12 @@ Render3DError OpenGLRenderer_3_2::InitFogProgramShaderLocations()
Render3DError OpenGLRenderer_3_2::InitFramebufferOutputProgramBindings()
{
OGLRenderRef &OGLRef = *this->ref;
glBindAttribLocation(OGLRef.programFramebufferOutputID, OGLVertexAttributeID_Position, "inPosition");
glBindAttribLocation(OGLRef.programFramebufferOutputID, OGLVertexAttributeID_TexCoord0, "inTexCoord0");
glBindFragDataLocation(OGLRef.programFramebufferOutputID, 0, "outFragColor");
glBindAttribLocation(OGLRef.programFramebufferRGBA6665OutputID, OGLVertexAttributeID_Position, "inPosition");
glBindAttribLocation(OGLRef.programFramebufferRGBA6665OutputID, OGLVertexAttributeID_TexCoord0, "inTexCoord0");
glBindAttribLocation(OGLRef.programFramebufferRGBA8888OutputID, OGLVertexAttributeID_Position, "inPosition");
glBindAttribLocation(OGLRef.programFramebufferRGBA8888OutputID, OGLVertexAttributeID_TexCoord0, "inTexCoord0");
glBindFragDataLocation(OGLRef.programFramebufferRGBA6665OutputID, 0, "outFragColor");
glBindFragDataLocation(OGLRef.programFramebufferRGBA8888OutputID, 0, "outFragColor");
return OGLERROR_NOERR;
}
@ -654,10 +659,13 @@ Render3DError OpenGLRenderer_3_2::InitFramebufferOutputShaderLocations()
{
OGLRenderRef &OGLRef = *this->ref;
glUseProgram(OGLRef.programFramebufferOutputID);
glUseProgram(OGLRef.programFramebufferRGBA6665OutputID);
const GLint uniformTexFinalColorRGBA6665 = glGetUniformLocation(OGLRef.programFramebufferRGBA6665OutputID, "texInFragColor");
glUniform1i(uniformTexFinalColorRGBA6665, OGLTextureUnitID_FinalColor);
const GLint uniformTexFinalColor = glGetUniformLocation(OGLRef.programFramebufferOutputID, "texInFragColor");
glUniform1i(uniformTexFinalColor, OGLTextureUnitID_FinalColor);
glUseProgram(OGLRef.programFramebufferRGBA8888OutputID);
const GLint uniformTexFinalColorRGBA8888 = glGetUniformLocation(OGLRef.programFramebufferRGBA8888OutputID, "texInFragColor");
glUniform1i(uniformTexFinalColorRGBA8888, OGLTextureUnitID_FinalColor);
return OGLERROR_NOERR;
}
@ -1253,17 +1261,20 @@ Render3DError OpenGLRenderer_3_2::ReadBackPixels()
this->_mappedFramebuffer = NULL;
}
// Perform the RGBA6665 color space conversion while we're still on the GPU so
// that we can avoid having to do it on the CPU.
// Flip the framebuffer in Y to match the coordinates of OpenGL and the NDS hardware.
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, OGLRef.fboPostprocessID);
glDrawBuffer(GL_COLOR_ATTACHMENT1);
glBlitFramebuffer(0, this->_framebufferHeight, this->_framebufferWidth, 0, 0, 0, this->_framebufferWidth, this->_framebufferHeight, GL_COLOR_BUFFER_BIT, GL_NEAREST);
glBindFramebuffer(GL_FRAMEBUFFER, OGLRef.fboPostprocessID);
glReadBuffer(GL_COLOR_ATTACHMENT1);
if (this->_outputFormat == NDSColorFormat_BGR666_Rev)
{
// Perform the RGBA6665 color space conversion while we're still on the GPU so
// that we can avoid having to do it on the CPU.
glDrawBuffer(GL_COLOR_ATTACHMENT2);
glUseProgram(OGLRef.programFramebufferOutputID);
glUseProgram(OGLRef.programFramebufferRGBA6665OutputID);
glViewport(0, 0, this->_framebufferWidth, this->_framebufferHeight);
glDisable(GL_DEPTH_TEST);
glDisable(GL_STENCIL_TEST);
@ -1273,13 +1284,14 @@ Render3DError OpenGLRenderer_3_2::ReadBackPixels()
glBindBuffer(GL_ARRAY_BUFFER, OGLRef.vboPostprocessVtxID);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, OGLRef.iboPostprocessIndexID);
glBindVertexArray(OGLRef.vaoPostprocessStatesID);
glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_BYTE, 0);
glBindVertexArray(0);
// Read back the pixels.
glReadBuffer(GL_COLOR_ATTACHMENT2);
}
// Read back the pixels in RGBA format, since an OpenGL 3.2 device should be able to read back this
// format without a performance penalty.
glReadPixels(0, 0, this->_framebufferWidth, this->_framebufferHeight, GL_RGBA, GL_UNSIGNED_BYTE, 0);
// Set the read and draw target buffers back to color attachment 0, which is always the default.

View File

@ -298,6 +298,17 @@ CACHE_ALIGN const u8 material_5bit_to_8bit[] = {
0xC6, 0xCE, 0xD6, 0xDE, 0xE7, 0xEF, 0xF7, 0xFF
};
// Lookup table that expands a 6-bit color component (table index 0-63) to an
// 8-bit value. Each entry equals (v << 2) | (v >> 4): the top two bits of the
// source value are replicated into the low two bits of the result, so that
// 0x00 maps to 0x00 and 0x3F maps to 0xFF.
CACHE_ALIGN const u8 material_6bit_to_8bit[] = {
0x00, 0x04, 0x08, 0x0C, 0x10, 0x14, 0x18, 0x1C,
0x20, 0x24, 0x28, 0x2C, 0x30, 0x34, 0x38, 0x3C,
0x41, 0x45, 0x49, 0x4D, 0x51, 0x55, 0x59, 0x5D,
0x61, 0x65, 0x69, 0x6D, 0x71, 0x75, 0x79, 0x7D,
0x82, 0x86, 0x8A, 0x8E, 0x92, 0x96, 0x9A, 0x9E,
0xA2, 0xA6, 0xAA, 0xAE, 0xB2, 0xB6, 0xBA, 0xBE,
0xC3, 0xC7, 0xCB, 0xCF, 0xD3, 0xD7, 0xDB, 0xDF,
0xE3, 0xE7, 0xEB, 0xEF, 0xF3, 0xF7, 0xFB, 0xFF
};
// Lookup table that expands a 3-bit value (table index 0-7) to an 8-bit value.
// Each entry equals (v << 5) | (v << 2) | (v >> 1): the 3-bit pattern is
// repeated to fill all eight bits, so that 0 maps to 0x00 and 7 maps to 0xFF.
CACHE_ALIGN const u8 material_3bit_to_8bit[] = {
0x00, 0x24, 0x49, 0x6D, 0x92, 0xB6, 0xDB, 0xFF
};

View File

@ -741,6 +741,7 @@ extern CACHE_ALIGN u8 mixTable555[32][32][32];
extern CACHE_ALIGN const u32 material_5bit_to_31bit[32];
extern CACHE_ALIGN const u8 material_5bit_to_6bit[32];
extern CACHE_ALIGN const u8 material_5bit_to_8bit[32];
extern CACHE_ALIGN const u8 material_6bit_to_8bit[64];
extern CACHE_ALIGN const u8 material_3bit_to_5bit[8];
extern CACHE_ALIGN const u8 material_3bit_to_6bit[8];
extern CACHE_ALIGN const u8 material_3bit_to_8bit[8];

View File

@ -1956,8 +1956,9 @@ Render3DError SoftRasterizerRenderer::EndRender(const u64 frameCount)
this->RenderEdgeMarkingAndFog(this->postprocessParam[0]);
}
FragmentColor *framebufferMain = (this->_outputFormat == NDSColorFormat_BGR888_Rev) ? GPU->GetEngineMain()->Get3DFramebufferRGBA6665() : NULL;
u16 *framebufferRGBA5551 = (this->_willFlushFramebufferRGBA5551) ? GPU->GetEngineMain()->Get3DFramebufferRGBA5551() : NULL;
this->FlushFramebuffer(this->_framebufferColor, NULL, framebufferRGBA5551);
this->FlushFramebuffer(this->_framebufferColor, framebufferMain, framebufferRGBA5551);
}
return RENDER3DERROR_NOERR;
@ -2000,8 +2001,9 @@ Render3DError SoftRasterizerRenderer::RenderFinish()
}
}
FragmentColor *framebufferMain = (this->_outputFormat == NDSColorFormat_BGR888_Rev) ? GPU->GetEngineMain()->Get3DFramebufferRGBA6665() : NULL;
u16 *framebufferRGBA5551 = (this->_willFlushFramebufferRGBA5551) ? GPU->GetEngineMain()->Get3DFramebufferRGBA5551() : NULL;
this->FlushFramebuffer(this->_framebufferColor, NULL, framebufferRGBA5551);
this->FlushFramebuffer(this->_framebufferColor, framebufferMain, framebufferRGBA5551);
return RENDER3DERROR_NOERR;
}

View File

@ -99,6 +99,8 @@ bool NDS_3D_ChangeCore(int newCore)
return result;
}
newRenderer->RequestColorFormat(GPU->GetDisplayInfo().colorFormat);
Render3DError error = newRenderer->SetFramebufferSize(GPU->GetCustomFramebufferWidth(), GPU->GetCustomFramebufferHeight());
if (error != RENDER3DERROR_NOERR)
{
@ -283,6 +285,8 @@ Render3D::Render3D()
_framebufferColorSizeBytes = 0;
_framebufferColor = NULL;
_internalRenderingFormat = NDSColorFormat_BGR666_Rev;
_outputFormat = NDSColorFormat_BGR666_Rev;
_renderNeedsFinish = false;
_willFlushFramebufferRGBA6665 = true;
_willFlushFramebufferRGBA5551 = true;
@ -344,6 +348,17 @@ Render3DError Render3D::SetFramebufferSize(size_t w, size_t h)
return RENDER3DERROR_NOERR;
}
// Requests the color format that this renderer should use for its output.
//
// A request for NDSColorFormat_BGR555_Rev is substituted with
// NDSColorFormat_BGR666_Rev; any other requested format is stored unchanged.
//
// Returns the output format that was actually stored, which callers should
// check since it may differ from the format they asked for.
NDSColorFormat Render3D::RequestColorFormat(NDSColorFormat colorFormat)
{
	if (colorFormat == NDSColorFormat_BGR555_Rev)
	{
		this->_outputFormat = NDSColorFormat_BGR666_Rev;
	}
	else
	{
		this->_outputFormat = colorFormat;
	}
	
	return this->_outputFormat;
}
// Returns the output color format currently stored in this renderer.
// Does not modify any renderer state.
NDSColorFormat Render3D::GetColorFormat() const
{
	const NDSColorFormat activeOutputFormat = this->_outputFormat;
	return activeOutputFormat;
}
void Render3D::GetFramebufferFlushStates(bool &willFlushRGBA6665, bool &willFlushRGBA5551)
{
willFlushRGBA6665 = this->_willFlushFramebufferRGBA6665;
@ -565,16 +580,47 @@ Render3DError Render3D::EndRender(const u64 frameCount)
return RENDER3DERROR_NOERR;
}
Render3DError Render3D::FlushFramebuffer(const FragmentColor *__restrict srcFramebuffer, FragmentColor *__restrict dstRGBA6665, u16 *__restrict dstRGBA5551)
Render3DError Render3D::FlushFramebuffer(const FragmentColor *__restrict srcFramebuffer, FragmentColor *__restrict dstFramebuffer, u16 *__restrict dstRGBA5551)
{
if ( (dstRGBA6665 == NULL) && (dstRGBA5551 == NULL) )
if ( (dstFramebuffer == NULL) && (dstRGBA5551 == NULL) )
{
return RENDER3DERROR_NOERR;
}
const size_t pixCount = this->_framebufferWidth * this->_framebufferHeight;
if (dstFramebuffer != NULL)
{
if ( (this->_internalRenderingFormat == NDSColorFormat_BGR888_Rev) && (this->_outputFormat == NDSColorFormat_BGR666_Rev) )
{
for (size_t i = 0; i < pixCount; i++)
{
dstFramebuffer[i].r = srcFramebuffer[i].r >> 2;
dstFramebuffer[i].g = srcFramebuffer[i].g >> 2;
dstFramebuffer[i].b = srcFramebuffer[i].b >> 2;
dstFramebuffer[i].a = srcFramebuffer[i].a >> 3;
}
}
else if ( (this->_internalRenderingFormat == NDSColorFormat_BGR666_Rev) && (this->_outputFormat == NDSColorFormat_BGR888_Rev) )
{
for (size_t i = 0; i < pixCount; i++)
{
dstFramebuffer[i].r = material_6bit_to_8bit[srcFramebuffer[i].r];
dstFramebuffer[i].g = material_6bit_to_8bit[srcFramebuffer[i].g];
dstFramebuffer[i].b = material_6bit_to_8bit[srcFramebuffer[i].b];
dstFramebuffer[i].a = material_5bit_to_8bit[srcFramebuffer[i].a];
}
}
else if ( ((this->_internalRenderingFormat == NDSColorFormat_BGR666_Rev) && (this->_outputFormat == NDSColorFormat_BGR666_Rev)) ||
((this->_internalRenderingFormat == NDSColorFormat_BGR888_Rev) && (this->_outputFormat == NDSColorFormat_BGR888_Rev)) )
{
memcpy(dstFramebuffer, srcFramebuffer, pixCount * sizeof(FragmentColor));
}
}
if (dstRGBA5551 != NULL)
{
for (size_t i = 0; i < (this->_framebufferWidth * this->_framebufferHeight); i++)
for (size_t i = 0; i < pixCount; i++)
{
dstRGBA5551[i] = R6G6B6TORGB15(srcFramebuffer[i].r, srcFramebuffer[i].g, srcFramebuffer[i].b) | ((srcFramebuffer[i].a == 0) ? 0x0000 : 0x8000);
}
@ -766,9 +812,9 @@ Render3DError Render3D::VramReconfigureSignal()
#ifdef ENABLE_SSE2
Render3DError Render3D_SSE2::FlushFramebuffer(const FragmentColor *__restrict srcFramebuffer, FragmentColor *__restrict dstRGBA6665, u16 *__restrict dstRGBA5551)
Render3DError Render3D_SSE2::FlushFramebuffer(const FragmentColor *__restrict srcFramebuffer, FragmentColor *__restrict dstFramebuffer, u16 *__restrict dstRGBA5551)
{
if ( (dstRGBA6665 == NULL) && (dstRGBA5551 == NULL) )
if ( (dstFramebuffer == NULL) && (dstRGBA5551 == NULL) )
{
return RENDER3DERROR_NOERR;
}
@ -777,6 +823,59 @@ Render3DError Render3D_SSE2::FlushFramebuffer(const FragmentColor *__restrict sr
const size_t pixCount = this->_framebufferWidth * this->_framebufferHeight;
const size_t ssePixCount = pixCount - (pixCount % 4);
if (dstFramebuffer != NULL)
{
if ( (this->_internalRenderingFormat == NDSColorFormat_BGR888_Rev) && (this->_outputFormat == NDSColorFormat_BGR666_Rev) )
{
for (; i < ssePixCount; i += 4)
{
// Convert to RGBA6665
__m128i color6665 = _mm_load_si128((__m128i *)(srcFramebuffer + i));
__m128i a = _mm_srli_epi32(_mm_and_si128(color6665, _mm_set1_epi32(0xF8000000)), 3);
color6665 = _mm_srli_epi32(_mm_and_si128(color6665, _mm_set1_epi32(0x00FCFCFC)), 2);
color6665 = _mm_or_si128(color6665, a);
_mm_store_si128((__m128i *)(dstFramebuffer + i), color6665);
}
for (; i < pixCount; i++)
{
dstFramebuffer[i].r = srcFramebuffer[i].r >> 2;
dstFramebuffer[i].g = srcFramebuffer[i].g >> 2;
dstFramebuffer[i].b = srcFramebuffer[i].b >> 2;
dstFramebuffer[i].a = srcFramebuffer[i].a >> 3;
}
}
else if ( (this->_internalRenderingFormat == NDSColorFormat_BGR666_Rev) && (this->_outputFormat == NDSColorFormat_BGR888_Rev) )
{
for (; i < ssePixCount; i += 4)
{
// Convert to RGBA8888:
// RGB 6-bit to 8-bit formula: dstRGB8 = (srcRGB6 << 2) | ((srcRGB6 >> 4) & 0x03)
// Alpha 5-bit to 8-bit formula: dstA8 = (srcA5 << 3) | ((srcA5 >> 2) & 0x07)
__m128i color8888 = _mm_load_si128((__m128i *)(srcFramebuffer + i));
__m128i a = _mm_or_si128( _mm_and_si128(_mm_slli_epi32(color8888, 3), _mm_set1_epi8(0xF8)), _mm_and_si128(_mm_srli_epi32(color8888, 2), _mm_set1_epi8(0x07)) );
color8888 = _mm_or_si128( _mm_and_si128(_mm_slli_epi32(color8888, 2), _mm_set1_epi8(0xFC)), _mm_and_si128(_mm_srli_epi32(color8888, 4), _mm_set1_epi8(0x03)) );
color8888 = _mm_or_si128(_mm_and_si128(color8888, _mm_set1_epi32(0x00FFFFFF)), _mm_and_si128(a, _mm_set1_epi32(0xFF000000)));
_mm_store_si128((__m128i *)(dstFramebuffer + i), color8888);
}
for (; i < pixCount; i++)
{
dstFramebuffer[i].r = material_6bit_to_8bit[srcFramebuffer[i].r];
dstFramebuffer[i].g = material_6bit_to_8bit[srcFramebuffer[i].g];
dstFramebuffer[i].b = material_6bit_to_8bit[srcFramebuffer[i].b];
dstFramebuffer[i].a = material_5bit_to_8bit[srcFramebuffer[i].a];
}
}
else if ( ((this->_internalRenderingFormat == NDSColorFormat_BGR666_Rev) && (this->_outputFormat == NDSColorFormat_BGR666_Rev)) ||
((this->_internalRenderingFormat == NDSColorFormat_BGR888_Rev) && (this->_outputFormat == NDSColorFormat_BGR888_Rev)) )
{
memcpy(dstFramebuffer, srcFramebuffer, pixCount * sizeof(FragmentColor));
}
}
if (dstRGBA5551 != NULL)
{
for (; i < ssePixCount; i += 4)

View File

@ -109,6 +109,8 @@ protected:
size_t _framebufferColorSizeBytes;
FragmentColor *_framebufferColor;
NDSColorFormat _internalRenderingFormat;
NDSColorFormat _outputFormat;
bool _renderNeedsFinish;
bool _willFlushFramebufferRGBA6665;
bool _willFlushFramebufferRGBA5551;
@ -130,7 +132,7 @@ protected:
virtual Render3DError RenderEdgeMarking(const u16 *colorTable, const bool useAntialias);
virtual Render3DError RenderFog(const u8 *densityTable, const u32 color, const u32 offset, const u8 shift, const bool alphaOnly);
virtual Render3DError EndRender(const u64 frameCount);
virtual Render3DError FlushFramebuffer(const FragmentColor *__restrict srcFramebuffer, FragmentColor *__restrict dstRGBA6665, u16 *__restrict dstRGBA5551);
virtual Render3DError FlushFramebuffer(const FragmentColor *__restrict srcFramebuffer, FragmentColor *__restrict dstFramebuffer, u16 *__restrict dstRGBA5551);
virtual Render3DError ClearUsingImage(const u16 *__restrict colorBuffer, const u32 *__restrict depthBuffer, const u8 *__restrict fogBuffer, const u8 *__restrict polyIDBuffer);
virtual Render3DError ClearUsingValues(const FragmentColor &clearColor, const FragmentAttributes &clearAttributes) const;
@ -167,6 +169,9 @@ public:
virtual Render3DError SetFramebufferSize(size_t w, size_t h); // Called whenever the output framebuffer size changes.
virtual NDSColorFormat RequestColorFormat(NDSColorFormat colorFormat);
virtual NDSColorFormat GetColorFormat() const;
virtual FragmentColor* GetFramebuffer();
virtual void GetFramebufferFlushStates(bool &willFlushRGBA6665, bool &willFlushRGBA5551);
virtual void SetFramebufferFlushStates(bool willFlushRGBA6665, bool willFlushRGBA5551);
@ -182,7 +187,7 @@ public:
class Render3D_SSE2 : public Render3D
{
protected:
virtual Render3DError FlushFramebuffer(const FragmentColor *__restrict srcFramebuffer, FragmentColor *__restrict dstRGBA6665, u16 *__restrict dstRGBA5551);
virtual Render3DError FlushFramebuffer(const FragmentColor *__restrict srcFramebuffer, FragmentColor *__restrict dstFramebuffer, u16 *__restrict dstRGBA5551);
public:
virtual Render3DError ClearFramebuffer(const GFX3D_State &renderState);