GFX3D / SoftRasterizer: Small refactor to use some of the new data types introduced in commit cda8cb5.
- Also make SoftRasterizer's framebuffer out-of-bounds width check more robust.
This commit is contained in:
parent
90c8411cbf
commit
111292ff15
|
@ -1236,35 +1236,31 @@ void NDSGeometryEngine::SetNormal(const u32 param)
|
|||
MatrixMultVec3x3(_mtxCurrent[MATRIXMODE_POSITION_VECTOR], normalTransformed.vec);
|
||||
|
||||
//apply lighting model
|
||||
const s32 diffuse32[3] = {
|
||||
const Color3s32 diffuse = {
|
||||
(s32)( this->_regDiffuse & 0x001F),
|
||||
(s32)((this->_regDiffuse >> 5) & 0x001F),
|
||||
(s32)((this->_regDiffuse >> 10) & 0x001F)
|
||||
};
|
||||
|
||||
const s32 ambient32[3] = {
|
||||
const Color3s32 ambient = {
|
||||
(s32)( this->_regAmbient & 0x001F),
|
||||
(s32)((this->_regAmbient >> 5) & 0x001F),
|
||||
(s32)((this->_regAmbient >> 10) & 0x001F)
|
||||
};
|
||||
|
||||
const s32 emission32[3] = {
|
||||
const Color3s32 emission = {
|
||||
(s32)( this->_regEmission & 0x001F),
|
||||
(s32)((this->_regEmission >> 5) & 0x001F),
|
||||
(s32)((this->_regEmission >> 10) & 0x001F)
|
||||
};
|
||||
|
||||
const s32 specular32[3] = {
|
||||
const Color3s32 specular = {
|
||||
(s32)( this->_regSpecular & 0x001F),
|
||||
(s32)((this->_regSpecular >> 5) & 0x001F),
|
||||
(s32)((this->_regSpecular >> 10) & 0x001F)
|
||||
};
|
||||
|
||||
s32 vertexColor[3] = {
|
||||
emission32[0],
|
||||
emission32[1],
|
||||
emission32[2]
|
||||
};
|
||||
Color3s32 vertexColor = emission;
|
||||
|
||||
const u8 lightMask = gfx3d.regPolyAttrApplied.LightMask;
|
||||
|
||||
|
@ -1275,7 +1271,7 @@ void NDSGeometryEngine::SetNormal(const u32 param)
|
|||
continue;
|
||||
}
|
||||
|
||||
const s32 lightColor32[3] = {
|
||||
const Color3s32 lightColor = {
|
||||
(s32)( this->_regLightColor[i] & 0x0000001F),
|
||||
(s32)((this->_regLightColor[i] >> 5) & 0x0000001F),
|
||||
(s32)((this->_regLightColor[i] >> 10) & 0x0000001F)
|
||||
|
@ -1317,17 +1313,17 @@ void NDSGeometryEngine::SetNormal(const u32 param)
|
|||
|
||||
for (size_t c = 0; c < 3; c++)
|
||||
{
|
||||
const s32 specComp = ((specular32[c] * lightColor32[c] * fixedshininess) >> 17); // 5 bits for color*color and 12 bits for shininess
|
||||
const s32 diffComp = (( diffuse32[c] * lightColor32[c] * fixed_diffuse) >> 17); // 5 bits for color*color and 12 bits for diffuse
|
||||
const s32 ambComp = (( ambient32[c] * lightColor32[c]) >> 5); // 5 bits for color*color
|
||||
vertexColor[c] += specComp + diffComp + ambComp;
|
||||
const s32 specComp = ((specular.component[c] * lightColor.component[c] * fixedshininess) >> 17); // 5 bits for color*color and 12 bits for shininess
|
||||
const s32 diffComp = (( diffuse.component[c] * lightColor.component[c] * fixed_diffuse) >> 17); // 5 bits for color*color and 12 bits for diffuse
|
||||
const s32 ambComp = (( ambient.component[c] * lightColor.component[c]) >> 5); // 5 bits for color*color
|
||||
vertexColor.component[c] += specComp + diffComp + ambComp;
|
||||
}
|
||||
}
|
||||
|
||||
const Color4u8 newVtxColor = {
|
||||
(u8)std::min<s32>(31, vertexColor[0]),
|
||||
(u8)std::min<s32>(31, vertexColor[1]),
|
||||
(u8)std::min<s32>(31, vertexColor[2]),
|
||||
(u8)std::min<s32>(31, vertexColor.r),
|
||||
(u8)std::min<s32>(31, vertexColor.g),
|
||||
(u8)std::min<s32>(31, vertexColor.b),
|
||||
0
|
||||
};
|
||||
|
||||
|
|
|
@ -372,31 +372,34 @@ Render3DError RasterizerUnit<RENDERER>::_SetupTexture(const POLY &thePoly, size_
|
|||
}
|
||||
|
||||
template<bool RENDERER>
|
||||
FORCEINLINE Color4u8 RasterizerUnit<RENDERER>::_sample(const float u, const float v)
|
||||
FORCEINLINE Color4u8 RasterizerUnit<RENDERER>::_sample(const Vector2f32 &texCoord)
|
||||
{
|
||||
//finally, we can use floor here. but, it is slower than we want.
|
||||
//the best solution is probably to wait until the pipeline is full of fixed point
|
||||
const float fu = u * (float)this->_currentTexture->GetRenderWidth() / (float)this->_currentTexture->GetWidth();
|
||||
const float fv = v * (float)this->_currentTexture->GetRenderHeight() / (float)this->_currentTexture->GetHeight();
|
||||
s32 iu = 0;
|
||||
s32 iv = 0;
|
||||
const float texScalingFactor = (float)this->_currentTexture->GetScalingFactor();
|
||||
const Vector2f32 texCoordScaled = {
|
||||
texCoord.u * texScalingFactor,
|
||||
texCoord.v * texScalingFactor
|
||||
};
|
||||
|
||||
Vector2s32 sampleCoord = { 0, 0 };
|
||||
|
||||
if (!this->_softRender->_enableFragmentSamplingHack)
|
||||
{
|
||||
iu = s32floor(fu);
|
||||
iv = s32floor(fv);
|
||||
sampleCoord.u = s32floor(texCoordScaled.u);
|
||||
sampleCoord.v = s32floor(texCoordScaled.v);
|
||||
}
|
||||
else
|
||||
{
|
||||
iu = this->_round_s(fu);
|
||||
iv = this->_round_s(fv);
|
||||
sampleCoord.u = this->_round_s(texCoordScaled.u);
|
||||
sampleCoord.v = this->_round_s(texCoordScaled.v);
|
||||
}
|
||||
|
||||
const u32 *textureData = this->_currentTexture->GetRenderData();
|
||||
this->_currentTexture->GetRenderSamplerCoordinates(this->_textureWrapMode, iu, iv);
|
||||
this->_currentTexture->GetRenderSamplerCoordinates(this->_textureWrapMode, sampleCoord);
|
||||
|
||||
Color4u8 color;
|
||||
color.value = textureData[( iv << this->_currentTexture->GetRenderWidthShift() ) + iu];
|
||||
color.value = textureData[( sampleCoord.v << this->_currentTexture->GetRenderWidthShift() ) + sampleCoord.u];
|
||||
|
||||
return color;
|
||||
}
|
||||
|
@ -416,26 +419,27 @@ FORCEINLINE float RasterizerUnit<RENDERER>::_round_s(double val)
|
|||
}
|
||||
|
||||
template<bool RENDERER> template<bool ISSHADOWPOLYGON>
|
||||
FORCEINLINE void RasterizerUnit<RENDERER>::_shade(const PolygonMode polygonMode, const Color4u8 src, Color4u8 &dst, const float texCoordU, const float texCoordV)
|
||||
FORCEINLINE void RasterizerUnit<RENDERER>::_shade(const PolygonMode polygonMode, const Color4u8 vtxColor, const Vector2f32 &texCoord, Color4u8 &outColor)
|
||||
{
|
||||
if (ISSHADOWPOLYGON)
|
||||
{
|
||||
dst = src;
|
||||
outColor = vtxColor;
|
||||
return;
|
||||
}
|
||||
|
||||
const GFX3D_State &renderState = *this->_softRender->currentRenderState;
|
||||
|
||||
static const Color4u8 colorWhite = { 0x3F, 0x3F, 0x3F, 0x1F };
|
||||
const Color4u8 mainTexColor = (this->_currentTexture->IsSamplingEnabled()) ? this->_sample(texCoordU, texCoordV) : colorWhite;
|
||||
const Color4u8 mainTexColor = (this->_currentTexture->IsSamplingEnabled()) ? this->_sample(texCoord) : colorWhite;
|
||||
|
||||
switch (polygonMode)
|
||||
{
|
||||
case POLYGON_MODE_MODULATE:
|
||||
dst.r = modulate_table[mainTexColor.r][src.r];
|
||||
dst.g = modulate_table[mainTexColor.g][src.g];
|
||||
dst.b = modulate_table[mainTexColor.b][src.b];
|
||||
dst.a = modulate_table[GFX3D_5TO6_LOOKUP(mainTexColor.a)][GFX3D_5TO6_LOOKUP(src.a)]>>1;
|
||||
{
|
||||
outColor.r = modulate_table[mainTexColor.r][vtxColor.r];
|
||||
outColor.g = modulate_table[mainTexColor.g][vtxColor.g];
|
||||
outColor.b = modulate_table[mainTexColor.b][vtxColor.b];
|
||||
outColor.a = modulate_table[GFX3D_5TO6_LOOKUP(mainTexColor.a)][GFX3D_5TO6_LOOKUP(vtxColor.a)] >> 1;
|
||||
//dst.a = 28;
|
||||
//#ifdef _MSC_VER
|
||||
//if(GetAsyncKeyState(VK_SHIFT)) {
|
||||
|
@ -449,65 +453,63 @@ FORCEINLINE void RasterizerUnit<RENDERER>::_shade(const PolygonMode polygonMode,
|
|||
//}
|
||||
//#endif
|
||||
break;
|
||||
}
|
||||
|
||||
case POLYGON_MODE_DECAL:
|
||||
{
|
||||
if (this->_currentTexture->IsSamplingEnabled())
|
||||
{
|
||||
dst.r = decal_table[mainTexColor.a][mainTexColor.r][src.r];
|
||||
dst.g = decal_table[mainTexColor.a][mainTexColor.g][src.g];
|
||||
dst.b = decal_table[mainTexColor.a][mainTexColor.b][src.b];
|
||||
dst.a = src.a;
|
||||
outColor.r = decal_table[mainTexColor.a][mainTexColor.r][vtxColor.r];
|
||||
outColor.g = decal_table[mainTexColor.a][mainTexColor.g][vtxColor.g];
|
||||
outColor.b = decal_table[mainTexColor.a][mainTexColor.b][vtxColor.b];
|
||||
outColor.a = vtxColor.a;
|
||||
}
|
||||
else
|
||||
{
|
||||
dst = src;
|
||||
outColor = vtxColor;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case POLYGON_MODE_TOONHIGHLIGHT:
|
||||
{
|
||||
const Color4u8 toonColor = this->_softRender->toonColor32LUT[src.r >> 1];
|
||||
const Color4u8 toonColor = this->_softRender->toonColor32LUT[vtxColor.r >> 1];
|
||||
|
||||
if (renderState.DISP3DCNT.PolygonShading == PolygonShadingMode_Highlight)
|
||||
{
|
||||
// Tested in the "Shadows of Almia" logo in the Pokemon Ranger: Shadows of Almia title screen.
|
||||
// Also tested in Advance Wars: Dual Strike and Advance Wars: Days of Ruin when tiles highlight
|
||||
// during unit selection.
|
||||
dst.r = modulate_table[mainTexColor.r][src.r];
|
||||
dst.g = modulate_table[mainTexColor.g][src.r];
|
||||
dst.b = modulate_table[mainTexColor.b][src.r];
|
||||
dst.a = modulate_table[GFX3D_5TO6_LOOKUP(mainTexColor.a)][GFX3D_5TO6_LOOKUP(src.a)] >> 1;
|
||||
outColor.r = modulate_table[mainTexColor.r][vtxColor.r];
|
||||
outColor.g = modulate_table[mainTexColor.g][vtxColor.r];
|
||||
outColor.b = modulate_table[mainTexColor.b][vtxColor.r];
|
||||
outColor.a = modulate_table[GFX3D_5TO6_LOOKUP(mainTexColor.a)][GFX3D_5TO6_LOOKUP(vtxColor.a)] >> 1;
|
||||
|
||||
dst.r = min<u8>(0x3F, (dst.r + toonColor.r));
|
||||
dst.g = min<u8>(0x3F, (dst.g + toonColor.g));
|
||||
dst.b = min<u8>(0x3F, (dst.b + toonColor.b));
|
||||
outColor.r = min<u8>(0x3F, (outColor.r + toonColor.r));
|
||||
outColor.g = min<u8>(0x3F, (outColor.g + toonColor.g));
|
||||
outColor.b = min<u8>(0x3F, (outColor.b + toonColor.b));
|
||||
}
|
||||
else
|
||||
{
|
||||
dst.r = modulate_table[mainTexColor.r][toonColor.r];
|
||||
dst.g = modulate_table[mainTexColor.g][toonColor.g];
|
||||
dst.b = modulate_table[mainTexColor.b][toonColor.b];
|
||||
dst.a = modulate_table[GFX3D_5TO6_LOOKUP(mainTexColor.a)][GFX3D_5TO6_LOOKUP(src.a)] >> 1;
|
||||
outColor.r = modulate_table[mainTexColor.r][toonColor.r];
|
||||
outColor.g = modulate_table[mainTexColor.g][toonColor.g];
|
||||
outColor.b = modulate_table[mainTexColor.b][toonColor.b];
|
||||
outColor.a = modulate_table[GFX3D_5TO6_LOOKUP(mainTexColor.a)][GFX3D_5TO6_LOOKUP(vtxColor.a)] >> 1;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case POLYGON_MODE_SHADOW:
|
||||
//is this right? only with the material color?
|
||||
dst = src;
|
||||
outColor = vtxColor;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
template<bool RENDERER> template<bool ISFRONTFACING, bool ISSHADOWPOLYGON>
|
||||
FORCEINLINE void RasterizerUnit<RENDERER>::_pixel(const POLYGON_ATTR polyAttr, const bool isTranslucent, const size_t fragmentIndex, Color4u8 &dstColor, float r, float g, float b, float invu, float invv, float z, float w)
|
||||
FORCEINLINE void RasterizerUnit<RENDERER>::_pixel(const POLYGON_ATTR polyAttr, const bool isTranslucent, const size_t fragmentIndex, Color4u8 &dstColor, const Color4f32 &vtxColorFloat, float invu, float invv, float z, float w)
|
||||
{
|
||||
const GFX3D_State &renderState = *this->_softRender->currentRenderState;
|
||||
Color4u8 newDstColor32;
|
||||
Color4u8 shaderOutput;
|
||||
bool isOpaquePixel;
|
||||
|
||||
u32 &dstAttributeDepth = this->_softRender->_framebufferAttributes->depth[fragmentIndex];
|
||||
u8 &dstAttributeOpaquePolyID = this->_softRender->_framebufferAttributes->opaquePolyID[fragmentIndex];
|
||||
|
@ -600,22 +602,21 @@ FORCEINLINE void RasterizerUnit<RENDERER>::_pixel(const POLYGON_ATTR polyAttr, c
|
|||
dstAttributeStencil = 0;
|
||||
}
|
||||
}
|
||||
|
||||
//perspective-correct the colors
|
||||
r = (r * w) + 0.5f;
|
||||
g = (g * w) + 0.5f;
|
||||
b = (b * w) + 0.5f;
|
||||
|
||||
//this is a HACK:
|
||||
//we are being very sloppy with our interpolation precision right now
|
||||
//and rather than fix it, i just want to clamp it
|
||||
newDstColor32.r = max<u8>(0x00, min<u32>(0x3F, u32floor(r)));
|
||||
newDstColor32.g = max<u8>(0x00, min<u32>(0x3F, u32floor(g)));
|
||||
newDstColor32.b = max<u8>(0x00, min<u32>(0x3F, u32floor(b)));
|
||||
newDstColor32.a = polyAttr.Alpha;
|
||||
const Color4u8 vtxColor = {
|
||||
max<u8>( 0x00, min<u32>(0x3F, u32floor( (vtxColorFloat.r * w) + 0.5f )) ),
|
||||
max<u8>( 0x00, min<u32>(0x3F, u32floor( (vtxColorFloat.g * w) + 0.5f )) ),
|
||||
max<u8>( 0x00, min<u32>(0x3F, u32floor( (vtxColorFloat.b * w) + 0.5f )) ),
|
||||
polyAttr.Alpha
|
||||
};
|
||||
|
||||
//pixel shader
|
||||
this->_shade<ISSHADOWPOLYGON>((PolygonMode)polyAttr.Mode, newDstColor32, shaderOutput, invu * w, invv * w);
|
||||
const Vector2f32 texCoordFloat = { invu * w, invv * w };
|
||||
Color4u8 shaderOutput;
|
||||
this->_shade<ISSHADOWPOLYGON>((PolygonMode)polyAttr.Mode, vtxColor, texCoordFloat, shaderOutput);
|
||||
|
||||
// handle alpha test
|
||||
if ( shaderOutput.a == 0 ||
|
||||
|
@ -625,7 +626,7 @@ FORCEINLINE void RasterizerUnit<RENDERER>::_pixel(const POLYGON_ATTR polyAttr, c
|
|||
}
|
||||
|
||||
// write pixel values to the framebuffer
|
||||
isOpaquePixel = (shaderOutput.a == 0x1F);
|
||||
const bool isOpaquePixel = (shaderOutput.a == 0x1F);
|
||||
if (isOpaquePixel)
|
||||
{
|
||||
dstAttributeOpaquePolyID = polyAttr.PolygonID;
|
||||
|
@ -684,7 +685,7 @@ FORCEINLINE void RasterizerUnit<RENDERER>::_drawscanline(const POLYGON_ATTR poly
|
|||
pLeft->invw.curr
|
||||
};
|
||||
|
||||
CACHE_ALIGN float color[4] = {
|
||||
CACHE_ALIGN Color4f32 vtxColorFloat = {
|
||||
pLeft->color[0].curr,
|
||||
pLeft->color[1].curr,
|
||||
pLeft->color[2].curr,
|
||||
|
@ -701,10 +702,10 @@ FORCEINLINE void RasterizerUnit<RENDERER>::_drawscanline(const POLYGON_ATTR poly
|
|||
(pRight->invw.curr - coord[3]) * invWidth
|
||||
};
|
||||
|
||||
const CACHE_ALIGN float color_dx[4] = {
|
||||
(pRight->color[0].curr - color[0]) * invWidth,
|
||||
(pRight->color[1].curr - color[1]) * invWidth,
|
||||
(pRight->color[2].curr - color[2]) * invWidth,
|
||||
const CACHE_ALIGN Color4f32 vtxColorFloat_dx = {
|
||||
(pRight->color[0].curr - vtxColorFloat.r) * invWidth,
|
||||
(pRight->color[1].curr - vtxColorFloat.g) * invWidth,
|
||||
(pRight->color[2].curr - vtxColorFloat.b) * invWidth,
|
||||
0.0f * invWidth
|
||||
};
|
||||
|
||||
|
@ -712,18 +713,14 @@ FORCEINLINE void RasterizerUnit<RENDERER>::_drawscanline(const POLYGON_ATTR poly
|
|||
|
||||
//CONSIDER: in case some other math is wrong (shouldve been clipped OK), we might go out of bounds here.
|
||||
//better check the Y value.
|
||||
if (RENDERER && (pLeft->Y < 0 || pLeft->Y > (framebufferHeight - 1)))
|
||||
if ( (pLeft->Y < 0) || (pLeft->Y >= framebufferHeight) )
|
||||
{
|
||||
printf("rasterizer rendering at y=%d! oops!\n",pLeft->Y);
|
||||
return;
|
||||
}
|
||||
if (!RENDERER && (pLeft->Y < 0 || pLeft->Y >= framebufferHeight))
|
||||
{
|
||||
printf("rasterizer rendering at y=%d! oops!\n",pLeft->Y);
|
||||
const float gpuScalingFactorHeight = (float)framebufferHeight / (float)GPU_FRAMEBUFFER_NATIVE_HEIGHT;
|
||||
printf("rasterizer rendering at y=%d! oops! (x%.1f)\n", pLeft->Y, gpuScalingFactorHeight);
|
||||
return;
|
||||
}
|
||||
|
||||
int x = XStart;
|
||||
s32 x = XStart;
|
||||
|
||||
if (x < 0)
|
||||
{
|
||||
|
@ -740,28 +737,51 @@ FORCEINLINE void RasterizerUnit<RENDERER>::_drawscanline(const POLYGON_ATTR poly
|
|||
coord[2] += coord_dx[2] * negativeX;
|
||||
coord[3] += coord_dx[3] * negativeX;
|
||||
|
||||
color[0] += color_dx[0] * negativeX;
|
||||
color[1] += color_dx[1] * negativeX;
|
||||
color[2] += color_dx[2] * negativeX;
|
||||
color[3] += color_dx[3] * negativeX;
|
||||
vtxColorFloat.r += vtxColorFloat_dx.r * negativeX;
|
||||
vtxColorFloat.g += vtxColorFloat_dx.g * negativeX;
|
||||
vtxColorFloat.b += vtxColorFloat_dx.b * negativeX;
|
||||
vtxColorFloat.a += vtxColorFloat_dx.a * negativeX;
|
||||
|
||||
adr += -x;
|
||||
width -= -x;
|
||||
x = 0;
|
||||
}
|
||||
|
||||
// Normally, if an out-of-bounds write were to occur, this would cause an error to get logged.
|
||||
// However, due to rounding errors associated with floating-point conversions, it is common that
|
||||
// rendering at custom resolutions may cause the rendering width to be off by 1. Therefore, we
|
||||
// will treat this case as an exception and suppress the error through the use of this flag.
|
||||
bool customResolutionOutOfBoundsSuppress = false;
|
||||
|
||||
if (x+width > framebufferWidth)
|
||||
{
|
||||
if (RENDERER && !USELINEHACK && framebufferWidth == GPU_FRAMEBUFFER_NATIVE_WIDTH)
|
||||
if (RENDERER && !USELINEHACK)
|
||||
{
|
||||
printf("rasterizer rendering at x=%d! oops!\n",x+width-1);
|
||||
return;
|
||||
if ( (framebufferWidth != GPU_FRAMEBUFFER_NATIVE_WIDTH) && (x+width == framebufferWidth+1) )
|
||||
{
|
||||
customResolutionOutOfBoundsSuppress = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
const float gpuScalingFactorWidth = (float)framebufferWidth / (float)GPU_FRAMEBUFFER_NATIVE_WIDTH;
|
||||
printf("rasterizer rendering at x=%d! oops! (x%.2f)\n", x+width-1, gpuScalingFactorWidth);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (customResolutionOutOfBoundsSuppress)
|
||||
{
|
||||
width -= 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
width = (s32)framebufferWidth - x;
|
||||
}
|
||||
width = framebufferWidth - x;
|
||||
}
|
||||
|
||||
while (width-- > 0)
|
||||
{
|
||||
this->_pixel<ISFRONTFACING, ISSHADOWPOLYGON>(polyAttr, isTranslucent, adr, dstColor[adr], color[0], color[1], color[2], coord[0], coord[1], coord[2], 1.0f/coord[3]);
|
||||
this->_pixel<ISFRONTFACING, ISSHADOWPOLYGON>(polyAttr, isTranslucent, adr, dstColor[adr], vtxColorFloat, coord[0], coord[1], coord[2], 1.0f/coord[3]);
|
||||
adr++;
|
||||
x++;
|
||||
|
||||
|
@ -770,22 +790,19 @@ FORCEINLINE void RasterizerUnit<RENDERER>::_drawscanline(const POLYGON_ATTR poly
|
|||
coord[2] += coord_dx[2];
|
||||
coord[3] += coord_dx[3];
|
||||
|
||||
color[0] += color_dx[0];
|
||||
color[1] += color_dx[1];
|
||||
color[2] += color_dx[2];
|
||||
color[3] += color_dx[3];
|
||||
vtxColorFloat.r += vtxColorFloat_dx.r;
|
||||
vtxColorFloat.g += vtxColorFloat_dx.g;
|
||||
vtxColorFloat.b += vtxColorFloat_dx.b;
|
||||
vtxColorFloat.a += vtxColorFloat_dx.a;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
|
||||
template<bool RENDERER> template<bool ISFRONTFACING, bool ISSHADOWPOLYGON>
|
||||
FORCEINLINE void RasterizerUnit<RENDERER>::_pixel_SSE2(const POLYGON_ATTR polyAttr, const bool isTranslucent, const size_t fragmentIndex, Color4u8 &dstColor, const __m128 &srcColorf, float invu, float invv, float z, float w)
|
||||
FORCEINLINE void RasterizerUnit<RENDERER>::_pixel_SSE2(const POLYGON_ATTR polyAttr, const bool isTranslucent, const size_t fragmentIndex, Color4u8 &dstColor, const __m128 &vtxColorFloat, float invu, float invv, float z, float w)
|
||||
{
|
||||
const GFX3D_State &renderState = *this->_softRender->currentRenderState;
|
||||
Color4u8 newDstColor32;
|
||||
Color4u8 shaderOutput;
|
||||
bool isOpaquePixel;
|
||||
|
||||
u32 &dstAttributeDepth = this->_softRender->_framebufferAttributes->depth[fragmentIndex];
|
||||
u8 &dstAttributeOpaquePolyID = this->_softRender->_framebufferAttributes->opaquePolyID[fragmentIndex];
|
||||
|
@ -881,18 +898,21 @@ FORCEINLINE void RasterizerUnit<RENDERER>::_pixel_SSE2(const POLYGON_ATTR polyAt
|
|||
|
||||
//perspective-correct the colors
|
||||
const __m128 perspective = _mm_set_ps(31.0f, w, w, w);
|
||||
__m128 newColorf = _mm_add_ps( _mm_mul_ps(srcColorf, perspective), _mm_set1_ps(0.5f) );
|
||||
newColorf = _mm_max_ps(newColorf, _mm_setzero_ps());
|
||||
__m128 newVtxColorf = _mm_add_ps( _mm_mul_ps(vtxColorFloat, perspective), _mm_set1_ps(0.5f) );
|
||||
newVtxColorf = _mm_max_ps(newVtxColorf, _mm_setzero_ps());
|
||||
|
||||
__m128i cvtColor32 = _mm_cvttps_epi32(newColorf);
|
||||
cvtColor32 = _mm_min_epu8(cvtColor32, _mm_set_epi32(0x1F, 0x3F, 0x3F, 0x3F));
|
||||
cvtColor32 = _mm_packus_epi16(cvtColor32, _mm_setzero_si128());
|
||||
cvtColor32 = _mm_packus_epi16(cvtColor32, _mm_setzero_si128());
|
||||
__m128i newVtxColori = _mm_cvttps_epi32(newVtxColorf);
|
||||
newVtxColori = _mm_min_epu8(newVtxColori, _mm_set_epi32(0x1F, 0x3F, 0x3F, 0x3F));
|
||||
newVtxColori = _mm_packus_epi16(newVtxColori, _mm_setzero_si128());
|
||||
newVtxColori = _mm_packus_epi16(newVtxColori, _mm_setzero_si128());
|
||||
|
||||
newDstColor32.value = _mm_cvtsi128_si32(cvtColor32);
|
||||
Color4u8 vtxColor;
|
||||
vtxColor.value = _mm_cvtsi128_si32(newVtxColori);
|
||||
|
||||
//pixel shader
|
||||
this->_shade<ISSHADOWPOLYGON>((PolygonMode)polyAttr.Mode, newDstColor32, shaderOutput, invu * w, invv * w);
|
||||
const Vector2f32 texCoordFloat = { invu * w, invv * w };
|
||||
Color4u8 shaderOutput;
|
||||
this->_shade<ISSHADOWPOLYGON>((PolygonMode)polyAttr.Mode, vtxColor, texCoordFloat, shaderOutput);
|
||||
|
||||
// handle alpha test
|
||||
if ( shaderOutput.a == 0 ||
|
||||
|
@ -902,7 +922,7 @@ FORCEINLINE void RasterizerUnit<RENDERER>::_pixel_SSE2(const POLYGON_ATTR polyAt
|
|||
}
|
||||
|
||||
// write pixel values to the framebuffer
|
||||
isOpaquePixel = (shaderOutput.a == 0x1F);
|
||||
const bool isOpaquePixel = (shaderOutput.a == 0x1F);
|
||||
if (isOpaquePixel)
|
||||
{
|
||||
dstAttributeOpaquePolyID = polyAttr.PolygonID;
|
||||
|
@ -955,32 +975,28 @@ FORCEINLINE void RasterizerUnit<RENDERER>::_drawscanline_SSE2(const POLYGON_ATTR
|
|||
|
||||
//these are the starting values, taken from the left edge
|
||||
__m128 coord = _mm_setr_ps(pLeft->u.curr,
|
||||
pLeft->v.curr,
|
||||
pLeft->z.curr,
|
||||
pLeft->invw.curr);
|
||||
pLeft->v.curr,
|
||||
pLeft->z.curr,
|
||||
pLeft->invw.curr);
|
||||
|
||||
__m128 color = _mm_setr_ps(pLeft->color[0].curr,
|
||||
pLeft->color[1].curr,
|
||||
pLeft->color[2].curr,
|
||||
(float)polyAttr.Alpha / 31.0f);
|
||||
__m128 vtxColorFloat = _mm_setr_ps(pLeft->color[0].curr,
|
||||
pLeft->color[1].curr,
|
||||
pLeft->color[2].curr,
|
||||
(float)polyAttr.Alpha / 31.0f);
|
||||
|
||||
//our dx values are taken from the steps up until the right edge
|
||||
const __m128 invWidth = _mm_set1_ps(1.0f / (float)width);
|
||||
const __m128 coord_dx = _mm_mul_ps(_mm_setr_ps(pRight->u.curr - pLeft->u.curr, pRight->v.curr - pLeft->v.curr, pRight->z.curr - pLeft->z.curr, pRight->invw.curr - pLeft->invw.curr), invWidth);
|
||||
const __m128 color_dx = _mm_mul_ps(_mm_setr_ps(pRight->color[0].curr - pLeft->color[0].curr, pRight->color[1].curr - pLeft->color[1].curr, pRight->color[2].curr - pLeft->color[2].curr, 0.0f), invWidth);
|
||||
const __m128 vtxColorFloat_dx = _mm_mul_ps(_mm_setr_ps(pRight->color[0].curr - pLeft->color[0].curr, pRight->color[1].curr - pLeft->color[1].curr, pRight->color[2].curr - pLeft->color[2].curr, 0.0f), invWidth);
|
||||
|
||||
size_t adr = (pLeft->Y*framebufferWidth)+XStart;
|
||||
|
||||
//CONSIDER: in case some other math is wrong (shouldve been clipped OK), we might go out of bounds here.
|
||||
//better check the Y value.
|
||||
if (RENDERER && (pLeft->Y < 0 || pLeft->Y > (framebufferHeight - 1)))
|
||||
if ( (pLeft->Y < 0) || (pLeft->Y >= framebufferHeight) )
|
||||
{
|
||||
printf("rasterizer rendering at y=%d! oops!\n",pLeft->Y);
|
||||
return;
|
||||
}
|
||||
if (!RENDERER && (pLeft->Y < 0 || pLeft->Y >= framebufferHeight))
|
||||
{
|
||||
printf("rasterizer rendering at y=%d! oops!\n",pLeft->Y);
|
||||
const float gpuScalingFactorHeight = (float)framebufferHeight / (float)GPU_FRAMEBUFFER_NATIVE_HEIGHT;
|
||||
printf("rasterizer rendering at y=%d! oops! (x%.1f)\n", pLeft->Y, gpuScalingFactorHeight);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -996,20 +1012,43 @@ FORCEINLINE void RasterizerUnit<RENDERER>::_drawscanline_SSE2(const POLYGON_ATTR
|
|||
|
||||
const __m128 negativeX = _mm_cvtepi32_ps(_mm_set1_epi32(-x));
|
||||
coord = _mm_add_ps(coord, _mm_mul_ps(coord_dx, negativeX));
|
||||
color = _mm_add_ps(color, _mm_mul_ps(color_dx, negativeX));
|
||||
vtxColorFloat = _mm_add_ps(vtxColorFloat, _mm_mul_ps(vtxColorFloat_dx, negativeX));
|
||||
|
||||
adr += -x;
|
||||
width -= -x;
|
||||
x = 0;
|
||||
}
|
||||
|
||||
// Normally, if an out-of-bounds write were to occur, this would cause an error to get logged.
|
||||
// However, due to rounding errors associated with floating-point conversions, it is common that
|
||||
// rendering at custom resolutions may cause the rendering width to be off by 1. Therefore, we
|
||||
// will treat this case as an exception and suppress the error through the use of this flag.
|
||||
bool customResolutionOutOfBoundsSuppress = false;
|
||||
|
||||
if (x+width > framebufferWidth)
|
||||
{
|
||||
if (RENDERER && !USELINEHACK && framebufferWidth == GPU_FRAMEBUFFER_NATIVE_WIDTH)
|
||||
if (RENDERER && !USELINEHACK)
|
||||
{
|
||||
printf("rasterizer rendering at x=%d! oops!\n",x+width-1);
|
||||
return;
|
||||
if ( (framebufferWidth != GPU_FRAMEBUFFER_NATIVE_WIDTH) && (x+width == framebufferWidth+1) )
|
||||
{
|
||||
customResolutionOutOfBoundsSuppress = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
const float gpuScalingFactorWidth = (float)framebufferWidth / (float)GPU_FRAMEBUFFER_NATIVE_WIDTH;
|
||||
printf("rasterizer rendering at x=%d! oops! (x%.2f)\n", x+width-1, gpuScalingFactorWidth);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (customResolutionOutOfBoundsSuppress)
|
||||
{
|
||||
width -= 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
width = (s32)framebufferWidth - x;
|
||||
}
|
||||
width = framebufferWidth - x;
|
||||
}
|
||||
|
||||
CACHE_ALIGN float coord_s[4];
|
||||
|
@ -1018,12 +1057,12 @@ FORCEINLINE void RasterizerUnit<RENDERER>::_drawscanline_SSE2(const POLYGON_ATTR
|
|||
{
|
||||
_mm_store_ps(coord_s, coord);
|
||||
|
||||
this->_pixel_SSE2<ISFRONTFACING, ISSHADOWPOLYGON>(polyAttr, isTranslucent, adr, dstColor[adr], color, coord_s[0], coord_s[1], coord_s[2], 1.0f/coord_s[3]);
|
||||
this->_pixel_SSE2<ISFRONTFACING, ISSHADOWPOLYGON>(polyAttr, isTranslucent, adr, dstColor[adr], vtxColorFloat, coord_s[0], coord_s[1], coord_s[2], 1.0f/coord_s[3]);
|
||||
adr++;
|
||||
x++;
|
||||
|
||||
coord = _mm_add_ps(coord, coord_dx);
|
||||
color = _mm_add_ps(color, color_dx);
|
||||
vtxColorFloat = _mm_add_ps(vtxColorFloat, vtxColorFloat_dx);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1601,30 +1640,30 @@ u32 SoftRasterizerTexture::GetRenderWidthShift() const
|
|||
return this->_renderWidthShift;
|
||||
}
|
||||
|
||||
FORCEINLINE void SoftRasterizerTexture::GetRenderSamplerCoordinates(const u8 wrapMode, s32 &iu, s32 &iv) const
|
||||
FORCEINLINE void SoftRasterizerTexture::GetRenderSamplerCoordinates(const u8 wrapMode, Vector2s32 &sampleCoordInOut) const
|
||||
{
|
||||
switch (wrapMode)
|
||||
{
|
||||
//flip none
|
||||
case 0x0: _hclamp(iu); _vclamp(iv); break;
|
||||
case 0x1: _hrepeat(iu); _vclamp(iv); break;
|
||||
case 0x2: _hclamp(iu); _vrepeat(iv); break;
|
||||
case 0x3: _hrepeat(iu); _vrepeat(iv); break;
|
||||
case 0x0: _hclamp(sampleCoordInOut.u); _vclamp(sampleCoordInOut.v); break;
|
||||
case 0x1: _hrepeat(sampleCoordInOut.u); _vclamp(sampleCoordInOut.v); break;
|
||||
case 0x2: _hclamp(sampleCoordInOut.u); _vrepeat(sampleCoordInOut.v); break;
|
||||
case 0x3: _hrepeat(sampleCoordInOut.u); _vrepeat(sampleCoordInOut.v); break;
|
||||
//flip S
|
||||
case 0x4: _hclamp(iu); _vclamp(iv); break;
|
||||
case 0x5: _hflip(iu); _vclamp(iv); break;
|
||||
case 0x6: _hclamp(iu); _vrepeat(iv); break;
|
||||
case 0x7: _hflip(iu); _vrepeat(iv); break;
|
||||
case 0x4: _hclamp(sampleCoordInOut.u); _vclamp(sampleCoordInOut.v); break;
|
||||
case 0x5: _hflip(sampleCoordInOut.u); _vclamp(sampleCoordInOut.v); break;
|
||||
case 0x6: _hclamp(sampleCoordInOut.u); _vrepeat(sampleCoordInOut.v); break;
|
||||
case 0x7: _hflip(sampleCoordInOut.u); _vrepeat(sampleCoordInOut.v); break;
|
||||
//flip T
|
||||
case 0x8: _hclamp(iu); _vclamp(iv); break;
|
||||
case 0x9: _hrepeat(iu); _vclamp(iv); break;
|
||||
case 0xA: _hclamp(iu); _vflip(iv); break;
|
||||
case 0xB: _hrepeat(iu); _vflip(iv); break;
|
||||
case 0x8: _hclamp(sampleCoordInOut.u); _vclamp(sampleCoordInOut.v); break;
|
||||
case 0x9: _hrepeat(sampleCoordInOut.u); _vclamp(sampleCoordInOut.v); break;
|
||||
case 0xA: _hclamp(sampleCoordInOut.u); _vflip(sampleCoordInOut.v); break;
|
||||
case 0xB: _hrepeat(sampleCoordInOut.u); _vflip(sampleCoordInOut.v); break;
|
||||
//flip both
|
||||
case 0xC: _hclamp(iu); _vclamp(iv); break;
|
||||
case 0xD: _hflip(iu); _vclamp(iv); break;
|
||||
case 0xE: _hclamp(iu); _vflip(iv); break;
|
||||
case 0xF: _hflip(iu); _vflip(iv); break;
|
||||
case 0xC: _hclamp(sampleCoordInOut.u); _vclamp(sampleCoordInOut.v); break;
|
||||
case 0xD: _hflip(sampleCoordInOut.u); _vclamp(sampleCoordInOut.v); break;
|
||||
case 0xE: _hclamp(sampleCoordInOut.u); _vflip(sampleCoordInOut.v); break;
|
||||
case 0xF: _hflip(sampleCoordInOut.u); _vflip(sampleCoordInOut.v); break;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -87,7 +87,7 @@ public:
|
|||
s32 GetRenderHeightMask() const;
|
||||
u32 GetRenderWidthShift() const;
|
||||
|
||||
void GetRenderSamplerCoordinates(const u8 wrapMode, s32 &iu, s32 &iv) const;
|
||||
void GetRenderSamplerCoordinates(const u8 wrapMode, Vector2s32 &sampleCoordInOut) const;
|
||||
|
||||
void SetUseDeposterize(bool willDeposterize);
|
||||
void SetScalingFactor(size_t scalingFactor);
|
||||
|
@ -107,16 +107,16 @@ protected:
|
|||
u8 _textureWrapMode;
|
||||
|
||||
Render3DError _SetupTexture(const POLY &thePoly, size_t polyRenderIndex);
|
||||
FORCEINLINE Color4u8 _sample(const float u, const float v);
|
||||
FORCEINLINE Color4u8 _sample(const Vector2f32 &texCoord);
|
||||
FORCEINLINE float _round_s(double val);
|
||||
|
||||
template<bool ISSHADOWPOLYGON> FORCEINLINE void _shade(const PolygonMode polygonMode, const Color4u8 src, Color4u8 &dst, const float texCoordU, const float texCoordV);
|
||||
template<bool ISFRONTFACING, bool ISSHADOWPOLYGON> FORCEINLINE void _pixel(const POLYGON_ATTR polyAttr, const bool isTranslucent, const size_t fragmentIndex, Color4u8 &dstColor, float r, float g, float b, float invu, float invv, float z, float w);
|
||||
template<bool ISSHADOWPOLYGON> FORCEINLINE void _shade(const PolygonMode polygonMode, const Color4u8 vtxColor, const Vector2f32 &texCoord, Color4u8 &outColor);
|
||||
template<bool ISFRONTFACING, bool ISSHADOWPOLYGON> FORCEINLINE void _pixel(const POLYGON_ATTR polyAttr, const bool isTranslucent, const size_t fragmentIndex, Color4u8 &dstColor, const Color4f32 &vtxColorFloat, float invu, float invv, float z, float w);
|
||||
template<bool ISFRONTFACING, bool ISSHADOWPOLYGON, bool USELINEHACK> FORCEINLINE void _drawscanline(const POLYGON_ATTR polyAttr, const bool isTranslucent, Color4u8 *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, edge_fx_fl *pLeft, edge_fx_fl *pRight);
|
||||
template<bool SLI, bool ISFRONTFACING, bool ISSHADOWPOLYGON, bool USELINEHACK> void _runscanlines(const POLYGON_ATTR polyAttr, const bool isTranslucent, Color4u8 *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, const bool isHorizontal, edge_fx_fl *left, edge_fx_fl *right);
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
template<bool ISFRONTFACING, bool ISSHADOWPOLYGON> FORCEINLINE void _pixel_SSE2(const POLYGON_ATTR polyAttr, const bool isTranslucent, const size_t fragmentIndex, Color4u8 &dstColor, const __m128 &srcColorf, float invu, float invv, float z, float w);
|
||||
template<bool ISFRONTFACING, bool ISSHADOWPOLYGON> FORCEINLINE void _pixel_SSE2(const POLYGON_ATTR polyAttr, const bool isTranslucent, const size_t fragmentIndex, Color4u8 &dstColor, const __m128 &vtxColorFloat, float invu, float invv, float z, float w);
|
||||
template<bool ISFRONTFACING, bool ISSHADOWPOLYGON, bool USELINEHACK> FORCEINLINE void _drawscanline_SSE2(const POLYGON_ATTR polyAttr, const bool isTranslucent, Color4u8 *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, edge_fx_fl *pLeft, edge_fx_fl *pRight);
|
||||
#endif
|
||||
|
||||
|
|
Loading…
Reference in New Issue