GPU:
- Unify all colorspace conversion code.
- Fix bug with VRAM-to-VRAM capture.

OpenGL Renderer:
- Try to fix a possible bug with applying fog to transparent fragments.
This commit is contained in:
parent
b543e309c5
commit
f8e0585d26
|
@ -49,6 +49,64 @@
|
|||
|
||||
u32 Render3DFramesPerSecond;
|
||||
|
||||
// Lookup tables that expand a 15-bit BGR555 color (the table index, 0..32767) into a 32-bit color.
// They are filled in at runtime by the GPUSubsystem constructor, which is why they are not const.
// The "_opaque" variants carry a full alpha value (0x1F for 6665, 0xFF for 8888) in the top byte;
// the other two leave the alpha byte at zero.
CACHE_ALIGN u32 color_555_to_6665_opaque[32768];
|
||||
CACHE_ALIGN u32 color_555_to_666[32768];
|
||||
CACHE_ALIGN u32 color_555_to_8888_opaque[32768];
|
||||
CACHE_ALIGN u32 color_555_to_888[32768];
|
||||
|
||||
//is this a crazy idea? this table spreads 5 bits evenly over 31 from exactly 0 to INT_MAX
|
||||
CACHE_ALIGN const u32 material_5bit_to_31bit[] = {
|
||||
0x00000000, 0x04210842, 0x08421084, 0x0C6318C6,
|
||||
0x10842108, 0x14A5294A, 0x18C6318C, 0x1CE739CE,
|
||||
0x21084210, 0x25294A52, 0x294A5294, 0x2D6B5AD6,
|
||||
0x318C6318, 0x35AD6B5A, 0x39CE739C, 0x3DEF7BDE,
|
||||
0x42108421, 0x46318C63, 0x4A5294A5, 0x4E739CE7,
|
||||
0x5294A529, 0x56B5AD6B, 0x5AD6B5AD, 0x5EF7BDEF,
|
||||
0x6318C631, 0x6739CE73, 0x6B5AD6B5, 0x6F7BDEF7,
|
||||
0x739CE739, 0x77BDEF7B, 0x7BDEF7BD, 0x7FFFFFFF
|
||||
};
|
||||
|
||||
// 5-bit to 6-bit conversions use this formula -- dst = (src == 0) ? 0 : (2*src) + 1
|
||||
// Reference GBATEK: http://problemkaputt.de/gbatek.htm#ds3dtextureblending
|
||||
CACHE_ALIGN const u8 material_5bit_to_6bit[] = {
|
||||
0x00, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F,
|
||||
0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F,
|
||||
0x21, 0x23, 0x25, 0x27, 0x29, 0x2B, 0x2D, 0x2F,
|
||||
0x31, 0x33, 0x35, 0x37, 0x39, 0x3B, 0x3D, 0x3F
|
||||
};
|
||||
|
||||
// Expands a 5-bit channel to 8 bits by bit replication: dst = (src << 3) | (src >> 2)
CACHE_ALIGN const u8 material_5bit_to_8bit[32] = {
	0x00, 0x08, 0x10, 0x18,   0x21, 0x29, 0x31, 0x39, //  0 -  7
	0x42, 0x4A, 0x52, 0x5A,   0x63, 0x6B, 0x73, 0x7B, //  8 - 15
	0x84, 0x8C, 0x94, 0x9C,   0xA5, 0xAD, 0xB5, 0xBD, // 16 - 23
	0xC6, 0xCE, 0xD6, 0xDE,   0xE7, 0xEF, 0xF7, 0xFF  // 24 - 31
};
|
||||
|
||||
// Expands a 6-bit channel to 8 bits by bit replication: dst = (src << 2) | (src >> 4)
CACHE_ALIGN const u8 material_6bit_to_8bit[64] = {
	0x00, 0x04, 0x08, 0x0C,   0x10, 0x14, 0x18, 0x1C, //  0 -  7
	0x20, 0x24, 0x28, 0x2C,   0x30, 0x34, 0x38, 0x3C, //  8 - 15
	0x41, 0x45, 0x49, 0x4D,   0x51, 0x55, 0x59, 0x5D, // 16 - 23
	0x61, 0x65, 0x69, 0x6D,   0x71, 0x75, 0x79, 0x7D, // 24 - 31
	0x82, 0x86, 0x8A, 0x8E,   0x92, 0x96, 0x9A, 0x9E, // 32 - 39
	0xA2, 0xA6, 0xAA, 0xAE,   0xB2, 0xB6, 0xBA, 0xBE, // 40 - 47
	0xC3, 0xC7, 0xCB, 0xCF,   0xD3, 0xD7, 0xDB, 0xDF, // 48 - 55
	0xE3, 0xE7, 0xEB, 0xEF,   0xF3, 0xF7, 0xFB, 0xFF  // 56 - 63
};
|
||||
|
||||
// Expands a 3-bit value to 8 bits by bit replication: dst = (src << 5) | (src << 2) | (src >> 1)
CACHE_ALIGN const u8 material_3bit_to_8bit[8] = {
	0x00, 0x24, 0x49, 0x6D,
	0x92, 0xB6, 0xDB, 0xFF
};
|
||||
|
||||
//maybe not very precise
|
||||
CACHE_ALIGN const u8 material_3bit_to_5bit[] = {
|
||||
0, 4, 8, 13, 17, 22, 26, 31
|
||||
};
|
||||
|
||||
//TODO - generate this in the static init method more accurately
|
||||
CACHE_ALIGN const u8 material_3bit_to_6bit[] = {
|
||||
0, 8, 16, 26, 34, 44, 52, 63
|
||||
};
|
||||
|
||||
//instantiate static instance
|
||||
u16 GPUEngineBase::_fadeInColors[17][0x8000];
|
||||
u16 GPUEngineBase::_fadeOutColors[17][0x8000];
|
||||
|
@ -869,9 +927,12 @@ FORCEINLINE FragmentColor GPUEngineBase::_ColorEffectDecreaseBrightness(const Fr
|
|||
b = col.b;
|
||||
}
|
||||
|
||||
newColor.r = (r - (r * blendEVY / 16));
|
||||
newColor.g = (g - (g * blendEVY / 16));
|
||||
newColor.b = (b - (b * blendEVY / 16));
|
||||
if ( (INPUTFORMAT != NDSColorFormat_BGR555_Rev) && (OUTPUTFORMAT != NDSColorFormat_BGR555_Rev) )
|
||||
{
|
||||
newColor.r = (r - (r * blendEVY / 16));
|
||||
newColor.g = (g - (g * blendEVY / 16));
|
||||
newColor.b = (b - (b * blendEVY / 16));
|
||||
}
|
||||
|
||||
return newColor;
|
||||
}
|
||||
|
@ -1166,28 +1227,12 @@ void GPUEngineBase::_RenderLine_Clear(const u16 clearColor, const u16 l, void *r
|
|||
break;
|
||||
|
||||
case NDSColorFormat_BGR666_Rev:
|
||||
{
|
||||
FragmentColor dstClearColor32;
|
||||
dstClearColor32.r = material_5bit_to_6bit[(dstClearColor16 >> 0) & 0x001F];
|
||||
dstClearColor32.g = material_5bit_to_6bit[(dstClearColor16 >> 5) & 0x001F];
|
||||
dstClearColor32.b = material_5bit_to_6bit[(dstClearColor16 >> 10) & 0x001F];
|
||||
dstClearColor32.a = 0;
|
||||
|
||||
memset_u32_fast<GPU_FRAMEBUFFER_NATIVE_WIDTH>(renderLineTarget, dstClearColor32.color);
|
||||
memset_u32_fast<GPU_FRAMEBUFFER_NATIVE_WIDTH>(renderLineTarget, COLOR555TO666(dstClearColor16));
|
||||
break;
|
||||
}
|
||||
|
||||
case NDSColorFormat_BGR888_Rev:
|
||||
{
|
||||
FragmentColor dstClearColor32;
|
||||
dstClearColor32.r = material_5bit_to_8bit[(dstClearColor16 >> 0) & 0x001F];
|
||||
dstClearColor32.g = material_5bit_to_8bit[(dstClearColor16 >> 5) & 0x001F];
|
||||
dstClearColor32.b = material_5bit_to_8bit[(dstClearColor16 >> 10) & 0x001F];
|
||||
dstClearColor32.a = 0;
|
||||
|
||||
memset_u32_fast<GPU_FRAMEBUFFER_NATIVE_WIDTH>(renderLineTarget, dstClearColor32.color);
|
||||
memset_u32_fast<GPU_FRAMEBUFFER_NATIVE_WIDTH>(renderLineTarget, COLOR555TO888(dstClearColor16));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
memset(this->_renderLineLayerIDNative, GPULayerID_Backdrop, GPU_FRAMEBUFFER_NATIVE_WIDTH);
|
||||
|
@ -1915,6 +1960,8 @@ FORCEINLINE void GPUEngineBase::_RenderPixel(const size_t srcX, const u16 src, c
|
|||
|
||||
ColorEffect selectedEffect = ColorEffect_Disable;
|
||||
TBlendTable *selectedBlendTable = this->_blendTable;
|
||||
u8 blendEVA = this->_BLDALPHA_EVA;
|
||||
u8 blendEVB = this->_BLDALPHA_EVB;
|
||||
|
||||
if (enableColorEffect)
|
||||
{
|
||||
|
@ -1963,9 +2010,9 @@ FORCEINLINE void GPUEngineBase::_RenderPixel(const size_t srcX, const u16 src, c
|
|||
//it's tested by the spriteblend demo and the glory of heracles title screen
|
||||
if (srcAlpha != 0xFF)
|
||||
{
|
||||
const u8 BLDALPHA_EVA = srcAlpha;
|
||||
const u8 BLDALPHA_EVB = 16 - srcAlpha;
|
||||
selectedBlendTable = &GPUEngineBase::_blendTable555[BLDALPHA_EVA][BLDALPHA_EVB];
|
||||
blendEVA = srcAlpha;
|
||||
blendEVB = 16 - srcAlpha;
|
||||
selectedBlendTable = &GPUEngineBase::_blendTable555[blendEVA][blendEVB];
|
||||
}
|
||||
|
||||
forceBlendEffect = true;
|
||||
|
@ -2001,28 +2048,28 @@ FORCEINLINE void GPUEngineBase::_RenderPixel(const size_t srcX, const u16 src, c
|
|||
}
|
||||
|
||||
// Render the pixel using the selected color effect.
|
||||
u16 finalDstColor;
|
||||
u16 finalDstColor16;
|
||||
|
||||
switch (selectedEffect)
|
||||
{
|
||||
case ColorEffect_Disable:
|
||||
finalDstColor = src;
|
||||
finalDstColor16 = src;
|
||||
break;
|
||||
|
||||
case ColorEffect_IncreaseBrightness:
|
||||
finalDstColor = this->_ColorEffectIncreaseBrightness(src & 0x7FFF);
|
||||
finalDstColor16 = this->_ColorEffectIncreaseBrightness(src & 0x7FFF);
|
||||
break;
|
||||
|
||||
case ColorEffect_DecreaseBrightness:
|
||||
finalDstColor = this->_ColorEffectDecreaseBrightness(src & 0x7FFF);
|
||||
finalDstColor16 = this->_ColorEffectDecreaseBrightness(src & 0x7FFF);
|
||||
break;
|
||||
|
||||
case ColorEffect_Blend:
|
||||
finalDstColor = this->_ColorEffectBlend(src, *(u16 *)dstColorLine, selectedBlendTable);
|
||||
finalDstColor16 = this->_ColorEffectBlend(src, *(u16 *)dstColorLine, selectedBlendTable);
|
||||
break;
|
||||
}
|
||||
|
||||
*(u16 *)dstColorLine = finalDstColor | 0x8000;
|
||||
*(u16 *)dstColorLine = finalDstColor16 | 0x8000;
|
||||
*dstLayerIDLine = LAYERID;
|
||||
}
|
||||
|
||||
|
@ -2428,28 +2475,28 @@ FORCEINLINE void GPUEngineBase::_RenderPixel3D(const size_t srcX, const Fragment
|
|||
|
||||
// Render the pixel using the selected color effect.
|
||||
const u16 srcRGB555 = R6G6B6TORGB15(src.r, src.g, src.b);
|
||||
u16 finalDstColor;
|
||||
u16 finalDstColor16;
|
||||
|
||||
switch (selectedEffect)
|
||||
{
|
||||
case ColorEffect_Disable:
|
||||
finalDstColor = srcRGB555;
|
||||
finalDstColor16 = srcRGB555;
|
||||
break;
|
||||
|
||||
case ColorEffect_IncreaseBrightness:
|
||||
finalDstColor = this->_ColorEffectIncreaseBrightness(srcRGB555);
|
||||
finalDstColor16 = this->_ColorEffectIncreaseBrightness(srcRGB555);
|
||||
break;
|
||||
|
||||
case ColorEffect_DecreaseBrightness:
|
||||
finalDstColor = this->_ColorEffectDecreaseBrightness(srcRGB555);
|
||||
finalDstColor16 = this->_ColorEffectDecreaseBrightness(srcRGB555);
|
||||
break;
|
||||
|
||||
case ColorEffect_Blend:
|
||||
finalDstColor = this->_ColorEffectBlend3D(src, *(u16 *)dstColorLine);
|
||||
finalDstColor16 = this->_ColorEffectBlend3D(src, *(u16 *)dstColorLine);
|
||||
break;
|
||||
}
|
||||
|
||||
*(u16 *)dstColorLine = finalDstColor | 0x8000;
|
||||
*(u16 *)dstColorLine = finalDstColor16 | 0x8000;
|
||||
*dstLayerIDLine = GPULayerID_BG0;
|
||||
}
|
||||
|
||||
|
@ -3923,8 +3970,6 @@ template <bool ISFULLINTENSITYHINT>
|
|||
void GPUEngineBase::ApplyMasterBrightness()
|
||||
{
|
||||
const NDSColorFormat outputFormat = GPU->GetDisplayInfo().colorFormat;
|
||||
const size_t pixBytes = GPU->GetDisplayInfo().pixelBytes;
|
||||
|
||||
const IOREG_MASTER_BRIGHT &MASTER_BRIGHT = this->_IORegisterMap->MASTER_BRIGHT;
|
||||
const u32 intensity = MASTER_BRIGHT.Intensity;
|
||||
|
||||
|
@ -3993,15 +4038,15 @@ void GPUEngineBase::ApplyMasterBrightness()
|
|||
switch (outputFormat)
|
||||
{
|
||||
case NDSColorFormat_BGR555_Rev:
|
||||
memset_u16(dst, 0x7FFF, pixCount);
|
||||
memset_u16(dst, 0xFFFF, pixCount);
|
||||
break;
|
||||
|
||||
case NDSColorFormat_BGR666_Rev:
|
||||
memset_u32(dst, 0x003F3F3F, pixCount);
|
||||
memset_u32(dst, 0x1F3F3F3F, pixCount);
|
||||
break;
|
||||
|
||||
case NDSColorFormat_BGR888_Rev:
|
||||
memset_u32(dst, 0x00FFFFFF, pixCount);
|
||||
memset_u32(dst, 0xFFFFFFFF, pixCount);
|
||||
break;
|
||||
|
||||
default:
|
||||
|
@ -4063,7 +4108,23 @@ void GPUEngineBase::ApplyMasterBrightness()
|
|||
else
|
||||
{
|
||||
// all black (optimization)
|
||||
memset(dst, 0, pixCount * pixBytes);
|
||||
switch (outputFormat)
|
||||
{
|
||||
case NDSColorFormat_BGR555_Rev:
|
||||
memset_u16(dst, 0x8000, pixCount);
|
||||
break;
|
||||
|
||||
case NDSColorFormat_BGR666_Rev:
|
||||
memset_u32(dst, 0x1F000000, pixCount);
|
||||
break;
|
||||
|
||||
case NDSColorFormat_BGR888_Rev:
|
||||
memset_u32(dst, 0xFF000000, pixCount);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -4500,33 +4561,7 @@ void GPUEngineBase::ResolveCustomRendering()
|
|||
|
||||
void GPUEngineBase::ResolveRGB666ToRGB888()
|
||||
{
|
||||
size_t i = 0;
|
||||
const size_t pixCount = this->renderedWidth * this->renderedHeight;
|
||||
FragmentColor *buffer = (FragmentColor *)this->renderedBuffer;
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
const size_t ssePixCount = pixCount - (pixCount % 4);
|
||||
for (; i < ssePixCount; i += 4)
|
||||
{
|
||||
// Convert to RGBA8888:
|
||||
// RGB 6-bit to 8-bit formula: dstRGB8 = (srcRGB6 << 2) | ((srcRGB6 >> 4) & 0x03)
|
||||
// Alpha 5-bit to 8-bit formula: dstA8 = (srcA5 << 3) | ((srcA5 >> 2) & 0x07)
|
||||
__m128i color8888 = _mm_load_si128((__m128i *)(buffer + i));
|
||||
__m128i a = _mm_or_si128( _mm_and_si128(_mm_slli_epi32(color8888, 3), _mm_set1_epi8(0xF8)), _mm_and_si128(_mm_srli_epi32(color8888, 2), _mm_set1_epi8(0x07)) );
|
||||
color8888 = _mm_or_si128( _mm_and_si128(_mm_slli_epi32(color8888, 2), _mm_set1_epi8(0xFC)), _mm_and_si128(_mm_srli_epi32(color8888, 4), _mm_set1_epi8(0x03)) );
|
||||
|
||||
color8888 = _mm_or_si128(_mm_and_si128(color8888, _mm_set1_epi32(0x00FFFFFF)), _mm_and_si128(a, _mm_set1_epi32(0xFF000000)));
|
||||
_mm_store_si128((__m128i *)(buffer + i), color8888);
|
||||
}
|
||||
#endif
|
||||
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
buffer[i].r = material_6bit_to_8bit[buffer[i].r];
|
||||
buffer[i].g = material_6bit_to_8bit[buffer[i].g];
|
||||
buffer[i].b = material_6bit_to_8bit[buffer[i].b];
|
||||
buffer[i].a = material_5bit_to_8bit[buffer[i].a];
|
||||
}
|
||||
ConvertColorBuffers6665To8888<false>((FragmentColor *)this->renderedBuffer, (FragmentColor *)this->renderedBuffer, this->renderedWidth * this->renderedHeight);
|
||||
}
|
||||
|
||||
void GPUEngineBase::ResolveToCustomFramebuffer()
|
||||
|
@ -5263,7 +5298,22 @@ void GPUEngineA::_RenderLine_DisplayCapture(const void *renderedLineSrcA, const
|
|||
{
|
||||
case 0: // Capture VRAM
|
||||
{
|
||||
this->VerifyVRAMLineDidChange(vramReadBlock, readLineIndexWithOffset);
|
||||
const bool didVRAMLineChange = this->VerifyVRAMLineDidChange(vramReadBlock, readLineIndexWithOffset);
|
||||
if (didVRAMLineChange)
|
||||
{
|
||||
if (vramConfiguration.banks[vramReadBlock].purpose == VramConfiguration::LCDC)
|
||||
{
|
||||
u32 cap_src_adr = readLineIndexWithOffset * GPU_FRAMEBUFFER_NATIVE_WIDTH;
|
||||
cap_src_adr &= 0x0000FFFF;
|
||||
cap_src = this->_VRAMNativeBlockPtr[vramReadBlock] + cap_src_adr;
|
||||
}
|
||||
else
|
||||
{
|
||||
cap_src = (u16 *)MMU.blank_memory;
|
||||
}
|
||||
|
||||
srcB = cap_src;
|
||||
}
|
||||
|
||||
if (this->isLineCaptureNative[vramReadBlock][readLineIndexWithOffset])
|
||||
{
|
||||
|
@ -5292,7 +5342,25 @@ void GPUEngineA::_RenderLine_DisplayCapture(const void *renderedLineSrcA, const
|
|||
default: // Capture source is SourceA+B blended
|
||||
{
|
||||
//INFO("Capture source is SourceA+B blended\n");
|
||||
this->VerifyVRAMLineDidChange(vramReadBlock, readLineIndexWithOffset);
|
||||
if (DISPCAPCNT.SrcB == 0)
|
||||
{
|
||||
const bool didVRAMLineChange = this->VerifyVRAMLineDidChange(vramReadBlock, readLineIndexWithOffset);
|
||||
if (didVRAMLineChange)
|
||||
{
|
||||
if (vramConfiguration.banks[vramReadBlock].purpose == VramConfiguration::LCDC)
|
||||
{
|
||||
u32 cap_src_adr = readLineIndexWithOffset * GPU_FRAMEBUFFER_NATIVE_WIDTH;
|
||||
cap_src_adr &= 0x0000FFFF;
|
||||
cap_src = this->_VRAMNativeBlockPtr[vramReadBlock] + cap_src_adr;
|
||||
}
|
||||
else
|
||||
{
|
||||
cap_src = (u16 *)MMU.blank_memory;
|
||||
}
|
||||
|
||||
srcB = cap_src;
|
||||
}
|
||||
}
|
||||
|
||||
if (DISPCAPCNT.SrcA == 0)
|
||||
{
|
||||
|
@ -5642,17 +5710,17 @@ u16 GPUEngineA::_RenderLine_DispCapture_BlendFunc(const u16 srcA, const u16 srcB
|
|||
if (a_alpha)
|
||||
{
|
||||
a = 0x8000;
|
||||
r = ((srcA & 0x1F) * blendEVA);
|
||||
g = (((srcA >> 5) & 0x1F) * blendEVA);
|
||||
b = (((srcA >> 10) & 0x1F) * blendEVA);
|
||||
r = ((srcA & 0x001F) * blendEVA);
|
||||
g = (((srcA >> 5) & 0x001F) * blendEVA);
|
||||
b = (((srcA >> 10) & 0x001F) * blendEVA);
|
||||
}
|
||||
|
||||
if (b_alpha)
|
||||
{
|
||||
a = 0x8000;
|
||||
r += ((srcB & 0x1F) * blendEVB);
|
||||
g += (((srcB >> 5) & 0x1F) * blendEVB);
|
||||
b += (((srcB >> 10) & 0x1F) * blendEVB);
|
||||
r += ((srcB & 0x001F) * blendEVB);
|
||||
g += (((srcB >> 5) & 0x001F) * blendEVB);
|
||||
b += (((srcB >> 10) & 0x001F) * blendEVB);
|
||||
}
|
||||
|
||||
r >>= 4;
|
||||
|
@ -5660,9 +5728,9 @@ u16 GPUEngineA::_RenderLine_DispCapture_BlendFunc(const u16 srcA, const u16 srcB
|
|||
b >>= 4;
|
||||
|
||||
//freedom wings sky will overflow while doing some fsaa/motionblur effect without this
|
||||
r = std::min((u16)31,r);
|
||||
g = std::min((u16)31,g);
|
||||
b = std::min((u16)31,b);
|
||||
r = std::min<u16>(0x001F, r);
|
||||
g = std::min<u16>(0x001F, g);
|
||||
b = std::min<u16>(0x001F, b);
|
||||
|
||||
return LOCAL_TO_LE_16(a | (b << 10) | (g << 5) | r);
|
||||
}
|
||||
|
@ -5729,7 +5797,6 @@ void GPUEngineA::_RenderLine_DispCapture_BlendToCustomDstBuffer(const u16 *srcA,
|
|||
size_t i = 0;
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
|
||||
const size_t ssePixCount = length - (length % 8);
|
||||
for (; i < ssePixCount; i += 8)
|
||||
{
|
||||
|
@ -5754,6 +5821,7 @@ void GPUEngineA::_RenderLine_DispCapture_BlendToCustomDstBuffer(const u16 *srcA,
|
|||
_mm_store_si128( (__m128i *)(dst + i), this->_RenderLine_DispCapture_BlendFunc_SSE2(srcA_vec128, srcB_vec128, blendEVA_vec128, blendEVB_vec128) );
|
||||
}
|
||||
#endif
|
||||
|
||||
for (; i < length; i++)
|
||||
{
|
||||
const u16 colorA = (!CAPTUREFROMNATIVESRCA) ? srcA[i] : srcA[offset + i];
|
||||
|
@ -5849,10 +5917,7 @@ void GPUEngineA::_HandleDisplayModeVRAM(const size_t l)
|
|||
|
||||
for (size_t i = 0; i < GPU_FRAMEBUFFER_NATIVE_WIDTH; i++)
|
||||
{
|
||||
dst[i].r = material_5bit_to_6bit[(src[i] >> 0) & 0x001F];
|
||||
dst[i].g = material_5bit_to_6bit[(src[i] >> 5) & 0x001F];
|
||||
dst[i].b = material_5bit_to_6bit[(src[i] >> 10) & 0x001F];
|
||||
dst[i].a = 0;
|
||||
dst[i].color = COLOR555TO6665_OPAQUE(src[i] & 0x7FFF);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -5864,10 +5929,7 @@ void GPUEngineA::_HandleDisplayModeVRAM(const size_t l)
|
|||
|
||||
for (size_t i = 0; i < GPU_FRAMEBUFFER_NATIVE_WIDTH; i++)
|
||||
{
|
||||
dst[i].r = material_5bit_to_8bit[(src[i] >> 0) & 0x001F];
|
||||
dst[i].g = material_5bit_to_8bit[(src[i] >> 5) & 0x001F];
|
||||
dst[i].b = material_5bit_to_8bit[(src[i] >> 10) & 0x001F];
|
||||
dst[i].a = 0;
|
||||
dst[i].color = COLOR555TO8888_OPAQUE(src[i] & 0x7FFF);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -5891,10 +5953,7 @@ void GPUEngineA::_HandleDisplayModeVRAM(const size_t l)
|
|||
|
||||
for (size_t i = 0; i < customPixCount; i++)
|
||||
{
|
||||
dst[i].r = material_5bit_to_6bit[(src[i] >> 0) & 0x001F];
|
||||
dst[i].g = material_5bit_to_6bit[(src[i] >> 5) & 0x001F];
|
||||
dst[i].b = material_5bit_to_6bit[(src[i] >> 10) & 0x001F];
|
||||
dst[i].a = 0;
|
||||
dst[i].color = COLOR555TO6665_OPAQUE(src[i] & 0x7FFF);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -5906,10 +5965,7 @@ void GPUEngineA::_HandleDisplayModeVRAM(const size_t l)
|
|||
|
||||
for (size_t i = 0; i < customPixCount; i++)
|
||||
{
|
||||
dst[i].r = material_5bit_to_8bit[(src[i] >> 0) & 0x001F];
|
||||
dst[i].g = material_5bit_to_8bit[(src[i] >> 5) & 0x001F];
|
||||
dst[i].b = material_5bit_to_8bit[(src[i] >> 10) & 0x001F];
|
||||
dst[i].a = 0;
|
||||
dst[i].color = COLOR555TO8888_OPAQUE(src[i] & 0x7FFF);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -5959,16 +6015,8 @@ void GPUEngineA::_HandleDisplayModeMainMemory(const size_t l)
|
|||
for (size_t i = 0; i < GPU_FRAMEBUFFER_NATIVE_WIDTH; i+=2)
|
||||
{
|
||||
u32 src = DISP_FIFOrecv();
|
||||
|
||||
dst[i+0].r = material_5bit_to_6bit[(src >> 0) & 0x0000001F];
|
||||
dst[i+0].g = material_5bit_to_6bit[(src >> 5) & 0x0000001F];
|
||||
dst[i+0].b = material_5bit_to_6bit[(src >> 10) & 0x0000001F];
|
||||
dst[i+0].a = 0;
|
||||
|
||||
dst[i+1].r = material_5bit_to_6bit[(src >> 16) & 0x0000001F];
|
||||
dst[i+1].g = material_5bit_to_6bit[(src >> 21) & 0x0000001F];
|
||||
dst[i+1].b = material_5bit_to_6bit[(src >> 26) & 0x0000001F];
|
||||
dst[i+1].a = 0;
|
||||
dst[i+0].color = COLOR555TO6665_OPAQUE((src >> 0) & 0x7FFF);
|
||||
dst[i+1].color = COLOR555TO6665_OPAQUE((src >> 16) & 0x7FFF);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -5980,16 +6028,8 @@ void GPUEngineA::_HandleDisplayModeMainMemory(const size_t l)
|
|||
for (size_t i = 0; i < GPU_FRAMEBUFFER_NATIVE_WIDTH; i+=2)
|
||||
{
|
||||
u32 src = DISP_FIFOrecv();
|
||||
|
||||
dst[i+0].r = material_5bit_to_8bit[(src >> 0) & 0x0000001F];
|
||||
dst[i+0].g = material_5bit_to_8bit[(src >> 5) & 0x0000001F];
|
||||
dst[i+0].b = material_5bit_to_8bit[(src >> 10) & 0x0000001F];
|
||||
dst[i+0].a = 0;
|
||||
|
||||
dst[i+1].r = material_5bit_to_8bit[(src >> 16) & 0x0000001F];
|
||||
dst[i+1].g = material_5bit_to_8bit[(src >> 21) & 0x0000001F];
|
||||
dst[i+1].b = material_5bit_to_8bit[(src >> 26) & 0x0000001F];
|
||||
dst[i+1].a = 0;
|
||||
dst[i+0].color = COLOR555TO8888_OPAQUE((src >> 0) & 0x7FFF);
|
||||
dst[i+1].color = COLOR555TO8888_OPAQUE((src >> 16) & 0x7FFF);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -6278,6 +6318,24 @@ void* GPUEngineB::_RenderLine_Layers(const u16 l)
|
|||
|
||||
GPUSubsystem::GPUSubsystem()
|
||||
{
|
||||
static bool needInitTables = true;
|
||||
|
||||
if (needInitTables)
|
||||
{
|
||||
#define RGB15TO18_BITLOGIC(col) ( (material_5bit_to_6bit[((col)>>10)&0x1F]<<16) | (material_5bit_to_6bit[((col)>>5)&0x1F]<<8) | material_5bit_to_6bit[(col)&0x1F] )
|
||||
#define RGB15TO24_BITLOGIC(col) ( (material_5bit_to_8bit[((col)>>10)&0x1F]<<16) | (material_5bit_to_8bit[((col)>>5)&0x1F]<<8) | material_5bit_to_8bit[(col)&0x1F] )
|
||||
|
||||
for (size_t i = 0; i < 32768; i++)
|
||||
{
|
||||
color_555_to_666[i] = LE_TO_LOCAL_32( RGB15TO18_BITLOGIC(i) );
|
||||
color_555_to_6665_opaque[i] = LE_TO_LOCAL_32( RGB15TO18_BITLOGIC(i) | 0x1F000000 );
|
||||
color_555_to_888[i] = LE_TO_LOCAL_32( RGB15TO24_BITLOGIC(i) );
|
||||
color_555_to_8888_opaque[i] = LE_TO_LOCAL_32( RGB15TO24_BITLOGIC(i) | 0xFF000000 );
|
||||
}
|
||||
|
||||
needInitTables = false;
|
||||
}
|
||||
|
||||
_defaultEventHandler = new GPUEventHandlerDefault;
|
||||
_event = _defaultEventHandler;
|
||||
|
||||
|
@ -6953,17 +7011,11 @@ void GPUSubsystem::ClearWithColor(const u16 colorBGRA5551)
|
|||
break;
|
||||
|
||||
case NDSColorFormat_BGR666_Rev:
|
||||
color32.r = material_5bit_to_6bit[(colorBGRA5551 & 0x001F)];
|
||||
color32.g = material_5bit_to_6bit[(colorBGRA5551 & 0x03E0) >> 5];
|
||||
color32.b = material_5bit_to_6bit[(colorBGRA5551 & 0x7C00) >> 10];
|
||||
color32.a = 0xFF;
|
||||
color32.color = COLOR555TO6665_OPAQUE(colorBGRA5551 & 0x7FFF);
|
||||
break;
|
||||
|
||||
case NDSColorFormat_BGR888_Rev:
|
||||
color32.r = material_5bit_to_8bit[(colorBGRA5551 & 0x001F)];
|
||||
color32.g = material_5bit_to_8bit[(colorBGRA5551 & 0x03E0) >> 5];
|
||||
color32.b = material_5bit_to_8bit[(colorBGRA5551 & 0x7C00) >> 10];
|
||||
color32.a = 0xFF;
|
||||
color32.color = COLOR555TO8888_OPAQUE(colorBGRA5551 & 0x7FFF);
|
||||
break;
|
||||
|
||||
default:
|
||||
|
@ -7026,6 +7078,82 @@ void NDSDisplay::SetEngineByID(const GPUEngineID theID)
|
|||
this->_gpu->SetDisplayByID(this->_ID);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
void ConvertColorBuffers8888To6665(const FragmentColor *src, FragmentColor *dst, size_t pixCount)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
const size_t ssePixCount = pixCount - (pixCount % 4);
|
||||
for (; i < ssePixCount; i += 4)
|
||||
{
|
||||
_mm_store_si128( (__m128i *)(dst + i), ConvertColor8888To6665<SWAP_RB>(_mm_load_si128((__m128i *)(src + i))) );
|
||||
}
|
||||
#endif
|
||||
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dst[i] = ConvertColor8888To6665<SWAP_RB>(src[i]);
|
||||
}
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
void ConvertColorBuffers6665To8888(const FragmentColor *src, FragmentColor *dst, size_t pixCount)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
const size_t ssePixCount = pixCount - (pixCount % 4);
|
||||
for (; i < ssePixCount; i += 4)
|
||||
{
|
||||
_mm_store_si128( (__m128i *)(dst + i), ConvertColor6665To8888<SWAP_RB>(_mm_load_si128((__m128i *)(src + i))) );
|
||||
}
|
||||
#endif
|
||||
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dst[i] = ConvertColor6665To8888<SWAP_RB>(src[i]);
|
||||
}
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
void ConvertColorBuffers8888To5551(const FragmentColor *__restrict src, u16 *__restrict dst, size_t pixCount)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
const size_t ssePixCount = pixCount - (pixCount % 8);
|
||||
for (; i < ssePixCount; i += 8)
|
||||
{
|
||||
_mm_store_si128( (__m128i *)(dst + i), ConvertColor8888To5551<SWAP_RB>(_mm_load_si128((__m128i *)(src + i)), _mm_load_si128((__m128i *)(src + i + 4))) );
|
||||
}
|
||||
#endif
|
||||
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dst[i] = ConvertColor8888To5551<SWAP_RB>(src[i]);
|
||||
}
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
void ConvertColorBuffers6665To5551(const FragmentColor *__restrict src, u16 *__restrict dst, size_t pixCount)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
const size_t ssePixCount = pixCount - (pixCount % 8);
|
||||
for (; i < ssePixCount; i += 8)
|
||||
{
|
||||
_mm_store_si128( (__m128i *)(dst + i), ConvertColor6665To5551<SWAP_RB>(_mm_load_si128((__m128i *)(src + i)), _mm_load_si128((__m128i *)(src + i + 4))) );
|
||||
}
|
||||
#endif
|
||||
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dst[i] = ConvertColor6665To5551<SWAP_RB>(src[i]);
|
||||
}
|
||||
}
|
||||
|
||||
template void GPUEngineBase::ParseReg_DISPCNT<GPUEngineID_Main>();
|
||||
template void GPUEngineBase::ParseReg_DISPCNT<GPUEngineID_Sub>();
|
||||
|
||||
|
@ -7061,3 +7189,15 @@ template void GPUEngineBase::RenderLayerBG<GPULayerID_BG0>(u16 *dstColorBuffer);
|
|||
template void GPUEngineBase::RenderLayerBG<GPULayerID_BG1>(u16 *dstColorBuffer);
|
||||
template void GPUEngineBase::RenderLayerBG<GPULayerID_BG2>(u16 *dstColorBuffer);
|
||||
template void GPUEngineBase::RenderLayerBG<GPULayerID_BG3>(u16 *dstColorBuffer);
|
||||
|
||||
template void ConvertColorBuffers8888To6665<true>(const FragmentColor *src, FragmentColor *dst, size_t pixCount);
|
||||
template void ConvertColorBuffers8888To6665<false>(const FragmentColor *src, FragmentColor *dst, size_t pixCount);
|
||||
|
||||
template void ConvertColorBuffers6665To8888<true>(const FragmentColor *src, FragmentColor *dst, size_t pixCount);
|
||||
template void ConvertColorBuffers6665To8888<false>(const FragmentColor *src, FragmentColor *dst, size_t pixCount);
|
||||
|
||||
template void ConvertColorBuffers8888To5551<true>(const FragmentColor *__restrict src, u16 *__restrict dst, size_t pixCount);
|
||||
template void ConvertColorBuffers8888To5551<false>(const FragmentColor *__restrict src, u16 *__restrict dst, size_t pixCount);
|
||||
|
||||
template void ConvertColorBuffers6665To5551<true>(const FragmentColor *__restrict src, u16 *__restrict dst, size_t pixCount);
|
||||
template void ConvertColorBuffers6665To5551<false>(const FragmentColor *__restrict src, u16 *__restrict dst, size_t pixCount);
|
||||
|
|
|
@ -1629,6 +1629,43 @@ public:
|
|||
extern GPUSubsystem *GPU;
|
||||
extern MMU_struct MMU;
|
||||
|
||||
extern CACHE_ALIGN const u32 material_5bit_to_31bit[32];
|
||||
extern CACHE_ALIGN const u8 material_5bit_to_6bit[32];
|
||||
extern CACHE_ALIGN const u8 material_5bit_to_8bit[32];
|
||||
extern CACHE_ALIGN const u8 material_6bit_to_8bit[64];
|
||||
extern CACHE_ALIGN const u8 material_3bit_to_5bit[8];
|
||||
extern CACHE_ALIGN const u8 material_3bit_to_6bit[8];
|
||||
extern CACHE_ALIGN const u8 material_3bit_to_8bit[8];
|
||||
|
||||
extern CACHE_ALIGN u32 color_555_to_6665_opaque[32768];
|
||||
extern CACHE_ALIGN u32 color_555_to_666[32768];
|
||||
extern CACHE_ALIGN u32 color_555_to_8888_opaque[32768];
|
||||
extern CACHE_ALIGN u32 color_555_to_888[32768];
|
||||
|
||||
#define COLOR555TO6665_OPAQUE(col) (color_555_to_6665_opaque[(col)]) // Convert a 15-bit color to an opaque sparsely packed 32-bit color containing an RGBA6665 color
|
||||
#define COLOR555TO666(col) (color_555_to_666[(col)]) // Convert a 15-bit color to a fully transparent sparsely packed 32-bit color containing an RGBA6665 color
|
||||
|
||||
#ifdef LOCAL_LE
|
||||
#define COLOR555TO6665(col,alpha5) (((alpha5)<<24) | color_555_to_666[(col)]) // Convert a 15-bit color to a sparsely packed 32-bit color containing an RGBA6665 color with user-defined alpha, little-endian
|
||||
#else
|
||||
#define COLOR555TO6665(col,alpha5) ((alpha5) | color_555_to_666[(col)]) // Convert a 15-bit color to a sparsely packed 32-bit color containing an RGBA6665 color with user-defined alpha, big-endian
|
||||
#endif
|
||||
|
||||
#define COLOR555TO8888_OPAQUE(col) (color_555_to_8888_opaque[(col)]) // Convert a 15-bit color to an opaque 32-bit color
|
||||
#define COLOR555TO888(col) (color_555_to_888[(col)]) // Convert a 15-bit color to an opaque 24-bit color or a fully transparent 32-bit color
|
||||
|
||||
#ifdef LOCAL_LE
|
||||
#define COLOR555TO8888(col,alpha8) (((alpha8)<<24) | color_555_to_888[(col)]) // Convert a 15-bit color to a 32-bit color with user-defined alpha, little-endian
|
||||
#else
|
||||
#define COLOR555TO8888(col,alpha8) ((alpha8) | color_555_to_888[(col)]) // Convert a 15-bit color to a 32-bit color with user-defined alpha, big-endian
|
||||
#endif
|
||||
|
||||
//produce a 15bpp color from individual 5bit components
|
||||
#define R5G5B5TORGB15(r,g,b) ( (r) | ((g)<<5) | ((b)<<10) )
|
||||
|
||||
//produce a 15bpp color from individual 6bit components (the low bit of each component is dropped)
|
||||
#define R6G6B6TORGB15(r,g,b) ( ((r)>>1) | (((g)&0x3E)<<4) | (((b)&0x3E)<<9) )
|
||||
|
||||
inline FragmentColor MakeFragmentColor(const u8 r, const u8 g, const u8 b, const u8 a)
|
||||
{
|
||||
FragmentColor ret;
|
||||
|
@ -1636,4 +1673,214 @@ inline FragmentColor MakeFragmentColor(const u8 r, const u8 g, const u8 b, const
|
|||
return ret;
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE FragmentColor ConvertColor8888To6665(FragmentColor srcColor)
|
||||
{
|
||||
FragmentColor outColor;
|
||||
outColor.r = ((SWAP_RB) ? srcColor.b : srcColor.r) >> 2;
|
||||
outColor.g = srcColor.g >> 2;
|
||||
outColor.b = ((SWAP_RB) ? srcColor.r : srcColor.b) >> 2;
|
||||
outColor.a = srcColor.a >> 3;
|
||||
|
||||
return outColor;
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE FragmentColor ConvertColor6665To8888(FragmentColor srcColor)
|
||||
{
|
||||
FragmentColor outColor;
|
||||
outColor.r = material_6bit_to_8bit[((SWAP_RB) ? srcColor.b : srcColor.r)];
|
||||
outColor.g = material_6bit_to_8bit[srcColor.g];
|
||||
outColor.b = material_6bit_to_8bit[((SWAP_RB) ? srcColor.r : srcColor.b)];
|
||||
outColor.a = material_5bit_to_8bit[srcColor.a];
|
||||
|
||||
return outColor;
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE u16 ConvertColor8888To5551(FragmentColor srcColor)
|
||||
{
|
||||
return R5G5B5TORGB15( ((SWAP_RB) ? srcColor.b : srcColor.r) >> 3, srcColor.g >> 3, ((SWAP_RB) ? srcColor.r : srcColor.b) >> 3) | ((srcColor.a == 0) ? 0x0000 : 0x8000 );
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE u16 ConvertColor6665To5551(FragmentColor srcColor)
|
||||
{
|
||||
return R6G6B6TORGB15( ((SWAP_RB) ? srcColor.b : srcColor.r), srcColor.g, ((SWAP_RB) ? srcColor.r : srcColor.b)) | ((srcColor.a == 0) ? 0x0000 : 0x8000);
|
||||
}
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE __m128i ConvertColor8888To6665(const __m128i src)
|
||||
{
|
||||
__m128i rgb;
|
||||
const __m128i a = _mm_and_si128( _mm_srli_epi32(src, 3), _mm_set1_epi32(0x1F000000) );
|
||||
|
||||
if (SWAP_RB)
|
||||
{
|
||||
#ifdef ENABLE_SSSE3
|
||||
rgb = _mm_and_si128( _mm_srli_epi32(src, 2), _mm_set1_epi32(0x003F3F3F) );
|
||||
rgb = _mm_shuffle_epi8( rgb, _mm_set_epi8(15, 12, 13, 14, 11, 8, 9, 10, 7, 4, 5, 6, 3, 0, 1, 2) );
|
||||
#else
|
||||
rgb = _mm_or_si128( _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(0x003F0000)), 18), _mm_or_si128(_mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(0x00003F00)), 2), _mm_slli_epi32(_mm_and_si128(src, _mm_set1_epi32(0x0000003F)), 14)) );
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
rgb = _mm_and_si128( _mm_srli_epi32(src, 2), _mm_set1_epi32(0x003F3F3F) );
|
||||
}
|
||||
|
||||
return _mm_or_si128(rgb, a);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE __m128i ConvertColor6665To8888(const __m128i src)
|
||||
{
|
||||
// Conversion algorithm:
|
||||
// RGB 6-bit to 8-bit formula: dstRGB8 = (srcRGB6 << 2) | ((srcRGB6 >> 4) & 0x03)
|
||||
// Alpha 5-bit to 8-bit formula: dstA8 = (srcA5 << 3) | ((srcA5 >> 2) & 0x07)
|
||||
__m128i rgb = _mm_or_si128( _mm_and_si128(_mm_slli_epi32(src, 2), _mm_set1_epi32(0x00FCFCFC)), _mm_and_si128(_mm_srli_epi32(src, 4), _mm_set1_epi32(0x00030303)) );
|
||||
const __m128i a = _mm_or_si128( _mm_and_si128(_mm_slli_epi32(src, 3), _mm_set1_epi32(0xF8000000)), _mm_and_si128(_mm_srli_epi32(src, 2), _mm_set1_epi32(0x07000000)) );
|
||||
|
||||
if (SWAP_RB)
|
||||
{
|
||||
#ifdef ENABLE_SSSE3
|
||||
rgb = _mm_shuffle_epi8( rgb, _mm_set_epi8(15, 12, 13, 14, 11, 8, 9, 10, 7, 4, 5, 6, 3, 0, 1, 2) );
|
||||
#else
|
||||
rgb = _mm_or_si128( _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(0x00FF0000)), 16), _mm_or_si128(_mm_and_si128(src, _mm_set1_epi32(0x0000FF00)), _mm_slli_epi32(_mm_and_si128(src, _mm_set1_epi32(0x000000FF)), 16)) );
|
||||
#endif
|
||||
}
|
||||
|
||||
return _mm_or_si128(rgb, a);
|
||||
}
|
||||
|
||||
template <NDSColorFormat COLORFORMAT, bool SWAP_RB>
|
||||
FORCEINLINE __m128i _ConvertColorBaseTo5551(const __m128i srcLo, const __m128i srcHi)
|
||||
{
|
||||
if (COLORFORMAT == NDSColorFormat_BGR555_Rev)
|
||||
{
|
||||
return srcLo;
|
||||
}
|
||||
|
||||
__m128i rgbLo;
|
||||
__m128i rgbHi;
|
||||
__m128i aLo;
|
||||
__m128i aHi;
|
||||
|
||||
if (COLORFORMAT == NDSColorFormat_BGR666_Rev)
|
||||
{
|
||||
if (SWAP_RB)
|
||||
{
|
||||
// Convert color from low bits
|
||||
rgbLo = _mm_and_si128(_mm_srli_epi32(srcLo, 17), _mm_set1_epi32(0x0000001F));
|
||||
rgbLo = _mm_or_si128(rgbLo, _mm_and_si128(_mm_srli_epi32(srcLo, 4), _mm_set1_epi32(0x000003E0)) );
|
||||
rgbLo = _mm_or_si128(rgbLo, _mm_and_si128(_mm_slli_epi32(srcLo, 9), _mm_set1_epi32(0x00007C00)) );
|
||||
|
||||
// Convert color from high bits
|
||||
rgbHi = _mm_and_si128(_mm_srli_epi32(srcHi, 17), _mm_set1_epi32(0x0000001F));
|
||||
rgbHi = _mm_or_si128(rgbHi, _mm_and_si128(_mm_srli_epi32(srcHi, 4), _mm_set1_epi32(0x000003E0)) );
|
||||
rgbHi = _mm_or_si128(rgbHi, _mm_and_si128(_mm_slli_epi32(srcHi, 9), _mm_set1_epi32(0x00007C00)) );
|
||||
}
|
||||
else
|
||||
{
|
||||
// Convert color from low bits
|
||||
rgbLo = _mm_and_si128(_mm_srli_epi32(srcLo, 1), _mm_set1_epi32(0x0000001F));
|
||||
rgbLo = _mm_or_si128(rgbLo, _mm_and_si128(_mm_srli_epi32(srcLo, 4), _mm_set1_epi32(0x000003E0)) );
|
||||
rgbLo = _mm_or_si128(rgbLo, _mm_and_si128(_mm_srli_epi32(srcLo, 7), _mm_set1_epi32(0x00007C00)) );
|
||||
|
||||
// Convert color from high bits
|
||||
rgbHi = _mm_and_si128(_mm_srli_epi32(srcHi, 1), _mm_set1_epi32(0x0000001F));
|
||||
rgbHi = _mm_or_si128(rgbHi, _mm_and_si128(_mm_srli_epi32(srcHi, 4), _mm_set1_epi32(0x000003E0)) );
|
||||
rgbHi = _mm_or_si128(rgbHi, _mm_and_si128(_mm_srli_epi32(srcHi, 7), _mm_set1_epi32(0x00007C00)) );
|
||||
}
|
||||
}
|
||||
else if (COLORFORMAT == NDSColorFormat_BGR888_Rev)
|
||||
{
|
||||
if (SWAP_RB)
|
||||
{
|
||||
// Convert color from low bits
|
||||
rgbLo = _mm_and_si128(_mm_srli_epi32(srcLo, 19), _mm_set1_epi32(0x0000001F));
|
||||
rgbLo = _mm_or_si128(rgbLo, _mm_and_si128(_mm_srli_epi32(srcLo, 6), _mm_set1_epi32(0x000003E0)) );
|
||||
rgbLo = _mm_or_si128(rgbLo, _mm_and_si128(_mm_slli_epi32(srcLo, 7), _mm_set1_epi32(0x00007C00)) );
|
||||
|
||||
// Convert color from high bits
|
||||
rgbHi = _mm_and_si128(_mm_srli_epi32(srcHi, 19), _mm_set1_epi32(0x0000001F));
|
||||
rgbHi = _mm_or_si128(rgbHi, _mm_and_si128(_mm_srli_epi32(srcHi, 6), _mm_set1_epi32(0x000003E0)) );
|
||||
rgbHi = _mm_or_si128(rgbHi, _mm_and_si128(_mm_slli_epi32(srcHi, 7), _mm_set1_epi32(0x00007C00)) );
|
||||
}
|
||||
else
|
||||
{
|
||||
// Convert color from low bits
|
||||
rgbLo = _mm_and_si128(_mm_srli_epi32(srcLo, 3), _mm_set1_epi32(0x0000001F));
|
||||
rgbLo = _mm_or_si128(rgbLo, _mm_and_si128(_mm_srli_epi32(srcLo, 6), _mm_set1_epi32(0x000003E0)) );
|
||||
rgbLo = _mm_or_si128(rgbLo, _mm_and_si128(_mm_srli_epi32(srcLo, 9), _mm_set1_epi32(0x00007C00)) );
|
||||
|
||||
// Convert color from high bits
|
||||
rgbHi = _mm_and_si128(_mm_srli_epi32(srcHi, 3), _mm_set1_epi32(0x0000001F));
|
||||
rgbHi = _mm_or_si128(rgbHi, _mm_and_si128(_mm_srli_epi32(srcHi, 6), _mm_set1_epi32(0x000003E0)) );
|
||||
rgbHi = _mm_or_si128(rgbHi, _mm_and_si128(_mm_srli_epi32(srcHi, 9), _mm_set1_epi32(0x00007C00)) );
|
||||
}
|
||||
}
|
||||
|
||||
// Convert alpha from low bits
|
||||
aLo = _mm_and_si128(srcLo, _mm_set1_epi32(0xFF000000));
|
||||
aLo = _mm_cmpeq_epi32(aLo, _mm_setzero_si128());
|
||||
|
||||
// Convert alpha from high bits
|
||||
aHi = _mm_and_si128(srcHi, _mm_set1_epi32(0xFF000000));
|
||||
aHi = _mm_cmpeq_epi32(aHi, _mm_setzero_si128());
|
||||
|
||||
#ifdef ENABLE_SSSE3
|
||||
aLo = _mm_andnot_si128(aLo, _mm_set1_epi32(0x00008000));
|
||||
aHi = _mm_andnot_si128(aHi, _mm_set1_epi32(0x00008000));
|
||||
|
||||
return _mm_shuffle_epi8( _mm_or_si128(_mm_or_si128(rgbLo, aLo), _mm_slli_epi32(_mm_or_si128(rgbHi, aHi), 16)), _mm_set_epi8(15, 14, 11, 10, 7, 6, 3, 2, 13, 12, 9, 8, 5, 4, 1, 0) );
|
||||
#else
|
||||
rgbLo = _mm_packs_epi32(rgbLo, _mm_setzero_si128());
|
||||
rgbHi = _mm_packs_epi32(rgbHi, _mm_setzero_si128());
|
||||
|
||||
// From here on, we're going to do an SSE2 trick to pack 32-bit down to unsigned
|
||||
// 16-bit. Since SSE2 only has packssdw (signed saturated 16-bit pack), using
|
||||
// packssdw on the alpha bit (0x8000) will result in a value of 0x7FFF, which is
|
||||
// incorrect. Now if we were to use SSE4.1's packusdw (unsigned saturated 16-bit
|
||||
// pack), we wouldn't have to go through this hassle. But not everyone has an
|
||||
// SSE4.1-capable CPU, so doing this the SSE2 way is more guaranteed to work for
|
||||
// everyone's CPU.
|
||||
//
|
||||
// To use packssdw, we take a bit one position lower for the alpha bit, run
|
||||
// packssdw, then shift the bit back to its original position. Then we por the
|
||||
// alpha vector with the post-packed color vector to get the final color.
|
||||
|
||||
aLo = _mm_andnot_si128(aLo, _mm_set1_epi32(0x00004000)); // Mask out the bit before A
|
||||
aLo = _mm_packs_epi32(aLo, _mm_setzero_si128()); // Pack 32-bit down to 16-bit
|
||||
aLo = _mm_slli_epi16(aLo, 1); // Shift the A bit back to where it needs to be
|
||||
|
||||
aHi = _mm_andnot_si128(aHi, _mm_set1_epi32(0x00004000));
|
||||
aHi = _mm_packs_epi32(aHi, _mm_setzero_si128());
|
||||
aHi = _mm_slli_epi16(aHi, 1);
|
||||
|
||||
return _mm_or_si128( _mm_or_si128(rgbLo, aLo), _mm_slli_epi32(_mm_or_si128(rgbHi, aHi), 16) );
|
||||
#endif
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE __m128i ConvertColor8888To5551(const __m128i srcLo, const __m128i srcHi)
|
||||
{
|
||||
return _ConvertColorBaseTo5551<NDSColorFormat_BGR888_Rev, SWAP_RB>(srcLo, srcHi);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE __m128i ConvertColor6665To5551(const __m128i srcLo, const __m128i srcHi)
|
||||
{
|
||||
return _ConvertColorBaseTo5551<NDSColorFormat_BGR666_Rev, SWAP_RB>(srcLo, srcHi);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
template<bool SWAP_RB> void ConvertColorBuffers8888To6665(const FragmentColor *src, FragmentColor *dst, size_t pixCount);
|
||||
template<bool SWAP_RB> void ConvertColorBuffers6665To8888(const FragmentColor *src, FragmentColor *dst, size_t pixCount);
|
||||
template<bool SWAP_RB> void ConvertColorBuffers8888To5551(const FragmentColor *__restrict src, u16 *__restrict dst, size_t pixCount);
|
||||
template<bool SWAP_RB> void ConvertColorBuffers6665To5551(const FragmentColor *__restrict src, u16 *__restrict dst, size_t pixCount);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -34,10 +34,6 @@
|
|||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_SSSE3
|
||||
#include <tmmintrin.h>
|
||||
#endif
|
||||
|
||||
typedef struct
|
||||
{
|
||||
unsigned int major;
|
||||
|
@ -49,7 +45,7 @@ static OGLVersion _OGLDriverVersion = {0, 0, 0};
|
|||
|
||||
// Lookup Tables
|
||||
static CACHE_ALIGN GLfloat material_8bit_to_float[256] = {0};
|
||||
CACHE_ALIGN const GLfloat divide5bitBy31_LUT[32] = {0.0, 0.03225806451613, 0.06451612903226, 0.09677419354839,
|
||||
CACHE_ALIGN const GLfloat divide5bitBy31_LUT[32] = {0.0, 0.0322580645161, 0.0645161290323, 0.0967741935484,
|
||||
0.1290322580645, 0.1612903225806, 0.1935483870968, 0.2258064516129,
|
||||
0.2580645161290, 0.2903225806452, 0.3225806451613, 0.3548387096774,
|
||||
0.3870967741935, 0.4193548387097, 0.4516129032258, 0.4838709677419,
|
||||
|
@ -58,6 +54,24 @@ CACHE_ALIGN const GLfloat divide5bitBy31_LUT[32] = {0.0, 0.03225806451613, 0.064
|
|||
0.7741935483871, 0.8064516129032, 0.8387096774194, 0.8709677419355,
|
||||
0.9032258064516, 0.9354838709677, 0.9677419354839, 1.0};
|
||||
|
||||
|
||||
CACHE_ALIGN const GLfloat divide6bitBy63_LUT[64] = {0.0, 0.0158730158730, 0.0317460317460, 0.0476190476191,
|
||||
0.0634920634921, 0.0793650793651, 0.0952380952381, 0.1111111111111,
|
||||
0.1269841269841, 0.1428571428571, 0.1587301587302, 0.1746031746032,
|
||||
0.1904761904762, 0.2063492063492, 0.2222222222222, 0.2380952380952,
|
||||
0.2539682539683, 0.2698412698413, 0.2857142857143, 0.3015873015873,
|
||||
0.3174603174603, 0.3333333333333, 0.3492063492064, 0.3650793650794,
|
||||
0.3809523809524, 0.3968253968254, 0.4126984126984, 0.4285714285714,
|
||||
0.4444444444444, 0.4603174603175, 0.4761904761905, 0.4920634920635,
|
||||
0.5079365079365, 0.5238095238095, 0.5396825396825, 0.5555555555556,
|
||||
0.5714285714286, 0.5873015873016, 0.6031746031746, 0.6190476190476,
|
||||
0.6349206349206, 0.6507936507937, 0.6666666666667, 0.6825396825397,
|
||||
0.6984126984127, 0.7142857142857, 0.7301587301587, 0.7460317460318,
|
||||
0.7619047619048, 0.7777777777778, 0.7936507936508, 0.8095238095238,
|
||||
0.8253968253968, 0.8412698412698, 0.8571428571429, 0.8730158730159,
|
||||
0.8888888888889, 0.9047619047619, 0.9206349206349, 0.9365079365079,
|
||||
0.9523809523810, 0.9682539682540, 0.9841269841270, 1.0};
|
||||
|
||||
const GLfloat PostprocessVtxBuffer[16] = {-1.0f, -1.0f, 1.0f, -1.0f, 1.0f, 1.0f, -1.0f, 1.0f,
|
||||
0.0f, 0.0f, 1.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f};
|
||||
const GLubyte PostprocessElementBuffer[6] = {0, 1, 2, 2, 3, 0};
|
||||
|
@ -355,7 +369,7 @@ static const char *fragmentShader_100 = {"\
|
|||
gl_FragData[0] = newFragColor;\n\
|
||||
gl_FragData[1] = vec4( packVec3FromFloat(newFragDepth), float(polyEnableDepthWrite && (newFragColor.a > 0.999 || polySetNewDepthForTranslucent)));\n\
|
||||
gl_FragData[2] = vec4(float(polyID)/63.0, 0.0, 0.0, float(newFragColor.a > 0.999));\n\
|
||||
gl_FragData[3] = vec4( float(polyEnableFog), 0.0, 0.0, float(newFragColor.a > 0.999 || !polyEnableFog));\n\
|
||||
gl_FragData[3] = vec4(float(polyEnableFog), 0.0, 0.0, float((newFragColor.a > 0.999) ? 1.0 : 0.5));\n\
|
||||
gl_FragDepth = newFragDepth;\n\
|
||||
} \n\
|
||||
"};
|
||||
|
@ -462,7 +476,7 @@ static const char *FogFragShader_100 = {"\
|
|||
{\n\
|
||||
vec4 inFragColor = texture2D(texInFragColor, texCoord);\n\
|
||||
vec4 inFogAttributes = texture2D(texInFogAttributes, texCoord);\n\
|
||||
bool polyEnableFog = bool(inFogAttributes.r);\n\
|
||||
bool polyEnableFog = (inFogAttributes.r > 0.999);\n\
|
||||
vec4 newFoggedColor = inFragColor;\n\
|
||||
\n\
|
||||
if (polyEnableFog)\n\
|
||||
|
@ -543,98 +557,6 @@ static const char *FramebufferOutputRGBA8888FragShader_100 = {"\
|
|||
}\n\
|
||||
"};
|
||||
|
||||
// Converts one packed 32-bit 8-8-8-8 pixel to 6-6-6-5 while exchanging the
// channels held in bits 8-15 and bits 24-31. In this variant the alpha value
// lives in the lowest byte and keeps its top five bits.
FORCEINLINE u32 BGRA8888_32_To_RGBA6665_32(const u32 srcPix)
{
	// A single shift by two drops the low two bits of every 8-bit channel.
	const u32 shifted = (srcPix >> 2);
	
	const u32 r = (shifted & 0x00003F00) << 16;
	const u32 g = (shifted & 0x003F0000);
	const u32 b = (shifted & 0x3F000000) >> 16;
	const u32 a = ((shifted >> 1) & 0x0000001F); // one extra shift: alpha is 5-bit
	
	return r | g | b | a;
}
|
||||
|
||||
// Converts one packed 32-bit 8-8-8-8 pixel to 6-6-6-5 while exchanging the
// channels held in bits 0-7 and bits 16-23. In this variant the alpha value
// lives in the highest byte and keeps its top five bits.
FORCEINLINE u32 BGRA8888_32Rev_To_RGBA6665_32Rev(const u32 srcPix)
{
	// A single shift by two drops the low two bits of every 8-bit channel.
	const u32 shifted = (srcPix >> 2);
	
	const u32 r = (shifted & 0x003F0000) >> 16;
	const u32 g = (shifted & 0x00003F00);
	const u32 b = (shifted & 0x0000003F) << 16;
	const u32 a = ((shifted >> 1) & 0x1F000000); // one extra shift: alpha is 5-bit
	
	return r | g | b | a;
}
|
||||
|
||||
// Field-wise variant: returns a copy of src whose r and b members are
// exchanged, with the three color members reduced to 6 bits and the alpha
// member reduced to 5 bits.
FORCEINLINE FragmentColor BGRA8888_32_To_RGBA6665_32(const FragmentColor src)
{
	FragmentColor converted = src;
	
	converted.a = src.a >> 3;
	converted.b = src.r >> 2;
	converted.g = src.g >> 2;
	converted.r = src.b >> 2;
	
	return converted;
}
|
||||
|
||||
// Field-wise variant: returns a copy of src whose r and b members are
// exchanged, with the three color members reduced to 6 bits and the alpha
// member reduced to 5 bits.
FORCEINLINE FragmentColor BGRA8888_32Rev_To_RGBA6665_32Rev(const FragmentColor src)
{
	FragmentColor converted = src;
	
	converted.a = src.a >> 3;
	converted.b = src.r >> 2;
	converted.g = src.g >> 2;
	converted.r = src.b >> 2;
	
	return converted;
}
|
||||
|
||||
// Reduces one 8-8-8-8 pixel to a 16-bit 5-5-5-1 value. The top five bits of
// each color member are handed to R5G5B5TORGB15 in b, g, r order, and bit 15
// is set whenever the source alpha is non-zero.
FORCEINLINE u16 BGRA8888_32_To_RGBA5551_16(const FragmentColor src)
{
	const u16 alphaBit = (src.a == 0) ? 0x0000 : 0x8000;
	
	return R5G5B5TORGB15( (src.b >> 3), (src.g >> 3), (src.r >> 3) ) | alphaBit;
}
|
||||
|
||||
// Reduces one 8-8-8-8 pixel to a 16-bit 5-5-5-1 value. The top five bits of
// each color member are handed to R5G5B5TORGB15 in b, g, r order, and bit 15
// is set whenever the source alpha is non-zero.
FORCEINLINE u16 BGRA8888_32Rev_To_RGBA5551_16Rev(const FragmentColor src)
{
	const u16 alphaBit = (src.a == 0) ? 0x0000 : 0x8000;
	
	return R5G5B5TORGB15( (src.b >> 3), (src.g >> 3), (src.r >> 3) ) | alphaBit;
}
|
||||
|
||||
#ifdef ENABLE_SSSE3
|
||||
|
||||
// Vector variant: converts four packed 8-8-8-8 pixels to 6-6-6-5 and
// exchanges bytes 0 and 2 of every pixel. Requires SSSE3 for the byte shuffle.
FORCEINLINE __m128i BGRA8888_32Rev_To_RGBA6665_32Rev(const __m128i src)
{
	const __m128i colorMask = _mm_set1_epi32(0x00FCFCFC);
	const __m128i alphaMask = _mm_set1_epi32(0xF8000000);
	const __m128i swizzleRB = _mm_set_epi8(15, 12, 13, 14, 11, 8, 9, 10, 7, 4, 5, 6, 3, 0, 1, 2);
	
	// Masking before the shift keeps bits from leaking across byte boundaries.
	const __m128i color6 = _mm_srli_epi32(_mm_and_si128(src, colorMask), 2);
	const __m128i alpha5 = _mm_srli_epi32(_mm_and_si128(src, alphaMask), 3);
	
	return _mm_shuffle_epi8(_mm_or_si128(color6, alpha5), swizzleRB); // Swizzle RGBA to BGRA
}
|
||||
|
||||
// Vector variant: converts four packed 8-8-8-8 pixels to four 16-bit 5-5-5-1
// pixels, exchanging the channels taken from source bytes 0 and 2. The four
// results end up in the low 64 bits of the returned vector; the upper 64 bits
// receive the (zero) high halves of each lane. Requires SSSE3.
FORCEINLINE __m128i BGRA8888_32Rev_To_RGBA5551_16Rev(const __m128i src)
{
	// Source byte 0 -> bits 10-14, byte 1 -> bits 5-9, byte 2 -> bits 0-4.
	const __m128i b = _mm_slli_epi32(_mm_and_si128(src, _mm_set1_epi32(0x000000F8)), 7);
	const __m128i g = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(0x0000F800)), 6);
	const __m128i r = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(0x00F80000)), 19);
	
	// Bit 15 is set for every lane whose alpha byte is non-zero.
	const __m128i alphaIsZero = _mm_cmpeq_epi32(_mm_and_si128(src, _mm_set1_epi32(0xFF000000)), _mm_setzero_si128());
	const __m128i a = _mm_andnot_si128(alphaIsZero, _mm_set1_epi32(0x00008000));
	
	const __m128i pixels32 = _mm_or_si128(_mm_or_si128(_mm_or_si128(b, g), r), a);
	
	// Each result sits in the low half of a 32-bit lane, so gather those halves
	// into the lower 64 bits before the caller stores them.
	// Note: packssdw is not usable here, because packing a value with the
	// 0x8000 bit set would saturate to 0x7FFF.
	const __m128i gatherLow16 = _mm_set_epi8(15, 14, 11, 10, 7, 6, 3, 2, 13, 12, 9, 8, 5, 4, 1, 0);
	return _mm_shuffle_epi8(pixels32, gatherLow16);
}
|
||||
|
||||
#endif
|
||||
|
||||
bool IsVersionSupported(unsigned int checkVersionMajor, unsigned int checkVersionMinor, unsigned int checkVersionRevision)
|
||||
{
|
||||
bool result = false;
|
||||
|
@ -1052,99 +974,62 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
|
|||
// to the DS Y-coordinate.
|
||||
|
||||
size_t i = 0;
|
||||
const size_t pixCount = this->_framebufferWidth;
|
||||
|
||||
#ifdef ENABLE_SSSE3
|
||||
const size_t ssePixCount = pixCount - (pixCount % 4);
|
||||
#endif
|
||||
|
||||
if (this->willFlipFramebufferOnGPU)
|
||||
{
|
||||
const size_t pixCount = this->_framebufferWidth * this->_framebufferHeight;
|
||||
|
||||
if (this->_outputFormat == NDSColorFormat_BGR666_Rev)
|
||||
{
|
||||
if ( (dstFramebuffer != NULL) && (dstRGBA5551 != NULL) )
|
||||
{
|
||||
#ifdef ENABLE_SSSE3
|
||||
for (; i < ssePixCount; i += 4)
|
||||
#ifdef ENABLE_SSE2
|
||||
const size_t ssePixCount = pixCount - (pixCount % 8);
|
||||
for (; i < ssePixCount; i += 8)
|
||||
{
|
||||
const __m128i srcColor = _mm_load_si128((__m128i *)(srcFramebuffer + i));
|
||||
const __m128i color6665 = BGRA8888_32Rev_To_RGBA6665_32Rev(srcColor);
|
||||
const __m128i color5551 = BGRA8888_32Rev_To_RGBA5551_16Rev(srcColor);
|
||||
_mm_store_si128((__m128i *)(dstFramebuffer + i), color6665);
|
||||
_mm_storel_epi64((__m128i *)(dstRGBA5551 + i), color5551);
|
||||
const __m128i srcColorLo = _mm_load_si128((__m128i *)(srcFramebuffer + i + 0));
|
||||
const __m128i srcColorHi = _mm_load_si128((__m128i *)(srcFramebuffer + i + 4));
|
||||
|
||||
_mm_store_si128( (__m128i *)(dstFramebuffer + i + 0), ConvertColor8888To6665<true>(srcColorLo) );
|
||||
_mm_store_si128( (__m128i *)(dstFramebuffer + i + 4), ConvertColor8888To6665<true>(srcColorHi) );
|
||||
_mm_store_si128( (__m128i *)(dstRGBA5551 + i), ConvertColor8888To5551<true>(srcColorLo, srcColorHi) );
|
||||
}
|
||||
#endif
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
#ifdef LOCAL_BE
|
||||
dstFramebuffer[i] = BGRA8888_32_To_RGBA6665_32(srcFramebuffer[i]);
|
||||
dstRGBA5551[i] = BGRA8888_32_To_RGBA5551_16(srcFramebuffer[i]);
|
||||
#else
|
||||
dstFramebuffer[i] = BGRA8888_32Rev_To_RGBA6665_32Rev(srcFramebuffer[i]);
|
||||
dstRGBA5551[i] = BGRA8888_32Rev_To_RGBA5551_16Rev(srcFramebuffer[i]);
|
||||
#endif
|
||||
dstFramebuffer[i] = ConvertColor8888To6665<true>(srcFramebuffer[i]);
|
||||
dstRGBA5551[i] = ConvertColor8888To5551<true>(srcFramebuffer[i]);
|
||||
}
|
||||
}
|
||||
else if (dstFramebuffer != NULL)
|
||||
{
|
||||
#ifdef ENABLE_SSSE3
|
||||
for (; i < ssePixCount; i += 4)
|
||||
{
|
||||
const __m128i srcColor = _mm_load_si128((__m128i *)(srcFramebuffer + i));
|
||||
const __m128i color6665 = BGRA8888_32Rev_To_RGBA6665_32Rev(srcColor);
|
||||
_mm_store_si128((__m128i *)(dstFramebuffer + i), color6665);
|
||||
}
|
||||
#endif
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
#ifdef LOCAL_BE
|
||||
dstFramebuffer[i] = BGRA8888_32_To_RGBA6665_32(srcFramebuffer[i]);
|
||||
#else
|
||||
dstFramebuffer[i] = BGRA8888_32Rev_To_RGBA6665_32Rev(srcFramebuffer[i]);
|
||||
#endif
|
||||
}
|
||||
ConvertColorBuffers8888To6665<true>(srcFramebuffer, dstFramebuffer, pixCount);
|
||||
}
|
||||
else
|
||||
{
|
||||
#ifdef ENABLE_SSSE3
|
||||
for (; i < ssePixCount; i += 4)
|
||||
{
|
||||
const __m128i srcColor = _mm_load_si128((__m128i *)(srcFramebuffer + i));
|
||||
const __m128i color5551 = BGRA8888_32Rev_To_RGBA5551_16Rev(srcColor);
|
||||
_mm_storel_epi64((__m128i *)(dstRGBA5551 + i), color5551);
|
||||
}
|
||||
#endif
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
#ifdef LOCAL_BE
|
||||
dstRGBA5551[i] = BGRA8888_32_To_RGBA5551_16(srcFramebuffer[i]);
|
||||
#else
|
||||
dstRGBA5551[i] = BGRA8888_32Rev_To_RGBA5551_16Rev(srcFramebuffer[i]);
|
||||
#endif
|
||||
}
|
||||
ConvertColorBuffers8888To5551<true>(srcFramebuffer, dstRGBA5551, pixCount);
|
||||
}
|
||||
}
|
||||
else if (this->_outputFormat == NDSColorFormat_BGR888_Rev)
|
||||
{
|
||||
if ( (dstFramebuffer != NULL) && (dstRGBA5551 != NULL) )
|
||||
{
|
||||
#ifdef ENABLE_SSSE3
|
||||
for (; i < ssePixCount; i += 4)
|
||||
#ifdef ENABLE_SSE2
|
||||
const size_t ssePixCount = pixCount - (pixCount % 8);
|
||||
for (; i < ssePixCount; i += 8)
|
||||
{
|
||||
const __m128i srcColor = _mm_load_si128((__m128i *)(srcFramebuffer + i));
|
||||
const __m128i color5551 = BGRA8888_32Rev_To_RGBA5551_16Rev(srcColor);
|
||||
_mm_store_si128((__m128i *)(dstFramebuffer + i), srcColor);
|
||||
_mm_storel_epi64((__m128i *)(dstRGBA5551 + i), color5551);
|
||||
const __m128i srcColorLo = _mm_load_si128((__m128i *)(srcFramebuffer + i + 0));
|
||||
const __m128i srcColorHi = _mm_load_si128((__m128i *)(srcFramebuffer + i + 4));
|
||||
|
||||
_mm_store_si128( (__m128i *)(dstFramebuffer + i + 0), srcColorLo );
|
||||
_mm_store_si128( (__m128i *)(dstFramebuffer + i + 4), srcColorHi );
|
||||
_mm_store_si128( (__m128i *)(dstRGBA5551 + i), ConvertColor8888To5551<true>(srcColorLo, srcColorHi) );
|
||||
}
|
||||
#endif
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dstFramebuffer[i] = srcFramebuffer[i];
|
||||
#ifdef LOCAL_BE
|
||||
dstRGBA5551[i] = BGRA8888_32_To_RGBA5551_16(srcFramebuffer[i]);
|
||||
#else
|
||||
dstRGBA5551[i] = BGRA8888_32Rev_To_RGBA5551_16Rev(srcFramebuffer[i]);
|
||||
#endif
|
||||
dstFramebuffer[i] = ConvertColor8888To6665<true>(srcFramebuffer[i]);
|
||||
dstRGBA5551[i] = ConvertColor8888To5551<true>(srcFramebuffer[i]);
|
||||
}
|
||||
}
|
||||
else if (dstFramebuffer != NULL)
|
||||
|
@ -1153,27 +1038,14 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
|
|||
}
|
||||
else
|
||||
{
|
||||
#ifdef ENABLE_SSSE3
|
||||
for (; i < ssePixCount; i += 4)
|
||||
{
|
||||
const __m128i srcColor = _mm_load_si128((__m128i *)(srcFramebuffer + i));
|
||||
const __m128i color5551 = BGRA8888_32Rev_To_RGBA5551_16Rev(srcColor);
|
||||
_mm_storel_epi64((__m128i *)(dstRGBA5551 + i), color5551);
|
||||
}
|
||||
#endif
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
#ifdef LOCAL_BE
|
||||
dstRGBA5551[i] = BGRA8888_32_To_RGBA5551_16(srcFramebuffer[i]);
|
||||
#else
|
||||
dstRGBA5551[i] = BGRA8888_32Rev_To_RGBA5551_16Rev(srcFramebuffer[i]);
|
||||
#endif
|
||||
}
|
||||
ConvertColorBuffers8888To5551<true>(srcFramebuffer, dstRGBA5551, pixCount);
|
||||
}
|
||||
}
|
||||
}
|
||||
else // In the case where OpenGL couldn't flip the framebuffer on the GPU, we'll instead need to flip the framebuffer during conversion.
|
||||
{
|
||||
const size_t pixCount = this->_framebufferWidth;
|
||||
|
||||
if (this->_outputFormat == NDSColorFormat_BGR666_Rev)
|
||||
{
|
||||
if ( (dstFramebuffer != NULL) && (dstRGBA5551 != NULL) )
|
||||
|
@ -1181,25 +1053,22 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
|
|||
for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, iw -= (this->_framebufferWidth * 2))
|
||||
{
|
||||
size_t x = 0;
|
||||
#ifdef ENABLE_SSSE3
|
||||
for (; x < ssePixCount; x += 4, ir += 4, iw += 4)
|
||||
#ifdef ENABLE_SSE2
|
||||
const size_t ssePixCount = pixCount - (pixCount % 8);
|
||||
for (; x < ssePixCount; x += 8, ir += 8, iw += 8)
|
||||
{
|
||||
const __m128i srcColor = _mm_load_si128((__m128i *)(srcFramebuffer + ir));
|
||||
const __m128i color6665 = BGRA8888_32Rev_To_RGBA6665_32Rev(srcColor);
|
||||
const __m128i color5551 = BGRA8888_32Rev_To_RGBA5551_16Rev(srcColor);
|
||||
_mm_store_si128((__m128i *)(dstFramebuffer + iw), color6665);
|
||||
_mm_storel_epi64((__m128i *)(dstFramebuffer + iw), color5551);
|
||||
const __m128i srcColorLo = _mm_load_si128((__m128i *)(srcFramebuffer + ir + 0));
|
||||
const __m128i srcColorHi = _mm_load_si128((__m128i *)(srcFramebuffer + ir + 4));
|
||||
|
||||
_mm_store_si128( (__m128i *)(dstFramebuffer + iw + 0), ConvertColor8888To6665<true>(srcColorLo) );
|
||||
_mm_store_si128( (__m128i *)(dstFramebuffer + iw + 4), ConvertColor8888To6665<true>(srcColorHi) );
|
||||
_mm_store_si128( (__m128i *)(dstRGBA5551 + iw), ConvertColor8888To5551<true>(srcColorLo, srcColorHi) );
|
||||
}
|
||||
#endif
|
||||
for (; x < pixCount; x++, ir++, iw++)
|
||||
{
|
||||
#ifdef LOCAL_BE
|
||||
dstFramebuffer[iw] = BGRA8888_32_To_RGBA6665_32(srcFramebuffer[ir]);
|
||||
dstRGBA5551[iw] = BGRA8888_32_To_RGBA5551_16(srcFramebuffer[ir]);
|
||||
#else
|
||||
dstFramebuffer[iw] = BGRA8888_32Rev_To_RGBA6665_32Rev(srcFramebuffer[ir]);
|
||||
dstRGBA5551[iw] = BGRA8888_32Rev_To_RGBA5551_16Rev(srcFramebuffer[ir]);
|
||||
#endif
|
||||
dstFramebuffer[iw] = ConvertColor8888To6665<true>(srcFramebuffer[ir]);
|
||||
dstRGBA5551[iw] = ConvertColor8888To5551<true>(srcFramebuffer[ir]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1207,46 +1076,14 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
|
|||
{
|
||||
for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, iw -= (this->_framebufferWidth * 2))
|
||||
{
|
||||
size_t x = 0;
|
||||
#ifdef ENABLE_SSSE3
|
||||
for (; x < ssePixCount; x += 4, ir += 4, iw += 4)
|
||||
{
|
||||
const __m128i srcColor = _mm_load_si128((__m128i *)(srcFramebuffer + ir));
|
||||
const __m128i color6665 = BGRA8888_32Rev_To_RGBA6665_32Rev(srcColor);
|
||||
_mm_store_si128((__m128i *)(dstFramebuffer + iw), color6665);
|
||||
}
|
||||
#endif
|
||||
for (; x < pixCount; x++, ir++, iw++)
|
||||
{
|
||||
#ifdef LOCAL_BE
|
||||
dstFramebuffer[iw] = BGRA8888_32_To_RGBA6665_32(srcFramebuffer[ir]);
|
||||
#else
|
||||
dstFramebuffer[iw] = BGRA8888_32Rev_To_RGBA6665_32Rev(srcFramebuffer[ir]);
|
||||
#endif
|
||||
}
|
||||
ConvertColorBuffers8888To6665<true>(srcFramebuffer + ir, dstFramebuffer + iw, pixCount);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, iw -= (this->_framebufferWidth * 2))
|
||||
{
|
||||
size_t x = 0;
|
||||
#ifdef ENABLE_SSSE3
|
||||
for (; x < ssePixCount; x += 4, ir += 4, iw += 4)
|
||||
{
|
||||
const __m128i srcColor = _mm_load_si128((__m128i *)(srcFramebuffer + ir));
|
||||
const __m128i color5551 = BGRA8888_32Rev_To_RGBA5551_16Rev(srcColor);
|
||||
_mm_storel_epi64((__m128i *)(dstFramebuffer + iw), color5551);
|
||||
}
|
||||
#endif
|
||||
for (; x < pixCount; x++, ir++, iw++)
|
||||
{
|
||||
#ifdef LOCAL_BE
|
||||
dstRGBA5551[iw] = BGRA8888_32_To_RGBA5551_16(srcFramebuffer[ir]);
|
||||
#else
|
||||
dstRGBA5551[iw] = BGRA8888_32Rev_To_RGBA5551_16Rev(srcFramebuffer[ir]);
|
||||
#endif
|
||||
}
|
||||
ConvertColorBuffers8888To5551<true>(srcFramebuffer + ir, dstRGBA5551 + iw, pixCount);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1257,23 +1094,22 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
|
|||
for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, iw -= (this->_framebufferWidth * 2))
|
||||
{
|
||||
size_t x = 0;
|
||||
#ifdef ENABLE_SSSE3
|
||||
for (; x < ssePixCount; x += 4, ir += 4, iw += 4)
|
||||
#ifdef ENABLE_SSE2
|
||||
const size_t ssePixCount = pixCount - (pixCount % 8);
|
||||
for (; x < ssePixCount; x += 8, ir += 8, iw += 8)
|
||||
{
|
||||
const __m128i srcColor = _mm_load_si128((__m128i *)(srcFramebuffer + ir));
|
||||
const __m128i color5551 = BGRA8888_32Rev_To_RGBA5551_16Rev(srcColor);
|
||||
_mm_store_si128((__m128i *)(dstFramebuffer + iw), srcColor);
|
||||
_mm_storel_epi64((__m128i *)(dstFramebuffer + iw), color5551);
|
||||
const __m128i srcColorLo = _mm_load_si128((__m128i *)(srcFramebuffer + ir + 0));
|
||||
const __m128i srcColorHi = _mm_load_si128((__m128i *)(srcFramebuffer + ir + 4));
|
||||
|
||||
_mm_store_si128( (__m128i *)(dstFramebuffer + iw + 0), srcColorLo );
|
||||
_mm_store_si128( (__m128i *)(dstFramebuffer + iw + 4), srcColorHi );
|
||||
_mm_store_si128( (__m128i *)(dstRGBA5551 + iw), ConvertColor8888To5551<true>(srcColorLo, srcColorHi) );
|
||||
}
|
||||
#endif
|
||||
for (; x < pixCount; x++, ir++, iw++)
|
||||
{
|
||||
dstFramebuffer[iw] = srcFramebuffer[ir];
|
||||
#ifdef LOCAL_BE
|
||||
dstRGBA5551[iw] = BGRA8888_32_To_RGBA5551_16(srcFramebuffer[ir]);
|
||||
#else
|
||||
dstRGBA5551[iw] = BGRA8888_32Rev_To_RGBA5551_16Rev(srcFramebuffer[ir]);
|
||||
#endif
|
||||
dstRGBA5551[iw] = ConvertColor8888To5551<true>(srcFramebuffer[ir]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1294,23 +1130,7 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
|
|||
{
|
||||
for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, iw -= (this->_framebufferWidth * 2))
|
||||
{
|
||||
size_t x = 0;
|
||||
#ifdef ENABLE_SSSE3
|
||||
for (; x < ssePixCount; x += 4, ir += 4, iw += 4)
|
||||
{
|
||||
const __m128i srcColor = _mm_load_si128((__m128i *)(srcFramebuffer + ir));
|
||||
const __m128i color5551 = BGRA8888_32Rev_To_RGBA5551_16Rev(srcColor);
|
||||
_mm_storel_epi64((__m128i *)(dstFramebuffer + iw), color5551);
|
||||
}
|
||||
#endif
|
||||
for (; x < pixCount; x++, ir++, iw++)
|
||||
{
|
||||
#ifdef LOCAL_BE
|
||||
dstRGBA5551[iw] = BGRA8888_32_To_RGBA5551_16(srcFramebuffer[ir]);
|
||||
#else
|
||||
dstRGBA5551[iw] = BGRA8888_32Rev_To_RGBA5551_16Rev(srcFramebuffer[ir]);
|
||||
#endif
|
||||
}
|
||||
ConvertColorBuffers8888To5551<true>(srcFramebuffer + ir, dstRGBA5551 + iw, pixCount);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1323,11 +1143,7 @@ Render3DError OpenGLRenderer::FlushFramebuffer(const FragmentColor *__restrict s
|
|||
{
|
||||
if (this->willConvertFramebufferOnGPU)
|
||||
{
|
||||
#ifdef ENABLE_SSE2
|
||||
return Render3D_SSE2::FlushFramebuffer(srcFramebuffer, NULL, dstRGBA5551);
|
||||
#else
|
||||
return Render3D::FlushFramebuffer(srcFramebuffer, NULL, dstRGBA5551);
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -2963,7 +2779,7 @@ Render3DError OpenGLRenderer_1_2::ClearUsingImage(const u16 *__restrict colorBuf
|
|||
return OGLERROR_NOERR;
|
||||
}
|
||||
|
||||
Render3DError OpenGLRenderer_1_2::ClearUsingValues(const FragmentColor &clearColor, const FragmentAttributes &clearAttributes) const
|
||||
Render3DError OpenGLRenderer_1_2::ClearUsingValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes) const
|
||||
{
|
||||
OGLRenderRef &OGLRef = *this->ref;
|
||||
|
||||
|
@ -2978,7 +2794,7 @@ Render3DError OpenGLRenderer_1_2::ClearUsingValues(const FragmentColor &clearCol
|
|||
if (this->isShaderSupported && this->isFBOSupported)
|
||||
{
|
||||
glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT); // texGColorID
|
||||
glClearColor(divide5bitBy31_LUT[clearColor.r], divide5bitBy31_LUT[clearColor.g], divide5bitBy31_LUT[clearColor.b], divide5bitBy31_LUT[clearColor.a]);
|
||||
glClearColor(divide6bitBy63_LUT[clearColor6665.r], divide6bitBy63_LUT[clearColor6665.g], divide6bitBy63_LUT[clearColor6665.b], divide5bitBy31_LUT[clearColor6665.a]);
|
||||
glClearDepth((GLclampd)clearAttributes.depth / (GLclampd)0x00FFFFFF);
|
||||
glClearStencil(0xFF);
|
||||
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
|
||||
|
@ -2999,7 +2815,7 @@ Render3DError OpenGLRenderer_1_2::ClearUsingValues(const FragmentColor &clearCol
|
|||
}
|
||||
else
|
||||
{
|
||||
glClearColor(divide5bitBy31_LUT[clearColor.r], divide5bitBy31_LUT[clearColor.g], divide5bitBy31_LUT[clearColor.b], divide5bitBy31_LUT[clearColor.a]);
|
||||
glClearColor(divide6bitBy63_LUT[clearColor6665.r], divide6bitBy63_LUT[clearColor6665.g], divide6bitBy63_LUT[clearColor6665.b], divide5bitBy31_LUT[clearColor6665.a]);
|
||||
glClearDepth((GLclampd)clearAttributes.depth / (GLclampd)0x00FFFFFF);
|
||||
glClearStencil(clearAttributes.opaquePolyID);
|
||||
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
|
||||
|
|
|
@ -522,6 +522,7 @@ extern GPU3DInterface gpu3Dgl_3_2;
|
|||
|
||||
extern const GLenum RenderDrawList[4];
|
||||
extern CACHE_ALIGN const GLfloat divide5bitBy31_LUT[32];
|
||||
extern CACHE_ALIGN const GLfloat divide6bitBy63_LUT[64];
|
||||
extern const GLfloat PostprocessVtxBuffer[16];
|
||||
extern const GLubyte PostprocessElementBuffer[6];
|
||||
|
||||
|
@ -560,9 +561,7 @@ FORCEINLINE u32 BGRA8888_32_To_RGBA6665_32(const u32 srcPix);
|
|||
FORCEINLINE u32 BGRA8888_32Rev_To_RGBA6665_32Rev(const u32 srcPix);
|
||||
bool IsVersionSupported(unsigned int checkVersionMajor, unsigned int checkVersionMinor, unsigned int checkVersionRevision);
|
||||
|
||||
#if defined(ENABLE_SSSE3)
|
||||
class OpenGLRenderer : public Render3D_SSSE3
|
||||
#elif defined(ENABLE_SSE2)
|
||||
#if defined(ENABLE_SSE2)
|
||||
class OpenGLRenderer : public Render3D_SSE2
|
||||
#else
|
||||
class OpenGLRenderer : public Render3D
|
||||
|
@ -719,7 +718,7 @@ protected:
|
|||
virtual Render3DError EndRender(const u64 frameCount);
|
||||
|
||||
virtual Render3DError ClearUsingImage(const u16 *__restrict colorBuffer, const u32 *__restrict depthBuffer, const u8 *__restrict fogBuffer, const u8 *__restrict polyIDBuffer);
|
||||
virtual Render3DError ClearUsingValues(const FragmentColor &clearColor, const FragmentAttributes &clearAttributes) const;
|
||||
virtual Render3DError ClearUsingValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes) const;
|
||||
|
||||
virtual void SetPolygonIndex(const size_t index);
|
||||
virtual Render3DError SetupPolygon(const POLY &thePoly);
|
||||
|
|
|
@ -251,7 +251,7 @@ static const char *GeometryFragShader_150 = {"\
|
|||
outFragColor = newFragColor;\n\
|
||||
outFragDepth = vec4( packVec3FromFloat(newFragDepth), float(bool(polyEnableDepthWrite) && (newFragColor.a > 0.999 || bool(polySetNewDepthForTranslucent))));\n\
|
||||
outPolyID = vec4(float(polyID)/63.0, 0.0, 0.0, float(newFragColor.a > 0.999));\n\
|
||||
outFogAttributes = vec4( float(polyEnableFog), 0.0, 0.0, float(newFragColor.a > 0.999 || !bool(polyEnableFog)));\n\
|
||||
outFogAttributes = vec4(float(polyEnableFog), 0.0, 0.0, float((newFragColor.a > 0.999) ? 1.0 : 0.5));\n\
|
||||
gl_FragDepth = newFragDepth;\n\
|
||||
} \n\
|
||||
"};
|
||||
|
@ -420,7 +420,7 @@ static const char *FogFragShader_150 = {"\
|
|||
{\n\
|
||||
vec4 inFragColor = texture(texInFragColor, texCoord);\n\
|
||||
vec4 inFogAttributes = texture(texInFogAttributes, texCoord);\n\
|
||||
bool polyEnableFog = bool(inFogAttributes.r);\n\
|
||||
bool polyEnableFog = (inFogAttributes.r > 0.999);\n\
|
||||
vec4 newFoggedColor = inFragColor;\n\
|
||||
\n\
|
||||
if (polyEnableFog)\n\
|
||||
|
@ -1585,7 +1585,7 @@ Render3DError OpenGLRenderer_3_2::ClearUsingImage(const u16 *__restrict colorBuf
|
|||
return OGLERROR_NOERR;
|
||||
}
|
||||
|
||||
Render3DError OpenGLRenderer_3_2::ClearUsingValues(const FragmentColor &clearColor, const FragmentAttributes &clearAttributes) const
|
||||
Render3DError OpenGLRenderer_3_2::ClearUsingValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes) const
|
||||
{
|
||||
OGLRenderRef &OGLRef = *this->ref;
|
||||
OGLRef.selectedRenderingFBO = (CommonSettings.GFX3D_Renderer_Multisample) ? OGLRef.fboMSIntermediateRenderID : OGLRef.fboRenderID;
|
||||
|
@ -1593,10 +1593,10 @@ Render3DError OpenGLRenderer_3_2::ClearUsingValues(const FragmentColor &clearCol
|
|||
glDrawBuffers(4, RenderDrawList);
|
||||
glDepthMask(GL_TRUE);
|
||||
|
||||
const GLfloat oglColor[4] = {divide5bitBy31_LUT[clearColor.r], divide5bitBy31_LUT[clearColor.g], divide5bitBy31_LUT[clearColor.b], divide5bitBy31_LUT[clearColor.a]};
|
||||
const GLfloat oglColor[4] = {divide6bitBy63_LUT[clearColor6665.r], divide6bitBy63_LUT[clearColor6665.g], divide6bitBy63_LUT[clearColor6665.b], divide5bitBy31_LUT[clearColor6665.a]};
|
||||
const GLfloat oglDepth[4] = {(GLfloat)(clearAttributes.depth & 0x000000FF)/255.0f, (GLfloat)((clearAttributes.depth >> 8) & 0x000000FF)/255.0f, (GLfloat)((clearAttributes.depth >> 16) & 0x000000FF)/255.0f, 1.0};
|
||||
const GLfloat oglPolyID[4] = {(GLfloat)clearAttributes.opaquePolyID/63.0f, 0.0, 0.0, 1.0};
|
||||
const GLfloat oglFogAttr[4] = {(GLfloat)clearAttributes.isFogged, 0.0, 0.0, 1.0};
|
||||
const GLfloat oglPolyID[4] = {(GLfloat)clearAttributes.opaquePolyID/63.0f, 0.0f, 0.0f, 1.0f};
|
||||
const GLfloat oglFogAttr[4] = {(GLfloat)clearAttributes.isFogged, 0.0f, 0.0f, 1.0f};
|
||||
|
||||
glClearBufferfi(GL_DEPTH_STENCIL, 0, (GLfloat)clearAttributes.depth / (GLfloat)0x00FFFFFF, 0xFF);
|
||||
glClearBufferfv(GL_COLOR, 0, oglColor); // texGColorID
|
||||
|
|
|
@ -94,7 +94,7 @@ protected:
|
|||
virtual Render3DError DestroyToonTable();
|
||||
virtual Render3DError UpdateToonTable(const u16 *toonTableBuffer);
|
||||
virtual Render3DError ClearUsingImage(const u16 *__restrict colorBuffer, const u32 *__restrict depthBuffer, const u8 *__restrict fogBuffer, const u8 *__restrict polyIDBuffer);
|
||||
virtual Render3DError ClearUsingValues(const FragmentColor &clearColor, const FragmentAttributes &clearAttributes) const;
|
||||
virtual Render3DError ClearUsingValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes) const;
|
||||
|
||||
virtual void SetPolygonIndex(const size_t index);
|
||||
virtual Render3DError SetupPolygon(const POLY &thePoly);
|
||||
|
|
|
@ -266,63 +266,9 @@ Viewer3d_State* viewer3d_state = NULL;
|
|||
static GFX3D_Clipper boxtestClipper;
|
||||
|
||||
//tables that are provided to anyone
|
||||
CACHE_ALIGN u32 color_15bit_to_24bit_reverse[32768];
|
||||
CACHE_ALIGN u32 color_15bit_to_24bit[32768];
|
||||
CACHE_ALIGN u16 color_15bit_to_16bit_reverse[32768];
|
||||
CACHE_ALIGN u8 mixTable555[32][32][32];
|
||||
CACHE_ALIGN u32 dsDepthExtend_15bit_to_24bit[32768];
|
||||
|
||||
//is this a crazy idea? this table spreads 5 bits evenly over 31 from exactly 0 to INT_MAX
|
||||
CACHE_ALIGN const u32 material_5bit_to_31bit[] = {
|
||||
0x00000000, 0x04210842, 0x08421084, 0x0C6318C6,
|
||||
0x10842108, 0x14A5294A, 0x18C6318C, 0x1CE739CE,
|
||||
0x21084210, 0x25294A52, 0x294A5294, 0x2D6B5AD6,
|
||||
0x318C6318, 0x35AD6B5A, 0x39CE739C, 0x3DEF7BDE,
|
||||
0x42108421, 0x46318C63, 0x4A5294A5, 0x4E739CE7,
|
||||
0x5294A529, 0x56B5AD6B, 0x5AD6B5AD, 0x5EF7BDEF,
|
||||
0x6318C631, 0x6739CE73, 0x6B5AD6B5, 0x6F7BDEF7,
|
||||
0x739CE739, 0x77BDEF7B, 0x7BDEF7BD, 0x7FFFFFFF
|
||||
};
|
||||
|
||||
CACHE_ALIGN const u8 material_5bit_to_6bit[] = {
|
||||
0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E,
|
||||
0x10, 0x12, 0x14, 0x16, 0x19, 0x1A, 0x1C, 0x1E,
|
||||
0x21, 0x23, 0x25, 0x27, 0x29, 0x2B, 0x2D, 0x2F,
|
||||
0x31, 0x33, 0x35, 0x37, 0x39, 0x3B, 0x3D, 0x3F
|
||||
};
|
||||
|
||||
CACHE_ALIGN const u8 material_5bit_to_8bit[] = {
|
||||
0x00, 0x08, 0x10, 0x18, 0x21, 0x29, 0x31, 0x39,
|
||||
0x42, 0x4A, 0x52, 0x5A, 0x63, 0x6B, 0x73, 0x7B,
|
||||
0x84, 0x8C, 0x94, 0x9C, 0xA5, 0xAD, 0xB5, 0xBD,
|
||||
0xC6, 0xCE, 0xD6, 0xDE, 0xE7, 0xEF, 0xF7, 0xFF
|
||||
};
|
||||
|
||||
CACHE_ALIGN const u8 material_6bit_to_8bit[] = {
|
||||
0x00, 0x04, 0x08, 0x0C, 0x10, 0x14, 0x18, 0x1C,
|
||||
0x20, 0x24, 0x28, 0x2C, 0x30, 0x34, 0x38, 0x3C,
|
||||
0x41, 0x45, 0x49, 0x4D, 0x51, 0x55, 0x59, 0x5D,
|
||||
0x61, 0x65, 0x69, 0x6D, 0x71, 0x75, 0x79, 0x7D,
|
||||
0x82, 0x86, 0x8A, 0x8E, 0x92, 0x96, 0x9A, 0x9E,
|
||||
0xA2, 0xA6, 0xAA, 0xAE, 0xB2, 0xB6, 0xBA, 0xBE,
|
||||
0xC3, 0xC7, 0xCB, 0xCF, 0xD3, 0xD7, 0xDB, 0xDF,
|
||||
0xE3, 0xE7, 0xEB, 0xEF, 0xF3, 0xF7, 0xFB, 0xFF
|
||||
};
|
||||
|
||||
CACHE_ALIGN const u8 material_3bit_to_8bit[] = {
|
||||
0x00, 0x24, 0x49, 0x6D, 0x92, 0xB6, 0xDB, 0xFF
|
||||
};
|
||||
|
||||
//maybe not very precise
|
||||
CACHE_ALIGN const u8 material_3bit_to_5bit[] = {
|
||||
0, 4, 8, 13, 17, 22, 26, 31
|
||||
};
|
||||
|
||||
//TODO - generate this in the static init method more accurately
|
||||
CACHE_ALIGN const u8 material_3bit_to_6bit[] = {
|
||||
0, 8, 16, 26, 34, 44, 52, 63
|
||||
};
|
||||
|
||||
//private acceleration tables
|
||||
static float float16table[65536];
|
||||
static float float10Table[1024];
|
||||
|
@ -451,21 +397,11 @@ static BOOL flushPending = FALSE;
|
|||
static BOOL drawPending = FALSE;
|
||||
//------------------------------------------------------------
|
||||
|
||||
static void makeTables() {
|
||||
|
||||
//produce the color bits of a 24bpp color from a DS RGB15 using bit logic (internal use only)
|
||||
#define RGB15TO24_BITLOGIC(col) ( (material_5bit_to_8bit[((col)>>10)&0x1F]<<16) | (material_5bit_to_8bit[((col)>>5)&0x1F]<<8) | material_5bit_to_8bit[(col)&0x1F] )
|
||||
|
||||
//produce the color bits of a 24bpp color from a DS RGB15 using bit logic (internal use only). RGB are reverse of usual
|
||||
#define RGB15TO24_BITLOGIC_REVERSE(col) ( (material_5bit_to_8bit[(col)&0x1F]<<16) | (material_5bit_to_8bit[((col)>>5)&0x1F]<<8) | material_5bit_to_8bit[((col)>>10)&0x1F] )
|
||||
|
||||
static void makeTables()
|
||||
{
|
||||
for (size_t i = 0; i < 32768; i++)
|
||||
{
|
||||
color_15bit_to_24bit[i] = LE_TO_LOCAL_32( RGB15TO24_BITLOGIC(i) );
|
||||
color_15bit_to_24bit_reverse[i] = LE_TO_LOCAL_32( RGB15TO24_BITLOGIC_REVERSE(i) );
|
||||
color_15bit_to_16bit_reverse[i] = (((i & 0x001F) << 11) | (material_5bit_to_6bit[(i & 0x03E0) >> 5] << 5) | ((i & 0x7C00) >> 10));
|
||||
|
||||
// 15-bit to 24-bit depth formula from http://nocash.emubase.de/gbatek.htm#ds3drearplane
|
||||
// 15-bit to 24-bit depth formula from http://problemkaputt.de/gbatek.htm#ds3drearplane
|
||||
dsDepthExtend_15bit_to_24bit[i] = LE_TO_LOCAL_32( (i*0x200)+((i+1)>>15)*0x01FF );
|
||||
}
|
||||
|
||||
|
@ -771,9 +707,9 @@ static void SetVertex()
|
|||
vert.coord[1] = coordTransformed[1]/4096.0f;
|
||||
vert.coord[2] = coordTransformed[2]/4096.0f;
|
||||
vert.coord[3] = coordTransformed[3]/4096.0f;
|
||||
vert.color[0] = GFX3D_5TO6(colorRGB[0]);
|
||||
vert.color[1] = GFX3D_5TO6(colorRGB[1]);
|
||||
vert.color[2] = GFX3D_5TO6(colorRGB[2]);
|
||||
vert.color[0] = GFX3D_5TO6_LOOKUP(colorRGB[0]);
|
||||
vert.color[1] = GFX3D_5TO6_LOOKUP(colorRGB[1]);
|
||||
vert.color[2] = GFX3D_5TO6_LOOKUP(colorRGB[2]);
|
||||
vert.color_to_float();
|
||||
tempVertInfo.map[tempVertInfo.count] = vertlist->count + tempVertInfo.count - continuation;
|
||||
tempVertInfo.count++;
|
||||
|
|
|
@ -69,52 +69,8 @@ class EMUFILE;
|
|||
#define GFX3D_VEC_TEST 0x72
|
||||
#define GFX3D_NOP_NOARG_HACK 0xDD
|
||||
|
||||
//produce a 32bpp color from a ds RGB15, using a table
|
||||
#define RGB15TO32_NOALPHA(col) ( color_15bit_to_24bit[col&0x7FFF] )
|
||||
|
||||
//produce a 32bpp color from a ds RGB15 plus an 8bit alpha, using a table
|
||||
#ifdef WORDS_BIGENDIAN
|
||||
#define RGB15TO32(col,alpha8) ( (alpha8) | color_15bit_to_24bit[(col)&0x7FFF] )
|
||||
#else
|
||||
#define RGB15TO32(col,alpha8) ( ((alpha8)<<24) | color_15bit_to_24bit[(col)&0x7FFF] )
|
||||
#endif
|
||||
|
||||
//produce a 5555 32bit color from a ds RGB15 plus an 5bit alpha
|
||||
#ifdef WORDS_BIGENDIAN
|
||||
#define RGB15TO5555(col,alpha5) ( (alpha5) | ((((col) & 0x7C00)>>10)<<8) | ((((col) & 0x03E0)>>5)<<16) | (((col) & 0x001F)<<24) )
|
||||
#else
|
||||
#define RGB15TO5555(col,alpha5) ( ((alpha5)<<24) | ((((col) & 0x7C00)>>10)<<16) | ((((col) & 0x03E0)>>5)<<8) | ((col) & 0x001F) )
|
||||
#endif
|
||||
|
||||
//produce a 6665 32bit color from a ds RGB15 plus an 5bit alpha
|
||||
inline u32 RGB15TO6665(u16 col, u8 alpha5)
|
||||
{
|
||||
const u16 r = (col&0x001F)>>0;
|
||||
const u16 g = (col&0x03E0)>>5;
|
||||
const u16 b = (col&0x7C00)>>10;
|
||||
|
||||
#ifdef WORDS_BIGENDIAN
|
||||
const u32 ret = alpha5 | (((b<<1)+1)<<8) | (((g<<1)+1)<<16) | (((r<<1)+1)<<24);
|
||||
#else
|
||||
const u32 ret = (alpha5<<24) | (((b<<1)+1)<<16) | (((g<<1)+1)<<8) | ((r<<1)+1);
|
||||
#endif
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
//produce a 24bpp color from a ds RGB15, using a table
|
||||
#define RGB15TO24_REVERSE(col) ( color_15bit_to_24bit_reverse[(col)&0x7FFF] )
|
||||
|
||||
//produce a 16bpp color from a ds RGB15, using a table
|
||||
#define RGB15TO16_REVERSE(col) ( color_15bit_to_16bit_reverse[(col)&0x7FFF] )
|
||||
|
||||
//produce a 15bpp color from individual 5bit components
|
||||
#define R5G5B5TORGB15(r,g,b) ( (r) | ((g)<<5) | ((b)<<10) )
|
||||
|
||||
//produce a 16bpp color from individual 5bit components
|
||||
#define R6G6B6TORGB15(r,g,b) ( ((r)>>1) | (((g)&0x3E)<<4) | (((b)&0x3E)<<9) )
|
||||
|
||||
#define GFX3D_5TO6(x) ((x)?(((x)<<1)+1):0)
|
||||
#define GFX3D_5TO6_LOOKUP(x) (material_5bit_to_6bit[(x)])
|
||||
|
||||
// 15-bit to 24-bit depth formula from http://nocash.emubase.de/gbatek.htm#ds3drearplane
|
||||
#define DS_DEPTH15TO24(depth) ( dsDepthExtend_15bit_to_24bit[(depth) & 0x7FFF] )
|
||||
|
@ -733,18 +689,8 @@ extern u32 Render3DFramesPerSecond; // save the current 3D rendering frame count
|
|||
|
||||
//---------------------
|
||||
|
||||
extern CACHE_ALIGN u32 color_15bit_to_24bit[32768];
|
||||
extern CACHE_ALIGN u32 color_15bit_to_24bit_reverse[32768];
|
||||
extern CACHE_ALIGN u16 color_15bit_to_16bit_reverse[32768];
|
||||
extern CACHE_ALIGN u32 dsDepthExtend_15bit_to_24bit[32768];
|
||||
extern CACHE_ALIGN u8 mixTable555[32][32][32];
|
||||
extern CACHE_ALIGN const u32 material_5bit_to_31bit[32];
|
||||
extern CACHE_ALIGN const u8 material_5bit_to_6bit[32];
|
||||
extern CACHE_ALIGN const u8 material_5bit_to_8bit[32];
|
||||
extern CACHE_ALIGN const u8 material_6bit_to_8bit[64];
|
||||
extern CACHE_ALIGN const u8 material_3bit_to_5bit[8];
|
||||
extern CACHE_ALIGN const u8 material_3bit_to_6bit[8];
|
||||
extern CACHE_ALIGN const u8 material_3bit_to_8bit[8];
|
||||
|
||||
extern BOOL isSwapBuffers;
|
||||
|
||||
|
|
|
@ -49,10 +49,6 @@
|
|||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_SSSE3
|
||||
#include <tmmintrin.h>
|
||||
#endif
|
||||
|
||||
#include "bits.h"
|
||||
#include "common.h"
|
||||
#include "matrix.h"
|
||||
|
@ -495,7 +491,7 @@ public:
|
|||
dst.r = modulate_table[mainTexColor.r][src.r];
|
||||
dst.g = modulate_table[mainTexColor.g][src.g];
|
||||
dst.b = modulate_table[mainTexColor.b][src.b];
|
||||
dst.a = modulate_table[GFX3D_5TO6(mainTexColor.a)][GFX3D_5TO6(src.a)]>>1;
|
||||
dst.a = modulate_table[GFX3D_5TO6_LOOKUP(mainTexColor.a)][GFX3D_5TO6_LOOKUP(src.a)]>>1;
|
||||
//dst.a = 28;
|
||||
//#ifdef _MSC_VER
|
||||
//if(GetAsyncKeyState(VK_SHIFT)) {
|
||||
|
@ -538,7 +534,7 @@ public:
|
|||
dst.r = modulate_table[mainTexColor.r][src.r];
|
||||
dst.g = modulate_table[mainTexColor.g][src.r];
|
||||
dst.b = modulate_table[mainTexColor.b][src.r];
|
||||
dst.a = modulate_table[GFX3D_5TO6(mainTexColor.a)][GFX3D_5TO6(src.a)] >> 1;
|
||||
dst.a = modulate_table[GFX3D_5TO6_LOOKUP(mainTexColor.a)][GFX3D_5TO6_LOOKUP(src.a)] >> 1;
|
||||
|
||||
dst.r = min<u8>(0x3F, (dst.r + toonColor.r));
|
||||
dst.g = min<u8>(0x3F, (dst.g + toonColor.g));
|
||||
|
@ -549,7 +545,7 @@ public:
|
|||
dst.r = modulate_table[mainTexColor.r][toonColor.r];
|
||||
dst.g = modulate_table[mainTexColor.g][toonColor.g];
|
||||
dst.b = modulate_table[mainTexColor.b][toonColor.b];
|
||||
dst.a = modulate_table[GFX3D_5TO6(mainTexColor.a)][GFX3D_5TO6(src.a)] >> 1;
|
||||
dst.a = modulate_table[GFX3D_5TO6_LOOKUP(mainTexColor.a)][GFX3D_5TO6_LOOKUP(src.a)] >> 1;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
@ -1132,9 +1128,7 @@ void _HACK_Viewer_ExecUnit()
|
|||
|
||||
static Render3D* SoftRasterizerRendererCreate()
|
||||
{
|
||||
#if defined(ENABLE_SSSE3)
|
||||
return new SoftRasterizerRenderer_SSSE3;
|
||||
#elif defined(ENABLE_SSE2)
|
||||
#if defined(ENABLE_SSE2)
|
||||
return new SoftRasterizerRenderer_SSE2;
|
||||
#else
|
||||
return new SoftRasterizerRenderer;
|
||||
|
@ -1145,9 +1139,7 @@ static void SoftRasterizerRendererDestroy()
|
|||
{
|
||||
if (CurrentRenderer != BaseRenderer)
|
||||
{
|
||||
#if defined(ENABLE_SSSE3)
|
||||
SoftRasterizerRenderer_SSSE3 *oldRenderer = (SoftRasterizerRenderer_SSSE3 *)CurrentRenderer;
|
||||
#elif defined(ENABLE_SSE2)
|
||||
#if defined(ENABLE_SSE2)
|
||||
SoftRasterizerRenderer_SSE2 *oldRenderer = (SoftRasterizerRenderer_SSE2 *)CurrentRenderer;
|
||||
#else
|
||||
SoftRasterizerRenderer *oldRenderer = (SoftRasterizerRenderer *)CurrentRenderer;
|
||||
|
@ -1647,11 +1639,7 @@ Render3DError SoftRasterizerRenderer::UpdateEdgeMarkColorTable(const u16 *edgeMa
|
|||
//we can do this by rendering a 3d frame and then freezing the system, but only changing the edge mark colors
|
||||
for (size_t i = 0; i < 8; i++)
|
||||
{
|
||||
const u16 col = edgeMarkColorTable[i];
|
||||
this->edgeMarkTable[i].color = RGB15TO5555(col, (this->currentRenderState->enableAntialiasing) ? 0x10 : 0x1F);
|
||||
this->edgeMarkTable[i].r = GFX3D_5TO6(this->edgeMarkTable[i].r);
|
||||
this->edgeMarkTable[i].g = GFX3D_5TO6(this->edgeMarkTable[i].g);
|
||||
this->edgeMarkTable[i].b = GFX3D_5TO6(this->edgeMarkTable[i].b);
|
||||
this->edgeMarkTable[i].color = COLOR555TO6665(edgeMarkColorTable[i] & 0x7FFF, (this->currentRenderState->enableAntialiasing) ? 0x10 : 0x1F);
|
||||
|
||||
//zero 20-jun-2013 - this doesnt make any sense. at least, it should be related to the 0x8000 bit. if this is undocumented behaviour, lets write about which scenario proves it here, or which scenario is requiring this code.
|
||||
//// this seems to be the only thing that selectively disables edge marking
|
||||
|
@ -1735,10 +1723,9 @@ Render3DError SoftRasterizerRenderer::UpdateFogTable(const u8 *fogDensityTable)
|
|||
// new multithreaded method.
|
||||
Render3DError SoftRasterizerRenderer::RenderFog(const u8 *densityTable, const u32 color, const u32 offset, const u8 shift, const bool alphaOnly)
|
||||
{
|
||||
u32 r = GFX3D_5TO6((color)&0x1F);
|
||||
u32 g = GFX3D_5TO6((color>>5)&0x1F);
|
||||
u32 b = GFX3D_5TO6((color>>10)&0x1F);
|
||||
u32 a = (color>>16)&0x1F;
|
||||
FragmentColor fogColor;
|
||||
fogColor.color = COLOR555TO6665( color & 0x7FFF, (color>>16) & 0x1F );
|
||||
|
||||
const size_t framebufferFragmentCount = this->_framebufferWidth * this->_framebufferHeight;
|
||||
|
||||
if (!alphaOnly)
|
||||
|
@ -1750,10 +1737,10 @@ Render3DError SoftRasterizerRenderer::RenderFog(const u8 *densityTable, const u3
|
|||
const u8 fog = (this->_framebufferAttributes->isFogged[i] != 0) ? this->fogTable[fogIndex] : 0;
|
||||
|
||||
FragmentColor &destFragmentColor = this->_framebufferColor[i];
|
||||
destFragmentColor.r = ((128-fog)*destFragmentColor.r + r*fog)>>7;
|
||||
destFragmentColor.g = ((128-fog)*destFragmentColor.g + g*fog)>>7;
|
||||
destFragmentColor.b = ((128-fog)*destFragmentColor.b + b*fog)>>7;
|
||||
destFragmentColor.a = ((128-fog)*destFragmentColor.a + a*fog)>>7;
|
||||
destFragmentColor.r = ((128-fog)*destFragmentColor.r + fogColor.r*fog)>>7;
|
||||
destFragmentColor.g = ((128-fog)*destFragmentColor.g + fogColor.g*fog)>>7;
|
||||
destFragmentColor.b = ((128-fog)*destFragmentColor.b + fogColor.b*fog)>>7;
|
||||
destFragmentColor.a = ((128-fog)*destFragmentColor.a + fogColor.a*fog)>>7;
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -1765,7 +1752,7 @@ Render3DError SoftRasterizerRenderer::RenderFog(const u8 *densityTable, const u3
|
|||
const u8 fog = (this->_framebufferAttributes->isFogged[i] != 0) ? this->fogTable[fogIndex] : 0;
|
||||
|
||||
FragmentColor &destFragmentColor = this->_framebufferColor[i];
|
||||
destFragmentColor.a = ((128-fog)*destFragmentColor.a + a*fog)>>7;
|
||||
destFragmentColor.a = ((128-fog)*destFragmentColor.a + fogColor.a*fog)>>7;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1825,10 +1812,8 @@ END_EDGE_MARK: ;
|
|||
|
||||
if (param.enableFog)
|
||||
{
|
||||
const u32 r = GFX3D_5TO6( (param.fogColor ) & 0x1F );
|
||||
const u32 g = GFX3D_5TO6( (param.fogColor >> 5) & 0x1F );
|
||||
const u32 b = GFX3D_5TO6( (param.fogColor >> 10) & 0x1F );
|
||||
const u32 a = (param.fogColor >> 16) & 0x1F;
|
||||
FragmentColor fogColor;
|
||||
fogColor.color = COLOR555TO6665( param.fogColor & 0x7FFF, (param.fogColor>>16) & 0x1F );
|
||||
|
||||
const size_t fogIndex = depth >> 9;
|
||||
assert(fogIndex < 32768);
|
||||
|
@ -1836,12 +1821,12 @@ END_EDGE_MARK: ;
|
|||
|
||||
if (!param.fogAlphaOnly)
|
||||
{
|
||||
dstColor.r = ( (128-fog)*dstColor.r + r*fog ) >> 7;
|
||||
dstColor.g = ( (128-fog)*dstColor.g + g*fog ) >> 7;
|
||||
dstColor.b = ( (128-fog)*dstColor.b + b*fog ) >> 7;
|
||||
dstColor.r = ( (128-fog)*dstColor.r + fogColor.r*fog ) >> 7;
|
||||
dstColor.g = ( (128-fog)*dstColor.g + fogColor.g*fog ) >> 7;
|
||||
dstColor.b = ( (128-fog)*dstColor.b + fogColor.b*fog ) >> 7;
|
||||
}
|
||||
|
||||
dstColor.a = ( (128-fog)*dstColor.a + a*fog ) >> 7;
|
||||
dstColor.a = ( (128-fog)*dstColor.a + fogColor.a*fog ) >> 7;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1854,7 +1839,7 @@ Render3DError SoftRasterizerRenderer::UpdateToonTable(const u16 *toonTableBuffer
|
|||
//convert the toon colors
|
||||
for (size_t i = 0; i < 32; i++)
|
||||
{
|
||||
this->toonColor32LUT[i].color = (RGB15TO32_NOALPHA(toonTableBuffer[i])>>2)&0x3F3F3F3F;
|
||||
this->toonColor32LUT[i].color = ( COLOR555TO888(toonTableBuffer[i] & 0x7FFF) >> 2 ) & 0x003F3F3F;
|
||||
//printf("%d %d %d %d\n", this->toonColor32LUT[i].r, this->toonColor32LUT[i].g, this->toonColor32LUT[i].b, this->toonColor32LUT[i].a);
|
||||
}
|
||||
|
||||
|
@ -1874,7 +1859,7 @@ Render3DError SoftRasterizerRenderer::ClearUsingImage(const u16 *__restrict colo
|
|||
{
|
||||
const size_t ir = readLine + ((x * xRatio) >> 16);
|
||||
|
||||
this->_framebufferColor[iw].color = RGB15TO6665(colorBuffer[ir] & 0x7FFF, (colorBuffer[ir] >> 15) * 0x1F);
|
||||
this->_framebufferColor[iw].color = COLOR555TO6665(colorBuffer[ir] & 0x7FFF, (colorBuffer[ir] >> 15) * 0x1F);
|
||||
this->_framebufferAttributes->depth[iw] = depthBuffer[ir];
|
||||
this->_framebufferAttributes->isFogged[iw] = fogBuffer[ir];
|
||||
this->_framebufferAttributes->opaquePolyID[iw] = polyIDBuffer[ir];
|
||||
|
@ -1887,17 +1872,12 @@ Render3DError SoftRasterizerRenderer::ClearUsingImage(const u16 *__restrict colo
|
|||
return RENDER3DERROR_NOERR;
|
||||
}
|
||||
|
||||
Render3DError SoftRasterizerRenderer::ClearUsingValues(const FragmentColor &clearColor, const FragmentAttributes &clearAttributes) const
|
||||
Render3DError SoftRasterizerRenderer::ClearUsingValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes) const
|
||||
{
|
||||
FragmentColor convertedClearColor = clearColor;
|
||||
convertedClearColor.r = GFX3D_5TO6(clearColor.r);
|
||||
convertedClearColor.g = GFX3D_5TO6(clearColor.g);
|
||||
convertedClearColor.b = GFX3D_5TO6(clearColor.b);
|
||||
|
||||
for (size_t i = 0; i < (this->_framebufferWidth * this->_framebufferHeight); i++)
|
||||
{
|
||||
this->_framebufferAttributes->SetAtIndex(i, clearAttributes);
|
||||
this->_framebufferColor[i] = convertedClearColor;
|
||||
this->_framebufferColor[i] = clearColor6665;
|
||||
}
|
||||
|
||||
return RENDER3DERROR_NOERR;
|
||||
|
@ -2046,14 +2026,9 @@ Render3DError SoftRasterizerRenderer::SetFramebufferSize(size_t w, size_t h)
|
|||
|
||||
#ifdef ENABLE_SSE2
|
||||
|
||||
Render3DError SoftRasterizerRenderer_SSE2::ClearUsingValues(const FragmentColor &clearColor, const FragmentAttributes &clearAttributes) const
|
||||
Render3DError SoftRasterizerRenderer_SSE2::ClearUsingValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes) const
|
||||
{
|
||||
FragmentColor convertedClearColor = clearColor;
|
||||
convertedClearColor.r = GFX3D_5TO6(clearColor.r);
|
||||
convertedClearColor.g = GFX3D_5TO6(clearColor.g);
|
||||
convertedClearColor.b = GFX3D_5TO6(clearColor.b);
|
||||
|
||||
const __m128i color_vec128 = _mm_set1_epi32(convertedClearColor.color);
|
||||
const __m128i color_vec128 = _mm_set1_epi32(clearColor6665.color);
|
||||
const __m128i attrDepth_vec128 = _mm_set1_epi32(clearAttributes.depth);
|
||||
const __m128i attrOpaquePolyID_vec128 = _mm_set1_epi8(clearAttributes.opaquePolyID);
|
||||
const __m128i attrTranslucentPolyID_vec128 = _mm_set1_epi8(clearAttributes.translucentPolyID);
|
||||
|
@ -2086,7 +2061,7 @@ Render3DError SoftRasterizerRenderer_SSE2::ClearUsingValues(const FragmentColor
|
|||
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
this->_framebufferColor[i] = convertedClearColor;
|
||||
this->_framebufferColor[i] = clearColor6665;
|
||||
this->_framebufferAttributes->SetAtIndex(i, clearAttributes);
|
||||
}
|
||||
|
||||
|
|
|
@ -39,9 +39,7 @@ struct SoftRasterizerPostProcessParams
|
|||
bool fogAlphaOnly;
|
||||
};
|
||||
|
||||
#if defined(ENABLE_SSSE3)
|
||||
class SoftRasterizerRenderer : public Render3D_SSSE3
|
||||
#elif defined(ENABLE_SSE2)
|
||||
#if defined(ENABLE_SSE2)
|
||||
class SoftRasterizerRenderer : public Render3D_SSE2
|
||||
#else
|
||||
class SoftRasterizerRenderer : public Render3D
|
||||
|
@ -69,7 +67,7 @@ protected:
|
|||
virtual Render3DError EndRender(const u64 frameCount);
|
||||
|
||||
virtual Render3DError ClearUsingImage(const u16 *__restrict colorBuffer, const u32 *__restrict depthBuffer, const u8 *__restrict fogBuffer, const u8 *__restrict polyIDBuffer);
|
||||
virtual Render3DError ClearUsingValues(const FragmentColor &clearColor, const FragmentAttributes &clearAttributes) const;
|
||||
virtual Render3DError ClearUsingValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes) const;
|
||||
|
||||
public:
|
||||
int _debug_drawClippedUserPoly;
|
||||
|
@ -106,16 +104,7 @@ public:
|
|||
|
||||
class SoftRasterizerRenderer_SSE2 : public SoftRasterizerRenderer
|
||||
{
|
||||
virtual Render3DError ClearUsingValues(const FragmentColor &clearColor, const FragmentAttributes &clearAttributes) const;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_SSSE3
|
||||
|
||||
class SoftRasterizerRenderer_SSSE3 : public SoftRasterizerRenderer_SSE2
|
||||
{
|
||||
|
||||
virtual Render3DError ClearUsingValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes) const;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
|
@ -24,10 +24,6 @@
|
|||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_SSSE3
|
||||
#include <tmmintrin.h>
|
||||
#endif
|
||||
|
||||
#include "bits.h"
|
||||
#include "common.h"
|
||||
#include "gfx3d.h"
|
||||
|
@ -612,23 +608,11 @@ Render3DError Render3D::FlushFramebuffer(const FragmentColor *__restrict srcFram
|
|||
{
|
||||
if ( (this->_internalRenderingFormat == NDSColorFormat_BGR888_Rev) && (this->_outputFormat == NDSColorFormat_BGR666_Rev) )
|
||||
{
|
||||
for (size_t i = 0; i < pixCount; i++)
|
||||
{
|
||||
dstFramebuffer[i].r = srcFramebuffer[i].r >> 2;
|
||||
dstFramebuffer[i].g = srcFramebuffer[i].g >> 2;
|
||||
dstFramebuffer[i].b = srcFramebuffer[i].b >> 2;
|
||||
dstFramebuffer[i].a = srcFramebuffer[i].a >> 3;
|
||||
}
|
||||
ConvertColorBuffers8888To6665<false>(srcFramebuffer, dstFramebuffer, pixCount);
|
||||
}
|
||||
else if ( (this->_internalRenderingFormat == NDSColorFormat_BGR666_Rev) && (this->_outputFormat == NDSColorFormat_BGR888_Rev) )
|
||||
{
|
||||
for (size_t i = 0; i < pixCount; i++)
|
||||
{
|
||||
dstFramebuffer[i].r = material_6bit_to_8bit[srcFramebuffer[i].r];
|
||||
dstFramebuffer[i].g = material_6bit_to_8bit[srcFramebuffer[i].g];
|
||||
dstFramebuffer[i].b = material_6bit_to_8bit[srcFramebuffer[i].b];
|
||||
dstFramebuffer[i].a = material_5bit_to_8bit[srcFramebuffer[i].a];
|
||||
}
|
||||
ConvertColorBuffers6665To8888<false>(srcFramebuffer, dstFramebuffer, pixCount);
|
||||
}
|
||||
else if ( ((this->_internalRenderingFormat == NDSColorFormat_BGR666_Rev) && (this->_outputFormat == NDSColorFormat_BGR666_Rev)) ||
|
||||
((this->_internalRenderingFormat == NDSColorFormat_BGR888_Rev) && (this->_outputFormat == NDSColorFormat_BGR888_Rev)) )
|
||||
|
@ -639,9 +623,13 @@ Render3DError Render3D::FlushFramebuffer(const FragmentColor *__restrict srcFram
|
|||
|
||||
if (dstRGBA5551 != NULL)
|
||||
{
|
||||
for (size_t i = 0; i < pixCount; i++)
|
||||
if (this->_outputFormat == NDSColorFormat_BGR666_Rev)
|
||||
{
|
||||
dstRGBA5551[i] = R6G6B6TORGB15(srcFramebuffer[i].r, srcFramebuffer[i].g, srcFramebuffer[i].b) | ((srcFramebuffer[i].a == 0) ? 0x0000 : 0x8000);
|
||||
ConvertColorBuffers6665To5551<false>(srcFramebuffer, dstRGBA5551, pixCount);
|
||||
}
|
||||
else if (this ->_outputFormat == NDSColorFormat_BGR888_Rev)
|
||||
{
|
||||
ConvertColorBuffers8888To5551<false>(srcFramebuffer, dstRGBA5551, pixCount);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -657,20 +645,8 @@ Render3DError Render3D::ClearFramebuffer(const GFX3D_State &renderState)
|
|||
{
|
||||
Render3DError error = RENDER3DERROR_NOERR;
|
||||
|
||||
FragmentColor clearColor;
|
||||
|
||||
#ifdef LOCAL_LE
|
||||
clearColor.r = renderState.clearColor & 0x1F;
|
||||
clearColor.g = (renderState.clearColor >> 5) & 0x1F;
|
||||
clearColor.b = (renderState.clearColor >> 10) & 0x1F;
|
||||
clearColor.a = (renderState.clearColor >> 16) & 0x1F;
|
||||
#else
|
||||
const u32 clearColorSwapped = LE_TO_LOCAL_32(renderState.clearColor);
|
||||
clearColor.r = clearColorSwapped & 0x1F;
|
||||
clearColor.g = (clearColorSwapped >> 5) & 0x1F;
|
||||
clearColor.b = (clearColorSwapped >> 10) & 0x1F;
|
||||
clearColor.a = (clearColorSwapped >> 16) & 0x1F;
|
||||
#endif
|
||||
FragmentColor clearColor6665;
|
||||
clearColor6665.color = COLOR555TO6665(renderState.clearColor & 0x7FFF, (renderState.clearColor >> 16) & 0x1F);
|
||||
|
||||
FragmentAttributes clearFragment;
|
||||
clearFragment.opaquePolyID = (renderState.clearColor >> 24) & 0x3F;
|
||||
|
@ -732,12 +708,12 @@ Render3DError Render3D::ClearFramebuffer(const GFX3D_State &renderState)
|
|||
error = this->ClearUsingImage(this->clearImageColor16Buffer, this->clearImageDepthBuffer, this->clearImageFogBuffer, this->clearImagePolyIDBuffer);
|
||||
if (error != RENDER3DERROR_NOERR)
|
||||
{
|
||||
error = this->ClearUsingValues(clearColor, clearFragment);
|
||||
error = this->ClearUsingValues(clearColor6665, clearFragment);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
error = this->ClearUsingValues(clearColor, clearFragment);
|
||||
error = this->ClearUsingValues(clearColor6665, clearFragment);
|
||||
}
|
||||
|
||||
return error;
|
||||
|
@ -748,7 +724,7 @@ Render3DError Render3D::ClearUsingImage(const u16 *__restrict colorBuffer, const
|
|||
return RENDER3DERROR_NOERR;
|
||||
}
|
||||
|
||||
Render3DError Render3D::ClearUsingValues(const FragmentColor &clearColor, const FragmentAttributes &clearAttributes) const
|
||||
Render3DError Render3D::ClearUsingValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes) const
|
||||
{
|
||||
return RENDER3DERROR_NOERR;
|
||||
}
|
||||
|
@ -831,130 +807,12 @@ Render3DError Render3D::VramReconfigureSignal()
|
|||
|
||||
#ifdef ENABLE_SSE2
|
||||
|
||||
Render3DError Render3D_SSE2::FlushFramebuffer(const FragmentColor *__restrict srcFramebuffer, FragmentColor *__restrict dstFramebuffer, u16 *__restrict dstRGBA5551)
|
||||
{
|
||||
if ( (dstFramebuffer == NULL) && (dstRGBA5551 == NULL) )
|
||||
{
|
||||
return RENDER3DERROR_NOERR;
|
||||
}
|
||||
|
||||
size_t i = 0;
|
||||
const size_t pixCount = this->_framebufferWidth * this->_framebufferHeight;
|
||||
const size_t ssePixCount = pixCount - (pixCount % 4);
|
||||
|
||||
if (dstFramebuffer != NULL)
|
||||
{
|
||||
if ( (this->_internalRenderingFormat == NDSColorFormat_BGR888_Rev) && (this->_outputFormat == NDSColorFormat_BGR666_Rev) )
|
||||
{
|
||||
for (; i < ssePixCount; i += 4)
|
||||
{
|
||||
// Convert to RGBA6665
|
||||
__m128i color6665 = _mm_load_si128((__m128i *)(srcFramebuffer + i));
|
||||
__m128i a = _mm_srli_epi32(_mm_and_si128(color6665, _mm_set1_epi32(0xF8000000)), 3);
|
||||
color6665 = _mm_srli_epi32(_mm_and_si128(color6665, _mm_set1_epi32(0x00FCFCFC)), 2);
|
||||
|
||||
color6665 = _mm_or_si128(color6665, a);
|
||||
_mm_store_si128((__m128i *)(dstFramebuffer + i), color6665);
|
||||
}
|
||||
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dstFramebuffer[i].r = srcFramebuffer[i].r >> 2;
|
||||
dstFramebuffer[i].g = srcFramebuffer[i].g >> 2;
|
||||
dstFramebuffer[i].b = srcFramebuffer[i].b >> 2;
|
||||
dstFramebuffer[i].a = srcFramebuffer[i].a >> 3;
|
||||
}
|
||||
}
|
||||
else if ( (this->_internalRenderingFormat == NDSColorFormat_BGR666_Rev) && (this->_outputFormat == NDSColorFormat_BGR888_Rev) )
|
||||
{
|
||||
for (; i < ssePixCount; i += 4)
|
||||
{
|
||||
// Convert to RGBA8888:
|
||||
// RGB 6-bit to 8-bit formula: dstRGB8 = (srcRGB6 << 2) | ((srcRGB6 >> 4) & 0x03)
|
||||
// Alpha 5-bit to 8-bit formula: dstA8 = (srcA5 << 3) | ((srcA5 >> 2) & 0x07)
|
||||
__m128i color8888 = _mm_load_si128((__m128i *)(srcFramebuffer + i));
|
||||
__m128i a = _mm_or_si128( _mm_and_si128(_mm_slli_epi32(color8888, 3), _mm_set1_epi8(0xF8)), _mm_and_si128(_mm_srli_epi32(color8888, 2), _mm_set1_epi8(0x07)) );
|
||||
color8888 = _mm_or_si128( _mm_and_si128(_mm_slli_epi32(color8888, 2), _mm_set1_epi8(0xFC)), _mm_and_si128(_mm_srli_epi32(color8888, 4), _mm_set1_epi8(0x03)) );
|
||||
|
||||
color8888 = _mm_or_si128(_mm_and_si128(color8888, _mm_set1_epi32(0x00FFFFFF)), _mm_and_si128(a, _mm_set1_epi32(0xFF000000)));
|
||||
_mm_store_si128((__m128i *)(dstFramebuffer + i), color8888);
|
||||
}
|
||||
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dstFramebuffer[i].r = material_6bit_to_8bit[srcFramebuffer[i].r];
|
||||
dstFramebuffer[i].g = material_6bit_to_8bit[srcFramebuffer[i].g];
|
||||
dstFramebuffer[i].b = material_6bit_to_8bit[srcFramebuffer[i].b];
|
||||
dstFramebuffer[i].a = material_5bit_to_8bit[srcFramebuffer[i].a];
|
||||
}
|
||||
}
|
||||
else if ( ((this->_internalRenderingFormat == NDSColorFormat_BGR666_Rev) && (this->_outputFormat == NDSColorFormat_BGR666_Rev)) ||
|
||||
((this->_internalRenderingFormat == NDSColorFormat_BGR888_Rev) && (this->_outputFormat == NDSColorFormat_BGR888_Rev)) )
|
||||
{
|
||||
memcpy(dstFramebuffer, srcFramebuffer, pixCount * sizeof(FragmentColor));
|
||||
}
|
||||
}
|
||||
|
||||
if (dstRGBA5551 != NULL)
|
||||
{
|
||||
for (; i < ssePixCount; i += 4)
|
||||
{
|
||||
// Convert to RGBA5551
|
||||
__m128i color5551 = _mm_load_si128((__m128i *)(srcFramebuffer + i));
|
||||
__m128i r = _mm_and_si128(color5551, _mm_set1_epi32(0x0000003E)); // Read from R
|
||||
r = _mm_srli_epi32(r, 1); // Shift to R
|
||||
|
||||
__m128i g = _mm_and_si128(color5551, _mm_set1_epi32(0x00003E00)); // Read from G
|
||||
g = _mm_srli_epi32(g, 4); // Shift in G
|
||||
|
||||
__m128i b = _mm_and_si128(color5551, _mm_set1_epi32(0x003E0000)); // Read from B
|
||||
b = _mm_srli_epi32(b, 7); // Shift to B
|
||||
|
||||
__m128i a = _mm_and_si128(color5551, _mm_set1_epi32(0xFF000000)); // Read from A
|
||||
a = _mm_cmpeq_epi32(a, _mm_setzero_si128()); // Determine A
|
||||
|
||||
// From here on, we're going to do an SSE2 trick to pack 32-bit down to unsigned
|
||||
// 16-bit. Since SSE2 only has packssdw (signed saturated 16-bit pack), using
|
||||
// packssdw on the alpha bit (0x8000) will result in a value of 0x7FFF, which is
|
||||
// incorrect. Now if we were to use SSE4.1's packusdw (unsigned saturated 16-bit
|
||||
// pack), we wouldn't have to go through this hassle. But not everyone has an
|
||||
// SSE4.1-capable CPU, so doing this the SSE2 way is more guaranteed to work for
|
||||
// everyone's CPU.
|
||||
//
|
||||
// To use packssdw, we take a bit one position lower for the alpha bit, run
|
||||
// packssdw, then shift the bit back to its original position. Then we por the
|
||||
// alpha vector with the post-packed color vector to get the final color.
|
||||
|
||||
a = _mm_andnot_si128(a, _mm_set1_epi32(0x00004000)); // Mask out the bit before A
|
||||
a = _mm_packs_epi32(a, _mm_setzero_si128()); // Pack 32-bit down to 16-bit
|
||||
a = _mm_slli_epi16(a, 1); // Shift the A bit back to where it needs to be
|
||||
|
||||
// Assemble the RGB colors, pack the 32-bit color into a signed 16-bit color, then por the alpha bit back in.
|
||||
color5551 = _mm_or_si128(_mm_or_si128(r, g), b);
|
||||
color5551 = _mm_packs_epi32(color5551, _mm_setzero_si128());
|
||||
color5551 = _mm_or_si128(color5551, a);
|
||||
|
||||
_mm_storel_epi64((__m128i *)(dstRGBA5551 + i), color5551);
|
||||
}
|
||||
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dstRGBA5551[i] = R6G6B6TORGB15(srcFramebuffer[i].r, srcFramebuffer[i].g, srcFramebuffer[i].b) | ((srcFramebuffer[i].a == 0) ? 0x0000 : 0x8000);
|
||||
}
|
||||
}
|
||||
|
||||
return RENDER3DERROR_NOERR;
|
||||
}
|
||||
|
||||
Render3DError Render3D_SSE2::ClearFramebuffer(const GFX3D_State &renderState)
|
||||
{
|
||||
Render3DError error = RENDER3DERROR_NOERR;
|
||||
|
||||
FragmentColor clearColor;
|
||||
clearColor.r = renderState.clearColor & 0x1F;
|
||||
clearColor.g = (renderState.clearColor >> 5) & 0x1F;
|
||||
clearColor.b = (renderState.clearColor >> 10) & 0x1F;
|
||||
clearColor.a = (renderState.clearColor >> 16) & 0x1F;
|
||||
FragmentColor clearColor6665;
|
||||
clearColor6665.color = COLOR555TO6665(renderState.clearColor & 0x7FFF, (renderState.clearColor >> 16) & 0x1F);
|
||||
|
||||
FragmentAttributes clearFragment;
|
||||
clearFragment.opaquePolyID = (renderState.clearColor >> 24) & 0x3F;
|
||||
|
@ -1080,12 +938,12 @@ Render3DError Render3D_SSE2::ClearFramebuffer(const GFX3D_State &renderState)
|
|||
error = this->ClearUsingImage(this->clearImageColor16Buffer, this->clearImageDepthBuffer, this->clearImageFogBuffer, this->clearImagePolyIDBuffer);
|
||||
if (error != RENDER3DERROR_NOERR)
|
||||
{
|
||||
error = this->ClearUsingValues(clearColor, clearFragment);
|
||||
error = this->ClearUsingValues(clearColor6665, clearFragment);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
error = this->ClearUsingValues(clearColor, clearFragment);
|
||||
error = this->ClearUsingValues(clearColor6665, clearFragment);
|
||||
}
|
||||
|
||||
return error;
|
||||
|
|
|
@ -149,7 +149,7 @@ protected:
|
|||
virtual Render3DError FlushFramebuffer(const FragmentColor *__restrict srcFramebuffer, FragmentColor *__restrict dstFramebuffer, u16 *__restrict dstRGBA5551);
|
||||
|
||||
virtual Render3DError ClearUsingImage(const u16 *__restrict colorBuffer, const u32 *__restrict depthBuffer, const u8 *__restrict fogBuffer, const u8 *__restrict polyIDBuffer);
|
||||
virtual Render3DError ClearUsingValues(const FragmentColor &clearColor, const FragmentAttributes &clearAttributes) const;
|
||||
virtual Render3DError ClearUsingValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes) const;
|
||||
|
||||
virtual Render3DError SetupPolygon(const POLY &thePoly);
|
||||
virtual Render3DError SetupTexture(const POLY &thePoly, bool enableTexturing);
|
||||
|
@ -201,22 +201,10 @@ public:
|
|||
|
||||
class Render3D_SSE2 : public Render3D
|
||||
{
|
||||
protected:
|
||||
virtual Render3DError FlushFramebuffer(const FragmentColor *__restrict srcFramebuffer, FragmentColor *__restrict dstFramebuffer, u16 *__restrict dstRGBA5551);
|
||||
|
||||
public:
|
||||
virtual Render3DError ClearFramebuffer(const GFX3D_State &renderState);
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_SSSE3
|
||||
|
||||
class Render3D_SSSE3 : public Render3D_SSE2
|
||||
{
|
||||
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
#endif // RENDER3D_H
|
||||
|
|
|
@ -37,7 +37,7 @@ using std::max;
|
|||
//only dump this from ogl renderer. for now, softrasterizer creates things in an incompatible pixel format
|
||||
//#define DEBUG_DUMP_TEXTURE
|
||||
|
||||
#define CONVERT(color,alpha) ((TEXFORMAT == TexFormat_32bpp)?(RGB15TO32(color,alpha)):RGB15TO6665(color,alpha))
|
||||
#define CONVERT(color) ((TEXFORMAT == TexFormat_32bpp)?(COLOR555TO8888_OPAQUE(color)):COLOR555TO6665_OPAQUE(color))
|
||||
|
||||
//This class represents a number of regions of memory which should be viewed as contiguous
|
||||
class MemSpan
|
||||
|
@ -403,8 +403,7 @@ public:
|
|||
// format that is not A3I5 or A5I3), set all transparent pixels to 0 so that 3D
|
||||
// renderers can assume that the transparent color is 0 during texture sampling.
|
||||
|
||||
const u8 opaqueColor = (TEXFORMAT == TexFormat_32bpp) ? 0xFF : 0x1F;
|
||||
const u8 palZeroTransparent = ( 1 - ((format>>29) & 1) ) * opaqueColor;
|
||||
const bool isPalZeroTransparent = ( ((format >> 29) & 1) != 0 );
|
||||
|
||||
switch (newitem->mode)
|
||||
{
|
||||
|
@ -415,12 +414,12 @@ public:
|
|||
adr = ms.items[j].ptr;
|
||||
for(u32 x = 0; x < ms.items[j].len; x++)
|
||||
{
|
||||
u16 c = pal[*adr&31];
|
||||
u16 c = pal[*adr&31] & 0x7FFF;
|
||||
u8 alpha = *adr>>5;
|
||||
if(TEXFORMAT == TexFormat_15bpp)
|
||||
*dwdst++ = RGB15TO6665(c,material_3bit_to_5bit[alpha]);
|
||||
*dwdst++ = COLOR555TO6665(c,material_3bit_to_5bit[alpha]);
|
||||
else
|
||||
*dwdst++ = RGB15TO32(c,material_3bit_to_8bit[alpha]);
|
||||
*dwdst++ = COLOR555TO8888(c,material_3bit_to_8bit[alpha]);
|
||||
adr++;
|
||||
}
|
||||
}
|
||||
|
@ -429,7 +428,7 @@ public:
|
|||
|
||||
case TEXMODE_I2:
|
||||
{
|
||||
if (palZeroTransparent == 0)
|
||||
if (isPalZeroTransparent)
|
||||
{
|
||||
for(int j=0;j<ms.numItems;j++)
|
||||
{
|
||||
|
@ -437,23 +436,18 @@ public:
|
|||
for(u32 x = 0; x < ms.items[j].len; x++)
|
||||
{
|
||||
u8 bits;
|
||||
u16 c;
|
||||
|
||||
bits = (*adr)&0x3;
|
||||
c = pal[bits];
|
||||
*dwdst++ = (bits == 0) ? 0 : CONVERT(c,opaqueColor);
|
||||
*dwdst++ = (bits == 0) ? 0 : CONVERT(pal[bits] & 0x7FFF);
|
||||
|
||||
bits = ((*adr)>>2)&0x3;
|
||||
c = pal[bits];
|
||||
*dwdst++ = (bits == 0) ? 0 : CONVERT(c,opaqueColor);
|
||||
*dwdst++ = (bits == 0) ? 0 : CONVERT(pal[bits] & 0x7FFF);
|
||||
|
||||
bits = ((*adr)>>4)&0x3;
|
||||
c = pal[bits];
|
||||
*dwdst++ = (bits == 0) ? 0 : CONVERT(c,opaqueColor);
|
||||
*dwdst++ = (bits == 0) ? 0 : CONVERT(pal[bits] & 0x7FFF);
|
||||
|
||||
bits = ((*adr)>>6)&0x3;
|
||||
c = pal[bits];
|
||||
*dwdst++ = (bits == 0) ? 0 : CONVERT(c,opaqueColor);
|
||||
*dwdst++ = (bits == 0) ? 0 : CONVERT(pal[bits] & 0x7FFF);
|
||||
|
||||
adr++;
|
||||
}
|
||||
|
@ -470,20 +464,20 @@ public:
|
|||
u16 c;
|
||||
|
||||
bits = (*adr)&0x3;
|
||||
c = pal[bits];
|
||||
*dwdst++ = CONVERT(c,opaqueColor);
|
||||
c = pal[bits] & 0x7FFF;
|
||||
*dwdst++ = CONVERT(c);
|
||||
|
||||
bits = ((*adr)>>2)&0x3;
|
||||
c = pal[bits];
|
||||
*dwdst++ = CONVERT(c,opaqueColor);
|
||||
c = pal[bits] & 0x7FFF;
|
||||
*dwdst++ = CONVERT(c);
|
||||
|
||||
bits = ((*adr)>>4)&0x3;
|
||||
c = pal[bits];
|
||||
*dwdst++ = CONVERT(c,opaqueColor);
|
||||
c = pal[bits] & 0x7FFF;
|
||||
*dwdst++ = CONVERT(c);
|
||||
|
||||
bits = ((*adr)>>6)&0x3;
|
||||
c = pal[bits];
|
||||
*dwdst++ = CONVERT(c,opaqueColor);
|
||||
c = pal[bits] & 0x7FFF;
|
||||
*dwdst++ = CONVERT(c);
|
||||
|
||||
adr++;
|
||||
}
|
||||
|
@ -494,7 +488,7 @@ public:
|
|||
|
||||
case TEXMODE_I4:
|
||||
{
|
||||
if (palZeroTransparent == 0)
|
||||
if (isPalZeroTransparent)
|
||||
{
|
||||
for(int j=0;j<ms.numItems;j++)
|
||||
{
|
||||
|
@ -502,15 +496,12 @@ public:
|
|||
for(u32 x = 0; x < ms.items[j].len; x++)
|
||||
{
|
||||
u8 bits;
|
||||
u16 c;
|
||||
|
||||
bits = (*adr)&0xF;
|
||||
c = pal[bits];
|
||||
*dwdst++ = (bits == 0) ? 0 : CONVERT(c,opaqueColor);
|
||||
*dwdst++ = (bits == 0) ? 0 : CONVERT(pal[bits] & 0x7FFF);
|
||||
|
||||
bits = ((*adr)>>4);
|
||||
c = pal[bits];
|
||||
*dwdst++ = (bits == 0) ? 0 : CONVERT(c,opaqueColor);
|
||||
*dwdst++ = (bits == 0) ? 0 : CONVERT(pal[bits] & 0x7FFF);
|
||||
adr++;
|
||||
}
|
||||
}
|
||||
|
@ -526,12 +517,12 @@ public:
|
|||
u16 c;
|
||||
|
||||
bits = (*adr)&0xF;
|
||||
c = pal[bits];
|
||||
*dwdst++ = CONVERT(c,opaqueColor);
|
||||
c = pal[bits] & 0x7FFF;
|
||||
*dwdst++ = CONVERT(c);
|
||||
|
||||
bits = ((*adr)>>4);
|
||||
c = pal[bits];
|
||||
*dwdst++ = CONVERT(c,opaqueColor);
|
||||
c = pal[bits] & 0x7FFF;
|
||||
*dwdst++ = CONVERT(c);
|
||||
adr++;
|
||||
}
|
||||
}
|
||||
|
@ -541,15 +532,14 @@ public:
|
|||
|
||||
case TEXMODE_I8:
|
||||
{
|
||||
if (palZeroTransparent == 0)
|
||||
if (isPalZeroTransparent)
|
||||
{
|
||||
for(int j=0;j<ms.numItems;j++)
|
||||
{
|
||||
adr = ms.items[j].ptr;
|
||||
for(u32 x = 0; x < ms.items[j].len; ++x)
|
||||
{
|
||||
u16 c = pal[*adr];
|
||||
*dwdst++ = (*adr == 0) ? 0 : CONVERT(c,opaqueColor);
|
||||
*dwdst++ = (*adr == 0) ? 0 : CONVERT(pal[*adr] & 0x7FFF);
|
||||
adr++;
|
||||
}
|
||||
}
|
||||
|
@ -561,8 +551,8 @@ public:
|
|||
adr = ms.items[j].ptr;
|
||||
for(u32 x = 0; x < ms.items[j].len; ++x)
|
||||
{
|
||||
u16 c = pal[*adr];
|
||||
*dwdst++ = CONVERT(c,opaqueColor);
|
||||
const u16 c = pal[*adr] & 0x7FFF;
|
||||
*dwdst++ = CONVERT(c);
|
||||
adr++;
|
||||
}
|
||||
}
|
||||
|
@ -572,13 +562,14 @@ public:
|
|||
|
||||
case TEXMODE_4X4:
|
||||
{
|
||||
if(ms.numItems != 1) {
|
||||
if (ms.numItems != 1)
|
||||
{
|
||||
PROGINFO("Your 4x4 texture has overrun its texture slot.\n");
|
||||
}
|
||||
//this check isnt necessary since the addressing is tied to the texture data which will also run out:
|
||||
//if(msIndex.numItems != 1) PROGINFO("Your 4x4 texture index has overrun its slot.\n");
|
||||
|
||||
#define PAL4X4(offset) LE_TO_LOCAL_16( *(u16*)( MMU.texInfo.texPalSlot[((paletteAddress + (offset)*2)>>14)&0x7] + ((paletteAddress + (offset)*2)&0x3FFF) ) )
|
||||
#define PAL4X4(offset) ( LE_TO_LOCAL_16( *(u16*)( MMU.texInfo.texPalSlot[((paletteAddress + (offset)*2)>>14)&0x7] + ((paletteAddress + (offset)*2)&0x3FFF) ) ) & 0x7FFF )
|
||||
|
||||
u16* slot1;
|
||||
u32* map = (u32*)ms.items[0].ptr;
|
||||
|
@ -603,31 +594,32 @@ public:
|
|||
((y<<2)+2)*sizeX,((y<<2)+3)*sizeX};
|
||||
for (int x = 0; x < xTmpSize; x ++, d++)
|
||||
{
|
||||
if(d >= limit)
|
||||
if (d >= limit)
|
||||
dead = true;
|
||||
|
||||
if(dead) {
|
||||
if (dead)
|
||||
{
|
||||
for (int sy = 0; sy < 4; sy++)
|
||||
{
|
||||
u32 currentPos = (x<<2) + tmpPos[sy];
|
||||
const u32 currentPos = (x<<2) + tmpPos[sy];
|
||||
dwdst[currentPos] = dwdst[currentPos+1] = dwdst[currentPos+2] = dwdst[currentPos+3] = 0;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
u32 currBlock = LE_TO_LOCAL_32(map[d]);
|
||||
u16 pal1 = LE_TO_LOCAL_16(slot1[d]);
|
||||
u16 pal1offset = (pal1 & 0x3FFF)<<1;
|
||||
u8 mode = pal1>>14;
|
||||
const u32 currBlock = LE_TO_LOCAL_32(map[d]);
|
||||
const u16 pal1 = LE_TO_LOCAL_16(slot1[d]);
|
||||
const u16 pal1offset = (pal1 & 0x3FFF)<<1;
|
||||
const u8 mode = pal1>>14;
|
||||
u32 tmp_col[4];
|
||||
|
||||
tmp_col[0] = RGB15TO32( PAL4X4(pal1offset), 0xFF );
|
||||
tmp_col[1] = RGB15TO32( PAL4X4(pal1offset+1), 0xFF );
|
||||
tmp_col[0] = COLOR555TO8888_OPAQUE( PAL4X4(pal1offset) );
|
||||
tmp_col[1] = COLOR555TO8888_OPAQUE( PAL4X4(pal1offset+1) );
|
||||
|
||||
switch (mode)
|
||||
{
|
||||
case 0:
|
||||
tmp_col[2] = RGB15TO32( PAL4X4(pal1offset+2), 0xFF );
|
||||
tmp_col[2] = COLOR555TO8888_OPAQUE( PAL4X4(pal1offset+2) );
|
||||
tmp_col[3] = 0x00000000;
|
||||
break;
|
||||
|
||||
|
@ -647,8 +639,8 @@ public:
|
|||
break;
|
||||
|
||||
case 2:
|
||||
tmp_col[2] = RGB15TO32( PAL4X4(pal1offset+2), 0xFF );
|
||||
tmp_col[3] = RGB15TO32( PAL4X4(pal1offset+3), 0xFF );
|
||||
tmp_col[2] = COLOR555TO8888_OPAQUE( PAL4X4(pal1offset+2) );
|
||||
tmp_col[3] = COLOR555TO8888_OPAQUE( PAL4X4(pal1offset+3) );
|
||||
break;
|
||||
|
||||
case 3:
|
||||
|
@ -676,13 +668,13 @@ public:
|
|||
( ((g0*3 + g1*5)>>6) << 5 ) |
|
||||
( ((b0*3 + b1*5)>>6) << 10 );
|
||||
|
||||
tmp_col[2] = RGB15TO32(tmp1, 0xFF);
|
||||
tmp_col[3] = RGB15TO32(tmp2, 0xFF);
|
||||
tmp_col[2] = COLOR555TO8888_OPAQUE(tmp1);
|
||||
tmp_col[3] = COLOR555TO8888_OPAQUE(tmp2);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if(TEXFORMAT==TexFormat_15bpp)
|
||||
if (TEXFORMAT==TexFormat_15bpp)
|
||||
{
|
||||
for (size_t i = 0; i < 4; i++)
|
||||
{
|
||||
|
@ -706,8 +698,8 @@ public:
|
|||
for (size_t sy = 0; sy < 4; sy++)
|
||||
{
|
||||
// Texture offset
|
||||
u32 currentPos = (x<<2) + tmpPos[sy];
|
||||
u8 currRow = (u8)((currBlock>>(sy<<3))&0xFF);
|
||||
const u32 currentPos = (x<<2) + tmpPos[sy];
|
||||
const u8 currRow = (u8)((currBlock>>(sy<<3))&0xFF);
|
||||
|
||||
dwdst[currentPos ] = tmp_col[ currRow &3];
|
||||
dwdst[currentPos+1] = tmp_col[(currRow>>2)&3];
|
||||
|
@ -721,17 +713,17 @@ public:
|
|||
|
||||
case TEXMODE_A5I3:
|
||||
{
|
||||
for(int j=0;j<ms.numItems;j++)
|
||||
for (int j = 0; j < ms.numItems; j++)
|
||||
{
|
||||
adr = ms.items[j].ptr;
|
||||
for(u32 x = 0; x < ms.items[j].len; ++x)
|
||||
for (u32 x = 0; x < ms.items[j].len; ++x)
|
||||
{
|
||||
u16 c = pal[*adr&0x07];
|
||||
u8 alpha = (*adr>>3);
|
||||
if(TEXFORMAT == TexFormat_15bpp)
|
||||
*dwdst++ = RGB15TO6665(c,alpha);
|
||||
const u16 c = pal[*adr&0x07] & 0x7FFF;
|
||||
const u8 alpha = (*adr>>3);
|
||||
if (TEXFORMAT == TexFormat_15bpp)
|
||||
*dwdst++ = COLOR555TO6665(c,alpha);
|
||||
else
|
||||
*dwdst++ = RGB15TO32(c,material_5bit_to_8bit[alpha]);
|
||||
*dwdst++ = COLOR555TO8888(c,material_5bit_to_8bit[alpha]);
|
||||
adr++;
|
||||
}
|
||||
}
|
||||
|
@ -740,15 +732,15 @@ public:
|
|||
|
||||
case TEXMODE_16BPP:
|
||||
{
|
||||
for(int j=0;j<ms.numItems;j++)
|
||||
for (int j = 0; j < ms.numItems; j++)
|
||||
{
|
||||
u16* map = (u16*)ms.items[j].ptr;
|
||||
int len = ms.items[j].len>>1;
|
||||
const u16 *map = (u16*)ms.items[j].ptr;
|
||||
const int len = ms.items[j].len>>1;
|
||||
|
||||
for(int x = 0; x < len; ++x)
|
||||
for (int x = 0; x < len; ++x)
|
||||
{
|
||||
u16 c = map[x];
|
||||
*dwdst++ = (c & 0x8000) ? CONVERT(c&0x7FFF,opaqueColor) : 0;
|
||||
const u16 c = map[x];
|
||||
*dwdst++ = (c & 0x8000) ? CONVERT(c&0x7FFF) : 0;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
|
Loading…
Reference in New Issue