Core: Move the low-level vector, vertex coordinate, and color data type declarations into types.h so that they can be used universally. Also rename these data types so that their names are more descriptive and more consistent with one another.

- Add SIMD-float32 data types, and also add macros to track SIMD data-type availability.
- Also fix some bugs where 3D would fail to render on big-endian systems. (Regression from commit a67e040.)
This commit is contained in:
rogerman 2023-02-27 15:39:17 -08:00
parent d9be9c6bf6
commit cda8cb5686
19 changed files with 789 additions and 746 deletions

View File

@ -206,7 +206,7 @@ GPUEngineBase::GPUEngineBase()
_asyncClearLineCustom = 0; _asyncClearLineCustom = 0;
_asyncClearInterrupt = 0; _asyncClearInterrupt = 0;
_asyncClearBackdropColor16 = 0; _asyncClearBackdropColor16 = 0;
_asyncClearBackdropColor32.color = 0; _asyncClearBackdropColor32.value = 0;
_asyncClearIsRunning = false; _asyncClearIsRunning = false;
_asyncClearUseInternalCustomBuffer = false; _asyncClearUseInternalCustomBuffer = false;
@ -375,7 +375,7 @@ void GPUEngineBase::Reset()
renderState.selectedBGLayer = &this->_BGLayer[GPULayerID_BG0]; renderState.selectedBGLayer = &this->_BGLayer[GPULayerID_BG0];
renderState.backdropColor16 = LE_TO_LOCAL_16(this->_paletteBG[0]) & 0x7FFF; renderState.backdropColor16 = LE_TO_LOCAL_16(this->_paletteBG[0]) & 0x7FFF;
renderState.workingBackdropColor16 = renderState.backdropColor16; renderState.workingBackdropColor16 = renderState.backdropColor16;
renderState.workingBackdropColor32.color = LOCAL_TO_LE_32( (this->_targetDisplay->GetColorFormat() == NDSColorFormat_BGR666_Rev) ? COLOR555TO666(LOCAL_TO_LE_16(renderState.workingBackdropColor16)) : COLOR555TO888(LOCAL_TO_LE_16(renderState.workingBackdropColor16)) ); renderState.workingBackdropColor32.value = LOCAL_TO_LE_32( (this->_targetDisplay->GetColorFormat() == NDSColorFormat_BGR666_Rev) ? COLOR555TO666(LOCAL_TO_LE_16(renderState.workingBackdropColor16)) : COLOR555TO888(LOCAL_TO_LE_16(renderState.workingBackdropColor16)) );
renderState.colorEffect = (ColorEffect)this->_IORegisterMap->BLDCNT.ColorEffect; renderState.colorEffect = (ColorEffect)this->_IORegisterMap->BLDCNT.ColorEffect;
renderState.blendEVA = 0; renderState.blendEVA = 0;
renderState.blendEVB = 0; renderState.blendEVB = 0;
@ -693,7 +693,7 @@ void GPUEngineBase::RenderLineClearAsync()
case NDSColorFormat_BGR666_Rev: case NDSColorFormat_BGR666_Rev:
case NDSColorFormat_BGR888_Rev: case NDSColorFormat_BGR888_Rev:
memset_u32(targetBufferHead + (lineInfo.blockOffsetCustom * sizeof(FragmentColor)), this->_asyncClearBackdropColor32.color, lineInfo.pixelCount); memset_u32(targetBufferHead + (lineInfo.blockOffsetCustom * sizeof(Color4u8)), this->_asyncClearBackdropColor32.value, lineInfo.pixelCount);
break; break;
} }
@ -716,9 +716,9 @@ void GPUEngineBase::RenderLineClearAsync()
} }
void GPUEngineBase::RenderLineClearAsyncStart(bool willClearInternalCustomBuffer, void GPUEngineBase::RenderLineClearAsyncStart(bool willClearInternalCustomBuffer,
s32 startLineIndex, size_t startLineIndex,
u16 clearColor16, u16 clearColor16,
FragmentColor clearColor32) Color4u8 clearColor32)
{ {
if (this->_asyncClearTask == NULL) if (this->_asyncClearTask == NULL)
{ {
@ -727,7 +727,7 @@ void GPUEngineBase::RenderLineClearAsyncStart(bool willClearInternalCustomBuffer
this->RenderLineClearAsyncFinish(); this->RenderLineClearAsyncFinish();
this->_asyncClearLineCustom = startLineIndex; this->_asyncClearLineCustom = (s32)startLineIndex;
this->_asyncClearBackdropColor16 = clearColor16; this->_asyncClearBackdropColor16 = clearColor16;
this->_asyncClearBackdropColor32 = clearColor32; this->_asyncClearBackdropColor32 = clearColor32;
this->_asyncClearUseInternalCustomBuffer = willClearInternalCustomBuffer; this->_asyncClearUseInternalCustomBuffer = willClearInternalCustomBuffer;
@ -750,9 +750,10 @@ void GPUEngineBase::RenderLineClearAsyncFinish()
this->_asyncClearInterrupt = 0; this->_asyncClearInterrupt = 0;
} }
void GPUEngineBase::RenderLineClearAsyncWaitForCustomLine(const s32 l) void GPUEngineBase::RenderLineClearAsyncWaitForCustomLine(const size_t l)
{ {
while (l >= atomic_and_barrier32(&this->_asyncClearLineCustom, 0x000000FF)) const s32 lineCompare = (s32)l;
while (lineCompare >= atomic_and_barrier32(&this->_asyncClearLineCustom, 0x000000FF))
{ {
// Do nothing -- just spin. // Do nothing -- just spin.
} }
@ -847,7 +848,7 @@ void GPUEngineBase::UpdateRenderStates(const size_t l)
{ {
currRenderState.workingBackdropColor16 = currRenderState.backdropColor16; currRenderState.workingBackdropColor16 = currRenderState.backdropColor16;
} }
currRenderState.workingBackdropColor32.color = LOCAL_TO_LE_32( (this->_targetDisplay->GetColorFormat() == NDSColorFormat_BGR666_Rev) ? COLOR555TO666(LOCAL_TO_LE_16(currRenderState.workingBackdropColor16)) : COLOR555TO888(LOCAL_TO_LE_16(currRenderState.workingBackdropColor16)) ); currRenderState.workingBackdropColor32.value = LOCAL_TO_LE_32( (this->_targetDisplay->GetColorFormat() == NDSColorFormat_BGR666_Rev) ? COLOR555TO666(LOCAL_TO_LE_16(currRenderState.workingBackdropColor16)) : COLOR555TO888(LOCAL_TO_LE_16(currRenderState.workingBackdropColor16)) );
// Save the current render states to this line's compositor info. // Save the current render states to this line's compositor info.
compInfo.renderState = currRenderState; compInfo.renderState = currRenderState;
@ -1003,7 +1004,7 @@ void GPUEngineBase::_TransitionLineNativeToCustom(GPUEngineCompositorInfo &compI
case NDSColorFormat_BGR666_Rev: case NDSColorFormat_BGR666_Rev:
case NDSColorFormat_BGR888_Rev: case NDSColorFormat_BGR888_Rev:
memset_u32(compInfo.target.lineColorHeadCustom, compInfo.renderState.workingBackdropColor32.color, compInfo.line.pixelCount); memset_u32(compInfo.target.lineColorHeadCustom, compInfo.renderState.workingBackdropColor32.value, compInfo.line.pixelCount);
break; break;
} }
} }
@ -1251,7 +1252,7 @@ FORCEINLINE void GPUEngineBase::_CompositePixelImmediate(GPUEngineCompositorInfo
compInfo.target.xCustom = _gpuDstPitchIndex[srcX]; compInfo.target.xCustom = _gpuDstPitchIndex[srcX];
compInfo.target.lineLayerID = compInfo.target.lineLayerIDHeadNative + srcX; compInfo.target.lineLayerID = compInfo.target.lineLayerIDHeadNative + srcX;
compInfo.target.lineColor16 = (u16 *)compInfo.target.lineColorHeadNative + srcX; compInfo.target.lineColor16 = (u16 *)compInfo.target.lineColorHeadNative + srcX;
compInfo.target.lineColor32 = (FragmentColor *)compInfo.target.lineColorHeadNative + srcX; compInfo.target.lineColor32 = (Color4u8 *)compInfo.target.lineColorHeadNative + srcX;
const bool enableColorEffect = (WILLPERFORMWINDOWTEST) ? (this->_enableColorEffectNative[compInfo.renderState.selectedLayerID][compInfo.target.xNative] != 0) : true; const bool enableColorEffect = (WILLPERFORMWINDOWTEST) ? (this->_enableColorEffectNative[compInfo.renderState.selectedLayerID][compInfo.target.xNative] != 0) : true;
pixelop.Composite16<COMPOSITORMODE, NDSColorFormat_BGR555_Rev, GPULayerType_BG>(compInfo, srcColor16, enableColorEffect, 0, 0); pixelop.Composite16<COMPOSITORMODE, NDSColorFormat_BGR555_Rev, GPULayerType_BG>(compInfo, srcColor16, enableColorEffect, 0, 0);
@ -1277,12 +1278,12 @@ void GPUEngineBase::_PrecompositeNativeToCustomLineBG(GPUEngineCompositorInfo &c
} }
template <GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool WILLPERFORMWINDOWTEST> template <GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool WILLPERFORMWINDOWTEST>
void GPUEngineBase::_CompositeNativeLineOBJ(GPUEngineCompositorInfo &compInfo, const u16 *__restrict srcColorNative16, const FragmentColor *__restrict srcColorNative32) void GPUEngineBase::_CompositeNativeLineOBJ(GPUEngineCompositorInfo &compInfo, const u16 *__restrict srcColorNative16, const Color4u8 *__restrict srcColorNative32)
{ {
compInfo.target.xNative = 0; compInfo.target.xNative = 0;
compInfo.target.xCustom = 0; compInfo.target.xCustom = 0;
compInfo.target.lineColor16 = (u16 *)compInfo.target.lineColorHead; compInfo.target.lineColor16 = (u16 *)compInfo.target.lineColorHead;
compInfo.target.lineColor32 = (FragmentColor *)compInfo.target.lineColorHead; compInfo.target.lineColor32 = (Color4u8 *)compInfo.target.lineColorHead;
compInfo.target.lineLayerID = compInfo.target.lineLayerIDHead; compInfo.target.lineLayerID = compInfo.target.lineLayerIDHead;
#ifdef USEMANUALVECTORIZATION #ifdef USEMANUALVECTORIZATION
@ -1326,7 +1327,7 @@ void GPUEngineBase::_CompositeLineDeferred(GPUEngineCompositorInfo &compInfo, co
compInfo.target.xNative = 0; compInfo.target.xNative = 0;
compInfo.target.xCustom = 0; compInfo.target.xCustom = 0;
compInfo.target.lineColor16 = (u16 *)compInfo.target.lineColorHead; compInfo.target.lineColor16 = (u16 *)compInfo.target.lineColorHead;
compInfo.target.lineColor32 = (FragmentColor *)compInfo.target.lineColorHead; compInfo.target.lineColor32 = (Color4u8 *)compInfo.target.lineColorHead;
compInfo.target.lineLayerID = compInfo.target.lineLayerIDHead; compInfo.target.lineLayerID = compInfo.target.lineLayerIDHead;
size_t i = 0; size_t i = 0;
@ -1366,7 +1367,7 @@ void GPUEngineBase::_CompositeVRAMLineDeferred(GPUEngineCompositorInfo &compInfo
compInfo.target.xNative = 0; compInfo.target.xNative = 0;
compInfo.target.xCustom = 0; compInfo.target.xCustom = 0;
compInfo.target.lineColor16 = (u16 *)compInfo.target.lineColorHead; compInfo.target.lineColor16 = (u16 *)compInfo.target.lineColorHead;
compInfo.target.lineColor32 = (FragmentColor *)compInfo.target.lineColorHead; compInfo.target.lineColor32 = (Color4u8 *)compInfo.target.lineColorHead;
compInfo.target.lineLayerID = compInfo.target.lineLayerIDHead; compInfo.target.lineLayerID = compInfo.target.lineLayerIDHead;
size_t i = 0; size_t i = 0;
@ -1395,7 +1396,7 @@ void GPUEngineBase::_CompositeVRAMLineDeferred(GPUEngineCompositorInfo &compInfo
} }
const bool enableColorEffect = (WILLPERFORMWINDOWTEST) ? (colorEffectEnable[compInfo.target.xCustom] != 0) : true; const bool enableColorEffect = (WILLPERFORMWINDOWTEST) ? (colorEffectEnable[compInfo.target.xCustom] != 0) : true;
pixelop.Composite32<COMPOSITORMODE, OUTPUTFORMAT, LAYERTYPE>(compInfo, ((FragmentColor *)vramColorPtr)[i], enableColorEffect, this->_sprAlphaCustom[compInfo.target.xCustom], this->_sprTypeCustom[compInfo.target.xCustom]); pixelop.Composite32<COMPOSITORMODE, OUTPUTFORMAT, LAYERTYPE>(compInfo, ((Color4u8 *)vramColorPtr)[i], enableColorEffect, this->_sprAlphaCustom[compInfo.target.xCustom], this->_sprTypeCustom[compInfo.target.xCustom]);
} }
else else
{ {
@ -1829,7 +1830,7 @@ void GPUEngineBase::_RenderSprite16(GPUEngineCompositorInfo &compInfo,
{ {
for (size_t i = 0; i < length; i++, frameX++, spriteX+=readXStep) for (size_t i = 0; i < length; i++, frameX++, spriteX+=readXStep)
{ {
const u32 spriteX_word = spriteX >> 1; const u32 spriteX_word = (u32)spriteX >> 1;
const u32 palIndexAddress = objAddress + (spriteX_word & 0x0003) + ((spriteX_word & 0xFFFC) << 3); const u32 palIndexAddress = objAddress + (spriteX_word & 0x0003) + ((spriteX_word & 0xFFFC) << 3);
const u8 *__restrict palIndexBuffer = (u8 *)MMU_gpu_map(palIndexAddress); const u8 *__restrict palIndexBuffer = (u8 *)MMU_gpu_map(palIndexAddress);
const u8 palIndex = *palIndexBuffer; const u8 palIndex = *palIndexBuffer;
@ -1968,7 +1969,7 @@ void GPUEngineBase::SpriteRenderDebug(const u16 lineIndex, u16 *dst)
compInfo.target.xCustom = 0; compInfo.target.xCustom = 0;
compInfo.target.lineColor = (void **)&compInfo.target.lineColor16; compInfo.target.lineColor = (void **)&compInfo.target.lineColor16;
compInfo.target.lineColor16 = (u16 *)compInfo.target.lineColorHeadNative; compInfo.target.lineColor16 = (u16 *)compInfo.target.lineColorHeadNative;
compInfo.target.lineColor32 = (FragmentColor *)compInfo.target.lineColorHeadNative; compInfo.target.lineColor32 = (Color4u8 *)compInfo.target.lineColorHeadNative;
compInfo.target.lineLayerID = NULL; compInfo.target.lineLayerID = NULL;
this->_SpriteRender<true>(compInfo, dst, NULL, NULL, &this->_sprPrio[lineIndex][0]); this->_SpriteRender<true>(compInfo, dst, NULL, NULL, &this->_sprPrio[lineIndex][0]);
@ -2100,7 +2101,7 @@ void GPUEngineBase::_SpriteRenderPerform(GPUEngineCompositorInfo &compInfo, u16
if (auxX >= 0 && auxY >= 0 && auxX < sprSize.width && auxY < sprSize.height) if (auxX >= 0 && auxY >= 0 && auxX < sprSize.width && auxY < sprSize.height)
{ {
size_t objOffset = 0; u32 objOffset = 0;
if (DISPCNT.OBJ_BMP_2D_dim) if (DISPCNT.OBJ_BMP_2D_dim)
{ {
@ -2262,7 +2263,7 @@ void GPUEngineBase::_RenderLine_Layers(GPUEngineCompositorInfo &compInfo)
compInfo.target.xNative = 0; compInfo.target.xNative = 0;
compInfo.target.xCustom = 0; compInfo.target.xCustom = 0;
compInfo.target.lineColor16 = (u16 *)compInfo.target.lineColorHeadNative; compInfo.target.lineColor16 = (u16 *)compInfo.target.lineColorHeadNative;
compInfo.target.lineColor32 = (FragmentColor *)compInfo.target.lineColorHeadNative; compInfo.target.lineColor32 = (Color4u8 *)compInfo.target.lineColorHeadNative;
compInfo.target.lineLayerID = compInfo.target.lineLayerIDHead; compInfo.target.lineLayerID = compInfo.target.lineLayerIDHead;
compInfo.renderState.previouslyRenderedLayerID = GPULayerID_Backdrop; compInfo.renderState.previouslyRenderedLayerID = GPULayerID_Backdrop;
@ -2503,7 +2504,7 @@ void GPUEngineBase::_RenderLine_LayerOBJ(GPUEngineCompositorInfo &compInfo, item
compInfo.target.xNative = srcX; compInfo.target.xNative = srcX;
compInfo.target.xCustom = _gpuDstPitchIndex[srcX]; compInfo.target.xCustom = _gpuDstPitchIndex[srcX];
compInfo.target.lineColor16 = (u16 *)compInfo.target.lineColorHead + srcX; compInfo.target.lineColor16 = (u16 *)compInfo.target.lineColorHead + srcX;
compInfo.target.lineColor32 = (FragmentColor *)compInfo.target.lineColorHead + srcX; compInfo.target.lineColor32 = (Color4u8 *)compInfo.target.lineColorHead + srcX;
compInfo.target.lineLayerID = compInfo.target.lineLayerIDHead + srcX; compInfo.target.lineLayerID = compInfo.target.lineLayerIDHead + srcX;
const bool enableColorEffect = (WILLPERFORMWINDOWTEST) ? (this->_enableColorEffectNative[GPULayerID_OBJ][compInfo.target.xNative] != 0) : true; const bool enableColorEffect = (WILLPERFORMWINDOWTEST) ? (this->_enableColorEffectNative[GPULayerID_OBJ][compInfo.target.xNative] != 0) : true;
@ -2522,7 +2523,7 @@ void GPUEngineBase::_RenderLine_LayerOBJ(GPUEngineCompositorInfo &compInfo, item
for (size_t line = 0; line < compInfo.line.renderCount; line++) for (size_t line = 0; line < compInfo.line.renderCount; line++)
{ {
compInfo.target.lineColor16 = (u16 *)dstColorPtr; compInfo.target.lineColor16 = (u16 *)dstColorPtr;
compInfo.target.lineColor32 = (FragmentColor *)dstColorPtr; compInfo.target.lineColor32 = (Color4u8 *)dstColorPtr;
compInfo.target.lineLayerID = dstLayerIDPtr; compInfo.target.lineLayerID = dstLayerIDPtr;
for (size_t i = 0; i < item->nbPixelsX; i++) for (size_t i = 0; i < item->nbPixelsX; i++)
@ -2542,14 +2543,14 @@ void GPUEngineBase::_RenderLine_LayerOBJ(GPUEngineCompositorInfo &compInfo, item
const size_t dstX = compInfo.target.xCustom + p; const size_t dstX = compInfo.target.xCustom + p;
compInfo.target.lineColor16 = (u16 *)dstColorPtr + dstX; compInfo.target.lineColor16 = (u16 *)dstColorPtr + dstX;
compInfo.target.lineColor32 = (FragmentColor *)dstColorPtr + dstX; compInfo.target.lineColor32 = (Color4u8 *)dstColorPtr + dstX;
compInfo.target.lineLayerID = dstLayerIDPtr + dstX; compInfo.target.lineLayerID = dstLayerIDPtr + dstX;
const bool enableColorEffect = (WILLPERFORMWINDOWTEST) ? (this->_enableColorEffectNative[GPULayerID_OBJ][compInfo.target.xNative] != 0) : true; const bool enableColorEffect = (WILLPERFORMWINDOWTEST) ? (this->_enableColorEffectNative[GPULayerID_OBJ][compInfo.target.xNative] != 0) : true;
if (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev) if (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev)
{ {
pixelop.Composite32<COMPOSITORMODE, OUTPUTFORMAT, GPULayerType_OBJ>(compInfo, ((FragmentColor *)vramColorPtr)[dstX], enableColorEffect, this->_sprAlpha[compInfo.line.indexNative][srcX], this->_sprType[compInfo.line.indexNative][srcX]); pixelop.Composite32<COMPOSITORMODE, OUTPUTFORMAT, GPULayerType_OBJ>(compInfo, ((Color4u8 *)vramColorPtr)[dstX], enableColorEffect, this->_sprAlpha[compInfo.line.indexNative][srcX], this->_sprType[compInfo.line.indexNative][srcX]);
} }
else else
{ {
@ -2558,8 +2559,8 @@ void GPUEngineBase::_RenderLine_LayerOBJ(GPUEngineCompositorInfo &compInfo, item
} }
} }
vramColorPtr = (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev) ? (void *)((FragmentColor *)vramColorPtr + compInfo.line.widthCustom) : (void *)((u16 *)vramColorPtr + compInfo.line.widthCustom); vramColorPtr = (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev) ? (void *)((Color4u8 *)vramColorPtr + compInfo.line.widthCustom) : (void *)((u16 *)vramColorPtr + compInfo.line.widthCustom);
dstColorPtr = (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev) ? (void *)((u16 *)dstColorPtr + compInfo.line.widthCustom) : (void *)((FragmentColor *)dstColorPtr + compInfo.line.widthCustom); dstColorPtr = (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev) ? (void *)((u16 *)dstColorPtr + compInfo.line.widthCustom) : (void *)((Color4u8 *)dstColorPtr + compInfo.line.widthCustom);
dstLayerIDPtr += compInfo.line.widthCustom; dstLayerIDPtr += compInfo.line.widthCustom;
} }
} }
@ -2568,7 +2569,7 @@ void GPUEngineBase::_RenderLine_LayerOBJ(GPUEngineCompositorInfo &compInfo, item
for (size_t line = 0; line < compInfo.line.renderCount; line++) for (size_t line = 0; line < compInfo.line.renderCount; line++)
{ {
compInfo.target.lineColor16 = (u16 *)dstColorPtr; compInfo.target.lineColor16 = (u16 *)dstColorPtr;
compInfo.target.lineColor32 = (FragmentColor *)dstColorPtr; compInfo.target.lineColor32 = (Color4u8 *)dstColorPtr;
compInfo.target.lineLayerID = dstLayerIDPtr; compInfo.target.lineLayerID = dstLayerIDPtr;
for (size_t i = 0; i < item->nbPixelsX; i++) for (size_t i = 0; i < item->nbPixelsX; i++)
@ -2588,7 +2589,7 @@ void GPUEngineBase::_RenderLine_LayerOBJ(GPUEngineCompositorInfo &compInfo, item
const size_t dstX = compInfo.target.xCustom + p; const size_t dstX = compInfo.target.xCustom + p;
compInfo.target.lineColor16 = (u16 *)dstColorPtr + dstX; compInfo.target.lineColor16 = (u16 *)dstColorPtr + dstX;
compInfo.target.lineColor32 = (FragmentColor *)dstColorPtr + dstX; compInfo.target.lineColor32 = (Color4u8 *)dstColorPtr + dstX;
compInfo.target.lineLayerID = dstLayerIDPtr + dstX; compInfo.target.lineLayerID = dstLayerIDPtr + dstX;
const bool enableColorEffect = (WILLPERFORMWINDOWTEST) ? (this->_enableColorEffectNative[GPULayerID_OBJ][compInfo.target.xNative] != 0) : true; const bool enableColorEffect = (WILLPERFORMWINDOWTEST) ? (this->_enableColorEffectNative[GPULayerID_OBJ][compInfo.target.xNative] != 0) : true;
@ -2596,7 +2597,7 @@ void GPUEngineBase::_RenderLine_LayerOBJ(GPUEngineCompositorInfo &compInfo, item
} }
} }
dstColorPtr = (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev) ? (void *)((u16 *)dstColorPtr + compInfo.line.widthCustom) : (void *)((FragmentColor *)dstColorPtr + compInfo.line.widthCustom); dstColorPtr = (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev) ? (void *)((u16 *)dstColorPtr + compInfo.line.widthCustom) : (void *)((Color4u8 *)dstColorPtr + compInfo.line.widthCustom);
dstLayerIDPtr += compInfo.line.widthCustom; dstLayerIDPtr += compInfo.line.widthCustom;
} }
} }
@ -2920,7 +2921,7 @@ void GPUEngineBase::RenderLayerBG(const GPULayerID layerID, u16 *dstColorBuffer)
compInfo.target.xCustom = compInfo.target.xNative; compInfo.target.xCustom = compInfo.target.xNative;
compInfo.target.lineColor = (void **)&compInfo.target.lineColor16; compInfo.target.lineColor = (void **)&compInfo.target.lineColor16;
compInfo.target.lineColor16 = (u16 *)compInfo.target.lineColorHeadNative; compInfo.target.lineColor16 = (u16 *)compInfo.target.lineColorHeadNative;
compInfo.target.lineColor32 = (FragmentColor *)compInfo.target.lineColorHeadNative; compInfo.target.lineColor32 = (Color4u8 *)compInfo.target.lineColorHeadNative;
compInfo.target.lineLayerID = NULL; compInfo.target.lineLayerID = NULL;
for (size_t lineIndex = 0; lineIndex < layerHeight; lineIndex++) for (size_t lineIndex = 0; lineIndex < layerHeight; lineIndex++)
@ -3251,13 +3252,13 @@ GPUEngineA::GPUEngineA()
_isLineCaptureNative[3][l] = true; _isLineCaptureNative[3][l] = true;
} }
_3DFramebufferMain = (FragmentColor *)malloc_alignedPage(GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * sizeof(FragmentColor)); _3DFramebufferMain = (Color4u8 *)malloc_alignedPage(GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * sizeof(Color4u8));
_3DFramebuffer16 = (u16 *)malloc_alignedPage(GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * sizeof(u16)); _3DFramebuffer16 = (u16 *)malloc_alignedPage(GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * sizeof(u16));
_captureWorkingDisplay16 = (u16 *)malloc_alignedPage(GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16)); _captureWorkingDisplay16 = (u16 *)malloc_alignedPage(GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16));
_captureWorkingA16 = (u16 *)malloc_alignedPage(GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16)); _captureWorkingA16 = (u16 *)malloc_alignedPage(GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16));
_captureWorkingB16 = (u16 *)malloc_alignedPage(GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16)); _captureWorkingB16 = (u16 *)malloc_alignedPage(GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16));
_captureWorkingA32 = (FragmentColor *)malloc_alignedPage(GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(FragmentColor)); _captureWorkingA32 = (Color4u8 *)malloc_alignedPage(GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(Color4u8));
_captureWorkingB32 = (FragmentColor *)malloc_alignedPage(GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(FragmentColor)); _captureWorkingB32 = (Color4u8 *)malloc_alignedPage(GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(Color4u8));
} }
GPUEngineA::~GPUEngineA() GPUEngineA::~GPUEngineA()
@ -3290,13 +3291,13 @@ void GPUEngineA::Reset()
const size_t customWidth = this->_targetDisplay->GetWidth(); const size_t customWidth = this->_targetDisplay->GetWidth();
const size_t customHeight = this->_targetDisplay->GetHeight(); const size_t customHeight = this->_targetDisplay->GetHeight();
memset(this->_3DFramebufferMain, 0, customWidth * customHeight * sizeof(FragmentColor)); memset(this->_3DFramebufferMain, 0, customWidth * customHeight * sizeof(Color4u8));
memset(this->_3DFramebuffer16, 0, customWidth * customHeight * sizeof(u16)); memset(this->_3DFramebuffer16, 0, customWidth * customHeight * sizeof(u16));
memset(this->_captureWorkingDisplay16, 0, customWidth * _gpuLargestDstLineCount * sizeof(u16)); memset(this->_captureWorkingDisplay16, 0, customWidth * _gpuLargestDstLineCount * sizeof(u16));
memset(this->_captureWorkingA16, 0, customWidth * _gpuLargestDstLineCount * sizeof(u16)); memset(this->_captureWorkingA16, 0, customWidth * _gpuLargestDstLineCount * sizeof(u16));
memset(this->_captureWorkingB16, 0, customWidth * _gpuLargestDstLineCount * sizeof(u16)); memset(this->_captureWorkingB16, 0, customWidth * _gpuLargestDstLineCount * sizeof(u16));
memset(this->_captureWorkingA32, 0, customWidth * _gpuLargestDstLineCount * sizeof(FragmentColor)); memset(this->_captureWorkingA32, 0, customWidth * _gpuLargestDstLineCount * sizeof(Color4u8));
memset(this->_captureWorkingB32, 0, customWidth * _gpuLargestDstLineCount * sizeof(FragmentColor)); memset(this->_captureWorkingB32, 0, customWidth * _gpuLargestDstLineCount * sizeof(Color4u8));
memset(&this->_dispCapCnt, 0, sizeof(DISPCAPCNT_parsed)); memset(&this->_dispCapCnt, 0, sizeof(DISPCAPCNT_parsed));
this->_displayCaptureEnable = false; this->_displayCaptureEnable = false;
@ -3382,7 +3383,7 @@ void GPUEngineA::ParseReg_DISPCAPCNT()
this->_dispCapCnt.srcA, this->_dispCapCnt.srcB);*/ this->_dispCapCnt.srcA, this->_dispCapCnt.srcB);*/
} }
FragmentColor* GPUEngineA::Get3DFramebufferMain() const Color4u8* GPUEngineA::Get3DFramebufferMain() const
{ {
return this->_3DFramebufferMain; return this->_3DFramebufferMain;
} }
@ -3406,30 +3407,30 @@ void GPUEngineA::AllocateWorkingBuffers(NDSColorFormat requestedColorFormat, siz
{ {
this->GPUEngineBase::AllocateWorkingBuffers(requestedColorFormat, w, h); this->GPUEngineBase::AllocateWorkingBuffers(requestedColorFormat, w, h);
FragmentColor *old3DFramebufferMain = this->_3DFramebufferMain; Color4u8 *old3DFramebufferMain = this->_3DFramebufferMain;
u16 *old3DFramebuffer16 = this->_3DFramebuffer16; u16 *old3DFramebuffer16 = this->_3DFramebuffer16;
u16 *oldCaptureWorkingDisplay16 = this->_captureWorkingDisplay16; u16 *oldCaptureWorkingDisplay16 = this->_captureWorkingDisplay16;
u16 *oldCaptureWorkingA16 = this->_captureWorkingA16; u16 *oldCaptureWorkingA16 = this->_captureWorkingA16;
u16 *oldCaptureWorkingB16 = this->_captureWorkingB16; u16 *oldCaptureWorkingB16 = this->_captureWorkingB16;
FragmentColor *oldCaptureWorkingA32 = this->_captureWorkingA32; Color4u8 *oldCaptureWorkingA32 = this->_captureWorkingA32;
FragmentColor *oldCaptureWorkingB32 = this->_captureWorkingB32; Color4u8 *oldCaptureWorkingB32 = this->_captureWorkingB32;
this->_3DFramebufferMain = (FragmentColor *)malloc_alignedPage(w * h * sizeof(FragmentColor)); this->_3DFramebufferMain = (Color4u8 *)malloc_alignedPage(w * h * sizeof(Color4u8));
this->_3DFramebuffer16 = (u16 *)malloc_alignedPage(w * h * sizeof(u16)); this->_3DFramebuffer16 = (u16 *)malloc_alignedPage(w * h * sizeof(u16));
this->_captureWorkingDisplay16 = (u16 *)malloc_alignedPage(w * _gpuLargestDstLineCount * sizeof(u16)); this->_captureWorkingDisplay16 = (u16 *)malloc_alignedPage(w * _gpuLargestDstLineCount * sizeof(u16));
this->_captureWorkingA16 = (u16 *)malloc_alignedPage(w * _gpuLargestDstLineCount * sizeof(u16)); this->_captureWorkingA16 = (u16 *)malloc_alignedPage(w * _gpuLargestDstLineCount * sizeof(u16));
this->_captureWorkingB16 = (u16 *)malloc_alignedPage(w * _gpuLargestDstLineCount * sizeof(u16)); this->_captureWorkingB16 = (u16 *)malloc_alignedPage(w * _gpuLargestDstLineCount * sizeof(u16));
this->_captureWorkingA32 = (FragmentColor *)malloc_alignedPage(w * _gpuLargestDstLineCount * sizeof(FragmentColor)); this->_captureWorkingA32 = (Color4u8 *)malloc_alignedPage(w * _gpuLargestDstLineCount * sizeof(Color4u8));
this->_captureWorkingB32 = (FragmentColor *)malloc_alignedPage(w * _gpuLargestDstLineCount * sizeof(FragmentColor)); this->_captureWorkingB32 = (Color4u8 *)malloc_alignedPage(w * _gpuLargestDstLineCount * sizeof(Color4u8));
const GPUEngineLineInfo &lineInfo = this->_currentCompositorInfo[GPU_VRAM_BLOCK_LINES].line; const GPUEngineLineInfo &lineInfo = this->_currentCompositorInfo[GPU_VRAM_BLOCK_LINES].line;
if (this->_targetDisplay->GetColorFormat() == NDSColorFormat_BGR888_Rev) if (this->_targetDisplay->GetColorFormat() == NDSColorFormat_BGR888_Rev)
{ {
this->_VRAMCustomBlockPtr[0] = (FragmentColor *)GPU->GetCustomVRAMBuffer(); this->_VRAMCustomBlockPtr[0] = (Color4u8 *)GPU->GetCustomVRAMBuffer();
this->_VRAMCustomBlockPtr[1] = (FragmentColor *)this->_VRAMCustomBlockPtr[0] + (1 * lineInfo.indexCustom * w); this->_VRAMCustomBlockPtr[1] = (Color4u8 *)this->_VRAMCustomBlockPtr[0] + (1 * lineInfo.indexCustom * w);
this->_VRAMCustomBlockPtr[2] = (FragmentColor *)this->_VRAMCustomBlockPtr[0] + (2 * lineInfo.indexCustom * w); this->_VRAMCustomBlockPtr[2] = (Color4u8 *)this->_VRAMCustomBlockPtr[0] + (2 * lineInfo.indexCustom * w);
this->_VRAMCustomBlockPtr[3] = (FragmentColor *)this->_VRAMCustomBlockPtr[0] + (3 * lineInfo.indexCustom * w); this->_VRAMCustomBlockPtr[3] = (Color4u8 *)this->_VRAMCustomBlockPtr[0] + (3 * lineInfo.indexCustom * w);
} }
else else
{ {
@ -3575,7 +3576,7 @@ void GPUEngineA::RenderLine(const size_t l)
template <GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool WILLPERFORMWINDOWTEST> template <GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool WILLPERFORMWINDOWTEST>
void GPUEngineA::RenderLine_Layer3D(GPUEngineCompositorInfo &compInfo) void GPUEngineA::RenderLine_Layer3D(GPUEngineCompositorInfo &compInfo)
{ {
const FragmentColor *__restrict framebuffer3D = CurrentRenderer->GetFramebuffer(); const Color4u8 *__restrict framebuffer3D = CurrentRenderer->GetFramebuffer();
if (framebuffer3D == NULL) if (framebuffer3D == NULL)
{ {
return; return;
@ -3590,12 +3591,12 @@ void GPUEngineA::RenderLine_Layer3D(GPUEngineCompositorInfo &compInfo)
const u8 *colorEffectEnable = (CurrentRenderer->GetFramebufferWidth() == GPU_FRAMEBUFFER_NATIVE_WIDTH) ? this->_enableColorEffectNative[GPULayerID_BG0] : this->_enableColorEffectCustom[GPULayerID_BG0]; const u8 *colorEffectEnable = (CurrentRenderer->GetFramebufferWidth() == GPU_FRAMEBUFFER_NATIVE_WIDTH) ? this->_enableColorEffectNative[GPULayerID_BG0] : this->_enableColorEffectCustom[GPULayerID_BG0];
const float customWidthScale = (float)compInfo.line.widthCustom / (float)GPU_FRAMEBUFFER_NATIVE_WIDTH; const float customWidthScale = (float)compInfo.line.widthCustom / (float)GPU_FRAMEBUFFER_NATIVE_WIDTH;
const FragmentColor *__restrict srcLinePtr = framebuffer3D + compInfo.line.blockOffsetCustom; const Color4u8 *__restrict srcLinePtr = framebuffer3D + compInfo.line.blockOffsetCustom;
compInfo.target.xNative = 0; compInfo.target.xNative = 0;
compInfo.target.xCustom = 0; compInfo.target.xCustom = 0;
compInfo.target.lineColor16 = (u16 *)compInfo.target.lineColorHead; compInfo.target.lineColor16 = (u16 *)compInfo.target.lineColorHead;
compInfo.target.lineColor32 = (FragmentColor *)compInfo.target.lineColorHead; compInfo.target.lineColor32 = (Color4u8 *)compInfo.target.lineColorHead;
compInfo.target.lineLayerID = compInfo.target.lineLayerIDHead; compInfo.target.lineLayerID = compInfo.target.lineLayerIDHead;
// Horizontally offset the 3D layer by this amount. // Horizontally offset the 3D layer by this amount.
@ -3879,7 +3880,7 @@ void GPUEngineA::_RenderLine_DisplayCapture(const GPUEngineCompositorInfo &compI
} }
const u16 *vramCustom16 = (u16 *)GPU->GetCustomVRAMBlankBuffer(); const u16 *vramCustom16 = (u16 *)GPU->GetCustomVRAMBlankBuffer();
const FragmentColor *vramCustom32 = (FragmentColor *)GPU->GetCustomVRAMBlankBuffer(); const Color4u8 *vramCustom32 = (Color4u8 *)GPU->GetCustomVRAMBlankBuffer();
if (!willReadNativeVRAM) if (!willReadNativeVRAM)
{ {
@ -3890,7 +3891,7 @@ void GPUEngineA::_RenderLine_DisplayCapture(const GPUEngineCompositorInfo &compI
} }
vramCustom16 = (u16 *)this->_VRAMCustomBlockPtr[DISPCNT.VRAM_Block] + vramCustomOffset; vramCustom16 = (u16 *)this->_VRAMCustomBlockPtr[DISPCNT.VRAM_Block] + vramCustomOffset;
vramCustom32 = (FragmentColor *)this->_VRAMCustomBlockPtr[DISPCNT.VRAM_Block] + vramCustomOffset; vramCustom32 = (Color4u8 *)this->_VRAMCustomBlockPtr[DISPCNT.VRAM_Block] + vramCustomOffset;
} }
if (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev) if (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev)
@ -3903,9 +3904,9 @@ void GPUEngineA::_RenderLine_DisplayCapture(const GPUEngineCompositorInfo &compI
} }
} }
srcAPtr = (DISPCAPCNT.SrcA == 0) ? (FragmentColor *)compInfo.target.lineColorHead : (FragmentColor *)CurrentRenderer->GetFramebuffer() + compInfo.line.blockOffsetCustom; srcAPtr = (DISPCAPCNT.SrcA == 0) ? (Color4u8 *)compInfo.target.lineColorHead : (Color4u8 *)CurrentRenderer->GetFramebuffer() + compInfo.line.blockOffsetCustom;
srcBPtr = (DISPCAPCNT.SrcB == 0) ? vramCustom32 : this->_fifoLine32; srcBPtr = (DISPCAPCNT.SrcB == 0) ? vramCustom32 : this->_fifoLine32;
dstCustomPtr = (FragmentColor *)this->_VRAMCustomBlockPtr[DISPCAPCNT.VRAMWriteBlock] + dstCustomOffset; dstCustomPtr = (Color4u8 *)this->_VRAMCustomBlockPtr[DISPCAPCNT.VRAMWriteBlock] + dstCustomOffset;
} }
else else
{ {
@ -4272,10 +4273,10 @@ u16 GPUEngineA::_RenderLine_DispCapture_BlendFunc(const u16 srcA, const u16 srcB
} }
template<NDSColorFormat COLORFORMAT> template<NDSColorFormat COLORFORMAT>
FragmentColor GPUEngineA::_RenderLine_DispCapture_BlendFunc(const FragmentColor srcA, const FragmentColor srcB, const u8 blendEVA, const u8 blendEVB) Color4u8 GPUEngineA::_RenderLine_DispCapture_BlendFunc(const Color4u8 srcA, const Color4u8 srcB, const u8 blendEVA, const u8 blendEVB)
{ {
FragmentColor outColor; Color4u8 outColor;
outColor.color = 0; outColor.value = 0;
u16 r = 0; u16 r = 0;
u16 g = 0; u16 g = 0;
@ -4328,17 +4329,17 @@ void GPUEngineA::_RenderLine_DispCapture_Blend_Buffer(const void *srcA, const vo
#endif #endif
if (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev) if (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev)
{ {
const FragmentColor *srcA_32 = (const FragmentColor *)srcA; const Color4u8 *srcA_32 = (const Color4u8 *)srcA;
const FragmentColor *srcB_32 = (const FragmentColor *)srcB; const Color4u8 *srcB_32 = (const Color4u8 *)srcB;
FragmentColor *dst32 = (FragmentColor *)dst; Color4u8 *dst32 = (Color4u8 *)dst;
#ifdef USEMANUALVECTORIZATION #ifdef USEMANUALVECTORIZATION
#pragma LOOPVECTORIZE_DISABLE #pragma LOOPVECTORIZE_DISABLE
#endif #endif
for (; i < length; i++) for (; i < length; i++)
{ {
const FragmentColor colorA = srcA_32[i]; const Color4u8 colorA = srcA_32[i];
const FragmentColor colorB = srcB_32[i]; const Color4u8 colorB = srcB_32[i];
dst32[i] = this->_RenderLine_DispCapture_BlendFunc<OUTPUTFORMAT>(colorA, colorB, blendEVA, blendEVB); dst32[i] = this->_RenderLine_DispCapture_BlendFunc<OUTPUTFORMAT>(colorA, colorB, blendEVA, blendEVB);
} }
@ -4383,9 +4384,9 @@ void GPUEngineA::_RenderLine_DispCapture_Blend(const GPUEngineLineInfo &lineInfo
for (size_t line = 0; line < lineInfo.renderCount; line++) for (size_t line = 0; line < lineInfo.renderCount; line++)
{ {
this->_RenderLine_DispCapture_Blend_Buffer<OUTPUTFORMAT>(srcA, srcB, dst, blendEVA, blendEVB, captureLengthExt); this->_RenderLine_DispCapture_Blend_Buffer<OUTPUTFORMAT>(srcA, srcB, dst, blendEVA, blendEVB, captureLengthExt);
srcA = (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev) ? (void *)((FragmentColor *)srcA + lineInfo.widthCustom) : (void *)((u16 *)srcA + lineInfo.widthCustom); srcA = (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev) ? (void *)((Color4u8 *)srcA + lineInfo.widthCustom) : (void *)((u16 *)srcA + lineInfo.widthCustom);
srcB = (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev) ? (void *)((FragmentColor *)srcB + lineInfo.widthCustom) : (void *)((u16 *)srcB + lineInfo.widthCustom); srcB = (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev) ? (void *)((Color4u8 *)srcB + lineInfo.widthCustom) : (void *)((u16 *)srcB + lineInfo.widthCustom);
dst = (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev) ? (void *)((FragmentColor *)dst + lineInfo.widthCustom) : (void *)((u16 *)dst + lineInfo.widthCustom); dst = (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev) ? (void *)((Color4u8 *)dst + lineInfo.widthCustom) : (void *)((u16 *)dst + lineInfo.widthCustom);
} }
} }
} }
@ -4943,7 +4944,7 @@ void GPUSubsystem::SetFramebufferPageCount(size_t pageCount)
pageCount = MAX_FRAMEBUFFER_PAGES; pageCount = MAX_FRAMEBUFFER_PAGES;
} }
this->_displayInfo.framebufferPageCount = pageCount; this->_displayInfo.framebufferPageCount = (u32)pageCount;
} }
size_t GPUSubsystem::GetCustomFramebufferWidth() const size_t GPUSubsystem::GetCustomFramebufferWidth() const
@ -4977,9 +4978,9 @@ void GPUSubsystem::SetCustomFramebufferSize(size_t w, size_t h)
u8 *oldGpuDstToSrcSSSE3_u16_8e = _gpuDstToSrcSSSE3_u16_8e; u8 *oldGpuDstToSrcSSSE3_u16_8e = _gpuDstToSrcSSSE3_u16_8e;
u8 *oldGpuDstToSrcSSSE3_u32_4e = _gpuDstToSrcSSSE3_u32_4e; u8 *oldGpuDstToSrcSSSE3_u32_4e = _gpuDstToSrcSSSE3_u32_4e;
for (size_t srcX = 0, currentPitchCount = 0; srcX < GPU_FRAMEBUFFER_NATIVE_WIDTH; srcX++) for (u32 srcX = 0, currentPitchCount = 0; srcX < GPU_FRAMEBUFFER_NATIVE_WIDTH; srcX++)
{ {
const size_t pitch = (size_t)ceilf((srcX+1) * customWidthScale) - currentPitchCount; const u32 pitch = (u32)ceilf(((float)srcX+1.0f) * customWidthScale) - (float)currentPitchCount;
_gpuDstPitchCount[srcX] = pitch; _gpuDstPitchCount[srcX] = pitch;
_gpuDstPitchIndex[srcX] = currentPitchCount; _gpuDstPitchIndex[srcX] = currentPitchCount;
currentPitchCount += pitch; currentPitchCount += pitch;
@ -5067,8 +5068,8 @@ void GPUSubsystem::SetCustomFramebufferSize(size_t w, size_t h)
this->_display[NDSDisplayID_Touch]->SetDisplaySize(w, h); this->_display[NDSDisplayID_Touch]->SetDisplaySize(w, h);
this->_displayInfo.isCustomSizeRequested = ( (w != GPU_FRAMEBUFFER_NATIVE_WIDTH) || (h != GPU_FRAMEBUFFER_NATIVE_HEIGHT) ); this->_displayInfo.isCustomSizeRequested = ( (w != GPU_FRAMEBUFFER_NATIVE_WIDTH) || (h != GPU_FRAMEBUFFER_NATIVE_HEIGHT) );
this->_displayInfo.customWidth = w; this->_displayInfo.customWidth = (u32)w;
this->_displayInfo.customHeight = h; this->_displayInfo.customHeight = (u32)h;
if (!this->_display[NDSDisplayID_Main]->IsCustomSizeRequested()) if (!this->_display[NDSDisplayID_Main]->IsCustomSizeRequested())
{ {
@ -5110,7 +5111,7 @@ void GPUSubsystem::SetColorFormat(const NDSColorFormat outputFormat)
this->_display[NDSDisplayID_Touch]->SetColorFormat(outputFormat); this->_display[NDSDisplayID_Touch]->SetColorFormat(outputFormat);
this->_displayInfo.colorFormat = this->_display[NDSDisplayID_Main]->GetColorFormat(); this->_displayInfo.colorFormat = this->_display[NDSDisplayID_Main]->GetColorFormat();
this->_displayInfo.pixelBytes = this->_display[NDSDisplayID_Main]->GetPixelBytes(); this->_displayInfo.pixelBytes = (u32)this->_display[NDSDisplayID_Main]->GetPixelBytes();
if (!this->_displayInfo.isCustomSizeRequested) if (!this->_displayInfo.isCustomSizeRequested)
{ {
@ -5128,7 +5129,7 @@ void GPUSubsystem::_AllocateFramebuffers(NDSColorFormat outputFormat, size_t w,
void *oldMasterFramebuffer = this->_masterFramebuffer; void *oldMasterFramebuffer = this->_masterFramebuffer;
void *oldCustomVRAM = this->_customVRAM; void *oldCustomVRAM = this->_customVRAM;
const size_t pixelBytes = (outputFormat == NDSColorFormat_BGR555_Rev) ? sizeof(u16) : sizeof(FragmentColor); const size_t pixelBytes = (outputFormat == NDSColorFormat_BGR555_Rev) ? sizeof(u16) : sizeof(Color4u8);
const size_t newCustomVRAMBlockSize = this->_lineInfo[GPU_VRAM_BLOCK_LINES].indexCustom * w; const size_t newCustomVRAMBlockSize = this->_lineInfo[GPU_VRAM_BLOCK_LINES].indexCustom * w;
const size_t newCustomVRAMBlankSize = _gpuLargestDstLineCount * GPU_VRAM_BLANK_REGION_LINES * w; const size_t newCustomVRAMBlankSize = _gpuLargestDstLineCount * GPU_VRAM_BLANK_REGION_LINES * w;
const size_t nativeFramebufferSize = GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * sizeof(u16); const size_t nativeFramebufferSize = GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * sizeof(u16);
@ -5136,8 +5137,8 @@ void GPUSubsystem::_AllocateFramebuffers(NDSColorFormat outputFormat, size_t w,
void *newCustomVRAM = NULL; void *newCustomVRAM = NULL;
this->_displayInfo.framebufferPageCount = pageCount; this->_displayInfo.framebufferPageCount = (u32)pageCount;
this->_displayInfo.framebufferPageSize = (nativeFramebufferSize * 2) + (customFramebufferSize * 2); this->_displayInfo.framebufferPageSize = (u32)( (nativeFramebufferSize * 2) + (customFramebufferSize * 2) );
this->_masterFramebuffer = malloc_alignedPage(this->_displayInfo.framebufferPageSize * this->_displayInfo.framebufferPageCount); this->_masterFramebuffer = malloc_alignedPage(this->_displayInfo.framebufferPageSize * this->_displayInfo.framebufferPageCount);
if (outputFormat != NDSColorFormat_BGR555_Rev) if (outputFormat != NDSColorFormat_BGR555_Rev)
@ -5207,10 +5208,10 @@ void GPUSubsystem::_AllocateFramebuffers(NDSColorFormat outputFormat, size_t w,
break; break;
case NDSColorFormat_BGR888_Rev: case NDSColorFormat_BGR888_Rev:
newCustomVRAM = (void *)malloc_alignedPage(((newCustomVRAMBlockSize * 4) + newCustomVRAMBlankSize) * sizeof(FragmentColor)); newCustomVRAM = (void *)malloc_alignedPage(((newCustomVRAMBlockSize * 4) + newCustomVRAMBlankSize) * sizeof(Color4u8));
memset(newCustomVRAM, 0, ((newCustomVRAMBlockSize * 4) + newCustomVRAMBlankSize) * sizeof(FragmentColor)); memset(newCustomVRAM, 0, ((newCustomVRAMBlockSize * 4) + newCustomVRAMBlankSize) * sizeof(Color4u8));
this->_customVRAM = newCustomVRAM; this->_customVRAM = newCustomVRAM;
this->_customVRAMBlank = (FragmentColor *)newCustomVRAM + (newCustomVRAMBlockSize * 4); this->_customVRAMBlank = (Color4u8 *)newCustomVRAM + (newCustomVRAMBlockSize * 4);
break; break;
default: default:
@ -5334,7 +5335,7 @@ void* GPUSubsystem::GetCustomVRAMAddressUsingMappedAddress(const u32 mappedAddr,
const size_t blockLine = (vramPixel >> 8) & 0x000000FF; // blockLine = (vramPixel % (GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_VRAM_BLOCK_LINES)) / GPU_FRAMEBUFFER_NATIVE_WIDTH const size_t blockLine = (vramPixel >> 8) & 0x000000FF; // blockLine = (vramPixel % (GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_VRAM_BLOCK_LINES)) / GPU_FRAMEBUFFER_NATIVE_WIDTH
const size_t linePixel = vramPixel & 0x000000FF; // linePixel = (vramPixel % (GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_VRAM_BLOCK_LINES)) % GPU_FRAMEBUFFER_NATIVE_WIDTH const size_t linePixel = vramPixel & 0x000000FF; // linePixel = (vramPixel % (GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_VRAM_BLOCK_LINES)) % GPU_FRAMEBUFFER_NATIVE_WIDTH
return (COLORFORMAT == NDSColorFormat_BGR888_Rev) ? (void *)((FragmentColor *)this->GetEngineMain()->GetCustomVRAMBlockPtr(blockID) + (this->_lineInfo[blockLine].indexCustom * this->_lineInfo[blockLine].widthCustom) + _gpuDstPitchIndex[linePixel] + offset) : (void *)((u16 *)this->GetEngineMain()->GetCustomVRAMBlockPtr(blockID) + (this->_lineInfo[blockLine].indexCustom * this->_lineInfo[blockLine].widthCustom) + _gpuDstPitchIndex[linePixel] + offset); return (COLORFORMAT == NDSColorFormat_BGR888_Rev) ? (void *)((Color4u8 *)this->GetEngineMain()->GetCustomVRAMBlockPtr(blockID) + (this->_lineInfo[blockLine].indexCustom * this->_lineInfo[blockLine].widthCustom) + _gpuDstPitchIndex[linePixel] + offset) : (void *)((u16 *)this->GetEngineMain()->GetCustomVRAMBlockPtr(blockID) + (this->_lineInfo[blockLine].indexCustom * this->_lineInfo[blockLine].widthCustom) + _gpuDstPitchIndex[linePixel] + offset);
} }
bool GPUSubsystem::GetWillPostprocessDisplays() const bool GPUSubsystem::GetWillPostprocessDisplays() const
@ -5540,13 +5541,13 @@ void GPUSubsystem::RenderLine(const size_t l)
this->_displayInfo.didPerformCustomRender[NDSDisplayID_Main] = this->_display[NDSDisplayID_Main]->DidPerformCustomRender(); this->_displayInfo.didPerformCustomRender[NDSDisplayID_Main] = this->_display[NDSDisplayID_Main]->DidPerformCustomRender();
this->_displayInfo.renderedBuffer[NDSDisplayID_Main] = this->_display[NDSDisplayID_Main]->GetRenderedBuffer(); this->_displayInfo.renderedBuffer[NDSDisplayID_Main] = this->_display[NDSDisplayID_Main]->GetRenderedBuffer();
this->_displayInfo.renderedWidth[NDSDisplayID_Main] = this->_display[NDSDisplayID_Main]->GetRenderedWidth(); this->_displayInfo.renderedWidth[NDSDisplayID_Main] = (u32)this->_display[NDSDisplayID_Main]->GetRenderedWidth();
this->_displayInfo.renderedHeight[NDSDisplayID_Main] = this->_display[NDSDisplayID_Main]->GetRenderedHeight(); this->_displayInfo.renderedHeight[NDSDisplayID_Main] = (u32)this->_display[NDSDisplayID_Main]->GetRenderedHeight();
this->_displayInfo.didPerformCustomRender[NDSDisplayID_Touch] = this->_display[NDSDisplayID_Touch]->DidPerformCustomRender(); this->_displayInfo.didPerformCustomRender[NDSDisplayID_Touch] = this->_display[NDSDisplayID_Touch]->DidPerformCustomRender();
this->_displayInfo.renderedBuffer[NDSDisplayID_Touch] = this->_display[NDSDisplayID_Touch]->GetRenderedBuffer(); this->_displayInfo.renderedBuffer[NDSDisplayID_Touch] = this->_display[NDSDisplayID_Touch]->GetRenderedBuffer();
this->_displayInfo.renderedWidth[NDSDisplayID_Touch] = this->_display[NDSDisplayID_Touch]->GetRenderedWidth(); this->_displayInfo.renderedWidth[NDSDisplayID_Touch] = (u32)this->_display[NDSDisplayID_Touch]->GetRenderedWidth();
this->_displayInfo.renderedHeight[NDSDisplayID_Touch] = this->_display[NDSDisplayID_Touch]->GetRenderedHeight(); this->_displayInfo.renderedHeight[NDSDisplayID_Touch] = (u32)this->_display[NDSDisplayID_Touch]->GetRenderedHeight();
this->_displayInfo.engineID[NDSDisplayID_Main] = this->_display[NDSDisplayID_Main]->GetEngineID(); this->_displayInfo.engineID[NDSDisplayID_Main] = this->_display[NDSDisplayID_Main]->GetEngineID();
this->_displayInfo.engineID[NDSDisplayID_Touch] = this->_display[NDSDisplayID_Touch]->GetEngineID(); this->_displayInfo.engineID[NDSDisplayID_Touch] = this->_display[NDSDisplayID_Touch]->GetEngineID();
@ -5638,16 +5639,16 @@ void GPUSubsystem::ClearWithColor(const u16 colorBGRA5551)
} }
else else
{ {
FragmentColor color32; Color4u8 color32;
switch (this->_displayInfo.colorFormat) switch (this->_displayInfo.colorFormat)
{ {
case NDSColorFormat_BGR666_Rev: case NDSColorFormat_BGR666_Rev:
color32.color = LE_TO_LOCAL_32( ColorspaceConvert555To6665Opaque<false>(colorBGRA5551 & 0x7FFF) ); color32.value = LE_TO_LOCAL_32( ColorspaceConvert555To6665Opaque<false>(colorBGRA5551 & 0x7FFF) );
break; break;
case NDSColorFormat_BGR888_Rev: case NDSColorFormat_BGR888_Rev:
color32.color = LE_TO_LOCAL_32( ColorspaceConvert555To8888Opaque<false>(colorBGRA5551 & 0x7FFF) ); color32.value = LE_TO_LOCAL_32( ColorspaceConvert555To8888Opaque<false>(colorBGRA5551 & 0x7FFF) );
break; break;
default: default:
@ -5657,7 +5658,7 @@ void GPUSubsystem::ClearWithColor(const u16 colorBGRA5551)
for (size_t i = 0; i < this->_displayInfo.framebufferPageCount; i++) for (size_t i = 0; i < this->_displayInfo.framebufferPageCount; i++)
{ {
memset_u16((u8 *)this->_masterFramebuffer + (this->_displayInfo.framebufferPageSize * i), color16, nativeFramebufferPixCount); memset_u16((u8 *)this->_masterFramebuffer + (this->_displayInfo.framebufferPageSize * i), color16, nativeFramebufferPixCount);
memset_u32((u8 *)this->_masterFramebuffer + (this->_displayInfo.framebufferPageSize * i) + (nativeFramebufferPixCount * sizeof(u16)), color32.color, customFramebufferPixCount); memset_u32((u8 *)this->_masterFramebuffer + (this->_displayInfo.framebufferPageSize * i) + (nativeFramebufferPixCount * sizeof(u16)), color32.value, customFramebufferPixCount);
} }
} }
} }
@ -6313,7 +6314,7 @@ NDSColorFormat NDSDisplay::GetColorFormat() const
void NDSDisplay::SetColorFormat(NDSColorFormat colorFormat) void NDSDisplay::SetColorFormat(NDSColorFormat colorFormat)
{ {
this->_customColorFormat = colorFormat; this->_customColorFormat = colorFormat;
this->_customPixelBytes = (colorFormat == NDSColorFormat_BGR555_Rev) ? sizeof(u16) : sizeof(FragmentColor); this->_customPixelBytes = (colorFormat == NDSColorFormat_BGR555_Rev) ? sizeof(u16) : sizeof(Color4u8);
} }
size_t NDSDisplay::GetPixelBytes() const size_t NDSDisplay::GetPixelBytes() const
@ -6441,8 +6442,8 @@ void NDSDisplay::ApplyMasterBrightness(void *dst, const size_t pixCount, const G
} }
else else
{ {
((FragmentColor *)dst)[i] = colorop.increase<OUTPUTFORMAT>(((FragmentColor *)dst)[i], intensityClamped); ((Color4u8 *)dst)[i] = colorop.increase<OUTPUTFORMAT>(((Color4u8 *)dst)[i], intensityClamped);
((FragmentColor *)dst)[i].a = (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) ? 0x1F : 0xFF; ((Color4u8 *)dst)[i].a = (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) ? 0x1F : 0xFF;
} }
} }
} }
@ -6488,8 +6489,8 @@ void NDSDisplay::ApplyMasterBrightness(void *dst, const size_t pixCount, const G
} }
else else
{ {
((FragmentColor *)dst)[i] = colorop.decrease<OUTPUTFORMAT>(((FragmentColor *)dst)[i], intensityClamped); ((Color4u8 *)dst)[i] = colorop.decrease<OUTPUTFORMAT>(((Color4u8 *)dst)[i], intensityClamped);
((FragmentColor *)dst)[i].a = (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) ? 0x1F : 0xFF; ((Color4u8 *)dst)[i].a = (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) ? 0x1F : 0xFF;
} }
} }
} }

View File

@ -1330,7 +1330,7 @@ typedef struct
GPUDisplayMode displayOutputMode; GPUDisplayMode displayOutputMode;
u16 backdropColor16; u16 backdropColor16;
u16 workingBackdropColor16; u16 workingBackdropColor16;
FragmentColor workingBackdropColor32; Color4u8 workingBackdropColor32;
ColorEffect colorEffect; ColorEffect colorEffect;
u8 blendEVA; u8 blendEVA;
u8 blendEVB; u8 blendEVB;
@ -1342,11 +1342,11 @@ typedef struct
TBlendTable *blendTable555; TBlendTable *blendTable555;
u16 *brightnessUpTable555; u16 *brightnessUpTable555;
FragmentColor *brightnessUpTable666; Color4u8 *brightnessUpTable666;
FragmentColor *brightnessUpTable888; Color4u8 *brightnessUpTable888;
u16 *brightnessDownTable555; u16 *brightnessDownTable555;
FragmentColor *brightnessDownTable666; Color4u8 *brightnessDownTable666;
FragmentColor *brightnessDownTable888; Color4u8 *brightnessDownTable888;
u8 WIN0_enable[6]; u8 WIN0_enable[6];
u8 WIN1_enable[6]; u8 WIN1_enable[6];
@ -1387,7 +1387,7 @@ typedef struct
size_t xCustom; size_t xCustom;
void **lineColor; void **lineColor;
u16 *lineColor16; u16 *lineColor16;
FragmentColor *lineColor32; Color4u8 *lineColor32;
u8 *lineLayerID; u8 *lineLayerID;
} GPUEngineTargetState; } GPUEngineTargetState;
@ -1503,7 +1503,7 @@ protected:
volatile s32 _asyncClearLineCustom; volatile s32 _asyncClearLineCustom;
volatile s32 _asyncClearInterrupt; volatile s32 _asyncClearInterrupt;
u16 _asyncClearBackdropColor16; // Do not modify this variable directly. u16 _asyncClearBackdropColor16; // Do not modify this variable directly.
FragmentColor _asyncClearBackdropColor32; // Do not modify this variable directly. Color4u8 _asyncClearBackdropColor32; // Do not modify this variable directly.
bool _asyncClearUseInternalCustomBuffer; // Do not modify this variable directly. bool _asyncClearUseInternalCustomBuffer; // Do not modify this variable directly.
void _ResortBGLayers(); void _ResortBGLayers();
@ -1523,11 +1523,11 @@ protected:
template<bool MOSAIC> void _PrecompositeNativeToCustomLineBG(GPUEngineCompositorInfo &compInfo); template<bool MOSAIC> void _PrecompositeNativeToCustomLineBG(GPUEngineCompositorInfo &compInfo);
template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool WILLPERFORMWINDOWTEST> void _CompositeNativeLineOBJ(GPUEngineCompositorInfo &compInfo, const u16 *__restrict srcColorNative16, const FragmentColor *__restrict srcColorNative32); template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool WILLPERFORMWINDOWTEST> void _CompositeNativeLineOBJ(GPUEngineCompositorInfo &compInfo, const u16 *__restrict srcColorNative16, const Color4u8 *__restrict srcColorNative32);
template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE, bool WILLPERFORMWINDOWTEST> void _CompositeLineDeferred(GPUEngineCompositorInfo &compInfo, const u16 *__restrict srcColorCustom16, const u8 *__restrict srcIndexCustom); template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE, bool WILLPERFORMWINDOWTEST> void _CompositeLineDeferred(GPUEngineCompositorInfo &compInfo, const u16 *__restrict srcColorCustom16, const u8 *__restrict srcIndexCustom);
template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE, bool WILLPERFORMWINDOWTEST> void _CompositeVRAMLineDeferred(GPUEngineCompositorInfo &compInfo, const void *__restrict vramColorPtr); template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE, bool WILLPERFORMWINDOWTEST> void _CompositeVRAMLineDeferred(GPUEngineCompositorInfo &compInfo, const void *__restrict vramColorPtr);
template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool WILLPERFORMWINDOWTEST> void _CompositeNativeLineOBJ_LoopOp(GPUEngineCompositorInfo &compInfo, const u16 *__restrict srcColorNative16, const FragmentColor *__restrict srcColorNative32); template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool WILLPERFORMWINDOWTEST> void _CompositeNativeLineOBJ_LoopOp(GPUEngineCompositorInfo &compInfo, const u16 *__restrict srcColorNative16, const Color4u8 *__restrict srcColorNative32);
template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE, bool WILLPERFORMWINDOWTEST> size_t _CompositeLineDeferred_LoopOp(GPUEngineCompositorInfo &compInfo, const u8 *__restrict windowTestPtr, const u8 *__restrict colorEffectEnablePtr, const u16 *__restrict srcColorCustom16, const u8 *__restrict srcIndexCustom); template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE, bool WILLPERFORMWINDOWTEST> size_t _CompositeLineDeferred_LoopOp(GPUEngineCompositorInfo &compInfo, const u8 *__restrict windowTestPtr, const u8 *__restrict colorEffectEnablePtr, const u16 *__restrict srcColorCustom16, const u8 *__restrict srcIndexCustom);
template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE, bool WILLPERFORMWINDOWTEST> size_t _CompositeVRAMLineDeferred_LoopOp(GPUEngineCompositorInfo &compInfo, const u8 *__restrict windowTestPtr, const u8 *__restrict colorEffectEnablePtr, const void *__restrict vramColorPtr); template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE, bool WILLPERFORMWINDOWTEST> size_t _CompositeVRAMLineDeferred_LoopOp(GPUEngineCompositorInfo &compInfo, const u8 *__restrict windowTestPtr, const u8 *__restrict colorEffectEnablePtr, const void *__restrict vramColorPtr);
@ -1620,9 +1620,9 @@ public:
void ApplySettings(); void ApplySettings();
void RenderLineClearAsync(); void RenderLineClearAsync();
void RenderLineClearAsyncStart(bool willClearInternalCustomBuffer, s32 startLineIndex, u16 clearColor16, FragmentColor clearColor32); void RenderLineClearAsyncStart(bool willClearInternalCustomBuffer, size_t startLineIndex, u16 clearColor16, Color4u8 clearColor32);
void RenderLineClearAsyncFinish(); void RenderLineClearAsyncFinish();
void RenderLineClearAsyncWaitForCustomLine(const s32 l); void RenderLineClearAsyncWaitForCustomLine(const size_t l);
void TransitionRenderStatesToDisplayInfo(NDSDisplayInfo &mutableInfo); void TransitionRenderStatesToDisplayInfo(NDSDisplayInfo &mutableInfo);
@ -1651,12 +1651,12 @@ private:
protected: protected:
CACHE_ALIGN u16 _fifoLine16[GPU_FRAMEBUFFER_NATIVE_WIDTH]; CACHE_ALIGN u16 _fifoLine16[GPU_FRAMEBUFFER_NATIVE_WIDTH];
CACHE_ALIGN FragmentColor _fifoLine32[GPU_FRAMEBUFFER_NATIVE_WIDTH]; CACHE_ALIGN Color4u8 _fifoLine32[GPU_FRAMEBUFFER_NATIVE_WIDTH];
CACHE_ALIGN u16 _VRAMNativeBlockCaptureCopy[GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_VRAM_BLOCK_LINES * 4]; CACHE_ALIGN u16 _VRAMNativeBlockCaptureCopy[GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_VRAM_BLOCK_LINES * 4];
u16 *_VRAMNativeBlockCaptureCopyPtr[4]; u16 *_VRAMNativeBlockCaptureCopyPtr[4];
FragmentColor *_3DFramebufferMain; Color4u8 *_3DFramebufferMain;
u16 *_3DFramebuffer16; u16 *_3DFramebuffer16;
u16 *_VRAMNativeBlockPtr[4]; u16 *_VRAMNativeBlockPtr[4];
@ -1668,8 +1668,8 @@ protected:
u16 *_captureWorkingDisplay16; u16 *_captureWorkingDisplay16;
u16 *_captureWorkingA16; u16 *_captureWorkingA16;
u16 *_captureWorkingB16; u16 *_captureWorkingB16;
FragmentColor *_captureWorkingA32; Color4u8 *_captureWorkingA32;
FragmentColor *_captureWorkingB32; Color4u8 *_captureWorkingB32;
DISPCAPCNT_parsed _dispCapCnt; DISPCAPCNT_parsed _dispCapCnt;
bool _displayCaptureEnable; bool _displayCaptureEnable;
@ -1692,10 +1692,10 @@ protected:
void _RenderLine_DispCapture_Copy(const GPUEngineLineInfo &lineInfo, const void *src, void *dst, const size_t captureLengthExt); // Do not use restrict pointers, since src and dst can be the same void _RenderLine_DispCapture_Copy(const GPUEngineLineInfo &lineInfo, const void *src, void *dst, const size_t captureLengthExt); // Do not use restrict pointers, since src and dst can be the same
u16 _RenderLine_DispCapture_BlendFunc(const u16 srcA, const u16 srcB, const u8 blendEVA, const u8 blendEVB); u16 _RenderLine_DispCapture_BlendFunc(const u16 srcA, const u16 srcB, const u8 blendEVA, const u8 blendEVB);
template<NDSColorFormat COLORFORMAT> FragmentColor _RenderLine_DispCapture_BlendFunc(const FragmentColor srcA, const FragmentColor srcB, const u8 blendEVA, const u8 blendEVB); template<NDSColorFormat COLORFORMAT> Color4u8 _RenderLine_DispCapture_BlendFunc(const Color4u8 srcA, const Color4u8 srcB, const u8 blendEVA, const u8 blendEVB);
template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool WILLPERFORMWINDOWTEST> template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool WILLPERFORMWINDOWTEST>
size_t _RenderLine_Layer3D_LoopOp(GPUEngineCompositorInfo &compInfo, const u8 *__restrict windowTestPtr, const u8 *__restrict colorEffectEnablePtr, const FragmentColor *__restrict srcLinePtr); size_t _RenderLine_Layer3D_LoopOp(GPUEngineCompositorInfo &compInfo, const u8 *__restrict windowTestPtr, const u8 *__restrict colorEffectEnablePtr, const Color4u8 *__restrict srcLinePtr);
template<NDSColorFormat OUTPUTFORMAT> template<NDSColorFormat OUTPUTFORMAT>
void _RenderLine_DispCapture_Blend_Buffer(const void *srcA, const void *srcB, void *dst, const u8 blendEVA, const u8 blendEVB, const size_t pixCount); // Do not use restrict pointers, since srcB and dst can be the same void _RenderLine_DispCapture_Blend_Buffer(const void *srcA, const void *srcB, void *dst, const u8 blendEVA, const u8 blendEVB, const size_t pixCount); // Do not use restrict pointers, since srcB and dst can be the same
@ -1716,7 +1716,7 @@ public:
void ParseReg_DISPCAPCNT(); void ParseReg_DISPCAPCNT();
bool IsLineCaptureNative(const size_t blockID, const size_t blockLine); bool IsLineCaptureNative(const size_t blockID, const size_t blockLine);
void* GetCustomVRAMBlockPtr(const size_t blockID); void* GetCustomVRAMBlockPtr(const size_t blockID);
FragmentColor* Get3DFramebufferMain() const; Color4u8* Get3DFramebufferMain() const;
u16* Get3DFramebuffer16() const; u16* Get3DFramebuffer16() const;
virtual void AllocateWorkingBuffers(NDSColorFormat requestedColorFormat, size_t w, size_t h); virtual void AllocateWorkingBuffers(NDSColorFormat requestedColorFormat, size_t w, size_t h);
@ -1819,8 +1819,8 @@ public:
bool IsCustomSizeRequested() const; bool IsCustomSizeRequested() const;
void* GetRenderedBuffer() const; void* GetRenderedBuffer() const;
size_t GetRenderedWidth() const; size_t GetRenderedWidth() const;
size_t GetRenderedHeight() const; size_t GetRenderedHeight() const;
bool IsEnabled() const; bool IsEnabled() const;
void SetIsEnabled(bool stateIsEnabled); void SetIsEnabled(bool stateIsEnabled);

View File

@ -1,5 +1,5 @@
/* /*
Copyright (C) 2021 DeSmuME team Copyright (C) 2021-2023 DeSmuME team
This file is free software: you can redistribute it and/or modify This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -32,11 +32,11 @@ static CACHE_ALIGN u32 _gpuDstPitchIndex[GPU_FRAMEBUFFER_NATIVE_WIDTH]; // Key:
u8 PixelOperation::BlendTable555[17][17][32][32]; u8 PixelOperation::BlendTable555[17][17][32][32];
u16 PixelOperation::BrightnessUpTable555[17][0x8000]; u16 PixelOperation::BrightnessUpTable555[17][0x8000];
FragmentColor PixelOperation::BrightnessUpTable666[17][0x8000]; Color4u8 PixelOperation::BrightnessUpTable666[17][0x8000];
FragmentColor PixelOperation::BrightnessUpTable888[17][0x8000]; Color4u8 PixelOperation::BrightnessUpTable888[17][0x8000];
u16 PixelOperation::BrightnessDownTable555[17][0x8000]; u16 PixelOperation::BrightnessDownTable555[17][0x8000];
FragmentColor PixelOperation::BrightnessDownTable666[17][0x8000]; Color4u8 PixelOperation::BrightnessDownTable666[17][0x8000];
FragmentColor PixelOperation::BrightnessDownTable888[17][0x8000]; Color4u8 PixelOperation::BrightnessDownTable888[17][0x8000];
static CACHE_ALIGN ColorOperation colorop; static CACHE_ALIGN ColorOperation colorop;
static CACHE_ALIGN PixelOperation pixelop; static CACHE_ALIGN PixelOperation pixelop;
@ -71,9 +71,9 @@ FORCEINLINE u16 ColorOperation::blend(const u16 colA, const u16 colB, const TBle
} }
template <NDSColorFormat COLORFORMAT> template <NDSColorFormat COLORFORMAT>
FORCEINLINE FragmentColor ColorOperation::blend(const FragmentColor colA, const FragmentColor colB, const u16 blendEVA, const u16 blendEVB) const FORCEINLINE Color4u8 ColorOperation::blend(const Color4u8 colA, const Color4u8 colB, const u16 blendEVA, const u16 blendEVB) const
{ {
FragmentColor outColor; Color4u8 outColor;
u16 r16 = ( (colA.r * blendEVA) + (colB.r * blendEVB) ) / 16; u16 r16 = ( (colA.r * blendEVA) + (colB.r * blendEVB) ) / 16;
u16 g16 = ( (colA.g * blendEVA) + (colB.g * blendEVB) ) / 16; u16 g16 = ( (colA.g * blendEVA) + (colB.g * blendEVB) ) / 16;
@ -96,7 +96,7 @@ FORCEINLINE FragmentColor ColorOperation::blend(const FragmentColor colA, const
return outColor; return outColor;
} }
FORCEINLINE u16 ColorOperation::blend3D(const FragmentColor colA, const u16 colB) const FORCEINLINE u16 ColorOperation::blend3D(const Color4u8 colA, const u16 colB) const
{ {
const u16 alpha = colA.a + 1; const u16 alpha = colA.a + 1;
COLOR c2; COLOR c2;
@ -113,9 +113,9 @@ FORCEINLINE u16 ColorOperation::blend3D(const FragmentColor colA, const u16 colB
} }
template <NDSColorFormat COLORFORMAT> template <NDSColorFormat COLORFORMAT>
FORCEINLINE FragmentColor ColorOperation::blend3D(const FragmentColor colA, const FragmentColor colB) const FORCEINLINE Color4u8 ColorOperation::blend3D(const Color4u8 colA, const Color4u8 colB) const
{ {
FragmentColor blendedColor; Color4u8 blendedColor;
const u16 alpha = colA.a + 1; const u16 alpha = colA.a + 1;
if (COLORFORMAT == NDSColorFormat_BGR666_Rev) if (COLORFORMAT == NDSColorFormat_BGR666_Rev)
@ -149,10 +149,10 @@ FORCEINLINE u16 ColorOperation::increase(const u16 col, const u16 blendEVY) cons
} }
template <NDSColorFormat COLORFORMAT> template <NDSColorFormat COLORFORMAT>
FORCEINLINE FragmentColor ColorOperation::increase(const FragmentColor col, const u16 blendEVY) const FORCEINLINE Color4u8 ColorOperation::increase(const Color4u8 col, const u16 blendEVY) const
{ {
FragmentColor newColor; Color4u8 newColor;
newColor.color = 0; newColor.value = 0;
u32 r = col.r; u32 r = col.r;
u32 g = col.g; u32 g = col.g;
@ -188,10 +188,10 @@ FORCEINLINE u16 ColorOperation::decrease(const u16 col, const u16 blendEVY) cons
} }
template <NDSColorFormat COLORFORMAT> template <NDSColorFormat COLORFORMAT>
FORCEINLINE FragmentColor ColorOperation::decrease(const FragmentColor col, const u16 blendEVY) const FORCEINLINE Color4u8 ColorOperation::decrease(const Color4u8 col, const u16 blendEVY) const
{ {
FragmentColor newColor; Color4u8 newColor;
newColor.color = 0; newColor.value = 0;
u32 r = col.r; u32 r = col.r;
u32 g = col.g; u32 g = col.g;
@ -242,8 +242,8 @@ void PixelOperation::InitLUTs()
cur.bits.blue = (cur.bits.blue + ((31 - cur.bits.blue) * i / 16)); cur.bits.blue = (cur.bits.blue + ((31 - cur.bits.blue) * i / 16));
cur.bits.alpha = 0; cur.bits.alpha = 0;
PixelOperation::BrightnessUpTable555[i][j] = cur.val; PixelOperation::BrightnessUpTable555[i][j] = cur.val;
PixelOperation::BrightnessUpTable666[i][j].color = LOCAL_TO_LE_32( COLOR555TO666(cur.val) ); PixelOperation::BrightnessUpTable666[i][j].value = LOCAL_TO_LE_32( COLOR555TO666(cur.val) );
PixelOperation::BrightnessUpTable888[i][j].color = LOCAL_TO_LE_32( COLOR555TO888(cur.val) ); PixelOperation::BrightnessUpTable888[i][j].value = LOCAL_TO_LE_32( COLOR555TO888(cur.val) );
cur.val = j; cur.val = j;
cur.bits.red = (cur.bits.red - (cur.bits.red * i / 16)); cur.bits.red = (cur.bits.red - (cur.bits.red * i / 16));
@ -251,8 +251,8 @@ void PixelOperation::InitLUTs()
cur.bits.blue = (cur.bits.blue - (cur.bits.blue * i / 16)); cur.bits.blue = (cur.bits.blue - (cur.bits.blue * i / 16));
cur.bits.alpha = 0; cur.bits.alpha = 0;
PixelOperation::BrightnessDownTable555[i][j] = cur.val; PixelOperation::BrightnessDownTable555[i][j] = cur.val;
PixelOperation::BrightnessDownTable666[i][j].color = LOCAL_TO_LE_32( COLOR555TO666(cur.val) ); PixelOperation::BrightnessDownTable666[i][j].value = LOCAL_TO_LE_32( COLOR555TO666(cur.val) );
PixelOperation::BrightnessDownTable888[i][j].color = LOCAL_TO_LE_32( COLOR555TO888(cur.val) ); PixelOperation::BrightnessDownTable888[i][j].value = LOCAL_TO_LE_32( COLOR555TO888(cur.val) );
} }
} }
@ -279,7 +279,7 @@ template <NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER>
FORCEINLINE void PixelOperation::_copy16(GPUEngineCompositorInfo &compInfo, const u16 srcColor16) const FORCEINLINE void PixelOperation::_copy16(GPUEngineCompositorInfo &compInfo, const u16 srcColor16) const
{ {
u16 &dstColor16 = *compInfo.target.lineColor16; u16 &dstColor16 = *compInfo.target.lineColor16;
FragmentColor &dstColor32 = *compInfo.target.lineColor32; Color4u8 &dstColor32 = *compInfo.target.lineColor32;
u8 &dstLayerID = *compInfo.target.lineLayerID; u8 &dstLayerID = *compInfo.target.lineLayerID;
switch (OUTPUTFORMAT) switch (OUTPUTFORMAT)
@ -289,11 +289,11 @@ FORCEINLINE void PixelOperation::_copy16(GPUEngineCompositorInfo &compInfo, cons
break; break;
case NDSColorFormat_BGR666_Rev: case NDSColorFormat_BGR666_Rev:
dstColor32.color = LE_TO_LOCAL_32( ColorspaceConvert555To6665Opaque<false>(srcColor16) ); dstColor32.value = LE_TO_LOCAL_32( ColorspaceConvert555To6665Opaque<false>(srcColor16) );
break; break;
case NDSColorFormat_BGR888_Rev: case NDSColorFormat_BGR888_Rev:
dstColor32.color = LE_TO_LOCAL_32( ColorspaceConvert555To8888Opaque<false>(srcColor16) ); dstColor32.value = LE_TO_LOCAL_32( ColorspaceConvert555To8888Opaque<false>(srcColor16) );
break; break;
} }
@ -304,10 +304,10 @@ FORCEINLINE void PixelOperation::_copy16(GPUEngineCompositorInfo &compInfo, cons
} }
template <NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER> template <NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER>
FORCEINLINE void PixelOperation::_copy32(GPUEngineCompositorInfo &compInfo, const FragmentColor srcColor32) const FORCEINLINE void PixelOperation::_copy32(GPUEngineCompositorInfo &compInfo, const Color4u8 srcColor32) const
{ {
u16 &dstColor16 = *compInfo.target.lineColor16; u16 &dstColor16 = *compInfo.target.lineColor16;
FragmentColor &dstColor32 = *compInfo.target.lineColor32; Color4u8 &dstColor32 = *compInfo.target.lineColor32;
u8 &dstLayerID = *compInfo.target.lineLayerID; u8 &dstLayerID = *compInfo.target.lineLayerID;
switch (OUTPUTFORMAT) switch (OUTPUTFORMAT)
@ -341,7 +341,7 @@ template <NDSColorFormat OUTPUTFORMAT>
FORCEINLINE void PixelOperation::_brightnessUp16(GPUEngineCompositorInfo &compInfo, const u16 srcColor16) const FORCEINLINE void PixelOperation::_brightnessUp16(GPUEngineCompositorInfo &compInfo, const u16 srcColor16) const
{ {
u16 &dstColor16 = *compInfo.target.lineColor16; u16 &dstColor16 = *compInfo.target.lineColor16;
FragmentColor &dstColor32 = *compInfo.target.lineColor32; Color4u8 &dstColor32 = *compInfo.target.lineColor32;
u8 &dstLayerID = *compInfo.target.lineLayerID; u8 &dstLayerID = *compInfo.target.lineLayerID;
switch (OUTPUTFORMAT) switch (OUTPUTFORMAT)
@ -365,10 +365,10 @@ FORCEINLINE void PixelOperation::_brightnessUp16(GPUEngineCompositorInfo &compIn
} }
template <NDSColorFormat OUTPUTFORMAT> template <NDSColorFormat OUTPUTFORMAT>
FORCEINLINE void PixelOperation::_brightnessUp32(GPUEngineCompositorInfo &compInfo, const FragmentColor srcColor32) const FORCEINLINE void PixelOperation::_brightnessUp32(GPUEngineCompositorInfo &compInfo, const Color4u8 srcColor32) const
{ {
u16 &dstColor16 = *compInfo.target.lineColor16; u16 &dstColor16 = *compInfo.target.lineColor16;
FragmentColor &dstColor32 = *compInfo.target.lineColor32; Color4u8 &dstColor32 = *compInfo.target.lineColor32;
u8 &dstLayerID = *compInfo.target.lineLayerID; u8 &dstLayerID = *compInfo.target.lineLayerID;
if (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev) if (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev)
@ -390,7 +390,7 @@ template <NDSColorFormat OUTPUTFORMAT>
FORCEINLINE void PixelOperation::_brightnessDown16(GPUEngineCompositorInfo &compInfo, const u16 srcColor16) const FORCEINLINE void PixelOperation::_brightnessDown16(GPUEngineCompositorInfo &compInfo, const u16 srcColor16) const
{ {
u16 &dstColor16 = *compInfo.target.lineColor16; u16 &dstColor16 = *compInfo.target.lineColor16;
FragmentColor &dstColor32 = *compInfo.target.lineColor32; Color4u8 &dstColor32 = *compInfo.target.lineColor32;
u8 &dstLayerID = *compInfo.target.lineLayerID; u8 &dstLayerID = *compInfo.target.lineLayerID;
switch (OUTPUTFORMAT) switch (OUTPUTFORMAT)
@ -414,10 +414,10 @@ FORCEINLINE void PixelOperation::_brightnessDown16(GPUEngineCompositorInfo &comp
} }
template <NDSColorFormat OUTPUTFORMAT> template <NDSColorFormat OUTPUTFORMAT>
FORCEINLINE void PixelOperation::_brightnessDown32(GPUEngineCompositorInfo &compInfo, const FragmentColor srcColor32) const FORCEINLINE void PixelOperation::_brightnessDown32(GPUEngineCompositorInfo &compInfo, const Color4u8 srcColor32) const
{ {
u16 &dstColor16 = *compInfo.target.lineColor16; u16 &dstColor16 = *compInfo.target.lineColor16;
FragmentColor &dstColor32 = *compInfo.target.lineColor32; Color4u8 &dstColor32 = *compInfo.target.lineColor32;
u8 &dstLayerID = *compInfo.target.lineLayerID; u8 &dstLayerID = *compInfo.target.lineLayerID;
if (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev) if (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev)
@ -555,14 +555,14 @@ FORCEINLINE void PixelOperation::_unknownEffect16(GPUEngineCompositorInfo &compI
} }
else else
{ {
FragmentColor &dstColor32 = *compInfo.target.lineColor32; Color4u8 &dstColor32 = *compInfo.target.lineColor32;
if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev)
{ {
switch (selectedEffect) switch (selectedEffect)
{ {
case ColorEffect_Disable: case ColorEffect_Disable:
dstColor32.color = LE_TO_LOCAL_32( ColorspaceConvert555To6665Opaque<false>(srcColor16) ); dstColor32.value = LE_TO_LOCAL_32( ColorspaceConvert555To6665Opaque<false>(srcColor16) );
break; break;
case ColorEffect_IncreaseBrightness: case ColorEffect_IncreaseBrightness:
@ -575,8 +575,8 @@ FORCEINLINE void PixelOperation::_unknownEffect16(GPUEngineCompositorInfo &compI
case ColorEffect_Blend: case ColorEffect_Blend:
{ {
FragmentColor srcColor32; Color4u8 srcColor32;
srcColor32.color = LE_TO_LOCAL_32( ColorspaceConvert555To6665Opaque<false>(srcColor16) ); srcColor32.value = LE_TO_LOCAL_32( ColorspaceConvert555To6665Opaque<false>(srcColor16) );
dstColor32 = (LAYERTYPE == GPULayerType_3D) ? colorop.blend3D<OUTPUTFORMAT>(srcColor32, dstColor32) : colorop.blend<OUTPUTFORMAT>(srcColor32, dstColor32, blendEVA, blendEVB); dstColor32 = (LAYERTYPE == GPULayerType_3D) ? colorop.blend3D<OUTPUTFORMAT>(srcColor32, dstColor32) : colorop.blend<OUTPUTFORMAT>(srcColor32, dstColor32, blendEVA, blendEVB);
break; break;
} }
@ -587,7 +587,7 @@ FORCEINLINE void PixelOperation::_unknownEffect16(GPUEngineCompositorInfo &compI
switch (selectedEffect) switch (selectedEffect)
{ {
case ColorEffect_Disable: case ColorEffect_Disable:
dstColor32.color = LE_TO_LOCAL_32( ColorspaceConvert555To8888Opaque<false>(srcColor16) ); dstColor32.value = LE_TO_LOCAL_32( ColorspaceConvert555To8888Opaque<false>(srcColor16) );
break; break;
case ColorEffect_IncreaseBrightness: case ColorEffect_IncreaseBrightness:
@ -600,8 +600,8 @@ FORCEINLINE void PixelOperation::_unknownEffect16(GPUEngineCompositorInfo &compI
case ColorEffect_Blend: case ColorEffect_Blend:
{ {
FragmentColor srcColor32; Color4u8 srcColor32;
srcColor32.color = LE_TO_LOCAL_32( ColorspaceConvert555To8888Opaque<false>(srcColor16) ); srcColor32.value = LE_TO_LOCAL_32( ColorspaceConvert555To8888Opaque<false>(srcColor16) );
dstColor32 = (LAYERTYPE == GPULayerType_3D) ? colorop.blend3D<OUTPUTFORMAT>(srcColor32, dstColor32) : colorop.blend<OUTPUTFORMAT>(srcColor32, dstColor32, blendEVA, blendEVB); dstColor32 = (LAYERTYPE == GPULayerType_3D) ? colorop.blend3D<OUTPUTFORMAT>(srcColor32, dstColor32) : colorop.blend<OUTPUTFORMAT>(srcColor32, dstColor32, blendEVA, blendEVB);
break; break;
} }
@ -613,7 +613,7 @@ FORCEINLINE void PixelOperation::_unknownEffect16(GPUEngineCompositorInfo &compI
} }
template <NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE> template <NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE>
FORCEINLINE void PixelOperation::_unknownEffect32(GPUEngineCompositorInfo &compInfo, const FragmentColor srcColor32, const bool enableColorEffect, const u8 spriteAlpha, const OBJMode spriteMode) const FORCEINLINE void PixelOperation::_unknownEffect32(GPUEngineCompositorInfo &compInfo, const Color4u8 srcColor32, const bool enableColorEffect, const u8 spriteAlpha, const OBJMode spriteMode) const
{ {
u8 &dstLayerID = *compInfo.target.lineLayerID; u8 &dstLayerID = *compInfo.target.lineLayerID;
TBlendTable *selectedBlendTable = compInfo.renderState.blendTable555; TBlendTable *selectedBlendTable = compInfo.renderState.blendTable555;
@ -663,7 +663,7 @@ FORCEINLINE void PixelOperation::_unknownEffect32(GPUEngineCompositorInfo &compI
} }
else else
{ {
FragmentColor &dstColor32 = *compInfo.target.lineColor32; Color4u8 &dstColor32 = *compInfo.target.lineColor32;
switch (selectedEffect) switch (selectedEffect)
{ {
@ -716,7 +716,7 @@ FORCEINLINE void PixelOperation::Composite16(GPUEngineCompositorInfo &compInfo,
} }
template <GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE> template <GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE>
FORCEINLINE void PixelOperation::Composite32(GPUEngineCompositorInfo &compInfo, FragmentColor srcColor32, const bool enableColorEffect, const u8 spriteAlpha, const u8 spriteMode) const FORCEINLINE void PixelOperation::Composite32(GPUEngineCompositorInfo &compInfo, Color4u8 srcColor32, const bool enableColorEffect, const u8 spriteAlpha, const u8 spriteMode) const
{ {
switch (COMPOSITORMODE) switch (COMPOSITORMODE)
{ {
@ -1013,7 +1013,7 @@ void GPUEngineBase::_MosaicLine(GPUEngineCompositorInfo &compInfo)
} }
template <GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool WILLPERFORMWINDOWTEST> template <GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool WILLPERFORMWINDOWTEST>
void GPUEngineBase::_CompositeNativeLineOBJ_LoopOp(GPUEngineCompositorInfo &compInfo, const u16 *__restrict srcColorNative16, const FragmentColor *__restrict srcColorNative32) void GPUEngineBase::_CompositeNativeLineOBJ_LoopOp(GPUEngineCompositorInfo &compInfo, const u16 *__restrict srcColorNative16, const Color4u8 *__restrict srcColorNative32)
{ {
// Do nothing. This is a placeholder for a manually vectorized version of this method. // Do nothing. This is a placeholder for a manually vectorized version of this method.
} }
@ -1086,7 +1086,7 @@ void GPUEngineBase::_PerformWindowTestingNative(GPUEngineCompositorInfo &compInf
} }
template <GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool WILLPERFORMWINDOWTEST> template <GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool WILLPERFORMWINDOWTEST>
size_t GPUEngineA::_RenderLine_Layer3D_LoopOp(GPUEngineCompositorInfo &compInfo, const u8 *__restrict windowTestPtr, const u8 *__restrict colorEffectEnablePtr, const FragmentColor *__restrict srcLinePtr) size_t GPUEngineA::_RenderLine_Layer3D_LoopOp(GPUEngineCompositorInfo &compInfo, const u8 *__restrict windowTestPtr, const u8 *__restrict colorEffectEnablePtr, const Color4u8 *__restrict srcLinePtr)
{ {
// Do nothing. This is a placeholder for a manually vectorized version of this method. // Do nothing. This is a placeholder for a manually vectorized version of this method.
return 0; return 0;

View File

@ -1,5 +1,5 @@
/* /*
Copyright (C) 2021 DeSmuME team Copyright (C) 2021-2023 DeSmuME team
This file is free software: you can redistribute it and/or modify This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -47,16 +47,16 @@ public:
FORCEINLINE u16 blend(const u16 colA, const u16 colB, const u16 blendEVA, const u16 blendEVB) const; FORCEINLINE u16 blend(const u16 colA, const u16 colB, const u16 blendEVA, const u16 blendEVB) const;
FORCEINLINE u16 blend(const u16 colA, const u16 colB, const TBlendTable *blendTable) const; FORCEINLINE u16 blend(const u16 colA, const u16 colB, const TBlendTable *blendTable) const;
template<NDSColorFormat COLORFORMAT> FORCEINLINE FragmentColor blend(const FragmentColor colA, const FragmentColor colB, const u16 blendEVA, const u16 blendEVB) const; template<NDSColorFormat COLORFORMAT> FORCEINLINE Color4u8 blend(const Color4u8 colA, const Color4u8 colB, const u16 blendEVA, const u16 blendEVB) const;
FORCEINLINE u16 blend3D(const FragmentColor colA, const u16 colB) const; FORCEINLINE u16 blend3D(const Color4u8 colA, const u16 colB) const;
template<NDSColorFormat COLORFORMAT> FORCEINLINE FragmentColor blend3D(const FragmentColor colA, const FragmentColor colB) const; template<NDSColorFormat COLORFORMAT> FORCEINLINE Color4u8 blend3D(const Color4u8 colA, const Color4u8 colB) const;
FORCEINLINE u16 increase(const u16 col, const u16 blendEVY) const; FORCEINLINE u16 increase(const u16 col, const u16 blendEVY) const;
template<NDSColorFormat COLORFORMAT> FORCEINLINE FragmentColor increase(const FragmentColor col, const u16 blendEVY) const; template<NDSColorFormat COLORFORMAT> FORCEINLINE Color4u8 increase(const Color4u8 col, const u16 blendEVY) const;
FORCEINLINE u16 decrease(const u16 col, const u16 blendEVY) const; FORCEINLINE u16 decrease(const u16 col, const u16 blendEVY) const;
template<NDSColorFormat COLORFORMAT> FORCEINLINE FragmentColor decrease(const FragmentColor col, const u16 blendEVY) const; template<NDSColorFormat COLORFORMAT> FORCEINLINE Color4u8 decrease(const Color4u8 col, const u16 blendEVY) const;
}; };
class PixelOperation class PixelOperation
@ -66,31 +66,31 @@ private:
protected: protected:
template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER> FORCEINLINE void _copy16(GPUEngineCompositorInfo &compInfo, const u16 srcColor16) const; template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER> FORCEINLINE void _copy16(GPUEngineCompositorInfo &compInfo, const u16 srcColor16) const;
template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER> FORCEINLINE void _copy32(GPUEngineCompositorInfo &compInfo, const FragmentColor srcColor32) const; template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER> FORCEINLINE void _copy32(GPUEngineCompositorInfo &compInfo, const Color4u8 srcColor32) const;
template<NDSColorFormat OUTPUTFORMAT> FORCEINLINE void _brightnessUp16(GPUEngineCompositorInfo &compInfo, const u16 srcColor16) const; template<NDSColorFormat OUTPUTFORMAT> FORCEINLINE void _brightnessUp16(GPUEngineCompositorInfo &compInfo, const u16 srcColor16) const;
template<NDSColorFormat OUTPUTFORMAT> FORCEINLINE void _brightnessUp32(GPUEngineCompositorInfo &compInfo, const FragmentColor srcColor32) const; template<NDSColorFormat OUTPUTFORMAT> FORCEINLINE void _brightnessUp32(GPUEngineCompositorInfo &compInfo, const Color4u8 srcColor32) const;
template<NDSColorFormat OUTPUTFORMAT> FORCEINLINE void _brightnessDown16(GPUEngineCompositorInfo &compInfo, const u16 srcColor16) const; template<NDSColorFormat OUTPUTFORMAT> FORCEINLINE void _brightnessDown16(GPUEngineCompositorInfo &compInfo, const u16 srcColor16) const;
template<NDSColorFormat OUTPUTFORMAT> FORCEINLINE void _brightnessDown32(GPUEngineCompositorInfo &compInfo, const FragmentColor srcColor32) const; template<NDSColorFormat OUTPUTFORMAT> FORCEINLINE void _brightnessDown32(GPUEngineCompositorInfo &compInfo, const Color4u8 srcColor32) const;
template<NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE> FORCEINLINE void _unknownEffect16(GPUEngineCompositorInfo &compInfo, const u16 srcColor16, const bool enableColorEffect, const u8 spriteAlpha, const OBJMode spriteMode) const; template<NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE> FORCEINLINE void _unknownEffect16(GPUEngineCompositorInfo &compInfo, const u16 srcColor16, const bool enableColorEffect, const u8 spriteAlpha, const OBJMode spriteMode) const;
template<NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE> FORCEINLINE void _unknownEffect32(GPUEngineCompositorInfo &compInfo, const FragmentColor srcColor32, const bool enableColorEffect, const u8 spriteAlpha, const OBJMode spriteMode) const; template<NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE> FORCEINLINE void _unknownEffect32(GPUEngineCompositorInfo &compInfo, const Color4u8 srcColor32, const bool enableColorEffect, const u8 spriteAlpha, const OBJMode spriteMode) const;
public: public:
static CACHE_ALIGN u8 BlendTable555[17][17][32][32]; static CACHE_ALIGN u8 BlendTable555[17][17][32][32];
static CACHE_ALIGN u16 BrightnessUpTable555[17][0x8000]; static CACHE_ALIGN u16 BrightnessUpTable555[17][0x8000];
static CACHE_ALIGN FragmentColor BrightnessUpTable666[17][0x8000]; static CACHE_ALIGN Color4u8 BrightnessUpTable666[17][0x8000];
static CACHE_ALIGN FragmentColor BrightnessUpTable888[17][0x8000]; static CACHE_ALIGN Color4u8 BrightnessUpTable888[17][0x8000];
static CACHE_ALIGN u16 BrightnessDownTable555[17][0x8000]; static CACHE_ALIGN u16 BrightnessDownTable555[17][0x8000];
static CACHE_ALIGN FragmentColor BrightnessDownTable666[17][0x8000]; static CACHE_ALIGN Color4u8 BrightnessDownTable666[17][0x8000];
static CACHE_ALIGN FragmentColor BrightnessDownTable888[17][0x8000]; static CACHE_ALIGN Color4u8 BrightnessDownTable888[17][0x8000];
static void InitLUTs(); static void InitLUTs();
PixelOperation() {}; PixelOperation() {};
template <GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE> FORCEINLINE void Composite16(GPUEngineCompositorInfo &compInfo, const u16 srcColor16, const bool enableColorEffect, const u8 spriteAlpha, const u8 spriteMode) const; template <GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE> FORCEINLINE void Composite16(GPUEngineCompositorInfo &compInfo, const u16 srcColor16, const bool enableColorEffect, const u8 spriteAlpha, const u8 spriteMode) const;
template <GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE> FORCEINLINE void Composite32(GPUEngineCompositorInfo &compInfo, FragmentColor srcColor32, const bool enableColorEffect, const u8 spriteAlpha, const u8 spriteMode) const; template <GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE> FORCEINLINE void Composite32(GPUEngineCompositorInfo &compInfo, Color4u8 srcColor32, const bool enableColorEffect, const u8 spriteAlpha, const u8 spriteMode) const;
}; };
#endif // GPU_OPERATIONS_H #endif // GPU_OPERATIONS_H

View File

@ -1,5 +1,5 @@
/* /*
Copyright (C) 2021-2022 DeSmuME team Copyright (C) 2021-2023 DeSmuME team
This file is free software: you can redistribute it and/or modify This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -2505,7 +2505,7 @@ void GPUEngineBase::_MosaicLine(GPUEngineCompositorInfo &compInfo)
} }
template <GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool WILLPERFORMWINDOWTEST> template <GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool WILLPERFORMWINDOWTEST>
void GPUEngineBase::_CompositeNativeLineOBJ_LoopOp(GPUEngineCompositorInfo &compInfo, const u16 *__restrict srcColorNative16, const FragmentColor *__restrict srcColorNative32) void GPUEngineBase::_CompositeNativeLineOBJ_LoopOp(GPUEngineCompositorInfo &compInfo, const u16 *__restrict srcColorNative16, const Color4u8 *__restrict srcColorNative32)
{ {
static const size_t step = sizeof(v256u8); static const size_t step = sizeof(v256u8);
@ -2749,10 +2749,10 @@ size_t GPUEngineBase::_CompositeVRAMLineDeferred_LoopOp(GPUEngineCompositorInfo
case NDSColorFormat_BGR888_Rev: case NDSColorFormat_BGR888_Rev:
{ {
const v256u32 src32[4] = { const v256u32 src32[4] = {
_mm256_load_si256((v256u32 *)((FragmentColor *)vramColorPtr + i) + 0), _mm256_load_si256((v256u32 *)((Color4u8 *)vramColorPtr + i) + 0),
_mm256_load_si256((v256u32 *)((FragmentColor *)vramColorPtr + i) + 1), _mm256_load_si256((v256u32 *)((Color4u8 *)vramColorPtr + i) + 1),
_mm256_load_si256((v256u32 *)((FragmentColor *)vramColorPtr + i) + 2), _mm256_load_si256((v256u32 *)((Color4u8 *)vramColorPtr + i) + 2),
_mm256_load_si256((v256u32 *)((FragmentColor *)vramColorPtr + i) + 3) _mm256_load_si256((v256u32 *)((Color4u8 *)vramColorPtr + i) + 3)
}; };
if (LAYERTYPE != GPULayerType_OBJ) if (LAYERTYPE != GPULayerType_OBJ)
@ -2906,7 +2906,7 @@ void GPUEngineBase::_PerformWindowTestingNative(GPUEngineCompositorInfo &compInf
} }
template <GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool WILLPERFORMWINDOWTEST> template <GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool WILLPERFORMWINDOWTEST>
size_t GPUEngineA::_RenderLine_Layer3D_LoopOp(GPUEngineCompositorInfo &compInfo, const u8 *__restrict windowTestPtr, const u8 *__restrict colorEffectEnablePtr, const FragmentColor *__restrict srcLinePtr) size_t GPUEngineA::_RenderLine_Layer3D_LoopOp(GPUEngineCompositorInfo &compInfo, const u8 *__restrict windowTestPtr, const u8 *__restrict colorEffectEnablePtr, const Color4u8 *__restrict srcLinePtr)
{ {
static const size_t step = sizeof(v256u32); static const size_t step = sizeof(v256u32);

View File

@ -1,5 +1,5 @@
/* /*
Copyright (C) 2021-2022 DeSmuME team Copyright (C) 2021-2023 DeSmuME team
This file is free software: you can redistribute it and/or modify This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -2287,7 +2287,7 @@ void GPUEngineBase::_MosaicLine(GPUEngineCompositorInfo &compInfo)
} }
template <GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool WILLPERFORMWINDOWTEST> template <GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool WILLPERFORMWINDOWTEST>
void GPUEngineBase::_CompositeNativeLineOBJ_LoopOp(GPUEngineCompositorInfo &compInfo, const u16 *__restrict srcColorNative16, const FragmentColor *__restrict srcColorNative32) void GPUEngineBase::_CompositeNativeLineOBJ_LoopOp(GPUEngineCompositorInfo &compInfo, const u16 *__restrict srcColorNative16, const Color4u8 *__restrict srcColorNative32)
{ {
static const size_t step = sizeof(v128u8); static const size_t step = sizeof(v128u8);
@ -2530,10 +2530,10 @@ size_t GPUEngineBase::_CompositeVRAMLineDeferred_LoopOp(GPUEngineCompositorInfo
case NDSColorFormat_BGR888_Rev: case NDSColorFormat_BGR888_Rev:
{ {
const v128u32 src32[4] = { const v128u32 src32[4] = {
_mm_load_si128((v128u32 *)((FragmentColor *)vramColorPtr + i) + 0), _mm_load_si128((v128u32 *)((Color4u8 *)vramColorPtr + i) + 0),
_mm_load_si128((v128u32 *)((FragmentColor *)vramColorPtr + i) + 1), _mm_load_si128((v128u32 *)((Color4u8 *)vramColorPtr + i) + 1),
_mm_load_si128((v128u32 *)((FragmentColor *)vramColorPtr + i) + 2), _mm_load_si128((v128u32 *)((Color4u8 *)vramColorPtr + i) + 2),
_mm_load_si128((v128u32 *)((FragmentColor *)vramColorPtr + i) + 3) _mm_load_si128((v128u32 *)((Color4u8 *)vramColorPtr + i) + 3)
}; };
if (LAYERTYPE != GPULayerType_OBJ) if (LAYERTYPE != GPULayerType_OBJ)
@ -2688,7 +2688,7 @@ void GPUEngineBase::_PerformWindowTestingNative(GPUEngineCompositorInfo &compInf
} }
template <GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool WILLPERFORMWINDOWTEST> template <GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool WILLPERFORMWINDOWTEST>
size_t GPUEngineA::_RenderLine_Layer3D_LoopOp(GPUEngineCompositorInfo &compInfo, const u8 *__restrict windowTestPtr, const u8 *__restrict colorEffectEnablePtr, const FragmentColor *__restrict srcLinePtr) size_t GPUEngineA::_RenderLine_Layer3D_LoopOp(GPUEngineCompositorInfo &compInfo, const u8 *__restrict windowTestPtr, const u8 *__restrict colorEffectEnablePtr, const Color4u8 *__restrict srcLinePtr)
{ {
static const size_t step = sizeof(v128u8); static const size_t step = sizeof(v128u8);

View File

@ -1163,7 +1163,7 @@ OpenGLRenderer::OpenGLRenderer()
memset(ref, 0, sizeof(OGLRenderRef)); memset(ref, 0, sizeof(OGLRenderRef));
_mappedFramebuffer = NULL; _mappedFramebuffer = NULL;
_workingTextureUnpackBuffer = (FragmentColor *)malloc_alignedCacheLine(1024 * 1024 * sizeof(FragmentColor)); _workingTextureUnpackBuffer = (Color4u8 *)malloc_alignedCacheLine(1024 * 1024 * sizeof(Color4u8));
_pixelReadNeedsFinish = false; _pixelReadNeedsFinish = false;
_needsZeroDstAlphaPass = true; _needsZeroDstAlphaPass = true;
_currentPolyIndex = 0; _currentPolyIndex = 0;
@ -1352,8 +1352,8 @@ bool OpenGLRenderer::IsVersionSupported(unsigned int checkVersionMajor, unsigned
return result; return result;
} }
Render3DError OpenGLRenderer::_FlushFramebufferFlipAndConvertOnCPU(const FragmentColor *__restrict srcFramebuffer, Render3DError OpenGLRenderer::_FlushFramebufferFlipAndConvertOnCPU(const Color4u8 *__restrict srcFramebuffer,
FragmentColor *__restrict dstFramebufferMain, u16 *__restrict dstFramebuffer16, Color4u8 *__restrict dstFramebufferMain, u16 *__restrict dstFramebuffer16,
bool doFramebufferFlip, bool doFramebufferConvert) bool doFramebufferFlip, bool doFramebufferConvert)
{ {
if ( ((dstFramebufferMain == NULL) && (dstFramebuffer16 == NULL)) || (srcFramebuffer == NULL) ) if ( ((dstFramebufferMain == NULL) && (dstFramebuffer16 == NULL)) || (srcFramebuffer == NULL) )
@ -1389,7 +1389,7 @@ Render3DError OpenGLRenderer::_FlushFramebufferFlipAndConvertOnCPU(const Fragmen
#endif #endif
for (; i < this->_framebufferPixCount; i++) for (; i < this->_framebufferPixCount; i++)
{ {
dstFramebufferMain[i].color = ColorspaceCopy32<false>(srcFramebuffer[i]); dstFramebufferMain[i].value = ColorspaceCopy32<false>(srcFramebuffer[i]);
dstFramebuffer16[i] = ColorspaceConvert8888To5551<false>(srcFramebuffer[i]); dstFramebuffer16[i] = ColorspaceConvert8888To5551<false>(srcFramebuffer[i]);
} }
@ -1429,7 +1429,7 @@ Render3DError OpenGLRenderer::_FlushFramebufferFlipAndConvertOnCPU(const Fragmen
#endif #endif
for (; i < this->_framebufferPixCount; i++) for (; i < this->_framebufferPixCount; i++)
{ {
dstFramebufferMain[i].color = ColorspaceConvert8888To6665<true>(srcFramebuffer[i]); dstFramebufferMain[i].value = ColorspaceConvert8888To6665<true>(srcFramebuffer[i]);
dstFramebuffer16[i] = ColorspaceConvert8888To5551<true>(srcFramebuffer[i]); dstFramebuffer16[i] = ColorspaceConvert8888To5551<true>(srcFramebuffer[i]);
} }
@ -1467,7 +1467,7 @@ Render3DError OpenGLRenderer::_FlushFramebufferFlipAndConvertOnCPU(const Fragmen
#endif #endif
for (; i < this->_framebufferPixCount; i++) for (; i < this->_framebufferPixCount; i++)
{ {
dstFramebufferMain[i].color = ColorspaceCopy32<true>(srcFramebuffer[i]); dstFramebufferMain[i].value = ColorspaceCopy32<true>(srcFramebuffer[i]);
dstFramebuffer16[i] = ColorspaceConvert8888To5551<true>(srcFramebuffer[i]); dstFramebuffer16[i] = ColorspaceConvert8888To5551<true>(srcFramebuffer[i]);
} }
@ -1514,7 +1514,7 @@ Render3DError OpenGLRenderer::_FlushFramebufferFlipAndConvertOnCPU(const Fragmen
#endif #endif
for (; x < pixCount; x++, ir++, iw++) for (; x < pixCount; x++, ir++, iw++)
{ {
dstFramebufferMain[iw].color = ColorspaceCopy32<false>(srcFramebuffer[ir]); dstFramebufferMain[iw].value = ColorspaceCopy32<false>(srcFramebuffer[ir]);
dstFramebuffer16[iw] = ColorspaceConvert8888To5551<false>(srcFramebuffer[ir]); dstFramebuffer16[iw] = ColorspaceConvert8888To5551<false>(srcFramebuffer[ir]);
} }
} }
@ -1566,7 +1566,7 @@ Render3DError OpenGLRenderer::_FlushFramebufferFlipAndConvertOnCPU(const Fragmen
#endif #endif
for (; x < pixCount; x++, ir++, iw++) for (; x < pixCount; x++, ir++, iw++)
{ {
dstFramebufferMain[iw].color = ColorspaceConvert8888To6665<true>(srcFramebuffer[ir]); dstFramebufferMain[iw].value = ColorspaceConvert8888To6665<true>(srcFramebuffer[ir]);
dstFramebuffer16[iw] = ColorspaceConvert8888To5551<true>(srcFramebuffer[ir]); dstFramebuffer16[iw] = ColorspaceConvert8888To5551<true>(srcFramebuffer[ir]);
} }
} }
@ -1616,7 +1616,7 @@ Render3DError OpenGLRenderer::_FlushFramebufferFlipAndConvertOnCPU(const Fragmen
#endif #endif
for (; x < pixCount; x++, ir++, iw++) for (; x < pixCount; x++, ir++, iw++)
{ {
dstFramebufferMain[iw].color = ColorspaceCopy32<true>(srcFramebuffer[ir]); dstFramebufferMain[iw].value = ColorspaceCopy32<true>(srcFramebuffer[ir]);
dstFramebuffer16[iw] = ColorspaceConvert8888To5551<true>(srcFramebuffer[ir]); dstFramebuffer16[iw] = ColorspaceConvert8888To5551<true>(srcFramebuffer[ir]);
} }
} }
@ -1649,7 +1649,7 @@ Render3DError OpenGLRenderer::_FlushFramebufferFlipAndConvertOnCPU(const Fragmen
return RENDER3DERROR_NOERR; return RENDER3DERROR_NOERR;
} }
Render3DError OpenGLRenderer::FlushFramebuffer(const FragmentColor *__restrict srcFramebuffer, FragmentColor *__restrict dstFramebufferMain, u16 *__restrict dstFramebuffer16) Render3DError OpenGLRenderer::FlushFramebuffer(const Color4u8 *__restrict srcFramebuffer, Color4u8 *__restrict dstFramebufferMain, u16 *__restrict dstFramebuffer16)
{ {
if (this->willFlipAndConvertFramebufferOnGPU && this->isPBOSupported) if (this->willFlipAndConvertFramebufferOnGPU && this->isPBOSupported)
{ {
@ -1666,7 +1666,7 @@ Render3DError OpenGLRenderer::FlushFramebuffer(const FragmentColor *__restrict s
return RENDER3DERROR_NOERR; return RENDER3DERROR_NOERR;
} }
FragmentColor* OpenGLRenderer::GetFramebuffer() Color4u8* OpenGLRenderer::GetFramebuffer()
{ {
return (this->willFlipAndConvertFramebufferOnGPU && this->isPBOSupported) ? this->_mappedFramebuffer : GPU->GetEngineMain()->Get3DFramebufferMain(); return (this->willFlipAndConvertFramebufferOnGPU && this->isPBOSupported) ? this->_mappedFramebuffer : GPU->GetEngineMain()->Get3DFramebufferMain();
} }
@ -2683,7 +2683,7 @@ Render3DError OpenGLRenderer_1_2::CreatePBOs()
glGenBuffersARB(1, &OGLRef.pboRenderDataID); glGenBuffersARB(1, &OGLRef.pboRenderDataID);
glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, OGLRef.pboRenderDataID); glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, OGLRef.pboRenderDataID);
glBufferDataARB(GL_PIXEL_PACK_BUFFER_ARB, this->_framebufferColorSizeBytes, NULL, GL_STREAM_READ_ARB); glBufferDataARB(GL_PIXEL_PACK_BUFFER_ARB, this->_framebufferColorSizeBytes, NULL, GL_STREAM_READ_ARB);
this->_mappedFramebuffer = (FragmentColor *__restrict)glMapBuffer(GL_PIXEL_PACK_BUFFER_ARB, GL_READ_ONLY_ARB); this->_mappedFramebuffer = (Color4u8 *__restrict)glMapBuffer(GL_PIXEL_PACK_BUFFER_ARB, GL_READ_ONLY_ARB);
return OGLERROR_NOERR; return OGLERROR_NOERR;
} }
@ -4352,11 +4352,11 @@ Render3DError OpenGLRenderer_1_2::BeginRender(const GFX3D_State &renderState, co
if (this->_enableEdgeMark && this->_deviceInfo.isEdgeMarkSupported) if (this->_enableEdgeMark && this->_deviceInfo.isEdgeMarkSupported)
{ {
const u8 alpha8 = (renderState.DISP3DCNT.EnableAntialiasing) ? 0x80 : 0xFF; const u8 alpha8 = (renderState.DISP3DCNT.EnableAntialiasing) ? 0x80 : 0xFF;
FragmentColor edgeColor32[8]; Color4u8 edgeColor32[8];
for (size_t i = 0; i < 8; i++) for (size_t i = 0; i < 8; i++)
{ {
edgeColor32[i].color = COLOR555TO8888(renderState.edgeMarkColorTable[i] & 0x7FFF, alpha8); edgeColor32[i].value = COLOR555TO8888(renderState.edgeMarkColorTable[i] & 0x7FFF, alpha8);
} }
glActiveTexture(GL_TEXTURE0 + OGLTextureUnitID_LookupTable); glActiveTexture(GL_TEXTURE0 + OGLTextureUnitID_LookupTable);
@ -4760,7 +4760,7 @@ Render3DError OpenGLRenderer_1_2::ClearUsingImage(const u16 *__restrict colorBuf
return OGLERROR_NOERR; return OGLERROR_NOERR;
} }
Render3DError OpenGLRenderer_1_2::ClearUsingValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes) Render3DError OpenGLRenderer_1_2::ClearUsingValues(const Color4u8 &clearColor6665, const FragmentAttributes &clearAttributes)
{ {
OGLRenderRef &OGLRef = *this->ref; OGLRenderRef &OGLRef = *this->ref;
@ -5250,7 +5250,7 @@ Render3DError OpenGLRenderer_1_2::RenderFinish()
if (this->isPBOSupported) if (this->isPBOSupported)
{ {
this->_mappedFramebuffer = (FragmentColor *__restrict)glMapBufferARB(GL_PIXEL_PACK_BUFFER_ARB, GL_READ_ONLY_ARB); this->_mappedFramebuffer = (Color4u8 *__restrict)glMapBufferARB(GL_PIXEL_PACK_BUFFER_ARB, GL_READ_ONLY_ARB);
} }
else else
{ {
@ -5273,7 +5273,7 @@ Render3DError OpenGLRenderer_1_2::RenderFlush(bool willFlushBuffer32, bool willF
return RENDER3DERROR_NOERR; return RENDER3DERROR_NOERR;
} }
FragmentColor *framebufferMain = (willFlushBuffer32) ? GPU->GetEngineMain()->Get3DFramebufferMain() : NULL; Color4u8 *framebufferMain = (willFlushBuffer32) ? GPU->GetEngineMain()->Get3DFramebufferMain() : NULL;
u16 *framebuffer16 = (willFlushBuffer16) ? GPU->GetEngineMain()->Get3DFramebuffer16() : NULL; u16 *framebuffer16 = (willFlushBuffer16) ? GPU->GetEngineMain()->Get3DFramebuffer16() : NULL;
if (this->isPBOSupported) if (this->isPBOSupported)
@ -5305,7 +5305,7 @@ Render3DError OpenGLRenderer_1_2::SetFramebufferSize(size_t w, size_t h)
glFinish(); glFinish();
const size_t newFramebufferColorSizeBytes = w * h * sizeof(FragmentColor); const size_t newFramebufferColorSizeBytes = w * h * sizeof(Color4u8);
if (this->isPBOSupported) if (this->isPBOSupported)
{ {
@ -5319,7 +5319,7 @@ Render3DError OpenGLRenderer_1_2::SetFramebufferSize(size_t w, size_t h)
if (this->_mappedFramebuffer != NULL) if (this->_mappedFramebuffer != NULL)
{ {
this->_mappedFramebuffer = (FragmentColor *__restrict)glMapBufferARB(GL_PIXEL_PACK_BUFFER_ARB, GL_READ_ONLY_ARB); this->_mappedFramebuffer = (Color4u8 *__restrict)glMapBufferARB(GL_PIXEL_PACK_BUFFER_ARB, GL_READ_ONLY_ARB);
glFinish(); glFinish();
} }
} }
@ -5363,8 +5363,8 @@ Render3DError OpenGLRenderer_1_2::SetFramebufferSize(size_t w, size_t h)
} }
else else
{ {
FragmentColor *oldFramebufferColor = this->_framebufferColor; Color4u8 *oldFramebufferColor = this->_framebufferColor;
FragmentColor *newFramebufferColor = (FragmentColor *)malloc_alignedCacheLine(newFramebufferColorSizeBytes); Color4u8 *newFramebufferColor = (Color4u8 *)malloc_alignedCacheLine(newFramebufferColorSizeBytes);
this->_framebufferColor = newFramebufferColor; this->_framebufferColor = newFramebufferColor;
free_aligned(oldFramebufferColor); free_aligned(oldFramebufferColor);
} }
@ -5562,11 +5562,11 @@ Render3DError OpenGLRenderer_2_0::BeginRender(const GFX3D_State &renderState, co
if (this->_enableEdgeMark && this->_deviceInfo.isEdgeMarkSupported) if (this->_enableEdgeMark && this->_deviceInfo.isEdgeMarkSupported)
{ {
const u8 alpha8 = (renderState.DISP3DCNT.EnableAntialiasing) ? 0x80 : 0xFF; const u8 alpha8 = (renderState.DISP3DCNT.EnableAntialiasing) ? 0x80 : 0xFF;
FragmentColor edgeColor32[8]; Color4u8 edgeColor32[8];
for (size_t i = 0; i < 8; i++) for (size_t i = 0; i < 8; i++)
{ {
edgeColor32[i].color = COLOR555TO8888(renderState.edgeMarkColorTable[i] & 0x7FFF, alpha8); edgeColor32[i].value = COLOR555TO8888(renderState.edgeMarkColorTable[i] & 0x7FFF, alpha8);
} }
glActiveTexture(GL_TEXTURE0 + OGLTextureUnitID_LookupTable); glActiveTexture(GL_TEXTURE0 + OGLTextureUnitID_LookupTable);
@ -5661,7 +5661,7 @@ Render3DError OpenGLRenderer_2_1::RenderFinish()
return OGLERROR_BEGINGL_FAILED; return OGLERROR_BEGINGL_FAILED;
} }
this->_mappedFramebuffer = (FragmentColor *__restrict)glMapBuffer(GL_PIXEL_PACK_BUFFER, GL_READ_ONLY); this->_mappedFramebuffer = (Color4u8 *__restrict)glMapBuffer(GL_PIXEL_PACK_BUFFER, GL_READ_ONLY);
ENDGL(); ENDGL();
} }
@ -5679,7 +5679,7 @@ Render3DError OpenGLRenderer_2_1::RenderFlush(bool willFlushBuffer32, bool willF
return RENDER3DERROR_NOERR; return RENDER3DERROR_NOERR;
} }
FragmentColor *framebufferMain = (willFlushBuffer32) ? GPU->GetEngineMain()->Get3DFramebufferMain() : NULL; Color4u8 *framebufferMain = (willFlushBuffer32) ? GPU->GetEngineMain()->Get3DFramebufferMain() : NULL;
u16 *framebuffer16 = (willFlushBuffer16) ? GPU->GetEngineMain()->Get3DFramebuffer16() : NULL; u16 *framebuffer16 = (willFlushBuffer16) ? GPU->GetEngineMain()->Get3DFramebuffer16() : NULL;
this->FlushFramebuffer(this->_mappedFramebuffer, framebufferMain, framebuffer16); this->FlushFramebuffer(this->_mappedFramebuffer, framebufferMain, framebuffer16);

View File

@ -366,23 +366,26 @@ enum OGLPolyDrawMode
union GLvec2 union GLvec2
{ {
GLfloat vec[2];
struct { GLfloat x, y; }; struct { GLfloat x, y; };
GLfloat v[2];
}; };
typedef union GLvec2 GLvec2;
union GLvec3 union GLvec3
{ {
GLfloat vec[3];
struct { GLfloat r, g, b; }; struct { GLfloat r, g, b; };
struct { GLfloat x, y, z; }; struct { GLfloat x, y, z; };
GLfloat v[3];
}; };
typedef union GLvec3 GLvec3;
union GLvec4 union GLvec4
{ {
GLfloat vec[4];
struct { GLfloat r, g, b, a; }; struct { GLfloat r, g, b, a; };
struct { GLfloat x, y, z, w; }; struct { GLfloat x, y, z, w; };
GLfloat v[4];
}; };
typedef union GLvec4 GLvec4;
struct OGLVertex struct OGLVertex
{ {
@ -390,6 +393,7 @@ struct OGLVertex
GLvec2 texCoord; GLvec2 texCoord;
GLvec3 color; GLvec3 color;
}; };
typedef struct OGLVertex OGLVertex;
struct OGLRenderStates struct OGLRenderStates
{ {
@ -405,6 +409,7 @@ struct OGLRenderStates
GLvec4 edgeColor[8]; GLvec4 edgeColor[8];
GLvec4 toonColor[32]; GLvec4 toonColor[32];
}; };
typedef struct OGLRenderStates OGLRenderStates;
union OGLPolyStates union OGLPolyStates
{ {
@ -429,6 +434,7 @@ union OGLPolyStates
u8 :7; u8 :7;
}; };
}; };
typedef union OGLPolyStates OGLPolyStates;
union OGLGeometryFlags union OGLGeometryFlags
{ {
@ -602,6 +608,7 @@ struct OGLRenderRef
CACHE_ALIGN GLuint workingCIDepthStencilBuffer[2][GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT]; CACHE_ALIGN GLuint workingCIDepthStencilBuffer[2][GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT];
CACHE_ALIGN GLuint workingCIFogAttributesBuffer[2][GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT]; CACHE_ALIGN GLuint workingCIFogAttributesBuffer[2][GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT];
}; };
typedef struct OGLRenderRef OGLRenderRef;
struct GFX3D_State; struct GFX3D_State;
struct POLY; struct POLY;
@ -696,8 +703,8 @@ private:
unsigned int versionRevision; unsigned int versionRevision;
private: private:
Render3DError _FlushFramebufferFlipAndConvertOnCPU(const FragmentColor *__restrict srcFramebuffer, Render3DError _FlushFramebufferFlipAndConvertOnCPU(const Color4u8 *__restrict srcFramebuffer,
FragmentColor *__restrict dstFramebufferMain, u16 *__restrict dstFramebuffer16, Color4u8 *__restrict dstFramebufferMain, u16 *__restrict dstFramebuffer16,
bool doFramebufferFlip, bool doFramebufferConvert); bool doFramebufferFlip, bool doFramebufferConvert);
protected: protected:
@ -721,8 +728,8 @@ protected:
bool _emulateDepthLEqualPolygonFacing; bool _emulateDepthLEqualPolygonFacing;
bool _isDepthLEqualPolygonFacingSupported; bool _isDepthLEqualPolygonFacingSupported;
FragmentColor *_mappedFramebuffer; Color4u8 *_mappedFramebuffer;
FragmentColor *_workingTextureUnpackBuffer; Color4u8 *_workingTextureUnpackBuffer;
bool _pixelReadNeedsFinish; bool _pixelReadNeedsFinish;
bool _needsZeroDstAlphaPass; bool _needsZeroDstAlphaPass;
size_t _currentPolyIndex; size_t _currentPolyIndex;
@ -738,7 +745,7 @@ protected:
int _selectedMultisampleSize; int _selectedMultisampleSize;
size_t _clearImageIndex; size_t _clearImageIndex;
Render3DError FlushFramebuffer(const FragmentColor *__restrict srcFramebuffer, FragmentColor *__restrict dstFramebufferMain, u16 *__restrict dstFramebuffer16); Render3DError FlushFramebuffer(const Color4u8 *__restrict srcFramebuffer, Color4u8 *__restrict dstFramebufferMain, u16 *__restrict dstFramebuffer16);
OpenGLTexture* GetLoadedTextureFromPolygon(const POLY &thePoly, bool enableTexturing); OpenGLTexture* GetLoadedTextureFromPolygon(const POLY &thePoly, bool enableTexturing);
template<OGLPolyDrawMode DRAWMODE> size_t DrawPolygonsForIndexRange(const POLY *rawPolyList, const CPoly *clippedPolyList, const size_t clippedPolyCount, size_t firstIndex, size_t lastIndex, size_t &indexOffset, POLYGON_ATTR &lastPolyAttr); template<OGLPolyDrawMode DRAWMODE> size_t DrawPolygonsForIndexRange(const POLY *rawPolyList, const CPoly *clippedPolyList, const size_t clippedPolyCount, size_t firstIndex, size_t lastIndex, size_t &indexOffset, POLYGON_ATTR &lastPolyAttr);
@ -824,7 +831,7 @@ public:
void SetVersion(unsigned int major, unsigned int minor, unsigned int revision); void SetVersion(unsigned int major, unsigned int minor, unsigned int revision);
bool IsVersionSupported(unsigned int checkVersionMajor, unsigned int checkVersionMinor, unsigned int checkVersionRevision) const; bool IsVersionSupported(unsigned int checkVersionMajor, unsigned int checkVersionMinor, unsigned int checkVersionRevision) const;
virtual FragmentColor* GetFramebuffer(); virtual Color4u8* GetFramebuffer();
virtual GLsizei GetLimitedMultisampleSize() const; virtual GLsizei GetLimitedMultisampleSize() const;
Render3DError ApplyRenderingSettings(const GFX3D_State &renderState); Render3DError ApplyRenderingSettings(const GFX3D_State &renderState);
@ -885,7 +892,7 @@ protected:
virtual Render3DError EndRender(); virtual Render3DError EndRender();
virtual Render3DError ClearUsingImage(const u16 *__restrict colorBuffer, const u32 *__restrict depthBuffer, const u8 *__restrict fogBuffer, const u8 opaquePolyID); virtual Render3DError ClearUsingImage(const u16 *__restrict colorBuffer, const u32 *__restrict depthBuffer, const u8 *__restrict fogBuffer, const u8 opaquePolyID);
virtual Render3DError ClearUsingValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes); virtual Render3DError ClearUsingValues(const Color4u8 &clearColor6665, const FragmentAttributes &clearAttributes);
virtual void SetPolygonIndex(const size_t index); virtual void SetPolygonIndex(const size_t index);
virtual Render3DError SetupPolygon(const POLY &thePoly, bool treatAsTranslucent, bool willChangeStencilBuffer, bool isBackFacing); virtual Render3DError SetupPolygon(const POLY &thePoly, bool treatAsTranslucent, bool willChangeStencilBuffer, bool isBackFacing);

View File

@ -2359,7 +2359,7 @@ Render3DError OpenGLRenderer_3_2::ClearUsingImage(const u16 *__restrict colorBuf
return OGLERROR_NOERR; return OGLERROR_NOERR;
} }
Render3DError OpenGLRenderer_3_2::ClearUsingValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes) Render3DError OpenGLRenderer_3_2::ClearUsingValues(const Color4u8 &clearColor6665, const FragmentAttributes &clearAttributes)
{ {
OGLRenderRef &OGLRef = *this->ref; OGLRenderRef &OGLRef = *this->ref;
OGLRef.selectedRenderingFBO = (this->_enableMultisampledRendering) ? OGLRef.fboMSIntermediateRenderID : OGLRef.fboRenderID; OGLRef.selectedRenderingFBO = (this->_enableMultisampledRendering) ? OGLRef.fboMSIntermediateRenderID : OGLRef.fboRenderID;
@ -2536,12 +2536,12 @@ Render3DError OpenGLRenderer_3_2::SetFramebufferSize(size_t w, size_t h)
glFinish(); glFinish();
} }
const size_t newFramebufferColorSizeBytes = w * h * sizeof(FragmentColor); const size_t newFramebufferColorSizeBytes = w * h * sizeof(Color4u8);
glBufferData(GL_PIXEL_PACK_BUFFER, newFramebufferColorSizeBytes, NULL, GL_STREAM_READ); glBufferData(GL_PIXEL_PACK_BUFFER, newFramebufferColorSizeBytes, NULL, GL_STREAM_READ);
if (this->_mappedFramebuffer != NULL) if (this->_mappedFramebuffer != NULL)
{ {
this->_mappedFramebuffer = (FragmentColor *__restrict)glMapBuffer(GL_PIXEL_PACK_BUFFER, GL_READ_ONLY); this->_mappedFramebuffer = (Color4u8 *__restrict)glMapBuffer(GL_PIXEL_PACK_BUFFER, GL_READ_ONLY);
glFinish(); glFinish();
} }

View File

@ -106,7 +106,7 @@ protected:
virtual Render3DError PostprocessFramebuffer(); virtual Render3DError PostprocessFramebuffer();
virtual Render3DError ClearUsingImage(const u16 *__restrict colorBuffer, const u32 *__restrict depthBuffer, const u8 *__restrict fogBuffer, const u8 opaquePolyID); virtual Render3DError ClearUsingImage(const u16 *__restrict colorBuffer, const u32 *__restrict depthBuffer, const u8 *__restrict fogBuffer, const u8 opaquePolyID);
virtual Render3DError ClearUsingValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes); virtual Render3DError ClearUsingValues(const Color4u8 &clearColor6665, const FragmentAttributes &clearAttributes);
virtual void SetPolygonIndex(const size_t index); virtual void SetPolygonIndex(const size_t index);
virtual Render3DError SetupPolygon(const POLY &thePoly, bool treatAsTranslucent, bool willChangeStencilBuffer, bool isBackFacing); virtual Render3DError SetupPolygon(const POLY &thePoly, bool treatAsTranslucent, bool willChangeStencilBuffer, bool isBackFacing);

View File

@ -513,9 +513,9 @@ NDSGeometryEngine::NDSGeometryEngine()
void NDSGeometryEngine::__Init() void NDSGeometryEngine::__Init()
{ {
static const Vector16x2 zeroVec16x2 = {0, 0}; static const Vector2s16 zeroVec2s16 = {0, 0};
static const Vector16x3 zeroVec16x3 = {0, 0, 0}; static const Vector3s16 zeroVec3s16 = {0, 0, 0};
static const Vector32x4 zeroVec32x4 = {0, 0, 0, 0}; static const Vector4s32 zeroVec4s32 = {0, 0, 0, 0};
_mtxCurrentMode = MATRIXMODE_PROJECTION; _mtxCurrentMode = MATRIXMODE_PROJECTION;
@ -529,8 +529,8 @@ void NDSGeometryEngine::__Init()
for (size_t i = 0; i < NDSMATRIXSTACK_COUNT(MATRIXMODE_POSITION_VECTOR); i++) { MatrixInit(_mtxStackPositionVector[i]); } for (size_t i = 0; i < NDSMATRIXSTACK_COUNT(MATRIXMODE_POSITION_VECTOR); i++) { MatrixInit(_mtxStackPositionVector[i]); }
MatrixInit(_mtxStackTexture[0]); MatrixInit(_mtxStackTexture[0]);
_vecScale = zeroVec32x4; _vecScale = zeroVec4s32;
_vecTranslate = zeroVec32x4; _vecTranslate = zeroVec4s32;
_mtxStackIndex[MATRIXMODE_PROJECTION] = 0; _mtxStackIndex[MATRIXMODE_PROJECTION] = 0;
_mtxStackIndex[MATRIXMODE_POSITION] = 0; _mtxStackIndex[MATRIXMODE_POSITION] = 0;
@ -560,13 +560,8 @@ void NDSGeometryEngine::__Init()
_vtxColor666X.b = 63; _vtxColor666X.b = 63;
_vtxColor666X.a = 0; _vtxColor666X.a = 0;
_vtxColorFloat[0] = (float)_vtxColor666X.r; _vtxCoord16 = zeroVec3s16;
_vtxColorFloat[1] = (float)_vtxColor666X.g; _vecNormal = zeroVec4s32;
_vtxColorFloat[2] = (float)_vtxColor666X.b;
_vtxColorFloat[3] = (float)_vtxColor666X.a;
_vtxCoord16 = zeroVec16x3;
_vecNormal = zeroVec32x4;
_regViewport.X1 = 0; _regViewport.X1 = 0;
_regViewport.Y1 = 0; _regViewport.Y1 = 0;
@ -579,11 +574,9 @@ void NDSGeometryEngine::__Init()
_currentViewport.height = GPU_FRAMEBUFFER_NATIVE_HEIGHT; _currentViewport.height = GPU_FRAMEBUFFER_NATIVE_HEIGHT;
_texCoordTransformMode = TextureTransformationMode_None; _texCoordTransformMode = TextureTransformationMode_None;
_texCoord16 = zeroVec16x2; _texCoord16 = zeroVec2s16;
_texCoordTransformed.s = (s32)_texCoord16.s; _texCoordTransformed.s = (s32)_texCoord16.s;
_texCoordTransformed.t = (s32)_texCoord16.t; _texCoordTransformed.t = (s32)_texCoord16.t;
_texCoordTransformedFloat[0] = (float)_texCoordTransformed.s / 16.0f;
_texCoordTransformedFloat[1] = (float)_texCoordTransformed.t / 16.0f;
_doesViewportNeedUpdate = true; _doesViewportNeedUpdate = true;
_doesVertexColorNeedUpdate = true; _doesVertexColorNeedUpdate = true;
@ -610,15 +603,15 @@ void NDSGeometryEngine::__Init()
_regLightDirection[2] = 0; _regLightDirection[2] = 0;
_regLightDirection[3] = 0; _regLightDirection[3] = 0;
_vecLightDirectionTransformed[0] = zeroVec32x4; _vecLightDirectionTransformed[0] = zeroVec4s32;
_vecLightDirectionTransformed[1] = zeroVec32x4; _vecLightDirectionTransformed[1] = zeroVec4s32;
_vecLightDirectionTransformed[2] = zeroVec32x4; _vecLightDirectionTransformed[2] = zeroVec4s32;
_vecLightDirectionTransformed[3] = zeroVec32x4; _vecLightDirectionTransformed[3] = zeroVec4s32;
_vecLightDirectionHalfNegative[0] = zeroVec32x4; _vecLightDirectionHalfNegative[0] = zeroVec4s32;
_vecLightDirectionHalfNegative[1] = zeroVec32x4; _vecLightDirectionHalfNegative[1] = zeroVec4s32;
_vecLightDirectionHalfNegative[2] = zeroVec32x4; _vecLightDirectionHalfNegative[2] = zeroVec4s32;
_vecLightDirectionHalfNegative[3] = zeroVec32x4; _vecLightDirectionHalfNegative[3] = zeroVec4s32;
_doesLightHalfVectorNeedUpdate[0] = true; _doesLightHalfVectorNeedUpdate[0] = true;
_doesLightHalfVectorNeedUpdate[1] = true; _doesLightHalfVectorNeedUpdate[1] = true;
@ -1239,7 +1232,7 @@ void NDSGeometryEngine::SetNormal(const u32 param)
this->_doesTransformedTexCoordsNeedUpdate = true; this->_doesTransformedTexCoordsNeedUpdate = true;
} }
CACHE_ALIGN Vector32x4 normalTransformed = this->_vecNormal; CACHE_ALIGN Vector4s32 normalTransformed = this->_vecNormal;
MatrixMultVec3x3(_mtxCurrent[MATRIXMODE_POSITION_VECTOR], normalTransformed.vec); MatrixMultVec3x3(_mtxCurrent[MATRIXMODE_POSITION_VECTOR], normalTransformed.vec);
//apply lighting model //apply lighting model
@ -1331,7 +1324,7 @@ void NDSGeometryEngine::SetNormal(const u32 param)
} }
} }
const FragmentColor newVtxColor = { const Color4u8 newVtxColor = {
(u8)std::min<s32>(31, vertexColor[0]), (u8)std::min<s32>(31, vertexColor[0]),
(u8)std::min<s32>(31, vertexColor[1]), (u8)std::min<s32>(31, vertexColor[1]),
(u8)std::min<s32>(31, vertexColor[2]), (u8)std::min<s32>(31, vertexColor[2]),
@ -1379,9 +1372,9 @@ void NDSGeometryEngine::SetVertexColor(const u32 param)
} }
} }
void NDSGeometryEngine::SetVertexColor(const FragmentColor vtxColor555X) void NDSGeometryEngine::SetVertexColor(const Color4u8 vtxColor555X)
{ {
if (this->_vtxColor555X.color != vtxColor555X.color) if (this->_vtxColor555X.value != vtxColor555X.value)
{ {
this->_vtxColor15 = (vtxColor555X.r << 0) | (vtxColor555X.g << 5) | (vtxColor555X.b << 10); this->_vtxColor15 = (vtxColor555X.r << 0) | (vtxColor555X.g << 5) | (vtxColor555X.b << 10);
this->_vtxColor555X = vtxColor555X; this->_vtxColor555X = vtxColor555X;
@ -1411,15 +1404,18 @@ void NDSGeometryEngine::SetTexturePalette(const u32 texPalette)
this->_texPalette = texPalette; this->_texPalette = texPalette;
} }
void NDSGeometryEngine::SetTextureCoordinates(const u32 param) void NDSGeometryEngine::SetTextureCoordinates2s16(const u32 param)
{ {
VertexCoord16x2 inTexCoord16x2; Vector2s16 inTexCoord2s16;
inTexCoord16x2.value = param; #ifndef MSB_FIRST
inTexCoord2s16.value = param;
this->SetTextureCoordinates(inTexCoord16x2); #else
inTexCoord2s16.value = (param << 16) | (param >> 16);
#endif
this->SetTextureCoordinates2s16(inTexCoord2s16);
} }
void NDSGeometryEngine::SetTextureCoordinates(const VertexCoord16x2 &texCoord16) void NDSGeometryEngine::SetTextureCoordinates2s16(const Vector2s16 &texCoord16)
{ {
if (this->_texCoord16.value != texCoord16.value) if (this->_texCoord16.value != texCoord16.value)
{ {
@ -1455,83 +1451,90 @@ void NDSGeometryEngine::VertexListEnd()
this->_vtxCount = 0; this->_vtxCount = 0;
} }
bool NDSGeometryEngine::SetCurrentVertex16x2(const u32 param) bool NDSGeometryEngine::SetCurrentVertexPosition2s16(const u32 param)
{ {
VertexCoord16x2 inVtxCoord16x2; Vector2s16 inVtxCoord2s16;
inVtxCoord16x2.value = param; #ifndef MSB_FIRST
inVtxCoord2s16.value = param;
return this->SetCurrentVertex16x2(inVtxCoord16x2); #else
inVtxCoord2s16.value = (param >> 16) | (param << 16);
#endif
return this->SetCurrentVertexPosition2s16(inVtxCoord2s16);
} }
bool NDSGeometryEngine::SetCurrentVertex16x2(const VertexCoord16x2 inVtxCoord16x2) bool NDSGeometryEngine::SetCurrentVertexPosition2s16(const Vector2s16 inVtxCoord2s16)
{ {
if (this->_vtxCoord16CurrentIndex == 0) if (this->_vtxCoord16CurrentIndex == 0)
{ {
this->SetCurrentVertex16x2Immediate<0, 1>(inVtxCoord16x2); this->SetCurrentVertexPosition2s16Immediate<0, 1>(inVtxCoord2s16);
this->_vtxCoord16CurrentIndex++; this->_vtxCoord16CurrentIndex++;
return false; return false;
} }
this->SetCurrentVertex16x2Immediate<2, 3>(inVtxCoord16x2); this->SetCurrentVertexPosition2s16Immediate<2, 3>(inVtxCoord2s16);
this->_vtxCoord16CurrentIndex = 0; this->_vtxCoord16CurrentIndex = 0;
return true; return true;
} }
void NDSGeometryEngine::SetCurrentVertex10x3(const u32 param) void NDSGeometryEngine::SetCurrentVertexPosition3s10(const u32 param)
{ {
const VertexCoord16x3 inVtxCoord16x3 = { const Vector3s16 inVtxCoord3s16 = {
(s16)( (u16)(((param << 22) >> 22) << 6) ), (s16)( ((s32)((param << 22) & 0xFFC00000) / (s32)(1 << 22)) * (s32)(1 << 6) ),
(s16)( (u16)(((param << 12) >> 22) << 6) ), (s16)( ((s32)((param << 12) & 0xFFC00000) / (s32)(1 << 22)) * (s32)(1 << 6) ),
(s16)( (u16)(((param << 2) >> 22) << 6) ) (s16)( ((s32)((param << 2) & 0xFFC00000) / (s32)(1 << 22)) * (s32)(1 << 6) )
}; };
this->SetCurrentVertex(inVtxCoord16x3); this->SetCurrentVertexPosition(inVtxCoord3s16);
} }
void NDSGeometryEngine::SetCurrentVertex(const VertexCoord16x3 inVtxCoord16x3) void NDSGeometryEngine::SetCurrentVertexPosition(const Vector3s16 inVtxCoord3s16)
{ {
this->_vtxCoord16 = inVtxCoord16x3; this->_vtxCoord16 = inVtxCoord3s16;
} }
template <size_t ONE, size_t TWO> template <size_t ONE, size_t TWO>
void NDSGeometryEngine::SetCurrentVertex16x2Immediate(const u32 param) void NDSGeometryEngine::SetCurrentVertexPosition2s16Immediate(const u32 param)
{ {
VertexCoord16x2 inVtxCoord16x2; Vector2s16 inVtxCoord2s16;
inVtxCoord16x2.value = param; #ifndef MSB_FIRST
inVtxCoord2s16.value = param;
#else
inVtxCoord2s16.value = (param >> 16) | (param << 16);
#endif
this->SetCurrentVertex16x2Immediate<ONE, TWO>(inVtxCoord16x2); this->SetCurrentVertexPosition2s16Immediate<ONE, TWO>(inVtxCoord2s16);
} }
template <size_t ONE, size_t TWO> template <size_t ONE, size_t TWO>
void NDSGeometryEngine::SetCurrentVertex16x2Immediate(const VertexCoord16x2 inVtxCoord16x2) void NDSGeometryEngine::SetCurrentVertexPosition2s16Immediate(const Vector2s16 inVtxCoord2s16)
{ {
if (ONE < 3) if (ONE < 3)
{ {
this->_vtxCoord16.coord[ONE] = inVtxCoord16x2.coord[0]; this->_vtxCoord16.coord[ONE] = inVtxCoord2s16.coord[0];
} }
if (TWO < 3) if (TWO < 3)
{ {
this->_vtxCoord16.coord[TWO] = inVtxCoord16x2.coord[1]; this->_vtxCoord16.coord[TWO] = inVtxCoord2s16.coord[1];
} }
} }
void NDSGeometryEngine::SetCurrentVertex10x3Relative(const u32 param) void NDSGeometryEngine::SetCurrentVertexPosition3s10Relative(const u32 param)
{ {
const VertexCoord16x3 inVtxCoord16x3 = { const Vector3s16 inVtxCoord3s16 = {
(s16)( (s32)((param << 22) & 0xFFC00000) / (s32)(1 << 22) ), (s16)( (s32)((param << 22) & 0xFFC00000) / (s32)(1 << 22) ),
(s16)( (s32)((param << 12) & 0xFFC00000) / (s32)(1 << 22) ), (s16)( (s32)((param << 12) & 0xFFC00000) / (s32)(1 << 22) ),
(s16)( (s32)((param << 2) & 0xFFC00000) / (s32)(1 << 22) ) (s16)( (s32)((param << 2) & 0xFFC00000) / (s32)(1 << 22) )
}; };
this->SetCurrentVertexRelative(inVtxCoord16x3); this->SetCurrentVertexPositionRelative(inVtxCoord3s16);
} }
void NDSGeometryEngine::SetCurrentVertexRelative(const VertexCoord16x3 inVtxCoord16x3) void NDSGeometryEngine::SetCurrentVertexPositionRelative(const Vector3s16 inVtxCoord3s16)
{ {
this->_vtxCoord16.x += inVtxCoord16x3.x; this->_vtxCoord16.x += inVtxCoord3s16.x;
this->_vtxCoord16.y += inVtxCoord16x3.y; this->_vtxCoord16.y += inVtxCoord3s16.y;
this->_vtxCoord16.z += inVtxCoord16x3.z; this->_vtxCoord16.z += inVtxCoord3s16.z;
} }
//Submit a vertex to the GE //Submit a vertex to the GE
@ -1548,7 +1551,7 @@ void NDSGeometryEngine::AddCurrentVertexToList(GFX3D_GeometryList &targetGList)
return; return;
} }
CACHE_ALIGN VertexCoord32x4 vtxCoordTransformed = { CACHE_ALIGN Vector4s32 vtxCoordTransformed = {
(s32)this->_vtxCoord16.x, (s32)this->_vtxCoord16.x,
(s32)this->_vtxCoord16.y, (s32)this->_vtxCoord16.y,
(s32)this->_vtxCoord16.z, (s32)this->_vtxCoord16.z,
@ -1588,10 +1591,6 @@ void NDSGeometryEngine::AddCurrentVertexToList(GFX3D_GeometryList &targetGList)
this->_vtxColor666X.g = GFX3D_5TO6_LOOKUP(this->_vtxColor555X.g); this->_vtxColor666X.g = GFX3D_5TO6_LOOKUP(this->_vtxColor555X.g);
this->_vtxColor666X.b = GFX3D_5TO6_LOOKUP(this->_vtxColor555X.b); this->_vtxColor666X.b = GFX3D_5TO6_LOOKUP(this->_vtxColor555X.b);
this->_vtxColor666X.a = 0; this->_vtxColor666X.a = 0;
this->_vtxColorFloat[0] = (float)this->_vtxColor666X.r;
this->_vtxColorFloat[1] = (float)this->_vtxColor666X.g;
this->_vtxColorFloat[2] = (float)this->_vtxColor666X.b;
this->_vtxColorFloat[3] = (float)this->_vtxColor666X.a;
this->_doesVertexColorNeedUpdate = false; this->_doesVertexColorNeedUpdate = false;
} }
@ -1629,8 +1628,6 @@ void NDSGeometryEngine::AddCurrentVertexToList(GFX3D_GeometryList &targetGList)
break; break;
} }
this->_texCoordTransformedFloat[0] = (float)this->_texCoordTransformed.s / 16.0f;
this->_texCoordTransformedFloat[1] = (float)this->_texCoordTransformed.t / 16.0f;
this->_doesTransformedTexCoordsNeedUpdate = false; this->_doesTransformedTexCoordsNeedUpdate = false;
} }
@ -1653,24 +1650,6 @@ void NDSGeometryEngine::AddCurrentVertexToList(GFX3D_GeometryList &targetGList)
vtx.texCoord = this->_texCoordTransformed; vtx.texCoord = this->_texCoordTransformed;
vtx.color = this->_vtxColor666X; vtx.color = this->_vtxColor666X;
VERT &vert = targetGList.rawVertList[vertIndex];
vert.coord[0] = (float)vtxCoordTransformed.x / 4096.0f;
vert.coord[1] = (float)vtxCoordTransformed.y / 4096.0f;
vert.coord[2] = (float)vtxCoordTransformed.z / 4096.0f;
vert.coord[3] = (float)vtxCoordTransformed.w / 4096.0f;
vert.texcoord[0] = this->_texCoordTransformedFloat[0];
vert.texcoord[1] = this->_texCoordTransformedFloat[1];
vert.texcoord[2] = 0.0f;
vert.texcoord[3] = 0.0f;
vert.fcolor[0] = this->_vtxColorFloat[0];
vert.fcolor[1] = this->_vtxColorFloat[1];
vert.fcolor[2] = this->_vtxColorFloat[2];
vert.fcolor[3] = this->_vtxColorFloat[3];
vert.color[0] = this->_vtxColor666X.r;
vert.color[1] = this->_vtxColor666X.g;
vert.color[2] = this->_vtxColor666X.b;
vert.color[3] = this->_vtxColor666X.a;
this->_vtxIndex[this->_vtxCount] = (u16)(targetGList.rawVertCount + this->_vtxCount - continuation); this->_vtxIndex[this->_vtxCount] = (u16)(targetGList.rawVertCount + this->_vtxCount - continuation);
this->_vtxCount++; this->_vtxCount++;
@ -1876,7 +1855,7 @@ void NDSGeometryEngine::BoxTest()
const s32 z_d = (s32)( (s16)((uz+ud) & 0xFFFF) ); const s32 z_d = (s32)( (s16)((uz+ud) & 0xFFFF) );
//eight corners of cube //eight corners of cube
CACHE_ALIGN VertexCoord32x4 vtxPosition[8] = { CACHE_ALIGN Vector4s32 vtxPosition[8] = {
{ __x, __y, __z, fixedOne }, { __x, __y, __z, fixedOne },
{ x_w, __y, __z, fixedOne }, { x_w, __y, __z, fixedOne },
{ x_w, y_h, __z, fixedOne }, { x_w, y_h, __z, fixedOne },
@ -2029,7 +2008,7 @@ void NDSGeometryEngine::VectorTest(const u32 param)
// Bits 30-31: Ignored // Bits 30-31: Ignored
// Convert the coordinates to 20.12 fixed-point format for our vector-matrix multiplication. // Convert the coordinates to 20.12 fixed-point format for our vector-matrix multiplication.
CACHE_ALIGN Vector32x4 testVec = { CACHE_ALIGN Vector4s32 testVec = {
( (s32)((param << 22) & 0xFFC00000) / (s32)(1 << 19) ) | (s32)((param & 0x000001C0) >> 6), ( (s32)((param << 22) & 0xFFC00000) / (s32)(1 << 19) ) | (s32)((param & 0x000001C0) >> 6),
( (s32)((param << 12) & 0xFFC00000) / (s32)(1 << 19) ) | (s32)((param & 0x00007000) >> 16), ( (s32)((param << 12) & 0xFFC00000) / (s32)(1 << 19) ) | (s32)((param & 0x00007000) >> 16),
( (s32)((param << 2) & 0xFFC00000) / (s32)(1 << 19) ) | (s32)((param & 0x01C00000) >> 26), ( (s32)((param << 2) & 0xFFC00000) / (s32)(1 << 19) ) | (s32)((param & 0x01C00000) >> 26),
@ -2043,7 +2022,7 @@ void NDSGeometryEngine::VectorTest(const u32 param)
// greater than or equal to 1.0f (or 4096 in fixed-point). All of this means that for all // greater than or equal to 1.0f (or 4096 in fixed-point). All of this means that for all
// values >= 1.0f or < -1.0f will result in the sign bits becoming 1111b; otherwise, the sign // values >= 1.0f or < -1.0f will result in the sign bits becoming 1111b; otherwise, the sign
// bits will become 0000b. // bits will become 0000b.
const Vector16x3 resultVec = { const Vector3s16 resultVec = {
(s16)( ((testVec.x > 0) && (testVec.x < 4096)) ? ((s16)testVec.x & 0x0FFF) : ((s16)testVec.x | 0xF000) ), (s16)( ((testVec.x > 0) && (testVec.x < 4096)) ? ((s16)testVec.x & 0x0FFF) : ((s16)testVec.x | 0xF000) ),
(s16)( ((testVec.y > 0) && (testVec.y < 4096)) ? ((s16)testVec.y & 0x0FFF) : ((s16)testVec.y | 0xF000) ), (s16)( ((testVec.y > 0) && (testVec.y < 4096)) ? ((s16)testVec.y & 0x0FFF) : ((s16)testVec.y | 0xF000) ),
(s16)( ((testVec.z > 0) && (testVec.z < 4096)) ? ((s16)testVec.z & 0x0FFF) : ((s16)testVec.z | 0xF000) ) (s16)( ((testVec.z > 0) && (testVec.z < 4096)) ? ((s16)testVec.z & 0x0FFF) : ((s16)testVec.z | 0xF000) )
@ -2157,9 +2136,9 @@ void NDSGeometryEngine::MatrixCopyToStack(const MatrixMode whichMatrix, const si
void NDSGeometryEngine::UpdateLightDirectionHalfAngleVector(const size_t index) void NDSGeometryEngine::UpdateLightDirectionHalfAngleVector(const size_t index)
{ {
static const CACHE_ALIGN Vector32x4 lineOfSight = {0, 0, (s32)0xFFFFF000, 0}; static const CACHE_ALIGN Vector4s32 lineOfSight = {0, 0, (s32)0xFFFFF000, 0};
Vector32x4 half = { Vector4s32 half = {
this->_vecLightDirectionTransformed[index].x + lineOfSight.x, this->_vecLightDirectionTransformed[index].x + lineOfSight.x,
this->_vecLightDirectionTransformed[index].y + lineOfSight.y, this->_vecLightDirectionTransformed[index].y + lineOfSight.y,
this->_vecLightDirectionTransformed[index].z + lineOfSight.z, this->_vecLightDirectionTransformed[index].z + lineOfSight.z,
@ -2263,9 +2242,9 @@ void NDSGeometryEngine::SaveState_LegacyFormat(GeometryEngineLegacySave &outLega
outLegacySave.mtxMultiply4x3TempIndex = this->_mtxMultiply4x3TempIndex; outLegacySave.mtxMultiply4x3TempIndex = this->_mtxMultiply4x3TempIndex;
outLegacySave.mtxMultiply3x3TempIndex = this->_mtxMultiply3x3TempIndex; outLegacySave.mtxMultiply3x3TempIndex = this->_mtxMultiply3x3TempIndex;
outLegacySave.vtxCoord.vec3 = this->_vtxCoord16; outLegacySave.vtxPosition.vec3 = this->_vtxCoord16;
outLegacySave.vtxCoord.coord[3] = 0; outLegacySave.vtxPosition.coord[3] = 0;
outLegacySave.vtxCoord16CurrentIndex = this->_vtxCoord16CurrentIndex; outLegacySave.vtxPosition16CurrentIndex = this->_vtxCoord16CurrentIndex;
outLegacySave.vtxFormat = (u32)this->_vtxFormat; outLegacySave.vtxFormat = (u32)this->_vtxFormat;
outLegacySave.vecTranslate = this->_vecTranslate; outLegacySave.vecTranslate = this->_vecTranslate;
@ -2342,8 +2321,8 @@ void NDSGeometryEngine::LoadState_LegacyFormat(const GeometryEngineLegacySave &i
this->_mtxMultiply4x3TempIndex = inLegacySave.mtxMultiply4x3TempIndex; this->_mtxMultiply4x3TempIndex = inLegacySave.mtxMultiply4x3TempIndex;
this->_mtxMultiply3x3TempIndex = inLegacySave.mtxMultiply3x3TempIndex; this->_mtxMultiply3x3TempIndex = inLegacySave.mtxMultiply3x3TempIndex;
this->_vtxCoord16 = inLegacySave.vtxCoord.vec3; this->_vtxCoord16 = inLegacySave.vtxPosition.vec3;
this->_vtxCoord16CurrentIndex = inLegacySave.vtxCoord16CurrentIndex; this->_vtxCoord16CurrentIndex = inLegacySave.vtxPosition16CurrentIndex;
this->_vtxFormat = (PolygonPrimitiveType)inLegacySave.vtxFormat; this->_vtxFormat = (PolygonPrimitiveType)inLegacySave.vtxFormat;
this->_vecTranslate = inLegacySave.vecTranslate; this->_vecTranslate = inLegacySave.vecTranslate;
@ -2673,13 +2652,13 @@ static void gfx3d_glNormal(const u32 param)
static void gfx3d_glTexCoord(const u32 param) static void gfx3d_glTexCoord(const u32 param)
{ {
_gEngine.SetTextureCoordinates(param); _gEngine.SetTextureCoordinates2s16(param);
GFX_DELAY(1); GFX_DELAY(1);
} }
static void gfx3d_glVertex16b(const u32 param) static void gfx3d_glVertex16b(const u32 param)
{ {
const bool isVtxComplete = _gEngine.SetCurrentVertex16x2(param); const bool isVtxComplete = _gEngine.SetCurrentVertexPosition2s16(param);
if (isVtxComplete) if (isVtxComplete)
{ {
_gEngine.AddCurrentVertexToList(gfx3d.gList[gfx3d.pendingListIndex]); _gEngine.AddCurrentVertexToList(gfx3d.gList[gfx3d.pendingListIndex]);
@ -2689,7 +2668,7 @@ static void gfx3d_glVertex16b(const u32 param)
static void gfx3d_glVertex10b(const u32 param) static void gfx3d_glVertex10b(const u32 param)
{ {
_gEngine.SetCurrentVertex10x3(param); _gEngine.SetCurrentVertexPosition3s10(param);
_gEngine.AddCurrentVertexToList(gfx3d.gList[gfx3d.pendingListIndex]); _gEngine.AddCurrentVertexToList(gfx3d.gList[gfx3d.pendingListIndex]);
GFX_DELAY(8); GFX_DELAY(8);
} }
@ -2697,14 +2676,14 @@ static void gfx3d_glVertex10b(const u32 param)
template <size_t ONE, size_t TWO> template <size_t ONE, size_t TWO>
static void gfx3d_glVertex3_cord(const u32 param) static void gfx3d_glVertex3_cord(const u32 param)
{ {
_gEngine.SetCurrentVertex16x2Immediate<ONE, TWO>(param); _gEngine.SetCurrentVertexPosition2s16Immediate<ONE, TWO>(param);
_gEngine.AddCurrentVertexToList(gfx3d.gList[gfx3d.pendingListIndex]); _gEngine.AddCurrentVertexToList(gfx3d.gList[gfx3d.pendingListIndex]);
GFX_DELAY(8); GFX_DELAY(8);
} }
static void gfx3d_glVertex_rel(const u32 param) static void gfx3d_glVertex_rel(const u32 param)
{ {
_gEngine.SetCurrentVertex10x3Relative(param); _gEngine.SetCurrentVertexPosition3s10Relative(param);
_gEngine.AddCurrentVertexToList(gfx3d.gList[gfx3d.pendingListIndex]); _gEngine.AddCurrentVertexToList(gfx3d.gList[gfx3d.pendingListIndex]);
GFX_DELAY(8); GFX_DELAY(8);
} }
@ -3316,7 +3295,7 @@ size_t gfx3d_PerformClipping(const GFX3D_GeometryList &gList, CPoly *outCPolyUns
{ {
NDSVertex &vtx = cPoly.clipVtxFixed[j]; NDSVertex &vtx = cPoly.clipVtxFixed[j];
VERT &vert = cPoly.clipVerts[j]; VERT &vert = cPoly.clipVerts[j];
VertexCoord64x4 vtx64 = { Vector4s64 vtx64 = {
(s64)vtx.position.x, (s64)vtx.position.x,
(s64)vtx.position.y, (s64)vtx.position.y,
(s64)vtx.position.z, (s64)vtx.position.z,
@ -3398,7 +3377,7 @@ size_t gfx3d_PerformClipping(const GFX3D_GeometryList &gList, CPoly *outCPolyUns
vert.v = (float)(vtx.texCoord.v / 16); vert.v = (float)(vtx.texCoord.v / 16);
} }
vert.color32 = vtx.color.color; vert.color32 = vtx.color.value;
} }
} }
@ -3812,8 +3791,8 @@ SFORMAT SF_GFX3D[] = {
{ "MM4I", 1, 1, &gfx3d.gEngineLegacySave.mtxMultiply4x4TempIndex}, { "MM4I", 1, 1, &gfx3d.gEngineLegacySave.mtxMultiply4x4TempIndex},
{ "MM3I", 1, 1, &gfx3d.gEngineLegacySave.mtxMultiply4x3TempIndex}, { "MM3I", 1, 1, &gfx3d.gEngineLegacySave.mtxMultiply4x3TempIndex},
{ "MMxI", 1, 1, &gfx3d.gEngineLegacySave.mtxMultiply3x3TempIndex}, { "MMxI", 1, 1, &gfx3d.gEngineLegacySave.mtxMultiply3x3TempIndex},
{ "GSCO", 2, 4, &gfx3d.gEngineLegacySave.vtxCoord}, { "GSCO", 2, 4, &gfx3d.gEngineLegacySave.vtxPosition},
{ "GCOI", 1, 1, &gfx3d.gEngineLegacySave.vtxCoord16CurrentIndex}, { "GCOI", 1, 1, &gfx3d.gEngineLegacySave.vtxPosition16CurrentIndex},
{ "GVFM", 4, 1, &gfx3d.gEngineLegacySave.vtxFormat}, { "GVFM", 4, 1, &gfx3d.gEngineLegacySave.vtxFormat},
{ "GTRN", 4, 4, &gfx3d.gEngineLegacySave.vecTranslate}, { "GTRN", 4, 4, &gfx3d.gEngineLegacySave.vecTranslate},
{ "GTRI", 1, 1, &gfx3d.gEngineLegacySave.vecTranslateCurrentIndex}, { "GTRI", 1, 1, &gfx3d.gEngineLegacySave.vecTranslateCurrentIndex},
@ -3929,8 +3908,8 @@ void gfx3d_PrepareSaveStateBufferWrite()
} }
else // Framebuffer is at a custom size else // Framebuffer is at a custom size
{ {
const FragmentColor *__restrict src = CurrentRenderer->GetFramebuffer(); const Color4u8 *__restrict src = (Color4u8 *)CurrentRenderer->GetFramebuffer();
FragmentColor *__restrict dst = gfx3d.framebufferNativeSave; Color4u8 *__restrict dst = gfx3d.framebufferNativeSave;
for (size_t l = 0; l < GPU_FRAMEBUFFER_NATIVE_HEIGHT; l++) for (size_t l = 0; l < GPU_FRAMEBUFFER_NATIVE_HEIGHT; l++)
{ {
@ -4101,7 +4080,7 @@ void gfx3d_FinishLoadStateBufferRead()
switch (deviceInfo.renderID) switch (deviceInfo.renderID)
{ {
case RENDERID_NULL: case RENDERID_NULL:
memset(CurrentRenderer->GetFramebuffer(), 0, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * sizeof(FragmentColor)); memset(CurrentRenderer->GetFramebuffer(), 0, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * sizeof(Color4u8));
break; break;
case RENDERID_SOFTRASTERIZER: case RENDERID_SOFTRASTERIZER:
@ -4127,8 +4106,8 @@ void gfx3d_FinishLoadStateBufferRead()
ColorspaceConvertBuffer8888To6665<false, false>((u32 *)gfx3d.framebufferNativeSave, (u32 *)gfx3d.framebufferNativeSave, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); ColorspaceConvertBuffer8888To6665<false, false>((u32 *)gfx3d.framebufferNativeSave, (u32 *)gfx3d.framebufferNativeSave, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT);
} }
const FragmentColor *__restrict src = gfx3d.framebufferNativeSave; const Color4u8 *__restrict src = gfx3d.framebufferNativeSave;
FragmentColor *__restrict dst = CurrentRenderer->GetFramebuffer(); Color4u8 *__restrict dst = (Color4u8 *)CurrentRenderer->GetFramebuffer();
for (size_t l = 0; l < GPU_FRAMEBUFFER_NATIVE_HEIGHT; l++) for (size_t l = 0; l < GPU_FRAMEBUFFER_NATIVE_HEIGHT; l++)
{ {

View File

@ -178,14 +178,14 @@ typedef union
u32 :24; // 8-31: Unused bits u32 :24; // 8-31: Unused bits
#else #else
u32 :24; // 8-31: Unused bits
u8 :6; // 2- 7: Unused bits u8 :6; // 2- 7: Unused bits
u8 MtxMode:2; // 0- 1: Set matrix mode; u8 MtxMode:2; // 0- 1: Set matrix mode;
// 0=Projection // 0=Projection
// 1=Position // 1=Position
// 2=Position+Vector // 2=Position+Vector
// 3=Texture // 3=Texture
u32 :24; // 8-31: Unused bits
#endif #endif
} IOREG_MTX_MODE; // 0x04000440: MTX_MODE command port } IOREG_MTX_MODE; // 0x04000440: MTX_MODE command port
@ -361,11 +361,11 @@ typedef union
u32 :24; // 8-31: Unused bits u32 :24; // 8-31: Unused bits
#else #else
u32 :24; // 8-31: Unused bits
u8 :6; // 2- 7: Unused bits u8 :6; // 2- 7: Unused bits
u8 DepthMode:1; // 1: Depth buffering select; 0=Z 1=W u8 DepthMode:1; // 1: Depth buffering select; 0=Z 1=W
u8 YSortMode:1; // 0: Translucent polygon Y-sorting mode; 0=Auto-sort, 1=Manual-sort u8 YSortMode:1; // 0: Translucent polygon Y-sorting mode; 0=Auto-sort, 1=Manual-sort
u32 :24; // 8-31: Unused bits
#endif #endif
}; };
} IOREG_SWAP_BUFFERS; // 0x04000540: SWAP_BUFFERS command port } IOREG_SWAP_BUFFERS; // 0x04000540: SWAP_BUFFERS command port
@ -378,10 +378,18 @@ typedef union
{ {
// Coordinate (0,0) represents the bottom-left of the screen. // Coordinate (0,0) represents the bottom-left of the screen.
// Coordinate (255,191) represents the top-right of the screen. // Coordinate (255,191) represents the top-right of the screen.
#ifndef MSB_FIRST
u8 X1; // 0- 7: First X-coordinate; 0...255 u8 X1; // 0- 7: First X-coordinate; 0...255
u8 Y1; // 8-15: First Y-coordinate; 0...191 u8 Y1; // 8-15: First Y-coordinate; 0...191
u8 X2; // 16-23: Second X-coordinate; 0...255 u8 X2; // 16-23: Second X-coordinate; 0...255
u8 Y2; // 24-31: Second Y-coordinate; 0...191 u8 Y2; // 24-31: Second Y-coordinate; 0...191
#else
u8 Y2; // 24-31: Second Y-coordinate; 0...191
u8 X2; // 16-23: Second X-coordinate; 0...255
u8 Y1; // 8-15: First Y-coordinate; 0...191
u8 X1; // 0- 7: First X-coordinate; 0...255
#endif
}; };
} IOREG_VIEWPORT; // 0x04000580: VIEWPORT command port } IOREG_VIEWPORT; // 0x04000580: VIEWPORT command port
@ -391,6 +399,7 @@ typedef union
struct struct
{ {
#ifndef MSB_FIRST
u8 TestBusy:1; u8 TestBusy:1;
u8 BoxTestResult:1; u8 BoxTestResult:1;
u8 :6; u8 :6;
@ -406,6 +415,23 @@ typedef union
u8 EngineBusy:1; u8 EngineBusy:1;
u8 :2; u8 :2;
u8 CommandListIRQ:2; u8 CommandListIRQ:2;
#else
u8 :6;
u8 BoxTestResult:1;
u8 TestBusy:1;
u8 AckMtxStackError:1;
u8 MtxStackBusy:1;
u8 ProjMtxStackLevel:1;
u8 PosVecMtxStackLevel:5;
u8 CommandListIRQ:2;
u8 :2;
u8 EngineBusy:1;
u8 CommandListEmpty:1;
u8 CommandListLessThanHalf:1;
u16 CommandListCount:9;
#endif
}; };
} IOREG_GXSTAT; // 0x04000600: Geometry engine status } IOREG_GXSTAT; // 0x04000600: Geometry engine status
@ -588,215 +614,22 @@ typedef struct VERT VERT;
#include "PACKED_END.h" #include "PACKED_END.h"
union Vector16x2
{
s16 vec[2];
s16 coord[2];
struct
{
s16 s, t;
};
struct
{
s16 u, v;
};
struct
{
s16 x, y;
} XY;
struct
{
s16 y, z;
} YZ;
struct
{
s16 x, z;
} XZ;
u32 value;
};
typedef union Vector16x2 Vector16x2;
typedef Vector16x2 VertexCoord16x2;
union Vector16x3
{
s16 vec[3];
s16 coord[3];
struct
{
s16 x, y, z;
};
};
typedef union Vector16x3 Vector16x3;
typedef Vector16x3 VertexCoord16x3;
union Vector16x4
{
s16 vec[4];
s16 coord[4];
struct
{
s16 x, y, z, w;
};
struct
{
Vector16x3 vec3;
s16 :16;
};
u64 value;
};
typedef union Vector16x4 Vector16x4;
typedef Vector16x4 VertexCoord16x4;
union Vector32x2
{
s32 vec[2];
s32 coord[2];
struct
{
s32 s, t;
};
struct
{
s32 u, v;
};
struct
{
s32 x, y;
} XY;
struct
{
s32 y, z;
} YZ;
struct
{
s32 x, z;
} XZ;
u64 value;
};
typedef union Vector32x2 Vector32x2;
typedef Vector32x2 VertexCoord32x2;
union Vector32x3
{
s32 vec[3];
s32 coord[3];
struct
{
s32 x, y, z;
};
};
typedef union Vector32x3 Vector32x3;
typedef Vector32x3 VertexCoord32x3;
union Vector32x4
{
s32 vec[4];
s32 coord[4];
struct
{
s32 x, y, z, w;
};
struct
{
Vector32x3 vec3;
s32 :32;
};
};
typedef union Vector32x4 Vector32x4;
typedef Vector32x4 VertexCoord32x4;
union Vector64x2
{
s64 vec[2];
s64 coord[2];
struct
{
s64 s, t;
};
struct
{
s64 u, v;
};
struct
{
s64 x, y;
} XY;
struct
{
s64 y, z;
} YZ;
struct
{
s64 x, z;
} XZ;
};
typedef union Vector64x2 Vector64x2;
typedef Vector64x2 VertexCoord64x2;
union Vector64x3
{
s64 vec[3];
s64 coord[3];
struct
{
s64 x, y, z;
};
};
typedef union Vector64x3 Vector64x3;
typedef Vector64x3 VertexCoord64x3;
union Vector64x4
{
s64 vec[4];
s64 coord[4];
struct
{
s64 x, y, z, w;
};
struct
{
Vector64x3 vec3;
s64 :64;
};
};
typedef union Vector64x4 Vector64x4;
typedef Vector64x4 VertexCoord64x4;
struct NDSVertex struct NDSVertex
{ {
VertexCoord32x4 position; Vector4s32 position;
VertexCoord32x2 texCoord; Vector2s32 texCoord;
FragmentColor color; Color4u8 color;
}; };
typedef struct NDSVertex NDSVertex; typedef struct NDSVertex NDSVertex;
struct NDSVertexf
{
Vector4f32 position;
Vector2f32 texCoord;
Color4f32 color;
};
typedef struct NDSVertexf NDSVertexf;
//ok, imagine the plane that cuts diagonally across a cube such that it clips //ok, imagine the plane that cuts diagonally across a cube such that it clips
//out to be a hexagon. within that plane, draw a quad such that it cuts off //out to be a hexagon. within that plane, draw a quad such that it cuts off
//four corners of the hexagon, and you will observe a decagon //four corners of the hexagon, and you will observe a decagon
@ -842,7 +675,6 @@ typedef struct GFX3D_State GFX3D_State;
struct GFX3D_GeometryList struct GFX3D_GeometryList
{ {
PAGE_ALIGN VERT rawVertList[VERTLIST_SIZE];
PAGE_ALIGN NDSVertex rawVtxList[VERTLIST_SIZE]; PAGE_ALIGN NDSVertex rawVtxList[VERTLIST_SIZE];
PAGE_ALIGN POLY rawPolyList[POLYLIST_SIZE]; PAGE_ALIGN POLY rawPolyList[POLYLIST_SIZE];
PAGE_ALIGN CPoly clippedPolyList[CLIPPED_POLYLIST_SIZE]; PAGE_ALIGN CPoly clippedPolyList[CLIPPED_POLYLIST_SIZE];
@ -900,13 +732,13 @@ struct GeometryEngineLegacySave
u8 mtxMultiply4x3TempIndex; u8 mtxMultiply4x3TempIndex;
u8 mtxMultiply3x3TempIndex; u8 mtxMultiply3x3TempIndex;
VertexCoord16x4 vtxCoord; Vector4s16 vtxPosition;
u8 vtxCoord16CurrentIndex; u8 vtxPosition16CurrentIndex;
u32 vtxFormat; u32 vtxFormat;
Vector32x4 vecTranslate; Vector4s32 vecTranslate;
u8 vecTranslateCurrentIndex; u8 vecTranslateCurrentIndex;
Vector32x4 vecScale; Vector4s32 vecScale;
u8 vecScaleCurrentIndex; u8 vecScaleCurrentIndex;
u32 texCoordT; u32 texCoordT;
@ -919,7 +751,7 @@ struct GeometryEngineLegacySave
float positionTestVtxFloat[4]; // Historically, the position test vertices were stored as floating point values, not as integers. float positionTestVtxFloat[4]; // Historically, the position test vertices were stored as floating point values, not as integers.
u16 boxTestCoord16[6]; u16 boxTestCoord16[6];
FragmentColor vtxColor; Color4u8 vtxColor;
u32 regLightColor[4]; u32 regLightColor[4];
u32 regLightDirection[4]; u32 regLightDirection[4];
@ -991,7 +823,7 @@ struct GFX3D
// Everything below is for save state compatibility. // Everything below is for save state compatibility.
GFX3D_LegacySave legacySave; GFX3D_LegacySave legacySave;
GeometryEngineLegacySave gEngineLegacySave; GeometryEngineLegacySave gEngineLegacySave;
PAGE_ALIGN FragmentColor framebufferNativeSave[GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT]; // Rendered 3D framebuffer that is saved in RGBA8888 color format at the native size. PAGE_ALIGN Color4u8 framebufferNativeSave[GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT]; // Rendered 3D framebuffer that is saved in RGBA8888 color format at the native size.
}; };
typedef struct GFX3D GFX3D; typedef struct GFX3D GFX3D;
@ -1007,8 +839,8 @@ protected:
CACHE_ALIGN NDSMatrix _tempMtxMultiply4x4; CACHE_ALIGN NDSMatrix _tempMtxMultiply4x4;
CACHE_ALIGN NDSMatrix _tempMtxMultiply4x3; CACHE_ALIGN NDSMatrix _tempMtxMultiply4x3;
CACHE_ALIGN NDSMatrix _tempMtxMultiply3x3; CACHE_ALIGN NDSMatrix _tempMtxMultiply3x3;
CACHE_ALIGN Vector32x4 _vecTranslate; CACHE_ALIGN Vector4s32 _vecTranslate;
CACHE_ALIGN Vector32x4 _vecScale; CACHE_ALIGN Vector4s32 _vecScale;
// Matrix stack handling // Matrix stack handling
CACHE_ALIGN NDSMatrixStack1 _mtxStackProjection; CACHE_ALIGN NDSMatrixStack1 _mtxStackProjection;
@ -1016,10 +848,10 @@ protected:
CACHE_ALIGN NDSMatrixStack32 _mtxStackPositionVector; CACHE_ALIGN NDSMatrixStack32 _mtxStackPositionVector;
CACHE_ALIGN NDSMatrixStack1 _mtxStackTexture; CACHE_ALIGN NDSMatrixStack1 _mtxStackTexture;
CACHE_ALIGN Vector32x4 _vecNormal; CACHE_ALIGN Vector4s32 _vecNormal;
CACHE_ALIGN VertexCoord16x3 _vtxCoord16; CACHE_ALIGN Vector3s16 _vtxCoord16;
CACHE_ALIGN VertexCoord16x2 _texCoord16; CACHE_ALIGN Vector2s16 _texCoord16;
CACHE_ALIGN VertexCoord32x2 _texCoordTransformed; CACHE_ALIGN Vector2s32 _texCoordTransformed;
CACHE_ALIGN u8 _shininessTablePending[128]; CACHE_ALIGN u8 _shininessTablePending[128];
CACHE_ALIGN u8 _shininessTableApplied[128]; CACHE_ALIGN u8 _shininessTableApplied[128];
@ -1035,10 +867,8 @@ protected:
u8 _vecTranslateCurrentIndex; u8 _vecTranslateCurrentIndex;
u32 _vtxColor15; u32 _vtxColor15;
FragmentColor _vtxColor555X; Color4u8 _vtxColor555X;
FragmentColor _vtxColor666X; Color4u8 _vtxColor666X;
float _vtxColorFloat[4];
float _texCoordTransformedFloat[2];
bool _doesViewportNeedUpdate; bool _doesViewportNeedUpdate;
bool _doesVertexColorNeedUpdate; bool _doesVertexColorNeedUpdate;
@ -1062,7 +892,7 @@ protected:
u8 _boxTestCoordCurrentIndex; u8 _boxTestCoordCurrentIndex;
u8 _positionTestCoordCurrentIndex; u8 _positionTestCoordCurrentIndex;
CACHE_ALIGN u16 _boxTestCoord16[6]; CACHE_ALIGN u16 _boxTestCoord16[6];
CACHE_ALIGN VertexCoord32x4 _positionTestVtx32; CACHE_ALIGN Vector4s32 _positionTestVtx32;
u32 _regLightColor[4]; u32 _regLightColor[4];
u32 _regLightDirection[4]; u32 _regLightDirection[4];
@ -1072,8 +902,8 @@ protected:
u16 _regEmission; u16 _regEmission;
u8 _shininessTablePendingIndex; u8 _shininessTablePendingIndex;
CACHE_ALIGN Vector32x4 _vecLightDirectionTransformed[4]; CACHE_ALIGN Vector4s32 _vecLightDirectionTransformed[4];
CACHE_ALIGN Vector32x4 _vecLightDirectionHalfNegative[4]; CACHE_ALIGN Vector4s32 _vecLightDirectionHalfNegative[4];
bool _doesLightHalfVectorNeedUpdate[4]; bool _doesLightHalfVectorNeedUpdate[4];
// This enum serves no real functional purpose except to be used for save state compatibility. // This enum serves no real functional purpose except to be used for save state compatibility.
@ -1131,24 +961,24 @@ public:
void SetViewport(const IOREG_VIEWPORT regViewport); void SetViewport(const IOREG_VIEWPORT regViewport);
void SetViewport(const GFX3D_Viewport viewport); void SetViewport(const GFX3D_Viewport viewport);
void SetVertexColor(const u32 param); void SetVertexColor(const u32 param);
void SetVertexColor(const FragmentColor vtxColor555X); void SetVertexColor(const Color4u8 vtxColor555X);
void SetTextureParameters(const u32 param); void SetTextureParameters(const u32 param);
void SetTextureParameters(const TEXIMAGE_PARAM texParams); void SetTextureParameters(const TEXIMAGE_PARAM texParams);
void SetTexturePalette(const u32 texPalette); void SetTexturePalette(const u32 texPalette);
void SetTextureCoordinates(const u32 param); void SetTextureCoordinates2s16(const u32 param);
void SetTextureCoordinates(const VertexCoord16x2 &texCoord16); void SetTextureCoordinates2s16(const Vector2s16 &texCoord16);
void VertexListBegin(const u32 param, const POLYGON_ATTR polyAttr); void VertexListBegin(const u32 param, const POLYGON_ATTR polyAttr);
void VertexListBegin(const PolygonPrimitiveType vtxFormat, const POLYGON_ATTR polyAttr); void VertexListBegin(const PolygonPrimitiveType vtxFormat, const POLYGON_ATTR polyAttr);
void VertexListEnd(); void VertexListEnd();
bool SetCurrentVertex16x2(const u32 param); bool SetCurrentVertexPosition2s16(const u32 param);
bool SetCurrentVertex16x2(const VertexCoord16x2 inVtxCoord16x2); bool SetCurrentVertexPosition2s16(const Vector2s16 inVtxCoord16x2);
void SetCurrentVertex10x3(const u32 param); void SetCurrentVertexPosition3s10(const u32 param);
void SetCurrentVertex(const VertexCoord16x3 inVtxCoord16x3); void SetCurrentVertexPosition(const Vector3s16 inVtxCoord16x3);
template<size_t ONE, size_t TWO> void SetCurrentVertex16x2Immediate(const u32 param); template<size_t ONE, size_t TWO> void SetCurrentVertexPosition2s16Immediate(const u32 param);
template<size_t ONE, size_t TWO> void SetCurrentVertex16x2Immediate(const VertexCoord16x2 inVtxCoord16x2); template<size_t ONE, size_t TWO> void SetCurrentVertexPosition2s16Immediate(const Vector2s16 inVtxCoord16x2);
void SetCurrentVertex10x3Relative(const u32 param); void SetCurrentVertexPosition3s10Relative(const u32 param);
void SetCurrentVertexRelative(const VertexCoord16x3 inVtxCoord16x3); void SetCurrentVertexPositionRelative(const Vector3s16 inVtxCoord16x3);
void AddCurrentVertexToList(GFX3D_GeometryList &targetGList); void AddCurrentVertexToList(GFX3D_GeometryList &targetGList);
void GeneratePolygon(POLY &targetPoly, GFX3D_GeometryList &targetGList); void GeneratePolygon(POLY &targetPoly, GFX3D_GeometryList &targetGList);

View File

@ -313,7 +313,7 @@ FORCEINLINE int edge_fx_fl::Step() {
static FORCEINLINE void alphaBlend(const bool isAlphaBlendingEnabled, const FragmentColor inSrc, FragmentColor &outDst) static FORCEINLINE void alphaBlend(const bool isAlphaBlendingEnabled, const Color4u8 inSrc, Color4u8 &outDst)
{ {
if (inSrc.a == 0) if (inSrc.a == 0)
{ {
@ -335,7 +335,7 @@ static FORCEINLINE void alphaBlend(const bool isAlphaBlendingEnabled, const Frag
} }
} }
static FORCEINLINE void EdgeBlend(FragmentColor &dst, const FragmentColor src) static FORCEINLINE void EdgeBlend(Color4u8 &dst, const Color4u8 src)
{ {
if (src.a == 31 || dst.a == 0) if (src.a == 31 || dst.a == 0)
{ {
@ -372,7 +372,7 @@ Render3DError RasterizerUnit<RENDERER>::_SetupTexture(const POLY &thePoly, size_
} }
template<bool RENDERER> template<bool RENDERER>
FORCEINLINE FragmentColor RasterizerUnit<RENDERER>::_sample(const float u, const float v) FORCEINLINE Color4u8 RasterizerUnit<RENDERER>::_sample(const float u, const float v)
{ {
//finally, we can use floor here. but, it is slower than we want. //finally, we can use floor here. but, it is slower than we want.
//the best solution is probably to wait until the pipeline is full of fixed point //the best solution is probably to wait until the pipeline is full of fixed point
@ -395,8 +395,8 @@ FORCEINLINE FragmentColor RasterizerUnit<RENDERER>::_sample(const float u, const
const u32 *textureData = this->_currentTexture->GetRenderData(); const u32 *textureData = this->_currentTexture->GetRenderData();
this->_currentTexture->GetRenderSamplerCoordinates(this->_textureWrapMode, iu, iv); this->_currentTexture->GetRenderSamplerCoordinates(this->_textureWrapMode, iu, iv);
FragmentColor color; Color4u8 color;
color.color = textureData[( iv << this->_currentTexture->GetRenderWidthShift() ) + iu]; color.value = textureData[( iv << this->_currentTexture->GetRenderWidthShift() ) + iu];
return color; return color;
} }
@ -416,7 +416,7 @@ FORCEINLINE float RasterizerUnit<RENDERER>::_round_s(double val)
} }
template<bool RENDERER> template<bool ISSHADOWPOLYGON> template<bool RENDERER> template<bool ISSHADOWPOLYGON>
FORCEINLINE void RasterizerUnit<RENDERER>::_shade(const PolygonMode polygonMode, const FragmentColor src, FragmentColor &dst, const float texCoordU, const float texCoordV) FORCEINLINE void RasterizerUnit<RENDERER>::_shade(const PolygonMode polygonMode, const Color4u8 src, Color4u8 &dst, const float texCoordU, const float texCoordV)
{ {
if (ISSHADOWPOLYGON) if (ISSHADOWPOLYGON)
{ {
@ -426,8 +426,8 @@ FORCEINLINE void RasterizerUnit<RENDERER>::_shade(const PolygonMode polygonMode,
const GFX3D_State &renderState = *this->_softRender->currentRenderState; const GFX3D_State &renderState = *this->_softRender->currentRenderState;
static const FragmentColor colorWhite = MakeFragmentColor(0x3F, 0x3F, 0x3F, 0x1F); static const Color4u8 colorWhite = { 0x3F, 0x3F, 0x3F, 0x1F };
const FragmentColor mainTexColor = (this->_currentTexture->IsSamplingEnabled()) ? this->_sample(texCoordU, texCoordV) : colorWhite; const Color4u8 mainTexColor = (this->_currentTexture->IsSamplingEnabled()) ? this->_sample(texCoordU, texCoordV) : colorWhite;
switch (polygonMode) switch (polygonMode)
{ {
@ -468,7 +468,7 @@ FORCEINLINE void RasterizerUnit<RENDERER>::_shade(const PolygonMode polygonMode,
case POLYGON_MODE_TOONHIGHLIGHT: case POLYGON_MODE_TOONHIGHLIGHT:
{ {
const FragmentColor toonColor = this->_softRender->toonColor32LUT[src.r >> 1]; const Color4u8 toonColor = this->_softRender->toonColor32LUT[src.r >> 1];
if (renderState.DISP3DCNT.PolygonShading == PolygonShadingMode_Highlight) if (renderState.DISP3DCNT.PolygonShading == PolygonShadingMode_Highlight)
{ {
@ -502,11 +502,11 @@ FORCEINLINE void RasterizerUnit<RENDERER>::_shade(const PolygonMode polygonMode,
} }
template<bool RENDERER> template<bool ISFRONTFACING, bool ISSHADOWPOLYGON> template<bool RENDERER> template<bool ISFRONTFACING, bool ISSHADOWPOLYGON>
FORCEINLINE void RasterizerUnit<RENDERER>::_pixel(const POLYGON_ATTR polyAttr, const bool isTranslucent, const size_t fragmentIndex, FragmentColor &dstColor, float r, float g, float b, float invu, float invv, float z, float w) FORCEINLINE void RasterizerUnit<RENDERER>::_pixel(const POLYGON_ATTR polyAttr, const bool isTranslucent, const size_t fragmentIndex, Color4u8 &dstColor, float r, float g, float b, float invu, float invv, float z, float w)
{ {
const GFX3D_State &renderState = *this->_softRender->currentRenderState; const GFX3D_State &renderState = *this->_softRender->currentRenderState;
FragmentColor newDstColor32; Color4u8 newDstColor32;
FragmentColor shaderOutput; Color4u8 shaderOutput;
bool isOpaquePixel; bool isOpaquePixel;
u32 &dstAttributeDepth = this->_softRender->_framebufferAttributes->depth[fragmentIndex]; u32 &dstAttributeDepth = this->_softRender->_framebufferAttributes->depth[fragmentIndex];
@ -609,10 +609,10 @@ FORCEINLINE void RasterizerUnit<RENDERER>::_pixel(const POLYGON_ATTR polyAttr, c
//this is a HACK: //this is a HACK:
//we are being very sloppy with our interpolation precision right now //we are being very sloppy with our interpolation precision right now
//and rather than fix it, i just want to clamp it //and rather than fix it, i just want to clamp it
newDstColor32 = MakeFragmentColor(max<u8>(0x00, min<u32>(0x3F, u32floor(r))), newDstColor32.r = max<u8>(0x00, min<u32>(0x3F, u32floor(r)));
max<u8>(0x00, min<u32>(0x3F, u32floor(g))), newDstColor32.g = max<u8>(0x00, min<u32>(0x3F, u32floor(g)));
max<u8>(0x00, min<u32>(0x3F, u32floor(b))), newDstColor32.b = max<u8>(0x00, min<u32>(0x3F, u32floor(b)));
polyAttr.Alpha); newDstColor32.a = polyAttr.Alpha;
//pixel shader //pixel shader
this->_shade<ISSHADOWPOLYGON>((PolygonMode)polyAttr.Mode, newDstColor32, shaderOutput, invu * w, invv * w); this->_shade<ISSHADOWPOLYGON>((PolygonMode)polyAttr.Mode, newDstColor32, shaderOutput, invu * w, invv * w);
@ -659,7 +659,7 @@ FORCEINLINE void RasterizerUnit<RENDERER>::_pixel(const POLYGON_ATTR polyAttr, c
//draws a single scanline //draws a single scanline
template<bool RENDERER> template<bool ISFRONTFACING, bool ISSHADOWPOLYGON, bool USELINEHACK> template<bool RENDERER> template<bool ISFRONTFACING, bool ISSHADOWPOLYGON, bool USELINEHACK>
FORCEINLINE void RasterizerUnit<RENDERER>::_drawscanline(const POLYGON_ATTR polyAttr, const bool isTranslucent, FragmentColor *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, edge_fx_fl *pLeft, edge_fx_fl *pRight) FORCEINLINE void RasterizerUnit<RENDERER>::_drawscanline(const POLYGON_ATTR polyAttr, const bool isTranslucent, Color4u8 *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, edge_fx_fl *pLeft, edge_fx_fl *pRight)
{ {
const int XStart = pLeft->X; const int XStart = pLeft->X;
int width = pRight->X - XStart; int width = pRight->X - XStart;
@ -780,11 +780,11 @@ FORCEINLINE void RasterizerUnit<RENDERER>::_drawscanline(const POLYGON_ATTR poly
#ifdef ENABLE_SSE2 #ifdef ENABLE_SSE2
template<bool RENDERER> template<bool ISFRONTFACING, bool ISSHADOWPOLYGON> template<bool RENDERER> template<bool ISFRONTFACING, bool ISSHADOWPOLYGON>
FORCEINLINE void RasterizerUnit<RENDERER>::_pixel_SSE2(const POLYGON_ATTR polyAttr, const bool isTranslucent, const size_t fragmentIndex, FragmentColor &dstColor, const __m128 &srcColorf, float invu, float invv, float z, float w) FORCEINLINE void RasterizerUnit<RENDERER>::_pixel_SSE2(const POLYGON_ATTR polyAttr, const bool isTranslucent, const size_t fragmentIndex, Color4u8 &dstColor, const __m128 &srcColorf, float invu, float invv, float z, float w)
{ {
const GFX3D_State &renderState = *this->_softRender->currentRenderState; const GFX3D_State &renderState = *this->_softRender->currentRenderState;
FragmentColor newDstColor32; Color4u8 newDstColor32;
FragmentColor shaderOutput; Color4u8 shaderOutput;
bool isOpaquePixel; bool isOpaquePixel;
u32 &dstAttributeDepth = this->_softRender->_framebufferAttributes->depth[fragmentIndex]; u32 &dstAttributeDepth = this->_softRender->_framebufferAttributes->depth[fragmentIndex];
@ -889,7 +889,7 @@ FORCEINLINE void RasterizerUnit<RENDERER>::_pixel_SSE2(const POLYGON_ATTR polyAt
cvtColor32 = _mm_packus_epi16(cvtColor32, _mm_setzero_si128()); cvtColor32 = _mm_packus_epi16(cvtColor32, _mm_setzero_si128());
cvtColor32 = _mm_packus_epi16(cvtColor32, _mm_setzero_si128()); cvtColor32 = _mm_packus_epi16(cvtColor32, _mm_setzero_si128());
newDstColor32.color = _mm_cvtsi128_si32(cvtColor32); newDstColor32.value = _mm_cvtsi128_si32(cvtColor32);
//pixel shader //pixel shader
this->_shade<ISSHADOWPOLYGON>((PolygonMode)polyAttr.Mode, newDstColor32, shaderOutput, invu * w, invv * w); this->_shade<ISSHADOWPOLYGON>((PolygonMode)polyAttr.Mode, newDstColor32, shaderOutput, invu * w, invv * w);
@ -936,7 +936,7 @@ FORCEINLINE void RasterizerUnit<RENDERER>::_pixel_SSE2(const POLYGON_ATTR polyAt
//draws a single scanline //draws a single scanline
template<bool RENDERER> template<bool ISFRONTFACING, bool ISSHADOWPOLYGON, bool USELINEHACK> template<bool RENDERER> template<bool ISFRONTFACING, bool ISSHADOWPOLYGON, bool USELINEHACK>
FORCEINLINE void RasterizerUnit<RENDERER>::_drawscanline_SSE2(const POLYGON_ATTR polyAttr, const bool isTranslucent, FragmentColor *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, edge_fx_fl *pLeft, edge_fx_fl *pRight) FORCEINLINE void RasterizerUnit<RENDERER>::_drawscanline_SSE2(const POLYGON_ATTR polyAttr, const bool isTranslucent, Color4u8 *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, edge_fx_fl *pLeft, edge_fx_fl *pRight)
{ {
const int XStart = pLeft->X; const int XStart = pLeft->X;
int width = pRight->X - XStart; int width = pRight->X - XStart;
@ -1031,7 +1031,7 @@ FORCEINLINE void RasterizerUnit<RENDERER>::_drawscanline_SSE2(const POLYGON_ATTR
//runs several scanlines, until an edge is finished //runs several scanlines, until an edge is finished
template<bool RENDERER> template<bool SLI, bool ISFRONTFACING, bool ISSHADOWPOLYGON, bool USELINEHACK> template<bool RENDERER> template<bool SLI, bool ISFRONTFACING, bool ISSHADOWPOLYGON, bool USELINEHACK>
void RasterizerUnit<RENDERER>::_runscanlines(const POLYGON_ATTR polyAttr, const bool isTranslucent, FragmentColor *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, const bool isHorizontal, edge_fx_fl *left, edge_fx_fl *right) void RasterizerUnit<RENDERER>::_runscanlines(const POLYGON_ATTR polyAttr, const bool isTranslucent, Color4u8 *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, const bool isHorizontal, edge_fx_fl *left, edge_fx_fl *right)
{ {
//oh lord, hack city for edge drawing //oh lord, hack city for edge drawing
@ -1065,9 +1065,9 @@ void RasterizerUnit<RENDERER>::_runscanlines(const POLYGON_ATTR polyAttr, const
#endif #endif
} }
const int xl = left->X; const size_t xl = left->X;
const int xr = right->X; const size_t xr = right->X;
const int y = left->Y; const size_t y = left->Y;
left->Step(); left->Step();
right->Step(); right->Step();
@ -1080,15 +1080,15 @@ void RasterizerUnit<RENDERER>::_runscanlines(const POLYGON_ATTR polyAttr, const
{ {
if (draw) if (draw)
{ {
int nxl = left->X; const size_t nxl = left->X;
int nxr = right->X; const size_t nxr = right->X;
if (top) if (top)
{ {
int xs = min(xl,xr); const size_t xs = min(xl, xr);
int xe = max(xl,xr); const size_t xe = max(xl, xr);
for (int x = xs; x <= xe; x++) for (size_t x = xs; x <= xe; x++)
{ {
int adr = (y*framebufferWidth)+x; const size_t adr = (y * framebufferWidth) + x;
dstColor[adr].r = 63; dstColor[adr].r = 63;
dstColor[adr].g = 0; dstColor[adr].g = 0;
dstColor[adr].b = 0; dstColor[adr].b = 0;
@ -1096,11 +1096,11 @@ void RasterizerUnit<RENDERER>::_runscanlines(const POLYGON_ATTR polyAttr, const
} }
else if (bottom) else if (bottom)
{ {
int xs = min(xl,xr); const size_t xs = min(xl, xr);
int xe = max(xl,xr); const size_t xe = max(xl, xr);
for (int x = xs; x <= xe; x++) for (size_t x = xs; x <= xe; x++)
{ {
int adr = (y*framebufferWidth)+x; const size_t adr = (y * framebufferWidth) + x;
dstColor[adr].r = 63; dstColor[adr].r = 63;
dstColor[adr].g = 0; dstColor[adr].g = 0;
dstColor[adr].b = 0; dstColor[adr].b = 0;
@ -1108,20 +1108,21 @@ void RasterizerUnit<RENDERER>::_runscanlines(const POLYGON_ATTR polyAttr, const
} }
else else
{ {
int xs = min(xl,nxl); size_t xs = min(xl, nxl);
int xe = max(xl,nxl); size_t xe = max(xl, nxl);
for (int x = xs; x <= xe; x++) for (size_t x = xs; x <= xe; x++)
{ {
int adr = (y*framebufferWidth)+x; const size_t adr = (y * framebufferWidth) + x;
dstColor[adr].r = 63; dstColor[adr].r = 63;
dstColor[adr].g = 0; dstColor[adr].g = 0;
dstColor[adr].b = 0; dstColor[adr].b = 0;
} }
xs = min(xr,nxr);
xe = max(xr,nxr); xs = min(xr, nxr);
for (int x = xs; x <= xe; x++) xe = max(xr, nxr);
for (size_t x = xs; x <= xe; x++)
{ {
int adr = (y*framebufferWidth)+x; const size_t adr = (y * framebufferWidth) + x;
dstColor[adr].r = 63; dstColor[adr].r = 63;
dstColor[adr].g = 0; dstColor[adr].g = 0;
dstColor[adr].b = 0; dstColor[adr].b = 0;
@ -1190,7 +1191,7 @@ void RasterizerUnit<RENDERER>::_sort_verts()
//I didnt reference anything for this algorithm but it seems like I've seen it somewhere before. //I didnt reference anything for this algorithm but it seems like I've seen it somewhere before.
//Maybe it is like crow's algorithm //Maybe it is like crow's algorithm
template<bool RENDERER> template<bool SLI, bool ISFRONTFACING, bool ISSHADOWPOLYGON, bool USELINEHACK> template<bool RENDERER> template<bool SLI, bool ISFRONTFACING, bool ISSHADOWPOLYGON, bool USELINEHACK>
void RasterizerUnit<RENDERER>::_shape_engine(const POLYGON_ATTR polyAttr, const bool isTranslucent, FragmentColor *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, int type) void RasterizerUnit<RENDERER>::_shape_engine(const POLYGON_ATTR polyAttr, const bool isTranslucent, Color4u8 *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, int type)
{ {
bool failure = false; bool failure = false;
@ -1272,7 +1273,7 @@ FORCEINLINE void RasterizerUnit<RENDERER>::Render()
return; return;
} }
FragmentColor *dstColor = this->_softRender->GetFramebuffer(); Color4u8 *dstColor = this->_softRender->GetFramebuffer();
const size_t dstWidth = this->_softRender->GetFramebufferWidth(); const size_t dstWidth = this->_softRender->GetFramebufferWidth();
const size_t dstHeight = this->_softRender->GetFramebufferHeight(); const size_t dstHeight = this->_softRender->GetFramebufferHeight();
@ -1737,7 +1738,7 @@ SoftRasterizerRenderer::SoftRasterizerRenderer()
_enableLineHack = CommonSettings.GFX3D_LineHack; _enableLineHack = CommonSettings.GFX3D_LineHack;
_enableFragmentSamplingHack = CommonSettings.GFX3D_TXTHack; _enableFragmentSamplingHack = CommonSettings.GFX3D_TXTHack;
_HACK_viewer_rasterizerUnit.SetSLI(0, _framebufferHeight, false); _HACK_viewer_rasterizerUnit.SetSLI(0, (u32)_framebufferHeight, false);
const size_t coreCount = CommonSettings.num_cores; const size_t coreCount = CommonSettings.num_cores;
_threadCount = coreCount; _threadCount = coreCount;
@ -1768,7 +1769,7 @@ SoftRasterizerRenderer::SoftRasterizerRenderer()
_threadClearParam[0].startPixel = 0; _threadClearParam[0].startPixel = 0;
_threadClearParam[0].endPixel = _framebufferPixCount; _threadClearParam[0].endPixel = _framebufferPixCount;
_rasterizerUnit[0].SetSLI(_threadPostprocessParam[0].startLine, _threadPostprocessParam[0].endLine, false); _rasterizerUnit[0].SetSLI((u32)_threadPostprocessParam[0].startLine, (u32)_threadPostprocessParam[0].endLine, false);
_rasterizerUnit[0].SetRenderer(this); _rasterizerUnit[0].SetRenderer(this);
} }
else else
@ -1794,7 +1795,7 @@ SoftRasterizerRenderer::SoftRasterizerRenderer()
_threadClearParam[i].startPixel = i * _customPixelsPerThread; _threadClearParam[i].startPixel = i * _customPixelsPerThread;
_threadClearParam[i].endPixel = (i < _threadCount - 1) ? (i + 1) * _customPixelsPerThread : _framebufferPixCount; _threadClearParam[i].endPixel = (i < _threadCount - 1) ? (i + 1) * _customPixelsPerThread : _framebufferPixCount;
_rasterizerUnit[i].SetSLI(_threadPostprocessParam[i].startLine, _threadPostprocessParam[i].endLine, false); _rasterizerUnit[i].SetSLI((u32)_threadPostprocessParam[i].startLine, (u32)_threadPostprocessParam[i].endLine, false);
_rasterizerUnit[i].SetRenderer(this); _rasterizerUnit[i].SetRenderer(this);
char name[16]; char name[16];
@ -1988,7 +1989,7 @@ void SoftRasterizerRenderer::_UpdateEdgeMarkColorTable(const u16 *edgeMarkColorT
//we can do this by rendering a 3d frame and then freezing the system, but only changing the edge mark colors //we can do this by rendering a 3d frame and then freezing the system, but only changing the edge mark colors
for (size_t i = 0; i < 8; i++) for (size_t i = 0; i < 8; i++)
{ {
this->_edgeMarkTable[i].color = LE_TO_LOCAL_32( COLOR555TO6665(edgeMarkColorTable[i] & 0x7FFF, (this->currentRenderState->DISP3DCNT.EnableAntialiasing) ? 0x10 : 0x1F) ); this->_edgeMarkTable[i].value = LE_TO_LOCAL_32( COLOR555TO6665(edgeMarkColorTable[i] & 0x7FFF, (this->currentRenderState->DISP3DCNT.EnableAntialiasing) ? 0x10 : 0x1F) );
//zero 20-jun-2013 - this doesnt make any sense. at least, it should be related to the 0x8000 bit. if this is undocumented behaviour, lets write about which scenario proves it here, or which scenario is requiring this code. //zero 20-jun-2013 - this doesnt make any sense. at least, it should be related to the 0x8000 bit. if this is undocumented behaviour, lets write about which scenario proves it here, or which scenario is requiring this code.
//// this seems to be the only thing that selectively disables edge marking //// this seems to be the only thing that selectively disables edge marking
@ -2075,7 +2076,7 @@ Render3DError SoftRasterizerRenderer::RenderEdgeMarkingAndFog(const SoftRasteriz
{ {
for (size_t x = 0; x < this->_framebufferWidth; x++, i++) for (size_t x = 0; x < this->_framebufferWidth; x++, i++)
{ {
FragmentColor &dstColor = this->_framebufferColor[i]; Color4u8 &dstColor = this->_framebufferColor[i];
const u32 depth = this->_framebufferAttributes->depth[i]; const u32 depth = this->_framebufferAttributes->depth[i];
const u8 polyID = this->_framebufferAttributes->opaquePolyID[i]; const u8 polyID = this->_framebufferAttributes->opaquePolyID[i];
@ -2095,7 +2096,7 @@ Render3DError SoftRasterizerRenderer::RenderEdgeMarkingAndFog(const SoftRasteriz
const bool left = (x < 1) ? isEdgeMarkingClearValues : ((polyID != this->_framebufferAttributes->opaquePolyID[i-1]) && (depth >= this->_framebufferAttributes->depth[i-1])); const bool left = (x < 1) ? isEdgeMarkingClearValues : ((polyID != this->_framebufferAttributes->opaquePolyID[i-1]) && (depth >= this->_framebufferAttributes->depth[i-1]));
const bool up = (y < 1) ? isEdgeMarkingClearValues : ((polyID != this->_framebufferAttributes->opaquePolyID[i-this->_framebufferWidth]) && (depth >= this->_framebufferAttributes->depth[i-this->_framebufferWidth])); const bool up = (y < 1) ? isEdgeMarkingClearValues : ((polyID != this->_framebufferAttributes->opaquePolyID[i-this->_framebufferWidth]) && (depth >= this->_framebufferAttributes->depth[i-this->_framebufferWidth]));
FragmentColor edgeMarkColor = this->_edgeMarkTable[this->_framebufferAttributes->opaquePolyID[i] >> 3]; Color4u8 edgeMarkColor = this->_edgeMarkTable[this->_framebufferAttributes->opaquePolyID[i] >> 3];
if (right) if (right)
{ {
@ -2135,8 +2136,8 @@ Render3DError SoftRasterizerRenderer::RenderEdgeMarkingAndFog(const SoftRasteriz
if (param.enableFog) if (param.enableFog)
{ {
FragmentColor fogColor; Color4u8 fogColor;
fogColor.color = LE_TO_LOCAL_32( COLOR555TO6665(param.fogColor & 0x7FFF, (param.fogColor>>16) & 0x1F) ); fogColor.value = LE_TO_LOCAL_32( COLOR555TO6665(param.fogColor & 0x7FFF, (param.fogColor>>16) & 0x1F) );
const size_t fogIndex = depth >> 9; const size_t fogIndex = depth >> 9;
assert(fogIndex < 32768); assert(fogIndex < 32768);
@ -2194,7 +2195,7 @@ Render3DError SoftRasterizerRenderer::ClearUsingImage(const u16 *__restrict colo
{ {
const size_t ir = readLine + ((x * xRatio) >> 16); const size_t ir = readLine + ((x * xRatio) >> 16);
this->_framebufferColor[iw].color = LE_TO_LOCAL_32( COLOR555TO6665(colorBuffer[ir] & 0x7FFF, (colorBuffer[ir] >> 15) * 0x1F) ); this->_framebufferColor[iw].value = LE_TO_LOCAL_32( COLOR555TO6665(colorBuffer[ir] & 0x7FFF, (colorBuffer[ir] >> 15) * 0x1F) );
this->_framebufferAttributes->depth[iw] = depthBuffer[ir]; this->_framebufferAttributes->depth[iw] = depthBuffer[ir];
this->_framebufferAttributes->isFogged[iw] = fogBuffer[ir]; this->_framebufferAttributes->isFogged[iw] = fogBuffer[ir];
this->_framebufferAttributes->opaquePolyID[iw] = opaquePolyID; this->_framebufferAttributes->opaquePolyID[iw] = opaquePolyID;
@ -2217,7 +2218,7 @@ void SoftRasterizerRenderer::ClearUsingValues_Execute(const size_t startPixel, c
} }
} }
Render3DError SoftRasterizerRenderer::ClearUsingValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes) Render3DError SoftRasterizerRenderer::ClearUsingValues(const Color4u8 &clearColor6665, const FragmentAttributes &clearAttributes)
{ {
const bool doMultithreadedClear = (this->_threadCount > 0); const bool doMultithreadedClear = (this->_threadCount > 0);
@ -2333,7 +2334,7 @@ Render3DError SoftRasterizerRenderer::RenderFlush(bool willFlushBuffer32, bool w
return RENDER3DERROR_NOERR; return RENDER3DERROR_NOERR;
} }
FragmentColor *framebufferMain = (willFlushBuffer32 && (this->_outputFormat == NDSColorFormat_BGR888_Rev)) ? GPU->GetEngineMain()->Get3DFramebufferMain() : NULL; Color4u8 *framebufferMain = (willFlushBuffer32 && (this->_outputFormat == NDSColorFormat_BGR888_Rev)) ? GPU->GetEngineMain()->Get3DFramebufferMain() : NULL;
u16 *framebuffer16 = (willFlushBuffer16) ? GPU->GetEngineMain()->Get3DFramebuffer16() : NULL; u16 *framebuffer16 = (willFlushBuffer16) ? GPU->GetEngineMain()->Get3DFramebuffer16() : NULL;
this->FlushFramebuffer(this->_framebufferColor, framebufferMain, framebuffer16); this->FlushFramebuffer(this->_framebufferColor, framebufferMain, framebuffer16);
@ -2364,7 +2365,7 @@ Render3DError SoftRasterizerRenderer::SetFramebufferSize(size_t w, size_t h)
this->_threadClearParam[0].startPixel = 0; this->_threadClearParam[0].startPixel = 0;
this->_threadClearParam[0].endPixel = pixCount; this->_threadClearParam[0].endPixel = pixCount;
this->_rasterizerUnit[0].SetSLI(this->_threadPostprocessParam[0].startLine, this->_threadPostprocessParam[0].endLine, false); this->_rasterizerUnit[0].SetSLI((u32)this->_threadPostprocessParam[0].startLine, (u32)this->_threadPostprocessParam[0].endLine, false);
} }
else else
{ {
@ -2379,7 +2380,7 @@ Render3DError SoftRasterizerRenderer::SetFramebufferSize(size_t w, size_t h)
this->_threadClearParam[i].startPixel = i * this->_customPixelsPerThread; this->_threadClearParam[i].startPixel = i * this->_customPixelsPerThread;
this->_threadClearParam[i].endPixel = (i < this->_threadCount - 1) ? (i + 1) * this->_customPixelsPerThread : pixCount; this->_threadClearParam[i].endPixel = (i < this->_threadCount - 1) ? (i + 1) * this->_customPixelsPerThread : pixCount;
this->_rasterizerUnit[i].SetSLI(this->_threadPostprocessParam[i].startLine, this->_threadPostprocessParam[i].endLine, false); this->_rasterizerUnit[i].SetSLI((u32)this->_threadPostprocessParam[i].startLine, (u32)this->_threadPostprocessParam[i].endLine, false);
} }
} }
@ -2411,7 +2412,7 @@ SoftRasterizer_SIMD<SIMDBYTES>::SoftRasterizer_SIMD()
} }
template <size_t SIMDBYTES> template <size_t SIMDBYTES>
Render3DError SoftRasterizer_SIMD<SIMDBYTES>::ClearUsingValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes) Render3DError SoftRasterizer_SIMD<SIMDBYTES>::ClearUsingValues(const Color4u8 &clearColor6665, const FragmentAttributes &clearAttributes)
{ {
this->LoadClearValues(clearColor6665, clearAttributes); this->LoadClearValues(clearColor6665, clearAttributes);
@ -2500,9 +2501,9 @@ Render3DError SoftRasterizer_SIMD<SIMDBYTES>::SetFramebufferSize(size_t w, size_
#if defined(ENABLE_AVX2) #if defined(ENABLE_AVX2)
void SoftRasterizerRenderer_AVX2::LoadClearValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes) void SoftRasterizerRenderer_AVX2::LoadClearValues(const Color4u8 &clearColor6665, const FragmentAttributes &clearAttributes)
{ {
this->_clearColor_v256u32 = _mm256_set1_epi32(clearColor6665.color); this->_clearColor_v256u32 = _mm256_set1_epi32(clearColor6665.value);
this->_clearDepth_v256u32 = _mm256_set1_epi32(clearAttributes.depth); this->_clearDepth_v256u32 = _mm256_set1_epi32(clearAttributes.depth);
this->_clearAttrOpaquePolyID_v256u8 = _mm256_set1_epi8(clearAttributes.opaquePolyID); this->_clearAttrOpaquePolyID_v256u8 = _mm256_set1_epi8(clearAttributes.opaquePolyID);
this->_clearAttrTranslucentPolyID_v256u8 = _mm256_set1_epi8(clearAttributes.translucentPolyID); this->_clearAttrTranslucentPolyID_v256u8 = _mm256_set1_epi8(clearAttributes.translucentPolyID);
@ -2537,9 +2538,9 @@ void SoftRasterizerRenderer_AVX2::ClearUsingValues_Execute(const size_t startPix
#elif defined(ENABLE_SSE2) #elif defined(ENABLE_SSE2)
void SoftRasterizerRenderer_SSE2::LoadClearValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes) void SoftRasterizerRenderer_SSE2::LoadClearValues(const Color4u8 &clearColor6665, const FragmentAttributes &clearAttributes)
{ {
this->_clearColor_v128u32 = _mm_set1_epi32(clearColor6665.color); this->_clearColor_v128u32 = _mm_set1_epi32(clearColor6665.value);
this->_clearDepth_v128u32 = _mm_set1_epi32(clearAttributes.depth); this->_clearDepth_v128u32 = _mm_set1_epi32(clearAttributes.depth);
this->_clearAttrOpaquePolyID_v128u8 = _mm_set1_epi8(clearAttributes.opaquePolyID); this->_clearAttrOpaquePolyID_v128u8 = _mm_set1_epi8(clearAttributes.opaquePolyID);
this->_clearAttrTranslucentPolyID_v128u8 = _mm_set1_epi8(clearAttributes.translucentPolyID); this->_clearAttrTranslucentPolyID_v128u8 = _mm_set1_epi8(clearAttributes.translucentPolyID);
@ -2574,9 +2575,9 @@ void SoftRasterizerRenderer_SSE2::ClearUsingValues_Execute(const size_t startPix
#elif defined(ENABLE_NEON_A64) #elif defined(ENABLE_NEON_A64)
void SoftRasterizerRenderer_NEON::LoadClearValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes) void SoftRasterizerRenderer_NEON::LoadClearValues(const Color4u8 &clearColor6665, const FragmentAttributes &clearAttributes)
{ {
this->_clearColor_v128u32x4.val[0] = vdupq_n_u32(clearColor6665.color); this->_clearColor_v128u32x4.val[0] = vdupq_n_u32(clearColor6665.value);
this->_clearColor_v128u32x4.val[1] = this->_clearColor_v128u32x4.val[0]; this->_clearColor_v128u32x4.val[1] = this->_clearColor_v128u32x4.val[0];
this->_clearColor_v128u32x4.val[2] = this->_clearColor_v128u32x4.val[0]; this->_clearColor_v128u32x4.val[2] = this->_clearColor_v128u32x4.val[0];
this->_clearColor_v128u32x4.val[3] = this->_clearColor_v128u32x4.val[0]; this->_clearColor_v128u32x4.val[3] = this->_clearColor_v128u32x4.val[0];
@ -2642,9 +2643,9 @@ void SoftRasterizerRenderer_NEON::ClearUsingValues_Execute(const size_t startPix
#elif defined(ENABLE_ALTIVEC) #elif defined(ENABLE_ALTIVEC)
void SoftRasterizerRenderer_AltiVec::LoadClearValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes) void SoftRasterizerRenderer_AltiVec::LoadClearValues(const Color4u8 &clearColor6665, const FragmentAttributes &clearAttributes)
{ {
this->_clearColor_v128u32 = (v128u32){clearColor6665.color,clearColor6665.color,clearColor6665.color,clearColor6665.color}; this->_clearColor_v128u32 = (v128u32){clearColor6665.value,clearColor6665.value,clearColor6665.value,clearColor6665.value};
this->_clearDepth_v128u32 = (v128u32){clearAttributes.depth,clearAttributes.depth,clearAttributes.depth,clearAttributes.depth}; this->_clearDepth_v128u32 = (v128u32){clearAttributes.depth,clearAttributes.depth,clearAttributes.depth,clearAttributes.depth};
this->_clearAttrOpaquePolyID_v128u8 = (v128u8){clearAttributes.opaquePolyID,clearAttributes.opaquePolyID,clearAttributes.opaquePolyID,clearAttributes.opaquePolyID, this->_clearAttrOpaquePolyID_v128u8 = (v128u8){clearAttributes.opaquePolyID,clearAttributes.opaquePolyID,clearAttributes.opaquePolyID,clearAttributes.opaquePolyID,

View File

@ -107,22 +107,22 @@ protected:
u8 _textureWrapMode; u8 _textureWrapMode;
Render3DError _SetupTexture(const POLY &thePoly, size_t polyRenderIndex); Render3DError _SetupTexture(const POLY &thePoly, size_t polyRenderIndex);
FORCEINLINE FragmentColor _sample(const float u, const float v); FORCEINLINE Color4u8 _sample(const float u, const float v);
FORCEINLINE float _round_s(double val); FORCEINLINE float _round_s(double val);
template<bool ISSHADOWPOLYGON> FORCEINLINE void _shade(const PolygonMode polygonMode, const FragmentColor src, FragmentColor &dst, const float texCoordU, const float texCoordV); template<bool ISSHADOWPOLYGON> FORCEINLINE void _shade(const PolygonMode polygonMode, const Color4u8 src, Color4u8 &dst, const float texCoordU, const float texCoordV);
template<bool ISFRONTFACING, bool ISSHADOWPOLYGON> FORCEINLINE void _pixel(const POLYGON_ATTR polyAttr, const bool isTranslucent, const size_t fragmentIndex, FragmentColor &dstColor, float r, float g, float b, float invu, float invv, float z, float w); template<bool ISFRONTFACING, bool ISSHADOWPOLYGON> FORCEINLINE void _pixel(const POLYGON_ATTR polyAttr, const bool isTranslucent, const size_t fragmentIndex, Color4u8 &dstColor, float r, float g, float b, float invu, float invv, float z, float w);
template<bool ISFRONTFACING, bool ISSHADOWPOLYGON, bool USELINEHACK> FORCEINLINE void _drawscanline(const POLYGON_ATTR polyAttr, const bool isTranslucent, FragmentColor *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, edge_fx_fl *pLeft, edge_fx_fl *pRight); template<bool ISFRONTFACING, bool ISSHADOWPOLYGON, bool USELINEHACK> FORCEINLINE void _drawscanline(const POLYGON_ATTR polyAttr, const bool isTranslucent, Color4u8 *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, edge_fx_fl *pLeft, edge_fx_fl *pRight);
template<bool SLI, bool ISFRONTFACING, bool ISSHADOWPOLYGON, bool USELINEHACK> void _runscanlines(const POLYGON_ATTR polyAttr, const bool isTranslucent, FragmentColor *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, const bool isHorizontal, edge_fx_fl *left, edge_fx_fl *right); template<bool SLI, bool ISFRONTFACING, bool ISSHADOWPOLYGON, bool USELINEHACK> void _runscanlines(const POLYGON_ATTR polyAttr, const bool isTranslucent, Color4u8 *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, const bool isHorizontal, edge_fx_fl *left, edge_fx_fl *right);
#ifdef ENABLE_SSE2 #ifdef ENABLE_SSE2
template<bool ISFRONTFACING, bool ISSHADOWPOLYGON> FORCEINLINE void _pixel_SSE2(const POLYGON_ATTR polyAttr, const bool isTranslucent, const size_t fragmentIndex, FragmentColor &dstColor, const __m128 &srcColorf, float invu, float invv, float z, float w); template<bool ISFRONTFACING, bool ISSHADOWPOLYGON> FORCEINLINE void _pixel_SSE2(const POLYGON_ATTR polyAttr, const bool isTranslucent, const size_t fragmentIndex, Color4u8 &dstColor, const __m128 &srcColorf, float invu, float invv, float z, float w);
template<bool ISFRONTFACING, bool ISSHADOWPOLYGON, bool USELINEHACK> FORCEINLINE void _drawscanline_SSE2(const POLYGON_ATTR polyAttr, const bool isTranslucent, FragmentColor *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, edge_fx_fl *pLeft, edge_fx_fl *pRight); template<bool ISFRONTFACING, bool ISSHADOWPOLYGON, bool USELINEHACK> FORCEINLINE void _drawscanline_SSE2(const POLYGON_ATTR polyAttr, const bool isTranslucent, Color4u8 *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, edge_fx_fl *pLeft, edge_fx_fl *pRight);
#endif #endif
template<int TYPE> FORCEINLINE void _rot_verts(); template<int TYPE> FORCEINLINE void _rot_verts();
template<bool ISFRONTFACING, int TYPE> void _sort_verts(); template<bool ISFRONTFACING, int TYPE> void _sort_verts();
template<bool SLI, bool ISFRONTFACING, bool ISSHADOWPOLYGON, bool USELINEHACK> void _shape_engine(const POLYGON_ATTR polyAttr, const bool isTranslucent, FragmentColor *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, int type); template<bool SLI, bool ISFRONTFACING, bool ISSHADOWPOLYGON, bool USELINEHACK> void _shape_engine(const POLYGON_ATTR polyAttr, const bool isTranslucent, Color4u8 *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, int type);
public: public:
void SetSLI(u32 startLine, u32 endLine, bool debug); void SetSLI(u32 startLine, u32 endLine, bool debug);
@ -160,7 +160,7 @@ protected:
size_t _customPixelsPerThread; size_t _customPixelsPerThread;
u8 _fogTable[32768]; u8 _fogTable[32768];
FragmentColor _edgeMarkTable[8]; Color4u8 _edgeMarkTable[8];
bool _edgeMarkDisabled[8]; bool _edgeMarkDisabled[8];
bool _renderGeometryNeedsFinish; bool _renderGeometryNeedsFinish;
@ -178,11 +178,11 @@ protected:
virtual Render3DError EndRender(); virtual Render3DError EndRender();
virtual Render3DError ClearUsingImage(const u16 *__restrict colorBuffer, const u32 *__restrict depthBuffer, const u8 *__restrict fogBuffer, const u8 opaquePolyID); virtual Render3DError ClearUsingImage(const u16 *__restrict colorBuffer, const u32 *__restrict depthBuffer, const u8 *__restrict fogBuffer, const u8 opaquePolyID);
virtual Render3DError ClearUsingValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes); virtual Render3DError ClearUsingValues(const Color4u8 &clearColor6665, const FragmentAttributes &clearAttributes);
public: public:
int _debug_drawClippedUserPoly; int _debug_drawClippedUserPoly;
CACHE_ALIGN FragmentColor toonColor32LUT[32]; CACHE_ALIGN Color4u8 toonColor32LUT[32];
FragmentAttributesBuffer *_framebufferAttributes; FragmentAttributesBuffer *_framebufferAttributes;
GFX3D_State *currentRenderState; GFX3D_State *currentRenderState;
@ -211,8 +211,8 @@ template <size_t SIMDBYTES>
class SoftRasterizer_SIMD : public SoftRasterizerRenderer class SoftRasterizer_SIMD : public SoftRasterizerRenderer
{ {
protected: protected:
virtual void LoadClearValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes) = 0; virtual void LoadClearValues(const Color4u8 &clearColor6665, const FragmentAttributes &clearAttributes) = 0;
virtual Render3DError ClearUsingValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes); virtual Render3DError ClearUsingValues(const Color4u8 &clearColor6665, const FragmentAttributes &clearAttributes);
public: public:
SoftRasterizer_SIMD(); SoftRasterizer_SIMD();
@ -233,7 +233,7 @@ protected:
v256u8 _clearAttrIsTranslucentPoly_v256u8; v256u8 _clearAttrIsTranslucentPoly_v256u8;
v256u8 _clearAttrPolyFacing_v256u8; v256u8 _clearAttrPolyFacing_v256u8;
virtual void LoadClearValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes); virtual void LoadClearValues(const Color4u8 &clearColor6665, const FragmentAttributes &clearAttributes);
public: public:
virtual void ClearUsingValues_Execute(const size_t startPixel, const size_t endPixel); virtual void ClearUsingValues_Execute(const size_t startPixel, const size_t endPixel);
@ -252,7 +252,7 @@ protected:
v128u8 _clearAttrIsTranslucentPoly_v128u8; v128u8 _clearAttrIsTranslucentPoly_v128u8;
v128u8 _clearAttrPolyFacing_v128u8; v128u8 _clearAttrPolyFacing_v128u8;
virtual void LoadClearValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes); virtual void LoadClearValues(const Color4u8 &clearColor6665, const FragmentAttributes &clearAttributes);
public: public:
virtual void ClearUsingValues_Execute(const size_t startPixel, const size_t endPixel); virtual void ClearUsingValues_Execute(const size_t startPixel, const size_t endPixel);
@ -271,7 +271,7 @@ protected:
uint8x16x4_t _clearAttrIsTranslucentPoly_v128u8x4; uint8x16x4_t _clearAttrIsTranslucentPoly_v128u8x4;
uint8x16x4_t _clearAttrPolyFacing_v128u8x4; uint8x16x4_t _clearAttrPolyFacing_v128u8x4;
virtual void LoadClearValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes); virtual void LoadClearValues(const Color4u8 &clearColor6665, const FragmentAttributes &clearAttributes);
public: public:
virtual void ClearUsingValues_Execute(const size_t startPixel, const size_t endPixel); virtual void ClearUsingValues_Execute(const size_t startPixel, const size_t endPixel);
@ -290,7 +290,7 @@ protected:
v128u8 _clearAttrIsTranslucentPoly_v128u8; v128u8 _clearAttrIsTranslucentPoly_v128u8;
v128u8 _clearAttrPolyFacing_v128u8; v128u8 _clearAttrPolyFacing_v128u8;
virtual void LoadClearValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes); virtual void LoadClearValues(const Color4u8 &clearColor6665, const FragmentAttributes &clearAttributes);
public: public:
virtual void ClearUsingValues_Execute(const size_t startPixel, const size_t endPixel); virtual void ClearUsingValues_Execute(const size_t startPixel, const size_t endPixel);

View File

@ -208,7 +208,7 @@ Render3D::Render3D()
_framebufferHeight = GPU_FRAMEBUFFER_NATIVE_HEIGHT; _framebufferHeight = GPU_FRAMEBUFFER_NATIVE_HEIGHT;
_framebufferPixCount = _framebufferWidth * _framebufferHeight; _framebufferPixCount = _framebufferWidth * _framebufferHeight;
_framebufferSIMDPixCount = 0; _framebufferSIMDPixCount = 0;
_framebufferColorSizeBytes = _framebufferWidth * _framebufferHeight * sizeof(FragmentColor); _framebufferColorSizeBytes = _framebufferWidth * _framebufferHeight * sizeof(Color4u8);
_framebufferColor = NULL; _framebufferColor = NULL;
_internalRenderingFormat = NDSColorFormat_BGR666_Rev; _internalRenderingFormat = NDSColorFormat_BGR666_Rev;
@ -282,7 +282,7 @@ std::string Render3D::GetName()
return this->_deviceInfo.renderName; return this->_deviceInfo.renderName;
} }
FragmentColor* Render3D::GetFramebuffer() Color4u8* Render3D::GetFramebuffer()
{ {
return this->_framebufferColor; return this->_framebufferColor;
} }
@ -312,7 +312,7 @@ Render3DError Render3D::SetFramebufferSize(size_t w, size_t h)
this->_framebufferWidth = w; this->_framebufferWidth = w;
this->_framebufferHeight = h; this->_framebufferHeight = h;
this->_framebufferPixCount = w * h; this->_framebufferPixCount = w * h;
this->_framebufferColorSizeBytes = w * h * sizeof(FragmentColor); this->_framebufferColorSizeBytes = w * h * sizeof(Color4u8);
this->_framebufferColor = GPU->GetEngineMain()->Get3DFramebufferMain(); // Just use the buffer that is already present on the main GPU engine this->_framebufferColor = GPU->GetEngineMain()->Get3DFramebufferMain(); // Just use the buffer that is already present on the main GPU engine
return RENDER3DERROR_NOERR; return RENDER3DERROR_NOERR;
@ -466,7 +466,7 @@ Render3DError Render3D::EndRender()
return RENDER3DERROR_NOERR; return RENDER3DERROR_NOERR;
} }
Render3DError Render3D::FlushFramebuffer(const FragmentColor *__restrict srcFramebuffer, FragmentColor *__restrict dstFramebufferMain, u16 *__restrict dstFramebuffer16) Render3DError Render3D::FlushFramebuffer(const Color4u8 *__restrict srcFramebuffer, Color4u8 *__restrict dstFramebufferMain, u16 *__restrict dstFramebuffer16)
{ {
if ( (dstFramebufferMain == NULL) && (dstFramebuffer16 == NULL) ) if ( (dstFramebufferMain == NULL) && (dstFramebuffer16 == NULL) )
{ {
@ -486,7 +486,7 @@ Render3DError Render3D::FlushFramebuffer(const FragmentColor *__restrict srcFram
else if ( ((this->_internalRenderingFormat == NDSColorFormat_BGR666_Rev) && (this->_outputFormat == NDSColorFormat_BGR666_Rev)) || else if ( ((this->_internalRenderingFormat == NDSColorFormat_BGR666_Rev) && (this->_outputFormat == NDSColorFormat_BGR666_Rev)) ||
((this->_internalRenderingFormat == NDSColorFormat_BGR888_Rev) && (this->_outputFormat == NDSColorFormat_BGR888_Rev)) ) ((this->_internalRenderingFormat == NDSColorFormat_BGR888_Rev) && (this->_outputFormat == NDSColorFormat_BGR888_Rev)) )
{ {
memcpy(dstFramebufferMain, srcFramebuffer, this->_framebufferPixCount * sizeof(FragmentColor)); memcpy(dstFramebufferMain, srcFramebuffer, this->_framebufferPixCount * sizeof(Color4u8));
} }
this->_renderNeedsFlushMain = false; this->_renderNeedsFlushMain = false;
@ -648,7 +648,7 @@ Render3DError Render3D::ClearUsingImage(const u16 *__restrict colorBuffer, const
return RENDER3DERROR_NOERR; return RENDER3DERROR_NOERR;
} }
Render3DError Render3D::ClearUsingValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes) Render3DError Render3D::ClearUsingValues(const Color4u8 &clearColor6665, const FragmentAttributes &clearAttributes)
{ {
return RENDER3DERROR_NOERR; return RENDER3DERROR_NOERR;
} }
@ -670,7 +670,7 @@ Render3DError Render3D::Reset()
memset(this->_framebufferColor, 0, this->_framebufferColorSizeBytes); memset(this->_framebufferColor, 0, this->_framebufferColorSizeBytes);
} }
this->_clearColor6665.color = 0; this->_clearColor6665.value = 0;
memset(&this->_clearAttributes, 0, sizeof(FragmentAttributes)); memset(&this->_clearAttributes, 0, sizeof(FragmentAttributes));
this->_renderNeedsFinish = false; this->_renderNeedsFinish = false;
@ -703,7 +703,7 @@ Render3DError Render3D::Render(const GFX3D_State &renderState, const GFX3D_Geome
this->_isPoweredOn = true; this->_isPoweredOn = true;
const u32 clearColorSwapped = LE_TO_LOCAL_32(renderState.clearColor); const u32 clearColorSwapped = LE_TO_LOCAL_32(renderState.clearColor);
this->_clearColor6665.color = LE_TO_LOCAL_32( COLOR555TO6665(clearColorSwapped & 0x7FFF, (clearColorSwapped >> 16) & 0x1F) ); this->_clearColor6665.value = LE_TO_LOCAL_32( COLOR555TO6665(clearColorSwapped & 0x7FFF, (clearColorSwapped >> 16) & 0x1F) );
this->_clearAttributes.opaquePolyID = (clearColorSwapped >> 24) & 0x3F; this->_clearAttributes.opaquePolyID = (clearColorSwapped >> 24) & 0x3F;
//special value for uninitialized translucent polyid. without this, fires in spiderman2 dont display //special value for uninitialized translucent polyid. without this, fires in spiderman2 dont display

View File

@ -156,9 +156,9 @@ protected:
size_t _framebufferPixCount; size_t _framebufferPixCount;
size_t _framebufferSIMDPixCount; size_t _framebufferSIMDPixCount;
size_t _framebufferColorSizeBytes; size_t _framebufferColorSizeBytes;
FragmentColor *_framebufferColor; Color4u8 *_framebufferColor;
FragmentColor _clearColor6665; Color4u8 _clearColor6665;
FragmentAttributes _clearAttributes; FragmentAttributes _clearAttributes;
NDSColorFormat _internalRenderingFormat; NDSColorFormat _internalRenderingFormat;
@ -203,10 +203,10 @@ protected:
virtual Render3DError RenderGeometry(); virtual Render3DError RenderGeometry();
virtual Render3DError PostprocessFramebuffer(); virtual Render3DError PostprocessFramebuffer();
virtual Render3DError EndRender(); virtual Render3DError EndRender();
virtual Render3DError FlushFramebuffer(const FragmentColor *__restrict srcFramebuffer, FragmentColor *__restrict dstFramebufferMain, u16 *__restrict dstFramebuffer16); virtual Render3DError FlushFramebuffer(const Color4u8 *__restrict srcFramebuffer, Color4u8 *__restrict dstFramebufferMain, u16 *__restrict dstFramebuffer16);
virtual Render3DError ClearUsingImage(const u16 *__restrict colorBuffer, const u32 *__restrict depthBuffer, const u8 *__restrict fogBuffer, const u8 opaquePolyID); virtual Render3DError ClearUsingImage(const u16 *__restrict colorBuffer, const u32 *__restrict depthBuffer, const u8 *__restrict fogBuffer, const u8 opaquePolyID);
virtual Render3DError ClearUsingValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes); virtual Render3DError ClearUsingValues(const Color4u8 &clearColor6665, const FragmentAttributes &clearAttributes);
virtual Render3DError SetupTexture(const POLY &thePoly, size_t polyRenderIndex); virtual Render3DError SetupTexture(const POLY &thePoly, size_t polyRenderIndex);
virtual Render3DError SetupViewport(const GFX3D_Viewport viewport); virtual Render3DError SetupViewport(const GFX3D_Viewport viewport);
@ -258,7 +258,7 @@ public:
virtual NDSColorFormat GetColorFormat() const; // The output color format of the 3D renderer. virtual NDSColorFormat GetColorFormat() const; // The output color format of the 3D renderer.
virtual FragmentColor* GetFramebuffer(); virtual Color4u8* GetFramebuffer();
bool GetRenderNeedsFinish() const; bool GetRenderNeedsFinish() const;
void SetRenderNeedsFinish(const bool renderNeedsFinish); void SetRenderNeedsFinish(const bool renderNeedsFinish);

View File

@ -278,6 +278,15 @@ typedef __vector unsigned short v128u16;
typedef __vector signed short v128s16; typedef __vector signed short v128s16;
typedef __vector unsigned int v128u32; typedef __vector unsigned int v128u32;
typedef __vector signed int v128s32; typedef __vector signed int v128s32;
typedef __vector float v128f32;
#define AVAILABLE_TYPE_v128u8
#define AVAILABLE_TYPE_v128s8
#define AVAILABLE_TYPE_v128u16
#define AVAILABLE_TYPE_v128s16
#define AVAILABLE_TYPE_v128u32
#define AVAILABLE_TYPE_v128s32
#define AVAILABLE_TYPE_v128f32
#endif #endif
#ifdef ENABLE_NEON_A64 #ifdef ENABLE_NEON_A64
@ -288,6 +297,22 @@ typedef uint16x8_t v128u16;
typedef int16x8_t v128s16; typedef int16x8_t v128s16;
typedef uint32x4_t v128u32; typedef uint32x4_t v128u32;
typedef int32x4_t v128s32; typedef int32x4_t v128s32;
typedef float32x4_t v128f32;
#define AVAILABLE_TYPE_v128u8
#define AVAILABLE_TYPE_v128s8
#define AVAILABLE_TYPE_v128u16
#define AVAILABLE_TYPE_v128s16
#define AVAILABLE_TYPE_v128u32
#define AVAILABLE_TYPE_v128s32
#define AVAILABLE_TYPE_v128f32
#endif
#ifdef ENABLE_SSE
#include <immintrin.h>
#include <xmmintrin.h>
typedef __m128 v128f32;
#define AVAILABLE_TYPE_v128f32
#endif #endif
#ifdef ENABLE_SSE2 #ifdef ENABLE_SSE2
@ -298,11 +323,22 @@ typedef __m128i v128u16;
typedef __m128i v128s16; typedef __m128i v128s16;
typedef __m128i v128u32; typedef __m128i v128u32;
typedef __m128i v128s32; typedef __m128i v128s32;
#define AVAILABLE_TYPE_v128u8
#define AVAILABLE_TYPE_v128s8
#define AVAILABLE_TYPE_v128u16
#define AVAILABLE_TYPE_v128s16
#define AVAILABLE_TYPE_v128u32
#define AVAILABLE_TYPE_v128s32
#endif #endif
#if defined(ENABLE_AVX) || defined(ENABLE_AVX512_0) #if defined(ENABLE_AVX) || defined(ENABLE_AVX2) || defined(ENABLE_AVX512_0)
#include <immintrin.h> #include <immintrin.h>
typedef __m256 v256f32;
#define AVAILABLE_TYPE_v256f32
#if defined(ENABLE_AVX2) || defined(ENABLE_AVX512_0)
typedef __m256i v256u8; typedef __m256i v256u8;
typedef __m256i v256s8; typedef __m256i v256s8;
typedef __m256i v256u16; typedef __m256i v256u16;
@ -310,6 +346,14 @@ typedef __m256i v256s16;
typedef __m256i v256u32; typedef __m256i v256u32;
typedef __m256i v256s32; typedef __m256i v256s32;
#define AVAILABLE_TYPE_v256u8
#define AVAILABLE_TYPE_v256s8
#define AVAILABLE_TYPE_v256u16
#define AVAILABLE_TYPE_v256s16
#define AVAILABLE_TYPE_v256u32
#define AVAILABLE_TYPE_v256s32
#endif // defined(ENABLE_AVX2) || defined(ENABLE_AVX512_0)
#if defined(ENABLE_AVX512_0) #if defined(ENABLE_AVX512_0)
typedef __m512i v512u8; typedef __m512i v512u8;
typedef __m512i v512s8; typedef __m512i v512s8;
@ -317,9 +361,18 @@ typedef __m512i v512u16;
typedef __m512i v512s16; typedef __m512i v512s16;
typedef __m512i v512u32; typedef __m512i v512u32;
typedef __m512i v512s32; typedef __m512i v512s32;
#endif typedef __m512 v512f32;
#endif // defined(ENABLE_AVX) || defined(ENABLE_AVX512_0) #define AVAILABLE_TYPE_v512u8
#define AVAILABLE_TYPE_v512s8
#define AVAILABLE_TYPE_v512u16
#define AVAILABLE_TYPE_v512s16
#define AVAILABLE_TYPE_v512u32
#define AVAILABLE_TYPE_v512s32
#define AVAILABLE_TYPE_v512f32
#endif // defined(ENABLE_AVX512_0)
#endif // defined(ENABLE_AVX) || defined(ENABLE_AVX2) || defined(ENABLE_AVX512_0)
/*---------- GPU3D fixed-points types -----------*/ /*---------- GPU3D fixed-points types -----------*/
@ -350,6 +403,197 @@ typedef s16 v10;
#define floattov10(n) ((v10)((n) * (1 << 9))) #define floattov10(n) ((v10)((n) * (1 << 9)))
#define v10tofloat(n) (((float)(n)) / (float)(1<<9)) #define v10tofloat(n) (((float)(n)) / (float)(1<<9))
// Two-component vector of s16 elements. Every member is a view onto the same
// storage, so callers may pick whichever naming fits the call site.
typedef union Vector2s16
{
	s16 vec[2];                   // indexed access
	s16 coord[2];                 // indexed access, coordinate naming
	struct { s16 s; s16 t; };     // element 0 / element 1 as s, t
	struct { s16 u; s16 v; };     // element 0 / element 1 as u, v
	struct { s16 x; s16 y; } XY;  // the three plane-qualified views all cover
	struct { s16 y; s16 z; } YZ;  // elements 0 and 1: YZ.y is element 0 and
	struct { s16 x; s16 z; } XZ;  // XZ.z is element 1
	u32 value;                    // whole-vector access through a single u32
} Vector2s16;
// Three-component vector of s16 elements, reachable by index (vec / coord)
// or by name (x, y, z). All members share the same storage.
typedef union Vector3s16
{
	s16 vec[3];
	s16 coord[3];
	struct { s16 x; s16 y; s16 z; };
} Vector3s16;
// Four-component vector of s16 elements. All members share the same storage.
typedef union Vector4s16
{
	s16 vec[4];                               // indexed access
	s16 coord[4];                             // indexed access, coordinate naming
	struct { s16 x; s16 y; s16 z; s16 w; };   // named access
	struct
	{
		Vector3s16 vec3;                      // leading three elements as a 3-vector
		s16 :16;                              // unnamed bit-field: padding after vec3 (presumably the w slot)
	};
	u64 value;                                // whole-vector access through a single u64
} Vector4s16;
// Two-component vector of s32 elements. Every member is a view onto the same
// storage.
typedef union Vector2s32
{
	s32 vec[2];                   // indexed access
	s32 coord[2];                 // indexed access, coordinate naming
	struct { s32 s; s32 t; };     // element 0 / element 1 as s, t
	struct { s32 u; s32 v; };     // element 0 / element 1 as u, v
	struct { s32 x; s32 y; } XY;  // the three plane-qualified views all cover
	struct { s32 y; s32 z; } YZ;  // elements 0 and 1: YZ.y is element 0 and
	struct { s32 x; s32 z; } XZ;  // XZ.z is element 1
	u64 value;                    // whole-vector access through a single u64
} Vector2s32;
// Three-component vector of s32 elements, reachable by index (vec / coord)
// or by name (x, y, z). All members share the same storage.
typedef union Vector3s32
{
	s32 vec[3];
	s32 coord[3];
	struct { s32 x; s32 y; s32 z; };
} Vector3s32;
// Four-component vector of s32 elements. All members share the same storage.
typedef union Vector4s32
{
	s32 vec[4];                               // indexed access
	s32 coord[4];                             // indexed access, coordinate naming
	struct { s32 x; s32 y; s32 z; s32 w; };   // named access
	struct
	{
		Vector3s32 vec3;                      // leading three elements as a 3-vector
		s32 :32;                              // unnamed bit-field: padding after vec3 (presumably the w slot)
	};
} Vector4s32;
// Two-component vector of s64 elements. Every member is a view onto the same
// storage.
typedef union Vector2s64
{
	s64 vec[2];                   // indexed access
	s64 coord[2];                 // indexed access, coordinate naming
	struct { s64 s; s64 t; };     // element 0 / element 1 as s, t
	struct { s64 u; s64 v; };     // element 0 / element 1 as u, v
	struct { s64 x; s64 y; } XY;  // the three plane-qualified views all cover
	struct { s64 y; s64 z; } YZ;  // elements 0 and 1: YZ.y is element 0 and
	struct { s64 x; s64 z; } XZ;  // XZ.z is element 1
} Vector2s64;
// Three-component vector of s64 elements, reachable by index (vec / coord)
// or by name (x, y, z). All members share the same storage.
typedef union Vector3s64
{
	s64 vec[3];
	s64 coord[3];
	struct { s64 x; s64 y; s64 z; };
} Vector3s64;
// Four-component vector of s64 elements. All members share the same storage.
typedef union Vector4s64
{
	s64 vec[4];                               // indexed access
	s64 coord[4];                             // indexed access, coordinate naming
	struct { s64 x; s64 y; s64 z; s64 w; };   // named access
	struct
	{
		Vector3s64 vec3;                      // leading three elements as a 3-vector
		s64 :64;                              // unnamed bit-field: padding after vec3 (presumably the w slot)
	};
} Vector4s64;
// Two-component vector of float elements. Every member is a view onto the
// same storage.
typedef union Vector2f32
{
	float vec[2];                     // indexed access
	float coord[2];                   // indexed access, coordinate naming
	struct { float s; float t; };     // element 0 / element 1 as s, t
	struct { float u; float v; };     // element 0 / element 1 as u, v
	struct { float x; float y; } XY;  // the three plane-qualified views all cover
	struct { float y; float z; } YZ;  // elements 0 and 1: YZ.y is element 0 and
	struct { float x; float z; } XZ;  // XZ.z is element 1
} Vector2f32;
// Three-component vector of float elements, reachable by index (vec / coord)
// or by name (x, y, z). All members share the same storage.
typedef union Vector3f32
{
	float vec[3];
	float coord[3];
	struct { float x; float y; float z; };
} Vector3f32;
// Four-component vector of float elements. All members share the same storage.
typedef union Vector4f32
{
	float vec[4];                                     // indexed access
	float coord[4];                                   // indexed access, coordinate naming
	struct { float x; float y; float z; float w; };   // named access
	struct
	{
		Vector3f32 vec3;                              // leading three elements as a 3-vector
		float ignore;                                 // placeholder after vec3; a float cannot be an unnamed bit-field
	};
} Vector4f32;
// Four-channel color with one u8 per channel. The channels can be reached by
// index, by name, or all at once through `value`.
typedef union Color4u8
{
	u8 component[4];                      // indexed access, same order as r, g, b, a
	struct { u8 r; u8 g; u8 b; u8 a; };   // named access
	u32 value;                            // all four channels as one u32; which bits hold
	                                      // which channel follows the host byte order
} Color4u8;
// Three-channel color with one s32 per channel, reachable by index
// (component) or by name (r, g, b).
typedef union Color3s32
{
	s32 component[3];
	struct { s32 r; s32 g; s32 b; };
} Color3s32;
// Four-channel color with one s32 per channel. All members share the same
// storage.
typedef union Color4s32
{
	s32 component[4];                         // indexed access
	struct { s32 r; s32 g; s32 b; s32 a; };   // named access
	struct
	{
		Color3s32 color3;                     // the r, g, b channels as a 3-channel color
		s32 alpha;                            // the channel that follows color3
	};
} Color4s32;
// Three-channel color with one float per channel, reachable by index
// (component) or by name (r, g, b).
typedef union Color3f32
{
	float component[3];
	struct { float r; float g; float b; };
} Color3f32;
// Four-channel color with one float per channel. All members share the same
// storage.
typedef union Color4f32
{
	float component[4];                               // indexed access
	struct { float r; float g; float b; float a; };   // named access
	struct
	{
		Color3f32 color3;                             // the r, g, b channels as a 3-channel color
		float alpha;                                  // the channel that follows color3
	};
} Color4f32;
/*----------------------*/ /*----------------------*/
#ifndef OBJ_C #ifndef OBJ_C

View File

@ -1,5 +1,5 @@
/* /*
Copyright (C) 2016-2022 DeSmuME team Copyright (C) 2016-2023 DeSmuME team
This file is free software: you can redistribute it and/or modify This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -753,10 +753,10 @@ void ColorspaceApplyIntensityToBuffer32(u32 *dst, size_t pixCount, float intensi
#endif #endif
for (; i < pixCount; i++) for (; i < pixCount; i++)
{ {
FragmentColor dstColor; Color4u8 dstColor;
dstColor.color = dst[i]; dstColor.value = dst[i];
FragmentColor &outColor = (FragmentColor &)dst[i]; Color4u8 &outColor = (Color4u8 &)dst[i];
outColor.r = dstColor.b; outColor.r = dstColor.b;
outColor.b = dstColor.r; outColor.b = dstColor.r;
} }
@ -786,10 +786,10 @@ void ColorspaceApplyIntensityToBuffer32(u32 *dst, size_t pixCount, float intensi
#endif #endif
for (; i < pixCount; i++) for (; i < pixCount; i++)
{ {
FragmentColor dstColor; Color4u8 dstColor;
dstColor.color = dst[i]; dstColor.value = dst[i];
FragmentColor &outColor = (FragmentColor &)dst[i]; Color4u8 &outColor = (Color4u8 &)dst[i];
outColor.r = (u8)( ((u16)dstColor.b * intensity_u16) >> 16 ); outColor.r = (u8)( ((u16)dstColor.b * intensity_u16) >> 16 );
outColor.g = (u8)( ((u16)dstColor.g * intensity_u16) >> 16 ); outColor.g = (u8)( ((u16)dstColor.g * intensity_u16) >> 16 );
outColor.b = (u8)( ((u16)dstColor.r * intensity_u16) >> 16 ); outColor.b = (u8)( ((u16)dstColor.r * intensity_u16) >> 16 );
@ -802,7 +802,7 @@ void ColorspaceApplyIntensityToBuffer32(u32 *dst, size_t pixCount, float intensi
#endif #endif
for (; i < pixCount; i++) for (; i < pixCount; i++)
{ {
FragmentColor &outColor = (FragmentColor &)dst[i]; Color4u8 &outColor = (Color4u8 &)dst[i];
outColor.r = (u8)( ((u16)outColor.r * intensity_u16) >> 16 ); outColor.r = (u8)( ((u16)outColor.r * intensity_u16) >> 16 );
outColor.g = (u8)( ((u16)outColor.g * intensity_u16) >> 16 ); outColor.g = (u8)( ((u16)outColor.g * intensity_u16) >> 16 );
outColor.b = (u8)( ((u16)outColor.b * intensity_u16) >> 16 ); outColor.b = (u8)( ((u16)outColor.b * intensity_u16) >> 16 );
@ -1333,7 +1333,7 @@ size_t ColorspaceHandler::ApplyIntensityToBuffer32(u32 *dst, size_t pixCount, fl
for (; i < pixCount; i++) for (; i < pixCount; i++)
{ {
FragmentColor &outColor = (FragmentColor &)dst[i]; Color4u8 &outColor = (Color4u8 &)dst[i];
outColor.r = (u8)( ((u16)outColor.r * intensity_u16) >> 16 ); outColor.r = (u8)( ((u16)outColor.r * intensity_u16) >> 16 );
outColor.g = (u8)( ((u16)outColor.g * intensity_u16) >> 16 ); outColor.g = (u8)( ((u16)outColor.g * intensity_u16) >> 16 );
outColor.b = (u8)( ((u16)outColor.b * intensity_u16) >> 16 ); outColor.b = (u8)( ((u16)outColor.b * intensity_u16) >> 16 );
@ -1350,10 +1350,10 @@ size_t ColorspaceHandler::ApplyIntensityToBuffer32_SwapRB(u32 *dst, size_t pixCo
{ {
for (; i < pixCount; i++) for (; i < pixCount; i++)
{ {
FragmentColor dstColor; Color4u8 dstColor;
dstColor.color = dst[i]; dstColor.value = dst[i];
FragmentColor &outColor = (FragmentColor &)dst[i]; Color4u8 &outColor = (Color4u8 &)dst[i];
outColor.r = dstColor.b; outColor.r = dstColor.b;
outColor.b = dstColor.r; outColor.b = dstColor.r;
} }
@ -1374,10 +1374,10 @@ size_t ColorspaceHandler::ApplyIntensityToBuffer32_SwapRB(u32 *dst, size_t pixCo
for (; i < pixCount; i++) for (; i < pixCount; i++)
{ {
FragmentColor dstColor; Color4u8 dstColor;
dstColor.color = dst[i]; dstColor.value = dst[i];
FragmentColor &outColor = (FragmentColor &)dst[i]; Color4u8 &outColor = (Color4u8 &)dst[i];
outColor.r = (u8)( ((u16)dstColor.b * intensity_u16) >> 16 ); outColor.r = (u8)( ((u16)dstColor.b * intensity_u16) >> 16 );
outColor.g = (u8)( ((u16)dstColor.g * intensity_u16) >> 16 ); outColor.g = (u8)( ((u16)dstColor.g * intensity_u16) >> 16 );
outColor.b = (u8)( ((u16)dstColor.r * intensity_u16) >> 16 ); outColor.b = (u8)( ((u16)dstColor.r * intensity_u16) >> 16 );

View File

@ -79,18 +79,6 @@ enum NDSColorFormat
NDSColorFormat_BGR888_Rev = 0x20008208 NDSColorFormat_BGR888_Rev = 0x20008208
}; };
// Four-channel color with one u8 per channel. The channels can be reached by
// index, by name, or all at once through `color`.
union FragmentColor
{
	u8 component[4];                      // indexed access, same order as r, g, b, a
	struct { u8 r; u8 g; u8 b; u8 a; };   // named access
	u32 color;                            // all four channels as one u32
};
extern CACHE_ALIGN const u32 material_5bit_to_31bit[32]; extern CACHE_ALIGN const u32 material_5bit_to_31bit[32];
extern CACHE_ALIGN const u8 material_5bit_to_6bit[64]; // Padded for vector lookup table routines. Only the first 32 indices are valid. Data is mirrored across 256-bit lanes. extern CACHE_ALIGN const u8 material_5bit_to_6bit[64]; // Padded for vector lookup table routines. Only the first 32 indices are valid. Data is mirrored across 256-bit lanes.
extern CACHE_ALIGN const u8 material_5bit_to_8bit[64]; // Padded for vector lookup table routines. Only the first 32 indices are valid. Data is mirrored across 256-bit lanes. extern CACHE_ALIGN const u8 material_5bit_to_8bit[64]; // Padded for vector lookup table routines. Only the first 32 indices are valid. Data is mirrored across 256-bit lanes.
@ -139,49 +127,49 @@ FORCEINLINE u32 ColorspaceConvert555To6665Opaque(const u16 src)
} }
// Narrows a color by discarding the two low bits of r, g and b and the three
// low bits of alpha. With SWAP_RB set, the source red and blue channels trade
// places in the result. Returns the result's packed `value`.
template <bool SWAP_RB>
FORCEINLINE u32 ColorspaceConvert8888To6665(Color4u8 srcColor)
{
	Color4u8 result;
	
	if (SWAP_RB)
	{
		result.r = srcColor.b >> 2;
		result.b = srcColor.r >> 2;
	}
	else
	{
		result.r = srcColor.r >> 2;
		result.b = srcColor.b >> 2;
	}
	
	result.g = srcColor.g >> 2;
	result.a = srcColor.a >> 3;
	
	return result.value;
}
// Packed-u32 convenience overload: unpacks srcColor into a Color4u8 and
// defers to the Color4u8 overload with the same SWAP_RB setting.
template <bool SWAP_RB>
FORCEINLINE u32 ColorspaceConvert8888To6665(u32 srcColor)
{
	Color4u8 unpacked;
	unpacked.value = srcColor;
	
	return ColorspaceConvert8888To6665<SWAP_RB>(unpacked);
}
// Widens a color by mapping r, g and b through material_6bit_to_8bit and
// alpha through material_5bit_to_8bit. With SWAP_RB set, the source red and
// blue channels trade places in the result. Returns the result's packed
// `value`.
template <bool SWAP_RB>
FORCEINLINE u32 ColorspaceConvert6665To8888(Color4u8 srcColor)
{
	Color4u8 result;
	
	if (SWAP_RB)
	{
		result.r = material_6bit_to_8bit[srcColor.b];
		result.b = material_6bit_to_8bit[srcColor.r];
	}
	else
	{
		result.r = material_6bit_to_8bit[srcColor.r];
		result.b = material_6bit_to_8bit[srcColor.b];
	}
	
	result.g = material_6bit_to_8bit[srcColor.g];
	result.a = material_5bit_to_8bit[srcColor.a];
	
	return result.value;
}
// Packed-u32 convenience overload: unpacks srcColor into a Color4u8 and
// defers to the Color4u8 overload with the same SWAP_RB setting.
template <bool SWAP_RB>
FORCEINLINE u32 ColorspaceConvert6665To8888(u32 srcColor)
{
	Color4u8 unpacked;
	unpacked.value = srcColor;
	
	return ColorspaceConvert6665To8888<SWAP_RB>(unpacked);
}
// Packs a color into 16 bits: each color channel is shifted right by 3 and
// combined through R5G5B5TORGB15, and bit 0x8000 is set whenever alpha is
// non-zero. With SWAP_RB set, the source red and blue channels trade places.
template <bool SWAP_RB>
FORCEINLINE u16 ColorspaceConvert8888To5551(Color4u8 srcColor)
{
	const u16 alphaBit = (srcColor.a == 0) ? 0x0000 : 0x8000;
	
	if (SWAP_RB)
	{
		return R5G5B5TORGB15(srcColor.b >> 3, srcColor.g >> 3, srcColor.r >> 3) | alphaBit;
	}
	
	return R5G5B5TORGB15(srcColor.r >> 3, srcColor.g >> 3, srcColor.b >> 3) | alphaBit;
}
@ -189,14 +177,14 @@ FORCEINLINE u16 ColorspaceConvert8888To5551(FragmentColor srcColor)
// Packed-u32 convenience overload: unpacks srcColor into a Color4u8 and
// defers to the Color4u8 overload with the same SWAP_RB setting.
template <bool SWAP_RB>
FORCEINLINE u16 ColorspaceConvert8888To5551(u32 srcColor)
{
	Color4u8 unpacked;
	unpacked.value = srcColor;
	
	return ColorspaceConvert8888To5551<SWAP_RB>(unpacked);
}
// Packs a color into 16 bits: the color channels are combined through
// R6G6B6TORGB15, and bit 0x8000 is set whenever alpha is non-zero. With
// SWAP_RB set, the source red and blue channels trade places.
template <bool SWAP_RB>
FORCEINLINE u16 ColorspaceConvert6665To5551(Color4u8 srcColor)
{
	const u16 alphaBit = (srcColor.a == 0) ? 0x0000 : 0x8000;
	
	if (SWAP_RB)
	{
		return R6G6B6TORGB15(srcColor.b, srcColor.g, srcColor.r) | alphaBit;
	}
	
	return R6G6B6TORGB15(srcColor.r, srcColor.g, srcColor.b) | alphaBit;
}
@ -204,35 +192,35 @@ FORCEINLINE u16 ColorspaceConvert6665To5551(FragmentColor srcColor)
// Packed-u32 convenience overload: unpacks srcColor into a Color4u8 and
// defers to the Color4u8 overload with the same SWAP_RB setting.
template <bool SWAP_RB>
FORCEINLINE u16 ColorspaceConvert6665To5551(u32 srcColor)
{
	Color4u8 unpacked;
	unpacked.value = srcColor;
	
	return ColorspaceConvert6665To5551<SWAP_RB>(unpacked);
}
// Copies the r, g and b channels of srcColor and forces alpha to 0xFF; the
// source alpha is never read. With SWAP_RB set, the source red and blue
// channels trade places. Returns the result's packed `value`.
template <bool SWAP_RB>
FORCEINLINE u32 ColorspaceConvert888XTo8888Opaque(Color4u8 srcColor)
{
	Color4u8 result;
	
	if (SWAP_RB)
	{
		result.r = srcColor.b;
		result.b = srcColor.r;
	}
	else
	{
		result.r = srcColor.r;
		result.b = srcColor.b;
	}
	
	result.g = srcColor.g;
	result.a = 0xFF;
	
	return result.value;
}
// Packed-u32 convenience overload: unpacks srcColor into a Color4u8 and
// defers to the Color4u8 overload with the same SWAP_RB setting.
template <bool SWAP_RB>
FORCEINLINE u32 ColorspaceConvert888XTo8888Opaque(u32 srcColor)
{
	Color4u8 unpacked;
	unpacked.value = srcColor;
	
	return ColorspaceConvert888XTo8888Opaque<SWAP_RB>(unpacked);
}
template <bool SWAP_RB> template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert888XTo888(FragmentColor srcColor, u8 *dst) FORCEINLINE void ColorspaceConvert888XTo888(Color4u8 srcColor, u8 *dst)
{ {
dst[0] = (SWAP_RB) ? srcColor.b : srcColor.r; dst[0] = (SWAP_RB) ? srcColor.b : srcColor.r;
dst[1] = srcColor.g; dst[1] = srcColor.g;
@ -242,8 +230,8 @@ FORCEINLINE void ColorspaceConvert888XTo888(FragmentColor srcColor, u8 *dst)
// Packed-u32 convenience overload: unpacks srcColor into a Color4u8 and
// lets the Color4u8 overload write the converted bytes to dst.
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert888XTo888(u32 srcColor, u8 *dst)
{
	Color4u8 unpacked;
	unpacked.value = srcColor;
	
	ColorspaceConvert888XTo888<SWAP_RB>(unpacked, dst);
}
@ -251,8 +239,8 @@ FORCEINLINE void ColorspaceConvert888XTo888(u32 srcColor, u8 *dst)
// Converts a 16-bit color to bytes at dst in two steps: first through
// ColorspaceConvert555To8888Opaque, then through ColorspaceConvert888XTo888.
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555XTo888(u16 srcColor, u8 *dst)
{
	Color4u8 expanded;
	expanded.value = ColorspaceConvert555To8888Opaque<SWAP_RB>(srcColor);
	
	// SWAP_RB was already handed to the first step, so the second step is
	// invoked with swapping disabled.
	ColorspaceConvert888XTo888<false>(expanded, dst);
}
@ -264,22 +252,22 @@ FORCEINLINE u16 ColorspaceCopy16(u16 srcColor)
} }
// Copies all four channels of srcColor unchanged, except that with SWAP_RB
// set the red and blue channels trade places. Returns the result's packed
// `value`.
template <bool SWAP_RB>
FORCEINLINE u32 ColorspaceCopy32(Color4u8 srcColor)
{
	Color4u8 result;
	
	if (SWAP_RB)
	{
		result.r = srcColor.b;
		result.b = srcColor.r;
	}
	else
	{
		result.r = srcColor.r;
		result.b = srcColor.b;
	}
	
	result.g = srcColor.g;
	result.a = srcColor.a;
	
	return result.value;
}
// Packed-u32 convenience overload: unpacks srcColor into a Color4u8 and
// defers to the Color4u8 overload with the same SWAP_RB setting.
template <bool SWAP_RB>
FORCEINLINE u32 ColorspaceCopy32(u32 srcColor)
{
	Color4u8 unpacked;
	unpacked.value = srcColor;
	
	return ColorspaceCopy32<SWAP_RB>(unpacked);
}
@ -308,9 +296,9 @@ FORCEINLINE u16 ColorspaceApplyIntensity16(u16 srcColor, float intensity)
} }
template <bool SWAP_RB> template <bool SWAP_RB>
FORCEINLINE u32 ColorspaceApplyIntensity32(FragmentColor srcColor, float intensity) FORCEINLINE u32 ColorspaceApplyIntensity32(Color4u8 srcColor, float intensity)
{ {
FragmentColor outColor; Color4u8 outColor;
outColor.r = ((SWAP_RB) ? srcColor.b : srcColor.r); outColor.r = ((SWAP_RB) ? srcColor.b : srcColor.r);
outColor.g = srcColor.g; outColor.g = srcColor.g;
outColor.b = ((SWAP_RB) ? srcColor.r : srcColor.b); outColor.b = ((SWAP_RB) ? srcColor.r : srcColor.b);
@ -318,11 +306,11 @@ FORCEINLINE u32 ColorspaceApplyIntensity32(FragmentColor srcColor, float intensi
if (intensity > 0.999f) if (intensity > 0.999f)
{ {
return outColor.color; return outColor.value;
} }
else if (intensity < 0.001f) else if (intensity < 0.001f)
{ {
return (outColor.color & 0xFF000000); return (outColor.value & 0xFF000000);
} }
const u16 intensity_u16 = (u16)(intensity * (float)(0xFFFF)); const u16 intensity_u16 = (u16)(intensity * (float)(0xFFFF));
@ -331,14 +319,14 @@ FORCEINLINE u32 ColorspaceApplyIntensity32(FragmentColor srcColor, float intensi
outColor.b = (u8)( ((u16)outColor.b * intensity_u16) >> 16 ); outColor.b = (u8)( ((u16)outColor.b * intensity_u16) >> 16 );
outColor.a = outColor.a; outColor.a = outColor.a;
return outColor.color; return outColor.value;
} }
// Packed-u32 convenience overload: unpacks srcColor into a Color4u8 and
// defers to the Color4u8 overload, passing along the caller's intensity.
//
// srcColor  - packed 32-bit color to scale.
// intensity - scale factor forwarded unchanged to the Color4u8 overload.
// Returns the packed 32-bit result produced by the Color4u8 overload.
template <bool SWAP_RB>
FORCEINLINE u32 ColorspaceApplyIntensity32(u32 srcColor, float intensity)
{
	Color4u8 srcColorComponent;
	srcColorComponent.value = srcColor;
	
	// Both overloads take (color, intensity). Forwarding only the color would
	// leave no viable overload as soon as this template is instantiated, so
	// the intensity must be passed through explicitly.
	return ColorspaceApplyIntensity32<SWAP_RB>(srcColorComponent, intensity);
}
@ -427,11 +415,4 @@ public:
size_t ApplyIntensityToBuffer32_SwapRB_IsUnaligned(u32 *dst, size_t pixCount, float intensity) const; size_t ApplyIntensityToBuffer32_SwapRB_IsUnaligned(u32 *dst, size_t pixCount, float intensity) const;
}; };
// Builds a FragmentColor from four individual channel values.
FORCEINLINE FragmentColor MakeFragmentColor(const u8 r, const u8 g, const u8 b, const u8 a)
{
	FragmentColor assembled;
	
	assembled.r = r;
	assembled.g = g;
	assembled.b = b;
	assembled.a = a;
	
	return assembled;
}
#endif /* COLORSPACEHANDLER_H */ #endif /* COLORSPACEHANDLER_H */