From cda8cb5686061d0bb7e5e5b023abc2ef51352ba8 Mon Sep 17 00:00:00 2001 From: rogerman Date: Mon, 27 Feb 2023 15:39:17 -0800 Subject: [PATCH] Core: Move low-level vector, vertex coordinate, and color data type declarations into types.h so that they can be used universally. Also update the data type names to be more descriptive and have better consistency. - Add SIMD-float32 data types, and also add macros to track SIMD data-type availability. - Also fix some bugs where 3D would fail to render on big-endian systems. (Regression from commit a67e040.) --- desmume/src/GPU.cpp | 183 +++++------ desmume/src/GPU.h | 40 +-- desmume/src/GPU_Operations.cpp | 86 ++--- desmume/src/GPU_Operations.h | 30 +- desmume/src/GPU_Operations_AVX2.cpp | 14 +- desmume/src/GPU_Operations_SSE2.cpp | 14 +- desmume/src/OGLRender.cpp | 50 +-- desmume/src/OGLRender.h | 27 +- desmume/src/OGLRender_3_2.cpp | 6 +- desmume/src/OGLRender_3_2.h | 2 +- desmume/src/gfx3d.cpp | 203 ++++++------ desmume/src/gfx3d.h | 308 ++++-------------- desmume/src/rasterize.cpp | 137 ++++---- desmume/src/rasterize.h | 34 +- desmume/src/render3D.cpp | 16 +- desmume/src/render3D.h | 10 +- desmume/src/types.h | 250 +++++++++++++- .../colorspacehandler/colorspacehandler.cpp | 30 +- .../colorspacehandler/colorspacehandler.h | 95 +++--- 19 files changed, 789 insertions(+), 746 deletions(-) diff --git a/desmume/src/GPU.cpp b/desmume/src/GPU.cpp index b444afa41..66696443d 100644 --- a/desmume/src/GPU.cpp +++ b/desmume/src/GPU.cpp @@ -206,7 +206,7 @@ GPUEngineBase::GPUEngineBase() _asyncClearLineCustom = 0; _asyncClearInterrupt = 0; _asyncClearBackdropColor16 = 0; - _asyncClearBackdropColor32.color = 0; + _asyncClearBackdropColor32.value = 0; _asyncClearIsRunning = false; _asyncClearUseInternalCustomBuffer = false; @@ -375,7 +375,7 @@ void GPUEngineBase::Reset() renderState.selectedBGLayer = &this->_BGLayer[GPULayerID_BG0]; renderState.backdropColor16 = LE_TO_LOCAL_16(this->_paletteBG[0]) & 0x7FFF; renderState.workingBackdropColor16 = renderState.backdropColor16; - renderState.workingBackdropColor32.color = LOCAL_TO_LE_32( (this->_targetDisplay->GetColorFormat() == NDSColorFormat_BGR666_Rev) ? COLOR555TO666(LOCAL_TO_LE_16(renderState.workingBackdropColor16)) : COLOR555TO888(LOCAL_TO_LE_16(renderState.workingBackdropColor16)) ); + renderState.workingBackdropColor32.value = LOCAL_TO_LE_32( (this->_targetDisplay->GetColorFormat() == NDSColorFormat_BGR666_Rev) ? COLOR555TO666(LOCAL_TO_LE_16(renderState.workingBackdropColor16)) : COLOR555TO888(LOCAL_TO_LE_16(renderState.workingBackdropColor16)) ); renderState.colorEffect = (ColorEffect)this->_IORegisterMap->BLDCNT.ColorEffect; renderState.blendEVA = 0; renderState.blendEVB = 0; @@ -693,7 +693,7 @@ void GPUEngineBase::RenderLineClearAsync() case NDSColorFormat_BGR666_Rev: case NDSColorFormat_BGR888_Rev: - memset_u32(targetBufferHead + (lineInfo.blockOffsetCustom * sizeof(FragmentColor)), this->_asyncClearBackdropColor32.color, lineInfo.pixelCount); + memset_u32(targetBufferHead + (lineInfo.blockOffsetCustom * sizeof(Color4u8)), this->_asyncClearBackdropColor32.value, lineInfo.pixelCount); break; } @@ -716,9 +716,9 @@ void GPUEngineBase::RenderLineClearAsync() } void GPUEngineBase::RenderLineClearAsyncStart(bool willClearInternalCustomBuffer, - s32 startLineIndex, + size_t startLineIndex, u16 clearColor16, - FragmentColor clearColor32) + Color4u8 clearColor32) { if (this->_asyncClearTask == NULL) { @@ -727,7 +727,7 @@ void GPUEngineBase::RenderLineClearAsyncStart(bool willClearInternalCustomBuffer this->RenderLineClearAsyncFinish(); - this->_asyncClearLineCustom = startLineIndex; + this->_asyncClearLineCustom = (s32)startLineIndex; this->_asyncClearBackdropColor16 = clearColor16; this->_asyncClearBackdropColor32 = clearColor32; this->_asyncClearUseInternalCustomBuffer = willClearInternalCustomBuffer; @@ -750,9 +750,10 @@ void GPUEngineBase::RenderLineClearAsyncFinish() this->_asyncClearInterrupt = 0; } -void GPUEngineBase::RenderLineClearAsyncWaitForCustomLine(const s32 l) +void GPUEngineBase::RenderLineClearAsyncWaitForCustomLine(const size_t l) { - while (l >= atomic_and_barrier32(&this->_asyncClearLineCustom, 0x000000FF)) + const s32 lineCompare = (s32)l; + while (lineCompare >= atomic_and_barrier32(&this->_asyncClearLineCustom, 0x000000FF)) { // Do nothing -- just spin. } @@ -847,7 +848,7 @@ void GPUEngineBase::UpdateRenderStates(const size_t l) { currRenderState.workingBackdropColor16 = currRenderState.backdropColor16; } - currRenderState.workingBackdropColor32.color = LOCAL_TO_LE_32( (this->_targetDisplay->GetColorFormat() == NDSColorFormat_BGR666_Rev) ? COLOR555TO666(LOCAL_TO_LE_16(currRenderState.workingBackdropColor16)) : COLOR555TO888(LOCAL_TO_LE_16(currRenderState.workingBackdropColor16)) ); + currRenderState.workingBackdropColor32.value = LOCAL_TO_LE_32( (this->_targetDisplay->GetColorFormat() == NDSColorFormat_BGR666_Rev) ? COLOR555TO666(LOCAL_TO_LE_16(currRenderState.workingBackdropColor16)) : COLOR555TO888(LOCAL_TO_LE_16(currRenderState.workingBackdropColor16)) ); // Save the current render states to this line's compositor info. compInfo.renderState = currRenderState; @@ -1003,7 +1004,7 @@ void GPUEngineBase::_TransitionLineNativeToCustom(GPUEngineCompositorInfo &compI case NDSColorFormat_BGR666_Rev: case NDSColorFormat_BGR888_Rev: - memset_u32(compInfo.target.lineColorHeadCustom, compInfo.renderState.workingBackdropColor32.color, compInfo.line.pixelCount); + memset_u32(compInfo.target.lineColorHeadCustom, compInfo.renderState.workingBackdropColor32.value, compInfo.line.pixelCount); break; } } @@ -1251,7 +1252,7 @@ FORCEINLINE void GPUEngineBase::_CompositePixelImmediate(GPUEngineCompositorInfo compInfo.target.xCustom = _gpuDstPitchIndex[srcX]; compInfo.target.lineLayerID = compInfo.target.lineLayerIDHeadNative + srcX; compInfo.target.lineColor16 = (u16 *)compInfo.target.lineColorHeadNative + srcX; - compInfo.target.lineColor32 = (FragmentColor *)compInfo.target.lineColorHeadNative + srcX; + compInfo.target.lineColor32 = (Color4u8 *)compInfo.target.lineColorHeadNative + srcX; const bool enableColorEffect = (WILLPERFORMWINDOWTEST) ? (this->_enableColorEffectNative[compInfo.renderState.selectedLayerID][compInfo.target.xNative] != 0) : true; pixelop.Composite16(compInfo, srcColor16, enableColorEffect, 0, 0); @@ -1277,12 +1278,12 @@ void GPUEngineBase::_PrecompositeNativeToCustomLineBG(GPUEngineCompositorInfo &c } template -void GPUEngineBase::_CompositeNativeLineOBJ(GPUEngineCompositorInfo &compInfo, const u16 *__restrict srcColorNative16, const FragmentColor *__restrict srcColorNative32) +void GPUEngineBase::_CompositeNativeLineOBJ(GPUEngineCompositorInfo &compInfo, const u16 *__restrict srcColorNative16, const Color4u8 *__restrict srcColorNative32) { compInfo.target.xNative = 0; compInfo.target.xCustom = 0; compInfo.target.lineColor16 = (u16 *)compInfo.target.lineColorHead; - compInfo.target.lineColor32 = (FragmentColor *)compInfo.target.lineColorHead; + compInfo.target.lineColor32 = (Color4u8 *)compInfo.target.lineColorHead; compInfo.target.lineLayerID = compInfo.target.lineLayerIDHead; #ifdef USEMANUALVECTORIZATION @@ -1326,7 +1327,7 @@ void GPUEngineBase::_CompositeLineDeferred(GPUEngineCompositorInfo &compInfo, co compInfo.target.xNative = 0; compInfo.target.xCustom = 0; compInfo.target.lineColor16 = (u16 *)compInfo.target.lineColorHead; - compInfo.target.lineColor32 = (FragmentColor *)compInfo.target.lineColorHead; + compInfo.target.lineColor32 = (Color4u8 *)compInfo.target.lineColorHead; compInfo.target.lineLayerID = compInfo.target.lineLayerIDHead; size_t i = 0; @@ -1366,7 +1367,7 @@ void GPUEngineBase::_CompositeVRAMLineDeferred(GPUEngineCompositorInfo &compInfo compInfo.target.xNative = 0; compInfo.target.xCustom = 0; compInfo.target.lineColor16 = (u16 *)compInfo.target.lineColorHead; - compInfo.target.lineColor32 = (FragmentColor *)compInfo.target.lineColorHead; + compInfo.target.lineColor32 = (Color4u8 *)compInfo.target.lineColorHead; compInfo.target.lineLayerID = compInfo.target.lineLayerIDHead; size_t i = 0; @@ -1395,7 +1396,7 @@ void GPUEngineBase::_CompositeVRAMLineDeferred(GPUEngineCompositorInfo &compInfo } const bool enableColorEffect = (WILLPERFORMWINDOWTEST) ? (colorEffectEnable[compInfo.target.xCustom] != 0) : true; - pixelop.Composite32(compInfo, ((FragmentColor *)vramColorPtr)[i], enableColorEffect, this->_sprAlphaCustom[compInfo.target.xCustom], this->_sprTypeCustom[compInfo.target.xCustom]); + pixelop.Composite32(compInfo, ((Color4u8 *)vramColorPtr)[i], enableColorEffect, this->_sprAlphaCustom[compInfo.target.xCustom], this->_sprTypeCustom[compInfo.target.xCustom]); } else { @@ -1829,7 +1830,7 @@ void GPUEngineBase::_RenderSprite16(GPUEngineCompositorInfo &compInfo, { for (size_t i = 0; i < length; i++, frameX++, spriteX+=readXStep) { - const u32 spriteX_word = spriteX >> 1; + const u32 spriteX_word = (u32)spriteX >> 1; const u32 palIndexAddress = objAddress + (spriteX_word & 0x0003) + ((spriteX_word & 0xFFFC) << 3); const u8 *__restrict palIndexBuffer = (u8 *)MMU_gpu_map(palIndexAddress); const u8 palIndex = *palIndexBuffer; @@ -1968,7 +1969,7 @@ void GPUEngineBase::SpriteRenderDebug(const u16 lineIndex, u16 *dst) compInfo.target.xCustom = 0; compInfo.target.lineColor = (void **)&compInfo.target.lineColor16; compInfo.target.lineColor16 = (u16 *)compInfo.target.lineColorHeadNative; - compInfo.target.lineColor32 = (FragmentColor *)compInfo.target.lineColorHeadNative; + compInfo.target.lineColor32 = (Color4u8 *)compInfo.target.lineColorHeadNative; compInfo.target.lineLayerID = NULL; this->_SpriteRender(compInfo, dst, NULL, NULL, &this->_sprPrio[lineIndex][0]); @@ -2100,7 +2101,7 @@ void GPUEngineBase::_SpriteRenderPerform(GPUEngineCompositorInfo &compInfo, u16 if (auxX >= 0 && auxY >= 0 && auxX < sprSize.width && auxY < sprSize.height) { - size_t objOffset = 0; + u32 objOffset = 0; if (DISPCNT.OBJ_BMP_2D_dim) { @@ -2262,7 +2263,7 @@ void GPUEngineBase::_RenderLine_Layers(GPUEngineCompositorInfo &compInfo) compInfo.target.xNative = 0; compInfo.target.xCustom = 0; compInfo.target.lineColor16 = (u16 *)compInfo.target.lineColorHeadNative; - compInfo.target.lineColor32 = (FragmentColor *)compInfo.target.lineColorHeadNative; + compInfo.target.lineColor32 = (Color4u8 *)compInfo.target.lineColorHeadNative; compInfo.target.lineLayerID = compInfo.target.lineLayerIDHead; compInfo.renderState.previouslyRenderedLayerID = GPULayerID_Backdrop; @@ -2503,7 +2504,7 @@ void GPUEngineBase::_RenderLine_LayerOBJ(GPUEngineCompositorInfo &compInfo, item compInfo.target.xNative = srcX; compInfo.target.xCustom = _gpuDstPitchIndex[srcX]; compInfo.target.lineColor16 = (u16 *)compInfo.target.lineColorHead + srcX; - compInfo.target.lineColor32 = (FragmentColor *)compInfo.target.lineColorHead + srcX; + compInfo.target.lineColor32 = (Color4u8 *)compInfo.target.lineColorHead + srcX; compInfo.target.lineLayerID = compInfo.target.lineLayerIDHead + srcX; const bool enableColorEffect = (WILLPERFORMWINDOWTEST) ? (this->_enableColorEffectNative[GPULayerID_OBJ][compInfo.target.xNative] != 0) : true; @@ -2522,7 +2523,7 @@ void GPUEngineBase::_RenderLine_LayerOBJ(GPUEngineCompositorInfo &compInfo, item for (size_t line = 0; line < compInfo.line.renderCount; line++) { compInfo.target.lineColor16 = (u16 *)dstColorPtr; - compInfo.target.lineColor32 = (FragmentColor *)dstColorPtr; + compInfo.target.lineColor32 = (Color4u8 *)dstColorPtr; compInfo.target.lineLayerID = dstLayerIDPtr; for (size_t i = 0; i < item->nbPixelsX; i++) @@ -2542,14 +2543,14 @@ void GPUEngineBase::_RenderLine_LayerOBJ(GPUEngineCompositorInfo &compInfo, item const size_t dstX = compInfo.target.xCustom + p; compInfo.target.lineColor16 = (u16 *)dstColorPtr + dstX; - compInfo.target.lineColor32 = (FragmentColor *)dstColorPtr + dstX; + compInfo.target.lineColor32 = (Color4u8 *)dstColorPtr + dstX; compInfo.target.lineLayerID = dstLayerIDPtr + dstX; const bool enableColorEffect = (WILLPERFORMWINDOWTEST) ? (this->_enableColorEffectNative[GPULayerID_OBJ][compInfo.target.xNative] != 0) : true; if (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev) { - pixelop.Composite32(compInfo, ((FragmentColor *)vramColorPtr)[dstX], enableColorEffect, this->_sprAlpha[compInfo.line.indexNative][srcX], this->_sprType[compInfo.line.indexNative][srcX]); + pixelop.Composite32(compInfo, ((Color4u8 *)vramColorPtr)[dstX], enableColorEffect, this->_sprAlpha[compInfo.line.indexNative][srcX], this->_sprType[compInfo.line.indexNative][srcX]); } else { @@ -2558,8 +2559,8 @@ void GPUEngineBase::_RenderLine_LayerOBJ(GPUEngineCompositorInfo &compInfo, item } } - vramColorPtr = (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev) ? (void *)((FragmentColor *)vramColorPtr + compInfo.line.widthCustom) : (void *)((u16 *)vramColorPtr + compInfo.line.widthCustom); - dstColorPtr = (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev) ? (void *)((u16 *)dstColorPtr + compInfo.line.widthCustom) : (void *)((FragmentColor *)dstColorPtr + compInfo.line.widthCustom); + vramColorPtr = (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev) ? (void *)((Color4u8 *)vramColorPtr + compInfo.line.widthCustom) : (void *)((u16 *)vramColorPtr + compInfo.line.widthCustom); + dstColorPtr = (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev) ? (void *)((u16 *)dstColorPtr + compInfo.line.widthCustom) : (void *)((Color4u8 *)dstColorPtr + compInfo.line.widthCustom); dstLayerIDPtr += compInfo.line.widthCustom; } } @@ -2568,7 +2569,7 @@ void GPUEngineBase::_RenderLine_LayerOBJ(GPUEngineCompositorInfo &compInfo, item for (size_t line = 0; line < compInfo.line.renderCount; line++) { compInfo.target.lineColor16 = (u16 *)dstColorPtr; - compInfo.target.lineColor32 = (FragmentColor *)dstColorPtr; + compInfo.target.lineColor32 = (Color4u8 *)dstColorPtr; compInfo.target.lineLayerID = dstLayerIDPtr; for (size_t i = 0; i < item->nbPixelsX; i++) @@ -2588,7 +2589,7 @@ void GPUEngineBase::_RenderLine_LayerOBJ(GPUEngineCompositorInfo &compInfo, item const size_t dstX = compInfo.target.xCustom + p; compInfo.target.lineColor16 = (u16 *)dstColorPtr + dstX; - compInfo.target.lineColor32 = (FragmentColor *)dstColorPtr + dstX; + compInfo.target.lineColor32 = (Color4u8 *)dstColorPtr + dstX; compInfo.target.lineLayerID = dstLayerIDPtr + dstX; const bool enableColorEffect = (WILLPERFORMWINDOWTEST) ? (this->_enableColorEffectNative[GPULayerID_OBJ][compInfo.target.xNative] != 0) : true; @@ -2596,7 +2597,7 @@ void GPUEngineBase::_RenderLine_LayerOBJ(GPUEngineCompositorInfo &compInfo, item } } - dstColorPtr = (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev) ? (void *)((u16 *)dstColorPtr + compInfo.line.widthCustom) : (void *)((FragmentColor *)dstColorPtr + compInfo.line.widthCustom); + dstColorPtr = (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev) ? (void *)((u16 *)dstColorPtr + compInfo.line.widthCustom) : (void *)((Color4u8 *)dstColorPtr + compInfo.line.widthCustom); dstLayerIDPtr += compInfo.line.widthCustom; } } @@ -2920,7 +2921,7 @@ void GPUEngineBase::RenderLayerBG(const GPULayerID layerID, u16 *dstColorBuffer) compInfo.target.xCustom = compInfo.target.xNative; compInfo.target.lineColor = (void **)&compInfo.target.lineColor16; compInfo.target.lineColor16 = (u16 *)compInfo.target.lineColorHeadNative; - compInfo.target.lineColor32 = (FragmentColor *)compInfo.target.lineColorHeadNative; + compInfo.target.lineColor32 = (Color4u8 *)compInfo.target.lineColorHeadNative; compInfo.target.lineLayerID = NULL; for (size_t lineIndex = 0; lineIndex < layerHeight; lineIndex++) @@ -3251,13 +3252,13 @@ GPUEngineA::GPUEngineA() _isLineCaptureNative[3][l] = true; } - _3DFramebufferMain = (FragmentColor *)malloc_alignedPage(GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * sizeof(FragmentColor)); + _3DFramebufferMain = (Color4u8 *)malloc_alignedPage(GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * sizeof(Color4u8)); _3DFramebuffer16 = (u16 *)malloc_alignedPage(GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * sizeof(u16)); _captureWorkingDisplay16 = (u16 *)malloc_alignedPage(GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16)); _captureWorkingA16 = (u16 *)malloc_alignedPage(GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16)); _captureWorkingB16 = (u16 *)malloc_alignedPage(GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16)); - _captureWorkingA32 = (FragmentColor *)malloc_alignedPage(GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(FragmentColor)); - _captureWorkingB32 = (FragmentColor *)malloc_alignedPage(GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(FragmentColor)); + _captureWorkingA32 = (Color4u8 *)malloc_alignedPage(GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(Color4u8)); + _captureWorkingB32 = (Color4u8 *)malloc_alignedPage(GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(Color4u8)); } GPUEngineA::~GPUEngineA() @@ -3290,13 +3291,13 @@ void GPUEngineA::Reset() const size_t customWidth = this->_targetDisplay->GetWidth(); const size_t customHeight = this->_targetDisplay->GetHeight(); - memset(this->_3DFramebufferMain, 0, customWidth * customHeight * sizeof(FragmentColor)); + memset(this->_3DFramebufferMain, 0, customWidth * customHeight * sizeof(Color4u8)); memset(this->_3DFramebuffer16, 0, customWidth * customHeight * sizeof(u16)); memset(this->_captureWorkingDisplay16, 0, customWidth * _gpuLargestDstLineCount * sizeof(u16)); memset(this->_captureWorkingA16, 0, customWidth * _gpuLargestDstLineCount * sizeof(u16)); memset(this->_captureWorkingB16, 0, customWidth * _gpuLargestDstLineCount * sizeof(u16)); - memset(this->_captureWorkingA32, 0, customWidth * _gpuLargestDstLineCount * sizeof(FragmentColor)); - memset(this->_captureWorkingB32, 0, customWidth * _gpuLargestDstLineCount * sizeof(FragmentColor)); + memset(this->_captureWorkingA32, 0, customWidth * _gpuLargestDstLineCount * sizeof(Color4u8)); + memset(this->_captureWorkingB32, 0, customWidth * _gpuLargestDstLineCount * sizeof(Color4u8)); memset(&this->_dispCapCnt, 0, sizeof(DISPCAPCNT_parsed)); this->_displayCaptureEnable = false; @@ -3382,7 +3383,7 @@ void GPUEngineA::ParseReg_DISPCAPCNT() this->_dispCapCnt.srcA, this->_dispCapCnt.srcB);*/ } -FragmentColor* GPUEngineA::Get3DFramebufferMain() const +Color4u8* GPUEngineA::Get3DFramebufferMain() const { return this->_3DFramebufferMain; } @@ -3406,30 +3407,30 @@ void GPUEngineA::AllocateWorkingBuffers(NDSColorFormat requestedColorFormat, siz { this->GPUEngineBase::AllocateWorkingBuffers(requestedColorFormat, w, h); - FragmentColor *old3DFramebufferMain = this->_3DFramebufferMain; + Color4u8 *old3DFramebufferMain = this->_3DFramebufferMain; u16 *old3DFramebuffer16 = this->_3DFramebuffer16; u16 *oldCaptureWorkingDisplay16 = this->_captureWorkingDisplay16; u16 *oldCaptureWorkingA16 = this->_captureWorkingA16; u16 *oldCaptureWorkingB16 = this->_captureWorkingB16; - FragmentColor *oldCaptureWorkingA32 = this->_captureWorkingA32; - FragmentColor *oldCaptureWorkingB32 = this->_captureWorkingB32; + Color4u8 *oldCaptureWorkingA32 = this->_captureWorkingA32; + Color4u8 *oldCaptureWorkingB32 = this->_captureWorkingB32; - this->_3DFramebufferMain = (FragmentColor *)malloc_alignedPage(w * h * sizeof(FragmentColor)); + this->_3DFramebufferMain = (Color4u8 *)malloc_alignedPage(w * h * sizeof(Color4u8)); this->_3DFramebuffer16 = (u16 *)malloc_alignedPage(w * h * sizeof(u16)); this->_captureWorkingDisplay16 = (u16 *)malloc_alignedPage(w * _gpuLargestDstLineCount * sizeof(u16)); this->_captureWorkingA16 = (u16 *)malloc_alignedPage(w * _gpuLargestDstLineCount * sizeof(u16)); this->_captureWorkingB16 = (u16 *)malloc_alignedPage(w * _gpuLargestDstLineCount * sizeof(u16)); - this->_captureWorkingA32 = (FragmentColor *)malloc_alignedPage(w * _gpuLargestDstLineCount * sizeof(FragmentColor)); - this->_captureWorkingB32 = (FragmentColor *)malloc_alignedPage(w * _gpuLargestDstLineCount * sizeof(FragmentColor)); + this->_captureWorkingA32 = (Color4u8 *)malloc_alignedPage(w * _gpuLargestDstLineCount * sizeof(Color4u8)); + this->_captureWorkingB32 = (Color4u8 *)malloc_alignedPage(w * _gpuLargestDstLineCount * sizeof(Color4u8)); const GPUEngineLineInfo &lineInfo = this->_currentCompositorInfo[GPU_VRAM_BLOCK_LINES].line; if (this->_targetDisplay->GetColorFormat() == NDSColorFormat_BGR888_Rev) { - this->_VRAMCustomBlockPtr[0] = (FragmentColor *)GPU->GetCustomVRAMBuffer(); - this->_VRAMCustomBlockPtr[1] = (FragmentColor *)this->_VRAMCustomBlockPtr[0] + (1 * lineInfo.indexCustom * w); - this->_VRAMCustomBlockPtr[2] = (FragmentColor *)this->_VRAMCustomBlockPtr[0] + (2 * lineInfo.indexCustom * w); - this->_VRAMCustomBlockPtr[3] = (FragmentColor *)this->_VRAMCustomBlockPtr[0] + (3 * lineInfo.indexCustom * w); + this->_VRAMCustomBlockPtr[0] = (Color4u8 *)GPU->GetCustomVRAMBuffer(); + this->_VRAMCustomBlockPtr[1] = (Color4u8 *)this->_VRAMCustomBlockPtr[0] + (1 * lineInfo.indexCustom * w); + this->_VRAMCustomBlockPtr[2] = (Color4u8 *)this->_VRAMCustomBlockPtr[0] + (2 * lineInfo.indexCustom * w); + this->_VRAMCustomBlockPtr[3] = (Color4u8 *)this->_VRAMCustomBlockPtr[0] + (3 * lineInfo.indexCustom * w); } else { @@ -3575,7 +3576,7 @@ void GPUEngineA::RenderLine(const size_t l) template void GPUEngineA::RenderLine_Layer3D(GPUEngineCompositorInfo &compInfo) { - const FragmentColor *__restrict framebuffer3D = CurrentRenderer->GetFramebuffer(); + const Color4u8 *__restrict framebuffer3D = CurrentRenderer->GetFramebuffer(); if (framebuffer3D == NULL) { return; @@ -3590,12 +3591,12 @@ void GPUEngineA::RenderLine_Layer3D(GPUEngineCompositorInfo &compInfo) const u8 *colorEffectEnable = (CurrentRenderer->GetFramebufferWidth() == GPU_FRAMEBUFFER_NATIVE_WIDTH) ? this->_enableColorEffectNative[GPULayerID_BG0] : this->_enableColorEffectCustom[GPULayerID_BG0]; const float customWidthScale = (float)compInfo.line.widthCustom / (float)GPU_FRAMEBUFFER_NATIVE_WIDTH; - const FragmentColor *__restrict srcLinePtr = framebuffer3D + compInfo.line.blockOffsetCustom; + const Color4u8 *__restrict srcLinePtr = framebuffer3D + compInfo.line.blockOffsetCustom; compInfo.target.xNative = 0; compInfo.target.xCustom = 0; compInfo.target.lineColor16 = (u16 *)compInfo.target.lineColorHead; - compInfo.target.lineColor32 = (FragmentColor *)compInfo.target.lineColorHead; + compInfo.target.lineColor32 = (Color4u8 *)compInfo.target.lineColorHead; compInfo.target.lineLayerID = compInfo.target.lineLayerIDHead; // Horizontally offset the 3D layer by this amount. @@ -3879,7 +3880,7 @@ void GPUEngineA::_RenderLine_DisplayCapture(const GPUEngineCompositorInfo &compI } const u16 *vramCustom16 = (u16 *)GPU->GetCustomVRAMBlankBuffer(); - const FragmentColor *vramCustom32 = (FragmentColor *)GPU->GetCustomVRAMBlankBuffer(); + const Color4u8 *vramCustom32 = (Color4u8 *)GPU->GetCustomVRAMBlankBuffer(); if (!willReadNativeVRAM) { @@ -3890,7 +3891,7 @@ void GPUEngineA::_RenderLine_DisplayCapture(const GPUEngineCompositorInfo &compI } vramCustom16 = (u16 *)this->_VRAMCustomBlockPtr[DISPCNT.VRAM_Block] + vramCustomOffset; - vramCustom32 = (FragmentColor *)this->_VRAMCustomBlockPtr[DISPCNT.VRAM_Block] + vramCustomOffset; + vramCustom32 = (Color4u8 *)this->_VRAMCustomBlockPtr[DISPCNT.VRAM_Block] + vramCustomOffset; } if (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev) @@ -3903,9 +3904,9 @@ void GPUEngineA::_RenderLine_DisplayCapture(const GPUEngineCompositorInfo &compI } } - srcAPtr = (DISPCAPCNT.SrcA == 0) ? (FragmentColor *)compInfo.target.lineColorHead : (FragmentColor *)CurrentRenderer->GetFramebuffer() + compInfo.line.blockOffsetCustom; + srcAPtr = (DISPCAPCNT.SrcA == 0) ? (Color4u8 *)compInfo.target.lineColorHead : (Color4u8 *)CurrentRenderer->GetFramebuffer() + compInfo.line.blockOffsetCustom; srcBPtr = (DISPCAPCNT.SrcB == 0) ? vramCustom32 : this->_fifoLine32; - dstCustomPtr = (FragmentColor *)this->_VRAMCustomBlockPtr[DISPCAPCNT.VRAMWriteBlock] + dstCustomOffset; + dstCustomPtr = (Color4u8 *)this->_VRAMCustomBlockPtr[DISPCAPCNT.VRAMWriteBlock] + dstCustomOffset; } else { @@ -4272,10 +4273,10 @@ u16 GPUEngineA::_RenderLine_DispCapture_BlendFunc(const u16 srcA, const u16 srcB } template -FragmentColor GPUEngineA::_RenderLine_DispCapture_BlendFunc(const FragmentColor srcA, const FragmentColor srcB, const u8 blendEVA, const u8 blendEVB) +Color4u8 GPUEngineA::_RenderLine_DispCapture_BlendFunc(const Color4u8 srcA, const Color4u8 srcB, const u8 blendEVA, const u8 blendEVB) { - FragmentColor outColor; - outColor.color = 0; + Color4u8 outColor; + outColor.value = 0; u16 r = 0; u16 g = 0; @@ -4328,17 +4329,17 @@ void GPUEngineA::_RenderLine_DispCapture_Blend_Buffer(const void *srcA, const vo #endif if (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev) { - const FragmentColor *srcA_32 = (const FragmentColor *)srcA; - const FragmentColor *srcB_32 = (const FragmentColor *)srcB; - FragmentColor *dst32 = (FragmentColor *)dst; + const Color4u8 *srcA_32 = (const Color4u8 *)srcA; + const Color4u8 *srcB_32 = (const Color4u8 *)srcB; + Color4u8 *dst32 = (Color4u8 *)dst; #ifdef USEMANUALVECTORIZATION #pragma LOOPVECTORIZE_DISABLE #endif for (; i < length; i++) { - const FragmentColor colorA = srcA_32[i]; - const FragmentColor colorB = srcB_32[i]; + const Color4u8 colorA = srcA_32[i]; + const Color4u8 colorB = srcB_32[i]; dst32[i] = this->_RenderLine_DispCapture_BlendFunc(colorA, colorB, blendEVA, blendEVB); } @@ -4383,9 +4384,9 @@ void GPUEngineA::_RenderLine_DispCapture_Blend(const GPUEngineLineInfo &lineInfo for (size_t line = 0; line < lineInfo.renderCount; line++) { this->_RenderLine_DispCapture_Blend_Buffer(srcA, srcB, dst, blendEVA, blendEVB, captureLengthExt); - srcA = (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev) ? (void *)((FragmentColor *)srcA + lineInfo.widthCustom) : (void *)((u16 *)srcA + lineInfo.widthCustom); - srcB = (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev) ? (void *)((FragmentColor *)srcB + lineInfo.widthCustom) : (void *)((u16 *)srcB + lineInfo.widthCustom); - dst = (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev) ? (void *)((FragmentColor *)dst + lineInfo.widthCustom) : (void *)((u16 *)dst + lineInfo.widthCustom); + srcA = (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev) ? (void *)((Color4u8 *)srcA + lineInfo.widthCustom) : (void *)((u16 *)srcA + lineInfo.widthCustom); + srcB = (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev) ? (void *)((Color4u8 *)srcB + lineInfo.widthCustom) : (void *)((u16 *)srcB + lineInfo.widthCustom); + dst = (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev) ? (void *)((Color4u8 *)dst + lineInfo.widthCustom) : (void *)((u16 *)dst + lineInfo.widthCustom); } } } @@ -4943,7 +4944,7 @@ void GPUSubsystem::SetFramebufferPageCount(size_t pageCount) pageCount = MAX_FRAMEBUFFER_PAGES; } - this->_displayInfo.framebufferPageCount = pageCount; + this->_displayInfo.framebufferPageCount = (u32)pageCount; } size_t GPUSubsystem::GetCustomFramebufferWidth() const @@ -4977,9 +4978,9 @@ void GPUSubsystem::SetCustomFramebufferSize(size_t w, size_t h) u8 *oldGpuDstToSrcSSSE3_u16_8e = _gpuDstToSrcSSSE3_u16_8e; u8 *oldGpuDstToSrcSSSE3_u32_4e = _gpuDstToSrcSSSE3_u32_4e; - for (size_t srcX = 0, currentPitchCount = 0; srcX < GPU_FRAMEBUFFER_NATIVE_WIDTH; srcX++) + for (u32 srcX = 0, currentPitchCount = 0; srcX < GPU_FRAMEBUFFER_NATIVE_WIDTH; srcX++) { - const size_t pitch = (size_t)ceilf((srcX+1) * customWidthScale) - currentPitchCount; + const u32 pitch = (u32)ceilf(((float)srcX+1.0f) * customWidthScale) - (float)currentPitchCount; _gpuDstPitchCount[srcX] = pitch; _gpuDstPitchIndex[srcX] = currentPitchCount; currentPitchCount += pitch; @@ -5067,8 +5068,8 @@ void GPUSubsystem::SetCustomFramebufferSize(size_t w, size_t h) this->_display[NDSDisplayID_Touch]->SetDisplaySize(w, h); this->_displayInfo.isCustomSizeRequested = ( (w != GPU_FRAMEBUFFER_NATIVE_WIDTH) || (h != GPU_FRAMEBUFFER_NATIVE_HEIGHT) ); - this->_displayInfo.customWidth = w; - this->_displayInfo.customHeight = h; + this->_displayInfo.customWidth = (u32)w; + this->_displayInfo.customHeight = (u32)h; if (!this->_display[NDSDisplayID_Main]->IsCustomSizeRequested()) { @@ -5110,7 +5111,7 @@ void GPUSubsystem::SetColorFormat(const NDSColorFormat outputFormat) this->_display[NDSDisplayID_Touch]->SetColorFormat(outputFormat); this->_displayInfo.colorFormat = this->_display[NDSDisplayID_Main]->GetColorFormat(); - this->_displayInfo.pixelBytes = this->_display[NDSDisplayID_Main]->GetPixelBytes(); + this->_displayInfo.pixelBytes = (u32)this->_display[NDSDisplayID_Main]->GetPixelBytes(); if (!this->_displayInfo.isCustomSizeRequested) { @@ -5128,7 +5129,7 @@ void GPUSubsystem::_AllocateFramebuffers(NDSColorFormat outputFormat, size_t w, void *oldMasterFramebuffer = this->_masterFramebuffer; void *oldCustomVRAM = this->_customVRAM; - const size_t pixelBytes = (outputFormat == NDSColorFormat_BGR555_Rev) ? sizeof(u16) : sizeof(FragmentColor); + const size_t pixelBytes = (outputFormat == NDSColorFormat_BGR555_Rev) ? sizeof(u16) : sizeof(Color4u8); const size_t newCustomVRAMBlockSize = this->_lineInfo[GPU_VRAM_BLOCK_LINES].indexCustom * w; const size_t newCustomVRAMBlankSize = _gpuLargestDstLineCount * GPU_VRAM_BLANK_REGION_LINES * w; const size_t nativeFramebufferSize = GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * sizeof(u16); @@ -5136,8 +5137,8 @@ void GPUSubsystem::_AllocateFramebuffers(NDSColorFormat outputFormat, size_t w, void *newCustomVRAM = NULL; - this->_displayInfo.framebufferPageCount = pageCount; - this->_displayInfo.framebufferPageSize = (nativeFramebufferSize * 2) + (customFramebufferSize * 2); + this->_displayInfo.framebufferPageCount = (u32)pageCount; + this->_displayInfo.framebufferPageSize = (u32)( (nativeFramebufferSize * 2) + (customFramebufferSize * 2) ); this->_masterFramebuffer = malloc_alignedPage(this->_displayInfo.framebufferPageSize * this->_displayInfo.framebufferPageCount); if (outputFormat != NDSColorFormat_BGR555_Rev) @@ -5207,10 +5208,10 @@ void GPUSubsystem::_AllocateFramebuffers(NDSColorFormat outputFormat, size_t w, break; case NDSColorFormat_BGR888_Rev: - newCustomVRAM = (void *)malloc_alignedPage(((newCustomVRAMBlockSize * 4) + newCustomVRAMBlankSize) * sizeof(FragmentColor)); - memset(newCustomVRAM, 0, ((newCustomVRAMBlockSize * 4) + newCustomVRAMBlankSize) * sizeof(FragmentColor)); + newCustomVRAM = (void *)malloc_alignedPage(((newCustomVRAMBlockSize * 4) + newCustomVRAMBlankSize) * sizeof(Color4u8)); + memset(newCustomVRAM, 0, ((newCustomVRAMBlockSize * 4) + newCustomVRAMBlankSize) * sizeof(Color4u8)); this->_customVRAM = newCustomVRAM; - this->_customVRAMBlank = (FragmentColor *)newCustomVRAM + (newCustomVRAMBlockSize * 4); + this->_customVRAMBlank = (Color4u8 *)newCustomVRAM + (newCustomVRAMBlockSize * 4); break; default: @@ -5334,7 +5335,7 @@ void* GPUSubsystem::GetCustomVRAMAddressUsingMappedAddress(const u32 mappedAddr, const size_t blockLine = (vramPixel >> 8) & 0x000000FF; // blockLine = (vramPixel % (GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_VRAM_BLOCK_LINES)) / GPU_FRAMEBUFFER_NATIVE_WIDTH const size_t linePixel = vramPixel & 0x000000FF; // linePixel = (vramPixel % (GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_VRAM_BLOCK_LINES)) % GPU_FRAMEBUFFER_NATIVE_WIDTH - return (COLORFORMAT == NDSColorFormat_BGR888_Rev) ? (void *)((FragmentColor *)this->GetEngineMain()->GetCustomVRAMBlockPtr(blockID) + (this->_lineInfo[blockLine].indexCustom * this->_lineInfo[blockLine].widthCustom) + _gpuDstPitchIndex[linePixel] + offset) : (void *)((u16 *)this->GetEngineMain()->GetCustomVRAMBlockPtr(blockID) + (this->_lineInfo[blockLine].indexCustom * this->_lineInfo[blockLine].widthCustom) + _gpuDstPitchIndex[linePixel] + offset); + return (COLORFORMAT == NDSColorFormat_BGR888_Rev) ? (void *)((Color4u8 *)this->GetEngineMain()->GetCustomVRAMBlockPtr(blockID) + (this->_lineInfo[blockLine].indexCustom * this->_lineInfo[blockLine].widthCustom) + _gpuDstPitchIndex[linePixel] + offset) : (void *)((u16 *)this->GetEngineMain()->GetCustomVRAMBlockPtr(blockID) + (this->_lineInfo[blockLine].indexCustom * this->_lineInfo[blockLine].widthCustom) + _gpuDstPitchIndex[linePixel] + offset); } bool GPUSubsystem::GetWillPostprocessDisplays() const @@ -5540,13 +5541,13 @@ void GPUSubsystem::RenderLine(const size_t l) this->_displayInfo.didPerformCustomRender[NDSDisplayID_Main] = this->_display[NDSDisplayID_Main]->DidPerformCustomRender(); this->_displayInfo.renderedBuffer[NDSDisplayID_Main] = this->_display[NDSDisplayID_Main]->GetRenderedBuffer(); - this->_displayInfo.renderedWidth[NDSDisplayID_Main] = this->_display[NDSDisplayID_Main]->GetRenderedWidth(); - this->_displayInfo.renderedHeight[NDSDisplayID_Main] = this->_display[NDSDisplayID_Main]->GetRenderedHeight(); + this->_displayInfo.renderedWidth[NDSDisplayID_Main] = (u32)this->_display[NDSDisplayID_Main]->GetRenderedWidth(); + this->_displayInfo.renderedHeight[NDSDisplayID_Main] = (u32)this->_display[NDSDisplayID_Main]->GetRenderedHeight(); this->_displayInfo.didPerformCustomRender[NDSDisplayID_Touch] = this->_display[NDSDisplayID_Touch]->DidPerformCustomRender(); this->_displayInfo.renderedBuffer[NDSDisplayID_Touch] = this->_display[NDSDisplayID_Touch]->GetRenderedBuffer(); - this->_displayInfo.renderedWidth[NDSDisplayID_Touch] = this->_display[NDSDisplayID_Touch]->GetRenderedWidth(); - this->_displayInfo.renderedHeight[NDSDisplayID_Touch] = this->_display[NDSDisplayID_Touch]->GetRenderedHeight(); + this->_displayInfo.renderedWidth[NDSDisplayID_Touch] = (u32)this->_display[NDSDisplayID_Touch]->GetRenderedWidth(); + this->_displayInfo.renderedHeight[NDSDisplayID_Touch] = (u32)this->_display[NDSDisplayID_Touch]->GetRenderedHeight(); this->_displayInfo.engineID[NDSDisplayID_Main] = this->_display[NDSDisplayID_Main]->GetEngineID(); this->_displayInfo.engineID[NDSDisplayID_Touch] = this->_display[NDSDisplayID_Touch]->GetEngineID(); @@ -5638,16 +5639,16 @@ void GPUSubsystem::ClearWithColor(const u16 colorBGRA5551) } else { - FragmentColor color32; + Color4u8 color32; switch (this->_displayInfo.colorFormat) { case NDSColorFormat_BGR666_Rev: - color32.color = LE_TO_LOCAL_32( ColorspaceConvert555To6665Opaque(colorBGRA5551 & 0x7FFF) ); + color32.value = LE_TO_LOCAL_32( ColorspaceConvert555To6665Opaque(colorBGRA5551 & 0x7FFF) ); break; case NDSColorFormat_BGR888_Rev: - color32.color = LE_TO_LOCAL_32( ColorspaceConvert555To8888Opaque(colorBGRA5551 & 0x7FFF) ); + color32.value = LE_TO_LOCAL_32( ColorspaceConvert555To8888Opaque(colorBGRA5551 & 0x7FFF) ); break; default: @@ -5657,7 +5658,7 @@ void GPUSubsystem::ClearWithColor(const u16 colorBGRA5551) for (size_t i = 0; i < this->_displayInfo.framebufferPageCount; i++) { memset_u16((u8 *)this->_masterFramebuffer + (this->_displayInfo.framebufferPageSize * i), color16, nativeFramebufferPixCount); - memset_u32((u8 *)this->_masterFramebuffer + (this->_displayInfo.framebufferPageSize * i) + (nativeFramebufferPixCount * sizeof(u16)), color32.color, customFramebufferPixCount); + memset_u32((u8 *)this->_masterFramebuffer + (this->_displayInfo.framebufferPageSize * i) + (nativeFramebufferPixCount * sizeof(u16)), color32.value, customFramebufferPixCount); } } } @@ -6313,7 +6314,7 @@ NDSColorFormat NDSDisplay::GetColorFormat() const void NDSDisplay::SetColorFormat(NDSColorFormat colorFormat) { this->_customColorFormat = colorFormat; - this->_customPixelBytes = (colorFormat == NDSColorFormat_BGR555_Rev) ? sizeof(u16) : sizeof(FragmentColor); + this->_customPixelBytes = (colorFormat == NDSColorFormat_BGR555_Rev) ? sizeof(u16) : sizeof(Color4u8); } size_t NDSDisplay::GetPixelBytes() const @@ -6441,8 +6442,8 @@ void NDSDisplay::ApplyMasterBrightness(void *dst, const size_t pixCount, const G } else { - ((FragmentColor *)dst)[i] = colorop.increase(((FragmentColor *)dst)[i], intensityClamped); - ((FragmentColor *)dst)[i].a = (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) ? 0x1F : 0xFF; + ((Color4u8 *)dst)[i] = colorop.increase(((Color4u8 *)dst)[i], intensityClamped); + ((Color4u8 *)dst)[i].a = (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) ? 0x1F : 0xFF; } } } @@ -6488,8 +6489,8 @@ void NDSDisplay::ApplyMasterBrightness(void *dst, const size_t pixCount, const G } else { - ((FragmentColor *)dst)[i] = colorop.decrease(((FragmentColor *)dst)[i], intensityClamped); - ((FragmentColor *)dst)[i].a = (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) ? 0x1F : 0xFF; + ((Color4u8 *)dst)[i] = colorop.decrease(((Color4u8 *)dst)[i], intensityClamped); + ((Color4u8 *)dst)[i].a = (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) ? 0x1F : 0xFF; } } } diff --git a/desmume/src/GPU.h b/desmume/src/GPU.h index ffa8fb026..5808fa304 100644 --- a/desmume/src/GPU.h +++ b/desmume/src/GPU.h @@ -1330,7 +1330,7 @@ typedef struct GPUDisplayMode displayOutputMode; u16 backdropColor16; u16 workingBackdropColor16; - FragmentColor workingBackdropColor32; + Color4u8 workingBackdropColor32; ColorEffect colorEffect; u8 blendEVA; u8 blendEVB; @@ -1342,11 +1342,11 @@ typedef struct TBlendTable *blendTable555; u16 *brightnessUpTable555; - FragmentColor *brightnessUpTable666; - FragmentColor *brightnessUpTable888; + Color4u8 *brightnessUpTable666; + Color4u8 *brightnessUpTable888; u16 *brightnessDownTable555; - FragmentColor *brightnessDownTable666; - FragmentColor *brightnessDownTable888; + Color4u8 *brightnessDownTable666; + Color4u8 *brightnessDownTable888; u8 WIN0_enable[6]; u8 WIN1_enable[6]; @@ -1387,7 +1387,7 @@ typedef struct size_t xCustom; void **lineColor; u16 *lineColor16; - FragmentColor *lineColor32; + Color4u8 *lineColor32; u8 *lineLayerID; } GPUEngineTargetState; @@ -1503,7 +1503,7 @@ protected: volatile s32 _asyncClearLineCustom; volatile s32 _asyncClearInterrupt; u16 _asyncClearBackdropColor16; // Do not modify this variable directly. - FragmentColor _asyncClearBackdropColor32; // Do not modify this variable directly. + Color4u8 _asyncClearBackdropColor32; // Do not modify this variable directly. bool _asyncClearUseInternalCustomBuffer; // Do not modify this variable directly. void _ResortBGLayers(); @@ -1523,11 +1523,11 @@ protected: template void _PrecompositeNativeToCustomLineBG(GPUEngineCompositorInfo &compInfo); - template void _CompositeNativeLineOBJ(GPUEngineCompositorInfo &compInfo, const u16 *__restrict srcColorNative16, const FragmentColor *__restrict srcColorNative32); + template void _CompositeNativeLineOBJ(GPUEngineCompositorInfo &compInfo, const u16 *__restrict srcColorNative16, const Color4u8 *__restrict srcColorNative32); template void _CompositeLineDeferred(GPUEngineCompositorInfo &compInfo, const u16 *__restrict srcColorCustom16, const u8 *__restrict srcIndexCustom); template void _CompositeVRAMLineDeferred(GPUEngineCompositorInfo &compInfo, const void *__restrict vramColorPtr); - template void _CompositeNativeLineOBJ_LoopOp(GPUEngineCompositorInfo &compInfo, const u16 *__restrict srcColorNative16, const FragmentColor *__restrict srcColorNative32); + template void _CompositeNativeLineOBJ_LoopOp(GPUEngineCompositorInfo &compInfo, const u16 *__restrict srcColorNative16, const Color4u8 *__restrict srcColorNative32); template size_t _CompositeLineDeferred_LoopOp(GPUEngineCompositorInfo &compInfo, const u8 *__restrict windowTestPtr, const u8 *__restrict colorEffectEnablePtr, const u16 *__restrict srcColorCustom16, const u8 *__restrict srcIndexCustom); template size_t _CompositeVRAMLineDeferred_LoopOp(GPUEngineCompositorInfo &compInfo, const u8 *__restrict windowTestPtr, const u8 *__restrict colorEffectEnablePtr, const void *__restrict vramColorPtr); @@ -1620,9 +1620,9 @@ public: void ApplySettings(); void RenderLineClearAsync(); - void RenderLineClearAsyncStart(bool willClearInternalCustomBuffer, s32 startLineIndex, u16 clearColor16, FragmentColor clearColor32); + void RenderLineClearAsyncStart(bool willClearInternalCustomBuffer, size_t startLineIndex, u16 clearColor16, Color4u8 clearColor32); void RenderLineClearAsyncFinish(); - void RenderLineClearAsyncWaitForCustomLine(const s32 l); + void RenderLineClearAsyncWaitForCustomLine(const size_t l); void TransitionRenderStatesToDisplayInfo(NDSDisplayInfo &mutableInfo); @@ -1651,12 +1651,12 @@ private: protected: CACHE_ALIGN u16 _fifoLine16[GPU_FRAMEBUFFER_NATIVE_WIDTH]; - CACHE_ALIGN FragmentColor _fifoLine32[GPU_FRAMEBUFFER_NATIVE_WIDTH]; + CACHE_ALIGN Color4u8 _fifoLine32[GPU_FRAMEBUFFER_NATIVE_WIDTH]; CACHE_ALIGN u16 _VRAMNativeBlockCaptureCopy[GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_VRAM_BLOCK_LINES * 4]; u16 *_VRAMNativeBlockCaptureCopyPtr[4]; - FragmentColor *_3DFramebufferMain; + Color4u8 *_3DFramebufferMain; u16 *_3DFramebuffer16; u16 *_VRAMNativeBlockPtr[4]; @@ -1668,8 +1668,8 @@ protected: u16 *_captureWorkingDisplay16; u16 *_captureWorkingA16; u16 *_captureWorkingB16; - FragmentColor *_captureWorkingA32; - FragmentColor *_captureWorkingB32; + Color4u8 *_captureWorkingA32; + Color4u8 *_captureWorkingB32; DISPCAPCNT_parsed _dispCapCnt; bool _displayCaptureEnable; @@ -1692,10 +1692,10 @@ protected: void _RenderLine_DispCapture_Copy(const GPUEngineLineInfo &lineInfo, const void *src, void *dst, const size_t captureLengthExt); // Do not use restrict pointers, since src and dst can be the same u16 _RenderLine_DispCapture_BlendFunc(const u16 srcA, const u16 srcB, const u8 blendEVA, const u8 blendEVB); - template FragmentColor _RenderLine_DispCapture_BlendFunc(const FragmentColor srcA, const FragmentColor srcB, const u8 blendEVA, const u8 blendEVB); + template Color4u8 _RenderLine_DispCapture_BlendFunc(const Color4u8 srcA, const Color4u8 srcB, const u8 blendEVA, const u8 blendEVB); template - size_t _RenderLine_Layer3D_LoopOp(GPUEngineCompositorInfo &compInfo, const u8 *__restrict windowTestPtr, const u8 *__restrict colorEffectEnablePtr, const FragmentColor *__restrict srcLinePtr); + size_t _RenderLine_Layer3D_LoopOp(GPUEngineCompositorInfo &compInfo, const u8 *__restrict windowTestPtr, const u8 *__restrict colorEffectEnablePtr, const Color4u8 *__restrict srcLinePtr); template void _RenderLine_DispCapture_Blend_Buffer(const void *srcA, const void *srcB, void *dst, const u8 blendEVA, const u8 blendEVB, const size_t pixCount); // Do not use restrict pointers, since srcB and dst can be the same @@ -1716,7 +1716,7 @@ public: void ParseReg_DISPCAPCNT(); bool IsLineCaptureNative(const size_t blockID, const size_t blockLine); void* GetCustomVRAMBlockPtr(const size_t blockID); - FragmentColor* Get3DFramebufferMain() const; + Color4u8* Get3DFramebufferMain() const; u16* Get3DFramebuffer16() const; virtual void AllocateWorkingBuffers(NDSColorFormat requestedColorFormat, size_t w, size_t h); @@ -1819,8 +1819,8 @@ public: bool IsCustomSizeRequested() const; void* GetRenderedBuffer() const; - size_t GetRenderedWidth() const; - size_t GetRenderedHeight() const; + size_t GetRenderedWidth() const; + size_t GetRenderedHeight() const; bool IsEnabled() const; void SetIsEnabled(bool stateIsEnabled); diff --git a/desmume/src/GPU_Operations.cpp b/desmume/src/GPU_Operations.cpp index bc515432b..fef0a68cb 100644 --- a/desmume/src/GPU_Operations.cpp +++ b/desmume/src/GPU_Operations.cpp @@ -1,5 +1,5 @@ /* - Copyright (C) 2021 DeSmuME team + Copyright (C) 2021-2023 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -32,11 +32,11 @@ static CACHE_ALIGN u32 _gpuDstPitchIndex[GPU_FRAMEBUFFER_NATIVE_WIDTH]; // Key: u8 PixelOperation::BlendTable555[17][17][32][32]; u16 PixelOperation::BrightnessUpTable555[17][0x8000]; -FragmentColor PixelOperation::BrightnessUpTable666[17][0x8000]; -FragmentColor PixelOperation::BrightnessUpTable888[17][0x8000]; +Color4u8 PixelOperation::BrightnessUpTable666[17][0x8000]; +Color4u8 PixelOperation::BrightnessUpTable888[17][0x8000]; u16 PixelOperation::BrightnessDownTable555[17][0x8000]; -FragmentColor PixelOperation::BrightnessDownTable666[17][0x8000]; -FragmentColor PixelOperation::BrightnessDownTable888[17][0x8000]; +Color4u8 PixelOperation::BrightnessDownTable666[17][0x8000]; +Color4u8 PixelOperation::BrightnessDownTable888[17][0x8000]; static CACHE_ALIGN ColorOperation colorop; static CACHE_ALIGN PixelOperation pixelop; @@ -71,9 +71,9 @@ FORCEINLINE u16 ColorOperation::blend(const u16 colA, const u16 colB, const TBle } template -FORCEINLINE FragmentColor ColorOperation::blend(const FragmentColor colA, const FragmentColor colB, const u16 blendEVA, const u16 blendEVB) const +FORCEINLINE Color4u8 ColorOperation::blend(const Color4u8 colA, const Color4u8 colB, const u16 blendEVA, const u16 blendEVB) const { - FragmentColor outColor; + Color4u8 outColor; u16 r16 = ( (colA.r * blendEVA) + (colB.r * blendEVB) ) / 16; u16 g16 = ( (colA.g * blendEVA) + (colB.g * blendEVB) ) / 16; @@ -96,7 +96,7 @@ FORCEINLINE FragmentColor ColorOperation::blend(const FragmentColor colA, const return outColor; } -FORCEINLINE u16 ColorOperation::blend3D(const FragmentColor colA, const u16 colB) const +FORCEINLINE u16 ColorOperation::blend3D(const Color4u8 colA, const u16 colB) const { const u16 alpha = colA.a + 1; COLOR c2; @@ -113,9 +113,9 @@ FORCEINLINE u16 ColorOperation::blend3D(const FragmentColor colA, const u16 colB } template -FORCEINLINE FragmentColor ColorOperation::blend3D(const FragmentColor colA, const FragmentColor colB) const +FORCEINLINE Color4u8 ColorOperation::blend3D(const Color4u8 colA, const Color4u8 colB) const { - FragmentColor blendedColor; + Color4u8 blendedColor; const u16 alpha = colA.a + 1; if (COLORFORMAT == NDSColorFormat_BGR666_Rev) @@ -149,10 +149,10 @@ FORCEINLINE u16 ColorOperation::increase(const u16 col, const u16 blendEVY) cons } template -FORCEINLINE FragmentColor ColorOperation::increase(const FragmentColor col, const u16 blendEVY) const +FORCEINLINE Color4u8 ColorOperation::increase(const Color4u8 col, const u16 blendEVY) const { - FragmentColor newColor; - newColor.color = 0; + Color4u8 newColor; + newColor.value = 0; u32 r = col.r; u32 g = col.g; @@ -188,10 +188,10 @@ FORCEINLINE u16 ColorOperation::decrease(const u16 col, const u16 blendEVY) cons } template -FORCEINLINE FragmentColor ColorOperation::decrease(const FragmentColor col, const u16 blendEVY) const +FORCEINLINE Color4u8 ColorOperation::decrease(const Color4u8 col, const u16 blendEVY) const { - FragmentColor newColor; - newColor.color = 0; + Color4u8 newColor; + newColor.value = 0; u32 r = col.r; u32 g = col.g; @@ -242,8 +242,8 @@ void PixelOperation::InitLUTs() cur.bits.blue = (cur.bits.blue + ((31 - cur.bits.blue) * i / 16)); cur.bits.alpha = 0; PixelOperation::BrightnessUpTable555[i][j] = cur.val; - PixelOperation::BrightnessUpTable666[i][j].color = LOCAL_TO_LE_32( COLOR555TO666(cur.val) ); - PixelOperation::BrightnessUpTable888[i][j].color = LOCAL_TO_LE_32( COLOR555TO888(cur.val) ); + PixelOperation::BrightnessUpTable666[i][j].value = LOCAL_TO_LE_32( COLOR555TO666(cur.val) ); + PixelOperation::BrightnessUpTable888[i][j].value = LOCAL_TO_LE_32( COLOR555TO888(cur.val) ); cur.val = j; cur.bits.red = (cur.bits.red - (cur.bits.red * i / 16)); @@ -251,8 +251,8 @@ void PixelOperation::InitLUTs() cur.bits.blue = (cur.bits.blue - (cur.bits.blue * i / 16)); cur.bits.alpha = 0; PixelOperation::BrightnessDownTable555[i][j] = cur.val; - PixelOperation::BrightnessDownTable666[i][j].color = LOCAL_TO_LE_32( COLOR555TO666(cur.val) ); - PixelOperation::BrightnessDownTable888[i][j].color = LOCAL_TO_LE_32( COLOR555TO888(cur.val) ); + PixelOperation::BrightnessDownTable666[i][j].value = LOCAL_TO_LE_32( COLOR555TO666(cur.val) ); + PixelOperation::BrightnessDownTable888[i][j].value = LOCAL_TO_LE_32( COLOR555TO888(cur.val) ); } } @@ -279,7 +279,7 @@ template FORCEINLINE void PixelOperation::_copy16(GPUEngineCompositorInfo &compInfo, const u16 srcColor16) const { u16 &dstColor16 = *compInfo.target.lineColor16; - FragmentColor &dstColor32 = *compInfo.target.lineColor32; + Color4u8 &dstColor32 = *compInfo.target.lineColor32; u8 &dstLayerID = *compInfo.target.lineLayerID; switch (OUTPUTFORMAT) @@ -289,11 +289,11 @@ FORCEINLINE void PixelOperation::_copy16(GPUEngineCompositorInfo &compInfo, cons break; case NDSColorFormat_BGR666_Rev: - dstColor32.color = LE_TO_LOCAL_32( ColorspaceConvert555To6665Opaque(srcColor16) ); + dstColor32.value = LE_TO_LOCAL_32( ColorspaceConvert555To6665Opaque(srcColor16) ); break; case NDSColorFormat_BGR888_Rev: - dstColor32.color = LE_TO_LOCAL_32( ColorspaceConvert555To8888Opaque(srcColor16) ); + dstColor32.value = LE_TO_LOCAL_32( ColorspaceConvert555To8888Opaque(srcColor16) ); break; } @@ -304,10 +304,10 @@ FORCEINLINE void PixelOperation::_copy16(GPUEngineCompositorInfo &compInfo, cons } template -FORCEINLINE void PixelOperation::_copy32(GPUEngineCompositorInfo &compInfo, const FragmentColor srcColor32) const +FORCEINLINE void PixelOperation::_copy32(GPUEngineCompositorInfo &compInfo, const Color4u8 srcColor32) const { u16 &dstColor16 = *compInfo.target.lineColor16; - FragmentColor &dstColor32 = *compInfo.target.lineColor32; + Color4u8 &dstColor32 = *compInfo.target.lineColor32; u8 &dstLayerID = *compInfo.target.lineLayerID; switch (OUTPUTFORMAT) @@ -341,7 +341,7 @@ template FORCEINLINE void PixelOperation::_brightnessUp16(GPUEngineCompositorInfo &compInfo, const u16 srcColor16) const { u16 &dstColor16 = *compInfo.target.lineColor16; - FragmentColor &dstColor32 = *compInfo.target.lineColor32; + Color4u8 &dstColor32 = *compInfo.target.lineColor32; u8 &dstLayerID = *compInfo.target.lineLayerID; switch (OUTPUTFORMAT) @@ -365,10 +365,10 @@ FORCEINLINE void PixelOperation::_brightnessUp16(GPUEngineCompositorInfo &compIn } template -FORCEINLINE void PixelOperation::_brightnessUp32(GPUEngineCompositorInfo &compInfo, const FragmentColor srcColor32) const +FORCEINLINE void PixelOperation::_brightnessUp32(GPUEngineCompositorInfo &compInfo, const Color4u8 srcColor32) const { u16 &dstColor16 = *compInfo.target.lineColor16; - FragmentColor &dstColor32 = *compInfo.target.lineColor32; + Color4u8 &dstColor32 = *compInfo.target.lineColor32; u8 &dstLayerID = *compInfo.target.lineLayerID; if (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev) @@ -390,7 +390,7 @@ template FORCEINLINE void PixelOperation::_brightnessDown16(GPUEngineCompositorInfo &compInfo, const u16 srcColor16) const { u16 &dstColor16 = *compInfo.target.lineColor16; - FragmentColor &dstColor32 = *compInfo.target.lineColor32; + Color4u8 &dstColor32 = *compInfo.target.lineColor32; u8 &dstLayerID = *compInfo.target.lineLayerID; switch (OUTPUTFORMAT) @@ -414,10 +414,10 @@ FORCEINLINE void PixelOperation::_brightnessDown16(GPUEngineCompositorInfo &comp } template -FORCEINLINE void PixelOperation::_brightnessDown32(GPUEngineCompositorInfo &compInfo, const FragmentColor srcColor32) const +FORCEINLINE void PixelOperation::_brightnessDown32(GPUEngineCompositorInfo &compInfo, const Color4u8 srcColor32) const { u16 &dstColor16 = *compInfo.target.lineColor16; - FragmentColor &dstColor32 = *compInfo.target.lineColor32; + Color4u8 &dstColor32 = *compInfo.target.lineColor32; u8 &dstLayerID = *compInfo.target.lineLayerID; if (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev) @@ -555,14 +555,14 @@ FORCEINLINE void PixelOperation::_unknownEffect16(GPUEngineCompositorInfo &compI } else { - FragmentColor &dstColor32 = *compInfo.target.lineColor32; + Color4u8 &dstColor32 = *compInfo.target.lineColor32; if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) { switch (selectedEffect) { case ColorEffect_Disable: - dstColor32.color = LE_TO_LOCAL_32( ColorspaceConvert555To6665Opaque(srcColor16) ); + dstColor32.value = LE_TO_LOCAL_32( ColorspaceConvert555To6665Opaque(srcColor16) ); break; case ColorEffect_IncreaseBrightness: @@ -575,8 +575,8 @@ FORCEINLINE void PixelOperation::_unknownEffect16(GPUEngineCompositorInfo &compI case ColorEffect_Blend: { - FragmentColor srcColor32; - srcColor32.color = LE_TO_LOCAL_32( ColorspaceConvert555To6665Opaque(srcColor16) ); + Color4u8 srcColor32; + srcColor32.value = LE_TO_LOCAL_32( ColorspaceConvert555To6665Opaque(srcColor16) ); dstColor32 = (LAYERTYPE == GPULayerType_3D) ? colorop.blend3D(srcColor32, dstColor32) : colorop.blend(srcColor32, dstColor32, blendEVA, blendEVB); break; } @@ -587,7 +587,7 @@ FORCEINLINE void PixelOperation::_unknownEffect16(GPUEngineCompositorInfo &compI switch (selectedEffect) { case ColorEffect_Disable: - dstColor32.color = LE_TO_LOCAL_32( ColorspaceConvert555To8888Opaque(srcColor16) ); + dstColor32.value = LE_TO_LOCAL_32( ColorspaceConvert555To8888Opaque(srcColor16) ); break; case ColorEffect_IncreaseBrightness: @@ -600,8 +600,8 @@ FORCEINLINE void PixelOperation::_unknownEffect16(GPUEngineCompositorInfo &compI case ColorEffect_Blend: { - FragmentColor srcColor32; - srcColor32.color = LE_TO_LOCAL_32( ColorspaceConvert555To8888Opaque(srcColor16) ); + Color4u8 srcColor32; + srcColor32.value = LE_TO_LOCAL_32( ColorspaceConvert555To8888Opaque(srcColor16) ); dstColor32 = (LAYERTYPE == GPULayerType_3D) ? colorop.blend3D(srcColor32, dstColor32) : colorop.blend(srcColor32, dstColor32, blendEVA, blendEVB); break; } @@ -613,7 +613,7 @@ FORCEINLINE void PixelOperation::_unknownEffect16(GPUEngineCompositorInfo &compI } template -FORCEINLINE void PixelOperation::_unknownEffect32(GPUEngineCompositorInfo &compInfo, const FragmentColor srcColor32, const bool enableColorEffect, const u8 spriteAlpha, const OBJMode spriteMode) const +FORCEINLINE void PixelOperation::_unknownEffect32(GPUEngineCompositorInfo &compInfo, const Color4u8 srcColor32, const bool enableColorEffect, const u8 spriteAlpha, const OBJMode spriteMode) const { u8 &dstLayerID = *compInfo.target.lineLayerID; TBlendTable *selectedBlendTable = compInfo.renderState.blendTable555; @@ -663,7 +663,7 @@ FORCEINLINE void PixelOperation::_unknownEffect32(GPUEngineCompositorInfo &compI } else { - FragmentColor &dstColor32 = *compInfo.target.lineColor32; + Color4u8 &dstColor32 = *compInfo.target.lineColor32; switch (selectedEffect) { @@ -716,7 +716,7 @@ FORCEINLINE void PixelOperation::Composite16(GPUEngineCompositorInfo &compInfo, } template -FORCEINLINE void PixelOperation::Composite32(GPUEngineCompositorInfo &compInfo, FragmentColor srcColor32, const bool enableColorEffect, const u8 spriteAlpha, const u8 spriteMode) const +FORCEINLINE void PixelOperation::Composite32(GPUEngineCompositorInfo &compInfo, Color4u8 srcColor32, const bool enableColorEffect, const u8 spriteAlpha, const u8 spriteMode) const { switch (COMPOSITORMODE) { @@ -1013,7 +1013,7 @@ void GPUEngineBase::_MosaicLine(GPUEngineCompositorInfo &compInfo) } template -void GPUEngineBase::_CompositeNativeLineOBJ_LoopOp(GPUEngineCompositorInfo &compInfo, const u16 *__restrict srcColorNative16, const FragmentColor *__restrict srcColorNative32) +void GPUEngineBase::_CompositeNativeLineOBJ_LoopOp(GPUEngineCompositorInfo &compInfo, const u16 *__restrict srcColorNative16, const Color4u8 *__restrict srcColorNative32) { // Do nothing. This is a placeholder for a manually vectorized version of this method. } @@ -1086,7 +1086,7 @@ void GPUEngineBase::_PerformWindowTestingNative(GPUEngineCompositorInfo &compInf } template -size_t GPUEngineA::_RenderLine_Layer3D_LoopOp(GPUEngineCompositorInfo &compInfo, const u8 *__restrict windowTestPtr, const u8 *__restrict colorEffectEnablePtr, const FragmentColor *__restrict srcLinePtr) +size_t GPUEngineA::_RenderLine_Layer3D_LoopOp(GPUEngineCompositorInfo &compInfo, const u8 *__restrict windowTestPtr, const u8 *__restrict colorEffectEnablePtr, const Color4u8 *__restrict srcLinePtr) { // Do nothing. This is a placeholder for a manually vectorized version of this method. return 0; diff --git a/desmume/src/GPU_Operations.h b/desmume/src/GPU_Operations.h index de65f2d6e..f39e58661 100644 --- a/desmume/src/GPU_Operations.h +++ b/desmume/src/GPU_Operations.h @@ -1,5 +1,5 @@ /* - Copyright (C) 2021 DeSmuME team + Copyright (C) 2021-2023 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -47,16 +47,16 @@ public: FORCEINLINE u16 blend(const u16 colA, const u16 colB, const u16 blendEVA, const u16 blendEVB) const; FORCEINLINE u16 blend(const u16 colA, const u16 colB, const TBlendTable *blendTable) const; - template FORCEINLINE FragmentColor blend(const FragmentColor colA, const FragmentColor colB, const u16 blendEVA, const u16 blendEVB) const; + template FORCEINLINE Color4u8 blend(const Color4u8 colA, const Color4u8 colB, const u16 blendEVA, const u16 blendEVB) const; - FORCEINLINE u16 blend3D(const FragmentColor colA, const u16 colB) const; - template FORCEINLINE FragmentColor blend3D(const FragmentColor colA, const FragmentColor colB) const; + FORCEINLINE u16 blend3D(const Color4u8 colA, const u16 colB) const; + template FORCEINLINE Color4u8 blend3D(const Color4u8 colA, const Color4u8 colB) const; FORCEINLINE u16 increase(const u16 col, const u16 blendEVY) const; - template FORCEINLINE FragmentColor increase(const FragmentColor col, const u16 blendEVY) const; + template FORCEINLINE Color4u8 increase(const Color4u8 col, const u16 blendEVY) const; FORCEINLINE u16 decrease(const u16 col, const u16 blendEVY) const; - template FORCEINLINE FragmentColor decrease(const FragmentColor col, const u16 blendEVY) const; + template FORCEINLINE Color4u8 decrease(const Color4u8 col, const u16 blendEVY) const; }; class PixelOperation @@ -66,31 +66,31 @@ private: protected: template FORCEINLINE void _copy16(GPUEngineCompositorInfo &compInfo, const u16 srcColor16) const; - template FORCEINLINE void _copy32(GPUEngineCompositorInfo &compInfo, const FragmentColor srcColor32) const; + template FORCEINLINE void _copy32(GPUEngineCompositorInfo &compInfo, const Color4u8 srcColor32) const; template FORCEINLINE void _brightnessUp16(GPUEngineCompositorInfo &compInfo, const u16 srcColor16) const; - template FORCEINLINE void _brightnessUp32(GPUEngineCompositorInfo &compInfo, const FragmentColor srcColor32) const; + template FORCEINLINE void _brightnessUp32(GPUEngineCompositorInfo &compInfo, const Color4u8 srcColor32) const; template FORCEINLINE void _brightnessDown16(GPUEngineCompositorInfo &compInfo, const u16 srcColor16) const; - template FORCEINLINE void _brightnessDown32(GPUEngineCompositorInfo &compInfo, const FragmentColor srcColor32) const; + template FORCEINLINE void _brightnessDown32(GPUEngineCompositorInfo &compInfo, const Color4u8 srcColor32) const; template FORCEINLINE void _unknownEffect16(GPUEngineCompositorInfo &compInfo, const u16 srcColor16, const bool enableColorEffect, const u8 spriteAlpha, const OBJMode spriteMode) const; - template FORCEINLINE void _unknownEffect32(GPUEngineCompositorInfo &compInfo, const FragmentColor srcColor32, const bool enableColorEffect, const u8 spriteAlpha, const OBJMode spriteMode) const; + template FORCEINLINE void _unknownEffect32(GPUEngineCompositorInfo &compInfo, const Color4u8 srcColor32, const bool enableColorEffect, const u8 spriteAlpha, const OBJMode spriteMode) const; public: static CACHE_ALIGN u8 BlendTable555[17][17][32][32]; static CACHE_ALIGN u16 BrightnessUpTable555[17][0x8000]; - static CACHE_ALIGN FragmentColor BrightnessUpTable666[17][0x8000]; - static CACHE_ALIGN FragmentColor BrightnessUpTable888[17][0x8000]; + static CACHE_ALIGN Color4u8 BrightnessUpTable666[17][0x8000]; + static CACHE_ALIGN Color4u8 BrightnessUpTable888[17][0x8000]; static CACHE_ALIGN u16 BrightnessDownTable555[17][0x8000]; - static CACHE_ALIGN FragmentColor BrightnessDownTable666[17][0x8000]; - static CACHE_ALIGN FragmentColor BrightnessDownTable888[17][0x8000]; + static CACHE_ALIGN Color4u8 BrightnessDownTable666[17][0x8000]; + static CACHE_ALIGN Color4u8 BrightnessDownTable888[17][0x8000]; static void InitLUTs(); PixelOperation() {}; template FORCEINLINE void Composite16(GPUEngineCompositorInfo &compInfo, const u16 srcColor16, const bool enableColorEffect, const u8 spriteAlpha, const u8 spriteMode) const; - template FORCEINLINE void Composite32(GPUEngineCompositorInfo &compInfo, FragmentColor srcColor32, const bool enableColorEffect, const u8 spriteAlpha, const u8 spriteMode) const; + template FORCEINLINE void Composite32(GPUEngineCompositorInfo &compInfo, Color4u8 srcColor32, const bool enableColorEffect, const u8 spriteAlpha, const u8 spriteMode) const; }; #endif // GPU_OPERATIONS_H diff --git a/desmume/src/GPU_Operations_AVX2.cpp b/desmume/src/GPU_Operations_AVX2.cpp index f27115eeb..78e5e9823 100644 --- a/desmume/src/GPU_Operations_AVX2.cpp +++ b/desmume/src/GPU_Operations_AVX2.cpp @@ -1,5 +1,5 @@ /* - Copyright (C) 2021-2022 DeSmuME team + Copyright (C) 2021-2023 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -2505,7 +2505,7 @@ void GPUEngineBase::_MosaicLine(GPUEngineCompositorInfo &compInfo) } template -void GPUEngineBase::_CompositeNativeLineOBJ_LoopOp(GPUEngineCompositorInfo &compInfo, const u16 *__restrict srcColorNative16, const FragmentColor *__restrict srcColorNative32) +void GPUEngineBase::_CompositeNativeLineOBJ_LoopOp(GPUEngineCompositorInfo &compInfo, const u16 *__restrict srcColorNative16, const Color4u8 *__restrict srcColorNative32) { static const size_t step = sizeof(v256u8); @@ -2749,10 +2749,10 @@ size_t GPUEngineBase::_CompositeVRAMLineDeferred_LoopOp(GPUEngineCompositorInfo case NDSColorFormat_BGR888_Rev: { const v256u32 src32[4] = { - _mm256_load_si256((v256u32 *)((FragmentColor *)vramColorPtr + i) + 0), - _mm256_load_si256((v256u32 *)((FragmentColor *)vramColorPtr + i) + 1), - _mm256_load_si256((v256u32 *)((FragmentColor *)vramColorPtr + i) + 2), - _mm256_load_si256((v256u32 *)((FragmentColor *)vramColorPtr + i) + 3) + _mm256_load_si256((v256u32 *)((Color4u8 *)vramColorPtr + i) + 0), + _mm256_load_si256((v256u32 *)((Color4u8 *)vramColorPtr + i) + 1), + _mm256_load_si256((v256u32 *)((Color4u8 *)vramColorPtr + i) + 2), + _mm256_load_si256((v256u32 *)((Color4u8 *)vramColorPtr + i) + 3) }; if (LAYERTYPE != GPULayerType_OBJ) @@ -2906,7 +2906,7 @@ void GPUEngineBase::_PerformWindowTestingNative(GPUEngineCompositorInfo &compInf } template -size_t GPUEngineA::_RenderLine_Layer3D_LoopOp(GPUEngineCompositorInfo &compInfo, const u8 *__restrict windowTestPtr, const u8 *__restrict colorEffectEnablePtr, const FragmentColor *__restrict srcLinePtr) +size_t GPUEngineA::_RenderLine_Layer3D_LoopOp(GPUEngineCompositorInfo &compInfo, const u8 *__restrict windowTestPtr, const u8 *__restrict colorEffectEnablePtr, const Color4u8 *__restrict srcLinePtr) { static const size_t step = sizeof(v256u32); diff --git a/desmume/src/GPU_Operations_SSE2.cpp b/desmume/src/GPU_Operations_SSE2.cpp index 77c184ed5..3aca64de4 100644 --- a/desmume/src/GPU_Operations_SSE2.cpp +++ b/desmume/src/GPU_Operations_SSE2.cpp @@ -1,5 +1,5 @@ /* - Copyright (C) 2021-2022 DeSmuME team + Copyright (C) 2021-2023 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -2287,7 +2287,7 @@ void GPUEngineBase::_MosaicLine(GPUEngineCompositorInfo &compInfo) } template -void GPUEngineBase::_CompositeNativeLineOBJ_LoopOp(GPUEngineCompositorInfo &compInfo, const u16 *__restrict srcColorNative16, const FragmentColor *__restrict srcColorNative32) +void GPUEngineBase::_CompositeNativeLineOBJ_LoopOp(GPUEngineCompositorInfo &compInfo, const u16 *__restrict srcColorNative16, const Color4u8 *__restrict srcColorNative32) { static const size_t step = sizeof(v128u8); @@ -2530,10 +2530,10 @@ size_t GPUEngineBase::_CompositeVRAMLineDeferred_LoopOp(GPUEngineCompositorInfo case NDSColorFormat_BGR888_Rev: { const v128u32 src32[4] = { - _mm_load_si128((v128u32 *)((FragmentColor *)vramColorPtr + i) + 0), - _mm_load_si128((v128u32 *)((FragmentColor *)vramColorPtr + i) + 1), - _mm_load_si128((v128u32 *)((FragmentColor *)vramColorPtr + i) + 2), - _mm_load_si128((v128u32 *)((FragmentColor *)vramColorPtr + i) + 3) + _mm_load_si128((v128u32 *)((Color4u8 *)vramColorPtr + i) + 0), + _mm_load_si128((v128u32 *)((Color4u8 *)vramColorPtr + i) + 1), + _mm_load_si128((v128u32 *)((Color4u8 *)vramColorPtr + i) + 2), + _mm_load_si128((v128u32 *)((Color4u8 *)vramColorPtr + i) + 3) }; if (LAYERTYPE != GPULayerType_OBJ) @@ -2688,7 +2688,7 @@ void GPUEngineBase::_PerformWindowTestingNative(GPUEngineCompositorInfo &compInf } template -size_t GPUEngineA::_RenderLine_Layer3D_LoopOp(GPUEngineCompositorInfo &compInfo, const u8 *__restrict windowTestPtr, const u8 *__restrict colorEffectEnablePtr, const FragmentColor *__restrict srcLinePtr) +size_t GPUEngineA::_RenderLine_Layer3D_LoopOp(GPUEngineCompositorInfo &compInfo, const u8 *__restrict windowTestPtr, const u8 *__restrict colorEffectEnablePtr, const Color4u8 *__restrict srcLinePtr) { static const size_t step = sizeof(v128u8); diff --git a/desmume/src/OGLRender.cpp b/desmume/src/OGLRender.cpp index e4021e105..9e8a430f0 100755 --- a/desmume/src/OGLRender.cpp +++ b/desmume/src/OGLRender.cpp @@ -1163,7 +1163,7 @@ OpenGLRenderer::OpenGLRenderer() memset(ref, 0, sizeof(OGLRenderRef)); _mappedFramebuffer = NULL; - _workingTextureUnpackBuffer = (FragmentColor *)malloc_alignedCacheLine(1024 * 1024 * sizeof(FragmentColor)); + _workingTextureUnpackBuffer = (Color4u8 *)malloc_alignedCacheLine(1024 * 1024 * sizeof(Color4u8)); _pixelReadNeedsFinish = false; _needsZeroDstAlphaPass = true; _currentPolyIndex = 0; @@ -1352,8 +1352,8 @@ bool OpenGLRenderer::IsVersionSupported(unsigned int checkVersionMajor, unsigned return result; } -Render3DError OpenGLRenderer::_FlushFramebufferFlipAndConvertOnCPU(const FragmentColor *__restrict srcFramebuffer, - FragmentColor *__restrict dstFramebufferMain, u16 *__restrict dstFramebuffer16, +Render3DError OpenGLRenderer::_FlushFramebufferFlipAndConvertOnCPU(const Color4u8 *__restrict srcFramebuffer, + Color4u8 *__restrict dstFramebufferMain, u16 *__restrict dstFramebuffer16, bool doFramebufferFlip, bool doFramebufferConvert) { if ( ((dstFramebufferMain == NULL) && (dstFramebuffer16 == NULL)) || (srcFramebuffer == NULL) ) @@ -1389,7 +1389,7 @@ Render3DError OpenGLRenderer::_FlushFramebufferFlipAndConvertOnCPU(const Fragmen #endif for (; i < this->_framebufferPixCount; i++) { - dstFramebufferMain[i].color = ColorspaceCopy32(srcFramebuffer[i]); + dstFramebufferMain[i].value = ColorspaceCopy32(srcFramebuffer[i]); dstFramebuffer16[i] = ColorspaceConvert8888To5551(srcFramebuffer[i]); } @@ -1429,7 +1429,7 @@ Render3DError OpenGLRenderer::_FlushFramebufferFlipAndConvertOnCPU(const Fragmen #endif for (; i < this->_framebufferPixCount; i++) { - dstFramebufferMain[i].color = ColorspaceConvert8888To6665(srcFramebuffer[i]); + dstFramebufferMain[i].value = ColorspaceConvert8888To6665(srcFramebuffer[i]); dstFramebuffer16[i] = ColorspaceConvert8888To5551(srcFramebuffer[i]); } @@ -1467,7 +1467,7 @@ Render3DError OpenGLRenderer::_FlushFramebufferFlipAndConvertOnCPU(const Fragmen #endif for (; i < this->_framebufferPixCount; i++) { - dstFramebufferMain[i].color = ColorspaceCopy32(srcFramebuffer[i]); + dstFramebufferMain[i].value = ColorspaceCopy32(srcFramebuffer[i]); dstFramebuffer16[i] = ColorspaceConvert8888To5551(srcFramebuffer[i]); } @@ -1514,7 +1514,7 @@ Render3DError OpenGLRenderer::_FlushFramebufferFlipAndConvertOnCPU(const Fragmen #endif for (; x < pixCount; x++, ir++, iw++) { - dstFramebufferMain[iw].color = ColorspaceCopy32(srcFramebuffer[ir]); + dstFramebufferMain[iw].value = ColorspaceCopy32(srcFramebuffer[ir]); dstFramebuffer16[iw] = ColorspaceConvert8888To5551(srcFramebuffer[ir]); } } @@ -1566,7 +1566,7 @@ Render3DError OpenGLRenderer::_FlushFramebufferFlipAndConvertOnCPU(const Fragmen #endif for (; x < pixCount; x++, ir++, iw++) { - dstFramebufferMain[iw].color = ColorspaceConvert8888To6665(srcFramebuffer[ir]); + dstFramebufferMain[iw].value = ColorspaceConvert8888To6665(srcFramebuffer[ir]); dstFramebuffer16[iw] = ColorspaceConvert8888To5551(srcFramebuffer[ir]); } } @@ -1616,7 +1616,7 @@ Render3DError OpenGLRenderer::_FlushFramebufferFlipAndConvertOnCPU(const Fragmen #endif for (; x < pixCount; x++, ir++, iw++) { - dstFramebufferMain[iw].color = ColorspaceCopy32(srcFramebuffer[ir]); + dstFramebufferMain[iw].value = ColorspaceCopy32(srcFramebuffer[ir]); dstFramebuffer16[iw] = ColorspaceConvert8888To5551(srcFramebuffer[ir]); } } @@ -1649,7 +1649,7 @@ Render3DError OpenGLRenderer::_FlushFramebufferFlipAndConvertOnCPU(const Fragmen return RENDER3DERROR_NOERR; } -Render3DError OpenGLRenderer::FlushFramebuffer(const FragmentColor *__restrict srcFramebuffer, FragmentColor *__restrict dstFramebufferMain, u16 *__restrict dstFramebuffer16) +Render3DError OpenGLRenderer::FlushFramebuffer(const Color4u8 *__restrict srcFramebuffer, Color4u8 *__restrict dstFramebufferMain, u16 *__restrict dstFramebuffer16) { if (this->willFlipAndConvertFramebufferOnGPU && this->isPBOSupported) { @@ -1666,7 +1666,7 @@ Render3DError OpenGLRenderer::FlushFramebuffer(const FragmentColor *__restrict s return RENDER3DERROR_NOERR; } -FragmentColor* OpenGLRenderer::GetFramebuffer() +Color4u8* OpenGLRenderer::GetFramebuffer() { return (this->willFlipAndConvertFramebufferOnGPU && this->isPBOSupported) ? this->_mappedFramebuffer : GPU->GetEngineMain()->Get3DFramebufferMain(); } @@ -2683,7 +2683,7 @@ Render3DError OpenGLRenderer_1_2::CreatePBOs() glGenBuffersARB(1, &OGLRef.pboRenderDataID); glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, OGLRef.pboRenderDataID); glBufferDataARB(GL_PIXEL_PACK_BUFFER_ARB, this->_framebufferColorSizeBytes, NULL, GL_STREAM_READ_ARB); - this->_mappedFramebuffer = (FragmentColor *__restrict)glMapBuffer(GL_PIXEL_PACK_BUFFER_ARB, GL_READ_ONLY_ARB); + this->_mappedFramebuffer = (Color4u8 *__restrict)glMapBuffer(GL_PIXEL_PACK_BUFFER_ARB, GL_READ_ONLY_ARB); return OGLERROR_NOERR; } @@ -4352,11 +4352,11 @@ Render3DError OpenGLRenderer_1_2::BeginRender(const GFX3D_State &renderState, co if (this->_enableEdgeMark && this->_deviceInfo.isEdgeMarkSupported) { const u8 alpha8 = (renderState.DISP3DCNT.EnableAntialiasing) ? 0x80 : 0xFF; - FragmentColor edgeColor32[8]; + Color4u8 edgeColor32[8]; for (size_t i = 0; i < 8; i++) { - edgeColor32[i].color = COLOR555TO8888(renderState.edgeMarkColorTable[i] & 0x7FFF, alpha8); + edgeColor32[i].value = COLOR555TO8888(renderState.edgeMarkColorTable[i] & 0x7FFF, alpha8); } glActiveTexture(GL_TEXTURE0 + OGLTextureUnitID_LookupTable); @@ -4760,7 +4760,7 @@ Render3DError OpenGLRenderer_1_2::ClearUsingImage(const u16 *__restrict colorBuf return OGLERROR_NOERR; } -Render3DError OpenGLRenderer_1_2::ClearUsingValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes) +Render3DError OpenGLRenderer_1_2::ClearUsingValues(const Color4u8 &clearColor6665, const FragmentAttributes &clearAttributes) { OGLRenderRef &OGLRef = *this->ref; @@ -5250,7 +5250,7 @@ Render3DError OpenGLRenderer_1_2::RenderFinish() if (this->isPBOSupported) { - this->_mappedFramebuffer = (FragmentColor *__restrict)glMapBufferARB(GL_PIXEL_PACK_BUFFER_ARB, GL_READ_ONLY_ARB); + this->_mappedFramebuffer = (Color4u8 *__restrict)glMapBufferARB(GL_PIXEL_PACK_BUFFER_ARB, GL_READ_ONLY_ARB); } else { @@ -5273,7 +5273,7 @@ Render3DError OpenGLRenderer_1_2::RenderFlush(bool willFlushBuffer32, bool willF return RENDER3DERROR_NOERR; } - FragmentColor *framebufferMain = (willFlushBuffer32) ? GPU->GetEngineMain()->Get3DFramebufferMain() : NULL; + Color4u8 *framebufferMain = (willFlushBuffer32) ? GPU->GetEngineMain()->Get3DFramebufferMain() : NULL; u16 *framebuffer16 = (willFlushBuffer16) ? GPU->GetEngineMain()->Get3DFramebuffer16() : NULL; if (this->isPBOSupported) @@ -5305,7 +5305,7 @@ Render3DError OpenGLRenderer_1_2::SetFramebufferSize(size_t w, size_t h) glFinish(); - const size_t newFramebufferColorSizeBytes = w * h * sizeof(FragmentColor); + const size_t newFramebufferColorSizeBytes = w * h * sizeof(Color4u8); if (this->isPBOSupported) { @@ -5319,7 +5319,7 @@ Render3DError OpenGLRenderer_1_2::SetFramebufferSize(size_t w, size_t h) if (this->_mappedFramebuffer != NULL) { - this->_mappedFramebuffer = (FragmentColor *__restrict)glMapBufferARB(GL_PIXEL_PACK_BUFFER_ARB, GL_READ_ONLY_ARB); + this->_mappedFramebuffer = (Color4u8 *__restrict)glMapBufferARB(GL_PIXEL_PACK_BUFFER_ARB, GL_READ_ONLY_ARB); glFinish(); } } @@ -5363,8 +5363,8 @@ Render3DError OpenGLRenderer_1_2::SetFramebufferSize(size_t w, size_t h) } else { - FragmentColor *oldFramebufferColor = this->_framebufferColor; - FragmentColor *newFramebufferColor = (FragmentColor *)malloc_alignedCacheLine(newFramebufferColorSizeBytes); + Color4u8 *oldFramebufferColor = this->_framebufferColor; + Color4u8 *newFramebufferColor = (Color4u8 *)malloc_alignedCacheLine(newFramebufferColorSizeBytes); this->_framebufferColor = newFramebufferColor; free_aligned(oldFramebufferColor); } @@ -5562,11 +5562,11 @@ Render3DError OpenGLRenderer_2_0::BeginRender(const GFX3D_State &renderState, co if (this->_enableEdgeMark && this->_deviceInfo.isEdgeMarkSupported) { const u8 alpha8 = (renderState.DISP3DCNT.EnableAntialiasing) ? 0x80 : 0xFF; - FragmentColor edgeColor32[8]; + Color4u8 edgeColor32[8]; for (size_t i = 0; i < 8; i++) { - edgeColor32[i].color = COLOR555TO8888(renderState.edgeMarkColorTable[i] & 0x7FFF, alpha8); + edgeColor32[i].value = COLOR555TO8888(renderState.edgeMarkColorTable[i] & 0x7FFF, alpha8); } glActiveTexture(GL_TEXTURE0 + OGLTextureUnitID_LookupTable); @@ -5661,7 +5661,7 @@ Render3DError OpenGLRenderer_2_1::RenderFinish() return OGLERROR_BEGINGL_FAILED; } - this->_mappedFramebuffer = (FragmentColor *__restrict)glMapBuffer(GL_PIXEL_PACK_BUFFER, GL_READ_ONLY); + this->_mappedFramebuffer = (Color4u8 *__restrict)glMapBuffer(GL_PIXEL_PACK_BUFFER, GL_READ_ONLY); ENDGL(); } @@ -5679,7 +5679,7 @@ Render3DError OpenGLRenderer_2_1::RenderFlush(bool willFlushBuffer32, bool willF return RENDER3DERROR_NOERR; } - FragmentColor *framebufferMain = (willFlushBuffer32) ? GPU->GetEngineMain()->Get3DFramebufferMain() : NULL; + Color4u8 *framebufferMain = (willFlushBuffer32) ? GPU->GetEngineMain()->Get3DFramebufferMain() : NULL; u16 *framebuffer16 = (willFlushBuffer16) ? GPU->GetEngineMain()->Get3DFramebuffer16() : NULL; this->FlushFramebuffer(this->_mappedFramebuffer, framebufferMain, framebuffer16); diff --git a/desmume/src/OGLRender.h b/desmume/src/OGLRender.h index 5ad7c309f..ada53834b 100755 --- a/desmume/src/OGLRender.h +++ b/desmume/src/OGLRender.h @@ -366,23 +366,26 @@ enum OGLPolyDrawMode union GLvec2 { + GLfloat vec[2]; struct { GLfloat x, y; }; - GLfloat v[2]; }; +typedef union GLvec2 GLvec2; union GLvec3 { + GLfloat vec[3]; struct { GLfloat r, g, b; }; struct { GLfloat x, y, z; }; - GLfloat v[3]; }; +typedef union GLvec3 GLvec3; union GLvec4 { + GLfloat vec[4]; struct { GLfloat r, g, b, a; }; struct { GLfloat x, y, z, w; }; - GLfloat v[4]; }; +typedef union GLvec4 GLvec4; struct OGLVertex { @@ -390,6 +393,7 @@ struct OGLVertex GLvec2 texCoord; GLvec3 color; }; +typedef struct OGLVertex OGLVertex; struct OGLRenderStates { @@ -405,6 +409,7 @@ struct OGLRenderStates GLvec4 edgeColor[8]; GLvec4 toonColor[32]; }; +typedef struct OGLRenderStates OGLRenderStates; union OGLPolyStates { @@ -429,6 +434,7 @@ union OGLPolyStates u8 :7; }; }; +typedef union OGLPolyStates OGLPolyStates; union OGLGeometryFlags { @@ -602,6 +608,7 @@ struct OGLRenderRef CACHE_ALIGN GLuint workingCIDepthStencilBuffer[2][GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT]; CACHE_ALIGN GLuint workingCIFogAttributesBuffer[2][GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT]; }; +typedef struct OGLRenderRef OGLRenderRef; struct GFX3D_State; struct POLY; @@ -696,8 +703,8 @@ private: unsigned int versionRevision; private: - Render3DError _FlushFramebufferFlipAndConvertOnCPU(const FragmentColor *__restrict srcFramebuffer, - FragmentColor *__restrict dstFramebufferMain, u16 *__restrict dstFramebuffer16, + Render3DError _FlushFramebufferFlipAndConvertOnCPU(const Color4u8 *__restrict srcFramebuffer, + Color4u8 *__restrict dstFramebufferMain, u16 *__restrict dstFramebuffer16, bool doFramebufferFlip, bool doFramebufferConvert); protected: @@ -721,8 +728,8 @@ protected: bool _emulateDepthLEqualPolygonFacing; bool _isDepthLEqualPolygonFacingSupported; - FragmentColor *_mappedFramebuffer; - FragmentColor *_workingTextureUnpackBuffer; + Color4u8 *_mappedFramebuffer; + Color4u8 *_workingTextureUnpackBuffer; bool _pixelReadNeedsFinish; bool _needsZeroDstAlphaPass; size_t _currentPolyIndex; @@ -738,7 +745,7 @@ protected: int _selectedMultisampleSize; size_t _clearImageIndex; - Render3DError FlushFramebuffer(const FragmentColor *__restrict srcFramebuffer, FragmentColor *__restrict dstFramebufferMain, u16 *__restrict dstFramebuffer16); + Render3DError FlushFramebuffer(const Color4u8 *__restrict srcFramebuffer, Color4u8 *__restrict dstFramebufferMain, u16 *__restrict dstFramebuffer16); OpenGLTexture* GetLoadedTextureFromPolygon(const POLY &thePoly, bool enableTexturing); template size_t DrawPolygonsForIndexRange(const POLY *rawPolyList, const CPoly *clippedPolyList, const size_t clippedPolyCount, size_t firstIndex, size_t lastIndex, size_t &indexOffset, POLYGON_ATTR &lastPolyAttr); @@ -824,7 +831,7 @@ public: void SetVersion(unsigned int major, unsigned int minor, unsigned int revision); bool IsVersionSupported(unsigned int checkVersionMajor, unsigned int checkVersionMinor, unsigned int checkVersionRevision) const; - virtual FragmentColor* GetFramebuffer(); + virtual Color4u8* GetFramebuffer(); virtual GLsizei GetLimitedMultisampleSize() const; Render3DError ApplyRenderingSettings(const GFX3D_State &renderState); @@ -885,7 +892,7 @@ protected: virtual Render3DError EndRender(); virtual Render3DError ClearUsingImage(const u16 *__restrict colorBuffer, const u32 *__restrict depthBuffer, const u8 *__restrict fogBuffer, const u8 opaquePolyID); - virtual Render3DError ClearUsingValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes); + virtual Render3DError ClearUsingValues(const Color4u8 &clearColor6665, const FragmentAttributes &clearAttributes); virtual void SetPolygonIndex(const size_t index); virtual Render3DError SetupPolygon(const POLY &thePoly, bool treatAsTranslucent, bool willChangeStencilBuffer, bool isBackFacing); diff --git a/desmume/src/OGLRender_3_2.cpp b/desmume/src/OGLRender_3_2.cpp index 3ca13907b..9f6665fe0 100644 --- a/desmume/src/OGLRender_3_2.cpp +++ b/desmume/src/OGLRender_3_2.cpp @@ -2359,7 +2359,7 @@ Render3DError OpenGLRenderer_3_2::ClearUsingImage(const u16 *__restrict colorBuf return OGLERROR_NOERR; } -Render3DError OpenGLRenderer_3_2::ClearUsingValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes) +Render3DError OpenGLRenderer_3_2::ClearUsingValues(const Color4u8 &clearColor6665, const FragmentAttributes &clearAttributes) { OGLRenderRef &OGLRef = *this->ref; OGLRef.selectedRenderingFBO = (this->_enableMultisampledRendering) ? OGLRef.fboMSIntermediateRenderID : OGLRef.fboRenderID; @@ -2536,12 +2536,12 @@ Render3DError OpenGLRenderer_3_2::SetFramebufferSize(size_t w, size_t h) glFinish(); } - const size_t newFramebufferColorSizeBytes = w * h * sizeof(FragmentColor); + const size_t newFramebufferColorSizeBytes = w * h * sizeof(Color4u8); glBufferData(GL_PIXEL_PACK_BUFFER, newFramebufferColorSizeBytes, NULL, GL_STREAM_READ); if (this->_mappedFramebuffer != NULL) { - this->_mappedFramebuffer = (FragmentColor *__restrict)glMapBuffer(GL_PIXEL_PACK_BUFFER, GL_READ_ONLY); + this->_mappedFramebuffer = (Color4u8 *__restrict)glMapBuffer(GL_PIXEL_PACK_BUFFER, GL_READ_ONLY); glFinish(); } diff --git a/desmume/src/OGLRender_3_2.h b/desmume/src/OGLRender_3_2.h index 85aa39cd4..7f14d0b59 100644 --- a/desmume/src/OGLRender_3_2.h +++ b/desmume/src/OGLRender_3_2.h @@ -106,7 +106,7 @@ protected: virtual Render3DError PostprocessFramebuffer(); virtual Render3DError ClearUsingImage(const u16 *__restrict colorBuffer, const u32 *__restrict depthBuffer, const u8 *__restrict fogBuffer, const u8 opaquePolyID); - virtual Render3DError ClearUsingValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes); + virtual Render3DError ClearUsingValues(const Color4u8 &clearColor6665, const FragmentAttributes &clearAttributes); virtual void SetPolygonIndex(const size_t index); virtual Render3DError SetupPolygon(const POLY &thePoly, bool treatAsTranslucent, bool willChangeStencilBuffer, bool isBackFacing); diff --git a/desmume/src/gfx3d.cpp b/desmume/src/gfx3d.cpp index d48af0b71..f4379b278 100644 --- a/desmume/src/gfx3d.cpp +++ b/desmume/src/gfx3d.cpp @@ -513,9 +513,9 @@ NDSGeometryEngine::NDSGeometryEngine() void NDSGeometryEngine::__Init() { - static const Vector16x2 zeroVec16x2 = {0, 0}; - static const Vector16x3 zeroVec16x3 = {0, 0, 0}; - static const Vector32x4 zeroVec32x4 = {0, 0, 0, 0}; + static const Vector2s16 zeroVec2s16 = {0, 0}; + static const Vector3s16 zeroVec3s16 = {0, 0, 0}; + static const Vector4s32 zeroVec4s32 = {0, 0, 0, 0}; _mtxCurrentMode = MATRIXMODE_PROJECTION; @@ -529,8 +529,8 @@ void NDSGeometryEngine::__Init() for (size_t i = 0; i < NDSMATRIXSTACK_COUNT(MATRIXMODE_POSITION_VECTOR); i++) { MatrixInit(_mtxStackPositionVector[i]); } MatrixInit(_mtxStackTexture[0]); - _vecScale = zeroVec32x4; - _vecTranslate = zeroVec32x4; + _vecScale = zeroVec4s32; + _vecTranslate = zeroVec4s32; _mtxStackIndex[MATRIXMODE_PROJECTION] = 0; _mtxStackIndex[MATRIXMODE_POSITION] = 0; @@ -560,13 +560,8 @@ void NDSGeometryEngine::__Init() _vtxColor666X.b = 63; _vtxColor666X.a = 0; - _vtxColorFloat[0] = (float)_vtxColor666X.r; - _vtxColorFloat[1] = (float)_vtxColor666X.g; - _vtxColorFloat[2] = (float)_vtxColor666X.b; - _vtxColorFloat[3] = (float)_vtxColor666X.a; - - _vtxCoord16 = zeroVec16x3; - _vecNormal = zeroVec32x4; + _vtxCoord16 = zeroVec3s16; + _vecNormal = zeroVec4s32; _regViewport.X1 = 0; _regViewport.Y1 = 0; @@ -579,11 +574,9 @@ void NDSGeometryEngine::__Init() _currentViewport.height = GPU_FRAMEBUFFER_NATIVE_HEIGHT; _texCoordTransformMode = TextureTransformationMode_None; - _texCoord16 = zeroVec16x2; + _texCoord16 = zeroVec2s16; _texCoordTransformed.s = (s32)_texCoord16.s; _texCoordTransformed.t = (s32)_texCoord16.t; - _texCoordTransformedFloat[0] = (float)_texCoordTransformed.s / 16.0f; - _texCoordTransformedFloat[1] = (float)_texCoordTransformed.t / 16.0f; _doesViewportNeedUpdate = true; _doesVertexColorNeedUpdate = true; @@ -610,15 +603,15 @@ void NDSGeometryEngine::__Init() _regLightDirection[2] = 0; _regLightDirection[3] = 0; - _vecLightDirectionTransformed[0] = zeroVec32x4; - _vecLightDirectionTransformed[1] = zeroVec32x4; - _vecLightDirectionTransformed[2] = zeroVec32x4; - _vecLightDirectionTransformed[3] = zeroVec32x4; + _vecLightDirectionTransformed[0] = zeroVec4s32; + _vecLightDirectionTransformed[1] = zeroVec4s32; + _vecLightDirectionTransformed[2] = zeroVec4s32; + _vecLightDirectionTransformed[3] = zeroVec4s32; - _vecLightDirectionHalfNegative[0] = zeroVec32x4; - _vecLightDirectionHalfNegative[1] = zeroVec32x4; - _vecLightDirectionHalfNegative[2] = zeroVec32x4; - _vecLightDirectionHalfNegative[3] = zeroVec32x4; + _vecLightDirectionHalfNegative[0] = zeroVec4s32; + _vecLightDirectionHalfNegative[1] = zeroVec4s32; + _vecLightDirectionHalfNegative[2] = zeroVec4s32; + _vecLightDirectionHalfNegative[3] = zeroVec4s32; _doesLightHalfVectorNeedUpdate[0] = true; _doesLightHalfVectorNeedUpdate[1] = true; @@ -1239,7 +1232,7 @@ void NDSGeometryEngine::SetNormal(const u32 param) this->_doesTransformedTexCoordsNeedUpdate = true; } - CACHE_ALIGN Vector32x4 normalTransformed = this->_vecNormal; + CACHE_ALIGN Vector4s32 normalTransformed = this->_vecNormal; MatrixMultVec3x3(_mtxCurrent[MATRIXMODE_POSITION_VECTOR], normalTransformed.vec); //apply lighting model @@ -1331,7 +1324,7 @@ void NDSGeometryEngine::SetNormal(const u32 param) } } - const FragmentColor newVtxColor = { + const Color4u8 newVtxColor = { (u8)std::min(31, vertexColor[0]), (u8)std::min(31, vertexColor[1]), (u8)std::min(31, vertexColor[2]), @@ -1379,9 +1372,9 @@ void NDSGeometryEngine::SetVertexColor(const u32 param) } } -void NDSGeometryEngine::SetVertexColor(const FragmentColor vtxColor555X) +void NDSGeometryEngine::SetVertexColor(const Color4u8 vtxColor555X) { - if (this->_vtxColor555X.color != vtxColor555X.color) + if (this->_vtxColor555X.value != vtxColor555X.value) { this->_vtxColor15 = (vtxColor555X.r << 0) | (vtxColor555X.g << 5) | (vtxColor555X.b << 10); this->_vtxColor555X = vtxColor555X; @@ -1411,15 +1404,18 @@ void NDSGeometryEngine::SetTexturePalette(const u32 texPalette) this->_texPalette = texPalette; } -void NDSGeometryEngine::SetTextureCoordinates(const u32 param) +void NDSGeometryEngine::SetTextureCoordinates2s16(const u32 param) { - VertexCoord16x2 inTexCoord16x2; - inTexCoord16x2.value = param; - - this->SetTextureCoordinates(inTexCoord16x2); + Vector2s16 inTexCoord2s16; +#ifndef MSB_FIRST + inTexCoord2s16.value = param; +#else + inTexCoord2s16.value = (param << 16) | (param >> 16); +#endif + this->SetTextureCoordinates2s16(inTexCoord2s16); } -void NDSGeometryEngine::SetTextureCoordinates(const VertexCoord16x2 &texCoord16) +void NDSGeometryEngine::SetTextureCoordinates2s16(const Vector2s16 &texCoord16) { if (this->_texCoord16.value != texCoord16.value) { @@ -1455,83 +1451,90 @@ void NDSGeometryEngine::VertexListEnd() this->_vtxCount = 0; } -bool NDSGeometryEngine::SetCurrentVertex16x2(const u32 param) +bool NDSGeometryEngine::SetCurrentVertexPosition2s16(const u32 param) { - VertexCoord16x2 inVtxCoord16x2; - inVtxCoord16x2.value = param; - - return this->SetCurrentVertex16x2(inVtxCoord16x2); + Vector2s16 inVtxCoord2s16; +#ifndef MSB_FIRST + inVtxCoord2s16.value = param; +#else + inVtxCoord2s16.value = (param >> 16) | (param << 16); +#endif + return this->SetCurrentVertexPosition2s16(inVtxCoord2s16); } -bool NDSGeometryEngine::SetCurrentVertex16x2(const VertexCoord16x2 inVtxCoord16x2) +bool NDSGeometryEngine::SetCurrentVertexPosition2s16(const Vector2s16 inVtxCoord2s16) { if (this->_vtxCoord16CurrentIndex == 0) { - this->SetCurrentVertex16x2Immediate<0, 1>(inVtxCoord16x2); + this->SetCurrentVertexPosition2s16Immediate<0, 1>(inVtxCoord2s16); this->_vtxCoord16CurrentIndex++; return false; } - this->SetCurrentVertex16x2Immediate<2, 3>(inVtxCoord16x2); + this->SetCurrentVertexPosition2s16Immediate<2, 3>(inVtxCoord2s16); this->_vtxCoord16CurrentIndex = 0; return true; } -void NDSGeometryEngine::SetCurrentVertex10x3(const u32 param) +void NDSGeometryEngine::SetCurrentVertexPosition3s10(const u32 param) { - const VertexCoord16x3 inVtxCoord16x3 = { - (s16)( (u16)(((param << 22) >> 22) << 6) ), - (s16)( (u16)(((param << 12) >> 22) << 6) ), - (s16)( (u16)(((param << 2) >> 22) << 6) ) + const Vector3s16 inVtxCoord3s16 = { + (s16)( ((s32)((param << 22) & 0xFFC00000) / (s32)(1 << 22)) * (s32)(1 << 6) ), + (s16)( ((s32)((param << 12) & 0xFFC00000) / (s32)(1 << 22)) * (s32)(1 << 6) ), + (s16)( ((s32)((param << 2) & 0xFFC00000) / (s32)(1 << 22)) * (s32)(1 << 6) ) }; - this->SetCurrentVertex(inVtxCoord16x3); + this->SetCurrentVertexPosition(inVtxCoord3s16); } -void NDSGeometryEngine::SetCurrentVertex(const VertexCoord16x3 inVtxCoord16x3) +void NDSGeometryEngine::SetCurrentVertexPosition(const Vector3s16 inVtxCoord3s16) { - this->_vtxCoord16 = inVtxCoord16x3; + this->_vtxCoord16 = inVtxCoord3s16; } template -void NDSGeometryEngine::SetCurrentVertex16x2Immediate(const u32 param) +void NDSGeometryEngine::SetCurrentVertexPosition2s16Immediate(const u32 param) { - VertexCoord16x2 inVtxCoord16x2; - inVtxCoord16x2.value = param; + Vector2s16 inVtxCoord2s16; +#ifndef MSB_FIRST + inVtxCoord2s16.value = param; +#else + inVtxCoord2s16.value = (param >> 16) | (param << 16); +#endif - this->SetCurrentVertex16x2Immediate(inVtxCoord16x2); + this->SetCurrentVertexPosition2s16Immediate(inVtxCoord2s16); } template -void NDSGeometryEngine::SetCurrentVertex16x2Immediate(const VertexCoord16x2 inVtxCoord16x2) +void NDSGeometryEngine::SetCurrentVertexPosition2s16Immediate(const Vector2s16 inVtxCoord2s16) { if (ONE < 3) { - this->_vtxCoord16.coord[ONE] = inVtxCoord16x2.coord[0]; + this->_vtxCoord16.coord[ONE] = inVtxCoord2s16.coord[0]; } if (TWO < 3) { - this->_vtxCoord16.coord[TWO] = inVtxCoord16x2.coord[1]; + this->_vtxCoord16.coord[TWO] = inVtxCoord2s16.coord[1]; } } -void NDSGeometryEngine::SetCurrentVertex10x3Relative(const u32 param) +void NDSGeometryEngine::SetCurrentVertexPosition3s10Relative(const u32 param) { - const VertexCoord16x3 inVtxCoord16x3 = { + const Vector3s16 inVtxCoord3s16 = { (s16)( (s32)((param << 22) & 0xFFC00000) / (s32)(1 << 22) ), (s16)( (s32)((param << 12) & 0xFFC00000) / (s32)(1 << 22) ), (s16)( (s32)((param << 2) & 0xFFC00000) / (s32)(1 << 22) ) }; - this->SetCurrentVertexRelative(inVtxCoord16x3); + this->SetCurrentVertexPositionRelative(inVtxCoord3s16); } -void NDSGeometryEngine::SetCurrentVertexRelative(const VertexCoord16x3 inVtxCoord16x3) +void NDSGeometryEngine::SetCurrentVertexPositionRelative(const Vector3s16 inVtxCoord3s16) { - this->_vtxCoord16.x += inVtxCoord16x3.x; - this->_vtxCoord16.y += inVtxCoord16x3.y; - this->_vtxCoord16.z += inVtxCoord16x3.z; + this->_vtxCoord16.x += inVtxCoord3s16.x; + this->_vtxCoord16.y += inVtxCoord3s16.y; + this->_vtxCoord16.z += inVtxCoord3s16.z; } //Submit a vertex to the GE @@ -1548,7 +1551,7 @@ void NDSGeometryEngine::AddCurrentVertexToList(GFX3D_GeometryList &targetGList) return; } - CACHE_ALIGN VertexCoord32x4 vtxCoordTransformed = { + CACHE_ALIGN Vector4s32 vtxCoordTransformed = { (s32)this->_vtxCoord16.x, (s32)this->_vtxCoord16.y, (s32)this->_vtxCoord16.z, @@ -1588,10 +1591,6 @@ void NDSGeometryEngine::AddCurrentVertexToList(GFX3D_GeometryList &targetGList) this->_vtxColor666X.g = GFX3D_5TO6_LOOKUP(this->_vtxColor555X.g); this->_vtxColor666X.b = GFX3D_5TO6_LOOKUP(this->_vtxColor555X.b); this->_vtxColor666X.a = 0; - this->_vtxColorFloat[0] = (float)this->_vtxColor666X.r; - this->_vtxColorFloat[1] = (float)this->_vtxColor666X.g; - this->_vtxColorFloat[2] = (float)this->_vtxColor666X.b; - this->_vtxColorFloat[3] = (float)this->_vtxColor666X.a; this->_doesVertexColorNeedUpdate = false; } @@ -1629,8 +1628,6 @@ void NDSGeometryEngine::AddCurrentVertexToList(GFX3D_GeometryList &targetGList) break; } - this->_texCoordTransformedFloat[0] = (float)this->_texCoordTransformed.s / 16.0f; - this->_texCoordTransformedFloat[1] = (float)this->_texCoordTransformed.t / 16.0f; this->_doesTransformedTexCoordsNeedUpdate = false; } @@ -1653,24 +1650,6 @@ void NDSGeometryEngine::AddCurrentVertexToList(GFX3D_GeometryList &targetGList) vtx.texCoord = this->_texCoordTransformed; vtx.color = this->_vtxColor666X; - VERT &vert = targetGList.rawVertList[vertIndex]; - vert.coord[0] = (float)vtxCoordTransformed.x / 4096.0f; - vert.coord[1] = (float)vtxCoordTransformed.y / 4096.0f; - vert.coord[2] = (float)vtxCoordTransformed.z / 4096.0f; - vert.coord[3] = (float)vtxCoordTransformed.w / 4096.0f; - vert.texcoord[0] = this->_texCoordTransformedFloat[0]; - vert.texcoord[1] = this->_texCoordTransformedFloat[1]; - vert.texcoord[2] = 0.0f; - vert.texcoord[3] = 0.0f; - vert.fcolor[0] = this->_vtxColorFloat[0]; - vert.fcolor[1] = this->_vtxColorFloat[1]; - vert.fcolor[2] = this->_vtxColorFloat[2]; - vert.fcolor[3] = this->_vtxColorFloat[3]; - vert.color[0] = this->_vtxColor666X.r; - vert.color[1] = this->_vtxColor666X.g; - vert.color[2] = this->_vtxColor666X.b; - vert.color[3] = this->_vtxColor666X.a; - this->_vtxIndex[this->_vtxCount] = (u16)(targetGList.rawVertCount + this->_vtxCount - continuation); this->_vtxCount++; @@ -1876,7 +1855,7 @@ void NDSGeometryEngine::BoxTest() const s32 z_d = (s32)( (s16)((uz+ud) & 0xFFFF) ); //eight corners of cube - CACHE_ALIGN VertexCoord32x4 vtxPosition[8] = { + CACHE_ALIGN Vector4s32 vtxPosition[8] = { { __x, __y, __z, fixedOne }, { x_w, __y, __z, fixedOne }, { x_w, y_h, __z, fixedOne }, @@ -2029,7 +2008,7 @@ void NDSGeometryEngine::VectorTest(const u32 param) // Bits 30-31: Ignored // Convert the coordinates to 20.12 fixed-point format for our vector-matrix multiplication. - CACHE_ALIGN Vector32x4 testVec = { + CACHE_ALIGN Vector4s32 testVec = { ( (s32)((param << 22) & 0xFFC00000) / (s32)(1 << 19) ) | (s32)((param & 0x000001C0) >> 6), ( (s32)((param << 12) & 0xFFC00000) / (s32)(1 << 19) ) | (s32)((param & 0x00007000) >> 16), ( (s32)((param << 2) & 0xFFC00000) / (s32)(1 << 19) ) | (s32)((param & 0x01C00000) >> 26), @@ -2043,7 +2022,7 @@ void NDSGeometryEngine::VectorTest(const u32 param) // greater than or equal to 1.0f (or 4096 in fixed-point). All of this means that for all // values >= 1.0f or < -1.0f will result in the sign bits becoming 1111b; otherwise, the sign // bits will become 0000b. - const Vector16x3 resultVec = { + const Vector3s16 resultVec = { (s16)( ((testVec.x > 0) && (testVec.x < 4096)) ? ((s16)testVec.x & 0x0FFF) : ((s16)testVec.x | 0xF000) ), (s16)( ((testVec.y > 0) && (testVec.y < 4096)) ? ((s16)testVec.y & 0x0FFF) : ((s16)testVec.y | 0xF000) ), (s16)( ((testVec.z > 0) && (testVec.z < 4096)) ? ((s16)testVec.z & 0x0FFF) : ((s16)testVec.z | 0xF000) ) @@ -2157,9 +2136,9 @@ void NDSGeometryEngine::MatrixCopyToStack(const MatrixMode whichMatrix, const si void NDSGeometryEngine::UpdateLightDirectionHalfAngleVector(const size_t index) { - static const CACHE_ALIGN Vector32x4 lineOfSight = {0, 0, (s32)0xFFFFF000, 0}; + static const CACHE_ALIGN Vector4s32 lineOfSight = {0, 0, (s32)0xFFFFF000, 0}; - Vector32x4 half = { + Vector4s32 half = { this->_vecLightDirectionTransformed[index].x + lineOfSight.x, this->_vecLightDirectionTransformed[index].y + lineOfSight.y, this->_vecLightDirectionTransformed[index].z + lineOfSight.z, @@ -2263,9 +2242,9 @@ void NDSGeometryEngine::SaveState_LegacyFormat(GeometryEngineLegacySave &outLega outLegacySave.mtxMultiply4x3TempIndex = this->_mtxMultiply4x3TempIndex; outLegacySave.mtxMultiply3x3TempIndex = this->_mtxMultiply3x3TempIndex; - outLegacySave.vtxCoord.vec3 = this->_vtxCoord16; - outLegacySave.vtxCoord.coord[3] = 0; - outLegacySave.vtxCoord16CurrentIndex = this->_vtxCoord16CurrentIndex; + outLegacySave.vtxPosition.vec3 = this->_vtxCoord16; + outLegacySave.vtxPosition.coord[3] = 0; + outLegacySave.vtxPosition16CurrentIndex = this->_vtxCoord16CurrentIndex; outLegacySave.vtxFormat = (u32)this->_vtxFormat; outLegacySave.vecTranslate = this->_vecTranslate; @@ -2342,8 +2321,8 @@ void NDSGeometryEngine::LoadState_LegacyFormat(const GeometryEngineLegacySave &i this->_mtxMultiply4x3TempIndex = inLegacySave.mtxMultiply4x3TempIndex; this->_mtxMultiply3x3TempIndex = inLegacySave.mtxMultiply3x3TempIndex; - this->_vtxCoord16 = inLegacySave.vtxCoord.vec3; - this->_vtxCoord16CurrentIndex = inLegacySave.vtxCoord16CurrentIndex; + this->_vtxCoord16 = inLegacySave.vtxPosition.vec3; + this->_vtxCoord16CurrentIndex = inLegacySave.vtxPosition16CurrentIndex; this->_vtxFormat = (PolygonPrimitiveType)inLegacySave.vtxFormat; this->_vecTranslate = inLegacySave.vecTranslate; @@ -2673,13 +2652,13 @@ static void gfx3d_glNormal(const u32 param) static void gfx3d_glTexCoord(const u32 param) { - _gEngine.SetTextureCoordinates(param); + _gEngine.SetTextureCoordinates2s16(param); GFX_DELAY(1); } static void gfx3d_glVertex16b(const u32 param) { - const bool isVtxComplete = _gEngine.SetCurrentVertex16x2(param); + const bool isVtxComplete = _gEngine.SetCurrentVertexPosition2s16(param); if (isVtxComplete) { _gEngine.AddCurrentVertexToList(gfx3d.gList[gfx3d.pendingListIndex]); @@ -2689,7 +2668,7 @@ static void gfx3d_glVertex16b(const u32 param) static void gfx3d_glVertex10b(const u32 param) { - _gEngine.SetCurrentVertex10x3(param); + _gEngine.SetCurrentVertexPosition3s10(param); _gEngine.AddCurrentVertexToList(gfx3d.gList[gfx3d.pendingListIndex]); GFX_DELAY(8); } @@ -2697,14 +2676,14 @@ static void gfx3d_glVertex10b(const u32 param) template static void gfx3d_glVertex3_cord(const u32 param) { - _gEngine.SetCurrentVertex16x2Immediate(param); + _gEngine.SetCurrentVertexPosition2s16Immediate(param); _gEngine.AddCurrentVertexToList(gfx3d.gList[gfx3d.pendingListIndex]); GFX_DELAY(8); } static void gfx3d_glVertex_rel(const u32 param) { - _gEngine.SetCurrentVertex10x3Relative(param); + _gEngine.SetCurrentVertexPosition3s10Relative(param); _gEngine.AddCurrentVertexToList(gfx3d.gList[gfx3d.pendingListIndex]); GFX_DELAY(8); } @@ -3316,7 +3295,7 @@ size_t gfx3d_PerformClipping(const GFX3D_GeometryList &gList, CPoly *outCPolyUns { NDSVertex &vtx = cPoly.clipVtxFixed[j]; VERT &vert = cPoly.clipVerts[j]; - VertexCoord64x4 vtx64 = { + Vector4s64 vtx64 = { (s64)vtx.position.x, (s64)vtx.position.y, (s64)vtx.position.z, @@ -3398,7 +3377,7 @@ size_t gfx3d_PerformClipping(const GFX3D_GeometryList &gList, CPoly *outCPolyUns vert.v = (float)(vtx.texCoord.v / 16); } - vert.color32 = vtx.color.color; + vert.color32 = vtx.color.value; } } @@ -3812,8 +3791,8 @@ SFORMAT SF_GFX3D[] = { { "MM4I", 1, 1, &gfx3d.gEngineLegacySave.mtxMultiply4x4TempIndex}, { "MM3I", 1, 1, &gfx3d.gEngineLegacySave.mtxMultiply4x3TempIndex}, { "MMxI", 1, 1, &gfx3d.gEngineLegacySave.mtxMultiply3x3TempIndex}, - { "GSCO", 2, 4, &gfx3d.gEngineLegacySave.vtxCoord}, - { "GCOI", 1, 1, &gfx3d.gEngineLegacySave.vtxCoord16CurrentIndex}, + { "GSCO", 2, 4, &gfx3d.gEngineLegacySave.vtxPosition}, + { "GCOI", 1, 1, &gfx3d.gEngineLegacySave.vtxPosition16CurrentIndex}, { "GVFM", 4, 1, &gfx3d.gEngineLegacySave.vtxFormat}, { "GTRN", 4, 4, &gfx3d.gEngineLegacySave.vecTranslate}, { "GTRI", 1, 1, &gfx3d.gEngineLegacySave.vecTranslateCurrentIndex}, @@ -3929,8 +3908,8 @@ void gfx3d_PrepareSaveStateBufferWrite() } else // Framebuffer is at a custom size { - const FragmentColor *__restrict src = CurrentRenderer->GetFramebuffer(); - FragmentColor *__restrict dst = gfx3d.framebufferNativeSave; + const Color4u8 *__restrict src = (Color4u8 *)CurrentRenderer->GetFramebuffer(); + Color4u8 *__restrict dst = gfx3d.framebufferNativeSave; for (size_t l = 0; l < GPU_FRAMEBUFFER_NATIVE_HEIGHT; l++) { @@ -4101,7 +4080,7 @@ void gfx3d_FinishLoadStateBufferRead() switch (deviceInfo.renderID) { case RENDERID_NULL: - memset(CurrentRenderer->GetFramebuffer(), 0, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * sizeof(FragmentColor)); + memset(CurrentRenderer->GetFramebuffer(), 0, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * sizeof(Color4u8)); break; case RENDERID_SOFTRASTERIZER: @@ -4127,8 +4106,8 @@ void gfx3d_FinishLoadStateBufferRead() ColorspaceConvertBuffer8888To6665((u32 *)gfx3d.framebufferNativeSave, (u32 *)gfx3d.framebufferNativeSave, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); } - const FragmentColor *__restrict src = gfx3d.framebufferNativeSave; - FragmentColor *__restrict dst = CurrentRenderer->GetFramebuffer(); + const Color4u8 *__restrict src = gfx3d.framebufferNativeSave; + Color4u8 *__restrict dst = (Color4u8 *)CurrentRenderer->GetFramebuffer(); for (size_t l = 0; l < GPU_FRAMEBUFFER_NATIVE_HEIGHT; l++) { diff --git a/desmume/src/gfx3d.h b/desmume/src/gfx3d.h index 72a2cf98a..6a1409b33 100644 --- a/desmume/src/gfx3d.h +++ b/desmume/src/gfx3d.h @@ -178,14 +178,14 @@ typedef union u32 :24; // 8-31: Unused bits #else + u32 :24; // 8-31: Unused bits + u8 :6; // 2- 7: Unused bits u8 MtxMode:2; // 0- 1: Set matrix mode; // 0=Projection // 1=Position // 2=Position+Vector // 3=Texture - - u32 :24; // 8-31: Unused bits #endif } IOREG_MTX_MODE; // 0x04000440: MTX_MODE command port @@ -361,11 +361,11 @@ typedef union u32 :24; // 8-31: Unused bits #else + u32 :24; // 8-31: Unused bits + u8 :6; // 2- 7: Unused bits u8 DepthMode:1; // 1: Depth buffering select; 0=Z 1=W u8 YSortMode:1; // 0: Translucent polygon Y-sorting mode; 0=Auto-sort, 1=Manual-sort - - u32 :24; // 8-31: Unused bits #endif }; } IOREG_SWAP_BUFFERS; // 0x04000540: SWAP_BUFFERS command port @@ -378,10 +378,18 @@ typedef union { // Coordinate (0,0) represents the bottom-left of the screen. // Coordinate (255,191) represents the top-right of the screen. + +#ifndef MSB_FIRST u8 X1; // 0- 7: First X-coordinate; 0...255 u8 Y1; // 8-15: First Y-coordinate; 0...191 u8 X2; // 16-23: Second X-coordinate; 0...255 u8 Y2; // 24-31: Second Y-coordinate; 0...191 +#else + u8 Y2; // 24-31: Second Y-coordinate; 0...191 + u8 X2; // 16-23: Second X-coordinate; 0...255 + u8 Y1; // 8-15: First Y-coordinate; 0...191 + u8 X1; // 0- 7: First X-coordinate; 0...255 +#endif }; } IOREG_VIEWPORT; // 0x04000580: VIEWPORT command port @@ -391,6 +399,7 @@ typedef union struct { +#ifndef MSB_FIRST u8 TestBusy:1; u8 BoxTestResult:1; u8 :6; @@ -406,6 +415,23 @@ typedef union u8 EngineBusy:1; u8 :2; u8 CommandListIRQ:2; +#else + u8 :6; + u8 BoxTestResult:1; + u8 TestBusy:1; + + u8 AckMtxStackError:1; + u8 MtxStackBusy:1; + u8 ProjMtxStackLevel:1; + u8 PosVecMtxStackLevel:5; + + u8 CommandListIRQ:2; + u8 :2; + u8 EngineBusy:1; + u8 CommandListEmpty:1; + u8 CommandListLessThanHalf:1; + u16 CommandListCount:9; +#endif }; } IOREG_GXSTAT; // 0x04000600: Geometry engine status @@ -588,215 +614,22 @@ typedef struct VERT VERT; #include "PACKED_END.h" -union Vector16x2 -{ - s16 vec[2]; - s16 coord[2]; - - struct - { - s16 s, t; - }; - - struct - { - s16 u, v; - }; - - struct - { - s16 x, y; - } XY; - - struct - { - s16 y, z; - } YZ; - - struct - { - s16 x, z; - } XZ; - - u32 value; -}; -typedef union Vector16x2 Vector16x2; -typedef Vector16x2 VertexCoord16x2; - -union Vector16x3 -{ - s16 vec[3]; - s16 coord[3]; - - struct - { - s16 x, y, z; - }; -}; -typedef union Vector16x3 Vector16x3; -typedef Vector16x3 VertexCoord16x3; - -union Vector16x4 -{ - s16 vec[4]; - s16 coord[4]; - - struct - { - s16 x, y, z, w; - }; - - struct - { - Vector16x3 vec3; - s16 :16; - }; - - u64 value; -}; -typedef union Vector16x4 Vector16x4; -typedef Vector16x4 VertexCoord16x4; - -union Vector32x2 -{ - s32 vec[2]; - s32 coord[2]; - - struct - { - s32 s, t; - }; - - struct - { - s32 u, v; - }; - - struct - { - s32 x, y; - } XY; - - struct - { - s32 y, z; - } YZ; - - struct - { - s32 x, z; - } XZ; - - u64 value; -}; -typedef union Vector32x2 Vector32x2; -typedef Vector32x2 VertexCoord32x2; - -union Vector32x3 -{ - s32 vec[3]; - s32 coord[3]; - - struct - { - s32 x, y, z; - }; -}; -typedef union Vector32x3 Vector32x3; -typedef Vector32x3 VertexCoord32x3; - -union Vector32x4 -{ - s32 vec[4]; - s32 coord[4]; - - struct - { - s32 x, y, z, w; - }; - - struct - { - Vector32x3 vec3; - s32 :32; - }; -}; -typedef union Vector32x4 Vector32x4; -typedef Vector32x4 VertexCoord32x4; - -union Vector64x2 -{ - s64 vec[2]; - s64 coord[2]; - - struct - { - s64 s, t; - }; - - struct - { - s64 u, v; - }; - - struct - { - s64 x, y; - } XY; - - struct - { - s64 y, z; - } YZ; - - struct - { - s64 x, z; - } XZ; -}; -typedef union Vector64x2 Vector64x2; -typedef Vector64x2 VertexCoord64x2; - -union Vector64x3 -{ - s64 vec[3]; - s64 coord[3]; - - struct - { - s64 x, y, z; - }; -}; -typedef union Vector64x3 Vector64x3; -typedef Vector64x3 VertexCoord64x3; - -union Vector64x4 -{ - s64 vec[4]; - s64 coord[4]; - - struct - { - s64 x, y, z, w; - }; - - struct - { - Vector64x3 vec3; - s64 :64; - }; -}; -typedef union Vector64x4 Vector64x4; -typedef Vector64x4 VertexCoord64x4; - struct NDSVertex { - VertexCoord32x4 position; - VertexCoord32x2 texCoord; - FragmentColor color; + Vector4s32 position; + Vector2s32 texCoord; + Color4u8 color; }; typedef struct NDSVertex NDSVertex; +struct NDSVertexf +{ + Vector4f32 position; + Vector2f32 texCoord; + Color4f32 color; +}; +typedef struct NDSVertexf NDSVertexf; + //ok, imagine the plane that cuts diagonally across a cube such that it clips //out to be a hexagon. within that plane, draw a quad such that it cuts off //four corners of the hexagon, and you will observe a decagon @@ -842,7 +675,6 @@ typedef struct GFX3D_State GFX3D_State; struct GFX3D_GeometryList { - PAGE_ALIGN VERT rawVertList[VERTLIST_SIZE]; PAGE_ALIGN NDSVertex rawVtxList[VERTLIST_SIZE]; PAGE_ALIGN POLY rawPolyList[POLYLIST_SIZE]; PAGE_ALIGN CPoly clippedPolyList[CLIPPED_POLYLIST_SIZE]; @@ -900,13 +732,13 @@ struct GeometryEngineLegacySave u8 mtxMultiply4x3TempIndex; u8 mtxMultiply3x3TempIndex; - VertexCoord16x4 vtxCoord; - u8 vtxCoord16CurrentIndex; + Vector4s16 vtxPosition; + u8 vtxPosition16CurrentIndex; u32 vtxFormat; - Vector32x4 vecTranslate; + Vector4s32 vecTranslate; u8 vecTranslateCurrentIndex; - Vector32x4 vecScale; + Vector4s32 vecScale; u8 vecScaleCurrentIndex; u32 texCoordT; @@ -919,7 +751,7 @@ struct GeometryEngineLegacySave float positionTestVtxFloat[4]; // Historically, the position test vertices were stored as floating point values, not as integers. u16 boxTestCoord16[6]; - FragmentColor vtxColor; + Color4u8 vtxColor; u32 regLightColor[4]; u32 regLightDirection[4]; @@ -991,7 +823,7 @@ struct GFX3D // Everything below is for save state compatibility. GFX3D_LegacySave legacySave; GeometryEngineLegacySave gEngineLegacySave; - PAGE_ALIGN FragmentColor framebufferNativeSave[GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT]; // Rendered 3D framebuffer that is saved in RGBA8888 color format at the native size. + PAGE_ALIGN Color4u8 framebufferNativeSave[GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT]; // Rendered 3D framebuffer that is saved in RGBA8888 color format at the native size. }; typedef struct GFX3D GFX3D; @@ -1007,8 +839,8 @@ protected: CACHE_ALIGN NDSMatrix _tempMtxMultiply4x4; CACHE_ALIGN NDSMatrix _tempMtxMultiply4x3; CACHE_ALIGN NDSMatrix _tempMtxMultiply3x3; - CACHE_ALIGN Vector32x4 _vecTranslate; - CACHE_ALIGN Vector32x4 _vecScale; + CACHE_ALIGN Vector4s32 _vecTranslate; + CACHE_ALIGN Vector4s32 _vecScale; // Matrix stack handling CACHE_ALIGN NDSMatrixStack1 _mtxStackProjection; @@ -1016,10 +848,10 @@ protected: CACHE_ALIGN NDSMatrixStack32 _mtxStackPositionVector; CACHE_ALIGN NDSMatrixStack1 _mtxStackTexture; - CACHE_ALIGN Vector32x4 _vecNormal; - CACHE_ALIGN VertexCoord16x3 _vtxCoord16; - CACHE_ALIGN VertexCoord16x2 _texCoord16; - CACHE_ALIGN VertexCoord32x2 _texCoordTransformed; + CACHE_ALIGN Vector4s32 _vecNormal; + CACHE_ALIGN Vector3s16 _vtxCoord16; + CACHE_ALIGN Vector2s16 _texCoord16; + CACHE_ALIGN Vector2s32 _texCoordTransformed; CACHE_ALIGN u8 _shininessTablePending[128]; CACHE_ALIGN u8 _shininessTableApplied[128]; @@ -1035,10 +867,8 @@ protected: u8 _vecTranslateCurrentIndex; u32 _vtxColor15; - FragmentColor _vtxColor555X; - FragmentColor _vtxColor666X; - float _vtxColorFloat[4]; - float _texCoordTransformedFloat[2]; + Color4u8 _vtxColor555X; + Color4u8 _vtxColor666X; bool _doesViewportNeedUpdate; bool _doesVertexColorNeedUpdate; @@ -1062,7 +892,7 @@ protected: u8 _boxTestCoordCurrentIndex; u8 _positionTestCoordCurrentIndex; CACHE_ALIGN u16 _boxTestCoord16[6]; - CACHE_ALIGN VertexCoord32x4 _positionTestVtx32; + CACHE_ALIGN Vector4s32 _positionTestVtx32; u32 _regLightColor[4]; u32 _regLightDirection[4]; @@ -1072,8 +902,8 @@ protected: u16 _regEmission; u8 _shininessTablePendingIndex; - CACHE_ALIGN Vector32x4 _vecLightDirectionTransformed[4]; - CACHE_ALIGN Vector32x4 _vecLightDirectionHalfNegative[4]; + CACHE_ALIGN Vector4s32 _vecLightDirectionTransformed[4]; + CACHE_ALIGN Vector4s32 _vecLightDirectionHalfNegative[4]; bool _doesLightHalfVectorNeedUpdate[4]; // This enum serves no real functional purpose except to be used for save state compatibility. @@ -1131,24 +961,24 @@ public: void SetViewport(const IOREG_VIEWPORT regViewport); void SetViewport(const GFX3D_Viewport viewport); void SetVertexColor(const u32 param); - void SetVertexColor(const FragmentColor vtxColor555X); + void SetVertexColor(const Color4u8 vtxColor555X); void SetTextureParameters(const u32 param); void SetTextureParameters(const TEXIMAGE_PARAM texParams); void SetTexturePalette(const u32 texPalette); - void SetTextureCoordinates(const u32 param); - void SetTextureCoordinates(const VertexCoord16x2 &texCoord16); + void SetTextureCoordinates2s16(const u32 param); + void SetTextureCoordinates2s16(const Vector2s16 &texCoord16); void VertexListBegin(const u32 param, const POLYGON_ATTR polyAttr); void VertexListBegin(const PolygonPrimitiveType vtxFormat, const POLYGON_ATTR polyAttr); void VertexListEnd(); - bool SetCurrentVertex16x2(const u32 param); - bool SetCurrentVertex16x2(const VertexCoord16x2 inVtxCoord16x2); - void SetCurrentVertex10x3(const u32 param); - void SetCurrentVertex(const VertexCoord16x3 inVtxCoord16x3); - template void SetCurrentVertex16x2Immediate(const u32 param); - template void SetCurrentVertex16x2Immediate(const VertexCoord16x2 inVtxCoord16x2); - void SetCurrentVertex10x3Relative(const u32 param); - void SetCurrentVertexRelative(const VertexCoord16x3 inVtxCoord16x3); + bool SetCurrentVertexPosition2s16(const u32 param); + bool SetCurrentVertexPosition2s16(const Vector2s16 inVtxCoord16x2); + void SetCurrentVertexPosition3s10(const u32 param); + void SetCurrentVertexPosition(const Vector3s16 inVtxCoord16x3); + template void SetCurrentVertexPosition2s16Immediate(const u32 param); + template void SetCurrentVertexPosition2s16Immediate(const Vector2s16 inVtxCoord16x2); + void SetCurrentVertexPosition3s10Relative(const u32 param); + void SetCurrentVertexPositionRelative(const Vector3s16 inVtxCoord16x3); void AddCurrentVertexToList(GFX3D_GeometryList &targetGList); void GeneratePolygon(POLY &targetPoly, GFX3D_GeometryList &targetGList); diff --git a/desmume/src/rasterize.cpp b/desmume/src/rasterize.cpp index fc3a34f63..87ecac346 100644 --- a/desmume/src/rasterize.cpp +++ b/desmume/src/rasterize.cpp @@ -313,7 +313,7 @@ FORCEINLINE int edge_fx_fl::Step() { -static FORCEINLINE void alphaBlend(const bool isAlphaBlendingEnabled, const FragmentColor inSrc, FragmentColor &outDst) +static FORCEINLINE void alphaBlend(const bool isAlphaBlendingEnabled, const Color4u8 inSrc, Color4u8 &outDst) { if (inSrc.a == 0) { @@ -335,7 +335,7 @@ static FORCEINLINE void alphaBlend(const bool isAlphaBlendingEnabled, const Frag } } -static FORCEINLINE void EdgeBlend(FragmentColor &dst, const FragmentColor src) +static FORCEINLINE void EdgeBlend(Color4u8 &dst, const Color4u8 src) { if (src.a == 31 || dst.a == 0) { @@ -372,7 +372,7 @@ Render3DError RasterizerUnit::_SetupTexture(const POLY &thePoly, size_ } template -FORCEINLINE FragmentColor RasterizerUnit::_sample(const float u, const float v) +FORCEINLINE Color4u8 RasterizerUnit::_sample(const float u, const float v) { //finally, we can use floor here. but, it is slower than we want. //the best solution is probably to wait until the pipeline is full of fixed point @@ -395,8 +395,8 @@ FORCEINLINE FragmentColor RasterizerUnit::_sample(const float u, const const u32 *textureData = this->_currentTexture->GetRenderData(); this->_currentTexture->GetRenderSamplerCoordinates(this->_textureWrapMode, iu, iv); - FragmentColor color; - color.color = textureData[( iv << this->_currentTexture->GetRenderWidthShift() ) + iu]; + Color4u8 color; + color.value = textureData[( iv << this->_currentTexture->GetRenderWidthShift() ) + iu]; return color; } @@ -416,7 +416,7 @@ FORCEINLINE float RasterizerUnit::_round_s(double val) } template template -FORCEINLINE void RasterizerUnit::_shade(const PolygonMode polygonMode, const FragmentColor src, FragmentColor &dst, const float texCoordU, const float texCoordV) +FORCEINLINE void RasterizerUnit::_shade(const PolygonMode polygonMode, const Color4u8 src, Color4u8 &dst, const float texCoordU, const float texCoordV) { if (ISSHADOWPOLYGON) { @@ -426,8 +426,8 @@ FORCEINLINE void RasterizerUnit::_shade(const PolygonMode polygonMode, const GFX3D_State &renderState = *this->_softRender->currentRenderState; - static const FragmentColor colorWhite = MakeFragmentColor(0x3F, 0x3F, 0x3F, 0x1F); - const FragmentColor mainTexColor = (this->_currentTexture->IsSamplingEnabled()) ? this->_sample(texCoordU, texCoordV) : colorWhite; + static const Color4u8 colorWhite = { 0x3F, 0x3F, 0x3F, 0x1F }; + const Color4u8 mainTexColor = (this->_currentTexture->IsSamplingEnabled()) ? this->_sample(texCoordU, texCoordV) : colorWhite; switch (polygonMode) { @@ -468,7 +468,7 @@ FORCEINLINE void RasterizerUnit::_shade(const PolygonMode polygonMode, case POLYGON_MODE_TOONHIGHLIGHT: { - const FragmentColor toonColor = this->_softRender->toonColor32LUT[src.r >> 1]; + const Color4u8 toonColor = this->_softRender->toonColor32LUT[src.r >> 1]; if (renderState.DISP3DCNT.PolygonShading == PolygonShadingMode_Highlight) { @@ -502,11 +502,11 @@ FORCEINLINE void RasterizerUnit::_shade(const PolygonMode polygonMode, } template template -FORCEINLINE void RasterizerUnit::_pixel(const POLYGON_ATTR polyAttr, const bool isTranslucent, const size_t fragmentIndex, FragmentColor &dstColor, float r, float g, float b, float invu, float invv, float z, float w) +FORCEINLINE void RasterizerUnit::_pixel(const POLYGON_ATTR polyAttr, const bool isTranslucent, const size_t fragmentIndex, Color4u8 &dstColor, float r, float g, float b, float invu, float invv, float z, float w) { const GFX3D_State &renderState = *this->_softRender->currentRenderState; - FragmentColor newDstColor32; - FragmentColor shaderOutput; + Color4u8 newDstColor32; + Color4u8 shaderOutput; bool isOpaquePixel; u32 &dstAttributeDepth = this->_softRender->_framebufferAttributes->depth[fragmentIndex]; @@ -609,10 +609,10 @@ FORCEINLINE void RasterizerUnit::_pixel(const POLYGON_ATTR polyAttr, c //this is a HACK: //we are being very sloppy with our interpolation precision right now //and rather than fix it, i just want to clamp it - newDstColor32 = MakeFragmentColor(max(0x00, min(0x3F, u32floor(r))), - max(0x00, min(0x3F, u32floor(g))), - max(0x00, min(0x3F, u32floor(b))), - polyAttr.Alpha); + newDstColor32.r = max(0x00, min(0x3F, u32floor(r))); + newDstColor32.g = max(0x00, min(0x3F, u32floor(g))); + newDstColor32.b = max(0x00, min(0x3F, u32floor(b))); + newDstColor32.a = polyAttr.Alpha; //pixel shader this->_shade((PolygonMode)polyAttr.Mode, newDstColor32, shaderOutput, invu * w, invv * w); @@ -659,7 +659,7 @@ FORCEINLINE void RasterizerUnit::_pixel(const POLYGON_ATTR polyAttr, c //draws a single scanline template template -FORCEINLINE void RasterizerUnit::_drawscanline(const POLYGON_ATTR polyAttr, const bool isTranslucent, FragmentColor *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, edge_fx_fl *pLeft, edge_fx_fl *pRight) +FORCEINLINE void RasterizerUnit::_drawscanline(const POLYGON_ATTR polyAttr, const bool isTranslucent, Color4u8 *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, edge_fx_fl *pLeft, edge_fx_fl *pRight) { const int XStart = pLeft->X; int width = pRight->X - XStart; @@ -780,11 +780,11 @@ FORCEINLINE void RasterizerUnit::_drawscanline(const POLYGON_ATTR poly #ifdef ENABLE_SSE2 template template -FORCEINLINE void RasterizerUnit::_pixel_SSE2(const POLYGON_ATTR polyAttr, const bool isTranslucent, const size_t fragmentIndex, FragmentColor &dstColor, const __m128 &srcColorf, float invu, float invv, float z, float w) +FORCEINLINE void RasterizerUnit::_pixel_SSE2(const POLYGON_ATTR polyAttr, const bool isTranslucent, const size_t fragmentIndex, Color4u8 &dstColor, const __m128 &srcColorf, float invu, float invv, float z, float w) { const GFX3D_State &renderState = *this->_softRender->currentRenderState; - FragmentColor newDstColor32; - FragmentColor shaderOutput; + Color4u8 newDstColor32; + Color4u8 shaderOutput; bool isOpaquePixel; u32 &dstAttributeDepth = this->_softRender->_framebufferAttributes->depth[fragmentIndex]; @@ -889,7 +889,7 @@ FORCEINLINE void RasterizerUnit::_pixel_SSE2(const POLYGON_ATTR polyAt cvtColor32 = _mm_packus_epi16(cvtColor32, _mm_setzero_si128()); cvtColor32 = _mm_packus_epi16(cvtColor32, _mm_setzero_si128()); - newDstColor32.color = _mm_cvtsi128_si32(cvtColor32); + newDstColor32.value = _mm_cvtsi128_si32(cvtColor32); //pixel shader this->_shade((PolygonMode)polyAttr.Mode, newDstColor32, shaderOutput, invu * w, invv * w); @@ -936,7 +936,7 @@ FORCEINLINE void RasterizerUnit::_pixel_SSE2(const POLYGON_ATTR polyAt //draws a single scanline template template -FORCEINLINE void RasterizerUnit::_drawscanline_SSE2(const POLYGON_ATTR polyAttr, const bool isTranslucent, FragmentColor *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, edge_fx_fl *pLeft, edge_fx_fl *pRight) +FORCEINLINE void RasterizerUnit::_drawscanline_SSE2(const POLYGON_ATTR polyAttr, const bool isTranslucent, Color4u8 *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, edge_fx_fl *pLeft, edge_fx_fl *pRight) { const int XStart = pLeft->X; int width = pRight->X - XStart; @@ -1031,7 +1031,7 @@ FORCEINLINE void RasterizerUnit::_drawscanline_SSE2(const POLYGON_ATTR //runs several scanlines, until an edge is finished template template -void RasterizerUnit::_runscanlines(const POLYGON_ATTR polyAttr, const bool isTranslucent, FragmentColor *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, const bool isHorizontal, edge_fx_fl *left, edge_fx_fl *right) +void RasterizerUnit::_runscanlines(const POLYGON_ATTR polyAttr, const bool isTranslucent, Color4u8 *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, const bool isHorizontal, edge_fx_fl *left, edge_fx_fl *right) { //oh lord, hack city for edge drawing @@ -1065,9 +1065,9 @@ void RasterizerUnit::_runscanlines(const POLYGON_ATTR polyAttr, const #endif } - const int xl = left->X; - const int xr = right->X; - const int y = left->Y; + const size_t xl = left->X; + const size_t xr = right->X; + const size_t y = left->Y; left->Step(); right->Step(); @@ -1080,15 +1080,15 @@ void RasterizerUnit::_runscanlines(const POLYGON_ATTR polyAttr, const { if (draw) { - int nxl = left->X; - int nxr = right->X; + const size_t nxl = left->X; + const size_t nxr = right->X; if (top) { - int xs = min(xl,xr); - int xe = max(xl,xr); - for (int x = xs; x <= xe; x++) + const size_t xs = min(xl, xr); + const size_t xe = max(xl, xr); + for (size_t x = xs; x <= xe; x++) { - int adr = (y*framebufferWidth)+x; + const size_t adr = (y * framebufferWidth) + x; dstColor[adr].r = 63; dstColor[adr].g = 0; dstColor[adr].b = 0; @@ -1096,11 +1096,11 @@ void RasterizerUnit::_runscanlines(const POLYGON_ATTR polyAttr, const } else if (bottom) { - int xs = min(xl,xr); - int xe = max(xl,xr); - for (int x = xs; x <= xe; x++) + const size_t xs = min(xl, xr); + const size_t xe = max(xl, xr); + for (size_t x = xs; x <= xe; x++) { - int adr = (y*framebufferWidth)+x; + const size_t adr = (y * framebufferWidth) + x; dstColor[adr].r = 63; dstColor[adr].g = 0; dstColor[adr].b = 0; @@ -1108,20 +1108,21 @@ void RasterizerUnit::_runscanlines(const POLYGON_ATTR polyAttr, const } else { - int xs = min(xl,nxl); - int xe = max(xl,nxl); - for (int x = xs; x <= xe; x++) + size_t xs = min(xl, nxl); + size_t xe = max(xl, nxl); + for (size_t x = xs; x <= xe; x++) { - int adr = (y*framebufferWidth)+x; + const size_t adr = (y * framebufferWidth) + x; dstColor[adr].r = 63; dstColor[adr].g = 0; dstColor[adr].b = 0; } - xs = min(xr,nxr); - xe = max(xr,nxr); - for (int x = xs; x <= xe; x++) + + xs = min(xr, nxr); + xe = max(xr, nxr); + for (size_t x = xs; x <= xe; x++) { - int adr = (y*framebufferWidth)+x; + const size_t adr = (y * framebufferWidth) + x; dstColor[adr].r = 63; dstColor[adr].g = 0; dstColor[adr].b = 0; @@ -1190,7 +1191,7 @@ void RasterizerUnit::_sort_verts() //I didnt reference anything for this algorithm but it seems like I've seen it somewhere before. //Maybe it is like crow's algorithm template template -void RasterizerUnit::_shape_engine(const POLYGON_ATTR polyAttr, const bool isTranslucent, FragmentColor *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, int type) +void RasterizerUnit::_shape_engine(const POLYGON_ATTR polyAttr, const bool isTranslucent, Color4u8 *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, int type) { bool failure = false; @@ -1272,7 +1273,7 @@ FORCEINLINE void RasterizerUnit::Render() return; } - FragmentColor *dstColor = this->_softRender->GetFramebuffer(); + Color4u8 *dstColor = this->_softRender->GetFramebuffer(); const size_t dstWidth = this->_softRender->GetFramebufferWidth(); const size_t dstHeight = this->_softRender->GetFramebufferHeight(); @@ -1737,7 +1738,7 @@ SoftRasterizerRenderer::SoftRasterizerRenderer() _enableLineHack = CommonSettings.GFX3D_LineHack; _enableFragmentSamplingHack = CommonSettings.GFX3D_TXTHack; - _HACK_viewer_rasterizerUnit.SetSLI(0, _framebufferHeight, false); + _HACK_viewer_rasterizerUnit.SetSLI(0, (u32)_framebufferHeight, false); const size_t coreCount = CommonSettings.num_cores; _threadCount = coreCount; @@ -1768,7 +1769,7 @@ SoftRasterizerRenderer::SoftRasterizerRenderer() _threadClearParam[0].startPixel = 0; _threadClearParam[0].endPixel = _framebufferPixCount; - _rasterizerUnit[0].SetSLI(_threadPostprocessParam[0].startLine, _threadPostprocessParam[0].endLine, false); + _rasterizerUnit[0].SetSLI((u32)_threadPostprocessParam[0].startLine, (u32)_threadPostprocessParam[0].endLine, false); _rasterizerUnit[0].SetRenderer(this); } else @@ -1794,7 +1795,7 @@ SoftRasterizerRenderer::SoftRasterizerRenderer() _threadClearParam[i].startPixel = i * _customPixelsPerThread; _threadClearParam[i].endPixel = (i < _threadCount - 1) ? (i + 1) * _customPixelsPerThread : _framebufferPixCount; - _rasterizerUnit[i].SetSLI(_threadPostprocessParam[i].startLine, _threadPostprocessParam[i].endLine, false); + _rasterizerUnit[i].SetSLI((u32)_threadPostprocessParam[i].startLine, (u32)_threadPostprocessParam[i].endLine, false); _rasterizerUnit[i].SetRenderer(this); char name[16]; @@ -1988,7 +1989,7 @@ void SoftRasterizerRenderer::_UpdateEdgeMarkColorTable(const u16 *edgeMarkColorT //we can do this by rendering a 3d frame and then freezing the system, but only changing the edge mark colors for (size_t i = 0; i < 8; i++) { - this->_edgeMarkTable[i].color = LE_TO_LOCAL_32( COLOR555TO6665(edgeMarkColorTable[i] & 0x7FFF, (this->currentRenderState->DISP3DCNT.EnableAntialiasing) ? 0x10 : 0x1F) ); + this->_edgeMarkTable[i].value = LE_TO_LOCAL_32( COLOR555TO6665(edgeMarkColorTable[i] & 0x7FFF, (this->currentRenderState->DISP3DCNT.EnableAntialiasing) ? 0x10 : 0x1F) ); //zero 20-jun-2013 - this doesnt make any sense. at least, it should be related to the 0x8000 bit. if this is undocumented behaviour, lets write about which scenario proves it here, or which scenario is requiring this code. //// this seems to be the only thing that selectively disables edge marking @@ -2075,7 +2076,7 @@ Render3DError SoftRasterizerRenderer::RenderEdgeMarkingAndFog(const SoftRasteriz { for (size_t x = 0; x < this->_framebufferWidth; x++, i++) { - FragmentColor &dstColor = this->_framebufferColor[i]; + Color4u8 &dstColor = this->_framebufferColor[i]; const u32 depth = this->_framebufferAttributes->depth[i]; const u8 polyID = this->_framebufferAttributes->opaquePolyID[i]; @@ -2095,7 +2096,7 @@ Render3DError SoftRasterizerRenderer::RenderEdgeMarkingAndFog(const SoftRasteriz const bool left = (x < 1) ? isEdgeMarkingClearValues : ((polyID != this->_framebufferAttributes->opaquePolyID[i-1]) && (depth >= this->_framebufferAttributes->depth[i-1])); const bool up = (y < 1) ? isEdgeMarkingClearValues : ((polyID != this->_framebufferAttributes->opaquePolyID[i-this->_framebufferWidth]) && (depth >= this->_framebufferAttributes->depth[i-this->_framebufferWidth])); - FragmentColor edgeMarkColor = this->_edgeMarkTable[this->_framebufferAttributes->opaquePolyID[i] >> 3]; + Color4u8 edgeMarkColor = this->_edgeMarkTable[this->_framebufferAttributes->opaquePolyID[i] >> 3]; if (right) { @@ -2135,8 +2136,8 @@ Render3DError SoftRasterizerRenderer::RenderEdgeMarkingAndFog(const SoftRasteriz if (param.enableFog) { - FragmentColor fogColor; - fogColor.color = LE_TO_LOCAL_32( COLOR555TO6665(param.fogColor & 0x7FFF, (param.fogColor>>16) & 0x1F) ); + Color4u8 fogColor; + fogColor.value = LE_TO_LOCAL_32( COLOR555TO6665(param.fogColor & 0x7FFF, (param.fogColor>>16) & 0x1F) ); const size_t fogIndex = depth >> 9; assert(fogIndex < 32768); @@ -2194,7 +2195,7 @@ Render3DError SoftRasterizerRenderer::ClearUsingImage(const u16 *__restrict colo { const size_t ir = readLine + ((x * xRatio) >> 16); - this->_framebufferColor[iw].color = LE_TO_LOCAL_32( COLOR555TO6665(colorBuffer[ir] & 0x7FFF, (colorBuffer[ir] >> 15) * 0x1F) ); + this->_framebufferColor[iw].value = LE_TO_LOCAL_32( COLOR555TO6665(colorBuffer[ir] & 0x7FFF, (colorBuffer[ir] >> 15) * 0x1F) ); this->_framebufferAttributes->depth[iw] = depthBuffer[ir]; this->_framebufferAttributes->isFogged[iw] = fogBuffer[ir]; this->_framebufferAttributes->opaquePolyID[iw] = opaquePolyID; @@ -2217,7 +2218,7 @@ void SoftRasterizerRenderer::ClearUsingValues_Execute(const size_t startPixel, c } } -Render3DError SoftRasterizerRenderer::ClearUsingValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes) +Render3DError SoftRasterizerRenderer::ClearUsingValues(const Color4u8 &clearColor6665, const FragmentAttributes &clearAttributes) { const bool doMultithreadedClear = (this->_threadCount > 0); @@ -2333,7 +2334,7 @@ Render3DError SoftRasterizerRenderer::RenderFlush(bool willFlushBuffer32, bool w return RENDER3DERROR_NOERR; } - FragmentColor *framebufferMain = (willFlushBuffer32 && (this->_outputFormat == NDSColorFormat_BGR888_Rev)) ? GPU->GetEngineMain()->Get3DFramebufferMain() : NULL; + Color4u8 *framebufferMain = (willFlushBuffer32 && (this->_outputFormat == NDSColorFormat_BGR888_Rev)) ? GPU->GetEngineMain()->Get3DFramebufferMain() : NULL; u16 *framebuffer16 = (willFlushBuffer16) ? GPU->GetEngineMain()->Get3DFramebuffer16() : NULL; this->FlushFramebuffer(this->_framebufferColor, framebufferMain, framebuffer16); @@ -2364,7 +2365,7 @@ Render3DError SoftRasterizerRenderer::SetFramebufferSize(size_t w, size_t h) this->_threadClearParam[0].startPixel = 0; this->_threadClearParam[0].endPixel = pixCount; - this->_rasterizerUnit[0].SetSLI(this->_threadPostprocessParam[0].startLine, this->_threadPostprocessParam[0].endLine, false); + this->_rasterizerUnit[0].SetSLI((u32)this->_threadPostprocessParam[0].startLine, (u32)this->_threadPostprocessParam[0].endLine, false); } else { @@ -2379,7 +2380,7 @@ Render3DError SoftRasterizerRenderer::SetFramebufferSize(size_t w, size_t h) this->_threadClearParam[i].startPixel = i * this->_customPixelsPerThread; this->_threadClearParam[i].endPixel = (i < this->_threadCount - 1) ? (i + 1) * this->_customPixelsPerThread : pixCount; - this->_rasterizerUnit[i].SetSLI(this->_threadPostprocessParam[i].startLine, this->_threadPostprocessParam[i].endLine, false); + this->_rasterizerUnit[i].SetSLI((u32)this->_threadPostprocessParam[i].startLine, (u32)this->_threadPostprocessParam[i].endLine, false); } } @@ -2411,7 +2412,7 @@ SoftRasterizer_SIMD::SoftRasterizer_SIMD() } template -Render3DError SoftRasterizer_SIMD::ClearUsingValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes) +Render3DError SoftRasterizer_SIMD::ClearUsingValues(const Color4u8 &clearColor6665, const FragmentAttributes &clearAttributes) { this->LoadClearValues(clearColor6665, clearAttributes); @@ -2500,9 +2501,9 @@ Render3DError SoftRasterizer_SIMD::SetFramebufferSize(size_t w, size_ #if defined(ENABLE_AVX2) -void SoftRasterizerRenderer_AVX2::LoadClearValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes) +void SoftRasterizerRenderer_AVX2::LoadClearValues(const Color4u8 &clearColor6665, const FragmentAttributes &clearAttributes) { - this->_clearColor_v256u32 = _mm256_set1_epi32(clearColor6665.color); + this->_clearColor_v256u32 = _mm256_set1_epi32(clearColor6665.value); this->_clearDepth_v256u32 = _mm256_set1_epi32(clearAttributes.depth); this->_clearAttrOpaquePolyID_v256u8 = _mm256_set1_epi8(clearAttributes.opaquePolyID); this->_clearAttrTranslucentPolyID_v256u8 = _mm256_set1_epi8(clearAttributes.translucentPolyID); @@ -2537,9 +2538,9 @@ void SoftRasterizerRenderer_AVX2::ClearUsingValues_Execute(const size_t startPix #elif defined(ENABLE_SSE2) -void SoftRasterizerRenderer_SSE2::LoadClearValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes) +void SoftRasterizerRenderer_SSE2::LoadClearValues(const Color4u8 &clearColor6665, const FragmentAttributes &clearAttributes) { - this->_clearColor_v128u32 = _mm_set1_epi32(clearColor6665.color); + this->_clearColor_v128u32 = _mm_set1_epi32(clearColor6665.value); this->_clearDepth_v128u32 = _mm_set1_epi32(clearAttributes.depth); this->_clearAttrOpaquePolyID_v128u8 = _mm_set1_epi8(clearAttributes.opaquePolyID); this->_clearAttrTranslucentPolyID_v128u8 = _mm_set1_epi8(clearAttributes.translucentPolyID); @@ -2574,9 +2575,9 @@ void SoftRasterizerRenderer_SSE2::ClearUsingValues_Execute(const size_t startPix #elif defined(ENABLE_NEON_A64) -void SoftRasterizerRenderer_NEON::LoadClearValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes) +void SoftRasterizerRenderer_NEON::LoadClearValues(const Color4u8 &clearColor6665, const FragmentAttributes &clearAttributes) { - this->_clearColor_v128u32x4.val[0] = vdupq_n_u32(clearColor6665.color); + this->_clearColor_v128u32x4.val[0] = vdupq_n_u32(clearColor6665.value); this->_clearColor_v128u32x4.val[1] = this->_clearColor_v128u32x4.val[0]; this->_clearColor_v128u32x4.val[2] = this->_clearColor_v128u32x4.val[0]; this->_clearColor_v128u32x4.val[3] = this->_clearColor_v128u32x4.val[0]; @@ -2642,9 +2643,9 @@ void SoftRasterizerRenderer_NEON::ClearUsingValues_Execute(const size_t startPix #elif defined(ENABLE_ALTIVEC) -void SoftRasterizerRenderer_AltiVec::LoadClearValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes) +void SoftRasterizerRenderer_AltiVec::LoadClearValues(const Color4u8 &clearColor6665, const FragmentAttributes &clearAttributes) { - this->_clearColor_v128u32 = (v128u32){clearColor6665.color,clearColor6665.color,clearColor6665.color,clearColor6665.color}; + this->_clearColor_v128u32 = (v128u32){clearColor6665.value,clearColor6665.value,clearColor6665.value,clearColor6665.value}; this->_clearDepth_v128u32 = (v128u32){clearAttributes.depth,clearAttributes.depth,clearAttributes.depth,clearAttributes.depth}; this->_clearAttrOpaquePolyID_v128u8 = (v128u8){clearAttributes.opaquePolyID,clearAttributes.opaquePolyID,clearAttributes.opaquePolyID,clearAttributes.opaquePolyID, diff --git a/desmume/src/rasterize.h b/desmume/src/rasterize.h index 74f94275f..e5951d704 100644 --- a/desmume/src/rasterize.h +++ b/desmume/src/rasterize.h @@ -107,22 +107,22 @@ protected: u8 _textureWrapMode; Render3DError _SetupTexture(const POLY &thePoly, size_t polyRenderIndex); - FORCEINLINE FragmentColor _sample(const float u, const float v); + FORCEINLINE Color4u8 _sample(const float u, const float v); FORCEINLINE float _round_s(double val); - template FORCEINLINE void _shade(const PolygonMode polygonMode, const FragmentColor src, FragmentColor &dst, const float texCoordU, const float texCoordV); - template FORCEINLINE void _pixel(const POLYGON_ATTR polyAttr, const bool isTranslucent, const size_t fragmentIndex, FragmentColor &dstColor, float r, float g, float b, float invu, float invv, float z, float w); - template FORCEINLINE void _drawscanline(const POLYGON_ATTR polyAttr, const bool isTranslucent, FragmentColor *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, edge_fx_fl *pLeft, edge_fx_fl *pRight); - template void _runscanlines(const POLYGON_ATTR polyAttr, const bool isTranslucent, FragmentColor *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, const bool isHorizontal, edge_fx_fl *left, edge_fx_fl *right); + template FORCEINLINE void _shade(const PolygonMode polygonMode, const Color4u8 src, Color4u8 &dst, const float texCoordU, const float texCoordV); + template FORCEINLINE void _pixel(const POLYGON_ATTR polyAttr, const bool isTranslucent, const size_t fragmentIndex, Color4u8 &dstColor, float r, float g, float b, float invu, float invv, float z, float w); + template FORCEINLINE void _drawscanline(const POLYGON_ATTR polyAttr, const bool isTranslucent, Color4u8 *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, edge_fx_fl *pLeft, edge_fx_fl *pRight); + template void _runscanlines(const POLYGON_ATTR polyAttr, const bool isTranslucent, Color4u8 *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, const bool isHorizontal, edge_fx_fl *left, edge_fx_fl *right); #ifdef ENABLE_SSE2 - template FORCEINLINE void _pixel_SSE2(const POLYGON_ATTR polyAttr, const bool isTranslucent, const size_t fragmentIndex, FragmentColor &dstColor, const __m128 &srcColorf, float invu, float invv, float z, float w); - template FORCEINLINE void _drawscanline_SSE2(const POLYGON_ATTR polyAttr, const bool isTranslucent, FragmentColor *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, edge_fx_fl *pLeft, edge_fx_fl *pRight); + template FORCEINLINE void _pixel_SSE2(const POLYGON_ATTR polyAttr, const bool isTranslucent, const size_t fragmentIndex, Color4u8 &dstColor, const __m128 &srcColorf, float invu, float invv, float z, float w); + template FORCEINLINE void _drawscanline_SSE2(const POLYGON_ATTR polyAttr, const bool isTranslucent, Color4u8 *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, edge_fx_fl *pLeft, edge_fx_fl *pRight); #endif template FORCEINLINE void _rot_verts(); template void _sort_verts(); - template void _shape_engine(const POLYGON_ATTR polyAttr, const bool isTranslucent, FragmentColor *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, int type); + template void _shape_engine(const POLYGON_ATTR polyAttr, const bool isTranslucent, Color4u8 *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, int type); public: void SetSLI(u32 startLine, u32 endLine, bool debug); @@ -160,7 +160,7 @@ protected: size_t _customPixelsPerThread; u8 _fogTable[32768]; - FragmentColor _edgeMarkTable[8]; + Color4u8 _edgeMarkTable[8]; bool _edgeMarkDisabled[8]; bool _renderGeometryNeedsFinish; @@ -178,11 +178,11 @@ protected: virtual Render3DError EndRender(); virtual Render3DError ClearUsingImage(const u16 *__restrict colorBuffer, const u32 *__restrict depthBuffer, const u8 *__restrict fogBuffer, const u8 opaquePolyID); - virtual Render3DError ClearUsingValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes); + virtual Render3DError ClearUsingValues(const Color4u8 &clearColor6665, const FragmentAttributes &clearAttributes); public: int _debug_drawClippedUserPoly; - CACHE_ALIGN FragmentColor toonColor32LUT[32]; + CACHE_ALIGN Color4u8 toonColor32LUT[32]; FragmentAttributesBuffer *_framebufferAttributes; GFX3D_State *currentRenderState; @@ -211,8 +211,8 @@ template class SoftRasterizer_SIMD : public SoftRasterizerRenderer { protected: - virtual void LoadClearValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes) = 0; - virtual Render3DError ClearUsingValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes); + virtual void LoadClearValues(const Color4u8 &clearColor6665, const FragmentAttributes &clearAttributes) = 0; + virtual Render3DError ClearUsingValues(const Color4u8 &clearColor6665, const FragmentAttributes &clearAttributes); public: SoftRasterizer_SIMD(); @@ -233,7 +233,7 @@ protected: v256u8 _clearAttrIsTranslucentPoly_v256u8; v256u8 _clearAttrPolyFacing_v256u8; - virtual void LoadClearValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes); + virtual void LoadClearValues(const Color4u8 &clearColor6665, const FragmentAttributes &clearAttributes); public: virtual void ClearUsingValues_Execute(const size_t startPixel, const size_t endPixel); @@ -252,7 +252,7 @@ protected: v128u8 _clearAttrIsTranslucentPoly_v128u8; v128u8 _clearAttrPolyFacing_v128u8; - virtual void LoadClearValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes); + virtual void LoadClearValues(const Color4u8 &clearColor6665, const FragmentAttributes &clearAttributes); public: virtual void ClearUsingValues_Execute(const size_t startPixel, const size_t endPixel); @@ -271,7 +271,7 @@ protected: uint8x16x4_t _clearAttrIsTranslucentPoly_v128u8x4; uint8x16x4_t _clearAttrPolyFacing_v128u8x4; - virtual void LoadClearValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes); + virtual void LoadClearValues(const Color4u8 &clearColor6665, const FragmentAttributes &clearAttributes); public: virtual void ClearUsingValues_Execute(const size_t startPixel, const size_t endPixel); @@ -290,7 +290,7 @@ protected: v128u8 _clearAttrIsTranslucentPoly_v128u8; v128u8 _clearAttrPolyFacing_v128u8; - virtual void LoadClearValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes); + virtual void LoadClearValues(const Color4u8 &clearColor6665, const FragmentAttributes &clearAttributes); public: virtual void ClearUsingValues_Execute(const size_t startPixel, const size_t endPixel); diff --git a/desmume/src/render3D.cpp b/desmume/src/render3D.cpp index 0beef10d2..2efd07d47 100644 --- a/desmume/src/render3D.cpp +++ b/desmume/src/render3D.cpp @@ -208,7 +208,7 @@ Render3D::Render3D() _framebufferHeight = GPU_FRAMEBUFFER_NATIVE_HEIGHT; _framebufferPixCount = _framebufferWidth * _framebufferHeight; _framebufferSIMDPixCount = 0; - _framebufferColorSizeBytes = _framebufferWidth * _framebufferHeight * sizeof(FragmentColor); + _framebufferColorSizeBytes = _framebufferWidth * _framebufferHeight * sizeof(Color4u8); _framebufferColor = NULL; _internalRenderingFormat = NDSColorFormat_BGR666_Rev; @@ -282,7 +282,7 @@ std::string Render3D::GetName() return this->_deviceInfo.renderName; } -FragmentColor* Render3D::GetFramebuffer() +Color4u8* Render3D::GetFramebuffer() { return this->_framebufferColor; } @@ -312,7 +312,7 @@ Render3DError Render3D::SetFramebufferSize(size_t w, size_t h) this->_framebufferWidth = w; this->_framebufferHeight = h; this->_framebufferPixCount = w * h; - this->_framebufferColorSizeBytes = w * h * sizeof(FragmentColor); + this->_framebufferColorSizeBytes = w * h * sizeof(Color4u8); this->_framebufferColor = GPU->GetEngineMain()->Get3DFramebufferMain(); // Just use the buffer that is already present on the main GPU engine return RENDER3DERROR_NOERR; @@ -466,7 +466,7 @@ Render3DError Render3D::EndRender() return RENDER3DERROR_NOERR; } -Render3DError Render3D::FlushFramebuffer(const FragmentColor *__restrict srcFramebuffer, FragmentColor *__restrict dstFramebufferMain, u16 *__restrict dstFramebuffer16) +Render3DError Render3D::FlushFramebuffer(const Color4u8 *__restrict srcFramebuffer, Color4u8 *__restrict dstFramebufferMain, u16 *__restrict dstFramebuffer16) { if ( (dstFramebufferMain == NULL) && (dstFramebuffer16 == NULL) ) { @@ -486,7 +486,7 @@ Render3DError Render3D::FlushFramebuffer(const FragmentColor *__restrict srcFram else if ( ((this->_internalRenderingFormat == NDSColorFormat_BGR666_Rev) && (this->_outputFormat == NDSColorFormat_BGR666_Rev)) || ((this->_internalRenderingFormat == NDSColorFormat_BGR888_Rev) && (this->_outputFormat == NDSColorFormat_BGR888_Rev)) ) { - memcpy(dstFramebufferMain, srcFramebuffer, this->_framebufferPixCount * sizeof(FragmentColor)); + memcpy(dstFramebufferMain, srcFramebuffer, this->_framebufferPixCount * sizeof(Color4u8)); } this->_renderNeedsFlushMain = false; @@ -648,7 +648,7 @@ Render3DError Render3D::ClearUsingImage(const u16 *__restrict colorBuffer, const return RENDER3DERROR_NOERR; } -Render3DError Render3D::ClearUsingValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes) +Render3DError Render3D::ClearUsingValues(const Color4u8 &clearColor6665, const FragmentAttributes &clearAttributes) { return RENDER3DERROR_NOERR; } @@ -670,7 +670,7 @@ Render3DError Render3D::Reset() memset(this->_framebufferColor, 0, this->_framebufferColorSizeBytes); } - this->_clearColor6665.color = 0; + this->_clearColor6665.value = 0; memset(&this->_clearAttributes, 0, sizeof(FragmentAttributes)); this->_renderNeedsFinish = false; @@ -703,7 +703,7 @@ Render3DError Render3D::Render(const GFX3D_State &renderState, const GFX3D_Geome this->_isPoweredOn = true; const u32 clearColorSwapped = LE_TO_LOCAL_32(renderState.clearColor); - this->_clearColor6665.color = LE_TO_LOCAL_32( COLOR555TO6665(clearColorSwapped & 0x7FFF, (clearColorSwapped >> 16) & 0x1F) ); + this->_clearColor6665.value = LE_TO_LOCAL_32( COLOR555TO6665(clearColorSwapped & 0x7FFF, (clearColorSwapped >> 16) & 0x1F) ); this->_clearAttributes.opaquePolyID = (clearColorSwapped >> 24) & 0x3F; //special value for uninitialized translucent polyid. without this, fires in spiderman2 dont display diff --git a/desmume/src/render3D.h b/desmume/src/render3D.h index 82fec9011..6c4a62c2e 100644 --- a/desmume/src/render3D.h +++ b/desmume/src/render3D.h @@ -156,9 +156,9 @@ protected: size_t _framebufferPixCount; size_t _framebufferSIMDPixCount; size_t _framebufferColorSizeBytes; - FragmentColor *_framebufferColor; + Color4u8 *_framebufferColor; - FragmentColor _clearColor6665; + Color4u8 _clearColor6665; FragmentAttributes _clearAttributes; NDSColorFormat _internalRenderingFormat; @@ -203,10 +203,10 @@ protected: virtual Render3DError RenderGeometry(); virtual Render3DError PostprocessFramebuffer(); virtual Render3DError EndRender(); - virtual Render3DError FlushFramebuffer(const FragmentColor *__restrict srcFramebuffer, FragmentColor *__restrict dstFramebufferMain, u16 *__restrict dstFramebuffer16); + virtual Render3DError FlushFramebuffer(const Color4u8 *__restrict srcFramebuffer, Color4u8 *__restrict dstFramebufferMain, u16 *__restrict dstFramebuffer16); virtual Render3DError ClearUsingImage(const u16 *__restrict colorBuffer, const u32 *__restrict depthBuffer, const u8 *__restrict fogBuffer, const u8 opaquePolyID); - virtual Render3DError ClearUsingValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes); + virtual Render3DError ClearUsingValues(const Color4u8 &clearColor6665, const FragmentAttributes &clearAttributes); virtual Render3DError SetupTexture(const POLY &thePoly, size_t polyRenderIndex); virtual Render3DError SetupViewport(const GFX3D_Viewport viewport); @@ -258,7 +258,7 @@ public: virtual NDSColorFormat GetColorFormat() const; // The output color format of the 3D renderer. - virtual FragmentColor* GetFramebuffer(); + virtual Color4u8* GetFramebuffer(); bool GetRenderNeedsFinish() const; void SetRenderNeedsFinish(const bool renderNeedsFinish); diff --git a/desmume/src/types.h b/desmume/src/types.h index b52e0f6e2..3c0aa71db 100644 --- a/desmume/src/types.h +++ b/desmume/src/types.h @@ -278,6 +278,15 @@ typedef __vector unsigned short v128u16; typedef __vector signed short v128s16; typedef __vector unsigned int v128u32; typedef __vector signed int v128s32; +typedef __vector float v128f32; + +#define AVAILABLE_TYPE_v128u8 +#define AVAILABLE_TYPE_v128s8 +#define AVAILABLE_TYPE_v128u16 +#define AVAILABLE_TYPE_v128s16 +#define AVAILABLE_TYPE_v128u32 +#define AVAILABLE_TYPE_v128s32 +#define AVAILABLE_TYPE_v128f32 #endif #ifdef ENABLE_NEON_A64 @@ -288,6 +297,22 @@ typedef uint16x8_t v128u16; typedef int16x8_t v128s16; typedef uint32x4_t v128u32; typedef int32x4_t v128s32; +typedef float32x4_t v128f32; + +#define AVAILABLE_TYPE_v128u8 +#define AVAILABLE_TYPE_v128s8 +#define AVAILABLE_TYPE_v128u16 +#define AVAILABLE_TYPE_v128s16 +#define AVAILABLE_TYPE_v128u32 +#define AVAILABLE_TYPE_v128s32 +#define AVAILABLE_TYPE_v128f32 +#endif + +#ifdef ENABLE_SSE +#include +#include +typedef __m128 v128f32; +#define AVAILABLE_TYPE_v128f32 #endif #ifdef ENABLE_SSE2 @@ -298,11 +323,22 @@ typedef __m128i v128u16; typedef __m128i v128s16; typedef __m128i v128u32; typedef __m128i v128s32; + +#define AVAILABLE_TYPE_v128u8 +#define AVAILABLE_TYPE_v128s8 +#define AVAILABLE_TYPE_v128u16 +#define AVAILABLE_TYPE_v128s16 +#define AVAILABLE_TYPE_v128u32 +#define AVAILABLE_TYPE_v128s32 #endif -#if defined(ENABLE_AVX) || defined(ENABLE_AVX512_0) +#if defined(ENABLE_AVX) || defined(ENABLE_AVX2) || defined(ENABLE_AVX512_0) #include +typedef __m256 v256f32; +#define AVAILABLE_TYPE_v256f32 + +#if defined(ENABLE_AVX2) || defined(ENABLE_AVX512_0) typedef __m256i v256u8; typedef __m256i v256s8; typedef __m256i v256u16; @@ -310,6 +346,14 @@ typedef __m256i v256s16; typedef __m256i v256u32; typedef __m256i v256s32; +#define AVAILABLE_TYPE_v256u8 +#define AVAILABLE_TYPE_v256s8 +#define AVAILABLE_TYPE_v256u16 +#define AVAILABLE_TYPE_v256s16 +#define AVAILABLE_TYPE_v256u32 +#define AVAILABLE_TYPE_v256s32 +#endif // defined(ENABLE_AVX2) || defined(ENABLE_AVX512_0) + #if defined(ENABLE_AVX512_0) typedef __m512i v512u8; typedef __m512i v512s8; @@ -317,9 +361,18 @@ typedef __m512i v512u16; typedef __m512i v512s16; typedef __m512i v512u32; typedef __m512i v512s32; -#endif +typedef __m512 v512f32; -#endif // defined(ENABLE_AVX) || defined(ENABLE_AVX512_0) +#define AVAILABLE_TYPE_v512u8 +#define AVAILABLE_TYPE_v512s8 +#define AVAILABLE_TYPE_v512u16 +#define AVAILABLE_TYPE_v512s16 +#define AVAILABLE_TYPE_v512u32 +#define AVAILABLE_TYPE_v512s32 +#define AVAILABLE_TYPE_v512f32 +#endif // defined(ENABLE_AVX512_0) + +#endif // defined(ENABLE_AVX) || defined(ENABLE_AVX2) || defined(ENABLE_AVX512_0) /*---------- GPU3D fixed-points types -----------*/ @@ -350,6 +403,197 @@ typedef s16 v10; #define floattov10(n) ((v10)((n) * (1 << 9))) #define v10tofloat(n) (((float)(n)) / (float)(1<<9)) +union Vector2s16 +{ + s16 vec[2]; + s16 coord[2]; + struct { s16 s, t; }; + struct { s16 u, v; }; + struct { s16 x, y; } XY; + struct { s16 y, z; } YZ; + struct { s16 x, z; } XZ; + + u32 value; +}; +typedef union Vector2s16 Vector2s16; + +union Vector3s16 +{ + s16 vec[3]; + s16 coord[3]; + struct { s16 x, y, z; }; +}; +typedef union Vector3s16 Vector3s16; + +union Vector4s16 +{ + s16 vec[4]; + s16 coord[4]; + struct { s16 x, y, z, w; }; + + struct + { + Vector3s16 vec3; + s16 :16; + }; + + u64 value; +}; +typedef union Vector4s16 Vector4s16; + +union Vector2s32 +{ + s32 vec[2]; + s32 coord[2]; + struct { s32 s, t; }; + struct { s32 u, v; }; + struct { s32 x, y; } XY; + struct { s32 y, z; } YZ; + struct { s32 x, z; } XZ; + + u64 value; +}; +typedef union Vector2s32 Vector2s32; + +union Vector3s32 +{ + s32 vec[3]; + s32 coord[3]; + struct { s32 x, y, z; }; +}; +typedef union Vector3s32 Vector3s32; + +union Vector4s32 +{ + s32 vec[4]; + s32 coord[4]; + struct { s32 x, y, z, w; }; + + struct + { + Vector3s32 vec3; + s32 :32; + }; +}; +typedef union Vector4s32 Vector4s32; + +union Vector2s64 +{ + s64 vec[2]; + s64 coord[2]; + struct { s64 s, t; }; + struct { s64 u, v; }; + struct { s64 x, y; } XY; + struct { s64 y, z; } YZ; + struct { s64 x, z; } XZ; +}; +typedef union Vector2s64 Vector2s64; + +union Vector3s64 +{ + s64 vec[3]; + s64 coord[3]; + struct { s64 x, y, z; }; +}; +typedef union Vector3s64 Vector3s64; + +union Vector4s64 +{ + s64 vec[4]; + s64 coord[4]; + struct { s64 x, y, z, w; }; + + struct + { + Vector3s64 vec3; + s64 :64; + }; +}; +typedef union Vector4s64 Vector4s64; + +union Vector2f32 +{ + float vec[2]; + float coord[2]; + struct { float s, t; }; + struct { float u, v; }; + struct { float x, y; } XY; + struct { float y, z; } YZ; + struct { float x, z; } XZ; +}; +typedef union Vector2f32 Vector2f32; + +union Vector3f32 +{ + float vec[3]; + float coord[3]; + struct { float x, y, z; }; +}; +typedef union Vector3f32 Vector3f32; + +union Vector4f32 +{ + float vec[4]; + float coord[4]; + struct { float x, y, z, w; }; + + struct + { + Vector3f32 vec3; + float ignore; + }; +}; +typedef union Vector4f32 Vector4f32; + +union Color4u8 +{ + u8 component[4]; + struct { u8 r, g, b, a; }; + + u32 value; +}; +typedef union Color4u8 Color4u8; + +union Color3s32 +{ + s32 component[3]; + struct { s32 r, g, b; }; +}; +typedef union Color3s32 Color3s32; + +union Color4s32 +{ + s32 component[4]; + struct { s32 r, g, b, a; }; + + struct + { + Color3s32 color3; + s32 alpha; + }; +}; +typedef union Color4s32 Color4s32; + +union Color3f32 +{ + float component[3]; + struct { float r, g, b; }; +}; +typedef union Color3f32 Color3f32; + +union Color4f32 +{ + float component[4]; + struct { float r, g, b, a; }; + + struct + { + Color3f32 color3; + float alpha; + }; +}; +typedef union Color4f32 Color4f32; + /*----------------------*/ #ifndef OBJ_C diff --git a/desmume/src/utils/colorspacehandler/colorspacehandler.cpp b/desmume/src/utils/colorspacehandler/colorspacehandler.cpp index 095856820..9704845be 100644 --- a/desmume/src/utils/colorspacehandler/colorspacehandler.cpp +++ b/desmume/src/utils/colorspacehandler/colorspacehandler.cpp @@ -1,5 +1,5 @@ /* - Copyright (C) 2016-2022 DeSmuME team + Copyright (C) 2016-2023 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -753,10 +753,10 @@ void ColorspaceApplyIntensityToBuffer32(u32 *dst, size_t pixCount, float intensi #endif for (; i < pixCount; i++) { - FragmentColor dstColor; - dstColor.color = dst[i]; + Color4u8 dstColor; + dstColor.value = dst[i]; - FragmentColor &outColor = (FragmentColor &)dst[i]; + Color4u8 &outColor = (Color4u8 &)dst[i]; outColor.r = dstColor.b; outColor.b = dstColor.r; } @@ -786,10 +786,10 @@ void ColorspaceApplyIntensityToBuffer32(u32 *dst, size_t pixCount, float intensi #endif for (; i < pixCount; i++) { - FragmentColor dstColor; - dstColor.color = dst[i]; + Color4u8 dstColor; + dstColor.value = dst[i]; - FragmentColor &outColor = (FragmentColor &)dst[i]; + Color4u8 &outColor = (Color4u8 &)dst[i]; outColor.r = (u8)( ((u16)dstColor.b * intensity_u16) >> 16 ); outColor.g = (u8)( ((u16)dstColor.g * intensity_u16) >> 16 ); outColor.b = (u8)( ((u16)dstColor.r * intensity_u16) >> 16 ); @@ -802,7 +802,7 @@ void ColorspaceApplyIntensityToBuffer32(u32 *dst, size_t pixCount, float intensi #endif for (; i < pixCount; i++) { - FragmentColor &outColor = (FragmentColor &)dst[i]; + Color4u8 &outColor = (Color4u8 &)dst[i]; outColor.r = (u8)( ((u16)outColor.r * intensity_u16) >> 16 ); outColor.g = (u8)( ((u16)outColor.g * intensity_u16) >> 16 ); outColor.b = (u8)( ((u16)outColor.b * intensity_u16) >> 16 ); @@ -1333,7 +1333,7 @@ size_t ColorspaceHandler::ApplyIntensityToBuffer32(u32 *dst, size_t pixCount, fl for (; i < pixCount; i++) { - FragmentColor &outColor = (FragmentColor &)dst[i]; + Color4u8 &outColor = (Color4u8 &)dst[i]; outColor.r = (u8)( ((u16)outColor.r * intensity_u16) >> 16 ); outColor.g = (u8)( ((u16)outColor.g * intensity_u16) >> 16 ); outColor.b = (u8)( ((u16)outColor.b * intensity_u16) >> 16 ); @@ -1350,10 +1350,10 @@ size_t ColorspaceHandler::ApplyIntensityToBuffer32_SwapRB(u32 *dst, size_t pixCo { for (; i < pixCount; i++) { - FragmentColor dstColor; - dstColor.color = dst[i]; + Color4u8 dstColor; + dstColor.value = dst[i]; - FragmentColor &outColor = (FragmentColor &)dst[i]; + Color4u8 &outColor = (Color4u8 &)dst[i]; outColor.r = dstColor.b; outColor.b = dstColor.r; } @@ -1374,10 +1374,10 @@ size_t ColorspaceHandler::ApplyIntensityToBuffer32_SwapRB(u32 *dst, size_t pixCo for (; i < pixCount; i++) { - FragmentColor dstColor; - dstColor.color = dst[i]; + Color4u8 dstColor; + dstColor.value = dst[i]; - FragmentColor &outColor = (FragmentColor &)dst[i]; + Color4u8 &outColor = (Color4u8 &)dst[i]; outColor.r = (u8)( ((u16)dstColor.b * intensity_u16) >> 16 ); outColor.g = (u8)( ((u16)dstColor.g * intensity_u16) >> 16 ); outColor.b = (u8)( ((u16)dstColor.r * intensity_u16) >> 16 ); diff --git a/desmume/src/utils/colorspacehandler/colorspacehandler.h b/desmume/src/utils/colorspacehandler/colorspacehandler.h index 90393b7a7..48bae72e8 100644 --- a/desmume/src/utils/colorspacehandler/colorspacehandler.h +++ b/desmume/src/utils/colorspacehandler/colorspacehandler.h @@ -79,18 +79,6 @@ enum NDSColorFormat NDSColorFormat_BGR888_Rev = 0x20008208 }; -union FragmentColor -{ - u8 component[4]; - - struct - { - u8 r,g,b,a; - }; - - u32 color; -}; - extern CACHE_ALIGN const u32 material_5bit_to_31bit[32]; extern CACHE_ALIGN const u8 material_5bit_to_6bit[64]; // Padded for vector lookup table routines. Only the first 32 indices are valid. Data is mirrored across 256-bit lanes. extern CACHE_ALIGN const u8 material_5bit_to_8bit[64]; // Padded for vector lookup table routines. Only the first 32 indices are valid. Data is mirrored across 256-bit lanes. @@ -139,49 +127,49 @@ FORCEINLINE u32 ColorspaceConvert555To6665Opaque(const u16 src) } template -FORCEINLINE u32 ColorspaceConvert8888To6665(FragmentColor srcColor) +FORCEINLINE u32 ColorspaceConvert8888To6665(Color4u8 srcColor) { - FragmentColor outColor; + Color4u8 outColor; outColor.r = ((SWAP_RB) ? srcColor.b : srcColor.r) >> 2; outColor.g = srcColor.g >> 2; outColor.b = ((SWAP_RB) ? srcColor.r : srcColor.b) >> 2; outColor.a = srcColor.a >> 3; - return outColor.color; + return outColor.value; } template FORCEINLINE u32 ColorspaceConvert8888To6665(u32 srcColor) { - FragmentColor srcColorComponent; - srcColorComponent.color = srcColor; + Color4u8 srcColorComponent; + srcColorComponent.value = srcColor; return ColorspaceConvert8888To6665(srcColorComponent); } template -FORCEINLINE u32 ColorspaceConvert6665To8888(FragmentColor srcColor) +FORCEINLINE u32 ColorspaceConvert6665To8888(Color4u8 srcColor) { - FragmentColor outColor; + Color4u8 outColor; outColor.r = material_6bit_to_8bit[((SWAP_RB) ? srcColor.b : srcColor.r)]; outColor.g = material_6bit_to_8bit[srcColor.g]; outColor.b = material_6bit_to_8bit[((SWAP_RB) ? srcColor.r : srcColor.b)]; outColor.a = material_5bit_to_8bit[srcColor.a]; - return outColor.color; + return outColor.value; } template FORCEINLINE u32 ColorspaceConvert6665To8888(u32 srcColor) { - FragmentColor srcColorComponent; - srcColorComponent.color = srcColor; + Color4u8 srcColorComponent; + srcColorComponent.value = srcColor; return ColorspaceConvert6665To8888(srcColorComponent); } template -FORCEINLINE u16 ColorspaceConvert8888To5551(FragmentColor srcColor) +FORCEINLINE u16 ColorspaceConvert8888To5551(Color4u8 srcColor) { return R5G5B5TORGB15( ((SWAP_RB) ? srcColor.b : srcColor.r) >> 3, srcColor.g >> 3, ((SWAP_RB) ? srcColor.r : srcColor.b) >> 3) | ((srcColor.a == 0) ? 0x0000 : 0x8000 ); } @@ -189,14 +177,14 @@ FORCEINLINE u16 ColorspaceConvert8888To5551(FragmentColor srcColor) template FORCEINLINE u16 ColorspaceConvert8888To5551(u32 srcColor) { - FragmentColor srcColorComponent; - srcColorComponent.color = srcColor; + Color4u8 srcColorComponent; + srcColorComponent.value = srcColor; return ColorspaceConvert8888To5551(srcColorComponent); } template -FORCEINLINE u16 ColorspaceConvert6665To5551(FragmentColor srcColor) +FORCEINLINE u16 ColorspaceConvert6665To5551(Color4u8 srcColor) { return R6G6B6TORGB15( ((SWAP_RB) ? srcColor.b : srcColor.r), srcColor.g, ((SWAP_RB) ? srcColor.r : srcColor.b)) | ((srcColor.a == 0) ? 0x0000 : 0x8000); } @@ -204,35 +192,35 @@ FORCEINLINE u16 ColorspaceConvert6665To5551(FragmentColor srcColor) template FORCEINLINE u16 ColorspaceConvert6665To5551(u32 srcColor) { - FragmentColor srcColorComponent; - srcColorComponent.color = srcColor; + Color4u8 srcColorComponent; + srcColorComponent.value = srcColor; return ColorspaceConvert6665To5551(srcColorComponent); } template -FORCEINLINE u32 ColorspaceConvert888XTo8888Opaque(FragmentColor srcColor) +FORCEINLINE u32 ColorspaceConvert888XTo8888Opaque(Color4u8 srcColor) { - FragmentColor outColor; + Color4u8 outColor; outColor.r = (SWAP_RB) ? srcColor.b : srcColor.r; outColor.g = srcColor.g; outColor.b = (SWAP_RB) ? srcColor.r : srcColor.b; outColor.a = 0xFF; - return outColor.color; + return outColor.value; } template FORCEINLINE u32 ColorspaceConvert888XTo8888Opaque(u32 srcColor) { - FragmentColor srcColorComponent; - srcColorComponent.color = srcColor; + Color4u8 srcColorComponent; + srcColorComponent.value = srcColor; return ColorspaceConvert888XTo8888Opaque(srcColorComponent); } template -FORCEINLINE void ColorspaceConvert888XTo888(FragmentColor srcColor, u8 *dst) +FORCEINLINE void ColorspaceConvert888XTo888(Color4u8 srcColor, u8 *dst) { dst[0] = (SWAP_RB) ? srcColor.b : srcColor.r; dst[1] = srcColor.g; @@ -242,8 +230,8 @@ FORCEINLINE void ColorspaceConvert888XTo888(FragmentColor srcColor, u8 *dst) template FORCEINLINE void ColorspaceConvert888XTo888(u32 srcColor, u8 *dst) { - FragmentColor srcColorComponent; - srcColorComponent.color = srcColor; + Color4u8 srcColorComponent; + srcColorComponent.value = srcColor; ColorspaceConvert888XTo888(srcColorComponent, dst); } @@ -251,8 +239,8 @@ FORCEINLINE void ColorspaceConvert888XTo888(u32 srcColor, u8 *dst) template FORCEINLINE void ColorspaceConvert555XTo888(u16 srcColor, u8 *dst) { - FragmentColor srcColorComponent; - srcColorComponent.color = ColorspaceConvert555To8888Opaque(srcColor); + Color4u8 srcColorComponent; + srcColorComponent.value = ColorspaceConvert555To8888Opaque(srcColor); ColorspaceConvert888XTo888(srcColorComponent, dst); } @@ -264,22 +252,22 @@ FORCEINLINE u16 ColorspaceCopy16(u16 srcColor) } template -FORCEINLINE u32 ColorspaceCopy32(FragmentColor srcColor) +FORCEINLINE u32 ColorspaceCopy32(Color4u8 srcColor) { - FragmentColor outColor; + Color4u8 outColor; outColor.r = (SWAP_RB) ? srcColor.b : srcColor.r; outColor.g = srcColor.g; outColor.b = (SWAP_RB) ? srcColor.r : srcColor.b; outColor.a = srcColor.a; - return outColor.color; + return outColor.value; } template FORCEINLINE u32 ColorspaceCopy32(u32 srcColor) { - FragmentColor srcColorComponent; - srcColorComponent.color = srcColor; + Color4u8 srcColorComponent; + srcColorComponent.value = srcColor; return ColorspaceCopy32(srcColorComponent); } @@ -308,9 +296,9 @@ FORCEINLINE u16 ColorspaceApplyIntensity16(u16 srcColor, float intensity) } template -FORCEINLINE u32 ColorspaceApplyIntensity32(FragmentColor srcColor, float intensity) +FORCEINLINE u32 ColorspaceApplyIntensity32(Color4u8 srcColor, float intensity) { - FragmentColor outColor; + Color4u8 outColor; outColor.r = ((SWAP_RB) ? srcColor.b : srcColor.r); outColor.g = srcColor.g; outColor.b = ((SWAP_RB) ? srcColor.r : srcColor.b); @@ -318,11 +306,11 @@ FORCEINLINE u32 ColorspaceApplyIntensity32(FragmentColor srcColor, float intensi if (intensity > 0.999f) { - return outColor.color; + return outColor.value; } else if (intensity < 0.001f) { - return (outColor.color & 0xFF000000); + return (outColor.value & 0xFF000000); } const u16 intensity_u16 = (u16)(intensity * (float)(0xFFFF)); @@ -331,14 +319,14 @@ FORCEINLINE u32 ColorspaceApplyIntensity32(FragmentColor srcColor, float intensi outColor.b = (u8)( ((u16)outColor.b * intensity_u16) >> 16 ); outColor.a = outColor.a; - return outColor.color; + return outColor.value; } template FORCEINLINE u32 ColorspaceApplyIntensity32(u32 srcColor, float intensity) { - FragmentColor srcColorComponent; - srcColorComponent.color = srcColor; + Color4u8 srcColorComponent; + srcColorComponent.value = srcColor; return ColorspaceApplyIntensity32(srcColorComponent); } @@ -427,11 +415,4 @@ public: size_t ApplyIntensityToBuffer32_SwapRB_IsUnaligned(u32 *dst, size_t pixCount, float intensity) const; }; -FORCEINLINE FragmentColor MakeFragmentColor(const u8 r, const u8 g, const u8 b, const u8 a) -{ - FragmentColor ret; - ret.r = r; ret.g = g; ret.b = b; ret.a = a; - return ret; -} - #endif /* COLORSPACEHANDLER_H */