GPU: Do a massive refactor of the manually vectorized code and add full support for AVX2.
- Most notably, each version of the manually vectorized code now resides in its own file.
- Depending on the rendering situation, the new AVX2 code may increase rendering performance by anywhere from 5% up to 50%.
- Certain functions automatically gain manual vectorization support since the new GPU code makes use of the new general-purpose copy functions that were added in commit e991b16. In other words, AVX-512 and AltiVec builds also benefit from this.
This commit is contained in:
parent
4226fa7ab2
commit
0db98725dc
File diff suppressed because it is too large
Load Diff
|
@ -2,7 +2,7 @@
|
|||
Copyright (C) 2006 yopyop
|
||||
Copyright (C) 2006-2007 Theo Berkau
|
||||
Copyright (C) 2007 shash
|
||||
Copyright (C) 2009-2019 DeSmuME team
|
||||
Copyright (C) 2009-2021 DeSmuME team
|
||||
|
||||
This file is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -28,7 +28,6 @@
|
|||
#include "./utils/colorspacehandler/colorspacehandler.h"
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
#include <emmintrin.h>
|
||||
#include "./utils/colorspacehandler/colorspacehandler_SSE2.h"
|
||||
#endif
|
||||
|
||||
|
@ -40,6 +39,14 @@
|
|||
#include <smmintrin.h>
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_AVX2
|
||||
#include "./utils/colorspacehandler/colorspacehandler_AVX2.h"
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_AVX512_1
|
||||
#include "./utils/colorspacehandler/colorspacehandler_AVX512.h"
|
||||
#endif
|
||||
|
||||
// Note: Technically, the shift count of palignr can be any value of [0-255]. But practically speaking, the
|
||||
// shift count should be a value of [0-15]. If we assume that the value range will always be [0-15], we can
|
||||
// then substitute the palignr instruction with an SSE2 equivalent.
|
||||
|
@ -1201,8 +1208,9 @@ typedef struct
|
|||
|
||||
typedef struct
|
||||
{
|
||||
u8 begin;
|
||||
u8 trunc;
|
||||
u8 begin[GPU_FRAMEBUFFER_NATIVE_WIDTH];
|
||||
u8 trunc[GPU_FRAMEBUFFER_NATIVE_WIDTH];
|
||||
u32 trunc32[GPU_FRAMEBUFFER_NATIVE_WIDTH];
|
||||
} MosaicTableEntry;
|
||||
|
||||
typedef struct
|
||||
|
@ -1272,34 +1280,21 @@ typedef struct
|
|||
FragmentColor *brightnessDownTable666;
|
||||
FragmentColor *brightnessDownTable888;
|
||||
|
||||
bool srcEffectEnable[6];
|
||||
bool dstBlendEnable[6];
|
||||
#ifdef ENABLE_SSE2
|
||||
__m128i srcEffectEnable_SSE2[6];
|
||||
#ifdef ENABLE_SSSE3
|
||||
__m128i dstBlendEnable_SSSE3;
|
||||
#else
|
||||
__m128i dstBlendEnable_SSE2[6];
|
||||
#endif
|
||||
#endif // ENABLE_SSE2
|
||||
bool dstAnyBlendEnable;
|
||||
|
||||
u8 WIN0_enable[6];
|
||||
u8 WIN1_enable[6];
|
||||
u8 WINOUT_enable[6];
|
||||
u8 WINOBJ_enable[6];
|
||||
#if defined(ENABLE_SSE2)
|
||||
__m128i WIN0_enable_SSE2[6];
|
||||
__m128i WIN1_enable_SSE2[6];
|
||||
__m128i WINOUT_enable_SSE2[6];
|
||||
__m128i WINOBJ_enable_SSE2[6];
|
||||
#endif
|
||||
|
||||
bool WIN0_ENABLED;
|
||||
bool WIN1_ENABLED;
|
||||
bool WINOBJ_ENABLED;
|
||||
bool isAnyWindowEnabled;
|
||||
|
||||
u8 srcEffectEnable[6];
|
||||
u8 dstBlendEnable[6];
|
||||
bool dstAnyBlendEnable;
|
||||
CACHE_ALIGN u8 dstBlendEnableVecLookup[128]; // Supports up to 1024-bit vectors
|
||||
|
||||
MosaicTableEntry *mosaicWidthBG;
|
||||
MosaicTableEntry *mosaicHeightBG;
|
||||
MosaicTableEntry *mosaicWidthOBJ;
|
||||
|
@ -1340,32 +1335,26 @@ typedef struct
|
|||
class GPUEngineBase
|
||||
{
|
||||
protected:
|
||||
static CACHE_ALIGN u16 _brightnessUpTable555[17][0x8000];
|
||||
static CACHE_ALIGN FragmentColor _brightnessUpTable666[17][0x8000];
|
||||
static CACHE_ALIGN FragmentColor _brightnessUpTable888[17][0x8000];
|
||||
static CACHE_ALIGN u16 _brightnessDownTable555[17][0x8000];
|
||||
static CACHE_ALIGN FragmentColor _brightnessDownTable666[17][0x8000];
|
||||
static CACHE_ALIGN FragmentColor _brightnessDownTable888[17][0x8000];
|
||||
static CACHE_ALIGN u8 _blendTable555[17][17][32][32];
|
||||
|
||||
static const CACHE_ALIGN SpriteSize _sprSizeTab[4][4];
|
||||
static const CACHE_ALIGN BGLayerSize _BGLayerSizeLUT[8][4];
|
||||
static const CACHE_ALIGN BGType _mode2type[8][4];
|
||||
|
||||
static struct MosaicLookup
|
||||
{
|
||||
CACHE_ALIGN MosaicTableEntry table[16][256];
|
||||
CACHE_ALIGN MosaicTableEntry table[16];
|
||||
|
||||
MosaicLookup()
|
||||
{
|
||||
for (size_t m = 0; m < 16; m++)
|
||||
{
|
||||
for (size_t i = 0; i < 256; i++)
|
||||
for (size_t i = 0; i < GPU_FRAMEBUFFER_NATIVE_WIDTH; i++)
|
||||
{
|
||||
size_t mosaic = m+1;
|
||||
MosaicTableEntry &te = table[m][i];
|
||||
te.begin = ((i % mosaic) == 0);
|
||||
te.trunc = (i / mosaic) * mosaic;
|
||||
MosaicTableEntry &te = table[m];
|
||||
|
||||
te.begin[i] = ((i % mosaic) == 0);
|
||||
te.trunc[i] = (i / mosaic) * mosaic;
|
||||
te.trunc32[i] = te.trunc[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1413,12 +1402,12 @@ protected:
|
|||
itemsForPriority_t _itemsForPriority[NB_PRIORITIES];
|
||||
|
||||
struct MosaicColor {
|
||||
u16 bg[4][256];
|
||||
CACHE_ALIGN u16 bg[4][GPU_FRAMEBUFFER_NATIVE_WIDTH + sizeof(u32)]; // Pad this buffer a little bit to avoid buffer overruns with vectorized gather instructions.
|
||||
struct Obj {
|
||||
u16 color;
|
||||
u8 alpha;
|
||||
u8 opaque;
|
||||
} obj[256];
|
||||
} obj[GPU_FRAMEBUFFER_NATIVE_WIDTH];
|
||||
} _mosaicColors;
|
||||
|
||||
GPUEngineID _engineID;
|
||||
|
@ -1452,7 +1441,6 @@ protected:
|
|||
FragmentColor _asyncClearBackdropColor32; // Do not modify this variable directly.
|
||||
bool _asyncClearUseInternalCustomBuffer; // Do not modify this variable directly.
|
||||
|
||||
void _InitLUTs();
|
||||
void _Reset_Base();
|
||||
void _ResortBGLayers();
|
||||
|
||||
|
@ -1466,12 +1454,19 @@ protected:
|
|||
template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool WILLDEFERCOMPOSITING, PixelLookupFunc GetPixelFunc> void _RenderPixelIterate(GPUEngineCompositorInfo &compInfo, const IOREG_BGnParameter &param, const u32 map, const u32 tile, const u16 *__restrict pal);
|
||||
|
||||
TILEENTRY _GetTileEntry(const u32 tileMapAddress, const u16 xOffset, const u16 layerWidthMask);
|
||||
template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool MOSAIC, bool WILLPERFORMWINDOWTEST> FORCEINLINE void _CompositePixelImmediate(GPUEngineCompositorInfo &compInfo, const size_t srcX, u16 srcColor16, bool opaque);
|
||||
template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool MOSAIC, bool WILLPERFORMWINDOWTEST> FORCEINLINE void _CompositePixelImmediate(GPUEngineCompositorInfo &compInfo, const size_t srcX, u16 srcColor16, bool isOpaque);
|
||||
template<bool ISFIRSTLINE> void _MosaicLine(GPUEngineCompositorInfo &compInfo);
|
||||
|
||||
template<bool MOSAIC> void _PrecompositeNativeToCustomLineBG(GPUEngineCompositorInfo &compInfo);
|
||||
|
||||
template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool WILLPERFORMWINDOWTEST> void _CompositeNativeLineOBJ(GPUEngineCompositorInfo &compInfo, const u16 *__restrict srcColorNative16, const FragmentColor *__restrict srcColorNative32);
|
||||
template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE, bool WILLPERFORMWINDOWTEST> void _CompositeLineDeferred(GPUEngineCompositorInfo &compInfo, const u16 *__restrict srcColorCustom16, const u8 *__restrict srcIndexCustom);
|
||||
template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE, bool WILLPERFORMWINDOWTEST> void _CompositeVRAMLineDeferred(GPUEngineCompositorInfo &compInfo, const void *__restrict vramColorPtr);
|
||||
|
||||
template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool WILLPERFORMWINDOWTEST> void _CompositeNativeLineOBJ_LoopOp(GPUEngineCompositorInfo &compInfo, const u16 *__restrict srcColorNative16, const FragmentColor *__restrict srcColorNative32);
|
||||
template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE, bool WILLPERFORMWINDOWTEST> size_t _CompositeLineDeferred_LoopOp(GPUEngineCompositorInfo &compInfo, const u16 *__restrict srcColorCustom16, const u8 *__restrict srcIndexCustom);
|
||||
template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE, bool WILLPERFORMWINDOWTEST> size_t _CompositeVRAMLineDeferred_LoopOp(GPUEngineCompositorInfo &compInfo, const void *__restrict vramColorPtr);
|
||||
|
||||
template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool WILLDEFERCOMPOSITING> void _RenderLine_BGText(GPUEngineCompositorInfo &compInfo, const u16 XBG, const u16 YBG);
|
||||
template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool WILLDEFERCOMPOSITING> void _RenderLine_BGAffine(GPUEngineCompositorInfo &compInfo, const IOREG_BGnParameter &param);
|
||||
template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool WILLDEFERCOMPOSITING> void _RenderLine_BGExtended(GPUEngineCompositorInfo &compInfo, const IOREG_BGnParameter &param, bool &outUseCustomVRAM);
|
||||
|
@ -1487,79 +1482,22 @@ protected:
|
|||
template<NDSColorFormat OUTPUTFORMAT> void _HandleDisplayModeOff(const size_t l);
|
||||
template<NDSColorFormat OUTPUTFORMAT> void _HandleDisplayModeNormal(const size_t l);
|
||||
|
||||
template<NDSColorFormat OUTPUTFORMAT> size_t _ApplyMasterBrightnessUp_LoopOp(void *__restrict dst, const size_t pixCount, const u8 intensityClamped);
|
||||
template<NDSColorFormat OUTPUTFORMAT> size_t _ApplyMasterBrightnessDown_LoopOp(void *__restrict dst, const size_t pixCount, const u8 intensityClamped);
|
||||
|
||||
template<size_t WIN_NUM> void _UpdateWINH(GPUEngineCompositorInfo &compInfo);
|
||||
template<size_t WIN_NUM> bool _IsWindowInsideVerticalRange(GPUEngineCompositorInfo &compInfo);
|
||||
void _PerformWindowTesting(GPUEngineCompositorInfo &compInfo);
|
||||
void _PerformWindowTestingNative(GPUEngineCompositorInfo &compInfo, const size_t layerID, const u8 *__restrict win0, const u8 *__restrict win1, const u8 *__restrict winObj, u8 *__restrict didPassWindowTestNative, u8 *__restrict enableColorEffectNative);
|
||||
|
||||
template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool WILLDEFERCOMPOSITING> FORCEINLINE void _RenderLine_LayerBG_Final(GPUEngineCompositorInfo &compInfo);
|
||||
template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool MOSAIC, bool WILLPERFORMWINDOWTEST> FORCEINLINE void _RenderLine_LayerBG_ApplyMosaic(GPUEngineCompositorInfo &compInfo);
|
||||
template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool WILLPERFORMWINDOWTEST> void _RenderLine_LayerBG(GPUEngineCompositorInfo &compInfo);
|
||||
|
||||
template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool WILLPERFORMWINDOWTEST> void _RenderLine_LayerOBJ(GPUEngineCompositorInfo &compInfo, itemsForPriority_t *__restrict item);
|
||||
|
||||
template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER> FORCEINLINE void _PixelCopy(GPUEngineCompositorInfo &compInfo, const u16 srcColor16);
|
||||
template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER> FORCEINLINE void _PixelCopy(GPUEngineCompositorInfo &compInfo, const FragmentColor srcColor32);
|
||||
template<NDSColorFormat OUTPUTFORMAT> FORCEINLINE void _PixelBrightnessUp(GPUEngineCompositorInfo &compInfo, const u16 srcColor16);
|
||||
template<NDSColorFormat OUTPUTFORMAT> FORCEINLINE void _PixelBrightnessUp(GPUEngineCompositorInfo &compInfo, const FragmentColor srcColor32);
|
||||
template<NDSColorFormat OUTPUTFORMAT> FORCEINLINE void _PixelBrightnessDown(GPUEngineCompositorInfo &compInfo, const u16 srcColor16);
|
||||
template<NDSColorFormat OUTPUTFORMAT> FORCEINLINE void _PixelBrightnessDown(GPUEngineCompositorInfo &compInfo, const FragmentColor srcColor32);
|
||||
template<NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE> FORCEINLINE void _PixelUnknownEffect(GPUEngineCompositorInfo &compInfo, const u16 srcColor16, const bool enableColorEffect, const u8 spriteAlpha, const OBJMode spriteMode);
|
||||
template<NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE> FORCEINLINE void _PixelUnknownEffect(GPUEngineCompositorInfo &compInfo, const FragmentColor srcColor32, const bool enableColorEffect, const u8 spriteAlpha, const OBJMode spriteMode);
|
||||
|
||||
template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE> FORCEINLINE void _PixelComposite(GPUEngineCompositorInfo &compInfo, const u16 srcColor16, const bool enableColorEffect, const u8 spriteAlpha, const u8 spriteMode);
|
||||
template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE> FORCEINLINE void _PixelComposite(GPUEngineCompositorInfo &compInfo, FragmentColor srcColor32, const bool enableColorEffect, const u8 spriteAlpha, const u8 spriteMode);
|
||||
|
||||
FORCEINLINE u16 _ColorEffectBlend(const u16 colA, const u16 colB, const u16 blendEVA, const u16 blendEVB);
|
||||
FORCEINLINE u16 _ColorEffectBlend(const u16 colA, const u16 colB, const TBlendTable *blendTable);
|
||||
template<NDSColorFormat COLORFORMAT> FORCEINLINE FragmentColor _ColorEffectBlend(const FragmentColor colA, const FragmentColor colB, const u16 blendEVA, const u16 blendEVB);
|
||||
|
||||
FORCEINLINE u16 _ColorEffectBlend3D(const FragmentColor colA, const u16 colB);
|
||||
template<NDSColorFormat COLORFORMATB> FORCEINLINE FragmentColor _ColorEffectBlend3D(const FragmentColor colA, const FragmentColor colB);
|
||||
|
||||
FORCEINLINE u16 _ColorEffectIncreaseBrightness(const u16 col, const u16 blendEVY);
|
||||
template<NDSColorFormat COLORFORMAT> FORCEINLINE FragmentColor _ColorEffectIncreaseBrightness(const FragmentColor col, const u16 blendEVY);
|
||||
|
||||
FORCEINLINE u16 _ColorEffectDecreaseBrightness(const u16 col, const u16 blendEVY);
|
||||
FORCEINLINE FragmentColor _ColorEffectDecreaseBrightness(const FragmentColor col, const u16 blendEVY);
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
template<NDSColorFormat COLORFORMAT, bool USECONSTANTBLENDVALUESHINT> FORCEINLINE __m128i _ColorEffectBlend(const __m128i &colA, const __m128i &colB, const __m128i &blendEVA, const __m128i &blendEVB);
|
||||
template<NDSColorFormat COLORFORMATB> FORCEINLINE __m128i _ColorEffectBlend3D(const __m128i &colA_Lo, const __m128i &colA_Hi, const __m128i &colB);
|
||||
template<NDSColorFormat COLORFORMAT> FORCEINLINE __m128i _ColorEffectIncreaseBrightness(const __m128i &col, const __m128i &blendEVY);
|
||||
template<NDSColorFormat COLORFORMAT> FORCEINLINE __m128i _ColorEffectDecreaseBrightness(const __m128i &col, const __m128i &blendEVY);
|
||||
template<bool WILLDEFERCOMPOSITING> FORCEINLINE void _RenderPixel_CheckWindows16_SSE2(GPUEngineCompositorInfo &compInfo, const size_t dstX, __m128i &didPassWindowTest, __m128i &enableColorEffect) const;
|
||||
|
||||
template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER> FORCEINLINE void _PixelCopy16_SSE2(GPUEngineCompositorInfo &compInfo, const __m128i &src3, const __m128i &src2, const __m128i &src1, const __m128i &src0, __m128i &dst3, __m128i &dst2, __m128i &dst1, __m128i &dst0, __m128i &dstLayerID);
|
||||
template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER> FORCEINLINE void _PixelCopyWithMask16_SSE2(GPUEngineCompositorInfo &compInfo, const __m128i &passMask8, const __m128i &src3, const __m128i &src2, const __m128i &src1, const __m128i &src0, __m128i &dst3, __m128i &dst2, __m128i &dst1, __m128i &dst0, __m128i &dstLayerID);
|
||||
template<NDSColorFormat OUTPUTFORMAT> FORCEINLINE void _PixelBrightnessUp16_SSE2(GPUEngineCompositorInfo &compInfo, const __m128i &src3, const __m128i &src2, const __m128i &src1, const __m128i &src0, __m128i &dst3, __m128i &dst2, __m128i &dst1, __m128i &dst0, __m128i &dstLayerID);
|
||||
template<NDSColorFormat OUTPUTFORMAT> FORCEINLINE void _PixelBrightnessUpWithMask16_SSE2(GPUEngineCompositorInfo &compInfo, const __m128i &passMask8, const __m128i &src3, const __m128i &src2, const __m128i &src1, const __m128i &src0, __m128i &dst3, __m128i &dst2, __m128i &dst1, __m128i &dst0, __m128i &dstLayerID);
|
||||
template<NDSColorFormat OUTPUTFORMAT> FORCEINLINE void _PixelBrightnessDown16_SSE2(GPUEngineCompositorInfo &compInfo, const __m128i &src3, const __m128i &src2, const __m128i &src1, const __m128i &src0, __m128i &dst3, __m128i &dst2, __m128i &dst1, __m128i &dst0, __m128i &dstLayerID);
|
||||
template<NDSColorFormat OUTPUTFORMAT> FORCEINLINE void _PixelBrightnessDownWithMask16_SSE2(GPUEngineCompositorInfo &compInfo, const __m128i &passMask8, const __m128i &src3, const __m128i &src2, const __m128i &src1, const __m128i &src0, __m128i &dst3, __m128i &dst2, __m128i &dst1, __m128i &dst0, __m128i &dstLayerID);
|
||||
|
||||
template<NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE>
|
||||
FORCEINLINE void _PixelUnknownEffectWithMask16_SSE2(GPUEngineCompositorInfo &compInfo,
|
||||
const __m128i &passMask8,
|
||||
const __m128i &src3, const __m128i &src2, const __m128i &src1, const __m128i &src0,
|
||||
const __m128i &srcEffectEnableMask,
|
||||
const __m128i &enableColorEffectMask,
|
||||
const __m128i &spriteAlpha,
|
||||
const __m128i &spriteMode,
|
||||
__m128i &dst3, __m128i &dst2, __m128i &dst1, __m128i &dst0,
|
||||
__m128i &dstLayerID);
|
||||
|
||||
template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE, bool WILLPERFORMWINDOWTEST>
|
||||
FORCEINLINE void _PixelComposite16_SSE2(GPUEngineCompositorInfo &compInfo,
|
||||
const bool didAllPixelsPass,
|
||||
const __m128i &passMask8,
|
||||
const __m128i &src3, const __m128i &src2, const __m128i &src1, const __m128i &src0,
|
||||
const __m128i &srcEffectEnableMask,
|
||||
const u8 *__restrict enableColorEffectPtr,
|
||||
const u8 *__restrict sprAlphaPtr,
|
||||
const u8 *__restrict sprModePtr);
|
||||
#endif
|
||||
|
||||
template<bool ISDEBUGRENDER, bool ISOBJMODEBITMAP> FORCEINLINE void _RenderSpriteUpdatePixel(GPUEngineCompositorInfo &compInfo, size_t frameX, const u16 *__restrict srcPalette, const u8 palIndex, const OBJMode objMode, const u8 prio, const u8 spriteNum, u16 *__restrict dst, u8 *__restrict dst_alpha, u8 *__restrict typeTab, u8 *__restrict prioTab);
|
||||
template<bool ISDEBUGRENDER> void _RenderSpriteBMP(GPUEngineCompositorInfo &compInfo, const u32 objAddress, const size_t length, size_t frameX, size_t spriteX, const s32 readXStep, const u8 spriteAlpha, const OBJMode objMode, const u8 prio, const u8 spriteNum, u16 *__restrict dst, u8 *__restrict dst_alpha, u8 *__restrict typeTab, u8 *__restrict prioTab);
|
||||
template<bool ISDEBUGRENDER> size_t _RenderSpriteBMP_LoopOp(const size_t length, const u8 spriteAlpha, const u8 prio, const u8 spriteNum, const u16 *__restrict vramBuffer, size_t &frameX, size_t &spriteX, u16 *__restrict dst, u8 *__restrict dst_alpha, u8 *__restrict typeTab, u8 *__restrict prioTab);
|
||||
template<bool ISDEBUGRENDER> void _RenderSprite256(GPUEngineCompositorInfo &compInfo, const u32 objAddress, const size_t length, size_t frameX, size_t spriteX, const s32 readXStep, const u16 *__restrict palColorBuffer, const OBJMode objMode, const u8 prio, const u8 spriteNum, u16 *__restrict dst, u8 *__restrict dst_alpha, u8 *__restrict typeTab, u8 *__restrict prioTab);
|
||||
template<bool ISDEBUGRENDER> void _RenderSprite16(GPUEngineCompositorInfo &compInfo, const u32 objAddress, const size_t length, size_t frameX, size_t spriteX, const s32 readXStep, const u16 *__restrict palColorBuffer, const OBJMode objMode, const u8 prio, const u8 spriteNum, u16 *__restrict dst, u8 *__restrict dst_alpha, u8 *__restrict typeTab, u8 *__restrict prioTab);
|
||||
void _RenderSpriteWin(const u8 *src, const bool col256, const size_t lg, size_t sprX, size_t x, const s32 xdir);
|
||||
|
@ -1703,14 +1641,16 @@ protected:
|
|||
u16 _RenderLine_DispCapture_BlendFunc(const u16 srcA, const u16 srcB, const u8 blendEVA, const u8 blendEVB);
|
||||
template<NDSColorFormat COLORFORMAT> FragmentColor _RenderLine_DispCapture_BlendFunc(const FragmentColor srcA, const FragmentColor srcB, const u8 blendEVA, const u8 blendEVB);
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
template<NDSColorFormat COLORFORMAT> __m128i _RenderLine_DispCapture_BlendFunc_SSE2(const __m128i &srcA, const __m128i &srcB, const __m128i &blendEVA, const __m128i &blendEVB);
|
||||
#endif
|
||||
template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool WILLPERFORMWINDOWTEST>
|
||||
size_t _RenderLine_Layer3D_LoopOp(GPUEngineCompositorInfo &compInfo, const FragmentColor *__restrict srcLinePtr);
|
||||
|
||||
template<NDSColorFormat OUTPUTFORMAT>
|
||||
void _RenderLine_DispCapture_BlendToCustomDstBuffer(const void *srcA, const void *srcB, void *dst, const u8 blendEVA, const u8 blendEVB, const size_t length); // Do not use restrict pointers, since srcB and dst can be the same
|
||||
void _RenderLine_DispCapture_Blend_Buffer(const void *srcA, const void *srcB, void *dst, const u8 blendEVA, const u8 blendEVB, const size_t pixCount); // Do not use restrict pointers, since srcB and dst can be the same
|
||||
|
||||
template<NDSColorFormat OUTPUTFORMAT, size_t CAPTURELENGTH, bool CAPTUREFROMNATIVESRCA, bool CAPTUREFROMNATIVESRCB, bool CAPTURETONATIVEDST>
|
||||
template<NDSColorFormat OUTPUTFORMAT>
|
||||
size_t _RenderLine_DispCapture_Blend_VecLoop(const void *srcA, const void *srcB, void *dst, const u8 blendEVA, const u8 blendEVB, const size_t length);
|
||||
|
||||
template<NDSColorFormat OUTPUTFORMAT, size_t CAPTURELENGTH, bool ISCAPTURENATIVE>
|
||||
void _RenderLine_DispCapture_Blend(const GPUEngineLineInfo &lineInfo, const void *srcA, const void *srcB, void *dst, const size_t captureLengthExt); // Do not use restrict pointers, since srcB and dst can be the same
|
||||
|
||||
template<NDSColorFormat OUTPUTFORMAT> void _HandleDisplayModeVRAM(const GPUEngineLineInfo &lineInfo);
|
||||
|
@ -1986,20 +1926,6 @@ public:
|
|||
void SetClientData(void *clientData);
|
||||
};
|
||||
|
||||
template <s32 INTEGERSCALEHINT, bool SCALEVERTICAL, bool USELINEINDEX, bool NEEDENDIANSWAP, size_t ELEMENTSIZE>
|
||||
void CopyLineExpandHinted(const void *__restrict srcBuffer, const size_t srcLineIndex,
|
||||
void *__restrict dstBuffer, const size_t dstLineIndex, const size_t dstLineWidth, const size_t dstLineCount);
|
||||
|
||||
template <s32 INTEGERSCALEHINT, bool SCALEVERTICAL, bool USELINEINDEX, bool NEEDENDIANSWAP, size_t ELEMENTSIZE>
|
||||
void CopyLineExpandHinted(const GPUEngineLineInfo &lineInfo, const void *__restrict srcBuffer, void *__restrict dstBuffer);
|
||||
|
||||
template <s32 INTEGERSCALEHINT, bool USELINEINDEX, bool NEEDENDIANSWAP, size_t ELEMENTSIZE>
|
||||
void CopyLineReduceHinted(const void *__restrict srcBuffer, const size_t srcLineIndex, const size_t srcLineWidth,
|
||||
void *__restrict dstBuffer, const size_t dstLineIndex);
|
||||
|
||||
template <s32 INTEGERSCALEHINT, bool USELINEINDEX, bool NEEDENDIANSWAP, size_t ELEMENTSIZE>
|
||||
void CopyLineReduceHinted(const GPUEngineLineInfo &lineInfo, const void *__restrict srcBuffer, void *__restrict dstBuffer);
|
||||
|
||||
extern GPUSubsystem *GPU;
|
||||
extern MMU_struct MMU;
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,96 @@
|
|||
/*
|
||||
Copyright (C) 2021 DeSmuME team
|
||||
|
||||
This file is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This file is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this software. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef GPU_OPERATIONS_H
|
||||
#define GPU_OPERATIONS_H
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include "types.h"
|
||||
#include "./utils/colorspacehandler/colorspacehandler.h"
|
||||
|
||||
#include "GPU.h"
|
||||
|
||||
|
||||
template <s32 INTEGERSCALEHINT, bool SCALEVERTICAL, bool USELINEINDEX, bool NEEDENDIANSWAP, size_t ELEMENTSIZE>
|
||||
void CopyLineExpandHinted(const void *__restrict srcBuffer, const size_t srcLineIndex,
|
||||
void *__restrict dstBuffer, const size_t dstLineIndex, const size_t dstLineWidth, const size_t dstLineCount);
|
||||
|
||||
template <s32 INTEGERSCALEHINT, bool SCALEVERTICAL, bool USELINEINDEX, bool NEEDENDIANSWAP, size_t ELEMENTSIZE>
|
||||
void CopyLineExpandHinted(const GPUEngineLineInfo &lineInfo, const void *__restrict srcBuffer, void *__restrict dstBuffer);
|
||||
|
||||
template <s32 INTEGERSCALEHINT, bool USELINEINDEX, bool NEEDENDIANSWAP, size_t ELEMENTSIZE>
|
||||
void CopyLineReduceHinted(const void *__restrict srcBuffer, const size_t srcLineIndex, const size_t srcLineWidth,
|
||||
void *__restrict dstBuffer, const size_t dstLineIndex);
|
||||
|
||||
template <s32 INTEGERSCALEHINT, bool USELINEINDEX, bool NEEDENDIANSWAP, size_t ELEMENTSIZE>
|
||||
void CopyLineReduceHinted(const GPUEngineLineInfo &lineInfo, const void *__restrict srcBuffer, void *__restrict dstBuffer);
|
||||
|
||||
class ColorOperation
|
||||
{
|
||||
public:
|
||||
ColorOperation() {};
|
||||
|
||||
FORCEINLINE u16 blend(const u16 colA, const u16 colB, const u16 blendEVA, const u16 blendEVB) const;
|
||||
FORCEINLINE u16 blend(const u16 colA, const u16 colB, const TBlendTable *blendTable) const;
|
||||
template<NDSColorFormat COLORFORMAT> FORCEINLINE FragmentColor blend(const FragmentColor colA, const FragmentColor colB, const u16 blendEVA, const u16 blendEVB) const;
|
||||
|
||||
FORCEINLINE u16 blend3D(const FragmentColor colA, const u16 colB) const;
|
||||
template<NDSColorFormat COLORFORMAT> FORCEINLINE FragmentColor blend3D(const FragmentColor colA, const FragmentColor colB) const;
|
||||
|
||||
FORCEINLINE u16 increase(const u16 col, const u16 blendEVY) const;
|
||||
template<NDSColorFormat COLORFORMAT> FORCEINLINE FragmentColor increase(const FragmentColor col, const u16 blendEVY) const;
|
||||
|
||||
FORCEINLINE u16 decrease(const u16 col, const u16 blendEVY) const;
|
||||
template<NDSColorFormat COLORFORMAT> FORCEINLINE FragmentColor decrease(const FragmentColor col, const u16 blendEVY) const;
|
||||
};
|
||||
|
||||
class PixelOperation
|
||||
{
|
||||
private:
|
||||
template<GPULayerType LAYERTYPE> FORCEINLINE void __selectedEffect(const GPUEngineCompositorInfo &compInfo, const u8 &dstLayerID, const bool enableColorEffect, const u8 spriteAlpha, const OBJMode spriteMode, ColorEffect &selectedEffect, TBlendTable **selectedBlendTable, u8 &blendEVA, u8 &blendEVB) const;
|
||||
|
||||
protected:
|
||||
template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER> FORCEINLINE void _copy16(GPUEngineCompositorInfo &compInfo, const u16 srcColor16) const;
|
||||
template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER> FORCEINLINE void _copy32(GPUEngineCompositorInfo &compInfo, const FragmentColor srcColor32) const;
|
||||
|
||||
template<NDSColorFormat OUTPUTFORMAT> FORCEINLINE void _brightnessUp16(GPUEngineCompositorInfo &compInfo, const u16 srcColor16) const;
|
||||
template<NDSColorFormat OUTPUTFORMAT> FORCEINLINE void _brightnessUp32(GPUEngineCompositorInfo &compInfo, const FragmentColor srcColor32) const;
|
||||
|
||||
template<NDSColorFormat OUTPUTFORMAT> FORCEINLINE void _brightnessDown16(GPUEngineCompositorInfo &compInfo, const u16 srcColor16) const;
|
||||
template<NDSColorFormat OUTPUTFORMAT> FORCEINLINE void _brightnessDown32(GPUEngineCompositorInfo &compInfo, const FragmentColor srcColor32) const;
|
||||
|
||||
template<NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE> FORCEINLINE void _unknownEffect16(GPUEngineCompositorInfo &compInfo, const u16 srcColor16, const bool enableColorEffect, const u8 spriteAlpha, const OBJMode spriteMode) const;
|
||||
template<NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE> FORCEINLINE void _unknownEffect32(GPUEngineCompositorInfo &compInfo, const FragmentColor srcColor32, const bool enableColorEffect, const u8 spriteAlpha, const OBJMode spriteMode) const;
|
||||
|
||||
public:
|
||||
static CACHE_ALIGN u8 BlendTable555[17][17][32][32];
|
||||
static CACHE_ALIGN u16 BrightnessUpTable555[17][0x8000];
|
||||
static CACHE_ALIGN FragmentColor BrightnessUpTable666[17][0x8000];
|
||||
static CACHE_ALIGN FragmentColor BrightnessUpTable888[17][0x8000];
|
||||
static CACHE_ALIGN u16 BrightnessDownTable555[17][0x8000];
|
||||
static CACHE_ALIGN FragmentColor BrightnessDownTable666[17][0x8000];
|
||||
static CACHE_ALIGN FragmentColor BrightnessDownTable888[17][0x8000];
|
||||
static void InitLUTs();
|
||||
|
||||
PixelOperation() {};
|
||||
|
||||
template <GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE> FORCEINLINE void Composite16(GPUEngineCompositorInfo &compInfo, const u16 srcColor16, const bool enableColorEffect, const u8 spriteAlpha, const u8 spriteMode) const;
|
||||
template <GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE> FORCEINLINE void Composite32(GPUEngineCompositorInfo &compInfo, FragmentColor srcColor32, const bool enableColorEffect, const u8 spriteAlpha, const u8 spriteMode) const;
|
||||
};
|
||||
|
||||
#endif // GPU_OPERATIONS_H
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,122 @@
|
|||
/*
|
||||
Copyright (C) 2021 DeSmuME team
|
||||
|
||||
This file is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This file is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this software. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef GPU_OPERATIONS_AVX2_H
|
||||
#define GPU_OPERATIONS_AVX2_H
|
||||
|
||||
#include "GPU_Operations.h"
|
||||
|
||||
#ifndef ENABLE_AVX2
|
||||
#warning This header requires AVX2 support.
|
||||
#else
|
||||
|
||||
class ColorOperation_AVX2
|
||||
{
|
||||
public:
|
||||
ColorOperation_AVX2() {};
|
||||
|
||||
FORCEINLINE v256u16 blend(const v256u16 &colA, const v256u16 &colB, const v256u16 &blendEVA, const v256u16 &blendEVB) const;
|
||||
template<NDSColorFormat COLORFORMAT, bool USECONSTANTBLENDVALUESHINT> FORCEINLINE v256u32 blend(const v256u32 &colA, const v256u32 &colB, const v256u16 &blendEVA, const v256u16 &blendEVB) const;
|
||||
|
||||
FORCEINLINE v256u16 blend3D(const v256u32 &colA_Lo, const v256u32 &colA_Hi, const v256u16 &colB) const;
|
||||
template<NDSColorFormat COLORFORMAT> FORCEINLINE v256u32 blend3D(const v256u32 &colA, const v256u32 &colB) const;
|
||||
|
||||
FORCEINLINE v256u16 increase(const v256u16 &col, const v256u16 &blendEVY) const;
|
||||
template<NDSColorFormat COLORFORMAT> FORCEINLINE v256u32 increase(const v256u32 &col, const v256u16 &blendEVY) const;
|
||||
|
||||
FORCEINLINE v256u16 decrease(const v256u16 &col, const v256u16 &blendEVY) const;
|
||||
template<NDSColorFormat COLORFORMAT> FORCEINLINE v256u32 decrease(const v256u32 &col, const v256u16 &blendEVY) const;
|
||||
};
|
||||
|
||||
// Pixel compositing operations for the AVX2 build. This class is only declared
// when ENABLE_AVX2 is defined, and only the prototypes appear in this header.
//
// Naming pattern visible in the signatures:
//   - "...16" methods take the source colors as two v256u16 vectors (src1, src0).
//   - "...32" methods take the source colors as four v256u32 vectors (src3..src0).
//   - "...Mask..." methods additionally take a v256u8 passMask8.
// Every method receives a mutable GPUEngineCompositorInfo reference and returns void.
// NOTE(review): presumably results are written through compInfo — confirm in the definitions.
class PixelOperation_AVX2
|
||||
{
|
||||
protected:
|
||||
// Copy helpers: take only the source layer ID and the source color vectors.
template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER> FORCEINLINE void _copy16(GPUEngineCompositorInfo &compInfo, const v256u8 &srcLayerID, const v256u16 &src1, const v256u16 &src0) const;
|
||||
template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER> FORCEINLINE void _copy32(GPUEngineCompositorInfo &compInfo, const v256u8 &srcLayerID, const v256u32 &src3, const v256u32 &src2, const v256u32 &src1, const v256u32 &src0) const;
|
||||
|
||||
// Masked copy helpers: same as the copy helpers plus passMask8.
// NOTE(review): presumably passMask8 selects which pixels are written — confirm.
template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER> FORCEINLINE void _copyMask16(GPUEngineCompositorInfo &compInfo, const v256u8 &passMask8, const v256u8 &srcLayerID, const v256u16 &src1, const v256u16 &src0) const;
|
||||
template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER> FORCEINLINE void _copyMask32(GPUEngineCompositorInfo &compInfo, const v256u8 &passMask8, const v256u8 &srcLayerID, const v256u32 &src3, const v256u32 &src2, const v256u32 &src1, const v256u32 &src0) const;
|
||||
|
||||
// Brightness-up helpers: take an evy16 vector in addition to the copy-helper inputs.
template<NDSColorFormat OUTPUTFORMAT> FORCEINLINE void _brightnessUp16(GPUEngineCompositorInfo &compInfo, const v256u16 &evy16, const v256u8 &srcLayerID, const v256u16 &src1, const v256u16 &src0) const;
|
||||
template<NDSColorFormat OUTPUTFORMAT> FORCEINLINE void _brightnessUp32(GPUEngineCompositorInfo &compInfo, const v256u16 &evy16, const v256u8 &srcLayerID, const v256u32 &src3, const v256u32 &src2, const v256u32 &src1, const v256u32 &src0) const;
|
||||
|
||||
// Masked brightness-up helpers: brightness-up inputs plus passMask8.
template<NDSColorFormat OUTPUTFORMAT> FORCEINLINE void _brightnessUpMask16(GPUEngineCompositorInfo &compInfo, const v256u8 &passMask8, const v256u16 &evy16, const v256u8 &srcLayerID, const v256u16 &src1, const v256u16 &src0) const;
|
||||
template<NDSColorFormat OUTPUTFORMAT> FORCEINLINE void _brightnessUpMask32(GPUEngineCompositorInfo &compInfo, const v256u8 &passMask8, const v256u16 &evy16, const v256u8 &srcLayerID, const v256u32 &src3, const v256u32 &src2, const v256u32 &src1, const v256u32 &src0) const;
|
||||
|
||||
// Brightness-down helpers: same parameter shape as the brightness-up helpers.
template<NDSColorFormat OUTPUTFORMAT> FORCEINLINE void _brightnessDown16(GPUEngineCompositorInfo &compInfo, const v256u16 &evy16, const v256u8 &srcLayerID, const v256u16 &src1, const v256u16 &src0) const;
|
||||
template<NDSColorFormat OUTPUTFORMAT> FORCEINLINE void _brightnessDown32(GPUEngineCompositorInfo &compInfo, const v256u16 &evy16, const v256u8 &srcLayerID, const v256u32 &src3, const v256u32 &src2, const v256u32 &src1, const v256u32 &src0) const;
|
||||
|
||||
// Masked brightness-down helpers: brightness-down inputs plus passMask8.
template<NDSColorFormat OUTPUTFORMAT> FORCEINLINE void _brightnessDownMask16(GPUEngineCompositorInfo &compInfo, const v256u8 &passMask8, const v256u16 &evy16, const v256u8 &srcLayerID, const v256u16 &src1, const v256u16 &src0) const;
|
||||
template<NDSColorFormat OUTPUTFORMAT> FORCEINLINE void _brightnessDownMask32(GPUEngineCompositorInfo &compInfo, const v256u8 &passMask8, const v256u16 &evy16, const v256u8 &srcLayerID, const v256u32 &src3, const v256u32 &src2, const v256u32 &src1, const v256u32 &src0) const;
|
||||
|
||||
// "Unknown effect" helper, 16 variant. Templated on the layer type as well as the
// output format, and takes the effect-enable masks plus sprite alpha/mode as
// already-loaded v256u8 vectors.
// NOTE(review): presumably used when the color effect must be chosen per pixel — confirm.
template<NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE>
|
||||
FORCEINLINE void _unknownEffectMask16(GPUEngineCompositorInfo &compInfo,
|
||||
const v256u8 &passMask8,
|
||||
const v256u16 &evy16,
|
||||
const v256u8 &srcLayerID,
|
||||
const v256u16 &src1, const v256u16 &src0,
|
||||
const v256u8 &srcEffectEnableMask,
|
||||
const v256u8 &dstBlendEnableMaskLUT,
|
||||
const v256u8 &enableColorEffectMask,
|
||||
const v256u8 &spriteAlpha,
|
||||
const v256u8 &spriteMode) const;
|
||||
|
||||
// "Unknown effect" helper, 32 variant: same inputs, with the source colors as
// four v256u32 vectors.
template<NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE>
|
||||
FORCEINLINE void _unknownEffectMask32(GPUEngineCompositorInfo &compInfo,
|
||||
const v256u8 &passMask8,
|
||||
const v256u16 &evy16,
|
||||
const v256u8 &srcLayerID,
|
||||
const v256u32 &src3, const v256u32 &src2, const v256u32 &src1, const v256u32 &src0,
|
||||
const v256u8 &srcEffectEnableMask,
|
||||
const v256u8 &dstBlendEnableMaskLUT,
|
||||
const v256u8 &enableColorEffectMask,
|
||||
const v256u8 &spriteAlpha,
|
||||
const v256u8 &spriteMode) const;
|
||||
|
||||
public:
|
||||
PixelOperation_AVX2() {};
|
||||
|
||||
// Public compositing entry point, 16 variant. Templated on compositor mode,
// output format, layer type and whether a window test will be performed.
// Unlike _unknownEffectMask16, the color-effect enable, sprite alpha and sprite
// mode inputs arrive as u8 pointers rather than vectors, and a didAllPixelsPass
// flag accompanies passMask8.
// NOTE(review): presumably dispatches to the protected helpers based on
// COMPOSITORMODE and didAllPixelsPass — confirm in the definition.
template <GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE, bool WILLPERFORMWINDOWTEST>
|
||||
FORCEINLINE void Composite16(GPUEngineCompositorInfo &compInfo,
|
||||
const bool didAllPixelsPass,
|
||||
const v256u8 &passMask8,
|
||||
const v256u16 &evy16,
|
||||
const v256u8 &srcLayerID,
|
||||
const v256u16 &src1, const v256u16 &src0,
|
||||
const v256u8 &srcEffectEnableMask,
|
||||
const v256u8 &dstBlendEnableMaskLUT,
|
||||
const u8 *__restrict enableColorEffectPtr,
|
||||
const u8 *__restrict sprAlphaPtr,
|
||||
const u8 *__restrict sprModePtr) const;
|
||||
|
||||
// Public compositing entry point, 32 variant: same inputs, with the source
// colors as four v256u32 vectors.
template <GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE, bool WILLPERFORMWINDOWTEST>
|
||||
FORCEINLINE void Composite32(GPUEngineCompositorInfo &compInfo,
|
||||
const bool didAllPixelsPass,
|
||||
const v256u8 &passMask8,
|
||||
const v256u16 &evy16,
|
||||
const v256u8 &srcLayerID,
|
||||
const v256u32 &src3, const v256u32 &src2, const v256u32 &src1, const v256u32 &src0,
|
||||
const v256u8 &srcEffectEnableMask,
|
||||
const v256u8 &dstBlendEnableMaskLUT,
|
||||
const u8 *__restrict enableColorEffectPtr,
|
||||
const u8 *__restrict sprAlphaPtr,
|
||||
const u8 *__restrict sprModePtr) const;
|
||||
};
|
||||
|
||||
#endif // ENABLE_AVX2
|
||||
|
||||
#endif // GPU_OPERATIONS_AVX2_H
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,122 @@
|
|||
/*
|
||||
Copyright (C) 2021 DeSmuME team
|
||||
|
||||
This file is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This file is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this software. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef GPU_OPERATIONS_SSE2_H
|
||||
#define GPU_OPERATIONS_SSE2_H
|
||||
|
||||
#include "GPU_Operations.h"
|
||||
|
||||
#ifndef ENABLE_SSE2
|
||||
#warning This header requires SSE2 support.
|
||||
#else
|
||||
|
||||
// Color math operations for the SSE2 build. This class is only declared when
// ENABLE_SSE2 is defined (see the surrounding #ifndef/#else), and only the
// prototypes appear in this header.
//
// Each operation comes as a pair: a non-template overload that works on
// v128u16 vectors, and a template overload (parameterized on NDSColorFormat)
// that works on v128u32 vectors.
// NOTE(review): presumably the v128u16 overloads handle 16-bit packed colors
// and the v128u32 overloads handle 32-bit colors — confirm in the definitions.
class ColorOperation_SSE2
|
||||
{
|
||||
public:
|
||||
ColorOperation_SSE2() {};
|
||||
|
||||
// blend: combines colA and colB using the blendEVA / blendEVB vectors.
// NOTE(review): presumably blendEVA weights colA and blendEVB weights colB — confirm.
FORCEINLINE v128u16 blend(const v128u16 &colA, const v128u16 &colB, const v128u16 &blendEVA, const v128u16 &blendEVB) const;
|
||||
template<NDSColorFormat COLORFORMAT, bool USECONSTANTBLENDVALUESHINT> FORCEINLINE v128u32 blend(const v128u32 &colA, const v128u32 &colB, const v128u16 &blendEVA, const v128u16 &blendEVB) const;
|
||||
|
||||
// blend3D: the v128u16 overload takes colA as two v128u32 halves (colA_Lo, colA_Hi)
// and colB as a v128u16; the template overload takes both colors as v128u32.
FORCEINLINE v128u16 blend3D(const v128u32 &colA_Lo, const v128u32 &colA_Hi, const v128u16 &colB) const;
|
||||
template<NDSColorFormat COLORFORMAT> FORCEINLINE v128u32 blend3D(const v128u32 &colA, const v128u32 &colB) const;
|
||||
|
||||
// increase: takes a color vector and a blendEVY vector, returns a color vector of the same type.
// NOTE(review): presumably a brightness increase scaled by blendEVY — confirm.
FORCEINLINE v128u16 increase(const v128u16 &col, const v128u16 &blendEVY) const;
|
||||
template<NDSColorFormat COLORFORMAT> FORCEINLINE v128u32 increase(const v128u32 &col, const v128u16 &blendEVY) const;
|
||||
|
||||
// decrease: same parameter shape as increase.
// NOTE(review): presumably a brightness decrease scaled by blendEVY — confirm.
FORCEINLINE v128u16 decrease(const v128u16 &col, const v128u16 &blendEVY) const;
|
||||
template<NDSColorFormat COLORFORMAT> FORCEINLINE v128u32 decrease(const v128u32 &col, const v128u16 &blendEVY) const;
|
||||
};
|
||||
|
||||
// Pixel compositing operations for the SSE2 build. This class is only declared
// when ENABLE_SSE2 is defined, and only the prototypes appear in this header.
//
// Naming pattern visible in the signatures:
//   - "...16" methods take the source colors as two v128u16 vectors (src1, src0).
//   - "...32" methods take the source colors as four v128u32 vectors (src3..src0).
//   - "...Mask..." methods additionally take a v128u8 passMask8.
// Every method receives a mutable GPUEngineCompositorInfo reference and returns void.
// NOTE(review): presumably results are written through compInfo — confirm in the definitions.
class PixelOperation_SSE2
|
||||
{
|
||||
protected:
|
||||
// Copy helpers: take only the source layer ID and the source color vectors.
template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER> FORCEINLINE void _copy16(GPUEngineCompositorInfo &compInfo, const v128u8 &srcLayerID, const v128u16 &src1, const v128u16 &src0) const;
|
||||
template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER> FORCEINLINE void _copy32(GPUEngineCompositorInfo &compInfo, const v128u8 &srcLayerID, const v128u32 &src3, const v128u32 &src2, const v128u32 &src1, const v128u32 &src0) const;
|
||||
|
||||
// Masked copy helpers: same as the copy helpers plus passMask8.
// NOTE(review): presumably passMask8 selects which pixels are written — confirm.
template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER> FORCEINLINE void _copyMask16(GPUEngineCompositorInfo &compInfo, const v128u8 &passMask8, const v128u8 &srcLayerID, const v128u16 &src1, const v128u16 &src0) const;
|
||||
template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER> FORCEINLINE void _copyMask32(GPUEngineCompositorInfo &compInfo, const v128u8 &passMask8, const v128u8 &srcLayerID, const v128u32 &src3, const v128u32 &src2, const v128u32 &src1, const v128u32 &src0) const;
|
||||
|
||||
// Brightness-up helpers: take an evy16 vector in addition to the copy-helper inputs.
template<NDSColorFormat OUTPUTFORMAT> FORCEINLINE void _brightnessUp16(GPUEngineCompositorInfo &compInfo, const v128u16 &evy16, const v128u8 &srcLayerID, const v128u16 &src1, const v128u16 &src0) const;
|
||||
template<NDSColorFormat OUTPUTFORMAT> FORCEINLINE void _brightnessUp32(GPUEngineCompositorInfo &compInfo, const v128u16 &evy16, const v128u8 &srcLayerID, const v128u32 &src3, const v128u32 &src2, const v128u32 &src1, const v128u32 &src0) const;
|
||||
|
||||
// Masked brightness-up helpers: brightness-up inputs plus passMask8.
template<NDSColorFormat OUTPUTFORMAT> FORCEINLINE void _brightnessUpMask16(GPUEngineCompositorInfo &compInfo, const v128u8 &passMask8, const v128u16 &evy16, const v128u8 &srcLayerID, const v128u16 &src1, const v128u16 &src0) const;
|
||||
template<NDSColorFormat OUTPUTFORMAT> FORCEINLINE void _brightnessUpMask32(GPUEngineCompositorInfo &compInfo, const v128u8 &passMask8, const v128u16 &evy16, const v128u8 &srcLayerID, const v128u32 &src3, const v128u32 &src2, const v128u32 &src1, const v128u32 &src0) const;
|
||||
|
||||
// Brightness-down helpers: same parameter shape as the brightness-up helpers.
template<NDSColorFormat OUTPUTFORMAT> FORCEINLINE void _brightnessDown16(GPUEngineCompositorInfo &compInfo, const v128u16 &evy16, const v128u8 &srcLayerID, const v128u16 &src1, const v128u16 &src0) const;
|
||||
template<NDSColorFormat OUTPUTFORMAT> FORCEINLINE void _brightnessDown32(GPUEngineCompositorInfo &compInfo, const v128u16 &evy16, const v128u8 &srcLayerID, const v128u32 &src3, const v128u32 &src2, const v128u32 &src1, const v128u32 &src0) const;
|
||||
|
||||
// Masked brightness-down helpers: brightness-down inputs plus passMask8.
template<NDSColorFormat OUTPUTFORMAT> FORCEINLINE void _brightnessDownMask16(GPUEngineCompositorInfo &compInfo, const v128u8 &passMask8, const v128u16 &evy16, const v128u8 &srcLayerID, const v128u16 &src1, const v128u16 &src0) const;
|
||||
template<NDSColorFormat OUTPUTFORMAT> FORCEINLINE void _brightnessDownMask32(GPUEngineCompositorInfo &compInfo, const v128u8 &passMask8, const v128u16 &evy16, const v128u8 &srcLayerID, const v128u32 &src3, const v128u32 &src2, const v128u32 &src1, const v128u32 &src0) const;
|
||||
|
||||
// "Unknown effect" helper, 16 variant. Templated on the layer type as well as the
// output format, and takes the effect-enable masks plus sprite alpha/mode as
// already-loaded v128u8 vectors.
// NOTE(review): presumably used when the color effect must be chosen per pixel — confirm.
template<NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE>
|
||||
FORCEINLINE void _unknownEffectMask16(GPUEngineCompositorInfo &compInfo,
|
||||
const v128u8 &passMask8,
|
||||
const v128u16 &evy16,
|
||||
const v128u8 &srcLayerID,
|
||||
const v128u16 &src1, const v128u16 &src0,
|
||||
const v128u8 &srcEffectEnableMask,
|
||||
const v128u8 &dstBlendEnableMaskLUT,
|
||||
const v128u8 &enableColorEffectMask,
|
||||
const v128u8 &spriteAlpha,
|
||||
const v128u8 &spriteMode) const;
|
||||
|
||||
// "Unknown effect" helper, 32 variant: same inputs, with the source colors as
// four v128u32 vectors.
template<NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE>
|
||||
FORCEINLINE void _unknownEffectMask32(GPUEngineCompositorInfo &compInfo,
|
||||
const v128u8 &passMask8,
|
||||
const v128u16 &evy16,
|
||||
const v128u8 &srcLayerID,
|
||||
const v128u32 &src3, const v128u32 &src2, const v128u32 &src1, const v128u32 &src0,
|
||||
const v128u8 &srcEffectEnableMask,
|
||||
const v128u8 &dstBlendEnableMaskLUT,
|
||||
const v128u8 &enableColorEffectMask,
|
||||
const v128u8 &spriteAlpha,
|
||||
const v128u8 &spriteMode) const;
|
||||
|
||||
public:
|
||||
PixelOperation_SSE2() {};
|
||||
|
||||
// Public compositing entry point, 16 variant. Templated on compositor mode,
// output format, layer type and whether a window test will be performed.
// Unlike _unknownEffectMask16, the color-effect enable, sprite alpha and sprite
// mode inputs arrive as u8 pointers rather than vectors, and a didAllPixelsPass
// flag accompanies passMask8.
// NOTE(review): presumably dispatches to the protected helpers based on
// COMPOSITORMODE and didAllPixelsPass — confirm in the definition.
template <GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE, bool WILLPERFORMWINDOWTEST>
|
||||
FORCEINLINE void Composite16(GPUEngineCompositorInfo &compInfo,
|
||||
const bool didAllPixelsPass,
|
||||
const v128u8 &passMask8,
|
||||
const v128u16 &evy16,
|
||||
const v128u8 &srcLayerID,
|
||||
const v128u16 &src1, const v128u16 &src0,
|
||||
const v128u8 &srcEffectEnableMask,
|
||||
const v128u8 &dstBlendEnableMaskLUT,
|
||||
const u8 *__restrict enableColorEffectPtr,
|
||||
const u8 *__restrict sprAlphaPtr,
|
||||
const u8 *__restrict sprModePtr) const;
|
||||
|
||||
// Public compositing entry point, 32 variant: same inputs, with the source
// colors as four v128u32 vectors.
template <GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE, bool WILLPERFORMWINDOWTEST>
|
||||
FORCEINLINE void Composite32(GPUEngineCompositorInfo &compInfo,
|
||||
const bool didAllPixelsPass,
|
||||
const v128u8 &passMask8,
|
||||
const v128u16 &evy16,
|
||||
const v128u8 &srcLayerID,
|
||||
const v128u32 &src3, const v128u32 &src2, const v128u32 &src1, const v128u32 &src0,
|
||||
const v128u8 &srcEffectEnableMask,
|
||||
const v128u8 &dstBlendEnableMaskLUT,
|
||||
const u8 *__restrict enableColorEffectPtr,
|
||||
const u8 *__restrict sprAlphaPtr,
|
||||
const u8 *__restrict sprModePtr) const;
|
||||
};
|
||||
|
||||
#endif // ENABLE_SSE2
|
||||
|
||||
#endif // GPU_OPERATIONS_SSE2_H
|
|
@ -46,7 +46,7 @@
|
|||
#include "driver.h"
|
||||
#include "emufile.h"
|
||||
#include "matrix.h"
|
||||
#include "GPU.h"
|
||||
#include "GPU_Operations.h"
|
||||
#include "MMU.h"
|
||||
#include "render3D.h"
|
||||
#include "mem.h"
|
||||
|
|
Loading…
Reference in New Issue