FIFO.cpp: Add new manually vectorized versions of FIFO buffer line copies for AVX2, AVX-512, and AltiVec.
This commit is contained in:
parent
79437371e3
commit
31851c2524
|
@ -1,7 +1,7 @@
|
||||||
/*
|
/*
|
||||||
Copyright 2006 yopyop
|
Copyright 2006 yopyop
|
||||||
Copyright 2007 shash
|
Copyright 2007 shash
|
||||||
Copyright 2007-2015 DeSmuME team
|
Copyright 2007-2021 DeSmuME team
|
||||||
|
|
||||||
This file is free software: you can redistribute it and/or modify
|
This file is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
@ -29,6 +29,29 @@
|
||||||
#include "NDSSystem.h"
|
#include "NDSSystem.h"
|
||||||
#include "gfx3d.h"
|
#include "gfx3d.h"
|
||||||
|
|
||||||
|
#if defined(ENABLE_AVX512_1)
|
||||||
|
#define USEVECTORSIZE_512
|
||||||
|
#define VECTORSIZE 64
|
||||||
|
#include "./utils/colorspacehandler/colorspacehandler_AVX512.h"
|
||||||
|
#elif defined(ENABLE_AVX2)
|
||||||
|
#define USEVECTORSIZE_256
|
||||||
|
#define VECTORSIZE 32
|
||||||
|
#include "./utils/colorspacehandler/colorspacehandler_AVX2.h"
|
||||||
|
#elif defined(ENABLE_SSE2)
|
||||||
|
#define USEVECTORSIZE_128
|
||||||
|
#define VECTORSIZE 16
|
||||||
|
#include "./utils/colorspacehandler/colorspacehandler_SSE2.h"
|
||||||
|
#elif defined(ENABLE_ALTIVEC)
|
||||||
|
#define USEVECTORSIZE_128
|
||||||
|
#define VECTORSIZE 16
|
||||||
|
#include "./utils/colorspacehandler/colorspacehandler_AltiVec.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(USEVECTORSIZE_512) || defined(USEVECTORSIZE_256) || defined(USEVECTORSIZE_128)
|
||||||
|
#define USEMANUALVECTORIZATION
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
// ========================================================= IPC FIFO
|
// ========================================================= IPC FIFO
|
||||||
IPC_FIFO ipc_fifo[2];
|
IPC_FIFO ipc_fifo[2];
|
||||||
|
|
||||||
|
@ -317,23 +340,233 @@ void DISP_FIFOinit()
|
||||||
memset(&disp_fifo, 0, sizeof(DISP_FIFO));
|
memset(&disp_fifo, 0, sizeof(DISP_FIFO));
|
||||||
}
|
}
|
||||||
|
|
||||||
void DISP_FIFOsend(u32 val)
|
void DISP_FIFOsend_u32(u32 val)
|
||||||
{
|
{
|
||||||
//INFO("DISP_FIFO send value 0x%08X (head 0x%06X, tail 0x%06X)\n", val, disp_fifo.head, disp_fifo.tail);
|
//INFO("DISP_FIFO send value 0x%08X (head 0x%06X, tail 0x%06X)\n", val, disp_fifo.head, disp_fifo.tail);
|
||||||
disp_fifo.buf[disp_fifo.tail] = val;
|
disp_fifo.buf[disp_fifo.tail] = val;
|
||||||
|
|
||||||
disp_fifo.tail++;
|
disp_fifo.tail++;
|
||||||
if (disp_fifo.tail > 0x5FFF)
|
if (disp_fifo.head >= 0x6000)
|
||||||
disp_fifo.tail = 0;
|
{
|
||||||
|
disp_fifo.head -= 0x6000;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 DISP_FIFOrecv()
|
u32 DISP_FIFOrecv_u32()
|
||||||
{
|
{
|
||||||
//if (disp_fifo.tail == disp_fifo.head) return (0); // FIFO is empty
|
//if (disp_fifo.tail == disp_fifo.head) return (0); // FIFO is empty
|
||||||
u32 val = disp_fifo.buf[disp_fifo.head];
|
u32 val = disp_fifo.buf[disp_fifo.head];
|
||||||
|
|
||||||
disp_fifo.head++;
|
disp_fifo.head++;
|
||||||
if (disp_fifo.head > 0x5FFF)
|
if (disp_fifo.head >= 0x6000)
|
||||||
disp_fifo.head = 0;
|
{
|
||||||
return (val);
|
disp_fifo.head -= 0x6000;
|
||||||
|
}
|
||||||
|
|
||||||
|
return val;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void _DISP_FIFOrecv_LineAdvance()
|
||||||
|
{
|
||||||
|
disp_fifo.head += (GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16)) / sizeof(u32);
|
||||||
|
if (disp_fifo.head >= 0x6000)
|
||||||
|
{
|
||||||
|
disp_fifo.head -= 0x6000;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void DISP_FIFOrecv_Line16(u16 *__restrict dst)
|
||||||
|
{
|
||||||
|
#ifndef ENABLE_ALTIVEC // buffer_copy_fast() doesn't support endian swapping
|
||||||
|
if ( (disp_fifo.head + (GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16)) / sizeof(u32) <= 0x6000)
|
||||||
|
#ifdef USEMANUALVECTORIZATION
|
||||||
|
&& (disp_fifo.head == (disp_fifo.head & ~(VECTORSIZE - 1)))
|
||||||
|
#endif
|
||||||
|
)
|
||||||
|
{
|
||||||
|
buffer_copy_fast<GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16)>(dst, disp_fifo.buf + disp_fifo.head);
|
||||||
|
_DISP_FIFOrecv_LineAdvance();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
#endif // ENABLE_ALTIVEC
|
||||||
|
{
|
||||||
|
for (size_t i = 0; i < GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16) / sizeof(u32); i++)
|
||||||
|
{
|
||||||
|
((u32 *)dst)[i] = LE_TO_LOCAL_32( DISP_FIFOrecv_u32() );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef USEMANUALVECTORIZATION
|
||||||
|
|
||||||
|
template <NDSColorFormat OUTPUTFORMAT>
|
||||||
|
void _DISP_FIFOrecv_LineOpaque16_vec(u32 *__restrict dst)
|
||||||
|
{
|
||||||
|
buffer_copy_or_constant_s16_fast<GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16), false>(dst, disp_fifo.buf + disp_fifo.head, 0x8000);
|
||||||
|
_DISP_FIFOrecv_LineAdvance();
|
||||||
|
}
|
||||||
|
|
||||||
|
template <NDSColorFormat OUTPUTFORMAT>
|
||||||
|
void _DISP_FIFOrecv_LineOpaque32_vec(u32 *__restrict dst)
|
||||||
|
{
|
||||||
|
#if defined(ENABLE_AVX512_0)
|
||||||
|
for (size_t i = 0, d = 0; i < GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16) / sizeof(v512u16); i++, d+=2)
|
||||||
|
{
|
||||||
|
const v512u16 fifoColor = _mm512_load_si512((v512u16 *)(disp_fifo.buf + disp_fifo.head));
|
||||||
|
|
||||||
|
disp_fifo.head += (sizeof(v512u16)/sizeof(u32));
|
||||||
|
if (disp_fifo.head >= 0x6000)
|
||||||
|
{
|
||||||
|
disp_fifo.head -= 0x6000;
|
||||||
|
}
|
||||||
|
|
||||||
|
v512u32 dstLo = _mm512_setzero_si512();
|
||||||
|
v512u32 dstHi = _mm512_setzero_si512();
|
||||||
|
|
||||||
|
if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev)
|
||||||
|
{
|
||||||
|
ColorspaceConvert555To6665Opaque_AVX512<false>(fifoColor, dstLo, dstHi);
|
||||||
|
}
|
||||||
|
else if (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev)
|
||||||
|
{
|
||||||
|
ColorspaceConvert555To8888Opaque_AVX512<false>(fifoColor, dstLo, dstHi);
|
||||||
|
}
|
||||||
|
|
||||||
|
_mm512_store_si512((v512u32 *)dst + d + 0, dstLo);
|
||||||
|
_mm512_store_si512((v512u32 *)dst + d + 1, dstHi);
|
||||||
|
}
|
||||||
|
#elif defined(ENABLE_AVX2)
|
||||||
|
for (size_t i = 0, d = 0; i < GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16) / sizeof(v256u16); i++, d+=2)
|
||||||
|
{
|
||||||
|
const v256u16 fifoColor = _mm256_load_si256((v256u16 *)(disp_fifo.buf + disp_fifo.head));
|
||||||
|
|
||||||
|
disp_fifo.head += (sizeof(v256u16)/sizeof(u32));
|
||||||
|
if (disp_fifo.head >= 0x6000)
|
||||||
|
{
|
||||||
|
disp_fifo.head -= 0x6000;
|
||||||
|
}
|
||||||
|
|
||||||
|
v256u32 dstLo = _mm256_setzero_si256();
|
||||||
|
v256u32 dstHi = _mm256_setzero_si256();
|
||||||
|
|
||||||
|
if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev)
|
||||||
|
{
|
||||||
|
ColorspaceConvert555To6665Opaque_AVX2<false>(fifoColor, dstLo, dstHi);
|
||||||
|
}
|
||||||
|
else if (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev)
|
||||||
|
{
|
||||||
|
ColorspaceConvert555To8888Opaque_AVX2<false>(fifoColor, dstLo, dstHi);
|
||||||
|
}
|
||||||
|
|
||||||
|
_mm256_store_si256((v256u32 *)dst + d + 0, dstLo);
|
||||||
|
_mm256_store_si256((v256u32 *)dst + d + 1, dstHi);
|
||||||
|
}
|
||||||
|
#elif defined(ENABLE_SSE2)
|
||||||
|
for (size_t i = 0, d = 0; i < GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16) / sizeof(v128u16); i++, d+=2)
|
||||||
|
{
|
||||||
|
const v128u16 fifoColor = _mm_load_si128((v128u16 *)(disp_fifo.buf + disp_fifo.head));
|
||||||
|
|
||||||
|
disp_fifo.head += (sizeof(v128u16)/sizeof(u32));
|
||||||
|
if (disp_fifo.head >= 0x6000)
|
||||||
|
{
|
||||||
|
disp_fifo.head -= 0x6000;
|
||||||
|
}
|
||||||
|
|
||||||
|
v128u32 dstLo = _mm_setzero_si128();
|
||||||
|
v128u32 dstHi = _mm_setzero_si128();
|
||||||
|
|
||||||
|
if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev)
|
||||||
|
{
|
||||||
|
ColorspaceConvert555To6665Opaque_SSE2<false>(fifoColor, dstLo, dstHi);
|
||||||
|
}
|
||||||
|
else if (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev)
|
||||||
|
{
|
||||||
|
ColorspaceConvert555To8888Opaque_SSE2<false>(fifoColor, dstLo, dstHi);
|
||||||
|
}
|
||||||
|
|
||||||
|
_mm_store_si128((v128u32 *)dst + d + 0, dstLo);
|
||||||
|
_mm_store_si128((v128u32 *)dst + d + 1, dstHi);
|
||||||
|
}
|
||||||
|
#elif defined(ENABLE_ALTIVEC)
|
||||||
|
for (size_t i = 0, d = 0; i < GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16); i+=16, d+=32)
|
||||||
|
{
|
||||||
|
const v128u16 fifoColor = vec_ld(disp_fifo.head, disp_fifo.buf);
|
||||||
|
|
||||||
|
disp_fifo.head += (sizeof(v128u16)/sizeof(u32));
|
||||||
|
if (disp_fifo.head >= 0x6000)
|
||||||
|
{
|
||||||
|
disp_fifo.head -= 0x6000;
|
||||||
|
}
|
||||||
|
|
||||||
|
v128u32 dstLo = ((v128u32){0,0,0,0});
|
||||||
|
v128u32 dstHi = ((v128u32){0,0,0,0});
|
||||||
|
|
||||||
|
if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev)
|
||||||
|
{
|
||||||
|
ColorspaceConvert555To6665Opaque_Altivec<false>(fifoColor, dstLo, dstHi);
|
||||||
|
}
|
||||||
|
else if (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev)
|
||||||
|
{
|
||||||
|
ColorspaceConvert555To8888Opaque_Altivec<false>(fifoColor, dstLo, dstHi);
|
||||||
|
}
|
||||||
|
|
||||||
|
vec_st(dstLo, d + 0, dst);
|
||||||
|
vec_st(dstHi, d + 16, dst);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
template <NDSColorFormat OUTPUTFORMAT>
|
||||||
|
void DISP_FIFOrecv_LineOpaque(u32 *__restrict dst)
|
||||||
|
{
|
||||||
|
#ifdef USEMANUALVECTORIZATION
|
||||||
|
if ( (disp_fifo.head + (GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16)) / sizeof(u32) <= 0x6000) && (disp_fifo.head == (disp_fifo.head & ~(VECTORSIZE - 1))) )
|
||||||
|
{
|
||||||
|
if (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev)
|
||||||
|
{
|
||||||
|
_DISP_FIFOrecv_LineOpaque16_vec<OUTPUTFORMAT>(dst);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
_DISP_FIFOrecv_LineOpaque32_vec<OUTPUTFORMAT>(dst);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
if (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev)
|
||||||
|
{
|
||||||
|
for (size_t i = 0; i < GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16) / sizeof(u32); i++)
|
||||||
|
{
|
||||||
|
const u32 src = DISP_FIFOrecv_u32();
|
||||||
|
#ifdef MSB_FIRST
|
||||||
|
dst[i] = (src >> 16) | (src << 16) | 0x80008000;
|
||||||
|
#else
|
||||||
|
dst[i] = src | 0x80008000;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for (size_t i = 0; i < GPU_FRAMEBUFFER_NATIVE_WIDTH; i+=2)
|
||||||
|
{
|
||||||
|
const u32 src = DISP_FIFOrecv_u32();
|
||||||
|
|
||||||
|
if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev)
|
||||||
|
{
|
||||||
|
dst[i+0] = COLOR555TO6665_OPAQUE((src >> 0) & 0x7FFF);
|
||||||
|
dst[i+1] = COLOR555TO6665_OPAQUE((src >> 16) & 0x7FFF);
|
||||||
|
}
|
||||||
|
else if (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev)
|
||||||
|
{
|
||||||
|
dst[i+0] = COLOR555TO8888_OPAQUE((src >> 0) & 0x7FFF);
|
||||||
|
dst[i+1] = COLOR555TO8888_OPAQUE((src >> 16) & 0x7FFF);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void DISP_FIFOreset()
|
void DISP_FIFOreset()
|
||||||
|
@ -341,3 +574,7 @@ void DISP_FIFOreset()
|
||||||
disp_fifo.head = 0;
|
disp_fifo.head = 0;
|
||||||
disp_fifo.tail = 0;
|
disp_fifo.tail = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template void DISP_FIFOrecv_LineOpaque<NDSColorFormat_BGR555_Rev>(u32 *__restrict dst);
|
||||||
|
template void DISP_FIFOrecv_LineOpaque<NDSColorFormat_BGR666_Rev>(u32 *__restrict dst);
|
||||||
|
template void DISP_FIFOrecv_LineOpaque<NDSColorFormat_BGR888_Rev>(u32 *__restrict dst);
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
/*
|
/*
|
||||||
Copyright 2006 yopyop
|
Copyright 2006 yopyop
|
||||||
Copyright 2007 shash
|
Copyright 2007 shash
|
||||||
Copyright 2007-2011 DeSmuME team
|
Copyright 2007-2021 DeSmuME team
|
||||||
|
|
||||||
This file is free software: you can redistribute it and/or modify
|
This file is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
@ -22,6 +22,7 @@
|
||||||
#define FIFO_H
|
#define FIFO_H
|
||||||
|
|
||||||
#include "types.h"
|
#include "types.h"
|
||||||
|
#include "./utils/colorspacehandler/colorspacehandler.h"
|
||||||
|
|
||||||
//=================================================== IPC FIFO
|
//=================================================== IPC FIFO
|
||||||
typedef struct
|
typedef struct
|
||||||
|
@ -78,15 +79,20 @@ void GFX_FIFOcnt(u32 val);
|
||||||
//=================================================== Display memory FIFO
|
//=================================================== Display memory FIFO
|
||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
u32 buf[0x6000]; // 256x192 32K color
|
CACHE_ALIGN u32 buf[0x6000]; // 256x192 32K color
|
||||||
u32 head; // head
|
u32 head; // head
|
||||||
u32 tail; // tail
|
u32 tail; // tail
|
||||||
} DISP_FIFO;
|
} DISP_FIFO;
|
||||||
|
|
||||||
extern DISP_FIFO disp_fifo;
|
extern DISP_FIFO disp_fifo;
|
||||||
void DISP_FIFOinit();
|
void DISP_FIFOinit();
|
||||||
void DISP_FIFOsend(u32 val);
|
|
||||||
u32 DISP_FIFOrecv();
|
void DISP_FIFOsend_u32(u32 val);
|
||||||
|
u32 DISP_FIFOrecv_u32();
|
||||||
|
|
||||||
|
void DISP_FIFOrecv_Line16(u16 *__restrict dst);
|
||||||
|
template<NDSColorFormat OUTPUTFORMAT> void DISP_FIFOrecv_LineOpaque(u32 *__restrict dst);
|
||||||
|
|
||||||
void DISP_FIFOreset();
|
void DISP_FIFOreset();
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
Copyright (C) 2006 yopyop
|
Copyright (C) 2006 yopyop
|
||||||
Copyright (C) 2006-2007 Theo Berkau
|
Copyright (C) 2006-2007 Theo Berkau
|
||||||
Copyright (C) 2007 shash
|
Copyright (C) 2007 shash
|
||||||
Copyright (C) 2008-2019 DeSmuME team
|
Copyright (C) 2008-2021 DeSmuME team
|
||||||
|
|
||||||
This file is free software: you can redistribute it and/or modify
|
This file is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
@ -7784,22 +7784,7 @@ void GPUEngineA::_RenderLine_DisplayCapture(const GPUEngineCompositorInfo &compI
|
||||||
|
|
||||||
void GPUEngineA::_RenderLine_DispCapture_FIFOToBuffer(u16 *fifoLineBuffer)
|
void GPUEngineA::_RenderLine_DispCapture_FIFOToBuffer(u16 *fifoLineBuffer)
|
||||||
{
|
{
|
||||||
#ifdef ENABLE_SSE2
|
DISP_FIFOrecv_Line16(fifoLineBuffer);
|
||||||
for (size_t i = 0; i < GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16) / sizeof(__m128i); i++)
|
|
||||||
{
|
|
||||||
const u32 srcA = DISP_FIFOrecv();
|
|
||||||
const u32 srcB = DISP_FIFOrecv();
|
|
||||||
const u32 srcC = DISP_FIFOrecv();
|
|
||||||
const u32 srcD = DISP_FIFOrecv();
|
|
||||||
const __m128i fifoColor = _mm_setr_epi32(srcA, srcB, srcC, srcD);
|
|
||||||
_mm_store_si128((__m128i *)fifoLineBuffer + i, fifoColor);
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
for (size_t i = 0; i < GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16) / sizeof(u32); i++)
|
|
||||||
{
|
|
||||||
((u32 *)fifoLineBuffer)[i] = LE_TO_LOCAL_32( DISP_FIFOrecv() );
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<NDSColorFormat COLORFORMAT, int SOURCESWITCH, size_t CAPTURELENGTH, bool CAPTUREFROMNATIVESRC, bool CAPTURETONATIVEDST>
|
template<NDSColorFormat COLORFORMAT, int SOURCESWITCH, size_t CAPTURELENGTH, bool CAPTUREFROMNATIVESRC, bool CAPTURETONATIVEDST>
|
||||||
|
@ -8461,100 +8446,23 @@ void GPUEngineA::_HandleDisplayModeMainMemory(const GPUEngineLineInfo &lineInfo)
|
||||||
// Displays video using color data directly read from main memory.
|
// Displays video using color data directly read from main memory.
|
||||||
// Doing this should always result in an output line that is at the native size (192px x 1px).
|
// Doing this should always result in an output line that is at the native size (192px x 1px).
|
||||||
|
|
||||||
#ifdef ENABLE_SSE2
|
|
||||||
switch (OUTPUTFORMAT)
|
switch (OUTPUTFORMAT)
|
||||||
{
|
{
|
||||||
case NDSColorFormat_BGR555_Rev:
|
case NDSColorFormat_BGR555_Rev:
|
||||||
{
|
{
|
||||||
u32 *__restrict dst = (u32 *__restrict)((u16 *)this->_nativeBuffer + (lineInfo.indexNative * GPU_FRAMEBUFFER_NATIVE_WIDTH));
|
u32 *__restrict dst = (u32 *__restrict)((u16 *)this->_nativeBuffer + (lineInfo.indexNative * GPU_FRAMEBUFFER_NATIVE_WIDTH));
|
||||||
const __m128i alphaBit = _mm_set1_epi16(0x8000);
|
DISP_FIFOrecv_LineOpaque<OUTPUTFORMAT>(dst);
|
||||||
|
|
||||||
for (size_t i = 0; i < GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16) / sizeof(__m128i); i++)
|
|
||||||
{
|
|
||||||
const u32 srcA = DISP_FIFOrecv();
|
|
||||||
const u32 srcB = DISP_FIFOrecv();
|
|
||||||
const u32 srcC = DISP_FIFOrecv();
|
|
||||||
const u32 srcD = DISP_FIFOrecv();
|
|
||||||
const __m128i fifoColor = _mm_setr_epi32(srcA, srcB, srcC, srcD);
|
|
||||||
_mm_store_si128((__m128i *)dst + i, _mm_or_si128(fifoColor, alphaBit));
|
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case NDSColorFormat_BGR666_Rev:
|
case NDSColorFormat_BGR666_Rev:
|
||||||
case NDSColorFormat_BGR888_Rev:
|
case NDSColorFormat_BGR888_Rev:
|
||||||
{
|
{
|
||||||
FragmentColor *__restrict dst = (FragmentColor *__restrict)this->_nativeBuffer + (lineInfo.indexNative * GPU_FRAMEBUFFER_NATIVE_WIDTH);
|
u32 *__restrict dst = (u32 *__restrict)this->_nativeBuffer + (lineInfo.indexNative * GPU_FRAMEBUFFER_NATIVE_WIDTH);
|
||||||
|
DISP_FIFOrecv_LineOpaque<OUTPUTFORMAT>(dst);
|
||||||
for (size_t i = 0, d = 0; i < GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16) / sizeof(__m128i); i++, d+=2)
|
|
||||||
{
|
|
||||||
const u32 srcA = DISP_FIFOrecv();
|
|
||||||
const u32 srcB = DISP_FIFOrecv();
|
|
||||||
const u32 srcC = DISP_FIFOrecv();
|
|
||||||
const u32 srcD = DISP_FIFOrecv();
|
|
||||||
const __m128i fifoColor = _mm_setr_epi32(srcA, srcB, srcC, srcD);
|
|
||||||
|
|
||||||
__m128i dstLo = _mm_setzero_si128();
|
|
||||||
__m128i dstHi = _mm_setzero_si128();
|
|
||||||
|
|
||||||
if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev)
|
|
||||||
{
|
|
||||||
ColorspaceConvert555To6665Opaque_SSE2<false>(fifoColor, dstLo, dstHi);
|
|
||||||
}
|
|
||||||
else if (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev)
|
|
||||||
{
|
|
||||||
ColorspaceConvert555To8888Opaque_SSE2<false>(fifoColor, dstLo, dstHi);
|
|
||||||
}
|
|
||||||
|
|
||||||
_mm_store_si128((__m128i *)dst + d + 0, dstLo);
|
|
||||||
_mm_store_si128((__m128i *)dst + d + 1, dstHi);
|
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#else
|
|
||||||
switch (OUTPUTFORMAT)
|
|
||||||
{
|
|
||||||
case NDSColorFormat_BGR555_Rev:
|
|
||||||
{
|
|
||||||
u32 *__restrict dst = (u32 *__restrict)((u16 *)this->_nativeBuffer + (lineInfo.indexNative * GPU_FRAMEBUFFER_NATIVE_WIDTH));
|
|
||||||
for (size_t i = 0; i < GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16) / sizeof(u32); i++)
|
|
||||||
{
|
|
||||||
const u32 src = DISP_FIFOrecv();
|
|
||||||
#ifdef MSB_FIRST
|
|
||||||
dst[i] = (src >> 16) | (src << 16) | 0x80008000;
|
|
||||||
#else
|
|
||||||
dst[i] = src | 0x80008000;
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case NDSColorFormat_BGR666_Rev:
|
|
||||||
{
|
|
||||||
FragmentColor *__restrict dst = (FragmentColor *__restrict)this->_nativeBuffer + (lineInfo.indexNative * GPU_FRAMEBUFFER_NATIVE_WIDTH);
|
|
||||||
for (size_t i = 0; i < GPU_FRAMEBUFFER_NATIVE_WIDTH; i+=2)
|
|
||||||
{
|
|
||||||
const u32 src = DISP_FIFOrecv();
|
|
||||||
dst[i+0].color = COLOR555TO6665_OPAQUE((src >> 0) & 0x7FFF);
|
|
||||||
dst[i+1].color = COLOR555TO6665_OPAQUE((src >> 16) & 0x7FFF);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case NDSColorFormat_BGR888_Rev:
|
|
||||||
{
|
|
||||||
FragmentColor *__restrict dst = (FragmentColor *__restrict)this->_nativeBuffer + (lineInfo.indexNative * GPU_FRAMEBUFFER_NATIVE_WIDTH);
|
|
||||||
for (size_t i = 0; i < GPU_FRAMEBUFFER_NATIVE_WIDTH; i+=2)
|
|
||||||
{
|
|
||||||
const u32 src = DISP_FIFOrecv();
|
|
||||||
dst[i+0].color = COLOR555TO8888_OPAQUE((src >> 0) & 0x7FFF);
|
|
||||||
dst[i+1].color = COLOR555TO8888_OPAQUE((src >> 16) & 0x7FFF);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool WILLDEFERCOMPOSITING>
|
template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool WILLDEFERCOMPOSITING>
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
/*
|
/*
|
||||||
Copyright (C) 2006 yopyop
|
Copyright (C) 2006 yopyop
|
||||||
Copyright (C) 2007 shash
|
Copyright (C) 2007 shash
|
||||||
Copyright (C) 2007-2018 DeSmuME team
|
Copyright (C) 2007-2021 DeSmuME team
|
||||||
|
|
||||||
This file is free software: you can redistribute it and/or modify
|
This file is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
@ -3427,7 +3427,7 @@ void FASTCALL _MMU_ARM9_write08(u32 adr, u8 val)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
case REG_DISPA_DISPMMEMFIFO:
|
case REG_DISPA_DISPMMEMFIFO:
|
||||||
DISP_FIFOsend(val);
|
DISP_FIFOsend_u32(val);
|
||||||
return;
|
return;
|
||||||
|
|
||||||
case REG_DISPB_BG0HOFS:
|
case REG_DISPB_BG0HOFS:
|
||||||
|
@ -3936,7 +3936,7 @@ void FASTCALL _MMU_ARM9_write16(u32 adr, u16 val)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
case REG_DISPA_DISPMMEMFIFO:
|
case REG_DISPA_DISPMMEMFIFO:
|
||||||
DISP_FIFOsend(val);
|
DISP_FIFOsend_u32(val);
|
||||||
return;
|
return;
|
||||||
|
|
||||||
case REG_DISPA_MASTERBRIGHT:
|
case REG_DISPA_MASTERBRIGHT:
|
||||||
|
@ -4503,7 +4503,7 @@ void FASTCALL _MMU_ARM9_write32(u32 adr, u32 val)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
case REG_DISPA_DISPMMEMFIFO:
|
case REG_DISPA_DISPMMEMFIFO:
|
||||||
DISP_FIFOsend(val);
|
DISP_FIFOsend_u32(val);
|
||||||
return;
|
return;
|
||||||
|
|
||||||
case REG_DISPA_MASTERBRIGHT:
|
case REG_DISPA_MASTERBRIGHT:
|
||||||
|
|
Loading…
Reference in New Issue