FIFO.cpp: Code cleanup; remove AltiVec-specific code from display FIFO, as it is no longer needed.
- The new code works by pre-swapping big-endian words when they are written to disp_fifo.buf, rather than swapping the big-endian words when they are read from disp_fifo.buf.
- There is a behavior change here. Before, every 8-bit and 16-bit write to disp_fifo.buf would increment disp_fifo.tail. Now, 8-bit and 16-bit writes only increment disp_fifo.tail when the most significant bit within the FIFO value's 32-bit boundary is written to.
- Behavior is unchanged when doing 32-bit writes. In practice, the rare games that use display FIFO have only ever done 32-bit writes, so this scenario is well tested.
This commit is contained in:
parent
f8a7723e86
commit
5ab59eac86
|
@ -44,7 +44,6 @@
|
||||||
#elif defined(ENABLE_ALTIVEC)
|
#elif defined(ENABLE_ALTIVEC)
|
||||||
#define USEVECTORSIZE_128
|
#define USEVECTORSIZE_128
|
||||||
#define VECTORSIZE 16
|
#define VECTORSIZE 16
|
||||||
#include "./utils/colorspacehandler/colorspacehandler_AltiVec.h"
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(USEVECTORSIZE_512) || defined(USEVECTORSIZE_256) || defined(USEVECTORSIZE_128)
|
#if defined(USEVECTORSIZE_512) || defined(USEVECTORSIZE_256) || defined(USEVECTORSIZE_128)
|
||||||
|
@ -340,12 +339,96 @@ void DISP_FIFOinit()
|
||||||
memset(&disp_fifo, 0, sizeof(DISP_FIFO));
|
memset(&disp_fifo, 0, sizeof(DISP_FIFO));
|
||||||
}
|
}
|
||||||
|
|
||||||
void DISP_FIFOsend_u32(u32 val)
|
template <typename T, size_t ADDROFFSET>
|
||||||
|
void DISP_FIFOsend(const T val)
|
||||||
{
|
{
|
||||||
//INFO("DISP_FIFO send value 0x%08X (head 0x%06X, tail 0x%06X)\n", val, disp_fifo.head, disp_fifo.tail);
|
//INFO("DISP_FIFO send value 0x%08X (head 0x%06X, tail 0x%06X)\n", val, disp_fifo.head, disp_fifo.tail);
|
||||||
disp_fifo.buf[disp_fifo.tail] = val;
|
|
||||||
|
|
||||||
disp_fifo.tail++;
|
const size_t numBytes = sizeof(T);
|
||||||
|
const size_t baseWriteAddress = disp_fifo.tail * sizeof(u32);
|
||||||
|
const size_t finalWriteAddress = baseWriteAddress + ADDROFFSET;
|
||||||
|
|
||||||
|
switch (numBytes)
|
||||||
|
{
|
||||||
|
case 1:
|
||||||
|
{
|
||||||
|
#ifndef MSB_FIRST
|
||||||
|
HostWriteByte((u8 *)disp_fifo.buf, finalWriteAddress, val);
|
||||||
|
#else
|
||||||
|
switch (ADDROFFSET)
|
||||||
|
{
|
||||||
|
case 0:
|
||||||
|
HostWriteByte((u8 *)disp_fifo.buf, baseWriteAddress + 2, val);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 1:
|
||||||
|
HostWriteByte((u8 *)disp_fifo.buf, baseWriteAddress + 3, val);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 2:
|
||||||
|
HostWriteByte((u8 *)disp_fifo.buf, baseWriteAddress + 0, val);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 3:
|
||||||
|
HostWriteByte((u8 *)disp_fifo.buf, baseWriteAddress + 1, val);
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef MSB_FIRST
|
||||||
|
if (ADDROFFSET == 3)
|
||||||
|
#else
|
||||||
|
if (ADDROFFSET == 1)
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
disp_fifo.tail++;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
case 2:
|
||||||
|
{
|
||||||
|
#ifndef MSB_FIRST
|
||||||
|
HostWriteWord((u8 *)disp_fifo.buf, finalWriteAddress, val);
|
||||||
|
#else
|
||||||
|
switch (ADDROFFSET)
|
||||||
|
{
|
||||||
|
case 0:
|
||||||
|
HostWriteWord((u8 *)disp_fifo.buf, baseWriteAddress + 2, val);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 2:
|
||||||
|
HostWriteWord((u8 *)disp_fifo.buf, baseWriteAddress + 0, val);
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef MSB_FIRST
|
||||||
|
if (ADDROFFSET == 2)
|
||||||
|
#else
|
||||||
|
if (ADDROFFSET == 0)
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
disp_fifo.tail++;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
case 4:
|
||||||
|
HostWriteTwoWords((u8 *)disp_fifo.buf, finalWriteAddress, val);
|
||||||
|
disp_fifo.tail++;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
if (disp_fifo.tail >= 0x6000)
|
if (disp_fifo.tail >= 0x6000)
|
||||||
{
|
{
|
||||||
disp_fifo.tail = 0;
|
disp_fifo.tail = 0;
|
||||||
|
@ -380,19 +463,7 @@ void DISP_FIFOrecv_Line16(u16 *__restrict dst)
|
||||||
#ifdef USEMANUALVECTORIZATION
|
#ifdef USEMANUALVECTORIZATION
|
||||||
if ( (disp_fifo.head + (GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16)) / sizeof(u32) <= 0x6000) && (disp_fifo.head == (disp_fifo.head & ~(VECTORSIZE - 1))) )
|
if ( (disp_fifo.head + (GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16)) / sizeof(u32) <= 0x6000) && (disp_fifo.head == (disp_fifo.head & ~(VECTORSIZE - 1))) )
|
||||||
{
|
{
|
||||||
#ifdef ENABLE_ALTIVEC
|
|
||||||
// Big-endian systems read the pixels in their correct bit order, but swap 16-bit chunks
|
|
||||||
// within 32-bit lanes, and so we can't use a standard buffer copy function here.
|
|
||||||
for (size_t i = 0; i < GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16); i+=sizeof(v128u16))
|
|
||||||
{
|
|
||||||
v128u16 fifoColor = vec_ld(i, disp_fifo.buf + disp_fifo.head);
|
|
||||||
fifoColor = vec_perm( (v128u8)fifoColor, (v128u8)fifoColor, ((v128u8){2,3, 0,1, 6,7, 4,5, 10,11, 8,9, 14,15, 12,13}) );
|
|
||||||
vec_st(fifoColor, i, dst);
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
buffer_copy_fast<GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16)>(dst, disp_fifo.buf + disp_fifo.head);
|
buffer_copy_fast<GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16)>(dst, disp_fifo.buf + disp_fifo.head);
|
||||||
#endif // ENABLE_ALTIVEC
|
|
||||||
|
|
||||||
_DISP_FIFOrecv_LineAdvance();
|
_DISP_FIFOrecv_LineAdvance();
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -401,82 +472,11 @@ void DISP_FIFOrecv_Line16(u16 *__restrict dst)
|
||||||
for (size_t i = 0; i < GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16) / sizeof(u32); i++)
|
for (size_t i = 0; i < GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16) / sizeof(u32); i++)
|
||||||
{
|
{
|
||||||
const u32 src = DISP_FIFOrecv_u32();
|
const u32 src = DISP_FIFOrecv_u32();
|
||||||
#ifdef MSB_FIRST
|
|
||||||
((u32 *)dst)[i] = (src >> 16) | (src << 16);
|
|
||||||
#else
|
|
||||||
((u32 *)dst)[i] = src;
|
((u32 *)dst)[i] = src;
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef USEMANUALVECTORIZATION
|
|
||||||
|
|
||||||
template <NDSColorFormat OUTPUTFORMAT>
|
|
||||||
void _DISP_FIFOrecv_LineOpaque16_vec(u32 *__restrict dst)
|
|
||||||
{
|
|
||||||
#ifdef ENABLE_ALTIVEC
|
|
||||||
// Big-endian systems read the pixels in their correct bit order, but swap 16-bit chunks
|
|
||||||
// within 32-bit lanes, and so we can't use a standard buffer copy function here.
|
|
||||||
for (size_t i = 0; i < GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16); i+=sizeof(v128u16))
|
|
||||||
{
|
|
||||||
v128u16 fifoColor = vec_ld(i, disp_fifo.buf + disp_fifo.head);
|
|
||||||
fifoColor = vec_perm( (v128u8)fifoColor, (v128u8)fifoColor, ((v128u8){2,3, 0,1, 6,7, 4,5, 10,11, 8,9, 14,15, 12,13}) );
|
|
||||||
fifoColor = vec_or(fifoColor, ((v128u16){0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000}));
|
|
||||||
vec_st(fifoColor, i, dst);
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
buffer_copy_or_constant_s16_fast<GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16), false>(dst, disp_fifo.buf + disp_fifo.head, 0x8000);
|
|
||||||
#endif // ENABLE_ALTIVEC
|
|
||||||
|
|
||||||
_DISP_FIFOrecv_LineAdvance();
|
|
||||||
}
|
|
||||||
|
|
||||||
template <NDSColorFormat OUTPUTFORMAT>
|
|
||||||
void _DISP_FIFOrecv_LineOpaque32_vec(u32 *__restrict dst)
|
|
||||||
{
|
|
||||||
#ifdef ENABLE_ALTIVEC
|
|
||||||
for (size_t i = 0, d = 0; i < GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16); i+=16, d+=32)
|
|
||||||
{
|
|
||||||
v128u16 fifoColor = vec_ld(0, disp_fifo.buf + disp_fifo.head);
|
|
||||||
|
|
||||||
disp_fifo.head += (sizeof(v128u16)/sizeof(u32));
|
|
||||||
if (disp_fifo.head >= 0x6000)
|
|
||||||
{
|
|
||||||
disp_fifo.head -= 0x6000;
|
|
||||||
}
|
|
||||||
|
|
||||||
v128u32 dstLo = ((v128u32){0,0,0,0});
|
|
||||||
v128u32 dstHi = ((v128u32){0,0,0,0});
|
|
||||||
fifoColor = vec_perm( (v128u8)fifoColor, (v128u8)fifoColor, ((v128u8){10,11, 8,9, 14,15, 12,13, 2,3, 0,1, 6,7, 4,5}) );
|
|
||||||
|
|
||||||
if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev)
|
|
||||||
{
|
|
||||||
ColorspaceConvert555To6665Opaque_AltiVec<false, BESwapDst>(fifoColor, dstLo, dstHi);
|
|
||||||
}
|
|
||||||
else if (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev)
|
|
||||||
{
|
|
||||||
ColorspaceConvert555To8888Opaque_AltiVec<false, BESwapDst>(fifoColor, dstLo, dstHi);
|
|
||||||
}
|
|
||||||
|
|
||||||
vec_st(dstLo, d + 0, dst);
|
|
||||||
vec_st(dstHi, d + 16, dst);
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev)
|
|
||||||
{
|
|
||||||
ColorspaceConvertBuffer555To6665Opaque<false, false, BESwapDst>((u16 *)(disp_fifo.buf + disp_fifo.head), dst, GPU_FRAMEBUFFER_NATIVE_WIDTH);
|
|
||||||
}
|
|
||||||
else if (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev)
|
|
||||||
{
|
|
||||||
ColorspaceConvertBuffer555To8888Opaque<false, false, BESwapDst>((u16 *)(disp_fifo.buf + disp_fifo.head), dst, GPU_FRAMEBUFFER_NATIVE_WIDTH);
|
|
||||||
}
|
|
||||||
_DISP_FIFOrecv_LineAdvance();
|
|
||||||
#endif // ENABLE_ALTIVEC
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif // USEMANUALVECTORIZATION
|
|
||||||
|
|
||||||
template <NDSColorFormat OUTPUTFORMAT>
|
template <NDSColorFormat OUTPUTFORMAT>
|
||||||
void DISP_FIFOrecv_LineOpaque(u32 *__restrict dst)
|
void DISP_FIFOrecv_LineOpaque(u32 *__restrict dst)
|
||||||
{
|
{
|
||||||
|
@ -485,26 +485,28 @@ void DISP_FIFOrecv_LineOpaque(u32 *__restrict dst)
|
||||||
{
|
{
|
||||||
if (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev)
|
if (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev)
|
||||||
{
|
{
|
||||||
_DISP_FIFOrecv_LineOpaque16_vec<OUTPUTFORMAT>(dst);
|
buffer_copy_or_constant_s16_fast<GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16), false>(dst, disp_fifo.buf + disp_fifo.head, 0x8000);
|
||||||
}
|
}
|
||||||
else
|
else if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev)
|
||||||
{
|
{
|
||||||
_DISP_FIFOrecv_LineOpaque32_vec<OUTPUTFORMAT>(dst);
|
ColorspaceConvertBuffer555To6665Opaque<false, false, BESwapDst>((u16 *)(disp_fifo.buf + disp_fifo.head), dst, GPU_FRAMEBUFFER_NATIVE_WIDTH);
|
||||||
}
|
}
|
||||||
|
else if (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev)
|
||||||
|
{
|
||||||
|
ColorspaceConvertBuffer555To8888Opaque<false, false, BESwapDst>((u16 *)(disp_fifo.buf + disp_fifo.head), dst, GPU_FRAMEBUFFER_NATIVE_WIDTH);
|
||||||
|
}
|
||||||
|
|
||||||
|
_DISP_FIFOrecv_LineAdvance();
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
#endif
|
#endif // USEMANUALVECTORIZATION
|
||||||
{
|
{
|
||||||
if (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev)
|
if (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev)
|
||||||
{
|
{
|
||||||
for (size_t i = 0; i < GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16) / sizeof(u32); i++)
|
for (size_t i = 0; i < GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16) / sizeof(u32); i++)
|
||||||
{
|
{
|
||||||
const u32 src = DISP_FIFOrecv_u32();
|
const u32 src = DISP_FIFOrecv_u32();
|
||||||
#ifdef MSB_FIRST
|
|
||||||
dst[i] = (src >> 16) | (src << 16) | 0x80008000;
|
|
||||||
#else
|
|
||||||
dst[i] = src | 0x80008000;
|
dst[i] = src | 0x80008000;
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -534,6 +536,14 @@ void DISP_FIFOreset()
|
||||||
disp_fifo.tail = 0;
|
disp_fifo.tail = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template void DISP_FIFOsend< u8, 0>(const u8 val);
|
||||||
|
template void DISP_FIFOsend< u8, 1>(const u8 val);
|
||||||
|
template void DISP_FIFOsend< u8, 2>(const u8 val);
|
||||||
|
template void DISP_FIFOsend< u8, 3>(const u8 val);
|
||||||
|
template void DISP_FIFOsend<u16, 0>(const u16 val);
|
||||||
|
template void DISP_FIFOsend<u16, 2>(const u16 val);
|
||||||
|
template void DISP_FIFOsend<u32, 0>(const u32 val);
|
||||||
|
|
||||||
template void DISP_FIFOrecv_LineOpaque<NDSColorFormat_BGR555_Rev>(u32 *__restrict dst);
|
template void DISP_FIFOrecv_LineOpaque<NDSColorFormat_BGR555_Rev>(u32 *__restrict dst);
|
||||||
template void DISP_FIFOrecv_LineOpaque<NDSColorFormat_BGR666_Rev>(u32 *__restrict dst);
|
template void DISP_FIFOrecv_LineOpaque<NDSColorFormat_BGR666_Rev>(u32 *__restrict dst);
|
||||||
template void DISP_FIFOrecv_LineOpaque<NDSColorFormat_BGR888_Rev>(u32 *__restrict dst);
|
template void DISP_FIFOrecv_LineOpaque<NDSColorFormat_BGR888_Rev>(u32 *__restrict dst);
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
/*
|
/*
|
||||||
Copyright 2006 yopyop
|
Copyright 2006 yopyop
|
||||||
Copyright 2007 shash
|
Copyright 2007 shash
|
||||||
Copyright 2007-2021 DeSmuME team
|
Copyright 2007-2022 DeSmuME team
|
||||||
|
|
||||||
This file is free software: you can redistribute it and/or modify
|
This file is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
@ -87,7 +87,7 @@ typedef struct
|
||||||
extern DISP_FIFO disp_fifo;
|
extern DISP_FIFO disp_fifo;
|
||||||
void DISP_FIFOinit();
|
void DISP_FIFOinit();
|
||||||
|
|
||||||
void DISP_FIFOsend_u32(u32 val);
|
template<typename T, size_t ADDROFFSET> void DISP_FIFOsend(const T val);
|
||||||
u32 DISP_FIFOrecv_u32();
|
u32 DISP_FIFOrecv_u32();
|
||||||
|
|
||||||
void DISP_FIFOrecv_Line16(u16 *__restrict dst);
|
void DISP_FIFOrecv_Line16(u16 *__restrict dst);
|
||||||
|
|
|
@ -3425,7 +3425,19 @@ void FASTCALL _MMU_ARM9_write08(u32 adr, u8 val)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
case REG_DISPA_DISPMMEMFIFO:
|
case REG_DISPA_DISPMMEMFIFO:
|
||||||
DISP_FIFOsend_u32(val);
|
DISP_FIFOsend<u8, 0>(val);
|
||||||
|
return;
|
||||||
|
|
||||||
|
case REG_DISPA_DISPMMEMFIFO+1:
|
||||||
|
DISP_FIFOsend<u8, 1>(val);
|
||||||
|
return;
|
||||||
|
|
||||||
|
case REG_DISPA_DISPMMEMFIFO+2:
|
||||||
|
DISP_FIFOsend<u8, 2>(val);
|
||||||
|
return;
|
||||||
|
|
||||||
|
case REG_DISPA_DISPMMEMFIFO+3:
|
||||||
|
DISP_FIFOsend<u8, 3>(val);
|
||||||
return;
|
return;
|
||||||
|
|
||||||
case REG_DISPB_BG0HOFS:
|
case REG_DISPB_BG0HOFS:
|
||||||
|
@ -3992,7 +4004,11 @@ void FASTCALL _MMU_ARM9_write16(u32 adr, u16 val)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
case REG_DISPA_DISPMMEMFIFO:
|
case REG_DISPA_DISPMMEMFIFO:
|
||||||
DISP_FIFOsend_u32(val);
|
DISP_FIFOsend<u16, 0>(val);
|
||||||
|
return;
|
||||||
|
|
||||||
|
case REG_DISPA_DISPMMEMFIFO+2:
|
||||||
|
DISP_FIFOsend<u16, 2>(val);
|
||||||
return;
|
return;
|
||||||
|
|
||||||
case REG_DISPA_MASTERBRIGHT:
|
case REG_DISPA_MASTERBRIGHT:
|
||||||
|
@ -4635,7 +4651,7 @@ void FASTCALL _MMU_ARM9_write32(u32 adr, u32 val)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
case REG_DISPA_DISPMMEMFIFO:
|
case REG_DISPA_DISPMMEMFIFO:
|
||||||
DISP_FIFOsend_u32(val);
|
DISP_FIFOsend<u32, 0>(val);
|
||||||
return;
|
return;
|
||||||
|
|
||||||
case REG_DISPA_MASTERBRIGHT:
|
case REG_DISPA_MASTERBRIGHT:
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
/*
|
/*
|
||||||
Copyright (C) 2005 Theo Berkau
|
Copyright (C) 2005 Theo Berkau
|
||||||
Copyright (C) 2005-2006 Guillaume Duhamel
|
Copyright (C) 2005-2006 Guillaume Duhamel
|
||||||
Copyright (C) 2008-2010 DeSmuME team
|
Copyright (C) 2008-2022 DeSmuME team
|
||||||
|
|
||||||
This file is free software: you can redistribute it and/or modify
|
This file is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
@ -140,6 +140,11 @@ static INLINE u16 HostReadWord(u8* const mem, const u32 addr)
|
||||||
return *((u16 *) (mem + addr));
|
return *((u16 *) (mem + addr));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static INLINE void HostWriteByte(u8* const mem, const u32 addr, const u8 val)
|
||||||
|
{
|
||||||
|
mem[addr] = val;
|
||||||
|
}
|
||||||
|
|
||||||
static INLINE void HostWriteWord(u8* const mem, const u32 addr, const u16 val)
|
static INLINE void HostWriteWord(u8* const mem, const u32 addr, const u16 val)
|
||||||
{
|
{
|
||||||
*((u16 *) (mem + addr)) = val;
|
*((u16 *) (mem + addr)) = val;
|
||||||
|
|
Loading…
Reference in New Issue