- Partially fix a bug with affine and extended BG layers on big-endian systems. Such layers that perform rotation or scaling aren’t fixed yet.
- Loosen a restriction on taking the faster code path in GPUEngineBase::_RenderPixelIterate_Final().
- Silence a compiler warning on non-SSE2 systems.
This commit is contained in:
rogerman 2016-07-06 01:23:36 +00:00
parent b129392683
commit 771ceee36b
2 changed files with 30 additions and 9 deletions

View File

@ -2589,15 +2589,26 @@ template<GPULayerID LAYERID, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENAB
void GPUEngineBase::_RenderPixelIterate_Final(u16 *__restrict dstColorLine, const u16 lineIndex, const IOREG_BGnParameter &param, const u32 map, const u32 tile, const u16 *__restrict pal)
{
const u16 lineWidth = (ISDEBUGRENDER) ? this->_BGLayer[LAYERID].size.width : GPU_FRAMEBUFFER_NATIVE_WIDTH;
IOREG_BGnX x = param.BGnX;
IOREG_BGnY y = param.BGnY;
const s32 dx = (s32)param.BGnPA.value;
const s32 dy = (s32)param.BGnPC.value;
const s16 dx = (s16)LOCAL_TO_LE_16(param.BGnPA.value);
const s16 dy = (s16)LOCAL_TO_LE_16(param.BGnPC.value);
const s32 wh = this->_BGLayer[LAYERID].size.width;
const s32 ht = this->_BGLayer[LAYERID].size.height;
const s32 wmask = wh - 1;
const s32 hmask = ht - 1;
IOREG_BGnX x = param.BGnX;
IOREG_BGnY y = param.BGnY;
#ifdef LOCAL_BE
// This only seems to work in the unrotated/unscaled case. I'm not too sure
// about how these bits should really be arranged on big-endian, but at
// least this arrangement fixes a bunch of games that use affine or extended
// layers, just as long as they don't perform any rotation/scaling.
// - rogerman, 2016-07-05
x.value = ((x.value & 0x00FFFFFF) << 8) | ((x.value & 0xFF000000) >> 24);
y.value = ((y.value & 0x00FFFFFF) << 8) | ((y.value & 0xFF000000) >> 24);
#endif
u8 index;
u16 color;
@ -2608,7 +2619,7 @@ void GPUEngineBase::_RenderPixelIterate_Final(u16 *__restrict dstColorLine, cons
s32 auxX = (WRAP) ? (x.Integer & wmask) : x.Integer;
const s32 auxY = (WRAP) ? (y.Integer & hmask) : y.Integer;
if (WRAP || (auxX + lineWidth < wh && auxX >= 0 && auxY < ht && auxY >= 0))
if ( WRAP || ((auxX >= 0) && (auxX + lineWidth <= wh) && (auxY >= 0) && (auxY < ht)) )
{
for (size_t i = 0; i < lineWidth; i++)
{
@ -2627,14 +2638,16 @@ void GPUEngineBase::_RenderPixelIterate_Final(u16 *__restrict dstColorLine, cons
auxX++;
if (WRAP)
auxX = auxX & wmask;
{
auxX &= wmask;
}
}
return;
}
}
for (size_t i = 0; i < lineWidth; i++, x.value += dx, y.value += dy)
for (size_t i = 0; i < lineWidth; i++, x.value+=dx, y.value+=dy)
{
const s32 auxX = (WRAP) ? (x.Integer & wmask) : x.Integer;
const s32 auxY = (WRAP) ? (y.Integer & hmask) : y.Integer;
@ -2828,7 +2841,9 @@ void GPUEngineBase::_RenderPixelsCustom(void *__restrict dstColorLine, u8 *__res
const NDSColorFormat outputFormat = GPU->GetDisplayInfo().colorFormat;
const size_t dstPixCount = lineWidth;
#ifdef ENABLE_SSE2
const size_t ssePixCount = (dstPixCount - (dstPixCount % 8));
#endif
const size_t lineCount = _gpuDstLineCount[lineIndex];
for (size_t l = 0; l < lineCount; l++)

View File

@ -305,13 +305,19 @@ typedef IOREG_BGnPA IOREG_BGnPD; // 0x400x026, 0x400x036: BGn rotation/scaling
typedef union
{
u32 value;
s32 value;
struct
{
#ifdef LOCAL_LE
u32 Fraction:8;
s32 Integer:20;
u32 :4;
s32 :4;
#else
s32 :4;
s32 Integer:20;
u32 Fraction:8;
#endif
};
} IOREG_BGnX; // 0x400x028, 0x400x038: BGn X-coordinate (Engine A+B)
typedef IOREG_BGnX IOREG_BGnY; // 0x400x02C, 0x400x03C: BGn Y-coordinate (Engine A+B)