diff --git a/desmume/src/GPU.cpp b/desmume/src/GPU.cpp index fbdc9c885..573c58eb2 100644 --- a/desmume/src/GPU.cpp +++ b/desmume/src/GPU.cpp @@ -2589,15 +2589,26 @@ template_BGLayer[LAYERID].size.width : GPU_FRAMEBUFFER_NATIVE_WIDTH; - IOREG_BGnX x = param.BGnX; - IOREG_BGnY y = param.BGnY; - const s32 dx = (s32)param.BGnPA.value; - const s32 dy = (s32)param.BGnPC.value; + const s16 dx = (s16)LOCAL_TO_LE_16(param.BGnPA.value); + const s16 dy = (s16)LOCAL_TO_LE_16(param.BGnPC.value); const s32 wh = this->_BGLayer[LAYERID].size.width; const s32 ht = this->_BGLayer[LAYERID].size.height; const s32 wmask = wh - 1; const s32 hmask = ht - 1; + IOREG_BGnX x = param.BGnX; + IOREG_BGnY y = param.BGnY; + +#ifdef LOCAL_BE + // This only seems to work in the unrotated/unscaled case. I'm not too sure + // about how these bits should really be arranged on big-endian, but at + // least this arrangement fixes a bunch of games that use affine or extended + // layers, just as long as they don't perform any rotation/scaling. + // - rogerman, 2016-07-05 + x.value = ((x.value & 0x00FFFFFF) << 8) | ((x.value & 0xFF000000) >> 24); + y.value = ((y.value & 0x00FFFFFF) << 8) | ((y.value & 0xFF000000) >> 24); +#endif + u8 index; u16 color; @@ -2608,7 +2619,7 @@ void GPUEngineBase::_RenderPixelIterate_Final(u16 *__restrict dstColorLine, cons s32 auxX = (WRAP) ? (x.Integer & wmask) : x.Integer; const s32 auxY = (WRAP) ? (y.Integer & hmask) : y.Integer; - if (WRAP || (auxX + lineWidth < wh && auxX >= 0 && auxY < ht && auxY >= 0)) + if ( WRAP || ((auxX >= 0) && (auxX + lineWidth <= wh) && (auxY >= 0) && (auxY < ht)) ) { for (size_t i = 0; i < lineWidth; i++) { @@ -2627,14 +2638,16 @@ void GPUEngineBase::_RenderPixelIterate_Final(u16 *__restrict dstColorLine, cons auxX++; if (WRAP) - auxX = auxX & wmask; + { + auxX &= wmask; + } } return; } } - for (size_t i = 0; i < lineWidth; i++, x.value += dx, y.value += dy) + for (size_t i = 0; i < lineWidth; i++, x.value+=dx, y.value+=dy) { const s32 auxX = (WRAP) ? (x.Integer & wmask) : x.Integer; const s32 auxY = (WRAP) ? (y.Integer & hmask) : y.Integer; @@ -2828,7 +2841,9 @@ void GPUEngineBase::_RenderPixelsCustom(void *__restrict dstColorLine, u8 *__res const NDSColorFormat outputFormat = GPU->GetDisplayInfo().colorFormat; const size_t dstPixCount = lineWidth; +#ifdef ENABLE_SSE2 const size_t ssePixCount = (dstPixCount - (dstPixCount % 8)); +#endif const size_t lineCount = _gpuDstLineCount[lineIndex]; for (size_t l = 0; l < lineCount; l++) diff --git a/desmume/src/GPU.h b/desmume/src/GPU.h index b1993ae37..788a1c5c3 100644 --- a/desmume/src/GPU.h +++ b/desmume/src/GPU.h @@ -305,13 +305,19 @@ typedef IOREG_BGnPA IOREG_BGnPD; // 0x400x026, 0x400x036: BGn rotation/scaling typedef union { - u32 value; + s32 value; struct { +#ifdef LOCAL_LE u32 Fraction:8; s32 Integer:20; - u32 :4; + s32 :4; +#else + s32 :4; + s32 Integer:20; + u32 Fraction:8; +#endif }; } IOREG_BGnX; // 0x400x028, 0x400x038: BGn X-coordinate (Engine A+B) typedef IOREG_BGnX IOREG_BGnY; // 0x400x02C, 0x400x03C: BGn Y-coordinate (Engine A+B)