From 771ceee36b49af5cd2a3266dd128246cc54169ff Mon Sep 17 00:00:00 2001 From: rogerman Date: Wed, 6 Jul 2016 01:23:36 +0000 Subject: [PATCH] =?UTF-8?q?GPU:=20-=20Partially=20fix=20a=20bug=20with=20a?= =?UTF-8?q?ffine=20and=20extended=20BG=20layers=20on=20big-endian=20system?= =?UTF-8?q?s.=20Such=20layers=20that=20perform=20rotation=20or=20scaling?= =?UTF-8?q?=20aren=E2=80=99t=20fixed=20yet.=20-=20Loosen=20a=20restriction?= =?UTF-8?q?=20on=20taking=20the=20faster=20code=20path=20in=20GPUEngineBas?= =?UTF-8?q?e::=5FRenderPixelIterate=5FFinal().=20-=20Silence=20a=20compile?= =?UTF-8?q?r=20warning=20on=20non-SSE2=20systems.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- desmume/src/GPU.cpp | 29 ++++++++++++++++++++++------- desmume/src/GPU.h | 10 ++++++++-- 2 files changed, 30 insertions(+), 9 deletions(-) diff --git a/desmume/src/GPU.cpp b/desmume/src/GPU.cpp index fbdc9c885..573c58eb2 100644 --- a/desmume/src/GPU.cpp +++ b/desmume/src/GPU.cpp @@ -2589,15 +2589,26 @@ template_BGLayer[LAYERID].size.width : GPU_FRAMEBUFFER_NATIVE_WIDTH; - IOREG_BGnX x = param.BGnX; - IOREG_BGnY y = param.BGnY; - const s32 dx = (s32)param.BGnPA.value; - const s32 dy = (s32)param.BGnPC.value; + const s16 dx = (s16)LOCAL_TO_LE_16(param.BGnPA.value); + const s16 dy = (s16)LOCAL_TO_LE_16(param.BGnPC.value); const s32 wh = this->_BGLayer[LAYERID].size.width; const s32 ht = this->_BGLayer[LAYERID].size.height; const s32 wmask = wh - 1; const s32 hmask = ht - 1; + IOREG_BGnX x = param.BGnX; + IOREG_BGnY y = param.BGnY; + +#ifdef LOCAL_BE + // This only seems to work in the unrotated/unscaled case. I'm not too sure + // about how these bits should really be arranged on big-endian, but at + // least this arrangement fixes a bunch of games that use affine or extended + // layers, just as long as they don't perform any rotation/scaling. + // - rogerman, 2016-07-05 + x.value = ((x.value & 0x00FFFFFF) << 8) | ((x.value & 0xFF000000) >> 24); + y.value = ((y.value & 0x00FFFFFF) << 8) | ((y.value & 0xFF000000) >> 24); +#endif + u8 index; u16 color; @@ -2608,7 +2619,7 @@ void GPUEngineBase::_RenderPixelIterate_Final(u16 *__restrict dstColorLine, cons s32 auxX = (WRAP) ? (x.Integer & wmask) : x.Integer; const s32 auxY = (WRAP) ? (y.Integer & hmask) : y.Integer; - if (WRAP || (auxX + lineWidth < wh && auxX >= 0 && auxY < ht && auxY >= 0)) + if ( WRAP || ((auxX >= 0) && (auxX + lineWidth <= wh) && (auxY >= 0) && (auxY < ht)) ) { for (size_t i = 0; i < lineWidth; i++) { @@ -2627,14 +2638,16 @@ void GPUEngineBase::_RenderPixelIterate_Final(u16 *__restrict dstColorLine, cons auxX++; if (WRAP) - auxX = auxX & wmask; + { + auxX &= wmask; + } } return; } } - for (size_t i = 0; i < lineWidth; i++, x.value += dx, y.value += dy) + for (size_t i = 0; i < lineWidth; i++, x.value+=dx, y.value+=dy) { const s32 auxX = (WRAP) ? (x.Integer & wmask) : x.Integer; const s32 auxY = (WRAP) ? (y.Integer & hmask) : y.Integer; @@ -2828,7 +2841,9 @@ void GPUEngineBase::_RenderPixelsCustom(void *__restrict dstColorLine, u8 *__res const NDSColorFormat outputFormat = GPU->GetDisplayInfo().colorFormat; const size_t dstPixCount = lineWidth; +#ifdef ENABLE_SSE2 const size_t ssePixCount = (dstPixCount - (dstPixCount % 8)); +#endif const size_t lineCount = _gpuDstLineCount[lineIndex]; for (size_t l = 0; l < lineCount; l++) diff --git a/desmume/src/GPU.h b/desmume/src/GPU.h index b1993ae37..788a1c5c3 100644 --- a/desmume/src/GPU.h +++ b/desmume/src/GPU.h @@ -305,13 +305,19 @@ typedef IOREG_BGnPA IOREG_BGnPD; // 0x400x026, 0x400x036: BGn rotation/scaling typedef union { - u32 value; + s32 value; struct { +#ifdef LOCAL_LE u32 Fraction:8; s32 Integer:20; - u32 :4; + s32 :4; +#else + s32 :4; + s32 Integer:20; + u32 Fraction:8; +#endif }; } IOREG_BGnX; // 0x400x028, 0x400x038: BGn X-coordinate (Engine A+B) typedef IOREG_BGnX IOREG_BGnY; // 0x400x02C, 0x400x03C: BGn Y-coordinate (Engine A+B)