From 0f4b3ead00e9bd12857156f9d453e3a4fd9a40d1 Mon Sep 17 00:00:00 2001
From: zeromus
Date: Fri, 15 Jun 2018 17:47:42 -0400
Subject: [PATCH] replace SMLAL_BB and friends with more clear logic that hopefully isnt bugged but I couldnt find a game exercising it, so I dont know whether it's correct. should fix #139

---
 desmume/src/arm_instructions.cpp | 56 ++++++++++++++++----------------
 1 file changed, 28 insertions(+), 28 deletions(-)

diff --git a/desmume/src/arm_instructions.cpp b/desmume/src/arm_instructions.cpp
index 8f14002ae..43412058a 100644
--- a/desmume/src/arm_instructions.cpp
+++ b/desmume/src/arm_instructions.cpp
@@ -3403,52 +3403,52 @@ TEMPLATE static u32 FASTCALL OP_SMLA_T_T(const u32 i)
 
 TEMPLATE static u32 FASTCALL OP_SMLAL_B_B(const u32 i)
 {
-	s64 tmp = (s64)(LWORD(cpu->R[REG_POS(i,0)])* LWORD(cpu->R[REG_POS(i,8)]));
-	u64 res = (u64)tmp + cpu->R[REG_POS(i,12)];
-
-	//INFO("SMLALBB %08X * %08X + %08X%08X = %08X%08X\n", (int)cpu->R[REG_POS(i,0)], (int)cpu->R[REG_POS(i,8)], (int)cpu->R[REG_POS(i,16)], (int)cpu->R[REG_POS(i,12)], (int)(cpu->R[REG_POS(i,16)] + (res + ((tmp<0)*0xFFFFFFFF))), (int)(u32) res);
-
-	cpu->R[REG_POS(i,12)] = (u32) res;
-	cpu->R[REG_POS(i,16)] += (res + ((tmp<0)*0xFFFFFFFF));
+	s32 a = LWORD(cpu->R[REG_POS(i,0)]);
+	s32 b = LWORD(cpu->R[REG_POS(i,8)]);
+	s64 product = (s64)(a*b);
+	s64 dst64 = (s64)(cpu->R[REG_POS(i,12)] + (((s64)cpu->R[REG_POS(i,16)])<<32));
+	s64 sum = dst64 + product;
+	cpu->R[REG_POS(i,12)] = (u32)(sum & 0xFFFFFFFF);
+	cpu->R[REG_POS(i,16)] = (u32)((sum>>32) & 0xFFFFFFFF);
 
 	return 2;
 }
 
 TEMPLATE static u32 FASTCALL OP_SMLAL_B_T(const u32 i)
 {
-	s64 tmp = (s64)(LWORD(cpu->R[REG_POS(i,0)])* HWORD(cpu->R[REG_POS(i,8)]));
-	u64 res = (u64)tmp + cpu->R[REG_POS(i,12)];
-
-	//INFO("SMLALBT %08X * %08X + %08X%08X = %08X%08X\n", (int)cpu->R[REG_POS(i,0)], (int)cpu->R[REG_POS(i,8)], (int)cpu->R[REG_POS(i,16)], (int)cpu->R[REG_POS(i,12)], (int)(cpu->R[REG_POS(i,16)] + res + ((tmp<0)*0xFFFFFFFF)), (int)(u32) res);
-
-	cpu->R[REG_POS(i,12)] = (u32) res;
-	cpu->R[REG_POS(i,16)] += res + ((tmp<0)*0xFFFFFFFF);
+	s32 a = LWORD(cpu->R[REG_POS(i,0)]);
+	s32 b = HWORD(cpu->R[REG_POS(i,8)]);
+	s64 product = (s64)(a*b);
+	s64 dst64 = (s64)(cpu->R[REG_POS(i,12)] + (((s64)cpu->R[REG_POS(i,16)])<<32));
+	s64 sum = dst64 + product;
+	cpu->R[REG_POS(i,12)] = (u32)(sum & 0xFFFFFFFF);
+	cpu->R[REG_POS(i,16)] = (u32)((sum>>32) & 0xFFFFFFFF);
 
 	return 2;
 }
 
 TEMPLATE static u32 FASTCALL OP_SMLAL_T_B(const u32 i)
 {
-	s64 tmp = (s64)(HWORD(cpu->R[REG_POS(i,0)])* (s64)LWORD(cpu->R[REG_POS(i,8)]));
-	u64 res = (u64)tmp + cpu->R[REG_POS(i,12)];
-
-	//INFO("SMLALTB %08X * %08X + %08X%08X = %08X%08X\n", (int)cpu->R[REG_POS(i,0)], (int)cpu->R[REG_POS(i,8)], (int)cpu->R[REG_POS(i,16)], (int)cpu->R[REG_POS(i,12)], (int)(cpu->R[REG_POS(i,16)] + res + ((tmp<0)*0xFFFFFFFF)), (int)(u32) res);
-
-	cpu->R[REG_POS(i,12)] = (u32) res;
-	cpu->R[REG_POS(i,16)] += res + ((tmp<0)*0xFFFFFFFF);
+	s32 a = HWORD(cpu->R[REG_POS(i,0)]);
+	s32 b = LWORD(cpu->R[REG_POS(i,8)]);
+	s64 product = (s64)(a*b);
+	s64 dst64 = (s64)(cpu->R[REG_POS(i,12)] + (((s64)cpu->R[REG_POS(i,16)])<<32));
+	s64 sum = dst64 + product;
+	cpu->R[REG_POS(i,12)] = (u32)(sum & 0xFFFFFFFF);
+	cpu->R[REG_POS(i,16)] = (u32)((sum>>32) & 0xFFFFFFFF);
 
 	return 2;
 }
 
 TEMPLATE static u32 FASTCALL OP_SMLAL_T_T(const u32 i)
 {
-	s64 tmp = (s64)(HWORD(cpu->R[REG_POS(i,0)])* HWORD(cpu->R[REG_POS(i,8)]));
-	u64 res = (u64)tmp + cpu->R[REG_POS(i,12)];
-
-	//INFO("SMLALTT %08X * %08X + %08X%08X = %08X%08X\n", (int)cpu->R[REG_POS(i,0)], (int)cpu->R[REG_POS(i,8)], (int)cpu->R[REG_POS(i,16)], (int)cpu->R[REG_POS(i,12)], (int)(cpu->R[REG_POS(i,16)] + res + ((tmp<0)*0xFFFFFFFF)), (int)(u32) res);
-
-	cpu->R[REG_POS(i,12)] = (u32) res;
-	cpu->R[REG_POS(i,16)] += res + ((tmp<0)*0xFFFFFFFF);
+	s32 a = HWORD(cpu->R[REG_POS(i,0)]);
+	s32 b = HWORD(cpu->R[REG_POS(i,8)]);
+	s64 product = (s64)(a*b);
+	s64 dst64 = (s64)(cpu->R[REG_POS(i,12)] + (((s64)cpu->R[REG_POS(i,16)])<<32));
+	s64 sum = dst64 + product;
+	cpu->R[REG_POS(i,12)] = (u32)(sum & 0xFFFFFFFF);
+	cpu->R[REG_POS(i,16)] = (u32)((sum>>32) & 0xFFFFFFFF);
 
 	return 2;
 }