From cc2c86cf1182bf8cb0c19074692d1297aa493af5 Mon Sep 17 00:00:00 2001 From: zeromus Date: Sat, 13 Aug 2016 23:48:51 +0000 Subject: [PATCH] fix #1555 (regression in Kingdom Hearts Re:coded caused by r5440) by changing how wacky nearly-out-of-limits geometry is handled to a possibly more plausible mechanism --- desmume/src/gfx3d.cpp | 25 ++++++++++++++++++------- desmume/src/matrix.cpp | 5 ----- desmume/src/matrix.h | 16 ---------------- desmume/src/types.h | 14 ++------------ 4 files changed, 20 insertions(+), 40 deletions(-) diff --git a/desmume/src/gfx3d.cpp b/desmume/src/gfx3d.cpp index 5a6e669bb..9ed7786e0 100644 --- a/desmume/src/gfx3d.cpp +++ b/desmume/src/gfx3d.cpp @@ -1,6 +1,6 @@ /* Copyright (C) 2006 yopyop - Copyright (C) 2008-2015 DeSmuME team + Copyright (C) 2008-2016 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -659,12 +659,23 @@ static void SetVertex() return; if(polylist->count >= POLYLIST_SIZE) return; - - //TODO - think about keeping the clip matrix concatenated, - //so that we only have to multiply one matrix here - //(we could lazy cache the concatenated clip matrix and only generate it - //when we need to) - MatrixMultVec4x4_M2(mtxCurrent[0], coordTransformed); + + //games will definitely count on overflowing the matrix math + //scenarios to balance here: + //+ spectrobes beyond the portals excavation blower and drill tools: sets very large overflowing +x,+y in the modelview matrix to push things offscreen + //moreover in some conditions there will be vertical glitched lines sometimes when drilling at the top center of the screen. + //+ kingdom hearts re-coded: first conversation with cast characters will place them oddly with something overflowing to about 0xA??????? + //+ SM64: skybox + //+ TBD other things, probably, dragon quest worldmaps? 
+ //At first I tried saturating the math elsewhere, but that couldn't fix all cases + //So after some fooling around, I found this nicely aesthetic way of balancing all the cases. I don't doubt that it's still inaccurate, however + //Note, if <<3 seems weird, it's reasonable if you assume the goal is to end up with 16 integer bits and a sign bit. + MatrixMultVec4x4(mtxCurrent[1],coordTransformed); //modelview + for(int i=0;i<4;i++) coordTransformed[i] = (((s32)coordTransformed[i])<<3>>3); //balances everything ok + //for(int i=0;i<4;i++) coordTransformed[i] = (((s32)coordTransformed[i])<<4>>4); //breaks SM64 skyboxes + //for(int i=0;i<4;i++) coordTransformed[i] = (((u32)coordTransformed[i])<<4>>4)|(((s32)(coordTransformed[i]&0x80000000))>>3); //another way generally to drop precision (but breaks spectrobes which does seem to need some kind of buggy wrap-around behaviour) + MatrixMultVec4x4(mtxCurrent[0],coordTransformed); //projection + for(int i=0;i<4;i++) coordTransformed[i] = (((s32)coordTransformed[i])<<3>>3); //no proof this is needed, but suspected to be similar based on above //printf("%f %f %f\n",s16coord[0]/4096.0f,s16coord[1]/4096.0f,s16coord[2]/4096.0f); //printf("x %f %f %f %f\n",mtxCurrent[0][0]/4096.0f,mtxCurrent[0][1]/4096.0f,mtxCurrent[0][2]/4096.0f,mtxCurrent[0][3]/4096.0f); diff --git a/desmume/src/matrix.cpp b/desmume/src/matrix.cpp index 666029bbf..35ecf271a 100644 --- a/desmume/src/matrix.cpp +++ b/desmume/src/matrix.cpp @@ -427,8 +427,3 @@ void MatrixTranslate(s32 *matrix, const s32 *ptr) }); } -void MatrixMultVec4x4_M2(const s32 *matrix, s32 *vecPtr) -{ - MatrixMultVec4x4(matrix+16,vecPtr); - MatrixMultVec4x4(matrix,vecPtr); -} diff --git a/desmume/src/matrix.h b/desmume/src/matrix.h index 8aa87c2fe..d060a4d38 100644 --- a/desmume/src/matrix.h +++ b/desmume/src/matrix.h @@ -276,13 +276,6 @@ FORCEINLINE void MatrixMultVec4x4(const float *matrix, float *vecPtr) 
_mm_store_ps(vecPtr,_util_MatrixMultVec4x4_((SSE_MATRIX)matrix,_mm_load_ps(vecPtr))); } -FORCEINLINE void MatrixMultVec4x4_M2(const float *matrix, float *vecPtr) -{ - //there are hardly any gains from merging these manually - MatrixMultVec4x4(matrix+16,vecPtr); - MatrixMultVec4x4(matrix,vecPtr); -} - FORCEINLINE void MatrixMultVec3x3(const float * matrix, float * vecPtr) { const __m128 vec = _mm_load_ps(vecPtr); @@ -355,13 +348,6 @@ void MatrixMultiply(float * matrix, const float * rightMatrix); void MatrixTranslate(float *matrix, const float *ptr); void MatrixScale(float * matrix, const float * ptr); -FORCEINLINE void MatrixMultVec4x4_M2(const float *matrix, float *vecPtr) -{ - //there are hardly any gains from merging these manually - MatrixMultVec4x4(matrix+16,vecPtr); - MatrixMultVec4x4(matrix,vecPtr); -} - template FORCEINLINE void vector_fix2float(float* matrix, const float divisor) { @@ -373,8 +359,6 @@ FORCEINLINE void vector_fix2float(float* matrix, const float divisor) void MatrixMultVec4x4 (const s32 *matrix, s32 *vecPtr); -void MatrixMultVec4x4_M2(const s32 *matrix, s32 *vecPtr); - void MatrixMultiply(s32* matrix, const s32* rightMatrix); void MatrixScale(s32 *matrix, const s32 *ptr); void MatrixTranslate(s32 *matrix, const s32 *ptr); diff --git a/desmume/src/types.h b/desmume/src/types.h index ce794d5a7..56b225c3b 100644 --- a/desmume/src/types.h +++ b/desmume/src/types.h @@ -504,21 +504,11 @@ FORCEINLINE s64 fx32_mul(const s32 a, const s32 b) FORCEINLINE s32 fx32_shiftdown(const s64 a) { - s64 shifted; #ifdef _MSC_VER - shifted = __ll_rshift(a,12); + return (s32)__ll_rshift(a,12); #else - shifted = (a>>12); + return (s32)(a>>12); #endif - //either matrix math is happening at higher precision (an extra bit would suffice, I think), or the sums sent to this are saturated. 
- //tested by: spectrobes beyond the portals excavation blower - //(it sets very large +x,+y in the modelview matrix to push things offscreen, but the +y will overflow and become negative if we're not careful) - //I didnt think very hard about what would be fastest here on 32bit systems - //NOTE: this was intended for use in MatrixMultVec4x4_M2; it may not be appropriate for other uses of fx32_shiftdown. - //if this causes problems we should refactor the math routines a bit to take care of saturating in another function - if(shifted>(s32)0x7FFFFFFF) return 0x7FFFFFFF; - else if(shifted<=(s32)0x80000000) return 0x80000000; - else return shifted; } FORCEINLINE s64 fx32_shiftup(const s32 a)