fix #1555 (regression in Kingdom Hearts Re:coded caused by r5440) by changing how wacky nearly-out-of-limits geometry is handled to a possibly more plausible mechanism

This commit is contained in:
zeromus 2016-08-13 23:48:51 +00:00
parent d89fa0c761
commit cc2c86cf11
4 changed files with 20 additions and 40 deletions

View File

@ -1,6 +1,6 @@
/* /*
Copyright (C) 2006 yopyop Copyright (C) 2006 yopyop
Copyright (C) 2008-2015 DeSmuME team Copyright (C) 2008-2016 DeSmuME team
This file is free software: you can redistribute it and/or modify This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -660,11 +660,22 @@ static void SetVertex()
if(polylist->count >= POLYLIST_SIZE) if(polylist->count >= POLYLIST_SIZE)
return; return;
//TODO - think about keeping the clip matrix concatenated, //games will definitely count on overflowing the matrix math
//so that we only have to multiply one matrix here //scenarios to balance here:
//(we could lazy cache the concatenated clip matrix and only generate it //+ spectrobes beyond the portals excavation blower and drill tools: sets very large overflowing +x,+y in the modelview matrix to push things offscreen
//when we need to) //moreover in some conditions there will be vertical glitched lines sometimes when drilling at the top center of the screen.
MatrixMultVec4x4_M2(mtxCurrent[0], coordTransformed); //+ kingdom hearts re-coded: first conversation with cast characters will place them oddly with something overflowing to about 0xA???????
//+ SM64: skybox
//+ TBD other things, probably, dragon quest worldmaps?
//At first I tried saturating the math elsewhere, but that couldn't fix all cases
//So after some fooling around, I found this nicely aesthetic way of balancing all the cases. I don't doubt that it's still inaccurate, however
//Note, if <<3 seems weird, it's reasonable if you assume the goal is to end up with 16 integer bits and a sign bit.
MatrixMultVec4x4(mtxCurrent[1],coordTransformed); //modelview
for(int i=0;i<4;i++) coordTransformed[i] = (((s32)coordTransformed[i])<<3>>3); //balances everything ok
//for(int i=0;i<4;i++) coordTransformed[i] = (((s32)coordTransformed[i])<<4>>4); //breaks SM64 skyboxes
//for(int i=0;i<4;i++) coordTransformed[i] = (((u32)coordTransformed[i])<<4>>4)|(((s32)(coordTransformed[i]&0x80000000))>>3); //another way generally to drop precision (but breaks spectrobes which does seem to need some kind of buggy wrap-around behaviour)
MatrixMultVec4x4(mtxCurrent[0],coordTransformed); //projection
for(int i=0;i<4;i++) coordTransformed[i] = (((s32)coordTransformed[i])<<3>>3); //no proof this is needed, but suspected to be similar based on above
//printf("%f %f %f\n",s16coord[0]/4096.0f,s16coord[1]/4096.0f,s16coord[2]/4096.0f); //printf("%f %f %f\n",s16coord[0]/4096.0f,s16coord[1]/4096.0f,s16coord[2]/4096.0f);
//printf("x %f %f %f %f\n",mtxCurrent[0][0]/4096.0f,mtxCurrent[0][1]/4096.0f,mtxCurrent[0][2]/4096.0f,mtxCurrent[0][3]/4096.0f); //printf("x %f %f %f %f\n",mtxCurrent[0][0]/4096.0f,mtxCurrent[0][1]/4096.0f,mtxCurrent[0][2]/4096.0f,mtxCurrent[0][3]/4096.0f);

View File

@ -427,8 +427,3 @@ void MatrixTranslate(s32 *matrix, const s32 *ptr)
}); });
} }
// Transforms vecPtr by two matrices in sequence, writing the result back into vecPtr:
// first the matrix located at matrix+16, then the matrix located at matrix.
// NOTE(review): presumably `matrix` points at two consecutive 16-element s32 matrices - confirm against the caller.
void MatrixMultVec4x4_M2(const s32 *matrix, s32 *vecPtr)
{
MatrixMultVec4x4(matrix+16,vecPtr); // the matrix stored second in memory is applied first
MatrixMultVec4x4(matrix,vecPtr); // then the matrix stored first, on the already-transformed vector
}

View File

@ -276,13 +276,6 @@ FORCEINLINE void MatrixMultVec4x4(const float *matrix, float *vecPtr)
_mm_store_ps(vecPtr,_util_MatrixMultVec4x4_((SSE_MATRIX)matrix,_mm_load_ps(vecPtr))); _mm_store_ps(vecPtr,_util_MatrixMultVec4x4_((SSE_MATRIX)matrix,_mm_load_ps(vecPtr)));
} }
// Float overload: transforms vecPtr by the matrix at matrix+16 and then by the matrix at matrix,
// leaving the result in vecPtr.
// NOTE(review): presumably `matrix` points at two consecutive 16-float matrices - confirm against the caller.
FORCEINLINE void MatrixMultVec4x4_M2(const float *matrix, float *vecPtr)
{
//there are hardly any gains from merging these manually
MatrixMultVec4x4(matrix+16,vecPtr); // the matrix stored second in memory is applied first
MatrixMultVec4x4(matrix,vecPtr); // then the matrix stored first
}
FORCEINLINE void MatrixMultVec3x3(const float * matrix, float * vecPtr) FORCEINLINE void MatrixMultVec3x3(const float * matrix, float * vecPtr)
{ {
const __m128 vec = _mm_load_ps(vecPtr); const __m128 vec = _mm_load_ps(vecPtr);
@ -355,13 +348,6 @@ void MatrixMultiply(float * matrix, const float * rightMatrix);
void MatrixTranslate(float *matrix, const float *ptr); void MatrixTranslate(float *matrix, const float *ptr);
void MatrixScale(float * matrix, const float * ptr); void MatrixScale(float * matrix, const float * ptr);
// Float overload: transforms vecPtr by the matrix at matrix+16 and then by the matrix at matrix,
// leaving the result in vecPtr.
// NOTE(review): presumably `matrix` points at two consecutive 16-float matrices - confirm against the caller.
FORCEINLINE void MatrixMultVec4x4_M2(const float *matrix, float *vecPtr)
{
//there are hardly any gains from merging these manually
MatrixMultVec4x4(matrix+16,vecPtr); // the matrix stored second in memory is applied first
MatrixMultVec4x4(matrix,vecPtr); // then the matrix stored first
}
template<int NUM_ROWS> template<int NUM_ROWS>
FORCEINLINE void vector_fix2float(float* matrix, const float divisor) FORCEINLINE void vector_fix2float(float* matrix, const float divisor)
{ {
@ -373,8 +359,6 @@ FORCEINLINE void vector_fix2float(float* matrix, const float divisor)
void MatrixMultVec4x4 (const s32 *matrix, s32 *vecPtr); void MatrixMultVec4x4 (const s32 *matrix, s32 *vecPtr);
void MatrixMultVec4x4_M2(const s32 *matrix, s32 *vecPtr);
void MatrixMultiply(s32* matrix, const s32* rightMatrix); void MatrixMultiply(s32* matrix, const s32* rightMatrix);
void MatrixScale(s32 *matrix, const s32 *ptr); void MatrixScale(s32 *matrix, const s32 *ptr);
void MatrixTranslate(s32 *matrix, const s32 *ptr); void MatrixTranslate(s32 *matrix, const s32 *ptr);

View File

@ -504,21 +504,11 @@ FORCEINLINE s64 fx32_mul(const s32 a, const s32 b)
FORCEINLINE s32 fx32_shiftdown(const s64 a) FORCEINLINE s32 fx32_shiftdown(const s64 a)
{ {
s64 shifted;
#ifdef _MSC_VER #ifdef _MSC_VER
shifted = __ll_rshift(a,12); return (s32)__ll_rshift(a,12);
#else #else
shifted = (a>>12); return (s32)(a>>12);
#endif #endif
//either matrix math is happening at higher precision (an extra bit would suffice, I think), or the sums sent to this are saturated.
//tested by: spectrobes beyond the portals excavation blower
//(it sets very large +x,+y in the modelview matrix to push things offscreen, but the +y will overflow and become negative if we're not careful)
//I didn't think very hard about what would be fastest here on 32bit systems
//NOTE: this was intended for use in MatrixMultVec4x4_M2; it may not be appropriate for other uses of fx32_shiftdown.
//if this causes problems we should refactor the math routines a bit to take care of saturating in another function
if(shifted>(s32)0x7FFFFFFF) return 0x7FFFFFFF;
else if(shifted<=(s32)0x80000000) return 0x80000000;
else return shifted;
} }
FORCEINLINE s64 fx32_shiftup(const s32 a) FORCEINLINE s64 fx32_shiftup(const s32 a)