fix #1555 (regression in Kingdom Hearts Re:coded caused by r5440) by changing how wacky nearly-out-of-limits geometry is handled to a possibly more plausible mechanism
This commit is contained in:
parent
d89fa0c761
commit
cc2c86cf11
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
Copyright (C) 2006 yopyop
|
||||
Copyright (C) 2008-2015 DeSmuME team
|
||||
Copyright (C) 2008-2016 DeSmuME team
|
||||
|
||||
This file is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -660,11 +660,22 @@ static void SetVertex()
|
|||
if(polylist->count >= POLYLIST_SIZE)
|
||||
return;
|
||||
|
||||
//TODO - think about keeping the clip matrix concatenated,
|
||||
//so that we only have to multiply one matrix here
|
||||
//(we could lazy cache the concatenated clip matrix and only generate it
|
||||
//when we need to)
|
||||
MatrixMultVec4x4_M2(mtxCurrent[0], coordTransformed);
|
||||
//games will definitely count on overflowing the matrix math
|
||||
//scenarios to balance here:
|
||||
//+ spectrobes beyond the portals excavation blower and drill tools: sets very large overflowing +x,+y in the modelview matrix to push things offscreen
|
||||
//moreover, under some conditions there will sometimes be vertical glitched lines when drilling at the top center of the screen.
|
||||
//+ kingdom hearts re-coded: first conversation with cast characters will place them oddly with something overflowing to about 0xA???????
|
||||
//+ SM64: skybox
|
||||
//+ TBD other things, probably, dragon quest worldmaps?
|
||||
//At first I tried saturating the math elsewhere, but that couldn't fix all cases
|
||||
//So after some fooling around, I found this nicely aesthetic way of balancing all the cases. I don't doubt that it's still inaccurate, however
|
||||
//Note, if <<3 seems weird, it's reasonable if you assume the goal is to end up with 16 integer bits and a sign bit.
|
||||
MatrixMultVec4x4(mtxCurrent[1],coordTransformed); //modelview
|
||||
for(int i=0;i<4;i++) coordTransformed[i] = (((s32)coordTransformed[i])<<3>>3); //balances everything ok
|
||||
//for(int i=0;i<4;i++) coordTransformed[i] = (((s32)coordTransformed[i])<<4>>4); //breaks SM64 skyboxes
|
||||
//for(int i=0;i<4;i++) coordTransformed[i] = (((u32)coordTransformed[i])<<4>>4)|(((s32)(coordTransformed[i]&0x80000000))>>3); //another way generally to drop precision (but breaks spectrobes which does seem to need some kind of buggy wrap-around behaviour)
|
||||
MatrixMultVec4x4(mtxCurrent[0],coordTransformed); //projection
|
||||
for(int i=0;i<4;i++) coordTransformed[i] = (((s32)coordTransformed[i])<<3>>3); //no proof this is needed, but suspected to be similar based on above
|
||||
|
||||
//printf("%f %f %f\n",s16coord[0]/4096.0f,s16coord[1]/4096.0f,s16coord[2]/4096.0f);
|
||||
//printf("x %f %f %f %f\n",mtxCurrent[0][0]/4096.0f,mtxCurrent[0][1]/4096.0f,mtxCurrent[0][2]/4096.0f,mtxCurrent[0][3]/4096.0f);
|
||||
|
|
|
@ -427,8 +427,3 @@ void MatrixTranslate(s32 *matrix, const s32 *ptr)
|
|||
});
|
||||
}
|
||||
|
||||
void MatrixMultVec4x4_M2(const s32 *matrix, s32 *vecPtr)
|
||||
{
|
||||
MatrixMultVec4x4(matrix+16,vecPtr);
|
||||
MatrixMultVec4x4(matrix,vecPtr);
|
||||
}
|
||||
|
|
|
@ -276,13 +276,6 @@ FORCEINLINE void MatrixMultVec4x4(const float *matrix, float *vecPtr)
|
|||
_mm_store_ps(vecPtr,_util_MatrixMultVec4x4_((SSE_MATRIX)matrix,_mm_load_ps(vecPtr)));
|
||||
}
|
||||
|
||||
FORCEINLINE void MatrixMultVec4x4_M2(const float *matrix, float *vecPtr)
|
||||
{
|
||||
//there are hardly any gains from merging these manually
|
||||
MatrixMultVec4x4(matrix+16,vecPtr);
|
||||
MatrixMultVec4x4(matrix,vecPtr);
|
||||
}
|
||||
|
||||
FORCEINLINE void MatrixMultVec3x3(const float * matrix, float * vecPtr)
|
||||
{
|
||||
const __m128 vec = _mm_load_ps(vecPtr);
|
||||
|
@ -355,13 +348,6 @@ void MatrixMultiply(float * matrix, const float * rightMatrix);
|
|||
void MatrixTranslate(float *matrix, const float *ptr);
|
||||
void MatrixScale(float * matrix, const float * ptr);
|
||||
|
||||
FORCEINLINE void MatrixMultVec4x4_M2(const float *matrix, float *vecPtr)
|
||||
{
|
||||
//there are hardly any gains from merging these manually
|
||||
MatrixMultVec4x4(matrix+16,vecPtr);
|
||||
MatrixMultVec4x4(matrix,vecPtr);
|
||||
}
|
||||
|
||||
template<int NUM_ROWS>
|
||||
FORCEINLINE void vector_fix2float(float* matrix, const float divisor)
|
||||
{
|
||||
|
@ -373,8 +359,6 @@ FORCEINLINE void vector_fix2float(float* matrix, const float divisor)
|
|||
|
||||
void MatrixMultVec4x4 (const s32 *matrix, s32 *vecPtr);
|
||||
|
||||
void MatrixMultVec4x4_M2(const s32 *matrix, s32 *vecPtr);
|
||||
|
||||
void MatrixMultiply(s32* matrix, const s32* rightMatrix);
|
||||
void MatrixScale(s32 *matrix, const s32 *ptr);
|
||||
void MatrixTranslate(s32 *matrix, const s32 *ptr);
|
||||
|
|
|
@ -504,21 +504,11 @@ FORCEINLINE s64 fx32_mul(const s32 a, const s32 b)
|
|||
|
||||
FORCEINLINE s32 fx32_shiftdown(const s64 a)
|
||||
{
|
||||
s64 shifted;
|
||||
#ifdef _MSC_VER
|
||||
shifted = __ll_rshift(a,12);
|
||||
return (s32)__ll_rshift(a,12);
|
||||
#else
|
||||
shifted = (a>>12);
|
||||
return (s32)(a>>12);
|
||||
#endif
|
||||
//either matrix math is happening at higher precision (an extra bit would suffice, I think), or the sums sent to this are saturated.
|
||||
//tested by: spectrobes beyond the portals excavation blower
|
||||
//(it sets very large +x,+y in the modelview matrix to push things offscreen, but the +y will overflow and become negative if we're not careful)
|
||||
//I didn't think very hard about what would be fastest here on 32bit systems
|
||||
//NOTE: this was intended for use in MatrixMultVec4x4_M2; it may not be appropriate for other uses of fx32_shiftdown.
|
||||
//if this causes problems we should refactor the math routines a bit to take care of saturating in another function
|
||||
if(shifted>(s32)0x7FFFFFFF) return 0x7FFFFFFF;
|
||||
else if(shifted<=(s32)0x80000000) return 0x80000000;
|
||||
else return shifted;
|
||||
}
|
||||
|
||||
FORCEINLINE s64 fx32_shiftup(const s32 a)
|
||||
|
|
Loading…
Reference in New Issue