fix #1555 (regression in Kingdom Hearts Re:coded caused by r5440) by changing how wacky nearly-out-of-limits geometry is handled to a possibly more plausible mechanism
This commit is contained in:
parent
d89fa0c761
commit
cc2c86cf11
|
@ -1,6 +1,6 @@
|
||||||
/*
|
/*
|
||||||
Copyright (C) 2006 yopyop
|
Copyright (C) 2006 yopyop
|
||||||
Copyright (C) 2008-2015 DeSmuME team
|
Copyright (C) 2008-2016 DeSmuME team
|
||||||
|
|
||||||
This file is free software: you can redistribute it and/or modify
|
This file is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
@ -659,12 +659,23 @@ static void SetVertex()
|
||||||
return;
|
return;
|
||||||
if(polylist->count >= POLYLIST_SIZE)
|
if(polylist->count >= POLYLIST_SIZE)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
//TODO - think about keeping the clip matrix concatenated,
|
//games will definitely count on overflowing the matrix math
|
||||||
//so that we only have to multiply one matrix here
|
//scenarios to balance here:
|
||||||
//(we could lazy cache the concatenated clip matrix and only generate it
|
//+ spectrobes beyond the portals excavation blower and drill tools: sets very large overflowing +x,+y in the modelview matrix to push things offscreen
|
||||||
//when we need to)
|
//moreover, in some conditions there will sometimes be vertical glitched lines when drilling at the top center of the screen.
|
||||||
MatrixMultVec4x4_M2(mtxCurrent[0], coordTransformed);
|
//+ kingdom hearts re-coded: first conversation with cast characters will place them oddly with something overflowing to about 0xA???????
|
||||||
|
//+ SM64: skybox
|
||||||
|
//+ TBD other things, probably, dragon quest worldmaps?
|
||||||
|
//At first I tried saturating the math elsewhere, but that couldn't fix all cases
|
||||||
|
//So after some fooling around, I found this nicely aesthetic way of balancing all the cases. I don't doubt that it's still inaccurate, however
|
||||||
|
//Note, if <<3 seems weird, it's reasonable if you assume the goal is to end up with 16 integer bits and a sign bit.
|
||||||
|
MatrixMultVec4x4(mtxCurrent[1],coordTransformed); //modelview
|
||||||
|
for(int i=0;i<4;i++) coordTransformed[i] = (((s32)coordTransformed[i])<<3>>3); //balances everything ok
|
||||||
|
//for(int i=0;i<4;i++) coordTransformed[i] = (((s32)coordTransformed[i])<<4>>4); //breaks SM64 skyboxes
|
||||||
|
//for(int i=0;i<4;i++) coordTransformed[i] = (((u32)coordTransformed[i])<<4>>4)|(((s32)(coordTransformed[i]&0x80000000))>>3); //another way generally to drop precision (but breaks spectrobes which does seem to need some kind of buggy wrap-around behaviour)
|
||||||
|
MatrixMultVec4x4(mtxCurrent[0],coordTransformed); //projection
|
||||||
|
for(int i=0;i<4;i++) coordTransformed[i] = (((s32)coordTransformed[i])<<3>>3); //no proof this is needed, but suspected to be similar based on above
|
||||||
|
|
||||||
//printf("%f %f %f\n",s16coord[0]/4096.0f,s16coord[1]/4096.0f,s16coord[2]/4096.0f);
|
//printf("%f %f %f\n",s16coord[0]/4096.0f,s16coord[1]/4096.0f,s16coord[2]/4096.0f);
|
||||||
//printf("x %f %f %f %f\n",mtxCurrent[0][0]/4096.0f,mtxCurrent[0][1]/4096.0f,mtxCurrent[0][2]/4096.0f,mtxCurrent[0][3]/4096.0f);
|
//printf("x %f %f %f %f\n",mtxCurrent[0][0]/4096.0f,mtxCurrent[0][1]/4096.0f,mtxCurrent[0][2]/4096.0f,mtxCurrent[0][3]/4096.0f);
|
||||||
|
|
|
@ -427,8 +427,3 @@ void MatrixTranslate(s32 *matrix, const s32 *ptr)
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
void MatrixMultVec4x4_M2(const s32 *matrix, s32 *vecPtr)
|
|
||||||
{
|
|
||||||
MatrixMultVec4x4(matrix+16,vecPtr);
|
|
||||||
MatrixMultVec4x4(matrix,vecPtr);
|
|
||||||
}
|
|
||||||
|
|
|
@ -276,13 +276,6 @@ FORCEINLINE void MatrixMultVec4x4(const float *matrix, float *vecPtr)
|
||||||
_mm_store_ps(vecPtr,_util_MatrixMultVec4x4_((SSE_MATRIX)matrix,_mm_load_ps(vecPtr)));
|
_mm_store_ps(vecPtr,_util_MatrixMultVec4x4_((SSE_MATRIX)matrix,_mm_load_ps(vecPtr)));
|
||||||
}
|
}
|
||||||
|
|
||||||
FORCEINLINE void MatrixMultVec4x4_M2(const float *matrix, float *vecPtr)
|
|
||||||
{
|
|
||||||
//there are hardly any gains from merging these manually
|
|
||||||
MatrixMultVec4x4(matrix+16,vecPtr);
|
|
||||||
MatrixMultVec4x4(matrix,vecPtr);
|
|
||||||
}
|
|
||||||
|
|
||||||
FORCEINLINE void MatrixMultVec3x3(const float * matrix, float * vecPtr)
|
FORCEINLINE void MatrixMultVec3x3(const float * matrix, float * vecPtr)
|
||||||
{
|
{
|
||||||
const __m128 vec = _mm_load_ps(vecPtr);
|
const __m128 vec = _mm_load_ps(vecPtr);
|
||||||
|
@ -355,13 +348,6 @@ void MatrixMultiply(float * matrix, const float * rightMatrix);
|
||||||
void MatrixTranslate(float *matrix, const float *ptr);
|
void MatrixTranslate(float *matrix, const float *ptr);
|
||||||
void MatrixScale(float * matrix, const float * ptr);
|
void MatrixScale(float * matrix, const float * ptr);
|
||||||
|
|
||||||
FORCEINLINE void MatrixMultVec4x4_M2(const float *matrix, float *vecPtr)
|
|
||||||
{
|
|
||||||
//there are hardly any gains from merging these manually
|
|
||||||
MatrixMultVec4x4(matrix+16,vecPtr);
|
|
||||||
MatrixMultVec4x4(matrix,vecPtr);
|
|
||||||
}
|
|
||||||
|
|
||||||
template<int NUM_ROWS>
|
template<int NUM_ROWS>
|
||||||
FORCEINLINE void vector_fix2float(float* matrix, const float divisor)
|
FORCEINLINE void vector_fix2float(float* matrix, const float divisor)
|
||||||
{
|
{
|
||||||
|
@ -373,8 +359,6 @@ FORCEINLINE void vector_fix2float(float* matrix, const float divisor)
|
||||||
|
|
||||||
void MatrixMultVec4x4 (const s32 *matrix, s32 *vecPtr);
|
void MatrixMultVec4x4 (const s32 *matrix, s32 *vecPtr);
|
||||||
|
|
||||||
void MatrixMultVec4x4_M2(const s32 *matrix, s32 *vecPtr);
|
|
||||||
|
|
||||||
void MatrixMultiply(s32* matrix, const s32* rightMatrix);
|
void MatrixMultiply(s32* matrix, const s32* rightMatrix);
|
||||||
void MatrixScale(s32 *matrix, const s32 *ptr);
|
void MatrixScale(s32 *matrix, const s32 *ptr);
|
||||||
void MatrixTranslate(s32 *matrix, const s32 *ptr);
|
void MatrixTranslate(s32 *matrix, const s32 *ptr);
|
||||||
|
|
|
@ -504,21 +504,11 @@ FORCEINLINE s64 fx32_mul(const s32 a, const s32 b)
|
||||||
|
|
||||||
FORCEINLINE s32 fx32_shiftdown(const s64 a)
|
FORCEINLINE s32 fx32_shiftdown(const s64 a)
|
||||||
{
|
{
|
||||||
s64 shifted;
|
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
shifted = __ll_rshift(a,12);
|
return (s32)__ll_rshift(a,12);
|
||||||
#else
|
#else
|
||||||
shifted = (a>>12);
|
return (s32)(a>>12);
|
||||||
#endif
|
#endif
|
||||||
//either matrix math is happening at higher precision (an extra bit would suffice, I think), or the sums sent to this are saturated.
|
|
||||||
//tested by: spectrobes beyond the portals excavation blower
|
|
||||||
//(it sets very large +x,+y in the modelview matrix to push things offscreen, but the +y will overflow and become negative if we're not careful)
|
|
||||||
//I didn't think very hard about what would be fastest here on 32bit systems
|
|
||||||
//NOTE: this was intended for use in MatrixMultVec4x4_M2; it may not be appropriate for other uses of fx32_shiftdown.
|
|
||||||
//if this causes problems we should refactor the math routines a bit to take care of saturating in another function
|
|
||||||
if(shifted>(s32)0x7FFFFFFF) return 0x7FFFFFFF;
|
|
||||||
else if(shifted<=(s32)0x80000000) return 0x80000000;
|
|
||||||
else return shifted;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
FORCEINLINE s64 fx32_shiftup(const s32 a)
|
FORCEINLINE s64 fx32_shiftup(const s32 a)
|
||||||
|
|
Loading…
Reference in New Issue