Nobody is allowed to set alignments on typedefs without testing on Linux and Windows simultaneously. It's only known to work on data declarations. In the meantime, sort all this crap out, hopefully.

2013-11-08 19:19:49 +00:00 · 2013-11-08 19:19:49 +00:00 · caf8d336e7
parent dae13c04b1
commit caf8d336e7
3 changed files with 40 additions and 7 deletions
--- a/desmume/src/gfx3d.cpp
+++ b/desmume/src/gfx3d.cpp
@ -1480,7 +1480,7 @@ static BOOL gfx3d_glBoxTest(u32 v)
 	float zd = float16table[(uz+ud)&0xFFFF];
 	//eight corners of cube
-	VERT verts[8];
+	CACHE_ALIGN VERT verts[8];
 	verts[0].set_coord(x,y,z,1);
 	verts[1].set_coord(xw,y,z,1);
 	verts[2].set_coord(xw,yh,z,1);
@ -1529,13 +1529,19 @@ static BOOL gfx3d_glBoxTest(u32 v)
 	////---------------------
 	//transform all coords
-	for(int i=0;i<8;i++) {
+	for(int i=0;i<8;i++)
 	{
 		//this cant work. its left as a reminder that we could (and probably should) do the boxtest in all fixed point values
 		//MatrixMultVec4x4_M2(mtxCurrent[0], verts[i].coord);
 		//but change it all to floating point and do it that way instead
 		CACHE_ALIGN float temp1[16] = {mtxCurrent[1][0]/4096.0f,mtxCurrent[1][1]/4096.0f,mtxCurrent[1][2]/4096.0f,mtxCurrent[1][3]/4096.0f,mtxCurrent[1][4]/4096.0f,mtxCurrent[1][5]/4096.0f,mtxCurrent[1][6]/4096.0f,mtxCurrent[1][7]/4096.0f,mtxCurrent[1][8]/4096.0f,mtxCurrent[1][9]/4096.0f,mtxCurrent[1][10]/4096.0f,mtxCurrent[1][11]/4096.0f,mtxCurrent[1][12]/4096.0f,mtxCurrent[1][13]/4096.0f,mtxCurrent[1][14]/4096.0f,mtxCurrent[1][15]/4096.0f};
 		CACHE_ALIGN float temp0[16] = {mtxCurrent[0][0]/4096.0f,mtxCurrent[0][1]/4096.0f,mtxCurrent[0][2]/4096.0f,mtxCurrent[0][3]/4096.0f,mtxCurrent[0][4]/4096.0f,mtxCurrent[0][5]/4096.0f,mtxCurrent[0][6]/4096.0f,mtxCurrent[0][7]/4096.0f,mtxCurrent[0][8]/4096.0f,mtxCurrent[0][9]/4096.0f,mtxCurrent[0][10]/4096.0f,mtxCurrent[0][11]/4096.0f,mtxCurrent[0][12]/4096.0f,mtxCurrent[0][13]/4096.0f,mtxCurrent[0][14]/4096.0f,mtxCurrent[0][15]/4096.0f};
-		MatrixMultVec4x4(temp1,verts[i].coord);
+
-		MatrixMultVec4x4(temp0,verts[i].coord);
+		DS_ALIGN(16) VERT_POS4f vert = { verts[i].x, verts[i].y, verts[i].z, verts[i].w };
 		_NOSSE_MatrixMultVec4x4(temp1,verts[i].coord);
 		_NOSSE_MatrixMultVec4x4(temp0,verts[i].coord);
 	}
 	//clip each poly
--- a/desmume/src/gfx3d.h
+++ b/desmume/src/gfx3d.h
@ -520,6 +520,29 @@ struct POLYLIST {
 	int count;
 };
 //just a vert with a 4 float position
 //the union overlays three views of the same four floats:
 //  coord[0..3], the bare x/y/z/w members, and position.x/y/z/w
 struct VERT_POS4f
 {
 	union {
 		//array view, for code that wants a float[4]
 		float coord[4];
 		//unnamed view: components are reachable directly as vert.x, vert.y, ...
 		struct {
 			float x,y,z,w;
 		};
 		//named view: the same components reachable as vert.position.x, ...
 		struct {
 			float x,y,z,w;
 		} position;
 	};
 	//stores all four components at once.
 	//the parameters shadow the members of the same name, hence the explicit this->
 	void set_coord(float x, float y, float z, float w)
 	{ 
 		this->x = x; 
 		this->y = y; 
 		this->z = z; 
 		this->w = w; 
 	}
 };
 //dont use SSE optimized matrix instructions in here, things might not be aligned
 //we havent padded this because the sheer bulk of data leaves things running faster without the extra bloat
 struct VERT {
 	// Align to 16 for SSE instructions to work
 	union {
@ -546,8 +569,10 @@ struct VERT {
 		z = coords[2];
 		w = coords[3];
 	}
 	u8 color[3];
 	float fcolor[3];
 	u8 color[3];
 	void color_to_float() {
 		fcolor[0] = color[0];
 		fcolor[1] = color[1];
--- a/desmume/src/types.h
+++ b/desmume/src/types.h
@ -93,6 +93,9 @@
 #endif
 //------------alignment macros-------------
 //dont apply these to types without further testing. it only works portably here on declarations of variables
 //cant we find a pattern other people use more successfully?
 #if defined(_MSC_VER) || defined(__INTEL_COMPILER)
 #define DS_ALIGN(X) __declspec(align(X))
 #elif defined(__GNUC__)
@ -100,11 +103,10 @@
 #else
 #define DS_ALIGN(X)
 #endif
 #define CACHE_ALIGN DS_ALIGN(32)
 //use this for example when you want a byte value to be better-aligned
 #define FAST_ALIGN DS_ALIGN(4)
 //---------------------------------------------
 #ifdef __MINGW32__
 #define FASTCALL __attribute__((fastcall))