Texture coordinates are now stored in fixed-point format in the TEV, which allows overflows to be emulated correctly. Added logic to calculate the texture LOD and use the correct mip level. Dumping textures now dumps all mip levels. Added line rendering. Changed the data stored in a vertex from float arrays to vectors for cleaner math.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@5178 8ced0084-cf51-0410-be5f-012b33b47a6e
2010-03-09 04:38:07 +00:00 · 2010-03-09 04:38:07 +00:00 · cc7c6cd35f
parent 5beb6dfd47
commit cc7c6cd35f
16 changed files with 704 additions and 323 deletions
--- a/Source/Core/VideoCommon/Src/BPMemory.h
+++ b/Source/Core/VideoCommon/Src/BPMemory.h
@ -451,7 +451,8 @@ union TexMode0
        unsigned mag_filter : 1;
        unsigned min_filter : 3;
        unsigned diag_lod : 1;
-        signed lod_bias : 10;
+        signed lod_bias : 8;
+		unsigned pad0 : 2;
        unsigned max_aniso : 2;
        unsigned lod_clamp : 1;
    };
--- a/Source/Plugins/Plugin_VideoSoftware/Src/Clipper.cpp
+++ b/Source/Plugins/Plugin_VideoSoftware/Src/Clipper.cpp
@ -90,13 +90,13 @@ namespace Clipper
    static inline int CalcClipMask(OutputVertexData *v)
    {
 	    int cmask = 0;
-        float* pos = v->projectedPosition;
-	    if (pos[3] - pos[0] < 0) cmask |= CLIP_POS_X_BIT;
-	    if (pos[0] + pos[3] < 0) cmask |= CLIP_NEG_X_BIT;
-	    if (pos[3] - pos[1] < 0) cmask |= CLIP_POS_Y_BIT;
-	    if (pos[1] + pos[3] < 0) cmask |= CLIP_NEG_Y_BIT;
-	    if (pos[3] * pos[2] > 0) cmask |= CLIP_POS_Z_BIT;
-	    if (pos[2] + pos[3] < 0) cmask |= CLIP_NEG_Z_BIT;
+        Vec4 pos = v->projectedPosition;
+	    if (pos.w - pos.x < 0) cmask |= CLIP_POS_X_BIT;
+	    if (pos.x + pos.w < 0) cmask |= CLIP_NEG_X_BIT;
+	    if (pos.w - pos.y < 0) cmask |= CLIP_POS_Y_BIT;
+	    if (pos.y + pos.w < 0) cmask |= CLIP_NEG_Y_BIT;
+	    if (pos.w * pos.z > 0) cmask |= CLIP_POS_Z_BIT;
+	    if (pos.z + pos.w < 0) cmask |= CLIP_NEG_Z_BIT;
 	    return cmask;
    }

@ -109,7 +109,7 @@ namespace Clipper
    #define DIFFERENT_SIGNS(x,y) ((x <= 0 && y > 0) || (x > 0 && y <= 0))

    #define CLIP_DOTPROD(I, A, B, C, D) \
-	    (Vertices[I]->projectedPosition[0] * A + Vertices[I]->projectedPosition[1] * B + Vertices[I]->projectedPosition[2] * C + Vertices[I]->projectedPosition[3] * D)
+	    (Vertices[I]->projectedPosition.x * A + Vertices[I]->projectedPosition.y * B + Vertices[I]->projectedPosition.z * C + Vertices[I]->projectedPosition.w * D)

    #define POLY_CLIP( PLANE_BIT, A, B, C, D )                          \
    {                                                                   \
@ -153,6 +153,27 @@ namespace Clipper
 	    }									                            \
    }

+	#define LINE_CLIP(PLANE_BIT, A, B, C, D )	/* clip line Vertices[0]-Vertices[1] against plane (A,B,C,D); uses mask, t0, t1 from the enclosing scope */	\
+	{															\
+		if (mask & PLANE_BIT) {	/* only test planes that at least one endpoint violates */	\
+			const float dp0 = CLIP_DOTPROD( 0, A, B, C, D );	\
+			const float dp1 = CLIP_DOTPROD( 1, A, B, C, D );	\
+			const bool neg_dp0 = dp0 < 0;						\
+			const bool neg_dp1 = dp1 < 0;						\
+																\
+			if (neg_dp0 && neg_dp1)	/* both endpoints outside this plane: return from the enclosing function */	\
+				return;											\
+																\
+			if (neg_dp1) {										\
+				float t = dp1 / (dp1 - dp0);	/* fraction from vertex 1 toward vertex 0 at which the plane is crossed */	\
+				if (t > t1) t1 = t;	/* keep the largest cut for vertex 1 */	\
+			} else if (neg_dp0) {								\
+				float t = dp0 / (dp0 - dp1);	/* fraction from vertex 0 toward vertex 1 at which the plane is crossed */	\
+				if (t > t0) t0 = t;	/* keep the largest cut for vertex 0 */	\
+			}													\
+		}														\
+	}
+
    void ClipTriangle(int *indices, int &numIndices)
    {
 	    int mask = 0;
@ -202,6 +223,53 @@ namespace Clipper
 	    }
    }

+	void ClipLine(int *indices) // Clips the line Vertices[0]-Vertices[1]; indices[0..1] end up as the vertex slots to draw, or SKIP_FLAG when fully clipped
+	{
+		int mask = 0; // union of the clip bits of both endpoints
+		int clip_mask[2] = { 0, 0 }; // clip bits per endpoint
+
+		for (int i = 0; i < 2; ++i)
+		{
+			clip_mask[i] = CalcClipMask(Vertices[i]);
+			mask |= clip_mask[i];
+		}
+
+		if (mask == 0) // neither endpoint violates a plane: indices stay as the caller set them
+			return;
+
+		float t0 = 0; // cut fraction for vertex 0, raised by LINE_CLIP
+		float t1 = 0; // cut fraction for vertex 1, raised by LINE_CLIP
+
+		// Mark unused in case of early termination 
+		// of the macros below. (When fully clipped)
+		indices[0] = SKIP_FLAG;
+		indices[1] = SKIP_FLAG;
+
+		LINE_CLIP(CLIP_POS_X_BIT, -1,  0,  0, 1); // each LINE_CLIP may return from this function
+		LINE_CLIP(CLIP_NEG_X_BIT,  1,  0,  0, 1);
+		LINE_CLIP(CLIP_POS_Y_BIT,  0, -1,  0, 1);
+		LINE_CLIP(CLIP_NEG_Y_BIT,  0,  1,  0, 1);
+		LINE_CLIP(CLIP_POS_Z_BIT,  0,  0, -1, 1);
+		LINE_CLIP(CLIP_NEG_Z_BIT,  0,  0,  1, 1);
+
+		// Restore the old values as this line 
+		// was not fully clipped.
+		indices[0] = 0;
+		indices[1] = 1;
+
+		int numVertices = 2; // next free slot in Vertices; presumably advanced by AddInterpolatedVertex - TODO confirm
+
+		if (clip_mask[0]) { // vertex 0 was outside: replace it with a vertex interpolated toward vertex 1
+			indices[0] = numVertices;
+			AddInterpolatedVertex(t0, 0, 1, numVertices);
+		}
+
+		if (clip_mask[1]) { // vertex 1 was outside: replace it with a vertex interpolated toward vertex 0
+			indices[1] = numVertices;
+			AddInterpolatedVertex(t1, 1, 0, numVertices);
+		}
+	}
+
    void ProcessTriangle(OutputVertexData *v0, OutputVertexData *v1, OutputVertexData *v2)
    {
        if (stats.thisFrame.numDrawnObjects < g_Config.drawStart || stats.thisFrame.numDrawnObjects >= g_Config.drawEnd )
@ -247,6 +315,75 @@ namespace Clipper
        }
    }

+	// Builds *dst as a copy of *src displaced by (dx, dy) in screen space.
+	//
+	// dst      - vertex to fill; may be uninitialised storage.
+	// src      - vertex to duplicate (already perspective-divided by the caller).
+	// dx, dy   - screen-space offset added to the copied position.
+	// sOffset  - texture S offset for the widened edge; not applied yet.
+	//
+	// Every attribute is copied, not only the ones the line code touches:
+	// the rasterizer reads projectedPosition.w to build its W slope and may
+	// interpolate both color channels, so leaving those fields uninitialised
+	// in the temporary line vertices would feed garbage into the rasterizer.
+	void CopyVertex(OutputVertexData *dst, OutputVertexData *src, float dx, float dy, unsigned int sOffset)
+	{
+		*dst = *src;
+
+		dst->screenPosition.x = src->screenPosition.x + dx;
+		dst->screenPosition.y = src->screenPosition.y + dy;
+
+		// todo - s offset
+		(void)sOffset;
+	}
+
+	// Clips a line and rasterizes it as a screen-space quad (two triangles).
+	//
+	// lineV0, lineV1 - the two endpoints of the line.
+	//
+	// The quad is widened perpendicular to the line's major axis: a mostly
+	// horizontal line is thickened along Y, a mostly vertical one along X.
+	// Each side is displaced by linesize / 12 (presumably linesize is in
+	// 1/6-pixel units, so this is half the line width - TODO confirm).
+	void ProcessLine(OutputVertexData *lineV0, OutputVertexData *lineV1)
+	{
+		int indices[4] = { 0, 1, SKIP_FLAG, SKIP_FLAG };
+
+		Vertices[0] = lineV0;
+		Vertices[1] = lineV1;
+
+		ClipLine(indices);
+
+		// ClipLine leaves SKIP_FLAG behind when the line is fully clipped.
+		if (indices[0] == SKIP_FLAG)
+			return;
+
+		OutputVertexData *v0 = Vertices[indices[0]];
+		OutputVertexData *v1 = Vertices[indices[1]];
+
+		PerspectiveDivide(v0);
+		PerspectiveDivide(v1);
+
+		const float dx = v1->screenPosition.x - v0->screenPosition.x;
+		const float dy = v1->screenPosition.y - v0->screenPosition.y;
+
+		// Float magnitudes computed by hand: an unqualified abs() may bind
+		// to the integer overload and truncate sub-pixel deltas to zero.
+		const float absDx = dx < 0 ? -dx : dx;
+		const float absDy = dy < 0 ? -dy : dy;
+
+		const float halfWidth = bpmem.lineptwidth.linesize / 12.0f;
+
+		float screenDx = 0;
+		float screenDy = 0;
+
+		if (absDx > absDy)
+			screenDy = (dx > 0) ? -halfWidth : halfWidth;
+		else
+			screenDx = (dy > 0) ? halfWidth : -halfWidth;
+
+		OutputVertexData triangle[3];
+
+		CopyVertex(&triangle[0], v0, screenDx, screenDy, 0);
+		CopyVertex(&triangle[1], v1, screenDx, screenDy, 0);
+		CopyVertex(&triangle[2], v1, -screenDx, -screenDy, bpmem.lineptwidth.lineoff);
+
+		// ccw winding
+		Rasterizer::DrawTriangleFrontFace(&triangle[2], &triangle[1], &triangle[0]);
+
+		// Reuse slot 1 for the remaining corner of the quad.
+		CopyVertex(&triangle[1], v0, -screenDx, -screenDy, bpmem.lineptwidth.lineoff);
+
+		Rasterizer::DrawTriangleFrontFace(&triangle[0], &triangle[1], &triangle[2]);
+	}
        
    bool CullTest(OutputVertexData *v0, OutputVertexData *v1, OutputVertexData *v2, bool &backface)
    {
@ -260,15 +397,15 @@ namespace Clipper
            return false;
        }

-        float x0 = v0->projectedPosition[0];
-        float x1 = v1->projectedPosition[0];
-        float x2 = v2->projectedPosition[0];
-        float y1 = v1->projectedPosition[1];
-        float y0 = v0->projectedPosition[1];
-        float y2 = v2->projectedPosition[1];
-        float w0 = v0->projectedPosition[3];
-        float w1 = v1->projectedPosition[3];
-        float w2 = v2->projectedPosition[3];
+        float x0 = v0->projectedPosition.x;
+        float x1 = v1->projectedPosition.x;
+        float x2 = v2->projectedPosition.x;
+        float y1 = v1->projectedPosition.y;
+        float y0 = v0->projectedPosition.y;
+        float y2 = v2->projectedPosition.y;
+        float w0 = v0->projectedPosition.w;
+        float w1 = v1->projectedPosition.w;
+        float w2 = v2->projectedPosition.w;

        float normalZDir = (x0*w2 - x2*w0)*y1 + (x2*y0 - x0*y2)*w1 + (y2*w0 - y0*w2)*x1; 

@ -291,13 +428,13 @@ namespace Clipper

    void PerspectiveDivide(OutputVertexData *vertex)
    {
-        float *projected = vertex->projectedPosition;
-        float *screen = vertex->screenPosition;
+        Vec4 &projected = vertex->projectedPosition;
+        Vec3 &screen = vertex->screenPosition;

-        float wInverse = 1.0f/projected[3];
-        screen[0] = projected[0] * wInverse * xfregs.viewport.wd + m_ViewOffset[0];
-        screen[1] = projected[1] * wInverse * xfregs.viewport.ht + m_ViewOffset[1];
-        screen[2] = projected[2] * wInverse + m_ViewOffset[2];
+        float wInverse = 1.0f/projected.w;
+        screen.x = projected.x * wInverse * xfregs.viewport.wd + m_ViewOffset[0];
+        screen.y = projected.y * wInverse * xfregs.viewport.ht + m_ViewOffset[1];
+        screen.z = projected.z * wInverse + m_ViewOffset[2];
    }
    
 }
--- a/Source/Plugins/Plugin_VideoSoftware/Src/Clipper.h
+++ b/Source/Plugins/Plugin_VideoSoftware/Src/Clipper.h
@ -31,6 +31,7 @@ namespace Clipper

    void ProcessTriangle(OutputVertexData *v0, OutputVertexData *v1, OutputVertexData *v2);

+	void ProcessLine(OutputVertexData *v0, OutputVertexData *v1);

    bool CullTest(OutputVertexData *v0, OutputVertexData *v1, OutputVertexData *v2, bool &backface);

--- a/Source/Plugins/Plugin_VideoSoftware/Src/DebugUtil.cpp
+++ b/Source/Plugins/Plugin_VideoSoftware/Src/DebugUtil.cpp
@ -49,36 +49,32 @@ void Init()
    }
 }

-bool SaveTexture(const char* filename, u32 texmap, int width, int height)
-{
-    u8 *data = new u8[width * height * 4];
-    
-    GetTextureBGRA(data, texmap, width, height);
-
-    bool result = SaveTGA(filename, width, height, data);
-
-    delete []data;
-
-    return result;
-}
-
-void SaveTexture(const char* filename, u32 texmap)
+void SaveTexture(const char* filename, u32 texmap, s32 mip)
 {
    FourTexUnits& texUnit = bpmem.tex[(texmap >> 2) & 1];
    u8 subTexmap = texmap & 3;

    TexImage0& ti0 = texUnit.texImage0[subTexmap];

-    SaveTexture(filename, texmap, ti0.width + 1, ti0.height + 1);
+	int width = ti0.width + 1;
+	int height = ti0.height + 1;
+
+	u8 *data = new u8[width * height * 4];
+    
+    GetTextureBGRA(data, texmap, mip, width, height);
+
+    bool result = SaveTGA(filename, width, height, data);
+
+    delete []data;
 }

-void GetTextureBGRA(u8 *dst, u32 texmap, int width, int height)
+void GetTextureBGRA(u8 *dst, u32 texmap, s32 mip, int width, int height)
 {
    u8 sample[4];    

    for (int y = 0; y < height; y++)
        for (int x = 0; x < width; x++) {
-            TextureSampler::Sample((float)x, (float)y, 0, texmap, sample);
+            TextureSampler::SampleMip(x << 7, y << 7, mip, false, texmap, sample);
            // rgba to bgra
            *(dst++) = sample[2];
            *(dst++) = sample[1];
@ -87,13 +83,32 @@ void GetTextureBGRA(u8 *dst, u32 texmap, int width, int height)
        }
 }

+// Returns the max_lod setting of the given texture map rounded up to a
+// whole mip level. max_lod is read as a value with 4 fractional bits.
+s32 GetMaxTextureLod(u32 texmap)
+{
+	// texmap bit 2 selects the texture unit bank, bits 0-1 the map within it
+	FourTexUnits& unit = bpmem.tex[(texmap >> 2) & 1];
+	const u8 maxLod = unit.texMode1[texmap & 3].max_lod;
+
+	const u8 wholeLevels = maxLod >> 4;
+	const bool hasFraction = (maxLod & 0xf) != 0;
+
+	// any fractional part rounds up to the next level
+	return (s32)(hasFraction ? wholeLevels + 1 : wholeLevels);
+}
+
 void DumpActiveTextures()
 {
    for (unsigned int stageNum = 0; stageNum < bpmem.genMode.numindstages; stageNum++)
    {
        u32 texmap = bpmem.tevindref.getTexMap(stageNum);

-        SaveTexture(StringFromFormat("%star%i_ind%i_map%i.tga", File::GetUserPath(D_DUMPTEXTURES_IDX), stats.thisFrame.numDrawnObjects, stageNum, texmap).c_str(), texmap);     
+		s32 maxLod = GetMaxTextureLod(texmap);
+		for (s32 mip = 0; mip < maxLod; ++mip)
+		{
+			SaveTexture(StringFromFormat("%star%i_ind%i_map%i_mip%i.tga", File::GetUserPath(D_DUMPTEXTURES_IDX), stats.thisFrame.numDrawnObjects, stageNum, texmap, mip).c_str(), texmap, mip);
+		}
    }

    for (unsigned int stageNum = 0; stageNum <= bpmem.genMode.numtevstages; stageNum++)
@ -104,7 +119,11 @@ void DumpActiveTextures()

        int texmap = order.getTexMap(stageOdd);

-        SaveTexture(StringFromFormat("%star%i_stage%i_map%i.tga", File::GetUserPath(D_DUMPTEXTURES_IDX), stats.thisFrame.numDrawnObjects, stageNum, texmap).c_str(), texmap);           
+        s32 maxLod = GetMaxTextureLod(texmap);
+		for (s32 mip = 0; mip < maxLod; ++mip)
+		{
+			SaveTexture(StringFromFormat("%star%i_stage%i_map%i_mip%i.tga", File::GetUserPath(D_DUMPTEXTURES_IDX), stats.thisFrame.numDrawnObjects, stageNum, texmap, mip).c_str(), texmap, mip);
+		}
    }
 }

--- a/Source/Plugins/Plugin_VideoSoftware/Src/DebugUtil.h
+++ b/Source/Plugins/Plugin_VideoSoftware/Src/DebugUtil.h
@ -22,7 +22,7 @@ namespace DebugUtil
 {
    void Init();

-    void GetTextureBGRA(u8 *dst, u32 texmap, int width, int height);
+    void GetTextureBGRA(u8 *dst, u32 texmap, s32 mip, int width, int height);

    void DumpActiveTextures();

--- a/Source/Plugins/Plugin_VideoSoftware/Src/HwRasterizer.cpp
+++ b/Source/Plugins/Plugin_VideoSoftware/Src/HwRasterizer.cpp
@ -155,7 +155,7 @@ namespace HwRasterizer
        int width = texImage0.width;
        int height = texImage0.height;

-        DebugUtil::GetTextureBGRA(temp, 0, width, height);
+        DebugUtil::GetTextureBGRA(temp, 0, 0, width, height);

        glGenTextures(1, (GLuint *)&texture);
 		glBindTexture(GL_TEXTURE_RECTANGLE_ARB, texture);
--- a/Source/Plugins/Plugin_VideoSoftware/Src/NativeVertexFormat.h
+++ b/Source/Plugins/Plugin_VideoSoftware/Src/NativeVertexFormat.h
@ -18,6 +18,8 @@
 #ifndef _NATIVEVERTEXFORMAT_H
 #define _NATIVEVERTEXFORMAT_H

+#include "../../Plugin_VideoDX9/Src/Vec3.h"
+
 #ifdef WIN32
 #define LOADERDECL __cdecl
 #else
@ -26,25 +28,33 @@

 typedef void (LOADERDECL *TPipelineFunction)();

+struct Vec4 // four-component float vector; used for OutputVertexData::projectedPosition
+{
+	float x;
+	float y;
+	float z;
+	float w; // divisor used by Clipper::PerspectiveDivide
+};
+
 struct InputVertexData
 {
    u8 posMtx;
    u8 texMtx[8];

-    float position[4];    
-    float normal[3][3];
+    Vec3 position;    
+    Vec3 normal[3];
    u8 color[2][4];
    float texCoords[8][2];
 };

 struct OutputVertexData
 {
-    float mvPosition[3];
-    float projectedPosition[4];
-    float screenPosition[3];
-    float normal[3][3];
+    Vec3 mvPosition;
+    Vec4 projectedPosition;
+    Vec3 screenPosition;
+    Vec3 normal[3];
    u8 color[2][4];
-    float texCoords[8][3];
+    Vec3 texCoords[8];

    void Lerp(float t, OutputVertexData *a, OutputVertexData *b)
    {
@ -52,17 +62,16 @@ struct OutputVertexData

        #define LINTERP_INT(T, OUT, IN) (OUT) + (((IN - OUT) * T) >> 8)

-        for (int i = 0; i < 3; ++i)
-            mvPosition[i] = LINTERP(t, a->mvPosition[i], b->mvPosition[i]);
+        mvPosition = LINTERP(t, a->mvPosition, b->mvPosition);

-        for (int i = 0; i < 4; ++i)
-            projectedPosition[i] = LINTERP(t, a->projectedPosition[i], b->projectedPosition[i]);
+        projectedPosition.x = LINTERP(t, a->projectedPosition.x, b->projectedPosition.x);
+		projectedPosition.y = LINTERP(t, a->projectedPosition.y, b->projectedPosition.y);
+		projectedPosition.z = LINTERP(t, a->projectedPosition.z, b->projectedPosition.z);
+		projectedPosition.w = LINTERP(t, a->projectedPosition.w, b->projectedPosition.w);

        for (int i = 0; i < 3; ++i)
        {
-            normal[i][0] = LINTERP(t, a->normal[i][0], b->normal[i][0]);
-            normal[i][1] = LINTERP(t, a->normal[i][1], b->normal[i][1]);
-            normal[i][2] = LINTERP(t, a->normal[i][2], b->normal[i][2]);
+            normal[i] = LINTERP(t, a->normal[i], b->normal[i]);
        }

        u16 t_int = (u16)(t * 256);
@ -74,9 +83,7 @@ struct OutputVertexData

        for (int i = 0; i < 8; ++i)
        {
-            texCoords[i][0] = LINTERP(t, a->texCoords[i][0], b->texCoords[i][0]);
-            texCoords[i][1] = LINTERP(t, a->texCoords[i][1], b->texCoords[i][1]);
-            texCoords[i][2] = LINTERP(t, a->texCoords[i][2], b->texCoords[i][2]);
+            texCoords[i] = LINTERP(t, a->texCoords[i], b->texCoords[i]);
        }

        #undef LINTERP
--- a/Source/Plugins/Plugin_VideoSoftware/Src/Rasterizer.cpp
+++ b/Source/Plugins/Plugin_VideoSoftware/Src/Rasterizer.cpp
@ -27,8 +27,20 @@
 #include "VideoConfig.h"
+#include <string.h>


-#define BLOCK_SIZE 8
+#define BLOCK_SIZE 2

+// Clamps x to [a, b]. The upper bound is checked first, so b wins when a > b.
+// Arguments and the whole expansion are parenthesised so the macro is safe
+// inside larger expressions; arguments may still be evaluated more than once.
+#define CLAMP(x, a, b) (((x) > (b)) ? (b) : (((x) < (a)) ? (a) : (x)))
+
+// returns approximation of log2(f) in s28.4
+// results are close enough to use for LOD
+//
+// The integer part comes from the IEEE-754 exponent field and the
+// fractional part from the top four mantissa bits. The sign bit is ignored.
+static inline s32 FixedLog2(float f)
+{
+	// Copy the bit pattern out with memcpy; reading the float through a
+	// u32 pointer violates strict aliasing and may be miscompiled.
+	u32 bits;
+	memcpy(&bits, &f, sizeof(bits));
+
+	// exponent << 4, minus the bias (127 << 4 == 2032)
+	s32 logInt = (s32)((bits & 0x7F800000) >> 19) - 2032; // integer part
+	s32 logFract = (s32)((bits & 0x007fffff) >> 19); // approximate fractional part
+
+	return logInt + logFract;
+}

 namespace Rasterizer
 {
@ -43,6 +55,7 @@ s32 scissorRight = 0;
 s32 scissorBottom = 0;

 Tev tev;
+RasterBlock rasterBlock;

 void Init()
 {
@ -91,53 +104,58 @@ void SetTevReg(int reg, int comp, bool konst, s16 color)
    tev.SetRegColor(reg, comp, konst, color);
 }

-inline void Draw(s32 x, s32 y)
+inline void Draw(s32 x, s32 y, s32 xi, s32 yi)
 {
    INCSTAT(stats.thisFrame.rasterizedPixels);

-    float zFloat = 1.0f + ZSlope.GetValue(x, y);
-    if(zFloat < 0|| zFloat > 1)
-        return;
+	float zFloat = 1.0f + ZSlope.GetValue(x, y);
+	if (zFloat < 0.0f || zFloat > 1.0f)
+		return;

-    u32 z = (u32)(zFloat * 0x00ffffff);
+	s32 z = (s32)(zFloat * 0x00ffffff);

-    if (bpmem.zcontrol.zcomploc && bpmem.zmode.testenable)
-    {
-        // early z
-        if (!EfbInterface::ZCompare(x, y, z))
-            return;
-    }
+	if (bpmem.zcontrol.zcomploc && bpmem.zmode.testenable)
+	{
+		// early z
+		if (!EfbInterface::ZCompare(x, y, z))
+			return;
+	}

-    float invW = 1.0f / WSlope.GetValue(x, y);
+	RasterBlockPixel& pixel = rasterBlock.Pixel[xi][yi];

-    tev.Position[0] = x;
-    tev.Position[1] = y;
-    tev.Position[2] = z;
+	float invW = pixel.InvW;

-    for(unsigned int i = 0; i < bpmem.genMode.numcolchans; i++)
-    {
-        for(int comp = 0; comp < 4; comp++)
-            tev.Color[i][comp] = (u8)ColorSlopes[i][comp].GetValue(x, y);
-    }
+	tev.Position[0] = x;
+	tev.Position[1] = y;
+	tev.Position[2] = z;

-    for(unsigned int i = 0; i < bpmem.genMode.numtexgens; i++)
-    {
-        if (xfregs.texMtxInfo[i].projection)
-        {
-            float q = TexSlopes[i][2].GetValue(x, y) * invW;
-            float invQ = invW / q;
-            tev.Uv[i][0] = TexSlopes[i][0].GetValue(x, y) * invQ * (bpmem.texcoords[i].s.scale_minus_1 + 1);
-            tev.Uv[i][1] = TexSlopes[i][1].GetValue(x, y) * invQ * (bpmem.texcoords[i].t.scale_minus_1 + 1);
-            tev.Lod[i] = 0;
-        }
-        else
-        {
-            tev.Uv[i][0] = TexSlopes[i][0].GetValue(x, y) * invW * (bpmem.texcoords[i].s.scale_minus_1 + 1);
-            tev.Uv[i][1] = TexSlopes[i][1].GetValue(x, y) * invW * (bpmem.texcoords[i].t.scale_minus_1 + 1);
-            tev.Lod[i] = 0;
-        }
-    }
+	//  colors
+	for (unsigned int i = 0; i < bpmem.genMode.numcolchans; i++)
+	{
+		for(int comp = 0; comp < 4; comp++)
+			tev.Color[i][comp] = (u8)ColorSlopes[i][comp].GetValue(x, y);
+	}

+	// tex coords
+	for (unsigned int i = 0; i < bpmem.genMode.numtexgens; i++)
+	{
+		// multiply by 128 because TEV stores stores UVs as s17.7
+		tev.Uv[i].s = (s32)(pixel.Uv[i][0] * 128);
+		tev.Uv[i].t = (s32)(pixel.Uv[i][1] * 128);
+	}
+
+	for (unsigned int i = 0; i < bpmem.genMode.numindstages; i++)
+	{
+		tev.IndirectLod[i] = rasterBlock.IndirectLod[i];
+		tev.IndirectLinear[i] = rasterBlock.IndirectLinear[i];
+	}
+
+	for (unsigned int i = 0; i <= bpmem.genMode.numtevstages; i++)
+	{
+		tev.TextureLod[i] = rasterBlock.TextureLod[i];
+		tev.TextureLinear[i] = rasterBlock.TextureLinear[i];
+	}
+   
    tev.Draw();
 }

@ -155,6 +173,109 @@ void InitSlope(Slope *slope, float f1, float f2, float f3, float DX31, float DX1
    slope->y0 = Y1;
 }

+inline void CalculateLOD(s32 &lod, bool &linear, u32 texmap, u32 texcoord) // writes the clamped LOD (s28.4) and the filter choice for one texmap/texcoord pair of the current rasterBlock
+{
+	FourTexUnits& texUnit = bpmem.tex[(texmap >> 2) & 1]; // texmap bit 2 selects the bank
+	u8 subTexmap = texmap & 3; // texmap bits 0-1 select the map within the bank
+
+	// LOD calculation requires data from the texture mode for bias, etc.
+	// it does not seem to use the actual texture size
+	TexMode0& tm0 = texUnit.texMode0[subTexmap];
+	TexMode1& tm1 = texUnit.texMode1[subTexmap];
+
+	float sDelta, tDelta; // NOTE(review): abs() below is applied to floats - confirm a float overload is in scope
+	if (tm0.diag_lod)
+	{
+		float *uv0 = rasterBlock.Pixel[0][0].Uv[texcoord];
+		float *uv1 = rasterBlock.Pixel[1][1].Uv[texcoord]; // diagonal neighbour in the 2x2 block
+
+		sDelta = abs(uv0[0] - uv1[0]);
+		tDelta = abs(uv0[1] - uv1[1]);
+	}
+	else
+	{
+		float *uv0 = rasterBlock.Pixel[0][0].Uv[texcoord];
+		float *uv1 = rasterBlock.Pixel[1][0].Uv[texcoord]; // neighbour one step in x
+		float *uv2 = rasterBlock.Pixel[0][1].Uv[texcoord]; // neighbour one step in y
+
+		sDelta = max(abs(uv0[0] - uv1[0]), abs(uv0[0] - uv2[0])); // NOTE(review): max is not defined in the visible source - confirm where it comes from
+		tDelta = max(abs(uv0[1] - uv1[1]), abs(uv0[1] - uv2[1]));
+	}
+
+	// get LOD in s28.4
+	lod = FixedLog2(max(sDelta, tDelta));
+
+	// bias is s2.5
+	int bias = tm0.lod_bias;
+	bias >>= 1; // drop one fractional bit to match the 4 fractional bits of lod
+	lod += bias;
+
+	linear = (lod >= 0 && (tm0.min_filter & 4) || lod < 0 && tm0.mag_filter); // chosen from the unclamped lod: min_filter bit 2 when lod >= 0, mag_filter otherwise
+
+	// order of checks matters
+	// should be:
+	// if lod > max then max
+	// else if lod < min then min
+	lod = CLAMP(lod, (s32)tm1.min_lod, (s32)tm1.max_lod);
+}
+
+// Fills rasterBlock for the BLOCK_SIZE x BLOCK_SIZE pixel block whose
+// top-left corner is (blockX, blockY).
+//
+// For every pixel it stores 1/W and the perspective-corrected texture
+// coordinates of each active texgen. It then derives one LOD / filter
+// choice per indirect stage and per enabled TEV stage from those
+// coordinates. Draw() later reads the results for each pixel it shades.
+void BuildBlock(s32 blockX, s32 blockY)
+{
+	for (s32 yi = 0; yi < BLOCK_SIZE; yi++)
+	{
+		for (s32 xi = 0; xi < BLOCK_SIZE; xi++)
+		{
+			RasterBlockPixel& pixel = rasterBlock.Pixel[xi][yi];
+
+			s32 x = xi + blockX;
+			s32 y = yi + blockY;
+
+			float invW = 1.0f / WSlope.GetValue(x, y);
+			pixel.InvW = invW;
+
+			// tex coords
+			for (unsigned int i = 0; i < bpmem.genMode.numtexgens; i++)
+			{
+				float projection;
+				if (xfregs.texMtxInfo[i].projection)
+				{
+					// projective texgen: divide by the interpolated q as well
+					float q = TexSlopes[i][2].GetValue(x, y) * invW;
+					projection = invW / q;
+				}
+				else
+					projection = invW;
+
+				pixel.Uv[i][0] = TexSlopes[i][0].GetValue(x, y) * projection;
+				pixel.Uv[i][1] = TexSlopes[i][1].GetValue(x, y) * projection;
+			}
+		}
+	}
+
+	// Indirect stages. Use the register accessors (as Tev::Draw does)
+	// rather than decoding tevindref by hand: a hand-rolled 2-bit mask on
+	// the 3-bit fields would drop bit 2, so texmaps and texcoords 4-7
+	// could never be selected.
+	for (unsigned int i = 0; i < bpmem.genMode.numindstages; i++)
+	{
+		u32 texmap = bpmem.tevindref.getTexMap(i);
+		u32 texcoord = bpmem.tevindref.getTexCoord(i);
+
+		CalculateLOD(rasterBlock.IndirectLod[i], rasterBlock.IndirectLinear[i], texmap, texcoord);
+	}
+
+	// Regular TEV stages; stages without an enabled texture keep their
+	// previous LOD values.
+	for (unsigned int i = 0; i <= bpmem.genMode.numtevstages; i++)
+	{
+		int stageOdd = i&1;
+		TwoTevStageOrders &order = bpmem.tevorders[i >> 1];
+		if(order.getEnable(stageOdd))
+		{
+			u32 texmap = order.getTexMap(stageOdd);
+			u32 texcoord = order.getTexCoord(stageOdd);
+
+			CalculateLOD(rasterBlock.TextureLod[i], rasterBlock.TextureLinear[i], texmap, texcoord);
+		}
+	}
+}
+
 void DrawTriangleFrontFace(OutputVertexData *v0, OutputVertexData *v1, OutputVertexData *v2)
 {
    INCSTAT(stats.thisFrame.numTrianglesDrawn);
@ -217,7 +338,7 @@ void DrawTriangleFrontFace(OutputVertexData *v0, OutputVertexData *v1, OutputVer
    float fltdy12 = flty1 - v1->screenPosition[1];
    float fltdy31 = v2->screenPosition[1] - flty1;

-    float w[3] = { 1.0f / v0->projectedPosition[3], 1.0f / v1->projectedPosition[3], 1.0f / v2->projectedPosition[3] };
+    float w[3] = { 1.0f / v0->projectedPosition.w, 1.0f / v1->projectedPosition.w, 1.0f / v2->projectedPosition.w };
    InitSlope(&WSlope, w[0], w[1], w[2], fltdx31, fltdx12, fltdy12, fltdy31, fltx1, flty1);

    InitSlope(&ZSlope, v0->screenPosition[2], v1->screenPosition[2], v2->screenPosition[2], fltdx31, fltdx12, fltdy12, fltdy31, fltx1, flty1);
@ -281,14 +402,16 @@ void DrawTriangleFrontFace(OutputVertexData *v0, OutputVertexData *v1, OutputVer
            // Skip block when outside an edge
            if(a == 0x0 || b == 0x0 || c == 0x0) continue;

+			BuildBlock(x, y);
+
            // Accept whole block when totally covered
            if(a == 0xF && b == 0xF && c == 0xF)
            {
                for(s32 iy = 0; iy < BLOCK_SIZE; iy++)
                {
-                    for(s32 ix = x; ix < x + BLOCK_SIZE; ix++)
+                    for(s32 ix = 0; ix < BLOCK_SIZE; ix++)
                    {                        
-                        Draw(ix, iy + y);
+                        Draw(x + ix, y + iy, ix, iy);
                    }
                }
            }
@ -298,17 +421,17 @@ void DrawTriangleFrontFace(OutputVertexData *v0, OutputVertexData *v1, OutputVer
                s32 CY2 = C2 + DX23 * y0 - DY23 * x0;
                s32 CY3 = C3 + DX31 * y0 - DY31 * x0;

-                for(s32 iy = y; iy < y + BLOCK_SIZE; iy++)
+                for(s32 iy = 0; iy < BLOCK_SIZE; iy++)
                {
                    s32 CX1 = CY1;
                    s32 CX2 = CY2;
                    s32 CX3 = CY3;

-                    for(s32 ix = x; ix < x + BLOCK_SIZE; ix++)
+                    for(s32 ix = 0; ix < BLOCK_SIZE; ix++)
                    {
                        if(CX1 > 0 && CX2 > 0 && CX3 > 0)
                        {
-                            Draw(ix, iy);
+                            Draw(x + ix, y + iy, ix, iy);
                        }

                        CX1 -= FDY12;
--- a/Source/Plugins/Plugin_VideoSoftware/Src/Rasterizer.h
+++ b/Source/Plugins/Plugin_VideoSoftware/Src/Rasterizer.h
@ -39,6 +39,21 @@ namespace Rasterizer
        float y0;
        float GetValue(s32 x, s32 y) { return f0 + (dfdx * (x - x0)) + (dfdy * (y - y0)); }
    };
+
+	struct RasterBlockPixel // per-pixel values precomputed by BuildBlock for one pixel of a raster block
+	{
+		float InvW; // 1 / interpolated W at this pixel
+		float Uv[8][2]; // perspective-corrected (s, t) per texgen, still in float
+	};
+
+	struct RasterBlock // 2x2 pixel block plus the LOD results shared by its pixels
+	{
+		RasterBlockPixel Pixel[2][2]; // indexed [xi][yi] within the block
+		s32 IndirectLod[4]; // LOD per indirect stage, written by CalculateLOD
+		bool IndirectLinear[4]; // filter choice per indirect stage
+		s32 TextureLod[16]; // LOD per TEV stage, written by CalculateLOD
+		bool TextureLinear[16]; // filter choice per TEV stage
+	};
    
 }

--- a/Source/Plugins/Plugin_VideoSoftware/Src/SetupUnit.cpp
+++ b/Source/Plugins/Plugin_VideoSoftware/Src/SetupUnit.cpp
@ -134,10 +134,38 @@ void SetupUnit::SetupTriFan()
 }

 void SetupUnit::SetupLine()
-{}
+{
+	if (m_VertexCounter < 1)
+    {
+        m_VertexCounter++;
+        m_VertWritePointer = m_VertPointer[m_VertexCounter];
+        return;
+    }
+
+    Clipper::ProcessLine(m_VertPointer[0], m_VertPointer[1]);
+
+    m_VertexCounter = 0;
+    m_VertWritePointer = m_VertPointer[0];
+}

 void SetupUnit::SetupLineStrip()
-{}
+{
+	if (m_VertexCounter < 1)
+    {
+        m_VertexCounter++;
+		m_VertWritePointer = m_VertPointer[m_VertexCounter];
+        return;
+    }
+
+	m_VertexCounter++;
+
+    Clipper::ProcessLine(m_VertPointer[0], m_VertPointer[1]);
+
+	m_VertWritePointer = m_VertPointer[0];
+
+	m_VertPointer[0] = m_VertPointer[1];
+	m_VertPointer[1] = &m_Vertices[m_VertexCounter & 1];
+}

 void SetupUnit::SetupPoint()
 {}
--- a/Source/Plugins/Plugin_VideoSoftware/Src/Tev.cpp
+++ b/Source/Plugins/Plugin_VideoSoftware/Src/Tev.cpp
@ -439,34 +439,33 @@ static bool AlphaTest(int alpha)
    return true;
 }

-inline float WrapIndirectCoord(float coord, int wrapMode)
+inline s32 WrapIndirectCoord(s32 coord, int wrapMode)
 {
    switch (wrapMode) {
        case ITW_OFF:
            return coord;
        case ITW_256:
-            return fmod(coord, 256);
-         case ITW_128:
-            return fmod(coord, 128);
+            return (coord % (256 << 7));
+        case ITW_128:
+            return (coord % (128 << 7));
        case ITW_64:
-            return fmod(coord, 64);
+            return (coord % (64 << 7));
        case ITW_32:
-            return fmod(coord, 32);
+            return (coord % (32 << 7));
        case ITW_16:
-            return fmod(coord, 16);
+            return (coord % (16 << 7));
        case ITW_0:
            return 0;
    }
    return 0;
 }

-void Tev::Indirect(unsigned int stageNum, float s, float t)
+void Tev::Indirect(unsigned int stageNum, s32 s, s32 t)
 {
    TevStageIndirect &indirect = bpmem.tevind[stageNum];
    u8 *indmap = IndirectTex[indirect.bt];
-    

-    float indcoord[3];
+    s32 indcoord[3];

    // alpha bump select
    switch (indirect.bs) {
@ -494,32 +493,32 @@ void Tev::Indirect(unsigned int stageNum, float s, float t)
    // format
    switch(indirect.fmt) {
        case ITF_8:
-            indcoord[0] = (float)indmap[ALP_C] + bias[0];
-            indcoord[1] = (float)indmap[BLU_C] + bias[1];
-            indcoord[2] = (float)indmap[GRN_C] + bias[2];
+            indcoord[0] = indmap[ALP_C] + bias[0];
+            indcoord[1] = indmap[BLU_C] + bias[1];
+            indcoord[2] = indmap[GRN_C] + bias[2];
            AlphaBump = AlphaBump & 0xf8;
            break;
        case ITF_5:
-            indcoord[0] = (float)(indmap[ALP_C] & 0x1f) + bias[0];
-            indcoord[1] = (float)(indmap[BLU_C] & 0x1f) + bias[1];
-            indcoord[2] = (float)(indmap[GRN_C] & 0x1f) + bias[2];
+            indcoord[0] = (indmap[ALP_C] & 0x1f) + bias[0];
+            indcoord[1] = (indmap[BLU_C] & 0x1f) + bias[1];
+            indcoord[2] = (indmap[GRN_C] & 0x1f) + bias[2];
            AlphaBump = AlphaBump & 0xe0;
            break;
        case ITF_4:
-            indcoord[0] = (float)(indmap[ALP_C] & 0x0f) + bias[0];
-            indcoord[1] = (float)(indmap[BLU_C] & 0x0f) + bias[1];
-            indcoord[2] = (float)(indmap[GRN_C] & 0x0f) + bias[2];
+            indcoord[0] = (indmap[ALP_C] & 0x0f) + bias[0];
+            indcoord[1] = (indmap[BLU_C] & 0x0f) + bias[1];
+            indcoord[2] = (indmap[GRN_C] & 0x0f) + bias[2];
            AlphaBump = AlphaBump & 0xf0;
            break;
        case ITF_3:
-            indcoord[0] = (float)(indmap[ALP_C] & 0x07) + bias[0];
-            indcoord[1] = (float)(indmap[BLU_C] & 0x07) + bias[1];
-            indcoord[2] = (float)(indmap[GRN_C] & 0x07) + bias[2];
+            indcoord[0] = (indmap[ALP_C] & 0x07) + bias[0];
+            indcoord[1] = (indmap[BLU_C] & 0x07) + bias[1];
+            indcoord[2] = (indmap[GRN_C] & 0x07) + bias[2];
            AlphaBump = AlphaBump & 0xf8;
            break;
    }

-    float indtevtrans[2] = { 0,0 };
+    s64 indtevtrans[2] = { 0,0 };

    // matrix multiply
    int indmtxid = indirect.mid & 3;
@ -529,39 +528,40 @@ void Tev::Indirect(unsigned int stageNum, float s, float t)
        int scale = ((u32)indmtx.col0.s0 << 0) |
 	                ((u32)indmtx.col1.s1 << 2) |
 	                ((u32)indmtx.col2.s2 << 4);
-        float fscale = 0.0f;
+
+		int shift;

        switch (indirect.mid & 12) {
-            case 0:
-                fscale = powf(2.0f, (float)(scale - 17)) / 1024.0f;
+            case 0:   
+				shift = 3 + (17 - scale);
                indtevtrans[0] = indmtx.col0.ma * indcoord[0] + indmtx.col1.mc * indcoord[1] + indmtx.col2.me * indcoord[2];
                indtevtrans[1] = indmtx.col0.mb * indcoord[0] + indmtx.col1.md * indcoord[1] + indmtx.col2.mf * indcoord[2];
                break;
            case 4: // s matrix
-                fscale = powf(2.0f, (float)(scale - 17)) / 256;
+				shift = 8 + (17 - scale);
                indtevtrans[0] = s * indcoord[0];
                indtevtrans[1] = t * indcoord[0];
                break;
            case 8: // t matrix
-                fscale = powf(2.0f, (float)(scale - 17)) / 256;
+				shift = 8 + (17 - scale);
                indtevtrans[0] = s * indcoord[1];
                indtevtrans[1] = t * indcoord[1];
                break;
        }

-        indtevtrans[0] *= fscale;
-        indtevtrans[1] *= fscale;
+		indtevtrans[0] = shift >= 0 ? indtevtrans[0] >> shift : indtevtrans[0] << -shift;
+		indtevtrans[1] = shift >= 0 ? indtevtrans[1] >> shift : indtevtrans[1] << -shift;
    }

-    if (indirect.fb_addprev)
+	if (indirect.fb_addprev)
    {
-        TexCoord[0] += WrapIndirectCoord(s, indirect.sw) + indtevtrans[0];
-        TexCoord[1] += WrapIndirectCoord(t, indirect.tw) + indtevtrans[1];
+        TexCoord.s += (int)(WrapIndirectCoord(s, indirect.sw) + indtevtrans[0]);
+        TexCoord.t += (int)(WrapIndirectCoord(t, indirect.tw) + indtevtrans[1]);
    }
    else
    {
-        TexCoord[0] = WrapIndirectCoord(s, indirect.sw) + indtevtrans[0];
-        TexCoord[1] = WrapIndirectCoord(t, indirect.tw) + indtevtrans[1];
+        TexCoord.s = (int)(WrapIndirectCoord(s, indirect.sw) + indtevtrans[0]);
+        TexCoord.t = (int)(WrapIndirectCoord(t, indirect.tw) + indtevtrans[1]);
    }
 }

@ -580,10 +580,12 @@ void Tev::Draw()
        u32 texcoordSel = bpmem.tevindref.getTexCoord(stageNum);
        u32 texmap = bpmem.tevindref.getTexMap(stageNum);

-        float scaleS = bpmem.texscale[stageNum2].getScaleS(stageOdd);
-        float scaleT = bpmem.texscale[stageNum2].getScaleT(stageOdd);
+		const TEXSCALE& texscale = bpmem.texscale[stageNum2];
+		s32 scaleS = stageOdd ? texscale.ss1:texscale.ss0;
+        s32 scaleT = stageOdd ? texscale.ts1:texscale.ts0;

-        TextureSampler::Sample(Uv[texcoordSel][0] * scaleS, Uv[texcoordSel][1] * scaleT, Lod[texcoordSel], texmap, IndirectTex[stageNum]);
+        TextureSampler::Sample(Uv[texcoordSel].s >> scaleS, Uv[texcoordSel].t >> scaleT,
+			IndirectLod[stageNum], IndirectLinear[stageNum], texmap, IndirectTex[stageNum]);

 #ifdef _DEBUG
        if (g_Config.bDumpTevStages)
@ -608,14 +610,14 @@ void Tev::Draw()
        int texcoordSel = order.getTexCoord(stageOdd);
        int texmap = order.getTexMap(stageOdd);

-        Indirect(stageNum, Uv[texcoordSel][0], Uv[texcoordSel][1]);
+        Indirect(stageNum, Uv[texcoordSel].s, Uv[texcoordSel].t);

        // sample texture
        if (order.getEnable(stageOdd))
        {
            u8 texel[4];
    
-            TextureSampler::Sample(TexCoord[0], TexCoord[1], Lod[texcoordSel], texmap, texel);
+			TextureSampler::Sample(TexCoord.s, TexCoord.t, TextureLod[stageNum], TextureLinear[stageNum], texmap, texel);

            int swaptable = ac.tswap * 2;            

--- a/Source/Plugins/Plugin_VideoSoftware/Src/Tev.h
+++ b/Source/Plugins/Plugin_VideoSoftware/Src/Tev.h
@ -21,7 +21,20 @@
 #include "BPMemLoader.h"

 class Tev
-{
+{ 
+	struct InputRegType {  // Packed bitfield record: three unsigned 8-bit fields plus one wider signed field.
+        unsigned a : 8;  // unsigned, 8 bits (0..255)
+        unsigned b : 8;  // unsigned, 8 bits (0..255)
+        unsigned c : 8;  // unsigned, 8 bits (0..255)
+        signed   d : 11;  // signed, 11 bits; NOTE(review): presumably the additive TEV combiner input - confirm against the combiner code
+    };
+
+	struct TextureCoordinateType  // Fixed-point (s, t) texture coordinate pair as used by Tev::Uv and Tev::TexCoord.
+	{
+		signed s : 24;  // signed 24-bit field; values assigned here are narrowed to 24 bits (the commit description says this is how coordinate overflow is emulated)
+		signed t : 24;  // same format as s; TextureSampler::SampleMip takes the integer texel index as (value >> 7) and the fraction as (value & 0x7f)
+	};
+
    // color order: RGBA
    s16 Reg[4][4];    
    s16 KonstantColors[4][4];
@ -32,7 +45,7 @@ class Tev
    s16 Zero16[4];
    u8 AlphaBump;
    u8 IndirectTex[4][4];
-    float TexCoord[2];
+	TextureCoordinateType TexCoord;

    s16 *m_ColorInputLUT[16][3];
    s16 *m_AlphaInputLUT[8];        // values must point to RGBA color
@ -49,20 +62,16 @@ class Tev
    void DrawAlphaRegular(TevStageCombiner::AlphaCombiner &ac);
    void DrawAlphaCompare(TevStageCombiner::AlphaCombiner &ac);

-    void Indirect(unsigned int stageNum, float s, float t);    
-
-    struct InputRegType {
-        unsigned a : 8;
-        unsigned b : 8;
-        unsigned c : 8;
-        signed   d : 11;
-    };
+    void Indirect(unsigned int stageNum, s32 s, s32 t);

 public:
-    s32 Position[3];
+	s32 Position[3];
    u8 Color[2][4];
-    float Uv[8][2];
-    float Lod[8];
+    TextureCoordinateType Uv[8];
+    s32 IndirectLod[4];
+	bool IndirectLinear[4];
+	s32 TextureLod[16];
+	bool TextureLinear[16];

    void Init();

--- a/Source/Plugins/Plugin_VideoSoftware/Src/TextureSampler.cpp
+++ b/Source/Plugins/Plugin_VideoSoftware/Src/TextureSampler.cpp
@ -23,29 +23,11 @@

 #include <cmath>

+#define ALLOW_MIPMAP 1
+
 namespace TextureSampler
 {

-inline int iround(float x)
-{
-    int t;
-
-#if defined(_WIN32) && !defined(_M_X64)
-    __asm
-    {
-        fld  x
-        fistp t
-    }
-#else
-	t = (int)x;
-	if((x - t) >= 0.5)
-		return t + 1;
-#endif
-
-    return t;
-}
-
-
 inline void WrapCoord(int &coord, int wrapMode, int imageSize)
 {
    switch (wrapMode)
@ -85,9 +67,53 @@ inline void AddTexel(u8 *inTexel, u32 *outTexel, u32 fract)
    outTexel[3] += inTexel[3] * fract;
 }

-void Sample(float s, float t, float lod, u8 texmap, u8 *sample)
+void Sample(s32 s, s32 t, s32 lod, bool linear, u8 texmap, u8 *sample)  // Samples texture `texmap` at fixed-point (s, t): chooses the mip level(s) from `lod`, delegates the fetch to SampleMip, writes 4 bytes to sample[0..3].
 {
-    FourTexUnits& texUnit = bpmem.tex[(texmap >> 2) & 1];
+    int baseMip = 0;  // stays 0 (base level) unless mipmapping is selected below
+	bool mipLinear = false;  // true => blend mip levels baseMip and baseMip + 1
+
+#if (ALLOW_MIPMAP)
+	FourTexUnits& texUnit = bpmem.tex[(texmap >> 2) & 1];  // bit 2 of texmap picks the bpmem.tex entry, bits 0-1 the unit inside it
+    TexMode0& tm0 = texUnit.texMode0[texmap & 3];
+
+	s32 lodFract = lod & 0xf;  // lod is fixed point with 4 fractional bits
+
+	if (lod > 0 && tm0.min_filter & 3)  // mips are consulted only for positive lod and when either of the two low min_filter bits is set
+	{
+		// use mipmap
+		baseMip = lod >> 4;  // integer part of lod
+		mipLinear = (lodFract && tm0.min_filter & 2);  // min_filter bit 1 requests blending between mips; skipped when the fraction is exactly 0
+
+		// if using nearest mip filter and lodFract >= 0.5 round up to next mip
+		baseMip += (lodFract >> 3) & (tm0.min_filter & 1);  // (lodFract >> 3) is 1 iff lodFract >= 8, i.e. fraction >= 0.5
+	}
+
+	if (mipLinear)
+	{
+		u8 sampledTex[4];
+        u32 texel[4];  // per-channel accumulator for the weighted sum
+
+		SampleMip(s, t, baseMip, linear, texmap, sampledTex);
+		SetTexel(sampledTex, texel, (16 - lodFract));  // weight of the lower mip; NOTE(review): SetTexel is assumed to store channel * weight like AddTexel accumulates it - confirm
+
+		SampleMip(s, t, baseMip + 1, linear, texmap, sampledTex);
+		AddTexel(sampledTex, texel, lodFract);  // weight of the next mip; the two weights sum to 16
+
+		sample[0] = (u8)(texel[0] >> 4);  // divide by the total weight (16)
+        sample[1] = (u8)(texel[1] >> 4);
+        sample[2] = (u8)(texel[2] >> 4);
+        sample[3] = (u8)(texel[3] >> 4);
+	}
+	else
+#endif
+	{
+		SampleMip(s, t, baseMip, linear, texmap, sample);  // single-level fetch; also the only path compiled when ALLOW_MIPMAP is 0 (baseMip is then always 0)
+	}	
+}
+
+void SampleMip(s32 s, s32 t, s32 mip, bool linear, u8 texmap, u8 *sample)
+{
+	FourTexUnits& texUnit = bpmem.tex[(texmap >> 2) & 1];
    u8 subTexmap = texmap & 3;

    TexMode0& tm0 = texUnit.texMode0[subTexmap];
@ -97,59 +123,85 @@ void Sample(float s, float t, float lod, u8 texmap, u8 *sample)
    u32 imageBase = texUnit.texImage3[subTexmap].image_base << 5;    
    u8 *imageSrc = g_VideoInitialize.pGetMemoryPointer(imageBase);

-    bool linear = false;
-    if ((lod > 0 && tm0.min_filter > 4) || (lod <= 0 && tm0.mag_filter))
-        linear = true;
+	int imageWidth = ti0.width;
+	int imageHeight = ti0.height;
+
+	int tlutAddress = texTlut.tmem_offset << 9;
+	
+	// reduce sample location and texture size to mip level
+	// move texture pointer to mip location
+	if (mip)
+	{
+		int mipWidth = imageWidth + 1;
+		int mipHeight = imageHeight + 1;
+
+		int fmtWidth = TexDecoder_GetBlockWidthInTexels(ti0.format);
+		int fmtHeight = TexDecoder_GetBlockHeightInTexels(ti0.format);
+		int fmtDepth = TexDecoder_GetTexelSizeInNibbles(ti0.format);
+
+		imageWidth >>= mip;
+		imageHeight >>= mip;
+		s >>= mip;
+		t >>= mip;
+
+		while (mip)
+		{
+			mipWidth = max(mipWidth, fmtWidth);
+			mipHeight = max(mipHeight, fmtHeight);
+			u32 size = (mipWidth * mipHeight * fmtDepth) >> 1;
+
+			imageSrc += size;
+			mipWidth >>= 1;
+			mipHeight >>= 1;
+			mip--;
+		}
+	}
+
+	// integer part of sample location
+	int imageS = s >> 7;
+	int imageT = t >> 7;

    if (linear)
    {
-        s32 s256 = s32((s - 0.5f) * 256);
-        s32 t256 = s32((t- 0.5f) * 256);
-
-        int imageS = s256 >> 8;
-        int imageSPlus1 = imageS + 1;
-        u32 fractS = s256 & 0xff;
-        fractS += fractS >> 7;
-
-        int imageT = t256 >> 8;
+        // linear sampling
+		int imageSPlus1 = imageS + 1;
+        int fractS = s & 0x7f;
+        
        int imageTPlus1 = imageT + 1;
-        u32 fractT = t256 & 0xff;
-        fractT += fractT >> 7;
+        int fractT = t & 0x7f;

        u8 sampledTex[4];
        u32 texel[4];

-        WrapCoord(imageS, tm0.wrap_s, ti0.width);
-        WrapCoord(imageT, tm0.wrap_t, ti0.height);
-        WrapCoord(imageSPlus1, tm0.wrap_s, ti0.width);
-        WrapCoord(imageTPlus1, tm0.wrap_t, ti0.height);
+        WrapCoord(imageS, tm0.wrap_s, imageWidth);
+        WrapCoord(imageT, tm0.wrap_t, imageHeight);
+        WrapCoord(imageSPlus1, tm0.wrap_s, imageWidth);
+        WrapCoord(imageTPlus1, tm0.wrap_t, imageHeight);

-        TexDecoder_DecodeTexel(sampledTex, imageSrc, imageS, imageT, ti0.width, ti0.format, texTlut.tmem_offset << 9, texTlut.tlut_format);
-        SetTexel(sampledTex, texel, (256 - fractS) * (256 - fractT));
+        TexDecoder_DecodeTexel(sampledTex, imageSrc, imageS, imageT, imageWidth, ti0.format, tlutAddress, texTlut.tlut_format);
+        SetTexel(sampledTex, texel, (128 - fractS) * (128 - fractT));

-        TexDecoder_DecodeTexel(sampledTex, imageSrc, imageSPlus1, imageT, ti0.width, ti0.format, texTlut.tmem_offset << 9, texTlut.tlut_format);
-        AddTexel(sampledTex, texel, (fractS) * (256 - fractT));
+        TexDecoder_DecodeTexel(sampledTex, imageSrc, imageSPlus1, imageT, imageWidth, ti0.format, tlutAddress, texTlut.tlut_format);
+        AddTexel(sampledTex, texel, (fractS) * (128 - fractT));

-        TexDecoder_DecodeTexel(sampledTex, imageSrc, imageS, imageTPlus1, ti0.width, ti0.format, texTlut.tmem_offset << 9, texTlut.tlut_format);
-        AddTexel(sampledTex, texel, (256 - fractS) * (fractT));
+        TexDecoder_DecodeTexel(sampledTex, imageSrc, imageS, imageTPlus1, imageWidth, ti0.format, tlutAddress, texTlut.tlut_format);
+        AddTexel(sampledTex, texel, (128 - fractS) * (fractT));

-        TexDecoder_DecodeTexel(sampledTex, imageSrc, imageSPlus1, imageTPlus1, ti0.width, ti0.format, texTlut.tmem_offset << 9, texTlut.tlut_format);
+        TexDecoder_DecodeTexel(sampledTex, imageSrc, imageSPlus1, imageTPlus1, imageWidth, ti0.format, tlutAddress, texTlut.tlut_format);
        AddTexel(sampledTex, texel, (fractS) * (fractT));

-        sample[0] = (u8)(texel[0] >> 16);
-        sample[1] = (u8)(texel[1] >> 16);
-        sample[2] = (u8)(texel[2] >> 16);
-        sample[3] = (u8)(texel[3] >> 16);
+        sample[0] = (u8)(texel[0] >> 14);
+        sample[1] = (u8)(texel[1] >> 14);
+        sample[2] = (u8)(texel[2] >> 14);
+        sample[3] = (u8)(texel[3] >> 14);
    }
    else
    {
-        int imageS = int(s);
-        int imageT = int(t);
+        // nearest neighbor sampling
+		WrapCoord(imageS, tm0.wrap_s, imageWidth);
+        WrapCoord(imageT, tm0.wrap_t, imageHeight);

-        WrapCoord(imageS, tm0.wrap_s, ti0.width);
-        WrapCoord(imageT, tm0.wrap_t, ti0.height);
-
-        TexDecoder_DecodeTexel(sample, imageSrc, imageS, imageT, ti0.width, ti0.format, texTlut.tmem_offset << 9, texTlut.tlut_format);   
+        TexDecoder_DecodeTexel(sample, imageSrc, imageS, imageT, imageWidth, ti0.format, tlutAddress, texTlut.tlut_format);   
    }
 }

--- a/Source/Plugins/Plugin_VideoSoftware/Src/TextureSampler.h
+++ b/Source/Plugins/Plugin_VideoSoftware/Src/TextureSampler.h
@ -23,7 +23,9 @@

 namespace TextureSampler
 {
-    void Sample(float s, float t, float lod, u8 texmap, u8 *sample);
+	void Sample(s32 s, s32 t, s32 lod, bool linear, u8 texmap, u8 *sample);  // Entry point: selects mip level(s) from the fixed-point lod and writes the filtered 4-byte texel to sample.
+
+	void SampleMip(s32 s, s32 t, s32 mip, bool linear, u8 texmap, u8 *sample);  // Fetches from one explicit mip level; `linear` chooses 4-texel blending over a single nearest texel.
 }


--- a/Source/Plugins/Plugin_VideoSoftware/Src/TransformUnit.cpp
+++ b/Source/Plugins/Plugin_VideoSoftware/Src/TransformUnit.cpp
@ -22,6 +22,7 @@
 #include "TransformUnit.h"
 #include "XFMemLoader.h"
 #include "CPMemLoader.h"
+#include "BPMemLoader.h"
 #include "NativeVertexFormat.h"

 #include "../../Plugin_VideoDX9/Src/Vec3.h"
@ -30,48 +31,48 @@
 namespace TransformUnit
 {

-void MultiplyVec2Mat24(const float *vec, const float *mat, float *result)
+void MultiplyVec2Mat24(const Vec3 &vec, const float *mat, Vec3 &result)  // Applies a 2-row, 4-column row-major matrix to (vec.x, vec.y, 1, 1); vec.z is ignored and result.z is left untouched.
 {
-    result[0] = mat[0] * vec[0] + mat[1] * vec[1] + mat[2] + mat[3];
-    result[1] = mat[4] * vec[0] + mat[5] * vec[1] + mat[6] + mat[7];
+    result.x = mat[0] * vec.x + mat[1] * vec.y + mat[2] + mat[3];  // columns 2 and 3 are added unscaled (implicit third and fourth components of 1)
+    result.y = mat[4] * vec.x + mat[5] * vec.y + mat[6] + mat[7];
 }

-void MultiplyVec2Mat34(const float *vec, const float *mat, float *result)
+void MultiplyVec2Mat34(const Vec3 &vec, const float *mat, Vec3 &result)  // Applies a 3-row, 4-column row-major matrix to (vec.x, vec.y, 1, 1); vec.z is ignored. Called for the XF_TEXINPUT_AB11 input form.
 {
-    result[0] = mat[0] * vec[0] + mat[1] * vec[1] + mat[2] + mat[3];
-    result[1] = mat[4] * vec[0] + mat[5] * vec[1] + mat[6] + mat[7];
-    result[2] = mat[8] * vec[0] + mat[9] * vec[1] + mat[10] + mat[11];
+    result.x = mat[0] * vec.x + mat[1] * vec.y + mat[2] + mat[3];  // third and fourth columns are added unscaled (implicit components of 1)
+    result.y = mat[4] * vec.x + mat[5] * vec.y + mat[6] + mat[7];
+    result.z = mat[8] * vec.x + mat[9] * vec.y + mat[10] + mat[11];
 }

-void MultiplyVec3Mat33(const float *vec, const float *mat, float *result)
+void MultiplyVec3Mat33(const Vec3 &vec, const float *mat, Vec3 &result)  // result = M * vec for a row-major 3x3 matrix stored in mat[0..8]; no translation term.
 {
-    result[0] = mat[0] * vec[0] + mat[1] * vec[1] + mat[2] * vec[2];
-    result[1] = mat[3] * vec[0] + mat[4] * vec[1] + mat[5] * vec[2];
-    result[2] = mat[6] * vec[0] + mat[7] * vec[1] + mat[8] * vec[2];
+    result.x = mat[0] * vec.x + mat[1] * vec.y + mat[2] * vec.z;  // row 0
+    result.y = mat[3] * vec.x + mat[4] * vec.y + mat[5] * vec.z;  // row 1
+    result.z = mat[6] * vec.x + mat[7] * vec.y + mat[8] * vec.z;  // row 2
 }

-void MultiplyVec3Mat34(const float *vec, const float *mat, float *result)
+void MultiplyVec3Mat34(const Vec3 &vec, const float *mat, Vec3 &result)  // Affine transform: row-major 3-row, 4-column matrix in mat[0..11] applied to (vec.x, vec.y, vec.z, 1).
 {
-    result[0] = mat[0] * vec[0] + mat[1] * vec[1] + mat[2] * vec[2] + mat[3];
-    result[1] = mat[4] * vec[0] + mat[5] * vec[1] + mat[6] * vec[2] + mat[7];
-    result[2] = mat[8] * vec[0] + mat[9] * vec[1] + mat[10] * vec[2] + mat[11];
+    result.x = mat[0] * vec.x + mat[1] * vec.y + mat[2] * vec.z + mat[3];  // fourth column is the additive (translation) term
+    result.y = mat[4] * vec.x + mat[5] * vec.y + mat[6] * vec.z + mat[7];
+    result.z = mat[8] * vec.x + mat[9] * vec.y + mat[10] * vec.z + mat[11];
 }

-void MultipleVec3Perspective(const float *vec, const float *proj, float *result)
+void MultipleVec3Perspective(const Vec3 &vec, const float *proj, Vec4 &result)  // Perspective projection of vec using six coefficients proj[0..5]; produces a 4-component result with w = -vec.z.
 {
-    result[0] = proj[0] * vec[0] + proj[1] * vec[2];
-    result[1] = proj[2] * vec[1] + proj[3] * vec[2];
-    //result[2] = (proj[4] * vec[2] + proj[5]);
-    result[2] = (proj[4] * vec[2] + proj[5]) * (1.0f - (float)1e-7);
-    result[3] = -vec[2];
+    result.x = proj[0] * vec.x + proj[1] * vec.z;  // x picks up a z-dependent term
+    result.y = proj[2] * vec.y + proj[3] * vec.z;  // y picks up a z-dependent term
+    //result.z = (proj[4] * vec.z + proj[5]);
+    result.z = (proj[4] * vec.z + proj[5]) * (1.0f - (float)1e-7);  // scaled by a factor just under 1 instead of the plain form above; NOTE(review): rationale not shown here - presumably a depth-precision workaround, confirm
+    result.w = -vec.z;
 }

-void MultipleVec3Ortho(const float *vec, const float *proj, float *result)
+void MultipleVec3Ortho(const Vec3 &vec, const float *proj, Vec4 &result)  // Orthographic projection: independent scale + offset per axis from proj[0..5]; w is fixed at 1.
 {
-    result[0] = proj[0] * vec[0] + proj[1];
-    result[1] = proj[2] * vec[1] + proj[3];
-    result[2] = proj[4] * vec[2] + proj[5];
-    result[3] = 1;
+    result.x = proj[0] * vec.x + proj[1];  // scale, then offset
+    result.y = proj[2] * vec.y + proj[3];
+    result.z = proj[4] * vec.z + proj[5];
+    result.w = 1;
 }

 void TransformPosition(const InputVertexData *src, OutputVertexData *dst)
@ -98,55 +99,53 @@ void TransformNormal(const InputVertexData *src, bool nbt, OutputVertexData *dst
        MultiplyVec3Mat33(src->normal[0], mat, dst->normal[0]);
        MultiplyVec3Mat33(src->normal[1], mat, dst->normal[1]);
        MultiplyVec3Mat33(src->normal[2], mat, dst->normal[2]);
-        Vec3 *norm0 = (Vec3*)dst->normal[0];
-        norm0->normalize();
+        dst->normal[0].normalize();
    }
    else
    {
        MultiplyVec3Mat33(src->normal[0], mat, dst->normal[0]);
-        Vec3 *norm0 = (Vec3*)dst->normal[0];
-        norm0->normalize();
+        dst->normal[0].normalize();
    }    
 }

 inline void TransformTexCoordRegular(const TexMtxInfo &texinfo, int coordNum, bool specialCase, const InputVertexData *srcVertex, OutputVertexData *dstVertex)
 {
-    const float *src;
+    const Vec3 *src;
    switch (texinfo.sourcerow)
    {
        case XF_SRCGEOM_INROW:
-            src = srcVertex->position;
+            src = &srcVertex->position;
            break;
        case XF_SRCNORMAL_INROW:
-            src = srcVertex->normal[0];
+            src = &srcVertex->normal[0];
            break;
        case XF_SRCBINORMAL_T_INROW:
-            src = srcVertex->normal[1];
+            src = &srcVertex->normal[1];
            break;
        case XF_SRCBINORMAL_B_INROW:
-            src = srcVertex->normal[2];
+            src = &srcVertex->normal[2];
            break;
        default:
            _assert_(texinfo.sourcerow >= XF_SRCTEX0_INROW && texinfo.sourcerow <= XF_SRCTEX7_INROW);
-            src = srcVertex->texCoords[texinfo.sourcerow - XF_SRCTEX0_INROW];
+            src = (Vec3*)srcVertex->texCoords[texinfo.sourcerow - XF_SRCTEX0_INROW];
            break;
    }

    const float *mat = (const float*)&xfregs.posMatrices[srcVertex->texMtx[coordNum] * 4];
-    float *dst = dstVertex->texCoords[coordNum];
+    Vec3 *dst = &dstVertex->texCoords[coordNum];

    if (texinfo.inputform == XF_TEXINPUT_AB11)
    {
-        MultiplyVec2Mat34(src, mat, dst); 
+        MultiplyVec2Mat34(*src, mat, *dst); 
    }
    else
    {
-        MultiplyVec3Mat34(src, mat, dst); 
+        MultiplyVec3Mat34(*src, mat, *dst); 
    }

    if (xfregs.dualTexTrans)
    {
-        float tempCoord[3];
+        Vec3 tempCoord;

        // normalize
        const PostMtxInfo &postInfo = xfregs.postMtxInfo[coordNum];
@ -157,12 +156,12 @@ inline void TransformTexCoordRegular(const TexMtxInfo &texinfo, int coordNum, bo
 			// no normalization
 			// q of input is 1
 			// q of output is unknown
-			tempCoord[0] = dst[0];
-			tempCoord[1] = dst[1];
+			tempCoord.x = dst->x;
+			tempCoord.y = dst->y;

-			dst[0] = postMat[0] * tempCoord[0] + postMat[1] * tempCoord[1] + postMat[2] + postMat[3];
-			dst[1] = postMat[4] * tempCoord[0] + postMat[5] * tempCoord[1] + postMat[6] + postMat[7];
-			dst[2] = 0.0f;
+			dst->x = postMat[0] * tempCoord.x + postMat[1] * tempCoord.y + postMat[2] + postMat[3];
+			dst->y = postMat[4] * tempCoord.x + postMat[5] * tempCoord.y + postMat[6] + postMat[7];
+			dst->z = 1.0f;
 		}
 		else
 		{		
@ -170,18 +169,14 @@ inline void TransformTexCoordRegular(const TexMtxInfo &texinfo, int coordNum, bo
 			{
 				float length = sqrtf(dst[0] * dst[0] + dst[1] * dst[1] + dst[2] * dst[2]);
 				float invL = 1.0f / length;
-				tempCoord[0] = invL * dst[0];
-				tempCoord[1] = invL * dst[1];
-				tempCoord[2] = invL * dst[2];
+				tempCoord = *dst * invL;
 			}
 			else
 			{
-				tempCoord[0] = dst[0];
-				tempCoord[1] = dst[1];
-				tempCoord[2] = dst[2];
+				tempCoord = *dst;
 			}

-			MultiplyVec3Mat34(tempCoord, postMat, dst);
+			MultiplyVec3Mat34(tempCoord, postMat, *dst);
 		}
    }
 }
@ -220,13 +215,8 @@ inline float SafeDivide(float n, float d)
    return (d==0)?(n>0?1:0):n/d;
 }

-void LightColor(const float *vertexPos, const float *normal, u8 lightNum, const LitChannel &chan, Vec3 &lightCol)
+void LightColor(const Vec3 &pos, const Vec3 &normal, u8 lightNum, const LitChannel &chan, Vec3 &lightCol)
 {
-    // must be the size of 3 32bit floats for the light pointer to be valid
-    _assert_(sizeof(Vec3) == 12);
-
-    const Vec3 *pos = (const Vec3*)vertexPos;
-    const Vec3 *norm0 = (const Vec3*)normal;
    const LightPointer *light = (const LightPointer*)&xfregs.lights[0x10*lightNum];

    if (!(chan.attnfunc & 1)) {
@ -237,15 +227,15 @@ void LightColor(const float *vertexPos, const float *normal, u8 lightNum, const
                break;
            case LIGHTDIF_SIGN:
                {
-                    Vec3 ldir = (light->pos - *pos).normalized();
-                    float diffuse = ldir * (*norm0);
+                    Vec3 ldir = (light->pos - pos).normalized();
+                    float diffuse = ldir * normal;
                    AddScaledIntegerColor(light->color, diffuse, lightCol);
                }
                break;
            case LIGHTDIF_CLAMP:
                {
-                    Vec3 ldir = (light->pos - *pos).normalized();
-                    float diffuse = max(0.0f, ldir * (*norm0));
+                    Vec3 ldir = (light->pos - pos).normalized();
+                    float diffuse = max(0.0f, ldir * normal);
                    AddScaledIntegerColor(light->color, diffuse, lightCol);
                }
                break;
@ -254,7 +244,7 @@ void LightColor(const float *vertexPos, const float *normal, u8 lightNum, const
    }
    else { // spec and spot
        // not sure about divide by zero checks
-        Vec3 ldir = light->pos - *pos;
+        Vec3 ldir = light->pos - pos;
        float attn;

        if (chan.attnfunc == 3) { // spot
@ -269,7 +259,7 @@ void LightColor(const float *vertexPos, const float *normal, u8 lightNum, const
        }
        else if (chan.attnfunc == 1) { // specular
            // donko - what is going on here?  655.36 is a guess but seems about right.
-            attn = (light->pos * (*norm0)) > -655.36 ? max(0.0f, (light->dir * (*norm0))) : 0;
+            attn = (light->pos * normal) > -655.36 ? max(0.0f, (light->dir * normal)) : 0;
            ldir.set(1.0f, attn, attn * attn);

            float cosAtt = max(0.0f, light->cosatt * ldir);
@ -283,14 +273,14 @@ void LightColor(const float *vertexPos, const float *normal, u8 lightNum, const
                break;
            case LIGHTDIF_SIGN:
                {
-                    float difAttn = ldir * (*norm0);
+                    float difAttn = ldir * normal;
                    AddScaledIntegerColor(light->color, attn * difAttn, lightCol);
                }
                break;

            case LIGHTDIF_CLAMP:
                {
-                    float difAttn = max(0.0f, ldir * (*norm0));
+                    float difAttn = max(0.0f, ldir * normal);
                    AddScaledIntegerColor(light->color, attn * difAttn, lightCol);
                }
                break;
@ -299,13 +289,8 @@ void LightColor(const float *vertexPos, const float *normal, u8 lightNum, const
    }
 }

-void LightAlpha(const float *vertexPos, const float *normal, u8 lightNum, const LitChannel &chan, float &lightCol)
+void LightAlpha(const Vec3 &pos, const Vec3 &normal, u8 lightNum, const LitChannel &chan, float &lightCol)
 {
-    // must be the size of 3 32bit floats for the light pointer to be valid
-    _assert_(sizeof(Vec3) == 12);
-
-    const Vec3 *pos = (const Vec3*)vertexPos;
-    const Vec3 *norm0 = (const Vec3*)normal;
    const LightPointer *light = (const LightPointer*)&xfregs.lights[0x10*lightNum];

    if (!(chan.attnfunc & 1)) {
@ -316,15 +301,15 @@ void LightAlpha(const float *vertexPos, const float *normal, u8 lightNum, const
                break;
            case LIGHTDIF_SIGN:
                {
-                    Vec3 ldir = (light->pos - *pos).normalized();                    
-                    float diffuse = ldir * (*norm0);
+                    Vec3 ldir = (light->pos - pos).normalized();                    
+                    float diffuse = ldir * normal;
                    lightCol += light->color[0] * diffuse;
                }
                break;
            case LIGHTDIF_CLAMP:
                {
-                    Vec3 ldir = (light->pos - *pos).normalized();
-                    float diffuse = max(0.0f, ldir * (*norm0));
+                    Vec3 ldir = (light->pos - pos).normalized();
+                    float diffuse = max(0.0f, ldir * normal);
                    lightCol += light->color[0] * diffuse;
                }
                break;
@ -332,7 +317,7 @@ void LightAlpha(const float *vertexPos, const float *normal, u8 lightNum, const
        }
    }
    else { // spec and spot
-        Vec3 ldir = light->pos - *pos;
+        Vec3 ldir = light->pos - pos;
        float attn;

        if (chan.attnfunc == 3) { // spot
@ -347,7 +332,7 @@ void LightAlpha(const float *vertexPos, const float *normal, u8 lightNum, const
        }
        else if (chan.attnfunc == 1) { // specular
            // donko - what is going on here?  655.36 is a guess but seems about right.
-            attn = (light->pos * (*norm0)) > -655.36 ? max(0.0f, (light->dir * (*norm0))) : 0;
+            attn = (light->pos * normal) > -655.36 ? max(0.0f, (light->dir * normal)) : 0;
            ldir.set(1.0f, attn, attn * attn);

            float cosAtt = light->cosatt * ldir;
@ -361,14 +346,14 @@ void LightAlpha(const float *vertexPos, const float *normal, u8 lightNum, const
                break;
            case LIGHTDIF_SIGN:
                {
-                    float difAttn = ldir * (*norm0);
+                    float difAttn = ldir * normal;
                    lightCol += light->color[0] * attn * difAttn;
                }
                break;

            case LIGHTDIF_CLAMP:
                {
-                    float difAttn = max(0.0f, ldir * (*norm0));
+                    float difAttn = max(0.0f, ldir * normal);
                    lightCol += light->color[0] * attn * difAttn;
                }
                break;
@ -472,14 +457,11 @@ void TransformTexCoord(const InputVertexData *src, OutputVertexData *dst, bool s
            break;
        case XF_TEXGEN_EMBOSS_MAP:
            {
-                const Vec3 *pos = (const Vec3*)dst->mvPosition;
-                const Vec3 *norm1 = (const Vec3*)dst->normal[1];
-                const Vec3 *norm2 = (const Vec3*)dst->normal[2];
                const LightPointer *light = (const LightPointer*)&xfregs.lights[0x10*texinfo.embosslightshift];

-                Vec3 ldir = (light->pos - *pos).normalized();
-                float d1 = ldir * (*norm1);
-                float d2 = ldir * (*norm2);
+                Vec3 ldir = (light->pos - dst->mvPosition).normalized();
+                float d1 = ldir * dst->normal[1];
+                float d2 = ldir * dst->normal[2];

                dst->texCoords[coordNum][0] = dst->texCoords[texinfo.embosssourceshift][0] + d1;
                dst->texCoords[coordNum][1] = dst->texCoords[texinfo.embosssourceshift][1] + d2;
@ -503,6 +485,9 @@ void TransformTexCoord(const InputVertexData *src, OutputVertexData *dst, bool s
        default:
            ERROR_LOG(VIDEO, "Bad tex gen type %i", texinfo.texgentype);            
        }
+
+		dst->texCoords[coordNum][0] *= (bpmem.texcoords[coordNum].s.scale_minus_1 + 1);
+		dst->texCoords[coordNum][1] *= (bpmem.texcoords[coordNum].t.scale_minus_1 + 1);
    }
 }

--- a/Source/Plugins/Plugin_VideoSoftware/Src/VertexFormatConverter.cpp
+++ b/Source/Plugins/Plugin_VideoSoftware/Src/VertexFormatConverter.cpp
@ -24,32 +24,32 @@ namespace VertexFormatConverter
 {
    void LoadNormal1_Byte(InputVertexData *dst, u8 *src)  // Decodes one normal from three signed bytes at src into dst->normal[0].
    {
-        dst->normal[0][0] = (float)(s8)src[0] / 128;
-        dst->normal[0][1] = (float)(s8)src[1] / 128;
-        dst->normal[0][2] = (float)(s8)src[2] / 128;
+        dst->normal[0].x = (float)(s8)src[0] / 128;  // reinterpret the byte as signed, then scale: -128..127 maps to -1.0..127/128
+        dst->normal[0].y = (float)(s8)src[1] / 128;
+        dst->normal[0].z = (float)(s8)src[2] / 128;
    }

    void LoadNormal1_Short(InputVertexData *dst, u8 *src)  // Decodes one normal from three signed 16-bit values at src into dst->normal[0].
    {
-        dst->normal[0][0] = (float)((s16*)src)[0] / 32768;
-        dst->normal[0][1] = (float)((s16*)src)[1] / 32768;
-        dst->normal[0][2] = (float)((s16*)src)[2] / 32768;
+        dst->normal[0].x = (float)((s16*)src)[0] / 32768;  // src is read through an s16* cast with no byte swap here; NOTE(review): assumes data is already host-endian and suitably aligned - confirm with caller
+        dst->normal[0].y = (float)((s16*)src)[1] / 32768;  // -32768..32767 maps to -1.0..32767/32768
+        dst->normal[0].z = (float)((s16*)src)[2] / 32768;
    }

    void LoadNormal1_Float(InputVertexData *dst, u8 *src)  // Copies one normal stored as three floats at src into dst->normal[0], unscaled.
    {
-        dst->normal[0][0] = ((float*)src)[0];
-        dst->normal[0][1] = ((float*)src)[1];
-        dst->normal[0][2] = ((float*)src)[2];
+        dst->normal[0].x = ((float*)src)[0];  // read through a float* cast with no byte swap here; NOTE(review): assumes host-endian, aligned data - confirm with caller
+        dst->normal[0].y = ((float*)src)[1];
+        dst->normal[0].z = ((float*)src)[2];
    }

    void LoadNormal3_Byte(InputVertexData *dst, u8 *src)  // Decodes three vectors (dst->normal[0..2]) from nine consecutive signed bytes at src.
    {
        for (int i = 0, j = 0; i < 3; i++, j+=3)  // i = destination vector index, j = offset of that vector's first byte in src
        {
-            dst->normal[i][0] = (float)(s8)src[j + 0] / 128;
-            dst->normal[i][1] = (float)(s8)src[j + 1] / 128;
-            dst->normal[i][2] = (float)(s8)src[j + 2] / 128;
+            dst->normal[i].x = (float)(s8)src[j + 0] / 128;  // signed byte scaled by 1/128, same encoding as LoadNormal1_Byte
+            dst->normal[i].y = (float)(s8)src[j + 1] / 128;
+            dst->normal[i].z = (float)(s8)src[j + 2] / 128;
        }
    }

@ -57,9 +57,9 @@ namespace VertexFormatConverter
    {
        for (int i = 0, j = 0; i < 3; i++, j+=3)
        {
-            dst->normal[i][0] = (float)((s16*)src)[j + 0] / 32768;
-            dst->normal[i][1] = (float)((s16*)src)[j + 1] / 32768;
-            dst->normal[i][2] = (float)((s16*)src)[j + 2] / 32768;
+            dst->normal[i].x = (float)((s16*)src)[j + 0] / 32768;
+            dst->normal[i].y = (float)((s16*)src)[j + 1] / 32768;
+            dst->normal[i].z = (float)((s16*)src)[j + 2] / 32768;
        }
    }

@ -67,9 +67,9 @@ namespace VertexFormatConverter
    {
        for (int i = 0, j = 0; i < 3; i++, j+=3)
        {
-            dst->normal[i][0] = ((float*)src)[j + 0];
-            dst->normal[i][1] = ((float*)src)[j + 1];
-            dst->normal[i][2] = ((float*)src)[j + 2];
+            dst->normal[i].x = ((float*)src)[j + 0];
+            dst->normal[i].y = ((float*)src)[j + 1];
+            dst->normal[i].z = ((float*)src)[j + 2];
        }
    }
 }