Texture coordinates are stored in fixed-point format in TEV which allows overflows to be emulated correctly. Added logic to calculate texture LOD and use the correct mip. Dumping textures will now dump all mip levels. Added line rendering. Changed data stored in vertex from float arrays to vectors for cleaner math.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@5178 8ced0084-cf51-0410-be5f-012b33b47a6e
2010-03-09 04:38:07 +00:00 · 2010-03-09 04:38:07 +00:00 · cc7c6cd35f
parent 5beb6dfd47
commit cc7c6cd35f
16 changed files with 704 additions and 323 deletions
--- a/Source/Core/VideoCommon/Src/BPMemory.h
+++ b/Source/Core/VideoCommon/Src/BPMemory.h
@ -451,7 +451,8 @@ union TexMode0
        unsigned mag_filter : 1;
        unsigned min_filter : 3;
        unsigned diag_lod : 1;
-        signed lod_bias : 10;
+        signed lod_bias : 8;
 		unsigned pad0 : 2;
        unsigned max_aniso : 2;
        unsigned lod_clamp : 1;
    };
--- a/Source/Plugins/Plugin_VideoSoftware/Src/Clipper.cpp
+++ b/Source/Plugins/Plugin_VideoSoftware/Src/Clipper.cpp
@ -90,13 +90,13 @@ namespace Clipper
    static inline int CalcClipMask(OutputVertexData *v)
    {
 	    int cmask = 0;
-        float* pos = v->projectedPosition;
+        Vec4 pos = v->projectedPosition;
-	    if (pos[3] - pos[0] < 0) cmask |= CLIP_POS_X_BIT;
+	    if (pos.w - pos.x < 0) cmask |= CLIP_POS_X_BIT;
-	    if (pos[0] + pos[3] < 0) cmask |= CLIP_NEG_X_BIT;
+	    if (pos.x + pos.w < 0) cmask |= CLIP_NEG_X_BIT;
-	    if (pos[3] - pos[1] < 0) cmask |= CLIP_POS_Y_BIT;
+	    if (pos.w - pos.y < 0) cmask |= CLIP_POS_Y_BIT;
-	    if (pos[1] + pos[3] < 0) cmask |= CLIP_NEG_Y_BIT;
+	    if (pos.y + pos.w < 0) cmask |= CLIP_NEG_Y_BIT;
-	    if (pos[3] * pos[2] > 0) cmask |= CLIP_POS_Z_BIT;
+	    if (pos.w * pos.z > 0) cmask |= CLIP_POS_Z_BIT;
-	    if (pos[2] + pos[3] < 0) cmask |= CLIP_NEG_Z_BIT;
+	    if (pos.z + pos.w < 0) cmask |= CLIP_NEG_Z_BIT;
 	    return cmask;
    }
@ -109,7 +109,7 @@ namespace Clipper
    #define DIFFERENT_SIGNS(x,y) ((x <= 0 && y > 0) || (x > 0 && y <= 0))
    #define CLIP_DOTPROD(I, A, B, C, D) \
-	    (Vertices[I]->projectedPosition[0] * A + Vertices[I]->projectedPosition[1] * B + Vertices[I]->projectedPosition[2] * C + Vertices[I]->projectedPosition[3] * D)
+	    (Vertices[I]->projectedPosition.x * A + Vertices[I]->projectedPosition.y * B + Vertices[I]->projectedPosition.z * C + Vertices[I]->projectedPosition.w * D)
    #define POLY_CLIP( PLANE_BIT, A, B, C, D )                          \
    {                                                                   \
@ -153,6 +153,27 @@ namespace Clipper
 	    }									                            \
    }
 	// Clips the segment Vertices[0]..Vertices[1] against a single clip plane.
 	// (A, B, C, D) are the plane coefficients handed to CLIP_DOTPROD; a vertex
 	// whose dot product is negative is treated as outside that plane.
 	// The macro expects `mask`, `t0` and `t1` to be locals of the expanding function:
 	//  - it does nothing unless PLANE_BIT is set in `mask`;
 	//  - it returns from the expanding function when both endpoints are outside;
 	//  - otherwise it raises t0 (vertex 0 outside) or t1 (vertex 1 outside) to the
 	//    largest intersection parameter seen so far, measured from the outside
 	//    endpoint towards the other one.
 	#define LINE_CLIP(PLANE_BIT, A, B, C, D )					\
 	{															\
 		if (mask & PLANE_BIT) {									\
 			const float dp0 = CLIP_DOTPROD( 0, A, B, C, D );	\
 			const float dp1 = CLIP_DOTPROD( 1, A, B, C, D );	\
 			const bool neg_dp0 = dp0 < 0;						\
 			const bool neg_dp1 = dp1 < 0;						\
 																\
 			if (neg_dp0 && neg_dp1)								\
 				return;											\
 																\
 			if (neg_dp1) {										\
 				float t = dp1 / (dp1 - dp0);					\
 				if (t > t1) t1 = t;								\
 			} else if (neg_dp0) {								\
 				float t = dp0 / (dp0 - dp1);					\
 				if (t > t0) t0 = t;								\
 			}													\
 		}														\
 	}
    void ClipTriangle(int *indices, int &numIndices)
    {
 	    int mask = 0;
@ -202,6 +223,53 @@ namespace Clipper
 	    }
    }
 	// Clips the line stored in Vertices[0] and Vertices[1] against the six clip planes.
 	// On return, indices[0] and indices[1] are either both SKIP_FLAG (the line lies
 	// entirely outside one plane) or the Vertices indices of the two endpoints to draw:
 	// 0 / 1 for an endpoint that was already inside, or the index handed to
 	// AddInterpolatedVertex for an endpoint that had to be moved onto a plane.
 	// NOTE(review): this relies on AddInterpolatedVertex storing the new vertex at
 	// Vertices[numVertices] and advancing numVertices - confirm against its definition.
 	void ClipLine(int *indices)
 	{
 		int mask = 0;
 		int clip_mask[2] = { 0, 0 };
 		for (int i = 0; i < 2; ++i)
 		{
 			clip_mask[i] = CalcClipMask(Vertices[i]);
 			mask |= clip_mask[i];
 		}
 		// Neither endpoint crosses any plane: keep the caller's indices untouched
 		if (mask == 0) 
 			return;
 		// Intersection parameters for vertex 0 and vertex 1; LINE_CLIP only ever raises them
 		float t0 = 0;
 		float t1 = 0;
 		// Mark both entries unused up front: LINE_CLIP returns straight out of
 		// this function when the line is fully clipped by a plane.
 		indices[0] = SKIP_FLAG;
 		indices[1] = SKIP_FLAG;
 		LINE_CLIP(CLIP_POS_X_BIT, -1,  0,  0, 1);
 		LINE_CLIP(CLIP_NEG_X_BIT,  1,  0,  0, 1);
 		LINE_CLIP(CLIP_POS_Y_BIT,  0, -1,  0, 1);
 		LINE_CLIP(CLIP_NEG_Y_BIT,  0,  1,  0, 1);
 		LINE_CLIP(CLIP_POS_Z_BIT,  0,  0, -1, 1);
 		LINE_CLIP(CLIP_NEG_Z_BIT,  0,  0,  1, 1);
 		// Reaching this point means no plane rejected the whole line,
 		// so restore the original endpoint indices.
 		indices[0] = 0;
 		indices[1] = 1;
 		int numVertices = 2;
 		// Replace each endpoint that was outside with a vertex interpolated towards the other one
 		if (clip_mask[0]) {
 			indices[0] = numVertices;
 			AddInterpolatedVertex(t0, 0, 1, numVertices);
 		}
 		if (clip_mask[1]) {
 			indices[1] = numVertices;
 			AddInterpolatedVertex(t1, 1, 0, numVertices);
 		}
 	}
    void ProcessTriangle(OutputVertexData *v0, OutputVertexData *v1, OutputVertexData *v2)
    {
        if (stats.thisFrame.numDrawnObjects < g_Config.drawStart || stats.thisFrame.numDrawnObjects >= g_Config.drawEnd )
@ -247,6 +315,75 @@ namespace Clipper
        }
    }
 	// Copies the attributes needed to rasterize a vertex from src into dst,
 	// shifting the screen-space position by (dx, dy).
 	//   dst     - vertex to fill in
 	//   src     - vertex to copy from (not modified)
 	//   dx, dy  - offset added to src's screen x / y
 	//   sOffset - currently unused (see the todo below)
 	// mvPosition and color[1] are not copied.
 	// NOTE(review): confirm nothing downstream reads those two for line rendering.
 	void CopyVertex(OutputVertexData *dst, OutputVertexData *src, float dx, float dy, unsigned int sOffset)
 	{
 		// Rasterizer::DrawTriangleFrontFace derives 1/w from projectedPosition.w,
 		// so it has to be carried over; otherwise dst holds an indeterminate w.
 		dst->projectedPosition = src->projectedPosition;
 		dst->screenPosition.x = src->screenPosition.x + dx;
 		dst->screenPosition.y = src->screenPosition.y + dy;
 		dst->screenPosition.z = src->screenPosition.z;
 		for (int i = 0; i < 3; ++i)
 			dst->normal[i] = src->normal[i];
 		for (int i = 0; i < 4; ++i)
 			dst->color[0][i] = src->color[0][i];
 		// todo - s offset
 		for (int i = 0; i < 8; ++i)
 			dst->texCoords[i] = src->texCoords[i];
 	}
 	// Clips the line lineV0-lineV1 and, unless it is fully clipped, draws it as a
 	// screen-space quad made of two front-facing triangles. The quad is widened
 	// perpendicular to the line's major axis by bpmem.lineptwidth.linesize / 12 on each side.
 	void ProcessLine(OutputVertexData *lineV0, OutputVertexData *lineV1)
 	{
 		int indices[4] = { 0, 1, SKIP_FLAG, SKIP_FLAG };
 		Vertices[0] = lineV0;
 		Vertices[1] = lineV1;
 		ClipLine(indices);
 		// ClipLine leaves SKIP_FLAG in place when nothing of the line is visible
 		if (indices[0] == SKIP_FLAG)
 			return;
 		OutputVertexData *v0 = Vertices[indices[0]];
 		OutputVertexData *v1 = Vertices[indices[1]];
 		PerspectiveDivide(v0);
 		PerspectiveDivide(v1);
 		const float dx = v1->screenPosition.x - v0->screenPosition.x;
 		const float dy = v1->screenPosition.y - v0->screenPosition.y;
 		// Take the magnitudes in float explicitly: a bare abs() can bind to the
 		// integer overload and truncate both deltas before they are compared.
 		const float absDx = (dx < 0.0f) ? -dx : dx;
 		const float absDy = (dy < 0.0f) ? -dy : dy;
 		const float halfWidth = bpmem.lineptwidth.linesize / 12.0f;
 		float screenDx = 0;
 		float screenDy = 0;
 		if (absDx > absDy)
 		{
 			// mostly horizontal line: widen along y
 			screenDy = (dx > 0) ? -halfWidth : halfWidth;
 		}
 		else
 		{
 			// mostly vertical line: widen along x
 			screenDx = (dy > 0) ? halfWidth : -halfWidth;
 		}
 		OutputVertexData triangle[3];
 		CopyVertex(&triangle[0], v0, screenDx, screenDy, 0);
 		CopyVertex(&triangle[1], v1, screenDx, screenDy, 0);
 		CopyVertex(&triangle[2], v1, -screenDx, -screenDy, bpmem.lineptwidth.lineoff);
 		// ccw winding
 		Rasterizer::DrawTriangleFrontFace(&triangle[2], &triangle[1], &triangle[0]);
 		// Reuse slot 1 for the fourth corner to form the second triangle of the quad
 		CopyVertex(&triangle[1], v0, -screenDx, -screenDy, bpmem.lineptwidth.lineoff);
 		Rasterizer::DrawTriangleFrontFace(&triangle[0], &triangle[1], &triangle[2]);
 	}
    bool CullTest(OutputVertexData *v0, OutputVertexData *v1, OutputVertexData *v2, bool &backface)
    {
@ -260,15 +397,15 @@ namespace Clipper
            return false;
        }
-        float x0 = v0->projectedPosition[0];
+        float x0 = v0->projectedPosition.x;
-        float x1 = v1->projectedPosition[0];
+        float x1 = v1->projectedPosition.x;
-        float x2 = v2->projectedPosition[0];
+        float x2 = v2->projectedPosition.x;
-        float y1 = v1->projectedPosition[1];
+        float y1 = v1->projectedPosition.y;
-        float y0 = v0->projectedPosition[1];
+        float y0 = v0->projectedPosition.y;
-        float y2 = v2->projectedPosition[1];
+        float y2 = v2->projectedPosition.y;
-        float w0 = v0->projectedPosition[3];
+        float w0 = v0->projectedPosition.w;
-        float w1 = v1->projectedPosition[3];
+        float w1 = v1->projectedPosition.w;
-        float w2 = v2->projectedPosition[3];
+        float w2 = v2->projectedPosition.w;
        float normalZDir = (x0*w2 - x2*w0)*y1 + (x2*y0 - x0*y2)*w1 + (y2*w0 - y0*w2)*x1; 
@ -291,13 +428,13 @@ namespace Clipper
    void PerspectiveDivide(OutputVertexData *vertex)
    {
-        float *projected = vertex->projectedPosition;
+        Vec4 &projected = vertex->projectedPosition;
-        float *screen = vertex->screenPosition;
+        Vec3 &screen = vertex->screenPosition;
-        float wInverse = 1.0f/projected[3];
+        float wInverse = 1.0f/projected.w;
-        screen[0] = projected[0] * wInverse * xfregs.viewport.wd + m_ViewOffset[0];
+        screen.x = projected.x * wInverse * xfregs.viewport.wd + m_ViewOffset[0];
-        screen[1] = projected[1] * wInverse * xfregs.viewport.ht + m_ViewOffset[1];
+        screen.y = projected.y * wInverse * xfregs.viewport.ht + m_ViewOffset[1];
-        screen[2] = projected[2] * wInverse + m_ViewOffset[2];
+        screen.z = projected.z * wInverse + m_ViewOffset[2];
    }
 }
--- a/Source/Plugins/Plugin_VideoSoftware/Src/Clipper.h
+++ b/Source/Plugins/Plugin_VideoSoftware/Src/Clipper.h
@ -31,6 +31,7 @@ namespace Clipper
    void ProcessTriangle(OutputVertexData *v0, OutputVertexData *v1, OutputVertexData *v2);
 	void ProcessLine(OutputVertexData *v0, OutputVertexData *v1);
    bool CullTest(OutputVertexData *v0, OutputVertexData *v1, OutputVertexData *v2, bool &backface);
--- a/Source/Plugins/Plugin_VideoSoftware/Src/DebugUtil.cpp
+++ b/Source/Plugins/Plugin_VideoSoftware/Src/DebugUtil.cpp
@ -49,36 +49,32 @@ void Init()
    }
 }
-bool SaveTexture(const char* filename, u32 texmap, int width, int height)
+void SaveTexture(const char* filename, u32 texmap, s32 mip)
 {
    u8 *data = new u8[width * height * 4];
    GetTextureBGRA(data, texmap, width, height);
    bool result = SaveTGA(filename, width, height, data);
    delete []data;
    return result;
 }
 void SaveTexture(const char* filename, u32 texmap)
 {
    FourTexUnits& texUnit = bpmem.tex[(texmap >> 2) & 1];
    u8 subTexmap = texmap & 3;
    TexImage0& ti0 = texUnit.texImage0[subTexmap];
-    SaveTexture(filename, texmap, ti0.width + 1, ti0.height + 1);
+	int width = ti0.width + 1;
 	int height = ti0.height + 1;
 	u8 *data = new u8[width * height * 4];
    GetTextureBGRA(data, texmap, mip, width, height);
    bool result = SaveTGA(filename, width, height, data);
    delete []data;
 }
-void GetTextureBGRA(u8 *dst, u32 texmap, int width, int height)
+void GetTextureBGRA(u8 *dst, u32 texmap, s32 mip, int width, int height)
 {
    u8 sample[4];    
    for (int y = 0; y < height; y++)
        for (int x = 0; x < width; x++) {
-            TextureSampler::Sample((float)x, (float)y, 0, texmap, sample);
+            TextureSampler::SampleMip(x << 7, y << 7, mip, false, texmap, sample);
            // rgba to bgra
            *(dst++) = sample[2];
            *(dst++) = sample[1];
@ -87,13 +83,32 @@ void GetTextureBGRA(u8 *dst, u32 texmap, int width, int height)
        }
 }
 // Returns the max_lod setting of the given texture map rounded up to a whole
 // mip level. max_lod is treated as having 4 fractional bits: the upper bits
 // are the whole level, and any non-zero fraction bumps the result by one.
 s32 GetMaxTextureLod(u32 texmap)
 {
 	const u8 unitSlot = texmap & 3;
 	FourTexUnits& unit = bpmem.tex[(texmap >> 2) & 1];
 	const u8 lodLimit = unit.texMode1[unitSlot].max_lod;
 	const u8 wholeLevels = lodLimit >> 4;
 	const bool hasFraction = (lodLimit & 0xf) != 0;
 	return hasFraction ? (s32)(wholeLevels + 1) : (s32)wholeLevels;
 }
 void DumpActiveTextures()
 {
    for (unsigned int stageNum = 0; stageNum < bpmem.genMode.numindstages; stageNum++)
    {
        u32 texmap = bpmem.tevindref.getTexMap(stageNum);
-        SaveTexture(StringFromFormat("%star%i_ind%i_map%i.tga", File::GetUserPath(D_DUMPTEXTURES_IDX), stats.thisFrame.numDrawnObjects, stageNum, texmap).c_str(), texmap);     
+		s32 maxLod = GetMaxTextureLod(texmap);
 		for (s32 mip = 0; mip < maxLod; ++mip)
 		{
 			SaveTexture(StringFromFormat("%star%i_ind%i_map%i_mip%i.tga", File::GetUserPath(D_DUMPTEXTURES_IDX), stats.thisFrame.numDrawnObjects, stageNum, texmap, mip).c_str(), texmap, mip);
 		}
    }
    for (unsigned int stageNum = 0; stageNum <= bpmem.genMode.numtevstages; stageNum++)
@ -104,7 +119,11 @@ void DumpActiveTextures()
        int texmap = order.getTexMap(stageOdd);
-        SaveTexture(StringFromFormat("%star%i_stage%i_map%i.tga", File::GetUserPath(D_DUMPTEXTURES_IDX), stats.thisFrame.numDrawnObjects, stageNum, texmap).c_str(), texmap);           
+        s32 maxLod = GetMaxTextureLod(texmap);
 		for (s32 mip = 0; mip < maxLod; ++mip)
 		{
 			SaveTexture(StringFromFormat("%star%i_stage%i_map%i_mip%i.tga", File::GetUserPath(D_DUMPTEXTURES_IDX), stats.thisFrame.numDrawnObjects, stageNum, texmap, mip).c_str(), texmap, mip);
 		}
    }
 }
--- a/Source/Plugins/Plugin_VideoSoftware/Src/DebugUtil.h
+++ b/Source/Plugins/Plugin_VideoSoftware/Src/DebugUtil.h
@ -22,7 +22,7 @@ namespace DebugUtil
 {
    void Init();
-    void GetTextureBGRA(u8 *dst, u32 texmap, int width, int height);
+    void GetTextureBGRA(u8 *dst, u32 texmap, s32 mip, int width, int height);
    void DumpActiveTextures();
--- a/Source/Plugins/Plugin_VideoSoftware/Src/HwRasterizer.cpp
+++ b/Source/Plugins/Plugin_VideoSoftware/Src/HwRasterizer.cpp
@ -155,7 +155,7 @@ namespace HwRasterizer
        int width = texImage0.width;
        int height = texImage0.height;
-        DebugUtil::GetTextureBGRA(temp, 0, width, height);
+        DebugUtil::GetTextureBGRA(temp, 0, 0, width, height);
        glGenTextures(1, (GLuint *)&texture);
 		glBindTexture(GL_TEXTURE_RECTANGLE_ARB, texture);
--- a/Source/Plugins/Plugin_VideoSoftware/Src/NativeVertexFormat.h
+++ b/Source/Plugins/Plugin_VideoSoftware/Src/NativeVertexFormat.h
@ -18,6 +18,8 @@
 #ifndef _NATIVEVERTEXFORMAT_H
 #define _NATIVEVERTEXFORMAT_H
 #include "../../Plugin_VideoDX9/Src/Vec3.h"
 #ifdef WIN32
 #define LOADERDECL __cdecl
 #else
@ -26,25 +28,33 @@
 typedef void (LOADERDECL *TPipelineFunction)();
 // Plain four-component float vector.
 // Used as the type of OutputVertexData::projectedPosition.
 struct Vec4
 {
 	float x;
 	float y;
 	float z;
 	float w;
 };
 struct InputVertexData
 {
    u8 posMtx;
    u8 texMtx[8];
-    float position[4];    
+    Vec3 position;    
-    float normal[3][3];
+    Vec3 normal[3];
    u8 color[2][4];
    float texCoords[8][2];
 };
 struct OutputVertexData
 {
-    float mvPosition[3];
+    Vec3 mvPosition;
-    float projectedPosition[4];
+    Vec4 projectedPosition;
-    float screenPosition[3];
+    Vec3 screenPosition;
-    float normal[3][3];
+    Vec3 normal[3];
    u8 color[2][4];
-    float texCoords[8][3];
+    Vec3 texCoords[8];
    void Lerp(float t, OutputVertexData *a, OutputVertexData *b)
    {
@ -52,17 +62,16 @@ struct OutputVertexData
        #define LINTERP_INT(T, OUT, IN) (OUT) + (((IN - OUT) * T) >> 8)
-        for (int i = 0; i < 3; ++i)
+        mvPosition = LINTERP(t, a->mvPosition, b->mvPosition);
            mvPosition[i] = LINTERP(t, a->mvPosition[i], b->mvPosition[i]);
-        for (int i = 0; i < 4; ++i)
+        projectedPosition.x = LINTERP(t, a->projectedPosition.x, b->projectedPosition.x);
-            projectedPosition[i] = LINTERP(t, a->projectedPosition[i], b->projectedPosition[i]);
+		projectedPosition.y = LINTERP(t, a->projectedPosition.y, b->projectedPosition.y);
 		projectedPosition.z = LINTERP(t, a->projectedPosition.z, b->projectedPosition.z);
 		projectedPosition.w = LINTERP(t, a->projectedPosition.w, b->projectedPosition.w);
        for (int i = 0; i < 3; ++i)
        {
-            normal[i][0] = LINTERP(t, a->normal[i][0], b->normal[i][0]);
+            normal[i] = LINTERP(t, a->normal[i], b->normal[i]);
            normal[i][1] = LINTERP(t, a->normal[i][1], b->normal[i][1]);
            normal[i][2] = LINTERP(t, a->normal[i][2], b->normal[i][2]);
        }
        u16 t_int = (u16)(t * 256);
@ -74,9 +83,7 @@ struct OutputVertexData
        for (int i = 0; i < 8; ++i)
        {
-            texCoords[i][0] = LINTERP(t, a->texCoords[i][0], b->texCoords[i][0]);
+            texCoords[i] = LINTERP(t, a->texCoords[i], b->texCoords[i]);
            texCoords[i][1] = LINTERP(t, a->texCoords[i][1], b->texCoords[i][1]);
            texCoords[i][2] = LINTERP(t, a->texCoords[i][2], b->texCoords[i][2]);
        }
        #undef LINTERP
--- a/Source/Plugins/Plugin_VideoSoftware/Src/Rasterizer.cpp
+++ b/Source/Plugins/Plugin_VideoSoftware/Src/Rasterizer.cpp
@ -27,8 +27,20 @@
 #include "VideoConfig.h"
-#define BLOCK_SIZE 8
+#define BLOCK_SIZE 2
 #define CLAMP(x, a, b) (x>b)?b:(x<a)?a:x
 // Returns an approximation of log2(f) in s28.4 fixed point (4 fractional bits).
 // The value is read directly from the single-precision bit pattern of f: the
 // biased exponent gives the integer part and the top 4 mantissa bits stand in
 // for the fraction. The sign bit is masked out, so only the magnitude of f matters.
 // Results are close enough to use for LOD.
 static inline s32 FixedLog2(float f)
 {
 	// Pun through a union instead of casting &f to u32*, which violates strict aliasing
 	union { float asFloat; u32 asBits; } pun;
 	pun.asFloat = f;
 	s32 logInt = ((pun.asBits & 0x7F800000) >> 19) - 2032; // integer part: (exponent - 127) << 4
 	s32 logFract = (pun.asBits & 0x007fffff) >> 19; // approximate fractional part: top 4 mantissa bits
 	return logInt + logFract;
 }
 namespace Rasterizer
 {
@ -43,6 +55,7 @@ s32 scissorRight = 0;
 s32 scissorBottom = 0;
 Tev tev;
 RasterBlock rasterBlock;
 void Init()
 {
@ -91,15 +104,15 @@ void SetTevReg(int reg, int comp, bool konst, s16 color)
    tev.SetRegColor(reg, comp, konst, color);
 }
-inline void Draw(s32 x, s32 y)
+inline void Draw(s32 x, s32 y, s32 xi, s32 yi)
 {
    INCSTAT(stats.thisFrame.rasterizedPixels);
 	float zFloat = 1.0f + ZSlope.GetValue(x, y);
-    if(zFloat < 0|| zFloat > 1)
+	if (zFloat < 0.0f || zFloat > 1.0f)
 		return;
-    u32 z = (u32)(zFloat * 0x00ffffff);
+	s32 z = (s32)(zFloat * 0x00ffffff);
 	if (bpmem.zcontrol.zcomploc && bpmem.zmode.testenable)
 	{
@ -108,34 +121,39 @@ inline void Draw(s32 x, s32 y)
 			return;
 	}
-    float invW = 1.0f / WSlope.GetValue(x, y);
+	RasterBlockPixel& pixel = rasterBlock.Pixel[xi][yi];
 	float invW = pixel.InvW;
 	tev.Position[0] = x;
 	tev.Position[1] = y;
 	tev.Position[2] = z;
-    for(unsigned int i = 0; i < bpmem.genMode.numcolchans; i++)
+	//  colors
 	for (unsigned int i = 0; i < bpmem.genMode.numcolchans; i++)
 	{
 		for(int comp = 0; comp < 4; comp++)
 			tev.Color[i][comp] = (u8)ColorSlopes[i][comp].GetValue(x, y);
 	}
-    for(unsigned int i = 0; i < bpmem.genMode.numtexgens; i++)
+	// tex coords
 	for (unsigned int i = 0; i < bpmem.genMode.numtexgens; i++)
 	{
-        if (xfregs.texMtxInfo[i].projection)
+		// multiply by 128 because TEV stores stores UVs as s17.7
-        {
+		tev.Uv[i].s = (s32)(pixel.Uv[i][0] * 128);
-            float q = TexSlopes[i][2].GetValue(x, y) * invW;
+		tev.Uv[i].t = (s32)(pixel.Uv[i][1] * 128);
            float invQ = invW / q;
            tev.Uv[i][0] = TexSlopes[i][0].GetValue(x, y) * invQ * (bpmem.texcoords[i].s.scale_minus_1 + 1);
            tev.Uv[i][1] = TexSlopes[i][1].GetValue(x, y) * invQ * (bpmem.texcoords[i].t.scale_minus_1 + 1);
            tev.Lod[i] = 0;
 	}
-        else
+
 	for (unsigned int i = 0; i < bpmem.genMode.numindstages; i++)
 	{
-            tev.Uv[i][0] = TexSlopes[i][0].GetValue(x, y) * invW * (bpmem.texcoords[i].s.scale_minus_1 + 1);
+		tev.IndirectLod[i] = rasterBlock.IndirectLod[i];
-            tev.Uv[i][1] = TexSlopes[i][1].GetValue(x, y) * invW * (bpmem.texcoords[i].t.scale_minus_1 + 1);
+		tev.IndirectLinear[i] = rasterBlock.IndirectLinear[i];
            tev.Lod[i] = 0;
 	}
 	for (unsigned int i = 0; i <= bpmem.genMode.numtevstages; i++)
 	{
 		tev.TextureLod[i] = rasterBlock.TextureLod[i];
 		tev.TextureLinear[i] = rasterBlock.TextureLinear[i];
 	}
    tev.Draw();
@ -155,6 +173,109 @@ void InitSlope(Slope *slope, float f1, float f2, float f3, float DX31, float DX1
    slope->y0 = Y1;
 }
 // Magnitude of (a - b), evaluated entirely in float.
 static inline float AbsDelta(float a, float b)
 {
 	const float d = a - b;
 	return (d < 0.0f) ? -d : d;
 }
 // Computes the level of detail for one texture lookup of the current raster block.
 //   lod      - out: LOD in s28.4 fixed point, biased and clamped to the map's min/max LOD
 //   linear   - out: true when the filter selected for this LOD is linear
 //              (min_filter bit 2 for lod >= 0, mag_filter for lod < 0)
 //   texmap   - texture map whose TexMode0 / TexMode1 registers supply bias, filters and clamp
 //   texcoord - index of the texture coordinate set in rasterBlock.Pixel[][].Uv to measure
 // The UV deltas are taken between neighbouring pixels of rasterBlock, so BuildBlock
 // must have filled the block first.
 inline void CalculateLOD(s32 &lod, bool &linear, u32 texmap, u32 texcoord)
 {
 	FourTexUnits& texUnit = bpmem.tex[(texmap >> 2) & 1];
 	u8 subTexmap = texmap & 3;
 	// LOD calculation requires data from the texture mode for bias, etc.
 	// it does not seem to use the actual texture size
 	TexMode0& tm0 = texUnit.texMode0[subTexmap];
 	TexMode1& tm1 = texUnit.texMode1[subTexmap];
 	// AbsDelta keeps the differences in float; a bare abs() can bind to the
 	// integer overload and truncate sub-texel deltas to zero.
 	float sDelta, tDelta;
 	if (tm0.diag_lod)
 	{
 		// diagonal mode: compare pixel (0,0) with pixel (1,1)
 		float *uv0 = rasterBlock.Pixel[0][0].Uv[texcoord];
 		float *uv1 = rasterBlock.Pixel[1][1].Uv[texcoord];
 		sDelta = AbsDelta(uv0[0], uv1[0]);
 		tDelta = AbsDelta(uv0[1], uv1[1]);
 	}
 	else
 	{
 		// edge mode: larger of the deltas to the two direct neighbours of pixel (0,0)
 		float *uv0 = rasterBlock.Pixel[0][0].Uv[texcoord];
 		float *uv1 = rasterBlock.Pixel[1][0].Uv[texcoord];
 		float *uv2 = rasterBlock.Pixel[0][1].Uv[texcoord];
 		sDelta = max(AbsDelta(uv0[0], uv1[0]), AbsDelta(uv0[0], uv2[0]));
 		tDelta = max(AbsDelta(uv0[1], uv1[1]), AbsDelta(uv0[1], uv2[1]));
 	}
 	// get LOD in s28.4
 	lod = FixedLog2(max(sDelta, tDelta));
 	// bias is s2.5; drop one fractional bit to line it up with the s28.4 LOD
 	int bias = tm0.lod_bias;
 	bias >>= 1;
 	lod += bias;
 	linear = ((lod >= 0 && (tm0.min_filter & 4)) || (lod < 0 && tm0.mag_filter));
 	// order of checks matters
 	// should be:
 	// if lod > max then max
 	// else if lod < min then min
 	lod = CLAMP(lod, (s32)tm1.min_lod, (s32)tm1.max_lod);
 }
 // Fills rasterBlock for the BLOCK_SIZE x BLOCK_SIZE pixel block starting at
 // (blockX, blockY): per-pixel 1/w and projected texture coordinates first, then
 // one LOD / filter decision per indirect stage and per enabled TEV stage.
 void BuildBlock(s32 blockX, s32 blockY)
 {
 	for (s32 yi = 0; yi < BLOCK_SIZE; yi++)
 	{
 		for (s32 xi = 0; xi < BLOCK_SIZE; xi++)
 		{
 			RasterBlockPixel& pixel = rasterBlock.Pixel[xi][yi];
 			s32 x = xi + blockX;
 			s32 y = yi + blockY;
 			float invW = 1.0f / WSlope.GetValue(x, y);
 			pixel.InvW = invW;
 			// tex coords
 			for (unsigned int i = 0; i < bpmem.genMode.numtexgens; i++)
 			{
 				// projected texgens are additionally divided by their own q component
 				float projection;
 				if (xfregs.texMtxInfo[i].projection)
 				{
 					float q = TexSlopes[i][2].GetValue(x, y) * invW;
 					projection = invW / q;
 				}
 				else
 					projection = invW;
 				pixel.Uv[i][0] = TexSlopes[i][0].GetValue(x, y) * projection;
 				pixel.Uv[i][1] = TexSlopes[i][1].GetValue(x, y) * projection;
 			}
 		}
 	}
 	// Indirect stages: resolve the map / coord pair through the same tevindref
 	// accessors Tev::Draw samples with. Decoding the register by hand with "& 3"
 	// on fields spaced 3 bits apart dropped the high bit of each field, so the
 	// LOD could be computed for a different map than the one actually sampled.
 	for (unsigned int i = 0; i < bpmem.genMode.numindstages; i++)
 	{
 		u32 texmap = bpmem.tevindref.getTexMap(i);
 		u32 texcoord = bpmem.tevindref.getTexCoord(i);
 		CalculateLOD(rasterBlock.IndirectLod[i], rasterBlock.IndirectLinear[i], texmap, texcoord);
 	}
 	// Regular stages: entries of disabled stages are left untouched
 	for (unsigned int i = 0; i <= bpmem.genMode.numtevstages; i++)
 	{
 		int stageOdd = i&1;
 		TwoTevStageOrders &order = bpmem.tevorders[i >> 1];
 		if(order.getEnable(stageOdd))
 		{
 			u32 texmap = order.getTexMap(stageOdd);
 			u32 texcoord = order.getTexCoord(stageOdd);
 			CalculateLOD(rasterBlock.TextureLod[i], rasterBlock.TextureLinear[i], texmap, texcoord);
 		}
 	}
 }
 void DrawTriangleFrontFace(OutputVertexData *v0, OutputVertexData *v1, OutputVertexData *v2)
 {
    INCSTAT(stats.thisFrame.numTrianglesDrawn);
@ -217,7 +338,7 @@ void DrawTriangleFrontFace(OutputVertexData *v0, OutputVertexData *v1, OutputVer
    float fltdy12 = flty1 - v1->screenPosition[1];
    float fltdy31 = v2->screenPosition[1] - flty1;
-    float w[3] = { 1.0f / v0->projectedPosition[3], 1.0f / v1->projectedPosition[3], 1.0f / v2->projectedPosition[3] };
+    float w[3] = { 1.0f / v0->projectedPosition.w, 1.0f / v1->projectedPosition.w, 1.0f / v2->projectedPosition.w };
    InitSlope(&WSlope, w[0], w[1], w[2], fltdx31, fltdx12, fltdy12, fltdy31, fltx1, flty1);
    InitSlope(&ZSlope, v0->screenPosition[2], v1->screenPosition[2], v2->screenPosition[2], fltdx31, fltdx12, fltdy12, fltdy31, fltx1, flty1);
@ -281,14 +402,16 @@ void DrawTriangleFrontFace(OutputVertexData *v0, OutputVertexData *v1, OutputVer
            // Skip block when outside an edge
            if(a == 0x0 || b == 0x0 || c == 0x0) continue;
 			BuildBlock(x, y);
            // Accept whole block when totally covered
            if(a == 0xF && b == 0xF && c == 0xF)
            {
                for(s32 iy = 0; iy < BLOCK_SIZE; iy++)
                {
-                    for(s32 ix = x; ix < x + BLOCK_SIZE; ix++)
+                    for(s32 ix = 0; ix < BLOCK_SIZE; ix++)
                    {                        
-                        Draw(ix, iy + y);
+                        Draw(x + ix, y + iy, ix, iy);
                    }
                }
            }
@ -298,17 +421,17 @@ void DrawTriangleFrontFace(OutputVertexData *v0, OutputVertexData *v1, OutputVer
                s32 CY2 = C2 + DX23 * y0 - DY23 * x0;
                s32 CY3 = C3 + DX31 * y0 - DY31 * x0;
-                for(s32 iy = y; iy < y + BLOCK_SIZE; iy++)
+                for(s32 iy = 0; iy < BLOCK_SIZE; iy++)
                {
                    s32 CX1 = CY1;
                    s32 CX2 = CY2;
                    s32 CX3 = CY3;
-                    for(s32 ix = x; ix < x + BLOCK_SIZE; ix++)
+                    for(s32 ix = 0; ix < BLOCK_SIZE; ix++)
                    {
                        if(CX1 > 0 && CX2 > 0 && CX3 > 0)
                        {
-                            Draw(ix, iy);
+                            Draw(x + ix, y + iy, ix, iy);
                        }
                        CX1 -= FDY12;
--- a/Source/Plugins/Plugin_VideoSoftware/Src/Rasterizer.h
+++ b/Source/Plugins/Plugin_VideoSoftware/Src/Rasterizer.h
@ -40,6 +40,21 @@ namespace Rasterizer
        float GetValue(s32 x, s32 y) { return f0 + (dfdx * (x - x0)) + (dfdy * (y - y0)); }
    };
 	// Per-pixel values cached for one pixel of a raster block.
 	struct RasterBlockPixel
 	{
 		// reciprocal of the interpolated W slope value at this pixel
 		float InvW;
 		// projected (s, t) for each of up to 8 texture coordinate sets
 		float Uv[8][2];
 	};
 	// Data shared by all pixels of one raster block: the per-pixel cache plus the
 	// LOD / filter choice made once per block for every stage.
 	struct RasterBlock
 	{
 		// indexed [x][y] within the block; the rasterizer indexes this with
 		// values below its BLOCK_SIZE, so the two dimensions must stay in step with it
 		RasterBlockPixel Pixel[2][2];
 		// LOD and linear-filter flag per indirect stage
 		s32 IndirectLod[4];
 		bool IndirectLinear[4];
 		// LOD and linear-filter flag per TEV stage
 		s32 TextureLod[16];
 		bool TextureLinear[16];
 	};
 }
 #endif
--- a/Source/Plugins/Plugin_VideoSoftware/Src/SetupUnit.cpp
+++ b/Source/Plugins/Plugin_VideoSoftware/Src/SetupUnit.cpp
@ -134,10 +134,38 @@ void SetupUnit::SetupTriFan()
 }
 void SetupUnit::SetupLine()
-{}
+{
 	if (m_VertexCounter < 1)
    {
        m_VertexCounter++;
        m_VertWritePointer = m_VertPointer[m_VertexCounter];
        return;
    }
    Clipper::ProcessLine(m_VertPointer[0], m_VertPointer[1]);
    m_VertexCounter = 0;
    m_VertWritePointer = m_VertPointer[0];
 }
 void SetupUnit::SetupLineStrip()
-{}
+{
 	if (m_VertexCounter < 1)
    {
        m_VertexCounter++;
 		m_VertWritePointer = m_VertPointer[m_VertexCounter];
        return;
    }
 	m_VertexCounter++;
    Clipper::ProcessLine(m_VertPointer[0], m_VertPointer[1]);
 	m_VertWritePointer = m_VertPointer[0];
 	m_VertPointer[0] = m_VertPointer[1];
 	m_VertPointer[1] = &m_Vertices[m_VertexCounter & 1];
 }
 void SetupUnit::SetupPoint()
 {}
--- a/Source/Plugins/Plugin_VideoSoftware/Src/Tev.cpp
+++ b/Source/Plugins/Plugin_VideoSoftware/Src/Tev.cpp
@ -439,34 +439,33 @@ static bool AlphaTest(int alpha)
    return true;
 }
-inline float WrapIndirectCoord(float coord, int wrapMode)
+inline s32 WrapIndirectCoord(s32 coord, int wrapMode)
 {
    switch (wrapMode) {
        case ITW_OFF:
            return coord;
        case ITW_256:
-            return fmod(coord, 256);
+            return (coord % (256 << 7));
        case ITW_128:
-            return fmod(coord, 128);
+            return (coord % (128 << 7));
        case ITW_64:
-            return fmod(coord, 64);
+            return (coord % (64 << 7));
        case ITW_32:
-            return fmod(coord, 32);
+            return (coord % (32 << 7));
        case ITW_16:
-            return fmod(coord, 16);
+            return (coord % (16 << 7));
        case ITW_0:
            return 0;
    }
    return 0;
 }
-void Tev::Indirect(unsigned int stageNum, float s, float t)
+void Tev::Indirect(unsigned int stageNum, s32 s, s32 t)
 {
    TevStageIndirect &indirect = bpmem.tevind[stageNum];
    u8 *indmap = IndirectTex[indirect.bt];
-
+    s32 indcoord[3];
    float indcoord[3];
    // alpha bump select
    switch (indirect.bs) {
@ -494,32 +493,32 @@ void Tev::Indirect(unsigned int stageNum, float s, float t)
    // format
    switch(indirect.fmt) {
        case ITF_8:
-            indcoord[0] = (float)indmap[ALP_C] + bias[0];
+            indcoord[0] = indmap[ALP_C] + bias[0];
-            indcoord[1] = (float)indmap[BLU_C] + bias[1];
+            indcoord[1] = indmap[BLU_C] + bias[1];
-            indcoord[2] = (float)indmap[GRN_C] + bias[2];
+            indcoord[2] = indmap[GRN_C] + bias[2];
            AlphaBump = AlphaBump & 0xf8;
            break;
        case ITF_5:
-            indcoord[0] = (float)(indmap[ALP_C] & 0x1f) + bias[0];
+            indcoord[0] = (indmap[ALP_C] & 0x1f) + bias[0];
-            indcoord[1] = (float)(indmap[BLU_C] & 0x1f) + bias[1];
+            indcoord[1] = (indmap[BLU_C] & 0x1f) + bias[1];
-            indcoord[2] = (float)(indmap[GRN_C] & 0x1f) + bias[2];
+            indcoord[2] = (indmap[GRN_C] & 0x1f) + bias[2];
            AlphaBump = AlphaBump & 0xe0;
            break;
        case ITF_4:
-            indcoord[0] = (float)(indmap[ALP_C] & 0x0f) + bias[0];
+            indcoord[0] = (indmap[ALP_C] & 0x0f) + bias[0];
-            indcoord[1] = (float)(indmap[BLU_C] & 0x0f) + bias[1];
+            indcoord[1] = (indmap[BLU_C] & 0x0f) + bias[1];
-            indcoord[2] = (float)(indmap[GRN_C] & 0x0f) + bias[2];
+            indcoord[2] = (indmap[GRN_C] & 0x0f) + bias[2];
            AlphaBump = AlphaBump & 0xf0;
            break;
        case ITF_3:
-            indcoord[0] = (float)(indmap[ALP_C] & 0x07) + bias[0];
+            indcoord[0] = (indmap[ALP_C] & 0x07) + bias[0];
-            indcoord[1] = (float)(indmap[BLU_C] & 0x07) + bias[1];
+            indcoord[1] = (indmap[BLU_C] & 0x07) + bias[1];
-            indcoord[2] = (float)(indmap[GRN_C] & 0x07) + bias[2];
+            indcoord[2] = (indmap[GRN_C] & 0x07) + bias[2];
            AlphaBump = AlphaBump & 0xf8;
            break;
    }
-    float indtevtrans[2] = { 0,0 };
+    s64 indtevtrans[2] = { 0,0 };
    // matrix multiply
    int indmtxid = indirect.mid & 3;
@ -529,39 +528,40 @@ void Tev::Indirect(unsigned int stageNum, float s, float t)
        int scale = ((u32)indmtx.col0.s0 << 0) |
 	                ((u32)indmtx.col1.s1 << 2) |
 	                ((u32)indmtx.col2.s2 << 4);
-        float fscale = 0.0f;
+
 		int shift;
        switch (indirect.mid & 12) {
            case 0:   
-                fscale = powf(2.0f, (float)(scale - 17)) / 1024.0f;
+				shift = 3 + (17 - scale);
                indtevtrans[0] = indmtx.col0.ma * indcoord[0] + indmtx.col1.mc * indcoord[1] + indmtx.col2.me * indcoord[2];
                indtevtrans[1] = indmtx.col0.mb * indcoord[0] + indmtx.col1.md * indcoord[1] + indmtx.col2.mf * indcoord[2];
                break;
            case 4: // s matrix
-                fscale = powf(2.0f, (float)(scale - 17)) / 256;
+				shift = 8 + (17 - scale);
                indtevtrans[0] = s * indcoord[0];
                indtevtrans[1] = t * indcoord[0];
                break;
            case 8: // t matrix
-                fscale = powf(2.0f, (float)(scale - 17)) / 256;
+				shift = 8 + (17 - scale);
                indtevtrans[0] = s * indcoord[1];
                indtevtrans[1] = t * indcoord[1];
                break;
        }
-        indtevtrans[0] *= fscale;
+		indtevtrans[0] = shift >= 0 ? indtevtrans[0] >> shift : indtevtrans[0] << -shift;
-        indtevtrans[1] *= fscale;
+		indtevtrans[1] = shift >= 0 ? indtevtrans[1] >> shift : indtevtrans[1] << -shift;
    }
 	if (indirect.fb_addprev)
    {
-        TexCoord[0] += WrapIndirectCoord(s, indirect.sw) + indtevtrans[0];
+        TexCoord.s += (int)(WrapIndirectCoord(s, indirect.sw) + indtevtrans[0]);
-        TexCoord[1] += WrapIndirectCoord(t, indirect.tw) + indtevtrans[1];
+        TexCoord.t += (int)(WrapIndirectCoord(t, indirect.tw) + indtevtrans[1]);
    }
    else
    {
-        TexCoord[0] = WrapIndirectCoord(s, indirect.sw) + indtevtrans[0];
+        TexCoord.s = (int)(WrapIndirectCoord(s, indirect.sw) + indtevtrans[0]);
-        TexCoord[1] = WrapIndirectCoord(t, indirect.tw) + indtevtrans[1];
+        TexCoord.t = (int)(WrapIndirectCoord(t, indirect.tw) + indtevtrans[1]);
    }
 }
@ -580,10 +580,12 @@ void Tev::Draw()
        u32 texcoordSel = bpmem.tevindref.getTexCoord(stageNum);
        u32 texmap = bpmem.tevindref.getTexMap(stageNum);
-        float scaleS = bpmem.texscale[stageNum2].getScaleS(stageOdd);
+		const TEXSCALE& texscale = bpmem.texscale[stageNum2];
-        float scaleT = bpmem.texscale[stageNum2].getScaleT(stageOdd);
+		s32 scaleS = stageOdd ? texscale.ss1:texscale.ss0;
        s32 scaleT = stageOdd ? texscale.ts1:texscale.ts0;
-        TextureSampler::Sample(Uv[texcoordSel][0] * scaleS, Uv[texcoordSel][1] * scaleT, Lod[texcoordSel], texmap, IndirectTex[stageNum]);
+        TextureSampler::Sample(Uv[texcoordSel].s >> scaleS, Uv[texcoordSel].t >> scaleT,
 			IndirectLod[stageNum], IndirectLinear[stageNum], texmap, IndirectTex[stageNum]);
 #ifdef _DEBUG
        if (g_Config.bDumpTevStages)
@ -608,14 +610,14 @@ void Tev::Draw()
        int texcoordSel = order.getTexCoord(stageOdd);
        int texmap = order.getTexMap(stageOdd);
-        Indirect(stageNum, Uv[texcoordSel][0], Uv[texcoordSel][1]);
+        Indirect(stageNum, Uv[texcoordSel].s, Uv[texcoordSel].t);
        // sample texture
        if (order.getEnable(stageOdd))
        {
            u8 texel[4];
-            TextureSampler::Sample(TexCoord[0], TexCoord[1], Lod[texcoordSel], texmap, texel);
+			TextureSampler::Sample(TexCoord.s, TexCoord.t, TextureLod[stageNum], TextureLinear[stageNum], texmap, texel);
            int swaptable = ac.tswap * 2;            
--- a/Source/Plugins/Plugin_VideoSoftware/Src/Tev.h
+++ b/Source/Plugins/Plugin_VideoSoftware/Src/Tev.h
@ -22,6 +22,19 @@
 class Tev
 { 
 	// Packed register of four bitfields: a, b and c are unsigned 8-bit
 	// values, d is a signed 11-bit value.
 	// NOTE(review): presumably the a/b/c/d operands of a TEV combiner stage -
 	// confirm against the code that fills it in Tev.cpp.
 	struct InputRegType {
        unsigned a : 8;
        unsigned b : 8;
        unsigned c : 8;
        signed   d : 11;
    };
 	// Fixed-point texture coordinate pair. Each component is a signed 24-bit
 	// bitfield, so arithmetic that exceeds 24 bits is truncated on assignment
 	// (the commit message states this is what lets coordinate overflow be
 	// emulated). TextureSampler::SampleMip takes the integer texel index as
 	// `coord >> 7`, i.e. the low 7 bits are the fractional part.
 	struct TextureCoordinateType
 	{
 		signed s : 24;
 		signed t : 24;
 	};
    // color order: RGBA
    s16 Reg[4][4];    
    s16 KonstantColors[4][4];
@ -32,7 +45,7 @@ class Tev
    s16 Zero16[4];
    u8 AlphaBump;
    u8 IndirectTex[4][4];
-    float TexCoord[2];
+	TextureCoordinateType TexCoord;
    s16 *m_ColorInputLUT[16][3];
    s16 *m_AlphaInputLUT[8];        // values must point to RGBA color
@ -49,20 +62,16 @@ class Tev
    void DrawAlphaRegular(TevStageCombiner::AlphaCombiner &ac);
    void DrawAlphaCompare(TevStageCombiner::AlphaCombiner &ac);
-    void Indirect(unsigned int stageNum, float s, float t);    
+    void Indirect(unsigned int stageNum, s32 s, s32 t);
    struct InputRegType {
        unsigned a : 8;
        unsigned b : 8;
        unsigned c : 8;
        signed   d : 11;
    };
 public:
 	s32 Position[3];
    u8 Color[2][4];
-    float Uv[8][2];
+    TextureCoordinateType Uv[8];
-    float Lod[8];
+    s32 IndirectLod[4];
 	bool IndirectLinear[4];
 	s32 TextureLod[16];
 	bool TextureLinear[16];
    void Init();
--- a/Source/Plugins/Plugin_VideoSoftware/Src/TextureSampler.cpp
+++ b/Source/Plugins/Plugin_VideoSoftware/Src/TextureSampler.cpp
@ -23,29 +23,11 @@
 #include <cmath>
 #define ALLOW_MIPMAP 1
 namespace TextureSampler
 {
 // Rounds x to the nearest integer.
 //
 // On 32-bit Windows builds the conversion is done by the x87 FISTP
 // instruction, which rounds according to the FPU's current rounding mode.
 // Everywhere else the value is truncated toward zero and then corrected by
 // one when the discarded fraction is at least one half in magnitude, so ties
 // round away from zero. The portable path used to correct only upward, which
 // left negative inputs merely truncated (e.g. -1.6 became -1 instead of -2).
 //
 // x must be representable as an int; out-of-range values are not handled.
 // Declared static because the helper is private to this translation unit.
 static inline int iround(float x)
 {
 #if defined(_WIN32) && !defined(_M_X64)
    int t;
    __asm
    {
        fld  x
        fistp t
    }
    return t;
 #else
 	int t = (int)x;                 // truncates toward zero
 	float frac = x - (float)t;      // carries the sign of x, |frac| < 1
 	if (frac >= 0.5f)
 		return t + 1;
 	if (frac <= -0.5f)
 		return t - 1;
 	return t;
 #endif
 }
 inline void WrapCoord(int &coord, int wrapMode, int imageSize)
 {
    switch (wrapMode)
@ -85,7 +67,51 @@ inline void AddTexel(u8 *inTexel, u32 *outTexel, u32 fract)
    outTexel[3] += inTexel[3] * fract;
 }
-void Sample(float s, float t, float lod, u8 texmap, u8 *sample)
+void Sample(s32 s, s32 t, s32 lod, bool linear, u8 texmap, u8 *sample)
 {
    // Samples texture `texmap` at (s, t) and writes four bytes to `sample`.
    //
    // s, t    - texture coordinates, forwarded unchanged to SampleMip.
    // lod     - level of detail in fixed point: the low 4 bits are the
    //           fraction, the remaining bits the integer mip level.
    // linear  - forwarded to SampleMip to select filtering within a mip.
    // texmap  - texture map index; bit 2 selects the entry of bpmem.tex and
    //           the low 2 bits select the map inside it.
    // sample  - output; receives 4 bytes.
    //
    // When ALLOW_MIPMAP is zero, mip level 0 is always sampled.
    int baseMip = 0;
 	bool mipLinear = false;
 #if (ALLOW_MIPMAP)
 	FourTexUnits& texUnit = bpmem.tex[(texmap >> 2) & 1];
    TexMode0& tm0 = texUnit.texMode0[texmap & 3];
 	s32 lodFract = lod & 0xf;
 	// mipmapping applies only for positive lod and when either of the low
 	// two bits of min_filter is set
 	if (lod > 0 && tm0.min_filter & 3)
 	{
 		// use mipmap
 		baseMip = lod >> 4;
 		// blend two mips only when there is a fractional part and bit 1 of
 		// min_filter is set
 		mipLinear = (lodFract && tm0.min_filter & 2);
 		// if using nearest mip filter and lodFract >= 0.5 round up to next mip
 		baseMip += (lodFract >> 3) & (tm0.min_filter & 1);
 	}
 	if (mipLinear)
 	{
 		u8 sampledTex[4];
        u32 texel[4];
 		// weight the two adjacent mips by (16 - lodFract) and lodFract; the
 		// weights sum to 16, which the final >> 4 divides out
 		// NOTE(review): SetTexel is not visible here; presumably it stores
 		// texel * weight the way AddTexel accumulates it - confirm.
 		SampleMip(s, t, baseMip, linear, texmap, sampledTex);
 		SetTexel(sampledTex, texel, (16 - lodFract));
 		SampleMip(s, t, baseMip + 1, linear, texmap, sampledTex);
 		AddTexel(sampledTex, texel, lodFract);
 		sample[0] = (u8)(texel[0] >> 4);
        sample[1] = (u8)(texel[1] >> 4);
        sample[2] = (u8)(texel[2] >> 4);
        sample[3] = (u8)(texel[3] >> 4);
 	}
 	else
 #endif
 	{
 		// single mip level: SampleMip writes the result directly
 		SampleMip(s, t, baseMip, linear, texmap, sample);
 	}	
 }
 void SampleMip(s32 s, s32 t, s32 mip, bool linear, u8 texmap, u8 *sample)
 {
 	FourTexUnits& texUnit = bpmem.tex[(texmap >> 2) & 1];
    u8 subTexmap = texmap & 3;
@ -97,59 +123,85 @@ void Sample(float s, float t, float lod, u8 texmap, u8 *sample)
    u32 imageBase = texUnit.texImage3[subTexmap].image_base << 5;    
    u8 *imageSrc = g_VideoInitialize.pGetMemoryPointer(imageBase);
-    bool linear = false;
+	int imageWidth = ti0.width;
-    if ((lod > 0 && tm0.min_filter > 4) || (lod <= 0 && tm0.mag_filter))
+	int imageHeight = ti0.height;
-        linear = true;
+
 	int tlutAddress = texTlut.tmem_offset << 9;
 	// reduce sample location and texture size to mip level
 	// move texture pointer to mip location
 	if (mip)
 	{
 		int mipWidth = imageWidth + 1;
 		int mipHeight = imageHeight + 1;
 		int fmtWidth = TexDecoder_GetBlockWidthInTexels(ti0.format);
 		int fmtHeight = TexDecoder_GetBlockHeightInTexels(ti0.format);
 		int fmtDepth = TexDecoder_GetTexelSizeInNibbles(ti0.format);
 		imageWidth >>= mip;
 		imageHeight >>= mip;
 		s >>= mip;
 		t >>= mip;
 		while (mip)
 		{
 			mipWidth = max(mipWidth, fmtWidth);
 			mipHeight = max(mipHeight, fmtHeight);
 			u32 size = (mipWidth * mipHeight * fmtDepth) >> 1;
 			imageSrc += size;
 			mipWidth >>= 1;
 			mipHeight >>= 1;
 			mip--;
 		}
 	}
 	// integer part of sample location
 	int imageS = s >> 7;
 	int imageT = t >> 7;
    if (linear)
    {
-        s32 s256 = s32((s - 0.5f) * 256);
+        // linear sampling
        s32 t256 = s32((t- 0.5f) * 256);
        int imageS = s256 >> 8;
 		int imageSPlus1 = imageS + 1;
-        u32 fractS = s256 & 0xff;
+        int fractS = s & 0x7f;
        fractS += fractS >> 7;
        int imageT = t256 >> 8;
        int imageTPlus1 = imageT + 1;
-        u32 fractT = t256 & 0xff;
+        int fractT = t & 0x7f;
        fractT += fractT >> 7;
        u8 sampledTex[4];
        u32 texel[4];
-        WrapCoord(imageS, tm0.wrap_s, ti0.width);
+        WrapCoord(imageS, tm0.wrap_s, imageWidth);
-        WrapCoord(imageT, tm0.wrap_t, ti0.height);
+        WrapCoord(imageT, tm0.wrap_t, imageHeight);
-        WrapCoord(imageSPlus1, tm0.wrap_s, ti0.width);
+        WrapCoord(imageSPlus1, tm0.wrap_s, imageWidth);
-        WrapCoord(imageTPlus1, tm0.wrap_t, ti0.height);
+        WrapCoord(imageTPlus1, tm0.wrap_t, imageHeight);
-        TexDecoder_DecodeTexel(sampledTex, imageSrc, imageS, imageT, ti0.width, ti0.format, texTlut.tmem_offset << 9, texTlut.tlut_format);
+        TexDecoder_DecodeTexel(sampledTex, imageSrc, imageS, imageT, imageWidth, ti0.format, tlutAddress, texTlut.tlut_format);
-        SetTexel(sampledTex, texel, (256 - fractS) * (256 - fractT));
+        SetTexel(sampledTex, texel, (128 - fractS) * (128 - fractT));
-        TexDecoder_DecodeTexel(sampledTex, imageSrc, imageSPlus1, imageT, ti0.width, ti0.format, texTlut.tmem_offset << 9, texTlut.tlut_format);
+        TexDecoder_DecodeTexel(sampledTex, imageSrc, imageSPlus1, imageT, imageWidth, ti0.format, tlutAddress, texTlut.tlut_format);
-        AddTexel(sampledTex, texel, (fractS) * (256 - fractT));
+        AddTexel(sampledTex, texel, (fractS) * (128 - fractT));
-        TexDecoder_DecodeTexel(sampledTex, imageSrc, imageS, imageTPlus1, ti0.width, ti0.format, texTlut.tmem_offset << 9, texTlut.tlut_format);
+        TexDecoder_DecodeTexel(sampledTex, imageSrc, imageS, imageTPlus1, imageWidth, ti0.format, tlutAddress, texTlut.tlut_format);
-        AddTexel(sampledTex, texel, (256 - fractS) * (fractT));
+        AddTexel(sampledTex, texel, (128 - fractS) * (fractT));
-        TexDecoder_DecodeTexel(sampledTex, imageSrc, imageSPlus1, imageTPlus1, ti0.width, ti0.format, texTlut.tmem_offset << 9, texTlut.tlut_format);
+        TexDecoder_DecodeTexel(sampledTex, imageSrc, imageSPlus1, imageTPlus1, imageWidth, ti0.format, tlutAddress, texTlut.tlut_format);
        AddTexel(sampledTex, texel, (fractS) * (fractT));
-        sample[0] = (u8)(texel[0] >> 16);
+        sample[0] = (u8)(texel[0] >> 14);
-        sample[1] = (u8)(texel[1] >> 16);
+        sample[1] = (u8)(texel[1] >> 14);
-        sample[2] = (u8)(texel[2] >> 16);
+        sample[2] = (u8)(texel[2] >> 14);
-        sample[3] = (u8)(texel[3] >> 16);
+        sample[3] = (u8)(texel[3] >> 14);
    }
    else
    {
-        int imageS = int(s);
+        // nearest neighbor sampling
-        int imageT = int(t);
+		WrapCoord(imageS, tm0.wrap_s, imageWidth);
        WrapCoord(imageT, tm0.wrap_t, imageHeight);
-        WrapCoord(imageS, tm0.wrap_s, ti0.width);
+        TexDecoder_DecodeTexel(sample, imageSrc, imageS, imageT, imageWidth, ti0.format, tlutAddress, texTlut.tlut_format);   
        WrapCoord(imageT, tm0.wrap_t, ti0.height);
        TexDecoder_DecodeTexel(sample, imageSrc, imageS, imageT, ti0.width, ti0.format, texTlut.tmem_offset << 9, texTlut.tlut_format);   
    }
 }
--- a/Source/Plugins/Plugin_VideoSoftware/Src/TextureSampler.h
+++ b/Source/Plugins/Plugin_VideoSoftware/Src/TextureSampler.h
@ -23,7 +23,9 @@
 namespace TextureSampler
 {
-    void Sample(float s, float t, float lod, u8 texmap, u8 *sample);
+	void Sample(s32 s, s32 t, s32 lod, bool linear, u8 texmap, u8 *sample);
 	void SampleMip(s32 s, s32 t, s32 mip, bool linear, u8 texmap, u8 *sample);
 }
--- a/Source/Plugins/Plugin_VideoSoftware/Src/TransformUnit.cpp
+++ b/Source/Plugins/Plugin_VideoSoftware/Src/TransformUnit.cpp
@ -22,6 +22,7 @@
 #include "TransformUnit.h"
 #include "XFMemLoader.h"
 #include "CPMemLoader.h"
 #include "BPMemLoader.h"
 #include "NativeVertexFormat.h"
 #include "../../Plugin_VideoDX9/Src/Vec3.h"
@ -30,48 +31,48 @@
 namespace TransformUnit
 {
-void MultiplyVec2Mat24(const float *vec, const float *mat, float *result)
+void MultiplyVec2Mat24(const Vec3 &vec, const float *mat, Vec3 &result)
 {
     // Multiplies (vec.x, vec.y, 1, 1) by two rows of four floats read from
     // mat[0..7] (row-major). Only result.x and result.y are written;
     // vec.z is ignored and result.z is left untouched.
-    result[0] = mat[0] * vec[0] + mat[1] * vec[1] + mat[2] + mat[3];
+    result.x = mat[0] * vec.x + mat[1] * vec.y + mat[2] + mat[3];
-    result[1] = mat[4] * vec[0] + mat[5] * vec[1] + mat[6] + mat[7];
+    result.y = mat[4] * vec.x + mat[5] * vec.y + mat[6] + mat[7];
 }
-void MultiplyVec2Mat34(const float *vec, const float *mat, float *result)
+void MultiplyVec2Mat34(const Vec3 &vec, const float *mat, Vec3 &result)
 {
     // Multiplies (vec.x, vec.y, 1, 1) by three rows of four floats read from
     // mat[0..11] (row-major) and writes all three components of result.
     // vec.z is ignored: the third and fourth input components are taken as 1.
-    result[0] = mat[0] * vec[0] + mat[1] * vec[1] + mat[2] + mat[3];
+    result.x = mat[0] * vec.x + mat[1] * vec.y + mat[2] + mat[3];
-    result[1] = mat[4] * vec[0] + mat[5] * vec[1] + mat[6] + mat[7];
+    result.y = mat[4] * vec.x + mat[5] * vec.y + mat[6] + mat[7];
-    result[2] = mat[8] * vec[0] + mat[9] * vec[1] + mat[10] + mat[11];
+    result.z = mat[8] * vec.x + mat[9] * vec.y + mat[10] + mat[11];
 }
-void MultiplyVec3Mat33(const float *vec, const float *mat, float *result)
+void MultiplyVec3Mat33(const Vec3 &vec, const float *mat, Vec3 &result)
 {
     // Multiplies vec by the 3x3 matrix stored row-major in mat[0..8]; there
     // is no translation term.
     // result is written component by component while vec is still being
     // read, so vec and result must not refer to the same object.
-    result[0] = mat[0] * vec[0] + mat[1] * vec[1] + mat[2] * vec[2];
+    result.x = mat[0] * vec.x + mat[1] * vec.y + mat[2] * vec.z;
-    result[1] = mat[3] * vec[0] + mat[4] * vec[1] + mat[5] * vec[2];
+    result.y = mat[3] * vec.x + mat[4] * vec.y + mat[5] * vec.z;
-    result[2] = mat[6] * vec[0] + mat[7] * vec[1] + mat[8] * vec[2];
+    result.z = mat[6] * vec.x + mat[7] * vec.y + mat[8] * vec.z;
 }
-void MultiplyVec3Mat34(const float *vec, const float *mat, float *result)
+void MultiplyVec3Mat34(const Vec3 &vec, const float *mat, Vec3 &result)
 {
     // Multiplies (vec.x, vec.y, vec.z, 1) by three rows of four floats read
     // from mat[0..11] (row-major); the fourth column acts as a translation.
     // result is written component by component while vec is still being
     // read, so vec and result must not refer to the same object.
-    result[0] = mat[0] * vec[0] + mat[1] * vec[1] + mat[2] * vec[2] + mat[3];
+    result.x = mat[0] * vec.x + mat[1] * vec.y + mat[2] * vec.z + mat[3];
-    result[1] = mat[4] * vec[0] + mat[5] * vec[1] + mat[6] * vec[2] + mat[7];
+    result.y = mat[4] * vec.x + mat[5] * vec.y + mat[6] * vec.z + mat[7];
-    result[2] = mat[8] * vec[0] + mat[9] * vec[1] + mat[10] * vec[2] + mat[11];
+    result.z = mat[8] * vec.x + mat[9] * vec.y + mat[10] * vec.z + mat[11];
 }
-void MultipleVec3Perspective(const float *vec, const float *proj, float *result)
+void MultipleVec3Perspective(const Vec3 &vec, const float *proj, Vec4 &result)
 {
     // Applies a perspective projection described by six coefficients
     // proj[0..5] to vec and writes a four-component result:
     //   x = proj[0]*x + proj[1]*z,  y = proj[2]*y + proj[3]*z,
     //   z = (proj[4]*z + proj[5]) scaled by (1 - 1e-7),  w = -z.
     // The unscaled z computation is kept below as a commented-out line.
     // NOTE(review): the reason for the (1 - 1e-7) factor is not stated in
     // the source - presumably it keeps z strictly inside the clip range;
     // confirm before changing it.
-    result[0] = proj[0] * vec[0] + proj[1] * vec[2];
+    result.x = proj[0] * vec.x + proj[1] * vec.z;
-    result[1] = proj[2] * vec[1] + proj[3] * vec[2];
+    result.y = proj[2] * vec.y + proj[3] * vec.z;
-    //result[2] = (proj[4] * vec[2] + proj[5]);
+    //result.z = (proj[4] * vec.z + proj[5]);
-    result[2] = (proj[4] * vec[2] + proj[5]) * (1.0f - (float)1e-7);
+    result.z = (proj[4] * vec.z + proj[5]) * (1.0f - (float)1e-7);
-    result[3] = -vec[2];
+    result.w = -vec.z;
 }
-void MultipleVec3Ortho(const float *vec, const float *proj, float *result)
+void MultipleVec3Ortho(const Vec3 &vec, const float *proj, Vec4 &result)
 {
     // Applies an orthographic projection described by six coefficients
     // proj[0..5]: each axis is scaled and offset independently
     // (scale = proj[0], proj[2], proj[4]; offset = proj[1], proj[3], proj[5])
     // and w is set to the constant 1.
-    result[0] = proj[0] * vec[0] + proj[1];
+    result.x = proj[0] * vec.x + proj[1];
-    result[1] = proj[2] * vec[1] + proj[3];
+    result.y = proj[2] * vec.y + proj[3];
-    result[2] = proj[4] * vec[2] + proj[5];
+    result.z = proj[4] * vec.z + proj[5];
-    result[3] = 1;
+    result.w = 1;
 }
 void TransformPosition(const InputVertexData *src, OutputVertexData *dst)
@ -98,55 +99,53 @@ void TransformNormal(const InputVertexData *src, bool nbt, OutputVertexData *dst
        MultiplyVec3Mat33(src->normal[0], mat, dst->normal[0]);
        MultiplyVec3Mat33(src->normal[1], mat, dst->normal[1]);
        MultiplyVec3Mat33(src->normal[2], mat, dst->normal[2]);
-        Vec3 *norm0 = (Vec3*)dst->normal[0];
+        dst->normal[0].normalize();
        norm0->normalize();
    }
    else
    {
        MultiplyVec3Mat33(src->normal[0], mat, dst->normal[0]);
-        Vec3 *norm0 = (Vec3*)dst->normal[0];
+        dst->normal[0].normalize();
        norm0->normalize();
    }    
 }
 inline void TransformTexCoordRegular(const TexMtxInfo &texinfo, int coordNum, bool specialCase, const InputVertexData *srcVertex, OutputVertexData *dstVertex)
 {
-    const float *src;
+    const Vec3 *src;
    switch (texinfo.sourcerow)
    {
        case XF_SRCGEOM_INROW:
-            src = srcVertex->position;
+            src = &srcVertex->position;
            break;
        case XF_SRCNORMAL_INROW:
-            src = srcVertex->normal[0];
+            src = &srcVertex->normal[0];
            break;
        case XF_SRCBINORMAL_T_INROW:
-            src = srcVertex->normal[1];
+            src = &srcVertex->normal[1];
            break;
        case XF_SRCBINORMAL_B_INROW:
-            src = srcVertex->normal[2];
+            src = &srcVertex->normal[2];
            break;
        default:
            _assert_(texinfo.sourcerow >= XF_SRCTEX0_INROW && texinfo.sourcerow <= XF_SRCTEX7_INROW);
-            src = srcVertex->texCoords[texinfo.sourcerow - XF_SRCTEX0_INROW];
+            src = (Vec3*)srcVertex->texCoords[texinfo.sourcerow - XF_SRCTEX0_INROW];
            break;
    }
    const float *mat = (const float*)&xfregs.posMatrices[srcVertex->texMtx[coordNum] * 4];
-    float *dst = dstVertex->texCoords[coordNum];
+    Vec3 *dst = &dstVertex->texCoords[coordNum];
    if (texinfo.inputform == XF_TEXINPUT_AB11)
    {
-        MultiplyVec2Mat34(src, mat, dst); 
+        MultiplyVec2Mat34(*src, mat, *dst); 
    }
    else
    {
-        MultiplyVec3Mat34(src, mat, dst); 
+        MultiplyVec3Mat34(*src, mat, *dst); 
    }
    if (xfregs.dualTexTrans)
    {
-        float tempCoord[3];
+        Vec3 tempCoord;
        // normalize
        const PostMtxInfo &postInfo = xfregs.postMtxInfo[coordNum];
@ -157,12 +156,12 @@ inline void TransformTexCoordRegular(const TexMtxInfo &texinfo, int coordNum, bo
 			// no normalization
 			// q of input is 1
 			// q of output is unknown
-			tempCoord[0] = dst[0];
+			tempCoord.x = dst->x;
-			tempCoord[1] = dst[1];
+			tempCoord.y = dst->y;
-			dst[0] = postMat[0] * tempCoord[0] + postMat[1] * tempCoord[1] + postMat[2] + postMat[3];
+			dst->x = postMat[0] * tempCoord.x + postMat[1] * tempCoord.y + postMat[2] + postMat[3];
-			dst[1] = postMat[4] * tempCoord[0] + postMat[5] * tempCoord[1] + postMat[6] + postMat[7];
+			dst->y = postMat[4] * tempCoord.x + postMat[5] * tempCoord.y + postMat[6] + postMat[7];
-			dst[2] = 0.0f;
+			dst->z = 1.0f;
 		}
 		else
 		{		
@ -170,18 +169,14 @@ inline void TransformTexCoordRegular(const TexMtxInfo &texinfo, int coordNum, bo
 			{
 				float length = sqrtf(dst[0] * dst[0] + dst[1] * dst[1] + dst[2] * dst[2]);
 				float invL = 1.0f / length;
-				tempCoord[0] = invL * dst[0];
+				tempCoord = *dst * invL;
 				tempCoord[1] = invL * dst[1];
 				tempCoord[2] = invL * dst[2];
 			}
 			else
 			{
-				tempCoord[0] = dst[0];
+				tempCoord = *dst;
 				tempCoord[1] = dst[1];
 				tempCoord[2] = dst[2];
 			}
-			MultiplyVec3Mat34(tempCoord, postMat, dst);
+			MultiplyVec3Mat34(tempCoord, postMat, *dst);
 		}
    }
 }
@ -220,13 +215,8 @@ inline float SafeDivide(float n, float d)
    return (d==0)?(n>0?1:0):n/d;
 }
-void LightColor(const float *vertexPos, const float *normal, u8 lightNum, const LitChannel &chan, Vec3 &lightCol)
+void LightColor(const Vec3 &pos, const Vec3 &normal, u8 lightNum, const LitChannel &chan, Vec3 &lightCol)
 {
    // must be the size of 3 32bit floats for the light pointer to be valid
    _assert_(sizeof(Vec3) == 12);
    const Vec3 *pos = (const Vec3*)vertexPos;
    const Vec3 *norm0 = (const Vec3*)normal;
    const LightPointer *light = (const LightPointer*)&xfregs.lights[0x10*lightNum];
    if (!(chan.attnfunc & 1)) {
@ -237,15 +227,15 @@ void LightColor(const float *vertexPos, const float *normal, u8 lightNum, const
                break;
            case LIGHTDIF_SIGN:
                {
-                    Vec3 ldir = (light->pos - *pos).normalized();
+                    Vec3 ldir = (light->pos - pos).normalized();
-                    float diffuse = ldir * (*norm0);
+                    float diffuse = ldir * normal;
                    AddScaledIntegerColor(light->color, diffuse, lightCol);
                }
                break;
            case LIGHTDIF_CLAMP:
                {
-                    Vec3 ldir = (light->pos - *pos).normalized();
+                    Vec3 ldir = (light->pos - pos).normalized();
-                    float diffuse = max(0.0f, ldir * (*norm0));
+                    float diffuse = max(0.0f, ldir * normal);
                    AddScaledIntegerColor(light->color, diffuse, lightCol);
                }
                break;
@ -254,7 +244,7 @@ void LightColor(const float *vertexPos, const float *normal, u8 lightNum, const
    }
    else { // spec and spot
        // not sure about divide by zero checks
-        Vec3 ldir = light->pos - *pos;
+        Vec3 ldir = light->pos - pos;
        float attn;
        if (chan.attnfunc == 3) { // spot
@ -269,7 +259,7 @@ void LightColor(const float *vertexPos, const float *normal, u8 lightNum, const
        }
        else if (chan.attnfunc == 1) { // specular
            // donko - what is going on here?  655.36 is a guess but seems about right.
-            attn = (light->pos * (*norm0)) > -655.36 ? max(0.0f, (light->dir * (*norm0))) : 0;
+            attn = (light->pos * normal) > -655.36 ? max(0.0f, (light->dir * normal)) : 0;
            ldir.set(1.0f, attn, attn * attn);
            float cosAtt = max(0.0f, light->cosatt * ldir);
@ -283,14 +273,14 @@ void LightColor(const float *vertexPos, const float *normal, u8 lightNum, const
                break;
            case LIGHTDIF_SIGN:
                {
-                    float difAttn = ldir * (*norm0);
+                    float difAttn = ldir * normal;
                    AddScaledIntegerColor(light->color, attn * difAttn, lightCol);
                }
                break;
            case LIGHTDIF_CLAMP:
                {
-                    float difAttn = max(0.0f, ldir * (*norm0));
+                    float difAttn = max(0.0f, ldir * normal);
                    AddScaledIntegerColor(light->color, attn * difAttn, lightCol);
                }
                break;
@ -299,13 +289,8 @@ void LightColor(const float *vertexPos, const float *normal, u8 lightNum, const
    }
 }
-void LightAlpha(const float *vertexPos, const float *normal, u8 lightNum, const LitChannel &chan, float &lightCol)
+void LightAlpha(const Vec3 &pos, const Vec3 &normal, u8 lightNum, const LitChannel &chan, float &lightCol)
 {
    // must be the size of 3 32bit floats for the light pointer to be valid
    _assert_(sizeof(Vec3) == 12);
    const Vec3 *pos = (const Vec3*)vertexPos;
    const Vec3 *norm0 = (const Vec3*)normal;
    const LightPointer *light = (const LightPointer*)&xfregs.lights[0x10*lightNum];
    if (!(chan.attnfunc & 1)) {
@ -316,15 +301,15 @@ void LightAlpha(const float *vertexPos, const float *normal, u8 lightNum, const
                break;
            case LIGHTDIF_SIGN:
                {
-                    Vec3 ldir = (light->pos - *pos).normalized();                    
+                    Vec3 ldir = (light->pos - pos).normalized();                    
-                    float diffuse = ldir * (*norm0);
+                    float diffuse = ldir * normal;
                    lightCol += light->color[0] * diffuse;
                }
                break;
            case LIGHTDIF_CLAMP:
                {
-                    Vec3 ldir = (light->pos - *pos).normalized();
+                    Vec3 ldir = (light->pos - pos).normalized();
-                    float diffuse = max(0.0f, ldir * (*norm0));
+                    float diffuse = max(0.0f, ldir * normal);
                    lightCol += light->color[0] * diffuse;
                }
                break;
@ -332,7 +317,7 @@ void LightAlpha(const float *vertexPos, const float *normal, u8 lightNum, const
        }
    }
    else { // spec and spot
-        Vec3 ldir = light->pos - *pos;
+        Vec3 ldir = light->pos - pos;
        float attn;
        if (chan.attnfunc == 3) { // spot
@ -347,7 +332,7 @@ void LightAlpha(const float *vertexPos, const float *normal, u8 lightNum, const
        }
        else if (chan.attnfunc == 1) { // specular
            // donko - what is going on here?  655.36 is a guess but seems about right.
-            attn = (light->pos * (*norm0)) > -655.36 ? max(0.0f, (light->dir * (*norm0))) : 0;
+            attn = (light->pos * normal) > -655.36 ? max(0.0f, (light->dir * normal)) : 0;
            ldir.set(1.0f, attn, attn * attn);
            float cosAtt = light->cosatt * ldir;
@ -361,14 +346,14 @@ void LightAlpha(const float *vertexPos, const float *normal, u8 lightNum, const
                break;
            case LIGHTDIF_SIGN:
                {
-                    float difAttn = ldir * (*norm0);
+                    float difAttn = ldir * normal;
                    lightCol += light->color[0] * attn * difAttn;
                }
                break;
            case LIGHTDIF_CLAMP:
                {
-                    float difAttn = max(0.0f, ldir * (*norm0));
+                    float difAttn = max(0.0f, ldir * normal);
                    lightCol += light->color[0] * attn * difAttn;
                }
                break;
@ -472,14 +457,11 @@ void TransformTexCoord(const InputVertexData *src, OutputVertexData *dst, bool s
            break;
        case XF_TEXGEN_EMBOSS_MAP:
            {
                const Vec3 *pos = (const Vec3*)dst->mvPosition;
                const Vec3 *norm1 = (const Vec3*)dst->normal[1];
                const Vec3 *norm2 = (const Vec3*)dst->normal[2];
                const LightPointer *light = (const LightPointer*)&xfregs.lights[0x10*texinfo.embosslightshift];
-                Vec3 ldir = (light->pos - *pos).normalized();
+                Vec3 ldir = (light->pos - dst->mvPosition).normalized();
-                float d1 = ldir * (*norm1);
+                float d1 = ldir * dst->normal[1];
-                float d2 = ldir * (*norm2);
+                float d2 = ldir * dst->normal[2];
                dst->texCoords[coordNum][0] = dst->texCoords[texinfo.embosssourceshift][0] + d1;
                dst->texCoords[coordNum][1] = dst->texCoords[texinfo.embosssourceshift][1] + d2;
@ -503,6 +485,9 @@ void TransformTexCoord(const InputVertexData *src, OutputVertexData *dst, bool s
        default:
            ERROR_LOG(VIDEO, "Bad tex gen type %i", texinfo.texgentype);            
        }
 		dst->texCoords[coordNum][0] *= (bpmem.texcoords[coordNum].s.scale_minus_1 + 1);
 		dst->texCoords[coordNum][1] *= (bpmem.texcoords[coordNum].t.scale_minus_1 + 1);
    }
 }
--- a/Source/Plugins/Plugin_VideoSoftware/Src/VertexFormatConverter.cpp
+++ b/Source/Plugins/Plugin_VideoSoftware/Src/VertexFormatConverter.cpp
@ -24,32 +24,32 @@ namespace VertexFormatConverter
 {
    // Loads a single normal stored as three signed bytes at src into
    // dst->normal[0]. Each byte is reinterpreted as s8 and divided by 128,
    // giving components in [-1, 127/128].
    void LoadNormal1_Byte(InputVertexData *dst, u8 *src)
    {
-        dst->normal[0][0] = (float)(s8)src[0] / 128;
+        dst->normal[0].x = (float)(s8)src[0] / 128;
-        dst->normal[0][1] = (float)(s8)src[1] / 128;
+        dst->normal[0].y = (float)(s8)src[1] / 128;
-        dst->normal[0][2] = (float)(s8)src[2] / 128;
+        dst->normal[0].z = (float)(s8)src[2] / 128;
    }
    // Loads a single normal stored as three signed 16-bit values at src into
    // dst->normal[0]. Each value is divided by 32768, giving components in
    // [-1, 32767/32768].
    // NOTE(review): src is read through an s16* cast with no byte swap here -
    // assumes the data is already in host byte order and suitably aligned;
    // confirm against the caller.
    void LoadNormal1_Short(InputVertexData *dst, u8 *src)
    {
-        dst->normal[0][0] = (float)((s16*)src)[0] / 32768;
+        dst->normal[0].x = (float)((s16*)src)[0] / 32768;
-        dst->normal[0][1] = (float)((s16*)src)[1] / 32768;
+        dst->normal[0].y = (float)((s16*)src)[1] / 32768;
-        dst->normal[0][2] = (float)((s16*)src)[2] / 32768;
+        dst->normal[0].z = (float)((s16*)src)[2] / 32768;
    }
    // Loads a single normal stored as three floats at src into dst->normal[0]
    // without any scaling.
    // NOTE(review): src is read through a float* cast with no byte swap here -
    // assumes the data is already in host byte order and suitably aligned;
    // confirm against the caller.
    void LoadNormal1_Float(InputVertexData *dst, u8 *src)
    {
-        dst->normal[0][0] = ((float*)src)[0];
+        dst->normal[0].x = ((float*)src)[0];
-        dst->normal[0][1] = ((float*)src)[1];
+        dst->normal[0].y = ((float*)src)[1];
-        dst->normal[0][2] = ((float*)src)[2];
+        dst->normal[0].z = ((float*)src)[2];
    }
    // Loads three consecutive normals, each stored as three signed bytes
    // (nine bytes in total at src), into dst->normal[0..2]. Each byte is
    // reinterpreted as s8 and divided by 128.
    void LoadNormal3_Byte(InputVertexData *dst, u8 *src)
    {
        // i indexes the destination normal, j the first source byte of it
        for (int i = 0, j = 0; i < 3; i++, j+=3)
        {
-            dst->normal[i][0] = (float)(s8)src[j + 0] / 128;
+            dst->normal[i].x = (float)(s8)src[j + 0] / 128;
-            dst->normal[i][1] = (float)(s8)src[j + 1] / 128;
+            dst->normal[i].y = (float)(s8)src[j + 1] / 128;
-            dst->normal[i][2] = (float)(s8)src[j + 2] / 128;
+            dst->normal[i].z = (float)(s8)src[j + 2] / 128;
        }
    }
@ -57,9 +57,9 @@ namespace VertexFormatConverter
    {
        for (int i = 0, j = 0; i < 3; i++, j+=3)
        {
-            dst->normal[i][0] = (float)((s16*)src)[j + 0] / 32768;
+            dst->normal[i].x = (float)((s16*)src)[j + 0] / 32768;
-            dst->normal[i][1] = (float)((s16*)src)[j + 1] / 32768;
+            dst->normal[i].y = (float)((s16*)src)[j + 1] / 32768;
-            dst->normal[i][2] = (float)((s16*)src)[j + 2] / 32768;
+            dst->normal[i].z = (float)((s16*)src)[j + 2] / 32768;
        }
    }
@ -67,9 +67,9 @@ namespace VertexFormatConverter
    {
        for (int i = 0, j = 0; i < 3; i++, j+=3)
        {
-            dst->normal[i][0] = ((float*)src)[j + 0];
+            dst->normal[i].x = ((float*)src)[j + 0];
-            dst->normal[i][1] = ((float*)src)[j + 1];
+            dst->normal[i].y = ((float*)src)[j + 1];
-            dst->normal[i][2] = ((float*)src)[j + 2];
+            dst->normal[i].z = ((float*)src)[j + 2];
        }
    }
 }