USB: restore formatting of standalone third party code

2020-11-04 15:19:01 +01:00 · 2020-11-04 15:19:01 +01:00 · ede7fa86fa
parent 9da3d9a5bf
commit ede7fa86fa
5 changed files with 512 additions and 1344 deletions
--- a/pcsx2/USB/usb-eyetoy/jo_mpeg.cpp
+++ b/pcsx2/USB/usb-eyetoy/jo_mpeg.cpp
@ -35,163 +35,38 @@
 #include "jo_mpeg.h"

 // Huffman tables
-static const unsigned char s_jo_HTDC_Y[9][2] = {{4, 3}, {0, 2}, {1, 2}, {5, 3}, {6, 3}, {14, 4}, {30, 5}, {62, 6}, {126, 7}};
-static const unsigned char s_jo_HTDC_C[9][2] = {{0, 2}, {1, 2}, {2, 2}, {6, 3}, {14, 4}, {30, 5}, {62, 6}, {126, 7}, {254, 8}};
+static const unsigned char s_jo_HTDC_Y[9][2] = {{4,3}, {0,2}, {1,2}, {5,3}, {6,3}, {14,4}, {30,5}, {62,6}, {126,7}};
+static const unsigned char s_jo_HTDC_C[9][2] = {{0,2}, {1,2}, {2,2}, {6,3}, {14,4}, {30,5}, {62,6}, {126,7}, {254,8}};
 static const unsigned char s_jo_HTAC[32][40][2] = {
-	{
-		{6, 3},
-		{8, 5},
-		{10, 6},
-		{12, 8},
-		{76, 9},
-		{66, 9},
-		{20, 11},
-		{58, 13},
-		{48, 13},
-		{38, 13},
-		{32, 13},
-		{52, 14},
-		{50, 14},
-		{48, 14},
-		{46, 14},
-		{62, 15},
-		{62, 15},
-		{58, 15},
-		{56, 15},
-		{54, 15},
-		{52, 15},
-		{50, 15},
-		{48, 15},
-		{46, 15},
-		{44, 15},
-		{42, 15},
-		{40, 15},
-		{38, 15},
-		{36, 15},
-		{34, 15},
-		{32, 15},
-		{48, 16},
-		{46, 16},
-		{44, 16},
-		{42, 16},
-		{40, 16},
-		{38, 16},
-		{36, 16},
-		{34, 16},
-		{32, 16},
-	},
-	{{6, 4}, {12, 7}, {74, 9}, {24, 11}, {54, 13}, {44, 14}, {42, 14}, {62, 16}, {60, 16}, {58, 16}, {56, 16}, {54, 16}, {52, 16}, {50, 16}, {38, 17}, {36, 17}, {34, 17}, {32, 17}},
-	{{10, 5}, {8, 8}, {22, 11}, {40, 13}, {40, 14}},
-	{{14, 6}, {72, 9}, {56, 13}, {38, 14}},
-	{{12, 6}, {30, 11}, {36, 13}},
-	{{14, 7}, {18, 11}, {36, 14}},
-	{{10, 7}, {60, 13}, {40, 17}},
-	{{8, 7}, {42, 13}},
-	{{14, 8}, {34, 13}},
-	{{10, 8}, {34, 14}},
-	{{78, 9}, {32, 14}},
-	{{70, 9}, {52, 17}},
-	{{68, 9}, {50, 17}},
-	{{64, 9}, {48, 17}},
-	{{28, 11}, {46, 17}},
-	{{26, 11}, {44, 17}},
-	{{16, 11}, {42, 17}},
-	{{62, 13}},
-	{{52, 13}},
-	{{50, 13}},
-	{{46, 13}},
-	{{44, 13}},
-	{{62, 14}},
-	{{60, 14}},
-	{{58, 14}},
-	{{56, 14}},
-	{{54, 14}},
-	{{62, 17}},
-	{{60, 17}},
-	{{58, 17}},
-	{{56, 17}},
-	{{54, 17}},
+{{6,3},{8,5},{10,6},{12,8},{76,9},{66,9},{20,11},{58,13},{48,13},{38,13},{32,13},{52,14},{50,14},{48,14},{46,14},{62,15},{62,15},{58,15},{56,15},{54,15},{52,15},{50,15},{48,15},{46,15},{44,15},{42,15},{40,15},{38,15},{36,15},{34,15},{32,15},{48,16},{46,16},{44,16},{42,16},{40,16},{38,16},{36,16},{34,16},{32,16},},
+{{6,4},{12,7},{74,9},{24,11},{54,13},{44,14},{42,14},{62,16},{60,16},{58,16},{56,16},{54,16},{52,16},{50,16},{38,17},{36,17},{34,17},{32,17}},
+{{10,5},{8,8},{22,11},{40,13},{40,14}},
+{{14,6},{72,9},{56,13},{38,14}},
+{{12,6},{30,11},{36,13}},  {{14,7},{18,11},{36,14}},  {{10,7},{60,13},{40,17}},
+{{8,7},{42,13}},  {{14,8},{34,13}},  {{10,8},{34,14}},  {{78,9},{32,14}},  {{70,9},{52,17}},  {{68,9},{50,17}},  {{64,9},{48,17}},  {{28,11},{46,17}},  {{26,11},{44,17}},  {{16,11},{42,17}},
+{{62,13}}, {{52,13}}, {{50,13}}, {{46,13}}, {{44,13}}, {{62,14}}, {{60,14}}, {{58,14}}, {{56,14}}, {{54,14}}, {{62,17}}, {{60,17}}, {{58,17}}, {{56,17}}, {{54,17}},
 };
 static const float s_jo_quantTbl[64] = {
-	0.015625f,
-	0.005632f,
-	0.005035f,
-	0.004832f,
-	0.004808f,
-	0.005892f,
-	0.007964f,
-	0.013325f,
-	0.005632f,
-	0.004061f,
-	0.003135f,
-	0.003193f,
-	0.003338f,
-	0.003955f,
-	0.004898f,
-	0.008828f,
-	0.005035f,
-	0.003135f,
-	0.002816f,
-	0.003013f,
-	0.003299f,
-	0.003581f,
-	0.005199f,
-	0.009125f,
-	0.004832f,
-	0.003484f,
-	0.003129f,
-	0.003348f,
-	0.003666f,
-	0.003979f,
-	0.005309f,
-	0.009632f,
-	0.005682f,
-	0.003466f,
-	0.003543f,
-	0.003666f,
-	0.003906f,
-	0.004546f,
-	0.005774f,
-	0.009439f,
-	0.006119f,
-	0.004248f,
-	0.004199f,
-	0.004228f,
-	0.004546f,
-	0.005062f,
-	0.006124f,
-	0.009942f,
-	0.008883f,
-	0.006167f,
-	0.006096f,
-	0.005777f,
-	0.006078f,
-	0.006391f,
-	0.007621f,
-	0.012133f,
-	0.016780f,
-	0.011263f,
-	0.009907f,
-	0.010139f,
-	0.009849f,
-	0.010297f,
-	0.012133f,
-	0.019785f,
+	0.015625f,0.005632f,0.005035f,0.004832f,0.004808f,0.005892f,0.007964f,0.013325f,
+	0.005632f,0.004061f,0.003135f,0.003193f,0.003338f,0.003955f,0.004898f,0.008828f,
+	0.005035f,0.003135f,0.002816f,0.003013f,0.003299f,0.003581f,0.005199f,0.009125f,
+	0.004832f,0.003484f,0.003129f,0.003348f,0.003666f,0.003979f,0.005309f,0.009632f,
+	0.005682f,0.003466f,0.003543f,0.003666f,0.003906f,0.004546f,0.005774f,0.009439f,
+	0.006119f,0.004248f,0.004199f,0.004228f,0.004546f,0.005062f,0.006124f,0.009942f,
+	0.008883f,0.006167f,0.006096f,0.005777f,0.006078f,0.006391f,0.007621f,0.012133f,
+	0.016780f,0.011263f,0.009907f,0.010139f,0.009849f,0.010297f,0.012133f,0.019785f,
 };
-static const unsigned char s_jo_ZigZag[] = {0, 1, 5, 6, 14, 15, 27, 28, 2, 4, 7, 13, 16, 26, 29, 42, 3, 8, 12, 17, 25, 30, 41, 43, 9, 11, 18, 24, 31, 40, 44, 53, 10, 19, 23, 32, 39, 45, 52, 54, 20, 22, 33, 38, 46, 51, 55, 60, 21, 34, 37, 47, 50, 56, 59, 61, 35, 36, 48, 49, 57, 58, 62, 63};
+static const unsigned char s_jo_ZigZag[] = { 0,1,5,6,14,15,27,28,2,4,7,13,16,26,29,42,3,8,12,17,25,30,41,43,9,11,18,24,31,40,44,53,10,19,23,32,39,45,52,54,20,22,33,38,46,51,55,60,21,34,37,47,50,56,59,61,35,36,48,49,57,58,62,63 };

-typedef struct
-{
-	unsigned char* buf_ptr;
+typedef struct {
+	unsigned char *buf_ptr;
 	int buf, cnt;
 } jo_bits_t;

-static void jo_writeBits(jo_bits_t* b, int value, int count)
-{
+static void jo_writeBits(jo_bits_t *b, int value, int count) {
 	b->cnt += count;
 	b->buf |= value << (24 - b->cnt);
-	while (b->cnt >= 8)
-	{
+	while(b->cnt >= 8) {
 		unsigned char c = (b->buf >> 16) & 255;
 		//putc(c, b->fp);
 		*(b->buf_ptr) = c & 0xff;
@ -201,8 +76,7 @@ static void jo_writeBits(jo_bits_t* b, int value, int count)
 	}
 }

-static void jo_DCT(float* d0, float* d1, float* d2, float* d3, float* d4, float* d5, float* d6, float* d7)
-{
+static void jo_DCT(float *d0, float *d1, float *d2, float *d3, float *d4, float *d5, float *d6, float *d7) {
 	float tmp0 = *d0 + *d7;
 	float tmp7 = *d0 - *d7;
 	float tmp1 = *d1 + *d6;
@ -213,52 +87,48 @@ static void jo_DCT(float* d0, float* d1, float* d2, float* d3, float* d4, float*
 	float tmp4 = *d3 - *d4;

 	// Even part
-	float tmp10 = tmp0 + tmp3; // phase 2
+	float tmp10 = tmp0 + tmp3;	// phase 2
 	float tmp13 = tmp0 - tmp3;
 	float tmp11 = tmp1 + tmp2;
 	float tmp12 = tmp1 - tmp2;

-	*d0 = tmp10 + tmp11; // phase 3
+	*d0 = tmp10 + tmp11; 		// phase 3
 	*d4 = tmp10 - tmp11;

 	float z1 = (tmp12 + tmp13) * 0.707106781f; // c4
-	*d2 = tmp13 + z1;                          // phase 5
+	*d2 = tmp13 + z1; 		// phase 5
 	*d6 = tmp13 - z1;

 	// Odd part
-	tmp10 = tmp4 + tmp5; // phase 2
+	tmp10 = tmp4 + tmp5; 		// phase 2
 	tmp11 = tmp5 + tmp6;
 	tmp12 = tmp6 + tmp7;

 	// The rotator is modified from fig 4-8 to avoid extra negations.
 	float z5 = (tmp10 - tmp12) * 0.382683433f; // c6
-	float z2 = tmp10 * 0.541196100f + z5;      // c2-c6
-	float z4 = tmp12 * 1.306562965f + z5;      // c2+c6
-	float z3 = tmp11 * 0.707106781f;           // c4
+	float z2 = tmp10 * 0.541196100f + z5; // c2-c6
+	float z4 = tmp12 * 1.306562965f + z5; // c2+c6
+	float z3 = tmp11 * 0.707106781f; // c4

-	float z11 = tmp7 + z3; // phase 5
+	float z11 = tmp7 + z3;		// phase 5
 	float z13 = tmp7 - z3;

-	*d5 = z13 + z2; // phase 6
+	*d5 = z13 + z2;			// phase 6
 	*d3 = z13 - z2;
 	*d1 = z11 + z4;
 	*d7 = z11 - z4;
 }

-static int jo_processDU(jo_bits_t* bits, float A[64], const unsigned char htdc[9][2], int DC)
-{
-	for (int dataOff = 0; dataOff < 64; dataOff += 8)
-	{
-		jo_DCT(&A[dataOff], &A[dataOff + 1], &A[dataOff + 2], &A[dataOff + 3], &A[dataOff + 4], &A[dataOff + 5], &A[dataOff + 6], &A[dataOff + 7]);
+static int jo_processDU(jo_bits_t *bits, float A[64], const unsigned char htdc[9][2], int DC) {
+	for(int dataOff=0; dataOff<64; dataOff+=8) {
+		jo_DCT(&A[dataOff], &A[dataOff+1], &A[dataOff+2], &A[dataOff+3], &A[dataOff+4], &A[dataOff+5], &A[dataOff+6], &A[dataOff+7]);
 	}
-	for (int dataOff = 0; dataOff < 8; ++dataOff)
-	{
-		jo_DCT(&A[dataOff], &A[dataOff + 8], &A[dataOff + 16], &A[dataOff + 24], &A[dataOff + 32], &A[dataOff + 40], &A[dataOff + 48], &A[dataOff + 56]);
+	for(int dataOff=0; dataOff<8; ++dataOff) {
+		jo_DCT(&A[dataOff], &A[dataOff+8], &A[dataOff+16], &A[dataOff+24], &A[dataOff+32], &A[dataOff+40], &A[dataOff+48], &A[dataOff+56]);
 	}
 	int Q[64];
-	for (int i = 0; i < 64; ++i)
-	{
-		float v = A[i] * s_jo_quantTbl[i];
+	for(int i=0; i<64; ++i) {
+		float v = A[i]*s_jo_quantTbl[i];
 		Q[s_jo_ZigZag[i]] = (int)(v < 0 ? ceilf(v - 0.5f) : floorf(v + 0.5f));
 	}

@ -266,48 +136,36 @@ static int jo_processDU(jo_bits_t* bits, float A[64], const unsigned char htdc[9
 	int aDC = DC < 0 ? -DC : DC;
 	int size = 0;
 	int tempval = aDC;
-	while (tempval)
-	{
+	while(tempval) {
 		size++;
 		tempval >>= 1;
 	}
 	jo_writeBits(bits, htdc[size][0], htdc[size][1]);
-	if (DC < 0)
-		aDC ^= (1 << size) - 1;
+	if(DC < 0) aDC ^= (1 << size) - 1;
 	jo_writeBits(bits, aDC, size);

 	int endpos = 63;
-	for (; (endpos > 0) && (Q[endpos] == 0); --endpos)
-	{ /* do nothing */
-	}
-	for (int i = 1; i <= endpos;)
-	{
+	for(; (endpos>0)&&(Q[endpos]==0); --endpos) { /* do nothing */ }
+	for(int i = 1; i <= endpos;) {
 		int run = 0;
-		while (Q[i] == 0 && i < endpos)
-		{
+		while (Q[i]==0 && i<endpos) {
 			++run;
 			++i;
 		}
 		int AC = Q[i++];
 		int aAC = AC < 0 ? -AC : AC;
 		int code = 0, size = 0;
-		if (run < 32 && aAC <= 40)
-		{
-			code = s_jo_HTAC[run][aAC - 1][0];
-			size = s_jo_HTAC[run][aAC - 1][1];
-			if (AC < 0)
-				code += 1;
+		if (run<32 && aAC<=40) {
+			code = s_jo_HTAC[run][aAC-1][0];
+			size = s_jo_HTAC[run][aAC-1][1];
+			if (AC < 0) code += 1;
 		}
-		if (!size)
-		{
+		if(!size) {
 			jo_writeBits(bits, 1, 6);
 			jo_writeBits(bits, run, 6);
-			if (AC < -127)
-			{
+			if (AC < -127) {
 				jo_writeBits(bits, 128, 12);
-			}
-			else if (AC > 127)
-			{
+			} else if(AC > 127) {
 				jo_writeBits(bits, 0, 12);
 			}
 			code = AC & 0xFFF;
@ -320,23 +178,17 @@ static int jo_processDU(jo_bits_t* bits, float A[64], const unsigned char htdc[9
 	return Q[0];
 }

-unsigned long jo_write_mpeg(unsigned char* mpeg_buf, const unsigned char* raw, int width, int height, int format, int flipx, int flipy)
-{
+unsigned long jo_write_mpeg(unsigned char *mpeg_buf, const unsigned char *raw, int width, int height, int format, int flipx, int flipy) {
 	int lastDCY = 128, lastDCCR = 128, lastDCCB = 128;
-	unsigned char* head = mpeg_buf;
+	unsigned char *head = mpeg_buf;
 	jo_bits_t bits = {mpeg_buf};

-	for (int vblock = 0; vblock < (height + 15) / 16; vblock++)
-	{
-		for (int hblock = 0; hblock < (width + 15) / 16; hblock++)
-		{
-			if (vblock == 0 && hblock == 0)
-			{
+	for (int vblock = 0; vblock < (height+15)/16; vblock++) {
+		for (int hblock = 0; hblock < (width+15)/16; hblock++) {
+			if (vblock == 0 && hblock == 0) {
 				jo_writeBits(&bits, 0b01, 2); // macroblock_type = intra+quant
-				jo_writeBits(&bits, 8, 5);    // quantiser_scale_code = 8
-			}
-			else
-			{
+				jo_writeBits(&bits, 8, 5); // quantiser_scale_code = 8
+			} else {
 				jo_writeBits(&bits, 0b1, 1); // macroblock_address_increment
 				jo_writeBits(&bits, 0b1, 1); // macroblock_type = intra
 			}
@ -344,113 +196,87 @@ unsigned long jo_write_mpeg(unsigned char* mpeg_buf, const unsigned char* raw, i
 			float Y[256], CBx[256], CRx[256];
 			float CB[64], CR[64];

-			if (format == JO_RGBX)
-			{
-				for (int i = 0; i < 256; ++i)
-				{
-					int y = vblock * 16 + (i / 16);
-					int x = hblock * 16 + (i & 15);
-					x = x >= width ? width - 1 : x;
-					y = y >= height ? height - 1 : y;
-					if (flipx)
-						x = width - 1 - x;
-					if (flipy)
-						y = height - 1 - y;
-					const unsigned char* c = raw + y * width * 4 + x * 4;
+			if (format == JO_RGBX) {
+				for (int i=0; i<256; ++i) {
+					int y = vblock*16+(i/16);
+					int x = hblock*16+(i&15);
+					x = x >= width ? width-1 : x;
+					y = y >= height ? height-1 : y;
+					if (flipx) x = width - 1 - x;
+					if (flipy) y = height - 1 - y;
+					const unsigned char *c = raw + y*width*4+x*4;
 					float r, g, b;
-					if (flipx && flipy)
-					{
+					if (flipx && flipy) {
 						r = c[2], g = c[1], b = c[0];
-					}
-					else
-					{
+					} else {
 						r = c[0], g = c[1], b = c[2];
 					}
-					Y[i] = (0.299f * r + 0.587f * g + 0.114f * b) * (219.f / 255) + 16;
-					CBx[i] = (-0.299f * r - 0.587f * g + 0.886f * b) * (224.f / 255) + 128;
-					CRx[i] = (0.701f * r - 0.587f * g - 0.114f * b) * (224.f / 255) + 128;
+					Y[i] = (0.299f*r + 0.587f*g + 0.114f*b) * (219.f/255) + 16;
+					CBx[i] = (-0.299f*r - 0.587f*g + 0.886f*b) * (224.f/255) + 128;
+					CRx[i] = (0.701f*r - 0.587f*g - 0.114f*b) * (224.f/255) + 128;
 				}
 				// Downsample Cb,Cr (420 format)
-				for (int i = 0; i < 64; ++i)
-				{
-					int j = (i & 7) * 2 + (i & 56) * 4;
-					CB[i] = (CBx[j] + CBx[j + 1] + CBx[j + 16] + CBx[j + 17]) * 0.25f;
-					CR[i] = (CRx[j] + CRx[j + 1] + CRx[j + 16] + CRx[j + 17]) * 0.25f;
+				for (int i=0; i<64; ++i) {
+					int j =(i&7)*2 + (i&56)*4;
+					CB[i] = (CBx[j] + CBx[j+1] + CBx[j+16] + CBx[j+17]) * 0.25f;
+					CR[i] = (CRx[j] + CRx[j+1] + CRx[j+16] + CRx[j+17]) * 0.25f;
 				}
-			}
-			else if (format == JO_RGB24)
-			{
-				for (int i = 0; i < 256; ++i)
-				{
-					int y = vblock * 16 + (i / 16);
-					int x = hblock * 16 + (i & 15);
-					x = x >= width ? width - 1 : x;
-					y = y >= height ? height - 1 : y;
-					if (flipx)
-						x = width - 1 - x;
-					if (flipy)
-						y = height - 1 - y;
-					const unsigned char* c = raw + y * width * 3 + x * 3;
+			} else
+			if (format == JO_RGB24) {
+				for (int i=0; i<256; ++i) {
+					int y = vblock*16+(i/16);
+					int x = hblock*16+(i&15);
+					x = x >= width ? width-1 : x;
+					y = y >= height ? height-1 : y;
+					if (flipx) x = width - 1 - x;
+					if (flipy) y = height - 1 - y;
+					const unsigned char *c = raw + y*width*3+x*3;
 					float r, g, b;
-					if (flipx && flipy)
-					{
+					if (flipx && flipy) {
 						r = c[2], g = c[1], b = c[0];
-					}
-					else
-					{
+					} else {
 						r = c[0], g = c[1], b = c[2];
 					}
-					Y[i] = (0.299f * r + 0.587f * g + 0.114f * b) * (219.f / 255) + 16;
-					CBx[i] = (-0.299f * r - 0.587f * g + 0.886f * b) * (224.f / 255) + 128;
-					CRx[i] = (0.701f * r - 0.587f * g - 0.114f * b) * (224.f / 255) + 128;
+					Y[i] = (0.299f*r + 0.587f*g + 0.114f*b) * (219.f/255) + 16;
+					CBx[i] = (-0.299f*r - 0.587f*g + 0.886f*b) * (224.f/255) + 128;
+					CRx[i] = (0.701f*r - 0.587f*g - 0.114f*b) * (224.f/255) + 128;
 				}
 				// Downsample Cb,Cr (420 format)
-				for (int i = 0; i < 64; ++i)
-				{
-					int j = (i & 7) * 2 + (i & 56) * 4;
-					CB[i] = (CBx[j] + CBx[j + 1] + CBx[j + 16] + CBx[j + 17]) * 0.25f;
-					CR[i] = (CRx[j] + CRx[j + 1] + CRx[j + 16] + CRx[j + 17]) * 0.25f;
+				for (int i=0; i<64; ++i) {
+					int j =(i&7)*2 + (i&56)*4;
+					CB[i] = (CBx[j] + CBx[j+1] + CBx[j+16] + CBx[j+17]) * 0.25f;
+					CR[i] = (CRx[j] + CRx[j+1] + CRx[j+16] + CRx[j+17]) * 0.25f;
 				}
-			}
-			else if (format == JO_YUYV)
-			{
-				for (int i = 0; i < 256; i += 2)
-				{
-					int y = vblock * 16 + (i / 16);
-					int x = hblock * 16 + (i & 15);
-					x = x >= width ? width - 1 : x;
-					y = y >= height ? height - 1 : y;
-					if (flipx)
-						x = width - 1 - x;
-					if (flipy)
-						y = height - 1 - y;
-					const unsigned char* c = raw + y * width * 2 + x * 2 - 2;
-					if (flipx)
-					{
-						Y[i + 1] = c[0];
-						CB[i / 4] = c[1];
-						Y[i] = c[2];
-						CR[i / 4] = c[3];
-					}
-					else
-					{
-						Y[i] = c[2];
-						CB[i / 4] = c[3];
-						Y[i + 1] = c[4];
-						CR[i / 4] = c[5];
+			} else
+			if (format == JO_YUYV) {
+				for (int i=0; i<256; i+=2) {
+					int y = vblock*16+(i/16);
+					int x = hblock*16+(i&15);
+					x = x >= width ? width-1 : x;
+					y = y >= height ? height-1 : y;
+					if (flipx) x = width - 1 - x;
+					if (flipy) y = height - 1 - y;
+					const unsigned char *c = raw + y*width*2+x*2-2;
+					if (flipx) {
+						Y[i+1]  = c[0];
+						CB[i/4] = c[1];
+						Y[i]    = c[2];
+						CR[i/4] = c[3];
+					} else {
+						Y[i]    = c[2];
+						CB[i/4] = c[3];
+						Y[i+1]  = c[4];
+						CR[i/4] = c[5];
 					}
 				}
 			}

-			for (int k1 = 0; k1 < 2; ++k1)
-			{
-				for (int k2 = 0; k2 < 2; ++k2)
-				{
+			for (int k1=0; k1<2; ++k1) {
+				for (int k2=0; k2<2; ++k2) {
 					float block[64];
-					for (int i = 0; i < 64; i += 8)
-					{
-						int j = (i & 7) + (i & 56) * 2 + k1 * 8 * 16 + k2 * 8;
-						memcpy(block + i, Y + j, 8 * sizeof(Y[0]));
+					for (int i=0; i<64; i+=8) {
+						int j = (i&7)+(i&56)*2 + k1*8*16 + k2*8;
+						memcpy(block+i, Y+j, 8*sizeof(Y[0]));
 					}
 					lastDCY = jo_processDU(&bits, block, s_jo_HTDC_Y, lastDCY);
 				}
--- a/pcsx2/USB/usb-eyetoy/jo_mpeg.h
+++ b/pcsx2/USB/usb-eyetoy/jo_mpeg.h
@ -1,37 +1,20 @@
-/*  PCSX2 - PS2 Emulator for PCs
- *  Copyright (C) 2002-2020  PCSX2 Dev Team
- *
- *  PCSX2 is free software: you can redistribute it and/or modify it under the terms
- *  of the GNU Lesser General Public License as published by the Free Software Found-
- *  ation, either version 3 of the License, or (at your option) any later version.
- *
- *  PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
- *  without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
- *  PURPOSE.  See the GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License along with PCSX2.
- *  If not, see <http://www.gnu.org/licenses/>.
- */
-
 #ifdef __cplusplus
 extern "C" {
 #endif

-typedef enum
-{
+typedef enum {
 	JO_RGBX,
 	JO_RGB24,
 	JO_YUYV,
 } jo_mpeg_format_t;

-typedef enum
-{
+typedef enum {
 	JO_NONE,
 	JO_FLIP_X,
 	JO_FLIP_Y,
 } jo_mpeg_flip_t;

-unsigned long jo_write_mpeg(unsigned char* mpeg_buf, const unsigned char* rgbx, int width, int height, int format, int flipx, int flipy);
+unsigned long jo_write_mpeg(unsigned char *mpeg_buf, const unsigned char *rgbx, int width, int height, int format, int flipx, int flipy);

 #ifdef __cplusplus
 }
--- a/pcsx2/USB/usb-eyetoy/jpgd/jpgd.cpp
+++ b/pcsx2/USB/usb-eyetoy/jpgd/jpgd.cpp
--- a/pcsx2/USB/usb-eyetoy/jpgd/jpgd.h
+++ b/pcsx2/USB/usb-eyetoy/jpgd/jpgd.h
@ -11,9 +11,9 @@
 #include <stdint.h>

 #ifdef _MSC_VER
-#define JPGD_NORETURN __declspec(noreturn)
+#define JPGD_NORETURN __declspec(noreturn) 
 #elif defined(__GNUC__)
-#define JPGD_NORETURN __attribute__((noreturn))
+#define JPGD_NORETURN __attribute__ ((noreturn))
 #else
 #define JPGD_NORETURN
 #endif
@ -23,11 +23,11 @@

 namespace jpgd
 {
-	typedef unsigned char uint8;
-	typedef signed short int16;
+	typedef unsigned char  uint8;
+	typedef   signed short int16;
 	typedef unsigned short uint16;
-	typedef unsigned int uint;
-	typedef signed int int32;
+	typedef unsigned int   uint;
+	typedef   signed int   int32;

 	// Loads a JPEG image from a memory buffer or a file.
 	// req_comps can be 1 (grayscale), 3 (RGB), or 4 (RGBA).
@ -40,42 +40,15 @@ namespace jpgd
 	// Success/failure error codes.
 	enum jpgd_status
 	{
-		JPGD_SUCCESS = 0,
-		JPGD_FAILED = -1,
-		JPGD_DONE = 1,
-		JPGD_BAD_DHT_COUNTS = -256,
-		JPGD_BAD_DHT_INDEX,
-		JPGD_BAD_DHT_MARKER,
-		JPGD_BAD_DQT_MARKER,
-		JPGD_BAD_DQT_TABLE,
-		JPGD_BAD_PRECISION,
-		JPGD_BAD_HEIGHT,
-		JPGD_BAD_WIDTH,
-		JPGD_TOO_MANY_COMPONENTS,
-		JPGD_BAD_SOF_LENGTH,
-		JPGD_BAD_VARIABLE_MARKER,
-		JPGD_BAD_DRI_LENGTH,
-		JPGD_BAD_SOS_LENGTH,
-		JPGD_BAD_SOS_COMP_ID,
-		JPGD_W_EXTRA_BYTES_BEFORE_MARKER,
-		JPGD_NO_ARITHMITIC_SUPPORT,
-		JPGD_UNEXPECTED_MARKER,
-		JPGD_NOT_JPEG,
-		JPGD_UNSUPPORTED_MARKER,
-		JPGD_BAD_DQT_LENGTH,
-		JPGD_TOO_MANY_BLOCKS,
-		JPGD_UNDEFINED_QUANT_TABLE,
-		JPGD_UNDEFINED_HUFF_TABLE,
-		JPGD_NOT_SINGLE_SCAN,
-		JPGD_UNSUPPORTED_COLORSPACE,
-		JPGD_UNSUPPORTED_SAMP_FACTORS,
-		JPGD_DECODE_ERROR,
-		JPGD_BAD_RESTART_MARKER,
-		JPGD_BAD_SOS_SPECTRAL,
-		JPGD_BAD_SOS_SUCCESSIVE,
-		JPGD_STREAM_READ,
-		JPGD_NOTENOUGHMEM,
-		JPGD_TOO_MANY_SCANS
+		JPGD_SUCCESS = 0, JPGD_FAILED = -1, JPGD_DONE = 1,
+		JPGD_BAD_DHT_COUNTS = -256, JPGD_BAD_DHT_INDEX, JPGD_BAD_DHT_MARKER, JPGD_BAD_DQT_MARKER, JPGD_BAD_DQT_TABLE,
+		JPGD_BAD_PRECISION, JPGD_BAD_HEIGHT, JPGD_BAD_WIDTH, JPGD_TOO_MANY_COMPONENTS,
+		JPGD_BAD_SOF_LENGTH, JPGD_BAD_VARIABLE_MARKER, JPGD_BAD_DRI_LENGTH, JPGD_BAD_SOS_LENGTH,
+		JPGD_BAD_SOS_COMP_ID, JPGD_W_EXTRA_BYTES_BEFORE_MARKER, JPGD_NO_ARITHMITIC_SUPPORT, JPGD_UNEXPECTED_MARKER,
+		JPGD_NOT_JPEG, JPGD_UNSUPPORTED_MARKER, JPGD_BAD_DQT_LENGTH, JPGD_TOO_MANY_BLOCKS,
+		JPGD_UNDEFINED_QUANT_TABLE, JPGD_UNDEFINED_HUFF_TABLE, JPGD_NOT_SINGLE_SCAN, JPGD_UNSUPPORTED_COLORSPACE,
+		JPGD_UNSUPPORTED_SAMP_FACTORS, JPGD_DECODE_ERROR, JPGD_BAD_RESTART_MARKER,
+		JPGD_BAD_SOS_SPECTRAL, JPGD_BAD_SOS_SUCCESSIVE, JPGD_STREAM_READ, JPGD_NOTENOUGHMEM, JPGD_TOO_MANY_SCANS
 	};

 	// Input stream interface.
@ -86,8 +59,8 @@ namespace jpgd
 	class jpeg_decoder_stream
 	{
 	public:
-		jpeg_decoder_stream() {}
-		virtual ~jpeg_decoder_stream() {}
+		jpeg_decoder_stream() { }
+		virtual ~jpeg_decoder_stream() { }

 		// The read() method is called when the internal input buffer is empty.
 		// Parameters:
@ -103,7 +76,7 @@ namespace jpgd
 	class jpeg_decoder_file_stream : public jpeg_decoder_stream
 	{
 		jpeg_decoder_file_stream(const jpeg_decoder_file_stream&);
-		jpeg_decoder_file_stream& operator=(const jpeg_decoder_file_stream&);
+		jpeg_decoder_file_stream& operator =(const jpeg_decoder_file_stream&);

 		FILE* m_pFile;
 		bool m_eof_flag, m_error_flag;
@ -125,28 +98,13 @@ namespace jpgd
 		uint m_ofs, m_size;

 	public:
-		jpeg_decoder_mem_stream()
-			: m_pSrc_data(NULL)
-			, m_ofs(0)
-			, m_size(0)
-		{
-		}
-		jpeg_decoder_mem_stream(const uint8* pSrc_data, uint size)
-			: m_pSrc_data(pSrc_data)
-			, m_ofs(0)
-			, m_size(size)
-		{
-		}
+		jpeg_decoder_mem_stream() : m_pSrc_data(NULL), m_ofs(0), m_size(0) { }
+		jpeg_decoder_mem_stream(const uint8* pSrc_data, uint size) : m_pSrc_data(pSrc_data), m_ofs(0), m_size(size) { }

-		virtual ~jpeg_decoder_mem_stream() {}
+		virtual ~jpeg_decoder_mem_stream() { }

 		bool open(const uint8* pSrc_data, uint size);
-		void close()
-		{
-			m_pSrc_data = NULL;
-			m_ofs = 0;
-			m_size = 0;
-		}
+		void close() { m_pSrc_data = NULL; m_ofs = 0; m_size = 0; }

 		virtual int read(uint8* pBuf, int max_bytes_to_read, bool* pEOF_flag);
 	};
@ -156,15 +114,8 @@ namespace jpgd

 	enum
 	{
-		JPGD_IN_BUF_SIZE = 8192,
-		JPGD_MAX_BLOCKS_PER_MCU = 10,
-		JPGD_MAX_HUFF_TABLES = 8,
-		JPGD_MAX_QUANT_TABLES = 4,
-		JPGD_MAX_COMPONENTS = 4,
-		JPGD_MAX_COMPS_IN_SCAN = 4,
-		JPGD_MAX_BLOCKS_PER_ROW = 16384,
-		JPGD_MAX_HEIGHT = 32768,
-		JPGD_MAX_WIDTH = 32768
+		JPGD_IN_BUF_SIZE = 8192, JPGD_MAX_BLOCKS_PER_MCU = 10, JPGD_MAX_HUFF_TABLES = 8, JPGD_MAX_QUANT_TABLES = 4,
+		JPGD_MAX_COMPONENTS = 4, JPGD_MAX_COMPS_IN_SCAN = 4, JPGD_MAX_BLOCKS_PER_ROW = 16384, JPGD_MAX_HEIGHT = 32768, JPGD_MAX_WIDTH = 32768
 	};

 	typedef int16 jpgd_quant_t;
@ -191,7 +142,7 @@ namespace jpgd
 		int begin_decoding();

 		// Returns the next scan line.
-		// For grayscale images, pScan_line will point to a buffer containing 8-bit pixels (get_bytes_per_pixel() will return 1).
+		// For grayscale images, pScan_line will point to a buffer containing 8-bit pixels (get_bytes_per_pixel() will return 1). 
 		// Otherwise, it will always point to a buffer containing 32-bit RGBA pixels (A will always be 255, and get_bytes_per_pixel() will return 4).
 		// Returns JPGD_SUCCESS if a scan line has been returned.
 		// Returns JPGD_DONE if all scan lines have been returned.
@ -213,17 +164,17 @@ namespace jpgd

 	private:
 		jpeg_decoder(const jpeg_decoder&);
-		jpeg_decoder& operator=(const jpeg_decoder&);
+		jpeg_decoder& operator =(const jpeg_decoder&);

 		typedef void (*pDecode_block_func)(jpeg_decoder*, int, int, int);

 		struct huff_tables
 		{
 			bool ac_table;
-			uint look_up[256];
-			uint look_up2[256];
+			uint  look_up[256];
+			uint  look_up2[256];
 			uint8 code_size[JPGD_HUFF_CODE_SIZE_MAX_LENGTH];
-			uint tree[JPGD_HUFF_TREE_MAX_LENGTH];
+			uint  tree[JPGD_HUFF_TREE_MAX_LENGTH];
 		};

 		struct coeff_buf
@ -263,26 +214,26 @@ namespace jpgd
 		int m_comp_ident[JPGD_MAX_COMPONENTS];        // component's ID
 		int m_comp_h_blocks[JPGD_MAX_COMPONENTS];
 		int m_comp_v_blocks[JPGD_MAX_COMPONENTS];
-		int m_comps_in_scan;                     // # of components in scan
-		int m_comp_list[JPGD_MAX_COMPS_IN_SCAN]; // components in this scan
-		int m_comp_dc_tab[JPGD_MAX_COMPONENTS];  // component's DC Huffman coding table selector
-		int m_comp_ac_tab[JPGD_MAX_COMPONENTS];  // component's AC Huffman coding table selector
-		int m_spectral_start;                    // spectral selection start
-		int m_spectral_end;                      // spectral selection end
-		int m_successive_low;                    // successive approximation low
-		int m_successive_high;                   // successive approximation high
-		int m_max_mcu_x_size;                    // MCU's max. X size in pixels
-		int m_max_mcu_y_size;                    // MCU's max. Y size in pixels
+		int m_comps_in_scan;                          // # of components in scan
+		int m_comp_list[JPGD_MAX_COMPS_IN_SCAN];      // components in this scan
+		int m_comp_dc_tab[JPGD_MAX_COMPONENTS];       // component's DC Huffman coding table selector
+		int m_comp_ac_tab[JPGD_MAX_COMPONENTS];       // component's AC Huffman coding table selector
+		int m_spectral_start;                         // spectral selection start
+		int m_spectral_end;                           // spectral selection end
+		int m_successive_low;                         // successive approximation low
+		int m_successive_high;                        // successive approximation high
+		int m_max_mcu_x_size;                         // MCU's max. X size in pixels
+		int m_max_mcu_y_size;                         // MCU's max. Y size in pixels
 		int m_blocks_per_mcu;
 		int m_max_blocks_per_row;
 		int m_mcus_per_row, m_mcus_per_col;
 		int m_mcu_org[JPGD_MAX_BLOCKS_PER_MCU];
-		int m_total_lines_left; // total # lines left in image
-		int m_mcu_lines_left;   // total # lines left in this MCU
+		int m_total_lines_left;                       // total # lines left in image
+		int m_mcu_lines_left;                         // total # lines left in this MCU
 		int m_num_buffered_scanlines;
 		int m_real_dest_bytes_per_scan_line;
-		int m_dest_bytes_per_scan_line; // rounded up
-		int m_dest_bytes_per_pixel;     // 4 (RGB) or 1 (Y)
+		int m_dest_bytes_per_scan_line;               // rounded up
+		int m_dest_bytes_per_pixel;                   // 4 (RGB) or 1 (Y)
 		huff_tables* m_pHuff_tabs[JPGD_MAX_HUFF_TABLES];
 		coeff_buf* m_dc_coeffs[JPGD_MAX_COMPONENTS];
 		coeff_buf* m_ac_coeffs[JPGD_MAX_COMPONENTS];
@ -324,12 +275,7 @@ namespace jpgd
 		bool m_sample_buf_prev_valid;
 		bool m_has_sse2;

-		inline int check_sample_buf_ofs(int ofs) const
-		{
-			assert(ofs >= 0);
-			assert(ofs < m_max_blocks_per_row * 64);
-			return ofs;
-		}
+		inline int check_sample_buf_ofs(int ofs) const { assert(ofs >= 0); assert(ofs < m_max_blocks_per_row * 64); return ofs; }
 		void free_all_blocks();
 		JPGD_NORETURN void stop_decoding(jpgd_status status);
 		void* alloc(size_t n, bool zero = false);
--- a/pcsx2/USB/usb-eyetoy/jpgd/jpgd_idct.h
+++ b/pcsx2/USB/usb-eyetoy/jpgd/jpgd_idct.h
@ -24,26 +24,26 @@
 #include <immintrin.h>

 #ifdef _MSC_VER
-#define JPGD_SIMD_ALIGN(type, name) __declspec(align(16)) type name
+	#define JPGD_SIMD_ALIGN(type, name) __declspec(align(16)) type name
 #else
-#define JPGD_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
+	#define JPGD_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
 #endif

 #define BITS_INV_ACC 4
 #define SHIFT_INV_ROW 16 - BITS_INV_ACC
 #define SHIFT_INV_COL 1 + BITS_INV_ACC
-const short IRND_INV_ROW = 1024 * (6 - BITS_INV_ACC); //1 << (SHIFT_INV_ROW-1)
-const short IRND_INV_COL = 16 * (BITS_INV_ACC - 3);   // 1 << (SHIFT_INV_COL-1)
-const short IRND_INV_CORR = IRND_INV_COL - 1;         // correction -1.0 and round
+const short IRND_INV_ROW = 1024 * (6 - BITS_INV_ACC);	//1 << (SHIFT_INV_ROW-1)
+const short IRND_INV_COL = 16 * (BITS_INV_ACC - 3);		// 1 << (SHIFT_INV_COL-1)
+const short IRND_INV_CORR = IRND_INV_COL - 1;			// correction -1.0 and round

 JPGD_SIMD_ALIGN(short, shortM128_one_corr[8]) = {1, 1, 1, 1, 1, 1, 1, 1};
 JPGD_SIMD_ALIGN(short, shortM128_round_inv_row[8]) = {IRND_INV_ROW, 0, IRND_INV_ROW, 0, IRND_INV_ROW, 0, IRND_INV_ROW, 0};
 JPGD_SIMD_ALIGN(short, shortM128_round_inv_col[8]) = {IRND_INV_COL, IRND_INV_COL, IRND_INV_COL, IRND_INV_COL, IRND_INV_COL, IRND_INV_COL, IRND_INV_COL, IRND_INV_COL};
-JPGD_SIMD_ALIGN(short, shortM128_round_inv_corr[8]) = {IRND_INV_CORR, IRND_INV_CORR, IRND_INV_CORR, IRND_INV_CORR, IRND_INV_CORR, IRND_INV_CORR, IRND_INV_CORR, IRND_INV_CORR};
-JPGD_SIMD_ALIGN(short, shortM128_tg_1_16[8]) = {13036, 13036, 13036, 13036, 13036, 13036, 13036, 13036};          // tg * (2<<16) + 0.5
-JPGD_SIMD_ALIGN(short, shortM128_tg_2_16[8]) = {27146, 27146, 27146, 27146, 27146, 27146, 27146, 27146};          // tg * (2<<16) + 0.5
-JPGD_SIMD_ALIGN(short, shortM128_tg_3_16[8]) = {-21746, -21746, -21746, -21746, -21746, -21746, -21746, -21746};  // tg * (2<<16) + 0.5
-JPGD_SIMD_ALIGN(short, shortM128_cos_4_16[8]) = {-19195, -19195, -19195, -19195, -19195, -19195, -19195, -19195}; // cos * (2<<16) + 0.5
+JPGD_SIMD_ALIGN(short, shortM128_round_inv_corr[8])= {IRND_INV_CORR, IRND_INV_CORR, IRND_INV_CORR, IRND_INV_CORR, IRND_INV_CORR, IRND_INV_CORR, IRND_INV_CORR, IRND_INV_CORR};
+JPGD_SIMD_ALIGN(short, shortM128_tg_1_16[8]) = {13036, 13036, 13036, 13036, 13036, 13036, 13036, 13036}; // tg * (2<<16) + 0.5
+JPGD_SIMD_ALIGN(short, shortM128_tg_2_16[8]) = {27146, 27146, 27146, 27146, 27146, 27146, 27146, 27146}; // tg * (2<<16) + 0.5
+JPGD_SIMD_ALIGN(short, shortM128_tg_3_16[8]) = {-21746, -21746, -21746, -21746, -21746, -21746, -21746, -21746}; // tg * (2<<16) + 0.5
+JPGD_SIMD_ALIGN(short, shortM128_cos_4_16[8]) = {-19195, -19195, -19195, -19195, -19195, -19195, -19195, -19195};// cos * (2<<16) + 0.5

 //-----------------------------------------------------------------------------
 // Table for rows 0,4 - constants are multiplied on cos_4_16
@ -56,22 +56,22 @@ JPGD_SIMD_ALIGN(short, shortM128_tab_i_04[]) = {
 	16384, -8867, 16384, -21407, // w13 w12 w09 w08
 	16384, 8867, -16384, -21407, // w07 w06 w03 w02
 	-16384, 21407, 16384, -8867, // w15 w14 w11 w10
-	22725, 19266, 19266, -4520,  // w21 w20 w17 w16
+	22725, 19266, 19266, -4520, // w21 w20 w17 w16
 	12873, -22725, 4520, -12873, // w29 w28 w25 w24
 	12873, 4520, -22725, -12873, // w23 w22 w19 w18
 	4520, 19266, 19266, -22725}; // w31 w30 w27 w26

-// Table for rows 1,7 - constants are multiplied on cos_1_16
+	// Table for rows 1,7 - constants are multiplied on cos_1_16
 //movq -> w05 w04 w01 w00
 JPGD_SIMD_ALIGN(short, shortM128_tab_i_17[]) = {
 	22725, 29692, 22725, 12299,
 	22725, -12299, 22725, -29692, // w13 w12 w09 w08
 	22725, 12299, -22725, -29692, // w07 w06 w03 w02
 	-22725, 29692, 22725, -12299, // w15 w14 w11 w10
-	31521, 26722, 26722, -6270,   // w21 w20 w17 w16
-	17855, -31521, 6270, -17855,  // w29 w28 w25 w24
-	17855, 6270, -31521, -17855,  // w23 w22 w19 w18
-	6270, 26722, 26722, -31521};  // w31 w30 w27 w26
+	31521, 26722, 26722, -6270, // w21 w20 w17 w16
+	17855, -31521, 6270, -17855, // w29 w28 w25 w24
+	17855, 6270, -31521, -17855, // w23 w22 w19 w18
+	6270, 26722, 26722, -31521}; // w31 w30 w27 w26

 // Table for rows 2,6 - constants are multiplied on cos_2_16
 //movq -> w05 w04 w01 w00
@ -80,10 +80,10 @@ JPGD_SIMD_ALIGN(short, shortM128_tab_i_26[]) = {
 	21407, -11585, 21407, -27969, // w13 w12 w09 w08
 	21407, 11585, -21407, -27969, // w07 w06 w03 w02
 	-21407, 27969, 21407, -11585, // w15 w14 w11 w10
-	29692, 25172, 25172, -5906,   // w21 w20 w17 w16
-	16819, -29692, 5906, -16819,  // w29 w28 w25 w24
-	16819, 5906, -29692, -16819,  // w23 w22 w19 w18
-	5906, 25172, 25172, -29692};  // w31 w30 w27 w26
+	29692, 25172, 25172, -5906,	// w21 w20 w17 w16
+	16819, -29692, 5906, -16819, // w29 w28 w25 w24
+	16819, 5906, -29692, -16819, // w23 w22 w19 w18
+	5906, 25172, 25172, -29692}; // w31 w30 w27 w26
 // Table for rows 3,5 - constants are multiplied on cos_3_16
 //movq -> w05 w04 w01 w00
 JPGD_SIMD_ALIGN(short, shortM128_tab_i_35[]) = {
@ -91,28 +91,28 @@ JPGD_SIMD_ALIGN(short, shortM128_tab_i_35[]) = {
 	19266, -10426, 19266, -25172, // w13 w12 w09 w08
 	19266, 10426, -19266, -25172, // w07 w06 w03 w02
 	-19266, 25172, 19266, -10426, // w15 w14 w11 w10
-	26722, 22654, 22654, -5315,   // w21 w20 w17 w16
-	15137, -26722, 5315, -15137,  // w29 w28 w25 w24
-	15137, 5315, -26722, -15137,  // w23 w22 w19 w18
-	5315, 22654, 22654, -26722};  // w31 w30 w27 w26
+	26722, 22654, 22654, -5315, // w21 w20 w17 w16
+	15137, -26722, 5315, -15137, // w29 w28 w25 w24
+	15137, 5315, -26722, -15137, // w23 w22 w19 w18
+	5315, 22654, 22654, -26722}; // w31 w30 w27 w26

-JPGD_SIMD_ALIGN(short, shortM128_128[8]) = {128, 128, 128, 128, 128, 128, 128, 128};
+JPGD_SIMD_ALIGN(short, shortM128_128[8]) = { 128, 128, 128, 128, 128, 128, 128, 128 };

-void idctSSEShortU8(const short* pInput, uint8_t* pOutputUB)
+void idctSSEShortU8(const short *pInput, uint8_t * pOutputUB)
 {
 	__m128i r_xmm0, r_xmm4;
 	__m128i r_xmm1, r_xmm2, r_xmm3, r_xmm5, r_xmm6, r_xmm7;
 	__m128i row0, row1, row2, row3, row4, row5, row6, row7;
-	short* pTab_i_04 = shortM128_tab_i_04;
-	short* pTab_i_26 = shortM128_tab_i_26;
+	short * pTab_i_04 = shortM128_tab_i_04;
+	short * pTab_i_26 = shortM128_tab_i_26;

 	//Get pointers for this input and output
 	pTab_i_04 = shortM128_tab_i_04;
 	pTab_i_26 = shortM128_tab_i_26;

 	//Row 1 and Row 3
-	r_xmm0 = _mm_load_si128((__m128i*)pInput);
-	r_xmm4 = _mm_load_si128((__m128i*)(&pInput[2 * 8]));
+	r_xmm0 = _mm_load_si128((__m128i *) pInput);
+	r_xmm4 = _mm_load_si128((__m128i *) (&pInput[2*8]));

 	// *** Work on the data in xmm0
 	//low shuffle mask = 0xd8 = 11 01 10 00
@ -121,58 +121,58 @@ void idctSSEShortU8(const short* pInput, uint8_t* pOutputUB)

 	// copy short 2 and short 0 to all locations
 	r_xmm1 = _mm_shuffle_epi32(r_xmm0, 0);
-
+		
 	// add to those copies
-	r_xmm1 = _mm_madd_epi16(r_xmm1, *((__m128i*)pTab_i_04));
+	r_xmm1 = _mm_madd_epi16(r_xmm1, *((__m128i *) pTab_i_04));

 	// shuffle mask = 0x55 = 01 01 01 01
 	// copy short 3 and short 1 to all locations
 	r_xmm3 = _mm_shuffle_epi32(r_xmm0, 0x55);
-
+		
 	// high shuffle mask = 0xd8 = 11 01 10 00
 	// get short 6 and short 4 into bit positions 64-95
 	// get short 7 and short 5 into bit positions 96-127
 	r_xmm0 = _mm_shufflehi_epi16(r_xmm0, 0xd8);
-
+		
 	// add to short 3 and short 1
-	r_xmm3 = _mm_madd_epi16(r_xmm3, *((__m128i*)&pTab_i_04[16]));
-
+	r_xmm3 = _mm_madd_epi16(r_xmm3, *((__m128i *) &pTab_i_04[16]));
+		
 	// shuffle mask = 0xaa = 10 10 10 10
 	// copy short 6 and short 4 to all locations
 	r_xmm2 = _mm_shuffle_epi32(r_xmm0, 0xaa);
-
+		
 	// shuffle mask = 0xff = 11 11 11 11
 	// copy short 7 and short 5 to all locations
 	r_xmm0 = _mm_shuffle_epi32(r_xmm0, 0xff);
-
+		
 	// add to short 6 and short 4
-	r_xmm2 = _mm_madd_epi16(r_xmm2, *((__m128i*)&pTab_i_04[8]));
-
+	r_xmm2 = _mm_madd_epi16(r_xmm2, *((__m128i *) &pTab_i_04[8])); 
+		
 	// *** Work on the data in xmm4
 	// high shuffle mask = 0xd8 11 01 10 00
 	// get short 6 and short 4 into bit positions 64-95
 	// get short 7 and short 5 into bit positions 96-127
 	r_xmm4 = _mm_shufflehi_epi16(r_xmm4, 0xd8);
-
+		
 	// (xmm0 short 2 and short 0 plus pSi) + some constants
-	r_xmm1 = _mm_add_epi32(r_xmm1, *((__m128i*)shortM128_round_inv_row));
+	r_xmm1 = _mm_add_epi32(r_xmm1, *((__m128i *) shortM128_round_inv_row));
 	r_xmm4 = _mm_shufflelo_epi16(r_xmm4, 0xd8);
-	r_xmm0 = _mm_madd_epi16(r_xmm0, *((__m128i*)&pTab_i_04[24]));
+	r_xmm0 = _mm_madd_epi16(r_xmm0, *((__m128i *) &pTab_i_04[24]));
 	r_xmm5 = _mm_shuffle_epi32(r_xmm4, 0);
 	r_xmm6 = _mm_shuffle_epi32(r_xmm4, 0xaa);
-	r_xmm5 = _mm_madd_epi16(r_xmm5, *((__m128i*)&shortM128_tab_i_26[0]));
+	r_xmm5 = _mm_madd_epi16(r_xmm5, *((__m128i *) &shortM128_tab_i_26[0]));
 	r_xmm1 = _mm_add_epi32(r_xmm1, r_xmm2);
 	r_xmm2 = r_xmm1;
 	r_xmm7 = _mm_shuffle_epi32(r_xmm4, 0x55);
-	r_xmm6 = _mm_madd_epi16(r_xmm6, *((__m128i*)&shortM128_tab_i_26[8]));
+	r_xmm6 = _mm_madd_epi16(r_xmm6, *((__m128i *) &shortM128_tab_i_26[8])); 
 	r_xmm0 = _mm_add_epi32(r_xmm0, r_xmm3);
 	r_xmm4 = _mm_shuffle_epi32(r_xmm4, 0xff);
 	r_xmm2 = _mm_sub_epi32(r_xmm2, r_xmm0);
-	r_xmm7 = _mm_madd_epi16(r_xmm7, *((__m128i*)&shortM128_tab_i_26[16]));
+	r_xmm7 = _mm_madd_epi16(r_xmm7, *((__m128i *) &shortM128_tab_i_26[16])); 
 	r_xmm0 = _mm_add_epi32(r_xmm0, r_xmm1);
 	r_xmm2 = _mm_srai_epi32(r_xmm2, 12);
-	r_xmm5 = _mm_add_epi32(r_xmm5, *((__m128i*)shortM128_round_inv_row));
-	r_xmm4 = _mm_madd_epi16(r_xmm4, *((__m128i*)&shortM128_tab_i_26[24]));
+	r_xmm5 = _mm_add_epi32(r_xmm5, *((__m128i *) shortM128_round_inv_row));
+	r_xmm4 = _mm_madd_epi16(r_xmm4, *((__m128i *) &shortM128_tab_i_26[24]));
 	r_xmm5 = _mm_add_epi32(r_xmm5, r_xmm6);
 	r_xmm6 = r_xmm5;
 	r_xmm0 = _mm_srai_epi32(r_xmm0, 12);
@ -187,37 +187,37 @@ void idctSSEShortU8(const short* pInput, uint8_t* pOutputUB)
 	row2 = _mm_packs_epi32(r_xmm4, r_xmm6);

 	//Row 5 and row 7
-	r_xmm0 = _mm_load_si128((__m128i*)(&pInput[4 * 8]));
-	r_xmm4 = _mm_load_si128((__m128i*)(&pInput[6 * 8]));
+	r_xmm0 = _mm_load_si128((__m128i *) (&pInput[4*8]));
+	r_xmm4 = _mm_load_si128((__m128i *) (&pInput[6*8]));

 	r_xmm0 = _mm_shufflelo_epi16(r_xmm0, 0xd8);
 	r_xmm1 = _mm_shuffle_epi32(r_xmm0, 0);
-	r_xmm1 = _mm_madd_epi16(r_xmm1, *((__m128i*)pTab_i_04));
+	r_xmm1 = _mm_madd_epi16(r_xmm1, *((__m128i *) pTab_i_04));
 	r_xmm3 = _mm_shuffle_epi32(r_xmm0, 0x55);
 	r_xmm0 = _mm_shufflehi_epi16(r_xmm0, 0xd8);
-	r_xmm3 = _mm_madd_epi16(r_xmm3, *((__m128i*)&pTab_i_04[16]));
+	r_xmm3 = _mm_madd_epi16(r_xmm3, *((__m128i *) &pTab_i_04[16]));
 	r_xmm2 = _mm_shuffle_epi32(r_xmm0, 0xaa);
 	r_xmm0 = _mm_shuffle_epi32(r_xmm0, 0xff);
-	r_xmm2 = _mm_madd_epi16(r_xmm2, *((__m128i*)&pTab_i_04[8]));
+	r_xmm2 = _mm_madd_epi16(r_xmm2, *((__m128i *) &pTab_i_04[8])); 
 	r_xmm4 = _mm_shufflehi_epi16(r_xmm4, 0xd8);
-	r_xmm1 = _mm_add_epi32(r_xmm1, *((__m128i*)shortM128_round_inv_row));
+	r_xmm1 = _mm_add_epi32(r_xmm1, *((__m128i *) shortM128_round_inv_row));
 	r_xmm4 = _mm_shufflelo_epi16(r_xmm4, 0xd8);
-	r_xmm0 = _mm_madd_epi16(r_xmm0, *((__m128i*)&pTab_i_04[24]));
+	r_xmm0 = _mm_madd_epi16(r_xmm0, *((__m128i *) &pTab_i_04[24]));
 	r_xmm5 = _mm_shuffle_epi32(r_xmm4, 0);
 	r_xmm6 = _mm_shuffle_epi32(r_xmm4, 0xaa);
-	r_xmm5 = _mm_madd_epi16(r_xmm5, *((__m128i*)&shortM128_tab_i_26[0]));
+	r_xmm5 = _mm_madd_epi16(r_xmm5, *((__m128i *) &shortM128_tab_i_26[0]));
 	r_xmm1 = _mm_add_epi32(r_xmm1, r_xmm2);
 	r_xmm2 = r_xmm1;
 	r_xmm7 = _mm_shuffle_epi32(r_xmm4, 0x55);
-	r_xmm6 = _mm_madd_epi16(r_xmm6, *((__m128i*)&shortM128_tab_i_26[8]));
+	r_xmm6 = _mm_madd_epi16(r_xmm6, *((__m128i *) &shortM128_tab_i_26[8])); 
 	r_xmm0 = _mm_add_epi32(r_xmm0, r_xmm3);
 	r_xmm4 = _mm_shuffle_epi32(r_xmm4, 0xff);
 	r_xmm2 = _mm_sub_epi32(r_xmm2, r_xmm0);
-	r_xmm7 = _mm_madd_epi16(r_xmm7, *((__m128i*)&shortM128_tab_i_26[16]));
+	r_xmm7 = _mm_madd_epi16(r_xmm7, *((__m128i *) &shortM128_tab_i_26[16])); 
 	r_xmm0 = _mm_add_epi32(r_xmm0, r_xmm1);
 	r_xmm2 = _mm_srai_epi32(r_xmm2, 12);
-	r_xmm5 = _mm_add_epi32(r_xmm5, *((__m128i*)shortM128_round_inv_row));
-	r_xmm4 = _mm_madd_epi16(r_xmm4, *((__m128i*)&shortM128_tab_i_26[24]));
+	r_xmm5 = _mm_add_epi32(r_xmm5, *((__m128i *) shortM128_round_inv_row));
+	r_xmm4 = _mm_madd_epi16(r_xmm4, *((__m128i *) &shortM128_tab_i_26[24]));
 	r_xmm5 = _mm_add_epi32(r_xmm5, r_xmm6);
 	r_xmm6 = r_xmm5;
 	r_xmm0 = _mm_srai_epi32(r_xmm0, 12);
@ -234,37 +234,37 @@ void idctSSEShortU8(const short* pInput, uint8_t* pOutputUB)
 	//Row 4 and row 2
 	pTab_i_04 = shortM128_tab_i_35;
 	pTab_i_26 = shortM128_tab_i_17;
-	r_xmm0 = _mm_load_si128((__m128i*)(&pInput[3 * 8]));
-	r_xmm4 = _mm_load_si128((__m128i*)(&pInput[1 * 8]));
+	r_xmm0 = _mm_load_si128((__m128i *) (&pInput[3*8]));
+	r_xmm4 = _mm_load_si128((__m128i *) (&pInput[1*8]));

 	r_xmm0 = _mm_shufflelo_epi16(r_xmm0, 0xd8);
 	r_xmm1 = _mm_shuffle_epi32(r_xmm0, 0);
-	r_xmm1 = _mm_madd_epi16(r_xmm1, *((__m128i*)pTab_i_04));
+	r_xmm1 = _mm_madd_epi16(r_xmm1, *((__m128i *) pTab_i_04));
 	r_xmm3 = _mm_shuffle_epi32(r_xmm0, 0x55);
 	r_xmm0 = _mm_shufflehi_epi16(r_xmm0, 0xd8);
-	r_xmm3 = _mm_madd_epi16(r_xmm3, *((__m128i*)&pTab_i_04[16]));
+	r_xmm3 = _mm_madd_epi16(r_xmm3, *((__m128i *) &pTab_i_04[16]));
 	r_xmm2 = _mm_shuffle_epi32(r_xmm0, 0xaa);
 	r_xmm0 = _mm_shuffle_epi32(r_xmm0, 0xff);
-	r_xmm2 = _mm_madd_epi16(r_xmm2, *((__m128i*)&pTab_i_04[8]));
+	r_xmm2 = _mm_madd_epi16(r_xmm2, *((__m128i *) &pTab_i_04[8])); 
 	r_xmm4 = _mm_shufflehi_epi16(r_xmm4, 0xd8);
-	r_xmm1 = _mm_add_epi32(r_xmm1, *((__m128i*)shortM128_round_inv_row));
+	r_xmm1 = _mm_add_epi32(r_xmm1, *((__m128i *) shortM128_round_inv_row));
 	r_xmm4 = _mm_shufflelo_epi16(r_xmm4, 0xd8);
-	r_xmm0 = _mm_madd_epi16(r_xmm0, *((__m128i*)&pTab_i_04[24]));
+	r_xmm0 = _mm_madd_epi16(r_xmm0, *((__m128i *) &pTab_i_04[24]));
 	r_xmm5 = _mm_shuffle_epi32(r_xmm4, 0);
 	r_xmm6 = _mm_shuffle_epi32(r_xmm4, 0xaa);
-	r_xmm5 = _mm_madd_epi16(r_xmm5, *((__m128i*)&pTab_i_26[0]));
+	r_xmm5 = _mm_madd_epi16(r_xmm5, *((__m128i *) &pTab_i_26[0]));
 	r_xmm1 = _mm_add_epi32(r_xmm1, r_xmm2);
 	r_xmm2 = r_xmm1;
 	r_xmm7 = _mm_shuffle_epi32(r_xmm4, 0x55);
-	r_xmm6 = _mm_madd_epi16(r_xmm6, *((__m128i*)&pTab_i_26[8]));
+	r_xmm6 = _mm_madd_epi16(r_xmm6, *((__m128i *) &pTab_i_26[8])); 
 	r_xmm0 = _mm_add_epi32(r_xmm0, r_xmm3);
 	r_xmm4 = _mm_shuffle_epi32(r_xmm4, 0xff);
 	r_xmm2 = _mm_sub_epi32(r_xmm2, r_xmm0);
-	r_xmm7 = _mm_madd_epi16(r_xmm7, *((__m128i*)&pTab_i_26[16]));
+	r_xmm7 = _mm_madd_epi16(r_xmm7, *((__m128i *) &pTab_i_26[16])); 
 	r_xmm0 = _mm_add_epi32(r_xmm0, r_xmm1);
 	r_xmm2 = _mm_srai_epi32(r_xmm2, 12);
-	r_xmm5 = _mm_add_epi32(r_xmm5, *((__m128i*)shortM128_round_inv_row));
-	r_xmm4 = _mm_madd_epi16(r_xmm4, *((__m128i*)&pTab_i_26[24]));
+	r_xmm5 = _mm_add_epi32(r_xmm5, *((__m128i *) shortM128_round_inv_row));
+	r_xmm4 = _mm_madd_epi16(r_xmm4, *((__m128i *) &pTab_i_26[24]));
 	r_xmm5 = _mm_add_epi32(r_xmm5, r_xmm6);
 	r_xmm6 = r_xmm5;
 	r_xmm0 = _mm_srai_epi32(r_xmm0, 12);
@ -279,37 +279,37 @@ void idctSSEShortU8(const short* pInput, uint8_t* pOutputUB)
 	row1 = _mm_packs_epi32(r_xmm4, r_xmm6);

 	//Row 6 and row 8
-	r_xmm0 = _mm_load_si128((__m128i*)(&pInput[5 * 8]));
-	r_xmm4 = _mm_load_si128((__m128i*)(&pInput[7 * 8]));
+	r_xmm0 = _mm_load_si128((__m128i *) (&pInput[5*8]));
+	r_xmm4 = _mm_load_si128((__m128i *) (&pInput[7*8]));

 	r_xmm0 = _mm_shufflelo_epi16(r_xmm0, 0xd8);
 	r_xmm1 = _mm_shuffle_epi32(r_xmm0, 0);
-	r_xmm1 = _mm_madd_epi16(r_xmm1, *((__m128i*)pTab_i_04));
+	r_xmm1 = _mm_madd_epi16(r_xmm1, *((__m128i *) pTab_i_04));
 	r_xmm3 = _mm_shuffle_epi32(r_xmm0, 0x55);
 	r_xmm0 = _mm_shufflehi_epi16(r_xmm0, 0xd8);
-	r_xmm3 = _mm_madd_epi16(r_xmm3, *((__m128i*)&pTab_i_04[16]));
+	r_xmm3 = _mm_madd_epi16(r_xmm3, *((__m128i *) &pTab_i_04[16]));
 	r_xmm2 = _mm_shuffle_epi32(r_xmm0, 0xaa);
 	r_xmm0 = _mm_shuffle_epi32(r_xmm0, 0xff);
-	r_xmm2 = _mm_madd_epi16(r_xmm2, *((__m128i*)&pTab_i_04[8]));
+	r_xmm2 = _mm_madd_epi16(r_xmm2, *((__m128i *) &pTab_i_04[8])); 
 	r_xmm4 = _mm_shufflehi_epi16(r_xmm4, 0xd8);
-	r_xmm1 = _mm_add_epi32(r_xmm1, *((__m128i*)shortM128_round_inv_row));
+	r_xmm1 = _mm_add_epi32(r_xmm1, *((__m128i *) shortM128_round_inv_row));
 	r_xmm4 = _mm_shufflelo_epi16(r_xmm4, 0xd8);
-	r_xmm0 = _mm_madd_epi16(r_xmm0, *((__m128i*)&pTab_i_04[24]));
+	r_xmm0 = _mm_madd_epi16(r_xmm0, *((__m128i *) &pTab_i_04[24]));
 	r_xmm5 = _mm_shuffle_epi32(r_xmm4, 0);
 	r_xmm6 = _mm_shuffle_epi32(r_xmm4, 0xaa);
-	r_xmm5 = _mm_madd_epi16(r_xmm5, *((__m128i*)&pTab_i_26[0]));
+	r_xmm5 = _mm_madd_epi16(r_xmm5, *((__m128i *) &pTab_i_26[0]));
 	r_xmm1 = _mm_add_epi32(r_xmm1, r_xmm2);
 	r_xmm2 = r_xmm1;
 	r_xmm7 = _mm_shuffle_epi32(r_xmm4, 0x55);
-	r_xmm6 = _mm_madd_epi16(r_xmm6, *((__m128i*)&pTab_i_26[8]));
+	r_xmm6 = _mm_madd_epi16(r_xmm6, *((__m128i *) &pTab_i_26[8])); 
 	r_xmm0 = _mm_add_epi32(r_xmm0, r_xmm3);
 	r_xmm4 = _mm_shuffle_epi32(r_xmm4, 0xff);
 	r_xmm2 = _mm_sub_epi32(r_xmm2, r_xmm0);
-	r_xmm7 = _mm_madd_epi16(r_xmm7, *((__m128i*)&pTab_i_26[16]));
+	r_xmm7 = _mm_madd_epi16(r_xmm7, *((__m128i *) &pTab_i_26[16])); 
 	r_xmm0 = _mm_add_epi32(r_xmm0, r_xmm1);
 	r_xmm2 = _mm_srai_epi32(r_xmm2, 12);
-	r_xmm5 = _mm_add_epi32(r_xmm5, *((__m128i*)shortM128_round_inv_row));
-	r_xmm4 = _mm_madd_epi16(r_xmm4, *((__m128i*)&pTab_i_26[24]));
+	r_xmm5 = _mm_add_epi32(r_xmm5, *((__m128i *) shortM128_round_inv_row));
+	r_xmm4 = _mm_madd_epi16(r_xmm4, *((__m128i *) &pTab_i_26[24]));
 	r_xmm5 = _mm_add_epi32(r_xmm5, r_xmm6);
 	r_xmm6 = r_xmm5;
 	r_xmm0 = _mm_srai_epi32(r_xmm0, 12);
@ -323,13 +323,13 @@ void idctSSEShortU8(const short* pInput, uint8_t* pOutputUB)
 	r_xmm6 = _mm_shuffle_epi32(r_xmm6, 0x1b);
 	row7 = _mm_packs_epi32(r_xmm4, r_xmm6);

-	r_xmm1 = _mm_load_si128((__m128i*)shortM128_tg_3_16);
+	r_xmm1 = _mm_load_si128((__m128i *) shortM128_tg_3_16);
 	r_xmm2 = row5;
 	r_xmm3 = row3;
 	r_xmm0 = _mm_mulhi_epi16(row5, r_xmm1);

 	r_xmm1 = _mm_mulhi_epi16(r_xmm1, r_xmm3);
-	r_xmm5 = _mm_load_si128((__m128i*)shortM128_tg_1_16);
+	r_xmm5 = _mm_load_si128((__m128i *) shortM128_tg_1_16);
 	r_xmm6 = row7;
 	r_xmm4 = _mm_mulhi_epi16(row7, r_xmm5);

@ -339,7 +339,7 @@ void idctSSEShortU8(const short* pInput, uint8_t* pOutputUB)
 	r_xmm7 = row6;

 	r_xmm0 = _mm_adds_epi16(r_xmm0, r_xmm3);
-	r_xmm3 = _mm_load_si128((__m128i*)shortM128_tg_2_16);
+	r_xmm3 = _mm_load_si128((__m128i *) shortM128_tg_2_16);
 	r_xmm2 = _mm_subs_epi16(r_xmm2, r_xmm1);
 	r_xmm7 = _mm_mulhi_epi16(r_xmm7, r_xmm3);
 	r_xmm1 = r_xmm0;
@ -347,11 +347,11 @@ void idctSSEShortU8(const short* pInput, uint8_t* pOutputUB)
 	r_xmm5 = _mm_subs_epi16(r_xmm5, r_xmm6);
 	r_xmm4 = _mm_adds_epi16(r_xmm4, row1);
 	r_xmm0 = _mm_adds_epi16(r_xmm0, r_xmm4);
-	r_xmm0 = _mm_adds_epi16(r_xmm0, *((__m128i*)shortM128_one_corr));
+	r_xmm0 = _mm_adds_epi16(r_xmm0, *((__m128i *) shortM128_one_corr));
 	r_xmm4 = _mm_subs_epi16(r_xmm4, r_xmm1);
 	r_xmm6 = r_xmm5;
 	r_xmm5 = _mm_subs_epi16(r_xmm5, r_xmm2);
-	r_xmm5 = _mm_adds_epi16(r_xmm5, *((__m128i*)shortM128_one_corr));
+	r_xmm5 = _mm_adds_epi16(r_xmm5, *((__m128i *) shortM128_one_corr));
 	r_xmm6 = _mm_adds_epi16(r_xmm6, r_xmm2);

 	//Intermediate results, needed later
@ -359,9 +359,9 @@ void idctSSEShortU8(const short* pInput, uint8_t* pOutputUB)
 	temp7 = r_xmm0;

 	r_xmm1 = r_xmm4;
-	r_xmm0 = _mm_load_si128((__m128i*)shortM128_cos_4_16);
+	r_xmm0 = _mm_load_si128((__m128i *) shortM128_cos_4_16);
 	r_xmm4 = _mm_adds_epi16(r_xmm4, r_xmm5);
-	r_xmm2 = _mm_load_si128((__m128i*)shortM128_cos_4_16);
+	r_xmm2 = _mm_load_si128((__m128i *) shortM128_cos_4_16);
 	r_xmm2 = _mm_mulhi_epi16(r_xmm2, r_xmm4);

 	//Intermediate results, needed later
@ -377,24 +377,24 @@ void idctSSEShortU8(const short* pInput, uint8_t* pOutputUB)
 	r_xmm6 = _mm_subs_epi16(r_xmm6, row4);
 	r_xmm4 = _mm_adds_epi16(r_xmm4, r_xmm2);

-	r_xmm4 = _mm_or_si128(r_xmm4, *((__m128i*)shortM128_one_corr));
+	r_xmm4 = _mm_or_si128(r_xmm4, *((__m128i *) shortM128_one_corr));
 	r_xmm0 = _mm_adds_epi16(r_xmm0, r_xmm1);
-	r_xmm0 = _mm_or_si128(r_xmm0, *((__m128i*)shortM128_one_corr));
+	r_xmm0 = _mm_or_si128(r_xmm0, *((__m128i *) shortM128_one_corr));

 	r_xmm2 = r_xmm5;
 	r_xmm5 = _mm_adds_epi16(r_xmm5, r_xmm7);
 	r_xmm1 = r_xmm6;
-	r_xmm5 = _mm_adds_epi16(r_xmm5, *((__m128i*)shortM128_round_inv_col));
+	r_xmm5 = _mm_adds_epi16(r_xmm5, *((__m128i *) shortM128_round_inv_col));
 	r_xmm2 = _mm_subs_epi16(r_xmm2, r_xmm7);
 	r_xmm7 = temp7;
 	r_xmm6 = _mm_adds_epi16(r_xmm6, r_xmm3);
-	r_xmm6 = _mm_adds_epi16(r_xmm6, *((__m128i*)shortM128_round_inv_col));
+	r_xmm6 = _mm_adds_epi16(r_xmm6, *((__m128i *) shortM128_round_inv_col));
 	r_xmm7 = _mm_adds_epi16(r_xmm7, r_xmm5);
 	r_xmm7 = _mm_srai_epi16(r_xmm7, SHIFT_INV_COL);
 	r_xmm1 = _mm_subs_epi16(r_xmm1, r_xmm3);
-	r_xmm1 = _mm_adds_epi16(r_xmm1, *((__m128i*)shortM128_round_inv_corr));
+	r_xmm1 = _mm_adds_epi16(r_xmm1, *((__m128i *) shortM128_round_inv_corr));
 	r_xmm3 = r_xmm6;
-	r_xmm2 = _mm_adds_epi16(r_xmm2, *((__m128i*)shortM128_round_inv_corr));
+	r_xmm2 = _mm_adds_epi16(r_xmm2, *((__m128i *) shortM128_round_inv_corr));
 	r_xmm6 = _mm_adds_epi16(r_xmm6, r_xmm4);

 	//Store results for row 0
@ -406,7 +406,7 @@ void idctSSEShortU8(const short* pInput, uint8_t* pOutputUB)
 	r_xmm1 = _mm_adds_epi16(r_xmm1, r_xmm0);

 	//Store results for row 1
-	//_mm_store_si128((__m128i *) (&pOutput[1*8]), r_xmm6);
+	//_mm_store_si128((__m128i *) (&pOutput[1*8]), r_xmm6); 
 	__m128i r1 = r_xmm6;

 	r_xmm1 = _mm_srai_epi16(r_xmm1, SHIFT_INV_COL);
@ -415,24 +415,24 @@ void idctSSEShortU8(const short* pInput, uint8_t* pOutputUB)
 	r_xmm7 = _mm_srai_epi16(r_xmm7, SHIFT_INV_COL);

 	//Store results for row 2
-	//_mm_store_si128((__m128i *) (&pOutput[2*8]), r_xmm1);
+	//_mm_store_si128((__m128i *) (&pOutput[2*8]), r_xmm1); 
 	__m128i r2 = r_xmm1;

-	r_xmm5 = _mm_subs_epi16(r_xmm5, temp7);
+	r_xmm5 = _mm_subs_epi16(r_xmm5, temp7); 
 	r_xmm5 = _mm_srai_epi16(r_xmm5, SHIFT_INV_COL);

 	//Store results for row 7
-	//_mm_store_si128((__m128i *) (&pOutput[7*8]), r_xmm5);
+	//_mm_store_si128((__m128i *) (&pOutput[7*8]), r_xmm5); 
 	__m128i r7 = r_xmm5;

 	r_xmm3 = _mm_subs_epi16(r_xmm3, r_xmm4);
 	r_xmm6 = _mm_adds_epi16(r_xmm6, r_xmm2);
-	r_xmm2 = _mm_subs_epi16(r_xmm2, temp3);
+	r_xmm2 = _mm_subs_epi16(r_xmm2, temp3); 
 	r_xmm6 = _mm_srai_epi16(r_xmm6, SHIFT_INV_COL);
 	r_xmm2 = _mm_srai_epi16(r_xmm2, SHIFT_INV_COL);

 	//Store results for row 3
-	//_mm_store_si128((__m128i *) (&pOutput[3*8]), r_xmm6);
+	//_mm_store_si128((__m128i *) (&pOutput[3*8]), r_xmm6); 
 	__m128i r3 = r_xmm6;

 	r_xmm3 = _mm_srai_epi16(r_xmm3, SHIFT_INV_COL);
@ -446,17 +446,17 @@ void idctSSEShortU8(const short* pInput, uint8_t* pOutputUB)
 	__m128i r5 = r_xmm7;
 	__m128i r6 = r_xmm3;

-	r0 = _mm_add_epi16(*(const __m128i*)shortM128_128, r0);
-	r1 = _mm_add_epi16(*(const __m128i*)shortM128_128, r1);
-	r2 = _mm_add_epi16(*(const __m128i*)shortM128_128, r2);
-	r3 = _mm_add_epi16(*(const __m128i*)shortM128_128, r3);
-	r4 = _mm_add_epi16(*(const __m128i*)shortM128_128, r4);
-	r5 = _mm_add_epi16(*(const __m128i*)shortM128_128, r5);
-	r6 = _mm_add_epi16(*(const __m128i*)shortM128_128, r6);
-	r7 = _mm_add_epi16(*(const __m128i*)shortM128_128, r7);
+	r0 = _mm_add_epi16(*(const __m128i *)shortM128_128, r0);
+	r1 = _mm_add_epi16(*(const __m128i *)shortM128_128, r1);
+	r2 = _mm_add_epi16(*(const __m128i *)shortM128_128, r2);
+	r3 = _mm_add_epi16(*(const __m128i *)shortM128_128, r3);
+	r4 = _mm_add_epi16(*(const __m128i *)shortM128_128, r4);
+	r5 = _mm_add_epi16(*(const __m128i *)shortM128_128, r5);
+	r6 = _mm_add_epi16(*(const __m128i *)shortM128_128, r6);
+	r7 = _mm_add_epi16(*(const __m128i *)shortM128_128, r7);

-	((__m128i*)pOutputUB)[0] = _mm_packus_epi16(r0, r1);
-	((__m128i*)pOutputUB)[1] = _mm_packus_epi16(r2, r3);
-	((__m128i*)pOutputUB)[2] = _mm_packus_epi16(r4, r5);
-	((__m128i*)pOutputUB)[3] = _mm_packus_epi16(r6, r7);
+	((__m128i *)pOutputUB)[0] = _mm_packus_epi16(r0, r1);
+	((__m128i *)pOutputUB)[1] = _mm_packus_epi16(r2, r3);
+	((__m128i *)pOutputUB)[2] = _mm_packus_epi16(r4, r5);
+	((__m128i *)pOutputUB)[3] = _mm_packus_epi16(r6, r7);
 }