diff --git a/pcsx2/USB/usb-eyetoy/jo_mpeg.cpp b/pcsx2/USB/usb-eyetoy/jo_mpeg.cpp index bec0394696..078f84cf84 100644 --- a/pcsx2/USB/usb-eyetoy/jo_mpeg.cpp +++ b/pcsx2/USB/usb-eyetoy/jo_mpeg.cpp @@ -35,163 +35,38 @@ #include "jo_mpeg.h" // Huffman tables -static const unsigned char s_jo_HTDC_Y[9][2] = {{4, 3}, {0, 2}, {1, 2}, {5, 3}, {6, 3}, {14, 4}, {30, 5}, {62, 6}, {126, 7}}; -static const unsigned char s_jo_HTDC_C[9][2] = {{0, 2}, {1, 2}, {2, 2}, {6, 3}, {14, 4}, {30, 5}, {62, 6}, {126, 7}, {254, 8}}; +static const unsigned char s_jo_HTDC_Y[9][2] = {{4,3}, {0,2}, {1,2}, {5,3}, {6,3}, {14,4}, {30,5}, {62,6}, {126,7}}; +static const unsigned char s_jo_HTDC_C[9][2] = {{0,2}, {1,2}, {2,2}, {6,3}, {14,4}, {30,5}, {62,6}, {126,7}, {254,8}}; static const unsigned char s_jo_HTAC[32][40][2] = { - { - {6, 3}, - {8, 5}, - {10, 6}, - {12, 8}, - {76, 9}, - {66, 9}, - {20, 11}, - {58, 13}, - {48, 13}, - {38, 13}, - {32, 13}, - {52, 14}, - {50, 14}, - {48, 14}, - {46, 14}, - {62, 15}, - {62, 15}, - {58, 15}, - {56, 15}, - {54, 15}, - {52, 15}, - {50, 15}, - {48, 15}, - {46, 15}, - {44, 15}, - {42, 15}, - {40, 15}, - {38, 15}, - {36, 15}, - {34, 15}, - {32, 15}, - {48, 16}, - {46, 16}, - {44, 16}, - {42, 16}, - {40, 16}, - {38, 16}, - {36, 16}, - {34, 16}, - {32, 16}, - }, - {{6, 4}, {12, 7}, {74, 9}, {24, 11}, {54, 13}, {44, 14}, {42, 14}, {62, 16}, {60, 16}, {58, 16}, {56, 16}, {54, 16}, {52, 16}, {50, 16}, {38, 17}, {36, 17}, {34, 17}, {32, 17}}, - {{10, 5}, {8, 8}, {22, 11}, {40, 13}, {40, 14}}, - {{14, 6}, {72, 9}, {56, 13}, {38, 14}}, - {{12, 6}, {30, 11}, {36, 13}}, - {{14, 7}, {18, 11}, {36, 14}}, - {{10, 7}, {60, 13}, {40, 17}}, - {{8, 7}, {42, 13}}, - {{14, 8}, {34, 13}}, - {{10, 8}, {34, 14}}, - {{78, 9}, {32, 14}}, - {{70, 9}, {52, 17}}, - {{68, 9}, {50, 17}}, - {{64, 9}, {48, 17}}, - {{28, 11}, {46, 17}}, - {{26, 11}, {44, 17}}, - {{16, 11}, {42, 17}}, - {{62, 13}}, - {{52, 13}}, - {{50, 13}}, - {{46, 13}}, - {{44, 13}}, - {{62, 14}}, - {{60, 14}}, - {{58, 14}}, - {{56, 14}}, - {{54, 14}}, - {{62, 17}}, - {{60, 17}}, - {{58, 17}}, - {{56, 17}}, - {{54, 17}}, +{{6,3},{8,5},{10,6},{12,8},{76,9},{66,9},{20,11},{58,13},{48,13},{38,13},{32,13},{52,14},{50,14},{48,14},{46,14},{62,15},{62,15},{58,15},{56,15},{54,15},{52,15},{50,15},{48,15},{46,15},{44,15},{42,15},{40,15},{38,15},{36,15},{34,15},{32,15},{48,16},{46,16},{44,16},{42,16},{40,16},{38,16},{36,16},{34,16},{32,16},}, +{{6,4},{12,7},{74,9},{24,11},{54,13},{44,14},{42,14},{62,16},{60,16},{58,16},{56,16},{54,16},{52,16},{50,16},{38,17},{36,17},{34,17},{32,17}}, +{{10,5},{8,8},{22,11},{40,13},{40,14}}, +{{14,6},{72,9},{56,13},{38,14}}, +{{12,6},{30,11},{36,13}}, {{14,7},{18,11},{36,14}}, {{10,7},{60,13},{40,17}}, +{{8,7},{42,13}}, {{14,8},{34,13}}, {{10,8},{34,14}}, {{78,9},{32,14}}, {{70,9},{52,17}}, {{68,9},{50,17}}, {{64,9},{48,17}}, {{28,11},{46,17}}, {{26,11},{44,17}}, {{16,11},{42,17}}, +{{62,13}}, {{52,13}}, {{50,13}}, {{46,13}}, {{44,13}}, {{62,14}}, {{60,14}}, {{58,14}}, {{56,14}}, {{54,14}}, {{62,17}}, {{60,17}}, {{58,17}}, {{56,17}}, {{54,17}}, }; static const float s_jo_quantTbl[64] = { - 0.015625f, - 0.005632f, - 0.005035f, - 0.004832f, - 0.004808f, - 0.005892f, - 0.007964f, - 0.013325f, - 0.005632f, - 0.004061f, - 0.003135f, - 0.003193f, - 0.003338f, - 0.003955f, - 0.004898f, - 0.008828f, - 0.005035f, - 0.003135f, - 0.002816f, - 0.003013f, - 0.003299f, - 0.003581f, - 0.005199f, - 0.009125f, - 0.004832f, - 0.003484f, - 0.003129f, - 0.003348f, - 0.003666f, - 0.003979f, - 0.005309f, - 0.009632f, - 0.005682f, - 0.003466f, - 0.003543f, - 0.003666f, - 0.003906f, - 0.004546f, - 0.005774f, - 0.009439f, - 0.006119f, - 0.004248f, - 0.004199f, - 0.004228f, - 0.004546f, - 0.005062f, - 0.006124f, - 0.009942f, - 0.008883f, - 0.006167f, - 0.006096f, - 0.005777f, - 0.006078f, - 0.006391f, - 0.007621f, - 0.012133f, - 0.016780f, - 0.011263f, - 0.009907f, - 0.010139f, - 0.009849f, - 0.010297f, - 0.012133f, - 0.019785f, + 0.015625f,0.005632f,0.005035f,0.004832f,0.004808f,0.005892f,0.007964f,0.013325f, + 0.005632f,0.004061f,0.003135f,0.003193f,0.003338f,0.003955f,0.004898f,0.008828f, + 0.005035f,0.003135f,0.002816f,0.003013f,0.003299f,0.003581f,0.005199f,0.009125f, + 0.004832f,0.003484f,0.003129f,0.003348f,0.003666f,0.003979f,0.005309f,0.009632f, + 0.005682f,0.003466f,0.003543f,0.003666f,0.003906f,0.004546f,0.005774f,0.009439f, + 0.006119f,0.004248f,0.004199f,0.004228f,0.004546f,0.005062f,0.006124f,0.009942f, + 0.008883f,0.006167f,0.006096f,0.005777f,0.006078f,0.006391f,0.007621f,0.012133f, + 0.016780f,0.011263f,0.009907f,0.010139f,0.009849f,0.010297f,0.012133f,0.019785f, }; -static const unsigned char s_jo_ZigZag[] = {0, 1, 5, 6, 14, 15, 27, 28, 2, 4, 7, 13, 16, 26, 29, 42, 3, 8, 12, 17, 25, 30, 41, 43, 9, 11, 18, 24, 31, 40, 44, 53, 10, 19, 23, 32, 39, 45, 52, 54, 20, 22, 33, 38, 46, 51, 55, 60, 21, 34, 37, 47, 50, 56, 59, 61, 35, 36, 48, 49, 57, 58, 62, 63}; +static const unsigned char s_jo_ZigZag[] = { 0,1,5,6,14,15,27,28,2,4,7,13,16,26,29,42,3,8,12,17,25,30,41,43,9,11,18,24,31,40,44,53,10,19,23,32,39,45,52,54,20,22,33,38,46,51,55,60,21,34,37,47,50,56,59,61,35,36,48,49,57,58,62,63 }; -typedef struct -{ - unsigned char* buf_ptr; +typedef struct { + unsigned char *buf_ptr; int buf, cnt; } jo_bits_t; -static void jo_writeBits(jo_bits_t* b, int value, int count) -{ +static void jo_writeBits(jo_bits_t *b, int value, int count) { b->cnt += count; b->buf |= value << (24 - b->cnt); - while (b->cnt >= 8) - { + while(b->cnt >= 8) { unsigned char c = (b->buf >> 16) & 255; //putc(c, b->fp); *(b->buf_ptr) = c & 0xff; @@ -201,8 +76,7 @@ static void jo_writeBits(jo_bits_t* b, int value, int count) } } -static void jo_DCT(float* d0, float* d1, float* d2, float* d3, float* d4, float* d5, float* d6, float* d7) -{ +static void jo_DCT(float *d0, float *d1, float *d2, float *d3, float *d4, float *d5, float *d6, float *d7) { float tmp0 = *d0 + *d7; float tmp7 = *d0 - *d7; float tmp1 = *d1 + *d6; @@ -213,52 +87,48 @@ static void jo_DCT(float* d0, float* d1, float* d2, float* d3, float* d4, float* float tmp4 = *d3 - *d4; // Even part - float tmp10 = tmp0 + tmp3; // phase 2 + float tmp10 = tmp0 + tmp3; // phase 2 float tmp13 = tmp0 - tmp3; float tmp11 = tmp1 + tmp2; float tmp12 = tmp1 - tmp2; - *d0 = tmp10 + tmp11; // phase 3 + *d0 = tmp10 + tmp11; // phase 3 *d4 = tmp10 - tmp11; float z1 = (tmp12 + tmp13) * 0.707106781f; // c4 - *d2 = tmp13 + z1; // phase 5 + *d2 = tmp13 + z1; // phase 5 *d6 = tmp13 - z1; // Odd part - tmp10 = tmp4 + tmp5; // phase 2 + tmp10 = tmp4 + tmp5; // phase 2 tmp11 = tmp5 + tmp6; tmp12 = tmp6 + tmp7; // The rotator is modified from fig 4-8 to avoid extra negations. float z5 = (tmp10 - tmp12) * 0.382683433f; // c6 - float z2 = tmp10 * 0.541196100f + z5; // c2-c6 - float z4 = tmp12 * 1.306562965f + z5; // c2+c6 - float z3 = tmp11 * 0.707106781f; // c4 + float z2 = tmp10 * 0.541196100f + z5; // c2-c6 + float z4 = tmp12 * 1.306562965f + z5; // c2+c6 + float z3 = tmp11 * 0.707106781f; // c4 - float z11 = tmp7 + z3; // phase 5 + float z11 = tmp7 + z3; // phase 5 float z13 = tmp7 - z3; - *d5 = z13 + z2; // phase 6 + *d5 = z13 + z2; // phase 6 *d3 = z13 - z2; *d1 = z11 + z4; *d7 = z11 - z4; } -static int jo_processDU(jo_bits_t* bits, float A[64], const unsigned char htdc[9][2], int DC) -{ - for (int dataOff = 0; dataOff < 64; dataOff += 8) - { - jo_DCT(&A[dataOff], &A[dataOff + 1], &A[dataOff + 2], &A[dataOff + 3], &A[dataOff + 4], &A[dataOff + 5], &A[dataOff + 6], &A[dataOff + 7]); +static int jo_processDU(jo_bits_t *bits, float A[64], const unsigned char htdc[9][2], int DC) { + for(int dataOff=0; dataOff<64; dataOff+=8) { + jo_DCT(&A[dataOff], &A[dataOff+1], &A[dataOff+2], &A[dataOff+3], &A[dataOff+4], &A[dataOff+5], &A[dataOff+6], &A[dataOff+7]); } - for (int dataOff = 0; dataOff < 8; ++dataOff) - { - jo_DCT(&A[dataOff], &A[dataOff + 8], &A[dataOff + 16], &A[dataOff + 24], &A[dataOff + 32], &A[dataOff + 40], &A[dataOff + 48], &A[dataOff + 56]); + for(int dataOff=0; dataOff<8; ++dataOff) { + jo_DCT(&A[dataOff], &A[dataOff+8], &A[dataOff+16], &A[dataOff+24], &A[dataOff+32], &A[dataOff+40], &A[dataOff+48], &A[dataOff+56]); } int Q[64]; - for (int i = 0; i < 64; ++i) - { - float v = A[i] * s_jo_quantTbl[i]; + for(int i=0; i<64; ++i) { + float v = A[i]*s_jo_quantTbl[i]; Q[s_jo_ZigZag[i]] = (int)(v < 0 ? ceilf(v - 0.5f) : floorf(v + 0.5f)); } @@ -266,48 +136,36 @@ static int jo_processDU(jo_bits_t* bits, float A[64], const unsigned char htdc[9 int aDC = DC < 0 ? -DC : DC; int size = 0; int tempval = aDC; - while (tempval) - { + while(tempval) { size++; tempval >>= 1; } jo_writeBits(bits, htdc[size][0], htdc[size][1]); - if (DC < 0) - aDC ^= (1 << size) - 1; + if(DC < 0) aDC ^= (1 << size) - 1; jo_writeBits(bits, aDC, size); int endpos = 63; - for (; (endpos > 0) && (Q[endpos] == 0); --endpos) - { /* do nothing */ - } - for (int i = 1; i <= endpos;) - { + for(; (endpos>0)&&(Q[endpos]==0); --endpos) { /* do nothing */ } + for(int i = 1; i <= endpos;) { int run = 0; - while (Q[i] == 0 && i < endpos) - { + while (Q[i]==0 && i 127) - { + } else if(AC > 127) { jo_writeBits(bits, 0, 12); } code = AC & 0xFFF; @@ -320,23 +178,17 @@ static int jo_processDU(jo_bits_t* bits, float A[64], const unsigned char htdc[9 return Q[0]; } -unsigned long jo_write_mpeg(unsigned char* mpeg_buf, const unsigned char* raw, int width, int height, int format, int flipx, int flipy) -{ +unsigned long jo_write_mpeg(unsigned char *mpeg_buf, const unsigned char *raw, int width, int height, int format, int flipx, int flipy) { int lastDCY = 128, lastDCCR = 128, lastDCCB = 128; - unsigned char* head = mpeg_buf; + unsigned char *head = mpeg_buf; jo_bits_t bits = {mpeg_buf}; - for (int vblock = 0; vblock < (height + 15) / 16; vblock++) - { - for (int hblock = 0; hblock < (width + 15) / 16; hblock++) - { - if (vblock == 0 && hblock == 0) - { + for (int vblock = 0; vblock < (height+15)/16; vblock++) { + for (int hblock = 0; hblock < (width+15)/16; hblock++) { + if (vblock == 0 && hblock == 0) { jo_writeBits(&bits, 0b01, 2); // macroblock_type = intra+quant - jo_writeBits(&bits, 8, 5); // quantiser_scale_code = 8 - } - else - { + jo_writeBits(&bits, 8, 5); // quantiser_scale_code = 8 + } else { jo_writeBits(&bits, 0b1, 1); // macroblock_address_increment jo_writeBits(&bits, 0b1, 1); // macroblock_type = intra } @@ -344,113 +196,87 @@ unsigned long jo_write_mpeg(unsigned char* mpeg_buf, const unsigned char* raw, i float Y[256], CBx[256], CRx[256]; float CB[64], CR[64]; - if (format == JO_RGBX) - { - for (int i = 0; i < 256; ++i) - { - int y = vblock * 16 + (i / 16); - int x = hblock * 16 + (i & 15); - x = x >= width ? width - 1 : x; - y = y >= height ? height - 1 : y; - if (flipx) - x = width - 1 - x; - if (flipy) - y = height - 1 - y; - const unsigned char* c = raw + y * width * 4 + x * 4; + if (format == JO_RGBX) { + for (int i=0; i<256; ++i) { + int y = vblock*16+(i/16); + int x = hblock*16+(i&15); + x = x >= width ? width-1 : x; + y = y >= height ? height-1 : y; + if (flipx) x = width - 1 - x; + if (flipy) y = height - 1 - y; + const unsigned char *c = raw + y*width*4+x*4; float r, g, b; - if (flipx && flipy) - { + if (flipx && flipy) { r = c[2], g = c[1], b = c[0]; - } - else - { + } else { r = c[0], g = c[1], b = c[2]; } - Y[i] = (0.299f * r + 0.587f * g + 0.114f * b) * (219.f / 255) + 16; - CBx[i] = (-0.299f * r - 0.587f * g + 0.886f * b) * (224.f / 255) + 128; - CRx[i] = (0.701f * r - 0.587f * g - 0.114f * b) * (224.f / 255) + 128; + Y[i] = (0.299f*r + 0.587f*g + 0.114f*b) * (219.f/255) + 16; + CBx[i] = (-0.299f*r - 0.587f*g + 0.886f*b) * (224.f/255) + 128; + CRx[i] = (0.701f*r - 0.587f*g - 0.114f*b) * (224.f/255) + 128; } // Downsample Cb,Cr (420 format) - for (int i = 0; i < 64; ++i) - { - int j = (i & 7) * 2 + (i & 56) * 4; - CB[i] = (CBx[j] + CBx[j + 1] + CBx[j + 16] + CBx[j + 17]) * 0.25f; - CR[i] = (CRx[j] + CRx[j + 1] + CRx[j + 16] + CRx[j + 17]) * 0.25f; + for (int i=0; i<64; ++i) { + int j =(i&7)*2 + (i&56)*4; + CB[i] = (CBx[j] + CBx[j+1] + CBx[j+16] + CBx[j+17]) * 0.25f; + CR[i] = (CRx[j] + CRx[j+1] + CRx[j+16] + CRx[j+17]) * 0.25f; } - } - else if (format == JO_RGB24) - { - for (int i = 0; i < 256; ++i) - { - int y = vblock * 16 + (i / 16); - int x = hblock * 16 + (i & 15); - x = x >= width ? width - 1 : x; - y = y >= height ? height - 1 : y; - if (flipx) - x = width - 1 - x; - if (flipy) - y = height - 1 - y; - const unsigned char* c = raw + y * width * 3 + x * 3; + } else + if (format == JO_RGB24) { + for (int i=0; i<256; ++i) { + int y = vblock*16+(i/16); + int x = hblock*16+(i&15); + x = x >= width ? width-1 : x; + y = y >= height ? height-1 : y; + if (flipx) x = width - 1 - x; + if (flipy) y = height - 1 - y; + const unsigned char *c = raw + y*width*3+x*3; float r, g, b; - if (flipx && flipy) - { + if (flipx && flipy) { r = c[2], g = c[1], b = c[0]; - } - else - { + } else { r = c[0], g = c[1], b = c[2]; } - Y[i] = (0.299f * r + 0.587f * g + 0.114f * b) * (219.f / 255) + 16; - CBx[i] = (-0.299f * r - 0.587f * g + 0.886f * b) * (224.f / 255) + 128; - CRx[i] = (0.701f * r - 0.587f * g - 0.114f * b) * (224.f / 255) + 128; + Y[i] = (0.299f*r + 0.587f*g + 0.114f*b) * (219.f/255) + 16; + CBx[i] = (-0.299f*r - 0.587f*g + 0.886f*b) * (224.f/255) + 128; + CRx[i] = (0.701f*r - 0.587f*g - 0.114f*b) * (224.f/255) + 128; } // Downsample Cb,Cr (420 format) - for (int i = 0; i < 64; ++i) - { - int j = (i & 7) * 2 + (i & 56) * 4; - CB[i] = (CBx[j] + CBx[j + 1] + CBx[j + 16] + CBx[j + 17]) * 0.25f; - CR[i] = (CRx[j] + CRx[j + 1] + CRx[j + 16] + CRx[j + 17]) * 0.25f; + for (int i=0; i<64; ++i) { + int j =(i&7)*2 + (i&56)*4; + CB[i] = (CBx[j] + CBx[j+1] + CBx[j+16] + CBx[j+17]) * 0.25f; + CR[i] = (CRx[j] + CRx[j+1] + CRx[j+16] + CRx[j+17]) * 0.25f; } - } - else if (format == JO_YUYV) - { - for (int i = 0; i < 256; i += 2) - { - int y = vblock * 16 + (i / 16); - int x = hblock * 16 + (i & 15); - x = x >= width ? width - 1 : x; - y = y >= height ? height - 1 : y; - if (flipx) - x = width - 1 - x; - if (flipy) - y = height - 1 - y; - const unsigned char* c = raw + y * width * 2 + x * 2 - 2; - if (flipx) - { - Y[i + 1] = c[0]; - CB[i / 4] = c[1]; - Y[i] = c[2]; - CR[i / 4] = c[3]; - } - else - { - Y[i] = c[2]; - CB[i / 4] = c[3]; - Y[i + 1] = c[4]; - CR[i / 4] = c[5]; + } else + if (format == JO_YUYV) { + for (int i=0; i<256; i+=2) { + int y = vblock*16+(i/16); + int x = hblock*16+(i&15); + x = x >= width ? width-1 : x; + y = y >= height ? height-1 : y; + if (flipx) x = width - 1 - x; + if (flipy) y = height - 1 - y; + const unsigned char *c = raw + y*width*2+x*2-2; + if (flipx) { + Y[i+1] = c[0]; + CB[i/4] = c[1]; + Y[i] = c[2]; + CR[i/4] = c[3]; + } else { + Y[i] = c[2]; + CB[i/4] = c[3]; + Y[i+1] = c[4]; + CR[i/4] = c[5]; } } } - for (int k1 = 0; k1 < 2; ++k1) - { - for (int k2 = 0; k2 < 2; ++k2) - { + for (int k1=0; k1<2; ++k1) { + for (int k2=0; k2<2; ++k2) { float block[64]; - for (int i = 0; i < 64; i += 8) - { - int j = (i & 7) + (i & 56) * 2 + k1 * 8 * 16 + k2 * 8; - memcpy(block + i, Y + j, 8 * sizeof(Y[0])); + for (int i=0; i<64; i+=8) { + int j = (i&7)+(i&56)*2 + k1*8*16 + k2*8; + memcpy(block+i, Y+j, 8*sizeof(Y[0])); } lastDCY = jo_processDU(&bits, block, s_jo_HTDC_Y, lastDCY); } diff --git a/pcsx2/USB/usb-eyetoy/jo_mpeg.h b/pcsx2/USB/usb-eyetoy/jo_mpeg.h index 230e942d39..1c386d0c15 100644 --- a/pcsx2/USB/usb-eyetoy/jo_mpeg.h +++ b/pcsx2/USB/usb-eyetoy/jo_mpeg.h @@ -1,37 +1,20 @@ -/* PCSX2 - PS2 Emulator for PCs - * Copyright (C) 2002-2020 PCSX2 Dev Team - * - * PCSX2 is free software: you can redistribute it and/or modify it under the terms - * of the GNU Lesser General Public License as published by the Free Software Found- - * ation, either version 3 of the License, or (at your option) any later version. - * - * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; - * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR - * PURPOSE. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along with PCSX2. - * If not, see . - */ - #ifdef __cplusplus extern "C" { #endif -typedef enum -{ +typedef enum { JO_RGBX, JO_RGB24, JO_YUYV, } jo_mpeg_format_t; -typedef enum -{ +typedef enum { JO_NONE, JO_FLIP_X, JO_FLIP_Y, } jo_mpeg_flip_t; -unsigned long jo_write_mpeg(unsigned char* mpeg_buf, const unsigned char* rgbx, int width, int height, int format, int flipx, int flipy); +unsigned long jo_write_mpeg(unsigned char *mpeg_buf, const unsigned char *rgbx, int width, int height, int format, int flipx, int flipy); #ifdef __cplusplus } diff --git a/pcsx2/USB/usb-eyetoy/jpgd/jpgd.cpp b/pcsx2/USB/usb-eyetoy/jpgd/jpgd.cpp index 849de47b04..0ae0a7ae21 100644 --- a/pcsx2/USB/usb-eyetoy/jpgd/jpgd.cpp +++ b/pcsx2/USB/usb-eyetoy/jpgd/jpgd.cpp @@ -3,7 +3,7 @@ // Supports box and linear chroma upsampling. // // Released under two licenses. You are free to choose which license you want: -// License 1: +// License 1: // Public Domain // // License 2: @@ -34,116 +34,73 @@ #include #ifdef _MSC_VER -#pragma warning(disable : 4611) // warning C4611: interaction between '_setjmp' and C++ object destruction is non-portable +#pragma warning (disable : 4611) // warning C4611: interaction between '_setjmp' and C++ object destruction is non-portable #endif #ifndef JPGD_USE_SSE2 -#if defined(__GNUC__) -#if defined(__SSE2__) -#define JPGD_USE_SSE2 (1) -#endif -#elif defined(_MSC_VER) -#if defined(_M_X64) -#define JPGD_USE_SSE2 (1) -#endif -#endif + #if defined(__GNUC__) + #if defined(__SSE2__) + #define JPGD_USE_SSE2 (1) + #endif + #elif defined(_MSC_VER) + #if defined(_M_X64) + #define JPGD_USE_SSE2 (1) + #endif + #endif #endif #define JPGD_TRUE (1) #define JPGD_FALSE (0) -#define JPGD_MAX(a, b) (((a) > (b)) ? (a) : (b)) -#define JPGD_MIN(a, b) (((a) < (b)) ? (a) : (b)) +#define JPGD_MAX(a,b) (((a)>(b)) ? (a) : (b)) +#define JPGD_MIN(a,b) (((a)<(b)) ? (a) : (b)) -namespace jpgd -{ +namespace jpgd { static inline void* jpgd_malloc(size_t nSize) { return malloc(nSize); } static inline void jpgd_free(void* p) { free(p); } // DCT coefficients are stored in this sequence. - static int g_ZAG[64] = {0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, 12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6, 7, 14, 21, 28, 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51, 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63}; + static int g_ZAG[64] = { 0,1,8,16,9,2,3,10,17,24,32,25,18,11,4,5,12,19,26,33,40,48,41,34,27,20,13,6,7,14,21,28,35,42,49,56,57,50,43,36,29,22,15,23,30,37,44,51,58,59,52,45,38,31,39,46,53,60,61,54,47,55,62,63 }; enum JPEG_MARKER { - M_SOF0 = 0xC0, - M_SOF1 = 0xC1, - M_SOF2 = 0xC2, - M_SOF3 = 0xC3, - M_SOF5 = 0xC5, - M_SOF6 = 0xC6, - M_SOF7 = 0xC7, - M_JPG = 0xC8, - M_SOF9 = 0xC9, - M_SOF10 = 0xCA, - M_SOF11 = 0xCB, - M_SOF13 = 0xCD, - M_SOF14 = 0xCE, - M_SOF15 = 0xCF, - M_DHT = 0xC4, - M_DAC = 0xCC, - M_RST0 = 0xD0, - M_RST1 = 0xD1, - M_RST2 = 0xD2, - M_RST3 = 0xD3, - M_RST4 = 0xD4, - M_RST5 = 0xD5, - M_RST6 = 0xD6, - M_RST7 = 0xD7, - M_SOI = 0xD8, - M_EOI = 0xD9, - M_SOS = 0xDA, - M_DQT = 0xDB, - M_DNL = 0xDC, - M_DRI = 0xDD, - M_DHP = 0xDE, - M_EXP = 0xDF, - M_APP0 = 0xE0, - M_APP15 = 0xEF, - M_JPG0 = 0xF0, - M_JPG13 = 0xFD, - M_COM = 0xFE, - M_TEM = 0x01, - M_ERROR = 0x100, - RST0 = 0xD0 + M_SOF0 = 0xC0, M_SOF1 = 0xC1, M_SOF2 = 0xC2, M_SOF3 = 0xC3, M_SOF5 = 0xC5, M_SOF6 = 0xC6, M_SOF7 = 0xC7, M_JPG = 0xC8, + M_SOF9 = 0xC9, M_SOF10 = 0xCA, M_SOF11 = 0xCB, M_SOF13 = 0xCD, M_SOF14 = 0xCE, M_SOF15 = 0xCF, M_DHT = 0xC4, M_DAC = 0xCC, + M_RST0 = 0xD0, M_RST1 = 0xD1, M_RST2 = 0xD2, M_RST3 = 0xD3, M_RST4 = 0xD4, M_RST5 = 0xD5, M_RST6 = 0xD6, M_RST7 = 0xD7, + M_SOI = 0xD8, M_EOI = 0xD9, M_SOS = 0xDA, M_DQT = 0xDB, M_DNL = 0xDC, M_DRI = 0xDD, M_DHP = 0xDE, M_EXP = 0xDF, + M_APP0 = 0xE0, M_APP15 = 0xEF, M_JPG0 = 0xF0, M_JPG13 = 0xFD, M_COM = 0xFE, M_TEM = 0x01, M_ERROR = 0x100, RST0 = 0xD0 }; - enum JPEG_SUBSAMPLING - { - JPGD_GRAYSCALE = 0, - JPGD_YH1V1, - JPGD_YH2V1, - JPGD_YH1V2, - JPGD_YH2V2 - }; + enum JPEG_SUBSAMPLING { JPGD_GRAYSCALE = 0, JPGD_YH1V1, JPGD_YH2V1, JPGD_YH1V2, JPGD_YH2V2 }; #if JPGD_USE_SSE2 #include "jpgd_idct.h" #endif -#define CONST_BITS 13 -#define PASS1_BITS 2 +#define CONST_BITS 13 +#define PASS1_BITS 2 #define SCALEDONE ((int32)1) -#define FIX_0_298631336 ((int32)2446) /* FIX(0.298631336) */ -#define FIX_0_390180644 ((int32)3196) /* FIX(0.390180644) */ -#define FIX_0_541196100 ((int32)4433) /* FIX(0.541196100) */ -#define FIX_0_765366865 ((int32)6270) /* FIX(0.765366865) */ -#define FIX_0_899976223 ((int32)7373) /* FIX(0.899976223) */ -#define FIX_1_175875602 ((int32)9633) /* FIX(1.175875602) */ -#define FIX_1_501321110 ((int32)12299) /* FIX(1.501321110) */ -#define FIX_1_847759065 ((int32)15137) /* FIX(1.847759065) */ -#define FIX_1_961570560 ((int32)16069) /* FIX(1.961570560) */ -#define FIX_2_053119869 ((int32)16819) /* FIX(2.053119869) */ -#define FIX_2_562915447 ((int32)20995) /* FIX(2.562915447) */ -#define FIX_3_072711026 ((int32)25172) /* FIX(3.072711026) */ +#define FIX_0_298631336 ((int32)2446) /* FIX(0.298631336) */ +#define FIX_0_390180644 ((int32)3196) /* FIX(0.390180644) */ +#define FIX_0_541196100 ((int32)4433) /* FIX(0.541196100) */ +#define FIX_0_765366865 ((int32)6270) /* FIX(0.765366865) */ +#define FIX_0_899976223 ((int32)7373) /* FIX(0.899976223) */ +#define FIX_1_175875602 ((int32)9633) /* FIX(1.175875602) */ +#define FIX_1_501321110 ((int32)12299) /* FIX(1.501321110) */ +#define FIX_1_847759065 ((int32)15137) /* FIX(1.847759065) */ +#define FIX_1_961570560 ((int32)16069) /* FIX(1.961570560) */ +#define FIX_2_053119869 ((int32)16819) /* FIX(2.053119869) */ +#define FIX_2_562915447 ((int32)20995) /* FIX(2.562915447) */ +#define FIX_3_072711026 ((int32)25172) /* FIX(3.072711026) */ -#define DESCALE(x, n) (((x) + (SCALEDONE << ((n)-1))) >> (n)) -#define DESCALE_ZEROSHIFT(x, n) (((x) + (128 << (n)) + (SCALEDONE << ((n)-1))) >> (n)) +#define DESCALE(x,n) (((x) + (SCALEDONE << ((n)-1))) >> (n)) +#define DESCALE_ZEROSHIFT(x,n) (((x) + (128 << (n)) + (SCALEDONE << ((n)-1))) >> (n)) -#define MULTIPLY(var, cnst) ((var) * (cnst)) +#define MULTIPLY(var, cnst) ((var) * (cnst)) #define CLAMP(i) ((static_cast(i) > 255) ? (((~i) >> 31) & 0xFF) : (i)) @@ -203,7 +160,7 @@ namespace jpgd { static void idct(int* pTemp, const jpgd_block_coeff_t* pSrc) { - (void)pTemp; + (void)pTemp; (void)pSrc; } }; @@ -307,525 +264,22 @@ namespace jpgd }; static const uint8 s_idct_row_table[] = - { - 1, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 2, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 2, - 1, - 0, - 0, - 0, - 0, - 0, - 0, - 2, - 1, - 1, - 0, - 0, - 0, - 0, - 0, - 2, - 2, - 1, - 0, - 0, - 0, - 0, - 0, - 3, - 2, - 1, - 0, - 0, - 0, - 0, - 0, - 4, - 2, - 1, - 0, - 0, - 0, - 0, - 0, - 4, - 3, - 1, - 0, - 0, - 0, - 0, - 0, - 4, - 3, - 2, - 0, - 0, - 0, - 0, - 0, - 4, - 3, - 2, - 1, - 0, - 0, - 0, - 0, - 4, - 3, - 2, - 1, - 1, - 0, - 0, - 0, - 4, - 3, - 2, - 2, - 1, - 0, - 0, - 0, - 4, - 3, - 3, - 2, - 1, - 0, - 0, - 0, - 4, - 4, - 3, - 2, - 1, - 0, - 0, - 0, - 5, - 4, - 3, - 2, - 1, - 0, - 0, - 0, - 6, - 4, - 3, - 2, - 1, - 0, - 0, - 0, - 6, - 5, - 3, - 2, - 1, - 0, - 0, - 0, - 6, - 5, - 4, - 2, - 1, - 0, - 0, - 0, - 6, - 5, - 4, - 3, - 1, - 0, - 0, - 0, - 6, - 5, - 4, - 3, - 2, - 0, - 0, - 0, - 6, - 5, - 4, - 3, - 2, - 1, - 0, - 0, - 6, - 5, - 4, - 3, - 2, - 1, - 1, - 0, - 6, - 5, - 4, - 3, - 2, - 2, - 1, - 0, - 6, - 5, - 4, - 3, - 3, - 2, - 1, - 0, - 6, - 5, - 4, - 4, - 3, - 2, - 1, - 0, - 6, - 5, - 5, - 4, - 3, - 2, - 1, - 0, - 6, - 6, - 5, - 4, - 3, - 2, - 1, - 0, - 7, - 6, - 5, - 4, - 3, - 2, - 1, - 0, - 8, - 6, - 5, - 4, - 3, - 2, - 1, - 0, - 8, - 7, - 5, - 4, - 3, - 2, - 1, - 0, - 8, - 7, - 6, - 4, - 3, - 2, - 1, - 0, - 8, - 7, - 6, - 5, - 3, - 2, - 1, - 0, - 8, - 7, - 6, - 5, - 4, - 2, - 1, - 0, - 8, - 7, - 6, - 5, - 4, - 3, - 1, - 0, - 8, - 7, - 6, - 5, - 4, - 3, - 2, - 0, - 8, - 7, - 6, - 5, - 4, - 3, - 2, - 1, - 8, - 7, - 6, - 5, - 4, - 3, - 2, - 2, - 8, - 7, - 6, - 5, - 4, - 3, - 3, - 2, - 8, - 7, - 6, - 5, - 4, - 4, - 3, - 2, - 8, - 7, - 6, - 5, - 5, - 4, - 3, - 2, - 8, - 7, - 6, - 6, - 5, - 4, - 3, - 2, - 8, - 7, - 7, - 6, - 5, - 4, - 3, - 2, - 8, - 8, - 7, - 6, - 5, - 4, - 3, - 2, - 8, - 8, - 8, - 6, - 5, - 4, - 3, - 2, - 8, - 8, - 8, - 7, - 5, - 4, - 3, - 2, - 8, - 8, - 8, - 7, - 6, - 4, - 3, - 2, - 8, - 8, - 8, - 7, - 6, - 5, - 3, - 2, - 8, - 8, - 8, - 7, - 6, - 5, - 4, - 2, - 8, - 8, - 8, - 7, - 6, - 5, - 4, - 3, - 8, - 8, - 8, - 7, - 6, - 5, - 4, - 4, - 8, - 8, - 8, - 7, - 6, - 5, - 5, - 4, - 8, - 8, - 8, - 7, - 6, - 6, - 5, - 4, - 8, - 8, - 8, - 7, - 7, - 6, - 5, - 4, - 8, - 8, - 8, - 8, - 7, - 6, - 5, - 4, - 8, - 8, - 8, - 8, - 8, - 6, - 5, - 4, - 8, - 8, - 8, - 8, - 8, - 7, - 5, - 4, - 8, - 8, - 8, - 8, - 8, - 7, - 6, - 4, - 8, - 8, - 8, - 8, - 8, - 7, - 6, - 5, - 8, - 8, - 8, - 8, - 8, - 7, - 6, - 6, - 8, - 8, - 8, - 8, - 8, - 7, - 7, - 6, - 8, - 8, - 8, - 8, - 8, - 8, - 7, - 6, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 6, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 7, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, + { + 1,0,0,0,0,0,0,0, 2,0,0,0,0,0,0,0, 2,1,0,0,0,0,0,0, 2,1,1,0,0,0,0,0, 2,2,1,0,0,0,0,0, 3,2,1,0,0,0,0,0, 4,2,1,0,0,0,0,0, 4,3,1,0,0,0,0,0, + 4,3,2,0,0,0,0,0, 4,3,2,1,0,0,0,0, 4,3,2,1,1,0,0,0, 4,3,2,2,1,0,0,0, 4,3,3,2,1,0,0,0, 4,4,3,2,1,0,0,0, 5,4,3,2,1,0,0,0, 6,4,3,2,1,0,0,0, + 6,5,3,2,1,0,0,0, 6,5,4,2,1,0,0,0, 6,5,4,3,1,0,0,0, 6,5,4,3,2,0,0,0, 6,5,4,3,2,1,0,0, 6,5,4,3,2,1,1,0, 6,5,4,3,2,2,1,0, 6,5,4,3,3,2,1,0, + 6,5,4,4,3,2,1,0, 6,5,5,4,3,2,1,0, 6,6,5,4,3,2,1,0, 7,6,5,4,3,2,1,0, 8,6,5,4,3,2,1,0, 8,7,5,4,3,2,1,0, 8,7,6,4,3,2,1,0, 8,7,6,5,3,2,1,0, + 8,7,6,5,4,2,1,0, 8,7,6,5,4,3,1,0, 8,7,6,5,4,3,2,0, 8,7,6,5,4,3,2,1, 8,7,6,5,4,3,2,2, 8,7,6,5,4,3,3,2, 8,7,6,5,4,4,3,2, 8,7,6,5,5,4,3,2, + 8,7,6,6,5,4,3,2, 8,7,7,6,5,4,3,2, 8,8,7,6,5,4,3,2, 8,8,8,6,5,4,3,2, 8,8,8,7,5,4,3,2, 8,8,8,7,6,4,3,2, 8,8,8,7,6,5,3,2, 8,8,8,7,6,5,4,2, + 8,8,8,7,6,5,4,3, 8,8,8,7,6,5,4,4, 8,8,8,7,6,5,5,4, 8,8,8,7,6,6,5,4, 8,8,8,7,7,6,5,4, 8,8,8,8,7,6,5,4, 8,8,8,8,8,6,5,4, 8,8,8,8,8,7,5,4, + 8,8,8,8,8,7,6,4, 8,8,8,8,8,7,6,5, 8,8,8,8,8,7,6,6, 8,8,8,8,8,7,7,6, 8,8,8,8,8,8,7,6, 8,8,8,8,8,8,8,6, 8,8,8,8,8,8,8,7, 8,8,8,8,8,8,8,8, }; - static const uint8 s_idct_col_table[] = - { - 1, 1, 2, 3, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8}; + static const uint8 s_idct_col_table[] = + { + 1, 1, 2, 3, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 + }; // Scalar "fast pathing" IDCT. static void idct(const jpgd_block_coeff_t* pSrc_ptr, uint8* pDst_ptr, int block_max_zag, bool use_simd) @@ -834,7 +288,7 @@ namespace jpgd assert(block_max_zag >= 1); assert(block_max_zag <= 64); - + if (block_max_zag <= 1) { int k = ((pSrc_ptr[0] + 4) >> 3) + 128; @@ -872,33 +326,15 @@ namespace jpgd { switch (*pRow_tab) { - case 0: - Row<0>::idct(pTemp, pSrc); - break; - case 1: - Row<1>::idct(pTemp, pSrc); - break; - case 2: - Row<2>::idct(pTemp, pSrc); - break; - case 3: - Row<3>::idct(pTemp, pSrc); - break; - case 4: - Row<4>::idct(pTemp, pSrc); - break; - case 5: - Row<5>::idct(pTemp, pSrc); - break; - case 6: - Row<6>::idct(pTemp, pSrc); - break; - case 7: - Row<7>::idct(pTemp, pSrc); - break; - case 8: - Row<8>::idct(pTemp, pSrc); - break; + case 0: Row<0>::idct(pTemp, pSrc); break; + case 1: Row<1>::idct(pTemp, pSrc); break; + case 2: Row<2>::idct(pTemp, pSrc); break; + case 3: Row<3>::idct(pTemp, pSrc); break; + case 4: Row<4>::idct(pTemp, pSrc); break; + case 5: Row<5>::idct(pTemp, pSrc); break; + case 6: Row<6>::idct(pTemp, pSrc); break; + case 7: Row<7>::idct(pTemp, pSrc); break; + case 8: Row<8>::idct(pTemp, pSrc); break; } pSrc += 8; @@ -912,30 +348,14 @@ namespace jpgd { switch (nonzero_rows) { - case 1: - Col<1>::idct(pDst_ptr, pTemp); - break; - case 2: - Col<2>::idct(pDst_ptr, pTemp); - break; - case 3: - Col<3>::idct(pDst_ptr, pTemp); - break; - case 4: - Col<4>::idct(pDst_ptr, pTemp); - break; - case 5: - Col<5>::idct(pDst_ptr, pTemp); - break; - case 6: - Col<6>::idct(pDst_ptr, pTemp); - break; - case 7: - Col<7>::idct(pDst_ptr, pTemp); - break; - case 8: - Col<8>::idct(pDst_ptr, pTemp); - break; + case 1: Col<1>::idct(pDst_ptr, pTemp); break; + case 2: Col<2>::idct(pDst_ptr, pTemp); break; + case 3: Col<3>::idct(pDst_ptr, pTemp); break; + case 4: Col<4>::idct(pDst_ptr, pTemp); break; + case 5: Col<5>::idct(pDst_ptr, pTemp); break; + case 6: Col<6>::idct(pDst_ptr, pTemp); break; + case 7: Col<7>::idct(pDst_ptr, pTemp); break; + case 8: Col<8>::idct(pDst_ptr, pTemp); break; } pTemp++; @@ -1197,8 +617,8 @@ namespace jpgd } // Tables and macro used to fully decode the DPCM differences. - static const int s_extend_test[16] = {0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000}; - static const int s_extend_offset[16] = {0, -1, -3, -7, -15, -31, -63, -127, -255, -511, -1023, -2047, -4095, -8191, -16383, -32767}; + static const int s_extend_test[16] = { 0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000 }; + static const int s_extend_offset[16] = { 0, -1, -3, -7, -15, -31, -63, -127, -255, -511, -1023, -2047, -4095, -8191, -16383, -32767 }; //static const int s_extend_mask[] = { 0, (1 << 0), (1 << 1), (1 << 2), (1 << 3), (1 << 4), (1 << 5), (1 << 6), (1 << 7), (1 << 8), (1 << 9), (1 << 10), (1 << 11), (1 << 12), (1 << 13), (1 << 14), (1 << 15), (1 << 16) }; #define JPGD_HUFF_EXTEND(x, s) (((x) < s_extend_test[s & 15]) ? ((x) + s_extend_offset[s & 15]) : (x)) @@ -1207,7 +627,7 @@ namespace jpgd void jpeg_decoder::free_all_blocks() { m_pStream = nullptr; - for (mem_block* b = m_pMem_blocks; b;) + for (mem_block* b = m_pMem_blocks; b; ) { mem_block* n = b->m_pNext; jpgd_free(b); @@ -1224,7 +644,7 @@ namespace jpgd free_all_blocks(); longjmp(m_jmp_state, status); } - + void* jpeg_decoder::alloc(size_t nSize, bool zero) { nSize = (JPGD_MAX(nSize, 1) + 3) & ~3; @@ -1253,16 +673,15 @@ namespace jpgd b->m_size = capacity; rv = b->m_data; } - if (zero) - memset(rv, 0, nSize); + if (zero) memset(rv, 0, nSize); return rv; } void* jpeg_decoder::alloc_aligned(size_t nSize, uint32_t align, bool zero) { assert((align >= 1U) && ((align & (align - 1U)) == 0U)); - void* p = alloc(nSize + align - 1U, zero); - p = (void*)(((uintptr_t)p + (align - 1U)) & ~((uintptr_t)(align - 1U))); + void *p = alloc(nSize + align - 1U, zero); + p = (void *)( ((uintptr_t)p + (align - 1U)) & ~((uintptr_t)(align - 1U)) ); return p; } @@ -1600,73 +1019,73 @@ namespace jpgd { int c; - for (;;) + for (; ; ) { c = next_marker(); switch (c) { - case M_SOF0: - case M_SOF1: - case M_SOF2: - case M_SOF3: - case M_SOF5: - case M_SOF6: - case M_SOF7: - // case M_JPG: - case M_SOF9: - case M_SOF10: - case M_SOF11: - case M_SOF13: - case M_SOF14: - case M_SOF15: - case M_SOI: - case M_EOI: - case M_SOS: - { - return c; - } - case M_DHT: - { - read_dht_marker(); - break; - } - // No arithmitic support - dumb patents! - case M_DAC: - { - stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT); - break; - } - case M_DQT: - { - read_dqt_marker(); - break; - } - case M_DRI: - { - read_dri_marker(); - break; - } - //case M_APP0: /* no need to read the JFIF marker */ - case M_JPG: - case M_RST0: /* no parameters */ - case M_RST1: - case M_RST2: - case M_RST3: - case M_RST4: - case M_RST5: - case M_RST6: - case M_RST7: - case M_TEM: - { - stop_decoding(JPGD_UNEXPECTED_MARKER); - break; - } - default: /* must be DNL, DHP, EXP, APPn, JPGn, COM, or RESn or APP0 */ - { - skip_variable_marker(); - break; - } + case M_SOF0: + case M_SOF1: + case M_SOF2: + case M_SOF3: + case M_SOF5: + case M_SOF6: + case M_SOF7: + // case M_JPG: + case M_SOF9: + case M_SOF10: + case M_SOF11: + case M_SOF13: + case M_SOF14: + case M_SOF15: + case M_SOI: + case M_EOI: + case M_SOS: + { + return c; + } + case M_DHT: + { + read_dht_marker(); + break; + } + // No arithmitic support - dumb patents! + case M_DAC: + { + stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT); + break; + } + case M_DQT: + { + read_dqt_marker(); + break; + } + case M_DRI: + { + read_dri_marker(); + break; + } + //case M_APP0: /* no need to read the JFIF marker */ + case M_JPG: + case M_RST0: /* no parameters */ + case M_RST1: + case M_RST2: + case M_RST3: + case M_RST4: + case M_RST5: + case M_RST6: + case M_RST7: + case M_TEM: + { + stop_decoding(JPGD_UNEXPECTED_MARKER); + break; + } + default: /* must be DNL, DHP, EXP, APPn, JPGn, COM, or RESn or APP0 */ + { + skip_variable_marker(); + break; + } } } } @@ -1688,7 +1107,7 @@ namespace jpgd bytesleft = 4096; - for (;;) + for (; ; ) { if (--bytesleft == 0) stop_decoding(JPGD_NOT_JPEG); @@ -1722,28 +1141,28 @@ namespace jpgd switch (c) { - case M_SOF2: - { - m_progressive_flag = JPGD_TRUE; - read_sof_marker(); - break; - } - case M_SOF0: /* baseline DCT */ - case M_SOF1: /* extended sequential DCT */ - { - read_sof_marker(); - break; - } - case M_SOF9: /* Arithmitic coding */ - { - stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT); - break; - } - default: - { - stop_decoding(JPGD_UNSUPPORTED_MARKER); - break; - } + case M_SOF2: + { + m_progressive_flag = JPGD_TRUE; + read_sof_marker(); + break; + } + case M_SOF0: /* baseline DCT */ + case M_SOF1: /* extended sequential DCT */ + { + read_sof_marker(); + break; + } + case M_SOF9: /* Arithmitic coding */ + { + stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT); + break; + } + default: + { + stop_decoding(JPGD_UNSUPPORTED_MARKER); + break; + } } } @@ -1774,7 +1193,7 @@ namespace jpgd m_image_x_size = m_image_y_size = 0; m_pStream = pStream; m_progressive_flag = JPGD_FALSE; - + memset(m_huff_ac, 0, sizeof(m_huff_ac)); memset(m_huff_num, 0, sizeof(m_huff_num)); memset(m_huff_val, 0, sizeof(m_huff_val)); @@ -1879,8 +1298,8 @@ namespace jpgd } #define SCALEBITS 16 -#define ONE_HALF ((int)1 << (SCALEBITS - 1)) -#define FIX(x) ((int)((x) * (1L << SCALEBITS) + 0.5f)) +#define ONE_HALF ((int) 1 << (SCALEBITS-1)) +#define FIX(x) ((int) ((x) * (1L<> 1) - 1; static const uint8_t s_muls[2][2][4] = - { - { - {1, 3, 3, 9}, - {3, 9, 1, 3}, - }, - {{3, 1, 9, 3}, {9, 3, 3, 1}}}; + { + { { 1, 3, 3, 9 }, { 3, 9, 1, 3 }, }, + { { 3, 1, 9, 3 }, { 9, 3, 3, 1 } } + }; if (((row & 15) >= 1) && ((row & 15) <= 14)) { @@ -2773,77 +2186,77 @@ namespace jpgd switch (m_scan_type) { - case JPGD_YH2V2: + case JPGD_YH2V2: + { + if ((m_flags & cFlagBoxChromaFiltering) == 0) { - if ((m_flags & cFlagBoxChromaFiltering) == 0) + if (m_num_buffered_scanlines == 1) { - if (m_num_buffered_scanlines == 1) - { - *pScan_line = m_pScan_line_1; - } - else if (m_num_buffered_scanlines == 0) - { - m_num_buffered_scanlines = H2V2ConvertFiltered(); - *pScan_line = m_pScan_line_0; - } - - m_num_buffered_scanlines--; + *pScan_line = m_pScan_line_1; } - else + else if (m_num_buffered_scanlines == 0) { - if ((m_mcu_lines_left & 1) == 0) - { - H2V2Convert(); - *pScan_line = m_pScan_line_0; - } - else - *pScan_line = m_pScan_line_1; + m_num_buffered_scanlines = H2V2ConvertFiltered(); + *pScan_line = m_pScan_line_0; } - break; + m_num_buffered_scanlines--; } - case JPGD_YH2V1: + else { - if ((m_flags & cFlagBoxChromaFiltering) == 0) - H2V1ConvertFiltered(); - else - H2V1Convert(); - *pScan_line = m_pScan_line_0; - break; - } - case JPGD_YH1V2: - { - if (chroma_y_filtering) + if ((m_mcu_lines_left & 1) == 0) { - H1V2ConvertFiltered(); + H2V2Convert(); *pScan_line = m_pScan_line_0; } else + *pScan_line = m_pScan_line_1; + } + + break; + } + case JPGD_YH2V1: + { + if ((m_flags & cFlagBoxChromaFiltering) == 0) + H2V1ConvertFiltered(); + else + H2V1Convert(); + *pScan_line = m_pScan_line_0; + break; + } + case JPGD_YH1V2: + { + if (chroma_y_filtering) + { + H1V2ConvertFiltered(); + *pScan_line = m_pScan_line_0; + } + else + { + if ((m_mcu_lines_left & 1) == 0) { - if ((m_mcu_lines_left & 1) == 0) - { - H1V2Convert(); - *pScan_line = m_pScan_line_0; - } - else - *pScan_line = m_pScan_line_1; + H1V2Convert(); + *pScan_line = m_pScan_line_0; } + else + *pScan_line = m_pScan_line_1; + } - break; - } - case JPGD_YH1V1: - { - H1V1Convert(); - *pScan_line = m_pScan_line_0; - break; - } - case JPGD_GRAYSCALE: - { - gray_convert(); - *pScan_line = m_pScan_line_0; + break; + } + case JPGD_YH1V1: + { + H1V1Convert(); + *pScan_line = m_pScan_line_0; + break; + } + case JPGD_GRAYSCALE: + { + gray_convert(); + *pScan_line = m_pScan_line_0; - break; - } + break; + } } *pScan_line_len = m_real_dest_bytes_per_scan_line; @@ -3230,8 +2643,8 @@ namespace jpgd stop_decoding(JPGD_DECODE_ERROR); // Allocate the coefficient buffer, enough for one MCU - m_pMCU_coefficients = (jpgd_block_coeff_t*)alloc_aligned(m_max_blocks_per_mcu * 64 * sizeof(jpgd_block_coeff_t)); - + m_pMCU_coefficients = (jpgd_block_coeff_t *)alloc_aligned(m_max_blocks_per_mcu * 64 * sizeof(jpgd_block_coeff_t)); + for (i = 0; i < m_max_blocks_per_mcu; i++) m_mcu_block_max_zag[i] = 64; @@ -3545,7 +2958,7 @@ namespace jpgd uint32_t total_scans = 0; const uint32_t MAX_SCANS_TO_PROCESS = 1000; - for (;;) + for (; ; ) { int dc_only_scan, refinement_scan; pDecode_block_func decode_block_func; @@ -3564,7 +2977,7 @@ namespace jpgd if (m_spectral_end) stop_decoding(JPGD_BAD_SOS_SPECTRAL); } - else if (m_comps_in_scan != 1) /* AC scans can only contain one component */ + else if (m_comps_in_scan != 1) /* AC scans can only contain one component */ stop_decoding(JPGD_BAD_SOS_SPECTRAL); if ((refinement_scan) && (m_successive_low != m_successive_high - 1)) diff --git a/pcsx2/USB/usb-eyetoy/jpgd/jpgd.h b/pcsx2/USB/usb-eyetoy/jpgd/jpgd.h index bca66655ac..39136696ba 100644 --- a/pcsx2/USB/usb-eyetoy/jpgd/jpgd.h +++ b/pcsx2/USB/usb-eyetoy/jpgd/jpgd.h @@ -11,9 +11,9 @@ #include #ifdef _MSC_VER -#define JPGD_NORETURN __declspec(noreturn) +#define JPGD_NORETURN __declspec(noreturn) #elif defined(__GNUC__) -#define JPGD_NORETURN __attribute__((noreturn)) +#define JPGD_NORETURN __attribute__ ((noreturn)) #else #define JPGD_NORETURN #endif @@ -23,11 +23,11 @@ namespace jpgd { - typedef unsigned char uint8; - typedef signed short int16; + typedef unsigned char uint8; + typedef signed short int16; typedef unsigned short uint16; - typedef unsigned int uint; - typedef signed int int32; + typedef unsigned int uint; + typedef signed int int32; // Loads a JPEG image from a memory buffer or a file. // req_comps can be 1 (grayscale), 3 (RGB), or 4 (RGBA). @@ -40,42 +40,15 @@ namespace jpgd // Success/failure error codes. enum jpgd_status { - JPGD_SUCCESS = 0, - JPGD_FAILED = -1, - JPGD_DONE = 1, - JPGD_BAD_DHT_COUNTS = -256, - JPGD_BAD_DHT_INDEX, - JPGD_BAD_DHT_MARKER, - JPGD_BAD_DQT_MARKER, - JPGD_BAD_DQT_TABLE, - JPGD_BAD_PRECISION, - JPGD_BAD_HEIGHT, - JPGD_BAD_WIDTH, - JPGD_TOO_MANY_COMPONENTS, - JPGD_BAD_SOF_LENGTH, - JPGD_BAD_VARIABLE_MARKER, - JPGD_BAD_DRI_LENGTH, - JPGD_BAD_SOS_LENGTH, - JPGD_BAD_SOS_COMP_ID, - JPGD_W_EXTRA_BYTES_BEFORE_MARKER, - JPGD_NO_ARITHMITIC_SUPPORT, - JPGD_UNEXPECTED_MARKER, - JPGD_NOT_JPEG, - JPGD_UNSUPPORTED_MARKER, - JPGD_BAD_DQT_LENGTH, - JPGD_TOO_MANY_BLOCKS, - JPGD_UNDEFINED_QUANT_TABLE, - JPGD_UNDEFINED_HUFF_TABLE, - JPGD_NOT_SINGLE_SCAN, - JPGD_UNSUPPORTED_COLORSPACE, - JPGD_UNSUPPORTED_SAMP_FACTORS, - JPGD_DECODE_ERROR, - JPGD_BAD_RESTART_MARKER, - JPGD_BAD_SOS_SPECTRAL, - JPGD_BAD_SOS_SUCCESSIVE, - JPGD_STREAM_READ, - JPGD_NOTENOUGHMEM, - JPGD_TOO_MANY_SCANS + JPGD_SUCCESS = 0, JPGD_FAILED = -1, JPGD_DONE = 1, + JPGD_BAD_DHT_COUNTS = -256, JPGD_BAD_DHT_INDEX, JPGD_BAD_DHT_MARKER, JPGD_BAD_DQT_MARKER, JPGD_BAD_DQT_TABLE, + JPGD_BAD_PRECISION, JPGD_BAD_HEIGHT, JPGD_BAD_WIDTH, JPGD_TOO_MANY_COMPONENTS, + JPGD_BAD_SOF_LENGTH, JPGD_BAD_VARIABLE_MARKER, JPGD_BAD_DRI_LENGTH, JPGD_BAD_SOS_LENGTH, + JPGD_BAD_SOS_COMP_ID, JPGD_W_EXTRA_BYTES_BEFORE_MARKER, JPGD_NO_ARITHMITIC_SUPPORT, JPGD_UNEXPECTED_MARKER, + JPGD_NOT_JPEG, JPGD_UNSUPPORTED_MARKER, JPGD_BAD_DQT_LENGTH, JPGD_TOO_MANY_BLOCKS, + JPGD_UNDEFINED_QUANT_TABLE, JPGD_UNDEFINED_HUFF_TABLE, JPGD_NOT_SINGLE_SCAN, JPGD_UNSUPPORTED_COLORSPACE, + JPGD_UNSUPPORTED_SAMP_FACTORS, JPGD_DECODE_ERROR, JPGD_BAD_RESTART_MARKER, + JPGD_BAD_SOS_SPECTRAL, JPGD_BAD_SOS_SUCCESSIVE, JPGD_STREAM_READ, JPGD_NOTENOUGHMEM, JPGD_TOO_MANY_SCANS }; // Input stream interface. @@ -86,8 +59,8 @@ namespace jpgd class jpeg_decoder_stream { public: - jpeg_decoder_stream() {} - virtual ~jpeg_decoder_stream() {} + jpeg_decoder_stream() { } + virtual ~jpeg_decoder_stream() { } // The read() method is called when the internal input buffer is empty. // Parameters: @@ -103,7 +76,7 @@ namespace jpgd class jpeg_decoder_file_stream : public jpeg_decoder_stream { jpeg_decoder_file_stream(const jpeg_decoder_file_stream&); - jpeg_decoder_file_stream& operator=(const jpeg_decoder_file_stream&); + jpeg_decoder_file_stream& operator =(const jpeg_decoder_file_stream&); FILE* m_pFile; bool m_eof_flag, m_error_flag; @@ -125,28 +98,13 @@ namespace jpgd uint m_ofs, m_size; public: - jpeg_decoder_mem_stream() - : m_pSrc_data(NULL) - , m_ofs(0) - , m_size(0) - { - } - jpeg_decoder_mem_stream(const uint8* pSrc_data, uint size) - : m_pSrc_data(pSrc_data) - , m_ofs(0) - , m_size(size) - { - } + jpeg_decoder_mem_stream() : m_pSrc_data(NULL), m_ofs(0), m_size(0) { } + jpeg_decoder_mem_stream(const uint8* pSrc_data, uint size) : m_pSrc_data(pSrc_data), m_ofs(0), m_size(size) { } - virtual ~jpeg_decoder_mem_stream() {} + virtual ~jpeg_decoder_mem_stream() { } bool open(const uint8* pSrc_data, uint size); - void close() - { - m_pSrc_data = NULL; - m_ofs = 0; - m_size = 0; - } + void close() { m_pSrc_data = NULL; m_ofs = 0; m_size = 0; } virtual int read(uint8* pBuf, int max_bytes_to_read, bool* pEOF_flag); }; @@ -156,15 +114,8 @@ namespace jpgd enum { - JPGD_IN_BUF_SIZE = 8192, - JPGD_MAX_BLOCKS_PER_MCU = 10, - JPGD_MAX_HUFF_TABLES = 8, - JPGD_MAX_QUANT_TABLES = 4, - JPGD_MAX_COMPONENTS = 4, - JPGD_MAX_COMPS_IN_SCAN = 4, - JPGD_MAX_BLOCKS_PER_ROW = 16384, - JPGD_MAX_HEIGHT = 32768, - JPGD_MAX_WIDTH = 32768 + JPGD_IN_BUF_SIZE = 8192, JPGD_MAX_BLOCKS_PER_MCU = 10, JPGD_MAX_HUFF_TABLES = 8, JPGD_MAX_QUANT_TABLES = 4, + JPGD_MAX_COMPONENTS = 4, JPGD_MAX_COMPS_IN_SCAN = 4, JPGD_MAX_BLOCKS_PER_ROW = 16384, JPGD_MAX_HEIGHT = 32768, JPGD_MAX_WIDTH = 32768 }; typedef int16 jpgd_quant_t; @@ -191,7 +142,7 @@ namespace jpgd int begin_decoding(); // Returns the next scan line. - // For grayscale images, pScan_line will point to a buffer containing 8-bit pixels (get_bytes_per_pixel() will return 1). + // For grayscale images, pScan_line will point to a buffer containing 8-bit pixels (get_bytes_per_pixel() will return 1). // Otherwise, it will always point to a buffer containing 32-bit RGBA pixels (A will always be 255, and get_bytes_per_pixel() will return 4). // Returns JPGD_SUCCESS if a scan line has been returned. // Returns JPGD_DONE if all scan lines have been returned. @@ -213,17 +164,17 @@ namespace jpgd private: jpeg_decoder(const jpeg_decoder&); - jpeg_decoder& operator=(const jpeg_decoder&); + jpeg_decoder& operator =(const jpeg_decoder&); typedef void (*pDecode_block_func)(jpeg_decoder*, int, int, int); struct huff_tables { bool ac_table; - uint look_up[256]; - uint look_up2[256]; + uint look_up[256]; + uint look_up2[256]; uint8 code_size[JPGD_HUFF_CODE_SIZE_MAX_LENGTH]; - uint tree[JPGD_HUFF_TREE_MAX_LENGTH]; + uint tree[JPGD_HUFF_TREE_MAX_LENGTH]; }; struct coeff_buf @@ -263,26 +214,26 @@ namespace jpgd int m_comp_ident[JPGD_MAX_COMPONENTS]; // component's ID int m_comp_h_blocks[JPGD_MAX_COMPONENTS]; int m_comp_v_blocks[JPGD_MAX_COMPONENTS]; - int m_comps_in_scan; // # of components in scan - int m_comp_list[JPGD_MAX_COMPS_IN_SCAN]; // components in this scan - int m_comp_dc_tab[JPGD_MAX_COMPONENTS]; // component's DC Huffman coding table selector - int m_comp_ac_tab[JPGD_MAX_COMPONENTS]; // component's AC Huffman coding table selector - int m_spectral_start; // spectral selection start - int m_spectral_end; // spectral selection end - int m_successive_low; // successive approximation low - int m_successive_high; // successive approximation high - int m_max_mcu_x_size; // MCU's max. X size in pixels - int m_max_mcu_y_size; // MCU's max. Y size in pixels + int m_comps_in_scan; // # of components in scan + int m_comp_list[JPGD_MAX_COMPS_IN_SCAN]; // components in this scan + int m_comp_dc_tab[JPGD_MAX_COMPONENTS]; // component's DC Huffman coding table selector + int m_comp_ac_tab[JPGD_MAX_COMPONENTS]; // component's AC Huffman coding table selector + int m_spectral_start; // spectral selection start + int m_spectral_end; // spectral selection end + int m_successive_low; // successive approximation low + int m_successive_high; // successive approximation high + int m_max_mcu_x_size; // MCU's max. X size in pixels + int m_max_mcu_y_size; // MCU's max. Y size in pixels int m_blocks_per_mcu; int m_max_blocks_per_row; int m_mcus_per_row, m_mcus_per_col; int m_mcu_org[JPGD_MAX_BLOCKS_PER_MCU]; - int m_total_lines_left; // total # lines left in image - int m_mcu_lines_left; // total # lines left in this MCU + int m_total_lines_left; // total # lines left in image + int m_mcu_lines_left; // total # lines left in this MCU int m_num_buffered_scanlines; int m_real_dest_bytes_per_scan_line; - int m_dest_bytes_per_scan_line; // rounded up - int m_dest_bytes_per_pixel; // 4 (RGB) or 1 (Y) + int m_dest_bytes_per_scan_line; // rounded up + int m_dest_bytes_per_pixel; // 4 (RGB) or 1 (Y) huff_tables* m_pHuff_tabs[JPGD_MAX_HUFF_TABLES]; coeff_buf* m_dc_coeffs[JPGD_MAX_COMPONENTS]; coeff_buf* m_ac_coeffs[JPGD_MAX_COMPONENTS]; @@ -324,12 +275,7 @@ namespace jpgd bool m_sample_buf_prev_valid; bool m_has_sse2; - inline int check_sample_buf_ofs(int ofs) const - { - assert(ofs >= 0); - assert(ofs < m_max_blocks_per_row * 64); - return ofs; - } + inline int check_sample_buf_ofs(int ofs) const { assert(ofs >= 0); assert(ofs < m_max_blocks_per_row * 64); return ofs; } void free_all_blocks(); JPGD_NORETURN void stop_decoding(jpgd_status status); void* alloc(size_t n, bool zero = false); diff --git a/pcsx2/USB/usb-eyetoy/jpgd/jpgd_idct.h b/pcsx2/USB/usb-eyetoy/jpgd/jpgd_idct.h index 16d34dec03..876425a959 100644 --- a/pcsx2/USB/usb-eyetoy/jpgd/jpgd_idct.h +++ b/pcsx2/USB/usb-eyetoy/jpgd/jpgd_idct.h @@ -24,26 +24,26 @@ #include #ifdef _MSC_VER -#define JPGD_SIMD_ALIGN(type, name) __declspec(align(16)) type name + #define JPGD_SIMD_ALIGN(type, name) __declspec(align(16)) type name #else -#define JPGD_SIMD_ALIGN(type, name) type name __attribute__((aligned(16))) + #define JPGD_SIMD_ALIGN(type, name) type name __attribute__((aligned(16))) #endif #define BITS_INV_ACC 4 #define SHIFT_INV_ROW 16 - BITS_INV_ACC #define SHIFT_INV_COL 1 + BITS_INV_ACC -const short IRND_INV_ROW = 1024 * (6 - BITS_INV_ACC); //1 << (SHIFT_INV_ROW-1) -const short IRND_INV_COL = 16 * (BITS_INV_ACC - 3); // 1 << (SHIFT_INV_COL-1) -const short IRND_INV_CORR = IRND_INV_COL - 1; // correction -1.0 and round +const short IRND_INV_ROW = 1024 * (6 - BITS_INV_ACC); //1 << (SHIFT_INV_ROW-1) +const short IRND_INV_COL = 16 * (BITS_INV_ACC - 3); // 1 << (SHIFT_INV_COL-1) +const short IRND_INV_CORR = IRND_INV_COL - 1; // correction -1.0 and round JPGD_SIMD_ALIGN(short, shortM128_one_corr[8]) = {1, 1, 1, 1, 1, 1, 1, 1}; JPGD_SIMD_ALIGN(short, shortM128_round_inv_row[8]) = {IRND_INV_ROW, 0, IRND_INV_ROW, 0, IRND_INV_ROW, 0, IRND_INV_ROW, 0}; JPGD_SIMD_ALIGN(short, shortM128_round_inv_col[8]) = {IRND_INV_COL, IRND_INV_COL, IRND_INV_COL, IRND_INV_COL, IRND_INV_COL, IRND_INV_COL, IRND_INV_COL, IRND_INV_COL}; -JPGD_SIMD_ALIGN(short, shortM128_round_inv_corr[8]) = {IRND_INV_CORR, IRND_INV_CORR, IRND_INV_CORR, IRND_INV_CORR, IRND_INV_CORR, IRND_INV_CORR, IRND_INV_CORR, IRND_INV_CORR}; -JPGD_SIMD_ALIGN(short, shortM128_tg_1_16[8]) = {13036, 13036, 13036, 13036, 13036, 13036, 13036, 13036}; // tg * (2<<16) + 0.5 -JPGD_SIMD_ALIGN(short, shortM128_tg_2_16[8]) = {27146, 27146, 27146, 27146, 27146, 27146, 27146, 27146}; // tg * (2<<16) + 0.5 -JPGD_SIMD_ALIGN(short, shortM128_tg_3_16[8]) = {-21746, -21746, -21746, -21746, -21746, -21746, -21746, -21746}; // tg * (2<<16) + 0.5 -JPGD_SIMD_ALIGN(short, shortM128_cos_4_16[8]) = {-19195, -19195, -19195, -19195, -19195, -19195, -19195, -19195}; // cos * (2<<16) + 0.5 +JPGD_SIMD_ALIGN(short, shortM128_round_inv_corr[8])= {IRND_INV_CORR, IRND_INV_CORR, IRND_INV_CORR, IRND_INV_CORR, IRND_INV_CORR, IRND_INV_CORR, IRND_INV_CORR, IRND_INV_CORR}; +JPGD_SIMD_ALIGN(short, shortM128_tg_1_16[8]) = {13036, 13036, 13036, 13036, 13036, 13036, 13036, 13036}; // tg * (2<<16) + 0.5 +JPGD_SIMD_ALIGN(short, shortM128_tg_2_16[8]) = {27146, 27146, 27146, 27146, 27146, 27146, 27146, 27146}; // tg * (2<<16) + 0.5 +JPGD_SIMD_ALIGN(short, shortM128_tg_3_16[8]) = {-21746, -21746, -21746, -21746, -21746, -21746, -21746, -21746}; // tg * (2<<16) + 0.5 +JPGD_SIMD_ALIGN(short, shortM128_cos_4_16[8]) = {-19195, -19195, -19195, -19195, -19195, -19195, -19195, -19195};// cos * (2<<16) + 0.5 //----------------------------------------------------------------------------- // Table for rows 0,4 - constants are multiplied on cos_4_16 @@ -56,22 +56,22 @@ JPGD_SIMD_ALIGN(short, shortM128_tab_i_04[]) = { 16384, -8867, 16384, -21407, // w13 w12 w09 w08 16384, 8867, -16384, -21407, // w07 w06 w03 w02 -16384, 21407, 16384, -8867, // w15 w14 w11 w10 - 22725, 19266, 19266, -4520, // w21 w20 w17 w16 + 22725, 19266, 19266, -4520, // w21 w20 w17 w16 12873, -22725, 4520, -12873, // w29 w28 w25 w24 12873, 4520, -22725, -12873, // w23 w22 w19 w18 4520, 19266, 19266, -22725}; // w31 w30 w27 w26 -// Table for rows 1,7 - constants are multiplied on cos_1_16 + // Table for rows 1,7 - constants are multiplied on cos_1_16 //movq -> w05 w04 w01 w00 JPGD_SIMD_ALIGN(short, shortM128_tab_i_17[]) = { 22725, 29692, 22725, 12299, 22725, -12299, 22725, -29692, // w13 w12 w09 w08 22725, 12299, -22725, -29692, // w07 w06 w03 w02 -22725, 29692, 22725, -12299, // w15 w14 w11 w10 - 31521, 26722, 26722, -6270, // w21 w20 w17 w16 - 17855, -31521, 6270, -17855, // w29 w28 w25 w24 - 17855, 6270, -31521, -17855, // w23 w22 w19 w18 - 6270, 26722, 26722, -31521}; // w31 w30 w27 w26 + 31521, 26722, 26722, -6270, // w21 w20 w17 w16 + 17855, -31521, 6270, -17855, // w29 w28 w25 w24 + 17855, 6270, -31521, -17855, // w23 w22 w19 w18 + 6270, 26722, 26722, -31521}; // w31 w30 w27 w26 // Table for rows 2,6 - constants are multiplied on cos_2_16 //movq -> w05 w04 w01 w00 @@ -80,10 +80,10 @@ JPGD_SIMD_ALIGN(short, shortM128_tab_i_26[]) = { 21407, -11585, 21407, -27969, // w13 w12 w09 w08 21407, 11585, -21407, -27969, // w07 w06 w03 w02 -21407, 27969, 21407, -11585, // w15 w14 w11 w10 - 29692, 25172, 25172, -5906, // w21 w20 w17 w16 - 16819, -29692, 5906, -16819, // w29 w28 w25 w24 - 16819, 5906, -29692, -16819, // w23 w22 w19 w18 - 5906, 25172, 25172, -29692}; // w31 w30 w27 w26 + 29692, 25172, 25172, -5906, // w21 w20 w17 w16 + 16819, -29692, 5906, -16819, // w29 w28 w25 w24 + 16819, 5906, -29692, -16819, // w23 w22 w19 w18 + 5906, 25172, 25172, -29692}; // w31 w30 w27 w26 // Table for rows 3,5 - constants are multiplied on cos_3_16 //movq -> w05 w04 w01 w00 JPGD_SIMD_ALIGN(short, shortM128_tab_i_35[]) = { @@ -91,28 +91,28 @@ JPGD_SIMD_ALIGN(short, shortM128_tab_i_35[]) = { 19266, -10426, 19266, -25172, // w13 w12 w09 w08 19266, 10426, -19266, -25172, // w07 w06 w03 w02 -19266, 25172, 19266, -10426, // w15 w14 w11 w10 - 26722, 22654, 22654, -5315, // w21 w20 w17 w16 - 15137, -26722, 5315, -15137, // w29 w28 w25 w24 - 15137, 5315, -26722, -15137, // w23 w22 w19 w18 - 5315, 22654, 22654, -26722}; // w31 w30 w27 w26 + 26722, 22654, 22654, -5315, // w21 w20 w17 w16 + 15137, -26722, 5315, -15137, // w29 w28 w25 w24 + 15137, 5315, -26722, -15137, // w23 w22 w19 w18 + 5315, 22654, 22654, -26722}; // w31 w30 w27 w26 -JPGD_SIMD_ALIGN(short, shortM128_128[8]) = {128, 128, 128, 128, 128, 128, 128, 128}; +JPGD_SIMD_ALIGN(short, shortM128_128[8]) = { 128, 128, 128, 128, 128, 128, 128, 128 }; -void idctSSEShortU8(const short* pInput, uint8_t* pOutputUB) +void idctSSEShortU8(const short *pInput, uint8_t * pOutputUB) { __m128i r_xmm0, r_xmm4; __m128i r_xmm1, r_xmm2, r_xmm3, r_xmm5, r_xmm6, r_xmm7; __m128i row0, row1, row2, row3, row4, row5, row6, row7; - short* pTab_i_04 = shortM128_tab_i_04; - short* pTab_i_26 = shortM128_tab_i_26; + short * pTab_i_04 = shortM128_tab_i_04; + short * pTab_i_26 = shortM128_tab_i_26; //Get pointers for this input and output pTab_i_04 = shortM128_tab_i_04; pTab_i_26 = shortM128_tab_i_26; //Row 1 and Row 3 - r_xmm0 = _mm_load_si128((__m128i*)pInput); - r_xmm4 = _mm_load_si128((__m128i*)(&pInput[2 * 8])); + r_xmm0 = _mm_load_si128((__m128i *) pInput); + r_xmm4 = _mm_load_si128((__m128i *) (&pInput[2*8])); // *** Work on the data in xmm0 //low shuffle mask = 0xd8 = 11 01 10 00 @@ -121,58 +121,58 @@ void idctSSEShortU8(const short* pInput, uint8_t* pOutputUB) // copy short 2 and short 0 to all locations r_xmm1 = _mm_shuffle_epi32(r_xmm0, 0); - + // add to those copies - r_xmm1 = _mm_madd_epi16(r_xmm1, *((__m128i*)pTab_i_04)); + r_xmm1 = _mm_madd_epi16(r_xmm1, *((__m128i *) pTab_i_04)); // shuffle mask = 0x55 = 01 01 01 01 // copy short 3 and short 1 to all locations r_xmm3 = _mm_shuffle_epi32(r_xmm0, 0x55); - + // high shuffle mask = 0xd8 = 11 01 10 00 // get short 6 and short 4 into bit positions 64-95 // get short 7 and short 5 into bit positions 96-127 r_xmm0 = _mm_shufflehi_epi16(r_xmm0, 0xd8); - + // add to short 3 and short 1 - r_xmm3 = _mm_madd_epi16(r_xmm3, *((__m128i*)&pTab_i_04[16])); - + r_xmm3 = _mm_madd_epi16(r_xmm3, *((__m128i *) &pTab_i_04[16])); + // shuffle mask = 0xaa = 10 10 10 10 // copy short 6 and short 4 to all locations r_xmm2 = _mm_shuffle_epi32(r_xmm0, 0xaa); - + // shuffle mask = 0xaa = 11 11 11 11 // copy short 7 and short 5 to all locations r_xmm0 = _mm_shuffle_epi32(r_xmm0, 0xff); - + // add to short 6 and short 4 - r_xmm2 = _mm_madd_epi16(r_xmm2, *((__m128i*)&pTab_i_04[8])); - + r_xmm2 = _mm_madd_epi16(r_xmm2, *((__m128i *) &pTab_i_04[8])); + // *** Work on the data in xmm4 // high shuffle mask = 0xd8 11 01 10 00 // get short 6 and short 4 into bit positions 64-95 // get short 7 and short 5 into bit positions 96-127 r_xmm4 = _mm_shufflehi_epi16(r_xmm4, 0xd8); - + // (xmm0 short 2 and short 0 plus pSi) + some constants - r_xmm1 = _mm_add_epi32(r_xmm1, *((__m128i*)shortM128_round_inv_row)); + r_xmm1 = _mm_add_epi32(r_xmm1, *((__m128i *) shortM128_round_inv_row)); r_xmm4 = _mm_shufflelo_epi16(r_xmm4, 0xd8); - r_xmm0 = _mm_madd_epi16(r_xmm0, *((__m128i*)&pTab_i_04[24])); + r_xmm0 = _mm_madd_epi16(r_xmm0, *((__m128i *) &pTab_i_04[24])); r_xmm5 = _mm_shuffle_epi32(r_xmm4, 0); r_xmm6 = _mm_shuffle_epi32(r_xmm4, 0xaa); - r_xmm5 = _mm_madd_epi16(r_xmm5, *((__m128i*)&shortM128_tab_i_26[0])); + r_xmm5 = _mm_madd_epi16(r_xmm5, *((__m128i *) &shortM128_tab_i_26[0])); r_xmm1 = _mm_add_epi32(r_xmm1, r_xmm2); r_xmm2 = r_xmm1; r_xmm7 = _mm_shuffle_epi32(r_xmm4, 0x55); - r_xmm6 = _mm_madd_epi16(r_xmm6, *((__m128i*)&shortM128_tab_i_26[8])); + r_xmm6 = _mm_madd_epi16(r_xmm6, *((__m128i *) &shortM128_tab_i_26[8])); r_xmm0 = _mm_add_epi32(r_xmm0, r_xmm3); r_xmm4 = _mm_shuffle_epi32(r_xmm4, 0xff); r_xmm2 = _mm_sub_epi32(r_xmm2, r_xmm0); - r_xmm7 = _mm_madd_epi16(r_xmm7, *((__m128i*)&shortM128_tab_i_26[16])); + r_xmm7 = _mm_madd_epi16(r_xmm7, *((__m128i *) &shortM128_tab_i_26[16])); r_xmm0 = _mm_add_epi32(r_xmm0, r_xmm1); r_xmm2 = _mm_srai_epi32(r_xmm2, 12); - r_xmm5 = _mm_add_epi32(r_xmm5, *((__m128i*)shortM128_round_inv_row)); - r_xmm4 = _mm_madd_epi16(r_xmm4, *((__m128i*)&shortM128_tab_i_26[24])); + r_xmm5 = _mm_add_epi32(r_xmm5, *((__m128i *) shortM128_round_inv_row)); + r_xmm4 = _mm_madd_epi16(r_xmm4, *((__m128i *) &shortM128_tab_i_26[24])); r_xmm5 = _mm_add_epi32(r_xmm5, r_xmm6); r_xmm6 = r_xmm5; r_xmm0 = _mm_srai_epi32(r_xmm0, 12); @@ -187,37 +187,37 @@ void idctSSEShortU8(const short* pInput, uint8_t* pOutputUB) row2 = _mm_packs_epi32(r_xmm4, r_xmm6); //Row 5 and row 7 - r_xmm0 = _mm_load_si128((__m128i*)(&pInput[4 * 8])); - r_xmm4 = _mm_load_si128((__m128i*)(&pInput[6 * 8])); + r_xmm0 = _mm_load_si128((__m128i *) (&pInput[4*8])); + r_xmm4 = _mm_load_si128((__m128i *) (&pInput[6*8])); r_xmm0 = _mm_shufflelo_epi16(r_xmm0, 0xd8); r_xmm1 = _mm_shuffle_epi32(r_xmm0, 0); - r_xmm1 = _mm_madd_epi16(r_xmm1, *((__m128i*)pTab_i_04)); + r_xmm1 = _mm_madd_epi16(r_xmm1, *((__m128i *) pTab_i_04)); r_xmm3 = _mm_shuffle_epi32(r_xmm0, 0x55); r_xmm0 = _mm_shufflehi_epi16(r_xmm0, 0xd8); - r_xmm3 = _mm_madd_epi16(r_xmm3, *((__m128i*)&pTab_i_04[16])); + r_xmm3 = _mm_madd_epi16(r_xmm3, *((__m128i *) &pTab_i_04[16])); r_xmm2 = _mm_shuffle_epi32(r_xmm0, 0xaa); r_xmm0 = _mm_shuffle_epi32(r_xmm0, 0xff); - r_xmm2 = _mm_madd_epi16(r_xmm2, *((__m128i*)&pTab_i_04[8])); + r_xmm2 = _mm_madd_epi16(r_xmm2, *((__m128i *) &pTab_i_04[8])); r_xmm4 = _mm_shufflehi_epi16(r_xmm4, 0xd8); - r_xmm1 = _mm_add_epi32(r_xmm1, *((__m128i*)shortM128_round_inv_row)); + r_xmm1 = _mm_add_epi32(r_xmm1, *((__m128i *) shortM128_round_inv_row)); r_xmm4 = _mm_shufflelo_epi16(r_xmm4, 0xd8); - r_xmm0 = _mm_madd_epi16(r_xmm0, *((__m128i*)&pTab_i_04[24])); + r_xmm0 = _mm_madd_epi16(r_xmm0, *((__m128i *) &pTab_i_04[24])); r_xmm5 = _mm_shuffle_epi32(r_xmm4, 0); r_xmm6 = _mm_shuffle_epi32(r_xmm4, 0xaa); - r_xmm5 = _mm_madd_epi16(r_xmm5, *((__m128i*)&shortM128_tab_i_26[0])); + r_xmm5 = _mm_madd_epi16(r_xmm5, *((__m128i *) &shortM128_tab_i_26[0])); r_xmm1 = _mm_add_epi32(r_xmm1, r_xmm2); r_xmm2 = r_xmm1; r_xmm7 = _mm_shuffle_epi32(r_xmm4, 0x55); - r_xmm6 = _mm_madd_epi16(r_xmm6, *((__m128i*)&shortM128_tab_i_26[8])); + r_xmm6 = _mm_madd_epi16(r_xmm6, *((__m128i *) &shortM128_tab_i_26[8])); r_xmm0 = _mm_add_epi32(r_xmm0, r_xmm3); r_xmm4 = _mm_shuffle_epi32(r_xmm4, 0xff); r_xmm2 = _mm_sub_epi32(r_xmm2, r_xmm0); - r_xmm7 = _mm_madd_epi16(r_xmm7, *((__m128i*)&shortM128_tab_i_26[16])); + r_xmm7 = _mm_madd_epi16(r_xmm7, *((__m128i *) &shortM128_tab_i_26[16])); r_xmm0 = _mm_add_epi32(r_xmm0, r_xmm1); r_xmm2 = _mm_srai_epi32(r_xmm2, 12); - r_xmm5 = _mm_add_epi32(r_xmm5, *((__m128i*)shortM128_round_inv_row)); - r_xmm4 = _mm_madd_epi16(r_xmm4, *((__m128i*)&shortM128_tab_i_26[24])); + r_xmm5 = _mm_add_epi32(r_xmm5, *((__m128i *) shortM128_round_inv_row)); + r_xmm4 = _mm_madd_epi16(r_xmm4, *((__m128i *) &shortM128_tab_i_26[24])); r_xmm5 = _mm_add_epi32(r_xmm5, r_xmm6); r_xmm6 = r_xmm5; r_xmm0 = _mm_srai_epi32(r_xmm0, 12); @@ -234,37 +234,37 @@ void idctSSEShortU8(const short* pInput, uint8_t* pOutputUB) //Row 4 and row 2 pTab_i_04 = shortM128_tab_i_35; pTab_i_26 = shortM128_tab_i_17; - r_xmm0 = _mm_load_si128((__m128i*)(&pInput[3 * 8])); - r_xmm4 = _mm_load_si128((__m128i*)(&pInput[1 * 8])); + r_xmm0 = _mm_load_si128((__m128i *) (&pInput[3*8])); + r_xmm4 = _mm_load_si128((__m128i *) (&pInput[1*8])); r_xmm0 = _mm_shufflelo_epi16(r_xmm0, 0xd8); r_xmm1 = _mm_shuffle_epi32(r_xmm0, 0); - r_xmm1 = _mm_madd_epi16(r_xmm1, *((__m128i*)pTab_i_04)); + r_xmm1 = _mm_madd_epi16(r_xmm1, *((__m128i *) pTab_i_04)); r_xmm3 = _mm_shuffle_epi32(r_xmm0, 0x55); r_xmm0 = _mm_shufflehi_epi16(r_xmm0, 0xd8); - r_xmm3 = _mm_madd_epi16(r_xmm3, *((__m128i*)&pTab_i_04[16])); + r_xmm3 = _mm_madd_epi16(r_xmm3, *((__m128i *) &pTab_i_04[16])); r_xmm2 = _mm_shuffle_epi32(r_xmm0, 0xaa); r_xmm0 = _mm_shuffle_epi32(r_xmm0, 0xff); - r_xmm2 = _mm_madd_epi16(r_xmm2, *((__m128i*)&pTab_i_04[8])); + r_xmm2 = _mm_madd_epi16(r_xmm2, *((__m128i *) &pTab_i_04[8])); r_xmm4 = _mm_shufflehi_epi16(r_xmm4, 0xd8); - r_xmm1 = _mm_add_epi32(r_xmm1, *((__m128i*)shortM128_round_inv_row)); + r_xmm1 = _mm_add_epi32(r_xmm1, *((__m128i *) shortM128_round_inv_row)); r_xmm4 = _mm_shufflelo_epi16(r_xmm4, 0xd8); - r_xmm0 = _mm_madd_epi16(r_xmm0, *((__m128i*)&pTab_i_04[24])); + r_xmm0 = _mm_madd_epi16(r_xmm0, *((__m128i *) &pTab_i_04[24])); r_xmm5 = _mm_shuffle_epi32(r_xmm4, 0); r_xmm6 = _mm_shuffle_epi32(r_xmm4, 0xaa); - r_xmm5 = _mm_madd_epi16(r_xmm5, *((__m128i*)&pTab_i_26[0])); + r_xmm5 = _mm_madd_epi16(r_xmm5, *((__m128i *) &pTab_i_26[0])); r_xmm1 = _mm_add_epi32(r_xmm1, r_xmm2); r_xmm2 = r_xmm1; r_xmm7 = _mm_shuffle_epi32(r_xmm4, 0x55); - r_xmm6 = _mm_madd_epi16(r_xmm6, *((__m128i*)&pTab_i_26[8])); + r_xmm6 = _mm_madd_epi16(r_xmm6, *((__m128i *) &pTab_i_26[8])); r_xmm0 = _mm_add_epi32(r_xmm0, r_xmm3); r_xmm4 = _mm_shuffle_epi32(r_xmm4, 0xff); r_xmm2 = _mm_sub_epi32(r_xmm2, r_xmm0); - r_xmm7 = _mm_madd_epi16(r_xmm7, *((__m128i*)&pTab_i_26[16])); + r_xmm7 = _mm_madd_epi16(r_xmm7, *((__m128i *) &pTab_i_26[16])); r_xmm0 = _mm_add_epi32(r_xmm0, r_xmm1); r_xmm2 = _mm_srai_epi32(r_xmm2, 12); - r_xmm5 = _mm_add_epi32(r_xmm5, *((__m128i*)shortM128_round_inv_row)); - r_xmm4 = _mm_madd_epi16(r_xmm4, *((__m128i*)&pTab_i_26[24])); + r_xmm5 = _mm_add_epi32(r_xmm5, *((__m128i *) shortM128_round_inv_row)); + r_xmm4 = _mm_madd_epi16(r_xmm4, *((__m128i *) &pTab_i_26[24])); r_xmm5 = _mm_add_epi32(r_xmm5, r_xmm6); r_xmm6 = r_xmm5; r_xmm0 = _mm_srai_epi32(r_xmm0, 12); @@ -279,37 +279,37 @@ void idctSSEShortU8(const short* pInput, uint8_t* pOutputUB) row1 = _mm_packs_epi32(r_xmm4, r_xmm6); //Row 6 and row 8 - r_xmm0 = _mm_load_si128((__m128i*)(&pInput[5 * 8])); - r_xmm4 = _mm_load_si128((__m128i*)(&pInput[7 * 8])); + r_xmm0 = _mm_load_si128((__m128i *) (&pInput[5*8])); + r_xmm4 = _mm_load_si128((__m128i *) (&pInput[7*8])); r_xmm0 = _mm_shufflelo_epi16(r_xmm0, 0xd8); r_xmm1 = _mm_shuffle_epi32(r_xmm0, 0); - r_xmm1 = _mm_madd_epi16(r_xmm1, *((__m128i*)pTab_i_04)); + r_xmm1 = _mm_madd_epi16(r_xmm1, *((__m128i *) pTab_i_04)); r_xmm3 = _mm_shuffle_epi32(r_xmm0, 0x55); r_xmm0 = _mm_shufflehi_epi16(r_xmm0, 0xd8); - r_xmm3 = _mm_madd_epi16(r_xmm3, *((__m128i*)&pTab_i_04[16])); + r_xmm3 = _mm_madd_epi16(r_xmm3, *((__m128i *) &pTab_i_04[16])); r_xmm2 = _mm_shuffle_epi32(r_xmm0, 0xaa); r_xmm0 = _mm_shuffle_epi32(r_xmm0, 0xff); - r_xmm2 = _mm_madd_epi16(r_xmm2, *((__m128i*)&pTab_i_04[8])); + r_xmm2 = _mm_madd_epi16(r_xmm2, *((__m128i *) &pTab_i_04[8])); r_xmm4 = _mm_shufflehi_epi16(r_xmm4, 0xd8); - r_xmm1 = _mm_add_epi32(r_xmm1, *((__m128i*)shortM128_round_inv_row)); + r_xmm1 = _mm_add_epi32(r_xmm1, *((__m128i *) shortM128_round_inv_row)); r_xmm4 = _mm_shufflelo_epi16(r_xmm4, 0xd8); - r_xmm0 = _mm_madd_epi16(r_xmm0, *((__m128i*)&pTab_i_04[24])); + r_xmm0 = _mm_madd_epi16(r_xmm0, *((__m128i *) &pTab_i_04[24])); r_xmm5 = _mm_shuffle_epi32(r_xmm4, 0); r_xmm6 = _mm_shuffle_epi32(r_xmm4, 0xaa); - r_xmm5 = _mm_madd_epi16(r_xmm5, *((__m128i*)&pTab_i_26[0])); + r_xmm5 = _mm_madd_epi16(r_xmm5, *((__m128i *) &pTab_i_26[0])); r_xmm1 = _mm_add_epi32(r_xmm1, r_xmm2); r_xmm2 = r_xmm1; r_xmm7 = _mm_shuffle_epi32(r_xmm4, 0x55); - r_xmm6 = _mm_madd_epi16(r_xmm6, *((__m128i*)&pTab_i_26[8])); + r_xmm6 = _mm_madd_epi16(r_xmm6, *((__m128i *) &pTab_i_26[8])); r_xmm0 = _mm_add_epi32(r_xmm0, r_xmm3); r_xmm4 = _mm_shuffle_epi32(r_xmm4, 0xff); r_xmm2 = _mm_sub_epi32(r_xmm2, r_xmm0); - r_xmm7 = _mm_madd_epi16(r_xmm7, *((__m128i*)&pTab_i_26[16])); + r_xmm7 = _mm_madd_epi16(r_xmm7, *((__m128i *) &pTab_i_26[16])); r_xmm0 = _mm_add_epi32(r_xmm0, r_xmm1); r_xmm2 = _mm_srai_epi32(r_xmm2, 12); - r_xmm5 = _mm_add_epi32(r_xmm5, *((__m128i*)shortM128_round_inv_row)); - r_xmm4 = _mm_madd_epi16(r_xmm4, *((__m128i*)&pTab_i_26[24])); + r_xmm5 = _mm_add_epi32(r_xmm5, *((__m128i *) shortM128_round_inv_row)); + r_xmm4 = _mm_madd_epi16(r_xmm4, *((__m128i *) &pTab_i_26[24])); r_xmm5 = _mm_add_epi32(r_xmm5, r_xmm6); r_xmm6 = r_xmm5; r_xmm0 = _mm_srai_epi32(r_xmm0, 12); @@ -323,13 +323,13 @@ void idctSSEShortU8(const short* pInput, uint8_t* pOutputUB) r_xmm6 = _mm_shuffle_epi32(r_xmm6, 0x1b); row7 = _mm_packs_epi32(r_xmm4, r_xmm6); - r_xmm1 = _mm_load_si128((__m128i*)shortM128_tg_3_16); + r_xmm1 = _mm_load_si128((__m128i *) shortM128_tg_3_16); r_xmm2 = row5; r_xmm3 = row3; r_xmm0 = _mm_mulhi_epi16(row5, r_xmm1); r_xmm1 = _mm_mulhi_epi16(r_xmm1, r_xmm3); - r_xmm5 = _mm_load_si128((__m128i*)shortM128_tg_1_16); + r_xmm5 = _mm_load_si128((__m128i *) shortM128_tg_1_16); r_xmm6 = row7; r_xmm4 = _mm_mulhi_epi16(row7, r_xmm5); @@ -339,7 +339,7 @@ void idctSSEShortU8(const short* pInput, uint8_t* pOutputUB) r_xmm7 = row6; r_xmm0 = _mm_adds_epi16(r_xmm0, r_xmm3); - r_xmm3 = _mm_load_si128((__m128i*)shortM128_tg_2_16); + r_xmm3 = _mm_load_si128((__m128i *) shortM128_tg_2_16); r_xmm2 = _mm_subs_epi16(r_xmm2, r_xmm1); r_xmm7 = _mm_mulhi_epi16(r_xmm7, r_xmm3); r_xmm1 = r_xmm0; @@ -347,11 +347,11 @@ void idctSSEShortU8(const short* pInput, uint8_t* pOutputUB) r_xmm5 = _mm_subs_epi16(r_xmm5, r_xmm6); r_xmm4 = _mm_adds_epi16(r_xmm4, row1); r_xmm0 = _mm_adds_epi16(r_xmm0, r_xmm4); - r_xmm0 = _mm_adds_epi16(r_xmm0, *((__m128i*)shortM128_one_corr)); + r_xmm0 = _mm_adds_epi16(r_xmm0, *((__m128i *) shortM128_one_corr)); r_xmm4 = _mm_subs_epi16(r_xmm4, r_xmm1); r_xmm6 = r_xmm5; r_xmm5 = _mm_subs_epi16(r_xmm5, r_xmm2); - r_xmm5 = _mm_adds_epi16(r_xmm5, *((__m128i*)shortM128_one_corr)); + r_xmm5 = _mm_adds_epi16(r_xmm5, *((__m128i *) shortM128_one_corr)); r_xmm6 = _mm_adds_epi16(r_xmm6, r_xmm2); //Intermediate results, needed later @@ -359,9 +359,9 @@ void idctSSEShortU8(const short* pInput, uint8_t* pOutputUB) temp7 = r_xmm0; r_xmm1 = r_xmm4; - r_xmm0 = _mm_load_si128((__m128i*)shortM128_cos_4_16); + r_xmm0 = _mm_load_si128((__m128i *) shortM128_cos_4_16); r_xmm4 = _mm_adds_epi16(r_xmm4, r_xmm5); - r_xmm2 = _mm_load_si128((__m128i*)shortM128_cos_4_16); + r_xmm2 = _mm_load_si128((__m128i *) shortM128_cos_4_16); r_xmm2 = _mm_mulhi_epi16(r_xmm2, r_xmm4); //Intermediate results, needed later @@ -377,24 +377,24 @@ void idctSSEShortU8(const short* pInput, uint8_t* pOutputUB) r_xmm6 = _mm_subs_epi16(r_xmm6, row4); r_xmm4 = _mm_adds_epi16(r_xmm4, r_xmm2); - r_xmm4 = _mm_or_si128(r_xmm4, *((__m128i*)shortM128_one_corr)); + r_xmm4 = _mm_or_si128(r_xmm4, *((__m128i *) shortM128_one_corr)); r_xmm0 = _mm_adds_epi16(r_xmm0, r_xmm1); - r_xmm0 = _mm_or_si128(r_xmm0, *((__m128i*)shortM128_one_corr)); + r_xmm0 = _mm_or_si128(r_xmm0, *((__m128i *) shortM128_one_corr)); r_xmm2 = r_xmm5; r_xmm5 = _mm_adds_epi16(r_xmm5, r_xmm7); r_xmm1 = r_xmm6; - r_xmm5 = _mm_adds_epi16(r_xmm5, *((__m128i*)shortM128_round_inv_col)); + r_xmm5 = _mm_adds_epi16(r_xmm5, *((__m128i *) shortM128_round_inv_col)); r_xmm2 = _mm_subs_epi16(r_xmm2, r_xmm7); r_xmm7 = temp7; r_xmm6 = _mm_adds_epi16(r_xmm6, r_xmm3); - r_xmm6 = _mm_adds_epi16(r_xmm6, *((__m128i*)shortM128_round_inv_col)); + r_xmm6 = _mm_adds_epi16(r_xmm6, *((__m128i *) shortM128_round_inv_col)); r_xmm7 = _mm_adds_epi16(r_xmm7, r_xmm5); r_xmm7 = _mm_srai_epi16(r_xmm7, SHIFT_INV_COL); r_xmm1 = _mm_subs_epi16(r_xmm1, r_xmm3); - r_xmm1 = _mm_adds_epi16(r_xmm1, *((__m128i*)shortM128_round_inv_corr)); + r_xmm1 = _mm_adds_epi16(r_xmm1, *((__m128i *) shortM128_round_inv_corr)); r_xmm3 = r_xmm6; - r_xmm2 = _mm_adds_epi16(r_xmm2, *((__m128i*)shortM128_round_inv_corr)); + r_xmm2 = _mm_adds_epi16(r_xmm2, *((__m128i *) shortM128_round_inv_corr)); r_xmm6 = _mm_adds_epi16(r_xmm6, r_xmm4); //Store results for row 0 @@ -406,7 +406,7 @@ void idctSSEShortU8(const short* pInput, uint8_t* pOutputUB) r_xmm1 = _mm_adds_epi16(r_xmm1, r_xmm0); //Store results for row 1 - //_mm_store_si128((__m128i *) (&pOutput[1*8]), r_xmm6); + //_mm_store_si128((__m128i *) (&pOutput[1*8]), r_xmm6); __m128i r1 = r_xmm6; r_xmm1 = _mm_srai_epi16(r_xmm1, SHIFT_INV_COL); @@ -415,24 +415,24 @@ void idctSSEShortU8(const short* pInput, uint8_t* pOutputUB) r_xmm7 = _mm_srai_epi16(r_xmm7, SHIFT_INV_COL); //Store results for row 2 - //_mm_store_si128((__m128i *) (&pOutput[2*8]), r_xmm1); + //_mm_store_si128((__m128i *) (&pOutput[2*8]), r_xmm1); __m128i r2 = r_xmm1; - r_xmm5 = _mm_subs_epi16(r_xmm5, temp7); + r_xmm5 = _mm_subs_epi16(r_xmm5, temp7); r_xmm5 = _mm_srai_epi16(r_xmm5, SHIFT_INV_COL); //Store results for row 7 - //_mm_store_si128((__m128i *) (&pOutput[7*8]), r_xmm5); + //_mm_store_si128((__m128i *) (&pOutput[7*8]), r_xmm5); __m128i r7 = r_xmm5; r_xmm3 = _mm_subs_epi16(r_xmm3, r_xmm4); r_xmm6 = _mm_adds_epi16(r_xmm6, r_xmm2); - r_xmm2 = _mm_subs_epi16(r_xmm2, temp3); + r_xmm2 = _mm_subs_epi16(r_xmm2, temp3); r_xmm6 = _mm_srai_epi16(r_xmm6, SHIFT_INV_COL); r_xmm2 = _mm_srai_epi16(r_xmm2, SHIFT_INV_COL); //Store results for row 3 - //_mm_store_si128((__m128i *) (&pOutput[3*8]), r_xmm6); + //_mm_store_si128((__m128i *) (&pOutput[3*8]), r_xmm6); __m128i r3 = r_xmm6; r_xmm3 = _mm_srai_epi16(r_xmm3, SHIFT_INV_COL); @@ -446,17 +446,17 @@ void idctSSEShortU8(const short* pInput, uint8_t* pOutputUB) __m128i r5 = r_xmm7; __m128i r6 = r_xmm3; - r0 = _mm_add_epi16(*(const __m128i*)shortM128_128, r0); - r1 = _mm_add_epi16(*(const __m128i*)shortM128_128, r1); - r2 = _mm_add_epi16(*(const __m128i*)shortM128_128, r2); - r3 = _mm_add_epi16(*(const __m128i*)shortM128_128, r3); - r4 = _mm_add_epi16(*(const __m128i*)shortM128_128, r4); - r5 = _mm_add_epi16(*(const __m128i*)shortM128_128, r5); - r6 = _mm_add_epi16(*(const __m128i*)shortM128_128, r6); - r7 = _mm_add_epi16(*(const __m128i*)shortM128_128, r7); + r0 = _mm_add_epi16(*(const __m128i *)shortM128_128, r0); + r1 = _mm_add_epi16(*(const __m128i *)shortM128_128, r1); + r2 = _mm_add_epi16(*(const __m128i *)shortM128_128, r2); + r3 = _mm_add_epi16(*(const __m128i *)shortM128_128, r3); + r4 = _mm_add_epi16(*(const __m128i *)shortM128_128, r4); + r5 = _mm_add_epi16(*(const __m128i *)shortM128_128, r5); + r6 = _mm_add_epi16(*(const __m128i *)shortM128_128, r6); + r7 = _mm_add_epi16(*(const __m128i *)shortM128_128, r7); - ((__m128i*)pOutputUB)[0] = _mm_packus_epi16(r0, r1); - ((__m128i*)pOutputUB)[1] = _mm_packus_epi16(r2, r3); - ((__m128i*)pOutputUB)[2] = _mm_packus_epi16(r4, r5); - ((__m128i*)pOutputUB)[3] = _mm_packus_epi16(r6, r7); + ((__m128i *)pOutputUB)[0] = _mm_packus_epi16(r0, r1); + ((__m128i *)pOutputUB)[1] = _mm_packus_epi16(r2, r3); + ((__m128i *)pOutputUB)[2] = _mm_packus_epi16(r4, r5); + ((__m128i *)pOutputUB)[3] = _mm_packus_epi16(r6, r7); }