The C99 `restrict` keyword does not exist in C++ (yet, anyway). MSVC inline assembly should not be floating around by itself. Checking a define for MMX is stupid, but I won't worry about it right now. Link code is severely MSVC-only and should be cleaned up or moved.

git-svn-id: https://svn.code.sf.net/p/vbam/code/trunk@62 a31d4220-a93d-0410-bf67-fe4944624d44
This commit is contained in:
Nach 2007-11-14 09:16:23 +00:00
parent f188d0727f
commit bc661af3b9
7 changed files with 381 additions and 362 deletions

View File

@ -16,6 +16,7 @@
// along with this program; if not, write to the Free Software Foundation, // along with this program; if not, write to the Free Software Foundation,
// Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. // Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#include "Util.h" #include "Util.h"
#define __STDC_CONSTANT_MACROS
#include <stdint.h> #include <stdint.h>
extern "C" extern "C"

View File

@ -42,7 +42,7 @@
* This effect is a rewritten implementation of the hq4x effect made by Maxim Stepin * This effect is a rewritten implementation of the hq4x effect made by Maxim Stepin
*/ */
void hq4x_16_def(interp_uint16* restrict dst0, interp_uint16* restrict dst1, interp_uint16* restrict dst2, interp_uint16* restrict dst3, const interp_uint16* restrict src0, const interp_uint16* restrict src1, const interp_uint16* restrict src2, unsigned count) void hq4x_16_def(interp_uint16* dst0, interp_uint16* dst1, interp_uint16* dst2, interp_uint16* dst3, const interp_uint16* src0, const interp_uint16* src1, const interp_uint16* src2, unsigned count)
{ {
unsigned i; unsigned i;
@ -126,7 +126,7 @@ void hq4x_16_def(interp_uint16* restrict dst0, interp_uint16* restrict dst1, int
} }
} }
void hq4x_32_def(interp_uint32* restrict dst0, interp_uint32* restrict dst1, interp_uint32* restrict dst2, interp_uint32* restrict dst3, const interp_uint32* restrict src0, const interp_uint32* restrict src1, const interp_uint32* restrict src2, unsigned count) void hq4x_32_def(interp_uint32* dst0, interp_uint32* dst1, interp_uint32* dst2, interp_uint32* dst3, const interp_uint32* src0, const interp_uint32* src1, const interp_uint32* src2, unsigned count)
{ {
unsigned i; unsigned i;

View File

@ -17,359 +17,371 @@
// Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. // Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#include "hq_shared32.h" #include "hq_shared32.h"
#define __STDC_CONSTANT_MACROS
#include <stdint.h>
const unsigned __int64 reg_blank = 0x0000000000000000; const uint64_t reg_blank = UINT64_C(0x0000000000000000);
const unsigned __int64 const7 = 0x0000000700070007; const uint64_t const7 = UINT64_C(0x0000000700070007);
const unsigned __int64 treshold = 0x0000000000300706; const uint64_t treshold = UINT64_C(0x0000000000300706);
void Interp1(unsigned char * pc, unsigned int c1, unsigned int c2) void Interp1(unsigned char * pc, unsigned int c1, unsigned int c2)
{ {
//*((int*)pc) = (c1*3+c2)/4; #ifdef _MSC_VER
#ifdef MMX
#ifdef MMX __asm
__asm {
{ mov eax, pc
mov eax, pc movd mm1, c1
movd mm1, c1 movd mm2, c2
movd mm2, c2 movq mm0, mm1
movq mm0, mm1 pslld mm0, 2
pslld mm0, 2 psubd mm0, mm1
psubd mm0, mm1 paddd mm0, mm2
paddd mm0, mm2 psrld mm0, 2
psrld mm0, 2 movd [eax], mm0
movd [eax], mm0 EMMS
EMMS }
} #else
#else __asm
__asm {
{ mov eax, pc
mov eax, pc mov edx, c1
mov edx, c1 shl edx, 2
shl edx, 2 add edx, c2
add edx, c2 sub edx, c1
sub edx, c1 shr edx, 2
shr edx, 2 mov [eax], edx
mov [eax], edx }
} #endif
#endif #else
} *((int*)pc) = (c1*3+c2)/4;
#endif
void Interp2(unsigned char * pc, unsigned int c1, unsigned int c2, unsigned int c3) }
{
//*((int*)pc) = (c1*2+c2+c3)/4; void Interp2(unsigned char * pc, unsigned int c1, unsigned int c2, unsigned int c3)
{
#ifdef MMX #ifdef _MSC_VER
__asm #ifdef MMX
{ __asm
mov eax, pc {
movd mm0, c1 mov eax, pc
movd mm1, c2 movd mm0, c1
movd mm2, c3 movd mm1, c2
pslld mm0, 1 movd mm2, c3
paddd mm0, mm1 pslld mm0, 1
paddd mm0, mm2 paddd mm0, mm1
psrad mm0, 2 paddd mm0, mm2
movd [eax], mm0 psrad mm0, 2
EMMS movd [eax], mm0
} EMMS
#else }
__asm #else
{ __asm
mov eax, pc {
mov edx, c1 mov eax, pc
shl edx, 1 mov edx, c1
add edx, c2 shl edx, 1
add edx, c3 add edx, c2
shr edx, 2 add edx, c3
mov [eax], edx shr edx, 2
} mov [eax], edx
#endif }
} #endif
#else
void Interp3(unsigned char * pc, unsigned int c1, unsigned int c2) *((int*)pc) = (c1*2+c2+c3)/4;
{ #endif
//*((int*)pc) = (c1*7+c2)/8; }
//*((int*)pc) = ((((c1 & 0x00FF00)*7 + (c2 & 0x00FF00) ) & 0x0007F800) +
// (((c1 & 0xFF00FF)*7 + (c2 & 0xFF00FF) ) & 0x07F807F8)) >> 3; void Interp3(unsigned char * pc, unsigned int c1, unsigned int c2)
{
#ifdef MMX #ifdef _MSC_VER
__asm #ifdef MMX
{ __asm
mov eax, pc {
movd mm1, c1 mov eax, pc
movd mm2, c2 movd mm1, c1
punpcklbw mm1, reg_blank movd mm2, c2
punpcklbw mm2, reg_blank punpcklbw mm1, reg_blank
pmullw mm1, const7 punpcklbw mm2, reg_blank
paddw mm1, mm2 pmullw mm1, const7
psrlw mm1, 3 paddw mm1, mm2
packuswb mm1, reg_blank psrlw mm1, 3
movd [eax], mm1 packuswb mm1, reg_blank
EMMS movd [eax], mm1
} EMMS
#else }
__asm #else
{ __asm
mov eax, c1 {
mov ebx, c2 mov eax, c1
mov ecx, eax mov ebx, c2
shl ecx, 3 mov ecx, eax
sub ecx, eax shl ecx, 3
add ecx, ebx sub ecx, eax
shr ecx, 3 add ecx, ebx
mov eax, pc shr ecx, 3
mov [eax], ecx mov eax, pc
} mov [eax], ecx
#endif }
} #endif
#else
void Interp4(unsigned char * pc, unsigned int c1, unsigned int c2, unsigned int c3) *((int*)pc) = (c1*7+c2)/8;
{ *((int*)pc) = ((((c1 & 0x00FF00)*7 + (c2 & 0x00FF00) ) & 0x0007F800) +
//*((int*)pc) = (c1*2+(c2+c3)*7)/16; (((c1 & 0xFF00FF)*7 + (c2 & 0xFF00FF) ) & 0x07F807F8)) >> 3;
//*((int*)pc) = ((((c1 & 0x00FF00)*2 + ((c2 & 0x00FF00) + (c3 & 0x00FF00))*7 ) & 0x000FF000) + #endif
// (((c1 & 0xFF00FF)*2 + ((c2 & 0xFF00FF) + (c3 & 0xFF00FF))*7 ) & 0x0FF00FF0)) >> 4; }
#ifdef MMX void Interp4(unsigned char * pc, unsigned int c1, unsigned int c2, unsigned int c3)
__asm {
{ #ifdef _MSC_VER
mov eax, pc #ifdef MMX
movd mm1, c1 __asm
movd mm2, c2 {
movd mm3, c3 mov eax, pc
punpcklbw mm1, reg_blank movd mm1, c1
punpcklbw mm2, reg_blank movd mm2, c2
punpcklbw mm3, reg_blank movd mm3, c3
psllw mm1, 1 punpcklbw mm1, reg_blank
paddw mm2, mm3 punpcklbw mm2, reg_blank
pmullw mm2, const7 punpcklbw mm3, reg_blank
paddw mm1, mm2 psllw mm1, 1
psrlw mm1, 4 paddw mm2, mm3
packuswb mm1, reg_blank pmullw mm2, const7
movd [eax], mm1 paddw mm1, mm2
EMMS psrlw mm1, 4
} packuswb mm1, reg_blank
#else movd [eax], mm1
EMMS
__asm }
{ #else
mov eax, [c1]
and eax, 0FF00h __asm
shl eax, 1 {
mov ecx, [c2] mov eax, [c1]
and ecx, 0FF00h and eax, 0FF00h
mov edx, [c3] shl eax, 1
and edx, 0FF00h mov ecx, [c2]
add ecx, edx and ecx, 0FF00h
imul ecx, ecx,7 mov edx, [c3]
add eax, ecx and edx, 0FF00h
and eax, 0FF000h add ecx, edx
imul ecx, ecx,7
mov ebx, [c1] add eax, ecx
and ebx, 0FF00FFh and eax, 0FF000h
shl ebx, 1
mov ecx, [c2] mov ebx, [c1]
and ecx, 0FF00FFh and ebx, 0FF00FFh
mov edx, [c3] shl ebx, 1
and edx, 0FF00FFh mov ecx, [c2]
add ecx, edx and ecx, 0FF00FFh
imul ecx, ecx,7 mov edx, [c3]
add ebx, ecx and edx, 0FF00FFh
and ebx, 0FF00FF0h add ecx, edx
imul ecx, ecx,7
add eax, ebx add ebx, ecx
shr eax, 4 and ebx, 0FF00FF0h
mov ebx, pc add eax, ebx
mov [ebx], eax shr eax, 4
}
#endif mov ebx, pc
} mov [ebx], eax
}
void Interp5(unsigned char * pc, unsigned int c1, unsigned int c2) #endif
{ #else
//*((int*)pc) = (c1+c2)/2; *((int*)pc) = (c1*2+(c2+c3)*7)/16;
*((int*)pc) = ((((c1 & 0x00FF00)*2 + ((c2 & 0x00FF00) + (c3 & 0x00FF00))*7 ) & 0x000FF000) +
#ifdef MMX (((c1 & 0xFF00FF)*2 + ((c2 & 0xFF00FF) + (c3 & 0xFF00FF))*7 ) & 0x0FF00FF0)) >> 4;
__asm #endif
{ }
mov eax, pc
movd mm0, c1 void Interp5(unsigned char * pc, unsigned int c1, unsigned int c2)
movd mm1, c2 {
paddd mm0, mm1 #ifdef _MSC_VER
psrad mm0, 1 #ifdef MMX
movd [eax], mm0 __asm
EMMS {
} mov eax, pc
#else movd mm0, c1
__asm movd mm1, c2
{ paddd mm0, mm1
mov eax, pc psrad mm0, 1
mov edx, c1 movd [eax], mm0
add edx, c2 EMMS
shr edx, 1 }
mov [eax], edx #else
} __asm
#endif {
} mov eax, pc
mov edx, c1
add edx, c2
bool Diff(unsigned int c1, unsigned int c2) shr edx, 1
{ mov [eax], edx
unsigned int }
YUV1 = RGBtoYUV(c1), #endif
YUV2 = RGBtoYUV(c2); #else
*((int*)pc) = (c1+c2)/2;
if (YUV1 == YUV2) return false; // Save some processing power #endif
}
#ifdef MMX
unsigned int retval;
__asm bool Diff(unsigned int c1, unsigned int c2)
{ {
mov eax, 0x7FFFFFFF unsigned int
movd mm7, eax ;mm7 = ABS_MASK = 0x7FFFFFFF YUV1 = RGBtoYUV(c1),
YUV2 = RGBtoYUV(c2);
; Copy source colors in first reg
movd mm0, YUV1 if (YUV1 == YUV2) return false; // Save some processing power
movd mm1, YUV2
#ifdef MMX
mov eax, 0x00FF0000 unsigned int retval;
movd mm6, eax ;mm6 = Ymask = 0x00FF0000 __asm
{
; Calculate color Y difference mov eax, 0x7FFFFFFF
movq mm2, mm0 movd mm7, eax ;mm7 = ABS_MASK = 0x7FFFFFFF
movq mm3, mm1
pand mm2, mm6 ; Copy source colors in first reg
pand mm3, mm6 movd mm0, YUV1
psubd mm2, mm3 movd mm1, YUV2
pand mm2, mm7
mov eax, 0x00FF0000
mov eax, 0x0000FF00 movd mm6, eax ;mm6 = Ymask = 0x00FF0000
movd mm6, eax ;mm6 = Umask = 0x0000FF00
; Calculate color Y difference
; Calculate color U difference movq mm2, mm0
movq mm3, mm0 movq mm3, mm1
movq mm4, mm1 pand mm2, mm6
pand mm3, mm6 pand mm3, mm6
pand mm4, mm6 psubd mm2, mm3
psubd mm3, mm4 pand mm2, mm7
pand mm3, mm7
mov eax, 0x0000FF00
mov eax, 0x000000FF movd mm6, eax ;mm6 = Umask = 0x0000FF00
movd mm6, eax ;mm6 = Vmask = 0x000000FF
; Calculate color U difference
; Calculate color V difference movq mm3, mm0
movq mm4, mm0 movq mm4, mm1
movq mm5, mm1 pand mm3, mm6
pand mm4, mm6 pand mm4, mm6
pand mm5, mm6 psubd mm3, mm4
psubd mm4, mm5 pand mm3, mm7
pand mm4, mm7
mov eax, 0x000000FF
mov eax, 0x00300000 movd mm6, eax ;mm6 = Vmask = 0x000000FF
movd mm5, eax ;mm5 = trY = 0x00300000
mov eax, 0x00000700 ; Calculate color V difference
movd mm6, eax ;mm6 = trU = 0x00000700 movq mm4, mm0
mov eax, 0x00000006 movq mm5, mm1
movd mm7, eax ;mm7 = trV = 0x00000006 pand mm4, mm6
pand mm5, mm6
; Compare the results psubd mm4, mm5
pcmpgtd mm2, trY pand mm4, mm7
pcmpgtd mm3, trU
pcmpgtd mm4, trV mov eax, 0x00300000
por mm2, mm3 movd mm5, eax ;mm5 = trY = 0x00300000
por mm2, mm4 mov eax, 0x00000700
movd mm6, eax ;mm6 = trU = 0x00000700
movd retval, mm2 mov eax, 0x00000006
movd mm7, eax ;mm7 = trV = 0x00000006
EMMS
} ; Compare the results
return (retval != 0); pcmpgtd mm2, trY
#else pcmpgtd mm3, trU
return pcmpgtd mm4, trV
( abs32((YUV1 & Ymask) - (YUV2 & Ymask)) > trY ) || por mm2, mm3
( abs32((YUV1 & Umask) - (YUV2 & Umask)) > trU ) || por mm2, mm4
( abs32((YUV1 & Vmask) - (YUV2 & Vmask)) > trV );
#endif movd retval, mm2
}
EMMS
}
unsigned int RGBtoYUV(unsigned int c) return (retval != 0);
{ // Division through 3 slows down the emulation about 10% !!! #else
#ifdef MMX return
unsigned int retval; ( abs32((YUV1 & Ymask) - (YUV2 & Ymask)) > trY ) ||
__asm ( abs32((YUV1 & Umask) - (YUV2 & Umask)) > trU ) ||
{ ( abs32((YUV1 & Vmask) - (YUV2 & Vmask)) > trV );
movd mm0, c #endif
movq mm1, mm0 }
movq mm2, mm0 ;mm0=mm1=mm2=c
mov eax, 0x000000FF unsigned int RGBtoYUV(unsigned int c)
movd mm5, eax ;mm5 = REDMASK = 0x000000FF { // Division through 3 slows down the emulation about 10% !!!
mov eax, 0x0000FF00 #ifdef MMX
movd mm6, eax ;mm6 = GREENMASK = 0x0000FF00 unsigned int retval;
mov eax, 0x00FF0000 __asm
movd mm7, eax ;mm7 = BLUEMASK = 0x00FF0000 {
movd mm0, c
movq mm1, mm0
pand mm0, mm5 movq mm2, mm0 ;mm0=mm1=mm2=c
pand mm1, mm6
pand mm2, mm7 ;mm0=R mm1=G mm2=B mov eax, 0x000000FF
movd mm5, eax ;mm5 = REDMASK = 0x000000FF
movq mm3, mm0 mov eax, 0x0000FF00
paddd mm3, mm1 movd mm6, eax ;mm6 = GREENMASK = 0x0000FF00
paddd mm3, mm2 mov eax, 0x00FF0000
; psrld mm3, 2 ;mm3=Y movd mm7, eax ;mm7 = BLUEMASK = 0x00FF0000
; pslld mm3, 16
pslld mm3, 14 ;mm3=Y<<16
pand mm0, mm5
mov eax, 512 pand mm1, mm6
movd mm7, eax ;mm7 = 128 << 2 = 512 pand mm2, mm7 ;mm0=R mm1=G mm2=B
movq mm4, mm0 movq mm3, mm0
psubd mm4, mm2 paddd mm3, mm1
; psrld mm4, 2 paddd mm3, mm2
; paddd mm4, mm7 ;mm4=U ; psrld mm3, 2 ;mm3=Y
; pslld mm4, 8 ;mm4=U<<8 ; pslld mm3, 16
paddd mm4, mm7 pslld mm3, 14 ;mm3=Y<<16
pslld mm4, 6
mov eax, 512
mov eax, 128 movd mm7, eax ;mm7 = 128 << 2 = 512
movd mm7, eax ;mm7 = 128
movq mm4, mm0
movq mm5, mm1 psubd mm4, mm2
pslld mm5, 1 ; psrld mm4, 2
psubd mm5, mm0 ; paddd mm4, mm7 ;mm4=U
psubd mm5, mm2 ; pslld mm4, 8 ;mm4=U<<8
psrld mm5, 3 paddd mm4, mm7
paddd mm5, mm7 ;mm5=V pslld mm4, 6
paddd mm5, mm4 mov eax, 128
paddd mm5, mm3 movd mm7, eax ;mm7 = 128
movd retval, mm5 movq mm5, mm1
pslld mm5, 1
EMMS psubd mm5, mm0
} psubd mm5, mm2
return retval; psrld mm5, 3
#else paddd mm5, mm7 ;mm5=V
unsigned char r, g, b, Y, u, v;
r = (c & 0x000000FF); paddd mm5, mm4
g = (c & 0x0000FF00) >> 8; paddd mm5, mm3
b = (c & 0x00FF0000) >> 16;
Y = (r + g + b) >> 2; movd retval, mm5
u = 128 + ((r - b) >> 2);
v = 128 + ((-r + 2*g -b)>>3); EMMS
return (Y<<16) + (u<<8) + v; }
return retval;
// Extremely High Quality Code #else
//unsigned char r, g, b; unsigned char r, g, b, Y, u, v;
//r = c & 0xFF; r = (c & 0x000000FF);
//g = (c >> 8) & 0xFF; g = (c & 0x0000FF00) >> 8;
//b = (c >> 16) & 0xFF; b = (c & 0x00FF0000) >> 16;
//unsigned char y, u, v; Y = (r + g + b) >> 2;
//y = (0.256788 * r + 0.504129 * g + 0.097906 * b) + 16; u = 128 + ((r - b) >> 2);
//u = (-0.148223 * r - 0.290993 * g + 0.439216 * b) + 128; v = 128 + ((-r + 2*g -b)>>3);
//v = (0.439216 * r - 0.367788 * g - 0.071427 * b) + 128; return (Y<<16) + (u<<8) + v;
//return (y << 16) + (u << 8) + v;
#endif // Extremely High Quality Code
} //unsigned char r, g, b;
//r = c & 0xFF;
//g = (c >> 8) & 0xFF;
//b = (c >> 16) & 0xFF;
//unsigned char y, u, v;
//y = (0.256788 * r + 0.504129 * g + 0.097906 * b) + 16;
//u = (-0.148223 * r - 0.290993 * g + 0.439216 * b) + 128;
//v = (0.439216 * r - 0.367788 * g - 0.071427 * b) + 128;
//return (y << 16) + (u << 8) + v;
#endif
}

View File

@ -87,4 +87,4 @@ void Interp3(unsigned char * pc, unsigned int c1, unsigned int c2);
void Interp4(unsigned char * pc, unsigned int c1, unsigned int c2, unsigned int c3); void Interp4(unsigned char * pc, unsigned int c1, unsigned int c2, unsigned int c3);
void Interp5(unsigned char * pc, unsigned int c1, unsigned int c2); void Interp5(unsigned char * pc, unsigned int c1, unsigned int c2);
bool Diff(unsigned int c1, unsigned int c2); bool Diff(unsigned int c1, unsigned int c2);
unsigned int RGBtoYUV(unsigned int c); unsigned int RGBtoYUV(unsigned int c);

View File

@ -31,6 +31,12 @@
#ifndef __INTERP_H #ifndef __INTERP_H
#define __INTERP_H #define __INTERP_H
#define __STDC_CONSTANT_MACROS
#include <stdint.h>
typedef uint16_t interp_uint16;
typedef uint32_t interp_uint32;
/***************************************************************************/ /***************************************************************************/
/* Basic types */ /* Basic types */

View File

@ -41,7 +41,7 @@
* This effect is derived from the hq3x effect made by Maxim Stepin * This effect is derived from the hq3x effect made by Maxim Stepin
*/ */
void lq3x_16_def(interp_uint16* restrict dst0, interp_uint16* restrict dst1, interp_uint16* restrict dst2, const interp_uint16* restrict src0, const interp_uint16* restrict src1, const interp_uint16* restrict src2, unsigned count) void lq3x_16_def(interp_uint16* dst0, interp_uint16* dst1, interp_uint16* dst2, const interp_uint16* src0, const interp_uint16* src1, const interp_uint16* src2, unsigned count)
{ {
unsigned i; unsigned i;
@ -124,7 +124,7 @@ void lq3x_16_def(interp_uint16* restrict dst0, interp_uint16* restrict dst1, int
} }
} }
void lq3x_32_def(interp_uint32* restrict dst0, interp_uint32* restrict dst1, interp_uint32* restrict dst2, const interp_uint32* restrict src0, const interp_uint32* restrict src1, const interp_uint32* restrict src2, unsigned count) void lq3x_32_def(interp_uint32* dst0, interp_uint32* dst1, interp_uint32* dst2, const interp_uint32* src0, const interp_uint32* src1, const interp_uint32* src2, unsigned count)
{ {
unsigned i; unsigned i;

View File

@ -42,7 +42,7 @@
* This effect is derived from the hq4x effect made by Maxim Stepin * This effect is derived from the hq4x effect made by Maxim Stepin
*/ */
void lq4x_16_def(interp_uint16* restrict dst0, interp_uint16* restrict dst1, interp_uint16* restrict dst2, interp_uint16* restrict dst3, const interp_uint16* restrict src0, const interp_uint16* restrict src1, const interp_uint16* restrict src2, unsigned count) void lq4x_16_def(interp_uint16* dst0, interp_uint16* dst1, interp_uint16* dst2, interp_uint16* dst3, const interp_uint16* src0, const interp_uint16* src1, const interp_uint16* src2, unsigned count)
{ {
unsigned i; unsigned i;
@ -126,7 +126,7 @@ void lq4x_16_def(interp_uint16* restrict dst0, interp_uint16* restrict dst1, int
} }
} }
void lq4x_32_def(interp_uint32* restrict dst0, interp_uint32* restrict dst1, interp_uint32* restrict dst2, interp_uint32* restrict dst3, const interp_uint32* restrict src0, const interp_uint32* restrict src1, const interp_uint32* restrict src2, unsigned count) void lq4x_32_def(interp_uint32* dst0, interp_uint32* dst1, interp_uint32* dst2, interp_uint32* dst3, const interp_uint32* src0, const interp_uint32* src1, const interp_uint32* src2, unsigned count)
{ {
unsigned i; unsigned i;