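The C99 `restrict` keyword does not exist in C++ (yet, anyway), so it is removed from the filter signatures. MSVC inline assembly should not be left unguarded; it is now wrapped in `_MSC_VER` checks with plain C fallbacks. Selecting the code path by checking a define for MMX is a poor approach, but I won't worry about it right now. The link code is severely MSVC-only and should be cleaned up or moved.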
git-svn-id: https://svn.code.sf.net/p/vbam/code/trunk@62 a31d4220-a93d-0410-bf67-fe4944624d44
This commit is contained in:
parent
f188d0727f
commit
bc661af3b9
|
@ -16,6 +16,7 @@
|
|||
// along with this program; if not, write to the Free Software Foundation,
|
||||
// Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
#include "Util.h"
|
||||
#define __STDC_CONSTANT_MACROS
|
||||
#include <stdint.h>
|
||||
|
||||
extern "C"
|
||||
|
|
|
@ -42,7 +42,7 @@
|
|||
* This effect is a rewritten implementation of the hq4x effect made by Maxim Stepin
|
||||
*/
|
||||
|
||||
void hq4x_16_def(interp_uint16* restrict dst0, interp_uint16* restrict dst1, interp_uint16* restrict dst2, interp_uint16* restrict dst3, const interp_uint16* restrict src0, const interp_uint16* restrict src1, const interp_uint16* restrict src2, unsigned count)
|
||||
void hq4x_16_def(interp_uint16* dst0, interp_uint16* dst1, interp_uint16* dst2, interp_uint16* dst3, const interp_uint16* src0, const interp_uint16* src1, const interp_uint16* src2, unsigned count)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
|
@ -126,7 +126,7 @@ void hq4x_16_def(interp_uint16* restrict dst0, interp_uint16* restrict dst1, int
|
|||
}
|
||||
}
|
||||
|
||||
void hq4x_32_def(interp_uint32* restrict dst0, interp_uint32* restrict dst1, interp_uint32* restrict dst2, interp_uint32* restrict dst3, const interp_uint32* restrict src0, const interp_uint32* restrict src1, const interp_uint32* restrict src2, unsigned count)
|
||||
void hq4x_32_def(interp_uint32* dst0, interp_uint32* dst1, interp_uint32* dst2, interp_uint32* dst3, const interp_uint32* src0, const interp_uint32* src1, const interp_uint32* src2, unsigned count)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
|
|
|
@ -17,359 +17,371 @@
|
|||
// Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
|
||||
#include "hq_shared32.h"
|
||||
#define __STDC_CONSTANT_MACROS
|
||||
#include <stdint.h>
|
||||
|
||||
// Operands for the MMX code paths in this file.
//   reg_blank : all-zero quadword; second operand of punpcklbw / packuswb.
//   const7    : the 16-bit words 7, 7, 7, 0 (low to high); pmullw multiplier.
//   treshold  : not referenced in the visible code; its bytes 0x30, 0x07, 0x06
//               look like the Y/U/V thresholds packed together -- TODO confirm.
// uint64_t / UINT64_C come from <stdint.h> (included above) and replace the
// MSVC-only "unsigned __int64" so the file also builds with other compilers.
const uint64_t reg_blank = UINT64_C(0x0000000000000000);
const uint64_t const7    = UINT64_C(0x0000000700070007);
const uint64_t treshold  = UINT64_C(0x0000000000300706);
|
||||
|
||||
// Stores the 3:1 blend (c1*3 + c2) / 4 into the 32-bit pixel at pc.
// The arithmetic is done on the whole 32-bit word, not per colour channel.
//   pc : destination; 4 bytes are written.
//   c1 : colour weighted 3.
//   c2 : colour weighted 1.
// The inline assembly is MSVC syntax, so it is only used when _MSC_VER is
// defined; every other compiler gets the equivalent C expression.
void Interp1(unsigned char * pc, unsigned int c1, unsigned int c2)
{
#if defined(_MSC_VER) && defined(MMX)
  __asm
  {
    mov        eax, pc
    movd       mm1, c1
    movd       mm2, c2
    movq       mm0, mm1
    pslld      mm0, 2         ; c1 * 4
    psubd      mm0, mm1       ; c1 * 3
    paddd      mm0, mm2       ; c1 * 3 + c2
    psrld      mm0, 2         ; unsigned divide by 4
    movd       [eax], mm0
    EMMS
  }
#elif defined(_MSC_VER)
  __asm
  {
    mov        eax, pc
    mov        edx, c1
    shl        edx, 2         ; c1 * 4
    add        edx, c2
    sub        edx, c1        ; c1 * 3 + c2
    shr        edx, 2         ; unsigned divide by 4
    mov        [eax], edx
  }
#else
  *((int*)pc) = (c1*3+c2)/4;
#endif
}
|
||||
|
||||
// Stores the 2:1:1 blend (c1*2 + c2 + c3) / 4 into the 32-bit pixel at pc.
// The arithmetic is done on the whole 32-bit word, not per colour channel.
//   pc         : destination; 4 bytes are written.
//   c1         : colour weighted 2.
//   c2, c3     : colours weighted 1 each.
// The inline assembly is MSVC syntax, so it is only used when _MSC_VER is
// defined; every other compiler gets the equivalent C expression.
void Interp2(unsigned char * pc, unsigned int c1, unsigned int c2, unsigned int c3)
{
#if defined(_MSC_VER) && defined(MMX)
  __asm
  {
    mov        eax, pc
    movd       mm0, c1
    movd       mm1, c2
    movd       mm2, c3
    pslld      mm0, 1         ; c1 * 2
    paddd      mm0, mm1
    paddd      mm0, mm2       ; c1 * 2 + c2 + c3
    psrld      mm0, 2         ; logical shift: the sum is unsigned (was psrad,
                              ; which smeared bit 31 and disagreed with "shr")
    movd       [eax], mm0
    EMMS
  }
#elif defined(_MSC_VER)
  __asm
  {
    mov        eax, pc
    mov        edx, c1
    shl        edx, 1         ; c1 * 2
    add        edx, c2
    add        edx, c3
    shr        edx, 2         ; unsigned divide by 4
    mov        [eax], edx
  }
#else
  *((int*)pc) = (c1*2+c2+c3)/4;
#endif
}
|
||||
|
||||
// Stores the 7:1 blend (c1*7 + c2) / 8, computed separately for each of the
// three low colour bytes, into the 32-bit pixel at pc.
//   pc : destination; 4 bytes are written.
//   c1 : colour weighted 7.
//   c2 : colour weighted 1.
// Top byte: the C path writes 0; the MMX path writes (top byte of c2) >> 3
// because the top word of const7 is 0.
// The inline assembly is MSVC syntax, so it is only used when _MSC_VER is
// defined. The former integer-register asm blended the whole word at once,
// letting low bits of one channel leak into the next and disagreeing with
// the MMX path; it is replaced by the per-channel C expression.
void Interp3(unsigned char * pc, unsigned int c1, unsigned int c2)
{
#if defined(_MSC_VER) && defined(MMX)
  __asm
  {
    mov        eax, pc
    movd       mm1, c1
    movd       mm2, c2
    punpcklbw  mm1, reg_blank   ; widen each byte to a 16-bit word
    punpcklbw  mm2, reg_blank
    pmullw     mm1, const7      ; c1 * 7 per channel
    paddw      mm1, mm2
    psrlw      mm1, 3           ; / 8
    packuswb   mm1, reg_blank   ; narrow back to bytes
    movd       [eax], mm1
    EMMS
  }
#else
  // Green is handled alone and red/blue together so that each channel has
  // spare bits above it for the multiply; the masks drop the fraction bits.
  *((int*)pc) = ((((c1 & 0x00FF00)*7 + (c2 & 0x00FF00) ) & 0x0007F800) +
                 (((c1 & 0xFF00FF)*7 + (c2 & 0xFF00FF) ) & 0x07F807F8)) >> 3;
#endif
}
|
||||
|
||||
// Stores the 2:7:7 blend (c1*2 + (c2 + c3)*7) / 16, computed separately for
// each of the three low colour bytes, into the 32-bit pixel at pc.
//   pc     : destination; 4 bytes are written.
//   c1     : colour weighted 2.
//   c2, c3 : colours weighted 7 each.
// The inline assembly is MSVC syntax, so it is only used when _MSC_VER is
// defined; every other compiler gets the equivalent per-channel C expression.
void Interp4(unsigned char * pc, unsigned int c1, unsigned int c2, unsigned int c3)
{
#if defined(_MSC_VER) && defined(MMX)
  __asm
  {
    mov        eax, pc
    movd       mm1, c1
    movd       mm2, c2
    movd       mm3, c3
    punpcklbw  mm1, reg_blank   ; widen each byte to a 16-bit word
    punpcklbw  mm2, reg_blank
    punpcklbw  mm3, reg_blank
    psllw      mm1, 1           ; c1 * 2
    paddw      mm2, mm3         ; c2 + c3
    pmullw     mm2, const7      ; (c2 + c3) * 7
    paddw      mm1, mm2
    psrlw      mm1, 4           ; / 16
    packuswb   mm1, reg_blank   ; narrow back to bytes
    movd       [eax], mm1
    EMMS
  }
#elif defined(_MSC_VER)
  __asm
  {
    ; green channel
    mov        eax, [c1]
    and        eax, 0FF00h
    shl        eax, 1
    mov        ecx, [c2]
    and        ecx, 0FF00h
    mov        edx, [c3]
    and        edx, 0FF00h
    add        ecx, edx
    imul       ecx, ecx,7
    add        eax, ecx
    and        eax, 0FF000h

    ; red and blue channels together
    mov        ebx, [c1]
    and        ebx, 0FF00FFh
    shl        ebx, 1
    mov        ecx, [c2]
    and        ecx, 0FF00FFh
    mov        edx, [c3]
    and        edx, 0FF00FFh
    add        ecx, edx
    imul       ecx, ecx,7
    add        ebx, ecx
    and        ebx, 0FF00FF0h

    add        eax, ebx
    shr        eax, 4           ; / 16

    mov        ebx, pc
    mov        [ebx], eax
  }
#else
  *((int*)pc) = ((((c1 & 0x00FF00)*2 + ((c2 & 0x00FF00) + (c3 & 0x00FF00))*7 ) & 0x000FF000) +
                 (((c1 & 0xFF00FF)*2 + ((c2 & 0xFF00FF) + (c3 & 0xFF00FF))*7 ) & 0x0FF00FF0)) >> 4;
#endif
}
|
||||
|
||||
// Stores the 1:1 blend (c1 + c2) / 2 into the 32-bit pixel at pc.
// The arithmetic is done on the whole 32-bit word, not per colour channel.
//   pc     : destination; 4 bytes are written.
//   c1, c2 : the two colours to average.
// The inline assembly is MSVC syntax, so it is only used when _MSC_VER is
// defined; every other compiler gets the equivalent C expression.
void Interp5(unsigned char * pc, unsigned int c1, unsigned int c2)
{
#if defined(_MSC_VER) && defined(MMX)
  __asm
  {
    mov        eax, pc
    movd       mm0, c1
    movd       mm1, c2
    paddd      mm0, mm1
    psrld      mm0, 1         ; logical shift: the sum is unsigned (was psrad,
                              ; which smeared bit 31 and disagreed with "shr")
    movd       [eax], mm0
    EMMS
  }
#elif defined(_MSC_VER)
  __asm
  {
    mov        eax, pc
    mov        edx, c1
    add        edx, c2
    shr        edx, 1         ; unsigned divide by 2
    mov        [eax], edx
  }
#else
  *((int*)pc) = (c1+c2)/2;
#endif
}
|
||||
|
||||
|
||||
bool Diff(unsigned int c1, unsigned int c2)
|
||||
{
|
||||
unsigned int
|
||||
YUV1 = RGBtoYUV(c1),
|
||||
YUV2 = RGBtoYUV(c2);
|
||||
|
||||
if (YUV1 == YUV2) return false; // Save some processing power
|
||||
|
||||
#ifdef MMX
|
||||
unsigned int retval;
|
||||
__asm
|
||||
{
|
||||
mov eax, 0x7FFFFFFF
|
||||
movd mm7, eax ;mm7 = ABS_MASK = 0x7FFFFFFF
|
||||
|
||||
; Copy source colors in first reg
|
||||
movd mm0, YUV1
|
||||
movd mm1, YUV2
|
||||
|
||||
mov eax, 0x00FF0000
|
||||
movd mm6, eax ;mm6 = Ymask = 0x00FF0000
|
||||
|
||||
; Calculate color Y difference
|
||||
movq mm2, mm0
|
||||
movq mm3, mm1
|
||||
pand mm2, mm6
|
||||
pand mm3, mm6
|
||||
psubd mm2, mm3
|
||||
pand mm2, mm7
|
||||
|
||||
mov eax, 0x0000FF00
|
||||
movd mm6, eax ;mm6 = Umask = 0x0000FF00
|
||||
|
||||
; Calculate color U difference
|
||||
movq mm3, mm0
|
||||
movq mm4, mm1
|
||||
pand mm3, mm6
|
||||
pand mm4, mm6
|
||||
psubd mm3, mm4
|
||||
pand mm3, mm7
|
||||
|
||||
mov eax, 0x000000FF
|
||||
movd mm6, eax ;mm6 = Vmask = 0x000000FF
|
||||
|
||||
; Calculate color V difference
|
||||
movq mm4, mm0
|
||||
movq mm5, mm1
|
||||
pand mm4, mm6
|
||||
pand mm5, mm6
|
||||
psubd mm4, mm5
|
||||
pand mm4, mm7
|
||||
|
||||
mov eax, 0x00300000
|
||||
movd mm5, eax ;mm5 = trY = 0x00300000
|
||||
mov eax, 0x00000700
|
||||
movd mm6, eax ;mm6 = trU = 0x00000700
|
||||
mov eax, 0x00000006
|
||||
movd mm7, eax ;mm7 = trV = 0x00000006
|
||||
|
||||
; Compare the results
|
||||
pcmpgtd mm2, trY
|
||||
pcmpgtd mm3, trU
|
||||
pcmpgtd mm4, trV
|
||||
por mm2, mm3
|
||||
por mm2, mm4
|
||||
|
||||
movd retval, mm2
|
||||
|
||||
EMMS
|
||||
}
|
||||
return (retval != 0);
|
||||
#else
|
||||
return
|
||||
( abs32((YUV1 & Ymask) - (YUV2 & Ymask)) > trY ) ||
|
||||
( abs32((YUV1 & Umask) - (YUV2 & Umask)) > trU ) ||
|
||||
( abs32((YUV1 & Vmask) - (YUV2 & Vmask)) > trV );
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
// Converts a packed colour to a packed approximate YUV value.
//   c       : colour with R in bits 0-7, G in bits 8-15, B in bits 16-23;
//             bits 24-31 are ignored.
//   returns : (Y << 16) + (u << 8) + v, where
//               Y = (r + g + b) >> 2
//               u = 128 + ((r - b) >> 2)
//               v = 128 + ((-r + 2*g - b) >> 3)
// Y divides by 4 rather than 3; the original author's note says a real
// division by 3 slowed the emulation by about 10%.
//
// The former "#ifdef MMX" inline-assembly variant has been removed: it was
// MSVC syntax without a _MSC_VER guard, and it masked G and B in place
// without shifting them down to byte range before combining them, so it did
// not return the same value as this code.
unsigned int RGBtoYUV(unsigned int c)
{
  unsigned char r, g, b, Y, u, v;
  r = (c & 0x000000FF);
  g = (c & 0x0000FF00) >> 8;
  b = (c & 0x00FF0000) >> 16;
  Y = (r + g + b) >> 2;
  u = 128 + ((r - b) >> 2);       // r - b may be negative: relies on an arithmetic >>
  v = 128 + ((-r + 2*g -b)>>3);   // same remark
  return (Y<<16) + (u<<8) + v;

  // Higher-precision alternative kept from the original source, unused:
  //   y = ( 0.256788 * r + 0.504129 * g + 0.097906 * b) + 16;
  //   u = (-0.148223 * r - 0.290993 * g + 0.439216 * b) + 128;
  //   v = ( 0.439216 * r - 0.367788 * g - 0.071427 * b) + 128;
  //   return (y << 16) + (u << 8) + v;
}
|
||||
// All-zero quadword; second operand of punpcklbw / packuswb in the MMX paths.
const uint64_t reg_blank = UINT64_C(0x0000000000000000);
|
||||
// The 16-bit words 7, 7, 7, 0 (low to high); pmullw multiplier in Interp3 / Interp4.
const uint64_t const7 = UINT64_C(0x0000000700070007);
|
||||
// Not referenced in the visible code; its bytes 0x30, 0x07, 0x06 look like the
// Y/U/V thresholds packed together -- TODO confirm.
const uint64_t treshold = UINT64_C(0x0000000000300706);
|
||||
|
||||
// Blends two colours 3:1 and writes the result, (c1*3 + c2) / 4, to the
// 32-bit pixel at pc. The sum is formed on the whole word, not per channel.
//   pc : destination; 4 bytes are written.
//   c1 : colour weighted 3.
//   c2 : colour weighted 1.
// Back ends: MSVC + MMX asm, MSVC integer asm, plain C for other compilers.
void Interp1(unsigned char * pc, unsigned int c1, unsigned int c2)
{
#if defined(_MSC_VER) && defined(MMX)
  __asm
  {
    movd       mm1, c1
    movq       mm0, mm1
    pslld      mm0, 2         ; c1 * 4
    psubd      mm0, mm1       ; c1 * 3
    movd       mm2, c2
    paddd      mm0, mm2       ; c1 * 3 + c2
    psrld      mm0, 2         ; unsigned divide by 4
    mov        eax, pc
    movd       [eax], mm0
    emms
  }
#elif defined(_MSC_VER)
  __asm
  {
    mov        edx, c1
    shl        edx, 2         ; c1 * 4
    sub        edx, c1        ; c1 * 3
    add        edx, c2        ; c1 * 3 + c2
    shr        edx, 2         ; unsigned divide by 4
    mov        eax, pc
    mov        [eax], edx
  }
#else
  const unsigned int blended = (c1 * 3 + c2) >> 2;
  *((int*)pc) = blended;
#endif
}
|
||||
|
||||
// Stores the 2:1:1 blend (c1*2 + c2 + c3) / 4 into the 32-bit pixel at pc.
// The arithmetic is done on the whole 32-bit word, not per colour channel.
//   pc     : destination; 4 bytes are written.
//   c1     : colour weighted 2.
//   c2, c3 : colours weighted 1 each.
// Back ends: MSVC + MMX asm, MSVC integer asm, plain C for other compilers.
void Interp2(unsigned char * pc, unsigned int c1, unsigned int c2, unsigned int c3)
{
#if defined(_MSC_VER) && defined(MMX)
  __asm
  {
    mov        eax, pc
    movd       mm0, c1
    movd       mm1, c2
    movd       mm2, c3
    pslld      mm0, 1         ; c1 * 2
    paddd      mm0, mm1
    paddd      mm0, mm2       ; c1 * 2 + c2 + c3
    psrld      mm0, 2         ; logical shift, matching "shr" and the unsigned
                              ; "/4" in the other paths (was psrad, which
                              ; smeared bit 31 of the sum into the result)
    movd       [eax], mm0
    EMMS
  }
#elif defined(_MSC_VER)
  __asm
  {
    mov        eax, pc
    mov        edx, c1
    shl        edx, 1         ; c1 * 2
    add        edx, c2
    add        edx, c3
    shr        edx, 2         ; unsigned divide by 4
    mov        [eax], edx
  }
#else
  *((int*)pc) = (c1*2+c2+c3)/4;
#endif
}
|
||||
|
||||
// Stores the 7:1 blend (c1*7 + c2) / 8, computed separately for each of the
// three low colour bytes, into the 32-bit pixel at pc.
//   pc : destination; 4 bytes are written.
//   c1 : colour weighted 7.
//   c2 : colour weighted 1.
// Top byte: the C path writes 0; the MMX path writes (top byte of c2) >> 3
// because the top word of const7 is 0.
//
// Changes from the previous version:
//   * the C path stored the whole-word formula (c1*7+c2)/8 and immediately
//     overwrote it with the per-channel formula; the dead store is gone;
//   * the MSVC integer-register asm used the whole-word formula, which lets
//     low bits of one channel leak into the next and disagrees with both the
//     MMX path and the C path; non-MMX MSVC builds now use the C expression.
void Interp3(unsigned char * pc, unsigned int c1, unsigned int c2)
{
#if defined(_MSC_VER) && defined(MMX)
  __asm
  {
    mov        eax, pc
    movd       mm1, c1
    movd       mm2, c2
    punpcklbw  mm1, reg_blank   ; widen each byte to a 16-bit word
    punpcklbw  mm2, reg_blank
    pmullw     mm1, const7      ; c1 * 7 per channel
    paddw      mm1, mm2
    psrlw      mm1, 3           ; / 8
    packuswb   mm1, reg_blank   ; narrow back to bytes
    movd       [eax], mm1
    EMMS
  }
#else
  // Green is handled alone and red/blue together so that each channel has
  // spare bits above it for the multiply; the masks drop the fraction bits.
  *((int*)pc) = ((((c1 & 0x00FF00)*7 + (c2 & 0x00FF00) ) & 0x0007F800) +
                 (((c1 & 0xFF00FF)*7 + (c2 & 0xFF00FF) ) & 0x07F807F8)) >> 3;
#endif
}
|
||||
|
||||
// Stores the 2:7:7 blend (c1*2 + (c2 + c3)*7) / 16, computed separately for
// each of the three low colour bytes, into the 32-bit pixel at pc.
//   pc     : destination; 4 bytes are written.
//   c1     : colour weighted 2.
//   c2, c3 : colours weighted 7 each.
// Back ends: MSVC + MMX asm, MSVC integer asm, plain C for other compilers.
//
// The C path previously stored the whole-word formula (c1*2+(c2+c3)*7)/16 and
// immediately overwrote it with the per-channel formula; only the latter,
// which matches both asm paths, is kept.
void Interp4(unsigned char * pc, unsigned int c1, unsigned int c2, unsigned int c3)
{
#if defined(_MSC_VER) && defined(MMX)
  __asm
  {
    mov        eax, pc
    movd       mm1, c1
    movd       mm2, c2
    movd       mm3, c3
    punpcklbw  mm1, reg_blank   ; widen each byte to a 16-bit word
    punpcklbw  mm2, reg_blank
    punpcklbw  mm3, reg_blank
    psllw      mm1, 1           ; c1 * 2
    paddw      mm2, mm3         ; c2 + c3
    pmullw     mm2, const7      ; (c2 + c3) * 7
    paddw      mm1, mm2
    psrlw      mm1, 4           ; / 16
    packuswb   mm1, reg_blank   ; narrow back to bytes
    movd       [eax], mm1
    EMMS
  }
#elif defined(_MSC_VER)
  __asm
  {
    ; green channel
    mov        eax, [c1]
    and        eax, 0FF00h
    shl        eax, 1
    mov        ecx, [c2]
    and        ecx, 0FF00h
    mov        edx, [c3]
    and        edx, 0FF00h
    add        ecx, edx
    imul       ecx, ecx,7
    add        eax, ecx
    and        eax, 0FF000h

    ; red and blue channels together
    mov        ebx, [c1]
    and        ebx, 0FF00FFh
    shl        ebx, 1
    mov        ecx, [c2]
    and        ecx, 0FF00FFh
    mov        edx, [c3]
    and        edx, 0FF00FFh
    add        ecx, edx
    imul       ecx, ecx,7
    add        ebx, ecx
    and        ebx, 0FF00FF0h

    add        eax, ebx
    shr        eax, 4           ; / 16

    mov        ebx, pc
    mov        [ebx], eax
  }
#else
  *((int*)pc) = ((((c1 & 0x00FF00)*2 + ((c2 & 0x00FF00) + (c3 & 0x00FF00))*7 ) & 0x000FF000) +
                 (((c1 & 0xFF00FF)*2 + ((c2 & 0xFF00FF) + (c3 & 0xFF00FF))*7 ) & 0x0FF00FF0)) >> 4;
#endif
}
|
||||
|
||||
// Stores the 1:1 blend (c1 + c2) / 2 into the 32-bit pixel at pc.
// The arithmetic is done on the whole 32-bit word, not per colour channel.
//   pc     : destination; 4 bytes are written.
//   c1, c2 : the two colours to average.
// Back ends: MSVC + MMX asm, MSVC integer asm, plain C for other compilers.
void Interp5(unsigned char * pc, unsigned int c1, unsigned int c2)
{
#if defined(_MSC_VER) && defined(MMX)
  __asm
  {
    mov        eax, pc
    movd       mm0, c1
    movd       mm1, c2
    paddd      mm0, mm1
    psrld      mm0, 1         ; logical shift, matching "shr" and the unsigned
                              ; "/2" in the other paths (was psrad, which
                              ; smeared bit 31 of the sum into the result)
    movd       [eax], mm0
    EMMS
  }
#elif defined(_MSC_VER)
  __asm
  {
    mov        eax, pc
    mov        edx, c1
    add        edx, c2
    shr        edx, 1         ; unsigned divide by 2
    mov        [eax], edx
  }
#else
  *((int*)pc) = (c1+c2)/2;
#endif
}
|
||||
|
||||
|
||||
bool Diff(unsigned int c1, unsigned int c2)
|
||||
{
|
||||
unsigned int
|
||||
YUV1 = RGBtoYUV(c1),
|
||||
YUV2 = RGBtoYUV(c2);
|
||||
|
||||
if (YUV1 == YUV2) return false; // Save some processing power
|
||||
|
||||
#ifdef MMX
|
||||
unsigned int retval;
|
||||
__asm
|
||||
{
|
||||
mov eax, 0x7FFFFFFF
|
||||
movd mm7, eax ;mm7 = ABS_MASK = 0x7FFFFFFF
|
||||
|
||||
; Copy source colors in first reg
|
||||
movd mm0, YUV1
|
||||
movd mm1, YUV2
|
||||
|
||||
mov eax, 0x00FF0000
|
||||
movd mm6, eax ;mm6 = Ymask = 0x00FF0000
|
||||
|
||||
; Calculate color Y difference
|
||||
movq mm2, mm0
|
||||
movq mm3, mm1
|
||||
pand mm2, mm6
|
||||
pand mm3, mm6
|
||||
psubd mm2, mm3
|
||||
pand mm2, mm7
|
||||
|
||||
mov eax, 0x0000FF00
|
||||
movd mm6, eax ;mm6 = Umask = 0x0000FF00
|
||||
|
||||
; Calculate color U difference
|
||||
movq mm3, mm0
|
||||
movq mm4, mm1
|
||||
pand mm3, mm6
|
||||
pand mm4, mm6
|
||||
psubd mm3, mm4
|
||||
pand mm3, mm7
|
||||
|
||||
mov eax, 0x000000FF
|
||||
movd mm6, eax ;mm6 = Vmask = 0x000000FF
|
||||
|
||||
; Calculate color V difference
|
||||
movq mm4, mm0
|
||||
movq mm5, mm1
|
||||
pand mm4, mm6
|
||||
pand mm5, mm6
|
||||
psubd mm4, mm5
|
||||
pand mm4, mm7
|
||||
|
||||
mov eax, 0x00300000
|
||||
movd mm5, eax ;mm5 = trY = 0x00300000
|
||||
mov eax, 0x00000700
|
||||
movd mm6, eax ;mm6 = trU = 0x00000700
|
||||
mov eax, 0x00000006
|
||||
movd mm7, eax ;mm7 = trV = 0x00000006
|
||||
|
||||
; Compare the results
|
||||
pcmpgtd mm2, trY
|
||||
pcmpgtd mm3, trU
|
||||
pcmpgtd mm4, trV
|
||||
por mm2, mm3
|
||||
por mm2, mm4
|
||||
|
||||
movd retval, mm2
|
||||
|
||||
EMMS
|
||||
}
|
||||
return (retval != 0);
|
||||
#else
|
||||
return
|
||||
( abs32((YUV1 & Ymask) - (YUV2 & Ymask)) > trY ) ||
|
||||
( abs32((YUV1 & Umask) - (YUV2 & Umask)) > trU ) ||
|
||||
( abs32((YUV1 & Vmask) - (YUV2 & Vmask)) > trV );
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
// Converts a packed colour to a packed approximate YUV value.
//   c       : colour with R in bits 0-7, G in bits 8-15, B in bits 16-23;
//             bits 24-31 are ignored.
//   returns : (Y << 16) + (u << 8) + v, where
//               Y = (r + g + b) >> 2
//               u = 128 + ((r - b) >> 2)
//               v = 128 + ((-r + 2*g - b) >> 3)
// Y divides by 4 rather than 3; the original author's note says a real
// division by 3 slowed the emulation by about 10%.
//
// The former "#ifdef MMX" inline-assembly variant has been removed: unlike
// the Interp* functions it was not guarded by _MSC_VER, and it masked G and B
// in place without shifting them down to byte range before combining them,
// so it did not return the same value as this code.
unsigned int RGBtoYUV(unsigned int c)
{
  unsigned char r, g, b, Y, u, v;
  r = (c & 0x000000FF);
  g = (c & 0x0000FF00) >> 8;
  b = (c & 0x00FF0000) >> 16;
  Y = (r + g + b) >> 2;
  u = 128 + ((r - b) >> 2);       // r - b may be negative: relies on an arithmetic >>
  v = 128 + ((-r + 2*g -b)>>3);   // same remark
  return (Y<<16) + (u<<8) + v;

  // Higher-precision alternative kept from the original source, unused:
  //   y = ( 0.256788 * r + 0.504129 * g + 0.097906 * b) + 16;
  //   u = (-0.148223 * r - 0.290993 * g + 0.439216 * b) + 128;
  //   v = ( 0.439216 * r - 0.367788 * g - 0.071427 * b) + 128;
  //   return (y << 16) + (u << 8) + v;
}
|
||||
|
|
|
@ -87,4 +87,4 @@ void Interp3(unsigned char * pc, unsigned int c1, unsigned int c2);
|
|||
void Interp4(unsigned char * pc, unsigned int c1, unsigned int c2, unsigned int c3);
|
||||
void Interp5(unsigned char * pc, unsigned int c1, unsigned int c2);
|
||||
bool Diff(unsigned int c1, unsigned int c2);
|
||||
unsigned int RGBtoYUV(unsigned int c);
|
||||
unsigned int RGBtoYUV(unsigned int c);
|
||||
|
|
|
@ -31,6 +31,12 @@
|
|||
#ifndef __INTERP_H
|
||||
#define __INTERP_H
|
||||
|
||||
#define __STDC_CONSTANT_MACROS
|
||||
#include <stdint.h>
|
||||
|
||||
// 16-bit element type; the *_16_def functions take pointers to it for their source and destination rows.
typedef uint16_t interp_uint16;
|
||||
// 32-bit element type; the *_32_def functions take pointers to it for their source and destination rows.
typedef uint32_t interp_uint32;
|
||||
|
||||
/***************************************************************************/
|
||||
/* Basic types */
|
||||
|
||||
|
|
|
@ -41,7 +41,7 @@
|
|||
* This effect is derived from the hq3x effect made by Maxim Stepin
|
||||
*/
|
||||
|
||||
void lq3x_16_def(interp_uint16* restrict dst0, interp_uint16* restrict dst1, interp_uint16* restrict dst2, const interp_uint16* restrict src0, const interp_uint16* restrict src1, const interp_uint16* restrict src2, unsigned count)
|
||||
void lq3x_16_def(interp_uint16* dst0, interp_uint16* dst1, interp_uint16* dst2, const interp_uint16* src0, const interp_uint16* src1, const interp_uint16* src2, unsigned count)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
|
@ -124,7 +124,7 @@ void lq3x_16_def(interp_uint16* restrict dst0, interp_uint16* restrict dst1, int
|
|||
}
|
||||
}
|
||||
|
||||
void lq3x_32_def(interp_uint32* restrict dst0, interp_uint32* restrict dst1, interp_uint32* restrict dst2, const interp_uint32* restrict src0, const interp_uint32* restrict src1, const interp_uint32* restrict src2, unsigned count)
|
||||
void lq3x_32_def(interp_uint32* dst0, interp_uint32* dst1, interp_uint32* dst2, const interp_uint32* src0, const interp_uint32* src1, const interp_uint32* src2, unsigned count)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
|
|
|
@ -42,7 +42,7 @@
|
|||
* This effect is derived from the hq4x effect made by Maxim Stepin
|
||||
*/
|
||||
|
||||
void lq4x_16_def(interp_uint16* restrict dst0, interp_uint16* restrict dst1, interp_uint16* restrict dst2, interp_uint16* restrict dst3, const interp_uint16* restrict src0, const interp_uint16* restrict src1, const interp_uint16* restrict src2, unsigned count)
|
||||
void lq4x_16_def(interp_uint16* dst0, interp_uint16* dst1, interp_uint16* dst2, interp_uint16* dst3, const interp_uint16* src0, const interp_uint16* src1, const interp_uint16* src2, unsigned count)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
|
@ -126,7 +126,7 @@ void lq4x_16_def(interp_uint16* restrict dst0, interp_uint16* restrict dst1, int
|
|||
}
|
||||
}
|
||||
|
||||
void lq4x_32_def(interp_uint32* restrict dst0, interp_uint32* restrict dst1, interp_uint32* restrict dst2, interp_uint32* restrict dst3, const interp_uint32* restrict src0, const interp_uint32* restrict src1, const interp_uint32* restrict src2, unsigned count)
|
||||
void lq4x_32_def(interp_uint32* dst0, interp_uint32* dst1, interp_uint32* dst2, interp_uint32* dst3, const interp_uint32* src0, const interp_uint32* src1, const interp_uint32* src2, unsigned count)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
|
|
Loading…
Reference in New Issue