The C99 restrict keyword does not exist in C++ (yet, anyway). MSVC inline assembly should not be floating around by itself. Checking a define for MMX is stupid, but I won't worry about it right now. Link code is severely MSVC-only and should be cleaned up or moved.
git-svn-id: https://svn.code.sf.net/p/vbam/code/trunk@62 a31d4220-a93d-0410-bf67-fe4944624d44
This commit is contained in:
parent
f188d0727f
commit
bc661af3b9
|
@ -16,6 +16,7 @@
|
||||||
// along with this program; if not, write to the Free Software Foundation,
|
// along with this program; if not, write to the Free Software Foundation,
|
||||||
// Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
// Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
#include "Util.h"
|
#include "Util.h"
|
||||||
|
#define __STDC_CONSTANT_MACROS
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
extern "C"
|
extern "C"
|
||||||
|
|
|
@ -42,7 +42,7 @@
|
||||||
* This effect is a rewritten implementation of the hq4x effect made by Maxim Stepin
|
* This effect is a rewritten implementation of the hq4x effect made by Maxim Stepin
|
||||||
*/
|
*/
|
||||||
|
|
||||||
void hq4x_16_def(interp_uint16* restrict dst0, interp_uint16* restrict dst1, interp_uint16* restrict dst2, interp_uint16* restrict dst3, const interp_uint16* restrict src0, const interp_uint16* restrict src1, const interp_uint16* restrict src2, unsigned count)
|
void hq4x_16_def(interp_uint16* dst0, interp_uint16* dst1, interp_uint16* dst2, interp_uint16* dst3, const interp_uint16* src0, const interp_uint16* src1, const interp_uint16* src2, unsigned count)
|
||||||
{
|
{
|
||||||
unsigned i;
|
unsigned i;
|
||||||
|
|
||||||
|
@ -126,7 +126,7 @@ void hq4x_16_def(interp_uint16* restrict dst0, interp_uint16* restrict dst1, int
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void hq4x_32_def(interp_uint32* restrict dst0, interp_uint32* restrict dst1, interp_uint32* restrict dst2, interp_uint32* restrict dst3, const interp_uint32* restrict src0, const interp_uint32* restrict src1, const interp_uint32* restrict src2, unsigned count)
|
void hq4x_32_def(interp_uint32* dst0, interp_uint32* dst1, interp_uint32* dst2, interp_uint32* dst3, const interp_uint32* src0, const interp_uint32* src1, const interp_uint32* src2, unsigned count)
|
||||||
{
|
{
|
||||||
unsigned i;
|
unsigned i;
|
||||||
|
|
||||||
|
|
|
@ -17,359 +17,371 @@
|
||||||
// Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
// Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
|
||||||
#include "hq_shared32.h"
|
#include "hq_shared32.h"
|
||||||
|
#define __STDC_CONSTANT_MACROS
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
const unsigned __int64 reg_blank = 0x0000000000000000;
|
const uint64_t reg_blank = UINT64_C(0x0000000000000000);
|
||||||
const unsigned __int64 const7 = 0x0000000700070007;
|
const uint64_t const7 = UINT64_C(0x0000000700070007);
|
||||||
const unsigned __int64 treshold = 0x0000000000300706;
|
const uint64_t treshold = UINT64_C(0x0000000000300706);
|
||||||
|
|
||||||
void Interp1(unsigned char * pc, unsigned int c1, unsigned int c2)
|
void Interp1(unsigned char * pc, unsigned int c1, unsigned int c2)
|
||||||
{
|
{
|
||||||
//*((int*)pc) = (c1*3+c2)/4;
|
#ifdef _MSC_VER
|
||||||
|
#ifdef MMX
|
||||||
#ifdef MMX
|
__asm
|
||||||
__asm
|
{
|
||||||
{
|
mov eax, pc
|
||||||
mov eax, pc
|
movd mm1, c1
|
||||||
movd mm1, c1
|
movd mm2, c2
|
||||||
movd mm2, c2
|
movq mm0, mm1
|
||||||
movq mm0, mm1
|
pslld mm0, 2
|
||||||
pslld mm0, 2
|
psubd mm0, mm1
|
||||||
psubd mm0, mm1
|
paddd mm0, mm2
|
||||||
paddd mm0, mm2
|
psrld mm0, 2
|
||||||
psrld mm0, 2
|
movd [eax], mm0
|
||||||
movd [eax], mm0
|
EMMS
|
||||||
EMMS
|
}
|
||||||
}
|
#else
|
||||||
#else
|
__asm
|
||||||
__asm
|
{
|
||||||
{
|
mov eax, pc
|
||||||
mov eax, pc
|
mov edx, c1
|
||||||
mov edx, c1
|
shl edx, 2
|
||||||
shl edx, 2
|
add edx, c2
|
||||||
add edx, c2
|
sub edx, c1
|
||||||
sub edx, c1
|
shr edx, 2
|
||||||
shr edx, 2
|
mov [eax], edx
|
||||||
mov [eax], edx
|
}
|
||||||
}
|
#endif
|
||||||
#endif
|
#else
|
||||||
}
|
*((int*)pc) = (c1*3+c2)/4;
|
||||||
|
#endif
|
||||||
void Interp2(unsigned char * pc, unsigned int c1, unsigned int c2, unsigned int c3)
|
}
|
||||||
{
|
|
||||||
//*((int*)pc) = (c1*2+c2+c3)/4;
|
void Interp2(unsigned char * pc, unsigned int c1, unsigned int c2, unsigned int c3)
|
||||||
|
{
|
||||||
#ifdef MMX
|
#ifdef _MSC_VER
|
||||||
__asm
|
#ifdef MMX
|
||||||
{
|
__asm
|
||||||
mov eax, pc
|
{
|
||||||
movd mm0, c1
|
mov eax, pc
|
||||||
movd mm1, c2
|
movd mm0, c1
|
||||||
movd mm2, c3
|
movd mm1, c2
|
||||||
pslld mm0, 1
|
movd mm2, c3
|
||||||
paddd mm0, mm1
|
pslld mm0, 1
|
||||||
paddd mm0, mm2
|
paddd mm0, mm1
|
||||||
psrad mm0, 2
|
paddd mm0, mm2
|
||||||
movd [eax], mm0
|
psrad mm0, 2
|
||||||
EMMS
|
movd [eax], mm0
|
||||||
}
|
EMMS
|
||||||
#else
|
}
|
||||||
__asm
|
#else
|
||||||
{
|
__asm
|
||||||
mov eax, pc
|
{
|
||||||
mov edx, c1
|
mov eax, pc
|
||||||
shl edx, 1
|
mov edx, c1
|
||||||
add edx, c2
|
shl edx, 1
|
||||||
add edx, c3
|
add edx, c2
|
||||||
shr edx, 2
|
add edx, c3
|
||||||
mov [eax], edx
|
shr edx, 2
|
||||||
}
|
mov [eax], edx
|
||||||
#endif
|
}
|
||||||
}
|
#endif
|
||||||
|
#else
|
||||||
void Interp3(unsigned char * pc, unsigned int c1, unsigned int c2)
|
*((int*)pc) = (c1*2+c2+c3)/4;
|
||||||
{
|
#endif
|
||||||
//*((int*)pc) = (c1*7+c2)/8;
|
}
|
||||||
//*((int*)pc) = ((((c1 & 0x00FF00)*7 + (c2 & 0x00FF00) ) & 0x0007F800) +
|
|
||||||
// (((c1 & 0xFF00FF)*7 + (c2 & 0xFF00FF) ) & 0x07F807F8)) >> 3;
|
void Interp3(unsigned char * pc, unsigned int c1, unsigned int c2)
|
||||||
|
{
|
||||||
#ifdef MMX
|
#ifdef _MSC_VER
|
||||||
__asm
|
#ifdef MMX
|
||||||
{
|
__asm
|
||||||
mov eax, pc
|
{
|
||||||
movd mm1, c1
|
mov eax, pc
|
||||||
movd mm2, c2
|
movd mm1, c1
|
||||||
punpcklbw mm1, reg_blank
|
movd mm2, c2
|
||||||
punpcklbw mm2, reg_blank
|
punpcklbw mm1, reg_blank
|
||||||
pmullw mm1, const7
|
punpcklbw mm2, reg_blank
|
||||||
paddw mm1, mm2
|
pmullw mm1, const7
|
||||||
psrlw mm1, 3
|
paddw mm1, mm2
|
||||||
packuswb mm1, reg_blank
|
psrlw mm1, 3
|
||||||
movd [eax], mm1
|
packuswb mm1, reg_blank
|
||||||
EMMS
|
movd [eax], mm1
|
||||||
}
|
EMMS
|
||||||
#else
|
}
|
||||||
__asm
|
#else
|
||||||
{
|
__asm
|
||||||
mov eax, c1
|
{
|
||||||
mov ebx, c2
|
mov eax, c1
|
||||||
mov ecx, eax
|
mov ebx, c2
|
||||||
shl ecx, 3
|
mov ecx, eax
|
||||||
sub ecx, eax
|
shl ecx, 3
|
||||||
add ecx, ebx
|
sub ecx, eax
|
||||||
shr ecx, 3
|
add ecx, ebx
|
||||||
mov eax, pc
|
shr ecx, 3
|
||||||
mov [eax], ecx
|
mov eax, pc
|
||||||
}
|
mov [eax], ecx
|
||||||
#endif
|
}
|
||||||
}
|
#endif
|
||||||
|
#else
|
||||||
void Interp4(unsigned char * pc, unsigned int c1, unsigned int c2, unsigned int c3)
|
*((int*)pc) = (c1*7+c2)/8;
|
||||||
{
|
*((int*)pc) = ((((c1 & 0x00FF00)*7 + (c2 & 0x00FF00) ) & 0x0007F800) +
|
||||||
//*((int*)pc) = (c1*2+(c2+c3)*7)/16;
|
(((c1 & 0xFF00FF)*7 + (c2 & 0xFF00FF) ) & 0x07F807F8)) >> 3;
|
||||||
//*((int*)pc) = ((((c1 & 0x00FF00)*2 + ((c2 & 0x00FF00) + (c3 & 0x00FF00))*7 ) & 0x000FF000) +
|
#endif
|
||||||
// (((c1 & 0xFF00FF)*2 + ((c2 & 0xFF00FF) + (c3 & 0xFF00FF))*7 ) & 0x0FF00FF0)) >> 4;
|
}
|
||||||
|
|
||||||
#ifdef MMX
|
void Interp4(unsigned char * pc, unsigned int c1, unsigned int c2, unsigned int c3)
|
||||||
__asm
|
{
|
||||||
{
|
#ifdef _MSC_VER
|
||||||
mov eax, pc
|
#ifdef MMX
|
||||||
movd mm1, c1
|
__asm
|
||||||
movd mm2, c2
|
{
|
||||||
movd mm3, c3
|
mov eax, pc
|
||||||
punpcklbw mm1, reg_blank
|
movd mm1, c1
|
||||||
punpcklbw mm2, reg_blank
|
movd mm2, c2
|
||||||
punpcklbw mm3, reg_blank
|
movd mm3, c3
|
||||||
psllw mm1, 1
|
punpcklbw mm1, reg_blank
|
||||||
paddw mm2, mm3
|
punpcklbw mm2, reg_blank
|
||||||
pmullw mm2, const7
|
punpcklbw mm3, reg_blank
|
||||||
paddw mm1, mm2
|
psllw mm1, 1
|
||||||
psrlw mm1, 4
|
paddw mm2, mm3
|
||||||
packuswb mm1, reg_blank
|
pmullw mm2, const7
|
||||||
movd [eax], mm1
|
paddw mm1, mm2
|
||||||
EMMS
|
psrlw mm1, 4
|
||||||
}
|
packuswb mm1, reg_blank
|
||||||
#else
|
movd [eax], mm1
|
||||||
|
EMMS
|
||||||
__asm
|
}
|
||||||
{
|
#else
|
||||||
mov eax, [c1]
|
|
||||||
and eax, 0FF00h
|
__asm
|
||||||
shl eax, 1
|
{
|
||||||
mov ecx, [c2]
|
mov eax, [c1]
|
||||||
and ecx, 0FF00h
|
and eax, 0FF00h
|
||||||
mov edx, [c3]
|
shl eax, 1
|
||||||
and edx, 0FF00h
|
mov ecx, [c2]
|
||||||
add ecx, edx
|
and ecx, 0FF00h
|
||||||
imul ecx, ecx,7
|
mov edx, [c3]
|
||||||
add eax, ecx
|
and edx, 0FF00h
|
||||||
and eax, 0FF000h
|
add ecx, edx
|
||||||
|
imul ecx, ecx,7
|
||||||
mov ebx, [c1]
|
add eax, ecx
|
||||||
and ebx, 0FF00FFh
|
and eax, 0FF000h
|
||||||
shl ebx, 1
|
|
||||||
mov ecx, [c2]
|
mov ebx, [c1]
|
||||||
and ecx, 0FF00FFh
|
and ebx, 0FF00FFh
|
||||||
mov edx, [c3]
|
shl ebx, 1
|
||||||
and edx, 0FF00FFh
|
mov ecx, [c2]
|
||||||
add ecx, edx
|
and ecx, 0FF00FFh
|
||||||
imul ecx, ecx,7
|
mov edx, [c3]
|
||||||
add ebx, ecx
|
and edx, 0FF00FFh
|
||||||
and ebx, 0FF00FF0h
|
add ecx, edx
|
||||||
|
imul ecx, ecx,7
|
||||||
add eax, ebx
|
add ebx, ecx
|
||||||
shr eax, 4
|
and ebx, 0FF00FF0h
|
||||||
|
|
||||||
mov ebx, pc
|
add eax, ebx
|
||||||
mov [ebx], eax
|
shr eax, 4
|
||||||
}
|
|
||||||
#endif
|
mov ebx, pc
|
||||||
}
|
mov [ebx], eax
|
||||||
|
}
|
||||||
void Interp5(unsigned char * pc, unsigned int c1, unsigned int c2)
|
#endif
|
||||||
{
|
#else
|
||||||
//*((int*)pc) = (c1+c2)/2;
|
*((int*)pc) = (c1*2+(c2+c3)*7)/16;
|
||||||
|
*((int*)pc) = ((((c1 & 0x00FF00)*2 + ((c2 & 0x00FF00) + (c3 & 0x00FF00))*7 ) & 0x000FF000) +
|
||||||
#ifdef MMX
|
(((c1 & 0xFF00FF)*2 + ((c2 & 0xFF00FF) + (c3 & 0xFF00FF))*7 ) & 0x0FF00FF0)) >> 4;
|
||||||
__asm
|
#endif
|
||||||
{
|
}
|
||||||
mov eax, pc
|
|
||||||
movd mm0, c1
|
void Interp5(unsigned char * pc, unsigned int c1, unsigned int c2)
|
||||||
movd mm1, c2
|
{
|
||||||
paddd mm0, mm1
|
#ifdef _MSC_VER
|
||||||
psrad mm0, 1
|
#ifdef MMX
|
||||||
movd [eax], mm0
|
__asm
|
||||||
EMMS
|
{
|
||||||
}
|
mov eax, pc
|
||||||
#else
|
movd mm0, c1
|
||||||
__asm
|
movd mm1, c2
|
||||||
{
|
paddd mm0, mm1
|
||||||
mov eax, pc
|
psrad mm0, 1
|
||||||
mov edx, c1
|
movd [eax], mm0
|
||||||
add edx, c2
|
EMMS
|
||||||
shr edx, 1
|
}
|
||||||
mov [eax], edx
|
#else
|
||||||
}
|
__asm
|
||||||
#endif
|
{
|
||||||
}
|
mov eax, pc
|
||||||
|
mov edx, c1
|
||||||
|
add edx, c2
|
||||||
bool Diff(unsigned int c1, unsigned int c2)
|
shr edx, 1
|
||||||
{
|
mov [eax], edx
|
||||||
unsigned int
|
}
|
||||||
YUV1 = RGBtoYUV(c1),
|
#endif
|
||||||
YUV2 = RGBtoYUV(c2);
|
#else
|
||||||
|
*((int*)pc) = (c1+c2)/2;
|
||||||
if (YUV1 == YUV2) return false; // Save some processing power
|
#endif
|
||||||
|
}
|
||||||
#ifdef MMX
|
|
||||||
unsigned int retval;
|
|
||||||
__asm
|
bool Diff(unsigned int c1, unsigned int c2)
|
||||||
{
|
{
|
||||||
mov eax, 0x7FFFFFFF
|
unsigned int
|
||||||
movd mm7, eax ;mm7 = ABS_MASK = 0x7FFFFFFF
|
YUV1 = RGBtoYUV(c1),
|
||||||
|
YUV2 = RGBtoYUV(c2);
|
||||||
; Copy source colors in first reg
|
|
||||||
movd mm0, YUV1
|
if (YUV1 == YUV2) return false; // Save some processing power
|
||||||
movd mm1, YUV2
|
|
||||||
|
#ifdef MMX
|
||||||
mov eax, 0x00FF0000
|
unsigned int retval;
|
||||||
movd mm6, eax ;mm6 = Ymask = 0x00FF0000
|
__asm
|
||||||
|
{
|
||||||
; Calculate color Y difference
|
mov eax, 0x7FFFFFFF
|
||||||
movq mm2, mm0
|
movd mm7, eax ;mm7 = ABS_MASK = 0x7FFFFFFF
|
||||||
movq mm3, mm1
|
|
||||||
pand mm2, mm6
|
; Copy source colors in first reg
|
||||||
pand mm3, mm6
|
movd mm0, YUV1
|
||||||
psubd mm2, mm3
|
movd mm1, YUV2
|
||||||
pand mm2, mm7
|
|
||||||
|
mov eax, 0x00FF0000
|
||||||
mov eax, 0x0000FF00
|
movd mm6, eax ;mm6 = Ymask = 0x00FF0000
|
||||||
movd mm6, eax ;mm6 = Umask = 0x0000FF00
|
|
||||||
|
; Calculate color Y difference
|
||||||
; Calculate color U difference
|
movq mm2, mm0
|
||||||
movq mm3, mm0
|
movq mm3, mm1
|
||||||
movq mm4, mm1
|
pand mm2, mm6
|
||||||
pand mm3, mm6
|
pand mm3, mm6
|
||||||
pand mm4, mm6
|
psubd mm2, mm3
|
||||||
psubd mm3, mm4
|
pand mm2, mm7
|
||||||
pand mm3, mm7
|
|
||||||
|
mov eax, 0x0000FF00
|
||||||
mov eax, 0x000000FF
|
movd mm6, eax ;mm6 = Umask = 0x0000FF00
|
||||||
movd mm6, eax ;mm6 = Vmask = 0x000000FF
|
|
||||||
|
; Calculate color U difference
|
||||||
; Calculate color V difference
|
movq mm3, mm0
|
||||||
movq mm4, mm0
|
movq mm4, mm1
|
||||||
movq mm5, mm1
|
pand mm3, mm6
|
||||||
pand mm4, mm6
|
pand mm4, mm6
|
||||||
pand mm5, mm6
|
psubd mm3, mm4
|
||||||
psubd mm4, mm5
|
pand mm3, mm7
|
||||||
pand mm4, mm7
|
|
||||||
|
mov eax, 0x000000FF
|
||||||
mov eax, 0x00300000
|
movd mm6, eax ;mm6 = Vmask = 0x000000FF
|
||||||
movd mm5, eax ;mm5 = trY = 0x00300000
|
|
||||||
mov eax, 0x00000700
|
; Calculate color V difference
|
||||||
movd mm6, eax ;mm6 = trU = 0x00000700
|
movq mm4, mm0
|
||||||
mov eax, 0x00000006
|
movq mm5, mm1
|
||||||
movd mm7, eax ;mm7 = trV = 0x00000006
|
pand mm4, mm6
|
||||||
|
pand mm5, mm6
|
||||||
; Compare the results
|
psubd mm4, mm5
|
||||||
pcmpgtd mm2, trY
|
pand mm4, mm7
|
||||||
pcmpgtd mm3, trU
|
|
||||||
pcmpgtd mm4, trV
|
mov eax, 0x00300000
|
||||||
por mm2, mm3
|
movd mm5, eax ;mm5 = trY = 0x00300000
|
||||||
por mm2, mm4
|
mov eax, 0x00000700
|
||||||
|
movd mm6, eax ;mm6 = trU = 0x00000700
|
||||||
movd retval, mm2
|
mov eax, 0x00000006
|
||||||
|
movd mm7, eax ;mm7 = trV = 0x00000006
|
||||||
EMMS
|
|
||||||
}
|
; Compare the results
|
||||||
return (retval != 0);
|
pcmpgtd mm2, trY
|
||||||
#else
|
pcmpgtd mm3, trU
|
||||||
return
|
pcmpgtd mm4, trV
|
||||||
( abs32((YUV1 & Ymask) - (YUV2 & Ymask)) > trY ) ||
|
por mm2, mm3
|
||||||
( abs32((YUV1 & Umask) - (YUV2 & Umask)) > trU ) ||
|
por mm2, mm4
|
||||||
( abs32((YUV1 & Vmask) - (YUV2 & Vmask)) > trV );
|
|
||||||
#endif
|
movd retval, mm2
|
||||||
}
|
|
||||||
|
EMMS
|
||||||
|
}
|
||||||
unsigned int RGBtoYUV(unsigned int c)
|
return (retval != 0);
|
||||||
{ // Division through 3 slows down the emulation about 10% !!!
|
#else
|
||||||
#ifdef MMX
|
return
|
||||||
unsigned int retval;
|
( abs32((YUV1 & Ymask) - (YUV2 & Ymask)) > trY ) ||
|
||||||
__asm
|
( abs32((YUV1 & Umask) - (YUV2 & Umask)) > trU ) ||
|
||||||
{
|
( abs32((YUV1 & Vmask) - (YUV2 & Vmask)) > trV );
|
||||||
movd mm0, c
|
#endif
|
||||||
movq mm1, mm0
|
}
|
||||||
movq mm2, mm0 ;mm0=mm1=mm2=c
|
|
||||||
|
|
||||||
mov eax, 0x000000FF
|
unsigned int RGBtoYUV(unsigned int c)
|
||||||
movd mm5, eax ;mm5 = REDMASK = 0x000000FF
|
{ // Division through 3 slows down the emulation about 10% !!!
|
||||||
mov eax, 0x0000FF00
|
#ifdef MMX
|
||||||
movd mm6, eax ;mm6 = GREENMASK = 0x0000FF00
|
unsigned int retval;
|
||||||
mov eax, 0x00FF0000
|
__asm
|
||||||
movd mm7, eax ;mm7 = BLUEMASK = 0x00FF0000
|
{
|
||||||
|
movd mm0, c
|
||||||
|
movq mm1, mm0
|
||||||
pand mm0, mm5
|
movq mm2, mm0 ;mm0=mm1=mm2=c
|
||||||
pand mm1, mm6
|
|
||||||
pand mm2, mm7 ;mm0=R mm1=G mm2=B
|
mov eax, 0x000000FF
|
||||||
|
movd mm5, eax ;mm5 = REDMASK = 0x000000FF
|
||||||
movq mm3, mm0
|
mov eax, 0x0000FF00
|
||||||
paddd mm3, mm1
|
movd mm6, eax ;mm6 = GREENMASK = 0x0000FF00
|
||||||
paddd mm3, mm2
|
mov eax, 0x00FF0000
|
||||||
; psrld mm3, 2 ;mm3=Y
|
movd mm7, eax ;mm7 = BLUEMASK = 0x00FF0000
|
||||||
; pslld mm3, 16
|
|
||||||
pslld mm3, 14 ;mm3=Y<<16
|
|
||||||
|
pand mm0, mm5
|
||||||
mov eax, 512
|
pand mm1, mm6
|
||||||
movd mm7, eax ;mm7 = 128 << 2 = 512
|
pand mm2, mm7 ;mm0=R mm1=G mm2=B
|
||||||
|
|
||||||
movq mm4, mm0
|
movq mm3, mm0
|
||||||
psubd mm4, mm2
|
paddd mm3, mm1
|
||||||
; psrld mm4, 2
|
paddd mm3, mm2
|
||||||
; paddd mm4, mm7 ;mm4=U
|
; psrld mm3, 2 ;mm3=Y
|
||||||
; pslld mm4, 8 ;mm4=U<<8
|
; pslld mm3, 16
|
||||||
paddd mm4, mm7
|
pslld mm3, 14 ;mm3=Y<<16
|
||||||
pslld mm4, 6
|
|
||||||
|
mov eax, 512
|
||||||
mov eax, 128
|
movd mm7, eax ;mm7 = 128 << 2 = 512
|
||||||
movd mm7, eax ;mm7 = 128
|
|
||||||
|
movq mm4, mm0
|
||||||
movq mm5, mm1
|
psubd mm4, mm2
|
||||||
pslld mm5, 1
|
; psrld mm4, 2
|
||||||
psubd mm5, mm0
|
; paddd mm4, mm7 ;mm4=U
|
||||||
psubd mm5, mm2
|
; pslld mm4, 8 ;mm4=U<<8
|
||||||
psrld mm5, 3
|
paddd mm4, mm7
|
||||||
paddd mm5, mm7 ;mm5=V
|
pslld mm4, 6
|
||||||
|
|
||||||
paddd mm5, mm4
|
mov eax, 128
|
||||||
paddd mm5, mm3
|
movd mm7, eax ;mm7 = 128
|
||||||
|
|
||||||
movd retval, mm5
|
movq mm5, mm1
|
||||||
|
pslld mm5, 1
|
||||||
EMMS
|
psubd mm5, mm0
|
||||||
}
|
psubd mm5, mm2
|
||||||
return retval;
|
psrld mm5, 3
|
||||||
#else
|
paddd mm5, mm7 ;mm5=V
|
||||||
unsigned char r, g, b, Y, u, v;
|
|
||||||
r = (c & 0x000000FF);
|
paddd mm5, mm4
|
||||||
g = (c & 0x0000FF00) >> 8;
|
paddd mm5, mm3
|
||||||
b = (c & 0x00FF0000) >> 16;
|
|
||||||
Y = (r + g + b) >> 2;
|
movd retval, mm5
|
||||||
u = 128 + ((r - b) >> 2);
|
|
||||||
v = 128 + ((-r + 2*g -b)>>3);
|
EMMS
|
||||||
return (Y<<16) + (u<<8) + v;
|
}
|
||||||
|
return retval;
|
||||||
// Extremely High Quality Code
|
#else
|
||||||
//unsigned char r, g, b;
|
unsigned char r, g, b, Y, u, v;
|
||||||
//r = c & 0xFF;
|
r = (c & 0x000000FF);
|
||||||
//g = (c >> 8) & 0xFF;
|
g = (c & 0x0000FF00) >> 8;
|
||||||
//b = (c >> 16) & 0xFF;
|
b = (c & 0x00FF0000) >> 16;
|
||||||
//unsigned char y, u, v;
|
Y = (r + g + b) >> 2;
|
||||||
//y = (0.256788 * r + 0.504129 * g + 0.097906 * b) + 16;
|
u = 128 + ((r - b) >> 2);
|
||||||
//u = (-0.148223 * r - 0.290993 * g + 0.439216 * b) + 128;
|
v = 128 + ((-r + 2*g -b)>>3);
|
||||||
//v = (0.439216 * r - 0.367788 * g - 0.071427 * b) + 128;
|
return (Y<<16) + (u<<8) + v;
|
||||||
//return (y << 16) + (u << 8) + v;
|
|
||||||
#endif
|
// Extremely High Quality Code
|
||||||
}
|
//unsigned char r, g, b;
|
||||||
|
//r = c & 0xFF;
|
||||||
|
//g = (c >> 8) & 0xFF;
|
||||||
|
//b = (c >> 16) & 0xFF;
|
||||||
|
//unsigned char y, u, v;
|
||||||
|
//y = (0.256788 * r + 0.504129 * g + 0.097906 * b) + 16;
|
||||||
|
//u = (-0.148223 * r - 0.290993 * g + 0.439216 * b) + 128;
|
||||||
|
//v = (0.439216 * r - 0.367788 * g - 0.071427 * b) + 128;
|
||||||
|
//return (y << 16) + (u << 8) + v;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
|
@ -87,4 +87,4 @@ void Interp3(unsigned char * pc, unsigned int c1, unsigned int c2);
|
||||||
void Interp4(unsigned char * pc, unsigned int c1, unsigned int c2, unsigned int c3);
|
void Interp4(unsigned char * pc, unsigned int c1, unsigned int c2, unsigned int c3);
|
||||||
void Interp5(unsigned char * pc, unsigned int c1, unsigned int c2);
|
void Interp5(unsigned char * pc, unsigned int c1, unsigned int c2);
|
||||||
bool Diff(unsigned int c1, unsigned int c2);
|
bool Diff(unsigned int c1, unsigned int c2);
|
||||||
unsigned int RGBtoYUV(unsigned int c);
|
unsigned int RGBtoYUV(unsigned int c);
|
||||||
|
|
|
@ -31,6 +31,12 @@
|
||||||
#ifndef __INTERP_H
|
#ifndef __INTERP_H
|
||||||
#define __INTERP_H
|
#define __INTERP_H
|
||||||
|
|
||||||
|
#define __STDC_CONSTANT_MACROS
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
typedef uint16_t interp_uint16;
|
||||||
|
typedef uint32_t interp_uint32;
|
||||||
|
|
||||||
/***************************************************************************/
|
/***************************************************************************/
|
||||||
/* Basic types */
|
/* Basic types */
|
||||||
|
|
||||||
|
|
|
@ -41,7 +41,7 @@
|
||||||
* This effect is derived from the hq3x effect made by Maxim Stepin
|
* This effect is derived from the hq3x effect made by Maxim Stepin
|
||||||
*/
|
*/
|
||||||
|
|
||||||
void lq3x_16_def(interp_uint16* restrict dst0, interp_uint16* restrict dst1, interp_uint16* restrict dst2, const interp_uint16* restrict src0, const interp_uint16* restrict src1, const interp_uint16* restrict src2, unsigned count)
|
void lq3x_16_def(interp_uint16* dst0, interp_uint16* dst1, interp_uint16* dst2, const interp_uint16* src0, const interp_uint16* src1, const interp_uint16* src2, unsigned count)
|
||||||
{
|
{
|
||||||
unsigned i;
|
unsigned i;
|
||||||
|
|
||||||
|
@ -124,7 +124,7 @@ void lq3x_16_def(interp_uint16* restrict dst0, interp_uint16* restrict dst1, int
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void lq3x_32_def(interp_uint32* restrict dst0, interp_uint32* restrict dst1, interp_uint32* restrict dst2, const interp_uint32* restrict src0, const interp_uint32* restrict src1, const interp_uint32* restrict src2, unsigned count)
|
void lq3x_32_def(interp_uint32* dst0, interp_uint32* dst1, interp_uint32* dst2, const interp_uint32* src0, const interp_uint32* src1, const interp_uint32* src2, unsigned count)
|
||||||
{
|
{
|
||||||
unsigned i;
|
unsigned i;
|
||||||
|
|
||||||
|
|
|
@ -42,7 +42,7 @@
|
||||||
* This effect is derived from the hq4x effect made by Maxim Stepin
|
* This effect is derived from the hq4x effect made by Maxim Stepin
|
||||||
*/
|
*/
|
||||||
|
|
||||||
void lq4x_16_def(interp_uint16* restrict dst0, interp_uint16* restrict dst1, interp_uint16* restrict dst2, interp_uint16* restrict dst3, const interp_uint16* restrict src0, const interp_uint16* restrict src1, const interp_uint16* restrict src2, unsigned count)
|
void lq4x_16_def(interp_uint16* dst0, interp_uint16* dst1, interp_uint16* dst2, interp_uint16* dst3, const interp_uint16* src0, const interp_uint16* src1, const interp_uint16* src2, unsigned count)
|
||||||
{
|
{
|
||||||
unsigned i;
|
unsigned i;
|
||||||
|
|
||||||
|
@ -126,7 +126,7 @@ void lq4x_16_def(interp_uint16* restrict dst0, interp_uint16* restrict dst1, int
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void lq4x_32_def(interp_uint32* restrict dst0, interp_uint32* restrict dst1, interp_uint32* restrict dst2, interp_uint32* restrict dst3, const interp_uint32* restrict src0, const interp_uint32* restrict src1, const interp_uint32* restrict src2, unsigned count)
|
void lq4x_32_def(interp_uint32* dst0, interp_uint32* dst1, interp_uint32* dst2, interp_uint32* dst3, const interp_uint32* src0, const interp_uint32* src1, const interp_uint32* src2, unsigned count)
|
||||||
{
|
{
|
||||||
unsigned i;
|
unsigned i;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue