; visualboyadvance-m/src/2xSaImmx.asm
;
; 2110 lines
; 53 KiB
; NASM

;/*---------------------------------------------------------------------*
; * The following (piece of) code, (part of) the 2xSaI engine, *
; * copyright (c) 1999 - 2001 by Derek Liauw Kie Fa. *
; * Non-Commercial use of this software is allowed and is encouraged, *
; * provided that appropriate credit be given. *
; * You may freely modify this code, but I request *
; * that any improvements to the engine be submitted to me, so *
; * that I can implement these improvements in newer versions of *
; * the software. *
; * If you need more information, have any comments or suggestions, *
; * you can e-mail me. My e-mail: derek-liauw@usa.net. *
; *---------------------------------------------------------------------*/
;----------------------
; 2xSaI version 0.59 WIP, soon to become version 0.60
;----------------------
;%define FAR_POINTER
; (Enable the define above to build the variants that take a destination
;  selector argument and store the output through fs.)

BITS 32

; Exported entry points. DJGPP builds carry one extra leading underscore.
%ifdef __DJGPP__
        GLOBAL __2xSaILine
        GLOBAL __2xSaISuperEagleLine
        GLOBAL __2xSaISuper2xSaILine
        GLOBAL _Init_2xSaIMMX
%else
        GLOBAL _2xSaILine
        GLOBAL _2xSaISuperEagleLine
        GLOBAL _2xSaISuper2xSaILine
        GLOBAL Init_2xSaIMMX
%endif

SECTION .text ALIGN = 32

; Prototype shared by the three line routines. The argument order follows
; the stack offsets below (deltaPtr is the second argument, at [ebp+12]).
; NOTE(review): deltaPtr's pointee type is not visible here; uint8* assumed.
%ifdef FAR_POINTER
;EXTERN_C void _2xSaILine (uint8 *srcPtr, uint8 *deltaPtr, uint32 srcPitch,
;                          uint32 width, uint8 *dstPtr, uint32 dstPitch,
;                          uint16 dstSegment);
%else
;EXTERN_C void _2xSaILine (uint8 *srcPtr, uint8 *deltaPtr, uint32 srcPitch,
;                          uint32 width, uint8 *dstPtr, uint32 dstPitch);
%endif

; Argument offsets from ebp once the routines have executed
; "push ebp / mov ebp, esp" ([ebp] = saved ebp, [ebp+4] = return address).
srcPtr          equ     8
deltaPtr        equ     12
srcPitch        equ     16
width           equ     20
dstOffset       equ     24
dstPitch        equ     28
dstSegment      equ     32              ; only read when FAR_POINTER is defined

; Byte offsets of the neighbouring 16-bit pixels within one source row
; (-2 = one pixel to the left, 0 = current pixel, 2 and 4 = to the right).
; Each group below names the pixels of one row; which row a name belongs
; to is decided by the row address it is combined with.
colorB0         equ     -2
colorB1         equ     0
colorB2         equ     2
colorB3         equ     4

color7          equ     -2
color8          equ     0
color9          equ     2

color4          equ     -2
color5          equ     0
color6          equ     2
colorS2         equ     4

color1          equ     -2
color2          equ     0
color3          equ     2
colorS1         equ     4

colorA0         equ     -2
colorA1         equ     0
colorA2         equ     2
colorA3         equ     4
;-------------------------------------------------------------------------
; void _2xSaISuper2xSaILine (uint8 *srcPtr, uint8 *deltaPtr,
;                            uint32 srcPitch, uint32 width,
;                            uint8 *dstPtr, uint32 dstPitch
;                            [, uint16 dstSegment]);  FAR_POINTER builds only
; (exported as __2xSaISuper2xSaILine under DJGPP)
;
; Super2xSaI kernel for one scanline of 16-bit pixels. Every loop pass
; takes four source pixels and writes eight pixels at the destination
; cursor plus eight more dstPitch bytes further on.
;
; Pixel names used below (offsets from the equate table, rows by address):
;       [eax]          B0 B1 B2 B3     row above srcPtr
;       [eax+ebx]      4  5  6  S2     row at srcPtr
;       [eax+ebx*2]    1  2  3  S1
;       [eax+ebx*3]    A0 A1 A2 A3
;
; ABI:   32-bit, arguments on the stack, caller removes them (plain ret).
; Regs:  eax = source cursor (srcPtr - srcPitch), ebx = srcPitch,
;        edx = destination cursor, ecx = scratch (the remaining pixel
;        count is kept on the stack inside the loop).
; Out:   nothing. pushad/popad preserve every general register and emms
;        releases the MMX state before returning.
; Notes: - deltaPtr addresses a buffer with the same pitch as the source
;          that is compared against the source pixels; when the whole
;          neighbourhood of a group matches, the group is skipped. Only
;          the top row of that buffer is refreshed here (presumably the
;          other rows are refreshed by calls for the other scanlines).
;        - The caller's deltaPtr stack slot is advanced in place.
;        - Pixels are handled in groups of four, so width is effectively
;          rounded up to a multiple of four; width <= 0 does nothing.
;        - colorMask, lowPixelMask, ONE and the scratch quadwords
;          (I56Pixel, Mask26, final1a, ...) are defined elsewhere in
;          this file.
;-------------------------------------------------------------------------

; S2X_AVG: mm0 = ((mm0 & colorMask) >> 1) + ((mm1 & colorMask) >> 1)
;                + (mm0 & mm1 & lowPixelMask), per 16-bit lane.
; Clobbers mm1, mm2, mm3.
%macro S2X_AVG 0
        movq mm2, mm0
        movq mm3, mm1
        pand mm0, [colorMask]
        pand mm1, [colorMask]
        psrlw mm0, 1
        psrlw mm1, 1
        pand mm3, [lowPixelMask]
        paddw mm0, mm1
        pand mm3, mm2
        paddw mm0, mm3
%endmacro

; S2X_VOTE n1, n2: one voting step for the lanes that are still undecided.
; In:  mm4 = first candidate, mm5 = second candidate,
;      mm6 = [ONE] masked to the undecided lanes, mm7 = running score,
;      n1 / n2 = memory operands of the two neighbours to inspect.
; Out: mm7 += mm6 where fewer than two of the neighbours equal mm4,
;      mm7 -= mm6 where fewer than two of the neighbours equal mm5.
; Clobbers mm0-mm3.
%macro S2X_VOTE 2
        movq mm0, %1
        movq mm1, %2
        movq mm2, mm0
        movq mm3, mm1
        pcmpeqw mm0, mm4
        pcmpeqw mm1, mm4
        pcmpeqw mm2, mm5
        pcmpeqw mm3, mm5
        pand mm0, mm6
        pand mm1, mm6
        pand mm2, mm6
        pand mm3, mm6
        paddw mm0, mm1                  ; matches with the first candidate
        paddw mm2, mm3                  ; matches with the second candidate
        pxor mm3, mm3
        pcmpgtw mm0, mm6
        pcmpgtw mm2, mm6
        pcmpeqw mm0, mm3                ; invert: set where the count was not above mm6
        pcmpeqw mm2, mm3
        pand mm0, mm6
        pand mm2, mm6
        paddw mm7, mm0
        psubw mm7, mm2
%endmacro

%ifdef __DJGPP__
__2xSaISuper2xSaILine:
%else
_2xSaISuper2xSaILine:
%endif
        ; Store some stuff
        push ebp
        mov ebp, esp
        pushad
        ; Prepare the destination
%ifdef FAR_POINTER
        ; Set the selector
        mov eax, [ebp+dstSegment]
        mov fs, ax
%endif
        mov edx, [ebp+dstOffset]        ; edx points to the screen
        ; Prepare the source
        mov eax, [ebp+srcPtr]           ; eax points to color5
        mov ebx, [ebp+srcPitch]         ; ebx contains the source pitch
        mov ecx, [ebp+width]            ; ecx contains the number of pixels to process
        test ecx, ecx
        jle near .Done                  ; nothing to do for width <= 0
        sub eax, ebx                    ; eax points to B1 which is the base
        ; Main Loop
.Loop:
        push ecx                        ; remaining pixel count; ecx becomes scratch
        ;-----Check Delta------------------
        mov ecx, [ebp+deltaPtr]
        ;load source img (two overlapping quadwords per row)
        movq mm0, [eax+colorB0]
        movq mm1, [eax+colorB3]
        movq mm2, [eax+ebx+color4]
        movq mm3, [eax+ebx+colorS2]
        movq mm4, [eax+ebx+ebx+color1]
        movq mm5, [eax+ebx+ebx+colorS1]
        push eax
        add eax, ebx                    ; reach the fourth row (base + 3*pitch)
        movq mm6, [eax+ebx+ebx+colorA0]
        movq mm7, [eax+ebx+ebx+colorA3]
        pop eax
        ;compare to delta (the delta rows are stored 2 bytes further on)
        pcmpeqw mm0, [ecx+2+colorB0]
        pcmpeqw mm1, [ecx+2+colorB3]
        pcmpeqw mm2, [ecx+ebx+2+color4]
        pcmpeqw mm3, [ecx+ebx+2+colorS2]
        pcmpeqw mm4, [ecx+ebx+ebx+2+color1]
        pcmpeqw mm5, [ecx+ebx+ebx+2+colorS1]
        add ecx, ebx
        pcmpeqw mm6, [ecx+ebx+ebx+2+colorA0]
        pcmpeqw mm7, [ecx+ebx+ebx+2+colorA3]
        sub ecx, ebx
        ;compose results
        pand mm0, mm1
        pand mm2, mm3
        pand mm4, mm5
        pand mm6, mm7
        pand mm0, mm2
        pand mm4, mm6
        pxor mm7, mm7
        pand mm0, mm4
        movq mm6, [eax+colorB0]
        pcmpeqw mm7, mm0                ;did any compare give us a zero ?
        movq [ecx+2+colorB0], mm6       ; refresh the top delta row
        packsswb mm7, mm7
        movd ecx, mm7
        test ecx, ecx
        jz near .SKIP_PROCESS           ;no, so we can skip
        ;End Delta
        ;---------------------------------
        ; Blends of color5 and color6
        movq mm0, [eax+ebx+color5]
        movq mm1, [eax+ebx+color6]
        movq mm4, mm0                   ; keep color5
        movq mm5, mm1                   ; keep color6
        S2X_AVG                         ;mm0 contains the interpolated values
        movq [I56Pixel], mm0
        movq mm7, mm0
        ;-------------------
        movq mm0, mm7
        movq mm1, mm4                   ;5,5,5,6
        S2X_AVG
        movq [I5556Pixel], mm0
        ;--------------------
        movq mm0, mm7
        movq mm1, mm5                   ;6,6,6,5
        S2X_AVG
        movq [I5666Pixel], mm0
        ;-------------------------
        ; Blends of color2 and color3
        movq mm0, [eax+ebx+ebx+color2]
        movq mm1, [eax+ebx+ebx+color3]
        movq mm4, mm0                   ; keep color2
        movq mm5, mm1                   ; keep color3
        S2X_AVG
        movq [I23Pixel], mm0
        movq mm7, mm0
        ;---------------------
        movq mm0, mm7
        movq mm1, mm4                   ;2,2,2,3
        S2X_AVG
        movq [I2223Pixel], mm0
        ;----------------------
        movq mm0, mm7
        movq mm1, mm5                   ;3,3,3,2
        S2X_AVG
        movq [I2333Pixel], mm0
        ;--------------------
        ;////////////////////////////////
        ; Decide which "branch" to take
        ;--------------------------------
        movq mm0, [eax+ebx+color5]
        movq mm1, [eax+ebx+color6]
        movq mm6, mm0
        movq mm7, mm1
        pcmpeqw mm0, [eax+ebx+ebx+color3]
        pcmpeqw mm1, [eax+ebx+ebx+color2]
        pcmpeqw mm6, mm7                ; color5 == color6
        movq mm2, mm0
        movq mm3, mm0
        pand mm0, mm1                   ;color5 == color3 && color6 == color2
        pxor mm7, mm7
        pcmpeqw mm2, mm7
        pand mm6, mm0                   ; all four equal
        pand mm2, mm1                   ;color5 != color3 && color6 == color2
        pcmpeqw mm1, mm7
        pand mm1, mm3                   ;color5 == color3 && color6 != color2
        pxor mm0, mm6                   ; both diagonals match but differ from each other
        por mm1, mm6
        movq mm7, mm0
        movq [Mask26], mm2
        packsswb mm7, mm7
        movq [Mask35], mm1
        movd ecx, mm7
        test ecx, ecx
        jz near .SKIP_GUESS             ; no undecided lane
        ;---------------------------------------------
        ; Vote on the undecided lanes using the pixels around the 2x2 core.
        movq mm6, mm0
        movq mm4, [eax+ebx+color5]
        movq mm5, [eax+ebx+color6]
        pxor mm7, mm7                   ; score = 0
        pand mm6, [ONE]
        S2X_VOTE [eax+colorB1], [eax+ebx+color4]
        S2X_VOTE [eax+colorB2], [eax+ebx+colorS2]
        push eax
        add eax, ebx                    ; eax = row 4/5/6 so that row A is reachable
        S2X_VOTE [eax+ebx+color1], [eax+ebx+ebx+colorA1]
        S2X_VOTE [eax+ebx+colorS1], [eax+ebx+ebx+colorA2]
        pop eax
        movq mm1, mm7
        pxor mm0, mm0
        pcmpgtw mm7, mm0                ; score > 0
        pcmpgtw mm0, mm1                ; score < 0
        por mm7, [Mask35]
        por mm0, [Mask26]
        movq [Mask35], mm7
        movq [Mask26], mm0
.SKIP_GUESS:
        ;Start the ASSEMBLY !!! eh... compose all the results together to form the final image...
        movq mm0, [eax+ebx+color5]
        movq mm1, [eax+ebx+ebx+color2]
        S2X_AVG                         ;mm0 = INTERPOLATE (color2, color5), live until final2a
        ;---------------------------
        ; Reference C for the "a" (left column) products:
        ;   if (color5 == color3 && color2 != color6 && color4 == color5 && color5 != colorA2)
        ;       product2a = INTERPOLATE (color2, color5);
        ;   else
        ;   if (color5 == color1 && color6 == color5 && color4 != color2 && color5 != colorA0)
        ;       product2a = INTERPOLATE(color2, color5);
        ;   else
        ;       product2a = color2;
        ;   if (color2 == color6 && color5 != color3 && color1 == color2 && color2 != colorB2)
        ;       product1a = INTERPOLATE (color2, color5);
        ;   else
        ;   if (color4 == color2 && color3 == color2 && color1 != color5 && color2 != colorB0)
        ;       product1a = INTERPOLATE(color2, color5);
        ;   else
        ;       product1a = color5;
        ; (the first test of each pair is taken from Mask35 / Mask26)
        movq mm7, [Mask26]
        movq mm6, [eax+colorB2]
        movq mm5, [eax+ebx+ebx+color2]
        movq mm4, [eax+ebx+ebx+color1]
        pcmpeqw mm4, mm5                ; color1 == color2
        pcmpeqw mm6, mm5
        pxor mm5, mm5
        pand mm7, mm4
        pcmpeqw mm6, mm5                ; color2 != colorB2
        pand mm7, mm6
        movq mm6, [eax+ebx+ebx+color3]
        movq mm5, [eax+ebx+ebx+color2]
        movq mm4, [eax+ebx+ebx+color1]
        movq mm2, [eax+ebx+color5]
        movq mm1, [eax+ebx+color4]
        movq mm3, [eax+colorB0]
        pcmpeqw mm2, mm4
        pcmpeqw mm6, mm5                ; color3 == color2
        pcmpeqw mm1, mm5                ; color4 == color2
        pcmpeqw mm3, mm5
        pxor mm5, mm5
        pcmpeqw mm2, mm5                ; color1 != color5
        pcmpeqw mm3, mm5                ; color2 != colorB0
        pand mm6, mm1
        pand mm2, mm3
        pand mm6, mm2
        por mm7, mm6                    ; lanes that take the interpolation
        movq mm6, mm7
        pcmpeqw mm6, mm5                ; remaining lanes
        pand mm7, mm0
        movq mm1, [eax+ebx+color5]
        pand mm6, mm1
        por mm7, mm6
        movq [final1a], mm7             ;finished 1a
        ;--------------------------------
        movq mm7, [Mask35]
        push eax
        add eax, ebx
        movq mm6, [eax+ebx+ebx+colorA2]
        pop eax
        movq mm5, [eax+ebx+color5]
        movq mm4, [eax+ebx+color4]
        pcmpeqw mm4, mm5                ; color4 == color5
        pcmpeqw mm6, mm5
        pxor mm5, mm5
        pand mm7, mm4
        pcmpeqw mm6, mm5                ; color5 != colorA2
        pand mm7, mm6
        movq mm6, [eax+ebx+color6]
        movq mm5, [eax+ebx+color5]
        movq mm4, [eax+ebx+color4]
        movq mm2, [eax+ebx+ebx+color2]
        movq mm1, [eax+ebx+ebx+color1]
        push eax
        add eax, ebx
        movq mm3, [eax+ebx+ebx+colorA0]
        pop eax
        pcmpeqw mm2, mm4
        pcmpeqw mm6, mm5                ; color6 == color5
        pcmpeqw mm1, mm5                ; color1 == color5
        pcmpeqw mm3, mm5
        pxor mm5, mm5
        pcmpeqw mm2, mm5                ; color4 != color2
        pcmpeqw mm3, mm5                ; color5 != colorA0
        pand mm6, mm1
        pand mm2, mm3
        pand mm6, mm2
        por mm7, mm6
        movq mm6, mm7
        pcmpeqw mm6, mm5
        pand mm7, mm0
        movq mm1, [eax+ebx+ebx+color2]
        pand mm6, mm1
        por mm7, mm6
        movq [final2a], mm7             ;finished 2a
        ;--------------------------------------------
        ; Reference C for the "b" (right column) products:
        ;   if (color6 == color3 && color3 == colorA1 && color2 != colorA2 && color3 != colorA0)
        ;       product2b = Q_INTERPOLATE (color3, color3, color3, color2);
        ;   else
        ;   if (color5 == color2 && color2 == colorA2 && colorA1 != color3 && color2 != colorA3)
        ;       product2b = Q_INTERPOLATE (color2, color2, color2, color3);
        ;   else
        ;       product2b = INTERPOLATE (color2, color3);
        ;   if (color6 == color3 && color6 == colorB1 && color5 != colorB2 && color6 != colorB0)
        ;       product1b = Q_INTERPOLATE (color6, color6, color6, color5);
        ;   else
        ;   if (color5 == color2 && color5 == colorB2 && colorB1 != color6 && color5 != colorB3)
        ;       product1b = Q_INTERPOLATE (color6, color5, color5, color5);
        ;   else
        ;       product1b = INTERPOLATE (color5, color6);
        ; In the code below, lanes matched by neither rule but flagged in
        ; Mask35 / Mask26 take a plain source pixel instead of the
        ; two-pixel interpolation.
        push eax
        add eax, ebx
        pxor mm7, mm7
        movq mm0, [eax+ebx+ebx+colorA0]
        movq mm1, [eax+ebx+ebx+colorA1]
        movq mm2, [eax+ebx+ebx+colorA2]
        movq mm3, [eax+ebx+ebx+colorA3]
        pop eax
        movq mm4, [eax+ebx+ebx+color2]
        movq mm5, [eax+ebx+ebx+color3]
        movq mm6, [eax+ebx+color6]
        pcmpeqw mm6, mm5
        pcmpeqw mm1, mm5
        pcmpeqw mm4, mm2
        pcmpeqw mm0, mm5
        pcmpeqw mm4, mm7
        pcmpeqw mm0, mm7
        pand mm0, mm4
        pand mm6, mm1
        pand mm0, mm6                   ; first rule
        push eax
        add eax, ebx
        movq mm1, [eax+ebx+ebx+colorA1]
        pop eax
        movq mm4, [eax+ebx+ebx+color2]
        movq mm5, [eax+ebx+color5]
        movq mm6, [eax+ebx+ebx+color3]
        pcmpeqw mm5, mm4
        pcmpeqw mm2, mm4
        pcmpeqw mm1, mm6
        pcmpeqw mm3, mm4
        pcmpeqw mm1, mm7
        pcmpeqw mm3, mm7
        pand mm2, mm5
        pand mm1, mm3
        pand mm1, mm2                   ; second rule
        movq mm7, mm0
        por mm7, mm1
        movq mm4, [Mask35]
        movq mm3, [Mask26]
        movq mm6, mm4
        pand mm6, mm7
        pxor mm4, mm6                   ; Mask35 without the rule lanes
        movq mm6, mm3
        pand mm6, mm7
        pxor mm3, mm6                   ; Mask26 without the rule lanes
        movq mm2, mm0
        movq mm7, [I2333Pixel]
        movq mm6, [I2223Pixel]
        movq mm5, [I23Pixel]
        por mm2, mm4
        pand mm4, [eax+ebx+ebx+color3]
        por mm2, mm3
        pand mm3, [eax+ebx+ebx+color2]
        por mm2, mm1
        pand mm0, mm7
        pand mm1, mm6
        pxor mm7, mm7
        pcmpeqw mm2, mm7                ; lanes selected by nothing above
        por mm0, mm1
        por mm3, mm4
        pand mm2, mm5
        por mm0, mm3
        por mm0, mm2
        movq [final2b], mm0
        ;-----------------------------------
        pxor mm7, mm7
        movq mm0, [eax+colorB0]
        movq mm1, [eax+colorB1]
        movq mm2, [eax+colorB2]
        movq mm3, [eax+colorB3]
        movq mm4, [eax+ebx+color5]
        movq mm5, [eax+ebx+color6]
        movq mm6, [eax+ebx+ebx+color3]
        pcmpeqw mm6, mm5
        pcmpeqw mm1, mm5
        pcmpeqw mm4, mm2
        pcmpeqw mm0, mm5
        pcmpeqw mm4, mm7
        pcmpeqw mm0, mm7
        pand mm0, mm4
        pand mm6, mm1
        pand mm0, mm6                   ; first rule
        movq mm1, [eax+colorB1]
        movq mm4, [eax+ebx+color5]
        movq mm5, [eax+ebx+ebx+color2]
        movq mm6, [eax+ebx+color6]
        pcmpeqw mm5, mm4
        pcmpeqw mm2, mm4
        pcmpeqw mm1, mm6
        pcmpeqw mm3, mm4
        pcmpeqw mm1, mm7
        pcmpeqw mm3, mm7
        pand mm2, mm5
        pand mm1, mm3
        pand mm1, mm2                   ; second rule
        movq mm7, mm0
        por mm7, mm1
        movq mm4, [Mask35]
        movq mm3, [Mask26]
        movq mm6, mm4
        pand mm6, mm7
        pxor mm4, mm6
        movq mm6, mm3
        pand mm6, mm7
        pxor mm3, mm6
        movq mm2, mm0
        movq mm7, [I5666Pixel]
        movq mm6, [I5556Pixel]
        movq mm5, [I56Pixel]
        por mm2, mm4
        pand mm4, [eax+ebx+color5]
        por mm2, mm3
        pand mm3, [eax+ebx+color6]
        por mm2, mm1
        pand mm0, mm7
        pand mm1, mm6
        pxor mm7, mm7
        pcmpeqw mm2, mm7
        por mm0, mm1
        por mm3, mm4
        pand mm2, mm5
        por mm0, mm3
        por mm0, mm2
        movq [final1b], mm0
        ;---------
        ; Interleave the a/b products into two output rows of eight pixels.
        movq mm0, [final1a]
        movq mm4, [final2a]
        movq mm2, [final1b]
        movq mm6, [final2b]
        movq mm1, mm0
        movq mm5, mm4
        punpcklwd mm0, mm2
        punpckhwd mm1, mm2
        punpcklwd mm4, mm6
        punpckhwd mm5, mm6
%ifdef FAR_POINTER
        movq [fs:edx], mm0
        movq [fs:edx+8], mm1
        push edx
        add edx, [ebp+dstPitch]
        movq [fs:edx], mm4
        movq [fs:edx+8], mm5
        pop edx
%else
        movq [edx], mm0
        movq [edx+8], mm1
        push edx
        add edx, [ebp+dstPitch]
        movq [edx], mm4
        movq [edx+8], mm5
        pop edx
%endif
.SKIP_PROCESS:
        add dword [ebp+deltaPtr], 8     ; next four delta pixels
        add edx, 16                     ; next eight destination pixels
        add eax, 8                      ; next four source pixels
        pop ecx
        sub ecx, 4
        test ecx, ecx
        jg near .Loop
.Done:
        ; Restore some stuff
        popad
        mov esp, ebp
        pop ebp
        emms
        ret
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
; void _2xSaISuperEagleLine (uint8 *srcPtr, uint8 *deltaPtr,
;                            uint32 srcPitch, uint32 width,
;                            uint8 *dstPtr, uint32 dstPitch
;                            [, uint16 dstSegment]);  FAR_POINTER builds only
; (exported as __2xSaISuperEagleLine under DJGPP)
;
; SuperEagle kernel for one scanline of 16-bit pixels. Every loop pass
; takes four source pixels and writes eight pixels at the destination
; cursor plus eight more dstPitch bytes further on.
;
; Pixel names used below (offsets from the equate table, rows by address):
;       [eax]          B0 B1 B2 B3     row above srcPtr
;       [eax+ebx]      4  5  6  S2     row at srcPtr
;       [eax+ebx*2]    1  2  3  S1
;       [eax+ebx*3]    A0 A1 A2 A3
;
; ABI:   32-bit, arguments on the stack, caller removes them (plain ret).
; Regs:  eax = source cursor (srcPtr - srcPitch), ebx = srcPitch,
;        edx = destination cursor, ecx = scratch (the remaining pixel
;        count is kept on the stack inside the loop).
; Out:   nothing. pushad/popad preserve every general register and emms
;        releases the MMX state before returning.
; Notes: - deltaPtr addresses a buffer with the same pitch as the source
;          that is compared against the source pixels; when the whole
;          neighbourhood of a group matches, the group is skipped. Only
;          the top row of that buffer is refreshed here (presumably the
;          other rows are refreshed by calls for the other scanlines).
;        - The caller's deltaPtr stack slot is advanced in place.
;        - Pixels are handled in groups of four, so width is effectively
;          rounded up to a multiple of four; width <= 0 does nothing.
;        - colorMask, lowPixelMask, ONE and the scratch quadwords
;          (I56Pixel, product1a, Mask35b, final1a, ...) are defined
;          elsewhere in this file.
;-------------------------------------------------------------------------

; EAGLE_AVG: mm0 = ((mm0 & colorMask) >> 1) + ((mm1 & colorMask) >> 1)
;                  + (mm0 & mm1 & lowPixelMask), per 16-bit lane.
; Clobbers mm1, mm2, mm3.
%macro EAGLE_AVG 0
        movq mm2, mm0
        movq mm3, mm1
        pand mm0, [colorMask]
        pand mm1, [colorMask]
        psrlw mm0, 1
        psrlw mm1, 1
        pand mm3, [lowPixelMask]
        paddw mm0, mm1
        pand mm3, mm2
        paddw mm0, mm3
%endmacro

; EAGLE_VOTE n1, n2: one voting step for the lanes that are still undecided.
; In:  mm4 = first candidate, mm5 = second candidate,
;      mm6 = [ONE] masked to the undecided lanes, mm7 = running score,
;      n1 / n2 = memory operands of the two neighbours to inspect.
; Out: mm7 += mm6 where fewer than two of the neighbours equal mm4,
;      mm7 -= mm6 where fewer than two of the neighbours equal mm5.
; Clobbers mm0-mm3.
%macro EAGLE_VOTE 2
        movq mm0, %1
        movq mm1, %2
        movq mm2, mm0
        movq mm3, mm1
        pcmpeqw mm0, mm4
        pcmpeqw mm1, mm4
        pcmpeqw mm2, mm5
        pcmpeqw mm3, mm5
        pand mm0, mm6
        pand mm1, mm6
        pand mm2, mm6
        pand mm3, mm6
        paddw mm0, mm1                  ; matches with the first candidate
        paddw mm2, mm3                  ; matches with the second candidate
        pxor mm3, mm3
        pcmpgtw mm0, mm6
        pcmpgtw mm2, mm6
        pcmpeqw mm0, mm3                ; invert: set where the count was not above mm6
        pcmpeqw mm2, mm3
        pand mm0, mm6
        pand mm2, mm6
        paddw mm7, mm0
        psubw mm7, mm2
%endmacro

%ifdef __DJGPP__
__2xSaISuperEagleLine:
%else
_2xSaISuperEagleLine:
%endif
        ; Store some stuff
        push ebp
        mov ebp, esp
        pushad
        ; Prepare the destination
%ifdef FAR_POINTER
        ; Set the selector
        mov eax, [ebp+dstSegment]
        mov fs, ax
%endif
        mov edx, [ebp+dstOffset]        ; edx points to the screen
        ; Prepare the source
        mov eax, [ebp+srcPtr]           ; eax points to color5
        mov ebx, [ebp+srcPitch]
        mov ecx, [ebp+width]
        test ecx, ecx
        jle near .Done                  ; nothing to do for width <= 0
        sub eax, ebx                    ; eax now points to colorB1
        ; Main Loop
.Loop:
        push ecx                        ; remaining pixel count; ecx becomes scratch
        ;-----Check Delta------------------
        mov ecx, [ebp+deltaPtr]
        ; load the source neighbourhood (two overlapping quadwords per row)
        movq mm0, [eax+colorB0]
        movq mm1, [eax+colorB3]
        movq mm2, [eax+ebx+color4]
        movq mm3, [eax+ebx+colorS2]
        movq mm4, [eax+ebx+ebx+color1]
        movq mm5, [eax+ebx+ebx+colorS1]
        push eax
        add eax, ebx                    ; reach the fourth row (base + 3*pitch)
        movq mm6, [eax+ebx+ebx+colorA0]
        movq mm7, [eax+ebx+ebx+colorA3]
        pop eax
        ; compare with the delta buffer (its rows are stored 2 bytes further on)
        pcmpeqw mm0, [ecx+2+colorB0]
        pcmpeqw mm1, [ecx+2+colorB3]
        pcmpeqw mm2, [ecx+ebx+2+color4]
        pcmpeqw mm3, [ecx+ebx+2+colorS2]
        pcmpeqw mm4, [ecx+ebx+ebx+2+color1]
        pcmpeqw mm5, [ecx+ebx+ebx+2+colorS1]
        add ecx, ebx
        pcmpeqw mm6, [ecx+ebx+ebx+2+colorA0]
        pcmpeqw mm7, [ecx+ebx+ebx+2+colorA3]
        sub ecx, ebx
        ; fold the eight compare results together
        pand mm0, mm1
        pand mm2, mm3
        pand mm4, mm5
        pand mm6, mm7
        pand mm0, mm2
        pand mm4, mm6
        pxor mm7, mm7
        pand mm0, mm4
        movq mm6, [eax+colorB0]
        pcmpeqw mm7, mm0                ; set in lanes where something differed
        movq [ecx+2+colorB0], mm6       ; refresh the top delta row
        packsswb mm7, mm7
        movd ecx, mm7
        test ecx, ecx
        jz near .SKIP_PROCESS           ; nothing changed: skip this group
        ;End Delta
        ;---------------------------------
        ; Blends of color5 and color6
        movq mm0, [eax+ebx+color5]
        movq mm1, [eax+ebx+color6]
        movq mm4, mm0                   ; keep color5
        movq mm5, mm1                   ; keep color6
        EAGLE_AVG                       ;mm0 contains the interpolated values
        movq [I56Pixel], mm0
        movq mm7, mm0
        ;-------------------
        movq mm0, mm7
        movq mm1, mm4                   ;5,5,5,6
        EAGLE_AVG
        movq [product1a], mm0
        ;--------------------
        movq mm0, mm7
        movq mm1, mm5                   ;6,6,6,5
        EAGLE_AVG
        movq [product1b], mm0
        ;-------------------------
        ; Blends of color2 and color3
        movq mm0, [eax+ebx+ebx+color2]
        movq mm1, [eax+ebx+ebx+color3]
        movq mm4, mm0                   ; keep color2
        movq mm5, mm1                   ; keep color3
        EAGLE_AVG
        movq [I23Pixel], mm0
        movq mm7, mm0
        ;---------------------
        movq mm0, mm7
        movq mm1, mm4                   ;2,2,2,3
        EAGLE_AVG
        movq [product2a], mm0
        ;----------------------
        movq mm0, mm7
        movq mm1, mm5                   ;3,3,3,2
        EAGLE_AVG
        movq [product2b], mm0
        ;////////////////////////////////
        ; Decide which "branch" to take
        ;--------------------------------
        movq mm4, [eax+ebx+color5]
        movq mm5, [eax+ebx+color6]
        movq mm6, [eax+ebx+ebx+color3]
        movq mm7, [eax+ebx+ebx+color2]
        ; Mask35 = color5 == color3 && color6 != color2
        pxor mm3, mm3
        movq mm0, mm4
        movq mm1, mm5
        pcmpeqw mm0, mm6
        pcmpeqw mm1, mm7
        pcmpeqw mm1, mm3
        pand mm0, mm1
        movq [Mask35], mm0
        ; Mask35b = Mask35 && ((colorS1 == color5 && color4 == color5) ||
        ;                      (colorA2 == color5 && colorB1 == color5))
        movq mm0, [eax+ebx+ebx+colorS1]
        movq mm1, [eax+ebx+color4]
        push eax
        add eax, ebx
        movq mm2, [eax+ebx+ebx+colorA2]
        pop eax
        movq mm3, [eax+colorB1]
        pcmpeqw mm0, mm4
        pcmpeqw mm1, mm4
        pcmpeqw mm2, mm4
        pcmpeqw mm3, mm4
        pand mm0, mm1
        pand mm2, mm3
        por mm0, mm2
        pand mm0, [Mask35]
        movq [Mask35b], mm0
        ;-----------
        ; Mask26 = color5 != color3 && color6 == color2
        pxor mm3, mm3
        movq mm0, mm4
        movq mm1, mm5
        pcmpeqw mm0, mm6
        pcmpeqw mm1, mm7
        pcmpeqw mm0, mm3
        pand mm0, mm1
        movq [Mask26], mm0
        ; Mask26b = Mask26 && ((color1 == color6 && colorS2 == color6) ||
        ;                      (colorA1 == color6 && colorB2 == color6))
        movq mm0, [eax+ebx+ebx+color1]
        movq mm1, [eax+ebx+colorS2]
        push eax
        add eax, ebx
        movq mm2, [eax+ebx+ebx+colorA1]
        pop eax
        movq mm3, [eax+colorB2]
        pcmpeqw mm0, mm5
        pcmpeqw mm1, mm5
        pcmpeqw mm2, mm5
        pcmpeqw mm3, mm5
        pand mm0, mm1
        pand mm2, mm3
        por mm0, mm2
        pand mm0, [Mask26]
        movq [Mask26b], mm0
        ;--------------------
        ; Undecided lanes: color5 == color3 && color6 == color2 && color5 != color6
        movq mm0, mm4
        movq mm1, mm5
        movq mm2, mm0
        pcmpeqw mm2, mm1
        pcmpeqw mm0, mm6
        pcmpeqw mm1, mm7
        pand mm0, mm1
        pand mm2, mm0
        pxor mm0, mm2
        movq mm7, mm0
        ;------------------
        packsswb mm7, mm7
        movd ecx, mm7
        test ecx, ecx
        jz near .SKIP_GUESS             ; no undecided lane
        ;---------------------------------------------
        ; Vote on the undecided lanes using the pixels around the 2x2 core
        ; (5 6 / 2 3); see the pixel map in the routine header.
        movq mm6, mm0
        movq mm4, [eax+ebx+color5]
        movq mm5, [eax+ebx+color6]
        pxor mm7, mm7                   ; score = 0
        pand mm6, [ONE]
        EAGLE_VOTE [eax+colorB1], [eax+ebx+color4]
        EAGLE_VOTE [eax+colorB2], [eax+ebx+colorS2]
        push eax
        add eax, ebx                    ; eax = row 4/5/6 so that row A is reachable
        EAGLE_VOTE [eax+ebx+color1], [eax+ebx+ebx+colorA1]
        EAGLE_VOTE [eax+ebx+colorS1], [eax+ebx+ebx+colorA2]
        pop eax
        movq mm1, mm7
        pxor mm0, mm0
        pcmpgtw mm7, mm0                ; score > 0
        pcmpgtw mm0, mm1                ; score < 0
        por mm7, [Mask35]
        por mm0, [Mask26]
        movq [Mask35], mm7
        movq [Mask26], mm0
.SKIP_GUESS:
        ;Start the ASSEMBLY !!!
        ; Build the lane selectors shared by all four output pixels:
        ;   mm3 = (color5 == color2 || color6 == color3) outside Mask35/Mask26
        ;   mm4 = Mask35 without Mask35b     mm6 = Mask35b
        ;   mm5 = Mask26 without Mask26b     mm7 = Mask26b
        ;   mm2 = lanes selected by none of mm3 / Mask35 / Mask26
        movq mm4, [Mask35]
        movq mm5, [Mask26]
        movq mm6, [Mask35b]
        movq mm7, [Mask26b]
        movq mm0, [eax+ebx+color5]
        movq mm1, [eax+ebx+color6]
        movq mm2, [eax+ebx+ebx+color2]
        movq mm3, [eax+ebx+ebx+color3]
        pcmpeqw mm0, mm2
        pcmpeqw mm1, mm3
        movq mm2, mm4
        movq mm3, mm5
        por mm0, mm1
        por mm2, mm3
        pand mm2, mm0
        pxor mm0, mm2
        movq mm3, mm0
        movq mm2, mm0
        pxor mm0, mm0
        por mm2, mm4                    ; uses Mask35 before it is reduced below
        pxor mm4, mm6
        por mm2, mm5                    ; uses Mask26 before it is reduced below
        pxor mm5, mm7
        pcmpeqw mm2, mm0
        ;----------------
        ; final1a: color5 / I56 / product1b / product1a by selector
        movq mm0, [eax+ebx+color5]
        movq mm1, mm3
        por mm1, mm4
        por mm1, mm6
        pand mm0, mm1
        movq mm1, mm5
        pand mm1, [I56Pixel]
        por mm0, mm1
        movq mm1, mm7
        pand mm1, [product1b]
        por mm0, mm1
        movq mm1, mm2
        pand mm1, [product1a]
        por mm0, mm1
        movq [final1a], mm0
        ; final1b: color6 / I56 / product1a / product1b by selector
        movq mm0, [eax+ebx+color6]
        movq mm1, mm3
        por mm1, mm5
        por mm1, mm7
        pand mm0, mm1
        movq mm1, mm4
        pand mm1, [I56Pixel]
        por mm0, mm1
        movq mm1, mm6
        pand mm1, [product1a]
        por mm0, mm1
        movq mm1, mm2
        pand mm1, [product1b]
        por mm0, mm1
        movq [final1b], mm0
        ; final2a: color2 / I23 / product2b / product2a by selector
        movq mm0, [eax+ebx+ebx+color2]
        movq mm1, mm3
        por mm1, mm5
        por mm1, mm7
        pand mm0, mm1
        movq mm1, mm4
        pand mm1, [I23Pixel]
        por mm0, mm1
        movq mm1, mm6
        pand mm1, [product2b]
        por mm0, mm1
        movq mm1, mm2
        pand mm1, [product2a]
        por mm0, mm1
        movq [final2a], mm0
        ; final2b: color3 / I23 / product2a / product2b by selector
        movq mm0, [eax+ebx+ebx+color3]
        movq mm1, mm3
        por mm1, mm4
        por mm1, mm6
        pand mm0, mm1
        movq mm1, mm5
        pand mm1, [I23Pixel]
        por mm0, mm1
        movq mm1, mm7
        pand mm1, [product2a]
        por mm0, mm1
        movq mm1, mm2
        pand mm1, [product2b]
        por mm0, mm1
        movq [final2b], mm0
        ; Interleave the a/b products into two output rows of eight pixels.
        movq mm0, [final1a]
        movq mm2, [final1b]
        movq mm1, mm0
        movq mm4, [final2a]
        movq mm6, [final2b]
        movq mm5, mm4
        punpcklwd mm0, mm2
        punpckhwd mm1, mm2
        punpcklwd mm4, mm6
        punpckhwd mm5, mm6
%ifdef FAR_POINTER
        movq [fs:edx], mm0
        movq [fs:edx+8], mm1
        push edx
        add edx, [ebp+dstPitch]
        movq [fs:edx], mm4
        movq [fs:edx+8], mm5
        pop edx
%else
        movq [edx], mm0
        movq [edx+8], mm1
        push edx
        add edx, [ebp+dstPitch]
        movq [edx], mm4
        movq [edx+8], mm5
        pop edx
%endif
.SKIP_PROCESS:
        add dword [ebp+deltaPtr], 8     ; next four delta pixels
        add edx, 16                     ; next eight destination pixels
        add eax, 8                      ; next four source pixels
        pop ecx
        sub ecx, 4
        test ecx, ecx
        jg near .Loop
.Done:
        ; Restore some stuff
        popad
        mov esp, ebp
        pop ebp
        emms
        ret
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;This is version 0.50
; Byte offsets of the 16-bit pixels addressed by the plain 2xSaI routine.
; Each group of four names one row (-2 = pixel to the left, 0 = current,
; 2 and 4 = to the right); the row itself is chosen by the address the
; offset is added to. Map of the pixels:
;       I | E F | J
;       G | A B | K
;       H | C D | L
;       M | N O | P
colorI          equ     -2
colorE          equ     0
colorF          equ     2
colorJ          equ     4

colorG          equ     -2
colorA          equ     0
colorB          equ     2
colorK          equ     4

colorH          equ     -2
colorC          equ     0
colorD          equ     2
colorL          equ     4

colorM          equ     -2
colorN          equ     0
colorO          equ     2
colorP          equ     4
;-------------------------------------------------------------------------
; void _2xSaILine (uint8 *srcPtr, uint8 *deltaPtr,
;                  uint32 srcPitch, uint32 width,
;                  uint8 *dstPtr, uint32 dstPitch
;                  [, uint16 dstSegment]);  FAR_POINTER builds only
; (exported as __2xSaILine under DJGPP)
;
; Plain 2xSaI kernel for one scanline of 16-bit pixels. Every loop pass
; takes four source pixels and writes eight pixels at the destination
; cursor plus eight more dstPitch bytes further on.
;
; Map of the pixels (rows by address, columns by the color* offsets):
;       [eax]          I | E F | J     row above srcPtr
;       [eax+ebx]      G | A B | K     row at srcPtr
;       [eax+ebx*2]    H | C D | L
;       [eax+ebx*3]    M | N O | P
;
; ABI:   32-bit, arguments on the stack, caller removes them (plain ret).
; Regs:  eax = source cursor (srcPtr - srcPitch), ebx = srcPitch,
;        edx = destination cursor, ecx = scratch (the remaining pixel
;        count is kept on the stack inside the loop).
; Out:   nothing. pushad/popad preserve every general register and emms
;        releases the MMX state before returning.
; Notes: - deltaPtr addresses a buffer with the same pitch as the source
;          that is compared against the source pixels; when the whole
;          neighbourhood of a group matches, the group is skipped. Only
;          the top row of that buffer is refreshed here (presumably the
;          other rows are refreshed by calls for the other scanlines).
;        - The caller's deltaPtr stack slot is advanced in place.
;        - Pixels are handled in groups of four, so width is effectively
;          rounded up to a multiple of four; width <= 0 does nothing.
;        - colorMask, lowPixelMask, qcolorMask, qlowpixelMask, ONE and
;          the scratch quadwords (Mask1, Mask2, ACPixel) are defined
;          elsewhere in this file.
;-------------------------------------------------------------------------

; SAI_AVG: mm0 = ((mm0 & colorMask) >> 1) + ((mm1 & colorMask) >> 1)
;                + (mm0 & mm1 & lowPixelMask), per 16-bit lane.
; Clobbers mm1, mm2, mm3.
%macro SAI_AVG 0
        movq mm2, mm0
        movq mm3, mm1
        pand mm0, [colorMask]
        pand mm1, [colorMask]
        psrlw mm0, 1
        psrlw mm1, 1
        pand mm3, [lowPixelMask]
        paddw mm0, mm1
        pand mm3, mm2
        paddw mm0, mm3
%endmacro

; SAI_VOTE n1, n2: one voting step for the lanes that are still undecided.
; In:  mm4 = first candidate, mm5 = second candidate,
;      mm6 = [ONE] masked to the undecided lanes, mm7 = running score,
;      n1 / n2 = memory operands of the two neighbours to inspect.
; Out: mm7 += mm6 where fewer than two of the neighbours equal mm4,
;      mm7 -= mm6 where fewer than two of the neighbours equal mm5.
; Clobbers mm0-mm3.
%macro SAI_VOTE 2
        movq mm0, %1
        movq mm1, %2
        movq mm2, mm0
        movq mm3, mm1
        pcmpeqw mm0, mm4
        pcmpeqw mm1, mm4
        pcmpeqw mm2, mm5
        pcmpeqw mm3, mm5
        pand mm0, mm6
        pand mm1, mm6
        pand mm2, mm6
        pand mm3, mm6
        paddw mm0, mm1                  ; matches with the first candidate
        paddw mm2, mm3                  ; matches with the second candidate
        pxor mm3, mm3
        pcmpgtw mm0, mm6
        pcmpgtw mm2, mm6
        pcmpeqw mm0, mm3                ; invert: set where the count was not above mm6
        pcmpeqw mm2, mm3
        pand mm0, mm6
        pand mm2, mm6
        paddw mm7, mm0
        psubw mm7, mm2
%endmacro

%ifdef __DJGPP__
__2xSaILine:
%else
_2xSaILine:
%endif
        ; Store some stuff
        push ebp
        mov ebp, esp
        pushad
        ; Prepare the destination
%ifdef FAR_POINTER
        ; Set the selector
        mov eax, [ebp+dstSegment]
        mov fs, ax
%endif
        mov edx, [ebp+dstOffset]        ; edx points to the screen
        ; Prepare the source
        mov eax, [ebp+srcPtr]           ; eax points to colorA
        mov ebx, [ebp+srcPitch]
        mov ecx, [ebp+width]
        test ecx, ecx
        jle near .Done                  ; nothing to do for width <= 0
        sub eax, ebx                    ; eax now points to colorE
        ; Main Loop
.Loop:
        push ecx                        ; remaining pixel count; ecx becomes scratch
        ;-----Check Delta------------------
        mov ecx, [ebp+deltaPtr]
        ; load the source neighbourhood (two overlapping quadwords per row)
        movq mm0, [eax+colorI]
        movq mm1, [eax+colorJ]
        movq mm2, [eax+ebx+colorG]
        movq mm3, [eax+ebx+colorK]
        movq mm4, [eax+ebx+ebx+colorH]
        movq mm5, [eax+ebx+ebx+colorL]
        push eax
        add eax, ebx                    ; reach the fourth row (base + 3*pitch)
        movq mm6, [eax+ebx+ebx+colorM]
        movq mm7, [eax+ebx+ebx+colorP]
        pop eax
        ; compare with the delta buffer (its rows are stored 2 bytes further on)
        pcmpeqw mm0, [ecx+2+colorI]
        pcmpeqw mm1, [ecx+2+colorJ]
        pcmpeqw mm2, [ecx+ebx+2+colorG]
        pcmpeqw mm3, [ecx+ebx+2+colorK]
        pcmpeqw mm4, [ecx+ebx+ebx+2+colorH]
        pcmpeqw mm5, [ecx+ebx+ebx+2+colorL]
        add ecx, ebx
        pcmpeqw mm6, [ecx+ebx+ebx+2+colorM]
        pcmpeqw mm7, [ecx+ebx+ebx+2+colorP]
        sub ecx, ebx
        ; fold the eight compare results together
        pand mm0, mm1
        pand mm2, mm3
        pand mm4, mm5
        pand mm6, mm7
        pand mm0, mm2
        pand mm4, mm6
        pxor mm7, mm7
        pand mm0, mm4
        movq mm6, [eax+colorI]
        pcmpeqw mm7, mm0                ; set in lanes where something differed
        movq [ecx+2+colorI], mm6        ; refresh the top delta row
        packsswb mm7, mm7
        movd ecx, mm7
        test ecx, ecx
        jz near .SKIP_PROCESS           ; nothing changed: skip this group
        ;End Delta
        ;---------------------------------
        ;1
        ;if ((colorA == colorD) && (colorB != colorC) && (colorA == colorE) && (colorB == colorL)
        movq mm0, [eax+ebx+colorA]      ;mm0 and mm1 contain colorA
        movq mm2, [eax+ebx+colorB]      ;mm2 and mm3 contain colorB
        movq mm1, mm0
        movq mm3, mm2
        pcmpeqw mm0, [eax+ebx+ebx+colorD]
        pcmpeqw mm1, [eax+colorE]
        pcmpeqw mm2, [eax+ebx+ebx+colorL]
        pcmpeqw mm3, [eax+ebx+ebx+colorC]
        pand mm0, mm1
        pxor mm1, mm1
        pand mm0, mm2
        pcmpeqw mm3, mm1
        pand mm0, mm3                   ;result in mm0
        ;if ((colorA == colorC) && (colorB != colorE) && (colorA == colorF) && (colorB == colorJ)
        movq mm4, [eax+ebx+colorA]      ;mm4 and mm5 contain colorA
        movq mm6, [eax+ebx+colorB]      ;mm6 and mm7 contain colorB
        movq mm5, mm4
        movq mm7, mm6
        pcmpeqw mm4, [eax+ebx+ebx+colorC]
        pcmpeqw mm5, [eax+colorF]
        pcmpeqw mm6, [eax+colorJ]
        pcmpeqw mm7, [eax+colorE]
        pand mm4, mm5
        pxor mm5, mm5
        pand mm4, mm6
        pcmpeqw mm7, mm5
        pand mm4, mm7                   ;result in mm4
        por mm0, mm4                    ;combine the masks
        movq [Mask1], mm0
        ;--------------------------------------------
        ;2
        ;if ((colorB == colorC) && (colorA != colorD) && (colorB == colorF) && (colorA == colorH)
        movq mm0, [eax+ebx+colorB]      ;mm0 and mm1 contain colorB
        movq mm2, [eax+ebx+colorA]      ;mm2 and mm3 contain colorA
        movq mm1, mm0
        movq mm3, mm2
        pcmpeqw mm0, [eax+ebx+ebx+colorC]
        pcmpeqw mm1, [eax+colorF]
        pcmpeqw mm2, [eax+ebx+ebx+colorH]
        pcmpeqw mm3, [eax+ebx+ebx+colorD]
        pand mm0, mm1
        pxor mm1, mm1
        pand mm0, mm2
        pcmpeqw mm3, mm1
        pand mm0, mm3                   ;result in mm0
        ;if ((colorB == colorE) && (colorB == colorD) && (colorA != colorF) && (colorA == colorI)
        movq mm4, [eax+ebx+colorB]      ;mm4 and mm5 contain colorB
        movq mm6, [eax+ebx+colorA]      ;mm6 and mm7 contain colorA
        movq mm5, mm4
        movq mm7, mm6
        pcmpeqw mm4, [eax+ebx+ebx+colorD]
        pcmpeqw mm5, [eax+colorE]
        pcmpeqw mm6, [eax+colorI]
        pcmpeqw mm7, [eax+colorF]
        pand mm4, mm5
        pxor mm5, mm5
        pand mm4, mm6
        pcmpeqw mm7, mm5
        pand mm4, mm7                   ;result in mm4
        por mm0, mm4                    ;combine the masks
        movq [Mask2], mm0
        ;interpolate colorA and colorB
        movq mm0, [eax+ebx+colorA]
        movq mm1, [eax+ebx+colorB]
        SAI_AVG                         ;mm0 contains the interpolated values
        ;assemble the pixels: colorA where Mask1, colorB where Mask2,
        ;the interpolation elsewhere; then pair each with colorA
        movq mm1, [eax+ebx+colorA]
        movq mm2, [eax+ebx+colorB]
        movq mm3, [Mask1]
        movq mm5, mm1
        movq mm4, [Mask2]
        movq mm6, mm1
        pand mm1, mm3
        por mm3, mm4
        pxor mm7, mm7
        pand mm2, mm4
        pcmpeqw mm3, mm7
        por mm1, mm2
        pand mm0, mm3
        por mm0, mm1
        punpcklwd mm5, mm0
        punpckhwd mm6, mm0
%ifdef FAR_POINTER
        movq [fs:edx], mm5
        movq [fs:edx+8], mm6
%else
        movq [edx], mm5
        movq [edx+8], mm6
%endif
        ;------------------------------------------------
        ; Create the Nextline
        ;------------------------------------------------
        ;3
        ;if ((colorA == colorD) && (colorB != colorC) && (colorA == colorG) && (colorC == colorO)
        movq mm0, [eax+ebx+colorA]      ;mm0 and mm1 contain colorA
        movq mm2, [eax+ebx+ebx+colorC]  ;mm2 and mm3 contain colorC
        movq mm1, mm0
        movq mm3, mm2
        push eax
        add eax, ebx                    ; eax = row G/A/B so that row O is reachable
        pcmpeqw mm0, [eax+ebx+colorD]
        pcmpeqw mm1, [eax+colorG]
        pcmpeqw mm2, [eax+ebx+ebx+colorO]
        pcmpeqw mm3, [eax+colorB]
        pop eax
        pand mm0, mm1
        pxor mm1, mm1
        pand mm0, mm2
        pcmpeqw mm3, mm1
        pand mm0, mm3                   ;result in mm0
        ;if ((colorA == colorB) && (colorG != colorC) && (colorA == colorH) && (colorC == colorM)
        movq mm4, [eax+ebx+colorA]      ;mm4 and mm5 contain colorA
        movq mm6, [eax+ebx+ebx+colorC]  ;mm6 and mm7 contain colorC
        movq mm5, mm4
        movq mm7, mm6
        push eax
        add eax, ebx
        pcmpeqw mm4, [eax+ebx+colorH]
        pcmpeqw mm5, [eax+colorB]
        pcmpeqw mm6, [eax+ebx+ebx+colorM]
        pcmpeqw mm7, [eax+colorG]
        pop eax
        pand mm4, mm5
        pxor mm5, mm5
        pand mm4, mm6
        pcmpeqw mm7, mm5
        pand mm4, mm7                   ;result in mm4
        por mm0, mm4                    ;combine the masks
        movq [Mask1], mm0
        ;--------------------------------------------
        ;4
        ;if ((colorB == colorC) && (colorA != colorD) && (colorC == colorH) && (colorA == colorF)
        movq mm0, [eax+ebx+ebx+colorC]  ;mm0 and mm1 contain colorC
        movq mm2, [eax+ebx+colorA]      ;mm2 and mm3 contain colorA
        movq mm1, mm0
        movq mm3, mm2
        pcmpeqw mm0, [eax+ebx+colorB]
        pcmpeqw mm1, [eax+ebx+ebx+colorH]
        pcmpeqw mm2, [eax+colorF]
        pcmpeqw mm3, [eax+ebx+ebx+colorD]
        pand mm0, mm1
        pxor mm1, mm1
        pand mm0, mm2
        pcmpeqw mm3, mm1
        pand mm0, mm3                   ;result in mm0
        ;if ((colorC == colorG) && (colorC == colorD) && (colorA != colorH) && (colorA == colorI)
        movq mm4, [eax+ebx+ebx+colorC]  ;mm4 and mm5 contain colorC
        movq mm6, [eax+ebx+colorA]      ;mm6 and mm7 contain colorA
        movq mm5, mm4
        movq mm7, mm6
        pcmpeqw mm4, [eax+ebx+ebx+colorD]
        pcmpeqw mm5, [eax+ebx+colorG]
        pcmpeqw mm6, [eax+colorI]
        pcmpeqw mm7, [eax+ebx+ebx+colorH]
        pand mm4, mm5
        pxor mm5, mm5
        pand mm4, mm6
        pcmpeqw mm7, mm5
        pand mm4, mm7                   ;result in mm4
        por mm0, mm4                    ;combine the masks
        movq [Mask2], mm0
        ;----------------------------------------------
        ;interpolate colorA and colorC
        movq mm0, [eax+ebx+colorA]
        movq mm1, [eax+ebx+ebx+colorC]
        SAI_AVG                         ;mm0 contains the interpolated values
        ;-------------
        ;assemble the pixels: colorA where Mask1, colorC where Mask2,
        ;the interpolation elsewhere
        movq mm1, [eax+ebx+colorA]
        movq mm2, [eax+ebx+ebx+colorC]
        movq mm3, [Mask1]
        movq mm4, [Mask2]
        pand mm1, mm3
        pand mm2, mm4
        por mm3, mm4
        pxor mm7, mm7
        por mm1, mm2
        pcmpeqw mm3, mm7
        pand mm0, mm3
        por mm0, mm1
        movq [ACPixel], mm0
        ;////////////////////////////////
        ; Decide which "branch" to take
        ;--------------------------------
        movq mm0, [eax+ebx+colorA]
        movq mm1, [eax+ebx+colorB]
        movq mm6, mm0
        movq mm7, mm1
        pcmpeqw mm0, [eax+ebx+ebx+colorD]
        pcmpeqw mm1, [eax+ebx+ebx+colorC]
        pcmpeqw mm6, mm7                ; colorA == colorB
        movq mm2, mm0
        movq mm3, mm0
        pand mm0, mm1                   ;colorA == colorD && colorB == colorC
        pxor mm7, mm7
        pcmpeqw mm2, mm7
        pand mm6, mm0                   ; all four equal
        pand mm2, mm1                   ;colorA != colorD && colorB == colorC
        pcmpeqw mm1, mm7
        pand mm1, mm3                   ;colorA == colorD && colorB != colorC
        pxor mm0, mm6                   ; both diagonals match but differ from each other
        por mm1, mm6
        movq mm7, mm0
        movq [Mask2], mm2
        packsswb mm7, mm7
        movq [Mask1], mm1
        movd ecx, mm7
        test ecx, ecx
        jz near .SKIP_GUESS             ; no undecided lane
        ;---------------------------------------------
        ; Vote on the undecided lanes using the pixels around the 2x2 core
        ; (A B / C D); see the pixel map in the routine header.
        movq mm6, mm0
        movq mm4, [eax+ebx+colorA]
        movq mm5, [eax+ebx+colorB]
        pxor mm7, mm7                   ; score = 0
        pand mm6, [ONE]
        SAI_VOTE [eax+colorE], [eax+ebx+colorG]
        SAI_VOTE [eax+colorF], [eax+ebx+colorK]
        push eax
        add eax, ebx                    ; eax = row G/A/B so that row N/O is reachable
        SAI_VOTE [eax+ebx+colorH], [eax+ebx+ebx+colorN]
        SAI_VOTE [eax+ebx+colorL], [eax+ebx+ebx+colorO]
        pop eax
        movq mm1, mm7
        pxor mm0, mm0
        pcmpgtw mm7, mm0                ; score > 0
        pcmpgtw mm0, mm1                ; score < 0
        por mm7, [Mask1]
        por mm0, [Mask2]
        movq [Mask1], mm7
        movq [Mask2], mm0
.SKIP_GUESS:
        ;----------------------------
        ;interpolate A, B, C and D
        ; (qcolorMask parts are shifted right by 2 and summed; the
        ;  qlowpixelMask parts are summed first, then shifted and re-masked)
        movq mm0, [eax+ebx+colorA]
        movq mm1, [eax+ebx+colorB]
        movq mm4, mm0
        movq mm2, [eax+ebx+ebx+colorC]
        movq mm5, mm1
        movq mm3, [qcolorMask]
        movq mm6, mm2
        movq mm7, [qlowpixelMask]
        pand mm0, mm3
        pand mm1, mm3
        pand mm2, mm3
        pand mm3, [eax+ebx+ebx+colorD]
        psrlw mm0, 2
        pand mm4, mm7
        psrlw mm1, 2
        pand mm5, mm7
        psrlw mm2, 2
        pand mm6, mm7
        psrlw mm3, 2
        pand mm7, [eax+ebx+ebx+colorD]
        paddw mm0, mm1
        paddw mm2, mm3
        paddw mm4, mm5
        paddw mm6, mm7
        paddw mm4, mm6
        paddw mm0, mm2
        psrlw mm4, 2
        pand mm4, [qlowpixelMask]
        paddw mm0, mm4                  ;mm0 contains the interpolated value of A, B, C and D
        ;\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
        ;assemble the pixels: colorA where Mask1, colorB where Mask2,
        ;the four-pixel interpolation elsewhere
        movq mm1, [Mask1]
        movq mm2, [Mask2]
        movq mm4, [eax+ebx+colorA]
        movq mm5, [eax+ebx+colorB]
        pand mm4, mm1
        pand mm5, mm2
        pxor mm7, mm7
        por mm1, mm2
        por mm4, mm5
        pcmpeqw mm1, mm7
        pand mm0, mm1
        por mm4, mm0                    ;mm4 contains the diagonal pixels
        movq mm0, [ACPixel]
        movq mm1, mm0
        punpcklwd mm0, mm4
        punpckhwd mm1, mm4
        push edx
        add edx, [ebp+dstPitch]         ; second output row
%ifdef FAR_POINTER
        movq [fs:edx], mm0
        movq [fs:edx+8], mm1
%else
        movq [edx], mm0
        movq [edx+8], mm1
%endif
        pop edx
.SKIP_PROCESS:
        add dword [ebp+deltaPtr], 8     ; next four delta pixels
        add edx, 16                     ; next eight destination pixels
        add eax, 8                      ; next four source pixels
        pop ecx
        sub ecx, 4
        test ecx, ecx
        jg near .Loop
.Done:
        ; Restore some stuff
        popad
        mov esp, ebp
        pop ebp
        emms
        ret
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
%ifdef __DJGPP__
_Init_2xSaIMMX:
%else
Init_2xSaIMMX:
%endif
;-------------------------------------------------------------------------
; Init_2xSaIMMX (PixelFormat)
;
; Selects the interpolation masks for the requested 16-bit pixel layout.
;
; In:   [ebp+8] = PixelFormat, the decimal value 555 or 565
; Out:  eax = 0  the four masks below were rewritten
;       eax = 1  PixelFormat was neither value; nothing was written
;
; Writes colorMask, lowPixelMask, qcolorMask and qlowpixelMask, each as
; two identical dwords (8 bytes). Only eax is modified.
;-------------------------------------------------------------------------
        push    ebp
        mov     ebp, esp

; MMX detection, left disabled by the original author because it did not
; work:
;       mov     eax, 1
;       cpuid
;       test    edx, 0x00800000         ; test bit 23
;       jz      .unsupported            ; bit not set => no MMX detected
; NOTE(review): cpuid overwrites eax, ebx, ecx and edx and this routine
; never saved ebx/ecx -- possibly the cause; confirm before re-enabling.

        mov     eax, [ebp+8]            ; eax = PixelFormat
        cmp     eax, 555
        je      .format555
        cmp     eax, 565
        je      .format565

.unsupported:
        mov     eax, 1                  ; report failure
        jmp     .done

.format555:
        ; 5-5-5 layout: 0x7BDE drops the lowest bit of every channel,
        ; 0x0421 keeps only those bits.
        mov     dword [colorMask],       0x7BDE7BDE
        mov     dword [colorMask+4],     0x7BDE7BDE
        mov     dword [lowPixelMask],    0x04210421
        mov     dword [lowPixelMask+4],  0x04210421
        ; 0x739C drops the two lowest bits of every channel,
        ; 0x0C63 keeps only those bits.
        mov     dword [qcolorMask],      0x739C739C
        mov     dword [qcolorMask+4],    0x739C739C
        mov     dword [qlowpixelMask],   0x0C630C63
        mov     dword [qlowpixelMask+4], 0x0C630C63
        xor     eax, eax                ; report success
        jmp     .done

.format565:
        ; 5-6-5 layout: same four masks, shifted for the 6-bit middle
        ; channel.
        mov     dword [colorMask],       0xF7DEF7DE
        mov     dword [colorMask+4],     0xF7DEF7DE
        mov     dword [lowPixelMask],    0x08210821
        mov     dword [lowPixelMask+4],  0x08210821
        mov     dword [qcolorMask],      0xE79CE79C
        mov     dword [qcolorMask+4],    0xE79CE79C
        mov     dword [qlowpixelMask],   0x18631863
        mov     dword [qlowpixelMask+4], 0x18631863
        xor     eax, eax                ; report success

.done:
        pop     ebp                     ; esp still equals ebp here
        ret
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
SECTION .data ALIGN = 32
;-------------------------------------------------------------------------
; Quadword constants: every entry is the same 16-bit pattern repeated four
; times (emitted as two identical dwords) so it can be applied to four
; packed 16-bit words at once.
;
; The first four masks start out with the same values Init_2xSaIMMX stores
; for pixel format 565, and are overwritten by that routine at run time.
;-------------------------------------------------------------------------
colorMask:      times 2 dd 0xF7DEF7DE   ; 565: all bits except each channel's lowest
lowPixelMask:   times 2 dd 0x08210821   ; 565: lowest bit of each channel
qcolorMask:     times 2 dd 0xE79CE79C   ; 565: all bits except each channel's two lowest
qlowpixelMask:  times 2 dd 0x18631863   ; 565: two lowest bits of each channel
darkenMask:     times 2 dd 0xC718C718
GreenMask:      times 2 dd 0x07E007E0   ; bits 5-10 of each word
RedBlueMask:    times 2 dd 0xF81FF81F   ; bits 0-4 and 11-15 of each word
FALSE:          times 2 dd 0x00000000   ; all bits clear
TRUE:           times 2 dd 0xffffffff   ; all bits set
ONE:            times 2 dd 0x00010001   ; the value 1 in each 16-bit word
SECTION .bss ALIGN = 32
;-------------------------------------------------------------------------
; Uninitialised 8-byte scratch slots, one quadword each. The line routines
; spill MMX registers into them with movq (e.g. ACPixel, Mask1, Mask2).
; NOTE(review): these are static, so the routines using them are
; presumably not reentrant -- confirm before calling from several threads.
;-------------------------------------------------------------------------
ACPixel:        resq 1
Mask1:          resq 1
Mask2:          resq 1
I56Pixel:       resq 1
I23Pixel:       resq 1
I5556Pixel:     resq 1
I2223Pixel:     resq 1
I5666Pixel:     resq 1
I2333Pixel:     resq 1
Mask26:         resq 1
Mask35:         resq 1
Mask26b:        resq 1
Mask35b:        resq 1
product1a:      resq 1
product1b:      resq 1
product2a:      resq 1
product2b:      resq 1
final1a:        resq 1
final1b:        resq 1
final2a:        resq 1
final2b:        resq 1