mirror of https://github.com/PCSX2/pcsx2.git
; Pcsx2 - Pc Ps2 Emulator
; Copyright (C) 2002-2008 Pcsx2 Team
;
; This program is free software; you can redistribute it and/or modify
; it under the terms of the GNU General Public License as published by
; the Free Software Foundation; either version 2 of the License, or
; (at your option) any later version.
;
; This program is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
; GNU General Public License for more details.
;
; You should have received a copy of the GNU General Public License
; along with this program; if not, write to the Free Software
; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA

;; Fast VIF assembly routines for UNPACK zerofrog(@gmail.com)
;; NOTE: This file is used to build aVif_proc-[32/64].asm because ml has a very
;; weak preprocessor. To generate the files, install nasm and run the following commands:
;; aVif_proc-32.asm: nasmw -e aVif.asm > aVif_proc-32.asm
;; aVif_proc-64.asm: nasmw -e -D__x86_64__ aVif.asm > aVif_proc-64.asm
;; once the files are built, remove all lines starting with %line
;; and remove the brackets from the exports

%ifndef __x86_64__
.686
.model flat, c
.mmx
.xmm
%endif

extern _vifRegs:abs
extern _vifMaskRegs:abs
extern _vifRow:abs
extern _vifCol:abs
extern s_TempDecompress:abs


.code


%ifdef __x86_64__
%define VIF_ESP rsp
%define VIF_SRC rdx
%define VIF_INC rdi
%define VIF_DST rcx
%define VIF_SIZE r8d
%define VIF_TMPADDR rax
%define VIF_SAVEEBX r9
%define VIF_SAVEEBXd r9d
%else
%define VIF_ESP esp
%define VIF_SRC esi
%define VIF_INC ecx
%define VIF_DST edi
%define VIF_SIZE edx
%define VIF_TMPADDR eax
%define VIF_SAVEEBX ebx
%define VIF_SAVEEBXd ebx
%endif

%define XMM_R0 xmm0
%define XMM_R1 xmm1
%define XMM_R2 xmm2
%define XMM_WRITEMASK xmm3
%define XMM_ROWMASK xmm4
%define XMM_ROWCOLMASK xmm5
%define XMM_ROW xmm6
%define XMM_COL xmm7
%define XMM_R3 XMM_COL

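;; Register roles used throughout the macros below (summary added for clarity):
;; VIF_SRC walks the packed source stream, VIF_DST the unpacked destination,
;; VIF_SIZE tracks the bytes of input left, VIF_INC holds the destination skip
;; in bytes, and VIF_TMPADDR is a scratch pointer. XMM_ROW/XMM_COL cache the
;; VIF row and column registers; note that XMM_R3 aliases XMM_COL, so the
;; four-register unpacks temporarily reuse the column register.
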
;; writing masks
UNPACK_Write0_Regular macro r0, CL, DEST_OFFSET, MOVDQA
MOVDQA [VIF_DST+DEST_OFFSET], r0
endm

UNPACK_Write1_Regular macro r0, CL, DEST_OFFSET, MOVDQA
MOVDQA [VIF_DST], r0
add VIF_DST, VIF_INC
endm

UNPACK_Write0_Mask macro r0, CL, DEST_OFFSET, MOVDQA
UNPACK_Write0_Regular r0, CL, DEST_OFFSET, MOVDQA
endm

UNPACK_Write1_Mask macro r0, CL, DEST_OFFSET, MOVDQA
UNPACK_Write1_Regular r0, CL, DEST_OFFSET, MOVDQA
endm

;; masked write (dest needs to be in edi)
UNPACK_Write0_WriteMask macro r0, CL, DEST_OFFSET, MOVDQA
movdqa XMM_WRITEMASK, [VIF_TMPADDR + 64*(CL) + 48]
pand r0, XMM_WRITEMASK
pandn XMM_WRITEMASK, [VIF_DST]
por r0, XMM_WRITEMASK
MOVDQA [VIF_DST], r0
add VIF_DST, 16
endm

;; masked write (dest needs to be in edi)
UNPACK_Write1_WriteMask macro r0, CL, DEST_OFFSET, MOVDQA
movdqa XMM_WRITEMASK, [VIF_TMPADDR + 64*(0) + 48]
pand r0, XMM_WRITEMASK
pandn XMM_WRITEMASK, [VIF_DST]
por r0, XMM_WRITEMASK
MOVDQA [VIF_DST], r0
add VIF_DST, VIF_INC
endm

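;; Note on the two WriteMask writers above: the pand/pandn/por sequence is a
;; bitwise select. Unpacked data is kept where XMM_WRITEMASK is set and the
;; existing destination qword is preserved elsewhere, which is why these
;; writers read-modify-write [VIF_DST] instead of storing over it.
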
UNPACK_Mask_SSE_0 macro r0
pand r0, XMM_WRITEMASK
por r0, XMM_ROWCOLMASK
endm

;; once a qword is uncompressed, applies masks and saves
;; note: modifies XMM_WRITEMASK
;; dest = row + write (only when mask=0), otherwise write
UNPACK_Mask_SSE_1 macro r0
pand r0, XMM_WRITEMASK
por r0, XMM_ROWCOLMASK
pand XMM_WRITEMASK, XMM_ROW
paddd r0, XMM_WRITEMASK
endm

;; dest = row + write (only when mask=0), otherwise write
;; row = row + write (only when mask = 0), otherwise row
UNPACK_Mask_SSE_2 macro r0
pand r0, XMM_WRITEMASK
pand XMM_WRITEMASK, XMM_ROW
paddd XMM_ROW, r0
por r0, XMM_ROWCOLMASK
paddd r0, XMM_WRITEMASK
endm

UNPACK_WriteMask_SSE_0 macro r0
UNPACK_Mask_SSE_0 r0
endm
UNPACK_WriteMask_SSE_1 macro r0
UNPACK_Mask_SSE_1 r0
endm
UNPACK_WriteMask_SSE_2 macro r0
UNPACK_Mask_SSE_2 r0
endm

UNPACK_Regular_SSE_0 macro r0
endm

UNPACK_Regular_SSE_1 macro r0
paddd r0, XMM_ROW
endm

UNPACK_Regular_SSE_2 macro r0
paddd r0, XMM_ROW
movdqa XMM_ROW, r0
endm

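;; Mode summary for the _SSE_<mode> macros above: mode 0 stores the unpacked
;; data (after any masking), mode 1 adds the current XMM_ROW value before
;; storing, and mode 2 also updates XMM_ROW so the row accumulates. The Mask
;; variants additionally blend in the row/col values selected by the tables
;; in _vifMaskRegs.
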
;; setting up masks
UNPACK_Setup_Mask_SSE macro CL
mov VIF_TMPADDR, [_vifMaskRegs]
movdqa XMM_ROWMASK, [VIF_TMPADDR + 64*(CL) + 16]
movdqa XMM_ROWCOLMASK, [VIF_TMPADDR + 64*(CL) + 32]
movdqa XMM_WRITEMASK, [VIF_TMPADDR + 64*(CL)]
pand XMM_ROWMASK, XMM_ROW
pand XMM_ROWCOLMASK, XMM_COL
por XMM_ROWCOLMASK, XMM_ROWMASK
endm

UNPACK_Start_Setup_Mask_SSE_0 macro CL
UNPACK_Setup_Mask_SSE CL
endm

UNPACK_Start_Setup_Mask_SSE_1 macro CL
mov VIF_TMPADDR, [_vifMaskRegs]
movdqa XMM_ROWMASK, [VIF_TMPADDR + 64*(CL) + 16]
movdqa XMM_ROWCOLMASK, [VIF_TMPADDR + 64*(CL) + 32]
pand XMM_ROWMASK, XMM_ROW
pand XMM_ROWCOLMASK, XMM_COL
por XMM_ROWCOLMASK, XMM_ROWMASK
endm

UNPACK_Start_Setup_Mask_SSE_2 macro CL
endm

UNPACK_Setup_Mask_SSE_0_1 macro CL
endm
UNPACK_Setup_Mask_SSE_1_1 macro CL
mov VIF_TMPADDR, [_vifMaskRegs]
movdqa XMM_WRITEMASK, [VIF_TMPADDR + 64*(0)]
endm

;; ignore CL, since vif.cycle.wl == 1
UNPACK_Setup_Mask_SSE_2_1 macro CL
mov VIF_TMPADDR, [_vifMaskRegs]
movdqa XMM_ROWMASK, [VIF_TMPADDR + 64*(0) + 16]
movdqa XMM_ROWCOLMASK, [VIF_TMPADDR + 64*(0) + 32]
movdqa XMM_WRITEMASK, [VIF_TMPADDR + 64*(0)]
pand XMM_ROWMASK, XMM_ROW
pand XMM_ROWCOLMASK, XMM_COL
por XMM_ROWCOLMASK, XMM_ROWMASK
endm

UNPACK_Setup_Mask_SSE_0_0 macro CL
UNPACK_Setup_Mask_SSE CL
endm
UNPACK_Setup_Mask_SSE_1_0 macro CL
UNPACK_Setup_Mask_SSE CL
endm
UNPACK_Setup_Mask_SSE_2_0 macro CL
UNPACK_Setup_Mask_SSE CL
endm

;; write mask always destroys XMM_WRITEMASK, so 0_0 = 1_0
UNPACK_Setup_WriteMask_SSE_0_0 macro CL
UNPACK_Setup_Mask_SSE CL
endm
UNPACK_Setup_WriteMask_SSE_1_0 macro CL
UNPACK_Setup_Mask_SSE CL
endm
UNPACK_Setup_WriteMask_SSE_2_0 macro CL
UNPACK_Setup_Mask_SSE CL
endm
UNPACK_Setup_WriteMask_SSE_0_1 macro CL
UNPACK_Setup_Mask_SSE_1_1 CL
endm

UNPACK_Setup_WriteMask_SSE_1_1 macro CL
UNPACK_Setup_Mask_SSE_1_1 CL
endm

UNPACK_Setup_WriteMask_SSE_2_1 macro CL
UNPACK_Setup_Mask_SSE_2_1 CL
endm

UNPACK_Start_Setup_WriteMask_SSE_0 macro CL
UNPACK_Start_Setup_Mask_SSE_1 CL
endm
UNPACK_Start_Setup_WriteMask_SSE_1 macro CL
UNPACK_Start_Setup_Mask_SSE_1 CL
endm
UNPACK_Start_Setup_WriteMask_SSE_2 macro CL
UNPACK_Start_Setup_Mask_SSE_2 CL
endm

UNPACK_Start_Setup_Regular_SSE_0 macro CL
endm
UNPACK_Start_Setup_Regular_SSE_1 macro CL
endm
UNPACK_Start_Setup_Regular_SSE_2 macro CL
endm
UNPACK_Setup_Regular_SSE_0_0 macro CL
endm
UNPACK_Setup_Regular_SSE_1_0 macro CL
endm
UNPACK_Setup_Regular_SSE_2_0 macro CL
endm
UNPACK_Setup_Regular_SSE_0_1 macro CL
endm
UNPACK_Setup_Regular_SSE_1_1 macro CL
endm
UNPACK_Setup_Regular_SSE_2_1 macro CL
endm

UNPACK_INC_DST_0_Regular macro qw
add VIF_DST, (16*qw)
endm
UNPACK_INC_DST_1_Regular macro qw
endm
UNPACK_INC_DST_0_Mask macro qw
add VIF_DST, (16*qw)
endm
UNPACK_INC_DST_1_Mask macro qw
endm
UNPACK_INC_DST_0_WriteMask macro qw
endm
UNPACK_INC_DST_1_WriteMask macro qw
endm

;; unpacks for 1,2,3,4 elements (V3 uses this directly)
UNPACK4_SSE macro CL, TOTALCL, MaskType, ModeType
@CatStr(UNPACK_Setup_, MaskType, _SSE_, ModeType, _, TOTALCL) CL+0
@CatStr(UNPACK_, MaskType, _SSE_, ModeType) XMM_R0
@CatStr(UNPACK_Write, TOTALCL, _, MaskType) XMM_R0, CL, 0, movdqa

@CatStr(UNPACK_Setup_, MaskType, _SSE_, ModeType, _, TOTALCL) CL+1
@CatStr(UNPACK_, MaskType, _SSE_, ModeType) XMM_R1
@CatStr(UNPACK_Write, TOTALCL, _, MaskType) XMM_R1, CL+1, 16, movdqa

@CatStr(UNPACK_Setup_, MaskType, _SSE_, ModeType, _, TOTALCL) CL+2
@CatStr(UNPACK_, MaskType, _SSE_, ModeType) XMM_R2
@CatStr(UNPACK_Write, TOTALCL, _, MaskType) XMM_R2, CL+2, 32, movdqa

@CatStr(UNPACK_Setup_, MaskType, _SSE_, ModeType, _, TOTALCL) CL+3
@CatStr(UNPACK_, MaskType, _SSE_, ModeType) XMM_R3
@CatStr(UNPACK_Write, TOTALCL, _, MaskType) XMM_R3, CL+3, 48, movdqa

@CatStr(UNPACK_INC_DST_, TOTALCL, _, MaskType) 4
endm

;; V3 uses this directly
UNPACK3_SSE macro CL, TOTALCL, MaskType, ModeType
@CatStr(UNPACK_Setup_, MaskType, _SSE_, ModeType, _, TOTALCL) CL
@CatStr(UNPACK_, MaskType, _SSE_, ModeType) XMM_R0
@CatStr(UNPACK_Write, TOTALCL, _, MaskType) XMM_R0, CL, 0, movdqa

@CatStr(UNPACK_Setup_, MaskType, _SSE_, ModeType, _, TOTALCL) CL+1
@CatStr(UNPACK_, MaskType, _SSE_, ModeType) XMM_R1
@CatStr(UNPACK_Write, TOTALCL, _, MaskType) XMM_R1, CL+1, 16, movdqa

@CatStr(UNPACK_Setup_, MaskType, _SSE_, ModeType, _, TOTALCL) CL+2
@CatStr(UNPACK_, MaskType, _SSE_, ModeType) XMM_R2
@CatStr(UNPACK_Write, TOTALCL, _, MaskType) XMM_R2, CL+2, 32, movdqa

@CatStr(UNPACK_INC_DST_, TOTALCL, _, MaskType) 3
endm

UNPACK2_SSE macro CL, TOTALCL, MaskType, ModeType
@CatStr(UNPACK_Setup_, MaskType, _SSE_, ModeType, _, TOTALCL) CL
@CatStr(UNPACK_, MaskType, _SSE_, ModeType) XMM_R0
@CatStr(UNPACK_Write, TOTALCL, _, MaskType) XMM_R0, CL, 0, movdqa

@CatStr(UNPACK_Setup_, MaskType, _SSE_, ModeType, _, TOTALCL) CL+1
@CatStr(UNPACK_, MaskType, _SSE_, ModeType) XMM_R1
@CatStr(UNPACK_Write, TOTALCL, _, MaskType) XMM_R1, CL+1, 16, movdqa

@CatStr(UNPACK_INC_DST_, TOTALCL, _, MaskType) 2
endm

UNPACK1_SSE macro CL, TOTALCL, MaskType, ModeType
@CatStr(UNPACK_Setup_, MaskType, _SSE_, ModeType, _, TOTALCL) CL
@CatStr(UNPACK_, MaskType, _SSE_, ModeType) XMM_R0
@CatStr(UNPACK_Write, TOTALCL, _, MaskType) XMM_R0, CL, 0, movdqa

@CatStr(UNPACK_INC_DST_, TOTALCL, _, MaskType) 1
endm

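;; For reference, one expansion of the block above: "UNPACK1_SSE 0, 1, Mask, 1"
;; becomes
;;   UNPACK_Setup_Mask_SSE_1_1 0
;;   UNPACK_Mask_SSE_1 XMM_R0
;;   UNPACK_Write1_Mask XMM_R0, 0, 0, movdqa
;;   UNPACK_INC_DST_1_Mask 1
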
;; S-32
|
|
;; only when cl==1
|
|
UNPACK_S_32SSE_4x macro CL, TOTALCL, MaskType, ModeType, MOVDQA
|
|
MOVDQA XMM_R3, [VIF_SRC]
|
|
|
|
pshufd XMM_R0, XMM_R3, 0
|
|
pshufd XMM_R1, XMM_R3, 055h
|
|
pshufd XMM_R2, XMM_R3, 0aah
|
|
pshufd XMM_R3, XMM_R3, 0ffh
|
|
|
|
UNPACK4_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 16
|
|
endm
|
|
|
|
UNPACK_S_32SSE_4A macro CL, TOTALCL, MaskType, ModeType
|
|
UNPACK_S_32SSE_4x CL, TOTALCL, MaskType, ModeType, movdqa
|
|
endm
|
|
UNPACK_S_32SSE_4 macro CL, TOTALCL, MaskType, ModeType
|
|
UNPACK_S_32SSE_4x CL, TOTALCL, MaskType, ModeType, movdqu
|
|
endm
|
|
|
|
UNPACK_S_32SSE_3x macro CL, TOTALCL, MaskType, ModeType, MOVDQA
|
|
MOVDQA XMM_R2, [VIF_SRC]
|
|
|
|
pshufd XMM_R0, XMM_R2, 0
|
|
pshufd XMM_R1, XMM_R2, 055h
|
|
pshufd XMM_R2, XMM_R2, 0aah
|
|
|
|
UNPACK3_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 12
|
|
endm
|
|
|
|
UNPACK_S_32SSE_3A macro CL, TOTALCL, MaskType, ModeType
|
|
UNPACK_S_32SSE_3x CL, TOTALCL, MaskType, ModeType, movdqa
|
|
endm
|
|
UNPACK_S_32SSE_3 macro CL, TOTALCL, MaskType, ModeType
|
|
UNPACK_S_32SSE_3x CL, TOTALCL, MaskType, ModeType, movdqu
|
|
endm
|
|
|
|
UNPACK_S_32SSE_2 macro CL, TOTALCL, MaskType, ModeType
|
|
movq XMM_R1, QWORD PTR [VIF_SRC]
|
|
|
|
pshufd XMM_R0, XMM_R1, 0
|
|
pshufd XMM_R1, XMM_R1, 055h
|
|
|
|
UNPACK2_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 8
|
|
endm
|
|
|
|
UNPACK_S_32SSE_2A macro CL, TOTALCL, MaskType, ModeType
|
|
UNPACK_S_32SSE_2 CL, TOTALCL, MaskType, ModeType
|
|
endm
|
|
|
|
UNPACK_S_32SSE_1 macro CL, TOTALCL, MaskType, ModeType
|
|
movd XMM_R0, dword ptr [VIF_SRC]
|
|
pshufd XMM_R0, XMM_R0, 0
|
|
|
|
UNPACK1_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 4
|
|
endm
|
|
|
|
UNPACK_S_32SSE_1A macro CL, TOTALCL, MaskType, ModeType
|
|
UNPACK_S_32SSE_1 CL, TOTALCL, MaskType, ModeType
|
|
endm
|
|
|
|
;; S-16
|
|
UNPACK_S_16SSE_4 macro CL, TOTALCL, MaskType, ModeType
|
|
movq XMM_R3, QWORD PTR [VIF_SRC]
|
|
punpcklwd XMM_R3, XMM_R3
|
|
UNPACK_RIGHTSHIFT XMM_R3, 16
|
|
|
|
pshufd XMM_R0, XMM_R3, 0
|
|
pshufd XMM_R1, XMM_R3, 055h
|
|
pshufd XMM_R2, XMM_R3, 0aah
|
|
pshufd XMM_R3, XMM_R3, 0ffh
|
|
|
|
UNPACK4_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 8
|
|
endm
|
|
|
|
UNPACK_S_16SSE_4A macro CL, TOTALCL, MaskType, ModeType
|
|
UNPACK_S_16SSE_4 CL, TOTALCL, MaskType, ModeType
|
|
endm
|
|
|
|
UNPACK_S_16SSE_3 macro CL, TOTALCL, MaskType, ModeType
|
|
movq XMM_R2, QWORD PTR [VIF_SRC]
|
|
punpcklwd XMM_R2, XMM_R2
|
|
UNPACK_RIGHTSHIFT XMM_R2, 16
|
|
|
|
pshufd XMM_R0, XMM_R2, 0
|
|
pshufd XMM_R1, XMM_R2, 055h
|
|
pshufd XMM_R2, XMM_R2, 0aah
|
|
|
|
UNPACK3_SSE CL, TOTALCL, MaskType, ModeType
|
|
add VIF_SRC, 6
|
|
endm
|
|
|
|
UNPACK_S_16SSE_3A macro CL, TOTALCL, MaskType, ModeType
|
|
UNPACK_S_16SSE_3 CL, TOTALCL, MaskType, ModeType
|
|
endm
|
|
|
|
UNPACK_S_16SSE_2 macro CL, TOTALCL, MaskType, ModeType
|
|
movd XMM_R1, dword ptr [VIF_SRC]
|
|
punpcklwd XMM_R1, XMM_R1
|
|
UNPACK_RIGHTSHIFT XMM_R1, 16
|
|
|
|
pshufd XMM_R0, XMM_R1, 0
|
|
pshufd XMM_R1, XMM_R1, 055h
|
|
|
|
UNPACK2_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 4
|
|
endm
|
|
|
|
UNPACK_S_16SSE_2A macro CL, TOTALCL, MaskType, ModeType
|
|
UNPACK_S_16SSE_2 CL, TOTALCL, MaskType, ModeType
|
|
endm
|
|
|
|
UNPACK_S_16SSE_1 macro CL, TOTALCL, MaskType, ModeType
|
|
movd XMM_R0, dword ptr [VIF_SRC]
|
|
punpcklwd XMM_R0, XMM_R0
|
|
UNPACK_RIGHTSHIFT XMM_R0, 16
|
|
pshufd XMM_R0, XMM_R0, 0
|
|
|
|
UNPACK1_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 2
|
|
endm
|
|
|
|
UNPACK_S_16SSE_1A macro CL, TOTALCL, MaskType, ModeType
|
|
UNPACK_S_16SSE_1 CL, TOTALCL, MaskType, ModeType
|
|
endm
|
|
|
|
;; S-8
|
|
UNPACK_S_8SSE_4 macro CL, TOTALCL, MaskType, ModeType
|
|
movd XMM_R3, dword ptr [VIF_SRC]
|
|
punpcklbw XMM_R3, XMM_R3
|
|
punpcklwd XMM_R3, XMM_R3
|
|
UNPACK_RIGHTSHIFT XMM_R3, 24
|
|
|
|
pshufd XMM_R0, XMM_R3, 0
|
|
pshufd XMM_R1, XMM_R3, 055h
|
|
pshufd XMM_R2, XMM_R3, 0aah
|
|
pshufd XMM_R3, XMM_R3, 0ffh
|
|
|
|
UNPACK4_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 4
|
|
endm
|
|
|
|
UNPACK_S_8SSE_4A macro CL, TOTALCL, MaskType, ModeType
|
|
UNPACK_S_8SSE_4 CL, TOTALCL, MaskType, ModeType
|
|
endm
|
|
|
|
UNPACK_S_8SSE_3 macro CL, TOTALCL, MaskType, ModeType
|
|
movd XMM_R2, dword ptr [VIF_SRC]
|
|
punpcklbw XMM_R2, XMM_R2
|
|
punpcklwd XMM_R2, XMM_R2
|
|
UNPACK_RIGHTSHIFT XMM_R2, 24
|
|
|
|
pshufd XMM_R0, XMM_R2, 0
|
|
pshufd XMM_R1, XMM_R2, 055h
|
|
pshufd XMM_R2, XMM_R2, 0aah
|
|
|
|
UNPACK3_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 3
|
|
endm
|
|
|
|
UNPACK_S_8SSE_3A macro CL, TOTALCL, MaskType, ModeType
|
|
UNPACK_S_8SSE_3 CL, TOTALCL, MaskType, ModeType
|
|
endm
|
|
|
|
UNPACK_S_8SSE_2 macro CL, TOTALCL, MaskType, ModeType
|
|
movd XMM_R1, dword ptr [VIF_SRC]
|
|
punpcklbw XMM_R1, XMM_R1
|
|
punpcklwd XMM_R1, XMM_R1
|
|
UNPACK_RIGHTSHIFT XMM_R1, 24
|
|
|
|
pshufd XMM_R0, XMM_R1, 0
|
|
pshufd XMM_R1, XMM_R1, 055h
|
|
|
|
UNPACK2_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 2
|
|
endm
|
|
|
|
UNPACK_S_8SSE_2A macro CL, TOTALCL, MaskType, ModeType
|
|
UNPACK_S_8SSE_2 CL, TOTALCL, MaskType, ModeType
|
|
endm
|
|
|
|
UNPACK_S_8SSE_1 macro CL, TOTALCL, MaskType, ModeType
|
|
movd XMM_R0, dword ptr [VIF_SRC]
|
|
punpcklbw XMM_R0, XMM_R0
|
|
punpcklwd XMM_R0, XMM_R0
|
|
UNPACK_RIGHTSHIFT XMM_R0, 24
|
|
pshufd XMM_R0, XMM_R0, 0
|
|
|
|
UNPACK1_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
inc VIF_SRC
|
|
endm
|
|
|
|
UNPACK_S_8SSE_1A macro CL, TOTALCL, MaskType, ModeType
|
|
UNPACK_S_8SSE_1 CL, TOTALCL, MaskType, ModeType
|
|
endm
|
|
|
|
;; V2-32
|
|
UNPACK_V2_32SSE_4A macro CL, TOTALCL, MaskType, ModeType
|
|
MOVDQA XMM_R0, [VIF_SRC]
|
|
MOVDQA XMM_R2, [VIF_SRC+16]
|
|
|
|
pshufd XMM_R1, XMM_R0, 0eeh
|
|
pshufd XMM_R3, XMM_R2, 0eeh
|
|
|
|
UNPACK4_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 32
|
|
endm
|
|
|
|
UNPACK_V2_32SSE_4 macro CL, TOTALCL, MaskType, ModeType
|
|
movq XMM_R0, QWORD PTR [VIF_SRC]
|
|
movq XMM_R1, QWORD PTR [VIF_SRC+8]
|
|
movq XMM_R2, QWORD PTR [VIF_SRC+16]
|
|
movq XMM_R3, QWORD PTR [VIF_SRC+24]
|
|
|
|
UNPACK4_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 32
|
|
endm
|
|
|
|
UNPACK_V2_32SSE_3A macro CL, TOTALCL, MaskType, ModeType
|
|
MOVDQA XMM_R0, [VIF_SRC]
|
|
movq XMM_R2, QWORD PTR [VIF_SRC+16]
|
|
pshufd XMM_R1, XMM_R0, 0eeh
|
|
|
|
UNPACK3_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 24
|
|
endm
|
|
|
|
UNPACK_V2_32SSE_3 macro CL, TOTALCL, MaskType, ModeType
|
|
movq XMM_R0, QWORD PTR [VIF_SRC]
|
|
movq XMM_R1, QWORD PTR [VIF_SRC+8]
|
|
movq XMM_R2, QWORD PTR [VIF_SRC+16]
|
|
|
|
UNPACK3_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 24
|
|
endm
|
|
|
|
UNPACK_V2_32SSE_2 macro CL, TOTALCL, MaskType, ModeType
|
|
movq XMM_R0, QWORD PTR [VIF_SRC]
|
|
movq XMM_R1, QWORD PTR [VIF_SRC+8]
|
|
|
|
UNPACK2_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 16
|
|
endm
|
|
|
|
UNPACK_V2_32SSE_2A macro CL, TOTALCL, MaskType, ModeType
|
|
UNPACK_V2_32SSE_2 CL, TOTALCL, MaskType, ModeType
|
|
endm
|
|
|
|
UNPACK_V2_32SSE_1 macro CL, TOTALCL, MaskType, ModeType
|
|
movq XMM_R0, QWORD PTR [VIF_SRC]
|
|
|
|
UNPACK1_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 8
|
|
endm
|
|
|
|
UNPACK_V2_32SSE_1A macro CL, TOTALCL, MaskType, ModeType
|
|
UNPACK_V2_32SSE_1 CL, TOTALCL, MaskType, ModeType
|
|
endm
|
|
|
|
;; V2-16
|
|
;; due to Lemmings, the lower qword has to be copied to the upper qword of every reg
|
|
UNPACK_V2_16SSE_4A macro CL, TOTALCL, MaskType, ModeType
|
|
punpcklwd XMM_R0, [VIF_SRC]
|
|
punpckhwd XMM_R2, [VIF_SRC]
|
|
|
|
UNPACK_RIGHTSHIFT XMM_R0, 16
|
|
UNPACK_RIGHTSHIFT XMM_R2, 16
|
|
;; move the lower 64 bits down
|
|
punpckhqdq XMM_R1, XMM_R0
|
|
punpckhqdq XMM_R3, XMM_R2
|
|
|
|
punpcklqdq XMM_R0, XMM_R0
|
|
punpcklqdq XMM_R2, XMM_R2
|
|
punpckhqdq XMM_R1, XMM_R1
|
|
punpckhqdq XMM_R3, XMM_R3
|
|
|
|
UNPACK4_SSE CL, TOTALCL, MaskType, ModeType
|
|
add VIF_SRC, 16
|
|
endm
|
|
|
|
UNPACK_V2_16SSE_4 macro CL, TOTALCL, MaskType, ModeType
|
|
movdqu XMM_R0, [VIF_SRC]
|
|
|
|
punpckhwd XMM_R2, XMM_R0
|
|
punpcklwd XMM_R0, XMM_R0
|
|
|
|
UNPACK_RIGHTSHIFT XMM_R0, 16
|
|
UNPACK_RIGHTSHIFT XMM_R2, 16
|
|
|
|
;; move the lower 64 bits down
|
|
punpckhqdq XMM_R1, XMM_R0
|
|
punpckhqdq XMM_R3, XMM_R2
|
|
|
|
punpcklqdq XMM_R0, XMM_R0
|
|
punpcklqdq XMM_R2, XMM_R2
|
|
punpckhqdq XMM_R1, XMM_R1
|
|
punpckhqdq XMM_R3, XMM_R3
|
|
|
|
UNPACK4_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 16
|
|
endm
|
|
|
|
UNPACK_V2_16SSE_3A macro CL, TOTALCL, MaskType, ModeType
|
|
punpcklwd XMM_R0, [VIF_SRC]
|
|
punpckhwd XMM_R2, [VIF_SRC]
|
|
|
|
UNPACK_RIGHTSHIFT XMM_R0, 16
|
|
UNPACK_RIGHTSHIFT XMM_R2, 16
|
|
|
|
;; move the lower 64 bits down
|
|
punpckhqdq XMM_R1, XMM_R0
|
|
|
|
punpcklqdq XMM_R0, XMM_R0
|
|
punpcklqdq XMM_R2, XMM_R2
|
|
punpckhqdq XMM_R1, XMM_R1
|
|
|
|
UNPACK3_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 12
|
|
endm
|
|
|
|
UNPACK_V2_16SSE_3 macro CL, TOTALCL, MaskType, ModeType
|
|
movdqu XMM_R0, [VIF_SRC]
|
|
|
|
punpckhwd XMM_R2, XMM_R0
|
|
punpcklwd XMM_R0, XMM_R0
|
|
|
|
UNPACK_RIGHTSHIFT XMM_R0, 16
|
|
UNPACK_RIGHTSHIFT XMM_R2, 16
|
|
|
|
;; move the lower 64 bits down
|
|
punpckhqdq XMM_R1, XMM_R0
|
|
|
|
punpcklqdq XMM_R0, XMM_R0
|
|
punpcklqdq XMM_R2, XMM_R2
|
|
punpckhqdq XMM_R1, XMM_R1
|
|
|
|
UNPACK3_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 12
|
|
endm
|
|
|
|
UNPACK_V2_16SSE_2A macro CL, TOTALCL, MaskType, ModeType
|
|
punpcklwd XMM_R0, [VIF_SRC]
|
|
UNPACK_RIGHTSHIFT XMM_R0, 16
|
|
|
|
;; move the lower 64 bits down
|
|
punpckhqdq XMM_R1, XMM_R0
|
|
|
|
punpcklqdq XMM_R0, XMM_R0
|
|
punpckhqdq XMM_R1, XMM_R1
|
|
|
|
UNPACK2_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 8
|
|
endm
|
|
|
|
UNPACK_V2_16SSE_2 macro CL, TOTALCL, MaskType, ModeType
|
|
movq XMM_R0, QWORD PTR [VIF_SRC]
|
|
punpcklwd XMM_R0, XMM_R0
|
|
UNPACK_RIGHTSHIFT XMM_R0, 16
|
|
|
|
;; move the lower 64 bits down
|
|
punpckhqdq XMM_R1, XMM_R0
|
|
|
|
punpcklqdq XMM_R0, XMM_R0
|
|
punpckhqdq XMM_R1, XMM_R1
|
|
|
|
UNPACK2_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 8
|
|
endm
|
|
|
|
UNPACK_V2_16SSE_1A macro CL, TOTALCL, MaskType, ModeType
|
|
punpcklwd XMM_R0, [VIF_SRC]
|
|
UNPACK_RIGHTSHIFT XMM_R0, 16
|
|
punpcklqdq XMM_R0, XMM_R0
|
|
|
|
UNPACK1_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 4
|
|
endm
|
|
|
|
UNPACK_V2_16SSE_1 macro CL, TOTALCL, MaskType, ModeType
|
|
movd XMM_R0, dword ptr [VIF_SRC]
|
|
punpcklwd XMM_R0, XMM_R0
|
|
UNPACK_RIGHTSHIFT XMM_R0, 16
|
|
punpcklqdq XMM_R0, XMM_R0
|
|
|
|
UNPACK1_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 4
|
|
endm
|
|
|
|
;; V2-8
|
|
;; AND 1 Streetball needs the lower qword copied to the upper qword of every reg
|
|
UNPACK_V2_8SSE_4 macro CL, TOTALCL, MaskType, ModeType
|
|
movq XMM_R0, QWORD PTR [VIF_SRC]
|
|
|
|
punpcklbw XMM_R0, XMM_R0
|
|
punpckhwd XMM_R2, XMM_R0
|
|
punpcklwd XMM_R0, XMM_R0
|
|
|
|
UNPACK_RIGHTSHIFT XMM_R0, 24
|
|
UNPACK_RIGHTSHIFT XMM_R2, 24
|
|
|
|
;; move the lower 64 bits down
|
|
punpckhqdq XMM_R1, XMM_R0
|
|
punpckhqdq XMM_R3, XMM_R2
|
|
|
|
punpcklqdq XMM_R0, XMM_R0
|
|
punpcklqdq XMM_R2, XMM_R2
|
|
punpckhqdq XMM_R1, XMM_R1
|
|
punpckhqdq XMM_R3, XMM_R3
|
|
|
|
UNPACK4_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 8
|
|
endm
|
|
|
|
UNPACK_V2_8SSE_4A macro CL, TOTALCL, MaskType, ModeType
|
|
UNPACK_V2_8SSE_4 CL, TOTALCL, MaskType, ModeType
|
|
endm
|
|
|
|
UNPACK_V2_8SSE_3 macro CL, TOTALCL, MaskType, ModeType
|
|
movq XMM_R0, QWORD PTR [VIF_SRC]
|
|
|
|
punpcklbw XMM_R0, XMM_R0
|
|
punpckhwd XMM_R2, XMM_R0
|
|
punpcklwd XMM_R0, XMM_R0
|
|
|
|
UNPACK_RIGHTSHIFT XMM_R0, 24
|
|
UNPACK_RIGHTSHIFT XMM_R2, 24
|
|
|
|
;; move the lower 64 bits down
|
|
punpckhqdq XMM_R1, XMM_R0
|
|
|
|
punpcklqdq XMM_R0, XMM_R0
|
|
punpcklqdq XMM_R2, XMM_R2
|
|
punpckhqdq XMM_R1, XMM_R1
|
|
|
|
UNPACK3_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 6
|
|
endm
|
|
|
|
UNPACK_V2_8SSE_3A macro CL, TOTALCL, MaskType, ModeType
|
|
UNPACK_V2_8SSE_3 CL, TOTALCL, MaskType, ModeType
|
|
endm
|
|
|
|
UNPACK_V2_8SSE_2 macro CL, TOTALCL, MaskType, ModeType
|
|
movd XMM_R0, dword ptr [VIF_SRC]
|
|
punpcklbw XMM_R0, XMM_R0
|
|
punpcklwd XMM_R0, XMM_R0
|
|
UNPACK_RIGHTSHIFT XMM_R0, 24
|
|
|
|
;; move the lower 64 bits down
|
|
punpckhqdq XMM_R1, XMM_R0
|
|
|
|
punpcklqdq XMM_R0, XMM_R0
|
|
punpckhqdq XMM_R1, XMM_R1
|
|
|
|
UNPACK2_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 4
|
|
endm
|
|
|
|
UNPACK_V2_8SSE_2A macro CL, TOTALCL, MaskType, ModeType
|
|
UNPACK_V2_8SSE_2 CL, TOTALCL, MaskType, ModeType
|
|
endm
|
|
|
|
UNPACK_V2_8SSE_1 macro CL, TOTALCL, MaskType, ModeType
|
|
movd XMM_R0, dword ptr [VIF_SRC]
|
|
punpcklbw XMM_R0, XMM_R0
|
|
punpcklwd XMM_R0, XMM_R0
|
|
UNPACK_RIGHTSHIFT XMM_R0, 24
|
|
punpcklqdq XMM_R0, XMM_R0
|
|
|
|
UNPACK1_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 2
|
|
endm
|
|
|
|
UNPACK_V2_8SSE_1A macro CL, TOTALCL, MaskType, ModeType
|
|
UNPACK_V2_8SSE_1 CL, TOTALCL, MaskType, ModeType
|
|
endm
|
|
|
|
;; V3-32
|
|
UNPACK_V3_32SSE_4x macro CL, TOTALCL, MaskType, ModeType, MOVDQA
|
|
MOVDQA XMM_R0, [VIF_SRC]
|
|
movdqu XMM_R1, [VIF_SRC+12]
|
|
|
|
@CatStr(UNPACK_Setup_, MaskType, _SSE_, ModeType, _, TOTALCL) CL+0
|
|
@CatStr(UNPACK_, MaskType, _SSE_, ModeType) XMM_R0
|
|
@CatStr(UNPACK_Write, TOTALCL, _, MaskType) XMM_R0, CL, 0, movdqa
|
|
|
|
@CatStr(UNPACK_Setup_, MaskType, _SSE_, ModeType, _, TOTALCL) CL+1
|
|
@CatStr(UNPACK_, MaskType, _SSE_, ModeType) XMM_R1
|
|
@CatStr(UNPACK_Write, TOTALCL, _, MaskType) XMM_R1, CL+1, 16, movdqa
|
|
|
|
;; Midnight Club 2 crashes because reading a full qword at +36 would run out of bounds
|
|
MOVDQA XMM_R3, [VIF_SRC+32]
|
|
movdqu XMM_R2, [VIF_SRC+24]
|
|
psrldq XMM_R3, 4
|
|
|
|
@CatStr(UNPACK_Setup_, MaskType, _SSE_, ModeType, _, TOTALCL) CL+2
|
|
@CatStr(UNPACK_, MaskType, _SSE_, ModeType) XMM_R2
|
|
@CatStr(UNPACK_Write, TOTALCL, _, MaskType) XMM_R2, CL+2, 32, movdqa
|
|
|
|
@CatStr(UNPACK_Setup_, MaskType, _SSE_, ModeType, _, TOTALCL) CL+3
|
|
@CatStr(UNPACK_, MaskType, _SSE_, ModeType) XMM_R3
|
|
@CatStr(UNPACK_Write, TOTALCL, _, MaskType) XMM_R3, CL+3, 48, movdqa
|
|
|
|
@CatStr(UNPACK_INC_DST_, TOTALCL, _, MaskType) 4
|
|
|
|
add VIF_SRC, 48
|
|
endm
|
|
|
|
UNPACK_V3_32SSE_4A macro CL, TOTALCL, MaskType, ModeType
|
|
UNPACK_V3_32SSE_4x CL, TOTALCL, MaskType, ModeType, movdqa
|
|
endm
|
|
UNPACK_V3_32SSE_4 macro CL, TOTALCL, MaskType, ModeType
|
|
UNPACK_V3_32SSE_4x CL, TOTALCL, MaskType, ModeType, movdqu
|
|
endm
|
|
|
|
UNPACK_V3_32SSE_3x macro CL, TOTALCL, MaskType, ModeType, MOVDQA
|
|
MOVDQA XMM_R0, [VIF_SRC]
|
|
movdqu XMM_R1, [VIF_SRC+12]
|
|
|
|
@CatStr(UNPACK_Setup_, MaskType, _SSE_, ModeType, _, TOTALCL) CL
|
|
@CatStr(UNPACK_, MaskType, _SSE_, ModeType) XMM_R0
|
|
@CatStr(UNPACK_Write, TOTALCL, _, MaskType) XMM_R0, CL, 0, movdqa
|
|
|
|
@CatStr(UNPACK_Setup_, MaskType, _SSE_, ModeType, _, TOTALCL) CL+1
|
|
@CatStr(UNPACK_, MaskType, _SSE_, ModeType) XMM_R1
|
|
@CatStr(UNPACK_Write, TOTALCL, _, MaskType) XMM_R1, CL+1, 16, movdqa
|
|
|
|
movdqu XMM_R2, [VIF_SRC+24]
|
|
|
|
@CatStr(UNPACK_Setup_, MaskType, _SSE_, ModeType, _, TOTALCL) CL+2
|
|
@CatStr(UNPACK_, MaskType, _SSE_, ModeType) XMM_R2
|
|
@CatStr(UNPACK_Write, TOTALCL, _, MaskType) XMM_R2, CL+2, 32, movdqa
|
|
|
|
@CatStr(UNPACK_INC_DST_, TOTALCL, _, MaskType) 3
|
|
|
|
add VIF_SRC, 36
|
|
endm
|
|
|
|
UNPACK_V3_32SSE_3A macro CL, TOTALCL, MaskType, ModeType
|
|
UNPACK_V3_32SSE_3x CL, TOTALCL, MaskType, ModeType, movdqa
|
|
endm
|
|
UNPACK_V3_32SSE_3 macro CL, TOTALCL, MaskType, ModeType
|
|
UNPACK_V3_32SSE_3x CL, TOTALCL, MaskType, ModeType, movdqu
|
|
endm
|
|
|
|
UNPACK_V3_32SSE_2x macro CL, TOTALCL, MaskType, ModeType, MOVDQA
|
|
MOVDQA XMM_R0, [VIF_SRC]
|
|
movdqu XMM_R1, [VIF_SRC+12]
|
|
|
|
UNPACK2_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 24
|
|
endm
|
|
|
|
UNPACK_V3_32SSE_2A macro CL, TOTALCL, MaskType, ModeType
|
|
UNPACK_V3_32SSE_2x CL, TOTALCL, MaskType, ModeType, movdqa
|
|
endm
|
|
UNPACK_V3_32SSE_2 macro CL, TOTALCL, MaskType, ModeType
|
|
UNPACK_V3_32SSE_2x CL, TOTALCL, MaskType, ModeType, movdqu
|
|
endm
|
|
|
|
UNPACK_V3_32SSE_1x macro CL, TOTALCL, MaskType, ModeType, MOVDQA
|
|
MOVDQA XMM_R0, [VIF_SRC]
|
|
|
|
UNPACK1_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 12
|
|
endm
|
|
|
|
UNPACK_V3_32SSE_1A macro CL, TOTALCL, MaskType, ModeType
|
|
UNPACK_V3_32SSE_1x CL, TOTALCL, MaskType, ModeType, movdqa
|
|
endm
|
|
UNPACK_V3_32SSE_1 macro CL, TOTALCL, MaskType, ModeType
|
|
UNPACK_V3_32SSE_1x CL, TOTALCL, MaskType, ModeType, movdqu
|
|
endm
|
|
|
|
;; V3-16
|
|
UNPACK_V3_16SSE_4 macro CL, TOTALCL, MaskType, ModeType
|
|
movq XMM_R0, QWORD PTR [VIF_SRC]
|
|
movq XMM_R1, QWORD PTR [VIF_SRC+6]
|
|
|
|
punpcklwd XMM_R0, XMM_R0
|
|
movq XMM_R2, QWORD PTR [VIF_SRC+12]
|
|
punpcklwd XMM_R1, XMM_R1
|
|
UNPACK_RIGHTSHIFT XMM_R0, 16
|
|
movq XMM_R3, QWORD PTR [VIF_SRC+18]
|
|
UNPACK_RIGHTSHIFT XMM_R1, 16
|
|
punpcklwd XMM_R2, XMM_R2
|
|
punpcklwd XMM_R3, XMM_R3
|
|
|
|
UNPACK_RIGHTSHIFT XMM_R2, 16
|
|
UNPACK_RIGHTSHIFT XMM_R3, 16
|
|
|
|
UNPACK4_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 24
|
|
endm
|
|
|
|
UNPACK_V3_16SSE_4A macro CL, TOTALCL, MaskType, ModeType
|
|
UNPACK_V3_16SSE_4 CL, TOTALCL, MaskType, ModeType
|
|
endm
|
|
|
|
UNPACK_V3_16SSE_3 macro CL, TOTALCL, MaskType, ModeType
|
|
movq XMM_R0, QWORD PTR [VIF_SRC]
|
|
movq XMM_R1, QWORD PTR [VIF_SRC+6]
|
|
|
|
punpcklwd XMM_R0, XMM_R0
|
|
movq XMM_R2, QWORD PTR [VIF_SRC+12]
|
|
punpcklwd XMM_R1, XMM_R1
|
|
UNPACK_RIGHTSHIFT XMM_R0, 16
|
|
punpcklwd XMM_R2, XMM_R2
|
|
|
|
UNPACK_RIGHTSHIFT XMM_R1, 16
|
|
UNPACK_RIGHTSHIFT XMM_R2, 16
|
|
|
|
UNPACK3_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 18
|
|
endm
|
|
|
|
UNPACK_V3_16SSE_3A macro CL, TOTALCL, MaskType, ModeType
|
|
UNPACK_V3_16SSE_3 CL, TOTALCL, MaskType, ModeType
|
|
endm
|
|
|
|
UNPACK_V3_16SSE_2 macro CL, TOTALCL, MaskType, ModeType
|
|
movq XMM_R0, QWORD PTR [VIF_SRC]
|
|
movq XMM_R1, QWORD PTR [VIF_SRC+6]
|
|
|
|
punpcklwd XMM_R0, XMM_R0
|
|
punpcklwd XMM_R1, XMM_R1
|
|
|
|
UNPACK_RIGHTSHIFT XMM_R0, 16
|
|
UNPACK_RIGHTSHIFT XMM_R1, 16
|
|
|
|
UNPACK2_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 12
|
|
endm
|
|
|
|
UNPACK_V3_16SSE_2A macro CL, TOTALCL, MaskType, ModeType
|
|
UNPACK_V3_16SSE_2 CL, TOTALCL, MaskType, ModeType
|
|
endm
|
|
|
|
UNPACK_V3_16SSE_1 macro CL, TOTALCL, MaskType, ModeType
|
|
movq XMM_R0, QWORD PTR [VIF_SRC]
|
|
punpcklwd XMM_R0, XMM_R0
|
|
UNPACK_RIGHTSHIFT XMM_R0, 16
|
|
|
|
UNPACK1_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 6
|
|
endm
|
|
|
|
UNPACK_V3_16SSE_1A macro CL, TOTALCL, MaskType, ModeType
|
|
UNPACK_V3_16SSE_1 CL, TOTALCL, MaskType, ModeType
|
|
endm
|
|
|
|
;; V3-8
|
|
UNPACK_V3_8SSE_4 macro CL, TOTALCL, MaskType, ModeType
|
|
movq XMM_R1, QWORD PTR [VIF_SRC]
|
|
movq XMM_R3, QWORD PTR [VIF_SRC+6]
|
|
|
|
punpcklbw XMM_R1, XMM_R1
|
|
punpcklbw XMM_R3, XMM_R3
|
|
punpcklwd XMM_R0, XMM_R1
|
|
psrldq XMM_R1, 6
|
|
punpcklwd XMM_R2, XMM_R3
|
|
psrldq XMM_R3, 6
|
|
punpcklwd XMM_R1, XMM_R1
|
|
UNPACK_RIGHTSHIFT XMM_R0, 24
|
|
punpcklwd XMM_R3, XMM_R3
|
|
|
|
UNPACK_RIGHTSHIFT XMM_R2, 24
|
|
UNPACK_RIGHTSHIFT XMM_R1, 24
|
|
UNPACK_RIGHTSHIFT XMM_R3, 24
|
|
|
|
UNPACK4_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 12
|
|
endm
|
|
|
|
UNPACK_V3_8SSE_4A macro CL, TOTALCL, MaskType, ModeType
|
|
UNPACK_V3_8SSE_4 CL, TOTALCL, MaskType, ModeType
|
|
endm
|
|
|
|
UNPACK_V3_8SSE_3 macro CL, TOTALCL, MaskType, ModeType
|
|
movd XMM_R0, dword ptr [VIF_SRC]
|
|
movd XMM_R1, dword ptr [VIF_SRC+3]
|
|
|
|
punpcklbw XMM_R0, XMM_R0
|
|
movd XMM_R2, dword ptr [VIF_SRC+6]
|
|
punpcklbw XMM_R1, XMM_R1
|
|
punpcklwd XMM_R0, XMM_R0
|
|
punpcklbw XMM_R2, XMM_R2
|
|
|
|
punpcklwd XMM_R1, XMM_R1
|
|
punpcklwd XMM_R2, XMM_R2
|
|
|
|
UNPACK_RIGHTSHIFT XMM_R0, 24
|
|
UNPACK_RIGHTSHIFT XMM_R1, 24
|
|
UNPACK_RIGHTSHIFT XMM_R2, 24
|
|
|
|
UNPACK3_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 9
|
|
endm
|
|
|
|
UNPACK_V3_8SSE_3A macro CL, TOTALCL, MaskType, ModeType
|
|
UNPACK_V3_8SSE_3 CL, TOTALCL, MaskType, ModeType
|
|
endm
|
|
|
|
UNPACK_V3_8SSE_2 macro CL, TOTALCL, MaskType, ModeType
|
|
movd XMM_R0, dword ptr [VIF_SRC]
|
|
movd XMM_R1, dword ptr [VIF_SRC+3]
|
|
|
|
punpcklbw XMM_R0, XMM_R0
|
|
punpcklbw XMM_R1, XMM_R1
|
|
|
|
punpcklwd XMM_R0, XMM_R0
|
|
punpcklwd XMM_R1, XMM_R1
|
|
|
|
UNPACK_RIGHTSHIFT XMM_R0, 24
|
|
UNPACK_RIGHTSHIFT XMM_R1, 24
|
|
|
|
UNPACK2_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 6
|
|
endm
|
|
|
|
UNPACK_V3_8SSE_2A macro CL, TOTALCL, MaskType, ModeType
|
|
UNPACK_V3_8SSE_2 CL, TOTALCL, MaskType, ModeType
|
|
endm
|
|
|
|
UNPACK_V3_8SSE_1 macro CL, TOTALCL, MaskType, ModeType
|
|
movd XMM_R0, dword ptr [VIF_SRC]
|
|
punpcklbw XMM_R0, XMM_R0
|
|
punpcklwd XMM_R0, XMM_R0
|
|
UNPACK_RIGHTSHIFT XMM_R0, 24
|
|
|
|
UNPACK1_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 3
|
|
endm
|
|
|
|
UNPACK_V3_8SSE_1A macro CL, TOTALCL, MaskType, ModeType
|
|
UNPACK_V3_8SSE_1 CL, TOTALCL, MaskType, ModeType
|
|
endm
|
|
|
|
;; V4-32
|
|
UNPACK_V4_32SSE_4A macro CL, TOTALCL, MaskType, ModeType
|
|
movdqa XMM_R0, [VIF_SRC]
|
|
movdqa XMM_R1, [VIF_SRC+16]
|
|
movdqa XMM_R2, [VIF_SRC+32]
|
|
movdqa XMM_R3, [VIF_SRC+48]
|
|
|
|
UNPACK4_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 64
|
|
endm
|
|
|
|
UNPACK_V4_32SSE_4 macro CL, TOTALCL, MaskType, ModeType
|
|
movdqu XMM_R0, [VIF_SRC]
|
|
movdqu XMM_R1, [VIF_SRC+16]
|
|
movdqu XMM_R2, [VIF_SRC+32]
|
|
movdqu XMM_R3, [VIF_SRC+48]
|
|
|
|
UNPACK4_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 64
|
|
endm
|
|
|
|
UNPACK_V4_32SSE_3A macro CL, TOTALCL, MaskType, ModeType
|
|
movdqa XMM_R0, [VIF_SRC]
|
|
movdqa XMM_R1, [VIF_SRC+16]
|
|
movdqa XMM_R2, [VIF_SRC+32]
|
|
|
|
UNPACK3_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 48
|
|
endm
|
|
|
|
UNPACK_V4_32SSE_3 macro CL, TOTALCL, MaskType, ModeType
|
|
movdqu XMM_R0, [VIF_SRC]
|
|
movdqu XMM_R1, [VIF_SRC+16]
|
|
movdqu XMM_R2, [VIF_SRC+32]
|
|
|
|
UNPACK3_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 48
|
|
endm
|
|
|
|
UNPACK_V4_32SSE_2A macro CL, TOTALCL, MaskType, ModeType
|
|
movdqa XMM_R0, [VIF_SRC]
|
|
movdqa XMM_R1, [VIF_SRC+16]
|
|
|
|
UNPACK2_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 32
|
|
endm
|
|
|
|
UNPACK_V4_32SSE_2 macro CL, TOTALCL, MaskType, ModeType
|
|
movdqu XMM_R0, [VIF_SRC]
|
|
movdqu XMM_R1, [VIF_SRC+16]
|
|
|
|
UNPACK2_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 32
|
|
endm
|
|
|
|
UNPACK_V4_32SSE_1A macro CL, TOTALCL, MaskType, ModeType
|
|
movdqa XMM_R0, [VIF_SRC]
|
|
|
|
UNPACK1_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 16
|
|
endm
|
|
|
|
UNPACK_V4_32SSE_1 macro CL, TOTALCL, MaskType, ModeType
|
|
movdqu XMM_R0, [VIF_SRC]
|
|
|
|
UNPACK1_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 16
|
|
endm
|
|
|
|
;; V4-16
|
|
UNPACK_V4_16SSE_4A macro CL, TOTALCL, MaskType, ModeType
|
|
|
|
punpcklwd XMM_R0, [VIF_SRC]
|
|
punpckhwd XMM_R1, [VIF_SRC]
|
|
punpcklwd XMM_R2, [VIF_SRC+16]
|
|
punpckhwd XMM_R3, [VIF_SRC+16]
|
|
|
|
UNPACK_RIGHTSHIFT XMM_R1, 16
|
|
UNPACK_RIGHTSHIFT XMM_R3, 16
|
|
UNPACK_RIGHTSHIFT XMM_R0, 16
|
|
UNPACK_RIGHTSHIFT XMM_R2, 16
|
|
|
|
UNPACK4_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 32
|
|
endm
|
|
|
|
UNPACK_V4_16SSE_4 macro CL, TOTALCL, MaskType, ModeType
|
|
movdqu XMM_R0, [VIF_SRC]
|
|
movdqu XMM_R2, [VIF_SRC+16]
|
|
|
|
punpckhwd XMM_R1, XMM_R0
|
|
punpckhwd XMM_R3, XMM_R2
|
|
punpcklwd XMM_R0, XMM_R0
|
|
punpcklwd XMM_R2, XMM_R2
|
|
|
|
UNPACK_RIGHTSHIFT XMM_R1, 16
|
|
UNPACK_RIGHTSHIFT XMM_R3, 16
|
|
UNPACK_RIGHTSHIFT XMM_R0, 16
|
|
UNPACK_RIGHTSHIFT XMM_R2, 16
|
|
|
|
UNPACK4_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 32
|
|
endm
|
|
|
|
UNPACK_V4_16SSE_3A macro CL, TOTALCL, MaskType, ModeType
|
|
punpcklwd XMM_R0, [VIF_SRC]
|
|
punpckhwd XMM_R1, [VIF_SRC]
|
|
punpcklwd XMM_R2, [VIF_SRC+16]
|
|
|
|
UNPACK_RIGHTSHIFT XMM_R0, 16
|
|
UNPACK_RIGHTSHIFT XMM_R1, 16
|
|
UNPACK_RIGHTSHIFT XMM_R2, 16
|
|
|
|
UNPACK3_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 24
|
|
endm
|
|
|
|
UNPACK_V4_16SSE_3 macro CL, TOTALCL, MaskType, ModeType
|
|
movdqu XMM_R0, [VIF_SRC]
|
|
movq XMM_R2, QWORD PTR [VIF_SRC+16]
|
|
|
|
punpckhwd XMM_R1, XMM_R0
|
|
punpcklwd XMM_R0, XMM_R0
|
|
punpcklwd XMM_R2, XMM_R2
|
|
|
|
UNPACK_RIGHTSHIFT XMM_R0, 16
|
|
UNPACK_RIGHTSHIFT XMM_R1, 16
|
|
UNPACK_RIGHTSHIFT XMM_R2, 16
|
|
|
|
UNPACK3_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 24
|
|
endm
|
|
|
|
UNPACK_V4_16SSE_2A macro CL, TOTALCL, MaskType, ModeType
|
|
punpcklwd XMM_R0, [VIF_SRC]
|
|
punpckhwd XMM_R1, [VIF_SRC]
|
|
|
|
UNPACK_RIGHTSHIFT XMM_R0, 16
|
|
UNPACK_RIGHTSHIFT XMM_R1, 16
|
|
|
|
UNPACK2_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 16
|
|
endm
|
|
|
|
UNPACK_V4_16SSE_2 macro CL, TOTALCL, MaskType, ModeType
|
|
movq XMM_R0, QWORD PTR [VIF_SRC]
|
|
movq XMM_R1, QWORD PTR [VIF_SRC+8]
|
|
|
|
punpcklwd XMM_R0, XMM_R0
|
|
punpcklwd XMM_R1, XMM_R1
|
|
|
|
UNPACK_RIGHTSHIFT XMM_R0, 16
|
|
UNPACK_RIGHTSHIFT XMM_R1, 16
|
|
|
|
UNPACK2_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 16
|
|
endm
|
|
|
|
UNPACK_V4_16SSE_1A macro CL, TOTALCL, MaskType, ModeType
|
|
punpcklwd XMM_R0, [VIF_SRC]
|
|
UNPACK_RIGHTSHIFT XMM_R0, 16
|
|
|
|
UNPACK1_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 8
|
|
endm
|
|
|
|
UNPACK_V4_16SSE_1 macro CL, TOTALCL, MaskType, ModeType
|
|
movq XMM_R0, QWORD PTR [VIF_SRC]
|
|
punpcklwd XMM_R0, XMM_R0
|
|
UNPACK_RIGHTSHIFT XMM_R0, 16
|
|
|
|
UNPACK1_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 8
|
|
endm
|
|
|
|
;; V4-8
|
|
UNPACK_V4_8SSE_4A macro CL, TOTALCL, MaskType, ModeType
|
|
punpcklbw XMM_R0, [VIF_SRC]
|
|
punpckhbw XMM_R2, [VIF_SRC]
|
|
|
|
punpckhwd XMM_R1, XMM_R0
|
|
punpckhwd XMM_R3, XMM_R2
|
|
punpcklwd XMM_R0, XMM_R0
|
|
punpcklwd XMM_R2, XMM_R2
|
|
|
|
UNPACK_RIGHTSHIFT XMM_R1, 24
|
|
UNPACK_RIGHTSHIFT XMM_R3, 24
|
|
UNPACK_RIGHTSHIFT XMM_R0, 24
|
|
UNPACK_RIGHTSHIFT XMM_R2, 24
|
|
|
|
UNPACK4_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 16
|
|
endm
|
|
|
|
UNPACK_V4_8SSE_4 macro CL, TOTALCL, MaskType, ModeType
|
|
movdqu XMM_R0, [VIF_SRC]
|
|
|
|
punpckhbw XMM_R2, XMM_R0
|
|
punpcklbw XMM_R0, XMM_R0
|
|
|
|
punpckhwd XMM_R3, XMM_R2
|
|
punpckhwd XMM_R1, XMM_R0
|
|
punpcklwd XMM_R2, XMM_R2
|
|
punpcklwd XMM_R0, XMM_R0
|
|
|
|
UNPACK_RIGHTSHIFT XMM_R3, 24
|
|
UNPACK_RIGHTSHIFT XMM_R2, 24
|
|
|
|
UNPACK_RIGHTSHIFT XMM_R0, 24
|
|
UNPACK_RIGHTSHIFT XMM_R1, 24
|
|
|
|
UNPACK4_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 16
|
|
endm
|
|
|
|
UNPACK_V4_8SSE_3A macro CL, TOTALCL, MaskType, ModeType
|
|
punpcklbw XMM_R0, [VIF_SRC]
|
|
punpckhbw XMM_R2, [VIF_SRC]
|
|
|
|
punpckhwd XMM_R1, XMM_R0
|
|
punpcklwd XMM_R0, XMM_R0
|
|
punpcklwd XMM_R2, XMM_R2
|
|
|
|
UNPACK_RIGHTSHIFT XMM_R1, 24
|
|
UNPACK_RIGHTSHIFT XMM_R0, 24
|
|
UNPACK_RIGHTSHIFT XMM_R2, 24
|
|
|
|
UNPACK3_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 12
|
|
endm
|
|
|
|
UNPACK_V4_8SSE_3 macro CL, TOTALCL, MaskType, ModeType
|
|
movq XMM_R0, QWORD PTR [VIF_SRC]
|
|
movd XMM_R2, dword ptr [VIF_SRC+8]
|
|
|
|
punpcklbw XMM_R0, XMM_R0
|
|
punpcklbw XMM_R2, XMM_R2
|
|
|
|
punpckhwd XMM_R1, XMM_R0
|
|
punpcklwd XMM_R2, XMM_R2
|
|
punpcklwd XMM_R0, XMM_R0
|
|
|
|
UNPACK_RIGHTSHIFT XMM_R1, 24
|
|
UNPACK_RIGHTSHIFT XMM_R0, 24
|
|
UNPACK_RIGHTSHIFT XMM_R2, 24
|
|
|
|
UNPACK3_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 12
|
|
endm
|
|
|
|
UNPACK_V4_8SSE_2A macro CL, TOTALCL, MaskType, ModeType
|
|
punpcklbw XMM_R0, [VIF_SRC]
|
|
|
|
punpckhwd XMM_R1, XMM_R0
|
|
punpcklwd XMM_R0, XMM_R0
|
|
|
|
UNPACK_RIGHTSHIFT XMM_R1, 24
|
|
UNPACK_RIGHTSHIFT XMM_R0, 24
|
|
|
|
UNPACK2_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 8
|
|
endm
|
|
|
|
UNPACK_V4_8SSE_2 macro CL, TOTALCL, MaskType, ModeType
|
|
movq XMM_R0, QWORD PTR [VIF_SRC]
|
|
|
|
punpcklbw XMM_R0, XMM_R0
|
|
|
|
punpckhwd XMM_R1, XMM_R0
|
|
punpcklwd XMM_R0, XMM_R0
|
|
|
|
UNPACK_RIGHTSHIFT XMM_R1, 24
|
|
UNPACK_RIGHTSHIFT XMM_R0, 24
|
|
|
|
UNPACK2_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 8
|
|
endm
|
|
|
|
UNPACK_V4_8SSE_1A macro CL, TOTALCL, MaskType, ModeType
|
|
punpcklbw XMM_R0, [VIF_SRC]
|
|
punpcklwd XMM_R0, XMM_R0
|
|
UNPACK_RIGHTSHIFT XMM_R0, 24
|
|
|
|
UNPACK1_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 4
|
|
endm
|
|
|
|
UNPACK_V4_8SSE_1 macro CL, TOTALCL, MaskType, ModeType
|
|
movd XMM_R0, dword ptr [VIF_SRC]
|
|
punpcklbw XMM_R0, XMM_R0
|
|
punpcklwd XMM_R0, XMM_R0
|
|
UNPACK_RIGHTSHIFT XMM_R0, 24
|
|
|
|
UNPACK1_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 4
|
|
endm
|
|
|
|
;; V4-5
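;; DECOMPRESS_RGBA below expands a 16-bit RGBA 5:5:5:1 value held in ax into
;; four bytes at s_TempDecompress+OFFSET: each 5-bit color field is shifted
;; into the top of its byte (a multiply by 8) and the single alpha bit lands
;; in bit 7 of the fourth byte. bl/bx are used as scratch, so ebx is clobbered.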
DECOMPRESS_RGBA macro OFFSET
mov bl, al
shl bl, 3
mov byte ptr [s_TempDecompress+OFFSET], bl

mov bx, ax
shr bx, 2
and bx, 0f8h
mov byte ptr [s_TempDecompress+OFFSET+1], bl

mov bx, ax
shr bx, 7
and bx, 0f8h
mov byte ptr [s_TempDecompress+OFFSET+2], bl
mov bx, ax
shr bx, 8
and bx, 080h
mov byte ptr [s_TempDecompress+OFFSET+3], bl
endm

UNPACK_V4_5SSE_4 macro CL, TOTALCL, MaskType, ModeType
mov eax, dword ptr [VIF_SRC]
DECOMPRESS_RGBA 0

shr eax, 16
DECOMPRESS_RGBA 4

mov eax, dword ptr [VIF_SRC+4]
DECOMPRESS_RGBA 8

shr eax, 16
DECOMPRESS_RGBA 12

;; have to use movaps instead of movdqa
%ifdef __x86_64__
movdqa XMM_R0, XMMWORD PTR [s_TempDecompress]
%else
movaps XMM_R0, [s_TempDecompress]
%endif

punpckhbw XMM_R2, XMM_R0
punpcklbw XMM_R0, XMM_R0

punpckhwd XMM_R3, XMM_R2
punpckhwd XMM_R1, XMM_R0
punpcklwd XMM_R0, XMM_R0
punpcklwd XMM_R2, XMM_R2

psrld XMM_R0, 24
psrld XMM_R1, 24
psrld XMM_R2, 24
psrld XMM_R3, 24

UNPACK4_SSE CL, TOTALCL, MaskType, ModeType

add VIF_SRC, 8
endm

UNPACK_V4_5SSE_4A macro CL, TOTALCL, MaskType, ModeType
UNPACK_V4_5SSE_4 CL, TOTALCL, MaskType, ModeType
endm

UNPACK_V4_5SSE_3 macro CL, TOTALCL, MaskType, ModeType
|
|
mov eax, dword ptr [VIF_SRC]
|
|
DECOMPRESS_RGBA 0
|
|
|
|
shr eax, 16
|
|
DECOMPRESS_RGBA 4
|
|
|
|
mov eax, dword ptr [VIF_SRC]
|
|
DECOMPRESS_RGBA 8
|
|
|
|
;; have to use movaps instead of movdqa
|
|
%ifdef __x86_64__
|
|
movdqa XMM_R0, XMMWORD PTR [s_TempDecompress]
|
|
%else
|
|
movaps XMM_R0, [s_TempDecompress]
|
|
%endif
|
|
|
|
punpckhbw XMM_R2, XMM_R0
|
|
punpcklbw XMM_R0, XMM_R0
|
|
|
|
punpckhwd XMM_R1, XMM_R0
|
|
punpcklwd XMM_R0, XMM_R0
|
|
punpcklwd XMM_R2, XMM_R2
|
|
|
|
psrld XMM_R0, 24
|
|
psrld XMM_R1, 24
|
|
psrld XMM_R2, 24
|
|
|
|
UNPACK3_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 6
|
|
endm
|
|
|
|
UNPACK_V4_5SSE_3A macro CL, TOTALCL, MaskType, ModeType
|
|
UNPACK_V4_5SSE_3 CL, TOTALCL, MaskType, ModeType
|
|
endm
|
|
|
|
UNPACK_V4_5SSE_2 macro CL, TOTALCL, MaskType, ModeType
|
|
mov eax, dword ptr [VIF_SRC]
|
|
DECOMPRESS_RGBA 0
|
|
|
|
shr eax, 16
|
|
DECOMPRESS_RGBA 4
|
|
|
|
movq XMM_R0, QWORD PTR [s_TempDecompress]
|
|
|
|
punpcklbw XMM_R0, XMM_R0
|
|
|
|
punpckhwd XMM_R1, XMM_R0
|
|
punpcklwd XMM_R0, XMM_R0
|
|
|
|
psrld XMM_R0, 24
|
|
psrld XMM_R1, 24
|
|
|
|
UNPACK2_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 4
|
|
endm
|
|
|
|
UNPACK_V4_5SSE_2A macro CL, TOTALCL, MaskType, ModeType
|
|
UNPACK_V4_5SSE_2 CL, TOTALCL, MaskType, ModeType
|
|
endm
|
|
|
|
UNPACK_V4_5SSE_1 macro CL, TOTALCL, MaskType, ModeType
|
|
mov ax, word ptr [VIF_SRC]
|
|
DECOMPRESS_RGBA 0
|
|
|
|
movd XMM_R0, DWORD PTR [s_TempDecompress]
|
|
punpcklbw XMM_R0, XMM_R0
|
|
punpcklwd XMM_R0, XMM_R0
|
|
|
|
psrld XMM_R0, 24
|
|
|
|
UNPACK1_SSE CL, TOTALCL, MaskType, ModeType
|
|
|
|
add VIF_SRC, 2
|
|
endm
|
|
|
|
UNPACK_V4_5SSE_1A macro CL, TOTALCL, MaskType, ModeType
|
|
UNPACK_V4_5SSE_1 CL, TOTALCL, MaskType, ModeType
|
|
endm
|
|
|
|
;; save the row reg
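;; SAVE_ROW_REG_BASE stores the updated row back to the _vifRow qword and
;; scatters its four dwords into the vifRegs structure at offsets 0100h-0130h
;; (presumably the R0-R3 row registers); SAVE_NO_REG is used by the modes
;; that never modify the row.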
SAVE_ROW_REG_BASE macro
mov VIF_TMPADDR, [_vifRow]
movdqa [VIF_TMPADDR], XMM_ROW
mov VIF_TMPADDR, [_vifRegs]
movss dword ptr [VIF_TMPADDR+0100h], XMM_ROW
psrldq XMM_ROW, 4
movss dword ptr [VIF_TMPADDR+0110h], XMM_ROW
psrldq XMM_ROW, 4
movss dword ptr [VIF_TMPADDR+0120h], XMM_ROW
psrldq XMM_ROW, 4
movss dword ptr [VIF_TMPADDR+0130h], XMM_ROW
endm

SAVE_NO_REG macro
endm

%ifdef __x86_64__

INIT_ARGS macro
mov rax, qword ptr [_vifRow]
mov r9, qword ptr [_vifCol]
movaps xmm6, XMMWORD PTR [rax]
movaps xmm7, XMMWORD PTR [r9]
endm

INC_STACK macro reg
add rsp, 8
endm

%else

%define STACKOFFSET 12

;; 32 bit versions have the args on the stack
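;; STACKOFFSET (12) accounts for the three dwords pushed in the 32-bit
;; prologue (edi, esi, ebx), so dest/data/dmasize sit at esp+16, esp+20 and
;; esp+24 by the time INIT_ARGS runs.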
INIT_ARGS macro
mov VIF_DST, dword ptr [esp+4+STACKOFFSET]
mov VIF_SRC, dword ptr [esp+8+STACKOFFSET]
mov VIF_SIZE, dword ptr [esp+12+STACKOFFSET]
endm

INC_STACK macro reg
add esp, 4
endm

%endif

;; qsize - bytes of compressed size of 1 decompressed qword
;; int UNPACK_SkippingWrite_##name##_##sign##_##MaskType##_##ModeType(u32* dest, u32* data, int dmasize)
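;; The generated symbol is e.g. UNPACK_SkippingWrite_V4_8_u_Mask_1 for
;; name=V4_8, sign=u, MaskType=Mask, ModeType=1. The two bytes read from
;; vifRegs at 040h/041h appear to be the CYCLE register's CL and WL fields:
;; VIF_INC becomes (CL-WL)*16 bytes of destination skip, and the code then
;; dispatches on WL = 1, 2, 3 or >= 4.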
defUNPACK_SkippingWrite macro name, MaskType, ModeType, qsize, sign, SAVE_ROW_REG
@CatStr(UNPACK_SkippingWrite_, name, _, sign, _, MaskType, _, ModeType) proc public
%ifdef __x86_64__
push rdi
%else
push edi
push esi
push ebx
%endif
INIT_ARGS
mov VIF_TMPADDR, [_vifRegs]
movzx VIF_INC, byte ptr [VIF_TMPADDR + 040h]
movzx VIF_SAVEEBX, byte ptr [VIF_TMPADDR + 041h]
sub VIF_INC, VIF_SAVEEBX
shl VIF_INC, 4

cmp VIF_SAVEEBXd, 1
je @CatStr(name, _, sign, _, MaskType, _, ModeType, _WL1)
cmp VIF_SAVEEBXd, 2
je @CatStr(name, _, sign, _, MaskType, _, ModeType, _WL2)
cmp VIF_SAVEEBXd, 3
je @CatStr(name, _, sign, _, MaskType, _, ModeType, _WL3)
jmp @CatStr(name, _, sign, _, MaskType, _, ModeType, _WL4)

@CatStr(name, _, sign, _, MaskType, _, ModeType, _WL1):
@CatStr(UNPACK_Start_Setup_, MaskType, _SSE_, ModeType) 0

cmp VIF_SIZE, qsize
jl @CatStr(name, _, sign, _, MaskType, _, ModeType, _C1_Done3)

add VIF_INC, 16

;; first align VIF_SRC to 16 bytes
@CatStr(name, _, sign, _, MaskType, _, ModeType, _C1_Align16):

test VIF_SRC, 15
jz @CatStr(name, _, sign, _, MaskType, _, ModeType, _C1_UnpackAligned)

@CatStr(UNPACK_, name, SSE_1) 0, 1, MaskType, ModeType

cmp VIF_SIZE, (2*qsize)
jl @CatStr(name, _, sign, _, MaskType, _, ModeType, _C1_DoneWithDec)
sub VIF_SIZE, qsize
jmp @CatStr(name, _, sign, _, MaskType, _, ModeType, _C1_Align16)

@CatStr(name, _, sign, _, MaskType, _, ModeType, _C1_UnpackAligned):

cmp VIF_SIZE, (2*qsize)
jl @CatStr(name, _, sign, _, MaskType, _, ModeType, _C1_Unpack1)
cmp VIF_SIZE, (3*qsize)
jl @CatStr(name, _, sign, _, MaskType, _, ModeType, _C1_Unpack2)
cmp VIF_SIZE, (4*qsize)
jl @CatStr(name, _, sign, _, MaskType, _, ModeType, _C1_Unpack3)
prefetchnta [VIF_SRC + 64]

@CatStr(name, _, sign, _, MaskType, _, ModeType, _C1_Unpack4):
@CatStr(UNPACK_, name, SSE_4A) 0, 1, MaskType, ModeType

cmp VIF_SIZE, (8*qsize)
jl @CatStr(name, _, sign, _, MaskType, _, ModeType, _C1_DoneUnpack4)
sub VIF_SIZE, (4*qsize)
jmp @CatStr(name, _, sign, _, MaskType, _, ModeType, _C1_Unpack4)

@CatStr(name, _, sign, _, MaskType, _, ModeType, _C1_DoneUnpack4):

sub VIF_SIZE, (4*qsize)
cmp VIF_SIZE, qsize
jl @CatStr(name, _, sign, _, MaskType, _, ModeType, _C1_Done3)
cmp VIF_SIZE, (2*qsize)
jl @CatStr(name, _, sign, _, MaskType, _, ModeType, _C1_Unpack1)
cmp VIF_SIZE, (3*qsize)
jl @CatStr(name, _, sign, _, MaskType, _, ModeType, _C1_Unpack2)
;; fall through

@CatStr(name, _, sign, _, MaskType, _, ModeType, _C1_Unpack3):
@CatStr(UNPACK_, name, SSE_3A) 0, 1, MaskType, ModeType

sub VIF_SIZE, (3*qsize)
jmp @CatStr(name, _, sign, _, MaskType, _, ModeType, _C1_Done3)

@CatStr(name, _, sign, _, MaskType, _, ModeType, _C1_Unpack2):
@CatStr(UNPACK_, name, SSE_2A) 0, 1, MaskType, ModeType

sub VIF_SIZE, (2*qsize)
jmp @CatStr(name, _, sign, _, MaskType, _, ModeType, _C1_Done3)

@CatStr(name, _, sign, _, MaskType, _, ModeType, _C1_Unpack1):
@CatStr(UNPACK_, name, SSE_1A) 0, 1, MaskType, ModeType
@CatStr(name, _, sign, _, MaskType, _, ModeType, _C1_DoneWithDec):
sub VIF_SIZE, qsize
@CatStr(name, _, sign, _, MaskType, _, ModeType, _C1_Done3):
SAVE_ROW_REG
mov eax, VIF_SIZE
%ifdef __x86_64__
pop rdi
%else
pop ebx
pop esi
pop edi
%endif
ret

@CatStr(name, _, sign, _, MaskType, _, ModeType, _WL2):
cmp VIF_SIZE, (2*qsize)

jl @CatStr(name, _, sign, _, MaskType, _, ModeType, _C2_Done3)
@CatStr(name, _, sign, _, MaskType, _, ModeType, _C2_Unpack):
@CatStr(UNPACK_, name, SSE_2) 0, 0, MaskType, ModeType

;; take into account wl
add VIF_DST, VIF_INC
cmp VIF_SIZE, (4*qsize)
jl @CatStr(name, _, sign, _, MaskType, _, ModeType, _C2_Done2)
sub VIF_SIZE, (2*qsize)
;; unpack next
jmp @CatStr(name, _, sign, _, MaskType, _, ModeType, _C2_Unpack)

@CatStr(name, _, sign, _, MaskType, _, ModeType, _C2_Done2):
sub VIF_SIZE, (2*qsize)
@CatStr(name, _, sign, _, MaskType, _, ModeType, _C2_Done3):
cmp VIF_SIZE, qsize

jl @CatStr(name, _, sign, _, MaskType, _, ModeType, _C2_Done4)

;; execute left over qw
@CatStr(UNPACK_, name, SSE_1) 0, 0, MaskType, ModeType

sub VIF_SIZE, qsize
@CatStr(name, _, sign, _, MaskType, _, ModeType, _C2_Done4):

SAVE_ROW_REG
mov eax, VIF_SIZE
%ifdef __x86_64__
pop rdi
%else
pop ebx
pop esi
pop edi
%endif
ret

@CatStr(name, _, sign, _, MaskType, _, ModeType, _WL3):
cmp VIF_SIZE, (3*qsize)

jl @CatStr(name, _, sign, _, MaskType, _, ModeType, _C3_Done5)
@CatStr(name, _, sign, _, MaskType, _, ModeType, _C3_Unpack):
@CatStr(UNPACK_, name, SSE_3) 0, 0, MaskType, ModeType

add VIF_DST, VIF_INC
cmp VIF_SIZE, (6*qsize)
jl @CatStr(name, _, sign, _, MaskType, _, ModeType, _C3_Done2)
sub VIF_SIZE, (3*qsize)
jmp @CatStr(name, _, sign, _, MaskType, _, ModeType, _C3_Unpack)
@CatStr(name, _, sign, _, MaskType, _, ModeType, _C3_Done2):
sub VIF_SIZE, (3*qsize)
@CatStr(name, _, sign, _, MaskType, _, ModeType, _C3_Done5):
cmp VIF_SIZE, qsize
jl @CatStr(name, _, sign, _, MaskType, _, ModeType, _C3_Done4)

cmp VIF_SIZE, (2*qsize)
jl @CatStr(name, _, sign, _, MaskType, _, ModeType, _C3_Done3)

@CatStr(UNPACK_, name, SSE_2) 0, 0, MaskType, ModeType

sub VIF_SIZE, (2*qsize)
jmp @CatStr(name, _, sign, _, MaskType, _, ModeType, _C3_Done4)
@CatStr(name, _, sign, _, MaskType, _, ModeType, _C3_Done3):
sub VIF_SIZE, qsize
@CatStr(UNPACK_, name, SSE_1) 0, 0, MaskType, ModeType
@CatStr(name, _, sign, _, MaskType, _, ModeType, _C3_Done4):
SAVE_ROW_REG
mov eax, VIF_SIZE
%ifdef __x86_64__
pop rdi
%else
pop ebx
pop esi
pop edi
%endif
ret

@CatStr(name, _, sign, _, MaskType, _, ModeType, _WL4):
sub VIF_SAVEEBX, 3
push VIF_INC
cmp VIF_SIZE, qsize
jl @CatStr(name, _, sign, _, MaskType, _, ModeType, _C4_Done)

@CatStr(name, _, sign, _, MaskType, _, ModeType, _C4_Unpack):
cmp VIF_SIZE, (3*qsize)
jge @CatStr(name, _, sign, _, MaskType, _, ModeType, _C4_Unpack3)
cmp VIF_SIZE, (2*qsize)
jge @CatStr(name, _, sign, _, MaskType, _, ModeType, _C4_Unpack2)

@CatStr(UNPACK_, name, SSE_1) 0, 0, MaskType, ModeType

;; not enough data left
sub VIF_SIZE, qsize
jmp @CatStr(name, _, sign, _, MaskType, _, ModeType, _C4_Done)
@CatStr(name, _, sign, _, MaskType, _, ModeType, _C4_Unpack2):
@CatStr(UNPACK_, name, SSE_2) 0, 0, MaskType, ModeType

;; not enough data left
sub VIF_SIZE, (2*qsize)
jmp @CatStr(name, _, sign, _, MaskType, _, ModeType, _C4_Done)
@CatStr(name, _, sign, _, MaskType, _, ModeType, _C4_Unpack3):
@CatStr(UNPACK_, name, SSE_3) 0, 0, MaskType, ModeType

;; more data left, process 1qw at a time
sub VIF_SIZE, (3*qsize)
mov VIF_INC, VIF_SAVEEBX

@CatStr(name, _, sign, _, MaskType, _, ModeType, _C4_UnpackX):

;; check if any data left
cmp VIF_SIZE, qsize
jl @CatStr(name, _, sign, _, MaskType, _, ModeType, _C4_Done)

@CatStr(UNPACK_, name, SSE_1) 3, 0, MaskType, ModeType

sub VIF_SIZE, qsize
cmp VIF_INC, 1
je @CatStr(name, _, sign, _, MaskType, _, ModeType, _C4_DoneLoop)
sub VIF_INC, 1
jmp @CatStr(name, _, sign, _, MaskType, _, ModeType, _C4_UnpackX)
@CatStr(name, _, sign, _, MaskType, _, ModeType, _C4_DoneLoop):
add VIF_DST, [VIF_ESP]
cmp VIF_SIZE, qsize
jl @CatStr(name, _, sign, _, MaskType, _, ModeType, _C4_Done)
jmp @CatStr(name, _, sign, _, MaskType, _, ModeType, _C4_Unpack)
@CatStr(name, _, sign, _, MaskType, _, ModeType, _C4_Done):

SAVE_ROW_REG
INC_STACK()
mov eax, VIF_SIZE

%ifdef __x86_64__
pop rdi
%else
pop ebx
pop esi
pop edi
%endif
ret
@CatStr(UNPACK_SkippingWrite_, name, _, sign, _, MaskType, _, ModeType) endp
endm

UNPACK_RIGHTSHIFT macro reg, shift
psrld reg, shift
endm

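;; The unsigned (u) variants below are built while UNPACK_RIGHTSHIFT is the
;; psrld (logical, zero-extending) form above; it is redefined with psrad
;; (arithmetic, sign-extending) further down before the signed (s) variants
;; are instantiated.
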
defUNPACK_SkippingWrite2 macro name, qsize
defUNPACK_SkippingWrite name, Regular, 0, qsize, u, SAVE_NO_REG
defUNPACK_SkippingWrite name, Regular, 1, qsize, u, SAVE_NO_REG
defUNPACK_SkippingWrite name, Regular, 2, qsize, u, SAVE_ROW_REG_BASE
defUNPACK_SkippingWrite name, Mask, 0, qsize, u, SAVE_NO_REG
defUNPACK_SkippingWrite name, Mask, 1, qsize, u, SAVE_NO_REG
defUNPACK_SkippingWrite name, Mask, 2, qsize, u, SAVE_ROW_REG_BASE
defUNPACK_SkippingWrite name, WriteMask, 0, qsize, u, SAVE_NO_REG
defUNPACK_SkippingWrite name, WriteMask, 1, qsize, u, SAVE_NO_REG
defUNPACK_SkippingWrite name, WriteMask, 2, qsize, u, SAVE_ROW_REG_BASE
endm

defUNPACK_SkippingWrite2 S_32, 4
defUNPACK_SkippingWrite2 S_16, 2
defUNPACK_SkippingWrite2 S_8, 1
defUNPACK_SkippingWrite2 V2_32, 8
defUNPACK_SkippingWrite2 V2_16, 4
defUNPACK_SkippingWrite2 V2_8, 2
defUNPACK_SkippingWrite2 V3_32, 12
defUNPACK_SkippingWrite2 V3_16, 6
defUNPACK_SkippingWrite2 V3_8, 3
defUNPACK_SkippingWrite2 V4_32, 16
defUNPACK_SkippingWrite2 V4_16, 8
defUNPACK_SkippingWrite2 V4_8, 4
defUNPACK_SkippingWrite2 V4_5, 2

UNPACK_RIGHTSHIFT macro reg, shift
psrad reg, shift
endm


defUNPACK_SkippingWrite2a macro name, qsize
defUNPACK_SkippingWrite name, Mask, 0, qsize, s, SAVE_NO_REG
defUNPACK_SkippingWrite name, Regular, 0, qsize, s, SAVE_NO_REG
defUNPACK_SkippingWrite name, Regular, 1, qsize, s, SAVE_NO_REG
defUNPACK_SkippingWrite name, Regular, 2, qsize, s, SAVE_ROW_REG_BASE
defUNPACK_SkippingWrite name, Mask, 1, qsize, s, SAVE_NO_REG
defUNPACK_SkippingWrite name, Mask, 2, qsize, s, SAVE_ROW_REG_BASE
defUNPACK_SkippingWrite name, WriteMask, 0, qsize, s, SAVE_NO_REG
defUNPACK_SkippingWrite name, WriteMask, 1, qsize, s, SAVE_NO_REG
defUNPACK_SkippingWrite name, WriteMask, 2, qsize, s, SAVE_ROW_REG_BASE
endm

defUNPACK_SkippingWrite2a S_16, 2
defUNPACK_SkippingWrite2a S_8, 1
defUNPACK_SkippingWrite2a V2_16, 4
defUNPACK_SkippingWrite2a V2_8, 2
defUNPACK_SkippingWrite2a V3_16, 6
defUNPACK_SkippingWrite2a V3_8, 3
defUNPACK_SkippingWrite2a V4_16, 8
defUNPACK_SkippingWrite2a V4_8, 4

end