Glide64: Now able to compile

This commit is contained in:
zilmar 2013-04-09 22:02:27 +10:00
parent 6d1519faeb
commit e015346578
33 changed files with 4603 additions and 4760 deletions

View File

@ -203,17 +203,400 @@ TRANSFORMVECTOR InverseTransformVector = InverseTransformVectorC;
DOTPRODUCT DotProduct = DotProductC;
NORMALIZEVECTOR NormalizeVector = NormalizeVectorC;
extern "C" void TransformVectorSSE(float *src, float *dst, float mat[4][4]);
extern "C" void TransformVector3DNOW(float *src, float *dst, float mat[4][4]);
extern "C" void InverseTransformVector3DNOW(float *src, float *dst, float mat[4][4]);
extern "C" void MulMatricesSSE(float m1[4][4],float m2[4][4],float r[4][4]);
extern "C" void MulMatrices3DNOW(float m1[4][4],float m2[4][4],float r[4][4]);
extern "C" float DotProductSSE3(register float *v1, register float *v2);
void TransformVectorSSE(float *src, float *dst, float mat[4][4])
{
__asm
{
mov ecx,[src]
mov eax,[dst]
mov edx,[mat]
movss xmm0,[ecx] ; 0 0 0 src[0]
movss xmm5,[edx] ; 0 0 0 mat[0][0]
movhps xmm5,[edx+4] ; mat[0][2] mat[0][1] 0 mat[0][0]
shufps xmm0,xmm0, 0 ; src[0] src[0] src[0] src[0]
movss xmm1,[ecx+4] ; 0 0 0 src[1]
movss xmm3,[edx+16] ; 0 0 0 mat[1][0]
movhps xmm3,[edx+20] ; mat[1][2] mat[1][1] 0 mat[1][0]
shufps xmm1,xmm1, 0 ; src[1] src[1] src[1] src[1]
mulps xmm0,xmm5 ; mat[0][2]*src[0] mat[0][1]*src[0] 0 mat[0][0]*src[0]
mulps xmm1,xmm3 ; mat[1][2]*src[1] mat[1][1]*src[1] 0 mat[1][0]*src[1]
movss xmm2,[ecx+8] ; 0 0 0 src[2]
shufps xmm2,xmm2, 0 ; src[2] src[2] src[2] src[2]
movss xmm4,[edx+32] ; 0 0 0 mat[2][0]
movhps xmm4,[edx+36] ; mat[2][2] mat[2][1] 0 mat[2][0]
addps xmm0,xmm1 ; mat[0][2]*src[0]+mat[1][2]*src[1] mat[0][1]*src[0]+mat[1][1]*src[1] 0 mat[0][0]*src[0]+mat[1][0]*src[1]
mulps xmm2,xmm4 ; mat[2][2]*src[2] mat[2][1]*src[2] 0 mat[2][0]*src[2]
addps xmm0,xmm2 ; mat[0][2]*src[0]+mat[1][2]*src[1]+mat[2][2]*src[2] mat[0][1]*src[0]+mat[1][1]*src[1]+mat[2][1]*src[2] 0 mat[0][0]*src[0]+mat[1][0]*src[1]+mat[2][0]*src[2]
movss [eax],xmm0 ; mat[0][0]*src[0]+mat[1][0]*src[1]+mat[2][0]*src[2]
movhps [eax+4],xmm0 ; mat[0][2]*src[0]+mat[1][2]*src[1]+mat[2][2]*src[2] mat[0][1]*src[0]+mat[1][1]*src[1]+mat[2][1]*src[2]
}
}
// Transforms a 3-component vector by the upper-left 3x3 part of mat with 3DNow!:
//   dst[j] = src[0]*mat[0][j] + src[1]*mat[1][j] + src[2]*mat[2][j],  j = 0..2
// Reads src[0..2] and mat[0..2][0..3]; writes dst[0..2]. Row 3 of mat is not read.
// Lane comments list the high 32 bits first, then the low 32 bits.
void TransformVector3DNOW(float *src, float *dst, float mat[4][4])
{
_asm {
femms ; MMX registers alias the x87 stack, so the state is reset on entry and exit
mov ecx,[src]
mov eax,[dst]
mov edx,[mat]
movq mm0,[ecx] ; src[1] src[0]
movd mm2,[ecx+8] ; 0 src[2]
movq mm1,mm0 ; src[1] src[0]
punpckldq mm0,mm0 ; src[0] src[0]
punpckhdq mm1,mm1 ; src[1] src[1]
punpckldq mm2,mm2 ; src[2] src[2]
movq mm3,mm0 ; src[0] src[0]
movq mm4,mm1 ; src[1] src[1]
movq mm5,mm2 ; src[2] src[2]
pfmul mm0,[edx] ; src[0]*mat[0][1] src[0]*mat[0][0]
pfmul mm3,[edx+8] ; src[0]*mat[0][3] (discarded later) src[0]*mat[0][2]
pfmul mm1,[edx+16] ; src[1]*mat[1][1] src[1]*mat[1][0]
pfmul mm4,[edx+24] ; src[1]*mat[1][3] (discarded later) src[1]*mat[1][2]
pfmul mm2,[edx+32] ; src[2]*mat[2][1] src[2]*mat[2][0]
pfmul mm5,[edx+40] ; src[2]*mat[2][3] (discarded later) src[2]*mat[2][2]
pfadd mm0,mm1 ; src[0]*mat[0][1]+src[1]*mat[1][1] src[0]*mat[0][0]+src[1]*mat[1][0]
pfadd mm3,mm4 ; (discarded) src[0]*mat[0][2]+src[1]*mat[1][2]
pfadd mm0,mm2 ; src[0]*mat[0][1]+src[1]*mat[1][1]+src[2]*mat[2][1] src[0]*mat[0][0]+src[1]*mat[1][0]+src[2]*mat[2][0]
pfadd mm3,mm5 ; (discarded) src[0]*mat[0][2]+src[1]*mat[1][2]+src[2]*mat[2][2]
movq [eax],mm0 ; dst[1] dst[0]
movd [eax+8],mm3 ; dst[2] only - the high lane of mm3 is never stored
femms
}
}
// Multiplies a 3-component vector by the transpose of the upper-left 3x3 part
// of mat with 3DNow!:
//   dst[i] = mat[i][0]*src[0] + mat[i][1]*src[1] + mat[i][2]*src[2],  i = 0..2
// Reads src[0..2] and mat[0..2][0..3]; writes dst[0..2].
// Lane comments list the high 32 bits first, then the low 32 bits.
void InverseTransformVector3DNOW(float *src, float *dst, float mat[4][4])
{
_asm {
femms ; MMX registers alias the x87 stack, so the state is reset on entry and exit
mov ecx,[src]
mov eax,[dst]
mov edx,[mat]
movq mm0,[ecx] ; src[1] src[0]
movd mm4,[ecx+8] ; 0 src[2]
movq mm1,mm0 ; src[1] src[0]
pfmul mm0,[edx] ; src[1]*mat[0][1] src[0]*mat[0][0]
movq mm5,mm4 ; 0 src[2]
pfmul mm4,[edx+8] ; 0*mat[0][3] src[2]*mat[0][2]
movq mm2,mm1 ; src[1] src[0]
pfmul mm1,[edx+16] ; src[1]*mat[1][1] src[0]*mat[1][0]
movq mm6,mm5 ; 0 src[2]
pfmul mm5,[edx+24] ; 0*mat[1][3] src[2]*mat[1][2]
movq mm3,mm2 ; src[1] src[0] (only used as a throw-away pfacc operand below)
pfmul mm2,[edx+32] ; src[1]*mat[2][1] src[0]*mat[2][0]
movq mm7,mm6 ; 0 src[2] - NOTE(review): mm7 is never read afterwards; this copy looks dead
pfmul mm6,[edx+40] ; 0*mat[2][3] src[2]*mat[2][2]
pfacc mm0,mm4 ; src[2]*mat[0][2] src[1]*mat[0][1]+src[0]*mat[0][0]
pfacc mm1,mm5 ; src[2]*mat[1][2] src[1]*mat[1][1]+src[0]*mat[1][0]
pfacc mm2,mm6 ; src[2]*mat[2][2] src[1]*mat[2][1]+src[0]*mat[2][0]
pfacc mm0,mm1 ; src[2]*mat[1][2]+src[1]*mat[1][1]+src[0]*mat[1][0] src[2]*mat[0][2]+src[1]*mat[0][1]+src[0]*mat[0][0]
pfacc mm2,mm3 ; src[1]+src[0] (discarded) src[2]*mat[2][2]+src[1]*mat[2][1]+src[0]*mat[2][0]
movq [eax],mm0 ; dst[1] dst[0]
movd [eax+8],mm2 ; dst[2] only - the high lane of mm2 is never stored
femms
}
}
// 4x4 single-precision matrix product with SSE: r = m1 * m2, i.e.
//   r[i][j] = m1[i][0]*m2[0][j] + m1[i][1]*m2[1][j] + m1[i][2]*m2[2][j] + m1[i][3]*m2[3][j]
// All three matrices are accessed with movaps, so each must be 16-byte aligned.
// m2 is loaded completely into xmm0..xmm3 before anything is stored, and each
// row of m1 is loaded before the same row of r is stored.
void MulMatricesSSE(float m1[4][4],float m2[4][4],float r[4][4])
{
__asm
{
mov eax,[r]
mov ecx,[m1]
mov edx,[m2]
; xmm0..xmm3 = rows 0..3 of m2, kept in registers for all four output rows
movaps xmm0,[edx]
movaps xmm1,[edx+16]
movaps xmm2,[edx+32]
movaps xmm3,[edx+48]
; r[0][0],r[0][1],r[0][2],r[0][3]
movaps xmm4,[ecx]
movaps xmm5,xmm4
movaps xmm6,xmm4
movaps xmm7,xmm4
; broadcast m1[0][0], m1[0][1], m1[0][2], m1[0][3] across xmm4..xmm7
shufps xmm4,xmm4,00000000b
shufps xmm5,xmm5,01010101b
shufps xmm6,xmm6,10101010b
shufps xmm7,xmm7,11111111b
mulps xmm4,xmm0
mulps xmm5,xmm1
mulps xmm6,xmm2
mulps xmm7,xmm3
addps xmm4,xmm5
addps xmm4,xmm6
addps xmm4,xmm7
movaps [eax],xmm4
; r[1][0],r[1][1],r[1][2],r[1][3]
movaps xmm4,[ecx+16]
movaps xmm5,xmm4
movaps xmm6,xmm4
movaps xmm7,xmm4
shufps xmm4,xmm4,00000000b
shufps xmm5,xmm5,01010101b
shufps xmm6,xmm6,10101010b
shufps xmm7,xmm7,11111111b
mulps xmm4,xmm0
mulps xmm5,xmm1
mulps xmm6,xmm2
mulps xmm7,xmm3
addps xmm4,xmm5
addps xmm4,xmm6
addps xmm4,xmm7
movaps [eax+16],xmm4
; r[2][0],r[2][1],r[2][2],r[2][3]
movaps xmm4,[ecx+32]
movaps xmm5,xmm4
movaps xmm6,xmm4
movaps xmm7,xmm4
shufps xmm4,xmm4,00000000b
shufps xmm5,xmm5,01010101b
shufps xmm6,xmm6,10101010b
shufps xmm7,xmm7,11111111b
mulps xmm4,xmm0
mulps xmm5,xmm1
mulps xmm6,xmm2
mulps xmm7,xmm3
addps xmm4,xmm5
addps xmm4,xmm6
addps xmm4,xmm7
movaps [eax+32],xmm4
; r[3][0],r[3][1],r[3][2],r[3][3]
movaps xmm4,[ecx+48]
movaps xmm5,xmm4
movaps xmm6,xmm4
movaps xmm7,xmm4
shufps xmm4,xmm4,00000000b
shufps xmm5,xmm5,01010101b
shufps xmm6,xmm6,10101010b
shufps xmm7,xmm7,11111111b
mulps xmm4,xmm0
mulps xmm5,xmm1
mulps xmm6,xmm2
mulps xmm7,xmm3
addps xmm4,xmm5
addps xmm4,xmm6
addps xmm4,xmm7
movaps [eax+48],xmm4
}
}
// 4x4 single-precision matrix product with 3DNow!: r = m1 * m2, i.e.
//   r[i][j] = m1[i][0]*m2[0][j] + m1[i][1]*m2[1][j] + m1[i][2]*m2[2][j] + m1[i][3]*m2[3][j]
// The four output rows are unrolled and software-pipelined: the loads for row
// i+1 of m1 are interleaved with the final adds and stores of row i.
// m2 is re-read from memory for every row, after earlier rows of r have
// already been stored, so r overlapping m2 would feed partially updated data
// into the later rows.
// Per row: mm0/mm1 hold m1[i][0..1] / m1[i][2..3]; mm7 accumulates r[i][0..1]
// and mm3 accumulates r[i][2..3].
void MulMatrices3DNOW(float m1[4][4],float m2[4][4],float r[4][4])
{
_asm
{
femms ; MMX registers alias the x87 stack, so the state is reset on entry and exit
mov ecx,[m1]
mov eax,[r]
mov edx,[m2]
; ---- row 0 ----
movq mm0,[ecx]
movq mm1,[ecx+8]
movq mm4,[edx]
; punpckhdq mmN,mmM followed by punpckhdq mmN,mmN broadcasts the high float of
; mmM; the stale low half picked up by the first unpack is overwritten by the second
punpckhdq mm2,mm0
movq mm5,[edx+16]
punpckhdq mm3,mm1
movq mm6,[edx+32]
punpckldq mm0,mm0
punpckldq mm1,mm1
pfmul mm4,mm0
punpckhdq mm2,mm2
pfmul mm0,[edx+8]
movq mm7,[edx+48]
pfmul mm5,mm2
punpckhdq mm3,mm3
pfmul mm2,[edx+24]
pfmul mm6,mm1
pfadd mm5,mm4
pfmul mm1,[edx+40]
pfadd mm2,mm0
pfmul mm7,mm3
pfadd mm6,mm5
pfmul mm3,[edx+56]
pfadd mm2,mm1
pfadd mm7,mm6
; ---- finish row 0, start row 1 ----
movq mm0,[ecx+16]
pfadd mm3,mm2
movq mm1,[ecx+24]
movq [eax],mm7
movq mm4,[edx]
movq [eax+8],mm3
punpckhdq mm2,mm0
movq mm5,[edx+16]
punpckhdq mm3,mm1
movq mm6,[edx+32]
punpckldq mm0,mm0
punpckldq mm1,mm1
pfmul mm4,mm0
punpckhdq mm2,mm2
pfmul mm0,[edx+8]
movq mm7,[edx+48]
pfmul mm5,mm2
punpckhdq mm3,mm3
pfmul mm2,[edx+24]
pfmul mm6,mm1
pfadd mm5,mm4
pfmul mm1,[edx+40]
pfadd mm2,mm0
pfmul mm7,mm3
pfadd mm6,mm5
pfmul mm3,[edx+56]
pfadd mm2,mm1
pfadd mm7,mm6
; ---- finish row 1, start row 2 ----
movq mm0,[ecx+32]
pfadd mm3,mm2
movq mm1,[ecx+40]
movq [eax+16],mm7
movq mm4,[edx]
movq [eax+24],mm3
punpckhdq mm2,mm0
movq mm5,[edx+16]
punpckhdq mm3,mm1
movq mm6,[edx+32]
punpckldq mm0,mm0
punpckldq mm1,mm1
pfmul mm4,mm0
punpckhdq mm2,mm2
pfmul mm0,[edx+8]
movq mm7,[edx+48]
pfmul mm5,mm2
punpckhdq mm3,mm3
pfmul mm2,[edx+24]
pfmul mm6,mm1
pfadd mm5,mm4
pfmul mm1,[edx+40]
pfadd mm2,mm0
pfmul mm7,mm3
pfadd mm6,mm5
pfmul mm3,[edx+56]
pfadd mm2,mm1
pfadd mm7,mm6
; ---- finish row 2, start row 3 ----
movq mm0,[ecx+48]
pfadd mm3,mm2
movq mm1,[ecx+56]
movq [eax+32],mm7
movq mm4,[edx]
movq [eax+40],mm3
punpckhdq mm2,mm0
movq mm5,[edx+16]
punpckhdq mm3,mm1
movq mm6,[edx+32]
punpckldq mm0,mm0
punpckldq mm1,mm1
pfmul mm4,mm0
punpckhdq mm2,mm2
pfmul mm0,[edx+8]
movq mm7,[edx+48]
pfmul mm5,mm2
punpckhdq mm3,mm3
pfmul mm2,[edx+24]
pfmul mm6,mm1
pfadd mm5,mm4
pfmul mm1,[edx+40]
pfadd mm2,mm0
pfmul mm7,mm3
pfadd mm6,mm5
pfmul mm3,[edx+56]
pfadd mm2,mm1
pfadd mm7,mm6
; ---- finish row 3 ----
pfadd mm3,mm2
movq [eax+48],mm7
movq [eax+56],mm3
femms
}
}
float DotProductSSE3(register float *v1, register float *v2)
{
_asm {
mov eax,[v1]
mov edx,[v2]
movaps xmm0, [eax]
mulps xmm0, [edx]
haddps xmm0, xmm0
haddps xmm0, xmm0
; movss eax, xmm0
}
}
extern "C" float DotProduct3DNOW(register float *v1, register float *v2);
extern "C" void NormalizeVectorSSE(float *v);
extern "C" void NormalizeVector3DNOW(float *v);
extern "C" void DetectSIMD(int function, int * iedx, int * iecx);
// Scales v in place by an approximate 1/sqrt(v[0]^2 + v[1]^2 + v[2]^2) using 3DNow!.
// Sixteen bytes are loaded and stored, so v must point at four floats; v[3] is
// multiplied by the same factor as the other three components.
// Only the low-precision pfrsqrt estimate is used - the refinement steps are
// commented out below. There is no guard for a zero-length vector.
void NormalizeVector3DNOW(float *v)
{
_asm{
femms ; MMX registers alias the x87 stack, so the state is reset on entry and exit
mov edx,[v]
movq mm0,[edx] ; v[1] v[0]
movq mm3,[edx+8] ; v[3] v[2]
movq mm1,mm0 ; keep unscaled copies for the final multiply
movq mm2,mm3
pfmul mm0,mm0 ; v[1]^2 v[0]^2
pfmul mm3,mm3 ; v[3]^2 v[2]^2
pfacc mm0,mm0 ; both lanes = v[0]^2+v[1]^2
pfadd mm0,mm3 ; low lane = v[0]^2+v[1]^2+v[2]^2 (high lane also picks up v[3]^2)
;movq mm4,mm0 ; prepare for 24bit precision
;punpckldq mm4,mm4 ; prepare for 24bit precision
pfrsqrt mm0,mm0 ; 15bit precision 1/sqrtf(v), estimate of the low lane replicated to both lanes
;movq mm3,mm0
;pfmul mm0,mm0
;pfrsqit1 mm0,mm4
;pfrcpit2 mm0,mm3 ; 24bit precision 1/sqrtf(v)
pfmul mm1,mm0 ; v[1]*scale v[0]*scale
pfmul mm2,mm0 ; v[3]*scale v[2]*scale
movq [edx],mm1
movq [edx+8],mm2
femms
}
}
void DetectSIMD(int func, int * iedx, int * iecx)
{
unsigned long reg, reg2;
__asm
{
mov eax, func
cpuid
mov reg, edx
mov reg2, ecx
}
if (iedx)
{
*iedx = reg;
}
if (iecx)
{
*iecx = reg2;
}
}
void math_init()
{

View File

@ -39,164 +39,15 @@
segment .text
;-----------------------------------------------------------------------
; void DetectSIMD(int func, int *iedx, int *iecx)
; Runs CPUID leaf `func` (sub-leaf 0) and stores the EDX / ECX feature
; words through iedx / iecx. A null pointer skips that store, matching
; the C++ implementation of the same routine.
; In:    stack args addressed off EBP through the proc/arg macros
; Clobb: eax, ecx, edx, flags (EBX is saved and restored)
; NOTE(review): the local labels assume the `proc` macro opens a
; non-local label for the routine - confirm against the macro file.
;-----------------------------------------------------------------------
proc DetectSIMD
%$func arg
%$iedx arg
%$iecx arg
        push    ebx                     ; CPUID overwrites EBX, which callers expect preserved
        mov     eax,[ebp + %$func]
        xor     ecx,ecx                 ; deterministic sub-leaf for leaves that read ECX
        cpuid
        mov     eax,[ebp + %$iedx]
        test    eax,eax
        jz      .skip_edx
        mov     [eax],edx
.skip_edx:
        mov     eax,[ebp + %$iecx]
        test    eax,eax
        jz      .skip_ecx
        mov     [eax],ecx
.skip_ecx:
        pop     ebx
endproc ;DetectSIMD
;****************************************************************
;
; ******** SSE ********
;
;****************************************************************
;-----------------------------------------------------------------------
; void TransformVectorSSE(float *src, float *dst, float mat[4][4])
; dst[j] = src[0]*mat[0][j] + src[1]*mat[1][j] + src[2]*mat[2][j], j = 0..2
; Reads src[0..2] and mat[0..2][0..2]; writes dst[0..2].
; Loads use movss/movhps only, so no 16-byte alignment is needed.
; Lane comments list lane 3 first and lane 0 last.
; Clobb: eax, ecx, edx, xmm0-xmm5
;-----------------------------------------------------------------------
proc TransformVectorSSE
CPU P3
%$src arg ; float *src
%$dst arg ; float *dst
%$mat arg ; float mat[4][4]
mov ecx,[ebp + %$src]
mov eax,[ebp + %$dst]
mov edx,[ebp + %$mat]
movss xmm0,[ecx] ; 0 0 0 src[0]
movss xmm5,[edx] ; 0 0 0 mat[0][0]
movhps xmm5,[edx+4] ; mat[0][2] mat[0][1] 0 mat[0][0]
shufps xmm0,xmm0, 0 ; src[0] src[0] src[0] src[0]
movss xmm1,[ecx+4] ; 0 0 0 src[1]
movss xmm3,[edx+16] ; 0 0 0 mat[1][0]
movhps xmm3,[edx+20] ; mat[1][2] mat[1][1] 0 mat[1][0]
shufps xmm1,xmm1, 0 ; src[1] src[1] src[1] src[1]
mulps xmm0,xmm5 ; mat[0][2]*src[0] mat[0][1]*src[0] 0 mat[0][0]*src[0]
mulps xmm1,xmm3 ; mat[1][2]*src[1] mat[1][1]*src[1] 0 mat[1][0]*src[1]
movss xmm2,[ecx+8] ; 0 0 0 src[2]
shufps xmm2,xmm2, 0 ; src[2] src[2] src[2] src[2]
movss xmm4,[edx+32] ; 0 0 0 mat[2][0]
movhps xmm4,[edx+36] ; mat[2][2] mat[2][1] 0 mat[2][0]
addps xmm0,xmm1 ; mat[0][2]*src[0]+mat[1][2]*src[1] mat[0][1]*src[0]+mat[1][1]*src[1] 0 mat[0][0]*src[0]+mat[1][0]*src[1]
mulps xmm2,xmm4 ; mat[2][2]*src[2] mat[2][1]*src[2] 0 mat[2][0]*src[2]
addps xmm0,xmm2 ; mat[0][2]*src[0]+mat[1][2]*src[1]+mat[2][2]*src[2] mat[0][1]*src[0]+mat[1][1]*src[1]+mat[2][1]*src[2] 0 mat[0][0]*src[0]+mat[1][0]*src[1]+mat[2][0]*src[2]
movss [eax],xmm0 ; dst[0] = mat[0][0]*src[0]+mat[1][0]*src[1]+mat[2][0]*src[2]
movhps [eax+4],xmm0 ; dst[2] dst[1] = mat[0][2]*src[0]+mat[1][2]*src[1]+mat[2][2]*src[2] mat[0][1]*src[0]+mat[1][1]*src[1]+mat[2][1]*src[2]
endproc ;TransformVectorSSE
;-----------------------------------------------------------------------
; void MulMatricesSSE(float m1[4][4], float m2[4][4], float r[4][4])
; r = m1 * m2 for 4x4 single-precision matrices.
; All three matrices are accessed with movaps and must be 16-byte
; aligned. m2 is held in xmm0-xmm3 for the whole routine; the four
; output rows are generated by the %rep block below.
; Clobb: eax, ecx, edx, xmm0-xmm7
;-----------------------------------------------------------------------
proc MulMatricesSSE
CPU P3
%$m1 arg ; float m1[4][4]
%$m2 arg ; float m2[4][4]
%$r arg ; float r[4][4]
        mov     eax,[ebp + %$r]
        mov     ecx,[ebp + %$m1]
        mov     edx,[ebp + %$m2]

        ; xmm0..xmm3 = rows 0..3 of m2
        movaps  xmm0,[edx]
        movaps  xmm1,[edx+16]
        movaps  xmm2,[edx+32]
        movaps  xmm3,[edx+48]

%assign MULMAT_SSE_ROW 0                ; byte offset of the current row
%rep 4
        ; r[row][0..3] = sum over k of m1[row][k] * m2[k][0..3]
        movaps  xmm4,[ecx+MULMAT_SSE_ROW]
        movaps  xmm5,xmm4
        movaps  xmm6,xmm4
        movaps  xmm7,xmm4
        shufps  xmm4,xmm4,00000000b     ; m1[row][0] in every lane
        shufps  xmm5,xmm5,01010101b     ; m1[row][1] in every lane
        shufps  xmm6,xmm6,10101010b     ; m1[row][2] in every lane
        shufps  xmm7,xmm7,11111111b     ; m1[row][3] in every lane
        mulps   xmm4,xmm0
        mulps   xmm5,xmm1
        mulps   xmm6,xmm2
        mulps   xmm7,xmm3
        addps   xmm4,xmm5
        addps   xmm4,xmm6
        addps   xmm4,xmm7
        movaps  [eax+MULMAT_SSE_ROW],xmm4
%assign MULMAT_SSE_ROW MULMAT_SSE_ROW+16
%endrep
%undef MULMAT_SSE_ROW
endproc ;MulMatricesSSE
proc NormalizeVectorSSE
@ -225,20 +76,6 @@ endproc ;NormalizeVectorSSE
;
;****************************************************************
;-----------------------------------------------------------------------
; float DotProductSSE3(float *v1, float *v2)
; Returns v1[0]*v2[0] + v1[1]*v2[1] + v1[2]*v2[2] + v1[3]*v2[3].
; Both pointers must reference four readable floats; loads are
; unaligned-safe (movups).
; Out:   ST(0) = dot product. The EBP-relative stack arguments indicate
;        a 32-bit stack-argument convention, where a float result is
;        returned on the x87 stack rather than in XMM0.
; Clobb: eax, edx, xmm0, xmm1, flags
; NOTE(review): all four lanes are summed - confirm that callers with
; 3-component vectors keep the fourth product at zero.
;-----------------------------------------------------------------------
proc DotProductSSE3
CPU PRESCOTT
%$v1 arg
%$v2 arg
        mov     eax,[ebp + %$v1]
        mov     edx,[ebp + %$v2]
        movups  xmm0,[eax]
        movups  xmm1,[edx]
        mulps   xmm0,xmm1               ; per-lane products
        haddps  xmm0,xmm0
        haddps  xmm0,xmm0               ; every lane = sum of the four products
        sub     esp,4                   ; scratch slot to move the result XMM -> x87
        movss   [esp],xmm0
        fld     dword [esp]             ; return value in ST(0)
        add     esp,4
endproc ;DotProductSSE3
;****************************************************************
;
@ -246,205 +83,8 @@ endproc ;DotProductSSE3
;
;****************************************************************
;-----------------------------------------------------------------------
; void TransformVector3DNOW(float *src, float *dst, float mat[4][4])
; dst[j] = src[0]*mat[0][j] + src[1]*mat[1][j] + src[2]*mat[2][j], j = 0..2
; Reads src[0..2] and mat[0..2][0..3]; writes dst[0..2].
; Lane comments list the high 32 bits first, then the low 32 bits.
; Clobb: eax, ecx, edx, mm0-mm5 (MMX/x87 state reset by femms)
;-----------------------------------------------------------------------
proc TransformVector3DNOW
CPU 586
%$src arg ; float *src
%$dst arg ; float *dst
%$mat arg ; float mat[4][4]
femms
mov ecx,[ebp + %$src]
mov eax,[ebp + %$dst]
mov edx,[ebp + %$mat]
movq mm0,[ecx] ; src[1] src[0]
movd mm2,[ecx+8] ; 0 src[2]
movq mm1,mm0 ; src[1] src[0]
punpckldq mm0,mm0 ; src[0] src[0]
punpckhdq mm1,mm1 ; src[1] src[1]
punpckldq mm2,mm2 ; src[2] src[2]
movq mm3,mm0 ; src[0] src[0]
movq mm4,mm1 ; src[1] src[1]
movq mm5,mm2 ; src[2] src[2]
pfmul mm0,[edx] ; src[0]*mat[0][1] src[0]*mat[0][0]
pfmul mm3,[edx+8] ; src[0]*mat[0][3] (discarded later) src[0]*mat[0][2]
pfmul mm1,[edx+16] ; src[1]*mat[1][1] src[1]*mat[1][0]
pfmul mm4,[edx+24] ; src[1]*mat[1][3] (discarded later) src[1]*mat[1][2]
pfmul mm2,[edx+32] ; src[2]*mat[2][1] src[2]*mat[2][0]
pfmul mm5,[edx+40] ; src[2]*mat[2][3] (discarded later) src[2]*mat[2][2]
pfadd mm0,mm1 ; src[0]*mat[0][1]+src[1]*mat[1][1] src[0]*mat[0][0]+src[1]*mat[1][0]
pfadd mm3,mm4 ; (discarded) src[0]*mat[0][2]+src[1]*mat[1][2]
pfadd mm0,mm2 ; src[0]*mat[0][1]+src[1]*mat[1][1]+src[2]*mat[2][1] src[0]*mat[0][0]+src[1]*mat[1][0]+src[2]*mat[2][0]
pfadd mm3,mm5 ; (discarded) src[0]*mat[0][2]+src[1]*mat[1][2]+src[2]*mat[2][2]
movq [eax],mm0 ; dst[1] dst[0]
movd [eax+8],mm3 ; dst[2] only - the high lane of mm3 is never stored
femms
endproc ;TransformVector3DNOW
;-----------------------------------------------------------------------
; void InverseTransformVector3DNOW(float *src, float *dst, float mat[4][4])
; dst[i] = mat[i][0]*src[0] + mat[i][1]*src[1] + mat[i][2]*src[2], i = 0..2
; (multiplication by the transpose of the upper-left 3x3 part of mat).
; Reads src[0..2] and mat[0..2][0..3]; writes dst[0..2].
; Lane comments list the high 32 bits first, then the low 32 bits.
; Clobb: eax, ecx, edx, mm0-mm7 (MMX/x87 state reset by femms)
;-----------------------------------------------------------------------
proc InverseTransformVector3DNOW
CPU 586
%$src arg ; float *src
%$dst arg ; float *dst
%$mat arg ; float mat[4][4]
femms
mov ecx,[ebp + %$src]
mov eax,[ebp + %$dst]
mov edx,[ebp + %$mat]
movq mm0,[ecx] ; src[1] src[0]
movd mm4,[ecx+8] ; 0 src[2]
movq mm1,mm0 ; src[1] src[0]
pfmul mm0,[edx] ; src[1]*mat[0][1] src[0]*mat[0][0]
movq mm5,mm4 ; 0 src[2]
pfmul mm4,[edx+8] ; 0*mat[0][3] src[2]*mat[0][2]
movq mm2,mm1 ; src[1] src[0]
pfmul mm1,[edx+16] ; src[1]*mat[1][1] src[0]*mat[1][0]
movq mm6,mm5 ; 0 src[2]
pfmul mm5,[edx+24] ; 0*mat[1][3] src[2]*mat[1][2]
movq mm3,mm2 ; src[1] src[0] (only used as a throw-away pfacc operand below)
pfmul mm2,[edx+32] ; src[1]*mat[2][1] src[0]*mat[2][0]
movq mm7,mm6 ; 0 src[2] - NOTE(review): mm7 is never read afterwards; this copy looks dead
pfmul mm6,[edx+40] ; 0*mat[2][3] src[2]*mat[2][2]
pfacc mm0,mm4 ; src[2]*mat[0][2] src[1]*mat[0][1]+src[0]*mat[0][0]
pfacc mm1,mm5 ; src[2]*mat[1][2] src[1]*mat[1][1]+src[0]*mat[1][0]
pfacc mm2,mm6 ; src[2]*mat[2][2] src[1]*mat[2][1]+src[0]*mat[2][0]
pfacc mm0,mm1 ; src[2]*mat[1][2]+src[1]*mat[1][1]+src[0]*mat[1][0] src[2]*mat[0][2]+src[1]*mat[0][1]+src[0]*mat[0][0]
pfacc mm2,mm3 ; src[1]+src[0] (discarded) src[2]*mat[2][2]+src[1]*mat[2][1]+src[0]*mat[2][0]
movq [eax],mm0 ; dst[1] dst[0]
movd [eax+8],mm2 ; dst[2] only - the high lane of mm2 is never stored
femms
endproc ;InverseTransformVector3DNOW
;-----------------------------------------------------------------------
; void MulMatrices3DNOW(float m1[4][4], float m2[4][4], float r[4][4])
; r = m1 * m2 for 4x4 single-precision matrices, using 3DNow!.
; The rows are software-pipelined: while row i is being finished and
; stored, the loads for row i+1 of m1 are already issued. m2 is re-read
; from memory for every row, after earlier rows of r have been stored.
; Per row: mm0/mm1 = m1[i][0..1] / m1[i][2..3]; mm7 accumulates
; r[i][0..1], mm3 accumulates r[i][2..3].
; Clobb: eax, ecx, edx, mm0-mm7 (MMX/x87 state reset by femms)
;-----------------------------------------------------------------------
proc MulMatrices3DNOW
CPU 586
%$m1 arg ; float m1[4][4]
%$m2 arg ; float m2[4][4]
%$r arg ; float r[4][4]

; Shared per-row core. Expects mm0/mm1 = current row of m1 and
; mm4 = m2[0][0..1]. Leaves mm7 = r[i][0..1]; r[i][2..3] still needs a
; final "pfadd mm3,mm2" from the caller.
%macro MULMAT_3DNOW_ROW_CORE 0
        punpckhdq mm2,mm0
        movq    mm5,[edx+16]
        punpckhdq mm3,mm1
        movq    mm6,[edx+32]
        punpckldq mm0,mm0               ; m1[i][0] in both lanes
        punpckldq mm1,mm1               ; m1[i][2] in both lanes
        pfmul   mm4,mm0
        punpckhdq mm2,mm2               ; m1[i][1] in both lanes
        pfmul   mm0,[edx+8]
        movq    mm7,[edx+48]
        pfmul   mm5,mm2
        punpckhdq mm3,mm3               ; m1[i][3] in both lanes
        pfmul   mm2,[edx+24]
        pfmul   mm6,mm1
        pfadd   mm5,mm4
        pfmul   mm1,[edx+40]
        pfadd   mm2,mm0
        pfmul   mm7,mm3
        pfadd   mm6,mm5
        pfmul   mm3,[edx+56]
        pfadd   mm2,mm1
        pfadd   mm7,mm6
%endmacro

        femms
        mov     ecx,[ebp + %$m1]
        mov     eax,[ebp + %$r]
        mov     edx,[ebp + %$m2]

        ; prime the pipeline with row 0
        movq    mm0,[ecx]
        movq    mm1,[ecx+8]
        movq    mm4,[edx]

%assign MULMAT_3DNOW_OFF 0              ; byte offset of the row being finished
%rep 3
        MULMAT_3DNOW_ROW_CORE
        ; finish the current row while fetching the next row of m1
        movq    mm0,[ecx+MULMAT_3DNOW_OFF+16]
        pfadd   mm3,mm2
        movq    mm1,[ecx+MULMAT_3DNOW_OFF+24]
        movq    [eax+MULMAT_3DNOW_OFF],mm7
        movq    mm4,[edx]
        movq    [eax+MULMAT_3DNOW_OFF+8],mm3
%assign MULMAT_3DNOW_OFF MULMAT_3DNOW_OFF+16
%endrep
%undef MULMAT_3DNOW_OFF

        ; last row: nothing left to prefetch
        MULMAT_3DNOW_ROW_CORE
        pfadd   mm3,mm2
        movq    [eax+48],mm7
        movq    [eax+56],mm3
        femms
endproc ;MulMatrices3DNOW
proc DotProduct3DNOW
CPU 586
@ -467,31 +107,3 @@ CPU 586
endproc ;DotProduct3DNOW
;-----------------------------------------------------------------------
; void NormalizeVector3DNOW(float *v)
; Scales v in place by an approximate 1/sqrt(v[0]^2 + v[1]^2 + v[2]^2).
; Sixteen bytes are loaded and stored, so v must point at four floats;
; v[3] is multiplied by the same factor. Only the low-precision pfrsqrt
; estimate is used (the refinement steps are commented out). There is
; no guard for a zero-length vector.
; Lane comments list the high 32 bits first, then the low 32 bits.
; Clobb: edx, mm0-mm3 (MMX/x87 state reset by femms)
;-----------------------------------------------------------------------
proc NormalizeVector3DNOW
CPU 586
%$v arg
femms
mov edx,[ebp + %$v]
movq mm0,[edx] ; v[1] v[0]
movq mm3,[edx+8] ; v[3] v[2]
movq mm1,mm0 ; keep unscaled copies for the final multiply
movq mm2,mm3
pfmul mm0,mm0 ; v[1]^2 v[0]^2
pfmul mm3,mm3 ; v[3]^2 v[2]^2
pfacc mm0,mm0 ; both lanes = v[0]^2+v[1]^2
pfadd mm0,mm3 ; low lane = v[0]^2+v[1]^2+v[2]^2 (high lane also picks up v[3]^2)
;movq mm4,mm0 ; prepare for 24bit precision
;punpckldq mm4,mm4 ; prepare for 24bit precision
pfrsqrt mm0,mm0 ; 15bit precision 1/sqrtf(v), estimate of the low lane replicated to both lanes
;movq mm3,mm0
;pfmul mm0,mm0
;pfrsqit1 mm0,mm4
;pfrcpit2 mm0,mm3 ; 24bit precision 1/sqrtf(v)
pfmul mm1,mm0 ; v[1]*scale v[0]*scale
pfmul mm2,mm0 ; v[3]*scale v[2]*scale
movq [edx],mm1
movq [edx+8],mm2
femms
endproc ;NormalizeVector3DNOW

View File

@ -1244,7 +1244,7 @@ void CALL DllConfig ( HWND hParent )
Glide64Config->ShowModal();
}
#ifndef _DEBUG
/*#ifndef _DEBUG
//#if 1
#ifndef __GNUG__
void wxStringData::Free()
@ -1253,7 +1253,7 @@ void wxStringData::Free()
}
#endif
#endif
*/
void CloseConfig()
{
if (romopen)

View File

@ -44,6 +44,8 @@
// -*- C++ -*- generated by wxGlade 0.6.3 on Wed Oct 08 18:56:23 2008
#define wxNO_QA_LIB
#include <wx/wx.h>
#include <wx/image.h>
// begin wxGlade: ::dependencies

View File

@ -85,9 +85,37 @@ static int right_height, left_height;
static int right_x, right_dxdy, left_x, left_dxdy;
static int left_z, left_dzdy;
extern "C" int imul16(int x, int y);
extern "C" int imul14(int x, int y);
extern "C" int idiv16(int x, int y);
// 16.16 fixed-point multiply: returns bits 16..47 of the signed 64-bit
// product x * y, i.e. (x * y) >> 16 truncated to 32 bits.
// Written as plain integer arithmetic so the result comes back through a real
// return statement instead of whatever an asm block happened to leave in EAX.
int imul16(int x, int y)
{
    const long long product = (long long)x * (long long)y;
    // Shift the unsigned bit pattern: keeps exactly the bits a 64-bit shift
    // right by 16 would leave in the low dword, without shifting a negative value.
    return (int)(unsigned int)((unsigned long long)product >> 16);
}
// Fixed-point multiply with 14 fractional bits: returns bits 14..45 of the
// signed 64-bit product x * y, i.e. (x * y) >> 14 truncated to 32 bits.
// Written as plain integer arithmetic so the result comes back through a real
// return statement instead of whatever an asm block happened to leave in EAX.
int imul14(int x, int y)
{
    const long long product = (long long)x * (long long)y;
    // Shift the unsigned bit pattern: keeps exactly the bits a 64-bit shift
    // right by 14 would leave in the low dword, without shifting a negative value.
    return (int)(unsigned int)((unsigned long long)product >> 14);
}
// 16.16 fixed-point divide: returns (x << 16) / y, evaluated in 64 bits and
// rounded toward zero.
// y must be non-zero. A quotient that does not fit in 32 bits is truncated on
// conversion; the previous idiv-based version raised a hardware divide error
// in that case.
// Written as plain integer arithmetic so the result comes back through a real
// return statement and no callee-saved register (EBX) is touched by hand.
int idiv16(int x, int y)
{
    // Multiply instead of shifting so a negative x is widened without
    // left-shifting a negative value.
    const long long scaled = (long long)x * 65536;
    return (int)(scaled / (long long)y);
}
__inline int iceil(int x)
{
@ -95,7 +123,7 @@ __inline int iceil(int x)
return (x >> 16);
}
static void RightSection(void)
void RightSection(void)
{
// Walk backwards trough the vertex array

View File

@ -47,6 +47,7 @@
#ifdef __MSC__
typedef __int64 int64;
typedef unsigned __int64 uint64;
typedef unsigned char boolean;
#else
typedef long long int64;
typedef unsigned long long uint64;

View File

@ -39,30 +39,6 @@
segment .text
;-----------------------------------------------------------------------
; int imul16(int x, int y)
; 16.16 fixed-point multiply: (x * y) >> 16, i.e. bits 16..47 of the
; signed 64-bit product.
; Out:   eax
; Clobb: edx, flags
;-----------------------------------------------------------------------
proc imul16
CPU 586
%$x arg
%$y arg
        mov     eax,[ebp + %$x]
        imul    dword [ebp + %$y]       ; edx:eax = x * y (signed 64-bit)
        shrd    eax,edx,16              ; eax = low dword of (edx:eax >> 16)
endproc ;imul16
;-----------------------------------------------------------------------
; int imul14(int x, int y)
; Fixed-point multiply with 14 fractional bits: (x * y) >> 14, i.e.
; bits 14..45 of the signed 64-bit product.
; Out:   eax
; Clobb: edx, flags
;-----------------------------------------------------------------------
proc imul14
CPU 586
%$x arg
%$y arg
        mov     eax,[ebp + %$x]
        imul    dword [ebp + %$y]       ; edx:eax = x * y (signed 64-bit)
        shrd    eax,edx,14              ; eax = low dword of (edx:eax >> 14)
endproc ;imul14
;(x << 16) / y
@ -71,11 +47,5 @@ CPU 586
%$x arg
%$y arg
mov eax, [ebp + %$x]
mov ebx, [ebp + %$y]
mov edx,eax
sar edx,16
shl eax,16
idiv ebx
endproc ;idiv16

View File

@ -62,6 +62,25 @@ the plugin
#ifndef _GFX_H_INCLUDED__
#define _GFX_H_INCLUDED__
#define wxNO_GL_LIB
#define wxNO_HTML_LIB
#define wxNO_ADV_LIB
#define wxNO_ZLIB_LIB
#define wxNO_TIFF_LIB
#define wxNO_EXPAT_LIB
#define wxNO_REGEX_LIB
#define wxNO_XML_LIB
#define wxNO_NET_LIB
#define wxNO_QA_LIB
#define wxNO_XRC_LIB
#define wxNO_AUI_LIB
#define wxNO_PROPGRID_LIB
#define wxNO_RIBBON_LIB
#define wxNO_RICHTEXT_LIB
#define wxNO_MEDIA_LIB
#define wxNO_STC_LIB
#include <wx/wx.h>
#include <wx/dynlib.h>
#include <wx/filename.h>
@ -75,6 +94,7 @@ the plugin
#include "Keys.h"
#if defined __VISUALC__
typedef unsigned char boolean;
#define GLIDE64_TRY __try
#define GLIDE64_CATCH __except (EXCEPTION_EXECUTE_HANDLER)
#else

View File

@ -0,0 +1,466 @@
<?xml version="1.0" encoding="windows-1251"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="9.00"
Name="Glide64"
ProjectGUID="{A4D13408-A794-4199-8FC7-4A9A32505005}"
RootNamespace="n64Glide"
TargetFrameworkVersion="131072"
>
<Platforms>
<Platform
Name="Win32"
/>
</Platforms>
<ToolFiles>
</ToolFiles>
<Configurations>
<Configuration
Name="Debug|Win32"
ConfigurationType="2"
InheritedPropertySheets="$(SolutionDir)PropertySheets/Win32.Debug.vsprops"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="&quot;$(Root)Source/3rd Party/wx/include&quot;;&quot;$(Root)Source/3rd Party/wx/include/msvc&quot;;inc"
PreprocessorDefinitions="__MSC__;"
RuntimeLibrary="3"
UsePrecompiledHeader="0"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
AdditionalDependencies="glide3x.lib"
OutputFile="$(Root)Plugin/GFX/Glide64_d.dll"
AdditionalLibraryDirectories="&quot;$(Root)Source\3rd Party\glide\lib&quot;;&quot;$(Root)Source\3rd Party\wx\lib\vc_lib&quot;"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|Win32"
ConfigurationType="2"
InheritedPropertySheets="$(SolutionDir)PropertySheets/Win32.Release.vsprops"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="&quot;$(Root)Source/3rd Party/wx/include&quot;;&quot;$(Root)Source/3rd Party/wx/include/msvc&quot;;inc"
PreprocessorDefinitions="__MSC__"
RuntimeLibrary="2"
UsePrecompiledHeader="0"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
AdditionalDependencies="glide3x.lib"
OutputFile="$(Root)Plugin/GFX/Glide64.dll"
AdditionalLibraryDirectories="&quot;$(Root)Source\3rd Party\glide\lib&quot;;&quot;$(Root)Source\3rd Party\wx\lib\vc_lib&quot;"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<Filter
Name="ucode"
>
<File
RelativePath="turbo3D.h"
>
</File>
<File
RelativePath="ucode.h"
>
</File>
<File
RelativePath="ucode00.h"
>
</File>
<File
RelativePath="ucode01.h"
>
</File>
<File
RelativePath="ucode02.h"
>
</File>
<File
RelativePath="ucode03.h"
>
</File>
<File
RelativePath="ucode04.h"
>
</File>
<File
RelativePath="ucode05.h"
>
</File>
<File
RelativePath="ucode06.h"
>
</File>
<File
RelativePath="ucode07.h"
>
</File>
<File
RelativePath="ucode08.h"
>
</File>
<File
RelativePath="ucode09.h"
>
</File>
<File
RelativePath="ucode09rdp.h"
>
</File>
<File
RelativePath="ucodeFB.h"
>
</File>
</Filter>
<Filter
Name="Texture"
>
<File
RelativePath="CRC.cpp"
>
</File>
<File
RelativePath="CRC.h"
>
</File>
<File
RelativePath="MiClWr16b.h"
>
</File>
<File
RelativePath="MiClWr32b.h"
>
</File>
<File
RelativePath="MiClWr8b.h"
>
</File>
<File
RelativePath="TexCache.cpp"
>
</File>
<File
RelativePath="TexCache.h"
>
</File>
<File
RelativePath=".\texConv.cpp"
>
</File>
<File
RelativePath="TexConv.h"
>
</File>
<File
RelativePath="TexLoad.h"
>
</File>
<File
RelativePath=".\TexLoad16b.cpp"
>
</File>
<File
RelativePath="TexLoad16b.h"
>
</File>
<File
RelativePath=".\TexLoad32b.cpp"
>
</File>
<File
RelativePath="TexLoad32b.h"
>
</File>
<File
RelativePath=".\TexLoad4b.cpp"
>
</File>
<File
RelativePath="TexLoad4b.h"
>
</File>
<File
RelativePath=".\TexLoad8b.cpp"
>
</File>
<File
RelativePath="TexLoad8b.h"
>
</File>
<File
RelativePath="TexMod.h"
>
</File>
<File
RelativePath="TexModCI.h"
>
</File>
<File
RelativePath="Texture.asm"
>
</File>
</Filter>
<Filter
Name="Config"
>
<File
RelativePath="Config.cpp"
>
</File>
<File
RelativePath="Config.h"
>
</File>
</Filter>
<Filter
Name="Doc"
Filter="txt"
>
<File
RelativePath="gpl.txt"
>
</File>
</Filter>
<Filter
Name="Icons"
>
<File
RelativePath="australia.xpm"
>
</File>
<File
RelativePath="brazil.xpm"
>
</File>
<File
RelativePath="france.xpm"
>
</File>
<File
RelativePath="japan.xpm"
>
</File>
<File
RelativePath="logo.xpm"
>
</File>
<File
RelativePath="russia.xpm"
>
</File>
<File
RelativePath="usa.xpm"
>
</File>
</Filter>
<Filter
Name="Res"
>
<File
RelativePath="cursor.h"
>
</File>
<File
RelativePath="font.h"
>
</File>
</Filter>
<File
RelativePath="3dmath.cpp"
>
</File>
<File
RelativePath="3dmath.h"
>
</File>
<File
RelativePath="3dmathSIMD.asm"
>
</File>
<File
RelativePath="Combine.cpp"
>
</File>
<File
RelativePath="Combine.h"
>
</File>
<File
RelativePath="Debugger.cpp"
>
</File>
<File
RelativePath="Debugger.h"
>
</File>
<File
RelativePath="DepthBufferRender.cpp"
>
</File>
<File
RelativePath="DepthBufferRender.h"
>
</File>
<File
RelativePath="Ext_TxFilter.cpp"
>
</File>
<File
RelativePath="Ext_TxFilter.h"
>
</File>
<File
RelativePath="FBtoScreen.cpp"
>
</File>
<File
RelativePath="FBtoScreen.h"
>
</File>
<File
RelativePath="FixedPoint.asm"
>
</File>
<File
RelativePath="Gfx #1.3.h"
>
</File>
<File
RelativePath="GlideExtensions.h"
>
</File>
<File
RelativePath="Keys.cpp"
>
</File>
<File
RelativePath="Keys.h"
>
</File>
<File
RelativePath="Main.cpp"
>
</File>
<File
RelativePath="rdp.cpp"
>
</File>
<File
RelativePath="rdp.h"
>
</File>
<File
RelativePath="TexBuffer.cpp"
>
</File>
<File
RelativePath="TexBuffer.h"
>
</File>
<File
RelativePath="Util.cpp"
>
</File>
<File
RelativePath="Util.h"
>
</File>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

Binary file not shown.

View File

@ -1 +0,0 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?><assembly manifestVersion="1.0" xmlns="urn:schemas-microsoft-com:asm.v1"></assembly>

Binary file not shown.

Binary file not shown.

View File

@ -1,518 +0,0 @@
<?xml version="1.0" encoding="windows-1251"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="8,00"
Name="Glide64"
ProjectGUID="{A4D13408-A794-4199-8FC7-4A9A32505005}"
RootNamespace="n64Glide"
>
<Platforms>
<Platform
Name="Win32"
/>
</Platforms>
<ToolFiles>
</ToolFiles>
<Configurations>
<Configuration
Name="Debug|Win32"
OutputDirectory=".\Debug"
IntermediateDirectory=".\Debug"
ConfigurationType="2"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC60.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
PreprocessorDefinitions="_DEBUG"
MkTypLibCompatible="true"
SuppressStartupBanner="true"
TargetEnvironment="1"
TypeLibraryName=".\Debug/n64Glide.tlb"
HeaderFileName=""
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="&quot;$(WXDIR)\lib\vc_lib\msw&quot;;&quot;$(WXDIR)\include&quot;;..\inc"
PreprocessorDefinitions="_CRT_SECURE_NO_WARNINGS;__MSC__;WIN32"
ExceptionHandling="2"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
PrecompiledHeaderFile=".\Debug/n64Glide.pch"
AssemblerListingLocation=".\Debug/"
ObjectFile=".\Debug/"
ProgramDataBaseFileName=".\Debug/"
WarningLevel="3"
SuppressStartupBanner="true"
DebugInformationFormat="4"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="_DEBUG"
Culture="1033"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
AdditionalDependencies="3dmathSIMD.obj Texture.obj FixedPoint.obj glide3x.lib wxmsw28d_core.lib wxbase28d.lib wxexpatd.lib wxjpegd.lib wxpngd.lib wxzlibd.lib winmm.lib comctl32.lib rpcrt4.lib wsock32.lib msvcrtd.lib"
OutputFile="$(OutDir)\Glide64.dll"
LinkIncremental="2"
SuppressStartupBanner="true"
AdditionalLibraryDirectories="..\lib;&quot;$(WXDIR)\lib\vc_lib&quot;"
IgnoreAllDefaultLibraries="false"
GenerateDebugInformation="true"
ProgramDatabaseFile=".\Debug/Glide64.pdb"
ImportLibrary=".\Debug/Glide64.lib"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
SuppressStartupBanner="true"
OutputFile=".\Debug/n64Glide.bsc"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCWebDeploymentTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory="D:\Games\N64\plugin\Glide64.dll"
IntermediateDirectory=".\Release"
ConfigurationType="2"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC60.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
PreprocessorDefinitions="NDEBUG"
MkTypLibCompatible="true"
SuppressStartupBanner="true"
TargetEnvironment="1"
TypeLibraryName=".\Release/n64Glide.tlb"
HeaderFileName=""
/>
<Tool
Name="VCCLCompilerTool"
Optimization="2"
InlineFunctionExpansion="1"
EnableIntrinsicFunctions="true"
FavorSizeOrSpeed="1"
EnableFiberSafeOptimizations="false"
AdditionalIncludeDirectories="&quot;$(WXDIR)\lib\vc_lib\msw&quot;;&quot;$(WXDIR)\include&quot;;..\inc"
PreprocessorDefinitions="_CRT_SECURE_NO_WARNINGS;__MSC__;WIN32"
StringPooling="true"
ExceptionHandling="2"
RuntimeLibrary="0"
EnableFunctionLevelLinking="true"
EnableEnhancedInstructionSet="0"
PrecompiledHeaderFile=".\Release/n64Glide.pch"
AssemblerListingLocation=".\Release/"
ObjectFile=".\Release/"
ProgramDataBaseFileName=".\Release/"
WarningLevel="3"
SuppressStartupBanner="true"
Detect64BitPortabilityProblems="false"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="NDEBUG"
Culture="1033"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
AdditionalDependencies="3dmathSIMD.obj Texture.obj FixedPoint.obj glide3x.lib wxmsw28_core.lib wxbase28.lib wxexpat.lib wxjpeg.lib wxpng.lib wxzlib.lib winmm.lib comctl32.lib rpcrt4.lib wsock32.lib msvcrt.lib LIBCMT.lib"
OutputFile="$(OutDir)\Glide64.dll"
LinkIncremental="1"
SuppressStartupBanner="true"
AdditionalLibraryDirectories="..\lib;&quot;$(WXDIR)\lib\vc_lib&quot;"
GenerateManifest="true"
ProgramDatabaseFile=".\Release/Glide64.pdb"
ImportLibrary=".\Release/Glide64.lib"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
SuppressStartupBanner="true"
OutputFile=".\Release/n64Glide.bsc"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCWebDeploymentTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<Filter
Name="ucode"
>
<File
RelativePath="..\turbo3D.h"
>
</File>
<File
RelativePath="..\ucode.h"
>
</File>
<File
RelativePath="..\ucode00.h"
>
</File>
<File
RelativePath="..\ucode01.h"
>
</File>
<File
RelativePath="..\ucode02.h"
>
</File>
<File
RelativePath="..\ucode03.h"
>
</File>
<File
RelativePath="..\ucode04.h"
>
</File>
<File
RelativePath="..\ucode05.h"
>
</File>
<File
RelativePath="..\ucode06.h"
>
</File>
<File
RelativePath="..\ucode07.h"
>
</File>
<File
RelativePath="..\ucode08.h"
>
</File>
<File
RelativePath="..\ucode09.h"
>
</File>
<File
RelativePath="..\ucode09rdp.h"
>
</File>
<File
RelativePath="..\ucodeFB.h"
>
</File>
</Filter>
<Filter
Name="Texture"
>
<File
RelativePath="..\CRC.cpp"
>
</File>
<File
RelativePath="..\CRC.h"
>
</File>
<File
RelativePath="..\MiClWr16b.h"
>
</File>
<File
RelativePath="..\MiClWr32b.h"
>
</File>
<File
RelativePath="..\MiClWr8b.h"
>
</File>
<File
RelativePath="..\TexCache.cpp"
>
</File>
<File
RelativePath="..\TexCache.h"
>
</File>
<File
RelativePath="..\TexConv.h"
>
</File>
<File
RelativePath="..\TexLoad.h"
>
</File>
<File
RelativePath="..\TexLoad16b.h"
>
</File>
<File
RelativePath="..\TexLoad32b.h"
>
</File>
<File
RelativePath="..\TexLoad4b.h"
>
</File>
<File
RelativePath="..\TexLoad8b.h"
>
</File>
<File
RelativePath="..\TexMod.h"
>
</File>
<File
RelativePath="..\TexModCI.h"
>
</File>
<File
RelativePath="..\Texture.asm"
>
</File>
</Filter>
<Filter
Name="Config"
>
<File
RelativePath="..\Config.cpp"
>
</File>
<File
RelativePath="..\Config.h"
>
</File>
</Filter>
<Filter
Name="Doc"
Filter="txt"
>
<File
RelativePath="..\gpl.txt"
>
</File>
</Filter>
<Filter
Name="Icons"
>
<File
RelativePath="..\australia.xpm"
>
</File>
<File
RelativePath="..\brazil.xpm"
>
</File>
<File
RelativePath="..\france.xpm"
>
</File>
<File
RelativePath="..\japan.xpm"
>
</File>
<File
RelativePath="..\logo.xpm"
>
</File>
<File
RelativePath="..\russia.xpm"
>
</File>
<File
RelativePath="..\usa.xpm"
>
</File>
</Filter>
<Filter
Name="Res"
>
<File
RelativePath="..\cursor.h"
>
</File>
<File
RelativePath="..\font.h"
>
</File>
</Filter>
<File
RelativePath="..\3dmath.cpp"
>
</File>
<File
RelativePath="..\3dmath.h"
>
</File>
<File
RelativePath="..\3dmathSIMD.asm"
>
</File>
<File
RelativePath="..\Combine.cpp"
>
</File>
<File
RelativePath="..\Combine.h"
>
</File>
<File
RelativePath="..\Debugger.cpp"
>
</File>
<File
RelativePath="..\Debugger.h"
>
</File>
<File
RelativePath="..\DepthBufferRender.cpp"
>
</File>
<File
RelativePath="..\DepthBufferRender.h"
>
</File>
<File
RelativePath="..\Ext_TxFilter.cpp"
>
</File>
<File
RelativePath="..\Ext_TxFilter.h"
>
</File>
<File
RelativePath="..\FBtoScreen.cpp"
>
</File>
<File
RelativePath="..\FBtoScreen.h"
>
</File>
<File
RelativePath="..\FixedPoint.asm"
>
</File>
<File
RelativePath="..\Gfx #1.3.h"
>
</File>
<File
RelativePath="..\GlideExtensions.h"
>
</File>
<File
RelativePath="..\Keys.cpp"
>
</File>
<File
RelativePath="..\Keys.h"
>
</File>
<File
RelativePath="..\Main.cpp"
>
</File>
<File
RelativePath="..\rdp.cpp"
>
</File>
<File
RelativePath="..\rdp.h"
>
</File>
<File
RelativePath="..\TexBuffer.cpp"
>
</File>
<File
RelativePath="..\TexBuffer.h"
>
</File>
<File
RelativePath="..\Util.cpp"
>
</File>
<File
RelativePath="..\Util.h"
>
</File>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

View File

@ -36,7 +36,6 @@
// * Do NOT send me the whole project or file that you modified. Take out your modified code sections, and tell me where to put them. If people sent the whole thing, I would have many different versions, but no idea how to combine them all.
//
//****************************************************************
#include "Gfx #1.3.h"
#include <wx/fileconf.h>
#include <wx/wfstream.h>
@ -54,7 +53,12 @@
int ghq_dmptex_toggle_key = 0;
#endif
#define G64_VERSION "'Final' "
#ifdef _DEBUG
#define G64_VERSION "For PJ64 (Debug) "
#else
#define G64_VERSION "For PJ64 "
#endif
#define RELTIME "Date: " __DATE__// " Time: " __TIME__
#ifdef EXT_LOGGING
@ -1290,6 +1294,8 @@ int DllUnload(void)
}
#ifdef __WINDOWS__
void wxSetInstance(HINSTANCE hInstance);
extern "C" int WINAPI DllMain (HINSTANCE hinstDLL,
wxUint32 fdwReason,
LPVOID lpReserved)

View File

@ -37,9 +37,9 @@
//
//****************************************************************
extern "C" void asmMirror16bS (int tex, int start, int width, int height, int mask, int line, int full, int count);
extern "C" void asmWrap16bS (int tex, int start, int height, int mask, int line, int full, int count);
extern "C" void asmClamp16bS (int tex, int constant, int height,int line, int full, int count);
void asmMirror16bS (int tex, int start, int width, int height, int mask, int line, int full, int count);
void asmWrap16bS (int tex, int start, int height, int mask, int line, int full, int count);
void asmClamp16bS (int tex, int constant, int height,int line, int full, int count);
//****************************************************************
// 16-bit Horizontal Mirror

View File

@ -41,9 +41,9 @@
//
//****************************************************************
extern "C" void asmMirror32bS (int tex, int start, int width, int height, int mask, int line, int full, int count);
extern "C" void asmWrap32bS (int tex, int start, int height, int mask, int line, int full, int count);
extern "C" void asmClamp32bS (int tex, int constant, int height,int line, int full, int count);
void asmMirror32bS (int tex, int start, int width, int height, int mask, int line, int full, int count);
void asmWrap32bS (int tex, int start, int height, int mask, int line, int full, int count);
void asmClamp32bS (int tex, int constant, int height,int line, int full, int count);
//****************************************************************
// 32-bit Horizontal Mirror

View File

@ -40,9 +40,9 @@
//****************************************************************
// 8-bit Horizontal Mirror
extern "C" void asmMirror8bS (int tex, int start, int width, int height, int mask, int line, int full, int count);
extern "C" void asmWrap8bS (int tex, int start, int height, int mask, int line, int full, int count);
extern "C" void asmClamp8bS (int tex, int constant, int height,int line, int full, int count);
void asmMirror8bS (int tex, int start, int width, int height, int mask, int line, int full, int count);
void asmWrap8bS (int tex, int start, int height, int mask, int line, int full, int count);
void asmClamp8bS (int tex, int constant, int height,int line, int full, int count);
void Mirror8bS (wxUint32 tex, wxUint32 mask, wxUint32 max_width, wxUint32 real_width, wxUint32 height)
{

View File

@ -149,7 +149,48 @@ void ClearCache ()
//****************************************************************
// GetTexInfo - gets information for either t0 or t1, checks if in cache & fills tex_found
extern "C" int asmTextureCRC(int addr, int width, int height, int line);
// Folds a block of texture memory into a 32-bit checksum.
//   addr   - address of the first byte to read
//   width  - number of 8-byte groups read per row
//   height - number of rows
//   line   - number of bytes added to the read pointer after each row
// There is no C-level return statement: the checksum is whatever the asm
// block leaves in EAX.
// NOTE(review): width and height are decremented before they are tested, so
// a value of 0 would not stop the loops early - presumably callers never
// pass 0; confirm.
int asmTextureCRC(int addr, int width, int height, int line)
{
_asm {
push ebx
push edi
xor eax,eax ; eax is final result
mov ebx,[line]
mov ecx,[height] ; ecx is height counter
mov edi,[addr] ; edi is ptr to texture memory
crc_loop_y:
push ecx ; save the row counter, ecx is reused as the column counter
mov ecx,[width]
crc_loop_x:
add eax,[edi] ; MUST be 64-bit aligned, so manually unroll
add eax,[edi+4]
mov edx,ecx
mul edx ; edx:eax = eax * remaining column count
add eax,edx ; fold the high half of the product back into the sum
add edi,8
dec ecx
jnz crc_loop_x
pop ecx ; restore the row counter
mov edx,ecx
mul edx ; edx:eax = eax * remaining row count
add eax,edx ; fold the high half of the product back into the sum
add edi,ebx ; advance the read pointer by `line` bytes
dec ecx
jnz crc_loop_y
pop edi
pop ebx
}
}
void GetTexInfo (int id, int tile)
{
FRDP (" | |-+ GetTexInfo (id: %d, tile: %d)\n", id, tile);

View File

@ -37,10 +37,10 @@
//
//****************************************************************
extern "C" void asmTexConv_ARGB1555_ARGB4444(wxUIntPtr src, wxUIntPtr dst, int size);
extern "C" void asmTexConv_AI88_ARGB4444(wxUIntPtr src, wxUIntPtr dst, int size);
extern "C" void asmTexConv_AI44_ARGB4444(wxUIntPtr src, wxUIntPtr dst, int size);
extern "C" void asmTexConv_A8_ARGB4444(wxUIntPtr src, wxUIntPtr dst, int size);
void asmTexConv_ARGB1555_ARGB4444(wxUIntPtr src, wxUIntPtr dst, int size);
void asmTexConv_AI88_ARGB4444(wxUIntPtr src, wxUIntPtr dst, int size);
void asmTexConv_AI44_ARGB4444(wxUIntPtr src, wxUIntPtr dst, int size);
void asmTexConv_A8_ARGB4444(wxUIntPtr src, wxUIntPtr dst, int size);
void TexConv_ARGB1555_ARGB4444 (wxUIntPtr src, wxUIntPtr dst, int width, int height)
{

View File

@ -0,0 +1,303 @@
#include "Gfx #1.3.h"
/*****************************************************************
16b textures load
*****************************************************************/
/*****************************************************************
; Size: 2, Format: 0
;
; 2008.03.29 cleaned up - H.Morii
; 2009 ported to NASM - Sergey (Gonetz) Lipski
;
; Converts rows of 16-bit texels from src to dst.
;   src, dst - source / destination addresses
;   wid_64   - number of 8-byte groups (four 16-bit texels) per row
;   height   - number of rows
;   line     - bytes added to the source position after each row
;   ext      - bytes added to the destination pointer after each row
;
; Every texel is byte-swapped and rotated right by one bit. Rows are
; handled in pairs: the second row of each pair reads the two dwords of
; every 8-byte group in swapped order. After each row of a pair the
; source offset from src is wrapped with "& 0xFFF".
;
; NOTE(review): the one-bit rotate presumably moves an RGBA5551 alpha bit
; to the top (ARGB1555) - confirm against the caller.
; NOTE(review): wid_64 and height are decremented before being tested,
; so 0 is presumably never passed - confirm.
*/
void asmLoad16bRGBA (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext)
{
_asm {
push ebx
push esi
push edi
mov esi,[src]
mov edi,[dst]
mov ecx,[height]
y_loop:
push ecx ; save the row counter, ecx is reused as the column counter
mov ecx,[wid_64]
x_loop:
mov eax,[esi] ; read both pixels
mov ebx,[esi+4] ; read both pixels
bswap eax
bswap ebx
ror ax,1 ; rotate the low 16-bit texel right by one bit
ror bx,1
ror eax,16 ; exchange the halves so the other texel is in ax
ror ebx,16
ror ax,1 ; rotate the other texel right by one bit
ror bx,1
mov [edi],eax
mov [edi+4],ebx
add esi,8
add edi,8
dec ecx
jnz x_loop
pop ecx
dec ecx
jz end_y_loop ; the row just written was the last one
push ecx
; next source row = src + ((esi + line - src) & 0xFFF)
mov eax,esi
add eax,[line]
mov esi,[src]
sub eax, esi
and eax, 0xFFF
add esi, eax
add edi,[ext]
mov ecx,[wid_64]
x_loop_2:
; second row of the pair: the two dwords are fetched in swapped order
mov eax,[esi+4] ; read both pixels
mov ebx,[esi] ; read both pixels
bswap eax
bswap ebx
ror ax,1
ror bx,1
ror eax,16
ror ebx,16
ror ax,1
ror bx,1
mov [edi],eax
mov [edi+4],ebx
add esi,8
add edi,8
dec ecx
jnz x_loop_2
; next source row = src + ((esi + line - src) & 0xFFF)
mov eax,esi
add eax,[line]
mov esi,[src]
sub eax, esi
and eax, 0xFFF
add esi, eax
add edi,[ext]
pop ecx
dec ecx
jnz y_loop
end_y_loop:
pop edi
pop esi
pop ebx
}
}
/****************************************************************
; Size: 2, Format: 3
;
; ** by Gugaman/Dave2001 **
;
; 2008.03.29 cleaned up - H.Morii
; 2009 ported to NASM - Sergey (Gonetz) Lipski
;
; Copies rows of texture data from src to dst without changing the
; texel values.
;   src, dst - source / destination addresses
;   wid_64   - number of 8-byte groups per row
;   height   - number of rows
;   line     - bytes added to the source pointer after each row
;   ext      - bytes added to the destination pointer after each row
;
; Rows are handled in pairs: the second row of each pair reads the two
; dwords of every 8-byte group in swapped order.
;
; NOTE(review): wid_64 and height are decremented before being tested,
; so 0 is presumably never passed - confirm.
*/
void asmLoad16bIA (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext)
{
_asm {
push ebx
push esi
push edi
mov esi,[src]
mov edi,[dst]
mov ecx,[height]
y_loop:
push ecx ; save the row counter, ecx is reused as the column counter
mov ecx,[wid_64]
x_loop:
mov eax,[esi] ; read both pixels
mov ebx,[esi+4] ; read both pixels
mov [edi],eax
mov [edi+4],ebx
add esi,8
add edi,8
dec ecx
jnz x_loop
pop ecx
dec ecx
jz end_y_loop ; the row just written was the last one
push ecx
add esi,[line]
add edi,[ext]
mov ecx,[wid_64]
x_loop_2:
; second row of the pair: the two dwords are fetched in swapped order
mov eax,[esi+4] ; read both pixels
mov ebx,[esi] ; read both pixels
mov [edi],eax
mov [edi+4],ebx
add esi,8
add edi,8
dec ecx
jnz x_loop_2
add esi,[line]
add edi,[ext]
pop ecx
dec ecx
jnz y_loop
end_y_loop:
pop edi
pop esi
pop ebx
}
}
/*****************************************************************
;16b textures mirror/clamp/wrap
;*****************************************************************/
// Writes `count` 16-bit texels per row, for `height` rows, to the memory at
// `start`, reading them from the row that begins at `tex`.
//   tex    - address of the current source row (advanced by `full` per row)
//   start  - address of the first texel to write
//   width  - value used for the mirror test, see below
//   mask   - byte mask applied to the source offset (x * 2)
//   line   - bytes added to the write pointer after each row
//   full   - bytes added to `tex` after each row
//   count  - texels written per row
// For column x the source offset is (x*2 & mask) when ((width + x) & width)
// is zero, and mask - (x*2 & mask) otherwise (the mirrored case).
// NOTE(review): the column loop compares with `jne` after incrementing and
// height is decremented before the test, so count == 0 or height == 0 is
// presumably never passed - confirm.
void asmMirror16bS (int tex, int start, int width, int height, int mask, int line, int full, int count)
{
_asm {
push ebx
push esi
push edi
mov edi,[start]
mov ecx,[height]
loop_y:
xor edx,edx ; edx = column index x
loop_x:
mov esi,[tex]
mov ebx,[width]
add ebx,edx
and ebx,[width] ; non-zero => this column is read mirrored
jnz is_mirrored
mov eax,edx
shl eax,1 ; x * 2 bytes per texel
and eax,[mask]
add esi,eax
mov ax,[esi]
mov [edi],ax
add edi,2
jmp end_mirror_check
is_mirrored:
add esi,[mask] ; start from the far end and walk backwards
mov eax,edx
shl eax,1
and eax,[mask]
sub esi,eax
mov ax,[esi]
mov [edi],ax
add edi,2
end_mirror_check:
inc edx
cmp edx,[count]
jne loop_x
add edi,[line]
mov eax,[tex] ; advance the source row: tex += full
add eax,[full]
mov [tex],eax
dec ecx
jnz loop_y
pop edi
pop esi
pop ebx
}
}
// Writes `count` dwords per row, for `height` rows, to the memory at `start`,
// reading them from the row that begins at `tex`.
//   tex    - address of the current source row (advanced by `full` per row)
//   start  - address of the first dword to write
//   mask   - mask applied to the column index before it is scaled by 4
//   line   - bytes added to the write pointer after each row
//   full   - bytes added to `tex` after each row
//   count  - dwords written per row
// NOTE(review): despite the "16b" name the loop moves 4-byte units, so mask
// and count are presumably expressed in dwords (two 16-bit texels) - confirm
// against the caller.
// NOTE(review): count == 0 or height == 0 is presumably never passed (the
// loops test after incrementing/decrementing) - confirm.
void asmWrap16bS (int tex, int start, int height, int mask, int line, int full, int count)
{
_asm {
push ebx
push esi
push edi
mov edi,[start]
mov ecx,[height]
loop_y:
xor edx,edx ; edx = column index
loop_x:
mov esi,[tex]
mov eax,edx
and eax,[mask]
shl eax,2 ; masked index * 4 bytes
add esi,eax
mov eax,[esi]
mov [edi],eax
add edi,4
inc edx
cmp edx,[count]
jne loop_x
add edi,[line]
mov eax,[tex] ; advance the source row: tex += full
add eax,[full]
mov [tex],eax
dec ecx
jnz loop_y
pop edi
pop esi
pop ebx
}
}
// Fills `count` 16-bit texels per row, for `height` rows, with a per-row
// constant value.
//   tex      - address of the first texel to write
//   constant - address of the 16-bit value to replicate for the first row
//   line     - bytes added to the write pointer after each row
//   full     - bytes added to the `constant` read pointer after each row
//   count    - texels written per row
// NOTE(review): count and height are decremented before being tested, so 0
// is presumably never passed - confirm.
void asmClamp16bS (int tex, int constant, int height,int line, int full, int count)
{
_asm {
push ebx
push esi
push edi
mov esi,[constant]
mov edi,[tex]
mov ecx,[height]
y_loop:
mov ax,[esi] ; value replicated across this row
mov edx,[count]
x_loop:
mov [edi],ax ; don't unroll or make dword, it may go into next line (doesn't have to be multiple of two)
add edi,2
dec edx
jnz x_loop
add esi,[full]
add edi,[line]
dec ecx
jnz y_loop
pop edi
pop esi
pop ebx
}
}

View File

@ -37,8 +37,8 @@
//
//****************************************************************
extern "C" void asmLoad16bRGBA (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext);
extern "C" void asmLoad16bIA (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext);
void asmLoad16bRGBA (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext);
void asmLoad16bIA (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext);
//****************************************************************
@ -133,4 +133,4 @@ wxUint32 Load16bYUV (wxUIntPtr dst, wxUIntPtr src, int wid_64, int height, int l
*(tex++) = c;
}
return (1 << 16) | GR_TEXFMT_RGB_565;
}
}

View File

@ -0,0 +1,139 @@
#include "Gfx #1.3.h"
/*****************************************************************
;32b textures mirror/clamp/wrap
;*****************************************************************/
// Writes `count` 32-bit texels per row, for `height` rows, to the memory at
// `start`, reading them from the row that begins at `tex`.
//   tex    - address of the current source row (advanced by `full` per row)
//   start  - address of the first texel to write
//   width  - value used for the mirror test, see below
//   mask   - byte mask applied to the source offset (x * 4)
//   line   - bytes added to the write pointer after each row
//   full   - bytes added to `tex` after each row
//   count  - texels written per row
// For column x the source offset is (x*4 & mask) when ((width + x) & width)
// is zero, and mask - (x*4 & mask) otherwise (the mirrored case).
// NOTE(review): count == 0 or height == 0 is presumably never passed (the
// loops test after incrementing/decrementing) - confirm.
void asmMirror32bS (int tex, int start, int width, int height, int mask, int line, int full, int count)
{
_asm {
push ebx
push esi
push edi
mov edi,[start]
mov ecx,[height]
loop_y:
xor edx,edx ; edx = column index x
loop_x:
mov esi,[tex]
mov ebx,[width]
add ebx,edx
and ebx,[width] ; non-zero => this column is read mirrored
jnz is_mirrored
mov eax,edx
shl eax,2 ; x * 4 bytes per texel
and eax,[mask]
add esi,eax
mov eax,[esi]
mov [edi],eax
add edi,4
jmp end_mirror_check
is_mirrored:
add esi,[mask] ; start from the far end and walk backwards
mov eax,edx
shl eax,2
and eax,[mask]
sub esi,eax
mov eax,[esi]
mov [edi],eax
add edi,4
end_mirror_check:
inc edx
cmp edx,[count]
jne loop_x
add edi,[line]
mov eax,[tex] ; advance the source row: tex += full
add eax,[full]
mov [tex],eax
dec ecx
jnz loop_y
pop edi
pop esi
pop ebx
}
}
// Writes `count` 32-bit texels per row, for `height` rows, to the memory at
// `start`, reading them from the row that begins at `tex`.
//   tex    - address of the current source row (advanced by `full` per row)
//   start  - address of the first texel to write
//   mask   - mask applied to the column index before it is scaled by 4
//   line   - bytes added to the write pointer after each row
//   full   - bytes added to `tex` after each row
//   count  - texels written per row
// NOTE(review): count == 0 or height == 0 is presumably never passed (the
// loops test after incrementing/decrementing) - confirm.
void asmWrap32bS (int tex, int start, int height, int mask, int line, int full, int count)
{
_asm {
push ebx
push esi
push edi
mov edi,[start]
mov ecx,[height]
loop_y:
xor edx,edx ; edx = column index
loop_x:
mov esi,[tex]
mov eax,edx
and eax,[mask]
shl eax,2 ; masked index * 4 bytes per texel
add esi,eax
mov eax,[esi]
mov [edi],eax
add edi,4
inc edx
cmp edx,[count]
jne loop_x
add edi,[line]
mov eax,[tex] ; advance the source row: tex += full
add eax,[full]
mov [tex],eax
dec ecx
jnz loop_y
pop edi
pop esi
pop ebx
}
}
// Fills `count` 32-bit texels per row, for `height` rows, with a per-row
// constant value.
//   tex      - address of the first texel to write
//   constant - address of the 32-bit value to replicate for the first row
//   line     - bytes added to the write pointer after each row
//   full     - bytes added to the `constant` read pointer after each row
//   count    - texels written per row
// NOTE(review): count and height are decremented before being tested, so 0
// is presumably never passed - confirm.
void asmClamp32bS (int tex, int constant, int height,int line, int full, int count)
{
_asm {
push ebx
push esi
push edi
mov esi,[constant]
mov edi,[tex]
mov ecx,[height]
y_loop:
mov eax,[esi] ; value replicated across this row
mov edx,[count]
x_loop:
mov [edi],eax ; one texel (dword) per iteration; don't unroll, it may go into next line (count doesn't have to be multiple of two)
add edi,4
dec edx
jnz x_loop
add esi,[full]
add edi,[line]
dec ecx
jnz y_loop
pop edi
pop esi
pop ebx
}
}

1912
Source/Glide64/TexLoad4b.cpp Normal file

File diff suppressed because it is too large Load Diff

View File

@ -37,10 +37,10 @@
//
//****************************************************************
extern "C" void asmLoad4bCI (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext, wxUIntPtr pal);
extern "C" void asmLoad4bIAPal (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext, wxUIntPtr pal);
extern "C" void asmLoad4bIA (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext);
extern "C" void asmLoad4bI (wxUIntPtr src, int dst, wxUIntPtr wid_64, int height, int line, int ext);
void asmLoad4bCI (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext, wxUIntPtr pal);
void asmLoad4bIAPal (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext, wxUIntPtr pal);
void asmLoad4bIA (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext);
void asmLoad4bI (wxUIntPtr src, int dst, wxUIntPtr wid_64, int height, int line, int ext);
//****************************************************************
// Size: 0, Format: 2

View File

@ -0,0 +1,771 @@
#include "Gfx #1.3.h"
/*****************************************************************
;8b textures load
;****************************************************************/
/*****************************************************************
; Size: 1, Format: 2
;
; 2008.03.29 cleaned up - H.Morii
; 2009 ported to NASM - Sergey (Gonetz) Lipski
;
; Expands rows of 8-bit indices into 16-bit texels through a lookup
; table.
;   src, dst - source / destination addresses
;   wid_64   - number of 8-byte groups (eight indices) per row
;   height   - number of rows
;   line     - bytes added to the source position after each row
;   ext      - bytes added to the destination pointer after each row
;   pal      - address of the table of 16-bit entries, indexed by the
;              source byte
;
; Each looked-up entry is rotated right by one bit before it is stored;
; two entries are packed into every dword written. Rows are handled in
; pairs: the second row of each pair reads the two dwords of every
; 8-byte group in swapped order. After each row of a pair, and after
; each group of the second row, the source offset from src is wrapped
; with "& 0x7FF".
;
; NOTE(review): wid_64 and height are decremented before being tested,
; so 0 is presumably never passed - confirm.
*/
void asmLoad8bCI (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext, wxUIntPtr pal)
{
_asm {
push ebx
push esi
push edi
mov ebx,[pal]
mov esi,[src]
mov edi,[dst]
mov ecx,[height]
y_loop:
push ecx ; save the row counter, ecx is reused as the column counter
mov ecx,[wid_64]
x_loop:
push ecx ; ecx is used as scratch for packing below
mov eax,[esi] ; read all 4 pixels
bswap eax
add esi,4
mov edx,eax
; 1st dword output {
shr eax,15 ; (bits 16-23) * 2 = byte offset of the table entry
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,1
shl ecx,16
mov eax,edx
shr eax,23 ; (bits 24-31) * 2
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,1
mov [edi],ecx
add edi,4
; }
; 2nd dword output {
mov eax,edx
shl eax,1 ; (bits 0-7) * 2
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,1
shl ecx,16
shr edx,7 ; (bits 8-15) * 2
and edx,0x1FE
mov cx,[ebx+edx]
ror cx,1
mov [edi],ecx
add edi,4
; }
; * copy
mov eax,[esi] ; read all 4 pixels
bswap eax
add esi,4
mov edx,eax
; 1st dword output {
shr eax,15
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,1
shl ecx,16
mov eax,edx
shr eax,23
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,1
mov [edi],ecx
add edi,4
; }
; 2nd dword output {
mov eax,edx
shl eax,1
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,1
shl ecx,16
shr edx,7
and edx,0x1FE
mov cx,[ebx+edx]
ror cx,1
mov [edi],ecx
add edi,4
; }
; *
pop ecx
dec ecx
jnz x_loop
pop ecx
dec ecx
jz near end_y_loop ; the row just written was the last one
push ecx
; next source row = src + ((esi + line - src) & 0x7FF)
mov eax,esi
add eax,[line]
mov esi,[src]
sub eax,esi
and eax,0x7FF
add esi,eax
add edi,[ext]
mov ecx,[wid_64]
x_loop_2:
; second row of the pair: the two dwords are fetched in swapped order
push ecx
mov eax,[esi+4] ; read all 4 pixels
bswap eax
mov edx,eax
; 1st dword output {
shr eax,15
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,1
shl ecx,16
mov eax,edx
shr eax,23
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,1
mov [edi],ecx
add edi,4
; }
; 2nd dword output {
mov eax,edx
shl eax,1
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,1
shl ecx,16
shr edx,7
and edx,0x1FE
mov cx,[ebx+edx]
ror cx,1
mov [edi],ecx
add edi,4
; }
; * copy
mov eax,[esi] ; read all 4 pixels
bswap eax
; advance the source by 8 bytes: esi = src + ((esi + 8 - src) & 0x7FF)
mov edx,esi
add edx,8
mov esi,[src]
sub edx,esi
and edx,0x7FF
add esi,edx
mov edx,eax
; 1st dword output {
shr eax,15
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,1
shl ecx,16
mov eax,edx
shr eax,23
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,1
mov [edi],ecx
add edi,4
; }
; 2nd dword output {
mov eax,edx
shl eax,1
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,1
shl ecx,16
shr edx,7
and edx,0x1FE
mov cx,[ebx+edx]
ror cx,1
mov [edi],ecx
add edi,4
; }
; *
pop ecx
dec ecx
jnz x_loop_2
; next source row = src + ((esi + line - src) & 0x7FF)
mov eax,esi
add eax,[line]
mov esi,[src]
sub eax,esi
and eax,0x7FF
add esi,eax
add edi,[ext]
pop ecx
dec ecx
jnz y_loop
end_y_loop:
pop edi
pop esi
pop ebx
}
}
/*****************************************************************
; Expands rows of 8-bit indices into 16-bit texels through a lookup
; table.
;   src, dst - source / destination addresses
;   wid_64   - number of 8-byte groups (eight indices) per row
;   height   - number of rows
;   line     - bytes added to the source pointer after each row
;   ext      - bytes added to the destination pointer after each row
;   pal      - address of the table of 16-bit entries, indexed by the
;              source byte
;
; Each looked-up entry has its two bytes exchanged (ror 8) before it is
; stored; two entries are packed into every dword written. Rows are
; handled in pairs: the second row of each pair reads the two dwords of
; every 8-byte group in swapped order.
;
; NOTE(review): wid_64 and height are decremented before being tested,
; so 0 is presumably never passed - confirm.
*/
void asmLoad8bIA8 (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext, wxUIntPtr pal)
{
_asm {
push ebx
push esi
push edi
mov ebx,[pal]
mov esi,[src]
mov edi,[dst]
mov ecx,[height]
y_loop:
push ecx ; save the row counter, ecx is reused as the column counter
mov ecx,[wid_64]
x_loop:
push ecx ; ecx is used as scratch for packing below
mov eax,[esi] ; read all 4 pixels
bswap eax
add esi,4
mov edx,eax
; 1st dword output {
shr eax,15 ; (bits 16-23) * 2 = byte offset of the table entry
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,8
shl ecx,16
mov eax,edx
shr eax,23 ; (bits 24-31) * 2
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,8
mov [edi],ecx
add edi,4
; }
; 2nd dword output {
mov eax,edx
shl eax,1 ; (bits 0-7) * 2
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,8
shl ecx,16
shr edx,7 ; (bits 8-15) * 2
and edx,0x1FE
mov cx,[ebx+edx]
ror cx,8
mov [edi],ecx
add edi,4
; }
; * copy
mov eax,[esi] ; read all 4 pixels
bswap eax
add esi,4
mov edx,eax
; 1st dword output {
shr eax,15
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,8
shl ecx,16
mov eax,edx
shr eax,23
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,8
mov [edi],ecx
add edi,4
; }
; 2nd dword output {
mov eax,edx
shl eax,1
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,8
shl ecx,16
shr edx,7
and edx,0x1FE
mov cx,[ebx+edx]
ror cx,8
mov [edi],ecx
add edi,4
; }
; *
pop ecx
dec ecx
jnz x_loop
pop ecx
dec ecx
jz near end_y_loop ; the row just written was the last one
push ecx
add esi,[line]
add edi,[ext]
mov ecx,[wid_64]
x_loop_2:
; second row of the pair: the two dwords are fetched in swapped order
push ecx
mov eax,[esi+4] ; read all 4 pixels
bswap eax
mov edx,eax
; 1st dword output {
shr eax,15
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,8
shl ecx,16
mov eax,edx
shr eax,23
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,8
mov [edi],ecx
add edi,4
; }
; 2nd dword output {
mov eax,edx
shl eax,1
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,8
shl ecx,16
shr edx,7
and edx,0x1FE
mov cx,[ebx+edx]
ror cx,8
mov [edi],ecx
add edi,4
; }
; * copy
mov eax,[esi] ; read all 4 pixels
bswap eax
add esi,8
mov edx,eax
; 1st dword output {
shr eax,15
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,8
shl ecx,16
mov eax,edx
shr eax,23
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,8
mov [edi],ecx
add edi,4
; }
; 2nd dword output {
mov eax,edx
shl eax,1
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,8
shl ecx,16
shr edx,7
and edx,0x1FE
mov cx,[ebx+edx]
ror cx,8
mov [edi],ecx
add edi,4
; }
; *
pop ecx
dec ecx
jnz x_loop_2
add esi,[line]
add edi,[ext]
pop ecx
dec ecx
jnz y_loop
end_y_loop:
pop edi
pop esi
pop ebx
}
}
/*****************************************************************
; Size: 1, Format: 3
;
; ** by Gugaman **
;
; 2008.03.29 cleaned up - H.Morii
; 2009 ported to NASM - Sergey (Gonetz) Lipski
;
; Copies rows of 8-bit texels from src to dst, exchanging the high and
; low nibble of every byte.
;   src, dst - source / destination addresses
;   wid_64   - number of 8-byte groups (eight texels) per row
;   height   - number of rows
;   line     - bytes added to the source pointer after each row
;   ext      - bytes added to the destination pointer after each row
;
; Rows are handled in pairs: the second row of each pair reads the two
; dwords of every 8-byte group in swapped order.
;
; NOTE(review): wid_64 and height are decremented before being tested,
; so 0 is presumably never passed - confirm.
*/
void asmLoad8bIA4 (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext)
{
_asm {
push ebx
push esi
push edi
mov esi,[src]
mov edi,[dst]
mov ecx,[height]
y_loop:
push ecx ; save the row counter, ecx is reused as the column counter
mov ecx,[wid_64]
x_loop:
mov eax,[esi] ; read all 4 pixels
mov edx,eax
shr eax,4 ;all alpha
shl edx,4
and eax,0x0F0F0F0F ; high nibbles, now in the low position
and edx,0xF0F0F0F0 ; low nibbles, now in the high position
add esi,4
or eax,edx
mov [edi],eax ; save dword
add edi,4
mov eax,[esi] ; read all 4 pixels
mov edx,eax
shr eax,4 ;all alpha
shl edx,4
and eax,0x0F0F0F0F
and edx,0xF0F0F0F0
add esi,4
or eax,edx
mov [edi],eax ; save dword
add edi,4
; *
dec ecx
jnz x_loop
pop ecx
dec ecx
jz end_y_loop ; the row just written was the last one
push ecx
add esi,[line]
add edi,[ext]
mov ecx,[wid_64]
x_loop_2:
; second row of the pair: the two dwords are fetched in swapped order
mov eax,[esi+4] ; read all 4 pixels
mov edx,eax
shr eax,4 ;all alpha
shl edx,4
and eax,0x0F0F0F0F
and edx,0xF0F0F0F0
or eax,edx
mov [edi],eax ;save dword
add edi,4
mov eax,[esi] ; read all 4 pixels
add esi,8
mov edx,eax
shr eax,4 ;all alpha
shl edx,4
and eax,0x0F0F0F0F
and edx,0xF0F0F0F0
or eax,edx
mov [edi],eax ;save dword
add edi,4
; *
dec ecx
jnz x_loop_2
add esi,[line]
add edi,[ext]
pop ecx
dec ecx
jnz y_loop
end_y_loop:
pop edi
pop esi
pop ebx
}
}
/*****************************************************************
; Size: 1, Format: 4
;
; ** by Gugaman **
; 2009 ported to NASM - Sergey (Gonetz) Lipski
;
; Copies rows of 8-bit texels from src to dst without changing the
; texel values.
;   src, dst - source / destination addresses
;   wid_64   - number of 8-byte groups (eight texels) per row
;   height   - number of rows
;   line     - bytes added to the source pointer after each row
;   ext      - bytes added to the destination pointer after each row
;
; Rows are handled in pairs: the second row of each pair reads the two
; dwords of every 8-byte group in swapped order.
;
; NOTE(review): wid_64 and height are decremented before being tested,
; so 0 is presumably never passed - confirm.
*/
void asmLoad8bI (wxUIntPtr src, int dst, wxUIntPtr wid_64, int height, int line, int ext)
{
_asm {
push ebx
push esi
push edi
mov esi,[src]
mov edi,[dst]
mov ecx,[height]
y_loop:
push ecx ; save the row counter, ecx is reused as the column counter
mov ecx,[wid_64]
x_loop:
mov eax,[esi] ; read all 4 pixels
add esi,4
mov [edi],eax ; save dword
add edi,4
mov eax,[esi] ; read all 4 pixels
add esi,4
mov [edi],eax ; save dword
add edi,4
; *
dec ecx
jnz x_loop
pop ecx
dec ecx
jz end_y_loop ; the row just written was the last one
push ecx
add esi,[line]
add edi,[ext]
mov ecx,[wid_64]
x_loop_2:
; second row of the pair: the two dwords are fetched in swapped order
mov eax,[esi+4] ; read all 4 pixels
mov [edi],eax ;save dword
add edi,4
mov eax,[esi] ; read all 4 pixels
add esi,8
mov [edi],eax ;save dword
add edi,4
; *
dec ecx
jnz x_loop_2
add esi,[line]
add edi,[ext]
pop ecx
dec ecx
jnz y_loop
end_y_loop:
pop edi
pop esi
pop ebx
}
}
/*****************************************************************
;
; ******** Textures mirror/clamp/wrap ********
;
;*****************************************************************/
/*****************************************************************
;8b textures mirror/clamp/wrap
;*****************************************************************/
// Writes `count` 8-bit texels per row, for `height` rows, to the memory at
// `start`, reading them from the row that begins at `tex`.
//   tex    - address of the current source row (advanced by `full` per row)
//   start  - address of the first texel to write
//   width  - value used for the mirror test, see below
//   mask   - mask applied to the column index to get the source offset
//   line   - bytes added to the write pointer after each row
//   full   - bytes added to `tex` after each row
//   count  - texels written per row
// For column x the source offset is (x & mask) when ((width + x) & width)
// is zero, and mask - (x & mask) otherwise (the mirrored case).
// NOTE(review): count == 0 or height == 0 is presumably never passed (the
// loops test after incrementing/decrementing) - confirm.
void asmMirror8bS (int tex, int start, int width, int height, int mask, int line, int full, int count)
{
_asm{
push ebx
push esi
push edi
mov edi,[start]
mov ecx,[height]
loop_y:
xor edx,edx ; edx = column index x
loop_x:
mov esi,[tex]
mov ebx,[width]
add ebx,edx
and ebx,[width] ; non-zero => this column is read mirrored
jnz is_mirrored
mov eax,edx
and eax,[mask]
add esi,eax
mov al,[esi]
mov [edi],al
inc edi
jmp end_mirror_check
is_mirrored:
add esi,[mask] ; start from the far end and walk backwards
mov eax,edx
and eax,[mask]
sub esi,eax
mov al,[esi]
mov [edi],al
inc edi
end_mirror_check:
inc edx
cmp edx,[count]
jne loop_x
add edi,[line]
mov eax,[tex] ; advance the source row: tex += full
add eax,[full]
mov [tex],eax
dec ecx
jnz loop_y
pop edi
pop esi
pop ebx
}
}
// Writes `count` dwords per row, for `height` rows, to the memory at `start`,
// reading them from the row that begins at `tex`.
//   tex    - address of the current source row (advanced by `full` per row)
//   start  - address of the first dword to write
//   mask   - mask applied to the column index before it is scaled by 4
//   line   - bytes added to the write pointer after each row
//   full   - bytes added to `tex` after each row
//   count  - dwords written per row
// NOTE(review): despite the "8b" name the loop moves 4-byte units, so mask
// and count are presumably expressed in dwords (four 8-bit texels) - confirm
// against the caller.
// NOTE(review): count == 0 or height == 0 is presumably never passed (the
// loops test after incrementing/decrementing) - confirm.
void asmWrap8bS (int tex, int start, int height, int mask, int line, int full, int count)
{
_asm {
push ebx
push esi
push edi
mov edi,[start]
mov ecx,[height]
loop_y:
xor edx,edx ; edx = column index
loop_x:
mov esi,[tex]
mov eax,edx
and eax,[mask]
shl eax,2 ; masked index * 4 bytes
add esi,eax
mov eax,[esi]
mov [edi],eax
add edi,4
inc edx
cmp edx,[count]
jne loop_x
add edi,[line]
mov eax,[tex] ; advance the source row: tex += full
add eax,[full]
mov [tex],eax
dec ecx
jnz loop_y
pop edi
pop esi
pop ebx
}
}
// Fills `count` 8-bit texels per row, for `height` rows, with a per-row
// constant value.
//   tex      - address of the first texel to write
//   constant - address of the byte to replicate for the first row
//   line     - bytes added to the write pointer after each row
//   full     - bytes added to the `constant` read pointer after each row
//   count    - texels written per row
// NOTE(review): count and height are decremented before being tested, so 0
// is presumably never passed - confirm.
void asmClamp8bS (int tex, int constant, int height,int line, int full, int count)
{
_asm {
push ebx
push esi
push edi
mov esi,[constant]
mov edi,[tex]
mov ecx,[height]
y_loop:
mov al,[esi] ; value replicated across this row
mov edx,[count]
x_loop:
mov [edi],al ; don't unroll or make dword, it may go into next line (doesn't have to be multiple of two)
inc edi
dec edx
jnz x_loop
add esi,[full]
add edi,[line]
dec ecx
jnz y_loop
pop edi
pop esi
pop ebx
}
}

View File

@ -37,10 +37,10 @@
//
//****************************************************************
extern "C" void asmLoad8bCI (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext, wxUIntPtr pal);
extern "C" void asmLoad8bIA8 (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext, wxUIntPtr pal);
extern "C" void asmLoad8bIA4 (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext);
extern "C" void asmLoad8bI (wxUIntPtr src, int dst, wxUIntPtr wid_64, int height, int line, int ext);
void asmLoad8bCI (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext, wxUIntPtr pal);
void asmLoad8bIA8 (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext, wxUIntPtr pal);
void asmLoad8bIA4 (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext);
void asmLoad8bI (wxUIntPtr src, int dst, wxUIntPtr wid_64, int height, int line, int ext);
//****************************************************************
// Size: 1, Format: 2

File diff suppressed because it is too large Load Diff

Binary file not shown.

View File

@ -48,9 +48,34 @@
#include "FBtoScreen.h"
#include "CRC.h"
extern "C" void SwapBlock32 ();
extern "C" void SwapBlock64 ();
/*****************************************************************
; SwapBlock - swaps every other 32-bit word at addr
;
; ecx = num_words -> 0
; edi = addr -> end of dest
;
; Register-based helper: it is declared naked, takes its inputs in ecx
; and edi instead of C parameters, and ends with its own ret.
; Each iteration exchanges the two dwords of one 8-byte group, so
; num_words counts 8-byte groups. eax and ebx are saved and restored;
; a count of 0 is handled and does nothing.
;*****************************************************************/
__declspec(naked) void SwapBlock32 ()
{
_asm {
push eax
push ebx
or ecx,ecx ; sets ZF when the count is 0
jz swapblock32_end
swapblock32_loop:
mov eax,[edi]
mov ebx,[edi+4]
mov [edi],ebx
mov [edi+4],eax
add edi,8
dec ecx
jnz swapblock32_loop
swapblock32_end:
pop ebx
pop eax
ret
}
}
const int NumOfFormats = 3;
SCREEN_SHOT_FORMAT ScreenShotFormats[NumOfFormats] = { {wxT("BMP"), wxT("bmp"), wxBITMAP_TYPE_BMP}, {wxT("PNG"), wxT("png"), wxBITMAP_TYPE_PNG}, {wxT("JPEG"), wxT("jpeg"), wxBITMAP_TYPE_JPEG} };
@ -1825,7 +1850,165 @@ void setTBufTex(wxUint16 t_mem, wxUint32 cnt)
}
}
extern "C" void asmLoadBlock(int src, int dst, int off, int dxt, int cnt, int swp);
/*****************************************************************
; CopyBlock - copies num_words 64-bit words from base_addr+offset to
; dest_addr, byte-reversing every 32-bit dword on the way (bswap).
;
; The source is always read with dword loads at (offset & ~3). When
; offset is not a multiple of 4, the leading (offset & 3) bytes of the
; first dword are skipped, and the same number of bytes is taken from
; the dword that follows the block to complete the last word.
;
; Register interface (naked routine, no stack arguments):
; edi = dest_addr -> end of dest
; ecx = num_words (0 is allowed: nothing is copied)
; esi = base_addr (preserved)
; edx = offset (preserved)
; eax, ebx are preserved.
; ecx is NOT reliably preserved: it is left as num_words when offset is
; dword aligned and as num_words*8 otherwise, so callers that still
; need it must save it themselves.
;*****************************************************************/
__declspec(naked) void CopyBlock ( void )
{
_asm {
push eax
push ebx
push esi
push edx
; nothing to do for an empty block
or ecx,ecx
jz near copyblock_end
; keep num_words, base_addr and offset for the post-copy step
push ecx
; first, set the source address and check if not on a dword boundary
push esi
push edx
mov ebx,edx
and edx,0FFFFFFFCh
add esi,edx
and ebx,3 ; ebx = number of leading bytes to skip
jz copyblock_copy
mov edx,4 ; edx = number of bytes of the first dword to copy
sub edx,ebx
; load the first word, accounting for swapping
mov eax,[esi]
add esi,4
; each rol brings the next most significant byte into al;
; here the skipped bytes are rotated past without being stored
copyblock_precopy_skip:
rol eax,8
dec ebx
jnz copyblock_precopy_skip
; store the remaining bytes of the first dword one at a time
copyblock_precopy_copy:
rol eax,8
mov [edi],al
inc edi
dec edx
jnz copyblock_precopy_copy
; second dword of the first word: plain byte-reversed copy
mov eax,[esi]
add esi,4
bswap eax
mov [edi],eax
add edi,4
dec ecx ; 1 less word to copy
jz copyblock_postcopy
; main loop: one 64-bit word (two byte-reversed dwords) per iteration
copyblock_copy:
mov eax,[esi]
bswap eax
mov [edi],eax
mov eax,[esi+4]
bswap eax
mov [edi+4],eax
add esi,8
add edi,8
dec ecx
jnz copyblock_copy
copyblock_postcopy:
; recover offset, base_addr and num_words saved before the copy
pop edx
pop esi
pop ecx
; check again if on dword boundary
mov ebx,edx ; ebx = number of trailing bytes still to copy
and ebx,3
jz copyblock_end
shl ecx,3 ; ecx = num_words * 8
; esi = base_addr + ((offset + num_words*8) & ~3): dword after the block
add edx,ecx
and edx,0FFFFFFFCh
add esi,edx
mov eax,[esi]
; store its most significant ebx bytes, highest byte first
copyblock_postcopy_copy:
rol eax,8
mov [edi],al
inc edi
dec ebx
jnz copyblock_postcopy_copy
copyblock_end:
pop edx
pop esi
pop ebx
pop eax
ret
}
}
void asmLoadBlock(int src, int dst, int off, int dxt, int cnt, wxUIntPtr swp)
{
_asm {
push ebx
push esi
push edi
; copy the data
mov esi,[src]
mov edi,[dst]
mov ecx,[cnt]
mov edx,[off]
call CopyBlock
; now swap it
mov eax,[cnt] ; eax = count remaining
xor edx,edx ; edx = dxt counter
mov edi,[dst]
mov ebx,[dxt]
xor ecx,ecx ; ecx = how much to copy
dxt_test:
add edi,8
dec eax
jz end_dxt_test
add edx,ebx
jns dxt_test
dxt_s_test:
inc ecx
dec eax
jz end_dxt_test
add edx,ebx
js dxt_s_test
; swap this data (ecx set, dst set)
call [swp] ; (ecx reset to 0 after)
jmp dxt_test ; and repeat
end_dxt_test:
; swap any remaining data
call [swp]
pop edi
pop esi
pop ebx
}
}
void LoadBlock32b(wxUint32 tile, wxUint32 ul_s, wxUint32 ul_t, wxUint32 lr_s, wxUint32 dxt);
static void rdp_loadblock()
{
@ -1917,7 +2100,61 @@ static void rdp_loadblock()
setTBufTex(rdp.tiles[tile].t_mem, cnt);
}
extern "C" void asmLoadTile(int src, int dst, int width, int height, int line, int off, int end, int swap);
void asmLoadTile(int src, int dst, int width, int height, int line, int off, int end, int swap)
{
_asm {
push ebx
push esi
push edi
; set initial values
mov edi,[dst]
mov ecx,[width]
mov esi,[src]
mov edx,[off]
xor ebx,ebx ; swap this line?
mov eax,[height]
loadtile_loop:
cmp [end],edi ; end of tmem: error
jc loadtile_end
; copy this line
push edi
push ecx
call CopyBlock
pop ecx
; swap it?
xor ebx,1
jnz loadtile_no_swap
; (ecx set, restore edi)
pop edi
push ecx
int 3
mov ecx,[swap]
call ecx
pop ecx
jmp loadtile_swap_end
loadtile_no_swap:
add sp,4 ; forget edi, we are already at the next position
loadtile_swap_end:
add edx,[line]
dec eax
jnz loadtile_loop
loadtile_end:
pop edi
pop esi
pop ebx
}
}
void LoadTile32b (wxUint32 tile, wxUint32 ul_s, wxUint32 ul_t, wxUint32 width, wxUint32 height);
static void rdp_loadtile()
{

248
Source/Glide64/texConv.cpp Normal file
View File

@ -0,0 +1,248 @@
#include "Gfx #1.3.h"
/*****************************************************************
;
; ******** Textures conversion ********
;
;*****************************************************************/
/*****************************************************************
; asmTexConv_ARGB1555_ARGB4444 - converts 16-bit ARGB1555 pixels to
; 16-bit ARGB4444, two pixels (one dword) per iteration.
;
;   src   - address of the source pixels
;   dst   - address of the destination pixels
;   isize - number of dwords (pixel pairs) to convert; values <= 0
;           convert nothing
;
; The single alpha bit is replicated into all four alpha bits; each
; colour channel keeps its four most significant bits.
;*****************************************************************/
void asmTexConv_ARGB1555_ARGB4444(wxUIntPtr src, wxUIntPtr dst, int isize)
{
    _asm {
        push    ebx
        push    esi
        push    edi

        mov     esi,[src]
        mov     edi,[dst]
        mov     ecx,[isize]
        test    ecx,ecx             ; dec/jnz below would wrap on 0
        jle     tc1555_done

    tc1555_loop:
        mov     eax,[esi]
        add     esi,4

        ; bit layout of each 16-bit half:
        ;   in : arrr rrgg gggb bbbb
        ;   out: aaaa rrrr gggg bbbb
        mov     edx,eax             ; edx = untouched copy of both pixels

        and     eax,0x80008000      ; eax = a000 0000 0000 0000
        mov     ebx,eax             ; ebx = a000 0000 0000 0000
        shr     eax,1
        or      ebx,eax             ; ebx = aa00 0000 0000 0000
        shr     eax,1
        or      ebx,eax             ; ebx = aaa0 0000 0000 0000
        shr     eax,1
        or      ebx,eax             ; ebx = aaaa 0000 0000 0000

        mov     eax,edx
        and     eax,0x78007800      ; eax = 0rrr r000 0000 0000
        shr     eax,3               ; eax = 0000 rrrr 0000 0000
        or      ebx,eax             ; ebx = aaaa rrrr 0000 0000

        mov     eax,edx
        and     eax,0x03c003c0      ; eax = 0000 00gg gg00 0000
        shr     eax,2               ; eax = 0000 0000 gggg 0000
        or      ebx,eax             ; ebx = aaaa rrrr gggg 0000

        and     edx,0x001e001e      ; edx = 0000 0000 000b bbb0
        shr     edx,1               ; edx = 0000 0000 0000 bbbb
        or      ebx,edx             ; ebx = aaaa rrrr gggg bbbb

        mov     [edi],ebx
        add     edi,4
        dec     ecx
        jnz     tc1555_loop

    tc1555_done:
        pop     edi
        pop     esi
        pop     ebx
    }
}
/*****************************************************************
; asmTexConv_AI88_ARGB4444 - converts 16-bit alpha/intensity pixels
; (8 bits each) to 16-bit ARGB4444, two pixels (one dword) per
; iteration.
;
;   src   - address of the source pixels
;   dst   - address of the destination pixels
;   isize - number of dwords (pixel pairs) to convert; values <= 0
;           convert nothing
;
; The top four alpha bits become the alpha nibble; the top four
; intensity bits are copied into the red, green and blue nibbles.
;*****************************************************************/
void asmTexConv_AI88_ARGB4444(wxUIntPtr src, wxUIntPtr dst, int isize)
{
    _asm {
        push    ebx
        push    esi
        push    edi

        mov     esi,[src]
        mov     edi,[dst]
        mov     ecx,[isize]
        test    ecx,ecx             ; dec/jnz below would wrap on 0
        jle     tcai88_done

    tcai88_loop:
        mov     eax,[esi]
        add     esi,4

        ; bit layout of each 16-bit half:
        ;   in : aaaa aaaa iiii iiii
        ;   out: aaaa rrrr gggg bbbb   (r = g = b = top nibble of i)
        mov     edx,eax
        and     eax,0xF000F000      ; eax = aaaa 0000 0000 0000
        mov     ebx,eax             ; ebx = aaaa 0000 0000 0000
        and     edx,0x00F000F0      ; edx = 0000 0000 iiii 0000
        shl     edx,4               ; edx = 0000 iiii 0000 0000
        or      ebx,edx             ; ebx = aaaa iiii 0000 0000
        shr     edx,4               ; edx = 0000 0000 iiii 0000
        or      ebx,edx             ; ebx = aaaa iiii iiii 0000
        shr     edx,4               ; edx = 0000 0000 0000 iiii
        or      ebx,edx             ; ebx = aaaa iiii iiii iiii

        mov     [edi],ebx
        add     edi,4
        dec     ecx
        jnz     tcai88_loop

    tcai88_done:
        pop     edi
        pop     esi
        pop     ebx
    }
}
/*****************************************************************
; asmTexConv_AI44_ARGB4444 - converts 8-bit alpha/intensity pixels
; (4 bits each) to 16-bit ARGB4444. Every iteration reads four
; source pixels (one dword) and writes four destination pixels
; (two dwords).
;
;   src   - address of the source pixels
;   dst   - address of the destination pixels
;   isize - number of source dwords to convert; values <= 0 convert
;           nothing
;
; The alpha nibble is kept; the intensity nibble is copied into the
; red, green and blue nibbles.
;*****************************************************************/
void asmTexConv_AI44_ARGB4444(wxUIntPtr src, wxUIntPtr dst, int isize)
{
    _asm {
        push    ebx
        push    esi
        push    edi

        mov     esi,[src]
        mov     edi,[dst]
        mov     ecx,[isize]
        test    ecx,ecx             ; dec/jnz below would wrap on 0
        jle     tcai44_done

    tcai44_loop:
        mov     eax,[esi]
        add     esi,4

        ; in   : aaaa3 iiii3 aaaa2 iiii2 aaaa1 iiii1 aaaa0 iiii0
        ; out 0: aaaa1 rrrr1 gggg1 bbbb1 aaaa0 rrrr0 gggg0 bbbb0
        ; out 1: aaaa3 rrrr3 gggg3 bbbb3 aaaa2 rrrr2 gggg2 bbbb2

        ; ---- first output dword: pixels 1 and 0 ----
        mov     edx,eax             ; edx = aaaa3 iiii3 aaaa2 iiii2 aaaa1 iiii1 aaaa0 iiii0
        shl     eax,16              ; eax = aaaa1 iiii1 aaaa0 iiii0 0000 0000 0000 0000
        and     eax,0xFF000000      ; eax = aaaa1 iiii1 0000 0000 0000 0000 0000 0000
        mov     ebx,eax             ; ebx = aaaa1 iiii1 0000 0000 0000 0000 0000 0000
        and     eax,0x0F000000      ; eax = 0000 iiii1 0000 0000 0000 0000 0000 0000
        shr     eax,4               ; eax = 0000 0000 iiii1 0000 0000 0000 0000 0000
        or      ebx,eax             ; ebx = aaaa1 iiii1 iiii1 0000 0000 0000 0000 0000
        shr     eax,4               ; eax = 0000 0000 0000 iiii1 0000 0000 0000 0000
        or      ebx,eax             ; ebx = aaaa1 iiii1 iiii1 iiii1 0000 0000 0000 0000

        mov     eax,edx             ; eax = aaaa3 iiii3 aaaa2 iiii2 aaaa1 iiii1 aaaa0 iiii0
        shl     eax,8               ; eax = aaaa2 iiii2 aaaa1 iiii1 aaaa0 iiii0 0000 0000
        and     eax,0x0000FF00      ; eax = 0000 0000 0000 0000 aaaa0 iiii0 0000 0000
        or      ebx,eax             ; ebx = aaaa1 iiii1 iiii1 iiii1 aaaa0 iiii0 0000 0000
        and     eax,0x00000F00      ; eax = 0000 0000 0000 0000 0000 iiii0 0000 0000
        shr     eax,4               ; eax = 0000 0000 0000 0000 0000 0000 iiii0 0000
        or      ebx,eax             ; ebx = aaaa1 iiii1 iiii1 iiii1 aaaa0 iiii0 iiii0 0000
        shr     eax,4               ; eax = 0000 0000 0000 0000 0000 0000 0000 iiii0
        or      ebx,eax             ; ebx = aaaa1 iiii1 iiii1 iiii1 aaaa0 iiii0 iiii0 iiii0

        mov     [edi],ebx
        add     edi,4

        ; ---- second output dword: pixels 3 and 2 ----
        mov     eax,edx             ; eax = aaaa3 iiii3 aaaa2 iiii2 aaaa1 iiii1 aaaa0 iiii0
        and     eax,0xFF000000      ; eax = aaaa3 iiii3 0000 0000 0000 0000 0000 0000
        mov     ebx,eax             ; ebx = aaaa3 iiii3 0000 0000 0000 0000 0000 0000
        and     eax,0x0F000000      ; eax = 0000 iiii3 0000 0000 0000 0000 0000 0000
        shr     eax,4               ; eax = 0000 0000 iiii3 0000 0000 0000 0000 0000
        or      ebx,eax             ; ebx = aaaa3 iiii3 iiii3 0000 0000 0000 0000 0000
        shr     eax,4               ; eax = 0000 0000 0000 iiii3 0000 0000 0000 0000
        or      ebx,eax             ; ebx = aaaa3 iiii3 iiii3 iiii3 0000 0000 0000 0000

                                    ; edx = aaaa3 iiii3 aaaa2 iiii2 aaaa1 iiii1 aaaa0 iiii0
        shr     edx,8               ; edx = 0000 0000 aaaa3 iiii3 aaaa2 iiii2 aaaa1 iiii1
        and     edx,0x0000FF00      ; edx = 0000 0000 0000 0000 aaaa2 iiii2 0000 0000
        or      ebx,edx             ; ebx = aaaa3 iiii3 iiii3 iiii3 aaaa2 iiii2 0000 0000
        and     edx,0x00000F00      ; edx = 0000 0000 0000 0000 0000 iiii2 0000 0000
        shr     edx,4               ; edx = 0000 0000 0000 0000 0000 0000 iiii2 0000
        or      ebx,edx             ; ebx = aaaa3 iiii3 iiii3 iiii3 aaaa2 iiii2 iiii2 0000
        shr     edx,4               ; edx = 0000 0000 0000 0000 0000 0000 0000 iiii2
        or      ebx,edx             ; ebx = aaaa3 iiii3 iiii3 iiii3 aaaa2 iiii2 iiii2 iiii2

        mov     [edi],ebx
        add     edi,4

        dec     ecx
        jnz     tcai44_loop

    tcai44_done:
        pop     edi
        pop     esi
        pop     ebx
    }
}
/*****************************************************************
; asmTexConv_A8_ARGB4444 - converts 8-bit pixels to 16-bit ARGB4444.
; Every iteration reads four source pixels (one dword) and writes
; four destination pixels (two dwords).
;
;   src   - address of the source pixels
;   dst   - address of the destination pixels
;   isize - number of source dwords to convert; values <= 0 convert
;           nothing
;
; The top nibble of each source byte is copied into all four
; nibbles (a, r, g, b) of the matching destination pixel; the low
; nibble is discarded.
;*****************************************************************/
void asmTexConv_A8_ARGB4444(wxUIntPtr src, wxUIntPtr dst, int isize)
{
    _asm {
        push    ebx
        push    esi
        push    edi

        mov     esi,[src]
        mov     edi,[dst]
        mov     ecx,[isize]
        test    ecx,ecx             ; dec/jnz below would wrap on 0
        jle     tca8_done

    tca8_loop:
        mov     eax,[esi]
        add     esi,4

        ; in   : aaaa3 aaaa3 aaaa2 aaaa2 aaaa1 aaaa1 aaaa0 aaaa0
        ; out 0: aaaa1 rrrr1 gggg1 bbbb1 aaaa0 rrrr0 gggg0 bbbb0
        ; out 1: aaaa3 rrrr3 gggg3 bbbb3 aaaa2 rrrr2 gggg2 bbbb2
        ; (the columns in the comments below are nibbles, high to low)

        ; ---- first output dword: pixels 1 and 0 ----
        mov     edx,eax             ; edx = untouched copy of the four pixels
        and     eax,0x0000F000      ; eax = 00 00 00 00 a1 00 00 00
        shl     eax,16              ; eax = a1 00 00 00 00 00 00 00
        mov     ebx,eax             ; ebx = a1 00 00 00 00 00 00 00
        shr     eax,4
        or      ebx,eax             ; ebx = a1 a1 00 00 00 00 00 00
        shr     eax,4
        or      ebx,eax             ; ebx = a1 a1 a1 00 00 00 00 00
        shr     eax,4
        or      ebx,eax             ; ebx = a1 a1 a1 a1 00 00 00 00

        mov     eax,edx
        and     eax,0x000000F0      ; eax = 00 00 00 00 00 00 a0 00
        shl     eax,8               ; eax = 00 00 00 00 a0 00 00 00
        or      ebx,eax
        shr     eax,4
        or      ebx,eax
        shr     eax,4
        or      ebx,eax
        shr     eax,4
        or      ebx,eax             ; ebx = a1 a1 a1 a1 a0 a0 a0 a0

        mov     [edi],ebx
        add     edi,4

        ; ---- second output dword: pixels 3 and 2 ----
        mov     eax,edx             ; eax = a3 a3 a2 a2 a1 a1 a0 a0
        and     eax,0xF0000000      ; eax = a3 00 00 00 00 00 00 00
        mov     ebx,eax             ; ebx = a3 00 00 00 00 00 00 00
        shr     eax,4
        or      ebx,eax             ; ebx = a3 a3 00 00 00 00 00 00
        shr     eax,4
        or      ebx,eax             ; ebx = a3 a3 a3 00 00 00 00 00
        shr     eax,4
        or      ebx,eax             ; ebx = a3 a3 a3 a3 00 00 00 00

        and     edx,0x00F00000      ; edx = 00 00 a2 00 00 00 00 00
        shr     edx,8               ; edx = 00 00 00 00 a2 00 00 00
        or      ebx,edx
        shr     edx,4
        or      ebx,edx
        shr     edx,4
        or      ebx,edx
        shr     edx,4
        or      ebx,edx             ; ebx = a3 a3 a3 a3 a2 a2 a2 a2

        mov     [edi],ebx
        add     edi,4

        dec     ecx
        jnz     tca8_loop

    tca8_done:
        pop     edi
        pop     esi
        pop     ebx
    }
}