FPPS4/sys/jit/kern_jit_ops_sse.pas

1018 lines
24 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

unit kern_jit_ops_sse;
{$mode ObjFPC}{$H+}
{$CALLING SysV_ABI_CDecl}
interface
implementation
uses
kern_thr,
x86_fpdbgdisas,
x86_jit,
kern_jit_ops,
kern_jit_asm,
kern_jit_ctx;
//Unit-private flag. SetupSupport assigns it from CPUID leaf $80000001
//(ECX masked with $40); init_cbs_sse reads it to pick the handlers for
//movnt sd/ss and insertq. It stays False until SetupSupport has run.
var
_SSE4aSupport:Boolean=False;
//Places the prefix byte i directly above the most significant used byte
//of the packed opcode in op:
//  op in $00..$FF      -> i goes to bits  8..15
//  op in $100..$FFFF   -> i goes to bits 16..23
//  anything larger     -> i goes to bits 24..31
//The byte is OR-ed in, so an opcode that already occupies all four bytes
//gets its top byte merged with i rather than replaced.
procedure _ins_op(var op:DWORD;i:Byte); inline;
var
 shift:Byte;
begin
 if (op<=$FF) then
 begin
  shift:=8;
 end else
 if (op<=$FFFF) then
 begin
  shift:=16;
 end else
 begin
  shift:=24;
 end;
 op:=op or (DWORD(i) shl shift);
end;
//Descriptor overload: applies the prefix byte i to the opcode of every
//encoding slot of desc. All four slots are patched unconditionally,
//including slots whose opcode is still 0.
procedure _ins_op(var desc:t_op_desc;i:Byte); inline;
begin
 //the four patches are independent of each other
 _ins_op(desc.reg_im8.op,i);
 _ins_op(desc.reg_imm.op,i);
 _ins_op(desc.reg_mem.op,i);
 _ins_op(desc.mem_reg.op,i);
end;
//Emits a two-operand instruction from desc after adding the prefix byte
//that the decoder reported in ctx.dis.SimdOpcode:
//  soNone -> descriptor used unchanged
//  so66 / soF2 / soF3 -> $66 / $F2 / $F3 inserted via _ins_op
//Any other SimdOpcode value trips Assert(False).
//desc itself is never modified; a private copy is patched.
procedure op_emit2_simd(var ctx:t_jit_context2;const desc:t_op_desc);
var
 patched:t_op_desc;
begin
 patched:=desc;

 case ctx.dis.SimdOpcode of
  so66  :_ins_op(patched,$66);
  soF2  :_ins_op(patched,$F2);
  soF3  :_ins_op(patched,$F3);
  soNone:; //nothing to insert
  else
   Assert(False);
 end;

 op_emit2(ctx,patched);
end;
//Generic emitter for instructions that only have the mem_reg encoding
//slot. The opcode is taken from the decoded instruction
//(ctx.dis.opcode), the access hint from the caller; the prefix byte is
//added afterwards by op_emit2_simd.
procedure op_emit2_simd_mem_reg(var ctx:t_jit_context2;hint:t_op_hint);
const
 //template: only mem_reg is implemented, its opcode is filled in per call
 base_desc:t_op_desc=(
  mem_reg:(op:0;opt:[not_os8,not_prefix]);
  reg_mem:(opt:[not_impl]);
  reg_imm:(opt:[not_impl]);
  reg_im8:(opt:[not_impl]);
  hint:[];
 );
var
 d:t_op_desc;
begin
 d:=base_desc;
 d.hint:=hint;
 d.mem_reg.op:=ctx.dis.opcode;
 op_emit2_simd(ctx,d);
end;
//Generic emitter for instructions that have no mem_reg slot. The
//reg_mem, reg_imm and reg_im8 slots all receive the opcode of the
//decoded instruction (ctx.dis.opcode); the access hint comes from the
//caller and the prefix byte is added afterwards by op_emit2_simd.
procedure op_emit2_simd_reg_mem(var ctx:t_jit_context2;hint:t_op_hint);
const
 //template: every slot except mem_reg is implemented
 base_desc:t_op_desc=(
  mem_reg:(opt:[not_impl]);
  reg_mem:(op:0;opt:[not_os8,not_prefix]);
  reg_imm:(op:0;opt:[not_os8,not_prefix]);
  reg_im8:(op:0;opt:[not_os8,not_prefix]);
  hint:[];
 );
var
 d:t_op_desc;
begin
 d:=base_desc;
 d.hint:=hint;
 //same decoded opcode for all three implemented slots
 d.reg_im8.op:=ctx.dis.opcode;
 d.reg_imm.op:=ctx.dis.opcode;
 d.reg_mem.op:=ctx.dis.opcode;
 op_emit2_simd(ctx,d);
end;
//
//Handler for instructions emitted through the reg_mem path with the
//his_rw hint. When neither is_preserved nor is_memory reports true for
//the decoded instruction, it is handed to add_orig instead.
procedure op_reg_mem_rw(var ctx:t_jit_context2);
begin
 if not (is_preserved(ctx.din) or is_memory(ctx.din)) then
 begin
  add_orig(ctx);
  Exit;
 end;
 op_emit2_simd_reg_mem(ctx,[his_rw]);
end;
//Same dispatch as op_reg_mem_rw but with the his_wo hint: re-emitted
//through op_emit2_simd_reg_mem when is_preserved or is_memory is true,
//otherwise passed to add_orig.
procedure op_reg_mem_wo(var ctx:t_jit_context2);
begin
 if not (is_preserved(ctx.din) or is_memory(ctx.din)) then
 begin
  add_orig(ctx);
  Exit;
 end;
 op_emit2_simd_reg_mem(ctx,[his_wo]);
end;
//Same dispatch as op_reg_mem_rw but with the his_ro hint: re-emitted
//through op_emit2_simd_reg_mem when is_preserved or is_memory is true,
//otherwise passed to add_orig.
procedure op_reg_mem_ro(var ctx:t_jit_context2);
begin
 if not (is_preserved(ctx.din) or is_memory(ctx.din)) then
 begin
  add_orig(ctx);
  Exit;
 end;
 op_emit2_simd_reg_mem(ctx,[his_ro]);
end;
//
//Move-style handler for the mem_reg encoding slot. Uses the hints
//[his_mov,his_wo] with op_emit2_simd_mem_reg when is_preserved or
//is_memory is true for the decoded instruction; otherwise add_orig.
procedure op_mem_reg_mov_wo(var ctx:t_jit_context2);
begin
 if not (is_preserved(ctx.din) or is_memory(ctx.din)) then
 begin
  add_orig(ctx);
  Exit;
 end;
 op_emit2_simd_mem_reg(ctx,[his_mov,his_wo]);
end;
//Move-style handler for the reg_mem encoding slots. Uses the hints
//[his_mov,his_wo] with op_emit2_simd_reg_mem when is_preserved or
//is_memory is true for the decoded instruction; otherwise add_orig.
procedure op_reg_mem_mov_wo(var ctx:t_jit_context2);
begin
 if not (is_preserved(ctx.din) or is_memory(ctx.din)) then
 begin
  add_orig(ctx);
  Exit;
 end;
 op_emit2_simd_reg_mem(ctx,[his_mov,his_wo]);
end;
//
const
 //MOVSD: F2 0F 11 /r in the mem_reg slot, F2 0F 10 /r in the reg_mem slot.
 //The F2 prefix is already part of the opcode constants.
 movsd_desc:t_op_desc=(
  mem_reg:(op:$F20F11;opt:[not_os8,not_prefix]);
  reg_mem:(op:$F20F10;opt:[not_os8,not_prefix]);
  reg_imm:(opt:[not_impl]);
  reg_im8:(opt:[not_impl]);
  hint:[his_mov,his_wo];
 );

//MOVSD handler. Emits through plain op_emit2 (no prefix insertion, the
//descriptor is complete) when is_preserved or is_memory is true,
//otherwise passes the instruction to add_orig.
procedure op_movsd(var ctx:t_jit_context2);
begin
 if not (is_preserved(ctx.din) or is_memory(ctx.din)) then
 begin
  add_orig(ctx);
  Exit;
 end;
 op_emit2(ctx,movsd_desc);
end;
const
 //MOVSS: F3 0F 11 /r in the mem_reg slot, F3 0F 10 /r in the reg_mem slot.
 //The F3 prefix is already part of the opcode constants.
 movss_desc:t_op_desc=(
  mem_reg:(op:$F30F11;opt:[not_os8,not_prefix]);
  reg_mem:(op:$F30F10;opt:[not_os8,not_prefix]);
  reg_imm:(opt:[not_impl]);
  reg_im8:(opt:[not_impl]);
  hint:[his_mov,his_wo];
 );

//MOVSS handler. Emits through plain op_emit2 (the descriptor already
//contains the prefix) when is_preserved or is_memory is true, otherwise
//passes the instruction to add_orig.
procedure op_movss(var ctx:t_jit_context2);
begin
 if not (is_preserved(ctx.din) or is_memory(ctx.din)) then
 begin
  add_orig(ctx);
  Exit;
 end;
 op_emit2(ctx,movss_desc);
end;
const
 //MOVD/MOVQ base opcodes without prefix: 0F 7E /r in the mem_reg slot,
 //0F 6E /r in the reg_mem slot. The prefix byte reported by the decoder
 //is added at emit time by op_emit2_simd.
 mov_dq_desc:t_op_desc=(
  mem_reg:(op:$0F7E;opt:[not_os8,not_prefix]);
  reg_mem:(op:$0F6E;opt:[not_os8,not_prefix]);
  reg_imm:(opt:[not_impl]);
  reg_im8:(opt:[not_impl]);
  hint:[his_mov,his_wo];
 );

//Handler registered for both the "d" and the "q" suffix of OPmov.
//NOTE(review): MOVQ also exists as F3 0F 7E (load) and 66 0F D6 (store),
//which do not follow the 6E/7E pairing used here - confirm how the
//decoder classifies those forms before relying on this for them.
procedure op_movd_dq(var ctx:t_jit_context2);
begin
 if not (is_preserved(ctx.din) or is_memory(ctx.din)) then
 begin
  add_orig(ctx);
  Exit;
 end;
 op_emit2_simd(ctx,mov_dq_desc);
end;
const
 //MOVDQA: 66 0F 7F /r in the mem_reg slot, 66 0F 6F /r in the reg_mem
 //slot. Carries the additional his_align hint.
 movdqa_desc:t_op_desc=(
  mem_reg:(op:$660F7F;opt:[not_os8,not_prefix]);
  reg_mem:(op:$660F6F;opt:[not_os8,not_prefix]);
  reg_imm:(opt:[not_impl]);
  reg_im8:(opt:[not_impl]);
  hint:[his_mov,his_wo,his_align];
 );

//MOVDQA handler. Only is_memory is tested here (is_preserved is not
//consulted); when it is false the instruction goes to add_orig.
procedure op_movdqa(var ctx:t_jit_context2);
begin
 if not is_memory(ctx.din) then
 begin
  add_orig(ctx);
  Exit;
 end;
 op_emit2(ctx,movdqa_desc);
end;
const
 //MOVDQU: F3 0F 7F /r in the mem_reg slot, F3 0F 6F /r in the reg_mem slot.
 movdqu_desc:t_op_desc=(
  mem_reg:(op:$F30F7F;opt:[not_os8,not_prefix]);
  reg_mem:(op:$F30F6F;opt:[not_os8,not_prefix]);
  reg_imm:(opt:[not_impl]);
  reg_im8:(opt:[not_impl]);
  hint:[his_mov,his_wo];
 );

//MOVDQU handler. Re-emitted through op_emit2 only when is_memory is
//true; otherwise the instruction goes to add_orig.
procedure op_movdqu(var ctx:t_jit_context2);
begin
 if not is_memory(ctx.din) then
 begin
  add_orig(ctx);
  Exit;
 end;
 op_emit2(ctx,movdqu_desc);
end;
//
const
 //MOVUPS/MOVUPD base opcodes: 0F 11 /r in the mem_reg slot, 0F 10 /r in
 //the reg_mem slot. The prefix byte is added by op_emit2_simd.
 movu_ps_pd_desc:t_op_desc=(
  mem_reg:(op:$0F11;opt:[not_os8,not_prefix]);
  reg_mem:(op:$0F10;opt:[not_os8,not_prefix]);
  reg_imm:(opt:[not_impl]);
  reg_im8:(opt:[not_impl]);
  hint:[his_mov,his_wo];
 );

//Handler shared by the ps and pd variants of OPmovu. Re-emitted only
//when is_memory is true; otherwise the instruction goes to add_orig.
procedure op_movu_ps_pd(var ctx:t_jit_context2);
begin
 if not is_memory(ctx.din) then
 begin
  add_orig(ctx);
  Exit;
 end;
 op_emit2_simd(ctx,movu_ps_pd_desc);
end;
//
const
 //MOVAPS/MOVAPD base opcodes: 0F 29 /r in the mem_reg slot, 0F 28 /r in
 //the reg_mem slot. Carries his_align; the prefix byte is added by
 //op_emit2_simd.
 mova_ps_pd_desc:t_op_desc=(
  mem_reg:(op:$0F29;opt:[not_os8,not_prefix]);
  reg_mem:(op:$0F28;opt:[not_os8,not_prefix]);
  reg_imm:(opt:[not_impl]);
  reg_im8:(opt:[not_impl]);
  hint:[his_mov,his_wo,his_align];
 );

//Handler shared by the ps and pd variants of OPmova. Re-emitted only
//when is_memory is true; otherwise the instruction goes to add_orig.
procedure op_mova_ps_pd(var ctx:t_jit_context2);
begin
 if not is_memory(ctx.din) then
 begin
  add_orig(ctx);
  Exit;
 end;
 op_emit2_simd(ctx,mova_ps_pd_desc);
end;
//
const
 //MOVNTDQA: 66 0F 38 2A /r, reg_mem slot only (no mem_reg encoding).
 //The full four-byte opcode including the 66 prefix is stored here.
 movntdqa_desc:t_op_desc=(
  mem_reg:(opt:[not_impl]);
  reg_mem:(op:$660F382A;opt:[not_os8,not_prefix]);
  reg_imm:(opt:[not_impl]);
  reg_im8:(opt:[not_impl]);
  hint:[his_mov,his_wo,his_align];
 );

//MOVNTDQA handler. Uses plain op_emit2 because the descriptor already
//contains the prefix. Re-emitted only when is_memory is true; otherwise
//the instruction goes to add_orig.
procedure op_movntdqa(var ctx:t_jit_context2);
begin
 if not is_memory(ctx.din) then
 begin
  add_orig(ctx);
  Exit;
 end;
 op_emit2(ctx,movntdqa_desc);
end;
//SSE4a
{
AMD64 Architecture
Programmer's Manual
Volume 4:
128-Bit and 256-Bit
Media Instructions
}
//Handler for movnt sd/ss; init_cbs_sse registers it only when
//_SSE4aSupport is True. The instruction is always re-emitted through the
//mem_reg path with the decoded opcode - there is no add_orig fallback.
procedure op_movnt_sd_ss(var ctx:t_jit_context2);
const
 mov_store_hint=[his_mov,his_wo];
begin
 op_emit2_simd_mem_reg(ctx,mov_store_hint);
end;
{
a = xmm0[0:63]
b = xmm1[0:63]
mask = 0xFFFFFFFFFFFFFFFF;
m = mask shl (64 - (idx + len));
m = m shr (64 - len);
m = m shl idx;
b = b shl idx;
b = b and m;
a = (not m) and a;
a = a or b;
xmm0[0:63] = a;
}
//Emits opcode 66 0F 7E /r (MOVQ r/m64, xmm) through the builder's _RR
//encoder: reg0 and reg1 are forwarded in that order, and reg0.ASize is
//passed as the operand size.
procedure movq_r_xmm(var ctx:t_jit_context2;reg0,reg1:TRegValue);
const
 movq_op:t_op_type=(op:$660F7E;index:0);
begin
 //66 REX.W 0F 7E /r MOVQ r/m64, xmm
 ctx.builder._RR(movq_op,reg0,reg1,reg0.ASize);
end;
//Emits opcode 66 0F 3A 22 /r ib (PINSRQ xmm, r/m64, imm8) through the
//builder's _RRI8 encoder. Note the swapped order: reg1 is passed first,
//then reg0, and reg1.ASize is used as the operand size.
procedure pinsrq(var ctx:t_jit_context2;reg0,reg1:TRegValue;imm8:Byte);
const
 pinsrq_op:t_op_type=(op:$660F3A22;index:0);
begin
 ctx.builder._RRI8(pinsrq_op,reg1,reg0,imm8,reg1.ASize);
end;
//Emits opcode 66 0F 3A 16 /r ib (PEXTRQ r/m64, xmm, imm8) through the
//builder's _RRI8 encoder: reg0 and reg1 are forwarded in that order, and
//reg0.ASize is used as the operand size.
procedure pextrq(var ctx:t_jit_context2;reg0,reg1:TRegValue;imm8:Byte);
const
 pextrq_op:t_op_type=(op:$660F3A16;index:0);
begin
 ctx.builder._RRI8(pextrq_op,reg0,reg1,imm8,reg0.ASize);
end;
//Software replacement for INSERTQ; init_cbs_sse registers it only when
//_SSE4aSupport is False. It emits an integer sequence that merges a bit
//field of the low qword of operand 2 (xmm_b) into the low qword of
//operand 1 (xmm_a), following the pseudo code in the comment above
//movq_r_xmm.
//
//Two forms are handled:
// - OperCnt=4: len and idx are read from operands 3 and 4 at translation
//   time, so the mask is computed here and emitted as a constant.
// - otherwise: len and idx are extracted at run time from the upper
//   qword of xmm_b (byte 0 = len, byte 1 = idx, each masked with $3F).
//
//Scratch registers: r_tmp0, r_tmp1 and r_thrd. The flags are saved and
//restored with pushfq/popfq around the arithmetic, and the sequence ends
//by reloading r13 from GS:[teb_thread] plus jit_frame_offset
//(presumably r_thrd aliases r13 - confirm against the register
//definitions).
procedure op_insertq(var ctx:t_jit_context2);
var
len,idx:Int64;
mask:QWORD;
xmm_a,xmm_b:TRegValue;
a,b,m,t:TRegValue;
begin
//xmm_a: destination (operand 1), xmm_b: source (operand 2)
xmm_a:=new_reg(ctx.din.Operand[1]);
xmm_b:=new_reg(ctx.din.Operand[2]);
//initial role -> register mapping; the register form swaps these below
a:=r_tmp0;
b:=r_tmp1;
m:=r_thrd;
with ctx.builder do
begin
if (ctx.din.OperCnt=4) then
begin
//insertq xmm0,xmm1,$10,$30
len:=0;
GetTargetOfs(ctx.din,ctx.code,3,len);
idx:=0;
GetTargetOfs(ctx.din,ctx.code,4,idx);
//only the low 6 bits of each immediate are used
len:=len and $3F;
idx:=idx and $3F;
//build the field mask at translation time
//NOTE(review): for len=0 or idx+len>64 the shift counts below reach 64
//or become negative; the resulting mask then depends on how out-of-range
//shift counts are evaluated - confirm this matches the intended behavior.
mask:=QWORD($FFFFFFFFFFFFFFFF);
mask:=mask shl (64 - (idx + len));
mask:=mask shr (64 - len);
mask:=mask shl idx;
if (mask=0) then
begin
//nop
//nothing is emitted at all for an empty mask
Exit;
end;
if (mask=QWORD($FFFFFFFFFFFFFFFF)) then
begin
//special case
//whole low qword is replaced: plain copy through r_tmp1,
//no pushfq/popfq and no r13 reload are emitted on this path
//b = xmm1[0:63]
movq_r_xmm(ctx,b,xmm_b);
//xmm0[0:63] = b;
pinsrq(ctx,xmm_a,b,0);
Exit;
end;
//save flags; popped again after the merge at the end
pushfq(os64);
{
//swap
xchgq(rbp,rax);
//load flags to al,ah
seto(al);
lahf;
}
//load the constant mask into m using the shortest immediate form
if (classif_offset_u64(mask)=os64) then
begin
//64bit mask
movi64(m,mask);
end else
begin
//32bit zero extend
movi(new_reg_size(m,os32),mask);
end;
//b = xmm1[0:63]
movq_r_xmm(ctx,b,xmm_b);
if (idx<>0) then
begin
shli8(b,idx); // b = b shl idx
end;
end else
begin
//insertq xmm0,xmm1
//register form: len/idx are only known at run time
pushfq(os64);
{
//swap
xchgq(rbp,rax);
//load flags to al,ah
seto(al);
lahf;
}
//PEXTRQ r/m64, xmm2, imm8
//m = upper qword of xmm_b (holds len in byte 0 and idx in byte 1)
pextrq (ctx,m,xmm_b,1);
movq (b,m);
andi8se(b,$3F); // b = len = m[0]
t:=new_reg_size(a,os32);
movi (t,64); // a = 64 (zero extended)
subq (a,b); // a = (64 - len)
shri8 (m,8); // m[0] = 0
shli8 (m,8);
movq (b,a);
andi8se(b,$3F);
orq (m,b); // m[0] = (64 - len)
movq (b,m);
shri8 (b,8);
andi8se(b,$3F); // b = idx = m[1]
subq (a,b); // a = (64 - len - idx)
movi (b,-1); // b = 0xFFFFFFFFFFFFFFFF (sign extended to 64-bit)
shlx (b,b,a); // b = b shl (64 - idx - len)
shrx (b,b,m); // b = b shr (64 - len):[0x3F]
shri8 (m,8); // m[0] = m[1]
shlx (b,b,m); // b = b shl idx:[0x3F]
//reassign
//m -> a (idx)
//b -> m (mask)
//a -> b
//only the Pascal-side role names are rotated here, no code is emitted:
//afterwards a=r_thrd (idx), m=r_tmp1 (mask), b=r_tmp0 (free)
t:=a;
a:=m;
m:=b;
b:=t;
//b = xmm1[0:63]
movq_r_xmm(ctx,b,xmm_b);
shlx (b,b,a); // b = b shl idx:[0x3F]
end;
//common tail for both forms: b = shifted source, m = field mask
//a = xmm0[0:63]
movq_r_xmm(ctx,a,xmm_a);
andq(b,m);
notq(m);
andq(a,m);
orq (a,b);
//xmm0[0:63] = a;
//PINSRQ xmm1, r/m64, imm8
pinsrq(ctx,xmm_a,a,0);
//restore the flags saved by pushfq above
popfq(os64);
{
//store flags from al,ah
addi(al,127);
sahf;
//swap
xchgq(rbp,rax);
//restore rbp
movq(rbp,rsp);
}
//restore jit_frame
//r_thrd was used as scratch in both forms, so r13 is rebuilt from the
//thread pointer
movq(r13,[GS +Integer(teb_thread)]);
leaq(r13,[r13+jit_frame_offset ]);
end;
end;
//end of SSE4a section
const
 //MOVLPS/MOVLPD base opcodes: 0F 13 /r in the mem_reg slot, 0F 12 /r in
 //the reg_mem slot. The prefix byte is added by op_emit2_simd.
 movl_ps_pd_desc:t_op_desc=(
  mem_reg:(op:$0F13;opt:[not_os8,not_prefix]);
  reg_mem:(op:$0F12;opt:[not_os8,not_prefix]);
  reg_imm:(opt:[not_impl]);
  reg_im8:(opt:[not_impl]);
  hint:[his_mov,his_wo];
 );

//Handler shared by the ps and pd variants of OPmovl. Re-emitted only
//when is_memory is true; otherwise the instruction goes to add_orig.
procedure op_movl_ps_pd(var ctx:t_jit_context2);
begin
 if not is_memory(ctx.din) then
 begin
  add_orig(ctx);
  Exit;
 end;
 op_emit2_simd(ctx,movl_ps_pd_desc);
end;
//
const
 //MOVHPS/MOVHPD base opcodes: 0F 17 /r in the mem_reg slot, 0F 16 /r in
 //the reg_mem slot. The prefix byte is added by op_emit2_simd.
 movh_ps_pd_desc:t_op_desc=(
  mem_reg:(op:$0F17;opt:[not_os8,not_prefix]);
  reg_mem:(op:$0F16;opt:[not_os8,not_prefix]);
  reg_imm:(opt:[not_impl]);
  reg_im8:(opt:[not_impl]);
  hint:[his_mov,his_wo];
 );

//Handler shared by the ps and pd variants of OPmovh. Re-emitted only
//when is_memory is true; otherwise the instruction goes to add_orig.
procedure op_movh_ps_pd(var ctx:t_jit_context2);
begin
 if not is_memory(ctx.din) then
 begin
  add_orig(ctx);
  Exit;
 end;
 op_emit2_simd(ctx,movh_ps_pd_desc);
end;
//Handler for the maskmov q/dqu instructions. With jit_memory_guard off
//the instruction is passed straight to add_orig. With it on, rdi is
//copied to r_tmp1, run through op_uplift (which is told not to use
//r_tmp1), the instruction is added via add_orig, and rdi is then copied
//back from r_tmp1.
procedure op_maskmov(var ctx:t_jit_context2);
begin
 if not jit_memory_guard then
 begin
  add_orig(ctx);
 end else
 begin
  with ctx.builder do
  begin
   //keep the guest value of rdi
   movq(r_tmp1,rdi);

   op_uplift(ctx,rdi,os64,[not_use_r_tmp1]); //in/out:rdi

   add_orig(ctx);

   //put the guest value back
   movq(rdi,r_tmp1);
  end;
 end;
end;
//
const
 //LDMXCSR m32: 0F AE /2
 ldmxcsr_desc:t_op_type=(
  op   :$0FAE;
  index:2;
  opt  :[not_os8,not_prefix];
 );

//LDMXCSR handler: always emitted as a one-operand instruction through
//op_emit1 with the his_ro hint.
procedure op_ldmxcsr(var ctx:t_jit_context2);
begin
 op_emit1(ctx,ldmxcsr_desc,[his_ro]);
end;
const
 //STMXCSR m32: 0F AE /3
 stmxcsr_desc:t_op_type=(
  op   :$0FAE;
  index:3;
  opt  :[not_os8,not_prefix];
 );

//STMXCSR handler: always emitted as a one-operand instruction through
//op_emit1 with the his_wo hint.
procedure op_stmxcsr(var ctx:t_jit_context2);
begin
 op_emit1(ctx,stmxcsr_desc,[his_wo]);
end;
//Conversions that have a REX.W form (64-bit integer operand):
//CVTSD2SI
//CVTSI2SD
//CVTSI2SS
//CVTSS2SI
//CVTTSD2SI
//CVTTSS2SI
//
//Fills the jit_cbs dispatch table with the handlers of this unit.
//Each entry is keyed by [prefix, opcode, suffix] as produced by the
//decoder. Must run after SetupSupport, because the handlers for
//movnt sd/ss and insertq depend on _SSE4aSupport.
//
//Every key is assigned exactly once. Earlier revisions of this table
//registered movl/movh twice (the first, load-only handler was always
//overwritten) and repeated several add/sub/padd/psub/pshuf entries;
//those dead assignments were removed without changing the final table.
procedure init_cbs_sse;
begin
 //scalar / integer moves
 jit_cbs[OPPnone,OPmov,OPSx_sd ]:=@op_movsd;
 jit_cbs[OPPnone,OPmov,OPSx_ss ]:=@op_movss;
 jit_cbs[OPPnone,OPmov,OPSx_d  ]:=@op_movd_dq;
 jit_cbs[OPPnone,OPmov,OPSx_q  ]:=@op_movd_dq;
 jit_cbs[OPPnone,OPmov,OPSx_dqa]:=@op_movdqa;
 jit_cbs[OPPnone,OPmov,OPSx_dqu]:=@op_movdqu;

 jit_cbs[OPPnone,OPlddqu,OPSnone]:=@op_reg_mem_wo;

 //packed moves
 jit_cbs[OPPnone,OPmovu,OPSx_ps]:=@op_movu_ps_pd;
 jit_cbs[OPPnone,OPmovu,OPSx_pd]:=@op_movu_ps_pd;

 jit_cbs[OPPnone,OPmova,OPSx_ps]:=@op_mova_ps_pd;
 jit_cbs[OPPnone,OPmova,OPSx_pd]:=@op_mova_ps_pd;

 //movl/movh have both a load and a store form
 jit_cbs[OPPnone,OPmovl,OPSx_ps]:=@op_movl_ps_pd;
 jit_cbs[OPPnone,OPmovl,OPSx_pd]:=@op_movl_ps_pd;

 jit_cbs[OPPnone,OPmovh,OPSx_ps]:=@op_movh_ps_pd;
 jit_cbs[OPPnone,OPmovh,OPSx_pd]:=@op_movh_ps_pd;

 jit_cbs[OPPnone,OPmovhlps,OPSnone]:=@add_orig;
 jit_cbs[OPPnone,OPmovlh  ,OPSx_ps]:=@add_orig;

 jit_cbs[OPPnone,OPmovsldup,OPSnone]:=@op_reg_mem_mov_wo;
 jit_cbs[OPPnone,OPmovshdup,OPSnone]:=@op_reg_mem_mov_wo;

 //non-temporal moves
 jit_cbs[OPPnone,OPmovnt,OPSx_dqa]:=@op_movntdqa;
 jit_cbs[OPPnone,OPmovnt,OPSx_dq ]:=@op_mem_reg_mov_wo;
 jit_cbs[OPPnone,OPmovnt,OPSx_i  ]:=@op_mem_reg_mov_wo;
 jit_cbs[OPPnone,OPmovnt,OPSx_ps ]:=@op_mem_reg_mov_wo;
 jit_cbs[OPPnone,OPmovnt,OPSx_pd ]:=@op_mem_reg_mov_wo;
 jit_cbs[OPPnone,OPmovnt,OPSx_q  ]:=@op_mem_reg_mov_wo;

 jit_cbs[OPPnone,OPmovdq2q,OPSnone]:=@add_orig;
 jit_cbs[OPPnone,OPmovq2dq,OPSnone]:=@add_orig;

 jit_cbs[OPPnone,OPmovddup,OPSnone]:=@op_reg_mem_wo;

 //sign/zero extending moves
 jit_cbs[OPPnone,OPpmovsx,OPSv_bw]:=@op_reg_mem_mov_wo;
 jit_cbs[OPPnone,OPpmovsx,OPSv_bd]:=@op_reg_mem_mov_wo;
 jit_cbs[OPPnone,OPpmovsx,OPSv_bq]:=@op_reg_mem_mov_wo;
 jit_cbs[OPPnone,OPpmovsx,OPSv_wd]:=@op_reg_mem_mov_wo;
 jit_cbs[OPPnone,OPpmovsx,OPSv_wq]:=@op_reg_mem_mov_wo;
 jit_cbs[OPPnone,OPpmovsx,OPSv_dq]:=@op_reg_mem_mov_wo;

 jit_cbs[OPPnone,OPpmovzx,OPSv_bw]:=@op_reg_mem_mov_wo;
 jit_cbs[OPPnone,OPpmovzx,OPSv_bd]:=@op_reg_mem_mov_wo;
 jit_cbs[OPPnone,OPpmovzx,OPSv_bq]:=@op_reg_mem_mov_wo;
 jit_cbs[OPPnone,OPpmovzx,OPSv_wd]:=@op_reg_mem_mov_wo;
 jit_cbs[OPPnone,OPpmovzx,OPSv_wq]:=@op_reg_mem_mov_wo;
 jit_cbs[OPPnone,OPpmovzx,OPSv_dq]:=@op_reg_mem_mov_wo;

 //mask extraction
 jit_cbs[OPPnone,OPmovmsk  ,OPSx_ps]:=@op_reg_mem_mov_wo; //reg reg
 jit_cbs[OPPnone,OPmovmsk  ,OPSx_pd]:=@op_reg_mem_mov_wo; //reg reg
 jit_cbs[OPPnone,OPpmovmskb,OPSnone]:=@op_reg_mem_mov_wo; //reg reg

 jit_cbs[OPPnone,OPmaskmov,OPSx_q  ]:=@op_maskmov;
 jit_cbs[OPPnone,OPmaskmov,OPSx_dqu]:=@op_maskmov;

 //compares that only read their operands
 jit_cbs[OPPnone,OPcomi ,OPSx_ss]:=@op_reg_mem_ro;
 jit_cbs[OPPnone,OPcomi ,OPSx_sd]:=@op_reg_mem_ro;
 jit_cbs[OPPnone,OPucomi,OPSx_ss]:=@op_reg_mem_ro;
 jit_cbs[OPPnone,OPucomi,OPSx_sd]:=@op_reg_mem_ro;

 //logic
 jit_cbs[OPPnone,OPxor ,OPSx_ps]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPxor ,OPSx_pd]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpxor,OPSnone]:=@op_reg_mem_rw;

 jit_cbs[OPPnone,OPor ,OPSx_ps]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPor ,OPSx_pd]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpor,OPSnone]:=@op_reg_mem_rw;

 jit_cbs[OPPnone,OPand  ,OPSx_ps]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPand  ,OPSx_pd]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPandn ,OPSx_ps]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPandn ,OPSx_pd]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpand ,OPSnone]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpandn,OPSnone]:=@op_reg_mem_rw;

 //packed compares
 jit_cbs[OPPnone,OPpcmpeq,OPSx_b]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpcmpeq,OPSx_w]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpcmpeq,OPSx_d]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpcmpeq,OPSx_q]:=@op_reg_mem_rw;

 jit_cbs[OPPnone,OPpcmpgt,OPSx_b]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpcmpgt,OPSx_w]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpcmpgt,OPSx_d]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpcmpgt,OPSx_q]:=@op_reg_mem_rw;

 jit_cbs[OPPnone,OPpcmpestrm,OPSnone]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpcmpestri,OPSnone]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpcmpistrm,OPSnone]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpcmpistri,OPSnone]:=@op_reg_mem_rw;

 //subtraction
 jit_cbs[OPPnone,OPsub,OPSx_sd]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPsub,OPSx_ss]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPsub,OPSx_ps]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPsub,OPSx_pd]:=@op_reg_mem_rw;

 jit_cbs[OPPnone,OPpsub,OPSx_b]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpsub,OPSx_w]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpsub,OPSx_d]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpsub,OPSx_q]:=@op_reg_mem_rw;

 jit_cbs[OPPnone,OPpsubs ,OPSx_b]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpsubs ,OPSx_w]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpsubus,OPSx_b]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpsubus,OPSx_w]:=@op_reg_mem_rw;

 //addition
 jit_cbs[OPPnone,OPadd,OPSx_ps]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPadd,OPSx_pd]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPadd,OPSx_ss]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPadd,OPSx_sd]:=@op_reg_mem_rw;

 jit_cbs[OPPnone,OPpadd,OPSx_b]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpadd,OPSx_w]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpadd,OPSx_d]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpadd,OPSx_q]:=@op_reg_mem_rw;

 jit_cbs[OPPnone,OPpadds ,OPSx_b]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpadds ,OPSx_w]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpaddus,OPSx_b]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpaddus,OPSx_w]:=@op_reg_mem_rw;

 jit_cbs[OPPnone,OPphadd,OPSx_w]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPphadd,OPSx_d]:=@op_reg_mem_rw;

 jit_cbs[OPPnone,OPaddsub,OPSx_pd]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPaddsub,OPSx_ps]:=@op_reg_mem_rw;

 //division / multiplication
 jit_cbs[OPPnone,OPdiv,OPSx_ps]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPdiv,OPSx_pd]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPdiv,OPSx_ss]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPdiv,OPSx_sd]:=@op_reg_mem_rw;

 jit_cbs[OPPnone,OPmul,OPSx_ps]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPmul,OPSx_pd]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPmul,OPSx_sd]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPmul,OPSx_ss]:=@op_reg_mem_rw;

 jit_cbs[OPPnone,OPpmull,OPSx_d]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpmull,OPSx_w]:=@op_reg_mem_rw;

 jit_cbs[OPPnone,OPpmuludq ,OPSnone]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpmulhuw ,OPSnone]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpmulhrsw,OPSnone]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpmulhw  ,OPSnone]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpmuldq  ,OPSnone]:=@op_reg_mem_rw;

 jit_cbs[OPPnone,OPpmaddubsw,OPSnone]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpmaddwd  ,OPSnone]:=@op_reg_mem_rw;

 //conversions
 jit_cbs[OPPnone,OPcvtsi2 ,OPSx_ss]:=@op_reg_mem_wo;
 jit_cbs[OPPnone,OPcvtsi2 ,OPSx_sd]:=@op_reg_mem_wo;

 jit_cbs[OPPnone,OPcvtss2 ,OPSx_sd]:=@op_reg_mem_wo;
 jit_cbs[OPPnone,OPcvtss2 ,OPSx_si]:=@op_reg_mem_wo;

 jit_cbs[OPPnone,OPcvtsd2 ,OPSx_ss]:=@op_reg_mem_wo;
 jit_cbs[OPPnone,OPcvtsd2 ,OPSx_si]:=@op_reg_mem_wo;

 jit_cbs[OPPnone,OPcvttps2,OPSx_dq]:=@op_reg_mem_wo;
 jit_cbs[OPPnone,OPcvttps2,OPSx_pi]:=@op_reg_mem_wo;

 jit_cbs[OPPnone,OPcvttpd2,OPSx_dq]:=@op_reg_mem_wo;
 jit_cbs[OPPnone,OPcvttpd2,OPSx_pi]:=@op_reg_mem_wo;

 jit_cbs[OPPnone,OPcvtdq2 ,OPSx_ps]:=@op_reg_mem_wo;
 jit_cbs[OPPnone,OPcvtdq2 ,OPSx_pd]:=@op_reg_mem_wo;

 jit_cbs[OPPnone,OPcvttss2,OPSx_si]:=@op_reg_mem_wo;
 jit_cbs[OPPnone,OPcvttsd2,OPSx_si]:=@op_reg_mem_wo;

 jit_cbs[OPPnone,OPcvtpd2 ,OPSx_ps]:=@op_reg_mem_wo;
 jit_cbs[OPPnone,OPcvtpd2 ,OPSx_dq]:=@op_reg_mem_wo;
 jit_cbs[OPPnone,OPcvtpd2 ,OPSx_pi]:=@op_reg_mem_wo;

 jit_cbs[OPPnone,OPcvtps2 ,OPSx_pd]:=@op_reg_mem_wo;
 jit_cbs[OPPnone,OPcvtps2 ,OPSx_dq]:=@op_reg_mem_wo;
 jit_cbs[OPPnone,OPcvtps2 ,OPSx_pi]:=@op_reg_mem_wo;

 jit_cbs[OPPnone,OPcvtpi2 ,OPSx_pd]:=@op_reg_mem_wo;
 jit_cbs[OPPnone,OPcvtpi2 ,OPSx_ps]:=@op_reg_mem_wo;

 //square root / reciprocal
 jit_cbs[OPPnone,OPsqrt,OPSx_ps]:=@op_reg_mem_wo;
 jit_cbs[OPPnone,OPsqrt,OPSx_pd]:=@op_reg_mem_wo;
 jit_cbs[OPPnone,OPsqrt,OPSx_sd]:=@op_reg_mem_wo;
 jit_cbs[OPPnone,OPsqrt,OPSx_ss]:=@op_reg_mem_wo;

 jit_cbs[OPPnone,OPrsqrt,OPSx_ps]:=@op_reg_mem_wo;
 jit_cbs[OPPnone,OPrsqrt,OPSx_ss]:=@op_reg_mem_wo;

 jit_cbs[OPPnone,OPrcp,OPSx_ps]:=@op_reg_mem_wo;
 jit_cbs[OPPnone,OPrcp,OPSx_ss]:=@op_reg_mem_wo;

 //shuffles
 jit_cbs[OPPnone,OPpshuf,OPSx_b ]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpshuf,OPSx_d ]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpshuf,OPSx_hw]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpshuf,OPSx_lw]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpshuf,OPSx_w ]:=@op_reg_mem_rw;

 //shifts
 jit_cbs[OPPnone,OPpsra,OPSx_w]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpsra,OPSx_d]:=@op_reg_mem_rw;

 jit_cbs[OPPnone,OPpsrl,OPSx_w ]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpsrl,OPSx_d ]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpsrl,OPSx_q ]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpsrl,OPSx_dq]:=@add_orig;

 jit_cbs[OPPnone,OPpsll,OPSx_w ]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpsll,OPSx_d ]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpsll,OPSx_q ]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpsll,OPSx_dq]:=@add_orig;

 //min / max
 jit_cbs[OPPnone,OPpminu,OPSx_b]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpminu,OPSx_w]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpminu,OPSx_d]:=@op_reg_mem_rw;

 jit_cbs[OPPnone,OPpmins,OPSx_b]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpmins,OPSx_w]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpmins,OPSx_d]:=@op_reg_mem_rw;

 jit_cbs[OPPnone,OPmin,OPSx_ps]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPmin,OPSx_pd]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPmin,OPSx_ss]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPmin,OPSx_sd]:=@op_reg_mem_rw;

 jit_cbs[OPPnone,OPpmaxu,OPSx_b]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpmaxu,OPSx_w]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpmaxu,OPSx_d]:=@op_reg_mem_rw;

 jit_cbs[OPPnone,OPpmaxs,OPSx_b]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpmaxs,OPSx_w]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpmaxs,OPSx_d]:=@op_reg_mem_rw;

 jit_cbs[OPPnone,OPmax,OPSx_ps]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPmax,OPSx_pd]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPmax,OPSx_ss]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPmax,OPSx_sd]:=@op_reg_mem_rw;

 //insert / pack / unpack
 jit_cbs[OPPnone,OPpinsr,OPSx_b]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpinsr,OPSx_d]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpinsr,OPSx_q]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpinsr,OPSx_w]:=@op_reg_mem_rw;

 jit_cbs[OPPnone,OPpacksswb,OPSnone]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpackssdw,OPSnone]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpackusdw,OPSnone]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpackuswb,OPSnone]:=@op_reg_mem_rw;

 jit_cbs[OPPnone,OPpunpcklbw ,OPSnone]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpunpcklwd ,OPSnone]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpunpckldq ,OPSnone]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpunpcklqdq,OPSnone]:=@op_reg_mem_rw;

 jit_cbs[OPPnone,OPpunpckhbw ,OPSnone]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpunpckhwd ,OPSnone]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpunpckhdq ,OPSnone]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpunpckhqdq,OPSnone]:=@op_reg_mem_rw;

 jit_cbs[OPPnone,OPunpckl,OPSx_pd]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPunpckl,OPSx_ps]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPunpckh,OPSx_pd]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPunpckh,OPSx_ps]:=@op_reg_mem_rw;

 jit_cbs[OPPnone,OPpalignr,OPSnone]:=@op_reg_mem_rw;

 jit_cbs[OPPnone,OPpsign,OPSx_b]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpsign,OPSx_w]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPpsign,OPSx_d]:=@op_reg_mem_rw;

 //floating point compares writing a mask
 jit_cbs[OPPnone,OPcmp,OPSx_ps]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPcmp,OPSx_pd]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPcmp,OPSx_sd]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPcmp,OPSx_ss]:=@op_reg_mem_rw;

 jit_cbs[OPPnone,OPshuf,OPSx_ps]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPshuf,OPSx_pd]:=@op_reg_mem_rw;

 //MXCSR access
 jit_cbs[OPPnone,OPldmxcsr,OPSnone]:=@op_ldmxcsr;
 jit_cbs[OPPnone,OPstmxcsr,OPSnone]:=@op_stmxcsr;

 //SSE4a: use the native instructions when the host has them,
 //otherwise substitute movsd/movss and the software insertq sequence
 if _SSE4aSupport then
 begin
  jit_cbs[OPPnone,OPmovnt ,OPSx_sd]:=@op_movnt_sd_ss;
  jit_cbs[OPPnone,OPmovnt ,OPSx_ss]:=@op_movnt_sd_ss;
  jit_cbs[OPPnone,OPinsert,OPSx_q ]:=@add_orig;
 end else
 begin
  jit_cbs[OPPnone,OPmovnt ,OPSx_sd]:=@op_movsd;
  jit_cbs[OPPnone,OPmovnt ,OPSx_ss]:=@op_movss;
  jit_cbs[OPPnone,OPinsert,OPSx_q ]:=@op_insertq;
 end;

 //AES
 jit_cbs[OPPnone,OPaeskeygenassist,OPSnone]:=@op_reg_mem_wo;
 jit_cbs[OPPnone,OPaesimc         ,OPSnone]:=@op_reg_mem_wo;
 jit_cbs[OPPnone,OPaesenc         ,OPSnone]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPaesenclast     ,OPSnone]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPaesdec         ,OPSnone]:=@op_reg_mem_rw;
 jit_cbs[OPPnone,OPaesdeclast     ,OPSnone]:=@op_reg_mem_rw;
end;
//Queries CPUID leaf $80000001 and stores in _SSE4aSupport whether bit 6
//(mask $40) of the returned ECX is set. Only ECX is kept; rax, rbx, rcx
//and rdx are declared as clobbered by the asm block.
procedure SetupSupport;
var
 ext_ecx:longint;
begin
 asm
  movl $0x80000001,%eax
  cpuid
  movl %ecx,ext_ecx
 end ['rax','rbx','rcx','rdx'];

 //ECX bit 6 -> SSE4a
 _SSE4aSupport:=((ext_ecx and $40)=$40);
end;
//Unit start-up. The order matters: SetupSupport assigns _SSE4aSupport,
//and init_cbs_sse reads that flag while filling the jit_cbs table.
initialization
SetupSupport;
init_cbs_sse;
end.