From a289460d499cd97bc760622ac0be8ecaa0327a69 Mon Sep 17 00:00:00 2001 From: Pavel <68122101+red-prig@users.noreply.github.com> Date: Sun, 8 Oct 2023 23:00:23 +0300 Subject: [PATCH] + --- rtl/x86_jit.pas | 59 +++--- sys/test/kern_jit2.pas | 78 +++---- sys/test/kern_jit2_ctx.pas | 294 ++++++++++++-------------- sys/test/kern_jit2_ops.pas | 144 ++++++++++--- sys/test/kern_jit2_ops_avx.pas | 375 +++++++++++++++++++++++++++++---- 5 files changed, 634 insertions(+), 316 deletions(-) diff --git a/rtl/x86_jit.pas b/rtl/x86_jit.pas index 08859821..5f880a70 100644 --- a/rtl/x86_jit.pas +++ b/rtl/x86_jit.pas @@ -389,6 +389,8 @@ type procedure lahf; procedure seto(reg:TRegValue); procedure int3; + procedure testq(reg0:TRegValue;reg1:TRegValue); + procedure bti8 (mem:t_jit_leas;imm:Byte); end; operator :=(const A:TRegValue):t_jit_lea; @@ -1978,8 +1980,7 @@ begin begin Dec(op); end; - os16, - os128: + os16: if (not (not_prefix in desc.opt)) then begin Prefix:=$66; @@ -2060,8 +2061,7 @@ begin begin Dec(op); end; - os16, - os128: + os16: if (not (not_prefix in desc.opt)) then begin Prefix:=$66; @@ -2150,8 +2150,7 @@ begin begin Dec(op); end; - os16, - os128: + os16: if (not (not_prefix in desc.opt)) then begin Prefix:=$66; @@ -2233,8 +2232,7 @@ begin begin Dec(op); end; - os16, - os128: + os16: if (not (not_prefix in desc.opt)) then begin Prefix:=$66; @@ -2304,8 +2302,7 @@ begin begin Dec(op); end; - os16, - os128: + os16: if (not (not_prefix in desc.opt)) then begin Prefix:=$66; @@ -2381,8 +2378,7 @@ begin begin Dec(op); end; - os16, - os128: + os16: if (not (not_prefix in desc.opt)) then begin Prefix:=$66; @@ -2440,8 +2436,7 @@ begin begin Dec(op); end; - os16, - os128: + os16: if (not (not_prefix in desc.opt)) then begin Prefix:=$66; @@ -2485,8 +2480,7 @@ begin Prefix:=0; case Size of - os16, - os128: + os16: if (not not_prefix) then begin Prefix:=$66; @@ -2552,8 +2546,7 @@ begin op:=desc.op; case reg.ASize of - os16, - os128: + os16: if (not (not_prefix in desc.opt)) then begin Prefix:=$66; @@ -2653,8 +2646,7 @@ begin begin Dec(op); end; - os16, - os128: + os16: if (not (not_prefix in desc.opt)) then begin Prefix:=$66; @@ -2729,8 +2721,7 @@ begin begin Dec(op); end; - os16, - os128: + os16: if (not (not_prefix in desc.opt)) then begin Prefix:=$66; @@ -2797,8 +2788,7 @@ begin begin Dec(op); end; - os16, - os128: + os16: if (not (not_prefix in desc.opt)) then begin Prefix:=$66; @@ -2873,8 +2863,7 @@ begin op:=desc.op; case reg.ASize of - os16, - os128: + os16: if (not (not_prefix in desc.opt)) then begin Prefix:=$66; @@ -2930,8 +2919,7 @@ begin op:=desc.op; case mreg.AMemSize of - os16, - os128: + os16: if (not (not_prefix in desc.opt)) then begin Prefix:=$66; @@ -4430,6 +4418,21 @@ begin _O($CC); end; +procedure t_jit_builder.testq(reg0:TRegValue;reg1:TRegValue); +const + desc:t_op_type=(op:$85;index:0); +begin + _RR(desc,reg0,reg1,os0); +end; + +procedure t_jit_builder.bti8(mem:t_jit_leas;imm:Byte); +const + desc:t_op_type=(op:$0FBA;index:4); +begin + _MI8(desc,mem,imm); +end; + + end. diff --git a/sys/test/kern_jit2.pas b/sys/test/kern_jit2.pas index de33b49b..30280a8b 100644 --- a/sys/test/kern_jit2.pas +++ b/sys/test/kern_jit2.pas @@ -427,7 +427,6 @@ end; procedure op_push_rip(var ctx:t_jit_context2); var - i:Integer; stack:TRegValue; imm:Int64; begin @@ -438,10 +437,9 @@ begin begin stack:=r_tmp0; - i:=GetFrameOffset(rsp); - movq(stack,[r_thrd+i]); + op_load_rsp(ctx,stack); leaq(stack,[stack-8]); - movq([r_thrd+i],stack); + op_save_rsp(ctx,stack); call_far(@uplift_jit); //in/out:rax uses:r14 @@ -471,7 +469,6 @@ end; procedure op_pop_rip(var ctx:t_jit_context2); //out:rax var - i:Integer; stack:TRegValue; begin //mov rax,[rsp] @@ -481,18 +478,15 @@ begin begin stack:=r_tmp0; - i:=GetFrameOffset(rsp); - movq(stack,[r_thrd+i]); + op_load_rsp(ctx,stack); call_far(@uplift_jit); //in/out:rax uses:r14 movq(r_tmp1,[stack]); - seto(al); - lahf; - addi8([r_thrd+i,os64],8); - addi(al,127); - sahf; + op_load_rsp(ctx,stack); + leaq(stack,[stack+8]); + op_save_rsp(ctx,stack); movq(r_tmp0,r_tmp1); end; @@ -518,7 +512,6 @@ var ofs:Int64; dst:Pointer; new1,new2:TRegValue; - i:Integer; link:t_jit_i_link; begin op_push_rip(ctx); @@ -570,8 +563,7 @@ begin begin new1:=new_reg_size(r_tmp0,ctx.din.Operand[1]); // - i:=GetFrameOffset(ctx.din.Operand[1].RegValue[0]); - ctx.builder.movq(new1,[r_thrd+i]); + op_load(ctx,new1,1); // if is_rsp(ctx.din.Operand[1].RegValue[0]) then begin @@ -610,7 +602,6 @@ var ofs:Int64; dst:Pointer; new1,new2:TRegValue; - i:Integer; link:t_jit_i_link; begin if (ctx.din.Operand[1].RegValue[0].AType=regNone) then @@ -658,8 +649,7 @@ begin begin new1:=new_reg_size(r_tmp0,ctx.din.Operand[1]); // - i:=GetFrameOffset(ctx.din.Operand[1].RegValue[0]); - ctx.builder.movq(new1,[r_thrd+i]); + op_load(ctx,new1,1); // op_jmp_dispatcher(ctx); end else @@ -723,7 +713,6 @@ const procedure op_push(var ctx:t_jit_context2); var - i:Integer; imm:Int64; stack,new:TRegValue; begin @@ -757,8 +746,7 @@ begin begin new:=new_reg_size(r_tmp1,ctx.din.Operand[1]); - i:=GetFrameOffset(ctx.din.Operand[1]); - movq(new,[r_thrd+i]); + op_load(ctx,new,1); end else begin new:=new_reg(ctx.din.Operand[1]); @@ -779,10 +767,9 @@ begin else; end; - i:=GetFrameOffset(rsp); - movq(stack,[r_thrd+i]); + op_load_rsp(ctx,stack); leaq(stack,[stack-OPERAND_BYTES[new.ASize]]); - movq([r_thrd+i],stack); + op_save_rsp(ctx,stack); call_far(@uplift_jit); //in/out:rax uses:r14 @@ -792,7 +779,6 @@ end; procedure op_pushfq(var ctx:t_jit_context2); var - i:Integer; mem_size:TOperandSize; stack,new:TRegValue; begin @@ -810,10 +796,9 @@ begin pushfq(mem_size); pop(new); - i:=GetFrameOffset(rsp); - movq(stack,[r_thrd+i]); + op_load_rsp(ctx,stack); leaq(stack,[stack-OPERAND_BYTES[new.ASize]]); - movq([r_thrd+i],stack); + op_save_rsp(ctx,stack); call_far(@uplift_jit); //in/out:rax uses:r14 @@ -823,7 +808,6 @@ end; procedure op_pop(var ctx:t_jit_context2); var - i:Integer; new,stack:TRegValue; begin //mov reg,[rsp] @@ -833,8 +817,7 @@ begin begin stack:=r_tmp0; - i:=GetFrameOffset(rsp); - movq(stack,[r_thrd+i]); + op_load_rsp(ctx,stack); call_far(@uplift_jit); //in/out:rax uses:r14 @@ -856,8 +839,7 @@ begin movq(new,[stack]); - i:=GetFrameOffset(ctx.din.Operand[1]); - movq([r_thrd+i],new); + op_save(ctx,1,fix_size(new)); end else begin new:=new_reg(ctx.din.Operand[1]); @@ -865,13 +847,9 @@ begin movq(new,[stack]); end; - i:=GetFrameOffset(rsp); - - seto(al); - lahf; - addi8([r_thrd+i,os64],OPERAND_BYTES[new.ASize]); - addi(al,127); - sahf; + op_load_rsp(ctx,stack); + leaq(stack,[stack+OPERAND_BYTES[new.ASize]]); + op_save_rsp(ctx,stack); end; end; @@ -918,14 +896,14 @@ procedure op_ud2(var ctx:t_jit_context2); begin //exit proc? ctx.builder.call_far(@jit_exit_proc); //TODO exit dispatcher - ctx.trim:=True; + ctx.trim:=True; end; procedure op_iretq(var ctx:t_jit_context2); begin //exit proc? ctx.builder.call_far(@jit_exit_proc); //TODO exit dispatcher - ctx.trim:=True; + ctx.trim:=True; end; procedure op_hlt(var ctx:t_jit_context2); @@ -950,10 +928,6 @@ begin //align? end; -const - test_desc:t_op_type=(op:$85;index:0); - bt_desc_imm:t_op_type=(op:$0FBA;index:4); - procedure _op_rep_cmps(var ctx:t_jit_context2;dflag:Integer); var op:DWORD; @@ -995,7 +969,7 @@ begin //repeat seto(al); lahf; - _RR(test_desc,rcx,rcx,os0); + testq(rcx,rcx); link_jmp0:=jcc(OPSc_z,nil_link,os8); addi(al,127); sahf; @@ -1068,7 +1042,7 @@ begin //get d flag pushfq(os64); - _MI8(bt_desc_imm,[rsp,os64],10); //bt rax, 10 + bti8([rsp,os64],10); //bt rax, 10 link_jmp0:=jcc(OPSc_b,nil_link,os8); @@ -1091,7 +1065,6 @@ end; procedure _op_rep_stos(var ctx:t_jit_context2;dflag:Integer); var - i:Integer; size:TOperandSize; new:TRegValue; @@ -1122,15 +1095,14 @@ begin new:=new_reg_size(r_tmp1,size); - i:=GetFrameOffset(rax); - movq(new,[r_thrd+i]); + op_load_rax(ctx,new); link_start:=ctx.builder.get_curr_label.after; //repeat seto(al); lahf; - _RR(test_desc,rcx,rcx,os0); + testq(rcx,rcx); link_jmp0:=jcc(OPSc_z,nil_link,os8); addi(al,127); sahf; @@ -1175,7 +1147,7 @@ begin //get d flag pushfq(os64); - _MI8(bt_desc_imm,[rsp,os64],10); //bt rax, 10 + bti8([rsp,os64],10); //bt rax, 10 link_jmp0:=jcc(OPSc_b,nil_link,os8); diff --git a/sys/test/kern_jit2_ctx.pas b/sys/test/kern_jit2_ctx.pas index ca8259e7..77268140 100644 --- a/sys/test/kern_jit2_ctx.pas +++ b/sys/test/kern_jit2_ctx.pas @@ -230,9 +230,16 @@ function is_rep_prefix(const i:TInstruction):Boolean; function flags(const i:TInstruction):t_jit_lea; function flags(const ctx:t_jit_context2):t_jit_lea; -procedure add_orig(var ctx:t_jit_context2); procedure op_load_rax(var ctx:t_jit_context2;reg:TRegValue); procedure op_save_rax(var ctx:t_jit_context2;reg:TRegValue); + +procedure op_load_rsp(var ctx:t_jit_context2;reg:TRegValue); +procedure op_save_rsp(var ctx:t_jit_context2;reg:TRegValue); + +procedure op_load(var ctx:t_jit_context2;reg:TRegValue;opr:Byte); +procedure op_save(var ctx:t_jit_context2;opr:Byte;reg:TRegValue); + +procedure add_orig(var ctx:t_jit_context2); procedure op_emit1(var ctx:t_jit_context2;const desc:t_op_type;hint:t_op_hint); procedure op_emit2(var ctx:t_jit_context2;const desc:t_op_desc); procedure op_emit_shift2(var ctx:t_jit_context2;const desc:t_op_shift); @@ -435,35 +442,20 @@ begin case RegValue.AType of regGeneral: begin - case RegValue.ASize of - os8, - os16, - os32, - os64: - begin - case RegValue.AIndex of - 0:Result:=Integer(@p_jit_frame(nil)^.tf_rax); - 4:Result:=Integer(@p_jit_frame(nil)^.tf_rsp); - 5:Result:=Integer(@p_jit_frame(nil)^.tf_rbp); - 14:Result:=Integer(@p_jit_frame(nil)^.tf_r14); - 15:Result:=Integer(@p_jit_frame(nil)^.tf_r15); - else; - end; - end; - else; + case RegValue.AIndex of + 0:Result:=Integer(@p_jit_frame(nil)^.tf_rax); + 4:Result:=Integer(@p_jit_frame(nil)^.tf_rsp); + 5:Result:=Integer(@p_jit_frame(nil)^.tf_rbp); + 14:Result:=Integer(@p_jit_frame(nil)^.tf_r14); + 15:Result:=Integer(@p_jit_frame(nil)^.tf_r15); + else; end; end; regGeneralH: begin - case RegValue.ASize of - os8: - begin - case RegValue.AIndex of - 0:Result:=Integer(@p_jit_frame(nil)^.tf_rax)+1; - else; - end; - end; - else; + case RegValue.AIndex of + 0:Result:=Integer(@p_jit_frame(nil)^.tf_rax)+1; + else; end; end; else; @@ -1456,6 +1448,8 @@ begin end; end; +// + procedure op_load_rax(var ctx:t_jit_context2;reg:TRegValue); var i:Integer; @@ -1478,6 +1472,56 @@ begin end; end; +// + +procedure op_load_rsp(var ctx:t_jit_context2;reg:TRegValue); +var + i:Integer; +begin + with ctx.builder do + begin + i:=GetFrameOffset(rsp); + movq(reg,[r_thrd+i]); + end; +end; + +procedure op_save_rsp(var ctx:t_jit_context2;reg:TRegValue); +var + i:Integer; +begin + with ctx.builder do + begin + i:=GetFrameOffset(rsp); + movq([r_thrd+i],reg); + end; +end; + +// + +procedure op_load(var ctx:t_jit_context2;reg:TRegValue;opr:Byte); +var + i:Integer; +begin + with ctx.builder do + begin + i:=GetFrameOffset(ctx.din.Operand[opr]); + movq(reg,[r_thrd+i]); + end; +end; + +procedure op_save(var ctx:t_jit_context2;opr:Byte;reg:TRegValue); +var + i:Integer; +begin + with ctx.builder do + begin + i:=GetFrameOffset(ctx.din.Operand[opr]); + movq([r_thrd+i],reg); + end; +end; + +// + procedure op_emit1(var ctx:t_jit_context2;const desc:t_op_type;hint:t_op_hint); var i:Integer; @@ -1644,16 +1688,14 @@ begin if (not (his_wo in hint)) or (his_ro in hint) then begin - i:=GetFrameOffset(ctx.din.Operand[1]); - movq(fix_size(new),[r_thrd+i]); + op_load(ctx,new,1); end; _R(desc,new); if not (his_ro in hint) then begin - i:=GetFrameOffset(ctx.din.Operand[1]); - movq([r_thrd+i],fix_size(new)); + op_save(ctx,1,fix_size(new)); end; end; @@ -1877,8 +1919,7 @@ var begin new2:=new_reg_size(r_tmp1,ctx.din.Operand[2]); - i:=GetFrameOffset(ctx.din.Operand[2]); - movq(new2,[r_thrd+i]); + op_load(ctx,new2,2); end; r_tmp2:=alloc_tmp(ctx,os64); @@ -1896,8 +1937,7 @@ var begin new2:=new_reg_size(r_tmp1,ctx.din.Operand[2]); - i:=GetFrameOffset(ctx.din.Operand[2]); - movq(new2,[r_thrd+i]); + op_load(ctx,new2,2); _RM(desc.mem_reg,new2,[flags(ctx)+r_tmp0]); end; @@ -1905,7 +1945,7 @@ var if (not tmp2_used2) then if (his_xchg in desc.hint) then begin - movq([r_thrd+i],fix_size(new2)); + op_save(ctx,2,fix_size(new2)); end; end; @@ -1952,8 +1992,7 @@ var if (not (his_wo in desc.hint)) or (his_ro in desc.hint) then begin - i:=GetFrameOffset(ctx.din.Operand[1]); - movq(fix_size(new1),[r_thrd+i]); + op_load(ctx,new1,1); end; imm:=0; @@ -1973,8 +2012,7 @@ var if not (his_ro in desc.hint) then begin - i:=GetFrameOffset(ctx.din.Operand[1]); - movq([r_thrd+i],fix_size(new1)); + op_save(ctx,1,fix_size(new1)); end; end; @@ -2010,10 +2048,9 @@ var Assert(not (his_rax in desc.hint)); - new1:=new_reg_size(r_tmp1,ctx.din.Operand[2]); + new2:=new_reg_size(r_tmp1,ctx.din.Operand[2]); - i:=GetFrameOffset(ctx.din.Operand[2]); - movq(fix_size(new1),[r_thrd+i]); + op_load(ctx,new2,2); imm:=0; if GetTargetOfs(ctx.din,ctx.code,3,imm) then @@ -2022,10 +2059,10 @@ var mem_size:=ctx.din.Operand[2].Size; Assert(mem_size<>os0); - op_rmi(ctx,desc,new1,[flags(ctx)+r_tmp0,mem_size],imm,imm_size); + op_rmi(ctx,desc,new2,[flags(ctx)+r_tmp0,mem_size],imm,imm_size); end else begin - _RM(desc.mem_reg,new1,[flags(ctx)+r_tmp0]); + _RM(desc.mem_reg,new2,[flags(ctx)+r_tmp0]); end; end; @@ -2155,8 +2192,7 @@ begin op_rri(ctx,desc,new2,new1,imm,mem_size,imm_size); //swapped - i:=GetFrameOffset(ctx.din.Operand[1]); - movq([r_thrd+i],fix_size(new1)); + op_save(ctx,1,fix_size(new1)); end else begin @@ -2201,8 +2237,7 @@ begin if (not (his_wo in desc.hint)) or (his_ro in desc.hint) then begin - i:=GetFrameOffset(ctx.din.Operand[1]); - movq(fix_size(new1),[r_thrd+i]); + op_load(ctx,new1,1); end; op_rr(ctx,desc,new1,new2,mem_size); @@ -2210,8 +2245,7 @@ begin if not tmp2_used1 then if not (his_ro in desc.hint) then begin - i:=GetFrameOffset(ctx.din.Operand[1]); - movq([r_thrd+i],fix_size(new1)); + op_save(ctx,1,fix_size(new1)); end; if (his_rax in desc.hint) then @@ -2230,8 +2264,6 @@ begin Assert(not (his_rax in desc.hint)); - i:=GetFrameOffset(ctx.din.Operand[2]); - imm:=0; if GetTargetOfs(ctx.din,ctx.code,3,imm) then begin @@ -2239,6 +2271,7 @@ begin mem_size:=ctx.din.Operand[1].Size; Assert(mem_size<>os0); + i:=GetFrameOffset(ctx.din.Operand[2]); op_rmi(ctx,desc,new1,[r_thrd+i,mem_size],imm,imm_size); end else @@ -2255,8 +2288,7 @@ begin new2:=new_reg_size(r_tmp0,ctx.din.Operand[2]); end; - i:=GetFrameOffset(ctx.din.Operand[2]); - movq(fix_size(new2),[r_thrd+i]); + op_load(ctx,new2,2); mem_size:=ctx.din.Operand[1].RegValue[0].ASize; Assert(mem_size<>os0); @@ -2264,6 +2296,7 @@ begin _RR(desc.mem_reg,new1,new2,mem_size); end else begin + i:=GetFrameOffset(ctx.din.Operand[2]); _RM(desc.reg_mem,new1,[r_thrd+i]); end; @@ -2285,13 +2318,11 @@ begin new1:=new_reg_size(r_tmp0,ctx.din.Operand[1]); new2:=new_reg_size(r_tmp1,ctx.din.Operand[2]); - i:=GetFrameOffset(ctx.din.Operand[2]); - movq(fix_size(new2),[r_thrd+i]); + op_load(ctx,new2,2); op_rri(ctx,desc,new2,new1,imm,mem_size,imm_size); //swapped - i:=GetFrameOffset(ctx.din.Operand[1]); - movq([r_thrd+i],fix_size(new1)); + op_save(ctx,1,fix_size(new1)); end else begin @@ -2319,8 +2350,7 @@ begin if not tmp2_used2 then begin - i:=GetFrameOffset(ctx.din.Operand[2]); - movq(fix_size(new2),[r_thrd+i]); + op_load(ctx,new2,2); end; i:=GetFrameOffset(ctx.din.Operand[1]); @@ -2351,8 +2381,7 @@ begin if (not (his_wo in desc.hint)) or (his_ro in desc.hint) then begin - i:=GetFrameOffset(ctx.din.Operand[1]); - movq(fix_size(new1),[r_thrd+i]); + op_load(ctx,new1,1); new1_load:=True; end; @@ -2364,8 +2393,7 @@ begin if not tmp2_used2 then if not new1_load then begin - i:=GetFrameOffset(ctx.din.Operand[2]); - movq(fix_size(new2),[r_thrd+i]); + op_load(ctx,new2,2); end; end else begin @@ -2379,8 +2407,7 @@ begin if not tmp2_used2 then begin - i:=GetFrameOffset(ctx.din.Operand[2]); - movq(fix_size(new2),[r_thrd+i]); + op_load(ctx,new2,2); end; end; @@ -2389,8 +2416,7 @@ begin if not tmp2_used1 then if not (his_ro in desc.hint) then begin - i:=GetFrameOffset(ctx.din.Operand[1]); - movq([r_thrd+i],fix_size(new1)); + op_save(ctx,1,fix_size(new1)); end; end; @@ -2412,8 +2438,6 @@ begin mem_size:=ctx.din.Operand[1].Size; Assert(mem_size<>os0); - i:=GetFrameOffset(ctx.din.Operand[1]); - Assert(not (his_rax in desc.hint)); imm:=0; @@ -2430,13 +2454,14 @@ begin movi64(new1,imm); - movq([r_thrd+i],fix_size(new1)); + op_save(ctx,1,fix_size(new1)); end else begin if (his_ro in desc.hint) or (mem_size<>os32) then begin + i:=GetFrameOffset(ctx.din.Operand[1]); op_mi(ctx,desc,[r_thrd+i,mem_size],imm,imm_size); end else begin @@ -2445,16 +2470,14 @@ begin if (not (his_wo in desc.hint)) or (his_ro in desc.hint) then begin - i:=GetFrameOffset(ctx.din.Operand[1]); - movq(fix_size(new1),[r_thrd+i]); + op_load(ctx,new1,1); end; op_ri(ctx,desc,new1,imm,mem_size,imm_size); if not (his_ro in desc.hint) then begin - i:=GetFrameOffset(ctx.din.Operand[1]); - movq([r_thrd+i],fix_size(new1)); + op_save(ctx,1,fix_size(new1)); end; end; @@ -2567,8 +2590,6 @@ begin mem_size:=ctx.din.Operand[1].Size; Assert(mem_size<>os0); - i:=GetFrameOffset(ctx.din.Operand[1]); - imm:=0; GetTargetOfs(ctx.din,ctx.code,2,imm); @@ -2577,59 +2598,57 @@ begin if (mem_size<>os32) then begin + i:=GetFrameOffset(ctx.din.Operand[1]); _MI8(desc.reg_im8,[r_thrd+i,mem_size],imm); end else begin new:=new_reg_size(r_tmp0,ctx.din.Operand[1]); - i:=GetFrameOffset(ctx.din.Operand[1]); - movq(fix_size(new),[r_thrd+i]); + op_load(ctx,new,1); _RI8(desc.reg_im8,new,imm); - movq([r_thrd+i],fix_size(new)); + op_save(ctx,1,fix_size(new)); end; end; mo_ctx_cl: begin mem_size:=ctx.din.Operand[1].Size; - i:=GetFrameOffset(ctx.din.Operand[1]); if (mem_size<>os32) then begin + i:=GetFrameOffset(ctx.din.Operand[1]); _M(desc.mem__cl,[r_thrd+i,mem_size]); end else begin new:=new_reg_size(r_tmp0,ctx.din.Operand[1]); - i:=GetFrameOffset(ctx.din.Operand[1]); - movq(fix_size(new),[r_thrd+i]); + op_load(ctx,new,1); _R(desc.mem__cl,new); - movq([r_thrd+i],fix_size(new)); + op_save(ctx,1,fix_size(new)); end; end; mo_ctx_one: begin mem_size:=ctx.din.Operand[1].Size; - i:=GetFrameOffset(ctx.din.Operand[1]); if (mem_size<>os32) then begin + i:=GetFrameOffset(ctx.din.Operand[1]); _M(desc.mem_one,[r_thrd+i,mem_size]); end else begin new:=new_reg_size(r_tmp0,ctx.din.Operand[1]); - i:=GetFrameOffset(ctx.din.Operand[1]); - movq(fix_size(new),[r_thrd+i]); + op_load(ctx,new,1); _R(desc.mem_one,new); - movq([r_thrd+i],fix_size(new)); + op_save(ctx,1,fix_size(new)); end; end; @@ -2644,7 +2663,6 @@ end; procedure op_emit_shift3(var ctx:t_jit_context2;const desc:t_op_shift); var - i:Integer; memop:t_memop_shift; mem_size:TOperandSize; link_next:t_jit_i_link; @@ -2675,8 +2693,7 @@ var //mem_ctx_imm new2:=new_reg_size(r_tmp1,ctx.din.Operand[2]); - i:=GetFrameOffset(ctx.din.Operand[2]); - movq(new2,[r_thrd+i]); + op_load(ctx,new2,2); end else begin //mem_reg_imm @@ -2694,8 +2711,7 @@ var //mem_ctx_cl new2:=new_reg_size(r_tmp1,ctx.din.Operand[2]); - i:=GetFrameOffset(ctx.din.Operand[2]); - movq(new2,[r_thrd+i]); + op_load(ctx,new2,2); end else begin //mem_reg_cl @@ -2769,8 +2785,7 @@ begin new1:=new_reg_size(r_tmp0,ctx.din.Operand[1]); - i:=GetFrameOffset(ctx.din.Operand[1]); - movq(new1,[r_thrd+i]); + op_load(ctx,new1,1); if cmp_reg(ctx.din.Operand[1],ctx.din.Operand[2]) then begin @@ -2779,8 +2794,7 @@ begin begin new2:=new_reg_size(r_tmp1,ctx.din.Operand[2]); - i:=GetFrameOffset(ctx.din.Operand[2]); - movq(new2,[r_thrd+i]); + op_load(ctx,new2,2); end; end else @@ -2789,14 +2803,12 @@ begin new1:=new_reg_size(r_tmp0,ctx.din.Operand[1]); new2:=new_reg(ctx.din.Operand[2]); - i:=GetFrameOffset(ctx.din.Operand[1]); - movq(new1,[r_thrd+i]); + op_load(ctx,new1,1); end; _RRI8(desc.reg_im8,new1,new2,imm,mem_size); - i:=GetFrameOffset(ctx.din.Operand[1]); - movq([r_thrd+i],fix_size(new1)); + op_save(ctx,1,fix_size(new1)); end; mo_ctx_cl: begin @@ -2809,8 +2821,7 @@ begin new1:=new_reg_size(r_tmp0,ctx.din.Operand[1]); - i:=GetFrameOffset(ctx.din.Operand[1]); - movq(new1,[r_thrd+i]); + op_load(ctx,new1,1); if cmp_reg(ctx.din.Operand[1],ctx.din.Operand[2]) then begin @@ -2819,8 +2830,7 @@ begin begin new2:=new_reg_size(r_tmp1,ctx.din.Operand[2]); - i:=GetFrameOffset(ctx.din.Operand[2]); - movq(new2,[r_thrd+i]); + op_load(ctx,new2,2); end; end else @@ -2829,14 +2839,12 @@ begin new1:=new_reg_size(r_tmp0,ctx.din.Operand[1]); new2:=new_reg(ctx.din.Operand[2]); - i:=GetFrameOffset(ctx.din.Operand[1]); - movq(new1,[r_thrd+i]); + op_load(ctx,new1,1); end; _RR(desc.mem__cl,new1,new2,mem_size); - i:=GetFrameOffset(ctx.din.Operand[1]); - movq([r_thrd+i],fix_size(new1)); + op_save(ctx,1,fix_size(new1)); end; mo_reg_imm8: @@ -2855,8 +2863,7 @@ begin new1:=new_reg(ctx.din.Operand[1]); new2:=new_reg_size(r_tmp0,ctx.din.Operand[2]); - i:=GetFrameOffset(ctx.din.Operand[2]); - movq(new2,[r_thrd+i]); + op_load(ctx,new2,2); _RRI8(desc.reg_im8,new1,new2,imm,mem_size); end; @@ -2871,8 +2878,7 @@ begin new1:=new_reg(ctx.din.Operand[1]); new2:=new_reg_size(r_tmp0,ctx.din.Operand[2]); - i:=GetFrameOffset(ctx.din.Operand[2]); - movq(new2,[r_thrd+i]); + op_load(ctx,new2,2); _RR(desc.mem__cl,new1,new2,mem_size); end @@ -2887,7 +2893,6 @@ end; procedure op_emit_avx2_rr(var ctx:t_jit_context2;const desc:t_op_type); var - i:Integer; new1,new2:TRegValue; begin if is_preserved(ctx.din.Operand[1]) then @@ -2899,8 +2904,7 @@ begin _VV(desc,new1,new2,new2.ASize); - i:=GetFrameOffset(ctx.din.Operand[1]); - movq([r_thrd+i],fix_size(new1)); + op_save(ctx,1,fix_size(new1)); end; end else begin @@ -3011,10 +3015,9 @@ begin begin new2:=new_reg_size(r_tmp0,ctx.din.Operand[1]); - _VV(desc.reg_mem,new2,new1,mem_size); + _VV(desc.reg_mem,new2,new1,mem_size); //swapped - i:=GetFrameOffset(ctx.din.Operand[1]); - movq([r_thrd+i],fix_size(new2)); + op_save(ctx,1,fix_size(new2)); end else begin i:=GetFrameOffset(ctx.din.Operand[1]); @@ -3137,14 +3140,14 @@ begin new1:=new_reg(ctx.din.Operand[1]); new2:=new_reg(ctx.din.Operand[2]); - i:=GetFrameOffset(ctx.din.Operand[3]); - imm:=0; if GetTargetOfs(ctx.din,ctx.code,4,imm) then begin + i:=GetFrameOffset(ctx.din.Operand[3]); _VVMI8(desc,new1,new2,[r_thrd+i,mem_size],imm); end else begin + i:=GetFrameOffset(ctx.din.Operand[3]); _VVM(desc,new1,new2,[r_thrd+i,mem_size]); end; @@ -3194,7 +3197,6 @@ end; //rri,mri procedure op_emit_avx3_imm8(var ctx:t_jit_context2;const desc:t_op_avx3_imm); var - i:Integer; memop:t_memop_type2; mem_size:TOperandSize; link_next:t_jit_i_link; @@ -3299,8 +3301,6 @@ begin mem_size:=ctx.din.Operand[1].Size; Assert(mem_size<>os0); - i:=GetFrameOffset(ctx.din.Operand[1]); - new2:=new_reg(ctx.din.Operand[2]); imm:=0; @@ -3312,7 +3312,7 @@ begin _VVI8(desc.mri,new2,new1,imm,mem_size); - movq([r_thrd+i],fix_size(new1)); + op_save(ctx,1,fix_size(new1)); end; mo_reg_ctx: @@ -3320,15 +3320,12 @@ begin mem_size:=ctx.din.Operand[2].Size; Assert(mem_size<>os0); - i:=GetFrameOffset(ctx.din.Operand[2]); - new1:=new_reg(ctx.din.Operand[1]); - new2:=new_reg_size(r_tmp0,ctx.din.Operand[2]); //rmi - movq(fix_size(new2),[r_thrd+i]); + op_load(ctx,new2,2); _VVI8(desc.rmi,new1,new2,imm,mem_size); end @@ -3342,7 +3339,6 @@ end; procedure op_emit_avx_F3(var ctx:t_jit_context2;const desc:t_op_type); var - i:Integer; memop:t_memop_type2; mem_size:TOperandSize; @@ -3367,10 +3363,10 @@ var //load? new1:=new_reg_size(r_tmp1,ctx.din.Operand[1]); + _VM_F3(desc,new1,[flags(ctx)+r_tmp0,mem_size]); - i:=GetFrameOffset(ctx.din.Operand[1]); - movq([r_thrd+i],fix_size(new1)); + op_save(ctx,1,fix_size(new1)); end; else Assert(False); @@ -3426,8 +3422,7 @@ begin _VV_F3(desc,new1,new2,mem_size); - i:=GetFrameOffset(ctx.din.Operand[1]); - movq([r_thrd+i],fix_size(new1)); + op_save(ctx,1,fix_size(new1)); end; mo_reg_ctx: begin @@ -3437,8 +3432,7 @@ begin mem_size:=ctx.din.Operand[2].Size; Assert(mem_size<>os0); - i:=GetFrameOffset(ctx.din.Operand[2]); - movq(fix_size(new2),[r_thrd+i]); + op_load(ctx,new2,2); _VV_F3(desc,new1,new2,mem_size); end; @@ -3450,16 +3444,14 @@ begin //load? - i:=GetFrameOffset(ctx.din.Operand[2]); - movq(fix_size(new2),[r_thrd+i]); + op_load(ctx,new2,2); mem_size:=ctx.din.Operand[1].RegValue[0].ASize; Assert(mem_size<>os0); _VV_F3(desc,new1,new2,mem_size); - i:=GetFrameOffset(ctx.din.Operand[1]); - movq([r_thrd+i],fix_size(new1)); + op_save(ctx,1,fix_size(new1)); end; else @@ -3519,7 +3511,6 @@ end; procedure op_emit_bmi_rmr(var ctx:t_jit_context2;const desc:t_op_type); var - i:Integer; mem_size:TOperandSize; new1,new2,new3:TRegValue; @@ -3555,8 +3546,7 @@ begin begin new2:=new_reg_size(r_tmp0,ctx.din.Operand[2]); // - i:=GetFrameOffset(ctx.din.Operand[2]); - movq(new2,[r_thrd+i]); + op_load(ctx,new2,2); end else begin new2:=new_reg(ctx.din.Operand[2]); @@ -3566,8 +3556,7 @@ begin begin new3:=new_reg_size(r_tmp1,ctx.din.Operand[3]); // - i:=GetFrameOffset(ctx.din.Operand[3]); - movq(new3,[r_thrd+i]); + op_load(ctx,new3,3); end else begin new3:=new_reg(ctx.din.Operand[3]); @@ -3577,18 +3566,16 @@ begin if is_preserved(ctx.din.Operand[1]) then begin - i:=GetFrameOffset(ctx.din.Operand[1]); - movq([r_thrd+i],fix_size(new1)); + op_save(ctx,1,fix_size(new1)); end; end; end; - // +// procedure op_emit_bmi_rrm(var ctx:t_jit_context2;const desc:t_op_type); var - i:Integer; mem_size:TOperandSize; new1,new2,new3:TRegValue; @@ -3609,8 +3596,7 @@ begin begin new2:=new_reg_size(r_tmp0,ctx.din.Operand[2]); // - i:=GetFrameOffset(ctx.din.Operand[2]); - movq(new2,[r_thrd+i]); + op_load(ctx,new2,2); end else begin new2:=new_reg(ctx.din.Operand[2]); @@ -3635,8 +3621,7 @@ begin begin new3:=new_reg_size(r_tmp1,ctx.din.Operand[3]); // - i:=GetFrameOffset(ctx.din.Operand[3]); - movq(new3,[r_thrd+i]); + op_load(ctx,new3,3); end else begin new3:=new_reg(ctx.din.Operand[3]); @@ -3646,8 +3631,7 @@ begin if is_preserved(ctx.din.Operand[1]) then begin - i:=GetFrameOffset(ctx.din.Operand[1]); - movq([r_thrd+i],fix_size(new1)); + op_save(ctx,1,fix_size(new1)); end; end; diff --git a/sys/test/kern_jit2_ops.pas b/sys/test/kern_jit2_ops.pas index 846b4b7d..d5e161b9 100644 --- a/sys/test/kern_jit2_ops.pas +++ b/sys/test/kern_jit2_ops.pas @@ -489,7 +489,7 @@ end; const movx_desc:t_op_desc=( mem_reg:(opt:[not_impl]); - reg_mem:(op:$00;index:0); + reg_mem:(op:$00;opt:[not_prefix]); reg_imm:(opt:[not_impl]); reg_im8:(opt:[not_impl]); hint:[his_mov,his_wo]; @@ -583,8 +583,8 @@ end; const movbe_desc:t_op_desc=( - mem_reg:(op:$0F38F1;index:0); - reg_mem:(op:$0F38F0;index:0); + mem_reg:(op:$0F38F1;opt:[not_prefix]); + reg_mem:(op:$0F38F0;opt:[not_prefix]); reg_imm:(opt:[not_impl]); reg_im8:(opt:[not_impl]); hint:[his_mov,his_wo]; @@ -602,18 +602,25 @@ begin end; const - movd_desc:t_op_desc=( //movq - mem_reg:(op:$0F7E;index:0); - reg_mem:(op:$0F6E;index:0); + mov_dq_xr_desc:t_op_desc=( + mem_reg:(op:$660F7E;opt:[not_prefix]); + reg_mem:(op:$660F6E;opt:[not_prefix]); reg_imm:(opt:[not_impl]); reg_im8:(opt:[not_impl]); hint:[his_mov,his_wo]; ); -const - movd_xmm_desc:t_op_desc=( //movq_xmm - mem_reg:(op:$660F7E;index:0); - reg_mem:(op:$660F6E;index:0); + mov_dq_mm_desc:t_op_desc=( + mem_reg:(op:$0F7E;opt:[not_prefix]); + reg_mem:(op:$0F6E;opt:[not_prefix]); + reg_imm:(opt:[not_impl]); + reg_im8:(opt:[not_impl]); + hint:[his_mov,his_wo]; + ); + + movq_xx_desc:t_op_desc=( + mem_reg:(opt:[not_impl]); + reg_mem:(op:$F30F7E;opt:[not_prefix]); reg_imm:(opt:[not_impl]); reg_im8:(opt:[not_impl]); hint:[his_mov,his_wo]; @@ -623,12 +630,33 @@ procedure op_movd(var ctx:t_jit_context2); begin if is_preserved(ctx.din) or is_memory(ctx.din) then begin - if is_xmm(ctx.din) then + if (ctx.dis.SimdOpcode=so66) then begin - op_emit2(ctx,movd_xmm_desc); + op_emit2(ctx,mov_dq_xr_desc); end else begin - op_emit2(ctx,movd_desc); + op_emit2(ctx,mov_dq_mm_desc); + end; + end else + begin + add_orig(ctx); + end; +end; + +procedure op_movq(var ctx:t_jit_context2); +begin + if is_preserved(ctx.din) or is_memory(ctx.din) then + begin + if (ctx.dis.SimdOpcode=soF3) then + begin + op_emit2(ctx,movq_xx_desc); + end else + if (ctx.dis.SimdOpcode=so66) then + begin + op_emit2(ctx,mov_dq_xr_desc); + end else + begin + op_emit2(ctx,mov_dq_mm_desc); end; end else begin @@ -639,7 +667,7 @@ end; const movsxd_desc:t_op_desc=( mem_reg:(opt:[not_impl]); - reg_mem:(op:$63;index:0); + reg_mem:(op:$63;opt:[not_prefix]); reg_imm:(opt:[not_impl]); reg_im8:(opt:[not_impl]); hint:[his_mov,his_wo]; @@ -656,6 +684,50 @@ begin end; end; +// + +const + movdqa_desc:t_op_desc=( + mem_reg:(op:$660F7F;opt:[not_prefix]); + reg_mem:(op:$660F6F;opt:[not_prefix]); + reg_imm:(opt:[not_impl]); + reg_im8:(opt:[not_impl]); + hint:[his_mov,his_wo,his_align]; + ); + +procedure op_movdqa(var ctx:t_jit_context2); +begin + if is_memory(ctx.din) then + begin + op_emit2(ctx,movdqa_desc); + end else + begin + add_orig(ctx); + end; +end; + +const + movdqu_desc:t_op_desc=( + mem_reg:(op:$F30F7F;opt:[not_prefix]); + reg_mem:(op:$F30F6F;opt:[not_prefix]); + reg_imm:(opt:[not_impl]); + reg_im8:(opt:[not_impl]); + hint:[his_mov,his_wo]; + ); + +procedure op_movdqu(var ctx:t_jit_context2); +begin + if is_memory(ctx.din) then + begin + op_emit2(ctx,movdqu_desc); + end else + begin + add_orig(ctx); + end; +end; + +// + const SETcc_8:array[OPSc_o..OPSc_nle] of Byte=( $90,$91,$92,$93,$94,$95,$96,$97, @@ -1033,20 +1105,18 @@ const procedure op_bswap(var ctx:t_jit_context2); var new:TRegValue; - i:Integer; begin if is_preserved(ctx.din) then begin with ctx.builder do begin - i:=GetFrameOffset(ctx.din.Operand[1]); new:=new_reg_size(r_tmp0,ctx.din.Operand[1]); - movq(new,[r_thrd+i]); + op_load(ctx,new,1); _O(bswap_desc,new); - movq([r_thrd+i],fix_size(new)); + op_save(ctx,1,fix_size(new)); end; end else begin @@ -1056,31 +1126,29 @@ end; procedure op_lea(var ctx:t_jit_context2); var - new1:TRegValue; - i:Integer; + new:TRegValue; begin if is_preserved(ctx.din) then begin if is_preserved(ctx.din.Operand[1]) then begin - new1:=new_reg_size(r_tmp0,ctx.din.Operand[1]); - build_lea(ctx,2,new1,[not_use_segment]); + new:=new_reg_size(r_tmp0,ctx.din.Operand[1]); + build_lea(ctx,2,new,[not_use_segment]); // - i:=GetFrameOffset(ctx.din.Operand[1].RegValue[0]); - ctx.builder.movq([r_thrd+i],fix_size(new1)); + op_save(ctx,1,fix_size(new)); end else begin - new1:=new_reg(ctx.din.Operand[1]); + new:=new_reg(ctx.din.Operand[1]); // - if (new1.ASize=os16) then + if (new.ASize=os16) then begin //low part build_lea(ctx,2,r_tmp0); // - ctx.builder.movq(new1,r_tmp0); + ctx.builder.movq(new,r_tmp0); end else begin - build_lea(ctx,2,new1); + build_lea(ctx,2,new); end; end; end else @@ -1775,7 +1843,7 @@ procedure op_pxor(var ctx:t_jit_context2); begin if is_memory(ctx.din) then begin - if is_xmm(ctx.din) then + if (ctx.dis.SimdOpcode=so66) then begin op_emit2(ctx,pxor_xmm_desc); end else @@ -1915,7 +1983,7 @@ end; const cvtsi2ss_desc:t_op_desc=( mem_reg:(opt:[not_impl]); - reg_mem:(op:$F30F2A); + reg_mem:(op:$F30F2A;opt:[not_prefix]); reg_imm:(opt:[not_impl]); reg_im8:(opt:[not_impl]); hint:[his_wo]; @@ -1935,7 +2003,7 @@ end; const cvtsd2si_desc:t_op_desc=( mem_reg:(opt:[not_impl]); - reg_mem:(op:$F20F2D); + reg_mem:(op:$F20F2D;opt:[not_prefix]); reg_imm:(opt:[not_impl]); reg_im8:(opt:[not_impl]); hint:[his_wo]; @@ -1955,13 +2023,13 @@ end; const cvtss2si_desc:t_op_desc=( mem_reg:(opt:[not_impl]); - reg_mem:(op:$F30F2D); + reg_mem:(op:$F30F2D;opt:[not_prefix]); reg_imm:(opt:[not_impl]); reg_im8:(opt:[not_impl]); hint:[his_wo]; ); -procedure op_cvtss2s(var ctx:t_jit_context2); +procedure op_cvtss2si(var ctx:t_jit_context2); begin if is_preserved(ctx.din) or is_memory(ctx.din) then begin @@ -2065,10 +2133,16 @@ begin jit_cbs[OPPnone,OPmov ,OPSc_be]:=@op_movbe; jit_cbs[OPPnone,OPmov ,OPSx_d ]:=@op_movd; - jit_cbs[OPPnone,OPmov ,OPSx_q ]:=@op_movd; + jit_cbs[OPPnone,OPmov ,OPSx_q ]:=@op_movq; jit_cbs[OPPnone,OPmovsx,OPSx_d ]:=@op_movsxd; + jit_cbs[OPPnone,OPmovdq2q,OPSnone]:=@add_orig; + jit_cbs[OPPnone,OPmovq2dq,OPSnone]:=@add_orig; + + jit_cbs[OPPnone,OPmov,OPSx_dqa]:=@op_movdqa; + jit_cbs[OPPnone,OPmov,OPSx_dqu]:=@op_movdqu; + jit_cbs[OPPnone,OPtest,OPSnone]:=@op_test; jit_cbs[OPPnone,OPcmp ,OPSnone]:=@op_cmp; @@ -2240,7 +2314,7 @@ begin jit_cbs[OPPnone,OPcvtsi2,OPSx_ss]:=@op_cvtsi2ss; jit_cbs[OPPnone,OPcvtsd2,OPSx_si]:=@op_cvtsd2si; - jit_cbs[OPPnone,OPcvtss2,OPSx_si]:=@op_cvtss2s; + jit_cbs[OPPnone,OPcvtss2,OPSx_si]:=@op_cvtss2si; jit_cbs[OPPnone,OPsqrt,OPSx_sd]:=@op_sqrtsd; jit_cbs[OPPnone,OPsqrt,OPSx_ss]:=@op_sqrtss; diff --git a/sys/test/kern_jit2_ops_avx.pas b/sys/test/kern_jit2_ops_avx.pas index 3316da1b..a7bb7b7b 100644 --- a/sys/test/kern_jit2_ops_avx.pas +++ b/sys/test/kern_jit2_ops_avx.pas @@ -194,7 +194,7 @@ begin end; const - vmovq_desc:t_op_desc=( //vmovd_desc + vmov_dq_desc:t_op_desc=( mem_reg:(op:$7E;index:1;mm:1); reg_mem:(op:$6E;index:1;mm:1); reg_imm:(opt:[not_impl]); @@ -202,13 +202,38 @@ const hint:[his_mov,his_wo]; ); -procedure op_vmovq(var ctx:t_jit_context2); //op_vmovd +procedure op_vmovd(var ctx:t_jit_context2); begin - //TODO fix this - if is_preserved(ctx.din) or is_memory(ctx.din) then begin - op_emit_avx2(ctx,vmovq_desc); + op_emit_avx2(ctx,vmov_dq_desc); + end else + begin + add_orig(ctx); + end; +end; + +const + vmovq_desc:t_op_desc=( + mem_reg:(op:$D6;index:1;mm:1); + reg_mem:(op:$7E;index:2;mm:1); + reg_imm:(opt:[not_impl]); + reg_im8:(opt:[not_impl]); + hint:[his_mov,his_wo]; + ); + +procedure op_vmovq(var ctx:t_jit_context2); +begin + if is_preserved(ctx.din) or is_memory(ctx.din) then + begin + if (ctx.dis.SimdOpcode=so66) and + (ctx.dis.opcode in [$6E,$7E]) then + begin + op_emit_avx2(ctx,vmov_dq_desc); + end else + begin + op_emit_avx2(ctx,vmovq_desc); + end; end else begin add_orig(ctx); @@ -666,6 +691,46 @@ begin end; end; +const + vpmovmskb_desc:t_op_type=(op:$D7;index:1;mm:1); + +procedure op_vpmovmskb(var ctx:t_jit_context2); +begin + if is_preserved(ctx.din) then + begin + op_emit_avx2_rr(ctx,vpmovmskb_desc); + end else + begin + add_orig(ctx); + end; +end; + +const + vmaskmovps_rrm_desc:t_op_type=( + op:$2C;index:1;mm:2; + ); + + vmaskmovps_mrr_desc:t_op_type=( + op:$2E;index:1;mm:2; + ); + +procedure op_vmaskmovps(var ctx:t_jit_context2); +begin + if is_memory(ctx.din) then + begin + if is_memory(ctx.din.Operand[3]) then + begin + op_emit_avx3(ctx,vmaskmovps_rrm_desc); + end else + begin + op_emit_avx3(ctx,vmaskmovps_mrr_desc); + end; + end else + begin + add_orig(ctx); + end; +end; + // const @@ -674,7 +739,7 @@ const reg_mem:(op:$2E;index:0;mm:1); reg_imm:(opt:[not_impl]); reg_im8:(opt:[not_impl]); - hint:[]; + hint:[his_wo]; ); procedure op_vucomiss(var ctx:t_jit_context2); @@ -694,7 +759,7 @@ const reg_mem:(op:$2E;index:1;mm:1); reg_imm:(opt:[not_impl]); reg_im8:(opt:[not_impl]); - hint:[]; + hint:[his_wo]; ); procedure op_vucomisd(var ctx:t_jit_context2); @@ -1791,6 +1856,40 @@ end; // +const + vshufps_desc:t_op_type=( + op:$C6;index:0;mm:1; + ); + +procedure op_vshufps(var ctx:t_jit_context2); +begin + if is_memory(ctx.din) then + begin + op_emit_avx3(ctx,vshufps_desc); + end else + begin + add_orig(ctx); + end; +end; + +const + vshufpd_desc:t_op_type=( + op:$C6;index:1;mm:1; + ); + +procedure op_vshufpd(var ctx:t_jit_context2); +begin + if is_memory(ctx.din) then + begin + op_emit_avx3(ctx,vshufpd_desc); + end else + begin + add_orig(ctx); + end; +end; + +// + const vpermilps_rrm_desc:t_op_type=( op:$0C;index:1;mm:2; @@ -1933,34 +2032,6 @@ begin end; end; - -const - vmaskmovps_rrm_desc:t_op_type=( - op:$2C;index:1;mm:2; - ); - - vmaskmovps_mrr_desc:t_op_type=( - op:$2E;index:1;mm:2; - ); - - -procedure op_vmaskmovps(var ctx:t_jit_context2); -begin - if is_memory(ctx.din) then - begin - if is_memory(ctx.din.Operand[3]) then - begin - op_emit_avx3(ctx,vmaskmovps_rrm_desc); - end else - begin - op_emit_avx3(ctx,vmaskmovps_mrr_desc); - end; - end else - begin - add_orig(ctx); - end; -end; - const vpxor_desc:t_op_type=( op:$EF;index:1;mm:1; @@ -2169,6 +2240,22 @@ begin end; end; +const + vpblendvb_desc:t_op_type=( + op:$4C;index:1;mm:3 + ); + +procedure op_vpblendvb(var ctx:t_jit_context2); +begin + if is_memory(ctx.din) then + begin + op_emit_avx4(ctx,vpblendvb_desc); + end else + begin + add_orig(ctx); + end; +end; + const vblendvps_desc:t_op_type=( op:$4A;index:1;mm:3 @@ -2271,7 +2358,7 @@ const reg_mem:(op:$5B;index:2;mm:1); reg_imm:(opt:[not_impl]); reg_im8:(opt:[not_impl]); - hint:[]; + hint:[his_wo]; ); procedure op_vcvttps2dq(var ctx:t_jit_context2); @@ -2291,7 +2378,7 @@ const reg_mem:(op:$E6;index:1;mm:1); reg_imm:(opt:[not_impl]); reg_im8:(opt:[not_impl]); - hint:[]; + hint:[his_wo]; ); procedure op_vcvttpd2dq(var ctx:t_jit_context2); @@ -2311,7 +2398,7 @@ const reg_mem:(op:$5B;index:0;mm:1); reg_imm:(opt:[not_impl]); reg_im8:(opt:[not_impl]); - hint:[]; + hint:[his_wo]; ); procedure op_vcvtdq2ps(var ctx:t_jit_context2); @@ -2331,7 +2418,7 @@ const reg_mem:(op:$E6;index:2;mm:1); reg_imm:(opt:[not_impl]); reg_im8:(opt:[not_impl]); - hint:[]; + hint:[his_wo]; ); procedure op_vcvtdq2pd(var ctx:t_jit_context2); @@ -2351,7 +2438,7 @@ const reg_mem:(op:$2C;index:2;mm:1); reg_imm:(opt:[not_impl]); reg_im8:(opt:[not_impl]); - hint:[]; + hint:[his_wo]; ); procedure op_vcvttss2si(var ctx:t_jit_context2); @@ -2371,7 +2458,7 @@ const reg_mem:(op:$2C;index:3;mm:1); reg_imm:(opt:[not_impl]); reg_im8:(opt:[not_impl]); - hint:[]; + hint:[his_wo]; ); procedure op_vcvttsd2si(var ctx:t_jit_context2); @@ -2385,6 +2472,86 @@ begin end; end; +const + vcvtpd2ps_desc:t_op_desc=( + mem_reg:(opt:[not_impl]); + reg_mem:(op:$5A;index:1;mm:1); + reg_imm:(opt:[not_impl]); + reg_im8:(opt:[not_impl]); + hint:[his_wo]; + ); + +procedure op_vcvtpd2ps(var ctx:t_jit_context2); +begin + if is_preserved(ctx.din) or is_memory(ctx.din) then + begin + op_emit_avx2(ctx,vcvtpd2ps_desc); + end else + begin + add_orig(ctx); + end; +end; + +const + vcvtpd2dq_desc:t_op_desc=( + mem_reg:(opt:[not_impl]); + reg_mem:(op:$E6;index:3;mm:1); + reg_imm:(opt:[not_impl]); + reg_im8:(opt:[not_impl]); + hint:[his_wo]; + ); + +procedure op_vcvtpd2dq(var ctx:t_jit_context2); +begin + if is_preserved(ctx.din) or is_memory(ctx.din) then + begin + op_emit_avx2(ctx,vcvtpd2dq_desc); + end else + begin + add_orig(ctx); + end; +end; + +const + vcvtps2pd_desc:t_op_desc=( + mem_reg:(opt:[not_impl]); + reg_mem:(op:$5A;index:0;mm:1); + reg_imm:(opt:[not_impl]); + reg_im8:(opt:[not_impl]); + hint:[his_wo]; + ); + +procedure op_vcvtps2pd(var ctx:t_jit_context2); +begin + if is_memory(ctx.din) then + begin + op_emit_avx2(ctx,vcvtps2pd_desc); + end else + begin + add_orig(ctx); + end; +end; + +const + vcvtps2dq_desc:t_op_desc=( + mem_reg:(opt:[not_impl]); + reg_mem:(op:$5B;index:1;mm:1); + reg_imm:(opt:[not_impl]); + reg_im8:(opt:[not_impl]); + hint:[his_wo]; + ); + +procedure op_vcvtps2dq(var ctx:t_jit_context2); +begin + if is_memory(ctx.din) then + begin + op_emit_avx2(ctx,vcvtps2dq_desc); + end else + begin + add_orig(ctx); + end; +end; + // const @@ -2535,6 +2702,8 @@ begin end; end; +// + const vsqrtss_desc:t_op_type=( op:$51;index:2;mm:1 @@ -2567,13 +2736,54 @@ begin end; end; +// + +const + vrsqrtps_desc:t_op_desc=( + mem_reg:(opt:[not_impl]); + reg_mem:(op:$52;index:0;mm:1); + reg_imm:(opt:[not_impl]); + reg_im8:(opt:[not_impl]); + hint:[his_wo]; + ); + +procedure op_vrsqrtps(var ctx:t_jit_context2); +begin + if is_memory(ctx.din) then + begin + op_emit_avx2(ctx,vrsqrtps_desc); + end else + begin + add_orig(ctx); + end; +end; + +const + vrsqrtss_desc:t_op_type=( + op:$52;index:2;mm:1 + ); + +procedure op_vrsqrtss(var ctx:t_jit_context2); +begin + if is_memory(ctx.din) then + begin + op_emit_avx3(ctx,vrsqrtss_desc); + end else + begin + add_orig(ctx); + end; +end; + + +// + const vbroadcastss_desc:t_op_desc=( mem_reg:(opt:[not_impl]); reg_mem:(op:$18;index:1;mm:2;opt:[not_vex_len]); reg_imm:(opt:[not_impl]); reg_im8:(opt:[not_impl]); - hint:[]; + hint:[his_wo]; ); procedure op_vbroadcastss(var ctx:t_jit_context2); @@ -2613,7 +2823,7 @@ const reg_mem:(op:$1A;index:1;mm:2;opt:[not_vex_len]); reg_imm:(opt:[not_impl]); reg_im8:(opt:[not_impl]); - hint:[]; + hint:[his_wo]; ); procedure op_vbroadcastf128(var ctx:t_jit_context2); @@ -2743,6 +2953,58 @@ begin end; end; +// + +const + vpabsb_desc:t_op_type=( + op:$1C;index:1;mm:2 + ); + +procedure op_vpabsb(var ctx:t_jit_context2); +begin + if is_memory(ctx.din) then + begin + op_emit_avx3(ctx,vpabsb_desc); + end else + begin + add_orig(ctx); + end; +end; + +const + vpabsw_desc:t_op_type=( + op:$1D;index:1;mm:2 + ); + +procedure op_vpabsw(var ctx:t_jit_context2); +begin + if is_memory(ctx.din) then + begin + op_emit_avx3(ctx,vpabsw_desc); + end else + begin + add_orig(ctx); + end; +end; + +const + vpabsd_desc:t_op_type=( + op:$1E;index:1;mm:2 + ); + +procedure op_vpabsd(var ctx:t_jit_context2); +begin + if is_memory(ctx.din) then + begin + op_emit_avx3(ctx,vpabsd_desc); + end else + begin + add_orig(ctx); + end; +end; + +// + const vpsrad_desc:t_op_type=( op:$E2;index:1;mm:1 @@ -2903,7 +3165,7 @@ begin jit_cbs[OPPv,OPmovddup,OPSnone]:=@op_vmovddup; - jit_cbs[OPPv,OPmov ,OPSx_d ]:=@op_vmovq; + jit_cbs[OPPv,OPmov ,OPSx_d ]:=@op_vmovd; jit_cbs[OPPv,OPmov ,OPSx_q ]:=@op_vmovq; jit_cbs[OPPv,OPmov ,OPSx_ss ]:=@op_vmovss; @@ -2914,6 +3176,8 @@ begin jit_cbs[OPPv,OPmovl,OPSx_pd]:=@op_vmovlpd; jit_cbs[OPPv,OPmovh,OPSx_pd]:=@op_vmovhpd; + jit_cbs[OPPv,OPmovlh,OPSx_ps]:=@add_orig; + jit_cbs[OPPv,OPmovsldup,OPSnone]:=@op_vmovsldup; jit_cbs[OPPv,OPmovshdup,OPSnone]:=@op_vmovshdup; @@ -2934,6 +3198,10 @@ begin jit_cbs[OPPv,OPmovmsk,OPSx_ps]:=@op_vmovmskps; jit_cbs[OPPv,OPmovmsk,OPSx_pd]:=@op_vmovmskpd; + jit_cbs[OPPv,OPpmovmskb,OPSnone]:=@op_vpmovmskb; + + jit_cbs[OPPv,OPmaskmov,OPSx_ps]:=@op_vmaskmovps; + jit_cbs[OPPv,OPucomi,OPSx_ss]:=@op_vucomiss; jit_cbs[OPPv,OPucomi,OPSx_sd]:=@op_vucomisd; @@ -3024,6 +3292,9 @@ begin jit_cbs[OPPv,OPpshuf ,OPSx_hw]:=@op_vpshufhw; jit_cbs[OPPv,OPpshuf ,OPSx_lw]:=@op_vpshuflw; + jit_cbs[OPPv,OPshuf ,OPSx_ps]:=@op_vshufps; + jit_cbs[OPPv,OPshuf ,OPSx_pd]:=@op_vshufpd; + jit_cbs[OPPnone,OPvpermil,OPSx_ps]:=@op_vpermilps; jit_cbs[OPPnone,OPvpermil,OPSx_pd]:=@op_vpermilpd; @@ -3035,7 +3306,6 @@ begin jit_cbs[OPPv,OPpsrl,OPSx_dq]:=@add_orig; jit_cbs[OPPv,OPpminu ,OPSx_d ]:=@op_vpminud; - jit_cbs[OPPv,OPmaskmov,OPSx_ps]:=@op_vmaskmovps; jit_cbs[OPPv,OPpxor ,OPSnone]:=@op_vpxor; jit_cbs[OPPv,OPor ,OPSx_ps]:=@op_vorps; jit_cbs[OPPv,OPor ,OPSx_pd]:=@op_vorpd; @@ -3051,6 +3321,8 @@ begin jit_cbs[OPPv,OPblend ,OPSx_pd]:=@op_vblendpd; jit_cbs[OPPv,OPpblend ,OPSx_w ]:=@op_vpblendw; + jit_cbs[OPPv,OPpblendvb,OPSnone]:=@op_vpblendvb; + jit_cbs[OPPv,OPblendv ,OPSx_ps]:=@op_vblendvps; jit_cbs[OPPv,OPblendv ,OPSx_pd]:=@op_vblendvpd; @@ -3066,6 +3338,12 @@ begin jit_cbs[OPPv,OPcvttss2,OPSx_si]:=@op_vcvttss2si; jit_cbs[OPPv,OPcvttsd2,OPSx_si]:=@op_vcvttsd2si; + jit_cbs[OPPv,OPcvtpd2 ,OPSx_ps]:=@op_vcvtpd2ps; + jit_cbs[OPPv,OPcvtpd2 ,OPSx_dq]:=@op_vcvtpd2dq; + + jit_cbs[OPPv,OPcvtps2,OPSx_pd]:=@op_vcvtps2pd; + jit_cbs[OPPv,OPcvtps2,OPSx_dq]:=@op_vcvtps2dq; + jit_cbs[OPPnone,OPbextr,OPSnone]:=@op_bextr; jit_cbs[OPPnone,OPandn ,OPSnone]:=@op_andn; @@ -3085,6 +3363,9 @@ begin jit_cbs[OPPv,OPsqrt ,OPSx_sd]:=@op_vsqrtsd; jit_cbs[OPPv,OPsqrt ,OPSx_ss]:=@op_vsqrtss; + jit_cbs[OPPv,OPrsqrt,OPSx_ps]:=@op_vrsqrtps; + jit_cbs[OPPv,OPrsqrt,OPSx_ss]:=@op_vrsqrtss; + jit_cbs[OPPnone,OPvbroadcast,OPSx_ss ]:=@op_vbroadcastss; jit_cbs[OPPnone,OPvbroadcast,OPSx_sd ]:=@op_vbroadcastsd; jit_cbs[OPPnone,OPvbroadcast,OPSx_f128]:=@op_vbroadcastf128; @@ -3099,6 +3380,10 @@ begin jit_cbs[OPPv,OPpackusdw,OPSnone]:=@op_vpackusdw; jit_cbs[OPPv,OPpackuswb,OPSnone]:=@op_vpackuswb; + jit_cbs[OPPv,OPpabs,OPSx_b]:=@op_vpabsb; + jit_cbs[OPPv,OPpabs,OPSx_w]:=@op_vpabsw; + jit_cbs[OPPv,OPpabs,OPSx_d]:=@op_vpabsd; + jit_cbs[OPPv,OPpsra,OPSx_d]:=@op_vpsrad; jit_cbs[OPPv,OPpsll,OPSx_d]:=@op_vpslld; jit_cbs[OPPv,OPpsll,OPSx_q]:=@op_vpsllq;