JIT: add jcxz/jecxz/jrcxz, leave, lods, bsf/bsr and additional SSE/AVX handlers.

Text before the first "diff --git" line is ignored by git apply / patch.
NOTE: this patch was recovered from a whitespace-collapsed copy. Indentation and
column alignment were reconstructed, so apply it with whitespace-insensitive
matching (git apply --ignore-whitespace, or patch -l).
Fix relative to the collapsed copy: _op_lods reads through and advances rsi
(the LODS source index) instead of rdi.

diff --git a/rtl/x86_jit.pas b/rtl/x86_jit.pas
index fcbac476..de9c31de 100644
--- a/rtl/x86_jit.pas
+++ b/rtl/x86_jit.pas
@@ -301,6 +301,7 @@ type
   function jmp (_label_id:t_jit_i_link;size:TOperandSize=os32):t_jit_i_link;
   function jcc (op:TOpCodeSuffix;_label_id:t_jit_i_link;size:TOperandSize=os32):t_jit_i_link;
   function loop(op:TOpCodeSuffix;_label_id:t_jit_i_link;size:TAddressSize):t_jit_i_link;
+  function jcxz(_label_id:t_jit_i_link;size:TAddressSize):t_jit_i_link; //emit jrcxz/jecxz with a 1-byte displacement linked to _label_id
   function movj(reg:TRegValue;mem:t_jit_leas;_label_id:t_jit_i_link):t_jit_i_link;
   function leaj(reg:TRegValue;mem:t_jit_leas;_label_id:t_jit_i_link):t_jit_i_link;
   //
@@ -1321,6 +1322,33 @@ begin
  LinkLabel(Result.ALink);
 end;
 
+function t_jit_builder.jcxz(_label_id:t_jit_i_link;size:TAddressSize):t_jit_i_link; //emits [$67] $E3 disp8 and returns a link to the emitted instruction
+var
+ ji:t_jit_instruction;
+begin
+ ji:=default_jit_instruction;
+
+ if (size=as32) then
+ begin
+  ji.EmitByte($67); //Address-size override prefix (32): test ecx instead of rcx
+ end;
+
+ ji.EmitByte($E3); //opcode of jcxz/jecxz/jrcxz (short form only)
+
+ ji.ALink.AType  :=_label_id.AType;
+ ji.ALink.ASize  :=1; //the displacement occupies one byte
+ ji.ALink.AOffset:=ji.ASize; //taken before the placeholder below is emitted
+ ji.ALink.ALink  :=_label_id.ALink;
+
+ ji.EmitByte(0); //displacement placeholder
+
+ _add(ji);
+
+ Result.ALink:=TAILQ_LAST(@ACodeChunkCurr^.AInstructions); //the instruction just added
+ Result.AType:=lnkLabelBefore;
+ LinkLabel(Result.ALink);
+end;
+
 function t_jit_builder.movj(reg:TRegValue;mem:t_jit_leas;_label_id:t_jit_i_link):t_jit_i_link;
 begin
  movq(reg,mem);
diff --git a/sys/test/kern_jit2.pas b/sys/test/kern_jit2.pas
index 65bdd814..717e8c6e 100644
--- a/sys/test/kern_jit2.pas
+++ b/sys/test/kern_jit2.pas
@@ -512,6 +512,49 @@ begin
  end;
 end;
 
+procedure op_jcxz(var ctx:t_jit_context2); //handler registered for OPjcxz/OPjecxz/OPjrcxz in the table further down
+var
+ id1,id2,id3:t_jit_i_link;
+ ofs:Int64;
+ dst:Pointer;
+ link:t_jit_i_link;
+begin
+ ofs:=0;
+ GetTargetOfs(ctx.din,ctx.code,1,ofs);
+
+ dst:=ctx.ptr_next+ofs; //branch target = next instruction + decoded offset
+
+ id1:=ctx.builder.jcxz(nil_link,ctx.dis.AddressSize); //taken path; its label is bound below
+
+ if ctx.is_text_addr(QWORD(dst)) and
+    (not exist_entry(dst)) then
+ begin
+  link:=ctx.get_link(dst);
+
+  id2:=ctx.builder.jmp(nil_link,os8); //not taken: short jump over the taken-path code
+   id1._label:=ctx.builder.get_curr_label.after; //taken path starts here
+   id3:=ctx.builder.jmp(nil_link); //NOTE(review): emitted with nil_link even when link<>nil_link - confirm intended
+  id2._label:=ctx.builder.get_curr_label.after; //fall-through continues here
+
+  if (link<>nil_link) then
+  begin
+   ctx.add_forward_point(fpCall,dst);
+  end else
+  begin
+   ctx.add_forward_point(fpCall,id3,dst);
+  end;
+ end else
+ begin
+
+  id2:=ctx.builder.jmp(nil_link,os8); //not taken: short jump over the dispatcher call
+   id1._label:=ctx.builder.get_curr_label.after; //taken path starts here
+   op_set_rax_imm(ctx,Int64(dst)); //pass the target address in rax
+   op_jmp_dispatcher(ctx);
+  id2._label:=ctx.builder.get_curr_label.after; //fall-through continues here
+
+ end;
+end;
+
 const
  movsx8_desc:t_op_type=(op:$0FBE);
  movsxd_desc:t_op_type=(op:$63);
@@ -611,6 +654,36 @@ begin
  end;
 end;
 
+procedure op_leave(var ctx:t_jit_context2); //emulate leave on the rsp/rbp values stored in the thread frame
+var
+ new,stack:TRegValue;
+begin
+ //mov rsp,rbp
+ //mov rbp,[rsp]
+ //lea rsp,[rsp+len]
+
+ Assert(ctx.dis.AddressSize=as64,'prefix $67 TODO');
+
+ with ctx.builder do
+ begin
+  stack:=r_tmp0;
+  new  :=r_tmp0; //same register as stack, which is why rsp is reloaded further down
+
+  op_load_rbp(ctx,stack);
+  op_save_rsp(ctx,stack); //rsp:=rbp
+
+  call_far(@uplift_jit); //in/out:rax uses:r14
+
+  movq(new,[stack]); //read the saved rbp through the uplifted address
+  op_save_rbp(ctx,new);
+
+  op_load_rsp(ctx,stack); //reload rsp: r_tmp0 was overwritten above
+  leaq(stack,[stack+OPERAND_BYTES[os64]]); //pop one 64-bit slot
+  op_save_rsp(ctx,stack);
+ end;
+
+end;
+
 procedure op_popf(var ctx:t_jit_context2);
 var
  mem_size:TOperandSize;
@@ -833,12 +906,18 @@ begin
  jit_cbs[OPPnone,OPloop,OPSc_ne]:=@op_loop;
  jit_cbs[OPPnone,OPloop,OPSc_e ]:=@op_loop;
 
+ jit_cbs[OPPnone,OPjcxz ,OPSnone]:=@op_jcxz;
+ jit_cbs[OPPnone,OPjecxz,OPSnone]:=@op_jcxz;
+ jit_cbs[OPPnone,OPjrcxz,OPSnone]:=@op_jcxz;
+
  jit_cbs[OPPnone,OPpush,OPSnone]:=@op_push;
  jit_cbs[OPPnone,OPpop ,OPSnone]:=@op_pop;
 
  jit_cbs[OPPnone,OPpushf ,OPSnone]:=@op_pushf;
  jit_cbs[OPPnone,OPpushf ,OPSx_q ]:=@op_pushf;
 
+ jit_cbs[OPPnone,OPleave ,OPSnone]:=@op_leave;
+
  jit_cbs[OPPnone,OPpopf ,OPSnone]:=@op_popf;
  jit_cbs[OPPnone,OPpopf ,OPSx_q ]:=@op_popf;
 
diff --git a/sys/test/kern_jit2_ctx.pas b/sys/test/kern_jit2_ctx.pas
index fd85140a..abe81662 100644
--- a/sys/test/kern_jit2_ctx.pas
+++ b/sys/test/kern_jit2_ctx.pas
@@ -211,6 +211,9 @@ procedure op_save_rax(var ctx:t_jit_context2;reg:TRegValue);
 procedure op_load_rsp(var ctx:t_jit_context2;reg:TRegValue);
 procedure op_save_rsp(var ctx:t_jit_context2;reg:TRegValue);
 
+procedure op_load_rbp(var ctx:t_jit_context2;reg:TRegValue); //reg:=rbp slot of the thread frame
+procedure op_save_rbp(var ctx:t_jit_context2;reg:TRegValue); //rbp slot of the thread frame:=reg
+
 procedure op_load(var ctx:t_jit_context2;reg:TRegValue;opr:Byte);
 procedure op_save(var ctx:t_jit_context2;opr:Byte;reg:TRegValue);
 
@@ -1341,6 +1344,30 @@ end;
 
 //
 
+procedure op_load_rbp(var ctx:t_jit_context2;reg:TRegValue); //emit: mov reg,[r_thrd+GetFrameOffset(rbp)]
+var
+ i:Integer;
+begin
+ with ctx.builder do
+ begin
+  i:=GetFrameOffset(rbp);
+  movq(reg,[r_thrd+i]);
+ end;
+end;
+
+procedure op_save_rbp(var ctx:t_jit_context2;reg:TRegValue); //emit: mov [r_thrd+GetFrameOffset(rbp)],reg
+var
+ i:Integer;
+begin
+ with ctx.builder do
+ begin
+  i:=GetFrameOffset(rbp);
+  movq([r_thrd+i],reg);
+ end;
+end;
+
+//
+
 procedure op_load(var ctx:t_jit_context2;reg:TRegValue;opr:Byte);
 var
  i:Integer;
@@ -2183,7 +2210,8 @@ begin
   end;
 
   if ((his_ro in desc.hint) or (mem_size<>os32)) and
-     (not (not_impl in desc.mem_reg.opt)) then
+     (not (not_impl in desc.mem_reg.opt)) and
+     (not cmp_reg(ctx.din.Operand[1],ctx.din.Operand[2])) then //skip this path when both operands are the same register
   begin
    if tmp2_used2 then
    begin
diff --git a/sys/test/kern_jit2_ops.pas b/sys/test/kern_jit2_ops.pas
index 4a075640..4d96815d 100644
--- a/sys/test/kern_jit2_ops.pas
+++ b/sys/test/kern_jit2_ops.pas
@@ -58,12 +58,7 @@ begin
   link_start:=ctx.builder.get_curr_label.after;
 
   //repeat
-  seto(al);
-  lahf;
-  testq(rcx,rcx);
-  link_jmp0:=jcc(OPSc_z,nil_link,os8);
-  addi(al,127);
-  sahf;
+  link_jmp0:=jcxz(nil_link,ctx.dis.AddressSize); //exit when the count is zero; jcxz leaves the flags untouched, so no seto/lahf..sahf save is needed
 
   movq(r_tmp0,rsi);
   call_far(@uplift_jit); //in/out:rax uses:r14
@@ -106,18 +101,11 @@ begin
   //until
   jmp(link_start,os8);
 
-  //exit1
-  addi(al,127);
-  sahf;
+  //exit
 
-  //exit2
-
-  link___end:=ctx.builder.get_curr_label.before; //exit1
+  link___end:=ctx.builder.get_curr_label.after; //exit
 
   link_jmp0._label:=link___end;
-
-  link___end:=link___end.after; //exit2
-
   link_jmp1._label:=link___end;
  end;
 
@@ -192,11 +180,7 @@ begin
   link_start:=ctx.builder.get_curr_label.after;
 
   //repeat
-
-  //flags saved in up proc
-  testq(rcx,rcx);
-  link_jmp0:=jcc(OPSc_z,nil_link,os8);
-  //flags saved in up proc
+  link_jmp0:=jcxz(nil_link,ctx.dis.AddressSize); //exit when the count is zero; honours the decoded address size
 
   movq(r_tmp0,rdi);
   call_far(@uplift_jit); //in/out:rax uses:r14
@@ -288,10 +272,7 @@ begin
   link_start:=ctx.builder.get_curr_label.after;
 
   //repeat
-  //flags saved in up proc
-  testq(rcx,rcx);
-  link_jmp0:=jcc(OPSc_z,nil_link,os8);
-  //flags saved in up proc
+  link_jmp0:=jcxz(nil_link,ctx.dis.AddressSize); //exit when the count is zero; honours the decoded address size
 
   movq(r_tmp0,rsi);
   call_far(@uplift_jit); //in/out:rax uses:r14
@@ -608,6 +589,82 @@ end;
 
 //
 
+procedure _op_lods(var ctx:t_jit_context2;dflag:Integer); //emit one lods step; dflag=0 advances rsi, any other value steps it back
+var
+ size:TOperandSize;
+
+ new:TRegValue;
+begin
+ //rsi (lods reads through the source index; rdi is not involved)
+
+ Assert(ctx.dis.AddressSize=as64,'prefix $67 TODO');
+
+ case ctx.din.OpCode.Suffix of
+  OPSx_b:size:=os8;
+  OPSx_w:size:=os16;
+  OPSx_d:size:=os32;
+  OPSx_q:size:=os64;
+  else;
+   Assert(False);
+ end;
+
+ //(r_tmp0)rax <-> rsi
+ //(r_tmp1)r14 <-> rax
+ with ctx.builder do
+ begin
+
+  new:=new_reg_size(r_tmp1,size); //r_tmp1 viewed at the operand size
+
+  movq(r_tmp0,rsi); //source address comes from rsi
+  call_far(@uplift_jit); //in/out:rax uses:r14
+
+  movq(new,[r_tmp0]); //load the element through the uplifted address
+
+  op_save_rax(ctx,fix_size(new)); //store the loaded value as the new rax
+
+  if (dflag=0) then
+  begin
+   leaq(rsi,[rsi+OPERAND_BYTES[size]]); //forward: advance rsi by the element size
+  end else
+  begin
+   leaq(rsi,[rsi-OPERAND_BYTES[size]]); //backward: step rsi back by the element size
+  end;
+
+ end;
+
+end;
+
+procedure op_lods(var ctx:t_jit_context2); //emit both direction variants of lods and pick one at run time from DF
+var
+ link_jmp0:t_jit_i_link;
+ link_jmp1:t_jit_i_link;
+begin
+ with ctx.builder do
+ begin
+
+  //get d flag
+  pushfq(os64);
+  bti8([rsp,os64],10); //CF:=bit 10 (DF) of the flags qword just pushed at [rsp]
+
+  link_jmp0:=jcc(OPSc_b,nil_link,os8); //DF set -> backward variant
+
+  _op_lods(ctx,0); //forward variant
+
+  link_jmp1:=jmp(nil_link,os8); //skip the backward variant
+
+  link_jmp0._label:=ctx.builder.get_curr_label.after;
+
+  _op_lods(ctx,1); //backward variant
+
+  link_jmp1._label:=ctx.builder.get_curr_label.after;
+
+  popfq(os64); //restore the flags pushed above
+
+ end;
+end;
+
+//
+
 const
  xor_desc:t_op_desc=(
   mem_reg:(op:$31;index:0);
@@ -924,6 +981,51 @@ begin
  end;
 end;
 
+//
+
+const
+ bsf_desc:t_op_desc=(
+  mem_reg:(opt:[not_impl]);
+  reg_mem:(op:$0FBC;index:0); //only the reg<-reg/mem form is described
+  reg_imm:(opt:[not_impl]);
+  reg_im8:(opt:[not_impl]);
+  hint:[his_wo]; //NOTE(review): with a zero source bsf leaves the destination undefined/unchanged - confirm his_wo is acceptable
+ );
+
+procedure op_bsf(var ctx:t_jit_context2); //re-emit via bsf_desc when a preserved register or memory operand is involved, else copy the original bytes
+begin
+ if is_preserved(ctx.din) or is_memory(ctx.din) then
+ begin
+  op_emit2(ctx,bsf_desc);
+ end else
+ begin
+  add_orig(ctx);
+ end;
+end;
+
+const
+ bsr_desc:t_op_desc=(
+  mem_reg:(opt:[not_impl]);
+  reg_mem:(op:$0FBD;index:0); //only the reg<-reg/mem form is described
+  reg_imm:(opt:[not_impl]);
+  reg_im8:(opt:[not_impl]);
+  hint:[his_wo]; //NOTE(review): same zero-source question as bsf_desc
+ );
+
+procedure op_bsr(var ctx:t_jit_context2); //same dispatch as op_bsf, using bsr_desc
+begin
+ if is_preserved(ctx.din) or is_memory(ctx.din) then
+ begin
+  op_emit2(ctx,bsr_desc);
+ end else
+ begin
+  add_orig(ctx);
+ end;
+end;
+
+
+//
+
 const
  xchg_desc:t_op_desc=(
   mem_reg:(op:$87;index:0);
@@ -965,14 +1067,6 @@ begin
   Exit; //skip segment change
  end;
 
- //mov eax,eax
- if (not is_memory(ctx.din.Operand[1])) and
-    (not is_memory(ctx.din.Operand[2])) then
- if cmp_reg(ctx.din.Operand[1],ctx.din.Operand[2]) then
- begin
-  Exit;
- end;
-
  if is_segment(ctx.din.Operand[2]) then
  begin
   if is_preserved(ctx.din) or is_memory(ctx.din) then
@@ -1030,14 +1124,6 @@ procedure op_cmov(var ctx:t_jit_context2);
 var
  desc:t_op_desc;
 begin
- //mov eax,eax
- if (not is_memory(ctx.din.Operand[1])) and
-    (not is_memory(ctx.din.Operand[2])) then
- if cmp_reg(ctx.din.Operand[1],ctx.din.Operand[2]) then
- begin
-  Exit;
- end;
-
  if is_preserved(ctx.din) or is_memory(ctx.din) then
  begin
   desc:=cmov_desc;
@@ -2189,6 +2275,11 @@ begin
  jit_cbs[OPPnone,OPstos,OPSx_d]:=@op_stos;
  jit_cbs[OPPnone,OPstos,OPSx_q]:=@op_stos;
 
+ jit_cbs[OPPnone,OPlods,OPSx_b]:=@op_lods;
+ jit_cbs[OPPnone,OPlods,OPSx_w]:=@op_lods;
+ jit_cbs[OPPnone,OPlods,OPSx_d]:=@op_lods;
+ jit_cbs[OPPnone,OPlods,OPSx_q]:=@op_lods;
+
  //
 
  jit_cbs[OPPnone,OPxor ,OPSnone]:=@op_xor;
@@ -2209,6 +2300,9 @@ begin
  jit_cbs[OPPnone,OPbts ,OPSnone]:=@op_bts;
  jit_cbs[OPPnone,OPbtr ,OPSnone]:=@op_btr;
 
+ jit_cbs[OPPnone,OPbsf,OPSnone]:=@op_bsf;
+ jit_cbs[OPPnone,OPbsr,OPSnone]:=@op_bsr;
+
  jit_cbs[OPPnone,OPxchg,OPSnone]:=@op_xchg;
 
  jit_cbs[OPPnone,OPmov ,OPSnone]:=@op_mov;
diff --git a/sys/test/kern_jit2_ops_avx.pas b/sys/test/kern_jit2_ops_avx.pas
index 0e289ef9..10111bde 100644
--- a/sys/test/kern_jit2_ops_avx.pas
+++ b/sys/test/kern_jit2_ops_avx.pas
@@ -207,6 +207,29 @@ begin
  end;
 end;
 
+procedure op_avx3_mri_not_vex_len(var ctx:t_jit_context2); //mri-form imm8 handler carrying the not_vex_len option; registered for vextractf128 below
+const
+ desc:t_op_avx3_imm=(
+  rmi:(opt:[not_impl]);
+  mri:(op:0;index:0;simdop:0;mm:0;opt:[not_vex_len]); //template: op/simdop/mm are filled in from the decoded instruction
+ );
+var
+ tmp:t_op_avx3_imm;
+begin
+ if is_preserved(ctx.din) or is_memory(ctx.din) then
+ begin
+  tmp:=desc;
+  tmp.mri.op    :=ctx.dis.opcode and $FF; //low byte of the decoded opcode
+  tmp.mri.simdop:=SCODES[ctx.dis.SimdOpcode];
+  tmp.mri.mm    :=ctx.dis.mm;
+
+  op_emit_avx3_imm8(ctx,tmp);
+ end else
+ begin
+  add_orig(ctx); //nothing to rewrite: copy the original instruction
+ end;
+end;
+
 //
 
 const
@@ -836,6 +859,8 @@ begin
  jit_cbs[OPPv,OPmovh,OPSx_ps]:=@op_vmovhps;
  jit_cbs[OPPv,OPmovh,OPSx_pd]:=@op_vmovhpd;
 
+ jit_cbs[OPPv,OPmovhlps,OPSnone]:=@add_orig;
+
  jit_cbs[OPPv,OPmovlh,OPSx_ps]:=@add_orig;
 
  jit_cbs[OPPv,OPmovsldup,OPSnone]:=@op_avx2_reg_mem_mov_wo;
@@ -860,6 +885,7 @@ begin
  jit_cbs[OPPv,OPpmovmskb,OPSnone]:=@op_avx2_rr;
 
  jit_cbs[OPPv,OPmaskmov,OPSx_ps]:=@op_avx3_gen;
+ jit_cbs[OPPv,OPmaskmov,OPSx_pd]:=@op_avx3_gen;
 
  jit_cbs[OPPv,OPucomi,OPSx_ss]:=@op_avx2_reg_mem_wo;
  jit_cbs[OPPv,OPucomi,OPSx_sd]:=@op_avx2_reg_mem_wo;
@@ -1032,12 +1058,11 @@ begin
  jit_cbs[OPPv,OPpextr,OPSx_q]:=@op_avx3_mri;
  jit_cbs[OPPv,OPpextr,OPSx_w]:=@op_avx3_mri;
 
- jit_cbs[OPPv,OPextract,OPSx_ps]:=@op_avx3_mri;
+ jit_cbs[OPPv,OPextract,OPSx_ps  ]:=@op_avx3_mri;
+ jit_cbs[OPPv,OPextract,OPSx_f128]:=@op_avx3_mri_not_vex_len; //was op_avx3_not_vex_len
 
- jit_cbs[OPPv,OPinsert ,OPSx_f128]:=@op_avx3_not_vex_len;
  jit_cbs[OPPv,OPinsert ,OPSx_ps  ]:=@op_avx3_gen;
-
- jit_cbs[OPPv,OPextract,OPSx_f128]:=@op_avx3_not_vex_len;
+ jit_cbs[OPPv,OPinsert ,OPSx_f128]:=@op_avx3_not_vex_len;
 
  jit_cbs[OPPv,OPround,OPSx_ps]:=@op_avx3_gen;
  jit_cbs[OPPv,OPround,OPSx_pd]:=@op_avx3_gen;
@@ -1045,7 +1070,7 @@ begin
  jit_cbs[OPPv,OPround,OPSx_sd]:=@op_avx3_gen;
 
  jit_cbs[OPPv,OPsqrt ,OPSx_ps]:=@op_avx2_reg_mem_wo;
- jit_cbs[OPPv,OPsqrt ,OPSx_ss]:=@op_avx2_reg_mem_wo;
+ jit_cbs[OPPv,OPsqrt ,OPSx_pd]:=@op_avx2_reg_mem_wo; //replaces an OPSx_ss entry that the assignment two lines below overwrote
  jit_cbs[OPPv,OPsqrt ,OPSx_sd]:=@op_avx3_gen;
  jit_cbs[OPPv,OPsqrt ,OPSx_ss]:=@op_avx3_gen;
 
@@ -1130,6 +1155,9 @@ begin
  jit_cbs[OPPv,OPpavg,OPSx_b]:=@op_avx3_gen;
  jit_cbs[OPPv,OPpavg,OPSx_w]:=@op_avx3_gen;
 
+ jit_cbs[OPPv,OPaeskeygenassist,OPSnone]:=@op_avx3_rmi;
+ jit_cbs[OPPv,OPaesimc         ,OPSnone]:=@op_avx2_reg_mem_wo;
+
 end;
 
 initialization
diff --git a/sys/test/kern_jit2_ops_sse.pas b/sys/test/kern_jit2_ops_sse.pas
index ba7dd7ed..bad3137e 100644
--- a/sys/test/kern_jit2_ops_sse.pas
+++ b/sys/test/kern_jit2_ops_sse.pas
@@ -133,6 +133,17 @@ begin
  end;
 end;
 
+procedure op_reg_mem_mov_wo(var ctx:t_jit_context2); //reg<-reg/mem SIMD handler emitted with the his_mov and his_wo hints
+begin
+ if is_preserved(ctx.din) or is_memory(ctx.din) then
+ begin
+  op_emit2_simd_reg_mem(ctx,[his_mov,his_wo]);
+ end else
+ begin
+  add_orig(ctx); //nothing to rewrite: copy the original instruction
+ end;
+end;
+
 //
 
 const
@@ -389,6 +400,19 @@ begin
  jit_cbs[OPPnone,OPmova,OPSx_ps]:=@op_movaps;
  jit_cbs[OPPnone,OPmova,OPSx_pd]:=@op_movapd;
 
+ jit_cbs[OPPnone,OPmovl,OPSx_ps]:=@op_reg_mem_mov_wo;
+ jit_cbs[OPPnone,OPmovl,OPSx_pd]:=@op_reg_mem_mov_wo;
+
+ jit_cbs[OPPnone,OPmovh,OPSx_ps]:=@op_reg_mem_mov_wo;
+ jit_cbs[OPPnone,OPmovh,OPSx_pd]:=@op_reg_mem_mov_wo;
+
+ jit_cbs[OPPnone,OPmovhlps,OPSnone]:=@add_orig;
+
+ jit_cbs[OPPnone,OPmovlh,OPSx_ps]:=@add_orig;
+
+ jit_cbs[OPPnone,OPmovsldup,OPSnone]:=@op_reg_mem_mov_wo;
+ jit_cbs[OPPnone,OPmovshdup,OPSnone]:=@op_reg_mem_mov_wo;
+
  jit_cbs[OPPnone,OPmovnt,OPSx_dqa]:=@op_movntdqa;
  jit_cbs[OPPnone,OPmovnt,OPSx_dq ]:=@op_mem_reg_mov_wo;
  jit_cbs[OPPnone,OPmovnt,OPSx_i ]:=@op_mem_reg_mov_wo;
@@ -456,9 +480,14 @@ begin
  jit_cbs[OPPnone,OPcvtsd2,OPSx_si]:=@op_reg_mem_wo;
  jit_cbs[OPPnone,OPcvtss2,OPSx_si]:=@op_reg_mem_wo;
 
+ jit_cbs[OPPnone,OPsqrt,OPSx_ps]:=@op_reg_mem_wo;
+ jit_cbs[OPPnone,OPsqrt,OPSx_pd]:=@op_reg_mem_wo;
  jit_cbs[OPPnone,OPsqrt,OPSx_sd]:=@op_reg_mem_wo;
  jit_cbs[OPPnone,OPsqrt,OPSx_ss]:=@op_reg_mem_wo;
 
+ jit_cbs[OPPnone,OPrsqrt,OPSx_ps]:=@op_reg_mem_wo;
+ jit_cbs[OPPnone,OPrsqrt,OPSx_ss]:=@op_reg_mem_wo;
+
  jit_cbs[OPPnone,OPrcp ,OPSx_ps]:=@op_reg_mem_wo;
  jit_cbs[OPPnone,OPrcp ,OPSx_ss]:=@op_reg_mem_wo;
 