diff --git a/rtl/x86_jit.pas b/rtl/x86_jit.pas index 58921734..6218d922 100644 --- a/rtl/x86_jit.pas +++ b/rtl/x86_jit.pas @@ -289,8 +289,8 @@ type Function _add_data(P:Pointer):p_jit_data; Function _get_data_offset(ALink:p_jit_data;AInstructionEnd:Integer):Integer; // - Procedure call_far(P:Pointer); - Procedure jmp_far (P:Pointer); + function call_far(P:Pointer):t_jit_i_link; + function jmp_far (P:Pointer):t_jit_i_link; // function call(_label_id:t_jit_i_link):t_jit_i_link; function jmp (_label_id:t_jit_i_link;size:TOperandSize=os32):t_jit_i_link; @@ -1086,7 +1086,7 @@ begin Result:=((ALink^.AInstructionOffset+ALink^.ASize)-AInstructionEnd); end; -Procedure t_jit_builder.call_far(P:Pointer); +Function t_jit_builder.call_far(P:Pointer):t_jit_i_link; var ji:t_jit_instruction; begin @@ -1103,9 +1103,13 @@ begin ji.EmitInt32(0); _add(ji); + + Result.ALink:=TAILQ_LAST(@ACodeChunkCurr^.AInstructions); + Result.AType:=lnkLabelBefore; + LinkLabel(Result.ALink); end; -Procedure t_jit_builder.jmp_far(P:Pointer); +Function t_jit_builder.jmp_far(P:Pointer):t_jit_i_link; var ji:t_jit_instruction; begin @@ -1122,6 +1126,10 @@ begin ji.EmitInt32(0); _add(ji); + + Result.ALink:=TAILQ_LAST(@ACodeChunkCurr^.AInstructions); + Result.AType:=lnkLabelBefore; + LinkLabel(Result.ALink); end; function t_jit_builder.call(_label_id:t_jit_i_link):t_jit_i_link; @@ -1336,10 +1344,17 @@ Procedure LinkLabel(node:p_jit_instruction); var d:Integer; begin + //Pre-linking, for debugging only d:=0; if (node=nil) then Exit; if (node^.ALink.ALink=nil) then Exit; case node^.ALink.AType of + lnkData: + With node^ do + begin + d:=(p_jit_data(ALink.ALink)^.pId*SizeOf(Pointer)); + _set_data(node,d); + end; lnkLabelBefore: With node^ do begin @@ -4102,7 +4117,6 @@ var ji:t_jit_instruction; begin Assert(not (not_impl in desc.opt)); - Assert(desc.mm=3); Assert(is_reg_size(reg0,[os8,os16,os32,os64,os128,os256])); Assert(is_reg_type(reg0,[regGeneral,regXmm])); @@ -4140,7 +4154,7 @@ begin ji.EmitByte($C4); 
//VEX3 - ji.EmitRXBm(modrm_info.rexB,modrm_info.rexX,modrm_info.rexR,3); + ji.EmitRXBm(modrm_info.rexB,modrm_info.rexX,modrm_info.rexR,desc.mm); ji.EmitWvvv(Vex.rexW,0,Vex.Length,desc.index); ji.EmitByte(desc.op); @@ -4166,7 +4180,6 @@ var ji:t_jit_instruction; begin Assert(not (not_impl in desc.opt)); - Assert(desc.mm=3); Assert(is_reg_size(reg,[os8,os16,os32,os64,os128,os256])); Assert(is_reg_type(reg,[regXmm])); @@ -4218,7 +4231,7 @@ begin ji.EmitByte($C4); //VEX3 - ji.EmitRXBm(modrm_info.rexB,modrm_info.rexX,modrm_info.rexR,3); + ji.EmitRXBm(modrm_info.rexB,modrm_info.rexX,modrm_info.rexR,desc.mm); ji.EmitWvvv(Vex.rexW,0,Vex.Length,desc.index); ji.EmitByte(desc.op); diff --git a/sys/test/kern_jit2.pas b/sys/test/kern_jit2.pas index a298c918..95d3a343 100644 --- a/sys/test/kern_jit2.pas +++ b/sys/test/kern_jit2.pas @@ -294,10 +294,82 @@ begin //Assert(False); end; -procedure jit_cpuid; -begin - Writeln('TODO:jit_cpuid'); - Assert(False); +//0x0 +//0x1 +//0x4 +//0x6 +//0xb + +//0x40000000 +//0x40000010 + +//0x80000001 +//0x80000002 +//0x80000004 +//0x80000005 +//0x80000006 +//0x80000008 + +//0xc0000000 +//0xc0000001 +procedure jit_cpuid; assembler; nostackframe; //CPUID replacement stub: the leaf is loaded from jit_frame.tf_rax(%r15); only leaves 0 and 1 are answered +label + _cpuid_0, + _cpuid_1; +asm + pushf //save flags: the cmp below clobbers them, popf restores them before each ret + + mov jit_frame.tf_rax(%r15),%rax + + cmp $0,%eax + je _cpuid_0 + + cmp $1,%eax + je _cpuid_1 + + ud2 //any other leaf is not handled: raise #UD + + _cpuid_0: + + //cpu_high TODO check + mov $0xF,%eax + + //cpu_vendor + mov $0x68747541,%ebx //'Auth' + mov $0x69746E65,%edx //'enti' + mov $0x444D4163,%ecx //'cAMD' + + mov %rax,jit_frame.tf_rax(%r15) //only rax is written back to the frame; NOTE(review): ebx/ecx/edx stay in the live registers - confirm the caller expects that + popf + ret + + _cpuid_1: + + //get host values (eax=1 on this path); ebx is kept and filtered below, eax/ecx/edx are overwritten + cpuid + + //if ((cpu_id & 0xffffff80) == 0x740f00) then + //if "machdep.bootparams.base_ps4_mode" then sceKernelHasNeoMode + + //if ((cpu_id & 0xffffff80) == 0x740f00) then sceKernelIsAuthenticNeo + + mov $0x00710f13,%eax //cpu_id + mov $0x178bfbff,%edx //cpu_feature + mov $0x36d8220b,%ecx //cpu_feature2 + +//CPUID_BRAND_INDEX 0x000000ff +//CPUID_CLFUSH_SIZE 0x0000ff00 +//CPUID_HTT_CORES 0x00ff0000 //sceKernelGetCurrentCpu 0..7 
+//CPUID_LOCAL_APIC_ID 0xff000000 + + and $0xFF070000,%ebx //keep CPUID_LOCAL_APIC_ID and the low 3 bits of CPUID_HTT_CORES, clear everything else + + or $0x00000800,%ebx //cpu_procinfo: sets bit 11, i.e. the CPUID_CLFUSH_SIZE field becomes 8 + + mov %rax,jit_frame.tf_rax(%r15) //only rax is written back to the frame + popf + ret + end; procedure op_jmp_dispatcher(var ctx:t_jit_context2); @@ -413,10 +485,7 @@ begin //imm offset ofs:=0; - if not GetTargetOfs(ctx.din,ctx.code,1,ofs) then - begin - Assert(false); - end; + GetTargetOfs(ctx.din,ctx.code,1,ofs); dst:=ctx.ptr_next+ofs; @@ -428,6 +497,7 @@ begin if (link<>nil_link) then begin ctx.builder.jmp(link); + ctx.add_forward_point(nil_link,dst); end else begin id:=ctx.builder.jmp(nil_link); @@ -503,10 +573,7 @@ begin if (ctx.din.Operand[1].RegValue[0].AType=regNone) then begin ofs:=0; - if not GetTargetOfs(ctx.din,ctx.code,1,ofs) then - begin - Assert(false); - end; + GetTargetOfs(ctx.din,ctx.code,1,ofs); dst:=ctx.ptr_next+ofs; @@ -518,6 +585,7 @@ begin if (link<>nil_link) then begin ctx.builder.jmp(link); + ctx.add_forward_point(nil_link,dst); end else begin id:=ctx.builder.jmp(nil_link); @@ -572,10 +640,7 @@ var link:t_jit_i_link; begin ofs:=0; - if not GetTargetOfs(ctx.din,ctx.code,1,ofs) then - begin - Assert(false); - end; + GetTargetOfs(ctx.din,ctx.code,1,ofs); dst:=ctx.ptr_next+ofs; @@ -587,6 +652,7 @@ begin if (link<>nil_link) then begin ctx.builder.jcc(ctx.din.OpCode.Suffix,link); + ctx.add_forward_point(nil_link,dst); end else begin id:=ctx.builder.jcc(ctx.din.OpCode.Suffix,nil_link); @@ -843,8 +909,9 @@ end; const test_desc:t_op_type=(op:$85;index:0); + bt_desc_imm:t_op_type=(op:$0FBA;index:4); {BT r/m, imm8: 0F BA /4} -procedure op_rep_cmps(var ctx:t_jit_context2); +procedure _op_rep_cmps(var ctx:t_jit_context2;dflag:Integer); var op:DWORD; size:TOperandSize; @@ -907,10 +974,15 @@ begin leaq(rcx,[rcx-1]); - - - leaq(rdi,[rdi+OPERAND_BYTES[size]]); - leaq(rsi,[rsi+OPERAND_BYTES[size]]); + if (dflag=0) then + begin + leaq(rdi,[rdi+OPERAND_BYTES[size]]); + leaq(rsi,[rsi+OPERAND_BYTES[size]]); + end else + begin + leaq(rdi,[rdi-OPERAND_BYTES[size]]); + 
leaq(rsi,[rsi-OPERAND_BYTES[size]]); + end; if (ifPrefixRepE in ctx.din.Flags) then begin @@ -943,9 +1015,38 @@ begin end; +procedure op_rep_cmps(var ctx:t_jit_context2); +var + link_jmp0:t_jit_i_link; + link_jmp1:t_jit_i_link; +begin + with ctx.builder do + begin + + //get d flag at run time and emit both direction variants: push rflags, then test bit 10 (DF) of the saved copy + pushfq(os64); + _MI8(bt_desc_imm,os64,[rsp],10); //bt qword [rsp], 10 (the operand is [rsp], not rax); CF:=DF + + link_jmp0:=jcc(OPSc_b,nil_link,os8); {taken when the tested bit is set (OPSc_b presumably = CF=1): go to the dflag=1 variant. NOTE(review): os8 presumably selects a rel8 jump - confirm the skipped code always fits} + + popfq(os64); {restore the rflags saved by pushfq} + _op_rep_cmps(ctx,0); + + link_jmp1:=jmp(nil_link,os8); {skip the dflag=1 variant} + + link_jmp0._label:=ctx.builder.get_curr_label.after; {jcc target: the dflag=1 variant starts here} + + popfq(os64); {restore the saved rflags on this path too} + _op_rep_cmps(ctx,1); + + link_jmp1._label:=ctx.builder.get_curr_label.after; {common exit of both variants} + + end; +end; + /// -procedure op_rep_stos(var ctx:t_jit_context2); +procedure _op_rep_stos(var ctx:t_jit_context2;dflag:Integer); var i:Integer; size:TOperandSize; @@ -997,7 +1098,14 @@ begin movq([r_tmp0],new); leaq(rcx,[rcx-1]); - leaq(rdi,[rdi+OPERAND_BYTES[size]]); + + if (dflag=0) then + begin + leaq(rdi,[rdi+OPERAND_BYTES[size]]); + end else + begin + leaq(rdi,[rdi-OPERAND_BYTES[size]]); + end; //until jmp(link_start,os8); @@ -1014,6 +1122,35 @@ begin end; +procedure op_rep_stos(var ctx:t_jit_context2); +var + link_jmp0:t_jit_i_link; + link_jmp1:t_jit_i_link; +begin + with ctx.builder do + begin + + //get d flag at run time and emit both direction variants: push rflags, then test bit 10 (DF) of the saved copy + pushfq(os64); + _MI8(bt_desc_imm,os64,[rsp],10); //bt qword [rsp], 10 (the operand is [rsp], not rax); CF:=DF + + link_jmp0:=jcc(OPSc_b,nil_link,os8); {taken when the tested bit is set (OPSc_b presumably = CF=1): go to the dflag=1 variant. NOTE(review): os8 presumably selects a rel8 jump - confirm the skipped code always fits} + + popfq(os64); {restore the rflags saved by pushfq} + _op_rep_stos(ctx,0); + + link_jmp1:=jmp(nil_link,os8); {skip the dflag=1 variant} + + link_jmp0._label:=ctx.builder.get_curr_label.after; {jcc target: the dflag=1 variant starts here} + + popfq(os64); {restore the saved rflags on this path too} + _op_rep_stos(ctx,1); + + link_jmp1._label:=ctx.builder.get_curr_label.after; {common exit of both variants} + + end; +end; + procedure init_cbs; begin jit_cbs[OPPnone,OPcall,OPSnone]:=@op_call; diff --git a/sys/test/kern_jit2_ops.pas b/sys/test/kern_jit2_ops.pas index 854e12d6..bbef88ea 100644 --- a/sys/test/kern_jit2_ops.pas +++ b/sys/test/kern_jit2_ops.pas @@ -273,13 +273,33 @@ begin end; end; +const + btc_desc:t_op_desc=( + mem_reg:(op:$0FBB;index:0); {BTC r/m, r: 0F BB} + reg_mem:(opt:[not_impl]); + reg_imm:(opt:[not_impl]); + 
reg_im8:(op:$0FBA;index:7); {BTC r/m, imm8: 0F BA /7} + hint:[his_rw]; + ); + +procedure op_btc(var ctx:t_jit_context2); {op_emit2 with btc_desc when is_preserved or is_memory holds for ctx.din, otherwise add_orig} +begin + if is_preserved(ctx.din) or is_memory(ctx.din) then + begin + op_emit2(ctx,btc_desc); + end else + begin + add_orig(ctx); + end; +end; + const bts_desc:t_op_desc=( mem_reg:(op:$0FAB;index:0); reg_mem:(opt:[not_impl]); reg_imm:(opt:[not_impl]); reg_im8:(op:$0FBA;index:5); - hint:[his_ro]; + hint:[his_rw]; {was his_ro; BTS modifies its destination operand} ); procedure op_bts(var ctx:t_jit_context2); @@ -293,6 +313,25 @@ begin end; end; +const + btr_desc:t_op_desc=( + mem_reg:(op:$0FB3;index:0); {BTR r/m, r: 0F B3} + reg_mem:(opt:[not_impl]); + reg_imm:(opt:[not_impl]); + reg_im8:(op:$0FBA;index:6); {BTR r/m, imm8: 0F BA /6} + hint:[his_rw]; + ); + +procedure op_btr(var ctx:t_jit_context2); {op_emit2 with btr_desc when is_preserved or is_memory holds for ctx.din, otherwise add_orig} +begin + if is_preserved(ctx.din) or is_memory(ctx.din) then + begin + op_emit2(ctx,btr_desc); + end else + begin + add_orig(ctx); + end; +end; const xchg_desc:t_op_desc=( @@ -1970,7 +2009,9 @@ begin jit_cbs[OPPnone,OPdiv ,OPSnone]:=@op_div; jit_cbs[OPPnone,OPbt ,OPSnone]:=@op_bt; + jit_cbs[OPPnone,OPbtc ,OPSnone]:=@op_btc; jit_cbs[OPPnone,OPbts ,OPSnone]:=@op_bts; + jit_cbs[OPPnone,OPbtr ,OPSnone]:=@op_btr; jit_cbs[OPPnone,OPxchg,OPSnone]:=@op_xchg; diff --git a/sys/test/kern_jit2_ops_avx.pas b/sys/test/kern_jit2_ops_avx.pas index 8e05f5eb..af305f4f 100644 --- a/sys/test/kern_jit2_ops_avx.pas +++ b/sys/test/kern_jit2_ops_avx.pas @@ -1718,21 +1718,7 @@ begin end; end; -const - vpshufd_desc:t_op_type=( - op:$70;index:1;mm:1; - ); - -procedure op_vpshufd(var ctx:t_jit_context2); -begin - if is_memory(ctx.din) then - begin - op_emit_avx3(ctx,vpshufd_desc); - end else - begin - add_orig(ctx); - end; -end; +// const vpshufb_desc:t_op_type=( @@ -1750,6 +1736,59 @@ begin end; end; +const + vpshufd_desc:t_op_avx3_imm=( + rmi:(op:$70;index:1;mm:1); {VPSHUFD: VEX.66.0F 70 /r ib} + mri:(opt:[not_impl]); + ); + +procedure op_vpshufd(var ctx:t_jit_context2); {memory operand only: op_emit_avx3_imm8 with vpshufd_desc, otherwise add_orig} +begin + if is_memory(ctx.din) then + begin + op_emit_avx3_imm8(ctx,vpshufd_desc); + end else + begin + add_orig(ctx); + end; +end; + +const + 
vpshufhw_desc:t_op_avx3_imm=( + rmi:(op:$70;index:2;mm:1); {VPSHUFHW: VEX.F3.0F 70 /r ib} + mri:(opt:[not_impl]); + ); + +procedure op_vpshufhw(var ctx:t_jit_context2); {memory operand only: op_emit_avx3_imm8 with vpshufhw_desc, otherwise add_orig} +begin + if is_memory(ctx.din) then + begin + op_emit_avx3_imm8(ctx,vpshufhw_desc); + end else + begin + add_orig(ctx); + end; +end; + +const + vpshuflw_desc:t_op_avx3_imm=( + rmi:(op:$70;index:3;mm:1); {VPSHUFLW: VEX.F2.0F 70 /r ib} + mri:(opt:[not_impl]); + ); + +procedure op_vpshuflw(var ctx:t_jit_context2); {memory operand only: op_emit_avx3_imm8 with vpshuflw_desc, otherwise add_orig} +begin + if is_memory(ctx.din) then + begin + op_emit_avx3_imm8(ctx,vpshuflw_desc); + end else + begin + add_orig(ctx); + end; +end; + +// + const vpermilps_rrm_desc:t_op_type=( op:$0C;index:1;mm:2; @@ -2586,6 +2625,24 @@ begin end; end; +// + +const + vpinsrb_desc:t_op_type=( + op:$20;index:1;mm:3 {VPINSRB: VEX.66.0F3A 20 /r ib} + ); + +procedure op_vpinsrb(var ctx:t_jit_context2); {op_emit_avx3 with vpinsrb_desc when is_preserved or is_memory holds for ctx.din, otherwise add_orig} +begin + if is_preserved(ctx.din) or is_memory(ctx.din) then + begin + op_emit_avx3(ctx,vpinsrb_desc); + end else + begin + add_orig(ctx); + end; +end; + const vpinsrd_desc:t_op_type=( op:$22;index:1;mm:3 @@ -2593,7 +2650,7 @@ const procedure op_vpinsrd(var ctx:t_jit_context2); //vpinsrq begin - if is_memory(ctx.din) then + if is_preserved(ctx.din) or is_memory(ctx.din) then begin op_emit_avx3(ctx,vpinsrd_desc); end else @@ -2602,6 +2659,24 @@ begin end; end; +const + vpinsrw_desc:t_op_type=( + op:$C4;index:1;mm:1 {VPINSRW: VEX.66.0F C4 /r ib} + ); + +procedure op_vpinsrw(var ctx:t_jit_context2); {op_emit_avx3 with vpinsrw_desc when is_preserved or is_memory holds for ctx.din, otherwise add_orig} +begin + if is_preserved(ctx.din) or is_memory(ctx.din) then + begin + op_emit_avx3(ctx,vpinsrw_desc); + end else + begin + add_orig(ctx); + end; +end; + +// + const vpackusdw_desc:t_op_type=( op:$2B;index:1;mm:2 @@ -2910,8 +2985,10 @@ begin jit_cbs[OPPv,OPcmp ,OPSx_sd]:=@op_vcmpsd; jit_cbs[OPPv,OPcmp ,OPSx_ss]:=@op_vcmpss; - jit_cbs[OPPv,OPpshuf ,OPSx_d ]:=@op_vpshufd; jit_cbs[OPPv,OPpshuf ,OPSx_b ]:=@op_vpshufb; + jit_cbs[OPPv,OPpshuf ,OPSx_d ]:=@op_vpshufd; + jit_cbs[OPPv,OPpshuf ,OPSx_hw]:=@op_vpshufhw; + jit_cbs[OPPv,OPpshuf ,OPSx_lw]:=@op_vpshuflw; jit_cbs[OPPnone,OPvpermil,OPSx_ps]:=@op_vpermilps; 
jit_cbs[OPPnone,OPvpermil,OPSx_pd]:=@op_vpermilpd; @@ -2978,8 +3055,10 @@ begin jit_cbs[OPPnone,OPvbroadcast,OPSx_sd ]:=@op_vbroadcastsd; jit_cbs[OPPnone,OPvbroadcast,OPSx_f128]:=@op_vbroadcastf128; + jit_cbs[OPPv,OPpinsr,OPSx_b]:=@op_vpinsrb; jit_cbs[OPPv,OPpinsr,OPSx_d]:=@op_vpinsrd; jit_cbs[OPPv,OPpinsr,OPSx_q]:=@op_vpinsrd; + jit_cbs[OPPv,OPpinsr,OPSx_w]:=@op_vpinsrw; jit_cbs[OPPv,OPpackusdw,OPSnone]:=@op_vpackusdw; jit_cbs[OPPv,OPpackuswb,OPSnone]:=@op_vpackuswb;