diff --git a/fpPS4.lpi b/fpPS4.lpi index 5a85b3db..6a627dc3 100644 --- a/fpPS4.lpi +++ b/fpPS4.lpi @@ -1009,6 +1009,22 @@ + + + + + + + + + + + + + + + + diff --git a/rtl/ntapi.pas b/rtl/ntapi.pas index 614413aa..b1ae07ad 100644 --- a/rtl/ntapi.pas +++ b/rtl/ntapi.pas @@ -157,7 +157,7 @@ const INITIAL_MXCSR =$1f80; - CONTEXT_THREAD =CONTEXT_CONTROL or CONTEXT_INTEGER or CONTEXT_SEGMENTS; + CONTEXT_THREAD =CONTEXT_CONTROL or CONTEXT_INTEGER or CONTEXT_SEGMENTS or CONTEXT_FLOATING_POINT; ViewShare=1; ViewUnmap=2; diff --git a/rtl/x86_jit.pas b/rtl/x86_jit.pas index 14f21bb2..3a7e3211 100644 --- a/rtl/x86_jit.pas +++ b/rtl/x86_jit.pas @@ -328,6 +328,8 @@ type function leaj(reg:TRegValue;mem:t_jit_leas;_label_id:t_jit_i_link):t_jit_i_link; function leap(reg:TRegValue):t_jit_i_link; // + Procedure jmp(reg:TRegValue); + Procedure call(reg:TRegValue); Procedure reta; Procedure ud2; // @@ -1502,6 +1504,24 @@ begin LinkLabel(Result.ALink); end; +Procedure t_jit_builder.jmp(reg:TRegValue); +const + desc:t_op_type=(op:$FF;index:4); +begin + Assert(is_reg_size(reg,[os64])); + + _R(desc,reg); +end; + +Procedure t_jit_builder.call(reg:TRegValue); +const + desc:t_op_type=(op:$FF;index:2); +begin + Assert(is_reg_size(reg,[os64])); + + _R(desc,reg); +end; + Procedure t_jit_builder.reta; begin _O($C3); diff --git a/sys/jit/kern_jit.pas b/sys/jit/kern_jit.pas index b1ed9da1..193b3943 100644 --- a/sys/jit/kern_jit.pas +++ b/sys/jit/kern_jit.pas @@ -265,7 +265,8 @@ begin with ctx.builder do begin leap(r15); - call_far(@jit_plt_cache); //input:r14,r15 + call_far(@jit_jmp_plt_cache); //input:r14,r15 out:r14 + jmp(r14); end; end; @@ -274,7 +275,8 @@ begin with ctx.builder do begin leap(r15); - call_far(@jit_plt_cache); //input:r14,r15 + call_far(@jit_jmp_plt_cache); //input:r14,r15 out:r14 + jmp(r14); end; end; @@ -826,7 +828,7 @@ begin with ctx.builder do begin stack:=r_tmp0; - new :=r_tmp0; + new :=r_tmp1; op_load_rbp(ctx,stack); @@ -860,7 +862,7 @@ begin begin stack:=r_tmp0; - new:=new_reg_size(r_tmp0,ctx.din.Operand[1]); + new:=new_reg_size(r_tmp1,ctx.din.Operand[1]); mem_size:=ctx.din.Operand[1].Size; @@ -884,6 +886,7 @@ end; procedure op_pop(var ctx:t_jit_context2); var new,stack:TRegValue; + reload_rsp:Boolean; begin //mov reg,[rsp] //lea rsp,[rsp+len] @@ -896,21 +899,25 @@ begin op_uplift(ctx,os64); //in/out:r14 + reload_rsp:=False; + if is_memory(ctx.din) then begin new:=new_reg_size(r_tmp1,ctx.din.Operand[1]); movq(new,[stack]); - build_lea(ctx,1,r_tmp0); + build_lea(ctx,1,stack,[not_use_r_tmp1]); op_uplift(ctx,os64,[not_use_r_tmp1]); //in/out:r14 - movq([r_tmp0],new); + movq([stack],new); + + reload_rsp:=True; end else if is_preserved(ctx.din) then begin - new:=new_reg_size(r_tmp0,ctx.din.Operand[1]); + new:=new_reg_size(r_tmp1,ctx.din.Operand[1]); movq(new,[stack]); @@ -925,7 +932,10 @@ begin //For transactionality, //first we move the memory, //then we update the register - op_load_rsp(ctx,stack); + if reload_rsp then + begin + op_load_rsp(ctx,stack); + end; leaq(stack,[stack+OPERAND_BYTES[new.ASize]]); op_save_rsp(ctx,stack); end; diff --git a/sys/jit/kern_jit_asm.pas b/sys/jit/kern_jit_asm.pas index 8afe387a..d53a7bad 100644 --- a/sys/jit/kern_jit_asm.pas +++ b/sys/jit/kern_jit_asm.pas @@ -49,7 +49,7 @@ type end; procedure jit_syscall; assembler; -procedure jit_plt_cache; assembler; +procedure jit_jmp_plt_cache; assembler; procedure jit_jmp_dispatch; assembler; procedure jit_jmp_internal; assembler; @@ -416,8 +416,8 @@ asm //tf_r15=tf_r15 end; -//in:r14(addr) r15(plt) -procedure jit_plt_cache; assembler; nostackframe; +//in:r14(addr) r15(plt) out:r14(addr) +procedure jit_jmp_plt_cache; assembler; nostackframe; label _exit; asm @@ -440,13 +440,11 @@ asm popf - //pop internal - lea 8(%rsp),%rsp - //restore rbp movq %rsp,%rbp + leaq 8(%rbp),%rbp - jmp %r14 + ret _exit: @@ -459,7 +457,7 @@ asm jmp jit_jmp_dispatch end; -//in:r14(addr) r15(plt) +//in:r14(addr) r15(plt) out:r14(addr) procedure jit_jmp_dispatch; assembler; nostackframe; asm //prolog (debugger) @@ -484,10 +482,6 @@ asm //epilog movq %rbp,%rsp pop %rbp - - //pop internal - lea 8(%rsp),%rsp - jmp %r14 end; procedure stack_set_user; assembler; nostackframe; @@ -529,9 +523,6 @@ end; procedure jit_jmp_internal; assembler; nostackframe; asm - //push internal call - lea -8(%rsp),%rsp - //prolog (debugger) push %rbp movq %rsp,%rbp @@ -608,8 +599,8 @@ begin (rip<=(QWORD(@jit_jmp_dispatch)+$2C)) //jit_jmp_dispatch func size ) or ( - (rip>=QWORD(@jit_plt_cache)) and - (rip<=(QWORD(@jit_plt_cache)+$33)) //jit_plt_cache func size + (rip>=QWORD(@jit_jmp_plt_cache)) and + (rip<=(QWORD(@jit_jmp_plt_cache)+$33)) //jit_jmp_plt_cache func size ); end; diff --git a/sys/jit/kern_jit_dynamic.pas b/sys/jit/kern_jit_dynamic.pas index 8cbfeb68..15f8e318 100644 --- a/sys/jit/kern_jit_dynamic.pas +++ b/sys/jit/kern_jit_dynamic.pas @@ -252,17 +252,38 @@ var node:p_jit_entry_point; jctx:p_td_jctx; frame:p_jit_frame; - //jit_state:Boolean; begin if (td=nil) then Exit; - //jit_state:=((td^.pcb_flags and PCB_IS_JIT)<>0); - - if not is_guest_addr(td^.td_frame.tf_rip) then + if is_guest_addr(td^.td_frame.tf_rip) then begin - //clear jit flag - td^.pcb_flags:=td^.pcb_flags and (not PCB_IS_JIT); - Exit; //internal? + //host->jit + //jit->jit + end else + begin + if ((td^.pcb_flags and PCB_IS_JIT)<>0) then + begin + //jit->host + + if ((td^.td_pflags and TDP_KTHREAD)<>0) then + begin + //clear jit flag + td^.pcb_flags:=td^.pcb_flags and (not PCB_IS_JIT); + + Exit; //internal? + end else + begin + //forbidden + + Assert(false,'forbidden jump to 0x'+HexStr(td^.td_frame.tf_rip,16)); + end; + + end else + begin + //host->host + + Exit; //internal? + end; end; _start: diff --git a/sys/kern/kern_exec.pas b/sys/kern/kern_exec.pas index 4a8d886d..e748cb1f 100644 --- a/sys/kern/kern_exec.pas +++ b/sys/kern/kern_exec.pas @@ -1813,7 +1813,7 @@ begin if (Result=0) then begin - jit_prepare(0); + jit_prepare(curkthread,0); ipi_sigreturn; Writeln(stderr,'I''m a teapot!'); end; diff --git a/sys/kern/kern_thread.pas b/sys/kern/kern_thread.pas index 212d96b5..89da8ec9 100644 --- a/sys/kern/kern_thread.pas +++ b/sys/kern/kern_thread.pas @@ -353,10 +353,19 @@ begin } end; +const + p__INITIAL_FPUCW__:Word =__INITIAL_FPUCW__; + p__INITIAL_MXCSR__:DWord=__INITIAL_MXCSR__; + procedure before_start(td:p_kthread); begin InitThread(td^.td_ustack.stack-td^.td_ustack.sttop); + asm + fldcw p__INITIAL_FPUCW__(%rip) + ldmxcsr p__INITIAL_MXCSR__(%rip) + end; + if (init_tty_cb<>nil) then begin init_tty_cb(); @@ -373,6 +382,11 @@ type begin InitThread(td^.td_ustack.stack-td^.td_ustack.sttop); + asm + fldcw p__INITIAL_FPUCW__(%rip) + ldmxcsr p__INITIAL_MXCSR__(%rip) + end; + if (init_tty_cb<>nil) then begin init_tty_cb(); @@ -549,6 +563,9 @@ begin // Setup user TLS address and TLS pointer register. cpu_set_fsbase(newtd,tls_base); Writeln('set_fsbase=0x',HexStr(tls_base)); + // + fpuinit(@newtd^.td_fpstate); + newtd^.td_frame.tf_flags:=newtd^.td_frame.tf_flags or TF_HASFPXSTATE; end; //jit wrapper @@ -656,6 +673,9 @@ begin cpu_set_upcall_kse(newtd,func,arg,@stack); + fpuinit(@newtd^.td_fpstate); + newtd^.td_frame.tf_flags:=newtd^.td_frame.tf_flags or TF_HASFPXSTATE; + //jit wrapper switch_to_jit(newtd); //jit wrapper diff --git a/sys/kern/trap.pas b/sys/kern/trap.pas index 2281184d..fa5d235d 100644 --- a/sys/kern/trap.pas +++ b/sys/kern/trap.pas @@ -97,7 +97,7 @@ procedure sig_unlock; procedure fast_syscall; procedure amd64_syscall; -procedure jit_prepare(rip:QWORD); +procedure jit_prepare(td:p_kthread;rip:QWORD); procedure host_sigcode; procedure host_sigipi; @@ -324,15 +324,13 @@ begin cpu_set_syscall_retval(td,error); - jit_prepare(rip); + jit_prepare(td,rip); end; -procedure jit_prepare(rip:QWORD); +procedure jit_prepare(td:p_kthread;rip:QWORD); var - td:p_kthread; is_jit:Boolean; begin - td:=curkthread; if (td=nil) then Exit; is_jit:=((td^.pcb_flags and PCB_IS_JIT)<>0); diff --git a/sys/md/md_context.pas b/sys/md/md_context.pas index 0ef30f51..82f9cad8 100644 --- a/sys/md/md_context.pas +++ b/sys/md/md_context.pas @@ -74,6 +74,16 @@ type _align:QWORD; end; +const + _ucodesel=(8 shl 3) or 3; + _udatasel=(7 shl 3) or 3; + _ufssel =(2 shl 3) or 3; + _ugssel =(3 shl 3) or 3; + + __INITIAL_FPUCW__ =$037F; + __INITIAL_MXCSR__ =$1F80; + __INITIAL_MXCSR_MASK__=$FFBF; + procedure teb_set_kernel(td:p_kthread); procedure teb_set_user (td:p_kthread); @@ -85,6 +95,8 @@ function _get_ctx_flags(src:p_ucontext_t):DWORD; procedure _get_fpcontext(src:PCONTEXT;xstate:Pointer); procedure _set_fpcontext(dst:PCONTEXT;xstate:Pointer); +procedure fpuinit(xstate:Pointer); + procedure _get_frame(src:PCONTEXT;dst:p_trapframe;xstate:Pointer;is_jit:Boolean); procedure _set_frame(dst:PCONTEXT;src:p_trapframe;xstate:Pointer;is_jit:Boolean); @@ -137,12 +149,6 @@ begin //teb stack end; -const - _ucodesel=(8 shl 3) or 3; - _udatasel=(7 shl 3) or 3; - _ufssel =(2 shl 3) or 3; - _ugssel =(3 shl 3) or 3; - function GetEnabledXStateFeatures:QWORD; stdcall external 'kernel32'; function InitializeContext( @@ -271,6 +277,25 @@ begin xs^:=uc_xstate^; end; +procedure fpuinit(xstate:Pointer); +var + uc_xsave :PXmmSaveArea; + uc_xstate:PXSTATE; +begin + if (xstate=nil) then Exit; + + uc_xsave :=PXmmSaveArea(xstate); + uc_xstate:=PXSTATE(uc_xsave+1); + + //uc_xstate^.Mask:= + //uc_xstate^.CompactionMask:= + + uc_xsave^.ControlWord:=__INITIAL_FPUCW__; + //uc_xsave^.StatusWord: WORD; + uc_xsave^.MxCsr :=__INITIAL_MXCSR__; + uc_xsave^.MxCsr_Mask :=__INITIAL_MXCSR_MASK__; +end; + procedure _get_frame(src:PCONTEXT;dst:p_trapframe;xstate:Pointer;is_jit:Boolean); var flags:DWORD; diff --git a/sys/md/md_thread.pas b/sys/md/md_thread.pas index ada6a1aa..aab7200f 100644 --- a/sys/md/md_thread.pas +++ b/sys/md/md_thread.pas @@ -45,6 +45,7 @@ procedure seh_wrapper_after (td:p_kthread;func:Pointer); implementation uses + md_context, vmparam; // @@ -194,7 +195,12 @@ begin Context^.EFlags:=$3000 or EFLAGS_INTERRUPT_MASK; - Context^.MxCsr:=INITIAL_MXCSR; + Context^.MxCsr:=__INITIAL_MXCSR__; + + Context^.FltSave.ControlWord:=__INITIAL_FPUCW__; + //Context^.FltSave.StatusWord: WORD; + Context^.FltSave.MxCsr :=__INITIAL_MXCSR__; + Context^.FltSave.MxCsr_Mask :=__INITIAL_MXCSR_MASK__; Context^.ContextFlags:=CONTEXT_THREAD; end;