JIT: dispatch through the PLT cache first and track the current block per thread.

* the two call_far sites in kern_jit.pas now target jit_plt_cache; on a cache
  miss jit_plt_cache jumps to jit_jmp_dispatch
* jit_call_dispatch and jit_call_internal are removed; jmp_dispatcher loses its
  is_call parameter
* t_jit_dynamic.init_plt points every PLT slot at the blob's plt_stub, and the
  "test %r15,%r15 / jz _exit" guard is dropped from jit_plt_cache
* kthread.td_jctx becomes an embedded t_td_jctx record holding the current block
  pointer; the separately sized t_jctx object and size_of_jctx are removed
* add_plt_cache retries when Insert fails; the node allocated for the failed
  attempt is released before the retry instead of being leaked

diff --git a/rtl/x86_jit.pas b/rtl/x86_jit.pas
index feb6bec6..63e7513d 100644
--- a/rtl/x86_jit.pas
+++ b/rtl/x86_jit.pas
@@ -320,6 +320,7 @@ type
   Function GetInstructionsSize:Integer;
   Function GetDataSize:Integer;
   Function GetPltSize:Integer;
+  Function GetPltStart:Integer;
   Function GetMemSize:Integer;
   Procedure RebuldChunkList;
   Procedure RebuldInstructionOffset;
@@ -1455,6 +1456,11 @@ begin
  Result:=APltCount*SizeOf(t_jit_plt);
 end;

+Function t_jit_builder.GetPltStart:Integer;
+begin
+ Result:=AInstructionSize+GetDataSize;
+end;
+
 Function t_jit_builder.GetMemSize:Integer;
 begin
  Result:=AInstructionSize+GetDataSize+GetPltSize;
diff --git a/sys/jit/kern_jit.pas b/sys/jit/kern_jit.pas
index 0c0b7bf5..3a99c434 100644
--- a/sys/jit/kern_jit.pas
+++ b/sys/jit/kern_jit.pas
@@ -180,7 +180,7 @@ begin
  with ctx.builder do
  begin
   leap(r15);
-  call_far(@jit_jmp_dispatch); //input:r14
+  call_far(@jit_plt_cache); //input:r14,r15
  end;
 end;

@@ -189,7 +189,7 @@ begin
  with ctx.builder do
  begin
   leap(r15);
-  call_far(@jit_call_dispatch); //input:r14
+  call_far(@jit_plt_cache); //input:r14,r15
  end;
 end;

diff --git a/sys/jit/kern_jit_asm.pas b/sys/jit/kern_jit_asm.pas
index 7fd5dad3..6fdb9244 100644
--- a/sys/jit/kern_jit_asm.pas
+++ b/sys/jit/kern_jit_asm.pas
@@ -45,11 +45,6 @@ type
   blk:Pointer;
  end;

- t_jctx_asm=object
-  frame:p_jit_frame;
-  cache:p_jplt_cache_asm;
- end;
-
 procedure uplift_jit; assembler;

 procedure copyout_mov_1; assembler;
@@ -75,10 +70,9 @@ procedure copyin_mov_64; assembler;
 procedure copyin_mov_512; assembler;

 procedure jit_syscall; assembler;
+procedure jit_plt_cache; assembler;
 procedure jit_jmp_dispatch; assembler;
-procedure jit_call_dispatch; assembler;

-procedure jit_call_internal; assembler;
 procedure jit_jmp_internal; assembler;

 function IS_JIT_FUNC(rip:qword):Boolean;
@@ -126,7 +120,7 @@ uses

 //

-function jmp_dispatcher(addr,plt:Pointer;is_call:Boolean):Pointer; external;
+function jmp_dispatcher(addr,plt:Pointer):Pointer; external;

 //

@@ -737,18 +731,15 @@ asm
  push %r15

  movq (%r15),%r15 //plt^

- test %r15,%r15
- jz _exit
-
  cmpq t_jplt_cache_asm.src(%r15),%r14
  jne _exit

- //get jctx
- movqq - kthread.td_frame.tf_r13 + kthread.td_jctx(%r13), %r14
+ //get blk
+ movq t_jplt_cache_asm.blk(%r15),%r14

- //set cache
- movqq %r15,t_jctx_asm.cache(%r14)
+ //save current block
+ movqq %r14, - kthread.td_frame.tf_r13 + kthread.td_jctx.block(%r13)

  //get dst
  movq t_jplt_cache_asm.dst(%r15),%r14
@@ -757,19 +748,19 @@ asm
  popf

  //pop internal
- lea 16(%rsp),%rsp
+ lea 8(%rsp),%rsp
  jmp %r14

  _exit:
  pop %r15
  popf
+
+ jmp jit_jmp_dispatch
 end;

 //in:r14(addr) r15(plt)
 procedure jit_jmp_dispatch; assembler; nostackframe;
 asm
- call jit_plt_cache
-
  //prolog (debugger)
  push %rbp
  movq %rsp,%rbp
@@ -778,43 +769,9 @@ asm

  call jit_save_ctx

- //rdi, rsi, rdx
+ //rdi, rsi
  mov %r14,%rdi
  mov %r15,%rsi
- mov $0,%rdx
-
- call jmp_dispatcher
-
- mov %rax,%r14
-
- call jit_load_ctx
-
- //epilog
- movq %rbp,%rsp
- pop %rbp
-
- //pop internal
- lea 8(%rsp),%rsp
- jmp %r14
-end;
-
-//in:r14(addr) r15(plt)
-procedure jit_call_dispatch; assembler; nostackframe;
-asm
- call jit_plt_cache
-
- //prolog (debugger)
- push %rbp
- movq %rsp,%rbp
-
- andq $-16,%rsp //align stack
-
- call jit_save_ctx
-
- //rdi, rsi, rdx
- mov %r14,%rdi
- mov %r15,%rsi
- mov $1,%rdx

  call jmp_dispatcher

@@ -866,44 +823,6 @@ asm
  //uplift %rsp/%rbp ???
 end;

-procedure jit_call_internal; assembler; nostackframe;
-asm
- //pop host call
- mov jit_frame.tf_rsp(%r13),%r14
- lea 8(%r14),%r14
- mov %r14,jit_frame.tf_rsp(%r13)
-
- //push internal call
- lea -8(%rsp),%rsp
-
- //prolog (debugger)
- push %rbp
- movq %rsp,%rbp
-
- //set
- //%r13 ABI preserve the registers
- //%r14 ABI preserve the registers
- //%r15 ABI preserve the registers
-
- //call stack_set_user
-
- //call
- call %gs:teb.jitcall
-
- //restore guard
- movq %gs:teb.thread ,%r13 //curkthread
- leaq kthread.td_frame.tf_r13(%r13),%r13 //jit_frame
-
- //call stack_set_jit
-
- //%r13 ABI preserve the registers
- //%r14 ABI preserve the registers
- //%r15 ABI preserve the registers
-
- //epilog
- pop %rbp
-end;
-
 procedure jit_jmp_internal; assembler; nostackframe;
 asm
  //push internal call
@@ -931,12 +850,24 @@ asm
  pop %rbp

  //pop host call
+ mov jit_frame.tf_rsp(%r13),%r14
+
+ pushfq //
+ call uplift_jit_notsafe
+ popfq //
+
+ //get addr
+ mov (%r14),%r14
+
+ //lea rsp,[rsp+8]
  mov jit_frame.tf_rsp(%r13),%r15
- mov (%r15),%r14
  lea 8(%r15),%r15
  mov %r15,jit_frame.tf_rsp(%r13)

- jmp jit_call_dispatch
+ //set zero plt
+ mov $0, %r15
+
+ jmp jit_jmp_dispatch
 end;

 function IS_JIT_FUNC(rip:qword):Boolean; public;
@@ -947,11 +878,11 @@ begin
  ) or
  (
   (rip>=QWORD(@jit_jmp_dispatch)) and
-  (rip<=(QWORD(@jit_jmp_dispatch)+$30)) //jit_jmp_dispatch func size
+  (rip<=(QWORD(@jit_jmp_dispatch)+$2C)) //jit_jmp_dispatch func size
  ) or
  (
-  (rip>=QWORD(@jit_call_dispatch)) and
-  (rip<=(QWORD(@jit_call_dispatch)+$30)) //jit_call_dispatch func size
+  (rip>=QWORD(@jit_plt_cache)) and
+  (rip<=(QWORD(@jit_plt_cache)+$33)) //jit_plt_cache func size
  );
 end;

diff --git a/sys/jit/kern_jit_dynamic.pas b/sys/jit/kern_jit_dynamic.pas
index 984ea7c7..850eb03b 100644
--- a/sys/jit/kern_jit_dynamic.pas
+++ b/sys/jit/kern_jit_dynamic.pas
@@ -85,6 +85,11 @@ type
   base:Pointer;
   size:ptruint;

+  plta:p_jit_plt;
+  pltc:ptruint;
+
+  plt_stub:t_jplt_cache_asm;
+
   lock:Pointer;
   refs:Integer;

@@ -92,6 +97,7 @@ type
   procedure dec_ref;
   procedure Free;
   function add_entry_point(src,dst:Pointer):p_entry_point;
+  procedure init_plt;
   function add_plt_cache(plt:p_jit_plt;src,dst:Pointer;blk:p_jit_dynamic):p_jplt_cache;
   function new_chunk(count:QWORD):p_jcode_chunk;
   procedure alloc_base(_size:ptruint);
@@ -105,13 +111,6 @@ type
   procedure detach;
  end;

- p_jctx=^t_jctx;
- t_jctx=object(t_jctx_asm)
-  pstub:t_jit_dynamic.t_jplt_cache;
-  procedure free_stub;
-  procedure make_stub(src,dst:Pointer;blk:p_jit_dynamic);
- end;
-
 function new_blob(_size:ptruint):p_jit_dynamic;

 var
@@ -129,7 +128,7 @@ function preload_entry(addr:Pointer):t_jit_dynamic.p_entry_point;

 procedure jit_ctx_free(td:p_kthread);
 procedure switch_to_jit(td:p_kthread);
-function jmp_dispatcher(addr:Pointer;plt:p_jit_plt;is_call:Boolean):Pointer;
+function jmp_dispatcher(addr:Pointer;plt:p_jit_plt):Pointer;

 procedure build(var ctx:t_jit_context2);

@@ -149,11 +148,6 @@ procedure pick(var ctx:t_jit_context2); external name 'kern_jit_pick';

 //

-var
- size_of_jctx:Integer=SizeOf(t_jctx); public;
-
-//
-
 function scan_up_exc(addr:QWORD):QWORD;
 begin
  addr:=(addr+PAGE_MASK) and (not PAGE_MASK);
@@ -217,36 +211,9 @@ begin
  end;
 end;

-procedure t_jctx.free_stub;
-begin
- cache:=nil;
- if (pstub.blk<>nil) then
- begin
-  p_jit_dynamic(pstub.blk)^.dec_ref;
-  pstub.blk:=nil;
- end;
-end;
-
-procedure t_jctx.make_stub(src,dst:Pointer;blk:p_jit_dynamic);
-begin
- free_stub;
- //
- pstub.src:=src;
- pstub.dst:=dst;
- pstub.blk:=blk;
- //
- blk^.inc_ref;
- //
- cache:=@pstub;
-end;
-
 procedure jit_ctx_free(td:p_kthread); public;
-var
- jctx:p_jctx;
 begin
- jctx:=td^.td_jctx;
-
- jctx^.free_stub;
+ td^.td_jctx.block:=nil;
 end;

 procedure switch_to_jit(td:p_kthread); public;
@@ -254,7 +221,8 @@ label
  _start;
 var
  node:t_jit_dynamic.p_entry_point;
- jctx:p_jctx;
+ jctx:p_td_jctx;
+ frame:p_jit_frame;
  //jit_state:Boolean;
 begin
  if (td=nil) then Exit;
@@ -278,27 +246,24 @@ begin
   goto _start;
  end;

- jctx:=td^.td_jctx;
+ jctx:=@td^.td_jctx;

- if (jctx^.frame=nil) then
- begin
-  jctx^.frame:=@td^.td_frame.tf_r13;
- end;
+ frame:=@td^.td_frame.tf_r13;

- jctx^.make_stub(node^.src,node^.dst,node^.blob);
+ jctx^.block:=node^.blob;

  //tf_r14 not need to move
  //tf_r15 not need to move

- jctx^.frame^.tf_r13:=td^.td_frame.tf_r13;
- jctx^.frame^.tf_rsp:=td^.td_frame.tf_rsp;
- jctx^.frame^.tf_rbp:=td^.td_frame.tf_rbp;
+ frame^.tf_r13:=td^.td_frame.tf_r13;
+ frame^.tf_rsp:=td^.td_frame.tf_rsp;
+ frame^.tf_rbp:=td^.td_frame.tf_rbp;

  td^.td_frame.tf_rsp:=QWORD(td^.td_kstack.stack);
  td^.td_frame.tf_rbp:=QWORD(td^.td_kstack.stack);

  td^.td_frame.tf_rip:=QWORD(node^.dst);
- td^.td_frame.tf_r13:=QWORD(jctx^.frame);
+ td^.td_frame.tf_r13:=QWORD(frame);

  set_pcb_flags(td,PCB_FULL_IRET or PCB_IS_JIT);

@@ -407,14 +372,15 @@ begin
  end;
 end;

-function jmp_dispatcher(addr:Pointer;plt:p_jit_plt;is_call:Boolean):Pointer; public;
+function jmp_dispatcher(addr:Pointer;plt:p_jit_plt):Pointer; public;
 label
  _start;
 var
  td:p_kthread;
  node:t_jit_dynamic.p_entry_point;
- jctx:p_jctx;
+ jctx:p_td_jctx;
  curr:p_jit_dynamic;
+ cache:t_jit_dynamic.p_jplt_cache;
 begin
  td:=curkthread;
  if (td=nil) then Exit(nil);
@@ -425,16 +391,8 @@ begin
  begin
   //switch to internal

-  if is_call then
-  begin
-   td^.td_teb^.jitcall:=addr;
-   Exit(@jit_call_internal);
-  end else
-  begin
-   td^.td_teb^.jitcall:=addr;
-   Exit(@jit_jmp_internal);
-  end;
-
+  td^.td_teb^.jitcall:=addr;
+  Exit(@jit_jmp_internal);
  end;

  _start:
@@ -453,23 +411,21 @@ begin
   goto _start;
  end;

- jctx:=td^.td_jctx;
+ jctx:=@td^.td_jctx;

- curr:=nil;
- if (jctx^.cache<>nil) then
- begin
-  curr:=jctx^.cache^.blk;
- end;
+ curr:=jctx^.block;

- if (curr=nil) then
+ if (curr=nil) or (plt=nil) then
  begin
-  jctx^.make_stub(node^.src,node^.dst,node^.blob);
+  jctx^.block:=node^.blob;
  end else
  begin
-  jctx^.cache:=curr^.add_plt_cache(plt,node^.src,node^.dst,node^.blob);
+  cache:=curr^.add_plt_cache(plt,node^.src,node^.dst,node^.blob);
+
+  jctx^.block:=node^.blob;

   //one element plt cache
-  System.InterlockedExchange(plt^.cache,jctx^.cache);
+  System.InterlockedExchange(plt^.cache,cache);
  end;

  Result:=node^.dst;
@@ -616,6 +572,11 @@ begin

  ctx.builder.SaveTo(blob^.base,ctx.builder.GetMemSize);

+ blob^.plta:=blob^.base+ctx.builder.GetPltStart;
+ blob^.pltc:=ctx.builder.APltCount;
+
+ blob^.init_plt;
+
  Writeln('build:0x',HexStr(ctx.text_start,16),'->0x',HexStr(blob^.base),'..',HexStr(blob^.base+blob^.size));

  //F:=FileCreate('recompile.bin');
@@ -827,34 +788,86 @@ begin
  entry_list:=Result;
 end;

+procedure t_jit_dynamic.init_plt;
+var
+ i:Integer;
+begin
+ if (pltc<>0) then
+ For i:=0 to pltc-1 do
+ begin
+  plta[i].cache:=@plt_stub;
+ end;
+end;
+
 function t_jit_dynamic.add_plt_cache(plt:p_jit_plt;src,dst:Pointer;blk:p_jit_dynamic):p_jplt_cache;
 var
  node:t_jplt_cache;
+ dec_blk:p_jit_dynamic;
+ _insert:Boolean;
 begin
  Assert(plt<>nil);
  Assert(blk<>nil);

+ dec_blk:=nil;
+
  node.plt:=plt;
  node.src:=src;

- rw_wlock(lock);
+ repeat

- Result:=jpltc_list.Find(@node);
+  rw_wlock(lock);
+  Result:=jpltc_list.Find(@node);
+  if (Result<>nil) then
+  begin
+   //update
+   Result^.dst:=dst;
+   if (Result^.blk<>blk) then
+   begin
+    dec_blk:=Result^.blk;
+    Result^.blk:=blk;
+    //
+    blk^.inc_ref;
+   end;
+  end;
+  rw_wunlock(lock);

- if (Result=nil) then
- begin
-  Result:=AllocMem(Sizeof(t_jplt_cache));
-  Result^.plt:=plt;
-  Result^.src:=src;
-  Result^.dst:=dst;
-  Result^.blk:=blk;
-  //
-  blk^.inc_ref;
-  //
-  jpltc_list.Insert(Result);
- end;
+  if (dec_blk<>nil) then
+  begin
+   dec_blk^.dec_ref;
+   dec_blk:=nil;
+  end;
+
+  if (Result<>nil) then
+  begin
+   Break;
+  end else
+  begin
+   Result:=AllocMem(Sizeof(t_jplt_cache));
+   Result^.plt:=plt;
+   Result^.src:=src;
+   Result^.dst:=dst;
+   Result^.blk:=blk;
+   //
+   rw_wlock(lock);
+   _insert:=jpltc_list.Insert(Result);
+   if _insert then
+   begin
+    blk^.inc_ref;
+   end;
+   rw_wunlock(lock);
+   //
+   if _insert then
+   begin
+    Break;
+   end;
+   //Insert refused the node (presumably an equal plt/src key was added meanwhile):
+   //no reference was taken for it, so release it here; the next Find overwrites Result
+   FreeMem(Result);
+   Result:=nil;
+  end;
+
+ until false;

- rw_wunlock(lock);
 end;

 function t_jit_dynamic.new_chunk(count:QWORD):p_jcode_chunk;
diff --git a/sys/kern/kern_thr.pas b/sys/kern/kern_thr.pas
index be0b57e4..e0093c94 100644
--- a/sys/kern/kern_thr.pas
+++ b/sys/kern/kern_thr.pas
@@ -169,6 +169,11 @@ type
   sttop:Pointer;
  end;

+ p_td_jctx=^t_td_jctx;
+ t_td_jctx=packed record
+  block:Pointer;
+ end;
+
  pp_kthread=^p_kthread;
  p_kthread=^kthread;
  kthread=record
@@ -198,9 +203,10 @@ type
   td_oldsigmask :sigset_t;
   td_sigqueue :sigqueue_t;
   td_retval :array[0..1] of QWORD;
-  td_jctx :Pointer;
+  td_align :Pointer;
   td_frame :trapframe;
   td_fpstate :t_fpstate;
+  td_jctx :t_td_jctx;
   td_ustack :t_td_stack;
   td_kstack :t_td_stack;
   //
diff --git a/sys/md/md_thread.pas b/sys/md/md_thread.pas
index e394dd13..bf504720 100644
--- a/sys/md/md_thread.pas
+++ b/sys/md/md_thread.pas
@@ -40,7 +40,6 @@ implementation

 var
  size_of_umtx_q:Integer; external;
- size_of_jctx :Integer; external;

 //

@@ -67,7 +66,7 @@ begin
  if (R<>0) then Exit;

  //header
- size:=SizeOf(kthread)+size_of_umtx_q+size_of_jctx;
+ size:=SizeOf(kthread)+size_of_umtx_q;
  size:=System.Align(size,4*1024);

  R:=NtAllocateVirtualMemory(
@@ -82,7 +81,6 @@ begin
  td:=data;

  td^.td_umtxq:=Pointer(td+1);
- td^.td_jctx :=Pointer(td^.td_umtxq)+size_of_umtx_q;

  //footer
  data:=data+SYS_STACK_RSRV-SYS_STACK_SIZE;