From f8ed61d9975e4edf4c32b360d21ca9d2c653119a Mon Sep 17 00:00:00 2001
From: Pavel <68122101+red-prig@users.noreply.github.com>
Date: Thu, 24 Oct 2024 17:14:26 +0300
Subject: [PATCH] +

---
 rtl/x86_fpdbgdisas.pp | 26 ++----
 rtl/x86_jit.pas | 79 +++++++++++++++-
 sys/jit/kern_jit.pas | 4 +-
 sys/jit/kern_jit_ctx.pas | 11 +--
 sys/jit/kern_jit_ops_sse.pas | 175 ++++++++++++++++++++++++++++++++++-
 sys/kern/kern_mtx.pas | 5 +-
 sys/md/md_exception.pas | 14 +--
 7 files changed, 277 insertions(+), 37 deletions(-)

diff --git a/rtl/x86_fpdbgdisas.pp b/rtl/x86_fpdbgdisas.pp
index 46db31c8..e14cabb6 100644
--- a/rtl/x86_fpdbgdisas.pp
+++ b/rtl/x86_fpdbgdisas.pp
@@ -403,7 +403,6 @@ type
     procedure AddGw;
     procedure AddGy;
     procedure AddGz;
-    procedure AddHdq;
     procedure AddHpd;
     procedure AddHps;
     procedure AddHsd;
@@ -1498,11 +1497,6 @@ begin
   else AddModReg(regGeneral, os32);
 end;
 
-procedure TX86Disassembler.AddHdq;
-begin
-  AddVexReg(regXmm, Vex.VectorLength);
-end;
-
 procedure TX86Disassembler.AddHpd;
 begin
   if flagVex in Flags
@@ -3677,8 +3671,8 @@ begin
     $C4: begin
       DecodeSIMD([soNone, so66]);
       case SimdOpcode of
-        soNone: begin SetOpcode(OPpinsr, OPSx_w ); AddPq; AddRy_Mw; AddIb end;
-        so66: begin SetOpcode(OPpinsr, OPSx_w, True); AddVdq; AddHdq; AddRy_Mw; AddIb end;
+        soNone: begin SetOpcode(OPpinsr, OPSx_w ); AddPq; AddRy_Mw; AddIb end;
+        so66: begin SetOpcode(OPpinsr, OPSx_w, True); AddVdq; AddHx; AddRy_Mw; AddIb end; //AddHdq is removed by this patch; every former user now calls AddHx
       end;
     end;
     $C5: begin
@@ -3937,10 +3931,10 @@ begin
     $BE: begin SetOpcode(OPvfnmsub231, OPS_ps_d ); AddVx; AddHx; AddWx; CheckVex; end;
     $BF: begin SetOpcode(OPvfnmsub231, OPS_ss_d ); AddVx; AddHx; AddWx; CheckVex; end;
     $DB: begin SetOpcode(OPaesimc, OPSnone, True); AddVdq; AddWdq; end;
-    $DC: begin SetOpcode(OPaesenc, OPSnone, True); AddVdq; AddHdq; AddWdq; end;
-    $DD: begin SetOpcode(OPaesenclast, OPSnone, True); AddVdq; AddHdq; AddWdq; end;
-    $DE: begin SetOpcode(OPaesdec, OPSnone, True); AddVdq; AddHdq; AddWdq; end;
-    $DF: begin SetOpcode(OPaesdeclast, OPSnone, True); AddVdq; AddHdq; AddWdq; end;
+    $DC: begin SetOpcode(OPaesenc, OPSnone, True); AddVdq; AddHx; AddWdq; end;
+    $DD: begin SetOpcode(OPaesenclast, OPSnone, True); AddVdq; AddHx; AddWdq; end;
+    $DE: begin SetOpcode(OPaesdec, OPSnone, True); AddVdq; AddHx; AddWdq; end;
+    $DF: begin SetOpcode(OPaesdeclast, OPSnone, True); AddVdq; AddHx; AddWdq; end;
     $F0: begin SetOpcode(OPmov, OPSc_be ); AddGw; AddMw; end;
     $F1: begin SetOpcode(OPmov, OPSc_be ); AddMw; AddGw; end;
     $F6: begin SetOpcode(OPadcx ); AddGy; AddEy; end;
@@ -4007,15 +4001,15 @@ begin
     $18: begin SetOpcode(OPinsert, OPSx_f128,True); AddVqq; AddHqq; AddWqq; AddIb; CheckVex; end;
     $19: begin SetOpcode(OPextract, OPSx_f128,True); AddWdq; AddVqq; AddIb; CheckVex; end;
     $1D: begin SetOpcode(OPcvtps2, OPSx_ph, True); AddWx_Mq; AddVx; AddIb; CheckVex; end;
-    $20: begin SetOpcode(OPpinsr, OPSx_b, True); AddVdq; AddHdq; AddRy_Mb; AddIb; end;
-    $21: begin SetOpcode(OPinsert, OPSx_ps, True); AddVdq; AddHdq; AddUdq_Md; AddIb; end;
-    $22: begin SetOpcode(OPpinsr, OPS_d_q, True); AddVdq; AddHdq; AddEy; AddIb; end;
+    $20: begin SetOpcode(OPpinsr, OPSx_b, True); AddVdq; AddHx; AddRy_Mb; AddIb; end;
+    $21: begin SetOpcode(OPinsert, OPSx_ps, True); AddVdq; AddHx; AddUdq_Md; AddIb; end;
+    $22: begin SetOpcode(OPpinsr, OPS_d_q, True); AddVdq; AddHx; AddEy; AddIb; end;
     $38: begin SetOpcode(OPinsert, OPSx_i128,True); AddVqq; AddHqq; AddWqq; AddIb; CheckVex; end;
     $39: begin SetOpcode(OPextract, OPSx_i128,True); AddWdq; AddVqq; AddIb; CheckVex; end;
     $40: begin SetOpcode(OPdp, OPSx_ps, True); AddVx; AddHx; AddWx; AddIb; end;
     $41: begin SetOpcode(OPdp, OPSx_pd, True); AddVdq; AddHx; AddWdq; AddIb; end;
     $42: begin SetOpcode(OPmpsadbw, OPSnone, True); AddVx; AddHx; AddWx; AddIb; end;
-    $44: begin SetOpcode(OPpclmulqdq, OPSnone, True); AddVdq; AddHdq; AddWdq; AddIb; end;
+    $44: begin SetOpcode(OPpclmulqdq, OPSnone, True); AddVdq; AddHx; AddWdq; AddIb; end;
     $46: begin SetOpcode(OPvperm2, OPSx_i128 ); AddVqq; AddHqq; AddWqq; AddIb; CheckVex; end;
     $4A: begin SetOpcode(OPblendv, OPSx_ps, True); AddVx; AddHx; AddWx; AddLx; CheckVex; end;
     $4B: begin SetOpcode(OPblendv, OPSx_pd, True); AddVx; AddHx; AddWx; AddLx; CheckVex; end;
diff --git a/rtl/x86_jit.pas b/rtl/x86_jit.pas
index 190435d8..949b60a2 100644
--- a/rtl/x86_jit.pas
+++ b/rtl/x86_jit.pas
@@ -387,7 +387,14 @@ type
   procedure subi (reg:TRegValue ;imm:Int64);
   procedure subi8 (reg:TRegValue ;imm:Byte);
   procedure subi8 (mem:t_jit_leas ;imm:Byte);
+  procedure shli8 (reg:TRegValue ;imm:Byte); //emit: shl reg,imm8
+  procedure shri8 (reg:TRegValue ;imm:Byte); //emit: shr reg,imm8
+  procedure andi (reg:TRegValue ;imm:Int64); //emit: and reg,imm
+  procedure andi8 (reg:TRegValue ;imm:Byte); //emit: and reg,imm8
+  procedure andq (reg0:TRegValue ;reg1:TRegValue); //emit: and reg0,reg1
+  procedure orq (reg0:TRegValue ;reg1:TRegValue); //emit: or reg0,reg1
   procedure xorq (reg0:TRegValue ;reg1:TRegValue);
+  procedure notq (reg:TRegValue); //emit: not reg
   procedure cmpq (mem:t_jit_leas ;reg:TRegValue);
   procedure cmpq (reg:TRegValue ;mem:t_jit_leas);
   procedure cmpq (reg0:TRegValue ;reg1:TRegValue);
@@ -433,6 +440,8 @@ type
   procedure int3;
   procedure testq(reg0:TRegValue;reg1:TRegValue);
   procedure bti8 (mem:t_jit_leas;imm:Byte);
+  procedure shlx (reg0,reg1,reg2:TRegValue); //callers in this patch pass (dst,src,count)
+  procedure shrx (reg0,reg1,reg2:TRegValue); //callers in this patch pass (dst,src,count)
  end;
 
 operator :=(const A:TRegValue):t_jit_lea;
@@ -3746,6 +3755,50 @@ end;
 
 ///
 
+procedure t_jit_builder.shli8(reg:TRegValue;imm:Byte);
+const
+ desc:t_op_type=(op:$C1;index:4); //x86: C1 /4 ib = SHL r/m,imm8 (presumably index is the ModRM /digit - confirm)
+begin
+ _RI8(desc,reg,imm);
+end;
+
+procedure t_jit_builder.shri8(reg:TRegValue;imm:Byte);
+const
+ desc:t_op_type=(op:$C1;index:5); //x86: C1 /5 ib = SHR r/m,imm8
+begin
+ _RI8(desc,reg,imm);
+end;
+
+///
+
+procedure t_jit_builder.andi(reg:TRegValue;imm:Int64);
+const
+ desc:t_op_type=(op:$81;index:4); //x86: 81 /4 = AND r/m,imm32; NOTE(review): imm is Int64 - confirm how _RI treats values outside imm32
+begin
+ _RI(desc,reg,imm);
+end;
+
+procedure t_jit_builder.andi8(reg:TRegValue;imm:Byte);
+const
+ desc:t_op_type=(op:$83;index:4); //x86: 83 /4 ib = AND r/m,imm8; the ISA sign-extends imm8, so imm>=$80 does not clear the upper bits
+begin
+ _RI8(desc,reg,imm);
+end;
+
+procedure t_jit_builder.andq(reg0:TRegValue;reg1:TRegValue);
+const
+ desc:t_op_type=(op:$21;index:0); //x86: 21 /r = AND r/m,r
+begin
+ _RR(desc,reg0,reg1);
+end;
+
+procedure t_jit_builder.orq(reg0:TRegValue;reg1:TRegValue);
+const
+ desc:t_op_type=(op:$09;index:0); //x86: 09 /r = OR r/m,r
+begin
+ _RR(desc,reg0,reg1);
+end;
+
 procedure t_jit_builder.xorq(reg0:TRegValue;reg1:TRegValue);
 const
  desc:t_op_type=(op:$31;index:0);
@@ -3753,6 +3806,13 @@ begin
  _RR(desc,reg0,reg1);
 end;
 
+procedure t_jit_builder.notq(reg:TRegValue);
+const
+ desc:t_op_type=(op:$F7;index:2); //x86: F7 /2 = NOT r/m
+begin
+ _R(desc,reg);
+end;
+
 ///
 
 procedure t_jit_builder.cmpq(mem:t_jit_leas;reg:TRegValue);
@@ -4879,7 +4939,7 @@ end;
 
 procedure t_jit_builder.seto(reg:TRegValue);
 const
- desc:t_op_type=(op:$0F90;opt:[not_prefix]);
+ desc:t_op_type=(op:$0F90;opt:[not_prefix,not_os8]); //x86: 0F 90 = SETO r/m8; NOTE(review): not_os8 presumably disables an opcode adjustment for 8-bit operands - confirm
 begin
  _R(desc,reg);
 end;
@@ -4903,6 +4963,23 @@ begin
  _MI8(desc,mem,imm);
 end;
 
+procedure t_jit_builder.shlx(reg0,reg1,reg2:TRegValue);
+const
+ desc:t_op_type=(
+  op:$F7;simdop:1;mm:2;vw_mode:vwR64; //x86: SHLX = VEX.66.0F38 F7 (presumably simdop 1 = 66 prefix, mm 2 = 0F38 map)
+ );
+begin
+ _VVV(desc,reg0,reg2,reg1,os64); //1 3 2: reg2 is passed ahead of reg1; operand size is fixed to os64
+end;
+
+procedure t_jit_builder.shrx(reg0,reg1,reg2:TRegValue);
+const
+ desc:t_op_type=(
+  op:$F7;simdop:3;mm:2;vw_mode:vwR64; //x86: SHRX = VEX.F2.0F38 F7 (presumably simdop 3 = F2 prefix)
+ );
+begin
+ _VVV(desc,reg0,reg2,reg1,os64); //1 3 2: reg2 is passed ahead of reg1; operand size is fixed to os64
+end;
 
 
 end.
diff --git a/sys/jit/kern_jit.pas b/sys/jit/kern_jit.pas
index d0fe1daf..b55e8757 100644
--- a/sys/jit/kern_jit.pas
+++ b/sys/jit/kern_jit.pas
@@ -1037,8 +1037,8 @@ begin
   movq([GS+Integer(teb_jitcall)],r13);
 
   //load curkthread,jit ctx
-  movq(r13,[GS+Integer(teb_thread)]);
-  leaq(r13,[r13+jit_frame_offset]);
+  movq(r13,[GS +Integer(teb_thread)]);
+  leaq(r13,[r13+jit_frame_offset ]);
 
   //load r14,r15
   movq([r13+Integer(@p_jit_frame(nil)^.tf_r14)],r14);
diff --git a/sys/jit/kern_jit_ctx.pas b/sys/jit/kern_jit_ctx.pas
index 37f07461..f05d76ff 100644
--- a/sys/jit/kern_jit_ctx.pas
+++ b/sys/jit/kern_jit_ctx.pas
@@ -1610,13 +1610,6 @@ end;
 
 // 64-40 = 24
 procedure op_uplift(var ctx:t_jit_context2;const dst:TRegValue;mem_size:TOperandSize;hint:t_lea_hint=[]);
-const
- shlx_desc:t_op_type=(
-  op:$F7;simdop:1;mm:2;vw_mode:vwR64;
- );
- shrx_desc:t_op_type=(
-  op:$F7;simdop:3;mm:2;vw_mode:vwR64;
- );
 var
  rbits:TRegValue;
 begin
@@ -1647,8 +1640,8 @@ begin
   movi(new_reg_size(rbits,os8),24); //mov $24,%bpl
 
   //clear hi
-  _VVV(shlx_desc,dst,rbits,dst,os64); //1 3 2 | shlx %rbp,%r14,%r14
-  _VVV(shrx_desc,dst,rbits,dst,os64); //1 3 2 | shrx %rbp,%r14,%r14
+  shlx(dst,dst,rbits); //shlx %rbp,%r14,%r14
+  shrx(dst,dst,rbits); //shrx %rbp,%r14,%r14
 
   if (rbits.AIndex=rbp.AIndex) then
   begin
diff --git a/sys/jit/kern_jit_ops_sse.pas b/sys/jit/kern_jit_ops_sse.pas
index 5f92c087..cf53387d 100644
--- a/sys/jit/kern_jit_ops_sse.pas
+++ b/sys/jit/kern_jit_ops_sse.pas
@@ -8,9 +8,11 @@ interface
 implementation
 
 uses
+ kern_thr,
  x86_fpdbgdisas,
  x86_jit,
  kern_jit_ops,
+ kern_jit_asm,
  kern_jit_ctx;
 
 var
@@ -323,14 +325,180 @@ begin
  end;
 end;
 
-//
+//SSE4a
+
+{
+AMD64 Architecture
+Programmer’s Manual
+Volume 4:
+128-Bit and 256-Bit
+Media Instructions
+}
 
 procedure op_movnt_sd_ss(var ctx:t_jit_context2);
 begin
  op_emit2_simd_mem_reg(ctx,[his_mov,his_wo]);
 end;
 
-//
+{
+ INSERTQ: put the low len bits of xmm1 into xmm0 at bit idx
+ a = xmm0[0:63]
+ b = xmm1[0:63]
+ mask = 0xFFFFFFFFFFFFFFFF;
+
+ m = mask shl (64 - (idx + len));
+ m = m shr (64 - len);
+ m = m shl idx;
+
+ b = b shl idx;
+ b = b and m;
+
+ a = (not m) and a;
+ a = a or b;
+
+ xmm0[0:63] = a;
+}
+
+procedure movq_r_xmm(var ctx:t_jit_context2;reg0,reg1:TRegValue);
+const
+ desc:t_op_type=(op:$660F7E;index:0);
+begin
+ ctx.builder._RR(desc,reg0,reg1,reg0.ASize); //66 REX.W 0F 7E /r MOVQ r/m64, xmm
+end;
+
+procedure pinsrq(var ctx:t_jit_context2;reg0,reg1:TRegValue;imm8:Byte);
+const
+ desc:t_op_type=(op:$660F3A22;index:0); //66 REX.W 0F 3A 22 /r ib PINSRQ xmm, r/m64, imm8
+begin
+ ctx.builder._RRI8(desc,reg0,reg1,imm8,reg1.ASize); //NOTE(review): PINSRQ has xmm in ModRM.reg and gpr in ModRM.rm - confirm _RRI8 operand order
+end;
+
+procedure pextrq(var ctx:t_jit_context2;reg0,reg1:TRegValue;imm8:Byte);
+const
+ desc:t_op_type=(op:$660F3A16;index:0); //66 REX.W 0F 3A 16 /r ib PEXTRQ r/m64, xmm, imm8
+begin
+ ctx.builder._RRI8(desc,reg0,reg1,imm8,reg0.ASize);
+end;
+
+procedure op_insertq(var ctx:t_jit_context2); //emit INSERTQ emulation: 4-operand (imm len,idx) and 2-operand (len,idx in xmm1[64:127]) forms
+var
+ len,idx:Int64;
+ mask:QWORD;
+ xmm_a,xmm_b:TRegValue;
+ a,b,m:TRegValue;
+begin
+
+ xmm_a:=new_reg(ctx.din.Operand[1]);
+ xmm_b:=new_reg(ctx.din.Operand[2]);
+
+ a:=r_tmp0;
+ b:=r_tmp1;
+ m:=r_thrd; //borrowed as a third scratch register; r13 is reloaded at the end
+
+ with ctx.builder do
+ begin
+  //swap
+  xchgq(rbp,rax);
+  //load flags to al,ah
+  seto(al);
+  lahf;
+
+  if (ctx.din.OperCnt=4) then
+  begin
+   //insertq xmm0,xmm1,$10,$30
+
+   len:=0;
+   GetTargetOfs(ctx.din,ctx.code,3,len);
+
+   idx:=0;
+   GetTargetOfs(ctx.din,ctx.code,4,idx);
+
+   len:=len and $3F;
+   idx:=idx and $3F;
+
+   mask:=QWORD($FFFFFFFFFFFFFFFF);
+   mask:=mask shl (64 - (idx + len));
+   mask:=mask shr (64 - len);
+   mask:=mask shl idx;
+
+   if (classif_offset_u64(mask)=os64) then
+   begin
+    //64bit mask
+    movi64(m,mask);
+   end else
+   begin
+    //32bit zero extend
+    movi(new_reg_size(m,os32),mask);
+   end;
+
+   //b = xmm1[0:63] shl idx (idx is known at JIT time)
+   movq_r_xmm(ctx,b,xmm_b);
+   shli8(b,Byte(idx));
+
+  end else
+  begin
+   //insertq xmm0,xmm1
+
+   //PEXTRQ r/m64, xmm2, imm8
+   pextrq(ctx,m,xmm_b,1);
+
+   movq (b,m);
+   andi8(b,$3F); //b = len with m[0]
+
+   movi (new_reg_size(a,os32),64); //a = 64
+   subq (a,b); //a = (64 - len)
+
+   andi (m,$3F00); //filter
+
+   movq (b,a);
+   andi8(b,$FF);
+   orq (m,b); // save (64 - len) to m[0]
+
+   movq (b,m);
+   shri8(b,8);
+   andi8(b,$3F); // b = idx with m[1]
+
+   subq (a,b); // a = (64 - len - idx)
+
+   movi (b,-1); // b = 0xFFFFFFFFFFFFFFFF
+   shlx (b,b,a); // b = b shl (64 - idx - len)
+   shrx (b,b,m); // b = b shr (64 - len):[0x3F];
+   shri8(m,8); // m = idx (was shli8, which zeroed the low byte so the shifts below used 0)
+   shlx (b,b,m); // b = mask = b shl idx
+   movq_r_xmm(ctx,a,xmm_b); // a = xmm1[0:63] (a is free again here)
+   shlx (a,a,m); // a = a shl idx
+   movq (m,b); // m = mask
+   movq (b,a); // b = xmm1[0:63] shl idx
+  end;
+
+  //both forms arrive here with m = mask and b = xmm1[0:63] shl idx
+  andq(b,m); //b = field to insert
+  notq(m);
+  movq_r_xmm(ctx,a,xmm_a); //a = xmm0[0:63]
+  andq(a,m); //clear the destination field
+  orq (a,b);
+
+  //xmm0[0:63] = a;
+  //PINSRQ xmm1, r/m64, imm8
+  pinsrq(ctx,xmm_a,a,0);
+
+  //store flags from al,ah
+  addi(al,127); //al is 0/1 from seto: 1+127 overflows and sets OF again
+  sahf;
+  //swap
+  xchgq(rbp,rax);
+
+  //restore rbp
+  movq(rbp,rsp);
+
+  //restore jit_frame
+  movq(r13,[GS +Integer(teb_thread)]);
+  leaq(r13,[r13+jit_frame_offset ]);
+ end;
+
+end;
+
+//SSE4a
 
 const
  movl_ps_pd_desc:t_op_desc=(
@@ -633,12 +801,15 @@ begin
  begin
   jit_cbs[OPPnone,OPmovnt,OPSx_sd]:=@op_movnt_sd_ss;
   jit_cbs[OPPnone,OPmovnt,OPSx_ss]:=@op_movnt_sd_ss;
+  jit_cbs[OPPnone,OPinsert,OPSx_q]:=@add_orig;
  end else
  begin
   jit_cbs[OPPnone,OPmovnt,OPSx_sd]:=@op_movsd;
   jit_cbs[OPPnone,OPmovnt,OPSx_ss]:=@op_movss;
+  jit_cbs[OPPnone,OPinsert,OPSx_q]:=@op_insertq;
  end;
 
+
  jit_cbs[OPPnone,OPaeskeygenassist,OPSnone]:=@op_reg_mem_wo;
  jit_cbs[OPPnone,OPaesimc ,OPSnone]:=@op_reg_mem_wo;
 
diff --git a/sys/kern/kern_mtx.pas b/sys/kern/kern_mtx.pas
index eeeb86de..a51e5c89 100644
--- a/sys/kern/kern_mtx.pas
+++ b/sys/kern/kern_mtx.pas
@@ -5,6 +5,9 @@ unit kern_mtx;
 
 interface
 
+uses
+ sysutils;
+
 type
  p_mtx=^mtx;
  mtx=packed record
@@ -133,7 +136,7 @@ end;
 
 procedure mtx_assert(var m:mtx); inline;
 begin
- Assert(mtx_owned(m));
+ Assert(mtx_owned(m),IntToStr(m.c.OwningThread)+'<>'+IntToStr(GetCurrentThreadId));
 end;
 
 end.
diff --git a/sys/md/md_exception.pas b/sys/md/md_exception.pas index 8faa485f..e555b0ba 100644 --- a/sys/md/md_exception.pas +++ b/sys/md/md_exception.pas @@ -26,6 +26,7 @@ uses vm_pmap_prot, vm_tracking_map, kern_proc, + kern_jit_ctx, kern_jit_dynamic; const @@ -256,6 +257,8 @@ begin end; function ProcessException(p:PExceptionPointers):longint; stdcall; +var + instr:t_instruction_info; begin Result:=EXCEPTION_CONTINUE_SEARCH; if (curkthread=nil) then Exit; @@ -271,10 +274,11 @@ begin STATUS_ACCESS_VIOLATION: begin + instr:=get_instruction_info(Pointer(p^.ContextRecord^.Rip)); if pmap_danger_zone(vm_map_t(p_proc.p_vmspace)^.pmap, get_pageflt_addr(p), - 256 //TODO: access len + instr.mema_size ) then begin Exit(EXCEPTION_CONTINUE_EXECUTION); @@ -283,23 +287,21 @@ begin case get_pageflt_err(p) of VM_PROT_READ: begin - //TODO: access len - if ((ppmap_get_prot(get_pageflt_addr(p),256) and VM_PROT_READ)<>0) then + if ((ppmap_get_prot(get_pageflt_addr(p),instr.mema_size) and VM_PROT_READ)<>0) then begin Writeln(stderr,'Unhandled VM_PROT_READ'); end; end; VM_PROT_WRITE: begin - //TODO: access len - if ((ppmap_get_prot(get_pageflt_addr(p),256) and VM_PROT_WRITE)<>0) then + if ((ppmap_get_prot(get_pageflt_addr(p),instr.mema_size) and VM_PROT_WRITE)<>0) then begin Writeln('TRACK_WRITE:',HexStr(get_pageflt_addr(p),10)); //trigger and restore vm_map_track_trigger(p_proc.p_vmspace, get_pageflt_addr(p), - get_pageflt_addr(p)+256, //TODO: access len + get_pageflt_addr(p)+instr.mema_size, nil, M_CPU_WRITE); //