diff --git a/fpPS4.lpi b/fpPS4.lpi index 6a627dc3..41e0780c 100644 --- a/fpPS4.lpi +++ b/fpPS4.lpi @@ -1025,6 +1025,10 @@ + + + + diff --git a/rtl/atomic.pas b/rtl/atomic.pas index 098db820..e09f6148 100644 --- a/rtl/atomic.pas +++ b/rtl/atomic.pas @@ -99,8 +99,10 @@ function bits1(P:Pointer):SizeUInt; inline; function bits1(P:SizeUInt):SizeUInt; inline; procedure spin_pause; -procedure atomic_set_int (addr:PInteger;val:Integer); sysv_abi_default; -procedure atomic_clear_int(addr:PInteger;val:Integer); sysv_abi_default; +procedure atomic_set_byte (addr:PByte;val:byte); sysv_abi_default; +procedure atomic_clear_byte(addr:PByte;val:byte); sysv_abi_default; +procedure atomic_set_int (addr:PInteger;val:Integer); sysv_abi_default; +procedure atomic_clear_int (addr:PInteger;val:Integer); sysv_abi_default; implementation @@ -389,6 +391,18 @@ asm pause end; + +procedure atomic_set_byte(addr:PByte;val:byte); assembler; nostackframe; sysv_abi_default; +asm + lock orb %sil,(%rdi) +end; + +procedure atomic_clear_byte(addr:PByte;val:byte); assembler; nostackframe; sysv_abi_default; +asm + not %sil + lock andb %sil,(%rdi) +end; + procedure atomic_set_int(addr:PInteger;val:Integer); assembler; nostackframe; sysv_abi_default; asm lock orl %esi,(%rdi) diff --git a/sys/kern/kern_rangelock.pas b/sys/kern/kern_rangelock.pas new file mode 100644 index 00000000..5335e207 --- /dev/null +++ b/sys/kern/kern_rangelock.pas @@ -0,0 +1,284 @@ +unit kern_rangelock; + +{$mode ObjFPC}{$H+} +{$CALLING SysV_ABI_CDecl} + +interface + +uses + mqueue, + kern_thr, + kern_mtx; + +const + RL_LOCK_READ =$0001; + RL_LOCK_WRITE =$0002; + RL_LOCK_TYPE_MASK=$0003; + RL_LOCK_GRANTED =$0004; + +type + off_t=Int64; + + p_rl_q_entry=^rl_q_entry; + rl_q_entry=record + rl_q_link :TAILQ_ENTRY; //rl_q_entry + rl_q_start:off_t; + rl_q_end :off_t; + rl_q_flags:Integer; + end; + + p_rangelock=^rangelock; + rangelock=record + rl_waiters:TAILQ_HEAD; //rl_q_entry + rl_currdep:p_rl_q_entry; + end; + +procedure rangelock_init(lock:p_rangelock); +procedure rangelock_destroy(lock:p_rangelock); +procedure rangelock_unlock(lock:p_rangelock;cookie:Pointer;ilk:p_mtx); +function rangelock_unlock_range(lock:p_rangelock;cookie:Pointer;start,__end:off_t;ilk:p_mtx):Pointer; +function rangelock_rlock(lock:p_rangelock;start,__end:off_t;ilk:p_mtx):Pointer; +function rangelock_wlock(lock:p_rangelock;start,__end:off_t;ilk:p_mtx):Pointer; +procedure rlqentry_free(rleq:p_rl_q_entry); + +implementation + +// + +function msleep(ident :Pointer; + lock :p_mtx; + priority:Integer; + wmesg :PChar; + timo :Int64):Integer; external; + +procedure wakeup(ident:Pointer); external; + +// + +function rlqentry_alloc():p_rl_q_entry; inline; +begin + Result:=AllocMem(SizeOf(rl_q_entry)); +end; + +procedure rlqentry_free(rleq:p_rl_q_entry); inline; +begin + if (rleq<>nil) then + begin + FreeMem(rleq); + end; +end; + +procedure rangelock_init(lock:p_rangelock); +begin + TAILQ_INIT(@lock^.rl_waiters); + lock^.rl_currdep:=nil; +end; + +procedure rangelock_destroy(lock:p_rangelock); +begin + Assert(TAILQ_EMPTY(@lock^.rl_waiters), 'Dangling waiters'); +end; + +{ + * Verifies the supplied rl_q_entries for compatibility. Returns true + * if the rangelock queue entries are not compatible, false if they are. + * + * Two entries are compatible if their ranges do not overlap, or both + * entries are for read. 
+} +function rangelock_incompatible(e1,e2:p_rl_q_entry):Integer; +begin + if ((e1^.rl_q_flags and RL_LOCK_TYPE_MASK)=RL_LOCK_READ) and + ((e2^.rl_q_flags and RL_LOCK_TYPE_MASK)=RL_LOCK_READ) then + begin + Exit(0); + end; + if (e1^.rl_q_start < e2^.rl_q_end) and (e1^.rl_q_end > e2^.rl_q_start) then + begin + Exit(1); + end; + Exit(0); +end; + +{ + * Recalculate the lock^.rl_currdep after an unlock. +} +procedure rangelock_calc_block(lock:p_rangelock); +label + _out; +var + entry,entry1,whead:p_rl_q_entry; +begin + + if (lock^.rl_currdep=TAILQ_FIRST(@lock^.rl_waiters)) and + (lock^.rl_currdep<>nil) then + begin + lock^.rl_currdep:=TAILQ_NEXT(lock^.rl_currdep, @lock^.rl_currdep^.rl_q_link); + end; + + entry:=lock^.rl_currdep; + + while (entry<>nil) do + begin + entry1:=TAILQ_FIRST(@lock^.rl_waiters); + + while (entry1<>nil) do + begin + if (rangelock_incompatible(entry, entry1)<>0) then + begin + goto _out; + end; + if (entry1=entry) then + begin + break; + end; + // + entry1:=TAILQ_NEXT(entry1, @entry1^.rl_q_link) + end; + + //next + entry:=TAILQ_NEXT(entry, @entry^.rl_q_link); + end; + +_out: + lock^.rl_currdep:=entry; + + whead:=TAILQ_FIRST(@lock^.rl_waiters); + + while (whead<>nil) do + begin + if (whead=lock^.rl_currdep) then + begin + break; + end; + if ((whead^.rl_q_flags and RL_LOCK_GRANTED)=0) then + begin + whead^.rl_q_flags:=whead^.rl_q_flags or RL_LOCK_GRANTED; + wakeup(whead); + end; + // + whead:=TAILQ_NEXT(whead, @whead^.rl_q_link) + end; +end; + +procedure rangelock_unlock_locked(lock:p_rangelock;entry:p_rl_q_entry;ilk:p_mtx); +begin + Assert((lock<>nil) and (entry<>nil) and (ilk<>nil)); + mtx_assert(ilk^); + Assert(entry<>lock^.rl_currdep, 'stuck currdep'); + + TAILQ_REMOVE(@lock^.rl_waiters, entry, @entry^.rl_q_link); + rangelock_calc_block(lock); + mtx_unlock(ilk^); + + if (curkthread^.td_rlqe=nil) then + curkthread^.td_rlqe:=entry + else + rlqentry_free(entry); +end; + +procedure rangelock_unlock(lock:p_rangelock;cookie:Pointer;ilk:p_mtx); +begin + Assert((lock<>nil) and (cookie<>nil) and (ilk<>nil)); + + mtx_lock(ilk^); + rangelock_unlock_locked(lock, cookie, ilk); +end; + +{ + * Unlock the sub-range of granted lock. +} +function rangelock_unlock_range(lock:p_rangelock;cookie:Pointer;start,__end:off_t;ilk:p_mtx):Pointer; +var + entry:p_rl_q_entry; +begin + Assert((lock<>nil) and (cookie<>nil) and (ilk<>nil)); + + entry:=cookie; + + Assert((entry^.rl_q_flags and RL_LOCK_GRANTED)<>0,'Unlocking non-granted lock'); + Assert(entry^.rl_q_start=start, 'wrong start'); + Assert(entry^.rl_q_end >= __end, 'wrong end'); + + mtx_lock(ilk^); + + if (entry^.rl_q_end=__end) then + begin + rangelock_unlock_locked(lock, cookie, ilk); + Exit(nil); + end; + + entry^.rl_q_end:=__end; + rangelock_calc_block(lock); + mtx_unlock(ilk^); + Exit(cookie); +end; + +{ + * Add the lock request to the queue of the pending requests for + * rangelock. Sleep until the request can be granted. +} +function rangelock_enqueue(lock:p_rangelock;start,__end:off_t;mode:Integer;ilk:p_mtx):Pointer; +var + entry:p_rl_q_entry; + td:p_kthread; +begin + Assert((lock<>nil) and (ilk<>nil)); + + td:=curkthread; + if (td^.td_rlqe<>nil) then + begin + entry:=td^.td_rlqe; + td^.td_rlqe:=nil; + end else + begin + entry:=rlqentry_alloc(); + end; + + Assert(entry<>nil); + entry^.rl_q_flags:=mode; + entry^.rl_q_start:=start; + entry^.rl_q_end:=__end; + + mtx_lock(ilk^); + { + * XXXKIB TODO. Check that a thread does not try to enqueue a + * lock that is incompatible with another request from the same + * thread. 
+ } + + TAILQ_INSERT_TAIL(@lock^.rl_waiters, entry, @entry^.rl_q_link); + + if (lock^.rl_currdep=nil) then + begin + lock^.rl_currdep:=entry; + end; + + rangelock_calc_block(lock); + + while ((entry^.rl_q_flags and RL_LOCK_GRANTED)=0) do + begin + msleep(entry, ilk, 0, 'range', 0); + end; + + mtx_unlock(ilk^); + Exit(entry); +end; + +function rangelock_rlock(lock:p_rangelock;start,__end:off_t;ilk:p_mtx):Pointer; +begin + Result:=rangelock_enqueue(lock, start, __end, RL_LOCK_READ, ilk); +end; + +function rangelock_wlock(lock:p_rangelock;start,__end:off_t;ilk:p_mtx):Pointer; +begin + Result:=rangelock_enqueue(lock, start, __end, RL_LOCK_WRITE, ilk); +end; + + + +end. + + + + diff --git a/sys/kern/kern_thr.pas b/sys/kern/kern_thr.pas index caee7c9e..a4086669 100644 --- a/sys/kern/kern_thr.pas +++ b/sys/kern/kern_thr.pas @@ -231,6 +231,7 @@ type td_slpcallout :Pointer; tdq_lock :mtx; // + td_rlqe :Pointer; td_fpop :Pointer; td_map_def_user :Pointer; td_dmap_def_user:Pointer; diff --git a/sys/kern/kern_thread.pas b/sys/kern/kern_thread.pas index 89da8ec9..55f4693e 100644 --- a/sys/kern/kern_thread.pas +++ b/sys/kern/kern_thread.pas @@ -78,6 +78,7 @@ uses kern_rwlock, kern_sig, kern_proc, + kern_rangelock, sched_ule, sys_sleepqueue; @@ -200,6 +201,7 @@ procedure thread_free(td:p_kthread); begin mtx_destroy(td^.tdq_lock); sleepq_free(td^.td_sleepqueue); + rlqentry_free(td^.td_rlqe); umtx_thread_fini(td); cpu_thread_free(td); end; diff --git a/sys/md/md_exception.pas b/sys/md/md_exception.pas index 0fef7913..5efda0fe 100644 --- a/sys/md/md_exception.pas +++ b/sys/md/md_exception.pas @@ -21,6 +21,10 @@ uses signal, ucontext, vm, + vm_map, + vm_pmap, + vm_pmap_prot, + kern_proc, kern_jit_dynamic; const @@ -101,7 +105,7 @@ const FPC_EXCEPTION_CODE=$E0465043; FPC_SET_EH_HANDLER=$E0465044; -function translate_pageflt_err(v:QWORD):QWORD; inline; +function translate_pageflt_err(v:QWORD):Byte; inline; begin Result:=VM_PROT_NONE; case v of @@ -111,12 +115,21 @@ begin end; end; +function get_pageflt_err(p:PExceptionPointers):Byte; inline; +begin + Result:=translate_pageflt_err(p^.ExceptionRecord^.ExceptionInformation[0]); +end; + +function get_pageflt_addr(p:PExceptionPointers):QWORD; inline; +begin + Result:=p^.ExceptionRecord^.ExceptionInformation[1]; +end; + procedure jit_save_to_sys_save(td:p_kthread); external; procedure sys_save_to_jit_save(td:p_kthread); external; function ProcessException3(td:p_kthread;p:PExceptionPointers):longint; SysV_ABI_CDecl; var - ExceptionCode:DWORD; tf_addr:QWORD; rv:Integer; is_jit:Boolean; @@ -153,14 +166,12 @@ begin td^.td_frame.tf_trapno:=0; - ExceptionCode:=p^.ExceptionRecord^.ExceptionCode; - rv:=-1; - case ExceptionCode of + case p^.ExceptionRecord^.ExceptionCode of STATUS_ACCESS_VIOLATION: begin - tf_addr:=p^.ExceptionRecord^.ExceptionInformation[1]; + tf_addr:=get_pageflt_addr(p); Writeln('tf_addr:0x',HexStr(tf_addr,16)); @@ -170,7 +181,7 @@ begin //_get_frame(p^.ContextRecord,@td^.td_frame,{@td^.td_fpstate}nil); td^.td_frame.tf_trapno:=T_PAGEFLT; - td^.td_frame.tf_err :=translate_pageflt_err(p^.ExceptionRecord^.ExceptionInformation[0]); + td^.td_frame.tf_err :=get_pageflt_err(p); td^.td_frame.tf_addr :=tf_addr; rv:=trap.trap(@td^.td_frame,is_jit); @@ -251,6 +262,37 @@ begin EXCEPTION_SET_THREADNAME :Exit; DBG_PRINTEXCEPTION_C :Exit(EXCEPTION_CONTINUE_EXECUTION); DBG_PRINTEXCEPTION_WIDE_C:Exit(EXCEPTION_CONTINUE_EXECUTION); //RenderDoc issuse + + STATUS_ACCESS_VIOLATION: + begin + + if pmap_danger_zone(@vm_map_t(p_proc.p_vmspace)^.pmap, + get_pageflt_addr(p), + 
256) then + begin + Exit(EXCEPTION_CONTINUE_EXECUTION); + end; + + case get_pageflt_err(p) of + VM_PROT_READ: + begin + if ((pmap_get_prot(get_pageflt_addr(p),256) and VM_PROT_READ)<>0) then + begin + Writeln(stderr,'Unhandled VM_PROT_READ'); + end; + end; + VM_PROT_WRITE: + begin + if ((pmap_get_prot(get_pageflt_addr(p),256) and VM_PROT_WRITE)<>0) then + begin + Writeln(stderr,'Unhandled VM_PROT_WRITE'); + end; + end; + else; + end; + + end; + else if not IsDefaultExceptions(p^.ExceptionRecord^.ExceptionCode) then begin diff --git a/sys/md/vm_pmap.pas b/sys/md/vm_pmap.pas index 769f3e56..3dbb46c1 100644 --- a/sys/md/vm_pmap.pas +++ b/sys/md/vm_pmap.pas @@ -95,6 +95,10 @@ procedure pmap_mirror_unmap(pmap:pmap_t; base:Pointer; size:QWORD); +function pmap_danger_zone(pmap:pmap_t; + addr:vm_offset_t; + size:vm_offset_t):Boolean; + implementation uses @@ -858,7 +862,7 @@ begin end; end; - pmap_mark(info.start,info.__end,prot and VM_RWX); + pmap_mark_rwx(info.start,info.__end,prot); //upper pages delta:=(paddi and PAGE_MASK); @@ -881,7 +885,7 @@ begin end; end; - pmap_mark(start,__end,prot and VM_RWX); + pmap_mark_rwx(start,__end,prot); end; procedure pmap_protect(pmap :pmap_t; @@ -942,7 +946,7 @@ begin end; end; - pmap_mark(start,__end,prot and VM_RWX); + pmap_mark_rwx(start,__end,prot); end; procedure pmap_madvise(pmap :pmap_t; @@ -1018,7 +1022,7 @@ begin Writeln('pmap_remove:',HexStr(start,11),':',HexStr(__end,11)); end; - pmap_unmark(start,__end); + pmap_unmark_rwx(start,__end); r:=0; case vm_object_type(obj) of @@ -1099,6 +1103,18 @@ begin end; end; +function pmap_danger_zone(pmap:pmap_t; + addr:vm_offset_t; + size:vm_offset_t):Boolean; +begin + Result:=False; + while (pmap^.nt_map.danger_zone.in_range(addr,size)) do + begin + Result:=True; + pmap^.nt_map.danger_zone.d_wait(addr,size); + end; +end; + end. 
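Note on the danger-zone handshake above: pmap_danger_zone lets the top-level exception filter tell a fault caused by an in-flight NT remap apart from a genuine protection fault; it loops in d_wait until the remapping thread clears the published range, and reports whether it had to wait. A minimal sketch of the consumer side, assuming the standard Win32 exception-filter result values; filter_access_violation is a hypothetical name, not part of this patch:

const
 EXCEPTION_CONTINUE_EXECUTION=-1; // standard Win32 filter results
 EXCEPTION_CONTINUE_SEARCH   = 0;

// Hypothetical wrapper; assumes vm and vm_pmap are in the uses clause.
function filter_access_violation(pmap:pmap_t;fault_addr:vm_offset_t):Integer;
begin
 // pmap_danger_zone blocks while [fault_addr,fault_addr+256) overlaps the
 // range published by the remapping thread and returns True if it slept.
 if pmap_danger_zone(pmap,fault_addr,256) then
 begin
  // The mapping was being rebuilt concurrently: just retry the instruction.
  Exit(EXCEPTION_CONTINUE_EXECUTION);
 end;
 // Otherwise hand the fault to the normal page-fault path.
 Exit(EXCEPTION_CONTINUE_SEARCH);
end;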
diff --git a/sys/md/vm_pmap_prot.pas b/sys/md/vm_pmap_prot.pas index 370bf1f2..af51e819 100644 --- a/sys/md/vm_pmap_prot.pas +++ b/sys/md/vm_pmap_prot.pas @@ -6,6 +6,7 @@ unit vm_pmap_prot; interface uses + atomic, vm, vmparam; @@ -24,18 +25,23 @@ const PAGE_PROT_RW =PAGE_PROT_READ or PAGE_PROT_WRITE; PAGE_PROT_RWX =PAGE_PROT_READ or PAGE_PROT_WRITE or PAGE_PROT_EXECUTE; - PAGE_PROT_LIFT =$40; - - //PAGE_BUSY_FLAG =DWORD($10000000); - //PAGE_PATCH_FLAG =DWORD($08000000); + PAGE_TRACK_R =$08; + PAGE_TRACK_W =$10; + PAGE_TRACK_X =$20; + PAGE_TRACK_RWX =PAGE_TRACK_R or PAGE_TRACK_W or PAGE_TRACK_X; + PAGE_TRACK_SHIFT =3; var PAGE_PROT:PBYTE=nil; -procedure pmap_mark (start,__end:vm_offset_t;prots:Byte); -procedure pmap_unmark (start,__end:vm_offset_t); +procedure pmap_mark_rwx (start,__end:vm_offset_t;prots:Byte); +procedure pmap_unmark_rwx(start,__end:vm_offset_t); +procedure pmap_track (start,__end:vm_offset_t;prots:Byte); +procedure pmap_untrack (start,__end:vm_offset_t;prots:Byte); +function pmap_scan (start,__end:vm_offset_t):vm_offset_t; function pmap_scan_rwx (start,__end:vm_offset_t):vm_offset_t; function pmap_get_prot (addr:vm_offset_t):Byte; +function pmap_get_prot (addr,size:vm_offset_t):Byte; implementation @@ -59,21 +65,27 @@ begin Result:=x; end; -procedure pmap_mark(start,__end:vm_offset_t;prots:Byte); +procedure pmap_mark_rwx(start,__end:vm_offset_t;prots:Byte); +var + clear:Byte; begin + prots:=prots and PAGE_PROT_RWX; + clear:=(not prots) and PAGE_PROT_RWX; start:=OFF_TO_IDX(start); __end:=OFF_TO_IDX(__end); start:=MAX_IDX(start); __end:=MAX_IDX(__end); while (start<__end) do begin - PAGE_PROT[start]:=prots; + atomic_clear_byte(@PAGE_PROT[start],clear); + atomic_set_byte (@PAGE_PROT[start],prots); + //PAGE_PROT[start]:=prots; Inc(start); end; WriteBarrier; end; -procedure pmap_unmark(start,__end:vm_offset_t); +procedure pmap_unmark_rwx(start,__end:vm_offset_t); begin start:=OFF_TO_IDX(start); __end:=OFF_TO_IDX(__end); @@ -81,12 +93,75 @@ begin __end:=MAX_IDX(__end); while (start<__end) do begin - PAGE_PROT[start]:=0; + atomic_clear_byte(@PAGE_PROT[start],PAGE_PROT_RWX); + //PAGE_PROT[start]:=0; Inc(start); end; WriteBarrier; end; +procedure pmap_track(start,__end:vm_offset_t;prots:Byte); +begin + prots:=prots and PAGE_TRACK_RWX; + start:=OFF_TO_IDX(start); + __end:=OFF_TO_IDX(__end); + start:=MAX_IDX(start); + __end:=MAX_IDX(__end); + while (start<__end) do + begin + atomic_set_byte(@PAGE_PROT[start],prots); + Inc(start); + end; + WriteBarrier; +end; + +procedure pmap_untrack(start,__end:vm_offset_t;prots:Byte); +begin + prots:=prots and PAGE_TRACK_RWX; + start:=OFF_TO_IDX(start); + __end:=OFF_TO_IDX(__end); + start:=MAX_IDX(start); + __end:=MAX_IDX(__end); + while (start<__end) do + begin + atomic_clear_byte(@PAGE_PROT[start],prots); + Inc(start); + end; + WriteBarrier; +end; + +function pmap_scan(start,__end:vm_offset_t):vm_offset_t; +var + b,v:Byte; +begin + start:=OFF_TO_IDX(start); + __end:=OFF_TO_IDX(__end); + start:=MAX_IDX(start); + __end:=MAX_IDX(__end); + + ReadBarrier; + + b:=PAGE_PROT[start]; + Inc(start); + + while (start<__end) do + begin + v:=PAGE_PROT[start]; + + if (b<>v) then + begin + start:=IDX_TO_OFF(start); + Exit(start); + end; + + Inc(start); + end; + + __end:=IDX_TO_OFF(__end); + + Result:=__end; +end; + function pmap_scan_rwx(start,__end:vm_offset_t):vm_offset_t; var b,v:Byte; @@ -122,10 +197,19 @@ end; function pmap_get_prot(addr:vm_offset_t):Byte; begin addr:=OFF_TO_IDX(addr); - addr:=MAX_IDX(addr); - Result:=PAGE_PROT[addr]; + if 
(addr>PAGE_MAP_MASK) then + begin + Result:=0 + end else + begin + Result:=PAGE_PROT[addr]; + end; end; +function pmap_get_prot(addr,size:vm_offset_t):Byte; +begin + Result:=pmap_get_prot(addr) or pmap_get_prot(addr+size); +end; end. diff --git a/sys/vfs/vfs_subr.pas b/sys/vfs/vfs_subr.pas index 3b227666..45febcf5 100644 --- a/sys/vfs/vfs_subr.pas +++ b/sys/vfs/vfs_subr.pas @@ -192,6 +192,7 @@ uses subr_uio, sys_vm_object, vsys_generic, + kern_rangelock, rtprio, sys_conf; @@ -826,7 +827,7 @@ begin vp^.v_vflag:=vp^.v_vflag or VV_NOKNOTE; end; end; - //rangelock_init(@vp^.v_rl); + rangelock_init(@vp^.v_rl); { * For the filesystems which do not use vfs_hash_insert(), @@ -2279,7 +2280,7 @@ begin { XXX Elsewhere we detect an already freed vnode via nil v_op. } vp^.v_op:=nil; - //rangelock_destroy(@vp^.v_rl); + rangelock_destroy(@vp^.v_rl); //lockdestroy(vp^.v_vnlock); mtx_destroy(vp^.v_vnlock^); mtx_destroy(vp^.v_interlock); diff --git a/sys/vfs/vfs_vnops.pas b/sys/vfs/vfs_vnops.pas index 18f1ed0c..b625415c 100644 --- a/sys/vfs/vfs_vnops.pas +++ b/sys/vfs/vfs_vnops.pas @@ -13,6 +13,7 @@ uses vfile, vstat, vuio, + vm, vmparam, vfilio, vnode; @@ -980,27 +981,34 @@ unlock: Exit(error); end; +const + vn_io_fault_enable:Boolean=False; + function vn_io_fault(fp:p_file;uio:p_uio;flags:Integer):Integer; label out_last; var - td:p_kthread; + //td:p_kthread; //vm_page_t ma[io_hold_cnt + 2]; - uio_clone:p_uio; - short_uio:T_uio; - short_iovec:array[0..0] of iovec; + //uio_clone:p_uio; + //short_uio:T_uio; + //short_iovec:array[0..0] of iovec; doio:fo_rdwr_t; vp:p_vnode; rl_cookie:Pointer; mp:p_mount; //vm_page_t *prev_td_ma; - error,cnt,save,saveheld,prev_td_ma_cnt:Integer; - addr,__end:QWORD; - //vm_prot_t prot; - len,resid:QWORD; - adv:Int64; + error:Integer; + //cnt,save,saveheld,prev_td_ma_cnt:Integer; + //addr,__end:QWORD; + //prot:Integer; + //len,resid:QWORD; + //adv:Int64; + NO_IOPF:Boolean; begin - td:=curkthread; + //td:=curkthread; + + rl_cookie:=nil; if (uio^.uio_rw=UIO_READ) then doio:=@vn_read @@ -1010,22 +1018,41 @@ begin vp:=fp^.f_vnode; foffset_lock_uio(fp, uio, flags); + NO_IOPF:=False; mp:=vp^.v_mount; if (mp<>nil) then - if ((mp^.mnt_kern_flag and MNTK_NO_IOPF)=0) then + begin + NO_IOPF:=((mp^.mnt_kern_flag and MNTK_NO_IOPF)=0); + end; + + if (uio^.uio_segflg<>UIO_USERSPACE) or + (vp^.v_type<>VREG) or + NO_IOPF or + (not vn_io_fault_enable) then begin error:=doio(fp, uio, flags or FOF_OFFSET); goto out_last; end; - if (uio^.uio_segflg<>UIO_USERSPACE) or - (vp^.v_type<>VREG) or - {(not vn_io_fault_enable)} false then + if (uio^.uio_rw=UIO_READ) then begin - error:=doio(fp, uio, flags or FOF_OFFSET); - goto out_last; + //prot:=VM_PROT_WRITE; + rl_cookie:=vn_rangelock_rlock(vp, uio^.uio_offset, uio^.uio_offset + uio^.uio_resid); + end else + begin + //prot:=VM_PROT_READ; + if ((fp^.f_flag and O_APPEND)<>0) or ((flags and FOF_OFFSET)=0) then + begin + { For appenders, punt and lock the whole range. } + rl_cookie:=vn_rangelock_wlock(vp, 0, High(Int64)) + end else + begin + rl_cookie:=vn_rangelock_wlock(vp, uio^.uio_offset, uio^.uio_offset + uio^.uio_resid); + end; end; + error:=doio(fp, uio, flags or FOF_OFFSET); + { uio_clone:=cloneuio(uio); resid:=uio^.uio_resid; @@ -1041,11 +1068,14 @@ begin end else begin prot:=VM_PROT_READ; - if ((fp^.f_flag and O_APPEND)<>0 or (flags and FOF_OFFSET)=0) then + if ((fp^.f_flag and O_APPEND)<>0) or ((flags and FOF_OFFSET)=0) then + begin { For appenders, punt and lock the whole range. 
} rl_cookie:=vn_rangelock_wlock(vp, 0, High(Int64)) - else + end else + begin rl_cookie:=vn_rangelock_wlock(vp, uio^.uio_offset, uio^.uio_offset + uio^.uio_resid); + end; end; save:=vm_fault_disable_pagefaults(); @@ -1127,6 +1157,10 @@ _out: } out_last: + if (rl_cookie<>nil) then + begin + vn_rangelock_unlock(vp, rl_cookie); + end; foffset_unlock_uio(fp, uio, flags); Exit(error); end; diff --git a/sys/vfs/vnode.pas b/sys/vfs/vnode.pas index 8241aae4..5a2303be 100644 --- a/sys/vfs/vnode.pas +++ b/sys/vfs/vnode.pas @@ -9,6 +9,7 @@ uses mqueue, kern_mtx, vselinfo, + kern_rangelock, time; const @@ -253,6 +254,8 @@ type v_pollinfo:p_vpollinfo; // i Poll events, p for *v_pi + v_rl:rangelock; //Byte-range lock + property v_mountedhere:Pointer read v_un{.vu_mount } write v_un; //mount property v_socket :Pointer read v_un{.vu_socket } write v_un; //socket property v_rdev :Pointer read v_un{.vu_cdev } write v_un; //cdev @@ -445,24 +448,22 @@ end; procedure vn_rangelock_unlock(vp:p_vnode;cookie:Pointer); begin - //rangelock_unlock(@vp^.v_rl, (cookie), VI_MTX(vp)) + rangelock_unlock(@vp^.v_rl, (cookie), VI_MTX(vp)) end; procedure vn_rangelock_unlock_range(vp:p_vnode;cookie:Pointer;start,__end:Int64); begin - //rangelock_unlock_range(@vp^.v_rl, (cookie), start, __end, VI_MTX(vp)) + rangelock_unlock_range(@vp^.v_rl, (cookie), start, __end, VI_MTX(vp)) end; function vn_rangelock_rlock(vp:p_vnode;start,__end:Int64):Pointer; begin - Result:=nil; - //Result:=rangelock_rlock(@vp^.v_rl, start, __end, VI_MTX(vp)) + Result:=rangelock_rlock(@vp^.v_rl, start, __end, VI_MTX(vp)) end; function vn_rangelock_wlock(vp:p_vnode;start,__end:Int64):Pointer; begin - Result:=nil; - //Result:=rangelock_wlock(@vp^.v_rl, start, __end, VI_MTX(vp)) + Result:=rangelock_wlock(@vp^.v_rl, start, __end, VI_MTX(vp)) end; const diff --git a/sys/vm/vm_nt_map.pas b/sys/vm/vm_nt_map.pas index 969916ee..c6e38252 100644 --- a/sys/vm/vm_nt_map.pas +++ b/sys/vm/vm_nt_map.pas @@ -8,6 +8,8 @@ interface uses sysutils, vm, + vmparam, + kern_mtx, vm_pmap_prot; const @@ -18,6 +20,24 @@ const MAX_UNION_SIZE=256*1024*1024; type + t_danger_range=packed record + start:DWORD; + __end:DWORD; + end; + + t_danger_zone=object + Flock :mtx; + Frange:t_danger_range; + procedure Init; + procedure Done; + procedure set_range(start,__end:vm_offset_t); + function in_range(addr,size:vm_offset_t):Boolean; + procedure d_wait(addr,size:vm_offset_t); + procedure d_wakeup; + procedure lock; + procedure unlock; + end; + pp_vm_nt_file_obj=^p_vm_nt_file_obj; p_vm_nt_file_obj=^vm_nt_file_obj; vm_nt_file_obj=packed record @@ -43,10 +63,11 @@ type p_vm_nt_map=^_vm_nt_map; _vm_nt_map=object - header :vm_nt_entry; // List of entries - size :vm_size_t; // virtual size - nentries :Integer; // Number of entries - root :p_vm_nt_entry; // Root of a binary search tree + header :vm_nt_entry; // List of entries + size :vm_size_t; // virtual size + nentries :Integer; // Number of entries + root :p_vm_nt_entry; // Root of a binary search tree + danger_zone:t_danger_zone; property min_offset:vm_offset_t read header.start write header.start; property max_offset:vm_offset_t read header.__end write header.__end; end; @@ -82,6 +103,11 @@ procedure vm_nt_map_protect(map:p_vm_nt_map; __end:vm_offset_t; prot :Integer); +procedure vm_nt_map_prot_fix(map:p_vm_nt_map; + start:vm_offset_t; + __end:vm_offset_t; + mode :Integer); + procedure vm_nt_map_madvise(map:p_vm_nt_map; start:vm_offset_t; __end:vm_offset_t; @@ -96,6 +122,8 @@ procedure vm_nt_entry_deallocate(entry:p_vm_nt_entry); implementation 
uses + time, + kern_param, md_map; type @@ -175,7 +203,8 @@ end; procedure vm_prot_fixup(map:p_vm_nt_map; start:vm_offset_t; __end:vm_offset_t; - max :Integer); + max :Integer; + mode :Integer); var next:vm_offset_t; base,size:vm_size_t; @@ -187,16 +216,26 @@ begin while (start<__end) do begin - next:=pmap_scan_rwx(start,__end); + if ((mode and 1)=0) then + begin + next:=pmap_scan_rwx(start,__end); + + prot:=pmap_get_prot(start); + prot:=(prot and VM_RW); + end else + begin + next:=pmap_scan(start,__end); + + prot:=pmap_get_prot(start); + prot:=(prot and VM_RW) and (not (prot shr PAGE_TRACK_SHIFT)); + end; base:=start; size:=next-start; - prot:=pmap_get_prot(start); - - if ((prot and VM_RW)<>(max and VM_RW)) then + if ((mode and 2)<>0) or (prot<>(max and VM_RW)) then begin - r:=md_protect(Pointer(base),size,(prot and VM_RW)); + r:=md_protect(Pointer(base),size,prot); if (r<>0) then begin Writeln('failed md_protect(',HexStr(base,11),',',HexStr(base+size,11),'):0x',HexStr(r,8)); @@ -293,7 +332,7 @@ begin if (r<>0) then begin Writeln('failed md_protect(',HexStr(entry^.start,11),',',HexStr(entry^.start+size,11),'):0x',HexStr(r,8)); - Assert(false,'vm_prot_fixup'); + Assert(false,'vm_map'); end; end; @@ -301,6 +340,86 @@ begin end; end; +// + +function IDX_TO_OFF(x:DWORD):QWORD; inline; +begin + Result:=QWORD(x) shl PAGE_SHIFT; +end; + +function OFF_TO_IDX(x:QWORD):DWORD; inline; +begin + Result:=QWORD(x) shr PAGE_SHIFT; +end; + +// + +procedure t_danger_zone.Init; +begin + mtx_init(Flock,'danger_zone'); +end; + +procedure t_danger_zone.Done; +begin + mtx_destroy(Flock); +end; + +procedure t_danger_zone.set_range(start,__end:vm_offset_t); +var + range:t_danger_range; +begin + range.start:=OFF_TO_IDX(start); + range.__end:=OFF_TO_IDX(__end); + + System.InterlockedExchange64(QWORD(Frange),QWORD(range)); +end; + +function t_danger_zone.in_range(addr,size:vm_offset_t):Boolean; +var + range:t_danger_range; +begin + QWORD(range):=System.InterlockedExchangeAdd64(QWORD(Frange),0); + + Result:=(addr>=IDX_TO_OFF(range.start)) and ((addr+size)nil) then @@ -372,6 +493,8 @@ begin size:=__end-start; //danger zone + map^.danger_zone.set_range(start,__end); + map^.danger_zone.lock; //unmap all For i:=Low(stat.range) to High(stat.range) do @@ -473,13 +596,17 @@ begin vm_prot_fixup(map, ets[i]^.start, ets[i]^.__end, - max + max, + 2 ); end; end; end; //danger zone + map^.danger_zone.set_range(0,0); + map^.danger_zone.unlock; + map^.danger_zone.d_wakeup; Result:=True; end; @@ -555,6 +682,7 @@ begin map^.min_offset:=min; map^.max_offset:=max; map^.root:=nil; + map^.danger_zone.Init; end; procedure vm_nt_entry_dispose(map:p_vm_nt_map;entry:p_vm_nt_entry); inline; @@ -1035,6 +1163,7 @@ procedure vm_nt_map_protect(map:p_vm_nt_map; var entry:p_vm_nt_entry; base,size:vm_size_t; + max:Integer; r:Integer; begin if (start=__end) then Exit; @@ -1064,7 +1193,15 @@ begin size:=size-base; - r:=md_protect(Pointer(base),size,(prot and VM_RW)); + if (entry^.obj<>nil) then + begin + max:=entry^.obj^.maxp; + end else + begin + max:=0; + end; + + r:=md_protect(Pointer(base),size,(prot and max and VM_RW)); if (r<>0) then begin Writeln('failed md_protect(',HexStr(base,11),',',HexStr(base+size,11),'):0x',HexStr(r,8)); @@ -1075,6 +1212,49 @@ begin end; end; +procedure vm_nt_map_prot_fix(map:p_vm_nt_map; + start:vm_offset_t; + __end:vm_offset_t; + mode :Integer); +var + entry:p_vm_nt_entry; + e_start,e___end:vm_size_t; +begin + if (start=__end) then Exit; + + if (not vm_nt_map_lookup_entry(map, start, @entry)) then + begin + 
entry:=entry^.next; + end else + begin + entry:=entry; + end; + + while (entry<>@map^.header) and (entry^.start<__end) do + begin + + if (entry^.obj<>nil) then + begin + e_start:=entry^.start; + e___end:=entry^.__end; + + if (e_start<start) then + begin + e_start:=start; + end; + + if (e___end>__end) then + begin + e___end:=__end; + end; + + vm_prot_fixup(map,e_start,e___end,entry^.obj^.maxp,mode); + end; + + entry:=entry^.next; + end; +end; + //rdi, rsi procedure ZeroPages(addr:Pointer;size:Ptruint); assembler nostackframe SysV_ABI_CDecl; label
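A closing note on the page-protection bitmap: each PAGE_PROT byte now carries two fields, the VM protection in the low bits (PAGE_PROT_READ/WRITE/EXECUTE) and the new tracking flags above them (PAGE_TRACK_R/W/X, i.e. the same mask shifted left by PAGE_TRACK_SHIFT). pmap_mark_rwx touches only the low field, via the atomic_clear_byte/atomic_set_byte pair, so tracking bits set concurrently by pmap_track survive a remark. The tracking-aware branch of vm_prot_fixup then folds the tracking bits back onto the protection, so a tracked page temporarily loses the matching host permission and the next access faults. A minimal sketch of that calculation, assuming VM_RW equals PAGE_PROT_READ or PAGE_PROT_WRITE (illustration only, not part of the patch):

const
 // mirrors vm_pmap_prot.pas
 PAGE_PROT_READ  =$01;
 PAGE_PROT_WRITE =$02;
 PAGE_TRACK_W    =$10;
 PAGE_TRACK_SHIFT=3;
 VM_RW           =PAGE_PROT_READ or PAGE_PROT_WRITE; // assumption

function effective_host_prot(page_byte:Byte):Byte;
begin
 // Keep the R/W permission bits, then clear every bit whose tracking twin
 // (the same bit shifted up by PAGE_TRACK_SHIFT) is set: a write-tracked
 // page loses PAGE_PROT_WRITE, so the next store raises a page fault.
 Result:=(page_byte and VM_RW) and (not (page_byte shr PAGE_TRACK_SHIFT));
end;

// Example: a read/write page with write tracking enabled stays mapped
// read-only on the host until pmap_untrack clears PAGE_TRACK_W:
//   effective_host_prot(PAGE_PROT_READ or PAGE_PROT_WRITE or PAGE_TRACK_W)
//     = PAGE_PROT_READ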