diff --git a/fpPS4.lpi b/fpPS4.lpi
index 6a627dc3..41e0780c 100644
--- a/fpPS4.lpi
+++ b/fpPS4.lpi
@@ -1025,6 +1025,10 @@
+
+
+
+
diff --git a/rtl/atomic.pas b/rtl/atomic.pas
index 098db820..e09f6148 100644
--- a/rtl/atomic.pas
+++ b/rtl/atomic.pas
@@ -99,8 +99,10 @@ function bits1(P:Pointer):SizeUInt; inline;
function bits1(P:SizeUInt):SizeUInt; inline;
procedure spin_pause;
-procedure atomic_set_int (addr:PInteger;val:Integer); sysv_abi_default;
-procedure atomic_clear_int(addr:PInteger;val:Integer); sysv_abi_default;
+procedure atomic_set_byte (addr:PByte;val:byte); sysv_abi_default;
+procedure atomic_clear_byte(addr:PByte;val:byte); sysv_abi_default;
+procedure atomic_set_int (addr:PInteger;val:Integer); sysv_abi_default;
+procedure atomic_clear_int (addr:PInteger;val:Integer); sysv_abi_default;
implementation
@@ -389,6 +391,18 @@ asm
pause
end;
+
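+//byte-wide counterparts of atomic_set_int/atomic_clear_int: val is a bit mask
+//that is OR-ed in / AND-NOT-ed out with a lock-prefixed instruction (addr
+//arrives in rdi and val in sil under the SysV ABI selected by sysv_abi_default)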
+procedure atomic_set_byte(addr:PByte;val:byte); assembler; nostackframe; sysv_abi_default;
+asm
+ lock orb %sil,(%rdi)
+end;
+
+procedure atomic_clear_byte(addr:PByte;val:byte); assembler; nostackframe; sysv_abi_default;
+asm
+ not %sil
+ lock andb %sil,(%rdi)
+end;
+
procedure atomic_set_int(addr:PInteger;val:Integer); assembler; nostackframe; sysv_abi_default;
asm
lock orl %esi,(%rdi)
diff --git a/sys/kern/kern_rangelock.pas b/sys/kern/kern_rangelock.pas
new file mode 100644
index 00000000..5335e207
--- /dev/null
+++ b/sys/kern/kern_rangelock.pas
@@ -0,0 +1,284 @@
+unit kern_rangelock;
+
+{$mode ObjFPC}{$H+}
+{$CALLING SysV_ABI_CDecl}
+
+interface
+
+uses
+ mqueue,
+ kern_thr,
+ kern_mtx;
+
+const
+ RL_LOCK_READ =$0001;
+ RL_LOCK_WRITE =$0002;
+ RL_LOCK_TYPE_MASK=$0003;
+ RL_LOCK_GRANTED =$0004;
+
+type
+ off_t=Int64;
+
+ p_rl_q_entry=^rl_q_entry;
+ rl_q_entry=record
+ rl_q_link :TAILQ_ENTRY; //rl_q_entry
+ rl_q_start:off_t;
+ rl_q_end :off_t;
+ rl_q_flags:Integer;
+ end;
+
+ p_rangelock=^rangelock;
+ rangelock=record
+ rl_waiters:TAILQ_HEAD; //rl_q_entry
+ rl_currdep:p_rl_q_entry;
+ end;
+
+procedure rangelock_init(lock:p_rangelock);
+procedure rangelock_destroy(lock:p_rangelock);
+procedure rangelock_unlock(lock:p_rangelock;cookie:Pointer;ilk:p_mtx);
+function rangelock_unlock_range(lock:p_rangelock;cookie:Pointer;start,__end:off_t;ilk:p_mtx):Pointer;
+function rangelock_rlock(lock:p_rangelock;start,__end:off_t;ilk:p_mtx):Pointer;
+function rangelock_wlock(lock:p_rangelock;start,__end:off_t;ilk:p_mtx):Pointer;
+procedure rlqentry_free(rleq:p_rl_q_entry);
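+
+{
+ Usage sketch (illustrative only, lk/ilk/off/len are not part of this change):
+
+   cookie:=rangelock_rlock(@lk, off, off+len, @ilk); //sleeps until granted
+   ... access bytes [off, off+len) ...
+   rangelock_unlock(@lk, cookie, @ilk);
+
+ vnode.pas wraps these as vn_rangelock_rlock/vn_rangelock_wlock/
+ vn_rangelock_unlock over vp^.v_rl with VI_MTX(vp) as the interlock.
+}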
+
+implementation
+
+//
+
+function msleep(ident :Pointer;
+ lock :p_mtx;
+ priority:Integer;
+ wmesg :PChar;
+ timo :Int64):Integer; external;
+
+procedure wakeup(ident:Pointer); external;
+
+//
+
+function rlqentry_alloc():p_rl_q_entry; inline;
+begin
+ Result:=AllocMem(SizeOf(rl_q_entry));
+end;
+
+procedure rlqentry_free(rleq:p_rl_q_entry); inline;
+begin
+ if (rleq<>nil) then
+ begin
+ FreeMem(rleq);
+ end;
+end;
+
+procedure rangelock_init(lock:p_rangelock);
+begin
+ TAILQ_INIT(@lock^.rl_waiters);
+ lock^.rl_currdep:=nil;
+end;
+
+procedure rangelock_destroy(lock:p_rangelock);
+begin
+ Assert(TAILQ_EMPTY(@lock^.rl_waiters), 'Dangling waiters');
+end;
+
+{
+ * Verifies the supplied rl_q_entries for compatibility. Returns 1 if
+ * the rangelock queue entries are not compatible, 0 if they are.
+ *
+ * Two entries are compatible if their ranges do not overlap, or both
+ * entries are for read.
+}
+function rangelock_incompatible(e1,e2:p_rl_q_entry):Integer;
+begin
+ if ((e1^.rl_q_flags and RL_LOCK_TYPE_MASK)=RL_LOCK_READ) and
+ ((e2^.rl_q_flags and RL_LOCK_TYPE_MASK)=RL_LOCK_READ) then
+ begin
+ Exit(0);
+ end;
+ if (e1^.rl_q_start < e2^.rl_q_end) and (e1^.rl_q_end > e2^.rl_q_start) then
+ begin
+ Exit(1);
+ end;
+ Exit(0);
+end;
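+
+{
+ Example (illustrative): a write lock over [0,100) and a read lock over
+ [50,150) overlap, so the pair is incompatible (1); two read locks over the
+ same range are compatible (0).
+}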
+
+{
+ * Recalculate the lock^.rl_currdep after an unlock.
+}
+procedure rangelock_calc_block(lock:p_rangelock);
+label
+ _out;
+var
+ entry,entry1,whead:p_rl_q_entry;
+begin
+
+ if (lock^.rl_currdep=TAILQ_FIRST(@lock^.rl_waiters)) and
+ (lock^.rl_currdep<>nil) then
+ begin
+ lock^.rl_currdep:=TAILQ_NEXT(lock^.rl_currdep, @lock^.rl_currdep^.rl_q_link);
+ end;
+
+ entry:=lock^.rl_currdep;
+
+ while (entry<>nil) do
+ begin
+ entry1:=TAILQ_FIRST(@lock^.rl_waiters);
+
+ while (entry1<>nil) do
+ begin
+ if (rangelock_incompatible(entry, entry1)<>0) then
+ begin
+ goto _out;
+ end;
+ if (entry1=entry) then
+ begin
+ break;
+ end;
+ //
+ entry1:=TAILQ_NEXT(entry1, @entry1^.rl_q_link)
+ end;
+
+ //next
+ entry:=TAILQ_NEXT(entry, @entry^.rl_q_link);
+ end;
+
+_out:
+ lock^.rl_currdep:=entry;
+
+ whead:=TAILQ_FIRST(@lock^.rl_waiters);
+
+ while (whead<>nil) do
+ begin
+ if (whead=lock^.rl_currdep) then
+ begin
+ break;
+ end;
+ if ((whead^.rl_q_flags and RL_LOCK_GRANTED)=0) then
+ begin
+ whead^.rl_q_flags:=whead^.rl_q_flags or RL_LOCK_GRANTED;
+ wakeup(whead);
+ end;
+ //
+ whead:=TAILQ_NEXT(whead, @whead^.rl_q_link)
+ end;
+end;
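+
+{
+ After the scan, rl_currdep points at the first waiter that still conflicts
+ with an earlier queue entry; every waiter ahead of it is granted and woken.
+ Illustration (not from the original change): with W1=write[0,10) granted,
+ W2=read[5,20) and W3=read[30,40) queued, the scan stops at W2 (it overlaps
+ W1), so only entries before W2 are granted and W3 keeps waiting in FIFO
+ order even though it conflicts with nothing.
+}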
+
+procedure rangelock_unlock_locked(lock:p_rangelock;entry:p_rl_q_entry;ilk:p_mtx);
+begin
+ Assert((lock<>nil) and (entry<>nil) and (ilk<>nil));
+ mtx_assert(ilk^);
+ Assert(entry<>lock^.rl_currdep, 'stuck currdep');
+
+ TAILQ_REMOVE(@lock^.rl_waiters, entry, @entry^.rl_q_link);
+ rangelock_calc_block(lock);
+ mtx_unlock(ilk^);
+
+ if (curkthread^.td_rlqe=nil) then
+ curkthread^.td_rlqe:=entry
+ else
+ rlqentry_free(entry);
+end;
+
+procedure rangelock_unlock(lock:p_rangelock;cookie:Pointer;ilk:p_mtx);
+begin
+ Assert((lock<>nil) and (cookie<>nil) and (ilk<>nil));
+
+ mtx_lock(ilk^);
+ rangelock_unlock_locked(lock, cookie, ilk);
+end;
+
+{
+ * Unlock a sub-range of a granted lock.
+}
+function rangelock_unlock_range(lock:p_rangelock;cookie:Pointer;start,__end:off_t;ilk:p_mtx):Pointer;
+var
+ entry:p_rl_q_entry;
+begin
+ Assert((lock<>nil) and (cookie<>nil) and (ilk<>nil));
+
+ entry:=cookie;
+
+ Assert((entry^.rl_q_flags and RL_LOCK_GRANTED)<>0,'Unlocking non-granted lock');
+ Assert(entry^.rl_q_start=start, 'wrong start');
+ Assert(entry^.rl_q_end >= __end, 'wrong end');
+
+ mtx_lock(ilk^);
+
+ if (entry^.rl_q_end=__end) then
+ begin
+ rangelock_unlock_locked(lock, cookie, ilk);
+ Exit(nil);
+ end;
+
+ entry^.rl_q_end:=__end;
+ rangelock_calc_block(lock);
+ mtx_unlock(ilk^);
+ Exit(cookie);
+end;
+
+{
+ * Add the lock request to the queue of pending requests for the
+ * rangelock. Sleep until the request can be granted.
+}
+function rangelock_enqueue(lock:p_rangelock;start,__end:off_t;mode:Integer;ilk:p_mtx):Pointer;
+var
+ entry:p_rl_q_entry;
+ td:p_kthread;
+begin
+ Assert((lock<>nil) and (ilk<>nil));
+
+ td:=curkthread;
+ if (td^.td_rlqe<>nil) then
+ begin
+ entry:=td^.td_rlqe;
+ td^.td_rlqe:=nil;
+ end else
+ begin
+ entry:=rlqentry_alloc();
+ end;
+
+ Assert(entry<>nil);
+ entry^.rl_q_flags:=mode;
+ entry^.rl_q_start:=start;
+ entry^.rl_q_end:=__end;
+
+ mtx_lock(ilk^);
+ {
+ * XXXKIB TODO. Check that a thread does not try to enqueue a
+ * lock that is incompatible with another request from the same
+ * thread.
+ }
+
+ TAILQ_INSERT_TAIL(@lock^.rl_waiters, entry, @entry^.rl_q_link);
+
+ if (lock^.rl_currdep=nil) then
+ begin
+ lock^.rl_currdep:=entry;
+ end;
+
+ rangelock_calc_block(lock);
+
+ while ((entry^.rl_q_flags and RL_LOCK_GRANTED)=0) do
+ begin
+ msleep(entry, ilk, 0, 'range', 0);
+ end;
+
+ mtx_unlock(ilk^);
+ Exit(entry);
+end;
+
+function rangelock_rlock(lock:p_rangelock;start,__end:off_t;ilk:p_mtx):Pointer;
+begin
+ Result:=rangelock_enqueue(lock, start, __end, RL_LOCK_READ, ilk);
+end;
+
+function rangelock_wlock(lock:p_rangelock;start,__end:off_t;ilk:p_mtx):Pointer;
+begin
+ Result:=rangelock_enqueue(lock, start, __end, RL_LOCK_WRITE, ilk);
+end;
+
+
+
+end.
+
+
+
+
diff --git a/sys/kern/kern_thr.pas b/sys/kern/kern_thr.pas
index caee7c9e..a4086669 100644
--- a/sys/kern/kern_thr.pas
+++ b/sys/kern/kern_thr.pas
@@ -231,6 +231,7 @@ type
td_slpcallout :Pointer;
tdq_lock :mtx;
//
+ td_rlqe :Pointer;
td_fpop :Pointer;
td_map_def_user :Pointer;
td_dmap_def_user:Pointer;
diff --git a/sys/kern/kern_thread.pas b/sys/kern/kern_thread.pas
index 89da8ec9..55f4693e 100644
--- a/sys/kern/kern_thread.pas
+++ b/sys/kern/kern_thread.pas
@@ -78,6 +78,7 @@ uses
kern_rwlock,
kern_sig,
kern_proc,
+ kern_rangelock,
sched_ule,
sys_sleepqueue;
@@ -200,6 +201,7 @@ procedure thread_free(td:p_kthread);
begin
mtx_destroy(td^.tdq_lock);
sleepq_free(td^.td_sleepqueue);
+ rlqentry_free(td^.td_rlqe);
umtx_thread_fini(td);
cpu_thread_free(td);
end;
diff --git a/sys/md/md_exception.pas b/sys/md/md_exception.pas
index 0fef7913..5efda0fe 100644
--- a/sys/md/md_exception.pas
+++ b/sys/md/md_exception.pas
@@ -21,6 +21,10 @@ uses
signal,
ucontext,
vm,
+ vm_map,
+ vm_pmap,
+ vm_pmap_prot,
+ kern_proc,
kern_jit_dynamic;
const
@@ -101,7 +105,7 @@ const
FPC_EXCEPTION_CODE=$E0465043;
FPC_SET_EH_HANDLER=$E0465044;
-function translate_pageflt_err(v:QWORD):QWORD; inline;
+function translate_pageflt_err(v:QWORD):Byte; inline;
begin
Result:=VM_PROT_NONE;
case v of
@@ -111,12 +115,21 @@ begin
end;
end;
+function get_pageflt_err(p:PExceptionPointers):Byte; inline;
+begin
+ Result:=translate_pageflt_err(p^.ExceptionRecord^.ExceptionInformation[0]);
+end;
+
+function get_pageflt_addr(p:PExceptionPointers):QWORD; inline;
+begin
+ Result:=p^.ExceptionRecord^.ExceptionInformation[1];
+end;
+
procedure jit_save_to_sys_save(td:p_kthread); external;
procedure sys_save_to_jit_save(td:p_kthread); external;
function ProcessException3(td:p_kthread;p:PExceptionPointers):longint; SysV_ABI_CDecl;
var
- ExceptionCode:DWORD;
tf_addr:QWORD;
rv:Integer;
is_jit:Boolean;
@@ -153,14 +166,12 @@ begin
td^.td_frame.tf_trapno:=0;
- ExceptionCode:=p^.ExceptionRecord^.ExceptionCode;
-
rv:=-1;
- case ExceptionCode of
+ case p^.ExceptionRecord^.ExceptionCode of
STATUS_ACCESS_VIOLATION:
begin
- tf_addr:=p^.ExceptionRecord^.ExceptionInformation[1];
+ tf_addr:=get_pageflt_addr(p);
Writeln('tf_addr:0x',HexStr(tf_addr,16));
@@ -170,7 +181,7 @@ begin
//_get_frame(p^.ContextRecord,@td^.td_frame,{@td^.td_fpstate}nil);
td^.td_frame.tf_trapno:=T_PAGEFLT;
- td^.td_frame.tf_err :=translate_pageflt_err(p^.ExceptionRecord^.ExceptionInformation[0]);
+ td^.td_frame.tf_err :=get_pageflt_err(p);
td^.td_frame.tf_addr :=tf_addr;
rv:=trap.trap(@td^.td_frame,is_jit);
@@ -251,6 +262,37 @@ begin
EXCEPTION_SET_THREADNAME :Exit;
DBG_PRINTEXCEPTION_C :Exit(EXCEPTION_CONTINUE_EXECUTION);
   DBG_PRINTEXCEPTION_WIDE_C:Exit(EXCEPTION_CONTINUE_EXECUTION); //RenderDoc issue
+
+ STATUS_ACCESS_VIOLATION:
+ begin
+
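+   //the faulting address may belong to a range that vm_nt_map is currently
+   //remapping; pmap_danger_zone blocks until the remap finishes, after which
+   //the faulting instruction is simply retried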
+ if pmap_danger_zone(@vm_map_t(p_proc.p_vmspace)^.pmap,
+ get_pageflt_addr(p),
+ 256) then
+ begin
+ Exit(EXCEPTION_CONTINUE_EXECUTION);
+ end;
+
+ case get_pageflt_err(p) of
+ VM_PROT_READ:
+ begin
+ if ((pmap_get_prot(get_pageflt_addr(p),256) and VM_PROT_READ)<>0) then
+ begin
+ Writeln(stderr,'Unhandled VM_PROT_READ');
+ end;
+ end;
+ VM_PROT_WRITE:
+ begin
+ if ((pmap_get_prot(get_pageflt_addr(p),256) and VM_PROT_WRITE)<>0) then
+ begin
+ Writeln(stderr,'Unhandled VM_PROT_WRITE');
+ end;
+ end;
+ else;
+ end;
+
+ end;
+
else
if not IsDefaultExceptions(p^.ExceptionRecord^.ExceptionCode) then
begin
diff --git a/sys/md/vm_pmap.pas b/sys/md/vm_pmap.pas
index 769f3e56..3dbb46c1 100644
--- a/sys/md/vm_pmap.pas
+++ b/sys/md/vm_pmap.pas
@@ -95,6 +95,10 @@ procedure pmap_mirror_unmap(pmap:pmap_t;
base:Pointer;
size:QWORD);
+function pmap_danger_zone(pmap:pmap_t;
+ addr:vm_offset_t;
+ size:vm_offset_t):Boolean;
+
implementation
uses
@@ -858,7 +862,7 @@ begin
end;
end;
- pmap_mark(info.start,info.__end,prot and VM_RWX);
+ pmap_mark_rwx(info.start,info.__end,prot);
//upper pages
delta:=(paddi and PAGE_MASK);
@@ -881,7 +885,7 @@ begin
end;
end;
- pmap_mark(start,__end,prot and VM_RWX);
+ pmap_mark_rwx(start,__end,prot);
end;
procedure pmap_protect(pmap :pmap_t;
@@ -942,7 +946,7 @@ begin
end;
end;
- pmap_mark(start,__end,prot and VM_RWX);
+ pmap_mark_rwx(start,__end,prot);
end;
procedure pmap_madvise(pmap :pmap_t;
@@ -1018,7 +1022,7 @@ begin
Writeln('pmap_remove:',HexStr(start,11),':',HexStr(__end,11));
end;
- pmap_unmark(start,__end);
+ pmap_unmark_rwx(start,__end);
r:=0;
case vm_object_type(obj) of
@@ -1099,6 +1103,18 @@ begin
end;
end;
+function pmap_danger_zone(pmap:pmap_t;
+ addr:vm_offset_t;
+ size:vm_offset_t):Boolean;
+begin
+ Result:=False;
+ while (pmap^.nt_map.danger_zone.in_range(addr,size)) do
+ begin
+ Result:=True;
+ pmap^.nt_map.danger_zone.d_wait(addr,size);
+ end;
+end;
+
end.
diff --git a/sys/md/vm_pmap_prot.pas b/sys/md/vm_pmap_prot.pas
index 370bf1f2..af51e819 100644
--- a/sys/md/vm_pmap_prot.pas
+++ b/sys/md/vm_pmap_prot.pas
@@ -6,6 +6,7 @@ unit vm_pmap_prot;
interface
uses
+ atomic,
vm,
vmparam;
@@ -24,18 +25,23 @@ const
PAGE_PROT_RW =PAGE_PROT_READ or PAGE_PROT_WRITE;
PAGE_PROT_RWX =PAGE_PROT_READ or PAGE_PROT_WRITE or PAGE_PROT_EXECUTE;
- PAGE_PROT_LIFT =$40;
-
- //PAGE_BUSY_FLAG =DWORD($10000000);
- //PAGE_PATCH_FLAG =DWORD($08000000);
+ PAGE_TRACK_R =$08;
+ PAGE_TRACK_W =$10;
+ PAGE_TRACK_X =$20;
+ PAGE_TRACK_RWX =PAGE_TRACK_R or PAGE_TRACK_W or PAGE_TRACK_X;
+ PAGE_TRACK_SHIFT =3;
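+
+ //PAGE_TRACK_R/W/X mirror PAGE_PROT_READ/WRITE/EXECUTE shifted left by
+ //PAGE_TRACK_SHIFT, so (prot shr PAGE_TRACK_SHIFT) maps track bits back
+ //onto the protection bits (see vm_prot_fixup in vm_nt_map.pas)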
var
PAGE_PROT:PBYTE=nil;
-procedure pmap_mark (start,__end:vm_offset_t;prots:Byte);
-procedure pmap_unmark (start,__end:vm_offset_t);
+procedure pmap_mark_rwx (start,__end:vm_offset_t;prots:Byte);
+procedure pmap_unmark_rwx(start,__end:vm_offset_t);
+procedure pmap_track (start,__end:vm_offset_t;prots:Byte);
+procedure pmap_untrack (start,__end:vm_offset_t;prots:Byte);
+function pmap_scan (start,__end:vm_offset_t):vm_offset_t;
function pmap_scan_rwx (start,__end:vm_offset_t):vm_offset_t;
function pmap_get_prot (addr:vm_offset_t):Byte;
+function pmap_get_prot (addr,size:vm_offset_t):Byte;
implementation
@@ -59,21 +65,27 @@ begin
Result:=x;
end;
-procedure pmap_mark(start,__end:vm_offset_t;prots:Byte);
+procedure pmap_mark_rwx(start,__end:vm_offset_t;prots:Byte);
+var
+ clear:Byte;
begin
+ prots:=prots and PAGE_PROT_RWX;
+ clear:=(not prots) and PAGE_PROT_RWX;
start:=OFF_TO_IDX(start);
__end:=OFF_TO_IDX(__end);
start:=MAX_IDX(start);
__end:=MAX_IDX(__end);
while (start<__end) do
begin
- PAGE_PROT[start]:=prots;
+ atomic_clear_byte(@PAGE_PROT[start],clear);
+ atomic_set_byte (@PAGE_PROT[start],prots);
+ //PAGE_PROT[start]:=prots;
Inc(start);
end;
WriteBarrier;
end;
-procedure pmap_unmark(start,__end:vm_offset_t);
+procedure pmap_unmark_rwx(start,__end:vm_offset_t);
begin
start:=OFF_TO_IDX(start);
__end:=OFF_TO_IDX(__end);
@@ -81,12 +93,75 @@ begin
__end:=MAX_IDX(__end);
while (start<__end) do
begin
- PAGE_PROT[start]:=0;
+ atomic_clear_byte(@PAGE_PROT[start],PAGE_PROT_RWX);
+ //PAGE_PROT[start]:=0;
Inc(start);
end;
WriteBarrier;
end;
+procedure pmap_track(start,__end:vm_offset_t;prots:Byte);
+begin
+ prots:=prots and PAGE_TRACK_RWX;
+ start:=OFF_TO_IDX(start);
+ __end:=OFF_TO_IDX(__end);
+ start:=MAX_IDX(start);
+ __end:=MAX_IDX(__end);
+ while (start<__end) do
+ begin
+ atomic_set_byte(@PAGE_PROT[start],prots);
+ Inc(start);
+ end;
+ WriteBarrier;
+end;
+
+procedure pmap_untrack(start,__end:vm_offset_t;prots:Byte);
+begin
+ prots:=prots and PAGE_TRACK_RWX;
+ start:=OFF_TO_IDX(start);
+ __end:=OFF_TO_IDX(__end);
+ start:=MAX_IDX(start);
+ __end:=MAX_IDX(__end);
+ while (start<__end) do
+ begin
+ atomic_clear_byte(@PAGE_PROT[start],prots);
+ Inc(start);
+ end;
+ WriteBarrier;
+end;
+
+function pmap_scan(start,__end:vm_offset_t):vm_offset_t;
+var
+ b,v:Byte;
+begin
+ start:=OFF_TO_IDX(start);
+ __end:=OFF_TO_IDX(__end);
+ start:=MAX_IDX(start);
+ __end:=MAX_IDX(__end);
+
+ ReadBarrier;
+
+ b:=PAGE_PROT[start];
+ Inc(start);
+
+ while (start<__end) do
+ begin
+ v:=PAGE_PROT[start];
+
+ if (b<>v) then
+ begin
+ start:=IDX_TO_OFF(start);
+ Exit(start);
+ end;
+
+ Inc(start);
+ end;
+
+ __end:=IDX_TO_OFF(__end);
+
+ Result:=__end;
+end;
+
function pmap_scan_rwx(start,__end:vm_offset_t):vm_offset_t;
var
b,v:Byte;
@@ -122,10 +197,19 @@ end;
function pmap_get_prot(addr:vm_offset_t):Byte;
begin
addr:=OFF_TO_IDX(addr);
- addr:=MAX_IDX(addr);
- Result:=PAGE_PROT[addr];
+ if (addr>PAGE_MAP_MASK) then
+ begin
+ Result:=0
+ end else
+ begin
+ Result:=PAGE_PROT[addr];
+ end;
end;
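+
+//range form: OR of the protections at both ends of an [addr,addr+size) access,
+//used by the access-violation handler with a small probe size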
+function pmap_get_prot(addr,size:vm_offset_t):Byte;
+begin
+ Result:=pmap_get_prot(addr) or pmap_get_prot(addr+size);
+end;
end.
diff --git a/sys/vfs/vfs_subr.pas b/sys/vfs/vfs_subr.pas
index 3b227666..45febcf5 100644
--- a/sys/vfs/vfs_subr.pas
+++ b/sys/vfs/vfs_subr.pas
@@ -192,6 +192,7 @@ uses
subr_uio,
sys_vm_object,
vsys_generic,
+ kern_rangelock,
rtprio,
sys_conf;
@@ -826,7 +827,7 @@ begin
vp^.v_vflag:=vp^.v_vflag or VV_NOKNOTE;
end;
end;
- //rangelock_init(@vp^.v_rl);
+ rangelock_init(@vp^.v_rl);
{
* For the filesystems which do not use vfs_hash_insert(),
@@ -2279,7 +2280,7 @@ begin
{ XXX Elsewhere we detect an already freed vnode via nil v_op. }
vp^.v_op:=nil;
- //rangelock_destroy(@vp^.v_rl);
+ rangelock_destroy(@vp^.v_rl);
//lockdestroy(vp^.v_vnlock);
mtx_destroy(vp^.v_vnlock^);
mtx_destroy(vp^.v_interlock);
diff --git a/sys/vfs/vfs_vnops.pas b/sys/vfs/vfs_vnops.pas
index 18f1ed0c..b625415c 100644
--- a/sys/vfs/vfs_vnops.pas
+++ b/sys/vfs/vfs_vnops.pas
@@ -13,6 +13,7 @@ uses
vfile,
vstat,
vuio,
+ vm,
vmparam,
vfilio,
vnode;
@@ -980,27 +981,34 @@ unlock:
Exit(error);
end;
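+
+//gates the prefault path of vn_io_fault; it stays False here, apparently until
+//the commented-out uio-clone/page-hold logic further down is ported, so only
+//the range locking around doio() is active for now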
+const
+ vn_io_fault_enable:Boolean=False;
+
function vn_io_fault(fp:p_file;uio:p_uio;flags:Integer):Integer;
label
out_last;
var
- td:p_kthread;
+ //td:p_kthread;
//vm_page_t ma[io_hold_cnt + 2];
- uio_clone:p_uio;
- short_uio:T_uio;
- short_iovec:array[0..0] of iovec;
+ //uio_clone:p_uio;
+ //short_uio:T_uio;
+ //short_iovec:array[0..0] of iovec;
doio:fo_rdwr_t;
vp:p_vnode;
rl_cookie:Pointer;
mp:p_mount;
//vm_page_t *prev_td_ma;
- error,cnt,save,saveheld,prev_td_ma_cnt:Integer;
- addr,__end:QWORD;
- //vm_prot_t prot;
- len,resid:QWORD;
- adv:Int64;
+ error:Integer;
+ //cnt,save,saveheld,prev_td_ma_cnt:Integer;
+ //addr,__end:QWORD;
+ //prot:Integer;
+ //len,resid:QWORD;
+ //adv:Int64;
+ NO_IOPF:Boolean;
begin
- td:=curkthread;
+ //td:=curkthread;
+
+ rl_cookie:=nil;
if (uio^.uio_rw=UIO_READ) then
doio:=@vn_read
@@ -1010,22 +1018,41 @@ begin
vp:=fp^.f_vnode;
foffset_lock_uio(fp, uio, flags);
+ NO_IOPF:=False;
mp:=vp^.v_mount;
if (mp<>nil) then
- if ((mp^.mnt_kern_flag and MNTK_NO_IOPF)=0) then
+ begin
+  NO_IOPF:=((mp^.mnt_kern_flag and MNTK_NO_IOPF)<>0);
+ end;
+
+ if (uio^.uio_segflg<>UIO_USERSPACE) or
+ (vp^.v_type<>VREG) or
+ NO_IOPF or
+ (not vn_io_fault_enable) then
begin
error:=doio(fp, uio, flags or FOF_OFFSET);
goto out_last;
end;
- if (uio^.uio_segflg<>UIO_USERSPACE) or
- (vp^.v_type<>VREG) or
- {(not vn_io_fault_enable)} false then
+ if (uio^.uio_rw=UIO_READ) then
begin
- error:=doio(fp, uio, flags or FOF_OFFSET);
- goto out_last;
+ //prot:=VM_PROT_WRITE;
+ rl_cookie:=vn_rangelock_rlock(vp, uio^.uio_offset, uio^.uio_offset + uio^.uio_resid);
+ end else
+ begin
+ //prot:=VM_PROT_READ;
+ if ((fp^.f_flag and O_APPEND)<>0) or ((flags and FOF_OFFSET)=0) then
+ begin
+ { For appenders, punt and lock the whole range. }
+ rl_cookie:=vn_rangelock_wlock(vp, 0, High(Int64))
+ end else
+ begin
+ rl_cookie:=vn_rangelock_wlock(vp, uio^.uio_offset, uio^.uio_offset + uio^.uio_resid);
+ end;
end;
+ error:=doio(fp, uio, flags or FOF_OFFSET);
+
{
uio_clone:=cloneuio(uio);
resid:=uio^.uio_resid;
@@ -1041,11 +1068,14 @@ begin
end else
begin
prot:=VM_PROT_READ;
- if ((fp^.f_flag and O_APPEND)<>0 or (flags and FOF_OFFSET)=0) then
+ if ((fp^.f_flag and O_APPEND)<>0) or ((flags and FOF_OFFSET)=0) then
+ begin
{ For appenders, punt and lock the whole range. }
rl_cookie:=vn_rangelock_wlock(vp, 0, High(Int64))
- else
+ end else
+ begin
rl_cookie:=vn_rangelock_wlock(vp, uio^.uio_offset, uio^.uio_offset + uio^.uio_resid);
+ end;
end;
save:=vm_fault_disable_pagefaults();
@@ -1127,6 +1157,10 @@ _out:
}
out_last:
+ if (rl_cookie<>nil) then
+ begin
+ vn_rangelock_unlock(vp, rl_cookie);
+ end;
foffset_unlock_uio(fp, uio, flags);
Exit(error);
end;
diff --git a/sys/vfs/vnode.pas b/sys/vfs/vnode.pas
index 8241aae4..5a2303be 100644
--- a/sys/vfs/vnode.pas
+++ b/sys/vfs/vnode.pas
@@ -9,6 +9,7 @@ uses
mqueue,
kern_mtx,
vselinfo,
+ kern_rangelock,
time;
const
@@ -253,6 +254,8 @@ type
v_pollinfo:p_vpollinfo; // i Poll events, p for *v_pi
+ v_rl:rangelock; //Byte-range lock
+
property v_mountedhere:Pointer read v_un{.vu_mount } write v_un; //mount
property v_socket :Pointer read v_un{.vu_socket } write v_un; //socket
property v_rdev :Pointer read v_un{.vu_cdev } write v_un; //cdev
@@ -445,24 +448,22 @@ end;
procedure vn_rangelock_unlock(vp:p_vnode;cookie:Pointer);
begin
- //rangelock_unlock(@vp^.v_rl, (cookie), VI_MTX(vp))
+ rangelock_unlock(@vp^.v_rl, (cookie), VI_MTX(vp))
end;
procedure vn_rangelock_unlock_range(vp:p_vnode;cookie:Pointer;start,__end:Int64);
begin
- //rangelock_unlock_range(@vp^.v_rl, (cookie), start, __end, VI_MTX(vp))
+ rangelock_unlock_range(@vp^.v_rl, (cookie), start, __end, VI_MTX(vp))
end;
function vn_rangelock_rlock(vp:p_vnode;start,__end:Int64):Pointer;
begin
- Result:=nil;
- //Result:=rangelock_rlock(@vp^.v_rl, start, __end, VI_MTX(vp))
+ Result:=rangelock_rlock(@vp^.v_rl, start, __end, VI_MTX(vp))
end;
function vn_rangelock_wlock(vp:p_vnode;start,__end:Int64):Pointer;
begin
- Result:=nil;
- //Result:=rangelock_wlock(@vp^.v_rl, start, __end, VI_MTX(vp))
+ Result:=rangelock_wlock(@vp^.v_rl, start, __end, VI_MTX(vp))
end;
const
diff --git a/sys/vm/vm_nt_map.pas b/sys/vm/vm_nt_map.pas
index 969916ee..c6e38252 100644
--- a/sys/vm/vm_nt_map.pas
+++ b/sys/vm/vm_nt_map.pas
@@ -8,6 +8,8 @@ interface
uses
sysutils,
vm,
+ vmparam,
+ kern_mtx,
vm_pmap_prot;
const
@@ -18,6 +20,24 @@ const
MAX_UNION_SIZE=256*1024*1024;
type
+ t_danger_range=packed record
+ start:DWORD;
+ __end:DWORD;
+ end;
+
+ t_danger_zone=object
+ Flock :mtx;
+ Frange:t_danger_range;
+ procedure Init;
+ procedure Done;
+ procedure set_range(start,__end:vm_offset_t);
+ function in_range(addr,size:vm_offset_t):Boolean;
+ procedure d_wait(addr,size:vm_offset_t);
+ procedure d_wakeup;
+ procedure lock;
+ procedure unlock;
+ end;
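+
+ {
+  Publishes the page range that vm_nt_map is currently remapping: the unmap
+  path calls set_range+lock before touching the pages and set_range(0,0),
+  unlock and d_wakeup once done, while the access-violation handler
+  (pmap_danger_zone in vm_pmap.pas) uses in_range/d_wait to stall faulting
+  threads until the remap completes.
+ }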
+
pp_vm_nt_file_obj=^p_vm_nt_file_obj;
p_vm_nt_file_obj=^vm_nt_file_obj;
vm_nt_file_obj=packed record
@@ -43,10 +63,11 @@ type
p_vm_nt_map=^_vm_nt_map;
_vm_nt_map=object
- header :vm_nt_entry; // List of entries
- size :vm_size_t; // virtual size
- nentries :Integer; // Number of entries
- root :p_vm_nt_entry; // Root of a binary search tree
+ header :vm_nt_entry; // List of entries
+ size :vm_size_t; // virtual size
+ nentries :Integer; // Number of entries
+ root :p_vm_nt_entry; // Root of a binary search tree
+ danger_zone:t_danger_zone;
property min_offset:vm_offset_t read header.start write header.start;
property max_offset:vm_offset_t read header.__end write header.__end;
end;
@@ -82,6 +103,11 @@ procedure vm_nt_map_protect(map:p_vm_nt_map;
__end:vm_offset_t;
prot :Integer);
+procedure vm_nt_map_prot_fix(map:p_vm_nt_map;
+ start:vm_offset_t;
+ __end:vm_offset_t;
+ mode :Integer);
+
procedure vm_nt_map_madvise(map:p_vm_nt_map;
start:vm_offset_t;
__end:vm_offset_t;
@@ -96,6 +122,8 @@ procedure vm_nt_entry_deallocate(entry:p_vm_nt_entry);
implementation
uses
+ time,
+ kern_param,
md_map;
type
@@ -175,7 +203,8 @@ end;
procedure vm_prot_fixup(map:p_vm_nt_map;
start:vm_offset_t;
__end:vm_offset_t;
- max :Integer);
+ max :Integer;
+ mode :Integer);
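+//mode bit 0 set: scan with pmap_scan and drop the R/W permissions whose
+//PAGE_TRACK_* bit is set; bit 0 clear: plain pmap_scan_rwx pass.
+//mode bit 1 set: reapply the protection even when it already matches max.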
var
next:vm_offset_t;
base,size:vm_size_t;
@@ -187,16 +216,26 @@ begin
while (start<__end) do
begin
- next:=pmap_scan_rwx(start,__end);
+ if ((mode and 1)=0) then
+ begin
+ next:=pmap_scan_rwx(start,__end);
+
+ prot:=pmap_get_prot(start);
+ prot:=(prot and VM_RW);
+ end else
+ begin
+ next:=pmap_scan(start,__end);
+
+ prot:=pmap_get_prot(start);
+ prot:=(prot and VM_RW) and (not (prot shr PAGE_TRACK_SHIFT));
+ end;
base:=start;
size:=next-start;
- prot:=pmap_get_prot(start);
-
- if ((prot and VM_RW)<>(max and VM_RW)) then
+ if ((mode and 2)<>0) or (prot<>(max and VM_RW)) then
begin
- r:=md_protect(Pointer(base),size,(prot and VM_RW));
+ r:=md_protect(Pointer(base),size,prot);
if (r<>0) then
begin
Writeln('failed md_protect(',HexStr(base,11),',',HexStr(base+size,11),'):0x',HexStr(r,8));
@@ -293,7 +332,7 @@ begin
if (r<>0) then
begin
Writeln('failed md_protect(',HexStr(entry^.start,11),',',HexStr(entry^.start+size,11),'):0x',HexStr(r,8));
- Assert(false,'vm_prot_fixup');
+ Assert(false,'vm_map');
end;
end;
@@ -301,6 +340,86 @@ begin
end;
end;
+//
+
+function IDX_TO_OFF(x:DWORD):QWORD; inline;
+begin
+ Result:=QWORD(x) shl PAGE_SHIFT;
+end;
+
+function OFF_TO_IDX(x:QWORD):DWORD; inline;
+begin
+ Result:=QWORD(x) shr PAGE_SHIFT;
+end;
+
+//
+
+procedure t_danger_zone.Init;
+begin
+ mtx_init(Flock,'danger_zone');
+end;
+
+procedure t_danger_zone.Done;
+begin
+ mtx_destroy(Flock);
+end;
+
+procedure t_danger_zone.set_range(start,__end:vm_offset_t);
+var
+ range:t_danger_range;
+begin
+ range.start:=OFF_TO_IDX(start);
+ range.__end:=OFF_TO_IDX(__end);
+
+ System.InterlockedExchange64(QWORD(Frange),QWORD(range));
+end;
+
+function t_danger_zone.in_range(addr,size:vm_offset_t):Boolean;
+var
+ range:t_danger_range;
+begin
+ QWORD(range):=System.InterlockedExchangeAdd64(QWORD(Frange),0);
+
+ Result:=(addr>=IDX_TO_OFF(range.start)) and ((addr+size)<=IDX_TO_OFF(range.__end));
@@ -372,6 +493,8 @@ begin
size:=__end-start;
//danger zone
+ map^.danger_zone.set_range(start,__end);
+ map^.danger_zone.lock;
//unmap all
For i:=Low(stat.range) to High(stat.range) do
@@ -473,13 +596,17 @@ begin
vm_prot_fixup(map,
ets[i]^.start,
ets[i]^.__end,
- max
+ max,
+ 2
);
end;
end;
end;
//danger zone
+ map^.danger_zone.set_range(0,0);
+ map^.danger_zone.unlock;
+ map^.danger_zone.d_wakeup;
Result:=True;
end;
@@ -555,6 +682,7 @@ begin
map^.min_offset:=min;
map^.max_offset:=max;
map^.root:=nil;
+ map^.danger_zone.Init;
end;
procedure vm_nt_entry_dispose(map:p_vm_nt_map;entry:p_vm_nt_entry); inline;
@@ -1035,6 +1163,7 @@ procedure vm_nt_map_protect(map:p_vm_nt_map;
var
entry:p_vm_nt_entry;
base,size:vm_size_t;
+ max:Integer;
r:Integer;
begin
if (start=__end) then Exit;
@@ -1064,7 +1193,15 @@ begin
size:=size-base;
- r:=md_protect(Pointer(base),size,(prot and VM_RW));
+ if (entry^.obj<>nil) then
+ begin
+ max:=entry^.obj^.maxp;
+ end else
+ begin
+ max:=0;
+ end;
+
+ r:=md_protect(Pointer(base),size,(prot and max and VM_RW));
if (r<>0) then
begin
Writeln('failed md_protect(',HexStr(base,11),',',HexStr(base+size,11),'):0x',HexStr(r,8));
@@ -1075,6 +1212,49 @@ begin
end;
end;
+procedure vm_nt_map_prot_fix(map:p_vm_nt_map;
+ start:vm_offset_t;
+ __end:vm_offset_t;
+ mode :Integer);
+var
+ entry:p_vm_nt_entry;
+ e_start,e___end:vm_size_t;
+begin
+ if (start=__end) then Exit;
+
+ if (not vm_nt_map_lookup_entry(map, start, @entry)) then
+ begin
+ entry:=entry^.next;
+ end else
+ begin
+ entry:=entry;
+ end;
+
+ while (entry<>@map^.header) and (entry^.start<__end) do
+ begin
+
+ if (entry^.obj<>nil) then
+ begin
+ e_start:=entry^.start;
+ e___end:=entry^.__end;
+
+    if (e_start<start) then
+    begin
+     e_start:=start;
+    end;
+
+    if (e___end>__end) then
+ begin
+ e___end:=__end;
+ end;
+
+ vm_prot_fixup(map,e_start,e___end,entry^.obj^.maxp,mode);
+ end;
+
+ entry:=entry^.next;
+ end;
+end;
+
//rdi, rsi
procedure ZeroPages(addr:Pointer;size:Ptruint); assembler nostackframe SysV_ABI_CDecl;
label