Pavel 2024-05-23 13:43:36 +03:00
parent 5000ec1d00
commit fd82297702
12 changed files with 727 additions and 64 deletions

View File

@ -1025,6 +1025,10 @@
<Filename Value="vulkan\vSetsPoolManager.pas"/>
<IsPartOfProject Value="True"/>
</Unit>
<Unit>
<Filename Value="sys\kern\kern_rangelock.pas"/>
<IsPartOfProject Value="True"/>
</Unit>
</Units>
</ProjectOptions>
<CompilerOptions>

View File

@ -99,8 +99,10 @@ function bits1(P:Pointer):SizeUInt; inline;
function bits1(P:SizeUInt):SizeUInt; inline;
procedure spin_pause;
procedure atomic_set_int (addr:PInteger;val:Integer); sysv_abi_default;
procedure atomic_clear_int(addr:PInteger;val:Integer); sysv_abi_default;
procedure atomic_set_byte (addr:PByte;val:byte); sysv_abi_default;
procedure atomic_clear_byte(addr:PByte;val:byte); sysv_abi_default;
procedure atomic_set_int (addr:PInteger;val:Integer); sysv_abi_default;
procedure atomic_clear_int (addr:PInteger;val:Integer); sysv_abi_default;
implementation
@ -389,6 +391,18 @@ asm
pause
end;
procedure atomic_set_byte(addr:PByte;val:byte); assembler; nostackframe; sysv_abi_default;
asm
lock orb %sil,(%rdi)
end;
procedure atomic_clear_byte(addr:PByte;val:byte); assembler; nostackframe; sysv_abi_default;
asm
not %sil
lock andb %sil,(%rdi)
end;
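For orientation, a minimal hedged sketch (not part of the commit; the variable and constants are made up) of what the two new byte-wide helpers do: atomic_set_byte ORs the bits of val into the target byte under a LOCK prefix, and atomic_clear_byte ANDs the target with the complement of val.
procedure atomic_byte_example;
var
 flags:Byte;
begin
 flags:=$01;
 atomic_set_byte  (@flags,$04); //flags=$05: bits of val are set atomically (lock orb)
 atomic_clear_byte(@flags,$01); //flags=$04: bits of val are cleared atomically (lock andb of not val)
end;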
procedure atomic_set_int(addr:PInteger;val:Integer); assembler; nostackframe; sysv_abi_default;
asm
lock orl %esi,(%rdi)

sys/kern/kern_rangelock.pas (new file, 284 lines)
View File

@ -0,0 +1,284 @@
unit kern_rangelock;
{$mode ObjFPC}{$H+}
{$CALLING SysV_ABI_CDecl}
interface
uses
mqueue,
kern_thr,
kern_mtx;
const
RL_LOCK_READ =$0001;
RL_LOCK_WRITE =$0002;
RL_LOCK_TYPE_MASK=$0003;
RL_LOCK_GRANTED =$0004;
type
off_t=Int64;
p_rl_q_entry=^rl_q_entry;
rl_q_entry=record
rl_q_link :TAILQ_ENTRY; //rl_q_entry
rl_q_start:off_t;
rl_q_end :off_t;
rl_q_flags:Integer;
end;
p_rangelock=^rangelock;
rangelock=record
rl_waiters:TAILQ_HEAD; //rl_q_entry
rl_currdep:p_rl_q_entry;
end;
procedure rangelock_init(lock:p_rangelock);
procedure rangelock_destroy(lock:p_rangelock);
procedure rangelock_unlock(lock:p_rangelock;cookie:Pointer;ilk:p_mtx);
function rangelock_unlock_range(lock:p_rangelock;cookie:Pointer;start,__end:off_t;ilk:p_mtx):Pointer;
function rangelock_rlock(lock:p_rangelock;start,__end:off_t;ilk:p_mtx):Pointer;
function rangelock_wlock(lock:p_rangelock;start,__end:off_t;ilk:p_mtx):Pointer;
procedure rlqentry_free(rleq:p_rl_q_entry);
implementation
//
function msleep(ident :Pointer;
lock :p_mtx;
priority:Integer;
wmesg :PChar;
timo :Int64):Integer; external;
procedure wakeup(ident:Pointer); external;
//
function rlqentry_alloc():p_rl_q_entry; inline;
begin
Result:=AllocMem(SizeOf(rl_q_entry));
end;
procedure rlqentry_free(rleq:p_rl_q_entry); inline;
begin
if (rleq<>nil) then
begin
FreeMem(rleq);
end;
end;
procedure rangelock_init(lock:p_rangelock);
begin
TAILQ_INIT(@lock^.rl_waiters);
lock^.rl_currdep:=nil;
end;
procedure rangelock_destroy(lock:p_rangelock);
begin
Assert(TAILQ_EMPTY(@lock^.rl_waiters), 'Dangling waiters');
end;
{
* Verifies the supplied rl_q_entries for compatibility. Returns 1
* if the rangelock queue entries are not compatible, 0 if they are.
*
* Two entries are compatible if their ranges do not overlap, or both
* entries are for read.
}
function rangelock_incompatible(e1,e2:p_rl_q_entry):Integer;
begin
if ((e1^.rl_q_flags and RL_LOCK_TYPE_MASK)=RL_LOCK_READ) and
((e2^.rl_q_flags and RL_LOCK_TYPE_MASK)=RL_LOCK_READ) then
begin
Exit(0);
end;
if (e1^.rl_q_start < e2^.rl_q_end) and (e1^.rl_q_end > e2^.rl_q_start) then
begin
Exit(1);
end;
Exit(0);
end;
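//Illustrative outcomes (hypothetical ranges, not from the commit):
// e1=[0..4096) READ , e2=[0..4096) READ    -> 0 (both readers: compatible)
// e1=[0..4096) WRITE, e2=[2048..8192) READ -> 1 (overlap with a writer: incompatible)
// e1=[0..4096) WRITE, e2=[4096..8192) WRITE-> 0 (disjoint ranges: compatible)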
{
* Recalculate lock^.rl_currdep after an unlock: rl_currdep is advanced to the first
* waiter that is still blocked by an earlier incompatible entry; every waiter in front
* of it is marked RL_LOCK_GRANTED and woken.
}
procedure rangelock_calc_block(lock:p_rangelock);
label
_out;
var
entry,entry1,whead:p_rl_q_entry;
begin
if (lock^.rl_currdep=TAILQ_FIRST(@lock^.rl_waiters)) and
(lock^.rl_currdep<>nil) then
begin
lock^.rl_currdep:=TAILQ_NEXT(lock^.rl_currdep, @lock^.rl_currdep^.rl_q_link);
end;
entry:=lock^.rl_currdep;
while (entry<>nil) do
begin
entry1:=TAILQ_FIRST(@lock^.rl_waiters);
while (entry1<>nil) do
begin
if (rangelock_incompatible(entry, entry1)<>0) then
begin
goto _out;
end;
if (entry1=entry) then
begin
break;
end;
//
entry1:=TAILQ_NEXT(entry1, @entry1^.rl_q_link)
end;
//next
entry:=TAILQ_NEXT(entry, @entry^.rl_q_link);
end;
_out:
lock^.rl_currdep:=entry;
whead:=TAILQ_FIRST(@lock^.rl_waiters);
while (whead<>nil) do
begin
if (whead=lock^.rl_currdep) then
begin
break;
end;
if ((whead^.rl_q_flags and RL_LOCK_GRANTED)=0) then
begin
whead^.rl_q_flags:=whead^.rl_q_flags or RL_LOCK_GRANTED;
wakeup(whead);
end;
//
whead:=TAILQ_NEXT(whead, @whead^.rl_q_link)
end;
end;
procedure rangelock_unlock_locked(lock:p_rangelock;entry:p_rl_q_entry;ilk:p_mtx);
begin
Assert((lock<>nil) and (entry<>nil) and (ilk<>nil));
mtx_assert(ilk^);
Assert(entry<>lock^.rl_currdep, 'stuck currdep');
TAILQ_REMOVE(@lock^.rl_waiters, entry, @entry^.rl_q_link);
rangelock_calc_block(lock);
mtx_unlock(ilk^);
if (curkthread^.td_rlqe=nil) then
curkthread^.td_rlqe:=entry
else
rlqentry_free(entry);
end;
procedure rangelock_unlock(lock:p_rangelock;cookie:Pointer;ilk:p_mtx);
begin
Assert((lock<>nil) and (cookie<>nil) and (ilk<>nil));
mtx_lock(ilk^);
rangelock_unlock_locked(lock, cookie, ilk);
end;
{
* Unlock a sub-range of a granted lock: the lock is trimmed to [start, __end) and the
* tail beyond __end is released; returns nil when the whole range has been released.
}
function rangelock_unlock_range(lock:p_rangelock;cookie:Pointer;start,__end:off_t;ilk:p_mtx):Pointer;
var
entry:p_rl_q_entry;
begin
Assert((lock<>nil) and (cookie<>nil) and (ilk<>nil));
entry:=cookie;
Assert((entry^.rl_q_flags and RL_LOCK_GRANTED)<>0,'Unlocking non-granted lock');
Assert(entry^.rl_q_start=start, 'wrong start');
Assert(entry^.rl_q_end >= __end, 'wrong end');
mtx_lock(ilk^);
if (entry^.rl_q_end=__end) then
begin
rangelock_unlock_locked(lock, cookie, ilk);
Exit(nil);
end;
entry^.rl_q_end:=__end;
rangelock_calc_block(lock);
mtx_unlock(ilk^);
Exit(cookie);
end;
{
* Add the lock request to the queue of pending requests for the
* rangelock and sleep until the request can be granted.
}
function rangelock_enqueue(lock:p_rangelock;start,__end:off_t;mode:Integer;ilk:p_mtx):Pointer;
var
entry:p_rl_q_entry;
td:p_kthread;
begin
Assert((lock<>nil) and (ilk<>nil));
td:=curkthread;
if (td^.td_rlqe<>nil) then
begin
entry:=td^.td_rlqe;
td^.td_rlqe:=nil;
end else
begin
entry:=rlqentry_alloc();
end;
Assert(entry<>nil);
entry^.rl_q_flags:=mode;
entry^.rl_q_start:=start;
entry^.rl_q_end:=__end;
mtx_lock(ilk^);
{
* XXXKIB TODO. Check that a thread does not try to enqueue a
* lock that is incompatible with another request from the same
* thread.
}
TAILQ_INSERT_TAIL(@lock^.rl_waiters, entry, @entry^.rl_q_link);
if (lock^.rl_currdep=nil) then
begin
lock^.rl_currdep:=entry;
end;
rangelock_calc_block(lock);
while ((entry^.rl_q_flags and RL_LOCK_GRANTED)=0) do
begin
msleep(entry, ilk, 0, 'range', 0);
end;
mtx_unlock(ilk^);
Exit(entry);
end;
function rangelock_rlock(lock:p_rangelock;start,__end:off_t;ilk:p_mtx):Pointer;
begin
Result:=rangelock_enqueue(lock, start, __end, RL_LOCK_READ, ilk);
end;
function rangelock_wlock(lock:p_rangelock;start,__end:off_t;ilk:p_mtx):Pointer;
begin
Result:=rangelock_enqueue(lock, start, __end, RL_LOCK_WRITE, ilk);
end;
end.
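As a rough orientation for readers of this new unit, here is a hedged usage sketch mirroring how the vn_rangelock_* wrappers later in this commit drive it; the procedure name, mutex name and byte range are placeholders, not part of the commit.
// uses kern_mtx, kern_rangelock;
procedure rangelock_example;
var
 rl    :rangelock;
 ilk   :mtx;
 cookie:Pointer;
begin
 mtx_init(ilk,'rl_ilk');
 rangelock_init(@rl);
 //take a shared (read) lock on bytes [0..4096)
 cookie:=rangelock_rlock(@rl,0,4096,@ilk);
 //... perform the read ...
 rangelock_unlock(@rl,cookie,@ilk); //acquires and releases ilk internally
 rangelock_destroy(@rl);
 mtx_destroy(ilk);
end;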

View File

@ -231,6 +231,7 @@ type
td_slpcallout :Pointer;
tdq_lock :mtx;
//
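//td_rlqe below: per-thread cached rl_q_entry, reused by kern_rangelock and freed in thread_free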
td_rlqe :Pointer;
td_fpop :Pointer;
td_map_def_user :Pointer;
td_dmap_def_user:Pointer;

View File

@ -78,6 +78,7 @@ uses
kern_rwlock,
kern_sig,
kern_proc,
kern_rangelock,
sched_ule,
sys_sleepqueue;
@ -200,6 +201,7 @@ procedure thread_free(td:p_kthread);
begin
mtx_destroy(td^.tdq_lock);
sleepq_free(td^.td_sleepqueue);
rlqentry_free(td^.td_rlqe);
umtx_thread_fini(td);
cpu_thread_free(td);
end;

View File

@ -21,6 +21,10 @@ uses
signal,
ucontext,
vm,
vm_map,
vm_pmap,
vm_pmap_prot,
kern_proc,
kern_jit_dynamic;
const
@ -101,7 +105,7 @@ const
FPC_EXCEPTION_CODE=$E0465043;
FPC_SET_EH_HANDLER=$E0465044;
function translate_pageflt_err(v:QWORD):QWORD; inline;
function translate_pageflt_err(v:QWORD):Byte; inline;
begin
Result:=VM_PROT_NONE;
case v of
@ -111,12 +115,21 @@ begin
end;
end;
function get_pageflt_err(p:PExceptionPointers):Byte; inline;
begin
Result:=translate_pageflt_err(p^.ExceptionRecord^.ExceptionInformation[0]);
end;
function get_pageflt_addr(p:PExceptionPointers):QWORD; inline;
begin
Result:=p^.ExceptionRecord^.ExceptionInformation[1];
end;
procedure jit_save_to_sys_save(td:p_kthread); external;
procedure sys_save_to_jit_save(td:p_kthread); external;
function ProcessException3(td:p_kthread;p:PExceptionPointers):longint; SysV_ABI_CDecl;
var
ExceptionCode:DWORD;
tf_addr:QWORD;
rv:Integer;
is_jit:Boolean;
@ -153,14 +166,12 @@ begin
td^.td_frame.tf_trapno:=0;
ExceptionCode:=p^.ExceptionRecord^.ExceptionCode;
rv:=-1;
case ExceptionCode of
case p^.ExceptionRecord^.ExceptionCode of
STATUS_ACCESS_VIOLATION:
begin
tf_addr:=p^.ExceptionRecord^.ExceptionInformation[1];
tf_addr:=get_pageflt_addr(p);
Writeln('tf_addr:0x',HexStr(tf_addr,16));
@ -170,7 +181,7 @@ begin
//_get_frame(p^.ContextRecord,@td^.td_frame,{@td^.td_fpstate}nil);
td^.td_frame.tf_trapno:=T_PAGEFLT;
td^.td_frame.tf_err :=translate_pageflt_err(p^.ExceptionRecord^.ExceptionInformation[0]);
td^.td_frame.tf_err :=get_pageflt_err(p);
td^.td_frame.tf_addr :=tf_addr;
rv:=trap.trap(@td^.td_frame,is_jit);
@ -251,6 +262,37 @@ begin
EXCEPTION_SET_THREADNAME :Exit;
DBG_PRINTEXCEPTION_C :Exit(EXCEPTION_CONTINUE_EXECUTION);
DBG_PRINTEXCEPTION_WIDE_C:Exit(EXCEPTION_CONTINUE_EXECUTION); //RenderDoc issue
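//Intent of the new STATUS_ACCESS_VIOLATION branch below (summary of this commit's code):
//if the faulting address lies inside the "danger zone" of a remap that is still in progress,
//pmap_danger_zone() waits for the remap to finish and the instruction is retried via
//EXCEPTION_CONTINUE_EXECUTION; faults on pages whose recorded protection already allows the
//access are only reported to stderr as unhandled for now.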
STATUS_ACCESS_VIOLATION:
begin
if pmap_danger_zone(@vm_map_t(p_proc.p_vmspace)^.pmap,
get_pageflt_addr(p),
256) then
begin
Exit(EXCEPTION_CONTINUE_EXECUTION);
end;
case get_pageflt_err(p) of
VM_PROT_READ:
begin
if ((pmap_get_prot(get_pageflt_addr(p),256) and VM_PROT_READ)<>0) then
begin
Writeln(stderr,'Unhandled VM_PROT_READ');
end;
end;
VM_PROT_WRITE:
begin
if ((pmap_get_prot(get_pageflt_addr(p),256) and VM_PROT_WRITE)<>0) then
begin
Writeln(stderr,'Unhandled VM_PROT_WRITE');
end;
end;
else;
end;
end;
else
if not IsDefaultExceptions(p^.ExceptionRecord^.ExceptionCode) then
begin

View File

@ -95,6 +95,10 @@ procedure pmap_mirror_unmap(pmap:pmap_t;
base:Pointer;
size:QWORD);
function pmap_danger_zone(pmap:pmap_t;
addr:vm_offset_t;
size:vm_offset_t):Boolean;
implementation
uses
@ -858,7 +862,7 @@ begin
end;
end;
pmap_mark(info.start,info.__end,prot and VM_RWX);
pmap_mark_rwx(info.start,info.__end,prot);
//upper pages
delta:=(paddi and PAGE_MASK);
@ -881,7 +885,7 @@ begin
end;
end;
pmap_mark(start,__end,prot and VM_RWX);
pmap_mark_rwx(start,__end,prot);
end;
procedure pmap_protect(pmap :pmap_t;
@ -942,7 +946,7 @@ begin
end;
end;
pmap_mark(start,__end,prot and VM_RWX);
pmap_mark_rwx(start,__end,prot);
end;
procedure pmap_madvise(pmap :pmap_t;
@ -1018,7 +1022,7 @@ begin
Writeln('pmap_remove:',HexStr(start,11),':',HexStr(__end,11));
end;
pmap_unmark(start,__end);
pmap_unmark_rwx(start,__end);
r:=0;
case vm_object_type(obj) of
@ -1099,6 +1103,18 @@ begin
end;
end;
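//pmap_danger_zone: returns True if [addr, addr+size) fell inside the remap danger zone;
//it only returns after d_wait() has slept until the zone was cleared, so the caller
//(the exception handler) can simply retry the faulting access.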
function pmap_danger_zone(pmap:pmap_t;
addr:vm_offset_t;
size:vm_offset_t):Boolean;
begin
Result:=False;
while (pmap^.nt_map.danger_zone.in_range(addr,size)) do
begin
Result:=True;
pmap^.nt_map.danger_zone.d_wait(addr,size);
end;
end;
end.

View File

@ -6,6 +6,7 @@ unit vm_pmap_prot;
interface
uses
atomic,
vm,
vmparam;
@ -24,18 +25,23 @@ const
PAGE_PROT_RW =PAGE_PROT_READ or PAGE_PROT_WRITE;
PAGE_PROT_RWX =PAGE_PROT_READ or PAGE_PROT_WRITE or PAGE_PROT_EXECUTE;
PAGE_PROT_LIFT =$40;
//PAGE_BUSY_FLAG =DWORD($10000000);
//PAGE_PATCH_FLAG =DWORD($08000000);
PAGE_TRACK_R =$08;
PAGE_TRACK_W =$10;
PAGE_TRACK_X =$20;
PAGE_TRACK_RWX =PAGE_TRACK_R or PAGE_TRACK_W or PAGE_TRACK_X;
PAGE_TRACK_SHIFT =3;
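//Layout note (assuming the usual R=1/W=2/X=4 protection encoding): PAGE_TRACK_R/W/X occupy
//bits 3..5 of each PAGE_PROT[] byte, so (prot shr PAGE_TRACK_SHIFT) maps a track bit onto
//the protection bit it shadows; vm_prot_fixup later in this commit relies on this.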
var
PAGE_PROT:PBYTE=nil;
procedure pmap_mark (start,__end:vm_offset_t;prots:Byte);
procedure pmap_unmark (start,__end:vm_offset_t);
procedure pmap_mark_rwx (start,__end:vm_offset_t;prots:Byte);
procedure pmap_unmark_rwx(start,__end:vm_offset_t);
procedure pmap_track (start,__end:vm_offset_t;prots:Byte);
procedure pmap_untrack (start,__end:vm_offset_t;prots:Byte);
function pmap_scan (start,__end:vm_offset_t):vm_offset_t;
function pmap_scan_rwx (start,__end:vm_offset_t):vm_offset_t;
function pmap_get_prot (addr:vm_offset_t):Byte;
function pmap_get_prot (addr,size:vm_offset_t):Byte;
implementation
@ -59,21 +65,27 @@ begin
Result:=x;
end;
procedure pmap_mark(start,__end:vm_offset_t;prots:Byte);
procedure pmap_mark_rwx(start,__end:vm_offset_t;prots:Byte);
var
clear:Byte;
begin
prots:=prots and PAGE_PROT_RWX;
clear:=(not prots) and PAGE_PROT_RWX;
start:=OFF_TO_IDX(start);
__end:=OFF_TO_IDX(__end);
start:=MAX_IDX(start);
__end:=MAX_IDX(__end);
while (start<__end) do
begin
PAGE_PROT[start]:=prots;
atomic_clear_byte(@PAGE_PROT[start],clear);
atomic_set_byte (@PAGE_PROT[start],prots);
//PAGE_PROT[start]:=prots;
Inc(start);
end;
WriteBarrier;
end;
procedure pmap_unmark(start,__end:vm_offset_t);
procedure pmap_unmark_rwx(start,__end:vm_offset_t);
begin
start:=OFF_TO_IDX(start);
__end:=OFF_TO_IDX(__end);
@ -81,12 +93,75 @@ begin
__end:=MAX_IDX(__end);
while (start<__end) do
begin
PAGE_PROT[start]:=0;
atomic_clear_byte(@PAGE_PROT[start],PAGE_PROT_RWX);
//PAGE_PROT[start]:=0;
Inc(start);
end;
WriteBarrier;
end;
procedure pmap_track(start,__end:vm_offset_t;prots:Byte);
begin
prots:=prots and PAGE_TRACK_RWX;
start:=OFF_TO_IDX(start);
__end:=OFF_TO_IDX(__end);
start:=MAX_IDX(start);
__end:=MAX_IDX(__end);
while (start<__end) do
begin
atomic_set_byte(@PAGE_PROT[start],prots);
Inc(start);
end;
WriteBarrier;
end;
procedure pmap_untrack(start,__end:vm_offset_t;prots:Byte);
begin
prots:=prots and PAGE_TRACK_RWX;
start:=OFF_TO_IDX(start);
__end:=OFF_TO_IDX(__end);
start:=MAX_IDX(start);
__end:=MAX_IDX(__end);
while (start<__end) do
begin
atomic_clear_byte(@PAGE_PROT[start],prots);
Inc(start);
end;
WriteBarrier;
end;
function pmap_scan(start,__end:vm_offset_t):vm_offset_t;
var
b,v:Byte;
begin
start:=OFF_TO_IDX(start);
__end:=OFF_TO_IDX(__end);
start:=MAX_IDX(start);
__end:=MAX_IDX(__end);
ReadBarrier;
b:=PAGE_PROT[start];
Inc(start);
while (start<__end) do
begin
v:=PAGE_PROT[start];
if (b<>v) then
begin
start:=IDX_TO_OFF(start);
Exit(start);
end;
Inc(start);
end;
__end:=IDX_TO_OFF(__end);
Result:=__end;
end;
function pmap_scan_rwx(start,__end:vm_offset_t):vm_offset_t;
var
b,v:Byte;
@ -122,10 +197,19 @@ end;
function pmap_get_prot(addr:vm_offset_t):Byte;
begin
addr:=OFF_TO_IDX(addr);
addr:=MAX_IDX(addr);
Result:=PAGE_PROT[addr];
if (addr>PAGE_MAP_MASK) then
begin
Result:=0
end else
begin
Result:=PAGE_PROT[addr];
end;
end;
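//Range overload below: ORs the protection byte at addr with the one at addr+size;
//pages strictly in between are not inspected.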
function pmap_get_prot(addr,size:vm_offset_t):Byte;
begin
Result:=pmap_get_prot(addr) or pmap_get_prot(addr+size);
end;
end.

View File

@ -192,6 +192,7 @@ uses
subr_uio,
sys_vm_object,
vsys_generic,
kern_rangelock,
rtprio,
sys_conf;
@ -826,7 +827,7 @@ begin
vp^.v_vflag:=vp^.v_vflag or VV_NOKNOTE;
end;
end;
//rangelock_init(@vp^.v_rl);
rangelock_init(@vp^.v_rl);
{
* For the filesystems which do not use vfs_hash_insert(),
@ -2279,7 +2280,7 @@ begin
{ XXX Elsewhere we detect an already freed vnode via nil v_op. }
vp^.v_op:=nil;
//rangelock_destroy(@vp^.v_rl);
rangelock_destroy(@vp^.v_rl);
//lockdestroy(vp^.v_vnlock);
mtx_destroy(vp^.v_vnlock^);
mtx_destroy(vp^.v_interlock);

View File

@ -13,6 +13,7 @@ uses
vfile,
vstat,
vuio,
vm,
vmparam,
vfilio,
vnode;
@ -980,27 +981,34 @@ unlock:
Exit(error);
end;
const
vn_io_fault_enable:Boolean=False;
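//vn_io_fault_enable is introduced here but left at False; the original prefaulting body
//further below stays commented out in this commit.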
function vn_io_fault(fp:p_file;uio:p_uio;flags:Integer):Integer;
label
out_last;
var
td:p_kthread;
//td:p_kthread;
//vm_page_t ma[io_hold_cnt + 2];
uio_clone:p_uio;
short_uio:T_uio;
short_iovec:array[0..0] of iovec;
//uio_clone:p_uio;
//short_uio:T_uio;
//short_iovec:array[0..0] of iovec;
doio:fo_rdwr_t;
vp:p_vnode;
rl_cookie:Pointer;
mp:p_mount;
//vm_page_t *prev_td_ma;
error,cnt,save,saveheld,prev_td_ma_cnt:Integer;
addr,__end:QWORD;
//vm_prot_t prot;
len,resid:QWORD;
adv:Int64;
error:Integer;
//cnt,save,saveheld,prev_td_ma_cnt:Integer;
//addr,__end:QWORD;
//prot:Integer;
//len,resid:QWORD;
//adv:Int64;
NO_IOPF:Boolean;
begin
td:=curkthread;
//td:=curkthread;
rl_cookie:=nil;
if (uio^.uio_rw=UIO_READ) then
doio:=@vn_read
@ -1010,22 +1018,41 @@ begin
vp:=fp^.f_vnode;
foffset_lock_uio(fp, uio, flags);
NO_IOPF:=False;
mp:=vp^.v_mount;
if (mp<>nil) then
if ((mp^.mnt_kern_flag and MNTK_NO_IOPF)=0) then
begin
NO_IOPF:=((mp^.mnt_kern_flag and MNTK_NO_IOPF)=0);
end;
if (uio^.uio_segflg<>UIO_USERSPACE) or
(vp^.v_type<>VREG) or
NO_IOPF or
(not vn_io_fault_enable) then
begin
error:=doio(fp, uio, flags or FOF_OFFSET);
goto out_last;
end;
if (uio^.uio_segflg<>UIO_USERSPACE) or
(vp^.v_type<>VREG) or
{(not vn_io_fault_enable)} false then
if (uio^.uio_rw=UIO_READ) then
begin
error:=doio(fp, uio, flags or FOF_OFFSET);
goto out_last;
//prot:=VM_PROT_WRITE;
rl_cookie:=vn_rangelock_rlock(vp, uio^.uio_offset, uio^.uio_offset + uio^.uio_resid);
end else
begin
//prot:=VM_PROT_READ;
if ((fp^.f_flag and O_APPEND)<>0) or ((flags and FOF_OFFSET)=0) then
begin
{ For appenders, punt and lock the whole range. }
rl_cookie:=vn_rangelock_wlock(vp, 0, High(Int64))
end else
begin
rl_cookie:=vn_rangelock_wlock(vp, uio^.uio_offset, uio^.uio_offset + uio^.uio_resid);
end;
end;
error:=doio(fp, uio, flags or FOF_OFFSET);
{
uio_clone:=cloneuio(uio);
resid:=uio^.uio_resid;
@ -1041,11 +1068,14 @@ begin
end else
begin
prot:=VM_PROT_READ;
if ((fp^.f_flag and O_APPEND)<>0 or (flags and FOF_OFFSET)=0) then
if ((fp^.f_flag and O_APPEND)<>0) or ((flags and FOF_OFFSET)=0) then
begin
{ For appenders, punt and lock the whole range. }
rl_cookie:=vn_rangelock_wlock(vp, 0, High(Int64))
else
end else
begin
rl_cookie:=vn_rangelock_wlock(vp, uio^.uio_offset, uio^.uio_offset + uio^.uio_resid);
end;
end;
save:=vm_fault_disable_pagefaults();
@ -1127,6 +1157,10 @@ _out:
}
out_last:
if (rl_cookie<>nil) then
begin
vn_rangelock_unlock(vp, rl_cookie);
end;
foffset_unlock_uio(fp, uio, flags);
Exit(error);
end;

View File

@ -9,6 +9,7 @@ uses
mqueue,
kern_mtx,
vselinfo,
kern_rangelock,
time;
const
@ -253,6 +254,8 @@ type
v_pollinfo:p_vpollinfo; // i Poll events, p for *v_pi
v_rl:rangelock; //Byte-range lock
property v_mountedhere:Pointer read v_un{.vu_mount } write v_un; //mount
property v_socket :Pointer read v_un{.vu_socket } write v_un; //socket
property v_rdev :Pointer read v_un{.vu_cdev } write v_un; //cdev
@ -445,24 +448,22 @@ end;
procedure vn_rangelock_unlock(vp:p_vnode;cookie:Pointer);
begin
//rangelock_unlock(@vp^.v_rl, (cookie), VI_MTX(vp))
rangelock_unlock(@vp^.v_rl, (cookie), VI_MTX(vp))
end;
procedure vn_rangelock_unlock_range(vp:p_vnode;cookie:Pointer;start,__end:Int64);
begin
//rangelock_unlock_range(@vp^.v_rl, (cookie), start, __end, VI_MTX(vp))
rangelock_unlock_range(@vp^.v_rl, (cookie), start, __end, VI_MTX(vp))
end;
function vn_rangelock_rlock(vp:p_vnode;start,__end:Int64):Pointer;
begin
Result:=nil;
//Result:=rangelock_rlock(@vp^.v_rl, start, __end, VI_MTX(vp))
Result:=rangelock_rlock(@vp^.v_rl, start, __end, VI_MTX(vp))
end;
function vn_rangelock_wlock(vp:p_vnode;start,__end:Int64):Pointer;
begin
Result:=nil;
//Result:=rangelock_wlock(@vp^.v_rl, start, __end, VI_MTX(vp))
Result:=rangelock_wlock(@vp^.v_rl, start, __end, VI_MTX(vp))
end;
const

View File

@ -8,6 +8,8 @@ interface
uses
sysutils,
vm,
vmparam,
kern_mtx,
vm_pmap_prot;
const
@ -18,6 +20,24 @@ const
MAX_UNION_SIZE=256*1024*1024;
type
t_danger_range=packed record
start:DWORD;
__end:DWORD;
end;
t_danger_zone=object
Flock :mtx;
Frange:t_danger_range;
procedure Init;
procedure Done;
procedure set_range(start,__end:vm_offset_t);
function in_range(addr,size:vm_offset_t):Boolean;
procedure d_wait(addr,size:vm_offset_t);
procedure d_wakeup;
procedure lock;
procedure unlock;
end;
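//t_danger_zone (summary of how this commit uses it): vm_remap publishes the page range it is
//about to unmap/remap via set_range() and holds lock() for the duration; the page-fault path
//calls in_range()/d_wait() (through pmap_danger_zone) to sleep while a faulting address is
//inside that range, and vm_remap clears the range, unlock()s and d_wakeup()s when it is done.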
pp_vm_nt_file_obj=^p_vm_nt_file_obj;
p_vm_nt_file_obj=^vm_nt_file_obj;
vm_nt_file_obj=packed record
@ -43,10 +63,11 @@ type
p_vm_nt_map=^_vm_nt_map;
_vm_nt_map=object
header :vm_nt_entry; // List of entries
size :vm_size_t; // virtual size
nentries :Integer; // Number of entries
root :p_vm_nt_entry; // Root of a binary search tree
header :vm_nt_entry; // List of entries
size :vm_size_t; // virtual size
nentries :Integer; // Number of entries
root :p_vm_nt_entry; // Root of a binary search tree
danger_zone:t_danger_zone;
property min_offset:vm_offset_t read header.start write header.start;
property max_offset:vm_offset_t read header.__end write header.__end;
end;
@ -82,6 +103,11 @@ procedure vm_nt_map_protect(map:p_vm_nt_map;
__end:vm_offset_t;
prot :Integer);
procedure vm_nt_map_prot_fix(map:p_vm_nt_map;
start:vm_offset_t;
__end:vm_offset_t;
mode :Integer);
procedure vm_nt_map_madvise(map:p_vm_nt_map;
start:vm_offset_t;
__end:vm_offset_t;
@ -96,6 +122,8 @@ procedure vm_nt_entry_deallocate(entry:p_vm_nt_entry);
implementation
uses
time,
kern_param,
md_map;
type
@ -175,7 +203,8 @@ end;
procedure vm_prot_fixup(map:p_vm_nt_map;
start:vm_offset_t;
__end:vm_offset_t;
max :Integer);
max :Integer;
mode :Integer);
var
next:vm_offset_t;
base,size:vm_size_t;
@ -187,16 +216,26 @@ begin
while (start<__end) do
begin
next:=pmap_scan_rwx(start,__end);
if ((mode and 1)=0) then
begin
next:=pmap_scan_rwx(start,__end);
prot:=pmap_get_prot(start);
prot:=(prot and VM_RW);
end else
begin
next:=pmap_scan(start,__end);
prot:=pmap_get_prot(start);
prot:=(prot and VM_RW) and (not (prot shr PAGE_TRACK_SHIFT));
end;
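//mode bits, as used above and in the md_protect condition below:
// bit 0 set   -> track-aware pass: pmap_scan() and mask off protections shadowed by PAGE_TRACK_* bits
// bit 0 clear -> plain pass: pmap_scan_rwx() with the raw R/W protection
// bit 1 (value 2) set -> always call md_protect, even when prot already equals (max and VM_RW)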
base:=start;
size:=next-start;
prot:=pmap_get_prot(start);
if ((prot and VM_RW)<>(max and VM_RW)) then
if ((mode and 2)<>0) or (prot<>(max and VM_RW)) then
begin
r:=md_protect(Pointer(base),size,(prot and VM_RW));
r:=md_protect(Pointer(base),size,prot);
if (r<>0) then
begin
Writeln('failed md_protect(',HexStr(base,11),',',HexStr(base+size,11),'):0x',HexStr(r,8));
@ -293,7 +332,7 @@ begin
if (r<>0) then
begin
Writeln('failed md_protect(',HexStr(entry^.start,11),',',HexStr(entry^.start+size,11),'):0x',HexStr(r,8));
Assert(false,'vm_prot_fixup');
Assert(false,'vm_map');
end;
end;
@ -301,6 +340,86 @@ begin
end;
end;
//
function IDX_TO_OFF(x:DWORD):QWORD; inline;
begin
Result:=QWORD(x) shl PAGE_SHIFT;
end;
function OFF_TO_IDX(x:QWORD):DWORD; inline;
begin
Result:=QWORD(x) shr PAGE_SHIFT;
end;
//
procedure t_danger_zone.Init;
begin
mtx_init(Flock,'danger_zone');
end;
procedure t_danger_zone.Done;
begin
mtx_destroy(Flock);
end;
procedure t_danger_zone.set_range(start,__end:vm_offset_t);
var
range:t_danger_range;
begin
range.start:=OFF_TO_IDX(start);
range.__end:=OFF_TO_IDX(__end);
System.InterlockedExchange64(QWORD(Frange),QWORD(range));
end;
function t_danger_zone.in_range(addr,size:vm_offset_t):Boolean;
var
range:t_danger_range;
begin
QWORD(range):=System.InterlockedExchangeAdd64(QWORD(Frange),0);
Result:=(addr>=IDX_TO_OFF(range.start)) and ((addr+size)<IDX_TO_OFF(range.__end));
end;
function msleep(ident :Pointer;
lock :p_mtx;
priority:Integer;
wmesg :PChar;
timo :Int64):Integer; external;
procedure wakeup(ident:Pointer); external;
procedure t_danger_zone.d_wait(addr,size:vm_offset_t);
begin
mtx_lock(Flock);
if in_range(addr,size) then
begin
msleep(@Self,@Flock,PCATCH,'danger_zone',hz);
end;
mtx_unlock(Flock);
end;
procedure t_danger_zone.d_wakeup;
begin
wakeup(@Self);
end;
procedure t_danger_zone.lock;
begin
mtx_lock(Flock);
end;
procedure t_danger_zone.unlock;
begin
mtx_unlock(Flock);
end;
//
function vm_remap(map:p_vm_nt_map;
entry1:p_vm_nt_entry;
entry2:p_vm_nt_entry;
@ -330,6 +449,7 @@ begin
ets[1]:=entry2;
ets[2]:=entry3;
//get first entry
first:=nil;
For i:=Low(ets) to High(ets) do
begin
@ -350,6 +470,7 @@ begin
e_count:=0;
r_count:=0;
//get range
For i:=Low(ets) to High(ets) do
begin
if (ets[i]<>nil) then
@ -372,6 +493,8 @@ begin
size:=__end-start;
//danger zone
map^.danger_zone.set_range(start,__end);
map^.danger_zone.lock;
//unmap all
For i:=Low(stat.range) to High(stat.range) do
@ -473,13 +596,17 @@ begin
vm_prot_fixup(map,
ets[i]^.start,
ets[i]^.__end,
max
max,
2
);
end;
end;
end;
//danger zone
map^.danger_zone.set_range(0,0);
map^.danger_zone.unlock;
map^.danger_zone.d_wakeup;
Result:=True;
end;
@ -555,6 +682,7 @@ begin
map^.min_offset:=min;
map^.max_offset:=max;
map^.root:=nil;
map^.danger_zone.Init;
end;
procedure vm_nt_entry_dispose(map:p_vm_nt_map;entry:p_vm_nt_entry); inline;
@ -1035,6 +1163,7 @@ procedure vm_nt_map_protect(map:p_vm_nt_map;
var
entry:p_vm_nt_entry;
base,size:vm_size_t;
max:Integer;
r:Integer;
begin
if (start=__end) then Exit;
@ -1064,7 +1193,15 @@ begin
size:=size-base;
r:=md_protect(Pointer(base),size,(prot and VM_RW));
if (entry^.obj<>nil) then
begin
max:=entry^.obj^.maxp;
end else
begin
max:=0;
end;
r:=md_protect(Pointer(base),size,(prot and max and VM_RW));
if (r<>0) then
begin
Writeln('failed md_protect(',HexStr(base,11),',',HexStr(base+size,11),'):0x',HexStr(r,8));
@ -1075,6 +1212,49 @@ begin
end;
end;
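//vm_nt_map_prot_fix: walks every backed entry overlapping [start,__end), clamps the range to
//the entry bounds and re-derives the hardware page protections via vm_prot_fixup(..., mode).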
procedure vm_nt_map_prot_fix(map:p_vm_nt_map;
start:vm_offset_t;
__end:vm_offset_t;
mode :Integer);
var
entry:p_vm_nt_entry;
e_start,e___end:vm_size_t;
begin
if (start=__end) then Exit;
if (not vm_nt_map_lookup_entry(map, start, @entry)) then
begin
entry:=entry^.next;
end else
begin
entry:=entry;
end;
while (entry<>@map^.header) and (entry^.start<__end) do
begin
if (entry^.obj<>nil) then
begin
e_start:=entry^.start;
e___end:=entry^.__end;
if (e_start<start) then
begin
e_start:=start;
end;
if (e___end>__end) then
begin
e___end:=__end;
end;
vm_prot_fixup(map,e_start,e___end,entry^.obj^.maxp,mode);
end;
entry:=entry^.next;
end;
end;
//rdi, rsi
procedure ZeroPages(addr:Pointer;size:Ptruint); assembler nostackframe SysV_ABI_CDecl;
label