diff --git a/kernel/libthr/thr_init.pas b/kernel/libthr/thr_init.pas index b248405d..f721f791 100644 --- a/kernel/libthr/thr_init.pas +++ b/kernel/libthr/thr_init.pas @@ -95,6 +95,9 @@ procedure _libpthread_init(curthread:p_pthread); implementation +uses + sys_mmap; + const g_user_stacksize=$10000; @@ -147,12 +150,12 @@ begin thread^.attr:=_pthread_attr_default; - //ret:=mmap(_usrstack-_thr_guard_default-_thr_stack_initial)), - // _thr_guard_default,0,$1000,-1,0); //MAP_ANON + ret:=mmap(_usrstack-_thr_guard_default-_thr_stack_initial,_thr_guard_default,0,MAP_ANON,-1,0); - if (ret=Pointer(-1)) then //MAP_FAILED + if (ret=MAP_FAILED) then begin Assert(false,'Cannot allocate red zone for initial thread'); + Exit; end; thread^.attr.stackaddr_attr:=(_usrstack-_thr_stack_initial); thread^.attr.stacksize_attr:=_thr_stack_initial; diff --git a/rtl/atomic.pas b/rtl/atomic.pas index 52ac896a..098db820 100644 --- a/rtl/atomic.pas +++ b/rtl/atomic.pas @@ -35,93 +35,63 @@ Const function load_consume(Var addr:Pointer):Pointer; inline; function load_consume(Var addr:SizeUInt):SizeUInt; inline; -{$IF defined(CPUX86_64)} function load_consume(Var addr:DWORD):DWORD; inline; -{$ENDIF} function load_acquire(Var addr:Pointer):Pointer; inline; function load_acquire(Var addr:SizeUInt):SizeUInt; inline; -{$IF defined(CPUX86_64)} function load_acquire(Var addr:DWORD):DWORD; inline; -{$ENDIF} function load_acq_rel(Var addr:Pointer):Pointer; inline; function load_acq_rel(Var addr:SizeUInt):SizeUInt; inline; -{$IF defined(CPUX86_64)} function load_acq_rel(Var addr:DWORD):DWORD; inline; function load_acq_rel(Var addr:Integer):Integer; inline; -{$ENDIF} Procedure store_release(Var addr:Pointer;v:Pointer); inline; Procedure store_release(Var addr:SizeUInt;v:SizeUInt); inline; -{$IF defined(CPUX86_64)} Procedure store_release(Var addr:DWORD;v:DWORD); inline; Procedure store_release(Var addr:Integer;v:Integer); inline; -{$ENDIF} Procedure store_seq_cst(Var addr:Pointer;v:Pointer); inline; 
Procedure store_seq_cst(Var addr:SizeUInt;v:SizeUInt); inline; -{$IF defined(CPUX86_64)} Procedure store_seq_cst(Var addr:DWORD;v:DWORD); inline; Procedure store_seq_cst(Var addr:Integer;v:Integer); inline; -{$ENDIF} function _CAS(Var addr:Pointer;Comp,New:Pointer):Pointer; inline; function _CAS(Var addr:SizeUInt;Comp,New:SizeUInt):SizeUInt; inline; -{$IF defined(CPUX86_64)} function _CAS(Var addr:DWORD;Comp,New:DWORD):DWORD; inline; -{$ENDIF} function CAS(Var addr:Pointer;Comp,New:Pointer):Boolean; inline; function CAS(Var addr:SizeUInt;Comp,New:SizeUInt):Boolean; inline; -{$IF defined(CPUX86_64)} function CAS(Var addr:DWORD;Comp,New:DWORD):Boolean; inline; function CAS(Var addr:Integer;Comp,New:Integer):Boolean; inline; -{$ENDIF} function XCHG(Var addr:Pointer;New:Pointer):Pointer; inline; function XCHG(Var addr:SizeUInt;New:SizeUInt):SizeUInt; inline; -{$IF defined(CPUX86_64)} function XCHG(Var addr:DWORD;New:DWORD):DWORD; inline; function XCHG(Var addr:Integer;New:Integer):Integer; inline; -{$ENDIF} function fetch_add(Var addr:SizeUInt;i:SizeUInt):SizeUInt; inline; -{$IF defined(CPUX86_64)} function fetch_add(Var addr:DWORD;i:DWORD):DWORD; inline; function fetch_add(Var addr:Integer;i:Integer):Integer; inline; -{$ENDIF} function fetch_sub(Var addr:SizeUInt;i:SizeUInt):SizeUInt; inline; -{$IF defined(CPUX86_64)} function fetch_sub(Var addr:DWORD;i:DWORD):DWORD; inline; function fetch_sub(Var addr:Integer;i:Integer):Integer; inline; -{$ENDIF} function fetch_xor(var Target:SizeUInt;mask:SizeUInt):Boolean; ms_abi_default; -{$IFDEF CPUX86_64} function fetch_xor(var Target:DWORD;mask:DWORD):Boolean; ms_abi_default; -{$ENDIF} function fetch_or(var Target:SizeUInt;mask:SizeUInt):Boolean; ms_abi_default; -{$IFDEF CPUX86_64} function fetch_or(var Target:DWORD;mask:DWORD):Boolean; ms_abi_default; -{$ENDIF} function fetch_and(var Target:SizeUInt;mask:SizeUInt):Boolean; ms_abi_default; -{$IFDEF CPUX86_64} function fetch_and(var Target:DWORD;mask:DWORD):Boolean; 
ms_abi_default; -{$ENDIF} function test_and_set(var Target:SizeUInt;bit:byte):Boolean; ms_abi_default; -{$IFDEF CPUX86_64} -function test_and_set(var Target:DWORD;bit:byte):Boolean; ms_abi_default; -{$ENDIF} +function test_and_set(var Target:DWORD;bit:byte):Boolean; ms_abi_default; function test_and_reset(var Target:qword;bit:byte):Boolean; ms_abi_default; -{$IFDEF CPUX86_64} function test_and_reset(var Target:DWORD;bit:byte):Boolean; ms_abi_default; -{$ENDIF} function marked_ptr(P:Pointer;B:SizeUInt=0):Pointer; inline; function ptr1(P:Pointer):Pointer; inline; @@ -129,6 +99,9 @@ function bits1(P:Pointer):SizeUInt; inline; function bits1(P:SizeUInt):SizeUInt; inline; procedure spin_pause; +procedure atomic_set_int (addr:PInteger;val:Integer); sysv_abi_default; +procedure atomic_clear_int(addr:PInteger;val:Integer); sysv_abi_default; + implementation function load_consume(Var addr:Pointer):Pointer; inline; @@ -143,13 +116,11 @@ begin Result:=addr; end; -{$IF defined(CPUX86_64)} function load_consume(Var addr:DWORD):DWORD; inline; begin ReadDependencyBarrier; Result:=addr; end; -{$ENDIF} function load_acquire(Var addr:Pointer):Pointer; inline; begin @@ -163,13 +134,11 @@ begin Result:=addr; end; -{$IF defined(CPUX86_64)} function load_acquire(Var addr:DWORD):DWORD; inline; begin ReadBarrier; Result:=addr; end; -{$ENDIF} function load_acq_rel(Var addr:Pointer):Pointer; inline; begin @@ -181,7 +150,6 @@ begin Result:=SizeUInt(load_acq_rel(Pointer(addr))); end; -{$IF defined(CPUX86_64)} function load_acq_rel(Var addr:DWORD):DWORD; inline; begin Result:=System.InterLockedExchangeAdd(addr,0); @@ -191,7 +159,6 @@ function load_acq_rel(Var addr:Integer):Integer; inline; begin Result:=System.InterLockedExchangeAdd(addr,0); end; -{$ENDIF} Procedure store_release(Var addr:Pointer;v:Pointer); inline; begin @@ -205,7 +172,6 @@ begin addr:=v; end; -{$IF defined(CPUX86_64)} Procedure store_release(Var addr:DWORD;v:DWORD); inline; begin WriteBarrier; @@ -217,7 +183,6 @@ begin 
WriteBarrier; addr:=v; end; -{$ENDIF} Procedure store_seq_cst(Var addr:Pointer;v:Pointer); inline; begin @@ -229,7 +194,6 @@ begin store_seq_cst(Pointer(addr),Pointer(v)); end; -{$IF defined(CPUX86_64)} Procedure store_seq_cst(Var addr:DWORD;v:DWORD); inline; begin System.InterLockedExchange(addr,v); @@ -239,7 +203,6 @@ Procedure store_seq_cst(Var addr:Integer;v:Integer); inline; begin System.InterLockedExchange(addr,v); end; -{$ENDIF} function _CAS(Var addr:Pointer;Comp,New:Pointer):Pointer; inline; begin @@ -251,7 +214,6 @@ begin Result:=SizeUInt(system.InterlockedCompareExchange(Pointer(addr),Pointer(New),Pointer(Comp))); end; -{$IF defined(CPUX86_64)} function _CAS(Var addr:DWORD;Comp,New:DWORD):DWORD; inline; begin Result:=system.InterlockedCompareExchange(addr,New,Comp); @@ -261,7 +223,6 @@ function _CAS(Var addr:Integer;Comp,New:Integer):Integer; inline; begin Result:=system.InterlockedCompareExchange(addr,New,Comp); end; -{$ENDIF} function CAS(Var addr:Pointer;Comp,New:Pointer):Boolean; inline; begin @@ -273,7 +234,6 @@ begin Result:=system.InterlockedCompareExchange(Pointer(addr),Pointer(New),Pointer(Comp))=Pointer(Comp); end; -{$IF defined(CPUX86_64)} function CAS(Var addr:DWORD;Comp,New:DWORD):Boolean; inline; begin Result:=system.InterlockedCompareExchange(addr,New,Comp)=Comp; @@ -283,7 +243,6 @@ function CAS(Var addr:Integer;Comp,New:Integer):Boolean; inline; begin Result:=system.InterlockedCompareExchange(addr,New,Comp)=Comp; end; -{$ENDIF} function XCHG(Var addr:Pointer;New:Pointer):Pointer; inline; begin @@ -295,7 +254,6 @@ begin Result:=SizeUInt(System.InterLockedExchange(Pointer(addr),Pointer(New))); end; -{$IF defined(CPUX86_64)} function XCHG(Var addr:DWORD;New:DWORD):DWORD; inline; begin Result:=System.InterLockedExchange(addr,New); @@ -305,14 +263,12 @@ function XCHG(Var addr:Integer;New:Integer):Integer; inline; begin Result:=System.InterLockedExchange(addr,New); end; -{$ENDIF} function fetch_add(Var addr:SizeUInt;i:SizeUInt):SizeUInt; 
inline; begin Result:=SizeUInt(System.InterLockedExchangeAdd(Pointer(addr),Pointer(i))); end; -{$IF defined(CPUX86_64)} function fetch_add(Var addr:DWORD;i:DWORD):DWORD; inline; begin Result:=System.InterLockedExchangeAdd(addr,i); @@ -322,14 +278,12 @@ function fetch_add(Var addr:Integer;i:Integer):Integer; inline; begin Result:=System.InterLockedExchangeAdd(addr,i); end; -{$ENDIF} function fetch_sub(Var addr:SizeUInt;i:SizeUInt):SizeUInt; inline; begin Result:=fetch_add(addr,SizeUInt(-SizeInt(i))); end; -{$IF defined(CPUX86_64)} function fetch_sub(Var addr:DWORD;i:DWORD):DWORD; inline; begin Result:=fetch_add(addr,DWORD(-Integer(i))); @@ -339,18 +293,9 @@ function fetch_sub(Var addr:Integer;i:Integer):Integer; inline; begin Result:=fetch_add(addr,-i); end; -{$ENDIF} //xor -{$IFDEF CPU386} -function fetch_xor(var Target:SizeUInt;mask:SizeUInt):Boolean; ms_abi_default; assembler; nostackframe; -asm - lock xor %edx,(%ecx) - setz %al -end; -{$ELSE} -{$IFDEF CPUX86_64} function fetch_xor(var Target:SizeUInt;mask:SizeUInt):Boolean; ms_abi_default; assembler; nostackframe; asm lock xor %rdx,(%rcx) @@ -362,30 +307,9 @@ asm lock xor %edx,(%rcx) setz %al end; -{$ELSE} -function fetch_xor(var Target:SizeUInt;mask:SizeUInt):Boolean; ms_abi_default; -Var - P,N:SizeUInt; -begin - repeat - P:=load_consume(Target); - N:=P xor mask; - until CAS(Target,P,N); - Result:=(N=0); -end; -{$ENDIF} -{$ENDIF} //or -{$IFDEF CPU386} -function fetch_or(var Target:SizeUInt;mask:SizeUInt):Boolean; ms_abi_default; assembler; nostackframe; -asm - lock or %edx,(%ecx) - setz %al -end; -{$ELSE} -{$IFDEF CPUX86_64} function fetch_or(var Target:SizeUInt;mask:SizeUInt):Boolean; ms_abi_default; assembler; nostackframe; asm lock or %rdx,(%rcx) @@ -397,30 +321,9 @@ asm lock or %edx,(%rcx) setz %al end; -{$ELSE} -function fetch_or(var Target:SizeUInt;mask:SizeUInt):Boolean; ms_abi_default; -Var - P,N:SizeUInt; -begin - repeat - P:=load_consume(Target); - N:=P or mask; - until CAS(Target,P,N); - 
Result:=(N=0); -end; -{$ENDIF} -{$ENDIF} //and -{$IFDEF CPU386} -function fetch_and(var Target:SizeUInt;mask:SizeUInt):Boolean; ms_abi_default; assembler; nostackframe; -asm - lock and %edx,(%ecx) - setz %al -end; -{$ELSE} -{$IFDEF CPUX86_64} function fetch_and(var Target:SizeUInt;mask:SizeUInt):Boolean; ms_abi_default; assembler; nostackframe; asm lock and %rdx,(%rcx) @@ -432,30 +335,9 @@ asm lock and %edx,(%rcx) setz %al end; -{$ELSE} -function fetch_and(var Target:SizeUInt;mask:SizeUInt):Boolean; ms_abi_default; -Var - P,N:SizeUInt; -begin - repeat - P:=load_consume(Target); - N:=P and mask; - until CAS(Target,P,N); - Result:=(N=0); -end; -{$ENDIF} -{$ENDIF} //bts -{$IFDEF CPU386} -function test_and_set(var Target:SizeUInt;bit:byte):Boolean; ms_abi_default; assembler; nostackframe; -asm - lock bts %edx,(%ecx) - setc %al -end; -{$ELSE} -{$IFDEF CPUX86_64} function test_and_set(var Target:SizeUInt;bit:byte):Boolean; ms_abi_default; assembler; nostackframe; asm lock bts %rdx,(%rcx) @@ -467,31 +349,9 @@ asm lock bts %edx,(%rcx) setc %al end; -{$ELSE} -function test_and_set(var Target:SizeUInt;bit:byte):Boolean; ms_abi_default; -Var - P,N,M:SizeUInt; -begin - M:=1 shl bit; - repeat - P:=load_consume(Target); - N:=P or M; - until CAS(Target,P,N); - Result:=(P and M)<>0; -end; -{$ENDIF} -{$ENDIF} //btr -{$IFDEF CPU386} -function test_and_reset(var Target:qword;bit:byte):Boolean; ms_abi_default; assembler; nostackframe; -asm - lock btr %edx,(%ecx) - setc %al -end; -{$ELSE} -{$IFDEF CPUX86_64} function test_and_reset(var Target:qword;bit:byte):Boolean; ms_abi_default; assembler; nostackframe; asm lock btr %rdx,(%rcx) @@ -503,20 +363,6 @@ asm lock btr %edx,(%rcx) setc %al end; -{$ELSE} -function test_and_reset(var Target:SizeUInt;bit:byte):Boolean; ms_abi_default; -Var - P,N,M:SizeUInt; -begin - M:=not (1 shl bit); - repeat - P:=load_consume(Target); - N:=P and M; - until CAS(Target,P,N); - Result:=(P and M)<>0; -end; -{$ENDIF} -{$ENDIF} function 
marked_ptr(P:Pointer;B:SizeUInt=0):Pointer; inline; begin @@ -538,16 +384,21 @@ begin Result:=SizeUInt(P) and SizeUInt(1); end; -{$if defined(CPU386) or defined(CPUX86_64)} procedure spin_pause; assembler; nostackframe; asm pause end; -{$ELSE} -procedure spin_pause; inline; -begin + +procedure atomic_set_int(addr:PInteger;val:Integer); assembler; nostackframe; sysv_abi_default; +asm + lock orl %esi,(%rdi) +end; + +procedure atomic_clear_int(addr:PInteger;val:Integer); assembler; nostackframe; sysv_abi_default; +asm + not %esi + lock andl %esi,(%rdi) end; -{$ENDIF} end. diff --git a/rtl/mqueue.pas b/rtl/mqueue.pas index a6e35d00..38b2451f 100644 --- a/rtl/mqueue.pas +++ b/rtl/mqueue.pas @@ -24,6 +24,23 @@ type le_prev:PPointer; end; + P_STAILQ_HEAD=^STAILQ_HEAD; + STAILQ_HEAD=packed record + stqh_first:Pointer; + stqh_last :PPointer; + end; + + P_STAILQ_ENTRY=^STAILQ_ENTRY; + STAILQ_ENTRY=packed record + stqe_next:Pointer; + end; + + + P_SLIST_ENTRY=^SLIST_ENTRY; + SLIST_ENTRY=packed record + sle_next:Pointer; + end; + procedure TAILQ_INIT (head:Pointer); inline; function TAILQ_EMPTY (head:Pointer):Boolean; inline; function TAILQ_FIRST (head:Pointer):Pointer; inline; @@ -42,6 +59,17 @@ function LIST_NEXT (elm,field:Pointer):Pointer; inline; procedure LIST_INSERT_HEAD (head,elm,field:Pointer); inline; procedure LIST_REMOVE (elm,field:Pointer); inline; +procedure STAILQ_INIT(head:Pointer); inline; +function STAILQ_EMPTY(head:Pointer):Boolean; inline; +function STAILQ_FIRST(head:Pointer):Pointer; inline; +function STAILQ_NEXT(elm,field:Pointer):Pointer; inline; +procedure STAILQ_INSERT_AFTER(head,tqelm,elm,field:Pointer); inline; +procedure STAILQ_INSERT_HEAD(head,elm,field:Pointer); inline; +procedure STAILQ_INSERT_TAIL(head,elm,field:Pointer); inline; +procedure STAILQ_REMOVE_AFTER(head,elm,field:Pointer); inline; +procedure STAILQ_REMOVE_HEAD(head,elm,field:Pointer); inline; +procedure STAILQ_REMOVE(head,elm,field:Pointer); inline; + implementation procedure 
TAILQ_INIT(head:Pointer); inline; @@ -187,6 +215,101 @@ end; // +procedure STAILQ_INIT(head:Pointer); inline; +begin + P_STAILQ_HEAD(head)^.stqh_first:=nil; + P_STAILQ_HEAD(head)^.stqh_last :=@P_STAILQ_HEAD(head)^.stqh_first; +end; + +function STAILQ_EMPTY(head:Pointer):Boolean; inline; +begin + Result:=P_STAILQ_HEAD(head)^.stqh_first=nil; +end; + +function STAILQ_FIRST(head:Pointer):Pointer; inline; +begin + Result:=P_STAILQ_HEAD(head)^.stqh_first; +end; + +function STAILQ_NEXT(elm,field:Pointer):Pointer; inline; +begin + Result:=P_STAILQ_ENTRY(field)^.stqe_next; +end; + +procedure STAILQ_INSERT_AFTER(head,tqelm,elm,field:Pointer); inline; +var + offset:ptruint; +begin + offset:=ptruint(field-elm); + P_STAILQ_ENTRY(field)^.stqe_next:=P_STAILQ_ENTRY(tqelm+offset)^.stqe_next; + if (P_STAILQ_ENTRY(field)^.stqe_next=nil) then + begin + P_STAILQ_HEAD(head)^.stqh_last:=@P_STAILQ_ENTRY(field)^.stqe_next; + end; + P_STAILQ_ENTRY(tqelm+offset)^.stqe_next:=elm; +end; + +procedure STAILQ_INSERT_HEAD(head,elm,field:Pointer); inline; +begin + P_STAILQ_ENTRY(field)^.stqe_next:=P_STAILQ_HEAD(head)^.stqh_first; + if (P_STAILQ_HEAD(head)^.stqh_first=nil) then + begin + P_STAILQ_HEAD(head)^.stqh_last:=@P_STAILQ_ENTRY(field)^.stqe_next; + end; + P_STAILQ_HEAD(head)^.stqh_first:=elm; +end; + +procedure STAILQ_INSERT_TAIL(head,elm,field:Pointer); inline; +begin + P_STAILQ_ENTRY(field)^.stqe_next:=nil; + P_STAILQ_HEAD(head)^.stqh_last^:=elm; + P_STAILQ_HEAD(head)^.stqh_last:=@P_STAILQ_ENTRY(field)^.stqe_next; +end; + +procedure STAILQ_REMOVE_AFTER(head,elm,field:Pointer); inline; +var + offset:ptruint; +begin + offset:=ptruint(field-elm); + P_STAILQ_ENTRY(field)^.stqe_next:=P_STAILQ_ENTRY(P_STAILQ_ENTRY(field)^.stqe_next+offset)^.stqe_next; + if (P_STAILQ_ENTRY(field)^.stqe_next=nil) then + begin + P_STAILQ_HEAD(head)^.stqh_last:=@P_STAILQ_ENTRY(field)^.stqe_next; + end; +end; + +procedure STAILQ_REMOVE_HEAD(head,elm,field:Pointer); inline; +var + offset:ptruint; +begin + 
offset:=ptruint(field-elm); + P_STAILQ_HEAD(head)^.stqh_first:=P_STAILQ_ENTRY(P_STAILQ_HEAD(head)^.stqh_first+offset)^.stqe_next; + if (P_STAILQ_HEAD(head)^.stqh_first=nil) then + begin + P_STAILQ_HEAD(head)^.stqh_last:=@P_STAILQ_HEAD(head)^.stqh_first; + end; +end; + +procedure STAILQ_REMOVE(head,elm,field:Pointer); inline; +var + offset:ptruint; + curelm:Pointer; +begin + if (P_STAILQ_HEAD(head)^.stqh_first=elm) then + begin + STAILQ_REMOVE_HEAD(head,elm,field); + end else + begin + offset:=ptruint(field-elm); + curelm:=P_STAILQ_HEAD(head)^.stqh_first; + while (P_STAILQ_ENTRY(curelm+offset)^.stqe_next<>elm) do + begin + curelm:=P_STAILQ_ENTRY(curelm+offset)^.stqe_next; + end; + STAILQ_REMOVE_AFTER(head,curelm,curelm+offset); + end; +end; + end. diff --git a/sys/kern/kern_condvar.pas b/sys/kern/kern_condvar.pas index 24dfad69..f6f4172e 100644 --- a/sys/kern/kern_condvar.pas +++ b/sys/kern/kern_condvar.pas @@ -15,7 +15,6 @@ type t_cv=packed record cv_description:Pchar; cv_waiters :Integer; - _align :Integer; end; procedure cv_init(cvp:p_cv;desc:Pchar); diff --git a/sys/kern/kern_evf.pas b/sys/kern/kern_evf.pas index 33a89972..75d67009 100644 --- a/sys/kern/kern_evf.pas +++ b/sys/kern/kern_evf.pas @@ -51,6 +51,7 @@ type bitPattern:QWORD; mtx :mtx; cv :t_cv; + _align :Integer; list :TAILQ_HEAD; attr :DWORD; wait_count:Integer; @@ -84,7 +85,7 @@ function evf_init(evf:p_evf;attr:DWORD;initPattern:QWORD):Integer; begin evf^.desc.free:=@evf_free; evf^.bitPattern:=initPattern; - mtx_init(evf^.mtx); + mtx_init(evf^.mtx,'evf mtx'); cv_init(@evf^.cv,'evf cv'); TAILQ_INIT(@evf^.list); evf^.wait_count:=0; diff --git a/sys/kern/kern_mtx.pas b/sys/kern/kern_mtx.pas index b64d4f10..d4b7932f 100644 --- a/sys/kern/kern_mtx.pas +++ b/sys/kern/kern_mtx.pas @@ -7,7 +7,10 @@ interface type p_mtx=^mtx; - mtx=TRTLCriticalSection; + mtx=packed record + n:PChar; + c:TRTLCriticalSection; + end; const //Flags for lockinit(). 
@@ -41,7 +44,7 @@ const LK_TOTAL_MASK=(LK_INIT_MASK or LK_EATTR_MASK or LK_TYPE_MASK); -procedure mtx_init (var m:mtx); +procedure mtx_init (var m:mtx;name:PChar); procedure mtx_destroy(var m:mtx); procedure mtx_lock (var m:mtx); function mtx_trylock(var m:mtx):Boolean; @@ -51,34 +54,35 @@ procedure mtx_assert (var m:mtx); implementation -procedure mtx_init(var m:mtx); inline; +procedure mtx_init(var m:mtx;name:PChar); inline; begin - InitCriticalSection(m); + m.n:=name; + InitCriticalSection(m.c); end; procedure mtx_destroy(var m:mtx); inline; begin - DoneCriticalSection(m); + DoneCriticalSection(m.c); end; procedure mtx_lock(var m:mtx); inline; begin - EnterCriticalSection(m); + EnterCriticalSection(m.c); end; function mtx_trylock(var m:mtx):Boolean; inline; begin - Result:=TryEnterCriticalSection(m)<>0; + Result:=TryEnterCriticalSection(m.c)<>0; end; procedure mtx_unlock(var m:mtx); inline; begin - LeaveCriticalSection(m); + LeaveCriticalSection(m.c); end; function mtx_owned(var m:mtx):Boolean; inline; begin - Result:=m.OwningThread=GetCurrentThreadId; + Result:=m.c.OwningThread=GetCurrentThreadId; end; procedure mtx_assert(var m:mtx); inline; diff --git a/sys/kern/kern_mtxpool.pas b/sys/kern/kern_mtxpool.pas new file mode 100644 index 00000000..19217b02 --- /dev/null +++ b/sys/kern/kern_mtxpool.pas @@ -0,0 +1,150 @@ +unit kern_mtxpool; + +{$mode ObjFPC}{$H+} +{$CALLING SysV_ABI_CDecl} + +interface + +uses + kern_mtx; + +const + { Pool sizes must be a power of two } + MTX_POOL_LOCKBUILDER_SIZE=128; + MTX_POOL_SLEEP_SIZE =128; + +type + mtxpool_header=packed record + mtxpool_size :Integer; + mtxpool_mask :Integer; + mtxpool_shift:Integer; + mtxpool_next :Integer; + end; + + pp_mtx_pool=^p_mtx_pool; + p_mtx_pool=^mtx_pool; + mtx_pool=packed object + mtx_pool_header:mtxpool_header; + mtx_pool_ary :array[0..0] of mtx; + property mtx_pool_size :Integer read mtx_pool_header.mtxpool_size write mtx_pool_header.mtxpool_size ; + property mtx_pool_mask :Integer read 
mtx_pool_header.mtxpool_mask write mtx_pool_header.mtxpool_mask ; + property mtx_pool_shift:Integer read mtx_pool_header.mtxpool_shift write mtx_pool_header.mtxpool_shift; + property mtx_pool_next :Integer read mtx_pool_header.mtxpool_next write mtx_pool_header.mtxpool_next ; + end; + + mtx_pool_lockbuilder=packed record + mtx_pool_header:mtxpool_header; + mtx_pool_ary :array[0..MTX_POOL_LOCKBUILDER_SIZE-1] of mtx; + end; + +function mtx_pool_find(pool:p_mtx_pool;ptr:Pointer):p_mtx; +procedure mtx_pool_initialize(pool:p_mtx_pool;mtx_name:PChar;pool_size:Integer); +function mtx_pool_create(mtx_name:PChar;pool_size:Integer):p_mtx_pool; +procedure mtx_pool_destroy(poolp:pp_mtx_pool); +function mtx_pool_alloc(pool:p_mtx_pool):p_mtx; + +implementation + +const + POINTER_BITS =64; + HASH_MULTIPLIER=11400714819323198485; { (2^64)*(sqrt(5)-1)/2 } + +{ + * Return the (shared) pool mutex associated with the specified address. + * The returned mutex is a leaf level mutex, meaning that if you obtain it + * you cannot obtain any other mutexes until you release it. You can + * legally msleep() on the mutex. 
+ } +function mtx_pool_find(pool:p_mtx_pool;ptr:Pointer):p_mtx; +var + p:Integer; +begin + Assert(pool<>nil, ('_mtx_pool_find(): nil pool')); + { + * Fibonacci hash, see Knuth's + * _Art of Computer Programming, Volume 3 / Sorting and Searching_ + } + p:=((HASH_MULTIPLIER * ptruint(ptr)) shr pool^.mtx_pool_shift) and pool^.mtx_pool_mask; + Exit(@pool^.mtx_pool_ary[p]); +end; + +procedure mtx_pool_initialize(pool:p_mtx_pool;mtx_name:PChar;pool_size:Integer); +var + i,maskbits:Integer; +begin + pool^.mtx_pool_size:=pool_size; + pool^.mtx_pool_mask:=pool_size - 1; + i:=1; + maskbits:=0; + while ((i and pool_size)=0) do + begin + Inc(maskbits); + i:=i shl 1; + end; + pool^.mtx_pool_shift:=POINTER_BITS - maskbits; + pool^.mtx_pool_next:=0; + For i:=0 to pool_size-1 do + mtx_init(pool^.mtx_pool_ary[i], mtx_name); +end; + +function powerof2(i:DWord):Boolean; inline; +begin + Result:=popcnt(i)=1; +end; + +function mtx_pool_create(mtx_name:PChar;pool_size:Integer):p_mtx_pool; +var + pool:p_mtx_pool; +begin + if (pool_size <= 0) or (not powerof2(pool_size)) then + begin + Writeln('WARNING: ',mtx_name,' pool size is not a power of 2.'); //Writeln does not expand printf-style %s, so the name is passed as its own argument + pool_size:=128; + end; + pool:=AllocMem(sizeof(mtx_pool)+ + ((pool_size - 1) * sizeof(mtx)) + ); + mtx_pool_initialize(pool, mtx_name, pool_size); + Exit(pool); +end; + +procedure mtx_pool_destroy(poolp:pp_mtx_pool); +var + i:Integer; + pool:p_mtx_pool; +begin + pool:=poolp^; + + For i:=(pool^.mtx_pool_size-1) downto 0 do + begin + mtx_destroy(pool^.mtx_pool_ary[i]); + end; + + FreeMem(pool); + poolp^:=nil; +end; + +{ + * Obtain a (shared) mutex from the pool. The returned mutex is a leaf + * level mutex, meaning that if you obtain it you cannot obtain any other + * mutexes until you release it. You can legally msleep() on the mutex. 
+ } +function mtx_pool_alloc(pool:p_mtx_pool):p_mtx; +var + i:Integer; +begin + Assert(pool<>nil, ('mtx_pool_alloc(): nil pool')); + { + * mtx_pool_next is unprotected against multiple accesses, + * but simultaneous access by two CPUs should not be very + * harmful. + } + i:=pool^.mtx_pool_next; + pool^.mtx_pool_next:=(i + 1) and pool^.mtx_pool_mask; + Exit(@pool^.mtx_pool_ary[i]); +end; + + + +end. + diff --git a/sys/kern/kern_osem.pas b/sys/kern/kern_osem.pas index 7619de8c..0ae03a8f 100644 --- a/sys/kern/kern_osem.pas +++ b/sys/kern/kern_osem.pas @@ -39,6 +39,7 @@ type desc :t_id_desc; mtx :mtx; cv :t_cv; + _align :Integer; list :TAILQ_HEAD; count :Integer; attr :DWORD; @@ -76,7 +77,7 @@ begin sem^.count :=initCount; sem^.init_count:=initCount; sem^.max_count :=max_count; - mtx_init(sem^.mtx); + mtx_init(sem^.mtx,'osem mtx'); cv_init(@sem^.cv,'osem cv'); TAILQ_INIT(@sem^.list); sem^.wait_count:=0; diff --git a/sys/kern/kern_sig.pas b/sys/kern/kern_sig.pas index bdedc1a3..f0edb550 100644 --- a/sys/kern/kern_sig.pas +++ b/sys/kern/kern_sig.pas @@ -652,7 +652,7 @@ procedure siginit; var i:Integer; begin - mtx_init(p_sigacts.ps_mtx); + mtx_init(p_sigacts.ps_mtx,'sigacts'); For i:=1 to NSIG do begin diff --git a/sys/kern/kern_thr.pas b/sys/kern/kern_thr.pas index 76bd9593..c13b5b23 100644 --- a/sys/kern/kern_thr.pas +++ b/sys/kern/kern_thr.pas @@ -150,6 +150,7 @@ type td_timeo :Int64; // td_map_def_user :Pointer; + td_sel :Pointer; end; p_thr_param=^thr_param; @@ -399,7 +400,7 @@ begin end; initialization - mtx_init(p_mtx); + mtx_init(p_mtx,'process lock'); end. 
diff --git a/sys/kern/kern_time.pas b/sys/kern/kern_time.pas index a4047d7c..2e641906 100644 --- a/sys/kern/kern_time.pas +++ b/sys/kern/kern_time.pas @@ -10,8 +10,13 @@ uses ntapi, time; +procedure timevalfix(t1:ptimeval); +procedure timevaladd(t1,t2:ptimeval); +procedure timevalsub(t1,t2:ptimeval); + function cputick2usec(time:QWORD):QWORD; inline; function get_unit_uptime:Int64; +procedure getmicrouptime(tvp:ptimeval); procedure getnanotime(tp:Ptimespec); function kern_clock_gettime_unit(clock_id:Integer;time:PInt64):Integer; @@ -33,6 +38,34 @@ Const POW10_7 =10000000; POW10_9 =1000000000; +procedure timevalfix(t1:ptimeval); +begin + if (t1^.tv_usec < 0) then + begin + Dec(t1^.tv_sec); + Inc(t1^.tv_usec,1000000); + end; + if (t1^.tv_usec >= 1000000) then + begin + Inc(t1^.tv_sec); + Dec(t1^.tv_usec,1000000); + end; +end; + +procedure timevaladd(t1,t2:ptimeval); +begin + Inc(t1^.tv_sec ,t2^.tv_sec); + Inc(t1^.tv_usec,t2^.tv_usec); + timevalfix(t1); +end; + +procedure timevalsub(t1,t2:ptimeval); +begin + Dec(t1^.tv_sec ,t2^.tv_sec); + Dec(t1^.tv_usec,t2^.tv_usec); + timevalfix(t1); +end; + function cputick2usec(time:QWORD):QWORD; inline; begin Result:=time div 10; @@ -63,6 +96,15 @@ begin end; end; +procedure getmicrouptime(tvp:ptimeval); +var + time:Int64; +begin + time:=get_unit_uptime; + tvp^.tv_sec :=(time div POW10_7); + tvp^.tv_usec:=(time mod POW10_7) div 10; +end; + type tunittime=procedure(time:PInt64); stdcall; diff --git a/sys/kern/subr_sleepqueue.pas b/sys/kern/subr_sleepqueue.pas index 8858d365..6aa56f54 100644 --- a/sys/kern/subr_sleepqueue.pas +++ b/sys/kern/subr_sleepqueue.pas @@ -81,7 +81,7 @@ var begin For i:=0 to SC_MASK do begin - mtx_init(sleepq_chains[i].sc_lock); + mtx_init(sleepq_chains[i].sc_lock,'sleepq chain'); end; end; diff --git a/sys/kern/systm.pas b/sys/kern/systm.pas index 91dc4da4..cee3a98b 100644 --- a/sys/kern/systm.pas +++ b/sys/kern/systm.pas @@ -15,6 +15,7 @@ const function copyin(udaddr,kaddr:Pointer;len:ptruint):Integer; 
inline; function copyinstr(udaddr,kaddr:Pointer;len:ptruint;lencopied:pptruint):Integer; function copyout(kaddr,udaddr:Pointer;len:ptruint):Integer; inline; +function fubyte(var base:Byte):Byte; inline; function fuword32(var base:DWORD):DWORD; inline; function fuword64(var base:QWORD):QWORD; inline; function casuword32(var base:DWORD;oldval,newval:DWORD):DWORD; inline; @@ -72,6 +73,14 @@ begin end; end; +function fubyte(var base:Byte):Byte; inline; +begin + if (NtReadVirtualMemory(NtCurrentProcess,@base,@Result,SizeOf(Byte),nil)<>0) then + begin + Result:=BYTE(-1); + end; +end; + function fuword32(var base:DWORD):DWORD; inline; begin if (NtReadVirtualMemory(NtCurrentProcess,@base,@Result,SizeOf(DWORD),nil)<>0) then diff --git a/sys/signal.pas b/sys/signal.pas index 2a6b2e7e..2abbfa9f 100644 --- a/sys/signal.pas +++ b/sys/signal.pas @@ -1,6 +1,7 @@ unit signal; {$mode ObjFPC}{$H+} +{$CALLING SysV_ABI_CDecl} interface diff --git a/sys/test/project1.lpi b/sys/test/project1.lpi index b193b911..c4abb24d 100644 --- a/sys/test/project1.lpi +++ b/sys/test/project1.lpi @@ -349,6 +349,46 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/sys/test/project1.lpr b/sys/test/project1.lpr index f5c03ccb..7afa8722 100644 --- a/sys/test/project1.lpr +++ b/sys/test/project1.lpr @@ -44,7 +44,10 @@ uses vfs_mount, vfs_default, sysent, - vfs_syscalls; + vfs_syscalls, + vsys_generic, + vsocket, + vsocketvar; var mtx:umutex; diff --git a/sys/time.pas b/sys/time.pas index 9b29a15c..324646aa 100644 --- a/sys/time.pas +++ b/sys/time.pas @@ -54,27 +54,75 @@ type tz_dstsec :DWORD; end; +const + tick=100; + hz=10000000; + +function _usec2msec(usec:QWORD):QWORD; //Microsecond to Milisecond +function _msec2usec(msec:QWORD):QWORD; //Milisecond to Microsecond +function _usec2nsec(usec:QWORD):QWORD; //Microsecond to Nanosecond +function _nsec2usec(nsec:QWORD):QWORD; //Nanosecond to Microsecond +function _msec2nsec(msec:QWORD):QWORD; //Milisecond to 
Nanosecond +function _nsec2msec(nsec:QWORD):QWORD; //Nanosecond to Milisecond + function timespeccmp_lt(tvp,uvp:ptimespec):Integer; inline; procedure TIMEVAL_TO_TIMESPEC(tv:ptimeval;ts:ptimespec); inline; procedure TIMESPEC_TO_TIMEVAL(tv:ptimeval;ts:ptimespec); inline; + function TIMESPEC_TO_UNIT(ts:ptimespec):Int64; inline; //Unit +function TIMEVAL_TO_UNIT (tv:ptimeval ):Int64; inline; //Unit +function USEC_TO_UNIT (usec:QWORD ):Int64; inline; //Unit + function tvtohz(time:Int64):Int64; inline; procedure usec2timespec(ts:ptimespec;timeo:DWORD); procedure TIMESPEC_ADD(dst,src,val:ptimespec); procedure TIMESPEC_SUB(dst,src,val:ptimespec); +function itimerfix(tv:ptimeval):Integer; + function clock_gettime(clock_id:Integer;tp:Ptimespec):Integer; function clock_getres(clock_id:Integer;tp:Ptimespec):Integer; implementation uses + errno, trap, thr_error, kern_time; +function _usec2msec(usec:QWORD):QWORD; //Microsecond to Milisecond +begin + Result:=(usec+999) div 1000; +end; + +function _msec2usec(msec:QWORD):QWORD; //Milisecond to Microsecond +begin + Result:=msec*1000; +end; + +function _usec2nsec(usec:QWORD):QWORD; //Microsecond to Nanosecond +begin + Result:=usec*1000; +end; + +function _nsec2usec(nsec:QWORD):QWORD; //Nanosecond to Microsecond +begin + Result:=(nsec+999) div 1000; +end; + +function _msec2nsec(msec:QWORD):QWORD; //Milisecond to Nanosecond +begin + Result:=msec*1000000; +end; + +function _nsec2msec(nsec:QWORD):QWORD; //Nanosecond to Milisecond +begin + Result:=(nsec+999999) div 1000000; +end; + function timespeccmp_lt(tvp,uvp:ptimespec):Integer; inline; begin if (tvp^.tv_sec=uvp^.tv_sec) then @@ -103,6 +151,16 @@ begin Result:=(QWORD(ts^.tv_sec)*10000000)+(QWORD(ts^.tv_nsec) div 100); end; +function TIMEVAL_TO_UNIT(tv:ptimeval):Int64; inline; //Unit +begin + Result:=(QWORD(tv^.tv_sec)*10000000)+(QWORD(tv^.tv_usec)*10); +end; + +function USEC_TO_UNIT(usec:QWORD):Int64; inline; //Unit +begin + Result:=(usec*10); +end; + function tvtohz(time:Int64):Int64; 
inline; begin Result:=time; @@ -136,6 +194,21 @@ begin end; end; +{ + * Check that a proposed value to load into the .it_value or + * .it_interval part of an interval timer is acceptable, and + * fix it to have at least minimal value (i.e. if it is less + * than the resolution of the clock, round it up.) + } +function itimerfix(tv:ptimeval):Integer; +begin + if (tv^.tv_sec < 0) or (tv^.tv_usec < 0) or (tv^.tv_usec >= 1000000) then + Exit(EINVAL); + if (tv^.tv_sec=0) and (tv^.tv_usec<>0) and (tv^.tv_usec < tick) then + tv^.tv_usec:=tick; + Exit(0); +end; + function clock_gettime(clock_id:Integer;tp:Ptimespec):Integer; assembler; nostackframe; asm movq sys_clock_gettime,%rax diff --git a/sys/vfs/kern_descrip.pas b/sys/vfs/kern_descrip.pas index f3ab2e76..d2cea664 100644 --- a/sys/vfs/kern_descrip.pas +++ b/sys/vfs/kern_descrip.pas @@ -15,7 +15,8 @@ uses vfcntl, vfilio, vmount, - vfs_vnode; + vfs_vnode, + vsocketvar; const FGET_GETCAP=$00000001; @@ -80,22 +81,22 @@ function fgetvp_write(fd:Integer; function fgetsock(fd:Integer; rights:cap_rights_t; - spp:PPointer; //socket ** + spp:pp_socket; fflagp:PDWORD):Integer; procedure fdclose(fp:p_file;idx:Integer); function dupfdopen(indx,dfd,mode,error:Integer):Integer; procedure finit(fp:p_file;flag:DWORD;_type:Word;data:Pointer;ops:p_fileops); -function badfo_readwrite(fp:p_file;uio:p_uio;flags:Integer):Integer; -function badfo_truncate(fp:p_file;length:Int64):Integer; -function badfo_ioctl(fp:p_file;com:QWORD;data:Pointer):Integer; -function badfo_poll(fp:p_file;events:Integer):Integer; -function badfo_kqfilter(fp:p_file;kn:Pointer):Integer; -function badfo_stat(fp:p_file;sb:p_stat):Integer; -function badfo_close(fp:p_file):Integer; -function badfo_chmod(fp:p_file;mode:mode_t):Integer; -function badfo_chown(fp:p_file;uid:uid_t;gid:gid_t):Integer; +function badfo_readwrite(fp:p_file;uio:p_uio;flags:Integer):Integer; +function badfo_truncate(fp:p_file;length:Int64):Integer; +function 
badfo_ioctl(fp:p_file;com:QWORD;data:Pointer):Integer; +function badfo_poll(fp:p_file;events:Integer):Integer; +function badfo_kqfilter(fp:p_file;kn:Pointer):Integer; +function badfo_stat(fp:p_file;sb:p_stat):Integer; +function badfo_close(fp:p_file):Integer; +function badfo_chmod(fp:p_file;mode:mode_t):Integer; +function badfo_chown(fp:p_file;uid:uid_t;gid:gid_t):Integer; const badfileops:fileops=( @@ -117,12 +118,10 @@ var p_flag:Integer; end; -procedure atomic_set_int(addr:PInteger;val:Integer); sysv_abi_default; -procedure atomic_clear_int(addr:PInteger;val:Integer); sysv_abi_default; - implementation uses + atomic, errno, systm, vfiledesc, @@ -304,17 +303,6 @@ begin Exit(0); end; -procedure atomic_set_int(addr:PInteger;val:Integer); assembler; nostackframe; sysv_abi_default; -asm - lock orl %esi,(%rdi) -end; - -procedure atomic_clear_int(addr:PInteger;val:Integer); assembler; nostackframe; sysv_abi_default; -asm - not %esi - lock andl %esi,(%rdi) -end; - function kern_fcntl(fd,cmd:Integer;arg:QWORD):Integer; label _break, @@ -391,10 +379,10 @@ begin end; if ((arg and FD_CLOEXEC)<>0) then begin - fp^.f_exclose:=fp^.f_exclose or UF_EXCLOSE; + atomic_set_int(@fp^.f_exclose,UF_EXCLOSE); end else begin - fp^.f_exclose:=(fp^.f_exclose and (not UF_EXCLOSE)); + atomic_clear_int(@fp^.f_exclose,UF_EXCLOSE); end; end; @@ -724,7 +712,7 @@ begin begin Exit(EBADF); end; - fp^.f_exclose:=fp^.f_exclose or UF_EXCLOSE; + atomic_set_int(@fp^.f_exclose,UF_EXCLOSE); fdrop(fp); end; Exit(0); @@ -1194,7 +1182,7 @@ begin if ((flags and O_CLOEXEC)<>0) then begin - fp^.f_exclose:=fp^.f_exclose or UF_EXCLOSE; + atomic_set_int(@fp^.f_exclose,UF_EXCLOSE); end; Exit(0); @@ -1490,7 +1478,7 @@ end; } function fgetsock(fd:Integer; rights:cap_rights_t; - spp:PPointer; //socket ** + spp:pp_socket; fflagp:PDWORD):Integer; var fp:p_file; @@ -1510,9 +1498,9 @@ begin spp^:=fp^.f_data; if (fflagp<>nil) then fflagp^:=fp^.f_flag; - //SOCK_LOCK(spp^); - //soref(spp^); - //SOCK_UNLOCK(spp^); + 
SOCK_LOCK(spp^); + soref(spp^); + SOCK_UNLOCK(spp^); end; fdrop(fp); diff --git a/sys/vfs/subr_uio.pas b/sys/vfs/subr_uio.pas index d8354a06..6141e3a0 100644 --- a/sys/vfs/subr_uio.pas +++ b/sys/vfs/subr_uio.pas @@ -14,8 +14,13 @@ function uiomove(cp:Pointer;n:Integer;uio:p_uio):Integer; function uiomove_nofault(cp:Pointer;n:Integer;uio:p_uio):Integer; function uiomove_faultflag(cp:Pointer;n:Integer;uio:p_uio;nofault:Integer):Integer; +function copyinuio(iovp:p_iovec;iovcnt:DWORD;uiop:pp_uio):Integer; + implementation +uses + errno; + function uiomove(cp:Pointer;n:Integer;uio:p_uio):Integer; begin Exit(uiomove_faultflag(cp, n, uio, 0)); @@ -101,5 +106,45 @@ _out: Exit(error); end; +function copyinuio(iovp:p_iovec;iovcnt:DWORD;uiop:pp_uio):Integer; +var + iov:p_iovec; + uio:p_uio; + iovlen:DWORD; + error,i:Integer; +begin + uiop^:=nil; + if (iovcnt > UIO_MAXIOV) then + Exit(EINVAL); + iovlen:=iovcnt * sizeof (iovec); + uio:=AllocMem(iovlen + sizeof(t_uio)); + iov:=p_iovec(uio + 1); + error:=copyin(iovp, iov, iovlen); + if (error<>0) then + begin + FreeMem(uio); + Exit(error); + end; + uio^.uio_iov :=iov; + uio^.uio_iovcnt:=iovcnt; + uio^.uio_segflg:=UIO_USERSPACE; + uio^.uio_offset:=-1; + uio^.uio_resid :=0; + + For i:=0 to iovcnt-1 do + begin + if (iov^.iov_len > IOSIZE_MAX - uio^.uio_resid) then + begin + FreeMem(uio); + Exit(EINVAL); + end; + Inc(uio^.uio_resid,iov^.iov_len); + Inc(iov); + end; + + uiop^:=uio; + Exit(0); +end; + end. 
diff --git a/sys/vfs/vfs_default.pas b/sys/vfs/vfs_default.pas index 9c5c8d67..c3b0e206 100644 --- a/sys/vfs/vfs_default.pas +++ b/sys/vfs/vfs_default.pas @@ -16,10 +16,11 @@ uses vfs_mount, vnamei, vfcntl, + vpoll, + vsocketvar, kern_thr, kern_mtx; - function vop_eopnotsupp(ap:Pointer):Integer; function vop_ebadf(ap:Pointer):Integer; function vop_enotty(ap:Pointer):Integer; @@ -176,7 +177,8 @@ uses errno, vfs_subr, vfs_vnops, - vfs_lookup; + vfs_lookup, + vsys_generic; { * Series of placeholder functions for various error returns for @@ -564,8 +566,7 @@ end; } function vop_nopoll(ap:p_vop_poll_args):Integer; begin - Exit(EOPNOTSUPP); - //Exit(poll_no_poll(ap^.a_events)); + Exit(poll_no_poll(ap^.a_events)); end; { @@ -573,10 +574,9 @@ end; } function vop_stdpoll(ap:p_vop_poll_args):Integer; begin - Exit(EOPNOTSUPP); - //if ((ap^.a_events and (not POLLSTANDARD))<>0) then - // Exit(vn_pollrecord(ap^.a_vp, ap^.a_td, ap^.a_events)); - //Exit(ap^.a_events and (POLLIN or POLLOUT or POLLRDNORM or POLLWRNORM)); + if ((ap^.a_events and (not POLLSTANDARD))<>0) then + Exit(vn_pollrecord(ap^.a_vp, ap^.a_events)); + Exit(ap^.a_events and (POLLIN or POLLOUT or POLLRDNORM or POLLWRNORM)); end; { @@ -1064,7 +1064,7 @@ end; function vop_stdunp_connect(ap:p_vop_unp_connect_args):Integer; begin - //ap^.a_socket^:=ap^.a_vp^.v_socket; + ap^.a_socket^:=ap^.a_vp^.v_socket; Exit(0); end; @@ -1127,14 +1127,12 @@ label loop; var vp,mvp:p_vnode; - td:p_kthread; error,lockreq,allerror:Integer; begin error:=0; lockreq:=0; allerror:=0; - td:=curkthread; lockreq:=LK_EXCLUSIVE or LK_INTERLOCK; if (waitfor<>MNT_WAIT) then lockreq:=lockreq or LK_NOWAIT; diff --git a/sys/vfs/vfs_mount.pas b/sys/vfs/vfs_mount.pas index 0ae151ce..ae92c315 100644 --- a/sys/vfs/vfs_mount.pas +++ b/sys/vfs/vfs_mount.pas @@ -41,8 +41,8 @@ end; procedure mount_init(mp:p_mount); begin - mtx_init(mp^.mnt_mtx); - mtx_init(mp^.mnt_explock); + mtx_init(mp^.mnt_mtx ,'struct mount mtx'); + mtx_init(mp^.mnt_explock,'explock'); 
end; procedure mount_fini(mp:p_mount); diff --git a/sys/vfs/vfs_subr.pas b/sys/vfs/vfs_subr.pas index 63752791..9193b917 100644 --- a/sys/vfs/vfs_subr.pas +++ b/sys/vfs/vfs_subr.pas @@ -46,6 +46,17 @@ procedure assert_vop_elocked (vp:p_vnode;str:PChar); procedure vop_rename_fail(ap:p_vop_rename_args); procedure vop_rename_pre(ap:p_vop_rename_args); +procedure vop_create_post(ap:p_vop_create_args;rc:Integer); +procedure vop_deleteextattr_post(ap:p_vop_deleteextattr_args;rc:Integer); +procedure vop_link_post(ap:p_vop_link_args;rc:Integer); +procedure vop_mkdir_post(ap:p_vop_mkdir_args;rc:Integer); +procedure vop_mknod_post(ap:p_vop_mknod_args;rc:Integer); +procedure vop_remove_post(ap:p_vop_remove_args;rc:Integer); +procedure vop_rename_post(ap:p_vop_rename_args;rc:Integer); +procedure vop_rmdir_post(ap:p_vop_rmdir_args;rc:Integer); +procedure vop_setattr_post(ap:p_vop_setattr_args;rc:Integer); +procedure vop_setextattr_post(ap:p_vop_setextattr_args;rc:Integer); +procedure vop_symlink_post(ap:p_vop_symlink_args;rc:Integer); function vfs_read_dirent(ap:p_vop_readdir_args;dp:p_dirent;off:QWORD):Integer; procedure vfs_mark_atime(vp:p_vnode); @@ -56,6 +67,11 @@ function count_dev(dev:Pointer):Integer; //cdev procedure vfs_msync(mp:p_mount;flags:Integer); +procedure destroy_vpollinfo_free(vi:p_vpollinfo); +procedure destroy_vpollinfo(vi:p_vpollinfo); +procedure v_addpollinfo(vp:p_vnode); +function vn_pollrecord(vp:p_vnode;events:Integer):Integer; + function __mnt_vnode_next_all(mvp:pp_vnode;mp:p_mount):p_vnode; function __mnt_vnode_first_all(mvp:pp_vnode;mp:p_mount):p_vnode; procedure __mnt_vnode_markerfree_all(mvp:pp_vnode;mp:p_mount); @@ -66,7 +82,8 @@ uses errno, vfs_vnops, subr_uio, - vm_object; + vm_object, + vsys_generic; { * List of vnodes that are ready for recycling. 
@@ -155,16 +172,16 @@ begin desiredvnodes:=MAXVNODES_MAX; end; wantfreevnodes:=desiredvnodes div 4; - mtx_init(mntid_mtx); + mtx_init(mntid_mtx,'mntid'); TAILQ_INIT(@vnode_free_list); - mtx_init(vnode_free_list_mtx); + mtx_init(vnode_free_list_mtx,'vnode_free_list'); { * Initialize the filesystem syncer. } //syncer_workitem_pending[WI_MPSAFEQ]:=hashinit(syncer_maxdelay, M_VNODE,&syncer_mask); //syncer_workitem_pending[WI_GIANTQ]:=hashinit(syncer_maxdelay, M_VNODE,&syncer_mask); syncer_maxdelay:=syncer_mask + 1; - mtx_init(sync_mtx); + mtx_init(sync_mtx,'Syncer mtx'); cv_init(@sync_wakeup,'syncer'); i:=1; @@ -290,28 +307,28 @@ end; } procedure vattr_null(vap:p_vattr); begin - vap^.va_type:=VNON; - vap^.va_size:=VNOVAL; - vap^.va_bytes:=VNOVAL; - vap^.va_mode:=VNOVAL; - vap^.va_nlink:=VNOVAL; - vap^.va_uid:=VNOVAL; - vap^.va_gid:=VNOVAL; - vap^.va_fsid:=VNOVAL; - vap^.va_fileid:=VNOVAL; - vap^.va_blocksize:=VNOVAL; - vap^.va_rdev:=VNOVAL; - vap^.va_atime.tv_sec:=VNOVAL; - vap^.va_atime.tv_nsec:=VNOVAL; - vap^.va_mtime.tv_sec:=VNOVAL; - vap^.va_mtime.tv_nsec:=VNOVAL; - vap^.va_ctime.tv_sec:=VNOVAL; - vap^.va_ctime.tv_nsec:=VNOVAL; - vap^.va_birthtime.tv_sec:=VNOVAL; + vap^.va_type :=VNON; + vap^.va_size :=QWORD(VNOVAL); + vap^.va_bytes :=QWORD(VNOVAL); + vap^.va_mode :=WORD(VNOVAL); + vap^.va_nlink :=WORD(VNOVAL); + vap^.va_uid :=VNOVAL; + vap^.va_gid :=VNOVAL; + vap^.va_fsid :=VNOVAL; + vap^.va_fileid :=QWORD(VNOVAL); + vap^.va_blocksize :=QWORD(VNOVAL); + vap^.va_rdev :=VNOVAL; + vap^.va_atime.tv_sec :=VNOVAL; + vap^.va_atime.tv_nsec :=VNOVAL; + vap^.va_mtime.tv_sec :=VNOVAL; + vap^.va_mtime.tv_nsec :=VNOVAL; + vap^.va_ctime.tv_sec :=VNOVAL; + vap^.va_ctime.tv_nsec :=VNOVAL; + vap^.va_birthtime.tv_sec :=VNOVAL; vap^.va_birthtime.tv_nsec:=VNOVAL; - vap^.va_flags:=VNOVAL; - vap^.va_gen:=VNOVAL; - vap^.va_vaflags:=0; + vap^.va_flags :=QWORD(VNOVAL); + vap^.va_gen :=QWORD(VNOVAL); + vap^.va_vaflags :=0; end; { @@ -2594,21 +2611,16 @@ begin } end; -{ -static void 
-destroy_vpollinfo_free(struct vpollinfo *vi) +procedure destroy_vpollinfo_free(vi:p_vpollinfo); begin - - knlist_destroy(@vi^.vpi_selinfo.si_note); - mtx_destroy(@vi^.vpi_lock); - uma_zfree(vnodepoll_zone, vi); + //knlist_destroy(@vi^.vpi_selinfo.si_note); + mtx_destroy(vi^.vpi_lock); + FreeMem(vi); end; -static void -destroy_vpollinfo(struct vpollinfo *vi) +procedure destroy_vpollinfo(vi:p_vpollinfo); begin - - knlist_clear(@vi^.vpi_selinfo.si_note, 1); + //knlist_clear(@vi^.vpi_selinfo.si_note, 1); seldrain(@vi^.vpi_selinfo); destroy_vpollinfo_free(vi); end; @@ -2616,19 +2628,18 @@ end; { * Initalize per-vnode helper structure to hold poll-related state. } -void -v_addpollinfo(vp:p_vnode) +procedure v_addpollinfo(vp:p_vnode); +var + vi:p_vpollinfo; begin - struct vpollinfo *vi; - - if (vp^.v_pollinfo<>nil) + if (vp^.v_pollinfo<>nil) then Exit; - vi:=uma_zalloc(vnodepoll_zone, M_WAITOK); - mtx_init(@vi^.vpi_lock, "vnode pollinfo", nil, MTX_DEF); - knlist_init(@vi^.vpi_selinfo.si_note, vp, vfs_knllock, - vfs_knlunlock, vfs_knl_assert_locked, vfs_knl_assert_unlocked); + vi:=AllocMem(SizeOf(vpollinfo)); + mtx_init(vi^.vpi_lock,'vnode pollinfo'); + //knlist_init(@vi^.vpi_selinfo.si_note, vp, vfs_knllock, vfs_knlunlock, vfs_knl_assert_locked, vfs_knl_assert_unlocked); VI_LOCK(vp); - if (vp^.v_pollinfo<>nil) begin + if (vp^.v_pollinfo<>nil) then + begin VI_UNLOCK(vp); destroy_vpollinfo_free(vi); Exit; @@ -2645,13 +2656,12 @@ end; * functions. (These are done together, while the lock is held, * to avoid race conditions.) 
} -int -vn_pollrecord(vp:p_vnode, struct thread *td, int events) +function vn_pollrecord(vp:p_vnode;events:Integer):Integer; begin - v_addpollinfo(vp); - mtx_lock(@vp^.v_pollinfo^.vpi_lock); - if (vp^.v_pollinfo^.vpi_revents and events) begin + mtx_lock(vp^.v_pollinfo^.vpi_lock); + if ((vp^.v_pollinfo^.vpi_revents and events)<>0) then + begin { * This leaves events we are not interested * in available for the other process which @@ -2659,18 +2669,17 @@ begin * (otherwise they would never have been * recorded). } - events:= and vp^.v_pollinfo^.vpi_revents; - vp^.v_pollinfo^.vpi_revents:= and ~events; + events:=events and vp^.v_pollinfo^.vpi_revents; + vp^.v_pollinfo^.vpi_revents:=vp^.v_pollinfo^.vpi_revents and (not events); - mtx_unlock(@vp^.v_pollinfo^.vpi_lock); + mtx_unlock(vp^.v_pollinfo^.vpi_lock); Exit(events); end; - vp^.v_pollinfo^.vpi_events:= or events; - selrecord(td, &vp^.v_pollinfo^.vpi_selinfo); - mtx_unlock(@vp^.v_pollinfo^.vpi_lock); + vp^.v_pollinfo^.vpi_events:=vp^.v_pollinfo^.vpi_events or events; + selrecord(curkthread, @vp^.v_pollinfo^.vpi_selinfo); + mtx_unlock(vp^.v_pollinfo^.vpi_lock); Exit(0); end; -} { * Routine to create and manage a filesystem syncer vnode. 
@@ -2870,6 +2879,7 @@ begin //if ((vp^.v_rdev^.si_devsw^.d_flags and D_DISK)=0) then // error:=ENOTBLK; //dev_unlock(); + error:=ENOTBLK; if (errp<>nil) then errp^:=error; Exit(error=0); @@ -3063,154 +3073,111 @@ begin vhold(ap^.a_tvp); end; +procedure vop_create_post(ap:p_vop_create_args;rc:Integer); +begin + if (rc=0) then + begin + //VFS_KNOTE_LOCKED(a^.a_dvp, NOTE_WRITE); + end; +end; + +procedure vop_deleteextattr_post(ap:p_vop_deleteextattr_args;rc:Integer); +begin + if (rc=0) then + begin + //VFS_KNOTE_LOCKED(a^.a_vp, NOTE_ATTRIB); + end; +end; + +procedure vop_link_post(ap:p_vop_link_args;rc:Integer); +begin + if (rc=0) then + begin + //VFS_KNOTE_LOCKED(a^.a_vp, NOTE_LINK); + //VFS_KNOTE_LOCKED(a^.a_tdvp, NOTE_WRITE); + end; +end; + +procedure vop_mkdir_post(ap:p_vop_mkdir_args;rc:Integer); +begin + if (rc=0) then + begin + //VFS_KNOTE_LOCKED(a^.a_dvp, NOTE_WRITE or NOTE_LINK); + end; +end; + +procedure vop_mknod_post(ap:p_vop_mknod_args;rc:Integer); +begin + if (rc=0) then + begin + //VFS_KNOTE_LOCKED(a^.a_dvp, NOTE_WRITE); + end; +end; + +procedure vop_remove_post(ap:p_vop_remove_args;rc:Integer); +begin + if (rc=0) then + begin + //VFS_KNOTE_LOCKED(a^.a_dvp, NOTE_WRITE); + //VFS_KNOTE_LOCKED(a^.a_vp, NOTE_DELETE); + end; +end; + +procedure vop_rename_post(ap:p_vop_rename_args;rc:Integer); +begin + if (rc=0) then + begin + //VFS_KNOTE_UNLOCKED(a^.a_fdvp, NOTE_WRITE); + //VFS_KNOTE_UNLOCKED(a^.a_tdvp, NOTE_WRITE); + //VFS_KNOTE_UNLOCKED(a^.a_fvp, NOTE_RENAME); + if (ap^.a_tvp<>nil) then + begin + //VFS_KNOTE_UNLOCKED(a^.a_tvp, NOTE_DELETE); + end; + end; + if (ap^.a_tdvp<>ap^.a_fdvp) then + vdrop(ap^.a_fdvp); + if (ap^.a_tvp<>ap^.a_fvp) then + vdrop(ap^.a_fvp); + vdrop(ap^.a_tdvp); + if (ap^.a_tvp<>nil) then + vdrop(ap^.a_tvp); +end; + +procedure vop_rmdir_post(ap:p_vop_rmdir_args;rc:Integer); +begin + if (rc=0) then + begin + //VFS_KNOTE_LOCKED(a^.a_dvp, NOTE_WRITE or NOTE_LINK); + //VFS_KNOTE_LOCKED(a^.a_vp, NOTE_DELETE); + end; +end; + +procedure 
vop_setattr_post(ap:p_vop_setattr_args;rc:Integer); +begin + if (rc=0) then + begin + //VFS_KNOTE_LOCKED(a^.a_vp, NOTE_ATTRIB); + end; +end; + +procedure vop_setextattr_post(ap:p_vop_setextattr_args;rc:Integer); +begin + if (rc=0) then + begin + //VFS_KNOTE_LOCKED(a^.a_vp, NOTE_ATTRIB); + end; +end; + +procedure vop_symlink_post(ap:p_vop_symlink_args;rc:Integer); +begin + if (rc=0) then + begin + //VFS_KNOTE_LOCKED(a^.a_dvp, NOTE_WRITE); + end; +end; + { -void -vop_strategy_pre(void *ap) -begin - -end; - -void -vop_lock_pre(void *ap) -begin - -end; - -void -vop_lock_post(void *ap, int rc) -begin - -end; - -void -vop_unlock_pre(void *ap) -begin - -end; - -void -vop_unlock_post(void *ap, int rc) -begin - -end; - -void -vop_create_post(void *ap, int rc) -begin - struct vop_create_args *a:=ap; - - if (!rc) - VFS_KNOTE_LOCKED(a^.a_dvp, NOTE_WRITE); -end; - -void -vop_deleteextattr_post(void *ap, int rc) -begin - struct vop_deleteextattr_args *a:=ap; - - if (!rc) - VFS_KNOTE_LOCKED(a^.a_vp, NOTE_ATTRIB); -end; - -void -vop_link_post(void *ap, int rc) -begin - struct vop_link_args *a:=ap; - - if (!rc) begin - VFS_KNOTE_LOCKED(a^.a_vp, NOTE_LINK); - VFS_KNOTE_LOCKED(a^.a_tdvp, NOTE_WRITE); - end; -end; - -void -vop_mkdir_post(void *ap, int rc) -begin - struct vop_mkdir_args *a:=ap; - - if (!rc) - VFS_KNOTE_LOCKED(a^.a_dvp, NOTE_WRITE or NOTE_LINK); -end; - -void -vop_mknod_post(void *ap, int rc) -begin - struct vop_mknod_args *a:=ap; - - if (!rc) - VFS_KNOTE_LOCKED(a^.a_dvp, NOTE_WRITE); -end; - -void -vop_remove_post(void *ap, int rc) -begin - struct vop_remove_args *a:=ap; - - if (!rc) begin - VFS_KNOTE_LOCKED(a^.a_dvp, NOTE_WRITE); - VFS_KNOTE_LOCKED(a^.a_vp, NOTE_DELETE); - end; -end; - -void -vop_rename_post(void *ap, int rc) -begin - struct vop_rename_args *a:=ap; - - if (!rc) begin - VFS_KNOTE_UNLOCKED(a^.a_fdvp, NOTE_WRITE); - VFS_KNOTE_UNLOCKED(a^.a_tdvp, NOTE_WRITE); - VFS_KNOTE_UNLOCKED(a^.a_fvp, NOTE_RENAME); - if (a^.a_tvp) - VFS_KNOTE_UNLOCKED(a^.a_tvp, 
NOTE_DELETE); - end; - if (a^.a_tdvp<>a^.a_fdvp) - vdrop(a^.a_fdvp); - if (a^.a_tvp<>a^.a_fvp) - vdrop(a^.a_fvp); - vdrop(a^.a_tdvp); - if (a^.a_tvp) - vdrop(a^.a_tvp); -end; - -void -vop_rmdir_post(void *ap, int rc) -begin - struct vop_rmdir_args *a:=ap; - - if (!rc) begin - VFS_KNOTE_LOCKED(a^.a_dvp, NOTE_WRITE or NOTE_LINK); - VFS_KNOTE_LOCKED(a^.a_vp, NOTE_DELETE); - end; -end; - -void -vop_setattr_post(void *ap, int rc) -begin - struct vop_setattr_args *a:=ap; - - if (!rc) - VFS_KNOTE_LOCKED(a^.a_vp, NOTE_ATTRIB); -end; - -void -vop_setextattr_post(void *ap, int rc) -begin - struct vop_setextattr_args *a:=ap; - - if (!rc) - VFS_KNOTE_LOCKED(a^.a_vp, NOTE_ATTRIB); -end; - -void -vop_symlink_post(void *ap, int rc) -begin - struct vop_symlink_args *a:=ap; - - if (!rc) - VFS_KNOTE_LOCKED(a^.a_dvp, NOTE_WRITE); -end; - static struct knlist fs_knlist; static void diff --git a/sys/vfs/vfs_syscalls.pas b/sys/vfs/vfs_syscalls.pas index 33006a35..d15ac54a 100644 --- a/sys/vfs/vfs_syscalls.pas +++ b/sys/vfs/vfs_syscalls.pas @@ -85,6 +85,7 @@ function sys_revoke(path:PChar):Integer; implementation uses + atomic, mqueue, systm, errno, diff --git a/sys/vfs/vfs_vnode.pas b/sys/vfs/vfs_vnode.pas index 01ced3e0..e4127465 100644 --- a/sys/vfs/vfs_vnode.pas +++ b/sys/vfs/vfs_vnode.pas @@ -9,6 +9,7 @@ uses mqueue, kern_thr, kern_mtx, + vselinfo, time, vmparam; @@ -173,6 +174,14 @@ type vtype=(VNON,VREG,VDIR,VBLK,VCHR,VLNK,VSOCK,VFIFO,VBAD,VMARKER); + p_vpollinfo=^vpollinfo; + vpollinfo=packed record + vpi_lock :mtx; // lock to protect below + vpi_selinfo:t_selinfo; // identity of poller(s) + vpi_events :Word; // what they are looking for + vpi_revents:Word; // what has happened + end; + t_vnode=packed object v_type:vtype; v_tag :PChar; @@ -196,10 +205,12 @@ type v_iflag:QWORD; v_vflag:QWORD; - property v_mountedhere:Pointer read v_un{.vu_mount }; - property v_socket :Pointer read v_un{.vu_socket }; - property v_rdev :Pointer read v_un{.vu_cdev }; - property v_fifoinfo :Pointer 
read v_un{.vu_fifoinfo}; + v_pollinfo:p_vpollinfo; // i Poll events, p for *v_pi + + property v_mountedhere:Pointer read v_un{.vu_mount } write v_un; + property v_socket :Pointer read v_un{.vu_socket } write v_un; + property v_rdev :Pointer read v_un{.vu_cdev } write v_un; + property v_fifoinfo :Pointer read v_un{.vu_fifoinfo} write v_un; end; p_vattr=^t_vattr; diff --git a/sys/vfs/vfs_vnops.pas b/sys/vfs/vfs_vnops.pas index e67cc659..4ca898b3 100644 --- a/sys/vfs/vfs_vnops.pas +++ b/sys/vfs/vfs_vnops.pas @@ -475,7 +475,7 @@ begin mode:=mode or S_IFIFO; else Exit(EBADF); - end;; + end; sb^.st_mode:=mode; sb^.st_nlink:=vap^.va_nlink; sb^.st_uid:=vap^.va_uid; diff --git a/sys/vfs/vioccom.pas b/sys/vfs/vioccom.pas new file mode 100644 index 00000000..37062a69 --- /dev/null +++ b/sys/vfs/vioccom.pas @@ -0,0 +1,47 @@ +unit vioccom; + +{$mode ObjFPC}{$H+} +{$CALLING SysV_ABI_CDecl} + +interface + +{ + * Ioctl's have the command encoded in the lower word, and the size of + * any in or out parameters in the upper word. The high 3 bits of the + * upper word are used to encode the in/out status of the parameter. + } +const + IOCPARM_SHIFT=13; { number of bits for ioctl size } + IOCPARM_MASK =((1 shl IOCPARM_SHIFT) - 1); { parameter length mask } + + IOCPARM_MAX =(1 shl IOCPARM_SHIFT); { max size of ioctl } + IOC_VOID =$20000000; { no parameters } + IOC_OUT =$40000000; { copy out parameters } + IOC_IN =$80000000; { copy in parameters } + IOC_INOUT =(IOC_IN or IOC_OUT); + IOC_DIRMASK =(IOC_VOID or IOC_OUT or IOC_IN); + +function IOCPARM_LEN(x:QWORD):QWORD; inline; +function IOCBASECMD (x:QWORD):QWORD; inline; +function IOCGROUP (x:QWORD):QWORD; inline; + +implementation + +function IOCPARM_LEN(x:QWORD):QWORD; inline; +begin + Result:=(x shr 16) and IOCPARM_MASK; +end; + +function IOCBASECMD(x:QWORD):QWORD; inline; +begin + Result:=x and (not (IOCPARM_MASK shl 16)); +end; + +function IOCGROUP(x:QWORD):QWORD; inline; +begin + Result:=(x shr 8) and $ff; +end; + + +end. 
+ diff --git a/sys/vfs/vmount.pas b/sys/vfs/vmount.pas index 713936ab..cb4819ba 100644 --- a/sys/vfs/vmount.pas +++ b/sys/vfs/vmount.pas @@ -582,8 +582,8 @@ begin end; initialization - mtx_init(mountlist_mtx); - mtx_init(VFS_Giant); + mtx_init(mountlist_mtx,'mountlist'); + mtx_init(VFS_Giant ,'VFS_Giant'); end. diff --git a/sys/vfs/vnode_if.pas b/sys/vfs/vnode_if.pas index 90eb54f3..c0da6723 100644 --- a/sys/vfs/vnode_if.pas +++ b/sys/vfs/vnode_if.pas @@ -11,7 +11,8 @@ uses vfile, vuio, vmount, - vfcntl; + vfcntl, + vsocketvar; type PPPtrUint =^PPtrUint; @@ -25,8 +26,6 @@ type acl_type_t =Integer; p_acl =Pointer; p_label =Pointer; - p_socket =Pointer; - pp_socket =Pointer; p_vop_islocked_args=^vop_islocked_args; vop_islocked_args=packed record @@ -643,6 +642,7 @@ begin a.a_cnp:=cnp; a.a_vap:=vap; Result:=vop_create_t(dvp^.v_op^.vop_create)(@a); + vop_create_post(@a,Result); end; function VOP_WHITEOUT(dvp:p_vnode;cnp:p_componentname;flags:Integer):Integer; @@ -664,6 +664,7 @@ begin a.a_cnp:=cnp; a.a_vap:=vap; Result:=vop_mknod_t(dvp^.v_op^.vop_mknod)(@a); + vop_mknod_post(@a,Result); end; function VOP_OPEN(vp:p_vnode;mode:Integer;fp:p_file):Integer; @@ -719,6 +720,7 @@ begin a.a_vp :=vp; a.a_vap:=vap; Result:=vop_setattr_t(vp^.v_op^.vop_setattr)(@a); + vop_setattr_post(@a,Result); end; function VOP_MARKATIME(vp:p_vnode):Integer; @@ -801,6 +803,7 @@ begin a.a_vp :=vp; a.a_cnp:=cnp; Result:=vop_remove_t(dvp^.v_op^.vop_remove)(@a); + vop_remove_post(@a,Result); end; function VOP_LINK(tdvp:p_vnode;vp:p_vnode;cnp:p_componentname):Integer; @@ -811,6 +814,7 @@ begin a.a_vp :=vp; a.a_cnp :=cnp; Result:=vop_link_t(tdvp^.v_op^.vop_link)(@a); + vop_link_post(@a,Result); end; function VOP_RENAME(fdvp:p_vnode;fvp:p_vnode;fcnp:p_componentname;tdvp:p_vnode;tvp:p_vnode;tcnp:p_componentname):Integer; @@ -825,6 +829,7 @@ begin a.a_tcnp:=tcnp; vop_rename_pre(@a); Result:=vop_rename_t(fdvp^.v_op^.vop_rename)(@a); + vop_rename_post(@a,Result); end; function 
VOP_MKDIR(dvp:p_vnode;vpp:pp_vnode;cnp:p_componentname;vap:p_vattr):Integer; @@ -836,6 +841,7 @@ begin a.a_cnp:=cnp; a.a_vap:=vap; Result:=vop_mkdir_t(dvp^.v_op^.vop_mkdir)(@a); + vop_mkdir_post(@a,Result); end; function VOP_RMDIR(dvp:p_vnode;vp:p_vnode;cnp:p_componentname):Integer; @@ -846,6 +852,7 @@ begin a.a_vp :=vp; a.a_cnp:=cnp; Result:=vop_rmdir_t(dvp^.v_op^.vop_rmdir)(@a); + vop_rmdir_post(@a,Result); end; function VOP_SYMLINK(dvp:p_vnode;vpp:pp_vnode;cnp:p_componentname;vap:p_vattr;target:PChar):Integer; @@ -858,6 +865,7 @@ begin a.a_vap :=vap; a.a_target:=target; Result:=vop_symlink_t(dvp^.v_op^.vop_symlink)(@a); + vop_symlink_post(@a,Result); end; function VOP_READDIR(vp:p_vnode;uio:p_uio;eofflag:PInteger;ncookies:PInteger;cookies:PPPtrUint):Integer; @@ -1097,6 +1105,7 @@ begin a.a_attrnamespace:=attrnamespace; a.a_name :=name; Result:=vop_deleteextattr_t(vp^.v_op^.vop_deleteextattr)(@a); + vop_deleteextattr_post(@a,Result); end; function VOP_SETEXTATTR(vp:p_vnode;attrnamespace:Integer;name:PChar;uio:p_uio):Integer; @@ -1108,6 +1117,7 @@ begin a.a_name :=name; a.a_uio :=uio; Result:=vop_setextattr_t(vp^.v_op^.vop_setextattr)(@a); + vop_setextattr_post(@a,Result); end; function VOP_SETLABEL(vp:p_vnode;_label:p_label):Integer; diff --git a/sys/vfs/vpoll.pas b/sys/vfs/vpoll.pas new file mode 100644 index 00000000..cea7da78 --- /dev/null +++ b/sys/vfs/vpoll.pas @@ -0,0 +1,64 @@ +unit vpoll; + +{$mode ObjFPC}{$H+} +{$CALLING SysV_ABI_CDecl} + +interface + +type + nfds_t=DWORD; + + { + * This structure is passed as an array to poll(2). + } + p_pollfd=^t_pollfd; + t_pollfd=packed record + fd :Integer; { which file descriptor to poll } + events :Word; { events we are interested in } + revents:Word; { events found on return } + end; + +const +{ + * Requestable events. If poll(2) finds any of these set, they are + * copied to revents on return. 
+ * XXX Note that FreeBSD doesn't make much distinction between POLLPRI + * and POLLRDBAND since none of the file types have distinct priority + * bands - and only some have an urgent "mode". + * XXX Note POLLIN isn't really supported in true SYSV terms. Under SYSV + * POLLIN includes all of normal, band and urgent data. Most poll handlers + * on FreeBSD only treat it as "normal" data. + } + POLLIN =$0001; { any readable data available } + POLLPRI =$0002; { OOB/Urgent readable data } + POLLOUT =$0004; { file descriptor is writeable } + POLLRDNORM=$0040; { non-OOB/URG data available } + POLLWRNORM=POLLOUT; { no write type differentiation } + POLLRDBAND=$0080; { OOB/Urgent readable data } + POLLWRBAND=$0100; { OOB/Urgent data can be written } + +{ General FreeBSD extension (currently only supported for sockets): } + POLLINIGNEOF=$2000; { like POLLIN, except ignore EOF } + +{ + * These events are set if they occur regardless of whether they were + * requested. + } + POLLERR =$0008; { some poll error occurred } + POLLHUP =$0010; { file descriptor was "hung up" } + POLLNVAL=$0020; { requested events "invalid" } + + POLLSTANDARD=(POLLIN or POLLPRI or POLLOUT or POLLRDNORM or POLLRDBAND or + POLLWRBAND or POLLERR or POLLHUP or POLLNVAL); + +{ + * Request that poll() wait forever. + * XXX in SYSV, this is defined in stropts.h, which is not included + * by poll.h. + } + INFTIM=-1; + +implementation + +end. + diff --git a/sys/vfs/vselect.pas b/sys/vfs/vselect.pas new file mode 100644 index 00000000..84d1623d --- /dev/null +++ b/sys/vfs/vselect.pas @@ -0,0 +1,73 @@ +unit vselect; + +{$mode ObjFPC}{$H+} +{$CALLING SysV_ABI_CDecl} + +interface + +uses + signal, + time; + +type + pp_fd_mask=^p_fd_mask; + p_fd_mask=^fd_mask; + fd_mask=QWORD; + +{ + * Select uses bit masks of file descriptors in longs. These macros + * manipulate such bit fields (the filesystem macros use chars). + * FD_SETSIZE may be defined by the user, but the default here should + * be enough for most uses.
+ } +const + FD_SETSIZE=1024; + NFDBITS =(sizeof(fd_mask) * 8); { bits per mask } + +type + p_fd_set=^t_fd_set; + t_fd_set=packed record + fds_bits:array[0..((FD_SETSIZE + (NFDBITS - 1)) div NFDBITS)-1] of fd_mask; + end; + +procedure FD_CLR(n:Integer;p:p_fd_set); +procedure FD_COPY(f,t:p_fd_set); +function FD_ISSET(n:Integer;p:p_fd_set):Boolean; +procedure FD_SET(n:Integer;p:p_fd_set); +procedure FD_ZERO(p:p_fd_set); + +implementation + +function __fdset_mask(n:Integer):fd_mask; inline; { single-bit mask for descriptor n inside its fd_mask word } +begin + Result:=fd_mask(1) shl (n mod NFDBITS); +end; + +procedure FD_CLR(n:Integer;p:p_fd_set); { clear the bit of descriptor n in p^ } +begin + p^.fds_bits[n div NFDBITS]:=p^.fds_bits[n div NFDBITS] and (not __fdset_mask(n)); +end; + +procedure FD_COPY(f,t:p_fd_set); { copy the whole set: f = source ("from"), t = destination ("to") } +begin + t^:=f^; { was f^:=t^, which overwrote the source; FreeBSD's FD_COPY(f, t) is *(t) = *(f) } +end; + +function FD_ISSET(n:Integer;p:p_fd_set):Boolean; { True when the bit of descriptor n is set in p^ } +begin + Result:=(p^.fds_bits[n div NFDBITS] and __fdset_mask(n))<>0; +end; + +procedure FD_SET(n:Integer;p:p_fd_set); { set the bit of descriptor n in p^ } +begin + p^.fds_bits[n div NFDBITS]:=p^.fds_bits[n div NFDBITS] or __fdset_mask(n); +end; + +procedure FD_ZERO(p:p_fd_set); { reset p^ to the default (all-zero) record value } +begin + p^:=Default(t_fd_set); +end; + + +end. + diff --git a/sys/vfs/vselinfo.pas b/sys/vfs/vselinfo.pas new file mode 100644 index 00000000..67e71d3e --- /dev/null +++ b/sys/vfs/vselinfo.pas @@ -0,0 +1,34 @@ +unit vselinfo; + +{$mode ObjFPC}{$H+} +{$CALLING SysV_ABI_CDecl} + +interface + +uses + mqueue, + kern_mtx; + +type + { + * Used to maintain information about processes that wish to be + * notified when I/O becomes possible. + } + p_selinfo=^t_selinfo; + t_selinfo=packed record + si_tdlist:TAILQ_HEAD; { List of sleeping threads. } + //si_note:knlist; { kernel note list } + si_mtx:p_mtx; { Lock for tdlist. } + end; + +function SEL_WAITING(si:p_selinfo):Boolean; inline; + +implementation + +function SEL_WAITING(si:p_selinfo):Boolean; inline; +begin + Result:=(not TAILQ_EMPTY(@si^.si_tdlist)); +end; + +end.
+ diff --git a/sys/vfs/vsockbuf.pas b/sys/vfs/vsockbuf.pas new file mode 100644 index 00000000..603e2f0f --- /dev/null +++ b/sys/vfs/vsockbuf.pas @@ -0,0 +1,148 @@ +unit vsockbuf; + +{$mode ObjFPC}{$H+} +{$CALLING SysV_ABI_CDecl} + +interface + +uses + vselinfo, + kern_mtx; + +const + SB_MAX=(2*1024*1024); { default for max chars in sockbuf } + +{ + * Constants for sb_flags field of struct sockbuf. + } + SB_WAIT =$04 ; { someone is waiting for data/space } + SB_SEL =$08 ; { someone is selecting } + SB_ASYNC =$10 ; { ASYNC I/O, need signals } + SB_UPCALL =$20 ; { someone wants an upcall } + SB_NOINTR =$40 ; { operations not interruptible } + SB_AIO =$80 ; { AIO operations queued } + SB_KNOTE =$100; { kernel note attached } + SB_NOCOALESCE=$200; { don't coalesce new data into existing mbufs } + SB_IN_TOE =$400; { socket buffer is in the middle of an operation } + SB_AUTOSIZE =$800; { automatically size socket buffer } + + SBS_CANTSENDMORE=$0010; { can't send more data to peer } + SBS_CANTRCVMORE =$0020; { can't receive more data from peer } + SBS_RCVATMARK =$0040; { at mark on input } + +//struct mbuf; +//struct sockaddr; +//struct socket; +//struct thread; + +type + t_xsockbuf=packed record + sb_cc :DWORD; + sb_hiwat:DWORD; + sb_mbcnt:DWORD; + sb_mcnt :DWORD; + sb_ccnt :DWORD; + sb_mbmax:DWORD; + sb_lowat:Integer; + sb_timeo:Integer; + sb_flags:Word; + end; + +{ + * Variables for socket buffering. 
+ } + t_sb_upcall=function(s,p:Pointer;i:Integer):Integer; + + p_sockbuf=^t_sockbuf; + t_sockbuf=record + sb_sel :p_selinfo ; { process selecting read/write } + sb_mtx :mtx ; { sockbuf lock } + sb_sx :Pointer ; { prevent I/O interlacing } + sb_state :Word ; { (c/d) socket state on sockbuf } + //sb_startzero=sb_mb + sb_mb :Pointer ; { (c/d) the mbuf chain } + sb_mbtail :Pointer ; { (c/d) the last mbuf in the chain } + sb_lastrecord:Pointer ; { (c/d) first mbuf of last record in socket buffer } + sb_sndptr :Pointer ; { (c/d) pointer into mbuf chain } + sb_sndptroff :DWORD ; { (c/d) byte offset of ptr into chain } + sb_cc :DWORD ; { (c/d) actual chars in buffer } + sb_hiwat :DWORD ; { (c/d) max actual char count } + sb_mbcnt :DWORD ; { (c/d) chars of mbufs used } + sb_mcnt :DWORD ; { (c/d) number of mbufs in buffer } + sb_ccnt :DWORD ; { (c/d) number of clusters in buffer } + sb_mbmax :DWORD ; { (c/d) max chars of mbufs to use } + sb_ctl :DWORD ; { (c/d) non-data chars in buffer } + sb_lowat :Integer ; { (c/d) low water mark } + sb_timeo :Integer ; { (c/d) timeout for read/write } + sb_flags :Word ; { (c/d) flags, see below } + sb_upcall :t_sb_upcall; { (c/d) } + sb_upcallarg :Pointer ; { (c/d) } + end; + +function SOCKBUF_MTX(_sb:p_sockbuf):p_mtx; inline; +procedure SOCKBUF_LOCK_INIT(_sb:p_sockbuf;_name:PChar); inline; +procedure SOCKBUF_LOCK_DESTROY(_sb:p_sockbuf); inline; +procedure SOCKBUF_LOCK(_sb:p_sockbuf); inline; +function SOCKBUF_OWNED(_sb:p_sockbuf):Boolean; inline; +procedure SOCKBUF_UNLOCK(_sb:p_sockbuf); inline; +procedure SOCKBUF_LOCK_ASSERT(_sb:p_sockbuf); inline; + +function sbspace(sb:p_sockbuf):DWORD; inline; + +implementation + +{ + * Per-socket buffer mutex used to protect most fields in the socket + * buffer. 
+ } +function SOCKBUF_MTX(_sb:p_sockbuf):p_mtx; inline; +begin + Result:=@_sb^.sb_mtx +end; + +procedure SOCKBUF_LOCK_INIT(_sb:p_sockbuf;_name:PChar); inline; +begin + mtx_init(SOCKBUF_MTX(_sb)^,_name); +end; + +procedure SOCKBUF_LOCK_DESTROY(_sb:p_sockbuf); inline; +begin + mtx_destroy(SOCKBUF_MTX(_sb)^); +end; + +procedure SOCKBUF_LOCK(_sb:p_sockbuf); inline; +begin + mtx_lock(SOCKBUF_MTX(_sb)^); +end; + +function SOCKBUF_OWNED(_sb:p_sockbuf):Boolean; inline; +begin + Result:=mtx_owned(SOCKBUF_MTX(_sb)^); +end; + +procedure SOCKBUF_UNLOCK(_sb:p_sockbuf); inline; +begin + mtx_unlock(SOCKBUF_MTX(_sb)^); +end; + +procedure SOCKBUF_LOCK_ASSERT(_sb:p_sockbuf); inline; +begin + mtx_assert(SOCKBUF_MTX(_sb)^); +end; + +// + +function sbspace(sb:p_sockbuf):DWORD; inline; +var + s1,s2:DWORD; +begin + s1:=sb^.sb_hiwat-sb^.sb_cc; + s2:=sb^.sb_mbmax-sb^.sb_mbcnt; + if (s1=SizeOf(t_cmsghdr)) then + Result:=mhdr^.msg_control + else + Result:=nil; +end; + +{ given pointer to struct cmsghdr, return pointer to next cmsghdr } +function CMSG_NXTHDR(mhdr:p_msghdr;cmsg:p_cmsghdr):p_msghdr; inline; +begin + if (cmsg=nil) then + Result:=CMSG_FIRSTHDR(mhdr) + else + if (Ptruint(cmsg)+CMSG_ALIGN(cmsg^.cmsg_len)+CMSG_ALIGN(SizeOf(t_cmsghdr)))> + (Ptruint(mhdr^.msg_control)+mhdr^.msg_controllen) then + begin + Result:=nil; + end else + begin + Result:=Pointer(cmsg)+CMSG_ALIGN(cmsg^.cmsg_len); + end; +end; + +{ RFC 2292 additions } +function CMSG_SPACE(l:Ptruint):Ptruint; +begin + Result:=CMSG_ALIGN(SizeOf(t_cmsghdr))+CMSG_ALIGN(l); +end; + +function CMSG_LEN(l:Ptruint):Ptruint; +begin + Result:=CMSG_ALIGN(SizeOf(t_cmsghdr))+l; +end; + + +end. 
+ diff --git a/sys/vfs/vsocketvar.pas b/sys/vfs/vsocketvar.pas new file mode 100644 index 00000000..1fb34b79 --- /dev/null +++ b/sys/vfs/vsocketvar.pas @@ -0,0 +1,305 @@ +unit vsocketvar; + +{$mode ObjFPC}{$H+} +{$CALLING SysV_ABI_CDecl} + +interface + +uses + mqueue, + vfile, + vselinfo, + vsocket, + vsockstate, + vsockbuf, + kern_mtx; + +//#include + +{ + * Kernel structure per socket. + * Contains send and receive buffer queues, + * handle on protocol and pointer to protocol + * private data and error information. + } +type + so_gen_t=QWORD; + + t_accf_callback=function(so,arg:Pointer;waitflag:Integer):Integer; + t_accf_create =function(so,arg:Pointer):Pointer; + t_accf_destroy =procedure(so:Pointer); + + p_accept_filter=^t_accept_filter; + t_accept_filter=packed record + accf_name :array[0..15] of AnsiChar; + accf_callback:t_accf_callback; + accf_create :t_accf_create ; + accf_destroy :t_accf_destroy ; + accf_next :SLIST_ENTRY ; + end; + +{- + * Locking key to struct socket: + * (a) constant after allocation, no locking required. + * (b) locked by SOCK_LOCK(so). + * (c) locked by SOCKBUF_LOCK(@so^.so_rcv). + * (d) locked by SOCKBUF_LOCK(@so^.so_snd). + * (e) locked by ACCEPT_LOCK(). + * (f) not locked since integer reads/writes are atomic. + * (g) used only as a sleep/wakeup address, no value. + * (h) locked by global mutex so_global_mtx. + } + pp_socket=^p_socket; + p_socket=^t_socket; + t_socket=packed record + so_count :Integer; { (b) reference count } + so_type :Word; { (a) generic type, see socket.h } + so_options:Word; { from socket call, see socket.h } + so_linger :Word; { time to linger while closing } + so_state :Word; { (b) internal state flags SS_* } + so_qstate :Integer; { (e) internal state flags SQ_* } + so_pcb :Pointer; { protocol control block } + so_vnet :Pointer; { network stack instance } + so_proto :Pointer; { (a) protocol handle } + { + * Variables for connection queuing. + * Socket where accepts occur is so_head in all subsidiary sockets. 
+ * If so_head is 0, socket is not related to an accept. + * For head socket so_incomp queues partially completed connections, + * while so_comp is a queue of connections ready to be accepted. + * If a connection is aborted and it has so_head set, then + * it has to be pulled out of either so_incomp or so_comp. + * We allow connections to queue up based on current queue lengths + * and limit on number of queued connections for this socket. + } + so_head :p_socket ; { (e) back pointer to listen socket } + so_incomp :TAILQ_HEAD ; { (e) queue of partial unaccepted connections } + so_comp :TAILQ_HEAD ; { (e) queue of complete unaccepted connections } + so_list :TAILQ_ENTRY; { (e) list of unaccepted connections } + so_qlen :Word ; { (e) number of unaccepted connections } + so_incqlen:Word ; { (e) number of unaccepted incomplete connections } + so_qlimit :Word ; { (e) max number queued connections } + so_timeo :Word ; { (g) connection timeout } + so_error :Word ; { (f) error affecting connection } + so_sigio :Pointer ; { [sg] information for async I/O or out of band data (SIGURG) } + so_oobmark:QWORD ; { (c) chars to oob mark } + so_aiojobq:TAILQ_HEAD ; { AIO ops waiting on socket } + + so_rcv :t_sockbuf; + so_snd :t_sockbuf; + + //label *so_label ; { (b) MAC label for socket } + //label *so_peerlabel ; { (b) cached MAC label for peer } + { NB: generation count must not be first. } + so_gencnt :so_gen_t; { (h) generation count } + so_emuldata:Pointer ; { (b) private data for emulators } + + so_accf:packed record + so_accept_filter :p_accept_filter; + so_accept_filter_arg:Pointer; { saved filter args } + so_accept_filter_str:Pointer; { saved user args } + end; + { + * so_fibnum, so_user_cookie and friends can be used to attach + * some user-specified metadata to a socket, which then can be + * used by the kernel for various actions. + * so_user_cookie is used by ipfw/dummynet. 
+ } + so_fibnum :Integer; { routing domain for this socket } + so_user_cookie:DWORD; + end; + +{ + * Global accept mutex to serialize access to accept queues and + * fields associated with multiple sockets. This allows us to + * avoid defining a lock order between listen and accept sockets + * until such time as it proves to be a good idea. + } +var + accept_mtx:mtx; + +const +{ + * Socket state bits stored in so_qstate. + } + SQ_INCOMP=$0800; { unaccepted, incomplete connection } + SQ_COMP =$1000; { unaccepted, complete connection } + +type +{ + * Externalized form of struct socket used by the sysctl(3) interface. + } + xsocket=packed record + xso_len :QWORD ; { length of this structure } + xso_so :p_socket; { makes a convenient handle sometimes } + so_type :Word; + so_options :Word; + so_linger :Word; + so_state :Word; + so_pcb :Pointer; { another convenient handle } + xso_protocol:Integer; + xso_family :Integer; + so_qlen :Word; + so_incqlen :Word; + so_qlimit :Word; + so_timeo :Word; + so_error :Word; + so_pgid :Integer; + so_oobmark :QWORD; + so_rcv :t_xsockbuf; + so_snd :t_xsockbuf; + so_uid :uid_t; { XXX } + end; + +{ + * Macros for sockets and socket buffering. + } + +const +{ + * Flags to sblock(). + } + SBL_WAIT =$00000001; { Wait if not immediately available. } + SBL_NOINTR=$00000002; { Force non-interruptible sleep. } + SBL_VALID =(SBL_WAIT or SBL_NOINTR); + +{ 'which' values for socket upcalls. } + SO_RCV=1; + SO_SND=2; + +{ Return values for socket upcalls. 
} + SU_OK =0; + SU_ISCONNECTED=1; + +procedure ACCEPT_LOCK_ASSERT(); inline; +procedure ACCEPT_LOCK(); inline; +procedure ACCEPT_UNLOCK(); inline; + +function SOCK_MTX(_so:p_socket):p_mtx; inline; +procedure SOCK_LOCK(_so:p_socket); inline; +function SOCK_OWNED(_so:p_socket):Boolean; inline; +procedure SOCK_UNLOCK(_so:p_socket); inline; +procedure SOCK_LOCK_ASSERT(_so:p_socket); inline; + +function sb_notify(sb:p_sockbuf):Boolean; inline; +function sosendallatonce(so:p_socket):Boolean; inline; +function soreadabledata(so:p_socket):Boolean; inline; +function soreadable(so:p_socket):Boolean; inline; +function sowriteable(so:p_socket):Boolean; inline; +procedure soref(so:p_socket); +procedure sorele(so:p_socket); + +implementation + +procedure ACCEPT_LOCK_ASSERT(); inline; +begin + mtx_assert(accept_mtx); +end; + +procedure ACCEPT_LOCK(); inline; +begin + mtx_lock(accept_mtx); +end; + +procedure ACCEPT_UNLOCK(); inline; +begin + mtx_unlock(accept_mtx); +end; + +// + +function SOCK_MTX(_so:p_socket):p_mtx; inline; +begin + Result:=SOCKBUF_MTX(@_so^.so_rcv) +end; + +procedure SOCK_LOCK(_so:p_socket); inline; +begin + SOCKBUF_LOCK(@_so^.so_rcv) +end; + +function SOCK_OWNED(_so:p_socket):Boolean; inline; +begin + Result:=SOCKBUF_OWNED(@_so^.so_rcv); +end; + +procedure SOCK_UNLOCK(_so:p_socket); inline; +begin + SOCKBUF_UNLOCK(@_so^.so_rcv) +end; + +procedure SOCK_LOCK_ASSERT(_so:p_socket); inline; +begin + SOCKBUF_LOCK_ASSERT(@_so^.so_rcv); +end; + +// + +{ + * Do we need to notify the other side when I/O is possible? + } +function sb_notify(sb:p_sockbuf):Boolean; inline; +begin + Result:=(sb^.sb_flags and (SB_WAIT or SB_SEL or SB_ASYNC or SB_UPCALL or SB_AIO or SB_KNOTE))<>0; +end; + +{ do we have to send all at once on a socket? } +function sosendallatonce(so:p_socket):Boolean; inline; +begin + //Result:=((so^.so_proto^.pr_flags and PR_ATOMIC)<>0); + Result:=False; +end; + +{ can we read something from so? 
} +function soreadabledata(so:p_socket):Boolean; inline; +begin + Result:=(so^.so_rcv.sb_cc >= so^.so_rcv.sb_lowat) or + (not TAILQ_EMPTY(@so^.so_comp)) or (so^.so_error<>0); +end; + +function soreadable(so:p_socket):Boolean; inline; +begin + Result:=soreadabledata(so) or ((so^.so_rcv.sb_state and SBS_CANTRCVMORE)<>0); +end; + +{ can we write something to so? } +function sowriteable(so:p_socket):Boolean; inline; +begin + Result:=((sbspace(@so^.so_snd)>=so^.so_snd.sb_lowat) and + (((so^.so_state and SS_ISCONNECTED)<>0) or + ({(so^.so_proto^.pr_flags and PR_CONNREQUIRED)=0}False))) or + ((so^.so_snd.sb_state and SBS_CANTSENDMORE)<>0) or + (so^.so_error<>0); +end; + +{ + * soref()/sorele() ref-count the socket structure. Note that you must + * still explicitly close the socket, but the last ref count will free + * the structure. + } +procedure soref(so:p_socket); +begin + SOCK_LOCK_ASSERT(so); + Inc(so^.so_count); +end; + +procedure sorele(so:p_socket); +begin + ACCEPT_LOCK_ASSERT(); + SOCK_LOCK_ASSERT(so); + if (so^.so_count <= 0) then + Assert(false,'sorele'); + Dec(so^.so_count); + if (so^.so_count=0) then + begin + //sofree(so); + end else + begin + SOCK_UNLOCK(so); + ACCEPT_UNLOCK(); + end +end; + + +end. + diff --git a/sys/vfs/vsockstate.pas b/sys/vfs/vsockstate.pas new file mode 100644 index 00000000..e108a71f --- /dev/null +++ b/sys/vfs/vsockstate.pas @@ -0,0 +1,49 @@ +unit vsockstate; + +{$mode ObjFPC}{$H+} +{$CALLING SysV_ABI_CDecl} + +interface + +const +{ + * Socket state bits. + * + * Historically, this bits were all kept in the so_state field. For + * locking reasons, they are now in multiple fields, as they are + * locked differently. so_state maintains basic socket state protected + * by the socket lock. so_qstate holds information about the socket + * accept queues. Each socket buffer also has a state field holding + * information relevant to that socket buffer (can't send, rcv). 
Many + * fields will be read without locks to improve performance and avoid + * lock order issues. However, this approach must be used with caution. + } + SS_NOFDREF =$0001; { no file table ref any more } + SS_ISCONNECTED =$0002; { socket connected to a peer } + SS_ISCONNECTING =$0004; { in process of connecting to peer } + SS_ISDISCONNECTING=$0008; { in process of disconnecting } + SS_NBIO =$0100; { non-blocking ops } + SS_ASYNC =$0200; { async i/o notify } + SS_ISCONFIRMING =$0400; { deciding to accept connection req } + SS_ISDISCONNECTED =$2000; { socket disconnected from peer } + +{ + * Protocols can mark a socket as SS_PROTOREF to indicate that, following + * pru_detach, they still want the socket to persist, and will free it + * themselves when they are done. Protocols should only ever call sofree() + * following setting this flag in pru_detach(), and never otherwise, as + * sofree() bypasses socket reference counting. + } + SS_PROTOREF =$4000; { strong protocol reference } + +{ + * Socket state bits now stored in the socket buffer state field. + } + SBS_CANTSENDMORE=$0010; { can't send more data to peer } + SBS_CANTRCVMORE =$0020; { can't receive more data from peer } + SBS_RCVATMARK =$0040; { at mark on input } + +implementation + +end. + diff --git a/sys/vfs/vsys_generic.pas b/sys/vfs/vsys_generic.pas new file mode 100644 index 00000000..450eddd4 --- /dev/null +++ b/sys/vfs/vsys_generic.pas @@ -0,0 +1,1477 @@ +unit vsys_generic; + +{$mode ObjFPC}{$H+} +{$CALLING SysV_ABI_CDecl} + +interface + +uses + mqueue, + kern_mtx, + kern_mtxpool, + kern_condvar, + kern_thr, + vselinfo, + vioccom, + vpoll, + vselect, + vuio, + vfile, + vcapability, + vfs_vnode, + vfcntl, + vfilio, + vfiledesc, + time, + signal, + vm, + vmparam; + +const + SYS_IOCTL_SMALL_SIZE =128; { bytes } + SYS_IOCTL_SMALL_ALIGN=8; { bytes } + + SELTD_PENDING=$0001; { We have pending events. } + SELTD_RESCAN =$0002; { Doing a rescan. 
} + +type +{ + * One seltd per-thread allocated on demand as needed. + * + * t - protected by st_mtx + * k - Only accessed by curthread or read-only + } + p_selfd=^t_selfd; + + p_seltd=^t_seltd; + t_seltd=packed record + st_selq :STAILQ_HEAD; { (k) List of selfds. } + st_free1:p_selfd; { (k) free fd for read set. } + st_free2:p_selfd; { (k) free fd for write set. } + st_mtx :mtx; { Protects struct seltd } + st_wait :t_cv; { (t) Wait channel. } + st_flags:Integer; { (t) SELTD_ flags. } + end; + + { + * One selfd allocated per-thread per-file-descriptor. + * f - protected by sf_mtx + } + t_selfd=packed record + sf_link :STAILQ_ENTRY; { (k) fds owned by this td. } + sf_threads:TAILQ_ENTRY; { (f) fds on this selinfo. } + sf_si :p_selinfo; { (f) selinfo when linked. } + sf_mtx :p_mtx; { Pointer to selinfo mtx. } + sf_td :p_seltd; { (k) owning seltd. } + sf_cookie :Pointer; { (k) fd or pollfd. } + end; + +function _pollout(td:p_kthread;fds,ufds:p_pollfd;nfd:DWORD):Integer; +function pollscan(td:p_kthread;fds:p_pollfd;nfd:DWORD):Integer; +function pollrescan(td:p_kthread):Integer; +function selscan(td:p_kthread;ibits,obits:pp_fd_mask;nfd:Integer):Integer; +function selrescan(td:p_kthread;ibits,obits:pp_fd_mask):Integer; +procedure selfdalloc(td:p_kthread;cookie:Pointer); +procedure selfdfree(stp:p_seltd;sfp:p_selfd); +procedure seldrain(sip:p_selinfo); +procedure selrecord(selector:p_kthread;sip:p_selinfo); +function dofileread(fd:Integer;fp:p_file;auio:p_uio;offset:Int64;flags:Integer):Integer; +function dofilewrite(fd:Integer;fp:p_file;auio:p_uio;offset:Int64;flags:Integer):Integer; +procedure doselwakeup(sip:p_selinfo;pri:Integer); +procedure seltdinit(td:p_kthread); +function seltdwait(td:p_kthread;timo:Int64):Integer; +procedure seltdclear(td:p_kthread); +function poll_no_poll(events:Integer):Integer; + +// + +function sys_read(fd:Integer;buf:Pointer;nbyte:QWORD):Integer; +function sys_pread(fd:Integer;buf:Pointer;nbyte:QWORD;offset:Int64):Integer; +function 
sys_readv(fd:Integer;iovp:p_iovec;iovcnt:DWORD):Integer;
+function  sys_preadv(fd:Integer;iovp:p_iovec;iovcnt:DWORD;offset:Int64):Integer;
+function  sys_write(fd:Integer;buf:Pointer;nbyte:QWORD):Integer;
+function  sys_pwrite(fd:Integer;buf:Pointer;nbyte:QWORD;offset:Int64):Integer;
+function  sys_writev(fd:Integer;iovp:p_iovec;iovcnt:DWORD):Integer;
+function  sys_pwritev(fd:Integer;iovp:p_iovec;iovcnt:DWORD;offset:Int64):Integer;
+function  sys_ftruncate(fd:Integer;length:Int64):Integer;
+function  sys_ioctl(fd:Integer;com:QWORD;data:Pointer):Integer;
+function  sys_pselect(nd:Integer;
+                      uin,uou,uex:p_fd_set;
+                      uts:ptimespec;
+                      sm:p_sigset_t):Integer;
+function  sys_select(nd:Integer;
+                     uin,uou,uex:p_fd_set;
+                     utv:ptimeval):Integer;
+function  sys_poll(fds:p_pollfd;nfds:DWORD;timeout:Integer):Integer;
+
+implementation
+
+uses
+ atomic,
+ systm,
+ errno,
+ kern_descrip,
+ subr_uio,
+ kern_sig,
+ kern_thread,
+ kern_time,
+ sys_capability;
+
+var
+ mtxpool_select:p_mtx_pool;
+
+{
+ * Read through descriptor fd using the caller-prepared uio.
+ * The file is looked up with CAP_READ or CAP_SEEK and released again
+ * before returning. Returns 0 or an errno value.
+ }
+function kern_readv(fd:Integer;auio:p_uio):Integer;
+var
+  fp:p_file;
+begin
+  Result:=fget_read(fd, CAP_READ or CAP_SEEK, @fp);
+  if (Result=0) then
+  begin
+    { offset -1 / flags 0: no explicit offset is supplied }
+    Result:=dofileread(fd, fp, auio, -1, 0);
+    fdrop(fp);
+  end;
+end;
+
+{
+ * Positioned variant of kern_readv.
+ * Fails with ESPIPE when the file ops lack DFLAG_SEEKABLE and with
+ * EINVAL for a negative offset on anything but a VCHR vnode.
+ }
+function kern_preadv(fd:Integer;auio:p_uio;offset:Int64):Integer;
+var
+  fp:p_file;
+begin
+  Result:=fget_read(fd, CAP_READ, @fp);
+  if (Result<>0) then
+    Exit;
+
+  if ((fp^.f_ops^.fo_flags and DFLAG_SEEKABLE)=0) then
+  begin
+    Result:=ESPIPE;
+  end else
+  if (offset < 0) and (fp^.f_vnode^.v_type<>VCHR) then
+  begin
+    Result:=EINVAL;
+  end else
+  begin
+    Result:=dofileread(fd, fp, auio, offset, FOF_OFFSET);
+  end;
+
+  fdrop(fp);
+end;
+
+{
+ * Write through descriptor fd using the caller-prepared uio.
+ * The file is looked up with CAP_WRITE or CAP_SEEK and released again
+ * before returning. Returns 0 or an errno value.
+ }
+function kern_writev(fd:Integer;auio:p_uio):Integer;
+var
+  fp:p_file;
+begin
+  Result:=fget_write(fd, CAP_WRITE or CAP_SEEK, @fp);
+  if (Result=0) then
+  begin
+    { offset -1 / flags 0: no explicit offset is supplied }
+    Result:=dofilewrite(fd, fp, auio, -1, 0);
+    fdrop(fp);
+  end;
+end;
+
+{
+ * Positioned variant of kern_writev; same ESPIPE/EINVAL checks
+ * as kern_preadv.
+ }
+function kern_pwritev(fd:Integer;auio:p_uio;offset:Int64):Integer;
+var
+  fp:p_file;
+begin
+  Result:=fget_write(fd, CAP_WRITE, @fp);
+  if (Result<>0) then
+    Exit;
+
+  if ((fp^.f_ops^.fo_flags and DFLAG_SEEKABLE)=0) then
+  begin
+    Result:=ESPIPE;
+  end else
+  if (offset < 0) and (fp^.f_vnode^.v_type<>VCHR) then
+  begin
+    Result:=EINVAL;
+  end else
+  begin
+    Result:=dofilewrite(fd, fp, auio, offset, FOF_OFFSET);
+  end;
+
+  fdrop(fp);
+end;
+
+//
+
+{
+ * Read system call: wraps the single user buffer in a one-element
+ * iovec/uio and hands it to kern_readv.
+ * Requests larger than IOSIZE_MAX are rejected with EINVAL.
+ }
+function sys_read(fd:Integer;buf:Pointer;nbyte:QWORD):Integer;
+var
+  vec:iovec;
+  req:t_uio;
+begin
+  if (nbyte > IOSIZE_MAX) then
+    Exit(EINVAL);
+
+  vec.iov_base:=buf;
+  vec.iov_len :=nbyte;
+
+  req.uio_iov   :=@vec;
+  req.uio_iovcnt:=1;
+  req.uio_resid :=nbyte;
+  req.uio_segflg:=UIO_USERSPACE;
+
+  Result:=kern_readv(fd, @req);
+end;
+
+{
+ * Positioned read system call: as sys_read, but reads at the
+ * given offset via kern_preadv.
+ }
+function sys_pread(fd:Integer;buf:Pointer;nbyte:QWORD;offset:Int64):Integer;
+var
+  vec:iovec;
+  req:t_uio;
+begin
+  if (nbyte > IOSIZE_MAX) then
+    Exit(EINVAL);
+
+  vec.iov_base:=buf;
+  vec.iov_len :=nbyte;
+
+  req.uio_iov   :=@vec;
+  req.uio_iovcnt:=1;
+  req.uio_resid :=nbyte;
+  req.uio_segflg:=UIO_USERSPACE;
+
+  Result:=kern_preadv(fd, @req, offset);
+end;
+
+{
+ * Scatter read system call: the uio is built from the user iovec
+ * array by copyinuio and freed here once the read has finished.
+ }
+function sys_readv(fd:Integer;iovp:p_iovec;iovcnt:DWORD):Integer;
+var
+  req:p_uio;
+begin
+  Result:=copyinuio(iovp, iovcnt, @req);
+  if (Result<>0) then
+    Exit;
+
+  Result:=kern_readv(fd, req);
+  FreeMem(req);
+end;
+
+{
+ * Scatter positioned read system call.
+ }
+function sys_preadv(fd:Integer;iovp:p_iovec;iovcnt:DWORD;offset:Int64):Integer;
+var
+  req:p_uio;
+begin
+  Result:=copyinuio(iovp, iovcnt, @req);
+  if (Result<>0) then
+    Exit;
+
+  Result:=kern_preadv(fd, req, offset);
+  FreeMem(req);
+end;
+
+{
+ * Common code for readv and preadv that reads data in
+ * from a file using the passed in uio, offset, and flags.
+ }
+function dofileread(fd:Integer;fp:p_file;auio:p_uio;offset:Int64;flags:Integer):Integer;
+var
+  td:p_kthread;
+  done:Int64;
+begin
+  td:=curkthread;
+
+  { Finish zero length reads right here }
+  if (auio^.uio_resid=0) then
+  begin
+    td^.td_retval[0]:=0;
+    Exit(0);
+  end;
+
+  auio^.uio_td    :=td;
+  auio^.uio_rw    :=UIO_READ;
+  auio^.uio_offset:=offset;
+
+  { remember the requested size; the difference to uio_resid is what was moved }
+  done:=auio^.uio_resid;
+
+  Result:=fo_read(fp, auio, flags);
+
+  { an interrupted read that already moved data reports success }
+  if (Result<>0) and (auio^.uio_resid<>done) then
+  begin
+    if (Result=ERESTART) or (Result=EINTR) or (Result=EWOULDBLOCK) then
+      Result:=0;
+  end;
+
+  Dec(done,auio^.uio_resid);
+  td^.td_retval[0]:=done;
+end;
+
+{
+ * Write system call: wraps the single user buffer in a one-element
+ * iovec/uio and hands it to kern_writev.
+ * Requests larger than IOSIZE_MAX are rejected with EINVAL.
+ }
+function sys_write(fd:Integer;buf:Pointer;nbyte:QWORD):Integer;
+var
+  vec:iovec;
+  req:t_uio;
+begin
+  if (nbyte > IOSIZE_MAX) then
+    Exit(EINVAL);
+
+  vec.iov_base:=buf;
+  vec.iov_len :=nbyte;
+
+  req.uio_iov   :=@vec;
+  req.uio_iovcnt:=1;
+  req.uio_resid :=nbyte;
+  req.uio_segflg:=UIO_USERSPACE;
+
+  Result:=kern_writev(fd, @req);
+end;
+
+{
+ * Positioned write system call.
+ }
+function sys_pwrite(fd:Integer;buf:Pointer;nbyte:QWORD;offset:Int64):Integer;
+var
+  vec:iovec;
+  req:t_uio;
+begin
+  if (nbyte > IOSIZE_MAX) then
+    Exit(EINVAL);
+
+  vec.iov_base:=buf;
+  vec.iov_len :=nbyte;
+
+  req.uio_iov   :=@vec;
+  req.uio_iovcnt:=1;
+  req.uio_resid :=nbyte;
+  req.uio_segflg:=UIO_USERSPACE;
+
+  Result:=kern_pwritev(fd, @req, offset);
+end;
+
+{
+ * Gather write system call: the uio is built from the user iovec
+ * array by copyinuio and freed here once the write has finished.
+ }
+function sys_writev(fd:Integer;iovp:p_iovec;iovcnt:DWORD):Integer;
+var
+  req:p_uio;
+begin
+  Result:=copyinuio(iovp, iovcnt, @req);
+  if (Result<>0) then
+    Exit;
+
+  Result:=kern_writev(fd, req);
+  FreeMem(req);
+end;
+
+{
+ * Gather positioned write system call.
+ }
+function sys_pwritev(fd:Integer;iovp:p_iovec;iovcnt:DWORD;offset:Int64):Integer;
+var
+  req:p_uio;
+begin
+  Result:=copyinuio(iovp, iovcnt, @req);
+  if (Result<>0) then
+    Exit;
+
+  Result:=kern_pwritev(fd, req, offset);
+  FreeMem(req);
+end;
+
+{
+ * Shared back end of the write system calls: completes the uio
+ * (direction, owning thread, offset), calls fo_write and stores the
+ * number of bytes consumed in td_retval[0] of the current thread.
+ * Returns 0 or an errno value.
+ }
+function dofilewrite(fd:Integer;fp:p_file;auio:p_uio;offset:Int64;flags:Integer):Integer;
+var
+  td:p_kthread;
+  done:Int64;
+begin
+  td:=curkthread;
+
+  auio^.uio_td    :=td;
+  auio^.uio_rw    :=UIO_WRITE;
+  auio^.uio_offset:=offset;
+
+  { remember the requested size; the difference to uio_resid is what was moved }
+  done:=auio^.uio_resid;
+
+  //if (fp^.f_type=DTYPE_VNODE) and
+  //   ((fp^.f_vnread_flags and FDEVFS_VNODE)=0) then
+  // bwillwrite();
+
+  Result:=fo_write(fp, auio, flags);
+
+  if (Result<>0) then
+  begin
+    { an interrupted write that already moved data reports success }
+    if (auio^.uio_resid<>done) then
+    begin
+      if (Result=ERESTART) or (Result=EINTR) or (Result=EWOULDBLOCK) then
+        Result:=0;
+    end;
+
+    { Socket layer is responsible for issuing SIGPIPE. }
+    if (Result=EPIPE) and (fp^.f_type<>DTYPE_SOCKET) then
+    begin
+      PROC_LOCK();
+      tdsignal(td, SIGPIPE);
+      PROC_UNLOCK();
+    end;
+  end;
+
+  Dec(done,auio^.uio_resid);
+  td^.td_retval[0]:=done;
+end;
+
+{
+ * Truncate a file given a file descriptor.
+ *
+ * Can't use fget_write() here, since must return EINVAL and not EBADF if the
+ * descriptor isn't writable.
+ }
+function kern_ftruncate(fd:Integer;length:Int64):Integer;
+var
+ fp:p_file;
+ error:Integer;
+begin
+ if (length < 0) then
+  Exit(EINVAL);
+ error:=fget(fd, CAP_FTRUNCATE, @fp);
+ if (error<>0) then
+  Exit(error);
+ { a descriptor not opened for writing yields EINVAL, not EBADF }
+ if ((fp^.f_flag and FWRITE)=0) then
+ begin
+  fdrop(fp);
+  Exit(EINVAL);
+ end;
+ error:=fo_truncate(fp, length);
+ fdrop(fp);
+ Exit(error);
+end;
+
+{ ftruncate system call: thin wrapper around kern_ftruncate. }
+function sys_ftruncate(fd:Integer;length:Int64):Integer;
+begin
+ Exit(kern_ftruncate(fd, length));
+end;
+
+{
+ * Kernel side of ioctl: `data` must already point at kernel memory.
+ *
+ * FIONCLEX/FIOCLEX only toggle UF_EXCLOSE on the file and never reach
+ * fo_ioctl. FIONBIO/FIOASYNC update f_flag first and then forward a
+ * private copy of the integer argument to fo_ioctl.
+ * Returns 0 or an errno value (EBADF when the file is open neither for
+ * reading nor for writing).
+ }
+function kern_ioctl(fd:Integer;com:QWORD;data:Pointer):Integer;
+label
+ _out;
+var
+ fp:p_file;
+ error:Integer;
+ tmp:Integer;
+begin
+ error:=fget(fd, CAP_IOCTL, @fp);
+ if (error<>0) then
+  Exit(error);
+ if ((fp^.f_flag and (FREAD or FWRITE))=0) then
+ begin
+  fdrop(fp);
+  Exit(EBADF);
+ end;
+ case com of
+  FIONCLEX:
+   begin
+    atomic_clear_int(@fp^.f_exclose,UF_EXCLOSE);
+    goto _out;
+   end;
+  FIOCLEX:
+   begin
+    atomic_set_int(@fp^.f_exclose,UF_EXCLOSE);
+    goto _out;
+   end;
+  FIONBIO:
+   begin
+    tmp:=PInteger(data)^;
+    if (tmp<>0) then
+     atomic_set_int(@fp^.f_flag, FNONBLOCK)
+    else
+     atomic_clear_int(@fp^.f_flag, FNONBLOCK);
+    data:=@tmp;
+   end;
+  FIOASYNC:
+   begin
+    tmp:=PInteger(data)^;
+    if (tmp<>0) then
+     atomic_set_int(@fp^.f_flag, FASYNC)
+    else
+     atomic_clear_int(@fp^.f_flag, FASYNC);
+    data:=@tmp;
+   end;
+ end;
+ error:=fo_ioctl(fp, com, data);
+_out:
+ fdrop(fp);
+ Exit(error);
+end;
+
+{
+ * ioctl system call.
+ *
+ * `data` is the raw user argument. Depending on the direction bits of
+ * `com` it is an integer value (IOC_VOID with a size), or a user pointer
+ * whose contents are copied into / out of a kernel buffer around the
+ * call to kern_ioctl.
+ *
+ * The user pointer and the kernel buffer are kept in separate variables
+ * (udata/kdata): the previous code overwrote `data` with the kernel
+ * buffer and then did copyin(data,data)/copyout(data,data), and called
+ * FillChar on the pointer variable itself instead of the buffer.
+ *
+ * Returns 0 or an errno value (ENOTTY for a malformed command).
+ }
+function sys_ioctl(fd:Integer;com:QWORD;data:Pointer):Integer;
+label
+ _out;
+var
+ smalldata:array[0..SYS_IOCTL_SMALL_SIZE-1] of Byte; //__aligned(SYS_IOCTL_SMALL_ALIGN)
+ udata:Pointer; { argument exactly as passed by the caller }
+ kdata:Pointer; { kernel-side buffer handed to kern_ioctl }
+ arg,error:Integer;
+ size:DWORD;
+begin
+ if (com > $ffffffff) then
+ begin
+  Writeln('WARNING pid %d (%s): ioctl sign-extension ioctl ',com);
+  com:=com and $ffffffff;
+ end;
+
+ {
+  * Interpret high order word to find amount of data to be
+  * copied to/from the user's address space.
+  }
+ size:=IOCPARM_LEN(com);
+ if (size > IOCPARM_MAX) or
+    ((com and (IOC_VOID or IOC_IN or IOC_OUT))=0) or
+    (((com and (IOC_IN or IOC_OUT))<>0) and (size=0)) or
+    (((com and IOC_VOID)<>0) and (size > 0) and (size<>sizeof(Integer))) then
+  Exit(ENOTTY);
+
+ udata:=data;
+
+ if (size > 0) then
+ begin
+  if ((com and IOC_VOID)<>0) then
+  begin
+   { Integer argument. }
+   arg:=ptrint(udata);
+   kdata:=@arg;
+   size:=0;
+  end else
+  begin
+   if (size > SYS_IOCTL_SMALL_SIZE) then
+    kdata:=AllocMem(size)
+   else
+    kdata:=@smalldata;
+  end;
+ end else
+ begin
+  { no payload: the handler receives the address of the raw argument }
+  kdata:=@udata;
+ end;
+
+ if ((com and IOC_IN)<>0) then
+ begin
+  error:=copyin(udata, kdata, size);
+  if (error<>0) then
+   goto _out;
+ end else
+ if ((com and IOC_OUT)<>0) then
+ begin
+  {
+   * Zero the buffer so the user always
+   * gets back something deterministic.
+   }
+  FillChar(kdata^,size,0);
+ end;
+
+ error:=kern_ioctl(fd, com, kdata);
+
+ if (error=0) and ((com and IOC_OUT)<>0) then
+  error:=copyout(kdata, udata, size);
+
+_out:
+ { only buffers larger than smalldata came from AllocMem }
+ if (size > SYS_IOCTL_SMALL_SIZE) then
+  FreeMem(kdata);
+ Exit(error);
+end;
+
+function poll_no_poll(events:Integer):Integer;
+begin
+ {
+  * return true for read/write.  If the user asked for something
+  * special, return POLLNVAL, so that clients have a way of
+  * determining reliably whether or not the extended
+  * functionality is present without hard-coding knowledge
+  * of specific filesystem implementations.
+ }
+ if ((events and (not POLLSTANDARD))=0) then
+  Result:=events and (POLLIN or POLLOUT or POLLRDNORM or POLLWRNORM)
+ else
+  Result:=POLLNVAL;
+end;
+
+function kern_select(nd:Integer;
+                     fd_in,fd_ou,fd_ex:p_fd_set;
+                     tvp:ptimeval;
+                     abi_nfdbits:Integer):Integer; forward;
+
+{
+ * select with an optional temporary signal mask.
+ * When uset is given it is installed with SIG_SETMASK, the previous
+ * mask is saved in td_oldsigmask and TDP_OLDMASK/TDF_ASTPENDING are
+ * raised on the current thread before kern_select runs.
+ * Returns 0 or an errno value.
+ }
+function kern_pselect(nd:Integer;
+                      uin,uou,uex:p_fd_set;
+                      tvp:ptimeval;
+                      uset:p_sigset_t;
+                      abi_nfdbits:Integer):Integer;
+var
+  td:p_kthread;
+begin
+  td:=curkthread;
+
+  if (uset<>nil) then
+  begin
+    Result:=kern_sigprocmask(td, SIG_SETMASK, uset, @td^.td_oldsigmask, 0);
+    if (Result<>0) then
+      Exit;
+
+    td^.td_pflags:=td^.td_pflags or TDP_OLDMASK;
+
+    {
+     * Make sure that ast() is called on return to
+     * usermode and TDP_OLDMASK is cleared, restoring old
+     * sigmask.
+     }
+    thread_lock(td);
+    td^.td_flags:=td^.td_flags or TDF_ASTPENDING;
+    thread_unlock(td);
+  end;
+
+  Result:=kern_select(nd, uin, uou, uex, tvp, abi_nfdbits);
+end;
+
+{
+ * pselect system call: copies in the optional timespec (converted to
+ * a timeval) and the optional signal mask, then calls kern_pselect
+ * with NFDBITS as the ABI word size.
+ }
+function sys_pselect(nd:Integer;
+                     uin,uou,uex:p_fd_set;
+                     uts:ptimespec;
+                     sm:p_sigset_t):Integer;
+var
+  k_ts  :timespec;
+  k_tv  :timeval;
+  k_mask:sigset_t;
+  tvp   :ptimeval;
+  uset  :p_sigset_t;
+begin
+  tvp :=nil;
+  uset:=nil;
+
+  if (uts<>nil) then
+  begin
+    Result:=copyin(uts, @k_ts, sizeof(k_ts));
+    if (Result<>0) then
+      Exit;
+    TIMESPEC_TO_TIMEVAL(@k_tv, @k_ts);
+    tvp:=@k_tv;
+  end;
+
+  if (sm<>nil) then
+  begin
+    Result:=copyin(sm, @k_mask, sizeof(k_mask));
+    if (Result<>0) then
+      Exit;
+    uset:=@k_mask;
+  end;
+
+  Result:=kern_pselect(nd, uin, uou, uex, tvp, uset, NFDBITS);
+end;
+
+{
+ * select system call: copies in the optional timeval and calls
+ * kern_select with NFDBITS as the ABI word size.
+ }
+function sys_select(nd:Integer;
+                    uin,uou,uex:p_fd_set;
+                    utv:ptimeval):Integer;
+var
+  k_tv:timeval;
+  tvp :ptimeval;
+begin
+  tvp:=nil;
+
+  if (utv<>nil) then
+  begin
+    Result:=copyin(utv, @k_tv, sizeof(k_tv));
+    if (Result<>0) then
+      Exit;
+    tvp:=@k_tv;
+  end;
+
+  Result:=kern_select(nd, uin, uou, uex, tvp, NFDBITS);
+end;
+
+{
+ * In the unlikely case when user specified n greater than the last
+ * open file descriptor, check that no bits
are set after the last
+ * valid fd.  We must return EBADF if any is set.
+ *
+ * There are applications that rely on the behaviour.
+ *
+ * nd is fd_lastfile + 1.
+ }
+function select_check_badfd(fd_in:p_fd_set;nd,ndu,abi_nfdbits:Integer):Integer;
+var
+ addr,oaddr:PByte;
+ b,i,res:Integer;
+ bits:Byte;
+begin
+ if (nd >= ndu) or (fd_in=nil) then
+  Exit(0);
+
+ oaddr:=nil;
+ bits:=0; { silence gcc }
+ For i:=nd to ndu-1 do
+ begin
+  b:=i div NBBY;
+  addr:=PByte(fd_in) + b;
+  { fetch each user byte only once while scanning its bits }
+  if (addr<>oaddr) then
+  begin
+   res:=fubyte(addr^);
+   if (res=-1) then
+    Exit(EFAULT);
+   oaddr:=addr;
+   bits:=res;
+  end;
+  if ((bits and (1 shl (i mod NBBY)))<>0) then
+   Exit(EBADF);
+ end;
+ Exit(0);
+end;
+
+{
+ * Common implementation of select/pselect.
+ *
+ *  nd           number of descriptors to examine (EINVAL if negative)
+ *  fd_in/ou/ex  user fd_set pointers, each may be nil
+ *  tvp          kernel copy of the timeout, nil = wait without timeout
+ *  abi_nfdbits  bits per fd_mask word in the caller's ABI
+ *
+ * The number of ready descriptors is left in td_retval[0] by
+ * selscan/selrescan; the function result is 0 or an errno value.
+ *
+ * ibits/obits are arrays of three bitmap pointers (read, write, except),
+ * which is what selscan/selrescan index through pp_fd_mask. The input
+ * sets are copied in by getbits; the previous code left both pointers
+ * uninitialised and never fetched the user sets.
+ }
+function kern_select(nd:Integer;
+                     fd_in,fd_ou,fd_ex:p_fd_set;
+                     tvp:ptimeval;
+                     abi_nfdbits:Integer):Integer;
+label
+ done;
+var
+ td:p_kthread;
+ {
+  * The magic 2048 here is chosen to be just enough for FD_SETSIZE
+  * infds with the new FD_SETSIZE of 1024, and more than enough for
+  * FD_SETSIZE infds, outfds and exceptfds with the old FD_SETSIZE
+  * of 256.
+  }
+ s_selbits:array[0..((2048 + (NFDBITS - 1)) div NFDBITS)-1] of fd_mask;
+ ibits,obits:array[0..2] of p_fd_mask;
+ selbits,sbp:p_fd_mask;
+ _atv:timeval;
+ atv,rtv,ttv,timo:Int64;
+ error,lf,ndu:Integer;
+ nbufbytes,ncpbytes,ncpubytes,_nfdbits:DWORD;
+
+ {
+  * Reserve the output and input bitmap for set x and copy the user's
+  * input bits in. Output bitmaps are packed into the first half of
+  * the buffer, input bitmaps into the second half.
+  * Returns the copyin error, 0 on success or when name is nil.
+  }
+ function getbits(name:p_fd_set;x:Integer):Integer;
+ begin
+  Result:=0;
+  if (name=nil) then
+  begin
+   ibits[x]:=nil;
+   obits[x]:=nil;
+   Exit;
+  end;
+  ibits[x]:=p_fd_mask(PByte(sbp)+(nbufbytes div 2));
+  obits[x]:=sbp;
+  sbp:=p_fd_mask(PByte(sbp)+ncpbytes);
+  Result:=copyin(name, ibits[x], ncpubytes);
+  if (Result<>0) then
+   Exit;
+  { clear the tail between the ABI-sized copy and the native word size }
+  if (ncpbytes>ncpubytes) then
+   FillChar((PByte(ibits[x])+ncpubytes)^, ncpbytes-ncpubytes, 0);
+ end;
+
+ { Copy the result bitmap of set x back to the user; keeps the last failure in error. }
+ procedure putbits(name:p_fd_set;x:Integer);
+ var
+  error2:Integer;
+ begin
+  if (name<>nil) then
+  begin
+   error2:=copyout(obits[x], name, ncpubytes);
+   if (error2<>0) then
+    error:=error2;
+  end;
+ end;
+
+begin
+ if (nd < 0) then
+  Exit(EINVAL);
+
+ td:=curkthread;
+ ndu:=nd;
+
+ //lf:=fd_table.fd_lastfile;
+ //if (nd > lf + 1)
+ // nd:=lf + 1;
+
+ error:=select_check_badfd(fd_in, nd, ndu, abi_nfdbits);
+ if (error<>0) then
+  Exit(error);
+ error:=select_check_badfd(fd_ou, nd, ndu, abi_nfdbits);
+ if (error<>0) then
+  Exit(error);
+ error:=select_check_badfd(fd_ex, nd, ndu, abi_nfdbits);
+ if (error<>0) then
+  Exit(error);
+
+ {
+  * Allocate just enough bits for the non-nil fd_sets.  Use the
+  * preallocated auto buffer if possible.
+  }
+ _nfdbits:=roundup(nd, NFDBITS);
+ ncpbytes:=_nfdbits div NBBY;
+ ncpubytes:=roundup(nd, abi_nfdbits) div NBBY;
+ nbufbytes:=0;
+
+ if (fd_in<>nil) then
+  Inc(nbufbytes,2 * ncpbytes);
+ if (fd_ou<>nil) then
+  Inc(nbufbytes,2 * ncpbytes);
+ if (fd_ex<>nil) then
+  Inc(nbufbytes,2 * ncpbytes);
+
+ if (nbufbytes <= sizeof(s_selbits)) then
+  selbits:=@s_selbits[0]
+ else
+  selbits:=AllocMem(nbufbytes);
+
+ {
+  * Assign pointers into the bit buffers and fetch the input bits.
+  * Put the output buffers together so that they can be bzeroed
+  * together.
+  }
+ sbp:=selbits;
+
+ error:=getbits(fd_in, 0);
+ if (error<>0) then
+  goto done;
+ error:=getbits(fd_ou, 1);
+ if (error<>0) then
+  goto done;
+ error:=getbits(fd_ex, 2);
+ if (error<>0) then
+  goto done;
+
+ { zero the output half of the buffer itself, not the pointer variable }
+ if (nbufbytes<>0) then
+  FillChar(selbits^, nbufbytes div 2,0);
+
+ if (tvp<>nil) then
+ begin
+  _atv:=tvp^;
+  if (itimerfix(@_atv)<>0) then
+  begin
+   error:=EINVAL;
+   goto done;
+  end;
+  atv:=TIMEVAL_TO_UNIT(@_atv);
+  rtv:=get_unit_uptime;
+  atv:=atv+rtv;
+ end else
+ begin
+  atv:=0;
+ end;
+ timo:=0;
+ seltdinit(td);
+ { Iterate until the timeout expires or descriptors become ready. }
+ repeat
+  error:=selscan(td, @ibits[0], @obits[0], nd);
+  if (error<>0) or (td^.td_retval[0]<>0) then
+   break;
+  if (atv<>0) then
+  begin
+   rtv:=get_unit_uptime;
+   if (rtv>=atv) then
+    break;
+   ttv:=atv-rtv;
+
+   if (ttv>24*60*60*hz) then
+    timo:=24*60*60*hz
+   else
+    timo:=tvtohz(ttv);
+  end;
+  error:=seltdwait(td, timo);
+  if (error<>0) then
+   break;
+  error:=selrescan(td, @ibits[0], @obits[0]);
+  if (error<>0) or (td^.td_retval[0]<>0) then
+   break;
+ until false;
+ seltdclear(td);
+
+done:
+ { select is not restarted after signals... }
+ if (error=ERESTART) then
+  error:=EINTR;
+ if (error=EWOULDBLOCK) then
+  error:=0;
+
+ if (error=0) then
+ begin
+  putbits(fd_in, 0);
+  putbits(fd_ou, 1);
+  putbits(fd_ex, 2);
+ end;
+
+ { only a buffer that did not fit into s_selbits came from AllocMem }
+ if (selbits<>@s_selbits[0]) then
+  FreeMem(selbits);
+
+ Exit(error);
+end;
+{
+ * Convert a select bit set to poll flags.
+ *
+ * The backend always returns POLLHUP/POLLERR if appropriate and we
+ * return this as a set bit in any set.
+ }
+const
+ select_flags:array[0..2] of Integer=(
+  POLLRDNORM or POLLHUP or POLLERR,
+  POLLWRNORM or POLLHUP or POLLERR,
+  POLLRDBAND or POLLERR);
+
+{
+ * Build the fo_poll event mask for one descriptor: for each of the
+ * three sets that is present and has `bit` set in word `idx`, the
+ * matching select_flags entry is or-ed into the result.
+ }
+function selflags(ibits:pp_fd_mask;idx:Integer;bit:fd_mask):Integer;
+var
+  which:Integer;
+begin
+  Result:=0;
+  for which:=0 to 2 do
+  begin
+    if (ibits[which]<>nil) then
+    begin
+      if ((ibits[which][idx] and bit)<>0) then
+        Result:=Result or select_flags[which];
+    end;
+  end;
+end;
+
+{
+ * Set the appropriate output bits given a mask of fired events and the
+ * input bits originally requested. Returns the number of output bits
+ * newly set by this call.
+ }
+function selsetbits(ibits,obits:pp_fd_mask;idx:Integer;bit:fd_mask;events:Integer):Integer;
+var
+  which:Integer;
+begin
+  Result:=0;
+  for which:=0 to 2 do
+  begin
+    if ((events and select_flags[which])<>0) then
+    begin
+      if (ibits[which]<>nil) then
+      begin
+        if ((ibits[which][idx] and bit)<>0) then
+        begin
+          {
+           * XXX Check for a duplicate set.  This can occur because a
+           * socket calls selrecord() twice for each poll() call
+           * resulting in two selfds per real fd.  selrescan() will
+           * call selsetbits twice as a result.
+           }
+          if ((obits[which][idx] and bit)=0) then
+          begin
+            obits[which][idx]:=obits[which][idx] or bit;
+            Inc(Result);
+          end;
+        end;
+      end;
+    end;
+  end;
+end;
+
+{
+ * Look up the file behind fd for polling and store it in fpp^.
+ * On success (result 0) the stored file carries a reference the
+ * caller must drop; otherwise EBADF or the cap_funwrap error is returned.
+ }
+function getselfd_cap(fd:Integer;fpp:pp_file):Integer;
+var
+  outer:p_file;
+  inner:p_file;
+begin
+  outer:=fget_unlocked(fd);
+  if (outer=nil) then
+    Exit(EBADF);
+
+  {
+   * If the file descriptor is for a capability, test rights and use
+   * the file descriptor references by the capability.
+   }
+  Result:=cap_funwrap(outer, CAP_POLL_EVENT, @inner);
+  if (Result<>0) then
+  begin
+    fdrop(outer);
+    Exit;
+  end;
+
+  { swap the reference over to the unwrapped file when it differs }
+  if (outer<>inner) then
+  begin
+    fhold(inner);
+    fdrop(outer);
+    outer:=inner;
+  end;
+
+  fpp^:=outer;
+  Result:=0;
+end;
+
+{
+ * Traverse the list of fds attached to this thread's seltd and check for
+ * completion.
+ }
+{
+ * selrescan: every selfd on the thread's st_selq is released; for those
+ * whose selinfo pointer was cleared the descriptor is polled again and
+ * the matching output bits are set. The number of newly set bits is
+ * stored in td_retval[0]. Returns 0 or the getselfd_cap error.
+ *
+ * The list cursor is advanced right after selfdfree(): the previous code
+ * only advanced at the bottom of the loop, so the `continue` for
+ * non-fired entries re-visited the just-freed selfd forever.
+ }
+function selrescan(td:p_kthread;ibits,obits:pp_fd_mask):Integer;
+var
+ si:p_selinfo;
+ stp:p_seltd;
+ sfp:p_selfd;
+ sfn:p_selfd;
+ fp:p_file;
+ bit:fd_mask;
+ fd,ev,n,idx:Integer;
+ error:Integer;
+begin
+ stp:=td^.td_sel;
+ n:=0;
+
+ sfp:=STAILQ_FIRST(@stp^.st_selq);
+ while (sfp<>nil) do
+ begin
+  { fetch everything needed from sfp before it is released }
+  sfn:=STAILQ_NEXT(sfp,@sfp^.sf_link);
+  fd:=ptrint(sfp^.sf_cookie);
+  si:=sfp^.sf_si;
+  selfdfree(stp, sfp);
+  sfp:=sfn;
+  { If the selinfo wasn't cleared the event didn't fire. }
+  if (si<>nil) then
+   continue;
+  error:=getselfd_cap(fd, @fp);
+  if (error<>0) then
+   Exit(error);
+  idx:=fd div NFDBITS;
+  bit:=fd_mask(1) shl (fd mod NFDBITS);
+  ev:=fo_poll(fp, selflags(ibits, idx, bit));
+  fdrop(fp);
+  if (ev<>0) then
+   Inc(n,selsetbits(ibits, obits, idx, bit, ev));
+ end;
+ stp^.st_flags:=0;
+ td^.td_retval[0]:=n;
+ Exit(0);
+end;
+
+{
+ * Perform the initial filedescriptor scan and register ourselves with
+ * each selinfo.
+ *
+ * Walks descriptors 0..nfd-1 one fd_mask word (idx) at a time. The
+ * number of output bits set is stored in td_retval[0]. Returns 0 or
+ * the getselfd_cap error.
+ *
+ * fd and bit are advanced on every iteration, including descriptors
+ * that are in none of the sets: the previous `continue` skipped the
+ * increment and spun forever on the first unselected fd.
+ }
+function selscan(td:p_kthread;ibits,obits:pp_fd_mask;nfd:Integer):Integer;
+var
+ fp:p_file;
+ bit:fd_mask;
+ ev,flags,_end,fd:Integer;
+ n,idx:Integer;
+ error:Integer;
+begin
+ n:=0;
+ idx:=0;
+ fd:=0;
+ while (fd < nfd) do
+ begin
+  _end:=fd+NFDBITS;
+  if (_end>nfd) then _end:=nfd;
+  bit:=1;
+  while (fd < _end) do
+  begin
+   { Compute the list of events we're interested in. }
+   flags:=selflags(ibits, idx, bit);
+   if (flags<>0) then
+   begin
+    error:=getselfd_cap(fd, @fp);
+    if (error<>0) then
+     Exit(error);
+    selfdalloc(td, Pointer(ptrint(fd)));
+    ev:=fo_poll(fp, flags);
+    fdrop(fp);
+    if (ev<>0) then
+     Inc(n,selsetbits(ibits, obits, idx, bit, ev));
+   end;
+   //
+   bit:=bit shl 1;
+   Inc(fd);
+  end;
+  //
+  Inc(idx);
+ end;
+
+ td^.td_retval[0]:=n;
+ Exit(0);
+end;
+
+function sys_poll(fds:p_pollfd;nfds:DWORD;timeout:Integer):Integer;
+label
+ done,
+ _out;
+var
+ td:p_kthread;
+ bits:p_pollfd;
+ smallbits:array[0..31] of t_pollfd;
+ atv,rtv,ttv,timo:Int64;
+ error:Integer;
+ ni:QWORD;
+begin
+ if (nfds > maxfilesperproc) and (nfds > FD_SETSIZE) then
+  Exit(EINVAL);
+ td:=curkthread;
+ ni:=nfds * sizeof(t_pollfd);
+ if (ni > sizeof(smallbits)) then
+  bits:=AllocMem(ni)
+ else
+  bits:=@smallbits;
+ error:=copyin(fds, bits, ni);
+ if (error<>0) then
+  goto done;
+ if (timeout<>INFTIM) then
+ begin
+  atv:=USEC_TO_UNIT(_msec2usec(timeout));
+  rtv:=get_unit_uptime;
+  atv:=atv+rtv;
+ end else
+ begin
+  atv:=0;
+ end;
+ timo:=0;
+ seltdinit(td);
+ { Iterate until the timeout expires or descriptors become ready. }
+ repeat
+  error:=pollscan(td, bits, nfds);
+  if (error<>0) or (td^.td_retval[0]<>0) then
+   break;
+  if (atv<>0) then
+  begin
+   rtv:=get_unit_uptime;
+   if (rtv>=atv) then
+    break;
+   ttv:=atv-rtv;
+
+   if (ttv>24*60*60*hz) then
+    timo:=24*60*60*hz
+   else
+    timo:=tvtohz(ttv);
+  end;
+  error:=seltdwait(td, timo);
+  if (error<>0) then
+   break;
+  error:=pollrescan(td);
+  if (error<>0) or (td^.td_retval[0]<>0) then
+   break;
+ until false;
+ seltdclear(td);
+
+done:
+ { poll is not restarted after signals...
} + if (error=ERESTART) then + error:=EINTR; + if (error=EWOULDBLOCK) then + error:=0; + if (error=0) then + begin + error:=_pollout(td, bits, fds, nfds); + if (error<>0) then + goto _out; + end; +_out: + if (ni > sizeof(smallbits)) then + FreeMem(bits); + Exit(error); +end; + +function pollrescan(td:p_kthread):Integer; +var + stp:p_seltd; + sfp:p_selfd; + sfn:p_selfd; + si:p_selinfo; + fp:p_file; + fd:p_pollfd; + n:Integer; +begin + n:=0; + stp:=td^.td_sel; + FILEDESC_SLOCK(@fd_table); + sfp:=STAILQ_FIRST(@stp^.st_selq); + while (sfp<>nil) do + begin + sfn:=STAILQ_NEXT(sfp,@sfp^.sf_link); + // + fd:=p_pollfd(sfp^.sf_cookie); + si:=sfp^.sf_si; + selfdfree(stp, sfp); + { If the selinfo wasn't cleared the event didn't fire. } + if (si<>nil) then + continue; + fp:=fget_unlocked(fd^.fd); + if (fp=nil) or (cap_funwrap(fp, CAP_POLL_EVENT, @fp)<>0) then + begin + if (fp<>nil) then + fdrop(fp); + // + fd^.revents:=POLLNVAL; + Inc(n); + continue; + end; + { + * Note: backend also Exits POLLHUP and + * POLLERR if appropriate. 
+ } + fd^.revents:=fo_poll(fp, fd^.events); + if (fd^.revents<>0) then + Inc(n); + // + fdrop(fp); + // + sfp:=sfn; + end; + FILEDESC_SUNLOCK(@fd_table); + stp^.st_flags:=0; + td^.td_retval[0]:=n; + Exit(0); +end; + +function _pollout(td:p_kthread;fds,ufds:p_pollfd;nfd:DWORD):Integer; +var + error:Integer; + i,n:DWORD; +begin + error:=0; + i:=0; + n:=0; + For i:=0 to nfd-1 do + begin + error:=copyout(@fds^.revents, @ufds^.revents, sizeof(ufds^.revents)); + if (error<>0) then + Exit(error); + if (fds^.revents<>0) then + Inc(n); + Inc(fds); + Inc(ufds); + end; + td^.td_retval[0]:=n; + Exit(0); +end; + +function pollscan(td:p_kthread;fds:p_pollfd;nfd:DWORD):Integer; +var + i:Integer; + fp:p_file; + n:Integer; +begin + n:=0; + i:=0; + FILEDESC_SLOCK(@fd_table); + While (i < nfd) do + begin + //if (fds^.fd >= fd_table.fd_nfiles) then + //begin + // fds^.revents:=POLLNVAL; + // Inc(n); + //end else + if (fds^.fd < 0) then + begin + fds^.revents:=0; + end else + begin + fp:=fget_unlocked(fds^.fd); + if ((fp=nil) or (cap_funwrap(fp, CAP_POLL_EVENT, @fp)<>0)) then + begin + fds^.revents:=POLLNVAL; + Inc(n); + end else + begin + { + * Note: backend also Exits POLLHUP and + * POLLERR if appropriate. + } + selfdalloc(td, fds); + fds^.revents:=fo_poll(fp, fds^.events); + { + * POSIX requires POLLOUT to be never + * set simultaneously with POLLHUP. + } + if ((fds^.revents and POLLHUP)<>0) then + fds^.revents:=fds^.revents and (not POLLOUT); + + if (fds^.revents<>0) then + Inc(n); + end; + // + if (fp<>nil) then + fdrop(fp); + end; + // + Inc(i); + Inc(fds); + end; + FILEDESC_SUNLOCK(@fd_table); + td^.td_retval[0]:=n; + Exit(0); +end; + +{ + * Preallocate two selfds associated with 'cookie'. Some fo_poll routines + * have two select sets, one for read and another for write. 
+ }
+procedure selfdalloc(td:p_kthread;cookie:Pointer);
+var
+ seltd:p_seltd;
+ slot:p_selfd;
+begin
+ seltd:=td^.td_sel;
+ { First spare: allocate it if missing, then stamp owner and cookie. }
+ slot:=seltd^.st_free1;
+ if (slot=nil) then
+ begin
+  slot:=AllocMem(SizeOf(t_selfd));
+  seltd^.st_free1:=slot;
+ end;
+ slot^.sf_td:=seltd;
+ slot^.sf_cookie:=cookie;
+ { Second spare, handled the same way. }
+ slot:=seltd^.st_free2;
+ if (slot=nil) then
+ begin
+  slot:=AllocMem(SizeOf(t_selfd));
+  seltd^.st_free2:=slot;
+ end;
+ slot^.sf_td:=seltd;
+ slot^.sf_cookie:=cookie;
+end;
+
+{
+ * Unlink sfp from the thread queue and, under its mutex, from the
+ * selinfo it is still attached to (if any), then free it.
+ }
+procedure selfdfree(stp:p_seltd;sfp:p_selfd);
+var
+ guard:p_mtx;
+ owner:p_selinfo;
+begin
+ STAILQ_REMOVE(@stp^.st_selq,sfp,@sfp^.sf_link);
+ guard:=sfp^.sf_mtx;
+ mtx_lock(guard^);
+ { sf_si is read only after the lock is taken. }
+ owner:=sfp^.sf_si;
+ if (owner<>nil) then
+  TAILQ_REMOVE(@owner^.si_tdlist,sfp,@sfp^.sf_threads);
+ mtx_unlock(guard^);
+ FreeMem(sfp);
+end;
+
+{ Drain the waiters tied to all the selfds belonging to the specified selinfo. }
+procedure seldrain(sip:p_selinfo);
+begin
+ {
+  * doselwakeup() already does exactly this, so simply call it.
+  * The caller should eventually take care to avoid races between a
+  * thread calling select()/poll() and the file descriptor being
+  * detached, but those races are the same ones selwakeup() has.
+ }
+ doselwakeup(sip, -1);
+end;
+
+{
+ * Record a select request: attach one of the preallocated selfds of
+ * the selecting thread to the selinfo sip.
+ }
+procedure selrecord(selector:p_kthread;sip:p_selinfo);
+var
+ sfp:p_selfd;
+ stp:p_seltd;
+ mtxp:p_mtx;
+begin
+ stp:=selector^.td_sel;
+ { Nothing is recorded while a rescan is in progress. }
+ if ((stp^.st_flags and SELTD_RESCAN)<>0) then
+  Exit;
+ { Take the first spare descriptor that is available. }
+ if (stp^.st_free1<>nil) then
+ begin
+  sfp:=stp^.st_free1;
+  stp^.st_free1:=nil;
+ end else
+ if (stp^.st_free2<>nil) then
+ begin
+  sfp:=stp^.st_free2;
+  stp^.st_free2:=nil;
+ end else
+ begin
+  Assert(false,'selrecord: No free selfd on selq');
+  Exit;
+ end;
+ { Use the mutex of the selinfo, or one from the pool when it has none. }
+ mtxp:=sip^.si_mtx;
+ if (mtxp=nil) then
+  mtxp:=mtx_pool_find(mtxpool_select, sip);
+ { Fill in the selfd and put it on the queue of the thread. }
+ sfp^.sf_si:=sip;
+ sfp^.sf_mtx:=mtxp;
+ STAILQ_INSERT_TAIL(@stp^.st_selq,sfp,@sfp^.sf_link);
+ { With the lock held, initialize the selinfo if nobody has yet. }
+ mtx_lock(mtxp^);
+ if (sip^.si_mtx=nil) then
+ begin
+  sip^.si_mtx:=mtxp;
+  TAILQ_INIT(@sip^.si_tdlist);
+ end;
+ { Append this selfd to the listeners of the selinfo. }
+ TAILQ_INSERT_TAIL(@sip^.si_tdlist,sfp,@sfp^.sf_threads);
+ mtx_unlock(sip^.si_mtx^);
+end;
+
+{ Wake up a selecting thread. }
+
+procedure selwakeup(sip:p_selinfo);
+begin
+ doselwakeup(sip, -1);
+end;
+
+{ Wake up a selecting thread, and set its priority. }
+procedure selwakeuppri(sip:p_selinfo;pri:Integer);
+begin
+ doselwakeup(sip, pri);
+end;
+
+{
+ * Do a wakeup when a selectable event occurs: detach every selfd from
+ * sip, flag its thread as pending and broadcast on its condition
+ * variable with priority pri.
+ }
+procedure doselwakeup(sip:p_selinfo;pri:Integer);
+var
+ cur:p_selfd;
+ nxt:p_selfd;
+ waiter:p_seltd;
+begin
+ { If it's not initialized there can't be any waiters. }
+ if (sip^.si_mtx=nil) then
+  Exit;
+ { Locking the selinfo locks all selfds associated with it. }
+ mtx_lock(sip^.si_mtx^);
+ cur:=TAILQ_FIRST(@sip^.si_tdlist);
+ while (cur<>nil) do
+ begin
+  nxt:=TAILQ_NEXT(cur,@cur^.sf_threads);
+  waiter:=cur^.sf_td;
+  {
+   * Once this selfd is off the list and its sf_si is cleared,
+   * seltdclear will know to ignore this si.
+  }
+  TAILQ_REMOVE(@sip^.si_tdlist,cur,@cur^.sf_threads);
+  cur^.sf_si:=nil;
+  mtx_lock(waiter^.st_mtx);
+  waiter^.st_flags:=waiter^.st_flags or SELTD_PENDING;
+  cv_broadcastpri(@waiter^.st_wait, pri);
+  mtx_unlock(waiter^.st_mtx);
+  //
+  cur:=nxt;
+ end;
+ mtx_unlock(sip^.si_mtx^);
+end;
+
+{
+ * Make sure the thread has a seltd (created on first use) and reset
+ * its flags and selfd queue.
+ }
+procedure seltdinit(td:p_kthread);
+var
+ stp:p_seltd;
+begin
+ stp:=td^.td_sel;
+ if (stp=nil) then
+ begin
+  stp:=AllocMem(sizeof(t_seltd));
+  td^.td_sel:=stp;
+  mtx_init(stp^.st_mtx, 'sellck');
+  cv_init(@stp^.st_wait,'select');
+ end;
+ stp^.st_flags:=0;
+ STAILQ_INIT(@stp^.st_selq);
+end;
+
+{
+ * Sleep until an event is pending for the thread.
+ *
+ * timo > 0 selects the timed wait, otherwise the wait has no timeout.
+ * Returns 0 when an event was already pending, else the result of the
+ * condition variable wait.
+ }
+function seltdwait(td:p_kthread;timo:Int64):Integer;
+var
+ stp:p_seltd;
+ error:Integer;
+begin
+ stp:=td^.td_sel;
+ {
+  * An event of interest may occur while we do not hold the seltd
+  * locked so check the pending flag before we sleep.
+ }
+ mtx_lock(stp^.st_mtx);
+ { Any further calls to selrecord will be a rescan. }
+ stp^.st_flags:=stp^.st_flags or SELTD_RESCAN;
+ if ((stp^.st_flags and SELTD_PENDING)<>0) then
+  error:=0
+ else
+ if (timo > 0) then
+  error:=_cv_timedwait_sig(@stp^.st_wait, @stp^.st_mtx, timo)
+ else
+  error:=_cv_wait_sig(@stp^.st_wait, @stp^.st_mtx);
+ mtx_unlock(stp^.st_mtx);
+
+ Exit(error);
+end;
+
+{ Release the seltd of the thread together with its spare selfds. }
+procedure seltdfini(td:p_kthread);
+var
+ stp:p_seltd;
+begin
+ stp:=td^.td_sel;
+ if (stp<>nil) then
+ begin
+  if (stp^.st_free1<>nil) then
+   FreeMem(stp^.st_free1);
+  if (stp^.st_free2<>nil) then
+   FreeMem(stp^.st_free2);
+  td^.td_sel:=nil;
+  FreeMem(stp);
+ end;
+end;
+
+{
+ * Remove the references to the thread from all of the objects we were
+ * polling.
+ }
+procedure seltdclear(td:p_kthread);
+var
+ stp:p_seltd;
+ cur:p_selfd;
+ nxt:p_selfd;
+begin
+ stp:=td^.td_sel;
+ cur:=STAILQ_FIRST(@stp^.st_selq);
+ while (cur<>nil) do
+ begin
+  { Remember the successor first, selfdfree releases cur. }
+  nxt:=STAILQ_NEXT(cur,@cur^.sf_link);
+  selfdfree(stp, cur);
+  cur:=nxt;
+ end;
+ stp^.st_flags:=0;
+end;
+
+{ Create the mutex pool used for selinfos that have no mutex of their own. }
+procedure selectinit();
+begin
+ mtxpool_select:=mtx_pool_create('select mtxpool', 128);
+end;
+
+initialization
+ selectinit();
+
+end.
+ diff --git a/sys/vfs/vuio.pas b/sys/vfs/vuio.pas index 8ce5ca01..eda389ce 100644 --- a/sys/vfs/vuio.pas +++ b/sys/vfs/vuio.pas @@ -21,6 +21,7 @@ type UIO_NOCOPY // don't copy, already in object ); + pp_uio=^p_uio; p_uio=^t_uio; t_uio=packed record uio_iov :p_iovec; // scatter/gather list diff --git a/sys/vm/vm_map.pas b/sys/vm/vm_map.pas index ad343139..06a5b973 100644 --- a/sys/vm/vm_map.pas +++ b/sys/vm/vm_map.pas @@ -307,7 +307,7 @@ procedure vm_map_zinit(map:vm_map_t); begin map^.nentries:=0; map^.size:=0; - mtx_init(map^.lock); + mtx_init(map^.lock,'vm map (system)'); end; function vmspace_pmap(vm:p_vmspace):pmap_t; inline; @@ -433,7 +433,7 @@ end; procedure vm_map_init(map:vm_map_t;pmap:pmap_t;min,max:vm_offset_t); begin _vm_map_init(map, pmap, min, max); - mtx_init(map^.lock); + mtx_init(map^.lock,'user map'); end; { diff --git a/sys/vm/vm_object.pas b/sys/vm/vm_object.pas index ecc1d1bf..32887309 100644 --- a/sys/vm/vm_object.pas +++ b/sys/vm/vm_object.pas @@ -134,7 +134,7 @@ function vm_object_allocate(t:objtype_t;size:vm_pindex_t):vm_object_t; begin Result:=AllocMem(SizeOf(_vm_object)); - mtx_init(Result^.mtx); + mtx_init(Result^.mtx,'vm_object'); TAILQ_INIT(@Result^.memq);