// mirror of https://github.com/red-prig/fpPS4.git
unit vfs_subr;

{$mode ObjFPC}{$H+}
{$CALLING SysV_ABI_CDecl}

interface

uses
 mqueue,
 vmount,
 kern_param,
 sys_event,
 vfile,
 vstat,
 vnode,
 vnode_if,
 vdirent,
 vfcntl,
 kern_mtx,
 kern_condvar,
 kern_synch,
 time,
 kern_time,
 kern_thr;
|
|
|
|
type
|
|
t_insmntque1_dtr=procedure(v:p_vnode;p:Pointer);
|
|
|
|
function makedev(x,y:Integer):Integer;
|
|
|
|
function vfs_busy(mp:p_mount;flags:Integer):Integer;
|
|
procedure vfs_unbusy(mp:p_mount);
|
|
function vfs_getvfs(fsid:p_fsid):p_mount;
|
|
procedure vfs_getnewfsid(mp:p_mount);
|
|
procedure vfs_timestamp(tsp:p_timespec);
|
|
procedure vattr_null(vap:p_vattr);
|
|
procedure v_incr_usecount(vp:p_vnode);
|
|
|
|
procedure vholdl(vp:p_vnode);
|
|
procedure vdropl(vp:p_vnode);
|
|
procedure vgonel(vp:p_vnode);
|
|
|
|
procedure vhold(vp:p_vnode);
|
|
procedure vdrop(vp:p_vnode);
|
|
function vrecycle(vp:p_vnode):Integer;
|
|
procedure vgone(vp:p_vnode);
|
|
|
|
function vget(vp:p_vnode;flags:Integer):Integer;
|
|
procedure vref(vp:p_vnode);
|
|
function vrefcnt(vp:p_vnode):Integer;
|
|
procedure vrele(vp:p_vnode);
|
|
procedure vput(vp:p_vnode);
|
|
procedure vunref(vp:p_vnode);
|
|
|
|
procedure vinactive(vp:p_vnode);
|
|
function vflush(mp:p_mount;rootrefs,flags:Integer):Integer;
|
|
|
|
procedure assert_vi_locked (vp:p_vnode;str:PChar);
|
|
procedure assert_vi_unlocked (vp:p_vnode;str:PChar);
|
|
procedure assert_vop_locked (vp:p_vnode;str:PChar);
|
|
procedure assert_vop_unlocked(vp:p_vnode;str:PChar);
|
|
procedure assert_vop_elocked (vp:p_vnode;str:PChar);
|
|
|
|
function VOP_WRITE_PRE(ap:p_vop_write_args;var osize,ooffset:Int64):Integer;
|
|
procedure VOP_WRITE_POST(ap:p_vop_write_args;ret:Integer;var osize,ooffset:Int64);
|
|
procedure vop_rename_fail(ap:p_vop_rename_args);
|
|
procedure vop_rename_pre(ap:p_vop_rename_args);
|
|
procedure vop_create_post(ap:p_vop_create_args;rc:Integer);
|
|
procedure vop_link_post(ap:p_vop_link_args;rc:Integer);
|
|
procedure vop_mkdir_post(ap:p_vop_mkdir_args;rc:Integer);
|
|
procedure vop_mknod_post(ap:p_vop_mknod_args;rc:Integer);
|
|
procedure vop_remove_post(ap:p_vop_remove_args;rc:Integer);
|
|
procedure vop_rename_post(ap:p_vop_rename_args;rc:Integer);
|
|
procedure vop_rmdir_post(ap:p_vop_rmdir_args;rc:Integer);
|
|
procedure vop_setattr_post(ap:p_vop_setattr_args;rc:Integer);
|
|
procedure vop_symlink_post(ap:p_vop_symlink_args;rc:Integer);
|
|
|
|
procedure vfs_event_init(); //SYSINIT(vfs_knlist, SI_SUB_VFS, SI_ORDER_ANY, vfs_event_init, NULL);
|
|
procedure vfs_event_signal(fsid:p_fsid;event:DWORD;data:ptrint);
|
|
function vfs_kqfilter(ap:p_vop_kqfilter_args):Integer;
|
|
|
|
function vfs_read_dirent(ap:p_vop_readdir_args;dp:p_dirent;off:QWORD):Integer;
|
|
procedure vfs_mark_atime(vp:p_vnode);
|
|
function vfs_unixify_accmode(accmode:p_accmode_t):Integer;
|
|
|
|
function vcount(vp:p_vnode):Integer;
|
|
function count_dev(dev:Pointer):Integer; //cdev
|
|
|
|
procedure vfs_msync(mp:p_mount;flags:Integer);
|
|
|
|
procedure destroy_vpollinfo_free(vi:p_vpollinfo);
|
|
procedure destroy_vpollinfo(vi:p_vpollinfo);
|
|
procedure v_addpollinfo(vp:p_vnode);
|
|
function vn_pollrecord(vp:p_vnode;events:Integer):Integer;
|
|
|
|
function vn_isdisk(vp:p_vnode;errp:PInteger):Boolean;
|
|
|
|
function vaccess(_type:vtype;
|
|
file_mode:mode_t;
|
|
file_uid:uid_t;
|
|
file_gid:gid_t;
|
|
accmode:accmode_t;
|
|
privused:PInteger):Integer;
|
|
|
|
function getnewvnode(tag:PChar;mp:p_mount;vops:p_vop_vector;vpp:pp_vnode):Integer;
|
|
|
|
procedure insmntque_stddtr(vp:p_vnode;dtr_arg:Pointer);
|
|
function insmntque1(vp:p_vnode;mp:p_mount;dtr:t_insmntque1_dtr;dtr_arg:Pointer):Integer;
|
|
function insmntque(vp:p_vnode;mp:p_mount):Integer;
|
|
|
|
function vinvalbuf(vp:p_vnode;flags,slpflag,slptimeo:Integer):Integer;
|
|
|
|
function __mnt_vnode_next_all(mvp:pp_vnode;mp:p_mount):p_vnode;
|
|
function __mnt_vnode_first_all(mvp:pp_vnode;mp:p_mount):p_vnode;
|
|
procedure __mnt_vnode_markerfree_all(mvp:pp_vnode;mp:p_mount);
|
|
|
|
procedure mnt_vnode_markerfree_active(mvp:pp_vnode;mp:p_mount);
|
|
function mnt_vnode_next_active(mvp:pp_vnode;mp:p_mount):p_vnode;
|
|
|
|
function __mnt_vnode_next_active(mvp:pp_vnode;mp:p_mount):p_vnode;
|
|
function __mnt_vnode_first_active(mvp:pp_vnode;mp:p_mount):p_vnode;
|
|
procedure __mnt_vnode_markerfree_active(mvp:pp_vnode;mp:p_mount);
|
|
|
|
procedure vntblinit; //SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_FIRST, vntblinit, NULL);
|
|
procedure vnlru_proc(); //SYSINIT(vnlru, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, @vnlru_kp);
|
|
|
|
function filt_fsattach(kn:p_knote):Integer;
|
|
procedure filt_fsdetach(kn:p_knote);
|
|
function filt_fsevent(kn:p_knote;hint:QWORD):Integer;
|
|
|
|
const
|
|
fs_filtops:t_filterops=(
|
|
f_isfd :0;
|
|
f_attach:@filt_fsattach;
|
|
f_detach:@filt_fsdetach;
|
|
f_event :@filt_fsevent;
|
|
);
|
|
|
|
{
|
|
* List of vnodes that are ready for recycling.
|
|
}
|
|
var
|
|
numvnodes:QWORD=0;
|
|
|
|
vnode_free_list:TAILQ_HEAD=(tqh_first:nil;tqh_last:@vnode_free_list.tqh_first); //vnode
|
|
|
|
mntid_mtx:mtx;
|
|
vnode_free_list_mtx:mtx;
|
|
|
|
syncer_delayno:Integer;
|
|
syncer_mask:QWORD;
|
|
|
|
//LIST_HEAD(synclist, bufobj);
|
|
//static struct synclist *syncer_workitem_pending[2];
|
|
|
|
sync_mtx:mtx;
|
|
sync_wakeup:t_cv;
|
|
|
|
syncer_maxdelay:Integer=32;
|
|
syncdelay:Integer=30;
|
|
filedelay:Integer=30;
|
|
dirdelay:Integer=29;
|
|
metadelay:Integer=28;
|
|
|
|
rushjob:Integer;
|
|
stat_rush_requests:Integer;
|
|
|
|
fs_knlist:t_knlist;
|
|
|
|
const
|
|
SYNCER_SHUTDOWN_SPEEDUP=4;
|
|
|
|
var
|
|
sync_vnode_count:Integer;
|
|
syncer_worklist_len:Integer;
|
|
|
|
type
|
|
syncer_state=(SYNCER_RUNNING, SYNCER_SHUTTING_DOWN, SYNCER_FINAL_DELAY);
|
|
|
|
var
|
|
desiredvnodes :Integer=0;
|
|
wantfreevnodes:Integer=0;
|
|
freevnodes :Integer=0;
|
|
vnlru_nowhere :Integer=0;
|
|
|
|
implementation
|
|
|
|
uses
|
|
errno,
|
|
vfs_vnops,
|
|
subr_uio,
|
|
sys_vm_object,
|
|
vsys_generic,
|
|
kern_rangelock,
|
|
rtprio,
|
|
sys_conf;
|
|
|
|
//
|
|
|
|
var
|
|
dead_vnodeops:vop_vector; external;
|
|
|
|
//
|
|
|
|
{
|
|
* Macros to control when a vnode is freed and recycled. All require
|
|
* the vnode interlock.
|
|
}
|
|
function VCANRECYCLE(vp:p_vnode):Boolean; inline;
|
|
begin
|
|
Result:=((vp^.v_iflag and VI_FREE)<>0) and (vp^.v_holdcnt=0);
|
|
end;
|
|
|
|
function VSHOULDFREE(vp:p_vnode):Boolean; inline;
|
|
begin
|
|
Result:=((vp^.v_iflag and VI_FREE)=0) and (vp^.v_holdcnt=0);
|
|
end;
|
|
|
|
function VSHOULDBUSY(vp:p_vnode):Boolean; inline;
|
|
begin
|
|
Result:=((vp^.v_iflag and VI_FREE)<>0) and (vp^.v_holdcnt<>0);
|
|
end;
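
{
* VCANRECYCLE: on the free list (VI_FREE) with no holds - safe to recycle.
* VSHOULDFREE: no holds but not yet marked VI_FREE - should go to the free list.
* VSHOULDBUSY: held while still marked VI_FREE - must be pulled off the free list.
}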
|
|
|
|
|
|
var
|
|
{ Shift count for (uintptr_t)vp to initialize vp^.v_hash. }
|
|
vnsz2log:Integer;
|
|
|
|
{
|
|
* Initialize the vnode management data structures.
|
|
*
|
|
* Reevaluate the following cap on the number of vnodes after the physical
|
|
* memory size exceeds 512GB. In the limit, as the physical memory size
|
|
* grows, the ratio of physical pages to vnodes approaches sixteen to one.
|
|
}
|
|
const
|
|
MAXVNODES_MAX=(512 * (1024 * 1024 * 1024 div (16*1024) div 16));
|
|
v_page_count=524288;
|
|
|
|
procedure vntblinit;
|
|
var
|
|
i:DWORD;
|
|
begin
|
|
desiredvnodes:=10000;
|
|
if (desiredvnodes > MAXVNODES_MAX) then
|
|
begin
|
|
desiredvnodes:=MAXVNODES_MAX;
|
|
end;
|
|
wantfreevnodes:=desiredvnodes div 4;
|
|
|
|
mtx_init(mntid_mtx,'mntid');
|
|
mtx_init(vnode_free_list_mtx,'vnode_free_list');
|
|
{
|
|
* Initialize the filesystem syncer.
|
|
}
|
|
//syncer_workitem_pending[WI_MPSAFEQ]:=hashinit(syncer_maxdelay, M_VNODE,&syncer_mask);
|
|
//syncer_workitem_pending[WI_GIANTQ]:=hashinit(syncer_maxdelay, M_VNODE,&syncer_mask);
|
|
syncer_maxdelay:=syncer_mask + 1;
|
|
mtx_init(sync_mtx,'Syncer mtx');
|
|
cv_init(@sync_wakeup,'syncer');
|
|
|
|
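{
* Compute vnsz2log = floor(log2(sizeof(t_vnode))); getnewvnode() uses this
* shift to derive v_hash from the vnode address.
}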
i:=1;
|
|
While (i<=sizeof(t_vnode)) do
|
|
begin
|
|
Inc(vnsz2log);
|
|
i:=i shl 1;
|
|
end;
|
|
Dec(vnsz2log);
|
|
end;
|
|
|
|
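{
* Mark a mount point busy: take a reference and bump mnt_lockref so it
* cannot be unmounted while in use. If an unmount is in progress, fail
* with ENOENT under MBF_NOWAIT (or MNTK_REFEXPIRE), otherwise sleep until
* it completes. With MBF_MNTLSTLOCK the caller holds mountlist_mtx, which
* is dropped before sleeping and on success.
}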
function vfs_busy(mp:p_mount;flags:Integer):Integer;
|
|
begin
|
|
MNT_ILOCK(mp);
|
|
MNT_REF(mp);
|
|
|
|
while ((mp^.mnt_kern_flag and MNTK_UNMOUNT)<>0) do
|
|
begin
|
|
if ((flags and MBF_NOWAIT)<>0) or ((mp^.mnt_kern_flag and MNTK_REFEXPIRE)<>0) then
|
|
begin
|
|
MNT_REL(mp);
|
|
MNT_IUNLOCK(mp);
|
|
Exit(ENOENT);
|
|
end;
|
|
if ((flags and MBF_MNTLSTLOCK)<>0) then
|
|
begin
|
|
mtx_unlock(mountlist_mtx);
|
|
end;
|
|
mp^.mnt_kern_flag:=mp^.mnt_kern_flag or MNTK_MWAIT;
|
|
msleep(mp, MNT_MTX(mp), PVFS or PDROP,'vfs_busy', 0);
|
|
if ((flags and MBF_MNTLSTLOCK)<>0) then
|
|
begin
|
|
mtx_lock(mountlist_mtx);
|
|
end;
|
|
MNT_ILOCK(mp);
|
|
end;
|
|
|
|
if ((flags and MBF_MNTLSTLOCK)<>0) then
|
|
begin
|
|
mtx_unlock(mountlist_mtx);
|
|
end;
|
|
|
|
Inc(mp^.mnt_lockref);
|
|
MNT_IUNLOCK(mp);
|
|
Exit(0);
|
|
end;
|
|
|
|
{
|
|
* Free a busy filesystem.
|
|
}
|
|
procedure vfs_unbusy(mp:p_mount);
|
|
begin
|
|
MNT_ILOCK(mp);
|
|
MNT_REL(mp);
|
|
Assert(mp^.mnt_lockref>0,'negative mnt_lockref');
|
|
Dec(mp^.mnt_lockref);
|
|
if (mp^.mnt_lockref=0) and ((mp^.mnt_kern_flag and MNTK_DRAINING)<>0) then
|
|
begin
|
|
mp^.mnt_kern_flag:=mp^.mnt_kern_flag and (not MNTK_DRAINING);
|
|
wakeup(@mp^.mnt_lockref);
|
|
end;
|
|
MNT_IUNLOCK(mp);
|
|
end;
|
|
|
|
{
|
|
* Lookup a mount point by filesystem identifier.
|
|
}
|
|
function vfs_getvfs(fsid:p_fsid):p_mount;
|
|
var
|
|
mp:p_mount;
|
|
begin
|
|
mtx_lock(mountlist_mtx);
|
|
|
|
mp:=TAILQ_FIRST(@mountlist);
|
|
while (mp<>nil) do
|
|
begin
|
|
if (mp^.mnt_stat.f_fsid.val[0]=fsid^.val[0]) and
|
|
(mp^.mnt_stat.f_fsid.val[1]=fsid^.val[1]) then
|
|
begin
|
|
MNT_REF(mp);
|
|
mtx_unlock(mountlist_mtx);
|
|
Exit(mp);
|
|
end;
|
|
mp:=TAILQ_NEXT(mp,@mp^.mnt_list);
|
|
end;
|
|
mtx_unlock(mountlist_mtx);
|
|
Exit(nil);
|
|
end;
|
|
|
|
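{
* Combine a major (x) and minor (y) number into a single device id
* as (x shl 8) or y.
}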
function makedev(x,y:Integer):Integer; inline;
|
|
begin
|
|
Result:=(x shl 8) or y;
|
|
end;
|
|
|
|
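{
* Generate a new, unique filesystem id for the mount point: val[1] is the
* vfs type number and val[0] is a synthetic device number, probed with
* vfs_getvfs() until an unused value is found (under mntid_mtx).
}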
procedure vfs_getnewfsid(mp:p_mount);
|
|
var
|
|
mntid_base:Word;
|
|
nmp:p_mount;
|
|
tfsid:fsid_t;
|
|
mtype:Integer;
|
|
begin
|
|
mtx_lock(mntid_mtx);
|
|
mtype:=mp^.mnt_vfc^.vfc_typenum;
|
|
tfsid.val[1]:=mtype;
|
|
mntid_base:=0;
|
|
mtype:=(mtype and $FF) shl 24;
|
|
repeat
|
|
tfsid.val[0]:=makedev(255,mtype or ((mntid_base and $FF00) shl 8) or (mntid_base and $FF));
|
|
Inc(mntid_base);
|
|
nmp:=vfs_getvfs(@tfsid);
|
|
if (nmp=nil) then break;
|
|
MNT_REL(nmp);
|
|
until false;
|
|
mp^.mnt_stat.f_fsid.val[0]:=tfsid.val[0];
|
|
mp^.mnt_stat.f_fsid.val[1]:=tfsid.val[1];
|
|
mtx_unlock(mntid_mtx);
|
|
end;
|
|
|
|
{
|
|
* Get a current timestamp.
|
|
}
|
|
procedure vfs_timestamp(tsp:p_timespec);
|
|
begin
|
|
getnanotime(tsp);
|
|
end;
|
|
|
|
{
|
|
* Set vnode attributes to VNOVAL
|
|
}
|
|
procedure vattr_null(vap:p_vattr);
|
|
begin
|
|
vap^.va_type :=VNON;
|
|
vap^.va_size :=VNOVAL;
|
|
vap^.va_bytes :=VNOVAL;
|
|
vap^.va_mode :=VNOVAL;
|
|
vap^.va_nlink :=VNOVAL;
|
|
vap^.va_uid :=VNOVAL;
|
|
vap^.va_gid :=VNOVAL;
|
|
vap^.va_fsid :=VNOVAL;
|
|
vap^.va_fileid :=VNOVAL;
|
|
vap^.va_blocksize :=VNOVAL;
|
|
vap^.va_rdev :=VNOVAL;
|
|
vap^.va_atime.tv_sec :=VNOVAL;
|
|
vap^.va_atime.tv_nsec :=VNOVAL;
|
|
vap^.va_mtime.tv_sec :=VNOVAL;
|
|
vap^.va_mtime.tv_nsec :=VNOVAL;
|
|
vap^.va_ctime.tv_sec :=VNOVAL;
|
|
vap^.va_ctime.tv_nsec :=VNOVAL;
|
|
vap^.va_birthtime.tv_sec :=VNOVAL;
|
|
vap^.va_birthtime.tv_nsec:=VNOVAL;
|
|
vap^.va_flags :=VNOVAL;
|
|
vap^.va_gen :=VNOVAL;
|
|
vap^.va_vaflags :=0;
|
|
end;
|
|
|
|
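{
* Scan roughly one tenth of the mount's vnode list, rotating entries to
* the tail, and vgonel() every vnode that is unreferenced and not already
* doomed. Returns the number of vnodes reclaimed.
}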
function vlrureclaim(mp:p_mount):Integer;
|
|
label
|
|
next_iter,
|
|
next_iter_mntunlocked,
|
|
yield,
|
|
relock_mnt;
|
|
var
|
|
vp:p_vnode;
|
|
done :Integer;
|
|
trigger :Integer;
|
|
usevnodes:Integer;
|
|
count :Integer;
|
|
begin
|
|
usevnodes:=desiredvnodes;
|
|
if (usevnodes <= 0) then usevnodes:=1;
|
|
trigger:=v_page_count * 2 div usevnodes;
|
|
done:=0;
|
|
vn_start_write(nil, @mp, V_WAIT);
|
|
MNT_ILOCK(mp);
|
|
count:=mp^.mnt_nvnodelistsize div 10 + 1;
|
|
|
|
while (count<>0) do
|
|
begin
|
|
vp:=TAILQ_FIRST(@mp^.mnt_nvnodelist);
|
|
while (vp<>nil) do
|
|
begin
|
|
if (vp^.v_type<>VMARKER) then Break;
|
|
vp:=TAILQ_NEXT(vp,@vp^.v_nmntvnodes);
|
|
end;
|
|
|
|
if (vp=nil) then break;
|
|
|
|
TAILQ_REMOVE (@mp^.mnt_nvnodelist,vp,@vp^.v_nmntvnodes);
|
|
TAILQ_INSERT_TAIL(@mp^.mnt_nvnodelist,vp,@vp^.v_nmntvnodes);
|
|
Dec(count);
|
|
|
|
if (not VI_TRYLOCK(vp)) then
|
|
begin
|
|
goto next_iter;
|
|
end;
|
|
|
|
{
|
|
* If it's been deconstructed already, it's still
|
|
* referenced, or it exceeds the trigger, skip it.
|
|
}
|
|
if (vp^.v_usecount<>0) or
|
|
{((vlru_allow_cache_src=0) and (not LIST_EMPTY(@vp^.v_cache_src))) or}
|
|
((vp^.v_iflag and VI_DOOMED)<>0) {or
|
|
((vp^.v_object<>nil) and (vp^.v_object^.resident_page_count > trigger))} then
|
|
begin
|
|
VI_UNLOCK(vp);
|
|
goto next_iter;
|
|
end;
|
|
|
|
MNT_IUNLOCK(mp);
|
|
vholdl(vp);
|
|
if (VOP_LOCK(vp, LK_INTERLOCK or LK_EXCLUSIVE or LK_NOWAIT,{$INCLUDE %FILE%},{$INCLUDE %LINENUM%})<>0) then
|
|
begin
|
|
vdrop(vp);
|
|
goto next_iter_mntunlocked;
|
|
end;
|
|
VI_LOCK(vp);
|
|
{
|
|
* v_usecount may have been bumped after VOP_LOCK() dropped
|
|
* the vnode interlock and before it was locked again.
|
|
*
|
|
* It is not necessary to recheck VI_DOOMED because it can
|
|
* only be set by another thread that holds both the vnode
|
|
* lock and vnode interlock. If another thread has the
|
|
* vnode lock before we get to VOP_LOCK() and obtains the
|
|
* vnode interlock after VOP_LOCK() drops the vnode
|
|
* interlock, the other thread will be unable to drop the
|
|
* vnode lock before our VOP_LOCK() call fails.
|
|
}
|
|
if (vp^.v_usecount<>0) {or
|
|
((vlru_allow_cache_src=0) and (not LIST_EMPTY(@vp^.v_cache_src))) or
|
|
((vp^.v_object<>nil) and (vp^.v_object^.resident_page_count > trigger))} then
|
|
begin
|
|
VOP_UNLOCK(vp, LK_INTERLOCK);
|
|
goto next_iter_mntunlocked;
|
|
end;
|
|
|
|
Assert((vp^.v_iflag and VI_DOOMED)=0,'VI_DOOMED unexpectedly detected in vlrureclaim()');
|
|
//atomic_add_long(@recycles_count, 1);
|
|
vgonel(vp);
|
|
VOP_UNLOCK(vp, 0);
|
|
vdropl(vp);
|
|
Inc(done);
|
|
next_iter_mntunlocked:
|
|
//if (not should_yield()) then
|
|
goto relock_mnt;
|
|
//goto yield;
|
|
next_iter:
|
|
//if (not should_yield()) then
|
|
continue;
|
|
MNT_IUNLOCK(mp);
|
|
yield:
|
|
kern_yield(PRI_UNCHANGED);
|
|
relock_mnt:
|
|
MNT_ILOCK(mp);
|
|
end;
|
|
MNT_IUNLOCK(mp);
|
|
vn_finished_write(mp);
|
|
Exit(done);
|
|
end;
|
|
|
|
function vtryrecycle(vp:p_vnode):Integer; forward;
|
|
|
|
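{
* Free up to count vnodes from the head of the free list by handing them
* to vtryrecycle(). Expects vnode_free_list_mtx to be held; the lock is
* dropped and reacquired around each recycle attempt.
}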
procedure vnlru_free(count:Integer);
|
|
var
|
|
vp:p_vnode;
|
|
vfslocked:Integer;
|
|
begin
|
|
mtx_assert(vnode_free_list_mtx);
|
|
For count:=count downto 1 do
|
|
begin
|
|
vp:=TAILQ_FIRST(@vnode_free_list);
|
|
{
|
|
* The list can be modified while the free_list_mtx
|
|
* has been dropped and vp could be nil here.
|
|
}
|
|
if (vp=nil) then
|
|
break;
|
|
|
|
Assert(vp^.v_op<>nil,'vnlru_free: vnode already reclaimed.');
|
|
Assert((vp^.v_iflag and VI_FREE)<>0,'Removing vnode not on freelist');
|
|
Assert((vp^.v_iflag and VI_ACTIVE)=0,'Mangling active vnode');
|
|
|
|
TAILQ_REMOVE(@vnode_free_list,vp,@vp^.v_actfreelist);
|
|
{
|
|
* Don't recycle if we can't get the interlock.
|
|
}
|
|
if (not VI_TRYLOCK(vp)) then
|
|
begin
|
|
TAILQ_INSERT_TAIL(@vnode_free_list,vp,@vp^.v_actfreelist);
|
|
continue;
|
|
end;
|
|
Assert(VCANRECYCLE(vp),'vp inconsistent on freelist');
|
|
Dec(freevnodes);
|
|
vp^.v_iflag:=vp^.v_iflag and (not VI_FREE);
|
|
vholdl(vp);
|
|
mtx_unlock(vnode_free_list_mtx);
|
|
VI_UNLOCK(vp);
|
|
vfslocked:=VFS_LOCK_GIANT(vp^.v_mount);
|
|
vtryrecycle(vp);
|
|
VFS_UNLOCK_GIANT(vfslocked);
|
|
{
* If the recycle succeeded this vdrop will actually free
* the vnode. If not it will simply place it back on
* the free list.
}
|
|
vdrop(vp);
|
|
mtx_lock(vnode_free_list_mtx);
|
|
end;
|
|
end;
|
|
|
|
//static struct proc *vnlruproc;
|
|
var
|
|
vnlruproc_sig:Integer=0;
|
|
|
|
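{
* Body of the vnode LRU helper (run as a kernel process via SYSINIT in the
* original kernel): trims the free list down to wantfreevnodes, returns
* early when numvnodes is at or below ~90% of desiredvnodes, and otherwise
* walks the mount list once, reclaiming vnodes with vlrureclaim().
* vnlru_nowhere counts passes that reclaimed nothing.
}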
procedure vnlru_proc();
|
|
var
|
|
mp,nmp:p_mount;
|
|
done,vfslocked:Integer;
|
|
begin
|
|
//EVENTHANDLER_REGISTER(shutdown_pre_sync, kproc_shutdown, p, SHUTDOWN_PRI_FIRST);
|
|
|
|
//kproc_suspend_check(p);
|
|
mtx_lock(vnode_free_list_mtx);
|
|
if (freevnodes > wantfreevnodes) then
|
|
begin
|
|
vnlru_free(freevnodes - wantfreevnodes);
|
|
end;
|
|
if (numvnodes <= desiredvnodes * 9 div 10) then
|
|
begin
|
|
vnlruproc_sig:=0;
|
|
wakeup(@vnlruproc_sig);
|
|
//
|
|
mtx_unlock(vnode_free_list_mtx);
|
|
Exit;
|
|
end;
|
|
mtx_unlock(vnode_free_list_mtx);
|
|
|
|
done:=0;
|
|
mtx_lock(mountlist_mtx);
|
|
mp:=TAILQ_FIRST(@mountlist);
|
|
While (mp<>nil) do
|
|
begin
|
|
if (vfs_busy(mp, MBF_NOWAIT or MBF_MNTLSTLOCK)<>0) then
begin
nmp:=TAILQ_NEXT(mp,@mp^.mnt_list);
mp:=nmp; //advance before continue so the loop cannot spin on the same mount
continue;
end;
|
|
vfslocked:=VFS_LOCK_GIANT(mp);
|
|
Inc(done,vlrureclaim(mp));
|
|
VFS_UNLOCK_GIANT(vfslocked);
|
|
mtx_lock(mountlist_mtx);
|
|
nmp:=TAILQ_NEXT(mp,@mp^.mnt_list);
|
|
vfs_unbusy(mp);
|
|
//
|
|
mp:=nmp
|
|
end;
|
|
mtx_unlock(mountlist_mtx);
|
|
|
|
if (done=0) then
|
|
begin
|
|
Inc(vnlru_nowhere);
|
|
end;
|
|
|
|
end;
|
|
|
|
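{
* Try to vgone() a held vnode without blocking: returns EWOULDBLOCK if the
* vnode lock cannot be taken immediately, EBUSY if the filesystem is
* suspending or the vnode gained users in the meantime, and 0 on success.
}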
function vtryrecycle(vp:p_vnode):Integer;
|
|
var
|
|
vnmp:p_mount;
|
|
begin
|
|
Assert(vp^.v_holdcnt<>0,'vtryrecycle: Recycling vp %p without a reference.');
|
|
{
* This vnode may be found and locked via some other list; if so we
* can't recycle it yet.
}
|
|
if (VOP_LOCK(vp, LK_EXCLUSIVE or LK_NOWAIT,{$INCLUDE %FILE%},{$INCLUDE %LINENUM%})<>0) then
|
|
begin
|
|
Exit(EWOULDBLOCK);
|
|
end;
|
|
{
|
|
* Don't recycle if its filesystem is being suspended.
|
|
}
|
|
if (vn_start_write(vp, @vnmp, V_NOWAIT)<>0) then
|
|
begin
|
|
VOP_UNLOCK(vp, 0);
|
|
Exit(EBUSY);
|
|
end;
|
|
{
|
|
* If we got this far, we need to acquire the interlock and see if
|
|
* anyone picked up this vnode from another list. If not, we will
|
|
* mark it with DOOMED via vgonel() so that anyone who does find it
|
|
* will skip over it.
|
|
}
|
|
VI_LOCK(vp);
|
|
if (vp^.v_usecount<>0) then
|
|
begin
|
|
VOP_UNLOCK(vp, LK_INTERLOCK);
|
|
vn_finished_write(vnmp);
|
|
Exit(EBUSY);
|
|
end;
|
|
if ((vp^.v_iflag and VI_DOOMED)=0) then
|
|
begin
|
|
//atomic_add_long(@recycles_count, 1);
|
|
vgonel(vp);
|
|
end;
|
|
VOP_UNLOCK(vp, LK_INTERLOCK);
|
|
vn_finished_write(vnmp);
|
|
Exit(0);
|
|
end;
|
|
|
|
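{
* Called with vnode_free_list_mtx held when the vnode table may be full:
* kicks the vnlru helper and sleeps briefly, unless the filesystem is
* suspending, in which case allocation is allowed anyway. Returns ENFILE
* if numvnodes still exceeds desiredvnodes, 0 otherwise.
}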
function getnewvnode_wait(suspended:Integer):Integer;
|
|
begin
|
|
mtx_assert(vnode_free_list_mtx);
|
|
|
|
if (curkthread<>nil) and (numvnodes > desiredvnodes) then
|
|
begin
|
|
if (suspended<>0) then
|
|
begin
|
|
{
* File system is being suspended, we cannot risk a
* deadlock here, so allocate a new vnode anyway.
}
|
|
if (freevnodes > wantfreevnodes) then
|
|
begin
|
|
vnlru_free(freevnodes - wantfreevnodes);
|
|
end;
|
|
Exit(0);
|
|
end;
|
|
if (vnlruproc_sig=0) then
|
|
begin
|
|
vnlruproc_sig:=1; { avoid unnecessary wakeups }
|
|
wakeup(@vnlruproc_sig);
|
|
end;
|
|
msleep(@vnlruproc_sig,@vnode_free_list_mtx,PVFS,'vlruwk', hz);
|
|
end;
|
|
|
|
if (numvnodes>desiredvnodes) then
|
|
Exit(ENFILE)
|
|
else
|
|
Exit(0);
|
|
end;
|
|
|
|
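{
* Pre-reserve count vnode slots for the current thread (td_vp_reserv) so
* that subsequent getnewvnode() calls can skip the global accounting;
* getnewvnode_drop_reserve() below returns any unused slots.
}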
procedure getnewvnode_reserve(count:DWORD);
|
|
var
|
|
td:p_kthread;
|
|
begin
|
|
td:=curkthread;
|
|
{ First try to be quick and racy. }
|
|
if (System.InterlockedExchangeAdd64(numvnodes,count) + count <= desiredvnodes) then
|
|
begin
|
|
Inc(td^.td_vp_reserv,count);
|
|
Exit;
|
|
end else
|
|
begin
|
|
System.InterlockedExchangeAdd64(numvnodes, -count);
|
|
end;
|
|
|
|
mtx_lock(vnode_free_list_mtx);
|
|
while (count > 0) do
|
|
begin
|
|
if (getnewvnode_wait(0)=0) then
|
|
begin
|
|
Dec(count);
|
|
Inc(td^.td_vp_reserv);
|
|
System.InterlockedIncrement64(numvnodes);
|
|
end;
|
|
end;
|
|
mtx_unlock(vnode_free_list_mtx);
|
|
end;
|
|
|
|
procedure getnewvnode_drop_reserve();
|
|
var
|
|
td:p_kthread;
|
|
begin
|
|
td:=curkthread;
|
|
System.InterlockedExchangeAdd64(numvnodes,-td^.td_vp_reserv);
|
|
td^.td_vp_reserv:=0;
|
|
end;
|
|
|
|
{
|
|
* Return the next vnode from the free list.
|
|
}
|
|
function getnewvnode(tag:PChar;mp:p_mount;vops:p_vop_vector;vpp:pp_vnode):Integer;
|
|
label
|
|
alloc;
|
|
var
|
|
td:p_kthread;
|
|
vp:p_vnode;
|
|
//struct bufobj *bo;
|
|
error,susp:Integer;
|
|
begin
|
|
vp:=nil;
|
|
td:=curkthread;
|
|
if (td<>nil) then
|
|
begin
|
|
if (td^.td_vp_reserv>0) then
|
|
begin
|
|
Dec(td^.td_vp_reserv,1);
|
|
goto alloc;
|
|
end;
|
|
end;
|
|
mtx_lock(vnode_free_list_mtx);
|
|
{
|
|
* Lend our context to reclaim vnodes if they've exceeded the max.
|
|
}
|
|
if (freevnodes > wantfreevnodes) then
|
|
begin
|
|
vnlru_free(1);
|
|
end;
|
|
|
|
susp:=ord(False);
|
|
if (mp<>nil) then
|
|
begin
|
|
susp:=ord(((mp^.mnt_kern_flag and MNTK_SUSPEND)<>0));
|
|
end;
|
|
error:=getnewvnode_wait(susp);
|
|
|
|
System.InterlockedIncrement64(numvnodes);
|
|
mtx_unlock(vnode_free_list_mtx);
|
|
alloc:
|
|
//atomic_add_long(@vnodes_created, 1);
|
|
vp:=AllocMem(SizeOf(t_vnode));
|
|
{
|
|
* Setup locks.
|
|
}
|
|
vp^.v_vnlock:=@vp^.v_lock;
|
|
mtx_init(vp^.v_interlock,'vnode interlock');
|
|
{
|
|
* By default, don't allow shared locks unless filesystems
|
|
* opt-in.
|
|
}
|
|
mtx_init(vp^.v_vnlock^,'PVFS');
|
|
//lockinit(vp^.v_vnlock, PVFS, tag, VLKTIMEOUT, LK_NOSHARE);
|
|
{
|
|
* Initialize bufobj.
|
|
}
|
|
//bo:=@vp^.v_bufobj;
|
|
//bo^.__bo_vnode:=vp;
|
|
//mtx_init(BO_MTX(bo), "bufobj interlock", nil, MTX_DEF);
|
|
//bo^.bo_ops:=@buf_ops_bio;
|
|
//bo^.bo_private:=vp;
|
|
//TAILQ_INIT(@bo^.bo_clean.bv_hd);
|
|
//TAILQ_INIT(@bo^.bo_dirty.bv_hd);
|
|
{
|
|
* Initialize namecache.
|
|
}
|
|
//LIST_INIT(@vp^.v_cache_src);
|
|
//TAILQ_INIT(@vp^.v_cache_dst);
|
|
{
|
|
* Finalize various vnode identity bits.
|
|
}
|
|
vp^.v_type:=VNON;
|
|
vp^.v_tag:=tag;
|
|
vp^.v_op:=vops;
|
|
v_incr_usecount(vp);
|
|
vp^.v_data:=nil;
|
|
|
|
//mac_vnode_init(vp);
|
|
//if (mp<>nil and (mp^.mnt_flag and MNT_MULTILABEL)=0)
|
|
// mac_vnode_associate_singlelabel(mp, vp);
|
|
//else if (mp=nil and vops<>@dead_vnodeops)
|
|
// printf'nil mp in getnewvnode()\n';
|
|
|
|
if (mp<>nil) then
|
|
begin
|
|
//bo^.bo_bsize:=mp^.mnt_stat.f_iosize;
|
|
if ((mp^.mnt_kern_flag and MNTK_NOKNOTE)<>0) then
|
|
begin
|
|
vp^.v_vflag:=vp^.v_vflag or VV_NOKNOTE;
|
|
end;
|
|
end;
|
|
rangelock_init(@vp^.v_rl);
|
|
|
|
{
|
|
* For the filesystems which do not use vfs_hash_insert(),
|
|
* still initialize v_hash to have vfs_hash_index() useful.
|
|
* E.g., nilfs uses vfs_hash_index() on the lower vnode for
|
|
* its own hashing.
|
|
}
|
|
vp^.v_hash:=ptruint(vp) shr vnsz2log;
|
|
|
|
vpp^:=vp;
|
|
Exit(0);
|
|
end;
|
|
|
|
{
|
|
* Delete from old mount point vnode list, if on one.
|
|
}
|
|
procedure delmntque(vp:p_vnode);
|
|
var
|
|
mp:p_mount;
|
|
active:Integer;
|
|
begin
|
|
mp:=vp^.v_mount;
|
|
if (mp=nil) then Exit;
|
|
MNT_ILOCK(mp);
|
|
VI_LOCK(vp);
|
|
Assert(mp^.mnt_activevnodelistsize <= mp^.mnt_nvnodelistsize,
|
|
'Active vnode list size %d > Vnode list size %d');
|
|
active:=vp^.v_iflag and VI_ACTIVE;
|
|
vp^.v_iflag:=vp^.v_iflag and (not VI_ACTIVE);
|
|
if (active<>0) then
|
|
begin
|
|
mtx_lock(vnode_free_list_mtx);
|
|
TAILQ_REMOVE(@mp^.mnt_activevnodelist,vp,@vp^.v_actfreelist);
|
|
Dec(mp^.mnt_activevnodelistsize);
|
|
mtx_unlock(vnode_free_list_mtx);
|
|
end;
|
|
vp^.v_mount:=nil;
|
|
VI_UNLOCK(vp);
|
|
Assert(mp^.mnt_nvnodelistsize > 0,'bad mount point vnode list size');
|
|
TAILQ_REMOVE(@mp^.mnt_nvnodelist,vp,@vp^.v_nmntvnodes);
|
|
Dec(mp^.mnt_nvnodelistsize);
|
|
MNT_REL(mp);
|
|
MNT_IUNLOCK(mp);
|
|
end;
|
|
|
|
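{
* Default destructor for insmntque1(): when the vnode cannot be added to
* the mount's vnode list, detach its data, switch it to dead_vnodeops and
* dispose of it with vgone()/vput().
}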
procedure insmntque_stddtr(vp:p_vnode;dtr_arg:Pointer);
|
|
begin
|
|
vp^.v_data:=nil;
|
|
vp^.v_op:=@dead_vnodeops;
|
|
{ XXX non mp-safe fs may still call insmntque with vnode
|
|
unlocked }
|
|
if (VOP_ISLOCKED(vp)=0) then
|
|
begin
|
|
vn_lock(vp, LK_EXCLUSIVE or LK_RETRY,{$INCLUDE %FILE%},{$INCLUDE %LINENUM%});
|
|
end;
|
|
vgone(vp);
|
|
vput(vp);
|
|
end;
|
|
|
|
{
|
|
* Insert into list of vnodes for the new mount point, if available.
|
|
}
|
|
function insmntque1(vp:p_vnode;mp:p_mount;dtr:t_insmntque1_dtr;dtr_arg:Pointer):Integer;
|
|
var
|
|
locked:Integer;
|
|
begin
|
|
Assert(vp^.v_mount=nil,'insmntque: vnode already on per mount vnode list');
|
|
Assert(mp<>nil, 'Dont call insmntque(foo, nil)');
|
|
|
|
{
|
|
* We acquire the vnode interlock early to ensure that the
|
|
* vnode cannot be recycled by another process releasing a
|
|
* holdcnt on it before we get it on both the vnode list
|
|
* and the active vnode list. The mount mutex protects only
|
|
* manipulation of the vnode list and the vnode freelist
|
|
* mutex protects only manipulation of the active vnode list.
|
|
* Hence the need to hold the vnode interlock throughout.
|
|
}
|
|
MNT_ILOCK(mp);
|
|
VI_LOCK(vp);
|
|
if ((mp^.mnt_kern_flag and MNTK_NOINSMNTQ)<>0) and
|
|
(((mp^.mnt_kern_flag and MNTK_UNMOUNTF)<>0) or
|
|
(mp^.mnt_nvnodelistsize=0)) then
|
|
begin
|
|
locked:=VOP_ISLOCKED(vp);
|
|
if (locked=0) or
|
|
((locked=LK_EXCLUSIVE) and
|
|
((vp^.v_vflag and VV_FORCEINSMQ)=0)) then
|
|
begin
|
|
VI_UNLOCK(vp);
|
|
MNT_IUNLOCK(mp);
|
|
if (dtr<>nil) then
|
|
begin
|
|
dtr(vp, dtr_arg);
|
|
end;
|
|
Exit(EBUSY);
|
|
end;
|
|
end;
|
|
vp^.v_mount:=mp;
|
|
MNT_REF(mp);
|
|
TAILQ_INSERT_TAIL(@mp^.mnt_nvnodelist,vp,@vp^.v_nmntvnodes);
|
|
Assert(mp^.mnt_nvnodelistsize >= 0,'neg mount point vnode list size');
|
|
Inc(mp^.mnt_nvnodelistsize);
|
|
Assert((vp^.v_iflag and VI_ACTIVE)=0,'Activating already active vnode');
|
|
vp^.v_iflag:=vp^.v_iflag or VI_ACTIVE;
|
|
mtx_lock(vnode_free_list_mtx);
|
|
TAILQ_INSERT_HEAD(@mp^.mnt_activevnodelist,vp,@vp^.v_actfreelist);
|
|
Inc(mp^.mnt_activevnodelistsize);
|
|
mtx_unlock(vnode_free_list_mtx);
|
|
VI_UNLOCK(vp);
|
|
MNT_IUNLOCK(mp);
|
|
Exit(0);
|
|
end;
|
|
|
|
function insmntque(vp:p_vnode;mp:p_mount):Integer;
|
|
begin
|
|
Exit(insmntque1(vp, mp, @insmntque_stddtr, nil));
|
|
end;
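
{
* Hedged usage sketch (not part of this unit): a filesystem's node-creation
* path typically pairs getnewvnode() with insmntque(). The names 'myfs' and
* myfs_vnodeops below are hypothetical placeholders.
*
*   error:=getnewvnode('myfs',mp,@myfs_vnodeops,@vp);
*   if (error=0) then
*   begin
*    vn_lock(vp,LK_EXCLUSIVE or LK_RETRY,...);   file/line args omitted
*    error:=insmntque(vp,mp);                    on failure insmntque_stddtr reclaims vp
*   end;
}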
|
|
|
|
{
|
|
{
|
|
* Flush out and invalidate all buffers associated with a bufobj
|
|
* Called with the underlying object locked.
|
|
}
|
|
int
|
|
bufobj_invalbuf(struct bufobj *bo, int flags, int slpflag, int slptimeo)
|
|
begin
|
|
int error;
|
|
|
|
BO_LOCK(bo);
|
|
if (flags and V_SAVE) begin
|
|
error:=bufobj_wwait(bo, slpflag, slptimeo);
|
|
if (error) begin
|
|
BO_UNLOCK(bo);
|
|
Exit(error);
|
|
end;
|
|
if (bo^.bo_dirty.bv_cnt > 0) begin
|
|
BO_UNLOCK(bo);
|
|
if ((error:=BO_SYNC(bo, MNT_WAIT))<>0)
|
|
Exit(error);
|
|
{
|
|
* XXX We could save a lock/unlock if this was only
|
|
* enabled under INVARIANTS
|
|
}
|
|
BO_LOCK(bo);
|
|
if (bo^.bo_numoutput > 0 or bo^.bo_dirty.bv_cnt > 0)
|
|
panic'vinvalbuf: dirty bufs';
|
|
end;
|
|
end;
|
|
{
|
|
* If you alter this loop please notice that interlock is dropped and
|
|
* reacquired in flushbuflist. Special care is needed to ensure that
|
|
* no race conditions occur from this.
|
|
}
|
|
do begin
|
|
error:=flushbuflist(@bo^.bo_clean,
|
|
flags, bo, slpflag, slptimeo);
|
|
if (error=0 and !(flags and V_CLEANONLY))
|
|
error:=flushbuflist(@bo^.bo_dirty,
|
|
flags, bo, slpflag, slptimeo);
|
|
if (error<>0 and error<>EAGAIN) begin
|
|
BO_UNLOCK(bo);
|
|
Exit(error);
|
|
end;
|
|
end; while (error<>0);
|
|
|
|
{
|
|
* Wait for I/O to complete. XXX needs cleaning up. The vnode can
|
|
* have write I/O in-progress but if there is a VM object then the
|
|
* VM object can also have read-I/O in-progress.
|
|
}
|
|
do begin
|
|
bufobj_wwait(bo, 0, 0);
|
|
BO_UNLOCK(bo);
|
|
if (bo^.bo_object<>nil) begin
|
|
VM_OBJECT_LOCK(bo^.bo_object);
|
|
vm_object_pip_wait(bo^.bo_object, "bovlbx';
|
|
VM_OBJECT_UNLOCK(bo^.bo_object);
|
|
end;
|
|
BO_LOCK(bo);
|
|
end; while (bo^.bo_numoutput > 0);
|
|
BO_UNLOCK(bo);
|
|
|
|
{
|
|
* Destroy the copy in the VM cache, too.
|
|
}
|
|
if (bo^.bo_object<>nil and
|
|
(flags and (V_ALT or V_NORMAL or V_CLEANONLY))=0) begin
|
|
VM_OBJECT_LOCK(bo^.bo_object);
|
|
vm_object_page_remove(bo^.bo_object, 0, 0, (flags and V_SAVE) ?
|
|
OBJPR_CLEANONLY : 0);
|
|
VM_OBJECT_UNLOCK(bo^.bo_object);
|
|
end;
|
|
|
|
#ifdef INVARIANTS
|
|
BO_LOCK(bo);
|
|
if ((flags and (V_ALT or V_NORMAL or V_CLEANONLY))=0 and
|
|
(bo^.bo_dirty.bv_cnt > 0 or bo^.bo_clean.bv_cnt > 0))
|
|
panic'vinvalbuf: flush failed';
|
|
BO_UNLOCK(bo);
|
|
#endif
|
|
Exit(0);
|
|
end;
|
|
}
|
|
|
|
{
|
|
* Flush out and invalidate all buffers associated with a vnode.
|
|
* Called with the underlying object locked.
|
|
}
|
|
function vinvalbuf(vp:p_vnode;flags,slpflag,slptimeo:Integer):Integer;
|
|
begin
|
|
ASSERT_VOP_LOCKED(vp, 'vinvalbuf');
|
|
|
|
if (vp^.v_object<>nil) then
|
|
if (vm_object_t(vp^.v_object)^.handle<>vp) then
|
|
begin
|
|
Exit(0);
|
|
end;
|
|
|
|
//Exit(bufobj_invalbuf(@vp^.v_bufobj, flags, slpflag, slptimeo));
|
|
|
|
Result:=0;
|
|
end;
|
|
|
|
{
|
|
{
|
|
* Flush out buffers on the specified list.
|
|
*
|
|
}
|
|
static int
|
|
flushbuflist( struct bufv *bufv, int flags, struct bufobj *bo, int slpflag,
|
|
int slptimeo)
|
|
begin
|
|
struct buf *bp, *nbp;
|
|
int retval, error;
|
|
daddr_t lblkno;
|
|
b_xflags_t xflags;
|
|
|
|
ASSERT_BO_LOCKED(bo);
|
|
|
|
retval:=0;
|
|
TAILQ_FOREACH_SAFE(bp, &bufv^.bv_hd, b_bobufs, nbp) begin
|
|
if (((flags and V_NORMAL) and (bp^.b_xflags and BX_ALTDATA)) or
|
|
((flags and V_ALT) and (bp^.b_xflags and BX_ALTDATA)=0)) begin
|
|
continue;
|
|
end;
|
|
lblkno:=0;
|
|
xflags:=0;
|
|
if (nbp<>nil) begin
|
|
lblkno:=nbp^.b_lblkno;
|
|
xflags:=nbp^.b_xflags &
|
|
(BX_BKGRDMARKER or BX_VNDIRTY or BX_VNCLEAN);
|
|
end;
|
|
retval:=EAGAIN;
|
|
error:=BUF_TIMELOCK(bp,
|
|
LK_EXCLUSIVE or LK_SLEEPFAIL or LK_INTERLOCK, BO_MTX(bo),
|
|
"flushbuf", slpflag, slptimeo);
|
|
if (error) begin
|
|
BO_LOCK(bo);
|
|
Exit(error<>ENOLCK ? error : EAGAIN);
|
|
end;
|
|
Assert(bp^.b_bufobj=bo,
|
|
'bp %p wrong b_bufobj %p should be %p",
|
|
bp, bp^.b_bufobj, bo));
|
|
if (bp^.b_bufobj<>bo) begin { XXX: necessary ? }
|
|
BUF_UNLOCK(bp);
|
|
BO_LOCK(bo);
|
|
Exit(EAGAIN);
|
|
end;
|
|
{
|
|
* XXX Since there are no node locks for NFS, I
|
|
* believe there is a slight chance that a delayed
|
|
* write will occur while sleeping just above, so
|
|
* check for it.
|
|
}
|
|
if (((bp^.b_flags and (B_DELWRI or B_INVAL))=B_DELWRI) and
|
|
(flags and V_SAVE)) begin
|
|
BO_LOCK(bo);
|
|
bremfree(bp);
|
|
BO_UNLOCK(bo);
|
|
bp^.b_flags:= or B_ASYNC;
|
|
bwrite(bp);
|
|
BO_LOCK(bo);
|
|
Exit(EAGAIN); { XXX: why not loop ? }
|
|
end;
|
|
BO_LOCK(bo);
|
|
bremfree(bp);
|
|
BO_UNLOCK(bo);
|
|
bp^.b_flags:= or (B_INVAL or B_RELBUF);
|
|
bp^.b_flags:= and ~B_ASYNC;
|
|
brelse(bp);
|
|
BO_LOCK(bo);
|
|
if (nbp<>nil and
|
|
(nbp^.b_bufobj<>bo or
|
|
nbp^.b_lblkno<>lblkno or
|
|
(nbp^.b_xflags &
|
|
(BX_BKGRDMARKER or BX_VNDIRTY or BX_VNCLEAN))<>xflags))
|
|
break; { nbp invalid }
|
|
end;
|
|
Exit(retval);
|
|
end;
|
|
|
|
{
|
|
* Truncate a file's buffer and pages to a specified length. This
|
|
* is in lieu of the old vinvalbuf mechanism, which performed unneeded
|
|
* sync activity.
|
|
}
|
|
int
|
|
vtruncbuf(vp:p_vnode, struct ucred *cred, struct thread *td,
|
|
off_t length, int blksize)
|
|
begin
|
|
struct buf *bp, *nbp;
|
|
int anyfreed;
|
|
int trunclbn;
|
|
struct bufobj *bo;
|
|
|
|
CTR5(KTR_VFS, "%s: vp %p with cred %p and block %d:%ju", {$I %LINE%},
|
|
vp, cred, blksize, (uintmax_t)length);
|
|
|
|
{
|
|
* Round up to the *next* lbn.
|
|
}
|
|
trunclbn:=(length + blksize - 1) div blksize;
|
|
|
|
ASSERT_VOP_LOCKED(vp, "vtruncbuf';
|
|
restart:
|
|
bo:=@vp^.v_bufobj;
|
|
BO_LOCK(bo);
|
|
anyfreed:=1;
|
|
for (;anyfreed;) begin
|
|
anyfreed:=0;
|
|
TAILQ_FOREACH_SAFE(bp, &bo^.bo_clean.bv_hd, b_bobufs, nbp) begin
|
|
if (bp^.b_lblkno < trunclbn)
|
|
continue;
|
|
if (BUF_LOCK(bp,
|
|
LK_EXCLUSIVE or LK_SLEEPFAIL or LK_INTERLOCK,
|
|
BO_MTX(bo))=ENOLCK)
|
|
goto restart;
|
|
|
|
BO_LOCK(bo);
|
|
bremfree(bp);
|
|
BO_UNLOCK(bo);
|
|
bp^.b_flags:= or (B_INVAL or B_RELBUF);
|
|
bp^.b_flags:= and ~B_ASYNC;
|
|
brelse(bp);
|
|
anyfreed:=1;
|
|
|
|
BO_LOCK(bo);
|
|
if (nbp<>nil and
|
|
(((nbp^.b_xflags and BX_VNCLEAN)=0) or
|
|
(nbp^.b_vp<>vp) or
|
|
(nbp^.b_flags and B_DELWRI))) begin
|
|
BO_UNLOCK(bo);
|
|
goto restart;
|
|
end;
|
|
end;
|
|
|
|
TAILQ_FOREACH_SAFE(bp, &bo^.bo_dirty.bv_hd, b_bobufs, nbp) begin
|
|
if (bp^.b_lblkno < trunclbn)
|
|
continue;
|
|
if (BUF_LOCK(bp,
|
|
LK_EXCLUSIVE or LK_SLEEPFAIL or LK_INTERLOCK,
|
|
BO_MTX(bo))=ENOLCK)
|
|
goto restart;
|
|
BO_LOCK(bo);
|
|
bremfree(bp);
|
|
BO_UNLOCK(bo);
|
|
bp^.b_flags:= or (B_INVAL or B_RELBUF);
|
|
bp^.b_flags:= and ~B_ASYNC;
|
|
brelse(bp);
|
|
anyfreed:=1;
|
|
|
|
BO_LOCK(bo);
|
|
if (nbp<>nil and
|
|
(((nbp^.b_xflags and BX_VNDIRTY)=0) or
|
|
(nbp^.b_vp<>vp) or
|
|
(nbp^.b_flags and B_DELWRI)=0)) begin
|
|
BO_UNLOCK(bo);
|
|
goto restart;
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
if (length > 0) begin
|
|
restartsync:
|
|
TAILQ_FOREACH_SAFE(bp, &bo^.bo_dirty.bv_hd, b_bobufs, nbp) begin
|
|
if (bp^.b_lblkno > 0)
|
|
continue;
|
|
{
|
|
* Since we hold the vnode lock this should only
|
|
* fail if we're racing with the buf daemon.
|
|
}
|
|
if (BUF_LOCK(bp,
|
|
LK_EXCLUSIVE or LK_SLEEPFAIL or LK_INTERLOCK,
|
|
BO_MTX(bo))=ENOLCK) begin
|
|
goto restart;
|
|
end;
|
|
Assert((bp^.b_flags and B_DELWRI), vp,
|
|
'buf(%p) on dirty queue without DELWRI", bp));
|
|
|
|
BO_LOCK(bo);
|
|
bremfree(bp);
|
|
BO_UNLOCK(bo);
|
|
bawrite(bp);
|
|
BO_LOCK(bo);
|
|
goto restartsync;
|
|
end;
|
|
end;
|
|
|
|
bufobj_wwait(bo, 0, 0);
|
|
BO_UNLOCK(bo);
|
|
vnode_pager_setsize(vp, length);
|
|
|
|
Exit(0);
|
|
end;
|
|
|
|
{
|
|
* buf_splay() - splay tree core for the clean/dirty list of buffers in
|
|
* a vnode.
|
|
*
|
|
* NOTE: We have to deal with the special case of a background bitmap
|
|
* buffer, a situation where two buffers will have the same logical
|
|
* block offset. We want (1) only the foreground buffer to be accessed
|
|
* in a lookup and (2) must differentiate between the foreground and
|
|
* background buffer in the splay tree algorithm because the splay
|
|
* tree cannot normally handle multiple entities with the same 'index'.
|
|
* We accomplish this by adding differentiating flags to the splay tree's
|
|
* numerical domain.
|
|
}
|
|
static
|
|
struct buf *
|
|
buf_splay(daddr_t lblkno, b_xflags_t xflags, struct buf *root)
|
|
begin
|
|
struct buf dummy;
|
|
struct buf *lefttreemax, *righttreemin, *y;
|
|
|
|
if (root=nil)
|
|
Exit(nil);
|
|
lefttreemax:=righttreemin:=@dummy;
|
|
for (;;) begin
|
|
if (lblkno < root^.b_lblkno or
|
|
(lblkno=root^.b_lblkno and
|
|
(xflags and BX_BKGRDMARKER) < (root^.b_xflags and BX_BKGRDMARKER))) begin
|
|
if ((y:=root^.b_left)=nil)
|
|
break;
|
|
if (lblkno < y^.b_lblkno) begin
|
|
{ Rotate right. }
|
|
root^.b_left:=y^.b_right;
|
|
y^.b_right:=root;
|
|
root:=y;
|
|
if ((y:=root^.b_left)=nil)
|
|
break;
|
|
end;
|
|
{ Link into the new root's right tree. }
|
|
righttreemin^.b_left:=root;
|
|
righttreemin:=root;
|
|
end; else if (lblkno > root^.b_lblkno or
|
|
(lblkno=root^.b_lblkno and
|
|
(xflags and BX_BKGRDMARKER) > (root^.b_xflags and BX_BKGRDMARKER))) begin
|
|
if ((y:=root^.b_right)=nil)
|
|
break;
|
|
if (lblkno > y^.b_lblkno) begin
|
|
{ Rotate left. }
|
|
root^.b_right:=y^.b_left;
|
|
y^.b_left:=root;
|
|
root:=y;
|
|
if ((y:=root^.b_right)=nil)
|
|
break;
|
|
end;
|
|
{ Link into the new root's left tree. }
|
|
lefttreemax^.b_right:=root;
|
|
lefttreemax:=root;
|
|
end; else begin
|
|
break;
|
|
end;
|
|
root:=y;
|
|
end;
|
|
{ Assemble the new root. }
|
|
lefttreemax^.b_right:=root^.b_left;
|
|
righttreemin^.b_left:=root^.b_right;
|
|
root^.b_left:=dummy.b_right;
|
|
root^.b_right:=dummy.b_left;
|
|
Exit(root);
|
|
end;
|
|
|
|
static void
|
|
buf_vlist_remove(struct buf *bp)
|
|
begin
|
|
struct buf *root;
|
|
struct bufv *bv;
|
|
|
|
Assert(bp^.b_bufobj<>nil, 'No b_bufobj %p", bp));
|
|
ASSERT_BO_LOCKED(bp^.b_bufobj);
|
|
Assert((bp^.b_xflags and (BX_VNDIRTY|BX_VNCLEAN)) !=
|
|
(BX_VNDIRTY|BX_VNCLEAN),
|
|
'buf_vlist_remove: Buf %p is on two lists", bp));
|
|
if (bp^.b_xflags and BX_VNDIRTY)
|
|
bv:=@bp^.b_bufobj^.bo_dirty;
|
|
else
|
|
bv:=@bp^.b_bufobj^.bo_clean;
|
|
if (bp<>bv^.bv_root) begin
|
|
root:=buf_splay(bp^.b_lblkno, bp^.b_xflags, bv^.bv_root);
|
|
Assert(root=bp, 'splay lookup failed in remove');
|
|
end;
|
|
if (bp^.b_left=nil) begin
|
|
root:=bp^.b_right;
|
|
end; else begin
|
|
root:=buf_splay(bp^.b_lblkno, bp^.b_xflags, bp^.b_left);
|
|
root^.b_right:=bp^.b_right;
|
|
end;
|
|
bv^.bv_root:=root;
|
|
TAILQ_REMOVE(@bv^.bv_hd, bp, b_bobufs);
|
|
bv^.bv_cnt--;
|
|
bp^.b_xflags:= and ~(BX_VNDIRTY or BX_VNCLEAN);
|
|
end;
|
|
|
|
{
|
|
* Add the buffer to the sorted clean or dirty block list using a
|
|
* splay tree algorithm.
|
|
*
|
|
* NOTE: xflags is passed as a constant, optimizing this inline function!
|
|
}
|
|
static void
|
|
buf_vlist_add(struct buf *bp, struct bufobj *bo, b_xflags_t xflags)
|
|
begin
|
|
struct buf *root;
|
|
struct bufv *bv;
|
|
|
|
ASSERT_BO_LOCKED(bo);
|
|
Assert((bp^.b_xflags and (BX_VNDIRTY|BX_VNCLEAN))=0,
|
|
'buf_vlist_add: Buf %p has existing xflags %d", bp, bp^.b_xflags));
|
|
bp^.b_xflags:= or xflags;
|
|
if (xflags and BX_VNDIRTY)
|
|
bv:=@bo^.bo_dirty;
|
|
else
|
|
bv:=@bo^.bo_clean;
|
|
|
|
root:=buf_splay(bp^.b_lblkno, bp^.b_xflags, bv^.bv_root);
|
|
if (root=nil) begin
|
|
bp^.b_left:=nil;
|
|
bp^.b_right:=nil;
|
|
TAILQ_INSERT_TAIL(@bv^.bv_hd, bp, b_bobufs);
|
|
end; else if (bp^.b_lblkno < root^.b_lblkno or
|
|
(bp^.b_lblkno=root^.b_lblkno and
|
|
(bp^.b_xflags and BX_BKGRDMARKER) < (root^.b_xflags and BX_BKGRDMARKER))) begin
|
|
bp^.b_left:=root^.b_left;
|
|
bp^.b_right:=root;
|
|
root^.b_left:=nil;
|
|
TAILQ_INSERT_BEFORE(root, bp, b_bobufs);
|
|
end; else begin
|
|
bp^.b_right:=root^.b_right;
|
|
bp^.b_left:=root;
|
|
root^.b_right:=nil;
|
|
TAILQ_INSERT_AFTER(@bv^.bv_hd, root, bp, b_bobufs);
|
|
end;
|
|
bv^.bv_cnt++;
|
|
bv^.bv_root:=bp;
|
|
end;
|
|
|
|
{
|
|
* Lookup a buffer using the splay tree. Note that we specifically avoid
|
|
* shadow buffers used in background bitmap writes.
|
|
*
|
|
* This code isn't quite efficient as it could be because we are maintaining
|
|
* two sorted lists and do not know which list the block resides in.
|
|
*
|
|
* During a "make buildworld" the desired buffer is found at one of
|
|
* the roots more than 60% of the time. Thus, checking both roots
|
|
* before performing either splay eliminates unnecessary splays on the
|
|
* first tree splayed.
|
|
}
|
|
struct buf *
|
|
gbincore(struct bufobj *bo, daddr_t lblkno)
|
|
begin
|
|
struct buf *bp;
|
|
|
|
ASSERT_BO_LOCKED(bo);
|
|
if ((bp:=bo^.bo_clean.bv_root)<>nil and
|
|
bp^.b_lblkno=lblkno and !(bp^.b_xflags and BX_BKGRDMARKER))
|
|
Exit(bp);
|
|
if ((bp:=bo^.bo_dirty.bv_root)<>nil and
|
|
bp^.b_lblkno=lblkno and !(bp^.b_xflags and BX_BKGRDMARKER))
|
|
Exit(bp);
|
|
if ((bp:=bo^.bo_clean.bv_root)<>nil) begin
|
|
bo^.bo_clean.bv_root:=bp:=buf_splay(lblkno, 0, bp);
|
|
if (bp^.b_lblkno=lblkno and !(bp^.b_xflags and BX_BKGRDMARKER))
|
|
Exit(bp);
|
|
end;
|
|
if ((bp:=bo^.bo_dirty.bv_root)<>nil) begin
|
|
bo^.bo_dirty.bv_root:=bp:=buf_splay(lblkno, 0, bp);
|
|
if (bp^.b_lblkno=lblkno and !(bp^.b_xflags and BX_BKGRDMARKER))
|
|
Exit(bp);
|
|
end;
|
|
Exit(nil);
|
|
end;
|
|
|
|
{
|
|
* Associate a buffer with a vnode.
|
|
}
|
|
void
|
|
bgetvp(vp:p_vnode, struct buf *bp)
|
|
begin
|
|
struct bufobj *bo;
|
|
|
|
bo:=@vp^.v_bufobj;
|
|
ASSERT_BO_LOCKED(bo);
|
|
Assert(bp^.b_vp=nil, bp^.b_vp, 'bgetvp: not free');
|
|
|
|
CTR3(KTR_BUF, "bgetvp(%p) vp %p flags %X", bp, vp, bp^.b_flags);
|
|
Assert((bp^.b_xflags and (BX_VNDIRTY|BX_VNCLEAN))=0, vp,
|
|
'bgetvp: bp already attached! %p", bp));
|
|
|
|
vhold(vp);
|
|
if (VFS_NEEDSGIANT(vp^.v_mount) or bo^.bo_flag and BO_NEEDSGIANT)
|
|
bp^.b_flags:= or B_NEEDSGIANT;
|
|
bp^.b_vp:=vp;
|
|
bp^.b_bufobj:=bo;
|
|
{
|
|
* Insert onto list for new vnode.
|
|
}
|
|
buf_vlist_add(bp, bo, BX_VNCLEAN);
|
|
end;
|
|
|
|
{
|
|
* Disassociate a buffer from a vnode.
|
|
}
|
|
void
|
|
brelvp(struct buf *bp)
|
|
begin
|
|
struct bufobj *bo;
|
|
vp:p_vnode;
|
|
|
|
CTR3(KTR_BUF, "brelvp(%p) vp %p flags %X", bp, bp^.b_vp, bp^.b_flags);
|
|
Assert(bp^.b_vp<>nil, 'brelvp: nil');
|
|
|
|
{
|
|
* Delete from old vnode list, if on one.
|
|
}
|
|
vp:=bp^.b_vp; { XXX }
|
|
bo:=bp^.b_bufobj;
|
|
BO_LOCK(bo);
|
|
if (bp^.b_xflags and (BX_VNDIRTY or BX_VNCLEAN))
|
|
buf_vlist_remove(bp);
|
|
else
|
|
panic'brelvp: Buffer %p not on queue.", bp);
|
|
if ((bo^.bo_flag and BO_ONWORKLST) and bo^.bo_dirty.bv_cnt=0) begin
|
|
bo^.bo_flag:= and ~BO_ONWORKLST;
|
|
mtx_lock(@sync_mtx);
|
|
LIST_REMOVE(bo, bo_synclist);
|
|
syncer_worklist_len--;
|
|
mtx_unlock(@sync_mtx);
|
|
end;
|
|
bp^.b_flags:= and ~B_NEEDSGIANT;
|
|
bp^.b_vp:=nil;
|
|
bp^.b_bufobj:=nil;
|
|
BO_UNLOCK(bo);
|
|
vdrop(vp);
|
|
end;
|
|
|
|
{
|
|
* Add an item to the syncer work queue.
|
|
}
|
|
static void
|
|
vn_syncer_add_to_worklist(struct bufobj *bo, int delay)
|
|
begin
|
|
int queue, slot;
|
|
|
|
ASSERT_BO_LOCKED(bo);
|
|
|
|
mtx_lock(@sync_mtx);
|
|
if (bo^.bo_flag and BO_ONWORKLST)
|
|
LIST_REMOVE(bo, bo_synclist);
|
|
else begin
|
|
bo^.bo_flag:= or BO_ONWORKLST;
|
|
syncer_worklist_len++;
|
|
end;
|
|
|
|
if (delay > syncer_maxdelay - 2)
|
|
delay:=syncer_maxdelay - 2;
|
|
slot:=(syncer_delayno + delay) and syncer_mask;
|
|
|
|
queue:=VFS_NEEDSGIANT(bo^.__bo_vnode^.v_mount) ? WI_GIANTQ :
|
|
WI_MPSAFEQ;
|
|
LIST_INSERT_HEAD(@syncer_workitem_pending[queue][slot], bo,
|
|
bo_synclist);
|
|
mtx_unlock(@sync_mtx);
|
|
end;
|
|
|
|
static int
|
|
sysctl_vfs_worklist_len(SYSCTL_HANDLER_ARGS)
|
|
begin
|
|
int error, len;
|
|
|
|
mtx_lock(@sync_mtx);
|
|
len:=syncer_worklist_len - sync_vnode_count;
|
|
mtx_unlock(@sync_mtx);
|
|
error:=SYSCTL_OUT(req, &len, sizeof(len));
|
|
Exit(error);
|
|
end;
|
|
|
|
SYSCTL_PROC(_vfs, OID_AUTO, worklist_len, CTLTYPE_INT or CTLFLAG_RD, nil, 0,
|
|
sysctl_vfs_worklist_len, "I", "Syncer thread worklist length';
|
|
|
|
static struct proc *updateproc;
|
|
static void sched_sync(void);
|
|
static struct kproc_desc up_kp:=begin
|
|
"syncer",
|
|
sched_sync,
|
|
&updateproc
|
|
end;
|
|
SYSINIT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp);
|
|
|
|
static int
|
|
sync_vnode(struct synclist *slp, struct bufobj **bo, struct thread *td)
|
|
begin
|
|
vp:p_vnode;
|
|
mp:p_mount;
|
|
|
|
*bo:=LIST_FIRST(slp);
|
|
if (*bo=nil)
|
|
Exit(0);
|
|
vp:=(*bo)^.__bo_vnode; { XXX }
|
|
if (VOP_ISLOCKED(vp)<>0 or VI_TRYLOCK(vp)=0)
|
|
Exit(1);
|
|
{
|
|
* We use vhold in case the vnode does not
|
|
* successfully sync. vhold prevents the vnode from
|
|
* going away when we unlock the sync_mtx so that
|
|
* we can acquire the vnode interlock.
|
|
}
|
|
vholdl(vp);
|
|
mtx_unlock(@sync_mtx);
|
|
VI_UNLOCK(vp);
|
|
if (vn_start_write(vp, &mp, V_NOWAIT)<>0) begin
|
|
vdrop(vp);
|
|
mtx_lock(@sync_mtx);
|
|
Exit(*bo=LIST_FIRST(slp));
|
|
end;
|
|
vn_lock(vp, LK_EXCLUSIVE or LK_RETRY);
|
|
(void) VOP_FSYNC(vp, MNT_LAZY, td);
|
|
VOP_UNLOCK(vp, 0);
|
|
vn_finished_write(mp);
|
|
BO_LOCK(*bo);
|
|
if (((*bo)^.bo_flag and BO_ONWORKLST)<>0) begin
|
|
{
|
|
* Put us back on the worklist. The worklist
|
|
* routine will remove us from our current
|
|
* position and then add us back in at a later
|
|
* position.
|
|
}
|
|
vn_syncer_add_to_worklist(*bo, syncdelay);
|
|
end;
|
|
BO_UNLOCK(*bo);
|
|
vdrop(vp);
|
|
mtx_lock(@sync_mtx);
|
|
Exit(0);
|
|
end;
|
|
|
|
{
|
|
* System filesystem synchronizer daemon.
|
|
}
|
|
static void
|
|
sched_sync(void)
|
|
begin
|
|
struct synclist *gnext, *next;
|
|
struct synclist *gslp, *slp;
|
|
struct bufobj *bo;
|
|
long starttime;
|
|
struct thread *td:=curthread;
|
|
int last_work_seen;
|
|
int net_worklist_len;
|
|
int syncer_final_iter;
|
|
int first_printf;
|
|
int error;
|
|
|
|
last_work_seen:=0;
|
|
syncer_final_iter:=0;
|
|
first_printf:=1;
|
|
syncer_state:=SYNCER_RUNNING;
|
|
starttime:=time_uptime;
|
|
td^.td_pflags:= or TDP_NORUNNINGBUF;
|
|
|
|
EVENTHANDLER_REGISTER(shutdown_pre_sync, syncer_shutdown, td^.td_proc,
|
|
SHUTDOWN_PRI_LAST);
|
|
|
|
mtx_lock(@sync_mtx);
|
|
for (;;) begin
|
|
if (syncer_state=SYNCER_FINAL_DELAY and
|
|
syncer_final_iter=0) begin
|
|
mtx_unlock(@sync_mtx);
|
|
kproc_suspend_check(td^.td_proc);
|
|
mtx_lock(@sync_mtx);
|
|
end;
|
|
net_worklist_len:=syncer_worklist_len - sync_vnode_count;
|
|
if (syncer_state<>SYNCER_RUNNING and
|
|
starttime<>time_uptime) begin
|
|
if (first_printf) begin
|
|
printf'\nSyncing disks, vnodes remaining...';
|
|
first_printf:=0;
|
|
end;
|
|
printf'%d ", net_worklist_len);
|
|
end;
|
|
starttime:=time_uptime;
|
|
|
|
{
|
|
* Push files whose dirty time has expired. Be careful
|
|
* of interrupt race on slp queue.
|
|
*
|
|
* Skip over empty worklist slots when shutting down.
|
|
}
|
|
do begin
|
|
slp:=@syncer_workitem_pending[WI_MPSAFEQ][syncer_delayno];
|
|
gslp:=@syncer_workitem_pending[WI_GIANTQ][syncer_delayno];
|
|
syncer_delayno += 1;
|
|
if (syncer_delayno=syncer_maxdelay)
|
|
syncer_delayno:=0;
|
|
next:=@syncer_workitem_pending[WI_MPSAFEQ][syncer_delayno];
|
|
gnext:=@syncer_workitem_pending[WI_GIANTQ][syncer_delayno];
|
|
{
|
|
* If the worklist has wrapped since the
|
|
* it was emptied of all but syncer vnodes,
|
|
* switch to the FINAL_DELAY state and run
|
|
* for one more second.
|
|
}
|
|
if (syncer_state=SYNCER_SHUTTING_DOWN and
|
|
net_worklist_len=0 and
|
|
last_work_seen=syncer_delayno) begin
|
|
syncer_state:=SYNCER_FINAL_DELAY;
|
|
syncer_final_iter:=SYNCER_SHUTDOWN_SPEEDUP;
|
|
end;
|
|
end; while (syncer_state<>SYNCER_RUNNING and LIST_EMPTY(slp) and
|
|
LIST_EMPTY(gslp) and syncer_worklist_len > 0);
|
|
|
|
{
|
|
* Keep track of the last time there was anything
|
|
* on the worklist other than syncer vnodes.
|
|
* Exitto the SHUTTING_DOWN state if any
|
|
* new work appears.
|
|
}
|
|
if (net_worklist_len > 0 or syncer_state=SYNCER_RUNNING)
|
|
last_work_seen:=syncer_delayno;
|
|
if (net_worklist_len > 0 and syncer_state=SYNCER_FINAL_DELAY)
|
|
syncer_state:=SYNCER_SHUTTING_DOWN;
|
|
while (!LIST_EMPTY(slp)) begin
|
|
error:=sync_vnode(slp, &bo, td);
|
|
if (error=1) begin
|
|
LIST_REMOVE(bo, bo_synclist);
|
|
LIST_INSERT_HEAD(next, bo, bo_synclist);
|
|
continue;
|
|
end;
|
|
|
|
if (first_printf=0)
|
|
wdog_kern_pat(WD_LASTVAL);
|
|
|
|
end;
|
|
if (!LIST_EMPTY(gslp)) begin
|
|
mtx_unlock(@sync_mtx);
|
|
mtx_lock(@Giant);
|
|
mtx_lock(@sync_mtx);
|
|
while (!LIST_EMPTY(gslp)) begin
|
|
error:=sync_vnode(gslp, &bo, td);
|
|
if (error=1) begin
|
|
LIST_REMOVE(bo, bo_synclist);
|
|
LIST_INSERT_HEAD(gnext, bo,
|
|
bo_synclist);
|
|
continue;
|
|
end;
|
|
end;
|
|
mtx_unlock(@Giant);
|
|
end;
|
|
if (syncer_state=SYNCER_FINAL_DELAY and syncer_final_iter > 0)
|
|
syncer_final_iter--;
|
|
{
|
|
* The variable rushjob allows the kernel to speed up the
|
|
* processing of the filesystem syncer process. A rushjob
|
|
* value of N tells the filesystem syncer to process the next
|
|
* N seconds worth of work on its queue ASAP. Currently rushjob
|
|
* is used by the soft update code to speed up the filesystem
|
|
* syncer process when the incore state is getting so far
|
|
* ahead of the disk that the kernel memory pool is being
|
|
* threatened with exhaustion.
|
|
}
|
|
if (rushjob > 0) begin
|
|
rushjob -= 1;
|
|
continue;
|
|
end;
|
|
{
|
|
* Just sleep for a short period of time between
|
|
* iterations when shutting down to allow some I/O
|
|
* to happen.
|
|
*
|
|
* If it has taken us less than a second to process the
|
|
* current work, then wait. Otherwise start right over
|
|
* again. We can still lose time if any single round
|
|
* takes more than two seconds, but it does not really
|
|
* matter as we are just trying to generally pace the
|
|
* filesystem activity.
|
|
}
|
|
if (syncer_state<>SYNCER_RUNNING or
|
|
time_uptime=starttime) begin
|
|
thread_lock(td);
|
|
sched_prio(td, PPAUSE);
|
|
thread_unlock(td);
|
|
end;
|
|
if (syncer_state<>SYNCER_RUNNING)
|
|
cv_timedwait(@sync_wakeup, &sync_mtx,
|
|
hz div SYNCER_SHUTDOWN_SPEEDUP);
|
|
else if (time_uptime=starttime)
|
|
cv_timedwait(@sync_wakeup, &sync_mtx, hz);
|
|
end;
|
|
end;
|
|
|
|
{
|
|
* Request the syncer daemon to speed up its work.
|
|
* We never push it to speed up more than half of its
|
|
* normal turn time, otherwise it could take over the cpu.
|
|
}
|
|
int
|
|
speedup_syncer(void)
|
|
begin
|
|
int ret:=0;
|
|
|
|
mtx_lock(@sync_mtx);
|
|
if (rushjob < syncdelay div 2) begin
|
|
rushjob += 1;
|
|
stat_rush_requests += 1;
|
|
ret:=1;
|
|
end;
|
|
mtx_unlock(@sync_mtx);
|
|
cv_broadcast(@sync_wakeup);
|
|
Exit(ret);
|
|
end;
|
|
|
|
{
|
|
* Tell the syncer to speed up its work and run though its work
|
|
* list several times, then tell it to shut down.
|
|
}
|
|
static void
|
|
syncer_shutdown(void *arg, int howto)
|
|
begin
|
|
|
|
if (howto and RB_NOSYNC)
|
|
Exit;
|
|
mtx_lock(@sync_mtx);
|
|
syncer_state:=SYNCER_SHUTTING_DOWN;
|
|
rushjob:=0;
|
|
mtx_unlock(@sync_mtx);
|
|
cv_broadcast(@sync_wakeup);
|
|
kproc_shutdown(arg, howto);
|
|
end;
|
|
|
|
{
|
|
* Reassign a buffer from one vnode to another.
|
|
* Used to assign file specific control information
|
|
* (indirect blocks) to the vnode to which they belong.
|
|
}
|
|
void
|
|
reassignbuf(struct buf *bp)
|
|
begin
|
|
vp:p_vnode;
|
|
struct bufobj *bo;
|
|
int delay;
|
|
#ifdef INVARIANTS
|
|
struct bufv *bv;
|
|
#endif
|
|
|
|
vp:=bp^.b_vp;
|
|
bo:=bp^.b_bufobj;
|
|
++reassignbufcalls;
|
|
|
|
CTR3(KTR_BUF, "reassignbuf(%p) vp %p flags %X",
|
|
bp, bp^.b_vp, bp^.b_flags);
|
|
{
|
|
* B_PAGING flagged buffers cannot be reassigned because their vp
|
|
* is not fully linked in.
|
|
}
|
|
if (bp^.b_flags and B_PAGING)
|
|
panic'cannot reassign paging buffer';
|
|
|
|
{
|
|
* Delete from old vnode list, if on one.
|
|
}
|
|
BO_LOCK(bo);
|
|
if (bp^.b_xflags and (BX_VNDIRTY or BX_VNCLEAN))
|
|
buf_vlist_remove(bp);
|
|
else
|
|
panic'reassignbuf: Buffer %p not on queue.", bp);
|
|
{
|
|
* If dirty, put on list of dirty buffers; otherwise insert onto list
|
|
* of clean buffers.
|
|
}
|
|
if (bp^.b_flags and B_DELWRI) begin
|
|
if ((bo^.bo_flag and BO_ONWORKLST)=0) begin
|
|
switch (vp^.v_type) begin
|
|
case VDIR:
|
|
delay:=dirdelay;
|
|
break;
|
|
case VCHR:
|
|
delay:=metadelay;
|
|
break;
|
|
default:
|
|
delay:=filedelay;
|
|
end;
|
|
vn_syncer_add_to_worklist(bo, delay);
|
|
end;
|
|
buf_vlist_add(bp, bo, BX_VNDIRTY);
|
|
end; else begin
|
|
buf_vlist_add(bp, bo, BX_VNCLEAN);
|
|
|
|
if ((bo^.bo_flag and BO_ONWORKLST) and bo^.bo_dirty.bv_cnt=0) begin
|
|
mtx_lock(@sync_mtx);
|
|
LIST_REMOVE(bo, bo_synclist);
|
|
syncer_worklist_len--;
|
|
mtx_unlock(@sync_mtx);
|
|
bo^.bo_flag:= and ~BO_ONWORKLST;
|
|
end;
|
|
end;
|
|
#ifdef INVARIANTS
|
|
bv:=@bo^.bo_clean;
|
|
bp:=TAILQ_FIRST(@bv^.bv_hd);
|
|
Assert(bp=nil or bp^.b_bufobj=bo,
|
|
'bp %p wrong b_bufobj %p should be %p", bp, bp^.b_bufobj, bo));
|
|
bp:=TAILQ_LAST(@bv^.bv_hd, buflists);
|
|
Assert(bp=nil or bp^.b_bufobj=bo,
|
|
'bp %p wrong b_bufobj %p should be %p", bp, bp^.b_bufobj, bo));
|
|
bv:=@bo^.bo_dirty;
|
|
bp:=TAILQ_FIRST(@bv^.bv_hd);
|
|
Assert(bp=nil or bp^.b_bufobj=bo,
|
|
'bp %p wrong b_bufobj %p should be %p", bp, bp^.b_bufobj, bo));
|
|
bp:=TAILQ_LAST(@bv^.bv_hd, buflists);
|
|
Assert(bp=nil or bp^.b_bufobj=bo,
|
|
'bp %p wrong b_bufobj %p should be %p", bp, bp^.b_bufobj, bo));
|
|
#endif
|
|
BO_UNLOCK(bo);
|
|
end;
|
|
}
|
|
|
|
{
|
|
* Increment the use and hold counts on the vnode, taking care to reference
|
|
* the driver's usecount if this is a chardev. The vholdl() will remove
|
|
* the vnode from the free list if it is presently free. Requires the
|
|
* vnode interlock and returns with it held.
|
|
}
|
|
procedure v_incr_usecount(vp:p_vnode);
|
|
begin
|
|
Inc(vp^.v_usecount);
|
|
if (vp^.v_type=VCHR) and (vp^.v_rdev<>nil) then
|
|
begin
|
|
dev_lock();
|
|
Inc(p_cdev(vp^.v_rdev)^.si_usecount);
|
|
dev_unlock();
|
|
end;
|
|
vholdl(vp);
|
|
end;
|
|
|
|
{
|
|
* Turn a holdcnt into a use+holdcnt such that only one call to
|
|
* v_decr_usecount is needed.
|
|
}
|
|
procedure v_upgrade_usecount(vp:p_vnode);
|
|
begin
|
|
Inc(vp^.v_usecount);
|
|
if (vp^.v_type=VCHR) and (vp^.v_rdev<>nil) then
|
|
begin
|
|
dev_lock();
|
|
Inc(p_cdev(vp^.v_rdev)^.si_usecount);
|
|
dev_unlock();
|
|
end;
|
|
end;
|
|
|
|
{
|
|
* Decrement the vnode use and hold count along with the driver's usecount
|
|
* if this is a chardev. The vdropl() below releases the vnode interlock
|
|
* as it may free the vnode.
|
|
}
|
|
procedure v_decr_usecount(vp:p_vnode);
|
|
begin
|
|
ASSERT_VI_LOCKED(vp,{$I %LINE%});
|
|
Assert(vp^.v_usecount>0,'v_decr_usecount: negative usecount');
|
|
Dec(vp^.v_usecount);
|
|
if (vp^.v_type=VCHR) and (vp^.v_rdev<>nil) then
|
|
begin
|
|
dev_lock();
|
|
Dec(p_cdev(vp^.v_rdev)^.si_usecount); //decrement (not increment) the driver usecount to match the vnode usecount drop
|
|
dev_unlock();
|
|
end;
|
|
vdropl(vp);
|
|
end;
|
|
|
|
{
|
|
* Decrement only the use count and driver use count. This is intended to
|
|
* be paired with a follow on vdropl() to release the remaining hold count.
|
|
* In this way we may vgone() a vnode with a 0 usecount without risk of
|
|
* having it end up on a free list because the hold count is kept above 0.
|
|
}
|
|
procedure v_decr_useonly(vp:p_vnode);
|
|
begin
|
|
ASSERT_VI_LOCKED(vp,{$I %LINE%});
|
|
Assert(vp^.v_usecount>0,'v_decr_useonly: negative usecount');
|
|
Dec(vp^.v_usecount);
|
|
if (vp^.v_type=VCHR) and (vp^.v_rdev<>nil) then
|
|
begin
|
|
dev_lock();
|
|
Dec(p_cdev(vp^.v_rdev)^.si_usecount);
|
|
dev_unlock();
|
|
end;
|
|
end;
|
|
|
|
{
|
|
* Grab a particular vnode from the free list, increment its
|
|
* reference count and lock it. VI_DOOMED is set if the vnode
|
|
* is being destroyed. Only callers who specify LK_RETRY will
|
|
* see doomed vnodes. If inactive processing was delayed in
|
|
* vput try to do it here.
|
|
}
|
|
function vget(vp:p_vnode;flags:Integer):Integer;
|
|
var
|
|
error:Integer;
|
|
begin
|
|
error:=0;
|
|
VFS_ASSERT_GIANT(vp^.v_mount);
|
|
Assert((flags and LK_TYPE_MASK)<>0,'vget: invalid lock operation');
|
|
|
|
if ((flags and LK_INTERLOCK)=0) then
|
|
begin
|
|
VI_LOCK(vp);
|
|
end;
|
|
|
|
vholdl(vp);
|
|
|
|
error:=vn_lock(vp,flags or LK_INTERLOCK,{$INCLUDE %FILE%},{$INCLUDE %LINENUM%});
|
|
if (error<>0) then
|
|
begin
|
|
vdrop(vp);
|
|
Exit(error);
|
|
end;
|
|
|
|
if ((vp^.v_iflag and VI_DOOMED)<>0) and ((flags and LK_RETRY)=0) then
|
|
begin
|
|
Assert(false,'vget: vn_lock failed to Exit ENOENT');
|
|
end;
|
|
|
|
VI_LOCK(vp);
|
|
{ Upgrade our holdcnt to a usecount. }
|
|
v_upgrade_usecount(vp);
|
|
{
|
|
* We don't guarantee that any particular close will
|
|
* trigger inactive processing so just make a best effort
|
|
* here at preventing a reference to a removed file. If
|
|
* we don't succeed no harm is done.
|
|
}
|
|
if ((vp^.v_iflag and VI_OWEINACT)<>0) then
|
|
begin
|
|
if (VOP_ISLOCKED(vp)=LK_EXCLUSIVE) and
|
|
((flags and LK_NOWAIT)=0) then
|
|
begin
|
|
vinactive(vp);
|
|
end;
|
|
|
|
vp^.v_iflag:=vp^.v_iflag and (not VI_OWEINACT);
|
|
end;
|
|
VI_UNLOCK(vp);
|
|
Exit(0);
|
|
end;
|
|
|
|
{
|
|
* Increase the reference count of a vnode.
|
|
}
|
|
procedure vref(vp:p_vnode); public;
|
|
begin
|
|
VI_LOCK(vp);
|
|
v_incr_usecount(vp);
|
|
VI_UNLOCK(vp);
|
|
end;
|
|
|
|
{
* Return the reference count of a vnode.
*
* The results of this call are only guaranteed when some mechanism other
* than the VI lock is used to stop other processes from gaining references
* to the vnode. This may be the case if the caller holds the only reference.
* This is also useful when stale data is acceptable as race conditions may
* be accounted for by some other means.
}
|
|
function vrefcnt(vp:p_vnode):Integer;
|
|
begin
|
|
VI_LOCK(vp);
|
|
Result:=vp^.v_usecount;
|
|
VI_UNLOCK(vp);
|
|
end;
|
|
|
|
const
|
|
VPUTX_VRELE =1;
|
|
VPUTX_VPUT =2;
|
|
VPUTX_VUNREF=3;
|
|
|
|
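{
* Common body of vrele()/vput()/vunref(): drop one use reference and, when
* it was the last, take the vnode lock as needed, run any deferred inactive
* processing (VI_OWEINACT) and drop the hold reference via vdropl().
}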
procedure vputx(vp:p_vnode;func:Integer);
|
|
var
|
|
error:Integer;
|
|
begin
|
|
Assert(vp<>nil,'vputx: nil vp');
|
|
if (func=VPUTX_VUNREF) then
|
|
ASSERT_VOP_LOCKED(vp,'vunref')
|
|
else
|
|
if (func=VPUTX_VPUT) then
|
|
ASSERT_VOP_LOCKED(vp,'vput')
|
|
else
|
|
Assert(func=VPUTX_VRELE,'vputx: wrong func');
|
|
|
|
VFS_ASSERT_GIANT(vp^.v_mount);
|
|
|
|
VI_LOCK(vp);
|
|
|
|
{ Skip this v_writecount check if we're going to panic below. }
|
|
Assert((vp^.v_writecount < vp^.v_usecount) or (vp^.v_usecount < 1),'vputx: missed vn_close');
|
|
error:=0;
|
|
|
|
if (vp^.v_usecount > 1) or (((vp^.v_iflag and VI_DOINGINACT)<>0) and (vp^.v_usecount=1)) then
|
|
begin
|
|
if (func=VPUTX_VPUT) then
|
|
begin
|
|
VOP_UNLOCK(vp, 0);
|
|
end;
|
|
v_decr_usecount(vp);
|
|
Exit;
|
|
end;
|
|
|
|
if (vp^.v_usecount<>1) then
|
|
begin
|
|
Assert(false,'vputx: negative ref cnt');
|
|
end;
|
|
{
|
|
* We want to hold the vnode until the inactive finishes to
|
|
* prevent vgone() races. We drop the use count here and the
|
|
* hold count below when we're done.
|
|
}
|
|
v_decr_useonly(vp);
|
|
{
|
|
* We must call VOP_INACTIVE with the node locked. Mark
|
|
* as VI_DOINGINACT to avoid recursion.
|
|
}
|
|
vp^.v_iflag:=vp^.v_iflag or VI_OWEINACT;
|
|
|
|
case (func) of
|
|
VPUTX_VRELE:
|
|
begin
|
|
error:=vn_lock(vp, LK_EXCLUSIVE or LK_INTERLOCK,{$INCLUDE %FILE%},{$INCLUDE %LINENUM%});
|
|
VI_LOCK(vp);
|
|
end;
|
|
VPUTX_VPUT:
|
|
begin
|
|
if (VOP_ISLOCKED(vp)<>LK_EXCLUSIVE) then
|
|
begin
|
|
error:=VOP_LOCK(vp, LK_UPGRADE or LK_INTERLOCK or LK_NOWAIT,{$INCLUDE %FILE%},{$INCLUDE %LINENUM%});
|
|
VI_LOCK(vp);
|
|
end;
|
|
end;
|
|
VPUTX_VUNREF:
|
|
begin
|
|
if (VOP_ISLOCKED(vp)<>LK_EXCLUSIVE) then
|
|
begin
|
|
error:=VOP_LOCK(vp, LK_TRYUPGRADE or LK_INTERLOCK,{$INCLUDE %FILE%},{$INCLUDE %LINENUM%});
|
|
VI_LOCK(vp);
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
if (vp^.v_usecount > 0) then
|
|
begin
|
|
vp^.v_iflag:=vp^.v_iflag and (not VI_OWEINACT);
|
|
end;
|
|
|
|
if (error=0) then
|
|
begin
|
|
if ((vp^.v_iflag and VI_OWEINACT)<>0) then
|
|
begin
|
|
vinactive(vp);
|
|
end;
|
|
|
|
if (func<>VPUTX_VUNREF) then
|
|
begin
|
|
VOP_UNLOCK(vp, 0);
|
|
end;
|
|
end;
|
|
vdropl(vp);
|
|
end;
|
|
|
|
{
|
|
* Vnode put/release.
|
|
* If count drops to zero, call inactive routine and return to freelist.
|
|
}
|
|
procedure vrele(vp:p_vnode);
|
|
begin
|
|
vputx(vp, VPUTX_VRELE);
|
|
end;
|
|
|
|
{
|
|
 * Release an already locked vnode. This gives the same effect as
 * unlock+vrele(), but takes less time and avoids releasing and
 * re-acquiring the lock (as vrele() acquires the lock internally.)
|
|
}
|
|
procedure vput(vp:p_vnode);
|
|
begin
|
|
vputx(vp, VPUTX_VPUT);
|
|
end;
|
|
|
|
{
|
|
* Release an exclusively locked vnode. Do not unlock the vnode lock.
|
|
}
|
|
procedure vunref(vp:p_vnode);
|
|
begin
|
|
vputx(vp, VPUTX_VUNREF);
|
|
end;
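
{
 * Quick reference for the three wrappers above (illustrative sketch; the
 * calling conventions are taken from the assertions in vputx):
 *
 *  vrele(vp);  // no vnode lock required; taken internally when needed
 *  vput(vp);   // vnode locked on entry; unlocked before returning
 *  vunref(vp); // vnode exclusively locked on entry; left locked on return
}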
|
|
|
|
{
|
|
* Somebody doesn't want the vnode recycled.
|
|
}
|
|
procedure vhold(vp:p_vnode);
|
|
begin
|
|
VI_LOCK(vp);
|
|
vholdl(vp);
|
|
VI_UNLOCK(vp);
|
|
end;
|
|
|
|
{
|
|
* Increase the hold count and activate if this is the first reference.
|
|
}
|
|
procedure vholdl(vp:p_vnode);
|
|
var
|
|
mp:p_mount;
|
|
begin
|
|
Inc(vp^.v_holdcnt);
|
|
if (not VSHOULDBUSY(vp)) then Exit;
|
|
|
|
ASSERT_VI_LOCKED(vp,'vholdl');
|
|
Assert((vp^.v_iflag and VI_FREE)<>0,'vnode not free');
|
|
Assert(vp^.v_op<>nil,'vholdl: vnode already reclaimed.');
|
|
{
|
|
* Remove a vnode from the free list, mark it as in use,
|
|
* and put it on the active list.
|
|
}
|
|
mtx_lock(vnode_free_list_mtx);
|
|
TAILQ_REMOVE(@vnode_free_list,vp,@vp^.v_actfreelist);
|
|
Dec(freevnodes);
|
|
vp^.v_iflag:=vp^.v_iflag and (not (VI_FREE or VI_AGE));
|
|
Assert((vp^.v_iflag and VI_ACTIVE)=0,'Activating already active vnode');
|
|
vp^.v_iflag:=vp^.v_iflag or VI_ACTIVE;
|
|
mp:=vp^.v_mount;
|
|
TAILQ_INSERT_HEAD(@mp^.mnt_activevnodelist,vp,@vp^.v_actfreelist);
|
|
Inc(mp^.mnt_activevnodelistsize);
|
|
mtx_unlock(vnode_free_list_mtx);
|
|
end;
|
|
|
|
{
|
|
* Note that there is one less who cares about this vnode.
|
|
* vdrop() is the opposite of vhold().
|
|
}
|
|
procedure vdrop(vp:p_vnode);
|
|
begin
|
|
VI_LOCK(vp);
|
|
vdropl(vp);
|
|
end;
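
{
 * Usage sketch (hypothetical caller): keep a vnode from being recycled
 * across a blocking operation without taking a use reference.
 *
 *  vhold(vp); // bump v_holdcnt; a free vnode is pulled off the free list
 *  ...        // vp cannot be freed while the hold is in place
 *  vdrop(vp); // release the hold; vp may go back on the free list
}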
|
|
|
|
{
|
|
* Drop the hold count of the vnode. If this is the last reference to
|
|
* the vnode we place it on the free list unless it has been vgone'd
|
|
* (marked VI_DOOMED) in which case we will free it.
|
|
}
|
|
procedure vdropl(vp:p_vnode);
|
|
var
|
|
//struct bufobj *bo;
|
|
mp:p_mount;
|
|
active:Integer;
|
|
begin
|
|
ASSERT_VI_LOCKED(vp,'vdropl');
|
|
|
|
if (vp^.v_holdcnt <= 0) then
|
|
begin
|
|
Assert(false,'vdrop: holdcnt');
|
|
end;
|
|
|
|
Dec(vp^.v_holdcnt);
|
|
if (vp^.v_holdcnt > 0) then
|
|
begin
|
|
VI_UNLOCK(vp);
|
|
Exit;
|
|
end;
|
|
|
|
if ((vp^.v_iflag and VI_DOOMED)=0) then
|
|
begin
|
|
{
|
|
* Mark a vnode as free: remove it from its active list
|
|
* and put it up for recycling on the freelist.
|
|
}
|
|
Assert(vp^.v_op<>nil,'vdropl: vnode already reclaimed.');
|
|
Assert((vp^.v_iflag and VI_FREE)=0,'vnode already free');
|
|
Assert(VSHOULDFREE(vp),'vdropl: freeing when we shouldnt');
|
|
|
|
active:=vp^.v_iflag and VI_ACTIVE;
|
|
vp^.v_iflag:=vp^.v_iflag and (not VI_ACTIVE);
|
|
mp:=vp^.v_mount;
|
|
mtx_lock(vnode_free_list_mtx);
|
|
if (active<>0) then
|
|
begin
|
|
TAILQ_REMOVE(@mp^.mnt_activevnodelist,vp,@vp^.v_actfreelist);
|
|
Dec(mp^.mnt_activevnodelistsize);
|
|
end;
|
|
if ((vp^.v_iflag and VI_AGE)<>0) then
|
|
begin
|
|
TAILQ_INSERT_HEAD(@vnode_free_list,vp,@vp^.v_actfreelist);
|
|
end else
|
|
begin
|
|
TAILQ_INSERT_TAIL(@vnode_free_list,vp,@vp^.v_actfreelist);
|
|
end;
|
|
Inc(freevnodes);
|
|
vp^.v_iflag:=vp^.v_iflag and (not VI_AGE);
|
|
vp^.v_iflag:=vp^.v_iflag or VI_FREE;
|
|
mtx_unlock(vnode_free_list_mtx);
|
|
VI_UNLOCK(vp);
|
|
Exit;
|
|
end;
|
|
|
|
{
|
|
* The vnode has been marked for destruction, so free it.
|
|
}
|
|
System.InterlockedDecrement64(numvnodes);
|
|
//bo:=@vp^.v_bufobj;
|
|
Assert((vp^.v_iflag and VI_FREE)=0,'cleaned vnode still on the free list.');
|
|
Assert(vp^.v_data=nil, 'cleaned vnode isnt');
|
|
Assert(vp^.v_holdcnt=0, 'Non-zero hold count');
|
|
Assert(vp^.v_usecount=0, 'Non-zero use count');
|
|
Assert(vp^.v_writecount=0, 'Non-zero write count');
|
|
//Assert(bo^.bo_numoutput=0, 'Clean vnode has pending I/Os');
|
|
//Assert(bo^.bo_clean.bv_cnt=0, 'cleanbufcnt not 0');
|
|
//Assert(bo^.bo_clean.bv_root=nil, 'cleanblkroot not nil');
|
|
//Assert(bo^.bo_dirty.bv_cnt=0, 'dirtybufcnt not 0');
|
|
//Assert(bo^.bo_dirty.bv_root=nil, 'dirtyblkroot not nil');
|
|
//Assert(TAILQ_EMPTY(@vp^.v_cache_dst), 'vp has namecache dst');
|
|
//Assert(LIST_EMPTY(@vp^.v_cache_src), 'vp has namecache src');
|
|
//Assert(vp^.v_cache_dd=nil, 'vp has namecache for ..');
|
|
VI_UNLOCK(vp);
|
|
|
|
//mac_vnode_destroy(vp);
|
|
|
|
if (vp^.v_pollinfo<>nil) then
|
|
begin
|
|
destroy_vpollinfo(vp^.v_pollinfo);
|
|
end;
|
|
|
|
{ XXX Elsewhere we detect an already freed vnode via nil v_op. }
|
|
vp^.v_op:=nil;
|
|
|
|
rangelock_destroy(@vp^.v_rl);
|
|
//lockdestroy(vp^.v_vnlock);
|
|
mtx_destroy(vp^.v_vnlock^);
|
|
mtx_destroy(vp^.v_interlock);
|
|
|
|
mtx_destroy(vp^.v_lock);
|
|
|
|
//mtx_destroy(BO_MTX(bo));
|
|
|
|
FreeMem(vp);
|
|
end;
|
|
|
|
{
|
|
* Call VOP_INACTIVE on the vnode and manage the DOINGINACT and OWEINACT
|
|
* flags. DOINGINACT prevents us from recursing in calls to vinactive.
|
|
* OWEINACT tracks whether a vnode missed a call to inactive due to a
|
|
* failed lock upgrade.
|
|
}
|
|
procedure vinactive(vp:p_vnode);
|
|
var
|
|
obj:vm_object_t;
|
|
begin
|
|
ASSERT_VOP_ELOCKED(vp,'vinactive');
|
|
ASSERT_VI_LOCKED(vp,'vinactive');
|
|
Assert((vp^.v_iflag and VI_DOINGINACT)=0,'vinactive: recursed on VI_DOINGINACT');
|
|
|
|
vp^.v_iflag:=vp^.v_iflag or VI_DOINGINACT;
|
|
vp^.v_iflag:=vp^.v_iflag and (not VI_OWEINACT);
|
|
VI_UNLOCK(vp);
|
|
{
|
|
* Before moving off the active list, we must be sure that any
|
|
* modified pages are on the vnode's dirty list since these will
|
|
* no longer be checked once the vnode is on the inactive list.
|
|
* Because the vnode vm object keeps a hold reference on the vnode
|
|
* if there is at least one resident non-cached page, the vnode
|
|
* cannot leave the active list without the page cleanup done.
|
|
}
|
|
obj:=vp^.v_object;
|
|
if (obj<>nil) then
|
|
if ((obj^.flags and OBJ_MIGHTBEDIRTY)<>0) then
|
|
begin
|
|
VM_OBJECT_LOCK(obj);
|
|
vm_object_page_clean(obj, 0, 0, OBJPC_NOSYNC);
|
|
VM_OBJECT_UNLOCK(obj);
|
|
end;
|
|
VOP_INACTIVE(vp);
|
|
VI_LOCK(vp);
|
|
Assert((vp^.v_iflag and VI_DOINGINACT)<>0,'vinactive: lost VI_DOINGINACT');
|
|
vp^.v_iflag:=vp^.v_iflag and (not VI_DOINGINACT);
|
|
end;
|
|
|
|
{
|
|
* Remove any vnodes in the vnode table belonging to mount point mp.
|
|
*
|
|
* If FORCECLOSE is not specified, there should not be any active ones,
|
|
 * return an error if any are found (nb: this is a user error, not a
|
|
* system error). If FORCECLOSE is specified, detach any active vnodes
|
|
* that are found.
|
|
*
|
|
* If WRITECLOSE is set, only flush out regular file vnodes open for
|
|
* writing.
|
|
*
|
|
* SKIPSYSTEM causes any vnodes marked VV_SYSTEM to be skipped.
|
|
*
|
|
* `rootrefs' specifies the base reference count for the root vnode
|
|
* of this filesystem. The root vnode is considered busy if its
|
|
* v_usecount exceeds this value. On a successful return, vflush(, td)
|
|
* will call vrele() on the root vnode exactly rootrefs times.
|
|
* If the SKIPSYSTEM or WRITECLOSE flags are specified, rootrefs must
|
|
* be zero.
|
|
}
|
|
function vflush(mp:p_mount;rootrefs,flags:Integer):Integer;
|
|
label
|
|
loop;
|
|
var
|
|
vp,mvp,rootvp:p_vnode;
|
|
vattr:t_vattr;
|
|
busy,error:Integer;
|
|
begin
|
|
rootvp:=nil;
|
|
busy:=0;
|
|
|
|
if (rootrefs > 0) then
|
|
begin
|
|
Assert((flags and (SKIPSYSTEM or WRITECLOSE))=0,'vflush: bad args');
|
|
{
|
|
* Get the filesystem root vnode. We can vput() it
|
|
* immediately, since with rootrefs > 0, it won't go away.
|
|
}
|
|
error:=VFS_ROOT(mp, LK_EXCLUSIVE, @rootvp);
|
|
if (error<>0) then
|
|
begin
|
|
Exit(error);
|
|
end;
|
|
vput(rootvp);
|
|
end;
|
|
loop:
|
|
vp:=__mnt_vnode_first_all(@mvp,mp);
|
|
while (vp<>nil) do
|
|
begin
|
|
vholdl(vp);
|
|
error:=vn_lock(vp, LK_INTERLOCK or LK_EXCLUSIVE,{$INCLUDE %FILE%},{$INCLUDE %LINENUM%});
|
|
if (error<>0) then
|
|
begin
|
|
vdrop(vp);
|
|
//MNT_VNODE_FOREACH_ALL_ABORT
|
|
MNT_ILOCK(mp);
|
|
__mnt_vnode_markerfree_all(@mvp,mp);
|
|
//MNT_VNODE_FOREACH_ALL_ABORT
|
|
goto loop;
|
|
end;
|
|
{
|
|
 * Skip over any vnodes marked VV_SYSTEM.
|
|
}
|
|
if (((flags and SKIPSYSTEM)<>0) and ((vp^.v_vflag and VV_SYSTEM)<>0)) then
|
|
begin
|
|
VOP_UNLOCK(vp, 0);
|
|
vdrop(vp);
|
|
//
|
|
vp:=__mnt_vnode_next_all(@mvp,mp);
|
|
continue;
|
|
end;
|
|
{
|
|
* If WRITECLOSE is set, flush out unlinked but still open
|
|
* files (even if open only for reading) and regular file
|
|
* vnodes open for writing.
|
|
}
|
|
if ((flags and WRITECLOSE)<>0) then
|
|
begin
|
|
if (vp^.v_object<>nil) then
|
|
begin
|
|
VM_OBJECT_LOCK(vp^.v_object);
|
|
vm_object_page_clean(vp^.v_object, 0, 0, 0);
|
|
VM_OBJECT_UNLOCK(vp^.v_object);
|
|
end;
|
|
error:=VOP_FSYNC(vp, MNT_WAIT);
|
|
if (error<>0) then
|
|
begin
|
|
VOP_UNLOCK(vp, 0);
|
|
vdrop(vp);
|
|
//MNT_VNODE_FOREACH_ALL_ABORT
|
|
MNT_ILOCK(mp);
|
|
__mnt_vnode_markerfree_all(@mvp,mp);
|
|
//MNT_VNODE_FOREACH_ALL_ABORT
|
|
Exit(error);
|
|
end;
|
|
error:=VOP_GETATTR(vp, @vattr);
|
|
VI_LOCK(vp);
|
|
|
|
if ((vp^.v_type=VNON) or
|
|
((error=0) and (vattr.va_nlink > 0))) and
|
|
((vp^.v_writecount=0) or (vp^.v_type<>VREG)) then
|
|
begin
|
|
VOP_UNLOCK(vp, 0);
|
|
vdropl(vp);
|
|
//
|
|
vp:=__mnt_vnode_next_all(@mvp,mp);
|
|
continue;
|
|
end;
|
|
end else
|
|
VI_LOCK(vp);
|
|
{
|
|
* With v_usecount=0, all we need to do is clear out the
|
|
* vnode data structures and we are done.
|
|
*
|
|
* If FORCECLOSE is set, forcibly close the vnode.
|
|
}
|
|
  if (vp^.v_usecount=0) or ((flags and FORCECLOSE)<>0) then
|
|
begin
|
|
Assert((vp^.v_usecount=0) or
|
|
((vp^.v_type<>VCHR) and (vp^.v_type<>VBLK)),'device VNODE %p is FORCECLOSED');
|
|
vgonel(vp);
|
|
end else
|
|
begin
|
|
Inc(busy);
|
|
end;
|
|
VOP_UNLOCK(vp, 0);
|
|
vdropl(vp);
|
|
//
|
|
vp:=__mnt_vnode_next_all(@mvp,mp);
|
|
end;
|
|
if (rootrefs > 0) and ((flags and FORCECLOSE)=0) then
|
|
begin
|
|
{
|
|
* If just the root vnode is busy, and if its refcount
|
|
* is equal to `rootrefs', then go ahead and kill it.
|
|
}
|
|
VI_LOCK(rootvp);
|
|
Assert(busy > 0, 'vflush: not busy');
|
|
Assert(rootvp^.v_usecount >= rootrefs,'vflush: usecount %d < rootrefs %d');
|
|
if (busy=1) and (rootvp^.v_usecount=rootrefs) then
|
|
begin
|
|
VOP_LOCK(rootvp, LK_EXCLUSIVE or LK_INTERLOCK,{$INCLUDE %FILE%},{$INCLUDE %LINENUM%});
|
|
vgone(rootvp);
|
|
VOP_UNLOCK(rootvp, 0);
|
|
busy:=0;
|
|
end else
|
|
VI_UNLOCK(rootvp);
|
|
end;
|
|
if (busy<>0) then
|
|
begin
|
|
Exit(EBUSY);
|
|
end;
|
|
|
|
while (rootrefs > 0) do
|
|
begin
|
|
vrele(rootvp);
|
|
Dec(rootrefs);
|
|
end;
|
|
Exit(0);
|
|
end;
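
{
 * Usage sketch (hypothetical unmount-style caller; the FORCECLOSE policy
 * below is an assumption, not something vflush() requires):
 *
 *  error:=vflush(mp, 1, FORCECLOSE);
 *  if (error<>0) then Exit(error); // EBUSY if busy vnodes remained
}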
|
|
|
|
{
|
|
* Recycle an unused vnode to the front of the free list.
|
|
}
|
|
function vrecycle(vp:p_vnode):Integer;
|
|
var
|
|
recycled:Integer;
|
|
begin
|
|
ASSERT_VOP_ELOCKED(vp, 'vrecycle');
|
|
recycled:=0;
|
|
VI_LOCK(vp);
|
|
if (vp^.v_usecount=0)then
|
|
begin
|
|
recycled:=1;
|
|
vgonel(vp);
|
|
end;
|
|
VI_UNLOCK(vp);
|
|
Exit(recycled);
|
|
end;
|
|
|
|
{
|
|
* Eliminate all activity associated with a vnode
|
|
* in preparation for reuse.
|
|
}
|
|
procedure vgone(vp:p_vnode);
|
|
begin
|
|
VI_LOCK(vp);
|
|
vgonel(vp);
|
|
VI_UNLOCK(vp);
|
|
end;
|
|
|
|
{
|
|
* vgone, with the vp interlock held.
|
|
}
|
|
procedure vgonel(vp:p_vnode);
|
|
var
|
|
oweinact:Integer;
|
|
active:Integer;
|
|
mp:p_mount;
|
|
begin
|
|
ASSERT_VOP_ELOCKED(vp, 'vgonel');
|
|
ASSERT_VI_LOCKED(vp, 'vgonel');
|
|
Assert(vp^.v_holdcnt<>0,'vgonel: vp %p has no reference.');
|
|
|
|
{
|
|
* Don't vgonel if we're already doomed.
|
|
}
|
|
if ((vp^.v_iflag and VI_DOOMED)<>0) then Exit;
|
|
|
|
vp^.v_iflag:=vp^.v_iflag or VI_DOOMED;
|
|
|
|
{
|
|
* Check to see if the vnode is in use. If so, we have to call
|
|
* VOP_CLOSE() and VOP_INACTIVE().
|
|
}
|
|
active:=vp^.v_usecount;
|
|
oweinact:=(vp^.v_iflag and VI_OWEINACT);
|
|
VI_UNLOCK(vp);
|
|
|
|
{
|
|
* Clean out any buffers associated with the vnode.
|
|
* If the flush fails, just toss the buffers.
|
|
}
|
|
mp:=nil;
|
|
|
|
//if (not TAILQ_EMPTY(@vp^.v_bufobj.bo_dirty.bv_hd)) then
|
|
// vn_start_secondary_write(vp, &mp, V_WAIT);
|
|
|
|
if (vinvalbuf(vp, V_SAVE, 0, 0)<>0) then
|
|
begin
|
|
vinvalbuf(vp, 0, 0, 0);
|
|
end;
|
|
|
|
{
|
|
* If purging an active vnode, it must be closed and
|
|
* deactivated before being reclaimed.
|
|
}
|
|
if (active<>0) then
|
|
begin
|
|
VOP_CLOSE(vp, FNONBLOCK);
|
|
end;
|
|
|
|
if (oweinact<>0) or (active<>0) then
|
|
begin
|
|
VI_LOCK(vp);
|
|
|
|
if ((vp^.v_iflag and VI_DOINGINACT)=0) then
|
|
begin
|
|
vinactive(vp);
|
|
end;
|
|
|
|
VI_UNLOCK(vp);
|
|
end;
|
|
|
|
//if (vp^.v_type=VSOCK) then
|
|
// vfs_unp_reclaim(vp);
|
|
|
|
{
|
|
* Reclaim the vnode.
|
|
}
|
|
if (VOP_RECLAIM(vp)<>0) then
|
|
begin
|
|
Assert(false,'vgone: cannot reclaim');
|
|
end;
|
|
|
|
//if (mp<>nil) then
|
|
// vn_finished_secondary_write(mp);
|
|
|
|
//Assert(vp^.v_object=nil,'vop_reclaim left v_object vp=%p, tag=%s'));
|
|
|
|
{
|
|
* Clear the advisory locks and wake up waiting threads.
|
|
}
|
|
VOP_ADVLOCKPURGE(vp);
|
|
{
|
|
* Delete from old mount point vnode list.
|
|
}
|
|
delmntque(vp);
|
|
//cache_purge(vp);
|
|
{
|
|
* Done with purge, reset to the standard lock and invalidate
|
|
* the vnode.
|
|
}
|
|
VI_LOCK(vp);
|
|
vp^.v_vnlock:=@vp^.v_lock;
|
|
vp^.v_op :=@dead_vnodeops;
|
|
vp^.v_tag :='none';
|
|
vp^.v_type :=VBAD;
|
|
end;
|
|
|
|
{
|
|
* Calculate the total number of references to a special device.
|
|
}
|
|
function vcount(vp:p_vnode):Integer;
|
|
begin
|
|
dev_lock();
|
|
Result:=p_cdev(vp^.v_rdev)^.si_usecount;
|
|
dev_unlock();
|
|
end;
|
|
|
|
{
|
|
* Same as above, but using the struct cdev *as argument
|
|
}
|
|
function count_dev(dev:Pointer):Integer; //cdev
|
|
begin
|
|
dev_lock();
|
|
Result:=p_cdev(dev)^.si_usecount;
|
|
dev_unlock();
|
|
end;
|
|
|
|
{
|
|
 * Perform msync on all vnodes under a mount point;
 * the mount point must be locked.
|
|
}
|
|
procedure vfs_msync(mp:p_mount;flags:Integer);
|
|
var
|
|
vp,mvp:p_vnode;
|
|
obj:vm_object_t;
|
|
begin
|
|
vp:=__mnt_vnode_first_active(@mvp,mp);
|
|
While (vp<>nil) do
|
|
begin
|
|
obj:=vp^.v_object;
|
|
if (obj<>nil) and
|
|
((obj^.flags and OBJ_MIGHTBEDIRTY)<>0) and
|
|
((flags=MNT_WAIT) or (VOP_ISLOCKED(vp)=0)) then
|
|
begin
|
|
if (vget(vp, LK_EXCLUSIVE or LK_RETRY or LK_INTERLOCK)=0) then
|
|
begin
|
|
if ((vp^.v_vflag and VV_NOSYNC)<>0) then
|
|
begin { unlinked }
|
|
vput(vp);
|
|
//
|
|
vp:=__mnt_vnode_next_active(@mvp,mp);
|
|
continue;
|
|
end;
|
|
|
|
obj:=vp^.v_object;
|
|
if (obj<>nil) then
|
|
begin
|
|
VM_OBJECT_LOCK(obj);
|
|
if (flags=MNT_WAIT) then
|
|
begin
|
|
vm_object_page_clean(obj, 0, 0, OBJPC_SYNC);
|
|
end else
|
|
begin
|
|
vm_object_page_clean(obj, 0, 0, OBJPC_NOSYNC);
|
|
end;
|
|
VM_OBJECT_UNLOCK(obj);
|
|
end;
|
|
vput(vp);
|
|
end;
|
|
end else
|
|
begin
|
|
VI_UNLOCK(vp);
|
|
end;
|
|
//
|
|
vp:=__mnt_vnode_next_active(@mvp,mp);
|
|
end;
|
|
end;
|
|
|
|
procedure destroy_vpollinfo_free(vi:p_vpollinfo);
|
|
begin
|
|
knlist_destroy(@vi^.vpi_selinfo.si_note);
|
|
mtx_destroy(vi^.vpi_lock);
|
|
FreeMem(vi);
|
|
end;
|
|
|
|
procedure destroy_vpollinfo(vi:p_vpollinfo);
|
|
begin
|
|
knlist_clear(@vi^.vpi_selinfo.si_note, 1);
|
|
seldrain(@vi^.vpi_selinfo);
|
|
destroy_vpollinfo_free(vi);
|
|
end;
|
|
|
|
procedure vfs_knllock(arg:Pointer); forward;
|
|
procedure vfs_knlunlock(arg:Pointer); forward;
|
|
procedure vfs_knl_assert_locked(arg:Pointer); forward;
|
|
procedure vfs_knl_assert_unlocked(arg:Pointer); forward;
|
|
|
|
{
|
|
 * Initialize per-vnode helper structure to hold poll-related state.
|
|
}
|
|
procedure v_addpollinfo(vp:p_vnode);
|
|
var
|
|
vi:p_vpollinfo;
|
|
begin
|
|
if (vp^.v_pollinfo<>nil) then
|
|
begin
|
|
Exit;
|
|
end;
|
|
vi:=AllocMem(SizeOf(vpollinfo));
|
|
mtx_init(vi^.vpi_lock,'vnode pollinfo');
|
|
knlist_init(@vi^.vpi_selinfo.si_note, vp, @vfs_knllock, @vfs_knlunlock, @vfs_knl_assert_locked, @vfs_knl_assert_unlocked);
|
|
VI_LOCK(vp);
|
|
if (vp^.v_pollinfo<>nil) then
|
|
begin
|
|
VI_UNLOCK(vp);
|
|
destroy_vpollinfo_free(vi);
|
|
Exit;
|
|
end;
|
|
vp^.v_pollinfo:=vi;
|
|
VI_UNLOCK(vp);
|
|
end;
|
|
|
|
{
|
|
* Record a process's interest in events which might happen to
|
|
* a vnode. Because poll uses the historic select-style interface
|
|
* internally, this routine serves as both the ``check for any
|
|
* pending events'' and the ``record my interest in future events''
|
|
* functions. (These are done together, while the lock is held,
|
|
* to avoid race conditions.)
|
|
}
|
|
function vn_pollrecord(vp:p_vnode;events:Integer):Integer;
|
|
begin
|
|
v_addpollinfo(vp);
|
|
mtx_lock(vp^.v_pollinfo^.vpi_lock);
|
|
if ((vp^.v_pollinfo^.vpi_revents and events)<>0) then
|
|
begin
|
|
{
|
|
* This leaves events we are not interested
|
|
 * in available for the other process which
 * presumably had requested them
|
|
* (otherwise they would never have been
|
|
* recorded).
|
|
}
|
|
events:=events and vp^.v_pollinfo^.vpi_revents;
|
|
vp^.v_pollinfo^.vpi_revents:=vp^.v_pollinfo^.vpi_revents and (not events);
|
|
|
|
mtx_unlock(vp^.v_pollinfo^.vpi_lock);
|
|
Exit(events);
|
|
end;
|
|
vp^.v_pollinfo^.vpi_events:=vp^.v_pollinfo^.vpi_events or events;
|
|
selrecord(curkthread, @vp^.v_pollinfo^.vpi_selinfo);
|
|
mtx_unlock(vp^.v_pollinfo^.vpi_lock);
|
|
Exit(0);
|
|
end;
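
{
 * Usage sketch (hypothetical VOP_POLL-style helper; the name example_poll
 * is an assumption): report events already recorded for the vnode, or
 * register interest in future ones.
 *
 *  function example_poll(vp:p_vnode;events:Integer):Integer;
 *  begin
 *   Result:=vn_pollrecord(vp, events); // 0 = interest recorded, none pending
 *  end;
}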
|
|
|
|
{
|
|
* Routine to create and manage a filesystem syncer vnode.
|
|
}
|
|
|
|
{
|
|
#define sync_close ((int (*)(struct vop_close_args *))nilop)
|
|
static int sync_fsync(struct vop_fsync_args *);
|
|
static int sync_inactive(struct vop_inactive_args *);
|
|
static int sync_reclaim(struct vop_reclaim_args *);
|
|
|
|
static struct vop_vector sync_vnodeops:=begin
|
|
.vop_bypass:=VOP_EOPNOTSUPP,
|
|
.vop_close:=sync_close, { close }
|
|
.vop_fsync:=sync_fsync, { fsync }
|
|
.vop_inactive:=sync_inactive, { inactive }
|
|
.vop_reclaim:=sync_reclaim, { reclaim }
|
|
.vop_lock1:=vop_stdlock, { lock }
|
|
.vop_unlock:=vop_stdunlock, { unlock }
|
|
.vop_islocked:=vop_stdislocked, { islocked }
|
|
end;
|
|
|
|
{
|
|
* Create a new filesystem syncer vnode for the specified mount point.
|
|
}
|
|
void
|
|
vfs_allocate_syncvnode(mp:p_mount)
|
|
begin
|
|
vp:p_vnode;
|
|
struct bufobj *bo;
|
|
static long start, incr, next;
|
|
int error;
|
|
|
|
{ Allocate a new vnode }
|
|
error:=getnewvnode'syncer", mp, &sync_vnodeops, &vp);
|
|
if (error<>0)
|
|
panic'vfs_allocate_syncvnode: getnewvnode() failed';
|
|
vp^.v_type:=VNON;
|
|
vn_lock(vp, LK_EXCLUSIVE or LK_RETRY);
|
|
vp^.v_vflag:= or VV_FORCEINSMQ;
|
|
error:=insmntque(vp, mp);
|
|
if (error<>0)
|
|
panic'vfs_allocate_syncvnode: insmntque() failed';
|
|
vp^.v_vflag:= and ~VV_FORCEINSMQ;
|
|
VOP_UNLOCK(vp, 0);
|
|
{
|
|
* Place the vnode onto the syncer worklist. We attempt to
|
|
* scatter them about on the list so that they will go off
|
|
* at evenly distributed times even if all the filesystems
|
|
* are mounted at once.
|
|
}
|
|
next += incr;
|
|
if (next=0 or next > syncer_maxdelay) begin
|
|
start /= 2;
|
|
incr /= 2;
|
|
if (start=0) begin
|
|
start:=syncer_maxdelay div 2;
|
|
incr:=syncer_maxdelay;
|
|
end;
|
|
next:=start;
|
|
end;
|
|
bo:=@vp^.v_bufobj;
|
|
BO_LOCK(bo);
|
|
vn_syncer_add_to_worklist(bo, syncdelay > 0 ? next % syncdelay : 0);
|
|
{ XXX - vn_syncer_add_to_worklist() also grabs and drops sync_mtx. }
|
|
mtx_lock(@sync_mtx);
|
|
sync_vnode_count++;
|
|
if (mp^.mnt_syncer=nil) begin
|
|
mp^.mnt_syncer:=vp;
|
|
vp:=nil;
|
|
end;
|
|
mtx_unlock(@sync_mtx);
|
|
BO_UNLOCK(bo);
|
|
if (vp<>nil) begin
|
|
vn_lock(vp, LK_EXCLUSIVE or LK_RETRY);
|
|
vgone(vp);
|
|
vput(vp);
|
|
end;
|
|
end;
|
|
|
|
void
|
|
vfs_deallocate_syncvnode(mp:p_mount)
|
|
begin
|
|
vp:p_vnode;
|
|
|
|
mtx_lock(@sync_mtx);
|
|
vp:=mp^.mnt_syncer;
|
|
if (vp<>nil)
|
|
mp^.mnt_syncer:=nil;
|
|
mtx_unlock(@sync_mtx);
|
|
if (vp<>nil)
|
|
vrele(vp);
|
|
end;
|
|
|
|
{
|
|
* Do a lazy sync of the filesystem.
|
|
}
|
|
static int
|
|
sync_fsync(struct vop_fsync_args *ap)
|
|
begin
|
|
struct vnode *syncvp:=ap^.a_vp;
|
|
mp:p_mount:=syncvp^.v_mount;
|
|
int error, save;
|
|
struct bufobj *bo;
|
|
|
|
{
|
|
* We only need to do something if this is a lazy evaluation.
|
|
}
|
|
if (ap^.a_waitfor<>MNT_LAZY)
|
|
Exit(0);
|
|
|
|
{
|
|
* Move ourselves to the back of the sync list.
|
|
}
|
|
bo:=@syncvp^.v_bufobj;
|
|
BO_LOCK(bo);
|
|
vn_syncer_add_to_worklist(bo, syncdelay);
|
|
BO_UNLOCK(bo);
|
|
|
|
{
|
|
* Walk the list of vnodes pushing all that are dirty and
|
|
* not already on the sync list.
|
|
}
|
|
if (vfs_busy(mp, MBF_NOWAIT)<>0)
|
|
Exit(0);
|
|
if (vn_start_write(nil, &mp, V_NOWAIT)<>0) begin
|
|
vfs_unbusy(mp);
|
|
Exit(0);
|
|
end;
|
|
save:=curthread_pflags_set(TDP_SYNCIO);
|
|
vfs_msync(mp, MNT_NOWAIT);
|
|
error:=VFS_SYNC(mp, MNT_LAZY);
|
|
curthread_pflags_restore(save);
|
|
vn_finished_write(mp);
|
|
vfs_unbusy(mp);
|
|
Exit(error);
|
|
end;
|
|
|
|
{
|
|
 * The syncer vnode is no longer referenced.
|
|
}
|
|
static int
|
|
sync_inactive(struct vop_inactive_args *ap)
|
|
begin
|
|
|
|
vgone(ap^.a_vp);
|
|
Exit(0);
|
|
end;
|
|
|
|
{
|
|
* The syncer vnode is no longer needed and is being decommissioned.
|
|
*
|
|
* Modifications to the worklist must be protected by sync_mtx.
|
|
}
|
|
static int
|
|
sync_reclaim(struct vop_reclaim_args *ap)
|
|
begin
|
|
vp:p_vnode:=ap^.a_vp;
|
|
struct bufobj *bo;
|
|
|
|
bo:=@vp^.v_bufobj;
|
|
BO_LOCK(bo);
|
|
mtx_lock(@sync_mtx);
|
|
if (vp^.v_mount^.mnt_syncer=vp)
|
|
vp^.v_mount^.mnt_syncer:=nil;
|
|
if (bo^.bo_flag and BO_ONWORKLST) begin
|
|
LIST_REMOVE(bo, bo_synclist);
|
|
syncer_worklist_len--;
|
|
sync_vnode_count--;
|
|
bo^.bo_flag:= and ~BO_ONWORKLST;
|
|
end;
|
|
mtx_unlock(@sync_mtx);
|
|
BO_UNLOCK(bo);
|
|
|
|
Exit(0);
|
|
end;
|
|
}
|
|
|
|
{
|
|
* Check if vnode represents a disk device
|
|
}
|
|
function vn_isdisk(vp:p_vnode;errp:PInteger):Boolean;
|
|
var
|
|
error:Integer;
|
|
begin
|
|
error:=0;
|
|
dev_lock();
|
|
if (vp^.v_type<>VCHR) then
|
|
begin
|
|
error:=ENOTBLK
|
|
end else
|
|
if (vp^.v_rdev=nil) then
|
|
begin
|
|
error:=ENXIO
|
|
end else
|
|
if (p_cdev(vp^.v_rdev)^.si_devsw=nil) then
|
|
begin
|
|
error:=ENXIO
|
|
end else
|
|
if ((p_cdevsw(p_cdev(vp^.v_rdev)^.si_devsw)^.d_flags and D_DISK)=0) then
|
|
begin
|
|
error:=ENOTBLK;
|
|
end;
|
|
dev_unlock();
|
|
error:=ENOTBLK;
|
|
if (errp<>nil) then
|
|
begin
|
|
errp^:=error;
|
|
end;
|
|
Exit(error=0);
|
|
end;
|
|
|
|
{
|
|
* Common filesystem object access control check routine. Accepts a
|
|
* vnode's type, "mode", uid and gid, requested access mode, credentials,
|
|
* and optional call-by-reference privused argument allowing vaccess()
|
|
* to indicate to the caller whether privilege was used to satisfy the
|
|
* request (obsoleted). Returns 0 on success, or an errno on failure.
|
|
}
|
|
function vaccess(_type:vtype;
|
|
file_mode:mode_t;
|
|
file_uid:uid_t;
|
|
file_gid:gid_t;
|
|
accmode:accmode_t;
|
|
privused:PInteger):Integer;
|
|
label
|
|
privcheck;
|
|
var
|
|
dac_granted :accmode_t;
|
|
priv_granted:accmode_t;
|
|
begin
|
|
Assert((accmode and (not (VEXEC or VWRITE or VREAD or VADMIN or VAPPEND)))=0,'invalid bit in accmode');
|
|
Assert(((accmode and VAPPEND)=0) or ((accmode and VWRITE)<>0),'VAPPEND without VWRITE');
|
|
|
|
{
|
|
* Look for a normal, non-privileged way to access the file/directory
|
|
* as requested. If it exists, go with that.
|
|
}
|
|
|
|
if (privused<>nil) then
|
|
begin
|
|
privused^:=0;
|
|
end;
|
|
|
|
dac_granted:=0;
|
|
|
|
{ Check the owner. }
|
|
if {(cred^.cr_uid=file_uid)} True then
|
|
begin
|
|
dac_granted:=dac_granted or VADMIN;
|
|
if ((file_mode and S_IXUSR)<>0) then
|
|
dac_granted:=dac_granted or VEXEC;
|
|
if ((file_mode and S_IRUSR)<>0) then
|
|
dac_granted:=dac_granted or VREAD;
|
|
if ((file_mode and S_IWUSR)<>0) then
|
|
dac_granted:=dac_granted or (VWRITE or VAPPEND);
|
|
|
|
if ((accmode and dac_granted)=accmode) then
|
|
begin
|
|
Exit(0);
|
|
end;
|
|
|
|
goto privcheck;
|
|
end;
|
|
|
|
{ Otherwise, check the groups (first match) }
|
|
if {(groupmember(file_gid, cred))} True then
|
|
begin
|
|
if ((file_mode and S_IXGRP)<>0) then
|
|
dac_granted:=dac_granted or VEXEC;
|
|
if ((file_mode and S_IRGRP)<>0) then
|
|
dac_granted:=dac_granted or VREAD;
|
|
if ((file_mode and S_IWGRP)<>0) then
|
|
dac_granted:=dac_granted or (VWRITE or VAPPEND);
|
|
|
|
if ((accmode and dac_granted)=accmode) then
|
|
begin
|
|
Exit(0);
|
|
end;
|
|
|
|
goto privcheck;
|
|
end;
|
|
|
|
{ Otherwise, check everyone else. }
|
|
if ((file_mode and S_IXOTH)<>0) then
|
|
dac_granted:=dac_granted or VEXEC;
|
|
if ((file_mode and S_IROTH)<>0) then
|
|
dac_granted:=dac_granted or VREAD;
|
|
if ((file_mode and S_IWOTH)<>0) then
|
|
dac_granted:=dac_granted or (VWRITE or VAPPEND);
|
|
|
|
if ((accmode and dac_granted)=accmode) then
|
|
begin
|
|
Exit(0);
|
|
end;
|
|
|
|
privcheck:
|
|
{
|
|
* Build a privilege mask to determine if the set of privileges
|
|
* satisfies the requirements when combined with the granted mask
|
|
* from above. For each privilege, if the privilege is required,
|
|
* bitwise or the request type onto the priv_granted mask.
|
|
}
|
|
priv_granted:=0;
|
|
|
|
if (_type=VDIR) then
|
|
begin
|
|
{
|
|
* For directories, use PRIV_VFS_LOOKUP to satisfy VEXEC
|
|
* requests, instead of PRIV_VFS_EXEC.
|
|
}
|
|
if ((accmode and VEXEC)<>0) and
|
|
((dac_granted and VEXEC)=0) {and
|
|
(priv_check_cred(cred, PRIV_VFS_LOOKUP, 0)=0)} then
|
|
begin
|
|
priv_granted:=priv_granted or VEXEC;
|
|
end;
|
|
end else
|
|
begin
|
|
{
|
|
* Ensure that at least one execute bit is on. Otherwise,
|
|
* a privileged user will always succeed, and we don't want
|
|
* this to happen unless the file really is executable.
|
|
}
|
|
if ((accmode and VEXEC)<>0) and
|
|
((dac_granted and VEXEC)=0) and
|
|
((file_mode and (S_IXUSR or S_IXGRP or S_IXOTH))<>0) {and
|
|
(priv_check_cred(cred, PRIV_VFS_EXEC, 0)=0)} then
|
|
begin
|
|
priv_granted:=priv_granted or VEXEC;
|
|
end;
|
|
end;
|
|
|
|
if ((accmode and VREAD)<>0) and
|
|
((dac_granted and VREAD)=0) {and
|
|
(priv_check_cred(cred, PRIV_VFS_READ, 0)=0)} then
|
|
begin
|
|
priv_granted:=priv_granted or VREAD;
|
|
end;
|
|
|
|
if ((accmode and VWRITE)<>0) and
|
|
((dac_granted and VWRITE)=0) {and
|
|
(priv_check_cred(cred, PRIV_VFS_WRITE, 0)=0)} then
|
|
begin
|
|
priv_granted:=priv_granted or (VWRITE or VAPPEND);
|
|
end;
|
|
|
|
if ((accmode and VADMIN)<>0) and
|
|
((dac_granted and VADMIN)=0) {and
|
|
(priv_check_cred(cred, PRIV_VFS_ADMIN, 0)=0)} then
|
|
begin
|
|
priv_granted:=priv_granted or VADMIN;
|
|
end;
|
|
|
|
if ((accmode and (priv_granted or dac_granted))=accmode) then
|
|
begin
|
|
{ XXX audit: privilege used }
|
|
if (privused<>nil) then
|
|
begin
|
|
privused^:=1;
|
|
end;
|
|
Exit(0);
|
|
end;
|
|
|
|
if ((accmode and VADMIN)<>0) then
|
|
Exit(EPERM)
|
|
else
|
|
Exit(EACCES);
|
|
end;
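
{
 * Example call (illustrative only): check read access on a regular file
 * with mode rw-r--r--. The uid/gid arguments are placeholders since the
 * credential checks above are currently stubbed to the owner branch.
 *
 *  error:=vaccess(VREG,
 *                 S_IRUSR or S_IWUSR or S_IRGRP or S_IROTH,
 *                 0, 0, VREAD, nil);
 *  // error=0 on success, EACCES or EPERM otherwise
}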
|
|
|
|
procedure vfs_badlock(msg,str:PChar;vp:p_vnode);
|
|
begin
|
|
Writeln(msg,' ',str);
|
|
Assert(false,RawByteString(msg)+' '+RawByteString(str));
|
|
end;
|
|
|
|
procedure assert_vi_locked(vp:p_vnode;str:PChar);
|
|
begin
|
|
if {vfs_badlock_mutex and} (not mtx_owned(VI_MTX(vp)^)) then
|
|
vfs_badlock('interlock is not locked but should be', str, vp);
|
|
end;
|
|
|
|
procedure assert_vi_unlocked(vp:p_vnode;str:PChar);
|
|
begin
|
|
if {vfs_badlock_mutex and} mtx_owned(VI_MTX(vp)^) then
|
|
vfs_badlock('interlock is locked but should not be', str, vp);
|
|
end;
|
|
|
|
procedure assert_vop_locked(vp:p_vnode;str:PChar);
|
|
var
|
|
locked:Integer;
|
|
begin
|
|
if (not IGNORE_LOCK(vp)) then
|
|
begin
|
|
locked:=VOP_ISLOCKED(vp);
|
|
if (locked=0) or (locked=LK_EXCLOTHER) then
|
|
vfs_badlock('is not locked but should be', str, vp);
|
|
end;
|
|
end;
|
|
|
|
procedure assert_vop_unlocked(vp:p_vnode;str:PChar);
|
|
begin
|
|
if (not IGNORE_LOCK(vp)) and (VOP_ISLOCKED(vp)=LK_EXCLUSIVE) then
|
|
vfs_badlock('is locked but should not be', str, vp);
|
|
end;
|
|
|
|
procedure assert_vop_elocked(vp:p_vnode;str:PChar);
|
|
begin
|
|
if (not IGNORE_LOCK(vp)) and (VOP_ISLOCKED(vp)<>LK_EXCLUSIVE) then
|
|
vfs_badlock('is not exclusive locked but should be', str, vp);
|
|
end;
|
|
|
|
function VOP_WRITE_PRE(ap:p_vop_write_args;var osize,ooffset:Int64):Integer; public;
|
|
var
|
|
va:t_vattr;
|
|
error:Integer;
|
|
begin
|
|
Result :=0;
|
|
osize :=0;
|
|
ooffset:=0;
|
|
if (not VN_KNLIST_EMPTY(ap^.a_vp)) then
|
|
begin
|
|
error:=VOP_GETATTR(ap^.a_vp, @va);
|
|
if (error<>0) then Exit(error);
|
|
ooffset:=ap^.a_uio^.uio_offset;
|
|
osize:=va.va_size;
|
|
end;
|
|
end;
|
|
|
|
procedure VOP_WRITE_POST(ap:p_vop_write_args;ret:Integer;var osize,ooffset:Int64); public;
|
|
var
|
|
noffset:Int64;
|
|
begin
|
|
noffset:=ap^.a_uio^.uio_offset;
|
|
if (noffset>ooffset) and (not VN_KNLIST_EMPTY(ap^.a_vp)) then
|
|
begin
|
|
if (noffset>osize) then
|
|
begin
|
|
VFS_KNOTE_LOCKED(ap^.a_vp, NOTE_WRITE or NOTE_EXTEND);
|
|
end else
|
|
begin
|
|
VFS_KNOTE_LOCKED(ap^.a_vp, NOTE_WRITE);
|
|
end;
|
|
end;
|
|
end;
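
{
 * Usage sketch (hypothetical write path): bracket the actual write so the
 * NOTE_WRITE/NOTE_EXTEND knotes are posted once the uio offset advances.
 *
 *  if (VOP_WRITE_PRE(ap, osize, ooffset)=0) then
 *  begin
 *   ret:=... perform the write described by ap^.a_uio ...;
 *   VOP_WRITE_POST(ap, ret, osize, ooffset);
 *  end;
}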
|
|
|
|
procedure vop_rename_fail(ap:p_vop_rename_args);
|
|
begin
|
|
if (ap^.a_tvp<>nil) then
|
|
vput(ap^.a_tvp);
|
|
|
|
if (ap^.a_tdvp=ap^.a_tvp) then
|
|
vrele(ap^.a_tdvp)
|
|
else
|
|
vput(ap^.a_tdvp);
|
|
|
|
vrele(ap^.a_fdvp);
|
|
vrele(ap^.a_fvp);
|
|
end;
|
|
|
|
procedure vop_rename_pre(ap:p_vop_rename_args); public;
|
|
begin
|
|
if (ap^.a_tdvp<>ap^.a_fdvp) then
|
|
vhold(ap^.a_fdvp);
|
|
|
|
if (ap^.a_tvp<>ap^.a_fvp) then
|
|
vhold(ap^.a_fvp);
|
|
|
|
vhold(ap^.a_tdvp);
|
|
|
|
if (ap^.a_tvp<>nil) then
|
|
vhold(ap^.a_tvp);
|
|
end;
|
|
|
|
procedure vop_create_post(ap:p_vop_create_args;rc:Integer); public;
|
|
begin
|
|
if (rc=0) then
|
|
begin
|
|
VFS_KNOTE_LOCKED(ap^.a_dvp, NOTE_WRITE);
|
|
end;
|
|
end;
|
|
|
|
procedure vop_link_post(ap:p_vop_link_args;rc:Integer); public;
|
|
begin
|
|
if (rc=0) then
|
|
begin
|
|
VFS_KNOTE_LOCKED(ap^.a_vp , NOTE_LINK);
|
|
VFS_KNOTE_LOCKED(ap^.a_tdvp, NOTE_WRITE);
|
|
end;
|
|
end;
|
|
|
|
procedure vop_mkdir_post(ap:p_vop_mkdir_args;rc:Integer); public;
|
|
begin
|
|
if (rc=0) then
|
|
begin
|
|
VFS_KNOTE_LOCKED(ap^.a_dvp, NOTE_WRITE or NOTE_LINK);
|
|
end;
|
|
end;
|
|
|
|
procedure vop_mknod_post(ap:p_vop_mknod_args;rc:Integer); public;
|
|
begin
|
|
if (rc=0) then
|
|
begin
|
|
VFS_KNOTE_LOCKED(ap^.a_dvp, NOTE_WRITE);
|
|
end;
|
|
end;
|
|
|
|
procedure vop_remove_post(ap:p_vop_remove_args;rc:Integer); public;
|
|
begin
|
|
if (rc=0) then
|
|
begin
|
|
VFS_KNOTE_LOCKED(ap^.a_dvp, NOTE_WRITE);
|
|
VFS_KNOTE_LOCKED(ap^.a_vp , NOTE_DELETE);
|
|
end;
|
|
end;
|
|
|
|
procedure vop_rename_post(ap:p_vop_rename_args;rc:Integer); public;
|
|
begin
|
|
if (rc=0) then
|
|
begin
|
|
VFS_KNOTE_UNLOCKED(ap^.a_fdvp, NOTE_WRITE);
|
|
VFS_KNOTE_UNLOCKED(ap^.a_tdvp, NOTE_WRITE);
|
|
VFS_KNOTE_UNLOCKED(ap^.a_fvp , NOTE_RENAME);
|
|
if (ap^.a_tvp<>nil) then
|
|
begin
|
|
VFS_KNOTE_UNLOCKED(ap^.a_tvp, NOTE_DELETE);
|
|
end;
|
|
end;
|
|
if (ap^.a_tdvp<>ap^.a_fdvp) then
|
|
begin
|
|
vdrop(ap^.a_fdvp);
|
|
end;
|
|
if (ap^.a_tvp<>ap^.a_fvp) then
|
|
begin
|
|
vdrop(ap^.a_fvp);
|
|
end;
|
|
vdrop(ap^.a_tdvp);
|
|
if (ap^.a_tvp<>nil) then
|
|
begin
|
|
vdrop(ap^.a_tvp);
|
|
end;
|
|
end;
|
|
|
|
procedure vop_rmdir_post(ap:p_vop_rmdir_args;rc:Integer); public;
|
|
begin
|
|
if (rc=0) then
|
|
begin
|
|
VFS_KNOTE_LOCKED(ap^.a_dvp, NOTE_WRITE or NOTE_LINK);
|
|
VFS_KNOTE_LOCKED(ap^.a_vp , NOTE_DELETE);
|
|
end;
|
|
end;
|
|
|
|
procedure vop_setattr_post(ap:p_vop_setattr_args;rc:Integer); public;
|
|
begin
|
|
if (rc=0) then
|
|
begin
|
|
VFS_KNOTE_LOCKED(ap^.a_vp, NOTE_ATTRIB);
|
|
end;
|
|
end;
|
|
|
|
procedure vop_symlink_post(ap:p_vop_symlink_args;rc:Integer); public;
|
|
begin
|
|
if (rc=0) then
|
|
begin
|
|
VFS_KNOTE_LOCKED(ap^.a_dvp, NOTE_WRITE);
|
|
end;
|
|
end;
|
|
|
|
procedure vfs_event_init();
|
|
begin
|
|
knlist_init_mtx(@fs_knlist, nil);
|
|
end;
|
|
|
|
procedure vfs_event_signal(fsid:p_fsid;event:DWORD;data:ptrint);
|
|
begin
|
|
KNOTE_UNLOCKED(@fs_knlist, event);
|
|
end;
|
|
|
|
function filt_fsattach(kn:p_knote):Integer;
|
|
begin
|
|
kn^.kn_flags:=kn^.kn_flags or EV_CLEAR;
|
|
knlist_add(@fs_knlist, kn, 0);
|
|
Exit(0);
|
|
end;
|
|
|
|
procedure filt_fsdetach(kn:p_knote);
|
|
begin
|
|
knlist_remove(@fs_knlist, kn, 0);
|
|
end;
|
|
|
|
function filt_fsevent(kn:p_knote;hint:QWORD):Integer;
|
|
begin
|
|
kn^.kn_fflags:=kn^.kn_fflags or hint;
|
|
Exit(ord(kn^.kn_fflags<>0));
|
|
end;
|
|
|
|
{
|
|
static int
|
|
sysctl_vfs_ctl(SYSCTL_HANDLER_ARGS)
|
|
begin
|
|
struct vfsidctl vc;
|
|
int error;
|
|
mp:p_mount;
|
|
|
|
error:=SYSCTL_IN(req, &vc, sizeof(vc));
|
|
if (error)
|
|
Exit(error);
|
|
if (vc.vc_vers<>VFS_CTL_VERS1)
|
|
Exit(EINVAL);
|
|
mp:=vfs_getvfs(@vc.vc_fsid);
|
|
if (mp=nil)
|
|
Exit(ENOENT);
|
|
{ ensure that a specific sysctl goes to the right filesystem. }
|
|
if (strcmp(vc.vc_fstypename, "*'<>0 and
|
|
strcmp(vc.vc_fstypename, mp^.mnt_vfc^.vfc_name)<>0) begin
|
|
vfs_rel(mp);
|
|
Exit(EINVAL);
|
|
end;
|
|
VCTLTOREQ(@vc, req);
|
|
error:=VFS_SYSCTL(mp, vc.vc_op, req);
|
|
vfs_rel(mp);
|
|
Exit(error);
|
|
end;
|
|
|
|
SYSCTL_PROC(_vfs, OID_AUTO, ctl, CTLTYPE_OPAQUE or CTLFLAG_WR, nil, 0, sysctl_vfs_ctl, "", "Sysctl by fsid';
|
|
|
|
{
|
|
* Function to initialize a va_filerev field sensibly.
|
|
* XXX: Wouldn't a random number make a lot more sense ??
|
|
}
|
|
u_quad_t
|
|
init_va_filerev(void)
|
|
begin
|
|
struct bintime bt;
|
|
|
|
getbinuptime(@bt);
|
|
Exit(((u_quad_t)bt.sec shl 32LL) or (bt.frac shr 32LL));
|
|
end;
|
|
}
|
|
|
|
procedure filt_vfsdetach(kn:p_knote); forward;
|
|
function filt_vfsread (kn:p_knote;hint:QWORD):Integer; forward;
|
|
function filt_vfswrite (kn:p_knote;hint:QWORD):Integer; forward;
|
|
function filt_vfsvnode (kn:p_knote;hint:QWORD):Integer; forward;
|
|
|
|
const
|
|
vfsread_filtops:t_filterops=(
|
|
f_isfd :1;
|
|
f_detach:@filt_vfsdetach;
|
|
f_event :@filt_vfsread;
|
|
);
|
|
|
|
vfswrite_filtops:t_filterops=(
|
|
f_isfd :1;
|
|
f_detach:@filt_vfsdetach;
|
|
f_event :@filt_vfswrite;
|
|
);
|
|
|
|
vfsvnode_filtops:t_filterops=(
|
|
f_isfd :1;
|
|
f_detach:@filt_vfsdetach;
|
|
f_event :@filt_vfsvnode;
|
|
);
|
|
|
|
procedure vfs_knllock(arg:Pointer);
|
|
begin
|
|
vn_lock(p_vnode(arg), LK_EXCLUSIVE or LK_RETRY,{$INCLUDE %FILE%},{$INCLUDE %LINENUM%});
|
|
end;
|
|
|
|
procedure vfs_knlunlock(arg:Pointer);
|
|
begin
|
|
VOP_UNLOCK(p_vnode(arg), 0);
|
|
end;
|
|
|
|
procedure vfs_knl_assert_locked(arg:Pointer);
|
|
begin
|
|
//
|
|
end;
|
|
|
|
procedure vfs_knl_assert_unlocked(arg:Pointer);
|
|
begin
|
|
//
|
|
end;
|
|
|
|
function vfs_kqfilter(ap:p_vop_kqfilter_args):Integer;
|
|
var
|
|
vp:p_vnode;
|
|
kn:p_knote;
|
|
knl:p_knlist;
|
|
begin
|
|
vp:=ap^.a_vp;
|
|
kn:=ap^.a_kn;
|
|
|
|
case (kn^.kn_filter) of
|
|
EVFILT_READ :kn^.kn_fop:=@vfsread_filtops;
|
|
EVFILT_WRITE:kn^.kn_fop:=@vfswrite_filtops;
|
|
EVFILT_VNODE:kn^.kn_fop:=@vfsvnode_filtops;
|
|
else
|
|
Exit(EINVAL);
|
|
end;
|
|
|
|
kn^.kn_hook:=vp;
|
|
|
|
v_addpollinfo(vp);
|
|
|
|
if (vp^.v_pollinfo=nil) then
|
|
begin
|
|
Exit(ENOMEM);
|
|
end;
|
|
|
|
knl:=@vp^.v_pollinfo^.vpi_selinfo.si_note;
|
|
vhold(vp);
|
|
knlist_add(knl, kn, 0);
|
|
|
|
Exit(0);
|
|
end;
|
|
|
|
{
|
|
* Detach knote from vnode
|
|
}
|
|
procedure filt_vfsdetach(kn:p_knote);
|
|
var
|
|
vp:p_vnode;
|
|
begin
|
|
vp:=kn^.kn_hook;
|
|
|
|
Assert(vp^.v_pollinfo<>nil, 'Missing v_pollinfo');
|
|
knlist_remove(@vp^.v_pollinfo^.vpi_selinfo.si_note, kn, 0);
|
|
vdrop(vp);
|
|
end;
|
|
|
|
{ARGSUSED}
|
|
function filt_vfsread(kn:p_knote;hint:QWORD):Integer;
|
|
var
|
|
vp:p_vnode;
|
|
va:t_vattr;
|
|
res:Integer;
|
|
begin
|
|
vp:=kn^.kn_hook;
|
|
|
|
{
|
|
* filesystem is gone, so set the EOF flag and schedule
|
|
* the knote for deletion.
|
|
}
|
|
if (hint=NOTE_REVOKE) then
|
|
begin
|
|
VI_LOCK(vp);
|
|
kn^.kn_flags:=kn^.kn_flags or (EV_EOF or EV_ONESHOT);
|
|
VI_UNLOCK(vp);
|
|
Exit(1);
|
|
end;
|
|
|
|
if (VOP_GETATTR(vp, @va)<>0) then
|
|
begin
|
|
Exit(0);
|
|
end;
|
|
|
|
VI_LOCK(vp);
|
|
kn^.kn_data:=va.va_size - p_file(kn^.kn_fp)^.f_offset;
|
|
res:=ord(kn^.kn_data<>0);
|
|
VI_UNLOCK(vp);
|
|
Exit(res);
|
|
end;
|
|
|
|
{ARGSUSED}
|
|
function filt_vfswrite(kn:p_knote;hint:QWORD):Integer;
|
|
var
|
|
vp:p_vnode;
|
|
begin
|
|
vp:=kn^.kn_hook;
|
|
|
|
VI_LOCK(vp);
|
|
|
|
{
|
|
* filesystem is gone, so set the EOF flag and schedule
|
|
* the knote for deletion.
|
|
}
|
|
if (hint=NOTE_REVOKE) then
|
|
begin
|
|
kn^.kn_flags:=kn^.kn_flags or (EV_EOF or EV_ONESHOT);
|
|
end;
|
|
|
|
kn^.kn_data:=0;
|
|
VI_UNLOCK(vp);
|
|
Exit(1);
|
|
end;
|
|
|
|
function filt_vfsvnode(kn:p_knote;hint:QWORD):Integer;
|
|
var
|
|
vp:p_vnode;
|
|
res:Integer;
|
|
begin
|
|
vp:=kn^.kn_hook;
|
|
|
|
VI_LOCK(vp);
|
|
if ((kn^.kn_sfflags and hint)<>0) then
|
|
begin
|
|
kn^.kn_fflags:=kn^.kn_fflags or hint;
|
|
end;
|
|
if (hint=NOTE_REVOKE) then
|
|
begin
|
|
kn^.kn_flags:=kn^.kn_flags or EV_EOF;
|
|
VI_UNLOCK(vp);
|
|
Exit(1);
|
|
end;
|
|
res:=ord(kn^.kn_fflags<>0);
|
|
VI_UNLOCK(vp);
|
|
Exit(res);
|
|
end;
|
|
|
|
function vfs_read_dirent(ap:p_vop_readdir_args;dp:p_dirent;off:QWORD):Integer;
|
|
var
|
|
error:Integer;
|
|
begin
|
|
if (dp^.d_reclen > ap^.a_uio^.uio_resid) then
|
|
begin
|
|
Exit(ENAMETOOLONG);
|
|
end;
|
|
|
|
error:=uiomove(dp, dp^.d_reclen, ap^.a_uio);
|
|
if (error<>0) then
|
|
begin
|
|
if (ap^.a_ncookies<>nil) then
|
|
begin
|
|
if (ap^.a_cookies<>nil) then
|
|
begin
|
|
FreeMem(ap^.a_cookies);
|
|
end;
|
|
ap^.a_cookies:=nil;
|
|
ap^.a_ncookies^:=0;
|
|
end;
|
|
Exit(error);
|
|
end;
|
|
|
|
if (ap^.a_ncookies=nil) then Exit(0);
|
|
|
|
Assert(ap^.a_cookies<>nil,'null ap^.a_cookies value with non-null ap^.a_ncookies!');
|
|
|
|
ap^.a_cookies^:=ReAllocMem(ap^.a_cookies^,(ap^.a_ncookies^ + 1) * sizeof(QWORD));
|
|
ap^.a_cookies^[ap^.a_ncookies^]:=off;
|
|
|
|
Inc(ap^.a_ncookies^);
|
|
Exit(0);
|
|
end;
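
{
 * Usage sketch (hypothetical readdir implementation): "dp" is a pointer to
 * a fully filled directory entry and "next_off" is the offset cookie of the
 * entry that follows it; both names are assumptions.
 *
 *  error:=vfs_read_dirent(ap, dp, next_off);
 *  if (error=ENAMETOOLONG) then break;  // entry does not fit, stop the scan
 *  if (error<>0) then Exit(error);
}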
|
|
|
|
{
|
|
* Mark for update the access time of the file if the filesystem
|
|
* supports VOP_MARKATIME. This functionality is used by execve and
|
|
* mmap, so we want to avoid the I/O implied by directly setting
|
|
* va_atime for the sake of efficiency.
|
|
}
|
|
procedure vfs_mark_atime(vp:p_vnode);
|
|
var
|
|
mp:p_mount;
|
|
begin
|
|
mp:=vp^.v_mount;
|
|
VFS_ASSERT_GIANT(mp);
|
|
ASSERT_VOP_LOCKED(vp,'vfs_mark_atime');
|
|
|
|
if (mp<>nil) then
|
|
if ((mp^.mnt_flag and (MNT_NOATIME or MNT_RDONLY))=0) then
|
|
begin
|
|
VOP_MARKATIME(vp);
|
|
end;
|
|
end;
|
|
|
|
{
|
|
* The purpose of this routine is to remove granularity from accmode_t,
|
|
* reducing it into standard unix access bits - VEXEC, VREAD, VWRITE,
|
|
* VADMIN and VAPPEND.
|
|
*
|
|
* If it returns 0, the caller is supposed to continue with the usual
|
|
* access checks using 'accmode' as modified by this routine. If it
|
|
 * returns a nonzero value, the caller is supposed to return that value
|
|
* as errno.
|
|
*
|
|
* Note that after this routine runs, accmode may be zero.
|
|
}
|
|
function vfs_unixify_accmode(accmode:p_accmode_t):Integer;
|
|
begin
|
|
{
|
|
 * There is no way to specify an explicit "deny" rule using
|
|
* file mode or POSIX.1e ACLs.
|
|
}
|
|
if ((accmode^ and VEXPLICIT_DENY)<>0) then
|
|
begin
|
|
accmode^:=0;
|
|
Exit(0);
|
|
end;
|
|
|
|
{
|
|
* None of these can be translated into usual access bits.
|
|
* Also, the common case for NFSv4 ACLs is to not contain
|
|
* either of these bits. Caller should check for VWRITE
|
|
* on the containing directory instead.
|
|
}
|
|
if ((accmode^ and (VDELETE_CHILD or VDELETE))<>0) then
|
|
begin
|
|
Exit(EPERM);
|
|
end;
|
|
|
|
if ((accmode^ and VADMIN_PERMS)<>0) then
|
|
begin
|
|
accmode^:=accmode^ and (not VADMIN_PERMS);
|
|
accmode^:=accmode^ or VADMIN;
|
|
end;
|
|
|
|
{
|
|
* There is no way to deny VREAD_ATTRIBUTES, VREAD_ACL
|
|
* or VSYNCHRONIZE using file mode or POSIX.1e ACL.
|
|
}
|
|
accmode^:=accmode^ and (not (VSTAT_PERMS or VSYNCHRONIZE));
|
|
|
|
Exit(0);
|
|
end;
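
{
 * Usage sketch (hypothetical access check; file_mode/file_uid/file_gid are
 * placeholder locals): reduce an NFSv4-style accmode to plain unix bits
 * before handing it to vaccess().
 *
 *  error:=vfs_unixify_accmode(@accmode);
 *  if (error<>0) then Exit(error);
 *  if (accmode<>0) then
 *   error:=vaccess(vp^.v_type, file_mode, file_uid, file_gid, accmode, nil);
}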
|
|
|
|
|
|
{
|
|
* These are helper functions for filesystems to traverse all
|
|
* their vnodes. See MNT_VNODE_FOREACH_ALL() in sys/mount.h.
|
|
*
|
|
* This interface replaces MNT_VNODE_FOREACH.
|
|
}
|
|
function __mnt_vnode_next_all(mvp:pp_vnode;mp:p_mount):p_vnode;
|
|
var
|
|
vp:p_vnode;
|
|
begin
|
|
//if (should_yield())
|
|
kern_yield(PRI_UNCHANGED);
|
|
|
|
MNT_ILOCK(mp);
|
|
Assert(mvp^^.v_mount=mp, 'marker vnode mount list mismatch');
|
|
vp:=TAILQ_NEXT(mvp^,@mvp^^.v_nmntvnodes);
|
|
while (vp<>nil) do
|
|
begin
|
|
if not ((vp^.v_type=VMARKER) or ((vp^.v_iflag and VI_DOOMED)<>0)) then Break;
|
|
vp:=TAILQ_NEXT(vp,@vp^.v_nmntvnodes);
|
|
end;
|
|
|
|
{ Check if we are done }
|
|
if (vp=nil) then
|
|
begin
|
|
__mnt_vnode_markerfree_all(mvp, mp);
|
|
{ MNT_IUNLOCK(mp); -- done in above function }
|
|
Exit(nil);
|
|
end;
|
|
TAILQ_REMOVE(@mp^.mnt_nvnodelist,mvp^,@mvp^^.v_nmntvnodes);
|
|
TAILQ_INSERT_AFTER(@mp^.mnt_nvnodelist,vp,mvp^,@mvp^^.v_nmntvnodes);
|
|
VI_LOCK(vp);
|
|
MNT_IUNLOCK(mp);
|
|
Exit(vp);
|
|
end;
|
|
|
|
function __mnt_vnode_first_all(mvp:pp_vnode;mp:p_mount):p_vnode;
|
|
var
|
|
vp:p_vnode;
|
|
begin
|
|
mvp^:=AllocMem(sizeof(t_vnode));
|
|
MNT_ILOCK(mp);
|
|
MNT_REF(mp);
|
|
mvp^^.v_type:=VMARKER;
|
|
|
|
vp:=TAILQ_FIRST(@mp^.mnt_nvnodelist);
|
|
while (vp<>nil) and
|
|
((vp^.v_type=VMARKER) or ((vp^.v_iflag and VI_DOOMED)<>0)) do
|
|
begin
|
|
vp:=TAILQ_NEXT(vp,@vp^.v_nmntvnodes);
|
|
end;
|
|
|
|
{ Check if we are done }
|
|
if (vp=nil) then
|
|
begin
|
|
MNT_REL(mp);
|
|
MNT_IUNLOCK(mp);
|
|
FreeMem(mvp^);
|
|
mvp^:=nil;
|
|
Exit(nil);
|
|
end;
|
|
mvp^^.v_mount:=mp;
|
|
TAILQ_INSERT_AFTER(@mp^.mnt_nvnodelist,vp,mvp^,@mvp^^.v_nmntvnodes);
|
|
VI_LOCK(vp);
|
|
MNT_IUNLOCK(mp);
|
|
Exit(vp);
|
|
end;
|
|
|
|
procedure __mnt_vnode_markerfree_all(mvp:pp_vnode;mp:p_mount);
|
|
begin
|
|
if (mvp^=nil) then
|
|
begin
|
|
MNT_IUNLOCK(mp);
|
|
Exit;
|
|
end;
|
|
|
|
mtx_assert(MNT_MTX(mp)^);
|
|
|
|
Assert(mvp^^.v_mount=mp, 'marker vnode mount list mismatch');
|
|
TAILQ_REMOVE(@mp^.mnt_nvnodelist,mvp^,@mvp^^.v_nmntvnodes);
|
|
MNT_REL(mp);
|
|
MNT_IUNLOCK(mp);
|
|
FreeMem(mvp^);
|
|
mvp^:=nil;
|
|
end;
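
{
 * Iteration sketch (compare the loop in vflush above): walk every vnode of
 * a mount with the marker helpers. Each returned vnode comes back with its
 * interlock held; on an early exit the marker must be freed via
 * MNT_ILOCK(mp) + __mnt_vnode_markerfree_all(@mvp, mp).
 *
 *  vp:=__mnt_vnode_first_all(@mvp, mp);
 *  while (vp<>nil) do
 *  begin
 *   ... use vp, release or consume its interlock ...
 *   vp:=__mnt_vnode_next_all(@mvp, mp);
 *  end;
}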
|
|
|
|
{
|
|
* These are helper functions for filesystems to traverse their
|
|
* active vnodes. See MNT_VNODE_FOREACH_ACTIVE() in sys/mount.h
|
|
}
|
|
procedure mnt_vnode_markerfree_active(mvp:pp_vnode;mp:p_mount);
|
|
begin
|
|
Assert(mvp^^.v_mount=mp, 'marker vnode mount list mismatch');
|
|
|
|
MNT_ILOCK(mp);
|
|
MNT_REL(mp);
|
|
MNT_IUNLOCK(mp);
|
|
FreeMem(mvp^);
|
|
mvp^:=nil;
|
|
end;
|
|
|
|
function mnt_vnode_next_active(mvp:pp_vnode;mp:p_mount):p_vnode;
|
|
label
|
|
restart;
|
|
var
|
|
vp,nvp:p_vnode;
|
|
begin
|
|
mtx_assert(vnode_free_list_mtx);
|
|
Assert(mvp^^.v_mount=mp, 'marker vnode mount list mismatch');
|
|
restart:
|
|
vp:=TAILQ_NEXT(mvp^,@mvp^^.v_actfreelist);
|
|
TAILQ_REMOVE(@mp^.mnt_activevnodelist,mvp^,@mvp^^.v_actfreelist);
|
|
while (vp<>nil) do
|
|
begin
|
|
if (vp^.v_type=VMARKER) then
|
|
begin
|
|
vp:=TAILQ_NEXT(vp,@vp^.v_actfreelist);
|
|
continue;
|
|
end;
|
|
if (not VI_TRYLOCK(vp)) then
|
|
begin
|
|
continue;
|
|
end;
|
|
Assert(vp^.v_type<>VMARKER, 'locked marker %p');
|
|
Assert((vp^.v_mount=mp) or (vp^.v_mount=nil),'alien vnode on the active list %p %p');
|
|
if (vp^.v_mount=mp) and ((vp^.v_iflag and VI_DOOMED)=0) then
|
|
begin
|
|
break;
|
|
end;
|
|
nvp:=TAILQ_NEXT(vp,@vp^.v_actfreelist);
|
|
VI_UNLOCK(vp);
|
|
vp:=nvp;
|
|
end;
|
|
|
|
{ Check if we are done }
|
|
if (vp=nil) then
|
|
begin
|
|
mtx_unlock(vnode_free_list_mtx);
|
|
mnt_vnode_markerfree_active(mvp, mp);
|
|
Exit(nil);
|
|
end;
|
|
TAILQ_INSERT_AFTER(@mp^.mnt_activevnodelist,vp,mvp^,@mvp^^.v_actfreelist);
|
|
mtx_unlock(vnode_free_list_mtx);
|
|
ASSERT_VI_LOCKED(vp, 'active iter');
|
|
Assert((vp^.v_iflag and VI_ACTIVE)<>0, 'Non-active vp %p');
|
|
Exit(vp);
|
|
end;
|
|
|
|
function __mnt_vnode_next_active(mvp:pp_vnode;mp:p_mount):p_vnode;
|
|
begin
|
|
//if (should_yield())
|
|
kern_yield(PRI_UNCHANGED);
|
|
mtx_lock(vnode_free_list_mtx);
|
|
Exit(mnt_vnode_next_active(mvp, mp));
|
|
end;
|
|
|
|
function __mnt_vnode_first_active(mvp:pp_vnode;mp:p_mount):p_vnode;
|
|
var
|
|
vp:p_vnode;
|
|
begin
|
|
mvp^:=AllocMem(sizeof(t_vnode));
|
|
MNT_ILOCK(mp);
|
|
MNT_REF(mp);
|
|
MNT_IUNLOCK(mp);
|
|
mvp^^.v_type:=VMARKER;
|
|
mvp^^.v_mount:=mp;
|
|
|
|
mtx_lock(vnode_free_list_mtx);
|
|
vp:=TAILQ_FIRST(@mp^.mnt_activevnodelist);
|
|
if (vp=nil) then
|
|
begin
|
|
mtx_unlock(vnode_free_list_mtx);
|
|
mnt_vnode_markerfree_active(mvp, mp);
|
|
Exit(nil);
|
|
end;
|
|
TAILQ_INSERT_BEFORE(vp, mvp^,@mvp^^.v_actfreelist);
|
|
Exit(mnt_vnode_next_active(mvp, mp));
|
|
end;
|
|
|
|
procedure __mnt_vnode_markerfree_active(mvp:pp_vnode;mp:p_mount);
|
|
begin
|
|
if (mvp^=nil) then Exit;
|
|
|
|
mtx_lock(vnode_free_list_mtx);
|
|
TAILQ_REMOVE(@mp^.mnt_activevnodelist,mvp^,@mvp^^.v_actfreelist);
|
|
mtx_unlock(vnode_free_list_mtx);
|
|
mnt_vnode_markerfree_active(mvp, mp);
|
|
end;
|
|
|
|
|
|
end.
|
|
|