// FPPS4/sys/vfs/vfs_vnops.pas

unit vfs_vnops;
{$mode ObjFPC}{$H+}
{$CALLING SysV_ABI_CDecl}
interface
uses
sys_event,
kern_param,
vmount,
vnamei,
vfile,
vstat,
vuio,
vm,
vmparam,
vfilio,
vnode;
function vn_lock(vp:p_vnode;flags:Integer;_file:PChar;line:Integer):Integer;
function vn_open(ndp:p_nameidata;
flagp:PInteger;
cmode:Integer;
fp:p_file):Integer;
function vn_open_cred(ndp:p_nameidata;
flagp:PInteger;
cmode:Integer;
vn_open_flags:DWORD;
fp:p_file):Integer;
function vn_writechk(vp:p_vnode):Integer;
function vn_start_write(vp:p_vnode;mpp:pp_mount;flags:Integer):Integer;
procedure vn_finished_write(mp:p_mount);
function vn_close(vp:p_vnode;flags:Integer):Integer;
function vn_stat(vp:p_vnode;sb:p_stat):Integer;
function vn_io_fault(fp:p_file;uio:p_uio;flags:Integer):Integer;
function vn_truncate(fp:p_file;length:Int64):Integer;
function vn_ioctl(fp:p_file;com:QWORD;data:Pointer):Integer;
function vn_poll(fp:p_file;events:Integer):Integer;
function vn_statfile(fp:p_file;sb:p_stat):Integer;
function vn_closefile(fp:p_file):Integer;
function vn_chmod(fp:p_file;mode:mode_t):Integer;
function vn_chown(fp:p_file;uid:uid_t;gid:gid_t):Integer;
function vn_kqfilter(fp:p_file;kn:p_knote):Integer;
const
vnops:fileops=(
fo_read :@vn_io_fault;
fo_write :@vn_io_fault;
fo_truncate:@vn_truncate;
fo_ioctl :@vn_ioctl;
fo_poll :@vn_poll;
fo_kqfilter:@vn_kqfilter;
fo_stat :@vn_statfile;
fo_close :@vn_closefile;
fo_chmod :@vn_chmod;
fo_chown :@vn_chown;
fo_flags :DFLAG_PASSABLE or DFLAG_SEEKABLE
);
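//How this table is used (illustrative sketch, not code from this unit): after
//a successful vn_open() the caller attaches vnops to the new descriptor; in
//the FreeBSD original this is done with finit(), whose Pascal counterpart and
//the nameidata variable "nd" below are assumptions:
//
//  fp^.f_vnode:=nd.ni_vp;
//  finit(fp, flags and FMASK, DTYPE_VNODE, nd.ni_vp, @vnops);
//
//From then on fo_read/fo_write dispatch to vn_io_fault, fo_stat to
//vn_statfile, fo_close to vn_closefile, and so on.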
function foffset_get(fp:p_file):Int64;
function foffset_lock(fp:p_file;flags:Integer):Int64;
procedure foffset_unlock(fp:p_file;val:Int64;flags:Integer);
procedure foffset_lock_uio(fp:p_file;uio:p_uio;flags:Integer);
procedure foffset_unlock_uio(fp:p_file;uio:p_uio;flags:Integer);
//
procedure vref(vp:p_vnode); external;
//
implementation
uses
sysutils,
errno,
vnode_if,
vfcntl,
vfs_subr,
vfs_syscalls,
kern_thr,
systm,
kern_mtx,
kern_mtxpool,
kern_descrip;
function vn_lock(vp:p_vnode;flags:Integer;_file:PChar;line:Integer):Integer;
begin
Assert((flags and LK_TYPE_MASK)<>0,'vn_lock called with no locktype.');
repeat
Result:=VOP_LOCK(vp,flags,_file,line);
flags:=flags and (not LK_INTERLOCK); //Interlock is always dropped.
if ((flags and LK_RETRY)<>0) and (Result<>0) then
begin
Assert(false,'LK_RETRY set with incompatible flags (0x'+HexStr(flags,8)+') or an error occurred ('+IntToStr(Result)+')');
end;
if (Result=0) and
((vp^.v_iflag and VI_DOOMED)<>0) and
((flags and LK_RETRY)=0) then
begin
VOP_UNLOCK(vp,0);
Result:=ENOENT;
break;
end;
until ((flags and LK_RETRY)=0) or (Result=0); //retry only while LK_RETRY is set and the lock attempt failed
end;
function vn_open(ndp:p_nameidata;
flagp:PInteger;
cmode:Integer;
fp:p_file):Integer;
begin
Result:=vn_open_cred(ndp, flagp, cmode, 0, fp);
end;
{
* Common code for vnode open operations.
* Check permissions, and call the VOP_OPEN or VOP_CREATE routine.
*
* Note that this does NOT free nameidata for the successful case,
* due to the NDINIT being done elsewhere.
}
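{
* A minimal caller-side sketch (hedged: the NDINIT helper, its argument order
* and the nameidata type are assumptions taken from the C original; only
* vn_open_cred and vn_close are defined in this unit):
*
*   NDINIT(@nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, td);
*   flags:=FREAD;
*   error:=vn_open_cred(@nd, @flags, 0, 0, fp);
*   if (error=0) then
*   begin
*    NDFREE(@nd, NDF_ONLY_PNBUF); //pathname buffer is NOT freed on success
*    VOP_UNLOCK(nd.ni_vp, 0);     //the vnode is returned locked and referenced
*    ...
*    vn_close(nd.ni_vp, FREAD);   //drops the reference taken by the open
*   end;
}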
function vn_open_cred(ndp:p_nameidata;
flagp:PInteger;
cmode:Integer;
vn_open_flags:DWORD;
fp:p_file):Integer;
label
restart,
bad;
var
vp:p_vnode;
mp:p_mount;
vat:t_vattr;
vap:p_vattr;
ofmode,fmode,error:Integer;
accmode:accmode_t;
mps:Integer;
vfslocked:Integer;
begin
vap:=@vat;
mps:=ndp^.ni_cnd.cn_flags and MPSAFE;
restart:
vfslocked:=0;
ofmode:=flagp^;
fmode:=ofmode;
if ((fmode and O_CREAT)<>0) then
begin
ndp^.ni_cnd.cn_nameiop:=CREATE;
ndp^.ni_cnd.cn_flags:=ISOPEN or LOCKPARENT or LOCKLEAF or MPSAFE;
if ((fmode and O_EXCL)=0) and ((fmode and O_NOFOLLOW)=0) then
begin
ndp^.ni_cnd.cn_flags:=ndp^.ni_cnd.cn_flags or FOLLOW;
end;
if ((vn_open_flags and VN_OPEN_NOAUDIT)=0) then
begin
ndp^.ni_cnd.cn_flags:=ndp^.ni_cnd.cn_flags or AUDITVNODE1;
end;
//bwillwrite();
error:=nd_namei(ndp);
if (error<>0) then
begin
Exit(error);
end;
vfslocked:=NDHASGIANT(ndp);
if (mps=0) then
begin
ndp^.ni_cnd.cn_flags:=ndp^.ni_cnd.cn_flags and (not MPSAFE);
end;
if (ndp^.ni_vp=nil) then
begin
vattr_null(vap);
vap^.va_type:=VREG;
vap^.va_mode:=cmode;
if ((fmode and O_EXCL)<>0) then
begin
vap^.va_vaflags:=vap^.va_vaflags or VA_EXCLUSIVE;
end;
if (vn_start_write(ndp^.ni_dvp, @mp, V_NOWAIT)<>0) then
begin
NDFREE(ndp, NDF_ONLY_PNBUF);
vput(ndp^.ni_dvp);
VFS_UNLOCK_GIANT(ord(vfslocked));
error:=vn_start_write(nil, @mp, V_XSLEEP or PCATCH);
if (error<>0) then Exit(error);
goto restart;
end;
//error:=mac_vnode_check_create(cred, ndp^.ni_dvp, &ndp^.ni_cnd, vap);
//if (error=0) then
error:=VOP_CREATE(ndp^.ni_dvp, @ndp^.ni_vp, @ndp^.ni_cnd, vap);
vput(ndp^.ni_dvp);
vn_finished_write(mp);
if (error<>0) then
begin
VFS_UNLOCK_GIANT(ord(vfslocked));
NDFREE(ndp, NDF_ONLY_PNBUF);
Exit(error);
end;
fmode:=fmode and (not O_TRUNC);
vp:=ndp^.ni_vp;
end else
begin
if (ndp^.ni_dvp=ndp^.ni_vp) then
vrele(ndp^.ni_dvp)
else
vput(ndp^.ni_dvp);
ndp^.ni_dvp:=nil;
vp:=ndp^.ni_vp;
if ((fmode and O_EXCL)<>0) then
begin
error:=EEXIST;
goto bad;
end;
fmode:=fmode and (not O_CREAT);
end;
end else
begin
ndp^.ni_cnd.cn_nameiop:=LOOKUP;
ndp^.ni_cnd.cn_flags:=ISOPEN or LOCKLEAF or MPSAFE;
if ((fmode and O_NOFOLLOW)<>0) then
begin
ndp^.ni_cnd.cn_flags:=ndp^.ni_cnd.cn_flags or NOFOLLOW;
end else
begin
ndp^.ni_cnd.cn_flags:=ndp^.ni_cnd.cn_flags or FOLLOW;
end;
if ((fmode and FWRITE)=0) then
begin
ndp^.ni_cnd.cn_flags:=ndp^.ni_cnd.cn_flags or LOCKSHARED;
end;
if ((vn_open_flags and VN_OPEN_NOAUDIT)=0) then
begin
ndp^.ni_cnd.cn_flags:=ndp^.ni_cnd.cn_flags or AUDITVNODE1;
end;
error:=nd_namei(ndp);
if (error<>0) then
begin
Exit(error);
end;
if (mps=0) then
begin
ndp^.ni_cnd.cn_flags:=ndp^.ni_cnd.cn_flags and (not MPSAFE);
end;
vfslocked:=NDHASGIANT(ndp);
vp:=ndp^.ni_vp;
end;
case vp^.v_type of
VLNK:
begin
error:=EMLINK;
goto bad;
end;
VSOCK:
begin
error:=EOPNOTSUPP;
goto bad;
end;
VDIR:
if ((fmode and (FWRITE or O_TRUNC))<>0) then
begin
error:=EISDIR;
goto bad;
end;
else
if ((fmode and O_DIRECTORY)<>0) then
begin
error:=ENOTDIR;
goto bad;
end;
end;
accmode:=0;
if ((fmode and (FWRITE or O_TRUNC))<>0) then
begin
accmode:=accmode or VWRITE;
end;
if ((fmode and FREAD)<>0) then
begin
accmode:=accmode or VREAD;
end;
if ((fmode and FEXEC)<>0) then
begin
accmode:=accmode or VEXEC;
end;
if ((fmode and O_APPEND)<>0) and ((fmode and FWRITE)<>0) then
begin
accmode:=accmode or VAPPEND;
end;
//error:=mac_vnode_check_open(cred, vp, accmode);
//if (error) then
// goto bad;
if ((fmode and O_CREAT)=0) then
begin
if ((accmode and VWRITE)<>0) then
begin
error:=vn_writechk(vp);
if (error<>0) then goto bad;
end;
if (accmode<>0) then
begin
error:=VOP_ACCESS(vp, accmode);
if (error<>0) then goto bad;
end;
end;
if (vp^.v_type=VFIFO) and (VOP_ISLOCKED(vp)<>LK_EXCLUSIVE) then
begin
vn_lock(vp, LK_UPGRADE or LK_RETRY,{$INCLUDE %FILE%},{$INCLUDE %LINENUM%});
end;
error:=VOP_OPEN(vp, ofmode, fp);
if (error<>0) then goto bad;
if ((fmode and FWRITE)<>0) then
begin
VOP_ADD_WRITECOUNT(vp, 1);
end;
flagp^:=fmode;
ASSERT_VOP_LOCKED(vp, 'vn_open_cred');
if (mps=0) then
begin
VFS_UNLOCK_GIANT(ord(vfslocked));
end;
Exit(0);
bad:
NDFREE(ndp, NDF_ONLY_PNBUF);
vput(vp);
VFS_UNLOCK_GIANT(ord(vfslocked));
flagp^:=fmode;
ndp^.ni_vp:=nil;
Exit(error);
end;
function vn_writechk(vp:p_vnode):Integer;
begin
ASSERT_VOP_LOCKED(vp, 'vn_writechk');
Exit(0);
end;
function vn_start_write_locked(mp:p_mount;flags:Integer):Integer;
label
unlock;
var
td:p_kthread;
error:Integer;
begin
mtx_assert(MNT_MTX(mp)^);
error:=0;
td:=curkthread;
{
* Check on status of suspension.
}
if (td<>nil) then
if ((td^.td_pflags and TDP_IGNSUSP)=0) or
(mp^.mnt_susp_owner<>td) then
begin
while ((mp^.mnt_kern_flag and MNTK_SUSPEND)<>0) do
begin
if ((flags and V_NOWAIT)<>0) then
begin
error:=EWOULDBLOCK;
goto unlock;
end;
error:=msleep(@mp^.mnt_flag, MNT_MTX(mp), (PUSER - 1) or (flags and PCATCH), 'suspfs', 0);
if (error<>0) then
begin
goto unlock;
end;
end;
end;
if ((flags and V_XSLEEP)<>0) then
begin
goto unlock;
end;
Inc(mp^.mnt_writeopcount);
unlock:
if (error<>0) or ((flags and V_XSLEEP)<>0) then
begin
MNT_REL(mp);
end;
MNT_IUNLOCK(mp);
Exit(error);
end;
function vn_start_write(vp:p_vnode;mpp:pp_mount;flags:Integer):Integer;
var
mp:p_mount;
error:Integer;
begin
error:=0;
{
* If a vnode is provided, get and return the mount point to
* which it will write.
}
if (vp<>nil) then
begin
error:=VOP_GETWRITEMOUNT(vp, mpp);
if (error<>0) then
begin
mpp^:=nil;
if (error<>EOPNOTSUPP) then
begin
Exit(error);
end;
Exit(0);
end;
end;
mp:=mpp^;
if (mp=nil) then
begin
Exit(0);
end;
{
* VOP_GETWRITEMOUNT() returns with the mp refcount held through
* a vfs_ref().
* As long as a vnode is not provided we need to acquire a
* refcount for the provided mountpoint too, in order to
* emulate a vfs_ref().
}
MNT_ILOCK(mp);
if (vp=nil) then
begin
MNT_REF(mp);
end;
Exit(vn_start_write_locked(mp, flags));
end;
procedure vn_finished_write(mp:p_mount);
begin
if (mp=nil) then Exit;
MNT_ILOCK(mp);
MNT_REL(mp);
Dec(mp^.mnt_writeopcount);
if (mp^.mnt_writeopcount < 0) then
begin
Assert(false,'vn_finished_write: neg cnt');
end;
if ((mp^.mnt_kern_flag and MNTK_SUSPEND)<>0) and (mp^.mnt_writeopcount<=0) then
begin
wakeup(@mp^.mnt_writeopcount);
end;
MNT_IUNLOCK(mp);
end;
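//Typical bracket around a vnode-modifying operation (vn_close and vn_truncate
//below follow this pattern): vn_start_write() blocks while the filesystem is
//suspended and bumps mnt_writeopcount, vn_finished_write() undoes both:
//
//  vn_start_write(vp, @mp, V_WAIT);
//  vn_lock(vp, LK_EXCLUSIVE or LK_RETRY,{$INCLUDE %FILE%},{$INCLUDE %LINENUM%});
//  ...modify vp...
//  vput(vp);
//  vn_finished_write(mp);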
function vn_close(vp:p_vnode;flags:Integer):Integer;
var
mp:p_mount;
error, lock_flags:Integer;
begin
lock_flags:=LK_EXCLUSIVE;
if (vp^.v_type<>VFIFO) and
((flags and FWRITE)=0) and
(vp^.v_mount<>nil) then
if ((p_mount(vp^.v_mount)^.mnt_kern_flag and MNTK_EXTENDED_SHARED)<>0) then
begin
lock_flags:=LK_SHARED;
end;
VFS_ASSERT_GIANT(vp^.v_mount);
vn_start_write(vp, @mp, V_WAIT);
vn_lock(vp, lock_flags or LK_RETRY,{$INCLUDE %FILE%},{$INCLUDE %LINENUM%});
if ((flags and FWRITE)<>0) then
begin
Assert(vp^.v_writecount > 0,'vn_close: negative writecount');
VOP_ADD_WRITECOUNT(vp, -1);
end;
error:=VOP_CLOSE(vp, flags);
vput(vp);
vn_finished_write(mp);
Exit(error);
end;
function vn_stat(vp:p_vnode;sb:p_stat):Integer;
var
vattr:t_vattr;
vap:p_vattr;
error:Integer;
mode:WORD;
begin
//error:=mac_vnode_check_stat(active_cred, file_cred, vp);
//if (error<>0) then
// Exit(error);
vap:=@vattr;
{
* Initialize defaults for new and unusual fields, so that file
* systems which don't support these fields don't need to know
* about them.
}
vap^.va_birthtime.tv_sec :=-1;
vap^.va_birthtime.tv_nsec:=0;
vap^.va_fsid:=VNOVAL;
vap^.va_rdev:=NODEV;
error:=VOP_GETATTR(vp, vap);
if (error<>0) then
begin
Exit(error);
end;
{
* Zero the spare stat fields
}
sb^:=Default(t_stat);
{
* Copy from vattr table
}
if (vap^.va_fsid<>VNOVAL) then
sb^.st_dev:=vap^.va_fsid
else
sb^.st_dev:=p_mount(vp^.v_mount)^.mnt_stat.f_fsid.val[0];
sb^.st_ino:=vap^.va_fileid;
mode:=vap^.va_mode;
case vap^.va_type of
VREG:mode:=mode or S_IFREG;
VDIR:mode:=mode or S_IFDIR;
VBLK:mode:=mode or S_IFBLK;
VCHR:mode:=mode or S_IFCHR;
VLNK:mode:=mode or S_IFLNK;
VSOCK:mode:=mode or S_IFSOCK;
VFIFO:mode:=mode or S_IFIFO;
else
Exit(EBADF);
end;
sb^.st_mode :=mode;
sb^.st_nlink:=vap^.va_nlink;
sb^.st_uid :=vap^.va_uid;
sb^.st_gid :=vap^.va_gid;
sb^.st_rdev :=vap^.va_rdev;
if (vap^.va_size > High(Int64)) then
begin
Exit(EOVERFLOW);
end;
sb^.st_size:=vap^.va_size;
sb^.st_atim:=vap^.va_atime;
sb^.st_mtim:=vap^.va_mtime;
sb^.st_ctim:=vap^.va_ctime;
sb^.st_birthtim:=vap^.va_birthtime;
{
* According to www.opengroup.org, the meaning of st_blksize is
* "a filesystem-specific preferred I/O block size for this
* object. In some filesystem types, this may vary from file
* to file"
* Use minimum/default of PAGE_SIZE (e.g. for VCHR).
}
if (PAGE_SIZE>vap^.va_blocksize) then
begin
sb^.st_blksize:=PAGE_SIZE;
end else
begin
sb^.st_blksize:=vap^.va_blocksize;
end;
sb^.st_flags:=vap^.va_flags;
//if (priv_check(td, PRIV_VFS_GENERATION)) then
// sb^.st_gen:=0;
//else
sb^.st_gen:=vap^.va_gen;
sb^.st_blocks:=vap^.va_bytes div S_BLKSIZE;
Exit(0);
end;
function foffset_get(fp:p_file):Int64; inline;
begin
Result:=(foffset_lock(fp, FOF_NOLOCK));
end;
function foffset_lock(fp:p_file;flags:Integer):Int64;
var
mtxp:p_mtx;
begin
Result:=0;
Assert((flags and FOF_OFFSET)=0, 'FOF_OFFSET passed');
mtxp:=mtx_pool_find(mtxpool_sleep, fp);
mtx_lock(mtxp^);
if ((flags and FOF_NOLOCK)=0) then
begin
while ((fp^.f_vnread_flags and FOFFSET_LOCKED)<>0) do
begin
fp^.f_vnread_flags:=fp^.f_vnread_flags or FOFFSET_LOCK_WAITING;
msleep(@fp^.f_vnread_flags, mtxp, PUSER, 'vofflock', 0);
end;
fp^.f_vnread_flags:=fp^.f_vnread_flags or FOFFSET_LOCKED;
end;
Result:=fp^.f_offset;
mtx_unlock(mtxp^);
end;
procedure foffset_unlock(fp:p_file;val:Int64;flags:Integer);
var
mtxp:p_mtx;
begin
Assert((flags and FOF_OFFSET)=0,'FOF_OFFSET passed');
mtxp:=mtx_pool_find(mtxpool_sleep, fp);
mtx_lock(mtxp^);
if ((flags and FOF_NOUPDATE)=0) then
begin
fp^.f_offset:=val;
end;
if ((flags and FOF_NEXTOFF)<>0) then
begin
fp^.f_nextoff:=val;
end;
if ((flags and FOF_NOLOCK)=0) then
begin
Assert((fp^.f_vnread_flags and FOFFSET_LOCKED)<>0,'Lost FOFFSET_LOCKED');
if ((fp^.f_vnread_flags and FOFFSET_LOCK_WAITING)<>0) then
begin
wakeup(@fp^.f_vnread_flags);
end;
fp^.f_vnread_flags:=0;
end;
mtx_unlock(mtxp^);
end;
procedure foffset_lock_uio(fp:p_file;uio:p_uio;flags:Integer);
begin
if ((flags and FOF_OFFSET)=0) then
begin
uio^.uio_offset:=foffset_lock(fp, flags);
end;
end;
procedure foffset_unlock_uio(fp:p_file;uio:p_uio;flags:Integer);
begin
if ((flags and FOF_OFFSET)=0) then
begin
foffset_unlock(fp, uio^.uio_offset, flags);
end;
end;
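//Usage sketch, mirroring vn_io_fault below: an fo_read/fo_write handler that
//was not called with FOF_OFFSET brackets the transfer with these helpers so
//concurrent users of the same open file see a consistent implicit offset:
//
//  foffset_lock_uio(fp, uio, flags);             //load f_offset into the uio
//  error:=vn_read(fp, uio, flags or FOF_OFFSET);
//  foffset_unlock_uio(fp, uio, flags);           //publish the advanced offset
//
//FOF_NOLOCK callers (e.g. foffset_get) only read f_offset under the pool
//mutex without taking the FOFFSET_LOCKED sleep lock.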
{
* Heuristic to detect sequential operation.
}
function sequential_heuristic(uio:p_uio;fp:p_file):Integer;
begin
if ((fp^.f_flag and FRDAHEAD)<>0) then
begin
Exit(fp^.f_seqcount shl IO_SEQSHIFT);
end;
{
* Offset 0 is handled specially. open() sets f_seqcount to 1 so
* that the first I/O is normally considered to be slightly
* sequential. Seeking to offset 0 doesn't change sequentiality
* unless previous seeks have reduced f_seqcount to 0, in which
* case offset 0 is not special.
}
if ((uio^.uio_offset=0) and (fp^.f_seqcount > 0)) or
(uio^.uio_offset=fp^.f_nextoff) then
begin
{
* f_seqcount is in units of fixed-size blocks so that it
* depends mainly on the amount of sequential I/O and not
* much on the number of sequential I/O's. The fixed size
* of 16384 is hard-coded here since it is (not quite) just
* a magic size that works well here. This size is more
* closely related to the best I/O size for real disks than
* to any block size used by software.
}
Inc(fp^.f_seqcount,((uio^.uio_resid+(16384 - 1)) div 16384));
if (fp^.f_seqcount > IO_SEQMAX) then
begin
fp^.f_seqcount:=IO_SEQMAX;
end;
Exit(fp^.f_seqcount shl IO_SEQSHIFT);
end;
{ Not sequential. Quickly draw-down sequentiality. }
if (fp^.f_seqcount > 1) then
fp^.f_seqcount:=1
else
fp^.f_seqcount:=0;
Exit(0);
end;
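{
* Worked example (derived from the code above): with f_seqcount=1 a
* sequential 65536-byte transfer adds (65536+16383) div 16384 = 4, so
* f_seqcount becomes 5 (capped at IO_SEQMAX) and the caller gets
* 5 shl IO_SEQSHIFT merged into its ioflag as a read-ahead hint; a
* non-sequential access instead decays f_seqcount and returns 0.
}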
function get_advice(fp:p_file;uio:p_uio):Integer;
var
mtxp:p_mtx;
f_advice:p_fadvise_info;
ret:Integer;
begin
ret:=POSIX_FADV_NORMAL;
if (fp^.f_advice=nil) then Exit(ret);
mtxp:=mtx_pool_find(mtxpool_sleep, fp);
mtx_lock(mtxp^);
f_advice:=fp^.f_advice;
if (uio^.uio_offset >= f_advice^.fa_start) and
(uio^.uio_offset + uio^.uio_resid <= f_advice^.fa_end) then
begin
ret:=f_advice^.fa_advice;
end;
mtx_unlock(mtxp^);
Exit(ret);
end;
{
* File table vnode read routine.
}
function vn_read(fp:p_file;uio:p_uio;flags:Integer):Integer;
var
td:p_kthread;
vp:p_vnode;
mtxp:p_mtx;
error,ioflag:Integer;
advice,vfslocked:Integer;
offset,start,__end:Int64;
f_advice:p_fadvise_info;
begin
td:=curkthread;
Assert(uio^.uio_td=td, 'uio_td %p is not td %p');
Assert((flags and FOF_OFFSET)<>0, 'No FOF_OFFSET');
vp:=fp^.f_vnode;
ioflag:=0;
if ((fp^.f_flag and FNONBLOCK)<>0) then
begin
ioflag:=ioflag or IO_NDELAY;
end;
if ((fp^.f_flag and O_DIRECT)<>0) then
begin
ioflag:=ioflag or IO_DIRECT;
end;
advice:=get_advice(fp, uio);
vfslocked:=VFS_LOCK_GIANT(vp^.v_mount);
vn_lock(vp, LK_SHARED or LK_RETRY,{$INCLUDE %FILE%},{$INCLUDE %LINENUM%});
case advice of
POSIX_FADV_NORMAL,
POSIX_FADV_SEQUENTIAL,
POSIX_FADV_NOREUSE:
ioflag:=ioflag or sequential_heuristic(uio, fp);
POSIX_FADV_RANDOM:;
{ Disable read-ahead for random I/O. }
else;
end;
offset:=uio^.uio_offset;
//error:=mac_vnode_check_read(active_cred, fp^.f_cred, vp);
//if (error=0) then
error:=VOP_READ(vp, uio, ioflag);
fp^.f_nextoff:=uio^.uio_offset;
VOP_UNLOCK(vp, 0);
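{
* POSIX_FADV_NOREUSE bookkeeping: remember the byte range just transferred
* and, when it abuts the previously recorded range, coalesce the two so a
* run of sequential NOREUSE reads is tracked as one contiguous region in
* fa_prevstart..fa_prevend.
}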
if (error=0) and
(advice=POSIX_FADV_NOREUSE) and
(offset<>uio^.uio_offset) then
begin
start:=offset;
__end:=uio^.uio_offset - 1;
mtxp:=mtx_pool_find(mtxpool_sleep, fp);
mtx_lock(mtxp^);
f_advice:=fp^.f_advice;
if (f_advice<>nil) then
if (f_advice^.fa_advice=POSIX_FADV_NOREUSE) then
begin
if (start<>0) and (f_advice^.fa_prevend + 1=start) then
begin
start:=f_advice^.fa_prevstart;
end else
if (f_advice^.fa_prevstart<>0) and
(f_advice^.fa_prevstart=__end + 1) then
begin
__end:=f_advice^.fa_prevend;
end;
f_advice^.fa_prevstart:=start;
f_advice^.fa_prevend :=__end;
end;
mtx_unlock(mtxp^);
end;
VFS_UNLOCK_GIANT(vfslocked);
Exit(error);
end;
{
* File table vnode write routine.
}
function vn_write(fp:p_file;uio:p_uio;flags:Integer):Integer;
label
unlock;
var
td:p_kthread;
vp:p_vnode;
mp:p_mount;
mtxp:p_mtx;
error,ioflag,lock_flags:Integer;
advice,vfslocked:Integer;
offset,start,__end:Int64;
f_advice:p_fadvise_info;
begin
td:=curkthread;
Assert(uio^.uio_td=td, 'uio_td %p is not td %p');
Assert((flags and FOF_OFFSET)<>0, 'No FOF_OFFSET');
vp:=fp^.f_vnode;
vfslocked:=VFS_LOCK_GIANT(vp^.v_mount);
//if (vp^.v_type=VREG) then
// bwillwrite();
ioflag:=IO_UNIT;
if (vp^.v_type=VREG) and ((fp^.f_flag and O_APPEND)<>0) then
begin
ioflag:=ioflag or IO_APPEND;
end;
if ((fp^.f_flag and FNONBLOCK)<>0) then
begin
ioflag:=ioflag or IO_NDELAY;
end;
if ((fp^.f_flag and O_DIRECT)<>0) then
begin
ioflag:=ioflag or IO_DIRECT;
end;
if ((fp^.f_flag and O_FSYNC)<>0) then
begin
ioflag:=ioflag or IO_SYNC;
end;
if (vp^.v_mount<>nil) then
if ((p_mount(vp^.v_mount)^.mnt_flag and MNT_SYNCHRONOUS)<>0) then
begin
ioflag:=ioflag or IO_SYNC;
end;
mp:=nil;
if (vp^.v_type<>VCHR) then
begin
error:=vn_start_write(vp, @mp, V_WAIT or PCATCH);
if (error<>0) then
goto unlock;
end;
advice:=get_advice(fp, uio);
if (MNT_SHARED_WRITES(mp) or
((mp=nil) and MNT_SHARED_WRITES(vp^.v_mount))) and
((flags and FOF_OFFSET)<>0) then
begin
lock_flags:=LK_SHARED;
end else
begin
lock_flags:=LK_EXCLUSIVE;
end;
vn_lock(vp, lock_flags or LK_RETRY,{$INCLUDE %FILE%},{$INCLUDE %LINENUM%});
case advice of
POSIX_FADV_NORMAL,
POSIX_FADV_SEQUENTIAL,
POSIX_FADV_NOREUSE:
ioflag:=ioflag or sequential_heuristic(uio, fp);
POSIX_FADV_RANDOM:;
{ XXX: Is this correct? }
end;
offset:=uio^.uio_offset;
//error:=mac_vnode_check_write(active_cred, fp^.f_cred, vp);
//if (error=0) then
error:=VOP_WRITE(vp, uio, ioflag);
fp^.f_nextoff:=uio^.uio_offset;
VOP_UNLOCK(vp, 0);
if (vp^.v_type<>VCHR) then
vn_finished_write(mp);
if (error=0) and
(advice=POSIX_FADV_NOREUSE) and
(offset<>uio^.uio_offset) then
begin
start:=offset;
__end:=uio^.uio_offset - 1;
mtxp:=mtx_pool_find(mtxpool_sleep, fp);
mtx_lock(mtxp^);
f_advice:=fp^.f_advice;
if (f_advice<>nil) then
if (f_advice^.fa_advice=POSIX_FADV_NOREUSE) then
begin
if (start<>0) and (f_advice^.fa_prevend + 1=start) then
begin
start:=f_advice^.fa_prevstart;
end else
if (f_advice^.fa_prevstart<>0) and
(f_advice^.fa_prevstart=__end + 1) then
begin
__end:=f_advice^.fa_prevend;
end;
f_advice^.fa_prevstart:=start;
f_advice^.fa_prevend :=__end;
end;
mtx_unlock(mtxp^);
end;
unlock:
VFS_UNLOCK_GIANT(vfslocked);
Exit(error);
end;
const
vn_io_fault_enable:Boolean=False;
function vn_io_fault(fp:p_file;uio:p_uio;flags:Integer):Integer;
label
out_last;
var
//td:p_kthread;
//vm_page_t ma[io_hold_cnt + 2];
//uio_clone:p_uio;
//short_uio:T_uio;
//short_iovec:array[0..0] of iovec;
doio:fo_rdwr_t;
vp:p_vnode;
rl_cookie:Pointer;
mp:p_mount;
//vm_page_t *prev_td_ma;
error:Integer;
//cnt,save,saveheld,prev_td_ma_cnt:Integer;
//addr,__end:QWORD;
//prot:Integer;
//len,resid:QWORD;
//adv:Int64;
NO_IOPF:Boolean;
begin
//td:=curkthread;
rl_cookie:=nil;
if (uio^.uio_rw=UIO_READ) then
doio:=@vn_read
else
doio:=@vn_write;
vp:=fp^.f_vnode;
foffset_lock_uio(fp, uio, flags);
NO_IOPF:=False;
mp:=vp^.v_mount;
if (mp<>nil) then
begin
NO_IOPF:=((mp^.mnt_kern_flag and MNTK_NO_IOPF)<>0); //the mount opts out of the vn_io_fault handling
end;
if (uio^.uio_segflg<>UIO_USERSPACE) or
(vp^.v_type<>VREG) or
NO_IOPF or
(not vn_io_fault_enable) then
begin
error:=doio(fp, uio, flags or FOF_OFFSET);
goto out_last;
end;
if (uio^.uio_rw=UIO_READ) then
begin
//prot:=VM_PROT_WRITE;
rl_cookie:=vn_rangelock_rlock(vp, uio^.uio_offset, uio^.uio_offset + uio^.uio_resid);
end else
begin
//prot:=VM_PROT_READ;
if ((fp^.f_flag and O_APPEND)<>0) or ((flags and FOF_OFFSET)=0) then
begin
{ For appenders, punt and lock the whole range. }
rl_cookie:=vn_rangelock_wlock(vp, 0, High(Int64))
end else
begin
rl_cookie:=vn_rangelock_wlock(vp, uio^.uio_offset, uio^.uio_offset + uio^.uio_resid);
end;
end;
error:=doio(fp, uio, flags or FOF_OFFSET);
{
uio_clone:=cloneuio(uio);
resid:=uio^.uio_resid;
short_uio.uio_segflg:=UIO_USERSPACE;
short_uio.uio_rw:=uio^.uio_rw;
short_uio.uio_td:=uio^.uio_td;
if (uio^.uio_rw=UIO_READ) then
begin
prot:=VM_PROT_WRITE;
rl_cookie:=vn_rangelock_rlock(vp, uio^.uio_offset, uio^.uio_offset + uio^.uio_resid);
end else
begin
prot:=VM_PROT_READ;
if ((fp^.f_flag and O_APPEND)<>0) or ((flags and FOF_OFFSET)=0) then
begin
// For appenders, punt and lock the whole range.
rl_cookie:=vn_rangelock_wlock(vp, 0, High(Int64))
end else
begin
rl_cookie:=vn_rangelock_wlock(vp, uio^.uio_offset, uio^.uio_offset + uio^.uio_resid);
end;
end;
save:=vm_fault_disable_pagefaults();
error:=doio(fp, uio, flags or FOF_OFFSET, td);
if (error<>EFAULT) then
goto _out;
atomic_add_long(@vn_io_faults_cnt, 1);
uio_clone^.uio_segflg:=UIO_NOCOPY;
uiomove(nil, resid - uio^.uio_resid, uio_clone);
uio_clone^.uio_segflg:=uio^.uio_segflg;
saveheld:=curthread_pflags_set(TDP_UIOHELD);
prev_td_ma:=td^.td_ma;
prev_td_ma_cnt:=td^.td_ma_cnt;
while (uio_clone^.uio_resid<>0) do
begin
len:=uio_clone^.uio_iov^.iov_len;
if (len=0) then
begin
Assert(uio_clone^.uio_iovcnt >= 1, 'iovcnt underflow');
uio_clone^.uio_iov++;
uio_clone^.uio_iovcnt--;
continue;
end;
if (len > io_hold_cnt * PAGE_SIZE)
len:=io_hold_cnt * PAGE_SIZE;
addr:=(uintptr_t)uio_clone^.uio_iov^.iov_base;
__end:=round_page(addr + len);
if (__end < addr) then
begin
error:=EFAULT;
break;
end;
cnt:=atop(__end - trunc_page(addr));
// A perfectly misaligned address and length could cause
// both the start and the end of the chunk to use a partial
// page. +2 accounts for such a situation.
cnt:=vm_fault_quick_hold_pages(@td^.td_proc^.p_vmspace^.vm_map, addr, len, prot, ma, io_hold_cnt + 2);
if (cnt=-1) then
begin
error:=EFAULT;
break;
end;
short_uio.uio_iov:=@short_iovec[0];
short_iovec[0].iov_base:=(void *)addr;
short_uio.uio_iovcnt:=1;
short_uio.uio_resid:=short_iovec[0].iov_len:=len;
short_uio.uio_offset:=uio_clone^.uio_offset;
td^.td_ma:=ma;
td^.td_ma_cnt:=cnt;
error:=doio(fp, @short_uio, flags or FOF_OFFSET);
vm_page_unhold_pages(ma, cnt);
adv:=len - short_uio.uio_resid;
uio_clone^.uio_iov^.iov_base = (char *)uio_clone^.uio_iov^.iov_base + adv;
uio_clone^.uio_iov^.iov_len -= adv;
uio_clone^.uio_resid -= adv;
uio_clone^.uio_offset += adv;
uio^.uio_resid -= adv;
uio^.uio_offset += adv;
if (error<>0 or adv=0)
break;
end;
td^.td_ma:=prev_td_ma;
td^.td_ma_cnt:=prev_td_ma_cnt;
curthread_pflags_restore(saveheld);
_out:
vm_fault_enable_pagefaults(save);
vn_rangelock_unlock(vp, rl_cookie);
free(uio_clone, M_IOV);
}
out_last:
if (error<>0) then
begin
Assert(false,'vn_io_fault:'+IntToStr(error));
end;
if (rl_cookie<>nil) then
begin
vn_rangelock_unlock(vp, rl_cookie);
end;
foffset_unlock_uio(fp, uio, flags);
Exit(error);
end;
{
* File table truncate routine.
}
function vn_truncate(fp:p_file;length:Int64):Integer;
label
out1,
_out;
var
vattr:t_vattr;
mp:p_mount;
vp:p_vnode;
rl_cookie:Pointer;
vfslocked:Integer;
error:Integer;
begin
vp:=fp^.f_vnode;
{
* Lock the whole range for truncation. Otherwise split i/o
* might happen partly before and partly after the truncation.
}
rl_cookie:=vn_rangelock_wlock(vp, 0, High(Int64));
vfslocked:=VFS_LOCK_GIANT(vp^.v_mount);
error:=vn_start_write(vp, @mp, V_WAIT or PCATCH);
if (error<>0) then
begin
goto out1;
end;
vn_lock(vp, LK_EXCLUSIVE or LK_RETRY,{$INCLUDE %FILE%},{$INCLUDE %LINENUM%});
if (vp^.v_type=VDIR) then
begin
error:=EISDIR;
goto _out;
end;
//error:=mac_vnode_check_write(active_cred, fp^.f_cred, vp);
//if (error<>0) then
// goto _out;
error:=vn_writechk(vp);
if (error=0) then
begin
VATTR_NULL(@vattr);
vattr.va_size:=length;
error:=VOP_SETATTR(vp, @vattr);
end;
_out:
VOP_UNLOCK(vp, 0);
vn_finished_write(mp);
out1:
VFS_UNLOCK_GIANT(vfslocked);
vn_rangelock_unlock(vp, rl_cookie);
Exit(error);
end;
{
* File table vnode ioctl routine.
}
function vn_ioctl(fp:p_file;com:QWORD;data:Pointer):Integer;
var
vp:p_vnode;
vattr:t_vattr;
vfslocked:Integer;
error:Integer;
begin
vp:=fp^.f_vnode;
vfslocked:=VFS_LOCK_GIANT(vp^.v_mount);
error:=ENOTTY;
case vp^.v_type of
VREG,
VDIR:
begin
if (com=FIONREAD) then
begin
vn_lock(vp, LK_SHARED or LK_RETRY,{$INCLUDE %FILE%},{$INCLUDE %LINENUM%});
error:=VOP_GETATTR(vp, @vattr);
VOP_UNLOCK(vp, 0);
if (error=0) then
begin
PInteger(data)^:=vattr.va_size - fp^.f_offset;
end;
end else
if (com=FIONBIO) or (com=FIOASYNC) then { XXX }
error:=0
else
error:=VOP_IOCTL(vp, com, data, fp^.f_flag);
end;
else;
end;
VFS_UNLOCK_GIANT(vfslocked);
Exit(error);
end;
{
* File table vnode poll routine.
}
function vn_poll(fp:p_file;events:Integer):Integer;
var
vp:p_vnode;
vfslocked:Integer;
error:Integer;
begin
vp:=fp^.f_vnode;
vfslocked:=VFS_LOCK_GIANT(vp^.v_mount);
//vn_lock(vp, LK_EXCLUSIVE or LK_RETRY);
//error:=mac_vnode_check_poll(active_cred, fp^.f_cred, vp);
//VOP_UNLOCK(vp, 0);
//if (error=0) then
error:=VOP_POLL(vp, events);
VFS_UNLOCK_GIANT(vfslocked);
Exit(error);
end;
{
* File table vnode stat routine.
}
function vn_statfile(fp:p_file;sb:p_stat):Integer;
var
vp:p_vnode;
vfslocked:Integer;
error:Integer;
begin
vp:=fp^.f_vnode;
vfslocked:=VFS_LOCK_GIANT(vp^.v_mount);
vn_lock(vp, LK_SHARED or LK_RETRY,{$INCLUDE %FILE%},{$INCLUDE %LINENUM%});
error:=vn_stat(vp, sb);
VOP_UNLOCK(vp, 0);
VFS_UNLOCK_GIANT(vfslocked);
Exit(error);
end;
{
* File table vnode close routine.
}
function vn_closefile(fp:p_file):Integer;
var
vp:p_vnode;
lf:t_flock;
vfslocked:Integer;
error:Integer;
begin
vp:=fp^.f_vnode;
vfslocked:=VFS_LOCK_GIANT(vp^.v_mount);
if (fp^.f_type=DTYPE_VNODE) and ((fp^.f_flag and FHASLOCK)<>0) then
begin
lf.l_whence:=SEEK_SET;
lf.l_start :=0;
lf.l_len :=0;
lf.l_type :=F_UNLCK;
VOP_ADVLOCK(vp, fp, F_UNLCK, @lf, F_FLOCK);
end;
fp^.f_ops:=@badfileops;
error:=vn_close(vp, fp^.f_flag);
VFS_UNLOCK_GIANT(vfslocked);
Exit(error);
end;
function vn_chmod(fp:p_file;mode:mode_t):Integer;
var
vp:p_vnode;
error,vfslocked:Integer;
begin
vp:=fp^.f_vnode;
vfslocked:=VFS_LOCK_GIANT(vp^.v_mount);
error:=setfmode(vp, mode);
VFS_UNLOCK_GIANT(vfslocked);
Exit(error);
end;
function vn_chown(fp:p_file;uid:uid_t;gid:gid_t):Integer;
var
vp:p_vnode;
error,vfslocked:Integer;
begin
vp:=fp^.f_vnode;
vfslocked:=VFS_LOCK_GIANT(vp^.v_mount);
error:=setfown(vp, uid, gid);
VFS_UNLOCK_GIANT(vfslocked);
Exit(error);
end;
function vn_kqfilter(fp:p_file;kn:p_knote):Integer;
var
error,vfslocked:Integer;
begin
vfslocked:=VFS_LOCK_GIANT(fp^.f_vnode^.v_mount);
error:=VOP_KQFILTER(fp^.f_vnode, kn);
VFS_UNLOCK_GIANT(vfslocked);
Exit(error);
end;
end.